-rw-r--r--.clang-format1
-rw-r--r--.mailmap5
-rw-r--r--CREDITS6
-rw-r--r--Documentation/ABI/obsolete/sysfs-cpuidle9
-rw-r--r--Documentation/ABI/obsolete/sysfs-driver-intel_pmc_bxt22
-rw-r--r--Documentation/ABI/stable/sysfs-devices-node2
-rw-r--r--Documentation/ABI/testing/debugfs-hisi-hpre89
-rw-r--r--Documentation/ABI/testing/debugfs-hisi-sec94
-rw-r--r--Documentation/ABI/testing/debugfs-hisi-zip70
-rw-r--r--Documentation/ABI/testing/dev-kmsg5
-rw-r--r--Documentation/ABI/testing/procfs-smaps_rollup2
-rw-r--r--Documentation/ABI/testing/sysfs-devices-system-cpu24
-rw-r--r--Documentation/ABI/testing/sysfs-platform-dptf62
-rw-r--r--Documentation/ABI/testing/sysfs-platform-intel-wmi-sbl-fw-update12
-rw-r--r--Documentation/Makefile6
-rw-r--r--Documentation/PCI/boot-interrupts.rst34
-rw-r--r--Documentation/RCU/Design/Requirements/Requirements.rst61
-rw-r--r--Documentation/admin-guide/acpi/ssdt-overlays.rst2
-rw-r--r--Documentation/admin-guide/bug-hunting.rst53
-rw-r--r--Documentation/admin-guide/cgroup-v2.rst24
-rw-r--r--Documentation/admin-guide/cpu-load.rst2
-rw-r--r--Documentation/admin-guide/hw-vuln/l1tf.rst2
-rw-r--r--Documentation/admin-guide/init.rst76
-rw-r--r--Documentation/admin-guide/kdump/vmcoreinfo.rst6
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt54
-rw-r--r--Documentation/admin-guide/kernel-per-CPU-kthreads.rst2
-rw-r--r--Documentation/admin-guide/mm/userfaultfd.rst211
-rw-r--r--Documentation/admin-guide/nfs/nfsroot.rst2
-rw-r--r--Documentation/admin-guide/numastat.rst31
-rw-r--r--Documentation/admin-guide/perf-security.rst86
-rw-r--r--Documentation/admin-guide/pm/cpuidle.rst20
-rw-r--r--Documentation/admin-guide/pm/intel-speed-select.rst917
-rw-r--r--Documentation/admin-guide/pm/intel_pstate.rst32
-rw-r--r--Documentation/admin-guide/pm/working-state.rst1
-rw-r--r--Documentation/admin-guide/pstore-blk.rst243
-rw-r--r--Documentation/admin-guide/ramoops.rst14
-rw-r--r--Documentation/admin-guide/ras.rst28
-rw-r--r--Documentation/admin-guide/sysctl/kernel.rst173
-rw-r--r--Documentation/arm64/amu.rst42
-rw-r--r--Documentation/arm64/booting.rst39
-rw-r--r--Documentation/arm64/cpu-feature-registers.rst2
-rw-r--r--Documentation/arm64/elf_hwcaps.rst5
-rw-r--r--Documentation/arm64/silicon-errata.rst8
-rw-r--r--Documentation/block/biovecs.rst2
-rw-r--r--Documentation/block/index.rst1
-rw-r--r--Documentation/block/inline-encryption.rst263
-rw-r--r--Documentation/conf.py38
-rw-r--r--Documentation/core-api/cachetlb.rst2
-rw-r--r--Documentation/core-api/debugging-via-ohci1394.rst (renamed from Documentation/debugging-via-ohci1394.txt)0
-rw-r--r--Documentation/core-api/dma-api-howto.rst (renamed from Documentation/DMA-API-HOWTO.txt)0
-rw-r--r--Documentation/core-api/dma-api.rst (renamed from Documentation/DMA-API.txt)0
-rw-r--r--Documentation/core-api/dma-attributes.rst (renamed from Documentation/DMA-attributes.txt)0
-rw-r--r--Documentation/core-api/dma-isa-lpc.rst (renamed from Documentation/DMA-ISA-LPC.txt)0
-rw-r--r--Documentation/core-api/index.rst9
-rw-r--r--Documentation/core-api/irq/concepts.rst (renamed from Documentation/IRQ.txt)0
-rw-r--r--Documentation/core-api/irq/index.rst11
-rw-r--r--Documentation/core-api/irq/irq-affinity.rst (renamed from Documentation/IRQ-affinity.txt)0
-rw-r--r--Documentation/core-api/irq/irq-domain.rst (renamed from Documentation/IRQ-domain.txt)3
-rw-r--r--Documentation/core-api/irq/irqflags-tracing.rst (renamed from Documentation/irqflags-tracing.txt)0
-rw-r--r--Documentation/core-api/kobject.rst28
-rw-r--r--Documentation/core-api/kref.rst (renamed from Documentation/kref.txt)0
-rw-r--r--Documentation/core-api/printk-basics.rst115
-rw-r--r--Documentation/core-api/printk-formats.rst24
-rw-r--r--Documentation/core-api/protection-keys.rst5
-rw-r--r--Documentation/core-api/rbtree.rst (renamed from Documentation/rbtree.txt)0
-rw-r--r--Documentation/dev-tools/kgdb.rst24
-rw-r--r--Documentation/devicetree/bindings/display/allwinner,sun6i-a31-mipi-dsi.yaml2
-rw-r--r--Documentation/devicetree/bindings/display/bridge/adi,adv7123.txt50
-rw-r--r--Documentation/devicetree/bindings/display/bridge/anx6345.yaml8
-rw-r--r--Documentation/devicetree/bindings/display/bridge/chrontel,ch7033.yaml77
-rw-r--r--Documentation/devicetree/bindings/display/bridge/dumb-vga-dac.txt50
-rw-r--r--Documentation/devicetree/bindings/display/bridge/dw_mipi_dsi.txt32
-rw-r--r--Documentation/devicetree/bindings/display/bridge/ite,it6505.yaml91
-rw-r--r--Documentation/devicetree/bindings/display/bridge/lvds-codec.yaml8
-rw-r--r--Documentation/devicetree/bindings/display/bridge/nwl-dsi.yaml226
-rw-r--r--Documentation/devicetree/bindings/display/bridge/ps8640.yaml8
-rw-r--r--Documentation/devicetree/bindings/display/bridge/simple-bridge.yaml99
-rw-r--r--Documentation/devicetree/bindings/display/bridge/snps,dw-mipi-dsi.yaml68
-rw-r--r--Documentation/devicetree/bindings/display/bridge/thine,thc63lvd1024.txt66
-rw-r--r--Documentation/devicetree/bindings/display/bridge/thine,thc63lvd1024.yaml121
-rw-r--r--Documentation/devicetree/bindings/display/bridge/ti,ths813x.txt51
-rw-r--r--Documentation/devicetree/bindings/display/dsi-controller.yaml4
-rw-r--r--Documentation/devicetree/bindings/display/mediatek/mediatek,dpi.txt6
-rw-r--r--Documentation/devicetree/bindings/display/mediatek/mediatek,dsi.txt10
-rw-r--r--Documentation/devicetree/bindings/display/panel/arm,versatile-tft-panel.txt31
-rw-r--r--Documentation/devicetree/bindings/display/panel/arm,versatile-tft-panel.yaml54
-rw-r--r--Documentation/devicetree/bindings/display/panel/asus,z00t-tm5p5-nt35596.yaml56
-rw-r--r--Documentation/devicetree/bindings/display/panel/boe,himax8279d.txt24
-rw-r--r--Documentation/devicetree/bindings/display/panel/boe,himax8279d.yaml59
-rw-r--r--Documentation/devicetree/bindings/display/panel/boe,tv101wum-nl6.yaml2
-rw-r--r--Documentation/devicetree/bindings/display/panel/display-timings.yaml8
-rw-r--r--Documentation/devicetree/bindings/display/panel/feiyang,fy07024di26a30d.txt20
-rw-r--r--Documentation/devicetree/bindings/display/panel/feiyang,fy07024di26a30d.yaml58
-rw-r--r--Documentation/devicetree/bindings/display/panel/ilitek,ili9322.txt49
-rw-r--r--Documentation/devicetree/bindings/display/panel/ilitek,ili9322.yaml71
-rw-r--r--Documentation/devicetree/bindings/display/panel/ilitek,ili9881c.txt20
-rw-r--r--Documentation/devicetree/bindings/display/panel/ilitek,ili9881c.yaml50
-rw-r--r--Documentation/devicetree/bindings/display/panel/innolux,p097pfg.txt24
-rw-r--r--Documentation/devicetree/bindings/display/panel/innolux,p097pfg.yaml56
-rw-r--r--Documentation/devicetree/bindings/display/panel/innolux,p120zdg-bf1.txt22
-rw-r--r--Documentation/devicetree/bindings/display/panel/innolux,p120zdg-bf1.yaml43
-rw-r--r--Documentation/devicetree/bindings/display/panel/jdi,lt070me05000.txt31
-rw-r--r--Documentation/devicetree/bindings/display/panel/jdi,lt070me05000.yaml69
-rw-r--r--Documentation/devicetree/bindings/display/panel/kingdisplay,kd035g6-54nt.txt42
-rw-r--r--Documentation/devicetree/bindings/display/panel/kingdisplay,kd035g6-54nt.yaml65
-rw-r--r--Documentation/devicetree/bindings/display/panel/kingdisplay,kd097d04.txt22
-rw-r--r--Documentation/devicetree/bindings/display/panel/leadtek,ltk050h3146w.yaml51
-rw-r--r--Documentation/devicetree/bindings/display/panel/lg,acx467akm-7.txt7
-rw-r--r--Documentation/devicetree/bindings/display/panel/lg,ld070wx3-sl01.txt7
-rw-r--r--Documentation/devicetree/bindings/display/panel/lg,lg4573.txt19
-rw-r--r--Documentation/devicetree/bindings/display/panel/lg,lg4573.yaml45
-rw-r--r--Documentation/devicetree/bindings/display/panel/lg,lh500wx1-sd03.txt7
-rw-r--r--Documentation/devicetree/bindings/display/panel/lgphilips,lb035q02.txt33
-rw-r--r--Documentation/devicetree/bindings/display/panel/lgphilips,lb035q02.yaml59
-rw-r--r--Documentation/devicetree/bindings/display/panel/olimex,lcd-olinuxino.txt42
-rw-r--r--Documentation/devicetree/bindings/display/panel/olimex,lcd-olinuxino.yaml70
-rw-r--r--Documentation/devicetree/bindings/display/panel/osddisplays,osd101t2587-53ts.txt14
-rw-r--r--Documentation/devicetree/bindings/display/panel/panel-common.yaml17
-rw-r--r--Documentation/devicetree/bindings/display/panel/panel-simple-dsi.yaml14
-rw-r--r--Documentation/devicetree/bindings/display/panel/panel-simple.yaml22
-rw-r--r--Documentation/devicetree/bindings/display/panel/raydium,rm67191.txt41
-rw-r--r--Documentation/devicetree/bindings/display/panel/raydium,rm67191.yaml75
-rw-r--r--Documentation/devicetree/bindings/display/panel/samsung,amoled-mipi-dsi.yaml65
-rw-r--r--Documentation/devicetree/bindings/display/panel/samsung,ld9040.txt66
-rw-r--r--Documentation/devicetree/bindings/display/panel/samsung,ld9040.yaml107
-rw-r--r--Documentation/devicetree/bindings/display/panel/samsung,s6d16d0.txt30
-rw-r--r--Documentation/devicetree/bindings/display/panel/samsung,s6d16d0.yaml56
-rw-r--r--Documentation/devicetree/bindings/display/panel/samsung,s6e3ha2.txt31
-rw-r--r--Documentation/devicetree/bindings/display/panel/samsung,s6e63j0x03.txt24
-rw-r--r--Documentation/devicetree/bindings/display/panel/samsung,s6e63m0.txt33
-rw-r--r--Documentation/devicetree/bindings/display/panel/samsung,s6e63m0.yaml60
-rw-r--r--Documentation/devicetree/bindings/display/panel/seiko,43wvf1g.txt23
-rw-r--r--Documentation/devicetree/bindings/display/panel/seiko,43wvf1g.yaml50
-rw-r--r--Documentation/devicetree/bindings/display/panel/sharp,lq150x1lg11.txt36
-rw-r--r--Documentation/devicetree/bindings/display/panel/sharp,lq150x1lg11.yaml58
-rw-r--r--Documentation/devicetree/bindings/display/panel/sharp,ls037v7dw01.txt43
-rw-r--r--Documentation/devicetree/bindings/display/panel/sharp,ls037v7dw01.yaml68
-rw-r--r--Documentation/devicetree/bindings/display/panel/sharp,ls043t1le01.txt22
-rw-r--r--Documentation/devicetree/bindings/display/panel/sharp,ls043t1le01.yaml51
-rw-r--r--Documentation/devicetree/bindings/display/panel/simple-panel.txt1
-rw-r--r--Documentation/devicetree/bindings/display/panel/sitronix,st7701.txt30
-rw-r--r--Documentation/devicetree/bindings/display/panel/sitronix,st7701.yaml69
-rw-r--r--Documentation/devicetree/bindings/display/panel/sitronix,st7789v.txt37
-rw-r--r--Documentation/devicetree/bindings/display/panel/sitronix,st7789v.yaml63
-rw-r--r--Documentation/devicetree/bindings/display/panel/sony,acx565akm.txt30
-rw-r--r--Documentation/devicetree/bindings/display/panel/sony,acx565akm.yaml57
-rw-r--r--Documentation/devicetree/bindings/display/panel/startek,startek-kd050c.txt4
-rw-r--r--Documentation/devicetree/bindings/display/panel/startek,startek-kd050c.yaml33
-rw-r--r--Documentation/devicetree/bindings/display/panel/tpo,td.yaml65
-rw-r--r--Documentation/devicetree/bindings/display/panel/tpo,td028ttec1.txt32
-rw-r--r--Documentation/devicetree/bindings/display/panel/tpo,td043mtea1.txt33
-rw-r--r--Documentation/devicetree/bindings/display/panel/visionox,rm69299.yaml57
-rw-r--r--Documentation/devicetree/bindings/display/renesas,du.txt10
-rw-r--r--Documentation/devicetree/bindings/display/rockchip/rockchip,rk3066-hdmi.txt72
-rw-r--r--Documentation/devicetree/bindings/display/rockchip/rockchip,rk3066-hdmi.yaml140
-rw-r--r--Documentation/devicetree/bindings/display/rockchip/rockchip-vop.txt74
-rw-r--r--Documentation/devicetree/bindings/display/rockchip/rockchip-vop.yaml134
-rw-r--r--Documentation/devicetree/bindings/hwmon/baikal,bt1-pvt.yaml107
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/loongson,htvec.yaml57
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/loongson,pch-msi.yaml62
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/loongson,pch-pic.yaml56
-rw-r--r--Documentation/devicetree/bindings/mfd/gateworks-gsc.yaml196
-rw-r--r--Documentation/devicetree/bindings/mfd/max8998.txt8
-rw-r--r--Documentation/devicetree/bindings/mips/loongson/rs780e-acpi.yaml40
-rw-r--r--Documentation/devicetree/bindings/mmc/amlogic,meson-mx-sdhc.yaml68
-rw-r--r--Documentation/devicetree/bindings/mmc/arasan,sdhci.txt57
-rw-r--r--Documentation/devicetree/bindings/mmc/renesas,mmcif.txt5
-rw-r--r--Documentation/devicetree/bindings/mmc/renesas,sdhi.txt1
-rw-r--r--Documentation/devicetree/bindings/mmc/sdhci-msm.txt14
-rw-r--r--Documentation/devicetree/bindings/mmc/sdhci-pxa.txt50
-rw-r--r--Documentation/devicetree/bindings/mmc/sdhci-pxa.yaml102
-rw-r--r--Documentation/devicetree/bindings/pci/loongson.yaml62
-rw-r--r--Documentation/devicetree/bindings/regulator/anatop-regulator.txt40
-rw-r--r--Documentation/devicetree/bindings/regulator/anatop-regulator.yaml94
-rw-r--r--Documentation/devicetree/bindings/regulator/maxim,max77826.yaml68
-rw-r--r--Documentation/devicetree/bindings/reserved-memory/ramoops.txt13
-rw-r--r--Documentation/devicetree/bindings/rng/arm-cctrng.yaml54
-rw-r--r--Documentation/devicetree/bindings/rtc/dw-apb.txt32
-rw-r--r--Documentation/devicetree/bindings/spi/brcm,spi-bcm-qspi.txt10
-rw-r--r--Documentation/devicetree/bindings/spi/mikrotik,rb4xx-spi.yaml36
-rw-r--r--Documentation/devicetree/bindings/spi/renesas,rspi.yaml144
-rw-r--r--Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.txt41
-rw-r--r--Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml133
-rw-r--r--Documentation/devicetree/bindings/spi/socionext,uniphier-spi.yaml57
-rw-r--r--Documentation/devicetree/bindings/spi/spi-dw.txt24
-rw-r--r--Documentation/devicetree/bindings/spi/spi-rspi.txt73
-rw-r--r--Documentation/devicetree/bindings/spi/spi-uniphier.txt28
-rw-r--r--Documentation/devicetree/bindings/spi/ti_qspi.txt2
-rw-r--r--Documentation/devicetree/bindings/timer/renesas,em-sti.yaml46
-rw-r--r--Documentation/devicetree/bindings/timer/snps,dw-apb-timer.yaml88
-rw-r--r--Documentation/devicetree/bindings/vendor-prefixes.yaml10
-rw-r--r--Documentation/doc-guide/maintainer-profile.rst2
-rw-r--r--Documentation/driver-api/dma-buf.rst4
-rw-r--r--Documentation/driver-api/driver-model/device.rst4
-rw-r--r--Documentation/driver-api/driver-model/overview.rst2
-rw-r--r--Documentation/driver-api/index.rst1
-rw-r--r--Documentation/driver-api/ipmi.rst (renamed from Documentation/IPMI.txt)0
-rw-r--r--Documentation/driver-api/nvdimm/nvdimm.rst4
-rw-r--r--Documentation/driver-api/pm/cpuidle.rst5
-rw-r--r--Documentation/driver-api/pm/devices.rst199
-rw-r--r--Documentation/driver-api/thermal/cpu-idle-cooling.rst3
-rw-r--r--Documentation/driver-api/thermal/index.rst1
-rw-r--r--Documentation/fb/efifb.rst38
-rw-r--r--Documentation/features/core/eBPF-JIT/arch-support.txt2
-rw-r--r--Documentation/features/debug/KASAN/arch-support.txt6
-rw-r--r--Documentation/features/debug/gcov-profile-all/arch-support.txt2
-rw-r--r--Documentation/features/debug/kprobes-on-ftrace/arch-support.txt2
-rw-r--r--Documentation/features/debug/kprobes/arch-support.txt4
-rw-r--r--Documentation/features/debug/kretprobes/arch-support.txt2
-rw-r--r--Documentation/features/debug/stackprotector/arch-support.txt2
-rw-r--r--Documentation/features/debug/uprobes/arch-support.txt2
-rw-r--r--Documentation/features/io/dma-contiguous/arch-support.txt2
-rw-r--r--Documentation/features/locking/lockdep/arch-support.txt2
-rw-r--r--Documentation/features/perf/kprobes-event/arch-support.txt4
-rw-r--r--Documentation/features/perf/perf-regs/arch-support.txt4
-rw-r--r--Documentation/features/perf/perf-stackdump/arch-support.txt4
-rw-r--r--Documentation/features/seccomp/seccomp-filter/arch-support.txt2
-rw-r--r--Documentation/features/vm/huge-vmap/arch-support.txt2
-rw-r--r--Documentation/features/vm/pte_special/arch-support.txt2
-rw-r--r--Documentation/filesystems/9p.rst2
-rw-r--r--Documentation/filesystems/automount-support.rst (renamed from Documentation/filesystems/automount-support.txt)23
-rw-r--r--Documentation/filesystems/caching/backend-api.rst (renamed from Documentation/filesystems/caching/backend-api.txt)165
-rw-r--r--Documentation/filesystems/caching/cachefiles.rst (renamed from Documentation/filesystems/caching/cachefiles.txt)139
-rw-r--r--Documentation/filesystems/caching/fscache.rst565
-rw-r--r--Documentation/filesystems/caching/fscache.txt448
-rw-r--r--Documentation/filesystems/caching/index.rst14
-rw-r--r--Documentation/filesystems/caching/netfs-api.rst (renamed from Documentation/filesystems/caching/netfs-api.txt)172
-rw-r--r--Documentation/filesystems/caching/object.rst (renamed from Documentation/filesystems/caching/object.txt)43
-rw-r--r--Documentation/filesystems/caching/operations.rst (renamed from Documentation/filesystems/caching/operations.txt)45
-rw-r--r--Documentation/filesystems/cifs/cifsroot.rst (renamed from Documentation/filesystems/cifs/cifsroot.txt)56
-rw-r--r--Documentation/filesystems/coda.rst1670
-rw-r--r--Documentation/filesystems/coda.txt1676
-rw-r--r--Documentation/filesystems/configfs.rst (renamed from Documentation/filesystems/configfs/configfs.txt)131
-rw-r--r--Documentation/filesystems/dax.txt144
-rw-r--r--Documentation/filesystems/debugfs.rst5
-rw-r--r--Documentation/filesystems/devpts.rst36
-rw-r--r--Documentation/filesystems/devpts.txt26
-rw-r--r--Documentation/filesystems/dnotify.rst (renamed from Documentation/filesystems/dnotify.txt)13
-rw-r--r--Documentation/filesystems/efivarfs.rst17
-rw-r--r--Documentation/filesystems/f2fs.rst6
-rw-r--r--Documentation/filesystems/fiemap.rst (renamed from Documentation/filesystems/fiemap.txt)135
-rw-r--r--Documentation/filesystems/files.rst (renamed from Documentation/filesystems/files.txt)15
-rw-r--r--Documentation/filesystems/fscrypt.rst33
-rw-r--r--Documentation/filesystems/fuse-io.rst (renamed from Documentation/filesystems/fuse-io.txt)6
-rw-r--r--Documentation/filesystems/index.rst23
-rw-r--r--Documentation/filesystems/locking.rst6
-rw-r--r--Documentation/filesystems/locks.rst (renamed from Documentation/filesystems/locks.txt)14
-rw-r--r--Documentation/filesystems/mandatory-locking.rst (renamed from Documentation/filesystems/mandatory-locking.txt)25
-rw-r--r--Documentation/filesystems/mount_api.rst (renamed from Documentation/filesystems/mount_api.txt)329
-rw-r--r--Documentation/filesystems/orangefs.rst4
-rw-r--r--Documentation/filesystems/proc.rst7
-rw-r--r--Documentation/filesystems/quota.rst (renamed from Documentation/filesystems/quota.txt)41
-rw-r--r--Documentation/filesystems/ramfs-rootfs-initramfs.rst2
-rw-r--r--Documentation/filesystems/seq_file.rst (renamed from Documentation/filesystems/seq_file.txt)61
-rw-r--r--Documentation/filesystems/sharedsubtree.rst (renamed from Documentation/filesystems/sharedsubtree.txt)398
-rw-r--r--Documentation/filesystems/spufs/index.rst13
-rw-r--r--Documentation/filesystems/spufs/spu_create.rst131
-rw-r--r--Documentation/filesystems/spufs/spu_run.rst138
-rw-r--r--Documentation/filesystems/spufs/spufs.rst (renamed from Documentation/filesystems/spufs.txt)304
-rw-r--r--Documentation/filesystems/sysfs-pci.rst (renamed from Documentation/filesystems/sysfs-pci.txt)23
-rw-r--r--Documentation/filesystems/sysfs-tagging.rst (renamed from Documentation/filesystems/sysfs-tagging.txt)22
-rw-r--r--Documentation/filesystems/sysfs.rst2
-rw-r--r--Documentation/filesystems/vfs.rst15
-rw-r--r--Documentation/filesystems/xfs-delayed-logging-design.rst (renamed from Documentation/filesystems/xfs-delayed-logging-design.txt)65
-rw-r--r--Documentation/filesystems/xfs-self-describing-metadata.rst (renamed from Documentation/filesystems/xfs-self-describing-metadata.txt)200
-rw-r--r--Documentation/gpu/amdgpu.rst88
-rw-r--r--Documentation/gpu/drm-internals.rst12
-rw-r--r--Documentation/gpu/drm-kms.rst5
-rw-r--r--Documentation/gpu/drm-mm.rst9
-rw-r--r--Documentation/gpu/i915.rst52
-rw-r--r--Documentation/gpu/todo.rst12
-rw-r--r--Documentation/hwmon/amd_energy.rst109
-rw-r--r--Documentation/hwmon/bt1-pvt.rst117
-rw-r--r--Documentation/hwmon/gsc-hwmon.rst53
-rw-r--r--Documentation/hwmon/ina2xx.rst19
-rw-r--r--Documentation/hwmon/index.rst4
-rw-r--r--Documentation/hwmon/lm90.rst23
-rw-r--r--Documentation/hwmon/max16601.rst159
-rw-r--r--Documentation/i2c/i2c_bus.svg (renamed from Documentation/i2c/i2c.svg)2
-rw-r--r--Documentation/i2c/summary.rst2
-rw-r--r--Documentation/ia64/irq-redir.rst2
-rw-r--r--Documentation/iio/iio_configfs.rst2
-rw-r--r--Documentation/locking/futex-requeue-pi.rst (renamed from Documentation/futex-requeue-pi.txt)0
-rw-r--r--Documentation/locking/hwspinlock.rst (renamed from Documentation/hwspinlock.txt)0
-rw-r--r--Documentation/locking/index.rst7
-rw-r--r--Documentation/locking/locktorture.rst2
-rw-r--r--Documentation/locking/locktypes.rst215
-rw-r--r--Documentation/locking/percpu-rw-semaphore.rst (renamed from Documentation/percpu-rw-semaphore.txt)0
-rw-r--r--Documentation/locking/pi-futex.rst (renamed from Documentation/pi-futex.txt)0
-rw-r--r--Documentation/locking/preempt-locking.rst (renamed from Documentation/preempt-locking.txt)0
-rw-r--r--Documentation/locking/robust-futex-ABI.rst (renamed from Documentation/robust-futex-ABI.txt)0
-rw-r--r--Documentation/locking/robust-futexes.rst (renamed from Documentation/robust-futexes.txt)0
-rw-r--r--Documentation/locking/rt-mutex.rst2
-rw-r--r--Documentation/maintainer/maintainer-entry-profile.rst12
-rw-r--r--Documentation/memory-barriers.txt2
-rw-r--r--Documentation/misc-devices/index.rst1
-rw-r--r--Documentation/networking/scaling.rst4
-rw-r--r--Documentation/nvdimm/maintainer-entry-profile.rst14
-rw-r--r--Documentation/power/pci.rst58
-rw-r--r--Documentation/power/suspend-and-cpuhotplug.rst6
-rw-r--r--Documentation/powerpc/cxl.rst2
-rw-r--r--Documentation/powerpc/firmware-assisted-dump.rst2
-rw-r--r--Documentation/process/adding-syscalls.rst2
-rw-r--r--Documentation/process/index.rst1
-rw-r--r--Documentation/process/submit-checklist.rst2
-rw-r--r--Documentation/process/unaligned-memory-access.rst (renamed from Documentation/unaligned-memory-access.txt)0
-rw-r--r--Documentation/s390/vfio-ap.rst2
-rw-r--r--Documentation/scheduler/sched-domains.rst10
-rw-r--r--Documentation/security/digsig.rst (renamed from Documentation/digsig.txt)0
-rw-r--r--Documentation/security/index.rst1
-rw-r--r--Documentation/security/lsm.rst202
-rw-r--r--Documentation/security/siphash.rst2
-rw-r--r--Documentation/sphinx/requirements.txt2
-rw-r--r--Documentation/trace/coresight/coresight-ect.rst1
-rw-r--r--Documentation/trace/events.rst28
-rw-r--r--Documentation/trace/ftrace-design.rst8
-rw-r--r--Documentation/translations/it_IT/doc-guide/kernel-doc.rst25
-rw-r--r--Documentation/translations/it_IT/kernel-hacking/hacking.rst18
-rw-r--r--Documentation/translations/it_IT/kernel-hacking/locking.rst172
-rw-r--r--Documentation/translations/it_IT/process/2.Process.rst95
-rw-r--r--Documentation/translations/it_IT/process/adding-syscalls.rst2
-rw-r--r--Documentation/translations/it_IT/process/coding-style.rst6
-rw-r--r--Documentation/translations/it_IT/process/deprecated.rst130
-rw-r--r--Documentation/translations/it_IT/process/email-clients.rst332
-rw-r--r--Documentation/translations/it_IT/process/index.rst1
-rw-r--r--Documentation/translations/it_IT/process/management-style.rst293
-rw-r--r--Documentation/translations/it_IT/process/submit-checklist.rst2
-rw-r--r--Documentation/translations/it_IT/riscv/patch-acceptance.rst40
-rw-r--r--Documentation/translations/ko_KR/memory-barriers.txt2
-rw-r--r--Documentation/translations/zh_CN/IRQ.txt4
-rw-r--r--Documentation/translations/zh_CN/filesystems/debugfs.rst221
-rw-r--r--Documentation/translations/zh_CN/filesystems/index.rst1
-rw-r--r--Documentation/translations/zh_CN/filesystems/sysfs.txt8
-rw-r--r--Documentation/translations/zh_CN/process/submit-checklist.rst2
-rw-r--r--Documentation/translations/zh_CN/video4linux/v4l2-framework.txt2
-rw-r--r--Documentation/usb/gadget_configfs.rst4
-rw-r--r--Documentation/userspace-api/ioctl/ioctl-number.rst1
-rw-r--r--Documentation/virt/kvm/amd-memory-encryption.rst2
-rw-r--r--Documentation/virt/kvm/api.rst53
-rw-r--r--Documentation/virt/kvm/arm/pvtime.rst2
-rw-r--r--Documentation/virt/kvm/cpuid.rst8
-rw-r--r--Documentation/virt/kvm/devices/vcpu.rst2
-rw-r--r--Documentation/virt/kvm/hypercalls.rst4
-rw-r--r--Documentation/virt/kvm/mmu.rst2
-rw-r--r--Documentation/virt/kvm/msr.rst119
-rw-r--r--Documentation/virt/kvm/nested-vmx.rst5
-rw-r--r--Documentation/virt/kvm/review-checklist.rst2
-rw-r--r--Documentation/vm/hmm.rst30
-rw-r--r--Documentation/vm/index.rst1
-rw-r--r--Documentation/vm/page_frags.rst2
-rw-r--r--Documentation/vm/slub.rst2
-rw-r--r--Documentation/vm/zswap.rst4
-rw-r--r--Documentation/watchdog/convert_drivers_to_kernel_api.rst4
-rw-r--r--Documentation/watchdog/watchdog-kernel-api.rst2
-rw-r--r--Documentation/x86/x86_64/uefi.rst2
-rw-r--r--MAINTAINERS154
-rw-r--r--Makefile8
-rw-r--r--arch/Kconfig25
-rw-r--r--arch/alpha/include/asm/checksum.h3
-rw-r--r--arch/alpha/include/asm/floppy.h4
-rw-r--r--arch/alpha/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/alpha/lib/csum_partial_copy.c6
-rw-r--r--arch/arm/Kconfig15
-rw-r--r--arch/arm/boot/compressed/.gitignore9
-rw-r--r--arch/arm/boot/compressed/Makefile38
-rw-r--r--arch/arm/boot/compressed/atags_to_fdt.c1
-rw-r--r--arch/arm/boot/compressed/efi-header.S2
-rw-r--r--arch/arm/boot/compressed/fdt.c2
-rw-r--r--arch/arm/boot/compressed/fdt_ro.c2
-rw-r--r--arch/arm/boot/compressed/fdt_rw.c2
-rw-r--r--arch/arm/boot/compressed/fdt_wip.c2
-rw-r--r--arch/arm/boot/compressed/head.S80
-rw-r--r--arch/arm/boot/compressed/libfdt_env.h24
-rw-r--r--arch/arm/boot/compressed/vmlinux.lds.S7
-rw-r--r--arch/arm/boot/dts/am335x-guardian.dts1
-rw-r--r--arch/arm/boot/dts/am3517-evm.dts1
-rw-r--r--arch/arm/boot/dts/logicpd-torpedo-baseboard.dtsi1
-rw-r--r--arch/arm/boot/dts/omap3-gta04.dtsi1
-rw-r--r--arch/arm/configs/omap2plus_defconfig2
-rw-r--r--arch/arm/crypto/sha1_glue.c1
-rw-r--r--arch/arm/crypto/sha1_neon_glue.c1
-rw-r--r--arch/arm/crypto/sha256_glue.c1
-rw-r--r--arch/arm/crypto/sha256_neon_glue.c1
-rw-r--r--arch/arm/include/asm/assembler.h3
-rw-r--r--arch/arm/include/asm/checksum.h14
-rw-r--r--arch/arm/include/asm/efi.h8
-rw-r--r--arch/arm/include/asm/floppy.h8
-rw-r--r--arch/arm/kernel/armksyms.c1
-rw-r--r--arch/arm/kernel/module.c22
-rw-r--r--arch/arm/kernel/sys_oabi-compat.c15
-rw-r--r--arch/arm/mach-sa1100/shannon.c9
-rw-r--r--arch/arm/mm/proc-macros.S3
-rw-r--r--arch/arm/tools/syscall.tbl1
-rw-r--r--arch/arm64/Kconfig164
-rw-r--r--arch/arm64/Makefile16
-rw-r--r--arch/arm64/crypto/aes-glue.c4
-rw-r--r--arch/arm64/crypto/crct10dif-ce-core.S2
-rw-r--r--arch/arm64/crypto/sha256-glue.c1
-rw-r--r--arch/arm64/crypto/sha512-glue.c1
-rw-r--r--arch/arm64/include/asm/asm_pointer_auth.h43
-rw-r--r--arch/arm64/include/asm/assembler.h50
-rw-r--r--arch/arm64/include/asm/cacheflush.h6
-rw-r--r--arch/arm64/include/asm/compiler.h4
-rw-r--r--arch/arm64/include/asm/cpu.h4
-rw-r--r--arch/arm64/include/asm/cpucaps.h17
-rw-r--r--arch/arm64/include/asm/cpufeature.h30
-rw-r--r--arch/arm64/include/asm/debug-monitors.h2
-rw-r--r--arch/arm64/include/asm/efi.h8
-rw-r--r--arch/arm64/include/asm/elf.h50
-rw-r--r--arch/arm64/include/asm/esr.h2
-rw-r--r--arch/arm64/include/asm/exception.h1
-rw-r--r--arch/arm64/include/asm/hardirq.h78
-rw-r--r--arch/arm64/include/asm/hwcap.h1
-rw-r--r--arch/arm64/include/asm/insn.h30
-rw-r--r--arch/arm64/include/asm/kvm_asm.h4
-rw-r--r--arch/arm64/include/asm/kvm_emulate.h6
-rw-r--r--arch/arm64/include/asm/kvm_host.h52
-rw-r--r--arch/arm64/include/asm/kvm_hyp.h32
-rw-r--r--arch/arm64/include/asm/kvm_mmu.h23
-rw-r--r--arch/arm64/include/asm/linkage.h46
-rw-r--r--arch/arm64/include/asm/mman.h37
-rw-r--r--arch/arm64/include/asm/pgtable-hwdef.h2
-rw-r--r--arch/arm64/include/asm/pgtable-prot.h11
-rw-r--r--arch/arm64/include/asm/pgtable.h12
-rw-r--r--arch/arm64/include/asm/ptrace.h1
-rw-r--r--arch/arm64/include/asm/scs.h29
-rw-r--r--arch/arm64/include/asm/smp.h11
-rw-r--r--arch/arm64/include/asm/stacktrace.h40
-rw-r--r--arch/arm64/include/asm/suspend.h2
-rw-r--r--arch/arm64/include/asm/sysreg.h77
-rw-r--r--arch/arm64/include/asm/thread_info.h13
-rw-r--r--arch/arm64/include/asm/unistd.h2
-rw-r--r--arch/arm64/include/asm/unistd32.h2
-rw-r--r--arch/arm64/include/asm/virt.h2
-rw-r--r--arch/arm64/include/asm/vmap_stack.h6
-rw-r--r--arch/arm64/include/uapi/asm/hwcap.h1
-rw-r--r--arch/arm64/include/uapi/asm/mman.h9
-rw-r--r--arch/arm64/include/uapi/asm/ptrace.h9
-rw-r--r--arch/arm64/kernel/Makefile1
-rw-r--r--arch/arm64/kernel/acpi.c25
-rw-r--r--arch/arm64/kernel/asm-offsets.c9
-rw-r--r--arch/arm64/kernel/cpu-reset.S4
-rw-r--r--arch/arm64/kernel/cpu_errata.c31
-rw-r--r--arch/arm64/kernel/cpufeature.c455
-rw-r--r--arch/arm64/kernel/cpuinfo.c9
-rw-r--r--arch/arm64/kernel/crash_core.c4
-rw-r--r--arch/arm64/kernel/debug-monitors.c4
-rw-r--r--arch/arm64/kernel/efi-entry.S4
-rw-r--r--arch/arm64/kernel/efi-header.S4
-rw-r--r--arch/arm64/kernel/efi-rt-wrapper.S15
-rw-r--r--arch/arm64/kernel/entry-common.c13
-rw-r--r--arch/arm64/kernel/entry-fpsimd.S20
-rw-r--r--arch/arm64/kernel/entry-ftrace.S5
-rw-r--r--arch/arm64/kernel/entry.S69
-rw-r--r--arch/arm64/kernel/head.S49
-rw-r--r--arch/arm64/kernel/hibernate-asm.S16
-rw-r--r--arch/arm64/kernel/hyp-stub.S20
-rw-r--r--arch/arm64/kernel/image-vars.h2
-rw-r--r--arch/arm64/kernel/insn.c46
-rw-r--r--arch/arm64/kernel/machine_kexec_file.c14
-rw-r--r--arch/arm64/kernel/paravirt.c2
-rw-r--r--arch/arm64/kernel/probes/decode-insn.c2
-rw-r--r--arch/arm64/kernel/probes/kprobes_trampoline.S4
-rw-r--r--arch/arm64/kernel/process.c41
-rw-r--r--arch/arm64/kernel/ptrace.c2
-rw-r--r--arch/arm64/kernel/reloc_test_syms.S44
-rw-r--r--arch/arm64/kernel/relocate_kernel.S4
-rw-r--r--arch/arm64/kernel/scs.c16
-rw-r--r--arch/arm64/kernel/sdei.c42
-rw-r--r--arch/arm64/kernel/signal.c16
-rw-r--r--arch/arm64/kernel/sleep.S13
-rw-r--r--arch/arm64/kernel/smccc-call.S8
-rw-r--r--arch/arm64/kernel/smp.c12
-rw-r--r--arch/arm64/kernel/syscall.c18
-rw-r--r--arch/arm64/kernel/traps.c141
-rw-r--r--arch/arm64/kernel/vdso.c155
-rw-r--r--arch/arm64/kernel/vdso/Makefile12
-rw-r--r--arch/arm64/kernel/vdso/note.S3
-rw-r--r--arch/arm64/kernel/vdso/sigreturn.S54
-rw-r--r--arch/arm64/kernel/vdso/vdso.S3
-rw-r--r--arch/arm64/kernel/vdso32/sigreturn.S19
-rw-r--r--arch/arm64/kernel/vmlinux.lds.S18
-rw-r--r--arch/arm64/kvm/Kconfig22
-rw-r--r--arch/arm64/kvm/Makefile46
-rw-r--r--arch/arm64/kvm/aarch32.c (renamed from virt/kvm/arm/aarch32.c)0
-rw-r--r--arch/arm64/kvm/arch_timer.c (renamed from virt/kvm/arm/arch_timer.c)15
-rw-r--r--arch/arm64/kvm/arm.c (renamed from virt/kvm/arm/arm.c)79
-rw-r--r--arch/arm64/kvm/guest.c29
-rw-r--r--arch/arm64/kvm/handle_exit.c2
-rw-r--r--arch/arm64/kvm/hyp/Makefile16
-rw-r--r--arch/arm64/kvm/hyp/aarch32.c (renamed from virt/kvm/arm/hyp/aarch32.c)0
-rw-r--r--arch/arm64/kvm/hyp/switch.c14
-rw-r--r--arch/arm64/kvm/hyp/sysreg-sr.c6
-rw-r--r--arch/arm64/kvm/hyp/timer-sr.c (renamed from virt/kvm/arm/hyp/timer-sr.c)3
-rw-r--r--arch/arm64/kvm/hyp/tlb.c11
-rw-r--r--arch/arm64/kvm/hyp/vgic-v3-sr.c (renamed from virt/kvm/arm/hyp/vgic-v3-sr.c)39
-rw-r--r--arch/arm64/kvm/hypercalls.c (renamed from virt/kvm/arm/hypercalls.c)0
-rw-r--r--arch/arm64/kvm/inject_fault.c75
-rw-r--r--arch/arm64/kvm/mmio.c (renamed from virt/kvm/arm/mmio.c)2
-rw-r--r--arch/arm64/kvm/mmu.c (renamed from virt/kvm/arm/mmu.c)148
-rw-r--r--arch/arm64/kvm/perf.c (renamed from virt/kvm/arm/perf.c)0
-rw-r--r--arch/arm64/kvm/pmu-emul.c (renamed from virt/kvm/arm/pmu.c)0
-rw-r--r--arch/arm64/kvm/psci.c (renamed from virt/kvm/arm/psci.c)6
-rw-r--r--arch/arm64/kvm/pvtime.c (renamed from virt/kvm/arm/pvtime.c)0
-rw-r--r--arch/arm64/kvm/reset.c92
-rw-r--r--arch/arm64/kvm/sys_regs.c218
-rw-r--r--arch/arm64/kvm/trace.h216
-rw-r--r--arch/arm64/kvm/trace_arm.h (renamed from virt/kvm/arm/trace.h)11
-rw-r--r--arch/arm64/kvm/trace_handle_exit.h215
-rw-r--r--arch/arm64/kvm/vgic-sys-reg-v3.c2
-rw-r--r--arch/arm64/kvm/vgic/trace.h (renamed from virt/kvm/arm/vgic/trace.h)2
-rw-r--r--arch/arm64/kvm/vgic/vgic-debug.c (renamed from virt/kvm/arm/vgic/vgic-debug.c)0
-rw-r--r--arch/arm64/kvm/vgic/vgic-init.c (renamed from virt/kvm/arm/vgic/vgic-init.c)0
-rw-r--r--arch/arm64/kvm/vgic/vgic-irqfd.c (renamed from virt/kvm/arm/vgic/vgic-irqfd.c)0
-rw-r--r--arch/arm64/kvm/vgic/vgic-its.c (renamed from virt/kvm/arm/vgic/vgic-its.c)0
-rw-r--r--arch/arm64/kvm/vgic/vgic-kvm-device.c (renamed from virt/kvm/arm/vgic/vgic-kvm-device.c)0
-rw-r--r--arch/arm64/kvm/vgic/vgic-mmio-v2.c (renamed from virt/kvm/arm/vgic/vgic-mmio-v2.c)0
-rw-r--r--arch/arm64/kvm/vgic/vgic-mmio-v3.c (renamed from virt/kvm/arm/vgic/vgic-mmio-v3.c)2
-rw-r--r--arch/arm64/kvm/vgic/vgic-mmio.c (renamed from virt/kvm/arm/vgic/vgic-mmio.c)0
-rw-r--r--arch/arm64/kvm/vgic/vgic-mmio.h (renamed from virt/kvm/arm/vgic/vgic-mmio.h)0
-rw-r--r--arch/arm64/kvm/vgic/vgic-v2.c (renamed from virt/kvm/arm/vgic/vgic-v2.c)10
-rw-r--r--arch/arm64/kvm/vgic/vgic-v3.c (renamed from virt/kvm/arm/vgic/vgic-v3.c)18
-rw-r--r--arch/arm64/kvm/vgic/vgic-v4.c (renamed from virt/kvm/arm/vgic/vgic-v4.c)0
-rw-r--r--arch/arm64/kvm/vgic/vgic.c (renamed from virt/kvm/arm/vgic/vgic.c)25
-rw-r--r--arch/arm64/kvm/vgic/vgic.h (renamed from virt/kvm/arm/vgic/vgic.h)4
-rw-r--r--arch/arm64/lib/copy_from_user.S32
-rw-r--r--arch/arm64/lib/copy_in_user.S32
-rw-r--r--arch/arm64/lib/copy_to_user.S32
-rw-r--r--arch/arm64/lib/crc32.S2
-rw-r--r--arch/arm64/lib/memcpy.S32
-rw-r--r--arch/arm64/mm/context.c8
-rw-r--r--arch/arm64/mm/dump.c7
-rw-r--r--arch/arm64/mm/fault.c12
-rw-r--r--arch/arm64/mm/init.c2
-rw-r--r--arch/arm64/mm/mmu.c24
-rw-r--r--arch/arm64/mm/pageattr.c4
-rw-r--r--arch/arm64/mm/proc.S60
-rw-r--r--arch/arm64/net/bpf_jit.h30
-rw-r--r--arch/arm64/net/bpf_jit_comp.c85
-rw-r--r--arch/c6x/lib/checksum.c22
-rw-r--r--arch/ia64/include/asm/checksum.h10
-rw-r--r--arch/ia64/kernel/efi.c12
-rw-r--r--arch/ia64/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/ia64/lib/csum_partial_copy.c34
-rw-r--r--arch/m68k/amiga/config.c7
-rw-r--r--arch/m68k/coldfire/device.c33
-rw-r--r--arch/m68k/coldfire/m5441x.c27
-rw-r--r--arch/m68k/configs/amiga_defconfig6
-rw-r--r--arch/m68k/configs/apollo_defconfig6
-rw-r--r--arch/m68k/configs/atari_defconfig6
-rw-r--r--arch/m68k/configs/bvme6000_defconfig6
-rw-r--r--arch/m68k/configs/hp300_defconfig6
-rw-r--r--arch/m68k/configs/mac_defconfig6
-rw-r--r--arch/m68k/configs/multi_defconfig6
-rw-r--r--arch/m68k/configs/mvme147_defconfig6
-rw-r--r--arch/m68k/configs/mvme16x_defconfig6
-rw-r--r--arch/m68k/configs/q40_defconfig6
-rw-r--r--arch/m68k/configs/sun3_defconfig6
-rw-r--r--arch/m68k/configs/sun3x_defconfig6
-rw-r--r--arch/m68k/include/asm/checksum.h3
-rw-r--r--arch/m68k/include/asm/floppy.h27
-rw-r--r--arch/m68k/include/asm/m5441xsim.h15
-rw-r--r--arch/m68k/include/asm/mac_via.h1
-rw-r--r--arch/m68k/include/asm/mcfclk.h2
-rw-r--r--arch/m68k/include/asm/uaccess_mm.h2
-rw-r--r--arch/m68k/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/m68k/lib/checksum.c4
-rw-r--r--arch/m68k/mac/config.c21
-rw-r--r--arch/m68k/mac/iop.c51
-rw-r--r--arch/m68k/mac/via.c6
-rw-r--r--arch/m68k/tools/amiga/dmesg.c2
-rw-r--r--arch/microblaze/kernel/microblaze_ksyms.c1
-rw-r--r--arch/microblaze/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/mips/Kbuild2
-rw-r--r--arch/mips/Kbuild.platforms76
-rw-r--r--arch/mips/Kconfig174
-rw-r--r--arch/mips/Kconfig.debug10
-rw-r--r--arch/mips/Makefile48
-rw-r--r--arch/mips/alchemy/Platform3
-rw-r--r--arch/mips/alchemy/common/setup.c17
-rw-r--r--arch/mips/alchemy/devboards/db1550.c2
-rw-r--r--arch/mips/ar7/Platform1
-rw-r--r--arch/mips/ar7/setup.c2
-rw-r--r--arch/mips/ath25/Platform1
-rw-r--r--arch/mips/ath79/Platform1
-rw-r--r--arch/mips/ath79/setup.c3
-rw-r--r--arch/mips/bcm47xx/Platform1
-rw-r--r--arch/mips/bcm63xx/Platform1
-rw-r--r--arch/mips/bcm63xx/cpu.c2
-rw-r--r--arch/mips/bcm63xx/dev-flash.c2
-rw-r--r--arch/mips/bmips/Platform1
-rw-r--r--arch/mips/boot/compressed/Makefile2
-rw-r--r--arch/mips/boot/dts/Makefile30
-rw-r--r--arch/mips/boot/dts/ingenic/ci20.dts3
-rw-r--r--arch/mips/boot/dts/ingenic/gcw0.dts505
-rw-r--r--arch/mips/boot/dts/ingenic/gcw0_proto.dts13
-rw-r--r--arch/mips/boot/dts/ingenic/jz4740.dtsi33
-rw-r--r--arch/mips/boot/dts/ingenic/jz4770.dtsi227
-rw-r--r--arch/mips/boot/dts/ingenic/jz4780.dtsi65
-rw-r--r--arch/mips/boot/dts/ingenic/x1000.dtsi9
-rw-r--r--arch/mips/boot/dts/loongson/rs780e-pch.dtsi17
-rw-r--r--arch/mips/boot/dts/mscc/ocelot.dtsi2
-rw-r--r--arch/mips/boot/dts/qca/ar9331.dtsi2
-rw-r--r--arch/mips/boot/dts/qca/ar9331_dpt_module.dts6
-rw-r--r--arch/mips/cavium-octeon/Platform1
-rw-r--r--arch/mips/cavium-octeon/crypto/octeon-md5.c1
-rw-r--r--arch/mips/cavium-octeon/executive/cvmx-helper-npi.c12
-rw-r--r--arch/mips/cavium-octeon/executive/cvmx-pko.c2
-rw-r--r--arch/mips/cavium-octeon/octeon-platform.c4
-rw-r--r--arch/mips/cavium-octeon/octeon-usb.c2
-rw-r--r--arch/mips/cobalt/Platform1
-rw-r--r--arch/mips/configs/ath79_defconfig1
-rw-r--r--arch/mips/configs/ci20_defconfig62
-rw-r--r--arch/mips/configs/db1xxx_defconfig1
-rw-r--r--arch/mips/configs/gcw0_defconfig131
-rw-r--r--arch/mips/configs/generic/board-ocelot.config1
-rw-r--r--arch/mips/configs/lasat_defconfig55
-rw-r--r--arch/mips/configs/loongson3_defconfig4
-rw-r--r--arch/mips/configs/markeins_defconfig185
-rw-r--r--arch/mips/configs/msp71xx_defconfig77
-rw-r--r--arch/mips/configs/pistachio_defconfig1
-rw-r--r--arch/mips/configs/rt305x_defconfig1
-rw-r--r--arch/mips/dec/Platform1
-rw-r--r--arch/mips/dec/int-handler.S4
-rw-r--r--arch/mips/dec/tc.c2
-rw-r--r--arch/mips/emma/Makefile7
-rw-r--r--arch/mips/emma/Platform4
-rw-r--r--arch/mips/emma/common/Makefile6
-rw-r--r--arch/mips/emma/common/prom.c56
-rw-r--r--arch/mips/emma/markeins/Makefile6
-rw-r--r--arch/mips/emma/markeins/irq.c293
-rw-r--r--arch/mips/emma/markeins/led.c44
-rw-r--r--arch/mips/emma/markeins/platform.c199
-rw-r--r--arch/mips/emma/markeins/setup.c115
-rw-r--r--arch/mips/fw/cfe/cfe_api.c5
-rw-r--r--arch/mips/generic/Platform1
-rw-r--r--arch/mips/include/asm/asm-eva.h2
-rw-r--r--arch/mips/include/asm/asm.h24
-rw-r--r--arch/mips/include/asm/asmmacro.h18
-rw-r--r--arch/mips/include/asm/bootinfo.h13
-rw-r--r--arch/mips/include/asm/branch.h3
-rw-r--r--arch/mips/include/asm/cacheops.h2
-rw-r--r--arch/mips/include/asm/clock.h49
-rw-r--r--arch/mips/include/asm/compiler.h5
-rw-r--r--arch/mips/include/asm/cpu-features.h47
-rw-r--r--arch/mips/include/asm/cpu-info.h11
-rw-r--r--arch/mips/include/asm/cpu-type.h7
-rw-r--r--arch/mips/include/asm/cpu.h18
-rw-r--r--arch/mips/include/asm/emma/emma2rh.h248
-rw-r--r--arch/mips/include/asm/emma/markeins.h28
-rw-r--r--arch/mips/include/asm/fpu.h6
-rw-r--r--arch/mips/include/asm/fpu_emulator.h4
-rw-r--r--arch/mips/include/asm/hazards.h8
-rw-r--r--arch/mips/include/asm/io.h87
-rw-r--r--arch/mips/include/asm/kvm_host.h2
-rw-r--r--arch/mips/include/asm/lasat/ds1603.h19
-rw-r--r--arch/mips/include/asm/lasat/eeprom.h18
-rw-r--r--arch/mips/include/asm/lasat/head.h23
-rw-r--r--arch/mips/include/asm/lasat/lasat.h245
-rw-r--r--arch/mips/include/asm/lasat/lasatint.h15
-rw-r--r--arch/mips/include/asm/lasat/picvue.h16
-rw-r--r--arch/mips/include/asm/lasat/serial.h14
-rw-r--r--arch/mips/include/asm/maar.h17
-rw-r--r--arch/mips/include/asm/mach-ath25/cpu-feature-overrides.h1
-rw-r--r--arch/mips/include/asm/mach-ath79/cpu-feature-overrides.h1
-rw-r--r--arch/mips/include/asm/mach-au1x00/ioremap.h38
-rw-r--r--arch/mips/include/asm/mach-bcm63xx/bcm63xx_gpio.h6
-rw-r--r--arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h4
-rw-r--r--arch/mips/include/asm/mach-bcm63xx/ioremap.h5
-rw-r--r--arch/mips/include/asm/mach-bmips/ioremap.h5
-rw-r--r--arch/mips/include/asm/mach-emma2rh/irq.h15
-rw-r--r--arch/mips/include/asm/mach-generic/floppy.h8
-rw-r--r--arch/mips/include/asm/mach-generic/ioremap.h9
-rw-r--r--arch/mips/include/asm/mach-generic/irq.h6
-rw-r--r--arch/mips/include/asm/mach-ip27/spaces.h12
-rw-r--r--arch/mips/include/asm/mach-ip30/war.h6
-rw-r--r--arch/mips/include/asm/mach-jazz/floppy.h8
-rw-r--r--arch/mips/include/asm/mach-jz4740/base.h27
-rw-r--r--arch/mips/include/asm/mach-jz4740/dma.h23
-rw-r--r--arch/mips/include/asm/mach-jz4740/irq.h45
-rw-r--r--arch/mips/include/asm/mach-jz4740/timer.h126
-rw-r--r--arch/mips/include/asm/mach-lantiq/falcon/cpu-feature-overrides.h1
-rw-r--r--arch/mips/include/asm/mach-lasat/irq.h14
-rw-r--r--arch/mips/include/asm/mach-lasat/mach-gt64120.h28
-rw-r--r--arch/mips/include/asm/mach-loongson2ef/loongson.h1
-rw-r--r--arch/mips/include/asm/mach-loongson64/boot_param.h11
-rw-r--r--arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h1
-rw-r--r--arch/mips/include/asm/mach-loongson64/cpucfg-emul.h74
-rw-r--r--arch/mips/include/asm/mach-loongson64/loongson_regs.h29
-rw-r--r--arch/mips/include/asm/mach-loongson64/mc146818rtc.h36
-rw-r--r--arch/mips/include/asm/mach-loongson64/spaces.h8
-rw-r--r--arch/mips/include/asm/mach-pmcs-msp71xx/cpu-feature-overrides.h22
-rw-r--r--arch/mips/include/asm/mach-pmcs-msp71xx/msp_cic_int.h139
-rw-r--r--arch/mips/include/asm/mach-pmcs-msp71xx/msp_gpio_macros.h343
-rw-r--r--arch/mips/include/asm/mach-pmcs-msp71xx/msp_int.h31
-rw-r--r--arch/mips/include/asm/mach-pmcs-msp71xx/msp_pci.h189
-rw-r--r--arch/mips/include/asm/mach-pmcs-msp71xx/msp_prom.h159
-rw-r--r--arch/mips/include/asm/mach-pmcs-msp71xx/msp_regops.h237
-rw-r--r--arch/mips/include/asm/mach-pmcs-msp71xx/msp_regs.h652
-rw-r--r--arch/mips/include/asm/mach-pmcs-msp71xx/msp_slp_int.h129
-rw-r--r--arch/mips/include/asm/mach-pmcs-msp71xx/msp_usb.h124
-rw-r--r--arch/mips/include/asm/mach-pmcs-msp71xx/war.h28
-rw-r--r--arch/mips/include/asm/mach-ralink/mt7620/cpu-feature-overrides.h1
-rw-r--r--arch/mips/include/asm/mach-ralink/mt7621.h2
-rw-r--r--arch/mips/include/asm/mach-ralink/mt7621/cpu-feature-overrides.h1
-rw-r--r--arch/mips/include/asm/mach-ralink/rt288x/cpu-feature-overrides.h1
-rw-r--r--arch/mips/include/asm/mach-ralink/rt305x/cpu-feature-overrides.h1
-rw-r--r--arch/mips/include/asm/mach-ralink/rt3883/cpu-feature-overrides.h1
-rw-r--r--arch/mips/include/asm/mach-rc32434/cpu-feature-overrides.h1
-rw-r--r--arch/mips/include/asm/mach-rc32434/pci.h3
-rw-r--r--arch/mips/include/asm/mach-tx39xx/ioremap.h9
-rw-r--r--arch/mips/include/asm/mach-tx49xx/ioremap.h9
-rw-r--r--arch/mips/include/asm/mach-xilfpga/irq.h14
-rw-r--r--arch/mips/include/asm/mipsregs.h61
-rw-r--r--arch/mips/include/asm/nile4.h310
-rw-r--r--arch/mips/include/asm/octeon/cvmx-sli-defs.h2
-rw-r--r--arch/mips/include/asm/page.h2
-rw-r--r--arch/mips/include/asm/pgtable-64.h4
-rw-r--r--arch/mips/include/asm/pgtable-bits.h20
-rw-r--r--arch/mips/include/asm/pgtable.h108
-rw-r--r--arch/mips/include/asm/smp.h2
-rw-r--r--arch/mips/include/asm/stackframe.h2
-rw-r--r--arch/mips/include/asm/switch_to.h8
-rw-r--r--arch/mips/include/asm/unaligned-emul.h779
-rw-r--r--arch/mips/include/asm/vermagic.h6
-rw-r--r--arch/mips/include/uapi/asm/hwcap.h1
-rw-r--r--arch/mips/include/uapi/asm/inst.h26
-rw-r--r--arch/mips/jazz/Platform1
-rw-r--r--arch/mips/jz4740/Kconfig9
-rw-r--r--arch/mips/jz4740/Makefile7
-rw-r--r--arch/mips/jz4740/Platform1
-rw-r--r--arch/mips/jz4740/pm.c34
-rw-r--r--arch/mips/jz4740/prom.c19
-rw-r--r--arch/mips/jz4740/reset.c24
-rw-r--r--arch/mips/jz4740/reset.h7
-rw-r--r--arch/mips/jz4740/setup.c64
-rw-r--r--arch/mips/jz4740/time.c17
-rw-r--r--arch/mips/jz4740/timer.c42
-rw-r--r--arch/mips/kernel/8250-platform.c46
-rw-r--r--arch/mips/kernel/Makefile2
-rw-r--r--arch/mips/kernel/branch.c26
-rw-r--r--arch/mips/kernel/cevt-r4k.c44
-rw-r--r--arch/mips/kernel/cps-vec-ns16550.S18
-rw-r--r--arch/mips/kernel/cpu-probe.c171
-rw-r--r--arch/mips/kernel/csrc-r4k.c40
-rw-r--r--arch/mips/kernel/entry.S6
-rw-r--r--arch/mips/kernel/genex.S12
-rw-r--r--arch/mips/kernel/head.S3
-rw-r--r--arch/mips/kernel/idle.c5
-rw-r--r--arch/mips/kernel/mips-cm.c66
-rw-r--r--arch/mips/kernel/mips-r2-to-r6-emul.c2
-rw-r--r--arch/mips/kernel/perf_event_mipsxx.c373
-rw-r--r--arch/mips/kernel/proc.c4
-rw-r--r--arch/mips/kernel/r4k_fpu.S14
-rw-r--r--arch/mips/kernel/scall64-o32.S2
-rw-r--r--arch/mips/kernel/setup.c18
-rw-r--r--arch/mips/kernel/signal.c4
-rw-r--r--arch/mips/kernel/spram.c4
-rw-r--r--arch/mips/kernel/syscalls/syscall_n32.tbl1
-rw-r--r--arch/mips/kernel/syscalls/syscall_n64.tbl1
-rw-r--r--arch/mips/kernel/syscalls/syscall_o32.tbl1
-rw-r--r--arch/mips/kernel/time.c70
-rw-r--r--arch/mips/kernel/traps.c52
-rw-r--r--arch/mips/kernel/unaligned.c776
-rw-r--r--arch/mips/kernel/vmlinux.lds.S2
-rw-r--r--arch/mips/kernel/watch.c26
-rw-r--r--arch/mips/kvm/emulate.c12
-rw-r--r--arch/mips/kvm/mips.c76
-rw-r--r--arch/mips/kvm/tlb.c4
-rw-r--r--arch/mips/kvm/vz.c16
-rw-r--r--arch/mips/lantiq/Platform1
-rw-r--r--arch/mips/lasat/Kconfig16
-rw-r--r--arch/mips/lasat/Makefile15
-rw-r--r--arch/mips/lasat/Platform7
-rw-r--r--arch/mips/lasat/at93c.c148
-rw-r--r--arch/mips/lasat/at93c.h19
-rw-r--r--arch/mips/lasat/ds1603.c190
-rw-r--r--arch/mips/lasat/ds1603.h32
-rw-r--r--arch/mips/lasat/image/Makefile53
-rw-r--r--arch/mips/lasat/image/head.S32
-rw-r--r--arch/mips/lasat/image/romscript.normal23
-rw-r--r--arch/mips/lasat/interrupt.c119
-rw-r--r--arch/mips/lasat/lasat_board.c268
-rw-r--r--arch/mips/lasat/lasat_models.h68
-rw-r--r--arch/mips/lasat/picvue.c242
-rw-r--r--arch/mips/lasat/picvue.h45
-rw-r--r--arch/mips/lasat/picvue_proc.c208
-rw-r--r--arch/mips/lasat/prom.c126
-rw-r--r--arch/mips/lasat/prom.h8
-rw-r--r--arch/mips/lasat/reset.c48
-rw-r--r--arch/mips/lasat/serial.c80
-rw-r--r--arch/mips/lasat/setup.c141
-rw-r--r--arch/mips/lasat/sysctl.c265
-rw-r--r--arch/mips/lib/csum_partial.S6
-rw-r--r--arch/mips/loongson2ef/Kconfig1
-rw-r--r--arch/mips/loongson2ef/Platform1
-rw-r--r--arch/mips/loongson2ef/common/init.c4
-rw-r--r--arch/mips/loongson2ef/lemote-2f/clock.c98
-rw-r--r--arch/mips/loongson32/Platform1
-rw-r--r--arch/mips/loongson64/Kconfig12
-rw-r--r--arch/mips/loongson64/Makefile6
-rw-r--r--arch/mips/loongson64/Platform1
-rw-r--r--arch/mips/loongson64/cop2-ex.c290
-rw-r--r--arch/mips/loongson64/cpucfg-emul.c227
-rw-r--r--arch/mips/loongson64/dma.c9
-rw-r--r--arch/mips/loongson64/env.c20
-rw-r--r--arch/mips/loongson64/init.c63
-rw-r--r--arch/mips/loongson64/pci.c51
-rw-r--r--arch/mips/loongson64/rtc.c39
-rw-r--r--arch/mips/loongson64/smp.c1
-rw-r--r--arch/mips/loongson64/time.c8
-rw-r--r--arch/mips/loongson64/vbios_quirk.c29
-rw-r--r--arch/mips/math-emu/cp1emu.c28
-rw-r--r--arch/mips/math-emu/dp_add.c3
-rw-r--r--arch/mips/math-emu/dp_div.c3
-rw-r--r--arch/mips/math-emu/dp_fmax.c6
-rw-r--r--arch/mips/math-emu/dp_fmin.c6
-rw-r--r--arch/mips/math-emu/dp_maddf.c3
-rw-r--r--arch/mips/math-emu/dp_mul.c3
-rw-r--r--arch/mips/math-emu/dp_sqrt.c5
-rw-r--r--arch/mips/math-emu/dp_sub.c3
-rw-r--r--arch/mips/math-emu/sp_add.c3
-rw-r--r--arch/mips/math-emu/sp_div.c3
-rw-r--r--arch/mips/math-emu/sp_fdp.c3
-rw-r--r--arch/mips/math-emu/sp_fmax.c6
-rw-r--r--arch/mips/math-emu/sp_fmin.c6
-rw-r--r--arch/mips/math-emu/sp_maddf.c3
-rw-r--r--arch/mips/math-emu/sp_mul.c3
-rw-r--r--arch/mips/math-emu/sp_sub.c3
-rw-r--r--arch/mips/mm/Makefile2
-rw-r--r--arch/mips/mm/c-r4k.c31
-rw-r--r--arch/mips/mm/cache.c1
-rw-r--r--arch/mips/mm/dma-noncoherent.c1
-rw-r--r--arch/mips/mm/init.c8
-rw-r--r--arch/mips/mm/ioremap.c151
-rw-r--r--arch/mips/mm/ioremap64.c23
-rw-r--r--arch/mips/mm/sc-mips.c7
-rw-r--r--arch/mips/mm/tlbex.c2
-rw-r--r--arch/mips/mti-malta/Platform1
-rw-r--r--arch/mips/mti-malta/malta-init.c8
-rw-r--r--arch/mips/netlogic/Platform1
-rw-r--r--arch/mips/netlogic/xlr/fmn.c2
-rw-r--r--arch/mips/oprofile/common.c2
-rw-r--r--arch/mips/oprofile/op_model_mipsxx.c26
-rw-r--r--arch/mips/paravirt/Platform1
-rw-r--r--arch/mips/pci/Makefile7
-rw-r--r--arch/mips/pci/fixup-emma2rh.c84
-rw-r--r--arch/mips/pci/fixup-loongson3.c71
-rw-r--r--arch/mips/pci/fixup-pmcmsp.c216
-rw-r--r--arch/mips/pci/fixup-sni.c3
-rw-r--r--arch/mips/pci/ops-bcm63xx.c2
-rw-r--r--arch/mips/pci/ops-emma2rh.c167
-rw-r--r--arch/mips/pci/ops-loongson3.c116
-rw-r--r--arch/mips/pci/ops-nile4.c136
-rw-r--r--arch/mips/pci/ops-pmcmsp.c944
-rw-r--r--arch/mips/pci/pci-alchemy.c2
-rw-r--r--arch/mips/pci/pci-emma2rh.c72
-rw-r--r--arch/mips/pci/pci-lasat.c88
-rw-r--r--arch/mips/pic32/Platform1
-rw-r--r--arch/mips/pistachio/Platform1
-rw-r--r--arch/mips/pistachio/init.c8
-rw-r--r--arch/mips/pmcs-msp71xx/Kconfig50
-rw-r--r--arch/mips/pmcs-msp71xx/Makefile13
-rw-r--r--arch/mips/pmcs-msp71xx/Platform7
-rw-r--r--arch/mips/pmcs-msp71xx/msp_elb.c46
-rw-r--r--arch/mips/pmcs-msp71xx/msp_eth.c111
-rw-r--r--arch/mips/pmcs-msp71xx/msp_hwbutton.c165
-rw-r--r--arch/mips/pmcs-msp71xx/msp_irq.c155
-rw-r--r--arch/mips/pmcs-msp71xx/msp_irq_cic.c208
-rw-r--r--arch/mips/pmcs-msp71xx/msp_irq_per.c127
-rw-r--r--arch/mips/pmcs-msp71xx/msp_irq_slp.c102
-rw-r--r--arch/mips/pmcs-msp71xx/msp_pci.c50
-rw-r--r--arch/mips/pmcs-msp71xx/msp_prom.c513
-rw-r--r--arch/mips/pmcs-msp71xx/msp_serial.c154
-rw-r--r--arch/mips/pmcs-msp71xx/msp_setup.c228
-rw-r--r--arch/mips/pmcs-msp71xx/msp_smp.c56
-rw-r--r--arch/mips/pmcs-msp71xx/msp_time.c90
-rw-r--r--arch/mips/pmcs-msp71xx/msp_usb.c173
-rw-r--r--arch/mips/pnx833x/Platform1
-rw-r--r--arch/mips/ralink/Platform1
-rw-r--r--arch/mips/ralink/bootrom.c2
-rw-r--r--arch/mips/ralink/mt7621.c43
-rw-r--r--arch/mips/rb532/Platform1
-rw-r--r--arch/mips/sgi-ip22/Platform2
-rw-r--r--arch/mips/sgi-ip27/Platform3
-rw-r--r--arch/mips/sgi-ip27/ip27-timer.c9
-rw-r--r--arch/mips/sgi-ip30/Platform3
-rw-r--r--arch/mips/sgi-ip32/Platform1
-rw-r--r--arch/mips/sibyte/Platform4
-rw-r--r--arch/mips/sni/Platform1
-rw-r--r--arch/mips/tools/elf-entry.c9
-rw-r--r--arch/mips/tools/loongson3-llsc-check.c2
-rw-r--r--arch/mips/txx9/Kconfig14
-rw-r--r--arch/mips/txx9/Platform3
-rw-r--r--arch/mips/vdso/Kconfig18
-rw-r--r--arch/mips/vdso/Makefile43
-rw-r--r--arch/mips/vdso/vdso.lds.S2
-rw-r--r--arch/mips/vr41xx/Makefile5
-rw-r--r--arch/mips/vr41xx/Platform3
-rw-r--r--arch/nios2/include/asm/checksum.h2
-rw-r--r--arch/parisc/Kconfig2
-rw-r--r--arch/parisc/Makefile4
-rw-r--r--arch/parisc/boot/compressed/Makefile1
-rw-r--r--arch/parisc/include/asm/checksum.h7
-rw-r--r--arch/parisc/include/asm/floppy.h19
-rw-r--r--arch/parisc/include/asm/pgtable.h2
-rw-r--r--arch/parisc/kernel/firmware.c3
-rw-r--r--arch/parisc/kernel/hardware.c3
-rw-r--r--arch/parisc/kernel/module.c4
-rw-r--r--arch/parisc/kernel/perf.c2
-rw-r--r--arch/parisc/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/parisc/lib/checksum.c20
-rw-r--r--arch/powerpc/crypto/md5-glue.c1
-rw-r--r--arch/powerpc/crypto/sha1-spe-glue.c1
-rw-r--r--arch/powerpc/crypto/sha1.c33
-rw-r--r--arch/powerpc/crypto/sha256-spe-glue.c1
-rw-r--r--arch/powerpc/include/asm/floppy.h19
-rw-r--r--arch/powerpc/include/asm/io.h10
-rw-r--r--arch/powerpc/include/asm/kvm_book3s.h2
-rw-r--r--arch/powerpc/include/asm/kvm_host.h2
-rw-r--r--arch/powerpc/include/asm/pci-bridge.h2
-rw-r--r--arch/powerpc/include/uapi/asm/kvm_para.h2
-rw-r--r--arch/powerpc/kernel/irq.c5
-rw-r--r--arch/powerpc/kernel/isa-bridge.c28
-rw-r--r--arch/powerpc/kernel/nvram_64.c4
-rw-r--r--arch/powerpc/kernel/pci_64.c54
-rw-r--r--arch/powerpc/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/powerpc/kernel/traps.c22
-rw-r--r--arch/powerpc/kernel/vmlinux.lds.S1
-rw-r--r--arch/powerpc/kvm/book3s.c61
-rw-r--r--arch/powerpc/kvm/book3s_hv.c23
-rw-r--r--arch/powerpc/kvm/booke.c43
-rw-r--r--arch/powerpc/kvm/powerpc.c5
-rw-r--r--arch/powerpc/mm/ioremap_64.c50
-rw-r--r--arch/powerpc/perf/imc-pmu.c4
-rw-r--r--arch/powerpc/platforms/cell/spufs/coredump.c87
-rw-r--r--arch/powerpc/platforms/cell/spufs/file.c330
-rw-r--r--arch/powerpc/platforms/cell/spufs/spufs.h3
-rw-r--r--arch/powerpc/platforms/powernv/smp.c1
-rw-r--r--arch/riscv/include/asm/pgtable.h4
-rw-r--r--arch/riscv/mm/ptdump.c2
-rw-r--r--arch/s390/crypto/sha1_s390.c12
-rw-r--r--arch/s390/include/asm/checksum.h19
-rw-r--r--arch/s390/include/asm/kvm_host.h6
-rw-r--r--arch/s390/kernel/setup.c9
-rw-r--r--arch/s390/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/s390/kvm/interrupt.c2
-rw-r--r--arch/s390/kvm/kvm-s390.c210
-rw-r--r--arch/s390/kvm/vsie.c3
-rw-r--r--arch/s390/mm/gmap.c10
-rw-r--r--arch/sh/Kconfig1
-rw-r--r--arch/sh/include/asm/checksum_32.h9
-rw-r--r--arch/sh/kernel/cpu/sh4/sq.c3
-rw-r--r--arch/sh/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/sh/kernel/traps.c12
-rw-r--r--arch/sparc/crypto/md5_glue.c1
-rw-r--r--arch/sparc/crypto/sha1_glue.c1
-rw-r--r--arch/sparc/crypto/sha256_glue.c1
-rw-r--r--arch/sparc/crypto/sha512_glue.c1
-rw-r--r--arch/sparc/include/asm/checksum.h1
-rw-r--r--arch/sparc/include/asm/checksum_32.h15
-rw-r--r--arch/sparc/include/asm/checksum_64.h2
-rw-r--r--arch/sparc/include/asm/floppy_32.h50
-rw-r--r--arch/sparc/include/asm/floppy_64.h59
-rw-r--r--arch/sparc/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/unicore32/kernel/ksyms.c1
-rw-r--r--arch/x86/Kconfig17
-rw-r--r--arch/x86/Kconfig.assembler4
-rw-r--r--arch/x86/Kconfig.debug9
-rw-r--r--arch/x86/Makefile6
-rw-r--r--arch/x86/boot/Makefile7
-rw-r--r--arch/x86/boot/compressed/acpi.c7
-rw-r--r--arch/x86/boot/compressed/efi_thunk_64.S19
-rw-r--r--arch/x86/boot/compressed/head_32.S5
-rw-r--r--arch/x86/boot/compressed/head_64.S9
-rw-r--r--arch/x86/boot/compressed/vmlinux.lds.S3
-rw-r--r--arch/x86/boot/string.c43
-rw-r--r--arch/x86/boot/string.h1
-rw-r--r--arch/x86/crypto/aesni-intel_asm.S4
-rw-r--r--arch/x86/crypto/camellia-aesni-avx-asm_64.S2
-rw-r--r--arch/x86/crypto/camellia-aesni-avx2-asm_64.S2
-rw-r--r--arch/x86/crypto/crc32c-pcl-intel-asm_64.S26
-rw-r--r--arch/x86/crypto/sha1_ssse3_glue.c1
-rw-r--r--arch/x86/crypto/sha256_ssse3_glue.c1
-rw-r--r--arch/x86/crypto/sha512_ssse3_glue.c1
-rw-r--r--arch/x86/entry/entry_32.S16
-rw-r--r--arch/x86/entry/entry_64.S6
-rw-r--r--arch/x86/entry/syscalls/syscall_32.tbl1
-rw-r--r--arch/x86/entry/syscalls/syscall_64.tbl1
-rw-r--r--arch/x86/entry/vdso/Makefile15
-rw-r--r--arch/x86/entry/vdso/vdso2c.c4
-rw-r--r--arch/x86/entry/vdso/vdso2c.h16
-rw-r--r--arch/x86/events/Kconfig6
-rw-r--r--arch/x86/events/Makefile3
-rw-r--r--arch/x86/events/core.c4
-rw-r--r--arch/x86/events/intel/Makefile2
-rw-r--r--arch/x86/events/intel/bts.c2
-rw-r--r--arch/x86/events/intel/core.c4
-rw-r--r--arch/x86/events/intel/pt.c2
-rw-r--r--arch/x86/events/intel/uncore.h2
-rw-r--r--arch/x86/events/perf_event.h10
-rw-r--r--arch/x86/events/probe.c13
-rw-r--r--arch/x86/events/rapl.c (renamed from arch/x86/events/intel/rapl.c)69
-rw-r--r--arch/x86/events/zhaoxin/Makefile2
-rw-r--r--arch/x86/events/zhaoxin/core.c613
-rw-r--r--arch/x86/hyperv/hv_init.c5
-rw-r--r--arch/x86/ia32/audit.c1
-rw-r--r--arch/x86/ia32/ia32_signal.c2
-rw-r--r--arch/x86/include/asm/GEN-for-each-reg.h25
-rw-r--r--arch/x86/include/asm/apb_timer.h5
-rw-r--r--arch/x86/include/asm/archrandom.h26
-rw-r--r--arch/x86/include/asm/asm-prototypes.h35
-rw-r--r--arch/x86/include/asm/audit.h7
-rw-r--r--arch/x86/include/asm/checksum.h2
-rw-r--r--arch/x86/include/asm/checksum_32.h21
-rw-r--r--arch/x86/include/asm/checksum_64.h12
-rw-r--r--arch/x86/include/asm/compat.h8
-rw-r--r--arch/x86/include/asm/cpu_device_id.h31
-rw-r--r--arch/x86/include/asm/delay.h4
-rw-r--r--arch/x86/include/asm/doublefault.h2
-rw-r--r--arch/x86/include/asm/efi.h50
-rw-r--r--arch/x86/include/asm/floppy.h19
-rw-r--r--arch/x86/include/asm/fpu/internal.h10
-rw-r--r--arch/x86/include/asm/fpu/xstate.h52
-rw-r--r--arch/x86/include/asm/hyperv-tlfs.h474
-rw-r--r--arch/x86/include/asm/intel-mid.h9
-rw-r--r--arch/x86/include/asm/intel_pmc_ipc.h59
-rw-r--r--arch/x86/include/asm/intel_scu_ipc.h114
-rw-r--r--arch/x86/include/asm/intel_scu_ipc_legacy.h91
-rw-r--r--arch/x86/include/asm/intel_telemetry.h6
-rw-r--r--arch/x86/include/asm/invpcid.h7
-rw-r--r--arch/x86/include/asm/kvm_host.h139
-rw-r--r--arch/x86/include/asm/kvm_para.h27
-rw-r--r--arch/x86/include/asm/mmzone_32.h39
-rw-r--r--arch/x86/include/asm/msr-index.h3
-rw-r--r--arch/x86/include/asm/mwait.h24
-rw-r--r--arch/x86/include/asm/nospec-branch.h76
-rw-r--r--arch/x86/include/asm/orc_types.h3
-rw-r--r--arch/x86/include/asm/pgtable-2level_types.h2
-rw-r--r--arch/x86/include/asm/pgtable-3level_types.h2
-rw-r--r--arch/x86/include/asm/pgtable_32.h3
-rw-r--r--arch/x86/include/asm/pgtable_64_types.h2
-rw-r--r--arch/x86/include/asm/pgtable_types.h8
-rw-r--r--arch/x86/include/asm/processor.h5
-rw-r--r--arch/x86/include/asm/resctrl.h (renamed from arch/x86/include/asm/resctrl_sched.h)9
-rw-r--r--arch/x86/include/asm/smap.h11
-rw-r--r--arch/x86/include/asm/spinlock_types.h22
-rw-r--r--arch/x86/include/asm/svm.h9
-rw-r--r--arch/x86/include/asm/switch_to.h23
-rw-r--r--arch/x86/include/asm/traps.h7
-rw-r--r--arch/x86/include/asm/unwind_hints.h31
-rw-r--r--arch/x86/include/asm/uv/bios.h7
-rw-r--r--arch/x86/include/asm/uv/uv.h2
-rw-r--r--arch/x86/include/asm/uv/uv_hub.h54
-rw-r--r--arch/x86/include/asm/uv/uv_mmrs.h7
-rw-r--r--arch/x86/include/asm/vmx.h10
-rw-r--r--arch/x86/include/asm/x86_init.h2
-rw-r--r--arch/x86/include/uapi/asm/kvm.h20
-rw-r--r--arch/x86/include/uapi/asm/kvm_para.h17
-rw-r--r--arch/x86/include/uapi/asm/vmx.h3
-rw-r--r--arch/x86/kernel/Makefile4
-rw-r--r--arch/x86/kernel/apb_timer.c53
-rw-r--r--arch/x86/kernel/apic/apic.c57
-rw-r--r--arch/x86/kernel/apic/io_apic.c13
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c91
-rw-r--r--arch/x86/kernel/audit_64.c2
-rw-r--r--arch/x86/kernel/cpu/amd.c6
-rw-r--r--arch/x86/kernel/cpu/common.c40
-rw-r--r--arch/x86/kernel/cpu/intel.c7
-rw-r--r--arch/x86/kernel/cpu/match.c7
-rw-r--r--arch/x86/kernel/cpu/mce/core.c65
-rw-r--r--arch/x86/kernel/cpu/mce/p5.c5
-rw-r--r--arch/x86/kernel/cpu/mce/winchip.c5
-rw-r--r--arch/x86/kernel/cpu/microcode/core.c15
-rw-r--r--arch/x86/kernel/cpu/perfctr-watchdog.c8
-rw-r--r--arch/x86/kernel/cpu/resctrl/core.c32
-rw-r--r--arch/x86/kernel/cpu/resctrl/ctrlmondata.c8
-rw-r--r--arch/x86/kernel/cpu/resctrl/internal.h15
-rw-r--r--arch/x86/kernel/cpu/resctrl/monitor.c27
-rw-r--r--arch/x86/kernel/cpu/resctrl/pseudo_lock.c2
-rw-r--r--arch/x86/kernel/cpu/resctrl/rdtgroup.c4
-rw-r--r--arch/x86/kernel/dumpstack_32.c4
-rw-r--r--arch/x86/kernel/e820.c8
-rw-r--r--arch/x86/kernel/early_printk.c3
-rw-r--r--arch/x86/kernel/fpu/core.c53
-rw-r--r--arch/x86/kernel/fpu/init.c3
-rw-r--r--arch/x86/kernel/fpu/regset.c2
-rw-r--r--arch/x86/kernel/fpu/signal.c144
-rw-r--r--arch/x86/kernel/fpu/xstate.c199
-rw-r--r--arch/x86/kernel/ftrace.c14
-rw-r--r--arch/x86/kernel/ftrace_32.S2
-rw-r--r--arch/x86/kernel/ftrace_64.S44
-rw-r--r--arch/x86/kernel/irq_64.c2
-rw-r--r--arch/x86/kernel/kvm.c172
-rw-r--r--arch/x86/kernel/nmi.c4
-rw-r--r--arch/x86/kernel/process.c2
-rw-r--r--arch/x86/kernel/process_32.c2
-rw-r--r--arch/x86/kernel/process_64.c2
-rw-r--r--arch/x86/kernel/setup.c6
-rw-r--r--arch/x86/kernel/setup_percpu.c6
-rw-r--r--arch/x86/kernel/signal.c30
-rw-r--r--arch/x86/kernel/smpboot.c21
-rw-r--r--arch/x86/kernel/tboot.c8
-rw-r--r--arch/x86/kernel/time.c3
-rw-r--r--arch/x86/kernel/traps.c112
-rw-r--r--arch/x86/kernel/tsc.c12
-rw-r--r--arch/x86/kernel/x86_init.c1
-rw-r--r--arch/x86/kvm/cpuid.c15
-rw-r--r--arch/x86/kvm/cpuid.h5
-rw-r--r--arch/x86/kvm/emulate.c2
-rw-r--r--arch/x86/kvm/hyperv.c199
-rw-r--r--arch/x86/kvm/hyperv.h32
-rw-r--r--arch/x86/kvm/ioapic.h8
-rw-r--r--arch/x86/kvm/irq.c3
-rw-r--r--arch/x86/kvm/irq.h15
-rw-r--r--arch/x86/kvm/kvm_cache_regs.h10
-rw-r--r--arch/x86/kvm/lapic.c95
-rw-r--r--arch/x86/kvm/lapic.h9
-rw-r--r--arch/x86/kvm/mmu.h4
-rw-r--r--arch/x86/kvm/mmu/mmu.c519
-rw-r--r--arch/x86/kvm/mmu/page_track.c4
-rw-r--r--arch/x86/kvm/mmu/paging_tmpl.h20
-rw-r--r--arch/x86/kvm/mmu_audit.c6
-rw-r--r--arch/x86/kvm/pmu.c4
-rw-r--r--arch/x86/kvm/pmu.h4
-rw-r--r--arch/x86/kvm/svm/nested.c698
-rw-r--r--arch/x86/kvm/svm/pmu.c7
-rw-r--r--arch/x86/kvm/svm/sev.c3
-rw-r--r--arch/x86/kvm/svm/svm.c368
-rw-r--r--arch/x86/kvm/svm/svm.h57
-rw-r--r--arch/x86/kvm/trace.h83
-rw-r--r--arch/x86/kvm/vmx/capabilities.h11
-rw-r--r--arch/x86/kvm/vmx/evmcs.c32
-rw-r--r--arch/x86/kvm/vmx/nested.c693
-rw-r--r--arch/x86/kvm/vmx/nested.h49
-rw-r--r--arch/x86/kvm/vmx/ops.h32
-rw-r--r--arch/x86/kvm/vmx/pmu_intel.c71
-rw-r--r--arch/x86/kvm/vmx/vmcs.h2
-rw-r--r--arch/x86/kvm/vmx/vmcs12.c4
-rw-r--r--arch/x86/kvm/vmx/vmcs12.h10
-rw-r--r--arch/x86/kvm/vmx/vmenter.S14
-rw-r--r--arch/x86/kvm/vmx/vmx.c538
-rw-r--r--arch/x86/kvm/vmx/vmx.h64
-rw-r--r--arch/x86/kvm/x86.c601
-rw-r--r--arch/x86/kvm/x86.h9
-rw-r--r--arch/x86/lib/checksum_32.S4
-rw-r--r--arch/x86/lib/csum-wrappers_64.c35
-rw-r--r--arch/x86/lib/delay.c114
-rw-r--r--arch/x86/lib/retpoline.S63
-rw-r--r--arch/x86/mm/cpu_entry_area.c4
-rw-r--r--arch/x86/mm/dump_pagetables.c35
-rw-r--r--arch/x86/mm/fault.c195
-rw-r--r--arch/x86/mm/init.c2
-rw-r--r--arch/x86/mm/init_64.c5
-rw-r--r--arch/x86/mm/mmap.c2
-rw-r--r--arch/x86/mm/numa_32.c34
-rw-r--r--arch/x86/mm/pti.c8
-rw-r--r--arch/x86/mm/tlb.c37
-rw-r--r--arch/x86/platform/efi/efi.c8
-rw-r--r--arch/x86/platform/efi/efi_stub_64.S2
-rw-r--r--arch/x86/platform/uv/bios_uv.c16
-rw-r--r--arch/x86/platform/uv/uv_sysfs.c2
-rw-r--r--arch/x86/power/cpu.c2
-rw-r--r--arch/x86/um/asm/checksum.h20
-rw-r--r--arch/x86/xen/efi.c2
-rw-r--r--arch/xtensa/include/asm/checksum.h11
-rw-r--r--arch/xtensa/include/asm/uaccess.h32
-rw-r--r--arch/xtensa/kernel/entry.S4
-rw-r--r--arch/xtensa/kernel/syscalls/syscall.tbl1
-rw-r--r--block/Kconfig18
-rw-r--r--block/Makefile2
-rw-r--r--block/bfq-iosched.c2
-rw-r--r--block/bio-integrity.c3
-rw-r--r--block/bio.c184
-rw-r--r--block/blk-cgroup.c6
-rw-r--r--block/blk-core.c326
-rw-r--r--block/blk-crypto-fallback.c657
-rw-r--r--block/blk-crypto-internal.h201
-rw-r--r--block/blk-crypto.c404
-rw-r--r--block/blk-exec.c2
-rw-r--r--block/blk-flush.c26
-rw-r--r--block/blk-integrity.c7
-rw-r--r--block/blk-iocost.c86
-rw-r--r--block/blk-map.c15
-rw-r--r--block/blk-merge.c76
-rw-r--r--block/blk-mq-debugfs.c3
-rw-r--r--block/blk-mq-sched.c82
-rw-r--r--block/blk-mq-tag.c70
-rw-r--r--block/blk-mq-tag.h6
-rw-r--r--block/blk-mq.c407
-rw-r--r--block/blk-mq.h4
-rw-r--r--block/blk-settings.c68
-rw-r--r--block/blk-sysfs.c13
-rw-r--r--block/blk-throttle.c63
-rw-r--r--block/blk-wbt.c16
-rw-r--r--block/blk-wbt.h4
-rw-r--r--block/blk-zoned.c23
-rw-r--r--block/blk.h88
-rw-r--r--block/bounce.c2
-rw-r--r--block/genhd.c133
-rw-r--r--block/ioctl.c154
-rw-r--r--block/keyslot-manager.c397
-rw-r--r--block/kyber-iosched.c2
-rw-r--r--block/mq-deadline.c2
-rw-r--r--block/partitions/core.c187
-rw-r--r--block/partitions/ibm.c24
-rw-r--r--crypto/Kconfig3
-rw-r--r--crypto/algapi.c32
-rw-r--r--crypto/algif_rng.c2
-rw-r--r--crypto/api.c3
-rw-r--r--crypto/blake2b_generic.c4
-rw-r--r--crypto/crypto_engine.c168
-rw-r--r--crypto/drbg.c30
-rw-r--r--crypto/essiv.c4
-rw-r--r--crypto/internal.h1
-rw-r--r--crypto/jitterentropy-kcapi.c27
-rw-r--r--crypto/jitterentropy.c417
-rw-r--r--crypto/lrw.c6
-rw-r--r--crypto/sha1_generic.c5
-rw-r--r--crypto/sha256_generic.c14
-rw-r--r--crypto/shash.c16
-rw-r--r--crypto/xts.c6
-rw-r--r--drivers/acpi/acpi_dbg.c4
-rw-r--r--drivers/acpi/acpi_lpit.c3
-rw-r--r--drivers/acpi/acpi_lpss.c14
-rw-r--r--drivers/acpi/acpi_tad.c2
-rw-r--r--drivers/acpi/acpi_watchdog.c7
-rw-r--r--drivers/acpi/acpica/acglobal.h1
-rw-r--r--drivers/acpi/acpica/acpredef.h4
-rw-r--r--drivers/acpi/acpica/dbhistry.c1
-rw-r--r--drivers/acpi/acpica/dsfield.c22
-rw-r--r--drivers/acpi/acpica/exfield.c2
-rw-r--r--drivers/acpi/apei/bert.c6
-rw-r--r--drivers/acpi/apei/einj.c5
-rw-r--r--drivers/acpi/apei/erst.c4
-rw-r--r--drivers/acpi/apei/ghes.c73
-rw-r--r--drivers/acpi/apei/hest.c5
-rw-r--r--drivers/acpi/arm64/gtdt.c4
-rw-r--r--drivers/acpi/arm64/iort.c126
-rw-r--r--drivers/acpi/button.c1
-rw-r--r--drivers/acpi/cppc_acpi.c5
-rw-r--r--drivers/acpi/device_pm.c31
-rw-r--r--drivers/acpi/dptf/dptf_power.c147
-rw-r--r--drivers/acpi/ec.c23
-rw-r--r--drivers/acpi/evged.c22
-rw-r--r--drivers/acpi/pci_link.c2
-rw-r--r--drivers/acpi/pmic/intel_pmic_chtdc_ti.c1
-rw-r--r--drivers/acpi/power.c2
-rw-r--r--drivers/acpi/processor_idle.c9
-rw-r--r--drivers/acpi/sbs.c3
-rw-r--r--drivers/acpi/scan.c15
-rw-r--r--drivers/acpi/sleep.c24
-rw-r--r--drivers/acpi/sysfs.c4
-rw-r--r--drivers/acpi/utils.c25
-rw-r--r--drivers/acpi/video_detect.c10
-rw-r--r--drivers/ata/libata-core.c2
-rw-r--r--drivers/ata/libata-scsi.c30
-rw-r--r--drivers/base/core.c40
-rw-r--r--drivers/base/node.c8
-rw-r--r--drivers/base/platform-msi.c2
-rw-r--r--drivers/base/platform.c6
-rw-r--r--drivers/base/power/main.c348
-rw-r--r--drivers/base/power/runtime.c6
-rw-r--r--drivers/base/power/sysfs.c4
-rw-r--r--drivers/base/regmap/regmap-debugfs.c6
-rw-r--r--drivers/base/regmap/regmap-i2c.c61
-rw-r--r--drivers/base/regmap/regmap-irq.c84
-rw-r--r--drivers/base/regmap/regmap.c1
-rw-r--r--drivers/base/swnode.c48
-rw-r--r--drivers/block/aoe/aoeblk.c1
-rw-r--r--drivers/block/drbd/drbd_bitmap.c4
-rw-r--r--drivers/block/drbd/drbd_req.c27
-rw-r--r--drivers/block/floppy.c466
-rw-r--r--drivers/block/loop.c391
-rw-r--r--drivers/block/null_blk_main.c28
-rw-r--r--drivers/block/null_blk_zoned.c37
-rw-r--r--drivers/block/paride/pcd.c2
-rw-r--r--drivers/block/rsxx/dev.c19
-rw-r--r--drivers/block/swim.c6
-rw-r--r--drivers/block/zram/zcomp.c44
-rw-r--r--drivers/block/zram/zcomp.h5
-rw-r--r--drivers/block/zram/zram_drv.c24
-rw-r--r--drivers/bluetooth/btmrvl_sdio.c18
-rw-r--r--drivers/bluetooth/btmtksdio.c4
-rw-r--r--drivers/cdrom/cdrom.c85
-rw-r--r--drivers/cdrom/gdrom.c2
-rw-r--r--drivers/char/agp/intel-gtt.c21
-rw-r--r--drivers/char/hw_random/Kconfig13
-rw-r--r--drivers/char/hw_random/Makefile1
-rw-r--r--drivers/char/hw_random/cctrng.c735
-rw-r--r--drivers/char/hw_random/cctrng.h72
-rw-r--r--drivers/char/hw_random/omap-rng.c5
-rw-r--r--drivers/char/hw_random/optee-rng.c2
-rw-r--r--drivers/char/hw_random/xgene-rng.c4
-rw-r--r--drivers/char/ipmi/Kconfig2
-rw-r--r--drivers/char/ipmi/ipmi_si_hotmod.c2
-rw-r--r--drivers/char/ipmi/ipmi_si_intf.c2
-rw-r--r--drivers/char/nvram.c4
-rw-r--r--drivers/char/pcmcia/cm4000_cs.c14
-rw-r--r--drivers/char/random.c8
-rw-r--r--drivers/char/tpm/eventlog/tpm2.c12
-rw-r--r--drivers/char/tpm/tpm_ftpm_tee.c2
-rw-r--r--drivers/char/virtio_console.c2
-rw-r--r--drivers/clk/clk-qoriq.c30
-rw-r--r--drivers/clk/clk.c6
-rw-r--r--drivers/clocksource/Kconfig8
-rw-r--r--drivers/clocksource/Makefile1
-rw-r--r--drivers/clocksource/arc_timer.c4
-rw-r--r--drivers/clocksource/arm_arch_timer.c4
-rw-r--r--drivers/clocksource/dw_apb_timer.c5
-rw-r--r--drivers/clocksource/dw_apb_timer_of.c8
-rw-r--r--drivers/clocksource/mips-gic-timer.c50
-rw-r--r--drivers/clocksource/timer-atmel-st.c3
-rw-r--r--drivers/clocksource/timer-davinci.c24
-rw-r--r--drivers/clocksource/timer-imx-tpm.c8
-rw-r--r--drivers/clocksource/timer-ti-32k.c48
-rw-r--r--drivers/clocksource/timer-ti-dm-systimer.c727
-rw-r--r--drivers/clocksource/timer-ti-dm.c4
-rw-r--r--drivers/clocksource/timer-versatile.c3
-rw-r--r--drivers/connector/cn_proc.c21
-rw-r--r--drivers/cpufreq/Kconfig3
-rw-r--r--drivers/cpufreq/Kconfig.arm1
-rw-r--r--drivers/cpufreq/cpufreq-dt-platdev.c2
-rw-r--r--drivers/cpufreq/cpufreq.c11
-rw-r--r--drivers/cpufreq/imx-cpufreq-dt.c84
-rw-r--r--drivers/cpufreq/intel_pstate.c3
-rw-r--r--drivers/cpufreq/loongson2_cpufreq.c22
-rw-r--r--drivers/cpufreq/qcom-cpufreq-nvmem.c2
-rw-r--r--drivers/cpufreq/qoriq-cpufreq.c76
-rw-r--r--drivers/cpuidle/Kconfig.arm13
-rw-r--r--drivers/cpuidle/Makefile1
-rw-r--r--drivers/cpuidle/cpuidle-psci.c8
-rw-r--r--drivers/cpuidle/cpuidle-qcom-spm.c (renamed from drivers/soc/qcom/spm.c)138
-rw-r--r--drivers/cpuidle/sysfs.c73
-rw-r--r--drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c2
-rw-r--r--drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c2
-rw-r--r--drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c2
-rw-r--r--drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c6
-rw-r--r--drivers/crypto/amlogic/amlogic-gxl-core.c4
-rw-r--r--drivers/crypto/atmel-sha.c1
-rw-r--r--drivers/crypto/axis/artpec6_crypto.c10
-rw-r--r--drivers/crypto/bcm/cipher.c27
-rw-r--r--drivers/crypto/cavium/nitrox/nitrox_main.c2
-rw-r--r--drivers/crypto/ccp/Kconfig3
-rw-r--r--drivers/crypto/ccp/ccp-crypto-sha.c9
-rw-r--r--drivers/crypto/ccp/sev-dev.c58
-rw-r--r--drivers/crypto/ccree/cc_cipher.c9
-rw-r--r--drivers/crypto/ccree/cc_debugfs.c4
-rw-r--r--drivers/crypto/chelsio/chcr_algo.c3
-rw-r--r--drivers/crypto/chelsio/chcr_ipsec.c1
-rw-r--r--drivers/crypto/hisilicon/Kconfig4
-rw-r--r--drivers/crypto/hisilicon/hpre/hpre.h18
-rw-r--r--drivers/crypto/hisilicon/hpre/hpre_crypto.c99
-rw-r--r--drivers/crypto/hisilicon/hpre/hpre_main.c355
-rw-r--r--drivers/crypto/hisilicon/qm.c2433
-rw-r--r--drivers/crypto/hisilicon/qm.h120
-rw-r--r--drivers/crypto/hisilicon/sec2/sec.h5
-rw-r--r--drivers/crypto/hisilicon/sec2/sec_crypto.c20
-rw-r--r--drivers/crypto/hisilicon/sec2/sec_main.c379
-rw-r--r--drivers/crypto/hisilicon/zip/zip.h8
-rw-r--r--drivers/crypto/hisilicon/zip/zip_crypto.c20
-rw-r--r--drivers/crypto/hisilicon/zip/zip_main.c362
-rw-r--r--drivers/crypto/marvell/octeontx/otx_cptpf_main.c4
-rw-r--r--drivers/crypto/marvell/octeontx/otx_cptpf_mbox.c12
-rw-r--r--drivers/crypto/marvell/octeontx/otx_cptpf_ucode.c95
-rw-r--r--drivers/crypto/marvell/octeontx/otx_cptvf_algs.c6
-rw-r--r--drivers/crypto/marvell/octeontx/otx_cptvf_main.c12
-rw-r--r--drivers/crypto/marvell/octeontx/otx_cptvf_reqmgr.c10
-rw-r--r--drivers/crypto/mediatek/mtk-sha.c7
-rw-r--r--drivers/crypto/n2_core.c7
-rw-r--r--drivers/crypto/omap-sham.c21
-rw-r--r--drivers/crypto/s5p-sss.c39
-rw-r--r--drivers/crypto/stm32/stm32-crc32.c230
-rw-r--r--drivers/crypto/stm32/stm32-hash.c38
-rw-r--r--drivers/dax/device.c1
-rw-r--r--drivers/devfreq/Kconfig8
-rw-r--r--drivers/devfreq/Makefile1
-rw-r--r--drivers/devfreq/devfreq.c19
-rw-r--r--drivers/devfreq/imx-bus.c179
-rw-r--r--drivers/devfreq/tegra30-devfreq.c7
-rw-r--r--drivers/dma-buf/Makefile3
-rw-r--r--drivers/dma-buf/dma-buf.c2
-rw-r--r--drivers/dma-buf/dma-fence-chain.c10
-rw-r--r--drivers/dma-buf/dma-fence.c2
-rw-r--r--drivers/dma-buf/selftests.h1
-rw-r--r--drivers/dma-buf/st-dma-fence-chain.c715
-rw-r--r--drivers/edac/amd64_edac.c2
-rw-r--r--drivers/edac/amd8131_edac.c8
-rw-r--r--drivers/edac/armada_xp_edac.c14
-rw-r--r--drivers/edac/i10nm_base.c29
-rw-r--r--drivers/edac/skx_base.c33
-rw-r--r--drivers/edac/skx_common.c17
-rw-r--r--drivers/edac/skx_common.h13
-rw-r--r--drivers/edac/thunderx_edac.c8
-rw-r--r--drivers/edac/xgene_edac.c3
-rw-r--r--drivers/firewire/core-cdev.c4
-rw-r--r--drivers/firmware/Kconfig6
-rw-r--r--drivers/firmware/Makefile3
-rw-r--r--drivers/firmware/arm_sdei.c49
-rw-r--r--drivers/firmware/efi/Kconfig15
-rw-r--r--drivers/firmware/efi/arm-init.c4
-rw-r--r--drivers/firmware/efi/efi.c44
-rw-r--r--drivers/firmware/efi/efivars.c4
-rw-r--r--drivers/firmware/efi/libstub/Makefile50
-rw-r--r--drivers/firmware/efi/libstub/alignedmem.c57
-rw-r--r--drivers/firmware/efi/libstub/arm32-stub.c53
-rw-r--r--drivers/firmware/efi/libstub/arm64-stub.c106
-rw-r--r--drivers/firmware/efi/libstub/efi-stub-helper.c381
-rw-r--r--drivers/firmware/efi/libstub/efi-stub.c (renamed from drivers/firmware/efi/libstub/arm-stub.c)93
-rw-r--r--drivers/firmware/efi/libstub/efistub.h191
-rw-r--r--drivers/firmware/efi/libstub/fdt.c24
-rw-r--r--drivers/firmware/efi/libstub/file.c48
-rw-r--r--drivers/firmware/efi/libstub/gop.c583
-rw-r--r--drivers/firmware/efi/libstub/mem.c191
-rw-r--r--drivers/firmware/efi/libstub/pci.c10
-rw-r--r--drivers/firmware/efi/libstub/randomalloc.c6
-rw-r--r--drivers/firmware/efi/libstub/relocate.c174
-rw-r--r--drivers/firmware/efi/libstub/secureboot.c4
-rw-r--r--drivers/firmware/efi/libstub/tpm.c2
-rw-r--r--drivers/firmware/efi/libstub/vsprintf.c564
-rw-r--r--drivers/firmware/efi/libstub/x86-stub.c155
-rw-r--r--drivers/firmware/efi/test/efi_test.c12
-rw-r--r--drivers/firmware/psci/psci.c21
-rw-r--r--drivers/firmware/raspberrypi.c12
-rw-r--r--drivers/firmware/smccc/Kconfig16
-rw-r--r--drivers/firmware/smccc/Makefile3
-rw-r--r--drivers/firmware/smccc/smccc.c31
-rw-r--r--drivers/fpga/dfl-afu-dma-region.c4
-rw-r--r--drivers/fpga/dfl-fme-pr.c4
-rw-r--r--drivers/gpio/gpio-mockup.c53
-rw-r--r--drivers/gpu/drm/Kconfig4
-rw-r--r--drivers/gpu/drm/Makefile7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/Makefile2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu.h81
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c22
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c62
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c160
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c151
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c415
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_df.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c68
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_display.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c61
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c37
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c185
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c55
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c60
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c47
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c91
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_job.c15
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_job.h5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c16
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_object.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_object.h11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c623
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h49
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c447
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h56
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c169
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c13
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h62
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_test.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c398
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h20
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c9
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c107
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h32
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c103
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h28
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c120
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h24
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c28
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c142
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c146
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/atom.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cik.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cik_sdma.c11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cikd.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v10_0.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v11_0.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v6_0.c14
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v8_0.c14
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_virtual.c13
-rw-r--r--drivers/gpu/drm/amd/amdgpu/df_v3_6.c54
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c2883
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c35
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c69
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c78
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c195
-rwxr-xr-x[-rw-r--r--]drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c35
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c28
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c13
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c16
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c4
-rwxr-xr-x[-rw-r--r--]drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c14
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c97
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h46
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mxgpu_vi.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/navi10_ih.c302
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c18
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c18
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c36
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nv.c28
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nvd.h49
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v10_0.c145
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v11_0.c419
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v12_0.c172
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v3_1.c262
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c47
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c129
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si_dma.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si_dpm.c20
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15.c20
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15_common.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15d.h26
-rw-r--r--drivers/gpu/drm/amd/amdgpu/ta_ras_if.h59
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v6_1.c112
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c16
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c23
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vce_v2_0.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vce_v3_0.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vce_v4_0.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c9
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c80
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c471
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vi.c24
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vid.h2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_chardev.c55
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_crat.c2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device.c40
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c47
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h1
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c4
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_iommu.c2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c1
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c6
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_priv.h23
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process.c62
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c9
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_topology.c11
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_topology.h6
-rw-r--r--drivers/gpu/drm/amd/display/Kconfig8
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c406
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h1
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c27
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c73
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c42
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c88
-rw-r--r--drivers/gpu/drm/amd/display/dc/basics/Makefile3
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/command_table2.c62
-rw-r--r--drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c21
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc.c30
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_link.c644
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c22
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_resource.c107
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_sink.c1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc.h100
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_dp_types.h6
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_helper.c6
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_link.h30
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_stream.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_types.h5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/Makefile2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_abm.c225
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_abm.h30
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c33
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h65
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c20
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.h3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.c299
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.h125
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c319
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dmub_abm.h (renamed from drivers/gpu/drm/amd/display/dc/basics/log_helpers.c)25
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c76
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c31
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c110
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.h7
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce110/dce110_opp_csc_v.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c31
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c33
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c33
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce80/dce80_hw_sequencer.c28
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c31
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c7
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c18
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c124
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c20
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.h3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c6
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h7
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c83
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c21
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/dcn20_link_encoder.c40
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/dcn20_link_encoder.h4
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c14
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.h7
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c288
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h8
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c7
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c33
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c10
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn21/dcn21_link_encoder.c27
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c122
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c32
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c30
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c17
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/hdcp/hdcp_msg.c89
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/core_types.h3
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/abm.h21
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/panel_cntl.h67
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h3
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/transform.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h10
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw_sequencer_private.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/resource.h5
-rw-r--r--drivers/gpu/drm/amd/display/dc/virtual/virtual_link_encoder.c9
-rw-r--r--drivers/gpu/drm/amd/display/dmub/dmub_srv.h (renamed from drivers/gpu/drm/amd/display/dmub/inc/dmub_srv.h)23
-rw-r--r--drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h14
-rw-r--r--drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd_dal.h15
-rw-r--r--drivers/gpu/drm/amd/display/dmub/inc/dmub_rb.h6
-rw-r--r--drivers/gpu/drm/amd/display/dmub/inc/dmub_types.h11
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c26
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn21.c2
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_reg.c2
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c33
-rw-r--r--drivers/gpu/drm/amd/display/include/hdcp_types.h7
-rw-r--r--drivers/gpu/drm/amd/display/include/logger_interface.h4
-rw-r--r--drivers/gpu/drm/amd/display/modules/color/color_gamma.c33
-rw-r--r--drivers/gpu/drm/amd/display/modules/color/color_gamma.h4
-rw-r--r--drivers/gpu/drm/amd/display/modules/freesync/freesync.c101
-rw-r--r--drivers/gpu/drm/amd/display/modules/hdcp/hdcp.c5
-rw-r--r--drivers/gpu/drm/amd/display/modules/hdcp/hdcp.h30
-rw-r--r--drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c2
-rw-r--r--drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c2
-rw-r--r--drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.c8
-rw-r--r--drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c482
-rw-r--r--drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h3
-rw-r--r--drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h5
-rw-r--r--drivers/gpu/drm/amd/display/modules/inc/mod_shared.h5
-rw-r--r--drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c2
-rw-r--r--drivers/gpu/drm/amd/display/modules/power/power_helpers.c103
-rw-r--r--drivers/gpu/drm/amd/display/modules/power/power_helpers.h3
-rw-r--r--drivers/gpu/drm/amd/display/modules/stats/stats.c448
-rw-r--r--drivers/gpu/drm/amd/display/modules/vmid/vmid.c7
-rw-r--r--drivers/gpu/drm/amd/include/amd_shared.h14
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_1_0_sh_mask.h4
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbif/nbif_6_1_offset.h2
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_0_offset.h2
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_offset.h2
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/pwr/pwr_10_0_offset.h27
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/pwr/pwr_10_0_sh_mask.h30
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_12_0_0_offset.h30
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_12_0_0_sh_mask.h33
-rw-r--r--drivers/gpu/drm/amd/include/atomfirmware.h114
-rw-r--r--drivers/gpu/drm/amd/include/cgs_common.h1
-rw-r--r--drivers/gpu/drm/amd/powerplay/amd_powerplay.c3
-rw-r--r--drivers/gpu/drm/amd/powerplay/amdgpu_smu.c328
-rw-r--r--drivers/gpu/drm/amd/powerplay/arcturus_ppt.c49
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c184
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.h2
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c61
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c135
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/smu7_powertune.c40
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/smu7_thermal.c17
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/smu8_hwmgr.c157
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.c71
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/vega10_baco.c2
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c141
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c32
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c6
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/vega12_baco.c2
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c149
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/vega12_thermal.c7
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/vega20_baco.c8
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c181
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/vega20_powertune.c6
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/vega20_thermal.c7
-rw-r--r--drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h11
-rw-r--r--drivers/gpu/drm/amd/powerplay/inc/arcturus_ppsmc.h3
-rw-r--r--drivers/gpu/drm/amd/powerplay/inc/hwmgr.h1
-rw-r--r--drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if_arcturus.h15
-rw-r--r--drivers/gpu/drm/amd/powerplay/inc/smu12_driver_if.h40
-rw-r--r--drivers/gpu/drm/amd/powerplay/inc/smu_types.h1
-rw-r--r--drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h5
-rw-r--r--drivers/gpu/drm/amd/powerplay/inc/smumgr.h7
-rw-r--r--drivers/gpu/drm/amd/powerplay/navi10_ppt.c107
-rw-r--r--drivers/gpu/drm/amd/powerplay/renoir_ppt.c8
-rw-r--r--drivers/gpu/drm/amd/powerplay/smu_internal.h5
-rw-r--r--drivers/gpu/drm/amd/powerplay/smu_v11_0.c139
-rw-r--r--drivers/gpu/drm/amd/powerplay/smu_v12_0.c16
-rw-r--r--drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c18
-rw-r--r--drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c35
-rw-r--r--drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c1
-rw-r--r--drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c31
-rw-r--r--drivers/gpu/drm/amd/powerplay/smumgr/smu10_smumgr.c42
-rw-r--r--drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.c48
-rw-r--r--drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.h4
-rw-r--r--drivers/gpu/drm/amd/powerplay/smumgr/smu8_smumgr.c69
-rw-r--r--drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c56
-rw-r--r--drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c15
-rw-r--r--drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c56
-rw-r--r--drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c68
-rw-r--r--drivers/gpu/drm/amd/powerplay/smumgr/vega20_smumgr.c106
-rw-r--r--drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c18
-rw-r--r--drivers/gpu/drm/amd/powerplay/vega20_ppt.c4
-rw-r--r--drivers/gpu/drm/arc/arcpgu_drv.c7
-rw-r--r--drivers/gpu/drm/arm/display/komeda/komeda_kms.c16
-rw-r--r--drivers/gpu/drm/arm/hdlcd_drv.c7
-rw-r--r--drivers/gpu/drm/arm/malidp_drv.c3
-rw-r--r--drivers/gpu/drm/armada/armada_drv.c4
-rw-r--r--drivers/gpu/drm/aspeed/aspeed_gfx.h3
-rw-r--r--drivers/gpu/drm/aspeed/aspeed_gfx_crtc.c2
-rw-r--r--drivers/gpu/drm/aspeed/aspeed_gfx_drv.c31
-rw-r--r--drivers/gpu/drm/aspeed/aspeed_gfx_out.c2
-rw-r--r--drivers/gpu/drm/ast/ast_drv.c3
-rw-r--r--drivers/gpu/drm/ast/ast_main.c5
-rw-r--r--drivers/gpu/drm/ast/ast_mode.c25
-rw-r--r--drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_output.c12
-rw-r--r--drivers/gpu/drm/bochs/bochs.h1
-rw-r--r--drivers/gpu/drm/bochs/bochs_drv.c6
-rw-r--r--drivers/gpu/drm/bochs/bochs_kms.c16
-rw-r--r--drivers/gpu/drm/bridge/Kconfig26
-rw-r--r--drivers/gpu/drm/bridge/Makefile2
-rw-r--r--drivers/gpu/drm/bridge/adv7511/Kconfig2
-rw-r--r--drivers/gpu/drm/bridge/adv7511/adv7511_audio.c26
-rw-r--r--drivers/gpu/drm/bridge/chrontel-ch7033.c620
-rw-r--r--drivers/gpu/drm/bridge/nwl-dsi.c1213
-rw-r--r--drivers/gpu/drm/bridge/nwl-dsi.h144
-rw-r--r--drivers/gpu/drm/bridge/panel.c7
-rw-r--r--drivers/gpu/drm/bridge/parade-ps8640.c2
-rw-r--r--drivers/gpu/drm/bridge/sii9234.c3
-rw-r--r--drivers/gpu/drm/bridge/synopsys/dw-hdmi-ahb-audio.c2
-rw-r--r--drivers/gpu/drm/bridge/synopsys/dw-hdmi.c86
-rw-r--r--drivers/gpu/drm/bridge/tc358768.c4
-rw-r--r--drivers/gpu/drm/cirrus/Kconfig19
-rw-r--r--drivers/gpu/drm/cirrus/Makefile2
-rw-r--r--drivers/gpu/drm/drm_atomic.c8
-rw-r--r--drivers/gpu/drm/drm_auth.c69
-rw-r--r--drivers/gpu/drm/drm_blend.c16
-rw-r--r--drivers/gpu/drm/drm_bufs.c2
-rw-r--r--drivers/gpu/drm/drm_client.c8
-rw-r--r--drivers/gpu/drm/drm_connector.c6
-rw-r--r--drivers/gpu/drm/drm_crtc_internal.h4
-rw-r--r--drivers/gpu/drm/drm_debugfs.c45
-rw-r--r--drivers/gpu/drm/drm_dma.c2
-rw-r--r--drivers/gpu/drm/drm_dp_helper.c271
-rw-r--r--drivers/gpu/drm/drm_dp_mst_topology.c404
-rw-r--r--drivers/gpu/drm/drm_drv.c230
-rw-r--r--drivers/gpu/drm/drm_edid.c111
-rw-r--r--drivers/gpu/drm/drm_fb_cma_helper.c2
-rw-r--r--drivers/gpu/drm/drm_fb_helper.c31
-rw-r--r--drivers/gpu/drm/drm_file.c9
-rw-r--r--drivers/gpu/drm/drm_framebuffer.c8
-rw-r--r--drivers/gpu/drm/drm_gem.c23
-rw-r--r--drivers/gpu/drm/drm_gem_framebuffer_helper.c224
-rw-r--r--drivers/gpu/drm/drm_gem_vram_helper.c124
-rw-r--r--drivers/gpu/drm/drm_internal.h7
-rw-r--r--drivers/gpu/drm/drm_ioctl.c6
-rw-r--r--drivers/gpu/drm/drm_managed.c275
-rw-r--r--drivers/gpu/drm/drm_mipi_dbi.c33
-rw-r--r--drivers/gpu/drm/drm_mm.c133
-rw-r--r--drivers/gpu/drm/drm_mode_config.c110
-rw-r--r--drivers/gpu/drm/drm_mode_object.c10
-rw-r--r--drivers/gpu/drm/drm_modes.c26
-rw-r--r--drivers/gpu/drm/drm_pci.c4
-rw-r--r--drivers/gpu/drm/drm_plane.c9
-rw-r--r--drivers/gpu/drm/drm_scatter.c11
-rw-r--r--drivers/gpu/drm/drm_vblank.c98
-rw-r--r--drivers/gpu/drm/drm_vm.c4
-rw-r--r--drivers/gpu/drm/drm_vram_helper_common.c94
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_drv.c20
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_dump.c4
-rw-r--r--drivers/gpu/drm/exynos/exynos_dp.c8
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_dpi.c8
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_drv.c1
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_dsi.c34
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_fbdev.c28
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_gem.c182
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_gem.h16
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_mic.c2
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_rotator.c4
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_scaler.c4
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_vidi.c34
-rw-r--r--drivers/gpu/drm/exynos/exynos_hdmi.c8
-rw-r--r--drivers/gpu/drm/exynos/exynos_mixer.c6
-rw-r--r--drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c14
-rw-r--r--drivers/gpu/drm/gma500/cdv_intel_crt.c14
-rw-r--r--drivers/gpu/drm/gma500/cdv_intel_dp.c47
-rw-r--r--drivers/gpu/drm/gma500/cdv_intel_hdmi.c4
-rw-r--r--drivers/gpu/drm/gma500/cdv_intel_lvds.c100
-rw-r--r--drivers/gpu/drm/gma500/framebuffer.c16
-rw-r--r--drivers/gpu/drm/gma500/mdfld_dsi_dpi.c11
-rw-r--r--drivers/gpu/drm/gma500/mdfld_intel_display.c31
-rw-r--r--drivers/gpu/drm/gma500/mdfld_output.h1
-rw-r--r--drivers/gpu/drm/gma500/mdfld_tmd_vid.c6
-rw-r--r--drivers/gpu/drm/gma500/mdfld_tpo_vid.c6
-rw-r--r--drivers/gpu/drm/gma500/oaktrail_hdmi.c19
-rw-r--r--drivers/gpu/drm/gma500/oaktrail_lvds.c5
-rw-r--r--drivers/gpu/drm/gma500/psb_intel_drv.h1
-rw-r--r--drivers/gpu/drm/gma500/psb_intel_lvds.c18
-rw-r--r--drivers/gpu/drm/gma500/psb_intel_sdvo.c99
-rw-r--r--drivers/gpu/drm/gma500/tc35876x-dsi-lvds.c13
-rw-r--r--drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_de.c9
-rw-r--r--drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c8
-rw-r--r--drivers/gpu/drm/hisilicon/hibmc/hibmc_ttm.c2
-rw-r--r--drivers/gpu/drm/hisilicon/kirin/dw_drm_dsi.c9
-rw-r--r--drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c1
-rw-r--r--drivers/gpu/drm/hisilicon/kirin/kirin_drm_drv.c43
-rw-r--r--drivers/gpu/drm/hisilicon/kirin/kirin_drm_drv.h1
-rw-r--r--drivers/gpu/drm/i2c/sil164_drv.c7
-rw-r--r--drivers/gpu/drm/i2c/tda998x_drv.c23
-rw-r--r--drivers/gpu/drm/i915/Kconfig.profile12
-rw-r--r--drivers/gpu/drm/i915/Makefile32
-rw-r--r--drivers/gpu/drm/i915/display/icl_dsi.c188
-rw-r--r--drivers/gpu/drm/i915/display/intel_atomic_plane.c25
-rw-r--r--drivers/gpu/drm/i915/display/intel_atomic_plane.h2
-rw-r--r--drivers/gpu/drm/i915/display/intel_audio.c144
-rw-r--r--drivers/gpu/drm/i915/display/intel_bw.c194
-rw-r--r--drivers/gpu/drm/i915/display/intel_bw.h27
-rw-r--r--drivers/gpu/drm/i915/display/intel_color.c121
-rw-r--r--drivers/gpu/drm/i915/display/intel_connector.c5
-rw-r--r--drivers/gpu/drm/i915/display/intel_crt.c40
-rw-r--r--drivers/gpu/drm/i915/display/intel_ddi.c808
-rw-r--r--drivers/gpu/drm/i915/display/intel_ddi.h9
-rw-r--r--drivers/gpu/drm/i915/display/intel_display.c920
-rw-r--r--drivers/gpu/drm/i915/display/intel_display.h11
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_debugfs.c130
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_debugfs.h4
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_power.c558
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_power.h5
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_types.h50
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp.c2014
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp.h19
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c84
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp_link_training.c9
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp_link_training.h4
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp_mst.c164
-rw-r--r--drivers/gpu/drm/i915/display/intel_dpll_mgr.c8
-rw-r--r--drivers/gpu/drm/i915/display/intel_dsb.c6
-rw-r--r--drivers/gpu/drm/i915/display/intel_dsi.c9
-rw-r--r--drivers/gpu/drm/i915/display/intel_dsi_vbt.c13
-rw-r--r--drivers/gpu/drm/i915/display/intel_dvo.c9
-rw-r--r--drivers/gpu/drm/i915/display/intel_fbc.c96
-rw-r--r--drivers/gpu/drm/i915/display/intel_fbdev.c96
-rw-r--r--drivers/gpu/drm/i915/display/intel_frontbuffer.c6
-rw-r--r--drivers/gpu/drm/i915/display/intel_global_state.c9
-rw-r--r--drivers/gpu/drm/i915/display/intel_gmbus.c3
-rw-r--r--drivers/gpu/drm/i915/display/intel_hdcp.c29
-rw-r--r--drivers/gpu/drm/i915/display/intel_hdcp.h4
-rw-r--r--drivers/gpu/drm/i915/display/intel_hdmi.c299
-rw-r--r--drivers/gpu/drm/i915/display/intel_hotplug.c16
-rw-r--r--drivers/gpu/drm/i915/display/intel_hotplug.h3
-rw-r--r--drivers/gpu/drm/i915/display/intel_lspcon.c2
-rw-r--r--drivers/gpu/drm/i915/display/intel_lvds.c38
-rw-r--r--drivers/gpu/drm/i915/display/intel_overlay.c8
-rw-r--r--drivers/gpu/drm/i915/display/intel_panel.c178
-rw-r--r--drivers/gpu/drm/i915/display/intel_panel.h13
-rw-r--r--drivers/gpu/drm/i915/display/intel_psr.c105
-rw-r--r--drivers/gpu/drm/i915/display/intel_psr.h6
-rw-r--r--drivers/gpu/drm/i915/display/intel_sdvo.c22
-rw-r--r--drivers/gpu/drm/i915/display/intel_sprite.c8
-rw-r--r--drivers/gpu/drm/i915/display/intel_tc.c149
-rw-r--r--drivers/gpu/drm/i915/display/intel_tc.h3
-rw-r--r--drivers/gpu/drm/i915/display/intel_tv.c15
-rw-r--r--drivers/gpu/drm/i915/display/intel_vbt_defs.h4
-rw-r--r--drivers/gpu/drm/i915/display/vlv_dsi.c26
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_clflush.c2
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_client_blt.c4
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_context.c89
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_context.h4
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_domain.c2
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c848
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_fence.c4
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_mman.c34
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_object.c8
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_object_blt.c26
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_object_blt.h1
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_object_types.h3
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_pages.c3
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_phys.c2
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_shrinker.c18
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_stolen.c4
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_userptr.c8
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c3
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/huge_pages.c83
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c566
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c2
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c52
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c171
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c128
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c2
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c26
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c2
-rw-r--r--drivers/gpu/drm/i915/gt/debugfs_engines.c2
-rw-r--r--drivers/gpu/drm/i915/gt/debugfs_gt.c15
-rw-r--r--drivers/gpu/drm/i915/gt/debugfs_gt.h9
-rw-r--r--drivers/gpu/drm/i915/gt/debugfs_gt_pm.c74
-rw-r--r--drivers/gpu/drm/i915/gt/gen8_ppgtt.c15
-rw-r--r--drivers/gpu/drm/i915/gt/intel_breadcrumbs.c58
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context.c5
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context_sseu.c3
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context_types.h2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine.h42
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_cs.c205
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_pm.c20
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_pm.h6
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_pool.h34
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_types.h54
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ggtt.c59
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c (renamed from drivers/gpu/drm/i915/i915_gem_fence_reg.c)170
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ggtt_fencing.h (renamed from drivers/gpu/drm/i915/i915_gem_fence_reg.h)17
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gpu_commands.h6
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt.c69
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c (renamed from drivers/gpu/drm/i915/gt/intel_engine_pool.c)114
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.h37
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h (renamed from drivers/gpu/drm/i915/gt/intel_engine_pool_types.h)15
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c102
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_clock_utils.h27
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_pm.c17
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_requests.c20
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_types.h20
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gtt.h5
-rw-r--r--drivers/gpu/drm/i915/gt/intel_lrc.c1079
-rw-r--r--drivers/gpu/drm/i915/gt/intel_lrc.h1
-rw-r--r--drivers/gpu/drm/i915/gt/intel_lrc_reg.h6
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rc6.c47
-rw-r--r--drivers/gpu/drm/i915/gt/intel_renderstate.c22
-rw-r--r--drivers/gpu/drm/i915/gt/intel_reset.c16
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ring.h5
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ring_submission.c49
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rps.c449
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rps.h61
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rps_types.h14
-rw-r--r--drivers/gpu/drm/i915/gt/intel_sseu.c33
-rw-r--r--drivers/gpu/drm/i915/gt/intel_timeline.c37
-rw-r--r--drivers/gpu/drm/i915/gt/intel_timeline.h7
-rw-r--r--drivers/gpu/drm/i915/gt/intel_workarounds.c49
-rw-r--r--drivers/gpu/drm/i915/gt/mock_engine.c2
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_context.c12
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_gt_pm.c8
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_lrc.c685
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_rc6.c30
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_ring_submission.c2
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_rps.c1331
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_rps.h17
-rw-r--r--drivers/gpu/drm/i915/gt/shmem_utils.c173
-rw-r--r--drivers/gpu/drm/i915/gt/shmem_utils.h23
-rw-r--r--drivers/gpu/drm/i915/gt/st_shmem_utils.c63
-rw-r--r--drivers/gpu/drm/i915/gt/sysfs_engines.c94
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc.c46
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc.h7
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c42
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.h14
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c14
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_fw.h1
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_log.c97
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_log.h4
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.c124
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.h15
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c2
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_huc.c53
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_huc.h2
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_huc_debugfs.c36
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_huc_debugfs.h14
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c17
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_huc_fw.h1
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc.c35
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc.h1
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c30
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.h14
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c56
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h3
-rw-r--r--drivers/gpu/drm/i915/gvt/aperture_gm.c2
-rw-r--r--drivers/gpu/drm/i915/gvt/cmd_parser.c45
-rw-r--r--drivers/gpu/drm/i915/gvt/execlist.c2
-rw-r--r--drivers/gpu/drm/i915/gvt/gtt.c16
-rw-r--r--drivers/gpu/drm/i915/gvt/gtt.h1
-rw-r--r--drivers/gpu/drm/i915/gvt/gvt.c1
-rw-r--r--drivers/gpu/drm/i915/gvt/gvt.h5
-rw-r--r--drivers/gpu/drm/i915/gvt/handlers.c2
-rw-r--r--drivers/gpu/drm/i915/gvt/hypercall.h2
-rw-r--r--drivers/gpu/drm/i915/gvt/scheduler.c247
-rw-r--r--drivers/gpu/drm/i915/gvt/scheduler.h1
-rw-r--r--drivers/gpu/drm/i915/i915_active.c137
-rw-r--r--drivers/gpu/drm/i915/i915_active.h14
-rw-r--r--drivers/gpu/drm/i915/i915_config.c15
-rw-r--r--drivers/gpu/drm/i915/i915_debugfs.c356
-rw-r--r--drivers/gpu/drm/i915/i915_debugfs.h4
-rw-r--r--drivers/gpu/drm/i915/i915_drv.c156
-rw-r--r--drivers/gpu/drm/i915/i915_drv.h47
-rw-r--r--drivers/gpu/drm/i915/i915_gem.c20
-rw-r--r--drivers/gpu/drm/i915/i915_gem_evict.c7
-rw-r--r--drivers/gpu/drm/i915/i915_getparam.c2
-rw-r--r--drivers/gpu/drm/i915/i915_gpu_error.c33
-rw-r--r--drivers/gpu/drm/i915/i915_gpu_error.h3
-rw-r--r--drivers/gpu/drm/i915/i915_irq.c219
-rw-r--r--drivers/gpu/drm/i915/i915_pci.c41
-rw-r--r--drivers/gpu/drm/i915/i915_perf.c590
-rw-r--r--drivers/gpu/drm/i915/i915_perf_types.h46
-rw-r--r--drivers/gpu/drm/i915/i915_pmu.c41
-rw-r--r--drivers/gpu/drm/i915/i915_priolist_types.h7
-rw-r--r--drivers/gpu/drm/i915/i915_reg.h192
-rw-r--r--drivers/gpu/drm/i915/i915_request.c144
-rw-r--r--drivers/gpu/drm/i915/i915_request.h30
-rw-r--r--drivers/gpu/drm/i915/i915_scheduler.c33
-rw-r--r--drivers/gpu/drm/i915/i915_scheduler_types.h2
-rw-r--r--drivers/gpu/drm/i915/i915_selftest.h2
-rw-r--r--drivers/gpu/drm/i915/i915_sw_fence.c12
-rw-r--r--drivers/gpu/drm/i915/i915_sw_fence_work.c5
-rw-r--r--drivers/gpu/drm/i915/i915_sw_fence_work.h23
-rw-r--r--drivers/gpu/drm/i915/i915_switcheroo.c4
-rw-r--r--drivers/gpu/drm/i915/i915_utils.c3
-rw-r--r--drivers/gpu/drm/i915/i915_vma.c96
-rw-r--r--drivers/gpu/drm/i915/i915_vma.h4
-rw-r--r--drivers/gpu/drm/i915/intel_device_info.c76
-rw-r--r--drivers/gpu/drm/i915/intel_device_info.h5
-rw-r--r--drivers/gpu/drm/i915/intel_dram.c3
-rw-r--r--drivers/gpu/drm/i915/intel_pm.c364
-rw-r--r--drivers/gpu/drm/i915/intel_pm.h6
-rw-r--r--drivers/gpu/drm/i915/intel_runtime_pm.c2
-rw-r--r--drivers/gpu/drm/i915/intel_sideband.c8
-rw-r--r--drivers/gpu/drm/i915/intel_uncore.c60
-rw-r--r--drivers/gpu/drm/i915/intel_uncore.h6
-rw-r--r--drivers/gpu/drm/i915/intel_wakeref.c12
-rw-r--r--drivers/gpu/drm/i915/intel_wakeref.h22
-rw-r--r--drivers/gpu/drm/i915/intel_wopcm.c47
-rw-r--r--drivers/gpu/drm/i915/oa/i915_oa_bdw.c90
-rw-r--r--drivers/gpu/drm/i915/oa/i915_oa_bdw.h16
-rw-r--r--drivers/gpu/drm/i915/oa/i915_oa_bxt.c88
-rw-r--r--drivers/gpu/drm/i915/oa/i915_oa_bxt.h16
-rw-r--r--drivers/gpu/drm/i915/oa/i915_oa_cflgt2.c89
-rw-r--r--drivers/gpu/drm/i915/oa/i915_oa_cflgt2.h16
-rw-r--r--drivers/gpu/drm/i915/oa/i915_oa_cflgt3.c89
-rw-r--r--drivers/gpu/drm/i915/oa/i915_oa_cflgt3.h16
-rw-r--r--drivers/gpu/drm/i915/oa/i915_oa_chv.c89
-rw-r--r--drivers/gpu/drm/i915/oa/i915_oa_chv.h16
-rw-r--r--drivers/gpu/drm/i915/oa/i915_oa_cnl.c101
-rw-r--r--drivers/gpu/drm/i915/oa/i915_oa_cnl.h16
-rw-r--r--drivers/gpu/drm/i915/oa/i915_oa_glk.c88
-rw-r--r--drivers/gpu/drm/i915/oa/i915_oa_glk.h16
-rw-r--r--drivers/gpu/drm/i915/oa/i915_oa_hsw.c118
-rw-r--r--drivers/gpu/drm/i915/oa/i915_oa_hsw.h16
-rw-r--r--drivers/gpu/drm/i915/oa/i915_oa_icl.c98
-rw-r--r--drivers/gpu/drm/i915/oa/i915_oa_icl.h16
-rw-r--r--drivers/gpu/drm/i915/oa/i915_oa_kblgt2.c89
-rw-r--r--drivers/gpu/drm/i915/oa/i915_oa_kblgt2.h16
-rw-r--r--drivers/gpu/drm/i915/oa/i915_oa_kblgt3.c89
-rw-r--r--drivers/gpu/drm/i915/oa/i915_oa_kblgt3.h16
-rw-r--r--drivers/gpu/drm/i915/oa/i915_oa_sklgt2.c88
-rw-r--r--drivers/gpu/drm/i915/oa/i915_oa_sklgt2.h16
-rw-r--r--drivers/gpu/drm/i915/oa/i915_oa_sklgt3.c89
-rw-r--r--drivers/gpu/drm/i915/oa/i915_oa_sklgt3.h16
-rw-r--r--drivers/gpu/drm/i915/oa/i915_oa_sklgt4.c89
-rw-r--r--drivers/gpu/drm/i915/oa/i915_oa_sklgt4.h16
-rw-r--r--drivers/gpu/drm/i915/oa/i915_oa_tgl.c121
-rw-r--r--drivers/gpu/drm/i915/oa/i915_oa_tgl.h16
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_active.c12
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_gem.c2
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_gem_evict.c26
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_gem_gtt.c33
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_live_selftests.h1
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_mock_selftests.h1
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_perf.c104
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_perf_selftests.h1
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_request.c623
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_selftest.c29
-rw-r--r--drivers/gpu/drm/i915/selftests/igt_spinner.c3
-rw-r--r--drivers/gpu/drm/i915/selftests/intel_memory_region.c5
-rw-r--r--drivers/gpu/drm/i915/selftests/librapl.c24
-rw-r--r--drivers/gpu/drm/i915/selftests/librapl.h13
-rw-r--r--drivers/gpu/drm/i915/selftests/mock_gem_device.c38
-rw-r--r--drivers/gpu/drm/imx/dw_hdmi-imx.c8
-rw-r--r--drivers/gpu/drm/imx/imx-drm-core.c10
-rw-r--r--drivers/gpu/drm/imx/imx-drm.h1
-rw-r--r--drivers/gpu/drm/imx/imx-ldb.c8
-rw-r--r--drivers/gpu/drm/imx/imx-tve.c8
-rw-r--r--drivers/gpu/drm/imx/parallel-display.c8
-rw-r--r--drivers/gpu/drm/ingenic/ingenic-drm.c43
-rw-r--r--drivers/gpu/drm/lima/Kconfig2
-rw-r--r--drivers/gpu/drm/lima/Makefile4
-rw-r--r--drivers/gpu/drm/lima/lima_bcast.c25
-rw-r--r--drivers/gpu/drm/lima/lima_bcast.h2
-rw-r--r--drivers/gpu/drm/lima/lima_ctx.c3
-rw-r--r--drivers/gpu/drm/lima/lima_ctx.h5
-rw-r--r--drivers/gpu/drm/lima/lima_devfreq.c257
-rw-r--r--drivers/gpu/drm/lima/lima_devfreq.h44
-rw-r--r--drivers/gpu/drm/lima/lima_device.c228
-rw-r--r--drivers/gpu/drm/lima/lima_device.h17
-rw-r--r--drivers/gpu/drm/lima/lima_dlbu.c17
-rw-r--r--drivers/gpu/drm/lima/lima_dlbu.h2
-rw-r--r--drivers/gpu/drm/lima/lima_drv.c141
-rw-r--r--drivers/gpu/drm/lima/lima_drv.h1
-rw-r--r--drivers/gpu/drm/lima/lima_dump.h77
-rw-r--r--drivers/gpu/drm/lima/lima_gp.c21
-rw-r--r--drivers/gpu/drm/lima/lima_gp.h2
-rw-r--r--drivers/gpu/drm/lima/lima_l2_cache.c38
-rw-r--r--drivers/gpu/drm/lima/lima_l2_cache.h2
-rw-r--r--drivers/gpu/drm/lima/lima_mmu.c49
-rw-r--r--drivers/gpu/drm/lima/lima_mmu.h2
-rw-r--r--drivers/gpu/drm/lima/lima_pmu.c77
-rw-r--r--drivers/gpu/drm/lima/lima_pmu.h2
-rw-r--r--drivers/gpu/drm/lima/lima_pp.c31
-rw-r--r--drivers/gpu/drm/lima/lima_pp.h4
-rw-r--r--drivers/gpu/drm/lima/lima_sched.c193
-rw-r--r--drivers/gpu/drm/lima/lima_sched.h11
-rw-r--r--drivers/gpu/drm/lima/lima_trace.c7
-rw-r--r--drivers/gpu/drm/lima/lima_trace.h50
-rw-r--r--drivers/gpu/drm/lima/lima_vm.h3
-rw-r--r--drivers/gpu/drm/mcde/mcde_display.c10
-rw-r--r--drivers/gpu/drm/mcde/mcde_drm.h2
-rw-r--r--drivers/gpu/drm/mcde/mcde_drv.c52
-rw-r--r--drivers/gpu/drm/mcde/mcde_dsi.c9
-rw-r--r--drivers/gpu/drm/mediatek/mtk_dpi.c45
-rw-r--r--drivers/gpu/drm/mediatek/mtk_drm_drv.c13
-rw-r--r--drivers/gpu/drm/mediatek/mtk_drm_gem.c3
-rw-r--r--drivers/gpu/drm/mediatek/mtk_dsi.c14
-rw-r--r--drivers/gpu/drm/mediatek/mtk_hdmi.c18
-rw-r--r--drivers/gpu/drm/mediatek/mtk_mipi_tx.c54
-rw-r--r--drivers/gpu/drm/mediatek/mtk_mipi_tx.h4
-rw-r--r--drivers/gpu/drm/mediatek/mtk_mt8183_mipi_tx.c28
-rw-r--r--drivers/gpu/drm/meson/meson_drv.c34
-rw-r--r--drivers/gpu/drm/meson/meson_drv.h6
-rw-r--r--drivers/gpu/drm/meson/meson_dw_hdmi.c2
-rw-r--r--drivers/gpu/drm/meson/meson_plane.c2
-rw-r--r--drivers/gpu/drm/meson/meson_vclk.c16
-rw-r--r--drivers/gpu/drm/meson/meson_vclk.h3
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_cursor.c4
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_drv.c4
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_drv.h15
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_i2c.c10
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_main.c119
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_mode.c127
-rw-r--r--drivers/gpu/drm/msm/adreno/a5xx_debugfs.c18
-rw-r--r--drivers/gpu/drm/msm/adreno/a5xx_gpu.h2
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h2
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c14
-rw-r--r--drivers/gpu/drm/msm/msm_debugfs.c23
-rw-r--r--drivers/gpu/drm/msm/msm_debugfs.h2
-rw-r--r--drivers/gpu/drm/msm/msm_gpu.h2
-rw-r--r--drivers/gpu/drm/nouveau/Kbuild10
-rw-r--r--drivers/gpu/drm/nouveau/dispnv04/crtc.c19
-rw-r--r--drivers/gpu/drm/nouveau/dispnv04/disp.c21
-rw-r--r--drivers/gpu/drm/nouveau/dispnv04/overlay.c21
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/base507c.c7
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/core.h7
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/core507d.c15
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/core827d.c1
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/core907d.c1
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/core917d.c1
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/corec37d.c26
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/corec57d.c1
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/curs507a.c2
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/disp.c137
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/disp.h5
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/headc37d.c5
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/headc57d.c5
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/pior507d.c8
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/sor507d.c7
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/sor907d.c11
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/sorc37d.c9
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/wndw.c81
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/wndwc57e.c17
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/class.h2
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h2
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_acpi.c98
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_acpi.h4
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_connector.c48
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_connector.h5
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_debugfs.c11
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_debugfs.h8
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_display.c212
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_display.h31
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_dmem.c450
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_dmem.h5
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_dp.c31
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_drm.c31
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_encoder.h8
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_fbcon.c42
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_fbcon.h3
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_gem.c6
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_platform.c11
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_svm.c153
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_svm.h5
-rw-r--r--drivers/gpu/drm/nouveau/nv50_fbcon.c9
-rw-r--r--drivers/gpu/drm/nouveau/nvc0_fbcon.c9
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/core/memory.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/core/subdev.c11
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/device/base.c75
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild3
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/capsgv100.c60
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/hdagf119.c18
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/hdagt215.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/hdagv100.c30
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h10
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgv100.c1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.h3
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/roottu102.c1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgk104.c1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm107.c1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm200.c1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgv100.c1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/sortu102.c1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c11
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/acr/base.c5
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/acr/hsfw.c12
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowacpi.c65
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gf100.c6
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gf117.c6
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gk104.c6
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gk20a.c6
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gm200.c6
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gp10b.c6
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/therm/ic.c4
-rw-r--r--drivers/gpu/drm/omapdrm/dss/dispc.c33
-rw-r--r--drivers/gpu/drm/omapdrm/dss/venc.c43
-rw-r--r--drivers/gpu/drm/omapdrm/omap_debugfs.c29
-rw-r--r--drivers/gpu/drm/omapdrm/omap_drv.h2
-rw-r--r--drivers/gpu/drm/panel/Kconfig29
-rw-r--r--drivers/gpu/drm/panel/Makefile3
-rw-r--r--drivers/gpu/drm/panel/panel-asus-z00t-tm5p5-n35596.c367
-rw-r--r--drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c31
-rw-r--r--drivers/gpu/drm/panel/panel-ilitek-ili9322.c4
-rw-r--r--drivers/gpu/drm/panel/panel-leadtek-ltk050h3146w.c691
-rw-r--r--drivers/gpu/drm/panel/panel-leadtek-ltk500hd1829.c2
-rw-r--r--drivers/gpu/drm/panel/panel-novatek-nt39016.c46
-rw-r--r--drivers/gpu/drm/panel/panel-simple.c247
-rw-r--r--drivers/gpu/drm/panel/panel-truly-nt35597.c2
-rw-r--r--drivers/gpu/drm/panel/panel-visionox-rm69299.c303
-rw-r--r--drivers/gpu/drm/pl111/Makefile1
-rw-r--r--drivers/gpu/drm/pl111/pl111_debugfs.c8
-rw-r--r--drivers/gpu/drm/pl111/pl111_drm.h2
-rw-r--r--drivers/gpu/drm/pl111/pl111_drv.c13
-rw-r--r--drivers/gpu/drm/pl111/pl111_versatile.c148
-rw-r--r--drivers/gpu/drm/pl111/pl111_vexpress.c138
-rw-r--r--drivers/gpu/drm/pl111/pl111_vexpress.h29
-rw-r--r--drivers/gpu/drm/qxl/qxl_debugfs.c28
-rw-r--r--drivers/gpu/drm/qxl/qxl_display.c32
-rw-r--r--drivers/gpu/drm/qxl/qxl_drv.c25
-rw-r--r--drivers/gpu/drm/qxl/qxl_drv.h20
-rw-r--r--drivers/gpu/drm/qxl/qxl_dumb.c2
-rw-r--r--drivers/gpu/drm/qxl/qxl_gem.c2
-rw-r--r--drivers/gpu/drm/qxl/qxl_ioctl.c14
-rw-r--r--drivers/gpu/drm/qxl/qxl_irq.c2
-rw-r--r--drivers/gpu/drm/qxl/qxl_kms.c15
-rw-r--r--drivers/gpu/drm/qxl/qxl_object.c2
-rw-r--r--drivers/gpu/drm/qxl/qxl_release.c2
-rw-r--r--drivers/gpu/drm/qxl/qxl_ttm.c8
-rw-r--r--drivers/gpu/drm/r128/ati_pcigart.c3
-rw-r--r--drivers/gpu/drm/radeon/Makefile35
-rw-r--r--drivers/gpu/drm/radeon/atom.c3
-rw-r--r--drivers/gpu/drm/radeon/ci_dpm.c14
-rw-r--r--drivers/gpu/drm/radeon/radeon_atombios.c4
-rw-r--r--drivers/gpu/drm/radeon/radeon_combios.c4
-rw-r--r--drivers/gpu/drm/radeon/radeon_drv.c2
-rw-r--r--drivers/gpu/drm/radeon/radeon_kms.c6
-rw-r--r--drivers/gpu/drm/radeon/si_dpm.c18
-rw-r--r--drivers/gpu/drm/rcar-du/rcar_du_drv.c1
-rw-r--r--drivers/gpu/drm/rcar-du/rcar_du_encoder.c14
-rw-r--r--drivers/gpu/drm/rcar-du/rcar_du_kms.c4
-rw-r--r--drivers/gpu/drm/rcar-du/rcar_du_plane.c16
-rw-r--r--drivers/gpu/drm/rcar-du/rcar_du_vsp.c14
-rw-r--r--drivers/gpu/drm/rockchip/analogix_dp-rockchip.c9
-rw-r--r--drivers/gpu/drm/rockchip/cdn-dp-core.c13
-rw-r--r--drivers/gpu/drm/rockchip/cdn-dp-reg.c6
-rw-r--r--drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c8
-rw-r--r--drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c8
-rw-r--r--drivers/gpu/drm/rockchip/inno_hdmi.c8
-rw-r--r--drivers/gpu/drm/rockchip/rk3066_hdmi.c8
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_drm_drv.c14
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_drm_drv.h1
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_drm_fb.c43
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_drm_vop.c137
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_drm_vop.h17
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_lvds.c10
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_rgb.c8
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_vop_reg.c83
-rw-r--r--drivers/gpu/drm/shmobile/shmob_drm_crtc.c14
-rw-r--r--drivers/gpu/drm/shmobile/shmob_drm_drv.c2
-rw-r--r--drivers/gpu/drm/shmobile/shmob_drm_kms.c6
-rw-r--r--drivers/gpu/drm/sti/sti_compositor.c6
-rw-r--r--drivers/gpu/drm/sti/sti_compositor.h4
-rw-r--r--drivers/gpu/drm/sti/sti_crtc.c2
-rw-r--r--drivers/gpu/drm/sti/sti_cursor.c14
-rw-r--r--drivers/gpu/drm/sti/sti_drv.c16
-rw-r--r--drivers/gpu/drm/sti/sti_dvo.c13
-rw-r--r--drivers/gpu/drm/sti/sti_gdp.c7
-rw-r--r--drivers/gpu/drm/sti/sti_hda.c13
-rw-r--r--drivers/gpu/drm/sti/sti_hdmi.c13
-rw-r--r--drivers/gpu/drm/sti/sti_hqvdp.c12
-rw-r--r--drivers/gpu/drm/sti/sti_mixer.c10
-rw-r--r--drivers/gpu/drm/sti/sti_mixer.h2
-rw-r--r--drivers/gpu/drm/sti/sti_tvout.c13
-rw-r--r--drivers/gpu/drm/sti/sti_vid.c8
-rw-r--r--drivers/gpu/drm/sti/sti_vid.h2
-rw-r--r--drivers/gpu/drm/stm/drv.c10
-rw-r--r--drivers/gpu/drm/stm/ltdc.c102
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c12
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_lvds.c12
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_rgb.c17
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_tcon.c4
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_tv.c17
-rw-r--r--drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c12
-rw-r--r--drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c8
-rw-r--r--drivers/gpu/drm/sun4i/sun8i_mixer.c111
-rw-r--r--drivers/gpu/drm/sun4i/sun8i_mixer.h10
-rw-r--r--drivers/gpu/drm/sun4i/sun8i_ui_layer.c14
-rw-r--r--drivers/gpu/drm/sun4i/sun8i_vi_layer.c40
-rw-r--r--drivers/gpu/drm/tegra/dc.c11
-rw-r--r--drivers/gpu/drm/tegra/dpaux.c2
-rw-r--r--drivers/gpu/drm/tegra/drm.c8
-rw-r--r--drivers/gpu/drm/tegra/drm.h4
-rw-r--r--drivers/gpu/drm/tegra/dsi.c21
-rw-r--r--drivers/gpu/drm/tegra/fb.c2
-rw-r--r--drivers/gpu/drm/tegra/hdmi.c21
-rw-r--r--drivers/gpu/drm/tegra/output.c6
-rw-r--r--drivers/gpu/drm/tegra/rgb.c8
-rw-r--r--drivers/gpu/drm/tegra/sor.c20
-rw-r--r--drivers/gpu/drm/tidss/tidss_crtc.c16
-rw-r--r--drivers/gpu/drm/tidss/tidss_dispc.c11
-rw-r--r--drivers/gpu/drm/tidss/tidss_dispc.h6
-rw-r--r--drivers/gpu/drm/tidss/tidss_drv.c25
-rw-r--r--drivers/gpu/drm/tidss/tidss_drv.h4
-rw-r--r--drivers/gpu/drm/tidss/tidss_irq.c12
-rw-r--r--drivers/gpu/drm/tidss/tidss_kms.c21
-rw-r--r--drivers/gpu/drm/tidss/tidss_kms.h1
-rw-r--r--drivers/gpu/drm/tidss/tidss_plane.c6
-rw-r--r--drivers/gpu/drm/tilcdc/tilcdc_drv.c20
-rw-r--r--drivers/gpu/drm/tilcdc/tilcdc_external.c10
-rw-r--r--drivers/gpu/drm/tilcdc/tilcdc_panel.c8
-rw-r--r--drivers/gpu/drm/tiny/Kconfig19
-rw-r--r--drivers/gpu/drm/tiny/Makefile1
-rw-r--r--drivers/gpu/drm/tiny/cirrus.c (renamed from drivers/gpu/drm/cirrus/cirrus.c)82
-rw-r--r--drivers/gpu/drm/tiny/gm12u320.c242
-rw-r--r--drivers/gpu/drm/tiny/hx8357d.c16
-rw-r--r--drivers/gpu/drm/tiny/ili9225.c16
-rw-r--r--drivers/gpu/drm/tiny/ili9341.c16
-rw-r--r--drivers/gpu/drm/tiny/ili9486.c16
-rw-r--r--drivers/gpu/drm/tiny/mi0283qt.c16
-rw-r--r--drivers/gpu/drm/tiny/repaper.c28
-rw-r--r--drivers/gpu/drm/tiny/st7586.c16
-rw-r--r--drivers/gpu/drm/tiny/st7735r.c16
-rw-r--r--drivers/gpu/drm/ttm/ttm_bo.c4
-rw-r--r--drivers/gpu/drm/udl/udl_connector.c4
-rw-r--r--drivers/gpu/drm/udl/udl_drv.c45
-rw-r--r--drivers/gpu/drm/udl/udl_drv.h2
-rw-r--r--drivers/gpu/drm/udl/udl_main.c10
-rw-r--r--drivers/gpu/drm/udl/udl_modeset.c31
-rw-r--r--drivers/gpu/drm/v3d/v3d_debugfs.c20
-rw-r--r--drivers/gpu/drm/v3d/v3d_drv.c53
-rw-r--r--drivers/gpu/drm/v3d/v3d_drv.h9
-rw-r--r--drivers/gpu/drm/v3d/v3d_gem.c17
-rw-r--r--drivers/gpu/drm/v3d/v3d_irq.c16
-rw-r--r--drivers/gpu/drm/v3d/v3d_mmu.c10
-rw-r--r--drivers/gpu/drm/v3d/v3d_sched.c10
-rw-r--r--drivers/gpu/drm/vboxvideo/vbox_drv.c32
-rw-r--r--drivers/gpu/drm/vboxvideo/vbox_drv.h1
-rw-r--r--drivers/gpu/drm/vboxvideo/vbox_irq.c2
-rw-r--r--drivers/gpu/drm/vboxvideo/vbox_main.c29
-rw-r--r--drivers/gpu/drm/vboxvideo/vbox_mode.c10
-rw-r--r--drivers/gpu/drm/vboxvideo/vbox_ttm.c12
-rw-r--r--drivers/gpu/drm/vc4/vc4_debugfs.c11
-rw-r--r--drivers/gpu/drm/vc4/vc4_dpi.c8
-rw-r--r--drivers/gpu/drm/vc4/vc4_drv.h2
-rw-r--r--drivers/gpu/drm/vc4/vc4_dsi.c15
-rw-r--r--drivers/gpu/drm/vc4/vc4_hdmi.c17
-rw-r--r--drivers/gpu/drm/vc4/vc4_vec.c8
-rw-r--r--drivers/gpu/drm/vgem/vgem_drv.c15
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_debugfs.c3
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_display.c8
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_drv.h36
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_gem.c13
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_ioctl.c1
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_object.c7
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_vq.c9
-rw-r--r--drivers/gpu/drm/vkms/vkms_drv.c21
-rw-r--r--drivers/gpu/drm/vkms/vkms_drv.h5
-rw-r--r--drivers/gpu/drm/vkms/vkms_gem.c11
-rw-r--r--drivers/gpu/drm/vkms/vkms_output.c8
-rw-r--r--drivers/gpu/drm/xen/xen_drm_front.c4
-rw-r--r--drivers/gpu/drm/zte/zx_hdmi.c8
-rw-r--r--drivers/gpu/drm/zte/zx_tvenc.c8
-rw-r--r--drivers/gpu/drm/zte/zx_vga.c8
-rw-r--r--drivers/hv/channel.c58
-rw-r--r--drivers/hv/channel_mgmt.c439
-rw-r--r--drivers/hv/connection.c58
-rw-r--r--drivers/hv/hv.c16
-rw-r--r--drivers/hv/hv_fcopy.c2
-rw-r--r--drivers/hv/hv_snapshot.c2
-rw-r--r--drivers/hv/hv_trace.h25
-rw-r--r--drivers/hv/hyperv_vmbus.h81
-rw-r--r--drivers/hv/vmbus_drv.c314
-rw-r--r--drivers/hwmon/Kconfig59
-rw-r--r--drivers/hwmon/Makefile3
-rw-r--r--drivers/hwmon/adt7411.c3
-rw-r--r--drivers/hwmon/amd_energy.c408
-rw-r--r--drivers/hwmon/applesmc.c12
-rw-r--r--drivers/hwmon/bt1-pvt.c1146
-rw-r--r--drivers/hwmon/bt1-pvt.h244
-rw-r--r--drivers/hwmon/dell-smm-hwmon.c26
-rw-r--r--drivers/hwmon/gsc-hwmon.c390
-rw-r--r--drivers/hwmon/hwmon.c136
-rw-r--r--drivers/hwmon/ina2xx.c183
-rw-r--r--drivers/hwmon/lm70.c47
-rw-r--r--drivers/hwmon/lm75.c8
-rw-r--r--drivers/hwmon/lm75.h31
-rw-r--r--drivers/hwmon/lm90.c45
-rw-r--r--drivers/hwmon/nct6775.c10
-rw-r--r--drivers/hwmon/nct7802.c6
-rw-r--r--drivers/hwmon/nct7904.c138
-rw-r--r--drivers/hwmon/pmbus/Kconfig9
-rw-r--r--drivers/hwmon/pmbus/Makefile1
-rw-r--r--drivers/hwmon/pmbus/max16601.c314
-rw-r--r--drivers/hwmon/pmbus/pmbus_core.c8
-rw-r--r--drivers/hwtracing/coresight/Kconfig2
-rw-r--r--drivers/i2c/busses/i2c-designware-platdrv.c4
-rw-r--r--drivers/i3c/master.c16
-rw-r--r--drivers/ide/ide-cd.c17
-rw-r--r--drivers/ide/ide-io.c7
-rw-r--r--drivers/iio/dac/ad5761.c4
-rw-r--r--drivers/iio/dac/ti-dac7612.c4
-rw-r--r--drivers/iio/dummy/iio_dummy_evgen.c31
-rw-r--r--drivers/infiniband/hw/hfi1/user_exp_rcv.c7
-rw-r--r--drivers/irqchip/Kconfig27
-rw-r--r--drivers/irqchip/Makefile3
-rw-r--r--drivers/irqchip/irq-gic-v3-its.c170
-rw-r--r--drivers/irqchip/irq-gic-v3.c3
-rw-r--r--drivers/irqchip/irq-gic.c1
-rw-r--r--drivers/irqchip/irq-loongson-htvec.c214
-rw-r--r--drivers/irqchip/irq-loongson-pch-msi.c255
-rw-r--r--drivers/irqchip/irq-loongson-pch-pic.c243
-rw-r--r--drivers/irqchip/irq-sifive-plic.c23
-rw-r--r--drivers/leds/leds-pca963x.c2
-rw-r--r--drivers/lightnvm/pblk-cache.c8
-rw-r--r--drivers/lightnvm/pblk-init.c5
-rw-r--r--drivers/lightnvm/pblk-read.c11
-rw-r--r--drivers/macintosh/via-pmu.c2
-rw-r--r--drivers/md/bcache/Kconfig9
-rw-r--r--drivers/md/bcache/bcache.h2
-rw-r--r--drivers/md/bcache/bset.c6
-rw-r--r--drivers/md/bcache/btree.c16
-rw-r--r--drivers/md/bcache/extents.c12
-rw-r--r--drivers/md/bcache/io.c8
-rw-r--r--drivers/md/bcache/journal.c34
-rw-r--r--drivers/md/bcache/request.c25
-rw-r--r--drivers/md/bcache/super.c232
-rw-r--r--drivers/md/bcache/sysfs.c8
-rw-r--r--drivers/md/bcache/writeback.c6
-rw-r--r--drivers/md/dm-bufio.c4
-rw-r--r--drivers/md/dm-integrity.c2
-rw-r--r--drivers/md/dm-rq.c2
-rw-r--r--drivers/md/dm-table.c17
-rw-r--r--drivers/md/dm-zoned-metadata.c6
-rw-r--r--drivers/md/dm.c24
-rw-r--r--drivers/md/md-bitmap.c12
-rw-r--r--drivers/md/md-linear.h2
-rw-r--r--drivers/md/md.c71
-rw-r--r--drivers/md/md.h1
-rw-r--r--drivers/md/raid1.c13
-rw-r--r--drivers/md/raid1.h2
-rw-r--r--drivers/md/raid10.h2
-rw-r--r--drivers/md/raid5-ppl.c2
-rw-r--r--drivers/md/raid5.c22
-rw-r--r--drivers/media/cec/cec-notifier.c2
-rw-r--r--drivers/media/common/videobuf2/videobuf2-dma-sg.c3
-rw-r--r--drivers/media/common/videobuf2/videobuf2-vmalloc.c3
-rw-r--r--drivers/media/i2c/imx214.c4
-rw-r--r--drivers/media/mmc/siano/smssdio.c10
-rw-r--r--drivers/media/pci/ivtv/ivtv-udma.c19
-rw-r--r--drivers/media/pci/ivtv/ivtv-yuv.c17
-rw-r--r--drivers/media/pci/ivtv/ivtvfb.c4
-rw-r--r--drivers/media/usb/pulse8-cec/pulse8-cec.c6
-rw-r--r--drivers/media/v4l2-core/v4l2-fwnode.c2
-rw-r--r--drivers/mfd/Kconfig35
-rw-r--r--drivers/mfd/Makefile2
-rw-r--r--drivers/mfd/gateworks-gsc.c277
-rw-r--r--drivers/mfd/intel_pmc_bxt.c468
-rw-r--r--drivers/mfd/intel_soc_pmic_bxtwc.c34
-rw-r--r--drivers/mfd/intel_soc_pmic_mrfld.c10
-rw-r--r--drivers/misc/mei/pci-me.c2
-rw-r--r--drivers/misc/mei/pci-txe.c2
-rw-r--r--drivers/misc/vmw_vmci/vmci_host.c2
-rw-r--r--drivers/mmc/core/bus.c14
-rw-r--r--drivers/mmc/core/core.c6
-rw-r--r--drivers/mmc/core/debugfs.c6
-rw-r--r--drivers/mmc/core/mmc.c6
-rw-r--r--drivers/mmc/core/quirks.h2
-rw-r--r--drivers/mmc/core/regulator.c17
-rw-r--r--drivers/mmc/core/sd.c30
-rw-r--r--drivers/mmc/core/sdio.c136
-rw-r--r--drivers/mmc/host/Kconfig29
-rw-r--r--drivers/mmc/host/Makefile3
-rw-r--r--drivers/mmc/host/android-goldfish.c10
-rw-r--r--drivers/mmc/host/atmel-mci.c12
-rw-r--r--drivers/mmc/host/au1xmmc.c20
-rw-r--r--drivers/mmc/host/bcm2835.c3
-rw-r--r--drivers/mmc/host/cavium.c3
-rw-r--r--drivers/mmc/host/cb710-mmc.c8
-rw-r--r--drivers/mmc/host/dw_mmc-k3.c2
-rw-r--r--drivers/mmc/host/dw_mmc.c9
-rw-r--r--drivers/mmc/host/jz4740_mmc.c13
-rw-r--r--drivers/mmc/host/meson-gx-mmc.c5
-rw-r--r--drivers/mmc/host/meson-mx-sdhc-clkc.c158
-rw-r--r--drivers/mmc/host/meson-mx-sdhc-mmc.c914
-rw-r--r--drivers/mmc/host/meson-mx-sdhc.h141
-rw-r--r--drivers/mmc/host/meson-mx-sdio.c5
-rw-r--r--drivers/mmc/host/mmc_hsq.c29
-rw-r--r--drivers/mmc/host/mmc_hsq.h1
-rw-r--r--drivers/mmc/host/mmc_spi.c20
-rw-r--r--drivers/mmc/host/mmci.c30
-rw-r--r--drivers/mmc/host/mmci_stm32_sdmmc.c15
-rw-r--r--drivers/mmc/host/mtk-sd.c21
-rw-r--r--drivers/mmc/host/mvsdio.c2
-rw-r--r--drivers/mmc/host/mxs-mmc.c3
-rw-r--r--drivers/mmc/host/omap.c2
-rw-r--r--drivers/mmc/host/omap_hsmmc.c2
-rw-r--r--drivers/mmc/host/owl-mmc.c8
-rw-r--r--drivers/mmc/host/renesas_sdhi.h5
-rw-r--r--drivers/mmc/host/renesas_sdhi_core.c146
-rw-r--r--drivers/mmc/host/rtsx_pci_sdmmc.c2
-rw-r--r--drivers/mmc/host/rtsx_usb_sdmmc.c2
-rw-r--r--drivers/mmc/host/s3cmci.c7
-rw-r--r--drivers/mmc/host/sdhci-cadence.c10
-rw-r--r--drivers/mmc/host/sdhci-esdhc-imx.c45
-rw-r--r--drivers/mmc/host/sdhci-esdhc-mcf.c521
-rw-r--r--drivers/mmc/host/sdhci-esdhc.h2
-rw-r--r--drivers/mmc/host/sdhci-msm.c162
-rw-r--r--drivers/mmc/host/sdhci-of-arasan.c594
-rw-r--r--drivers/mmc/host/sdhci-of-at91.c12
-rw-r--r--drivers/mmc/host/sdhci-of-dwcmshc.c74
-rw-r--r--drivers/mmc/host/sdhci-of-esdhc.c34
-rw-r--r--drivers/mmc/host/sdhci-pci-core.c9
-rw-r--r--drivers/mmc/host/sdhci-pci-gli.c106
-rw-r--r--drivers/mmc/host/sdhci-pci-o2micro.c8
-rw-r--r--drivers/mmc/host/sdhci-pci.h2
-rw-r--r--drivers/mmc/host/sdhci-sprd.c28
-rw-r--r--drivers/mmc/host/sdhci-tegra.c57
-rw-r--r--drivers/mmc/host/sdhci.c284
-rw-r--r--drivers/mmc/host/sdhci.h38
-rw-r--r--drivers/mmc/host/sdricoh_cs.c105
-rw-r--r--drivers/mmc/host/sunxi-mmc.c10
-rw-r--r--drivers/mmc/host/tifm_sd.c9
-rw-r--r--drivers/mmc/host/tmio_mmc_core.c10
-rw-r--r--drivers/mmc/host/uniphier-sd.c12
-rw-r--r--drivers/mmc/host/usdhi6rol0.c9
-rw-r--r--drivers/mmc/host/via-sdmmc.c7
-rw-r--r--drivers/mmc/host/wbsd.c26
-rw-r--r--drivers/mtd/Kconfig10
-rw-r--r--drivers/mtd/Makefile1
-rw-r--r--drivers/mtd/mtdcore.c3
-rw-r--r--drivers/mtd/mtdpstore.c578
-rw-r--r--drivers/mtd/ubi/io.c4
-rw-r--r--drivers/net/ethernet/intel/e1000e/netdev.c2
-rw-r--r--drivers/net/ethernet/intel/igb/igb_main.c2
-rw-r--r--drivers/net/ethernet/intel/igc/igc_main.c2
-rw-r--r--drivers/net/hyperv/netvsc.c7
-rw-r--r--drivers/net/phy/swphy.c2
-rw-r--r--drivers/net/wireless/ath/ath10k/sdio.c25
-rw-r--r--drivers/net/wireless/ath/ath10k/sdio.h8
-rw-r--r--drivers/net/wireless/ath/ath6kl/hif.h6
-rw-r--r--drivers/net/wireless/ath/ath6kl/sdio.c17
-rw-r--r--drivers/net/wireless/broadcom/b43/sdio.c4
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c8
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c4
-rw-r--r--drivers/net/wireless/marvell/libertas/if_sdio.c2
-rw-r--r--drivers/net/wireless/marvell/mwifiex/sdio.c38
-rw-r--r--drivers/nfc/s3fwrn5/firmware.c10
-rw-r--r--drivers/nvdimm/blk.c6
-rw-r--r--drivers/nvdimm/btt.c6
-rw-r--r--drivers/nvdimm/nd.h19
-rw-r--r--drivers/nvdimm/pmem.c6
-rw-r--r--drivers/nvme/host/core.c324
-rw-r--r--drivers/nvme/host/fc.c577
-rw-r--r--drivers/nvme/host/fc.h227
-rw-r--r--drivers/nvme/host/lightnvm.c7
-rw-r--r--drivers/nvme/host/multipath.c16
-rw-r--r--drivers/nvme/host/nvme.h28
-rw-r--r--drivers/nvme/host/pci.c117
-rw-r--r--drivers/nvme/host/rdma.c321
-rw-r--r--drivers/nvme/host/tcp.c64
-rw-r--r--drivers/nvme/target/Kconfig1
-rw-r--r--drivers/nvme/target/admin-cmd.c42
-rw-r--r--drivers/nvme/target/configfs.c272
-rw-r--r--drivers/nvme/target/core.c166
-rw-r--r--drivers/nvme/target/discovery.c8
-rw-r--r--drivers/nvme/target/fabrics-cmd.c15
-rw-r--r--drivers/nvme/target/fc.c805
-rw-r--r--drivers/nvme/target/fcloop.c155
-rw-r--r--drivers/nvme/target/io-cmd-bdev.c120
-rw-r--r--drivers/nvme/target/io-cmd-file.c23
-rw-r--r--drivers/nvme/target/nvmet.h36
-rw-r--r--drivers/nvme/target/rdma.c416
-rw-r--r--drivers/nvme/target/tcp.c53
-rw-r--r--drivers/nvme/target/trace.h28
-rw-r--r--drivers/oprofile/event_buffer.c2
-rw-r--r--drivers/pci/controller/Kconfig10
-rw-r--r--drivers/pci/controller/Makefile1
-rw-r--r--drivers/pci/controller/pci-hyperv.c44
-rw-r--r--drivers/pci/controller/pci-loongson.c247
-rw-r--r--drivers/pci/hotplug/acpiphp_glue.c13
-rw-r--r--drivers/pci/hotplug/pciehp_core.c2
-rw-r--r--drivers/pci/pci-driver.c34
-rw-r--r--drivers/pci/pcie/portdrv_pci.c2
-rw-r--r--drivers/pci/probe.c2
-rw-r--r--drivers/pcmcia/electra_cf.c45
-rw-r--r--drivers/perf/Kconfig9
-rw-r--r--drivers/perf/arm_dsu_pmu.c4
-rw-r--r--drivers/perf/arm_smmuv3_pmu.c5
-rw-r--r--drivers/perf/arm_spe_pmu.c8
-rw-r--r--drivers/perf/hisilicon/Kconfig7
-rw-r--r--drivers/perf/hisilicon/Makefile3
-rw-r--r--drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c10
-rw-r--r--drivers/perf/hisilicon/hisi_uncore_hha_pmu.c12
-rw-r--r--drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c10
-rw-r--r--drivers/perf/hisilicon/hisi_uncore_pmu.c23
-rw-r--r--drivers/platform/chrome/chromeos_pstore.c2
-rw-r--r--drivers/platform/mips/Kconfig6
-rw-r--r--drivers/platform/mips/Makefile1
-rw-r--r--drivers/platform/mips/rs780e-acpi.c (renamed from arch/mips/loongson64/acpi_init.c)58
-rw-r--r--drivers/platform/x86/Kconfig65
-rw-r--r--drivers/platform/x86/Makefile4
-rw-r--r--drivers/platform/x86/acerhdf.c2
-rw-r--r--drivers/platform/x86/asus-laptop.c25
-rw-r--r--drivers/platform/x86/asus-nb-wmi.c1
-rw-r--r--drivers/platform/x86/asus-wmi.c117
-rw-r--r--drivers/platform/x86/dcdbas.c43
-rw-r--r--drivers/platform/x86/dell-laptop.c11
-rw-r--r--drivers/platform/x86/dell-wmi.c10
-rw-r--r--drivers/platform/x86/eeepc-laptop.c4
-rw-r--r--drivers/platform/x86/hp-wmi.c30
-rw-r--r--drivers/platform/x86/intel-hid.c7
-rw-r--r--drivers/platform/x86/intel-vbtn.c104
-rw-r--r--drivers/platform/x86/intel-wmi-sbl-fw-update.c145
-rw-r--r--drivers/platform/x86/intel_cht_int33fe_typec.c106
-rw-r--r--drivers/platform/x86/intel_mid_powerbtn.c15
-rw-r--r--drivers/platform/x86/intel_pmc_ipc.c949
-rw-r--r--drivers/platform/x86/intel_scu_ipc.c447
-rw-r--r--drivers/platform/x86/intel_scu_ipcutil.c43
-rw-r--r--drivers/platform/x86/intel_scu_pcidrv.c68
-rw-r--r--drivers/platform/x86/intel_scu_pltdrv.c60
-rw-r--r--drivers/platform/x86/intel_speed_select_if/isst_if_mbox_pci.c11
-rw-r--r--drivers/platform/x86/intel_telemetry_core.c17
-rw-r--r--drivers/platform/x86/intel_telemetry_debugfs.c15
-rw-r--r--drivers/platform/x86/intel_telemetry_pltdrv.c97
-rw-r--r--drivers/platform/x86/lg-laptop.c18
-rw-r--r--drivers/platform/x86/samsung-laptop.c3
-rw-r--r--drivers/platform/x86/sony-laptop.c60
-rw-r--r--drivers/platform/x86/thinkpad_acpi.c173
-rw-r--r--drivers/platform/x86/toshiba_acpi.c26
-rw-r--r--drivers/platform/x86/touchscreen_dmi.c113
-rw-r--r--drivers/platform/x86/wmi.c45
-rw-r--r--drivers/pnp/pnpbios/pnpbios.h2
-rw-r--r--drivers/power/supply/bd70528-charger.c10
-rw-r--r--drivers/powercap/intel_rapl_common.c4
-rw-r--r--drivers/regulator/88pg86x.c4
-rw-r--r--drivers/regulator/88pm800-regulator.c4
-rw-r--r--drivers/regulator/Kconfig11
-rw-r--r--drivers/regulator/Makefile1
-rw-r--r--drivers/regulator/ab8500.c22
-rw-r--r--drivers/regulator/act8865-regulator.c4
-rw-r--r--drivers/regulator/act8945a-regulator.c2
-rw-r--r--drivers/regulator/arizona-ldo1.c2
-rw-r--r--drivers/regulator/arizona-micsupp.c4
-rw-r--r--drivers/regulator/as3711-regulator.c6
-rw-r--r--drivers/regulator/as3722-regulator.c4
-rw-r--r--drivers/regulator/axp20x-regulator.c16
-rw-r--r--drivers/regulator/bcm590xx-regulator.c8
-rw-r--r--drivers/regulator/bd70528-regulator.c8
-rw-r--r--drivers/regulator/bd71828-regulator.c10
-rw-r--r--drivers/regulator/bd718x7-regulator.c238
-rw-r--r--drivers/regulator/core.c59
-rw-r--r--drivers/regulator/da903x.c2
-rw-r--r--drivers/regulator/db8500-prcmu.c2
-rw-r--r--drivers/regulator/helpers.c130
-rw-r--r--drivers/regulator/hi6421-regulator.c4
-rw-r--r--drivers/regulator/lochnagar-regulator.c4
-rw-r--r--drivers/regulator/lp873x-regulator.c4
-rw-r--r--drivers/regulator/lp87565-regulator.c2
-rw-r--r--drivers/regulator/lp8788-buck.c2
-rw-r--r--drivers/regulator/max77650-regulator.c2
-rw-r--r--drivers/regulator/max77826-regulator.c301
-rw-r--r--drivers/regulator/max8998.c105
-rw-r--r--drivers/regulator/mcp16502.c4
-rw-r--r--drivers/regulator/mp8859.c2
-rw-r--r--drivers/regulator/mt6323-regulator.c6
-rw-r--r--drivers/regulator/mt6358-regulator.c8
-rw-r--r--drivers/regulator/mt6380-regulator.c6
-rw-r--r--drivers/regulator/mt6397-regulator.c6
-rw-r--r--drivers/regulator/palmas-regulator.c4
-rw-r--r--drivers/regulator/qcom-rpmh-regulator.c10
-rw-r--r--drivers/regulator/qcom_rpm-regulator.c24
-rw-r--r--drivers/regulator/qcom_smd-regulator.c78
-rw-r--r--drivers/regulator/rk808-regulator.c10
-rw-r--r--drivers/regulator/s2mps11.c14
-rw-r--r--drivers/regulator/sky81452-regulator.c2
-rw-r--r--drivers/regulator/stpmic1_regulator.c18
-rw-r--r--drivers/regulator/tps65086-regulator.c10
-rw-r--r--drivers/regulator/tps65217-regulator.c4
-rw-r--r--drivers/regulator/tps65218-regulator.c6
-rw-r--r--drivers/regulator/tps65912-regulator.c4
-rw-r--r--drivers/regulator/tps80031-regulator.c7
-rw-r--r--drivers/regulator/twl-regulator.c4
-rw-r--r--drivers/regulator/twl6030-regulator.c2
-rw-r--r--drivers/regulator/wm831x-dcdc.c2
-rw-r--r--drivers/regulator/wm831x-ldo.c4
-rw-r--r--drivers/regulator/wm8350-regulator.c2
-rw-r--r--drivers/regulator/wm8400-regulator.c2
-rw-r--r--drivers/s390/block/dasd_genhd.c20
-rw-r--r--drivers/s390/block/dasd_ioctl.c76
-rw-r--r--drivers/scsi/esas2r/esas2r_ioctl.c2
-rw-r--r--drivers/scsi/lpfc/lpfc.h2
-rw-r--r--drivers/scsi/lpfc/lpfc_attr.c3
-rw-r--r--drivers/scsi/lpfc/lpfc_crtn.h9
-rw-r--r--drivers/scsi/lpfc/lpfc_ct.c1
-rw-r--r--drivers/scsi/lpfc/lpfc_debugfs.c17
-rw-r--r--drivers/scsi/lpfc/lpfc_hbadisc.c8
-rw-r--r--drivers/scsi/lpfc/lpfc_init.c7
-rw-r--r--drivers/scsi/lpfc/lpfc_mem.c4
-rw-r--r--drivers/scsi/lpfc/lpfc_nportdisc.c13
-rw-r--r--drivers/scsi/lpfc/lpfc_nvme.c491
-rw-r--r--drivers/scsi/lpfc/lpfc_nvme.h180
-rw-r--r--drivers/scsi/lpfc/lpfc_nvmet.c833
-rw-r--r--drivers/scsi/lpfc/lpfc_nvmet.h158
-rw-r--r--drivers/scsi/lpfc/lpfc_sli.c128
-rw-r--r--drivers/scsi/scsi_ioctl.c20
-rw-r--r--drivers/scsi/scsi_lib.c87
-rw-r--r--drivers/scsi/sd.c16
-rw-r--r--drivers/scsi/sd.h43
-rw-r--r--drivers/scsi/sd_zbc.c402
-rw-r--r--drivers/scsi/sr.c3
-rw-r--r--drivers/scsi/storvsc_drv.c96
-rw-r--r--drivers/soc/qcom/Kconfig10
-rw-r--r--drivers/soc/qcom/Makefile1
-rw-r--r--drivers/spi/Kconfig22
-rw-r--r--drivers/spi/Makefile6
-rw-r--r--drivers/spi/spi-amd.c315
-rw-r--r--drivers/spi/spi-armada-3700.c10
-rw-r--r--drivers/spi/spi-atmel.c1
-rw-r--r--drivers/spi/spi-axi-spi-engine.c32
-rw-r--r--drivers/spi/spi-bcm-qspi.c181
-rw-r--r--drivers/spi/spi-bcm2835.c26
-rw-r--r--drivers/spi/spi-bcm2835aux.c4
-rw-r--r--drivers/spi/spi-dw-core.c (renamed from drivers/spi/spi-dw.c)233
-rw-r--r--drivers/spi/spi-dw-dma.c480
-rw-r--r--drivers/spi/spi-dw-mid.c322
-rw-r--r--drivers/spi/spi-dw-mmio.c86
-rw-r--r--drivers/spi/spi-dw-pci.c50
-rw-r--r--drivers/spi/spi-dw.h66
-rw-r--r--drivers/spi/spi-ep93xx.c8
-rw-r--r--drivers/spi/spi-fsl-dspi.c47
-rw-r--r--drivers/spi/spi-fsl-lpspi.c21
-rw-r--r--drivers/spi/spi-fsl-qspi.c11
-rw-r--r--drivers/spi/spi-fsl-spi.c2
-rw-r--r--drivers/spi/spi-hisi-sfc-v3xx.c26
-rw-r--r--drivers/spi/spi-imx.c31
-rw-r--r--drivers/spi/spi-mem.c10
-rw-r--r--drivers/spi/spi-mtk-nor.c2
-rw-r--r--drivers/spi/spi-mux.c8
-rw-r--r--drivers/spi/spi-orion.c70
-rw-r--r--drivers/spi/spi-pxa2xx.c6
-rw-r--r--drivers/spi/spi-rb4xx.c19
-rw-r--r--drivers/spi/spi-rockchip.c229
-rw-r--r--drivers/spi/spi-sc18is602.c2
-rw-r--r--drivers/spi/spi-sh-msiof.c2
-rw-r--r--drivers/spi/spi-sprd-adi.c2
-rw-r--r--drivers/spi/spi-stm32-qspi.c62
-rw-r--r--drivers/spi/spi-stm32.c19
-rw-r--r--drivers/spi/spi-sun6i.c1
-rw-r--r--drivers/spi/spi-tegra114.c1
-rw-r--r--drivers/spi/spi-tegra20-sflash.c1
-rw-r--r--drivers/spi/spi-tegra20-slink.c1
-rw-r--r--drivers/spi/spi-uniphier.c11
-rw-r--r--drivers/spi/spi.c14
-rw-r--r--drivers/spi/spidev.c3
-rw-r--r--drivers/staging/android/ion/ion_heap.c4
-rw-r--r--drivers/staging/comedi/Makefile1
-rw-r--r--drivers/staging/comedi/comedi_compat32.c455
-rw-r--r--drivers/staging/comedi/comedi_compat32.h28
-rw-r--r--drivers/staging/comedi/comedi_fops.c564
-rw-r--r--drivers/staging/comedi/comedi_internal.h2
-rw-r--r--drivers/staging/comedi/range.c17
-rw-r--r--drivers/staging/greybus/sdio.c11
-rw-r--r--drivers/staging/media/ipu3/ipu3-css-pool.h4
-rw-r--r--drivers/staging/media/ipu3/ipu3-dmamap.c30
-rw-r--r--drivers/tty/n_hdlc.c7
-rw-r--r--drivers/tty/serial/8250/8250_early.c23
-rw-r--r--drivers/tty/serial/amba-pl011.c32
-rw-r--r--drivers/tty/serial/kgdboc.c318
-rw-r--r--drivers/tty/serial/qcom_geni_serial.c32
-rw-r--r--drivers/usb/core/devices.c2
-rw-r--r--drivers/usb/core/devio.c9
-rw-r--r--drivers/usb/gadget/function/f_hid.c6
-rw-r--r--drivers/usb/typec/mux/Kconfig2
-rw-r--r--drivers/usb/typec/mux/intel_pmc_mux.c12
-rw-r--r--drivers/usb/typec/tcpm/Kconfig2
-rw-r--r--drivers/video/fbdev/Kconfig2
-rw-r--r--drivers/video/fbdev/amifb.c4
-rw-r--r--drivers/video/fbdev/arcfb.c10
-rw-r--r--drivers/video/fbdev/atmel_lcdfb.c1
-rw-r--r--drivers/video/fbdev/aty/aty128fb.c14
-rw-r--r--drivers/video/fbdev/aty/atyfb_base.c6
-rw-r--r--drivers/video/fbdev/controlfb.c803
-rw-r--r--drivers/video/fbdev/core/fbmon.c2
-rw-r--r--drivers/video/fbdev/cyber2000fb.c2
-rw-r--r--drivers/video/fbdev/i810/i810_main.c10
-rw-r--r--drivers/video/fbdev/imxfb.c27
-rw-r--r--drivers/video/fbdev/matrox/g450_pll.c22
-rw-r--r--drivers/video/fbdev/matrox/matroxfb_base.h2
-rw-r--r--drivers/video/fbdev/mb862xx/mb862xxfb_accel.c2
-rw-r--r--drivers/video/fbdev/mx3fb.c20
-rw-r--r--drivers/video/fbdev/omap/omapfb_main.c14
-rw-r--r--drivers/video/fbdev/omap2/omapfb/dss/dispc.c114
-rw-r--r--drivers/video/fbdev/omap2/omapfb/dss/dss.h20
-rw-r--r--drivers/video/fbdev/omap2/omapfb/dss/venc.c43
-rw-r--r--drivers/video/fbdev/omap2/omapfb/omapfb-ioctl.c3
-rw-r--r--drivers/video/fbdev/omap2/omapfb/omapfb-sysfs.c8
-rw-r--r--drivers/video/fbdev/pm2fb.c2
-rw-r--r--drivers/video/fbdev/pm3fb.c8
-rw-r--r--drivers/video/fbdev/pxa168fb.c5
-rw-r--r--drivers/video/fbdev/riva/riva_hw.c18
-rw-r--r--drivers/video/fbdev/s1d13xxxfb.c14
-rw-r--r--drivers/video/fbdev/sa1100fb.c20
-rw-r--r--drivers/video/fbdev/sa1100fb.h3
-rw-r--r--drivers/video/fbdev/savage/savagefb.h2
-rw-r--r--drivers/video/fbdev/ssd1307fb.c102
-rw-r--r--drivers/video/fbdev/udlfb.c6
-rw-r--r--drivers/video/fbdev/uvesafb.c14
-rw-r--r--drivers/video/fbdev/valkyriefb.c4
-rw-r--r--drivers/video/fbdev/vesafb.c16
-rw-r--r--drivers/video/fbdev/via/debug.h6
-rw-r--r--drivers/video/fbdev/via/viafbdev.c2
-rw-r--r--drivers/video/fbdev/vt8500lcdfb.c1
-rw-r--r--drivers/video/fbdev/w100fb.c2
-rw-r--r--drivers/video/hdmi.c65
-rw-r--r--drivers/watchdog/Kconfig1
-rw-r--r--drivers/watchdog/iTCO_wdt.c25
-rw-r--r--drivers/watchdog/intel-mid_wdt.c53
-rw-r--r--fs/Kconfig2
-rw-r--r--fs/Kconfig.binfmt8
-rw-r--r--fs/adfs/Kconfig2
-rw-r--r--fs/affs/Kconfig2
-rw-r--r--fs/afs/Kconfig6
-rw-r--r--fs/aio.c8
-rw-r--r--fs/bfs/Kconfig2
-rw-r--r--fs/binfmt_elf.c166
-rw-r--r--fs/binfmt_elf_fdpic.c21
-rw-r--r--fs/block_dev.c47
-rw-r--r--fs/btrfs/Kconfig1
-rw-r--r--fs/btrfs/backref.c837
-rw-r--r--fs/btrfs/backref.h297
-rw-r--r--fs/btrfs/block-group.c233
-rw-r--r--fs/btrfs/block-group.h14
-rw-r--r--fs/btrfs/block-rsv.c5
-rw-r--r--fs/btrfs/btrfs_inode.h44
-rw-r--r--fs/btrfs/compression.c36
-rw-r--r--fs/btrfs/compression.h2
-rw-r--r--fs/btrfs/ctree.c180
-rw-r--r--fs/btrfs/ctree.h121
-rw-r--r--fs/btrfs/disk-io.c97
-rw-r--r--fs/btrfs/disk-io.h4
-rw-r--r--fs/btrfs/export.c17
-rw-r--r--fs/btrfs/extent-io-tree.h1
-rw-r--r--fs/btrfs/extent-tree.c23
-rw-r--r--fs/btrfs/extent_io.c352
-rw-r--r--fs/btrfs/extent_io.h70
-rw-r--r--fs/btrfs/file-item.c62
-rw-r--r--fs/btrfs/file.c111
-rw-r--r--fs/btrfs/free-space-cache.c81
-rw-r--r--fs/btrfs/inode.c1358
-rw-r--r--fs/btrfs/ioctl.c102
-rw-r--r--fs/btrfs/locking.c1
-rw-r--r--fs/btrfs/misc.h54
-rw-r--r--fs/btrfs/props.c9
-rw-r--r--fs/btrfs/qgroup.c14
-rw-r--r--fs/btrfs/relocation.c1319
-rw-r--r--fs/btrfs/root-tree.c12
-rw-r--r--fs/btrfs/scrub.c59
-rw-r--r--fs/btrfs/send.c96
-rw-r--r--fs/btrfs/space-info.c81
-rw-r--r--fs/btrfs/space-info.h1
-rw-r--r--fs/btrfs/struct-funcs.c223
-rw-r--r--fs/btrfs/super.c38
-rw-r--r--fs/btrfs/transaction.c78
-rw-r--r--fs/btrfs/transaction.h3
-rw-r--r--fs/btrfs/tree-checker.c4
-rw-r--r--fs/btrfs/tree-defrag.c2
-rw-r--r--fs/btrfs/tree-log.c192
-rw-r--r--fs/btrfs/uuid-tree.c6
-rw-r--r--fs/btrfs/volumes.c80
-rw-r--r--fs/buffer.c23
-rw-r--r--fs/cachefiles/Kconfig4
-rw-r--r--fs/char_dev.c3
-rw-r--r--fs/coda/Kconfig2
-rw-r--r--fs/compat_binfmt_elf.c6
-rw-r--r--fs/configfs/inode.c2
-rw-r--r--fs/configfs/item.c2
-rw-r--r--fs/cramfs/Kconfig2
-rw-r--r--fs/crypto/crypto.c15
-rw-r--r--fs/crypto/fname.c59
-rw-r--r--fs/crypto/fscrypt_private.h111
-rw-r--r--fs/crypto/hkdf.c6
-rw-r--r--fs/crypto/hooks.c4
-rw-r--r--fs/crypto/keyring.c122
-rw-r--r--fs/crypto/keysetup.c109
-rw-r--r--fs/crypto/policy.c195
-rw-r--r--fs/dcache.c19
-rw-r--r--fs/direct-io.c21
-rw-r--r--fs/ecryptfs/Kconfig2
-rw-r--r--fs/ecryptfs/crypto.c17
-rw-r--r--fs/erofs/data.c43
-rw-r--r--fs/erofs/decompressor.c2
-rw-r--r--fs/erofs/inode.c6
-rw-r--r--fs/erofs/internal.h27
-rw-r--r--fs/erofs/namei.c2
-rw-r--r--fs/erofs/super.c255
-rw-r--r--fs/erofs/xattr.c4
-rw-r--r--fs/erofs/xattr.h7
-rw-r--r--fs/erofs/zdata.c33
-rw-r--r--fs/exfat/inode.c7
-rw-r--r--fs/ext2/inode.c10
-rw-r--r--fs/ext4/ext4.h12
-rw-r--r--fs/ext4/fsync.c2
-rw-r--r--fs/ext4/ialloc.c2
-rw-r--r--fs/ext4/inode.c21
-rw-r--r--fs/ext4/readpage.c25
-rw-r--r--fs/ext4/super.c70
-rw-r--r--fs/ext4/sysfs.c2
-rw-r--r--fs/ext4/verity.c35
-rw-r--r--fs/f2fs/data.c50
-rw-r--r--fs/f2fs/f2fs.h18
-rw-r--r--fs/f2fs/hash.c1
-rw-r--r--fs/f2fs/super.c85
-rw-r--r--fs/f2fs/sysfs.c4
-rw-r--r--fs/f2fs/verity.c35
-rw-r--r--fs/fat/Kconfig8
-rw-r--r--fs/fat/dir.c4
-rw-r--r--fs/fat/file.c2
-rw-r--r--fs/fat/inode.c7
-rw-r--r--fs/file_table.c1
-rw-r--r--fs/fs-writeback.c3
-rw-r--r--fs/fs_context.c30
-rw-r--r--fs/fscache/Kconfig8
-rw-r--r--fs/fscache/cache.c8
-rw-r--r--fs/fscache/cookie.c2
-rw-r--r--fs/fscache/object.c4
-rw-r--r--fs/fscache/operation.c2
-rw-r--r--fs/fuse/Kconfig2
-rw-r--r--fs/fuse/dev.c4
-rw-r--r--fs/fuse/file.c100
-rw-r--r--fs/gfs2/aops.c23
-rw-r--r--fs/gfs2/dir.c9
-rw-r--r--fs/gfs2/quota.c2
-rw-r--r--fs/hfs/Kconfig2
-rw-r--r--fs/hfs/mdb.c32
-rw-r--r--fs/hfsplus/inode.c2
-rw-r--r--fs/hfsplus/super.c2
-rw-r--r--fs/hfsplus/wrapper.c33
-rw-r--r--fs/hpfs/Kconfig2
-rw-r--r--fs/hpfs/file.c7
-rw-r--r--fs/inode.c6
-rw-r--r--fs/internal.h5
-rw-r--r--fs/io_uring.c752
-rw-r--r--fs/iomap/buffered-io.c111
-rw-r--r--fs/iomap/direct-io.c19
-rw-r--r--fs/iomap/trace.h2
-rw-r--r--fs/isofs/Kconfig2
-rw-r--r--fs/isofs/inode.c61
-rw-r--r--fs/jbd2/checkpoint.c2
-rw-r--r--fs/jbd2/commit.c4
-rw-r--r--fs/jbd2/recovery.c2
-rw-r--r--fs/jfs/inode.c7
-rw-r--r--fs/jfs/jfs_dtree.c2
-rw-r--r--fs/jfs/jfs_xattr.h4
-rw-r--r--fs/libfs.c2
-rw-r--r--fs/locks.c2
-rw-r--r--fs/mount.h12
-rw-r--r--fs/mpage.c38
-rw-r--r--fs/namei.c21
-rw-r--r--fs/namespace.c108
-rw-r--r--fs/nfs/blocklayout/extent_tree.c2
-rw-r--r--fs/nfs/internal.h10
-rw-r--r--fs/nfs/write.c4
-rw-r--r--fs/nfsd/nfs4recover.c26
-rw-r--r--fs/nfsd/vfs.c9
-rw-r--r--fs/nilfs2/inode.c15
-rw-r--r--fs/nilfs2/the_nilfs.h2
-rw-r--r--fs/notify/inotify/Kconfig2
-rw-r--r--fs/nsfs.c5
-rw-r--r--fs/ntfs/Kconfig2
-rw-r--r--fs/ntfs/aops.c2
-rw-r--r--fs/ntfs/malloc.h2
-rw-r--r--fs/ntfs/mft.c2
-rw-r--r--fs/ocfs2/Kconfig2
-rw-r--r--fs/ocfs2/aops.c34
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c1
-rw-r--r--fs/ocfs2/dlmfs/dlmfs.c36
-rw-r--r--fs/ocfs2/file.c2
-rw-r--r--fs/ocfs2/ocfs2.h4
-rw-r--r--fs/ocfs2/slot_map.c46
-rw-r--r--fs/ocfs2/super.c21
-rw-r--r--fs/omfs/file.c7
-rw-r--r--fs/open.c61
-rw-r--r--fs/orangefs/inode.c32
-rw-r--r--fs/overlayfs/Kconfig6
-rw-r--r--fs/pipe.c96
-rw-r--r--fs/proc/Kconfig4
-rw-r--r--fs/proc/meminfo.c7
-rw-r--r--fs/proc/task_mmu.c22
-rw-r--r--fs/proc_namespace.c4
-rw-r--r--fs/pstore/Kconfig109
-rw-r--r--fs/pstore/Makefile6
-rw-r--r--fs/pstore/blk.c517
-rw-r--r--fs/pstore/ftrace.c54
-rw-r--r--fs/pstore/inode.c129
-rw-r--r--fs/pstore/internal.h11
-rw-r--r--fs/pstore/platform.c117
-rw-r--r--fs/pstore/ram.c155
-rw-r--r--fs/pstore/ram_core.c4
-rw-r--r--fs/pstore/zone.c1465
-rw-r--r--fs/qnx6/inode.c7
-rw-r--r--fs/readdir.c104
-rw-r--r--fs/reiserfs/file.c2
-rw-r--r--fs/reiserfs/inode.c8
-rw-r--r--fs/romfs/Kconfig2
-rw-r--r--fs/splice.c84
-rw-r--r--fs/squashfs/block.c273
-rw-r--r--fs/squashfs/decompressor.h5
-rw-r--r--fs/squashfs/decompressor_multi.c9
-rw-r--r--fs/squashfs/decompressor_multi_percpu.c25
-rw-r--r--fs/squashfs/decompressor_single.c9
-rw-r--r--fs/squashfs/lz4_wrapper.c17
-rw-r--r--fs/squashfs/lzo_wrapper.c17
-rw-r--r--fs/squashfs/squashfs.h4
-rw-r--r--fs/squashfs/xz_wrapper.c51
-rw-r--r--fs/squashfs/zlib_wrapper.c63
-rw-r--r--fs/squashfs/zstd_wrapper.c64
-rw-r--r--fs/stat.c51
-rw-r--r--fs/super.c4
-rw-r--r--fs/sync.c6
-rw-r--r--fs/sysfs/dir.c2
-rw-r--r--fs/sysfs/file.c2
-rw-r--r--fs/sysfs/mount.c2
-rw-r--r--fs/sysfs/symlink.c2
-rw-r--r--fs/sysv/Kconfig2
-rw-r--r--fs/ubifs/auth.c20
-rw-r--r--fs/ubifs/debug.c2
-rw-r--r--fs/ubifs/lprops.c2
-rw-r--r--fs/ubifs/lpt_commit.c4
-rw-r--r--fs/ubifs/master.c9
-rw-r--r--fs/ubifs/orphan.c2
-rw-r--r--fs/ubifs/replay.c14
-rw-r--r--fs/udf/Kconfig2
-rw-r--r--fs/udf/inode.c7
-rw-r--r--fs/udf/lowlevel.c29
-rw-r--r--fs/utimes.c6
-rw-r--r--fs/verity/enable.c2
-rw-r--r--fs/verity/fsverity_private.h4
-rw-r--r--fs/verity/measure.c2
-rw-r--r--fs/verity/open.c1
-rw-r--r--fs/verity/signature.c3
-rw-r--r--fs/verity/verify.c3
-rw-r--r--fs/xfs/Makefile5
-rw-r--r--fs/xfs/kmem.c2
-rw-r--r--fs/xfs/kmem.h8
-rw-r--r--fs/xfs/libxfs/xfs_ag_resv.h2
-rw-r--r--fs/xfs/libxfs/xfs_alloc.h2
-rw-r--r--fs/xfs/libxfs/xfs_alloc_btree.h2
-rw-r--r--fs/xfs/libxfs/xfs_attr.c16
-rw-r--r--fs/xfs/libxfs/xfs_attr.h2
-rw-r--r--fs/xfs/libxfs/xfs_attr_leaf.c59
-rw-r--r--fs/xfs/libxfs/xfs_attr_leaf.h2
-rw-r--r--fs/xfs/libxfs/xfs_attr_remote.h2
-rw-r--r--fs/xfs/libxfs/xfs_attr_sf.h2
-rw-r--r--fs/xfs/libxfs/xfs_bit.h2
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c310
-rw-r--r--fs/xfs/libxfs/xfs_bmap.h2
-rw-r--r--fs/xfs/libxfs/xfs_bmap_btree.c5
-rw-r--r--fs/xfs/libxfs/xfs_bmap_btree.h2
-rw-r--r--fs/xfs/libxfs/xfs_btree.h2
-rw-r--r--fs/xfs/libxfs/xfs_da_btree.h2
-rw-r--r--fs/xfs/libxfs/xfs_da_format.h2
-rw-r--r--fs/xfs/libxfs/xfs_defer.c162
-rw-r--r--fs/xfs/libxfs/xfs_defer.h26
-rw-r--r--fs/xfs/libxfs/xfs_dir2.c8
-rw-r--r--fs/xfs/libxfs/xfs_dir2.h2
-rw-r--r--fs/xfs/libxfs/xfs_dir2_block.c2
-rw-r--r--fs/xfs/libxfs/xfs_dir2_priv.h2
-rw-r--r--fs/xfs/libxfs/xfs_dir2_sf.c13
-rw-r--r--fs/xfs/libxfs/xfs_errortag.h6
-rw-r--r--fs/xfs/libxfs/xfs_format.h9
-rw-r--r--fs/xfs/libxfs/xfs_fs.h2
-rw-r--r--fs/xfs/libxfs/xfs_health.h2
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.c186
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.h10
-rw-r--r--fs/xfs/libxfs/xfs_inode_fork.c320
-rw-r--r--fs/xfs/libxfs/xfs_inode_fork.h68
-rw-r--r--fs/xfs/libxfs/xfs_log_recover.h83
-rw-r--r--fs/xfs/libxfs/xfs_quota_defs.h1
-rw-r--r--fs/xfs/libxfs/xfs_rtbitmap.c2
-rw-r--r--fs/xfs/libxfs/xfs_sb.c2
-rw-r--r--fs/xfs/libxfs/xfs_symlink_remote.c16
-rw-r--r--fs/xfs/libxfs/xfs_trans_inode.c2
-rw-r--r--fs/xfs/scrub/bmap.c40
-rw-r--r--fs/xfs/scrub/dabtree.c2
-rw-r--r--fs/xfs/scrub/dir.c7
-rw-r--r--fs/xfs/scrub/ialloc.c3
-rw-r--r--fs/xfs/scrub/parent.c2
-rw-r--r--fs/xfs/xfs_aops.c15
-rw-r--r--fs/xfs/xfs_attr_inactive.c9
-rw-r--r--fs/xfs/xfs_attr_list.c4
-rw-r--r--fs/xfs/xfs_bmap_item.c237
-rw-r--r--fs/xfs/xfs_bmap_item.h11
-rw-r--r--fs/xfs/xfs_bmap_util.c79
-rw-r--r--fs/xfs/xfs_buf.c72
-rw-r--r--fs/xfs/xfs_buf.h2
-rw-r--r--fs/xfs/xfs_buf_item.c106
-rw-r--r--fs/xfs/xfs_buf_item.h2
-rw-r--r--fs/xfs/xfs_buf_item_recover.c984
-rw-r--r--fs/xfs/xfs_dir2_readdir.c2
-rw-r--r--fs/xfs/xfs_dquot.c118
-rw-r--r--fs/xfs/xfs_dquot.h2
-rw-r--r--fs/xfs/xfs_dquot_item.c17
-rw-r--r--fs/xfs/xfs_dquot_item_recover.c201
-rw-r--r--fs/xfs/xfs_error.c3
-rw-r--r--fs/xfs/xfs_extfree_item.c216
-rw-r--r--fs/xfs/xfs_extfree_item.h25
-rw-r--r--fs/xfs/xfs_file.c2
-rw-r--r--fs/xfs/xfs_fsops.c5
-rw-r--r--fs/xfs/xfs_icache.c345
-rw-r--r--fs/xfs/xfs_icache.h51
-rw-r--r--fs/xfs/xfs_icreate_item.c152
-rw-r--r--fs/xfs/xfs_inode.c263
-rw-r--r--fs/xfs/xfs_inode.h9
-rw-r--r--fs/xfs/xfs_inode_item.c54
-rw-r--r--fs/xfs/xfs_inode_item.h6
-rw-r--r--fs/xfs/xfs_inode_item_recover.c394
-rw-r--r--fs/xfs/xfs_ioctl.c100
-rw-r--r--fs/xfs/xfs_iomap.c113
-rw-r--r--fs/xfs/xfs_iops.c79
-rw-r--r--fs/xfs/xfs_itable.c6
-rw-r--r--fs/xfs/xfs_log_recover.c2561
-rw-r--r--fs/xfs/xfs_message.c22
-rw-r--r--fs/xfs/xfs_message.h24
-rw-r--r--fs/xfs/xfs_mount.c40
-rw-r--r--fs/xfs/xfs_mount.h157
-rw-r--r--fs/xfs/xfs_pnfs.c5
-rw-r--r--fs/xfs/xfs_qm.c66
-rw-r--r--fs/xfs/xfs_qm.h78
-rw-r--r--fs/xfs/xfs_qm_syscalls.c83
-rw-r--r--fs/xfs/xfs_quotaops.c30
-rw-r--r--fs/xfs/xfs_refcount_item.c252
-rw-r--r--fs/xfs/xfs_refcount_item.h11
-rw-r--r--fs/xfs/xfs_rmap_item.c229
-rw-r--r--fs/xfs/xfs_rmap_item.h13
-rw-r--r--fs/xfs/xfs_super.c72
-rw-r--r--fs/xfs/xfs_symlink.c10
-rw-r--r--fs/xfs/xfs_trace.h4
-rw-r--r--fs/xfs/xfs_trans.c203
-rw-r--r--fs/xfs/xfs_trans.h6
-rw-r--r--fs/xfs/xfs_trans_ail.c79
-rw-r--r--fs/xfs/xfs_trans_dquot.c23
-rw-r--r--fs/xfs/xfs_trans_priv.h21
-rw-r--r--fs/xfs/xfs_xattr.c1
-rw-r--r--fs/zonefs/super.c89
-rw-r--r--include/acpi/acpi_bus.h1
-rw-r--r--include/acpi/acpixf.h2
-rw-r--r--include/acpi/ghes.h3
-rw-r--r--include/asm-generic/5level-fixup.h5
-rw-r--r--include/asm-generic/checksum.h9
-rw-r--r--include/asm-generic/hyperv-tlfs.h497
-rw-r--r--include/asm-generic/pgtable.h60
-rw-r--r--include/asm-generic/sections.h3
-rw-r--r--include/asm-generic/vmlinux.lds.h10
-rw-r--r--include/crypto/acompress.h2
-rw-r--r--include/crypto/algapi.h2
-rw-r--r--include/crypto/drbg.h6
-rw-r--r--include/crypto/engine.h15
-rw-r--r--include/crypto/hash.h19
-rw-r--r--include/crypto/sha.h28
-rw-r--r--include/crypto/sha256_base.h6
-rw-r--r--include/drm/drm_client.h4
-rw-r--r--include/drm/drm_connector.h4
-rw-r--r--include/drm/drm_debugfs.h16
-rw-r--r--include/drm/drm_device.h24
-rw-r--r--include/drm/drm_displayid.h2
-rw-r--r--include/drm/drm_dp_helper.h177
-rw-r--r--include/drm/drm_dp_mst_helper.h86
-rw-r--r--include/drm/drm_drv.h44
-rw-r--r--include/drm/drm_encoder.h8
-rw-r--r--include/drm/drm_fb_helper.h6
-rw-r--r--include/drm/drm_file.h11
-rw-r--r--include/drm/drm_framebuffer.h38
-rw-r--r--include/drm/drm_gem_framebuffer_helper.h15
-rw-r--r--include/drm/drm_gem_vram_helper.h2
-rw-r--r--include/drm/drm_legacy.h15
-rw-r--r--include/drm/drm_managed.h109
-rw-r--r--include/drm/drm_mipi_dbi.h8
-rw-r--r--include/drm/drm_mm.h1
-rw-r--r--include/drm/drm_mode_config.h18
-rw-r--r--include/drm/drm_modes.h11
-rw-r--r--include/drm/drm_modeset_helper_vtables.h27
-rw-r--r--include/drm/drm_pci.h63
-rw-r--r--include/drm/drm_print.h6
-rw-r--r--include/drm/drm_writeback.h9
-rw-r--r--include/drm/gpu_scheduler.h1
-rw-r--r--include/drm/i915_pciids.h8
-rw-r--r--include/drm/ttm/ttm_bo_driver.h1
-rw-r--r--include/drm/ttm/ttm_debug.h31
-rw-r--r--include/kvm/arm_vgic.h5
-rw-r--r--include/linux/arm-smccc.h25
-rw-r--r--include/linux/audit.h24
-rw-r--r--include/linux/backing-dev-defs.h2
-rw-r--r--include/linux/backing-dev.h8
-rw-r--r--include/linux/bio.h21
-rw-r--r--include/linux/blk-cgroup.h53
-rw-r--r--include/linux/blk-crypto.h123
-rw-r--r--include/linux/blk-mq.h14
-rw-r--r--include/linux/blk_types.h24
-rw-r--r--include/linux/blkdev.h122
-rw-r--r--include/linux/buffer_head.h8
-rw-r--r--include/linux/bvec.h13
-rw-r--r--include/linux/cdrom.h7
-rw-r--r--include/linux/compat.h11
-rw-r--r--include/linux/compiler-clang.h4
-rw-r--r--include/linux/compiler.h53
-rw-r--r--include/linux/compiler_types.h8
-rw-r--r--include/linux/configfs.h2
-rw-r--r--include/linux/console.h2
-rw-r--r--include/linux/context_tracking.h21
-rw-r--r--include/linux/cpu.h19
-rw-r--r--include/linux/cpufreq.h2
-rw-r--r--include/linux/cpuhotplug.h1
-rw-r--r--include/linux/crash_dump.h2
-rw-r--r--include/linux/cryptohash.h14
-rw-r--r--include/linux/dasd_mod.h9
-rw-r--r--include/linux/dcache.h2
-rw-r--r--include/linux/device.h4
-rw-r--r--include/linux/device_cgroup.h3
-rw-r--r--include/linux/dma-buf.h10
-rw-r--r--include/linux/efi.h6
-rw-r--r--include/linux/elevator.h2
-rw-r--r--include/linux/elf.h43
-rw-r--r--include/linux/filter.h4
-rw-r--r--include/linux/frame.h11
-rw-r--r--include/linux/fs.h45
-rw-r--r--include/linux/fs_context.h2
-rw-r--r--include/linux/fscache-cache.h4
-rw-r--r--include/linux/fscache.h42
-rw-r--r--include/linux/fscrypt.h214
-rw-r--r--include/linux/fsverity.h19
-rw-r--r--include/linux/ftrace_irq.h11
-rw-r--r--include/linux/genhd.h44
-rw-r--r--include/linux/hardirq.h47
-rw-r--r--include/linux/hdmi.h2
-rw-r--r--include/linux/hmm.h111
-rw-r--r--include/linux/hwmon.h3
-rw-r--r--include/linux/hyperv.h68
-rw-r--r--include/linux/idr.h2
-rw-r--r--include/linux/iomap.h5
-rw-r--r--include/linux/irq_sim.h33
-rw-r--r--include/linux/irq_work.h9
-rw-r--r--include/linux/irqdomain.h2
-rw-r--r--include/linux/irqflags.h6
-rw-r--r--include/linux/kdb.h2
-rw-r--r--include/linux/keyslot-manager.h106
-rw-r--r--include/linux/kgdb.h8
-rw-r--r--include/linux/kmsg_dump.h12
-rw-r--r--include/linux/kobject.h2
-rw-r--r--include/linux/kobject_ns.h2
-rw-r--r--include/linux/kvm_host.h27
-rw-r--r--include/linux/libata.h2
-rw-r--r--include/linux/linear_range.h48
-rw-r--r--include/linux/linkage.h8
-rw-r--r--include/linux/local_lock.h54
-rw-r--r--include/linux/local_lock_internal.h90
-rw-r--r--include/linux/lockdep.h23
-rw-r--r--include/linux/lsm_hooks.h2
-rw-r--r--include/linux/memcontrol.h4
-rw-r--r--include/linux/memstick.h2
-rw-r--r--include/linux/mfd/gsc.h76
-rw-r--r--include/linux/mfd/intel_pmc_bxt.h53
-rw-r--r--include/linux/mfd/intel_soc_pmic.h15
-rw-r--r--include/linux/mfd/max8998.h1
-rw-r--r--include/linux/mm.h82
-rw-r--r--include/linux/mm_types.h6
-rw-r--r--include/linux/mmc/card.h1
-rw-r--r--include/linux/mmc/host.h4
-rw-r--r--include/linux/mmc/mmc.h1
-rw-r--r--include/linux/mmc/sdio_ids.h94
-rw-r--r--include/linux/mmzone.h4
-rw-r--r--include/linux/mnt_namespace.h2
-rw-r--r--include/linux/mod_devicetable.h4
-rw-r--r--include/linux/module.h8
-rw-r--r--include/linux/moduleloader.h5
-rw-r--r--include/linux/mount.h4
-rw-r--r--include/linux/mpage.h4
-rw-r--r--include/linux/nsproxy.h24
-rw-r--r--include/linux/nvme-fc-driver.h368
-rw-r--r--include/linux/nvme-fc.h11
-rw-r--r--include/linux/nvme.h16
-rw-r--r--include/linux/padata.h6
-rw-r--r--include/linux/page_counter.h8
-rw-r--r--include/linux/pagemap.h193
-rw-r--r--include/linux/parser.h5
-rw-r--r--include/linux/part_stat.h61
-rw-r--r--include/linux/perf_event.h10
-rw-r--r--include/linux/pipe_fs_i.h40
-rw-r--r--include/linux/platform_data/ad5761.h2
-rw-r--r--include/linux/platform_data/gsc_hwmon.h44
-rw-r--r--include/linux/platform_data/itco_wdt.h11
-rw-r--r--include/linux/platform_data/mmc-esdhc-imx.h2
-rw-r--r--include/linux/platform_data/mmc-esdhc-mcf.h17
-rw-r--r--include/linux/platform_data/x86/asus-wmi.h3
-rw-r--r--include/linux/pm.h34
-rw-r--r--include/linux/pm_runtime.h4
-rw-r--r--include/linux/preempt.h4
-rw-r--r--include/linux/printk.h115
-rw-r--r--include/linux/proc_fs.h2
-rw-r--r--include/linux/proc_ns.h4
-rw-r--r--include/linux/property.h3
-rw-r--r--include/linux/psci.h7
-rw-r--r--include/linux/psp-sev.h2
-rw-r--r--include/linux/pstore.h9
-rw-r--r--include/linux/pstore_blk.h118
-rw-r--r--include/linux/pstore_ram.h2
-rw-r--r--include/linux/pstore_zone.h60
-rw-r--r--include/linux/ptdump.h3
-rw-r--r--include/linux/radix-tree.h11
-rw-r--r--include/linux/rbtree.h2
-rw-r--r--include/linux/rbtree_augmented.h2
-rw-r--r--include/linux/rculist.h4
-rw-r--r--include/linux/rcupdate.h53
-rw-r--r--include/linux/rcupdate_trace.h88
-rw-r--r--include/linux/rcupdate_wait.h19
-rw-r--r--include/linux/rcutiny.h6
-rw-r--r--include/linux/rcutree.h9
-rw-r--r--include/linux/rcuwait.h32
-rw-r--r--include/linux/regmap.h73
-rw-r--r--include/linux/regulator/coupler.h8
-rw-r--r--include/linux/regulator/driver.h27
-rw-r--r--include/linux/relay.h2
-rw-r--r--include/linux/sched.h31
-rw-r--r--include/linux/sched/mm.h2
-rw-r--r--include/linux/sched/topology.h29
-rw-r--r--include/linux/scs.h72
-rw-r--r--include/linux/signal.h8
-rw-r--r--include/linux/smp.h28
-rw-r--r--include/linux/spi/spi.h1
-rw-r--r--include/linux/splice.h3
-rw-r--r--include/linux/stat.h1
-rw-r--r--include/linux/suspend.h6
-rw-r--r--include/linux/swait.h23
-rw-r--r--include/linux/swap.h18
-rw-r--r--include/linux/syscalls.h6
-rw-r--r--include/linux/sysfs.h2
-rw-r--r--include/linux/tboot.h8
-rw-r--r--include/linux/torture.h2
-rw-r--r--include/linux/tpm_eventlog.h2
-rw-r--r--include/linux/uaccess.h8
-rw-r--r--include/linux/vmalloc.h49
-rw-r--r--include/linux/wait.h2
-rw-r--r--include/linux/watchdog.h4
-rw-r--r--include/linux/zsmalloc.h2
-rw-r--r--include/media/cec-notifier.h2
-rw-r--r--include/net/checksum.h14
-rw-r--r--include/net/tcp.h1
-rw-r--r--include/scsi/scsi_cmnd.h1
-rw-r--r--include/scsi/scsi_device.h3
-rw-r--r--include/scsi/scsi_host.h7
-rw-r--r--include/trace/events/btrfs.h1
-rw-r--r--include/trace/events/erofs.h6
-rw-r--r--include/trace/events/f2fs.h6
-rw-r--r--include/trace/events/regulator.h32
-rw-r--r--include/trace/events/writeback.h5
-rw-r--r--include/uapi/asm-generic/unistd.h4
-rw-r--r--include/uapi/drm/amdgpu_drm.h15
-rw-r--r--include/uapi/drm/drm_fourcc.h140
-rw-r--r--include/uapi/drm/i915_drm.h24
-rw-r--r--include/uapi/linux/audit.h1
-rw-r--r--include/uapi/linux/btrfs_tree.h9
-rw-r--r--include/uapi/linux/capability.h2
-rw-r--r--include/uapi/linux/elf.h11
-rw-r--r--include/uapi/linux/ethtool_netlink.h2
-rw-r--r--include/uapi/linux/fcntl.h10
-rw-r--r--include/uapi/linux/fd.h26
-rw-r--r--include/uapi/linux/fdreg.h16
-rw-r--r--include/uapi/linux/firewire-cdev.h2
-rw-r--r--include/uapi/linux/fscrypt.h3
-rw-r--r--include/uapi/linux/io_uring.h12
-rw-r--r--include/uapi/linux/kfd_ioctl.h21
-rw-r--r--include/uapi/linux/kvm.h18
-rw-r--r--include/uapi/linux/loop.h31
-rw-r--r--include/uapi/linux/mmc/ioctl.h1
-rw-r--r--include/uapi/linux/psp-sev.h2
-rw-r--r--include/uapi/linux/stat.h19
-rw-r--r--include/uapi/rdma/rdma_user_ioctl_cmds.h2
-rw-r--r--init/Kconfig3
-rw-r--r--init/do_mounts_initrd.c13
-rw-r--r--init/init_task.c13
-rw-r--r--ipc/namespace.c7
-rw-r--r--kernel/Makefile1
-rw-r--r--kernel/audit.c100
-rw-r--r--kernel/audit.h2
-rw-r--r--kernel/auditfilter.c16
-rw-r--r--kernel/auditsc.c31
-rw-r--r--kernel/bpf/core.c24
-rw-r--r--kernel/bpf/syscall.c25
-rw-r--r--kernel/cgroup/namespace.c5
-rw-r--r--kernel/compat.c12
-rw-r--r--kernel/cpu.c38
-rw-r--r--kernel/crash_dump.c6
-rw-r--r--kernel/debug/debug_core.c57
-rw-r--r--kernel/debug/kdb/kdb_main.c11
-rw-r--r--kernel/dma/remap.c48
-rw-r--r--kernel/events/callchain.c2
-rw-r--r--kernel/events/core.c29
-rw-r--r--kernel/events/internal.h2
-rw-r--r--kernel/exit.c46
-rw-r--r--kernel/fork.c14
-rw-r--r--kernel/futex.c3
-rw-r--r--kernel/groups.c2
-rw-r--r--kernel/irq/Kconfig1
-rw-r--r--kernel/irq/irq_sim.c267
-rw-r--r--kernel/irq/irqdomain.c53
-rw-r--r--kernel/irq/manage.c4
-rw-r--r--kernel/irq_work.c53
-rw-r--r--kernel/kprobes.c85
-rw-r--r--kernel/locking/lockdep.c107
-rw-r--r--kernel/locking/rtmutex.c2
-rw-r--r--kernel/module.c22
-rw-r--r--kernel/notifier.c1
-rw-r--r--kernel/nsproxy.c305
-rw-r--r--kernel/padata.c14
-rw-r--r--kernel/pid_namespace.c5
-rw-r--r--kernel/power/Kconfig12
-rw-r--r--kernel/power/Makefile3
-rw-r--r--kernel/power/hibernate.c20
-rw-r--r--kernel/power/power.h4
-rw-r--r--kernel/power/user.c22
-rw-r--r--kernel/printk/console_cmdline.h1
-rw-r--r--kernel/printk/internal.h8
-rw-r--r--kernel/printk/printk.c177
-rw-r--r--kernel/printk/printk_safe.c9
-rw-r--r--kernel/rcu/Kconfig46
-rw-r--r--kernel/rcu/Kconfig.debug4
-rw-r--r--kernel/rcu/rcu.h7
-rw-r--r--kernel/rcu/rcuperf.c5
-rw-r--r--kernel/rcu/rcutorture.c155
-rw-r--r--kernel/rcu/srcutree.c21
-rw-r--r--kernel/rcu/tasks.h1193
-rw-r--r--kernel/rcu/tree.c401
-rw-r--r--kernel/rcu/tree.h3
-rw-r--r--kernel/rcu/tree_exp.h50
-rw-r--r--kernel/rcu/tree_plugin.h90
-rw-r--r--kernel/rcu/tree_stall.h144
-rw-r--r--kernel/rcu/update.c397
-rw-r--r--kernel/reboot.c6
-rw-r--r--kernel/relay.c9
-rw-r--r--kernel/sched/core.c301
-rw-r--r--kernel/sched/cpuacct.c7
-rw-r--r--kernel/sched/debug.c9
-rw-r--r--kernel/sched/fair.c259
-rw-r--r--kernel/sched/idle.c6
-rw-r--r--kernel/sched/pelt.c24
-rw-r--r--kernel/sched/rt.c12
-rw-r--r--kernel/sched/sched.h36
-rw-r--r--kernel/sched/smp.h9
-rw-r--r--kernel/sched/topology.c31
-rw-r--r--kernel/scs.c104
-rw-r--r--kernel/signal.c106
-rw-r--r--kernel/smp.c177
-rw-r--r--kernel/sys.c35
-rw-r--r--kernel/sysctl.c20
-rw-r--r--kernel/time/namespace.c5
-rw-r--r--kernel/trace/Kconfig11
-rw-r--r--kernel/trace/blktrace.c4
-rw-r--r--kernel/trace/bpf_trace.c2
-rw-r--r--kernel/trace/ftrace.c17
-rw-r--r--kernel/trace/trace.c23
-rw-r--r--kernel/trace/trace_preemptirq.c39
-rw-r--r--kernel/user_namespace.c8
-rw-r--r--kernel/utsname.c5
-rw-r--r--lib/Kconfig8
-rw-r--r--lib/Kconfig.debug32
-rw-r--r--lib/Kconfig.kgdb18
-rw-r--r--lib/Kconfig.ubsan2
-rw-r--r--lib/Makefile3
-rw-r--r--lib/bitmap.c31
-rw-r--r--lib/checksum.c20
-rw-r--r--lib/crypto/chacha.c1
-rw-r--r--lib/crypto/sha256.c20
-rw-r--r--lib/debug_locks.c2
-rw-r--r--lib/ioremap.c46
-rw-r--r--lib/kobject.c4
-rw-r--r--lib/linear_ranges.c245
-rw-r--r--lib/mpi/longlong.h2
-rw-r--r--lib/radix-tree.c20
-rw-r--r--lib/sha1.c24
-rw-r--r--lib/strncpy_from_user.c4
-rw-r--r--lib/strnlen_user.c4
-rw-r--r--lib/test_hmm.c1164
-rw-r--r--lib/test_hmm_uapi.h59
-rw-r--r--lib/test_linear_ranges.c228
-rw-r--r--lib/test_printf.c13
-rw-r--r--lib/test_vmalloc.c26
-rw-r--r--lib/usercopy.c6
-rw-r--r--lib/vsprintf.c39
-rw-r--r--mm/Kconfig4
-rw-r--r--mm/backing-dev.c21
-rw-r--r--mm/compaction.c6
-rw-r--r--mm/debug.c56
-rw-r--r--mm/fadvise.c6
-rw-r--r--mm/filemap.c4
-rw-r--r--mm/gup.c133
-rw-r--r--mm/hmm.c185
-rw-r--r--mm/huge_memory.c7
-rw-r--r--mm/internal.h12
-rw-r--r--mm/kasan/Makefile21
-rw-r--r--mm/kasan/common.c19
-rw-r--r--mm/kasan/report.c22
-rw-r--r--mm/memcontrol.c190
-rw-r--r--mm/memory-failure.c30
-rw-r--r--mm/memory.c32
-rw-r--r--mm/migrate.c10
-rw-r--r--mm/mm_init.c16
-rw-r--r--mm/nommu.c46
-rw-r--r--mm/page-writeback.c62
-rw-r--r--mm/page_alloc.c13
-rw-r--r--mm/percpu.c2
-rw-r--r--mm/ptdump.c17
-rw-r--r--mm/readahead.c275
-rw-r--r--mm/slab_common.c3
-rw-r--r--mm/slob.c2
-rw-r--r--mm/slub.c69
-rw-r--r--mm/swap.c118
-rw-r--r--mm/swap_state.c5
-rw-r--r--mm/swapfile.c184
-rw-r--r--mm/util.c2
-rw-r--r--mm/vmalloc.c367
-rw-r--r--mm/vmscan.c4
-rw-r--r--mm/vmstat.c14
-rw-r--r--mm/zsmalloc.c12
-rw-r--r--net/bluetooth/smp.c6
-rw-r--r--net/bridge/netfilter/ebtables.c20
-rw-r--r--net/ceph/ceph_common.c3
-rw-r--r--net/core/net_namespace.c5
-rw-r--r--net/core/secure_seq.c1
-rw-r--r--net/ipv6/addrconf.c10
-rw-r--r--net/ipv6/seg6_hmac.c1
-rw-r--r--net/mptcp/crypto.c4
-rw-r--r--net/netfilter/x_tables.c14
-rw-r--r--net/sctp/auth.c10
-rw-r--r--net/sctp/sm_make_chunk.c23
-rw-r--r--net/smc/smc_rx.c8
-rw-r--r--samples/Kconfig2
-rw-r--r--samples/binderfs/Makefile6
-rw-r--r--samples/ftrace/ftrace-direct-modify.c4
-rw-r--r--samples/ftrace/ftrace-direct-too.c2
-rw-r--r--samples/ftrace/ftrace-direct.c2
-rw-r--r--samples/kprobes/kprobe_example.c6
-rw-r--r--samples/kprobes/kretprobe_example.c2
-rw-r--r--samples/vfs/test-statx.c2
-rwxr-xr-xscripts/kernel-doc41
-rwxr-xr-xscripts/link-vmlinux.sh24
-rw-r--r--scripts/mod/modpost.c2
-rwxr-xr-xscripts/sphinx-pre-install291
-rw-r--r--security/keys/encrypted-keys/encrypted.c18
-rw-r--r--security/lockdown/lockdown.c2
-rw-r--r--security/selinux/Makefile2
-rw-r--r--security/selinux/include/netlabel.h6
-rw-r--r--security/selinux/include/security.h3
-rw-r--r--security/selinux/ss/context.c32
-rw-r--r--security/selinux/ss/context.h11
-rw-r--r--security/selinux/ss/ebitmap.c14
-rw-r--r--security/selinux/ss/ebitmap.h1
-rw-r--r--security/selinux/ss/hashtab.c51
-rw-r--r--security/selinux/ss/hashtab.h13
-rw-r--r--security/selinux/ss/mls.c16
-rw-r--r--security/selinux/ss/mls.h11
-rw-r--r--security/selinux/ss/policydb.c451
-rw-r--r--security/selinux/ss/policydb.h15
-rw-r--r--security/selinux/ss/services.c153
-rw-r--r--security/selinux/ss/services.h3
-rw-r--r--security/selinux/ss/sidtab.c35
-rw-r--r--security/selinux/ss/sidtab.h1
-rw-r--r--security/selinux/ss/symtab.c5
-rw-r--r--security/selinux/ss/symtab.h2
-rw-r--r--security/tomoyo/common.c4
-rw-r--r--sound/core/memalloc.c2
-rw-r--r--sound/core/pcm_memory.c2
-rw-r--r--sound/core/pcm_native.c12
-rw-r--r--sound/soc/txx9/txx9aclc-ac97.c1
-rw-r--r--tools/arch/x86/include/asm/orc_types.h3
-rw-r--r--tools/arch/x86/include/uapi/asm/kvm.h1
-rw-r--r--tools/build/Makefile.feature3
-rw-r--r--tools/build/feature/Makefile6
-rw-r--r--tools/build/feature/test-libopencsd.c4
-rw-r--r--tools/build/feature/test-libpfm4.c9
-rw-r--r--tools/cgroup/iocost_monitor.py48
-rw-r--r--tools/include/linux/rbtree.h2
-rw-r--r--tools/include/linux/rbtree_augmented.h2
-rw-r--r--tools/include/uapi/linux/kvm.h4
-rw-r--r--tools/include/uapi/linux/stat.h11
-rwxr-xr-xtools/kvm/kvm_stat/kvm_stat84
-rw-r--r--tools/kvm/kvm_stat/kvm_stat.service16
-rw-r--r--tools/kvm/kvm_stat/kvm_stat.txt15
-rw-r--r--tools/lib/api/fs/fs.c17
-rw-r--r--tools/lib/api/fs/fs.h12
-rw-r--r--tools/lib/api/io.h115
-rw-r--r--tools/lib/perf/cpumap.c2
-rw-r--r--tools/lib/perf/evlist.c4
-rw-r--r--tools/lib/subcmd/parse-options.h2
-rw-r--r--tools/lib/symbol/kallsyms.c86
-rw-r--r--tools/lib/symbol/kallsyms.h2
-rw-r--r--tools/lib/traceevent/kbuffer-parse.c2
-rw-r--r--tools/lib/traceevent/parse-filter.c29
-rw-r--r--tools/objtool/Build13
-rw-r--r--tools/objtool/Documentation/stack-validation.txt41
-rw-r--r--tools/objtool/Makefile15
-rw-r--r--tools/objtool/arch.h19
-rw-r--r--tools/objtool/arch/x86/decode.c302
-rw-r--r--tools/objtool/arch/x86/include/cfi_regs.h25
-rw-r--r--tools/objtool/builtin-check.c13
-rw-r--r--tools/objtool/builtin-orc.c3
-rw-r--r--tools/objtool/builtin.h2
-rw-r--r--tools/objtool/cfi.h35
-rw-r--r--tools/objtool/check.c801
-rw-r--r--tools/objtool/check.h35
-rw-r--r--tools/objtool/elf.c102
-rw-r--r--tools/objtool/elf.h37
-rw-r--r--tools/objtool/objtool.c4
-rw-r--r--tools/objtool/objtool.h27
-rw-r--r--tools/objtool/orc.h18
-rw-r--r--tools/objtool/orc_dump.c3
-rw-r--r--tools/objtool/orc_gen.c12
-rw-r--r--tools/objtool/weak.c40
-rw-r--r--tools/perf/Documentation/Makefile4
-rw-r--r--tools/perf/Documentation/itrace.txt6
-rw-r--r--tools/perf/Documentation/perf-bench.txt8
-rw-r--r--tools/perf/Documentation/perf-c2c.txt11
-rw-r--r--tools/perf/Documentation/perf-intel-pt.txt53
-rw-r--r--tools/perf/Documentation/perf-list.txt8
-rw-r--r--tools/perf/Documentation/perf-record.txt17
-rw-r--r--tools/perf/Documentation/perf-report.txt11
-rw-r--r--tools/perf/Documentation/perf-script.txt11
-rw-r--r--tools/perf/Documentation/perf-stat.txt2
-rw-r--r--tools/perf/Documentation/perf-top.txt9
-rw-r--r--tools/perf/Documentation/perf.data-file-format.txt16
-rw-r--r--tools/perf/Makefile.perf6
-rw-r--r--tools/perf/arch/arm/util/cs-etm.c7
-rw-r--r--tools/perf/arch/arm64/util/arm-spe.c12
-rw-r--r--tools/perf/arch/powerpc/util/header.c8
-rw-r--r--tools/perf/arch/powerpc/util/kvm-stat.c2
-rw-r--r--tools/perf/arch/s390/util/kvm-stat.c8
-rw-r--r--tools/perf/arch/x86/tests/perf-time-to-tsc.c6
-rw-r--r--tools/perf/arch/x86/util/intel-bts.c2
-rw-r--r--tools/perf/arch/x86/util/intel-pt.c21
-rw-r--r--tools/perf/arch/x86/util/kvm-stat.c12
-rw-r--r--tools/perf/bench/Build3
-rw-r--r--tools/perf/bench/bench.h3
-rw-r--r--tools/perf/bench/epoll-wait.c3
-rw-r--r--tools/perf/bench/futex-hash.c3
-rw-r--r--tools/perf/bench/futex-lock-pi.c3
-rw-r--r--tools/perf/bench/kallsyms-parse.c75
-rw-r--r--tools/perf/bench/synthesize.c262
-rw-r--r--tools/perf/builtin-annotate.c15
-rw-r--r--tools/perf/builtin-bench.c7
-rw-r--r--tools/perf/builtin-c2c.c21
-rw-r--r--tools/perf/builtin-diff.c8
-rw-r--r--tools/perf/builtin-ftrace.c5
-rw-r--r--tools/perf/builtin-inject.c19
-rw-r--r--tools/perf/builtin-kmem.c65
-rw-r--r--tools/perf/builtin-kvm.c23
-rw-r--r--tools/perf/builtin-lock.c42
-rw-r--r--tools/perf/builtin-mem.c2
-rw-r--r--tools/perf/builtin-record.c117
-rw-r--r--tools/perf/builtin-report.c36
-rw-r--r--tools/perf/builtin-sched.c78
-rw-r--r--tools/perf/builtin-script.c391
-rw-r--r--tools/perf/builtin-stat.c31
-rw-r--r--tools/perf/builtin-timechart.c52
-rw-r--r--tools/perf/builtin-top.c47
-rw-r--r--tools/perf/builtin-trace.c115
-rw-r--r--tools/perf/design.txt3
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power9/nest_metrics.json19
-rw-r--r--tools/perf/pmu-events/pmu-events.h2
-rwxr-xr-xtools/perf/scripts/python/bin/flamegraph-record2
-rwxr-xr-xtools/perf/scripts/python/bin/flamegraph-report3
-rwxr-xr-xtools/perf/scripts/python/flamegraph.py124
-rw-r--r--tools/perf/tests/Build1
-rw-r--r--tools/perf/tests/api-io.c304
-rw-r--r--tools/perf/tests/builtin-test.c4
-rw-r--r--tools/perf/tests/event-times.c8
-rw-r--r--tools/perf/tests/event_update.c2
-rw-r--r--tools/perf/tests/evsel-roundtrip-name.c20
-rw-r--r--tools/perf/tests/evsel-tp-sched.c2
-rw-r--r--tools/perf/tests/expr.c20
-rw-r--r--tools/perf/tests/hists_cumulate.c8
-rw-r--r--tools/perf/tests/mmap-basic.c4
-rw-r--r--tools/perf/tests/openat-syscall-all-cpus.c6
-rw-r--r--tools/perf/tests/openat-syscall-tp-fields.c6
-rw-r--r--tools/perf/tests/openat-syscall.c8
-rw-r--r--tools/perf/tests/parse-events.c155
-rw-r--r--tools/perf/tests/perf-record.c6
-rw-r--r--tools/perf/tests/sample-parsing.c6
-rw-r--r--tools/perf/tests/switch-tracking.c14
-rw-r--r--tools/perf/tests/tests.h1
-rw-r--r--tools/perf/tests/topology.c12
-rw-r--r--tools/perf/ui/browsers/hists.c18
-rw-r--r--tools/perf/ui/gtk/annotate.c2
-rw-r--r--tools/perf/ui/gtk/hists.c6
-rw-r--r--tools/perf/ui/hist.c16
-rw-r--r--tools/perf/util/Build2
-rw-r--r--tools/perf/util/annotate.c20
-rw-r--r--tools/perf/util/arm-spe.c9
-rw-r--r--tools/perf/util/auxtrace.c125
-rw-r--r--tools/perf/util/auxtrace.h16
-rw-r--r--tools/perf/util/bpf-event.c3
-rw-r--r--tools/perf/util/bpf-event.h7
-rw-r--r--tools/perf/util/bpf-loader.c2
-rw-r--r--tools/perf/util/branch.h19
-rw-r--r--tools/perf/util/callchain.h8
-rw-r--r--tools/perf/util/cap.h4
-rw-r--r--tools/perf/util/cgroup.c3
-rw-r--r--tools/perf/util/cloexec.c2
-rw-r--r--tools/perf/util/cs-etm-decoder/cs-etm-decoder.c2
-rw-r--r--tools/perf/util/cs-etm.c14
-rw-r--r--tools/perf/util/cs-etm.h3
-rw-r--r--tools/perf/util/data-convert-bt.c6
-rw-r--r--tools/perf/util/env.h3
-rw-r--r--tools/perf/util/event.c2
-rw-r--r--tools/perf/util/evlist.c159
-rw-r--r--tools/perf/util/evlist.h9
-rw-r--r--tools/perf/util/evsel.c343
-rw-r--r--tools/perf/util/evsel.h194
-rw-r--r--tools/perf/util/evsel_config.h2
-rw-r--r--tools/perf/util/evsel_fprintf.c8
-rw-r--r--tools/perf/util/expr.c25
-rw-r--r--tools/perf/util/expr.h19
-rw-r--r--tools/perf/util/expr.l37
-rw-r--r--tools/perf/util/expr.y6
-rw-r--r--tools/perf/util/header.c121
-rw-r--r--tools/perf/util/header.h1
-rw-r--r--tools/perf/util/hist.c31
-rw-r--r--tools/perf/util/intel-bts.c16
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c2
-rw-r--r--tools/perf/util/intel-pt.c306
-rw-r--r--tools/perf/util/machine.c424
-rw-r--r--tools/perf/util/mem2node.c3
-rw-r--r--tools/perf/util/metricgroup.c78
-rw-r--r--tools/perf/util/metricgroup.h2
-rw-r--r--tools/perf/util/ordered-events.c2
-rw-r--r--tools/perf/util/parse-events.c39
-rw-r--r--tools/perf/util/parse-events.h1
-rw-r--r--tools/perf/util/parse-events.l1
-rw-r--r--tools/perf/util/parse-events.y12
-rw-r--r--tools/perf/util/perf_api_probe.c164
-rw-r--r--tools/perf/util/perf_api_probe.h14
-rw-r--r--tools/perf/util/pmu.c113
-rw-r--r--tools/perf/util/pmu.h10
-rw-r--r--tools/perf/util/python.c4
-rw-r--r--tools/perf/util/record.c185
-rw-r--r--tools/perf/util/record.h1
-rw-r--r--tools/perf/util/s390-cpumcf-kernel.h1
-rw-r--r--tools/perf/util/s390-cpumsf.c12
-rw-r--r--tools/perf/util/scripting-engines/trace-event-python.c6
-rw-r--r--tools/perf/util/session.c9
-rw-r--r--tools/perf/util/sideband_evlist.c148
-rw-r--r--tools/perf/util/smt.c10
-rw-r--r--tools/perf/util/sort.c12
-rw-r--r--tools/perf/util/sort.h2
-rw-r--r--tools/perf/util/stat-display.c23
-rw-r--r--tools/perf/util/stat-shadow.c55
-rw-r--r--tools/perf/util/stat.c24
-rw-r--r--tools/perf/util/symbol.c14
-rw-r--r--tools/perf/util/synthetic-events.c179
-rw-r--r--tools/perf/util/thread-stack.c274
-rw-r--r--tools/perf/util/thread-stack.h11
-rw-r--r--tools/perf/util/thread.c24
-rw-r--r--tools/perf/util/thread.h15
-rw-r--r--tools/perf/util/top.c2
-rw-r--r--tools/perf/util/top.h3
-rw-r--r--tools/perf/util/trace-event-read.c2
-rw-r--r--tools/perf/util/util.c1
-rw-r--r--tools/power/cpupower/utils/cpupower-info.c2
-rw-r--r--tools/power/cpupower/utils/cpupower-set.c2
-rw-r--r--tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c2
-rw-r--r--tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c6
-rw-r--r--tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c2
-rw-r--r--tools/power/cpupower/utils/idle_monitor/nhm_idle.c2
-rw-r--r--tools/power/cpupower/utils/idle_monitor/snb_idle.c2
-rw-r--r--tools/power/pm-graph/config/custom-timeline-functions.cfg2
-rwxr-xr-xtools/power/pm-graph/sleepgraph.py2
-rw-r--r--tools/power/x86/intel-speed-select/isst-config.c49
-rw-r--r--tools/power/x86/intel-speed-select/isst-core.c6
-rw-r--r--tools/power/x86/intel-speed-select/isst-display.c28
-rw-r--r--tools/power/x86/intel-speed-select/isst.h1
-rw-r--r--tools/spi/Makefile4
-rw-r--r--tools/spi/spidev_test.c21
-rw-r--r--tools/testing/selftests/kvm/.gitignore3
-rw-r--r--tools/testing/selftests/kvm/Makefile5
-rw-r--r--tools/testing/selftests/kvm/include/kvm_util.h30
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/processor.h11
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/svm_util.h10
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/vmx.h27
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util.c154
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util_internal.h8
-rw-r--r--tools/testing/selftests/kvm/lib/s390x/processor.c5
-rw-r--r--tools/testing/selftests/kvm/set_memory_region_test.c408
-rw-r--r--tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c103
-rw-r--r--tools/testing/selftests/kvm/x86_64/set_memory_region_test.c141
-rw-r--r--tools/testing/selftests/kvm/x86_64/smm_test.c19
-rw-r--r--tools/testing/selftests/kvm/x86_64/state_test.c62
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c255
-rw-r--r--tools/testing/selftests/pidfd/.gitignore1
-rw-r--r--tools/testing/selftests/pidfd/Makefile3
-rw-r--r--tools/testing/selftests/pidfd/config6
-rw-r--r--tools/testing/selftests/pidfd/pidfd_setns_test.c473
-rwxr-xr-xtools/testing/selftests/pstore/pstore_tests2
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kcsan-collapse.sh22
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh16
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck.sh9
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh52
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm.sh11
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/CFLIST3
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/RUDE0110
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/RUDE01.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TRACE0111
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TRACE01.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TRACE0211
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TRACE02.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE102
-rw-r--r--tools/testing/selftests/vm/.gitignore1
-rw-r--r--tools/testing/selftests/vm/Makefile3
-rw-r--r--tools/testing/selftests/vm/config2
-rw-r--r--tools/testing/selftests/vm/hmm-tests.c1359
-rwxr-xr-xtools/testing/selftests/vm/run_vmtests16
-rwxr-xr-xtools/testing/selftests/vm/test_hmm.sh97
-rw-r--r--tools/testing/selftests/wireguard/qemu/debug.config1
-rw-r--r--virt/kvm/async_pf.c15
-rw-r--r--virt/kvm/coalesced_mmio.c2
-rw-r--r--virt/kvm/eventfd.c4
-rw-r--r--virt/kvm/kvm_main.c97
3908 files changed, 129664 insertions, 73407 deletions
diff --git a/.clang-format b/.clang-format
index e92e6dd1780d..a0a96088c74f 100644
--- a/.clang-format
+++ b/.clang-format
@@ -80,6 +80,7 @@ ForEachMacros:
- 'ax25_uid_for_each'
- '__bio_for_each_bvec'
- 'bio_for_each_bvec'
+ - 'bio_for_each_bvec_all'
- 'bio_for_each_integrity_vec'
- '__bio_for_each_segment'
- 'bio_for_each_segment'
diff --git a/.mailmap b/.mailmap
index 4f906b4e9785..c69d9c734fb5 100644
--- a/.mailmap
+++ b/.mailmap
@@ -152,6 +152,7 @@ Krzysztof Kozlowski <krzk@kernel.org> <k.kozlowski.k@gmail.com>
Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Leon Romanovsky <leon@kernel.org> <leon@leon.nu>
Leon Romanovsky <leon@kernel.org> <leonro@mellanox.com>
+Leonardo Bras <leobras.c@gmail.com> <leonardo@linux.ibm.com>
Leonid I Ananiev <leonid.i.ananiev@intel.com>
Linas Vepstas <linas@austin.ibm.com>
Linus Lüssing <linus.luessing@c0d3.blue> <linus.luessing@web.de>
@@ -234,7 +235,9 @@ Ralf Baechle <ralf@linux-mips.org>
Ralf Wildenhues <Ralf.Wildenhues@gmx.de>
Randy Dunlap <rdunlap@infradead.org> <rdunlap@xenotime.net>
Rémi Denis-Courmont <rdenis@simphalempin.com>
-Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com>
+Ricardo Ribalda <ribalda@kernel.org> <ricardo.ribalda@gmail.com>
+Ricardo Ribalda <ribalda@kernel.org> <ricardo@ribalda.com>
+Ricardo Ribalda <ribalda@kernel.org> Ricardo Ribalda Delgado <ribalda@kernel.org>
Ross Zwisler <zwisler@kernel.org> <ross.zwisler@linux.intel.com>
Rudolf Marek <R.Marek@sh.cvut.cz>
Rui Saraiva <rmps@joel.ist.utl.pt>
diff --git a/CREDITS b/CREDITS
index 032b5994f476..0787b5872906 100644
--- a/CREDITS
+++ b/CREDITS
@@ -3104,14 +3104,16 @@ W: http://www.qsl.net/dl1bke/
D: Generic Z8530 driver, AX.25 DAMA slave implementation
D: Several AX.25 hacks
-N: Ricardo Ribalda Delgado
-E: ricardo.ribalda@gmail.com
+N: Ricardo Ribalda
+E: ribalda@kernel.org
W: http://ribalda.com
D: PLX USB338x driver
D: PCA9634 driver
D: Option GTM671WFS
D: Fintek F81216A
D: AD5761 iio driver
+D: TI DAC7612 driver
+D: Sony IMX214 driver
D: Various kernel hacks
S: Qtechnology A/S
S: Valby Langgade 142
diff --git a/Documentation/ABI/obsolete/sysfs-cpuidle b/Documentation/ABI/obsolete/sysfs-cpuidle
new file mode 100644
index 000000000000..e398fb5e542f
--- /dev/null
+++ b/Documentation/ABI/obsolete/sysfs-cpuidle
@@ -0,0 +1,9 @@
+What: /sys/devices/system/cpu/cpuidle/current_governor_ro
+Date: April, 2020
+Contact: linux-pm@vger.kernel.org
+Description:
+ current_governor_ro shows the cpuidle governor currently in use, but is
+ read-only. Now that the cpuidle governor can be changed at runtime by
+ default, both current_governor and current_governor_ro co-exist under
+ /sys/devices/system/cpu/cpuidle/. The read-only file is therefore a
+ duplicate, so current_governor_ro is obsolete.
diff --git a/Documentation/ABI/obsolete/sysfs-driver-intel_pmc_bxt b/Documentation/ABI/obsolete/sysfs-driver-intel_pmc_bxt
new file mode 100644
index 000000000000..39d5659f388b
--- /dev/null
+++ b/Documentation/ABI/obsolete/sysfs-driver-intel_pmc_bxt
@@ -0,0 +1,22 @@
+These files allow sending arbitrary IPC commands to the PMC/SCU which
+may be dangerous. These will be removed eventually and should not be
+used in any new applications.
+
+What: /sys/bus/platform/devices/INT34D2:00/simplecmd
+Date: Jun 2015
+KernelVersion: 4.1
+Contact: Mika Westerberg <mika.westerberg@linux.intel.com>
+Description: This interface allows userspace to send an arbitrary
+ IPC command to the PMC/SCU.
+
+ Format: %d %d where first number is command and
+ second number is subcommand.
+
+What: /sys/bus/platform/devices/INT34D2:00/northpeak
+Date: Jun 2015
+KernelVersion: 4.1
+Contact: Mika Westerberg <mika.westerberg@linux.intel.com>
+Description: This interface allows userspace to enable and disable
+ Northpeak through the PMC/SCU.
+
+ Format: %u.
diff --git a/Documentation/ABI/stable/sysfs-devices-node b/Documentation/ABI/stable/sysfs-devices-node
index df8413cf1468..484fc04bcc25 100644
--- a/Documentation/ABI/stable/sysfs-devices-node
+++ b/Documentation/ABI/stable/sysfs-devices-node
@@ -54,7 +54,7 @@ Date: October 2002
Contact: Linux Memory Management list <linux-mm@kvack.org>
Description:
Provides information about the node's distribution and memory
- utilization. Similar to /proc/meminfo, see Documentation/filesystems/proc.txt
+ utilization. Similar to /proc/meminfo, see Documentation/filesystems/proc.rst
What: /sys/devices/system/node/nodeX/numastat
Date: October 2002
diff --git a/Documentation/ABI/testing/debugfs-hisi-hpre b/Documentation/ABI/testing/debugfs-hisi-hpre
index ec4a79e3a807..b4be5f1db4b7 100644
--- a/Documentation/ABI/testing/debugfs-hisi-hpre
+++ b/Documentation/ABI/testing/debugfs-hisi-hpre
@@ -33,7 +33,7 @@ Contact: linux-crypto@vger.kernel.org
Description: Dump debug registers from the HPRE.
Only available for PF.
-What: /sys/kernel/debug/hisi_hpre/<bdf>/qm/qm_regs
+What: /sys/kernel/debug/hisi_hpre/<bdf>/qm/regs
Date: Sep 2019
Contact: linux-crypto@vger.kernel.org
Description: Dump debug registers from the QM.
@@ -44,14 +44,97 @@ What: /sys/kernel/debug/hisi_hpre/<bdf>/qm/current_q
Date: Sep 2019
Contact: linux-crypto@vger.kernel.org
Description: One QM may contain multiple queues. Select specific queue to
- show its debug registers in above qm_regs.
+ show its debug registers in above regs.
Only available for PF.
What: /sys/kernel/debug/hisi_hpre/<bdf>/qm/clear_enable
Date: Sep 2019
Contact: linux-crypto@vger.kernel.org
-Description: QM debug registers(qm_regs) read clear control. 1 means enable
+Description: QM debug registers(regs) read clear control. 1 means enable
register read clear, otherwise 0.
Writing to this file has no functional effect, only enable or
disable counters clear after reading of these registers.
Only available for PF.
+
+What: /sys/kernel/debug/hisi_hpre/<bdf>/qm/err_irq
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the number of invalid interrupts for
+ QM task completion.
+ Available for both PF and VF, and take no other effect on HPRE.
+
+What: /sys/kernel/debug/hisi_hpre/<bdf>/qm/aeq_irq
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the number of QM async event queue interrupts.
+ Available for both PF and VF, and take no other effect on HPRE.
+
+What: /sys/kernel/debug/hisi_hpre/<bdf>/qm/abnormal_irq
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the number of interrupts for QM abnormal event.
+ Available for both PF and VF, and take no other effect on HPRE.
+
+What: /sys/kernel/debug/hisi_hpre/<bdf>/qm/create_qp_err
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the number of queue allocation errors.
+ Available for both PF and VF, and take no other effect on HPRE.
+
+What: /sys/kernel/debug/hisi_hpre/<bdf>/qm/mb_err
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the number of failed QM mailbox commands.
+ Available for both PF and VF, and take no other effect on HPRE.
+
+What: /sys/kernel/debug/hisi_hpre/<bdf>/qm/status
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the status of the QM.
+ Four states: initiated, started, stopped and closed.
+ Available for both PF and VF, and take no other effect on HPRE.
+
+What: /sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/send_cnt
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the total number of sent requests.
+ Available for both PF and VF, and take no other effect on HPRE.
+
+What: /sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/recv_cnt
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the total number of received requests.
+ Available for both PF and VF, and take no other effect on HPRE.
+
+What: /sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/send_busy_cnt
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the total number of requests sent
+ with returning busy.
+ Available for both PF and VF, and take no other effect on HPRE.
+
+What: /sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/send_fail_cnt
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the total number of completed but error requests.
+ Available for both PF and VF, and take no other effect on HPRE.
+
+What: /sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/invalid_req_cnt
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the total number of invalid requests being received.
+ Available for both PF and VF, and take no other effect on HPRE.
+
+What: /sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/overtime_thrhld
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Set the threshold time for counting the request which is
+ processed longer than the threshold.
+ 0: disable(default), 1: 1 microsecond.
+ Available for both PF and VF, and take no other effect on HPRE.
+
+What: /sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/over_thrhld_cnt
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the total number of time out requests.
+ Available for both PF and VF, and take no other effect on HPRE.
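The hpre_dfx entries added above are plain debugfs counters. Purely as an illustration of how they can be consumed from user space (this sketch is not part of the patch), the following reads send_cnt for one device; the <bdf> component 0000:79:00.0 is a made-up placeholder and debugfs is assumed to be mounted at /sys/kernel/debug::

  /* hpre-send-cnt.c: illustrative sketch that prints the HPRE send_cnt
   * debugfs counter. The device BDF in the path is a placeholder. */
  #include <stdio.h>

  int main(void)
  {
      const char *path =
          "/sys/kernel/debug/hisi_hpre/0000:79:00.0/hpre_dfx/send_cnt";
      char buf[64];
      FILE *f = fopen(path, "r");

      if (!f) {
          perror("open send_cnt");
          return 1;
      }
      /* The attribute dumps the total number of sent requests. */
      if (fgets(buf, sizeof(buf), f))
          printf("requests sent: %s", buf);
      fclose(f);
      return 0;
  }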
diff --git a/Documentation/ABI/testing/debugfs-hisi-sec b/Documentation/ABI/testing/debugfs-hisi-sec
index 06adb899495e..85feb4408e0f 100644
--- a/Documentation/ABI/testing/debugfs-hisi-sec
+++ b/Documentation/ABI/testing/debugfs-hisi-sec
@@ -1,10 +1,4 @@
-What: /sys/kernel/debug/hisi_sec/<bdf>/sec_dfx
-Date: Oct 2019
-Contact: linux-crypto@vger.kernel.org
-Description: Dump the debug registers of SEC cores.
- Only available for PF.
-
-What: /sys/kernel/debug/hisi_sec/<bdf>/clear_enable
+What: /sys/kernel/debug/hisi_sec2/<bdf>/clear_enable
Date: Oct 2019
Contact: linux-crypto@vger.kernel.org
Description: Enabling/disabling of clear action after reading
@@ -12,7 +6,7 @@ Description: Enabling/disabling of clear action after reading
0: disable, 1: enable.
Only available for PF, and take no other effect on SEC.
-What: /sys/kernel/debug/hisi_sec/<bdf>/current_qm
+What: /sys/kernel/debug/hisi_sec2/<bdf>/current_qm
Date: Oct 2019
Contact: linux-crypto@vger.kernel.org
Description: One SEC controller has one PF and multiple VFs, each function
@@ -20,24 +14,100 @@ Description: One SEC controller has one PF and multiple VFs, each function
qm refers to.
Only available for PF.
-What: /sys/kernel/debug/hisi_sec/<bdf>/qm/qm_regs
+What: /sys/kernel/debug/hisi_sec2/<bdf>/qm/qm_regs
Date: Oct 2019
Contact: linux-crypto@vger.kernel.org
Description: Dump of QM related debug registers.
Available for PF and VF in host. VF in guest currently only
has one debug register.
-What: /sys/kernel/debug/hisi_sec/<bdf>/qm/current_q
+What: /sys/kernel/debug/hisi_sec2/<bdf>/qm/current_q
Date: Oct 2019
Contact: linux-crypto@vger.kernel.org
Description: One QM of SEC may contain multiple queues. Select specific
- queue to show its debug registers in above 'qm_regs'.
+ queue to show its debug registers in above 'regs'.
Only available for PF.
-What: /sys/kernel/debug/hisi_sec/<bdf>/qm/clear_enable
+What: /sys/kernel/debug/hisi_sec2/<bdf>/qm/clear_enable
Date: Oct 2019
Contact: linux-crypto@vger.kernel.org
Description: Enabling/disabling of clear action after reading
the SEC's QM debug registers.
0: disable, 1: enable.
Only available for PF, and take no other effect on SEC.
+
+What: /sys/kernel/debug/hisi_sec2/<bdf>/qm/err_irq
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the number of invalid interrupts for
+ QM task completion.
+ Available for both PF and VF, and take no other effect on SEC.
+
+What: /sys/kernel/debug/hisi_sec2/<bdf>/qm/aeq_irq
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the number of QM async event queue interrupts.
+ Available for both PF and VF, and take no other effect on SEC.
+
+What: /sys/kernel/debug/hisi_sec2/<bdf>/qm/abnormal_irq
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the number of interrupts for QM abnormal event.
+ Available for both PF and VF, and take no other effect on SEC.
+
+What: /sys/kernel/debug/hisi_sec2/<bdf>/qm/create_qp_err
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the number of queue allocation errors.
+ Available for both PF and VF, and take no other effect on SEC.
+
+What: /sys/kernel/debug/hisi_sec2/<bdf>/qm/mb_err
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the number of failed QM mailbox commands.
+ Available for both PF and VF, and take no other effect on SEC.
+
+What: /sys/kernel/debug/hisi_sec2/<bdf>/qm/status
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the status of the QM.
+ Four states: initiated, started, stopped and closed.
+ Available for both PF and VF, and take no other effect on SEC.
+
+What: /sys/kernel/debug/hisi_sec2/<bdf>/sec_dfx/send_cnt
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the total number of sent requests.
+ Available for both PF and VF, and take no other effect on SEC.
+
+What: /sys/kernel/debug/hisi_sec2/<bdf>/sec_dfx/recv_cnt
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the total number of received requests.
+ Available for both PF and VF, and take no other effect on SEC.
+
+What: /sys/kernel/debug/hisi_sec2/<bdf>/sec_dfx/send_busy_cnt
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the total number of requests sent with returning busy.
+ Available for both PF and VF, and take no other effect on SEC.
+
+What: /sys/kernel/debug/hisi_sec2/<bdf>/sec_dfx/err_bd_cnt
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the total number of BD type error requests
+ to be received.
+ Available for both PF and VF, and take no other effect on SEC.
+
+What: /sys/kernel/debug/hisi_sec2/<bdf>/sec_dfx/invalid_req_cnt
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the total number of invalid requests being received.
+ Available for both PF and VF, and take no other effect on SEC.
+
+What: /sys/kernel/debug/hisi_sec2/<bdf>/sec_dfx/done_flag_cnt
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the total number of completed but marked error requests
+ to be received.
+ Available for both PF and VF, and take no other effect on SEC.
diff --git a/Documentation/ABI/testing/debugfs-hisi-zip b/Documentation/ABI/testing/debugfs-hisi-zip
index a7c63e6c4bc3..3034a2bf99ca 100644
--- a/Documentation/ABI/testing/debugfs-hisi-zip
+++ b/Documentation/ABI/testing/debugfs-hisi-zip
@@ -26,7 +26,7 @@ Description: One ZIP controller has one PF and multiple VFs, each function
has a QM. Select the QM which below qm refers to.
Only available for PF.
-What: /sys/kernel/debug/hisi_zip/<bdf>/qm/qm_regs
+What: /sys/kernel/debug/hisi_zip/<bdf>/qm/regs
Date: Nov 2018
Contact: linux-crypto@vger.kernel.org
Description: Dump of QM related debug registers.
@@ -37,14 +37,78 @@ What: /sys/kernel/debug/hisi_zip/<bdf>/qm/current_q
Date: Nov 2018
Contact: linux-crypto@vger.kernel.org
Description: One QM may contain multiple queues. Select specific queue to
- show its debug registers in above qm_regs.
+ show its debug registers in above regs.
Only available for PF.
What: /sys/kernel/debug/hisi_zip/<bdf>/qm/clear_enable
Date: Nov 2018
Contact: linux-crypto@vger.kernel.org
-Description: QM debug registers(qm_regs) read clear control. 1 means enable
+Description: QM debug registers(regs) read clear control. 1 means enable
register read clear, otherwise 0.
Writing to this file has no functional effect, only enable or
disable counters clear after reading of these registers.
Only available for PF.
+
+What: /sys/kernel/debug/hisi_zip/<bdf>/qm/err_irq
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the number of invalid interrupts for
+ QM task completion.
+ Available for both PF and VF, and take no other effect on ZIP.
+
+What: /sys/kernel/debug/hisi_zip/<bdf>/qm/aeq_irq
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the number of QM async event queue interrupts.
+ Available for both PF and VF, and take no other effect on ZIP.
+
+What: /sys/kernel/debug/hisi_zip/<bdf>/qm/abnormal_irq
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the number of interrupts for QM abnormal event.
+ Available for both PF and VF, and take no other effect on ZIP.
+
+What: /sys/kernel/debug/hisi_zip/<bdf>/qm/create_qp_err
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the number of queue allocation errors.
+ Available for both PF and VF, and take no other effect on ZIP.
+
+What: /sys/kernel/debug/hisi_zip/<bdf>/qm/mb_err
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the number of failed QM mailbox commands.
+ Available for both PF and VF, and take no other effect on ZIP.
+
+What: /sys/kernel/debug/hisi_zip/<bdf>/qm/status
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the status of the QM.
+ Four states: initiated, started, stopped and closed.
+ Available for both PF and VF, and take no other effect on ZIP.
+
+What: /sys/kernel/debug/hisi_zip/<bdf>/zip_dfx/send_cnt
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the total number of sent requests.
+ Available for both PF and VF, and take no other effect on ZIP.
+
+What: /sys/kernel/debug/hisi_zip/<bdf>/zip_dfx/recv_cnt
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the total number of received requests.
+ Available for both PF and VF, and take no other effect on ZIP.
+
+What: /sys/kernel/debug/hisi_zip/<bdf>/zip_dfx/send_busy_cnt
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the total number of requests received
+ with returning busy.
+ Available for both PF and VF, and take no other effect on ZIP.
+
+What: /sys/kernel/debug/hisi_zip/<bdf>/zip_dfx/err_bd_cnt
+Date: Apr 2020
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the total number of BD type error requests
+ to be received.
+ Available for both PF and VF, and take no other effect on ZIP.
diff --git a/Documentation/ABI/testing/dev-kmsg b/Documentation/ABI/testing/dev-kmsg
index f307506eb54c..1e6c28b1942b 100644
--- a/Documentation/ABI/testing/dev-kmsg
+++ b/Documentation/ABI/testing/dev-kmsg
@@ -56,6 +56,11 @@ Description: The /dev/kmsg character device node provides userspace access
seek after the last record available at the time
the last SYSLOG_ACTION_CLEAR was issued.
+ Due to the record nature of this interface with a "read all"
+ behavior and the specific positions each seek operation sets,
+ SEEK_CUR is not supported, returning -ESPIPE (invalid seek) to
+ errno whenever requested.
+
The output format consists of a prefix carrying the syslog
prefix including priority and facility, the 64 bit message
sequence number and the monotonic timestamp in microseconds,
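As a quick illustration of the SEEK_CUR behaviour documented in the hunk above, a minimal user-space check might look like the sketch below (illustrative only, not part of the patch; it assumes read access to /dev/kmsg on a kernel with this behaviour)::

  /* kmsg-seek-check.c: illustrative check that SEEK_CUR on /dev/kmsg
   * is rejected with ESPIPE, as described in the dev-kmsg ABI text. */
  #include <errno.h>
  #include <fcntl.h>
  #include <stdio.h>
  #include <string.h>
  #include <unistd.h>

  int main(void)
  {
      int fd = open("/dev/kmsg", O_RDONLY | O_NONBLOCK);

      if (fd < 0) {
          perror("open /dev/kmsg");
          return 1;
      }
      /* SEEK_CUR has no defined position for this record-based interface. */
      if (lseek(fd, 0, SEEK_CUR) == -1 && errno == ESPIPE)
          printf("SEEK_CUR rejected as expected: %s\n", strerror(errno));
      else
          printf("unexpected SEEK_CUR result\n");

      close(fd);
      return 0;
  }

Compiled with e.g. ``gcc -o kmsg-seek-check kmsg-seek-check.c``, it should print the expected-rejection message on kernels implementing the documented behaviour.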
diff --git a/Documentation/ABI/testing/procfs-smaps_rollup b/Documentation/ABI/testing/procfs-smaps_rollup
index 274df44d8b1b..046978193368 100644
--- a/Documentation/ABI/testing/procfs-smaps_rollup
+++ b/Documentation/ABI/testing/procfs-smaps_rollup
@@ -11,7 +11,7 @@ Description:
Additionally, the fields Pss_Anon, Pss_File and Pss_Shmem
are not present in /proc/pid/smaps. These fields represent
the sum of the Pss field of each type (anon, file, shmem).
- For more details, see Documentation/filesystems/proc.txt
+ For more details, see Documentation/filesystems/proc.rst
and the procfs man page.
Typical output looks like this:
diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index 2e0e3b45d02a..6b5dafab950c 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -106,10 +106,10 @@ Description: CPU topology files that describe a logical CPU's relationship
See Documentation/admin-guide/cputopology.rst for more information.
-What: /sys/devices/system/cpu/cpuidle/current_driver
- /sys/devices/system/cpu/cpuidle/current_governer_ro
- /sys/devices/system/cpu/cpuidle/available_governors
+What: /sys/devices/system/cpu/cpuidle/available_governors
+ /sys/devices/system/cpu/cpuidle/current_driver
/sys/devices/system/cpu/cpuidle/current_governor
+ /sys/devices/system/cpu/cpuidle/current_governer_ro
Date: September 2007
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
Description: Discover cpuidle policy and mechanism
@@ -119,24 +119,18 @@ Description: Discover cpuidle policy and mechanism
consumption during idle.
Idle policy (governor) is differentiated from idle mechanism
- (driver)
-
- current_driver: (RO) displays current idle mechanism
-
- current_governor_ro: (RO) displays current idle policy
-
- With the cpuidle_sysfs_switch boot option enabled (meant for
- developer testing), the following three attributes are visible
- instead:
-
- current_driver: same as described above
+ (driver).
available_governors: (RO) displays a space separated list of
- available governors
+ available governors.
+
+ current_driver: (RO) displays current idle mechanism.
current_governor: (RW) displays current idle policy. Users can
switch the governor at runtime by writing to this file.
+ current_governor_ro: (RO) displays current idle policy.
+
See Documentation/admin-guide/pm/cpuidle.rst and
Documentation/driver-api/pm/cpuidle.rst for more information.
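Illustrative only (not part of the diff): a small sketch of discovering and switching the cpuidle governor through the attributes documented above. The governor name "menu" is just an example and must be one of the names listed in available_governors; writing current_governor normally requires root::

  /* cpuidle-governor.c: sketch that lists available cpuidle governors and
   * switches to one of them via the RW current_governor attribute. */
  #include <stdio.h>

  #define CPUIDLE_DIR "/sys/devices/system/cpu/cpuidle/"

  int main(void)
  {
      char buf[256];
      FILE *f = fopen(CPUIDLE_DIR "available_governors", "r");

      if (!f) {
          perror("available_governors");
          return 1;
      }
      if (fgets(buf, sizeof(buf), f))
          printf("available: %s", buf);
      fclose(f);

      /* Switch idle policy at runtime; "menu" is only an example name. */
      f = fopen(CPUIDLE_DIR "current_governor", "w");
      if (!f) {
          perror("current_governor");
          return 1;
      }
      fputs("menu\n", f);
      fclose(f);
      return 0;
  }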
diff --git a/Documentation/ABI/testing/sysfs-platform-dptf b/Documentation/ABI/testing/sysfs-platform-dptf
index 325dc0667dbb..eeed81ca6949 100644
--- a/Documentation/ABI/testing/sysfs-platform-dptf
+++ b/Documentation/ABI/testing/sysfs-platform-dptf
@@ -27,10 +27,12 @@ KernelVersion: v4.10
Contact: linux-acpi@vger.kernel.org
Description:
(RO) Display the platform power source
- 0x00 = DC
- 0x01 = AC
- 0x02 = USB
- 0x03 = Wireless Charger
+ bits[3:0] Current power source
+ 0x00 = DC
+ 0x01 = AC
+ 0x02 = USB
+ 0x03 = Wireless Charger
+ bits[7:4] Power source sequence number
What: /sys/bus/platform/devices/INT3407:00/dptf_power/battery_steady_power
Date: Jul, 2016
@@ -38,3 +40,55 @@ KernelVersion: v4.10
Contact: linux-acpi@vger.kernel.org
Description:
(RO) The maximum sustained power for battery in milliwatts.
+
+What: /sys/bus/platform/devices/INT3407:00/dptf_power/rest_of_platform_power_mw
+Date: June, 2020
+KernelVersion: v5.8
+Contact: linux-acpi@vger.kernel.org
+Description:
+ (RO) Shows the rest (outside of SoC) of worst-case platform power.
+
+What: /sys/bus/platform/devices/INT3407:00/dptf_power/prochot_confirm
+Date: June, 2020
+KernelVersion: v5.8
+Contact: linux-acpi@vger.kernel.org
+Description:
+ (WO) Confirm embedded controller about a prochot notification.
+
+What: /sys/bus/platform/devices/INT3532:00/dptf_battery/max_platform_power_mw
+Date: June, 2020
+KernelVersion: v5.8
+Contact: linux-acpi@vger.kernel.org
+Description:
+ (RO) The maximum platform power that can be supported by the battery in milli watts.
+
+What: /sys/bus/platform/devices/INT3532:00/dptf_battery/max_steady_state_power_mw
+Date: June, 2020
+KernelVersion: v5.8
+Contact: linux-acpi@vger.kernel.org
+Description:
+ (RO) The maximum sustained power for battery in milli watts.
+
+What: /sys/bus/platform/devices/INT3532:00/dptf_battery/high_freq_impedance_mohm
+Date: June, 2020
+KernelVersion: v5.8
+Contact: linux-acpi@vger.kernel.org
+Description:
+ (RO) The high frequency impedance value that can be obtained from battery
+ fuel gauge in milli Ohms.
+
+What: /sys/bus/platform/devices/INT3532:00/dptf_battery/no_load_voltage_mv
+Date: June, 2020
+KernelVersion: v5.8
+Contact: linux-acpi@vger.kernel.org
+Description:
+ (RO) The no-load voltage that can be obtained from battery fuel gauge in
+ milli volts.
+
+What: /sys/bus/platform/devices/INT3532:00/dptf_battery/current_discharge_capbility_ma
+Date: June, 2020
+KernelVersion: v5.8
+Contact: linux-acpi@vger.kernel.org
+Description:
+ (RO) The battery discharge current capability obtained from battery fuel gauge in
+ milli Amps.
diff --git a/Documentation/ABI/testing/sysfs-platform-intel-wmi-sbl-fw-update b/Documentation/ABI/testing/sysfs-platform-intel-wmi-sbl-fw-update
new file mode 100644
index 000000000000..5aa618987cad
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-platform-intel-wmi-sbl-fw-update
@@ -0,0 +1,12 @@
+What: /sys/bus/wmi/devices/44FADEB1-B204-40F2-8581-394BBDC1B651/firmware_update_request
+Date: April 2020
+KernelVersion: 5.7
+Contact: "Jithu Joseph" <jithu.joseph@intel.com>
+Description:
+ Allow user space entities to trigger update of Slim
+ Bootloader (SBL). This attribute normally has a value
+ of 0 and userspace can signal SBL to update firmware,
+ on next reboot, by writing a value of 1.
+ There are two available states:
+ * 0 -> Skip firmware update while rebooting
+ * 1 -> Attempt firmware update on next reboot
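For illustration only (not part of the patch), a user-space helper that requests an SBL firmware update on the next reboot could be sketched as below. The sysfs path is the one documented in the new ABI file; error handling is minimal::

  /* sbl-update-request.c: sketch of signalling Slim Bootloader to attempt
   * a firmware update on the next reboot by writing "1" to the attribute. */
  #include <stdio.h>

  #define SBL_ATTR "/sys/bus/wmi/devices/" \
          "44FADEB1-B204-40F2-8581-394BBDC1B651/firmware_update_request"

  int main(void)
  {
      FILE *f = fopen(SBL_ATTR, "w");

      if (!f) {
          perror("open firmware_update_request");
          return 1;
      }
      /* 1 == attempt firmware update on next reboot (0 would skip it). */
      fputs("1\n", f);
      fclose(f);
      return 0;
  }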
diff --git a/Documentation/Makefile b/Documentation/Makefile
index cc786d11a028..db1fc35ded50 100644
--- a/Documentation/Makefile
+++ b/Documentation/Makefile
@@ -98,7 +98,11 @@ else # HAVE_PDFLATEX
pdfdocs: latexdocs
@$(srctree)/scripts/sphinx-pre-install --version-check
- $(foreach var,$(SPHINXDIRS), $(MAKE) PDFLATEX="$(PDFLATEX)" LATEXOPTS="$(LATEXOPTS)" -C $(BUILDDIR)/$(var)/latex || exit;)
+ $(foreach var,$(SPHINXDIRS), \
+ $(MAKE) PDFLATEX="$(PDFLATEX)" LATEXOPTS="$(LATEXOPTS)" -C $(BUILDDIR)/$(var)/latex || exit; \
+ mkdir -p $(BUILDDIR)/$(var)/pdf; \
+ mv $(subst .tex,.pdf,$(wildcard $(BUILDDIR)/$(var)/latex/*.tex)) $(BUILDDIR)/$(var)/pdf/; \
+ )
endif # HAVE_PDFLATEX
diff --git a/Documentation/PCI/boot-interrupts.rst b/Documentation/PCI/boot-interrupts.rst
index d078ef3eb192..2ec70121bfca 100644
--- a/Documentation/PCI/boot-interrupts.rst
+++ b/Documentation/PCI/boot-interrupts.rst
@@ -32,12 +32,13 @@ interrupt goes unhandled over time, they are tracked by the Linux kernel as
Spurious Interrupts. The IRQ will be disabled by the Linux kernel after it
reaches a specific count with the error "nobody cared". This disabled IRQ
now prevents valid usage by an existing interrupt which may happen to share
-the IRQ line.
+the IRQ line::
irq 19: nobody cared (try booting with the "irqpoll" option)
CPU: 0 PID: 2988 Comm: irq/34-nipalk Tainted: 4.14.87-rt49-02410-g4a640ec-dirty #1
Hardware name: National Instruments NI PXIe-8880/NI PXIe-8880, BIOS 2.1.5f1 01/09/2020
Call Trace:
+
<IRQ>
? dump_stack+0x46/0x5e
? __report_bad_irq+0x2e/0xb0
@@ -85,15 +86,18 @@ Mitigations
The mitigations take the form of PCI quirks. The preference has been to
first identify and make use of a means to disable the routing to the PCH.
In such a case a quirk to disable boot interrupt generation can be
-added.[1]
+added. [1]_
- Intel® 6300ESB I/O Controller Hub
+Intel® 6300ESB I/O Controller Hub
Alternate Base Address Register:
BIE: Boot Interrupt Enable
- 0 = Boot interrupt is enabled.
- 1 = Boot interrupt is disabled.
- Intel® Sandy Bridge through Sky Lake based Xeon servers:
+ == ===========================
+ 0 Boot interrupt is enabled.
+ 1 Boot interrupt is disabled.
+ == ===========================
+
+Intel® Sandy Bridge through Sky Lake based Xeon servers:
Coherent Interface Protocol Interrupt Control
dis_intx_route2pch/dis_intx_route2ich/dis_intx_route2dmi2:
When this bit is set. Local INTx messages received from the
@@ -109,12 +113,12 @@ line by default. Therefore, on chipsets where this INTx routing cannot be
disabled, the Linux kernel will reroute the valid interrupt to its legacy
interrupt. This redirection of the handler will prevent the occurrence of
the spurious interrupt detection which would ordinarily disable the IRQ
-line due to excessive unhandled counts.[2]
+line due to excessive unhandled counts. [2]_
The config option X86_REROUTE_FOR_BROKEN_BOOT_IRQS exists to enable (or
disable) the redirection of the interrupt handler to the PCH interrupt
line. The option can be overridden by either pci=ioapicreroute or
-pci=noioapicreroute.[3]
+pci=noioapicreroute. [3]_
More Documentation
@@ -127,19 +131,19 @@ into the evolution of its handling with chipsets.
Example of disabling of the boot interrupt
------------------------------------------
-Intel® 6300ESB I/O Controller Hub (Document # 300641-004US)
+ - Intel® 6300ESB I/O Controller Hub (Document # 300641-004US)
5.7.3 Boot Interrupt
https://www.intel.com/content/dam/doc/datasheet/6300esb-io-controller-hub-datasheet.pdf
-Intel® Xeon® Processor E5-1600/2400/2600/4600 v3 Product Families
-Datasheet - Volume 2: Registers (Document # 330784-003)
+ - Intel® Xeon® Processor E5-1600/2400/2600/4600 v3 Product Families
+ Datasheet - Volume 2: Registers (Document # 330784-003)
6.6.41 cipintrc Coherent Interface Protocol Interrupt Control
https://www.intel.com/content/dam/www/public/us/en/documents/datasheets/xeon-e5-v3-datasheet-vol-2.pdf
Example of handler rerouting
----------------------------
-Intel® 6700PXH 64-bit PCI Hub (Document # 302628)
+ - Intel® 6700PXH 64-bit PCI Hub (Document # 302628)
2.15.2 PCI Express Legacy INTx Support and Boot Interrupt
https://www.intel.com/content/dam/doc/datasheet/6700pxh-64-bit-pci-hub-datasheet.pdf
@@ -150,6 +154,6 @@ Cheers,
Sean V Kelley
sean.v.kelley@linux.intel.com
-[1] https://lore.kernel.org/r/12131949181903-git-send-email-sassmann@suse.de/
-[2] https://lore.kernel.org/r/12131949182094-git-send-email-sassmann@suse.de/
-[3] https://lore.kernel.org/r/487C8EA7.6020205@suse.de/
+.. [1] https://lore.kernel.org/r/12131949181903-git-send-email-sassmann@suse.de/
+.. [2] https://lore.kernel.org/r/12131949182094-git-send-email-sassmann@suse.de/
+.. [3] https://lore.kernel.org/r/487C8EA7.6020205@suse.de/
diff --git a/Documentation/RCU/Design/Requirements/Requirements.rst b/Documentation/RCU/Design/Requirements/Requirements.rst
index fd5e2cbc4935..75b8ca007a11 100644
--- a/Documentation/RCU/Design/Requirements/Requirements.rst
+++ b/Documentation/RCU/Design/Requirements/Requirements.rst
@@ -1943,56 +1943,27 @@ invoked from a CPU-hotplug notifier.
Scheduler and RCU
~~~~~~~~~~~~~~~~~
-RCU depends on the scheduler, and the scheduler uses RCU to protect some
-of its data structures. The preemptible-RCU ``rcu_read_unlock()``
-implementation must therefore be written carefully to avoid deadlocks
-involving the scheduler's runqueue and priority-inheritance locks. In
-particular, ``rcu_read_unlock()`` must tolerate an interrupt where the
-interrupt handler invokes both ``rcu_read_lock()`` and
-``rcu_read_unlock()``. This possibility requires ``rcu_read_unlock()``
-to use negative nesting levels to avoid destructive recursion via
-interrupt handler's use of RCU.
-
-This scheduler-RCU requirement came as a `complete
-surprise <https://lwn.net/Articles/453002/>`__.
-
-As noted above, RCU makes use of kthreads, and it is necessary to avoid
-excessive CPU-time accumulation by these kthreads. This requirement was
-no surprise, but RCU's violation of it when running context-switch-heavy
-workloads when built with ``CONFIG_NO_HZ_FULL=y`` `did come as a
-surprise
+RCU makes use of kthreads, and it is necessary to avoid excessive CPU-time
+accumulation by these kthreads. This requirement was no surprise, but
+RCU's violation of it when running context-switch-heavy workloads when
+built with ``CONFIG_NO_HZ_FULL=y`` `did come as a surprise
[PDF] <http://www.rdrop.com/users/paulmck/scalability/paper/BareMetal.2015.01.15b.pdf>`__.
RCU has made good progress towards meeting this requirement, even for
context-switch-heavy ``CONFIG_NO_HZ_FULL=y`` workloads, but there is
room for further improvement.
-It is forbidden to hold any of scheduler's runqueue or
-priority-inheritance spinlocks across an ``rcu_read_unlock()`` unless
-interrupts have been disabled across the entire RCU read-side critical
-section, that is, up to and including the matching ``rcu_read_lock()``.
-Violating this restriction can result in deadlocks involving these
-scheduler spinlocks. There was hope that this restriction might be
-lifted when interrupt-disabled calls to ``rcu_read_unlock()`` started
-deferring the reporting of the resulting RCU-preempt quiescent state
-until the end of the corresponding interrupts-disabled region.
-Unfortunately, timely reporting of the corresponding quiescent state to
-expedited grace periods requires a call to ``raise_softirq()``, which
-can acquire these scheduler spinlocks. In addition, real-time systems
-using RCU priority boosting need this restriction to remain in effect
-because deferred quiescent-state reporting would also defer deboosting,
-which in turn would degrade real-time latencies.
-
-In theory, if a given RCU read-side critical section could be guaranteed
-to be less than one second in duration, holding a scheduler spinlock
-across that critical section's ``rcu_read_unlock()`` would require only
-that preemption be disabled across the entire RCU read-side critical
-section, not interrupts. Unfortunately, given the possibility of vCPU
-preemption, long-running interrupts, and so on, it is not possible in
-practice to guarantee that a given RCU read-side critical section will
-complete in less than one second. Therefore, as noted above, if
-scheduler spinlocks are held across a given call to
-``rcu_read_unlock()``, interrupts must be disabled across the entire RCU
-read-side critical section.
+There is no longer any prohibition against holding any of
+scheduler's runqueue or priority-inheritance spinlocks across an
+``rcu_read_unlock()``, even if interrupts and preemption were enabled
+somewhere within the corresponding RCU read-side critical section.
+Therefore, it is now perfectly legal to execute ``rcu_read_lock()``
+with preemption enabled, acquire one of the scheduler locks, and hold
+that lock across the matching ``rcu_read_unlock()``.
+
+Similarly, the RCU flavor consolidation has removed the need for negative
+nesting. The fact that interrupt-disabled regions of code act as RCU
+read-side critical sections implicitly avoids earlier issues that used
+to result in destructive recursion via interrupt handler's use of RCU.
Tracing and RCU
~~~~~~~~~~~~~~~
diff --git a/Documentation/admin-guide/acpi/ssdt-overlays.rst b/Documentation/admin-guide/acpi/ssdt-overlays.rst
index da37455f96c9..5d7e25988085 100644
--- a/Documentation/admin-guide/acpi/ssdt-overlays.rst
+++ b/Documentation/admin-guide/acpi/ssdt-overlays.rst
@@ -63,7 +63,7 @@ which can then be compiled to AML binary format::
ASL Input: minnomax.asl - 30 lines, 614 bytes, 7 keywords
AML Output: minnowmax.aml - 165 bytes, 6 named objects, 1 executable opcodes
-[1] http://wiki.minnowboard.org/MinnowBoard_MAX#Low_Speed_Expansion_Connector_.28Top.29
+[1] https://www.elinux.org/Minnowboard:MinnowMax#Low_Speed_Expansion_.28Top.29
The resulting AML code can then be loaded by the kernel using one of the methods
below.
diff --git a/Documentation/admin-guide/bug-hunting.rst b/Documentation/admin-guide/bug-hunting.rst
index 44b8a4edd348..f7c80f4649fc 100644
--- a/Documentation/admin-guide/bug-hunting.rst
+++ b/Documentation/admin-guide/bug-hunting.rst
@@ -49,15 +49,19 @@ the issue, it may also contain the word **Oops**, as on this one::
Despite being an **Oops** or some other sort of stack trace, the offended
line is usually required to identify and handle the bug. Along this chapter,
-we'll refer to "Oops" for all kinds of stack traces that need to be analized.
+we'll refer to "Oops" for all kinds of stack traces that need to be analyzed.
-.. note::
+If the kernel is compiled with ``CONFIG_DEBUG_INFO``, you can enhance the
+quality of the stack trace by using file:`scripts/decode_stacktrace.sh`.
+
+Modules linked in
+-----------------
+
+Modules that are tainted or are being loaded or unloaded are marked with
+"(...)", where the taint flags are described in
+file:`Documentation/admin-guide/tainted-kernels.rst`, "being loaded" is
+annotated with "+", and "being unloaded" is annotated with "-".
- ``ksymoops`` is useless on 2.6 or upper. Please use the Oops in its original
- format (from ``dmesg``, etc). Ignore any references in this or other docs to
- "decoding the Oops" or "running it through ksymoops".
- If you post an Oops from 2.6+ that has been run through ``ksymoops``,
- people will just tell you to repost it.
Where is the Oops message is located?
-------------------------------------
@@ -71,7 +75,7 @@ by running ``journalctl`` command.
Sometimes ``klogd`` dies, in which case you can run ``dmesg > file`` to
read the data from the kernel buffers and save it. Or you can
``cat /proc/kmsg > file``, however you have to break in to stop the transfer,
-``kmsg`` is a "never ending file".
+since ``kmsg`` is a "never ending file".
If the machine has crashed so badly that you cannot enter commands or
the disk is not available then you have three options:
@@ -81,9 +85,9 @@ the disk is not available then you have three options:
planned for a crash. Alternatively, you can take a picture of
the screen with a digital camera - not nice, but better than
nothing. If the messages scroll off the top of the console, you
- may find that booting with a higher resolution (eg, ``vga=791``)
+ may find that booting with a higher resolution (e.g., ``vga=791``)
will allow you to read more of the text. (Caveat: This needs ``vesafb``,
- so won't help for 'early' oopses)
+ so won't help for 'early' oopses.)
(2) Boot with a serial console (see
:ref:`Documentation/admin-guide/serial-console.rst <serial_console>`),
@@ -104,7 +108,7 @@ Kernel source file. There are two methods for doing that. Usually, using
gdb
^^^
-The GNU debug (``gdb``) is the best way to figure out the exact file and line
+The GNU debugger (``gdb``) is the best way to figure out the exact file and line
number of the OOPS from the ``vmlinux`` file.
The usage of gdb works best on a kernel compiled with ``CONFIG_DEBUG_INFO``.
@@ -165,7 +169,7 @@ If you have a call trace, such as::
[<ffffffff8802770b>] :jbd:journal_stop+0x1be/0x1ee
...
-this shows the problem likely in the :jbd: module. You can load that module
+this shows the problem likely is in the :jbd: module. You can load that module
in gdb and list the relevant code::
$ gdb fs/jbd/jbd.ko
@@ -199,8 +203,9 @@ in the kernel hacking menu of the menu configuration.) For example::
You need to be at the top level of the kernel tree for this to pick up
your C files.
-If you don't have access to the code you can also debug on some crash dumps
-e.g. crash dump output as shown by Dave Miller::
+If you don't have access to the source code you can still debug some crash
+dumps using the following method (example crash dump output as shown by
+Dave Miller)::
EIP is at +0x14/0x4c0
...
@@ -230,6 +235,9 @@ e.g. crash dump output as shown by Dave Miller::
mov 0x8(%ebp), %ebx ! %ebx = skb->sk
mov 0x13c(%ebx), %eax ! %eax = inet_sk(sk)->opt
+file:`scripts/decodecode` can be used to automate most of this, depending
+on what CPU architecture is being debugged.
+
Reporting the bug
-----------------
@@ -241,7 +249,7 @@ used for the development of the affected code. This can be done by using
the ``get_maintainer.pl`` script.
For example, if you find a bug at the gspca's sonixj.c file, you can get
-their maintainers with::
+its maintainers with::
$ ./scripts/get_maintainer.pl -f drivers/media/usb/gspca/sonixj.c
Hans Verkuil <hverkuil@xs4all.nl> (odd fixer:GSPCA USB WEBCAM DRIVER,commit_signer:1/1=100%)
@@ -253,16 +261,17 @@ their maintainers with::
Please notice that it will point to:
-- The last developers that touched on the source code. On the above example,
- Tejun and Bhaktipriya (in this specific case, none really envolved on the
- development of this file);
+- The last developers that touched the source code (if this is done inside
+ a git tree). In the above example, Tejun and Bhaktipriya (in this
+ specific case, neither was really involved in the development of this file);
- The driver maintainer (Hans Verkuil);
- The subsystem maintainer (Mauro Carvalho Chehab);
- The driver and/or subsystem mailing list (linux-media@vger.kernel.org);
- the Linux Kernel mailing list (linux-kernel@vger.kernel.org).
Usually, the fastest way to have your bug fixed is to report it to mailing
-list used for the development of the code (linux-media ML) copying the driver maintainer (Hans).
+list used for the development of the code (linux-media ML) copying the
+driver maintainer (Hans).
If you are totally stumped as to whom to send the report, and
``get_maintainer.pl`` didn't provide you anything useful, send it to
@@ -303,9 +312,9 @@ protection fault message can be simply cut out of the message files
and forwarded to the kernel developers.
Two types of address resolution are performed by ``klogd``. The first is
-static translation and the second is dynamic translation. Static
-translation uses the System.map file in much the same manner that
-ksymoops does. In order to do static translation the ``klogd`` daemon
+static translation and the second is dynamic translation.
+Static translation uses the System.map file.
+In order to do static translation the ``klogd`` daemon
must be able to find a system map file at daemon initialization time.
See the klogd man page for information on how ``klogd`` searches for map
files.
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index bcc80269bb6a..b8c0460730f3 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -1329,6 +1329,10 @@ PAGE_SIZE multiple when read back.
workingset_activate
Number of refaulted pages that were immediately activated
+ workingset_restore
+ Number of restored pages which have been detected as an active
+ workingset before they got reclaimed.
+
workingset_nodereclaim
Number of times a shadow node has been reclaimed
@@ -1370,6 +1374,22 @@ PAGE_SIZE multiple when read back.
The total amount of swap currently being used by the cgroup
and its descendants.
+ memory.swap.high
+ A read-write single value file which exists on non-root
+ cgroups. The default is "max".
+
+ Swap usage throttle limit. If a cgroup's swap usage exceeds
+ this limit, all its further allocations will be throttled to
+ allow userspace to implement custom out-of-memory procedures.
+
+ This limit marks a point of no return for the cgroup. It is NOT
+ designed to manage the amount of swapping a workload does
+ during regular operation. Compare to memory.swap.max, which
+ prohibits swapping past a set amount, but lets the cgroup
+ continue unimpeded as long as other memory can be reclaimed.
+
+ Healthy workloads are not expected to reach this limit.
+
memory.swap.max
A read-write single value file which exists on non-root
cgroups. The default is "max".
@@ -1383,6 +1403,10 @@ PAGE_SIZE multiple when read back.
otherwise, a value change in this file generates a file
modified event.
+ high
+ The number of times the cgroup's swap usage was over
+ the high threshold.
+
max
The number of times the cgroup's swap usage was about
to go over the max boundary and swap allocation
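A purely illustrative sketch of setting the new memory.swap.high knob from user space follows (not part of the patch; the cgroup path /sys/fs/cgroup/example is an assumed placeholder for an existing non-root v2 cgroup)::

  /* swap-high-set.c: sketch of writing a byte limit to memory.swap.high
   * of a v2 cgroup. The cgroup path below is a placeholder. */
  #include <stdio.h>

  int main(void)
  {
      const char *path = "/sys/fs/cgroup/example/memory.swap.high";
      FILE *f = fopen(path, "w");

      if (!f) {
          perror("open memory.swap.high");
          return 1;
      }
      /* Throttle further allocations once the group uses ~2 GiB of swap. */
      fprintf(f, "%llu\n", 2ULL << 30);
      fclose(f);
      return 0;
  }

As the documentation above notes, this is a throttle point for out-of-memory handling, not a knob for tuning normal swap usage; memory.swap.max remains the hard cap.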
diff --git a/Documentation/admin-guide/cpu-load.rst b/Documentation/admin-guide/cpu-load.rst
index 2d01ce43d2a2..ebdecf864080 100644
--- a/Documentation/admin-guide/cpu-load.rst
+++ b/Documentation/admin-guide/cpu-load.rst
@@ -105,7 +105,7 @@ References
----------
- http://lkml.org/lkml/2007/2/12/6
-- Documentation/filesystems/proc.txt (1.8)
+- Documentation/filesystems/proc.rst (1.8)
Thanks
diff --git a/Documentation/admin-guide/hw-vuln/l1tf.rst b/Documentation/admin-guide/hw-vuln/l1tf.rst
index f83212fae4d5..3eeeb488d955 100644
--- a/Documentation/admin-guide/hw-vuln/l1tf.rst
+++ b/Documentation/admin-guide/hw-vuln/l1tf.rst
@@ -268,7 +268,7 @@ Guest mitigation mechanisms
/proc/irq/$NR/smp_affinity[_list] files. Limited documentation is
available at:
- https://www.kernel.org/doc/Documentation/IRQ-affinity.txt
+ https://www.kernel.org/doc/Documentation/core-api/irq/irq-affinity.rst
.. _smt_control:
diff --git a/Documentation/admin-guide/init.rst b/Documentation/admin-guide/init.rst
index e89d97f31eaf..41f06a09152e 100644
--- a/Documentation/admin-guide/init.rst
+++ b/Documentation/admin-guide/init.rst
@@ -1,52 +1,48 @@
-Explaining the dreaded "No init found." boot hang message
+Explaining the "No working init found." boot hang message
=========================================================
+:Authors: Andreas Mohr <andi at lisas period de>
+ Cristian Souza <cristianmsbr at gmail period com>
-OK, so you've got this pretty unintuitive message (currently located
-in init/main.c) and are wondering what the H*** went wrong.
-Some high-level reasons for failure (listed roughly in order of execution)
-to load the init binary are:
-
-A) Unable to mount root FS
-B) init binary doesn't exist on rootfs
-C) broken console device
-D) binary exists but dependencies not available
-E) binary cannot be loaded
-
-Detailed explanations:
-
-A) Set "debug" kernel parameter (in bootloader config file or CONFIG_CMDLINE)
- to get more detailed kernel messages.
-B) make sure you have the correct root FS type
- (and ``root=`` kernel parameter points to the correct partition),
- required drivers such as storage hardware (such as SCSI or USB!)
- and filesystem (ext3, jffs2 etc.) are builtin (alternatively as modules,
- to be pre-loaded by an initrd)
-C) Possibly a conflict in ``console= setup`` --> initial console unavailable.
- E.g. some serial consoles are unreliable due to serial IRQ issues (e.g.
- missing interrupt-based configuration).
+This document provides some high-level reasons for failure
+(listed roughly in order of execution) to load the init binary.
+
+1) **Unable to mount root FS**: Set "debug" kernel parameter (in bootloader
+ config file or CONFIG_CMDLINE) to get more detailed kernel messages.
+
+2) **init binary doesn't exist on rootfs**: Make sure you have the correct
+ root FS type (and ``root=`` kernel parameter points to the correct
+ partition), required drivers such as storage hardware (such as SCSI or
+ USB!) and filesystem (ext3, jffs2, etc.) are builtin (alternatively as
+ modules, to be pre-loaded by an initrd).
+
+3) **Broken console device**: Possibly a conflict in ``console=`` setup
+ --> initial console unavailable. E.g. some serial consoles are unreliable
+ due to serial IRQ issues (e.g. missing interrupt-based configuration).
Try using a different ``console=`` device or e.g. ``netconsole=``.
-D) e.g. required library dependencies of the init binary such as
- ``/lib/ld-linux.so.2`` missing or broken. Use
- ``readelf -d <INIT>|grep NEEDED`` to find out which libraries are required.
-E) make sure the binary's architecture matches your hardware.
- E.g. i386 vs. x86_64 mismatch, or trying to load x86 on ARM hardware.
- In case you tried loading a non-binary file here (shell script?),
- you should make sure that the script specifies an interpreter in its shebang
- header line (``#!/...``) that is fully working (including its library
- dependencies). And before tackling scripts, better first test a simple
- non-script binary such as ``/bin/sh`` and confirm its successful execution.
- To find out more, add code ``to init/main.c`` to display kernel_execve()s
- return values.
+
+4) **Binary exists but dependencies not available**: E.g. required library
+ dependencies of the init binary such as ``/lib/ld-linux.so.2`` missing or
+ broken. Use ``readelf -d <INIT>|grep NEEDED`` to find out which libraries
+ are required.
+
+5) **Binary cannot be loaded**: Make sure the binary's architecture matches
+ your hardware. E.g. i386 vs. x86_64 mismatch, or trying to load x86 on ARM
+ hardware. In case you tried loading a non-binary file here (shell script?),
+ you should make sure that the script specifies an interpreter in its
+ shebang header line (``#!/...``) that is fully working (including its
+ library dependencies). And before tackling scripts, better first test a
+ simple non-script binary such as ``/bin/sh`` and confirm its successful
+ execution. To find out more, add code to ``init/main.c`` to display
+ kernel_execve()'s return values.
Please extend this explanation whenever you find new failure causes
(after all loading the init binary is a CRITICAL and hard transition step
-which needs to be made as painless as possible), then submit patch to LKML.
+which needs to be made as painless as possible), then submit a patch to LKML.
Further TODOs:
- Implement the various ``run_init_process()`` invocations via a struct array
which can then store the ``kernel_execve()`` result value and on failure
log it all by iterating over **all** results (very important usability fix).
-- try to make the implementation itself more helpful in general,
- e.g. by providing additional error messages at affected places.
+- Try to make the implementation itself more helpful in general, e.g. by
+ providing additional error messages at affected places.
-Andreas Mohr <andi at lisas period de>
diff --git a/Documentation/admin-guide/kdump/vmcoreinfo.rst b/Documentation/admin-guide/kdump/vmcoreinfo.rst
index 007a6b86e0ee..e4ee8b2db604 100644
--- a/Documentation/admin-guide/kdump/vmcoreinfo.rst
+++ b/Documentation/admin-guide/kdump/vmcoreinfo.rst
@@ -393,6 +393,12 @@ KERNELOFFSET
The kernel randomization offset. Used to compute the page offset. If
KASLR is disabled, this value is zero.
+KERNELPACMASK
+-------------
+
+The mask to extract the Pointer Authentication Code from a kernel virtual
+address.
+
arm
===
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index a827ec820f73..a76d83ed5262 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1190,6 +1190,11 @@
This is designed to be used in conjunction with
the boot argument: earlyprintk=vga
+ This parameter works in place of the kgdboc parameter
+ but can only be used if the backing tty is available
+ very early in the boot process. For early debugging
+ via a serial port see kgdboc_earlycon instead.
+
edd= [EDD]
Format: {"off" | "on" | "skip[mbr]"}
@@ -1748,6 +1753,13 @@
initrd= [BOOT] Specify the location of the initial ramdisk
+ initrdmem= [KNL] Specify a physical address and size from which to
+ load the initrd. If an initrd is compiled in or
+ specified in the bootparams, it takes priority over this
+ setting.
+ Format: ss[KMG],nn[KMG]
+ Default is 0, 0
+
init_on_alloc= [MM] Fill newly allocated pages and heap objects with
zeroes.
Format: 0 | 1
@@ -2105,6 +2117,21 @@
kms, kbd format: kms,kbd
kms, kbd and serial format: kms,kbd,<ser_dev>[,baud]
+ kgdboc_earlycon= [KGDB,HW]
+ If the boot console provides the ability to read
+ characters and can work in polling mode, you can use
+ this parameter to tell kgdb to use it as a backend
+ until the normal console is registered. Intended to
+ be used together with the kgdboc parameter which
+ specifies the normal console to transition to.
+
+ The name of the early console should be specified
+ as the value of this parameter. Note that the name of
+ the early console might be different than the tty
+ name passed to kgdboc. It's OK to leave the value
+ blank and the first boot console that implements
+ read() will be picked.
+
kgdbwait [KGDB] Stop kernel execution and enter the
kernel debugger at the earliest opportunity.
@@ -3329,7 +3356,7 @@
See Documentation/admin-guide/sysctl/vm.rst for details.
ohci1394_dma=early [HW] enable debugging via the ohci1394 driver.
- See Documentation/debugging-via-ohci1394.txt for more
+ See Documentation/core-api/debugging-via-ohci1394.rst for more
info.
olpc_ec_timeout= [OLPC] ms delay when issuing EC commands
@@ -4210,12 +4237,24 @@
Duration of CPU stall (s) to test RCU CPU stall
warnings, zero to disable.
+ rcutorture.stall_cpu_block= [KNL]
+ Sleep while stalling if set. This will result
+ in warnings from preemptible RCU in addition
+ to any other stall-related activity.
+
rcutorture.stall_cpu_holdoff= [KNL]
Time to wait (s) after boot before inducing stall.
rcutorture.stall_cpu_irqsoff= [KNL]
Disable interrupts while stalling if set.
+ rcutorture.stall_gp_kthread= [KNL]
+ Duration (s) of forced sleep within RCU
+ grace-period kthread to test RCU CPU stall
+ warnings, zero to disable. If both stall_cpu
+ and stall_gp_kthread are specified, the
+ kthread is starved first, then the CPU.
+
rcutorture.stat_interval= [KNL]
Time (s) between statistics printk()s.
@@ -4286,6 +4325,13 @@
only normal grace-period primitives. No effect
on CONFIG_TINY_RCU kernels.
+ rcupdate.rcu_task_ipi_delay= [KNL]
+ Set time in jiffies during which RCU tasks will
+ avoid sending IPIs, starting with the beginning
+ of a given grace period. Setting a large
+ number avoids disturbing real-time workloads,
+ but lengthens grace periods.
+
rcupdate.rcu_task_stall_timeout= [KNL]
Set timeout in jiffies for RCU task stall warning
messages. Disable with a value less than or equal
@@ -5067,6 +5113,12 @@
interruptions from clocksource watchdog are not
acceptable).
+ tsc_early_khz= [X86] Skip early TSC calibration and use the given
+ value instead. Useful when the early TSC frequency discovery
+ procedure is not reliable, such as on overclocked systems
+ with CPUID.16h support and partial CPUID.15h support.
+ Format: <unsigned int>
+
tsx= [X86] Control Transactional Synchronization
Extensions (TSX) feature in Intel processors that
support TSX control.
diff --git a/Documentation/admin-guide/kernel-per-CPU-kthreads.rst b/Documentation/admin-guide/kernel-per-CPU-kthreads.rst
index 21818aca4708..dc36aeb65d0a 100644
--- a/Documentation/admin-guide/kernel-per-CPU-kthreads.rst
+++ b/Documentation/admin-guide/kernel-per-CPU-kthreads.rst
@@ -10,7 +10,7 @@ them to a "housekeeping" CPU dedicated to such work.
References
==========
-- Documentation/IRQ-affinity.txt: Binding interrupts to sets of CPUs.
+- Documentation/core-api/irq/irq-affinity.rst: Binding interrupts to sets of CPUs.
- Documentation/admin-guide/cgroup-v1: Using cgroups to bind tasks to sets of CPUs.
diff --git a/Documentation/admin-guide/mm/userfaultfd.rst b/Documentation/admin-guide/mm/userfaultfd.rst
index c30176e67900..0bf49d7313ad 100644
--- a/Documentation/admin-guide/mm/userfaultfd.rst
+++ b/Documentation/admin-guide/mm/userfaultfd.rst
@@ -12,107 +12,107 @@ and more generally they allow userland to take control of various
memory page faults, something otherwise only the kernel code could do.
For example userfaults allows a proper and more optimal implementation
-of the PROT_NONE+SIGSEGV trick.
+of the ``PROT_NONE+SIGSEGV`` trick.
Design
======
-Userfaults are delivered and resolved through the userfaultfd syscall.
+Userfaults are delivered and resolved through the ``userfaultfd`` syscall.
-The userfaultfd (aside from registering and unregistering virtual
+The ``userfaultfd`` (aside from registering and unregistering virtual
memory ranges) provides two primary functionalities:
-1) read/POLLIN protocol to notify a userland thread of the faults
+1) ``read/POLLIN`` protocol to notify a userland thread of the faults
happening
-2) various UFFDIO_* ioctls that can manage the virtual memory regions
- registered in the userfaultfd that allows userland to efficiently
+2) various ``UFFDIO_*`` ioctls that can manage the virtual memory regions
+ registered in the ``userfaultfd`` that allows userland to efficiently
resolve the userfaults it receives via 1) or to manage the virtual
memory in the background
The real advantage of userfaults if compared to regular virtual memory
management of mremap/mprotect is that the userfaults in all their
operations never involve heavyweight structures like vmas (in fact the
-userfaultfd runtime load never takes the mmap_sem for writing).
+``userfaultfd`` runtime load never takes the mmap_sem for writing).
Vmas are not suitable for page- (or hugepage) granular fault tracking
when dealing with virtual address spaces that could span
Terabytes. Too many vmas would be needed for that.
-The userfaultfd once opened by invoking the syscall, can also be
+The ``userfaultfd`` once opened by invoking the syscall, can also be
passed using unix domain sockets to a manager process, so the same
manager process could handle the userfaults of a multitude of
different processes without them being aware about what is going on
-(well of course unless they later try to use the userfaultfd
+(well of course unless they later try to use the ``userfaultfd``
themselves on the same region the manager is already tracking, which
-is a corner case that would currently return -EBUSY).
+is a corner case that would currently return ``-EBUSY``).
API
===
-When first opened the userfaultfd must be enabled invoking the
-UFFDIO_API ioctl specifying a uffdio_api.api value set to UFFD_API (or
-a later API version) which will specify the read/POLLIN protocol
-userland intends to speak on the UFFD and the uffdio_api.features
-userland requires. The UFFDIO_API ioctl if successful (i.e. if the
-requested uffdio_api.api is spoken also by the running kernel and the
+When first opened the ``userfaultfd`` must be enabled invoking the
+``UFFDIO_API`` ioctl specifying a ``uffdio_api.api`` value set to ``UFFD_API`` (or
+a later API version) which will specify the ``read/POLLIN`` protocol
+userland intends to speak on the ``UFFD`` and the ``uffdio_api.features``
+userland requires. The ``UFFDIO_API`` ioctl if successful (i.e. if the
+requested ``uffdio_api.api`` is spoken also by the running kernel and the
requested features are going to be enabled) will return into
-uffdio_api.features and uffdio_api.ioctls two 64bit bitmasks of
+``uffdio_api.features`` and ``uffdio_api.ioctls`` two 64bit bitmasks of
respectively all the available features of the read(2) protocol and
the generic ioctl available.
-The uffdio_api.features bitmask returned by the UFFDIO_API ioctl
-defines what memory types are supported by the userfaultfd and what
+The ``uffdio_api.features`` bitmask returned by the ``UFFDIO_API`` ioctl
+defines what memory types are supported by the ``userfaultfd`` and what
events, except page fault notifications, may be generated.
-If the kernel supports registering userfaultfd ranges on hugetlbfs
-virtual memory areas, UFFD_FEATURE_MISSING_HUGETLBFS will be set in
-uffdio_api.features. Similarly, UFFD_FEATURE_MISSING_SHMEM will be
-set if the kernel supports registering userfaultfd ranges on shared
-memory (covering all shmem APIs, i.e. tmpfs, IPCSHM, /dev/zero
-MAP_SHARED, memfd_create, etc).
+If the kernel supports registering ``userfaultfd`` ranges on hugetlbfs
+virtual memory areas, ``UFFD_FEATURE_MISSING_HUGETLBFS`` will be set in
+``uffdio_api.features``. Similarly, ``UFFD_FEATURE_MISSING_SHMEM`` will be
+set if the kernel supports registering ``userfaultfd`` ranges on shared
+memory (covering all shmem APIs, i.e. tmpfs, ``IPCSHM``, ``/dev/zero``,
+``MAP_SHARED``, ``memfd_create``, etc).
-The userland application that wants to use userfaultfd with hugetlbfs
+The userland application that wants to use ``userfaultfd`` with hugetlbfs
or shared memory need to set the corresponding flag in
-uffdio_api.features to enable those features.
+``uffdio_api.features`` to enable those features.
If the userland desires to receive notifications for events other than
-page faults, it has to verify that uffdio_api.features has appropriate
-UFFD_FEATURE_EVENT_* bits set. These events are described in more
-detail below in "Non-cooperative userfaultfd" section.
-
-Once the userfaultfd has been enabled the UFFDIO_REGISTER ioctl should
-be invoked (if present in the returned uffdio_api.ioctls bitmask) to
-register a memory range in the userfaultfd by setting the
-uffdio_register structure accordingly. The uffdio_register.mode
+page faults, it has to verify that ``uffdio_api.features`` has appropriate
+``UFFD_FEATURE_EVENT_*`` bits set. These events are described in more
+detail below in `Non-cooperative userfaultfd`_ section.
+
+Once the ``userfaultfd`` has been enabled the ``UFFDIO_REGISTER`` ioctl should
+be invoked (if present in the returned ``uffdio_api.ioctls`` bitmask) to
+register a memory range in the ``userfaultfd`` by setting the
+uffdio_register structure accordingly. The ``uffdio_register.mode``
bitmask will specify to the kernel which kind of faults to track for
-the range (UFFDIO_REGISTER_MODE_MISSING would track missing
-pages). The UFFDIO_REGISTER ioctl will return the
-uffdio_register.ioctls bitmask of ioctls that are suitable to resolve
+the range (``UFFDIO_REGISTER_MODE_MISSING`` would track missing
+pages). The ``UFFDIO_REGISTER`` ioctl will return the
+``uffdio_register.ioctls`` bitmask of ioctls that are suitable to resolve
userfaults on the range registered. Not all ioctls will necessarily be
supported for all memory types depending on the underlying virtual
memory backend (anonymous memory vs tmpfs vs real filebacked
mappings).
-Userland can use the uffdio_register.ioctls to manage the virtual
+Userland can use the ``uffdio_register.ioctls`` to manage the virtual
address space in the background (to add or potentially also remove
-memory from the userfaultfd registered range). This means a userfault
+memory from the ``userfaultfd`` registered range). This means a userfault
could be triggering just before userland maps in the background the
user-faulted page.
-The primary ioctl to resolve userfaults is UFFDIO_COPY. That
+The primary ioctl to resolve userfaults is ``UFFDIO_COPY``. That
atomically copies a page into the userfault registered range and wakes
-up the blocked userfaults (unless uffdio_copy.mode &
-UFFDIO_COPY_MODE_DONTWAKE is set). Other ioctl works similarly to
-UFFDIO_COPY. They're atomic as in guaranteeing that nothing can see an
-half copied page since it'll keep userfaulting until the copy has
-finished.
+up the blocked userfaults
+(unless ``uffdio_copy.mode & UFFDIO_COPY_MODE_DONTWAKE`` is set).
+Other ioctls work similarly to ``UFFDIO_COPY``. They're atomic as in
+guaranteeing that nothing can see a half-copied page since it'll
+keep userfaulting until the copy has finished.
Notes:
-- If you requested UFFDIO_REGISTER_MODE_MISSING when registering then
+- If you requested ``UFFDIO_REGISTER_MODE_MISSING`` when registering then
you must provide some kind of page in your thread after reading from
- the uffd. You must provide either UFFDIO_COPY or UFFDIO_ZEROPAGE.
+ the uffd. You must provide either ``UFFDIO_COPY`` or ``UFFDIO_ZEROPAGE``.
The normal behavior of the OS automatically providing a zero page on
an anonymous mmapping is not in place.
@@ -122,13 +122,13 @@ Notes:
- You get the address of the access that triggered the missing page
event out of a struct uffd_msg that you read in the thread from the
- uffd. You can supply as many pages as you want with UFFDIO_COPY or
- UFFDIO_ZEROPAGE. Keep in mind that unless you used DONTWAKE then
+ uffd. You can supply as many pages as you want with ``UFFDIO_COPY`` or
+ ``UFFDIO_ZEROPAGE``. Keep in mind that unless you used DONTWAKE then
the first of any of those IOCTLs wakes up the faulting thread.
-- Be sure to test for all errors including (pollfd[0].revents &
- POLLERR). This can happen, e.g. when ranges supplied were
- incorrect.
+- Be sure to test for all errors including
+ (``pollfd[0].revents & POLLERR``). This can happen, e.g. when ranges
+ supplied were incorrect.
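+
+As an illustration only (this sketch is not part of the patch or of the
+kernel's sample code; names like ``fault_handler`` are made up and error
+checking is omitted for brevity), the register/read/``UFFDIO_COPY`` flow
+described above looks roughly like this::
+
+  #include <fcntl.h>
+  #include <linux/userfaultfd.h>
+  #include <poll.h>
+  #include <pthread.h>
+  #include <stdio.h>
+  #include <string.h>
+  #include <sys/ioctl.h>
+  #include <sys/mman.h>
+  #include <sys/syscall.h>
+  #include <unistd.h>
+
+  static long page_size;
+
+  static void *fault_handler(void *arg)
+  {
+          int uffd = (int)(long)arg;
+          struct pollfd pfd = { .fd = uffd, .events = POLLIN };
+          struct uffd_msg msg;
+
+          poll(&pfd, 1, -1);                  /* wait for a userfault */
+          read(uffd, &msg, sizeof(msg));
+          if (msg.event != UFFD_EVENT_PAGEFAULT)
+                  return NULL;
+
+          /* Build one page of data and resolve the fault with UFFDIO_COPY. */
+          char *src = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
+                           MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+          memset(src, 'A', page_size);
+
+          struct uffdio_copy copy = {
+                  .dst  = msg.arg.pagefault.address & ~(page_size - 1),
+                  .src  = (unsigned long)src,
+                  .len  = page_size,
+                  .mode = 0,                  /* 0 == also wake the faulter */
+          };
+          ioctl(uffd, UFFDIO_COPY, &copy);
+          return NULL;
+  }
+
+  int main(void)
+  {
+          page_size = sysconf(_SC_PAGESIZE);
+
+          int uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
+
+          struct uffdio_api api = { .api = UFFD_API };
+          ioctl(uffd, UFFDIO_API, &api);      /* enable the API */
+
+          char *area = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
+                            MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+          struct uffdio_register reg = {
+                  .range = { .start = (unsigned long)area, .len = page_size },
+                  .mode  = UFFDIO_REGISTER_MODE_MISSING,
+          };
+          ioctl(uffd, UFFDIO_REGISTER, &reg); /* track missing pages */
+
+          pthread_t thr;
+          pthread_create(&thr, NULL, fault_handler, (void *)(long)uffd);
+
+          /* Touching the never-populated page triggers a userfault that is
+           * resolved by the handler thread above. */
+          printf("first byte: %c\n", area[0]);
+          pthread_join(thr, NULL);
+          return 0;
+  }
+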
Write Protect Notifications
---------------------------
@@ -136,41 +136,42 @@ Write Protect Notifications
This is equivalent to (but faster than) using mprotect and a SIGSEGV
signal handler.
-Firstly you need to register a range with UFFDIO_REGISTER_MODE_WP.
-Instead of using mprotect(2) you use ioctl(uffd, UFFDIO_WRITEPROTECT,
-struct *uffdio_writeprotect) while mode = UFFDIO_WRITEPROTECT_MODE_WP
+Firstly you need to register a range with ``UFFDIO_REGISTER_MODE_WP``.
+Instead of using mprotect(2) you use
+``ioctl(uffd, UFFDIO_WRITEPROTECT, struct uffdio_writeprotect *)``
+while ``mode = UFFDIO_WRITEPROTECT_MODE_WP``
in the struct passed in. The range does not default to and does not
have to be identical to the range you registered with. You can write
protect as many ranges as you like (inside the registered range).
Then, in the thread reading from uffd the struct will have
-msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WP set. Now you send
-ioctl(uffd, UFFDIO_WRITEPROTECT, struct *uffdio_writeprotect) again
-while pagefault.mode does not have UFFDIO_WRITEPROTECT_MODE_WP set.
-This wakes up the thread which will continue to run with writes. This
+``msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WP`` set. Now you send
+``ioctl(uffd, UFFDIO_WRITEPROTECT, struct uffdio_writeprotect *)``
+again while ``pagefault.mode`` does not have ``UFFDIO_WRITEPROTECT_MODE_WP``
+set. This wakes up the thread which will continue to run with writes. This
allows you to do the bookkeeping about the write in the uffd reading
thread before the ioctl.
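+
+As a rough illustration (not part of the patch; it assumes ``uffd``, ``area``
+and ``page_size`` were set up as in the sketch from the API section, with the
+range registered using ``UFFDIO_REGISTER_MODE_WP``), the two ioctl calls of
+this sequence look like::
+
+  /* Write-protect one page inside the registered range. */
+  struct uffdio_writeprotect wp = {
+          .range = { .start = (unsigned long)area, .len = page_size },
+          .mode  = UFFDIO_WRITEPROTECT_MODE_WP,
+  };
+  ioctl(uffd, UFFDIO_WRITEPROTECT, &wp);
+
+  /* Later, after reading a message with UFFD_PAGEFAULT_FLAG_WP set and doing
+   * the bookkeeping, drop the protection and wake up the writer. */
+  wp.mode = 0;            /* neither MODE_WP nor MODE_DONTWAKE */
+  ioctl(uffd, UFFDIO_WRITEPROTECT, &wp);
+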
-If you registered with both UFFDIO_REGISTER_MODE_MISSING and
-UFFDIO_REGISTER_MODE_WP then you need to think about the sequence in
+If you registered with both ``UFFDIO_REGISTER_MODE_MISSING`` and
+``UFFDIO_REGISTER_MODE_WP`` then you need to think about the sequence in
which you supply a page and undo write protect. Note that there is a
difference between writes into a WP area and into a !WP area. The
-former will have UFFD_PAGEFAULT_FLAG_WP set, the latter
-UFFD_PAGEFAULT_FLAG_WRITE. The latter did not fail on protection but
-you still need to supply a page when UFFDIO_REGISTER_MODE_MISSING was
+former will have ``UFFD_PAGEFAULT_FLAG_WP`` set, the latter
+``UFFD_PAGEFAULT_FLAG_WRITE``. The latter did not fail on protection but
+you still need to supply a page when ``UFFDIO_REGISTER_MODE_MISSING`` was
used.
QEMU/KVM
========
-QEMU/KVM is using the userfaultfd syscall to implement postcopy live
+QEMU/KVM is using the ``userfaultfd`` syscall to implement postcopy live
migration. Postcopy live migration is one form of memory
externalization consisting of a virtual machine running with part or
all of its memory residing on a different node in the cloud. The
-userfaultfd abstraction is generic enough that not a single line of
+``userfaultfd`` abstraction is generic enough that not a single line of
KVM kernel code had to be modified in order to add postcopy live
migration to QEMU.
-Guest async page faults, FOLL_NOWAIT and all other GUP features work
+Guest async page faults, ``FOLL_NOWAIT`` and all other ``GUP*`` features work
just fine in combination with userfaults. Userfaults trigger async
page faults in the guest scheduler so those guest processes that
aren't waiting for userfaults (i.e. network bound) can keep running in
@@ -183,19 +184,19 @@ generating userfaults for readonly guest regions.
The implementation of postcopy live migration currently uses one
single bidirectional socket but in the future two different sockets
will be used (to reduce the latency of the userfaults to the minimum
-possible without having to decrease /proc/sys/net/ipv4/tcp_wmem).
+possible without having to decrease ``/proc/sys/net/ipv4/tcp_wmem``).
The QEMU in the source node writes all pages that it knows are missing
in the destination node, into the socket, and the migration thread of
-the QEMU running in the destination node runs UFFDIO_COPY|ZEROPAGE
-ioctls on the userfaultfd in order to map the received pages into the
-guest (UFFDIO_ZEROCOPY is used if the source page was a zero page).
+the QEMU running in the destination node runs ``UFFDIO_COPY|ZEROPAGE``
+ioctls on the ``userfaultfd`` in order to map the received pages into the
+guest (``UFFDIO_ZEROCOPY`` is used if the source page was a zero page).
A different postcopy thread in the destination node listens with
-poll() to the userfaultfd in parallel. When a POLLIN event is
+poll() to the ``userfaultfd`` in parallel. When a ``POLLIN`` event is
generated after a userfault triggers, the postcopy thread read() from
-the userfaultfd and receives the fault address (or -EAGAIN in case the
-userfault was already resolved and waken by a UFFDIO_COPY|ZEROPAGE run
+the ``userfaultfd`` and receives the fault address (or ``-EAGAIN`` in case the
+userfault was already resolved and woken by a ``UFFDIO_COPY|ZEROPAGE`` run
by the parallel QEMU migration thread).
After the QEMU postcopy thread (running in the destination node) gets
@@ -206,7 +207,7 @@ remaining missing pages from that new page offset. Soon after that
(just the time to flush the tcp_wmem queue through the network) the
migration thread in the QEMU running in the destination node will
receive the page that triggered the userfault and it'll map it as
-usual with the UFFDIO_COPY|ZEROPAGE (without actually knowing if it
+usual with the ``UFFDIO_COPY|ZEROPAGE`` (without actually knowing if it
was spontaneously sent by the source or if it was an urgent page
requested through a userfault).
@@ -219,74 +220,74 @@ checked to find which missing pages to send in round robin and we seek
over it when receiving incoming userfaults. After sending each page of
course the bitmap is updated accordingly. It's also useful to avoid
sending the same page twice (in case the userfault is read by the
-postcopy thread just before UFFDIO_COPY|ZEROPAGE runs in the migration
+postcopy thread just before ``UFFDIO_COPY|ZEROPAGE`` runs in the migration
thread).
Non-cooperative userfaultfd
===========================
-When the userfaultfd is monitored by an external manager, the manager
+When the ``userfaultfd`` is monitored by an external manager, the manager
must be able to track changes in the process virtual memory
layout. Userfaultfd can notify the manager about such changes using
the same read(2) protocol as for the page fault notifications. The
manager has to explicitly enable these events by setting appropriate
-bits in uffdio_api.features passed to UFFDIO_API ioctl:
+bits in ``uffdio_api.features`` passed to ``UFFDIO_API`` ioctl:
-UFFD_FEATURE_EVENT_FORK
- enable userfaultfd hooks for fork(). When this feature is
- enabled, the userfaultfd context of the parent process is
+``UFFD_FEATURE_EVENT_FORK``
+ enable ``userfaultfd`` hooks for fork(). When this feature is
+ enabled, the ``userfaultfd`` context of the parent process is
duplicated into the newly created process. The manager
- receives UFFD_EVENT_FORK with file descriptor of the new
- userfaultfd context in the uffd_msg.fork.
+ receives ``UFFD_EVENT_FORK`` with file descriptor of the new
+ ``userfaultfd`` context in the ``uffd_msg.fork``.
-UFFD_FEATURE_EVENT_REMAP
+``UFFD_FEATURE_EVENT_REMAP``
enable notifications about mremap() calls. When the
non-cooperative process moves a virtual memory area to a
different location, the manager will receive
- UFFD_EVENT_REMAP. The uffd_msg.remap will contain the old and
+ ``UFFD_EVENT_REMAP``. The ``uffd_msg.remap`` will contain the old and
new addresses of the area and its original length.
-UFFD_FEATURE_EVENT_REMOVE
+``UFFD_FEATURE_EVENT_REMOVE``
enable notifications about madvise(MADV_REMOVE) and
- madvise(MADV_DONTNEED) calls. The event UFFD_EVENT_REMOVE will
- be generated upon these calls to madvise. The uffd_msg.remove
+ madvise(MADV_DONTNEED) calls. The event ``UFFD_EVENT_REMOVE`` will
+ be generated upon these calls to madvise(). The ``uffd_msg.remove``
will contain start and end addresses of the removed area.
-UFFD_FEATURE_EVENT_UNMAP
+``UFFD_FEATURE_EVENT_UNMAP``
enable notifications about memory unmapping. The manager will
- get UFFD_EVENT_UNMAP with uffd_msg.remove containing start and
+ get ``UFFD_EVENT_UNMAP`` with ``uffd_msg.remove`` containing start and
end addresses of the unmapped area.
-Although the UFFD_FEATURE_EVENT_REMOVE and UFFD_FEATURE_EVENT_UNMAP
+Although the ``UFFD_FEATURE_EVENT_REMOVE`` and ``UFFD_FEATURE_EVENT_UNMAP``
are pretty similar, they quite differ in the action expected from the
-userfaultfd manager. In the former case, the virtual memory is
+``userfaultfd`` manager. In the former case, the virtual memory is
removed, but the area is not, the area remains monitored by the
-userfaultfd, and if a page fault occurs in that area it will be
+``userfaultfd``, and if a page fault occurs in that area it will be
delivered to the manager. The proper resolution for such page fault is
to zeromap the faulting address. However, in the latter case, when an
area is unmapped, either explicitly (with munmap() system call), or
implicitly (e.g. during mremap()), the area is removed and in turn the
-userfaultfd context for such area disappears too and the manager will
+``userfaultfd`` context for such area disappears too and the manager will
not get further userland page faults from the removed area. Still, the
notification is required in order to prevent manager from using
-UFFDIO_COPY on the unmapped area.
+``UFFDIO_COPY`` on the unmapped area.
Unlike userland page faults which have to be synchronous and require
explicit or implicit wakeup, all the events are delivered
asynchronously and the non-cooperative process resumes execution as
-soon as manager executes read(). The userfaultfd manager should
-carefully synchronize calls to UFFDIO_COPY with the events
-processing. To aid the synchronization, the UFFDIO_COPY ioctl will
-return -ENOSPC when the monitored process exits at the time of
-UFFDIO_COPY, and -ENOENT, when the non-cooperative process has changed
-its virtual memory layout simultaneously with outstanding UFFDIO_COPY
+soon as manager executes read(). The ``userfaultfd`` manager should
+carefully synchronize calls to ``UFFDIO_COPY`` with the events
+processing. To aid the synchronization, the ``UFFDIO_COPY`` ioctl will
+return ``-ENOSPC`` when the monitored process exits at the time of
+``UFFDIO_COPY``, and ``-ENOENT``, when the non-cooperative process has changed
+its virtual memory layout simultaneously with outstanding ``UFFDIO_COPY``
operation.
The current asynchronous model of the event delivery is optimal for
-single threaded non-cooperative userfaultfd manager implementations. A
+single threaded non-cooperative ``userfaultfd`` manager implementations. A
synchronous event delivery model can be added later as a new
-userfaultfd feature to facilitate multithreading enhancements of the
-non cooperative manager, for example to allow UFFDIO_COPY ioctls to
+``userfaultfd`` feature to facilitate multithreading enhancements of the
+non cooperative manager, for example to allow ``UFFDIO_COPY`` ioctls to
run in parallel to the event reception. Single threaded
implementations should continue to use the current async event
delivery model instead.
diff --git a/Documentation/admin-guide/nfs/nfsroot.rst b/Documentation/admin-guide/nfs/nfsroot.rst
index 82a4fda057f9..c6772075c80c 100644
--- a/Documentation/admin-guide/nfs/nfsroot.rst
+++ b/Documentation/admin-guide/nfs/nfsroot.rst
@@ -18,7 +18,7 @@ Mounting the root filesystem via NFS (nfsroot)
In order to use a diskless system, such as an X-terminal or printer server for
example, it is necessary for the root filesystem to be present on a non-disk
device. This may be an initramfs (see
-Documentation/filesystems/ramfs-rootfs-initramfs.txt), a ramdisk (see
+Documentation/filesystems/ramfs-rootfs-initramfs.rst), a ramdisk (see
Documentation/admin-guide/initrd.rst) or a filesystem mounted via NFS. The
following text describes on how to use NFS for the root filesystem. For the rest
of this text 'client' means the diskless system, and 'server' means the NFS
diff --git a/Documentation/admin-guide/numastat.rst b/Documentation/admin-guide/numastat.rst
index aaf1667489f8..08ec2c2bdce3 100644
--- a/Documentation/admin-guide/numastat.rst
+++ b/Documentation/admin-guide/numastat.rst
@@ -6,6 +6,21 @@ Numa policy hit/miss statistics
All units are pages. Hugepages have separate counters.
+The numa_hit, numa_miss and numa_foreign counters reflect how well processes
+are able to allocate memory from nodes they prefer. If they succeed, numa_hit
+is incremented on the preferred node, otherwise numa_foreign is incremented on
+the preferred node and numa_miss on the node where allocation succeeded.
+
+Usually the preferred node is the one local to the CPU where the process executes,
+but restrictions such as mempolicies can change that, so there are also two
+counters based on the CPU's local node. local_node is similar to numa_hit and is
+incremented on allocation from a node by a CPU on the same node. other_node is
+similar to numa_miss and is incremented on the node where allocation succeeds
+from a CPU on a different node. Note there is no counter analogous to
+numa_foreign.
+
+In more detail:
+
=============== ============================================================
numa_hit A process wanted to allocate memory from this node,
and succeeded.
@@ -14,11 +29,13 @@ numa_miss A process wanted to allocate memory from another node,
but ended up with memory from this node.
numa_foreign A process wanted to allocate on this node,
- but ended up with memory from another one.
+ but ended up with memory from another node.
-local_node A process ran on this node and got memory from it.
+local_node A process ran on this node's CPU,
+ and got memory from this node.
-other_node A process ran on this node and got memory from another node.
+other_node A process ran on a different node's CPU
+ and got memory from this node.
interleave_hit Interleaving wanted to allocate from this node
and succeeded.
@@ -28,3 +45,11 @@ For easier reading you can use the numastat utility from the numactl package
(http://oss.sgi.com/projects/libnuma/). Note that it only works
well right now on machines with a small number of CPUs.
+Note that on systems with memoryless nodes (where a node has CPUs but no
+memory) the numa_hit, numa_miss and numa_foreign statistics can be skewed
+heavily. In the current kernel implementation, if a process prefers a
+memoryless node (i.e. because it is running on one of its local CPUs), the
+implementation actually treats one of the nearest nodes with memory as the
+preferred node. As a result, such allocation will not increase the numa_foreign
+counter on the memoryless node, and will skew the numa_hit, numa_miss and
+numa_foreign statistics of the nearest node.
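+
+For illustration only (the counter values below are made up), the per-node
+counters described above can be read from sysfs::
+
+  $ cat /sys/devices/system/node/node0/numastat
+  numa_hit 193460
+  numa_miss 0
+  numa_foreign 0
+  interleave_hit 7840
+  local_node 193430
+  other_node 30
+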
diff --git a/Documentation/admin-guide/perf-security.rst b/Documentation/admin-guide/perf-security.rst
index 72effa7c23b9..1307b5274a0f 100644
--- a/Documentation/admin-guide/perf-security.rst
+++ b/Documentation/admin-guide/perf-security.rst
@@ -1,6 +1,6 @@
.. _perf_security:
-Perf Events and tool security
+Perf events and tool security
=============================
Overview
@@ -42,11 +42,11 @@ categories:
Data that belong to the fourth category can potentially contain
sensitive process data. If PMUs in some monitoring modes capture values
of execution context registers or data from process memory then access
-to such monitoring capabilities requires to be ordered and secured
-properly. So, perf_events/Perf performance monitoring is the subject for
-security access control management [5]_ .
+to such monitoring modes needs to be ordered and secured properly.
+So, perf_events performance monitoring and observability operations are
+the subject of security access control management [5]_ .
-perf_events/Perf access control
+perf_events access control
-------------------------------
To perform security checks, the Linux implementation splits processes
@@ -66,11 +66,25 @@ into distinct units, known as capabilities [6]_ , which can be
independently enabled and disabled on per-thread basis for processes and
files of unprivileged users.
-Unprivileged processes with enabled CAP_SYS_ADMIN capability are treated
+Unprivileged processes with enabled CAP_PERFMON capability are treated
as privileged processes with respect to perf_events performance
-monitoring and bypass *scope* permissions checks in the kernel.
-
-Unprivileged processes using perf_events system call API is also subject
+monitoring and observability operations, and thus bypass *scope* permissions
+checks in the kernel. CAP_PERFMON implements the principle of least
+privilege [13]_ (POSIX 1003.1e: 2.2.2.39) for performance monitoring and
+observability operations in the kernel and provides a secure approach to
+performance monitoring and observability in the system.
+
+For backward compatibility reasons, access to perf_events monitoring and
+observability operations is also open to CAP_SYS_ADMIN privileged
+processes, but using CAP_SYS_ADMIN for secure monitoring and observability
+use cases is discouraged in favor of the CAP_PERFMON capability.
+If system audit records [14]_ for a process using the perf_events system call
+API contain denial records for both the CAP_PERFMON and CAP_SYS_ADMIN
+capabilities, then granting the process the CAP_PERFMON capability alone
+is the recommended secure approach to resolve the double access-denial
+logging related to performance monitoring and observability.
+
+Unprivileged processes using the perf_events system call are also subject
for PTRACE_MODE_READ_REALCREDS ptrace access mode check [7]_ , whose
outcome determines whether monitoring is permitted. So unprivileged
processes provided with CAP_SYS_PTRACE capability are effectively
@@ -82,14 +96,14 @@ performance analysis of monitored processes or a system. For example,
CAP_SYSLOG capability permits reading kernel space memory addresses from
/proc/kallsyms file.
-perf_events/Perf privileged users
+Privileged Perf user groups
---------------------------------
Mechanisms of capabilities, privileged capability-dumb files [6]_ and
-file system ACLs [10]_ can be used to create a dedicated group of
-perf_events/Perf privileged users who are permitted to execute
-performance monitoring without scope limits. The following steps can be
-taken to create such a group of privileged Perf users.
+file system ACLs [10]_ can be used to create dedicated groups of
+privileged Perf users who are permitted to execute performance monitoring
+and observability without scope limits. The following steps can be
+taken to create such groups of privileged Perf users.
1. Create perf_users group of privileged Perf users, assign perf_users
group to Perf tool executable and limit access to the executable for
@@ -108,30 +122,51 @@ taken to create such a group of privileged Perf users.
-rwxr-x--- 2 root perf_users 11M Oct 19 15:12 perf
2. Assign the required capabilities to the Perf tool executable file and
- enable members of perf_users group with performance monitoring
+ enable members of perf_users group with monitoring and observability
privileges [6]_ :
::
- # setcap "cap_sys_admin,cap_sys_ptrace,cap_syslog=ep" perf
- # setcap -v "cap_sys_admin,cap_sys_ptrace,cap_syslog=ep" perf
+ # setcap "cap_perfmon,cap_sys_ptrace,cap_syslog=ep" perf
+ # setcap -v "cap_perfmon,cap_sys_ptrace,cap_syslog=ep" perf
perf: OK
# getcap perf
- perf = cap_sys_ptrace,cap_sys_admin,cap_syslog+ep
+ perf = cap_sys_ptrace,cap_syslog,cap_perfmon+ep
+
+If the installed libcap doesn't yet support "cap_perfmon", use "38" instead,
+i.e.:
+
+::
+
+ # setcap "38,cap_ipc_lock,cap_sys_ptrace,cap_syslog=ep" perf
+
+Note that you may need to have 'cap_ipc_lock' in the mix for tools such as
+'perf top'; alternatively, use 'perf top -m N' to reduce the memory that
+it uses for the perf ring buffer, see the memory allocation section below.
+
+Using a libcap without support for CAP_PERFMON will make cap_get_flag(caps, 38,
+CAP_EFFECTIVE, &val) fail, which will cause the default event to be 'cycles:u',
+so as a workaround explicitly ask for the 'cycles' event, i.e.:
+
+::
+
+ # perf top -e cycles
+
+To get kernel and user samples with a perf binary with just CAP_PERFMON.
As a result, members of perf_users group are capable of conducting
-performance monitoring by using functionality of the configured Perf
-tool executable that, when executes, passes perf_events subsystem scope
-checks.
+performance monitoring and observability by using functionality of the
+configured Perf tool executable that, when executed, passes perf_events
+subsystem scope checks.
This specific access control management is only available to superuser
or root running processes with CAP_SETPCAP, CAP_SETFCAP [6]_
capabilities.
-perf_events/Perf unprivileged users
+Unprivileged users
-----------------------------------
-perf_events/Perf *scope* and *access* control for unprivileged processes
+perf_events *scope* and *access* control for unprivileged processes
is governed by perf_event_paranoid [2]_ setting:
-1:
@@ -166,7 +201,7 @@ is governed by perf_event_paranoid [2]_ setting:
perf_event_mlock_kb locking limit is imposed but ignored for
unprivileged processes with CAP_IPC_LOCK capability.
-perf_events/Perf resource control
+Resource control
---------------------------------
Open file descriptors
@@ -227,4 +262,5 @@ Bibliography
.. [10] `<http://man7.org/linux/man-pages/man5/acl.5.html>`_
.. [11] `<http://man7.org/linux/man-pages/man2/getrlimit.2.html>`_
.. [12] `<http://man7.org/linux/man-pages/man5/limits.conf.5.html>`_
-
+.. [13] `<https://sites.google.com/site/fullycapable>`_
+.. [14] `<http://man7.org/linux/man-pages/man8/auditd.8.html>`_
diff --git a/Documentation/admin-guide/pm/cpuidle.rst b/Documentation/admin-guide/pm/cpuidle.rst
index 5605cc6f9560..a96a423e3779 100644
--- a/Documentation/admin-guide/pm/cpuidle.rst
+++ b/Documentation/admin-guide/pm/cpuidle.rst
@@ -159,17 +159,15 @@ governor uses that information depends on what algorithm is implemented by it
and that is the primary reason for having more than one governor in the
``CPUIdle`` subsystem.
-There are three ``CPUIdle`` governors available, ``menu``, `TEO <teo-gov_>`_
-and ``ladder``. Which of them is used by default depends on the configuration
-of the kernel and in particular on whether or not the scheduler tick can be
-`stopped by the idle loop <idle-cpus-and-tick_>`_. It is possible to change the
-governor at run time if the ``cpuidle_sysfs_switch`` command line parameter has
-been passed to the kernel, but that is not safe in general, so it should not be
-done on production systems (that may change in the future, though). The name of
-the ``CPUIdle`` governor currently used by the kernel can be read from the
-:file:`current_governor_ro` (or :file:`current_governor` if
-``cpuidle_sysfs_switch`` is present in the kernel command line) file under
-:file:`/sys/devices/system/cpu/cpuidle/` in ``sysfs``.
+There are four ``CPUIdle`` governors available, ``menu``, `TEO <teo-gov_>`_,
+``ladder`` and ``haltpoll``. Which of them is used by default depends on the
+configuration of the kernel and in particular on whether or not the scheduler
+tick can be `stopped by the idle loop <idle-cpus-and-tick_>`_. Available
+governors can be read from the :file:`available_governors` file, and the governor
+can be changed at runtime. The name of the ``CPUIdle`` governor currently
+used by the kernel can be read from the :file:`current_governor_ro` or
+:file:`current_governor` file under :file:`/sys/devices/system/cpu/cpuidle/`
+in ``sysfs``.
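+
+For example (the output below is only illustrative; the available governors
+depend on the kernel configuration)::
+
+  $ cat /sys/devices/system/cpu/cpuidle/available_governors
+  ladder menu
+  $ cat /sys/devices/system/cpu/cpuidle/current_governor
+  menu
+  # echo ladder > /sys/devices/system/cpu/cpuidle/current_governor
+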
Which ``CPUIdle`` driver is used, on the other hand, usually depends on the
platform the kernel is running on, but there are platforms with more than one
diff --git a/Documentation/admin-guide/pm/intel-speed-select.rst b/Documentation/admin-guide/pm/intel-speed-select.rst
new file mode 100644
index 000000000000..b2ca601c21c6
--- /dev/null
+++ b/Documentation/admin-guide/pm/intel-speed-select.rst
@@ -0,0 +1,917 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============================================================
+Intel(R) Speed Select Technology User Guide
+============================================================
+
+The Intel(R) Speed Select Technology (Intel(R) SST) provides a powerful new
+collection of features that give more granular control over CPU performance.
+With Intel(R) SST, one server can be configured for power and performance for a
+variety of diverse workload requirements.
+
+Refer to the links below for an overview of the technology:
+
+- https://www.intel.com/content/www/us/en/architecture-and-technology/speed-select-technology-article.html
+- https://builders.intel.com/docs/networkbuilders/intel-speed-select-technology-base-frequency-enhancing-performance.pdf
+
+These capabilities are further enhanced in some of the newer generations of
+server platforms where these features can be enumerated and controlled
+dynamically without pre-configuring via BIOS setup options. This dynamic
+configuration is done via mailbox commands to the hardware. One way to enumerate
+and configure these features is by using the Intel Speed Select utility.
+
+This document explains how to use the Intel Speed Select tool to enumerate and
+control Intel(R) SST features. This document gives example commands and explains
+how these commands change the power and performance profile of the system under
+test. Using this tool as an example, customers can replicate the messaging
+implemented in the tool in their production software.
+
+intel-speed-select configuration tool
+======================================
+
+Most Linux distributions may include the "intel-speed-select" tool in a package. If not,
+it can be built by downloading the Linux kernel tree from kernel.org. Once
+downloaded, the tool can be built without building the full kernel.
+
+From the kernel tree, run the following commands::
+
+    # cd tools/power/x86/intel-speed-select/
+    # make
+    # make install
+
+Getting Help
+------------
+
+To get help with the tool, execute the command below::
+
+    # intel-speed-select --help
+
+The top-level help describes arguments and features. Notice that there is a
+multi-level help structure in the tool. For example, to get help for the feature "perf-profile"::
+
+    # intel-speed-select perf-profile --help
+
+To get help on a command, another level of help is provided. For example, for
+the command "info"::
+
+    # intel-speed-select perf-profile info --help
+
+Summary of platform capability
+------------------------------
+To check the current platform and driver capabilities, execute::
+
+    # intel-speed-select --info
+
+For example on a test system::
+
+ # intel-speed-select --info
+ Intel(R) Speed Select Technology
+ Executing on CPU model: X
+ Platform: API version : 1
+ Platform: Driver version : 1
+ Platform: mbox supported : 1
+ Platform: mmio supported : 1
+ Intel(R) SST-PP (feature perf-profile) is supported
+ TDP level change control is unlocked, max level: 4
+ Intel(R) SST-TF (feature turbo-freq) is supported
+ Intel(R) SST-BF (feature base-freq) is not supported
+ Intel(R) SST-CP (feature core-power) is supported
+
+Intel(R) Speed Select Technology - Performance Profile (Intel(R) SST-PP)
+------------------------------------------------------------------------
+
+This feature allows configuration of a server dynamically based on workload
+performance requirements. This helps users during deployment as they do not have
+to choose a specific server configuration statically. This Intel(R) Speed Select
+Technology - Performance Profile (Intel(R) SST-PP) feature introduces a mechanism
+that allows multiple optimized performance profiles per system. Each profile
+defines a set of CPUs that need to be online and the rest offline to sustain a
+guaranteed base frequency. Once the user issues a command to use a specific
+performance profile and meets the CPU online/offline requirement, the user can expect
+a change in the base frequency dynamically. This feature is called
+"perf-profile" when using the Intel Speed Select tool.
+
+Number of performance levels
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+There can be multiple performance profiles on a system. To get the number of
+profiles, execute the command below::
+
+ # intel-speed-select perf-profile get-config-levels
+ Intel(R) Speed Select Technology
+ Executing on CPU model: X
+ package-0
+ die-0
+ cpu-0
+ get-config-levels:4
+ package-1
+ die-0
+ cpu-14
+ get-config-levels:4
+
+On this system under test, there are 4 performance profiles in addition to the
+base performance profile (which is performance level 0).
+
+Lock/Unlock status
+~~~~~~~~~~~~~~~~~~
+
+Even if there are multiple performance profiles, it is possible that they
+are locked. If they are locked, users cannot issue a command to change the
+performance state. It is possible that there is a BIOS setup option to unlock
+them; check with your system vendor.
+
+To check if the system is locked, execute the following command::
+
+ # intel-speed-select perf-profile get-lock-status
+ Intel(R) Speed Select Technology
+ Executing on CPU model: X
+ package-0
+ die-0
+ cpu-0
+ get-lock-status:0
+ package-1
+ die-0
+ cpu-14
+ get-lock-status:0
+
+In this case, lock status is 0, which means that the system is unlocked.
+
+Properties of a performance level
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+To get the properties of a specific performance level (for example, level 0 below), execute the command below::
+
+ # intel-speed-select perf-profile info -l 0
+ Intel(R) Speed Select Technology
+ Executing on CPU model: X
+ package-0
+ die-0
+ cpu-0
+ perf-profile-level-0
+ cpu-count:28
+ enable-cpu-mask:000003ff,f0003fff
+ enable-cpu-list:0,1,2,3,4,5,6,7,8,9,10,11,12,13,28,29,30,31,32,33,34,35,36,37,38,39,40,41
+ thermal-design-power-ratio:26
+ base-frequency(MHz):2600
+ speed-select-turbo-freq:disabled
+ speed-select-base-freq:disabled
+ ...
+ ...
+
+Here the -l option is used to specify a performance level.
+
+If the option -l is omitted, then this command will print information about all
+the performance levels. The above command prints the properties of
+performance level 0.
+
+For this performance profile, at most the CPUs displayed by the
+"enable-cpu-mask/enable-cpu-list" can be "online". When that
+condition is met, the base frequency of 2600 MHz can be maintained. To
+understand more, execute "intel-speed-select perf-profile info" for performance
+level 4::
+
+ # intel-speed-select perf-profile info -l 4
+ Intel(R) Speed Select Technology
+ Executing on CPU model: X
+ package-0
+ die-0
+ cpu-0
+ perf-profile-level-4
+ cpu-count:28
+ enable-cpu-mask:000000fa,f0000faf
+ enable-cpu-list:0,1,2,3,5,7,8,9,10,11,28,29,30,31,33,35,36,37,38,39
+ thermal-design-power-ratio:28
+ base-frequency(MHz):2800
+ speed-select-turbo-freq:disabled
+ speed-select-base-freq:unsupported
+ ...
+ ...
+
+There are fewer CPUs in the "enable-cpu-mask/enable-cpu-list". Consequently, if
+the user only keeps these CPUs online and the rest "offline," then the base
+frequency is increased to 2.8 GHz compared to 2.6 GHz at performance level 0.
+
+Get current performance level
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+To get the current performance level, execute::
+
+ # intel-speed-select perf-profile get-config-current-level
+ Intel(R) Speed Select Technology
+ Executing on CPU model: X
+ package-0
+ die-0
+ cpu-0
+ get-config-current_level:0
+
+First verify that the base_frequency displayed by the cpufreq sysfs is correct::
+
+ # cat /sys/devices/system/cpu/cpu0/cpufreq/base_frequency
+ 2600000
+
+This matches the base-frequency (MHz) field value displayed from the
+"perf-profile info" command for performance level 0 (the cpufreq frequency is in
+kHz).
+
+To check if the average frequency is equal to the base frequency for a 100% busy
+workload, disable turbo::
+
+    # echo 1 > /sys/devices/system/cpu/intel_pstate/no_turbo
+
+Then run a busy workload on all CPUs, for example::
+
+    # stress -c 64
+
+To verify the base frequency, run turbostat::
+
+ #turbostat -c 0-13 --show Package,Core,CPU,Bzy_MHz -i 1
+
+ Package Core CPU Bzy_MHz
+ - - 2600
+ 0 0 0 2600
+ 0 1 1 2600
+ 0 2 2 2600
+ 0 3 3 2600
+ 0 4 4 2600
+ . . . .
+
+
+Changing performance level
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+To change the performance level to 4, execute::
+
+ # intel-speed-select -d perf-profile set-config-level -l 4 -o
+ Intel(R) Speed Select Technology
+ Executing on CPU model: X
+ package-0
+ die-0
+ cpu-0
+ perf-profile
+ set_tdp_level:success
+
+In the command above, "-o" is optional. If it is specified, then it will also
+offline CPUs which are not present in the enable_cpu_mask for this performance
+level.
+
+Now if the base_frequency is checked::
+
+ #cat /sys/devices/system/cpu/cpu0/cpufreq/base_frequency
+ 2800000
+
+This shows that the base frequency has now increased from 2600 MHz at performance
+level 0 to 2800 MHz at performance level 4. As a result, any workload which can
+use fewer CPUs can see a boost of 200 MHz compared to performance level 0.
+
+Check presence of other Intel(R) SST features
+---------------------------------------------
+
+Each of the performance profiles also specifies whether there is support for the
+other two Intel(R) SST features (Intel(R) Speed Select Technology - Base Frequency
+(Intel(R) SST-BF) and Intel(R) Speed Select Technology - Turbo Frequency (Intel
+SST-TF)).
+
+For example, from the output of "perf-profile info" above, for level 0 and level
+4:
+
+For level 0::
+
+    speed-select-turbo-freq:disabled
+    speed-select-base-freq:disabled
+
+For level 4::
+
+    speed-select-turbo-freq:disabled
+    speed-select-base-freq:unsupported
+
+Given these results, the "speed-select-base-freq" (Intel(R) SST-BF) in level 4
+changed from "disabled" to "unsupported" compared to performance level 0.
+
+This means that at performance level 4, the "speed-select-base-freq" feature is
+not supported. However, at performance level 0, this feature is "supported", but
+currently "disabled", meaning the user has not activated this feature. Whereas
+"speed-select-turbo-freq" (Intel(R) SST-TF) is supported at both performance
+levels, but currently not activated by the user.
+
+The Intel(R) SST-BF and the Intel(R) SST-TF features are built on a foundation
+technology called Intel(R) Speed Select Technology - Core Power (Intel(R) SST-CP).
+The platform firmware enables this feature when Intel(R) SST-BF or Intel(R) SST-TF
+is supported on a platform.
+
+Intel(R) Speed Select Technology Core Power (Intel(R) SST-CP)
+---------------------------------------------------------------
+
+Intel(R) Speed Select Technology Core Power (Intel(R) SST-CP) is an interface that
+allows users to define per-core priority. It provides a mechanism to distribute
+power among cores in a power-constrained scenario, based on a
+class of service (CLOS) configuration.
+
+The user can configure up to 4 class of service configurations. Each CLOS group
+configuration allows definitions of parameters, which affects how the frequency
+can be limited and power is distributed. Each CPU core can be tied to a class of
+service and hence an associated priority. The granularity is at the core level,
+not at the per-CPU level.
+
+Enable CLOS based prioritization
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+To use the CLOS based prioritization feature, firmware must be informed to
+enable it and which priority type to use. There is a default per-platform
+priority type, which can be changed with an optional command line parameter.
+
+To enable and check the options, execute::
+
+ # intel-speed-select core-power enable --help
+ Intel(R) Speed Select Technology
+ Executing on CPU model: X
+ Enable core-power for a package/die
+ Clos Enable: Specify priority type with [--priority|-p]
+ 0: Proportional, 1: Ordered
+
+There are two priority types:
+
+- Ordered
+
+Priority for ordered throttling is defined based on the index of the assigned
+CLOS group, where CLOS0 gets the highest priority (throttled last).
+
+Priority order is:
+CLOS0 > CLOS1 > CLOS2 > CLOS3.
+
+- Proportional
+
+When proportional priority is used, there is an additional parameter called
+frequency_weight, which can be specified per CLOS group. The goal of
+proportional priority is to provide each core with the requested minimum, then
+distribute all remaining (excess/deficit) budgets in proportion to a defined
+weight. This proportional priority can be configured using the "core-power
+config" command.
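+
+For example, to explicitly enable core-power with the ordered priority type
+(a sketch based on the help text above; output not shown)::
+
+    # intel-speed-select core-power enable --priority 1
+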
+
+To enable with the platform default priority type, execute::
+
+ # intel-speed-select core-power enable
+ Intel(R) Speed Select Technology
+ Executing on CPU model: X
+ package-0
+ die-0
+ cpu-0
+ core-power
+ enable:success
+ package-1
+ die-0
+ cpu-6
+ core-power
+ enable:success
+
+The scope of this enable is per package, or per die when a package contains
+multiple dies. To check if CLOS is enabled and get the priority type, the
+"core-power info" command can be used. For example, to check the status of the
+core-power feature on CPU 0, execute::
+
+ # intel-speed-select -c 0 core-power info
+ Intel(R) Speed Select Technology
+ Executing on CPU model: X
+ package-0
+ die-0
+ cpu-0
+ core-power
+ support-status:supported
+ enable-status:enabled
+ clos-enable-status:enabled
+ priority-type:proportional
+ package-1
+ die-0
+ cpu-24
+ core-power
+ support-status:supported
+ enable-status:enabled
+ clos-enable-status:enabled
+ priority-type:proportional
+
+Configuring CLOS groups
+~~~~~~~~~~~~~~~~~~~~~~~
+
+Each CLOS group has its own attributes including min, max, freq_weight and
+desired. These parameters can be configured with the "core-power config" command.
+Defaults will be used if the user skips setting a parameter, except the clos id,
+which is mandatory.
+
+ # intel-speed-select core-power config --help
+ Intel(R) Speed Select Technology
+ Executing on CPU model: X
+ Set core-power configuration for one of the four clos ids
+ Specify targeted clos id with [--clos|-c]
+ Specify clos Proportional Priority [--weight|-w]
+ Specify clos min in MHz with [--min|-n]
+ Specify clos max in MHz with [--max|-m]
+
+For example::
+
+ # intel-speed-select core-power config -c 0
+ Intel(R) Speed Select Technology
+ Executing on CPU model: X
+ clos epp is not specified, default: 0
+ clos frequency weight is not specified, default: 0
+ clos min is not specified, default: 0 MHz
+ clos max is not specified, default: 25500 MHz
+ clos desired is not specified, default: 0
+ package-0
+ die-0
+ cpu-0
+ core-power
+ config:success
+ package-1
+ die-0
+ cpu-6
+ core-power
+ config:success
+
+The user has the option to change the defaults. For example, the user can set
+"min" to the base frequency to always get the guaranteed base frequency.
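+
+For example, to raise the CLOS 0 minimum to the 2600 MHz base frequency of the
+system used in this document (illustrative only)::
+
+    # intel-speed-select core-power config -c 0 --min 2600
+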
+
+Get the current CLOS configuration
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+To check the current configuration, "core-power get-config" can be used. For
+example, to get the configuration of CLOS 0::
+
+ # intel-speed-select core-power get-config -c 0
+ Intel(R) Speed Select Technology
+ Executing on CPU model: X
+ package-0
+ die-0
+ cpu-0
+ core-power
+ clos:0
+ epp:0
+ clos-proportional-priority:0
+ clos-min:0 MHz
+ clos-max:Max Turbo frequency
+ clos-desired:0 MHz
+ package-1
+ die-0
+ cpu-24
+ core-power
+ clos:0
+ epp:0
+ clos-proportional-priority:0
+ clos-min:0 MHz
+ clos-max:Max Turbo frequency
+ clos-desired:0 MHz
+
+Associating a CPU with a CLOS group
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+To associate a CPU with a CLOS group, the "core-power assoc" command can be
+used::
+
+ # intel-speed-select core-power assoc --help
+ Intel(R) Speed Select Technology
+ Executing on CPU model: X
+ Associate a clos id to a CPU
+ Specify targeted clos id with [--clos|-c]
+
+
+For example, to associate CPU 10 with CLOS group 3, execute::
+
+ # intel-speed-select -c 10 core-power assoc -c 3
+ Intel(R) Speed Select Technology
+ Executing on CPU model: X
+ package-0
+ die-0
+ cpu-10
+ core-power
+ assoc:success
+
+Once a CPU is associated with a CLOS group, its sibling CPUs are associated
+with the same group. After association, avoid changing the Linux "cpufreq"
+subsystem scaling frequency limits.
+
+To check the existing association for a CPU, the "core-power get-assoc" command
+can be used. For example, to get the association of CPU 10, execute::
+
+ # intel-speed-select -c 10 core-power get-assoc
+ Intel(R) Speed Select Technology
+ Executing on CPU model: X
+ package-1
+ die-0
+ cpu-10
+ get-assoc
+ clos:3
+
+This shows that CPU 10 is part of CLOS group 3.
+
+
+Disable CLOS based prioritization
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+To disable, execute::
+
+ # intel-speed-select core-power disable
+
+Some features, such as Intel(R) SST-TF, can only be enabled when CLOS based
+prioritization is enabled, so disabling CLOS based prioritization while
+Intel(R) SST-TF is enabled would cause Intel(R) SST-TF to fail. For this
+reason, the "disable" command displays an error if Intel(R) SST-TF is still
+enabled; the Intel(R) SST-TF feature must be disabled first.
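+
+A sketch of the expected ordering (assuming a "turbo-freq disable" sub-command
+is available, mirroring the "base-freq disable" command shown later)::
+
+ # intel-speed-select turbo-freq disable
+ # intel-speed-select core-power disable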
+
+Intel(R) Speed Select Technology - Base Frequency (Intel(R) SST-BF)
+-------------------------------------------------------------------
+
+The Intel(R) Speed Select Technology - Base Frequency (Intel(R) SST-BF) feature
+lets the user control the base frequency. If some critical workload threads
+demand constant high guaranteed performance, this feature can be used to run
+those threads at a higher base frequency on a specific set of CPUs (high
+priority CPUs) at the cost of a lower base frequency on the other CPUs (low
+priority CPUs). This feature does not require the low priority CPUs to be taken
+offline.
+
+The support of Intel(R) SST-BF depends on the Intel(R) Speed Select Technology -
+Performance Profile (Intel(R) SST-PP) performance level configuration. It is
+possible that only certain performance levels support Intel(R) SST-BF, or that
+only the base performance level (level = 0) supports it. Consequently, first
+select the desired performance level to enable this feature.
+
+In the system under test here, Intel(R) SST-BF is supported at the base
+performance level 0, but is currently disabled. For example, for level 0::
+
+ # intel-speed-select -c 0 perf-profile info -l 0
+ Intel(R) Speed Select Technology
+ Executing on CPU model: X
+ package-0
+ die-0
+ cpu-0
+ perf-profile-level-0
+ ...
+
+ speed-select-base-freq:disabled
+ ...
+
+Before enabling Intel(R) SST-BF and measuring its impact on workload
+performance, run the workload without it and record a baseline performance to
+compare against.
+
+Here the user wants more guaranteed performance, so it is likely that turbo is
+disabled. To disable turbo, execute::
+
+ # echo 1 > /sys/devices/system/cpu/intel_pstate/no_turbo
+
+Based on the output of the "intel-speed-select perf-profile info -l 0" command
+above, the guaranteed base frequency is 2600 MHz.
+
+
+Measure baseline performance for comparison
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+To compare, pick a multi-threaded workload where each thread can be scheduled
+on a separate CPU. The "hackbench pipe" test is a good example of how
+performance can be improved using Intel(R) SST-BF.
+
+Below, the workload is measuring average scheduler wakeup latency, so a lower
+number means better performance::
+
+ # taskset -c 3,4 perf bench -r 100 sched pipe
+ # Running 'sched/pipe' benchmark:
+ # Executed 1000000 pipe operations between two processes
+ Total time: 6.102 [sec]
+ 6.102445 usecs/op
+ 163868 ops/sec
+
+While running the above test, the turbostat output shows that two of the CPUs
+are busy and reaching the maximum frequency (which here is the base frequency,
+since turbo is disabled). The turbostat output::
+
+ #turbostat -c 0-13 --show Package,Core,CPU,Bzy_MHz -i 1
+ Package Core CPU Bzy_MHz
+ 0 0 0 1000
+ 0 1 1 1005
+ 0 2 2 1000
+ 0 3 3 2600
+ 0 4 4 2600
+ 0 5 5 1000
+ 0 6 6 1000
+ 0 7 7 1005
+ 0 8 8 1005
+ 0 9 9 1000
+ 0 10 10 1000
+ 0 11 11 995
+ 0 12 12 1000
+ 0 13 13 1000
+
+From the above turbostat output, both CPU 3 and CPU 4 are very busy and reach
+the full guaranteed frequency of 2600 MHz.
+
+Intel(R) SST-BF Capabilities
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+To get capabilities of Intel(R) SST-BF for the current performance level 0,
+execute::
+
+ # intel-speed-select base-freq info -l 0
+ Intel(R) Speed Select Technology
+ Executing on CPU model: X
+ package-0
+ die-0
+ cpu-0
+ speed-select-base-freq
+ high-priority-base-frequency(MHz):3000
+ high-priority-cpu-mask:00000216,00002160
+ high-priority-cpu-list:5,6,8,13,33,34,36,41
+ low-priority-base-frequency(MHz):2400
+ tjunction-temperature(C):125
+ thermal-design-power(W):205
+
+The above capabilities show that some CPUs on this system can offer a base
+frequency of 3000 MHz compared to the standard base frequency at this
+performance level. These CPUs are fixed and are listed via
+high-priority-cpu-list/high-priority-cpu-mask. When the Intel(R) SST-BF feature
+is enabled, the low priority CPUs (those not in the high-priority-cpu-list) can
+only offer up to 2400 MHz. If this clipping of the low priority CPUs is
+acceptable, the user can enable the Intel(R) SST-BF feature. It is particularly
+attractive for the above "sched pipe" workload: since only two CPUs are used,
+they can be scheduled on high priority CPUs and get a boost of 400 MHz.
+
+Enable Intel(R) SST-BF
+~~~~~~~~~~~~~~~~~~~~~~
+
+To enable Intel(R) SST-BF feature, execute::
+
+ # intel-speed-select base-freq enable -a
+ Intel(R) Speed Select Technology
+ Executing on CPU model: X
+ package-0
+ die-0
+ cpu-0
+ base-freq
+ enable:success
+ package-1
+ die-0
+ cpu-14
+ base-freq
+ enable:success
+
+In this case, the -a option is optional. When used, it not only enables
+Intel(R) SST-BF, it also adjusts the priority of cores using the Intel(R) Speed
+Select Technology Core Power (Intel(R) SST-CP) feature. It sets the minimum
+performance of each Intel(R) Speed Select Technology - Performance Profile
+(Intel(R) SST-PP) class to the maximum performance so that the hardware will
+give the maximum possible performance to each CPU.
+
+If the -a option is not used, then the following steps are required before
+enabling Intel(R) SST-BF (a hedged example sequence is sketched after this
+list):
+
+- Discover Intel(R) SST-BF and note the low and high priority base frequencies
+- Note the high priority CPU list
+- Enable CLOS using the core-power feature set
+- Configure CLOS parameters. Use CLOS.min to set the minimum performance
+- Subscribe the desired CPUs to CLOS groups
+
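+A possible manual sequence, sketched with illustrative values only (the CLOS
+group numbers, the CPU list and the 3000/2400 MHz values are taken from the
+capability output above and must be adapted to the actual system)::
+
+ # intel-speed-select core-power enable
+ # intel-speed-select core-power config -c 0 --min 3000
+ # intel-speed-select core-power config -c 3 --min 2400
+ # intel-speed-select -c 5,6,8,13,33,34,36,41 core-power assoc -c 0
+ # intel-speed-select -c <low priority CPU list> core-power assoc -c 3
+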
+With this configuration, if the same workload is executed by pinning the
+workload to high priority CPUs (CPU 5 and 6 in this case)::
+
+ #taskset -c 5,6 perf bench -r 100 sched pipe
+ # Running 'sched/pipe' benchmark:
+ # Executed 1000000 pipe operations between two processes
+ Total time: 5.627 [sec]
+ 5.627922 usecs/op
+ 177685 ops/sec
+
+This way, by enabling Intel(R) SST-BF, the performance of this benchmark is
+improved (latency reduced) by 7.79%. From the turbostat output, it can be
+observed that the high priority CPUs reached 3000 MHz compared to 2600 MHz.
+The turbostat output::
+
+ #turbostat -c 0-13 --show Package,Core,CPU,Bzy_MHz -i 1
+ Package Core CPU Bzy_MHz
+ 0 0 0 2151
+ 0 1 1 2166
+ 0 2 2 2175
+ 0 3 3 2175
+ 0 4 4 2175
+ 0 5 5 3000
+ 0 6 6 3000
+ 0 7 7 2180
+ 0 8 8 2662
+ 0 9 9 2176
+ 0 10 10 2175
+ 0 11 11 2176
+ 0 12 12 2176
+ 0 13 13 2661
+
+Disable Intel(R) SST-BF
+~~~~~~~~~~~~~~~~~~~~~~~
+
+To disable the Intel(R) SST-BF feature, execute::
+
+ # intel-speed-select base-freq disable -a
+
+
+Intel(R) Speed Select Technology - Turbo Frequency (Intel(R) SST-TF)
+--------------------------------------------------------------------
+
+This feature provides the ability to set different "All core turbo ratio
+limits" for cores based on their priority. Using this feature, some cores can
+be configured to get a higher turbo frequency by designating them as high
+priority, at the cost of lower or no turbo frequency on the low priority cores.
+
+For this reason, this feature is only useful when the system is busy utilizing
+all CPUs, but the user wants a configurable option to get high performance on
+some of them.
+
+The support of Intel(R) Speed Select Technology - Turbo Frequency (Intel(R) SST-TF)
+depends on the Intel(R) Speed Select Technology - Performance Profile (Intel
+SST-PP) performance level configuration. It is possible that only a certain
+performance level supports Intel(R) SST-TF. It is also possible that only the base
+performance level (level = 0) has the support of Intel(R) SST-TF. Hence, first
+select the desired performance level to enable this feature.
+
+In the system under test here, Intel(R) SST-TF is supported at the base
+performance level 0, but currently disabled::
+
+ # intel-speed-select -c 0 perf-profile info -l 0
+ Intel(R) Speed Select Technology
+ package-0
+ die-0
+ cpu-0
+ perf-profile-level-0
+ ...
+ ...
+ speed-select-turbo-freq:disabled
+ ...
+ ...
+
+
+To check whether performance can be improved using the Intel(R) SST-TF feature,
+get the turbo frequency properties with Intel(R) SST-TF enabled and compare
+them to the base turbo capability of this system.
+
+Get Base turbo capability
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+To get the base turbo capability of performance level 0, execute::
+
+ # intel-speed-select perf-profile info -l 0
+ Intel(R) Speed Select Technology
+ Executing on CPU model: X
+ package-0
+ die-0
+ cpu-0
+ perf-profile-level-0
+ ...
+ ...
+ turbo-ratio-limits-sse
+ bucket-0
+ core-count:2
+ max-turbo-frequency(MHz):3200
+ bucket-1
+ core-count:4
+ max-turbo-frequency(MHz):3100
+ bucket-2
+ core-count:6
+ max-turbo-frequency(MHz):3100
+ bucket-3
+ core-count:8
+ max-turbo-frequency(MHz):3100
+ bucket-4
+ core-count:10
+ max-turbo-frequency(MHz):3100
+ bucket-5
+ core-count:12
+ max-turbo-frequency(MHz):3100
+ bucket-6
+ core-count:14
+ max-turbo-frequency(MHz):3100
+ bucket-7
+ core-count:16
+ max-turbo-frequency(MHz):3100
+
+Based on the data above, when all the CPUs are busy, a maximum frequency of
+3100 MHz can be achieved. With some busy workload (e.g. "stress") running on
+CPUs 0-11, execute the "hackbench pipe" workload on CPU 12 and CPU 13::
+
+ # taskset -c 12,13 perf bench -r 100 sched pipe
+ # Running 'sched/pipe' benchmark:
+ # Executed 1000000 pipe operations between two processes
+ Total time: 5.705 [sec]
+ 5.705488 usecs/op
+ 175269 ops/sec
+
+The turbostat output::
+
+ #turbostat -c 0-13 --show Package,Core,CPU,Bzy_MHz -i 1
+ Package Core CPU Bzy_MHz
+ 0 0 0 3000
+ 0 1 1 3000
+ 0 2 2 3000
+ 0 3 3 3000
+ 0 4 4 3000
+ 0 5 5 3100
+ 0 6 6 3100
+ 0 7 7 3000
+ 0 8 8 3100
+ 0 9 9 3000
+ 0 10 10 3000
+ 0 11 11 3000
+ 0 12 12 3100
+ 0 13 13 3100
+
+Based on the turbostat output, the performance is limited by the frequency cap
+of 3100 MHz. To check whether the hackbench performance can be improved for
+CPU 12 and CPU 13, first check the capability of the Intel(R) SST-TF feature
+for this performance level.
+
+Get Intel(R) SST-TF Capability
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+To get the capability, the "turbo-freq info" command can be used::
+
+ # intel-speed-select turbo-freq info -l 0
+ Intel(R) Speed Select Technology
+ Executing on CPU model: X
+ package-0
+ die-0
+ cpu-0
+ speed-select-turbo-freq
+ bucket-0
+ high-priority-cores-count:2
+ high-priority-max-frequency(MHz):3200
+ high-priority-max-avx2-frequency(MHz):3200
+ high-priority-max-avx512-frequency(MHz):3100
+ bucket-1
+ high-priority-cores-count:4
+ high-priority-max-frequency(MHz):3100
+ high-priority-max-avx2-frequency(MHz):3000
+ high-priority-max-avx512-frequency(MHz):2900
+ bucket-2
+ high-priority-cores-count:6
+ high-priority-max-frequency(MHz):3100
+ high-priority-max-avx2-frequency(MHz):3000
+ high-priority-max-avx512-frequency(MHz):2900
+ speed-select-turbo-freq-clip-frequencies
+ low-priority-max-frequency(MHz):2600
+ low-priority-max-avx2-frequency(MHz):2400
+ low-priority-max-avx512-frequency(MHz):2100
+
+Based on the output above, there is an Intel(R) SST-TF bucket for which there
+are two high priority cores. If only two cores are set as high priority, the
+maximum turbo frequency on those cores can be increased to 3200 MHz. This is
+100 MHz more than the base turbo capability for all cores.
+
+In turn, for the hackbench workload, two CPUs can be set as high priority and
+the rest as low priority. One side effect is that, once enabled, the low
+priority cores are clipped to a lower maximum frequency of 2600 MHz.
+
+Enable Intel(R) SST-TF
+~~~~~~~~~~~~~~~~~~~~~~
+
+To enable Intel(R) SST-TF, execute::
+
+ # intel-speed-select -c 12,13 turbo-freq enable -a
+ Intel(R) Speed Select Technology
+ Executing on CPU model: X
+ package-0
+ die-0
+ cpu-12
+ turbo-freq
+ enable:success
+ package-0
+ die-0
+ cpu-13
+ turbo-freq
+ enable:success
+ package--1
+ die-0
+ cpu-63
+ turbo-freq --auto
+ enable:success
+
+In this case, the option "-a" is optional. If set, it enables the Intel(R)
+SST-TF feature and also sets the CPUs to high and low priority using the
+Intel(R) Speed Select Technology Core Power (Intel(R) SST-CP) feature. The CPU
+numbers passed with the "-c" argument are marked as high priority, including
+their siblings.
+
+If the -a option is not used, then the following steps are required before
+enabling Intel(R) SST-TF:
+
+- Discover Intel(R) SST-TF and note the buckets of high priority cores and their maximum frequency
+
+- Enable CLOS using the core-power feature set
+
+- Configure CLOS parameters
+
+- Subscribe the desired CPUs to CLOS groups, making sure that the high priority cores are set to the maximum frequency
+
+If the same hackbench workload is executed with the hackbench threads scheduled
+on the high priority CPUs::
+
+ #taskset -c 12,13 perf bench -r 100 sched pipe
+ # Running 'sched/pipe' benchmark:
+ # Executed 1000000 pipe operations between two processes
+ Total time: 5.510 [sec]
+ 5.510165 usecs/op
+ 180826 ops/sec
+
+This improves performance by around 3.3% on a busy system. Here the turbostat
+output shows that CPU 12 and CPU 13 are getting a 100 MHz boost.
+The turbostat output::
+
+ #turbostat -c 0-13 --show Package,Core,CPU,Bzy_MHz -i 1
+ Package Core CPU Bzy_MHz
+ ...
+ 0 12 12 3200
+ 0 13 13 3200
diff --git a/Documentation/admin-guide/pm/intel_pstate.rst b/Documentation/admin-guide/pm/intel_pstate.rst
index ad392f3aee06..39d80bc29ccd 100644
--- a/Documentation/admin-guide/pm/intel_pstate.rst
+++ b/Documentation/admin-guide/pm/intel_pstate.rst
@@ -62,9 +62,10 @@ on the capabilities of the processor.
Active Mode
-----------
-This is the default operation mode of ``intel_pstate``. If it works in this
-mode, the ``scaling_driver`` policy attribute in ``sysfs`` for all ``CPUFreq``
-policies contains the string "intel_pstate".
+This is the default operation mode of ``intel_pstate`` for processors with
+hardware-managed P-states (HWP) support. If it works in this mode, the
+``scaling_driver`` policy attribute in ``sysfs`` for all ``CPUFreq`` policies
+contains the string "intel_pstate".
In this mode the driver bypasses the scaling governors layer of ``CPUFreq`` and
provides its own scaling algorithms for P-state selection. Those algorithms
@@ -138,12 +139,13 @@ internal P-state selection logic to be less performance-focused.
Active Mode Without HWP
~~~~~~~~~~~~~~~~~~~~~~~
-This is the default operation mode for processors that do not support the HWP
-feature. It also is used by default with the ``intel_pstate=no_hwp`` argument
-in the kernel command line. However, in this mode ``intel_pstate`` may refuse
-to work with the given processor if it does not recognize it. [Note that
-``intel_pstate`` will never refuse to work with any processor with the HWP
-feature enabled.]
+This operation mode is optional for processors that do not support the HWP
+feature or when the ``intel_pstate=no_hwp`` argument is passed to the kernel in
+the command line. The active mode is used in those cases if the
+``intel_pstate=active`` argument is passed to the kernel in the command line.
+In this mode ``intel_pstate`` may refuse to work with processors that are not
+recognized by it. [Note that ``intel_pstate`` will never refuse to work with
+any processor with the HWP feature enabled.]
In this mode ``intel_pstate`` registers utilization update callbacks with the
CPU scheduler in order to run a P-state selection algorithm, either
@@ -188,10 +190,14 @@ is not set.
Passive Mode
------------
-This mode is used if the ``intel_pstate=passive`` argument is passed to the
-kernel in the command line (it implies the ``intel_pstate=no_hwp`` setting too).
-Like in the active mode without HWP support, in this mode ``intel_pstate`` may
-refuse to work with the given processor if it does not recognize it.
+This is the default operation mode of ``intel_pstate`` for processors without
+hardware-managed P-states (HWP) support. It is always used if the
+``intel_pstate=passive`` argument is passed to the kernel in the command line
+regardless of whether or not the given processor supports HWP. [Note that the
+``intel_pstate=no_hwp`` setting implies ``intel_pstate=passive`` if it is used
+without ``intel_pstate=active``.] Like in the active mode without HWP support,
+in this mode ``intel_pstate`` may refuse to work with processors that are not
+recognized by it.
If the driver works in this mode, the ``scaling_driver`` policy attribute in
``sysfs`` for all ``CPUFreq`` policies contains the string "intel_cpufreq".
diff --git a/Documentation/admin-guide/pm/working-state.rst b/Documentation/admin-guide/pm/working-state.rst
index 0a38cdf39df1..f40994c422dc 100644
--- a/Documentation/admin-guide/pm/working-state.rst
+++ b/Documentation/admin-guide/pm/working-state.rst
@@ -13,3 +13,4 @@ Working-State Power Management
intel_pstate
cpufreq_drivers
intel_epb
+ intel-speed-select
diff --git a/Documentation/admin-guide/pstore-blk.rst b/Documentation/admin-guide/pstore-blk.rst
new file mode 100644
index 000000000000..296d5027787a
--- /dev/null
+++ b/Documentation/admin-guide/pstore-blk.rst
@@ -0,0 +1,243 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+pstore block oops/panic logger
+==============================
+
+Introduction
+------------
+
+pstore block (pstore/blk) is an oops/panic logger that writes its logs to a
+block device (or a compatible non-block device) before the system crashes. The
+log files can be retrieved by mounting the pstore filesystem, e.g.::
+
+ mount -t pstore pstore /sys/fs/pstore
+
+
+pstore block concepts
+---------------------
+
+pstore/blk provides an efficient configuration method, which divides all
+configurations into two parts: configurations for the user and configurations
+for the driver.
+
+Configurations for the user determine how pstore/blk works, such as pmsg_size,
+kmsg_size and so on. All of them support both Kconfig and module parameters,
+but module parameters have priority over Kconfig.
+
+Configurations for the driver are all about the block device or non-block
+device, such as the total_size of the block device and its read/write
+operations.
+
+Configurations for user
+-----------------------
+
+All of these configurations support both Kconfig and module parameters, but
+module parameters have priority over Kconfig.
+
+Here is an example for module parameters::
+
+ pstore_blk.blkdev=179:7 pstore_blk.kmsg_size=64
+
+The details of each configuration are described below.
+
+blkdev
+~~~~~~
+
+The block device to use. Most of the time, it is a partition of a block device.
+It is required for pstore/blk. It is also used to specify an MTD device.
+
+It accepts the following variants for block device:
+
+1. <hex_major><hex_minor> device number in hexadecimal represents itself; no
+ leading 0x, for example b302.
+#. /dev/<disk_name> represents the device number of disk
+#. /dev/<disk_name><decimal> represents the device number of partition - device
+ number of disk plus the partition number
+#. /dev/<disk_name>p<decimal> - same as the above; this form is used when disk
+ name of partitioned disk ends with a digit.
+#. PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF represents the unique id of
+ a partition if the partition table provides it. The UUID may be either an
+ EFI/GPT UUID, or refer to an MSDOS partition using the format SSSSSSSS-PP,
+ where SSSSSSSS is a zero-filled hex representation of the 32-bit
+ "NT disk signature", and PP is a zero-filled hex representation of the
+ 1-based partition number.
+#. PARTUUID=<UUID>/PARTNROFF=<int> to select a partition in relation to a
+ partition with a known unique id.
+#. <major>:<minor> major and minor number of the device separated by a colon.
+
+It accepts the following variants for MTD device:
+
+1. <device name> MTD device name. "pstore" is recommended.
+#. <device number> MTD device number.
+
+kmsg_size
+~~~~~~~~~
+
+The chunk size in KB for the oops/panic front-end. It **MUST** be a multiple of
+4. It is optional if you do not care about the oops/panic log.
+
+There are multiple chunks for the oops/panic front-end, depending on the space
+remaining after the other pstore front-ends are allocated.
+
+pstore/blk logs to the oops/panic chunks one by one, and always overwrites the
+oldest chunk if there is no free chunk left.
+
+pmsg_size
+~~~~~~~~~
+
+The chunk size in KB for the pmsg front-end. It **MUST** be a multiple of 4.
+It is optional if you do not care about the pmsg log.
+
+Unlike the oops/panic front-end, there is only one chunk for the pmsg front-end.
+
+Pmsg is a user space accessible pstore object. Writes to */dev/pmsg0* are
+appended to the chunk. On reboot the contents are available in
+*/sys/fs/pstore/pmsg-pstore-blk-0*.
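+
+For example (a sketch; the device node and the resulting file name are the
+ones described above, and the written text is arbitrary)::
+
+ # echo "hello from userspace" > /dev/pmsg0
+ # reboot -f
+ [...]
+ # mount -t pstore pstore /sys/fs/pstore
+ # cat /sys/fs/pstore/pmsg-pstore-blk-0
+ hello from userspace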
+
+console_size
+~~~~~~~~~~~~
+
+The chunk size in KB for the console front-end. It **MUST** be a multiple of 4.
+It is optional if you do not care about the console log.
+
+Similar to the pmsg front-end, there is only one chunk for the console
+front-end.
+
+All console log output is appended to the chunk. On reboot the contents are
+available in */sys/fs/pstore/console-pstore-blk-0*.
+
+ftrace_size
+~~~~~~~~~~~
+
+The chunk size in KB for the ftrace front-end. It **MUST** be a multiple of 4.
+It is optional if you do not care about the ftrace log.
+
+Similar to the oops front-end, there are multiple chunks for the ftrace
+front-end, one per CPU. Each chunk size is equal to
+ftrace_size / processors_count.
+
+All ftrace log output is appended to these chunks. On reboot the contents are
+combined and available in */sys/fs/pstore/ftrace-pstore-blk-0*.
+
+Persistent function tracing might be useful for debugging software or hardware
+related hangs. Here is an example of usage::
+
+ # mount -t pstore pstore /sys/fs/pstore
+ # mount -t debugfs debugfs /sys/kernel/debug/
+ # echo 1 > /sys/kernel/debug/pstore/record_ftrace
+ # reboot -f
+ [...]
+ # mount -t pstore pstore /sys/fs/pstore
+ # tail /sys/fs/pstore/ftrace-pstore-blk-0
+ CPU:0 ts:5914676 c0063828 c0063b94 call_cpuidle <- cpu_startup_entry+0x1b8/0x1e0
+ CPU:0 ts:5914678 c039ecdc c006385c cpuidle_enter_state <- call_cpuidle+0x44/0x48
+ CPU:0 ts:5914680 c039e9a0 c039ecf0 cpuidle_enter_freeze <- cpuidle_enter_state+0x304/0x314
+ CPU:0 ts:5914681 c0063870 c039ea30 sched_idle_set_state <- cpuidle_enter_state+0x44/0x314
+ CPU:1 ts:5916720 c0160f59 c015ee04 kernfs_unmap_bin_file <- __kernfs_remove+0x140/0x204
+ CPU:1 ts:5916721 c05ca625 c015ee0c __mutex_lock_slowpath <- __kernfs_remove+0x148/0x204
+ CPU:1 ts:5916723 c05c813d c05ca630 yield_to <- __mutex_lock_slowpath+0x314/0x358
+ CPU:1 ts:5916724 c05ca2d1 c05ca638 __ww_mutex_lock <- __mutex_lock_slowpath+0x31c/0x358
+
+max_reason
+~~~~~~~~~~
+
+Limiting which kinds of kmsg dumps are stored can be controlled via
+the ``max_reason`` value, as defined in include/linux/kmsg_dump.h's
+``enum kmsg_dump_reason``. For example, to store both Oopses and Panics,
+``max_reason`` should be set to 2 (KMSG_DUMP_OOPS), to store only Panics
+``max_reason`` should be set to 1 (KMSG_DUMP_PANIC). Setting this to 0
+(KMSG_DUMP_UNDEF), means the reason filtering will be controlled by the
+``printk.always_kmsg_dump`` boot param: if unset, it'll be KMSG_DUMP_OOPS,
+otherwise KMSG_DUMP_MAX.
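+
+For example, to keep records for both Oopses and Panics via the module
+parameter interface (extending the earlier boot parameter example)::
+
+ pstore_blk.blkdev=179:7 pstore_blk.kmsg_size=64 pstore_blk.max_reason=2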
+
+Configurations for driver
+-------------------------
+
+Only device drivers care about these configurations. A block device driver
+uses ``register_pstore_blk`` to register with pstore/blk.
+
+.. kernel-doc:: fs/pstore/blk.c
+ :identifiers: register_pstore_blk
+
+A non-block device driver uses ``register_pstore_device`` with
+``struct pstore_device_info`` to register to pstore/blk.
+
+.. kernel-doc:: fs/pstore/blk.c
+ :identifiers: register_pstore_device
+
+.. kernel-doc:: include/linux/pstore_blk.h
+ :identifiers: pstore_device_info
+
+Compression and header
+----------------------
+
+A block device is usually large enough to hold uncompressed oops data. In
+fact, data compression is not recommended, because pstore/blk inserts some
+information into the first line of the oops/panic data. For example::
+
+ Panic: Total 16 times
+
+This means it is the 16th oops/panic since the first boot. The number of
+oops/panic occurrences since the first boot is sometimes useful for judging
+whether the system is stable.
+
+The following line is inserted by the pstore filesystem. For example::
+
+ Oops#2 Part1
+
+This means it is the 2nd oops since the last boot.
+
+Reading the data
+----------------
+
+The dump data can be read from the pstore filesystem. The format of these
+files is ``dmesg-pstore-blk-[N]`` for the oops/panic front-end,
+``pmsg-pstore-blk-0`` for the pmsg front-end and so on. The timestamp of the
+dump file records the trigger time. To delete a stored record from the block
+device, simply unlink the respective pstore file.
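+
+For example (illustrative file names; the exact names depend on which
+front-ends are enabled on the system)::
+
+ # mount -t pstore pstore /sys/fs/pstore
+ # ls /sys/fs/pstore
+ dmesg-pstore-blk-0  pmsg-pstore-blk-0
+ # rm /sys/fs/pstore/dmesg-pstore-blk-0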
+
+Attentions in panic read/write APIs
+-----------------------------------
+
+On panic, the kernel is not going to run for much longer; tasks will not be
+scheduled and most kernel resources will be out of service. It looks like a
+single-threaded program running on a single-core computer.
+
+The following points require special attention for panic read/write APIs:
+
+1. Can **NOT** allocate any memory.
+ If you need memory, allocate it while the block driver is initializing rather
+ than waiting until the panic.
+#. Must be polled, **NOT** interrupt driven.
+ No tasks are scheduled any more. The block driver should delay to ensure the
+ write succeeds, but must NOT sleep.
+#. Can **NOT** take any lock.
+ There is no other task, nor any shared resource; you are safe to break all
+ locks.
+#. Just use the CPU to transfer.
+ Do not use DMA to transfer unless you are sure that DMA will not hold any
+ lock.
+#. Control registers directly.
+ Please control registers directly rather than using Linux kernel resources.
+ Do the I/O mapping while initializing rather than waiting until a panic
+ occurs.
+#. Reset your block device and controller if necessary.
+ If you are not sure of the state of your block device and controller when
+ a panic occurs, you are safe to stop and reset them.
+
+pstore/blk supports psblk_blkdev_info(), which is defined in
+*linux/pstore_blk.h*, to get information about the block device in use, such as
+the device number, sector count and start sector of the whole disk.
+
+pstore block internals
+----------------------
+
+For developer reference, here are all the important structures and APIs:
+
+.. kernel-doc:: fs/pstore/zone.c
+ :internal:
+
+.. kernel-doc:: include/linux/pstore_zone.h
+ :internal:
+
+.. kernel-doc:: fs/pstore/blk.c
+ :export:
+
+.. kernel-doc:: include/linux/pstore_blk.h
+ :internal:
diff --git a/Documentation/admin-guide/ramoops.rst b/Documentation/admin-guide/ramoops.rst
index 6dbcc5481000..a60a96218ba9 100644
--- a/Documentation/admin-guide/ramoops.rst
+++ b/Documentation/admin-guide/ramoops.rst
@@ -32,11 +32,17 @@ memory to be mapped strongly ordered, and atomic operations on strongly ordered
memory are implementation defined, and won't work on many ARMs such as omaps.
The memory area is divided into ``record_size`` chunks (also rounded down to
-power of two) and each oops/panic writes a ``record_size`` chunk of
+power of two) and each kmesg dump writes a ``record_size`` chunk of
information.
-Dumping both oopses and panics can be done by setting 1 in the ``dump_oops``
-variable while setting 0 in that variable dumps only the panics.
+Limiting which kinds of kmsg dumps are stored can be controlled via
+the ``max_reason`` value, as defined in include/linux/kmsg_dump.h's
+``enum kmsg_dump_reason``. For example, to store both Oopses and Panics,
+``max_reason`` should be set to 2 (KMSG_DUMP_OOPS), to store only Panics
+``max_reason`` should be set to 1 (KMSG_DUMP_PANIC). Setting this to 0
+(KMSG_DUMP_UNDEF), means the reason filtering will be controlled by the
+``printk.always_kmsg_dump`` boot param: if unset, it'll be KMSG_DUMP_OOPS,
+otherwise KMSG_DUMP_MAX.
The module uses a counter to record multiple dumps but the counter gets reset
on restart (i.e. new dumps after the restart will overwrite old ones).
@@ -90,7 +96,7 @@ Setting the ramoops parameters can be done in several different manners:
.mem_address = <...>,
.mem_type = <...>,
.record_size = <...>,
- .dump_oops = <...>,
+ .max_reason = <...>,
.ecc = <...>,
};
diff --git a/Documentation/admin-guide/ras.rst b/Documentation/admin-guide/ras.rst
index 0310db624964..7b481b2a368e 100644
--- a/Documentation/admin-guide/ras.rst
+++ b/Documentation/admin-guide/ras.rst
@@ -156,11 +156,11 @@ the labels provided by the BIOS won't match the real ones.
ECC memory
----------
-As mentioned on the previous section, ECC memory has extra bits to be
-used for error correction. So, on 64 bit systems, a memory module
-has 64 bits of *data width*, and 74 bits of *total width*. So, there are
-8 bits extra bits to be used for the error detection and correction
-mechanisms. Those extra bits are called *syndrome*\ [#f1]_\ [#f2]_.
+As mentioned in the previous section, ECC memory has extra bits to be
+used for error correction. In the above example, a memory module has
+64 bits of *data width*, and 72 bits of *total width*. The extra 8
+bits which are used for the error detection and correction mechanisms
+are referred to as the *syndrome*\ [#f1]_\ [#f2]_.
So, when the cpu requests the memory controller to write a word with
*data width*, the memory controller calculates the *syndrome* in real time,
@@ -212,7 +212,7 @@ EDAC - Error Detection And Correction
purposes.
When the subsystem was pushed upstream for the first time, on
- Kernel 2.6.16, for the first time, it was renamed to ``EDAC``.
+ Kernel 2.6.16, it was renamed to ``EDAC``.
Purpose
-------
@@ -351,15 +351,17 @@ controllers. The following example will assume 2 channels:
+------------+-----------+-----------+
| | ``ch0`` | ``ch1`` |
+============+===========+===========+
- | ``csrow0`` | DIMM_A0 | DIMM_B0 |
- | | rank0 | rank0 |
- +------------+ - | - |
+ | |**DIMM_A0**|**DIMM_B0**|
+ +------------+-----------+-----------+
+ | ``csrow0`` | rank0 | rank0 |
+ +------------+-----------+-----------+
| ``csrow1`` | rank1 | rank1 |
+------------+-----------+-----------+
- | ``csrow2`` | DIMM_A1 | DIMM_B1 |
- | | rank0 | rank0 |
- +------------+ - | - |
- | ``csrow3`` | rank1 | rank1 |
+ | |**DIMM_A1**|**DIMM_B1**|
+ +------------+-----------+-----------+
+ | ``csrow2`` | rank0 | rank0 |
+ +------------+-----------+-----------+
+ | ``csrow3`` | rank1 | rank1 |
+------------+-----------+-----------+
In the above example, there are 4 physical slots on the motherboard
diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst
index 0d427fd10941..1ebf68d01141 100644
--- a/Documentation/admin-guide/sysctl/kernel.rst
+++ b/Documentation/admin-guide/sysctl/kernel.rst
@@ -102,6 +102,30 @@ See the ``type_of_loader`` and ``ext_loader_ver`` fields in
:doc:`/x86/boot` for additional information.
+bpf_stats_enabled
+=================
+
+Controls whether the kernel should collect statistics on BPF programs
+(total time spent running, number of times run...). Enabling
+statistics causes a slight reduction in performance on each program
+run. The statistics can be seen using ``bpftool``.
+
+= ===================================
+0 Don't collect statistics (default).
+1 Collect statistics.
+= ===================================
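+
+For example, statistics collection can be toggled at run time (equivalent to
+writing the value to ``/proc/sys/kernel/bpf_stats_enabled``)::
+
+ sysctl -w kernel.bpf_stats_enabled=1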
+
+
+cad_pid
+=======
+
+This is the pid which will be signalled on reboot (notably, by
+Ctrl-Alt-Delete). Writing a value to this file which doesn't
+correspond to a running process will result in ``-ESRCH``.
+
+See also `ctrl-alt-del`_.
+
+
cap_last_cap
============
@@ -241,6 +265,40 @@ domain names are in general different. For a detailed discussion
see the ``hostname(1)`` man page.
+firmware_config
+===============
+
+See :doc:`/driver-api/firmware/fallback-mechanisms`.
+
+The entries in this directory allow the firmware loader helper
+fallback to be controlled:
+
+* ``force_sysfs_fallback``, when set to 1, forces the use of the
+ fallback;
+* ``ignore_sysfs_fallback``, when set to 1, ignores any fallback.
+
+
+ftrace_dump_on_oops
+===================
+
+Determines whether ``ftrace_dump()`` should be called on an oops (or
+kernel panic). This will output the contents of the ftrace buffers to
+the console. This is very useful for capturing traces that lead to
+crashes and outputting them to a serial console.
+
+= ===================================================
+0 Disabled (default).
+1 Dump buffers of all CPUs.
+2 Dump the buffer of the CPU that triggered the oops.
+= ===================================================
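+
+For example, to dump only the buffer of the CPU that triggered the oops::
+
+ sysctl -w kernel.ftrace_dump_on_oops=2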
+
+
+ftrace_enabled, stack_tracer_enabled
+====================================
+
+See :doc:`/trace/ftrace`.
+
+
hardlockup_all_cpu_backtrace
============================
@@ -344,6 +402,25 @@ Controls whether the panic kmsg data should be reported to Hyper-V.
= =========================================================
+ignore-unaligned-usertrap
+=========================
+
+On architectures where unaligned accesses cause traps, and where this
+feature is supported (``CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN``;
+currently, ``arc`` and ``ia64``), controls whether all unaligned traps
+are logged.
+
+= =============================================================
+0 Log all unaligned accesses.
+1 Only warn the first time a process traps. This is the default
+ setting.
+= =============================================================
+
+See also `unaligned-trap`_ and `unaligned-dump-stack`_. On ``ia64``,
+this allows system administrators to override the
+``IA64_THREAD_UAC_NOPRINT`` ``prctl`` and avoid logs being flooded.
+
+
kexec_load_disabled
===================
@@ -459,6 +536,15 @@ Notes:
successful IPC object allocation. If an IPC object allocation syscall
fails, it is undefined if the value remains unmodified or is reset to -1.
+
+ngroups_max
+===========
+
+Maximum number of supplementary groups, i.e. the maximum size which
+``setgroups`` will accept. Exports ``NGROUPS_MAX`` from the kernel.
+
+
+
nmi_watchdog
============
@@ -721,7 +807,13 @@ perf_event_paranoid
===================
Controls use of the performance events system by unprivileged
-users (without CAP_SYS_ADMIN). The default value is 2.
+users (without CAP_PERFMON). The default value is 2.
+
+For backward compatibility reasons access to system performance
+monitoring and observability remains open for CAP_SYS_ADMIN
+privileged processes but CAP_SYS_ADMIN usage for secure system
+performance monitoring and observability operations is discouraged
+with respect to CAP_PERFMON use cases.
=== ==================================================================
-1 Allow use of (almost) all events by all users.
@@ -730,13 +822,13 @@ users (without CAP_SYS_ADMIN). The default value is 2.
``CAP_IPC_LOCK``.
>=0 Disallow ftrace function tracepoint by users without
- ``CAP_SYS_ADMIN``.
+ ``CAP_PERFMON``.
- Disallow raw tracepoint access by users without ``CAP_SYS_ADMIN``.
+ Disallow raw tracepoint access by users without ``CAP_PERFMON``.
->=1 Disallow CPU event access by users without ``CAP_SYS_ADMIN``.
+>=1 Disallow CPU event access by users without ``CAP_PERFMON``.
->=2 Disallow kernel profiling by users without ``CAP_SYS_ADMIN``.
+>=2 Disallow kernel profiling by users without ``CAP_PERFMON``.
=== ==================================================================
@@ -871,7 +963,7 @@ this sysctl interface anymore.
pty
===
-See Documentation/filesystems/devpts.txt.
+See Documentation/filesystems/devpts.rst.
randomize_va_space
@@ -1167,6 +1259,65 @@ If a value outside of this range is written to ``threads-max`` an
``EINVAL`` error occurs.
+traceoff_on_warning
+===================
+
+When set, disables tracing (see :doc:`/trace/ftrace`) when a
+``WARN()`` is hit.
+
+
+tracepoint_printk
+=================
+
+When tracepoints are sent to printk() (enabled by the ``tp_printk``
+boot parameter), this entry provides runtime control::
+
+ echo 0 > /proc/sys/kernel/tracepoint_printk
+
+will stop tracepoints from being sent to printk(), and::
+
+ echo 1 > /proc/sys/kernel/tracepoint_printk
+
+will send them to printk() again.
+
+This only works if the kernel was booted with ``tp_printk`` enabled.
+
+See :doc:`/admin-guide/kernel-parameters` and
+:doc:`/trace/boottime-trace`.
+
+
+.. _unaligned-dump-stack:
+
+unaligned-dump-stack (ia64)
+===========================
+
+When logging unaligned accesses, controls whether the stack is
+dumped.
+
+= ===================================================
+0 Do not dump the stack. This is the default setting.
+1 Dump the stack.
+= ===================================================
+
+See also `ignore-unaligned-usertrap`_.
+
+
+unaligned-trap
+==============
+
+On architectures where unaligned accesses cause traps, and where this
+feature is supported (``CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW``; currently,
+``arc`` and ``parisc``), controls whether unaligned traps are caught
+and emulated (instead of failing).
+
+= ========================================================
+0 Do not emulate unaligned accesses.
+1 Emulate unaligned accesses. This is the default setting.
+= ========================================================
+
+See also `ignore-unaligned-usertrap`_.
+
+
unknown_nmi_panic
=================
@@ -1178,6 +1329,16 @@ NMI switch that most IA32 servers have fires unknown NMI up, for
example. If a system hangs up, try pressing the NMI switch.
+unprivileged_bpf_disabled
+=========================
+
+Writing 1 to this entry will disable unprivileged calls to ``bpf()``;
+once disabled, calling ``bpf()`` without ``CAP_SYS_ADMIN`` will return
+``-EPERM``.
+
+Once set, this can't be cleared.
+
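+For example::
+
+ sysctl -w kernel.unprivileged_bpf_disabled=1
+
+After this, a call to ``bpf()`` without ``CAP_SYS_ADMIN`` returns ``-EPERM``,
+and, as noted above, the setting cannot be cleared again.
+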
+
watchdog
========
diff --git a/Documentation/arm64/amu.rst b/Documentation/arm64/amu.rst
index 036783ee327f..452ec8b115c2 100644
--- a/Documentation/arm64/amu.rst
+++ b/Documentation/arm64/amu.rst
@@ -24,13 +24,13 @@ optional external memory-mapped interface.
Version 1 of the Activity Monitors architecture implements a counter group
of four fixed and architecturally defined 64-bit event counters.
-- CPU cycle counter: increments at the frequency of the CPU.
-- Constant counter: increments at the fixed frequency of the system
- clock.
-- Instructions retired: increments with every architecturally executed
- instruction.
-- Memory stall cycles: counts instruction dispatch stall cycles caused by
- misses in the last level cache within the clock domain.
+ - CPU cycle counter: increments at the frequency of the CPU.
+ - Constant counter: increments at the fixed frequency of the system
+ clock.
+ - Instructions retired: increments with every architecturally executed
+ instruction.
+ - Memory stall cycles: counts instruction dispatch stall cycles caused by
+ misses in the last level cache within the clock domain.
When in WFI or WFE these counters do not increment.
@@ -59,11 +59,11 @@ counters, only the presence of the extension.
Firmware (code running at higher exception levels, e.g. arm-tf) support is
needed to:
-- Enable access for lower exception levels (EL2 and EL1) to the AMU
- registers.
-- Enable the counters. If not enabled these will read as 0.
-- Save/restore the counters before/after the CPU is being put/brought up
- from the 'off' power state.
+ - Enable access for lower exception levels (EL2 and EL1) to the AMU
+ registers.
+ - Enable the counters. If not enabled these will read as 0.
+ - Save/restore the counters before/after the CPU is being put/brought up
+ from the 'off' power state.
When using kernels that have this feature enabled but boot with broken
firmware the user may experience panics or lockups when accessing the
@@ -81,10 +81,10 @@ are not trapped in EL2/EL3.
The fixed counters of AMUv1 are accessible though the following system
register definitions:
-- SYS_AMEVCNTR0_CORE_EL0
-- SYS_AMEVCNTR0_CONST_EL0
-- SYS_AMEVCNTR0_INST_RET_EL0
-- SYS_AMEVCNTR0_MEM_STALL_EL0
+ - SYS_AMEVCNTR0_CORE_EL0
+ - SYS_AMEVCNTR0_CONST_EL0
+ - SYS_AMEVCNTR0_INST_RET_EL0
+ - SYS_AMEVCNTR0_MEM_STALL_EL0
Auxiliary platform specific counters can be accessed using
SYS_AMEVCNTR1_EL0(n), where n is a value between 0 and 15.
@@ -97,9 +97,9 @@ Userspace access
Currently, access from userspace to the AMU registers is disabled due to:
-- Security reasons: they might expose information about code executed in
- secure mode.
-- Purpose: AMU counters are intended for system management use.
+ - Security reasons: they might expose information about code executed in
+ secure mode.
+ - Purpose: AMU counters are intended for system management use.
Also, the presence of the feature is not visible to userspace.
@@ -110,8 +110,8 @@ Virtualization
Currently, access from userspace (EL0) and kernelspace (EL1) on the KVM
guest side is disabled due to:
-- Security reasons: they might expose information about code executed
- by other guests or the host.
+ - Security reasons: they might expose information about code executed
+ by other guests or the host.
Any attempt to access the AMU registers will result in an UNDEFINED
exception being injected into the guest.
diff --git a/Documentation/arm64/booting.rst b/Documentation/arm64/booting.rst
index a3f1a47b6f1c..7552dbc1cc54 100644
--- a/Documentation/arm64/booting.rst
+++ b/Documentation/arm64/booting.rst
@@ -173,7 +173,10 @@ Before jumping into the kernel, the following conditions must be met:
- Caches, MMUs
The MMU must be off.
- Instruction cache may be on or off.
+
+ The instruction cache may be on or off, and must not hold any stale
+ entries corresponding to the loaded kernel image.
+
The address range corresponding to the loaded kernel image must be
cleaned to the PoC. In the presence of a system cache or other
coherent masters with caches enabled, this will typically require
@@ -238,6 +241,7 @@ Before jumping into the kernel, the following conditions must be met:
- The DT or ACPI tables must describe a GICv2 interrupt controller.
For CPUs with pointer authentication functionality:
+
- If EL3 is present:
- SCR_EL3.APK (bit 16) must be initialised to 0b1
@@ -249,18 +253,22 @@ Before jumping into the kernel, the following conditions must be met:
- HCR_EL2.API (bit 41) must be initialised to 0b1
For CPUs with Activity Monitors Unit v1 (AMUv1) extension present:
+
- If EL3 is present:
- CPTR_EL3.TAM (bit 30) must be initialised to 0b0
- CPTR_EL2.TAM (bit 30) must be initialised to 0b0
- AMCNTENSET0_EL0 must be initialised to 0b1111
- AMCNTENSET1_EL0 must be initialised to a platform specific value
- having 0b1 set for the corresponding bit for each of the auxiliary
- counters present.
+
+ - CPTR_EL3.TAM (bit 30) must be initialised to 0b0
+ - CPTR_EL2.TAM (bit 30) must be initialised to 0b0
+ - AMCNTENSET0_EL0 must be initialised to 0b1111
+ - AMCNTENSET1_EL0 must be initialised to a platform specific value
+ having 0b1 set for the corresponding bit for each of the auxiliary
+ counters present.
+
- If the kernel is entered at EL1:
- AMCNTENSET0_EL0 must be initialised to 0b1111
- AMCNTENSET1_EL0 must be initialised to a platform specific value
- having 0b1 set for the corresponding bit for each of the auxiliary
- counters present.
+
+ - AMCNTENSET0_EL0 must be initialised to 0b1111
+ - AMCNTENSET1_EL0 must be initialised to a platform specific value
+ having 0b1 set for the corresponding bit for each of the auxiliary
+ counters present.
The requirements described above for CPU mode, caches, MMUs, architected
timers, coherency and system registers apply to all CPUs. All CPUs must
@@ -304,7 +312,8 @@ following manner:
Documentation/devicetree/bindings/arm/psci.yaml.
- Secondary CPU general-purpose register settings
- x0 = 0 (reserved for future use)
- x1 = 0 (reserved for future use)
- x2 = 0 (reserved for future use)
- x3 = 0 (reserved for future use)
+
+ - x0 = 0 (reserved for future use)
+ - x1 = 0 (reserved for future use)
+ - x2 = 0 (reserved for future use)
+ - x3 = 0 (reserved for future use)
diff --git a/Documentation/arm64/cpu-feature-registers.rst b/Documentation/arm64/cpu-feature-registers.rst
index 41937a8091aa..314fa5bc2655 100644
--- a/Documentation/arm64/cpu-feature-registers.rst
+++ b/Documentation/arm64/cpu-feature-registers.rst
@@ -176,6 +176,8 @@ infrastructure:
+------------------------------+---------+---------+
| SSBS | [7-4] | y |
+------------------------------+---------+---------+
+ | BT | [3-0] | y |
+ +------------------------------+---------+---------+
4) MIDR_EL1 - Main ID Register
diff --git a/Documentation/arm64/elf_hwcaps.rst b/Documentation/arm64/elf_hwcaps.rst
index 7dfb97dfe416..84a9fd2d41b4 100644
--- a/Documentation/arm64/elf_hwcaps.rst
+++ b/Documentation/arm64/elf_hwcaps.rst
@@ -236,6 +236,11 @@ HWCAP2_RNG
Functionality implied by ID_AA64ISAR0_EL1.RNDR == 0b0001.
+HWCAP2_BTI
+
+ Functionality implied by ID_AA64PFR0_EL1.BT == 0b0001.
+
+
4. Unused AT_HWCAP bits
-----------------------
diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst
index 2c08c628febd..936cf2a59ca4 100644
--- a/Documentation/arm64/silicon-errata.rst
+++ b/Documentation/arm64/silicon-errata.rst
@@ -64,6 +64,10 @@ stable kernels.
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A53 | #843419 | ARM64_ERRATUM_843419 |
+----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A55 | #1024718 | ARM64_ERRATUM_1024718 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A55 | #1530923 | ARM64_ERRATUM_1530923 |
++----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A57 | #832075 | ARM64_ERRATUM_832075 |
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A57 | #852523 | N/A |
@@ -78,8 +82,6 @@ stable kernels.
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A73 | #858921 | ARM64_ERRATUM_858921 |
+----------------+-----------------+-----------------+-----------------------------+
-| ARM | Cortex-A55 | #1024718 | ARM64_ERRATUM_1024718 |
-+----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A76 | #1188873,1418040| ARM64_ERRATUM_1418040 |
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A76 | #1165522 | ARM64_ERRATUM_1165522 |
@@ -88,8 +90,6 @@ stable kernels.
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A76 | #1463225 | ARM64_ERRATUM_1463225 |
+----------------+-----------------+-----------------+-----------------------------+
-| ARM | Cortex-A55 | #1530923 | ARM64_ERRATUM_1530923 |
-+----------------+-----------------+-----------------+-----------------------------+
| ARM | Neoverse-N1 | #1188873,1418040| ARM64_ERRATUM_1418040 |
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Neoverse-N1 | #1349291 | N/A |
diff --git a/Documentation/block/biovecs.rst b/Documentation/block/biovecs.rst
index ad303a2569d3..36771a131b56 100644
--- a/Documentation/block/biovecs.rst
+++ b/Documentation/block/biovecs.rst
@@ -129,6 +129,7 @@ Usage of helpers:
::
bio_for_each_segment_all()
+ bio_for_each_bvec_all()
bio_first_bvec_all()
bio_first_page_all()
bio_last_bvec_all()
@@ -143,4 +144,5 @@ Usage of helpers:
bio_vec' will contain a multi-page IO vector during the iteration::
bio_for_each_bvec()
+ bio_for_each_bvec_all()
rq_for_each_bvec()
diff --git a/Documentation/block/index.rst b/Documentation/block/index.rst
index 3fa7a52fafa4..026addfc69bc 100644
--- a/Documentation/block/index.rst
+++ b/Documentation/block/index.rst
@@ -14,6 +14,7 @@ Block
cmdline-partition
data-integrity
deadline-iosched
+ inline-encryption
ioprio
kyber-iosched
null_blk
diff --git a/Documentation/block/inline-encryption.rst b/Documentation/block/inline-encryption.rst
new file mode 100644
index 000000000000..354817b80887
--- /dev/null
+++ b/Documentation/block/inline-encryption.rst
@@ -0,0 +1,263 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=================
+Inline Encryption
+=================
+
+Background
+==========
+
+Inline encryption hardware sits logically between memory and the disk, and can
+en/decrypt data as it goes in/out of the disk. Inline encryption hardware has a
+fixed number of "keyslots" - slots into which encryption contexts (i.e. the
+encryption key, encryption algorithm, data unit size) can be programmed by the
+kernel at any time. Each request sent to the disk can be tagged with the index
+of a keyslot (and also a data unit number to act as an encryption tweak), and
+the inline encryption hardware will en/decrypt the data in the request with the
+encryption context programmed into that keyslot. This is very different from
+full disk encryption solutions like self encrypting drives/TCG OPAL/ATA
+Security standards, since with inline encryption, any block on disk could be
+encrypted with any encryption context the kernel chooses.
+
+
+Objective
+=========
+
+We want to support inline encryption (IE) in the kernel.
+To allow for testing, we also want a crypto API fallback when actual
+IE hardware is absent. We also want IE to work with layered devices
+like dm and loopback (i.e. we want to be able to use the IE hardware
+of the underlying devices if present, or else fall back to crypto API
+en/decryption).
+
+
+Constraints and notes
+=====================
+
+- IE hardware has a limited number of "keyslots" that can be programmed
+ with an encryption context (key, algorithm, data unit size, etc.) at any time.
+ One can specify a keyslot in a data request made to the device, and the
+ device will en/decrypt the data using the encryption context programmed into
+ that specified keyslot. When possible, we want to make multiple requests with
+ the same encryption context share the same keyslot.
+
+- We need a way for upper layers like filesystems to specify an encryption
+ context to use for en/decrypting a struct bio, and a device driver (like UFS)
+ needs to be able to use that encryption context when it processes the bio.
+
+- We need a way for device drivers to expose their inline encryption
+ capabilities in a unified way to the upper layers.
+
+
+Design
+======
+
+We add a :c:type:`struct bio_crypt_ctx` to :c:type:`struct bio` that can
+represent an encryption context, because we need to be able to pass this
+encryption context from the upper layers (like the fs layer) to the
+device driver to act upon.
+
+While IE hardware works on the notion of keyslots, the FS layer has no
+knowledge of keyslots - it simply wants to specify an encryption context to
+use while en/decrypting a bio.
+
+We introduce a keyslot manager (KSM) that handles the translation from
+encryption contexts specified by the FS to keyslots on the IE hardware.
+This KSM also serves as the way IE hardware can expose its capabilities to
+upper layers. The generic mode of operation is: each device driver that wants
+to support IE will construct a KSM and set it up in its struct request_queue.
+Upper layers that want to use IE on this device can then use this KSM in
+the device's struct request_queue to translate an encryption context into
+a keyslot. The presence of the KSM in the request queue shall be used to mean
+that the device supports IE.
+
+The KSM uses refcounts to track which keyslots are idle (either they have no
+encryption context programmed, or there are no in-flight struct bios
+referencing that keyslot). When a new encryption context needs a keyslot, it
+tries to find a keyslot that has already been programmed with the same
+encryption context, and if there is no such keyslot, it evicts the least
+recently used idle keyslot and programs the new encryption context into that
+one. If no idle keyslots are available, then the caller will sleep until there
+is at least one.
+
+
+blk-mq changes, other block layer changes and blk-crypto-fallback
+=================================================================
+
+We add a pointer to a ``bi_crypt_context`` and ``keyslot`` to
+:c:type:`struct request`. These will be referred to as the ``crypto fields``
+for the request. This ``keyslot`` is the keyslot into which the
+``bi_crypt_context`` has been programmed in the KSM of the ``request_queue``
+that this request is being sent to.
+
+We introduce ``block/blk-crypto-fallback.c``, which allows upper layers to remain
+blissfully unaware of whether or not real inline encryption hardware is present
+underneath. When a bio is submitted with a target ``request_queue`` that doesn't
+support the encryption context specified with the bio, the block layer will
+en/decrypt the bio with the blk-crypto-fallback.
+
+If the bio is a ``WRITE`` bio, a bounce bio is allocated, and the data in the bio
+is encrypted and stored in the bounce bio - blk-mq will then proceed to process
+the bounce bio as if it were not encrypted at all (except when blk-integrity is
+concerned). ``blk-crypto-fallback`` sets the bounce bio's ``bi_end_io`` to an
+internal function that cleans up the bounce bio and ends the original bio.
+
+If the bio is a ``READ`` bio, the bio's ``bi_end_io`` (and also ``bi_private``)
+is saved and overwritten by ``blk-crypto-fallback`` to
+``bio_crypto_fallback_decrypt_bio``. The bio's ``bi_crypt_context`` is also
+overwritten with ``NULL``, so that to the rest of the stack, the bio looks
+as if it was a regular bio that never had an encryption context specified.
+``bio_crypto_fallback_decrypt_bio`` will decrypt the bio, restore the original
+``bi_end_io`` (and also ``bi_private``) and end the bio again.
+
+Regardless of whether real inline encryption hardware is used or the
+blk-crypto-fallback is used, the ciphertext written to disk (and hence the
+on-disk format of data) will be the same (assuming the hardware's implementation
+of the algorithm being used adheres to spec and functions correctly).
+
+If a ``request queue``'s inline encryption hardware claimed to support the
+encryption context specified with a bio, then it will not be handled by the
+``blk-crypto-fallback``. We will eventually reach a point in blk-mq when a
+:c:type:`struct request` needs to be allocated for that bio. At that point,
+blk-mq tries to program the encryption context into the ``request_queue``'s
+keyslot_manager, and obtain a keyslot, which it stores in its newly added
+``keyslot`` field. This keyslot is released when the request is completed.
+
+When the first bio is added to a request, ``blk_crypto_rq_bio_prep`` is called,
+which sets the request's ``crypt_ctx`` to a copy of the bio's
+``bi_crypt_context``. bio_crypt_do_front_merge is called whenever a subsequent
+bio is merged to the front of the request, which updates the ``crypt_ctx`` of
+the request so that it matches the newly merged bio's ``bi_crypt_context``. In
+particular, the request keeps a copy of the ``bi_crypt_context`` of the first
+bio in its bio-list (blk-mq needs to be careful to maintain this invariant
+during bio and request merges).
+
+To make it possible for inline encryption to work with request queue based
+layered devices, when a request is cloned, its ``crypto fields`` are cloned as
+well. When the cloned request is submitted, blk-mq programs the
+``bi_crypt_context`` of the request into the clone's request_queue's keyslot
+manager, and stores the returned keyslot in the clone's ``keyslot``.
+
+
+API presented to users of the block layer
+=========================================
+
+``struct blk_crypto_key`` represents a crypto key (the raw key, size of the
+key, the crypto algorithm to use, the data unit size to use, and the number of
+bytes required to represent data unit numbers that will be specified with the
+``bi_crypt_context``).
+
+``blk_crypto_init_key`` allows upper layers to initialize such a
+``blk_crypto_key``.
+
+``bio_crypt_set_ctx`` should be called on any bio that a user of
+the block layer wants en/decrypted via inline encryption (or the
+blk-crypto-fallback, if hardware support isn't available for the desired
+crypto configuration). This function takes the ``blk_crypto_key`` and the
+data unit number (DUN) to use when en/decrypting the bio.
+
+``blk_crypto_config_supported`` allows upper layers to query whether or not an
+encryption context for a particular ``request_queue`` can be handled by
+blk-crypto (either by real inline encryption hardware, or by the
+blk-crypto-fallback). This is useful e.g. when the blk-crypto-fallback is
+disabled, and the upper layer wants to use an algorithm that may not be
+supported by hardware - this function lets the upper layer know ahead of time
+that the algorithm isn't supported, and the upper layer can fall back to
+something else if appropriate.
+
+``blk_crypto_start_using_key`` - Upper layers must call this function on
+``blk_crypto_key`` and a ``request_queue`` before using the key with any bio
+headed for that ``request_queue``. This function ensures that either the
+hardware supports the key's crypto settings, or the crypto API fallback has
+transforms for the needed mode allocated and ready to go. Note that this
+function may allocate an ``skcipher``, and must not be called from the data
+path, since allocating ``skciphers`` from the data path can deadlock.
+
+``blk_crypto_evict_key`` *must* be called by upper layers before a
+``blk_crypto_key`` is freed. Further, it *must* be called only once there are
+no more in-flight requests that use that ``blk_crypto_key``.
+``blk_crypto_evict_key`` will ensure that the key is removed from any keyslots
+in inline encryption hardware that the key might have been programmed into (or
+from the blk-crypto-fallback).
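+
+As a rough sketch of how an upper layer might tie these calls together
+(signatures paraphrased from ``include/linux/blk-crypto.h`` as of this patch;
+``raw_key``, ``q``, ``bio`` and ``dun`` stand in for whatever the caller
+actually has at hand, and error handling is omitted)::
+
+    struct blk_crypto_key key;
+    u64 dun[BLK_CRYPTO_DUN_ARRAY_SIZE] = { 0 };
+
+    /* Describe the key: raw bytes, algorithm, DUN size and data unit size. */
+    blk_crypto_init_key(&key, raw_key, BLK_ENCRYPTION_MODE_AES_256_XTS,
+                        sizeof(u64), 4096);
+
+    /* Outside the data path: ensure hardware or the fallback can handle it. */
+    blk_crypto_start_using_key(&key, q);
+
+    /* In the data path: tag each bio with the key and its starting DUN. */
+    bio_crypt_set_ctx(bio, &key, dun, GFP_NOIO);
+    submit_bio(bio);
+
+    /* Once no requests using the key remain in flight, before freeing it: */
+    blk_crypto_evict_key(q, &key);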
+
+API presented to device drivers
+===============================
+
+A :c:type:`struct blk_keyslot_manager` should be set up by device drivers in
+the ``request_queue`` of the device. The device driver needs to call
+``blk_ksm_init`` on the ``blk_keyslot_manager``, specifying the number of
+keyslots supported by the hardware.
+
+The device driver also needs to tell the KSM how to actually manipulate the
+IE hardware in the device to do things like programming a crypto key into
+a particular keyslot in the IE hardware. All this is achieved through the
+:c:type:`struct blk_ksm_ll_ops` field in the KSM that the device driver
+must fill in after initializing the ``blk_keyslot_manager``.
+
+The KSM also handles runtime power management for the device when applicable
+(e.g. when it wants to program a crypto key into the IE hardware, the device
+must be runtime powered on) - so the device driver must also set the ``dev``
+field in the KSM to point to the ``struct device`` that the KSM should use for
+runtime power management.
+
+``blk_ksm_reprogram_all_keys`` can be called by device drivers if the device
+needs each and every one of its keyslots to be reprogrammed with the key it
+"should have" at the point in time when the function is called. This is useful
+e.g. if a device loses all its keys on runtime power down/up.
+
+``blk_ksm_destroy`` should be called to free up all resources allocated for a
+keyslot manager by ``blk_ksm_init``, once the ``blk_keyslot_manager`` is no
+longer needed.
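+
+A rough sketch of the driver side, with names paraphrased from
+``include/linux/keyslot-manager.h`` at the time of this patch; the ``my_*``
+hooks and ``NUM_SLOTS`` are hypothetical placeholders, and error handling plus
+the driver-specific step of attaching the KSM to the device's
+``request_queue`` are omitted::
+
+    static int my_keyslot_program(struct blk_keyslot_manager *ksm,
+                                  const struct blk_crypto_key *key,
+                                  unsigned int slot)
+    {
+        /* Program 'key' into hardware keyslot 'slot'. */
+        return 0;
+    }
+
+    static int my_keyslot_evict(struct blk_keyslot_manager *ksm,
+                                const struct blk_crypto_key *key,
+                                unsigned int slot)
+    {
+        /* Remove whatever key occupies hardware keyslot 'slot'. */
+        return 0;
+    }
+
+    static const struct blk_ksm_ll_ops my_ksm_ops = {
+        .keyslot_program = my_keyslot_program,
+        .keyslot_evict   = my_keyslot_evict,
+    };
+
+    /* At probe time: */
+    blk_ksm_init(ksm, NUM_SLOTS);
+    ksm->ksm_ll_ops = my_ksm_ops;
+    ksm->dev = dev;   /* used for runtime PM around keyslot programming */
+
+    /* At remove time: */
+    blk_ksm_destroy(ksm);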
+
+
+Layered Devices
+===============
+
+Request queue based layered devices like dm-rq that wish to support IE need to
+create their own keyslot manager for their request queue, and expose whatever
+functionality they choose. When a layered device wants to pass a clone of that
+request to another ``request_queue``, blk-crypto will initialize and prepare the
+clone as necessary - see ``blk_crypto_insert_cloned_request`` in
+``blk-crypto.c``.
+
+
+Future Optimizations for layered devices
+========================================
+
+Creating a keyslot manager for a layered device uses up memory for each
+keyslot, and in general, a layered device merely passes the request on to a
+"child" device, so the keyslots in the layered device itself are completely
+unused, and don't need any refcounting or keyslot programming. We can instead
+define a new type of KSM, the "passthrough KSM", which layered devices can use
+to advertise an unlimited number of keyslots, and support for any encryption
+algorithms they choose, while not actually using any memory for each keyslot.
+Another use case for the "passthrough KSM" is for IE devices that do not have a
+limited number of keyslots.
+
+
+Interaction between inline encryption and blk integrity
+=======================================================
+
+At the time of this patch, there is no real hardware that supports both these
+features. However, these features do interact with each other, and it's not
+completely trivial to make them both work together properly. In particular,
+when a WRITE bio wants to use inline encryption on a device that supports both
+features, the bio will have an encryption context specified, after which
+its integrity information is calculated (using the plaintext data, since
+the encryption will happen while data is being written), and the data and
+integrity info are sent to the device. Obviously, the integrity info must be
+verified before the data is encrypted. After the data is encrypted, the device
+must not store the integrity info that it received with the plaintext data
+since that might reveal information about the plaintext data. As such, it must
+re-generate the integrity info from the ciphertext data and store that on disk
+instead. Another issue with storing the integrity info of the plaintext data is
+that it changes the on disk format depending on whether hardware inline
+encryption support is present or the kernel crypto API fallback is used (since
+if the fallback is used, the device will receive the integrity info of the
+ciphertext, not that of the plaintext).
+
+Because there isn't any real hardware yet, it seems prudent to assume that
+hardware implementations might not implement both features together correctly,
+and disallow the combination for now. Whenever a device supports integrity, the
+kernel will pretend that the device does not support hardware inline encryption
+(by essentially setting the keyslot manager in the request_queue of the device
+to NULL). When the crypto API fallback is enabled, this means that all bios with
+an encryption context will use the fallback, and IO will complete as usual.
+When the fallback is disabled, a bio with an encryption context will be failed.
diff --git a/Documentation/conf.py b/Documentation/conf.py
index 9ae8e9abf846..f6a1bc07c410 100644
--- a/Documentation/conf.py
+++ b/Documentation/conf.py
@@ -388,44 +388,6 @@ if major == 1 and minor < 6:
# author, documentclass [howto, manual, or own class]).
# Sorted in alphabetical order
latex_documents = [
- ('admin-guide/index', 'linux-user.tex', 'Linux Kernel User Documentation',
- 'The kernel development community', 'manual'),
- ('core-api/index', 'core-api.tex', 'The kernel core API manual',
- 'The kernel development community', 'manual'),
- ('crypto/index', 'crypto-api.tex', 'Linux Kernel Crypto API manual',
- 'The kernel development community', 'manual'),
- ('dev-tools/index', 'dev-tools.tex', 'Development tools for the Kernel',
- 'The kernel development community', 'manual'),
- ('doc-guide/index', 'kernel-doc-guide.tex', 'Linux Kernel Documentation Guide',
- 'The kernel development community', 'manual'),
- ('driver-api/index', 'driver-api.tex', 'The kernel driver API manual',
- 'The kernel development community', 'manual'),
- ('filesystems/index', 'filesystems.tex', 'Linux Filesystems API',
- 'The kernel development community', 'manual'),
- ('admin-guide/ext4', 'ext4-admin-guide.tex', 'ext4 Administration Guide',
- 'ext4 Community', 'manual'),
- ('filesystems/ext4/index', 'ext4-data-structures.tex',
- 'ext4 Data Structures and Algorithms', 'ext4 Community', 'manual'),
- ('gpu/index', 'gpu.tex', 'Linux GPU Driver Developer\'s Guide',
- 'The kernel development community', 'manual'),
- ('input/index', 'linux-input.tex', 'The Linux input driver subsystem',
- 'The kernel development community', 'manual'),
- ('kernel-hacking/index', 'kernel-hacking.tex', 'Unreliable Guide To Hacking The Linux Kernel',
- 'The kernel development community', 'manual'),
- ('media/index', 'media.tex', 'Linux Media Subsystem Documentation',
- 'The kernel development community', 'manual'),
- ('networking/index', 'networking.tex', 'Linux Networking Documentation',
- 'The kernel development community', 'manual'),
- ('process/index', 'development-process.tex', 'Linux Kernel Development Documentation',
- 'The kernel development community', 'manual'),
- ('security/index', 'security.tex', 'The kernel security subsystem manual',
- 'The kernel development community', 'manual'),
- ('sh/index', 'sh.tex', 'SuperH architecture implementation manual',
- 'The kernel development community', 'manual'),
- ('sound/index', 'sound.tex', 'Linux Sound Subsystem Documentation',
- 'The kernel development community', 'manual'),
- ('userspace-api/index', 'userspace-api.tex', 'The Linux kernel user-space API guide',
- 'The kernel development community', 'manual'),
]
# Add all other index files from Documentation/ subdirectories
diff --git a/Documentation/core-api/cachetlb.rst b/Documentation/core-api/cachetlb.rst
index 93cb65d52720..a1582cc79f0f 100644
--- a/Documentation/core-api/cachetlb.rst
+++ b/Documentation/core-api/cachetlb.rst
@@ -213,7 +213,7 @@ Here are the routines, one by one:
there will be no entries in the cache for the kernel address
space for virtual addresses in the range 'start' to 'end-1'.
- The first of these two routines is invoked after map_vm_area()
+ The first of these two routines is invoked after map_kernel_range()
has installed the page table entries. The second is invoked
before unmap_kernel_range() deletes the page table entries.
diff --git a/Documentation/debugging-via-ohci1394.txt b/Documentation/core-api/debugging-via-ohci1394.rst
index 981ad4f89fd3..981ad4f89fd3 100644
--- a/Documentation/debugging-via-ohci1394.txt
+++ b/Documentation/core-api/debugging-via-ohci1394.rst
diff --git a/Documentation/DMA-API-HOWTO.txt b/Documentation/core-api/dma-api-howto.rst
index 358d495456d1..358d495456d1 100644
--- a/Documentation/DMA-API-HOWTO.txt
+++ b/Documentation/core-api/dma-api-howto.rst
diff --git a/Documentation/DMA-API.txt b/Documentation/core-api/dma-api.rst
index 2d8d2fed7317..2d8d2fed7317 100644
--- a/Documentation/DMA-API.txt
+++ b/Documentation/core-api/dma-api.rst
diff --git a/Documentation/DMA-attributes.txt b/Documentation/core-api/dma-attributes.rst
index 29dcbe8826e8..29dcbe8826e8 100644
--- a/Documentation/DMA-attributes.txt
+++ b/Documentation/core-api/dma-attributes.rst
diff --git a/Documentation/DMA-ISA-LPC.txt b/Documentation/core-api/dma-isa-lpc.rst
index b1ec7b16c21f..b1ec7b16c21f 100644
--- a/Documentation/DMA-ISA-LPC.txt
+++ b/Documentation/core-api/dma-isa-lpc.rst
diff --git a/Documentation/core-api/index.rst b/Documentation/core-api/index.rst
index 0897ad12c119..15ab86112627 100644
--- a/Documentation/core-api/index.rst
+++ b/Documentation/core-api/index.rst
@@ -18,6 +18,7 @@ it.
kernel-api
workqueue
+ printk-basics
printk-formats
symbol-namespaces
@@ -30,10 +31,12 @@ Library functionality that is used throughout the kernel.
:maxdepth: 1
kobject
+ kref
assoc_array
xarray
idr
circular-buffers
+ rbtree
generic-radix-tree
packing
timekeeping
@@ -50,6 +53,7 @@ How Linux keeps everything from happening at the same time. See
atomic_ops
refcount-vs-atomic
+ irq/index
local_ops
padata
../RCU/index
@@ -78,6 +82,10 @@ more memory-management documentation in :doc:`/vm/index`.
:maxdepth: 1
memory-allocation
+ dma-api
+ dma-api-howto
+ dma-attributes
+ dma-isa-lpc
mm-api
genalloc
pin_user_pages
@@ -92,6 +100,7 @@ Interfaces for kernel debugging
debug-objects
tracepoint
+ debugging-via-ohci1394
Everything else
===============
diff --git a/Documentation/IRQ.txt b/Documentation/core-api/irq/concepts.rst
index 4273806a606b..4273806a606b 100644
--- a/Documentation/IRQ.txt
+++ b/Documentation/core-api/irq/concepts.rst
diff --git a/Documentation/core-api/irq/index.rst b/Documentation/core-api/irq/index.rst
new file mode 100644
index 000000000000..0d65d11e5420
--- /dev/null
+++ b/Documentation/core-api/irq/index.rst
@@ -0,0 +1,11 @@
+====
+IRQs
+====
+
+.. toctree::
+ :maxdepth: 1
+
+ concepts
+ irq-affinity
+ irq-domain
+ irqflags-tracing
diff --git a/Documentation/IRQ-affinity.txt b/Documentation/core-api/irq/irq-affinity.rst
index 29da5000836a..29da5000836a 100644
--- a/Documentation/IRQ-affinity.txt
+++ b/Documentation/core-api/irq/irq-affinity.rst
diff --git a/Documentation/IRQ-domain.txt b/Documentation/core-api/irq/irq-domain.rst
index 507775cce753..096db12f32d5 100644
--- a/Documentation/IRQ-domain.txt
+++ b/Documentation/core-api/irq/irq-domain.rst
@@ -263,7 +263,8 @@ needs to:
Hierarchy irq_domain is in no way x86 specific, and is heavily used to
support other architectures, such as ARM, ARM64 etc.
-=== Debugging ===
+Debugging
+=========
Most of the internals of the IRQ subsystem are exposed in debugfs by
turning CONFIG_GENERIC_IRQ_DEBUGFS on.
diff --git a/Documentation/irqflags-tracing.txt b/Documentation/core-api/irq/irqflags-tracing.rst
index bdd208259fb3..bdd208259fb3 100644
--- a/Documentation/irqflags-tracing.txt
+++ b/Documentation/core-api/irq/irqflags-tracing.rst
diff --git a/Documentation/core-api/kobject.rst b/Documentation/core-api/kobject.rst
index 1f62d4d7d966..e93dc8cf52dd 100644
--- a/Documentation/core-api/kobject.rst
+++ b/Documentation/core-api/kobject.rst
@@ -80,11 +80,11 @@ what is the pointer to the containing structure? You must avoid tricks
(such as assuming that the kobject is at the beginning of the structure)
and, instead, use the container_of() macro, found in ``<linux/kernel.h>``::
- container_of(pointer, type, member)
+ container_of(ptr, type, member)
where:
- * ``pointer`` is the pointer to the embedded kobject,
+ * ``ptr`` is the pointer to the embedded kobject,
* ``type`` is the type of the containing structure, and
* ``member`` is the name of the structure field to which ``pointer`` points.
@@ -140,7 +140,7 @@ the name of the kobject, call kobject_rename()::
int kobject_rename(struct kobject *kobj, const char *new_name);
-kobject_rename does not perform any locking or have a solid notion of
+kobject_rename() does not perform any locking or have a solid notion of
what names are valid so the caller must provide their own sanity checking
and serialization.
@@ -210,7 +210,7 @@ statically and will warn the developer of this improper usage.
If all that you want to use a kobject for is to provide a reference counter
for your structure, please use the struct kref instead; a kobject would be
overkill. For more information on how to use struct kref, please see the
-file Documentation/kref.txt in the Linux kernel source tree.
+file Documentation/core-api/kref.rst in the Linux kernel source tree.
Creating "simple" kobjects
@@ -222,17 +222,17 @@ ksets, show and store functions, and other details. This is the one
exception where a single kobject should be created. To create such an
entry, use the function::
- struct kobject *kobject_create_and_add(char *name, struct kobject *parent);
+ struct kobject *kobject_create_and_add(const char *name, struct kobject *parent);
This function will create a kobject and place it in sysfs in the location
underneath the specified parent kobject. To create simple attributes
associated with this kobject, use::
- int sysfs_create_file(struct kobject *kobj, struct attribute *attr);
+ int sysfs_create_file(struct kobject *kobj, const struct attribute *attr);
or::
- int sysfs_create_group(struct kobject *kobj, struct attribute_group *grp);
+ int sysfs_create_group(struct kobject *kobj, const struct attribute_group *grp);
Both types of attributes used here, with a kobject that has been created
with the kobject_create_and_add(), can be of type kobj_attribute, so no
@@ -300,8 +300,10 @@ kobj_type::
void (*release)(struct kobject *kobj);
const struct sysfs_ops *sysfs_ops;
struct attribute **default_attrs;
+ const struct attribute_group **default_groups;
const struct kobj_ns_type_operations *(*child_ns_type)(struct kobject *kobj);
const void *(*namespace)(struct kobject *kobj);
+ void (*get_ownership)(struct kobject *kobj, kuid_t *uid, kgid_t *gid);
};
This structure is used to describe a particular type of kobject (or, more
@@ -352,12 +354,12 @@ created and never declared statically or on the stack. To create a new
kset use::
struct kset *kset_create_and_add(const char *name,
- struct kset_uevent_ops *u,
- struct kobject *parent);
+ const struct kset_uevent_ops *uevent_ops,
+ struct kobject *parent_kobj);
When you are finished with the kset, call::
- void kset_unregister(struct kset *kset);
+ void kset_unregister(struct kset *k);
to destroy it. This removes the kset from sysfs and decrements its reference
count. When the reference count goes to zero, the kset will be released.
@@ -371,9 +373,9 @@ If a kset wishes to control the uevent operations of the kobjects
associated with it, it can use the struct kset_uevent_ops to handle it::
struct kset_uevent_ops {
- int (*filter)(struct kset *kset, struct kobject *kobj);
- const char *(*name)(struct kset *kset, struct kobject *kobj);
- int (*uevent)(struct kset *kset, struct kobject *kobj,
+ int (* const filter)(struct kset *kset, struct kobject *kobj);
+ const char *(* const name)(struct kset *kset, struct kobject *kobj);
+ int (* const uevent)(struct kset *kset, struct kobject *kobj,
struct kobj_uevent_env *env);
};
diff --git a/Documentation/kref.txt b/Documentation/core-api/kref.rst
index c61eea6f1bf2..c61eea6f1bf2 100644
--- a/Documentation/kref.txt
+++ b/Documentation/core-api/kref.rst
diff --git a/Documentation/core-api/printk-basics.rst b/Documentation/core-api/printk-basics.rst
new file mode 100644
index 000000000000..563a9ce5fe1d
--- /dev/null
+++ b/Documentation/core-api/printk-basics.rst
@@ -0,0 +1,115 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===========================
+Message logging with printk
+===========================
+
+printk() is one of the most widely known functions in the Linux kernel. It's the
+standard tool we have for printing messages and usually the most basic way of
+tracing and debugging. If you're familiar with printf(3) you can tell printk()
+is based on it, although it has some functional differences:
+
+ - printk() messages can specify a log level.
+
+ - the format string, while largely compatible with C99, doesn't follow the
+ exact same specification. It has some extensions and a few limitations
+ (no ``%n`` or floating point conversion specifiers). See :ref:`How to get
+ printk format specifiers right <printk-specifiers>`.
+
+All printk() messages are printed to the kernel log buffer, which is a ring
+buffer exported to userspace through /dev/kmsg. The usual way to read it is
+using ``dmesg``.
+
+printk() is typically used like this::
+
+ printk(KERN_INFO "Message: %s\n", arg);
+
+where ``KERN_INFO`` is the log level (note that it's concatenated to the format
+string; the log level is not a separate argument). The available log levels are:
+
++----------------+--------+-----------------------------------------------+
+| Name | String | Alias function |
++================+========+===============================================+
+| KERN_EMERG | "0" | pr_emerg() |
++----------------+--------+-----------------------------------------------+
+| KERN_ALERT | "1" | pr_alert() |
++----------------+--------+-----------------------------------------------+
+| KERN_CRIT | "2" | pr_crit() |
++----------------+--------+-----------------------------------------------+
+| KERN_ERR | "3" | pr_err() |
++----------------+--------+-----------------------------------------------+
+| KERN_WARNING | "4" | pr_warn() |
++----------------+--------+-----------------------------------------------+
+| KERN_NOTICE | "5" | pr_notice() |
++----------------+--------+-----------------------------------------------+
+| KERN_INFO | "6" | pr_info() |
++----------------+--------+-----------------------------------------------+
+| KERN_DEBUG | "7" | pr_debug() and pr_devel() if DEBUG is defined |
++----------------+--------+-----------------------------------------------+
+| KERN_DEFAULT | "" | |
++----------------+--------+-----------------------------------------------+
+| KERN_CONT | "c" | pr_cont() |
++----------------+--------+-----------------------------------------------+
+
+
+The log level specifies the importance of a message. The kernel decides whether
+to show the message immediately (printing it to the current console) depending
+on its log level and the current *console_loglevel* (a kernel variable). If the
+message priority is higher (lower log level value) than the *console_loglevel*,
+the message will be printed to the console.
+
+If the log level is omitted, the message is printed with ``KERN_DEFAULT``
+level.
+
+You can check the current *console_loglevel* with::
+
+ $ cat /proc/sys/kernel/printk
+ 4 4 1 7
+
+The result shows the *current*, *default*, *minimum* and *boot-time-default* log
+levels.
+
+To change the current console_loglevel, simply write the desired level to
+``/proc/sys/kernel/printk``. For example, to print all messages to the console::
+
+ # echo 8 > /proc/sys/kernel/printk
+
+Another way, using ``dmesg``::
+
+ # dmesg -n 5
+
+sets the console_loglevel to print KERN_WARNING (4) or more severe messages to
+console. See ``dmesg(1)`` for more information.
+
+As an alternative to printk() you can use the ``pr_*()`` aliases for
+logging. This family of macros embeds the log level in the macro names. For
+example::
+
+ pr_info("Info message no. %d\n", msg_num);
+
+prints a ``KERN_INFO`` message.
+
+Besides being more concise than the equivalent printk() calls, they can use a
+common definition for the format string through the pr_fmt() macro. For
+instance, defining this at the top of a source file (before any ``#include``
+directive)::
+
+ #define pr_fmt(fmt) "%s:%s: " fmt, KBUILD_MODNAME, __func__
+
+would prefix every pr_*() message in that file with the module and function name
+that originated the message.
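+
+For example, with that ``pr_fmt()`` definition at the top of a (hypothetical)
+module built as ``foo.ko``::
+
+ static int foo_init(void)
+ {
+         pr_info("loaded, version %d\n", 2);
+         return 0;
+ }
+
+the resulting log line would read roughly ``foo:foo_init: loaded, version 2``.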
+
+For debugging purposes there are also two conditionally-compiled macros:
+pr_debug() and pr_devel(), which are compiled-out unless ``DEBUG`` (or
+also ``CONFIG_DYNAMIC_DEBUG`` in the case of pr_debug()) is defined.
+
+
+Function reference
+==================
+
+.. kernel-doc:: kernel/printk/printk.c
+ :functions: printk
+
+.. kernel-doc:: include/linux/printk.h
+ :functions: pr_emerg pr_alert pr_crit pr_err pr_warn pr_notice pr_info
+ pr_fmt pr_debug pr_devel pr_cont
diff --git a/Documentation/core-api/printk-formats.rst b/Documentation/core-api/printk-formats.rst
index 5dfcc4592b23..8c9aba262b1e 100644
--- a/Documentation/core-api/printk-formats.rst
+++ b/Documentation/core-api/printk-formats.rst
@@ -2,6 +2,8 @@
How to get printk format specifiers right
=========================================
+.. _printk-specifiers:
+
:Author: Randy Dunlap <rdunlap@infradead.org>
:Author: Andrew Murray <amurray@mpc-data.co.uk>
@@ -482,21 +484,23 @@ Examples (OF)::
%pfwf /ocp@68000000/i2c@48072000/camera@10/port/endpoint - Full name
%pfwP endpoint - Node name
-Time and date (struct rtc_time)
--------------------------------
+Time and date
+-------------
::
- %ptR YYYY-mm-ddTHH:MM:SS
- %ptRd YYYY-mm-dd
- %ptRt HH:MM:SS
- %ptR[dt][r]
+ %pt[RT] YYYY-mm-ddTHH:MM:SS
+ %pt[RT]d YYYY-mm-dd
+ %pt[RT]t HH:MM:SS
+ %pt[RT][dt][r]
-For printing date and time as represented by struct rtc_time structure in
-human readable format.
+For printing date and time as represented by
+ R struct rtc_time structure
+ T time64_t type
+in human readable format.
-By default year will be incremented by 1900 and month by 1. Use %ptRr (raw)
-to suppress this behaviour.
+By default year will be incremented by 1900 and month by 1.
+Use %pt[RT]r (raw) to suppress this behaviour.
Passed by reference.
diff --git a/Documentation/core-api/protection-keys.rst b/Documentation/core-api/protection-keys.rst
index 49d9833af871..ec575e72d0b2 100644
--- a/Documentation/core-api/protection-keys.rst
+++ b/Documentation/core-api/protection-keys.rst
@@ -5,8 +5,9 @@ Memory Protection Keys
======================
Memory Protection Keys for Userspace (PKU aka PKEYs) is a feature
-which is found on Intel's Skylake "Scalable Processor" Server CPUs.
-It will be avalable in future non-server parts.
+which is found on Intel's Skylake (and later) "Scalable Processor"
+Server CPUs. It will be available in future non-server Intel parts
+and future AMD processors.
For anyone wishing to test or use this feature, it is available in
Amazon's EC2 C5 instances and is known to work there using an Ubuntu
diff --git a/Documentation/rbtree.txt b/Documentation/core-api/rbtree.rst
index 523d54b60087..523d54b60087 100644
--- a/Documentation/rbtree.txt
+++ b/Documentation/core-api/rbtree.rst
diff --git a/Documentation/dev-tools/kgdb.rst b/Documentation/dev-tools/kgdb.rst
index d38be58f872a..61293f40bc6e 100644
--- a/Documentation/dev-tools/kgdb.rst
+++ b/Documentation/dev-tools/kgdb.rst
@@ -274,6 +274,30 @@ don't like this are to hack gdb to send the :kbd:`SysRq-G` for you as well as
on the initial connect, or to use a debugger proxy that allows an
unmodified gdb to do the debugging.
+Kernel parameter: ``kgdboc_earlycon``
+-------------------------------------
+
+If you specify the kernel parameter ``kgdboc_earlycon`` and your serial
+driver registers a boot console that supports polling (doesn't need
+interrupts and implements a nonblocking read() function), kgdb will attempt
+to work using the boot console until it can transition to the regular
+tty driver specified by the ``kgdboc`` parameter.
+
+Normally there is only one boot console (especially one that implements the
+read() function), so just adding ``kgdboc_earlycon`` on its own is
+sufficient to make this work. If you have more than one boot console you
+can add the boot console's name to differentiate. Note that names that
+are registered through the boot console layer and the tty layer are not
+the same for the same port.
+
+For instance, on one board to be explicit you might do::
+
+ kgdboc_earlycon=qcom_geni kgdboc=ttyMSM0
+
+If the only boot console on the device were "qcom_geni", you could simplify to::
+
+ kgdboc_earlycon kgdboc=ttyMSM0
+
Kernel parameter: ``kgdbwait``
------------------------------
diff --git a/Documentation/devicetree/bindings/display/allwinner,sun6i-a31-mipi-dsi.yaml b/Documentation/devicetree/bindings/display/allwinner,sun6i-a31-mipi-dsi.yaml
index 9e90c2b00960..e73662c8d339 100644
--- a/Documentation/devicetree/bindings/display/allwinner,sun6i-a31-mipi-dsi.yaml
+++ b/Documentation/devicetree/bindings/display/allwinner,sun6i-a31-mipi-dsi.yaml
@@ -119,7 +119,7 @@ examples:
panel@0 {
compatible = "bananapi,lhr050h41", "ilitek,ili9881c";
reg = <0>;
- power-gpios = <&pio 1 7 0>; /* PB07 */
+ power-supply = <&reg_display>;
reset-gpios = <&r_pio 0 5 1>; /* PL05 */
backlight = <&pwm_bl>;
};
diff --git a/Documentation/devicetree/bindings/display/bridge/adi,adv7123.txt b/Documentation/devicetree/bindings/display/bridge/adi,adv7123.txt
deleted file mode 100644
index d3c2a4914ea2..000000000000
--- a/Documentation/devicetree/bindings/display/bridge/adi,adv7123.txt
+++ /dev/null
@@ -1,50 +0,0 @@
-Analog Devices ADV7123 Video DAC
---------------------------------
-
-The ADV7123 is a digital-to-analog converter that outputs VGA signals from a
-parallel video input.
-
-Required properties:
-
-- compatible: Should be "adi,adv7123"
-
-Optional properties:
-
-- psave-gpios: Power save control GPIO
-
-Required nodes:
-
-The ADV7123 has two video ports. Their connections are modeled using the OF
-graph bindings specified in Documentation/devicetree/bindings/graph.txt.
-
-- Video port 0 for DPI input
-- Video port 1 for VGA output
-
-
-Example
--------
-
- adv7123: encoder@0 {
- compatible = "adi,adv7123";
-
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
-
- port@0 {
- reg = <0>;
-
- adv7123_in: endpoint@0 {
- remote-endpoint = <&dpi_out>;
- };
- };
-
- port@1 {
- reg = <1>;
-
- adv7123_out: endpoint@0 {
- remote-endpoint = <&vga_connector_in>;
- };
- };
- };
- };
diff --git a/Documentation/devicetree/bindings/display/bridge/anx6345.yaml b/Documentation/devicetree/bindings/display/bridge/anx6345.yaml
index c21103869923..8c0e4f285fbc 100644
--- a/Documentation/devicetree/bindings/display/bridge/anx6345.yaml
+++ b/Documentation/devicetree/bindings/display/bridge/anx6345.yaml
@@ -37,6 +37,12 @@ properties:
type: object
properties:
+ '#address-cells':
+ const: 1
+
+ '#size-cells':
+ const: 0
+
port@0:
type: object
description: |
@@ -51,6 +57,8 @@ properties:
required:
- port@0
+ additionalProperties: false
+
required:
- compatible
- reg
diff --git a/Documentation/devicetree/bindings/display/bridge/chrontel,ch7033.yaml b/Documentation/devicetree/bindings/display/bridge/chrontel,ch7033.yaml
new file mode 100644
index 000000000000..9f38f55fc990
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/chrontel,ch7033.yaml
@@ -0,0 +1,77 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+# Copyright (C) 2019,2020 Lubomir Rintel <lkundrak@v3.sk>
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/chrontel,ch7033.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Chrontel CH7033 Video Encoder Device Tree Bindings
+
+maintainers:
+ - Lubomir Rintel <lkundrak@v3.sk>
+
+properties:
+ compatible:
+ const: chrontel,ch7033
+
+ reg:
+ maxItems: 1
+ description: I2C address of the device
+
+ ports:
+ type: object
+
+ properties:
+ port@0:
+ type: object
+ description: |
+ Video port for RGB input.
+
+ port@1:
+ type: object
+ description: |
+ DVI port, should be connected to a node compatible with the
+ dvi-connector binding.
+
+ required:
+ - port@0
+ - port@1
+
+required:
+ - compatible
+ - reg
+ - ports
+
+additionalProperties: false
+
+examples:
+ - |
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ vga-dvi-encoder@76 {
+ compatible = "chrontel,ch7033";
+ reg = <0x76>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ endpoint {
+ remote-endpoint = <&lcd0_rgb_out>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+ endpoint {
+ remote-endpoint = <&dvi_in>;
+ };
+ };
+
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/bridge/dumb-vga-dac.txt b/Documentation/devicetree/bindings/display/bridge/dumb-vga-dac.txt
deleted file mode 100644
index 164cbb15f04c..000000000000
--- a/Documentation/devicetree/bindings/display/bridge/dumb-vga-dac.txt
+++ /dev/null
@@ -1,50 +0,0 @@
-Dumb RGB to VGA DAC bridge
----------------------------
-
-This binding is aimed for dumb RGB to VGA DAC based bridges that do not require
-any configuration.
-
-Required properties:
-
-- compatible: Must be "dumb-vga-dac"
-
-Required nodes:
-
-This device has two video ports. Their connections are modelled using the OF
-graph bindings specified in Documentation/devicetree/bindings/graph.txt.
-
-- Video port 0 for RGB input
-- Video port 1 for VGA output
-
-Optional properties:
-- vdd-supply: Power supply for DAC
-
-Example
--------
-
-bridge {
- compatible = "dumb-vga-dac";
- #address-cells = <1>;
- #size-cells = <0>;
-
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
-
- port@0 {
- reg = <0>;
-
- vga_bridge_in: endpoint {
- remote-endpoint = <&tcon0_out_vga>;
- };
- };
-
- port@1 {
- reg = <1>;
-
- vga_bridge_out: endpoint {
- remote-endpoint = <&vga_con_in>;
- };
- };
- };
-};
diff --git a/Documentation/devicetree/bindings/display/bridge/dw_mipi_dsi.txt b/Documentation/devicetree/bindings/display/bridge/dw_mipi_dsi.txt
deleted file mode 100644
index b13adf30b8d3..000000000000
--- a/Documentation/devicetree/bindings/display/bridge/dw_mipi_dsi.txt
+++ /dev/null
@@ -1,32 +0,0 @@
-Synopsys DesignWare MIPI DSI host controller
-============================================
-
-This document defines device tree properties for the Synopsys DesignWare MIPI
-DSI host controller. It doesn't constitue a device tree binding specification
-by itself but is meant to be referenced by platform-specific device tree
-bindings.
-
-When referenced from platform device tree bindings the properties defined in
-this document are defined as follows. The platform device tree bindings are
-responsible for defining whether each optional property is used or not.
-
-- reg: Memory mapped base address and length of the DesignWare MIPI DSI
- host controller registers. (mandatory)
-
-- clocks: References to all the clocks specified in the clock-names property
- as specified in [1]. (mandatory)
-
-- clock-names:
- - "pclk" is the peripheral clock for either AHB and APB. (mandatory)
- - "px_clk" is the pixel clock for the DPI/RGB input. (optional)
-
-- resets: References to all the resets specified in the reset-names property
- as specified in [2]. (optional)
-
-- reset-names: string reset name, must be "apb" if used. (optional)
-
-- panel or bridge node: see [3]. (mandatory)
-
-[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
-[2] Documentation/devicetree/bindings/reset/reset.txt
-[3] Documentation/devicetree/bindings/display/mipi-dsi-bus.txt
diff --git a/Documentation/devicetree/bindings/display/bridge/ite,it6505.yaml b/Documentation/devicetree/bindings/display/bridge/ite,it6505.yaml
new file mode 100644
index 000000000000..2c500166c65d
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/ite,it6505.yaml
@@ -0,0 +1,91 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/ite,it6505.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ITE it6505 Device Tree Bindings
+
+maintainers:
+ - Allen Chen <allen.chen@ite.com.tw>
+
+description: |
+ The IT6505 is a high-performance DisplayPort 1.1a transmitter,
+ fully compliant with DisplayPort 1.1a, HDCP 1.3 specifications.
+ The IT6505 supports color depth of up to 36 bits (12 bits/color)
+ and ensures robust transmission of high-quality uncompressed video
+ content, along with uncompressed and compressed digital audio content.
+
+ Aside from the various video output formats supported, the IT6505
+ also encodes and transmits up to 8 channels of I2S digital audio,
+ with sampling rate up to 192kHz and sample size up to 24 bits.
+ In addition, an S/PDIF input port takes in compressed audio of up to
+ 192kHz frame rate.
+
+ Each IT6505 chip comes preprogrammed with a unique HDCP key,
+ in compliance with the HDCP 1.3 standard so as to provide secure
+ transmission of high-definition content. Users of the IT6505 need not
+ purchase any HDCP keys or ROMs.
+
+properties:
+ compatible:
+ const: ite,it6505
+
+ ovdd-supply:
+ maxItems: 1
+ description: I/O voltage
+
+ pwr18-supply:
+ maxItems: 1
+ description: core voltage
+
+ interrupts:
+ maxItems: 1
+ description: interrupt specifier of INT pin
+
+ reset-gpios:
+ maxItems: 1
+ description: gpio specifier of RESET pin
+
+ extcon:
+ maxItems: 1
+ description: extcon specifier for the Power Delivery
+
+ port:
+ type: object
+ description: A port node pointing to DPI host port node
+
+required:
+ - compatible
+ - ovdd-supply
+ - pwr18-supply
+ - interrupts
+ - reset-gpios
+ - extcon
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/irq.h>
+
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ dp-bridge@5c {
+ compatible = "ite,it6505";
+ interrupts = <152 IRQ_TYPE_EDGE_FALLING 152 0>;
+ reg = <0x5c>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&it6505_pins>;
+ ovdd-supply = <&mt6358_vsim1_reg>;
+ pwr18-supply = <&it6505_pp18_reg>;
+ reset-gpios = <&pio 179 1>;
+ extcon = <&usbc_extcon>;
+
+ port {
+ it6505_in: endpoint {
+ remote-endpoint = <&dpi_out>;
+ };
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/bridge/lvds-codec.yaml b/Documentation/devicetree/bindings/display/bridge/lvds-codec.yaml
index 8f373029f5d2..800c63764e71 100644
--- a/Documentation/devicetree/bindings/display/bridge/lvds-codec.yaml
+++ b/Documentation/devicetree/bindings/display/bridge/lvds-codec.yaml
@@ -50,6 +50,12 @@ properties:
This device has two video ports. Their connections are modeled using the
OF graph bindings specified in Documentation/devicetree/bindings/graph.txt
properties:
+ '#address-cells':
+ const: 1
+
+ '#size-cells':
+ const: 0
+
port@0:
type: object
description: |
@@ -66,6 +72,8 @@ properties:
- port@0
- port@1
+ additionalProperties: false
+
powerdown-gpios:
description:
The GPIO used to control the power down line of this device.
diff --git a/Documentation/devicetree/bindings/display/bridge/nwl-dsi.yaml b/Documentation/devicetree/bindings/display/bridge/nwl-dsi.yaml
new file mode 100644
index 000000000000..8aff2d68fc33
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/nwl-dsi.yaml
@@ -0,0 +1,226 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/nwl-dsi.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Northwest Logic MIPI-DSI controller on i.MX SoCs
+
+maintainers:
+ - Guido Gúnther <agx@sigxcpu.org>
+ - Robert Chiras <robert.chiras@nxp.com>
+
+description: |
+ NWL MIPI-DSI host controller found on i.MX8 platforms. This is a DSI bridge
+ for the SoC's NWL MIPI-DSI host controller.
+
+properties:
+ compatible:
+ const: fsl,imx8mq-nwl-dsi
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ '#address-cells':
+ const: 1
+
+ '#size-cells':
+ const: 0
+
+ clocks:
+ items:
+ - description: DSI core clock
+ - description: RX_ESC clock (used in escape mode)
+ - description: TX_ESC clock (used in escape mode)
+ - description: PHY_REF clock
+ - description: LCDIF clock
+
+ clock-names:
+ items:
+ - const: core
+ - const: rx_esc
+ - const: tx_esc
+ - const: phy_ref
+ - const: lcdif
+
+ mux-controls:
+ description:
+ mux controller node to use for operating the input mux
+
+ phys:
+ maxItems: 1
+ description:
+ A phandle to the phy module representing the DPHY
+
+ phy-names:
+ items:
+ - const: dphy
+
+ power-domains:
+ maxItems: 1
+
+ resets:
+ items:
+ - description: dsi byte reset line
+ - description: dsi dpi reset line
+ - description: dsi esc reset line
+ - description: dsi pclk reset line
+
+ reset-names:
+ items:
+ - const: byte
+ - const: dpi
+ - const: esc
+ - const: pclk
+
+ ports:
+ type: object
+ description:
+ A node containing DSI input & output port nodes with endpoint
+ definitions as documented in
+ Documentation/devicetree/bindings/graph.txt.
+ properties:
+ port@0:
+ type: object
+ description:
+ Input port node to receive pixel data from the
+ display controller. Exactly one endpoint must be
+ specified.
+ properties:
+ '#address-cells':
+ const: 1
+
+ '#size-cells':
+ const: 0
+
+ endpoint@0:
+ description: sub-node describing the input from LCDIF
+ type: object
+
+ endpoint@1:
+ description: sub-node describing the input from DCSS
+ type: object
+
+ reg:
+ const: 0
+
+ required:
+ - '#address-cells'
+ - '#size-cells'
+ - reg
+
+ oneOf:
+ - required:
+ - endpoint@0
+ - required:
+ - endpoint@1
+
+ additionalProperties: false
+
+ port@1:
+ type: object
+ description:
+ DSI output port node to the panel or the next bridge
+ in the chain
+
+ '#address-cells':
+ const: 1
+
+ '#size-cells':
+ const: 0
+
+ required:
+ - '#address-cells'
+ - '#size-cells'
+ - port@0
+ - port@1
+
+ additionalProperties: false
+
+patternProperties:
+ "^panel@[0-9]+$":
+ type: object
+
+required:
+ - '#address-cells'
+ - '#size-cells'
+ - clock-names
+ - clocks
+ - compatible
+ - interrupts
+ - mux-controls
+ - phy-names
+ - phys
+ - ports
+ - reg
+ - reset-names
+ - resets
+
+additionalProperties: false
+
+examples:
+ - |
+
+ #include <dt-bindings/clock/imx8mq-clock.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/reset/imx8mq-reset.h>
+
+ mipi_dsi: mipi_dsi@30a00000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl,imx8mq-nwl-dsi";
+ reg = <0x30A00000 0x300>;
+ clocks = <&clk IMX8MQ_CLK_DSI_CORE>,
+ <&clk IMX8MQ_CLK_DSI_AHB>,
+ <&clk IMX8MQ_CLK_DSI_IPG_DIV>,
+ <&clk IMX8MQ_CLK_DSI_PHY_REF>,
+ <&clk IMX8MQ_CLK_LCDIF_PIXEL>;
+ clock-names = "core", "rx_esc", "tx_esc", "phy_ref", "lcdif";
+ interrupts = <GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>;
+ mux-controls = <&mux 0>;
+ power-domains = <&pgc_mipi>;
+ resets = <&src IMX8MQ_RESET_MIPI_DSI_RESET_BYTE_N>,
+ <&src IMX8MQ_RESET_MIPI_DSI_DPI_RESET_N>,
+ <&src IMX8MQ_RESET_MIPI_DSI_ESC_RESET_N>,
+ <&src IMX8MQ_RESET_MIPI_DSI_PCLK_RESET_N>;
+ reset-names = "byte", "dpi", "esc", "pclk";
+ phys = <&dphy>;
+ phy-names = "dphy";
+
+ panel@0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "rocktech,jh057n00900";
+ reg = <0>;
+ port@0 {
+ reg = <0>;
+ panel_in: endpoint {
+ remote-endpoint = <&mipi_dsi_out>;
+ };
+ };
+ };
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ #size-cells = <0>;
+ #address-cells = <1>;
+ reg = <0>;
+ mipi_dsi_in: endpoint@0 {
+ reg = <0>;
+ remote-endpoint = <&lcdif_mipi_dsi>;
+ };
+ };
+ port@1 {
+ reg = <1>;
+ mipi_dsi_out: endpoint {
+ remote-endpoint = <&panel_in>;
+ };
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/bridge/ps8640.yaml b/Documentation/devicetree/bindings/display/bridge/ps8640.yaml
index 5dff93641bea..7e27cfcf770d 100644
--- a/Documentation/devicetree/bindings/display/bridge/ps8640.yaml
+++ b/Documentation/devicetree/bindings/display/bridge/ps8640.yaml
@@ -50,6 +50,12 @@ properties:
Documentation/devicetree/bindings/media/video-interfaces.txt
Documentation/devicetree/bindings/graph.txt
properties:
+ '#address-cells':
+ const: 1
+
+ '#size-cells':
+ const: 0
+
port@0:
type: object
description: |
@@ -63,6 +69,8 @@ properties:
required:
- port@0
+ additionalProperties: false
+
required:
- compatible
- reg
diff --git a/Documentation/devicetree/bindings/display/bridge/simple-bridge.yaml b/Documentation/devicetree/bindings/display/bridge/simple-bridge.yaml
new file mode 100644
index 000000000000..0880cbf217d5
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/simple-bridge.yaml
@@ -0,0 +1,99 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/simple-bridge.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Transparent non-programmable DRM bridges
+
+maintainers:
+ - Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
+ - Maxime Ripard <mripard@kernel.org>
+
+description: |
+ This binding supports transparent non-programmable bridges that don't require
+ any configuration, with a single input and a single output.
+
+properties:
+ compatible:
+ oneOf:
+ - items:
+ - enum:
+ - ti,ths8134a
+ - ti,ths8134b
+ - const: ti,ths8134
+ - enum:
+ - adi,adv7123
+ - dumb-vga-dac
+ - ti,opa362
+ - ti,ths8134
+ - ti,ths8135
+
+ ports:
+ type: object
+ description: |
+ This device has two video ports. Their connections are modeled using the
+ OF graph bindings specified in Documentation/devicetree/bindings/graph.txt.
+ properties:
+ '#address-cells':
+ const: 1
+
+ '#size-cells':
+ const: 0
+
+ port@0:
+ type: object
+ description: The bridge input
+
+ port@1:
+ type: object
+ description: The bridge output
+
+ required:
+ - port@0
+ - port@1
+
+ additionalProperties: false
+
+ enable-gpios:
+ maxItems: 1
+ description: GPIO controlling bridge enable
+
+ vdd-supply:
+ maxItems: 1
+ description: Power supply for the bridge
+
+required:
+ - compatible
+ - ports
+
+additionalProperties: false
+
+examples:
+ - |
+ bridge {
+ compatible = "ti,ths8134a", "ti,ths8134";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ vga_bridge_in: endpoint {
+ remote-endpoint = <&tcon0_out_vga>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+
+ vga_bridge_out: endpoint {
+ remote-endpoint = <&vga_con_in>;
+ };
+ };
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/bridge/snps,dw-mipi-dsi.yaml b/Documentation/devicetree/bindings/display/bridge/snps,dw-mipi-dsi.yaml
new file mode 100644
index 000000000000..012aa8e7cb8c
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/snps,dw-mipi-dsi.yaml
@@ -0,0 +1,68 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/snps,dw-mipi-dsi.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Synopsys DesignWare MIPI DSI host controller
+
+maintainers:
+ - Philippe CORNU <philippe.cornu@st.com>
+
+description: |
+ This document defines device tree properties for the Synopsys DesignWare MIPI
+ DSI host controller. It doesn't constitute a device tree binding specification
+ by itself but is meant to be referenced by platform-specific device tree
+ bindings.
+
+ When referenced from platform device tree bindings the properties defined in
+ this document are defined as follows. The platform device tree bindings are
+ responsible for defining whether each property is required or optional.
+
+allOf:
+ - $ref: ../dsi-controller.yaml#
+
+properties:
+ reg:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: Module clock
+ - description: DSI bus clock for either AHB and APB
+ - description: Pixel clock for the DPI/RGB input
+ minItems: 2
+
+ clock-names:
+ items:
+ - const: ref
+ - const: pclk
+ - const: px_clk
+ minItems: 2
+
+ resets:
+ maxItems: 1
+
+ reset-names:
+ const: apb
+
+ ports:
+ type: object
+
+ properties:
+ port@0:
+ type: object
+ description: Input node to receive pixel data.
+ port@1:
+ type: object
+ description: DSI output node to panel.
+
+ required:
+ - port@0
+ - port@1
+
+required:
+ - clock-names
+ - clocks
+ - ports
+ - reg
diff --git a/Documentation/devicetree/bindings/display/bridge/thine,thc63lvd1024.txt b/Documentation/devicetree/bindings/display/bridge/thine,thc63lvd1024.txt
deleted file mode 100644
index d17d1e5820d7..000000000000
--- a/Documentation/devicetree/bindings/display/bridge/thine,thc63lvd1024.txt
+++ /dev/null
@@ -1,66 +0,0 @@
-Thine Electronics THC63LVD1024 LVDS decoder
--------------------------------------------
-
-The THC63LVD1024 is a dual link LVDS receiver designed to convert LVDS streams
-to parallel data outputs. The chip supports single/dual input/output modes,
-handling up to two LVDS input streams and up to two digital CMOS/TTL outputs.
-
-Single or dual operation mode, output data mapping and DDR output modes are
-configured through input signals and the chip does not expose any control bus.
-
-Required properties:
-- compatible: Shall be "thine,thc63lvd1024"
-- vcc-supply: Power supply for TTL output, TTL CLOCKOUT signal, LVDS input,
- PPL and digital circuitry
-
-Optional properties:
-- powerdown-gpios: Power down GPIO signal, pin name "/PDWN". Active low
-- oe-gpios: Output enable GPIO signal, pin name "OE". Active high
-
-The THC63LVD1024 video port connections are modeled according
-to OF graph bindings specified by Documentation/devicetree/bindings/graph.txt
-
-Required video port nodes:
-- port@0: First LVDS input port
-- port@2: First digital CMOS/TTL parallel output
-
-Optional video port nodes:
-- port@1: Second LVDS input port
-- port@3: Second digital CMOS/TTL parallel output
-
-The device can operate in single-link mode or dual-link mode. In single-link
-mode, all pixels are received on port@0, and port@1 shall not contain any
-endpoint. In dual-link mode, even-numbered pixels are received on port@0 and
-odd-numbered pixels on port@1, and both port@0 and port@1 shall contain
-endpoints.
-
-Example:
---------
-
- thc63lvd1024: lvds-decoder {
- compatible = "thine,thc63lvd1024";
-
- vcc-supply = <&reg_lvds_vcc>;
- powerdown-gpios = <&gpio4 15 GPIO_ACTIVE_LOW>;
-
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
-
- port@0 {
- reg = <0>;
-
- lvds_dec_in_0: endpoint {
- remote-endpoint = <&lvds_out>;
- };
- };
-
- port@2{
- reg = <2>;
-
- lvds_dec_out_2: endpoint {
- remote-endpoint = <&adv7511_in>;
- };
- };
- };
- };
diff --git a/Documentation/devicetree/bindings/display/bridge/thine,thc63lvd1024.yaml b/Documentation/devicetree/bindings/display/bridge/thine,thc63lvd1024.yaml
new file mode 100644
index 000000000000..469ac4a34273
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/thine,thc63lvd1024.yaml
@@ -0,0 +1,121 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/thine,thc63lvd1024.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Thine Electronics THC63LVD1024 LVDS Decoder
+
+maintainers:
+ - Jacopo Mondi <jacopo+renesas@jmondi.org>
+ - Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
+
+description: |
+ The THC63LVD1024 is a dual link LVDS receiver designed to convert LVDS
+ streams to parallel data outputs. The chip supports single/dual input/output
+ modes, handling up to two LVDS input streams and up to two digital CMOS/TTL
+ outputs.
+
+ Single or dual operation mode, output data mapping and DDR output modes are
+ configured through input signals and the chip does not expose any control
+ bus.
+
+properties:
+ compatible:
+ const: thine,thc63lvd1024
+
+ ports:
+ type: object
+ description: |
+ This device has four video ports. Their connections are modeled using the
+ OF graph bindings specified in Documentation/devicetree/bindings/graph.txt.
+
+ The device can operate in single-link mode or dual-link mode. In
+ single-link mode, all pixels are received on port@0, and port@1 shall not
+ contain any endpoint. In dual-link mode, even-numbered pixels are
+ received on port@0 and odd-numbered pixels on port@1, and both port@0 and
+ port@1 shall contain endpoints.
+
+ properties:
+ '#address-cells':
+ const: 1
+
+ '#size-cells':
+ const: 0
+
+ port@0:
+ type: object
+ description: First LVDS input port
+
+ port@1:
+ type: object
+ description: Second LVDS input port
+
+ port@2:
+ type: object
+ description: First digital CMOS/TTL parallel output
+
+ port@3:
+ type: object
+ description: Second digital CMOS/TTL parallel output
+
+ required:
+ - port@0
+ - port@2
+
+ additionalProperties: false
+
+ oe-gpios:
+ maxItems: 1
+ description: Output enable GPIO signal, pin name "OE", active high.
+
+ powerdown-gpios:
+ maxItems: 1
+ description: Power down GPIO signal, pin name "/PDWN", active low.
+
+ vcc-supply:
+ maxItems: 1
+ description:
+ Power supply for the TTL output, TTL CLOCKOUT signal, LVDS input, PLL and
+ digital circuitry.
+
+required:
+ - compatible
+ - ports
+ - vcc-supply
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+
+ lvds-decoder {
+ compatible = "thine,thc63lvd1024";
+
+ vcc-supply = <&reg_lvds_vcc>;
+ powerdown-gpios = <&gpio4 15 GPIO_ACTIVE_LOW>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ lvds_dec_in_0: endpoint {
+ remote-endpoint = <&lvds_out>;
+ };
+ };
+
+ port@2 {
+ reg = <2>;
+
+ lvds_dec_out_2: endpoint {
+ remote-endpoint = <&adv7511_in>;
+ };
+ };
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/bridge/ti,ths813x.txt b/Documentation/devicetree/bindings/display/bridge/ti,ths813x.txt
deleted file mode 100644
index df3d7c1ac09e..000000000000
--- a/Documentation/devicetree/bindings/display/bridge/ti,ths813x.txt
+++ /dev/null
@@ -1,51 +0,0 @@
-THS8134 and THS8135 Video DAC
------------------------------
-
-This is the binding for Texas Instruments THS8134, THS8134A, THS8134B and
-THS8135 Video DAC bridges.
-
-Required properties:
-
-- compatible: Must be one of
- "ti,ths8134"
- "ti,ths8134a," "ti,ths8134"
- "ti,ths8134b", "ti,ths8134"
- "ti,ths8135"
-
-Required nodes:
-
-This device has two video ports. Their connections are modelled using the OF
-graph bindings specified in Documentation/devicetree/bindings/graph.txt.
-
-- Video port 0 for RGB input
-- Video port 1 for VGA output
-
-Example
--------
-
-vga-bridge {
- compatible = "ti,ths8135";
- #address-cells = <1>;
- #size-cells = <0>;
-
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
-
- port@0 {
- reg = <0>;
-
- vga_bridge_in: endpoint {
- remote-endpoint = <&lcdc_out_vga>;
- };
- };
-
- port@1 {
- reg = <1>;
-
- vga_bridge_out: endpoint {
- remote-endpoint = <&vga_con_in>;
- };
- };
- };
-};
diff --git a/Documentation/devicetree/bindings/display/dsi-controller.yaml b/Documentation/devicetree/bindings/display/dsi-controller.yaml
index fd986c36c737..85b71b1fd28a 100644
--- a/Documentation/devicetree/bindings/display/dsi-controller.yaml
+++ b/Documentation/devicetree/bindings/display/dsi-controller.yaml
@@ -28,7 +28,7 @@ description: |
properties:
$nodename:
- pattern: "^dsi-controller(@.*)?$"
+ pattern: "^dsi(@.*)?$"
"#address-cells":
const: 1
@@ -76,7 +76,7 @@ patternProperties:
examples:
- |
#include <dt-bindings/gpio/gpio.h>
- dsi-controller@a0351000 {
+ dsi@a0351000 {
reg = <0xa0351000 0x1000>;
#address-cells = <1>;
#size-cells = <0>;
diff --git a/Documentation/devicetree/bindings/display/mediatek/mediatek,dpi.txt b/Documentation/devicetree/bindings/display/mediatek/mediatek,dpi.txt
index 58914cf681b8..77def4456706 100644
--- a/Documentation/devicetree/bindings/display/mediatek/mediatek,dpi.txt
+++ b/Documentation/devicetree/bindings/display/mediatek/mediatek,dpi.txt
@@ -17,6 +17,9 @@ Required properties:
Documentation/devicetree/bindings/graph.txt. This port should be connected
to the input port of an attached HDMI or LVDS encoder chip.
+Optional properties:
+- pinctrl-names: Contain "default" and "sleep".
+
Example:
dpi0: dpi@1401d000 {
@@ -27,6 +30,9 @@ dpi0: dpi@1401d000 {
<&mmsys CLK_MM_DPI_ENGINE>,
<&apmixedsys CLK_APMIXED_TVDPLL>;
clock-names = "pixel", "engine", "pll";
+ pinctrl-names = "default", "sleep";
+ pinctrl-0 = <&dpi_pin_func>;
+ pinctrl-1 = <&dpi_pin_idle>;
port {
dpi0_out: endpoint {
diff --git a/Documentation/devicetree/bindings/display/mediatek/mediatek,dsi.txt b/Documentation/devicetree/bindings/display/mediatek/mediatek,dsi.txt
index a19a6cc375ed..8e4729de8c85 100644
--- a/Documentation/devicetree/bindings/display/mediatek/mediatek,dsi.txt
+++ b/Documentation/devicetree/bindings/display/mediatek/mediatek,dsi.txt
@@ -33,6 +33,13 @@ Required properties:
- #clock-cells: must be <0>;
- #phy-cells: must be <0>.
+Optional properties:
+- drive-strength-microamp: adjust driving current, should be 3000 ~ 6000. And
+ the step is 200.
+- nvmem-cells: A phandle to the calibration data provided by a nvmem device. If
+ unspecified default values shall be used.
+- nvmem-cell-names: Should be "calibration-data"
+
Example:
mipi_tx0: mipi-dphy@10215000 {
@@ -42,6 +49,9 @@ mipi_tx0: mipi-dphy@10215000 {
clock-output-names = "mipi_tx0_pll";
#clock-cells = <0>;
#phy-cells = <0>;
+ drive-strength-microamp = <4600>;
+ nvmem-cells= <&mipi_tx_calibration>;
+ nvmem-cell-names = "calibration-data";
};
dsi0: dsi@1401b000 {
diff --git a/Documentation/devicetree/bindings/display/panel/arm,versatile-tft-panel.txt b/Documentation/devicetree/bindings/display/panel/arm,versatile-tft-panel.txt
deleted file mode 100644
index 0601a9e34703..000000000000
--- a/Documentation/devicetree/bindings/display/panel/arm,versatile-tft-panel.txt
+++ /dev/null
@@ -1,31 +0,0 @@
-ARM Versatile TFT Panels
-
-These panels are connected to the daughterboards found on the
-ARM Versatile reference designs.
-
-This device node must appear as a child to a "syscon"-compatible
-node.
-
-Required properties:
-- compatible: should be "arm,versatile-tft-panel"
-
-Required subnodes:
-- port: see display/panel/panel-common.yaml, graph.txt
-
-
-Example:
-
-sysreg@0 {
- compatible = "arm,versatile-sysreg", "syscon", "simple-mfd";
- reg = <0x00000 0x1000>;
-
- panel: display@0 {
- compatible = "arm,versatile-tft-panel";
-
- port {
- panel_in: endpoint {
- remote-endpoint = <&foo>;
- };
- };
- };
-};
diff --git a/Documentation/devicetree/bindings/display/panel/arm,versatile-tft-panel.yaml b/Documentation/devicetree/bindings/display/panel/arm,versatile-tft-panel.yaml
new file mode 100644
index 000000000000..41fd5713c156
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/panel/arm,versatile-tft-panel.yaml
@@ -0,0 +1,54 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/panel/arm,versatile-tft-panel.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ARM Versatile TFT Panels
+
+maintainers:
+ - Linus Walleij <linus.walleij@linaro.org>
+
+description: |
+ These panels are connected to the daughterboards found on the
+ ARM Versatile reference designs.
+
+ This device node must appear as a child to a "syscon"-compatible
+ node.
+
+allOf:
+ - $ref: panel-common.yaml#
+
+properties:
+ compatible:
+ const: arm,versatile-tft-panel
+
+ port: true
+
+required:
+ - compatible
+ - port
+
+additionalProperties: false
+
+examples:
+ - |
+ sysreg {
+ compatible = "arm,versatile-sysreg", "syscon", "simple-mfd";
+ reg = <0x00000 0x1000>;
+
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ panel {
+ compatible = "arm,versatile-tft-panel";
+
+ port {
+ panel_in: endpoint {
+ remote-endpoint = <&foo>;
+ };
+ };
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/panel/asus,z00t-tm5p5-nt35596.yaml b/Documentation/devicetree/bindings/display/panel/asus,z00t-tm5p5-nt35596.yaml
new file mode 100644
index 000000000000..083d2b9d0c69
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/panel/asus,z00t-tm5p5-nt35596.yaml
@@ -0,0 +1,56 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/panel/asus,z00t-tm5p5-nt35596.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ASUS Z00T TM5P5 NT35596 5.5" 1080×1920 LCD Panel
+
+maintainers:
+ - Konrad Dybcio <konradybcio@gmail.com>
+
+description: |+
+ This panel seems to only be found in the Asus Z00T
+ smartphone and we have no straightforward way of
+ actually getting the correct model number,
+ as no schematics are released publicly.
+
+allOf:
+ - $ref: panel-common.yaml#
+
+properties:
+ compatible:
+ const: asus,z00t-tm5p5-n35596
+ reg: true
+ reset-gpios: true
+ vdd-supply:
+ description: core voltage supply
+ vddio-supply:
+ description: vddio supply
+
+required:
+ - compatible
+ - reg
+ - vdd-supply
+ - vddio-supply
+ - reset-gpios
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+
+ dsi {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ panel@0 {
+ reg = <0>;
+
+ compatible = "asus,z00t-tm5p5-n35596";
+
+ vdd-supply = <&pm8916_l8>;
+ vddio-supply = <&pm8916_l6>;
+ reset-gpios = <&msmgpio 25 GPIO_ACTIVE_HIGH>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/panel/boe,himax8279d.txt b/Documentation/devicetree/bindings/display/panel/boe,himax8279d.txt
deleted file mode 100644
index 3caea2172b1b..000000000000
--- a/Documentation/devicetree/bindings/display/panel/boe,himax8279d.txt
+++ /dev/null
@@ -1,24 +0,0 @@
-Boe Himax8279d 1200x1920 TFT LCD panel
-
-Required properties:
-- compatible: should be "boe,himax8279d8p" and one of: "boe,himax8279d10p"
-- reg: DSI virtual channel of the peripheral
-- enable-gpios: panel enable gpio
-- pp33-gpios: a GPIO phandle for the 3.3v pin that provides the supply voltage
-- pp18-gpios: a GPIO phandle for the 1.8v pin that provides the supply voltage
-
-Optional properties:
-- backlight: phandle of the backlight device attached to the panel
-
-Example:
-
- &mipi_dsi {
- panel {
- compatible = "boe,himax8279d8p", "boe,himax8279d10p";
- reg = <0>;
- backlight = <&backlight>;
- enable-gpios = <&gpio 45 GPIO_ACTIVE_HIGH>;
- pp33-gpios = <&gpio 35 GPIO_ACTIVE_HIGH>;
- pp18-gpios = <&gpio 36 GPIO_ACTIVE_HIGH>;
- };
- };
diff --git a/Documentation/devicetree/bindings/display/panel/boe,himax8279d.yaml b/Documentation/devicetree/bindings/display/panel/boe,himax8279d.yaml
new file mode 100644
index 000000000000..272a3a018a33
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/panel/boe,himax8279d.yaml
@@ -0,0 +1,59 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/panel/boe,himax8279d.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Boe Himax8279d 1200x1920 TFT LCD panel
+
+maintainers:
+ - Jerry Han <jerry.han.hq@gmail.com>
+
+allOf:
+ - $ref: panel-common.yaml#
+
+properties:
+ compatible:
+ items:
+ - const: boe,himax8279d8p
+ - const: boe,himax8279d10p
+
+ backlight: true
+ enable-gpios: true
+ reg: true
+
+ pp33-gpios:
+ maxItems: 1
+ description: GPIO for the 3.3v pin that provides the supply voltage
+
+ pp18-gpios:
+ maxItems: 1
+ description: GPIO for the 1.8v pin that provides the supply voltage
+
+required:
+ - compatible
+ - reg
+ - enable-gpios
+ - pp33-gpios
+ - pp18-gpios
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+
+ dsi {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ panel@0 {
+ compatible = "boe,himax8279d8p", "boe,himax8279d10p";
+ reg = <0>;
+ backlight = <&backlight>;
+ enable-gpios = <&gpio 45 GPIO_ACTIVE_HIGH>;
+ pp33-gpios = <&gpio 35 GPIO_ACTIVE_HIGH>;
+ pp18-gpios = <&gpio 36 GPIO_ACTIVE_HIGH>;
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/panel/boe,tv101wum-nl6.yaml b/Documentation/devicetree/bindings/display/panel/boe,tv101wum-nl6.yaml
index 740213459134..7f5df5851017 100644
--- a/Documentation/devicetree/bindings/display/panel/boe,tv101wum-nl6.yaml
+++ b/Documentation/devicetree/bindings/display/panel/boe,tv101wum-nl6.yaml
@@ -24,6 +24,8 @@ properties:
- boe,tv101wum-n53
# AUO B101UAN08.3 10.1" WUXGA TFT LCD panel
- auo,b101uan08.3
+ # BOE TV105WUM-NW0 10.5" WUXGA TFT LCD panel
+ - boe,tv105wum-nw0
reg:
description: the virtual channel number of a DSI peripheral
diff --git a/Documentation/devicetree/bindings/display/panel/display-timings.yaml b/Documentation/devicetree/bindings/display/panel/display-timings.yaml
index c8c0c9cb0492..56903ded005e 100644
--- a/Documentation/devicetree/bindings/display/panel/display-timings.yaml
+++ b/Documentation/devicetree/bindings/display/panel/display-timings.yaml
@@ -4,7 +4,7 @@
$id: http://devicetree.org/schemas/display/panel/display-timings.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
-title: display timing bindings
+title: display timings bindings
maintainers:
- Thierry Reding <thierry.reding@gmail.com>
@@ -14,7 +14,7 @@ maintainers:
description: |
A display panel may be able to handle several display timings,
with different resolutions.
- The display-timings node makes it possible to specify the timing
+ The display-timings node makes it possible to specify the timings
and to specify the timing that is native for the display.
properties:
@@ -25,8 +25,8 @@ properties:
$ref: /schemas/types.yaml#/definitions/phandle
description: |
The default display timing is the one specified as native-mode.
- If no native-mode is specified then the first node is assumed the
- native mode.
+ If no native-mode is specified then the first node is assumed
+ to be the native mode.
patternProperties:
"^timing":
diff --git a/Documentation/devicetree/bindings/display/panel/feiyang,fy07024di26a30d.txt b/Documentation/devicetree/bindings/display/panel/feiyang,fy07024di26a30d.txt
deleted file mode 100644
index 82caa7b65ae8..000000000000
--- a/Documentation/devicetree/bindings/display/panel/feiyang,fy07024di26a30d.txt
+++ /dev/null
@@ -1,20 +0,0 @@
-Feiyang FY07024DI26A30-D 7" MIPI-DSI LCD Panel
-
-Required properties:
-- compatible: must be "feiyang,fy07024di26a30d"
-- reg: DSI virtual channel used by that screen
-- avdd-supply: analog regulator dc1 switch
-- dvdd-supply: 3v3 digital regulator
-- reset-gpios: a GPIO phandle for the reset pin
-
-Optional properties:
-- backlight: phandle for the backlight control.
-
-panel@0 {
- compatible = "feiyang,fy07024di26a30d";
- reg = <0>;
- avdd-supply = <&reg_dc1sw>;
- dvdd-supply = <&reg_dldo2>;
- reset-gpios = <&pio 3 24 GPIO_ACTIVE_HIGH>; /* LCD-RST: PD24 */
- backlight = <&backlight>;
-};
diff --git a/Documentation/devicetree/bindings/display/panel/feiyang,fy07024di26a30d.yaml b/Documentation/devicetree/bindings/display/panel/feiyang,fy07024di26a30d.yaml
new file mode 100644
index 000000000000..95acf9e96f1c
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/panel/feiyang,fy07024di26a30d.yaml
@@ -0,0 +1,58 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/panel/feiyang,fy07024di26a30d.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Feiyang FY07024DI26A30-D 7" MIPI-DSI LCD Panel
+
+maintainers:
+ - Jagan Teki <jagan@amarulasolutions.com>
+
+allOf:
+ - $ref: panel-common.yaml#
+
+properties:
+ compatible:
+ const: feiyang,fy07024di26a30d
+
+ reg:
+ description: DSI virtual channel used by that screen
+ maxItems: 1
+
+ avdd-supply:
+ description: analog regulator dc1 switch
+
+ dvdd-supply:
+ description: 3v3 digital regulator
+
+ reset-gpios: true
+
+ backlight: true
+
+required:
+ - compatible
+ - reg
+ - avdd-supply
+ - dvdd-supply
+ - reset-gpios
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+
+ dsi {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ panel@0 {
+ compatible = "feiyang,fy07024di26a30d";
+ reg = <0>;
+ avdd-supply = <&reg_dc1sw>;
+ dvdd-supply = <&reg_dldo2>;
+ reset-gpios = <&pio 3 24 GPIO_ACTIVE_HIGH>; /* LCD-RST: PD24 */
+ backlight = <&backlight>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/panel/ilitek,ili9322.txt b/Documentation/devicetree/bindings/display/panel/ilitek,ili9322.txt
deleted file mode 100644
index 3d5ce6ad6ec7..000000000000
--- a/Documentation/devicetree/bindings/display/panel/ilitek,ili9322.txt
+++ /dev/null
@@ -1,49 +0,0 @@
-Ilitek ILI9322 TFT panel driver with SPI control bus
-
-This is a driver for 320x240 TFT panels, accepting a variety of input
-streams that get adapted and scaled to the panel. The panel output has
-960 TFT source driver pins and 240 TFT gate driver pins, VCOM, VCOML and
-VCOMH outputs.
-
-Required properties:
- - compatible: "dlink,dir-685-panel", "ilitek,ili9322"
- (full system-specific compatible is always required to look up configuration)
- - reg: address of the panel on the SPI bus
-
-Optional properties:
- - vcc-supply: core voltage supply, see regulator/regulator.txt
- - iovcc-supply: voltage supply for the interface input/output signals,
- see regulator/regulator.txt
- - vci-supply: voltage supply for analog parts, see regulator/regulator.txt
- - reset-gpios: a GPIO spec for the reset pin, see gpio/gpio.txt
-
- The following optional properties only apply to RGB and YUV input modes and
- can be omitted for BT.656 input modes:
-
- - pixelclk-active: see display/panel/display-timing.txt
- - de-active: see display/panel/display-timing.txt
- - hsync-active: see display/panel/display-timing.txt
- - vsync-active: see display/panel/display-timing.txt
-
-The panel must obey the rules for a SPI slave device as specified in
-spi/spi-bus.txt
-
-The device node can contain one 'port' child node with one child
-'endpoint' node, according to the bindings defined in
-media/video-interfaces.txt. This node should describe panel's video bus.
-
-Example:
-
-panel: display@0 {
- compatible = "dlink,dir-685-panel", "ilitek,ili9322";
- reg = <0>;
- vcc-supply = <&vdisp>;
- iovcc-supply = <&vdisp>;
- vci-supply = <&vdisp>;
-
- port {
- panel_in: endpoint {
- remote-endpoint = <&display_out>;
- };
- };
-};
diff --git a/Documentation/devicetree/bindings/display/panel/ilitek,ili9322.yaml b/Documentation/devicetree/bindings/display/panel/ilitek,ili9322.yaml
new file mode 100644
index 000000000000..177d48c5bd97
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/panel/ilitek,ili9322.yaml
@@ -0,0 +1,71 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/panel/ilitek,ili9322.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Ilitek ILI9322 TFT panel driver with SPI control bus
+
+maintainers:
+ - Linus Walleij <linus.walleij@linaro.org>
+
+description: |
+ This is a driver for 320x240 TFT panels, accepting a variety of input
+ streams that get adapted and scaled to the panel. The panel output has
+ 960 TFT source driver pins and 240 TFT gate driver pins, VCOM, VCOML and
+ VCOMH outputs.
+
+ The panel must obey the rules for a SPI slave device as specified in
+ spi/spi-controller.yaml
+
+allOf:
+ - $ref: panel-common.yaml#
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - dlink,dir-685-panel
+
+ - const: ilitek,ili9322
+
+ reset-gpios: true
+ port: true
+
+ vcc-supply:
+ description: Core voltage supply
+
+ iovcc-supply:
+ description: Voltage supply for the interface input/output signals
+
+ vci-supply:
+ description: Voltage supply for analog parts
+
+required:
+ - compatible
+ - reg
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ spi {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ panel: display@0 {
+ compatible = "dlink,dir-685-panel", "ilitek,ili9322";
+ reg = <0>;
+ vcc-supply = <&vdisp>;
+ iovcc-supply = <&vdisp>;
+ vci-supply = <&vdisp>;
+
+ port {
+ panel_in: endpoint {
+ remote-endpoint = <&display_out>;
+ };
+ };
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/panel/ilitek,ili9881c.txt b/Documentation/devicetree/bindings/display/panel/ilitek,ili9881c.txt
deleted file mode 100644
index 4a041acb4e18..000000000000
--- a/Documentation/devicetree/bindings/display/panel/ilitek,ili9881c.txt
+++ /dev/null
@@ -1,20 +0,0 @@
-Ilitek ILI9881c based MIPI-DSI panels
-
-Required properties:
- - compatible: must be "ilitek,ili9881c" and one of:
- * "bananapi,lhr050h41"
- - reg: DSI virtual channel used by that screen
- - power-supply: phandle to the power regulator
- - reset-gpios: a GPIO phandle for the reset pin
-
-Optional properties:
- - backlight: phandle to the backlight used
-
-Example:
-panel@0 {
- compatible = "bananapi,lhr050h41", "ilitek,ili9881c";
- reg = <0>;
- power-supply = <&reg_display>;
- reset-gpios = <&r_pio 0 5 GPIO_ACTIVE_LOW>; /* PL05 */
- backlight = <&pwm_bl>;
-};
diff --git a/Documentation/devicetree/bindings/display/panel/ilitek,ili9881c.yaml b/Documentation/devicetree/bindings/display/panel/ilitek,ili9881c.yaml
new file mode 100644
index 000000000000..a39332276bab
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/panel/ilitek,ili9881c.yaml
@@ -0,0 +1,50 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/panel/ilitek,ili9881c.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Ilitek ILI9881c based MIPI-DSI panels
+
+maintainers:
+ - Maxime Ripard <mripard@kernel.org>
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - bananapi,lhr050h41
+
+ - const: ilitek,ili9881c
+
+ backlight: true
+ power-supply: true
+ reg: true
+ reset-gpios: true
+
+required:
+ - compatible
+ - power-supply
+ - reg
+ - reset-gpios
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+
+ dsi {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ panel@0 {
+ compatible = "bananapi,lhr050h41", "ilitek,ili9881c";
+ reg = <0>;
+ power-supply = <&reg_display>;
+ reset-gpios = <&r_pio 0 5 GPIO_ACTIVE_LOW>; /* PL05 */
+ backlight = <&pwm_bl>;
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/panel/innolux,p097pfg.txt b/Documentation/devicetree/bindings/display/panel/innolux,p097pfg.txt
deleted file mode 100644
index d1cab3a8f0fb..000000000000
--- a/Documentation/devicetree/bindings/display/panel/innolux,p097pfg.txt
+++ /dev/null
@@ -1,24 +0,0 @@
-Innolux P097PFG 9.7" 1536x2048 TFT LCD panel
-
-Required properties:
-- compatible: should be "innolux,p097pfg"
-- reg: DSI virtual channel of the peripheral
-- avdd-supply: phandle of the regulator that provides positive voltage
-- avee-supply: phandle of the regulator that provides negative voltage
-- enable-gpios: panel enable gpio
-
-Optional properties:
-- backlight: phandle of the backlight device attached to the panel
-
-Example:
-
- &mipi_dsi {
- panel@0 {
- compatible = "innolux,p079zca";
- reg = <0>;
- avdd-supply = <...>;
- avee-supply = <...>;
- backlight = <&backlight>;
- enable-gpios = <&gpio1 13 GPIO_ACTIVE_HIGH>;
- };
- };
diff --git a/Documentation/devicetree/bindings/display/panel/innolux,p097pfg.yaml b/Documentation/devicetree/bindings/display/panel/innolux,p097pfg.yaml
new file mode 100644
index 000000000000..5a5f071627fb
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/panel/innolux,p097pfg.yaml
@@ -0,0 +1,56 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/panel/innolux,p097pfg.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Innolux P097PFG 9.7" 1536x2048 TFT LCD panel
+
+maintainers:
+ - Lin Huang <hl@rock-chips.com>
+
+allOf:
+ - $ref: panel-common.yaml#
+
+properties:
+ compatible:
+ const: innolux,p097pfg
+
+ backlight: true
+ enable-gpios: true
+ reg: true
+
+ avdd-supply:
+ description: The regulator that provides positive voltage
+
+ avee-supply:
+ description: The regulator that provides negative voltage
+
+required:
+ - compatible
+ - reg
+ - avdd-supply
+ - avee-supply
+ - enable-gpios
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+
+ dsi {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ panel@0 {
+ compatible = "innolux,p097pfg";
+ reg = <0>;
+ avdd-supply = <&avdd>;
+ avee-supply = <&avee>;
+ backlight = <&backlight>;
+ enable-gpios = <&gpio1 13 GPIO_ACTIVE_HIGH>;
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/panel/innolux,p120zdg-bf1.txt b/Documentation/devicetree/bindings/display/panel/innolux,p120zdg-bf1.txt
deleted file mode 100644
index 513f03466aba..000000000000
--- a/Documentation/devicetree/bindings/display/panel/innolux,p120zdg-bf1.txt
+++ /dev/null
@@ -1,22 +0,0 @@
-Innolux P120ZDG-BF1 12.02 inch eDP 2K display panel
-
-This binding is compatible with the simple-panel binding, which is specified
-in simple-panel.txt in this directory.
-
-Required properties:
-- compatible: should be "innolux,p120zdg-bf1"
-- power-supply: regulator to provide the supply voltage
-
-Optional properties:
-- enable-gpios: GPIO pin to enable or disable the panel
-- backlight: phandle of the backlight device attached to the panel
-- no-hpd: If HPD isn't hooked up; add this property.
-
-Example:
- panel_edp: panel-edp {
- compatible = "innolux,p120zdg-bf1";
- enable-gpios = <&msmgpio 31 GPIO_ACTIVE_LOW>;
- power-supply = <&pm8916_l2>;
- backlight = <&backlight>;
- no-hpd;
- };
diff --git a/Documentation/devicetree/bindings/display/panel/innolux,p120zdg-bf1.yaml b/Documentation/devicetree/bindings/display/panel/innolux,p120zdg-bf1.yaml
new file mode 100644
index 000000000000..243dac2416f3
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/panel/innolux,p120zdg-bf1.yaml
@@ -0,0 +1,43 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/panel/innolux,p120zdg-bf1.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Innolux P120ZDG-BF1 12.02 inch eDP 2K display panel
+
+maintainers:
+ - Sandeep Panda <spanda@codeaurora.org>
+ - Douglas Anderson <dianders@chromium.org>
+
+allOf:
+ - $ref: panel-common.yaml#
+
+properties:
+ compatible:
+ const: innolux,p120zdg-bf1
+
+ enable-gpios: true
+ power-supply: true
+ backlight: true
+ no-hpd: true
+
+required:
+ - compatible
+ - power-supply
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+
+ panel_edp: panel-edp {
+ compatible = "innolux,p120zdg-bf1";
+ enable-gpios = <&msmgpio 31 GPIO_ACTIVE_LOW>;
+ power-supply = <&pm8916_l2>;
+ backlight = <&backlight>;
+ no-hpd;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/panel/jdi,lt070me05000.txt b/Documentation/devicetree/bindings/display/panel/jdi,lt070me05000.txt
deleted file mode 100644
index 4989c91d505f..000000000000
--- a/Documentation/devicetree/bindings/display/panel/jdi,lt070me05000.txt
+++ /dev/null
@@ -1,31 +0,0 @@
-JDI model LT070ME05000 1200x1920 7" DSI Panel
-
-Required properties:
-- compatible: should be "jdi,lt070me05000"
-- vddp-supply: phandle of the regulator that provides the supply voltage
- Power IC supply (3-5V)
-- iovcc-supply: phandle of the regulator that provides the supply voltage
- IOVCC , power supply for LCM (1.8V)
-- enable-gpios: phandle of gpio for enable line
- LED_EN, LED backlight enable, High active
-- reset-gpios: phandle of gpio for reset line
- This should be 8mA, gpio can be configured using mux, pinctrl, pinctrl-names
- XRES, Reset, Low active
-- dcdc-en-gpios: phandle of the gpio for power ic line
- Power IC supply enable, High active
-
-Example:
-
- dsi0: qcom,mdss_dsi@4700000 {
- panel@0 {
- compatible = "jdi,lt070me05000";
- reg = <0>;
-
- vddp-supply = <&pm8921_l17>;
- iovcc-supply = <&pm8921_lvs7>;
-
- enable-gpios = <&pm8921_gpio 36 GPIO_ACTIVE_HIGH>;
- reset-gpios = <&tlmm_pinmux 54 GPIO_ACTIVE_LOW>;
- dcdc-en-gpios = <&pm8921_gpio 23 GPIO_ACTIVE_HIGH>;
- };
- };
diff --git a/Documentation/devicetree/bindings/display/panel/jdi,lt070me05000.yaml b/Documentation/devicetree/bindings/display/panel/jdi,lt070me05000.yaml
new file mode 100644
index 000000000000..b8b9435e464c
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/panel/jdi,lt070me05000.yaml
@@ -0,0 +1,69 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/panel/jdi,lt070me05000.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: JDI model LT070ME05000 1200x1920 7" DSI Panel
+
+maintainers:
+ - Vinay Simha BN <simhavcs@gmail.com>
+
+allOf:
+ - $ref: panel-common.yaml#
+
+properties:
+ compatible:
+ const: jdi,lt070me05000
+
+ enable-gpios: true
+ reg: true
+ reset-gpios: true
+
+ vddp-supply:
+ description: |
+ The regulator that provides the Power IC supply voltage (3-5V)
+
+ iovcc-supply:
+ description: |
+ The regulator that provides the supply voltage IOVCC,
+ power supply for LCM (1.8V)
+
+ dcdc-en-gpios:
+ description: |
+ phandle of the GPIO for the power IC enable line
+ Power IC supply enable, active high
+
+required:
+ - compatible
+ - reg
+ - vddp-supply
+ - iovcc-supply
+ - enable-gpios
+ - reset-gpios
+ - dcdc-en-gpios
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+
+ dsi {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ panel@0 {
+ compatible = "jdi,lt070me05000";
+ reg = <0>;
+
+ vddp-supply = <&pm8921_l17>;
+ iovcc-supply = <&pm8921_lvs7>;
+
+ enable-gpios = <&pm8921_gpio 36 GPIO_ACTIVE_HIGH>;
+ reset-gpios = <&tlmm_pinmux 54 GPIO_ACTIVE_LOW>;
+ dcdc-en-gpios = <&pm8921_gpio 23 GPIO_ACTIVE_HIGH>;
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/panel/kingdisplay,kd035g6-54nt.txt b/Documentation/devicetree/bindings/display/panel/kingdisplay,kd035g6-54nt.txt
deleted file mode 100644
index fa9596082e44..000000000000
--- a/Documentation/devicetree/bindings/display/panel/kingdisplay,kd035g6-54nt.txt
+++ /dev/null
@@ -1,42 +0,0 @@
-King Display KD035G6-54NT 3.5" (320x240 pixels) 24-bit TFT LCD panel
-
-Required properties:
-- compatible: should be "kingdisplay,kd035g6-54nt"
-- power-supply: See panel-common.txt
-- reset-gpios: See panel-common.txt
-
-Optional properties:
-- backlight: see panel-common.txt
-
-The generic bindings for the SPI slaves documented in [1] also apply.
-
-The device node can contain one 'port' child node with one child
-'endpoint' node, according to the bindings defined in [2]. This
-node should describe panel's video bus.
-
-[1]: Documentation/devicetree/bindings/spi/spi-bus.txt
-[2]: Documentation/devicetree/bindings/graph.txt
-
-Example:
-
-&spi {
- panel@0 {
- compatible = "kingdisplay,kd035g6-54nt";
- reg = <0>;
-
- spi-max-frequency = <3125000>;
- spi-3wire;
- spi-cs-high;
-
- reset-gpios = <&gpe 2 GPIO_ACTIVE_LOW>;
-
- backlight = <&backlight>;
- power-supply = <&ldo6>;
-
- port {
- panel_input: endpoint {
- remote-endpoint = <&panel_output>;
- };
- };
- };
-};
diff --git a/Documentation/devicetree/bindings/display/panel/kingdisplay,kd035g6-54nt.yaml b/Documentation/devicetree/bindings/display/panel/kingdisplay,kd035g6-54nt.yaml
new file mode 100644
index 000000000000..6960036975fa
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/panel/kingdisplay,kd035g6-54nt.yaml
@@ -0,0 +1,65 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/panel/kingdisplay,kd035g6-54nt.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: King Display KD035G6-54NT 3.5" (320x240 pixels) 24-bit TFT LCD panel
+
+description: |
+ The panel must obey the rules for a SPI slave device as specified in
+ spi/spi-controller.yaml
+
+maintainers:
+ - Paul Cercueil <paul@crapouillou.net>
+
+allOf:
+ - $ref: panel-common.yaml#
+
+properties:
+ compatible:
+ const: kingdisplay,kd035g6-54nt
+
+ backlight: true
+ port: true
+ power-supply: true
+ reg: true
+ reset-gpios: true
+
+required:
+ - compatible
+ - power-supply
+ - reset-gpios
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+
+ spi {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ panel@0 {
+ compatible = "kingdisplay,kd035g6-54nt";
+ reg = <0>;
+
+ spi-max-frequency = <3125000>;
+ spi-3wire;
+ spi-cs-high;
+
+ reset-gpios = <&gpe 2 GPIO_ACTIVE_LOW>;
+
+ backlight = <&backlight>;
+ power-supply = <&ldo6>;
+
+ port {
+ panel_input: endpoint {
+ remote-endpoint = <&panel_output>;
+ };
+ };
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/panel/kingdisplay,kd097d04.txt b/Documentation/devicetree/bindings/display/panel/kingdisplay,kd097d04.txt
deleted file mode 100644
index cfefff688614..000000000000
--- a/Documentation/devicetree/bindings/display/panel/kingdisplay,kd097d04.txt
+++ /dev/null
@@ -1,22 +0,0 @@
-Kingdisplay KD097D04 9.7" 1536x2048 TFT LCD panel
-
-Required properties:
-- compatible: should be "kingdisplay,kd097d04"
-- reg: DSI virtual channel of the peripheral
-- power-supply: phandle of the regulator that provides the supply voltage
-- enable-gpios: panel enable gpio
-
-Optional properties:
-- backlight: phandle of the backlight device attached to the panel
-
-Example:
-
- &mipi_dsi {
- panel@0 {
- compatible = "kingdisplay,kd097d04";
- reg = <0>;
- power-supply = <...>;
- backlight = <&backlight>;
- enable-gpios = <&gpio1 13 GPIO_ACTIVE_HIGH>;
- };
- };
diff --git a/Documentation/devicetree/bindings/display/panel/leadtek,ltk050h3146w.yaml b/Documentation/devicetree/bindings/display/panel/leadtek,ltk050h3146w.yaml
new file mode 100644
index 000000000000..a372bdc5bde1
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/panel/leadtek,ltk050h3146w.yaml
@@ -0,0 +1,51 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/panel/leadtek,ltk050h3146w.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Leadtek LTK050H3146W 5.0in 720x1280 DSI panel
+
+maintainers:
+ - Heiko Stuebner <heiko.stuebner@theobroma-systems.com>
+
+allOf:
+ - $ref: panel-common.yaml#
+
+properties:
+ compatible:
+ enum:
+ - leadtek,ltk050h3146w
+ - leadtek,ltk050h3146w-a2
+ reg: true
+ backlight: true
+ reset-gpios: true
+ iovcc-supply:
+ description: regulator that supplies the iovcc voltage
+ vci-supply:
+ description: regulator that supplies the vci voltage
+
+required:
+ - compatible
+ - reg
+ - backlight
+ - iovcc-supply
+ - vci-supply
+
+additionalProperties: false
+
+examples:
+ - |
+ dsi {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ panel@0 {
+ compatible = "leadtek,ltk050h3146w";
+ reg = <0>;
+ backlight = <&backlight>;
+ iovcc-supply = <&vcc_1v8>;
+ vci-supply = <&vcc3v3_lcd>;
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/panel/lg,acx467akm-7.txt b/Documentation/devicetree/bindings/display/panel/lg,acx467akm-7.txt
deleted file mode 100644
index fc1e1b325e49..000000000000
--- a/Documentation/devicetree/bindings/display/panel/lg,acx467akm-7.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-LG ACX467AKM-7 4.95" 1080×1920 LCD Panel
-
-Required properties:
-- compatible: must be "lg,acx467akm-7"
-
-This binding is compatible with the simple-panel binding, which is specified
-in simple-panel.txt in this directory.
diff --git a/Documentation/devicetree/bindings/display/panel/lg,ld070wx3-sl01.txt b/Documentation/devicetree/bindings/display/panel/lg,ld070wx3-sl01.txt
deleted file mode 100644
index 5e649cb9aa1a..000000000000
--- a/Documentation/devicetree/bindings/display/panel/lg,ld070wx3-sl01.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-LG Corporation 7" WXGA TFT LCD panel
-
-Required properties:
-- compatible: should be "lg,ld070wx3-sl01"
-
-This binding is compatible with the simple-panel binding, which is specified
-in simple-panel.txt in this directory.
diff --git a/Documentation/devicetree/bindings/display/panel/lg,lg4573.txt b/Documentation/devicetree/bindings/display/panel/lg,lg4573.txt
deleted file mode 100644
index 824441f4e95a..000000000000
--- a/Documentation/devicetree/bindings/display/panel/lg,lg4573.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-LG LG4573 TFT Liquid Crystal Display with SPI control bus
-
-Required properties:
- - compatible: "lg,lg4573"
- - reg: address of the panel on the SPI bus
-
-The panel must obey rules for SPI slave device specified in document [1].
-
-[1]: Documentation/devicetree/bindings/spi/spi-bus.txt
-
-Example:
-
- lcd_panel: display@0 {
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "lg,lg4573";
- spi-max-frequency = <10000000>;
- reg = <0>;
- };
diff --git a/Documentation/devicetree/bindings/display/panel/lg,lg4573.yaml b/Documentation/devicetree/bindings/display/panel/lg,lg4573.yaml
new file mode 100644
index 000000000000..b4314ce7b411
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/panel/lg,lg4573.yaml
@@ -0,0 +1,45 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/panel/lg,lg4573.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: LG LG4573 TFT Liquid Crystal Display with SPI control bus
+
+description: |
+ The panel must obey the rules for a SPI slave device as specified in
+ spi/spi-controller.yaml
+
+maintainers:
+ - Heiko Schocher <hs@denx.de>
+
+allOf:
+ - $ref: panel-common.yaml#
+
+properties:
+ compatible:
+ const: lg,lg4573
+
+ reg: true
+ spi-max-frequency: true
+
+required:
+ - compatible
+ - reg
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ spi {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ lcd_panel: display@0 {
+ compatible = "lg,lg4573";
+ spi-max-frequency = <10000000>;
+ reg = <0>;
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/panel/lg,lh500wx1-sd03.txt b/Documentation/devicetree/bindings/display/panel/lg,lh500wx1-sd03.txt
deleted file mode 100644
index a04fd2b2e73d..000000000000
--- a/Documentation/devicetree/bindings/display/panel/lg,lh500wx1-sd03.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-LG Corporation 5" HD TFT LCD panel
-
-Required properties:
-- compatible: should be "lg,lh500wx1-sd03"
-
-This binding is compatible with the simple-panel binding, which is specified
-in simple-panel.txt in this directory.
diff --git a/Documentation/devicetree/bindings/display/panel/lgphilips,lb035q02.txt b/Documentation/devicetree/bindings/display/panel/lgphilips,lb035q02.txt
deleted file mode 100644
index 1a1e653e5407..000000000000
--- a/Documentation/devicetree/bindings/display/panel/lgphilips,lb035q02.txt
+++ /dev/null
@@ -1,33 +0,0 @@
-LG.Philips LB035Q02 Panel
-=========================
-
-Required properties:
-- compatible: "lgphilips,lb035q02"
-- enable-gpios: panel enable gpio
-
-Optional properties:
-- label: a symbolic name for the panel
-
-Required nodes:
-- Video port for DPI input
-
-Example
--------
-
-lcd-panel: panel@0 {
- compatible = "lgphilips,lb035q02";
- reg = <0>;
- spi-max-frequency = <100000>;
- spi-cpol;
- spi-cpha;
-
- label = "lcd";
-
- enable-gpios = <&gpio7 7 0>;
-
- port {
- lcd_in: endpoint {
- remote-endpoint = <&dpi_out>;
- };
- };
-};
diff --git a/Documentation/devicetree/bindings/display/panel/lgphilips,lb035q02.yaml b/Documentation/devicetree/bindings/display/panel/lgphilips,lb035q02.yaml
new file mode 100644
index 000000000000..830e335ddb53
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/panel/lgphilips,lb035q02.yaml
@@ -0,0 +1,59 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/panel/lgphilips,lb035q02.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: LG.Philips LB035Q02 Panel
+
+description: |
+ The panel must obey the rules for a SPI slave device as specified in
+ spi/spi-controller.yaml
+
+maintainers:
+ - Tomi Valkeinen <tomi.valkeinen@ti.com>
+
+allOf:
+ - $ref: panel-common.yaml#
+
+properties:
+ compatible:
+ const: lgphilips,lb035q02
+
+ label: true
+ enable-gpios: true
+ port: true
+
+required:
+ - compatible
+ - enable-gpios
+ - port
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ spi {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ panel: panel@0 {
+ compatible = "lgphilips,lb035q02";
+ reg = <0>;
+ spi-max-frequency = <100000>;
+ spi-cpol;
+ spi-cpha;
+
+ label = "lcd";
+
+ enable-gpios = <&gpio7 7 0>;
+
+ port {
+ lcd_in: endpoint {
+ remote-endpoint = <&dpi_out>;
+ };
+ };
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/panel/olimex,lcd-olinuxino.txt b/Documentation/devicetree/bindings/display/panel/olimex,lcd-olinuxino.txt
deleted file mode 100644
index a89f9c830a85..000000000000
--- a/Documentation/devicetree/bindings/display/panel/olimex,lcd-olinuxino.txt
+++ /dev/null
@@ -1,42 +0,0 @@
-Binding for Olimex Ltd. LCD-OLinuXino bridge panel.
-
-This device can be used as bridge between a host controller and LCD panels.
-Currently supported LCDs are:
- - LCD-OLinuXino-4.3TS
- - LCD-OLinuXino-5
- - LCD-OLinuXino-7
- - LCD-OLinuXino-10
-
-The panel itself contains:
- - AT24C16C EEPROM holding panel identification and timing requirements
- - AR1021 resistive touch screen controller (optional)
- - FT5x6 capacitive touch screnn controller (optional)
- - GT911/GT928 capacitive touch screen controller (optional)
-
-The above chips share same I2C bus. The EEPROM is factory preprogrammed with
-device information (id, serial, etc.) and timing requirements.
-
-Touchscreen bingings can be found in these files:
- - input/touchscreen/goodix.txt
- - input/touchscreen/edt-ft5x06.txt
- - input/touchscreen/ar1021.txt
-
-Required properties:
- - compatible: should be "olimex,lcd-olinuxino"
- - reg: address of the configuration EEPROM, should be <0x50>
- - power-supply: phandle of the regulator that provides the supply voltage
-
-Optional properties:
- - enable-gpios: GPIO pin to enable or disable the panel
- - backlight: phandle of the backlight device attacked to the panel
-
-Example:
-&i2c2 {
- panel@50 {
- compatible = "olimex,lcd-olinuxino";
- reg = <0x50>;
- power-supply = <&reg_vcc5v0>;
- enable-gpios = <&pio 7 8 GPIO_ACTIVE_HIGH>;
- backlight = <&backlight>;
- };
-};
diff --git a/Documentation/devicetree/bindings/display/panel/olimex,lcd-olinuxino.yaml b/Documentation/devicetree/bindings/display/panel/olimex,lcd-olinuxino.yaml
new file mode 100644
index 000000000000..2329d9610f83
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/panel/olimex,lcd-olinuxino.yaml
@@ -0,0 +1,70 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/panel/olimex,lcd-olinuxino.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Binding for Olimex Ltd. LCD-OLinuXino bridge panel.
+
+maintainers:
+ - Stefan Mavrodiev <stefan@olimex.com>
+
+description: |
+ This device can be used as bridge between a host controller and LCD panels.
+ Currently supported LCDs are:
+ - LCD-OLinuXino-4.3TS
+ - LCD-OLinuXino-5
+ - LCD-OLinuXino-7
+ - LCD-OLinuXino-10
+
+ The panel itself contains:
+ - AT24C16C EEPROM holding panel identification and timing requirements
+ - AR1021 resistive touch screen controller (optional)
+ - FT5x6 capacitive touch screen controller (optional)
+ - GT911/GT928 capacitive touch screen controller (optional)
+
+ The above chips share same I2C bus. The EEPROM is factory preprogrammed with
+ device information (id, serial, etc.) and timing requirements.
+
+ Touchscreen bindings can be found in these files:
+ - input/touchscreen/goodix.yaml
+ - input/touchscreen/edt-ft5x06.txt
+ - input/touchscreen/ar1021.txt
+
+allOf:
+ - $ref: panel-common.yaml#
+
+properties:
+ compatible:
+ const: olimex,lcd-olinuxino
+
+ backlight: true
+ enable-gpios: true
+ power-supply: true
+ reg: true
+
+required:
+ - compatible
+ - reg
+ - power-supply
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ panel@50 {
+ compatible = "olimex,lcd-olinuxino";
+ reg = <0x50>;
+ power-supply = <&reg_vcc5v0>;
+ enable-gpios = <&pio 7 8 GPIO_ACTIVE_HIGH>;
+ backlight = <&backlight>;
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/panel/osddisplays,osd101t2587-53ts.txt b/Documentation/devicetree/bindings/display/panel/osddisplays,osd101t2587-53ts.txt
deleted file mode 100644
index 9d88e96003fc..000000000000
--- a/Documentation/devicetree/bindings/display/panel/osddisplays,osd101t2587-53ts.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-One Stop Displays OSD101T2587-53TS 10.1" 1920x1200 panel
-
-The panel is similar to OSD101T2045-53TS, but it needs additional
-MIPI_DSI_TURN_ON_PERIPHERAL message from the host.
-
-Required properties:
-- compatible: should be "osddisplays,osd101t2587-53ts"
-- power-supply: as specified in the base binding
-
-Optional properties:
-- backlight: as specified in the base binding
-
-This binding is compatible with the simple-panel binding, which is specified
-in simple-panel.txt in this directory.
diff --git a/Documentation/devicetree/bindings/display/panel/panel-common.yaml b/Documentation/devicetree/bindings/display/panel/panel-common.yaml
index ed051ba12084..a747b755ad06 100644
--- a/Documentation/devicetree/bindings/display/panel/panel-common.yaml
+++ b/Documentation/devicetree/bindings/display/panel/panel-common.yaml
@@ -63,9 +63,9 @@ properties:
display-timings:
description:
- Some display panels supports several resolutions with different timing.
+ Some display panels support several resolutions with different timings.
The display-timings bindings supports specifying several timings and
- optional specify which is the native mode.
+ optionally specifying which is the native mode.
allOf:
- $ref: display-timings.yaml#
@@ -96,6 +96,12 @@ properties:
(hot plug detect) signal, but the signal isn't hooked up so we should
hardcode the max delay from the panel spec when powering up the panel.
+ hpd-gpios:
+ maxItems: 1
+ description:
+ If Hot Plug Detect (HPD) is connected to a GPIO in the system rather
+ than a dedicated HPD pin the pin can be specified here.
+
# Control I/Os
# Many display panels can be controlled through pins driven by GPIOs. The nature
@@ -124,6 +130,13 @@ properties:
while active. Active high reset signals can be supported by inverting the
GPIO specifier polarity flag.
+ te-gpios:
+ maxItems: 1
+ description:
+ GPIO spec for the tearing effect synchronization signal.
+ The tearing effect signal is active high. Active low signals can be
+ supported by inverting the GPIO specifier polarity flag.
+
# Power
power-supply:
description:
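
For illustration, a minimal, hypothetical sketch of a panel node using the new hpd-gpios
and te-gpios properties added to panel-common above. The compatible string, regulators,
GPIO controllers and pin numbers are placeholders, not taken from any board file:

	#include <dt-bindings/gpio/gpio.h>

	panel {
		compatible = "innolux,p120zdg-bf1";
		power-supply = <&pp3300_disp>;
		backlight = <&backlight>;

		/* HPD wired to a plain GPIO rather than a dedicated HPD pin */
		hpd-gpios = <&gpio2 11 GPIO_ACTIVE_HIGH>;

		/* tearing effect synchronization signal, active high by default */
		te-gpios = <&gpio1 3 GPIO_ACTIVE_HIGH>;
	};
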
diff --git a/Documentation/devicetree/bindings/display/panel/panel-simple-dsi.yaml b/Documentation/devicetree/bindings/display/panel/panel-simple-dsi.yaml
index b2e8742fd6af..16778ce782fc 100644
--- a/Documentation/devicetree/bindings/display/panel/panel-simple-dsi.yaml
+++ b/Documentation/devicetree/bindings/display/panel/panel-simple-dsi.yaml
@@ -29,6 +29,20 @@ properties:
# compatible must be listed in alphabetical order, ordered by compatible.
# The description in the comment is mandatory for each compatible.
+ # AU Optronics Corporation 8.0" WUXGA TFT LCD panel
+ - auo,b080uan01
+ # Boe Corporation 8.0" WUXGA TFT LCD panel
+ - boe,tv080wum-nl0
+ # Kingdisplay KD097D04 9.7" 1536x2048 TFT LCD panel
+ - kingdisplay,kd097d04
+ # LG ACX467AKM-7 4.95" 1080×1920 LCD Panel
+ - lg,acx467akm-7
+ # LG Corporation 7" WXGA TFT LCD panel
+ - lg,ld070wx3-sl01
+ # One Stop Displays OSD101T2587-53TS 10.1" 1920x1200 panel
+ - osddisplays,osd101t2587-53ts
+ # Panasonic 10" WUXGA TFT LCD panel
+ - panasonic,vvx10f004b00
# Panasonic 10" WUXGA TFT LCD panel
- panasonic,vvx10f034n00
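
For illustration, a panel whose standalone binding is removed in this series, such as
kingdisplay,kd097d04, keeps essentially the same node shape under the panel-simple-dsi
schema. A minimal sketch based on the example deleted above; the regulator and GPIO
phandles are placeholders:

	#include <dt-bindings/gpio/gpio.h>

	dsi {
		#address-cells = <1>;
		#size-cells = <0>;

		panel@0 {
			compatible = "kingdisplay,kd097d04";
			reg = <0>;
			power-supply = <&vcc_lcd>;
			backlight = <&backlight>;
			enable-gpios = <&gpio1 13 GPIO_ACTIVE_HIGH>;
		};
	};
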
diff --git a/Documentation/devicetree/bindings/display/panel/panel-simple.yaml b/Documentation/devicetree/bindings/display/panel/panel-simple.yaml
index 393ffc6acbba..d6cca1479633 100644
--- a/Documentation/devicetree/bindings/display/panel/panel-simple.yaml
+++ b/Documentation/devicetree/bindings/display/panel/panel-simple.yaml
@@ -33,8 +33,6 @@ properties:
- ampire,am-480272h3tmqw-t01h
# Ampire AM-800480R3TMQW-A1H 7.0" WVGA TFT LCD panel
- ampire,am800480r3tmqwa1h
- # AU Optronics Corporation 8.0" WUXGA TFT LCD panel
- - auo,b080uan01
# AU Optronics Corporation 10.1" WSVGA TFT LCD panel
- auo,b101aw03
# AU Optronics Corporation 10.1" WSVGA TFT LCD panel
@@ -55,10 +53,16 @@ properties:
- auo,g101evn010
# AU Optronics Corporation 10.4" (800x600) color TFT LCD panel
- auo,g104sn02
+ # AU Optronics Corporation 12.1" (1280x800) TFT LCD panel
+ - auo,g121ean01
# AU Optronics Corporation 13.3" FHD (1920x1080) TFT LCD panel
- auo,g133han01
+ # AU Optronics Corporation 15.6" (1366x768) TFT LCD panel
+ - auo,g156xtn01
# AU Optronics Corporation 18.5" FHD (1920x1080) TFT LCD panel
- auo,g185han01
+ # AU Optronics Corporation 19.0" (1280x1024) TFT LCD panel
+ - auo,g190ean01
# AU Optronics Corporation 31.5" FHD (1920x1080) TFT LCD panel
- auo,p320hvn03
# AU Optronics Corporation 21.5" FHD (1920x1080) color TFT LCD panel
@@ -69,10 +73,12 @@ properties:
- boe,hv070wsa-100
# BOE OPTOELECTRONICS TECHNOLOGY 10.1" WXGA TFT LCD panel
- boe,nv101wxmn51
+ # BOE NV133FHM-N61 13.3" FHD (1920x1080) TFT LCD Panel
+ - boe,nv133fhm-n61
+ # BOE NV133FHM-N62 13.3" FHD (1920x1080) TFT LCD Panel
+ - boe,nv133fhm-n62
# BOE NV140FHM-N49 14.0" FHD a-Si FT panel
- boe,nv140fhmn49
- # Boe Corporation 8.0" WUXGA TFT LCD panel
- - boe,tv080wum-nl0
# CDTech(H.K.) Electronics Limited 4.3" 480x272 color TFT-LCD panel
- cdtech,s043wq26h-ct7
# CDTech(H.K.) Electronics Limited 7" 800x480 color TFT-LCD panel
@@ -82,6 +88,8 @@ properties:
# Chunghwa Picture Tubes Ltd. 10.1" WXGA TFT LCD panel
- chunghwa,claa101wa01a
# Chunghwa Picture Tubes Ltd. 10.1" WXGA TFT LCD panel
+ - chunghwa,claa101wb01
+ # Chunghwa Picture Tubes Ltd. 10.1" WXGA TFT LCD panel
- chunghwa,claa101wb03
# DataImage, Inc. 7" WVGA (800x480) TFT LCD panel with 24-bit parallel interface.
- dataimage,scf0700c48ggu18
@@ -127,6 +135,8 @@ properties:
- hannstar,hsd100pxn1
# Hitachi Ltd. Corporation 9" WVGA (800x480) TFT LCD panel
- hit,tx23d38vm0caa
+ # InfoVision Optoelectronics M133NWF4 R0 13.3" FHD (1920x1080) TFT LCD panel
+ - ivo,m133nwf4-r0
# Innolux AT043TN24 4.3" WQVGA TFT LCD panel
- innolux,at043tn24
# Innolux AT070TN92 7.0" WQVGA TFT LCD panel
@@ -155,6 +165,8 @@ properties:
- lemaker,bl035-rgb-002
# LG 7" (800x480 pixels) TFT LCD panel
- lg,lb070wv8
+ # LG Corporation 5" HD TFT LCD panel
+ - lg,lh500wx1-sd03
# LG LP079QX1-SP0V 7.9" (1536x2048 pixels) TFT LCD panel
- lg,lp079qx1-sp0v
# LG 9.7" (2048x1536 pixels) TFT LCD panel
@@ -227,6 +239,8 @@ properties:
- sharp,ls020b1dd01d
# Shelly SCA07010-BFN-LNN 7.0" WVGA TFT LCD panel
- shelly,sca07010-bfn-lnn
+ # Starry KR070PE2T 7" WVGA TFT LCD panel
+ - starry,kr070pe2t
# Starry 12.2" (1920x1200 pixels) TFT LCD panel
- starry,kr122ea0sra
# Tianma Micro-electronics TM070JDHG30 7.0" WXGA TFT LCD panel
diff --git a/Documentation/devicetree/bindings/display/panel/raydium,rm67191.txt b/Documentation/devicetree/bindings/display/panel/raydium,rm67191.txt
deleted file mode 100644
index 10424695aa02..000000000000
--- a/Documentation/devicetree/bindings/display/panel/raydium,rm67191.txt
+++ /dev/null
@@ -1,41 +0,0 @@
-Raydium RM67171 OLED LCD panel with MIPI-DSI protocol
-
-Required properties:
-- compatible: "raydium,rm67191"
-- reg: virtual channel for MIPI-DSI protocol
- must be <0>
-- dsi-lanes: number of DSI lanes to be used
- must be <3> or <4>
-- port: input port node with endpoint definition as
- defined in Documentation/devicetree/bindings/graph.txt;
- the input port should be connected to a MIPI-DSI device
- driver
-
-Optional properties:
-- reset-gpios: a GPIO spec for the RST_B GPIO pin
-- v3p3-supply: phandle to 3.3V regulator that powers the VDD_3V3 pin
-- v1p8-supply: phandle to 1.8V regulator that powers the VDD_1V8 pin
-- width-mm: see panel-common.txt
-- height-mm: see panel-common.txt
-- video-mode: 0 - burst-mode
- 1 - non-burst with sync event
- 2 - non-burst with sync pulse
-
-Example:
-
- panel@0 {
- compatible = "raydium,rm67191";
- reg = <0>;
- pinctrl-0 = <&pinctrl_mipi_dsi_0_1_en>;
- pinctrl-names = "default";
- reset-gpios = <&gpio1 7 GPIO_ACTIVE_LOW>;
- dsi-lanes = <4>;
- width-mm = <68>;
- height-mm = <121>;
-
- port {
- panel_in: endpoint {
- remote-endpoint = <&mipi_out>;
- };
- };
- };
diff --git a/Documentation/devicetree/bindings/display/panel/raydium,rm67191.yaml b/Documentation/devicetree/bindings/display/panel/raydium,rm67191.yaml
new file mode 100644
index 000000000000..745dd247c409
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/panel/raydium,rm67191.yaml
@@ -0,0 +1,75 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/panel/raydium,rm67191.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Raydium RM67171 OLED LCD panel with MIPI-DSI protocol
+
+maintainers:
+ - Robert Chiras <robert.chiras@nxp.com>
+
+allOf:
+ - $ref: panel-common.yaml#
+
+properties:
+ compatible:
+ const: raydium,rm67191
+
+ reg: true
+ port: true
+ reset-gpios: true
+ width-mm: true
+ height-mm: true
+
+ dsi-lanes:
+ description: Number of DSI lanes to be used must be <3> or <4>
+ enum: [3, 4]
+
+ v3p3-supply:
+ description: phandle to 3.3V regulator that powers the VDD_3V3 pin
+
+ v1p8-supply:
+ description: phandle to 1.8V regulator that powers the VDD_1V8 pin
+
+ video-mode:
+ description: |
+ 0 - burst-mode
+ 1 - non-burst with sync event
+ 2 - non-burst with sync pulse
+ enum: [0, 1, 2]
+
+required:
+ - compatible
+ - reg
+ - dsi-lanes
+ - port
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+
+ dsi {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ panel@0 {
+ compatible = "raydium,rm67191";
+ reg = <0>;
+ reset-gpios = <&gpio1 7 GPIO_ACTIVE_LOW>;
+ dsi-lanes = <4>;
+ width-mm = <68>;
+ height-mm = <121>;
+ video-mode = <1>;
+
+ port {
+ panel_in: endpoint {
+ remote-endpoint = <&mipi_out>;
+ };
+ };
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/panel/samsung,amoled-mipi-dsi.yaml b/Documentation/devicetree/bindings/display/panel/samsung,amoled-mipi-dsi.yaml
new file mode 100644
index 000000000000..96bdde9298e0
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/panel/samsung,amoled-mipi-dsi.yaml
@@ -0,0 +1,65 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/panel/samsung,amoled-mipi-dsi.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung AMOLED MIPI-DSI panels
+
+maintainers:
+ - Hoegeun Kwon <hoegeun.kwon@samsung.com>
+
+allOf:
+ - $ref: panel-common.yaml#
+
+properties:
+ compatible:
+ enum:
+ # Samsung S6E63J0X03 1.63" 320x320 AMOLED panel
+ - samsung,s6e63j0x03
+ # Samsung S6E3HA2 5.7" 1440x2560 AMOLED panel
+ - samsung,s6e3ha2
+ # Samsung S6E3HF2 5.65" 1600x2560 AMOLED panel
+ - samsung,s6e3hf2
+
+ reg: true
+ reset-gpios: true
+ enable-gpios: true
+ te-gpios: true
+
+ vdd3-supply:
+ description: I/O voltage supply
+
+ vci-supply:
+ description: voltage supply for analog circuits
+
+required:
+ - compatible
+ - reg
+ - vdd3-supply
+ - vci-supply
+ - reset-gpios
+ - enable-gpios
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+
+ dsi {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ panel@0 {
+ compatible = "samsung,s6e3ha2";
+ reg = <0>;
+ vdd3-supply = <&ldo27_reg>;
+ vci-supply = <&ldo28_reg>;
+ reset-gpios = <&gpg0 0 GPIO_ACTIVE_LOW>;
+ enable-gpios = <&gpf1 5 GPIO_ACTIVE_HIGH>;
+ te-gpios = <&gpf1 3 GPIO_ACTIVE_HIGH>;
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/panel/samsung,ld9040.txt b/Documentation/devicetree/bindings/display/panel/samsung,ld9040.txt
deleted file mode 100644
index 354d4d1df4ff..000000000000
--- a/Documentation/devicetree/bindings/display/panel/samsung,ld9040.txt
+++ /dev/null
@@ -1,66 +0,0 @@
-Samsung LD9040 AMOLED LCD parallel RGB panel with SPI control bus
-
-Required properties:
- - compatible: "samsung,ld9040"
- - reg: address of the panel on SPI bus
- - vdd3-supply: core voltage supply
- - vci-supply: voltage supply for analog circuits
- - reset-gpios: a GPIO spec for the reset pin
- - display-timings: timings for the connected panel according to [1]
-
-The panel must obey rules for SPI slave device specified in document [2].
-
-Optional properties:
- - power-on-delay: delay after turning regulators on [ms]
- - reset-delay: delay after reset sequence [ms]
- - panel-width-mm: physical panel width [mm]
- - panel-height-mm: physical panel height [mm]
-
-The device node can contain one 'port' child node with one child
-'endpoint' node, according to the bindings defined in [3]. This
-node should describe panel's video bus.
-
-[1]: Documentation/devicetree/bindings/display/panel/display-timing.txt
-[2]: Documentation/devicetree/bindings/spi/spi-bus.txt
-[3]: Documentation/devicetree/bindings/media/video-interfaces.txt
-
-Example:
-
- lcd@0 {
- compatible = "samsung,ld9040";
- reg = <0>;
- vdd3-supply = <&ldo7_reg>;
- vci-supply = <&ldo17_reg>;
- reset-gpios = <&gpy4 5 0>;
- spi-max-frequency = <1200000>;
- spi-cpol;
- spi-cpha;
- power-on-delay = <10>;
- reset-delay = <10>;
- panel-width-mm = <90>;
- panel-height-mm = <154>;
-
- display-timings {
- timing {
- clock-frequency = <23492370>;
- hactive = <480>;
- vactive = <800>;
- hback-porch = <16>;
- hfront-porch = <16>;
- vback-porch = <2>;
- vfront-porch = <28>;
- hsync-len = <2>;
- vsync-len = <1>;
- hsync-active = <0>;
- vsync-active = <0>;
- de-active = <0>;
- pixelclk-active = <0>;
- };
- };
-
- port {
- lcd_ep: endpoint {
- remote-endpoint = <&fimd_dpi_ep>;
- };
- };
- };
diff --git a/Documentation/devicetree/bindings/display/panel/samsung,ld9040.yaml b/Documentation/devicetree/bindings/display/panel/samsung,ld9040.yaml
new file mode 100644
index 000000000000..060ee27a4749
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/panel/samsung,ld9040.yaml
@@ -0,0 +1,107 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/panel/samsung,ld9040.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung LD9040 AMOLED LCD parallel RGB panel with SPI control bus
+
+description: |
+ The panel must obey the rules for a SPI slave device as specified in
+ spi/spi-controller.yaml
+
+maintainers:
+ - Andrzej Hajda <a.hajda@samsung.com>
+
+allOf:
+ - $ref: panel-common.yaml#
+
+properties:
+ compatible:
+ const: samsung,ld9040
+
+ display-timings: true
+ port: true
+ reg: true
+ reset-gpios: true
+
+ vdd3-supply:
+ description: core voltage supply
+
+ vci-supply:
+ description: voltage supply for analog circuits
+
+ power-on-delay:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description: delay after turning regulators on [ms]
+
+ reset-delay:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description: delay after reset sequence [ms]
+
+ panel-width-mm:
+ description: physical panel width [mm]
+
+ panel-height-mm:
+ description: physical panel height [mm]
+
+required:
+ - compatible
+ - reg
+ - vdd3-supply
+ - vci-supply
+ - reset-gpios
+ - display-timings
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ spi {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ lcd@0 {
+ compatible = "samsung,ld9040";
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ reg = <0>;
+ vdd3-supply = <&ldo7_reg>;
+ vci-supply = <&ldo17_reg>;
+ reset-gpios = <&gpy4 5 0>;
+ spi-max-frequency = <1200000>;
+ spi-cpol;
+ spi-cpha;
+ power-on-delay = <10>;
+ reset-delay = <10>;
+ panel-width-mm = <90>;
+ panel-height-mm = <154>;
+
+ display-timings {
+ timing {
+ clock-frequency = <23492370>;
+ hactive = <480>;
+ vactive = <800>;
+ hback-porch = <16>;
+ hfront-porch = <16>;
+ vback-porch = <2>;
+ vfront-porch = <28>;
+ hsync-len = <2>;
+ vsync-len = <1>;
+ hsync-active = <0>;
+ vsync-active = <0>;
+ de-active = <0>;
+ pixelclk-active = <0>;
+ };
+ };
+
+ port {
+ lcd_ep: endpoint {
+ remote-endpoint = <&fimd_dpi_ep>;
+ };
+ };
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/panel/samsung,s6d16d0.txt b/Documentation/devicetree/bindings/display/panel/samsung,s6d16d0.txt
deleted file mode 100644
index b94e366f451b..000000000000
--- a/Documentation/devicetree/bindings/display/panel/samsung,s6d16d0.txt
+++ /dev/null
@@ -1,30 +0,0 @@
-Samsung S6D16D0 4" 864x480 AMOLED panel
-
-Required properties:
- - compatible: should be:
- "samsung,s6d16d0",
- - reg: the virtual channel number of a DSI peripheral
- - vdd1-supply: I/O voltage supply
- - reset-gpios: a GPIO spec for the reset pin (active low)
-
-The device node can contain one 'port' child node with one child
-'endpoint' node, according to the bindings defined in
-media/video-interfaces.txt. This node should describe panel's video bus.
-
-Example:
-&dsi {
- ...
-
- panel@0 {
- compatible = "samsung,s6d16d0";
- reg = <0>;
- vdd1-supply = <&foo>;
- reset-gpios = <&foo_gpio 0 GPIO_ACTIVE_LOW>;
-
- port {
- panel_in: endpoint {
- remote-endpoint = <&dsi_out>;
- };
- };
- };
-};
diff --git a/Documentation/devicetree/bindings/display/panel/samsung,s6d16d0.yaml b/Documentation/devicetree/bindings/display/panel/samsung,s6d16d0.yaml
new file mode 100644
index 000000000000..66d147496bc3
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/panel/samsung,s6d16d0.yaml
@@ -0,0 +1,56 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/panel/samsung,s6d16d0.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung S6D16D0 4" 864x480 AMOLED panel
+
+maintainers:
+ - Linus Walleij <linus.walleij@linaro.org>
+
+allOf:
+ - $ref: panel-common.yaml#
+
+properties:
+ compatible:
+ const: samsung,s6d16d0
+
+ port: true
+ reg: true
+ reset-gpios: true
+
+ vdd1-supply:
+ description: I/O voltage supply
+
+required:
+ - compatible
+ - reg
+ - vdd1-supply
+ - reset-gpios
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+
+ dsi {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ panel@0 {
+ compatible = "samsung,s6d16d0";
+ reg = <0>;
+ vdd1-supply = <&foo>;
+ reset-gpios = <&foo_gpio 0 GPIO_ACTIVE_LOW>;
+
+ port {
+ panel_in: endpoint {
+ remote-endpoint = <&dsi_out>;
+ };
+ };
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/panel/samsung,s6e3ha2.txt b/Documentation/devicetree/bindings/display/panel/samsung,s6e3ha2.txt
deleted file mode 100644
index 4acea25c244b..000000000000
--- a/Documentation/devicetree/bindings/display/panel/samsung,s6e3ha2.txt
+++ /dev/null
@@ -1,31 +0,0 @@
-Samsung S6E3HA2 5.7" 1440x2560 AMOLED panel
-Samsung S6E3HF2 5.65" 1600x2560 AMOLED panel
-
-Required properties:
- - compatible: should be one of:
- "samsung,s6e3ha2",
- "samsung,s6e3hf2".
- - reg: the virtual channel number of a DSI peripheral
- - vdd3-supply: I/O voltage supply
- - vci-supply: voltage supply for analog circuits
- - reset-gpios: a GPIO spec for the reset pin (active low)
- - enable-gpios: a GPIO spec for the panel enable pin (active high)
-
-Optional properties:
- - te-gpios: a GPIO spec for the tearing effect synchronization signal
- gpio pin (active high)
-
-Example:
-&dsi {
- ...
-
- panel@0 {
- compatible = "samsung,s6e3ha2";
- reg = <0>;
- vdd3-supply = <&ldo27_reg>;
- vci-supply = <&ldo28_reg>;
- reset-gpios = <&gpg0 0 GPIO_ACTIVE_LOW>;
- enable-gpios = <&gpf1 5 GPIO_ACTIVE_HIGH>;
- te-gpios = <&gpf1 3 GPIO_ACTIVE_HIGH>;
- };
-};
diff --git a/Documentation/devicetree/bindings/display/panel/samsung,s6e63j0x03.txt b/Documentation/devicetree/bindings/display/panel/samsung,s6e63j0x03.txt
deleted file mode 100644
index 3f1a8392af7f..000000000000
--- a/Documentation/devicetree/bindings/display/panel/samsung,s6e63j0x03.txt
+++ /dev/null
@@ -1,24 +0,0 @@
-Samsung S6E63J0X03 1.63" 320x320 AMOLED panel (interface: MIPI-DSI command mode)
-
-Required properties:
- - compatible: "samsung,s6e63j0x03"
- - reg: the virtual channel number of a DSI peripheral
- - vdd3-supply: I/O voltage supply
- - vci-supply: voltage supply for analog circuits
- - reset-gpios: a GPIO spec for the reset pin (active low)
- - te-gpios: a GPIO spec for the tearing effect synchronization signal
- gpio pin (active high)
-
-Example:
-&dsi {
- ...
-
- panel@0 {
- compatible = "samsung,s6e63j0x03";
- reg = <0>;
- vdd3-supply = <&ldo16_reg>;
- vci-supply = <&ldo20_reg>;
- reset-gpios = <&gpe0 1 GPIO_ACTIVE_LOW>;
- te-gpios = <&gpx0 6 GPIO_ACTIVE_HIGH>;
- };
-};
diff --git a/Documentation/devicetree/bindings/display/panel/samsung,s6e63m0.txt b/Documentation/devicetree/bindings/display/panel/samsung,s6e63m0.txt
deleted file mode 100644
index 9fb9ebeef8e4..000000000000
--- a/Documentation/devicetree/bindings/display/panel/samsung,s6e63m0.txt
+++ /dev/null
@@ -1,33 +0,0 @@
-Samsung s6e63m0 AMOLED LCD panel
-
-Required properties:
- - compatible: "samsung,s6e63m0"
- - reset-gpios: GPIO spec for reset pin
- - vdd3-supply: VDD regulator
- - vci-supply: VCI regulator
-
-The panel must obey rules for SPI slave device specified in document [1].
-
-The device node can contain one 'port' child node with one child
-'endpoint' node, according to the bindings defined in [2]. This
-node should describe panel's video bus.
-
-[1]: Documentation/devicetree/bindings/spi/spi-bus.txt
-[2]: Documentation/devicetree/bindings/media/video-interfaces.txt
-
-Example:
-
- s6e63m0: display@0 {
- compatible = "samsung,s6e63m0";
- reg = <0>;
- reset-gpio = <&mp05 5 1>;
- vdd3-supply = <&ldo12_reg>;
- vci-supply = <&ldo11_reg>;
- spi-max-frequency = <1200000>;
-
- port {
- lcd_ep: endpoint {
- remote-endpoint = <&fimd_ep>;
- };
- };
- };
diff --git a/Documentation/devicetree/bindings/display/panel/samsung,s6e63m0.yaml b/Documentation/devicetree/bindings/display/panel/samsung,s6e63m0.yaml
new file mode 100644
index 000000000000..1dab80ae1d0a
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/panel/samsung,s6e63m0.yaml
@@ -0,0 +1,60 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/panel/samsung,s6e63m0.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung s6e63m0 AMOLED LCD panel
+
+maintainers:
+ - Jonathan Bakker <xc-racer2@live.ca>
+
+allOf:
+ - $ref: panel-common.yaml#
+
+properties:
+ compatible:
+ const: samsung,s6e63m0
+
+ reg: true
+ reset-gpios: true
+ port: true
+
+ vdd3-supply:
+ description: VDD regulator
+
+ vci-supply:
+ description: VCI regulator
+
+required:
+ - compatible
+ - reset-gpios
+ - vdd3-supply
+ - vci-supply
+ - port
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ spi {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ display@0 {
+ compatible = "samsung,s6e63m0";
+ reg = <0>;
+ reset-gpios = <&mp05 5 1>;
+ vdd3-supply = <&ldo12_reg>;
+ vci-supply = <&ldo11_reg>;
+ spi-max-frequency = <1200000>;
+
+ port {
+ lcd_ep: endpoint {
+ remote-endpoint = <&fimd_ep>;
+ };
+ };
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/panel/seiko,43wvf1g.txt b/Documentation/devicetree/bindings/display/panel/seiko,43wvf1g.txt
deleted file mode 100644
index aae57ef36cdd..000000000000
--- a/Documentation/devicetree/bindings/display/panel/seiko,43wvf1g.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-Seiko Instruments Inc. 4.3" WVGA (800 x RGB x 480) TFT with Touch-Panel
-
-Required properties:
-- compatible: should be "sii,43wvf1g".
-- "dvdd-supply": 3v3 digital regulator.
-- "avdd-supply": 5v analog regulator.
-
-Optional properties:
-- backlight: phandle for the backlight control.
-
-Example:
-
- panel {
- compatible = "sii,43wvf1g";
- backlight = <&backlight_display>;
- dvdd-supply = <&reg_lcd_3v3>;
- avdd-supply = <&reg_lcd_5v>;
- port {
- panel_in: endpoint {
- remote-endpoint = <&display_out>;
- };
- };
- };
diff --git a/Documentation/devicetree/bindings/display/panel/seiko,43wvf1g.yaml b/Documentation/devicetree/bindings/display/panel/seiko,43wvf1g.yaml
new file mode 100644
index 000000000000..cfaa50cf5f5d
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/panel/seiko,43wvf1g.yaml
@@ -0,0 +1,50 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/panel/seiko,43wvf1g.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Seiko Instruments Inc. 4.3" WVGA (800 x RGB x 480) TFT with Touch-Panel
+
+maintainers:
+ - Marco Franchi <marco.franchi@nxp.com>
+
+allOf:
+ - $ref: panel-common.yaml#
+
+properties:
+ compatible:
+ const: sii,43wvf1g
+
+ backlight: true
+ port: true
+
+ dvdd-supply:
+ description: 3v3 digital regulator
+
+ avdd-supply:
+ description: 5v analog regulator
+
+required:
+ - compatible
+ - dvdd-supply
+ - avdd-supply
+
+additionalProperties: false
+
+examples:
+ - |
+ panel {
+ compatible = "sii,43wvf1g";
+
+ backlight = <&backlight_display>;
+ dvdd-supply = <&reg_lcd_3v3>;
+ avdd-supply = <&reg_lcd_5v>;
+ port {
+ panel_in: endpoint {
+ remote-endpoint = <&display_out>;
+ };
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/panel/sharp,lq150x1lg11.txt b/Documentation/devicetree/bindings/display/panel/sharp,lq150x1lg11.txt
deleted file mode 100644
index 0f57c3143506..000000000000
--- a/Documentation/devicetree/bindings/display/panel/sharp,lq150x1lg11.txt
+++ /dev/null
@@ -1,36 +0,0 @@
-Sharp 15" LQ150X1LG11 XGA TFT LCD panel
-
-Required properties:
-- compatible: should be "sharp,lq150x1lg11"
-- power-supply: regulator to provide the VCC supply voltage (3.3 volts)
-
-Optional properties:
-- backlight: phandle of the backlight device
-- rlud-gpios: a single GPIO for the RL/UD (rotate 180 degrees) pin.
-- sellvds-gpios: a single GPIO for the SELLVDS pin.
-
-If rlud-gpios and/or sellvds-gpios are not specified, the RL/UD and/or SELLVDS
-pins are assumed to be handled appropriately by the hardware.
-
-Example:
-
- backlight: backlight {
- compatible = "pwm-backlight";
- pwms = <&pwm 0 100000>; /* VBR */
-
- brightness-levels = <0 20 40 60 80 100>;
- default-brightness-level = <2>;
-
- power-supply = <&vdd_12v_reg>; /* VDD */
- enable-gpios = <&gpio 42 GPIO_ACTIVE_HIGH>; /* XSTABY */
- };
-
- panel {
- compatible = "sharp,lq150x1lg11";
-
- power-supply = <&vcc_3v3_reg>; /* VCC */
-
- backlight = <&backlight>;
- rlud-gpios = <&gpio 17 GPIO_ACTIVE_HIGH>; /* RL/UD */
- sellvds-gpios = <&gpio 18 GPIO_ACTIVE_HIGH>; /* SELLVDS */
- };
diff --git a/Documentation/devicetree/bindings/display/panel/sharp,lq150x1lg11.yaml b/Documentation/devicetree/bindings/display/panel/sharp,lq150x1lg11.yaml
new file mode 100644
index 000000000000..92f2d12f4f4c
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/panel/sharp,lq150x1lg11.yaml
@@ -0,0 +1,58 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/panel/sharp,lq150x1lg11.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Sharp 15" LQ150X1LG11 XGA TFT LCD panel
+
+maintainers:
+ - Peter Rosin <peda@axentia.se>
+
+allOf:
+ - $ref: panel-common.yaml#
+
+properties:
+ compatible:
+ const: sharp,lq150x1lg11
+
+ power-supply: true
+ backlight: true
+
+ rlud-gpios:
+ maxItems: 1
+ description: |
+ GPIO for the RL/UD (rotate 180 degrees) pin.
+ If rlud-gpios and/or sellvds-gpios are not specified,
+ the RL/UD and/or SELLVDS pins are assumed to be handled
+ appropriately by the hardware.
+
+ sellvds-gpios:
+ maxItems: 1
+ description: |
+ GPIO for the SELLVDS pin.
+ If rlud-gpios and/or sellvds-gpios are not specified,
+ the RL/UD and/or SELLVDS pins are assumed to be handled
+ appropriately by the hardware.
+
+required:
+ - compatible
+ - power-supply
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+
+ panel {
+ compatible = "sharp,lq150x1lg11";
+
+ power-supply = <&vcc_3v3_reg>; /* VCC */
+
+ backlight = <&backlight>;
+ rlud-gpios = <&gpio 17 GPIO_ACTIVE_HIGH>; /* RL/UD */
+ sellvds-gpios = <&gpio 18 GPIO_ACTIVE_HIGH>; /* SELLVDS */
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/panel/sharp,ls037v7dw01.txt b/Documentation/devicetree/bindings/display/panel/sharp,ls037v7dw01.txt
deleted file mode 100644
index 0cc8981e9d49..000000000000
--- a/Documentation/devicetree/bindings/display/panel/sharp,ls037v7dw01.txt
+++ /dev/null
@@ -1,43 +0,0 @@
-SHARP LS037V7DW01 TFT-LCD panel
-===================================
-
-Required properties:
-- compatible: "sharp,ls037v7dw01"
-
-Optional properties:
-- label: a symbolic name for the panel
-- enable-gpios: a GPIO spec for the optional enable pin.
- This pin is the INI pin as specified in the LS037V7DW01.pdf file.
-- reset-gpios: a GPIO spec for the optional reset pin.
- This pin is the RESB pin as specified in the LS037V7DW01.pdf file.
-- mode-gpios: a GPIO
- ordered MO, LR, and UD as specified in the LS037V7DW01.pdf file.
-
-Required nodes:
-- Video port for DPI input
-
-This panel can have zero to five GPIOs to configure to change configuration
-between QVGA and VGA mode and the scan direction. As these pins can be also
-configured with external pulls, all the GPIOs are considered optional with holes
-in the array.
-
-Example
--------
-
-Example when connected to a omap2+ based device:
-
-lcd0: display {
- compatible = "sharp,ls037v7dw01";
- power-supply = <&lcd_3v3>;
- enable-gpios = <&gpio5 24 GPIO_ACTIVE_HIGH>; /* gpio152, lcd INI */
- reset-gpios = <&gpio5 27 GPIO_ACTIVE_HIGH>; /* gpio155, lcd RESB */
- mode-gpios = <&gpio5 26 GPIO_ACTIVE_HIGH /* gpio154, lcd MO */
- &gpio1 2 GPIO_ACTIVE_HIGH /* gpio2, lcd LR */
- &gpio1 3 GPIO_ACTIVE_HIGH>; /* gpio3, lcd UD */
-
- port {
- lcd_in: endpoint {
- remote-endpoint = <&dpi_out>;
- };
- };
-};
diff --git a/Documentation/devicetree/bindings/display/panel/sharp,ls037v7dw01.yaml b/Documentation/devicetree/bindings/display/panel/sharp,ls037v7dw01.yaml
new file mode 100644
index 000000000000..8c47a9b0b507
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/panel/sharp,ls037v7dw01.yaml
@@ -0,0 +1,68 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/panel/sharp,ls037v7dw01.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: SHARP LS037V7DW01 TFT-LCD panel
+
+description: |
+ This panel can have zero to five GPIOs to switch its configuration
+ between QVGA and VGA mode and to select the scan direction. As these pins
+ can also be configured with external pulls, all the GPIOs are considered
+ optional, with holes allowed in the array.
+
+maintainers:
+ - Tony Lindgren <tony@atomide.com>
+
+allOf:
+ - $ref: panel-common.yaml#
+
+properties:
+ compatible:
+ const: sharp,ls037v7dw01
+
+ label: true
+ enable-gpios: true
+ reset-gpios: true
+ port: true
+ power-supply: true
+
+ mode-gpios:
+ minItems: 1
+ maxItems: 3
+ description: |
+ GPIOs ordered MO, LR, and UD as specified in LS037V7DW01.pdf.
+ This panel can have zero to three mode GPIOs to switch its
+ configuration between QVGA and VGA mode and to select the
+ scan direction. As these pins can also be configured with
+ external pulls, all the GPIOs are considered optional, with
+ holes allowed in the array.
+
+required:
+ - compatible
+ - port
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+
+ lcd0: display {
+ compatible = "sharp,ls037v7dw01";
+ power-supply = <&lcd_3v3>;
+ enable-gpios = <&gpio5 24 GPIO_ACTIVE_HIGH>; /* gpio152, lcd INI */
+ reset-gpios = <&gpio5 27 GPIO_ACTIVE_HIGH>; /* gpio155, lcd RESB */
+ mode-gpios = <&gpio5 26 GPIO_ACTIVE_HIGH /* gpio154, lcd MO */
+ &gpio1 2 GPIO_ACTIVE_HIGH /* gpio2, lcd LR */
+ &gpio1 3 GPIO_ACTIVE_HIGH>; /* gpio3, lcd UD */
+
+ port {
+ lcd_in: endpoint {
+ remote-endpoint = <&dpi_out>;
+ };
+ };
+ };
+
+...
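The "holes in the array" rule above means that any subset of the MO, LR and UD
pins may be wired to GPIOs while the remaining pins are strapped in hardware.
As a sketch (not part of the patch), assuming the usual empty GPIO specifier
convention for skipped entries, a board that only wires MO and UD could
describe mode-gpios as:

    mode-gpios = <&gpio5 26 GPIO_ACTIVE_HIGH>,  /* MO wired to a GPIO */
                 <0>,                           /* LR strapped by an external pull */
                 <&gpio1 3 GPIO_ACTIVE_HIGH>;   /* UD wired to a GPIO */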
diff --git a/Documentation/devicetree/bindings/display/panel/sharp,ls043t1le01.txt b/Documentation/devicetree/bindings/display/panel/sharp,ls043t1le01.txt
deleted file mode 100644
index 3770a111968b..000000000000
--- a/Documentation/devicetree/bindings/display/panel/sharp,ls043t1le01.txt
+++ /dev/null
@@ -1,22 +0,0 @@
-Sharp Microelectronics 4.3" qHD TFT LCD panel
-
-Required properties:
-- compatible: should be "sharp,ls043t1le01-qhd"
-- reg: DSI virtual channel of the peripheral
-- power-supply: phandle of the regulator that provides the supply voltage
-
-Optional properties:
-- backlight: phandle of the backlight device attached to the panel
-- reset-gpios: a GPIO spec for the reset pin
-
-Example:
-
- mdss_dsi@fd922800 {
- panel@0 {
- compatible = "sharp,ls043t1le01-qhd";
- reg = <0>;
- avdd-supply = <&pm8941_l22>;
- backlight = <&pm8941_wled>;
- reset-gpios = <&pm8941_gpios 19 GPIO_ACTIVE_HIGH>;
- };
- };
diff --git a/Documentation/devicetree/bindings/display/panel/sharp,ls043t1le01.yaml b/Documentation/devicetree/bindings/display/panel/sharp,ls043t1le01.yaml
new file mode 100644
index 000000000000..a90d0d8bf7c9
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/panel/sharp,ls043t1le01.yaml
@@ -0,0 +1,51 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/panel/sharp,ls043t1le01.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Sharp Microelectronics 4.3" qHD TFT LCD panel
+
+maintainers:
+ - Werner Johansson <werner.johansson@sonymobile.com>
+
+allOf:
+ - $ref: panel-common.yaml#
+
+properties:
+ compatible:
+ const: sharp,ls043t1le01-qhd
+
+ reg: true
+ backlight: true
+ reset-gpios: true
+ port: true
+
+ avdd-supply:
+ description: phandle of the regulator that provides the supply voltage
+
+required:
+ - compatible
+ - reg
+ - avdd-supply
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+
+ dsi {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ panel@0 {
+ compatible = "sharp,ls043t1le01-qhd";
+ reg = <0>;
+ avdd-supply = <&pm8941_l22>;
+ backlight = <&pm8941_wled>;
+ reset-gpios = <&pm8941_gpios 19 GPIO_ACTIVE_HIGH>;
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/panel/simple-panel.txt b/Documentation/devicetree/bindings/display/panel/simple-panel.txt
deleted file mode 100644
index e11208fb7da8..000000000000
--- a/Documentation/devicetree/bindings/display/panel/simple-panel.txt
+++ /dev/null
@@ -1 +0,0 @@
-See panel-common.yaml in this directory.
diff --git a/Documentation/devicetree/bindings/display/panel/sitronix,st7701.txt b/Documentation/devicetree/bindings/display/panel/sitronix,st7701.txt
deleted file mode 100644
index ccd17597f1f6..000000000000
--- a/Documentation/devicetree/bindings/display/panel/sitronix,st7701.txt
+++ /dev/null
@@ -1,30 +0,0 @@
-Sitronix ST7701 based LCD panels
-
-ST7701 designed for small and medium sizes of TFT LCD display, is
-capable of supporting up to 480RGBX864 in resolution. It provides
-several system interfaces like MIPI/RGB/SPI.
-
-Techstar TS8550B is 480x854, 2-lane MIPI DSI LCD panel which has
-inbuilt ST7701 chip.
-
-Required properties:
-- compatible: must be "sitronix,st7701" and one of
- * "techstar,ts8550b"
-- reset-gpios: a GPIO phandle for the reset pin
-
-Required properties for techstar,ts8550b:
-- reg: DSI virtual channel used by that screen
-- VCC-supply: analog regulator for MIPI circuit
-- IOVCC-supply: I/O system regulator
-
-Optional properties:
-- backlight: phandle for the backlight control.
-
-panel@0 {
- compatible = "techstar,ts8550b", "sitronix,st7701";
- reg = <0>;
- VCC-supply = <&reg_dldo2>;
- IOVCC-supply = <&reg_dldo2>;
- reset-gpios = <&pio 3 24 GPIO_ACTIVE_HIGH>; /* LCD-RST: PD24 */
- backlight = <&backlight>;
-};
diff --git a/Documentation/devicetree/bindings/display/panel/sitronix,st7701.yaml b/Documentation/devicetree/bindings/display/panel/sitronix,st7701.yaml
new file mode 100644
index 000000000000..6dff59fe4be1
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/panel/sitronix,st7701.yaml
@@ -0,0 +1,69 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/panel/sitronix,st7701.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Sitronix ST7701 based LCD panels
+
+maintainers:
+ - Jagan Teki <jagan@amarulasolutions.com>
+
+description: |
+ The ST7701 is designed for small and medium size TFT LCD displays
+ and supports resolutions of up to 480RGBX864. It provides several
+ system interfaces such as MIPI, RGB and SPI.
+
+ The Techstar TS8550B is a 480x854, 2-lane MIPI DSI LCD panel with a
+ built-in ST7701 chip.
+
+allOf:
+ - $ref: panel-common.yaml#
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - techstar,ts8550b
+ - const: sitronix,st7701
+
+ reg:
+ description: DSI virtual channel used by that screen
+ maxItems: 1
+
+ VCC-supply:
+ description: analog regulator for MIPI circuit
+
+ IOVCC-supply:
+ description: I/O system regulator
+
+ reset-gpios: true
+
+ backlight: true
+
+required:
+ - compatible
+ - reg
+ - VCC-supply
+ - IOVCC-supply
+ - reset-gpios
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+
+ dsi {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ panel@0 {
+ compatible = "techstar,ts8550b", "sitronix,st7701";
+ reg = <0>;
+ VCC-supply = <&reg_dldo2>;
+ IOVCC-supply = <&reg_dldo2>;
+ reset-gpios = <&pio 3 24 GPIO_ACTIVE_HIGH>; /* LCD-RST: PD24 */
+ backlight = <&backlight>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/panel/sitronix,st7789v.txt b/Documentation/devicetree/bindings/display/panel/sitronix,st7789v.txt
deleted file mode 100644
index c6995dde641b..000000000000
--- a/Documentation/devicetree/bindings/display/panel/sitronix,st7789v.txt
+++ /dev/null
@@ -1,37 +0,0 @@
-Sitronix ST7789V RGB panel with SPI control bus
-
-Required properties:
- - compatible: "sitronix,st7789v"
- - reg: Chip select of the panel on the SPI bus
- - reset-gpios: a GPIO phandle for the reset pin
- - power-supply: phandle of the regulator that provides the supply voltage
-
-Optional properties:
- - backlight: phandle to the backlight used
-
-The generic bindings for the SPI slaves documented in [1] also applies
-
-The device node can contain one 'port' child node with one child
-'endpoint' node, according to the bindings defined in [2]. This
-node should describe panel's video bus.
-
-[1]: Documentation/devicetree/bindings/spi/spi-bus.txt
-[2]: Documentation/devicetree/bindings/graph.txt
-
-Example:
-
-panel@0 {
- compatible = "sitronix,st7789v";
- reg = <0>;
- reset-gpios = <&pio 6 11 GPIO_ACTIVE_LOW>;
- backlight = <&pwm_bl>;
- spi-max-frequency = <100000>;
- spi-cpol;
- spi-cpha;
-
- port {
- panel_input: endpoint {
- remote-endpoint = <&tcon0_out_panel>;
- };
- };
-};
diff --git a/Documentation/devicetree/bindings/display/panel/sitronix,st7789v.yaml b/Documentation/devicetree/bindings/display/panel/sitronix,st7789v.yaml
new file mode 100644
index 000000000000..fa46d151e7b3
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/panel/sitronix,st7789v.yaml
@@ -0,0 +1,63 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/panel/sitronix,st7789v.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Sitronix ST7789V RGB panel with SPI control bus
+
+description: |
+ The panel must obey the rules for a SPI slave device as specified in
+ spi/spi-controller.yaml
+
+maintainers:
+ - Maxime Ripard <mripard@kernel.org>
+
+allOf:
+ - $ref: panel-common.yaml#
+
+properties:
+ compatible:
+ const: sitronix,st7789v
+
+ reg: true
+ reset-gpios: true
+ power-supply: true
+ backlight: true
+ port: true
+
+required:
+ - compatible
+ - reg
+ - reset-gpios
+ - power-supply
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+
+ spi {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ panel@0 {
+ compatible = "sitronix,st7789v";
+ reg = <0>;
+ reset-gpios = <&pio 6 11 GPIO_ACTIVE_LOW>;
+ backlight = <&pwm_bl>;
+ power-supply = <&power>;
+ spi-max-frequency = <100000>;
+ spi-cpol;
+ spi-cpha;
+
+ port {
+ panel_input: endpoint {
+ remote-endpoint = <&tcon0_out_panel>;
+ };
+ };
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/panel/sony,acx565akm.txt b/Documentation/devicetree/bindings/display/panel/sony,acx565akm.txt
deleted file mode 100644
index e12333280749..000000000000
--- a/Documentation/devicetree/bindings/display/panel/sony,acx565akm.txt
+++ /dev/null
@@ -1,30 +0,0 @@
-Sony ACX565AKM SDI Panel
-========================
-
-Required properties:
-- compatible: "sony,acx565akm"
-
-Optional properties:
-- label: a symbolic name for the panel
-- reset-gpios: panel reset gpio
-
-Required nodes:
-- Video port for SDI input
-
-Example
--------
-
-acx565akm@2 {
- compatible = "sony,acx565akm";
- spi-max-frequency = <6000000>;
- reg = <2>;
-
- label = "lcd";
- reset-gpios = <&gpio3 26 GPIO_ACTIVE_HIGH>; /* 90 */
-
- port {
- lcd_in: endpoint {
- remote-endpoint = <&sdi_out>;
- };
- };
-};
diff --git a/Documentation/devicetree/bindings/display/panel/sony,acx565akm.yaml b/Documentation/devicetree/bindings/display/panel/sony,acx565akm.yaml
new file mode 100644
index 000000000000..95d053c548ab
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/panel/sony,acx565akm.yaml
@@ -0,0 +1,57 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/panel/sony,acx565akm.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Sony ACX565AKM SDI Panel
+
+description: |
+ The panel must obey the rules for a SPI slave device as specified in
+ spi/spi-controller.yaml
+
+maintainers:
+ - Tomi Valkeinen <tomi.valkeinen@ti.com>
+
+allOf:
+ - $ref: panel-common.yaml#
+
+properties:
+ compatible:
+ const: sony,acx565akm
+
+ label: true
+ reset-gpios: true
+ port: true
+
+required:
+ - compatible
+ - port
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+
+ spi {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ panel@2 {
+ compatible = "sony,acx565akm";
+ spi-max-frequency = <6000000>;
+ reg = <2>;
+
+ label = "lcd";
+ reset-gpios = <&gpio3 26 GPIO_ACTIVE_HIGH>; /* 90 */
+
+ port {
+ lcd_in: endpoint {
+ remote-endpoint = <&sdi_out>;
+ };
+ };
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/display/panel/startek,startek-kd050c.txt b/Documentation/devicetree/bindings/display/panel/startek,startek-kd050c.txt
deleted file mode 100644
index 70cd8d18d841..000000000000
--- a/Documentation/devicetree/bindings/display/panel/startek,startek-kd050c.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-Startek Electronic Technology Co. KD050C 5.0" WVGA TFT LCD panel
-
-Required properties:
-- compatible: should be "startek,startek-kd050c"
diff --git a/Documentation/devicetree/bindings/display/panel/startek,startek-kd050c.yaml b/Documentation/devicetree/bindings/display/panel/startek,startek-kd050c.yaml
new file mode 100644
index 000000000000..fd668640afd1
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/panel/startek,startek-kd050c.yaml
@@ -0,0 +1,33 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/panel/startek,startek-kd050c.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Startek Electronic Technology Co. KD050C 5.0" WVGA TFT LCD panel
+
+maintainers:
+ - Nikita Kiryanov <nikita@compulab.co.il>
+
+allOf:
+ - $ref: panel-dpi.yaml#
+
+properties:
+ compatible:
+ items:
+ - const: startek,startek-kd050c
+ - {} # panel-dpi, but not listed here to avoid false select
+
+ backlight: true
+ enable-gpios: true
+ height-mm: true
+ label: true
+ panel-timing: true
+ port: true
+ power-supply: true
+ reset-gpios: true
+ width-mm: true
+
+additionalProperties: false
+
+...
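The schema above carries no examples section. Under the panel-dpi binding it
references, a consumer node might plausibly look like the sketch below; the
fallback compatible, phandles and GPIO number are illustrative assumptions,
and a panel-timing node per panel-dpi.yaml would normally be added as well:

    #include <dt-bindings/gpio/gpio.h>

    panel {
        compatible = "startek,startek-kd050c", "panel-dpi";
        label = "kd050c";
        power-supply = <&reg_lcd_3v3>;
        backlight = <&backlight>;
        enable-gpios = <&gpio3 12 GPIO_ACTIVE_HIGH>;

        port {
            panel_in: endpoint {
                remote-endpoint = <&lcdc_out>;
            };
        };
    };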
diff --git a/Documentation/devicetree/bindings/display/panel/tpo,td.yaml b/Documentation/devicetree/bindings/display/panel/tpo,td.yaml
new file mode 100644
index 000000000000..4aa605613445
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/panel/tpo,td.yaml
@@ -0,0 +1,65 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/panel/tpo,td.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Toppoly TD Panels
+
+description: |
+ The panel must obey the rules for a SPI slave device as specified in
+ spi/spi-controller.yaml
+
+maintainers:
+ - Marek Belisko <marek@goldelico.com>
+ - H. Nikolaus Schaller <hns@goldelico.com>
+
+allOf:
+ - $ref: panel-common.yaml#
+
+properties:
+ compatible:
+ enum:
+ # Toppoly TD028TTEC1 Panel
+ - tpo,td028ttec1
+ # Toppoly TD043MTEA1 Panel
+ - tpo,td043mtea1
+
+ reg: true
+ label: true
+ reset-gpios: true
+ backlight: true
+ port: true
+
+required:
+ - compatible
+ - port
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ spi {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ panel: panel@0 {
+ compatible = "tpo,td043mtea1";
+ reg = <0>;
+ spi-max-frequency = <100000>;
+ spi-cpol;
+ spi-cpha;
+
+ label = "lcd";
+
+ reset-gpios = <&gpio7 7 0>;
+
+ port {
+ lcd_in: endpoint {
+ remote-endpoint = <&dpi_out>;
+ };
+ };
+ };
+ };
+
+...
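The example above covers only the td043mtea1 compatible. A td028ttec1 node
would plausibly follow the same pattern but, as in the old text binding
removed below, typically references a backlight instead of a reset GPIO
(all values illustrative):

    spi {
        #address-cells = <1>;
        #size-cells = <0>;

        panel@0 {
            compatible = "tpo,td028ttec1";
            reg = <0>;
            spi-max-frequency = <100000>;
            spi-cpol;
            spi-cpha;

            label = "lcd";
            backlight = <&backlight>;

            port {
                lcd_in: endpoint {
                    remote-endpoint = <&dpi_out>;
                };
            };
        };
    };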
diff --git a/Documentation/devicetree/bindings/display/panel/tpo,td028ttec1.txt b/Documentation/devicetree/bindings/display/panel/tpo,td028ttec1.txt
deleted file mode 100644
index 898e06ecf4ef..000000000000
--- a/Documentation/devicetree/bindings/display/panel/tpo,td028ttec1.txt
+++ /dev/null
@@ -1,32 +0,0 @@
-Toppoly TD028TTEC1 Panel
-========================
-
-Required properties:
-- compatible: "tpo,td028ttec1"
-
-Optional properties:
-- label: a symbolic name for the panel
-- backlight: phandle of the backlight device
-
-Required nodes:
-- Video port for DPI input
-
-Example
--------
-
-lcd-panel: td028ttec1@0 {
- compatible = "tpo,td028ttec1";
- reg = <0>;
- spi-max-frequency = <100000>;
- spi-cpol;
- spi-cpha;
-
- label = "lcd";
- backlight = <&backlight>;
- port {
- lcd_in: endpoint {
- remote-endpoint = <&dpi_out>;
- };
- };
-};
-
diff --git a/Documentation/devicetree/bindings/display/panel/tpo,td043mtea1.txt b/Documentation/devicetree/bindings/display/panel/tpo,td043mtea1.txt
deleted file mode 100644
index ec6d62975162..000000000000
--- a/Documentation/devicetree/bindings/display/panel/tpo,td043mtea1.txt
+++ /dev/null
@@ -1,33 +0,0 @@
-TPO TD043MTEA1 Panel
-====================
-
-Required properties:
-- compatible: "tpo,td043mtea1"
-- reset-gpios: panel reset gpio
-
-Optional properties:
-- label: a symbolic name for the panel
-
-Required nodes:
-- Video port for DPI input
-
-Example
--------
-
-lcd-panel: panel@0 {
- compatible = "tpo,td043mtea1";
- reg = <0>;
- spi-max-frequency = <100000>;
- spi-cpol;
- spi-cpha;
-
- label = "lcd";
-
- reset-gpios = <&gpio7 7 0>;
-
- port {
- lcd_in: endpoint {
- remote-endpoint = <&dpi_out>;
- };
- };
-};
diff --git a/Documentation/devicetree/bindings/display/panel/visionox,rm69299.yaml b/Documentation/devicetree/bindings/display/panel/visionox,rm69299.yaml
new file mode 100644
index 000000000000..b36f39f6b233
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/panel/visionox,rm69299.yaml
@@ -0,0 +1,57 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/panel/visionox,rm69299.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Visionox model RM69299 Panels Device Tree Bindings.
+
+maintainers:
+ - Harigovindan P <harigovi@codeaurora.org>
+
+description: |
+ This binding is for display panels using a Visionox RM69299 panel.
+
+allOf:
+ - $ref: panel-common.yaml#
+
+properties:
+ compatible:
+ const: visionox,rm69299-1080p-display
+
+ vdda-supply:
+ description: |
+ Phandle of the regulator that provides the vdda supply voltage.
+
+ vdd3p3-supply:
+ description: |
+ Phandle of the regulator that provides the vdd3p3 supply voltage.
+
+ port: true
+ reset-gpios: true
+
+additionalProperties: false
+
+required:
+ - compatible
+ - vdda-supply
+ - vdd3p3-supply
+ - reset-gpios
+ - port
+
+examples:
+ - |
+ panel {
+ compatible = "visionox,rm69299-1080p-display";
+
+ vdda-supply = <&src_pp1800_l8c>;
+ vdd3p3-supply = <&src_pp2800_l18a>;
+
+ reset-gpios = <&pm6150l_gpio 3 0>;
+ port {
+ panel0_in: endpoint {
+ remote-endpoint = <&dsi0_out>;
+ };
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/display/renesas,du.txt b/Documentation/devicetree/bindings/display/renesas,du.txt
index eb4ae41fe41f..51cd4d162770 100644
--- a/Documentation/devicetree/bindings/display/renesas,du.txt
+++ b/Documentation/devicetree/bindings/display/renesas,du.txt
@@ -50,6 +50,14 @@ Required Properties:
VSP instance that serves the DU channel, and the channel index identifies
the LIF instance in that VSP.
+Optional properties:
+ - resets: A list of phandle + reset-specifier pairs, one for each entry in
+ the reset-names property.
+ - reset-names: Names of the resets. This property is model-dependent.
+ - All but R8A7779 use one reset for a group of one or more successive
+ channels. The resets must be named "du.x" with "x" being the numerical
+ index of the lowest channel in the group.
+
Required nodes:
The connections to the DU output video ports are modeled using the OF graph
@@ -96,6 +104,8 @@ Example: R8A7795 (R-Car H3) ES2.0 DU
<&cpg CPG_MOD 722>,
<&cpg CPG_MOD 721>;
clock-names = "du.0", "du.1", "du.2", "du.3";
+ resets = <&cpg 724>, <&cpg 722>;
+ reset-names = "du.0", "du.2";
renesas,cmms = <&cmm0>, <&cmm1>, <&cmm2>, <&cmm3>;
renesas,vsps = <&vspd0 0>, <&vspd1 0>, <&vspd2 0>, <&vspd0 1>;
diff --git a/Documentation/devicetree/bindings/display/rockchip/rockchip,rk3066-hdmi.txt b/Documentation/devicetree/bindings/display/rockchip/rockchip,rk3066-hdmi.txt
deleted file mode 100644
index d1ad31bca8d9..000000000000
--- a/Documentation/devicetree/bindings/display/rockchip/rockchip,rk3066-hdmi.txt
+++ /dev/null
@@ -1,72 +0,0 @@
-Rockchip specific extensions for rk3066 HDMI
-============================================
-
-Required properties:
-- compatible:
- "rockchip,rk3066-hdmi";
-- reg:
- Physical base address and length of the controller's registers.
-- clocks, clock-names:
- Phandle to HDMI controller clock, name should be "hclk".
-- interrupts:
- HDMI interrupt number.
-- power-domains:
- Phandle to the RK3066_PD_VIO power domain.
-- rockchip,grf:
- This soc uses GRF regs to switch the HDMI TX input between vop0 and vop1.
-- ports:
- Contains one port node with two endpoints, numbered 0 and 1,
- connected respectively to vop0 and vop1.
- Contains one port node with one endpoint
- connected to a hdmi-connector node.
-- pinctrl-0, pinctrl-name:
- Switch the iomux for the HPD/I2C pins to HDMI function.
-
-Example:
- hdmi: hdmi@10116000 {
- compatible = "rockchip,rk3066-hdmi";
- reg = <0x10116000 0x2000>;
- interrupts = <GIC_SPI 64 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&cru HCLK_HDMI>;
- clock-names = "hclk";
- power-domains = <&power RK3066_PD_VIO>;
- rockchip,grf = <&grf>;
- pinctrl-names = "default";
- pinctrl-0 = <&hdmii2c_xfer>, <&hdmi_hpd>;
-
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
- hdmi_in: port@0 {
- reg = <0>;
- #address-cells = <1>;
- #size-cells = <0>;
- hdmi_in_vop0: endpoint@0 {
- reg = <0>;
- remote-endpoint = <&vop0_out_hdmi>;
- };
- hdmi_in_vop1: endpoint@1 {
- reg = <1>;
- remote-endpoint = <&vop1_out_hdmi>;
- };
- };
- hdmi_out: port@1 {
- reg = <1>;
- hdmi_out_con: endpoint {
- remote-endpoint = <&hdmi_con_in>;
- };
- };
- };
- };
-
-&pinctrl {
- hdmi {
- hdmi_hpd: hdmi-hpd {
- rockchip,pins = <0 RK_PA0 1 &pcfg_pull_default>;
- };
- hdmii2c_xfer: hdmii2c-xfer {
- rockchip,pins = <0 RK_PA1 1 &pcfg_pull_none>,
- <0 RK_PA2 1 &pcfg_pull_none>;
- };
- };
-};
diff --git a/Documentation/devicetree/bindings/display/rockchip/rockchip,rk3066-hdmi.yaml b/Documentation/devicetree/bindings/display/rockchip/rockchip,rk3066-hdmi.yaml
new file mode 100644
index 000000000000..4110d003ce1f
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/rockchip/rockchip,rk3066-hdmi.yaml
@@ -0,0 +1,140 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/rockchip/rockchip,rk3066-hdmi.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Rockchip rk3066 HDMI controller
+
+maintainers:
+ - Sandy Huang <hjc@rock-chips.com>
+ - Heiko Stuebner <heiko@sntech.de>
+
+properties:
+ compatible:
+ const: rockchip,rk3066-hdmi
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ clock-names:
+ const: hclk
+
+ pinctrl-0:
+ maxItems: 2
+
+ pinctrl-names:
+ const: default
+ description:
+ Switch the iomux for the HPD/I2C pins to HDMI function.
+
+ power-domains:
+ maxItems: 1
+
+ rockchip,grf:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description:
+ This SoC uses GRF registers to switch the HDMI TX input between vop0 and vop1.
+
+ ports:
+ type: object
+
+ properties:
+ "#address-cells":
+ const: 1
+
+ "#size-cells":
+ const: 0
+
+ port@0:
+ type: object
+ description:
+ Port node with two endpoints, numbered 0 and 1,
+ connected respectively to vop0 and vop1.
+
+ port@1:
+ type: object
+ description:
+ Port node with one endpoint connected to a hdmi-connector node.
+
+ required:
+ - "#address-cells"
+ - "#size-cells"
+ - port@0
+ - port@1
+
+ additionalProperties: false
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - clock-names
+ - pinctrl-0
+ - pinctrl-names
+ - power-domains
+ - rockchip,grf
+ - ports
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/rk3066a-cru.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/pinctrl/rockchip.h>
+ #include <dt-bindings/power/rk3066-power.h>
+ hdmi: hdmi@10116000 {
+ compatible = "rockchip,rk3066-hdmi";
+ reg = <0x10116000 0x2000>;
+ interrupts = <GIC_SPI 64 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&cru HCLK_HDMI>;
+ clock-names = "hclk";
+ pinctrl-0 = <&hdmii2c_xfer>, <&hdmi_hpd>;
+ pinctrl-names = "default";
+ power-domains = <&power RK3066_PD_VIO>;
+ rockchip,grf = <&grf>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ hdmi_in: port@0 {
+ reg = <0>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ hdmi_in_vop0: endpoint@0 {
+ reg = <0>;
+ remote-endpoint = <&vop0_out_hdmi>;
+ };
+ hdmi_in_vop1: endpoint@1 {
+ reg = <1>;
+ remote-endpoint = <&vop1_out_hdmi>;
+ };
+ };
+ hdmi_out: port@1 {
+ reg = <1>;
+ hdmi_out_con: endpoint {
+ remote-endpoint = <&hdmi_con_in>;
+ };
+ };
+ };
+ };
+
+ pinctrl {
+ hdmi {
+ hdmi_hpd: hdmi-hpd {
+ rockchip,pins = <0 RK_PA0 1 &pcfg_pull_default>;
+ };
+ hdmii2c_xfer: hdmii2c-xfer {
+ rockchip,pins = <0 RK_PA1 1 &pcfg_pull_none>,
+ <0 RK_PA2 1 &pcfg_pull_none>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/display/rockchip/rockchip-vop.txt b/Documentation/devicetree/bindings/display/rockchip/rockchip-vop.txt
deleted file mode 100644
index 8b3a5f514205..000000000000
--- a/Documentation/devicetree/bindings/display/rockchip/rockchip-vop.txt
+++ /dev/null
@@ -1,74 +0,0 @@
-device-tree bindings for rockchip soc display controller (vop)
-
-VOP (Visual Output Processor) is the Display Controller for the Rockchip
-series of SoCs which transfers the image data from a video memory
-buffer to an external LCD interface.
-
-Required properties:
-- compatible: value should be one of the following
- "rockchip,rk3036-vop";
- "rockchip,rk3126-vop";
- "rockchip,px30-vop-lit";
- "rockchip,px30-vop-big";
- "rockchip,rk3066-vop";
- "rockchip,rk3188-vop";
- "rockchip,rk3288-vop";
- "rockchip,rk3368-vop";
- "rockchip,rk3366-vop";
- "rockchip,rk3399-vop-big";
- "rockchip,rk3399-vop-lit";
- "rockchip,rk3228-vop";
- "rockchip,rk3328-vop";
-
-- reg: Must contain one entry corresponding to the base address and length
- of the register space. Can optionally contain a second entry
- corresponding to the CRTC gamma LUT address.
-
-- interrupts: should contain a list of all VOP IP block interrupts in the
- order: VSYNC, LCD_SYSTEM. The interrupt specifier
- format depends on the interrupt controller used.
-
-- clocks: must include clock specifiers corresponding to entries in the
- clock-names property.
-
-- clock-names: Must contain
- aclk_vop: for ddr buffer transfer.
- hclk_vop: for ahb bus to R/W the phy regs.
- dclk_vop: pixel clock.
-
-- resets: Must contain an entry for each entry in reset-names.
- See ../reset/reset.txt for details.
-- reset-names: Must include the following entries:
- - axi
- - ahb
- - dclk
-
-- iommus: required a iommu node
-
-- port: A port node with endpoint definitions as defined in
- Documentation/devicetree/bindings/media/video-interfaces.txt.
-
-Example:
-SoC specific DT entry:
- vopb: vopb@ff930000 {
- compatible = "rockchip,rk3288-vop";
- reg = <0x0 0xff930000 0x0 0x19c>, <0x0 0xff931000 0x0 0x1000>;
- interrupts = <GIC_SPI 15 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&cru ACLK_VOP0>, <&cru DCLK_VOP0>, <&cru HCLK_VOP0>;
- clock-names = "aclk_vop", "dclk_vop", "hclk_vop";
- resets = <&cru SRST_LCDC1_AXI>, <&cru SRST_LCDC1_AHB>, <&cru SRST_LCDC1_DCLK>;
- reset-names = "axi", "ahb", "dclk";
- iommus = <&vopb_mmu>;
- vopb_out: port {
- #address-cells = <1>;
- #size-cells = <0>;
- vopb_out_edp: endpoint@0 {
- reg = <0>;
- remote-endpoint=<&edp_in_vopb>;
- };
- vopb_out_hdmi: endpoint@1 {
- reg = <1>;
- remote-endpoint=<&hdmi_in_vopb>;
- };
- };
- };
diff --git a/Documentation/devicetree/bindings/display/rockchip/rockchip-vop.yaml b/Documentation/devicetree/bindings/display/rockchip/rockchip-vop.yaml
new file mode 100644
index 000000000000..1695e3e4bcec
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/rockchip/rockchip-vop.yaml
@@ -0,0 +1,134 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/rockchip/rockchip-vop.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Rockchip SoC display controller (VOP)
+
+description:
+ VOP (Video Output Processor) is the display controller for the Rockchip
+ series of SoCs which transfers the image data from a video memory
+ buffer to an external LCD interface.
+
+maintainers:
+ - Sandy Huang <hjc@rock-chips.com>
+ - Heiko Stuebner <heiko@sntech.de>
+
+properties:
+ compatible:
+ enum:
+ - rockchip,px30-vop-big
+ - rockchip,px30-vop-lit
+ - rockchip,rk3036-vop
+ - rockchip,rk3066-vop
+ - rockchip,rk3126-vop
+ - rockchip,rk3188-vop
+ - rockchip,rk3228-vop
+ - rockchip,rk3288-vop
+ - rockchip,rk3328-vop
+ - rockchip,rk3366-vop
+ - rockchip,rk3368-vop
+ - rockchip,rk3399-vop-big
+ - rockchip,rk3399-vop-lit
+
+ reg:
+ minItems: 1
+ items:
+ - description:
+ Must contain one entry corresponding to the base address and length
+ of the register space.
+ - description:
+ Can optionally contain a second entry corresponding to
+ the CRTC gamma LUT address.
+
+ interrupts:
+ maxItems: 1
+ description:
+ The VOP interrupt is shared by several interrupt sources, such as
+ frame start (VSYNC), line flag and other status interrupts.
+
+ clocks:
+ items:
+ - description: Clock for ddr buffer transfer.
+ - description: Pixel clock.
+ - description: Clock for the ahb bus to R/W the phy regs.
+
+ clock-names:
+ items:
+ - const: aclk_vop
+ - const: dclk_vop
+ - const: hclk_vop
+
+ resets:
+ maxItems: 3
+
+ reset-names:
+ items:
+ - const: axi
+ - const: ahb
+ - const: dclk
+
+ port:
+ type: object
+ description:
+ A port node with endpoint definitions as defined in
+ Documentation/devicetree/bindings/media/video-interfaces.txt.
+
+ assigned-clocks:
+ maxItems: 2
+
+ assigned-clock-rates:
+ maxItems: 2
+
+ iommus:
+ maxItems: 1
+
+ power-domains:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - clock-names
+ - resets
+ - reset-names
+ - port
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/rk3288-cru.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/power/rk3288-power.h>
+ vopb: vopb@ff930000 {
+ compatible = "rockchip,rk3288-vop";
+ reg = <0x0 0xff930000 0x0 0x19c>,
+ <0x0 0xff931000 0x0 0x1000>;
+ interrupts = <GIC_SPI 15 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&cru ACLK_VOP0>,
+ <&cru DCLK_VOP0>,
+ <&cru HCLK_VOP0>;
+ clock-names = "aclk_vop", "dclk_vop", "hclk_vop";
+ power-domains = <&power RK3288_PD_VIO>;
+ resets = <&cru SRST_LCDC1_AXI>,
+ <&cru SRST_LCDC1_AHB>,
+ <&cru SRST_LCDC1_DCLK>;
+ reset-names = "axi", "ahb", "dclk";
+ iommus = <&vopb_mmu>;
+ vopb_out: port {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ vopb_out_edp: endpoint@0 {
+ reg = <0>;
+ remote-endpoint=<&edp_in_vopb>;
+ };
+ vopb_out_hdmi: endpoint@1 {
+ reg = <1>;
+ remote-endpoint=<&hdmi_in_vopb>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/hwmon/baikal,bt1-pvt.yaml b/Documentation/devicetree/bindings/hwmon/baikal,bt1-pvt.yaml
new file mode 100644
index 000000000000..84ae4cdd08ed
--- /dev/null
+++ b/Documentation/devicetree/bindings/hwmon/baikal,bt1-pvt.yaml
@@ -0,0 +1,107 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+# Copyright (C) 2020 BAIKAL ELECTRONICS, JSC
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/hwmon/baikal,bt1-pvt.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Baikal-T1 PVT Sensor
+
+maintainers:
+ - Serge Semin <fancer.lancer@gmail.com>
+
+description: |
+ The Baikal-T1 SoC provides an embedded process, voltage and temperature
+ sensor to monitor the internal SoC environment (chip temperature, supply
+ voltage and process monitor) and to detect critical situations in time,
+ which could otherwise cause system instability or even damage. The IP block
+ is based on the Analog Bits PVT sensor, but is equipped with a dedicated
+ control wrapper, which provides MMIO register-based access to the
+ sensor core functionality (APB3-bus based) and exposes additional
+ features like threshold/data-ready interrupts, their status and masks,
+ and a measurements timeout. Its internal structure is depicted in the
+ following diagram:
+
+ Analog Bits core Baikal-T1 PVT control block
+ +--------------------+ +------------------------+
+ | Temperature sensor |-+ +------| Sensors control |
+ |--------------------| |<---En---| |------------------------|
+ | Voltage sensor |-|<--Mode--| +--->| Sampled data |
+ |--------------------| |<--Trim--+ | |------------------------|
+ | Low-Vt sensor |-| | +--| Thresholds comparator |
+ |--------------------| |---Data----| | |------------------------|
+ | High-Vt sensor |-| | +->| Interrupts status |
+ |--------------------| |--Valid--+-+ | |------------------------|
+ | Standard-Vt sensor |-+ +---+--| Interrupts mask |
+ +--------------------+ |------------------------|
+ ^ | Interrupts timeout |
+ | +------------------------+
+ | ^ ^
+ Rclk-----+----------------------------------------+ |
+ APB3-------------------------------------------------+
+
+ This binding describes the external Baikal-T1 PVT control interfaces
+ such as the MMIO register space, the interrupt request number and the
+ clock sources. These are then used by the corresponding hwmon device
+ driver to implement sysfs-based access to the sensor functionality.
+
+properties:
+ compatible:
+ const: baikal,bt1-pvt
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: PVT reference clock
+ - description: APB3 interface clock
+
+ clock-names:
+ items:
+ - const: ref
+ - const: pclk
+
+ "#thermal-sensor-cells":
+ description: Baikal-T1 can be referenced as the CPU thermal-sensor
+ const: 0
+
+ baikal,pvt-temp-offset-millicelsius:
+ description: |
+ Temperature sensor trimming factor. It can be used to manually adjust
+ the temperature measurements by up to 7.130 degrees Celsius.
+ maxItems: 1
+ items:
+ default: 0
+ minimum: 0
+ maximum: 7130
+
+unevaluatedProperties: false
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - clock-names
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/mips-gic.h>
+
+ pvt@1f200000 {
+ compatible = "baikal,bt1-pvt";
+ reg = <0x1f200000 0x1000>;
+ #thermal-sensor-cells = <0>;
+
+ interrupts = <GIC_SHARED 31 IRQ_TYPE_LEVEL_HIGH>;
+
+ baikal,pvt-temp-offset-millicelsius = <1000>;
+
+ clocks = <&ccu_sys>, <&ccu_sys>;
+ clock-names = "ref", "pclk";
+ };
+...
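Because the node exposes "#thermal-sensor-cells = <0>", it can be referenced
as the CPU temperature sensor of a thermal zone; the example above does not
show such a consumer. A sketch, assuming a pvt label on the node and with
purely illustrative delays and trip values, could look like:

    thermal-zones {
        cpu-thermal {
            polling-delay-passive = <250>;
            polling-delay = <1000>;
            thermal-sensors = <&pvt>;

            trips {
                cpu_crit: cpu-crit {
                    temperature = <90000>;
                    hysteresis = <2000>;
                    type = "critical";
                };
            };
        };
    };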
diff --git a/Documentation/devicetree/bindings/interrupt-controller/loongson,htvec.yaml b/Documentation/devicetree/bindings/interrupt-controller/loongson,htvec.yaml
new file mode 100644
index 000000000000..e865cd8f96a9
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/loongson,htvec.yaml
@@ -0,0 +1,57 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/interrupt-controller/loongson,htvec.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: Loongson-3 HyperTransport Interrupt Vector Controller
+
+maintainers:
+ - Jiaxun Yang <jiaxun.yang@flygoat.com>
+
+description:
+ This interrupt controller is found in the Loongson-3 family of chips, where
+ it receives vectorized interrupts from the PCH's interrupt controller.
+
+properties:
+ compatible:
+ const: loongson,htvec-1.0
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ minItems: 1
+ maxItems: 4
+ description: Up to four parent interrupts that receive chained interrupts.
+
+ interrupt-controller: true
+
+ '#interrupt-cells':
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - interrupt-controller
+ - '#interrupt-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/irq.h>
+ htvec: interrupt-controller@fb000080 {
+ compatible = "loongson,htvec-1.0";
+ reg = <0xfb000080 0x40>;
+ interrupt-controller;
+ #interrupt-cells = <1>;
+
+ interrupt-parent = <&liointc>;
+ interrupts = <24 IRQ_TYPE_LEVEL_HIGH>,
+ <25 IRQ_TYPE_LEVEL_HIGH>,
+ <26 IRQ_TYPE_LEVEL_HIGH>,
+ <27 IRQ_TYPE_LEVEL_HIGH>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/interrupt-controller/loongson,pch-msi.yaml b/Documentation/devicetree/bindings/interrupt-controller/loongson,pch-msi.yaml
new file mode 100644
index 000000000000..1a5ebbdd219a
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/loongson,pch-msi.yaml
@@ -0,0 +1,62 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/interrupt-controller/loongson,pch-msi.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: Loongson PCH MSI Controller
+
+maintainers:
+ - Jiaxun Yang <jiaxun.yang@flygoat.com>
+
+description:
+ This interrupt controller is found in the Loongson LS7A family of PCH for
+ transforming interrupts from PCIe MSI into HyperTransport vectorized
+ interrupts.
+
+properties:
+ compatible:
+ const: loongson,pch-msi-1.0
+
+ reg:
+ maxItems: 1
+
+ loongson,msi-base-vec:
+ description:
+ u32 value of the base of parent HyperTransport vector allocated
+ to PCH MSI.
+ allOf:
+ - $ref: "/schemas/types.yaml#/definitions/uint32"
+ - minimum: 0
+ maximum: 255
+
+ loongson,msi-num-vecs:
+ description:
+ u32 value of the number of parent HyperTransport vectors allocated
+ to PCH MSI.
+ allOf:
+ - $ref: "/schemas/types.yaml#/definitions/uint32"
+ - minimum: 1
+ maximum: 256
+
+ msi-controller: true
+
+required:
+ - compatible
+ - reg
+ - msi-controller
+ - loongson,msi-base-vec
+ - loongson,msi-num-vecs
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/irq.h>
+ msi: msi-controller@2ff00000 {
+ compatible = "loongson,pch-msi-1.0";
+ reg = <0x2ff00000 0x4>;
+ msi-controller;
+ loongson,msi-base-vec = <64>;
+ loongson,msi-num-vecs = <64>;
+ interrupt-parent = <&htvec>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/interrupt-controller/loongson,pch-pic.yaml b/Documentation/devicetree/bindings/interrupt-controller/loongson,pch-pic.yaml
new file mode 100644
index 000000000000..274adea13f33
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/loongson,pch-pic.yaml
@@ -0,0 +1,56 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/interrupt-controller/loongson,pch-pic.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: Loongson PCH PIC Controller
+
+maintainers:
+ - Jiaxun Yang <jiaxun.yang@flygoat.com>
+
+description:
+ This interrupt controller is found in the Loongson LS7A family of PCH for
+ transforming interrupts from on-chip devices into HyperTransport vectorized
+ interrupts.
+
+properties:
+ compatible:
+ const: loongson,pch-pic-1.0
+
+ reg:
+ maxItems: 1
+
+ loongson,pic-base-vec:
+ description:
+ u32 value of the base of parent HyperTransport vector allocated
+ to PCH PIC.
+ allOf:
+ - $ref: "/schemas/types.yaml#/definitions/uint32"
+ - minimum: 0
+ maximum: 192
+
+ interrupt-controller: true
+
+ '#interrupt-cells':
+ const: 2
+
+required:
+ - compatible
+ - reg
+ - loongson,pic-base-vec
+ - interrupt-controller
+ - '#interrupt-cells'
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/irq.h>
+ pic: interrupt-controller@10000000 {
+ compatible = "loongson,pch-pic-1.0";
+ reg = <0x10000000 0x400>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ loongson,pic-base-vec = <64>;
+ interrupt-parent = <&htvec>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/mfd/gateworks-gsc.yaml b/Documentation/devicetree/bindings/mfd/gateworks-gsc.yaml
new file mode 100644
index 000000000000..487a8445722e
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/gateworks-gsc.yaml
@@ -0,0 +1,196 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/mfd/gateworks-gsc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Gateworks System Controller
+
+description: |
+ The Gateworks System Controller (GSC) is a device present across various
+ Gateworks product families that provides a set of system related features
+ such as the following (refer to the board hardware user manuals to see what
+ features are present)
+ - Watchdog Timer
+ - GPIO
+ - Pushbutton controller
+ - Hardware monitor with ADCs for temperature and voltage rails, and a
+ fan controller
+
+maintainers:
+ - Tim Harvey <tharvey@gateworks.com>
+ - Robert Jones <rjones@gateworks.com>
+
+properties:
+ $nodename:
+ pattern: "gsc@[0-9a-f]{1,2}"
+ compatible:
+ const: gw,gsc
+
+ reg:
+ description: I2C device address
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ interrupt-controller: true
+
+ "#interrupt-cells":
+ const: 1
+
+ "#address-cells":
+ const: 1
+
+ "#size-cells":
+ const: 0
+
+ adc:
+ type: object
+ description: Optional hardware monitoring module
+
+ properties:
+ compatible:
+ const: gw,gsc-adc
+
+ "#address-cells":
+ const: 1
+
+ "#size-cells":
+ const: 0
+
+ patternProperties:
+ "^channel@[0-9]+$":
+ type: object
+ description: |
+ Properties for a single ADC which can report cooked values
+ (i.e. a temperature sensor based on a thermistor) or raw values
+ (i.e. a voltage rail with a pre-scaling resistor divider).
+
+ properties:
+ reg:
+ description: Register of the ADC
+ maxItems: 1
+
+ label:
+ description: Name of the ADC input
+
+ gw,mode:
+ description: |
+ conversion mode:
+ 0 - temperature, in C*10
+ 1 - pre-scaled voltage value
+ 2 - scaled voltage based on an optional resistor divider
+ and optional offset
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [0, 1, 2]
+
+ gw,voltage-divider-ohms:
+ description: Values of resistors for divider on raw ADC input
+ maxItems: 2
+ items:
+ minimum: 1000
+ maximum: 1000000
+
+ gw,voltage-offset-microvolt:
+ description: |
+ A positive voltage offset to apply to a raw ADC
+ (i.e. to compensate for a diode drop).
+ minimum: 0
+ maximum: 1000000
+
+ required:
+ - gw,mode
+ - reg
+ - label
+
+ required:
+ - compatible
+ - "#address-cells"
+ - "#size-cells"
+
+patternProperties:
+ "^fan-controller@[0-9a-f]+$":
+ type: object
+ description: Optional fan controller
+
+ properties:
+ compatible:
+ const: gw,gsc-fan
+
+ "#address-cells":
+ const: 1
+
+ "#size-cells":
+ const: 0
+
+ reg:
+ description: The fan controller base address
+ maxItems: 1
+
+ required:
+ - compatible
+ - reg
+ - "#address-cells"
+ - "#size-cells"
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - interrupt-controller
+ - "#interrupt-cells"
+ - "#address-cells"
+ - "#size-cells"
+
+examples:
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ gsc@20 {
+ compatible = "gw,gsc";
+ reg = <0x20>;
+ interrupt-parent = <&gpio1>;
+ interrupts = <4 GPIO_ACTIVE_LOW>;
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ adc {
+ compatible = "gw,gsc-adc";
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ channel@0 { /* A0: Board Temperature */
+ reg = <0x00>;
+ label = "temp";
+ gw,mode = <0>;
+ };
+
+ channel@2 { /* A1: Input Voltage (raw ADC) */
+ reg = <0x02>;
+ label = "vdd_vin";
+ gw,mode = <1>;
+ gw,voltage-divider-ohms = <22100 1000>;
+ gw,voltage-offset-microvolt = <800000>;
+ };
+
+ channel@b { /* A2: Battery voltage */
+ reg = <0x0b>;
+ label = "vdd_bat";
+ gw,mode = <1>;
+ };
+ };
+
+ fan-controller@2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "gw,gsc-fan";
+ reg = <0x2c>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/mfd/max8998.txt b/Documentation/devicetree/bindings/mfd/max8998.txt
index 5f2f07c09c90..4ed52184d081 100644
--- a/Documentation/devicetree/bindings/mfd/max8998.txt
+++ b/Documentation/devicetree/bindings/mfd/max8998.txt
@@ -73,6 +73,8 @@ number as described in MAX8998 datasheet.
- ESAFEOUT1: (ldo19)
- ESAFEOUT2: (ld020)
+ - CHARGER: main battery charger current control
+
Standard regulator bindings are used inside regulator subnodes. Check
Documentation/devicetree/bindings/regulator/regulator.txt
for more details.
@@ -113,5 +115,11 @@ Example:
regulator-always-on;
regulator-boot-on;
};
+
+ charger_reg: CHARGER {
+ regulator-name = "CHARGER";
+ regulator-min-microamp = <90000>;
+ regulator-max-microamp = <800000>;
+ };
};
};
diff --git a/Documentation/devicetree/bindings/mips/loongson/rs780e-acpi.yaml b/Documentation/devicetree/bindings/mips/loongson/rs780e-acpi.yaml
new file mode 100644
index 000000000000..d317897e1115
--- /dev/null
+++ b/Documentation/devicetree/bindings/mips/loongson/rs780e-acpi.yaml
@@ -0,0 +1,40 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/mips/loongson/rs780e-acpi.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: Loongson RS780E PCH ACPI Controller
+
+maintainers:
+ - Jiaxun Yang <jiaxun.yang@flygoat.com>
+
+description: |
+ This controller can be found in Loongson-3 systems with RS780E PCH.
+
+properties:
+ compatible:
+ const: loongson,rs780e-acpi
+
+ reg:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+
+examples:
+ - |
+ isa@0 {
+ compatible = "isa";
+ #address-cells = <2>;
+ #size-cells = <1>;
+ ranges = <1 0 0 0x1000>;
+
+ acpi@800 {
+ compatible = "loongson,rs780e-acpi";
+ reg = <1 0x800 0x100>;
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/mmc/amlogic,meson-mx-sdhc.yaml b/Documentation/devicetree/bindings/mmc/amlogic,meson-mx-sdhc.yaml
new file mode 100644
index 000000000000..7a386a5b8fcb
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/amlogic,meson-mx-sdhc.yaml
@@ -0,0 +1,68 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/mmc/amlogic,meson-mx-sdhc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Amlogic Meson SDHC controller Device Tree Bindings
+
+allOf:
+ - $ref: "mmc-controller.yaml"
+
+maintainers:
+ - Martin Blumenstingl <martin.blumenstingl@googlemail.com>
+
+description: |
+ The SDHC MMC host controller on Amlogic SoCs provides an eMMC and MMC
+ card interface with 1/4/8-bit bus width.
+ It supports eMMC spec 4.4x/4.5x including HS200 (up to 100MHz clock).
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - amlogic,meson8-sdhc
+ - amlogic,meson8b-sdhc
+ - amlogic,meson8m2-sdhc
+ - const: amlogic,meson-mx-sdhc
+
+ reg:
+ minItems: 1
+
+ interrupts:
+ minItems: 1
+
+ clocks:
+ minItems: 5
+
+ clock-names:
+ items:
+ - const: clkin0
+ - const: clkin1
+ - const: clkin2
+ - const: clkin3
+ - const: pclk
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - clock-names
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/irq.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+ sdhc: mmc@8e00 {
+ compatible = "amlogic,meson8-sdhc", "amlogic,meson-mx-sdhc";
+ reg = <0x8e00 0x42>;
+ interrupts = <GIC_SPI 78 IRQ_TYPE_EDGE_RISING>;
+ clocks = <&xtal>,
+ <&fclk_div4>,
+ <&fclk_div3>,
+ <&fclk_div5>,
+ <&sdhc_pclk>;
+ clock-names = "clkin0", "clkin1", "clkin2", "clkin3", "pclk";
+ };
diff --git a/Documentation/devicetree/bindings/mmc/arasan,sdhci.txt b/Documentation/devicetree/bindings/mmc/arasan,sdhci.txt
index 428685eb2ded..f29bf7dd2ece 100644
--- a/Documentation/devicetree/bindings/mmc/arasan,sdhci.txt
+++ b/Documentation/devicetree/bindings/mmc/arasan,sdhci.txt
@@ -18,12 +18,21 @@ Required Properties:
- "xlnx,zynqmp-8.9a": ZynqMP SDHCI 8.9a PHY
For this device it is strongly suggested to include clock-output-names and
#clock-cells.
+ - "xlnx,versal-8.9a": Versal SDHCI 8.9a PHY
+ For this device it is strongly suggested to include clock-output-names and
+ #clock-cells.
- "ti,am654-sdhci-5.1", "arasan,sdhci-5.1": TI AM654 MMC PHY
Note: This binding has been deprecated and moved to [5].
- "intel,lgm-sdhci-5.1-emmc", "arasan,sdhci-5.1": Intel LGM eMMC PHY
For this device it is strongly suggested to include arasan,soc-ctl-syscon.
- "intel,lgm-sdhci-5.1-sdxc", "arasan,sdhci-5.1": Intel LGM SDXC PHY
For this device it is strongly suggested to include arasan,soc-ctl-syscon.
+ - "intel,keembay-sdhci-5.1-emmc", "arasan,sdhci-5.1": Intel Keem Bay eMMC
+ For this device it is strongly suggested to include arasan,soc-ctl-syscon.
+ - "intel,keembay-sdhci-5.1-sd": Intel Keem Bay SD controller
+ For this device it is strongly suggested to include arasan,soc-ctl-syscon.
+ - "intel,keembay-sdhci-5.1-sdio": Intel Keem Bay SDIO controller
+ For this device it is strongly suggested to include arasan,soc-ctl-syscon.
[5] Documentation/devicetree/bindings/mmc/sdhci-am654.txt
@@ -104,6 +113,18 @@ Example:
clk-phase-sd-hs = <63>, <72>;
};
+ sdhci: mmc@f1040000 {
+ compatible = "xlnx,versal-8.9a", "arasan,sdhci-8.9a";
+ interrupt-parent = <&gic>;
+ interrupts = <0 126 4>;
+ reg = <0x0 0xf1040000 0x0 0x10000>;
+ clocks = <&clk200>, <&clk200>;
+ clock-names = "clk_xin", "clk_ahb";
+ clock-output-names = "clk_out_sd0", "clk_in_sd0";
+ #clock-cells = <1>;
+ clk-phase-sd-hs = <132>, <60>;
+ };
+
emmc: sdhci@ec700000 {
compatible = "intel,lgm-sdhci-5.1-emmc", "arasan,sdhci-5.1";
reg = <0xec700000 0x300>;
@@ -133,3 +154,39 @@ Example:
phy-names = "phy_arasan";
arasan,soc-ctl-syscon = <&sysconf>;
};
+
+ mmc: mmc@33000000 {
+ compatible = "intel,keembay-sdhci-5.1-emmc", "arasan,sdhci-5.1";
+ interrupts = <GIC_SPI 82 IRQ_TYPE_LEVEL_HIGH>;
+ reg = <0x0 0x33000000 0x0 0x300>;
+ clock-names = "clk_xin", "clk_ahb";
+ clocks = <&scmi_clk KEEM_BAY_PSS_AUX_EMMC>,
+ <&scmi_clk KEEM_BAY_PSS_EMMC>;
+ phys = <&emmc_phy>;
+ phy-names = "phy_arasan";
+ assigned-clocks = <&scmi_clk KEEM_BAY_PSS_AUX_EMMC>;
+ assigned-clock-rates = <200000000>;
+ clock-output-names = "emmc_cardclock";
+ #clock-cells = <0>;
+ arasan,soc-ctl-syscon = <&mmc_phy_syscon>;
+ };
+
+ sd0: mmc@31000000 {
+ compatible = "intel,keembay-sdhci-5.1-sd";
+ interrupts = <GIC_SPI 83 IRQ_TYPE_LEVEL_HIGH>;
+ reg = <0x0 0x31000000 0x0 0x300>;
+ clock-names = "clk_xin", "clk_ahb";
+ clocks = <&scmi_clk KEEM_BAY_PSS_AUX_SD0>,
+ <&scmi_clk KEEM_BAY_PSS_SD0>;
+ arasan,soc-ctl-syscon = <&sd0_phy_syscon>;
+ };
+
+ sd1: mmc@32000000 {
+ compatible = "intel,keembay-sdhci-5.1-sdio";
+ interrupts = <GIC_SPI 84 IRQ_TYPE_LEVEL_HIGH>;
+ reg = <0x0 0x32000000 0x0 0x300>;
+ clock-names = "clk_xin", "clk_ahb";
+ clocks = <&scmi_clk KEEM_BAY_PSS_AUX_SD1>,
+ <&scmi_clk KEEM_BAY_PSS_SD1>;
+ arasan,soc-ctl-syscon = <&sd1_phy_syscon>;
+ };
diff --git a/Documentation/devicetree/bindings/mmc/renesas,mmcif.txt b/Documentation/devicetree/bindings/mmc/renesas,mmcif.txt
index c064af5838aa..291532ac0446 100644
--- a/Documentation/devicetree/bindings/mmc/renesas,mmcif.txt
+++ b/Documentation/devicetree/bindings/mmc/renesas,mmcif.txt
@@ -11,6 +11,7 @@ Required properties:
- "renesas,mmcif-r7s72100" for the MMCIF found in r7s72100 SoCs
- "renesas,mmcif-r8a73a4" for the MMCIF found in r8a73a4 SoCs
- "renesas,mmcif-r8a7740" for the MMCIF found in r8a7740 SoCs
+ - "renesas,mmcif-r8a7742" for the MMCIF found in r8a7742 SoCs
- "renesas,mmcif-r8a7743" for the MMCIF found in r8a7743 SoCs
- "renesas,mmcif-r8a7744" for the MMCIF found in r8a7744 SoCs
- "renesas,mmcif-r8a7745" for the MMCIF found in r8a7745 SoCs
@@ -24,8 +25,8 @@ Required properties:
- interrupts: Some SoCs have only 1 shared interrupt, while others have either
2 or 3 individual interrupts (error, int, card detect). Below is the number
of interrupts for each SoC:
- 1: r8a73a4, r8a7743, r8a7744, r8a7745, r8a7778, r8a7790, r8a7791, r8a7793,
- r8a7794
+ 1: r8a73a4, r8a7742, r8a7743, r8a7744, r8a7745, r8a7778, r8a7790, r8a7791,
+ r8a7793, r8a7794
2: r8a7740, sh73a0
3: r7s72100
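
A minimal node for the newly listed SoC might look as below; the fallback
compatible, unit address, clock specifier and interrupt number are
illustrative assumptions for this sketch, not part of the patch:

	mmc@ee200000 {
		compatible = "renesas,mmcif-r8a7742", "renesas,sh-mmcif";
		reg = <0xee200000 0x80>;
		/* r8a7742 uses a single muxed interrupt, as listed above */
		interrupts = <GIC_SPI 169 IRQ_TYPE_LEVEL_HIGH>;
		clocks = <&cpg CPG_MOD 315>;
	};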
diff --git a/Documentation/devicetree/bindings/mmc/renesas,sdhi.txt b/Documentation/devicetree/bindings/mmc/renesas,sdhi.txt
index e6cc47844207..0ca9a622cce0 100644
--- a/Documentation/devicetree/bindings/mmc/renesas,sdhi.txt
+++ b/Documentation/devicetree/bindings/mmc/renesas,sdhi.txt
@@ -7,6 +7,7 @@ Required properties:
"renesas,sdhi-r7s9210" - SDHI IP on R7S9210 SoC
"renesas,sdhi-r8a73a4" - SDHI IP on R8A73A4 SoC
"renesas,sdhi-r8a7740" - SDHI IP on R8A7740 SoC
+ "renesas,sdhi-r8a7742" - SDHI IP on R8A7742 SoC
"renesas,sdhi-r8a7743" - SDHI IP on R8A7743 SoC
"renesas,sdhi-r8a7744" - SDHI IP on R8A7744 SoC
"renesas,sdhi-r8a7745" - SDHI IP on R8A7745 SoC
diff --git a/Documentation/devicetree/bindings/mmc/sdhci-msm.txt b/Documentation/devicetree/bindings/mmc/sdhci-msm.txt
index 5445931c5ab9..b8e1d2b7aea9 100644
--- a/Documentation/devicetree/bindings/mmc/sdhci-msm.txt
+++ b/Documentation/devicetree/bindings/mmc/sdhci-msm.txt
@@ -17,6 +17,7 @@ Required properties:
"qcom,msm8916-sdhci", "qcom,sdhci-msm-v4"
"qcom,msm8992-sdhci", "qcom,sdhci-msm-v4"
"qcom,msm8996-sdhci", "qcom,sdhci-msm-v4"
+ "qcom,sm8250-sdhci", "qcom,sdhci-msm-v5"
"qcom,sdm845-sdhci", "qcom,sdhci-msm-v5"
"qcom,qcs404-sdhci", "qcom,sdhci-msm-v5"
"qcom,sc7180-sdhci", "qcom,sdhci-msm-v5";
@@ -46,6 +47,13 @@ Required properties:
"cal" - reference clock for RCLK delay calibration (optional)
"sleep" - sleep clock for RCLK delay calibration (optional)
+- qcom,ddr-config: Certain chipsets and platforms require particular settings
+ for the DDR_CONFIG register. Use this field to specify the register
+ value as per the Hardware Programming Guide.
+
+- qcom,dll-config: Chipset- and platform-specific value. Use this field to
+  specify the DLL_CONFIG register value as per the Hardware Programming Guide.
+
Example:
sdhc_1: sdhci@f9824900 {
@@ -63,6 +71,9 @@ Example:
clocks = <&gcc GCC_SDCC1_APPS_CLK>, <&gcc GCC_SDCC1_AHB_CLK>;
clock-names = "core", "iface";
+
+ qcom,dll-config = <0x000f642c>;
+ qcom,ddr-config = <0x80040868>;
};
sdhc_2: sdhci@f98a4900 {
@@ -80,4 +91,7 @@ Example:
clocks = <&gcc GCC_SDCC2_APPS_CLK>, <&gcc GCC_SDCC2_AHB_CLK>;
clock-names = "core", "iface";
+
+ qcom,dll-config = <0x0007642c>;
+ qcom,ddr-config = <0x80040868>;
};
diff --git a/Documentation/devicetree/bindings/mmc/sdhci-pxa.txt b/Documentation/devicetree/bindings/mmc/sdhci-pxa.txt
deleted file mode 100644
index 3d1b449d6097..000000000000
--- a/Documentation/devicetree/bindings/mmc/sdhci-pxa.txt
+++ /dev/null
@@ -1,50 +0,0 @@
-* Marvell sdhci-pxa v2/v3 controller
-
-This file documents differences between the core properties in mmc.txt
-and the properties used by the sdhci-pxav2 and sdhci-pxav3 drivers.
-
-Required properties:
-- compatible: Should be "mrvl,pxav2-mmc", "mrvl,pxav3-mmc" or
- "marvell,armada-380-sdhci".
-- reg:
- * for "mrvl,pxav2-mmc" and "mrvl,pxav3-mmc", one register area for
- the SDHCI registers.
-
- * for "marvell,armada-380-sdhci", three register areas. The first
- one for the SDHCI registers themselves, the second one for the
- AXI/Mbus bridge registers of the SDHCI unit, the third one for the
- SDIO3 Configuration register
-- reg names: should be "sdhci", "mbus", "conf-sdio3". only mandatory
- for "marvell,armada-380-sdhci"
-- clocks: Array of clocks required for SDHCI; requires at least one for
- I/O clock.
-- clock-names: Array of names corresponding to clocks property; shall be
- "io" for I/O clock and "core" for optional core clock.
-
-Optional properties:
-- mrvl,clk-delay-cycles: Specify a number of cycles to delay for tuning.
-
-Example:
-
-sdhci@d4280800 {
- compatible = "mrvl,pxav3-mmc";
- reg = <0xd4280800 0x800>;
- bus-width = <8>;
- interrupts = <27>;
- clocks = <&chip CLKID_SDIO1XIN>, <&chip CLKID_SDIO1>;
- clock-names = "io", "core";
- non-removable;
- mrvl,clk-delay-cycles = <31>;
-};
-
-sdhci@d8000 {
- compatible = "marvell,armada-380-sdhci";
- reg-names = "sdhci", "mbus", "conf-sdio3";
- reg = <0xd8000 0x1000>,
- <0xdc000 0x100>;
- <0x18454 0x4>;
- interrupts = <0 25 0x4>;
- clocks = <&gateclk 17>;
- clock-names = "io";
- mrvl,clk-delay-cycles = <0x1F>;
-};
diff --git a/Documentation/devicetree/bindings/mmc/sdhci-pxa.yaml b/Documentation/devicetree/bindings/mmc/sdhci-pxa.yaml
new file mode 100644
index 000000000000..a58715c860b7
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/sdhci-pxa.yaml
@@ -0,0 +1,102 @@
+# SPDX-License-Identifier: GPL-2.0-only
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/mmc/sdhci-pxa.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Marvell PXA SDHCI v2/v3 bindings
+
+maintainers:
+ - Ulf Hansson <ulf.hansson@linaro.org>
+
+allOf:
+ - $ref: mmc-controller.yaml#
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: marvell,armada-380-sdhci
+ then:
+ properties:
+        reg:
+ minItems: 3
+ reg-names:
+ minItems: 3
+ required:
+ - reg-names
+ else:
+ properties:
+        reg:
+ maxItems: 1
+ reg-names:
+ maxItems: 1
+
+properties:
+ compatible:
+ enum:
+ - mrvl,pxav2-mmc
+ - mrvl,pxav3-mmc
+ - marvell,armada-380-sdhci
+
+ reg:
+ minItems: 1
+ maxItems: 3
+
+ reg-names:
+ items:
+ - const: sdhci
+ - const: mbus
+ - const: conf-sdio3
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ minItems: 1
+ maxItems: 2
+
+ clock-names:
+ minItems: 1
+ maxItems: 2
+ items:
+ - const: io
+ - const: core
+
+ mrvl,clk-delay-cycles:
+ description: Specify a number of cycles to delay for tuning.
+ $ref: /schemas/types.yaml#/definitions/uint32
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - clock-names
+
+examples:
+ - |
+ #include <dt-bindings/clock/berlin2.h>
+ mmc@d4280800 {
+ compatible = "mrvl,pxav3-mmc";
+ reg = <0xd4280800 0x800>;
+ bus-width = <8>;
+ interrupts = <27>;
+ clocks = <&chip CLKID_SDIO1XIN>, <&chip CLKID_SDIO1>;
+ clock-names = "io", "core";
+ non-removable;
+ mrvl,clk-delay-cycles = <31>;
+ };
+ - |
+ mmc@d8000 {
+ compatible = "marvell,armada-380-sdhci";
+ reg-names = "sdhci", "mbus", "conf-sdio3";
+ reg = <0xd8000 0x1000>,
+ <0xdc000 0x100>,
+ <0x18454 0x4>;
+ interrupts = <0 25 0x4>;
+ clocks = <&gateclk 17>;
+ clock-names = "io";
+ mrvl,clk-delay-cycles = <0x1F>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/pci/loongson.yaml b/Documentation/devicetree/bindings/pci/loongson.yaml
new file mode 100644
index 000000000000..30e7cf1aeb87
--- /dev/null
+++ b/Documentation/devicetree/bindings/pci/loongson.yaml
@@ -0,0 +1,62 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/pci/loongson.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Loongson PCI Host Controller
+
+maintainers:
+ - Jiaxun Yang <jiaxun.yang@flygoat.com>
+
+description: |+
+ PCI host controller found on Loongson PCHs and SoCs.
+
+allOf:
+ - $ref: /schemas/pci/pci-bus.yaml#
+
+properties:
+ compatible:
+ oneOf:
+ - const: loongson,ls2k-pci
+ - const: loongson,ls7a-pci
+ - const: loongson,rs780e-pci
+
+ reg:
+ minItems: 1
+ maxItems: 2
+ items:
+ - description: CFG0 standard config space register
+ - description: CFG1 extended config space register
+
+ ranges:
+ minItems: 1
+ maxItems: 3
+
+
+required:
+ - compatible
+ - reg
+ - ranges
+
+examples:
+ - |
+
+ bus {
+ #address-cells = <2>;
+ #size-cells = <2>;
+ pcie@1a000000 {
+ compatible = "loongson,rs780e-pci";
+ device_type = "pci";
+ #address-cells = <3>;
+ #size-cells = <2>;
+
+ // CPU_PHYSICAL(2) SIZE(2)
+ reg = <0x0 0x1a000000 0x0 0x2000000>;
+
+ // BUS_ADDRESS(3) CPU_PHYSICAL(2) SIZE(2)
+ ranges = <0x01000000 0x0 0x00004000 0x0 0x00004000 0x0 0x00004000>,
+ <0x02000000 0x0 0x40000000 0x0 0x40000000 0x0 0x40000000>;
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/regulator/anatop-regulator.txt b/Documentation/devicetree/bindings/regulator/anatop-regulator.txt
deleted file mode 100644
index a3106c72fbea..000000000000
--- a/Documentation/devicetree/bindings/regulator/anatop-regulator.txt
+++ /dev/null
@@ -1,40 +0,0 @@
-Anatop Voltage regulators
-
-Required properties:
-- compatible: Must be "fsl,anatop-regulator"
-- regulator-name: A string used as a descriptive name for regulator outputs
-- anatop-reg-offset: Anatop MFD register offset
-- anatop-vol-bit-shift: Bit shift for the register
-- anatop-vol-bit-width: Number of bits used in the register
-- anatop-min-bit-val: Minimum value of this register
-- anatop-min-voltage: Minimum voltage of this regulator
-- anatop-max-voltage: Maximum voltage of this regulator
-
-Optional properties:
-- anatop-delay-reg-offset: Anatop MFD step time register offset
-- anatop-delay-bit-shift: Bit shift for the step time register
-- anatop-delay-bit-width: Number of bits used in the step time register
-- vin-supply: The supply for this regulator
-- anatop-enable-bit: Regulator enable bit offset
-
-Any property defined as part of the core regulator
-binding, defined in regulator.txt, can also be used.
-
-Example:
-
- regulator-vddpu {
- compatible = "fsl,anatop-regulator";
- regulator-name = "vddpu";
- regulator-min-microvolt = <725000>;
- regulator-max-microvolt = <1300000>;
- regulator-always-on;
- anatop-reg-offset = <0x140>;
- anatop-vol-bit-shift = <9>;
- anatop-vol-bit-width = <5>;
- anatop-delay-reg-offset = <0x170>;
- anatop-delay-bit-shift = <24>;
- anatop-delay-bit-width = <2>;
- anatop-min-bit-val = <1>;
- anatop-min-voltage = <725000>;
- anatop-max-voltage = <1300000>;
- };
diff --git a/Documentation/devicetree/bindings/regulator/anatop-regulator.yaml b/Documentation/devicetree/bindings/regulator/anatop-regulator.yaml
new file mode 100644
index 000000000000..e7b3abe30363
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/anatop-regulator.yaml
@@ -0,0 +1,94 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/regulator/anatop-regulator.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale Anatop Voltage Regulators
+
+maintainers:
+ - Ying-Chun Liu (PaulLiu) <paul.liu@linaro.org>
+
+allOf:
+ - $ref: "regulator.yaml#"
+
+properties:
+ compatible:
+ const: fsl,anatop-regulator
+
+ regulator-name: true
+
+ anatop-reg-offset:
+ $ref: '/schemas/types.yaml#/definitions/uint32'
+ description: u32 value representing the anatop MFD register offset.
+
+ anatop-vol-bit-shift:
+ $ref: '/schemas/types.yaml#/definitions/uint32'
+ description: u32 value representing the bit shift for the register.
+
+ anatop-vol-bit-width:
+ $ref: '/schemas/types.yaml#/definitions/uint32'
+ description: u32 value representing the number of bits used in the register.
+
+ anatop-min-bit-val:
+ $ref: '/schemas/types.yaml#/definitions/uint32'
+ description: u32 value representing the minimum value of this register.
+
+ anatop-min-voltage:
+ $ref: '/schemas/types.yaml#/definitions/uint32'
+ description: u32 value representing the minimum voltage of this regulator.
+
+ anatop-max-voltage:
+ $ref: '/schemas/types.yaml#/definitions/uint32'
+ description: u32 value representing the maximum voltage of this regulator.
+
+ anatop-delay-reg-offset:
+ $ref: '/schemas/types.yaml#/definitions/uint32'
+ description: u32 value representing the anatop MFD step time register offset.
+
+ anatop-delay-bit-shift:
+ $ref: '/schemas/types.yaml#/definitions/uint32'
+ description: u32 value representing the bit shift for the step time register.
+
+ anatop-delay-bit-width:
+ $ref: '/schemas/types.yaml#/definitions/uint32'
+ description: u32 value representing the number of bits used in the step time register.
+
+ anatop-enable-bit:
+ $ref: '/schemas/types.yaml#/definitions/uint32'
+ description: u32 value representing regulator enable bit offset.
+
+ vin-supply:
+ $ref: '/schemas/types.yaml#/definitions/phandle'
+ description: input supply phandle.
+
+required:
+ - compatible
+ - regulator-name
+ - anatop-reg-offset
+ - anatop-vol-bit-shift
+ - anatop-vol-bit-width
+ - anatop-min-bit-val
+ - anatop-min-voltage
+ - anatop-max-voltage
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ regulator-vddpu {
+ compatible = "fsl,anatop-regulator";
+ regulator-name = "vddpu";
+ regulator-min-microvolt = <725000>;
+ regulator-max-microvolt = <1300000>;
+ regulator-always-on;
+ anatop-reg-offset = <0x140>;
+ anatop-vol-bit-shift = <9>;
+ anatop-vol-bit-width = <5>;
+ anatop-delay-reg-offset = <0x170>;
+ anatop-delay-bit-shift = <24>;
+ anatop-delay-bit-width = <2>;
+ anatop-min-bit-val = <1>;
+ anatop-min-voltage = <725000>;
+ anatop-max-voltage = <1300000>;
+ };
diff --git a/Documentation/devicetree/bindings/regulator/maxim,max77826.yaml b/Documentation/devicetree/bindings/regulator/maxim,max77826.yaml
new file mode 100644
index 000000000000..19cbd5eb2897
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/maxim,max77826.yaml
@@ -0,0 +1,68 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/regulator/maxim,max77826.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Maxim Integrated MAX77826 PMIC
+
+maintainers:
+ - Iskren Chernev <iskren.chernev@gmail.com>
+
+properties:
+ $nodename:
+ pattern: "pmic@[0-9a-f]{1,2}"
+ compatible:
+ enum:
+ - maxim,max77826
+
+ reg:
+ maxItems: 1
+
+ regulators:
+ type: object
+ allOf:
+ - $ref: regulator.yaml#
+ description: |
+ list of regulators provided by this controller, must be named
+ after their hardware counterparts LDO[1-15], BUCK and BUCKBOOST
+
+ patternProperties:
+ "^LDO([1-9]|1[0-5])$":
+ type: object
+ allOf:
+ - $ref: regulator.yaml#
+
+      "^(BUCK|BUCKBOOST)$":
+ type: object
+ allOf:
+ - $ref: regulator.yaml#
+
+ additionalProperties: false
+
+required:
+ - compatible
+ - reg
+ - regulators
+
+additionalProperties: false
+
+examples:
+ - |
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ pmic@69 {
+ compatible = "maxim,max77826";
+ reg = <0x69>;
+
+ regulators {
+ LDO2 {
+ regulator-min-microvolt = <650000>;
+ regulator-max-microvolt = <3587500>;
+ };
+ };
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/reserved-memory/ramoops.txt b/Documentation/devicetree/bindings/reserved-memory/ramoops.txt
index 0eba562fe5c6..b7886fea368c 100644
--- a/Documentation/devicetree/bindings/reserved-memory/ramoops.txt
+++ b/Documentation/devicetree/bindings/reserved-memory/ramoops.txt
@@ -30,7 +30,7 @@ Optional properties:
- ecc-size: enables ECC support and specifies ECC buffer size in bytes
(defaults to 0: no ECC)
-- record-size: maximum size in bytes of each dump done on oops/panic
+- record-size: maximum size in bytes of each kmsg dump.
(defaults to 0: disabled)
- console-size: size in bytes of log buffer reserved for kernel messages
@@ -45,7 +45,16 @@ Optional properties:
- unbuffered: if present, use unbuffered mappings to map the reserved region
(defaults to buffered mappings)
-- no-dump-oops: if present, only dump panics (defaults to panics and oops)
+- max-reason: if present, sets the maximum type of kmsg dump reasons to store
+  (defaults to 2: log Oopses and Panics). This can be set to INT_MAX to
+  store all kmsg dumps. See include/linux/kmsg_dump.h KMSG_DUMP_* for other
+  kmsg dump reason values. Setting this to 0 (KMSG_DUMP_UNDEF) means that
+  reason filtering will be controlled by the printk.always_kmsg_dump boot
+  param: if unset, it will be KMSG_DUMP_OOPS, otherwise KMSG_DUMP_MAX.
+
+- no-dump-oops: deprecated, use max-reason instead. If present, and
+  max-reason is not specified, it is equivalent to max-reason = 1
+  (KMSG_DUMP_PANIC).
- flags: if present, pass ramoops behavioral flags (defaults to 0,
see include/linux/pstore_ram.h RAMOOPS_FLAG_* for flag values).
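
A sketch of a ramoops node using the new property is shown below; the
reserved-memory layout, addresses and sizes are illustrative assumptions:

	reserved-memory {
		#address-cells = <2>;
		#size-cells = <2>;
		ranges;

		ramoops@8f000000 {
			compatible = "ramoops";
			reg = <0x0 0x8f000000 0x0 0x100000>;
			record-size = <0x4000>;
			console-size = <0x4000>;
			/* 2 = log Oopses and Panics (the default) */
			max-reason = <2>;
		};
	};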
diff --git a/Documentation/devicetree/bindings/rng/arm-cctrng.yaml b/Documentation/devicetree/bindings/rng/arm-cctrng.yaml
new file mode 100644
index 000000000000..ca6aad19b6ba
--- /dev/null
+++ b/Documentation/devicetree/bindings/rng/arm-cctrng.yaml
@@ -0,0 +1,54 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/rng/arm-cctrng.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Arm TrustZone CryptoCell TRNG engine
+
+maintainers:
+ - Hadar Gat <hadar.gat@arm.com>
+
+description: |+
+ Arm TrustZone CryptoCell TRNG (True Random Number Generator) engine.
+
+properties:
+ compatible:
+ enum:
+ - arm,cryptocell-713-trng
+ - arm,cryptocell-703-trng
+
+ interrupts:
+ maxItems: 1
+
+ reg:
+ maxItems: 1
+
+ arm,rosc-ratio:
+ description:
+ Arm TrustZone CryptoCell TRNG engine has 4 ring oscillators.
+ Sampling ratio values for these 4 ring oscillators. (from calibration)
+ allOf:
+ - $ref: /schemas/types.yaml#/definitions/uint32-array
+ - items:
+ maxItems: 4
+
+ clocks:
+ maxItems: 1
+
+required:
+ - compatible
+ - interrupts
+ - reg
+ - arm,rosc-ratio
+
+additionalProperties: false
+
+examples:
+ - |
+ arm_cctrng: rng@60000000 {
+ compatible = "arm,cryptocell-713-trng";
+ interrupts = <0 29 4>;
+ reg = <0x60000000 0x10000>;
+ arm,rosc-ratio = <5000 1000 500 0>;
+ };
diff --git a/Documentation/devicetree/bindings/rtc/dw-apb.txt b/Documentation/devicetree/bindings/rtc/dw-apb.txt
deleted file mode 100644
index c703d51abb6c..000000000000
--- a/Documentation/devicetree/bindings/rtc/dw-apb.txt
+++ /dev/null
@@ -1,32 +0,0 @@
-* Designware APB timer
-
-Required properties:
-- compatible: One of:
- "snps,dw-apb-timer"
- "snps,dw-apb-timer-sp" <DEPRECATED>
- "snps,dw-apb-timer-osc" <DEPRECATED>
-- reg: physical base address of the controller and length of memory mapped
- region.
-- interrupts: IRQ line for the timer.
-- either clocks+clock-names or clock-frequency properties
-
-Optional properties:
-- clocks : list of clock specifiers, corresponding to entries in
- the clock-names property;
-- clock-names : should contain "timer" and "pclk" entries, matching entries
- in the clocks property.
-- clock-frequency: The frequency in HZ of the timer.
-- clock-freq: For backwards compatibility with picoxcell
-
-If using the clock specifiers, the pclk clock is optional, as not all
-systems may use one.
-
-
-Example:
- timer@ffe00000 {
- compatible = "snps,dw-apb-timer";
- interrupts = <0 170 4>;
- reg = <0xffe00000 0x1000>;
- clocks = <&timer_clk>, <&timer_pclk>;
- clock-names = "timer", "pclk";
- };
diff --git a/Documentation/devicetree/bindings/spi/brcm,spi-bcm-qspi.txt b/Documentation/devicetree/bindings/spi/brcm,spi-bcm-qspi.txt
index ad7ac80a3841..f5e518d099f2 100644
--- a/Documentation/devicetree/bindings/spi/brcm,spi-bcm-qspi.txt
+++ b/Documentation/devicetree/bindings/spi/brcm,spi-bcm-qspi.txt
@@ -26,6 +26,16 @@ Required properties:
"brcm,spi-bcm-qspi", "brcm,spi-brcmstb-qspi" : MSPI+BSPI on BRCMSTB SoCs
"brcm,spi-bcm-qspi", "brcm,spi-brcmstb-mspi" : Second Instance of MSPI
BRCMSTB SoCs
+ "brcm,spi-bcm7425-qspi", "brcm,spi-bcm-qspi", "brcm,spi-brcmstb-mspi" : Second Instance of MSPI
+ BRCMSTB SoCs
+ "brcm,spi-bcm7429-qspi", "brcm,spi-bcm-qspi", "brcm,spi-brcmstb-mspi" : Second Instance of MSPI
+ BRCMSTB SoCs
+ "brcm,spi-bcm7435-qspi", "brcm,spi-bcm-qspi", "brcm,spi-brcmstb-mspi" : Second Instance of MSPI
+ BRCMSTB SoCs
+ "brcm,spi-bcm7216-qspi", "brcm,spi-bcm-qspi", "brcm,spi-brcmstb-mspi" : Second Instance of MSPI
+ BRCMSTB SoCs
+ "brcm,spi-bcm7278-qspi", "brcm,spi-bcm-qspi", "brcm,spi-brcmstb-mspi" : Second Instance of MSPI
+ BRCMSTB SoCs
"brcm,spi-bcm-qspi", "brcm,spi-nsp-qspi" : MSPI+BSPI on Cygnus, NSP
"brcm,spi-bcm-qspi", "brcm,spi-ns2-qspi" : NS2 SoCs
diff --git a/Documentation/devicetree/bindings/spi/mikrotik,rb4xx-spi.yaml b/Documentation/devicetree/bindings/spi/mikrotik,rb4xx-spi.yaml
new file mode 100644
index 000000000000..4ddb42a4ae05
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/mikrotik,rb4xx-spi.yaml
@@ -0,0 +1,36 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/spi/mikrotik,rb4xx-spi.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MikroTik RB4xx series SPI master
+
+maintainers:
+ - Gabor Juhos <juhosg@openwrt.org>
+ - Bert Vermeulen <bert@biot.com>
+
+allOf:
+ - $ref: "spi-controller.yaml#"
+
+properties:
+ compatible:
+ const: mikrotik,rb4xx-spi
+
+ reg:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+
+examples:
+ - |
+ spi: spi@1f000000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "mikrotik,rb4xx-spi";
+ reg = <0x1f000000 0x10>;
+ };
+
+... \ No newline at end of file
diff --git a/Documentation/devicetree/bindings/spi/renesas,rspi.yaml b/Documentation/devicetree/bindings/spi/renesas,rspi.yaml
new file mode 100644
index 000000000000..c54ac059043f
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/renesas,rspi.yaml
@@ -0,0 +1,144 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/spi/renesas,rspi.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Renesas (Quad) Serial Peripheral Interface (RSPI/QSPI)
+
+maintainers:
+ - Geert Uytterhoeven <geert+renesas@glider.be>
+
+properties:
+ compatible:
+ oneOf:
+ - items:
+ - enum:
+ - renesas,rspi-sh7757 # SH7757
+ - const: renesas,rspi # Legacy SH
+
+ - items:
+ - enum:
+ - renesas,rspi-r7s72100 # RZ/A1H
+ - renesas,rspi-r7s9210 # RZ/A2
+ - const: renesas,rspi-rz # RZ/A
+
+ - items:
+ - enum:
+ - renesas,qspi-r8a7743 # RZ/G1M
+ - renesas,qspi-r8a7744 # RZ/G1N
+ - renesas,qspi-r8a7745 # RZ/G1E
+ - renesas,qspi-r8a77470 # RZ/G1C
+ - renesas,qspi-r8a7790 # R-Car H2
+ - renesas,qspi-r8a7791 # R-Car M2-W
+ - renesas,qspi-r8a7792 # R-Car V2H
+ - renesas,qspi-r8a7793 # R-Car M2-N
+ - renesas,qspi-r8a7794 # R-Car E2
+ - const: renesas,qspi # R-Car Gen2 and RZ/G1
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ oneOf:
+ - items:
+ - description: A combined interrupt
+ - items:
+ - description: Error interrupt (SPEI)
+ - description: Receive Interrupt (SPRI)
+ - description: Transmit Interrupt (SPTI)
+
+ interrupt-names:
+ oneOf:
+ - items:
+ - const: mux
+ - items:
+ - const: error
+ - const: rx
+ - const: tx
+
+ clocks:
+ maxItems: 1
+
+ power-domains:
+ maxItems: 1
+
+ resets:
+ maxItems: 1
+
+ dmas:
+ description:
+ Must contain a list of pairs of references to DMA specifiers, one for
+ transmission, and one for reception.
+
+ dma-names:
+ minItems: 2
+ maxItems: 4
+ items:
+ enum:
+ - tx
+ - rx
+
+ num-cs:
+ description: |
+ Total number of native chip selects.
+ Hardware limitations related to chip selects:
+ - When using GPIO chip selects, at least one native chip select must
+ be left unused, as it will be driven anyway.
+ minimum: 1
+ maximum: 2
+ default: 1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - power-domains
+ - '#address-cells'
+ - '#size-cells'
+
+allOf:
+ - $ref: spi-controller.yaml#
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - renesas,rspi-rz
+ then:
+ properties:
+ interrupts:
+ minItems: 3
+ required:
+ - interrupt-names
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - renesas,qspi
+ then:
+ required:
+ - resets
+
+examples:
+ - |
+ #include <dt-bindings/clock/r8a7791-cpg-mssr.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/power/r8a7791-sysc.h>
+
+ qspi: spi@e6b10000 {
+ compatible = "renesas,qspi-r8a7791", "renesas,qspi";
+ reg = <0xe6b10000 0x2c>;
+ interrupts = <GIC_SPI 184 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&cpg CPG_MOD 917>;
+ dmas = <&dmac0 0x17>, <&dmac0 0x18>, <&dmac1 0x17>, <&dmac1 0x18>;
+ dma-names = "tx", "rx", "tx", "rx";
+ power-domains = <&sysc R8A7791_PD_ALWAYS_ON>;
+ resets = <&cpg 917>;
+ num-cs = <1>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ };
diff --git a/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.txt b/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.txt
deleted file mode 100644
index 3ed08ee9feba..000000000000
--- a/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.txt
+++ /dev/null
@@ -1,41 +0,0 @@
-Synopsys DesignWare AMBA 2.0 Synchronous Serial Interface.
-
-Required properties:
-- compatible : "snps,dw-apb-ssi" or "mscc,<soc>-spi", where soc is "ocelot" or
- "jaguar2", or "amazon,alpine-dw-apb-ssi"
-- reg : The register base for the controller. For "mscc,<soc>-spi", a second
- register set is required (named ICPU_CFG:SPI_MST)
-- interrupts : One interrupt, used by the controller.
-- #address-cells : <1>, as required by generic SPI binding.
-- #size-cells : <0>, also as required by generic SPI binding.
-- clocks : phandles for the clocks, see the description of clock-names below.
- The phandle for the "ssi_clk" is required. The phandle for the "pclk" clock
- is optional. If a single clock is specified but no clock-name, it is the
- "ssi_clk" clock. If both clocks are listed, the "ssi_clk" must be first.
-
-Optional properties:
-- clock-names : Contains the names of the clocks:
- "ssi_clk", for the core clock used to generate the external SPI clock.
- "pclk", the interface clock, required for register access. If a clock domain
- used to enable this clock then it should be named "pclk_clkdomain".
-- cs-gpios : Specifies the gpio pins to be used for chipselects.
-- num-cs : The number of chipselects. If omitted, this will default to 4.
-- reg-io-width : The I/O register width (in bytes) implemented by this
- device. Supported values are 2 or 4 (the default).
-
-Child nodes as per the generic SPI binding.
-
-Example:
-
- spi@fff00000 {
- compatible = "snps,dw-apb-ssi";
- reg = <0xfff00000 0x1000>;
- interrupts = <0 154 4>;
- #address-cells = <1>;
- #size-cells = <0>;
- clocks = <&spi_m_clk>;
- num-cs = <2>;
- cs-gpios = <&gpio0 13 0>,
- <&gpio0 14 0>;
- };
-
diff --git a/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml b/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml
new file mode 100644
index 000000000000..c62cbe79f00d
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml
@@ -0,0 +1,133 @@
+# SPDX-License-Identifier: GPL-2.0-only
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/spi/snps,dw-apb-ssi.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Synopsys DesignWare AMBA 2.0 Synchronous Serial Interface
+
+maintainers:
+ - Mark Brown <broonie@kernel.org>
+
+allOf:
+ - $ref: "spi-controller.yaml#"
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - mscc,ocelot-spi
+ - mscc,jaguar2-spi
+ then:
+ properties:
+ reg:
+ minItems: 2
+
+properties:
+ compatible:
+ oneOf:
+ - description: Generic DW SPI Controller
+ enum:
+ - snps,dw-apb-ssi
+ - snps,dwc-ssi-1.01a
+ - description: Microsemi Ocelot/Jaguar2 SoC SPI Controller
+ items:
+ - enum:
+ - mscc,ocelot-spi
+ - mscc,jaguar2-spi
+ - const: snps,dw-apb-ssi
+ - description: Amazon Alpine SPI Controller
+ const: amazon,alpine-dw-apb-ssi
+ - description: Renesas RZ/N1 SPI Controller
+ items:
+ - const: renesas,rzn1-spi
+ - const: snps,dw-apb-ssi
+ - description: Intel Keem Bay SPI Controller
+ const: intel,keembay-ssi
+
+ reg:
+ minItems: 1
+ items:
+ - description: DW APB SSI controller memory mapped registers
+ - description: SPI MST region map
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ minItems: 1
+ items:
+ - description: SPI Controller reference clock source
+ - description: APB interface clock source
+
+ clock-names:
+ minItems: 1
+ items:
+ - const: ssi_clk
+ - const: pclk
+
+ resets:
+ maxItems: 1
+
+ reset-names:
+ const: spi
+
+ reg-io-width:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description: I/O register width (in bytes) implemented by this device
+ default: 4
+ enum: [ 2, 4 ]
+
+ num-cs:
+ default: 4
+ minimum: 1
+ maximum: 4
+
+ dmas:
+ items:
+ - description: TX DMA Channel
+ - description: RX DMA Channel
+
+ dma-names:
+ items:
+ - const: tx
+ - const: rx
+
+patternProperties:
+ "^.*@[0-9a-f]+$":
+ type: object
+ properties:
+ reg:
+ minimum: 0
+ maximum: 3
+
+ spi-rx-bus-width:
+ const: 1
+
+ spi-tx-bus-width:
+ const: 1
+
+unevaluatedProperties: false
+
+required:
+ - compatible
+ - reg
+ - "#address-cells"
+ - "#size-cells"
+ - interrupts
+ - clocks
+
+examples:
+ - |
+ spi@fff00000 {
+ compatible = "snps,dw-apb-ssi";
+ reg = <0xfff00000 0x1000>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ interrupts = <0 154 4>;
+ clocks = <&spi_m_clk>;
+ num-cs = <2>;
+ cs-gpios = <&gpio0 13 0>,
+ <&gpio0 14 0>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/spi/socionext,uniphier-spi.yaml b/Documentation/devicetree/bindings/spi/socionext,uniphier-spi.yaml
new file mode 100644
index 000000000000..c25409298bdf
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/socionext,uniphier-spi.yaml
@@ -0,0 +1,57 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/spi/socionext,uniphier-spi.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Socionext UniPhier SPI controller
+
+description: |
+ UniPhier SoCs have SCSSI which supports SPI single channel.
+
+maintainers:
+ - Kunihiko Hayashi <hayashi.kunihiko@socionext.com>
+ - Keiji Hayashibara <hayashibara.keiji@socionext.com>
+
+allOf:
+ - $ref: spi-controller.yaml#
+
+properties:
+ "#address-cells": true
+ "#size-cells": true
+
+ compatible:
+ const: socionext,uniphier-scssi
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ resets:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - resets
+ - "#address-cells"
+ - "#size-cells"
+
+examples:
+ - |
+ spi0: spi@54006000 {
+ compatible = "socionext,uniphier-scssi";
+ reg = <0x54006000 0x100>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ interrupts = <0 39 4>;
+ clocks = <&peri_clk 11>;
+ resets = <&peri_rst 11>;
+ };
diff --git a/Documentation/devicetree/bindings/spi/spi-dw.txt b/Documentation/devicetree/bindings/spi/spi-dw.txt
deleted file mode 100644
index 7b63ed601990..000000000000
--- a/Documentation/devicetree/bindings/spi/spi-dw.txt
+++ /dev/null
@@ -1,24 +0,0 @@
-Synopsys DesignWare SPI master
-
-Required properties:
-- compatible: should be "snps,designware-spi"
-- #address-cells: see spi-bus.txt
-- #size-cells: see spi-bus.txt
-- reg: address and length of the spi master registers
-- interrupts: should contain one interrupt
-- clocks: spi clock phandle
-- num-cs: see spi-bus.txt
-
-Optional properties:
-- cs-gpios: see spi-bus.txt
-
-Example:
-
-spi: spi@4020a000 {
- compatible = "snps,designware-spi";
- interrupts = <11 1>;
- reg = <0x4020a000 0x1000>;
- clocks = <&pclk>;
- num-cs = <2>;
- cs-gpios = <&banka 0 0>;
-};
diff --git a/Documentation/devicetree/bindings/spi/spi-rspi.txt b/Documentation/devicetree/bindings/spi/spi-rspi.txt
deleted file mode 100644
index 421722b93992..000000000000
--- a/Documentation/devicetree/bindings/spi/spi-rspi.txt
+++ /dev/null
@@ -1,73 +0,0 @@
-Device tree configuration for Renesas RSPI/QSPI driver
-
-Required properties:
-- compatible : For Renesas Serial Peripheral Interface on legacy SH:
- "renesas,rspi-<soctype>", "renesas,rspi" as fallback.
- For Renesas Serial Peripheral Interface on RZ/A:
- "renesas,rspi-<soctype>", "renesas,rspi-rz" as fallback.
- For Quad Serial Peripheral Interface on R-Car Gen2 and
- RZ/G1 devices:
- "renesas,qspi-<soctype>", "renesas,qspi" as fallback.
- Examples with soctypes are:
- - "renesas,rspi-sh7757" (SH)
- - "renesas,rspi-r7s72100" (RZ/A1H)
- - "renesas,rspi-r7s9210" (RZ/A2)
- - "renesas,qspi-r8a7743" (RZ/G1M)
- - "renesas,qspi-r8a7744" (RZ/G1N)
- - "renesas,qspi-r8a7745" (RZ/G1E)
- - "renesas,qspi-r8a77470" (RZ/G1C)
- - "renesas,qspi-r8a7790" (R-Car H2)
- - "renesas,qspi-r8a7791" (R-Car M2-W)
- - "renesas,qspi-r8a7792" (R-Car V2H)
- - "renesas,qspi-r8a7793" (R-Car M2-N)
- - "renesas,qspi-r8a7794" (R-Car E2)
-- reg : Address start and address range size of the device
-- interrupts : A list of interrupt-specifiers, one for each entry in
- interrupt-names.
- If interrupt-names is not present, an interrupt specifier
- for a single muxed interrupt.
-- interrupt-names : A list of interrupt names. Should contain (if present):
- - "error" for SPEI,
- - "rx" for SPRI,
- - "tx" to SPTI,
- - "mux" for a single muxed interrupt.
-- num-cs : Number of chip selects. Some RSPI cores have more than 1.
-- #address-cells : Must be <1>
-- #size-cells : Must be <0>
-
-Optional properties:
-- clocks : Must contain a reference to the functional clock.
-- dmas : Must contain a list of two references to DMA specifiers,
- one for transmission, and one for reception.
-- dma-names : Must contain a list of two DMA names, "tx" and "rx".
-
-Pinctrl properties might be needed, too. See
-Documentation/devicetree/bindings/pinctrl/renesas,*.
-
-Examples:
-
- spi0: spi@e800c800 {
- compatible = "renesas,rspi-r7s72100", "renesas,rspi-rz";
- reg = <0xe800c800 0x24>;
- interrupts = <0 238 IRQ_TYPE_LEVEL_HIGH>,
- <0 239 IRQ_TYPE_LEVEL_HIGH>,
- <0 240 IRQ_TYPE_LEVEL_HIGH>;
- interrupt-names = "error", "rx", "tx";
- interrupt-parent = <&gic>;
- num-cs = <1>;
- #address-cells = <1>;
- #size-cells = <0>;
- };
-
- spi: spi@e6b10000 {
- compatible = "renesas,qspi-r8a7791", "renesas,qspi";
- reg = <0 0xe6b10000 0 0x2c>;
- interrupt-parent = <&gic>;
- interrupts = <0 184 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&mstp9_clks R8A7791_CLK_QSPI_MOD>;
- num-cs = <1>;
- #address-cells = <1>;
- #size-cells = <0>;
- dmas = <&dmac0 0x17>, <&dmac0 0x18>;
- dma-names = "tx", "rx";
- };
diff --git a/Documentation/devicetree/bindings/spi/spi-uniphier.txt b/Documentation/devicetree/bindings/spi/spi-uniphier.txt
deleted file mode 100644
index e1201573a29a..000000000000
--- a/Documentation/devicetree/bindings/spi/spi-uniphier.txt
+++ /dev/null
@@ -1,28 +0,0 @@
-Socionext UniPhier SPI controller driver
-
-UniPhier SoCs have SCSSI which supports SPI single channel.
-
-Required properties:
- - compatible: should be "socionext,uniphier-scssi"
- - reg: address and length of the spi master registers
- - #address-cells: must be <1>, see spi-bus.txt
- - #size-cells: must be <0>, see spi-bus.txt
- - interrupts: a single interrupt specifier
- - pinctrl-names: should be "default"
- - pinctrl-0: pin control state for the default mode
- - clocks: a phandle to the clock for the device
- - resets: a phandle to the reset control for the device
-
-Example:
-
-spi0: spi@54006000 {
- compatible = "socionext,uniphier-scssi";
- reg = <0x54006000 0x100>;
- #address-cells = <1>;
- #size-cells = <0>;
- interrupts = <0 39 4>;
- pinctrl-names = "default";
- pinctrl-0 = <&pinctrl_spi0>;
- clocks = <&peri_clk 11>;
- resets = <&peri_rst 11>;
-};
diff --git a/Documentation/devicetree/bindings/spi/ti_qspi.txt b/Documentation/devicetree/bindings/spi/ti_qspi.txt
index e65fde4a7388..47b184bce414 100644
--- a/Documentation/devicetree/bindings/spi/ti_qspi.txt
+++ b/Documentation/devicetree/bindings/spi/ti_qspi.txt
@@ -29,7 +29,7 @@ modification to bootloader.
Example:
For am4372:
-qspi: qspi@4b300000 {
+qspi: qspi@47900000 {
compatible = "ti,am4372-qspi";
reg = <0x47900000 0x100>, <0x30000000 0x4000000>;
reg-names = "qspi_base", "qspi_mmap";
diff --git a/Documentation/devicetree/bindings/timer/renesas,em-sti.yaml b/Documentation/devicetree/bindings/timer/renesas,em-sti.yaml
new file mode 100644
index 000000000000..233d74d5402c
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/renesas,em-sti.yaml
@@ -0,0 +1,46 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/timer/renesas,em-sti.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Renesas EMMA Mobile System Timer
+
+maintainers:
+ - Magnus Damm <magnus.damm@gmail.com>
+
+properties:
+ compatible:
+ const: renesas,em-sti
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ clock-names:
+ const: sclk
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - clock-names
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ timer@e0180000 {
+ compatible = "renesas,em-sti";
+ reg = <0xe0180000 0x54>;
+ interrupts = <GIC_SPI 125 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&sti_sclk>;
+ clock-names = "sclk";
+ };
diff --git a/Documentation/devicetree/bindings/timer/snps,dw-apb-timer.yaml b/Documentation/devicetree/bindings/timer/snps,dw-apb-timer.yaml
new file mode 100644
index 000000000000..5d300efdf0ca
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/snps,dw-apb-timer.yaml
@@ -0,0 +1,88 @@
+# SPDX-License-Identifier: GPL-2.0-only
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/timer/snps,dw-apb-timer.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Synopsys DesignWare APB Timer
+
+maintainers:
+ - Daniel Lezcano <daniel.lezcano@linaro.org>
+
+properties:
+ compatible:
+ oneOf:
+ - const: snps,dw-apb-timer
+ - enum:
+ - snps,dw-apb-timer-sp
+ - snps,dw-apb-timer-osc
+ deprecated: true
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ minItems: 1
+ items:
+ - description: Timer ticks reference clock source
+ - description: APB interface clock source
+
+ clock-names:
+ minItems: 1
+ items:
+ - const: timer
+ - const: pclk
+
+ clock-frequency: true
+
+ clock-freq:
+ $ref: "/schemas/types.yaml#/definitions/uint32"
+ description: |
+ Has the same meaning as the 'clock-frequency' property - timer clock
+      frequency in Hz, but is defined only for backwards compatibility
+ with the picoxcell platform.
+
+unevaluatedProperties: false
+
+required:
+ - compatible
+ - reg
+ - interrupts
+
+oneOf:
+ - required:
+ - clocks
+ - clock-names
+ - required:
+ - clock-frequency
+ - required:
+ - clock-freq
+
+examples:
+ - |
+ timer@ffe00000 {
+ compatible = "snps,dw-apb-timer";
+ interrupts = <0 170 4>;
+ reg = <0xffe00000 0x1000>;
+ clocks = <&timer_clk>, <&timer_pclk>;
+ clock-names = "timer", "pclk";
+ };
+ - |
+ timer@ffe00000 {
+ compatible = "snps,dw-apb-timer";
+ interrupts = <0 170 4>;
+ reg = <0xffe00000 0x1000>;
+ clocks = <&timer_clk>;
+ clock-names = "timer";
+ };
+ - |
+ timer@ffe00000 {
+ compatible = "snps,dw-apb-timer";
+ interrupts = <0 170 4>;
+ reg = <0xffe00000 0x1000>;
+ clock-frequency = <25000000>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml b/Documentation/devicetree/bindings/vendor-prefixes.yaml
index d3891386d671..997934c58f9a 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.yaml
+++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml
@@ -187,6 +187,8 @@ patternProperties:
description: ChipOne
"^chipspark,.*":
description: ChipSPARK
+ "^chrontel,.*":
+ description: Chrontel, Inc.
"^chrp,.*":
description: Common Hardware Reference Platform
"^chunghwa,.*":
@@ -463,6 +465,8 @@ patternProperties:
description: Infineon Technologies
"^inforce,.*":
description: Inforce Computing
+ "^ivo,.*":
+ description: InfoVision Optoelectronics Kunshan Co. Ltd.
"^ingenic,.*":
description: Ingenic Semiconductor
"^innolux,.*":
@@ -488,7 +492,7 @@ patternProperties:
"^issi,.*":
description: Integrated Silicon Solutions Inc.
"^ite,.*":
- description: ITE Tech, Inc.
+ description: ITE Tech. Inc.
"^itead,.*":
description: ITEAD Intelligent Systems Co.Ltd
"^iwave,.*":
@@ -633,6 +637,8 @@ patternProperties:
description: Microsoft Corporation
"^mikroe,.*":
description: MikroElektronika d.o.o.
+ "^mikrotik,.*":
+ description: MikroTik
"^miniand,.*":
description: Miniand Tech
"^minix,.*":
@@ -1039,6 +1045,8 @@ patternProperties:
description: Tronsmart
"^truly,.*":
description: Truly Semiconductors Limited
+ "^visionox,.*":
+ description: Visionox
"^tsd,.*":
description: Theobroma Systems Design und Consulting GmbH
"^tyan,.*":
diff --git a/Documentation/doc-guide/maintainer-profile.rst b/Documentation/doc-guide/maintainer-profile.rst
index 5afc0ddba40a..755d39f0d407 100644
--- a/Documentation/doc-guide/maintainer-profile.rst
+++ b/Documentation/doc-guide/maintainer-profile.rst
@@ -6,7 +6,7 @@ Documentation subsystem maintainer entry profile
The documentation "subsystem" is the central coordinating point for the
kernel's documentation and associated infrastructure. It covers the
hierarchy under Documentation/ (with the exception of
-Documentation/device-tree), various utilities under scripts/ and, at least
+Documentation/devicetree), various utilities under scripts/ and, at least
some of the time, LICENSES/.
It's worth noting, though, that the boundaries of this subsystem are rather
diff --git a/Documentation/driver-api/dma-buf.rst b/Documentation/driver-api/dma-buf.rst
index c78db28519f7..63dec76d1d8d 100644
--- a/Documentation/driver-api/dma-buf.rst
+++ b/Documentation/driver-api/dma-buf.rst
@@ -11,7 +11,7 @@ course not limited to GPU use cases.
The three main components of this are: (1) dma-buf, representing a
sg_table and exposed to userspace as a file descriptor to allow passing
between devices, (2) fence, which provides a mechanism to signal when
-one device as finished access, and (3) reservation, which manages the
+one device has finished access, and (3) reservation, which manages the
shared or exclusive fence(s) associated with the buffer.
Shared DMA Buffers
@@ -31,7 +31,7 @@ The exporter
- implements and manages operations in :c:type:`struct dma_buf_ops
<dma_buf_ops>` for the buffer,
- allows other users to share the buffer by using dma_buf sharing APIs,
- - manages the details of buffer allocation, wrapped int a :c:type:`struct
+ - manages the details of buffer allocation, wrapped in a :c:type:`struct
dma_buf <dma_buf>`,
- decides about the actual backing storage where this allocation happens,
- and takes care of any migration of scatterlist - for all (shared) users of
diff --git a/Documentation/driver-api/driver-model/device.rst b/Documentation/driver-api/driver-model/device.rst
index 2b868d49d349..b9b022371e85 100644
--- a/Documentation/driver-api/driver-model/device.rst
+++ b/Documentation/driver-api/driver-model/device.rst
@@ -50,10 +50,10 @@ Attributes
Attributes of devices can be exported by a device driver through sysfs.
-Please see Documentation/filesystems/sysfs.txt for more information
+Please see Documentation/filesystems/sysfs.rst for more information
on how sysfs works.
-As explained in Documentation/kobject.txt, device attributes must be
+As explained in Documentation/core-api/kobject.rst, device attributes must be
created before the KOBJ_ADD uevent is generated. The only way to realize
that is by defining an attribute group.
diff --git a/Documentation/driver-api/driver-model/overview.rst b/Documentation/driver-api/driver-model/overview.rst
index d4d1e9b40e0c..e98d0ab4a9b6 100644
--- a/Documentation/driver-api/driver-model/overview.rst
+++ b/Documentation/driver-api/driver-model/overview.rst
@@ -121,4 +121,4 @@ device-specific data or tunable interfaces.
More information about the sysfs directory layout can be found in
the other documents in this directory and in the file
-Documentation/filesystems/sysfs.txt.
+Documentation/filesystems/sysfs.rst.
diff --git a/Documentation/driver-api/index.rst b/Documentation/driver-api/index.rst
index d4e78cb3ef4d..20c431c8e7be 100644
--- a/Documentation/driver-api/index.rst
+++ b/Documentation/driver-api/index.rst
@@ -39,6 +39,7 @@ available subsections can be seen below.
spi
i2c
ipmb
+ ipmi
i3c/index
interconnect
devfreq
diff --git a/Documentation/IPMI.txt b/Documentation/driver-api/ipmi.rst
index 5ef1047e2e66..5ef1047e2e66 100644
--- a/Documentation/IPMI.txt
+++ b/Documentation/driver-api/ipmi.rst
diff --git a/Documentation/driver-api/nvdimm/nvdimm.rst b/Documentation/driver-api/nvdimm/nvdimm.rst
index 08f855cbb4e6..79c0fd39f2af 100644
--- a/Documentation/driver-api/nvdimm/nvdimm.rst
+++ b/Documentation/driver-api/nvdimm/nvdimm.rst
@@ -278,8 +278,8 @@ by a region device with a dynamically assigned id (REGION0 - REGION5).
be contiguous in DPA-space.
This bus is provided by the kernel under the device
- /sys/devices/platform/nfit_test.0 when CONFIG_NFIT_TEST is enabled and
- the nfit_test.ko module is loaded. This not only test LIBNVDIMM but the
+ /sys/devices/platform/nfit_test.0 when the nfit_test.ko module from
+	tools/testing/nvdimm is loaded. This not only tests LIBNVDIMM but the
acpi_nfit.ko driver as well.
diff --git a/Documentation/driver-api/pm/cpuidle.rst b/Documentation/driver-api/pm/cpuidle.rst
index 006cf6db40c6..3588bf078566 100644
--- a/Documentation/driver-api/pm/cpuidle.rst
+++ b/Documentation/driver-api/pm/cpuidle.rst
@@ -68,9 +68,8 @@ only one in the list (that is, the list was empty before) or the value of its
governor currently in use, or the name of the new governor was passed to the
kernel as the value of the ``cpuidle.governor=`` command line parameter, the new
governor will be used from that point on (there can be only one ``CPUIdle``
-governor in use at a time). Also, if ``cpuidle_sysfs_switch`` is passed to the
-kernel in the command line, user space can choose the ``CPUIdle`` governor to
-use at run time via ``sysfs``.
+governor in use at a time). Also, user space can choose the ``CPUIdle``
+governor to use at run time via ``sysfs``.
Once registered, ``CPUIdle`` governors cannot be unregistered, so it is not
practical to put them into loadable kernel modules.
diff --git a/Documentation/driver-api/pm/devices.rst b/Documentation/driver-api/pm/devices.rst
index f66c7b9126ea..946ad0b94e31 100644
--- a/Documentation/driver-api/pm/devices.rst
+++ b/Documentation/driver-api/pm/devices.rst
@@ -349,7 +349,7 @@ the phases are: ``prepare``, ``suspend``, ``suspend_late``, ``suspend_noirq``.
PM core will skip the ``suspend``, ``suspend_late`` and
``suspend_noirq`` phases as well as all of the corresponding phases of
the subsequent device resume for all of these devices. In that case,
- the ``->complete`` callback will be invoked directly after the
+ the ``->complete`` callback will be the next one invoked after the
``->prepare`` callback and is entirely responsible for putting the
device into a consistent state as appropriate.
@@ -361,9 +361,9 @@ the phases are: ``prepare``, ``suspend``, ``suspend_late``, ``suspend_noirq``.
runtime PM disabled.
This feature also can be controlled by device drivers by using the
- ``DPM_FLAG_NEVER_SKIP`` and ``DPM_FLAG_SMART_PREPARE`` driver power
- management flags. [Typically, they are set at the time the driver is
- probed against the device in question by passing them to the
+ ``DPM_FLAG_NO_DIRECT_COMPLETE`` and ``DPM_FLAG_SMART_PREPARE`` driver
+ power management flags. [Typically, they are set at the time the driver
+ is probed against the device in question by passing them to the
:c:func:`dev_pm_set_driver_flags` helper function.] If the first of
these flags is set, the PM core will not apply the direct-complete
procedure described above to the given device and, consequenty, to any
@@ -383,11 +383,15 @@ the phases are: ``prepare``, ``suspend``, ``suspend_late``, ``suspend_noirq``.
``->suspend`` methods provided by subsystems (bus types and PM domains
in particular) must follow an additional rule regarding what can be done
to the devices before their drivers' ``->suspend`` methods are called.
- Namely, they can only resume the devices from runtime suspend by
- calling :c:func:`pm_runtime_resume` for them, if that is necessary, and
+ Namely, they may resume the devices from runtime suspend by
+ calling :c:func:`pm_runtime_resume` for them, if that is necessary, but
they must not update the state of the devices in any other way at that
time (in case the drivers need to resume the devices from runtime
- suspend in their ``->suspend`` methods).
+ suspend in their ``->suspend`` methods). In fact, the PM core prevents
+ subsystems or drivers from putting devices into runtime suspend at
+ these times by calling :c:func:`pm_runtime_get_noresume` before issuing
+ the ``->prepare`` callback (and calling :c:func:`pm_runtime_put` after
+ issuing the ``->complete`` callback).
3. For a number of devices it is convenient to split suspend into the
"quiesce device" and "save device state" phases, in which cases
@@ -459,22 +463,22 @@ When resuming from freeze, standby or memory sleep, the phases are:
Note, however, that new children may be registered below the device as
soon as the ``->resume`` callbacks occur; it's not necessary to wait
- until the ``complete`` phase with that.
+ until the ``complete`` phase runs.
Moreover, if the preceding ``->prepare`` callback returned a positive
number, the device may have been left in runtime suspend throughout the
- whole system suspend and resume (the ``suspend``, ``suspend_late``,
- ``suspend_noirq`` phases of system suspend and the ``resume_noirq``,
- ``resume_early``, ``resume`` phases of system resume may have been
- skipped for it). In that case, the ``->complete`` callback is entirely
+ whole system suspend and resume (its ``->suspend``, ``->suspend_late``,
+ ``->suspend_noirq``, ``->resume_noirq``,
+ ``->resume_early``, and ``->resume`` callbacks may have been
+ skipped). In that case, the ``->complete`` callback is entirely
responsible for putting the device into a consistent state after system
suspend if necessary. [For example, it may need to queue up a runtime
resume request for the device for this purpose.] To check if that is
the case, the ``->complete`` callback can consult the device's
- ``power.direct_complete`` flag. Namely, if that flag is set when the
- ``->complete`` callback is being run, it has been called directly after
- the preceding ``->prepare`` and special actions may be required
- to make the device work correctly afterward.
+ ``power.direct_complete`` flag. If that flag is set when the
+ ``->complete`` callback is being run then the direct-complete mechanism
+ was used, and special actions may be required to make the device work
+ correctly afterward.
At the end of these phases, drivers should be as functional as they were before
suspending: I/O can be performed using DMA and IRQs, and the relevant clocks are
@@ -575,10 +579,12 @@ and the phases are similar.
The ``->poweroff``, ``->poweroff_late`` and ``->poweroff_noirq`` callbacks
should do essentially the same things as the ``->suspend``, ``->suspend_late``
-and ``->suspend_noirq`` callbacks, respectively. The only notable difference is
+and ``->suspend_noirq`` callbacks, respectively. A notable difference is
that they need not store the device register values, because the registers
should already have been stored during the ``freeze``, ``freeze_late`` or
-``freeze_noirq`` phases.
+``freeze_noirq`` phases. Also, on many machines the firmware will power-down
+the entire system, so it is not necessary for the callback to put the device in
+a low-power state.
Leaving Hibernation
@@ -764,70 +770,119 @@ device driver in question.
If it is necessary to resume a device from runtime suspend during a system-wide
transition into a sleep state, that can be done by calling
-:c:func:`pm_runtime_resume` for it from the ``->suspend`` callback (or its
-couterpart for transitions related to hibernation) of either the device's driver
-or a subsystem responsible for it (for example, a bus type or a PM domain).
-That is guaranteed to work by the requirement that subsystems must not change
-the state of devices (possibly except for resuming them from runtime suspend)
+:c:func:`pm_runtime_resume` from the ``->suspend`` callback (or the ``->freeze``
+or ``->poweroff`` callback for transitions related to hibernation) of either the
+device's driver or its subsystem (for example, a bus type or a PM domain).
+However, subsystems must not otherwise change the runtime status of devices
from their ``->prepare`` and ``->suspend`` callbacks (or equivalent) *before*
invoking device drivers' ``->suspend`` callbacks (or equivalent).
+.. _smart_suspend_flag:
+
+The ``DPM_FLAG_SMART_SUSPEND`` Driver Flag
+------------------------------------------
+
Some bus types and PM domains have a policy to resume all devices from runtime
suspend upfront in their ``->suspend`` callbacks, but that may not be really
-necessary if the driver of the device can cope with runtime-suspended devices.
-The driver can indicate that by setting ``DPM_FLAG_SMART_SUSPEND`` in
-:c:member:`power.driver_flags` at the probe time, by passing it to the
-:c:func:`dev_pm_set_driver_flags` helper. That also may cause middle-layer code
+necessary if the device's driver can cope with runtime-suspended devices.
+The driver can indicate this by setting ``DPM_FLAG_SMART_SUSPEND`` in
+:c:member:`power.driver_flags` at probe time, with the assistance of the
+:c:func:`dev_pm_set_driver_flags` helper routine.
+
+Setting that flag causes the PM core and middle-layer code
(bus types, PM domains etc.) to skip the ``->suspend_late`` and
``->suspend_noirq`` callbacks provided by the driver if the device remains in
-runtime suspend at the beginning of the ``suspend_late`` phase of system-wide
-suspend (or in the ``poweroff_late`` phase of hibernation), when runtime PM
-has been disabled for it, under the assumption that its state should not change
-after that point until the system-wide transition is over (the PM core itself
-does that for devices whose "noirq", "late" and "early" system-wide PM callbacks
-are executed directly by it). If that happens, the driver's system-wide resume
-callbacks, if present, may still be invoked during the subsequent system-wide
-resume transition and the device's runtime power management status may be set
-to "active" before enabling runtime PM for it, so the driver must be prepared to
-cope with the invocation of its system-wide resume callbacks back-to-back with
-its ``->runtime_suspend`` one (without the intervening ``->runtime_resume`` and
-so on) and the final state of the device must reflect the "active" runtime PM
-status in that case.
+runtime suspend throughout those phases of the system-wide suspend (and
+similarly for the "freeze" and "poweroff" parts of system hibernation).
+[Otherwise the same driver callback might be executed twice in a row for the
+same device, which would not be valid in general.] If the middle-layer
+system-wide PM callbacks are present
+for the device then they are responsible for skipping these driver callbacks;
+if not then the PM core skips them. The subsystem callback routines can
+determine whether they need to skip the driver callbacks by testing the return
+value from the :c:func:`dev_pm_skip_suspend` helper function.
+
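+For instance, a middle layer's "late" suspend callback might use the helper
+roughly as follows (a hypothetical sketch)::
+
+    static int foo_bus_suspend_late(struct device *dev)
+    {
+            /* Skip the driver's callback if the device may stay suspended. */
+            if (dev_pm_skip_suspend(dev))
+                    return 0;
+
+            return pm_generic_suspend_late(dev);
+    }
+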
+In addition, with ``DPM_FLAG_SMART_SUSPEND`` set, the driver's ``->thaw_noirq``
+and ``->thaw_early`` callbacks are skipped in hibernation if the device remained
+in runtime suspend throughout the preceding "freeze" transition. Again, if the
+middle-layer callbacks are present for the device, they are responsible for
+doing this, otherwise the PM core takes care of it.
+
+
+The ``DPM_FLAG_MAY_SKIP_RESUME`` Driver Flag
+--------------------------------------------
During system-wide resume from a sleep state it's easiest to put devices into
the full-power state, as explained in :file:`Documentation/power/runtime_pm.rst`.
[Refer to that document for more information regarding this particular issue as
well as for information on the device runtime power management framework in
-general.]
-
-However, it often is desirable to leave devices in suspend after system
-transitions to the working state, especially if those devices had been in
+general.] However, it is often desirable to leave devices in suspend after
+system transitions to the working state, especially if those devices had been in
runtime suspend before the preceding system-wide suspend (or analogous)
-transition. Device drivers can use the ``DPM_FLAG_LEAVE_SUSPENDED`` flag to
-indicate to the PM core (and middle-layer code) that they prefer the specific
-devices handled by them to be left suspended and they have no problems with
-skipping their system-wide resume callbacks for this reason. Whether or not the
-devices will actually be left in suspend may depend on their state before the
-given system suspend-resume cycle and on the type of the system transition under
-way. In particular, devices are not left suspended if that transition is a
-restore from hibernation, as device states are not guaranteed to be reflected
-by the information stored in the hibernation image in that case.
-
-The middle-layer code involved in the handling of the device is expected to
-indicate to the PM core if the device may be left in suspend by setting its
-:c:member:`power.may_skip_resume` status bit which is checked by the PM core
-during the "noirq" phase of the preceding system-wide suspend (or analogous)
-transition. The middle layer is then responsible for handling the device as
-appropriate in its "noirq" resume callback, which is executed regardless of
-whether or not the device is left suspended, but the other resume callbacks
-(except for ``->complete``) will be skipped automatically by the PM core if the
-device really can be left in suspend.
-
-For devices whose "noirq", "late" and "early" driver callbacks are invoked
-directly by the PM core, all of the system-wide resume callbacks are skipped if
-``DPM_FLAG_LEAVE_SUSPENDED`` is set and the device is in runtime suspend during
-the ``suspend_noirq`` (or analogous) phase or the transition under way is a
-proper system suspend (rather than anything related to hibernation) and the
-device's wakeup settings are suitable for runtime PM (that is, it cannot
-generate wakeup signals at all or it is allowed to wake up the system from
-sleep).
+transition.
+
+To that end, device drivers can use the ``DPM_FLAG_MAY_SKIP_RESUME`` flag to
+indicate to the PM core and middle-layer code that they allow their "noirq" and
+"early" resume callbacks to be skipped if the device can be left in suspend
+after system-wide PM transitions to the working state. Whether or not that is
+the case generally depends on the state of the device before the given system
+suspend-resume cycle and on the type of the system transition under way.
+In particular, the "thaw" and "restore" transitions related to hibernation are
+not affected by ``DPM_FLAG_MAY_SKIP_RESUME`` at all. [All callbacks are
+issued during the "restore" transition regardless of the flag settings, and
+whether or not any driver callbacks are skipped during the "thaw" transition
+depends on whether or not the ``DPM_FLAG_SMART_SUSPEND`` flag is set (see
+`above <smart_suspend_flag_>`_).
+In addition, a device is not allowed to remain in runtime suspend if any of its
+children will be returned to full power.]
+
+The ``DPM_FLAG_MAY_SKIP_RESUME`` flag is taken into account in combination with
+the :c:member:`power.may_skip_resume` status bit set by the PM core during the
+"suspend" phase of suspend-type transitions. If the driver or the middle layer
+has a reason to prevent the driver's "noirq" and "early" resume callbacks from
+being skipped during the subsequent system resume transition, it should
+clear :c:member:`power.may_skip_resume` in its ``->suspend``, ``->suspend_late``
+or ``->suspend_noirq`` callback. [Note that the drivers setting
+``DPM_FLAG_SMART_SUSPEND`` need to clear :c:member:`power.may_skip_resume` in
+their ``->suspend`` callback in case the other two are skipped.]
+
+Setting the :c:member:`power.may_skip_resume` status bit along with the
+``DPM_FLAG_MAY_SKIP_RESUME`` flag is necessary, but generally not sufficient,
+for the driver's "noirq" and "early" resume callbacks to be skipped. Whether or
+not they should be skipped can be determined by evaluating the
+:c:func:`dev_pm_skip_resume` helper function.
+
+If that function returns ``true``, the driver's "noirq" and "early" resume
+callbacks should be skipped and the device's runtime PM status will be set to
+"suspended" by the PM core. Otherwise, if the device was runtime-suspended
+during the preceding system-wide suspend transition and its
+``DPM_FLAG_SMART_SUSPEND`` is set, its runtime PM status will be set to
+"active" by the PM core. [Hence, the drivers that do not set
+``DPM_FLAG_SMART_SUSPEND`` should not expect the runtime PM status of their
+devices to be changed from "suspended" to "active" by the PM core during
+system-wide resume-type transitions.]
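+
+For example, a driver's "early" resume callback might check the helper like
+this (a hypothetical sketch)::
+
+    static int foo_resume_early(struct device *dev)
+    {
+            /* Leave the device in suspend if the PM core allows it. */
+            if (dev_pm_skip_resume(dev))
+                    return 0;
+
+            /* ... restore state and put the device back into operation ... */
+            return 0;
+    }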
+
+If the ``DPM_FLAG_MAY_SKIP_RESUME`` flag is not set for a device, but
+``DPM_FLAG_SMART_SUSPEND`` is set and the driver's "late" and "noirq" suspend
+callbacks are skipped, its system-wide "noirq" and "early" resume callbacks, if
+present, are invoked as usual and the device's runtime PM status is set to
+"active" by the PM core before enabling runtime PM for it. In that case, the
+driver must be prepared to cope with the invocation of its system-wide resume
+callbacks back-to-back with its ``->runtime_suspend`` one (without the
+intervening ``->runtime_resume`` and system-wide suspend callbacks) and the
+final state of the device must reflect the "active" runtime PM status.
+[Note that this is not a problem at all if the driver's
+``->suspend_late`` callback pointer points to the same function as its
+``->runtime_suspend`` one and its ``->resume_early`` callback pointer points to
+the same function as the ``->runtime_resume`` one, while none of the other
+system-wide suspend-resume callbacks of the driver are present, for example.]
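+
+As a purely illustrative sketch of that arrangement, a driver might reuse its
+runtime PM callbacks for the "late"/"early" system-wide phases::
+
+    static int foo_rt_suspend(struct device *dev)  /* also ->suspend_late etc. */
+    {
+            /* put the device into a low-power state */
+            return 0;
+    }
+
+    static int foo_rt_resume(struct device *dev)   /* also ->resume_early etc. */
+    {
+            /* bring the device back to full power */
+            return 0;
+    }
+
+    static const struct dev_pm_ops foo_pm_ops = {
+            SET_RUNTIME_PM_OPS(foo_rt_suspend, foo_rt_resume, NULL)
+            SET_LATE_SYSTEM_SLEEP_PM_OPS(foo_rt_suspend, foo_rt_resume)
+    };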
+
+Likewise, if ``DPM_FLAG_MAY_SKIP_RESUME`` is set for a device, its driver's
+system-wide "noirq" and "early" resume callbacks may be skipped while its "late"
+and "noirq" suspend callbacks may have been executed (in principle, regardless
+of whether or not ``DPM_FLAG_SMART_SUSPEND`` is set). In that case, the driver
+needs to be able to cope with the invocation of its ``->runtime_resume``
+callback back-to-back with its "late" and "noirq" suspend ones. [For instance,
+that is not a concern if the driver sets both ``DPM_FLAG_SMART_SUSPEND`` and
+``DPM_FLAG_MAY_SKIP_RESUME`` and uses the same pair of suspend/resume callback
+functions for runtime PM and system-wide suspend/resume.]
diff --git a/Documentation/driver-api/thermal/cpu-idle-cooling.rst b/Documentation/driver-api/thermal/cpu-idle-cooling.rst
index a1c3edecae00..b9f34ceb2a38 100644
--- a/Documentation/driver-api/thermal/cpu-idle-cooling.rst
+++ b/Documentation/driver-api/thermal/cpu-idle-cooling.rst
@@ -1,3 +1,6 @@
+================
+CPU Idle Cooling
+================
Situation:
----------
diff --git a/Documentation/driver-api/thermal/index.rst b/Documentation/driver-api/thermal/index.rst
index 5ba61d19c6ae..4cb0b9b6bfb8 100644
--- a/Documentation/driver-api/thermal/index.rst
+++ b/Documentation/driver-api/thermal/index.rst
@@ -8,6 +8,7 @@ Thermal
:maxdepth: 1
cpu-cooling-api
+ cpu-idle-cooling
sysfs-api
power_allocator
diff --git a/Documentation/fb/efifb.rst b/Documentation/fb/efifb.rst
index 04840331a00e..6badff64756f 100644
--- a/Documentation/fb/efifb.rst
+++ b/Documentation/fb/efifb.rst
@@ -2,8 +2,10 @@
What is efifb?
==============
-This is a generic EFI platform driver for Intel based Apple computers.
-efifb is only for EFI booted Intel Macs.
+This is a generic EFI platform driver for systems with UEFI firmware. The
+system must be booted via the EFI stub for this to be usable. efifb supports
+both firmware with Graphics Output Protocol (GOP) displays and older systems
+with only Universal Graphics Adapter (UGA) displays.
Supported Hardware
==================
@@ -12,11 +14,14 @@ Supported Hardware
- Macbook
- Macbook Pro 15"/17"
- MacMini
+- ARM/ARM64/X86 systems with UEFI firmware
How to use it?
==============
-efifb does not have any kind of autodetection of your machine.
+For UGA displays, efifb does not have any kind of autodetection of your
+machine.
+
You have to add the following kernel parameters in your elilo.conf::
Macbook :
@@ -28,6 +33,9 @@ You have to add the following kernel parameters in your elilo.conf::
Macbook Pro 17", iMac 20" :
video=efifb:i20
+For GOP displays, efifb can autodetect the display's resolution and framebuffer
+address, so these should work out of the box without any special parameters.
+
Accepted options:
======= ===========================================================
@@ -36,4 +44,28 @@ nowc Don't map the framebuffer write combined. This can be used
when large amounts of console data are written.
======= ===========================================================
+Options for GOP displays:
+
+mode=n
+ The EFI stub will set the mode of the display to mode number n if
+ possible.
+
+<xres>x<yres>[-(rgb|bgr|<bpp>)]
+ The EFI stub will search for a display mode that matches the specified
+ horizontal and vertical resolution, and optionally bit depth, and set
+ the mode of the display to it if one is found. The bit depth can either
+ be "rgb" or "bgr" to match specifically those pixel formats, or a number
+ for a mode with matching bits per pixel.
+
+auto
+ The EFI stub will choose the mode with the highest resolution (product
+ of horizontal and vertical resolution). If there are multiple modes
+ with the highest resolution, it will choose one with the highest color
+ depth.
+
+list
+ The EFI stub will list out all the display modes that are available. A
+ specific mode can then be chosen using one of the above options for the
+ next boot.
+
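+For example, to have the EFI stub pick the highest-resolution mode, the kernel
+could be booted with (assuming the same ``video=efifb:`` prefix as the examples
+above)::
+
+    video=efifb:auto
+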
Edgar Hucek <gimli@dark-green.com>
diff --git a/Documentation/features/core/eBPF-JIT/arch-support.txt b/Documentation/features/core/eBPF-JIT/arch-support.txt
index 9ae6e8d0d10d..9ed964f65224 100644
--- a/Documentation/features/core/eBPF-JIT/arch-support.txt
+++ b/Documentation/features/core/eBPF-JIT/arch-support.txt
@@ -23,7 +23,7 @@
| openrisc: | TODO |
| parisc: | TODO |
| powerpc: | ok |
- | riscv: | TODO |
+ | riscv: | ok |
| s390: | ok |
| sh: | TODO |
| sparc: | ok |
diff --git a/Documentation/features/debug/KASAN/arch-support.txt b/Documentation/features/debug/KASAN/arch-support.txt
index 304dcd461795..6ff38548923e 100644
--- a/Documentation/features/debug/KASAN/arch-support.txt
+++ b/Documentation/features/debug/KASAN/arch-support.txt
@@ -22,9 +22,9 @@
| nios2: | TODO |
| openrisc: | TODO |
| parisc: | TODO |
- | powerpc: | TODO |
- | riscv: | TODO |
- | s390: | TODO |
+ | powerpc: | ok |
+ | riscv: | ok |
+ | s390: | ok |
| sh: | TODO |
| sparc: | TODO |
| um: | TODO |
diff --git a/Documentation/features/debug/gcov-profile-all/arch-support.txt b/Documentation/features/debug/gcov-profile-all/arch-support.txt
index 6fb2b0671994..210256f6a4cf 100644
--- a/Documentation/features/debug/gcov-profile-all/arch-support.txt
+++ b/Documentation/features/debug/gcov-profile-all/arch-support.txt
@@ -11,7 +11,7 @@
| arm: | ok |
| arm64: | ok |
| c6x: | TODO |
- | csky: | TODO |
+ | csky: | ok |
| h8300: | TODO |
| hexagon: | TODO |
| ia64: | TODO |
diff --git a/Documentation/features/debug/kprobes-on-ftrace/arch-support.txt b/Documentation/features/debug/kprobes-on-ftrace/arch-support.txt
index 32b297295fff..97cd7aa74905 100644
--- a/Documentation/features/debug/kprobes-on-ftrace/arch-support.txt
+++ b/Documentation/features/debug/kprobes-on-ftrace/arch-support.txt
@@ -11,7 +11,7 @@
| arm: | TODO |
| arm64: | TODO |
| c6x: | TODO |
- | csky: | TODO |
+ | csky: | ok |
| h8300: | TODO |
| hexagon: | TODO |
| ia64: | TODO |
diff --git a/Documentation/features/debug/kprobes/arch-support.txt b/Documentation/features/debug/kprobes/arch-support.txt
index e68239b5d2f0..8b316c6e03d4 100644
--- a/Documentation/features/debug/kprobes/arch-support.txt
+++ b/Documentation/features/debug/kprobes/arch-support.txt
@@ -11,7 +11,7 @@
| arm: | ok |
| arm64: | ok |
| c6x: | TODO |
- | csky: | TODO |
+ | csky: | ok |
| h8300: | TODO |
| hexagon: | TODO |
| ia64: | ok |
@@ -23,7 +23,7 @@
| openrisc: | TODO |
| parisc: | ok |
| powerpc: | ok |
- | riscv: | ok |
+ | riscv: | TODO |
| s390: | ok |
| sh: | ok |
| sparc: | ok |
diff --git a/Documentation/features/debug/kretprobes/arch-support.txt b/Documentation/features/debug/kretprobes/arch-support.txt
index f17131b328e5..b805aada395e 100644
--- a/Documentation/features/debug/kretprobes/arch-support.txt
+++ b/Documentation/features/debug/kretprobes/arch-support.txt
@@ -11,7 +11,7 @@
| arm: | ok |
| arm64: | ok |
| c6x: | TODO |
- | csky: | TODO |
+ | csky: | ok |
| h8300: | TODO |
| hexagon: | TODO |
| ia64: | ok |
diff --git a/Documentation/features/debug/stackprotector/arch-support.txt b/Documentation/features/debug/stackprotector/arch-support.txt
index 32bbdfc64c32..12410f606edc 100644
--- a/Documentation/features/debug/stackprotector/arch-support.txt
+++ b/Documentation/features/debug/stackprotector/arch-support.txt
@@ -11,7 +11,7 @@
| arm: | ok |
| arm64: | ok |
| c6x: | TODO |
- | csky: | TODO |
+ | csky: | ok |
| h8300: | TODO |
| hexagon: | TODO |
| ia64: | TODO |
diff --git a/Documentation/features/debug/uprobes/arch-support.txt b/Documentation/features/debug/uprobes/arch-support.txt
index 1c577d0cfc7f..be8acbb95b54 100644
--- a/Documentation/features/debug/uprobes/arch-support.txt
+++ b/Documentation/features/debug/uprobes/arch-support.txt
@@ -11,7 +11,7 @@
| arm: | ok |
| arm64: | ok |
| c6x: | TODO |
- | csky: | TODO |
+ | csky: | ok |
| h8300: | TODO |
| hexagon: | TODO |
| ia64: | TODO |
diff --git a/Documentation/features/io/dma-contiguous/arch-support.txt b/Documentation/features/io/dma-contiguous/arch-support.txt
index eb28b5c97ca6..895c3b0f6492 100644
--- a/Documentation/features/io/dma-contiguous/arch-support.txt
+++ b/Documentation/features/io/dma-contiguous/arch-support.txt
@@ -16,7 +16,7 @@
| hexagon: | TODO |
| ia64: | TODO |
| m68k: | TODO |
- | microblaze: | TODO |
+ | microblaze: | ok |
| mips: | ok |
| nds32: | TODO |
| nios2: | TODO |
diff --git a/Documentation/features/locking/lockdep/arch-support.txt b/Documentation/features/locking/lockdep/arch-support.txt
index 941fd5b1094d..98cb9d85c55d 100644
--- a/Documentation/features/locking/lockdep/arch-support.txt
+++ b/Documentation/features/locking/lockdep/arch-support.txt
@@ -11,7 +11,7 @@
| arm: | ok |
| arm64: | ok |
| c6x: | TODO |
- | csky: | TODO |
+ | csky: | ok |
| h8300: | TODO |
| hexagon: | ok |
| ia64: | TODO |
diff --git a/Documentation/features/perf/kprobes-event/arch-support.txt b/Documentation/features/perf/kprobes-event/arch-support.txt
index d8278bf62b85..518f352fc727 100644
--- a/Documentation/features/perf/kprobes-event/arch-support.txt
+++ b/Documentation/features/perf/kprobes-event/arch-support.txt
@@ -11,7 +11,7 @@
| arm: | ok |
| arm64: | ok |
| c6x: | TODO |
- | csky: | TODO |
+ | csky: | ok |
| h8300: | TODO |
| hexagon: | ok |
| ia64: | TODO |
@@ -21,7 +21,7 @@
| nds32: | ok |
| nios2: | TODO |
| openrisc: | TODO |
- | parisc: | TODO |
+ | parisc: | ok |
| powerpc: | ok |
| riscv: | TODO |
| s390: | ok |
diff --git a/Documentation/features/perf/perf-regs/arch-support.txt b/Documentation/features/perf/perf-regs/arch-support.txt
index 687d049d9cee..c22cd6f8aa5e 100644
--- a/Documentation/features/perf/perf-regs/arch-support.txt
+++ b/Documentation/features/perf/perf-regs/arch-support.txt
@@ -11,7 +11,7 @@
| arm: | ok |
| arm64: | ok |
| c6x: | TODO |
- | csky: | TODO |
+ | csky: | ok |
| h8300: | TODO |
| hexagon: | TODO |
| ia64: | TODO |
@@ -23,7 +23,7 @@
| openrisc: | TODO |
| parisc: | TODO |
| powerpc: | ok |
- | riscv: | TODO |
+ | riscv: | ok |
| s390: | ok |
| sh: | TODO |
| sparc: | TODO |
diff --git a/Documentation/features/perf/perf-stackdump/arch-support.txt b/Documentation/features/perf/perf-stackdump/arch-support.txt
index 90996e3d18a8..527fe4d0b074 100644
--- a/Documentation/features/perf/perf-stackdump/arch-support.txt
+++ b/Documentation/features/perf/perf-stackdump/arch-support.txt
@@ -11,7 +11,7 @@
| arm: | ok |
| arm64: | ok |
| c6x: | TODO |
- | csky: | TODO |
+ | csky: | ok |
| h8300: | TODO |
| hexagon: | TODO |
| ia64: | TODO |
@@ -23,7 +23,7 @@
| openrisc: | TODO |
| parisc: | TODO |
| powerpc: | ok |
- | riscv: | TODO |
+ | riscv: | ok |
| s390: | ok |
| sh: | TODO |
| sparc: | TODO |
diff --git a/Documentation/features/seccomp/seccomp-filter/arch-support.txt b/Documentation/features/seccomp/seccomp-filter/arch-support.txt
index 4fe6c3c3be5c..c7b837f735b1 100644
--- a/Documentation/features/seccomp/seccomp-filter/arch-support.txt
+++ b/Documentation/features/seccomp/seccomp-filter/arch-support.txt
@@ -23,7 +23,7 @@
| openrisc: | TODO |
| parisc: | ok |
| powerpc: | ok |
- | riscv: | TODO |
+ | riscv: | ok |
| s390: | ok |
| sh: | TODO |
| sparc: | TODO |
diff --git a/Documentation/features/vm/huge-vmap/arch-support.txt b/Documentation/features/vm/huge-vmap/arch-support.txt
index 019131c5acce..8525f1981f19 100644
--- a/Documentation/features/vm/huge-vmap/arch-support.txt
+++ b/Documentation/features/vm/huge-vmap/arch-support.txt
@@ -22,7 +22,7 @@
| nios2: | TODO |
| openrisc: | TODO |
| parisc: | TODO |
- | powerpc: | TODO |
+ | powerpc: | ok |
| riscv: | TODO |
| s390: | TODO |
| sh: | TODO |
diff --git a/Documentation/features/vm/pte_special/arch-support.txt b/Documentation/features/vm/pte_special/arch-support.txt
index 3d492a34c8ee..2e017387e228 100644
--- a/Documentation/features/vm/pte_special/arch-support.txt
+++ b/Documentation/features/vm/pte_special/arch-support.txt
@@ -17,7 +17,7 @@
| ia64: | TODO |
| m68k: | TODO |
| microblaze: | TODO |
- | mips: | TODO |
+ | mips: | ok |
| nds32: | TODO |
| nios2: | TODO |
| openrisc: | TODO |
diff --git a/Documentation/filesystems/9p.rst b/Documentation/filesystems/9p.rst
index 671fef39a802..2995279ddc24 100644
--- a/Documentation/filesystems/9p.rst
+++ b/Documentation/filesystems/9p.rst
@@ -192,4 +192,4 @@ For more information on the Plan 9 Operating System check out
http://plan9.bell-labs.com/plan9
For information on Plan 9 from User Space (Plan 9 applications and libraries
-ported to Linux/BSD/OSX/etc) check out http://swtch.com/plan9
+ported to Linux/BSD/OSX/etc) check out https://9fans.github.io/plan9port/
diff --git a/Documentation/filesystems/automount-support.txt b/Documentation/filesystems/automount-support.rst
index 7d9f82607562..430f0b40796b 100644
--- a/Documentation/filesystems/automount-support.txt
+++ b/Documentation/filesystems/automount-support.rst
@@ -1,3 +1,10 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=================
+Automount Support
+=================
+
+
Support is available for filesystems that wish to do automounting
support (such as kAFS which can be found in fs/afs/ and NFS in
fs/nfs/). This facility includes allowing in-kernel mounts to be
@@ -5,13 +12,12 @@ performed and mountpoint degradation to be requested. The latter can
also be requested by userspace.
-======================
-IN-KERNEL AUTOMOUNTING
+In-Kernel Automounting
======================
See section "Mount Traps" of Documentation/filesystems/autofs.rst
-Then from userspace, you can just do something like:
+Then from userspace, you can just do something like::
[root@andromeda root]# mount -t afs \#root.afs. /afs
[root@andromeda root]# ls /afs
@@ -21,7 +27,7 @@ Then from userspace, you can just do something like:
[root@andromeda root]# ls /afs/cambridge/afsdoc/
ChangeLog html LICENSE pdf RELNOTES-1.2.2
-And then if you look in the mountpoint catalogue, you'll see something like:
+And then if you look in the mountpoint catalogue, you'll see something like::
[root@andromeda root]# cat /proc/mounts
...
@@ -30,8 +36,7 @@ And then if you look in the mountpoint catalogue, you'll see something like:
#afsdoc. /afs/cambridge.redhat.com/afsdoc afs rw 0 0
-===========================
-AUTOMATIC MOUNTPOINT EXPIRY
+Automatic Mountpoint Expiry
===========================
Automatic expiration of mountpoints is easy, provided you've mounted the
@@ -43,7 +48,8 @@ To do expiration, you need to follow these steps:
hung.
(2) When a new mountpoint is created in the ->d_automount method, add
- the mnt to the list using mnt_set_expiry()
+ the mnt to the list using mnt_set_expiry()::
+
mnt_set_expiry(newmnt, &afs_vfsmounts);
(3) When you want mountpoints to be expired, call mark_mounts_for_expiry()
@@ -70,8 +76,7 @@ and the copies of those that are on an expiration list will be added to the
same expiration list.
-=======================
-USERSPACE DRIVEN EXPIRY
+Userspace Driven Expiry
=======================
As an alternative, it is possible for userspace to request expiry of any
diff --git a/Documentation/filesystems/caching/backend-api.txt b/Documentation/filesystems/caching/backend-api.rst
index c418280c915f..19fbf6b9aa36 100644
--- a/Documentation/filesystems/caching/backend-api.txt
+++ b/Documentation/filesystems/caching/backend-api.rst
@@ -1,6 +1,8 @@
- ==========================
- FS-CACHE CACHE BACKEND API
- ==========================
+.. SPDX-License-Identifier: GPL-2.0
+
+==========================
+FS-Cache Cache backend API
+==========================
The FS-Cache system provides an API by which actual caches can be supplied to
FS-Cache for it to then serve out to network filesystems and other interested
@@ -9,15 +11,14 @@ parties.
This API is declared in <linux/fscache-cache.h>.
-====================================
-INITIALISING AND REGISTERING A CACHE
+Initialising and Registering a Cache
====================================
To start off, a cache definition must be initialised and registered for each
cache the backend wants to make available. For instance, CacheFS does this in
the fill_super() operation on mounting.
-The cache definition (struct fscache_cache) should be initialised by calling:
+The cache definition (struct fscache_cache) should be initialised by calling::
void fscache_init_cache(struct fscache_cache *cache,
struct fscache_cache_ops *ops,
@@ -26,17 +27,17 @@ The cache definition (struct fscache_cache) should be initialised by calling:
Where:
- (*) "cache" is a pointer to the cache definition;
+ * "cache" is a pointer to the cache definition;
- (*) "ops" is a pointer to the table of operations that the backend supports on
+ * "ops" is a pointer to the table of operations that the backend supports on
this cache; and
- (*) "idfmt" is a format and printf-style arguments for constructing a label
+ * "idfmt" is a format and printf-style arguments for constructing a label
for the cache.
The cache should then be registered with FS-Cache by passing a pointer to the
-previously initialised cache definition to:
+previously initialised cache definition to::
int fscache_add_cache(struct fscache_cache *cache,
struct fscache_object *fsdef,
@@ -44,12 +45,12 @@ previously initialised cache definition to:
Two extra arguments should also be supplied:
- (*) "fsdef" which should point to the object representation for the FS-Cache
+ * "fsdef" which should point to the object representation for the FS-Cache
master index in this cache. Netfs primary index entries will be created
here. FS-Cache keeps the caller's reference to the index object if
successful and will release it upon withdrawal of the cache.
- (*) "tagname" which, if given, should be a text string naming this cache. If
+ * "tagname" which, if given, should be a text string naming this cache. If
this is NULL, the identifier will be used instead. For CacheFS, the
identifier is set to name the underlying block device and the tag can be
supplied by mount.
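+
+As a purely illustrative sketch (the ``my_*`` names are hypothetical), a cache
+backend might register itself from its fill_super() roughly like this::
+
+        int err;
+
+        fscache_init_cache(&my_cache, &my_cache_ops, "mycache");
+        err = fscache_add_cache(&my_cache, my_fsdef_object, NULL);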
@@ -58,20 +59,18 @@ This function may return -ENOMEM if it ran out of memory or -EEXIST if the tag
is already in use. 0 will be returned on success.
-=====================
-UNREGISTERING A CACHE
+Unregistering a Cache
=====================
A cache can be withdrawn from the system by calling this function with a
-pointer to the cache definition:
+pointer to the cache definition::
void fscache_withdraw_cache(struct fscache_cache *cache);
In CacheFS's case, this is called by put_super().
-========
-SECURITY
+Security
========
The cache methods are executed in one of two contexts:
@@ -89,8 +88,7 @@ be masqueraded for the duration of the cache driver's access to the cache.
This is left to the cache to handle; FS-Cache makes no effort in this regard.
-===================================
-CONTROL AND STATISTICS PRESENTATION
+Control and Statistics Presentation
===================================
The cache may present data to the outside world through FS-Cache's interfaces
@@ -101,11 +99,10 @@ is enabled. This is accessible through the kobject struct fscache_cache::kobj
and is for use by the cache as it sees fit.
-========================
-RELEVANT DATA STRUCTURES
+Relevant Data Structures
========================
- (*) Index/Data file FS-Cache representation cookie:
+ * Index/Data file FS-Cache representation cookie::
struct fscache_cookie {
struct fscache_object_def *def;
@@ -121,7 +118,7 @@ RELEVANT DATA STRUCTURES
cache operations.
- (*) In-cache object representation:
+ * In-cache object representation::
struct fscache_object {
int debug_id;
@@ -150,7 +147,7 @@ RELEVANT DATA STRUCTURES
initialised by calling fscache_object_init(object).
- (*) FS-Cache operation record:
+ * FS-Cache operation record::
struct fscache_operation {
atomic_t usage;
@@ -173,7 +170,7 @@ RELEVANT DATA STRUCTURES
an operation needs more processing time, it should be enqueued again.
- (*) FS-Cache retrieval operation record:
+ * FS-Cache retrieval operation record::
struct fscache_retrieval {
struct fscache_operation op;
@@ -198,7 +195,7 @@ RELEVANT DATA STRUCTURES
it sees fit.
- (*) FS-Cache storage operation record:
+ * FS-Cache storage operation record::
struct fscache_storage {
struct fscache_operation op;
@@ -212,16 +209,17 @@ RELEVANT DATA STRUCTURES
storage.
-================
-CACHE OPERATIONS
+Cache Operations
================
The cache backend provides FS-Cache with a table of operations that can be
performed on the denizens of the cache. These are held in a structure of type:
- struct fscache_cache_ops
+ ::
+
+ struct fscache_cache_ops
- (*) Name of cache provider [mandatory]:
+ * Name of cache provider [mandatory]::
const char *name
@@ -229,7 +227,7 @@ performed on the denizens of the cache. These are held in a structure of type:
the backend.
- (*) Allocate a new object [mandatory]:
+ * Allocate a new object [mandatory]::
struct fscache_object *(*alloc_object)(struct fscache_cache *cache,
struct fscache_cookie *cookie)
@@ -244,7 +242,7 @@ performed on the denizens of the cache. These are held in a structure of type:
form once lookup is complete or aborted.
- (*) Look up and create object [mandatory]:
+ * Look up and create object [mandatory]::
void (*lookup_object)(struct fscache_object *object)
@@ -263,7 +261,7 @@ performed on the denizens of the cache. These are held in a structure of type:
to abort the lookup of that object.
- (*) Release lookup data [mandatory]:
+ * Release lookup data [mandatory]::
void (*lookup_complete)(struct fscache_object *object)
@@ -271,7 +269,7 @@ performed on the denizens of the cache. These are held in a structure of type:
using to perform a lookup.
- (*) Increment object refcount [mandatory]:
+ * Increment object refcount [mandatory]::
struct fscache_object *(*grab_object)(struct fscache_object *object)
@@ -280,7 +278,7 @@ performed on the denizens of the cache. These are held in a structure of type:
It should return the object pointer if successful.
- (*) Lock/Unlock object [mandatory]:
+ * Lock/Unlock object [mandatory]::
void (*lock_object)(struct fscache_object *object)
void (*unlock_object)(struct fscache_object *object)
@@ -289,7 +287,7 @@ performed on the denizens of the cache. These are held in a structure of type:
to schedule with the lock held, so a spinlock isn't sufficient.
- (*) Pin/Unpin object [optional]:
+ * Pin/Unpin object [optional]::
int (*pin_object)(struct fscache_object *object)
void (*unpin_object)(struct fscache_object *object)
@@ -299,7 +297,7 @@ performed on the denizens of the cache. These are held in a structure of type:
enough space in the cache to permit this.
- (*) Check coherency state of an object [mandatory]:
+ * Check coherency state of an object [mandatory]::
int (*check_consistency)(struct fscache_object *object)
@@ -308,7 +306,7 @@ performed on the denizens of the cache. These are held in a structure of type:
if they're consistent and -ESTALE otherwise. -ENOMEM and -ERESTARTSYS
may also be returned.
- (*) Update object [mandatory]:
+ * Update object [mandatory]::
int (*update_object)(struct fscache_object *object)
@@ -317,7 +315,7 @@ performed on the denizens of the cache. These are held in a structure of type:
obtained by calling object->cookie->def->get_aux()/get_attr().
- (*) Invalidate data object [mandatory]:
+ * Invalidate data object [mandatory]::
int (*invalidate_object)(struct fscache_operation *op)
@@ -329,7 +327,7 @@ performed on the denizens of the cache. These are held in a structure of type:
fscache_op_complete() must be called on op before returning.
- (*) Discard object [mandatory]:
+ * Discard object [mandatory]::
void (*drop_object)(struct fscache_object *object)
@@ -341,7 +339,7 @@ performed on the denizens of the cache. These are held in a structure of type:
caller. The caller will invoke the put_object() method as appropriate.
- (*) Release object reference [mandatory]:
+ * Release object reference [mandatory]::
void (*put_object)(struct fscache_object *object)
@@ -349,7 +347,7 @@ performed on the denizens of the cache. These are held in a structure of type:
be freed when all the references to it are released.
- (*) Synchronise a cache [mandatory]:
+ * Synchronise a cache [mandatory]::
void (*sync)(struct fscache_cache *cache)
@@ -357,7 +355,7 @@ performed on the denizens of the cache. These are held in a structure of type:
device.
- (*) Dissociate a cache [mandatory]:
+ * Dissociate a cache [mandatory]::
void (*dissociate_pages)(struct fscache_cache *cache)
@@ -365,7 +363,7 @@ performed on the denizens of the cache. These are held in a structure of type:
cache withdrawal.
- (*) Notification that the attributes on a netfs file changed [mandatory]:
+ * Notification that the attributes on a netfs file changed [mandatory]::
int (*attr_changed)(struct fscache_object *object);
@@ -386,7 +384,7 @@ performed on the denizens of the cache. These are held in a structure of type:
execution of this operation.
- (*) Reserve cache space for an object's data [optional]:
+ * Reserve cache space for an object's data [optional]::
int (*reserve_space)(struct fscache_object *object, loff_t size);
@@ -404,7 +402,7 @@ performed on the denizens of the cache. These are held in a structure of type:
size if larger than that already.
- (*) Request page be read from cache [mandatory]:
+ * Request page be read from cache [mandatory]::
int (*read_or_alloc_page)(struct fscache_retrieval *op,
struct page *page,
@@ -446,7 +444,7 @@ performed on the denizens of the cache. These are held in a structure of type:
with. This will complete the operation when all pages are dealt with.
- (*) Request pages be read from cache [mandatory]:
+ * Request pages be read from cache [mandatory]::
int (*read_or_alloc_pages)(struct fscache_retrieval *op,
struct list_head *pages,
@@ -457,7 +455,7 @@ performed on the denizens of the cache. These are held in a structure of type:
of pages instead of one page. Any pages on which a read operation is
started must be added to the page cache for the specified mapping and also
to the LRU. Such pages must also be removed from the pages list and
- *nr_pages decremented per page.
+ ``*nr_pages`` decremented per page.
If there was an error such as -ENOMEM, then that should be returned; else
if one or more pages couldn't be read or allocated, then -ENOBUFS should
@@ -466,7 +464,7 @@ performed on the denizens of the cache. These are held in a structure of type:
returned.
- (*) Request page be allocated in the cache [mandatory]:
+ * Request page be allocated in the cache [mandatory]::
int (*allocate_page)(struct fscache_retrieval *op,
struct page *page,
@@ -482,7 +480,7 @@ performed on the denizens of the cache. These are held in a structure of type:
allocated, then the netfs page should be marked and 0 returned.
- (*) Request pages be allocated in the cache [mandatory]:
+ * Request pages be allocated in the cache [mandatory]::
int (*allocate_pages)(struct fscache_retrieval *op,
struct list_head *pages,
@@ -493,7 +491,7 @@ performed on the denizens of the cache. These are held in a structure of type:
nr_pages should be treated as for the read_or_alloc_pages() method.
- (*) Request page be written to cache [mandatory]:
+ * Request page be written to cache [mandatory]::
int (*write_page)(struct fscache_storage *op,
struct page *page);
@@ -514,7 +512,7 @@ performed on the denizens of the cache. These are held in a structure of type:
appropriately.
- (*) Discard retained per-page metadata [mandatory]:
+ * Discard retained per-page metadata [mandatory]::
void (*uncache_page)(struct fscache_object *object, struct page *page)
@@ -523,13 +521,12 @@ performed on the denizens of the cache. These are held in a structure of type:
maintains for this page.
-==================
-FS-CACHE UTILITIES
+FS-Cache Utilities
==================
FS-Cache provides some utilities that a cache backend may make use of:
- (*) Note occurrence of an I/O error in a cache:
+ * Note occurrence of an I/O error in a cache::
void fscache_io_error(struct fscache_cache *cache)
@@ -541,7 +538,7 @@ FS-Cache provides some utilities that a cache backend may make use of:
This does not actually withdraw the cache. That must be done separately.
- (*) Invoke the retrieval I/O completion function:
+ * Invoke the retrieval I/O completion function::
void fscache_end_io(struct fscache_retrieval *op, struct page *page,
int error);
@@ -550,8 +547,8 @@ FS-Cache provides some utilities that a cache backend may make use of:
error value should be 0 if successful and an error otherwise.
- (*) Record that one or more pages being retrieved or allocated have been dealt
- with:
+ * Record that one or more pages being retrieved or allocated have been dealt
+ with::
void fscache_retrieval_complete(struct fscache_retrieval *op,
int n_pages);
@@ -562,7 +559,7 @@ FS-Cache provides some utilities that a cache backend may make use of:
completed.
- (*) Record operation completion:
+ * Record operation completion::
void fscache_op_complete(struct fscache_operation *op);
@@ -571,7 +568,7 @@ FS-Cache provides some utilities that a cache backend may make use of:
one or more pending operations to start running.
- (*) Set highest store limit:
+ * Set highest store limit::
void fscache_set_store_limit(struct fscache_object *object,
loff_t i_size);
@@ -581,7 +578,7 @@ FS-Cache provides some utilities that a cache backend may make use of:
rejected by fscache_read_alloc_page() and co with -ENOBUFS.
- (*) Mark pages as being cached:
+ * Mark pages as being cached::
void fscache_mark_pages_cached(struct fscache_retrieval *op,
struct pagevec *pagevec);
@@ -590,7 +587,7 @@ FS-Cache provides some utilities that a cache backend may make use of:
the netfs must call fscache_uncache_page() to unmark the pages.
- (*) Perform coherency check on an object:
+ * Perform coherency check on an object::
enum fscache_checkaux fscache_check_aux(struct fscache_object *object,
const void *data,
@@ -603,29 +600,26 @@ FS-Cache provides some utilities that a cache backend may make use of:
One of three values will be returned:
- (*) FSCACHE_CHECKAUX_OKAY
-
+ FSCACHE_CHECKAUX_OKAY
The coherency data indicates the object is valid as is.
- (*) FSCACHE_CHECKAUX_NEEDS_UPDATE
-
+ FSCACHE_CHECKAUX_NEEDS_UPDATE
The coherency data needs updating, but otherwise the object is
valid.
- (*) FSCACHE_CHECKAUX_OBSOLETE
-
+ FSCACHE_CHECKAUX_OBSOLETE
The coherency data indicates that the object is obsolete and should
be discarded.
- (*) Initialise a freshly allocated object:
+ * Initialise a freshly allocated object::
void fscache_object_init(struct fscache_object *object);
This initialises all the fields in an object representation.
- (*) Indicate the destruction of an object:
+ * Indicate the destruction of an object::
void fscache_object_destroyed(struct fscache_cache *cache);
@@ -635,7 +629,7 @@ FS-Cache provides some utilities that a cache backend may make use of:
all the objects.
- (*) Indicate negative lookup on an object:
+ * Indicate negative lookup on an object::
void fscache_object_lookup_negative(struct fscache_object *object);
@@ -650,7 +644,7 @@ FS-Cache provides some utilities that a cache backend may make use of:
significant - all subsequent calls are ignored.
- (*) Indicate an object has been obtained:
+ * Indicate an object has been obtained::
void fscache_obtained_object(struct fscache_object *object);
@@ -667,7 +661,7 @@ FS-Cache provides some utilities that a cache backend may make use of:
(2) that writes may now proceed against this object.
- (*) Indicate that object lookup failed:
+ * Indicate that object lookup failed::
void fscache_object_lookup_error(struct fscache_object *object);
@@ -676,7 +670,7 @@ FS-Cache provides some utilities that a cache backend may make use of:
as possible.
- (*) Indicate that a stale object was found and discarded:
+ * Indicate that a stale object was found and discarded::
void fscache_object_retrying_stale(struct fscache_object *object);
@@ -685,7 +679,7 @@ FS-Cache provides some utilities that a cache backend may make use of:
discarded from the cache and the lookup will be performed again.
- (*) Indicate that the caching backend killed an object:
+ * Indicate that the caching backend killed an object::
void fscache_object_mark_killed(struct fscache_object *object,
enum fscache_why_object_killed why);
@@ -693,13 +687,20 @@ FS-Cache provides some utilities that a cache backend may make use of:
This is called to indicate that the cache backend preemptively killed an
object. The why parameter should be set to indicate the reason:
- FSCACHE_OBJECT_IS_STALE - the object was stale and needs discarding.
- FSCACHE_OBJECT_NO_SPACE - there was insufficient cache space
- FSCACHE_OBJECT_WAS_RETIRED - the object was retired when relinquished.
- FSCACHE_OBJECT_WAS_CULLED - the object was culled to make space.
+ FSCACHE_OBJECT_IS_STALE
+ - the object was stale and needs discarding.
+
+ FSCACHE_OBJECT_NO_SPACE
+ - there was insufficient cache space
+
+ FSCACHE_OBJECT_WAS_RETIRED
+ - the object was retired when relinquished.
+
+ FSCACHE_OBJECT_WAS_CULLED
+ - the object was culled to make space.
- (*) Get and release references on a retrieval record:
+ * Get and release references on a retrieval record::
void fscache_get_retrieval(struct fscache_retrieval *op);
void fscache_put_retrieval(struct fscache_retrieval *op);
@@ -708,7 +709,7 @@ FS-Cache provides some utilities that a cache backend may make use of:
asynchronous data retrieval and block allocation.
- (*) Enqueue a retrieval record for processing.
+ * Enqueue a retrieval record for processing::
void fscache_enqueue_retrieval(struct fscache_retrieval *op);
@@ -718,7 +719,7 @@ FS-Cache provides some utilities that a cache backend may make use of:
within the callback function.
- (*) List of object state names:
+ * List of object state names::
const char *fscache_object_states[];
diff --git a/Documentation/filesystems/caching/cachefiles.txt b/Documentation/filesystems/caching/cachefiles.rst
index 28aefcbb1442..65d3db476765 100644
--- a/Documentation/filesystems/caching/cachefiles.txt
+++ b/Documentation/filesystems/caching/cachefiles.rst
@@ -1,8 +1,10 @@
- ===============================================
- CacheFiles: CACHE ON ALREADY MOUNTED FILESYSTEM
- ===============================================
+.. SPDX-License-Identifier: GPL-2.0
-Contents:
+===============================================
+CacheFiles: CACHE ON ALREADY MOUNTED FILESYSTEM
+===============================================
+
+.. Contents:
(*) Overview.
@@ -27,8 +29,8 @@ Contents:
(*) Debugging.
-========
-OVERVIEW
+
+Overview
========
CacheFiles is a caching backend that's meant to use as a cache a directory on
@@ -58,8 +60,8 @@ spare space and automatically contract when the set of data requires more
space.
-============
-REQUIREMENTS
+
+Requirements
============
The use of CacheFiles and its daemon requires the following features to be
@@ -79,84 +81,70 @@ It is strongly recommended that the "dir_index" option is enabled on Ext3
filesystems being used as a cache.
-=============
-CONFIGURATION
+Configuration
=============
The cache is configured by a script in /etc/cachefilesd.conf. These commands
set up cache ready for use. The following script commands are available:
- (*) brun <N>%
- (*) bcull <N>%
- (*) bstop <N>%
- (*) frun <N>%
- (*) fcull <N>%
- (*) fstop <N>%
-
+ brun <N>%, bcull <N>%, bstop <N>%, frun <N>%, fcull <N>%, fstop <N>%
Configure the culling limits. Optional. See the section on culling.
The defaults are 7% (run), 5% (cull) and 1% (stop) respectively.
The commands beginning with a 'b' are file space (block) limits, those
beginning with an 'f' are file count limits.
- (*) dir <path>
-
+ dir <path>
Specify the directory containing the root of the cache. Mandatory.
- (*) tag <name>
-
+ tag <name>
Specify a tag to FS-Cache to use in distinguishing multiple caches.
Optional. The default is "CacheFiles".
- (*) debug <mask>
-
+ debug <mask>
Specify a numeric bitmask to control debugging in the kernel module.
Optional. The default is zero (all off). The following values can be
OR'd into the mask to collect various information:
+ == =================================================
1 Turn on trace of function entry (_enter() macros)
2 Turn on trace of function exit (_leave() macros)
4 Turn on trace of internal debug points (_debug())
+ == =================================================
- This mask can also be set through sysfs, eg:
+ This mask can also be set through sysfs, eg::
echo 5 >/sys/modules/cachefiles/parameters/debug
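+
+A hypothetical /etc/cachefilesd.conf putting these commands together (the
+values are illustrative only) might look like::
+
+        dir /var/cache/fscache
+        tag mycache
+        brun 10%
+        bcull 7%
+        bstop 3%
+        frun 10%
+        fcull 7%
+        fstop 3%
+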
-==================
-STARTING THE CACHE
+Starting the Cache
==================
The cache is started by running the daemon. The daemon opens the cache device,
configures the cache and tells it to begin caching. At that point the cache
binds to fscache and the cache becomes live.
-The daemon is run as follows:
+The daemon is run as follows::
/sbin/cachefilesd [-d]* [-s] [-n] [-f <configfile>]
The flags are:
- (*) -d
-
+ ``-d``
Increase the debugging level. This can be specified multiple times and
is cumulative with itself.
- (*) -s
-
+ ``-s``
Send messages to stderr instead of syslog.
- (*) -n
-
+ ``-n``
Don't daemonise and go into background.
- (*) -f <configfile>
-
+ ``-f <configfile>``
Use an alternative configuration file rather than the default one.
-===============
-THINGS TO AVOID
+Things to Avoid
===============
Do not mount other things within the cache as this will cause problems. The
@@ -179,8 +167,7 @@ Do not chmod files in the cache. The module creates things with minimal
permissions to prevent random users being able to access them directly.
-=============
-CACHE CULLING
+Cache Culling
=============
The cache may need culling occasionally to make space. This involves
@@ -192,27 +179,21 @@ Cache culling is done on the basis of the percentage of blocks and the
percentage of files available in the underlying filesystem. There are six
"limits":
- (*) brun
- (*) frun
-
+ brun, frun
If the amount of free space and the number of available files in the cache
rises above both these limits, then culling is turned off.
- (*) bcull
- (*) fcull
-
+ bcull, fcull
If the amount of available space or the number of available files in the
cache falls below either of these limits, then culling is started.
- (*) bstop
- (*) fstop
-
+ bstop, fstop
If the amount of available space or the number of available files in the
cache falls below either of these limits, then no further allocation of
disk space or files is permitted until culling has raised things above
these limits again.
-These must be configured thusly:
+These must be configured thusly::
0 <= bstop < bcull < brun < 100
0 <= fstop < fcull < frun < 100
@@ -226,16 +207,14 @@ started as soon as space is made in the table. Objects will be skipped if
their atimes have changed or if the kernel module says it is still using them.
-===============
-CACHE STRUCTURE
+Cache Structure
===============
The CacheFiles module will create two directories in the directory it was
given:
- (*) cache/
-
- (*) graveyard/
+ * cache/
+ * graveyard/
The active cache objects all reside in the first directory. The CacheFiles
kernel module moves any retired or culled objects that it can't simply unlink
@@ -261,10 +240,10 @@ If an object has children, then it will be represented as a directory.
Immediately in the representative directory are a collection of directories
named for hash values of the child object keys with an '@' prepended. Into
this directory, if possible, will be placed the representations of the child
-objects:
+objects::
- INDEX INDEX INDEX DATA FILES
- ========= ========== ================================= ================
+ /INDEX /INDEX /INDEX /DATA FILES
+ /=========/==========/=================================/================
cache/@4a/I03nfs/@30/Ji000000000000000--fHg8hi8400
cache/@4a/I03nfs/@30/Ji000000000000000--fHg8hi8400/@75/Es0g000w...DB1ry
cache/@4a/I03nfs/@30/Ji000000000000000--fHg8hi8400/@75/Es0g000w...N22ry
@@ -275,7 +254,7 @@ If the key is so long that it exceeds NAME_MAX with the decorations added on to
it, then it will be cut into pieces, the first few of which will be used to
make a nest of directories, and the last one of which will be the objects
inside the last directory. The names of the intermediate directories will have
-'+' prepended:
+'+' prepended::
J1223/@23/+xy...z/+kl...m/Epqr
@@ -288,11 +267,13 @@ To handle this, CacheFiles will use a suitably printable filename directly and
"base-64" encode ones that aren't directly suitable. The two versions of
object filenames indicate the encoding:
+ =============== =============== ===============
OBJECT TYPE PRINTABLE ENCODED
=============== =============== ===============
Index "I..." "J..."
Data "D..." "E..."
Special "S..." "T..."
+ =============== =============== ===============
Intermediate directories are always "@" or "+" as appropriate.
@@ -307,8 +288,7 @@ Note that CacheFiles will erase from the cache any file it doesn't recognise or
any file of an incorrect type (such as a FIFO file or a device file).
-==========================
-SECURITY MODEL AND SELINUX
+Security Model and SELinux
==========================
CacheFiles is implemented to deal properly with the LSM security features of
@@ -331,26 +311,26 @@ When the CacheFiles module is asked to bind to its cache, it:
(1) Finds the security label attached to the root cache directory and uses
that as the security label with which it will create files. By default,
- this is:
+ this is::
cachefiles_var_t
(2) Finds the security label of the process which issued the bind request
- (presumed to be the cachefilesd daemon), which by default will be:
+ (presumed to be the cachefilesd daemon), which by default will be::
cachefilesd_t
and asks LSM to supply a security ID as which it should act given the
- daemon's label. By default, this will be:
+ daemon's label. By default, this will be::
cachefiles_kernel_t
SELinux transitions the daemon's security ID to the module's security ID
- based on a rule of this form in the policy.
+ based on a rule of this form in the policy::
type_transition <daemon's-ID> kernel_t : process <module's-ID>;
- For instance:
+ For instance::
type_transition cachefilesd_t kernel_t : process cachefiles_kernel_t;
@@ -370,7 +350,7 @@ There are policy source files available in:
http://people.redhat.com/~dhowells/fscache/cachefilesd-0.8.tar.bz2
-and later versions. In that tarball, see the files:
+and later versions. In that tarball, see the files::
cachefilesd.te
cachefilesd.fc
@@ -379,7 +359,7 @@ and later versions. In that tarball, see the files:
They are built and installed directly by the RPM.
If a non-RPM based system is being used, then copy the above files to their own
-directory and run:
+directory and run::
make -f /usr/share/selinux/devel/Makefile
semodule -i cachefilesd.pp
@@ -394,7 +374,7 @@ an auxiliary policy must be installed to label the alternate location of the
cache.
For instructions on how to add an auxiliary policy to enable the cache to be
-located elsewhere when SELinux is in enforcing mode, please see:
+located elsewhere when SELinux is in enforcing mode, please see::
/usr/share/doc/cachefilesd-*/move-cache.txt
@@ -402,8 +382,7 @@ When the cachefilesd rpm is installed; alternatively, the document can be found
in the sources.
-==================
-A NOTE ON SECURITY
+A Note on Security
==================
CacheFiles makes use of the split security in the task_struct. It allocates
@@ -445,17 +424,18 @@ for CacheFiles to run in a context of a specific security label, or to create
files and directories with another security label.
-=======================
-STATISTICAL INFORMATION
+Statistical Information
=======================
-If FS-Cache is compiled with the following option enabled:
+If FS-Cache is compiled with the following option enabled::
CONFIG_CACHEFILES_HISTOGRAM=y
then it will gather certain statistics and display them through a proc file.
- (*) /proc/fs/cachefiles/histogram
+ /proc/fs/cachefiles/histogram
+
+ ::
cat /proc/fs/cachefiles/histogram
JIFS SECS LOOKUPS MKDIRS CREATES
@@ -465,36 +445,39 @@ then it will gather certain statistics and display them through a proc file.
between 0 jiffies and HZ-1 jiffies a variety of tasks took to run. The
columns are as follows:
+ ======= =======================================================
COLUMN TIME MEASUREMENT
======= =======================================================
LOOKUPS Length of time to perform a lookup on the backing fs
MKDIRS Length of time to perform a mkdir on the backing fs
CREATES Length of time to perform a create on the backing fs
+ ======= =======================================================
Each row shows the number of events that took a particular range of times.
Each step is 1 jiffy in size. The JIFS column indicates the particular
jiffy range covered, and the SECS field the equivalent number of seconds.
-=========
-DEBUGGING
+Debugging
=========
If CONFIG_CACHEFILES_DEBUG is enabled, the CacheFiles facility can have runtime
-debugging enabled by adjusting the value in:
+debugging enabled by adjusting the value in::
/sys/module/cachefiles/parameters/debug
This is a bitmask of debugging streams to enable:
+ ======= ======= =============================== =======================
BIT VALUE STREAM POINT
======= ======= =============================== =======================
0 1 General Function entry trace
1 2 Function exit trace
2 4 General
+ ======= ======= =============================== =======================
The appropriate set of values should be OR'd together and the result written to
-the control file. For example:
+the control file. For example::
echo $((1|4|8)) >/sys/module/cachefiles/parameters/debug
diff --git a/Documentation/filesystems/caching/fscache.rst b/Documentation/filesystems/caching/fscache.rst
new file mode 100644
index 000000000000..70de86922b6a
--- /dev/null
+++ b/Documentation/filesystems/caching/fscache.rst
@@ -0,0 +1,565 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==========================
+General Filesystem Caching
+==========================
+
+Overview
+========
+
+This facility is a general purpose cache for network filesystems, though it
+could be used for caching other things such as ISO9660 filesystems too.
+
+FS-Cache mediates between cache backends (such as CacheFS) and network
+filesystems::
+
+ +---------+
+ | | +--------------+
+ | NFS |--+ | |
+ | | | +-->| CacheFS |
+ +---------+ | +----------+ | | /dev/hda5 |
+ | | | | +--------------+
+ +---------+ +-->| | |
+ | | | |--+
+ | AFS |----->| FS-Cache |
+ | | | |--+
+ +---------+ +-->| | |
+ | | | | +--------------+
+ +---------+ | +----------+ | | |
+ | | | +-->| CacheFiles |
+ | ISOFS |--+ | /var/cache |
+ | | +--------------+
+ +---------+
+
+Or to look at it another way, FS-Cache is a module that provides a caching
+facility to a network filesystem such that the cache is transparent to the
+user::
+
+ +---------+
+ | |
+ | Server |
+ | |
+ +---------+
+ | NETWORK
+ ~~~~~|~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ |
+ | +----------+
+ V | |
+ +---------+ | |
+ | | | |
+ | NFS |----->| FS-Cache |
+ | | | |--+
+ +---------+ | | | +--------------+ +--------------+
+ | | | | | | | |
+ V +----------+ +-->| CacheFiles |-->| Ext3 |
+ +---------+ | /var/cache | | /dev/sda6 |
+ | | +--------------+ +--------------+
+ | VFS | ^ ^
+ | | | |
+ +---------+ +--------------+ |
+ | KERNEL SPACE | |
+ ~~~~~|~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~|~~~~~~|~~~~
+ | USER SPACE | |
+ V | |
+ +---------+ +--------------+
+ | | | |
+ | Process | | cachefilesd |
+ | | | |
+ +---------+ +--------------+
+
+
+FS-Cache does not follow the idea of completely loading every netfs file
+opened in its entirety into a cache before permitting it to be accessed and
+then serving the pages out of that cache rather than the netfs inode because:
+
+ (1) It must be practical to operate without a cache.
+
+ (2) The size of any accessible file must not be limited to the size of the
+ cache.
+
+ (3) The combined size of all opened files (this includes mapped libraries)
+ must not be limited to the size of the cache.
+
+ (4) The user should not be forced to download an entire file just to do a
+ one-off access of a small portion of it (such as might be done with the
+ "file" program).
+
+It instead serves the cache out in PAGE_SIZE chunks as and when requested by
+the netfs('s) using it.
+
+
+FS-Cache provides the following facilities:
+
+ (1) More than one cache can be used at once. Caches can be selected
+ explicitly by use of tags.
+
+ (2) Caches can be added / removed at any time.
+
+ (3) The netfs is provided with an interface that allows either party to
+ withdraw caching facilities from a file (required for (2)).
+
+ (4) The interface to the netfs returns as few errors as possible, preferring
+ rather to let the netfs remain oblivious.
+
+ (5) Cookies are used to represent indices, files and other objects to the
+ netfs. The simplest cookie is just a NULL pointer - indicating nothing
+ cached there.
+
+ (6) The netfs is allowed to propose - dynamically - any index hierarchy it
+ desires, though it must be aware that the index search function is
+ recursive, stack space is limited, and indices can only be children of
+ indices.
+
+ (7) Data I/O is done direct to and from the netfs's pages. The netfs
+ indicates that page A is at index B of the data-file represented by cookie
+ C, and that it should be read or written. The cache backend may or may
+ not start I/O on that page, but if it does, a netfs callback will be
+ invoked to indicate completion. The I/O may be either synchronous or
+ asynchronous.
+
+ (8) Cookies can be "retired" upon release. At this point FS-Cache will mark
+ them as obsolete and the index hierarchy rooted at that point will get
+ recycled.
+
+ (9) The netfs provides a "match" function for index searches. In addition to
+ saying whether a match was made or not, this can also specify that an
+ entry should be updated or deleted.
+
+(10) As much as possible is done asynchronously.
+
+
+FS-Cache maintains a virtual indexing tree in which all indices, files, objects
+and pages are kept. Bits of this tree may actually reside in one or more
+caches::
+
+ FSDEF
+ |
+ +------------------------------------+
+ | |
+ NFS AFS
+ | |
+ +--------------------------+ +-----------+
+ | | | |
+ homedir mirror afs.org redhat.com
+ | | |
+ +------------+ +---------------+ +----------+
+ | | | | | |
+ 00001 00002 00007 00125 vol00001 vol00002
+ | | | | |
+ +---+---+ +-----+ +---+ +------+------+ +-----+----+
+ | | | | | | | | | | | | |
+ PG0 PG1 PG2 PG0 XATTR PG0 PG1 DIRENT DIRENT DIRENT R/W R/O Bak
+ | |
+ PG0 +-------+
+ | |
+ 00001 00003
+ |
+ +---+---+
+ | | |
+ PG0 PG1 PG2
+
+In the example above, you can see two netfs's being backed: NFS and AFS. These
+have different index hierarchies:
+
+ * The NFS primary index contains per-server indices. Each server index is
+ indexed by NFS file handles to get data file objects. Each data file
+ object can have an array of pages, but may also have further child
+ objects, such as extended attributes and directory entries. Extended
+ attribute objects themselves have page-array contents.
+
+ * The AFS primary index contains per-cell indices. Each cell index contains
+ per-logical-volume indices. Each volume index contains up to three
+ indices for the read-write, read-only and backup mirrors of those volumes.
+ Each of these contains vnode data file objects, each of which contains an
+ array of pages.
+
+The very top index is the FS-Cache master index in which individual netfs's
+have entries.
+
+Any index object may reside in more than one cache, provided it only has index
+children. Any index with non-index object children will be assumed to only
+reside in one cache.
+
+
+The netfs API to FS-Cache can be found in:
+
+ Documentation/filesystems/caching/netfs-api.rst
+
+The cache backend API to FS-Cache can be found in:
+
+ Documentation/filesystems/caching/backend-api.rst
+
+A description of the internal representations and object state machine can be
+found in:
+
+ Documentation/filesystems/caching/object.rst
+
+
+Statistical Information
+=======================
+
+If FS-Cache is compiled with the following options enabled::
+
+ CONFIG_FSCACHE_STATS=y
+ CONFIG_FSCACHE_HISTOGRAM=y
+
+then it will gather certain statistics and display them through a number of
+proc files.
+
+/proc/fs/fscache/stats
+----------------------
+
+ This shows counts of a number of events that can happen in FS-Cache:
+
++--------------+-------+-------------------------------------------------------+
+|CLASS |EVENT |MEANING |
++==============+=======+=======================================================+
+|Cookies |idx=N |Number of index cookies allocated |
++ +-------+-------------------------------------------------------+
+| |dat=N |Number of data storage cookies allocated |
++ +-------+-------------------------------------------------------+
+| |spc=N |Number of special cookies allocated |
++--------------+-------+-------------------------------------------------------+
+|Objects |alc=N |Number of objects allocated |
++ +-------+-------------------------------------------------------+
+| |nal=N |Number of object allocation failures |
++ +-------+-------------------------------------------------------+
+| |avl=N |Number of objects that reached the available state |
++ +-------+-------------------------------------------------------+
+| |ded=N |Number of objects that reached the dead state |
++--------------+-------+-------------------------------------------------------+
+|ChkAux |non=N |Number of objects that didn't have a coherency check |
++ +-------+-------------------------------------------------------+
+| |ok=N |Number of objects that passed a coherency check |
++ +-------+-------------------------------------------------------+
+| |upd=N |Number of objects that needed a coherency data update |
++ +-------+-------------------------------------------------------+
+| |obs=N |Number of objects that were declared obsolete |
++--------------+-------+-------------------------------------------------------+
+|Pages |mrk=N |Number of pages marked as being cached |
+| |unc=N |Number of uncache page requests seen |
++--------------+-------+-------------------------------------------------------+
+|Acquire |n=N |Number of acquire cookie requests seen |
++ +-------+-------------------------------------------------------+
+| |nul=N |Number of acq reqs given a NULL parent |
++ +-------+-------------------------------------------------------+
+| |noc=N |Number of acq reqs rejected due to no cache available |
++ +-------+-------------------------------------------------------+
+| |ok=N |Number of acq reqs succeeded |
++ +-------+-------------------------------------------------------+
+| |nbf=N |Number of acq reqs rejected due to error |
++ +-------+-------------------------------------------------------+
+| |oom=N |Number of acq reqs failed on ENOMEM |
++--------------+-------+-------------------------------------------------------+
+|Lookups |n=N |Number of lookup calls made on cache backends |
++ +-------+-------------------------------------------------------+
+| |neg=N |Number of negative lookups made |
++ +-------+-------------------------------------------------------+
+| |pos=N |Number of positive lookups made |
++ +-------+-------------------------------------------------------+
+| |crt=N |Number of objects created by lookup |
++ +-------+-------------------------------------------------------+
+| |tmo=N |Number of lookups timed out and requeued |
++--------------+-------+-------------------------------------------------------+
+|Updates |n=N |Number of update cookie requests seen |
++ +-------+-------------------------------------------------------+
+| |nul=N |Number of upd reqs given a NULL parent |
++ +-------+-------------------------------------------------------+
+| |run=N |Number of upd reqs granted CPU time |
++--------------+-------+-------------------------------------------------------+
+|Relinqs |n=N |Number of relinquish cookie requests seen |
++ +-------+-------------------------------------------------------+
+| |nul=N |Number of rlq reqs given a NULL parent |
++ +-------+-------------------------------------------------------+
+| |wcr=N |Number of rlq reqs waited on completion of creation |
++--------------+-------+-------------------------------------------------------+
+|AttrChg |n=N |Number of attribute changed requests seen |
++ +-------+-------------------------------------------------------+
+| |ok=N |Number of attr changed requests queued |
++ +-------+-------------------------------------------------------+
+| |nbf=N |Number of attr changed rejected -ENOBUFS |
++ +-------+-------------------------------------------------------+
+| |oom=N |Number of attr changed failed -ENOMEM |
++ +-------+-------------------------------------------------------+
+| |run=N |Number of attr changed ops given CPU time |
++--------------+-------+-------------------------------------------------------+
+|Allocs |n=N |Number of allocation requests seen |
++ +-------+-------------------------------------------------------+
+| |ok=N |Number of successful alloc reqs |
++ +-------+-------------------------------------------------------+
+| |wt=N |Number of alloc reqs that waited on lookup completion |
++ +-------+-------------------------------------------------------+
+| |nbf=N |Number of alloc reqs rejected -ENOBUFS |
++ +-------+-------------------------------------------------------+
+| |int=N |Number of alloc reqs aborted -ERESTARTSYS |
++ +-------+-------------------------------------------------------+
+| |ops=N |Number of alloc reqs submitted |
++ +-------+-------------------------------------------------------+
+| |owt=N |Number of alloc reqs waited for CPU time |
++ +-------+-------------------------------------------------------+
+| |abt=N |Number of alloc reqs aborted due to object death |
++--------------+-------+-------------------------------------------------------+
+|Retrvls |n=N |Number of retrieval (read) requests seen |
++ +-------+-------------------------------------------------------+
+| |ok=N |Number of successful retr reqs |
++ +-------+-------------------------------------------------------+
+| |wt=N |Number of retr reqs that waited on lookup completion |
++ +-------+-------------------------------------------------------+
+| |nod=N |Number of retr reqs returned -ENODATA |
++ +-------+-------------------------------------------------------+
+| |nbf=N |Number of retr reqs rejected -ENOBUFS |
++ +-------+-------------------------------------------------------+
+| |int=N |Number of retr reqs aborted -ERESTARTSYS |
++ +-------+-------------------------------------------------------+
+| |oom=N |Number of retr reqs failed -ENOMEM |
++ +-------+-------------------------------------------------------+
+| |ops=N |Number of retr reqs submitted |
++ +-------+-------------------------------------------------------+
+| |owt=N |Number of retr reqs waited for CPU time |
++ +-------+-------------------------------------------------------+
+| |abt=N |Number of retr reqs aborted due to object death |
++--------------+-------+-------------------------------------------------------+
+|Stores |n=N |Number of storage (write) requests seen |
++ +-------+-------------------------------------------------------+
+| |ok=N |Number of successful store reqs |
++ +-------+-------------------------------------------------------+
+| |agn=N |Number of store reqs on a page already pending storage |
++ +-------+-------------------------------------------------------+
+| |nbf=N |Number of store reqs rejected -ENOBUFS |
++ +-------+-------------------------------------------------------+
+| |oom=N |Number of store reqs failed -ENOMEM |
++ +-------+-------------------------------------------------------+
+| |ops=N |Number of store reqs submitted |
++ +-------+-------------------------------------------------------+
+| |run=N |Number of store reqs granted CPU time |
++ +-------+-------------------------------------------------------+
+| |pgs=N |Number of pages given store req processing time |
++ +-------+-------------------------------------------------------+
+| |rxd=N |Number of store reqs deleted from tracking tree |
++ +-------+-------------------------------------------------------+
+| |olm=N |Number of store reqs over store limit |
++--------------+-------+-------------------------------------------------------+
+|VmScan |nos=N |Number of release reqs against pages with no |
+| | |pending store |
++ +-------+-------------------------------------------------------+
+| |gon=N |Number of release reqs against pages stored by |
+| | |time lock granted |
++ +-------+-------------------------------------------------------+
+| |bsy=N |Number of release reqs ignored due to in-progress store|
++ +-------+-------------------------------------------------------+
+| |can=N |Number of page stores cancelled due to release req |
++--------------+-------+-------------------------------------------------------+
+|Ops |pend=N |Number of times async ops added to pending queues |
++ +-------+-------------------------------------------------------+
+| |run=N |Number of times async ops given CPU time |
++ +-------+-------------------------------------------------------+
+| |enq=N |Number of times async ops queued for processing |
++ +-------+-------------------------------------------------------+
+| |can=N |Number of async ops cancelled |
++ +-------+-------------------------------------------------------+
+| |rej=N |Number of async ops rejected due to object |
+| | |lookup/create failure |
++ +-------+-------------------------------------------------------+
+| |ini=N |Number of async ops initialised |
++ +-------+-------------------------------------------------------+
+| |dfr=N |Number of async ops queued for deferred release |
++ +-------+-------------------------------------------------------+
+| |rel=N |Number of async ops released |
+| | |(should equal ini=N when idle) |
++ +-------+-------------------------------------------------------+
+| |gc=N |Number of deferred-release async ops garbage collected |
++--------------+-------+-------------------------------------------------------+
+|CacheOp |alo=N |Number of in-progress alloc_object() cache ops |
++ +-------+-------------------------------------------------------+
+| |luo=N |Number of in-progress lookup_object() cache ops |
++ +-------+-------------------------------------------------------+
+| |luc=N |Number of in-progress lookup_complete() cache ops |
++ +-------+-------------------------------------------------------+
+| |gro=N |Number of in-progress grab_object() cache ops |
++ +-------+-------------------------------------------------------+
+| |upo=N |Number of in-progress update_object() cache ops |
++ +-------+-------------------------------------------------------+
+| |dro=N |Number of in-progress drop_object() cache ops |
++ +-------+-------------------------------------------------------+
+| |pto=N |Number of in-progress put_object() cache ops |
++ +-------+-------------------------------------------------------+
+| |syn=N |Number of in-progress sync_cache() cache ops |
++ +-------+-------------------------------------------------------+
+| |atc=N |Number of in-progress attr_changed() cache ops |
++ +-------+-------------------------------------------------------+
+| |rap=N |Number of in-progress read_or_alloc_page() cache ops |
++ +-------+-------------------------------------------------------+
+| |ras=N |Number of in-progress read_or_alloc_pages() cache ops |
++ +-------+-------------------------------------------------------+
+| |alp=N |Number of in-progress allocate_page() cache ops |
++ +-------+-------------------------------------------------------+
+| |als=N |Number of in-progress allocate_pages() cache ops |
++ +-------+-------------------------------------------------------+
+| |wrp=N |Number of in-progress write_page() cache ops |
++ +-------+-------------------------------------------------------+
+| |ucp=N |Number of in-progress uncache_page() cache ops |
++ +-------+-------------------------------------------------------+
+| |dsp=N |Number of in-progress dissociate_pages() cache ops |
++--------------+-------+-------------------------------------------------------+
+|CacheEv |nsp=N |Number of object lookups/creations rejected due to |
+| | |lack of space |
++ +-------+-------------------------------------------------------+
+| |stl=N |Number of stale objects deleted |
++ +-------+-------------------------------------------------------+
+| |rtr=N |Number of objects retired when relinquished |
++ +-------+-------------------------------------------------------+
+| |cul=N |Number of objects culled |
++--------------+-------+-------------------------------------------------------+
+
+
+
+/proc/fs/fscache/histogram
+--------------------------
+
+ ::
+
+ cat /proc/fs/fscache/histogram
+ JIFS SECS OBJ INST OP RUNS OBJ RUNS RETRV DLY RETRIEVLS
+ ===== ===== ========= ========= ========= ========= =========
+
+ This shows, for each period of time between 0 jiffies and HZ-1 jiffies, the
+ number of times a variety of tasks took that long to run. The
+ columns are as follows:
+
+ ========= =======================================================
+ COLUMN TIME MEASUREMENT
+ ========= =======================================================
+ OBJ INST Length of time to instantiate an object
+ OP RUNS Length of time a call to process an operation took
+ OBJ RUNS Length of time a call to process an object event took
+ RETRV DLY Time between requesting a read and lookup completing
+ RETRIEVLS Time between beginning and end of a retrieval
+ ========= =======================================================
+
+ Each row shows the number of events that took a particular range of times.
+ Each step is 1 jiffy in size. The JIFS column indicates the particular
+ jiffy range covered, and the SECS field the equivalent number of seconds.
+
+
+
+Object List
+===========
+
+If CONFIG_FSCACHE_OBJECT_LIST is enabled, the FS-Cache facility will maintain a
+list of all the objects currently allocated and allow them to be viewed
+through::
+
+ /proc/fs/fscache/objects
+
+This will look something like::
+
+ [root@andromeda ~]# head /proc/fs/fscache/objects
+ OBJECT PARENT STAT CHLDN OPS OOP IPR EX READS EM EV F S | NETFS_COOKIE_DEF TY FL NETFS_DATA OBJECT_KEY, AUX_DATA
+ ======== ======== ==== ===== === === === == ===== == == = = | ================ == == ================ ================
+ 17e4b 2 ACTV 0 0 0 0 0 0 7b 4 0 0 | NFS.fh DT 0 ffff88001dd82820 010006017edcf8bbc93b43298fdfbe71e50b57b13a172c0117f38472, e567634700000000000000000000000063f2404a000000000000000000000000c9030000000000000000000063f2404a
+ 1693a 2 ACTV 0 0 0 0 0 0 7b 4 0 0 | NFS.fh DT 0 ffff88002db23380 010006017edcf8bbc93b43298fdfbe71e50b57b1e0162c01a2df0ea6, 420ebc4a000000000000000000000000420ebc4a0000000000000000000000000e1801000000000000000000420ebc4a
+
+where the first set of columns before the '|' describe the object:
+
+ ======= ===============================================================
+ COLUMN DESCRIPTION
+ ======= ===============================================================
+ OBJECT Object debugging ID (appears as OBJ%x in some debug messages)
+ PARENT Debugging ID of parent object
+ STAT Object state
+ CHLDN Number of child objects of this object
+ OPS Number of outstanding operations on this object
+ OOP Number of outstanding child object management operations
+ IPR
+ EX Number of outstanding exclusive operations
+ READS Number of outstanding read operations
+ EM Object's event mask
+ EV Events raised on this object
+ F Object flags
+ S Object work item busy state mask (1:pending 2:running)
+ ======= ===============================================================
+
+and the second set of columns describe the object's cookie, if present:
+
+ ================ ======================================================
+ COLUMN DESCRIPTION
+ ================ ======================================================
+ NETFS_COOKIE_DEF Name of netfs cookie definition
+ TY Cookie type (IX - index, DT - data, hex - special)
+ FL Cookie flags
+ NETFS_DATA Netfs private data stored in the cookie
+ OBJECT_KEY Object key } 1 column, with separating comma
+ AUX_DATA Object aux data } presence may be configured
+ ================ ======================================================
+
+The data shown may be filtered by attaching a key to an appropriate keyring
+before viewing the file. Something like::
+
+ keyctl add user fscache:objlist <restrictions> @s
+
+where <restrictions> are a selection of the following letters:
+
+ == =========================================================
+ K Show hexdump of object key (don't show if not given)
+ A Show hexdump of object aux data (don't show if not given)
+ == =========================================================
+
+and the following paired letters:
+
+ == =========================================================
+ C Show objects that have a cookie
+ c Show objects that don't have a cookie
+ B Show objects that are busy
+ b Show objects that aren't busy
+ W Show objects that have pending writes
+ w Show objects that don't have pending writes
+ R Show objects that have outstanding reads
+ r Show objects that don't have outstanding reads
+ S Show objects that have work queued
+ s Show objects that don't have work queued
+ == =========================================================
+
+If neither side of a letter pair is given, then both are implied. For example::
+
+ keyctl add user fscache:objlist KB @s
+
+shows objects that are busy, and lists their object keys, but does not dump
+their auxiliary data. It also implies "CcWwRrSs", but as 'B' is given, 'b' is
+not implied.
+
+By default all objects and all fields will be shown.
+
+
+Debugging
+=========
+
+If CONFIG_FSCACHE_DEBUG is enabled, the FS-Cache facility can have runtime
+debugging enabled by adjusting the value in::
+
+ /sys/module/fscache/parameters/debug
+
+This is a bitmask of debugging streams to enable:
+
+ ======= ======= =============================== =======================
+ BIT VALUE STREAM POINT
+ ======= ======= =============================== =======================
+ 0 1 Cache management Function entry trace
+ 1 2 Function exit trace
+ 2 4 General
+ 3 8 Cookie management Function entry trace
+ 4 16 Function exit trace
+ 5 32 General
+ 6 64 Page handling Function entry trace
+ 7 128 Function exit trace
+ 8 256 General
+ 9 512 Operation management Function entry trace
+ 10 1024 Function exit trace
+ 11 2048 General
+ ======= ======= =============================== =======================
+
+The appropriate set of values should be OR'd together and the result written to
+the control file. For example::
+
+ echo $((1|8|64)) >/sys/module/fscache/parameters/debug
+
+will turn on cache, cookie and page handling function entry debugging.
diff --git a/Documentation/filesystems/caching/fscache.txt b/Documentation/filesystems/caching/fscache.txt
deleted file mode 100644
index 50f0a5757f48..000000000000
--- a/Documentation/filesystems/caching/fscache.txt
+++ /dev/null
@@ -1,448 +0,0 @@
- ==========================
- General Filesystem Caching
- ==========================
-
-========
-OVERVIEW
-========
-
-This facility is a general purpose cache for network filesystems, though it
-could be used for caching other things such as ISO9660 filesystems too.
-
-FS-Cache mediates between cache backends (such as CacheFS) and network
-filesystems:
-
- +---------+
- | | +--------------+
- | NFS |--+ | |
- | | | +-->| CacheFS |
- +---------+ | +----------+ | | /dev/hda5 |
- | | | | +--------------+
- +---------+ +-->| | |
- | | | |--+
- | AFS |----->| FS-Cache |
- | | | |--+
- +---------+ +-->| | |
- | | | | +--------------+
- +---------+ | +----------+ | | |
- | | | +-->| CacheFiles |
- | ISOFS |--+ | /var/cache |
- | | +--------------+
- +---------+
-
-Or to look at it another way, FS-Cache is a module that provides a caching
-facility to a network filesystem such that the cache is transparent to the
-user:
-
- +---------+
- | |
- | Server |
- | |
- +---------+
- | NETWORK
- ~~~~~|~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- |
- | +----------+
- V | |
- +---------+ | |
- | | | |
- | NFS |----->| FS-Cache |
- | | | |--+
- +---------+ | | | +--------------+ +--------------+
- | | | | | | | |
- V +----------+ +-->| CacheFiles |-->| Ext3 |
- +---------+ | /var/cache | | /dev/sda6 |
- | | +--------------+ +--------------+
- | VFS | ^ ^
- | | | |
- +---------+ +--------------+ |
- | KERNEL SPACE | |
- ~~~~~|~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~|~~~~~~|~~~~
- | USER SPACE | |
- V | |
- +---------+ +--------------+
- | | | |
- | Process | | cachefilesd |
- | | | |
- +---------+ +--------------+
-
-
-FS-Cache does not follow the idea of completely loading every netfs file
-opened in its entirety into a cache before permitting it to be accessed and
-then serving the pages out of that cache rather than the netfs inode because:
-
- (1) It must be practical to operate without a cache.
-
- (2) The size of any accessible file must not be limited to the size of the
- cache.
-
- (3) The combined size of all opened files (this includes mapped libraries)
- must not be limited to the size of the cache.
-
- (4) The user should not be forced to download an entire file just to do a
- one-off access of a small portion of it (such as might be done with the
- "file" program).
-
-It instead serves the cache out in PAGE_SIZE chunks as and when requested by
-the netfs('s) using it.
-
-
-FS-Cache provides the following facilities:
-
- (1) More than one cache can be used at once. Caches can be selected
- explicitly by use of tags.
-
- (2) Caches can be added / removed at any time.
-
- (3) The netfs is provided with an interface that allows either party to
- withdraw caching facilities from a file (required for (2)).
-
- (4) The interface to the netfs returns as few errors as possible, preferring
- rather to let the netfs remain oblivious.
-
- (5) Cookies are used to represent indices, files and other objects to the
- netfs. The simplest cookie is just a NULL pointer - indicating nothing
- cached there.
-
- (6) The netfs is allowed to propose - dynamically - any index hierarchy it
- desires, though it must be aware that the index search function is
- recursive, stack space is limited, and indices can only be children of
- indices.
-
- (7) Data I/O is done direct to and from the netfs's pages. The netfs
- indicates that page A is at index B of the data-file represented by cookie
- C, and that it should be read or written. The cache backend may or may
- not start I/O on that page, but if it does, a netfs callback will be
- invoked to indicate completion. The I/O may be either synchronous or
- asynchronous.
-
- (8) Cookies can be "retired" upon release. At this point FS-Cache will mark
- them as obsolete and the index hierarchy rooted at that point will get
- recycled.
-
- (9) The netfs provides a "match" function for index searches. In addition to
- saying whether a match was made or not, this can also specify that an
- entry should be updated or deleted.
-
-(10) As much as possible is done asynchronously.
-
-
-FS-Cache maintains a virtual indexing tree in which all indices, files, objects
-and pages are kept. Bits of this tree may actually reside in one or more
-caches.
-
- FSDEF
- |
- +------------------------------------+
- | |
- NFS AFS
- | |
- +--------------------------+ +-----------+
- | | | |
- homedir mirror afs.org redhat.com
- | | |
- +------------+ +---------------+ +----------+
- | | | | | |
- 00001 00002 00007 00125 vol00001 vol00002
- | | | | |
- +---+---+ +-----+ +---+ +------+------+ +-----+----+
- | | | | | | | | | | | | |
-PG0 PG1 PG2 PG0 XATTR PG0 PG1 DIRENT DIRENT DIRENT R/W R/O Bak
- | |
- PG0 +-------+
- | |
- 00001 00003
- |
- +---+---+
- | | |
- PG0 PG1 PG2
-
-In the example above, you can see two netfs's being backed: NFS and AFS. These
-have different index hierarchies:
-
- (*) The NFS primary index contains per-server indices. Each server index is
- indexed by NFS file handles to get data file objects. Each data file
- objects can have an array of pages, but may also have further child
- objects, such as extended attributes and directory entries. Extended
- attribute objects themselves have page-array contents.
-
- (*) The AFS primary index contains per-cell indices. Each cell index contains
- per-logical-volume indices. Each of volume index contains up to three
- indices for the read-write, read-only and backup mirrors of those volumes.
- Each of these contains vnode data file objects, each of which contains an
- array of pages.
-
-The very top index is the FS-Cache master index in which individual netfs's
-have entries.
-
-Any index object may reside in more than one cache, provided it only has index
-children. Any index with non-index object children will be assumed to only
-reside in one cache.
-
-
-The netfs API to FS-Cache can be found in:
-
- Documentation/filesystems/caching/netfs-api.txt
-
-The cache backend API to FS-Cache can be found in:
-
- Documentation/filesystems/caching/backend-api.txt
-
-A description of the internal representations and object state machine can be
-found in:
-
- Documentation/filesystems/caching/object.txt
-
-
-=======================
-STATISTICAL INFORMATION
-=======================
-
-If FS-Cache is compiled with the following options enabled:
-
- CONFIG_FSCACHE_STATS=y
- CONFIG_FSCACHE_HISTOGRAM=y
-
-then it will gather certain statistics and display them through a number of
-proc files.
-
- (*) /proc/fs/fscache/stats
-
- This shows counts of a number of events that can happen in FS-Cache:
-
- CLASS EVENT MEANING
- ======= ======= =======================================================
- Cookies idx=N Number of index cookies allocated
- dat=N Number of data storage cookies allocated
- spc=N Number of special cookies allocated
- Objects alc=N Number of objects allocated
- nal=N Number of object allocation failures
- avl=N Number of objects that reached the available state
- ded=N Number of objects that reached the dead state
- ChkAux non=N Number of objects that didn't have a coherency check
- ok=N Number of objects that passed a coherency check
- upd=N Number of objects that needed a coherency data update
- obs=N Number of objects that were declared obsolete
- Pages mrk=N Number of pages marked as being cached
- unc=N Number of uncache page requests seen
- Acquire n=N Number of acquire cookie requests seen
- nul=N Number of acq reqs given a NULL parent
- noc=N Number of acq reqs rejected due to no cache available
- ok=N Number of acq reqs succeeded
- nbf=N Number of acq reqs rejected due to error
- oom=N Number of acq reqs failed on ENOMEM
- Lookups n=N Number of lookup calls made on cache backends
- neg=N Number of negative lookups made
- pos=N Number of positive lookups made
- crt=N Number of objects created by lookup
- tmo=N Number of lookups timed out and requeued
- Updates n=N Number of update cookie requests seen
- nul=N Number of upd reqs given a NULL parent
- run=N Number of upd reqs granted CPU time
- Relinqs n=N Number of relinquish cookie requests seen
- nul=N Number of rlq reqs given a NULL parent
- wcr=N Number of rlq reqs waited on completion of creation
- AttrChg n=N Number of attribute changed requests seen
- ok=N Number of attr changed requests queued
- nbf=N Number of attr changed rejected -ENOBUFS
- oom=N Number of attr changed failed -ENOMEM
- run=N Number of attr changed ops given CPU time
- Allocs n=N Number of allocation requests seen
- ok=N Number of successful alloc reqs
- wt=N Number of alloc reqs that waited on lookup completion
- nbf=N Number of alloc reqs rejected -ENOBUFS
- int=N Number of alloc reqs aborted -ERESTARTSYS
- ops=N Number of alloc reqs submitted
- owt=N Number of alloc reqs waited for CPU time
- abt=N Number of alloc reqs aborted due to object death
- Retrvls n=N Number of retrieval (read) requests seen
- ok=N Number of successful retr reqs
- wt=N Number of retr reqs that waited on lookup completion
- nod=N Number of retr reqs returned -ENODATA
- nbf=N Number of retr reqs rejected -ENOBUFS
- int=N Number of retr reqs aborted -ERESTARTSYS
- oom=N Number of retr reqs failed -ENOMEM
- ops=N Number of retr reqs submitted
- owt=N Number of retr reqs waited for CPU time
- abt=N Number of retr reqs aborted due to object death
- Stores n=N Number of storage (write) requests seen
- ok=N Number of successful store reqs
- agn=N Number of store reqs on a page already pending storage
- nbf=N Number of store reqs rejected -ENOBUFS
- oom=N Number of store reqs failed -ENOMEM
- ops=N Number of store reqs submitted
- run=N Number of store reqs granted CPU time
- pgs=N Number of pages given store req processing time
- rxd=N Number of store reqs deleted from tracking tree
- olm=N Number of store reqs over store limit
- VmScan nos=N Number of release reqs against pages with no pending store
- gon=N Number of release reqs against pages stored by time lock granted
- bsy=N Number of release reqs ignored due to in-progress store
- can=N Number of page stores cancelled due to release req
- Ops pend=N Number of times async ops added to pending queues
- run=N Number of times async ops given CPU time
- enq=N Number of times async ops queued for processing
- can=N Number of async ops cancelled
- rej=N Number of async ops rejected due to object lookup/create failure
- ini=N Number of async ops initialised
- dfr=N Number of async ops queued for deferred release
- rel=N Number of async ops released (should equal ini=N when idle)
- gc=N Number of deferred-release async ops garbage collected
- CacheOp alo=N Number of in-progress alloc_object() cache ops
- luo=N Number of in-progress lookup_object() cache ops
- luc=N Number of in-progress lookup_complete() cache ops
- gro=N Number of in-progress grab_object() cache ops
- upo=N Number of in-progress update_object() cache ops
- dro=N Number of in-progress drop_object() cache ops
- pto=N Number of in-progress put_object() cache ops
- syn=N Number of in-progress sync_cache() cache ops
- atc=N Number of in-progress attr_changed() cache ops
- rap=N Number of in-progress read_or_alloc_page() cache ops
- ras=N Number of in-progress read_or_alloc_pages() cache ops
- alp=N Number of in-progress allocate_page() cache ops
- als=N Number of in-progress allocate_pages() cache ops
- wrp=N Number of in-progress write_page() cache ops
- ucp=N Number of in-progress uncache_page() cache ops
- dsp=N Number of in-progress dissociate_pages() cache ops
- CacheEv nsp=N Number of object lookups/creations rejected due to lack of space
- stl=N Number of stale objects deleted
- rtr=N Number of objects retired when relinquished
- cul=N Number of objects culled
-
-
- (*) /proc/fs/fscache/histogram
-
- cat /proc/fs/fscache/histogram
- JIFS SECS OBJ INST OP RUNS OBJ RUNS RETRV DLY RETRIEVLS
- ===== ===== ========= ========= ========= ========= =========
-
- This shows the breakdown of the number of times each amount of time
- between 0 jiffies and HZ-1 jiffies a variety of tasks took to run. The
- columns are as follows:
-
- COLUMN TIME MEASUREMENT
- ======= =======================================================
- OBJ INST Length of time to instantiate an object
- OP RUNS Length of time a call to process an operation took
- OBJ RUNS Length of time a call to process an object event took
- RETRV DLY Time between an requesting a read and lookup completing
- RETRIEVLS Time between beginning and end of a retrieval
-
- Each row shows the number of events that took a particular range of times.
- Each step is 1 jiffy in size. The JIFS column indicates the particular
- jiffy range covered, and the SECS field the equivalent number of seconds.
-
-
-===========
-OBJECT LIST
-===========
-
-If CONFIG_FSCACHE_OBJECT_LIST is enabled, the FS-Cache facility will maintain a
-list of all the objects currently allocated and allow them to be viewed
-through:
-
- /proc/fs/fscache/objects
-
-This will look something like:
-
- [root@andromeda ~]# head /proc/fs/fscache/objects
- OBJECT PARENT STAT CHLDN OPS OOP IPR EX READS EM EV F S | NETFS_COOKIE_DEF TY FL NETFS_DATA OBJECT_KEY, AUX_DATA
- ======== ======== ==== ===== === === === == ===== == == = = | ================ == == ================ ================
- 17e4b 2 ACTV 0 0 0 0 0 0 7b 4 0 0 | NFS.fh DT 0 ffff88001dd82820 010006017edcf8bbc93b43298fdfbe71e50b57b13a172c0117f38472, e567634700000000000000000000000063f2404a000000000000000000000000c9030000000000000000000063f2404a
- 1693a 2 ACTV 0 0 0 0 0 0 7b 4 0 0 | NFS.fh DT 0 ffff88002db23380 010006017edcf8bbc93b43298fdfbe71e50b57b1e0162c01a2df0ea6, 420ebc4a000000000000000000000000420ebc4a0000000000000000000000000e1801000000000000000000420ebc4a
-
-where the first set of columns before the '|' describe the object:
-
- COLUMN DESCRIPTION
- ======= ===============================================================
- OBJECT Object debugging ID (appears as OBJ%x in some debug messages)
- PARENT Debugging ID of parent object
- STAT Object state
- CHLDN Number of child objects of this object
- OPS Number of outstanding operations on this object
- OOP Number of outstanding child object management operations
- IPR
- EX Number of outstanding exclusive operations
- READS Number of outstanding read operations
- EM Object's event mask
- EV Events raised on this object
- F Object flags
- S Object work item busy state mask (1:pending 2:running)
-
-and the second set of columns describe the object's cookie, if present:
-
- COLUMN DESCRIPTION
- =============== =======================================================
- NETFS_COOKIE_DEF Name of netfs cookie definition
- TY Cookie type (IX - index, DT - data, hex - special)
- FL Cookie flags
- NETFS_DATA Netfs private data stored in the cookie
- OBJECT_KEY Object key } 1 column, with separating comma
- AUX_DATA Object aux data } presence may be configured
-
-The data shown may be filtered by attaching the a key to an appropriate keyring
-before viewing the file. Something like:
-
- keyctl add user fscache:objlist <restrictions> @s
-
-where <restrictions> are a selection of the following letters:
-
- K Show hexdump of object key (don't show if not given)
- A Show hexdump of object aux data (don't show if not given)
-
-and the following paired letters:
-
- C Show objects that have a cookie
- c Show objects that don't have a cookie
- B Show objects that are busy
- b Show objects that aren't busy
- W Show objects that have pending writes
- w Show objects that don't have pending writes
- R Show objects that have outstanding reads
- r Show objects that don't have outstanding reads
- S Show objects that have work queued
- s Show objects that don't have work queued
-
-If neither side of a letter pair is given, then both are implied. For example:
-
- keyctl add user fscache:objlist KB @s
-
-shows objects that are busy, and lists their object keys, but does not dump
-their auxiliary data. It also implies "CcWwRrSs", but as 'B' is given, 'b' is
-not implied.
-
-By default all objects and all fields will be shown.
-
-
-=========
-DEBUGGING
-=========
-
-If CONFIG_FSCACHE_DEBUG is enabled, the FS-Cache facility can have runtime
-debugging enabled by adjusting the value in:
-
- /sys/module/fscache/parameters/debug
-
-This is a bitmask of debugging streams to enable:
-
- BIT VALUE STREAM POINT
- ======= ======= =============================== =======================
- 0 1 Cache management Function entry trace
- 1 2 Function exit trace
- 2 4 General
- 3 8 Cookie management Function entry trace
- 4 16 Function exit trace
- 5 32 General
- 6 64 Page handling Function entry trace
- 7 128 Function exit trace
- 8 256 General
- 9 512 Operation management Function entry trace
- 10 1024 Function exit trace
- 11 2048 General
-
-The appropriate set of values should be OR'd together and the result written to
-the control file. For example:
-
- echo $((1|8|64)) >/sys/module/fscache/parameters/debug
-
-will turn on all function entry debugging.
diff --git a/Documentation/filesystems/caching/index.rst b/Documentation/filesystems/caching/index.rst
new file mode 100644
index 000000000000..033da7ac7c6e
--- /dev/null
+++ b/Documentation/filesystems/caching/index.rst
@@ -0,0 +1,14 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Filesystem Caching
+==================
+
+.. toctree::
+ :maxdepth: 2
+
+ fscache
+ object
+ backend-api
+ cachefiles
+ netfs-api
+ operations
diff --git a/Documentation/filesystems/caching/netfs-api.txt b/Documentation/filesystems/caching/netfs-api.rst
index ba968e8f5704..d9f14b8610ba 100644
--- a/Documentation/filesystems/caching/netfs-api.txt
+++ b/Documentation/filesystems/caching/netfs-api.rst
@@ -1,6 +1,8 @@
- ===============================
- FS-CACHE NETWORK FILESYSTEM API
- ===============================
+.. SPDX-License-Identifier: GPL-2.0
+
+===============================
+FS-Cache Network Filesystem API
+===============================
There's an API by which a network filesystem can make use of the FS-Cache
facilities. This is based around a number of principles:
@@ -19,7 +21,7 @@ facilities. This is based around a number of principles:
This API is declared in <linux/fscache.h>.
-This document contains the following sections:
+.. This document contains the following sections:
(1) Network filesystem definition
(2) Index definition
@@ -41,12 +43,11 @@ This document contains the following sections:
(18) FS-Cache specific page flags.
-=============================
-NETWORK FILESYSTEM DEFINITION
+Network Filesystem Definition
=============================
FS-Cache needs a description of the network filesystem. This is specified
-using a record of the following structure:
+using a record of the following structure::
struct fscache_netfs {
uint32_t version;
@@ -71,7 +72,7 @@ The fields are:
another parameter passed into the registration function.
For example, kAFS (linux/fs/afs/) uses the following definitions to describe
-itself:
+itself::
struct fscache_netfs afs_cache_netfs = {
.version = 0,
@@ -79,8 +80,7 @@ itself:
};
-================
-INDEX DEFINITION
+Index Definition
================
Indices are used for two purposes:
@@ -114,11 +114,10 @@ There are some limits on indices:
function is recursive. Too many layers will run the kernel out of stack.
-=================
-OBJECT DEFINITION
+Object Definition
=================
-To define an object, a structure of the following type should be filled out:
+To define an object, a structure of the following type should be filled out::
struct fscache_cookie_def
{
@@ -149,16 +148,13 @@ This has the following fields:
This is one of the following values:
- (*) FSCACHE_COOKIE_TYPE_INDEX
-
+ FSCACHE_COOKIE_TYPE_INDEX
This defines an index, which is a special FS-Cache type.
- (*) FSCACHE_COOKIE_TYPE_DATAFILE
-
+ FSCACHE_COOKIE_TYPE_DATAFILE
This defines an ordinary data file.
- (*) Any other value between 2 and 255
-
+ Any other value between 2 and 255
This defines an extraordinary object such as an XATTR.
(2) The name of the object type (NUL terminated unless all 16 chars are used)
@@ -192,9 +188,14 @@ This has the following fields:
If present, the function should return one of the following values:
- (*) FSCACHE_CHECKAUX_OKAY - the entry is okay as is
- (*) FSCACHE_CHECKAUX_NEEDS_UPDATE - the entry requires update
- (*) FSCACHE_CHECKAUX_OBSOLETE - the entry should be deleted
+ FSCACHE_CHECKAUX_OKAY
+ - the entry is okay as is
+
+ FSCACHE_CHECKAUX_NEEDS_UPDATE
+ - the entry requires update
+
+ FSCACHE_CHECKAUX_OBSOLETE
+ - the entry should be deleted
This function can also be used to extract data from the auxiliary data in
the cache and copy it into the netfs's structures.
@@ -236,32 +237,30 @@ This has the following fields:
This function is not required for indices as they're not permitted data.
-===================================
-NETWORK FILESYSTEM (UN)REGISTRATION
+Network Filesystem (Un)registration
===================================
The first step is to declare the network filesystem to the cache. This also
involves specifying the layout of the primary index (for AFS, this would be the
"cell" level).
-The registration function is:
+The registration function is::
int fscache_register_netfs(struct fscache_netfs *netfs);
It just takes a pointer to the netfs definition. It returns 0 or an error as
appropriate.
-For kAFS, registration is done as follows:
+For kAFS, registration is done as follows::
ret = fscache_register_netfs(&afs_cache_netfs);
-The last step is, of course, unregistration:
+The last step is, of course, unregistration::
void fscache_unregister_netfs(struct fscache_netfs *netfs);
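+
+As purely illustrative glue (the "examplefs" module and names below are
+assumptions, not taken from any real filesystem), registration and
+unregistration would typically be paired in the module init and exit paths::
+
+	#include <linux/module.h>
+	#include <linux/fscache.h>
+
+	/* Hypothetical netfs description - only .version and .name are set
+	 * by the netfs; FS-Cache fills in the rest at registration time.
+	 */
+	static struct fscache_netfs examplefs_cache_netfs = {
+		.version	= 0,
+		.name		= "examplefs",
+	};
+
+	static int __init examplefs_init(void)
+	{
+		/* Declare the netfs before any cookies are acquired. */
+		return fscache_register_netfs(&examplefs_cache_netfs);
+	}
+
+	static void __exit examplefs_exit(void)
+	{
+		/* All cookies must have been relinquished by now. */
+		fscache_unregister_netfs(&examplefs_cache_netfs);
+	}
+
+	module_init(examplefs_init);
+	module_exit(examplefs_exit);
+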
-================
-CACHE TAG LOOKUP
+Cache Tag Lookup
================
FS-Cache permits the use of more than one cache. To permit particular index
@@ -270,7 +269,7 @@ representation tags. This step is optional; it can be left entirely up to
FS-Cache as to which cache should be used. The problem with doing that is that
FS-Cache will always pick the first cache that was registered.
-To get the representation for a named tag:
+To get the representation for a named tag::
struct fscache_cache_tag *fscache_lookup_cache_tag(const char *name);
@@ -278,7 +277,7 @@ This takes a text string as the name and returns a representation of a tag. It
will never return an error. It may return a dummy tag, however, if it runs out
of memory; this will inhibit caching with this tag.
-Any representation so obtained must be released by passing it to this function:
+Any representation so obtained must be released by passing it to this function::
void fscache_release_cache_tag(struct fscache_cache_tag *tag);
@@ -286,13 +285,12 @@ The tag will be retrieved by FS-Cache when it calls the object definition
operation select_cache().
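+
+As a rough sketch (the tag name "mylocalcache" below is an assumption), a netfs
+might resolve a tag when it is configured and drop it again when done::
+
+	#include <linux/fscache.h>
+
+	struct fscache_cache_tag *tag;
+
+	/* Never returns an error; may hand back a dummy tag on OOM,
+	 * which simply inhibits caching under that tag.
+	 */
+	tag = fscache_lookup_cache_tag("mylocalcache");
+
+	/* ... keep it around for the object definition's select_cache()
+	 * operation to return ...
+	 */
+
+	fscache_release_cache_tag(tag);
+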
-==================
-INDEX REGISTRATION
+Index Registration
==================
The third step is to inform FS-Cache about part of an index hierarchy that can
be used to locate files. This is done by requesting a cookie for each index in
-the path to the file:
+the path to the file::
struct fscache_cookie *
fscache_acquire_cookie(struct fscache_cookie *parent,
@@ -339,7 +337,7 @@ must be enabled to do anything with it. A disabled cookie can be enabled by
calling fscache_enable_cookie() (see below).
For example, with AFS, a cell would be added to the primary index. This index
-entry would have a dependent inode containing volume mappings within this cell:
+entry would have a dependent inode containing volume mappings within this cell::
cell->cache =
fscache_acquire_cookie(afs_cache_netfs.primary_index,
@@ -349,7 +347,7 @@ entry would have a dependent inode containing volume mappings within this cell:
cell, 0, true);
And then a particular volume could be added to that index by ID, creating
-another index for vnodes (AFS inode equivalents):
+another index for vnodes (AFS inode equivalents)::
volume->cache =
fscache_acquire_cookie(volume->cell->cache,
@@ -359,13 +357,12 @@ another index for vnodes (AFS inode equivalents):
volume, 0, true);
-======================
-DATA FILE REGISTRATION
+Data File Registration
======================
The fourth step is to request a data file be created in the cache. This is
identical to index cookie acquisition. The only difference is that the type in
-the object definition should be something other than index type.
+the object definition should be something other than index type::
vnode->cache =
fscache_acquire_cookie(volume->cache,
@@ -375,15 +372,14 @@ the object definition should be something other than index type.
vnode, vnode->status.size, true);
-=================================
-MISCELLANEOUS OBJECT REGISTRATION
+Miscellaneous Object Registration
=================================
An optional step is to request an object of miscellaneous type be created in
the cache. This is almost identical to index cookie acquisition. The only
difference is that the type in the object definition should be something other
than index type. While the parent object could be an index, it's more likely
-it would be some other type of object such as a data file.
+it would be some other type of object such as a data file::
xattr->cache =
fscache_acquire_cookie(vnode->cache,
@@ -396,13 +392,12 @@ Miscellaneous objects might be used to store extended attributes or directory
entries for example.
-==========================
-SETTING THE DATA FILE SIZE
+Setting the Data File Size
==========================
The fifth step is to set the physical attributes of the file, such as its size.
This doesn't automatically reserve any space in the cache, but permits the
-cache to adjust its metadata for data tracking appropriately:
+cache to adjust its metadata for data tracking appropriately::
int fscache_attr_changed(struct fscache_cookie *cookie);
@@ -417,8 +412,7 @@ some point in the future, and as such, it may happen after the function returns
to the caller. The attribute adjustment excludes read and write operations.
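+
+For instance, a netfs might make this call from its setattr or truncate path;
+the following is only a sketch, reusing the vnode naming from the AFS-style
+examples above::
+
+	int ret;
+
+	/* vnode->cache is the data file cookie acquired earlier */
+	ret = fscache_attr_changed(vnode->cache);
+	if (ret < 0)
+		pr_debug("cache attr update failed: %d\n", ret);
+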
-=====================
-PAGE ALLOC/READ/WRITE
+Page Alloc/Read/Write
=====================
And the sixth step is to store and retrieve pages in the cache. There are
@@ -441,7 +435,7 @@ PAGE READ
Firstly, the netfs should ask FS-Cache to examine the caches and read the
contents cached for a particular page of a particular file if present, or else
-allocate space to store the contents if not:
+allocate space to store the contents if not::
typedef
void (*fscache_rw_complete_t)(struct page *page,
@@ -474,14 +468,14 @@ Else if there's a copy of the page resident in the cache:
(4) When the read is complete, end_io_func() will be invoked with:
- (*) The netfs data supplied when the cookie was created.
+ * The netfs data supplied when the cookie was created.
- (*) The page descriptor.
+ * The page descriptor.
- (*) The context argument passed to the above function. This will be
+ * The context argument passed to the above function. This will be
maintained with the get_context/put_context functions mentioned above.
- (*) An argument that's 0 on success or negative for an error code.
+ * An argument that's 0 on success or negative for an error code.
If an error occurs, it should be assumed that the page contains no usable
data. fscache_readpages_cancel() may need to be called.
@@ -504,11 +498,11 @@ This function may also return -ENOMEM or -EINTR, in which case it won't have
read any data from the cache.
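+
+A minimal sketch of how a ->readpage() helper might drive this - the
+examplefs_* names, the inode wrapper and the fall-back-to-server policy are all
+assumptions, and error handling is abbreviated::
+
+	#include <linux/fscache.h>
+	#include <linux/pagemap.h>
+
+	static void examplefs_read_done(struct page *page, void *context,
+					int error)
+	{
+		/* Invoked when the cache completes a read it dispatched. */
+		if (!error)
+			SetPageUptodate(page);
+		unlock_page(page);
+	}
+
+	static int examplefs_readpage_from_cache(struct examplefs_inode *vnode,
+						 struct page *page)
+	{
+		int ret;
+
+		ret = fscache_read_or_alloc_page(vnode->cache, page,
+						 examplefs_read_done,
+						 NULL, GFP_KERNEL);
+		switch (ret) {
+		case 0:		/* read dispatched; completion via callback */
+			return 0;
+		case -ENODATA:	/* block allocated, but nothing to read */
+		case -ENOBUFS:	/* page cannot be cached this time */
+		default:
+			return ret;	/* read from the server instead */
+		}
+	}
+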
-PAGE ALLOCATE
+Page Allocate
-------------
Alternatively, if there's not expected to be any data in the cache for a page
-because the file has been extended, a block can simply be allocated instead:
+because the file has been extended, a block can simply be allocated instead::
int fscache_alloc_page(struct fscache_cookie *cookie,
struct page *page,
@@ -523,12 +517,12 @@ The mark_pages_cached() cookie operation will be called on the page if
successful.
-PAGE WRITE
+Page Write
----------
Secondly, if the netfs changes the contents of the page (either due to an
initial download or if a user performs a write), then the page should be
-written back to the cache:
+written back to the cache::
int fscache_write_page(struct fscache_cookie *cookie,
struct page *page,
@@ -566,11 +560,11 @@ place if unforeseen circumstances arose (such as a disk error).
Writing takes place asynchronously.
-MULTIPLE PAGE READ
+Multiple Page Read
------------------
A facility is provided to read several pages at once, as requested by the
-readpages() address space operation:
+readpages() address space operation::
int fscache_read_or_alloc_pages(struct fscache_cookie *cookie,
struct address_space *mapping,
@@ -598,7 +592,7 @@ This works in a similar way to fscache_read_or_alloc_page(), except:
be returned.
Otherwise, if all pages had reads dispatched, then 0 will be returned, the
- list will be empty and *nr_pages will be 0.
+ list will be empty and ``*nr_pages`` will be 0.
(4) end_io_func will be called once for each page being read as the reads
complete. It will be called in process context if error != 0, but it may
@@ -609,13 +603,13 @@ some of the pages being read and some being allocated. Those pages will have
been marked appropriately and will need uncaching.
-CANCELLATION OF UNREAD PAGES
+Cancellation of Unread Pages
----------------------------
If one or more pages are passed to fscache_read_or_alloc_pages() but not then
read from the cache and also not read from the underlying filesystem then
those pages will need to have any marks and reservations removed. This can be
-done by calling:
+done by calling::
void fscache_readpages_cancel(struct fscache_cookie *cookie,
struct list_head *pages);
@@ -625,11 +619,10 @@ fscache_read_or_alloc_pages(). Every page in the pages list will be examined
and any that have PG_fscache set will be uncached.
-==============
-PAGE UNCACHING
+Page Uncaching
==============
-To uncache a page, this function should be called:
+To uncache a page, this function should be called::
void fscache_uncache_page(struct fscache_cookie *cookie,
struct page *page);
@@ -644,12 +637,12 @@ data file must be retired (see the relinquish cookie function below).
Furthermore, note that this does not cancel the asynchronous read or write
operation started by the read/alloc and write functions, so the page
-invalidation functions must use:
+invalidation functions must use::
bool fscache_check_page_write(struct fscache_cookie *cookie,
struct page *page);
-to see if a page is being written to the cache, and:
+to see if a page is being written to the cache, and::
void fscache_wait_on_page_write(struct fscache_cookie *cookie,
struct page *page);
@@ -660,7 +653,7 @@ to wait for it to finish if it is.
When releasepage() is being implemented, a special FS-Cache function exists to
manage the heuristics of coping with vmscan trying to eject pages, which may
conflict with the cache trying to write pages to the cache (which may itself
-need to allocate memory):
+need to allocate memory)::
bool fscache_maybe_release_page(struct fscache_cookie *cookie,
struct page *page,
@@ -676,12 +669,12 @@ storage request to complete, or it may attempt to cancel the storage request -
in which case the page will not be stored in the cache this time.
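+
+Tying these together, a ->releasepage() implementation might look roughly like
+this sketch (the examplefs_* names and EXAMPLEFS_I() wrapper are assumptions,
+and it is assumed the final argument is the gfp mask releasepage() was given)::
+
+	static int examplefs_releasepage(struct page *page, gfp_t gfp)
+	{
+		struct examplefs_inode *vnode = EXAMPLEFS_I(page->mapping->host);
+
+		if (PageFsCache(page)) {
+			/* Let FS-Cache arbitrate between vmscan and any
+			 * store to the cache still in progress.
+			 */
+			if (!fscache_maybe_release_page(vnode->cache, page, gfp))
+				return 0;	/* cannot release the page yet */
+		}
+		return 1;
+	}
+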
-BULK INODE PAGE UNCACHE
+Bulk Inode Page Uncache
-----------------------
A convenience routine is provided to perform an uncache on all the pages
attached to an inode. This assumes that the pages on the inode correspond on a
-1:1 basis with the pages in the cache.
+1:1 basis with the pages in the cache::
void fscache_uncache_all_inode_pages(struct fscache_cookie *cookie,
struct inode *inode);
@@ -692,12 +685,11 @@ written to the cache and for the cache to finish with the page generally. No
error is returned.
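+
+As an illustrative fragment (names assumed as before), this would typically be
+called when an inode is being evicted::
+
+	/* Strip the cache marks from every page attached to this inode,
+	 * waiting for any writes to the cache to finish first.
+	 */
+	fscache_uncache_all_inode_pages(vnode->cache, inode);
+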
-===============================
-INDEX AND DATA FILE CONSISTENCY
+Index and Data File Consistency
===============================
To find out whether auxiliary data for an object is up to date within the
-cache, the following function can be called:
+cache, the following function can be called::
int fscache_check_consistency(struct fscache_cookie *cookie,
const void *aux_data);
@@ -708,7 +700,7 @@ data buffer first. It returns 0 if it is and -ESTALE if it isn't; it may also
return -ENOMEM and -ERESTARTSYS.
To request an update of the index data for an index or other object, the
-following function should be called:
+following function should be called::
void fscache_update_cookie(struct fscache_cookie *cookie,
const void *aux_data);
@@ -721,8 +713,7 @@ Note that partial updates may happen automatically at other times, such as when
data blocks are added to a data file object.
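+
+By way of a sketch (the auxiliary structure and field names are assumptions), a
+netfs might revalidate and then refresh its auxiliary data like so::
+
+	struct examplefs_aux {
+		u64	data_version;
+	} aux = {
+		.data_version = vnode->status.data_version,
+	};
+
+	/* Compare the cache's stored auxiliary data against ours. */
+	if (fscache_check_consistency(vnode->cache, &aux) == -ESTALE) {
+		/* Cached object is out of date - see the invalidation
+		 * sections below for how to deal with that.
+		 */
+	}
+
+	/* Push the current auxiliary data into the cache's metadata. */
+	fscache_update_cookie(vnode->cache, &aux);
+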
-=================
-COOKIE ENABLEMENT
+Cookie Enablement
=================
Cookies exist in one of two states: enabled and disabled. If a cookie is
@@ -731,7 +722,7 @@ invalidate its state; allocate, read or write backing pages - though it is
still possible to uncache pages and relinquish the cookie.
The initial enablement state is set by fscache_acquire_cookie(), but the cookie
-can be enabled or disabled later. To disable a cookie, call:
+can be enabled or disabled later. To disable a cookie, call::
void fscache_disable_cookie(struct fscache_cookie *cookie,
const void *aux_data,
@@ -746,7 +737,7 @@ All possible failures are handled internally. The caller should consider
calling fscache_uncache_all_inode_pages() afterwards to make sure all page
markings are cleared up.
-Cookies can be enabled or reenabled with:
+Cookies can be enabled or reenabled with::
void fscache_enable_cookie(struct fscache_cookie *cookie,
const void *aux_data,
@@ -771,13 +762,12 @@ In both cases, the cookie's auxiliary data buffer is updated from aux_data if
that is non-NULL inside the enablement lock before proceeding.
-===============================
-MISCELLANEOUS COOKIE OPERATIONS
+Miscellaneous Cookie Operations
===============================
There are a number of operations that can be used to control cookies:
- (*) Cookie pinning:
+ * Cookie pinning::
int fscache_pin_cookie(struct fscache_cookie *cookie);
void fscache_unpin_cookie(struct fscache_cookie *cookie);
@@ -790,7 +780,7 @@ There are a number of operations that can be used to control cookies:
-ENOSPC if there isn't enough space to honour the operation, -ENOMEM or
-EIO if there's any other problem.
- (*) Data space reservation:
+ * Data space reservation::
int fscache_reserve_space(struct fscache_cookie *cookie, loff_t size);
@@ -809,11 +799,10 @@ There are a number of operations that can be used to control cookies:
make space if it's not in use.
-=====================
-COOKIE UNREGISTRATION
+Cookie Unregistration
=====================
-To get rid of a cookie, this function should be called.
+To get rid of a cookie, this function should be called::
void fscache_relinquish_cookie(struct fscache_cookie *cookie,
const void *aux_data,
@@ -835,16 +824,14 @@ the cookies for "child" indices, objects and pages have been relinquished
first.
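+
+A bare-bones fragment (assuming the final argument selects retirement and
+reusing the earlier vnode naming) might relinquish a data file cookie when the
+inode is evicted, retiring it if the file was deleted::
+
+	/* aux data lets the cache finalise its stored copy; passing true
+	 * for the last argument would retire (discard) the cache object.
+	 */
+	fscache_relinquish_cookie(vnode->cache, &aux, deleted);
+	vnode->cache = NULL;
+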
-==================
-INDEX INVALIDATION
+Index Invalidation
==================
There is no direct way to invalidate an index subtree. To do this, the caller
should relinquish and retire the cookie they have, and then acquire a new one.
-======================
-DATA FILE INVALIDATION
+Data File Invalidation
======================
Sometimes it will be necessary to invalidate an object that contains data.
@@ -853,7 +840,7 @@ change - at which point the netfs has to throw away all the state it had for an
inode and reload from the server.
To indicate that a cache object should be invalidated, the following function
-can be called:
+can be called::
void fscache_invalidate(struct fscache_cookie *cookie);
@@ -868,13 +855,12 @@ auxiliary data update operation as it is very likely these will have changed.
Using the following function, the netfs can wait for the invalidation operation
to have reached a point at which it can start submitting ordinary operations
-once again:
+once again::
void fscache_wait_on_invalidate(struct fscache_cookie *cookie);
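+
+For instance, a netfs that has just learned from its server that an inode's
+data changed might, as a minimal sketch, do::
+
+	fscache_invalidate(cookie);
+	/* ... throw away the netfs's own cached state for the inode ... */
+	fscache_wait_on_invalidate(cookie);
+	/* ordinary operations on the cookie may now be submitted again */
+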
-===========================
-FS-CACHE SPECIFIC PAGE FLAG
+FS-Cache Specific Page Flag
===========================
FS-Cache makes use of a page flag, PG_private_2, for its own purpose. This is
@@ -898,7 +884,7 @@ was given under certain circumstances.
This bit does not overlap with other flags such as PG_private. This means that FS-Cache
can be used with a filesystem that uses the block buffering code.
-There are a number of operations defined on this flag:
+There are a number of operations defined on this flag::
int PageFsCache(struct page *page);
void SetPageFsCache(struct page *page)
diff --git a/Documentation/filesystems/caching/object.txt b/Documentation/filesystems/caching/object.rst
index 100ff41127e4..ce0e043ccd33 100644
--- a/Documentation/filesystems/caching/object.txt
+++ b/Documentation/filesystems/caching/object.rst
@@ -1,10 +1,12 @@
- ====================================================
- IN-KERNEL CACHE OBJECT REPRESENTATION AND MANAGEMENT
- ====================================================
+.. SPDX-License-Identifier: GPL-2.0
+
+====================================================
+In-Kernel Cache Object Representation and Management
+====================================================
By: David Howells <dhowells@redhat.com>
-Contents:
+.. Contents:
(*) Representation
@@ -18,8 +20,7 @@ Contents:
(*) The set of events.
-==============
-REPRESENTATION
+Representation
==============
FS-Cache maintains an in-kernel representation of each object that a netfs is
@@ -38,7 +39,7 @@ or even by no objects (it may not be cached).
Furthermore, both cookies and objects are hierarchical. The two hierarchies
correspond, but the cookies tree is a superset of the union of the object trees
-of multiple caches:
+of multiple caches::
NETFS INDEX TREE : CACHE 1 : CACHE 2
: :
@@ -89,8 +90,7 @@ pointers to the cookies. The cookies themselves and any objects attached to
those cookies are hidden from it.
-===============================
-OBJECT MANAGEMENT STATE MACHINE
+Object Management State Machine
===============================
Within FS-Cache, each active object is managed by its own individual state
@@ -124,7 +124,7 @@ is not masked, the object will be queued for processing (by calling
fscache_enqueue_object()).
-PROVISION OF CPU TIME
+Provision of CPU Time
---------------------
The work to be done by the various states was given CPU time by the threads of
@@ -141,7 +141,7 @@ because:
workqueues don't necessarily have the right numbers of threads.
-LOCKING SIMPLIFICATION
+Locking Simplification
----------------------
Because only one worker thread may be operating on any particular object's
@@ -151,8 +151,7 @@ from the cache backend's representation (fscache_object) - which may be
requested from either end.
-=================
-THE SET OF STATES
+The Set of States
=================
The object state machine has a set of states that it can be in. There are
@@ -275,19 +274,17 @@ memory and potentially deletes stuff from disk:
this state.
-THE SET OF EVENTS
+The Set of Events
-----------------
There are a number of events that can be raised to an object state machine:
- (*) FSCACHE_OBJECT_EV_UPDATE
-
+ FSCACHE_OBJECT_EV_UPDATE
The netfs requested that an object be updated. The state machine will ask
the cache backend to update the object, and the cache backend will ask the
netfs for details of the change through its cookie definition ops.
- (*) FSCACHE_OBJECT_EV_CLEARED
-
+ FSCACHE_OBJECT_EV_CLEARED
This is signalled in two circumstances:
(a) when an object's last child object is dropped and
@@ -296,20 +293,16 @@ There are a number of events that can be raised to an object state machine:
This is used to proceed from the dying state.
- (*) FSCACHE_OBJECT_EV_ERROR
-
+ FSCACHE_OBJECT_EV_ERROR
This is signalled when an I/O error occurs during the processing of some
object.
- (*) FSCACHE_OBJECT_EV_RELEASE
- (*) FSCACHE_OBJECT_EV_RETIRE
-
+ FSCACHE_OBJECT_EV_RELEASE, FSCACHE_OBJECT_EV_RETIRE
These are signalled when the netfs relinquishes a cookie it was using.
The event selected depends on whether the netfs asks for the backing
object to be retired (deleted) or retained.
- (*) FSCACHE_OBJECT_EV_WITHDRAW
-
+ FSCACHE_OBJECT_EV_WITHDRAW
This is signalled when the cache backend wants to withdraw an object.
This means that the object will have to be detached from the netfs's
cookie.
diff --git a/Documentation/filesystems/caching/operations.txt b/Documentation/filesystems/caching/operations.rst
index d8976c434718..f7ddcc028939 100644
--- a/Documentation/filesystems/caching/operations.txt
+++ b/Documentation/filesystems/caching/operations.rst
@@ -1,10 +1,12 @@
- ================================
- ASYNCHRONOUS OPERATIONS HANDLING
- ================================
+.. SPDX-License-Identifier: GPL-2.0
+
+================================
+Asynchronous Operations Handling
+================================
By: David Howells <dhowells@redhat.com>
-Contents:
+.. Contents:
(*) Overview.
@@ -17,8 +19,7 @@ Contents:
(*) Asynchronous callback.
-========
-OVERVIEW
+Overview
========
FS-Cache has an asynchronous operations handling facility that it uses for its
@@ -33,11 +34,10 @@ backend for completion.
To make use of this facility, <linux/fscache-cache.h> should be #included.
-===============================
-OPERATION RECORD INITIALISATION
+Operation Record Initialisation
===============================
-An operation is recorded in an fscache_operation struct:
+An operation is recorded in an fscache_operation struct::
struct fscache_operation {
union {
@@ -50,7 +50,7 @@ An operation is recorded in an fscache_operation struct:
};
Someone wanting to issue an operation should allocate something with this
-struct embedded in it. They should initialise it by calling:
+struct embedded in it. They should initialise it by calling::
void fscache_operation_init(struct fscache_operation *op,
fscache_operation_release_t release);
@@ -67,8 +67,7 @@ FSCACHE_OP_WAITING may be set in op->flags prior to each submission of the
operation and waited for afterwards.
-==========
-PARAMETERS
+Parameters
==========
There are a number of parameters that can be set in the operation record's flag
@@ -87,7 +86,7 @@ operations:
If this option is to be used, FSCACHE_OP_WAITING must be set in op->flags
before submitting the operation, and the operating thread must wait for it
- to be cleared before proceeding:
+ to be cleared before proceeding::
wait_on_bit(&op->flags, FSCACHE_OP_WAITING,
TASK_UNINTERRUPTIBLE);
@@ -101,7 +100,7 @@ operations:
page to a netfs page after the backing fs has read the page in.
If this option is used, op->fast_work and op->processor must be
- initialised before submitting the operation:
+ initialised before submitting the operation::
INIT_WORK(&op->fast_work, do_some_work);
@@ -114,7 +113,7 @@ operations:
pages that have just been fetched from a remote server.
If this option is used, op->slow_work and op->processor must be
- initialised before submitting the operation:
+ initialised before submitting the operation::
fscache_operation_init_slow(op, processor)
@@ -132,8 +131,7 @@ Furthermore, operations may be one of two types:
operations running at the same time.
-=========
-PROCEDURE
+Procedure
=========
Operations are used through the following procedure:
@@ -143,7 +141,7 @@ Operations are used through the following procedure:
generic op embedded within.
(2) The submitting thread must then submit the operation for processing using
- one of the following two functions:
+ one of the following two functions::
int fscache_submit_op(struct fscache_object *object,
struct fscache_operation *op);
@@ -164,7 +162,7 @@ Operations are used through the following procedure:
operation of conflicting exclusivity is in progress on the object.
If the operation is asynchronous, the manager will retain a reference to
- it, so the caller should put their reference to it by passing it to:
+ it, so the caller should put their reference to it by passing it to::
void fscache_put_operation(struct fscache_operation *op);
@@ -179,12 +177,12 @@ Operations are used through the following procedure:
(4) The operation holds an effective lock upon the object, preventing other
exclusive ops conflicting until it is released. The operation can be
enqueued for further immediate asynchronous processing by adjusting the
- CPU time provisioning option if necessary, eg:
+ CPU time provisioning option if necessary, eg::
op->flags &= ~FSCACHE_OP_TYPE;
	op->flags |= FSCACHE_OP_FAST;
- and calling:
+ and calling::
void fscache_enqueue_operation(struct fscache_operation *op)
@@ -192,13 +190,12 @@ Operations are used through the following procedure:
pools.
-=====================
-ASYNCHRONOUS CALLBACK
+Asynchronous Callback
=====================
When used in asynchronous mode, the worker thread pool will invoke the
processor method with a pointer to the operation. This should then get at the
-container struct by using container_of():
+container struct by using container_of()::
static void fscache_write_op(struct fscache_operation *_op)
{
diff --git a/Documentation/filesystems/cifs/cifsroot.txt b/Documentation/filesystems/cifs/cifsroot.rst
index 947b7ec6ce9e..4930bb443134 100644
--- a/Documentation/filesystems/cifs/cifsroot.txt
+++ b/Documentation/filesystems/cifs/cifsroot.rst
@@ -1,7 +1,11 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===========================================
Mounting root file system via SMB (cifs.ko)
===========================================
Written 2019 by Paulo Alcantara <palcantara@suse.de>
+
Written 2019 by Aurelien Aptel <aaptel@suse.com>
The CONFIG_CIFS_ROOT option enables experimental root file system
@@ -32,7 +36,7 @@ Server configuration
====================
To enable SMB1+UNIX extensions you will need to set these global
-settings in Samba smb.conf:
+settings in Samba smb.conf::
[global]
server min protocol = NT1
@@ -41,12 +45,16 @@ settings in Samba smb.conf:
Kernel command line
===================
-root=/dev/cifs
+::
+
+ root=/dev/cifs
This is just a virtual device that basically tells the kernel to mount
the root file system via SMB protocol.
-cifsroot=//<server-ip>/<share>[,options]
+::
+
+ cifsroot=//<server-ip>/<share>[,options]
Enables the kernel to mount the root file system via SMB from the
<server-ip> and <share> specified in this option.
@@ -65,33 +73,33 @@ options
Examples
========
-Export root file system as a Samba share in smb.conf file.
+Export root file system as a Samba share in smb.conf file::
-...
-[linux]
- path = /path/to/rootfs
- read only = no
- guest ok = yes
- force user = root
- force group = root
- browseable = yes
- writeable = yes
- admin users = root
- public = yes
- create mask = 0777
- directory mask = 0777
-...
+ ...
+ [linux]
+ path = /path/to/rootfs
+ read only = no
+ guest ok = yes
+ force user = root
+ force group = root
+ browseable = yes
+ writeable = yes
+ admin users = root
+ public = yes
+ create mask = 0777
+ directory mask = 0777
+ ...
-Restart smb service.
+Restart smb service::
-# systemctl restart smb
+ # systemctl restart smb
Test it under QEMU on a kernel built with CONFIG_CIFS_ROOT and
-CONFIG_IP_PNP options enabled.
+CONFIG_IP_PNP options enabled::
-# qemu-system-x86_64 -enable-kvm -cpu host -m 1024 \
- -kernel /path/to/linux/arch/x86/boot/bzImage -nographic \
- -append "root=/dev/cifs rw ip=dhcp cifsroot=//10.0.2.2/linux,username=foo,password=bar console=ttyS0 3"
+ # qemu-system-x86_64 -enable-kvm -cpu host -m 1024 \
+ -kernel /path/to/linux/arch/x86/boot/bzImage -nographic \
+ -append "root=/dev/cifs rw ip=dhcp cifsroot=//10.0.2.2/linux,username=foo,password=bar console=ttyS0 3"
1: https://wiki.samba.org/index.php/UNIX_Extensions
diff --git a/Documentation/filesystems/coda.rst b/Documentation/filesystems/coda.rst
new file mode 100644
index 000000000000..84c860c89887
--- /dev/null
+++ b/Documentation/filesystems/coda.rst
@@ -0,0 +1,1670 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===========================
+Coda Kernel-Venus Interface
+===========================
+
+.. Note::
+
+ This is one of the technical documents describing a component of
+ Coda -- this document describes the client kernel-Venus interface.
+
+For more information:
+
+ http://www.coda.cs.cmu.edu
+
+For user level software needed to run Coda:
+
+ ftp://ftp.coda.cs.cmu.edu
+
+To run Coda you need to get a user level cache manager for the client,
+named Venus, as well as tools to manipulate ACLs, to log in, etc. The
+client needs to have the Coda filesystem selected in the kernel
+configuration.
+
+The server needs a user level server and at present does not depend on
+kernel support.
+
+ The Venus kernel interface
+
+ Peter J. Braam
+
+ v1.0, Nov 9, 1997
+
+ This document describes the communication between Venus and kernel
+ level filesystem code needed for the operation of the Coda file sys-
+ tem. This document version is meant to describe the current interface
+ (version 1.0) as well as improvements we envisage.
+
+.. Table of Contents
+
+ 1. Introduction
+
+ 2. Servicing Coda filesystem calls
+
+ 3. The message layer
+
+ 3.1 Implementation details
+
+ 4. The interface at the call level
+
+ 4.1 Data structures shared by the kernel and Venus
+ 4.2 The pioctl interface
+ 4.3 root
+ 4.4 lookup
+ 4.5 getattr
+ 4.6 setattr
+ 4.7 access
+ 4.8 create
+ 4.9 mkdir
+ 4.10 link
+ 4.11 symlink
+ 4.12 remove
+ 4.13 rmdir
+ 4.14 readlink
+ 4.15 open
+ 4.16 close
+ 4.17 ioctl
+ 4.18 rename
+ 4.19 readdir
+ 4.20 vget
+ 4.21 fsync
+ 4.22 inactive
+ 4.23 rdwr
+ 4.24 odymount
+ 4.25 ody_lookup
+ 4.26 ody_expand
+ 4.27 prefetch
+ 4.28 signal
+
+ 5. The minicache and downcalls
+
+ 5.1 INVALIDATE
+ 5.2 FLUSH
+ 5.3 PURGEUSER
+ 5.4 ZAPFILE
+ 5.5 ZAPDIR
+ 5.6 ZAPVNODE
+ 5.7 PURGEFID
+ 5.8 REPLACE
+
+ 6. Initialization and cleanup
+
+ 6.1 Requirements
+
+1. Introduction
+===============
+
+ A key component in the Coda Distributed File System is the cache
+ manager, Venus.
+
+ When processes on a Coda enabled system access files in the Coda
+ filesystem, requests are directed at the filesystem layer in the
+ operating system. The operating system will communicate with Venus to
+ service the request for the process. Venus manages a persistent
+ client cache and makes remote procedure calls to Coda file servers and
+ related servers (such as authentication servers) to service these
+ requests it receives from the operating system. When Venus has
+ serviced a request it replies to the operating system with appropriate
+ return codes, and other data related to the request. Optionally the
+ kernel support for Coda may maintain a minicache of recently processed
+ requests to limit the number of interactions with Venus. Venus
+ possesses the facility to inform the kernel when elements from its
+ minicache are no longer valid.
+
+ This document describes precisely this communication between the
+ kernel and Venus. The definitions of so called upcalls and downcalls
+ will be given with the format of the data they handle. We shall also
+ describe the semantic invariants resulting from the calls.
+
+ Historically Coda was implemented in a BSD file system in Mach 2.6.
+ The interface between the kernel and Venus is very similar to the BSD
+ VFS interface. Similar functionality is provided, and the format of
+ the parameters and returned data is very similar to the BSD VFS. This
+ leads to an almost natural environment for implementing a kernel-level
+ filesystem driver for Coda in a BSD system. However, other operating
+  systems such as Linux and Windows 95 and NT have virtual filesystems
+ with different interfaces.
+
+ To implement Coda on these systems some reverse engineering of the
+ Venus/Kernel protocol is necessary. Also it came to light that other
+ systems could profit significantly from certain small optimizations
+ and modifications to the protocol. To facilitate this work as well as
+ to make future ports easier, communication between Venus and the
+ kernel should be documented in great detail. This is the aim of this
+ document.
+
+2. Servicing Coda filesystem calls
+===================================
+
+ The service of a request for a Coda file system service originates in
+  a process P which is accessing a Coda file.  It makes a system call which
+ traps to the OS kernel. Examples of such calls trapping to the kernel
+ are ``read``, ``write``, ``open``, ``close``, ``create``, ``mkdir``,
+  ``rmdir``, ``chmod`` in a Unix context.  Similar calls exist in the Win32
+  environment, such as ``CreateFile``.
+
+ Generally the operating system handles the request in a virtual
+ filesystem (VFS) layer, which is named I/O Manager in NT and IFS
+ manager in Windows 95. The VFS is responsible for partial processing
+ of the request and for locating the specific filesystem(s) which will
+ service parts of the request. Usually the information in the path
+ assists in locating the correct FS drivers. Sometimes after extensive
+ pre-processing, the VFS starts invoking exported routines in the FS
+ driver. This is the point where the FS specific processing of the
+ request starts, and here the Coda specific kernel code comes into
+ play.
+
+ The FS layer for Coda must expose and implement several interfaces.
+ First and foremost the VFS must be able to make all necessary calls to
+ the Coda FS layer, so the Coda FS driver must expose the VFS interface
+ as applicable in the operating system. These differ very significantly
+ among operating systems, but share features such as facilities to
+ read/write and create and remove objects. The Coda FS layer services
+ such VFS requests by invoking one or more well defined services
+ offered by the cache manager Venus. When the replies from Venus have
+ come back to the FS driver, servicing of the VFS call continues and
+ finishes with a reply to the kernel's VFS. Finally the VFS layer
+ returns to the process.
+
+ As a result of this design a basic interface exposed by the FS driver
+ must allow Venus to manage message traffic. In particular Venus must
+ be able to retrieve and place messages and to be notified of the
+ arrival of a new message. The notification must be through a mechanism
+ which does not block Venus since Venus must attend to other tasks even
+ when no messages are waiting or being processed.
+
+ **Interfaces of the Coda FS Driver**
+
+ Furthermore the FS layer provides for a special path of communication
+ between a user process and Venus, called the pioctl interface. The
+ pioctl interface is used for Coda specific services, such as
+ requesting detailed information about the persistent cache managed by
+ Venus. Here the involvement of the kernel is minimal. It identifies
+ the calling process and passes the information on to Venus. When
+ Venus replies the response is passed back to the caller in unmodified
+ form.
+
+ Finally Venus allows the kernel FS driver to cache the results from
+ certain services. This is done to avoid excessive context switches
+ and results in an efficient system. However, Venus may acquire
+  information, for example from the network, which implies that cached
+ information must be flushed or replaced. Venus then makes a downcall
+ to the Coda FS layer to request flushes or updates in the cache. The
+ kernel FS driver handles such requests synchronously.
+
+ Among these interfaces the VFS interface and the facility to place,
+ receive and be notified of messages are platform specific. We will
+ not go into the calls exported to the VFS layer but we will state the
+ requirements of the message exchange mechanism.
+
+
+3. The message layer
+=====================
+
+ At the lowest level the communication between Venus and the FS driver
+ proceeds through messages. The synchronization between processes
+ requesting Coda file service and Venus relies on blocking and waking
+ up processes. The Coda FS driver processes VFS- and pioctl-requests
+ on behalf of a process P, creates messages for Venus, awaits replies
+ and finally returns to the caller. The implementation of the exchange
+ of messages is platform specific, but the semantics have (so far)
+ appeared to be generally applicable. Data buffers are created by the
+ FS Driver in kernel memory on behalf of P and copied to user memory in
+ Venus.
+
+ The FS Driver while servicing P makes upcalls to Venus. Such an
+ upcall is dispatched to Venus by creating a message structure. The
+ structure contains the identification of P, the message sequence
+ number, the size of the request and a pointer to the data in kernel
+ memory for the request. Since the data buffer is re-used to hold the
+ reply from Venus, there is a field for the size of the reply. A flags
+ field is used in the message to precisely record the status of the
+ message. Additional platform dependent structures involve pointers to
+ determine the position of the message on queues and pointers to
+ synchronization objects. In the upcall routine the message structure
+ is filled in, flags are set to 0, and it is placed on the *pending*
+ queue. The routine calling upcall is responsible for allocating the
+ data buffer; its structure will be described in the next section.
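+
+  As a concrete illustration of the bookkeeping described above, a
+  per-message record might look roughly as follows.  This is only a
+  sketch; the field and type names are illustrative rather than those
+  used by any particular driver::
+
+      struct upc_msg {
+          int              uniq;       /* message sequence number          */
+          int              pid;        /* identification of process P      */
+          size_t           req_size;   /* size of the request              */
+          size_t           rep_size;   /* size of the reply written back   */
+          void            *data;       /* kernel buffer, re-used for reply */
+          unsigned int     flags;      /* 0, READ or WRITTEN               */
+          struct upc_msg  *next;       /* position on pending/processing q */
+          void            *sleep;      /* platform synchronization object  */
+      };
+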
+
+ A facility must exist to notify Venus that the message has been
+  created; this is implemented using available synchronization objects in
+ the OS. This notification is done in the upcall context of the process
+ P. When the message is on the pending queue, process P cannot proceed
+ in upcall. The (kernel mode) processing of P in the filesystem
+ request routine must be suspended until Venus has replied. Therefore
+ the calling thread in P is blocked in upcall. A pointer in the
+ message structure will locate the synchronization object on which P is
+ sleeping.
+
+ Venus detects the notification that a message has arrived, and the FS
+  driver allows Venus to retrieve the message with a getmsg_from_kernel
+ call. This action finishes in the kernel by putting the message on the
+ queue of processing messages and setting flags to READ. Venus is
+ passed the contents of the data buffer. The getmsg_from_kernel call
+ now returns and Venus processes the request.
+
+ At some later point the FS driver receives a message from Venus,
+ namely when Venus calls sendmsg_to_kernel. At this moment the Coda FS
+  driver looks at the contents of the message and decides whether:
+
+
+ * the message is a reply for a suspended thread P. If so it removes
+ the message from the processing queue and marks the message as
+ WRITTEN. Finally, the FS driver unblocks P (still in the kernel
+ mode context of Venus) and the sendmsg_to_kernel call returns to
+ Venus. The process P will be scheduled at some point and continues
+ processing its upcall with the data buffer replaced with the reply
+ from Venus.
+
+ * The message is a ``downcall``. A downcall is a request from Venus to
+ the FS Driver. The FS driver processes the request immediately
+ (usually a cache eviction or replacement) and when it finishes
+ sendmsg_to_kernel returns.
+
+ Now P awakes and continues processing upcall. There are some
+ subtleties to take account of. First P will determine if it was woken
+ up in upcall by a signal from some other source (for example an
+ attempt to terminate P) or as is normally the case by Venus in its
+ sendmsg_to_kernel call. In the normal case, the upcall routine will
+ deallocate the message structure and return. The FS routine can proceed
+ with its processing.
+
+
+ **Sleeping and IPC arrangements**
+
+ In case P is woken up by a signal and not by Venus, it will first look
+ at the flags field. If the message is not yet READ, the process P can
+ handle its signal without notifying Venus. If Venus has READ, and
+ the request should not be processed, P can send Venus a signal message
+ to indicate that it should disregard the previous message. Such
+ signals are put in the queue at the head, and read first by Venus. If
+ the message is already marked as WRITTEN it is too late to stop the
+ processing. The VFS routine will now continue. (-- If a VFS request
+  involves more than one upcall, this can lead to complicated state; an
+ extra field "handle_signals" could be added in the message structure
+ to indicate points of no return have been passed.--)
+
+
+
+3.1. Implementation details
+----------------------------
+
+ The Unix implementation of this mechanism has been through the
+ implementation of a character device associated with Coda. Venus
+ retrieves messages by doing a read on the device, replies are sent
+ with a write and notification is through the select system call on the
+ file descriptor for the device. The process P is kept waiting on an
+ interruptible wait queue object.
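+
+  The following user-space fragment sketches the Unix arrangement just
+  described.  It is illustrative only: the device name and message size
+  bound are assumptions, and error handling is reduced to retrying::
+
+      #include <sys/select.h>
+      #include <fcntl.h>
+      #include <unistd.h>
+
+      #define MSG_MAX 8192                    /* assumed message size bound */
+
+      static void venus_loop(void)
+      {
+          int fd = open("/dev/cfs0", O_RDWR); /* device name is an assumption */
+          char buf[MSG_MAX];
+          fd_set rfds;
+          ssize_t n;
+
+          for (;;) {
+              FD_ZERO(&rfds);
+              FD_SET(fd, &rfds);
+              if (select(fd + 1, &rfds, NULL, NULL, NULL) <= 0)
+                  continue;                   /* notification of arrival */
+              n = read(fd, buf, sizeof(buf)); /* getmsg_from_kernel      */
+              if (n <= 0)
+                  continue;
+              /* ... service the upcall and build the reply in buf ... */
+              write(fd, buf, (size_t)n);      /* sendmsg_to_kernel       */
+          }
+      }
+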
+
+ In Windows NT and the DPMI Windows 95 implementation a DeviceIoControl
+ call is used. The DeviceIoControl call is designed to copy buffers
+ from user memory to kernel memory with OPCODES. The sendmsg_to_kernel
+ is issued as a synchronous call, while the getmsg_from_kernel call is
+ asynchronous. Windows EventObjects are used for notification of
+ message arrival. The process P is kept waiting on a KernelEvent
+ object in NT and a semaphore in Windows 95.
+
+
+4. The interface at the call level
+===================================
+
+
+ This section describes the upcalls a Coda FS driver can make to Venus.
+ Each of these upcalls make use of two structures: inputArgs and
+ outputArgs. In pseudo BNF form the structures take the following
+ form::
+
+
+ struct inputArgs {
+ u_long opcode;
+ u_long unique; /* Keep multiple outstanding msgs distinct */
+ u_short pid; /* Common to all */
+ u_short pgid; /* Common to all */
+ struct CodaCred cred; /* Common to all */
+
+ <union "in" of call dependent parts of inputArgs>
+ };
+
+ struct outputArgs {
+ u_long opcode;
+ u_long unique; /* Keep multiple outstanding msgs distinct */
+ u_long result;
+
+ <union "out" of call dependent parts of inputArgs>
+ };
+
+
+
+ Before going on let us elucidate the role of the various fields. The
+ inputArgs start with the opcode which defines the type of service
+ requested from Venus. There are approximately 30 upcalls at present
+ which we will discuss. The unique field labels the inputArg with a
+ unique number which will identify the message uniquely. A process and
+ process group id are passed. Finally the credentials of the caller
+ are included.
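+
+  As a sketch of how the common part of an upcall might be filled in
+  before dispatch (the helper names here are assumptions made purely for
+  illustration, not part of the interface)::
+
+      static void fill_common_header(struct inputArgs *in, u_long opcode)
+      {
+          in->opcode = opcode;            /* service requested from Venus  */
+          in->unique = next_unique_id();  /* labels this message uniquely  */
+          in->pid    = caller_pid();      /* process id of the caller      */
+          in->pgid   = caller_pgid();     /* process group of the caller   */
+          fill_codacred(&in->cred);       /* credentials of the caller     */
+          /* the call dependent member of the "in" union is filled in by
+           * the individual upcall routines described below */
+      }
+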
+
+ Before delving into the specific calls we need to discuss a variety of
+ data structures shared by the kernel and Venus.
+
+
+
+
+4.1. Data structures shared by the kernel and Venus
+----------------------------------------------------
+
+
+ The CodaCred structure defines a variety of user and group ids as
+ they are set for the calling process. The vuid_t and vgid_t are 32 bit
+ unsigned integers. It also defines group membership in an array. On
+ Unix the CodaCred has proven sufficient to implement good security
+ semantics for Coda but the structure may have to undergo modification
+ for the Windows environment when these mature::
+
+ struct CodaCred {
+ vuid_t cr_uid, cr_euid, cr_suid, cr_fsuid; /* Real, effective, set, fs uid */
+ vgid_t cr_gid, cr_egid, cr_sgid, cr_fsgid; /* same for groups */
+ vgid_t cr_groups[NGROUPS]; /* Group membership for caller */
+ };
+
+
+ .. Note::
+
+ It is questionable if we need CodaCreds in Venus. Finally Venus
+ doesn't know about groups, although it does create files with the
+ default uid/gid. Perhaps the list of group membership is superfluous.
+
+
+ The next item is the fundamental identifier used to identify Coda
+ files, the ViceFid. A fid of a file uniquely defines a file or
+ directory in the Coda filesystem within a cell [1]_::
+
+ typedef struct ViceFid {
+ VolumeId Volume;
+ VnodeId Vnode;
+ Unique_t Unique;
+ } ViceFid;
+
+  .. [1] A cell is a group of Coda servers acting under the aegis of a single
+ system control machine or SCM. See the Coda Administration manual
+ for a detailed description of the role of the SCM.
+
+ Each of the constituent fields: VolumeId, VnodeId and Unique_t are
+ unsigned 32 bit integers. We envisage that a further field will need
+ to be prefixed to identify the Coda cell; this will probably take the
+  form of an IPv6-sized IP address naming the Coda cell through DNS.
+
+ The next important structure shared between Venus and the kernel is
+ the attributes of the file. The following structure is used to
+ exchange information. It has room for future extensions such as
+ support for device files (currently not present in Coda)::
+
+
+ struct coda_timespec {
+ int64_t tv_sec; /* seconds */
+ long tv_nsec; /* nanoseconds */
+ };
+
+ struct coda_vattr {
+ enum coda_vtype va_type; /* vnode type (for create) */
+ u_short va_mode; /* files access mode and type */
+ short va_nlink; /* number of references to file */
+ vuid_t va_uid; /* owner user id */
+ vgid_t va_gid; /* owner group id */
+ long va_fsid; /* file system id (dev for now) */
+ long va_fileid; /* file id */
+ u_quad_t va_size; /* file size in bytes */
+ long va_blocksize; /* blocksize preferred for i/o */
+ struct coda_timespec va_atime; /* time of last access */
+ struct coda_timespec va_mtime; /* time of last modification */
+ struct coda_timespec va_ctime; /* time file changed */
+ u_long va_gen; /* generation number of file */
+ u_long va_flags; /* flags defined for file */
+ dev_t va_rdev; /* device special file represents */
+ u_quad_t va_bytes; /* bytes of disk space held by file */
+ u_quad_t va_filerev; /* file modification number */
+ u_int va_vaflags; /* operations flags, see below */
+ long va_spare; /* remain quad aligned */
+ };
+
+
+4.2. The pioctl interface
+--------------------------
+
+
+  Coda specific requests can be made by applications through the pioctl
+ interface. The pioctl is implemented as an ordinary ioctl on a
+ fictitious file /coda/.CONTROL. The pioctl call opens this file, gets
+ a file handle and makes the ioctl call. Finally it closes the file.
+
+ The kernel involvement in this is limited to providing the facility to
+ open and close and pass the ioctl message and to verify that a path in
+ the pioctl data buffers is a file in a Coda filesystem.
+
+ The kernel is handed a data packet of the form::
+
+ struct {
+ const char *path;
+ struct ViceIoctl vidata;
+ int follow;
+ } data;
+
+
+
+ where::
+
+
+ struct ViceIoctl {
+ caddr_t in, out; /* Data to be transferred in, or out */
+ short in_size; /* Size of input buffer <= 2K */
+ short out_size; /* Maximum size of output buffer, <= 2K */
+ };
+
+
+
+  The path must be a Coda file; otherwise the ioctl upcall will not be
+ made.
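+
+  A user level sketch of the sequence just described follows.  The struct
+  name and the ioctl command number are assumptions for illustration; only
+  the overall open/ioctl/close pattern is the point::
+
+      #include <sys/ioctl.h>
+      #include <fcntl.h>
+      #include <unistd.h>
+
+      struct PioctlData {                 /* name is an assumption        */
+          const char       *path;
+          struct ViceIoctl  vidata;
+          int               follow;
+      };
+
+      #define EXAMPLE_PIOCTL _IOW('c', 1, struct PioctlData)  /* assumed */
+
+      static int do_pioctl(const char *path, struct ViceIoctl *vidata,
+                           int follow)
+      {
+          struct PioctlData data = { path, *vidata, follow };
+          int fd, err;
+
+          fd = open("/coda/.CONTROL", O_RDONLY);
+          if (fd < 0)
+              return -1;
+          err = ioctl(fd, EXAMPLE_PIOCTL, &data);
+          close(fd);
+          return err;
+      }
+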
+
+ .. Note:: The data structures and code are a mess. We need to clean this up.
+
+
+**We now proceed to document the individual calls**:
+
+
+4.3. root
+----------
+
+
+ Arguments
+ in
+
+ empty
+
+ out::
+
+ struct cfs_root_out {
+ ViceFid VFid;
+ } cfs_root;
+
+
+
+ Description
+ This call is made to Venus during the initialization of
+ the Coda filesystem. If the result is zero, the cfs_root structure
+ contains the ViceFid of the root of the Coda filesystem. If a non-zero
+ result is generated, its value is a platform dependent error code
+ indicating the difficulty Venus encountered in locating the root of
+ the Coda filesystem.
+
+4.4. lookup
+------------
+
+
+ Summary
+ Find the ViceFid and type of an object in a directory if it exists.
+
+ Arguments
+ in::
+
+ struct cfs_lookup_in {
+ ViceFid VFid;
+ char *name; /* Place holder for data. */
+ } cfs_lookup;
+
+
+
+ out::
+
+ struct cfs_lookup_out {
+ ViceFid VFid;
+ int vtype;
+ } cfs_lookup;
+
+
+
+ Description
+ This call is made to determine the ViceFid and filetype of
+  a directory entry.  The requested entry carries the name ``name``,
+ and Venus will search the directory identified by cfs_lookup_in.VFid.
+ The result may indicate that the name does not exist, or that
+ difficulty was encountered in finding it (e.g. due to disconnection).
+ If the result is zero, the field cfs_lookup_out.VFid contains the
+  target's ViceFid and cfs_lookup_out.vtype the coda_vtype giving the
+ type of object the name designates.
+
+ The name of the object is an 8 bit character string of maximum length
+  CFS_MAXNAMLEN, currently set to 256 (including a 0 terminator).
+
+ It is extremely important to realize that Venus bitwise ors the field
+ cfs_lookup.vtype with CFS_NOCACHE to indicate that the object should
+ not be put in the kernel name cache.
+
+ .. Note::
+
+ The type of the vtype is currently wrong. It should be
+ coda_vtype. Linux does not take note of CFS_NOCACHE. It should.
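+
+  A small sketch of how a driver could honour CFS_NOCACHE when consuming
+  the reply (the name cache helper used here is hypothetical)::
+
+      int vtype   = out->cfs_lookup.vtype;
+      int nocache = vtype & CFS_NOCACHE;
+
+      vtype &= ~CFS_NOCACHE;              /* the real coda_vtype           */
+      if (!nocache)
+          name_cache_enter(dir, name, &out->cfs_lookup.VFid, vtype);
+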
+
+
+4.5. getattr
+-------------
+
+
+  Summary
+     Get the attributes of a file.
+
+ Arguments
+ in::
+
+ struct cfs_getattr_in {
+ ViceFid VFid;
+ struct coda_vattr attr; /* XXXXX */
+ } cfs_getattr;
+
+
+
+ out::
+
+ struct cfs_getattr_out {
+ struct coda_vattr attr;
+ } cfs_getattr;
+
+
+
+ Description
+ This call returns the attributes of the file identified by fid.
+
+ Errors
+ Errors can occur if the object with fid does not exist, is
+     inaccessible or if the caller does not have permission to fetch
+ attributes.
+
+ .. Note::
+
+ Many kernel FS drivers (Linux, NT and Windows 95) need to acquire
+ the attributes as well as the Fid for the instantiation of an internal
+ "inode" or "FileHandle". A significant improvement in performance on
+ such systems could be made by combining the lookup and getattr calls
+ both at the Venus/kernel interaction level and at the RPC level.
+
+ The vattr structure included in the input arguments is superfluous and
+ should be removed.
+
+
+4.6. setattr
+-------------
+
+
+ Summary
+ Set the attributes of a file.
+
+ Arguments
+ in::
+
+ struct cfs_setattr_in {
+ ViceFid VFid;
+ struct coda_vattr attr;
+ } cfs_setattr;
+
+
+
+
+ out
+
+ empty
+
+ Description
+ The structure attr is filled with attributes to be changed
+ in BSD style. Attributes not to be changed are set to -1, apart from
+     vtype which is set to VNON.  Others are set to the value to be assigned.
+ The only attributes which the FS driver may request to change are the
+ mode, owner, groupid, atime, mtime and ctime. The return value
+ indicates success or failure.
+
+ Errors
+ A variety of errors can occur. The object may not exist, may
+ be inaccessible, or permission may not be granted by Venus.
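+
+  As a sketch, a request that changes only the mode could be prepared by
+  following the convention above; everything outside struct coda_vattr is
+  an assumption for illustration::
+
+      struct coda_vattr attr;
+
+      memset(&attr, 0xff, sizeof(attr));  /* all fields now read as -1     */
+      attr.va_type = VNON;                /* do not change the type        */
+      attr.va_mode = 0644;                /* the one attribute to change   */
+      /* attr is then copied into cfs_setattr_in and sent to Venus */
+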
+
+
+4.7. access
+------------
+
+
+ Arguments
+ in::
+
+ struct cfs_access_in {
+ ViceFid VFid;
+ int flags;
+ } cfs_access;
+
+
+
+ out
+
+ empty
+
+ Description
+ Verify if access to the object identified by VFid for
+ operations described by flags is permitted. The result indicates if
+ access will be granted. It is important to remember that Coda uses
+ ACLs to enforce protection and that ultimately the servers, not the
+ clients enforce the security of the system. The result of this call
+ will depend on whether a token is held by the user.
+
+ Errors
+ The object may not exist, or the ACL describing the protection
+ may not be accessible.
+
+
+4.8. create
+------------
+
+
+ Summary
+ Invoked to create a file
+
+ Arguments
+ in::
+
+ struct cfs_create_in {
+ ViceFid VFid;
+ struct coda_vattr attr;
+ int excl;
+ int mode;
+ char *name; /* Place holder for data. */
+ } cfs_create;
+
+
+
+
+ out::
+
+ struct cfs_create_out {
+ ViceFid VFid;
+ struct coda_vattr attr;
+ } cfs_create;
+
+
+
+ Description
+ This upcall is invoked to request creation of a file.
+ The file will be created in the directory identified by VFid, its name
+ will be name, and the mode will be mode. If excl is set an error will
+ be returned if the file already exists. If the size field in attr is
+ set to zero the file will be truncated. The uid and gid of the file
+ are set by converting the CodaCred to a uid using a macro CRTOUID
+ (this macro is platform dependent). Upon success the VFid and
+ attributes of the file are returned. The Coda FS Driver will normally
+ instantiate a vnode, inode or file handle at kernel level for the new
+ object.
+
+
+ Errors
+ A variety of errors can occur. Permissions may be insufficient.
+ If the object exists and is not a file the error EISDIR is returned
+ under Unix.
+
+ .. Note::
+
+ The packing of parameters is very inefficient and appears to
+ indicate confusion between the system call creat and the VFS operation
+ create. The VFS operation create is only called to create new objects.
+ This create call differs from the Unix one in that it is not invoked
+ to return a file descriptor. The truncate and exclusive options,
+ together with the mode, could simply be part of the mode as it is
+ under Unix. There should be no flags argument; this is used in open
+ (2) to return a file descriptor for READ or WRITE mode.
+
+ The attributes of the directory should be returned too, since the size
+ and mtime changed.
+
+
+4.9. mkdir
+-----------
+
+
+ Summary
+ Create a new directory.
+
+ Arguments
+ in::
+
+ struct cfs_mkdir_in {
+ ViceFid VFid;
+ struct coda_vattr attr;
+ char *name; /* Place holder for data. */
+ } cfs_mkdir;
+
+
+
+ out::
+
+ struct cfs_mkdir_out {
+ ViceFid VFid;
+ struct coda_vattr attr;
+ } cfs_mkdir;
+
+
+
+
+ Description
+ This call is similar to create but creates a directory.
+ Only the mode field in the input parameters is used for creation.
+ Upon successful creation, the attr returned contains the attributes of
+ the new directory.
+
+ Errors
+ As for create.
+
+ .. Note::
+
+ The input parameter should be changed to mode instead of
+ attributes.
+
+ The attributes of the parent should be returned since the size and
+ mtime changes.
+
+
+4.10. link
+-----------
+
+
+ Summary
+ Create a link to an existing file.
+
+ Arguments
+ in::
+
+ struct cfs_link_in {
+ ViceFid sourceFid; /* cnode to link *to* */
+ ViceFid destFid; /* Directory in which to place link */
+ char *tname; /* Place holder for data. */
+ } cfs_link;
+
+
+
+ out
+
+ empty
+
+ Description
+ This call creates a link to the sourceFid in the directory
+ identified by destFid with name tname. The source must reside in the
+     target's parent, i.e. the source must have parent destFid, since Coda
+ does not support cross directory hard links. Only the return value is
+ relevant. It indicates success or the type of failure.
+
+ Errors
+ The usual errors can occur.
+
+
+4.11. symlink
+--------------
+
+
+ Summary
+ create a symbolic link
+
+ Arguments
+ in::
+
+ struct cfs_symlink_in {
+ ViceFid VFid; /* Directory to put symlink in */
+ char *srcname;
+ struct coda_vattr attr;
+ char *tname;
+ } cfs_symlink;
+
+
+
+ out
+
+ none
+
+ Description
+ Create a symbolic link. The link is to be placed in the
+ directory identified by VFid and named tname. It should point to the
+ pathname srcname. The attributes of the newly created object are to
+ be set to attr.
+
+ .. Note::
+
+ The attributes of the target directory should be returned since
+ its size changed.
+
+
+4.12. remove
+-------------
+
+
+ Summary
+ Remove a file
+
+ Arguments
+ in::
+
+ struct cfs_remove_in {
+ ViceFid VFid;
+ char *name; /* Place holder for data. */
+ } cfs_remove;
+
+
+
+ out
+
+ none
+
+ Description
+ Remove file named cfs_remove_in.name in directory
+ identified by VFid.
+
+
+ .. Note::
+
+ The attributes of the directory should be returned since its
+ mtime and size may change.
+
+
+4.13. rmdir
+------------
+
+
+ Summary
+ Remove a directory
+
+ Arguments
+ in::
+
+ struct cfs_rmdir_in {
+ ViceFid VFid;
+ char *name; /* Place holder for data. */
+ } cfs_rmdir;
+
+
+
+ out
+
+ none
+
+ Description
+ Remove the directory with name name from the directory
+ identified by VFid.
+
+ .. Note:: The attributes of the parent directory should be returned since
+ its mtime and size may change.
+
+
+4.14. readlink
+---------------
+
+
+ Summary
+ Read the value of a symbolic link.
+
+ Arguments
+ in::
+
+ struct cfs_readlink_in {
+ ViceFid VFid;
+ } cfs_readlink;
+
+
+
+ out::
+
+ struct cfs_readlink_out {
+ int count;
+ caddr_t data; /* Place holder for data. */
+ } cfs_readlink;
+
+
+
+ Description
+ This routine reads the contents of symbolic link
+ identified by VFid into the buffer data. The buffer data must be able
+ to hold any name up to CFS_MAXNAMLEN (PATH or NAM??).
+
+ Errors
+ No unusual errors.
+
+
+4.15. open
+-----------
+
+
+ Summary
+ Open a file.
+
+ Arguments
+ in::
+
+ struct cfs_open_in {
+ ViceFid VFid;
+ int flags;
+ } cfs_open;
+
+
+
+ out::
+
+ struct cfs_open_out {
+ dev_t dev;
+ ino_t inode;
+ } cfs_open;
+
+
+
+ Description
+ This request asks Venus to place the file identified by
+ VFid in its cache and to note that the calling process wishes to open
+ it with flags as in open(2). The return value to the kernel differs
+ for Unix and Windows systems. For Unix systems the Coda FS Driver is
+ informed of the device and inode number of the container file in the
+ fields dev and inode. For Windows the path of the container file is
+ returned to the kernel.
+
+
+ .. Note::
+
+ Currently the cfs_open_out structure is not properly adapted to
+ deal with the Windows case. It might be best to implement two
+ upcalls, one to open aiming at a container file name, the other at a
+ container file inode.
+
+
+4.16. close
+------------
+
+
+ Summary
+ Close a file, update it on the servers.
+
+ Arguments
+ in::
+
+ struct cfs_close_in {
+ ViceFid VFid;
+ int flags;
+ } cfs_close;
+
+
+
+ out
+
+ none
+
+ Description
+ Close the file identified by VFid.
+
+ .. Note::
+
+ The flags argument is bogus and not used. However, Venus' code
+ has room to deal with an execp input field, probably this field should
+ be used to inform Venus that the file was closed but is still memory
+ mapped for execution. There are comments about fetching versus not
+ fetching the data in Venus vproc_vfscalls. This seems silly. If a
+ file is being closed, the data in the container file is to be the new
+ data. Here again the execp flag might be in play to create confusion:
+ currently Venus might think a file can be flushed from the cache when
+ it is still memory mapped. This needs to be understood.
+
+
+4.17. ioctl
+------------
+
+
+ Summary
+ Do an ioctl on a file. This includes the pioctl interface.
+
+ Arguments
+ in::
+
+ struct cfs_ioctl_in {
+ ViceFid VFid;
+ int cmd;
+ int len;
+ int rwflag;
+ char *data; /* Place holder for data. */
+ } cfs_ioctl;
+
+
+
+ out::
+
+
+ struct cfs_ioctl_out {
+ int len;
+ caddr_t data; /* Place holder for data. */
+ } cfs_ioctl;
+
+
+
+ Description
+ Do an ioctl operation on a file. The command, len and
+ data arguments are filled as usual. flags is not used by Venus.
+
+ .. Note::
+
+ Another bogus parameter. flags is not used. What is the
+ business about PREFETCHING in the Venus code?
+
+
+
+4.18. rename
+-------------
+
+
+ Summary
+ Rename a fid.
+
+ Arguments
+ in::
+
+ struct cfs_rename_in {
+ ViceFid sourceFid;
+ char *srcname;
+ ViceFid destFid;
+ char *destname;
+ } cfs_rename;
+
+
+
+ out
+
+ none
+
+ Description
+ Rename the object with name srcname in directory
+ sourceFid to destname in destFid. It is important that the names
+ srcname and destname are 0 terminated strings. Strings in Unix
+ kernels are not always null terminated.
+
+
+4.19. readdir
+--------------
+
+
+ Summary
+ Read directory entries.
+
+ Arguments
+ in::
+
+ struct cfs_readdir_in {
+ ViceFid VFid;
+ int count;
+ int offset;
+ } cfs_readdir;
+
+
+
+
+ out::
+
+ struct cfs_readdir_out {
+ int size;
+ caddr_t data; /* Place holder for data. */
+ } cfs_readdir;
+
+
+
+ Description
+ Read directory entries from VFid starting at offset and
+ read at most count bytes. Returns the data in data and returns
+ the size in size.
+
+
+ .. Note::
+
+ This call is not used. Readdir operations exploit container
+ files. We will re-evaluate this during the directory revamp which is
+ about to take place.
+
+
+4.20. vget
+-----------
+
+
+ Summary
+ instructs Venus to do an FSDB->Get.
+
+ Arguments
+ in::
+
+ struct cfs_vget_in {
+ ViceFid VFid;
+ } cfs_vget;
+
+
+
+ out::
+
+ struct cfs_vget_out {
+ ViceFid VFid;
+ int vtype;
+ } cfs_vget;
+
+
+
+ Description
+ This upcall asks Venus to do a get operation on an fsobj
+ labelled by VFid.
+
+ .. Note::
+
+ This operation is not used. However, it is extremely useful
+ since it can be used to deal with read/write memory mapped files.
+ These can be "pinned" in the Venus cache using vget and released with
+ inactive.
+
+
+4.21. fsync
+------------
+
+
+ Summary
+ Tell Venus to update the RVM attributes of a file.
+
+ Arguments
+ in::
+
+ struct cfs_fsync_in {
+ ViceFid VFid;
+ } cfs_fsync;
+
+
+
+ out
+
+ none
+
+ Description
+ Ask Venus to update RVM attributes of object VFid. This
+ should be called as part of kernel level fsync type calls. The
+ result indicates if the syncing was successful.
+
+ .. Note:: Linux does not implement this call. It should.
+
+
+4.22. inactive
+---------------
+
+
+ Summary
+ Tell Venus a vnode is no longer in use.
+
+ Arguments
+ in::
+
+ struct cfs_inactive_in {
+ ViceFid VFid;
+ } cfs_inactive;
+
+
+
+ out
+
+ none
+
+ Description
+ This operation returns EOPNOTSUPP.
+
+ .. Note:: This should perhaps be removed.
+
+
+4.23. rdwr
+-----------
+
+
+ Summary
+ Read or write from a file
+
+ Arguments
+ in::
+
+ struct cfs_rdwr_in {
+ ViceFid VFid;
+ int rwflag;
+ int count;
+ int offset;
+ int ioflag;
+ caddr_t data; /* Place holder for data. */
+ } cfs_rdwr;
+
+
+
+
+ out::
+
+ struct cfs_rdwr_out {
+ int rwflag;
+ int count;
+ caddr_t data; /* Place holder for data. */
+ } cfs_rdwr;
+
+
+
+ Description
+ This upcall asks Venus to read or write from a file.
+
+
+ .. Note::
+
+ It should be removed since it is against the Coda philosophy that
+ read/write operations never reach Venus. I have been told the
+ operation does not work. It is not currently used.
+
+
+
+4.24. odymount
+---------------
+
+
+ Summary
+ Allows mounting multiple Coda "filesystems" on one Unix mount point.
+
+ Arguments
+ in::
+
+ struct ody_mount_in {
+ char *name; /* Place holder for data. */
+ } ody_mount;
+
+
+
+ out::
+
+ struct ody_mount_out {
+ ViceFid VFid;
+ } ody_mount;
+
+
+
+ Description
+ Asks Venus to return the rootfid of a Coda system named
+ name. The fid is returned in VFid.
+
+ .. Note::
+
+ This call was used by David for dynamic sets. It should be
+ removed since it causes a jungle of pointers in the VFS mounting area.
+ It is not used by Coda proper. Call is not implemented by Venus.
+
+
+4.25. ody_lookup
+-----------------
+
+
+ Summary
+ Looks up something.
+
+ Arguments
+ in
+
+ irrelevant
+
+
+ out
+
+ irrelevant
+
+
+ .. Note:: Gut it. Call is not implemented by Venus.
+
+
+4.26. ody_expand
+-----------------
+
+
+ Summary
+ expands something in a dynamic set.
+
+ Arguments
+ in
+
+ irrelevant
+
+ out
+
+ irrelevant
+
+ .. Note:: Gut it. Call is not implemented by Venus.
+
+
+4.27. prefetch
+---------------
+
+
+ Summary
+ Prefetch a dynamic set.
+
+ Arguments
+
+ in
+
+ Not documented.
+
+ out
+
+ Not documented.
+
+ Description
+ Venus worker.cc has support for this call, although it is
+ noted that it doesn't work. Not surprising, since the kernel does not
+ have support for it. (ODY_PREFETCH is not a defined operation).
+
+
+ .. Note:: Gut it. It isn't working and isn't used by Coda.
+
+
+
+4.28. signal
+-------------
+
+
+ Summary
+ Send Venus a signal about an upcall.
+
+ Arguments
+ in
+
+ none
+
+ out
+
+ not applicable.
+
+ Description
+ This is an out-of-band upcall to Venus to inform Venus
+ that the calling process received a signal after Venus read the
+ message from the input queue. Venus is supposed to clean up the
+ operation.
+
+ Errors
+ No reply is given.
+
+ .. Note::
+
+ We need to better understand what Venus needs to clean up and if
+ it is doing this correctly. Also we need to handle multiple upcall
+ per system call situations correctly. It would be important to know
+ what state changes in Venus take place after an upcall for which the
+ kernel is responsible for notifying Venus to clean up (e.g. open
+ definitely is such a state change, but many others are maybe not).
+
+
+5. The minicache and downcalls
+===============================
+
+
+ The Coda FS Driver can cache results of lookup and access upcalls, to
+ limit the frequency of upcalls. Upcalls carry a price since a process
+ context switch needs to take place. The counterpart of caching the
+ information is that Venus will notify the FS Driver that cached
+ entries must be flushed or renamed.
+
+ The kernel code generally has to maintain a structure which links the
+ internal file handles (called vnodes in BSD, inodes in Linux and
+ FileHandles in Windows) with the ViceFid's which Venus maintains. The
+ reason is that frequent translations back and forth are needed in
+ order to make upcalls and use the results of upcalls. Such linking
+ objects are called cnodes.
+
+ The current minicache implementations have cache entries which record
+ the following:
+
+ 1. the name of the file
+
+ 2. the cnode of the directory containing the object
+
+ 3. a list of CodaCred's for which the lookup is permitted.
+
+ 4. the cnode of the object
+
+ The lookup call in the Coda FS Driver may request the cnode of the
+ desired object from the cache, by passing its name, directory and the
+ CodaCred's of the caller. The cache will return the cnode or indicate
+ that it cannot be found. The Coda FS Driver must be careful to
+ invalidate cache entries when it modifies or removes objects.
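+
+  A minimal sketch of a cache entry recording the four items above, and of
+  the lookup it supports, follows; the names are illustrative, not those of
+  any existing driver, and the list of credentials is simplified to one::
+
+      struct mc_entry {
+          char             *name;     /* 1. name of the file                  */
+          struct cnode     *dir;      /* 2. cnode of the containing directory */
+          struct CodaCred   cred;     /* 3. a cred the lookup is valid for    */
+          struct cnode     *obj;      /* 4. cnode of the object               */
+          struct mc_entry  *next;
+      };
+
+      /* Returns the cached cnode, or NULL when (dir, name, cred) is absent. */
+      struct cnode *mc_lookup(struct cnode *dir, const char *name,
+                              const struct CodaCred *cred);
+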
+
+ When Venus obtains information that indicates that cache entries are
+ no longer valid, it will make a downcall to the kernel. Downcalls are
+ intercepted by the Coda FS Driver and lead to cache invalidations of
+ the kind described below. The Coda FS Driver does not return an error
+ unless the downcall data could not be read into kernel memory.
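+
+  The dispatch of downcalls to the invalidations described below can be
+  pictured as follows; the opcode constants follow the naming used in
+  sections 5.2-5.8, while the handler names and the reply union are
+  assumptions made for illustration::
+
+      static int handle_downcall(int opcode, union downcall_args *out)
+      {
+          switch (opcode) {
+          case CFS_FLUSH:     mc_flush();                                break;
+          case CFS_PURGEUSER: mc_purge_user(&out->cfs_purgeuser.cred);   break;
+          case CFS_ZAPFILE:   mc_zap_file(&out->cfs_zapfile.CodaFid);    break;
+          case CFS_ZAPDIR:    mc_zap_dir(&out->cfs_zapdir.CodaFid);      break;
+          case CFS_PURGEFID:  mc_purge_fid(&out->cfs_purgefid.CodaFid);  break;
+          case CFS_REPLACE:   mc_replace(&out->cfs_replace.OldFid,
+                                         &out->cfs_replace.NewFid);      break;
+          default:            return -1;  /* unknown or unhandled downcall */
+          }
+          return 0;
+      }
+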
+
+
+5.1. INVALIDATE
+----------------
+
+
+ No information is available on this call.
+
+
+5.2. FLUSH
+-----------
+
+
+
+ Arguments
+ None
+
+ Summary
+ Flush the name cache entirely.
+
+ Description
+ Venus issues this call upon startup and when it dies. This
+ is to prevent stale cache information being held. Some operating
+ systems allow the kernel name cache to be switched off dynamically.
+ When this is done, this downcall is made.
+
+
+5.3. PURGEUSER
+---------------
+
+
+ Arguments
+ ::
+
+ struct cfs_purgeuser_out {/* CFS_PURGEUSER is a venus->kernel call */
+ struct CodaCred cred;
+ } cfs_purgeuser;
+
+
+
+ Description
+ Remove all entries in the cache carrying the Cred. This
+ call is issued when tokens for a user expire or are flushed.
+
+
+5.4. ZAPFILE
+-------------
+
+
+ Arguments
+ ::
+
+ struct cfs_zapfile_out { /* CFS_ZAPFILE is a venus->kernel call */
+ ViceFid CodaFid;
+ } cfs_zapfile;
+
+
+
+ Description
+ Remove all entries which have the (dir vnode, name) pair.
+ This is issued as a result of an invalidation of cached attributes of
+ a vnode.
+
+ .. Note::
+
+ Call is not named correctly in NetBSD and Mach. The minicache
+ zapfile routine takes different arguments. Linux does not implement
+ the invalidation of attributes correctly.
+
+
+
+5.5. ZAPDIR
+------------
+
+
+ Arguments
+ ::
+
+ struct cfs_zapdir_out { /* CFS_ZAPDIR is a venus->kernel call */
+ ViceFid CodaFid;
+ } cfs_zapdir;
+
+
+
+ Description
+ Remove all entries in the cache lying in a directory
+ CodaFid, and all children of this directory. This call is issued when
+ Venus receives a callback on the directory.
+
+
+5.6. ZAPVNODE
+--------------
+
+
+
+ Arguments
+ ::
+
+ struct cfs_zapvnode_out { /* CFS_ZAPVNODE is a venus->kernel call */
+ struct CodaCred cred;
+ ViceFid VFid;
+ } cfs_zapvnode;
+
+
+
+ Description
+ Remove all entries in the cache carrying the cred and VFid
+ as in the arguments. This downcall is probably never issued.
+
+
+5.7. PURGEFID
+--------------
+
+
+ Arguments
+ ::
+
+ struct cfs_purgefid_out { /* CFS_PURGEFID is a venus->kernel call */
+ ViceFid CodaFid;
+ } cfs_purgefid;
+
+
+
+ Description
+ Flush the attribute for the file. If it is a dir (odd
+ vnode), purge its children from the namecache and remove the file from the
+ namecache.
+
+
+
+5.8. REPLACE
+-------------
+
+
+ Summary
+ Replace the Fid's for a collection of names.
+
+ Arguments
+ ::
+
+ struct cfs_replace_out { /* cfs_replace is a venus->kernel call */
+ ViceFid NewFid;
+ ViceFid OldFid;
+ } cfs_replace;
+
+
+
+ Description
+ This routine replaces a ViceFid in the name cache with
+ another. It is added to allow Venus during reintegration to replace
+ locally allocated temp fids while disconnected with global fids even
+ when the reference counts on those fids are not zero.
+
+
+6. Initialization and cleanup
+==============================
+
+
+ This section gives brief hints as to desirable features for the Coda
+ FS Driver at startup and upon shutdown or Venus failures. Before
+ entering the discussion it is useful to repeat that the Coda FS Driver
+ maintains the following data:
+
+
+ 1. message queues
+
+ 2. cnodes
+
+ 3. name cache entries
+
+ The name cache entries are entirely private to the driver, so they
+ can easily be manipulated. The message queues will generally have
+ clear points of initialization and destruction. The cnodes are
+ much more delicate. User processes hold reference counts in Coda
+ filesystems and it can be difficult to clean up the cnodes.
+
+ It can expect requests through:
+
+ 1. the message subsystem
+
+ 2. the VFS layer
+
+ 3. pioctl interface
+
+ Currently the pioctl passes through the VFS for Coda so we can
+ treat these similarly.
+
+
+6.1. Requirements
+------------------
+
+
+ The following requirements should be accommodated:
+
+ 1. The message queues should have open and close routines. On Unix
+     the opening and closing of the character device are such routines.
+
+ - Before opening, no messages can be placed.
+
+ - Opening will remove any old messages still pending.
+
+ - Close will notify any sleeping processes that their upcall cannot
+ be completed.
+
+ - Close will free all memory allocated by the message queues.
+
+
+ 2. At open the namecache shall be initialized to empty state.
+
+ 3. Before the message queues are open, all VFS operations will fail.
+     Fortunately this can be achieved by making sure that mounting the
+ Coda filesystem cannot succeed before opening.
+
+ 4. After closing of the queues, no VFS operations can succeed. Here
+ one needs to be careful, since a few operations (lookup,
+ read/write, readdir) can proceed without upcalls. These must be
+ explicitly blocked.
+
+ 5. Upon closing the namecache shall be flushed and disabled.
+
+ 6. All memory held by cnodes can be freed without relying on upcalls.
+
+ 7. Unmounting the file system can be done without relying on upcalls.
+
+ 8. Mounting the Coda filesystem should fail gracefully if Venus cannot
+ get the rootfid or the attributes of the rootfid. The latter is
+ best implemented by Venus fetching these objects before attempting
+ to mount.
+
+ .. Note::
+
+ NetBSD in particular but also Linux have not implemented the
+ above requirements fully. For smooth operation this needs to be
+ corrected.
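+
+  As an illustration of requirements 1 and 2, the open routine of the
+  message device might look like the following sketch; the names are
+  hypothetical and error handling is kept minimal::
+
+      static int msgdev_open(void)
+      {
+          if (venus_attached)
+              return -EBUSY;          /* only one cache manager at a time    */
+          flush_message_queues();     /* drop any stale pending messages     */
+          name_cache_init();          /* 2. namecache starts out empty       */
+          venus_attached = 1;         /* messages may be placed from here on */
+          return 0;
+      }
+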
+
+
+
diff --git a/Documentation/filesystems/coda.txt b/Documentation/filesystems/coda.txt
deleted file mode 100644
index 1711ad48e38a..000000000000
--- a/Documentation/filesystems/coda.txt
+++ /dev/null
@@ -1,1676 +0,0 @@
-NOTE:
-This is one of the technical documents describing a component of
-Coda -- this document describes the client kernel-Venus interface.
-
-For more information:
- http://www.coda.cs.cmu.edu
-For user level software needed to run Coda:
- ftp://ftp.coda.cs.cmu.edu
-
-To run Coda you need to get a user level cache manager for the client,
-named Venus, as well as tools to manipulate ACLs, to log in, etc. The
-client needs to have the Coda filesystem selected in the kernel
-configuration.
-
-The server needs a user level server and at present does not depend on
-kernel support.
-
-
-
-
-
-
-
- The Venus kernel interface
- Peter J. Braam
- v1.0, Nov 9, 1997
-
- This document describes the communication between Venus and kernel
- level filesystem code needed for the operation of the Coda file sys-
- tem. This document version is meant to describe the current interface
- (version 1.0) as well as improvements we envisage.
- ______________________________________________________________________
-
- Table of Contents
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- 1. Introduction
-
- 2. Servicing Coda filesystem calls
-
- 3. The message layer
-
- 3.1 Implementation details
-
- 4. The interface at the call level
-
- 4.1 Data structures shared by the kernel and Venus
- 4.2 The pioctl interface
- 4.3 root
- 4.4 lookup
- 4.5 getattr
- 4.6 setattr
- 4.7 access
- 4.8 create
- 4.9 mkdir
- 4.10 link
- 4.11 symlink
- 4.12 remove
- 4.13 rmdir
- 4.14 readlink
- 4.15 open
- 4.16 close
- 4.17 ioctl
- 4.18 rename
- 4.19 readdir
- 4.20 vget
- 4.21 fsync
- 4.22 inactive
- 4.23 rdwr
- 4.24 odymount
- 4.25 ody_lookup
- 4.26 ody_expand
- 4.27 prefetch
- 4.28 signal
-
- 5. The minicache and downcalls
-
- 5.1 INVALIDATE
- 5.2 FLUSH
- 5.3 PURGEUSER
- 5.4 ZAPFILE
- 5.5 ZAPDIR
- 5.6 ZAPVNODE
- 5.7 PURGEFID
- 5.8 REPLACE
-
- 6. Initialization and cleanup
-
- 6.1 Requirements
-
-
- ______________________________________________________________________
- 0wpage
-
- 11.. IInnttrroodduuccttiioonn
-
-
-
- A key component in the Coda Distributed File System is the cache
- manager, _V_e_n_u_s.
-
-
- When processes on a Coda enabled system access files in the Coda
- filesystem, requests are directed at the filesystem layer in the
- operating system. The operating system will communicate with Venus to
- service the request for the process. Venus manages a persistent
- client cache and makes remote procedure calls to Coda file servers and
- related servers (such as authentication servers) to service these
- requests it receives from the operating system. When Venus has
- serviced a request it replies to the operating system with appropriate
- return codes, and other data related to the request. Optionally the
- kernel support for Coda may maintain a minicache of recently processed
- requests to limit the number of interactions with Venus. Venus
- possesses the facility to inform the kernel when elements from its
- minicache are no longer valid.
-
- This document describes precisely this communication between the
- kernel and Venus. The definitions of so called upcalls and downcalls
- will be given with the format of the data they handle. We shall also
- describe the semantic invariants resulting from the calls.
-
- Historically Coda was implemented in a BSD file system in Mach 2.6.
- The interface between the kernel and Venus is very similar to the BSD
- VFS interface. Similar functionality is provided, and the format of
- the parameters and returned data is very similar to the BSD VFS. This
- leads to an almost natural environment for implementing a kernel-level
- filesystem driver for Coda in a BSD system. However, other operating
- systems such as Linux and Windows 95 and NT have virtual filesystem
- with different interfaces.
-
- To implement Coda on these systems some reverse engineering of the
- Venus/Kernel protocol is necessary. Also it came to light that other
- systems could profit significantly from certain small optimizations
- and modifications to the protocol. To facilitate this work as well as
- to make future ports easier, communication between Venus and the
- kernel should be documented in great detail. This is the aim of this
- document.
-
- 0wpage
-
- 22.. SSeerrvviicciinngg CCooddaa ffiilleessyysstteemm ccaallllss
-
- The service of a request for a Coda file system service originates in
- a process PP which accessing a Coda file. It makes a system call which
- traps to the OS kernel. Examples of such calls trapping to the kernel
- are _r_e_a_d_, _w_r_i_t_e_, _o_p_e_n_, _c_l_o_s_e_, _c_r_e_a_t_e_, _m_k_d_i_r_, _r_m_d_i_r_, _c_h_m_o_d in a Unix
- context. Similar calls exist in the Win32 environment, and are named
- _C_r_e_a_t_e_F_i_l_e_, .
-
- Generally the operating system handles the request in a virtual
- filesystem (VFS) layer, which is named I/O Manager in NT and IFS
- manager in Windows 95. The VFS is responsible for partial processing
- of the request and for locating the specific filesystem(s) which will
- service parts of the request. Usually the information in the path
- assists in locating the correct FS drivers. Sometimes after extensive
- pre-processing, the VFS starts invoking exported routines in the FS
- driver. This is the point where the FS specific processing of the
- request starts, and here the Coda specific kernel code comes into
- play.
-
- The FS layer for Coda must expose and implement several interfaces.
- First and foremost the VFS must be able to make all necessary calls to
- the Coda FS layer, so the Coda FS driver must expose the VFS interface
- as applicable in the operating system. These differ very significantly
- among operating systems, but share features such as facilities to
- read/write and create and remove objects. The Coda FS layer services
- such VFS requests by invoking one or more well defined services
- offered by the cache manager Venus. When the replies from Venus have
- come back to the FS driver, servicing of the VFS call continues and
- finishes with a reply to the kernel's VFS. Finally the VFS layer
- returns to the process.
-
- As a result of this design a basic interface exposed by the FS driver
- must allow Venus to manage message traffic. In particular Venus must
- be able to retrieve and place messages and to be notified of the
- arrival of a new message. The notification must be through a mechanism
- which does not block Venus since Venus must attend to other tasks even
- when no messages are waiting or being processed.
-
-
-
-
-
-
- Interfaces of the Coda FS Driver
-
- Furthermore the FS layer provides for a special path of communication
- between a user process and Venus, called the pioctl interface. The
- pioctl interface is used for Coda specific services, such as
- requesting detailed information about the persistent cache managed by
- Venus. Here the involvement of the kernel is minimal. It identifies
- the calling process and passes the information on to Venus. When
- Venus replies the response is passed back to the caller in unmodified
- form.
-
- Finally Venus allows the kernel FS driver to cache the results from
- certain services. This is done to avoid excessive context switches
- and results in an efficient system. However, Venus may acquire
- information, for example from the network which implies that cached
- information must be flushed or replaced. Venus then makes a downcall
- to the Coda FS layer to request flushes or updates in the cache. The
- kernel FS driver handles such requests synchronously.
-
- Among these interfaces the VFS interface and the facility to place,
- receive and be notified of messages are platform specific. We will
- not go into the calls exported to the VFS layer but we will state the
- requirements of the message exchange mechanism.
-
- 0wpage
-
- 33.. TThhee mmeessssaaggee llaayyeerr
-
-
-
- At the lowest level the communication between Venus and the FS driver
- proceeds through messages. The synchronization between processes
- requesting Coda file service and Venus relies on blocking and waking
- up processes. The Coda FS driver processes VFS- and pioctl-requests
- on behalf of a process P, creates messages for Venus, awaits replies
- and finally returns to the caller. The implementation of the exchange
- of messages is platform specific, but the semantics have (so far)
- appeared to be generally applicable. Data buffers are created by the
- FS Driver in kernel memory on behalf of P and copied to user memory in
- Venus.
-
- The FS Driver while servicing P makes upcalls to Venus. Such an
- upcall is dispatched to Venus by creating a message structure. The
- structure contains the identification of P, the message sequence
- number, the size of the request and a pointer to the data in kernel
- memory for the request. Since the data buffer is re-used to hold the
- reply from Venus, there is a field for the size of the reply. A flags
- field is used in the message to precisely record the status of the
- message. Additional platform dependent structures involve pointers to
- determine the position of the message on queues and pointers to
- synchronization objects. In the upcall routine the message structure
- is filled in, flags are set to 0, and it is placed on the _p_e_n_d_i_n_g
- queue. The routine calling upcall is responsible for allocating the
- data buffer; its structure will be described in the next section.
-
- A facility must exist to notify Venus that the message has been
- created, and implemented using available synchronization objects in
- the OS. This notification is done in the upcall context of the process
- P. When the message is on the pending queue, process P cannot proceed
- in upcall. The (kernel mode) processing of P in the filesystem
- request routine must be suspended until Venus has replied. Therefore
- the calling thread in P is blocked in upcall. A pointer in the
- message structure will locate the synchronization object on which P is
- sleeping.
-
- Venus detects the notification that a message has arrived, and the FS
- driver allow Venus to retrieve the message with a getmsg_from_kernel
- call. This action finishes in the kernel by putting the message on the
- queue of processing messages and setting flags to READ. Venus is
- passed the contents of the data buffer. The getmsg_from_kernel call
- now returns and Venus processes the request.
-
- At some later point the FS driver receives a message from Venus,
- namely when Venus calls sendmsg_to_kernel. At this moment the Coda FS
- driver looks at the contents of the message and decides if:
-
-
- +o the message is a reply for a suspended thread P. If so it removes
- the message from the processing queue and marks the message as
- WRITTEN. Finally, the FS driver unblocks P (still in the kernel
- mode context of Venus) and the sendmsg_to_kernel call returns to
- Venus. The process P will be scheduled at some point and continues
- processing its upcall with the data buffer replaced with the reply
- from Venus.
-
- +o The message is a _d_o_w_n_c_a_l_l. A downcall is a request from Venus to
- the FS Driver. The FS driver processes the request immediately
- (usually a cache eviction or replacement) and when it finishes
- sendmsg_to_kernel returns.
-
- Now P awakes and continues processing upcall. There are some
- subtleties to take account of. First P will determine if it was woken
- up in upcall by a signal from some other source (for example an
- attempt to terminate P) or as is normally the case by Venus in its
- sendmsg_to_kernel call. In the normal case, the upcall routine will
- deallocate the message structure and return. The FS routine can proceed
- with its processing.
-
-
-
-
-
-
-
- Sleeping and IPC arrangements
-
- In case P is woken up by a signal and not by Venus, it will first look
- at the flags field. If the message is not yet READ, the process P can
- handle its signal without notifying Venus. If Venus has READ, and
- the request should not be processed, P can send Venus a signal message
- to indicate that it should disregard the previous message. Such
- signals are put in the queue at the head, and read first by Venus. If
- the message is already marked as WRITTEN it is too late to stop the
- processing. The VFS routine will now continue. (-- If a VFS request
- involves more than one upcall, this can lead to complicated state, an
- extra field "handle_signals" could be added in the message structure
- to indicate points of no return have been passed.--)
-
-
-
- 33..11.. IImmpplleemmeennttaattiioonn ddeettaaiillss
-
- The Unix implementation of this mechanism has been through the
- implementation of a character device associated with Coda. Venus
- retrieves messages by doing a read on the device, replies are sent
- with a write and notification is through the select system call on the
- file descriptor for the device. The process P is kept waiting on an
- interruptible wait queue object.
-
- In Windows NT and the DPMI Windows 95 implementation a DeviceIoControl
- call is used. The DeviceIoControl call is designed to copy buffers
- from user memory to kernel memory with OPCODES. The sendmsg_to_kernel
- is issued as a synchronous call, while the getmsg_from_kernel call is
- asynchronous. Windows EventObjects are used for notification of
- message arrival. The process P is kept waiting on a KernelEvent
- object in NT and a semaphore in Windows 95.
-
- 0wpage
-
- 44.. TThhee iinntteerrffaaccee aatt tthhee ccaallll lleevveell
-
-
- This section describes the upcalls a Coda FS driver can make to Venus.
- Each of these upcalls make use of two structures: inputArgs and
- outputArgs. In pseudo BNF form the structures take the following
- form:
-
-
- struct inputArgs {
- u_long opcode;
- u_long unique; /* Keep multiple outstanding msgs distinct */
- u_short pid; /* Common to all */
- u_short pgid; /* Common to all */
- struct CodaCred cred; /* Common to all */
-
- <union "in" of call dependent parts of inputArgs>
- };
-
- struct outputArgs {
- u_long opcode;
- u_long unique; /* Keep multiple outstanding msgs distinct */
- u_long result;
-
- <union "out" of call dependent parts of inputArgs>
- };
-
-
-
- Before going on let us elucidate the role of the various fields. The
- inputArgs start with the opcode which defines the type of service
- requested from Venus. There are approximately 30 upcalls at present
- which we will discuss. The unique field labels the inputArg with a
- unique number which will identify the message uniquely. A process and
- process group id are passed. Finally the credentials of the caller
- are included.
-
- Before delving into the specific calls we need to discuss a variety of
- data structures shared by the kernel and Venus.
-
-
-
-
- 44..11.. DDaattaa ssttrruuccttuurreess sshhaarreedd bbyy tthhee kkeerrnneell aanndd VVeennuuss
-
-
- The CodaCred structure defines a variety of user and group ids as
- they are set for the calling process. The vuid_t and vgid_t are 32 bit
- unsigned integers. It also defines group membership in an array. On
- Unix the CodaCred has proven sufficient to implement good security
- semantics for Coda but the structure may have to undergo modification
- for the Windows environment when these mature.
-
- struct CodaCred {
- vuid_t cr_uid, cr_euid, cr_suid, cr_fsuid; /* Real, effective, set, fs uid */
- vgid_t cr_gid, cr_egid, cr_sgid, cr_fsgid; /* same for groups */
- vgid_t cr_groups[NGROUPS]; /* Group membership for caller */
- };
-
-
-
- NNOOTTEE It is questionable if we need CodaCreds in Venus. Finally Venus
- doesn't know about groups, although it does create files with the
- default uid/gid. Perhaps the list of group membership is superfluous.
-
-
- The next item is the fundamental identifier used to identify Coda
- files, the ViceFid. A fid of a file uniquely defines a file or
- directory in the Coda filesystem within a _c_e_l_l. (-- A _c_e_l_l is a
- group of Coda servers acting under the aegis of a single system
- control machine or SCM. See the Coda Administration manual for a
- detailed description of the role of the SCM.--)
-
-
- typedef struct ViceFid {
- VolumeId Volume;
- VnodeId Vnode;
- Unique_t Unique;
- } ViceFid;
-
-
-
- Each of the constituent fields: VolumeId, VnodeId and Unique_t are
- unsigned 32 bit integers. We envisage that a further field will need
- to be prefixed to identify the Coda cell; this will probably take the
- form of a Ipv6 size IP address naming the Coda cell through DNS.
-
- The next important structure shared between Venus and the kernel is
- the attributes of the file. The following structure is used to
- exchange information. It has room for future extensions such as
- support for device files (currently not present in Coda).
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- struct coda_timespec {
- int64_t tv_sec; /* seconds */
- long tv_nsec; /* nanoseconds */
- };
-
- struct coda_vattr {
- enum coda_vtype va_type; /* vnode type (for create) */
- u_short va_mode; /* files access mode and type */
- short va_nlink; /* number of references to file */
- vuid_t va_uid; /* owner user id */
- vgid_t va_gid; /* owner group id */
- long va_fsid; /* file system id (dev for now) */
- long va_fileid; /* file id */
- u_quad_t va_size; /* file size in bytes */
- long va_blocksize; /* blocksize preferred for i/o */
- struct coda_timespec va_atime; /* time of last access */
- struct coda_timespec va_mtime; /* time of last modification */
- struct coda_timespec va_ctime; /* time file changed */
- u_long va_gen; /* generation number of file */
- u_long va_flags; /* flags defined for file */
- dev_t va_rdev; /* device special file represents */
- u_quad_t va_bytes; /* bytes of disk space held by file */
- u_quad_t va_filerev; /* file modification number */
- u_int va_vaflags; /* operations flags, see below */
- long va_spare; /* remain quad aligned */
- };
-
-
-
-
- 44..22.. TThhee ppiiooccttll iinntteerrffaaccee
-
-
- Coda specific requests can be made by application through the pioctl
- interface. The pioctl is implemented as an ordinary ioctl on a
- fictitious file /coda/.CONTROL. The pioctl call opens this file, gets
- a file handle and makes the ioctl call. Finally it closes the file.
-
- The kernel involvement in this is limited to providing the facility to
- open and close and pass the ioctl message _a_n_d to verify that a path in
- the pioctl data buffers is a file in a Coda filesystem.
-
- The kernel is handed a data packet of the form:
-
- struct {
- const char *path;
- struct ViceIoctl vidata;
- int follow;
- } data;
-
-
-
- where
-
-
- struct ViceIoctl {
- caddr_t in, out; /* Data to be transferred in, or out */
- short in_size; /* Size of input buffer <= 2K */
- short out_size; /* Maximum size of output buffer, <= 2K */
- };
-
-
-
- The path must be a Coda file, otherwise the ioctl upcall will not be
- made.
-
- NNOOTTEE The data structures and code are a mess. We need to clean this
- up.
-
- We now proceed to document the individual calls:
-
- 0wpage
-
- 44..33.. rroooott
-
-
- AArrgguummeennttss
-
- iinn empty
-
- oouutt
-
- struct cfs_root_out {
- ViceFid VFid;
- } cfs_root;
-
-
-
- DDeessccrriippttiioonn This call is made to Venus during the initialization of
- the Coda filesystem. If the result is zero, the cfs_root structure
- contains the ViceFid of the root of the Coda filesystem. If a non-zero
- result is generated, its value is a platform dependent error code
- indicating the difficulty Venus encountered in locating the root of
- the Coda filesystem.
-
- 0wpage
-
- 44..44.. llooookkuupp
-
-
- SSuummmmaarryy Find the ViceFid and type of an object in a directory if it
- exists.
-
- AArrgguummeennttss
-
- iinn
-
- struct cfs_lookup_in {
- ViceFid VFid;
- char *name; /* Place holder for data. */
- } cfs_lookup;
-
-
-
- oouutt
-
- struct cfs_lookup_out {
- ViceFid VFid;
- int vtype;
- } cfs_lookup;
-
-
-
- DDeessccrriippttiioonn This call is made to determine the ViceFid and filetype of
- a directory entry. The directory entry requested carries name name
- and Venus will search the directory identified by cfs_lookup_in.VFid.
- The result may indicate that the name does not exist, or that
- difficulty was encountered in finding it (e.g. due to disconnection).
- If the result is zero, the field cfs_lookup_out.VFid contains the
- targets ViceFid and cfs_lookup_out.vtype the coda_vtype giving the
- type of object the name designates.
-
- The name of the object is an 8 bit character string of maximum length
- CFS_MAXNAMLEN, currently set to 256 (including a 0 terminator.)
-
- It is extremely important to realize that Venus bitwise ors the field
- cfs_lookup.vtype with CFS_NOCACHE to indicate that the object should
- not be put in the kernel name cache.
-
- NNOOTTEE The type of the vtype is currently wrong. It should be
- coda_vtype. Linux does not take note of CFS_NOCACHE. It should.
-
- 0wpage
-
- 44..55.. ggeettaattttrr
-
-
- SSuummmmaarryy Get the attributes of a file.
-
- AArrgguummeennttss
-
- iinn
-
- struct cfs_getattr_in {
- ViceFid VFid;
- struct coda_vattr attr; /* XXXXX */
- } cfs_getattr;
-
-
-
- oouutt
-
- struct cfs_getattr_out {
- struct coda_vattr attr;
- } cfs_getattr;
-
-
-
- DDeessccrriippttiioonn This call returns the attributes of the file identified by
- fid.
-
- EErrrroorrss Errors can occur if the object with fid does not exist, is
- unaccessible or if the caller does not have permission to fetch
- attributes.
-
- NNoottee Many kernel FS drivers (Linux, NT and Windows 95) need to acquire
- the attributes as well as the Fid for the instantiation of an internal
- "inode" or "FileHandle". A significant improvement in performance on
- such systems could be made by combining the _l_o_o_k_u_p and _g_e_t_a_t_t_r calls
- both at the Venus/kernel interaction level and at the RPC level.
-
- The vattr structure included in the input arguments is superfluous and
- should be removed.
-
- 0wpage
-
- 44..66.. sseettaattttrr
-
-
- SSuummmmaarryy Set the attributes of a file.
-
- AArrgguummeennttss
-
- iinn
-
- struct cfs_setattr_in {
- ViceFid VFid;
- struct coda_vattr attr;
- } cfs_setattr;
-
-
-
-
- oouutt
- empty
-
- DDeessccrriippttiioonn The structure attr is filled with attributes to be changed
- in BSD style. Attributes not to be changed are set to -1, apart from
- vtype which is set to VNON. Other are set to the value to be assigned.
- The only attributes which the FS driver may request to change are the
- mode, owner, groupid, atime, mtime and ctime. The return value
- indicates success or failure.
-
- EErrrroorrss A variety of errors can occur. The object may not exist, may
- be inaccessible, or permission may not be granted by Venus.
-
- 0wpage
-
- 44..77.. aacccceessss
-
-
- SSuummmmaarryy
-
- AArrgguummeennttss
-
- iinn
-
- struct cfs_access_in {
- ViceFid VFid;
- int flags;
- } cfs_access;
-
-
-
- oouutt
- empty
-
- DDeessccrriippttiioonn Verify if access to the object identified by VFid for
- operations described by flags is permitted. The result indicates if
- access will be granted. It is important to remember that Coda uses
- ACLs to enforce protection and that ultimately the servers, not the
- clients enforce the security of the system. The result of this call
- will depend on whether a _t_o_k_e_n is held by the user.
-
- EErrrroorrss The object may not exist, or the ACL describing the protection
- may not be accessible.
-
- 0wpage
-
- 44..88.. ccrreeaattee
-
-
- SSuummmmaarryy Invoked to create a file
-
- AArrgguummeennttss
-
- iinn
-
- struct cfs_create_in {
- ViceFid VFid;
- struct coda_vattr attr;
- int excl;
- int mode;
- char *name; /* Place holder for data. */
- } cfs_create;
-
-
-
-
- oouutt
-
- struct cfs_create_out {
- ViceFid VFid;
- struct coda_vattr attr;
- } cfs_create;
-
-
-
- DDeessccrriippttiioonn This upcall is invoked to request creation of a file.
- The file will be created in the directory identified by VFid, its name
- will be name, and the mode will be mode. If excl is set an error will
- be returned if the file already exists. If the size field in attr is
- set to zero the file will be truncated. The uid and gid of the file
- are set by converting the CodaCred to a uid using a macro CRTOUID
- (this macro is platform dependent). Upon success the VFid and
- attributes of the file are returned. The Coda FS Driver will normally
- instantiate a vnode, inode or file handle at kernel level for the new
- object.
-
-
- EErrrroorrss A variety of errors can occur. Permissions may be insufficient.
- If the object exists and is not a file the error EISDIR is returned
- under Unix.
-
- NNOOTTEE The packing of parameters is very inefficient and appears to
- indicate confusion between the system call creat and the VFS operation
- create. The VFS operation create is only called to create new objects.
- This create call differs from the Unix one in that it is not invoked
- to return a file descriptor. The truncate and exclusive options,
- together with the mode, could simply be part of the mode as it is
- under Unix. There should be no flags argument; this is used in open
- (2) to return a file descriptor for READ or WRITE mode.
-
- The attributes of the directory should be returned too, since the size
- and mtime changed.
-
- 0wpage
-
- 44..99.. mmkkddiirr
-
-
- SSuummmmaarryy Create a new directory.
-
- AArrgguummeennttss
-
- iinn
-
- struct cfs_mkdir_in {
- ViceFid VFid;
- struct coda_vattr attr;
- char *name; /* Place holder for data. */
- } cfs_mkdir;
-
-
-
- oouutt
-
- struct cfs_mkdir_out {
- ViceFid VFid;
- struct coda_vattr attr;
- } cfs_mkdir;
-
-
-
-
- DDeessccrriippttiioonn This call is similar to create but creates a directory.
- Only the mode field in the input parameters is used for creation.
- Upon successful creation, the attr returned contains the attributes of
- the new directory.
-
- EErrrroorrss As for create.
-
- NNOOTTEE The input parameter should be changed to mode instead of
- attributes.
-
- The attributes of the parent should be returned since the size and
- mtime changes.
-
- 0wpage
-
- 44..1100.. lliinnkk
-
-
- SSuummmmaarryy Create a link to an existing file.
-
- AArrgguummeennttss
-
- iinn
-
- struct cfs_link_in {
- ViceFid sourceFid; /* cnode to link *to* */
- ViceFid destFid; /* Directory in which to place link */
- char *tname; /* Place holder for data. */
- } cfs_link;
-
-
-
- oouutt
- empty
-
- DDeessccrriippttiioonn This call creates a link to the sourceFid in the directory
- identified by destFid with name tname. The source must reside in the
- target's parent, i.e. the source must be have parent destFid, i.e. Coda
- does not support cross directory hard links. Only the return value is
- relevant. It indicates success or the type of failure.
-
- EErrrroorrss The usual errors can occur.0wpage
-
- 44..1111.. ssyymmlliinnkk
-
-
- SSuummmmaarryy create a symbolic link
-
- AArrgguummeennttss
-
- iinn
-
- struct cfs_symlink_in {
- ViceFid VFid; /* Directory to put symlink in */
- char *srcname;
- struct coda_vattr attr;
- char *tname;
- } cfs_symlink;
-
-
-
- oouutt
- none
-
- DDeessccrriippttiioonn Create a symbolic link. The link is to be placed in the
- directory identified by VFid and named tname. It should point to the
- pathname srcname. The attributes of the newly created object are to
- be set to attr.
-
- EErrrroorrss
-
- NNOOTTEE The attributes of the target directory should be returned since
- its size changed.
-
- 0wpage
-
- 44..1122.. rreemmoovvee
-
-
- SSuummmmaarryy Remove a file
-
- AArrgguummeennttss
-
- iinn
-
- struct cfs_remove_in {
- ViceFid VFid;
- char *name; /* Place holder for data. */
- } cfs_remove;
-
-
-
- oouutt
- none
-
- DDeessccrriippttiioonn Remove file named cfs_remove_in.name in directory
- identified by VFid.
-
- EErrrroorrss
-
- NNOOTTEE The attributes of the directory should be returned since its
- mtime and size may change.
-
- 0wpage
-
- 44..1133.. rrmmddiirr
-
-
- SSuummmmaarryy Remove a directory
-
- AArrgguummeennttss
-
- iinn
-
- struct cfs_rmdir_in {
- ViceFid VFid;
- char *name; /* Place holder for data. */
- } cfs_rmdir;
-
-
-
- oouutt
- none
-
- DDeessccrriippttiioonn Remove the directory with name name from the directory
- identified by VFid.
-
- EErrrroorrss
-
- NNOOTTEE The attributes of the parent directory should be returned since
- its mtime and size may change.
-
- 0wpage
-
- 44..1144.. rreeaaddlliinnkk
-
-
- SSuummmmaarryy Read the value of a symbolic link.
-
- AArrgguummeennttss
-
- iinn
-
- struct cfs_readlink_in {
- ViceFid VFid;
- } cfs_readlink;
-
-
-
- oouutt
-
- struct cfs_readlink_out {
- int count;
- caddr_t data; /* Place holder for data. */
- } cfs_readlink;
-
-
-
- DDeessccrriippttiioonn This routine reads the contents of symbolic link
- identified by VFid into the buffer data. The buffer data must be able
- to hold any name up to CFS_MAXNAMLEN (PATH or NAM??).
-
- EErrrroorrss No unusual errors.
-
- 0wpage
-
- 44..1155.. ooppeenn
-
-
- SSuummmmaarryy Open a file.
-
- AArrgguummeennttss
-
- iinn
-
- struct cfs_open_in {
- ViceFid VFid;
- int flags;
- } cfs_open;
-
-
-
- oouutt
-
- struct cfs_open_out {
- dev_t dev;
- ino_t inode;
- } cfs_open;
-
-
-
- DDeessccrriippttiioonn This request asks Venus to place the file identified by
- VFid in its cache and to note that the calling process wishes to open
- it with flags as in open(2). The return value to the kernel differs
- for Unix and Windows systems. For Unix systems the Coda FS Driver is
- informed of the device and inode number of the container file in the
- fields dev and inode. For Windows the path of the container file is
- returned to the kernel.
- EErrrroorrss
-
- NNOOTTEE Currently the cfs_open_out structure is not properly adapted to
- deal with the Windows case. It might be best to implement two
- upcalls, one to open aiming at a container file name, the other at a
- container file inode.
-
- 0wpage
-
- 44..1166.. cclloossee
-
-
- SSuummmmaarryy Close a file, update it on the servers.
-
- AArrgguummeennttss
-
- iinn
-
- struct cfs_close_in {
- ViceFid VFid;
- int flags;
- } cfs_close;
-
-
-
- oouutt
- none
-
- DDeessccrriippttiioonn Close the file identified by VFid.
-
- EErrrroorrss
-
- NNOOTTEE The flags argument is bogus and not used. However, Venus' code
- has room to deal with an execp input field, probably this field should
- be used to inform Venus that the file was closed but is still memory
- mapped for execution. There are comments about fetching versus not
- fetching the data in Venus vproc_vfscalls. This seems silly. If a
- file is being closed, the data in the container file is to be the new
- data. Here again the execp flag might be in play to create confusion:
- currently Venus might think a file can be flushed from the cache when
- it is still memory mapped. This needs to be understood.
-
- 0wpage
-
- 44..1177.. iiooccttll
-
-
- SSuummmmaarryy Do an ioctl on a file. This includes the pioctl interface.
-
- AArrgguummeennttss
-
- iinn
-
- struct cfs_ioctl_in {
- ViceFid VFid;
- int cmd;
- int len;
- int rwflag;
- char *data; /* Place holder for data. */
- } cfs_ioctl;
-
-
-
- oouutt
-
-
- struct cfs_ioctl_out {
- int len;
- caddr_t data; /* Place holder for data. */
- } cfs_ioctl;
-
-
-
- DDeessccrriippttiioonn Do an ioctl operation on a file. The command, len and
- data arguments are filled as usual. flags is not used by Venus.
-
- EErrrroorrss
-
- NNOOTTEE Another bogus parameter. flags is not used. What is the
- business about PREFETCHING in the Venus code?
-
-
- 0wpage
-
- 44..1188.. rreennaammee
-
-
- SSuummmmaarryy Rename a fid.
-
- AArrgguummeennttss
-
- iinn
-
- struct cfs_rename_in {
- ViceFid sourceFid;
- char *srcname;
- ViceFid destFid;
- char *destname;
- } cfs_rename;
-
-
-
- oouutt
- none
-
- DDeessccrriippttiioonn Rename the object with name srcname in directory
- sourceFid to destname in destFid. It is important that the names
- srcname and destname are 0 terminated strings. Strings in Unix
- kernels are not always null terminated.
-
- EErrrroorrss
-
- 0wpage
-
- 44..1199.. rreeaaddddiirr
-
-
- SSuummmmaarryy Read directory entries.
-
- AArrgguummeennttss
-
- iinn
-
- struct cfs_readdir_in {
- ViceFid VFid;
- int count;
- int offset;
- } cfs_readdir;
-
-
-
-
- oouutt
-
- struct cfs_readdir_out {
- int size;
- caddr_t data; /* Place holder for data. */
- } cfs_readdir;
-
-
-
- DDeessccrriippttiioonn Read directory entries from VFid starting at offset and
- read at most count bytes. Returns the data in data and returns
- the size in size.
-
- EErrrroorrss
-
- NNOOTTEE This call is not used. Readdir operations exploit container
- files. We will re-evaluate this during the directory revamp which is
- about to take place.
-
- 0wpage
-
- 44..2200.. vvggeett
-
-
- SSuummmmaarryy instructs Venus to do an FSDB->Get.
-
- AArrgguummeennttss
-
- iinn
-
- struct cfs_vget_in {
- ViceFid VFid;
- } cfs_vget;
-
-
-
- oouutt
-
- struct cfs_vget_out {
- ViceFid VFid;
- int vtype;
- } cfs_vget;
-
-
-
- DDeessccrriippttiioonn This upcall asks Venus to do a get operation on an fsobj
- labelled by VFid.
-
- EErrrroorrss
-
- NNOOTTEE This operation is not used. However, it is extremely useful
- since it can be used to deal with read/write memory mapped files.
- These can be "pinned" in the Venus cache using vget and released with
- inactive.
-
- 0wpage
-
- 44..2211.. ffssyynncc
-
-
- SSuummmmaarryy Tell Venus to update the RVM attributes of a file.
-
- AArrgguummeennttss
-
- iinn
-
- struct cfs_fsync_in {
- ViceFid VFid;
- } cfs_fsync;
-
-
-
- oouutt
- none
-
- DDeessccrriippttiioonn Ask Venus to update RVM attributes of object VFid. This
- should be called as part of kernel level fsync type calls. The
- result indicates if the syncing was successful.
-
- EErrrroorrss
-
- NNOOTTEE Linux does not implement this call. It should.
-
- 0wpage
-
- 44..2222.. iinnaaccttiivvee
-
-
- SSuummmmaarryy Tell Venus a vnode is no longer in use.
-
- AArrgguummeennttss
-
- iinn
-
- struct cfs_inactive_in {
- ViceFid VFid;
- } cfs_inactive;
-
-
-
- oouutt
- none
-
- DDeessccrriippttiioonn This operation returns EOPNOTSUPP.
-
- EErrrroorrss
-
- NNOOTTEE This should perhaps be removed.
-
- 0wpage
-
- 44..2233.. rrddwwrr
-
-
- SSuummmmaarryy Read or write from a file
-
- AArrgguummeennttss
-
- iinn
-
- struct cfs_rdwr_in {
- ViceFid VFid;
- int rwflag;
- int count;
- int offset;
- int ioflag;
- caddr_t data; /* Place holder for data. */
- } cfs_rdwr;
-
-
-
-
- oouutt
-
- struct cfs_rdwr_out {
- int rwflag;
- int count;
- caddr_t data; /* Place holder for data. */
- } cfs_rdwr;
-
-
-
- DDeessccrriippttiioonn This upcall asks Venus to read or write from a file.
-
- EErrrroorrss
-
- NNOOTTEE It should be removed since it is against the Coda philosophy that
- read/write operations never reach Venus. I have been told the
- operation does not work. It is not currently used.
-
-
- 0wpage
-
- 44..2244.. ooddyymmoouunntt
-
-
- SSuummmmaarryy Allows mounting multiple Coda "filesystems" on one Unix mount
- point.
-
- AArrgguummeennttss
-
- iinn
-
- struct ody_mount_in {
- char *name; /* Place holder for data. */
- } ody_mount;
-
-
-
- oouutt
-
- struct ody_mount_out {
- ViceFid VFid;
- } ody_mount;
-
-
-
- DDeessccrriippttiioonn Asks Venus to return the rootfid of a Coda system named
- name. The fid is returned in VFid.
-
- EErrrroorrss
-
- NNOOTTEE This call was used by David for dynamic sets. It should be
- removed since it causes a jungle of pointers in the VFS mounting area.
- It is not used by Coda proper. Call is not implemented by Venus.
-
- 0wpage
-
- 44..2255.. ooddyy__llooookkuupp
-
-
- SSuummmmaarryy Looks up something.
-
- AArrgguummeennttss
-
- iinn irrelevant
-
-
- oouutt
- irrelevant
-
- DDeessccrriippttiioonn
-
- EErrrroorrss
-
- NNOOTTEE Gut it. Call is not implemented by Venus.
-
- 0wpage
-
- 44..2266.. ooddyy__eexxppaanndd
-
-
- SSuummmmaarryy expands something in a dynamic set.
-
- AArrgguummeennttss
-
- iinn irrelevant
-
- oouutt
- irrelevant
-
- DDeessccrriippttiioonn
-
- EErrrroorrss
-
- NNOOTTEE Gut it. Call is not implemented by Venus.
-
- 0wpage
-
- 44..2277.. pprreeffeettcchh
-
-
- SSuummmmaarryy Prefetch a dynamic set.
-
- AArrgguummeennttss
-
- iinn Not documented.
-
- oouutt
- Not documented.
-
- DDeessccrriippttiioonn Venus worker.cc has support for this call, although it is
- noted that it doesn't work. Not surprising, since the kernel does not
- have support for it. (ODY_PREFETCH is not a defined operation).
-
- EErrrroorrss
-
- NNOOTTEE Gut it. It isn't working and isn't used by Coda.
-
-
- 0wpage
-
- 44..2288.. ssiiggnnaall
-
-
- SSuummmmaarryy Send Venus a signal about an upcall.
-
- AArrgguummeennttss
-
- iinn none
-
- oouutt
- not applicable.
-
- DDeessccrriippttiioonn This is an out-of-band upcall to Venus to inform Venus
- that the calling process received a signal after Venus read the
- message from the input queue. Venus is supposed to clean up the
- operation.
-
- EErrrroorrss No reply is given.
-
- NNOOTTEE We need to better understand what Venus needs to clean up and if
- it is doing this correctly. Also we need to handle multiple upcall
- per system call situations correctly. It would be important to know
- what state changes in Venus take place after an upcall for which the
- kernel is responsible for notifying Venus to clean up (e.g. open
- definitely is such a state change, but many others are maybe not).
-
- 0wpage
-
- 55.. TThhee mmiinniiccaacchhee aanndd ddoowwnnccaallllss
-
-
- The Coda FS Driver can cache results of lookup and access upcalls, to
- limit the frequency of upcalls. Upcalls carry a price since a process
- context switch needs to take place. The counterpart of caching the
- information is that Venus will notify the FS Driver that cached
- entries must be flushed or renamed.
-
- The kernel code generally has to maintain a structure which links the
- internal file handles (called vnodes in BSD, inodes in Linux and
- FileHandles in Windows) with the ViceFid's which Venus maintains. The
- reason is that frequent translations back and forth are needed in
- order to make upcalls and use the results of upcalls. Such linking
- objects are called ccnnooddeess.
-
- The current minicache implementations have cache entries which record
- the following:
-
- 1. the name of the file
-
- 2. the cnode of the directory containing the object
-
- 3. a list of CodaCred's for which the lookup is permitted.
-
- 4. the cnode of the object
-
- The lookup call in the Coda FS Driver may request the cnode of the
- desired object from the cache, by passing its name, directory and the
- CodaCred's of the caller. The cache will return the cnode or indicate
- that it cannot be found. The Coda FS Driver must be careful to
- invalidate cache entries when it modifies or removes objects.
-
- When Venus obtains information that indicates that cache entries are
- no longer valid, it will make a downcall to the kernel. Downcalls are
- intercepted by the Coda FS Driver and lead to cache invalidations of
- the kind described below. The Coda FS Driver does not return an error
- unless the downcall data could not be read into kernel memory.
-
-
- 55..11.. IINNVVAALLIIDDAATTEE
-
-
- No information is available on this call.
-
-
- 55..22.. FFLLUUSSHH
-
-
-
- AArrgguummeennttss None
-
- SSuummmmaarryy Flush the name cache entirely.
-
- DDeessccrriippttiioonn Venus issues this call upon startup and when it dies. This
- is to prevent stale cache information being held. Some operating
- systems allow the kernel name cache to be switched off dynamically.
- When this is done, this downcall is made.
-
-
- 55..33.. PPUURRGGEEUUSSEERR
-
-
- AArrgguummeennttss
-
- struct cfs_purgeuser_out {/* CFS_PURGEUSER is a venus->kernel call */
- struct CodaCred cred;
- } cfs_purgeuser;
-
-
-
- DDeessccrriippttiioonn Remove all entries in the cache carrying the Cred. This
- call is issued when tokens for a user expire or are flushed.
-
-
- 55..44.. ZZAAPPFFIILLEE
-
-
- AArrgguummeennttss
-
- struct cfs_zapfile_out { /* CFS_ZAPFILE is a venus->kernel call */
- ViceFid CodaFid;
- } cfs_zapfile;
-
-
-
- DDeessccrriippttiioonn Remove all entries which have the (dir vnode, name) pair.
- This is issued as a result of an invalidation of cached attributes of
- a vnode.
-
- NNOOTTEE Call is not named correctly in NetBSD and Mach. The minicache
- zapfile routine takes different arguments. Linux does not implement
- the invalidation of attributes correctly.
-
-
-
- 55..55.. ZZAAPPDDIIRR
-
-
- AArrgguummeennttss
-
- struct cfs_zapdir_out { /* CFS_ZAPDIR is a venus->kernel call */
- ViceFid CodaFid;
- } cfs_zapdir;
-
-
-
- DDeessccrriippttiioonn Remove all entries in the cache lying in a directory
- CodaFid, and all children of this directory. This call is issued when
- Venus receives a callback on the directory.
-
-
- 55..66.. ZZAAPPVVNNOODDEE
-
-
-
- AArrgguummeennttss
-
- struct cfs_zapvnode_out { /* CFS_ZAPVNODE is a venus->kernel call */
- struct CodaCred cred;
- ViceFid VFid;
- } cfs_zapvnode;
-
-
-
- DDeessccrriippttiioonn Remove all entries in the cache carrying the cred and VFid
- as in the arguments. This downcall is probably never issued.
-
-
- 55..77.. PPUURRGGEEFFIIDD
-
-
- SSuummmmaarryy
-
- AArrgguummeennttss
-
- struct cfs_purgefid_out { /* CFS_PURGEFID is a venus->kernel call */
- ViceFid CodaFid;
- } cfs_purgefid;
-
-
-
- DDeessccrriippttiioonn Flush the attribute for the file. If it is a dir (odd
- vnode), purge its children from the namecache and remove the file from the
- namecache.
-
-
-
- 55..88.. RREEPPLLAACCEE
-
-
- SSuummmmaarryy Replace the Fid's for a collection of names.
-
- AArrgguummeennttss
-
- struct cfs_replace_out { /* cfs_replace is a venus->kernel call */
- ViceFid NewFid;
- ViceFid OldFid;
- } cfs_replace;
-
-
-
- DDeessccrriippttiioonn This routine replaces a ViceFid in the name cache with
- another. It is added to allow Venus during reintegration to replace
- locally allocated temp fids while disconnected with global fids even
- when the reference counts on those fids are not zero.
-
- 0wpage
-
- 66.. IInniittiiaalliizzaattiioonn aanndd cclleeaannuupp
-
-
- This section gives brief hints as to desirable features for the Coda
- FS Driver at startup and upon shutdown or Venus failures. Before
- entering the discussion it is useful to repeat that the Coda FS Driver
- maintains the following data:
-
-
- 1. message queues
-
- 2. cnodes
-
- 3. name cache entries
-
- The name cache entries are entirely private to the driver, so they
- can easily be manipulated. The message queues will generally have
- clear points of initialization and destruction. The cnodes are
- much more delicate. User processes hold reference counts in Coda
- filesystems and it can be difficult to clean up the cnodes.
-
- It can expect requests through:
-
- 1. the message subsystem
-
- 2. the VFS layer
-
- 3. pioctl interface
-
- Currently the _p_i_o_c_t_l passes through the VFS for Coda so we can
- treat these similarly.
-
-
- 66..11.. RReeqquuiirreemmeennttss
-
-
- The following requirements should be accommodated:
-
- 1. The message queues should have open and close routines. On Unix
- the opening of the character devices are such routines.
-
- +o Before opening, no messages can be placed.
-
- +o Opening will remove any old messages still pending.
-
- +o Close will notify any sleeping processes that their upcall cannot
- be completed.
-
- +o Close will free all memory allocated by the message queues.
-
-
- 2. At open the namecache shall be initialized to empty state.
-
- 3. Before the message queues are open, all VFS operations will fail.
- Fortunately this can be achieved by making sure than mounting the
- Coda filesystem cannot succeed before opening.
-
- 4. After closing of the queues, no VFS operations can succeed. Here
- one needs to be careful, since a few operations (lookup,
- read/write, readdir) can proceed without upcalls. These must be
- explicitly blocked.
-
- 5. Upon closing the namecache shall be flushed and disabled.
-
- 6. All memory held by cnodes can be freed without relying on upcalls.
-
- 7. Unmounting the file system can be done without relying on upcalls.
-
- 8. Mounting the Coda filesystem should fail gracefully if Venus cannot
- get the rootfid or the attributes of the rootfid. The latter is
- best implemented by Venus fetching these objects before attempting
- to mount.
-
- NNOOTTEE NetBSD in particular but also Linux have not implemented the
- above requirements fully. For smooth operation this needs to be
- corrected.
-
-
-
diff --git a/Documentation/filesystems/configfs/configfs.txt b/Documentation/filesystems/configfs.rst
index 16e606c11f40..f8941954c667 100644
--- a/Documentation/filesystems/configfs/configfs.txt
+++ b/Documentation/filesystems/configfs.rst
@@ -1,5 +1,6 @@
-
-configfs - Userspace-driven kernel object configuration.
+=======================================================
+Configfs - Userspace-driven Kernel Object Configuration
+=======================================================
Joel Becker <joel.becker@oracle.com>
@@ -9,7 +10,8 @@ Copyright (c) 2005 Oracle Corporation,
Joel Becker <joel.becker@oracle.com>
-[What is configfs?]
+What is configfs?
+=================
configfs is a ram-based filesystem that provides the converse of
sysfs's functionality. Where sysfs is a filesystem-based view of
@@ -35,10 +37,11 @@ kernel modules backing the items must respond to this.
Both sysfs and configfs can and should exist together on the same
system. One is not a replacement for the other.
-[Using configfs]
+Using configfs
+==============
configfs can be compiled as a module or into the kernel. You can access
-it by doing
+it by doing::
mount -t configfs none /config
@@ -56,28 +59,29 @@ values. Don't mix more than one attribute in one attribute file.
There are two types of configfs attributes:
* Normal attributes, which similar to sysfs attributes, are small ASCII text
-files, with a maximum size of one page (PAGE_SIZE, 4096 on i386). Preferably
-only one value per file should be used, and the same caveats from sysfs apply.
-Configfs expects write(2) to store the entire buffer at once. When writing to
-normal configfs attributes, userspace processes should first read the entire
-file, modify the portions they wish to change, and then write the entire
-buffer back.
+ files, with a maximum size of one page (PAGE_SIZE, 4096 on i386). Preferably
+ only one value per file should be used, and the same caveats from sysfs apply.
+ Configfs expects write(2) to store the entire buffer at once. When writing to
+ normal configfs attributes, userspace processes should first read the entire
+ file, modify the portions they wish to change, and then write the entire
+ buffer back.
* Binary attributes, which are somewhat similar to sysfs binary attributes,
-but with a few slight changes to semantics. The PAGE_SIZE limitation does not
-apply, but the whole binary item must fit in single kernel vmalloc'ed buffer.
-The write(2) calls from user space are buffered, and the attributes'
-write_bin_attribute method will be invoked on the final close, therefore it is
-imperative for user-space to check the return code of close(2) in order to
-verify that the operation finished successfully.
-To avoid a malicious user OOMing the kernel, there's a per-binary attribute
-maximum buffer value.
+ but with a few slight changes to semantics. The PAGE_SIZE limitation does not
+ apply, but the whole binary item must fit in a single kernel vmalloc'ed buffer.
+ The write(2) calls from user space are buffered, and the attributes'
+ write_bin_attribute method will be invoked on the final close, therefore it is
+ imperative for user-space to check the return code of close(2) in order to
+ verify that the operation finished successfully.
+ To avoid a malicious user OOMing the kernel, there's a per-binary attribute
+ maximum buffer value.
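To make the "whole buffer in one write(2)" rule for normal attributes
concrete, a userspace updater could follow the read-modify-write pattern in
the hedged sketch below. It reuses the FakeNBD example that appears later in
this document, so the attribute path is illustrative rather than a real
interface::

    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    int main(void)
    {
        const char *path = "/config/fakenbd/disk1/rw";   /* example attribute */
        char buf[4096];                                   /* at most one page  */
        ssize_t n;
        int fd = open(path, O_RDWR);

        if (fd < 0)
            return 1;

        /* Read the entire current value first ...                            */
        n = read(fd, buf, sizeof(buf) - 1);
        if (n < 0)
            return 1;
        buf[n] = '\0';

        /* ... compute the new contents ...                                   */
        snprintf(buf, sizeof(buf), "1\n");

        /* ... and store the whole buffer back with a single write(2).        */
        if (pwrite(fd, buf, strlen(buf), 0) != (ssize_t)strlen(buf))
            return 1;

        close(fd);
        return 0;
    }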
When an item needs to be destroyed, remove it with rmdir(2). An
item cannot be destroyed if any other item has a link to it (via
symlink(2)). Links can be removed via unlink(2).
-[Configuring FakeNBD: an Example]
+Configuring FakeNBD: an Example
+===============================
Imagine there's a Network Block Device (NBD) driver that allows you to
access remote block devices. Call it FakeNBD. FakeNBD uses configfs
@@ -86,14 +90,14 @@ sysadmins use to configure FakeNBD, but somehow that program has to tell
the driver about it. Here's where configfs comes in.
When the FakeNBD driver is loaded, it registers itself with configfs.
-readdir(3) sees this just fine:
+readdir(3) sees this just fine::
# ls /config
fakenbd
A fakenbd connection can be created with mkdir(2). The name is
arbitrary, but likely the tool will make some use of the name. Perhaps
-it is a uuid or a disk name:
+it is a uuid or a disk name::
# mkdir /config/fakenbd/disk1
# ls /config/fakenbd/disk1
@@ -102,7 +106,7 @@ it is a uuid or a disk name:
The target attribute contains the IP address of the server FakeNBD will
connect to. The device attribute is the device on the server.
Predictably, the rw attribute determines whether the connection is
-read-only or read-write.
+read-only or read-write::
# echo 10.0.0.1 > /config/fakenbd/disk1/target
# echo /dev/sda1 > /config/fakenbd/disk1/device
@@ -111,7 +115,8 @@ read-only or read-write.
That's it. That's all there is. Now the device is configured, via the
shell no less.
-[Coding With configfs]
+Coding With configfs
+====================
Every object in configfs is a config_item. A config_item reflects an
object in the subsystem. It has attributes that match values on that
@@ -130,7 +135,10 @@ appears as a directory at the top of the configfs filesystem. A
subsystem is also a config_group, and can do everything a config_group
can.
-[struct config_item]
+struct config_item
+==================
+
+::
struct config_item {
char *ci_name;
@@ -168,7 +176,10 @@ By itself, a config_item cannot do much more than appear in configfs.
Usually a subsystem wants the item to display and/or store attributes,
among other things. For that, it needs a type.
-[struct config_item_type]
+struct config_item_type
+=======================
+
+::
struct configfs_item_operations {
void (*release)(struct config_item *);
@@ -192,7 +203,10 @@ allocated dynamically will need to provide the ct_item_ops->release()
method. This method is called when the config_item's reference count
reaches zero.
-[struct configfs_attribute]
+struct configfs_attribute
+=========================
+
+::
struct configfs_attribute {
char *ca_name;
@@ -214,7 +228,10 @@ be called whenever userspace asks for a read(2) on the attribute. If an
attribute is writable and provides a ->store method, that method will be
be called whenever userspace asks for a write(2) on the attribute.
-[struct configfs_bin_attribute]
+struct configfs_bin_attribute
+=============================
+
+::
struct configfs_bin_attribute {
struct configfs_attribute cb_attr;
@@ -240,11 +257,12 @@ will happen for write(2). The reads/writes are bufferred so only a
single read/write will occur; the attributes' need not concern itself
with it.
-[struct config_group]
+struct config_group
+===================
A config_item cannot live in a vacuum. The only way one can be created
is via mkdir(2) on a config_group. This will trigger creation of a
-child item.
+child item::
struct config_group {
struct config_item cg_item;
@@ -264,7 +282,7 @@ The config_group structure contains a config_item. Properly configuring
that item means that a group can behave as an item in its own right.
However, it can do more: it can create child items or groups. This is
accomplished via the group operations specified on the group's
-config_item_type.
+config_item_type::
struct configfs_group_operations {
struct config_item *(*make_item)(struct config_group *group,
@@ -279,7 +297,8 @@ config_item_type.
};
A group creates child items by providing the
-ct_group_ops->make_item() method. If provided, this method is called from mkdir(2) in the group's directory. The subsystem allocates a new
+ct_group_ops->make_item() method. If provided, this method is called from
+mkdir(2) in the group's directory. The subsystem allocates a new
config_item (or more likely, its container structure), initializes it,
and returns it to configfs. Configfs will then populate the filesystem
tree to reflect the new item.
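As a sketch of the make_item() path just described (not taken verbatim from
any in-tree subsystem; my_child, my_child_type and the other identifiers are
placeholders), a dynamically allocated child item could be created and later
released like this::

    #include <linux/configfs.h>
    #include <linux/err.h>
    #include <linux/module.h>
    #include <linux/slab.h>

    struct my_child {
        struct config_item item;
        /* subsystem-specific state would live here */
    };

    static void my_child_release(struct config_item *item)
    {
        kfree(container_of(item, struct my_child, item));
    }

    static struct configfs_item_operations my_child_item_ops = {
        .release = my_child_release,
    };

    static const struct config_item_type my_child_type = {
        .ct_item_ops = &my_child_item_ops,
        .ct_owner    = THIS_MODULE,
    };

    /* Invoked for mkdir(2) in the parent group's directory. */
    static struct config_item *my_make_item(struct config_group *group,
                                            const char *name)
    {
        struct my_child *child = kzalloc(sizeof(*child), GFP_KERNEL);

        if (!child)
            return ERR_PTR(-ENOMEM);

        config_item_init_type_name(&child->item, name, &my_child_type);
        return &child->item;
    }

    static struct configfs_group_operations my_group_ops = {
        .make_item = my_make_item,
    };

Because the item is dynamically allocated, the release() method is what
finally frees it once the reference count drops to zero, as described above.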
@@ -296,13 +315,14 @@ upon item allocation. If a subsystem has no work to do, it may omit
the ct_group_ops->drop_item() method, and configfs will call
config_item_put() on the item on behalf of the subsystem.
-IMPORTANT: drop_item() is void, and as such cannot fail. When rmdir(2)
-is called, configfs WILL remove the item from the filesystem tree
-(assuming that it has no children to keep it busy). The subsystem is
-responsible for responding to this. If the subsystem has references to
-the item in other threads, the memory is safe. It may take some time
-for the item to actually disappear from the subsystem's usage. But it
-is gone from configfs.
+Important:
+ drop_item() is void, and as such cannot fail. When rmdir(2)
+ is called, configfs WILL remove the item from the filesystem tree
+ (assuming that it has no children to keep it busy). The subsystem is
+ responsible for responding to this. If the subsystem has references to
+ the item in other threads, the memory is safe. It may take some time
+ for the item to actually disappear from the subsystem's usage. But it
+ is gone from configfs.
When drop_item() is called, the item's linkage has already been torn
down. It no longer has a reference on its parent and has no place in
@@ -319,10 +339,11 @@ is implemented in the configfs rmdir(2) code. ->drop_item() will not be
called, as the item has not been dropped. rmdir(2) will fail, as the
directory is not empty.
-[struct configfs_subsystem]
+struct configfs_subsystem
+=========================
A subsystem must register itself, usually at module_init time. This
-tells configfs to make the subsystem appear in the file tree.
+tells configfs to make the subsystem appear in the file tree::
struct configfs_subsystem {
struct config_group su_group;
@@ -332,17 +353,19 @@ tells configfs to make the subsystem appear in the file tree.
int configfs_register_subsystem(struct configfs_subsystem *subsys);
void configfs_unregister_subsystem(struct configfs_subsystem *subsys);
- A subsystem consists of a toplevel config_group and a mutex.
+A subsystem consists of a toplevel config_group and a mutex.
The group is where child config_items are created. For a subsystem,
this group is usually defined statically. Before calling
configfs_register_subsystem(), the subsystem must have initialized the
group via the usual group _init() functions, and it must also have
initialized the mutex.
- When the register call returns, the subsystem is live, and it
+
+When the register call returns, the subsystem is live, and it
will be visible via configfs. At that point, mkdir(2) can be called and
the subsystem must be ready for it.
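For orientation, the registration sequence above can be condensed into a
minimal module skeleton. This is a hedged sketch loosely following
samples/configfs/configfs_sample.c; the minimal_* names are invented, and the
item type deliberately has no attributes or children::

    #include <linux/configfs.h>
    #include <linux/module.h>
    #include <linux/mutex.h>

    static const struct config_item_type minimal_type = {
        .ct_owner = THIS_MODULE,
    };

    static struct configfs_subsystem minimal_subsys = {
        .su_group = {
            .cg_item = {
                .ci_namebuf = "minimal",      /* appears as /config/minimal */
                .ci_type    = &minimal_type,
            },
        },
    };

    static int __init minimal_init(void)
    {
        /* Initialize the toplevel group and the subsystem mutex first ...   */
        config_group_init(&minimal_subsys.su_group);
        mutex_init(&minimal_subsys.su_mutex);

        /* ... then make the subsystem visible; mkdir(2) may arrive at once. */
        return configfs_register_subsystem(&minimal_subsys);
    }

    static void __exit minimal_exit(void)
    {
        configfs_unregister_subsystem(&minimal_subsys);
    }

    module_init(minimal_init);
    module_exit(minimal_exit);
    MODULE_LICENSE("GPL");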
-[An Example]
+An Example
+==========
The best example of these basic concepts is the simple_children
subsystem/group and the simple_child item in
@@ -350,7 +373,8 @@ samples/configfs/configfs_sample.c. It shows a trivial object displaying
and storing an attribute, and a simple group creating and destroying
these children.
-[Hierarchy Navigation and the Subsystem Mutex]
+Hierarchy Navigation and the Subsystem Mutex
+============================================
There is an extra bonus that configfs provides. The config_groups and
config_items are arranged in a hierarchy due to the fact that they
@@ -375,7 +399,8 @@ be in its parent's cg_children list for the same duration. This allows
a subsystem to trust ci_parent and cg_children while they hold the
mutex.
-[Item Aggregation Via symlink(2)]
+Item Aggregation Via symlink(2)
+===============================
configfs provides a simple group via the group->item parent/child
relationship. Often, however, a larger environment requires aggregation
@@ -403,7 +428,8 @@ A config_item cannot be removed while it links to any other item, nor
can it be removed while an item links to it. Dangling symlinks are not
allowed in configfs.
-[Automatically Created Subgroups]
+Automatically Created Subgroups
+===============================
A new config_group may want to have two types of child config_items.
While this could be codified by magic names in ->make_item(), it is much
@@ -433,7 +459,8 @@ As a consequence of this, default groups cannot be removed directly via
rmdir(2). They also are not considered when rmdir(2) on the parent
group is checking for children.
-[Dependent Subsystems]
+Dependent Subsystems
+====================
Sometimes other drivers depend on particular configfs items. For
example, ocfs2 mounts depend on a heartbeat region item. If that
@@ -460,9 +487,11 @@ succeeds, then heartbeat knows the region is safe to give to ocfs2.
If it fails, it was being torn down anyway, and heartbeat can gracefully
pass up an error.
-[Committable Items]
+Committable Items
+=================
-NOTE: Committable items are currently unimplemented.
+Note:
+ Committable items are currently unimplemented.
Some config_items cannot have a valid initial state. That is, no
default values can be specified for the item's attributes such that the
@@ -504,5 +533,3 @@ As rmdir(2) does not work in the "live" directory, an item must be
shutdown, or "uncommitted". Again, this is done via rename(2), this
time from the "live" directory back to the "pending" one. The subsystem
is notified by the ct_group_ops->uncommit_object() method.
-
-
diff --git a/Documentation/filesystems/dax.txt b/Documentation/filesystems/dax.txt
index 679729442fd2..8e2670781c9b 100644
--- a/Documentation/filesystems/dax.txt
+++ b/Documentation/filesystems/dax.txt
@@ -20,8 +20,144 @@ Usage
If you have a block device which supports DAX, you can make a filesystem
on it as usual. The DAX code currently only supports files with a block
size equal to your kernel's PAGE_SIZE, so you may need to specify a block
-size when creating the filesystem. When mounting it, use the "-o dax"
-option on the command line or add 'dax' to the options in /etc/fstab.
+size when creating the filesystem.
+
+Currently 3 filesystems support DAX: ext2, ext4 and xfs. The way DAX is
+enabled differs between them.
+
+Enabling DAX on ext4 and ext2
+-----------------------------
+
+When mounting the filesystem, use the "-o dax" option on the command line or
+add 'dax' to the options in /etc/fstab. This enables DAX for all files within
+the filesystem and is equivalent to the '-o dax=always' behavior below.
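+
+For example (the persistent-memory device name is illustrative):
+
+	mount -t ext4 -o dax /dev/pmem0 /mnt/ext4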
+
+
+Enabling DAX on xfs
+-------------------
+
+Summary
+-------
+
+ 1. There exists an in-kernel file access mode flag S_DAX that corresponds to
+ the statx flag STATX_ATTR_DAX. See the manpage for statx(2) for details
+ about this access mode.
+
+ 2. There exists a persistent flag FS_XFLAG_DAX that can be applied to regular
+ files and directories. This advisory flag can be set or cleared at any
+ time, but doing so does not immediately affect the S_DAX state.
+
+ 3. If the persistent FS_XFLAG_DAX flag is set on a directory, this flag will
+ be inherited by all regular files and subdirectories that are subsequently
+ created in this directory. Files and subdirectories that exist at the time
+    this flag is set or cleared on the parent directory are not affected by
+    this change to the parent directory.
+
+ 4. There exist dax mount options which can override FS_XFLAG_DAX in the
+ setting of the S_DAX flag. Given underlying storage which supports DAX the
+ following hold:
+
+ "-o dax=inode" means "follow FS_XFLAG_DAX" and is the default.
+
+ "-o dax=never" means "never set S_DAX, ignore FS_XFLAG_DAX."
+
+     "-o dax=always" means "always set S_DAX, ignore FS_XFLAG_DAX."
+
+ "-o dax" is a legacy option which is an alias for "dax=always".
+ This may be removed in the future so "-o dax=always" is
+ the preferred method for specifying this behavior.
+
+ NOTE: Modifications to and the inheritance behavior of FS_XFLAG_DAX remain
+ the same even when the filesystem is mounted with a dax option. However,
+ in-core inode state (S_DAX) will be overridden until the filesystem is
+ remounted with dax=inode and the inode is evicted from kernel memory.
+
+ 5. The S_DAX policy can be changed via:
+
+ a) Setting the parent directory FS_XFLAG_DAX as needed before files are
+ created
+
+ b) Setting the appropriate dax="foo" mount option
+
+ c) Changing the FS_XFLAG_DAX flag on existing regular files and
+ directories. This has runtime constraints and limitations that are
+ described in 6) below.
+
+ 6. When changing the S_DAX policy via toggling the persistent FS_XFLAG_DAX flag,
+ the change in behaviour for existing regular files may not occur
+ immediately. If the change must take effect immediately, the administrator
+ needs to:
+
+ a) stop the application so there are no active references to the data set
+ the policy change will affect
+
+ b) evict the data set from kernel caches so it will be re-instantiated when
+ the application is restarted. This can be achieved by:
+
+ i. drop-caches
+ ii. a filesystem unmount and mount cycle
+ iii. a system reboot
+
+
+Details
+-------
+
+There are 2 per-file dax flags. One is a persistent inode setting (FS_XFLAG_DAX)
+and the other is a volatile flag indicating the active state of the feature
+(S_DAX).
+
+FS_XFLAG_DAX is preserved within the filesystem. This persistent config
+setting can be set, cleared and/or queried using the FS_IOC_FS[GS]ETXATTR ioctl
+(see ioctl_xfs_fsgetxattr(2)) or a utility such as 'xfs_io'.
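+
+For example, using xfs_io on a directory (the path is illustrative):
+
+	xfs_io -c 'chattr +x' /mnt/xfs/dir	# set FS_XFLAG_DAX
+	xfs_io -c 'chattr -x' /mnt/xfs/dir	# clear FS_XFLAG_DAX
+	xfs_io -c 'lsattr' /mnt/xfs/dir		# list the current flags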
+
+New files and directories automatically inherit FS_XFLAG_DAX from
+their parent directory _when_ _created_. Therefore, setting FS_XFLAG_DAX at
+directory creation time can be used to set a default behavior for an entire
+sub-tree.
+
+To clarify inheritance, here are 3 examples:
+
+Example A:
+
+mkdir -p a/b/c
+xfs_io -c 'chattr +x' a
+mkdir a/b/c/d
+mkdir a/e
+
+ dax: a,e
+ no dax: b,c,d
+
+Example B:
+
+mkdir a
+xfs_io -c 'chattr +x' a
+mkdir -p a/b/c/d
+
+ dax: a,b,c,d
+ no dax:
+
+Example C:
+
+mkdir -p a/b/c
+xfs_io -c 'chattr +x' c
+mkdir a/b/c/d
+
+ dax: c,d
+ no dax: a,b
+
+
+The current enabled state (S_DAX) is set when a file inode is instantiated in
+memory by the kernel. It is set based on the underlying media support, the
+value of FS_XFLAG_DAX and the filesystem's dax mount option.
+
+statx can be used to query S_DAX. NOTE that only regular files will ever have
+S_DAX set and therefore statx will never indicate that S_DAX is set on
+directories.
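+
+As a sketch, S_DAX can be queried from userspace with a small program like
+the following (it assumes kernel headers and a libc recent enough to provide
+statx() and STATX_ATTR_DAX):
+
+	#include <stdio.h>
+	#include <fcntl.h>		/* AT_FDCWD */
+	#include <sys/stat.h>		/* statx(), struct statx, STATX_ATTR_DAX */
+
+	int main(int argc, char **argv)
+	{
+		struct statx stx;
+
+		if (argc != 2 || statx(AT_FDCWD, argv[1], 0, 0, &stx) != 0) {
+			perror("statx");
+			return 1;
+		}
+		printf("%s: S_DAX is %s\n", argv[1],
+		       (stx.stx_attributes & STATX_ATTR_DAX) ? "set" : "not set");
+		return 0;
+	}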
+
+Setting the FS_XFLAG_DAX flag (specifically or through inheritance) occurs even
+if the underlying media does not support dax and/or the filesystem is
+overridden with a mount option.
+
Implementation Tips for Block Driver Writers
@@ -74,7 +210,7 @@ are zeroed out and converted to written extents before being returned to avoid
exposure of uninitialized data through mmap.
These filesystems may be used for inspiration:
-- ext2: see Documentation/filesystems/ext2.txt
+- ext2: see Documentation/filesystems/ext2.rst
- ext4: see Documentation/filesystems/ext4/
- xfs: see Documentation/admin-guide/xfs.rst
@@ -94,7 +230,7 @@ sysadmins have an option to restore the lost data from a prior backup/inbuilt
redundancy in the following ways:
1. Delete the affected file, and restore from a backup (sysadmin route):
- This will free the file system blocks that were being used by the file,
+ This will free the filesystem blocks that were being used by the file,
and the next time they're allocated, they will be zeroed first, which
happens through the driver, and will clear bad sectors.
diff --git a/Documentation/filesystems/debugfs.rst b/Documentation/filesystems/debugfs.rst
index 6c032db235a5..1da7a4b7383d 100644
--- a/Documentation/filesystems/debugfs.rst
+++ b/Documentation/filesystems/debugfs.rst
@@ -166,16 +166,17 @@ file::
};
struct debugfs_regset32 {
- struct debugfs_reg32 *regs;
+ const struct debugfs_reg32 *regs;
int nregs;
void __iomem *base;
+ struct device *dev; /* Optional device for Runtime PM */
};
debugfs_create_regset32(const char *name, umode_t mode,
struct dentry *parent,
struct debugfs_regset32 *regset);
- void debugfs_print_regs32(struct seq_file *s, struct debugfs_reg32 *regs,
+ void debugfs_print_regs32(struct seq_file *s, const struct debugfs_reg32 *regs,
int nregs, void __iomem *base, char *prefix);
The "base" argument may be 0, but you may want to build the reg32 array
diff --git a/Documentation/filesystems/devpts.rst b/Documentation/filesystems/devpts.rst
new file mode 100644
index 000000000000..a03248ddfb4c
--- /dev/null
+++ b/Documentation/filesystems/devpts.rst
@@ -0,0 +1,36 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====================
+The Devpts Filesystem
+=====================
+
+Each mount of the devpts filesystem is now distinct such that ptys
+and their indices allocated in one mount are independent from ptys
+and their indices in all other mounts.
+
+All mounts of the devpts filesystem now create a ``/dev/pts/ptmx`` node
+with permissions ``0000``.
+
+To retain backwards compatibility, a ptmx device node (that is, any node
+created with ``mknod name c 5 2``), when opened, will look for an instance
+of devpts under the name ``pts`` in the same directory as the ptmx device
+node.
+
+As an option, instead of placing a ptmx device node at ``/dev/ptmx``,
+it is possible to place a symlink to ``/dev/pts/ptmx`` at ``/dev/ptmx`` or
+to bind mount ``/dev/pts/ptmx`` to ``/dev/ptmx``. If you opt for using
+the devpts filesystem in this manner, devpts should be mounted with
+the ``ptmxmode=0666`` option, or ``chmod 0666 /dev/pts/ptmx`` should be called.
+
+Total count of pty pairs in all instances is limited by sysctls::
+
+ kernel.pty.max = 4096 - global limit
+ kernel.pty.reserve = 1024 - reserved for filesystems mounted from the initial mount namespace
+ kernel.pty.nr - current count of ptys
+
+A per-instance limit can be set by adding the mount option ``max=<count>``.
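+
+For example, an instance with a 512-pty limit and a world-usable ptmx node
+could be mounted with (the mount point is illustrative)::
+
+	mount -t devpts -o ptmxmode=0666,max=512 devpts /dev/pts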
+
+This feature was added in kernel 3.4 together with
+``sysctl kernel.pty.reserve``.
+
+In kernels older than 3.4, the sysctl ``kernel.pty.max`` works as a
+per-instance limit.
diff --git a/Documentation/filesystems/devpts.txt b/Documentation/filesystems/devpts.txt
deleted file mode 100644
index 9f94fe276dea..000000000000
--- a/Documentation/filesystems/devpts.txt
+++ /dev/null
@@ -1,26 +0,0 @@
-Each mount of the devpts filesystem is now distinct such that ptys
-and their indicies allocated in one mount are independent from ptys
-and their indicies in all other mounts.
-
-All mounts of the devpts filesystem now create a /dev/pts/ptmx node
-with permissions 0000.
-
-To retain backwards compatibility the a ptmx device node (aka any node
-created with "mknod name c 5 2") when opened will look for an instance
-of devpts under the name "pts" in the same directory as the ptmx device
-node.
-
-As an option instead of placing a /dev/ptmx device node at /dev/ptmx
-it is possible to place a symlink to /dev/pts/ptmx at /dev/ptmx or
-to bind mount /dev/ptx/ptmx to /dev/ptmx. If you opt for using
-the devpts filesystem in this manner devpts should be mounted with
-the ptmxmode=0666, or chmod 0666 /dev/pts/ptmx should be called.
-
-Total count of pty pairs in all instances is limited by sysctls:
-kernel.pty.max = 4096 - global limit
-kernel.pty.reserve = 1024 - reserved for filesystems mounted from the initial mount namespace
-kernel.pty.nr - current count of ptys
-
-Per-instance limit could be set by adding mount option "max=<count>".
-This feature was added in kernel 3.4 together with sysctl kernel.pty.reserve.
-In kernels older than 3.4 sysctl kernel.pty.max works as per-instance limit.
diff --git a/Documentation/filesystems/dnotify.txt b/Documentation/filesystems/dnotify.rst
index 15156883d321..a28a1f9ef79c 100644
--- a/Documentation/filesystems/dnotify.txt
+++ b/Documentation/filesystems/dnotify.rst
@@ -1,5 +1,8 @@
- Linux Directory Notification
- ============================
+.. SPDX-License-Identifier: GPL-2.0
+
+============================
+Linux Directory Notification
+============================
Stephen Rothwell <sfr@canb.auug.org.au>
@@ -12,6 +15,7 @@ being delivered using signals.
The application decides which "events" it wants to be notified about.
The currently defined events are:
+ ========= =====================================================
DN_ACCESS A file in the directory was accessed (read)
DN_MODIFY A file in the directory was modified (write,truncate)
DN_CREATE A file was created in the directory
@@ -19,6 +23,7 @@ The currently defined events are:
DN_RENAME A file in the directory was renamed
DN_ATTRIB A file in the directory had its attributes
changed (chmod,chown)
+ ========= =====================================================
Usually, the application must reregister after each notification, but
if DN_MULTISHOT is or'ed with the event mask, then the registration will
@@ -36,7 +41,7 @@ especially important if DN_MULTISHOT is specified. Note that SIGRTMIN
is often blocked, so it is better to use (at least) SIGRTMIN + 1.
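+
+As an illustrative sketch (in addition to the selftest referenced below), a
+watcher might be set up roughly as follows::
+
+	#define _GNU_SOURCE		/* for F_SETSIG, F_NOTIFY and DN_* */
+	#include <fcntl.h>
+	#include <signal.h>
+	#include <stdio.h>
+	#include <unistd.h>
+
+	static volatile sig_atomic_t event_fd;
+
+	static void handler(int sig, siginfo_t *si, void *ucontext)
+	{
+		event_fd = si->si_fd;	/* fd of the directory that changed */
+	}
+
+	int main(void)
+	{
+		struct sigaction sa = {
+			.sa_sigaction	= handler,
+			.sa_flags	= SA_SIGINFO,
+		};
+		int fd = open(".", O_RDONLY);
+
+		if (fd < 0)
+			return 1;
+		sigaction(SIGRTMIN + 1, &sa, NULL);
+		fcntl(fd, F_SETSIG, SIGRTMIN + 1);
+		fcntl(fd, F_NOTIFY, DN_MODIFY | DN_CREATE | DN_MULTISHOT);
+
+		for (;;) {
+			pause();
+			printf("change in directory watched via fd %d\n",
+			       (int)event_fd);
+		}
+	}
+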
Implementation expectations (features and bugs :-))
----------------------------
+---------------------------------------------------
The notification should work for any local access to files even if the
actual file system is on a remote server. This implies that remote
@@ -67,4 +72,4 @@ See tools/testing/selftests/filesystems/dnotify_test.c for an example.
NOTE
----
Beginning with Linux 2.6.13, dnotify has been replaced by inotify.
-See Documentation/filesystems/inotify.txt for more information on it.
+See Documentation/filesystems/inotify.rst for more information on it.
diff --git a/Documentation/filesystems/efivarfs.rst b/Documentation/filesystems/efivarfs.rst
index 90ac65683e7e..0551985821b8 100644
--- a/Documentation/filesystems/efivarfs.rst
+++ b/Documentation/filesystems/efivarfs.rst
@@ -24,3 +24,20 @@ files that are not well-known standardized variables are created
as immutable files. This doesn't prevent removal - "chattr -i" will work -
but it does prevent this kind of failure from being accomplished
accidentally.
+
+.. warning ::
+ When the content of a UEFI variable in /sys/firmware/efi/efivars is
+ displayed, for example using "hexdump", note that the first
+ 4 bytes of the output represent the UEFI variable attributes,
+ in little-endian format.
+
+ In practice, the output of each efivar is composed of:
+
+ +-----------------------------------+
+ |4_bytes_of_attributes + efivar_data|
+ +-----------------------------------+
+
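+For example, the variable payload can be dumped without the attribute bytes
+by skipping them (the variable name below is illustrative)::
+
+  dd if=/sys/firmware/efi/efivars/BootOrder-8be4df61-93ca-11d2-aa0d-00e098032b8c \
+     bs=1 skip=4 2>/dev/null | hexdump -C
+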
+*See also:*
+
+- Documentation/admin-guide/acpi/ssdt-overlays.rst
+- Documentation/ABI/stable/sysfs-firmware-efi-vars
diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst
index 87d794bc75a4..4218ac658629 100644
--- a/Documentation/filesystems/f2fs.rst
+++ b/Documentation/filesystems/f2fs.rst
@@ -225,8 +225,12 @@ fsync_mode=%s Control the policy of fsync. Currently supports "posix",
pass, but the performance will regress. "nobarrier" is
based on "posix", but doesn't issue flush command for
non-atomic files likewise "nobarrier" mount option.
-test_dummy_encryption Enable dummy encryption, which provides a fake fscrypt
+test_dummy_encryption
+test_dummy_encryption=%s
+ Enable dummy encryption, which provides a fake fscrypt
context. The fake fscrypt context is used by xfstests.
+ The argument may be either "v1" or "v2", in order to
+ select the corresponding fscrypt policy version.
checkpoint=%s[:%u[%]] Set to "disable" to turn off checkpointing. Set to "enable"
to reenable checkpointing. Is enabled by default. While
disabled, any unmounting or unexpected shutdowns will cause
diff --git a/Documentation/filesystems/fiemap.txt b/Documentation/filesystems/fiemap.rst
index ac87e6fda842..2a572e7edc08 100644
--- a/Documentation/filesystems/fiemap.txt
+++ b/Documentation/filesystems/fiemap.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
============
Fiemap Ioctl
============
@@ -10,9 +12,9 @@ returns a list of extents.
Request Basics
--------------
-A fiemap request is encoded within struct fiemap:
+A fiemap request is encoded within struct fiemap::
-struct fiemap {
+ struct fiemap {
__u64 fm_start; /* logical offset (inclusive) at
* which to start mapping (in) */
__u64 fm_length; /* logical length of mapping which
@@ -23,7 +25,7 @@ struct fiemap {
__u32 fm_extent_count; /* size of fm_extents array (in) */
__u32 fm_reserved;
struct fiemap_extent fm_extents[0]; /* array of mapped extents (out) */
-};
+ };
fm_start, and fm_length specify the logical range within the file
@@ -51,12 +53,12 @@ nothing to prevent the file from changing between calls to FIEMAP.
The following flags can be set in fm_flags:
-* FIEMAP_FLAG_SYNC
-If this flag is set, the kernel will sync the file before mapping extents.
+FIEMAP_FLAG_SYNC
+ If this flag is set, the kernel will sync the file before mapping extents.
-* FIEMAP_FLAG_XATTR
-If this flag is set, the extents returned will describe the inodes
-extended attribute lookup tree, instead of its data tree.
+FIEMAP_FLAG_XATTR
+  If this flag is set, the extents returned will describe the inode's
+  extended attribute lookup tree, instead of its data tree.
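+
+As a rough userspace sketch (not part of the kernel sources; most error
+handling is trimmed), a request can be set up and issued as follows::
+
+	#include <stdio.h>
+	#include <stdlib.h>
+	#include <fcntl.h>
+	#include <sys/ioctl.h>
+	#include <linux/fs.h>		/* FS_IOC_FIEMAP */
+	#include <linux/fiemap.h>	/* struct fiemap, struct fiemap_extent */
+
+	int main(int argc, char **argv)
+	{
+		unsigned int i, count = 32;
+		struct fiemap *fm;
+		int fd;
+
+		if (argc != 2)
+			return 1;
+		fm = calloc(1, sizeof(*fm) + count * sizeof(struct fiemap_extent));
+		fd = open(argv[1], O_RDONLY);
+		if (!fm || fd < 0)
+			return 1;
+
+		fm->fm_start = 0;
+		fm->fm_length = ~0ULL;			/* map the whole file */
+		fm->fm_flags = FIEMAP_FLAG_SYNC;	/* sync before mapping */
+		fm->fm_extent_count = count;
+
+		if (ioctl(fd, FS_IOC_FIEMAP, fm) < 0) {
+			perror("FS_IOC_FIEMAP");
+			return 1;
+		}
+
+		for (i = 0; i < fm->fm_mapped_extents; i++)
+			printf("logical %llu physical %llu length %llu flags 0x%x\n",
+			       (unsigned long long)fm->fm_extents[i].fe_logical,
+			       (unsigned long long)fm->fm_extents[i].fe_physical,
+			       (unsigned long long)fm->fm_extents[i].fe_length,
+			       fm->fm_extents[i].fe_flags);
+		return 0;
+	}
+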
Extent Mapping
@@ -75,18 +77,18 @@ complete the requested range and will not have the FIEMAP_EXTENT_LAST
flag set (see the next section on extent flags).
Each extent is described by a single fiemap_extent structure as
-returned in fm_extents.
-
-struct fiemap_extent {
- __u64 fe_logical; /* logical offset in bytes for the start of
- * the extent */
- __u64 fe_physical; /* physical offset in bytes for the start
- * of the extent */
- __u64 fe_length; /* length in bytes for the extent */
- __u64 fe_reserved64[2];
- __u32 fe_flags; /* FIEMAP_EXTENT_* flags for this extent */
- __u32 fe_reserved[3];
-};
+returned in fm_extents::
+
+ struct fiemap_extent {
+ __u64 fe_logical; /* logical offset in bytes for the start of
+ * the extent */
+ __u64 fe_physical; /* physical offset in bytes for the start
+ * of the extent */
+ __u64 fe_length; /* length in bytes for the extent */
+ __u64 fe_reserved64[2];
+ __u32 fe_flags; /* FIEMAP_EXTENT_* flags for this extent */
+ __u32 fe_reserved[3];
+ };
All offsets and lengths are in bytes and mirror those on disk. It is valid
for an extents logical offset to start before the request or its logical
@@ -114,26 +116,27 @@ worry about all present and future flags which might imply unaligned
data. Note that the opposite is not true - it would be valid for
FIEMAP_EXTENT_NOT_ALIGNED to appear alone.
-* FIEMAP_EXTENT_LAST
-This is generally the last extent in the file. A mapping attempt past
-this extent may return nothing. Some implementations set this flag to
-indicate this extent is the last one in the range queried by the user
-(via fiemap->fm_length).
+FIEMAP_EXTENT_LAST
+ This is generally the last extent in the file. A mapping attempt past
+ this extent may return nothing. Some implementations set this flag to
+ indicate this extent is the last one in the range queried by the user
+ (via fiemap->fm_length).
+
+FIEMAP_EXTENT_UNKNOWN
+ The location of this extent is currently unknown. This may indicate
+ the data is stored on an inaccessible volume or that no storage has
+ been allocated for the file yet.
-* FIEMAP_EXTENT_UNKNOWN
-The location of this extent is currently unknown. This may indicate
-the data is stored on an inaccessible volume or that no storage has
-been allocated for the file yet.
+              Always zero. Previously this counted pages which had been
+              written to the server, but had not yet been committed to
+              stable storage.
-* FIEMAP_EXTENT_DELALLOC
- - This will also set FIEMAP_EXTENT_UNKNOWN.
-Delayed allocation - while there is data for this extent, its
-physical location has not been allocated yet.
+ Delayed allocation - while there is data for this extent, its
+ physical location has not been allocated yet.
-* FIEMAP_EXTENT_ENCODED
-This extent does not consist of plain filesystem blocks but is
-encoded (e.g. encrypted or compressed). Reading the data in this
-extent via I/O to the block device will have undefined results.
+FIEMAP_EXTENT_ENCODED
+ This extent does not consist of plain filesystem blocks but is
+ encoded (e.g. encrypted or compressed). Reading the data in this
+ extent via I/O to the block device will have undefined results.
Note that it is *always* undefined to try to update the data
in-place by writing to the indicated location without the
@@ -145,32 +148,32 @@ unmounted, and then only if the FIEMAP_EXTENT_ENCODED flag is
clear; user applications must not try reading or writing to the
filesystem via the block device under any other circumstances.
-* FIEMAP_EXTENT_DATA_ENCRYPTED
- - This will also set FIEMAP_EXTENT_ENCODED
-The data in this extent has been encrypted by the file system.
+FIEMAP_EXTENT_DATA_ENCRYPTED
+ This will also set FIEMAP_EXTENT_ENCODED
+ The data in this extent has been encrypted by the file system.
-* FIEMAP_EXTENT_NOT_ALIGNED
-Extent offsets and length are not guaranteed to be block aligned.
+FIEMAP_EXTENT_NOT_ALIGNED
+ Extent offsets and length are not guaranteed to be block aligned.
-* FIEMAP_EXTENT_DATA_INLINE
+FIEMAP_EXTENT_DATA_INLINE
This will also set FIEMAP_EXTENT_NOT_ALIGNED
-Data is located within a meta data block.
+ Data is located within a meta data block.
-* FIEMAP_EXTENT_DATA_TAIL
+FIEMAP_EXTENT_DATA_TAIL
This will also set FIEMAP_EXTENT_NOT_ALIGNED
-Data is packed into a block with data from other files.
+ Data is packed into a block with data from other files.
-* FIEMAP_EXTENT_UNWRITTEN
-Unwritten extent - the extent is allocated but its data has not been
-initialized. This indicates the extent's data will be all zero if read
-through the filesystem but the contents are undefined if read directly from
-the device.
+FIEMAP_EXTENT_UNWRITTEN
+ Unwritten extent - the extent is allocated but its data has not been
+ initialized. This indicates the extent's data will be all zero if read
+ through the filesystem but the contents are undefined if read directly from
+ the device.
-* FIEMAP_EXTENT_MERGED
-This will be set when a file does not support extents, i.e., it uses a block
-based addressing scheme. Since returning an extent for each block back to
-userspace would be highly inefficient, the kernel will try to merge most
-adjacent blocks into 'extents'.
+FIEMAP_EXTENT_MERGED
+ This will be set when a file does not support extents, i.e., it uses a block
+ based addressing scheme. Since returning an extent for each block back to
+ userspace would be highly inefficient, the kernel will try to merge most
+ adjacent blocks into 'extents'.
VFS -> File System Implementation
@@ -179,23 +182,23 @@ VFS -> File System Implementation
File systems wishing to support fiemap must implement a ->fiemap callback on
their inode_operations structure. The fs ->fiemap call is responsible for
defining its set of supported fiemap flags, and calling a helper function on
-each discovered extent:
+each discovered extent::
-struct inode_operations {
+ struct inode_operations {
...
int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start,
u64 len);
->fiemap is passed struct fiemap_extent_info which describes the
-fiemap request:
+fiemap request::
-struct fiemap_extent_info {
+ struct fiemap_extent_info {
unsigned int fi_flags; /* Flags as passed from user */
unsigned int fi_extents_mapped; /* Number of mapped extents */
unsigned int fi_extents_max; /* Size of fiemap_extent array */
struct fiemap_extent *fi_extents_start; /* Start of fiemap_extent array */
-};
+ };
It is intended that the file system should not need to access any of this
structure directly. Filesystem handlers should be tolerant to signals and return
@@ -203,9 +206,9 @@ EINTR once fatal signal received.
Flag checking should be done at the beginning of the ->fiemap callback via the
-fiemap_check_flags() helper:
+fiemap_check_flags() helper::
-int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags);
+ int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags);
The struct fieinfo should be passed in as received from ioctl_fiemap(). The
set of fiemap flags which the fs understands should be passed via fs_flags. If
@@ -216,10 +219,10 @@ ioctl_fiemap().
For each extent in the request range, the file system should call
-the helper function, fiemap_fill_next_extent():
+the helper function, fiemap_fill_next_extent()::
-int fiemap_fill_next_extent(struct fiemap_extent_info *info, u64 logical,
- u64 phys, u64 len, u32 flags, u32 dev);
+ int fiemap_fill_next_extent(struct fiemap_extent_info *info, u64 logical,
+ u64 phys, u64 len, u32 flags, u32 dev);
fiemap_fill_next_extent() will use the passed values to populate the
next free extent in the fm_extents array. 'General' extent flags will
diff --git a/Documentation/filesystems/files.txt b/Documentation/filesystems/files.rst
index 46dfc6b038c3..cbf8e57376bf 100644
--- a/Documentation/filesystems/files.txt
+++ b/Documentation/filesystems/files.rst
@@ -1,5 +1,8 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===================================
File management in the Linux kernel
------------------------------------
+===================================
This document describes how locking for files (struct file)
and file descriptor table (struct files) works.
@@ -34,7 +37,7 @@ appear atomic. Here are the locking rules for
the fdtable structure -
1. All references to the fdtable must be done through
- the files_fdtable() macro :
+ the files_fdtable() macro::
struct fdtable *fdt;
@@ -61,7 +64,8 @@ the fdtable structure -
4. To look up the file structure given an fd, a reader
must use either fcheck() or fcheck_files() APIs. These
take care of barrier requirements due to lock-free lookup.
- An example :
+
+ An example::
struct file *file;
@@ -77,7 +81,7 @@ the fdtable structure -
of the fd (fget()/fget_light()) are lock-free, it is possible
that look-up may race with the last put() operation on the
file structure. This is avoided using atomic_long_inc_not_zero()
- on ->f_count :
+ on ->f_count::
rcu_read_lock();
file = fcheck_files(files, fd);
@@ -106,7 +110,8 @@ the fdtable structure -
holding files->file_lock. If ->file_lock is dropped, then
another thread expand the files thereby creating a new
fdtable and making the earlier fdtable pointer stale.
- For example :
+
+ For example::
spin_lock(&files->file_lock);
fd = locate_fd(files, file, start);
diff --git a/Documentation/filesystems/fscrypt.rst b/Documentation/filesystems/fscrypt.rst
index aa072112cfff..f517af8ec11c 100644
--- a/Documentation/filesystems/fscrypt.rst
+++ b/Documentation/filesystems/fscrypt.rst
@@ -292,8 +292,22 @@ files' data differently, inode numbers are included in the IVs.
Consequently, shrinking the filesystem may not be allowed.
This format is optimized for use with inline encryption hardware
-compliant with the UFS or eMMC standards, which support only 64 IV
-bits per I/O request and may have only a small number of keyslots.
+compliant with the UFS standard, which supports only 64 IV bits per
+I/O request and may have only a small number of keyslots.
+
+IV_INO_LBLK_32 policies
+-----------------------
+
+IV_INO_LBLK_32 policies work like IV_INO_LBLK_64, except that for
+IV_INO_LBLK_32, the inode number is hashed with SipHash-2-4 (where the
+SipHash key is derived from the master key) and added to the file
+logical block number mod 2^32 to produce a 32-bit IV.
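+
+Schematically, with ``k`` denoting the SipHash key derived from the master
+key, the computation described above is::
+
+	IV = (SipHash-2-4(inode_number, k) + file_logical_block_number) mod 2^32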
+
+This format is optimized for use with inline encryption hardware
+compliant with the eMMC v5.2 standard, which supports only 32 IV bits
+per I/O request and may have only a small number of keyslots. This
+format results in some level of IV reuse, so it should only be used
+when necessary due to hardware limitations.
Key identifiers
---------------
@@ -369,6 +383,10 @@ a little endian number, except that:
to 32 bits and is placed in bits 0-31 of the IV. The inode number
(which is also limited to 32 bits) is placed in bits 32-63.
+- With `IV_INO_LBLK_32 policies`_, the logical block number is limited
+ to 32 bits and is placed in bits 0-31 of the IV. The inode number
+ is then hashed and added mod 2^32.
+
Note that because file logical block numbers are included in the IVs,
filesystems must enforce that blocks are never shifted around within
encrypted files, e.g. via "collapse range" or "insert range".
@@ -465,8 +483,15 @@ This structure must be initialized as follows:
(0x3).
- FSCRYPT_POLICY_FLAG_DIRECT_KEY: See `DIRECT_KEY policies`_.
- FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64: See `IV_INO_LBLK_64
- policies`_. This is mutually exclusive with DIRECT_KEY and is not
- supported on v1 policies.
+ policies`_.
+ - FSCRYPT_POLICY_FLAG_IV_INO_LBLK_32: See `IV_INO_LBLK_32
+ policies`_.
+
+ v1 encryption policies only support the PAD_* and DIRECT_KEY flags.
+ The other flags are only supported by v2 encryption policies.
+
+ The DIRECT_KEY, IV_INO_LBLK_64, and IV_INO_LBLK_32 flags are
+ mutually exclusive.
- For v2 encryption policies, ``__reserved`` must be zeroed.
diff --git a/Documentation/filesystems/fuse-io.txt b/Documentation/filesystems/fuse-io.rst
index 07b8f73f100f..255a368fe534 100644
--- a/Documentation/filesystems/fuse-io.txt
+++ b/Documentation/filesystems/fuse-io.rst
@@ -1,3 +1,9 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==============
+Fuse I/O Modes
+==============
+
Fuse supports the following I/O modes:
- direct-io
diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst
index e7b46dac7079..17795341e0a3 100644
--- a/Documentation/filesystems/index.rst
+++ b/Documentation/filesystems/index.rst
@@ -24,6 +24,22 @@ algorithms work.
splice
locking
directory-locking
+ devpts
+ dnotify
+ fiemap
+ files
+ locks
+ mandatory-locking
+ mount_api
+ quota
+ seq_file
+ sharedsubtree
+ sysfs-pci
+ sysfs-tagging
+
+ automount-support
+
+ caching/index
porting
@@ -57,7 +73,10 @@ Documentation for filesystem implementations.
befs
bfs
btrfs
+ cifs/cifsroot
ceph
+ coda
+ configfs
cramfs
debugfs
dlmfs
@@ -73,6 +92,7 @@ Documentation for filesystem implementations.
hfsplus
hpfs
fuse
+ fuse-io
inotify
isofs
nilfs2
@@ -88,6 +108,7 @@ Documentation for filesystem implementations.
ramfs-rootfs-initramfs
relay
romfs
+ spufs/index
squashfs
sysfs
sysv-fs
@@ -97,4 +118,6 @@ Documentation for filesystem implementations.
udf
virtiofs
vfat
+ xfs-delayed-logging-design
+ xfs-self-describing-metadata
zonefs
diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst
index 5057e4d9dcd1..0af2e0e11461 100644
--- a/Documentation/filesystems/locking.rst
+++ b/Documentation/filesystems/locking.rst
@@ -239,6 +239,7 @@ prototypes::
int (*readpage)(struct file *, struct page *);
int (*writepages)(struct address_space *, struct writeback_control *);
int (*set_page_dirty)(struct page *page);
+ void (*readahead)(struct readahead_control *);
int (*readpages)(struct file *filp, struct address_space *mapping,
struct list_head *pages, unsigned nr_pages);
int (*write_begin)(struct file *, struct address_space *mapping,
@@ -271,7 +272,8 @@ writepage: yes, unlocks (see below)
readpage: yes, unlocks
writepages:
set_page_dirty no
-readpages:
+readahead: yes, unlocks
+readpages: no
write_begin: locks the page exclusive
write_end: yes, unlocks exclusive
bmap:
@@ -295,6 +297,8 @@ the request handler (/dev/loop).
->readpage() unlocks the page, either synchronously or via I/O
completion.
+->readahead() unlocks the pages that I/O is attempted on like ->readpage().
+
->readpages() populates the pagecache with the passed pages and starts
I/O against them. They come unlocked upon I/O completion.
diff --git a/Documentation/filesystems/locks.txt b/Documentation/filesystems/locks.rst
index 5368690f412e..c5ae858b1aac 100644
--- a/Documentation/filesystems/locks.txt
+++ b/Documentation/filesystems/locks.rst
@@ -1,4 +1,8 @@
- File Locking Release Notes
+.. SPDX-License-Identifier: GPL-2.0
+
+==========================
+File Locking Release Notes
+==========================
Andy Walker <andy@lysaker.kvaerner.no>
@@ -6,7 +10,7 @@
1. What's New?
---------------
+==============
1.1 Broken Flock Emulation
--------------------------
@@ -25,7 +29,7 @@ anyway (see the file "Documentation/process/changes.rst".)
---------------------------
1.2.1 Typical Problems - Sendmail
----------------------------------
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Because sendmail was unable to use the old flock() emulation, many sendmail
installations use fcntl() instead of flock(). This is true of Slackware 3.0
for example. This gave rise to some other subtle problems if sendmail was
@@ -37,7 +41,7 @@ to lock solid with deadlocked processes.
1.2.2 The Solution
-------------------
+^^^^^^^^^^^^^^^^^^
The solution I have chosen, after much experimentation and discussion,
is to make flock() and fcntl() locks oblivious to each other. Both can
exists, and neither will have any effect on the other.
@@ -54,7 +58,7 @@ fcntl(), with all the problems that implies.
---------------------------------------
Mandatory locking, as described in
-'Documentation/filesystems/mandatory-locking.txt' was prior to this release a
+'Documentation/filesystems/mandatory-locking.rst' was prior to this release a
general configuration option that was valid for all mounted filesystems. This
had a number of inherent dangers, not the least of which was the ability to
freeze an NFS server by asking it to read a file for which a mandatory lock
diff --git a/Documentation/filesystems/mandatory-locking.txt b/Documentation/filesystems/mandatory-locking.rst
index a251ca33164a..9ce73544a8f0 100644
--- a/Documentation/filesystems/mandatory-locking.txt
+++ b/Documentation/filesystems/mandatory-locking.rst
@@ -1,8 +1,13 @@
- Mandatory File Locking For The Linux Operating System
+.. SPDX-License-Identifier: GPL-2.0
+
+=====================================================
+Mandatory File Locking For The Linux Operating System
+=====================================================
Andy Walker <andy@lysaker.kvaerner.no>
15 April 1996
+
(Updated September 2007)
0. Why you should avoid mandatory locking
@@ -53,15 +58,17 @@ possible on existing user code. The scheme is based on marking individual files
as candidates for mandatory locking, and using the existing fcntl()/lockf()
interface for applying locks just as if they were normal, advisory locks.
-Note 1: In saying "file" in the paragraphs above I am actually not telling
-the whole truth. System V locking is based on fcntl(). The granularity of
-fcntl() is such that it allows the locking of byte ranges in files, in addition
-to entire files, so the mandatory locking rules also have byte level
-granularity.
+.. Note::
+
+ 1. In saying "file" in the paragraphs above I am actually not telling
+ the whole truth. System V locking is based on fcntl(). The granularity of
+ fcntl() is such that it allows the locking of byte ranges in files, in
+ addition to entire files, so the mandatory locking rules also have byte
+ level granularity.
-Note 2: POSIX.1 does not specify any scheme for mandatory locking, despite
-borrowing the fcntl() locking scheme from System V. The mandatory locking
-scheme is defined by the System V Interface Definition (SVID) Version 3.
+ 2. POSIX.1 does not specify any scheme for mandatory locking, despite
+ borrowing the fcntl() locking scheme from System V. The mandatory locking
+ scheme is defined by the System V Interface Definition (SVID) Version 3.
2. Marking a file for mandatory locking
---------------------------------------
diff --git a/Documentation/filesystems/mount_api.txt b/Documentation/filesystems/mount_api.rst
index 87c14bbb2b35..dea22d64f060 100644
--- a/Documentation/filesystems/mount_api.txt
+++ b/Documentation/filesystems/mount_api.rst
@@ -1,8 +1,10 @@
- ====================
- FILESYSTEM MOUNT API
- ====================
+.. SPDX-License-Identifier: GPL-2.0
-CONTENTS
+====================
+Filesystem Mount API
+====================
+
+.. CONTENTS
(1) Overview.
@@ -21,8 +23,7 @@ CONTENTS
(8) Parameter helper functions.
-========
-OVERVIEW
+Overview
========
The creation of new mounts is now to be done in a multistep process:
@@ -43,7 +44,7 @@ The creation of new mounts is now to be done in a multistep process:
(7) Destroy the context.
-To support this, the file_system_type struct gains two new fields:
+To support this, the file_system_type struct gains two new fields::
int (*init_fs_context)(struct fs_context *fc);
const struct fs_parameter_description *parameters;
@@ -57,12 +58,11 @@ Note that security initialisation is done *after* the filesystem is called so
that the namespaces may be adjusted first.
-======================
-THE FILESYSTEM CONTEXT
+The Filesystem Context
======================
The creation and reconfiguration of a superblock is governed by a filesystem
-context. This is represented by the fs_context structure:
+context. This is represented by the fs_context structure::
struct fs_context {
const struct fs_context_operations *ops;
@@ -86,78 +86,106 @@ context. This is represented by the fs_context structure:
The fs_context fields are as follows:
- (*) const struct fs_context_operations *ops
+ * ::
+
+ const struct fs_context_operations *ops
These are operations that can be done on a filesystem context (see
below). This must be set by the ->init_fs_context() file_system_type
operation.
- (*) struct file_system_type *fs_type
+ * ::
+
+ struct file_system_type *fs_type
A pointer to the file_system_type of the filesystem that is being
constructed or reconfigured. This retains a reference on the type owner.
- (*) void *fs_private
+ * ::
+
+ void *fs_private
A pointer to the file system's private data. This is where the filesystem
will need to store any options it parses.
- (*) struct dentry *root
+ * ::
+
+ struct dentry *root
A pointer to the root of the mountable tree (and indirectly, the
superblock thereof). This is filled in by the ->get_tree() op. If this
is set, an active reference on root->d_sb must also be held.
- (*) struct user_namespace *user_ns
- (*) struct net *net_ns
+ * ::
+
+ struct user_namespace *user_ns
+ struct net *net_ns
There are a subset of the namespaces in use by the invoking process. They
retain references on each namespace. The subscribed namespaces may be
replaced by the filesystem to reflect other sources, such as the parent
mount superblock on an automount.
- (*) const struct cred *cred
+ * ::
+
+ const struct cred *cred
The mounter's credentials. This retains a reference on the credentials.
- (*) char *source
+ * ::
+
+ char *source
This specifies the source. It may be a block device (e.g. /dev/sda1) or
something more exotic, such as the "host:/path" that NFS desires.
- (*) char *subtype
+ * ::
+
+ char *subtype
This is a string to be added to the type displayed in /proc/mounts to
qualify it (used by FUSE). This is available for the filesystem to set if
desired.
- (*) void *security
+ * ::
+
+ void *security
A place for the LSMs to hang their security data for the superblock. The
relevant security operations are described below.
- (*) void *s_fs_info
+ * ::
+
+ void *s_fs_info
The proposed s_fs_info for a new superblock, set in the superblock by
sget_fc(). This can be used to distinguish superblocks.
- (*) unsigned int sb_flags
- (*) unsigned int sb_flags_mask
+ * ::
+
+ unsigned int sb_flags
+ unsigned int sb_flags_mask
Which bits SB_* flags are to be set/cleared in super_block::s_flags.
- (*) unsigned int s_iflags
+ * ::
+
+ unsigned int s_iflags
These will be bitwise-OR'd with s->s_iflags when a superblock is created.
- (*) enum fs_context_purpose
+ * ::
+
+ enum fs_context_purpose
This indicates the purpose for which the context is intended. The
available values are:
- FS_CONTEXT_FOR_MOUNT, -- New superblock for explicit mount
- FS_CONTEXT_FOR_SUBMOUNT -- New automatic submount of extant mount
- FS_CONTEXT_FOR_RECONFIGURE -- Change an existing mount
+ ========================== ======================================
+ FS_CONTEXT_FOR_MOUNT, New superblock for explicit mount
+ FS_CONTEXT_FOR_SUBMOUNT New automatic submount of extant mount
+ FS_CONTEXT_FOR_RECONFIGURE Change an existing mount
+ ========================== ======================================
The mount context is created by calling vfs_new_fs_context() or
vfs_dup_fs_context() and is destroyed with put_fs_context(). Note that the
@@ -176,11 +204,10 @@ mount context. For instance, NFS might pin the appropriate protocol version
module.
-=================================
-THE FILESYSTEM CONTEXT OPERATIONS
+The Filesystem Context Operations
=================================
-The filesystem context points to a table of operations:
+The filesystem context points to a table of operations::
struct fs_context_operations {
void (*free)(struct fs_context *fc);
@@ -195,24 +222,32 @@ The filesystem context points to a table of operations:
These operations are invoked by the various stages of the mount procedure to
manage the filesystem context. They are as follows:
- (*) void (*free)(struct fs_context *fc);
+ * ::
+
+ void (*free)(struct fs_context *fc);
Called to clean up the filesystem-specific part of the filesystem context
when the context is destroyed. It should be aware that parts of the
context may have been removed and NULL'd out by ->get_tree().
- (*) int (*dup)(struct fs_context *fc, struct fs_context *src_fc);
+ * ::
+
+ int (*dup)(struct fs_context *fc, struct fs_context *src_fc);
Called when a filesystem context has been duplicated to duplicate the
filesystem-private data. An error may be returned to indicate failure to
do this.
- [!] Note that even if this fails, put_fs_context() will be called
+ .. Warning::
+
+ Note that even if this fails, put_fs_context() will be called
immediately thereafter, so ->dup() *must* make the
filesystem-private data safe for ->free().
- (*) int (*parse_param)(struct fs_context *fc,
- struct struct fs_parameter *param);
+ * ::
+
+ int (*parse_param)(struct fs_context *fc,
+			   struct fs_parameter *param);
Called when a parameter is being added to the filesystem context. param
points to the key name and maybe a value object. VFS-specific options
@@ -224,7 +259,9 @@ manage the filesystem context. They are as follows:
If successful, 0 should be returned or a negative error code otherwise.
- (*) int (*parse_monolithic)(struct fs_context *fc, void *data);
+ * ::
+
+ int (*parse_monolithic)(struct fs_context *fc, void *data);
Called when the mount(2) system call is invoked to pass the entire data
page in one go. If this is expected to be just a list of "key[=val]"
@@ -236,7 +273,9 @@ manage the filesystem context. They are as follows:
finds it's the standard key-val list then it may pass it off to
generic_parse_monolithic().
- (*) int (*get_tree)(struct fs_context *fc);
+ * ::
+
+ int (*get_tree)(struct fs_context *fc);
Called to get or create the mountable root and superblock, using the
information stored in the filesystem context (reconfiguration goes via a
@@ -249,7 +288,9 @@ manage the filesystem context. They are as follows:
The phase on a userspace-driven context will be set to only allow this to
be called once on any particular context.
- (*) int (*reconfigure)(struct fs_context *fc);
+ * ::
+
+ int (*reconfigure)(struct fs_context *fc);
Called to effect reconfiguration of a superblock using information stored
in the filesystem context. It may detach any resources it desires from
@@ -259,19 +300,20 @@ manage the filesystem context. They are as follows:
On success it should return 0. In the case of an error, it should return
a negative error code.
- [NOTE] reconfigure is intended as a replacement for remount_fs.
+ .. Note:: reconfigure is intended as a replacement for remount_fs.
-===========================
-FILESYSTEM CONTEXT SECURITY
+Filesystem Context Security
===========================
The filesystem context contains a security pointer that the LSMs can use for
building up a security context for the superblock to be mounted. There are a
number of operations used by the new mount code for this purpose:
- (*) int security_fs_context_alloc(struct fs_context *fc,
- struct dentry *reference);
+ * ::
+
+ int security_fs_context_alloc(struct fs_context *fc,
+ struct dentry *reference);
Called to initialise fc->security (which is preset to NULL) and allocate
any resources needed. It should return 0 on success or a negative error
@@ -283,22 +325,28 @@ number of operations used by the new mount code for this purpose:
non-NULL in the case of a submount (FS_CONTEXT_FOR_SUBMOUNT) in which case
it indicates the automount point.
- (*) int security_fs_context_dup(struct fs_context *fc,
- struct fs_context *src_fc);
+ * ::
+
+ int security_fs_context_dup(struct fs_context *fc,
+ struct fs_context *src_fc);
Called to initialise fc->security (which is preset to NULL) and allocate
any resources needed. The original filesystem context is pointed to by
src_fc and may be used for reference. It should return 0 on success or a
negative error code on failure.
- (*) void security_fs_context_free(struct fs_context *fc);
+ * ::
+
+ void security_fs_context_free(struct fs_context *fc);
Called to clean up anything attached to fc->security. Note that the
contents may have been transferred to a superblock and the pointer cleared
during get_tree.
- (*) int security_fs_context_parse_param(struct fs_context *fc,
- struct fs_parameter *param);
+ * ::
+
+ int security_fs_context_parse_param(struct fs_context *fc,
+ struct fs_parameter *param);
Called for each mount parameter, including the source. The arguments are
as for the ->parse_param() method. It should return 0 to indicate that
@@ -310,7 +358,9 @@ number of operations used by the new mount code for this purpose:
(provided the value pointer is NULL'd out). If it is stolen, 1 must be
returned to prevent it being passed to the filesystem.
- (*) int security_fs_context_validate(struct fs_context *fc);
+ * ::
+
+ int security_fs_context_validate(struct fs_context *fc);
Called after all the options have been parsed to validate the collection
as a whole and to do any necessary allocation so that
@@ -320,36 +370,43 @@ number of operations used by the new mount code for this purpose:
In the case of reconfiguration, the target superblock will be accessible
via fc->root.
- (*) int security_sb_get_tree(struct fs_context *fc);
+ * ::
+
+ int security_sb_get_tree(struct fs_context *fc);
Called during the mount procedure to verify that the specified superblock
is allowed to be mounted and to transfer the security data there. It
should return 0 or a negative error code.
- (*) void security_sb_reconfigure(struct fs_context *fc);
+ * ::
+
+ void security_sb_reconfigure(struct fs_context *fc);
Called to apply any reconfiguration to an LSM's context. It must not
fail. Error checking and resource allocation must be done in advance by
the parameter parsing and validation hooks.
- (*) int security_sb_mountpoint(struct fs_context *fc, struct path *mountpoint,
- unsigned int mnt_flags);
+ * ::
+
+ int security_sb_mountpoint(struct fs_context *fc,
+ struct path *mountpoint,
+ unsigned int mnt_flags);
Called during the mount procedure to verify that the root dentry attached
to the context is permitted to be attached to the specified mountpoint.
It should return 0 on success or a negative error code on failure.
-==========================
-VFS FILESYSTEM CONTEXT API
+VFS Filesystem Context API
==========================
There are four operations for creating a filesystem context and one for
destroying a context:
- (*) struct fs_context *fs_context_for_mount(
- struct file_system_type *fs_type,
- unsigned int sb_flags);
+ * ::
+
+ struct fs_context *fs_context_for_mount(struct file_system_type *fs_type,
+ unsigned int sb_flags);
Allocate a filesystem context for the purpose of setting up a new mount,
whether that be with a new superblock or sharing an existing one. This
@@ -359,7 +416,9 @@ destroying a context:
fs_type specifies the filesystem type that will manage the context and
sb_flags presets the superblock flags stored therein.
- (*) struct fs_context *fs_context_for_reconfigure(
+ * ::
+
+ struct fs_context *fs_context_for_reconfigure(
struct dentry *dentry,
unsigned int sb_flags,
unsigned int sb_flags_mask);
@@ -369,7 +428,9 @@ destroying a context:
configured. sb_flags and sb_flags_mask indicate which superblock flags
need changing and to what.
- (*) struct fs_context *fs_context_for_submount(
+ * ::
+
+ struct fs_context *fs_context_for_submount(
struct file_system_type *fs_type,
struct dentry *reference);
@@ -382,7 +443,9 @@ destroying a context:
Note that it's not a requirement that the reference dentry be of the same
filesystem type as fs_type.
- (*) struct fs_context *vfs_dup_fs_context(struct fs_context *src_fc);
+ * ::
+
+ struct fs_context *vfs_dup_fs_context(struct fs_context *src_fc);
Duplicate a filesystem context, copying any options noted and duplicating
or additionally referencing any resources held therein. This is available
@@ -392,14 +455,18 @@ destroying a context:
The purpose in the new context is inherited from the old one.
- (*) void put_fs_context(struct fs_context *fc);
+ * ::
+
+ void put_fs_context(struct fs_context *fc);
Destroy a filesystem context, releasing any resources it holds. This
calls the ->free() operation. This is intended to be called by anyone who
created a filesystem context.
- [!] filesystem contexts are not refcounted, so this causes unconditional
- destruction.
+ .. Warning::
+
+ filesystem contexts are not refcounted, so this causes unconditional
+ destruction.
In all the above operations, apart from the put op, the return is a mount
context pointer or a negative error code.
@@ -407,8 +474,10 @@ context pointer or a negative error code.
For the remaining operations, if an error occurs, a negative error code will be
returned.
- (*) int vfs_parse_fs_param(struct fs_context *fc,
- struct fs_parameter *param);
+ * ::
+
+ int vfs_parse_fs_param(struct fs_context *fc,
+ struct fs_parameter *param);
Supply a single mount parameter to the filesystem context. This include
the specification of the source/device which is specified as the "source"
@@ -423,53 +492,64 @@ returned.
The parameter value is typed and can be one of:
- fs_value_is_flag, Parameter not given a value.
- fs_value_is_string, Value is a string
- fs_value_is_blob, Value is a binary blob
- fs_value_is_filename, Value is a filename* + dirfd
- fs_value_is_file, Value is an open file (file*)
+ ==================== =============================
+ fs_value_is_flag Parameter not given a value
+ fs_value_is_string Value is a string
+ fs_value_is_blob Value is a binary blob
+ fs_value_is_filename Value is a filename* + dirfd
+ fs_value_is_file Value is an open file (file*)
+ ==================== =============================
If there is a value, that value is stored in a union in the struct in one
of param->{string,blob,name,file}. Note that the function may steal and
clear the pointer, but then becomes responsible for disposing of the
object.
- (*) int vfs_parse_fs_string(struct fs_context *fc, const char *key,
- const char *value, size_t v_size);
+ * ::
+
+ int vfs_parse_fs_string(struct fs_context *fc, const char *key,
+ const char *value, size_t v_size);
A wrapper around vfs_parse_fs_param() that copies the value string it is
passed.
- (*) int generic_parse_monolithic(struct fs_context *fc, void *data);
+ * ::
+
+ int generic_parse_monolithic(struct fs_context *fc, void *data);
Parse a sys_mount() data page, assuming the form to be a text list
consisting of key[=val] options separated by commas. Each item in the
list is passed to vfs_mount_option(). This is the default when the
->parse_monolithic() method is NULL.
- (*) int vfs_get_tree(struct fs_context *fc);
+ * ::
+
+ int vfs_get_tree(struct fs_context *fc);
Get or create the mountable root and superblock, using the parameters in
the filesystem context to select/configure the superblock. This invokes
the ->get_tree() method.
- (*) struct vfsmount *vfs_create_mount(struct fs_context *fc);
+ * ::
+
+ struct vfsmount *vfs_create_mount(struct fs_context *fc);
Create a mount given the parameters in the specified filesystem context.
Note that this does not attach the mount to anything.
-===========================
-SUPERBLOCK CREATION HELPERS
+Superblock Creation Helpers
===========================
A number of VFS helpers are available for use by filesystems for the creation
or looking up of superblocks.
- (*) struct super_block *
- sget_fc(struct fs_context *fc,
- int (*test)(struct super_block *sb, struct fs_context *fc),
- int (*set)(struct super_block *sb, struct fs_context *fc));
+ * ::
+
+ struct super_block *
+ sget_fc(struct fs_context *fc,
+ int (*test)(struct super_block *sb, struct fs_context *fc),
+ int (*set)(struct super_block *sb, struct fs_context *fc));
This is the core routine. If test is non-NULL, it searches for an
existing superblock matching the criteria held in the fs_context, using
@@ -482,10 +562,12 @@ or looking up of superblocks.
The following helpers all wrap sget_fc():
- (*) int vfs_get_super(struct fs_context *fc,
- enum vfs_get_super_keying keying,
- int (*fill_super)(struct super_block *sb,
- struct fs_context *fc))
+ * ::
+
+ int vfs_get_super(struct fs_context *fc,
+ enum vfs_get_super_keying keying,
+ int (*fill_super)(struct super_block *sb,
+ struct fs_context *fc))
This creates/looks up a deviceless superblock. The keying indicates how
many superblocks of this type may exist and in what manner they may be
@@ -515,14 +597,14 @@ PARAMETER DESCRIPTION
=====================
Parameters are described using structures defined in linux/fs_parser.h.
-There's a core description struct that links everything together:
+There's a core description struct that links everything together::
struct fs_parameter_description {
const struct fs_parameter_spec *specs;
const struct fs_parameter_enum *enums;
};
-For example:
+For example::
enum {
Opt_autocell,
@@ -539,10 +621,12 @@ For example:
The members are as follows:
- (1) const struct fs_parameter_specification *specs;
+ (1) ::
+
+ const struct fs_parameter_specification *specs;
Table of parameter specifications, terminated with a null entry, where the
- entries are of type:
+ entries are of type::
struct fs_parameter_spec {
const char *name;
@@ -558,6 +642,7 @@ The members are as follows:
The 'type' field indicates the desired value type and must be one of:
+ ======================= ======================= =====================
TYPE NAME EXPECTED VALUE RESULT IN
======================= ======================= =====================
fs_param_is_flag No value n/a
@@ -573,19 +658,23 @@ The members are as follows:
fs_param_is_blockdev Blockdev path * Needs lookup
fs_param_is_path Path * Needs lookup
fs_param_is_fd File descriptor result->int_32
+ ======================= ======================= =====================
Note that if the value is of fs_param_is_bool type, fs_parse() will try
to match any string value against "0", "1", "no", "yes", "false", "true".
Each parameter can also be qualified with 'flags':
+ ======================= ================================================
fs_param_v_optional The value is optional
fs_param_neg_with_no result->negated set if key is prefixed with "no"
fs_param_neg_with_empty result->negated set if value is ""
fs_param_deprecated The parameter is deprecated.
+ ======================= ================================================
These are wrapped with a number of convenience wrappers:
+ ======================= ===============================================
MACRO SPECIFIES
======================= ===============================================
fsparam_flag() fs_param_is_flag
@@ -602,9 +691,10 @@ The members are as follows:
fsparam_bdev() fs_param_is_blockdev
fsparam_path() fs_param_is_path
fsparam_fd() fs_param_is_fd
+ ======================= ===============================================
all of which take two arguments, name string and option number - for
- example:
+ example::
static const struct fs_parameter_spec afs_param_specs[] = {
fsparam_flag ("autocell", Opt_autocell),
@@ -618,10 +708,12 @@ The members are as follows:
of arguments to specify the type and the flags for anything that doesn't
match one of the above macros.
- (2) const struct fs_parameter_enum *enums;
+ (2) ::
+
+ const struct fs_parameter_enum *enums;
Table of enum value names to integer mappings, terminated with a null
- entry. This is of type:
+ entry. This is of type::
struct fs_parameter_enum {
u8 opt;
@@ -630,7 +722,7 @@ The members are as follows:
};
Where the array is an unsorted list of { parameter ID, name }-keyed
- elements that indicate the value to map to, e.g.:
+ elements that indicate the value to map to, e.g.::
static const struct fs_parameter_enum afs_param_enums[] = {
{ Opt_bar, "x", 1},
@@ -648,18 +740,19 @@ CONFIG_VALIDATE_FS_PARSER=y) and will allow the description to be queried from
userspace using the fsinfo() syscall.
-==========================
-PARAMETER HELPER FUNCTIONS
+Parameter Helper Functions
==========================
A number of helper functions are provided to help a filesystem or an LSM
process the parameters it is given.
- (*) int lookup_constant(const struct constant_table tbl[],
- const char *name, int not_found);
+ * ::
+
+ int lookup_constant(const struct constant_table tbl[],
+ const char *name, int not_found);
Look up a constant by name in a table of name -> integer mappings. The
- table is an array of elements of the following type:
+ table is an array of elements of the following type::
struct constant_table {
const char *name;
@@ -669,9 +762,11 @@ process the parameters it is given.
If a match is found, the corresponding value is returned. If a match
isn't found, the not_found value is returned instead.
- (*) bool validate_constant_table(const struct constant_table *tbl,
- size_t tbl_size,
- int low, int high, int special);
+ * ::
+
+ bool validate_constant_table(const struct constant_table *tbl,
+ size_t tbl_size,
+ int low, int high, int special);
Validate a constant table. Checks that all the elements are appropriately
ordered, that there are no duplicates and that the values are between low
@@ -682,16 +777,20 @@ process the parameters it is given.
If all is good, true is returned. If the table is invalid, errors are
logged to dmesg and false is returned.
- (*) bool fs_validate_description(const struct fs_parameter_description *desc);
+ * ::
+
+ bool fs_validate_description(const struct fs_parameter_description *desc);
This performs some validation checks on a parameter description. It
returns true if the description is good and false if it is not. It will
log errors to dmesg if validation fails.
- (*) int fs_parse(struct fs_context *fc,
- const struct fs_parameter_description *desc,
- struct fs_parameter *param,
- struct fs_parse_result *result);
+ * ::
+
+ int fs_parse(struct fs_context *fc,
+ const struct fs_parameter_description *desc,
+ struct fs_parameter *param,
+ struct fs_parse_result *result);
This is the main interpreter of parameters. It uses the parameter
description to look up a parameter by key name and to convert that to an
@@ -711,14 +810,16 @@ process the parameters it is given.
parameter is matched, but the value is erroneous, -EINVAL will be
returned; otherwise the parameter's option number will be returned.
- (*) int fs_lookup_param(struct fs_context *fc,
- struct fs_parameter *value,
- bool want_bdev,
- struct path *_path);
+ * ::
+
+ int fs_lookup_param(struct fs_context *fc,
+ struct fs_parameter *value,
+ bool want_bdev,
+ struct path *_path);
This takes a parameter that carries a string or filename type and attempts
to do a path lookup on it. If the parameter expects a blockdev, a check
is made that the inode actually represents one.
- Returns 0 if successful and *_path will be set; returns a negative error
- code if not.
+ Returns 0 if successful and ``*_path`` will be set; returns a negative
+ error code if not.
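
As a rough illustration of the parsing interface described above, a minimal
parameter table and parse_param hook might look like the sketch below. The
myfs_* names and Opt_* values are purely illustrative, and note that in more
recent kernels fs_parse() is passed the fs_parameter_spec table directly
rather than a wrapping description structure::

    #include <linux/fs_context.h>
    #include <linux/fs_parser.h>

    enum { Opt_autocell, Opt_rsize };

    static const struct fs_parameter_spec myfs_fs_parameters[] = {
            fsparam_flag("autocell", Opt_autocell),
            fsparam_u32 ("rsize",    Opt_rsize),
            {}
    };

    struct myfs_fs_context {        /* hypothetical private context */
            bool            autocell;
            unsigned int    rsize;
    };

    static int myfs_parse_param(struct fs_context *fc, struct fs_parameter *param)
    {
            struct myfs_fs_context *ctx = fc->fs_private;
            struct fs_parse_result result;
            int opt;

            /* Match the key, validate/convert the value, fill 'result'. */
            opt = fs_parse(fc, myfs_fs_parameters, param, &result);
            if (opt < 0)
                    return opt;

            switch (opt) {
            case Opt_autocell:
                    ctx->autocell = true;
                    break;
            case Opt_rsize:
                    ctx->rsize = result.uint_32;
                    break;
            }
            return 0;
    }
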
diff --git a/Documentation/filesystems/orangefs.rst b/Documentation/filesystems/orangefs.rst
index e41369709c5b..463e37694250 100644
--- a/Documentation/filesystems/orangefs.rst
+++ b/Documentation/filesystems/orangefs.rst
@@ -119,9 +119,7 @@ it comes to that question::
/opt/ofs/bin/pvfs2-genconfig /etc/pvfs2.conf
-Create an /etc/pvfs2tab file::
-
-Localhost is fine for your pvfs2tab file:
+Create an /etc/pvfs2tab file (localhost is fine)::
echo tcp://localhost:3334/orangefs /pvfsmnt pvfs2 defaults,noauto 0 0 > \
/etc/pvfs2tab
diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst
index 38b606991065..430963e0e8c3 100644
--- a/Documentation/filesystems/proc.rst
+++ b/Documentation/filesystems/proc.rst
@@ -543,6 +543,7 @@ encoded manner. The codes are the following:
hg huge page advise flag
nh no huge page advise flag
mg mergable advise flag
+ bt arm64 BTI guarded page
== =======================================
Note that there is no guarantee that every flag and associated mnemonic will
@@ -1042,8 +1043,8 @@ PageTables
amount of memory dedicated to the lowest level of page
tables.
NFS_Unstable
- NFS pages sent to the server, but not yet committed to stable
- storage
+ Always zero. Previously counted pages which had been written to
+ the server, but had not yet been committed to stable storage.
Bounce
Memory used for block device "bounce buffers"
WritebackTmp
@@ -1870,7 +1871,7 @@ unbindable mount is unbindable
For more information on mount propagation see:
- Documentation/filesystems/sharedsubtree.txt
+ Documentation/filesystems/sharedsubtree.rst
3.6 /proc/<pid>/comm & /proc/<pid>/task/<tid>/comm
diff --git a/Documentation/filesystems/quota.txt b/Documentation/filesystems/quota.rst
index 32874b06ebe9..a30cdd47c652 100644
--- a/Documentation/filesystems/quota.txt
+++ b/Documentation/filesystems/quota.rst
@@ -1,4 +1,6 @@
+.. SPDX-License-Identifier: GPL-2.0
+===============
Quota subsystem
===============
@@ -39,6 +41,7 @@ Currently, the interface supports only one message type QUOTA_NL_C_WARNING.
This command is used to send a notification about any of the above mentioned
events. Each message has six attributes. These are (type of the argument is
in parentheses):
+
QUOTA_NL_A_QTYPE (u32)
- type of quota being exceeded (one of USRQUOTA, GRPQUOTA)
QUOTA_NL_A_EXCESS_ID (u64)
@@ -48,20 +51,34 @@ in parentheses):
- UID of a user who caused the event
QUOTA_NL_A_WARNING (u32)
- what kind of limit is exceeded:
- QUOTA_NL_IHARDWARN - inode hardlimit
- QUOTA_NL_ISOFTLONGWARN - inode softlimit is exceeded longer
- than given grace period
- QUOTA_NL_ISOFTWARN - inode softlimit
- QUOTA_NL_BHARDWARN - space (block) hardlimit
- QUOTA_NL_BSOFTLONGWARN - space (block) softlimit is exceeded
- longer than given grace period.
- QUOTA_NL_BSOFTWARN - space (block) softlimit
+
+ QUOTA_NL_IHARDWARN
+ inode hardlimit
+ QUOTA_NL_ISOFTLONGWARN
+ inode softlimit has been exceeded for longer
+ than the given grace period
+ QUOTA_NL_ISOFTWARN
+ inode softlimit
+ QUOTA_NL_BHARDWARN
+ space (block) hardlimit
+ QUOTA_NL_BSOFTLONGWARN
+ space (block) softlimit has been exceeded
+ for longer than the given grace period.
+ QUOTA_NL_BSOFTWARN
+ space (block) softlimit
+
- four warnings are also defined for the event when user stops
exceeding some limit:
- QUOTA_NL_IHARDBELOW - inode hardlimit
- QUOTA_NL_ISOFTBELOW - inode softlimit
- QUOTA_NL_BHARDBELOW - space (block) hardlimit
- QUOTA_NL_BSOFTBELOW - space (block) softlimit
+
+ QUOTA_NL_IHARDBELOW
+ inode hardlimit
+ QUOTA_NL_ISOFTBELOW
+ inode softlimit
+ QUOTA_NL_BHARDBELOW
+ space (block) hardlimit
+ QUOTA_NL_BSOFTBELOW
+ space (block) softlimit
+
QUOTA_NL_A_DEV_MAJOR (u32)
- major number of a device with the affected filesystem
QUOTA_NL_A_DEV_MINOR (u32)
diff --git a/Documentation/filesystems/ramfs-rootfs-initramfs.rst b/Documentation/filesystems/ramfs-rootfs-initramfs.rst
index 6c576e241d86..3fddacc6bf14 100644
--- a/Documentation/filesystems/ramfs-rootfs-initramfs.rst
+++ b/Documentation/filesystems/ramfs-rootfs-initramfs.rst
@@ -71,7 +71,7 @@ be allowed write access to a ramfs mount.
A ramfs derivative called tmpfs was created to add size limits, and the ability
to write the data to swap space. Normal users can be allowed write access to
-tmpfs mounts. See Documentation/filesystems/tmpfs.txt for more information.
+tmpfs mounts. See Documentation/filesystems/tmpfs.rst for more information.
What is rootfs?
---------------
diff --git a/Documentation/filesystems/seq_file.txt b/Documentation/filesystems/seq_file.rst
index d412b236a9d6..fab302046b13 100644
--- a/Documentation/filesystems/seq_file.txt
+++ b/Documentation/filesystems/seq_file.rst
@@ -1,6 +1,11 @@
-The seq_file interface
+.. SPDX-License-Identifier: GPL-2.0
+
+======================
+The seq_file Interface
+======================
Copyright 2003 Jonathan Corbet <corbet@lwn.net>
+
This file is originally from the LWN.net Driver Porting series at
http://lwn.net/Articles/driver-porting/
@@ -43,7 +48,7 @@ loadable module which creates a file called /proc/sequence. The file, when
read, simply produces a set of increasing integer values, one per line. The
sequence will continue until the user loses patience and finds something
better to do. The file is seekable, in that one can do something like the
-following:
+following::
dd if=/proc/sequence of=out1 count=1
dd if=/proc/sequence skip=1 of=out2 count=1
@@ -55,16 +60,18 @@ wanting to see the full source for this module can find it at
http://lwn.net/Articles/22359/).
Deprecated create_proc_entry
+============================
Note that the above article uses create_proc_entry which was removed in
-kernel 3.10. Current versions require the following update
+kernel 3.10. Current versions require the following update::
-- entry = create_proc_entry("sequence", 0, NULL);
-- if (entry)
-- entry->proc_fops = &ct_file_ops;
-+ entry = proc_create("sequence", 0, NULL, &ct_file_ops);
+ - entry = create_proc_entry("sequence", 0, NULL);
+ - if (entry)
+ - entry->proc_fops = &ct_file_ops;
+ + entry = proc_create("sequence", 0, NULL, &ct_file_ops);
The iterator interface
+======================
Modules implementing a virtual file with seq_file must implement an
iterator object that allows stepping through the data of interest
@@ -99,7 +106,7 @@ position. The pos passed to start() will always be either zero, or
the most recent pos used in the previous session.
For our simple sequence example,
-the start() function looks like:
+the start() function looks like::
static void *ct_seq_start(struct seq_file *s, loff_t *pos)
{
@@ -129,7 +136,7 @@ move the iterator forward to the next position in the sequence. The
example module can simply increment the position by one; more useful
modules will do what is needed to step through some data structure. The
next() function returns a new iterator, or NULL if the sequence is
-complete. Here's the example version:
+complete. Here's the example version::
static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos)
{
@@ -141,10 +148,10 @@ complete. Here's the example version:
The stop() function closes a session; its job, of course, is to clean
up. If dynamic memory is allocated for the iterator, stop() is the
place to free it; if a lock was taken by start(), stop() must release
-that lock. The value that *pos was set to by the last next() call
+that lock. The value that ``*pos`` was set to by the last next() call
before stop() is remembered, and used for the first start() call of
the next session unless lseek() has been called on the file; in that
-case next start() will be asked to start at position zero.
+case next start() will be asked to start at position zero::
static void ct_seq_stop(struct seq_file *s, void *v)
{
@@ -152,7 +159,7 @@ case next start() will be asked to start at position zero.
}
Finally, the show() function should format the object currently pointed to
-by the iterator for output. The example module's show() function is:
+by the iterator for output. The example module's show() function is::
static int ct_seq_show(struct seq_file *s, void *v)
{
@@ -169,7 +176,7 @@ generated output before returning SEQ_SKIP, that output will be dropped.
We will look at seq_printf() in a moment. But first, the definition of the
seq_file iterator is finished by creating a seq_operations structure with
-the four functions we have just defined:
+the four functions we have just defined::
static const struct seq_operations ct_seq_ops = {
.start = ct_seq_start,
@@ -194,6 +201,7 @@ other locks while the iterator is active.
Formatted output
+================
The seq_file code manages positioning within the output created by the
iterator and getting it into the user's buffer. But, for that to work, that
@@ -203,7 +211,7 @@ been defined which make this task easy.
Most code will simply use seq_printf(), which works pretty much like
printk(), but which requires the seq_file pointer as an argument.
-For straight character output, the following functions may be used:
+For straight character output, the following functions may be used::
seq_putc(struct seq_file *m, char c);
seq_puts(struct seq_file *m, const char *s);
@@ -213,7 +221,7 @@ The first two output a single character and a string, just like one would
expect. seq_escape() is like seq_puts(), except that any character in s
which is in the string esc will be represented in octal form in the output.
-There are also a pair of functions for printing filenames:
+There are also a pair of functions for printing filenames::
int seq_path(struct seq_file *m, const struct path *path,
const char *esc);
@@ -226,8 +234,10 @@ the path relative to the current process's filesystem root. If a different
root is desired, it can be used with seq_path_root(). If it turns out that
path cannot be reached from root, seq_path_root() returns SEQ_SKIP.
-A function producing complicated output may want to check
+A function producing complicated output may want to check::
+
bool seq_has_overflowed(struct seq_file *m);
+
and avoid further seq_<output> calls if true is returned.
A true return from seq_has_overflowed means that the seq_file buffer will
@@ -236,6 +246,7 @@ buffer and retry printing.
Making it all work
+==================
So far, we have a nice set of functions which can produce output within the
seq_file system, but we have not yet turned them into a file that a user
@@ -244,7 +255,7 @@ creation of a set of file_operations which implement the operations on that
file. The seq_file interface provides a set of canned operations which do
most of the work. The virtual file author still must implement the open()
method, however, to hook everything up. The open function is often a single
-line, as in the example module:
+line, as in the example module::
static int ct_open(struct inode *inode, struct file *file)
{
@@ -263,7 +274,7 @@ by the iterator functions.
There is also a wrapper function to seq_open() called seq_open_private(). It
kmallocs a zero filled block of memory and stores a pointer to it in the
private field of the seq_file structure, returning 0 on success. The
-block size is specified in a third parameter to the function, e.g.:
+block size is specified in a third parameter to the function, e.g.::
static int ct_open(struct inode *inode, struct file *file)
{
@@ -273,7 +284,7 @@ block size is specified in a third parameter to the function, e.g.:
There is also a variant function, __seq_open_private(), which is functionally
identical except that, if successful, it returns the pointer to the allocated
-memory block, allowing further initialisation e.g.:
+memory block, allowing further initialisation e.g.::
static int ct_open(struct inode *inode, struct file *file)
{
@@ -295,7 +306,7 @@ frees the memory allocated in the corresponding open.
The other operations of interest - read(), llseek(), and release() - are
all implemented by the seq_file code itself. So a virtual file's
-file_operations structure will look like:
+file_operations structure will look like::
static const struct file_operations ct_file_ops = {
.owner = THIS_MODULE,
@@ -309,7 +320,7 @@ There is also a seq_release_private() which passes the contents of the
seq_file private field to kfree() before releasing the structure.
The final step is the creation of the /proc file itself. In the example
-code, that is done in the initialization code in the usual way:
+code, that is done in the initialization code in the usual way::
static int ct_init(void)
{
@@ -325,9 +336,10 @@ And that is pretty much it.
seq_list
+========
If your file will be iterating through a linked list, you may find these
-routines useful:
+routines useful::
struct list_head *seq_list_start(struct list_head *head,
loff_t pos);
@@ -338,15 +350,16 @@ routines useful:
These helpers will interpret pos as a position within the list and iterate
accordingly. Your start() and next() functions need only invoke the
-seq_list_* helpers with a pointer to the appropriate list_head structure.
+``seq_list_*`` helpers with a pointer to the appropriate list_head structure.
The extra-simple version
+========================
For extremely simple virtual files, there is an even easier interface. A
module can define only the show() function, which should create all the
output that the virtual file will contain. The file's open() method then
-calls:
+calls::
int single_open(struct file *file,
int (*show)(struct seq_file *m, void *p),
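
Pulling the pieces above together, a consolidated sketch of the example
/proc/sequence iterator looks like this (registration of the /proc entry is
omitted, and the ct_* names simply follow the article's example)::

    #include <linux/seq_file.h>
    #include <linux/slab.h>

    static void *ct_seq_start(struct seq_file *s, loff_t *pos)
    {
            /* the iterator is just a heap-allocated copy of the position */
            loff_t *spos = kmalloc(sizeof(loff_t), GFP_KERNEL);

            if (!spos)
                    return NULL;
            *spos = *pos;
            return spos;
    }

    static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos)
    {
            loff_t *spos = v;

            *pos = ++*spos;
            return spos;            /* returning NULL would end the sequence */
    }

    static void ct_seq_stop(struct seq_file *s, void *v)
    {
            kfree(v);               /* release whatever start() allocated */
    }

    static int ct_seq_show(struct seq_file *s, void *v)
    {
            loff_t *spos = v;

            seq_printf(s, "%lld\n", (long long)*spos);
            return 0;
    }

    static const struct seq_operations ct_seq_ops = {
            .start = ct_seq_start,
            .next  = ct_seq_next,
            .stop  = ct_seq_stop,
            .show  = ct_seq_show,
    };
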
diff --git a/Documentation/filesystems/sharedsubtree.txt b/Documentation/filesystems/sharedsubtree.rst
index 8ccfbd55244b..d83395354250 100644
--- a/Documentation/filesystems/sharedsubtree.txt
+++ b/Documentation/filesystems/sharedsubtree.rst
@@ -1,7 +1,10 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===============
Shared Subtrees
----------------
+===============
-Contents:
+.. Contents:
1) Overview
2) Features
3) Setting mount states
@@ -41,31 +44,38 @@ replicas continue to be exactly same.
Here is an example:
- Let's say /mnt has a mount that is shared.
- mount --make-shared /mnt
+ Let's say /mnt has a mount that is shared::
+
+ mount --make-shared /mnt
Note: mount(8) command now supports the --make-shared flag,
so the sample 'smount' program is no longer needed and has been
removed.
- # mount --bind /mnt /tmp
+ ::
+
+ # mount --bind /mnt /tmp
+
The above command replicates the mount at /mnt to the mountpoint /tmp
and the contents of both the mounts remain identical.
- #ls /mnt
- a b c
+ ::
- #ls /tmp
- a b c
+ #ls /mnt
+ a b c
- Now let's say we mount a device at /tmp/a
- # mount /dev/sd0 /tmp/a
+ #ls /tmp
+ a b c
- #ls /tmp/a
- t1 t2 t3
+ Now let's say we mount a device at /tmp/a::
- #ls /mnt/a
- t1 t2 t3
+ # mount /dev/sd0 /tmp/a
+
+ #ls /tmp/a
+ t1 t2 t3
+
+ #ls /mnt/a
+ t1 t2 t3
Note that the mount has propagated to the mount at /mnt as well.
@@ -123,14 +133,15 @@ replicas continue to be exactly same.
2d) An unbindable mount is an unbindable private mount
- let's say we have a mount at /mnt and we make it unbindable
+ let's say we have a mount at /mnt and we make it unbindable::
+
+ # mount --make-unbindable /mnt
- # mount --make-unbindable /mnt
+ Let's try to bind mount this mount somewhere else::
- Let's try to bind mount this mount somewhere else.
- # mount --bind /mnt /tmp
- mount: wrong fs type, bad option, bad superblock on /mnt,
- or too many mounted file systems
+ # mount --bind /mnt /tmp
+ mount: wrong fs type, bad option, bad superblock on /mnt,
+ or too many mounted file systems
Binding an unbindable mount is an invalid operation.
@@ -138,12 +149,12 @@ replicas continue to be exactly same.
3) Setting mount states
The mount command (util-linux package) can be used to set mount
- states:
+ states::
- mount --make-shared mountpoint
- mount --make-slave mountpoint
- mount --make-private mountpoint
- mount --make-unbindable mountpoint
+ mount --make-shared mountpoint
+ mount --make-slave mountpoint
+ mount --make-private mountpoint
+ mount --make-unbindable mountpoint
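
The same state changes can be requested programmatically; as a minimal
sketch, the mount(8) invocations above correspond to mount(2) calls in which
the source, filesystem type and data arguments are ignored (add MS_REC for
the --make-r* variants)::

    #include <sys/mount.h>
    #include <stdio.h>

    int main(void)
    {
            /* equivalent of "mount --make-shared /mnt" and friends */
            if (mount("none", "/mnt", NULL, MS_SHARED, NULL) == -1)
                    perror("make-shared");
            if (mount("none", "/mnt", NULL, MS_SLAVE, NULL) == -1)
                    perror("make-slave");
            if (mount("none", "/mnt", NULL, MS_PRIVATE, NULL) == -1)
                    perror("make-private");
            if (mount("none", "/mnt", NULL, MS_UNBINDABLE, NULL) == -1)
                    perror("make-unbindable");
            return 0;
    }
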
4) Use cases
@@ -154,9 +165,10 @@ replicas continue to be exactly same.
Solution:
- The system administrator can make the mount at /cdrom shared
- mount --bind /cdrom /cdrom
- mount --make-shared /cdrom
+ The system administrator can make the mount at /cdrom shared::
+
+ mount --bind /cdrom /cdrom
+ mount --make-shared /cdrom
Now any process that clones off a new namespace will have a
mount at /cdrom which is a replica of the same mount in the
@@ -172,14 +184,14 @@ replicas continue to be exactly same.
Solution:
To begin with, the administrator can mark the entire mount tree
- as shareable.
+ as shareable::
- mount --make-rshared /
+ mount --make-rshared /
A new process can clone off a new namespace. And mark some part
- of its namespace as slave
+ of its namespace as slave::
- mount --make-rslave /myprivatetree
+ mount --make-rslave /myprivatetree
Hence forth any mounts within the /myprivatetree done by the
process will not show up in any other namespace. However mounts
@@ -206,13 +218,13 @@ replicas continue to be exactly same.
versions of the file depending on the path used to access that
file.
- An example is:
+ An example is::
- mount --make-shared /
- mount --rbind / /view/v1
- mount --rbind / /view/v2
- mount --rbind / /view/v3
- mount --rbind / /view/v4
+ mount --make-shared /
+ mount --rbind / /view/v1
+ mount --rbind / /view/v2
+ mount --rbind / /view/v3
+ mount --rbind / /view/v4
and if /usr has a versioning filesystem mounted, then that
mount appears at /view/v1/usr, /view/v2/usr, /view/v3/usr and
@@ -224,8 +236,8 @@ replicas continue to be exactly same.
filesystem is being requested and return the corresponding
inode.
-5) Detailed semantics:
--------------------
+5) Detailed semantics
+---------------------
The section below explains the detailed semantics of
bind, rbind, move, mount, umount and clone-namespace operations.
@@ -235,6 +247,7 @@ replicas continue to be exactly same.
5a) Mount states
A given mount can be in one of the following states
+
1) shared
2) slave
3) shared and slave
@@ -252,7 +265,8 @@ replicas continue to be exactly same.
A 'shared mount' is defined as a vfsmount that belongs to a
'peer group'.
- For example:
+ For example::
+
mount --make-shared /mnt
mount --bind /mnt /tmp
@@ -270,7 +284,7 @@ replicas continue to be exactly same.
A slave mount as the name implies has a master mount from which
mount/unmount events are received. Events do not propagate from
the slave mount to the master. Only a shared mount can be made
- a slave by executing the following command
+ a slave by executing the following command::
mount --make-slave mount
@@ -290,8 +304,10 @@ replicas continue to be exactly same.
peer group.
Only a slave vfsmount can be made as 'shared and slave' by
- either executing the following command
+ either executing the following command::
+
mount --make-shared mount
+
or by moving the slave vfsmount under a shared vfsmount.
(4) Private mount
@@ -307,30 +323,32 @@ replicas continue to be exactly same.
State diagram:
+
The state diagram below explains the state transition of a mount,
- in response to various commands.
- ------------------------------------------------------------------------
- | |make-shared | make-slave | make-private |make-unbindab|
- --------------|------------|--------------|--------------|-------------|
- |shared |shared |*slave/private| private | unbindable |
- | | | | | |
- |-------------|------------|--------------|--------------|-------------|
- |slave |shared | **slave | private | unbindable |
- | |and slave | | | |
- |-------------|------------|--------------|--------------|-------------|
- |shared |shared | slave | private | unbindable |
- |and slave |and slave | | | |
- |-------------|------------|--------------|--------------|-------------|
- |private |shared | **private | private | unbindable |
- |-------------|------------|--------------|--------------|-------------|
- |unbindable |shared |**unbindable | private | unbindable |
- ------------------------------------------------------------------------
-
- * if the shared mount is the only mount in its peer group, making it
- slave, makes it private automatically. Note that there is no master to
- which it can be slaved to.
-
- ** slaving a non-shared mount has no effect on the mount.
+ in response to various commands::
+
+ -----------------------------------------------------------------------
+ | |make-shared | make-slave | make-private |make-unbindab|
+ --------------|------------|--------------|--------------|-------------|
+ |shared |shared |*slave/private| private | unbindable |
+ | | | | | |
+ |-------------|------------|--------------|--------------|-------------|
+ |slave |shared | **slave | private | unbindable |
+ | |and slave | | | |
+ |-------------|------------|--------------|--------------|-------------|
+ |shared |shared | slave | private | unbindable |
+ |and slave |and slave | | | |
+ |-------------|------------|--------------|--------------|-------------|
+ |private |shared | **private | private | unbindable |
+ |-------------|------------|--------------|--------------|-------------|
+ |unbindable |shared |**unbindable | private | unbindable |
+ ------------------------------------------------------------------------
+
+ * if the shared mount is the only mount in its peer group, making it
+ a slave makes it private automatically. Note that there is no master
+ to which it can be slaved.
+
+ ** slaving a non-shared mount has no effect on the mount.
Apart from the commands listed below, the 'move' operation also changes
the state of a mount depending on type of the destination mount. Its
@@ -338,31 +356,32 @@ replicas continue to be exactly same.
5b) Bind semantics
- Consider the following command
+ Consider the following command::
- mount --bind A/a B/b
+ mount --bind A/a B/b
where 'A' is the source mount, 'a' is the dentry in the mount 'A', 'B'
is the destination mount and 'b' is the dentry in the destination mount.
The outcome depends on the type of mount of 'A' and 'B'. The table
- below contains quick reference.
- ---------------------------------------------------------------------------
- | BIND MOUNT OPERATION |
- |**************************************************************************
- |source(A)->| shared | private | slave | unbindable |
- | dest(B) | | | | |
- | | | | | | |
- | v | | | | |
- |**************************************************************************
- | shared | shared | shared | shared & slave | invalid |
- | | | | | |
- |non-shared| shared | private | slave | invalid |
- ***************************************************************************
+ below contains quick reference::
+
+ --------------------------------------------------------------------------
+ | BIND MOUNT OPERATION |
+ |************************************************************************|
+ |source(A)->| shared | private | slave | unbindable |
+ | dest(B) | | | | |
+ | | | | | | |
+ | v | | | | |
+ |************************************************************************|
+ | shared | shared | shared | shared & slave | invalid |
+ | | | | | |
+ |non-shared| shared | private | slave | invalid |
+ **************************************************************************
Details:
- 1. 'A' is a shared mount and 'B' is a shared mount. A new mount 'C'
+ 1. 'A' is a shared mount and 'B' is a shared mount. A new mount 'C'
which is clone of 'A', is created. Its root dentry is 'a' . 'C' is
mounted on mount 'B' at dentry 'b'. Also new mount 'C1', 'C2', 'C3' ...
are created and mounted at the dentry 'b' on all mounts where 'B'
@@ -371,7 +390,7 @@ replicas continue to be exactly same.
'B'. And finally the peer-group of 'C' is merged with the peer group
of 'A'.
- 2. 'A' is a private mount and 'B' is a shared mount. A new mount 'C'
+ 2. 'A' is a private mount and 'B' is a shared mount. A new mount 'C'
which is clone of 'A', is created. Its root dentry is 'a'. 'C' is
mounted on mount 'B' at dentry 'b'. Also new mount 'C1', 'C2', 'C3' ...
are created and mounted at the dentry 'b' on all mounts where 'B'
@@ -379,7 +398,7 @@ replicas continue to be exactly same.
'C', 'C1', .., 'Cn' with exactly the same configuration as the
propagation tree for 'B'.
- 3. 'A' is a slave mount of mount 'Z' and 'B' is a shared mount. A new
+ 3. 'A' is a slave mount of mount 'Z' and 'B' is a shared mount. A new
mount 'C' which is clone of 'A', is created. Its root dentry is 'a' .
'C' is mounted on mount 'B' at dentry 'b'. Also new mounts 'C1', 'C2',
'C3' ... are created and mounted at the dentry 'b' on all mounts where
@@ -389,19 +408,19 @@ replicas continue to be exactly same.
is made the slave of mount 'Z'. In other words, mount 'C' is in the
state 'slave and shared'.
- 4. 'A' is a unbindable mount and 'B' is a shared mount. This is a
+ 4. 'A' is an unbindable mount and 'B' is a shared mount. This is an
invalid operation.
- 5. 'A' is a private mount and 'B' is a non-shared(private or slave or
+ 5. 'A' is a private mount and 'B' is a non-shared(private or slave or
unbindable) mount. A new mount 'C' which is clone of 'A', is created.
Its root dentry is 'a'. 'C' is mounted on mount 'B' at dentry 'b'.
- 6. 'A' is a shared mount and 'B' is a non-shared mount. A new mount 'C'
+ 6. 'A' is a shared mount and 'B' is a non-shared mount. A new mount 'C'
which is a clone of 'A' is created. Its root dentry is 'a'. 'C' is
mounted on mount 'B' at dentry 'b'. 'C' is made a member of the
peer-group of 'A'.
- 7. 'A' is a slave mount of mount 'Z' and 'B' is a non-shared mount. A
+ 7. 'A' is a slave mount of mount 'Z' and 'B' is a non-shared mount. A
new mount 'C' which is a clone of 'A' is created. Its root dentry is
'a'. 'C' is mounted on mount 'B' at dentry 'b'. Also 'C' is set as a
slave mount of 'Z'. In other words 'A' and 'C' are both slave mounts of
@@ -409,7 +428,7 @@ replicas continue to be exactly same.
mount/unmount on 'A' do not propagate anywhere else. Similarly
mount/unmount on 'C' do not propagate anywhere else.
- 8. 'A' is a unbindable mount and 'B' is a non-shared mount. This is a
+ 8. 'A' is an unbindable mount and 'B' is a non-shared mount. This is an
invalid operation. An unbindable mount cannot be bind mounted.
5c) Rbind semantics
@@ -422,7 +441,9 @@ replicas continue to be exactly same.
then the subtree under the unbindable mount is pruned in the new
location.
- eg: let's say we have the following mount tree.
+ eg:
+
+ let's say we have the following mount tree::
A
/ \
@@ -430,12 +451,12 @@ replicas continue to be exactly same.
/ \ / \
D E F G
- Let's say all the mount except the mount C in the tree are
- of a type other than unbindable.
+ Let's say all the mount except the mount C in the tree are
+ of a type other than unbindable.
- If this tree is rbound to say Z
+ If this tree is rbound to say Z
- We will have the following tree at the new location.
+ We will have the following tree at the new location::
Z
|
@@ -457,24 +478,26 @@ replicas continue to be exactly same.
the dentry in the destination mount.
The outcome depends on the type of the mount of 'A' and 'B'. The table
- below is a quick reference.
- ---------------------------------------------------------------------------
- | MOVE MOUNT OPERATION |
- |**************************************************************************
- | source(A)->| shared | private | slave | unbindable |
- | dest(B) | | | | |
- | | | | | | |
- | v | | | | |
- |**************************************************************************
- | shared | shared | shared |shared and slave| invalid |
- | | | | | |
- |non-shared| shared | private | slave | unbindable |
- ***************************************************************************
- NOTE: moving a mount residing under a shared mount is invalid.
+ below is a quick reference::
+
+ ---------------------------------------------------------------------------
+ | MOVE MOUNT OPERATION |
+ |**************************************************************************
+ | source(A)->| shared | private | slave | unbindable |
+ | dest(B) | | | | |
+ | | | | | | |
+ | v | | | | |
+ |**************************************************************************
+ | shared | shared | shared |shared and slave| invalid |
+ | | | | | |
+ |non-shared| shared | private | slave | unbindable |
+ ***************************************************************************
+
+ .. Note:: moving a mount residing under a shared mount is invalid.
Details follow:
- 1. 'A' is a shared mount and 'B' is a shared mount. The mount 'A' is
+ 1. 'A' is a shared mount and 'B' is a shared mount. The mount 'A' is
mounted on mount 'B' at dentry 'b'. Also new mounts 'A1', 'A2'...'An'
are created and mounted at dentry 'b' on all mounts that receive
propagation from mount 'B'. A new propagation tree is created in the
@@ -483,7 +506,7 @@ replicas continue to be exactly same.
propagation tree is appended to the already existing propagation tree
of 'A'.
- 2. 'A' is a private mount and 'B' is a shared mount. The mount 'A' is
+ 2. 'A' is a private mount and 'B' is a shared mount. The mount 'A' is
mounted on mount 'B' at dentry 'b'. Also new mount 'A1', 'A2'... 'An'
are created and mounted at dentry 'b' on all mounts that receive
propagation from mount 'B'. The mount 'A' becomes a shared mount and a
@@ -491,7 +514,7 @@ replicas continue to be exactly same.
'B'. This new propagation tree contains all the new mounts 'A1',
'A2'... 'An'.
- 3. 'A' is a slave mount of mount 'Z' and 'B' is a shared mount. The
+ 3. 'A' is a slave mount of mount 'Z' and 'B' is a shared mount. The
mount 'A' is mounted on mount 'B' at dentry 'b'. Also new mounts 'A1',
'A2'... 'An' are created and mounted at dentry 'b' on all mounts that
receive propagation from mount 'B'. A new propagation tree is created
@@ -501,32 +524,32 @@ replicas continue to be exactly same.
'A'. Mount 'A' continues to be the slave mount of 'Z' but it also
becomes 'shared'.
- 4. 'A' is a unbindable mount and 'B' is a shared mount. The operation
+ 4. 'A' is an unbindable mount and 'B' is a shared mount. The operation
is invalid. Because mounting anything on the shared mount 'B' can
create new mounts that get mounted on the mounts that receive
propagation from 'B'. And since the mount 'A' is unbindable, cloning
it to mount at other mountpoints is not possible.
- 5. 'A' is a private mount and 'B' is a non-shared(private or slave or
+ 5. 'A' is a private mount and 'B' is a non-shared(private or slave or
unbindable) mount. The mount 'A' is mounted on mount 'B' at dentry 'b'.
- 6. 'A' is a shared mount and 'B' is a non-shared mount. The mount 'A'
+ 6. 'A' is a shared mount and 'B' is a non-shared mount. The mount 'A'
is mounted on mount 'B' at dentry 'b'. Mount 'A' continues to be a
shared mount.
- 7. 'A' is a slave mount of mount 'Z' and 'B' is a non-shared mount.
+ 7. 'A' is a slave mount of mount 'Z' and 'B' is a non-shared mount.
The mount 'A' is mounted on mount 'B' at dentry 'b'. Mount 'A'
continues to be a slave mount of mount 'Z'.
- 8. 'A' is a unbindable mount and 'B' is a non-shared mount. The mount
+ 8. 'A' is an unbindable mount and 'B' is a non-shared mount. The mount
'A' is mounted on mount 'B' at dentry 'b'. Mount 'A' continues to be an
unbindable mount.
5e) Mount semantics
- Consider the following command
+ Consider the following command::
- mount device B/b
+ mount device B/b
'B' is the destination mount and 'b' is the dentry in the destination
mount.
@@ -537,9 +560,9 @@ replicas continue to be exactly same.
5f) Unmount semantics
- Consider the following command
+ Consider the following command::
- umount A
+ umount A
where 'A' is a mount mounted on mount 'B' at dentry 'b'.
@@ -592,10 +615,12 @@ replicas continue to be exactly same.
A. What is the result of the following command sequence?
- mount --bind /mnt /mnt
- mount --make-shared /mnt
- mount --bind /mnt /tmp
- mount --move /tmp /mnt/1
+ ::
+
+ mount --bind /mnt /mnt
+ mount --make-shared /mnt
+ mount --bind /mnt /tmp
+ mount --move /tmp /mnt/1
what should be the contents of /mnt /mnt/1 /mnt/1/1 should be?
Should they all be identical? or should /mnt and /mnt/1 be
@@ -604,23 +629,27 @@ replicas continue to be exactly same.
B. What is the result of the following command sequence?
- mount --make-rshared /
- mkdir -p /v/1
- mount --rbind / /v/1
+ ::
+
+ mount --make-rshared /
+ mkdir -p /v/1
+ mount --rbind / /v/1
what should be the content of /v/1/v/1 be?
C. What is the result of the following command sequence?
- mount --bind /mnt /mnt
- mount --make-shared /mnt
- mkdir -p /mnt/1/2/3 /mnt/1/test
- mount --bind /mnt/1 /tmp
- mount --make-slave /mnt
- mount --make-shared /mnt
- mount --bind /mnt/1/2 /tmp1
- mount --make-slave /mnt
+ ::
+
+ mount --bind /mnt /mnt
+ mount --make-shared /mnt
+ mkdir -p /mnt/1/2/3 /mnt/1/test
+ mount --bind /mnt/1 /tmp
+ mount --make-slave /mnt
+ mount --make-shared /mnt
+ mount --bind /mnt/1/2 /tmp1
+ mount --make-slave /mnt
At this point we have the first mount at /tmp and
its root dentry is 1. Let's call this mount 'A'
@@ -668,7 +697,8 @@ replicas continue to be exactly same.
step 1:
let's say the root tree has just two directories with
- one vfsmount.
+ one vfsmount::
+
root
/ \
tmp usr
@@ -676,14 +706,17 @@ replicas continue to be exactly same.
And we want to replicate the tree at multiple
mountpoints under /root/tmp
- step2:
- mount --make-shared /root
+ step 2:
+ ::
- mkdir -p /tmp/m1
- mount --rbind /root /tmp/m1
+ mount --make-shared /root
- the new tree now looks like this:
+ mkdir -p /tmp/m1
+
+ mount --rbind /root /tmp/m1
+
+ the new tree now looks like this::
root
/ \
@@ -697,11 +730,13 @@ replicas continue to be exactly same.
it has two vfsmounts
- step3:
+ step 3:
+ ::
+
mkdir -p /tmp/m2
mount --rbind /root /tmp/m2
- the new tree now looks like this:
+ the new tree now looks like this::
root
/ \
@@ -724,6 +759,7 @@ replicas continue to be exactly same.
it has 6 vfsmounts
step 4:
+ ::
mkdir -p /tmp/m3
mount --rbind /root /tmp/m3
@@ -740,7 +776,8 @@ replicas continue to be exactly same.
step 1:
let's say the root tree has just two directories with
- one vfsmount.
+ one vfsmount::
+
root
/ \
tmp usr
@@ -748,17 +785,20 @@ replicas continue to be exactly same.
How do we set up the same tree at multiple locations under
/root/tmp
- step2:
- mount --bind /root/tmp /root/tmp
+ step 2:
+ ::
- mount --make-rshared /root
- mount --make-unbindable /root/tmp
- mkdir -p /tmp/m1
+ mount --bind /root/tmp /root/tmp
- mount --rbind /root /tmp/m1
+ mount --make-rshared /root
+ mount --make-unbindable /root/tmp
- the new tree now looks like this:
+ mkdir -p /tmp/m1
+
+ mount --rbind /root /tmp/m1
+
+ the new tree now looks like this::
root
/ \
@@ -768,11 +808,13 @@ replicas continue to be exactly same.
/ \
tmp usr
- step3:
+ step 3:
+ ::
+
mkdir -p /tmp/m2
mount --rbind /root /tmp/m2
- the new tree now looks like this:
+ the new tree now looks like this::
root
/ \
@@ -782,12 +824,13 @@ replicas continue to be exactly same.
/ \ / \
tmp usr tmp usr
- step4:
+ step 4:
+ ::
mkdir -p /tmp/m3
mount --rbind /root /tmp/m3
- the new tree now looks like this:
+ the new tree now looks like this::
root
/ \
@@ -801,25 +844,31 @@ replicas continue to be exactly same.
8A) Datastructure
- 4 new fields are introduced to struct vfsmount
- ->mnt_share
- ->mnt_slave_list
- ->mnt_slave
- ->mnt_master
+ 4 new fields are introduced to struct vfsmount:
+
+ * ->mnt_share
+ * ->mnt_slave_list
+ * ->mnt_slave
+ * ->mnt_master
- ->mnt_share links together all the mount to/from which this vfsmount
+ ->mnt_share
+ links together all the mounts to/from which this vfsmount
sends/receives propagation events.
- ->mnt_slave_list links all the mounts to which this vfsmount propagates
+ ->mnt_slave_list
+ links all the mounts to which this vfsmount
propagates.
- ->mnt_slave links together all the slaves that its master vfsmount
+ ->mnt_slave
+ links together all the slaves that its master vfsmount
propagates to.
- ->mnt_master points to the master vfsmount from which this vfsmount
+ ->mnt_master
+ points to the master vfsmount from which this vfsmount
receives propagation.
- ->mnt_flags takes two more flags to indicate the propagation status of
+ ->mnt_flags
+ takes two more flags to indicate the propagation status of
the vfsmount. MNT_SHARE indicates that the vfsmount is a shared
vfsmount. MNT_UNCLONABLE indicates that the vfsmount cannot be
replicated.
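
For orientation only: in current kernels these propagation links live in
struct mount (fs/mount.h) rather than directly in struct vfsmount, roughly as
in the excerpt below (field names match the description above; check
fs/mount.h for the authoritative layout)::

    struct mount {
            /* ... */
            struct list_head mnt_share;     /* peers this mount propagates to/from */
            struct list_head mnt_slave_list;/* slaves of this mount */
            struct list_head mnt_slave;     /* entry in master's mnt_slave_list */
            struct mount *mnt_master;       /* mount we receive propagation from */
            /* ... */
    };
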
@@ -842,7 +891,7 @@ replicas continue to be exactly same.
An example propagation tree looks as shown in the figure below.
[ NOTE: Though it looks like a forest, if we consider all the shared
- mounts as a conceptual entity called 'pnode', it becomes a tree]
+ mounts as a conceptual entity called 'pnode', it becomes a tree]::
A <--> B <--> C <---> D
@@ -864,14 +913,19 @@ replicas continue to be exactly same.
A's ->mnt_slave_list links with ->mnt_slave of 'E', 'K', 'F' and 'G'
E's ->mnt_share links with ->mnt_share of K
- 'E', 'K', 'F', 'G' have their ->mnt_master point to struct
- vfsmount of 'A'
+
+ 'E', 'K', 'F', 'G' have their ->mnt_master point to struct vfsmount of 'A'
+
'M', 'L', 'N' have their ->mnt_master point to struct vfsmount of 'K'
+
K's ->mnt_slave_list links with ->mnt_slave of 'M', 'L' and 'N'
C's ->mnt_slave_list links with ->mnt_slave of 'J' and 'K'
+
J and K's ->mnt_master points to struct vfsmount of C
+
and finally D's ->mnt_slave_list links with ->mnt_slave of 'H' and 'I'
+
'H' and 'I' have their ->mnt_master pointing to struct vfsmount of 'D'.
@@ -903,6 +957,7 @@ replicas continue to be exactly same.
Prepare phase:
for each mount in the source tree:
+
a) Create the necessary number of mount trees to
be attached to each of the mounts that receive
propagation from the destination mount.
@@ -929,11 +984,12 @@ replicas continue to be exactly same.
Abort phase
delete all the newly created trees.
- NOTE: all the propagation related functionality resides in the file
- pnode.c
+ .. Note::
+ all the propagation related functionality resides in the file pnode.c
------------------------------------------------------------------------
version 0.1 (created the initial document, Ram Pai linuxram@us.ibm.com)
+
version 0.2 (Incorporated comments from Al Viro)
diff --git a/Documentation/filesystems/spufs/index.rst b/Documentation/filesystems/spufs/index.rst
new file mode 100644
index 000000000000..5ed4a8494967
--- /dev/null
+++ b/Documentation/filesystems/spufs/index.rst
@@ -0,0 +1,13 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==============
+SPU Filesystem
+==============
+
+
+.. toctree::
+ :maxdepth: 1
+
+ spufs
+ spu_create
+ spu_run
diff --git a/Documentation/filesystems/spufs/spu_create.rst b/Documentation/filesystems/spufs/spu_create.rst
new file mode 100644
index 000000000000..83108c099696
--- /dev/null
+++ b/Documentation/filesystems/spufs/spu_create.rst
@@ -0,0 +1,131 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==========
+spu_create
+==========
+
+Name
+====
+ spu_create - create a new spu context
+
+
+Synopsis
+========
+
+ ::
+
+ #include <sys/types.h>
+ #include <sys/spu.h>
+
+ int spu_create(const char *pathname, int flags, mode_t mode);
+
+Description
+===========
+ The spu_create system call is used on PowerPC machines that implement
+ the Cell Broadband Engine Architecture in order to access Synergistic
+ Processor Units (SPUs). It creates a new logical context for an SPU in
+ pathname and returns a handle associated with it. pathname must
+ point to a non-existing directory in the mount point of the SPU file
+ system (spufs). When spu_create is successful, a directory gets
+ created on pathname and it is populated with files.
+
+ The returned file handle can only be passed to spu_run(2) or closed,
+ other operations are not defined on it. When it is closed, all
+ associated directory entries in spufs are removed. When the last file handle
+ pointing either inside of the context directory or to this file
+ descriptor is closed, the logical SPU context is destroyed.
+
+ The parameter flags can be zero or any bitwise or'd combination of the
+ following constants:
+
+ SPU_RAWIO
+ Allow mapping of some of the hardware registers of the SPU into
+ user space. This flag requires the CAP_SYS_RAWIO capability, see
+ capabilities(7).
+
+ The mode parameter specifies the permissions used for creating the new
+ directory in spufs. mode is modified with the user's umask(2) value
+ and then used for both the directory and the files contained in it. The
+ file permissions mask out some more bits of mode because they typically
+ support only read or write access. See stat(2) for a full list of the
+ possible mode values.
+
+
+Return Value
+============
+ spu_create returns a new file descriptor. It may return -1 to indicate
+ an error condition and set errno to one of the error codes listed
+ below.
+
+
+Errors
+======
+ EACCES
+ The current user does not have write access on the spufs mount
+ point.
+
+ EEXIST An SPU context already exists at the given path name.
+
+ EFAULT pathname is not a valid string pointer in the current address
+ space.
+
+ EINVAL pathname is not a directory in the spufs mount point.
+
+ ELOOP Too many symlinks were found while resolving pathname.
+
+ EMFILE The process has reached its maximum open file limit.
+
+ ENAMETOOLONG
+ pathname was too long.
+
+ ENFILE The system has reached the global open file limit.
+
+ ENOENT Part of pathname could not be resolved.
+
+ ENOMEM The kernel could not allocate all resources required.
+
+ ENOSPC There are not enough SPU resources available to create a new
+ context or the user specific limit for the number of SPU
+ contexts has been reached.
+
+ ENOSYS the functionality is not provided by the current system, because
+ either the hardware does not provide SPUs or the spufs module is
+ not loaded.
+
+ ENOTDIR
+ A part of pathname is not a directory.
+
+
+
+Notes
+=====
+ spu_create is meant to be used from libraries that implement a more
+ abstract interface to SPUs, not to be used from regular applications.
+ See http://www.bsc.es/projects/deepcomputing/linuxoncell/ for the
+ recommended libraries.
+
+
+Files
+=====
+ pathname must point to a location beneath the mount point of spufs. By
+ convention, it gets mounted in /spu.
+
+
+Conforming to
+=============
+ This call is Linux specific and only implemented by the ppc64
+ architecture. Programs using this system call are not portable.
+
+
+Bugs
+====
+ The code does not yet fully implement all features described here.
+
+
+Author
+======
+ Arnd Bergmann <arndb@de.ibm.com>
+
+See Also
+========
+ capabilities(7), close(2), spu_run(2), spufs(7)
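
A minimal usage sketch, assuming the declarations from the synopsis above and
the conventional /spu mount point ("myctx" is an arbitrary, illustrative
name)::

    #include <sys/types.h>
    #include <sys/spu.h>
    #include <stdio.h>

    int create_context(void)
    {
            /* mode is masked by the caller's umask, as described above */
            int ctx = spu_create("/spu/myctx", 0, 0755);

            if (ctx == -1)
                    perror("spu_create");
            /* only spu_run(2) and close(2) are defined on this descriptor */
            return ctx;
    }
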
diff --git a/Documentation/filesystems/spufs/spu_run.rst b/Documentation/filesystems/spufs/spu_run.rst
new file mode 100644
index 000000000000..7fdb1c31cb91
--- /dev/null
+++ b/Documentation/filesystems/spufs/spu_run.rst
@@ -0,0 +1,138 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=======
+spu_run
+=======
+
+
+Name
+====
+ spu_run - execute an spu context
+
+
+Synopsis
+========
+
+ ::
+
+ #include <sys/spu.h>
+
+ int spu_run(int fd, unsigned int *npc, unsigned int *event);
+
+Description
+===========
+ The spu_run system call is used on PowerPC machines that implement the
+ Cell Broadband Engine Architecture in order to access Synergistic
+ Processor Units (SPUs). It uses the fd that was returned from
+ spu_create(2) to address a specific SPU context. When the context gets
+ scheduled to a physical SPU, it starts execution at the instruction pointer
+ passed in npc.
+
+ Execution of SPU code happens synchronously, meaning that spu_run does
+ not return while the SPU is still running. If there is a need to
+ execute SPU code in parallel with other code on either the main CPU or
+ other SPUs, you need to create a new thread of execution first, e.g.
+ using the pthread_create(3) call.
+
+ When spu_run returns, the current value of the SPU instruction pointer
+ is written back to npc, so you can call spu_run again without updating
+ the pointers.
+
+ event can be a NULL pointer or point to an extended status code that
+ gets filled when spu_run returns. It can be one of the following
+ constants:
+
+ SPE_EVENT_DMA_ALIGNMENT
+ A DMA alignment error
+
+ SPE_EVENT_SPE_DATA_SEGMENT
+ A DMA segmentation error
+
+ SPE_EVENT_SPE_DATA_STORAGE
+ A DMA storage error
+
+ If NULL is passed as the event argument, these errors will result in a
+ signal delivered to the calling process.
+
+Return Value
+============
+ spu_run returns the value of the spu_status register or -1 to indicate
+ an error and set errno to one of the error codes listed below. The
+ spu_status register value contains a bit mask of status codes and
+ optionally a 14 bit code returned from the stop-and-signal instruction
+ on the SPU. The bit masks for the status codes are:
+
+ 0x02
+ SPU was stopped by stop-and-signal.
+
+ 0x04
+ SPU was stopped by halt.
+
+ 0x08
+ SPU is waiting for a channel.
+
+ 0x10
+ SPU is in single-step mode.
+
+ 0x20
+ SPU has tried to execute an invalid instruction.
+
+ 0x40
+ SPU has tried to access an invalid channel.
+
+ 0x3fff0000
+ The bits masked with this value contain the code returned from
+ stop-and-signal.
+
+ Either one or more of the lower eight bits are set, or an error
+ code is returned from spu_run.
+
+Errors
+======
+ EAGAIN or EWOULDBLOCK
+ fd is in non-blocking mode and spu_run would block.
+
+ EBADF fd is not a valid file descriptor.
+
+ EFAULT npc is not a valid pointer or status is neither NULL nor a valid
+ pointer.
+
+ EINTR A signal occurred while spu_run was in progress. The npc value
+ has been updated to the new program counter value if necessary.
+
+ EINVAL fd is not a file descriptor returned from spu_create(2).
+
+ ENOMEM Insufficient memory was available to handle a page fault
+ resulting from an MFC direct memory access.
+
+ ENOSYS the functionality is not provided by the current system, because
+ either the hardware does not provide SPUs or the spufs module is
+ not loaded.
+
+
+Notes
+=====
+ spu_run is meant to be used from libraries that implement a more
+ abstract interface to SPUs, not to be used from regular applications.
+ See http://www.bsc.es/projects/deepcomputing/linuxoncell/ for the
+ recommended libraries.
+
+
+Conforming to
+=============
+ This call is Linux specific and only implemented by the ppc64
+ architecture. Programs using this system call are not portable.
+
+
+Bugs
+====
+ The code does not yet fully implement all features described here.
+
+
+Author
+======
+ Arnd Bergmann <arndb@de.ibm.com>
+
+See Also
+========
+ capabilities(7), close(2), spu_create(2), spufs(7)
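
A minimal sketch of driving a context obtained from spu_create(2), using the
status bits documented above (the entry point 0 is only an example)::

    #include <sys/spu.h>
    #include <stdio.h>

    int run_context(int ctx)
    {
            unsigned int npc = 0;   /* SPU-side entry point */
            int status = spu_run(ctx, &npc, NULL);

            if (status == -1) {
                    perror("spu_run");
                    return -1;
            }
            if (status & 0x02)      /* stopped by stop-and-signal */
                    printf("stop code 0x%x, next pc 0x%x\n",
                           (status >> 16) & 0x3fff, npc);
            return 0;
    }
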
diff --git a/Documentation/filesystems/spufs.txt b/Documentation/filesystems/spufs/spufs.rst
index eb9e3aa63026..8a42859bb100 100644
--- a/Documentation/filesystems/spufs.txt
+++ b/Documentation/filesystems/spufs/spufs.rst
@@ -1,12 +1,18 @@
-SPUFS(2) Linux Programmer's Manual SPUFS(2)
+.. SPDX-License-Identifier: GPL-2.0
+=====
+spufs
+=====
+Name
+====
-NAME
spufs - the SPU file system
-DESCRIPTION
+Description
+===========
+
The SPU file system is used on PowerPC machines that implement the Cell
Broadband Engine Architecture in order to access Synergistic Processor
Units (SPUs).
@@ -21,7 +27,9 @@ DESCRIPTION
ally add or remove files.
-MOUNT OPTIONS
+Mount Options
+=============
+
uid=<uid>
set the user owning the mount point, the default is 0 (root).
@@ -29,7 +37,9 @@ MOUNT OPTIONS
set the group owning the mount point, the default is 0 (root).
-FILES
+Files
+=====
+
The files in spufs mostly follow the standard behavior for regular sys-
tem calls like read(2) or write(2), but often support only a subset of
the operations supported on regular file systems. This list details the
@@ -125,14 +135,12 @@ FILES
space is available for writing.
- /mbox_stat
- /ibox_stat
- /wbox_stat
+ /mbox_stat, /ibox_stat, /wbox_stat
Read-only files that contain the length of the current queue, i.e. how
many words can be read from mbox or ibox or how many words can be
written to wbox without blocking. The files can be read only in 4-byte
units and return a big-endian binary integer number. The possible
- operations on an open *box_stat file are:
+ operations on an open ``*box_stat`` file are:
read(2)
If a count smaller than four is requested, read returns -1 and
@@ -143,12 +151,7 @@ FILES
in EAGAIN.
- /npc
- /decr
- /decr_status
- /spu_tag_mask
- /event_mask
- /srr0
+ /npc, /decr, /decr_status, /spu_tag_mask, /event_mask, /srr0
Internal registers of the SPU. The representation is an ASCII string
with the numeric value of the next instruction to be executed. These
can be used in read/write mode for debugging, but normal operation of
@@ -157,17 +160,14 @@ FILES
The contents of these files are:
+ =================== ===================================
npc Next Program Counter
-
decr SPU Decrementer
-
decr_status Decrementer Status
-
spu_tag_mask MFC tag mask for SPU DMA
-
event_mask Event mask for SPU interrupts
-
srr0 Interrupt Return address register
+ =================== ===================================
The possible operations on an open npc, decr, decr_status,
@@ -206,8 +206,7 @@ FILES
from the data buffer, updating the value of the fpcr register.
- /signal1
- /signal2
+ /signal1, /signal2
The two signal notification channels of an SPU. These are read-write
files that operate on a 32 bit word. Writing to one of these files
triggers an interrupt on the SPU. The value written to the signal
@@ -233,8 +232,7 @@ FILES
file.
- /signal1_type
- /signal2_type
+ /signal1_type, /signal2_type
These two files change the behavior of the signal1 and signal2
notification files. They contain a numerical ASCII string which is read as
either "1" or "0". In mode 0 (overwrite), the hardware replaces the
@@ -259,263 +257,17 @@ FILES
the previous setting.
-EXAMPLES
+Examples
+========
/etc/fstab entry
none /spu spufs gid=spu 0 0
-AUTHORS
+Authors
+=======
Arnd Bergmann <arndb@de.ibm.com>, Mark Nutter <mnutter@us.ibm.com>,
Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
-SEE ALSO
+See Also
+========
capabilities(7), close(2), spu_create(2), spu_run(2), spufs(7)
-
-
-
-Linux 2005-09-28 SPUFS(2)
-
-------------------------------------------------------------------------------
-
-SPU_RUN(2) Linux Programmer's Manual SPU_RUN(2)
-
-
-
-NAME
- spu_run - execute an spu context
-
-
-SYNOPSIS
- #include <sys/spu.h>
-
- int spu_run(int fd, unsigned int *npc, unsigned int *event);
-
-DESCRIPTION
- The spu_run system call is used on PowerPC machines that implement the
- Cell Broadband Engine Architecture in order to access Synergistic Pro-
- cessor Units (SPUs). It uses the fd that was returned from spu_cre-
- ate(2) to address a specific SPU context. When the context gets sched-
- uled to a physical SPU, it starts execution at the instruction pointer
- passed in npc.
-
- Execution of SPU code happens synchronously, meaning that spu_run does
- not return while the SPU is still running. If there is a need to exe-
- cute SPU code in parallel with other code on either the main CPU or
- other SPUs, you need to create a new thread of execution first, e.g.
- using the pthread_create(3) call.
-
- When spu_run returns, the current value of the SPU instruction pointer
- is written back to npc, so you can call spu_run again without updating
- the pointers.
-
- event can be a NULL pointer or point to an extended status code that
- gets filled when spu_run returns. It can be one of the following con-
- stants:
-
- SPE_EVENT_DMA_ALIGNMENT
- A DMA alignment error
-
- SPE_EVENT_SPE_DATA_SEGMENT
- A DMA segmentation error
-
- SPE_EVENT_SPE_DATA_STORAGE
- A DMA storage error
-
- If NULL is passed as the event argument, these errors will result in a
- signal delivered to the calling process.
-
-RETURN VALUE
- spu_run returns the value of the spu_status register or -1 to indicate
- an error and set errno to one of the error codes listed below. The
- spu_status register value contains a bit mask of status codes and
- optionally a 14 bit code returned from the stop-and-signal instruction
- on the SPU. The bit masks for the status codes are:
-
- 0x02 SPU was stopped by stop-and-signal.
-
- 0x04 SPU was stopped by halt.
-
- 0x08 SPU is waiting for a channel.
-
- 0x10 SPU is in single-step mode.
-
- 0x20 SPU has tried to execute an invalid instruction.
-
- 0x40 SPU has tried to access an invalid channel.
-
- 0x3fff0000
- The bits masked with this value contain the code returned from
- stop-and-signal.
-
- There are always one or more of the lower eight bits set or an error
- code is returned from spu_run.
-
-ERRORS
- EAGAIN or EWOULDBLOCK
- fd is in non-blocking mode and spu_run would block.
-
- EBADF fd is not a valid file descriptor.
-
- EFAULT npc is not a valid pointer or status is neither NULL nor a valid
- pointer.
-
- EINTR A signal occurred while spu_run was in progress. The npc value
- has been updated to the new program counter value if necessary.
-
- EINVAL fd is not a file descriptor returned from spu_create(2).
-
- ENOMEM Insufficient memory was available to handle a page fault result-
- ing from an MFC direct memory access.
-
- ENOSYS the functionality is not provided by the current system, because
- either the hardware does not provide SPUs or the spufs module is
- not loaded.
-
-
-NOTES
- spu_run is meant to be used from libraries that implement a more
- abstract interface to SPUs, not to be used from regular applications.
- See http://www.bsc.es/projects/deepcomputing/linuxoncell/ for the rec-
- ommended libraries.
-
-
-CONFORMING TO
- This call is Linux specific and only implemented by the ppc64 architec-
- ture. Programs using this system call are not portable.
-
-
-BUGS
- The code does not yet fully implement all features lined out here.
-
-
-AUTHOR
- Arnd Bergmann <arndb@de.ibm.com>
-
-SEE ALSO
- capabilities(7), close(2), spu_create(2), spufs(7)
-
-
-
-Linux 2005-09-28 SPU_RUN(2)
-
-------------------------------------------------------------------------------
-
-SPU_CREATE(2) Linux Programmer's Manual SPU_CREATE(2)
-
-
-
-NAME
- spu_create - create a new spu context
-
-
-SYNOPSIS
- #include <sys/types.h>
- #include <sys/spu.h>
-
- int spu_create(const char *pathname, int flags, mode_t mode);
-
-DESCRIPTION
- The spu_create system call is used on PowerPC machines that implement
- the Cell Broadband Engine Architecture in order to access Synergistic
- Processor Units (SPUs). It creates a new logical context for an SPU in
- pathname and returns a handle to associated with it. pathname must
- point to a non-existing directory in the mount point of the SPU file
- system (spufs). When spu_create is successful, a directory gets cre-
- ated on pathname and it is populated with files.
-
- The returned file handle can only be passed to spu_run(2) or closed,
- other operations are not defined on it. When it is closed, all associ-
- ated directory entries in spufs are removed. When the last file handle
- pointing either inside of the context directory or to this file
- descriptor is closed, the logical SPU context is destroyed.
-
- The parameter flags can be zero or any bitwise or'd combination of the
- following constants:
-
- SPU_RAWIO
- Allow mapping of some of the hardware registers of the SPU into
- user space. This flag requires the CAP_SYS_RAWIO capability, see
- capabilities(7).
-
- The mode parameter specifies the permissions used for creating the new
- directory in spufs. mode is modified with the user's umask(2) value
- and then used for both the directory and the files contained in it. The
- file permissions mask out some more bits of mode because they typically
- support only read or write access. See stat(2) for a full list of the
- possible mode values.
-
-
-RETURN VALUE
- spu_create returns a new file descriptor. It may return -1 to indicate
- an error condition and set errno to one of the error codes listed
- below.
-
-
-ERRORS
- EACCES
- The current user does not have write access on the spufs mount
- point.
-
- EEXIST An SPU context already exists at the given path name.
-
- EFAULT pathname is not a valid string pointer in the current address
- space.
-
- EINVAL pathname is not a directory in the spufs mount point.
-
- ELOOP Too many symlinks were found while resolving pathname.
-
- EMFILE The process has reached its maximum open file limit.
-
- ENAMETOOLONG
- pathname was too long.
-
- ENFILE The system has reached the global open file limit.
-
- ENOENT Part of pathname could not be resolved.
-
- ENOMEM The kernel could not allocate all resources required.
-
- ENOSPC There are not enough SPU resources available to create a new
- context or the user specific limit for the number of SPU con-
- texts has been reached.
-
- ENOSYS the functionality is not provided by the current system, because
- either the hardware does not provide SPUs or the spufs module is
- not loaded.
-
- ENOTDIR
- A part of pathname is not a directory.
-
-
-
-NOTES
- spu_create is meant to be used from libraries that implement a more
- abstract interface to SPUs, not to be used from regular applications.
- See http://www.bsc.es/projects/deepcomputing/linuxoncell/ for the rec-
- ommended libraries.
-
-
-FILES
- pathname must point to a location beneath the mount point of spufs. By
- convention, it gets mounted in /spu.
-
-
-CONFORMING TO
- This call is Linux specific and only implemented by the ppc64 architec-
- ture. Programs using this system call are not portable.
-
-
-BUGS
- The code does not yet fully implement all features lined out here.
-
-
-AUTHOR
- Arnd Bergmann <arndb@de.ibm.com>
-
-SEE ALSO
- capabilities(7), close(2), spu_run(2), spufs(7)
-
-
-
-Linux 2005-09-28 SPU_CREATE(2)
diff --git a/Documentation/filesystems/sysfs-pci.txt b/Documentation/filesystems/sysfs-pci.rst
index 06f1d64c6f70..a265f3e2cc80 100644
--- a/Documentation/filesystems/sysfs-pci.txt
+++ b/Documentation/filesystems/sysfs-pci.rst
@@ -1,8 +1,11 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============================================
Accessing PCI device resources through sysfs
---------------------------------------------
+============================================
sysfs, usually mounted at /sys, provides access to PCI resources on platforms
-that support it. For example, a given bus might look like this:
+that support it. For example, a given bus might look like this::
/sys/devices/pci0000:17
|-- 0000:17:00.0
@@ -30,8 +33,9 @@ This bus contains a single function device in slot 0. The domain and bus
numbers are reproduced for convenience. Under the device directory are several
files, each with their own function.
+ =================== =====================================================
file function
- ---- --------
+ =================== =====================================================
class PCI class (ascii, ro)
config PCI config space (binary, rw)
device PCI device (ascii, ro)
@@ -40,13 +44,16 @@ files, each with their own function.
local_cpus nearby CPU mask (cpumask, ro)
remove remove device from kernel's list (ascii, wo)
resource PCI resource host addresses (ascii, ro)
- resource0..N PCI resource N, if present (binary, mmap, rw[1])
+ resource0..N PCI resource N, if present (binary, mmap, rw\ [1]_)
resource0_wc..N_wc PCI WC map resource N, if prefetchable (binary, mmap)
revision PCI revision (ascii, ro)
rom PCI ROM resource, if present (binary, ro)
subsystem_device PCI subsystem device (ascii, ro)
subsystem_vendor PCI subsystem vendor (ascii, ro)
vendor PCI vendor (ascii, ro)
+ =================== =====================================================
+
+::
ro - read only file
rw - file is readable and writable
@@ -56,7 +63,7 @@ files, each with their own function.
binary - file contains binary data
cpumask - file contains a cpumask type
-[1] rw for RESOURCE_IO (I/O port) regions only
+.. [1] rw for RESOURCE_IO (I/O port) regions only
The read only files are informational, writes to them will be ignored, with
the exception of the 'rom' file. Writable files can be used to perform
@@ -67,11 +74,11 @@ don't support mmapping of certain resources, so be sure to check the return
value from any attempted mmap. The most notable of these are I/O port
resources, which also provide read/write access.
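+
+As a rough illustration of the mmap interface described above, a user-space
+program might map BAR 0 of the example device like this (the path and the
+mapping size are assumptions, not part of the interface description)::
+
+  #include <fcntl.h>
+  #include <stdio.h>
+  #include <sys/mman.h>
+  #include <unistd.h>
+
+  int main(void)
+  {
+          int fd = open("/sys/devices/pci0000:17/0000:17:00.0/resource0",
+                        O_RDWR);
+          void *bar;
+
+          if (fd < 0)
+                  return 1;
+          /* mmap() can fail for resources the platform cannot map */
+          bar = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+          if (bar == MAP_FAILED) {
+                  close(fd);
+                  return 1;
+          }
+          /* ... access device registers through 'bar' ... */
+          munmap(bar, 4096);
+          close(fd);
+          return 0;
+  }
+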
-The 'enable' file provides a counter that indicates how many times the device
+The 'enable' file provides a counter that indicates how many times the device
has been enabled. If the 'enable' file currently returns '4', and a '1' is
echoed into it, it will then return '5'. Echoing a '0' into it will decrease
the count. Even when it returns to 0, though, some of the initialisation
-may not be reversed.
+may not be reversed.
The 'rom' file is special in that it provides read-only access to the device's
ROM file, if available. It's disabled by default, however, so applications
@@ -93,7 +100,7 @@ Accessing legacy resources through sysfs
Legacy I/O port and ISA memory resources are also provided in sysfs if the
underlying platform supports them. They're located in the PCI class hierarchy,
-e.g.
+e.g.::
/sys/class/pci_bus/0000:17/
|-- bridge -> ../../../devices/pci0000:17
diff --git a/Documentation/filesystems/sysfs-tagging.txt b/Documentation/filesystems/sysfs-tagging.rst
index c7c8e6438958..8888a05c398e 100644
--- a/Documentation/filesystems/sysfs-tagging.txt
+++ b/Documentation/filesystems/sysfs-tagging.rst
@@ -1,5 +1,8 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=============
Sysfs tagging
--------------
+=============
(Taken almost verbatim from Eric Biederman's netns tagging patch
commit msg)
@@ -18,25 +21,28 @@ in the directories and applications only see a limited set of
the network devices.
Each sysfs directory entry may be tagged with a namespace via the
-void *ns member of its kernfs_node. If a directory entry is tagged,
-then kernfs_node->flags will have a flag between KOBJ_NS_TYPE_NONE
+``void *ns`` member of its ``kernfs_node``. If a directory entry is tagged,
+then ``kernfs_node->flags`` will have a flag between KOBJ_NS_TYPE_NONE
and KOBJ_NS_TYPES, and ns will point to the namespace to which it
belongs.
-Each sysfs superblock's kernfs_super_info contains an array void
-*ns[KOBJ_NS_TYPES]. When a task in a tagging namespace
+Each sysfs superblock's kernfs_super_info contains an array
+``void *ns[KOBJ_NS_TYPES]``. When a task in a tagging namespace
kobj_nstype first mounts sysfs, a new superblock is created. It
will be differentiated from other sysfs mounts by having its
-s_fs_info->ns[kobj_nstype] set to the new namespace. Note that
+``s_fs_info->ns[kobj_nstype]`` set to the new namespace. Note that
through bind mounting and mounts propagation, a task can easily view
the contents of other namespaces' sysfs mounts. Therefore, when a
namespace exits, it will call kobj_ns_exit() to invalidate any
kernfs_node->ns pointers pointing to it.
Users of this interface:
-- define a type in the kobj_ns_type enumeration.
-- call kobj_ns_type_register() with its kobj_ns_type_operations which has
+
+- define a type in the ``kobj_ns_type`` enumeration.
+- call kobj_ns_type_register() with its ``kobj_ns_type_operations`` which has
+
- current_ns() which returns current's namespace
- netlink_ns() which returns a socket's namespace
   - initial_ns() which returns the initial namespace
+
- call kobj_ns_exit() when an individual tag is no longer valid
diff --git a/Documentation/filesystems/sysfs.rst b/Documentation/filesystems/sysfs.rst
index 290891c3fecb..ab0f7795792b 100644
--- a/Documentation/filesystems/sysfs.rst
+++ b/Documentation/filesystems/sysfs.rst
@@ -20,7 +20,7 @@ a means to export kernel data structures, their attributes, and the
linkages between them to userspace.
sysfs is tied inherently to the kobject infrastructure. Please read
-Documentation/kobject.txt for more information concerning the kobject
+Documentation/core-api/kobject.rst for more information concerning the kobject
interface.
diff --git a/Documentation/filesystems/vfs.rst b/Documentation/filesystems/vfs.rst
index 7d4d09dd5e6d..ed17771c212b 100644
--- a/Documentation/filesystems/vfs.rst
+++ b/Documentation/filesystems/vfs.rst
@@ -706,6 +706,7 @@ cache in your filesystem. The following members are defined:
int (*readpage)(struct file *, struct page *);
int (*writepages)(struct address_space *, struct writeback_control *);
int (*set_page_dirty)(struct page *page);
+ void (*readahead)(struct readahead_control *);
int (*readpages)(struct file *filp, struct address_space *mapping,
struct list_head *pages, unsigned nr_pages);
int (*write_begin)(struct file *, struct address_space *mapping,
@@ -781,12 +782,26 @@ cache in your filesystem. The following members are defined:
If defined, it should set the PageDirty flag, and the
PAGECACHE_TAG_DIRTY tag in the radix tree.
+``readahead``
+ Called by the VM to read pages associated with the address_space
+ object. The pages are consecutive in the page cache and are
+ locked. The implementation should decrement the page refcount
+ after starting I/O on each page. Usually the page will be
+ unlocked by the I/O completion handler. If the filesystem decides
+ to stop attempting I/O before reaching the end of the readahead
+ window, it can simply return. The caller will decrement the page
+ refcount and unlock the remaining pages for you. Set PageUptodate
+ if the I/O completes successfully. Setting PageError on any page
+ will be ignored; simply unlock the page if an I/O error occurs.
+
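+        A rough sketch of such an implementation (myfs_start_read_async()
+        is a hypothetical helper that submits asynchronous I/O for one
+        page; readahead_page() iterates over the locked pages in the
+        readahead window)::
+
+          static void myfs_readahead(struct readahead_control *rac)
+          {
+                  struct page *page;
+
+                  while ((page = readahead_page(rac))) {
+                          if (myfs_start_read_async(page) < 0) {
+                                  /* stopping early: clean up this page */
+                                  unlock_page(page);
+                                  put_page(page);
+                                  return;
+                          }
+                          /* I/O started; completion unlocks the page */
+                          put_page(page);
+                  }
+          }
+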
``readpages``
called by the VM to read pages associated with the address_space
object. This is essentially just a vector version of readpage.
Instead of just one page, several pages are requested.
readpages is only used for read-ahead, so read errors are
ignored. If anything goes wrong, feel free to give up.
+ This interface is deprecated and will be removed by the end of
+ 2020; implement readahead instead.
``write_begin``
Called by the generic buffered write code to ask the filesystem
diff --git a/Documentation/filesystems/xfs-delayed-logging-design.txt b/Documentation/filesystems/xfs-delayed-logging-design.rst
index 9a6dd289b17b..464405d2801e 100644
--- a/Documentation/filesystems/xfs-delayed-logging-design.txt
+++ b/Documentation/filesystems/xfs-delayed-logging-design.rst
@@ -1,8 +1,11 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==========================
XFS Delayed Logging Design
---------------------------
+==========================
Introduction to Re-logging in XFS
----------------------------------
+=================================
XFS logging is a combination of logical and physical logging. Some objects,
such as inodes and dquots, are logged in logical format where the details
@@ -25,7 +28,7 @@ changes in the new transaction that is written to the log.
That is, if we have a sequence of changes A through to F, and the object was
written to disk after change D, we would see in the log the following series
of transactions, their contents and the log sequence number (LSN) of the
-transaction:
+transaction::
Transaction Contents LSN
A A X
@@ -85,7 +88,7 @@ IO permanently. Hence the XFS journalling subsystem can be considered to be IO
bound.
Delayed Logging: Concepts
--------------------------
+=========================
The key thing to note about the asynchronous logging combined with the
relogging technique XFS uses is that we can be relogging changed objects
@@ -154,9 +157,10 @@ The fundamental requirements for delayed logging in XFS are simple:
6. No performance regressions for synchronous transaction workloads.
Delayed Logging: Design
------------------------
+=======================
Storing Changes
+---------------
The problem with accumulating changes at a logical level (i.e. just using the
existing log item dirty region tracking) is that when it comes to writing the
@@ -194,30 +198,30 @@ asynchronous transactions to the log. The differences between the existing
formatting method and the delayed logging formatting can be seen in the
diagram below.
-Current format log vector:
+Current format log vector::
-Object +---------------------------------------------+
-Vector 1 +----+
-Vector 2 +----+
-Vector 3 +----------+
+ Object +---------------------------------------------+
+ Vector 1 +----+
+ Vector 2 +----+
+ Vector 3 +----------+
-After formatting:
+After formatting::
-Log Buffer +-V1-+-V2-+----V3----+
+ Log Buffer +-V1-+-V2-+----V3----+
-Delayed logging vector:
+Delayed logging vector::
-Object +---------------------------------------------+
-Vector 1 +----+
-Vector 2 +----+
-Vector 3 +----------+
+ Object +---------------------------------------------+
+ Vector 1 +----+
+ Vector 2 +----+
+ Vector 3 +----------+
-After formatting:
+After formatting::
-Memory Buffer +-V1-+-V2-+----V3----+
-Vector 1 +----+
-Vector 2 +----+
-Vector 3 +----------+
+ Memory Buffer +-V1-+-V2-+----V3----+
+ Vector 1 +----+
+ Vector 2 +----+
+ Vector 3 +----------+
The memory buffer and associated vector need to be passed as a single object,
but still need to be associated with the parent object so if the object is
@@ -242,6 +246,7 @@ relogged in memory.
Tracking Changes
+----------------
Now that we can record transactional changes in memory in a form that allows
them to be used without limitations, we need to be able to track and accumulate
@@ -278,6 +283,7 @@ done for convenience/sanity of the developers.
Delayed Logging: Checkpoints
+----------------------------
When we have a log synchronisation event, commonly known as a "log force",
all the items in the CIL must be written into the log via the log buffers.
@@ -341,7 +347,7 @@ Hence log vectors need to be able to be chained together to allow them to be
detached from the log items. That is, when the CIL is flushed the memory
buffer and log vector attached to each log item needs to be attached to the
checkpoint context so that the log item can be released. In diagrammatic form,
-the CIL would look like this before the flush:
+the CIL would look like this before the flush::
CIL Head
|
@@ -362,7 +368,7 @@ the CIL would look like this before the flush:
-> vector array
And after the flush the CIL head is empty, and the checkpoint context log
-vector list would look like:
+vector list would look like::
Checkpoint Context
|
@@ -411,6 +417,7 @@ compare" situation that can be done after a working and reviewed implementation
is in the dev tree....
Delayed Logging: Checkpoint Sequencing
+--------------------------------------
One of the key aspects of the XFS transaction subsystem is that it tags
committed transactions with the log sequence number of the transaction commit.
@@ -474,6 +481,7 @@ force the log at the LSN of that transaction) and so the higher level code
behaves the same regardless of whether delayed logging is being used or not.
Delayed Logging: Checkpoint Log Space Accounting
+------------------------------------------------
The big issue for a checkpoint transaction is the log space reservation for the
transaction. We don't know how big a checkpoint transaction is going to be
@@ -491,7 +499,7 @@ the size of the transaction and the number of regions being logged (the number
of log vectors in the transaction).
An example of the differences would be logging directory changes versus logging
-inode changes. If you modify lots of inode cores (e.g. chmod -R g+w *), then
+inode changes. If you modify lots of inode cores (e.g. ``chmod -R g+w *``), then
there are lots of transactions that only contain an inode core and an inode log
format structure. That is, two vectors totaling roughly 150 bytes. If we modify
10,000 inodes, we have about 1.5MB of metadata to write in 20,000 vectors. Each
@@ -565,6 +573,7 @@ which is once every 30s.
Delayed Logging: Log Item Pinning
+---------------------------------
Currently log items are pinned during transaction commit while the items are
still locked. This happens just after the items are formatted, though it could
@@ -605,6 +614,7 @@ object, we have a race with CIL being flushed between the check and the pin
lock to guarantee that we pin the items correctly.
Delayed Logging: Concurrent Scalability
+---------------------------------------
A fundamental requirement for the CIL is that accesses through transaction
commits must scale to many concurrent commits. The current transaction commit
@@ -683,8 +693,9 @@ woken by the wrong event.
Lifecycle Changes
+-----------------
-The existing log item life cycle is as follows:
+The existing log item life cycle is as follows::
1. Transaction allocate
2. Transaction reserve
@@ -729,7 +740,7 @@ at the same time. If the log item is in the AIL or between steps 6 and 7
and steps 1-6 are re-entered, then the item is relogged. Only when steps 8-9
are entered and completed is the object considered clean.
-With delayed logging, there are new steps inserted into the life cycle:
+With delayed logging, there are new steps inserted into the life cycle::
1. Transaction allocate
2. Transaction reserve
diff --git a/Documentation/filesystems/xfs-self-describing-metadata.txt b/Documentation/filesystems/xfs-self-describing-metadata.rst
index 8db0121d0980..b79dbf36dc94 100644
--- a/Documentation/filesystems/xfs-self-describing-metadata.txt
+++ b/Documentation/filesystems/xfs-self-describing-metadata.rst
@@ -1,8 +1,11 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============================
XFS Self Describing Metadata
-----------------------------
+============================
Introduction
-------------
+============
The largest scalability problem facing XFS is not one of algorithmic
 scalability, but of verification of the filesystem structure. Scalability of the
@@ -34,7 +37,7 @@ required for basic forensic analysis of the filesystem structure.
Self Describing Metadata
-------------------------
+========================
One of the problems with the current metadata format is that apart from the
magic number in the metadata block, we have no other way of identifying what it
@@ -142,7 +145,7 @@ modification occurred between the corruption being written and when it was
detected.
Runtime Validation
-------------------
+==================
Validation of self-describing metadata takes place at runtime in two places:
@@ -183,18 +186,18 @@ error occurs during this process, the buffer is again marked with a EFSCORRUPTED
error for the higher layers to catch.
Structures
-----------
+==========
-A typical on-disk structure needs to contain the following information:
+A typical on-disk structure needs to contain the following information::
-struct xfs_ondisk_hdr {
- __be32 magic; /* magic number */
- __be32 crc; /* CRC, not logged */
- uuid_t uuid; /* filesystem identifier */
- __be64 owner; /* parent object */
- __be64 blkno; /* location on disk */
- __be64 lsn; /* last modification in log, not logged */
-};
+ struct xfs_ondisk_hdr {
+ __be32 magic; /* magic number */
+ __be32 crc; /* CRC, not logged */
+ uuid_t uuid; /* filesystem identifier */
+ __be64 owner; /* parent object */
+ __be64 blkno; /* location on disk */
+ __be64 lsn; /* last modification in log, not logged */
+ };
Depending on the metadata, this information may be part of a header structure
separate to the metadata contents, or may be distributed through an existing
@@ -214,24 +217,24 @@ level of information is generally provided. For example:
well. hence the additional metadata headers change the overall format
of the metadata.
-A typical buffer read verifier is structured as follows:
+A typical buffer read verifier is structured as follows::
-#define XFS_FOO_CRC_OFF offsetof(struct xfs_ondisk_hdr, crc)
+ #define XFS_FOO_CRC_OFF offsetof(struct xfs_ondisk_hdr, crc)
-static void
-xfs_foo_read_verify(
- struct xfs_buf *bp)
-{
- struct xfs_mount *mp = bp->b_mount;
+ static void
+ xfs_foo_read_verify(
+ struct xfs_buf *bp)
+ {
+ struct xfs_mount *mp = bp->b_mount;
- if ((xfs_sb_version_hascrc(&mp->m_sb) &&
- !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
- XFS_FOO_CRC_OFF)) ||
- !xfs_foo_verify(bp)) {
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
- xfs_buf_ioerror(bp, EFSCORRUPTED);
- }
-}
+ if ((xfs_sb_version_hascrc(&mp->m_sb) &&
+ !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
+ XFS_FOO_CRC_OFF)) ||
+ !xfs_foo_verify(bp)) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
+ xfs_buf_ioerror(bp, EFSCORRUPTED);
+ }
+ }
The code ensures that the CRC is only checked if the filesystem has CRCs enabled
by checking the superblock of the feature bit, and then if the CRC verifies OK
@@ -239,83 +242,83 @@ by checking the superblock of the feature bit, and then if the CRC verifies OK
The verifier function will take a couple of different forms, depending on
whether the magic number can be used to determine the format of the block. In
-the case it can't, the code is structured as follows:
+the case it can't, the code is structured as follows::
-static bool
-xfs_foo_verify(
- struct xfs_buf *bp)
-{
- struct xfs_mount *mp = bp->b_mount;
- struct xfs_ondisk_hdr *hdr = bp->b_addr;
+ static bool
+ xfs_foo_verify(
+ struct xfs_buf *bp)
+ {
+ struct xfs_mount *mp = bp->b_mount;
+ struct xfs_ondisk_hdr *hdr = bp->b_addr;
- if (hdr->magic != cpu_to_be32(XFS_FOO_MAGIC))
- return false;
+ if (hdr->magic != cpu_to_be32(XFS_FOO_MAGIC))
+ return false;
- if (!xfs_sb_version_hascrc(&mp->m_sb)) {
- if (!uuid_equal(&hdr->uuid, &mp->m_sb.sb_uuid))
- return false;
- if (bp->b_bn != be64_to_cpu(hdr->blkno))
- return false;
- if (hdr->owner == 0)
- return false;
- }
+ if (!xfs_sb_version_hascrc(&mp->m_sb)) {
+ if (!uuid_equal(&hdr->uuid, &mp->m_sb.sb_uuid))
+ return false;
+ if (bp->b_bn != be64_to_cpu(hdr->blkno))
+ return false;
+ if (hdr->owner == 0)
+ return false;
+ }
- /* object specific verification checks here */
+ /* object specific verification checks here */
- return true;
-}
+ return true;
+ }
If there are different magic numbers for the different formats, the verifier
-will look like:
-
-static bool
-xfs_foo_verify(
- struct xfs_buf *bp)
-{
- struct xfs_mount *mp = bp->b_mount;
- struct xfs_ondisk_hdr *hdr = bp->b_addr;
-
- if (hdr->magic == cpu_to_be32(XFS_FOO_CRC_MAGIC)) {
- if (!uuid_equal(&hdr->uuid, &mp->m_sb.sb_uuid))
- return false;
- if (bp->b_bn != be64_to_cpu(hdr->blkno))
- return false;
- if (hdr->owner == 0)
- return false;
- } else if (hdr->magic != cpu_to_be32(XFS_FOO_MAGIC))
- return false;
-
- /* object specific verification checks here */
-
- return true;
-}
+will look like::
+
+ static bool
+ xfs_foo_verify(
+ struct xfs_buf *bp)
+ {
+ struct xfs_mount *mp = bp->b_mount;
+ struct xfs_ondisk_hdr *hdr = bp->b_addr;
+
+ if (hdr->magic == cpu_to_be32(XFS_FOO_CRC_MAGIC)) {
+ if (!uuid_equal(&hdr->uuid, &mp->m_sb.sb_uuid))
+ return false;
+ if (bp->b_bn != be64_to_cpu(hdr->blkno))
+ return false;
+ if (hdr->owner == 0)
+ return false;
+ } else if (hdr->magic != cpu_to_be32(XFS_FOO_MAGIC))
+ return false;
+
+ /* object specific verification checks here */
+
+ return true;
+ }
Write verifiers are very similar to the read verifiers, they just do things in
-the opposite order to the read verifiers. A typical write verifier:
+the opposite order to the read verifiers. A typical write verifier::
-static void
-xfs_foo_write_verify(
- struct xfs_buf *bp)
-{
- struct xfs_mount *mp = bp->b_mount;
- struct xfs_buf_log_item *bip = bp->b_fspriv;
+ static void
+ xfs_foo_write_verify(
+ struct xfs_buf *bp)
+ {
+ struct xfs_mount *mp = bp->b_mount;
+ struct xfs_buf_log_item *bip = bp->b_fspriv;
- if (!xfs_foo_verify(bp)) {
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
- xfs_buf_ioerror(bp, EFSCORRUPTED);
- return;
- }
+ if (!xfs_foo_verify(bp)) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
+ xfs_buf_ioerror(bp, EFSCORRUPTED);
+ return;
+ }
- if (!xfs_sb_version_hascrc(&mp->m_sb))
- return;
+ if (!xfs_sb_version_hascrc(&mp->m_sb))
+ return;
- if (bip) {
- struct xfs_ondisk_hdr *hdr = bp->b_addr;
- hdr->lsn = cpu_to_be64(bip->bli_item.li_lsn);
- }
- xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_FOO_CRC_OFF);
-}
+ if (bip) {
+ struct xfs_ondisk_hdr *hdr = bp->b_addr;
+ hdr->lsn = cpu_to_be64(bip->bli_item.li_lsn);
+ }
+ xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_FOO_CRC_OFF);
+ }
This will verify the internal structure of the metadata before we go any
further, detecting corruptions that have occurred as the metadata has been
@@ -324,7 +327,7 @@ update the LSN field (when it was last modified) and calculate the CRC on the
metadata. Once this is done, we can issue the IO.
Inodes and Dquots
------------------
+=================
Inodes and dquots are special snowflakes. They have per-object CRC and
self-identifiers, but they are packed so that there are multiple objects per
@@ -337,14 +340,13 @@ buffer.
The structure of the verifiers and the identifiers checks is very similar to the
buffer code described above. The only difference is where they are called. For
-example, inode read verification is done in xfs_iread() when the inode is first
-read out of the buffer and the struct xfs_inode is instantiated. The inode is
-already extensively verified during writeback in xfs_iflush_int, so the only
-addition here is to add the LSN and CRC to the inode as it is copied back into
-the buffer.
+example, inode read verification is done in xfs_inode_from_disk() when the inode
+is first read out of the buffer and the struct xfs_inode is instantiated. The
+inode is already extensively verified during writeback in xfs_iflush_int, so the
+only addition here is to add the LSN and CRC to the inode as it is copied back
+into the buffer.
XXX: inode unlinked list modification doesn't recalculate the inode CRC! None of
the unlinked list modifications check or update CRCs, neither during unlink nor
log recovery. So, it's gone unnoticed until now. This won't matter immediately -
repair will probably complain about it - but it needs to be fixed.
-
diff --git a/Documentation/gpu/amdgpu.rst b/Documentation/gpu/amdgpu.rst
index 0efede580039..4cc74325bf91 100644
--- a/Documentation/gpu/amdgpu.rst
+++ b/Documentation/gpu/amdgpu.rst
@@ -202,3 +202,91 @@ busy_percent
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
:doc: busy_percent
+
+GPU Product Information
+=======================
+
+Information about the GPU can be obtained on certain cards
+via sysfs.
+
+product_name
+------------
+
+.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+ :doc: product_name
+
+product_number
+--------------
+
+.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+ :doc: product_name
+
+serial_number
+-------------
+
+.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+ :doc: serial_number
+
+unique_id
+---------
+
+.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+ :doc: unique_id
+
+GPU Memory Usage Information
+============================
+
+Various memory accounting can be accessed via sysfs.
+
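+A minimal user-space sketch of reading one of these attributes (the sysfs
+path is an assumption for a typical single-GPU system)::
+
+  #include <stdio.h>
+
+  int main(void)
+  {
+          unsigned long long used;
+          FILE *f = fopen("/sys/class/drm/card0/device/mem_info_vram_used", "r");
+
+          if (!f)
+                  return 1;
+          if (fscanf(f, "%llu", &used) == 1)
+                  printf("VRAM used: %llu bytes\n", used);
+          fclose(f);
+          return 0;
+  }
+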
+mem_info_vram_total
+-------------------
+
+.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+ :doc: mem_info_vram_total
+
+mem_info_vram_used
+------------------
+
+.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+ :doc: mem_info_vram_used
+
+mem_info_vis_vram_total
+-----------------------
+
+.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+ :doc: mem_info_vis_vram_total
+
+mem_info_vis_vram_used
+----------------------
+
+.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+ :doc: mem_info_vis_vram_used
+
+mem_info_gtt_total
+------------------
+
+.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
+ :doc: mem_info_gtt_total
+
+mem_info_gtt_used
+-----------------
+
+.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
+ :doc: mem_info_gtt_used
+
+PCIe Accounting Information
+===========================
+
+pcie_bw
+-------
+
+.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+ :doc: pcie_bw
+
+pcie_replay_count
+-----------------
+
+.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+ :doc: pcie_replay_count
+
+
diff --git a/Documentation/gpu/drm-internals.rst b/Documentation/gpu/drm-internals.rst
index a73320576ca9..12272b168580 100644
--- a/Documentation/gpu/drm-internals.rst
+++ b/Documentation/gpu/drm-internals.rst
@@ -132,6 +132,18 @@ be unmapped; on many devices, the ROM address decoder is shared with
other BARs, so leaving it mapped could cause undesired behaviour like
hangs or memory corruption.
+Managed Resources
+-----------------
+
+.. kernel-doc:: drivers/gpu/drm/drm_managed.c
+ :doc: managed resources
+
+.. kernel-doc:: drivers/gpu/drm/drm_managed.c
+ :export:
+
+.. kernel-doc:: include/drm/drm_managed.h
+ :internal:
+
Bus-specific Device Registration and PCI Support
------------------------------------------------
diff --git a/Documentation/gpu/drm-kms.rst b/Documentation/gpu/drm-kms.rst
index 906771e03103..397314d08f77 100644
--- a/Documentation/gpu/drm-kms.rst
+++ b/Documentation/gpu/drm-kms.rst
@@ -3,7 +3,7 @@ Kernel Mode Setting (KMS)
=========================
Drivers must initialize the mode setting core by calling
-drm_mode_config_init() on the DRM device. The function
+drmm_mode_config_init() on the DRM device. The function
initializes the :c:type:`struct drm_device <drm_device>`
mode_config field and never fails. Once done, mode configuration must
be setup by initializing the following fields.
@@ -397,6 +397,9 @@ Connector Functions Reference
Writeback Connectors
--------------------
+.. kernel-doc:: include/drm/drm_writeback.h
+ :internal:
+
.. kernel-doc:: drivers/gpu/drm/drm_writeback.c
:doc: overview
diff --git a/Documentation/gpu/drm-mm.rst b/Documentation/gpu/drm-mm.rst
index c77b32601260..1839762044be 100644
--- a/Documentation/gpu/drm-mm.rst
+++ b/Documentation/gpu/drm-mm.rst
@@ -373,15 +373,6 @@ GEM CMA Helper Functions Reference
.. kernel-doc:: drivers/gpu/drm/drm_gem_cma_helper.c
:export:
-VRAM Helper Function Reference
-==============================
-
-.. kernel-doc:: drivers/gpu/drm/drm_vram_helper_common.c
- :doc: overview
-
-.. kernel-doc:: include/drm/drm_gem_vram_helper.h
- :internal:
-
GEM VRAM Helper Functions Reference
-----------------------------------
diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst
index f6d363b6756e..33cc6ddf8f64 100644
--- a/Documentation/gpu/i915.rst
+++ b/Documentation/gpu/i915.rst
@@ -329,6 +329,52 @@ for execution also include a list of all locations within buffers that
refer to GPU-addresses so that the kernel can edit the buffer correctly.
This process is dubbed relocation.
+Locking Guidelines
+------------------
+
+.. note::
+ This is a description of how the locking should be after
+ refactoring is done. Does not necessarily reflect what the locking
+ looks like while WIP.
+
+#. All locking rules and interface contracts with cross-driver interfaces
+ (dma-buf, dma_fence) need to be followed.
+
+#. No struct_mutex anywhere in the code
+
+#. dma_resv will be the outermost lock (when needed) and ww_acquire_ctx
+ is to be hoisted at highest level and passed down within i915_gem_ctx
+ in the call chain
+
+#. While holding lru/memory manager (buddy, drm_mm, whatever) locks,
+   system memory allocations are not allowed
+
+   * Enforce this by priming lockdep (with fs_reclaim); see the sketch
+     after this list. If we allocate memory while holding these locks
+     we get a rehash
+ of the shrinker vs. struct_mutex saga, and that would be
+ real bad.
+
+#. Do not nest different lru/memory manager locks within each other.
+ Take them in turn to update memory allocations, relying on the object’s
+ dma_resv ww_mutex to serialize against other operations.
+
+#. The suggestion for lru/memory manager locks is that they are small
+ enough to be spinlocks.
+
+#. All features need to come with exhaustive kernel selftests and/or
+ IGT tests when appropriate
+
+#. All LMEM uAPI paths need to be fully restartable (_interruptible()
+ for all locks/waits/sleeps)
+
+ * Error handling validation through signal injection.
+ Still the best strategy we have for validating GEM uAPI
+ corner cases.
+ Must be excessively used in the IGT, and we need to check
+ that we really have full path coverage of all error cases.
+
+ * -EDEADLK handling with ww_mutex
+
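+One way to prime lockdep for the "no allocations under memory manager
+locks" rule above is a sketch along these lines (mgr->lock stands in for
+whatever lru/memory manager lock the driver uses)::
+
+  /* teach lockdep that mgr->lock nests inside fs_reclaim ... */
+  fs_reclaim_acquire(GFP_KERNEL);
+  spin_lock(&mgr->lock);
+  spin_unlock(&mgr->lock);
+  fs_reclaim_release(GFP_KERNEL);
+  /* ... so a later allocation while holding mgr->lock splats */
+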
GEM BO Management Implementation Details
----------------------------------------
@@ -391,19 +437,19 @@ Global GTT views
GTT Fences and Swizzling
------------------------
-.. kernel-doc:: drivers/gpu/drm/i915/i915_gem_fence_reg.c
+.. kernel-doc:: drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
:internal:
Global GTT Fence Handling
~~~~~~~~~~~~~~~~~~~~~~~~~
-.. kernel-doc:: drivers/gpu/drm/i915/i915_gem_fence_reg.c
+.. kernel-doc:: drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
:doc: fence register handling
Hardware Tiling and Swizzling Details
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-.. kernel-doc:: drivers/gpu/drm/i915/i915_gem_fence_reg.c
+.. kernel-doc:: drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
:doc: tiling swizzling details
Object Tiling IOCTLs
diff --git a/Documentation/gpu/todo.rst b/Documentation/gpu/todo.rst
index 439656f55c5d..658b52f7ffc6 100644
--- a/Documentation/gpu/todo.rst
+++ b/Documentation/gpu/todo.rst
@@ -347,18 +347,6 @@ Contact: Sean Paul
Level: Starter
-Remove drm_display_mode.hsync
------------------------------
-
-We have drm_mode_hsync() to calculate this from hsync_start/end, since drivers
-shouldn't/don't use this, remove this member to avoid any temptations to use it
-in the future. If there is any debug code using drm_display_mode.hsync, convert
-it to use drm_mode_hsync() instead.
-
-Contact: Sean Paul
-
-Level: Starter
-
connector register/unregister fixes
-----------------------------------
diff --git a/Documentation/hwmon/amd_energy.rst b/Documentation/hwmon/amd_energy.rst
new file mode 100644
index 000000000000..f8288edff664
--- /dev/null
+++ b/Documentation/hwmon/amd_energy.rst
@@ -0,0 +1,109 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Kernel driver amd_energy
+==========================
+
+Supported chips:
+
+* AMD Family 17h Processors
+
+ Prefix: 'amd_energy'
+
+ Addresses used: RAPL MSRs
+
+ Datasheets:
+
+ - Processor Programming Reference (PPR) for AMD Family 17h Model 01h, Revision B1 Processors
+
+ https://developer.amd.com/wp-content/resources/55570-B1_PUB.zip
+
+ - Preliminary Processor Programming Reference (PPR) for AMD Family 17h Model 31h, Revision B0 Processors
+
+ https://developer.amd.com/wp-content/resources/56176_ppr_Family_17h_Model_71h_B0_pub_Rev_3.06.zip
+
+Author: Naveen Krishna Chatradhi <nchatrad@amd.com>
+
+Description
+-----------
+
+The Energy driver exposes the energy counters that are reported
+by the Running Average Power Limit (RAPL) Model-specific
+Registers (MSRs) via the hardware monitoring (HWMON) sysfs
+interface.
+
+1. Power, Energy and Time Units
+ MSR_RAPL_POWER_UNIT/ C001_0299:
+ shared with all cores in the socket
+
+2. Energy consumed by each Core
+ MSR_CORE_ENERGY_STATUS/ C001_029A:
+   32-bit RO, Accumulator, core-level power reporting
+
+3. Energy consumed by Socket
+ MSR_PACKAGE_ENERGY_STATUS/ C001_029B:
+   32-bit RO, Accumulator, socket-level power reporting,
+ shared with all cores in socket
+
+These registers are updated every 1ms and cleared on
+reset of the system.
+
+Note: If SMT is enabled, Linux enumerates all threads as CPUs.
+Since the energy status registers are accessed at the core level,
+reading those registers from the sibling threads would result
+in duplicate values. Hence, energy counter entries are not
+populated for the siblings.
+
+Energy Calculation
+------------------
+
+Energy information (in joules) is based on the multiplier
+1/2^ESU, where ESU is an unsigned integer read from the
+MSR_RAPL_POWER_UNIT register. The default value is 10000b,
+indicating an energy status unit of 15.3 micro-joules per increment.
+
+Reported values are scaled as per the formula::
+
+  scaled value = ((1/2^ESU) * (Raw value) * 1000000UL) in uJoules
+
+Users calculate power for a given domain by calculating
+ dEnergy/dTime for that domain.
+
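+A minimal user-space sketch of the scaling arithmetic (the raw counter
+value and the helper are illustrative only)::
+
+  #include <stdint.h>
+  #include <stdio.h>
+
+  /* Convert a raw 32-bit energy counter to micro-joules. */
+  static uint64_t raw_to_microjoules(uint32_t raw, unsigned int esu)
+  {
+          /* energy unit = 1/2^ESU joules = 10^6 / 2^ESU micro-joules */
+          return ((uint64_t)raw * 1000000ULL) >> esu;
+  }
+
+  int main(void)
+  {
+          /* default ESU of 10000b (16) gives ~15.3 uJ per increment */
+          printf("%llu uJ\n",
+                 (unsigned long long)raw_to_microjoules(1000, 16));
+          return 0;
+  }
+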
+Energy accumulation
+--------------------------
+
+Currently, the socket energy status register is 32-bit. Assuming a 240W
+2P system, the register would wrap around in::
+
+	2^32 * 15.3e-6 / 240 * 2 = 547.60833024 secs (~9 mins)
+
+The Core energy register may wrap around after several days.
+
+To improve the wrap around time, a kernel thread is implemented
+to accumulate the socket energy counters and one core energy counter
+per run to a respective 64-bit counter. The kernel thread starts
+running during probe, wakes up every 100 seconds and stops running
+when the driver is removed.
+
+A socket and core energy read would return the current register
+value added to the respective energy accumulator.
+
+Sysfs attributes
+----------------
+
+=============== ======== =====================================
+Attribute       Label    Description
+=============== ======== =====================================
+
+* For index N between [1] and [nr_cpus]
+
+=============== ======== ======================================
+energy[N]_input EcoreX Core Energy X = [0] to [nr_cpus - 1]
+ Measured input core energy
+=============== ======== ======================================
+
+* For N between [nr_cpus] and [nr_cpus + nr_socks]
+
+=============== ======== ======================================
+energy[N]_input EsocketX Socket Energy X = [0] to [nr_socks -1]
+ Measured input socket energy
+=============== ======== ======================================
diff --git a/Documentation/hwmon/bt1-pvt.rst b/Documentation/hwmon/bt1-pvt.rst
new file mode 100644
index 000000000000..cbb0c0613132
--- /dev/null
+++ b/Documentation/hwmon/bt1-pvt.rst
@@ -0,0 +1,117 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+
+Kernel driver bt1-pvt
+=====================
+
+Supported chips:
+
+ * Baikal-T1 PVT sensor (in SoC)
+
+ Prefix: 'bt1-pvt'
+
+ Addresses scanned: -
+
+ Datasheet: Provided by BAIKAL ELECTRONICS upon request and under NDA
+
+Authors:
+ Maxim Kaurkin <maxim.kaurkin@baikalelectronics.ru>
+ Serge Semin <Sergey.Semin@baikalelectronics.ru>
+
+Description
+-----------
+
+This driver implements support for the hardware monitoring capabilities of the
+process, voltage and temperature (PVT) sensors embedded into the Baikal-T1 SoC.
+The PVT IP-core consists of one temperature and four voltage sensors, which can
+be used to monitor the chip's internal environment such as heating, supply
+voltage and transistor performance. The driver can optionally provide hwmon
+alarms for each sensor the PVT controller supports. The alarms functionality is
+compile-time configurable because of a peculiarity of the hardware interface:
+data can be converted from only one sensor at a time. An additional limitation
+is that the controller performs the threshold checks synchronously with the
+data conversion procedure. Because of this, in order to have the hwmon alarms
+detected automatically, the driver code must switch from one sensor to another,
+read the converted data and manually check the threshold status bits. Depending
+on the measurement timeout settings (the update_interval sysfs node value) this
+design may place an additional burden on system performance. So if alarms are
+unnecessary in your system design it is recommended to keep them disabled, to
+prevent the PVT IRQs being raised periodically to keep the data cache and alarm
+status up to date. By default, in the alarm-less configuration, the data
+conversion is performed by the driver on demand when a read operation is
+requested via the corresponding _input file.
+
+Temperature Monitoring
+----------------------
+
+Temperature is measured with 10-bit resolution and reported in millidegree
+Celsius. The driver performs all the scaling by itself, therefore it reports
+true temperatures that don't need any user-space adjustments. The data
+translation formula isn't linear, which gives non-linear discreteness; it is
+close to linear, but gives a bit better accuracy for higher temperatures.
+The temperature input is mapped as follows (the last column indicates the input
+ranges)::
+
+ temp1: CPU embedded diode -48.38C - +147.438C
+
+If the alarms kernel config is enabled in the driver, the temperature input
+has associated min and max limits which trigger an alarm when crossed.
+
+Voltage Monitoring
+------------------
+
+The voltage inputs are also sampled with 10-bit resolution and reported in
+millivolts. In this case the data translation formula is linear, which
+provides constant measurement discreteness. The data scaling is also
+performed by the driver, so it returns true millivolts. The voltage inputs are
+mapped as follows (the last column indicates the input ranges)::
+
+ in0: VDD (processor core) 0.62V - 1.168V
+ in1: Low-Vt (low voltage threshold) 0.62V - 1.168V
+ in2: High-Vt (high voltage threshold) 0.62V - 1.168V
+ in3: Standard-Vt (standard voltage threshold) 0.62V - 1.168V
+
+If the alarms config is enabled in the driver, the voltage inputs
+have associated min and max limits which trigger an alarm when crossed.
+
+Sysfs Attributes
+----------------
+
+Following is a list of all sysfs attributes that the driver provides, their
+permissions and a short description:
+
+=============================== ======= =======================================
+Name Perm Description
+=============================== ======= =======================================
+update_interval RW Measurements update interval per
+ sensor.
+temp1_type RO Sensor type (always 1 as CPU embedded
+ diode).
+temp1_label RO CPU Core Temperature sensor.
+temp1_input RO Measured temperature in millidegree
+ Celsius.
+temp1_min RW Low limit for temp input.
+temp1_max RW High limit for temp input.
+temp1_min_alarm RO Temperature input alarm. Returns 1 if
+ temperature input went below min limit,
+ 0 otherwise.
+temp1_max_alarm RO Temperature input alarm. Returns 1 if
+ temperature input went above max limit,
+ 0 otherwise.
+temp1_offset RW Temperature offset in millidegree
+ Celsius which is added to the
+ temperature reading by the chip. It can
+ be used to manually adjust the
+ temperature measurements within 7.130
+ degrees Celsius.
+in[0-3]_label RO CPU Voltage sensor (either core or
+ low/high/standard thresholds).
+in[0-3]_input RO Measured voltage in millivolts.
+in[0-3]_min RW Low limit for voltage input.
+in[0-3]_max RW High limit for voltage input.
+in[0-3]_min_alarm RO Voltage input alarm. Returns 1 if
+ voltage input went below min limit,
+ 0 otherwise.
+in[0-3]_max_alarm RO Voltage input alarm. Returns 1 if
+ voltage input went above max limit,
+ 0 otherwise.
+=============================== ======= =======================================
diff --git a/Documentation/hwmon/gsc-hwmon.rst b/Documentation/hwmon/gsc-hwmon.rst
new file mode 100644
index 000000000000..ffac392a7129
--- /dev/null
+++ b/Documentation/hwmon/gsc-hwmon.rst
@@ -0,0 +1,53 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Kernel driver gsc-hwmon
+=======================
+
+Supported chips: Gateworks GSC
+Datasheet: http://trac.gateworks.com/wiki/gsc
+Author: Tim Harvey <tharvey@gateworks.com>
+
+Description:
+------------
+
+This driver supports hardware monitoring for the temperature sensor,
+various ADCs connected to the GSC, and the optional fan controller available
+on some boards.
+
+
+Voltage Monitoring
+------------------
+
+The voltage inputs are scaled either internally or by the driver depending
+on the GSC version and firmware. The values returned by the driver do not need
+further scaling. The voltage input labels provide the voltage rail name:
+
+inX_input Measured voltage (mV).
+inX_label Name of voltage rail.
+
+
+Temperature Monitoring
+----------------------
+
+Temperatures are measured with 12-bit or 10-bit resolution and are scaled
+either internally or by the driver depending on the GSC version and firmware.
+The values returned by the driver reflect millidegree Celsius:
+
+tempX_input Measured temperature.
+tempX_label Name of temperature input.
+
+
+PWM Output Control
+------------------
+
+The GSC features one PWM output that operates in automatic mode, where the
+PWM value is scaled depending on six temperature boundaries.
+The temperature boundaries are read-write and in millidegree Celsius, and the
+read-only PWM values range from 0 (off) to 255 (full speed).
+Fan speed will be set to minimum (off) when the temperature sensor reads
+less than pwm1_auto_point1_temp and maximum when the temperature sensor
+equals or exceeds pwm1_auto_point6_temp.
+
+pwm1_auto_point[1-6]_pwm PWM value.
+pwm1_auto_point[1-6]_temp Temperature boundary.
+
diff --git a/Documentation/hwmon/ina2xx.rst b/Documentation/hwmon/ina2xx.rst
index 94b9a260c518..ed81f5416331 100644
--- a/Documentation/hwmon/ina2xx.rst
+++ b/Documentation/hwmon/ina2xx.rst
@@ -99,6 +99,25 @@ Sysfs entries for ina226, ina230 and ina231 only
------------------------------------------------
======================= ====================================================
+in0_lcrit Critical low shunt voltage
+in0_crit Critical high shunt voltage
+in0_lcrit_alarm Shunt voltage critical low alarm
+in0_crit_alarm Shunt voltage critical high alarm
+in1_lcrit Critical low bus voltage
+in1_crit Critical high bus voltage
+in1_lcrit_alarm Bus voltage critical low alarm
+in1_crit_alarm Bus voltage critical high alarm
+power1_crit Critical high power
+power1_crit_alarm Power critical high alarm
update_interval data conversion time; affects number of samples used
to average results for shunt and bus voltages.
======================= ====================================================
+
+.. note::
+
+  - Configure `shunt_resistor` before configuring `power1_crit`, because the
+    power value is calculated based on the `shunt_resistor` that is set.
+ - Because of the underlying register implementation, only one `*crit` setting
+ and its `alarm` can be active. Writing to one `*crit` setting clears other
+ `*crit` settings and alarms. Writing 0 to any `*crit` setting clears all
+ `*crit` settings and alarms.
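+
+A user-space sketch of honouring this ordering (the hwmon device path and
+the values written are illustrative assumptions)::
+
+  #include <stdio.h>
+
+  /* Write a string to a sysfs attribute; returns 0 on success. */
+  static int write_attr(const char *path, const char *val)
+  {
+          FILE *f = fopen(path, "w");
+
+          if (!f)
+                  return -1;
+          fputs(val, f);
+          return fclose(f);
+  }
+
+  int main(void)
+  {
+          /* set shunt_resistor first, then the critical power limit */
+          write_attr("/sys/class/hwmon/hwmon0/shunt_resistor", "10000");
+          write_attr("/sys/class/hwmon/hwmon0/power1_crit", "5000000");
+          return 0;
+  }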
diff --git a/Documentation/hwmon/index.rst b/Documentation/hwmon/index.rst
index 1f0affb3b6e0..55ff4b7c5349 100644
--- a/Documentation/hwmon/index.rst
+++ b/Documentation/hwmon/index.rst
@@ -39,11 +39,13 @@ Hardware Monitoring Kernel Drivers
adt7470
adt7475
amc6821
+ amd_energy
asb100
asc7621
aspeed-pwm-tacho
bcm54140
bel-pfe
+ bt1-pvt
coretemp
da9052
da9055
@@ -61,6 +63,7 @@ Hardware Monitoring Kernel Drivers
ftsteutates
g760a
g762
+ gsc-hwmon
gl518sm
hih6130
ibmaem
@@ -107,6 +110,7 @@ Hardware Monitoring Kernel Drivers
max16064
max16065
max1619
+ max16601
max1668
max197
max20730
diff --git a/Documentation/hwmon/lm90.rst b/Documentation/hwmon/lm90.rst
index 953315987c06..78dfc01b47a2 100644
--- a/Documentation/hwmon/lm90.rst
+++ b/Documentation/hwmon/lm90.rst
@@ -123,6 +123,18 @@ Supported chips:
http://www.maxim-ic.com/quick_view2.cfm/qv_pk/3497
+ * Maxim MAX6654
+
+ Prefix: 'max6654'
+
+ Addresses scanned: I2C 0x18, 0x19, 0x1a, 0x29, 0x2a, 0x2b,
+
+ 0x4c, 0x4d and 0x4e
+
+ Datasheet: Publicly available at the Maxim website
+
+ https://www.maximintegrated.com/en/products/sensors/MAX6654.html
+
* Maxim MAX6657
Prefix: 'max6657'
@@ -301,6 +313,13 @@ ADT7461, ADT7461A, NCT1008:
* Extended temperature range (breaks compatibility)
* Lower resolution for remote temperature
+MAX6654:
+ * Better local resolution
+ * Selectable address
+ * Remote sensor type selection
+ * Extended temperature range
+ * Extended resolution only available when conversion rate <= 1 Hz
+
MAX6657 and MAX6658:
* Better local resolution
* Remote sensor type selection
@@ -336,8 +355,8 @@ SA56004X:
All temperature values are given in degrees Celsius. Resolution
is 1.0 degree for the local temperature, 0.125 degree for the remote
-temperature, except for the MAX6657, MAX6658 and MAX6659 which have a
-resolution of 0.125 degree for both temperatures.
+temperature, except for the MAX6654, MAX6657, MAX6658 and MAX6659 which have
+a resolution of 0.125 degree for both temperatures.
Each sensor has its own high and low limits, plus a critical limit.
Additionally, there is a relative hysteresis value common to both critical
diff --git a/Documentation/hwmon/max16601.rst b/Documentation/hwmon/max16601.rst
new file mode 100644
index 000000000000..346e74674c51
--- /dev/null
+++ b/Documentation/hwmon/max16601.rst
@@ -0,0 +1,159 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Kernel driver max16601
+======================
+
+Supported chips:
+
+ * Maxim MAX16601
+
+ Prefix: 'max16601'
+
+ Addresses scanned: -
+
+ Datasheet: Not published
+
+Author: Guenter Roeck <linux@roeck-us.net>
+
+
+Description
+-----------
+
+This driver supports the MAX16601 VR13.HC Dual-Output Voltage Regulator
+Chipset.
+
+The driver is a client driver to the core PMBus driver.
+Please see Documentation/hwmon/pmbus.rst for details on PMBus client drivers.
+
+
+Usage Notes
+-----------
+
+This driver does not auto-detect devices. You will have to instantiate the
+devices explicitly. Please see Documentation/i2c/instantiating-devices.rst for
+details.
+
+
+Platform data support
+---------------------
+
+The driver supports standard PMBus driver platform data.
+
+
+Sysfs entries
+-------------
+
+The following attributes are supported.
+
+======================= =======================================================
+in1_label "vin1"
+in1_input VCORE input voltage.
+in1_alarm Input voltage alarm.
+
+in2_label "vout1"
+in2_input VCORE output voltage.
+in2_alarm Output voltage alarm.
+
+curr1_label "iin1"
+curr1_input VCORE input current, derived from duty cycle and output
+ current.
+curr1_max Maximum input current.
+curr1_max_alarm Current high alarm.
+
+curr2_label "iin1.0"
+curr2_input VCORE phase 0 input current.
+
+curr3_label "iin1.1"
+curr3_input VCORE phase 1 input current.
+
+curr4_label "iin1.2"
+curr4_input VCORE phase 2 input current.
+
+curr5_label "iin1.3"
+curr5_input VCORE phase 3 input current.
+
+curr6_label "iin1.4"
+curr6_input VCORE phase 4 input current.
+
+curr7_label "iin1.5"
+curr7_input VCORE phase 5 input current.
+
+curr8_label "iin1.6"
+curr8_input VCORE phase 6 input current.
+
+curr9_label "iin1.7"
+curr9_input VCORE phase 7 input current.
+
+curr10_label "iin2"
+curr10_input VCORE input current, derived from sensor element.
+
+curr11_label "iin3"
+curr11_input VSA input current.
+
+curr12_label "iout1"
+curr12_input VCORE output current.
+curr12_crit Critical output current.
+curr12_crit_alarm Output current critical alarm.
+curr12_max Maximum output current.
+curr12_max_alarm Output current high alarm.
+
+curr13_label "iout1.0"
+curr13_input VCORE phase 0 output current.
+
+curr14_label "iout1.1"
+curr14_input VCORE phase 1 output current.
+
+curr15_label "iout1.2"
+curr15_input VCORE phase 2 output current.
+
+curr16_label "iout1.3"
+curr16_input VCORE phase 3 output current.
+
+curr17_label "iout1.4"
+curr17_input VCORE phase 4 output current.
+
+curr18_label "iout1.5"
+curr18_input VCORE phase 5 output current.
+
+curr19_label "iout1.6"
+curr19_input VCORE phase 6 output current.
+
+curr20_label "iout1.7"
+curr20_input VCORE phase 7 output current.
+
+curr21_label "iout3"
+curr21_input VSA output current.
+curr21_highest Historical maximum VSA output current.
+curr21_reset_history Write any value to reset curr21_highest.
+curr21_crit Critical output current.
+curr21_crit_alarm Output current critical alarm.
+curr21_max Maximum output current.
+curr21_max_alarm Output current high alarm.
+
+power1_label "pin1"
+power1_input Input power, derived from duty cycle and output current.
+power1_alarm Input power alarm.
+
+power2_label "pin2"
+power2_input Input power, derived from input current sensor.
+
+power3_label "pout"
+power3_input Output power.
+
+temp1_input VCORE temperature.
+temp1_crit Critical high temperature.
+temp1_crit_alarm Chip temperature critical high alarm.
+temp1_max Maximum temperature.
+temp1_max_alarm Chip temperature high alarm.
+
+temp2_input TSENSE_0 temperature
+temp3_input TSENSE_1 temperature
+temp4_input TSENSE_2 temperature
+temp5_input TSENSE_3 temperature
+
+temp6_input VSA temperature.
+temp6_crit Critical high temperature.
+temp6_crit_alarm Chip temperature critical high alarm.
+temp6_max Maximum temperature.
+temp6_max_alarm Chip temperature high alarm.
+======================= =======================================================
diff --git a/Documentation/i2c/i2c.svg b/Documentation/i2c/i2c_bus.svg
index 5979405ad1c3..3170de976373 100644
--- a/Documentation/i2c/i2c.svg
+++ b/Documentation/i2c/i2c_bus.svg
@@ -9,7 +9,7 @@
xmlns="http://www.w3.org/2000/svg"
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
- sodipodi:docname="i2c.svg"
+ sodipodi:docname="i2c_bus.svg"
inkscape:version="0.92.3 (2405546, 2018-03-11)"
version="1.1"
id="svg2"
diff --git a/Documentation/i2c/summary.rst b/Documentation/i2c/summary.rst
index ce7230025b33..136c4e333be7 100644
--- a/Documentation/i2c/summary.rst
+++ b/Documentation/i2c/summary.rst
@@ -34,7 +34,7 @@ Terminology
Using the terminology from the official documentation, the I2C bus connects
one or more *master* chips and one or more *slave* chips.
-.. kernel-figure:: i2c.svg
+.. kernel-figure:: i2c_bus.svg
:alt: Simple I2C bus with one master and 3 slaves
Simple I2C bus
diff --git a/Documentation/ia64/irq-redir.rst b/Documentation/ia64/irq-redir.rst
index 39bf94484a15..6bbbbe4f73ef 100644
--- a/Documentation/ia64/irq-redir.rst
+++ b/Documentation/ia64/irq-redir.rst
@@ -7,7 +7,7 @@ IRQ affinity on IA64 platforms
By writing to /proc/irq/IRQ#/smp_affinity the interrupt routing can be
controlled. The behavior on IA64 platforms is slightly different from
-that described in Documentation/IRQ-affinity.txt for i386 systems.
+that described in Documentation/core-api/irq/irq-affinity.rst for i386 systems.
Because of the usage of SAPIC mode and physical destination mode the
IRQ target is one particular CPU and cannot be a mask of several
diff --git a/Documentation/iio/iio_configfs.rst b/Documentation/iio/iio_configfs.rst
index ecbfdb3afef7..6e38cbbd2981 100644
--- a/Documentation/iio/iio_configfs.rst
+++ b/Documentation/iio/iio_configfs.rst
@@ -9,7 +9,7 @@ Configfs is a filesystem-based manager of kernel objects. IIO uses some
objects that could be easily configured using configfs (e.g.: devices,
triggers).
-See Documentation/filesystems/configfs/configfs.txt for more information
+See Documentation/filesystems/configfs.rst for more information
about how configfs works.
2. Usage
diff --git a/Documentation/futex-requeue-pi.txt b/Documentation/locking/futex-requeue-pi.rst
index 14ab5787b9a7..14ab5787b9a7 100644
--- a/Documentation/futex-requeue-pi.txt
+++ b/Documentation/locking/futex-requeue-pi.rst
diff --git a/Documentation/hwspinlock.txt b/Documentation/locking/hwspinlock.rst
index 6f03713b7003..6f03713b7003 100644
--- a/Documentation/hwspinlock.txt
+++ b/Documentation/locking/hwspinlock.rst
diff --git a/Documentation/locking/index.rst b/Documentation/locking/index.rst
index 5d6800a723dc..d785878cad65 100644
--- a/Documentation/locking/index.rst
+++ b/Documentation/locking/index.rst
@@ -16,6 +16,13 @@ locking
rt-mutex
spinlocks
ww-mutex-design
+ preempt-locking
+ pi-futex
+ futex-requeue-pi
+ hwspinlock
+ percpu-rw-semaphore
+ robust-futexes
+ robust-futex-ABI
.. only:: subproject and html
diff --git a/Documentation/locking/locktorture.rst b/Documentation/locking/locktorture.rst
index 5bcb99ba7bd9..8012a74555e7 100644
--- a/Documentation/locking/locktorture.rst
+++ b/Documentation/locking/locktorture.rst
@@ -110,7 +110,7 @@ stutter
same period of time. Defaults to "stutter=5", so as
to run and pause for (roughly) five-second intervals.
Specifying "stutter=0" causes the test to run continuously
- without pausing, which is the old default behavior.
+ without pausing.
shuffle_interval
The number of seconds to keep the test threads affinitied
diff --git a/Documentation/locking/locktypes.rst b/Documentation/locking/locktypes.rst
index 09f45ce38d26..1b577a8bf982 100644
--- a/Documentation/locking/locktypes.rst
+++ b/Documentation/locking/locktypes.rst
@@ -13,6 +13,7 @@ The kernel provides a variety of locking primitives which can be divided
into two categories:
- Sleeping locks
+ - CPU local locks
- Spinning locks
This document conceptually describes these lock types and provides rules
@@ -44,9 +45,23 @@ Sleeping lock types:
On PREEMPT_RT kernels, these lock types are converted to sleeping locks:
+ - local_lock
- spinlock_t
- rwlock_t
+
+CPU local locks
+---------------
+
+ - local_lock
+
+On non-PREEMPT_RT kernels, local_lock functions are wrappers around
+preemption and interrupt disabling primitives. Unlike other locking
+mechanisms, disabling preemption or interrupts is a purely CPU-local
+concurrency control mechanism and is not suited for inter-CPU concurrency
+control.
+
+
Spinning locks
--------------
@@ -67,6 +82,7 @@ can have suffixes which apply further protections:
_irqsave/restore() Save and disable / restore interrupt disabled state
=================== ====================================================
+
Owner semantics
===============
@@ -139,6 +155,56 @@ implementation, thus changing the fairness:
writer from starving readers.
+local_lock
+==========
+
+local_lock provides a named scope to critical sections which are protected
+by disabling preemption or interrupts.
+
+On non-PREEMPT_RT kernels local_lock operations map to the preemption and
+interrupt disabling and enabling primitives:
+
+  =============================== ======================
+  local_lock(&llock)              preempt_disable()
+  local_unlock(&llock)            preempt_enable()
+  local_lock_irq(&llock)          local_irq_disable()
+  local_unlock_irq(&llock)        local_irq_enable()
+  local_lock_irqsave(&llock)      local_irq_save()
+  local_unlock_irqrestore(&llock) local_irq_restore()
+  =============================== ======================
+
+The named scope of local_lock has two advantages over the regular
+primitives:
+
+  - The lock name allows static analysis and also clearly documents the
+    protection scope, while the regular primitives are scopeless and
+    opaque.
+
+  - If lockdep is enabled, the local_lock gains a lockmap which allows
+    validating the correctness of the protection. This can detect cases
+    where e.g. a function using preempt_disable() as a protection mechanism
+    is invoked from interrupt or soft-interrupt context. Aside from that,
+    lockdep_assert_held(&llock) works as with any other locking primitive.
+
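+As an illustration only (the struct, variable and function names below are
+made up; the local_lock_t type and the INIT_LOCAL_LOCK() initializer are
+assumed from the local_lock API), protecting a per-CPU counter could look
+like this::
+
+  #include <linux/local_lock.h>
+  #include <linux/percpu.h>
+
+  struct foo_stats {
+    local_lock_t lock;
+    unsigned long count;
+  };
+
+  static DEFINE_PER_CPU(struct foo_stats, foo_stats) = {
+    .lock = INIT_LOCAL_LOCK(lock),
+  };
+
+  void foo_account(void)
+  {
+    /* Named scope instead of an opaque preempt_disable() */
+    local_lock(&foo_stats.lock);
+    this_cpu_inc(foo_stats.count);
+    local_unlock(&foo_stats.lock);
+  }
+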
+local_lock and PREEMPT_RT
+-------------------------
+
+PREEMPT_RT kernels map local_lock to a per-CPU spinlock_t, thus changing
+semantics:
+
+ - All spinlock_t changes also apply to local_lock.
+
+local_lock usage
+----------------
+
+local_lock should be used in situations where disabling preemption or
+interrupts is the appropriate form of concurrency control to protect
+per-CPU data structures on a non-PREEMPT_RT kernel.
+
+local_lock is not suitable for protecting against preemption or interrupts
+on a PREEMPT_RT kernel due to the PREEMPT_RT-specific spinlock_t semantics.
+
+
raw_spinlock_t and spinlock_t
=============================
@@ -258,10 +324,82 @@ implementation, thus changing semantics:
PREEMPT_RT caveats
==================
+local_lock on RT
+----------------
+
+The mapping of local_lock to spinlock_t on PREEMPT_RT kernels has a few
+implications. For example, on a non-PREEMPT_RT kernel the following code
+sequence works as expected::
+
+ local_lock_irq(&local_lock);
+ raw_spin_lock(&lock);
+
+and is fully equivalent to::
+
+ raw_spin_lock_irq(&lock);
+
+On a PREEMPT_RT kernel this code sequence breaks because local_lock_irq()
+is mapped to a per-CPU spinlock_t which neither disables interrupts nor
+preemption. The following code sequence works correctly on both
+PREEMPT_RT and non-PREEMPT_RT kernels::
+
+ local_lock_irq(&local_lock);
+ spin_lock(&lock);
+
+Another caveat with local locks is that each local_lock has a specific
+protection scope. So the following substitution is wrong::
+
+ func1()
+ {
+ local_irq_save(flags); -> local_lock_irqsave(&local_lock_1, flags);
+ func3();
+ local_irq_restore(flags); -> local_lock_irqrestore(&local_lock_1, flags);
+ }
+
+ func2()
+ {
+ local_irq_save(flags); -> local_lock_irqsave(&local_lock_2, flags);
+ func3();
+ local_irq_restore(flags); -> local_lock_irqrestore(&local_lock_2, flags);
+ }
+
+ func3()
+ {
+ lockdep_assert_irqs_disabled();
+ access_protected_data();
+ }
+
+On a non-PREEMPT_RT kernel this works correctly, but on a PREEMPT_RT kernel
+local_lock_1 and local_lock_2 are distinct and cannot serialize the callers
+of func3(). Also the lockdep assert will trigger on a PREEMPT_RT kernel
+because local_lock_irqsave() does not disable interrupts due to the
+PREEMPT_RT-specific semantics of spinlock_t. The correct substitution is::
+
+ func1()
+ {
+ local_irq_save(flags); -> local_lock_irqsave(&local_lock, flags);
+ func3();
+ local_irq_restore(flags); -> local_lock_irqrestore(&local_lock, flags);
+ }
+
+ func2()
+ {
+ local_irq_save(flags); -> local_lock_irqsave(&local_lock, flags);
+ func3();
+ local_irq_restore(flags); -> local_lock_irqrestore(&local_lock, flags);
+ }
+
+ func3()
+ {
+ lockdep_assert_held(&local_lock);
+ access_protected_data();
+ }
+
+
spinlock_t and rwlock_t
-----------------------
-These changes in spinlock_t and rwlock_t semantics on PREEMPT_RT kernels
+The changes in spinlock_t and rwlock_t semantics on PREEMPT_RT kernels
have a few implications. For example, on a non-PREEMPT_RT kernel the
following code sequence works as expected::
@@ -282,9 +420,61 @@ local_lock mechanism. Acquiring the local_lock pins the task to a CPU,
allowing things like per-CPU interrupt disabled locks to be acquired.
However, this approach should be used only where absolutely necessary.
+A typical scenario is protection of per-CPU variables in thread context::
-raw_spinlock_t
---------------
+ struct foo *p = get_cpu_ptr(&var1);
+
+ spin_lock(&p->lock);
+ p->count += this_cpu_read(var2);
+
+This is correct code on a non-PREEMPT_RT kernel, but on a PREEMPT_RT kernel
+this breaks. The PREEMPT_RT-specific change of spinlock_t semantics does
+not allow acquiring p->lock because get_cpu_ptr() implicitly disables
+preemption. The following substitution works on both kernels::
+
+ struct foo *p;
+
+ migrate_disable();
+ p = this_cpu_ptr(&var1);
+ spin_lock(&p->lock);
+ p->count += this_cpu_read(var2);
+
+On a non-PREEMPT_RT kernel migrate_disable() maps to preempt_disable()
+which makes the above code fully equivalent. On a PREEMPT_RT kernel
+migrate_disable() ensures that the task is pinned on the current CPU,
+which in turn guarantees that the per-CPU accesses to var1 and var2 stay
+on the same CPU.
+
+The migrate_disable() substitution is not valid for the following
+scenario::
+
+ func()
+ {
+ struct foo *p;
+
+ migrate_disable();
+ p = this_cpu_ptr(&var1);
+ p->val = func2();
+
+While correct on a non-PREEMPT_RT kernel, this breaks on PREEMPT_RT because
+here migrate_disable() does not protect against reentrancy from a
+preempting task. A correct substitution for this case is::
+
+ func()
+ {
+ struct foo *p;
+
+ local_lock(&foo_lock);
+ p = this_cpu_ptr(&var1);
+ p->val = func2();
+
+On a non-PREEMPT_RT kernel this protects against reentrancy by disabling
+preemption. On a PREEMPT_RT kernel this is achieved by acquiring the
+underlying per-CPU spinlock.
+
+
+raw_spinlock_t on RT
+--------------------
Acquiring a raw_spinlock_t disables preemption and possibly also
interrupts, so the critical section must avoid acquiring a regular
@@ -325,22 +515,25 @@ Lock type nesting rules
The most basic rules are:
- - Lock types of the same lock category (sleeping, spinning) can nest
- arbitrarily as long as they respect the general lock ordering rules to
- prevent deadlocks.
+ - Lock types of the same lock category (sleeping, CPU local, spinning)
+ can nest arbitrarily as long as they respect the general lock ordering
+ rules to prevent deadlocks.
+
+ - Sleeping lock types cannot nest inside CPU local and spinning lock types.
- - Sleeping lock types cannot nest inside spinning lock types.
+ - CPU local and spinning lock types can nest inside sleeping lock types.
- - Spinning lock types can nest inside sleeping lock types.
+ - Spinning lock types can nest inside all lock types.
These constraints apply both in PREEMPT_RT and otherwise.
The fact that PREEMPT_RT changes the lock category of spinlock_t and
-rwlock_t from spinning to sleeping means that they cannot be acquired while
-holding a raw spinlock. This results in the following nesting ordering:
+rwlock_t from spinning to sleeping and substitutes local_lock with a
+per-CPU spinlock_t means that they cannot be acquired while holding a raw
+spinlock. This results in the following nesting ordering:
1) Sleeping locks
- 2) spinlock_t and rwlock_t
+ 2) spinlock_t, rwlock_t, local_lock
3) raw_spinlock_t and bit spinlocks
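+
+As a purely illustrative sketch (the locks m, s and r below are
+hypothetical instances of the respective types), a nesting which is legal
+on both PREEMPT_RT and non-PREEMPT_RT kernels looks like this::
+
+  mutex_lock(&m);        /* 1) sleeping lock */
+  spin_lock(&s);         /* 2) spinlock_t */
+  raw_spin_lock(&r);     /* 3) raw_spinlock_t */
+
+  /* critical section */
+
+  raw_spin_unlock(&r);
+  spin_unlock(&s);
+  mutex_unlock(&m);
+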
Lockdep will complain if these constraints are violated, both in
diff --git a/Documentation/percpu-rw-semaphore.txt b/Documentation/locking/percpu-rw-semaphore.rst
index 247de6410855..247de6410855 100644
--- a/Documentation/percpu-rw-semaphore.txt
+++ b/Documentation/locking/percpu-rw-semaphore.rst
diff --git a/Documentation/pi-futex.txt b/Documentation/locking/pi-futex.rst
index c33ba2befbf8..c33ba2befbf8 100644
--- a/Documentation/pi-futex.txt
+++ b/Documentation/locking/pi-futex.rst
diff --git a/Documentation/preempt-locking.txt b/Documentation/locking/preempt-locking.rst
index dce336134e54..dce336134e54 100644
--- a/Documentation/preempt-locking.txt
+++ b/Documentation/locking/preempt-locking.rst
diff --git a/Documentation/robust-futex-ABI.txt b/Documentation/locking/robust-futex-ABI.rst
index f24904f1c16f..f24904f1c16f 100644
--- a/Documentation/robust-futex-ABI.txt
+++ b/Documentation/locking/robust-futex-ABI.rst
diff --git a/Documentation/robust-futexes.txt b/Documentation/locking/robust-futexes.rst
index 6361fb01c9c1..6361fb01c9c1 100644
--- a/Documentation/robust-futexes.txt
+++ b/Documentation/locking/robust-futexes.rst
diff --git a/Documentation/locking/rt-mutex.rst b/Documentation/locking/rt-mutex.rst
index c365dc302081..3b5097a380e6 100644
--- a/Documentation/locking/rt-mutex.rst
+++ b/Documentation/locking/rt-mutex.rst
@@ -4,7 +4,7 @@ RT-mutex subsystem with PI support
RT-mutexes with priority inheritance are used to support PI-futexes,
which enable pthread_mutex_t priority inheritance attributes
-(PTHREAD_PRIO_INHERIT). [See Documentation/pi-futex.txt for more details
+(PTHREAD_PRIO_INHERIT). [See Documentation/locking/pi-futex.rst for more details
about PI-futexes.]
This technology was developed in the -rt tree and streamlined for
diff --git a/Documentation/maintainer/maintainer-entry-profile.rst b/Documentation/maintainer/maintainer-entry-profile.rst
index 11ebe3682771..77e43c8b24b4 100644
--- a/Documentation/maintainer/maintainer-entry-profile.rst
+++ b/Documentation/maintainer/maintainer-entry-profile.rst
@@ -7,7 +7,7 @@ The Maintainer Entry Profile supplements the top-level process documents
(submitting-patches, submitting drivers...) with
subsystem/device-driver-local customs as well as details about the patch
submission life-cycle. A contributor uses this document to level set
-their expectations and avoid common mistakes, maintainers may use these
+their expectations and avoid common mistakes; maintainers may use these
profiles to look across subsystems for opportunities to converge on
common practices.
@@ -26,7 +26,7 @@ Example questions to consider:
- Does the subsystem have a patchwork instance? Are patchwork state
changes notified?
- Any bots or CI infrastructure that watches the list, or automated
- testing feedback that the subsystem gates acceptance?
+ testing feedback that the subsystem uses to gate acceptance?
- Git branches that are pulled into -next?
- What branch should contributors submit against?
- Links to any other Maintainer Entry Profiles? For example a
@@ -54,8 +54,8 @@ One of the common misunderstandings of submitters is that patches can be
sent at any time before the merge window closes and can still be
considered for the next -rc1. The reality is that most patches need to
be settled in soaking in linux-next in advance of the merge window
-opening. Clarify for the submitter the key dates (in terms rc release
-week) that patches might considered for merging and when patches need to
+opening. Clarify for the submitter the key dates (in terms of -rc release
+week) that patches might be considered for merging and when patches need to
wait for the next -rc. At a minimum:
- Last -rc for new feature submissions:
@@ -70,8 +70,8 @@ wait for the next -rc. At a minimum:
- Last -rc to merge features: Deadline for merge decisions
Indicate to contributors the point at which an as yet un-applied patch
set will need to wait for the NEXT+1 merge window. Of course there is no
- obligation to ever except any given patchset, but if the review has not
- concluded by this point the expectation the contributor should wait and
+ obligation to ever accept any given patchset, but if the review has not
+ concluded by this point, the expectation is that the contributor should wait and
resubmit for the following merge window.
Optional:
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index e1c355e84edd..eaabc3134294 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -620,7 +620,7 @@ because the CPUs that the Linux kernel supports don't do writes
until they are certain (1) that the write will actually happen, (2)
of the location of the write, and (3) of the value to be written.
But please carefully read the "CONTROL DEPENDENCIES" section and the
-Documentation/RCU/rcu_dereference.txt file: The compiler can and does
+Documentation/RCU/rcu_dereference.rst file: The compiler can and does
break dependencies in a great many highly creative ways.
CPU 1 CPU 2
diff --git a/Documentation/misc-devices/index.rst b/Documentation/misc-devices/index.rst
index c1dcd2628911..1ecc05fbe6f4 100644
--- a/Documentation/misc-devices/index.rst
+++ b/Documentation/misc-devices/index.rst
@@ -21,4 +21,5 @@ fit into other categories.
lis3lv02d
max6875
mic/index
+ uacce
xilinx_sdfec
diff --git a/Documentation/networking/scaling.rst b/Documentation/networking/scaling.rst
index f78d7bf27ff5..8f0347b9fb3d 100644
--- a/Documentation/networking/scaling.rst
+++ b/Documentation/networking/scaling.rst
@@ -81,7 +81,7 @@ of queues to IRQs can be determined from /proc/interrupts. By default,
an IRQ may be handled on any CPU. Because a non-negligible part of packet
processing takes place in receive interrupt handling, it is advantageous
to spread receive interrupts between CPUs. To manually adjust the IRQ
-affinity of each interrupt see Documentation/IRQ-affinity.txt. Some systems
+affinity of each interrupt see Documentation/core-api/irq/irq-affinity.rst. Some systems
will be running irqbalance, a daemon that dynamically optimizes IRQ
assignments and as a result may override any manual settings.
@@ -160,7 +160,7 @@ can be configured for each receive queue using a sysfs file entry::
This file implements a bitmap of CPUs. RPS is disabled when it is zero
(the default), in which case packets are processed on the interrupting
-CPU. Documentation/IRQ-affinity.txt explains how CPUs are assigned to
+CPU. Documentation/core-api/irq/irq-affinity.rst explains how CPUs are assigned to
the bitmap.
diff --git a/Documentation/nvdimm/maintainer-entry-profile.rst b/Documentation/nvdimm/maintainer-entry-profile.rst
index efe37adadcea..9da748e42623 100644
--- a/Documentation/nvdimm/maintainer-entry-profile.rst
+++ b/Documentation/nvdimm/maintainer-entry-profile.rst
@@ -4,15 +4,15 @@ LIBNVDIMM Maintainer Entry Profile
Overview
--------
The libnvdimm subsystem manages persistent memory across multiple
-architectures. The mailing list, is tracked by patchwork here:
+architectures. The mailing list is tracked by patchwork here:
https://patchwork.kernel.org/project/linux-nvdimm/list/
...and that instance is configured to give feedback to submitters on
patch acceptance and upstream merge. Patches are merged to either the
-'libnvdimm-fixes', or 'libnvdimm-for-next' branch. Those branches are
+'libnvdimm-fixes' or 'libnvdimm-for-next' branch. Those branches are
available here:
https://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm.git/
-In general patches can be submitted against the latest -rc, however if
+In general patches can be submitted against the latest -rc; however, if
the incoming code change is dependent on other pending changes then the
patch should be based on the libnvdimm-for-next branch. However, since
persistent memory sits at the intersection of storage and memory there
@@ -35,12 +35,12 @@ getting the test environment set up.
ACPI Device Specific Methods (_DSM)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Before patches enabling for a new _DSM family will be considered it must
+Before patches enabling a new _DSM family will be considered, it must
be assigned a format-interface-code from the NVDIMM Sub-team of the ACPI
Specification Working Group. In general, the stance of the subsystem is
-to push back on the proliferation of NVDIMM command sets, do strongly
+to push back on the proliferation of NVDIMM command sets, so do strongly
consider implementing support for an existing command set. See
-drivers/acpi/nfit/nfit.h for the set of support command sets.
+drivers/acpi/nfit/nfit.h for the set of supported command sets.
Key Cycle Dates
@@ -48,7 +48,7 @@ Key Cycle Dates
New submissions can be sent at any time, but if they intend to hit the
next merge window they should be sent before -rc4, and ideally
stabilized in the libnvdimm-for-next branch by -rc6. Of course if a
-patch set requires more than 2 weeks of review -rc4 is already too late
+patch set requires more than 2 weeks of review, -rc4 is already too late
and some patches may require multiple development cycles to review.
diff --git a/Documentation/power/pci.rst b/Documentation/power/pci.rst
index 0924d29636ad..1831e431f725 100644
--- a/Documentation/power/pci.rst
+++ b/Documentation/power/pci.rst
@@ -1004,41 +1004,39 @@ including the PCI bus type. The flags should be set once at the driver probe
time with the help of the dev_pm_set_driver_flags() function and they should not
be updated directly afterwards.
-The DPM_FLAG_NEVER_SKIP flag prevents the PM core from using the direct-complete
-mechanism allowing device suspend/resume callbacks to be skipped if the device
-is in runtime suspend when the system suspend starts. That also affects all of
-the ancestors of the device, so this flag should only be used if absolutely
-necessary.
-
-The DPM_FLAG_SMART_PREPARE flag instructs the PCI bus type to only return a
-positive value from pci_pm_prepare() if the ->prepare callback provided by the
+The DPM_FLAG_NO_DIRECT_COMPLETE flag prevents the PM core from using the
+direct-complete mechanism allowing device suspend/resume callbacks to be skipped
+if the device is in runtime suspend when the system suspend starts. That also
+affects all of the ancestors of the device, so this flag should only be used if
+absolutely necessary.
+
+The DPM_FLAG_SMART_PREPARE flag causes the PCI bus type to return a positive
+value from pci_pm_prepare() only if the ->prepare callback provided by the
driver of the device returns a positive value. That allows the driver to opt
-out from using the direct-complete mechanism dynamically.
+out from using the direct-complete mechanism dynamically (whereas setting
+DPM_FLAG_NO_DIRECT_COMPLETE means permanent opt-out).
The DPM_FLAG_SMART_SUSPEND flag tells the PCI bus type that from the driver's
perspective the device can be safely left in runtime suspend during system
suspend. That causes pci_pm_suspend(), pci_pm_freeze() and pci_pm_poweroff()
-to skip resuming the device from runtime suspend unless there are PCI-specific
-reasons for doing that. Also, it causes pci_pm_suspend_late/noirq(),
-pci_pm_freeze_late/noirq() and pci_pm_poweroff_late/noirq() to return early
-if the device remains in runtime suspend in the beginning of the "late" phase
-of the system-wide transition under way. Moreover, if the device is in
-runtime suspend in pci_pm_resume_noirq() or pci_pm_restore_noirq(), its runtime
-power management status will be changed to "active" (as it is going to be put
-into D0 going forward), but if it is in runtime suspend in pci_pm_thaw_noirq(),
-the function will set the power.direct_complete flag for it (to make the PM core
-skip the subsequent "thaw" callbacks for it) and return.
-
-Setting the DPM_FLAG_LEAVE_SUSPENDED flag means that the driver prefers the
-device to be left in suspend after system-wide transitions to the working state.
-This flag is checked by the PM core, but the PCI bus type informs the PM core
-which devices may be left in suspend from its perspective (that happens during
-the "noirq" phase of system-wide suspend and analogous transitions) and next it
-uses the dev_pm_may_skip_resume() helper to decide whether or not to return from
-pci_pm_resume_noirq() early, as the PM core will skip the remaining resume
-callbacks for the device during the transition under way and will set its
-runtime PM status to "suspended" if dev_pm_may_skip_resume() returns "true" for
-it.
+to avoid resuming the device from runtime suspend unless there are PCI-specific
+reasons for doing that. Also, it causes pci_pm_suspend_late/noirq() and
+pci_pm_poweroff_late/noirq() to return early if the device remains in runtime
+suspend during the "late" phase of the system-wide transition under way.
+Moreover, if the device is in runtime suspend in pci_pm_resume_noirq() or
+pci_pm_restore_noirq(), its runtime PM status will be changed to "active" (as it
+is going to be put into D0 going forward).
+
+Setting the DPM_FLAG_MAY_SKIP_RESUME flag means that the driver allows its
+"noirq" and "early" resume callbacks to be skipped if the device can be left
+in suspend after a system-wide transition into the working state. This flag is
+taken into consideration by the PM core along with the power.may_skip_resume
+status bit of the device which is set by pci_pm_suspend_noirq() in certain
+situations. If the PM core determines that the driver's "noirq" and "early"
+resume callbacks should be skipped, the dev_pm_skip_resume() helper function
+will return "true" and that will cause pci_pm_resume_noirq() and
+pci_pm_resume_early() to return upfront without touching the device or
+executing the driver callbacks.
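+
+For illustration, a PCI driver wanting to combine the "smart suspend" and
+"may skip resume" optimizations could set the flags once at probe time (the
+driver and function names below are made up)::
+
+  static int foo_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+  {
+    /* Set once at probe time; the flags must not be changed later. */
+    dev_pm_set_driver_flags(&pdev->dev,
+                            DPM_FLAG_SMART_SUSPEND | DPM_FLAG_MAY_SKIP_RESUME);
+
+    /* the rest of the usual probe work */
+    return 0;
+  }
+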
3.2. Device Runtime Power Management
------------------------------------
diff --git a/Documentation/power/suspend-and-cpuhotplug.rst b/Documentation/power/suspend-and-cpuhotplug.rst
index 572d968c5375..ebedb6c75db9 100644
--- a/Documentation/power/suspend-and-cpuhotplug.rst
+++ b/Documentation/power/suspend-and-cpuhotplug.rst
@@ -48,7 +48,7 @@ More details follow::
|
|
v
- disable_nonboot_cpus()
+ freeze_secondary_cpus()
/* start */
|
v
@@ -83,7 +83,7 @@ More details follow::
Release cpu_add_remove_lock
|
v
- /* disable_nonboot_cpus() complete */
+ /* freeze_secondary_cpus() complete */
|
v
Do suspend
@@ -93,7 +93,7 @@ More details follow::
Resuming back is likewise, with the counterparts being (in the order of
execution during resume):
-* enable_nonboot_cpus() which involves::
+* thaw_secondary_cpus() which involves::
| Acquire cpu_add_remove_lock
| Decrease cpu_hotplug_disabled, thereby enabling regular cpu hotplug
diff --git a/Documentation/powerpc/cxl.rst b/Documentation/powerpc/cxl.rst
index 920546d81326..d2d77057610e 100644
--- a/Documentation/powerpc/cxl.rst
+++ b/Documentation/powerpc/cxl.rst
@@ -133,6 +133,7 @@ User API
========
1. AFU character devices
+^^^^^^^^^^^^^^^^^^^^^^^^
For AFUs operating in AFU directed mode, two character device
files will be created. /dev/cxl/afu0.0m will correspond to a
@@ -395,6 +396,7 @@ read
2. Card character device (powerVM guest only)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
In a powerVM guest, an extra character device is created for the
card. The device is only used to write (flash) a new image on the
diff --git a/Documentation/powerpc/firmware-assisted-dump.rst b/Documentation/powerpc/firmware-assisted-dump.rst
index b3f3ee135dbe..20ea8cdee0aa 100644
--- a/Documentation/powerpc/firmware-assisted-dump.rst
+++ b/Documentation/powerpc/firmware-assisted-dump.rst
@@ -344,7 +344,7 @@ Here is the list of files under powerpc debugfs:
NOTE:
- Please refer to Documentation/filesystems/debugfs.txt on
+ Please refer to Documentation/filesystems/debugfs.rst on
how to mount the debugfs filesystem.
diff --git a/Documentation/process/adding-syscalls.rst b/Documentation/process/adding-syscalls.rst
index 1c3a840d06b9..a6b4a3a5bf3f 100644
--- a/Documentation/process/adding-syscalls.rst
+++ b/Documentation/process/adding-syscalls.rst
@@ -33,7 +33,7 @@ interface.
to a somewhat opaque API.
- If you're just exposing runtime system information, a new node in sysfs
- (see ``Documentation/filesystems/sysfs.txt``) or the ``/proc`` filesystem may
+ (see ``Documentation/filesystems/sysfs.rst``) or the ``/proc`` filesystem may
be more appropriate. However, access to these mechanisms requires that the
relevant filesystem is mounted, which might not always be the case (e.g.
in a namespaced/sandboxed/chrooted environment). Avoid adding any API to
diff --git a/Documentation/process/index.rst b/Documentation/process/index.rst
index 6399d92f0b21..f07c9250c3ac 100644
--- a/Documentation/process/index.rst
+++ b/Documentation/process/index.rst
@@ -61,6 +61,7 @@ lack of a better place.
botching-up-ioctls
clang-format
../riscv/patch-acceptance
+ unaligned-memory-access
.. only:: subproject and html
diff --git a/Documentation/process/submit-checklist.rst b/Documentation/process/submit-checklist.rst
index 8e56337d422d..3f8e9d5d95c2 100644
--- a/Documentation/process/submit-checklist.rst
+++ b/Documentation/process/submit-checklist.rst
@@ -107,7 +107,7 @@ and elsewhere regarding submitting Linux kernel patches.
and why.
26) If any ioctl's are added by the patch, then also update
- ``Documentation/ioctl/ioctl-number.rst``.
+ ``Documentation/userspace-api/ioctl/ioctl-number.rst``.
27) If your modified source code depends on or uses any of the kernel
APIs or features that are related to the following ``Kconfig`` symbols,
diff --git a/Documentation/unaligned-memory-access.txt b/Documentation/process/unaligned-memory-access.rst
index 1ee82419d8aa..1ee82419d8aa 100644
--- a/Documentation/unaligned-memory-access.txt
+++ b/Documentation/process/unaligned-memory-access.rst
diff --git a/Documentation/s390/vfio-ap.rst b/Documentation/s390/vfio-ap.rst
index b5c51f7c748d..367e27ec3c50 100644
--- a/Documentation/s390/vfio-ap.rst
+++ b/Documentation/s390/vfio-ap.rst
@@ -484,7 +484,7 @@ CARD.DOMAIN TYPE MODE
05.00ff CEX5A Accelerator
=========== ===== ============
-Guest2
+Guest3
------
=========== ===== ============
CARD.DOMAIN TYPE MODE
diff --git a/Documentation/scheduler/sched-domains.rst b/Documentation/scheduler/sched-domains.rst
index f7504226f445..5c4b7f4f0062 100644
--- a/Documentation/scheduler/sched-domains.rst
+++ b/Documentation/scheduler/sched-domains.rst
@@ -19,10 +19,12 @@ CPUs".
Each scheduling domain must have one or more CPU groups (struct sched_group)
which are organised as a circular one way linked list from the ->groups
pointer. The union of cpumasks of these groups MUST be the same as the
-domain's span. The intersection of cpumasks from any two of these groups
-MUST be the empty set. The group pointed to by the ->groups pointer MUST
-contain the CPU to which the domain belongs. Groups may be shared among
-CPUs as they contain read only data after they have been set up.
+domain's span. The group pointed to by the ->groups pointer MUST contain the CPU
+to which the domain belongs. Groups may be shared among CPUs as they contain
+read-only data after they have been set up. The intersection of cpumasks from
+any two of these groups may be non-empty. If this is the case, the SD_OVERLAP
+flag is set on the corresponding scheduling domain and its groups may not be
+shared between CPUs.
Balancing within a sched domain occurs between groups. That is, each group
is treated as one entity. The load of a group is defined as the sum of the
diff --git a/Documentation/digsig.txt b/Documentation/security/digsig.rst
index f6a8902d3ef7..f6a8902d3ef7 100644
--- a/Documentation/digsig.txt
+++ b/Documentation/security/digsig.rst
diff --git a/Documentation/security/index.rst b/Documentation/security/index.rst
index fc503dd689a7..8129405eb2cc 100644
--- a/Documentation/security/index.rst
+++ b/Documentation/security/index.rst
@@ -15,3 +15,4 @@ Security Documentation
self-protection
siphash
tpm/index
+ digsig
diff --git a/Documentation/security/lsm.rst b/Documentation/security/lsm.rst
index aadf47c808c0..6a2a2e973080 100644
--- a/Documentation/security/lsm.rst
+++ b/Documentation/security/lsm.rst
@@ -35,47 +35,50 @@ desired model of security. Linus also suggested the possibility of
migrating the Linux capabilities code into such a module.
The Linux Security Modules (LSM) project was started by WireX to develop
-such a framework. LSM is a joint development effort by several security
+such a framework. LSM was a joint development effort by several security
projects, including Immunix, SELinux, SGI and Janus, and several
individuals, including Greg Kroah-Hartman and James Morris, to develop a
-Linux kernel patch that implements this framework. The patch is
-currently tracking the 2.4 series and is targeted for integration into
-the 2.5 development series. This technical report provides an overview
-of the framework and the example capabilities security module provided
-by the LSM kernel patch.
+Linux kernel patch that implements this framework. The work was
+incorporated in the mainstream in December of 2003. This technical
+report provides an overview of the framework and the capabilities
+security module.
LSM Framework
=============
-The LSM kernel patch provides a general kernel framework to support
+The LSM framework provides a general mechanism to support
security modules. In particular, the LSM framework is primarily focused
on supporting access control modules, although future development is
-likely to address other security needs such as auditing. By itself, the
+likely to address other security needs such as sandboxing. By itself, the
framework does not provide any additional security; it merely provides
-the infrastructure to support security modules. The LSM kernel patch
-also moves most of the capabilities logic into an optional security
-module, with the system defaulting to the traditional superuser logic.
+the infrastructure to support security modules. The LSM framework is
+optional, requiring `CONFIG_SECURITY` to be enabled. The capabilities
+logic is implemented as a security module.
This capabilities module is discussed further in
`LSM Capabilities Module`_.
-The LSM kernel patch adds security fields to kernel data structures and
-inserts calls to hook functions at critical points in the kernel code to
-manage the security fields and to perform access control. It also adds
-functions for registering and unregistering security modules, and adds a
-general :c:func:`security()` system call to support new system calls
-for security-aware applications.
-
-The LSM security fields are simply ``void*`` pointers. For process and
-program execution security information, security fields were added to
+The LSM framework includes security fields in kernel data structures and
+calls to hook functions at critical points in the kernel code to
+manage the security fields and to perform access control.
+It also adds functions for registering security modules.
+An interface `/sys/kernel/security/lsm` reports a comma-separated list
+of security modules that are active on the system.
+
+The LSM security fields are simply ``void*`` pointers.
+The data is referred to as a blob, which may be managed by
+the framework or by the individual security modules that use it.
+Security blobs that are used by more than one security module are
+typically managed by the framework.
+For process and
+program execution security information, security fields are included in
:c:type:`struct task_struct <task_struct>` and
-:c:type:`struct linux_binprm <linux_binprm>`. For filesystem
-security information, a security field was added to :c:type:`struct
+:c:type:`struct cred <cred>`.
+For filesystem
+security information, a security field is included in :c:type:`struct
super_block <super_block>`. For pipe, file, and socket security
-information, security fields were added to :c:type:`struct inode
-<inode>` and :c:type:`struct file <file>`. For packet and
-network device security information, security fields were added to
-:c:type:`struct sk_buff <sk_buff>` and :c:type:`struct
-net_device <net_device>`. For System V IPC security information,
+information, security fields are included in :c:type:`struct inode
+<inode>` and :c:type:`struct file <file>`.
+For System V IPC security information,
security fields were added to :c:type:`struct kern_ipc_perm
<kern_ipc_perm>` and :c:type:`struct msg_msg
<msg_msg>`; additionally, the definitions for :c:type:`struct
@@ -84,118 +87,45 @@ were moved to header files (``include/linux/msg.h`` and
``include/linux/shm.h`` as appropriate) to allow the security modules to
use these definitions.
-Each LSM hook is a function pointer in a global table, security_ops.
-This table is a :c:type:`struct security_operations
-<security_operations>` structure as defined by
-``include/linux/security.h``. Detailed documentation for each hook is
-included in this header file. At present, this structure consists of a
-collection of substructures that group related hooks based on the kernel
-object (e.g. task, inode, file, sk_buff, etc) as well as some top-level
-hook function pointers for system operations. This structure is likely
-to be flattened in the future for performance. The placement of the hook
-calls in the kernel code is described by the "called:" lines in the
-per-hook documentation in the header file. The hook calls can also be
-easily found in the kernel code by looking for the string
-"security_ops->".
-
-Linus mentioned per-process security hooks in his original remarks as a
-possible alternative to global security hooks. However, if LSM were to
-start from the perspective of per-process hooks, then the base framework
-would have to deal with how to handle operations that involve multiple
-processes (e.g. kill), since each process might have its own hook for
-controlling the operation. This would require a general mechanism for
-composing hooks in the base framework. Additionally, LSM would still
-need global hooks for operations that have no process context (e.g.
-network input operations). Consequently, LSM provides global security
-hooks, but a security module is free to implement per-process hooks
-(where that makes sense) by storing a security_ops table in each
-process' security field and then invoking these per-process hooks from
-the global hooks. The problem of composition is thus deferred to the
-module.
-
-The global security_ops table is initialized to a set of hook functions
-provided by a dummy security module that provides traditional superuser
-logic. A :c:func:`register_security()` function (in
-``security/security.c``) is provided to allow a security module to set
-security_ops to refer to its own hook functions, and an
-:c:func:`unregister_security()` function is provided to revert
-security_ops to the dummy module hooks. This mechanism is used to set
-the primary security module, which is responsible for making the final
-decision for each hook.
-
-LSM also provides a simple mechanism for stacking additional security
-modules with the primary security module. It defines
-:c:func:`register_security()` and
-:c:func:`unregister_security()` hooks in the :c:type:`struct
-security_operations <security_operations>` structure and
-provides :c:func:`mod_reg_security()` and
-:c:func:`mod_unreg_security()` functions that invoke these hooks
-after performing some sanity checking. A security module can call these
-functions in order to stack with other modules. However, the actual
-details of how this stacking is handled are deferred to the module,
-which can implement these hooks in any way it wishes (including always
-returning an error if it does not wish to support stacking). In this
-manner, LSM again defers the problem of composition to the module.
-
-Although the LSM hooks are organized into substructures based on kernel
-object, all of the hooks can be viewed as falling into two major
+For packet and
+network device security information, security fields were added to
+:c:type:`struct sk_buff <sk_buff>` and
+:c:type:`struct scm_cookie <scm_cookie>`.
+Unlike the other security module data, the data used here is a
+32-bit integer. The security modules are required to map or otherwise
+associate these values with real security attributes.
+
+LSM hooks are maintained in lists. A list is maintained for each
+hook, and the hooks are called in the order specified by CONFIG_LSM.
+Detailed documentation for each hook is
+included in the `include/linux/lsm_hooks.h` header file.
+
+The LSM framework provides for a close approximation of
+general security module stacking. It defines
+security_add_hooks() to which each security module passes a
+:c:type:`struct security_hook_list <security_hook_list>`,
+which is added to the lists.
+The LSM framework does not provide a mechanism for removing hooks that
+have been registered. The SELinux security module has implemented
+a way to remove itself; however, the feature has been deprecated.
+
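+As a rough sketch (the hook implementation and the "foo" module name are
+made up; LSM_HOOK_INIT(), DEFINE_LSM() and security_add_hooks() are assumed
+to be used as declared in `include/linux/lsm_hooks.h`), a minimal module
+registers its hooks like this::
+
+  #include <linux/lsm_hooks.h>
+
+  static int foo_inode_permission(struct inode *inode, int mask)
+  {
+    return 0;    /* allow everything; a real module would check here */
+  }
+
+  static struct security_hook_list foo_hooks[] __lsm_ro_after_init = {
+    LSM_HOOK_INIT(inode_permission, foo_inode_permission),
+  };
+
+  static int __init foo_lsm_init(void)
+  {
+    security_add_hooks(foo_hooks, ARRAY_SIZE(foo_hooks), "foo");
+    return 0;
+  }
+
+  DEFINE_LSM(foo) = {
+    .name = "foo",
+    .init = foo_lsm_init,
+  };
+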
+The hooks can be viewed as falling into two major
categories: hooks that are used to manage the security fields and hooks
that are used to perform access control. Examples of the first category
-of hooks include the :c:func:`alloc_security()` and
-:c:func:`free_security()` hooks defined for each kernel data
-structure that has a security field. These hooks are used to allocate
-and free security structures for kernel objects. The first category of
-hooks also includes hooks that set information in the security field
-after allocation, such as the :c:func:`post_lookup()` hook in
-:c:type:`struct inode_security_ops <inode_security_ops>`.
-This hook is used to set security information for inodes after
-successful lookup operations. An example of the second category of hooks
-is the :c:func:`permission()` hook in :c:type:`struct
-inode_security_ops <inode_security_ops>`. This hook checks
-permission when accessing an inode.
+of hooks include the security_inode_alloc() and security_inode_free()
+hooks, which are used to allocate and free security structures for
+inode objects.
+An example of the second category of hooks
+is the security_inode_permission() hook.
+This hook checks permission when accessing an inode.
LSM Capabilities Module
=======================
-The LSM kernel patch moves most of the existing POSIX.1e capabilities
-logic into an optional security module stored in the file
-``security/capability.c``. This change allows users who do not want to
-use capabilities to omit this code entirely from their kernel, instead
-using the dummy module for traditional superuser logic or any other
-module that they desire. This change also allows the developers of the
-capabilities logic to maintain and enhance their code more freely,
-without needing to integrate patches back into the base kernel.
-
-In addition to moving the capabilities logic, the LSM kernel patch could
-move the capability-related fields from the kernel data structures into
-the new security fields managed by the security modules. However, at
-present, the LSM kernel patch leaves the capability fields in the kernel
-data structures. In his original remarks, Linus suggested that this
-might be preferable so that other security modules can be easily stacked
-with the capabilities module without needing to chain multiple security
-structures on the security field. It also avoids imposing extra overhead
-on the capabilities module to manage the security fields. However, the
-LSM framework could certainly support such a move if it is determined to
-be desirable, with only a few additional changes described below.
-
-At present, the capabilities logic for computing process capabilities on
-:c:func:`execve()` and :c:func:`set\*uid()`, checking
-capabilities for a particular process, saving and checking capabilities
-for netlink messages, and handling the :c:func:`capget()` and
-:c:func:`capset()` system calls have been moved into the
-capabilities module. There are still a few locations in the base kernel
-where capability-related fields are directly examined or modified, but
-the current version of the LSM patch does allow a security module to
-completely replace the assignment and testing of capabilities. These few
-locations would need to be changed if the capability-related fields were
-moved into the security field. The following is a list of known
-locations that still perform such direct examination or modification of
-capability-related fields:
-
-- ``fs/open.c``::c:func:`sys_access()`
-
-- ``fs/lockd/host.c``::c:func:`nlm_bind_host()`
-
-- ``fs/nfsd/auth.c``::c:func:`nfsd_setuser()`
-
-- ``fs/proc/array.c``::c:func:`task_cap()`
+The POSIX.1e capabilities logic is maintained as a security module
+stored in the file ``security/commoncap.c``. The capabilities
+module uses the order field of the :c:type:`lsm_info` description
+to identify itself as the first security module to be registered.
+The capabilities security module does not use the general security
+blobs, unlike other modules. The reasons are historical and are
+based on overhead, complexity and performance concerns.
diff --git a/Documentation/security/siphash.rst b/Documentation/security/siphash.rst
index 4eba68cdf0a1..bd9363025fcb 100644
--- a/Documentation/security/siphash.rst
+++ b/Documentation/security/siphash.rst
@@ -7,7 +7,7 @@ SipHash - a short input PRF
SipHash is a cryptographically secure PRF -- a keyed hash function -- that
performs very well for short inputs, hence the name. It was designed by
cryptographers Daniel J. Bernstein and Jean-Philippe Aumasson. It is intended
-as a replacement for some uses of: `jhash`, `md5_transform`, `sha_transform`,
+as a replacement for some uses of: `jhash`, `md5_transform`, `sha1_transform`,
and so forth.
SipHash takes a secret key filled with randomly generated numbers and either
diff --git a/Documentation/sphinx/requirements.txt b/Documentation/sphinx/requirements.txt
index 14e29a0ae480..489f6626de67 100644
--- a/Documentation/sphinx/requirements.txt
+++ b/Documentation/sphinx/requirements.txt
@@ -1,3 +1,3 @@
docutils
-Sphinx==1.7.9
+Sphinx==2.4.4
sphinx_rtd_theme
diff --git a/Documentation/trace/coresight/coresight-ect.rst b/Documentation/trace/coresight/coresight-ect.rst
index ecc1e57012ef..a93e52abcf46 100644
--- a/Documentation/trace/coresight/coresight-ect.rst
+++ b/Documentation/trace/coresight/coresight-ect.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GPL-2.0
+
=============================================
CoreSight Embedded Cross Trigger (CTI & CTM).
=============================================
diff --git a/Documentation/trace/events.rst b/Documentation/trace/events.rst
index 4a2ebe0bd19b..f792b1959a33 100644
--- a/Documentation/trace/events.rst
+++ b/Documentation/trace/events.rst
@@ -527,8 +527,8 @@ The following commands are supported:
See Documentation/trace/histogram.rst for details and examples.
-6.3 In-kernel trace event API
------------------------------
+7. In-kernel trace event API
+============================
In most cases, the command-line interface to trace events is more than
sufficient. Sometimes, however, applications might find the need for
@@ -560,8 +560,8 @@ following:
- tracing synthetic events from in-kernel code
- the low-level "dynevent_cmd" API
-6.3.1 Dyamically creating synthetic event definitions
------------------------------------------------------
+7.1 Dynamically creating synthetic event definitions
+----------------------------------------------------
There are a couple ways to create a new synthetic event from a kernel
module or other kernel code.
@@ -666,8 +666,8 @@ registered by calling the synth_event_gen_cmd_end() function::
At this point, the event object is ready to be used for tracing new
events.
-6.3.3 Tracing synthetic events from in-kernel code
---------------------------------------------------
+7.2 Tracing synthetic events from in-kernel code
+------------------------------------------------
To trace a synthetic event, there are several options. The first
option is to trace the event in one call, using synth_event_trace()
@@ -678,8 +678,8 @@ synth_event_trace_start() and synth_event_trace_end() along with
synth_event_add_next_val() or synth_event_add_val() to add the values
piecewise.
-6.3.3.1 Tracing a synthetic event all at once
----------------------------------------------
+7.2.1 Tracing a synthetic event all at once
+-------------------------------------------
To trace a synthetic event all at once, the synth_event_trace() or
synth_event_trace_array() functions can be used.
@@ -780,8 +780,8 @@ remove the event::
ret = synth_event_delete("schedtest");
-6.3.3.1 Tracing a synthetic event piecewise
--------------------------------------------
+7.2.2 Tracing a synthetic event piecewise
+-----------------------------------------
To trace a synthetic using the piecewise method described above, the
synth_event_trace_start() function is used to 'open' the synthetic
@@ -864,8 +864,8 @@ Note that synth_event_trace_end() must be called at the end regardless
of whether any of the add calls failed (say due to a bad field name
being passed in).
-6.3.4 Dyamically creating kprobe and kretprobe event definitions
-----------------------------------------------------------------
+7.3 Dynamically creating kprobe and kretprobe event definitions
+---------------------------------------------------------------
To create a kprobe or kretprobe trace event from kernel code, the
kprobe_event_gen_cmd_start() or kretprobe_event_gen_cmd_start()
@@ -941,8 +941,8 @@ used to give the kprobe event file back and delete the event::
ret = kprobe_event_delete("gen_kprobe_test");
-6.3.4 The "dynevent_cmd" low-level API
---------------------------------------
+7.4 The "dynevent_cmd" low-level API
+------------------------------------
Both the in-kernel synthetic event and kprobe interfaces are built on
top of a lower-level "dynevent_cmd" interface. This interface is
diff --git a/Documentation/trace/ftrace-design.rst b/Documentation/trace/ftrace-design.rst
index a8e22e0db63c..6893399157f0 100644
--- a/Documentation/trace/ftrace-design.rst
+++ b/Documentation/trace/ftrace-design.rst
@@ -229,14 +229,6 @@ Adding support for it is easy: just define the macro in asm/ftrace.h and
pass the return address pointer as the 'retp' argument to
ftrace_push_return_trace().
-HAVE_FTRACE_NMI_ENTER
----------------------
-
-If you can't trace NMI functions, then skip this option.
-
-<details to be filled>
-
-
HAVE_SYSCALL_TRACEPOINTS
------------------------
diff --git a/Documentation/translations/it_IT/doc-guide/kernel-doc.rst b/Documentation/translations/it_IT/doc-guide/kernel-doc.rst
index a4ecd8f27631..524ad86cadbb 100644
--- a/Documentation/translations/it_IT/doc-guide/kernel-doc.rst
+++ b/Documentation/translations/it_IT/doc-guide/kernel-doc.rst
@@ -515,6 +515,22 @@ internal: *[source-pattern ...]*
.. kernel-doc:: drivers/gpu/drm/i915/intel_audio.c
:internal:
+identifiers: *[ function/type ...]*
+ Include la documentazione per ogni *function* e *type* in *source*.
+ Se non vengono esplicitamente specificate le funzioni da includere, allora
+ verranno incluse tutte quelle disponibili in *source*.
+
+ Esempi::
+
+ .. kernel-doc:: lib/bitmap.c
+ :identifiers: bitmap_parselist bitmap_parselist_user
+
+ .. kernel-doc:: lib/idr.c
+ :identifiers:
+
+functions: *[ function ...]*
+ Questo è uno pseudonimo, deprecato, per la direttiva 'identifiers'.
+
doc: *title*
Include la documentazione del paragrafo ``DOC:`` identificato dal titolo
(*title*) all'interno del file sorgente (*source*). Gli spazi in *title* sono
@@ -528,15 +544,6 @@ doc: *title*
.. kernel-doc:: drivers/gpu/drm/i915/intel_audio.c
:doc: High Definition Audio over HDMI and Display Port
-functions: *function* *[...]*
- Dal file sorgente (*source*) include la documentazione per le funzioni
- elencate (*function*).
-
- Esempio::
-
- .. kernel-doc:: lib/bitmap.c
- :functions: bitmap_parselist bitmap_parselist_user
-
Senza alcuna opzione, la direttiva kernel-doc include tutti i commenti di
documentazione presenti nel file sorgente (*source*).
diff --git a/Documentation/translations/it_IT/kernel-hacking/hacking.rst b/Documentation/translations/it_IT/kernel-hacking/hacking.rst
index 24c592852bf1..6aab27a8d323 100644
--- a/Documentation/translations/it_IT/kernel-hacking/hacking.rst
+++ b/Documentation/translations/it_IT/kernel-hacking/hacking.rst
@@ -627,6 +627,24 @@ Alcuni manutentori e sviluppatori potrebbero comunque richiedere
:c:func:`EXPORT_SYMBOL_GPL()` quando si aggiungono nuove funzionalità o
interfacce.
+:c:func:`EXPORT_SYMBOL_NS()`
+----------------------------
+
+Definita in ``include/linux/export.h``
+
+Questa è una variante di `EXPORT_SYMBOL()` che permette di specificare uno
+spazio dei nomi. Lo spazio dei nomi è documentato in
+:doc:`../core-api/symbol-namespaces`
+
+:c:func:`EXPORT_SYMBOL_NS_GPL()`
+--------------------------------
+
+Definita in ``include/linux/export.h``
+
+Questa è una variante di `EXPORT_SYMBOL_GPL()` che permette di specificare uno
+spazio dei nomi. Lo spazio dei nomi è documentato in
+:doc:`../core-api/symbol-namespaces`
+
Procedure e convenzioni
=======================
diff --git a/Documentation/translations/it_IT/kernel-hacking/locking.rst b/Documentation/translations/it_IT/kernel-hacking/locking.rst
index b9a6be4b8499..4615df5723fb 100644
--- a/Documentation/translations/it_IT/kernel-hacking/locking.rst
+++ b/Documentation/translations/it_IT/kernel-hacking/locking.rst
@@ -159,17 +159,17 @@ Sincronizzazione in contesto utente
Se avete una struttura dati che verrà utilizzata solo dal contesto utente,
allora, per proteggerla, potete utilizzare un semplice mutex
(``include/linux/mutex.h``). Questo è il caso più semplice: inizializzate il
-mutex; invocate :c:func:`mutex_lock_interruptible()` per trattenerlo e
-:c:func:`mutex_unlock()` per rilasciarlo. C'è anche :c:func:`mutex_lock()`
+mutex; invocate mutex_lock_interruptible() per trattenerlo e
+mutex_unlock() per rilasciarlo. C'è anche mutex_lock()
ma questa dovrebbe essere evitata perché non ritorna in caso di segnali.
Per esempio: ``net/netfilter/nf_sockopt.c`` permette la registrazione
-di nuove chiamate per :c:func:`setsockopt()` e :c:func:`getsockopt()`
-usando la funzione :c:func:`nf_register_sockopt()`. La registrazione e
+di nuove chiamate per setsockopt() e getsockopt()
+usando la funzione nf_register_sockopt(). La registrazione e
la rimozione vengono eseguite solamente quando il modulo viene caricato
o scaricato (e durante l'avvio del sistema, qui non abbiamo concorrenza),
e la lista delle funzioni registrate viene consultata solamente quando
-:c:func:`setsockopt()` o :c:func:`getsockopt()` sono sconosciute al sistema.
+setsockopt() o getsockopt() sono sconosciute al sistema.
In questo caso ``nf_sockopt_mutex`` è perfetto allo scopo, in particolar modo
visto che setsockopt e getsockopt potrebbero dormire.
@@ -179,19 +179,19 @@ Sincronizzazione fra il contesto utente e i softirq
Se un softirq condivide dati col contesto utente, avete due problemi.
Primo, il contesto utente corrente potrebbe essere interroto da un softirq,
e secondo, la sezione critica potrebbe essere eseguita da un altro
-processore. Questo è quando :c:func:`spin_lock_bh()`
+processore. Questo è quando spin_lock_bh()
(``include/linux/spinlock.h``) viene utilizzato. Questo disabilita i softirq
-sul processore e trattiene il *lock*. Invece, :c:func:`spin_unlock_bh()` fa
+sul processore e trattiene il *lock*. Invece, spin_unlock_bh() fa
l'opposto. (Il suffisso '_bh' è un residuo storico che fa riferimento al
"Bottom Halves", il vecchio nome delle interruzioni software. In un mondo
perfetto questa funzione si chiamerebbe 'spin_lock_softirq()').
-Da notare che in questo caso potete utilizzare anche :c:func:`spin_lock_irq()`
-o :c:func:`spin_lock_irqsave()`, queste fermano anche le interruzioni hardware:
+Da notare che in questo caso potete utilizzare anche spin_lock_irq()
+o spin_lock_irqsave(), queste fermano anche le interruzioni hardware:
vedere :ref:`Contesto di interruzione hardware <it_hardirq-context>`.
Questo funziona alla perfezione anche sui sistemi monoprocessore: gli spinlock
-svaniscono e questa macro diventa semplicemente :c:func:`local_bh_disable()`
+svaniscono e questa macro diventa semplicemente local_bh_disable()
(``include/linux/interrupt.h``), la quale impedisce ai softirq d'essere
eseguiti.
@@ -224,8 +224,8 @@ Differenti tasklet/timer
~~~~~~~~~~~~~~~~~~~~~~~~
Se un altro tasklet/timer vuole condividere dati col vostro tasklet o timer,
-allora avrete bisogno entrambe di :c:func:`spin_lock()` e
-:c:func:`spin_unlock()`. Qui :c:func:`spin_lock_bh()` è inutile, siete già
+allora avrete bisogno entrambe di spin_lock() e
+spin_unlock(). Qui spin_lock_bh() è inutile, siete già
in un tasklet ed avete la garanzia che nessun altro verrà eseguito sullo
stesso processore.
@@ -243,13 +243,13 @@ processore (vedere :ref:`Dati per processore <it_per-cpu>`). Se siete arrivati
fino a questo punto nell'uso dei softirq, probabilmente tenete alla scalabilità
delle prestazioni abbastanza da giustificarne la complessità aggiuntiva.
-Dovete utilizzare :c:func:`spin_lock()` e :c:func:`spin_unlock()` per
+Dovete utilizzare spin_lock() e spin_unlock() per
proteggere i dati condivisi.
Diversi Softirqs
~~~~~~~~~~~~~~~~
-Dovete utilizzare :c:func:`spin_lock()` e :c:func:`spin_unlock()` per
+Dovete utilizzare spin_lock() e spin_unlock() per
proteggere i dati condivisi, che siano timer, tasklet, diversi softirq o
lo stesso o altri softirq: uno qualsiasi di essi potrebbe essere in esecuzione
su un diverso processore.
@@ -270,40 +270,40 @@ Se un gestore di interruzioni hardware condivide dati con un softirq, allora
avrete due preoccupazioni. Primo, il softirq può essere interrotto da
un'interruzione hardware, e secondo, la sezione critica potrebbe essere
eseguita da un'interruzione hardware su un processore diverso. Questo è il caso
-dove :c:func:`spin_lock_irq()` viene utilizzato. Disabilita le interruzioni
-sul processore che l'esegue, poi trattiene il lock. :c:func:`spin_unlock_irq()`
+dove spin_lock_irq() viene utilizzato. Disabilita le interruzioni
+sul processore che l'esegue, poi trattiene il lock. spin_unlock_irq()
fa l'opposto.
-Il gestore d'interruzione hardware non usa :c:func:`spin_lock_irq()` perché
-i softirq non possono essere eseguiti quando il gestore d'interruzione hardware
-è in esecuzione: per questo si può usare :c:func:`spin_lock()`, che è un po'
+Il gestore d'interruzione hardware non ha bisogno di usare spin_lock_irq()
+perché i softirq non possono essere eseguiti quando il gestore d'interruzione
+hardware è in esecuzione: per questo si può usare spin_lock(), che è un po'
più veloce. L'unica eccezione è quando un altro gestore d'interruzioni
-hardware utilizza lo stesso *lock*: :c:func:`spin_lock_irq()` impedirà a questo
+hardware utilizza lo stesso *lock*: spin_lock_irq() impedirà a questo
secondo gestore di interrompere quello in esecuzione.
Questo funziona alla perfezione anche sui sistemi monoprocessore: gli spinlock
-svaniscono e questa macro diventa semplicemente :c:func:`local_irq_disable()`
+svaniscono e questa macro diventa semplicemente local_irq_disable()
(``include/asm/smp.h``), la quale impedisce a softirq/tasklet/BH d'essere
eseguiti.
-:c:func:`spin_lock_irqsave()` (``include/linux/spinlock.h``) è una variante che
+spin_lock_irqsave() (``include/linux/spinlock.h``) è una variante che
salva lo stato delle interruzioni in una variabile, questa verrà poi passata
-a :c:func:`spin_unlock_irqrestore()`. Questo significa che lo stesso codice
+a spin_unlock_irqrestore(). Questo significa che lo stesso codice
potrà essere utilizzato in un'interruzione hardware (dove le interruzioni sono
già disabilitate) e in un softirq (dove la disabilitazione delle interruzioni
è richiesta).
Da notare che i softirq (e quindi tasklet e timer) sono eseguiti al ritorno
-da un'interruzione hardware, quindi :c:func:`spin_lock_irq()` interrompe
+da un'interruzione hardware, quindi spin_lock_irq() interrompe
anche questi. Tenuto conto di questo si può dire che
-:c:func:`spin_lock_irqsave()` è la funzione di sincronizzazione più generica
+spin_lock_irqsave() è la funzione di sincronizzazione più generica
e potente.
Sincronizzazione fra due gestori d'interruzioni hardware
--------------------------------------------------------
Condividere dati fra due gestori di interruzione hardware è molto raro, ma se
-succede, dovreste usare :c:func:`spin_lock_irqsave()`: è una specificità
+succede, dovreste usare spin_lock_irqsave(): è una specificità
dell'architettura il fatto che tutte le interruzioni vengano interrotte
quando si eseguono di gestori di interruzioni.
@@ -317,11 +317,11 @@ Pete Zaitcev ci offre il seguente riassunto:
il mutex e dormire (``copy_from_user*(`` o ``kmalloc(x,GFP_KERNEL)``).
- Altrimenti (== i dati possono essere manipolati da un'interruzione) usate
- :c:func:`spin_lock_irqsave()` e :c:func:`spin_unlock_irqrestore()`.
+ spin_lock_irqsave() e spin_unlock_irqrestore().
- Evitate di trattenere uno spinlock per più di 5 righe di codice incluse
le chiamate a funzione (ad eccezione di quell per l'accesso come
- :c:func:`readb()`).
+ readb()).
Tabella dei requisiti minimi
----------------------------
@@ -334,7 +334,7 @@ processore alla volta, ma se deve condividere dati con un altro thread, allora
la sincronizzazione è necessaria).
Ricordatevi il suggerimento qui sopra: potete sempre usare
-:c:func:`spin_lock_irqsave()`, che è un sovrainsieme di tutte le altre funzioni
+spin_lock_irqsave(), che è un sovrainsieme di tutte le altre funzioni
per spinlock.
============== ============= ============= ========= ========= ========= ========= ======= ======= ============== ==============
@@ -378,13 +378,13 @@ protetti dal *lock* quando qualche altro thread lo sta già facendo
trattenendo il *lock*. Potrete acquisire il *lock* più tardi se vi
serve accedere ai dati protetti da questo *lock*.
-La funzione :c:func:`spin_trylock()` non ritenta di acquisire il *lock*,
+La funzione spin_trylock() non ritenta di acquisire il *lock*,
se ci riesce al primo colpo ritorna un valore diverso da zero, altrimenti
se fallisce ritorna 0. Questa funzione può essere utilizzata in un qualunque
-contesto, ma come :c:func:`spin_lock()`: dovete disabilitare i contesti che
+contesto, ma come spin_lock(): dovete disabilitare i contesti che
potrebbero interrompervi e quindi trattenere lo spinlock.
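
A titolo illustrativo, un piccolo esempio ipotetico (nomi inventati) di uso di
spin_trylock() per un aggiornamento opportunistico di un contatore::

    #include <linux/spinlock.h>
    #include <linux/types.h>

    static DEFINE_SPINLOCK(stats_lock);
    static unsigned long stats_hits;

    /* ritorna true se è riuscita ad aggiornare il contatore, false altrimenti */
    static bool stats_try_hit(void)
    {
            if (!spin_trylock(&stats_lock))
                    return false;   /* lock già occupato: il chiamante riproverà */
            stats_hits++;
            spin_unlock(&stats_lock);
            return true;
    }
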
-La funzione :c:func:`mutex_trylock()` invece di sospendere il vostro processo
+La funzione mutex_trylock() invece di sospendere il vostro processo
ritorna un valore diverso da zero se è possibile trattenere il lock al primo
colpo, altrimenti se fallisce ritorna 0. Nonostante non dorma, questa funzione
non può essere usata in modo sicuro in contesti di interruzione hardware o
@@ -506,7 +506,7 @@ della memoria che il suo contenuto sono protetti dal *lock*. Questo
caso è semplice dato che copiamo i dati dall'utente e non permettiamo
mai loro di accedere direttamente agli oggetti.
-C'è una piccola ottimizzazione qui: nella funzione :c:func:`cache_add()`
+C'è una piccola ottimizzazione qui: nella funzione cache_add()
impostiamo i campi dell'oggetto prima di acquisire il *lock*. Questo è
sicuro perché nessun altro potrà accedervi finché non lo inseriremo
nella memoria.
@@ -514,7 +514,7 @@ nella memoria.
Accesso dal contesto utente
---------------------------
-Ora consideriamo il caso in cui :c:func:`cache_find()` può essere invocata
+Ora consideriamo il caso in cui cache_find() può essere invocata
dal contesto d'interruzione: sia hardware che software. Un esempio potrebbe
essere un timer che elimina oggetti dalla memoria.
@@ -583,15 +583,15 @@ sono quelle rimosse, mentre quelle ``+`` sono quelle aggiunte.
return ret;
}
-Da notare che :c:func:`spin_lock_irqsave()` disabiliterà le interruzioni
+Da notare che spin_lock_irqsave() disabiliterà le interruzioni
se erano attive, altrimenti non farà niente (quando siamo già in un contesto
d'interruzione); dunque queste funzioni possono essere chiamate in
sicurezza da qualsiasi contesto.
-Sfortunatamente, :c:func:`cache_add()` invoca :c:func:`kmalloc()` con
+Sfortunatamente, cache_add() invoca kmalloc() con
l'opzione ``GFP_KERNEL`` che è permessa solo in contesto utente. Ho supposto
-che :c:func:`cache_add()` venga chiamata dal contesto utente, altrimenti
-questa opzione deve diventare un parametro di :c:func:`cache_add()`.
+che cache_add() venga chiamata dal contesto utente, altrimenti
+questa opzione deve diventare un parametro di cache_add().
Esporre gli oggetti al di fuori del file
----------------------------------------
@@ -610,7 +610,7 @@ Il secondo problema è il problema del ciclo di vita: se un'altra struttura
mantiene un puntatore ad un oggetto, presumibilmente si aspetta che questo
puntatore rimanga valido. Sfortunatamente, questo è garantito solo mentre
si trattiene il *lock*, altrimenti qualcuno potrebbe chiamare
-:c:func:`cache_delete()` o peggio, aggiungere un oggetto che riutilizza lo
+cache_delete() o peggio, aggiungere un oggetto che riutilizza lo
stesso indirizzo.
Dato che c'è un solo *lock*, non potete trattenerlo a vita: altrimenti
@@ -710,9 +710,9 @@ Ecco il codice::
}
Abbiamo incapsulato il contatore di riferimenti nelle tipiche funzioni
-di 'get' e 'put'. Ora possiamo ritornare l'oggetto da :c:func:`cache_find()`
+di 'get' e 'put'. Ora possiamo ritornare l'oggetto da cache_find()
col vantaggio che l'utente può dormire trattenendo l'oggetto (per esempio,
-:c:func:`copy_to_user()` per copiare il nome verso lo spazio utente).
+copy_to_user() per copiare il nome verso lo spazio utente).
Un altro punto da notare è che ho detto che il contatore dovrebbe incrementarsi
per ogni puntatore ad un oggetto: quindi il contatore di riferimenti è 1
@@ -727,8 +727,8 @@ Ci sono un certo numbero di operazioni atomiche definite
in ``include/asm/atomic.h``: queste sono garantite come atomiche su qualsiasi
processore del sistema, quindi non sono necessari i *lock*. In questo caso è
più semplice rispetto all'uso degli spinlock, benché l'uso degli spinlock
-sia più elegante per casi non banali. Le funzioni :c:func:`atomic_inc()` e
-:c:func:`atomic_dec_and_test()` vengono usate al posto dei tipici operatori di
+sia più elegante per casi non banali. Le funzioni atomic_inc() e
+atomic_dec_and_test() vengono usate al posto dei tipici operatori di
incremento e decremento, e i *lock* non sono più necessari per proteggere il
contatore stesso.
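
Una bozza ipotetica (molto semplificata rispetto all'esempio completo del
documento) di 'get' e 'put' basati sulle operazioni atomiche potrebbe essere::

    #include <linux/atomic.h>
    #include <linux/slab.h>

    struct object {
            atomic_t refcnt;
            /* ... dati veri e propri ... */
    };

    static void object_get(struct object *obj)
    {
            atomic_inc(&obj->refcnt);
    }

    static void object_put(struct object *obj)
    {
            /* atomic_dec_and_test() ritorna true quando il contatore arriva a 0 */
            if (atomic_dec_and_test(&obj->refcnt))
                    kfree(obj);
    }
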
@@ -820,7 +820,7 @@ al nome di cambiare abbiamo tre possibilità:
- Si può togliere static da ``cache_lock`` e dire agli utenti che devono
trattenere il *lock* prima di modificare il nome di un oggetto.
-- Si può fornire una funzione :c:func:`cache_obj_rename()` che prende il
+- Si può fornire una funzione cache_obj_rename() che prende il
*lock* e cambia il nome per conto del chiamante; si dirà poi agli utenti
di usare questa funzione.
@@ -878,11 +878,11 @@ Da notare che ho deciso che il contatore di popolarità dovesse essere
protetto da ``cache_lock`` piuttosto che dal *lock* dell'oggetto; questo
perché è logicamente parte dell'infrastruttura (come
:c:type:`struct list_head <list_head>` nell'oggetto). In questo modo,
-in :c:func:`__cache_add()`, non ho bisogno di trattenere il *lock* di ogni
+in __cache_add(), non ho bisogno di trattenere il *lock* di ogni
oggetto mentre si cerca il meno popolare.
Ho anche deciso che il campo id è immutabile, quindi non ho bisogno di
-trattenere il lock dell'oggetto quando si usa :c:func:`__cache_find()`
+trattenere il lock dell'oggetto quando si usa __cache_find()
per leggere questo campo; il *lock* dell'oggetto è usato solo dal chiamante
che vuole leggere o scrivere il campo name.
@@ -907,7 +907,7 @@ Questo è facile da diagnosticare: non è uno di quei problemi che ti tengono
sveglio 5 notti a parlare da solo.
Un caso un pochino più complesso; immaginate d'avere una spazio condiviso
-fra un softirq ed il contesto utente. Se usate :c:func:`spin_lock()` per
+fra un softirq ed il contesto utente. Se usate spin_lock() per
proteggerlo, il contesto utente potrebbe essere interrotto da un softirq
mentre trattiene il lock, da qui il softirq rimarrà in attesa attiva provando
ad acquisire il *lock* già trattenuto nel contesto utente.
@@ -1006,12 +1006,12 @@ potreste fare come segue::
spin_unlock_bh(&list_lock);
Primo o poi, questo esploderà su un sistema multiprocessore perché un
-temporizzatore potrebbe essere già partiro prima di :c:func:`spin_lock_bh()`,
-e prenderà il *lock* solo dopo :c:func:`spin_unlock_bh()`, e cercherà
+temporizzatore potrebbe essere già partito prima di spin_lock_bh(),
+e prenderà il *lock* solo dopo spin_unlock_bh(), e cercherà
di eliminare il suo oggetto (che però è già stato eliminato).
Questo può essere evitato controllando il valore di ritorno di
-:c:func:`del_timer()`: se ritorna 1, il temporizzatore è stato già
+del_timer(): se ritorna 1, il temporizzatore è stato già
rimosso. Se 0, significa (in questo caso) che il temporizzatore è in
esecuzione, quindi possiamo fare come segue::
@@ -1032,9 +1032,9 @@ esecuzione, quindi possiamo fare come segue::
spin_unlock_bh(&list_lock);
Un altro problema è l'eliminazione dei temporizzatori che si riavviano
-da soli (chiamando :c:func:`add_timer()` alla fine della loro esecuzione).
+da soli (chiamando add_timer() alla fine della loro esecuzione).
Dato che questo è un problema abbastanza comune con una propensione
-alle corse critiche, dovreste usare :c:func:`del_timer_sync()`
+alle corse critiche, dovreste usare del_timer_sync()
(``include/linux/timer.h``) per gestire questo caso. Questa ritorna il
numero di volte che il temporizzatore è stato interrotto prima che
fosse in grado di fermarlo senza che si riavviasse.
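
Per esempio, una sequenza di spegnimento ipotetica (``my_timer`` è un nome
inventato) potrebbe usare del_timer_sync() così::

    #include <linux/timer.h>

    static struct timer_list my_timer;

    static void my_dev_shutdown(void)
    {
            /*
             * del_timer_sync() attende anche l'eventuale esecuzione in corso
             * del gestore su un altro processore; non chiamatela trattenendo
             * un lock che il gestore stesso potrebbe voler acquisire.
             */
            del_timer_sync(&my_timer);
    }
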
@@ -1116,7 +1116,7 @@ chiamata ``list``::
wmb();
list->next = new;
-La funzione :c:func:`wmb()` è una barriera di sincronizzazione delle
+La funzione wmb() è una barriera di sincronizzazione delle
scritture. Questa garantisce che la prima operazione (impostare l'elemento
``next`` del nuovo elemento) venga completata e vista da tutti i processori
prima che venga eseguita la seconda operazione (che sarebbe quella di mettere
@@ -1127,7 +1127,7 @@ completamente il nuovo elemento; oppure che lo vedano correttamente e quindi
il puntatore ``next`` deve puntare al resto della lista.
Fortunatamente, c'è una funzione che fa questa operazione sulle liste
-:c:type:`struct list_head <list_head>`: :c:func:`list_add_rcu()`
+:c:type:`struct list_head <list_head>`: list_add_rcu()
(``include/linux/list.h``).
Rimuovere un elemento dalla lista è anche più facile: sostituiamo il puntatore
@@ -1138,7 +1138,7 @@ l'elemento o lo salteranno.
list->next = old->next;
-La funzione :c:func:`list_del_rcu()` (``include/linux/list.h``) fa esattamente
+La funzione list_del_rcu() (``include/linux/list.h``) fa esattamente
questo (la versione normale corrompe il vecchio oggetto, e non vogliamo che
accada).
@@ -1146,9 +1146,9 @@ Anche i lettori devono stare attenti: alcuni processori potrebbero leggere
attraverso il puntatore ``next`` il contenuto dell'elemento successivo
troppo presto, ma non accorgersi che il contenuto caricato è sbagliato quando
il puntatore ``next`` viene modificato alla loro spalle. Ancora una volta
-c'è una funzione che viene in vostro aiuto :c:func:`list_for_each_entry_rcu()`
+c'è una funzione che viene in vostro aiuto list_for_each_entry_rcu()
(``include/linux/list.h``). Ovviamente, gli scrittori possono usare
-:c:func:`list_for_each_entry()` dato che non ci possono essere due scrittori
+list_for_each_entry() dato che non ci possono essere due scrittori
in contemporanea.
Il nostro ultimo dilemma è il seguente: quando possiamo realmente distruggere
@@ -1156,15 +1156,15 @@ l'elemento rimosso? Ricordate, un lettore potrebbe aver avuto accesso a questo
elemento proprio ora: se eliminiamo questo elemento ed il puntatore ``next``
cambia, il lettore salterà direttamente nella spazzatura e scoppierà. Dobbiamo
aspettare finché tutti i lettori che stanno attraversando la lista abbiano
-finito. Utilizziamo :c:func:`call_rcu()` per registrare una funzione di
+finito. Utilizziamo call_rcu() per registrare una funzione di
richiamo che distrugga l'oggetto quando tutti i lettori correnti hanno
terminato. In alternativa, potrebbe essere usata la funzione
-:c:func:`synchronize_rcu()` che blocca l'esecuzione finché tutti i lettori
+synchronize_rcu() che blocca l'esecuzione finché tutti i lettori
non terminano di ispezionare la lista.
Ma come fa l'RCU a sapere quando i lettori sono finiti? Il meccanismo è
il seguente: innanzi tutto i lettori accedono alla lista solo fra la coppia
-:c:func:`rcu_read_lock()`/:c:func:`rcu_read_unlock()` che disabilita la
+rcu_read_lock()/rcu_read_unlock() che disabilita la
prelazione così che i lettori non vengano sospesi mentre stanno leggendo
la lista.
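
Mettendo insieme i pezzi, ecco una bozza puramente ipotetica (tipi e nomi
inventati) di lista protetta da RCU; al posto di call_rcu() si usa qui la
scorciatoia kfree_rcu(), che registra proprio una chiamata a kfree() al
termine del periodo di grazia::

    #include <linux/rculist.h>
    #include <linux/slab.h>
    #include <linux/spinlock.h>
    #include <linux/types.h>

    struct item {
            struct list_head list;
            int id;
            struct rcu_head rcu;
    };

    static LIST_HEAD(items);
    static DEFINE_SPINLOCK(items_lock);     /* serializza solo gli scrittori */

    /* lettore: nessun lock, solo la sezione critica RCU */
    static bool item_exists(int id)
    {
            struct item *i;
            bool found = false;

            rcu_read_lock();
            list_for_each_entry_rcu(i, &items, list) {
                    if (i->id == id) {
                            found = true;
                            break;
                    }
            }
            rcu_read_unlock();
            return found;
    }

    /* scrittore: aggiunge un elemento alla lista */
    static void item_add(struct item *new)
    {
            spin_lock(&items_lock);
            list_add_rcu(&new->list, &items);
            spin_unlock(&items_lock);
    }

    /* scrittore: rimuove e libera l'oggetto quando tutti i lettori hanno finito */
    static void item_del(struct item *i)
    {
            spin_lock(&items_lock);
            list_del_rcu(&i->list);
            spin_unlock(&items_lock);
            kfree_rcu(i, rcu);      /* scorciatoia per call_rcu() + kfree() */
    }
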
@@ -1253,12 +1253,12 @@ codice RCU è un po' più ottimizzato di così, ma questa è l'idea di fondo.
}
Da notare che i lettori modificano il campo popularity nella funzione
-:c:func:`__cache_find()`, e ora non trattiene alcun *lock*. Una soluzione
+__cache_find(), e ora non trattiene alcun *lock*. Una soluzione
potrebbe essere quella di rendere la variabile ``atomic_t``, ma per l'uso
che ne abbiamo fatto qui, non ci interessano queste corse critiche perché un
risultato approssimativo è comunque accettabile, quindi non l'ho cambiato.
-Il risultato è che la funzione :c:func:`cache_find()` non ha bisogno di alcuna
+Il risultato è che la funzione cache_find() non ha bisogno di alcuna
sincronizzazione con le altre funzioni, quindi è veloce su un sistema
multi-processore tanto quanto lo sarebbe su un sistema mono-processore.
@@ -1271,9 +1271,9 @@ riferimenti.
Ora, dato che il '*lock* di lettura' di un RCU non fa altro che disabilitare
la prelazione, un chiamante che ha sempre la prelazione disabilitata fra le
-chiamate :c:func:`cache_find()` e :c:func:`object_put()` non necessita
+chiamate cache_find() e object_put() non necessita
di incrementare e decrementare il contatore di riferimenti. Potremmo
-esporre la funzione :c:func:`__cache_find()` dichiarandola non-static,
+esporre la funzione __cache_find() dichiarandola non-static,
e quel chiamante potrebbe usare direttamente questa funzione.
Il beneficio qui sta nel fatto che il contatore di riferimenti no
@@ -1293,10 +1293,10 @@ singolo contatore. Facile e pulito.
Se questo dovesse essere troppo lento (solitamente non lo è, ma se avete
dimostrato che lo è davvero), potreste usare un contatore per ogni processore
e quindi non sarebbe più necessaria la mutua esclusione. Vedere
-:c:func:`DEFINE_PER_CPU()`, :c:func:`get_cpu_var()` e :c:func:`put_cpu_var()`
+DEFINE_PER_CPU(), get_cpu_var() e put_cpu_var()
(``include/linux/percpu.h``).
-Il tipo di dato ``local_t``, la funzione :c:func:`cpu_local_inc()` e tutte
+Il tipo di dato ``local_t``, la funzione cpu_local_inc() e tutte
le altre funzioni associate, sono di particolare utilità per semplici contatori
per-processore; su alcune architetture sono anche più efficienti
(``include/asm/local.h``).
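
Un piccolo schizzo ipotetico (il contatore ``hit_count`` è inventato) di
contatore per-processore::

    #include <linux/percpu.h>

    static DEFINE_PER_CPU(unsigned long, hit_count);

    static void count_hit(void)
    {
            /* get_cpu_var() disabilita la prelazione e ritorna la copia locale */
            get_cpu_var(hit_count)++;
            put_cpu_var(hit_count);
    }
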
@@ -1324,11 +1324,11 @@ da un'interruzione software. Il gestore d'interruzione non utilizza alcun
enable_irq(irq);
spin_unlock(&lock);
-La funzione :c:func:`disable_irq()` impedisce al gestore d'interruzioni
+La funzione disable_irq() impedisce al gestore d'interruzioni
d'essere eseguito (e aspetta che finisca nel caso fosse in esecuzione su
un altro processore). Lo spinlock, invece, previene accessi simultanei.
Naturalmente, questo è più lento della semplice chiamata
-:c:func:`spin_lock_irq()`, quindi ha senso solo se questo genere di accesso
+spin_lock_irq(), quindi ha senso solo se questo genere di accesso
è estremamente raro.
.. _`it_sleeping-things`:
@@ -1336,7 +1336,7 @@ Naturalmente, questo è più lento della semplice chiamata
Quali funzioni possono essere chiamate in modo sicuro dalle interruzioni?
=========================================================================
-Molte funzioni del kernel dormono (in sostanza, chiamano ``schedule()``)
+Molte funzioni del kernel dormono (in sostanza, chiamano schedule())
direttamente od indirettamente: non potete chiamarle se trattenere uno
spinlock o avete la prelazione disabilitata, mai. Questo significa che
dovete necessariamente essere nel contesto utente: chiamarle da un
@@ -1354,23 +1354,23 @@ dormire.
- Accessi allo spazio utente:
- - :c:func:`copy_from_user()`
+ - copy_from_user()
- - :c:func:`copy_to_user()`
+ - copy_to_user()
- - :c:func:`get_user()`
+ - get_user()
- - :c:func:`put_user()`
+ - put_user()
-- :c:func:`kmalloc(GFP_KERNEL) <kmalloc>`
+- kmalloc(GFP_KERNEL)
-- :c:func:`mutex_lock_interruptible()` and
- :c:func:`mutex_lock()`
+- mutex_lock_interruptible() and
+ mutex_lock()
- C'è anche :c:func:`mutex_trylock()` che però non dorme.
+ C'è anche mutex_trylock() che però non dorme.
Comunque, non deve essere usata in un contesto d'interruzione dato
che la sua implementazione non è sicura in quel contesto.
- Anche :c:func:`mutex_unlock()` non dorme mai. Non può comunque essere
+ Anche mutex_unlock() non dorme mai. Non può comunque essere
usata in un contesto d'interruzione perché un mutex deve essere rilasciato
dallo stesso processo che l'ha acquisito.
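
Per fissare le idee, una bozza ipotetica (nomi inventati) di uso di un mutex
dal solo contesto utente::

    #include <linux/mutex.h>
    #include <linux/errno.h>

    static DEFINE_MUTEX(cfg_mutex);
    static int cfg_value;

    /* solo dal contesto utente: mutex_lock_interruptible() può dormire */
    static int cfg_set(int new_value)
    {
            if (mutex_lock_interruptible(&cfg_mutex))
                    return -ERESTARTSYS;
            cfg_value = new_value;
            mutex_unlock(&cfg_mutex);
            return 0;
    }
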
@@ -1380,11 +1380,11 @@ Alcune funzioni che non dormono
Alcune funzioni possono essere chiamate tranquillamente da qualsiasi
contesto, o trattenendo un qualsiasi *lock*.
-- :c:func:`printk()`
+- printk()
-- :c:func:`kfree()`
+- kfree()
-- :c:func:`add_timer()` e :c:func:`del_timer()`
+- add_timer() e del_timer()
Riferimento per l'API dei Mutex
===============================
@@ -1444,14 +1444,14 @@ prelazione
bh
Bottom Half: per ragioni storiche, le funzioni che contengono '_bh' nel
loro nome ora si riferiscono a qualsiasi interruzione software; per esempio,
- :c:func:`spin_lock_bh()` blocca qualsiasi interuzione software sul processore
+ spin_lock_bh() blocca qualsiasi interruzione software sul processore
corrente. I *Bottom Halves* sono deprecati, e probabilmente verranno
sostituiti dai tasklet. In un dato momento potrà esserci solo un
*bottom half* in esecuzione.
contesto d'interruzione
Non è il contesto utente: qui si processano le interruzioni hardware e
- software. La macro :c:func:`in_interrupt()` ritorna vero.
+ software. La macro in_interrupt() ritorna vero.
contesto utente
Il kernel che esegue qualcosa per conto di un particolare processo (per
@@ -1461,12 +1461,12 @@ contesto utente
che hardware.
interruzione hardware
- Richiesta di interruzione hardware. :c:func:`in_irq()` ritorna vero in un
+ Richiesta di interruzione hardware. in_irq() ritorna vero in un
gestore d'interruzioni hardware.
interruzione software / softirq
- Gestore di interruzioni software: :c:func:`in_irq()` ritorna falso;
- :c:func:`in_softirq()` ritorna vero. I tasklet e le softirq sono entrambi
+ Gestore di interruzioni software: in_irq() ritorna falso;
+ in_softirq() ritorna vero. I tasklet e le softirq sono entrambi
considerati 'interruzioni software'.
In soldoni, un softirq è una delle 32 interruzioni software che possono
diff --git a/Documentation/translations/it_IT/process/2.Process.rst b/Documentation/translations/it_IT/process/2.Process.rst
index 9af4d01617c4..30dc172f06b0 100644
--- a/Documentation/translations/it_IT/process/2.Process.rst
+++ b/Documentation/translations/it_IT/process/2.Process.rst
@@ -23,18 +23,18 @@ ogni due o tre mesi viene effettuata un rilascio importante del kernel.
I rilasci più recenti sono stati:
====== =================
- 4.11 Aprile 30, 2017
- 4.12 Luglio 2, 2017
- 4.13 Settembre 3, 2017
- 4.14 Novembre 12, 2017
- 4.15 Gennaio 28, 2018
- 4.16 Aprile 1, 2018
+ 5.0 3 marzo, 2019
+ 5.1 5 maggio, 2019
+ 5.2 7 luglio, 2019
+ 5.3 15 settembre, 2019
+ 5.4 24 novembre, 2019
+ 5.5 6 gennaio, 2020
====== =================
-Ciascun rilascio 4.x è un importante rilascio del kernel con nuove
+Ciascun rilascio 5.x è un importante rilascio del kernel con nuove
funzionalità, modifiche interne dell'API, e molto altro. Un tipico
-rilascio 4.x contiene quasi 13,000 gruppi di modifiche con ulteriori
-modifiche a parecchie migliaia di linee di codice. La 4.x. è pertanto la
+rilascio contiene quasi 13,000 gruppi di modifiche con ulteriori
+modifiche a parecchie migliaia di linee di codice. La 5.x è pertanto la
linea di confine nello sviluppo del kernel Linux; il kernel utilizza un sistema
di sviluppo continuo che integra costantemente nuove importanti modifiche.
@@ -55,8 +55,8 @@ verrà descritto dettagliatamente più avanti).
La finestra di inclusione resta attiva approssimativamente per due settimane.
Al termine di questo periodo, Linus Torvalds dichiarerà che la finestra è
chiusa e rilascerà il primo degli "rc" del kernel.
-Per il kernel che è destinato ad essere 2.6.40, per esempio, il rilascio
-che emerge al termine della finestra d'inclusione si chiamerà 2.6.40-rc1.
+Per il kernel che è destinato ad essere 5.6, per esempio, il rilascio
+che emerge al termine della finestra d'inclusione si chiamerà 5.6-rc1.
Questo rilascio indica che il momento di aggiungere nuovi componenti è
passato, e che è iniziato il periodo di stabilizzazione del prossimo kernel.
@@ -76,22 +76,23 @@ Mentre le correzioni si aprono la loro strada all'interno del ramo principale,
il ritmo delle modifiche rallenta col tempo. Linus rilascia un nuovo
kernel -rc circa una volta alla settimana; e ne usciranno circa 6 o 9 prima
che il kernel venga considerato sufficientemente stabile e che il rilascio
-finale 2.6.x venga fatto. A quel punto tutto il processo ricomincerà.
+finale venga fatto. A quel punto tutto il processo ricomincerà.
-Esempio: ecco com'è andato il ciclo di sviluppo della versione 4.16
+Esempio: ecco com'è andato il ciclo di sviluppo della versione 5.4
(tutte le date si collocano nel 2019)
============== =======================================
- Gennaio 28 4.15 rilascio stabile
- Febbraio 11 4.16-rc1, finestra di inclusione chiusa
- Febbraio 18 4.16-rc2
- Febbraio 25 4.16-rc3
- Marzo 4 4.16-rc4
- Marzo 11 4.16-rc5
- Marzo 18 4.16-rc6
- Marzo 25 4.16-rc7
- Aprile 1 4.17 rilascio stabile
+ 15 settembre 5.3 rilascio stabile
+ 30 settembre 5.4-rc1, finestra di inclusione chiusa
+ 6 ottobre 5.4-rc2
+ 13 ottobre 5.4-rc3
+ 20 ottobre 5.4-rc4
+ 27 ottobre 5.4-rc5
+ 3 novembre 5.4-rc6
+ 10 novembre 5.4-rc7
+ 17 novembre 5.4-rc8
+ 24 novembre 5.4 rilascio stabile
============== =======================================
In che modo gli sviluppatori decidono quando chiudere il ciclo di sviluppo e
@@ -108,43 +109,44 @@ tipo di perfezione difficilmente viene raggiunta; esistono troppe variabili
in un progetto di questa portata. Arriva un punto dove ritardare il rilascio
finale peggiora la situazione; la quantità di modifiche in attesa della
prossima finestra di inclusione crescerà enormemente, creando ancor più
-regressioni al giro successivo. Quindi molti kernel 4.x escono con una
+regressioni al giro successivo. Quindi molti kernel 5.x escono con una
manciata di regressioni delle quali, si spera, nessuna è grave.
Una volta che un rilascio stabile è fatto, il suo costante mantenimento è
affidato al "squadra stabilità", attualmente composta da Greg Kroah-Hartman.
Questa squadra rilascia occasionalmente degli aggiornamenti relativi al
-rilascio stabile usando la numerazione 4.x.y. Per essere presa in
+rilascio stabile usando la numerazione 5.x.y. Per essere presa in
considerazione per un rilascio d'aggiornamento, una modifica deve:
(1) correggere un baco importante (2) essere già inserita nel ramo principale
per il prossimo sviluppo del kernel. Solitamente, passato il loro rilascio
iniziale, i kernel ricevono aggiornamenti per più di un ciclo di sviluppo.
-Quindi, per esempio, la storia del kernel 4.13 appare così:
+Quindi, per esempio, la storia del kernel 5.2 appare così (anno 2019):
============== ===============================
- Settembre 3 4.13 rilascio stabile
- Settembre 13 4.13.1
- Settembre 20 4.13.2
- Settembre 27 4.13.3
- Ottobre 5 4.13.4
- Ottobre 12 4.13.5
+ 7 luglio       5.2 rilascio stabile
+ 14 luglio 5.2.1
+ 21 luglio 5.2.2
+ 26 luglio 5.2.3
+ 28 luglio 5.2.4
+ 31 luglio 5.2.5
... ...
- Novembre 24 4.13.16
+ 11 ottobre 5.2.21
============== ===============================
-La 4.13.16 fu l'aggiornamento finale per la versione 4.13.
+La 5.2.21 fu l'aggiornamento finale per la versione 5.2.
Alcuni kernel sono destinati ad essere kernel a "lungo termine"; questi
riceveranno assistenza per un lungo periodo di tempo. Al momento in cui
scriviamo, i manutentori dei kernel stabili a lungo termine sono:
- ====== ====================== ==========================================
- 3.16 Ben Hutchings (kernel stabile molto più a lungo termine)
- 4.1 Sasha Levin
- 4.4 Greg Kroah-Hartman (kernel stabile molto più a lungo termine)
- 4.9 Greg Kroah-Hartman
- 4.14 Greg Kroah-Hartman
- ====== ====================== ==========================================
+ ====== ================================ ==========================================
+ 3.16 Ben Hutchings (kernel stabile molto più a lungo termine)
+ 4.4 Greg Kroah-Hartman e Sasha Levin (kernel stabile molto più a lungo termine)
+ 4.9 Greg Kroah-Hartman e Sasha Levin
+ 4.14 Greg Kroah-Hartman e Sasha Levin
+ 4.19 Greg Kroah-Hartman e Sasha Levin
+ 5.4    Greg Kroah-Hartman e Sasha Levin
+ ====== ================================ ==========================================
Questa selezione di kernel di lungo periodo è puramente dovuta ai loro
@@ -229,12 +231,13 @@ Come le modifiche finiscono nel Kernel
--------------------------------------
Esiste una sola persona che può inserire le patch nel repositorio principale
-del kernel: Linus Torvalds. Ma, di tutte le 9500 patch che entrarono nella
-versione 2.6.38 del kernel, solo 112 (circa l'1,3%) furono scelte direttamente
-da Linus in persona. Il progetto del kernel è cresciuto fino a raggiungere
-una dimensione tale per cui un singolo sviluppatore non può controllare e
-selezionare indipendentemente ogni modifica senza essere supportato.
-La via scelta dagli sviluppatori per indirizzare tale crescita è stata quella
+del kernel: Linus Torvalds. Ma, per esempio, di tutte le 9500 patch
+che entrarono nella versione 2.6.38 del kernel, solo 112 (circa
+l'1,3%) furono scelte direttamente da Linus in persona. Il progetto
+del kernel è cresciuto fino a raggiungere una dimensione tale per cui
+un singolo sviluppatore non può controllare e selezionare
+indipendentemente ogni modifica senza essere supportato. La via
+scelta dagli sviluppatori per indirizzare tale crescita è stata quella
di utilizzare un sistema di "sottotenenti" basato sulla fiducia.
Il codice base del kernel è spezzato in una serie di sottosistemi: rete,
diff --git a/Documentation/translations/it_IT/process/adding-syscalls.rst b/Documentation/translations/it_IT/process/adding-syscalls.rst
index c3a3439595a6..bff0a82bf127 100644
--- a/Documentation/translations/it_IT/process/adding-syscalls.rst
+++ b/Documentation/translations/it_IT/process/adding-syscalls.rst
@@ -39,7 +39,7 @@ vostra interfaccia.
un qualche modo opaca.
- Se dovete esporre solo delle informazioni sul sistema, un nuovo nodo in
- sysfs (vedere ``Documentation/filesystems/sysfs.txt``) o
+ sysfs (vedere ``Documentation/filesystems/sysfs.rst``) o
in procfs potrebbe essere sufficiente. Tuttavia, l'accesso a questi
meccanismi richiede che il filesystem sia montato, il che potrebbe non
essere sempre vero (per esempio, in ambienti come namespace/sandbox/chroot).
diff --git a/Documentation/translations/it_IT/process/coding-style.rst b/Documentation/translations/it_IT/process/coding-style.rst
index 8725f2b9e960..6f4f85832dee 100644
--- a/Documentation/translations/it_IT/process/coding-style.rst
+++ b/Documentation/translations/it_IT/process/coding-style.rst
@@ -313,7 +313,7 @@ che conta gli utenti attivi, dovreste chiamarla ``count_active_users()`` o
qualcosa di simile, **non** dovreste chiamarla ``cntusr()``.
Codificare il tipo di funzione nel suo nome (quella cosa chiamata notazione
-ungherese) fa male al cervello - il compilatore conosce comunque il tipo e
+ungherese) è stupido - il compilatore conosce comunque il tipo e
può verificarli, e inoltre confonde i programmatori. Non c'è da
sorprendersi che MicroSoft faccia programmi bacati.
@@ -825,8 +825,8 @@ linguaggio assembler.
Agli sviluppatori del kernel piace essere visti come dotti. Tenete un occhio
di riguardo per l'ortografia e farete una bella figura. In inglese, evitate
-l'uso di parole mozzate come ``dont``: usate ``do not`` oppure ``don't``.
-Scrivete messaggi concisi, chiari, e inequivocabili.
+l'uso incorretto di abbreviazioni come ``dont``: usate ``do not`` oppure
+``don't``. Scrivete messaggi concisi, chiari, e inequivocabili.
I messaggi del kernel non devono terminare con un punto fermo.
diff --git a/Documentation/translations/it_IT/process/deprecated.rst b/Documentation/translations/it_IT/process/deprecated.rst
index 776f26732a94..e108eaf82cf6 100644
--- a/Documentation/translations/it_IT/process/deprecated.rst
+++ b/Documentation/translations/it_IT/process/deprecated.rst
@@ -34,6 +34,33 @@ interfaccia come 'vecchia', questa non è una soluzione completa. L'interfaccia
deve essere rimossa dal kernel, o aggiunta a questo documento per scoraggiarne
l'uso.
+BUG() e BUG_ON()
+----------------
+Al loro posto usate WARN() e WARN_ON() per gestire le
+condizioni "impossibili" e gestitele come se fosse possibile farlo.
+Nonostante le funzioni della famiglia BUG() siano state progettate
+per asserire "situazioni impossibili" e interrompere in sicurezza un
+thread del kernel, queste si sono rivelate essere troppo rischiose
+(per esempio, in quale ordine rilasciare i *lock*? Gli stati sono
+stati ripristinati?). Molto spesso l'uso di BUG()
+destabilizza il sistema o lo corrompe del tutto, il che rende
+impossibile un'attività di debug o anche solo leggere un rapporto
+circa l'errore. Linus ha un'opinione molto critica al riguardo:
+`email 1
+<https://lore.kernel.org/lkml/CA+55aFy6jNLsywVYdGp83AMrXBo_P-pkjkphPGrO=82SPKCpLQ@mail.gmail.com/>`_,
+`email 2
+<https://lore.kernel.org/lkml/CAHk-=whDHsbK3HTOpTF=ue_o04onRwTEaK_ZoJp_fjbqq4+=Jw@mail.gmail.com/>`_
+
+Tenete presente che la famiglia di funzioni WARN() dovrebbe essere
+usata solo per situazioni che si suppone siano "impossibili". Se
+volete avvisare gli utenti riguardo a qualcosa di possibile anche se
+indesiderato, usate le funzioni della famiglia pr_warn(). Chi
+amministra il sistema potrebbe aver attivato l'opzione sysctl
+*panic_on_warn* per essere sicuri che il sistema smetta di funzionare
+in caso si verifichino delle condizioni "inaspettate". (per esempio,
+date un'occhiata a questo `commit
+<https://git.kernel.org/linus/d4689846881d160a4d12a514e991a740bcb5d65a>`_)
+
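
Come esempio ipotetico (il tipo ``struct engine`` e la funzione sono inventati),
una condizione "impossibile" gestita con WARN_ON() invece di BUG_ON() potrebbe
apparire così::

    #include <linux/bug.h>
    #include <linux/errno.h>

    struct engine;  /* tipo ipotetico, serve solo all'esempio */

    static int engine_start(struct engine *e)
    {
            /* WARN_ON() stampa uno stack trace ma lascia vivo il sistema */
            if (WARN_ON(!e))
                    return -EINVAL; /* gestiamo comunque il caso "impossibile" */

            /* ... avvio vero e proprio ... */
            return 0;
    }
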
Calcoli codificati negli argomenti di un allocatore
----------------------------------------------------
Il calcolo dinamico delle dimensioni (specialmente le moltiplicazioni) non
@@ -68,52 +95,81 @@ Invece, usate la seguente funzione::
header = kzalloc(struct_size(header, item, count), GFP_KERNEL);
-Per maggiori dettagli fate riferimento a :c:func:`array_size`,
-:c:func:`array3_size`, e :c:func:`struct_size`, così come la famiglia di
-funzioni :c:func:`check_add_overflow` e :c:func:`check_mul_overflow`.
+Per maggiori dettagli fate riferimento a array_size(),
+array3_size(), e struct_size(), così come la famiglia di
+funzioni check_add_overflow() e check_mul_overflow().
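
A titolo puramente illustrativo, una bozza ipotetica che usa check_mul_overflow()
prima di un'allocazione (il nome ``alloc_elems()`` è inventato)::

    #include <linux/overflow.h>
    #include <linux/slab.h>

    static void *alloc_elems(size_t count, size_t elem_size)
    {
            size_t bytes;

            /* check_mul_overflow() ritorna true se la moltiplicazione trabocca */
            if (check_mul_overflow(count, elem_size, &bytes))
                    return NULL;

            return kzalloc(bytes, GFP_KERNEL);
    }
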
simple_strtol(), simple_strtoll(), simple_strtoul(), simple_strtoull()
----------------------------------------------------------------------
-Le funzioni :c:func:`simple_strtol`, :c:func:`simple_strtoll`,
-:c:func:`simple_strtoul`, e :c:func:`simple_strtoull` ignorano volutamente
+Le funzioni simple_strtol(), simple_strtoll(),
+simple_strtoul(), e simple_strtoull() ignorano volutamente
i possibili overflow, e questo può portare il chiamante a generare risultati
-inaspettati. Le rispettive funzioni :c:func:`kstrtol`, :c:func:`kstrtoll`,
-:c:func:`kstrtoul`, e :c:func:`kstrtoull` sono da considerarsi le corrette
+inaspettati. Le rispettive funzioni kstrtol(), kstrtoll(),
+kstrtoul(), e kstrtoull() sono da considerarsi le corrette
sostitute; tuttavia va notato che queste richiedono che la stringa sia
terminata con il carattere NUL o quello di nuova riga.
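
Un esempio minimo e ipotetico di conversione con kstrtoul() (la funzione
``parse_count()`` è inventata)::

    #include <linux/kernel.h>

    static int parse_count(const char *buf, unsigned long *res)
    {
            /* ritorna 0 in caso di successo, -EINVAL o -ERANGE in caso d'errore */
            return kstrtoul(buf, 10, res);
    }
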
strcpy()
--------
-La funzione :c:func:`strcpy` non fa controlli agli estremi del buffer
+La funzione strcpy() non fa controlli agli estremi del buffer
di destinazione. Questo può portare ad un overflow oltre i limiti del
buffer e generare svariati tipi di malfunzionamenti. Nonostante l'opzione
`CONFIG_FORTIFY_SOURCE=y` e svariate opzioni del compilatore aiutano
a ridurne il rischio, non c'è alcuna buona ragione per continuare ad usare
-questa funzione. La versione sicura da usare è :c:func:`strscpy`.
+questa funzione. La versione sicura da usare è strscpy().
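
Come piccolo schizzo ipotetico (la funzione ``copy_name()`` è inventata), una
copia con strscpy() potrebbe apparire così::

    #include <linux/string.h>
    #include <linux/printk.h>

    static void copy_name(char *dst, size_t dst_size, const char *src)
    {
            /* strscpy() termina sempre con NUL e ritorna -E2BIG se tronca */
            if (strscpy(dst, src, dst_size) < 0)
                    pr_warn("nome troncato\n");
    }
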
strncpy() su stringhe terminate con NUL
----------------------------------------
-L'utilizzo di :c:func:`strncpy` non fornisce alcuna garanzia sul fatto che
+L'utilizzo di strncpy() non fornisce alcuna garanzia sul fatto che
il buffer di destinazione verrà terminato con il carattere NUL. Questo
potrebbe portare a diversi overflow di lettura o altri malfunzionamenti
causati, appunto, dalla mancanza del terminatore. Questa estende la
terminazione nel buffer di destinazione quando la stringa d'origine è più
corta; questo potrebbe portare ad una penalizzazione delle prestazioni per
chi usa solo stringhe terminate. La versione sicura da usare è
-:c:func:`strscpy`. (chi usa :c:func:`strscpy` e necessita di estendere la
-terminazione con NUL deve aggiungere una chiamata a :c:func:`memset`)
+strscpy(). (chi usa strscpy() e necessita di estendere la
+terminazione con NUL deve aggiungere una chiamata a memset())
-Se il chiamate no usa stringhe terminate con NUL, allore :c:func:`strncpy()`
+Se il chiamante non usa stringhe terminate con NUL, allora strncpy()
può continuare ad essere usata, ma i buffer di destinazione devono essere
marchiati con l'attributo `__nonstring <https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html>`_
per evitare avvisi durante la compilazione.
strlcpy()
---------
-La funzione :c:func:`strlcpy`, per prima cosa, legge interamente il buffer di
+La funzione strlcpy(), per prima cosa, legge interamente il buffer di
origine, magari leggendo più di quanto verrà effettivamente copiato. Questo
è inefficiente e può portare a overflow di lettura quando la stringa non è
-terminata con NUL. La versione sicura da usare è :c:func:`strscpy`.
+terminata con NUL. La versione sicura da usare è strscpy().
+
+Segnaposto %p nella stringa di formato
+--------------------------------------
+
+Tradizionalmente, l'uso del segnaposto "%p" nella stringa di formato
+esponeva un indirizzo di memoria in dmesg, proc, sysfs, eccetera. Per
+evitare che questi indirizzi vengano sfruttati da malintenzionati,
+tutti gli usi di "%p" nel kernel rappresentano l'hash dell'indirizzo,
+rendendolo di fatto inutilizzabile. Nuovi usi di "%p" non dovrebbero
+essere aggiunti al kernel. Per una rappresentazione testuale di un
+indirizzo usate "%pS", l'output è migliore perché mostrerà il nome del
+simbolo. Per tutto il resto, semplicemente non usate "%p".
+
+Parafrasando la `guida
+<https://lore.kernel.org/lkml/CA+55aFwQEd_d40g4mUCSsVRZzrFPUJt74vc6PPpb675hYNXcKw@mail.gmail.com/>`_
+di Linus:
+
+- Se il valore hash di "%p" è inutile, chiediti se il puntatore stesso
+ è importante. Forse dovrebbe essere rimosso del tutto?
+- Se credi davvero che il vero valore del puntatore sia importante,
+ perché alcuni stati del sistema o i livelli di privilegi di un
+ utente sono considerati "special"? Se pensi di poterlo giustificare
+ (in un commento e nel messaggio del commit) abbastanza bene da
+ affrontare il giudizio di Linus, allora forse potrai usare "%px",
+ assicurandosi anche di averne il permesso.
+
+Infine, sappi che un cambiamento per far tornare "%p" a stampare l'indirizzo
+senza hash `non verrà accettato
+<https://lore.kernel.org/lkml/CA+55aFwieC1-nAs+NFq9RTwaR8ef9hWa4MjNBWL41F-8wM49eA@mail.gmail.com/>`_.
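
Un piccolo esempio ipotetico (nomi inventati) di come stampare un puntatore a
funzione in modo leggibile::

    #include <linux/printk.h>

    static void report_handler(void (*handler)(void))
    {
            /* "%pS" stampa il nome del simbolo, non l'indirizzo grezzo */
            pr_info("handler registrato: %pS\n", handler);
    }
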
Vettori a dimensione variabile (VLA)
------------------------------------
@@ -127,3 +183,47 @@ Questo può portare a dei malfunzionamenti, potrebbe sovrascrivere
dati importanti alla fine dello stack (quando il kernel è compilato senza
`CONFIG_THREAD_INFO_IN_TASK=y`), o sovrascrivere un pezzo di memoria adiacente
allo stack (quando il kernel è compilato senza `CONFIG_VMAP_STACK=y`).
+
+Salto implicito nell'istruzione switch-case
+-------------------------------------------
+
+Il linguaggio C permette ai casi di un'istruzione `switch` di saltare al
+prossimo caso quando l'istruzione "break" viene omessa alla fine del caso
+corrente. Tuttavia questo rende il codice ambiguo perché non è sempre ovvio se
+l'istruzione "break" viene omessa intenzionalmente o è un baco. Per esempio,
+osservando il seguente pezzo di codice non è chiaro se lo stato
+`STATE_ONE` è stato progettato apposta per eseguire anche `STATE_TWO`::
+
+ switch (value) {
+ case STATE_ONE:
+ do_something();
+ case STATE_TWO:
+ do_other();
+ break;
+ default:
+ WARN("unknown state");
+ }
+
+Dato che c'è stata una lunga lista di problemi `dovuti alla mancanza dell'istruzione
+"break" <https://cwe.mitre.org/data/definitions/484.html>`_, oggigiorno non
+permettiamo più che vi sia un "salto implicito" (*fall-through*). Per
+identificare un salto implicito intenzionale abbiamo adottato la pseudo
+parola chiave 'fallthrough' che viene espansa nell'estensione di gcc
+`__attribute__((fallthrough))` `Statement Attributes
+<https://gcc.gnu.org/onlinedocs/gcc/Statement-Attributes.html>`_.
+(Quando la sintassi C17/C18 `[[fallthrough]]` sarà più comunemente
+supportata dai compilatori C, analizzatori statici, e dagli IDE,
+allora potremo usare quella sintassi per la pseudo parola chiave)
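
Per confronto, ecco lo stesso esempio riscritto (bozza ipotetica) con la pseudo
parola chiave; il caso ``default`` usa qui pr_warn() solo per completezza::

    switch (value) {
    case STATE_ONE:
            do_something();
            fallthrough;    /* il salto verso STATE_TWO ora è esplicito */
    case STATE_TWO:
            do_other();
            break;
    default:
            pr_warn("unknown state\n");
            break;
    }
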
+
+Ne consegue che tutti i blocchi switch/case devono finire in uno dei seguenti
+modi:
+
+* ``break;``
+* ``fallthrough;``
+* ``continue;``
+* ``goto <label>;``
+* ``return [expression];``
diff --git a/Documentation/translations/it_IT/process/email-clients.rst b/Documentation/translations/it_IT/process/email-clients.rst
index 224ab031ffd3..89abf6d325f2 100644
--- a/Documentation/translations/it_IT/process/email-clients.rst
+++ b/Documentation/translations/it_IT/process/email-clients.rst
@@ -1,12 +1,334 @@
.. include:: ../disclaimer-ita.rst
-:Original: :ref:`Documentation/process/email-clients.rst <email_clients>`
-
-.. _it_email_clients:
+:Original: :doc:`../../../process/email-clients`
+:Translator: Alessia Mantegazza <amantegazza@vaga.pv.it>
Informazioni sui programmi di posta elettronica per Linux
=========================================================
-.. warning::
+Git
+---
+
+Oggigiorno, la maggior parte degli sviluppatori utilizza ``git send-email``
+al posto dei classici programmi di posta elettronica. Le pagine man sono
+abbastanza buone. Dal lato del ricevente, i manutentori utilizzano ``git am``
+per applicare le patch.
+
+Se siete dei novelli utilizzatori di ``git`` allora inviate la patch a voi
+stessi. Salvatela come testo includendo tutte le intestazioni. Poi eseguite
+il comando ``git am messaggio-formato-testo.txt`` e revisionatene il risultato
+con ``git log``. Quando tutto funziona correttamente, allora potete inviare
+la patch alla lista di discussione più appropriata.
+
+Panoramica delle opzioni
+------------------------
+
+Le patch per il kernel vengono inviate per posta elettronica, preferibilmente
+come testo integrante del messaggio. Alcuni manutentori accettano gli
+allegati, ma in questo caso gli allegati devono avere il *content-type*
+impostato come ``text/plain``. Tuttavia, generalmente gli allegati non sono
+ben apprezzati perché rendono più difficile citare porzioni di patch durante il
+processo di revisione.
+
+I programmi di posta elettronica che vengono usati per inviare le patch per il
+kernel Linux dovrebbero inviarle senza alterazioni. Per esempio, non
+dovrebbero modificare o rimuovere tabulazioni o spazi, nemmeno all'inizio o
+alla fine delle righe.
+
+Non inviate patch con ``format=flowed``. Questo potrebbe introdurre
+interruzioni di riga inaspettate e indesiderate.
+
+Non lasciate che il vostro programma di posta vada a capo automaticamente.
+Questo può corrompere le patch.
+
+I programmi di posta non dovrebbero modificare la codifica dei caratteri nel
+testo. Le patch inviate per posta elettronica dovrebbero essere codificate in
+ASCII o UTF-8.
+Se configurate il vostro programma per inviare messaggi codificati con UTF-8
+eviterete possibili problemi di codifica.
+
+I programmi di posta dovrebbero generare e mantenere le intestazioni
+"References" o "In-Reply-To:" cosicché la discussione non venga interrotta.
+
+Di solito, il copia-e-incolla (o taglia-e-incolla) non funziona con le patch
+perché le tabulazioni vengono convertite in spazi. Usando xclipboard, xclip
+e/o xcutsel potrebbe funzionare, ma è meglio che lo verifichiate o meglio
+ancora: non usate il copia-e-incolla.
+
+Non usate firme PGP/GPG nei messaggi che contengono delle patch. Questo
+impedisce il corretto funzionamento di alcuni script per leggere o applicare
+patch (questo si dovrebbe poter correggere).
+
+Prima di inviare le patch sulle liste di discussione Linux, può essere una
+buona idea quella di inviare la patch a voi stessi, salvare il messaggio
+ricevuto, e applicarlo ai sorgenti con successo.
+
+
+Alcuni suggerimenti per i programmi di posta elettronica (MUA)
+--------------------------------------------------------------
+
+Qui troverete alcuni suggerimenti per configurare i vostri MUA allo scopo
+di modificare ed inviare patch per il kernel Linux. Tuttavia, questi
+suggerimenti non sono da considerarsi come un riassunto di una configurazione
+completa.
+
+Legenda:
+
+- TUI = interfaccia utente testuale (*text-based user interface*)
+- GUI = interfaccia utente grafica (*graphical user interface*)
+
+Alpine (TUI)
+************
+
+Opzioni per la configurazione:
+
+Nella sezione :menuselection:`Sending Preferences`:
+
+- :menuselection:`Do Not Send Flowed Text` deve essere ``enabled``
+- :menuselection:`Strip Whitespace Before Sending` deve essere ``disabled``
+
+Quando state scrivendo un messaggio, il cursore dev'essere posizionato
+dove volete che la patch inizi, poi premendo :kbd:`CTRL-R` vi verrà chiesto
+di selezionare il file patch da inserire nel messaggio.
+
+Claws Mail (GUI)
+****************
+
+Funziona. Alcune persone riescono ad usarlo con successo per inviare le patch.
+
+Per inserire una patch usate :menuselection:`Messaggio-->Inserisci file`
+(:kbd:`CTRL-I`) oppure un editor esterno.
+
+Se la patch che avete inserito dev'essere modificata usando la finestra di
+scrittura di Claws, allora assicuratevi che l'"auto-interruzione" sia
+disabilitata :menuselection:`Configurazione-->Preferenze-->Composizione-->Interruzione riga`.
+
+Evolution (GUI)
+***************
+
+Alcune persone riescono ad usarlo con successo per inviare le patch.
+
+Quando state scrivendo una lettera, selezionate "Preformattato" da
+:menuselection:`Formato-->Stile del paragrafo-->Preformattato`
+(:kbd:`CTRL-7`) o dalla barra degli strumenti.
+
+Poi per inserire la patch usate:
+:menuselection:`Inserisci--> File di testo...` (:kbd:`ALT-N x`)
+
+Potete anche eseguire ``diff -Nru old.c new.c | xclip``, selezionare
+:menuselection:`Preformattato`, e poi usare il tasto centrale del mouse.
+
+Kmail (GUI)
+***********
+
+Alcune persone riescono ad usarlo con successo per inviare le patch.
+
+La configurazione base che disabilita la composizione di messaggi HTML è
+corretta; non abilitatela.
+
+Quando state scrivendo un messaggio, nel menu opzioni, togliete la selezione a
+"A capo automatico". L'unico svantaggio sarà che qualsiasi altra cosa scriviate
+nel messaggio non verrà mandata a capo in automatico ma dovrete farlo voi.
+Il modo più semplice per ovviare a questo problema è quello di scrivere il
+messaggio con l'opzione abilitata e poi di salvarlo nelle bozze. Riaprendo ora
+il messaggio dalle bozze le andate a capo saranno parte integrante del
+messaggio, per cui togliendo l'opzione "A capo automatico" non perderete nulla.
+
+Alla fine del vostro messaggio, appena prima di inserire la vostra patch,
+aggiungete il delimitatore di patch: tre trattini (``---``).
+
+Ora, dal menu :menuselection:`Messaggio`, selezionate :menuselection:`Inserisci file di testo...`
+quindi scegliete la vostra patch.
+Come soluzione aggiuntiva potreste personalizzare la vostra barra degli
+strumenti aggiungendo un'icona per :menuselection:`Inserisci file di testo...`.
+
+Allargate la finestra di scrittura abbastanza da evitare andate a capo.
+Questo perché in Kmail 1.13.5 (KDE 4.5.4), Kmail aggiunge andate a capo
+automaticamente al momento dell'invio per tutte quelle righe che graficamente,
+nella vostra finestra di composizione, si sono estese su una riga successiva.
+Disabilitare l'andata a capo automatica non è sufficiente. Dunque, se la vostra
+patch contiene delle righe molto lunghe, allora dovrete allargare la finestra
+di composizione per evitare che quelle righe vadano a capo. Vedere:
+https://bugs.kde.org/show_bug.cgi?id=174034
+
+Potete firmare gli allegati con GPG, ma per le patch si preferisce aggiungerle
+al testo del messaggio per cui non usate la firma GPG. Firmare le patch
+inserite come testo del messaggio le rende più difficili da estrarre dalla loro
+codifica a 7-bit.
+
+Se dovete assolutamente inviare delle patch come allegati invece di integrarle
+nel testo del messaggio, allora premete il tasto destro sull'allegato e
+selezionate :menuselection:`Proprietà`, e poi attivate
+:menuselection:`Suggerisci visualizzazione automatica` per far sì che
+l'allegato sia più leggibile venendo visualizzato come parte del messaggio.
+
+Per salvare le patch inviate come parte di un messaggio, selezionate il
+messaggio che la contiene, premete il tasto destro e selezionate
+:menuselection:`Salva come`. Se il messaggio fu ben preparato, allora potrete
+usarlo interamente senza alcuna modifica.
+I messaggi vengono salvati con permessi di lettura-scrittura solo per l'utente,
+nel caso in cui vogliate copiarli altrove per renderli disponibili ad altri
+gruppi o al mondo, ricordatevi di usare ``chmod`` per cambiare i permessi.
+
+Lotus Notes (GUI)
+*****************
+
+Scappate finché potete.
+
+IBM Verse (Web GUI)
+*******************
+
+Vedi il commento per Lotus Notes.
+
+Mutt (TUI)
+**********
+
+Un sacco di sviluppatori Linux usano ``mutt``, per cui deve funzionare
+abbastanza bene.
+
+Mutt non ha un proprio editor, quindi qualunque sia il vostro editor dovrete
+configurarlo per non aggiungere automaticamente le andate a capo. Molti
+editor hanno un'opzione :menuselection:`Inserisci file` che inserisce il
+contenuto di un file senza alterarlo.
+
+Per usare ``vim`` come editor per mutt::
+
+ set editor="vi"
+
+Se per inserire la patch nel messaggio usate xclip, scrivete il comando::
+
+ :set paste
+
+prima di premere il tasto centrale o shift-insert. Oppure usate il
+comando::
+
+ :r filename
+
+(a)llega funziona bene senza ``set paste``
+
+Potete generare le patch con ``git format-patch`` e usare Mutt per inviarle::
+
+ $ mutt -H 0001-some-bug-fix.patch
+
+Opzioni per la configurazione:
+
+Tutto dovrebbe funzionare già nella configurazione base.
+Tuttavia, è una buona idea quella di impostare ``send_charset``::
+
+ set send_charset="us-ascii:utf-8"
+
+Mutt è molto personalizzabile. Qui di seguito trovate la configurazione minima
+per iniziare ad usare Mutt per inviare patch usando Gmail::
+
+ # .muttrc
+ # ================ IMAP ====================
+ set imap_user = 'yourusername@gmail.com'
+ set imap_pass = 'yourpassword'
+ set spoolfile = imaps://imap.gmail.com/INBOX
+ set folder = imaps://imap.gmail.com/
+ set record="imaps://imap.gmail.com/[Gmail]/Sent Mail"
+ set postponed="imaps://imap.gmail.com/[Gmail]/Drafts"
+ set mbox="imaps://imap.gmail.com/[Gmail]/All Mail"
+
+ # ================ SMTP ====================
+ set smtp_url = "smtp://username@smtp.gmail.com:587/"
+ set smtp_pass = $imap_pass
+ set ssl_force_tls = yes # Require encrypted connection
+
+ # ================ Composition ====================
+ set editor = `echo \$EDITOR`
+ set edit_headers = yes # See the headers when editing
+ set charset = UTF-8 # value of $LANG; also fallback for send_charset
+ # Sender, email address, and sign-off line must match
+ unset use_domain # because joe@localhost is just embarrassing
+ set realname = "YOUR NAME"
+ set from = "username@gmail.com"
+ set use_from = yes
+
+La documentazione di Mutt contiene molte più informazioni:
+
+ https://gitlab.com/muttmua/mutt/-/wikis/UseCases/Gmail
+
+ http://www.mutt.org/doc/manual/
+
+Pine (TUI)
+**********
+
+Pine aveva alcuni problemi con gli spazi vuoti, ma questi dovrebbero essere
+stati risolti.
+
+Se potete usate alpine (il successore di pine).
+
+Opzioni di configurazione:
+
+- Nelle versioni più recenti è necessario avere ``quell-flowed-text``
+- l'opzione ``no-strip-whitespace-before-send`` è necessaria
+
+Sylpheed (GUI)
+**************
+
+- funziona bene per aggiungere testo in linea (o usando allegati)
+- permette di utilizzare editor esterni
+- è lento su cartelle grandi
+- non farà l'autenticazione TLS SMTP su una connessione non SSL
+- ha un utile righello nella finestra di scrittura
+- la rubrica non comprende correttamente il nome da visualizzare e
+ l'indirizzo associato
+
+Thunderbird (GUI)
+*****************
+
+Thunderbird è un clone di Outlook a cui piace maciullare il testo, ma esistono
+modi per impedirglielo.
+
+- permettere l'uso di editor esterni:
+ La cosa più semplice da fare con Thunderbird e le patch è quello di usare
+ l'estensione "external editor" e di usare il vostro ``$EDITOR`` preferito per
+ leggere/includere patch nel vostro messaggio. Per farlo, scaricate ed
+ installate l'estensione e aggiungete un bottone per chiamarla rapidamente
+ usando :menuselection:`Visualizza-->Barra degli strumenti-->Personalizza...`;
+ una volta fatto potrete richiamarlo premendo sul bottone mentre siete nella
+ finestra :menuselection:`Scrivi`
+
+ Tenete presente che "external editor" richiede che il vostro editor non
+ faccia alcun fork, in altre parole, l'editor non deve ritornare prima di
+ essere stato chiuso. Potreste dover passare dei parametri aggiuntivi al
+ vostro editor oppure cambiargli la configurazione. Per esempio, usando
+ gvim dovrete aggiungere l'opzione -f ``/usr/bin/gvim -f`` (Se il binario
+ si trova in ``/usr/bin``) nell'apposito campo nell'interfaccia di
+ configurazione di :menuselection:`external editor`. Se usate altri editor
+ consultate il loro manuale per sapere come configurarli.
+
+Per rendere l'editor interno un po' più sensato, fate così:
+
+- Modificate le impostazioni di Thunderbird per far sì che non usi
+ ``format=flowed``. Andate in :menuselection:`Modifica-->Preferenze-->Avanzate-->Editor di configurazione`
+ per invocare il registro delle impostazioni.
+
+- impostate ``mailnews.send_plaintext_flowed`` a ``false``
+
+- impostate ``mailnews.wraplength`` da ``72`` a ``0``
+
+- :menuselection:`Visualizza-->Corpo del messaggio come-->Testo semplice`
+
+- :menuselection:`Visualizza-->Codifica del testo-->Unicode`
+
+
+TkRat (GUI)
+***********
+
+Funziona. Usare "Inserisci file..." o un editor esterno.
+
+Gmail (Web GUI)
+***************
+
+Non funziona per inviare le patch.
+
+Il programma web Gmail converte automaticamente i tab in spazi.
+
+Allo stesso tempo aggiunge andata a capo ogni 78 caratteri. Comunque
+il problema della conversione fra spazi e tab può essere risolto usando
+un editor esterno.
- TODO ancora da tradurre
+Un altro problema è che Gmail usa la codifica base64 per tutti quei messaggi
+che contengono caratteri non ASCII. Questo include cose tipo i nomi europei.
diff --git a/Documentation/translations/it_IT/process/index.rst b/Documentation/translations/it_IT/process/index.rst
index 012de0f3154a..c4c867132c88 100644
--- a/Documentation/translations/it_IT/process/index.rst
+++ b/Documentation/translations/it_IT/process/index.rst
@@ -59,6 +59,7 @@ perché non si è trovato un posto migliore.
magic-number
volatile-considered-harmful
clang-format
+ ../riscv/patch-acceptance
.. only:: subproject and html
diff --git a/Documentation/translations/it_IT/process/management-style.rst b/Documentation/translations/it_IT/process/management-style.rst
index 07e68bfb8402..c709285138a7 100644
--- a/Documentation/translations/it_IT/process/management-style.rst
+++ b/Documentation/translations/it_IT/process/management-style.rst
@@ -1,12 +1,293 @@
.. include:: ../disclaimer-ita.rst
-:Original: :ref:`Documentation/process/management-style.rst <managementstyle>`
+:Original: :doc:`../../../process/management-style`
+:Translator: Alessia Mantegazza <amantegazza@vaga.pv.it>
-.. _it_managementstyle:
+Il modello di gestione del kernel Linux
+=======================================
-Tipo di gestione del kernel Linux
-=================================
+Questo breve documento descrive il modello di gestione del kernel Linux.
+Per certi versi, esso rispecchia il documento
+:ref:`translations/it_IT/process/coding-style.rst <it_codingstyle>`,
+ed è principalmente scritto per evitare di rispondere [#f1]_ in continuazione
+alle stesse identiche (o quasi) domande.
-.. warning::
+Il modello di gestione è qualcosa di molto personale e molto più difficile da
+qualificare rispetto a delle semplici regole di codifica, quindi questo
+documento potrebbe avere più o meno a che fare con la realtà. È cominciato
+come un gioco, ma ciò non significa che non possa essere vero.
+Lo dovrete decidere voi stessi.
- TODO ancora da tradurre
+In ogni caso, quando si parla del "dirigente del kernel", ci si riferisce
+sempre alla persona che dirige tecnicamente, e non a coloro che
+tradizionalmente hanno un ruolo direttivo all'interno delle aziende. Se vi
+occupate di convalidare acquisti o avete una qualche idea sul budget del vostro
+gruppo, probabilmente non siete un dirigente del kernel. Quindi i suggerimenti
+qui indicati potrebbero fare al caso vostro, oppure no.
+
+Prima di tutto, suggerirei di acquistare "Le sette regole per avere successo",
+e di non leggerlo. Bruciatelo, è un grande gesto simbolico.
+
+.. [#f1] Questo documento non fa molto per rispondere alla domanda, ma rende
+ così dannatamente ovvio a chi la pone che non abbiamo la minima idea
+ di come rispondere.
+
+Comunque, partiamo:
+
+.. _it_decisions:
+
+1) Le decisioni
+---------------
+
+Tutti pensano che i dirigenti decidano, e che questo prendere decisioni
+sia importante. Più grande e dolorosa è la decisione, più importante deve
+essere il dirigente che la prende. Questo è molto profondo ed ovvio, ma non è
+del tutto vero.
+
+Il gioco consiste nell'"evitare" di dover prendere decisioni. In particolare
+se qualcuno vi chiede di "Decidere" tra (a) o (b), e vi dice che ha
+davvero bisogno di voi per questo, come dirigenti siete nei guai.
+Le persone che gestite devono conoscere i dettagli più di quanto li conosciate
+voi, quindi se vengono da voi per una decisione tecnica, siete fottuti.
+Non sarete chiaramente competente per prendere quella decisione per loro.
+
+(Corollario: se le persone che gestite non conoscono i dettagli meglio di voi,
+anche in questo caso sarete fregati, tuttavia per altre ragioni. Ossia state
+facendo il lavoro sbagliato, e che invece dovrebbero essere "loro" a gestirvi)
+
+Quindi il gioco si chiama "evitare" decisioni, almeno le più grandi e
+difficili. Prendere decisioni piccoli e senza conseguenze va bene, e vi fa
+sembrare competenti in quello che state facendo, quindi quello che un dirigente
+del kernel ha bisogno di fare è trasformare le decisioni grandi e difficili
+in minuzie delle quali non importa a nessuno.
+
+Ciò aiuta a capire che la differenza chiave tra una grande decisione ed una
+piccola sta nella possibilità di modificare tale decisione in seguito.
+Qualsiasi decisione importante può essere ridotta in decisioni meno importanti,
+ma dovete assicurarvi che possano essere reversibili in caso di errori
+(presenti o futuri). Improvvisamente, dovrete essere doppiamente dirigenti
+per **due** decisioni non sequenziali - quella sbagliata **e** quella giusta.
+
+E le persone vedranno tutto ciò come prova di vera capacità di comando
+(*cough* cavolata *cough*)
+
+Così la chiave per evitare le decisioni difficili diviene l'evitare
+di fare cose che non possono essere disfatte. Non infilatevi in un angolo
+dal quale non potrete sfuggire. Un topo messo all'angolo può rivelarsi
+pericoloso - un dirigente messo all'angolo è solo pietoso.
+
+**In ogni caso** dato che nessuno è stupido al punto da lasciare veramente ad
+un dirigente del kernel un'enorme responsabilità, solitamente è facile fare
+marcia indietro. Annullare una decisione è molto facile: semplicemente dite a
+tutti che siete stati degli scemi incompetenti, dite che siete dispiaciuti, ed
+annullate tutto l'inutile lavoro sul quale gli altri hanno lavorato nell'ultimo
+anno. Improvvisamente la decisione che avevate preso un anno fa non era poi
+così grossa, dato che può essere facilmente annullata.
+
+È emerso che alcune persone hanno dei problemi con questo tipo di approccio,
+questo per due ragioni:
+
+ - ammettere di essere degli idioti è più difficile di quanto sembri. A tutti
+ noi piace mantenere le apparenze, ed uscire allo scoperto in pubblico per
+ ammettere che ci si è sbagliati è qualcosa di davvero impegnativo.
+ - avere qualcuno che ti dice che ciò su cui hai lavorato nell'ultimo anno
+ non era del tutto valido, può rivelarsi difficile anche per un povero ed
+ umile ingegnere, e mentre il **lavoro** vero era abbastanza facile da
+ cancellare, dall'altro canto potreste aver irrimediabilmente perso la
+ fiducia di quell'ingegnere. E ricordate che l'"irrevocabile" era quello
+ che avevamo cercato di evitare fin dall'inizio, e la vostra decisione
+ ha finito per esserlo.
+
+Fortunatamente, entrambe queste ragioni possono essere mitigate semplicemente
+ammettendo fin dal principio che non avete una cavolo di idea, dicendo
+agli altri in anticipo che la vostra decisione è puramente ipotetica, e che
+potrebbe essere sbagliata. Dovreste sempre riservarvi il diritto di cambiare
+la vostra opinione, e rendere gli altri ben **consapevoli** di ciò.
+Ed è molto più facile ammettere di essere stupidi quando non avete **ancora**
+fatto quella cosa stupida.
+
+Poi, quando è realmente emersa la vostra stupidità, le persone semplicemente
+roteeranno gli occhi e diranno "Uffa, no, ancora".
+
+Questa ammissione preventiva di incompetenza potrebbe anche portare le persone
+che stanno facendo il vero lavoro, a pensarci due volte. Dopo tutto, se
+**loro** non sono certi se sia una buona idea, voi, sicuro come la morte,
+non dovreste incoraggiarli promettendogli che ciò su cui stanno lavorando
+verrà incluso. Fate sì che ci pensino due volte prima che si imbarchino in un
+grosso lavoro.
+
+Ricordate: loro devono sapere più cose sui dettagli rispetto a voi, e
+solitamente pensano di avere già la risposta a tutto. La miglior cosa che
+potete fare in qualità di dirigente è di non instillare troppa fiducia, ma
+invece fornire una salutare dose di pensiero critico su quanto stanno facendo.
+
+Comunque, un altro modo di evitare una decisione è quello di lamentarsi
+malinconicamente dicendo: "non possiamo fare entrambe le cose e basta?" e con uno
+sguardo pietoso. Fidatevi, funziona. Se non è chiaro quale sia il miglior
+approccio, lo scopriranno. La risposta potrebbe essere data dal fatto che
+entrambi i gruppi di lavoro diventano frustrati al punto di rinunciarvi.
+
+Questo può suonare come un fallimento, ma di solito è un segno che
+c'era qualcosa che non andava in entrambi i progetti, e il motivo per
+il quale le persone coinvolte non hanno potuto decidere era che entrambi
+sbagliavano. Voi ne uscirete freschi come una rosa, e avrete evitato un'altra
+decisione con la quale avreste potuto fregarvi.
+
+
+2) Le persone
+-------------
+
+Ci sono molte persone stupide, ed essere un dirigente significa che dovrete
+scendere a patti con questo e, cosa più importante, che **loro** devono avere
+a che fare con **voi**.
+
+Ne emerge che mentre è facile annullare degli errori tecnici, non è invece
+così facile rimuovere i disturbi della personalità. Dovrete semplicemente
+convivere con i loro, ed i vostri, problemi.
+
+Comunque, al fine di prepararvi in qualità di dirigenti del kernel, è meglio
+ricordare di non abbattere alcun ponte, bombardare alcun paesano innocente,
+o escludere troppi sviluppatori kernel. Ne emerge che escludere le persone
+è piuttosto facile, mentre includerle nuovamente è difficile. Così
+"l'esclusione" immediatamente cade sotto il titolo di "non reversibile", e
+diviene un no-no secondo la sezione :ref:`it_decisions`.
+
+Esistono alcune semplici regole qui:
+
+ (1) non chiamate le persone teste di c*** (almeno, non in pubblico)
+ (2) imparate a scusarvi quando dimenticate la regola (1)
+
+Il problema del punto numero 1 è che è molto facile da infrangere, dato che
+è possibile dire "sei una testa di c***" in milioni di modi differenti [#f2]_,
+a volte senza nemmeno pensarci, e praticamente sempre con la calda convinzione
+di essere nel giusto.
+
+E più convinti sarete di avere ragione (e diciamolo, potete chiamare
+praticamente **tutti** teste di c***, e spesso **sarete** nel giusto), più
+difficile sarà scusarvi successivamente.
+
+Per risolvere questo problema, avete due possibilità:
+
+ - diventare davvero bravi nello scusarsi
+ - essere amabili così che nessuno finisca col sentirsi preso di mira. Siate
+   abbastanza creativi, e potrebbero esserne divertiti.
+
+L'opzione dell'essere immancabilmente educati non esiste proprio. Nessuno
+si fiderà di qualcuno che chiaramente sta nascondendo il suo vero carattere.
+
+.. [#f2] Paul Simon cantava: "50 modi per lasciare il vostro amante", perché,
+ molto francamente, "Un milione di modi per dire ad uno sviluppatore
+ Testa di c***" non avrebbe funzionato. Ma sono sicuro che ci abbia
+ pensato.
+
+
+3) Le persone II - quelle buone
+-------------------------------
+
+Mentre emerge che la maggior parte delle persone sono stupide, il corollario
+a questo è il triste fatto che anche voi siete fra queste, e che mentre
+possiamo tutti crogiolarci nella sicurezza di essere migliori della media
+delle persone (diciamocelo, nessuno crede di essere nella media o sotto di
+essa), dovremmo anche ammettere che non siamo il "coltello più affilato" del
+circondario, e che ci saranno altre persone che sono meno stupide di quanto
+lo siete voi.
+
+Molti reagiscono male davanti alle persone intelligenti. Altri le usano a
+proprio vantaggio.
+
+Assicuratevi che voi, in qualità di manutentori del kernel, siate nel secondo
+gruppo. Inchinatevi dinanzi a loro perché saranno le persone che vi renderanno
+il lavoro più facile. In particolare, prenderanno le decisioni per voi, che è
+l'oggetto di questo gioco.
+
+Quindi quando trovate qualcuno più sveglio di voi, prendetevela comoda.
+Le vostre responsabilità dirigenziali si ridurranno in gran parte nel dire
+"Sembra una buona idea - Vai", oppure "Sembra buono, ma invece circa questo e
+quello?". La seconda versione in particolare è una gran modo per imparare
+qualcosa di nuovo circa "questo e quello" o di sembrare **extra** dirigenziali
+sottolineando qualcosa alla quale i più svegli non avevano pensato. In
+entrambe i casi, vincete.
+
+Una cosa alla quale dovete fare attenzione è che l'essere grandi in qualcosa
+non si traduce automaticamente nell'essere grandi anche in altre cose. Quindi
+dovreste dare una spintarella alle persone in una specifica direzione, ma
+diciamocelo, potrebbero essere bravi in ciò che fanno e far schifo in tutto
+il resto. La buona notizia è che le persone tendono a gravitare attorno a ciò
+in cui sono bravi, quindi non state facendo nulla di irreversibile quando li
+spingete verso una certa direzione, solo non spingete troppo.
+
+
+4) Addossare le colpe
+---------------------
+
+Le cose andranno male, e le persone vogliono qualcuno da incolpare. Sarete voi.
+
+Non è poi così difficile accettare la colpa, specialmente se le persone
+riescono a capire che non era **tutta** colpa vostra. Il che ci porta
+sulla miglior strada per assumersi la colpa: fatelo per qualcun altro.
+Vi sentirete bene nell'assumervi la responsabilità, e loro si sentiranno
+bene nel non essere incolpati, e coloro che hanno perso i loro 36GB di
+pornografia a causa della vostra incompetenza ammetteranno a malincuore che
+almeno non avete cercato di fare il furbetto.
+
+Successivamente fate in modo che gli sviluppatori che in realtà hanno fallito
+(se riuscite a trovarli) sappiano **in privato** che sono "fottuti".
+Questo non per fargli sapere che la prossima volta possono evitarselo ma per
+fargli capire che sono in debito. E, forse cosa più importante, sono loro che
+devono sistemare la cosa. Perché, ammettiamolo, di sicuro non sarete voi a
+farlo.
+
+Assumersi la colpa è anche ciò che vi rende dirigenti in prima battuta.
+È parte di ciò che spinge gli altri a fidarsi di voi, e vi garantisce
+la gloria potenziale, perché siete gli unici a dire "Ho fatto una cavolata".
+E se avete seguito le regole precedenti, sarete decisamente bravi nel dirlo.
+
+
+5) Le cose da evitare
+---------------------
+
+Esiste una cosa che le persone odiano più che essere chiamate "teste di c****",
+ed è essere chiamate "teste di c****" con fare da bigotto. Se per il primo
+caso potrete comunque scusarvi, per il secondo non ve ne verrà data nemmeno
+l'opportunità. Probabilmente smetteranno di ascoltarvi anche se tutto sommato
+state svolgendo un buon lavoro.
+
+Tutti crediamo di essere migliori degli altri, il che significa che quando
+qualcuno inizia a darsi delle arie, ci dà **davvero** fastidio. Potreste anche
+essere moralmente ed intellettualmente superiori a tutti quelli attorno a voi,
+ma non cercate di renderlo ovvio per gli altri a meno che non **vogliate**
+veramente far arrabbiare qualcuno [#f3]_.
+
+Allo stesso modo evitate di essere troppo gentili e pacati. Le buone maniere
+facilmente finiscono per strabordare e nascondere i problemi, e come si usa
+dire, "su internet nessuno può sentire la vostra pacatezza". Usate argomenti
+diretti per farvi capire, non potete sperare che la gente capisca in altro
+modo.
+
+Un po' di umorismo può aiutare a smorzare sia la franchezza che la moralità.
+Esagerare fino al ridicolo può far arrivare il messaggio senza renderlo
+spiacevole per i destinatari, i quali penseranno semplicemente che stavate
+facendo gli scemi. Può anche aiutare a lasciare andare quei blocchi mentali
+che abbiamo nei confronti delle critiche.
+
+.. [#f3] Suggerimento: i forum di discussione su internet, che non sono
+ collegati col vostro lavoro, sono ottimi modi per sfogare la frustrazione
+ verso altre persone. Di tanto in tanto scrivete messaggi offensivi col ghigno
+ in faccia per infiammare qualche discussione: vi sentirete purificati. Solo
+ cercate di non cagare troppo vicino a casa.
+
+6) Perché io?
+-------------
+
+Dato che la vostra responsabilità principale è quella di prendervi le colpe
+d'altri, e rendere dolorosamente ovvio a tutti che siete degli incompetenti,
+la domanda naturale che ne segue sarà: perché dovrei fare tutto ciò?
+
+Innanzitutto, potreste diventare o no popolari al punto da avere la fila di
+ragazzine (o ragazzini, evitiamo pregiudizi o sessismo) che gridano e bussano
+alla porta del vostro camerino, ma comunque **proverete** un immenso senso di
+realizzazione personale dall'essere "in carica". Dimenticate il fatto che voi
+state discutendo con tutti e che cercate di inseguirli il più velocemente che
+potete. Tutti continueranno a pensare che voi siete la persona in carica.
+
+È un bel lavoro se riuscite ad adattarlo a voi.
diff --git a/Documentation/translations/it_IT/process/submit-checklist.rst b/Documentation/translations/it_IT/process/submit-checklist.rst
index 995ee69fab11..3e575502690f 100644
--- a/Documentation/translations/it_IT/process/submit-checklist.rst
+++ b/Documentation/translations/it_IT/process/submit-checklist.rst
@@ -117,7 +117,7 @@ sottomissione delle patch, in particolare
sorgenti che ne spieghi la logica: cosa fanno e perché.
25) Se la patch aggiunge nuove chiamate ioctl, allora aggiornate
- ``Documentation/ioctl/ioctl-number.rst``.
+ ``Documentation/userspace-api/ioctl/ioctl-number.rst``.
26) Se il codice che avete modificato dipende o usa una qualsiasi interfaccia o
funzionalità del kernel che è associata a uno dei seguenti simboli
diff --git a/Documentation/translations/it_IT/riscv/patch-acceptance.rst b/Documentation/translations/it_IT/riscv/patch-acceptance.rst
new file mode 100644
index 000000000000..edf67252b3fb
--- /dev/null
+++ b/Documentation/translations/it_IT/riscv/patch-acceptance.rst
@@ -0,0 +1,40 @@
+.. include:: ../disclaimer-ita.rst
+
+:Original: :doc:`../../../riscv/patch-acceptance`
+:Translator: Federico Vaga <federico.vaga@vaga.pv.it>
+
+arch/riscv linee guida alla manutenzione per gli sviluppatori
+=============================================================
+
+Introduzione
+------------
+
+L'insieme di istruzioni RISC-V è sviluppato in modo aperto: le
+bozze in fase di sviluppo sono disponibili a tutti per essere
+revisionate e sperimentate nelle implementazioni. Le bozze
+dei nuovi moduli o estensioni possono cambiare in fase di sviluppo - a
+volte in modo incompatibile rispetto a bozze precedenti. Questa
+flessibilità può portare a dei problemi di manutenzione per il
+supporto RISC-V nel kernel Linux. I manutentori Linux non amano
+l'abbandono del codice, e il processo di sviluppo del kernel
+preferisce codice ben revisionato e testato rispetto a quello
+sperimentale. Desideriamo estendere questi stessi principi al codice
+relativo all'architettura RISC-V che verrà accettato per l'inclusione
+nel kernel.
+
+In aggiunta alla lista delle verifiche da fare prima di inviare una patch
+-------------------------------------------------------------------------
+
+Accetteremo le patch per un nuovo modulo o estensione se la fondazione
+RISC-V li classifica come "Frozen" o "Ratified". (Ovviamente, gli
+sviluppatori sono liberi di mantenere una copia del kernel Linux
+contenente il codice per una bozza di estensione).
+
+In aggiunta, la specifica RISC-V permette agli implementatori di
+creare le proprie estensioni. Queste estensioni non passano
+attraverso il processo di revisione della fondazione RISC-V. Per
+questo motivo, al fine di evitare complicazioni o problemi di
+prestazioni, accetteremo patch solo per quelle estensioni che sono
+state ufficialmente accettate dalla fondazione RISC-V. (Ovviamente,
+gli implementatori sono liberi di mantenere una copia del kernel Linux
+contenente il codice per queste specifiche estensioni).
diff --git a/Documentation/translations/ko_KR/memory-barriers.txt b/Documentation/translations/ko_KR/memory-barriers.txt
index 2e831ece6e26..e50fe6541335 100644
--- a/Documentation/translations/ko_KR/memory-barriers.txt
+++ b/Documentation/translations/ko_KR/memory-barriers.txt
@@ -641,7 +641,7 @@ P 는 ì§ìˆ˜ 번호 ìºì‹œ ë¼ì¸ì— 저장ë˜ì–´ 있고, 변수 B 는 홀수 ë²
리눅스 커ë„ì´ ì§€ì›í•˜ëŠ” CPU ë“¤ì€ (1) 쓰기가 ì •ë§ë¡œ ì¼ì–´ë‚ ì§€, (2) 쓰기가 ì–´ë””ì—
ì´ë£¨ì–´ì§ˆì§€, 그리고 (3) 쓰여질 ê°’ì„ í™•ì‹¤ížˆ 알기 전까지는 쓰기를 수행하지 않기
때문입니다. 하지만 "컨트롤 ì˜ì¡´ì„±" 섹션과
-Documentation/RCU/rcu_dereference.txt 파ì¼ì„ ì£¼ì˜ ê¹Šê²Œ ì½ì–´ 주시기 ë°”ëžë‹ˆë‹¤:
+Documentation/RCU/rcu_dereference.rst 파ì¼ì„ ì£¼ì˜ ê¹Šê²Œ ì½ì–´ 주시기 ë°”ëžë‹ˆë‹¤:
컴파ì¼ëŸ¬ëŠ” 매우 ì°½ì˜ì ì¸ ë§Žì€ ë°©ë²•ìœ¼ë¡œ 종ì†ì„±ì„ ê¹° 수 있습니다.
CPU 1 CPU 2
diff --git a/Documentation/translations/zh_CN/IRQ.txt b/Documentation/translations/zh_CN/IRQ.txt
index 956026d5cf82..9aec8dca4fcf 100644
--- a/Documentation/translations/zh_CN/IRQ.txt
+++ b/Documentation/translations/zh_CN/IRQ.txt
@@ -1,4 +1,4 @@
-Chinese translated version of Documentation/IRQ.txt
+Chinese translated version of Documentation/core-api/irq/index.rst
If you have any comment or update to the content, please contact the
original document maintainer directly. However, if you have a problem
@@ -9,7 +9,7 @@ or if there is a problem with the translation.
Maintainer: Eric W. Biederman <ebiederman@xmission.com>
Chinese maintainer: Fu Wei <tekkamanninja@gmail.com>
---------------------------------------------------------------------
-Documentation/IRQ.txt 的中文翻译
+Documentation/core-api/irq/index.rst 的中文翻译
如果想评论或更新本文的内容,请直接è”系原文档的维护者。如果你使用英文
交æµæœ‰å›°éš¾çš„è¯ï¼Œä¹Ÿå¯ä»¥å‘中文版维护者求助。如果本翻译更新ä¸åŠæ—¶æˆ–者翻
diff --git a/Documentation/translations/zh_CN/filesystems/debugfs.rst b/Documentation/translations/zh_CN/filesystems/debugfs.rst
new file mode 100644
index 000000000000..f8a28793c277
--- /dev/null
+++ b/Documentation/translations/zh_CN/filesystems/debugfs.rst
@@ -0,0 +1,221 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: :ref:`Documentation/filesystems/debugfs.txt <debugfs_index>`
+
+=======
+Debugfs
+=======
+
+译者
+::
+
+ 中文版维护者: ç½—æ¥šæˆ Chucheng Luo <luochucheng@vivo.com>
+ 中文版翻译者: ç½—æ¥šæˆ Chucheng Luo <luochucheng@vivo.com>
+ 中文版校译者: ç½—æ¥šæˆ Chucheng Luo <luochucheng@vivo.com>
+
+
+
+版æƒæ‰€æœ‰2020 ç½—æ¥šæˆ <luochucheng@vivo.com>
+
+
+Debugfs是内核开å‘人员在用户空间获å–ä¿¡æ¯çš„简å•æ–¹æ³•ã€‚与/procä¸åŒï¼Œprocåªæ供进程
+ä¿¡æ¯ã€‚也ä¸åƒsysfs,具有严格的“æ¯ä¸ªæ–‡ä»¶ä¸€ä¸ªå€¼â€œçš„规则。debugfs根本没有规则,å¼€å‘
+人员å¯ä»¥åœ¨è¿™é‡Œæ”¾ç½®ä»–们想è¦çš„任何信æ¯ã€‚debugfs文件系统也ä¸èƒ½ç”¨ä½œç¨³å®šçš„ABI接å£ã€‚
+从ç†è®ºä¸Šè®²ï¼Œdebugfs导出文件的时候没有任何约æŸã€‚但是[1]实际情况并ä¸æ€»æ˜¯é‚£ä¹ˆ
+简å•ã€‚å³ä½¿æ˜¯debugfs接å£ï¼Œä¹Ÿæœ€å¥½æ ¹æ®éœ€è¦è¿›è¡Œè®¾è®¡,并尽é‡ä¿æŒæŽ¥å£ä¸å˜ã€‚
+
+
+Debugfs通常使用以下命令安装::
+
+ mount -t debugfs none /sys/kernel/debug
+
+(或等效的/etc/fstab行)。
+debugfs根目录默认仅å¯ç”±root用户访问。è¦æ›´æ”¹å¯¹æ–‡ä»¶æ ‘的访问,请使用“ uidâ€ï¼Œâ€œ gidâ€
+和“ modeâ€æŒ‚载选项。请注æ„,debugfs API仅按照GPLå议导出到模å—。
+
+使用debugfs的代ç åº”包å«<linux/debugfs.h>。然åŽï¼Œé¦–先是创建至少一个目录æ¥ä¿å­˜
+一组debugfs文件::
+
+ struct dentry *debugfs_create_dir(const char *name, struct dentry *parent);
+
+如果æˆåŠŸï¼Œæ­¤è°ƒç”¨å°†åœ¨æŒ‡å®šçš„父目录下创建一个å为name的目录。如果parentå‚数为空,
+则会在debugfs根目录中创建。创建目录æˆåŠŸæ—¶ï¼Œè¿”回值是一个指å‘dentry结构体的指针。
+该dentry结构体的指针å¯ç”¨äºŽåœ¨ç›®å½•ä¸­åˆ›å»ºæ–‡ä»¶ï¼ˆä»¥åŠæœ€åŽå°†å…¶æ¸…ç†å¹²å‡€ï¼‰ã€‚ERR_PTR
+(-ERROR)返回值表明出错。如果返回ERR_PTR(-ENODEV),则表明内核是在没有debugfs
+支æŒçš„情况下构建的,并且下述函数都ä¸ä¼šèµ·ä½œç”¨ã€‚
+
+在debugfs目录中创建文件的最通用方法是::
+
+ struct dentry *debugfs_create_file(const char *name, umode_t mode,
+ struct dentry *parent, void *data,
+ const struct file_operations *fops);
+
+在这里,name是è¦åˆ›å»ºçš„文件的å称,modeæ述了访问文件应具有的æƒé™ï¼Œparent指å‘
+应该ä¿å­˜æ–‡ä»¶çš„目录,data将存储在产生的inode结构体的i_private字段中,而fops是
+一组文件æ“作函数,这些函数中实现文件æ“作的具体行为。至少,read()和/或
+write()æ“作应æ供;其他å¯ä»¥æ ¹æ®éœ€è¦åŒ…括在内。åŒæ ·çš„,返回值将是指å‘创建文件
+çš„dentry指针,错误时返回ERR_PTR(-ERROR),系统ä¸æ”¯æŒdebugfs时返回值为ERR_PTR
+(-ENODEV)。创建一个åˆå§‹å¤§å°çš„文件,å¯ä»¥ä½¿ç”¨ä»¥ä¸‹å‡½æ•°ä»£æ›¿::
+
+ struct dentry *debugfs_create_file_size(const char *name, umode_t mode,
+ struct dentry *parent, void *data,
+ const struct file_operations *fops,
+ loff_t file_size);
+
+file_size是åˆå§‹æ–‡ä»¶å¤§å°ã€‚其他å‚数跟函数debugfs_create_file的相åŒã€‚
+
+在许多情况下,没必è¦è‡ªå·±åŽ»åˆ›å»ºä¸€ç»„文件æ“作;对于一些简å•çš„情况,debugfs代ç æä¾›
+了许多帮助函数。包å«å•ä¸ªæ•´æ•°å€¼çš„文件å¯ä»¥ä½¿ç”¨ä»¥ä¸‹ä»»ä½•ä¸€é¡¹åˆ›å»º::
+
+ void debugfs_create_u8(const char *name, umode_t mode,
+ struct dentry *parent, u8 *value);
+ void debugfs_create_u16(const char *name, umode_t mode,
+ struct dentry *parent, u16 *value);
+ struct dentry *debugfs_create_u32(const char *name, umode_t mode,
+ struct dentry *parent, u32 *value);
+ void debugfs_create_u64(const char *name, umode_t mode,
+ struct dentry *parent, u64 *value);
+
+这些文件支æŒè¯»å–和写入给定值。如果æŸä¸ªæ–‡ä»¶ä¸æ”¯æŒå†™å…¥ï¼Œåªéœ€æ ¹æ®éœ€è¦è®¾ç½®mode
+å‚æ•°ä½ã€‚这些文件中的值以å进制表示;如果需è¦ä½¿ç”¨å六进制,å¯ä»¥ä½¿ç”¨ä»¥ä¸‹å‡½æ•°
+替代::
+
+ void debugfs_create_x8(const char *name, umode_t mode,
+ struct dentry *parent, u8 *value);
+ void debugfs_create_x16(const char *name, umode_t mode,
+ struct dentry *parent, u16 *value);
+ void debugfs_create_x32(const char *name, umode_t mode,
+ struct dentry *parent, u32 *value);
+ void debugfs_create_x64(const char *name, umode_t mode,
+ struct dentry *parent, u64 *value);
+
+这些功能åªæœ‰åœ¨å¼€å‘人员知é“导出值的大å°çš„时候æ‰æœ‰ç”¨ã€‚æŸäº›æ•°æ®ç±»åž‹åœ¨ä¸åŒçš„架构上
+有ä¸åŒçš„宽度,这样会使情况å˜å¾—有些å¤æ‚。在这ç§ç‰¹æ®Šæƒ…况下å¯ä»¥ä½¿ç”¨ä»¥ä¸‹å‡½æ•°::
+
+ void debugfs_create_size_t(const char *name, umode_t mode,
+ struct dentry *parent, size_t *value);
+
+ä¸å‡ºæ‰€æ–™ï¼Œæ­¤å‡½æ•°å°†åˆ›å»ºä¸€ä¸ªdebugfs文件æ¥è¡¨ç¤ºç±»åž‹ä¸ºsize_tçš„å˜é‡ã€‚
+
+åŒæ ·åœ°ï¼Œä¹Ÿæœ‰å¯¼å‡ºæ— ç¬¦å·é•¿æ•´åž‹å˜é‡çš„函数,分别以å进制和å六进制表示如下::
+
+ struct dentry *debugfs_create_ulong(const char *name, umode_t mode,
+ struct dentry *parent,
+ unsigned long *value);
+ void debugfs_create_xul(const char *name, umode_t mode,
+ struct dentry *parent, unsigned long *value);
+
+布尔值å¯ä»¥é€šè¿‡ä»¥ä¸‹æ–¹å¼æ”¾ç½®åœ¨debugfs中::
+
+ struct dentry *debugfs_create_bool(const char *name, umode_t mode,
+ struct dentry *parent, bool *value);
+
+
+读å–结果文件将产生Y(对于éžé›¶å€¼ï¼‰æˆ–N,åŽè·Ÿæ¢è¡Œç¬¦å†™å…¥çš„时候,它åªæŽ¥å—大写或å°å†™
+值或1或0。任何其他输入将被忽略。
+
+åŒæ ·ï¼Œatomic_t类型的值也å¯ä»¥æ”¾ç½®åœ¨debugfs中::
+
+ void debugfs_create_atomic_t(const char *name, umode_t mode,
+ struct dentry *parent, atomic_t *value)
+
+读å–此文件将获得atomic_t值,写入此文件将设置atomic_t值。
+
+å¦ä¸€ä¸ªé€‰æ‹©æ˜¯é€šè¿‡ä»¥ä¸‹ç»“构体和函数导出一个任æ„二进制数æ®å—::
+
+ struct debugfs_blob_wrapper {
+ void *data;
+ unsigned long size;
+ };
+
+ struct dentry *debugfs_create_blob(const char *name, umode_t mode,
+ struct dentry *parent,
+ struct debugfs_blob_wrapper *blob);
+
+读å–此文件将返回由指针指å‘debugfs_blob_wrapper结构体的数æ®ã€‚一些驱动使用“blobsâ€
+作为一ç§è¿”回几行(é™æ€ï¼‰æ ¼å¼åŒ–文本的简å•æ–¹æ³•ã€‚这个函数å¯ç”¨äºŽå¯¼å‡ºäºŒè¿›åˆ¶ä¿¡æ¯ï¼Œä½†
+似乎在主线中没有任何代ç è¿™æ ·åšã€‚请注æ„,使用debugfs_create_blob()命令创建的
+所有文件是åªè¯»çš„。
+
+如果您è¦è½¬å‚¨ä¸€ä¸ªå¯„存器å—(在开å‘过程中ç»å¸¸ä¼šè¿™ä¹ˆåšï¼Œä½†æ˜¯è¿™æ ·çš„调试代ç å¾ˆå°‘上传
+到主线中。Debugfsæ供两个函数:一个用于创建仅寄存器文件,å¦ä¸€ä¸ªæŠŠä¸€ä¸ªå¯„存器å—
+æ’入一个顺åºæ–‡ä»¶ä¸­::
+
+ struct debugfs_reg32 {
+ char *name;
+ unsigned long offset;
+ };
+
+ struct debugfs_regset32 {
+ struct debugfs_reg32 *regs;
+ int nregs;
+ void __iomem *base;
+ };
+
+ struct dentry *debugfs_create_regset32(const char *name, umode_t mode,
+ struct dentry *parent,
+ struct debugfs_regset32 *regset);
+
+ void debugfs_print_regs32(struct seq_file *s, struct debugfs_reg32 *regs,
+ int nregs, void __iomem *base, char *prefix);
+
+“baseâ€å‚æ•°å¯èƒ½ä¸º0,但您å¯èƒ½éœ€è¦ä½¿ç”¨__stringify构建reg32数组,实际上有许多寄存器
+å称(å®ï¼‰æ˜¯å¯„存器å—在基å€ä¸Šçš„字节å移é‡ã€‚
+
+如果è¦åœ¨debugfs中转储u32数组,å¯ä»¥ä½¿ç”¨ä»¥ä¸‹å‡½æ•°åˆ›å»ºæ–‡ä»¶::
+
+ void debugfs_create_u32_array(const char *name, umode_t mode,
+ struct dentry *parent,
+ u32 *array, u32 elements);
+
+“arrayâ€å‚æ•°æ供数æ®ï¼Œè€Œâ€œelementsâ€å‚数为数组中元素的数é‡ã€‚注æ„:数组创建åŽï¼Œæ•°ç»„
+大å°æ— æ³•æ›´æ”¹ã€‚
+
+有一个函数æ¥åˆ›å»ºä¸Žè®¾å¤‡ç›¸å…³çš„seq_file::
+
+ struct dentry *debugfs_create_devm_seqfile(struct device *dev,
+ const char *name,
+ struct dentry *parent,
+ int (*read_fn)(struct seq_file *s,
+ void *data));
+
+“devâ€å‚数是与此debugfs文件相关的设备,并且“read_fnâ€æ˜¯ä¸€ä¸ªå‡½æ•°æŒ‡é’ˆï¼Œè¿™ä¸ªå‡½æ•°åœ¨
+打å°seq_file内容的时候被回调。
+
+还有一些其他的é¢å‘目录的函数::
+
+ struct dentry *debugfs_rename(struct dentry *old_dir,
+ struct dentry *old_dentry,
+ struct dentry *new_dir,
+ const char *new_name);
+
+ struct dentry *debugfs_create_symlink(const char *name,
+ struct dentry *parent,
+ const char *target);
+
+调用debugfs_rename()将为现有的debugfs文件é‡å‘½å,å¯èƒ½åŒæ—¶åˆ‡æ¢ç›®å½•ã€‚ new_name
+函数调用之å‰ä¸èƒ½å­˜åœ¨ï¼›è¿”回值为old_dentry,其中包å«æ›´æ–°çš„ä¿¡æ¯ã€‚å¯ä»¥ä½¿ç”¨
+debugfs_create_symlink()创建符å·é“¾æŽ¥ã€‚
+
+所有debugfs用户必须考虑的一件事是:
+
+debugfsä¸ä¼šè‡ªåŠ¨æ¸…除在其中创建的任何目录。如果一个模å—在ä¸æ˜¾å¼åˆ é™¤debugfs目录的
+情况下å¸è½½æ¨¡å—,结果将会é—留很多野指针,从而导致系统ä¸ç¨³å®šã€‚因此,所有debugfs
+用户-至少是那些å¯ä»¥ä½œä¸ºæ¨¡å—构建的用户-å¿…é¡»åšæ¨¡å—å¸è½½çš„时候准备删除在此创建的
+所有文件和目录。一份文件å¯ä»¥é€šè¿‡ä»¥ä¸‹æ–¹å¼åˆ é™¤::
+
+ void debugfs_remove(struct dentry *dentry);
+
+dentry值å¯ä»¥ä¸ºNULL或错误值,在这ç§æƒ…况下,ä¸ä¼šæœ‰ä»»ä½•æ–‡ä»¶è¢«åˆ é™¤ã€‚
+
+很久以å‰ï¼Œå†…核开å‘者使用debugfs时需è¦è®°å½•ä»–们创建的æ¯ä¸ªdentry指针,以便最åŽæ‰€æœ‰
+文件都å¯ä»¥è¢«æ¸…ç†æŽ‰ã€‚但是,现在debugfs用户能调用以下函数递归清除之å‰åˆ›å»ºçš„文件::
+
+ void debugfs_remove_recursive(struct dentry *dentry);
+
+如果将对应顶层目录的dentry传递给以上函数,则该目录下的整个层次结构将会被删除。
+
+注释:
+[1] http://lwn.net/Articles/309298/
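Pulling the calls documented above together, a minimal module might create one
directory with a decimal u32 file in it and remove everything again on unload.
This is only an illustrative sketch: the "example" directory, the counter
variable and the module boilerplate are assumptions, not taken from the text
above::

    #include <linux/debugfs.h>
    #include <linux/err.h>
    #include <linux/module.h>

    static struct dentry *example_dir;
    static u32 example_counter;

    static int __init example_init(void)
    {
            /* create /sys/kernel/debug/example/ */
            example_dir = debugfs_create_dir("example", NULL);
            if (IS_ERR(example_dir))
                    return PTR_ERR(example_dir);

            /* expose a decimal u32 value as .../example/counter */
            debugfs_create_u32("counter", 0644, example_dir, &example_counter);
            return 0;
    }

    static void __exit example_exit(void)
    {
            /* recursively remove everything created above */
            debugfs_remove_recursive(example_dir);
    }

    module_init(example_init);
    module_exit(example_exit);
    MODULE_LICENSE("GPL");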
diff --git a/Documentation/translations/zh_CN/filesystems/index.rst b/Documentation/translations/zh_CN/filesystems/index.rst
index 14f155edaf69..186501d13bc1 100644
--- a/Documentation/translations/zh_CN/filesystems/index.rst
+++ b/Documentation/translations/zh_CN/filesystems/index.rst
@@ -24,4 +24,5 @@ Linux Kernel中的文件系统
:maxdepth: 2
virtiofs
+ debugfs
diff --git a/Documentation/translations/zh_CN/filesystems/sysfs.txt b/Documentation/translations/zh_CN/filesystems/sysfs.txt
index ee1f37da5b23..fcf620049d11 100644
--- a/Documentation/translations/zh_CN/filesystems/sysfs.txt
+++ b/Documentation/translations/zh_CN/filesystems/sysfs.txt
@@ -1,4 +1,4 @@
-Chinese translated version of Documentation/filesystems/sysfs.txt
+Chinese translated version of Documentation/filesystems/sysfs.rst
If you have any comment or update to the content, please contact the
original document maintainer directly. However, if you have a problem
@@ -10,7 +10,7 @@ Maintainer: Patrick Mochel <mochel@osdl.org>
Mike Murphy <mamurph@cs.clemson.edu>
Chinese maintainer: Fu Wei <tekkamanninja@gmail.com>
---------------------------------------------------------------------
-Documentation/filesystems/sysfs.txt 的中文翻译
+Documentation/filesystems/sysfs.rst 的中文翻译
如果想评论或更新本文的内容,请直接è”系原文档的维护者。如果你使用英文
交æµæœ‰å›°éš¾çš„è¯ï¼Œä¹Ÿå¯ä»¥å‘中文版维护者求助。如果本翻译更新ä¸åŠæ—¶æˆ–者翻
@@ -40,7 +40,7 @@ sysfs 是一个最åˆåŸºäºŽ ramfs 且ä½äºŽå†…存的文件系统。它æ供导å
æ•°æ®ç»“æž„åŠå…¶å±žæ€§ï¼Œä»¥åŠå®ƒä»¬ä¹‹é—´çš„å…³è”到用户空间的方法。
sysfs 始终与 kobject 的底层结构紧密相关。请阅读
-Documentation/kobject.txt 文档以获得更多关于 kobject 接å£çš„
+Documentation/core-api/kobject.rst 文档以获得更多关于 kobject 接å£çš„
ä¿¡æ¯ã€‚
@@ -281,7 +281,7 @@ drivers/ 包å«äº†æ¯ä¸ªå·²ä¸ºç‰¹å®šæ€»çº¿ä¸Šçš„设备而挂载的驱动程åºçš
å‡å®šé©±åŠ¨æ²¡æœ‰è·¨è¶Šå¤šä¸ªæ€»çº¿ç±»åž‹)。
fs/ 包å«äº†ä¸€ä¸ªä¸ºæ–‡ä»¶ç³»ç»Ÿè®¾ç«‹çš„目录。现在æ¯ä¸ªæƒ³è¦å¯¼å‡ºå±žæ€§çš„文件系统必须
-在 fs/ 下创建自己的层次结构(å‚è§Documentation/filesystems/fuse.txt)。
+在 fs/ 下创建自己的层次结构(å‚è§Documentation/filesystems/fuse.rst)。
dev/ 包å«ä¸¤ä¸ªå­ç›®å½•ï¼š char/ å’Œ block/。在这两个å­ç›®å½•ä¸­ï¼Œæœ‰ä»¥
<major>:<minor> æ ¼å¼å‘½å的符å·é“¾æŽ¥ã€‚这些符å·é“¾æŽ¥æŒ‡å‘ sysfs 目录
diff --git a/Documentation/translations/zh_CN/process/submit-checklist.rst b/Documentation/translations/zh_CN/process/submit-checklist.rst
index 8738c55e42a2..50386e0e42e7 100644
--- a/Documentation/translations/zh_CN/process/submit-checklist.rst
+++ b/Documentation/translations/zh_CN/process/submit-checklist.rst
@@ -97,7 +97,7 @@ Linux内核补ä¸æ交清å•
24) 所有内存å±éšœä¾‹å¦‚ ``barrier()``, ``rmb()``, ``wmb()`` 都需è¦æºä»£ç ä¸­çš„注
释æ¥è§£é‡Šå®ƒä»¬æ­£åœ¨æ‰§è¡Œçš„æ“作åŠå…¶åŽŸå› çš„逻辑。
-25) 如果补ä¸æ·»åŠ äº†ä»»ä½•ioctl,那么也è¦æ›´æ–° ``Documentation/ioctl/ioctl-number.rst``
+25) 如果补ä¸æ·»åŠ äº†ä»»ä½•ioctl,那么也è¦æ›´æ–° ``Documentation/userspace-api/ioctl/ioctl-number.rst``
26) 如果修改åŽçš„æºä»£ç ä¾èµ–或使用与以下 ``Kconfig`` 符å·ç›¸å…³çš„任何内核API或
功能,则在ç¦ç”¨ç›¸å…³ ``Kconfig`` 符å·å’Œ/或 ``=m`` (如果该选项å¯ç”¨ï¼‰çš„情况
diff --git a/Documentation/translations/zh_CN/video4linux/v4l2-framework.txt b/Documentation/translations/zh_CN/video4linux/v4l2-framework.txt
index 9c39ee58ea50..a96abcdec777 100644
--- a/Documentation/translations/zh_CN/video4linux/v4l2-framework.txt
+++ b/Documentation/translations/zh_CN/video4linux/v4l2-framework.txt
@@ -488,7 +488,7 @@ struct v4l2_subdev *sd = v4l2_i2c_new_subdev(v4l2_dev, adapter,
这个函数会加载给定的模å—(如果没有模å—需è¦åŠ è½½ï¼Œå¯ä»¥ä¸º NULL),
并用给定的 i2c 适é…器结构体指针(i2c_adapter)和 器件地å€ï¼ˆchip/address)
-作为å‚数调用 i2c_new_device()。如果一切顺利,则就在 v4l2_device
+作为å‚数调用 i2c_new_client_device()。如果一切顺利,则就在 v4l2_device
中注册了å­è®¾å¤‡ã€‚
你也å¯ä»¥åˆ©ç”¨ v4l2_i2c_new_subdev()的最åŽä¸€ä¸ªå‚数,传递一个å¯èƒ½çš„
diff --git a/Documentation/usb/gadget_configfs.rst b/Documentation/usb/gadget_configfs.rst
index 54fb08baae22..158e48dab586 100644
--- a/Documentation/usb/gadget_configfs.rst
+++ b/Documentation/usb/gadget_configfs.rst
@@ -24,7 +24,7 @@ Linux provides a number of functions for gadgets to use.
Creating a gadget means deciding what configurations there will be
and which functions each configuration will provide.
-Configfs (please see `Documentation/filesystems/configfs/*`) lends itself nicely
+Configfs (please see `Documentation/filesystems/configfs.rst`) lends itself nicely
for the purpose of telling the kernel about the above mentioned decision.
This document is about how to do it.
@@ -354,7 +354,7 @@ the directories in general can be named at will. A group can have
a number of its default sub-groups created automatically.
For more information on configfs please see
-`Documentation/filesystems/configfs/*`.
+`Documentation/filesystems/configfs.rst`.
The concepts described above translate to USB gadgets like this:
diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst
index f759edafd938..52bf58417653 100644
--- a/Documentation/userspace-api/ioctl/ioctl-number.rst
+++ b/Documentation/userspace-api/ioctl/ioctl-number.rst
@@ -146,6 +146,7 @@ Code Seq# Include File Comments
'H' 40-4F sound/hdspm.h conflict!
'H' 40-4F sound/hdsp.h conflict!
'H' 90 sound/usb/usx2y/usb_stream.h
+'H' 00-0F uapi/misc/habanalabs.h conflict!
'H' A0 uapi/linux/usb/cdc-wdm.h
'H' C0-F0 net/bluetooth/hci.h conflict!
'H' C0-DF net/bluetooth/hidp/hidp.h conflict!
diff --git a/Documentation/virt/kvm/amd-memory-encryption.rst b/Documentation/virt/kvm/amd-memory-encryption.rst
index c3129b9ba5cb..57c01f531e61 100644
--- a/Documentation/virt/kvm/amd-memory-encryption.rst
+++ b/Documentation/virt/kvm/amd-memory-encryption.rst
@@ -74,7 +74,7 @@ should point to a file descriptor that is opened on the ``/dev/sev``
device, if needed (see individual commands).
On output, ``error`` is zero on success, or an error code. Error codes
-are defined in ``<linux/psp-dev.h>`.
+are defined in ``<linux/psp-dev.h>``.
KVM implements the following commands to support common lifecycle events of SEV
guests, such as launching, running, snapshotting, migrating and decommissioning.
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index efbbe570aa9b..426f94582b7a 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -2572,13 +2572,15 @@ list in 4.68.
:Parameters: None
:Returns: 0 on success, -1 on error
-This signals to the host kernel that the specified guest is being paused by
-userspace. The host will set a flag in the pvclock structure that is checked
-from the soft lockup watchdog. The flag is part of the pvclock structure that
-is shared between guest and host, specifically the second bit of the flags
+This ioctl sets a flag accessible to the guest indicating that the specified
+vCPU has been paused by the host userspace.
+
+The host will set a flag in the pvclock structure that is checked from the
+soft lockup watchdog. The flag is part of the pvclock structure that is
+shared between guest and host, specifically the second bit of the flags
field of the pvclock_vcpu_time_info structure. It will be set exclusively by
the host and read/cleared exclusively by the guest. The guest operation of
-checking and clearing the flag must an atomic operation so
+checking and clearing the flag must be an atomic operation so
load-link/store-conditional, or equivalent must be used. There are two cases
where the guest will clear the flag: when the soft lockup watchdog timer resets
itself or when a soft lockup is detected. This ioctl can be called any time
@@ -4334,9 +4336,13 @@ Errors:
#define KVM_STATE_NESTED_VMX_SMM_GUEST_MODE 0x00000001
#define KVM_STATE_NESTED_VMX_SMM_VMXON 0x00000002
+#define KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE 0x00000001
+
struct kvm_vmx_nested_state_hdr {
+ __u32 flags;
__u64 vmxon_pa;
__u64 vmcs12_pa;
+ __u64 preemption_timer_deadline;
struct {
__u16 flags;
@@ -5066,10 +5072,13 @@ EOI was received.
struct kvm_hyperv_exit {
#define KVM_EXIT_HYPERV_SYNIC 1
#define KVM_EXIT_HYPERV_HCALL 2
+ #define KVM_EXIT_HYPERV_SYNDBG 3
__u32 type;
+ __u32 pad1;
union {
struct {
__u32 msr;
+ __u32 pad2;
__u64 control;
__u64 evt_page;
__u64 msg_page;
@@ -5079,6 +5088,15 @@ EOI was received.
__u64 result;
__u64 params[2];
} hcall;
+ struct {
+ __u32 msr;
+ __u32 pad2;
+ __u64 control;
+ __u64 status;
+ __u64 send_page;
+ __u64 recv_page;
+ __u64 pending_page;
+ } syndbg;
} u;
};
/* KVM_EXIT_HYPERV */
@@ -5095,6 +5113,12 @@ Hyper-V SynIC state change. Notification is used to remap SynIC
event/message pages and to enable/disable SynIC messages/events processing
in userspace.
+ - KVM_EXIT_HYPERV_SYNDBG -- synchronously notify user-space about
+
+Hyper-V Synthetic debugger state change. Notification is used to either update
+the pending_page location or to send a control command (send the buffer located
+in send_page or recv a buffer to recv_page).
+
::
/* KVM_EXIT_ARM_NISV */
@@ -5777,7 +5801,7 @@ will be initialized to 1 when created. This also improves performance because
dirty logging can be enabled gradually in small chunks on the first call
to KVM_CLEAR_DIRTY_LOG. KVM_DIRTY_LOG_INITIALLY_SET depends on
KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE (it is also only available on
-x86 for now).
+x86 and arm64 for now).
KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 was previously available under the name
KVM_CAP_MANUAL_DIRTY_LOG_PROTECT, but the implementation had bugs that make
@@ -5802,6 +5826,23 @@ If present, this capability can be enabled for a VM, meaning that KVM
will allow the transition to secure guest mode. Otherwise KVM will
veto the transition.
+7.20 KVM_CAP_HALT_POLL
+----------------------
+
+:Architectures: all
+:Target: VM
+:Parameters: args[0] is the maximum poll time in nanoseconds
+:Returns: 0 on success; -1 on error
+
+This capability overrides the kvm module parameter halt_poll_ns for the
+target VM.
+
+VCPU polling allows a VCPU to poll for wakeup events instead of immediately
+scheduling during guest halts. The maximum time a VCPU can spend polling is
+controlled by the kvm module parameter halt_poll_ns. This capability allows
+the maximum halt time to be specified on a per-VM basis, effectively overriding
+the module parameter for the target VM.
+
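As a rough illustration (not part of this patch), userspace could enable the
capability on a VM file descriptor like this, assuming ``vm_fd`` was obtained
from KVM_CREATE_VM::

    struct kvm_enable_cap cap = {
            .cap     = KVM_CAP_HALT_POLL,
            .args[0] = 100000,      /* max poll time for this VM: 100 us, in ns */
    };

    if (ioctl(vm_fd, KVM_ENABLE_CAP, &cap) < 0)
            perror("KVM_ENABLE_CAP(KVM_CAP_HALT_POLL)");
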
8. Other capabilities.
======================
diff --git a/Documentation/virt/kvm/arm/pvtime.rst b/Documentation/virt/kvm/arm/pvtime.rst
index 2357dd2d8655..687b60d76ca9 100644
--- a/Documentation/virt/kvm/arm/pvtime.rst
+++ b/Documentation/virt/kvm/arm/pvtime.rst
@@ -76,5 +76,5 @@ It is advisable that one or more 64k pages are set aside for the purpose of
these structures and not used for other purposes, this enables the guest to map
the region using 64k pages and avoids conflicting attributes with other memory.
-For the user space interface see Documentation/virt/kvm/devices/vcpu.txt
+For the user space interface see Documentation/virt/kvm/devices/vcpu.rst
section "3. GROUP: KVM_ARM_VCPU_PVTIME_CTRL".
diff --git a/Documentation/virt/kvm/cpuid.rst b/Documentation/virt/kvm/cpuid.rst
index 01b081f6e7ea..a7dff9186bed 100644
--- a/Documentation/virt/kvm/cpuid.rst
+++ b/Documentation/virt/kvm/cpuid.rst
@@ -50,8 +50,8 @@ KVM_FEATURE_NOP_IO_DELAY 1 not necessary to perform delays
KVM_FEATURE_MMU_OP 2 deprecated
KVM_FEATURE_CLOCKSOURCE2 3 kvmclock available at msrs
-
0x4b564d00 and 0x4b564d01
+
KVM_FEATURE_ASYNC_PF 4 async pf can be enabled by
writing to msr 0x4b564d02
@@ -86,6 +86,12 @@ KVM_FEATURE_PV_SCHED_YIELD 13 guest checks this feature bit
before using paravirtualized
sched yield.
+KVM_FEATURE_ASYNC_PF_INT 14 guest checks this feature bit
+ before using the second async
+ pf control msr 0x4b564d06 and
+ async pf acknowledgment msr
+ 0x4b564d07.
+
KVM_FEATURE_CLOCSOURCE_STABLE_BIT 24 host will warn if no guest-side
per-cpu warps are expeced in
kvmclock
diff --git a/Documentation/virt/kvm/devices/vcpu.rst b/Documentation/virt/kvm/devices/vcpu.rst
index 9963e680770a..ca374d3fe085 100644
--- a/Documentation/virt/kvm/devices/vcpu.rst
+++ b/Documentation/virt/kvm/devices/vcpu.rst
@@ -110,5 +110,5 @@ Returns:
Specifies the base address of the stolen time structure for this VCPU. The
base address must be 64 byte aligned and exist within a valid guest memory
-region. See Documentation/virt/kvm/arm/pvtime.txt for more information
+region. See Documentation/virt/kvm/arm/pvtime.rst for more information
including the layout of the stolen time structure.
diff --git a/Documentation/virt/kvm/hypercalls.rst b/Documentation/virt/kvm/hypercalls.rst
index dbaf207e560d..ed4fddd364ea 100644
--- a/Documentation/virt/kvm/hypercalls.rst
+++ b/Documentation/virt/kvm/hypercalls.rst
@@ -22,7 +22,7 @@ S390:
number in R1.
For further information on the S390 diagnose call as supported by KVM,
- refer to Documentation/virt/kvm/s390-diag.txt.
+ refer to Documentation/virt/kvm/s390-diag.rst.
PowerPC:
It uses R3-R10 and hypercall number in R11. R4-R11 are used as output registers.
@@ -30,7 +30,7 @@ PowerPC:
KVM hypercalls uses 4 byte opcode, that are patched with 'hypercall-instructions'
property inside the device tree's /hypervisor node.
- For more information refer to Documentation/virt/kvm/ppc-pv.txt
+ For more information refer to Documentation/virt/kvm/ppc-pv.rst
MIPS:
KVM hypercalls use the HYPCALL instruction with code 0 and the hypercall
diff --git a/Documentation/virt/kvm/mmu.rst b/Documentation/virt/kvm/mmu.rst
index 60981887d20b..46126ecc70f7 100644
--- a/Documentation/virt/kvm/mmu.rst
+++ b/Documentation/virt/kvm/mmu.rst
@@ -319,7 +319,7 @@ Handling a page fault is performed as follows:
- If both P bit and R/W bit of error code are set, this could possibly
be handled as a "fast page fault" (fixed without taking the MMU lock). See
- the description in Documentation/virt/kvm/locking.txt.
+ the description in Documentation/virt/kvm/locking.rst.
- if needed, walk the guest page tables to determine the guest translation
(gva->gpa or ngpa->gpa)
diff --git a/Documentation/virt/kvm/msr.rst b/Documentation/virt/kvm/msr.rst
index 33892036672d..e37a14c323d2 100644
--- a/Documentation/virt/kvm/msr.rst
+++ b/Documentation/virt/kvm/msr.rst
@@ -190,41 +190,72 @@ MSR_KVM_ASYNC_PF_EN:
0x4b564d02
data:
- Bits 63-6 hold 64-byte aligned physical address of a
- 64 byte memory area which must be in guest RAM and must be
- zeroed. Bits 5-3 are reserved and should be zero. Bit 0 is 1
- when asynchronous page faults are enabled on the vcpu 0 when
- disabled. Bit 1 is 1 if asynchronous page faults can be injected
- when vcpu is in cpl == 0. Bit 2 is 1 if asynchronous page faults
- are delivered to L1 as #PF vmexits. Bit 2 can be set only if
- KVM_FEATURE_ASYNC_PF_VMEXIT is present in CPUID.
-
- First 4 byte of 64 byte memory location will be written to by
- the hypervisor at the time of asynchronous page fault (APF)
- injection to indicate type of asynchronous page fault. Value
- of 1 means that the page referred to by the page fault is not
- present. Value 2 means that the page is now available. Disabling
- interrupt inhibits APFs. Guest must not enable interrupt
- before the reason is read, or it may be overwritten by another
- APF. Since APF uses the same exception vector as regular page
- fault guest must reset the reason to 0 before it does
- something that can generate normal page fault. If during page
- fault APF reason is 0 it means that this is regular page
- fault.
-
- During delivery of type 1 APF cr2 contains a token that will
- be used to notify a guest when missing page becomes
- available. When page becomes available type 2 APF is sent with
- cr2 set to the token associated with the page. There is special
- kind of token 0xffffffff which tells vcpu that it should wake
- up all processes waiting for APFs and no individual type 2 APFs
- will be sent.
+ Asynchronous page fault (APF) control MSR.
+
+ Bits 63-6 hold 64-byte aligned physical address of a 64 byte memory area
+ which must be in guest RAM and must be zeroed. This memory is expected
+ to hold a copy of the following structure::
+
+ struct kvm_vcpu_pv_apf_data {
+ /* Used for 'page not present' events delivered via #PF */
+ __u32 flags;
+
+ /* Used for 'page ready' events delivered via interrupt notification */
+ __u32 token;
+
+ __u8 pad[56];
+ __u32 enabled;
+ };
+
+ Bits 5-4 of the MSR are reserved and should be zero. Bit 0 is set to 1
+ when asynchronous page faults are enabled on the vcpu, 0 when disabled.
+ Bit 1 is 1 if asynchronous page faults can be injected when vcpu is in
+ cpl == 0. Bit 2 is 1 if asynchronous page faults are delivered to L1 as
+ #PF vmexits. Bit 2 can be set only if KVM_FEATURE_ASYNC_PF_VMEXIT is
+ present in CPUID. Bit 3 enables interrupt based delivery of 'page ready'
+ events. Bit 3 can only be set if KVM_FEATURE_ASYNC_PF_INT is present in
+ CPUID.
+
+ 'Page not present' events are currently always delivered as synthetic
+ #PF exception. During delivery of these events APF CR2 register contains
+ a token that will be used to notify the guest when missing page becomes
+ available. Also, to make it possible to distinguish between real #PF and
+ APF, first 4 bytes of 64 byte memory location ('flags') will be written
+ to by the hypervisor at the time of injection. Only first bit of 'flags'
+ is currently supported, when set, it indicates that the guest is dealing
+ with asynchronous 'page not present' event. If during a page fault APF
+ 'flags' is '0' it means that this is regular page fault. Guest is
+ supposed to clear 'flags' when it is done handling #PF exception so the
+ next event can be delivered.
+
+ Note, since APF 'page not present' events use the same exception vector
+ as regular page fault, guest must reset 'flags' to '0' before it does
+ something that can generate normal page fault.
+
+ Bytes 5-7 of 64 byte memory location ('token') will be written to by the
+ hypervisor at the time of APF 'page ready' event injection. The content
+ of these bytes is a token which was previously delivered as 'page not
+ present' event. The event indicates the page is now available. Guest is
+ supposed to write '0' to 'token' when it is done handling 'page ready'
+ event and to write '1' to MSR_KVM_ASYNC_PF_ACK after clearing the location;
+ writing to the MSR forces KVM to re-scan its queue and deliver the next
+ pending notification.
+
+ Note, MSR_KVM_ASYNC_PF_INT MSR specifying the interrupt vector for 'page
+ ready' APF delivery needs to be written to before enabling APF mechanism
+ in MSR_KVM_ASYNC_PF_EN or interrupt #0 can get injected. The MSR is
+ available if KVM_FEATURE_ASYNC_PF_INT is present in CPUID.
+
+ Note, previously, 'page ready' events were delivered via the same #PF
+ exception as 'page not present' events but this is now deprecated. If
+ bit 3 (interrupt based delivery) is not set APF events are not delivered.
If APF is disabled while there are outstanding APFs, they will
not be delivered.
- Currently type 2 APF will be always delivered on the same vcpu as
- type 1 was, but guest should not rely on that.
+ Currently 'page ready' APF events will be always delivered on the
+ same vcpu as 'page not present' event was, but guest should not rely on
+ that.
MSR_KVM_STEAL_TIME:
0x4b564d03
@@ -319,3 +350,29 @@ data:
KVM guests can request the host not to poll on HLT, for example if
they are performing polling themselves.
+
+MSR_KVM_ASYNC_PF_INT:
+ 0x4b564d06
+
+data:
+ Second asynchronous page fault (APF) control MSR.
+
+ Bits 0-7: APIC vector for delivery of 'page ready' APF events.
+ Bits 8-63: Reserved
+
+ Interrupt vector for asynchronous 'page ready' notification delivery.
+ The vector has to be set up before asynchronous page fault mechanism
+ is enabled in MSR_KVM_ASYNC_PF_EN. The MSR is only available if
+ KVM_FEATURE_ASYNC_PF_INT is present in CPUID.
+
+MSR_KVM_ASYNC_PF_ACK:
+ 0x4b564d07
+
+data:
+ Asynchronous page fault (APF) acknowledgment.
+
+ When the guest is done processing 'page ready' APF event and 'token'
+ field in 'struct kvm_vcpu_pv_apf_data' is cleared it is supposed to
+ write '1' to bit 0 of the MSR, this causes the host to re-scan its queue
+ and check if there are more notifications pending. The MSR is available
+ if KVM_FEATURE_ASYNC_PF_INT is present in CPUID.
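As a hedged sketch of how the two MSRs above fit together (the handler and
helper names are illustrative, not from this document), a guest's 'page ready'
interrupt handler would roughly do::

    void kvm_pv_apf_ready_interrupt(void)
    {
            /* 'apf_data' is the per-vCPU struct kvm_vcpu_pv_apf_data */
            u32 token = READ_ONCE(apf_data.token);

            if (token) {
                    wake_up_waiters(token);          /* hypothetical helper */
                    WRITE_ONCE(apf_data.token, 0);   /* done with this event */
                    /* ask the host to deliver the next pending notification */
                    wrmsrl(MSR_KVM_ASYNC_PF_ACK, 1);
            }
    }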
diff --git a/Documentation/virt/kvm/nested-vmx.rst b/Documentation/virt/kvm/nested-vmx.rst
index 592b0ab6970b..89851cbb7df9 100644
--- a/Documentation/virt/kvm/nested-vmx.rst
+++ b/Documentation/virt/kvm/nested-vmx.rst
@@ -116,10 +116,7 @@ struct shadow_vmcs is ever changed.
natural_width cr4_guest_host_mask;
natural_width cr0_read_shadow;
natural_width cr4_read_shadow;
- natural_width cr3_target_value0;
- natural_width cr3_target_value1;
- natural_width cr3_target_value2;
- natural_width cr3_target_value3;
+ natural_width dead_space[4]; /* Last remnants of cr3_target_value[0-3]. */
natural_width exit_qualification;
natural_width guest_linear_address;
natural_width guest_cr0;
diff --git a/Documentation/virt/kvm/review-checklist.rst b/Documentation/virt/kvm/review-checklist.rst
index 1f86a9d3f705..dc01aea4057b 100644
--- a/Documentation/virt/kvm/review-checklist.rst
+++ b/Documentation/virt/kvm/review-checklist.rst
@@ -10,7 +10,7 @@ Review checklist for kvm patches
2. Patches should be against kvm.git master branch.
3. If the patch introduces or modifies a new userspace API:
- - the API must be documented in Documentation/virt/kvm/api.txt
+ - the API must be documented in Documentation/virt/kvm/api.rst
- the API must be discoverable using KVM_CHECK_EXTENSION
4. New state must include support for save/restore.
diff --git a/Documentation/vm/hmm.rst b/Documentation/vm/hmm.rst
index 4e3e9362afeb..561969754bc0 100644
--- a/Documentation/vm/hmm.rst
+++ b/Documentation/vm/hmm.rst
@@ -161,7 +161,7 @@ device must complete the update before the driver callback returns.
When the device driver wants to populate a range of virtual addresses, it can
use::
- long hmm_range_fault(struct hmm_range *range);
+ int hmm_range_fault(struct hmm_range *range);
It will trigger a page fault on missing or read-only entries if write access is
requested (see below). Page faults use the generic mm page fault code path just
@@ -184,10 +184,7 @@ The usage pattern is::
range.notifier = &interval_sub;
range.start = ...;
range.end = ...;
- range.pfns = ...;
- range.flags = ...;
- range.values = ...;
- range.pfn_shift = ...;
+ range.hmm_pfns = ...;
if (!mmget_not_zero(interval_sub->notifier.mm))
return -EFAULT;
@@ -229,15 +226,10 @@ The hmm_range struct has 2 fields, default_flags and pfn_flags_mask, that specif
fault or snapshot policy for the whole range instead of having to set them
for each entry in the pfns array.
-For instance, if the device flags for range.flags are::
+For instance if the device driver wants pages for a range with at least read
+permission, it sets::
- range.flags[HMM_PFN_VALID] = (1 << 63);
- range.flags[HMM_PFN_WRITE] = (1 << 62);
-
-and the device driver wants pages for a range with at least read permission,
-it sets::
-
- range->default_flags = (1 << 63);
+ range->default_flags = HMM_PFN_REQ_FAULT;
range->pfn_flags_mask = 0;
and calls hmm_range_fault() as described above. This will fill fault all pages
@@ -246,18 +238,18 @@ in the range with at least read permission.
Now let's say the driver wants to do the same except for one page in the range for
which it wants to have write permission. Now driver set::
- range->default_flags = (1 << 63);
- range->pfn_flags_mask = (1 << 62);
- range->pfns[index_of_write] = (1 << 62);
+ range->default_flags = HMM_PFN_REQ_FAULT;
+ range->pfn_flags_mask = HMM_PFN_REQ_WRITE;
+ range->pfns[index_of_write] = HMM_PFN_REQ_WRITE;
With this, HMM will fault in all pages with at least read (i.e., valid) and for the
address == range->start + (index_of_write << PAGE_SHIFT) it will fault with
write permission i.e., if the CPU pte does not have write permission set then HMM
will call handle_mm_fault().
-Note that HMM will populate the pfns array with write permission for any page
-that is mapped with CPU write permission no matter what values are set
-in default_flags or pfn_flags_mask.
+After hmm_range_fault completes the flag bits are set to the current state of
+the page tables, ie HMM_PFN_VALID | HMM_PFN_WRITE will be set if the page is
+writable.
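A compact sketch of the above, using the field and flag names from the
snippets in this document; NPAGES, start, index_of_write and the surrounding
setup are illustrative assumptions::

    unsigned long pfns[NPAGES];
    struct hmm_range range = {
            .notifier       = &interval_sub,
            .start          = start,
            .end            = start + NPAGES * PAGE_SIZE,
            .hmm_pfns       = pfns,
            .default_flags  = HMM_PFN_REQ_FAULT,    /* at least read everywhere */
            .pfn_flags_mask = HMM_PFN_REQ_WRITE,    /* honour per-page write requests */
    };
    int ret;

    pfns[index_of_write] = HMM_PFN_REQ_WRITE;       /* this one page must be writable */

    ret = hmm_range_fault(&range);
    /* on success pfns[i] holds HMM_PFN_VALID, plus HMM_PFN_WRITE where writable */
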
Represent and manage device memory from core kernel point of view
diff --git a/Documentation/vm/index.rst b/Documentation/vm/index.rst
index e8d943b21cf9..611140ffef7e 100644
--- a/Documentation/vm/index.rst
+++ b/Documentation/vm/index.rst
@@ -31,6 +31,7 @@ descriptions of data structures and algorithms.
active_mm
balance
cleancache
+ free_page_reporting
frontswap
highmem
hmm
diff --git a/Documentation/vm/page_frags.rst b/Documentation/vm/page_frags.rst
index 637cc49d1b2f..7d6f9385d129 100644
--- a/Documentation/vm/page_frags.rst
+++ b/Documentation/vm/page_frags.rst
@@ -26,7 +26,7 @@ to be disabled when executing the fragment allocation.
The network stack uses two separate caches per CPU to handle fragment
allocation. The netdev_alloc_cache is used by callers making use of the
-__netdev_alloc_frag and __netdev_alloc_skb calls. The napi_alloc_cache is
+netdev_alloc_frag and __netdev_alloc_skb calls. The napi_alloc_cache is
used by callers of the __napi_alloc_frag and __napi_alloc_skb calls. The
main difference between these two calls is the context in which they may be
called. The "netdev" prefixed functions are usable in any context as these
diff --git a/Documentation/vm/slub.rst b/Documentation/vm/slub.rst
index 933ada4368ff..4eee598555c9 100644
--- a/Documentation/vm/slub.rst
+++ b/Documentation/vm/slub.rst
@@ -49,7 +49,7 @@ Possible debug options are::
P Poisoning (object and padding)
U User tracking (free and alloc)
T Trace (please only use on single slabs)
- A Toggle failslab filter mark for the cache
+ A Enable failslab filter mark for the cache
O Switch debugging off for caches that would have
caused higher minimum slab orders
- Switch all debugging off (useful if the kernel is
diff --git a/Documentation/vm/zswap.rst b/Documentation/vm/zswap.rst
index f8c6a79d7c70..d8d9fa4a1f0d 100644
--- a/Documentation/vm/zswap.rst
+++ b/Documentation/vm/zswap.rst
@@ -140,10 +140,10 @@ without any real benefit but with a performance drop for the system), a
special parameter has been introduced to implement a sort of hysteresis to
refuse taking pages into zswap pool until it has sufficient space if the limit
has been hit. To set the threshold at which zswap would start accepting pages
-again after it became full, use the sysfs ``accept_threhsold_percent``
+again after it became full, use the sysfs ``accept_threshold_percent``
attribute, e. g.::
- echo 80 > /sys/module/zswap/parameters/accept_threhsold_percent
+ echo 80 > /sys/module/zswap/parameters/accept_threshold_percent
Setting this parameter to 100 will disable the hysteresis.
diff --git a/Documentation/watchdog/convert_drivers_to_kernel_api.rst b/Documentation/watchdog/convert_drivers_to_kernel_api.rst
index dd934cc08e40..a1c3f038ce0e 100644
--- a/Documentation/watchdog/convert_drivers_to_kernel_api.rst
+++ b/Documentation/watchdog/convert_drivers_to_kernel_api.rst
@@ -2,7 +2,7 @@
Converting old watchdog drivers to the watchdog framework
=========================================================
-by Wolfram Sang <w.sang@pengutronix.de>
+by Wolfram Sang <wsa@kernel.org>
Before the watchdog framework came into the kernel, every driver had to
implement the API on its own. Now, as the framework factored out the common
@@ -115,7 +115,7 @@ Add the watchdog operations
---------------------------
All possible callbacks are defined in 'struct watchdog_ops'. You can find it
-explained in 'watchdog-kernel-api.txt' in this directory. start(), stop() and
+explained in 'watchdog-kernel-api.txt' in this directory. start() and
owner must be set, the rest are optional. You will easily find corresponding
functions in the old driver. Note that you will now get a pointer to the
watchdog_device as a parameter to these functions, so you probably have to
diff --git a/Documentation/watchdog/watchdog-kernel-api.rst b/Documentation/watchdog/watchdog-kernel-api.rst
index 864edbe932c1..068a55ee0d4a 100644
--- a/Documentation/watchdog/watchdog-kernel-api.rst
+++ b/Documentation/watchdog/watchdog-kernel-api.rst
@@ -123,8 +123,8 @@ The list of watchdog operations is defined as::
struct module *owner;
/* mandatory operations */
int (*start)(struct watchdog_device *);
- int (*stop)(struct watchdog_device *);
/* optional operations */
+ int (*stop)(struct watchdog_device *);
int (*ping)(struct watchdog_device *);
unsigned int (*status)(struct watchdog_device *);
int (*set_timeout)(struct watchdog_device *, unsigned int);
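With stop() now among the optional operations, a minimal conversion only has
to supply owner and start(). A hedged sketch with made-up names (foo_wdt_*,
and the referenced watchdog_info, are assumptions, not from this document)::

    static int foo_wdt_start(struct watchdog_device *wdd)
    {
            /* poke the hardware to (re)start the watchdog; device specific */
            return 0;
    }

    static const struct watchdog_ops foo_wdt_ops = {
            .owner = THIS_MODULE,
            .start = foo_wdt_start,         /* the only mandatory callback */
    };

    static struct watchdog_device foo_wdt_dev = {
            .info = &foo_wdt_info,          /* assumed struct watchdog_info */
            .ops  = &foo_wdt_ops,
    };

    /* from probe(): err = watchdog_register_device(&foo_wdt_dev); */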
diff --git a/Documentation/x86/x86_64/uefi.rst b/Documentation/x86/x86_64/uefi.rst
index 88c3ba32546f..3b894103a734 100644
--- a/Documentation/x86/x86_64/uefi.rst
+++ b/Documentation/x86/x86_64/uefi.rst
@@ -36,7 +36,7 @@ Mechanics
elilo bootloader with x86_64 support, elilo configuration file,
kernel image built in first step and corresponding
- initrd. Instructions on building elilo and its dependencies
+ initrd. Instructions on building elilo and its dependencies
can be found in the elilo sourceforge project.
- Boot to EFI shell and invoke elilo choosing the kernel image built
diff --git a/MAINTAINERS b/MAINTAINERS
index 66d1a3f10102..e1897ed32930 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -842,6 +842,13 @@ S: Supported
T: git git://people.freedesktop.org/~agd5f/linux
F: drivers/gpu/drm/amd/display/
+AMD ENERGY DRIVER
+M: Naveen Krishna Chatradhi <nchatrad@amd.com>
+L: linux-hwmon@vger.kernel.org
+S: Maintained
+F: Documentation/hwmon/amd_energy.rst
+F: drivers/hwmon/amd_energy.c
+
AMD FAM15H PROCESSOR POWER MONITORING DRIVER
M: Huang Rui <ray.huang@amd.com>
L: linux-hwmon@vger.kernel.org
@@ -892,6 +899,11 @@ F: drivers/gpu/drm/amd/include/v9_structs.h
F: drivers/gpu/drm/amd/include/vi_structs.h
F: include/uapi/linux/kfd_ioctl.h
+AMD SPI DRIVER
+M: Sanjay R Mehta <sanju.mehta@amd.com>
+S: Maintained
+F: drivers/spi/spi-amd.c
+
AMD MP2 I2C DRIVER
M: Elie Morisse <syniurge@gmail.com>
M: Nehal Shah <nehal-bakulchandra.shah@amd.com>
@@ -2225,6 +2237,7 @@ F: drivers/*/qcom*
F: drivers/*/qcom/
F: drivers/bluetooth/btqcomsmd.c
F: drivers/clocksource/timer-qcom.c
+F: drivers/cpuidle/cpuidle-qcom-spm.c
F: drivers/extcon/extcon-qcom*
F: drivers/i2c/busses/i2c-qcom-geni.c
F: drivers/i2c/busses/i2c-qup.c
@@ -3730,7 +3743,7 @@ CACHEFILES: FS-CACHE BACKEND FOR CACHING ON MOUNTED FILESYSTEMS
M: David Howells <dhowells@redhat.com>
L: linux-cachefs@redhat.com (moderated for non-subscribers)
S: Supported
-F: Documentation/filesystems/caching/cachefiles.txt
+F: Documentation/filesystems/caching/cachefiles.rst
F: fs/cachefiles/
CADENCE MIPI-CSI2 BRIDGES
@@ -3896,6 +3909,15 @@ S: Supported
W: https://developer.arm.com/products/system-ip/trustzone-cryptocell/cryptocell-700-family
F: drivers/crypto/ccree/
+CCTRNG ARM TRUSTZONE CRYPTOCELL TRUE RANDOM NUMBER GENERATOR (TRNG) DRIVER
+M: Hadar Gat <hadar.gat@arm.com>
+L: linux-crypto@vger.kernel.org
+S: Supported
+F: drivers/char/hw_random/cctrng.c
+F: drivers/char/hw_random/cctrng.h
+F: Documentation/devicetree/bindings/rng/arm-cctrng.txt
+W: https://developer.arm.com/products/system-ip/trustzone-cryptocell/cryptocell-700-family
+
CEC FRAMEWORK
M: Hans Verkuil <hverkuil-cisco@xs4all.nl>
L: linux-media@vger.kernel.org
@@ -4198,7 +4220,7 @@ M: coda@cs.cmu.edu
L: codalist@coda.cs.cmu.edu
S: Maintained
W: http://www.coda.cs.cmu.edu/
-F: Documentation/filesystems/coda.txt
+F: Documentation/filesystems/coda.rst
F: fs/coda/
F: include/linux/coda*.h
F: include/uapi/linux/coda*.h
@@ -4991,7 +5013,7 @@ M: Jan Kara <jack@suse.cz>
R: Amir Goldstein <amir73il@gmail.com>
L: linux-fsdevel@vger.kernel.org
S: Maintained
-F: Documentation/filesystems/dnotify.txt
+F: Documentation/filesystems/dnotify.rst
F: fs/notify/dnotify/
F: include/linux/dnotify.h
@@ -5005,7 +5027,7 @@ W: http://www.win.tue.nl/~aeb/partitions/partition_types-1.html
DISKQUOTA
M: Jan Kara <jack@suse.com>
S: Maintained
-F: Documentation/filesystems/quota.txt
+F: Documentation/filesystems/quota.rst
F: fs/quota/
F: include/linux/quota*.h
F: include/uapi/linux/quota*.h
@@ -5040,7 +5062,7 @@ F: drivers/dma-buf/
F: include/linux/*fence.h
F: include/linux/dma-buf*
F: include/linux/dma-resv.h
-K: dma_(buf|fence|resv)
+K: \bdma_(?:buf|fence|resv)\b
DMA GENERIC OFFLOAD ENGINE SUBSYSTEM
M: Vinod Koul <vkoul@kernel.org>
@@ -5252,7 +5274,7 @@ DRM DRIVER FOR ARM VERSATILE TFT PANELS
M: Linus Walleij <linus.walleij@linaro.org>
S: Maintained
T: git git://anongit.freedesktop.org/drm/drm-misc
-F: Documentation/devicetree/bindings/display/panel/arm,versatile-tft-panel.txt
+F: Documentation/devicetree/bindings/display/panel/arm,versatile-tft-panel.yaml
F: drivers/gpu/drm/panel/panel-arm-versatile.c
DRM DRIVER FOR ASPEED BMC GFX
@@ -5278,7 +5300,7 @@ F: drivers/gpu/drm/bochs/
DRM DRIVER FOR BOE HIMAX8279D PANELS
M: Jerry Han <hanxu5@huaqin.corp-partner.google.com>
S: Maintained
-F: Documentation/devicetree/bindings/display/panel/boe,himax8279d.txt
+F: Documentation/devicetree/bindings/display/panel/boe,himax8279d.yaml
F: drivers/gpu/drm/panel/panel-boe-himax8279d.c
DRM DRIVER FOR FARADAY TVE200 TV ENCODER
@@ -5296,7 +5318,7 @@ F: drivers/gpu/drm/panel/panel-feixin-k101-im2ba02.c
DRM DRIVER FOR FEIYANG FY07024DI26A30-D MIPI-DSI LCD PANELS
M: Jagan Teki <jagan@amarulasolutions.com>
S: Maintained
-F: Documentation/devicetree/bindings/display/panel/feiyang,fy07024di26a30d.txt
+F: Documentation/devicetree/bindings/display/panel/feiyang,fy07024di26a30d.yaml
F: drivers/gpu/drm/panel/panel-feiyang-fy07024di26a30d.c
DRM DRIVER FOR GRAIN MEDIA GM12U320 PROJECTORS
@@ -5331,6 +5353,14 @@ S: Orphan / Obsolete
F: drivers/gpu/drm/i810/
F: include/uapi/drm/i810_drm.h
+DRM DRIVER FOR LVDS PANELS
+M: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
+L: dri-devel@lists.freedesktop.org
+T: git git://anongit.freedesktop.org/drm/drm-misc
+S: Maintained
+F: drivers/gpu/drm/panel/panel-lvds.c
+F: Documentation/devicetree/bindings/display/panel/lvds.yaml
+
DRM DRIVER FOR MATROX G200/G400 GRAPHICS CARDS
S: Orphan / Obsolete
F: drivers/gpu/drm/mga/
@@ -5379,7 +5409,7 @@ F: include/uapi/drm/nouveau_drm.h
DRM DRIVER FOR OLIMEX LCD-OLINUXINO PANELS
M: Stefan Mavrodiev <stefan@olimex.com>
S: Maintained
-F: Documentation/devicetree/bindings/display/panel/olimex,lcd-olinuxino.txt
+F: Documentation/devicetree/bindings/display/panel/olimex,lcd-olinuxino.yaml
F: drivers/gpu/drm/panel/panel-olimex-lcd-olinuxino.c
DRM DRIVER FOR PERVASIVE DISPLAYS REPAPER PANELS
@@ -5396,7 +5426,7 @@ L: virtualization@lists.linux-foundation.org
S: Obsolete
W: https://www.kraxel.org/blog/2014/10/qemu-using-cirrus-considered-harmful/
T: git git://anongit.freedesktop.org/drm/drm-misc
-F: drivers/gpu/drm/cirrus/
+F: drivers/gpu/drm/tiny/cirrus.c
DRM DRIVER FOR QXL VIRTUAL GPU
M: Dave Airlie <airlied@redhat.com>
@@ -5446,7 +5476,7 @@ F: drivers/gpu/drm/tiny/st7586.c
DRM DRIVER FOR SITRONIX ST7701 PANELS
M: Jagan Teki <jagan@amarulasolutions.com>
S: Maintained
-F: Documentation/devicetree/bindings/display/panel/sitronix,st7701.txt
+F: Documentation/devicetree/bindings/display/panel/sitronix,st7701.yaml
F: drivers/gpu/drm/panel/panel-sitronix-st7701.c
DRM DRIVER FOR SITRONIX ST7735R PANELS
@@ -6172,7 +6202,6 @@ M: Yash Shah <yash.shah@sifive.com>
L: linux-edac@vger.kernel.org
S: Supported
F: drivers/edac/sifive_edac.c
-F: drivers/soc/sifive_l2_cache.c
EDAC-SKYLAKE
M: Tony Luck <tony.luck@intel.com>
@@ -6713,6 +6742,13 @@ S: Maintained
F: Documentation/devicetree/bindings/crypto/fsl-sec4.txt
F: drivers/crypto/caam/
+FREESCALE COLDFIRE M5441X MMC DRIVER
+M: Angelo Dureghello <angelo.dureghello@timesys.com>
+L: linux-mmc@vger.kernel.org
+S: Maintained
+F: drivers/mmc/host/sdhci-esdhc-mcf.c
+F: include/linux/platform_data/mmc-esdhc-mcf.h
+
FREESCALE DIU FRAMEBUFFER DRIVER
M: Timur Tabi <timur@kernel.org>
L: linux-fbdev@vger.kernel.org
@@ -7020,13 +7056,24 @@ R: Darren Hart <dvhart@infradead.org>
L: linux-kernel@vger.kernel.org
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking/core
-F: Documentation/*futex*
+F: Documentation/locking/*futex*
F: include/asm-generic/futex.h
F: include/linux/futex.h
F: include/uapi/linux/futex.h
F: kernel/futex.c
F: tools/perf/bench/futex*
F: tools/testing/selftests/futex/
+
+GATEWORKS SYSTEM CONTROLLER (GSC) DRIVER
+M: Tim Harvey <tharvey@gateworks.com>
+M: Robert Jones <rjones@gateworks.com>
+S: Maintained
+F: Documentation/devicetree/bindings/mfd/gateworks-gsc.yaml
+F: drivers/mfd/gateworks-gsc.c
+F: include/linux/mfd/gsc.h
+F: Documentation/hwmon/gsc-hwmon.rst
+F: drivers/hwmon/gsc-hwmon.c
+F: include/linux/platform_data/gsc_hwmon.h
GASKET DRIVER FRAMEWORK
M: Rob Springer <rspringer@google.com>
@@ -7496,7 +7543,7 @@ L: linux-remoteproc@vger.kernel.org
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/andersson/remoteproc.git hwspinlock-next
F: Documentation/devicetree/bindings/hwlock/
-F: Documentation/hwspinlock.txt
+F: Documentation/locking/hwspinlock.rst
F: drivers/hwspinlock/
F: include/linux/hwspinlock.h
@@ -7729,7 +7776,9 @@ L: linux-mm@kvack.org
S: Maintained
F: Documentation/vm/hmm.rst
F: include/linux/hmm*
+F: lib/test_hmm*
F: mm/hmm*
+F: tools/testing/selftests/vm/*hmm*
HOST AP DRIVER
M: Jouni Malinen <j@w1.fi>
@@ -7875,6 +7924,7 @@ F: drivers/pci/controller/pci-hyperv.c
F: drivers/scsi/storvsc_drv.c
F: drivers/uio/uio_hv_generic.c
F: drivers/video/fbdev/hyperv_fb.c
+F: include/asm-generic/hyperv-tlfs.h
F: include/asm-generic/mshyperv.h
F: include/clocksource/hyperv_timer.h
F: include/linux/hyperv.h
@@ -8491,6 +8541,13 @@ L: platform-driver-x86@vger.kernel.org
S: Maintained
F: drivers/platform/x86/intel_atomisp2_pm.c
+INTEL BROXTON PMC DRIVER
+M: Mika Westerberg <mika.westerberg@linux.intel.com>
+M: Zha Qipeng <qipeng.zha@intel.com>
+S: Maintained
+F: drivers/mfd/intel_pmc_bxt.c
+F: include/linux/mfd/intel_pmc_bxt.h
+
INTEL C600 SERIES SAS CONTROLLER DRIVER
M: Intel SCU Linux support <intel-linux-scu@intel.com>
M: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
@@ -8698,6 +8755,13 @@ F: include/uapi/linux/mic_common.h
F: include/uapi/linux/mic_ioctl.h
F: include/uapi/linux/scif_ioctl.h
+INTEL P-Unit IPC DRIVER
+M: Zha Qipeng <qipeng.zha@intel.com>
+L: platform-driver-x86@vger.kernel.org
+S: Maintained
+F: arch/x86/include/asm/intel_punit_ipc.h
+F: drivers/platform/x86/intel_punit_ipc.c
+
INTEL PMC CORE DRIVER
M: Rajneesh Bhardwaj <rajneesh.bhardwaj@intel.com>
M: Vishwanath Somayaji <vishwanath.somayaji@intel.com>
@@ -8705,15 +8769,6 @@ L: platform-driver-x86@vger.kernel.org
S: Maintained
F: drivers/platform/x86/intel_pmc_core*
-INTEL PMC/P-Unit IPC DRIVER
-M: Zha Qipeng<qipeng.zha@intel.com>
-L: platform-driver-x86@vger.kernel.org
-S: Maintained
-F: arch/x86/include/asm/intel_pmc_ipc.h
-F: arch/x86/include/asm/intel_punit_ipc.h
-F: drivers/platform/x86/intel_pmc_ipc.c
-F: drivers/platform/x86/intel_punit_ipc.c
-
INTEL PMIC GPIO DRIVERS
M: Andy Shevchenko <andy@kernel.org>
S: Maintained
@@ -8752,6 +8807,12 @@ S: Supported
F: drivers/infiniband/hw/i40iw/
F: include/uapi/rdma/i40iw-abi.h
+INTEL SCU DRIVERS
+M: Mika Westerberg <mika.westerberg@linux.intel.com>
+S: Maintained
+F: arch/x86/include/asm/intel_scu_ipc.h
+F: drivers/platform/x86/intel_scu_*
+
INTEL SPEED SELECT TECHNOLOGY
M: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
L: platform-driver-x86@vger.kernel.org
@@ -8818,6 +8879,13 @@ F: Documentation/admin-guide/wimax/i2400m.rst
F: drivers/net/wimax/i2400m/
F: include/uapi/linux/wimax/i2400m.h
+INTEL WMI SLIM BOOTLOADER (SBL) FIRMWARE UPDATE DRIVER
+M: Jithu Joseph <jithu.joseph@intel.com>
+R: Maurice Ma <maurice.ma@intel.com>
+S: Maintained
+W: https://slimbootloader.github.io/security/firmware-update.html
+F: drivers/platform/x86/intel-wmi-sbl-fw-update.c
+
INTEL WMI THUNDERBOLT FORCE POWER DRIVER
M: Mario Limonciello <mario.limonciello@dell.com>
S: Maintained
@@ -8903,7 +8971,7 @@ M: Corey Minyard <minyard@acm.org>
L: openipmi-developer@lists.sourceforge.net (moderated for non-subscribers)
S: Supported
W: http://openipmi.sourceforge.net/
-F: Documentation/IPMI.txt
+F: Documentation/driver-api/ipmi.rst
F: Documentation/devicetree/bindings/ipmi/
F: drivers/char/ipmi/
F: include/linux/ipmi*
@@ -8945,7 +9013,7 @@ IRQ DOMAINS (IRQ NUMBER MAPPING LIBRARY)
M: Marc Zyngier <maz@kernel.org>
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git irq/core
-F: Documentation/IRQ-domain.txt
+F: Documentation/core-api/irq/irq-domain.rst
F: include/linux/irqdomain.h
F: kernel/irq/irqdomain.c
F: kernel/irq/msi.c
@@ -9300,7 +9368,6 @@ F: arch/arm64/include/asm/kvm*
F: arch/arm64/include/uapi/asm/kvm*
F: arch/arm64/kvm/
F: include/kvm/arm_*
-F: virt/kvm/arm/
KERNEL VIRTUAL MACHINE FOR MIPS (KVM/mips)
L: linux-mips@vger.kernel.org
@@ -9725,6 +9792,13 @@ F: drivers/lightnvm/
F: include/linux/lightnvm.h
F: include/uapi/linux/lightnvm.h
+LINEAR RANGES HELPERS
+M: Mark Brown <broonie@kernel.org>
+R: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
+F: lib/linear_ranges.c
+F: lib/test_linear_ranges.c
+F: include/linux/linear_range.h
+
LINUX FOR POWER MACINTOSH
M: Benjamin Herrenschmidt <benh@kernel.crashing.org>
L: linuxppc-dev@lists.ozlabs.org
@@ -12767,7 +12841,7 @@ M: "James E.J. Bottomley" <James.Bottomley@HansenPartnership.com>
M: Helge Deller <deller@gmx.de>
L: linux-parisc@vger.kernel.org
S: Maintained
-W: http://www.parisc-linux.org/
+W: https://parisc.wiki.kernel.org
Q: http://patchwork.kernel.org/project/linux-parisc/list/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/jejb/parisc-2.6.git
T: git git://git.kernel.org/pub/scm/linux/kernel/git/deller/parisc-linux.git
@@ -13676,6 +13750,7 @@ M: Tony Luck <tony.luck@intel.com>
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/pstore
F: Documentation/admin-guide/ramoops.rst
+F: Documentation/admin-guide/pstore-blk.rst
F: Documentation/devicetree/bindings/reserved-memory/ramoops.txt
F: drivers/acpi/apei/erst.c
F: drivers/firmware/efi/efi-pstore.c
@@ -14074,7 +14149,6 @@ F: drivers/net/wireless/quantenna
RADEON and AMDGPU DRM DRIVERS
M: Alex Deucher <alexander.deucher@amd.com>
M: Christian König <christian.koenig@amd.com>
-M: David (ChunMing) Zhou <David1.Zhou@amd.com>
L: amd-gfx@lists.freedesktop.org
S: Supported
T: git git://people.freedesktop.org/~agd5f/linux
@@ -14230,7 +14304,7 @@ M: Reinette Chatre <reinette.chatre@intel.com>
L: linux-kernel@vger.kernel.org
S: Supported
F: Documentation/x86/resctrl*
-F: arch/x86/include/asm/resctrl_sched.h
+F: arch/x86/include/asm/resctrl.h
F: arch/x86/kernel/cpu/resctrl/
F: tools/testing/selftests/resctrl/
@@ -14637,6 +14711,7 @@ S: Supported
W: http://www.ibm.com/developerworks/linux/linux390/
F: block/partitions/ibm.c
F: drivers/s390/block/dasd*
+F: include/linux/dasd_mod.h
S390 IOMMU (PCI)
M: Gerald Schaefer <gerald.schaefer@de.ibm.com>
@@ -15479,6 +15554,15 @@ M: Nicolas Pitre <nico@fluxnic.net>
S: Odd Fixes
F: drivers/net/ethernet/smsc/smc91x.*
+SECURE MONITOR CALL(SMC) CALLING CONVENTION (SMCCC)
+M: Mark Rutland <mark.rutland@arm.com>
+M: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
+M: Sudeep Holla <sudeep.holla@arm.com>
+L: linux-arm-kernel@lists.infradead.org
+S: Maintained
+F: drivers/firmware/smccc/
+F: include/linux/arm-smccc.h
+
SMIA AND SMIA++ IMAGE SENSOR DRIVER
M: Sakari Ailus <sakari.ailus@linux.intel.com>
L: linux-media@vger.kernel.org
@@ -15655,7 +15739,7 @@ F: drivers/ssb/
F: include/linux/ssb/
SONY IMX214 SENSOR DRIVER
-M: Ricardo Ribalda <ricardo.ribalda@gmail.com>
+M: Ricardo Ribalda <ribalda@kernel.org>
L: linux-media@vger.kernel.org
S: Maintained
T: git git://linuxtv.org/media_tree.git
@@ -15895,7 +15979,7 @@ M: Jeremy Kerr <jk@ozlabs.org>
L: linuxppc-dev@lists.ozlabs.org
S: Supported
W: http://www.ibm.com/developerworks/power/cell/
-F: Documentation/filesystems/spufs.txt
+F: Documentation/filesystems/spufs/spufs.rst
F: arch/powerpc/platforms/cell/spufs/
SQUASHFS FILE SYSTEM
@@ -16642,7 +16726,7 @@ S: Maintained
F: sound/soc/ti/
TEXAS INSTRUMENTS' DAC7612 DAC DRIVER
-M: Ricardo Ribalda <ricardo@ribalda.com>
+M: Ricardo Ribalda <ribalda@kernel.org>
L: linux-iio@vger.kernel.org
S: Supported
F: Documentation/devicetree/bindings/iio/dac/ti,dac7612.txt
@@ -18550,8 +18634,8 @@ W: http://xfs.org/
T: git git://git.kernel.org/pub/scm/fs/xfs/xfs-linux.git
F: Documentation/ABI/testing/sysfs-fs-xfs
F: Documentation/admin-guide/xfs.rst
-F: Documentation/filesystems/xfs-delayed-logging-design.txt
-F: Documentation/filesystems/xfs-self-describing-metadata.txt
+F: Documentation/filesystems/xfs-delayed-logging-design.rst
+F: Documentation/filesystems/xfs-self-describing-metadata.rst
F: fs/xfs/
F: include/uapi/linux/dqblk_xfs.h
F: include/uapi/linux/fsmap.h
diff --git a/Makefile b/Makefile
index f0d118b86287..a7bc91cbac8f 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
VERSION = 5
PATCHLEVEL = 7
SUBLEVEL = 0
-EXTRAVERSION = -rc7
+EXTRAVERSION =
NAME = Kleptomaniac Octopus
# *DOCUMENTATION*
@@ -862,6 +862,12 @@ ifdef CONFIG_LIVEPATCH
KBUILD_CFLAGS += $(call cc-option, -flive-patching=inline-clone)
endif
+ifdef CONFIG_SHADOW_CALL_STACK
+CC_FLAGS_SCS := -fsanitize=shadow-call-stack
+KBUILD_CFLAGS += $(CC_FLAGS_SCS)
+export CC_FLAGS_SCS
+endif
+
# arch Makefile may override CC so keep this after arch Makefile is included
NOSTDINC_FLAGS += -nostdinc -isystem $(shell $(CC) -print-file-name=include)
diff --git a/arch/Kconfig b/arch/Kconfig
index 786a85d4ad40..2e6f843d87c4 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -533,6 +533,31 @@ config STACKPROTECTOR_STRONG
about 20% of all kernel functions, which increases the kernel code
size by about 2%.
+config ARCH_SUPPORTS_SHADOW_CALL_STACK
+ bool
+ help
+ An architecture should select this if it supports Clang's Shadow
+ Call Stack and implements runtime support for shadow stack
+ switching.
+
+config SHADOW_CALL_STACK
+ bool "Clang Shadow Call Stack"
+ depends on CC_IS_CLANG && ARCH_SUPPORTS_SHADOW_CALL_STACK
+ depends on DYNAMIC_FTRACE_WITH_REGS || !FUNCTION_GRAPH_TRACER
+ help
+ This option enables Clang's Shadow Call Stack, which uses a
+ shadow stack to protect function return addresses from being
+ overwritten by an attacker. More information can be found in
+ Clang's documentation:
+
+ https://clang.llvm.org/docs/ShadowCallStack.html
+
+ Note that security guarantees in the kernel differ from the
+ ones documented for user space. The kernel must store addresses
+ of shadow stacks in memory, which means an attacker capable of
+ reading and writing arbitrary memory may be able to locate them
+ and hijack control flow by modifying the stacks.
+
config HAVE_ARCH_WITHIN_STACK_FRAMES
bool
help
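
Aside (not part of the patch): a minimal sketch of what the SHADOW_CALL_STACK instrumentation described above looks like at the compiler level. File and symbol names below are invented for illustration; the flags are the ones the option wires into CC_FLAGS_SCS.

/*
 * Sketch only: compile with
 *   clang -O2 --target=aarch64-linux-gnu \
 *         -fsanitize=shadow-call-stack -ffixed-x18 -S scs_demo.c
 * and diff the output against a build without the sanitizer flag.
 * With SCS enabled, non-leaf functions typically gain
 *   str x30, [x18], #8     // push the return address to the shadow stack
 * in the prologue and
 *   ldr x30, [x18, #-8]!   // pop it back just before returning
 * in the epilogue, so overwriting the normal stack frame cannot
 * redirect the return.
 */
extern int helper(int v);

int scs_demo(int v)
{
	return helper(v) + 1;	/* the call forces x30 to be saved */
}
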
diff --git a/arch/alpha/include/asm/checksum.h b/arch/alpha/include/asm/checksum.h
index 473e6ccb65a3..0eac81624d01 100644
--- a/arch/alpha/include/asm/checksum.h
+++ b/arch/alpha/include/asm/checksum.h
@@ -41,7 +41,8 @@ extern __wsum csum_partial(const void *buff, int len, __wsum sum);
* here even more important to align src and dst on a 32-bit (or even
* better 64-bit) boundary
*/
-__wsum csum_partial_copy_from_user(const void __user *src, void *dst, int len, __wsum sum, int *errp);
+#define _HAVE_ARCH_COPY_AND_CSUM_FROM_USER
+__wsum csum_and_copy_from_user(const void __user *src, void *dst, int len, __wsum sum, int *errp);
__wsum csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum);
diff --git a/arch/alpha/include/asm/floppy.h b/arch/alpha/include/asm/floppy.h
index 942924756cf2..8dfdb3aa1d96 100644
--- a/arch/alpha/include/asm/floppy.h
+++ b/arch/alpha/include/asm/floppy.h
@@ -11,8 +11,8 @@
#define __ASM_ALPHA_FLOPPY_H
-#define fd_inb(port) inb_p(port)
-#define fd_outb(value,port) outb_p(value,port)
+#define fd_inb(base, reg) inb_p((base) + (reg))
+#define fd_outb(value, base, reg) outb_p(value, (base) + (reg))
#define fd_enable_dma() enable_dma(FLOPPY_DMA)
#define fd_disable_dma() disable_dma(FLOPPY_DMA)
diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl
index 36d42da7466a..5ddd128d4b7a 100644
--- a/arch/alpha/kernel/syscalls/syscall.tbl
+++ b/arch/alpha/kernel/syscalls/syscall.tbl
@@ -477,3 +477,4 @@
# 545 reserved for clone3
547 common openat2 sys_openat2
548 common pidfd_getfd sys_pidfd_getfd
+549 common faccessat2 sys_faccessat2
diff --git a/arch/alpha/lib/csum_partial_copy.c b/arch/alpha/lib/csum_partial_copy.c
index e53f96e8aa6d..af1dad74e933 100644
--- a/arch/alpha/lib/csum_partial_copy.c
+++ b/arch/alpha/lib/csum_partial_copy.c
@@ -325,7 +325,7 @@ csum_partial_cfu_unaligned(const unsigned long __user * src,
}
__wsum
-csum_partial_copy_from_user(const void __user *src, void *dst, int len,
+csum_and_copy_from_user(const void __user *src, void *dst, int len,
__wsum sum, int *errp)
{
unsigned long checksum = (__force u32) sum;
@@ -369,7 +369,7 @@ csum_partial_copy_from_user(const void __user *src, void *dst, int len,
}
return (__force __wsum)checksum;
}
-EXPORT_SYMBOL(csum_partial_copy_from_user);
+EXPORT_SYMBOL(csum_and_copy_from_user);
__wsum
csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum)
@@ -377,7 +377,7 @@ csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum)
__wsum checksum;
mm_segment_t oldfs = get_fs();
set_fs(KERNEL_DS);
- checksum = csum_partial_copy_from_user((__force const void __user *)src,
+ checksum = csum_and_copy_from_user((__force const void __user *)src,
dst, len, sum, NULL);
set_fs(oldfs);
return checksum;
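
Aside (editor's sketch, not the alpha implementation): the semantics behind csum_and_copy_from_user are "copy the bytes and accumulate a ones'-complement checksum as you go". A user-space model of just that core loop, ignoring the __user access and the *errp fault reporting the kernel helper adds on top:

#include <stdint.h>
#include <stdio.h>
#include <stddef.h>

/* Fold a wide accumulator down to 16 bits, ones'-complement style. */
static uint16_t csum_fold(uint64_t sum)
{
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)sum;
}

/* Copy len bytes and return the checksum of the data that was copied. */
static uint16_t copy_and_csum(void *dst, const void *src, size_t len)
{
	const uint8_t *s = src;
	uint8_t *d = dst;
	uint64_t sum = 0;
	size_t i;

	for (i = 0; i + 1 < len; i += 2) {
		d[i] = s[i];
		d[i + 1] = s[i + 1];
		sum += (uint16_t)(s[i] | (s[i + 1] << 8)); /* little-endian words */
	}
	if (i < len) {		/* trailing odd byte */
		d[i] = s[i];
		sum += s[i];
	}
	return csum_fold(sum);
}

int main(void)
{
	char src[] = "checksum me", dst[sizeof(src)];

	printf("0x%04x\n", copy_and_csum(dst, src, sizeof(src)));
	return 0;
}
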
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index c77c93c485a0..16fbf74030fe 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -313,6 +313,9 @@ choice
config ARCH_MULTIPLATFORM
bool "Allow multiple platforms to be selected"
depends on MMU
+ select ARCH_FLATMEM_ENABLE
+ select ARCH_SPARSEMEM_ENABLE
+ select ARCH_SELECT_MEMORY_MODEL
select ARM_HAS_SG_CHAIN
select ARM_PATCH_PHYS_VIRT
select AUTO_ZRELADDR
@@ -1516,11 +1519,15 @@ config OABI_COMPAT
config ARCH_HAS_HOLES_MEMORYMODEL
bool
-config ARCH_SPARSEMEM_ENABLE
+config ARCH_SELECT_MEMORY_MODEL
+ bool
+
+config ARCH_FLATMEM_ENABLE
bool
-config ARCH_SPARSEMEM_DEFAULT
- def_bool ARCH_SPARSEMEM_ENABLE
+config ARCH_SPARSEMEM_ENABLE
+ bool
+ select SPARSEMEM_STATIC if SPARSEMEM
config HAVE_ARCH_PFN_VALID
def_bool ARCH_HAS_HOLES_MEMORYMODEL || !SPARSEMEM
@@ -1955,7 +1962,7 @@ config EFI
select UCS2_STRING
select EFI_PARAMS_FROM_FDT
select EFI_STUB
- select EFI_ARMSTUB
+ select EFI_GENERIC_STUB
select EFI_RUNTIME_WRAPPERS
---help---
This option provides support for runtime services provided
diff --git a/arch/arm/boot/compressed/.gitignore b/arch/arm/boot/compressed/.gitignore
index db05c6ef3e31..60606b0f378d 100644
--- a/arch/arm/boot/compressed/.gitignore
+++ b/arch/arm/boot/compressed/.gitignore
@@ -7,12 +7,3 @@ hyp-stub.S
piggy_data
vmlinux
vmlinux.lds
-
-# borrowed libfdt files
-fdt.c
-fdt.h
-fdt_ro.c
-fdt_rw.c
-fdt_wip.c
-libfdt.h
-libfdt_internal.h
diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile
index 9c11e7490292..00602a6fba04 100644
--- a/arch/arm/boot/compressed/Makefile
+++ b/arch/arm/boot/compressed/Makefile
@@ -76,29 +76,30 @@ compress-$(CONFIG_KERNEL_LZMA) = lzma
compress-$(CONFIG_KERNEL_XZ) = xzkern
compress-$(CONFIG_KERNEL_LZ4) = lz4
-# Borrowed libfdt files for the ATAG compatibility mode
-
-libfdt := fdt_rw.c fdt_ro.c fdt_wip.c fdt.c
-libfdt_hdrs := fdt.h libfdt.h libfdt_internal.h
-
-libfdt_objs := $(addsuffix .o, $(basename $(libfdt)))
-
-$(addprefix $(obj)/,$(libfdt) $(libfdt_hdrs)): $(obj)/%: $(srctree)/scripts/dtc/libfdt/%
- $(call cmd,shipped)
-
-$(addprefix $(obj)/,$(libfdt_objs) atags_to_fdt.o): \
- $(addprefix $(obj)/,$(libfdt_hdrs))
+libfdt_objs := fdt_rw.o fdt_ro.o fdt_wip.o fdt.o
ifeq ($(CONFIG_ARM_ATAG_DTB_COMPAT),y)
OBJS += $(libfdt_objs) atags_to_fdt.o
endif
+# -fstack-protector-strong triggers protection checks in this code,
+# but it is being used too early to link to meaningful stack_chk logic.
+nossp-flags-$(CONFIG_CC_HAS_STACKPROTECTOR_NONE) := -fno-stack-protector
+$(foreach o, $(libfdt_objs) atags_to_fdt.o, \
+ $(eval CFLAGS_$(o) := -I $(srctree)/scripts/dtc/libfdt $(nossp-flags-y)))
+
+# These were previously generated C files. When you are building the kernel
+# with O=, make sure to remove the stale files in the output tree. Otherwise,
+# the build system wrongly compiles the stale ones.
+ifdef building_out_of_srctree
+$(shell rm -f $(addprefix $(obj)/, fdt_rw.c fdt_ro.c fdt_wip.c fdt.c))
+endif
+
targets := vmlinux vmlinux.lds piggy_data piggy.o \
lib1funcs.o ashldi3.o bswapsdi2.o \
head.o $(OBJS)
-clean-files += piggy_data lib1funcs.S ashldi3.S bswapsdi2.S \
- $(libfdt) $(libfdt_hdrs) hyp-stub.S
+clean-files += piggy_data lib1funcs.S ashldi3.S bswapsdi2.S hyp-stub.S
KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
@@ -107,15 +108,6 @@ ORIG_CFLAGS := $(KBUILD_CFLAGS)
KBUILD_CFLAGS = $(subst -pg, , $(ORIG_CFLAGS))
endif
-# -fstack-protector-strong triggers protection checks in this code,
-# but it is being used too early to link to meaningful stack_chk logic.
-nossp-flags-$(CONFIG_CC_HAS_STACKPROTECTOR_NONE) := -fno-stack-protector
-CFLAGS_atags_to_fdt.o := $(nossp-flags-y)
-CFLAGS_fdt.o := $(nossp-flags-y)
-CFLAGS_fdt_ro.o := $(nossp-flags-y)
-CFLAGS_fdt_rw.o := $(nossp-flags-y)
-CFLAGS_fdt_wip.o := $(nossp-flags-y)
-
ccflags-y := -fpic $(call cc-option,-mno-single-pic-base,) -fno-builtin \
-I$(obj) $(DISABLE_ARM_SSP_PER_TASK_PLUGIN)
asflags-y := -DZIMAGE
diff --git a/arch/arm/boot/compressed/atags_to_fdt.c b/arch/arm/boot/compressed/atags_to_fdt.c
index 64c49747f8a3..8452753efebe 100644
--- a/arch/arm/boot/compressed/atags_to_fdt.c
+++ b/arch/arm/boot/compressed/atags_to_fdt.c
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
+#include <linux/libfdt_env.h>
#include <asm/setup.h>
#include <libfdt.h>
diff --git a/arch/arm/boot/compressed/efi-header.S b/arch/arm/boot/compressed/efi-header.S
index 62286da318e7..c0e7a745103e 100644
--- a/arch/arm/boot/compressed/efi-header.S
+++ b/arch/arm/boot/compressed/efi-header.S
@@ -60,7 +60,7 @@ optional_header:
.long __pecoff_code_size @ SizeOfCode
.long __pecoff_data_size @ SizeOfInitializedData
.long 0 @ SizeOfUninitializedData
- .long efi_entry - start @ AddressOfEntryPoint
+ .long efi_pe_entry - start @ AddressOfEntryPoint
.long start_offset @ BaseOfCode
.long __pecoff_data_start - start @ BaseOfData
diff --git a/arch/arm/boot/compressed/fdt.c b/arch/arm/boot/compressed/fdt.c
new file mode 100644
index 000000000000..f8ea7a201ab1
--- /dev/null
+++ b/arch/arm/boot/compressed/fdt.c
@@ -0,0 +1,2 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "../../../../lib/fdt.c"
diff --git a/arch/arm/boot/compressed/fdt_ro.c b/arch/arm/boot/compressed/fdt_ro.c
new file mode 100644
index 000000000000..93970a4ad5ae
--- /dev/null
+++ b/arch/arm/boot/compressed/fdt_ro.c
@@ -0,0 +1,2 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "../../../../lib/fdt_ro.c"
diff --git a/arch/arm/boot/compressed/fdt_rw.c b/arch/arm/boot/compressed/fdt_rw.c
new file mode 100644
index 000000000000..f7c6b8b7e01c
--- /dev/null
+++ b/arch/arm/boot/compressed/fdt_rw.c
@@ -0,0 +1,2 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "../../../../lib/fdt_rw.c"
diff --git a/arch/arm/boot/compressed/fdt_wip.c b/arch/arm/boot/compressed/fdt_wip.c
new file mode 100644
index 000000000000..048d2c7a088d
--- /dev/null
+++ b/arch/arm/boot/compressed/fdt_wip.c
@@ -0,0 +1,2 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "../../../../lib/fdt_wip.c"
diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S
index e8e1c866e413..c79db44ba128 100644
--- a/arch/arm/boot/compressed/head.S
+++ b/arch/arm/boot/compressed/head.S
@@ -287,28 +287,22 @@ not_angel:
*/
mov r0, pc
cmp r0, r4
- ldrcc r0, LC0+28
+ ldrcc r0, .Lheadroom
addcc r0, r0, pc
cmpcc r4, r0
orrcc r4, r4, #1 @ remember we skipped cache_on
blcs cache_on
-restart: adr r0, LC0
- ldmia r0, {r1, r2, r3, r6, r11, r12}
- ldr sp, [r0, #24]
-
- /*
- * We might be running at a different address. We need
- * to fix up various pointers.
- */
- sub r0, r0, r1 @ calculate the delta offset
- add r6, r6, r0 @ _edata
+restart: adr r0, LC1
+ ldr sp, [r0]
+ ldr r6, [r0, #4]
+ add sp, sp, r0
+ add r6, r6, r0
get_inflated_image_size r9, r10, lr
#ifndef CONFIG_ZBOOT_ROM
/* malloc space is above the relocated stack (64k max) */
- add sp, sp, r0
add r10, sp, #0x10000
#else
/*
@@ -322,9 +316,6 @@ restart: adr r0, LC0
mov r5, #0 @ init dtb size to 0
#ifdef CONFIG_ARM_APPENDED_DTB
/*
- * r0 = delta
- * r2 = BSS start
- * r3 = BSS end
* r4 = final kernel address (possibly with LSB set)
* r5 = appended dtb size (still unknown)
* r6 = _edata
@@ -332,8 +323,6 @@ restart: adr r0, LC0
* r8 = atags/device tree pointer
* r9 = size of decompressed image
* r10 = end of this image, including bss/stack/malloc space if non XIP
- * r11 = GOT start
- * r12 = GOT end
* sp = stack pointer
*
* if there are device trees (dtb) appended to zImage, advance r10 so that the
@@ -381,7 +370,6 @@ restart: adr r0, LC0
/* temporarily relocate the stack past the DTB work space */
add sp, sp, r5
- stmfd sp!, {r0-r3, ip, lr}
mov r0, r8
mov r1, r6
mov r2, r5
@@ -400,7 +388,6 @@ restart: adr r0, LC0
mov r2, r5
bleq atags_to_fdt
- ldmfd sp!, {r0-r3, ip, lr}
sub sp, sp, r5
#endif
@@ -537,6 +524,10 @@ dtb_check_done:
mov pc, r0
wont_overwrite:
+ adr r0, LC0
+ ldmia r0, {r1, r2, r3, r11, r12}
+ sub r0, r0, r1 @ calculate the delta offset
+
/*
* If delta is zero, we are running at the address we were linked at.
* r0 = delta
@@ -660,13 +651,18 @@ not_relocated: mov r0, #0
LC0: .word LC0 @ r1
.word __bss_start @ r2
.word _end @ r3
- .word _edata @ r6
.word _got_start @ r11
.word _got_end @ ip
- .word .L_user_stack_end @ sp
- .word _end - restart + 16384 + 1024*1024
.size LC0, . - LC0
+ .type LC1, #object
+LC1: .word .L_user_stack_end - LC1 @ sp
+ .word _edata - LC1 @ r6
+ .size LC1, . - LC1
+
+.Lheadroom:
+ .word _end - restart + 16384 + 1024*1024
+
.Linflated_image_size_offset:
.long (input_data_end - 4) - .
@@ -1434,38 +1430,26 @@ reloc_code_end:
#ifdef CONFIG_EFI_STUB
ENTRY(efi_enter_kernel)
- mov r7, r0 @ preserve image base
- mov r4, r1 @ preserve DT pointer
+ mov r4, r0 @ preserve image base
+ mov r8, r1 @ preserve DT pointer
- mov r0, r4 @ DT start
- add r1, r4, r2 @ DT end
- bl cache_clean_flush
+ mrc p15, 0, r0, c1, c0, 0 @ read SCTLR
+ tst r0, #0x1 @ MMU enabled?
+ orreq r4, r4, #1 @ set LSB if not
- mov r0, r7 @ relocated zImage
- ldr r1, =_edata @ size of zImage
- add r1, r1, r0 @ end of zImage
+ mov r0, r8 @ DT start
+ add r1, r8, r2 @ DT end
bl cache_clean_flush
- @ The PE/COFF loader might not have cleaned the code we are
- @ running beyond the PoU, and so calling cache_off below from
- @ inside the PE/COFF loader allocated region is unsafe unless
- @ we explicitly clean it to the PoC.
- ARM( adrl r0, call_cache_fn )
- THUMB( adr r0, call_cache_fn ) @ region of code we will
- adr r1, 0f @ run with MMU off
- bl cache_clean_flush
- bl cache_off
+ adr r0, 0f @ switch to our stack
+ ldr sp, [r0]
+ add sp, sp, r0
- @ Set parameters for booting zImage according to boot protocol
- @ put FDT address in r2, it was returned by efi_entry()
- @ r1 is the machine type, and r0 needs to be 0
- mov r0, #0
- mov r1, #0xFFFFFFFF
- mov r2, r4
- add r7, r7, #(__efi_start - start)
- mov pc, r7 @ no mode switch
+ mov r5, #0 @ appended DTB size
+ mov r7, #0xFFFFFFFF @ machine ID
+ b wont_overwrite
ENDPROC(efi_enter_kernel)
-0:
+0: .long .L_user_stack_end - .
#endif
.align
diff --git a/arch/arm/boot/compressed/libfdt_env.h b/arch/arm/boot/compressed/libfdt_env.h
deleted file mode 100644
index 6a0f1f524466..000000000000
--- a/arch/arm/boot/compressed/libfdt_env.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ARM_LIBFDT_ENV_H
-#define _ARM_LIBFDT_ENV_H
-
-#include <linux/limits.h>
-#include <linux/types.h>
-#include <linux/string.h>
-#include <asm/byteorder.h>
-
-#define INT32_MAX S32_MAX
-#define UINT32_MAX U32_MAX
-
-typedef __be16 fdt16_t;
-typedef __be32 fdt32_t;
-typedef __be64 fdt64_t;
-
-#define fdt16_to_cpu(x) be16_to_cpu(x)
-#define cpu_to_fdt16(x) cpu_to_be16(x)
-#define fdt32_to_cpu(x) be32_to_cpu(x)
-#define cpu_to_fdt32(x) cpu_to_be32(x)
-#define fdt64_to_cpu(x) be64_to_cpu(x)
-#define cpu_to_fdt64(x) cpu_to_be64(x)
-
-#endif
diff --git a/arch/arm/boot/compressed/vmlinux.lds.S b/arch/arm/boot/compressed/vmlinux.lds.S
index f82b5962d97e..09ac33f52814 100644
--- a/arch/arm/boot/compressed/vmlinux.lds.S
+++ b/arch/arm/boot/compressed/vmlinux.lds.S
@@ -63,9 +63,11 @@ SECTIONS
_etext = .;
.got.plt : { *(.got.plt) }
+#ifndef CONFIG_EFI_STUB
_got_start = .;
.got : { *(.got) }
_got_end = .;
+#endif
/* ensure the zImage file size is always a multiple of 64 bits */
/* (without a dummy byte, ld just ignores the empty section) */
@@ -74,11 +76,14 @@ SECTIONS
#ifdef CONFIG_EFI_STUB
.data : ALIGN(4096) {
__pecoff_data_start = .;
+ _got_start = .;
+ *(.got)
+ _got_end = .;
/*
* The EFI stub always executes from RAM, and runs strictly before the
* decompressor, so we can make an exception for its r/w data, and keep it
*/
- *(.data.efistub)
+ *(.data.efistub .bss.efistub)
__pecoff_data_end = .;
/*
diff --git a/arch/arm/boot/dts/am335x-guardian.dts b/arch/arm/boot/dts/am335x-guardian.dts
index 81e0f63e94d3..0ebe9e2c150e 100644
--- a/arch/arm/boot/dts/am335x-guardian.dts
+++ b/arch/arm/boot/dts/am335x-guardian.dts
@@ -105,6 +105,7 @@
ti,timers = <&timer7>;
pinctrl-names = "default";
pinctrl-0 = <&dmtimer7_pins>;
+ ti,clock-source = <0x01>;
};
vmmcsd_fixed: regulator-3v3 {
diff --git a/arch/arm/boot/dts/am3517-evm.dts b/arch/arm/boot/dts/am3517-evm.dts
index a1fd3e63e86e..92466b9eb6ba 100644
--- a/arch/arm/boot/dts/am3517-evm.dts
+++ b/arch/arm/boot/dts/am3517-evm.dts
@@ -156,6 +156,7 @@
pinctrl-0 = <&pwm_pins>;
ti,timers = <&timer11>;
#pwm-cells = <3>;
+ ti,clock-source = <0x01>;
};
/* HS USB Host PHY on PORT 1 */
diff --git a/arch/arm/boot/dts/logicpd-torpedo-baseboard.dtsi b/arch/arm/boot/dts/logicpd-torpedo-baseboard.dtsi
index f7b82ced4080..381f0e82bb70 100644
--- a/arch/arm/boot/dts/logicpd-torpedo-baseboard.dtsi
+++ b/arch/arm/boot/dts/logicpd-torpedo-baseboard.dtsi
@@ -65,6 +65,7 @@
pinctrl-0 = <&pwm_pins>;
ti,timers = <&timer10>;
#pwm-cells = <3>;
+ ti,clock-source = <0x01>;
};
};
diff --git a/arch/arm/boot/dts/omap3-gta04.dtsi b/arch/arm/boot/dts/omap3-gta04.dtsi
index 409a758c99f1..ecc45862b4f3 100644
--- a/arch/arm/boot/dts/omap3-gta04.dtsi
+++ b/arch/arm/boot/dts/omap3-gta04.dtsi
@@ -150,6 +150,7 @@
compatible = "ti,omap-dmtimer-pwm";
ti,timers = <&timer11>;
#pwm-cells = <3>;
+ ti,clock-source = <0x01>;
};
hsusb2_phy: hsusb2_phy {
diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig
index 8b83d4a5d309..fe383f5a92fb 100644
--- a/arch/arm/configs/omap2plus_defconfig
+++ b/arch/arm/configs/omap2plus_defconfig
@@ -81,7 +81,7 @@ CONFIG_PARTITION_ADVANCED=y
CONFIG_BINFMT_MISC=y
CONFIG_CMA=y
CONFIG_ZSMALLOC=m
-CONFIG_PGTABLE_MAPPING=y
+CONFIG_ZSMALLOC_PGTABLE_MAPPING=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
diff --git a/arch/arm/crypto/sha1_glue.c b/arch/arm/crypto/sha1_glue.c
index c80b0ebfd02f..4e954b3f7ecd 100644
--- a/arch/arm/crypto/sha1_glue.c
+++ b/arch/arm/crypto/sha1_glue.c
@@ -14,7 +14,6 @@
#include <crypto/internal/hash.h>
#include <linux/init.h>
#include <linux/module.h>
-#include <linux/cryptohash.h>
#include <linux/types.h>
#include <crypto/sha.h>
#include <crypto/sha1_base.h>
diff --git a/arch/arm/crypto/sha1_neon_glue.c b/arch/arm/crypto/sha1_neon_glue.c
index 2c3627334335..0071e5e4411a 100644
--- a/arch/arm/crypto/sha1_neon_glue.c
+++ b/arch/arm/crypto/sha1_neon_glue.c
@@ -18,7 +18,6 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
-#include <linux/cryptohash.h>
#include <linux/types.h>
#include <crypto/sha.h>
#include <crypto/sha1_base.h>
diff --git a/arch/arm/crypto/sha256_glue.c b/arch/arm/crypto/sha256_glue.c
index 215497f011f2..b8a4f79020cf 100644
--- a/arch/arm/crypto/sha256_glue.c
+++ b/arch/arm/crypto/sha256_glue.c
@@ -15,7 +15,6 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
-#include <linux/cryptohash.h>
#include <linux/types.h>
#include <linux/string.h>
#include <crypto/sha.h>
diff --git a/arch/arm/crypto/sha256_neon_glue.c b/arch/arm/crypto/sha256_neon_glue.c
index 38645e415196..79820b9e2541 100644
--- a/arch/arm/crypto/sha256_neon_glue.c
+++ b/arch/arm/crypto/sha256_neon_glue.c
@@ -11,7 +11,6 @@
#include <crypto/internal/hash.h>
#include <crypto/internal/simd.h>
-#include <linux/cryptohash.h>
#include <linux/types.h>
#include <linux/string.h>
#include <crypto/sha.h>
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 3546d294d55f..feac2c8b86f2 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -269,10 +269,9 @@
.endif ;\
.popsection
#define ALT_UP_B(label) \
- .equ up_b_offset, label - 9998b ;\
.pushsection ".alt.smp.init", "a" ;\
.long 9998b ;\
- W(b) . + up_b_offset ;\
+ W(b) . + (label - 9998b) ;\
.popsection
#else
#define ALT_SMP(instr...)
diff --git a/arch/arm/include/asm/checksum.h b/arch/arm/include/asm/checksum.h
index 20043e0ebb07..ed6073fee338 100644
--- a/arch/arm/include/asm/checksum.h
+++ b/arch/arm/include/asm/checksum.h
@@ -40,6 +40,20 @@ csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum);
__wsum
csum_partial_copy_from_user(const void __user *src, void *dst, int len, __wsum sum, int *err_ptr);
+#define _HAVE_ARCH_COPY_AND_CSUM_FROM_USER
+static inline
+__wsum csum_and_copy_from_user (const void __user *src, void *dst,
+ int len, __wsum sum, int *err_ptr)
+{
+ if (access_ok(src, len))
+ return csum_partial_copy_from_user(src, dst, len, sum, err_ptr);
+
+ if (len)
+ *err_ptr = -EFAULT;
+
+ return sum;
+}
+
/*
* Fold a partial checksum without adding pseudo headers
*/
diff --git a/arch/arm/include/asm/efi.h b/arch/arm/include/asm/efi.h
index 5ac46e2860bc..9383f236e795 100644
--- a/arch/arm/include/asm/efi.h
+++ b/arch/arm/include/asm/efi.h
@@ -50,14 +50,6 @@ void efi_virtmap_unload(void);
/* arch specific definitions used by the stub code */
-#define efi_bs_call(func, ...) efi_system_table()->boottime->func(__VA_ARGS__)
-#define efi_rt_call(func, ...) efi_system_table()->runtime->func(__VA_ARGS__)
-#define efi_is_native() (true)
-
-#define efi_table_attr(inst, attr) (inst->attr)
-
-#define efi_call_proto(inst, func, ...) inst->func(inst, ##__VA_ARGS__)
-
struct screen_info *alloc_screen_info(void);
void free_screen_info(struct screen_info *si);
diff --git a/arch/arm/include/asm/floppy.h b/arch/arm/include/asm/floppy.h
index 79fa327238e8..e1cb04ed5008 100644
--- a/arch/arm/include/asm/floppy.h
+++ b/arch/arm/include/asm/floppy.h
@@ -9,20 +9,20 @@
#ifndef __ASM_ARM_FLOPPY_H
#define __ASM_ARM_FLOPPY_H
-#define fd_outb(val,port) \
+#define fd_outb(val, base, reg) \
do { \
int new_val = (val); \
- if (((port) & 7) == FD_DOR) { \
+ if ((reg) == FD_DOR) { \
if (new_val & 0xf0) \
new_val = (new_val & 0x0c) | \
floppy_selects[new_val & 3]; \
else \
new_val &= 0x0c; \
} \
- outb(new_val, (port)); \
+ outb(new_val, (base) + (reg)); \
} while(0)
-#define fd_inb(port) inb((port))
+#define fd_inb(base, reg) inb((base) + (reg))
#define fd_request_irq() request_irq(IRQ_FLOPPYDISK,floppy_interrupt,\
0,"floppy",NULL)
#define fd_free_irq() free_irq(IRQ_FLOPPYDISK,NULL)
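
Aside: the fd_inb()/fd_outb() signature change above is mechanical, but it is easier to see with a call spelled out. A self-contained sketch with invented names (the real macros live in <asm/floppy.h> and call inb()/outb()):

#include <stdio.h>

/* Stand-in for the real port read; just logs the composed port. */
static unsigned char demo_inb(unsigned int port)
{
	printf("inb(0x%x)\n", port);
	return 0;
}

/* New-style helper: base port and register offset are combined here,
 * once, instead of at every call site. */
#define demo_fd_inb(base, reg)	demo_inb((base) + (reg))

#define DEMO_FD_BASE	0x3f0	/* typical primary FDC I/O base */
#define DEMO_FD_STATUS	4	/* main status register offset */

int main(void)
{
	/* Old convention: demo_inb(DEMO_FD_BASE + DEMO_FD_STATUS);
	 * keeping the offset separate lets per-arch versions (like the
	 * ARM fd_outb above) key off the register instead of masking
	 * the low bits of a pre-added port number. */
	demo_fd_inb(DEMO_FD_BASE, DEMO_FD_STATUS);
	return 0;
}
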
diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c
index 98bdea51089d..82e96ac83684 100644
--- a/arch/arm/kernel/armksyms.c
+++ b/arch/arm/kernel/armksyms.c
@@ -7,7 +7,6 @@
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/string.h>
-#include <linux/cryptohash.h>
#include <linux/delay.h>
#include <linux/in6.h>
#include <linux/syscalls.h>
diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c
index deef17f34bd2..af0a8500a24e 100644
--- a/arch/arm/kernel/module.c
+++ b/arch/arm/kernel/module.c
@@ -55,6 +55,13 @@ void *module_alloc(unsigned long size)
}
#endif
+bool module_init_section(const char *name)
+{
+ return strstarts(name, ".init") ||
+ strstarts(name, ".ARM.extab.init") ||
+ strstarts(name, ".ARM.exidx.init");
+}
+
bool module_exit_section(const char *name)
{
return strstarts(name, ".exit") ||
@@ -409,8 +416,17 @@ module_arch_cleanup(struct module *mod)
#ifdef CONFIG_ARM_UNWIND
int i;
- for (i = 0; i < ARM_SEC_MAX; i++)
- if (mod->arch.unwind[i])
- unwind_table_del(mod->arch.unwind[i]);
+ for (i = 0; i < ARM_SEC_MAX; i++) {
+ unwind_table_del(mod->arch.unwind[i]);
+ mod->arch.unwind[i] = NULL;
+ }
+#endif
+}
+
+void __weak module_arch_freeing_init(struct module *mod)
+{
+#ifdef CONFIG_ARM_UNWIND
+ unwind_table_del(mod->arch.unwind[ARM_SEC_INIT]);
+ mod->arch.unwind[ARM_SEC_INIT] = NULL;
#endif
}
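
Aside: the new module_init_section() hook above is a plain prefix match on section names. A stand-alone illustration of that classification, using a local lookalike of the kernel's strstarts():

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

/* Local stand-in for the kernel's strstarts() helper. */
static bool starts_with(const char *str, const char *prefix)
{
	return strncmp(str, prefix, strlen(prefix)) == 0;
}

/* Mirrors the logic added above: init code plus the ARM unwind
 * sections that describe it are grouped with the init region, so
 * their unwind tables can be dropped when the init memory is freed. */
static bool demo_is_init_section(const char *name)
{
	return starts_with(name, ".init") ||
	       starts_with(name, ".ARM.extab.init") ||
	       starts_with(name, ".ARM.exidx.init");
}

int main(void)
{
	const char *names[] = {
		".init.text", ".ARM.exidx.init.text", ".text", ".exit.text",
	};

	for (unsigned int i = 0; i < sizeof(names) / sizeof(names[0]); i++)
		printf("%-22s -> %s\n", names[i],
		       demo_is_init_section(names[i]) ? "init" : "core");
	return 0;
}
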
diff --git a/arch/arm/kernel/sys_oabi-compat.c b/arch/arm/kernel/sys_oabi-compat.c
index 17bd32b22371..0203e545bbc8 100644
--- a/arch/arm/kernel/sys_oabi-compat.c
+++ b/arch/arm/kernel/sys_oabi-compat.c
@@ -253,20 +253,15 @@ asmlinkage long sys_oabi_epoll_ctl(int epfd, int op, int fd,
{
struct oabi_epoll_event user;
struct epoll_event kernel;
- mm_segment_t fs;
- long ret;
- if (op == EPOLL_CTL_DEL)
- return sys_epoll_ctl(epfd, op, fd, NULL);
- if (copy_from_user(&user, event, sizeof(user)))
+ if (ep_op_has_event(op) &&
+ copy_from_user(&user, event, sizeof(user)))
return -EFAULT;
+
kernel.events = user.events;
kernel.data = user.data;
- fs = get_fs();
- set_fs(KERNEL_DS);
- ret = sys_epoll_ctl(epfd, op, fd, &kernel);
- set_fs(fs);
- return ret;
+
+ return do_epoll_ctl(epfd, op, fd, &kernel, false);
}
asmlinkage long sys_oabi_epoll_wait(int epfd,
diff --git a/arch/arm/mach-sa1100/shannon.c b/arch/arm/mach-sa1100/shannon.c
index 5bc82e2671c6..351f891b4842 100644
--- a/arch/arm/mach-sa1100/shannon.c
+++ b/arch/arm/mach-sa1100/shannon.c
@@ -104,6 +104,14 @@ static struct fixed_voltage_config shannon_cf_vcc_pdata __initdata = {
.enabled_at_boot = 1,
};
+static struct gpiod_lookup_table shannon_display_gpio_table = {
+ .dev_id = "sa11x0-fb",
+ .table = {
+ GPIO_LOOKUP("gpio", 22, "shannon-lcden", GPIO_ACTIVE_HIGH),
+ { },
+ },
+};
+
static void __init shannon_init(void)
{
sa11x0_register_fixed_regulator(0, &shannon_cf_vcc_pdata,
@@ -113,6 +121,7 @@ static void __init shannon_init(void)
sa11x0_register_pcmcia(0, &shannon_pcmcia0_gpio_table);
sa11x0_register_pcmcia(1, &shannon_pcmcia1_gpio_table);
sa11x0_ppc_configure_mcp();
+ gpiod_add_lookup_table(&shannon_display_gpio_table);
sa11x0_register_lcd(&shannon_lcd_info);
sa11x0_register_mtd(&shannon_flash_data, &shannon_flash_resource, 1);
sa11x0_register_mcp(&shannon_mcp_data);
diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S
index 5461d589a1e2..60ac7c5999a9 100644
--- a/arch/arm/mm/proc-macros.S
+++ b/arch/arm/mm/proc-macros.S
@@ -5,6 +5,7 @@
* VMA_VM_FLAGS
* VM_EXEC
*/
+#include <linux/const.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
@@ -30,7 +31,7 @@
* act_mm - get current->active_mm
*/
.macro act_mm, rd
- bic \rd, sp, #8128
+ bic \rd, sp, #(THREAD_SIZE - 1) & ~63
bic \rd, \rd, #63
ldr \rd, [\rd, #TI_TASK]
.if (TSK_ACTIVE_MM > IMM12_MASK)
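
Aside: the act_mm change above swaps a magic 8128 mask for one derived from THREAD_SIZE. In C terms the assembly is doing roughly the following (sketch with invented names, assuming the usual 8 KiB arm32 kernel stack; 8128 is exactly (THREAD_SIZE - 1) & ~63, which is why the old constant happened to work):

#include <stdint.h>

#define DEMO_THREAD_SIZE	8192	/* assumed 8 KiB kernel stack */

struct demo_thread_info {
	void *task;			/* stand-in for the TI_TASK slot */
};

/*
 * The kernel stack and its thread_info share one THREAD_SIZE-aligned
 * allocation, so clearing the low bits of any address inside the stack
 * (here, the stack pointer) lands on the thread_info at the base of
 * that allocation.  The assembly needs two BIC instructions because a
 * single ARM immediate cannot encode the full mask, hence the
 * "& ~63" followed by a second "bic rd, rd, #63".
 */
struct demo_thread_info *demo_current_thread_info(uintptr_t sp)
{
	return (struct demo_thread_info *)(sp & ~(uintptr_t)(DEMO_THREAD_SIZE - 1));
}
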
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index 4d1cf74a2caa..d5cae5ffede0 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -451,3 +451,4 @@
435 common clone3 sys_clone3
437 common openat2 sys_openat2
438 common pidfd_getfd sys_pidfd_getfd
+439 common faccessat2 sys_faccessat2
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 5d513f461957..552d36cacc05 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -9,6 +9,7 @@ config ARM64
select ACPI_MCFG if (ACPI && PCI)
select ACPI_SPCR_TABLE if ACPI
select ACPI_PPTT if ACPI
+ select ARCH_BINFMT_ELF_STATE
select ARCH_HAS_DEBUG_VIRTUAL
select ARCH_HAS_DEVMEM_IS_ALLOWED
select ARCH_HAS_DMA_PREP_COHERENT
@@ -33,6 +34,7 @@ config ARM64
select ARCH_HAS_SYSCALL_WRAPPER
select ARCH_HAS_TEARDOWN_DMA_OPS if IOMMU_SUPPORT
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
+ select ARCH_HAVE_ELF_PROT
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select ARCH_INLINE_READ_LOCK if !PREEMPTION
select ARCH_INLINE_READ_LOCK_BH if !PREEMPTION
@@ -62,9 +64,12 @@ config ARM64
select ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE if !PREEMPTION
select ARCH_KEEP_MEMBLOCK
select ARCH_USE_CMPXCHG_LOCKREF
+ select ARCH_USE_GNU_PROPERTY
select ARCH_USE_QUEUED_RWLOCKS
select ARCH_USE_QUEUED_SPINLOCKS
+ select ARCH_USE_SYM_ANNOTATIONS
select ARCH_SUPPORTS_MEMORY_FAILURE
+ select ARCH_SUPPORTS_SHADOW_CALL_STACK if CC_HAVE_SHADOW_CALL_STACK
select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && (GCC_VERSION >= 50000 || CC_IS_CLANG)
select ARCH_SUPPORTS_NUMA_BALANCING
@@ -525,13 +530,13 @@ config ARM64_ERRATUM_1418040
If unsure, say Y.
-config ARM64_WORKAROUND_SPECULATIVE_AT_VHE
+config ARM64_WORKAROUND_SPECULATIVE_AT
bool
config ARM64_ERRATUM_1165522
- bool "Cortex-A76: Speculative AT instruction using out-of-context translation regime could cause subsequent request to generate an incorrect translation"
+ bool "Cortex-A76: 1165522: Speculative AT instruction using out-of-context translation regime could cause subsequent request to generate an incorrect translation"
default y
- select ARM64_WORKAROUND_SPECULATIVE_AT_VHE
+ select ARM64_WORKAROUND_SPECULATIVE_AT
help
This option adds a workaround for ARM Cortex-A76 erratum 1165522.
@@ -541,10 +546,23 @@ config ARM64_ERRATUM_1165522
If unsure, say Y.
+config ARM64_ERRATUM_1319367
+ bool "Cortex-A57/A72: 1319537: Speculative AT instruction using out-of-context translation regime could cause subsequent request to generate an incorrect translation"
+ default y
+ select ARM64_WORKAROUND_SPECULATIVE_AT
+ help
+ This option adds workarounds for ARM Cortex-A57 erratum 1319537
+ and A72 erratum 1319367.
+
+ Cortex-A57 and A72 cores could end up with corrupted TLBs by
+ speculating an AT instruction during a guest context switch.
+
+ If unsure, say Y.
+
config ARM64_ERRATUM_1530923
- bool "Cortex-A55: Speculative AT instruction using out-of-context translation regime could cause subsequent request to generate an incorrect translation"
+ bool "Cortex-A55: 1530923: Speculative AT instruction using out-of-context translation regime could cause subsequent request to generate an incorrect translation"
default y
- select ARM64_WORKAROUND_SPECULATIVE_AT_VHE
+ select ARM64_WORKAROUND_SPECULATIVE_AT
help
This option adds a workaround for ARM Cortex-A55 erratum 1530923.
@@ -554,6 +572,9 @@ config ARM64_ERRATUM_1530923
If unsure, say Y.
+config ARM64_WORKAROUND_REPEAT_TLBI
+ bool
+
config ARM64_ERRATUM_1286807
bool "Cortex-A76: Modification of the translation table for a virtual address might lead to read-after-read ordering violation"
default y
@@ -570,22 +591,6 @@ config ARM64_ERRATUM_1286807
invalidated has been observed by other observers. The
workaround repeats the TLBI+DSB operation.
-config ARM64_WORKAROUND_SPECULATIVE_AT_NVHE
- bool
-
-config ARM64_ERRATUM_1319367
- bool "Cortex-A57/A72: Speculative AT instruction using out-of-context translation regime could cause subsequent request to generate an incorrect translation"
- default y
- select ARM64_WORKAROUND_SPECULATIVE_AT_NVHE
- help
- This option adds work arounds for ARM Cortex-A57 erratum 1319537
- and A72 erratum 1319367
-
- Cortex-A57 and A72 cores could end-up with corrupted TLBs by
- speculating an AT instruction during a guest context switch.
-
- If unsure, say Y.
-
config ARM64_ERRATUM_1463225
bool "Cortex-A76: Software Step might prevent interrupt recognition"
default y
@@ -695,6 +700,35 @@ config CAVIUM_TX2_ERRATUM_219
If unsure, say Y.
+config FUJITSU_ERRATUM_010001
+ bool "Fujitsu-A64FX erratum E#010001: Undefined fault may occur wrongly"
+ default y
+ help
+ This option adds a workaround for Fujitsu-A64FX erratum E#010001.
+ On some variants of the Fujitsu-A64FX cores ver(1.0, 1.1), memory
+ accesses may cause undefined fault (Data abort, DFSC=0b111111).
+ This fault occurs under a specific hardware condition when a
+ load/store instruction performs an address translation using:
+ case-1 TTBR0_EL1 with TCR_EL1.NFD0 == 1.
+ case-2 TTBR0_EL2 with TCR_EL2.NFD0 == 1.
+ case-3 TTBR1_EL1 with TCR_EL1.NFD1 == 1.
+ case-4 TTBR1_EL2 with TCR_EL2.NFD1 == 1.
+
+ The workaround is to ensure these bits are clear in TCR_ELx.
+ The workaround only affects the Fujitsu-A64FX.
+
+ If unsure, say Y.
+
+config HISILICON_ERRATUM_161600802
+ bool "Hip07 161600802: Erroneous redistributor VLPI base"
+ default y
+ help
+ The HiSilicon Hip07 SoC uses the wrong redistributor base
+ when ITS commands such as VMOVP and VMAPP are issued, and requires
+ a 128kB offset to be applied to the target address in these commands.
+
+ If unsure, say Y.
+
config QCOM_FALKOR_ERRATUM_1003
bool "Falkor E1003: Incorrect translation due to ASID change"
default y
@@ -706,9 +740,6 @@ config QCOM_FALKOR_ERRATUM_1003
is unchanged. Work around the erratum by invalidating the walk cache
entries for the trampoline before entering the kernel proper.
-config ARM64_WORKAROUND_REPEAT_TLBI
- bool
-
config QCOM_FALKOR_ERRATUM_1009
bool "Falkor E1009: Prematurely complete a DSB after a TLBI"
default y
@@ -730,25 +761,6 @@ config QCOM_QDF2400_ERRATUM_0065
If unsure, say Y.
-config SOCIONEXT_SYNQUACER_PREITS
- bool "Socionext Synquacer: Workaround for GICv3 pre-ITS"
- default y
- help
- Socionext Synquacer SoCs implement a separate h/w block to generate
- MSI doorbell writes with non-zero values for the device ID.
-
- If unsure, say Y.
-
-config HISILICON_ERRATUM_161600802
- bool "Hip07 161600802: Erroneous redistributor VLPI base"
- default y
- help
- The HiSilicon Hip07 SoC uses the wrong redistributor base
- when issued ITS commands such as VMOVP and VMAPP, and requires
- a 128kB offset to be applied to the target address in this commands.
-
- If unsure, say Y.
-
config QCOM_FALKOR_ERRATUM_E1041
bool "Falkor E1041: Speculative instruction fetches might cause errant memory access"
default y
@@ -759,22 +771,12 @@ config QCOM_FALKOR_ERRATUM_E1041
If unsure, say Y.
-config FUJITSU_ERRATUM_010001
- bool "Fujitsu-A64FX erratum E#010001: Undefined fault may occur wrongly"
+config SOCIONEXT_SYNQUACER_PREITS
+ bool "Socionext Synquacer: Workaround for GICv3 pre-ITS"
default y
help
- This option adds a workaround for Fujitsu-A64FX erratum E#010001.
- On some variants of the Fujitsu-A64FX cores ver(1.0, 1.1), memory
- accesses may cause undefined fault (Data abort, DFSC=0b111111).
- This fault occurs under a specific hardware condition when a
- load/store instruction performs an address translation using:
- case-1 TTBR0_EL1 with TCR_EL1.NFD0 == 1.
- case-2 TTBR0_EL2 with TCR_EL2.NFD0 == 1.
- case-3 TTBR1_EL1 with TCR_EL1.NFD1 == 1.
- case-4 TTBR1_EL2 with TCR_EL2.NFD1 == 1.
-
- The workaround is to ensure these bits are clear in TCR_ELx.
- The workaround only affects the Fujitsu-A64FX.
+ Socionext Synquacer SoCs implement a separate h/w block to generate
+ MSI doorbell writes with non-zero values for the device ID.
If unsure, say Y.
@@ -1026,6 +1028,10 @@ config ARCH_HAS_CACHE_LINE_SIZE
config ARCH_ENABLE_SPLIT_PMD_PTLOCK
def_bool y if PGTABLE_LEVELS > 2
+# Supported by clang >= 7.0
+config CC_HAVE_SHADOW_CALL_STACK
+ def_bool $(cc-option, -fsanitize=shadow-call-stack -ffixed-x18)
+
config SECCOMP
bool "Enable seccomp to safely compute untrusted bytecode"
---help---
@@ -1585,6 +1591,48 @@ endmenu
menu "ARMv8.5 architectural features"
+config ARM64_BTI
+ bool "Branch Target Identification support"
+ default y
+ help
+ Branch Target Identification (part of the ARMv8.5 Extensions)
+ provides a mechanism to limit the set of locations to which computed
+ branch instructions such as BR or BLR can jump.
+
+ To make use of BTI on CPUs that support it, say Y.
+
+ BTI is intended to provide complementary protection to other control
+ flow integrity protection mechanisms, such as the Pointer
+ authentication mechanism provided as part of the ARMv8.3 Extensions.
+ For this reason, it does not make sense to enable this option without
+ also enabling support for pointer authentication. Thus, when
+ enabling this option you should also select ARM64_PTR_AUTH=y.
+
+ Userspace binaries must also be specifically compiled to make use of
+ this mechanism. If you say N here or the hardware does not support
+ BTI, such binaries can still run, but you get no additional
+ enforcement of branch destinations.
+
+config ARM64_BTI_KERNEL
+ bool "Use Branch Target Identification for kernel"
+ default y
+ depends on ARM64_BTI
+ depends on ARM64_PTR_AUTH
+ depends on CC_HAS_BRANCH_PROT_PAC_RET_BTI
+ # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94697
+ depends on !CC_IS_GCC || GCC_VERSION >= 100100
+ depends on !(CC_IS_CLANG && GCOV_KERNEL)
+ depends on (!FUNCTION_GRAPH_TRACER || DYNAMIC_FTRACE_WITH_REGS)
+ help
+ Build the kernel with Branch Target Identification annotations
+ and enable enforcement of this for kernel code. When this option
+ is enabled and the system supports BTI, all kernel code, including
+ modular code, must have BTI enabled.
+
+config CC_HAS_BRANCH_PROT_PAC_RET_BTI
+ # GCC 9 or later, clang 8 or later
+ def_bool $(cc-option,-mbranch-protection=pac-ret+leaf+bti)
+
config ARM64_E0PD
bool "Enable support for E0PD"
default y
@@ -1786,7 +1834,7 @@ config EFI
select EFI_PARAMS_FROM_FDT
select EFI_RUNTIME_WRAPPERS
select EFI_STUB
- select EFI_ARMSTUB
+ select EFI_GENERIC_STUB
default y
help
This option provides support for runtime services provided
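
Aside (editor's sketch): the practical effect of ARM64_BTI_KERNEL is the -mbranch-protection=pac-ret+leaf+bti flag selected in the arm64 Makefile hunk below. A trivial compile target for seeing what that flag emits; the names are invented and the expected output is described only loosely:

/*
 * Sketch only: compile with
 *   gcc -O2 -S -mbranch-protection=pac-ret+leaf+bti bti_demo.c
 * (aarch64 gcc >= 9 or clang >= 8, per the CC_HAS_BRANCH_PROT_PAC_RET_BTI
 * test above) and look for the landing pads at function entry: "bti c",
 * shown as "hint #34" by older assemblers, plus paciasp/autiasp for the
 * return-address signing half of the option.  An indirect branch to an
 * address without a landing pad faults once BTI is enforced for the page.
 */
typedef int (*demo_fn)(int);

int add_one(int v)
{
	return v + 1;		/* indirectly callable, so it needs a pad */
}

int call_indirect(demo_fn f, int v)
{
	return f(v);		/* compiled to BLR, the checked branch */
}
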
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 85e4149cc5d5..650e1185c190 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -12,7 +12,6 @@
LDFLAGS_vmlinux :=--no-undefined -X
CPPFLAGS_vmlinux.lds = -DTEXT_OFFSET=$(TEXT_OFFSET)
-GZFLAGS :=-9
ifeq ($(CONFIG_RELOCATABLE), y)
# Pass --no-apply-dynamic-relocs to restore pre-binutils-2.27 behaviour
@@ -71,7 +70,14 @@ branch-prot-flags-y += $(call cc-option,-mbranch-protection=none)
ifeq ($(CONFIG_ARM64_PTR_AUTH),y)
branch-prot-flags-$(CONFIG_CC_HAS_SIGN_RETURN_ADDRESS) := -msign-return-address=all
+# We enable additional protection for leaf functions as there is some
+# narrow potential for ROP protection benefits and no substantial
+# performance impact has been observed.
+ifeq ($(CONFIG_ARM64_BTI_KERNEL),y)
+branch-prot-flags-$(CONFIG_CC_HAS_BRANCH_PROT_PAC_RET_BTI) := -mbranch-protection=pac-ret+leaf+bti
+else
branch-prot-flags-$(CONFIG_CC_HAS_BRANCH_PROT_PAC_RET) := -mbranch-protection=pac-ret+leaf
+endif
# -march=armv8.3-a enables the non-nops instructions for PAC, to avoid the
# compiler to generate them and consequently to break the single image contract
# we pass it only to the assembler. This option is utilized only in case of non
@@ -81,6 +87,10 @@ endif
KBUILD_CFLAGS += $(branch-prot-flags-y)
+ifeq ($(CONFIG_SHADOW_CALL_STACK), y)
+KBUILD_CFLAGS += -ffixed-x18
+endif
+
ifeq ($(CONFIG_CPU_BIG_ENDIAN), y)
KBUILD_CPPFLAGS += -mbig-endian
CHECKFLAGS += -D__AARCH64EB__
@@ -118,7 +128,7 @@ TEXT_OFFSET := $(shell awk "BEGIN {srand(); printf \"0x%06x\n\", \
int(2 * 1024 * 1024 / (2 ^ $(CONFIG_ARM64_PAGE_SHIFT)) * \
rand()) * (2 ^ $(CONFIG_ARM64_PAGE_SHIFT))}")
else
-TEXT_OFFSET := 0x00080000
+TEXT_OFFSET := 0x0
endif
ifeq ($(CONFIG_KASAN_SW_TAGS), y)
@@ -131,7 +141,7 @@ KBUILD_CFLAGS += -DKASAN_SHADOW_SCALE_SHIFT=$(KASAN_SHADOW_SCALE_SHIFT)
KBUILD_CPPFLAGS += -DKASAN_SHADOW_SCALE_SHIFT=$(KASAN_SHADOW_SCALE_SHIFT)
KBUILD_AFLAGS += -DKASAN_SHADOW_SCALE_SHIFT=$(KASAN_SHADOW_SCALE_SHIFT)
-export TEXT_OFFSET GZFLAGS
+export TEXT_OFFSET
core-y += arch/arm64/
libs-y := arch/arm64/lib/ $(libs-y)
diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c
index ed5409c6abf4..395bbf64b2ab 100644
--- a/arch/arm64/crypto/aes-glue.c
+++ b/arch/arm64/crypto/aes-glue.c
@@ -158,7 +158,6 @@ static int __maybe_unused essiv_cbc_set_key(struct crypto_skcipher *tfm,
unsigned int key_len)
{
struct crypto_aes_essiv_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
- SHASH_DESC_ON_STACK(desc, ctx->hash);
u8 digest[SHA256_DIGEST_SIZE];
int ret;
@@ -166,8 +165,7 @@ static int __maybe_unused essiv_cbc_set_key(struct crypto_skcipher *tfm,
if (ret)
return ret;
- desc->tfm = ctx->hash;
- crypto_shash_digest(desc, in_key, key_len, digest);
+ crypto_shash_tfm_digest(ctx->hash, in_key, key_len, digest);
return aes_expandkey(&ctx->key2, digest, sizeof(digest));
}
diff --git a/arch/arm64/crypto/crct10dif-ce-core.S b/arch/arm64/crypto/crct10dif-ce-core.S
index 5a95c2628fbf..111d9c9abddd 100644
--- a/arch/arm64/crypto/crct10dif-ce-core.S
+++ b/arch/arm64/crypto/crct10dif-ce-core.S
@@ -66,7 +66,7 @@
#include <asm/assembler.h>
.text
- .cpu generic+crypto
+ .arch armv8-a+crypto
init_crc .req w19
buf .req x20
diff --git a/arch/arm64/crypto/sha256-glue.c b/arch/arm64/crypto/sha256-glue.c
index ddf4a0d85c1c..77bc6e72abae 100644
--- a/arch/arm64/crypto/sha256-glue.c
+++ b/arch/arm64/crypto/sha256-glue.c
@@ -12,7 +12,6 @@
#include <crypto/internal/simd.h>
#include <crypto/sha.h>
#include <crypto/sha256_base.h>
-#include <linux/cryptohash.h>
#include <linux/types.h>
#include <linux/string.h>
diff --git a/arch/arm64/crypto/sha512-glue.c b/arch/arm64/crypto/sha512-glue.c
index 78d3083de6b7..370ccb29602f 100644
--- a/arch/arm64/crypto/sha512-glue.c
+++ b/arch/arm64/crypto/sha512-glue.c
@@ -6,7 +6,6 @@
*/
#include <crypto/internal/hash.h>
-#include <linux/cryptohash.h>
#include <linux/types.h>
#include <linux/string.h>
#include <crypto/sha.h>
diff --git a/arch/arm64/include/asm/asm_pointer_auth.h b/arch/arm64/include/asm/asm_pointer_auth.h
index ce2a8486992b..52dead2a8640 100644
--- a/arch/arm64/include/asm/asm_pointer_auth.h
+++ b/arch/arm64/include/asm/asm_pointer_auth.h
@@ -39,25 +39,58 @@ alternative_if ARM64_HAS_GENERIC_AUTH
alternative_else_nop_endif
.endm
- .macro ptrauth_keys_install_kernel tsk, sync, tmp1, tmp2, tmp3
-alternative_if ARM64_HAS_ADDRESS_AUTH
+ .macro __ptrauth_keys_install_kernel_nosync tsk, tmp1, tmp2, tmp3
mov \tmp1, #THREAD_KEYS_KERNEL
add \tmp1, \tsk, \tmp1
ldp \tmp2, \tmp3, [\tmp1, #PTRAUTH_KERNEL_KEY_APIA]
msr_s SYS_APIAKEYLO_EL1, \tmp2
msr_s SYS_APIAKEYHI_EL1, \tmp3
- .if \sync == 1
+ .endm
+
+ .macro ptrauth_keys_install_kernel_nosync tsk, tmp1, tmp2, tmp3
+alternative_if ARM64_HAS_ADDRESS_AUTH
+ __ptrauth_keys_install_kernel_nosync \tsk, \tmp1, \tmp2, \tmp3
+alternative_else_nop_endif
+ .endm
+
+ .macro ptrauth_keys_install_kernel tsk, tmp1, tmp2, tmp3
+alternative_if ARM64_HAS_ADDRESS_AUTH
+ __ptrauth_keys_install_kernel_nosync \tsk, \tmp1, \tmp2, \tmp3
isb
- .endif
alternative_else_nop_endif
.endm
+ .macro __ptrauth_keys_init_cpu tsk, tmp1, tmp2, tmp3
+ mrs \tmp1, id_aa64isar1_el1
+ ubfx \tmp1, \tmp1, #ID_AA64ISAR1_APA_SHIFT, #8
+ cbz \tmp1, .Lno_addr_auth\@
+ mov_q \tmp1, (SCTLR_ELx_ENIA | SCTLR_ELx_ENIB | \
+ SCTLR_ELx_ENDA | SCTLR_ELx_ENDB)
+ mrs \tmp2, sctlr_el1
+ orr \tmp2, \tmp2, \tmp1
+ msr sctlr_el1, \tmp2
+ __ptrauth_keys_install_kernel_nosync \tsk, \tmp1, \tmp2, \tmp3
+ isb
+.Lno_addr_auth\@:
+ .endm
+
+ .macro ptrauth_keys_init_cpu tsk, tmp1, tmp2, tmp3
+alternative_if_not ARM64_HAS_ADDRESS_AUTH
+ b .Lno_addr_auth\@
+alternative_else_nop_endif
+ __ptrauth_keys_init_cpu \tsk, \tmp1, \tmp2, \tmp3
+.Lno_addr_auth\@:
+ .endm
+
#else /* CONFIG_ARM64_PTR_AUTH */
.macro ptrauth_keys_install_user tsk, tmp1, tmp2, tmp3
.endm
- .macro ptrauth_keys_install_kernel tsk, sync, tmp1, tmp2, tmp3
+ .macro ptrauth_keys_install_kernel_nosync tsk, tmp1, tmp2, tmp3
+ .endm
+
+ .macro ptrauth_keys_install_kernel tsk, tmp1, tmp2, tmp3
.endm
#endif /* CONFIG_ARM64_PTR_AUTH */
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 0bff325117b4..54d181177656 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -736,4 +736,54 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU
.Lyield_out_\@ :
.endm
+/*
+ * This macro emits a program property note section identifying
+ * architecture features which require special handling, mainly for
+ * use in assembly files included in the VDSO.
+ */
+
+#define NT_GNU_PROPERTY_TYPE_0 5
+#define GNU_PROPERTY_AARCH64_FEATURE_1_AND 0xc0000000
+
+#define GNU_PROPERTY_AARCH64_FEATURE_1_BTI (1U << 0)
+#define GNU_PROPERTY_AARCH64_FEATURE_1_PAC (1U << 1)
+
+#ifdef CONFIG_ARM64_BTI_KERNEL
+#define GNU_PROPERTY_AARCH64_FEATURE_1_DEFAULT \
+ ((GNU_PROPERTY_AARCH64_FEATURE_1_BTI | \
+ GNU_PROPERTY_AARCH64_FEATURE_1_PAC))
+#endif
+
+#ifdef GNU_PROPERTY_AARCH64_FEATURE_1_DEFAULT
+.macro emit_aarch64_feature_1_and, feat=GNU_PROPERTY_AARCH64_FEATURE_1_DEFAULT
+ .pushsection .note.gnu.property, "a"
+ .align 3
+ .long 2f - 1f
+ .long 6f - 3f
+ .long NT_GNU_PROPERTY_TYPE_0
+1: .string "GNU"
+2:
+ .align 3
+3: .long GNU_PROPERTY_AARCH64_FEATURE_1_AND
+ .long 5f - 4f
+4:
+ /*
+ * This is described with an array of char in the Linux API
+ * spec but the text and all other usage (including binutils,
+ * clang and GCC) treat this as a 32 bit value so no swizzling
+ * is required for big endian.
+ */
+ .long \feat
+5:
+ .align 3
+6:
+ .popsection
+.endm
+
+#else
+.macro emit_aarch64_feature_1_and, feat=0
+.endm
+
+#endif /* GNU_PROPERTY_AARCH64_FEATURE_1_DEFAULT */
+
#endif /* __ASM_ASSEMBLER_H */
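
Aside: the emit_aarch64_feature_1_and macro above hand-assembles an ELF note. As a cross-check of the layout (my reading of the macro, not code from the patch), the same note can be written as a C object; the sizes match the .long/.align 3 sequence in the macro:

#include <stdint.h>

#define DEMO_NT_GNU_PROPERTY_TYPE_0			5
#define DEMO_GNU_PROPERTY_AARCH64_FEATURE_1_AND		0xc0000000U
#define DEMO_GNU_PROPERTY_AARCH64_FEATURE_1_BTI		(1U << 0)
#define DEMO_GNU_PROPERTY_AARCH64_FEATURE_1_PAC		(1U << 1)

/* Same byte layout the assembler macro emits: a note header, the "GNU"
 * name, then one AArch64 feature-1 property padded to 8 bytes. */
static const struct {
	uint32_t n_namesz, n_descsz, n_type;	/* Elf64_Nhdr */
	char     n_name[4];			/* "GNU" + NUL */
	uint32_t pr_type, pr_datasz;		/* property header */
	uint32_t pr_data, pr_pad;		/* feature bits + pad */
} demo_note __attribute__((used, section(".note.gnu.property"), aligned(8))) = {
	.n_namesz  = 4,
	.n_descsz  = 16,
	.n_type    = DEMO_NT_GNU_PROPERTY_TYPE_0,
	.n_name    = "GNU",
	.pr_type   = DEMO_GNU_PROPERTY_AARCH64_FEATURE_1_AND,
	.pr_datasz = 4,
	.pr_data   = DEMO_GNU_PROPERTY_AARCH64_FEATURE_1_BTI |
		     DEMO_GNU_PROPERTY_AARCH64_FEATURE_1_PAC,
};

/* Inspect with "readelf -n" on the resulting object to compare against
 * a kernel object built with ARM64_BTI_KERNEL enabled. */
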
diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index e6cca3d4acf7..ce50c1f1f1ea 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -79,7 +79,7 @@ static inline void flush_icache_range(unsigned long start, unsigned long end)
* IPI all online CPUs so that they undergo a context synchronization
* event and are forced to refetch the new instructions.
*/
-#ifdef CONFIG_KGDB
+
/*
* KGDB performs cache maintenance with interrupts disabled, so we
* will deadlock trying to IPI the secondary CPUs. In theory, we can
@@ -89,9 +89,9 @@ static inline void flush_icache_range(unsigned long start, unsigned long end)
* the patching operation, so we don't need extra IPIs here anyway.
* In which case, add a KGDB-specific bodge and return early.
*/
- if (kgdb_connected && irqs_disabled())
+ if (in_dbg_master())
return;
-#endif
+
kick_all_cpus_sync();
}
diff --git a/arch/arm64/include/asm/compiler.h b/arch/arm64/include/asm/compiler.h
index eece20d2c55f..51a7ce87cdfe 100644
--- a/arch/arm64/include/asm/compiler.h
+++ b/arch/arm64/include/asm/compiler.h
@@ -2,8 +2,6 @@
#ifndef __ASM_COMPILER_H
#define __ASM_COMPILER_H
-#if defined(CONFIG_ARM64_PTR_AUTH)
-
/*
* The EL0/EL1 pointer bits used by a pointer authentication code.
* This is dependent on TBI0/TBI1 being enabled, or bits 63:56 would also apply.
@@ -19,6 +17,4 @@
#define __builtin_return_address(val) \
(void *)(ptrauth_clear_pac((unsigned long)__builtin_return_address(val)))
-#endif /* CONFIG_ARM64_PTR_AUTH */
-
#endif /* __ASM_COMPILER_H */
diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h
index b4a40535a3d8..7faae6ff3ab4 100644
--- a/arch/arm64/include/asm/cpu.h
+++ b/arch/arm64/include/asm/cpu.h
@@ -33,6 +33,7 @@ struct cpuinfo_arm64 {
u64 reg_id_aa64zfr0;
u32 reg_id_dfr0;
+ u32 reg_id_dfr1;
u32 reg_id_isar0;
u32 reg_id_isar1;
u32 reg_id_isar2;
@@ -44,8 +45,11 @@ struct cpuinfo_arm64 {
u32 reg_id_mmfr1;
u32 reg_id_mmfr2;
u32 reg_id_mmfr3;
+ u32 reg_id_mmfr4;
+ u32 reg_id_mmfr5;
u32 reg_id_pfr0;
u32 reg_id_pfr1;
+ u32 reg_id_pfr2;
u32 reg_mvfr0;
u32 reg_mvfr1;
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index 8eb5a088ae65..d7b3bb0cb180 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -44,7 +44,7 @@
#define ARM64_SSBS 34
#define ARM64_WORKAROUND_1418040 35
#define ARM64_HAS_SB 36
-#define ARM64_WORKAROUND_SPECULATIVE_AT_VHE 37
+#define ARM64_WORKAROUND_SPECULATIVE_AT 37
#define ARM64_HAS_ADDRESS_AUTH_ARCH 38
#define ARM64_HAS_ADDRESS_AUTH_IMP_DEF 39
#define ARM64_HAS_GENERIC_AUTH_ARCH 40
@@ -55,13 +55,14 @@
#define ARM64_WORKAROUND_CAVIUM_TX2_219_TVM 45
#define ARM64_WORKAROUND_CAVIUM_TX2_219_PRFM 46
#define ARM64_WORKAROUND_1542419 47
-#define ARM64_WORKAROUND_SPECULATIVE_AT_NVHE 48
-#define ARM64_HAS_E0PD 49
-#define ARM64_HAS_RNG 50
-#define ARM64_HAS_AMU_EXTN 51
-#define ARM64_HAS_ADDRESS_AUTH 52
-#define ARM64_HAS_GENERIC_AUTH 53
+#define ARM64_HAS_E0PD 48
+#define ARM64_HAS_RNG 49
+#define ARM64_HAS_AMU_EXTN 50
+#define ARM64_HAS_ADDRESS_AUTH 51
+#define ARM64_HAS_GENERIC_AUTH 52
+#define ARM64_HAS_32BIT_EL1 53
+#define ARM64_BTI 54
-#define ARM64_NCAPS 54
+#define ARM64_NCAPS 55
#endif /* __ASM_CPUCAPS_H */
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index afe08251ff95..5d1f4ae42799 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -551,6 +551,13 @@ static inline bool id_aa64mmfr0_mixed_endian_el0(u64 mmfr0)
cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_BIGENDEL0_SHIFT) == 0x1;
}
+static inline bool id_aa64pfr0_32bit_el1(u64 pfr0)
+{
+ u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL1_SHIFT);
+
+ return val == ID_AA64PFR0_EL1_32BIT_64BIT;
+}
+
static inline bool id_aa64pfr0_32bit_el0(u64 pfr0)
{
u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL0_SHIFT);
@@ -680,6 +687,11 @@ static inline bool system_has_prio_mask_debugging(void)
system_uses_irq_prio_masking();
}
+static inline bool system_supports_bti(void)
+{
+ return IS_ENABLED(CONFIG_ARM64_BTI) && cpus_have_const_cap(ARM64_BTI);
+}
+
#define ARM64_BP_HARDEN_UNKNOWN -1
#define ARM64_BP_HARDEN_WA_NEEDED 0
#define ARM64_BP_HARDEN_NOT_REQUIRED 1
@@ -745,6 +757,24 @@ static inline bool cpu_has_hw_af(void)
extern bool cpu_has_amu_feat(int cpu);
#endif
+static inline unsigned int get_vmid_bits(u64 mmfr1)
+{
+ int vmid_bits;
+
+ vmid_bits = cpuid_feature_extract_unsigned_field(mmfr1,
+ ID_AA64MMFR1_VMIDBITS_SHIFT);
+ if (vmid_bits == ID_AA64MMFR1_VMIDBITS_16)
+ return 16;
+
+ /*
+ * Return the default here even if any reserved
+ * value is fetched from the system register.
+ */
+ return 8;
+}
+
+u32 get_kvm_ipa_limit(void);
+
#endif /* __ASSEMBLY__ */
#endif
diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h
index 7619f473155f..e5ceea213e39 100644
--- a/arch/arm64/include/asm/debug-monitors.h
+++ b/arch/arm64/include/asm/debug-monitors.h
@@ -125,5 +125,7 @@ static inline int reinstall_suspended_bps(struct pt_regs *regs)
int aarch32_break_handler(struct pt_regs *regs);
+void debug_traps_init(void);
+
#endif /* __ASSEMBLY */
#endif /* __ASM_DEBUG_MONITORS_H */
diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h
index 45e821222774..d4ab3f73e7a3 100644
--- a/arch/arm64/include/asm/efi.h
+++ b/arch/arm64/include/asm/efi.h
@@ -86,14 +86,6 @@ static inline unsigned long efi_get_max_initrd_addr(unsigned long dram_base,
return (image_addr & ~(SZ_1G - 1UL)) + (1UL << (VA_BITS_MIN - 1));
}
-#define efi_bs_call(func, ...) efi_system_table()->boottime->func(__VA_ARGS__)
-#define efi_rt_call(func, ...) efi_system_table()->runtime->func(__VA_ARGS__)
-#define efi_is_native() (true)
-
-#define efi_table_attr(inst, attr) (inst->attr)
-
-#define efi_call_proto(inst, func, ...) inst->func(inst, ##__VA_ARGS__)
-
#define alloc_screen_info(x...) &screen_info
static inline void free_screen_info(struct screen_info *si)
diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h
index b618017205a3..4f00d50585a4 100644
--- a/arch/arm64/include/asm/elf.h
+++ b/arch/arm64/include/asm/elf.h
@@ -114,7 +114,11 @@
#ifndef __ASSEMBLY__
+#include <uapi/linux/elf.h>
#include <linux/bug.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/types.h>
#include <asm/processor.h> /* for signal_minsigstksz, used by ARCH_DLINFO */
typedef unsigned long elf_greg_t;
@@ -224,6 +228,52 @@ extern int aarch32_setup_additional_pages(struct linux_binprm *bprm,
#endif /* CONFIG_COMPAT */
+struct arch_elf_state {
+ int flags;
+};
+
+#define ARM64_ELF_BTI (1 << 0)
+
+#define INIT_ARCH_ELF_STATE { \
+ .flags = 0, \
+}
+
+static inline int arch_parse_elf_property(u32 type, const void *data,
+ size_t datasz, bool compat,
+ struct arch_elf_state *arch)
+{
+ /* No known properties for AArch32 yet */
+ if (IS_ENABLED(CONFIG_COMPAT) && compat)
+ return 0;
+
+ if (type == GNU_PROPERTY_AARCH64_FEATURE_1_AND) {
+ const u32 *p = data;
+
+ if (datasz != sizeof(*p))
+ return -ENOEXEC;
+
+ if (system_supports_bti() &&
+ (*p & GNU_PROPERTY_AARCH64_FEATURE_1_BTI))
+ arch->flags |= ARM64_ELF_BTI;
+ }
+
+ return 0;
+}
+
+static inline int arch_elf_pt_proc(void *ehdr, void *phdr,
+ struct file *f, bool is_interp,
+ struct arch_elf_state *state)
+{
+ return 0;
+}
+
+static inline int arch_check_elf(void *ehdr, bool has_interp,
+ void *interp_ehdr,
+ struct arch_elf_state *state)
+{
+ return 0;
+}
+
#endif /* !__ASSEMBLY__ */
#endif
diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index 6a395a7e6707..035003acfa87 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -22,7 +22,7 @@
#define ESR_ELx_EC_PAC (0x09) /* EL2 and above */
/* Unallocated EC: 0x0A - 0x0B */
#define ESR_ELx_EC_CP14_64 (0x0C)
-/* Unallocated EC: 0x0d */
+#define ESR_ELx_EC_BTI (0x0D)
#define ESR_ELx_EC_ILL (0x0E)
/* Unallocated EC: 0x0F - 0x10 */
#define ESR_ELx_EC_SVC32 (0x11)
diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h
index 7a6e81ca23a8..7577a754d443 100644
--- a/arch/arm64/include/asm/exception.h
+++ b/arch/arm64/include/asm/exception.h
@@ -34,6 +34,7 @@ static inline u32 disr_to_esr(u64 disr)
asmlinkage void enter_from_user_mode(void);
void do_mem_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs);
void do_undefinstr(struct pt_regs *regs);
+void do_bti(struct pt_regs *regs);
asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr);
void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr,
struct pt_regs *regs);
diff --git a/arch/arm64/include/asm/hardirq.h b/arch/arm64/include/asm/hardirq.h
index 87ad961f3c97..985493af704b 100644
--- a/arch/arm64/include/asm/hardirq.h
+++ b/arch/arm64/include/asm/hardirq.h
@@ -32,30 +32,70 @@ u64 smp_irq_stat_cpu(unsigned int cpu);
struct nmi_ctx {
u64 hcr;
+ unsigned int cnt;
};
DECLARE_PER_CPU(struct nmi_ctx, nmi_contexts);
-#define arch_nmi_enter() \
- do { \
- if (is_kernel_in_hyp_mode()) { \
- struct nmi_ctx *nmi_ctx = this_cpu_ptr(&nmi_contexts); \
- nmi_ctx->hcr = read_sysreg(hcr_el2); \
- if (!(nmi_ctx->hcr & HCR_TGE)) { \
- write_sysreg(nmi_ctx->hcr | HCR_TGE, hcr_el2); \
- isb(); \
- } \
- } \
- } while (0)
+#define arch_nmi_enter() \
+do { \
+ struct nmi_ctx *___ctx; \
+ u64 ___hcr; \
+ \
+ if (!is_kernel_in_hyp_mode()) \
+ break; \
+ \
+ ___ctx = this_cpu_ptr(&nmi_contexts); \
+ if (___ctx->cnt) { \
+ ___ctx->cnt++; \
+ break; \
+ } \
+ \
+ ___hcr = read_sysreg(hcr_el2); \
+ if (!(___hcr & HCR_TGE)) { \
+ write_sysreg(___hcr | HCR_TGE, hcr_el2); \
+ isb(); \
+ } \
+ /* \
+ * Make sure the sysreg write is performed before ___ctx->cnt \
+ * is set to 1. NMIs that see cnt == 1 will rely on us. \
+ */ \
+ barrier(); \
+ ___ctx->cnt = 1; \
+ /* \
+ * Make sure ___ctx->cnt is set before we save ___hcr. We \
+ * don't want ___ctx->hcr to be overwritten. \
+ */ \
+ barrier(); \
+ ___ctx->hcr = ___hcr; \
+} while (0)
-#define arch_nmi_exit() \
- do { \
- if (is_kernel_in_hyp_mode()) { \
- struct nmi_ctx *nmi_ctx = this_cpu_ptr(&nmi_contexts); \
- if (!(nmi_ctx->hcr & HCR_TGE)) \
- write_sysreg(nmi_ctx->hcr, hcr_el2); \
- } \
- } while (0)
+#define arch_nmi_exit() \
+do { \
+ struct nmi_ctx *___ctx; \
+ u64 ___hcr; \
+ \
+ if (!is_kernel_in_hyp_mode()) \
+ break; \
+ \
+ ___ctx = this_cpu_ptr(&nmi_contexts); \
+ ___hcr = ___ctx->hcr; \
+ /* \
+ * Make sure we read ___ctx->hcr before we release \
+ * ___ctx->cnt as it makes ___ctx->hcr updatable again. \
+ */ \
+ barrier(); \
+ ___ctx->cnt--; \
+ /* \
+ * Make sure ___ctx->cnt release is visible before we \
+ * restore the sysreg. Otherwise a new NMI occurring \
+ * right after write_sysreg() can be fooled and think \
+ * we secured things for it. \
+ */ \
+ barrier(); \
+ if (!___ctx->cnt && !(___hcr & HCR_TGE)) \
+ write_sysreg(___hcr, hcr_el2); \
+} while (0)
static inline void ack_bad_irq(unsigned int irq)
{
diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h
index 0f00265248b5..d683bcbf1e7c 100644
--- a/arch/arm64/include/asm/hwcap.h
+++ b/arch/arm64/include/asm/hwcap.h
@@ -94,6 +94,7 @@
#define KERNEL_HWCAP_BF16 __khwcap2_feature(BF16)
#define KERNEL_HWCAP_DGH __khwcap2_feature(DGH)
#define KERNEL_HWCAP_RNG __khwcap2_feature(RNG)
+#define KERNEL_HWCAP_BTI __khwcap2_feature(BTI)
/*
* This yields a mask that user programs can use to figure out what
diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index bb313dde58a4..0bc46149e491 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -39,13 +39,37 @@ enum aarch64_insn_encoding_class {
* system instructions */
};
-enum aarch64_insn_hint_op {
+enum aarch64_insn_hint_cr_op {
AARCH64_INSN_HINT_NOP = 0x0 << 5,
AARCH64_INSN_HINT_YIELD = 0x1 << 5,
AARCH64_INSN_HINT_WFE = 0x2 << 5,
AARCH64_INSN_HINT_WFI = 0x3 << 5,
AARCH64_INSN_HINT_SEV = 0x4 << 5,
AARCH64_INSN_HINT_SEVL = 0x5 << 5,
+
+ AARCH64_INSN_HINT_XPACLRI = 0x07 << 5,
+ AARCH64_INSN_HINT_PACIA_1716 = 0x08 << 5,
+ AARCH64_INSN_HINT_PACIB_1716 = 0x0A << 5,
+ AARCH64_INSN_HINT_AUTIA_1716 = 0x0C << 5,
+ AARCH64_INSN_HINT_AUTIB_1716 = 0x0E << 5,
+ AARCH64_INSN_HINT_PACIAZ = 0x18 << 5,
+ AARCH64_INSN_HINT_PACIASP = 0x19 << 5,
+ AARCH64_INSN_HINT_PACIBZ = 0x1A << 5,
+ AARCH64_INSN_HINT_PACIBSP = 0x1B << 5,
+ AARCH64_INSN_HINT_AUTIAZ = 0x1C << 5,
+ AARCH64_INSN_HINT_AUTIASP = 0x1D << 5,
+ AARCH64_INSN_HINT_AUTIBZ = 0x1E << 5,
+ AARCH64_INSN_HINT_AUTIBSP = 0x1F << 5,
+
+ AARCH64_INSN_HINT_ESB = 0x10 << 5,
+ AARCH64_INSN_HINT_PSB = 0x11 << 5,
+ AARCH64_INSN_HINT_TSB = 0x12 << 5,
+ AARCH64_INSN_HINT_CSDB = 0x14 << 5,
+
+ AARCH64_INSN_HINT_BTI = 0x20 << 5,
+ AARCH64_INSN_HINT_BTIC = 0x22 << 5,
+ AARCH64_INSN_HINT_BTIJ = 0x24 << 5,
+ AARCH64_INSN_HINT_BTIJC = 0x26 << 5,
};
enum aarch64_insn_imm_type {
@@ -344,7 +368,7 @@ __AARCH64_INSN_FUNCS(msr_reg, 0xFFF00000, 0xD5100000)
#undef __AARCH64_INSN_FUNCS
-bool aarch64_insn_is_nop(u32 insn);
+bool aarch64_insn_is_steppable_hint(u32 insn);
bool aarch64_insn_is_branch_imm(u32 insn);
static inline bool aarch64_insn_is_adr_adrp(u32 insn)
@@ -370,7 +394,7 @@ u32 aarch64_insn_gen_comp_branch_imm(unsigned long pc, unsigned long addr,
enum aarch64_insn_branch_type type);
u32 aarch64_insn_gen_cond_branch_imm(unsigned long pc, unsigned long addr,
enum aarch64_insn_condition cond);
-u32 aarch64_insn_gen_hint(enum aarch64_insn_hint_op op);
+u32 aarch64_insn_gen_hint(enum aarch64_insn_hint_cr_op op);
u32 aarch64_insn_gen_nop(void);
u32 aarch64_insn_gen_branch_reg(enum aarch64_insn_register reg,
enum aarch64_insn_branch_type type);
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 7c7eeeaab9fa..0c9b5fc4ba0a 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -64,12 +64,14 @@ extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
extern void __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu);
-extern void __kvm_timer_set_cntvoff(u32 cntvoff_low, u32 cntvoff_high);
+extern void __kvm_timer_set_cntvoff(u64 cntvoff);
extern int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu);
extern int __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu);
+extern void __kvm_enable_ssbs(void);
+
extern u64 __vgic_v3_get_ich_vtr_el2(void);
extern u64 __vgic_v3_read_vmcr(void);
extern void __vgic_v3_write_vmcr(u32 vmcr);
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index a30b4eec7cb4..6ea53e6e8b26 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -507,10 +507,12 @@ static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu,
static __always_inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr)
{
- if (vcpu_mode_is_32bit(vcpu))
+ if (vcpu_mode_is_32bit(vcpu)) {
kvm_skip_instr32(vcpu, is_wide_instr);
- else
+ } else {
*vcpu_pc(vcpu) += 4;
+ *vcpu_cpsr(vcpu) &= ~PSR_BTYPE_MASK;
+ }
/* advance the singlestep state machine */
*vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS;
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 32c8a675e5a4..abbdf9703e20 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -46,6 +46,9 @@
#define KVM_REQ_RECORD_STEAL KVM_ARCH_REQ(3)
#define KVM_REQ_RELOAD_GICv4 KVM_ARCH_REQ(4)
+#define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
+ KVM_DIRTY_LOG_INITIALLY_SET)
+
DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
extern unsigned int kvm_sve_max_vl;
@@ -112,12 +115,8 @@ struct kvm_vcpu_fault_info {
u64 disr_el1; /* Deferred [SError] Status Register */
};
-/*
- * 0 is reserved as an invalid value.
- * Order should be kept in sync with the save/restore code.
- */
enum vcpu_sysreg {
- __INVALID_SYSREG__,
+ __INVALID_SYSREG__, /* 0 is reserved as an invalid value */
MPIDR_EL1, /* MultiProcessor Affinity Register */
CSSELR_EL1, /* Cache Size Selection Register */
SCTLR_EL1, /* System Control Register */
@@ -415,6 +414,8 @@ struct kvm_vm_stat {
struct kvm_vcpu_stat {
u64 halt_successful_poll;
u64 halt_attempted_poll;
+ u64 halt_poll_success_ns;
+ u64 halt_poll_fail_ns;
u64 halt_poll_invalid;
u64 halt_wakeup;
u64 hvc_exit_stat;
@@ -530,39 +531,6 @@ static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt)
cpu_ctxt->sys_regs[MPIDR_EL1] = read_cpuid_mpidr();
}
-void __kvm_enable_ssbs(void);
-
-static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
- unsigned long hyp_stack_ptr,
- unsigned long vector_ptr)
-{
- /*
- * Calculate the raw per-cpu offset without a translation from the
- * kernel's mapping to the linear mapping, and store it in tpidr_el2
- * so that we can use adr_l to access per-cpu variables in EL2.
- */
- u64 tpidr_el2 = ((u64)this_cpu_ptr(&kvm_host_data) -
- (u64)kvm_ksym_ref(kvm_host_data));
-
- /*
- * Call initialization code, and switch to the full blown HYP code.
- * If the cpucaps haven't been finalized yet, something has gone very
- * wrong, and hyp will crash and burn when it uses any
- * cpus_have_const_cap() wrapper.
- */
- BUG_ON(!system_capabilities_finalized());
- __kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr, tpidr_el2);
-
- /*
- * Disabling SSBD on a non-VHE system requires us to enable SSBS
- * at EL2.
- */
- if (!has_vhe() && this_cpu_has_cap(ARM64_SSBS) &&
- arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) {
- kvm_call_hyp(__kvm_enable_ssbs);
- }
-}
-
static inline bool kvm_arch_requires_vhe(void)
{
/*
@@ -573,10 +541,6 @@ static inline bool kvm_arch_requires_vhe(void)
if (system_supports_sve())
return true;
- /* Some implementations have defects that confine them to VHE */
- if (cpus_have_cap(ARM64_WORKAROUND_SPECULATIVE_AT_VHE))
- return true;
-
return false;
}
@@ -598,8 +562,6 @@ int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr);
-static inline void __cpu_init_stage2(void) {}
-
/* Guest/host FPSIMD coordination helpers */
int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu);
@@ -670,7 +632,7 @@ static inline int kvm_arm_have_ssbd(void)
void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu);
void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu);
-void kvm_set_ipa_limit(void);
+int kvm_set_ipa_limit(void);
#define __KVM_HAVE_ARCH_VM_ALLOC
struct kvm *kvm_arch_alloc_vm(void);
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index fe57f60f06a8..ce3080834bfa 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -10,10 +10,9 @@
#include <linux/compiler.h>
#include <linux/kvm_host.h>
#include <asm/alternative.h>
-#include <asm/kvm_mmu.h>
#include <asm/sysreg.h>
-#define __hyp_text __section(.hyp.text) notrace
+#define __hyp_text __section(.hyp.text) notrace __noscs
#define read_sysreg_elx(r,nvh,vh) \
({ \
@@ -56,12 +55,12 @@
int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu);
-void __vgic_v3_save_state(struct kvm_vcpu *vcpu);
-void __vgic_v3_restore_state(struct kvm_vcpu *vcpu);
-void __vgic_v3_activate_traps(struct kvm_vcpu *vcpu);
-void __vgic_v3_deactivate_traps(struct kvm_vcpu *vcpu);
-void __vgic_v3_save_aprs(struct kvm_vcpu *vcpu);
-void __vgic_v3_restore_aprs(struct kvm_vcpu *vcpu);
+void __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if);
+void __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if);
+void __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if);
+void __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if);
+void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if);
+void __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if);
int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu);
void __timer_enable_traps(struct kvm_vcpu *vcpu);
@@ -88,22 +87,5 @@ void deactivate_traps_vhe_put(void);
u64 __guest_enter(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host_ctxt);
void __noreturn __hyp_do_panic(unsigned long, ...);
-/*
- * Must be called from hyp code running at EL2 with an updated VTTBR
- * and interrupts disabled.
- */
-static __always_inline void __hyp_text __load_guest_stage2(struct kvm *kvm)
-{
- write_sysreg(kvm->arch.vtcr, vtcr_el2);
- write_sysreg(kvm_get_vttbr(kvm), vttbr_el2);
-
- /*
- * ARM errata 1165522 and 1530923 require the actual execution of the
- * above before we can switch to the EL1/EL0 translation regime used by
- * the guest.
- */
- asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT_VHE));
-}
-
#endif /* __ARM64_KVM_HYP_H__ */
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 30b0e8d6b895..324c8483d2b9 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -363,8 +363,6 @@ static inline void __kvm_flush_dcache_pud(pud_t pud)
}
}
-#define kvm_virt_to_phys(x) __pa_symbol(x)
-
void kvm_set_way_flush(struct kvm_vcpu *vcpu);
void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled);
@@ -416,7 +414,7 @@ static inline unsigned int kvm_get_vmid_bits(void)
{
int reg = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
- return (cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR1_VMIDBITS_SHIFT) == 2) ? 16 : 8;
+ return get_vmid_bits(reg);
}
/*
@@ -473,7 +471,7 @@ static inline int kvm_write_guest_lock(struct kvm *kvm, gpa_t gpa,
extern void *__kvm_bp_vect_base;
extern int __kvm_harden_el2_vector_slot;
-/* This is only called on a VHE system */
+/* This is called on both VHE and !VHE systems */
static inline void *kvm_get_hyp_vector(void)
{
struct bp_hardening_data *data = arm64_get_bp_hardening_data();
@@ -604,5 +602,22 @@ static __always_inline u64 kvm_get_vttbr(struct kvm *kvm)
return kvm_phys_to_vttbr(baddr) | vmid_field | cnp;
}
+/*
+ * Must be called from hyp code running at EL2 with an updated VTTBR
+ * and interrupts disabled.
+ */
+static __always_inline void __load_guest_stage2(struct kvm *kvm)
+{
+ write_sysreg(kvm->arch.vtcr, vtcr_el2);
+ write_sysreg(kvm_get_vttbr(kvm), vttbr_el2);
+
+ /*
+ * ARM errata 1165522 and 1530923 require the actual execution of the
+ * above before we can switch to the EL1/EL0 translation regime used by
+ * the guest.
+ */
+ asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT));
+}
+
#endif /* __ASSEMBLY__ */
#endif /* __ARM64_KVM_MMU_H__ */
diff --git a/arch/arm64/include/asm/linkage.h b/arch/arm64/include/asm/linkage.h
index ebee3113a62f..81fefd2a1d02 100644
--- a/arch/arm64/include/asm/linkage.h
+++ b/arch/arm64/include/asm/linkage.h
@@ -4,6 +4,52 @@
#define __ALIGN .align 2
#define __ALIGN_STR ".align 2"
+#if defined(CONFIG_ARM64_BTI_KERNEL) && defined(__aarch64__)
+
+/*
+ * Since current versions of gas reject the BTI instruction unless we
+ * set the architecture version to v8.5 we use the hint instruction
+ * instead.
+ */
+#define BTI_C hint 34 ;
+#define BTI_J hint 36 ;
+
+/*
+ * When using in-kernel BTI we need to ensure that PCS-conformant assembly
+ * functions have suitable annotations. Override SYM_FUNC_START to insert
+ * a BTI landing pad at the start of everything.
+ */
+#define SYM_FUNC_START(name) \
+ SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) \
+ BTI_C
+
+#define SYM_FUNC_START_NOALIGN(name) \
+ SYM_START(name, SYM_L_GLOBAL, SYM_A_NONE) \
+ BTI_C
+
+#define SYM_FUNC_START_LOCAL(name) \
+ SYM_START(name, SYM_L_LOCAL, SYM_A_ALIGN) \
+ BTI_C
+
+#define SYM_FUNC_START_LOCAL_NOALIGN(name) \
+ SYM_START(name, SYM_L_LOCAL, SYM_A_NONE) \
+ BTI_C
+
+#define SYM_FUNC_START_WEAK(name) \
+ SYM_START(name, SYM_L_WEAK, SYM_A_ALIGN) \
+ BTI_C
+
+#define SYM_FUNC_START_WEAK_NOALIGN(name) \
+ SYM_START(name, SYM_L_WEAK, SYM_A_NONE) \
+ BTI_C
+
+#define SYM_INNER_LABEL(name, linkage) \
+ .type name SYM_T_NONE ASM_NL \
+ SYM_ENTRY(name, linkage, SYM_A_NONE) \
+ BTI_J
+
+#endif
+
/*
* Annotate a function as position independent, i.e., safe to be called before
* the kernel virtual mapping is activated.
diff --git a/arch/arm64/include/asm/mman.h b/arch/arm64/include/asm/mman.h
new file mode 100644
index 000000000000..081ec8de9ea6
--- /dev/null
+++ b/arch/arm64/include/asm/mman.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_MMAN_H__
+#define __ASM_MMAN_H__
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <uapi/asm/mman.h>
+
+static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot,
+ unsigned long pkey __always_unused)
+{
+ if (system_supports_bti() && (prot & PROT_BTI))
+ return VM_ARM64_BTI;
+
+ return 0;
+}
+#define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey)
+
+static inline pgprot_t arch_vm_get_page_prot(unsigned long vm_flags)
+{
+ return (vm_flags & VM_ARM64_BTI) ? __pgprot(PTE_GP) : __pgprot(0);
+}
+#define arch_vm_get_page_prot(vm_flags) arch_vm_get_page_prot(vm_flags)
+
+static inline bool arch_validate_prot(unsigned long prot,
+ unsigned long addr __always_unused)
+{
+ unsigned long supported = PROT_READ | PROT_WRITE | PROT_EXEC | PROT_SEM;
+
+ if (system_supports_bti())
+ supported |= PROT_BTI;
+
+ return (prot & ~supported) == 0;
+}
+#define arch_validate_prot(prot, addr) arch_validate_prot(prot, addr)
+
+#endif /* ! __ASM_MMAN_H__ */
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index 6bf5e650da78..9c91a8f93a0e 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -151,6 +151,7 @@
#define PTE_SHARED (_AT(pteval_t, 3) << 8) /* SH[1:0], inner shareable */
#define PTE_AF (_AT(pteval_t, 1) << 10) /* Access Flag */
#define PTE_NG (_AT(pteval_t, 1) << 11) /* nG */
+#define PTE_GP (_AT(pteval_t, 1) << 50) /* BTI guarded */
#define PTE_DBM (_AT(pteval_t, 1) << 51) /* Dirty Bit Management */
#define PTE_CONT (_AT(pteval_t, 1) << 52) /* Contiguous range */
#define PTE_PXN (_AT(pteval_t, 1) << 53) /* Privileged XN */
@@ -190,7 +191,6 @@
* Memory Attribute override for Stage-2 (MemAttr[3:0])
*/
#define PTE_S2_MEMATTR(t) (_AT(pteval_t, (t)) << 2)
-#define PTE_S2_MEMATTR_MASK (_AT(pteval_t, 0xf) << 2)
/*
* EL2/HYP PTE/PMD definitions
diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
index 1305e28225fc..2e7e0f452301 100644
--- a/arch/arm64/include/asm/pgtable-prot.h
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -21,6 +21,7 @@
#ifndef __ASSEMBLY__
+#include <asm/cpufeature.h>
#include <asm/pgtable-types.h>
extern bool arm64_use_ng_mappings;
@@ -31,6 +32,16 @@ extern bool arm64_use_ng_mappings;
#define PTE_MAYBE_NG (arm64_use_ng_mappings ? PTE_NG : 0)
#define PMD_MAYBE_NG (arm64_use_ng_mappings ? PMD_SECT_NG : 0)
+/*
+ * If we have userspace only BTI we don't want to mark kernel pages
+ * guarded even if the system does support BTI.
+ */
+#ifdef CONFIG_ARM64_BTI_KERNEL
+#define PTE_MAYBE_GP (system_supports_bti() ? PTE_GP : 0)
+#else
+#define PTE_MAYBE_GP 0
+#endif
+
#define PROT_DEFAULT (_PROT_DEFAULT | PTE_MAYBE_NG)
#define PROT_SECT_DEFAULT (_PROT_SECT_DEFAULT | PMD_MAYBE_NG)
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 538c85e62f86..dae0466d19d6 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -407,6 +407,9 @@ static inline pmd_t pmd_mkdevmap(pmd_t pmd)
#define __pgprot_modify(prot,mask,bits) \
__pgprot((pgprot_val(prot) & ~(mask)) | (bits))
+#define pgprot_nx(prot) \
+ __pgprot_modify(prot, 0, PTE_PXN)
+
/*
* Mark the prot value as uncacheable and unbufferable.
*/
@@ -457,6 +460,7 @@ extern pgd_t init_pg_dir[PTRS_PER_PGD];
extern pgd_t init_pg_end[];
extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
extern pgd_t idmap_pg_dir[PTRS_PER_PGD];
+extern pgd_t idmap_pg_end[];
extern pgd_t tramp_pg_dir[PTRS_PER_PGD];
extern void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd);
@@ -508,7 +512,7 @@ static inline void pte_unmap(pte_t *pte) { }
#define pte_set_fixmap_offset(pmd, addr) pte_set_fixmap(pte_offset_phys(pmd, addr))
#define pte_clear_fixmap() clear_fixmap(FIX_PTE)
-#define pmd_page(pmd) pfn_to_page(__phys_to_pfn(__pmd_to_phys(pmd)))
+#define pmd_page(pmd) phys_to_page(__pmd_to_phys(pmd))
/* use ONLY for statically allocated translation tables */
#define pte_offset_kimg(dir,addr) ((pte_t *)__phys_to_kimg(pte_offset_phys((dir), (addr))))
@@ -566,7 +570,7 @@ static inline phys_addr_t pud_page_paddr(pud_t pud)
#define pmd_set_fixmap_offset(pud, addr) pmd_set_fixmap(pmd_offset_phys(pud, addr))
#define pmd_clear_fixmap() clear_fixmap(FIX_PMD)
-#define pud_page(pud) pfn_to_page(__phys_to_pfn(__pud_to_phys(pud)))
+#define pud_page(pud) phys_to_page(__pud_to_phys(pud))
/* use ONLY for statically allocated translation tables */
#define pmd_offset_kimg(dir,addr) ((pmd_t *)__phys_to_kimg(pmd_offset_phys((dir), (addr))))
@@ -624,7 +628,7 @@ static inline phys_addr_t pgd_page_paddr(pgd_t pgd)
#define pud_set_fixmap_offset(pgd, addr) pud_set_fixmap(pud_offset_phys(pgd, addr))
#define pud_clear_fixmap() clear_fixmap(FIX_PUD)
-#define pgd_page(pgd) pfn_to_page(__phys_to_pfn(__pgd_to_phys(pgd)))
+#define pgd_page(pgd) phys_to_page(__pgd_to_phys(pgd))
/* use ONLY for statically allocated translation tables */
#define pud_offset_kimg(dir,addr) ((pud_t *)__phys_to_kimg(pud_offset_phys((dir), (addr))))
@@ -660,7 +664,7 @@ static inline phys_addr_t pgd_page_paddr(pgd_t pgd)
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
{
const pteval_t mask = PTE_USER | PTE_PXN | PTE_UXN | PTE_RDONLY |
- PTE_PROT_NONE | PTE_VALID | PTE_WRITE;
+ PTE_PROT_NONE | PTE_VALID | PTE_WRITE | PTE_GP;
/* preserve the hardware dirty information */
if (pte_hw_dirty(pte))
pte = pte_mkdirty(pte);
diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h
index bf57308fcd63..953b6a1ce549 100644
--- a/arch/arm64/include/asm/ptrace.h
+++ b/arch/arm64/include/asm/ptrace.h
@@ -35,6 +35,7 @@
#define GIC_PRIO_PSR_I_SET (1 << 4)
/* Additional SPSR bits not exposed in the UABI */
+#define PSR_MODE_THREAD_BIT (1 << 0)
#define PSR_IL_BIT (1 << 20)
/* AArch32-specific ptrace requests */
diff --git a/arch/arm64/include/asm/scs.h b/arch/arm64/include/asm/scs.h
new file mode 100644
index 000000000000..eaa2cd92e4c1
--- /dev/null
+++ b/arch/arm64/include/asm/scs.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_SCS_H
+#define _ASM_SCS_H
+
+#ifdef __ASSEMBLY__
+
+#include <asm/asm-offsets.h>
+
+#ifdef CONFIG_SHADOW_CALL_STACK
+ scs_sp .req x18
+
+ .macro scs_load tsk, tmp
+ ldr scs_sp, [\tsk, #TSK_TI_SCS_SP]
+ .endm
+
+ .macro scs_save tsk, tmp
+ str scs_sp, [\tsk, #TSK_TI_SCS_SP]
+ .endm
+#else
+ .macro scs_load tsk, tmp
+ .endm
+
+ .macro scs_save tsk, tmp
+ .endm
+#endif /* CONFIG_SHADOW_CALL_STACK */
+
+#endif /* __ASSEMBLY __ */
+
+#endif /* _ASM_SCS_H */
diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h
index 40d5ba029615..ea268d88b6f7 100644
--- a/arch/arm64/include/asm/smp.h
+++ b/arch/arm64/include/asm/smp.h
@@ -23,14 +23,6 @@
#define CPU_STUCK_REASON_52_BIT_VA (UL(1) << CPU_STUCK_REASON_SHIFT)
#define CPU_STUCK_REASON_NO_GRAN (UL(2) << CPU_STUCK_REASON_SHIFT)
-/* Possible options for __cpu_setup */
-/* Option to setup primary cpu */
-#define ARM64_CPU_BOOT_PRIMARY (1)
-/* Option to setup secondary cpus */
-#define ARM64_CPU_BOOT_SECONDARY (2)
-/* Option to setup cpus for different cpu run time services */
-#define ARM64_CPU_RUNTIME (3)
-
#ifndef __ASSEMBLY__
#include <asm/percpu.h>
@@ -96,9 +88,6 @@ asmlinkage void secondary_start_kernel(void);
struct secondary_data {
void *stack;
struct task_struct *task;
-#ifdef CONFIG_ARM64_PTR_AUTH
- struct ptrauth_keys_kernel ptrauth_key;
-#endif
long status;
};
diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h
index 4d9b1f48dc39..5017b531a415 100644
--- a/arch/arm64/include/asm/stacktrace.h
+++ b/arch/arm64/include/asm/stacktrace.h
@@ -68,12 +68,10 @@ extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk);
DECLARE_PER_CPU(unsigned long *, irq_stack_ptr);
-static inline bool on_irq_stack(unsigned long sp,
+static inline bool on_stack(unsigned long sp, unsigned long low,
+ unsigned long high, enum stack_type type,
struct stack_info *info)
{
- unsigned long low = (unsigned long)raw_cpu_read(irq_stack_ptr);
- unsigned long high = low + IRQ_STACK_SIZE;
-
if (!low)
return false;
@@ -83,12 +81,20 @@ static inline bool on_irq_stack(unsigned long sp,
if (info) {
info->low = low;
info->high = high;
- info->type = STACK_TYPE_IRQ;
+ info->type = type;
}
-
return true;
}
+static inline bool on_irq_stack(unsigned long sp,
+ struct stack_info *info)
+{
+ unsigned long low = (unsigned long)raw_cpu_read(irq_stack_ptr);
+ unsigned long high = low + IRQ_STACK_SIZE;
+
+ return on_stack(sp, low, high, STACK_TYPE_IRQ, info);
+}
+
static inline bool on_task_stack(const struct task_struct *tsk,
unsigned long sp,
struct stack_info *info)
@@ -96,16 +102,7 @@ static inline bool on_task_stack(const struct task_struct *tsk,
unsigned long low = (unsigned long)task_stack_page(tsk);
unsigned long high = low + THREAD_SIZE;
- if (sp < low || sp >= high)
- return false;
-
- if (info) {
- info->low = low;
- info->high = high;
- info->type = STACK_TYPE_TASK;
- }
-
- return true;
+ return on_stack(sp, low, high, STACK_TYPE_TASK, info);
}
#ifdef CONFIG_VMAP_STACK
@@ -117,16 +114,7 @@ static inline bool on_overflow_stack(unsigned long sp,
unsigned long low = (unsigned long)raw_cpu_ptr(overflow_stack);
unsigned long high = low + OVERFLOW_STACK_SIZE;
- if (sp < low || sp >= high)
- return false;
-
- if (info) {
- info->low = low;
- info->high = high;
- info->type = STACK_TYPE_OVERFLOW;
- }
-
- return true;
+ return on_stack(sp, low, high, STACK_TYPE_OVERFLOW, info);
}
#else
static inline bool on_overflow_stack(unsigned long sp,
diff --git a/arch/arm64/include/asm/suspend.h b/arch/arm64/include/asm/suspend.h
index 8939c87c4dce..0cde2f473971 100644
--- a/arch/arm64/include/asm/suspend.h
+++ b/arch/arm64/include/asm/suspend.h
@@ -2,7 +2,7 @@
#ifndef __ASM_SUSPEND_H
#define __ASM_SUSPEND_H
-#define NR_CTX_REGS 12
+#define NR_CTX_REGS 13
#define NR_CALLEE_SAVED_REGS 12
/*
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index c4ac0ac25a00..463175f80341 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -105,6 +105,10 @@
#define SYS_DC_CSW sys_insn(1, 0, 7, 10, 2)
#define SYS_DC_CISW sys_insn(1, 0, 7, 14, 2)
+/*
+ * System registers, organised loosely by encoding but grouped together
+ * where the architected name contains an index. e.g. ID_MMFR<n>_EL1.
+ */
#define SYS_OSDTRRX_EL1 sys_reg(2, 0, 0, 0, 2)
#define SYS_MDCCINT_EL1 sys_reg(2, 0, 0, 2, 0)
#define SYS_MDSCR_EL1 sys_reg(2, 0, 0, 2, 2)
@@ -134,12 +138,16 @@
#define SYS_ID_PFR0_EL1 sys_reg(3, 0, 0, 1, 0)
#define SYS_ID_PFR1_EL1 sys_reg(3, 0, 0, 1, 1)
+#define SYS_ID_PFR2_EL1 sys_reg(3, 0, 0, 3, 4)
#define SYS_ID_DFR0_EL1 sys_reg(3, 0, 0, 1, 2)
+#define SYS_ID_DFR1_EL1 sys_reg(3, 0, 0, 3, 5)
#define SYS_ID_AFR0_EL1 sys_reg(3, 0, 0, 1, 3)
#define SYS_ID_MMFR0_EL1 sys_reg(3, 0, 0, 1, 4)
#define SYS_ID_MMFR1_EL1 sys_reg(3, 0, 0, 1, 5)
#define SYS_ID_MMFR2_EL1 sys_reg(3, 0, 0, 1, 6)
#define SYS_ID_MMFR3_EL1 sys_reg(3, 0, 0, 1, 7)
+#define SYS_ID_MMFR4_EL1 sys_reg(3, 0, 0, 2, 6)
+#define SYS_ID_MMFR5_EL1 sys_reg(3, 0, 0, 3, 6)
#define SYS_ID_ISAR0_EL1 sys_reg(3, 0, 0, 2, 0)
#define SYS_ID_ISAR1_EL1 sys_reg(3, 0, 0, 2, 1)
@@ -147,7 +155,6 @@
#define SYS_ID_ISAR3_EL1 sys_reg(3, 0, 0, 2, 3)
#define SYS_ID_ISAR4_EL1 sys_reg(3, 0, 0, 2, 4)
#define SYS_ID_ISAR5_EL1 sys_reg(3, 0, 0, 2, 5)
-#define SYS_ID_MMFR4_EL1 sys_reg(3, 0, 0, 2, 6)
#define SYS_ID_ISAR6_EL1 sys_reg(3, 0, 0, 2, 7)
#define SYS_MVFR0_EL1 sys_reg(3, 0, 0, 3, 0)
@@ -552,6 +559,8 @@
#endif
/* SCTLR_EL1 specific flags. */
+#define SCTLR_EL1_BT1 (BIT(36))
+#define SCTLR_EL1_BT0 (BIT(35))
#define SCTLR_EL1_UCI (BIT(26))
#define SCTLR_EL1_E0E (BIT(24))
#define SCTLR_EL1_SPAN (BIT(23))
@@ -594,6 +603,7 @@
/* id_aa64isar0 */
#define ID_AA64ISAR0_RNDR_SHIFT 60
+#define ID_AA64ISAR0_TLB_SHIFT 56
#define ID_AA64ISAR0_TS_SHIFT 52
#define ID_AA64ISAR0_FHM_SHIFT 48
#define ID_AA64ISAR0_DP_SHIFT 44
@@ -637,6 +647,8 @@
#define ID_AA64PFR0_CSV2_SHIFT 56
#define ID_AA64PFR0_DIT_SHIFT 48
#define ID_AA64PFR0_AMU_SHIFT 44
+#define ID_AA64PFR0_MPAM_SHIFT 40
+#define ID_AA64PFR0_SEL2_SHIFT 36
#define ID_AA64PFR0_SVE_SHIFT 32
#define ID_AA64PFR0_RAS_SHIFT 28
#define ID_AA64PFR0_GIC_SHIFT 24
@@ -655,15 +667,21 @@
#define ID_AA64PFR0_ASIMD_NI 0xf
#define ID_AA64PFR0_ASIMD_SUPPORTED 0x0
#define ID_AA64PFR0_EL1_64BIT_ONLY 0x1
+#define ID_AA64PFR0_EL1_32BIT_64BIT 0x2
#define ID_AA64PFR0_EL0_64BIT_ONLY 0x1
#define ID_AA64PFR0_EL0_32BIT_64BIT 0x2
/* id_aa64pfr1 */
+#define ID_AA64PFR1_MPAMFRAC_SHIFT 16
+#define ID_AA64PFR1_RASFRAC_SHIFT 12
+#define ID_AA64PFR1_MTE_SHIFT 8
#define ID_AA64PFR1_SSBS_SHIFT 4
+#define ID_AA64PFR1_BT_SHIFT 0
#define ID_AA64PFR1_SSBS_PSTATE_NI 0
#define ID_AA64PFR1_SSBS_PSTATE_ONLY 1
#define ID_AA64PFR1_SSBS_PSTATE_INSNS 2
+#define ID_AA64PFR1_BT_BTI 0x1
/* id_aa64zfr0 */
#define ID_AA64ZFR0_F64MM_SHIFT 56
@@ -688,6 +706,9 @@
#define ID_AA64ZFR0_SVEVER_SVE2 0x1
/* id_aa64mmfr0 */
+#define ID_AA64MMFR0_TGRAN4_2_SHIFT 40
+#define ID_AA64MMFR0_TGRAN64_2_SHIFT 36
+#define ID_AA64MMFR0_TGRAN16_2_SHIFT 32
#define ID_AA64MMFR0_TGRAN4_SHIFT 28
#define ID_AA64MMFR0_TGRAN64_SHIFT 24
#define ID_AA64MMFR0_TGRAN16_SHIFT 20
@@ -752,6 +773,25 @@
#define ID_DFR0_PERFMON_8_1 0x4
+#define ID_ISAR4_SWP_FRAC_SHIFT 28
+#define ID_ISAR4_PSR_M_SHIFT 24
+#define ID_ISAR4_SYNCH_PRIM_FRAC_SHIFT 20
+#define ID_ISAR4_BARRIER_SHIFT 16
+#define ID_ISAR4_SMC_SHIFT 12
+#define ID_ISAR4_WRITEBACK_SHIFT 8
+#define ID_ISAR4_WITHSHIFTS_SHIFT 4
+#define ID_ISAR4_UNPRIV_SHIFT 0
+
+#define ID_DFR1_MTPMU_SHIFT 0
+
+#define ID_ISAR0_DIVIDE_SHIFT 24
+#define ID_ISAR0_DEBUG_SHIFT 20
+#define ID_ISAR0_COPROC_SHIFT 16
+#define ID_ISAR0_CMPBRANCH_SHIFT 12
+#define ID_ISAR0_BITFIELD_SHIFT 8
+#define ID_ISAR0_BITCOUNT_SHIFT 4
+#define ID_ISAR0_SWAP_SHIFT 0
+
#define ID_ISAR5_RDM_SHIFT 24
#define ID_ISAR5_CRC32_SHIFT 16
#define ID_ISAR5_SHA2_SHIFT 12
@@ -767,6 +807,22 @@
#define ID_ISAR6_DP_SHIFT 4
#define ID_ISAR6_JSCVT_SHIFT 0
+#define ID_MMFR4_EVT_SHIFT 28
+#define ID_MMFR4_CCIDX_SHIFT 24
+#define ID_MMFR4_LSM_SHIFT 20
+#define ID_MMFR4_HPDS_SHIFT 16
+#define ID_MMFR4_CNP_SHIFT 12
+#define ID_MMFR4_XNX_SHIFT 8
+#define ID_MMFR4_SPECSEI_SHIFT 0
+
+#define ID_MMFR5_ETS_SHIFT 0
+
+#define ID_PFR0_DIT_SHIFT 24
+#define ID_PFR0_CSV2_SHIFT 16
+
+#define ID_PFR2_SSBS_SHIFT 4
+#define ID_PFR2_CSV3_SHIFT 0
+
#define MVFR0_FPROUND_SHIFT 28
#define MVFR0_FPSHVEC_SHIFT 24
#define MVFR0_FPSQRT_SHIFT 20
@@ -785,17 +841,14 @@
#define MVFR1_FPDNAN_SHIFT 4
#define MVFR1_FPFTZ_SHIFT 0
-
-#define ID_AA64MMFR0_TGRAN4_SHIFT 28
-#define ID_AA64MMFR0_TGRAN64_SHIFT 24
-#define ID_AA64MMFR0_TGRAN16_SHIFT 20
-
-#define ID_AA64MMFR0_TGRAN4_NI 0xf
-#define ID_AA64MMFR0_TGRAN4_SUPPORTED 0x0
-#define ID_AA64MMFR0_TGRAN64_NI 0xf
-#define ID_AA64MMFR0_TGRAN64_SUPPORTED 0x0
-#define ID_AA64MMFR0_TGRAN16_NI 0x0
-#define ID_AA64MMFR0_TGRAN16_SUPPORTED 0x1
+#define ID_PFR1_GIC_SHIFT 28
+#define ID_PFR1_VIRT_FRAC_SHIFT 24
+#define ID_PFR1_SEC_FRAC_SHIFT 20
+#define ID_PFR1_GENTIMER_SHIFT 16
+#define ID_PFR1_VIRTUALIZATION_SHIFT 12
+#define ID_PFR1_MPROGMOD_SHIFT 8
+#define ID_PFR1_SECURITY_SHIFT 4
+#define ID_PFR1_PROGMOD_SHIFT 0
#if defined(CONFIG_ARM64_4K_PAGES)
#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN4_SHIFT
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index 512174a8e789..6ea8b6a26ae9 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -41,6 +41,10 @@ struct thread_info {
#endif
} preempt;
};
+#ifdef CONFIG_SHADOW_CALL_STACK
+ void *scs_base;
+ void *scs_sp;
+#endif
};
#define thread_saved_pc(tsk) \
@@ -100,11 +104,20 @@ void arch_release_task_struct(struct task_struct *tsk);
_TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \
_TIF_SYSCALL_EMU)
+#ifdef CONFIG_SHADOW_CALL_STACK
+#define INIT_SCS \
+ .scs_base = init_shadow_call_stack, \
+ .scs_sp = init_shadow_call_stack,
+#else
+#define INIT_SCS
+#endif
+
#define INIT_THREAD_INFO(tsk) \
{ \
.flags = _TIF_FOREIGN_FPSTATE, \
.preempt_count = INIT_PREEMPT_COUNT, \
.addr_limit = KERNEL_DS, \
+ INIT_SCS \
}
#endif /* __ASM_THREAD_INFO_H */
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index 803039d504de..3b859596840d 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -38,7 +38,7 @@
#define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5)
#define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800)
-#define __NR_compat_syscalls 439
+#define __NR_compat_syscalls 440
#endif
#define __ARCH_WANT_SYS_CLONE
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index c1c61635f89c..6d95d0c8bf2f 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -883,6 +883,8 @@ __SYSCALL(__NR_clone3, sys_clone3)
__SYSCALL(__NR_openat2, sys_openat2)
#define __NR_pidfd_getfd 438
__SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd)
+#define __NR_faccessat2 439
+__SYSCALL(__NR_faccessat2, sys_faccessat2)
/*
* Please add new compat syscalls above this comment and update
diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
index 61fd26752adc..5051b388c654 100644
--- a/arch/arm64/include/asm/virt.h
+++ b/arch/arm64/include/asm/virt.h
@@ -85,7 +85,7 @@ static inline bool is_kernel_in_hyp_mode(void)
static __always_inline bool has_vhe(void)
{
- if (cpus_have_const_cap(ARM64_HAS_VIRT_HOST_EXTN))
+ if (cpus_have_final_cap(ARM64_HAS_VIRT_HOST_EXTN))
return true;
return false;
diff --git a/arch/arm64/include/asm/vmap_stack.h b/arch/arm64/include/asm/vmap_stack.h
index 0a12115d9638..0cc6636e3f15 100644
--- a/arch/arm64/include/asm/vmap_stack.h
+++ b/arch/arm64/include/asm/vmap_stack.h
@@ -19,10 +19,8 @@ static inline unsigned long *arch_alloc_vmap_stack(size_t stack_size, int node)
{
BUILD_BUG_ON(!IS_ENABLED(CONFIG_VMAP_STACK));
- return __vmalloc_node_range(stack_size, THREAD_ALIGN,
- VMALLOC_START, VMALLOC_END,
- THREADINFO_GFP, PAGE_KERNEL, 0, node,
- __builtin_return_address(0));
+ return __vmalloc_node(stack_size, THREAD_ALIGN, THREADINFO_GFP, node,
+ __builtin_return_address(0));
}
#endif /* __ASM_VMAP_STACK_H */
diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h
index 7752d93bb50f..2d6ba1c2592e 100644
--- a/arch/arm64/include/uapi/asm/hwcap.h
+++ b/arch/arm64/include/uapi/asm/hwcap.h
@@ -73,5 +73,6 @@
#define HWCAP2_BF16 (1 << 14)
#define HWCAP2_DGH (1 << 15)
#define HWCAP2_RNG (1 << 16)
+#define HWCAP2_BTI (1 << 17)
#endif /* _UAPI__ASM_HWCAP_H */
diff --git a/arch/arm64/include/uapi/asm/mman.h b/arch/arm64/include/uapi/asm/mman.h
new file mode 100644
index 000000000000..6fdd71eb644f
--- /dev/null
+++ b/arch/arm64/include/uapi/asm/mman.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI__ASM_MMAN_H
+#define _UAPI__ASM_MMAN_H
+
+#include <asm-generic/mman.h>
+
+#define PROT_BTI 0x10 /* BTI guarded page */
+
+#endif /* ! _UAPI__ASM_MMAN_H */
diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h
index d1bb5b69f1ce..42cbe34d95ce 100644
--- a/arch/arm64/include/uapi/asm/ptrace.h
+++ b/arch/arm64/include/uapi/asm/ptrace.h
@@ -46,6 +46,7 @@
#define PSR_I_BIT 0x00000080
#define PSR_A_BIT 0x00000100
#define PSR_D_BIT 0x00000200
+#define PSR_BTYPE_MASK 0x00000c00
#define PSR_SSBS_BIT 0x00001000
#define PSR_PAN_BIT 0x00400000
#define PSR_UAO_BIT 0x00800000
@@ -55,6 +56,8 @@
#define PSR_Z_BIT 0x40000000
#define PSR_N_BIT 0x80000000
+#define PSR_BTYPE_SHIFT 10
+
/*
* Groups of PSR bits
*/
@@ -63,6 +66,12 @@
#define PSR_x 0x0000ff00 /* Extension */
#define PSR_c 0x000000ff /* Control */
+/* Convenience names for the values of PSTATE.BTYPE */
+#define PSR_BTYPE_NONE (0b00 << PSR_BTYPE_SHIFT)
+#define PSR_BTYPE_JC (0b01 << PSR_BTYPE_SHIFT)
+#define PSR_BTYPE_C (0b10 << PSR_BTYPE_SHIFT)
+#define PSR_BTYPE_J (0b11 << PSR_BTYPE_SHIFT)
+
/* syscall emulation path in ptrace */
#define PTRACE_SYSEMU 31
#define PTRACE_SYSEMU_SINGLESTEP 32
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 4e5b8ee31442..151f28521f1e 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -63,6 +63,7 @@ obj-$(CONFIG_CRASH_CORE) += crash_core.o
obj-$(CONFIG_ARM_SDE_INTERFACE) += sdei.o
obj-$(CONFIG_ARM64_SSBD) += ssbd.o
obj-$(CONFIG_ARM64_PTR_AUTH) += pointer_auth.o
+obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o
obj-y += vdso/ probes/
obj-$(CONFIG_COMPAT_VDSO) += vdso32/
diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c
index a100483b47c4..46ec402e97ed 100644
--- a/arch/arm64/kernel/acpi.c
+++ b/arch/arm64/kernel/acpi.c
@@ -19,6 +19,7 @@
#include <linux/init.h>
#include <linux/irq.h>
#include <linux/irqdomain.h>
+#include <linux/irq_work.h>
#include <linux/memblock.h>
#include <linux/of_fdt.h>
#include <linux/smp.h>
@@ -269,6 +270,7 @@ pgprot_t __acpi_get_mem_attribute(phys_addr_t addr)
int apei_claim_sea(struct pt_regs *regs)
{
int err = -ENOENT;
+ bool return_to_irqs_enabled;
unsigned long current_flags;
if (!IS_ENABLED(CONFIG_ACPI_APEI_GHES))
@@ -276,6 +278,12 @@ int apei_claim_sea(struct pt_regs *regs)
current_flags = local_daif_save_flags();
+ /* current_flags isn't useful here as daif doesn't tell us about pNMI */
+ return_to_irqs_enabled = !irqs_disabled_flags(arch_local_save_flags());
+
+ if (regs)
+ return_to_irqs_enabled = interrupts_enabled(regs);
+
/*
* SEA can interrupt SError, mask it and describe this as an NMI so
* that APEI defers the handling.
@@ -284,6 +292,23 @@ int apei_claim_sea(struct pt_regs *regs)
nmi_enter();
err = ghes_notify_sea();
nmi_exit();
+
+ /*
+ * APEI NMI-like notifications are deferred to irq_work. Unless
+ * we interrupted irqs-masked code, we can do that now.
+ */
+ if (!err) {
+ if (return_to_irqs_enabled) {
+ local_daif_restore(DAIF_PROCCTX_NOIRQ);
+ __irq_enter();
+ irq_work_run();
+ __irq_exit();
+ } else {
+ pr_warn_ratelimited("APEI work queued but not completed");
+ err = -EINPROGRESS;
+ }
+ }
+
local_daif_restore(current_flags);
return err;
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 9981a0a5a87f..0577e2142284 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -34,6 +34,10 @@ int main(void)
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
DEFINE(TSK_TI_TTBR0, offsetof(struct task_struct, thread_info.ttbr0));
#endif
+#ifdef CONFIG_SHADOW_CALL_STACK
+ DEFINE(TSK_TI_SCS_BASE, offsetof(struct task_struct, thread_info.scs_base));
+ DEFINE(TSK_TI_SCS_SP, offsetof(struct task_struct, thread_info.scs_sp));
+#endif
DEFINE(TSK_STACK, offsetof(struct task_struct, stack));
#ifdef CONFIG_STACKPROTECTOR
DEFINE(TSK_STACK_CANARY, offsetof(struct task_struct, stack_canary));
@@ -92,11 +96,8 @@ int main(void)
BLANK();
DEFINE(CPU_BOOT_STACK, offsetof(struct secondary_data, stack));
DEFINE(CPU_BOOT_TASK, offsetof(struct secondary_data, task));
-#ifdef CONFIG_ARM64_PTR_AUTH
- DEFINE(CPU_BOOT_PTRAUTH_KEY, offsetof(struct secondary_data, ptrauth_key));
-#endif
BLANK();
-#ifdef CONFIG_KVM_ARM_HOST
+#ifdef CONFIG_KVM
DEFINE(VCPU_CONTEXT, offsetof(struct kvm_vcpu, arch.ctxt));
DEFINE(VCPU_FAULT_DISR, offsetof(struct kvm_vcpu, arch.fault.disr_el1));
DEFINE(VCPU_WORKAROUND_FLAGS, offsetof(struct kvm_vcpu, arch.workaround_flags));
diff --git a/arch/arm64/kernel/cpu-reset.S b/arch/arm64/kernel/cpu-reset.S
index 38087b4c0432..4a18055b2ff9 100644
--- a/arch/arm64/kernel/cpu-reset.S
+++ b/arch/arm64/kernel/cpu-reset.S
@@ -29,7 +29,7 @@
* branch to what would be the reset vector. It must be executed with the
* flat identity mapping.
*/
-ENTRY(__cpu_soft_restart)
+SYM_CODE_START(__cpu_soft_restart)
/* Clear sctlr_el1 flags. */
mrs x12, sctlr_el1
mov_q x13, SCTLR_ELx_FLAGS
@@ -47,6 +47,6 @@ ENTRY(__cpu_soft_restart)
mov x1, x3 // arg1
mov x2, x4 // arg2
br x8
-ENDPROC(__cpu_soft_restart)
+SYM_CODE_END(__cpu_soft_restart)
.popsection
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index df56d2295d16..ad06d6802d2e 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -234,7 +234,7 @@ static int detect_harden_bp_fw(void)
smccc_end = NULL;
break;
-#if IS_ENABLED(CONFIG_KVM_ARM_HOST)
+#if IS_ENABLED(CONFIG_KVM)
case SMCCC_CONDUIT_SMC:
cb = call_smc_arch_workaround_1;
smccc_start = __smccc_workaround_1_smc;
@@ -635,7 +635,7 @@ has_neoverse_n1_erratum_1542419(const struct arm64_cpu_capabilities *entry,
return is_midr_in_range(midr, &range) && has_dic;
}
-#if defined(CONFIG_HARDEN_EL2_VECTORS) || defined(CONFIG_ARM64_ERRATUM_1319367)
+#if defined(CONFIG_HARDEN_EL2_VECTORS)
static const struct midr_range ca57_a72[] = {
MIDR_ALL_VERSIONS(MIDR_CORTEX_A57),
@@ -757,12 +757,16 @@ static const struct arm64_cpu_capabilities erratum_843419_list[] = {
};
#endif
-#ifdef CONFIG_ARM64_WORKAROUND_SPECULATIVE_AT_VHE
-static const struct midr_range erratum_speculative_at_vhe_list[] = {
+#ifdef CONFIG_ARM64_WORKAROUND_SPECULATIVE_AT
+static const struct midr_range erratum_speculative_at_list[] = {
#ifdef CONFIG_ARM64_ERRATUM_1165522
/* Cortex A76 r0p0 to r2p0 */
MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 2, 0),
#endif
+#ifdef CONFIG_ARM64_ERRATUM_1319367
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A57),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A72),
+#endif
#ifdef CONFIG_ARM64_ERRATUM_1530923
/* Cortex A55 r0p0 to r2p0 */
MIDR_RANGE(MIDR_CORTEX_A55, 0, 0, 2, 0),
@@ -774,7 +778,7 @@ static const struct midr_range erratum_speculative_at_vhe_list[] = {
const struct arm64_cpu_capabilities arm64_errata[] = {
#ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE
{
- .desc = "ARM errata 826319, 827319, 824069, 819472",
+ .desc = "ARM errata 826319, 827319, 824069, or 819472",
.capability = ARM64_WORKAROUND_CLEAN_CACHE,
ERRATA_MIDR_RANGE_LIST(workaround_clean_cache),
.cpu_enable = cpu_enable_cache_maint_trap,
@@ -856,7 +860,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
#endif
#ifdef CONFIG_ARM64_WORKAROUND_REPEAT_TLBI
{
- .desc = "Qualcomm erratum 1009, ARM erratum 1286807",
+ .desc = "Qualcomm erratum 1009, or ARM erratum 1286807",
.capability = ARM64_WORKAROUND_REPEAT_TLBI,
.type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
.matches = cpucap_multi_entry_cap_matches,
@@ -897,11 +901,11 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
ERRATA_MIDR_RANGE_LIST(erratum_1418040_list),
},
#endif
-#ifdef CONFIG_ARM64_WORKAROUND_SPECULATIVE_AT_VHE
+#ifdef CONFIG_ARM64_WORKAROUND_SPECULATIVE_AT
{
- .desc = "ARM errata 1165522, 1530923",
- .capability = ARM64_WORKAROUND_SPECULATIVE_AT_VHE,
- ERRATA_MIDR_RANGE_LIST(erratum_speculative_at_vhe_list),
+ .desc = "ARM errata 1165522, 1319367, or 1530923",
+ .capability = ARM64_WORKAROUND_SPECULATIVE_AT,
+ ERRATA_MIDR_RANGE_LIST(erratum_speculative_at_list),
},
#endif
#ifdef CONFIG_ARM64_ERRATUM_1463225
@@ -935,13 +939,6 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
.cpu_enable = cpu_enable_trap_ctr_access,
},
#endif
-#ifdef CONFIG_ARM64_ERRATUM_1319367
- {
- .desc = "ARM erratum 1319367",
- .capability = ARM64_WORKAROUND_SPECULATIVE_AT_NVHE,
- ERRATA_MIDR_RANGE_LIST(ca57_a72),
- },
-#endif
{
}
};
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 9fac745aa7bb..4ae41670c2e6 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -3,6 +3,61 @@
* Contains CPU feature definitions
*
* Copyright (C) 2015 ARM Ltd.
+ *
+ * A note for the weary kernel hacker: the code here is confusing and hard to
+ * follow! That's partly because it's solving a nasty problem, but also because
+ * there's a little bit of over-abstraction that tends to obscure what's going
+ * on behind a maze of helper functions and macros.
+ *
+ * The basic problem is that hardware folks have started gluing together CPUs
+ * with distinct architectural features; in some cases even creating SoCs where
+ * user-visible instructions are available only on a subset of the available
+ * cores. We try to address this by snapshotting the feature registers of the
+ * boot CPU and comparing these with the feature registers of each secondary
+ * CPU when bringing them up. If there is a mismatch, then we update the
+ * snapshot state to indicate the lowest-common denominator of the feature,
+ * known as the "safe" value. This snapshot state can be queried to view the
+ * "sanitised" value of a feature register.
+ *
+ * The sanitised register values are used to decide which capabilities we
+ * have in the system. These may be in the form of traditional "hwcaps"
+ * advertised to userspace or internal "cpucaps" which are used to configure
+ * things like alternative patching and static keys. While a feature mismatch
+ * may result in a TAINT_CPU_OUT_OF_SPEC kernel taint, a capability mismatch
+ * may prevent a CPU from being onlined at all.
+ *
+ * Some implementation details worth remembering:
+ *
+ * - Mismatched features are *always* sanitised to a "safe" value, which
+ * usually indicates that the feature is not supported.
+ *
+ * - A mismatched feature marked with FTR_STRICT will cause a "SANITY CHECK"
+ * warning when onlining an offending CPU and the kernel will be tainted
+ * with TAINT_CPU_OUT_OF_SPEC.
+ *
+ * - Features marked as FTR_VISIBLE have their sanitised value visible to
+ * userspace. FTR_VISIBLE features in registers that are only visible
+ * to EL0 by trapping *must* have a corresponding HWCAP so that late
+ * onlining of CPUs cannot lead to features disappearing at runtime.
+ *
+ * - A "feature" is typically a 4-bit register field. A "capability" is the
+ * high-level description derived from the sanitised field value.
+ *
+ * - Read the Arm ARM (DDI 0487F.a) section D13.1.3 ("Principles of the ID
+ * scheme for fields in ID registers") to understand when feature fields
+ * may be signed or unsigned (FTR_SIGNED and FTR_UNSIGNED accordingly).
+ *
+ * - KVM exposes its own view of the feature registers to guest operating
+ * systems regardless of FTR_VISIBLE. This is typically driven from the
+ * sanitised register values to allow virtual CPUs to be migrated between
+ * arbitrary physical CPUs, but some features not present on the host are
+ * also advertised and emulated. Look at sys_reg_descs[] for the gory
+ * details.
+ *
+ * - If the arm64_ftr_bits[] for a register has a missing field, then this
+ * field is treated as STRICT RES0, including for read_sanitised_ftr_reg().
+ * This is stronger than FTR_HIDDEN and can be used to hide features from
+ * KVM guests.
*/
#define pr_fmt(fmt) "CPU features: " fmt
@@ -124,6 +179,7 @@ static bool __system_matches_cap(unsigned int n);
*/
static const struct arm64_ftr_bits ftr_id_aa64isar0[] = {
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_RNDR_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_TLB_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_TS_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_FHM_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_DP_SHIFT, 4, 0),
@@ -166,22 +222,27 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_CSV2_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_DIT_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_AMU_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_MPAM_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_SEL2_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_SVE_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_RAS_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_GIC_SHIFT, 4, 0),
S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI),
S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI),
- /* Linux doesn't care about the EL3 */
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL3_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL2_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_SHIFT, 4, ID_AA64PFR0_EL1_64BIT_ONLY),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL0_SHIFT, 4, ID_AA64PFR0_EL0_64BIT_ONLY),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL2_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_SHIFT, 4, ID_AA64PFR0_EL1_64BIT_ONLY),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL0_SHIFT, 4, ID_AA64PFR0_EL0_64BIT_ONLY),
ARM64_FTR_END,
};
static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = {
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_MPAMFRAC_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_RASFRAC_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_SSBS_SHIFT, 4, ID_AA64PFR1_SSBS_PSTATE_NI),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_BTI),
+ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_BT_SHIFT, 4, 0),
ARM64_FTR_END,
};
@@ -209,6 +270,24 @@ static const struct arm64_ftr_bits ftr_id_aa64zfr0[] = {
static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = {
/*
+ * Page size not being supported at Stage-2 is not fatal. You
+ * just give up KVM if PAGE_SIZE isn't supported there. Go fix
+ * your favourite nesting hypervisor.
+ *
+ * There is a small corner case where the hypervisor explicitly
+ * advertises a given granule size at Stage-2 (value 2) on some
+ * vCPUs, and uses the fallback to Stage-1 (value 0) for other
+ * vCPUs. Although this is not forbidden by the architecture, it
+ * indicates that the hypervisor is being silly (or buggy).
+ *
+ * We make no effort to cope with this and pretend that if these
+ * fields are inconsistent across vCPUs, then it isn't worth
+ * trying to bring KVM up.
+ */
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN4_2_SHIFT, 4, 1),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN64_2_SHIFT, 4, 1),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN16_2_SHIFT, 4, 1),
+ /*
* We already refuse to boot CPUs that don't support our configured
* page size, so we can only detect mismatches for a page size other
* than the one we're currently using. Unfortunately, SoCs like this
@@ -247,7 +326,7 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = {
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_FWB_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_AT_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_LVA_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_IESB_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_IESB_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_LSM_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_UAO_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_CNP_SHIFT, 4, 0),
@@ -289,7 +368,7 @@ static const struct arm64_ftr_bits ftr_id_mmfr0[] = {
};
static const struct arm64_ftr_bits ftr_id_aa64dfr0[] = {
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 36, 28, 0),
+ S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 36, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64DFR0_PMSVER_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_CTX_CMPS_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_WRPS_SHIFT, 4, 0),
@@ -316,6 +395,16 @@ static const struct arm64_ftr_bits ftr_dczid[] = {
ARM64_FTR_END,
};
+static const struct arm64_ftr_bits ftr_id_isar0[] = {
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_DIVIDE_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_DEBUG_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_COPROC_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_CMPBRANCH_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_BITFIELD_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_BITCOUNT_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_SWAP_SHIFT, 4, 0),
+ ARM64_FTR_END,
+};
static const struct arm64_ftr_bits ftr_id_isar5[] = {
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_RDM_SHIFT, 4, 0),
@@ -328,7 +417,37 @@ static const struct arm64_ftr_bits ftr_id_isar5[] = {
};
static const struct arm64_ftr_bits ftr_id_mmfr4[] = {
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_EVT_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_CCIDX_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_LSM_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_HPDS_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_CNP_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_XNX_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0), /* ac2 */
+ /*
+ * SpecSEI = 1 indicates that the PE might generate an SError on an
+ * external abort on speculative read. It is safer to assume that an
+ * SError might be generated than that it will not be, hence the field
+ * has been classified as FTR_HIGHER_SAFE.
+ */
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_HIGHER_SAFE, ID_MMFR4_SPECSEI_SHIFT, 4, 0),
+ ARM64_FTR_END,
+};
+
+static const struct arm64_ftr_bits ftr_id_isar4[] = {
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_SWP_FRAC_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_PSR_M_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_SYNCH_PRIM_FRAC_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_BARRIER_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_SMC_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_WRITEBACK_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_WITHSHIFTS_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_UNPRIV_SHIFT, 4, 0),
+ ARM64_FTR_END,
+};
+
+static const struct arm64_ftr_bits ftr_id_mmfr5[] = {
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR5_ETS_SHIFT, 4, 0),
ARM64_FTR_END,
};
@@ -344,6 +463,8 @@ static const struct arm64_ftr_bits ftr_id_isar6[] = {
};
static const struct arm64_ftr_bits ftr_id_pfr0[] = {
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR0_DIT_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_PFR0_CSV2_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 12, 4, 0), /* State3 */
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 8, 4, 0), /* State2 */
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0), /* State1 */
@@ -351,8 +472,26 @@ static const struct arm64_ftr_bits ftr_id_pfr0[] = {
ARM64_FTR_END,
};
+static const struct arm64_ftr_bits ftr_id_pfr1[] = {
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_GIC_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_VIRT_FRAC_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_SEC_FRAC_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_GENTIMER_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_VIRTUALIZATION_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_MPROGMOD_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_SECURITY_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_PROGMOD_SHIFT, 4, 0),
+ ARM64_FTR_END,
+};
+
+static const struct arm64_ftr_bits ftr_id_pfr2[] = {
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR2_SSBS_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_PFR2_CSV3_SHIFT, 4, 0),
+ ARM64_FTR_END,
+};
+
static const struct arm64_ftr_bits ftr_id_dfr0[] = {
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 28, 4, 0),
+ /* [31:28] TraceFilt */
S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 24, 4, 0xf), /* PerfMon */
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 0),
@@ -363,6 +502,11 @@ static const struct arm64_ftr_bits ftr_id_dfr0[] = {
ARM64_FTR_END,
};
+static const struct arm64_ftr_bits ftr_id_dfr1[] = {
+ S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR1_MTPMU_SHIFT, 4, 0),
+ ARM64_FTR_END,
+};
+
static const struct arm64_ftr_bits ftr_zcr[] = {
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE,
ZCR_ELx_LEN_SHIFT, ZCR_ELx_LEN_SIZE, 0), /* LEN */
@@ -373,7 +517,7 @@ static const struct arm64_ftr_bits ftr_zcr[] = {
* Common ftr bits for a 32bit register with all hidden, strict
* attributes, with 4bit feature fields and a default safe value of
* 0. Covers the following 32bit registers:
- * id_isar[0-4], id_mmfr[1-3], id_pfr1, mvfr[0-1]
+ * id_isar[1-4], id_mmfr[1-3], id_pfr1, mvfr[0-1]
*/
static const struct arm64_ftr_bits ftr_generic_32bits[] = {
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 28, 4, 0),
@@ -411,7 +555,7 @@ static const struct __ftr_reg_entry {
/* Op1 = 0, CRn = 0, CRm = 1 */
ARM64_FTR_REG(SYS_ID_PFR0_EL1, ftr_id_pfr0),
- ARM64_FTR_REG(SYS_ID_PFR1_EL1, ftr_generic_32bits),
+ ARM64_FTR_REG(SYS_ID_PFR1_EL1, ftr_id_pfr1),
ARM64_FTR_REG(SYS_ID_DFR0_EL1, ftr_id_dfr0),
ARM64_FTR_REG(SYS_ID_MMFR0_EL1, ftr_id_mmfr0),
ARM64_FTR_REG(SYS_ID_MMFR1_EL1, ftr_generic_32bits),
@@ -419,11 +563,11 @@ static const struct __ftr_reg_entry {
ARM64_FTR_REG(SYS_ID_MMFR3_EL1, ftr_generic_32bits),
/* Op1 = 0, CRn = 0, CRm = 2 */
- ARM64_FTR_REG(SYS_ID_ISAR0_EL1, ftr_generic_32bits),
+ ARM64_FTR_REG(SYS_ID_ISAR0_EL1, ftr_id_isar0),
ARM64_FTR_REG(SYS_ID_ISAR1_EL1, ftr_generic_32bits),
ARM64_FTR_REG(SYS_ID_ISAR2_EL1, ftr_generic_32bits),
ARM64_FTR_REG(SYS_ID_ISAR3_EL1, ftr_generic_32bits),
- ARM64_FTR_REG(SYS_ID_ISAR4_EL1, ftr_generic_32bits),
+ ARM64_FTR_REG(SYS_ID_ISAR4_EL1, ftr_id_isar4),
ARM64_FTR_REG(SYS_ID_ISAR5_EL1, ftr_id_isar5),
ARM64_FTR_REG(SYS_ID_MMFR4_EL1, ftr_id_mmfr4),
ARM64_FTR_REG(SYS_ID_ISAR6_EL1, ftr_id_isar6),
@@ -432,6 +576,9 @@ static const struct __ftr_reg_entry {
ARM64_FTR_REG(SYS_MVFR0_EL1, ftr_generic_32bits),
ARM64_FTR_REG(SYS_MVFR1_EL1, ftr_generic_32bits),
ARM64_FTR_REG(SYS_MVFR2_EL1, ftr_mvfr2),
+ ARM64_FTR_REG(SYS_ID_PFR2_EL1, ftr_id_pfr2),
+ ARM64_FTR_REG(SYS_ID_DFR1_EL1, ftr_id_dfr1),
+ ARM64_FTR_REG(SYS_ID_MMFR5_EL1, ftr_id_mmfr5),
/* Op1 = 0, CRn = 0, CRm = 4 */
ARM64_FTR_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0),
@@ -468,16 +615,16 @@ static int search_cmp_ftr_reg(const void *id, const void *regp)
}
/*
- * get_arm64_ftr_reg - Lookup a feature register entry using its
- * sys_reg() encoding. With the array arm64_ftr_regs sorted in the
- * ascending order of sys_id , we use binary search to find a matching
+ * get_arm64_ftr_reg_nowarn - Looks up a feature register entry using
+ * its sys_reg() encoding. With the array arm64_ftr_regs sorted in the
+ * ascending order of sys_id, we use binary search to find a matching
* entry.
*
* returns - Upon success, matching ftr_reg entry for id.
* - NULL on failure. It is up to the caller to decide
* the impact of a failure.
*/
-static struct arm64_ftr_reg *get_arm64_ftr_reg(u32 sys_id)
+static struct arm64_ftr_reg *get_arm64_ftr_reg_nowarn(u32 sys_id)
{
const struct __ftr_reg_entry *ret;
@@ -491,6 +638,27 @@ static struct arm64_ftr_reg *get_arm64_ftr_reg(u32 sys_id)
return NULL;
}
+/*
+ * get_arm64_ftr_reg - Looks up a feature register entry using
+ * its sys_reg() encoding. This calls get_arm64_ftr_reg_nowarn().
+ *
+ * returns - Upon success, matching ftr_reg entry for id.
+ * - NULL on failure, but with a WARN_ON().
+ */
+static struct arm64_ftr_reg *get_arm64_ftr_reg(u32 sys_id)
+{
+ struct arm64_ftr_reg *reg;
+
+ reg = get_arm64_ftr_reg_nowarn(sys_id);
+
+ /*
+ * Requesting a non-existent register is an error. Warn
+ * and let the caller handle it.
+ */
+ WARN_ON(!reg);
+ return reg;
+}
+
static u64 arm64_ftr_set_value(const struct arm64_ftr_bits *ftrp, s64 reg,
s64 ftr_val)
{
@@ -552,7 +720,8 @@ static void __init init_cpu_ftr_reg(u32 sys_reg, u64 new)
const struct arm64_ftr_bits *ftrp;
struct arm64_ftr_reg *reg = get_arm64_ftr_reg(sys_reg);
- BUG_ON(!reg);
+ if (!reg)
+ return;
for (ftrp = reg->ftr_bits; ftrp->width; ftrp++) {
u64 ftr_mask = arm64_ftr_mask(ftrp);
@@ -625,6 +794,7 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info)
if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) {
init_cpu_ftr_reg(SYS_ID_DFR0_EL1, info->reg_id_dfr0);
+ init_cpu_ftr_reg(SYS_ID_DFR1_EL1, info->reg_id_dfr1);
init_cpu_ftr_reg(SYS_ID_ISAR0_EL1, info->reg_id_isar0);
init_cpu_ftr_reg(SYS_ID_ISAR1_EL1, info->reg_id_isar1);
init_cpu_ftr_reg(SYS_ID_ISAR2_EL1, info->reg_id_isar2);
@@ -636,8 +806,11 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info)
init_cpu_ftr_reg(SYS_ID_MMFR1_EL1, info->reg_id_mmfr1);
init_cpu_ftr_reg(SYS_ID_MMFR2_EL1, info->reg_id_mmfr2);
init_cpu_ftr_reg(SYS_ID_MMFR3_EL1, info->reg_id_mmfr3);
+ init_cpu_ftr_reg(SYS_ID_MMFR4_EL1, info->reg_id_mmfr4);
+ init_cpu_ftr_reg(SYS_ID_MMFR5_EL1, info->reg_id_mmfr5);
init_cpu_ftr_reg(SYS_ID_PFR0_EL1, info->reg_id_pfr0);
init_cpu_ftr_reg(SYS_ID_PFR1_EL1, info->reg_id_pfr1);
+ init_cpu_ftr_reg(SYS_ID_PFR2_EL1, info->reg_id_pfr2);
init_cpu_ftr_reg(SYS_MVFR0_EL1, info->reg_mvfr0);
init_cpu_ftr_reg(SYS_MVFR1_EL1, info->reg_mvfr1);
init_cpu_ftr_reg(SYS_MVFR2_EL1, info->reg_mvfr2);
@@ -682,7 +855,9 @@ static int check_update_ftr_reg(u32 sys_id, int cpu, u64 val, u64 boot)
{
struct arm64_ftr_reg *regp = get_arm64_ftr_reg(sys_id);
- BUG_ON(!regp);
+ if (!regp)
+ return 0;
+
update_cpu_ftr_reg(regp, val);
if ((boot & regp->strict_mask) == (val & regp->strict_mask))
return 0;
@@ -691,6 +866,104 @@ static int check_update_ftr_reg(u32 sys_id, int cpu, u64 val, u64 boot)
return 1;
}
+static void relax_cpu_ftr_reg(u32 sys_id, int field)
+{
+ const struct arm64_ftr_bits *ftrp;
+ struct arm64_ftr_reg *regp = get_arm64_ftr_reg(sys_id);
+
+ if (!regp)
+ return;
+
+ for (ftrp = regp->ftr_bits; ftrp->width; ftrp++) {
+ if (ftrp->shift == field) {
+ regp->strict_mask &= ~arm64_ftr_mask(ftrp);
+ break;
+ }
+ }
+
+ /* Bogus field? */
+ WARN_ON(!ftrp->width);
+}
+
+static int update_32bit_cpu_features(int cpu, struct cpuinfo_arm64 *info,
+ struct cpuinfo_arm64 *boot)
+{
+ int taint = 0;
+ u64 pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
+
+ /*
+ * If we don't have AArch32 at all then skip the checks entirely
+ * as the register values may be UNKNOWN and we're not going to be
+ * using them for anything.
+ */
+ if (!id_aa64pfr0_32bit_el0(pfr0))
+ return taint;
+
+ /*
+ * If we don't have AArch32 at EL1, then relax the strictness of
+ * EL1-dependent register fields to avoid spurious sanity-check failures.
+ */
+ if (!id_aa64pfr0_32bit_el1(pfr0)) {
+ relax_cpu_ftr_reg(SYS_ID_ISAR4_EL1, ID_ISAR4_SMC_SHIFT);
+ relax_cpu_ftr_reg(SYS_ID_PFR1_EL1, ID_PFR1_VIRT_FRAC_SHIFT);
+ relax_cpu_ftr_reg(SYS_ID_PFR1_EL1, ID_PFR1_SEC_FRAC_SHIFT);
+ relax_cpu_ftr_reg(SYS_ID_PFR1_EL1, ID_PFR1_VIRTUALIZATION_SHIFT);
+ relax_cpu_ftr_reg(SYS_ID_PFR1_EL1, ID_PFR1_SECURITY_SHIFT);
+ relax_cpu_ftr_reg(SYS_ID_PFR1_EL1, ID_PFR1_PROGMOD_SHIFT);
+ }
+
+ taint |= check_update_ftr_reg(SYS_ID_DFR0_EL1, cpu,
+ info->reg_id_dfr0, boot->reg_id_dfr0);
+ taint |= check_update_ftr_reg(SYS_ID_DFR1_EL1, cpu,
+ info->reg_id_dfr1, boot->reg_id_dfr1);
+ taint |= check_update_ftr_reg(SYS_ID_ISAR0_EL1, cpu,
+ info->reg_id_isar0, boot->reg_id_isar0);
+ taint |= check_update_ftr_reg(SYS_ID_ISAR1_EL1, cpu,
+ info->reg_id_isar1, boot->reg_id_isar1);
+ taint |= check_update_ftr_reg(SYS_ID_ISAR2_EL1, cpu,
+ info->reg_id_isar2, boot->reg_id_isar2);
+ taint |= check_update_ftr_reg(SYS_ID_ISAR3_EL1, cpu,
+ info->reg_id_isar3, boot->reg_id_isar3);
+ taint |= check_update_ftr_reg(SYS_ID_ISAR4_EL1, cpu,
+ info->reg_id_isar4, boot->reg_id_isar4);
+ taint |= check_update_ftr_reg(SYS_ID_ISAR5_EL1, cpu,
+ info->reg_id_isar5, boot->reg_id_isar5);
+ taint |= check_update_ftr_reg(SYS_ID_ISAR6_EL1, cpu,
+ info->reg_id_isar6, boot->reg_id_isar6);
+
+ /*
+ * Regardless of the value of the AuxReg field, the AIFSR, ADFSR, and
+ * ACTLR formats could differ across CPUs and therefore would have to
+ * be trapped for virtualization anyway.
+ */
+ taint |= check_update_ftr_reg(SYS_ID_MMFR0_EL1, cpu,
+ info->reg_id_mmfr0, boot->reg_id_mmfr0);
+ taint |= check_update_ftr_reg(SYS_ID_MMFR1_EL1, cpu,
+ info->reg_id_mmfr1, boot->reg_id_mmfr1);
+ taint |= check_update_ftr_reg(SYS_ID_MMFR2_EL1, cpu,
+ info->reg_id_mmfr2, boot->reg_id_mmfr2);
+ taint |= check_update_ftr_reg(SYS_ID_MMFR3_EL1, cpu,
+ info->reg_id_mmfr3, boot->reg_id_mmfr3);
+ taint |= check_update_ftr_reg(SYS_ID_MMFR4_EL1, cpu,
+ info->reg_id_mmfr4, boot->reg_id_mmfr4);
+ taint |= check_update_ftr_reg(SYS_ID_MMFR5_EL1, cpu,
+ info->reg_id_mmfr5, boot->reg_id_mmfr5);
+ taint |= check_update_ftr_reg(SYS_ID_PFR0_EL1, cpu,
+ info->reg_id_pfr0, boot->reg_id_pfr0);
+ taint |= check_update_ftr_reg(SYS_ID_PFR1_EL1, cpu,
+ info->reg_id_pfr1, boot->reg_id_pfr1);
+ taint |= check_update_ftr_reg(SYS_ID_PFR2_EL1, cpu,
+ info->reg_id_pfr2, boot->reg_id_pfr2);
+ taint |= check_update_ftr_reg(SYS_MVFR0_EL1, cpu,
+ info->reg_mvfr0, boot->reg_mvfr0);
+ taint |= check_update_ftr_reg(SYS_MVFR1_EL1, cpu,
+ info->reg_mvfr1, boot->reg_mvfr1);
+ taint |= check_update_ftr_reg(SYS_MVFR2_EL1, cpu,
+ info->reg_mvfr2, boot->reg_mvfr2);
+
+ return taint;
+}
+
/*
* Update system wide CPU feature registers with the values from a
* non-boot CPU. Also performs SANITY checks to make sure that there
@@ -753,9 +1026,6 @@ void update_cpu_features(int cpu,
taint |= check_update_ftr_reg(SYS_ID_AA64MMFR2_EL1, cpu,
info->reg_id_aa64mmfr2, boot->reg_id_aa64mmfr2);
- /*
- * EL3 is not our concern.
- */
taint |= check_update_ftr_reg(SYS_ID_AA64PFR0_EL1, cpu,
info->reg_id_aa64pfr0, boot->reg_id_aa64pfr0);
taint |= check_update_ftr_reg(SYS_ID_AA64PFR1_EL1, cpu,
@@ -764,55 +1034,6 @@ void update_cpu_features(int cpu,
taint |= check_update_ftr_reg(SYS_ID_AA64ZFR0_EL1, cpu,
info->reg_id_aa64zfr0, boot->reg_id_aa64zfr0);
- /*
- * If we have AArch32, we care about 32-bit features for compat.
- * If the system doesn't support AArch32, don't update them.
- */
- if (id_aa64pfr0_32bit_el0(read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1)) &&
- id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) {
-
- taint |= check_update_ftr_reg(SYS_ID_DFR0_EL1, cpu,
- info->reg_id_dfr0, boot->reg_id_dfr0);
- taint |= check_update_ftr_reg(SYS_ID_ISAR0_EL1, cpu,
- info->reg_id_isar0, boot->reg_id_isar0);
- taint |= check_update_ftr_reg(SYS_ID_ISAR1_EL1, cpu,
- info->reg_id_isar1, boot->reg_id_isar1);
- taint |= check_update_ftr_reg(SYS_ID_ISAR2_EL1, cpu,
- info->reg_id_isar2, boot->reg_id_isar2);
- taint |= check_update_ftr_reg(SYS_ID_ISAR3_EL1, cpu,
- info->reg_id_isar3, boot->reg_id_isar3);
- taint |= check_update_ftr_reg(SYS_ID_ISAR4_EL1, cpu,
- info->reg_id_isar4, boot->reg_id_isar4);
- taint |= check_update_ftr_reg(SYS_ID_ISAR5_EL1, cpu,
- info->reg_id_isar5, boot->reg_id_isar5);
- taint |= check_update_ftr_reg(SYS_ID_ISAR6_EL1, cpu,
- info->reg_id_isar6, boot->reg_id_isar6);
-
- /*
- * Regardless of the value of the AuxReg field, the AIFSR, ADFSR, and
- * ACTLR formats could differ across CPUs and therefore would have to
- * be trapped for virtualization anyway.
- */
- taint |= check_update_ftr_reg(SYS_ID_MMFR0_EL1, cpu,
- info->reg_id_mmfr0, boot->reg_id_mmfr0);
- taint |= check_update_ftr_reg(SYS_ID_MMFR1_EL1, cpu,
- info->reg_id_mmfr1, boot->reg_id_mmfr1);
- taint |= check_update_ftr_reg(SYS_ID_MMFR2_EL1, cpu,
- info->reg_id_mmfr2, boot->reg_id_mmfr2);
- taint |= check_update_ftr_reg(SYS_ID_MMFR3_EL1, cpu,
- info->reg_id_mmfr3, boot->reg_id_mmfr3);
- taint |= check_update_ftr_reg(SYS_ID_PFR0_EL1, cpu,
- info->reg_id_pfr0, boot->reg_id_pfr0);
- taint |= check_update_ftr_reg(SYS_ID_PFR1_EL1, cpu,
- info->reg_id_pfr1, boot->reg_id_pfr1);
- taint |= check_update_ftr_reg(SYS_MVFR0_EL1, cpu,
- info->reg_mvfr0, boot->reg_mvfr0);
- taint |= check_update_ftr_reg(SYS_MVFR1_EL1, cpu,
- info->reg_mvfr1, boot->reg_mvfr1);
- taint |= check_update_ftr_reg(SYS_MVFR2_EL1, cpu,
- info->reg_mvfr2, boot->reg_mvfr2);
- }
-
if (id_aa64pfr0_sve(info->reg_id_aa64pfr0)) {
taint |= check_update_ftr_reg(SYS_ZCR_EL1, cpu,
info->reg_zcr, boot->reg_zcr);
@@ -824,6 +1045,12 @@ void update_cpu_features(int cpu,
}
/*
+ * This relies on a sanitised view of the AArch64 ID registers
+ * (e.g. SYS_ID_AA64PFR0_EL1), so we call it last.
+ */
+ taint |= update_32bit_cpu_features(cpu, info, boot);
+
+ /*
* Mismatched CPU features are a recipe for disaster. Don't even
* pretend to support them.
*/
@@ -837,8 +1064,8 @@ u64 read_sanitised_ftr_reg(u32 id)
{
struct arm64_ftr_reg *regp = get_arm64_ftr_reg(id);
- /* We shouldn't get a request for an unsupported register */
- BUG_ON(!regp);
+ if (!regp)
+ return 0;
return regp->sys_val;
}
@@ -854,11 +1081,15 @@ static u64 __read_sysreg_by_encoding(u32 sys_id)
switch (sys_id) {
read_sysreg_case(SYS_ID_PFR0_EL1);
read_sysreg_case(SYS_ID_PFR1_EL1);
+ read_sysreg_case(SYS_ID_PFR2_EL1);
read_sysreg_case(SYS_ID_DFR0_EL1);
+ read_sysreg_case(SYS_ID_DFR1_EL1);
read_sysreg_case(SYS_ID_MMFR0_EL1);
read_sysreg_case(SYS_ID_MMFR1_EL1);
read_sysreg_case(SYS_ID_MMFR2_EL1);
read_sysreg_case(SYS_ID_MMFR3_EL1);
+ read_sysreg_case(SYS_ID_MMFR4_EL1);
+ read_sysreg_case(SYS_ID_MMFR5_EL1);
read_sysreg_case(SYS_ID_ISAR0_EL1);
read_sysreg_case(SYS_ID_ISAR1_EL1);
read_sysreg_case(SYS_ID_ISAR2_EL1);
@@ -1409,6 +1640,21 @@ static bool can_use_gic_priorities(const struct arm64_cpu_capabilities *entry,
}
#endif
+#ifdef CONFIG_ARM64_BTI
+static void bti_enable(const struct arm64_cpu_capabilities *__unused)
+{
+ /*
+ * Use of X16/X17 for tail-calls and trampolines that jump to
+ * function entry points using BR is a requirement for
+ * marking binaries with GNU_PROPERTY_AARCH64_FEATURE_1_BTI.
+ * So, be strict and forbid BRs via other registers from jumping
+ * onto a PACIxSP instruction:
+ */
+ sysreg_clear_set(sctlr_el1, 0, SCTLR_EL1_BT0 | SCTLR_EL1_BT1);
+ isb();
+}
+#endif /* CONFIG_ARM64_BTI */
+
/* Internal helper functions to match cpu capability type */
static bool
cpucap_late_cpu_optional(const struct arm64_cpu_capabilities *cap)
@@ -1511,6 +1757,18 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.field_pos = ID_AA64PFR0_EL0_SHIFT,
.min_field_value = ID_AA64PFR0_EL0_32BIT_64BIT,
},
+#ifdef CONFIG_KVM
+ {
+ .desc = "32-bit EL1 Support",
+ .capability = ARM64_HAS_32BIT_EL1,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_cpuid_feature,
+ .sys_reg = SYS_ID_AA64PFR0_EL1,
+ .sign = FTR_UNSIGNED,
+ .field_pos = ID_AA64PFR0_EL1_SHIFT,
+ .min_field_value = ID_AA64PFR0_EL1_32BIT_64BIT,
+ },
+#endif
{
.desc = "Kernel page table isolation (KPTI)",
.capability = ARM64_UNMAP_KERNEL_AT_EL0,
@@ -1779,6 +2037,23 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.min_field_value = 1,
},
#endif
+#ifdef CONFIG_ARM64_BTI
+ {
+ .desc = "Branch Target Identification",
+ .capability = ARM64_BTI,
+#ifdef CONFIG_ARM64_BTI_KERNEL
+ .type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE,
+#else
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+#endif
+ .matches = has_cpuid_feature,
+ .cpu_enable = bti_enable,
+ .sys_reg = SYS_ID_AA64PFR1_EL1,
+ .field_pos = ID_AA64PFR1_BT_SHIFT,
+ .min_field_value = ID_AA64PFR1_BT_BTI,
+ .sign = FTR_UNSIGNED,
+ },
+#endif
{},
};
@@ -1888,6 +2163,9 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_F64MM_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_F64MM, CAP_HWCAP, KERNEL_HWCAP_SVEF64MM),
#endif
HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SSBS_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_SSBS_PSTATE_INSNS, CAP_HWCAP, KERNEL_HWCAP_SSBS),
+#ifdef CONFIG_ARM64_BTI
+ HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_BT_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_BT_BTI, CAP_HWCAP, KERNEL_HWCAP_BTI),
+#endif
#ifdef CONFIG_ARM64_PTR_AUTH
HWCAP_MULTI_CAP(ptr_auth_hwcap_addr_matches, CAP_HWCAP, KERNEL_HWCAP_PACA),
HWCAP_MULTI_CAP(ptr_auth_hwcap_gen_matches, CAP_HWCAP, KERNEL_HWCAP_PACG),
@@ -2181,6 +2459,36 @@ static void verify_sve_features(void)
/* Add checks on other ZCR bits here if necessary */
}
+static void verify_hyp_capabilities(void)
+{
+ u64 safe_mmfr1, mmfr0, mmfr1;
+ int parange, ipa_max;
+ unsigned int safe_vmid_bits, vmid_bits;
+
+ if (!IS_ENABLED(CONFIG_KVM) || !IS_ENABLED(CONFIG_KVM_ARM_HOST))
+ return;
+
+ safe_mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
+ mmfr0 = read_cpuid(ID_AA64MMFR0_EL1);
+ mmfr1 = read_cpuid(ID_AA64MMFR1_EL1);
+
+ /* Verify VMID bits */
+ safe_vmid_bits = get_vmid_bits(safe_mmfr1);
+ vmid_bits = get_vmid_bits(mmfr1);
+ if (vmid_bits < safe_vmid_bits) {
+ pr_crit("CPU%d: VMID width mismatch\n", smp_processor_id());
+ cpu_die_early();
+ }
+
+ /* Verify IPA range */
+ parange = cpuid_feature_extract_unsigned_field(mmfr0,
+ ID_AA64MMFR0_PARANGE_SHIFT);
+ ipa_max = id_aa64mmfr0_parange_to_phys_shift(parange);
+ if (ipa_max < get_kvm_ipa_limit()) {
+ pr_crit("CPU%d: IPA range mismatch\n", smp_processor_id());
+ cpu_die_early();
+ }
+}
/*
* Run through the enabled system capabilities and enable() it on this CPU.
@@ -2206,6 +2514,9 @@ static void verify_local_cpu_capabilities(void)
if (system_supports_sve())
verify_sve_features();
+
+ if (is_hyp_mode_available())
+ verify_hyp_capabilities();
}
void check_local_cpu_capabilities(void)
@@ -2394,7 +2705,7 @@ static int emulate_sys_reg(u32 id, u64 *valp)
if (sys_reg_CRm(id) == 0)
return emulate_id_reg(id, valp);
- regp = get_arm64_ftr_reg(id);
+ regp = get_arm64_ftr_reg_nowarn(id);
if (regp)
*valp = arm64_ftr_reg_user_value(regp);
else
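
The ftr_id_* tables and check_update_ftr_reg() calls in this file all feed the same sanitisation rule. A standalone sketch of the FTR_LOWER_SAFE case, using illustrative names rather than kernel code:

#include <stdint.h>
#include <stdio.h>

/* 4-bit ID register field, as used by the tables above. */
static unsigned int ftr_field(uint64_t reg, unsigned int shift)
{
        return (reg >> shift) & 0xf;
}

/* Keep the lowest (least capable) value seen across CPUs. */
static uint64_t merge_lower_safe(uint64_t sys_val, uint64_t cpu_val,
                                 unsigned int shift)
{
        unsigned int safe = ftr_field(sys_val, shift);
        unsigned int cur = ftr_field(cpu_val, shift);

        if (cur < safe)
                safe = cur;

        sys_val &= ~(0xfULL << shift);
        return sys_val | ((uint64_t)safe << shift);
}

int main(void)
{
        /* Boot CPU reports feature level 2, a late CPU only level 1. */
        uint64_t sys = 2ULL << 4, late = 1ULL << 4;

        sys = merge_lower_safe(sys, late, 4);
        printf("sanitised field: %u\n", ftr_field(sys, 4));    /* 1 */
        return 0;
}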
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 86136075ae41..86637466daa8 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -92,6 +92,7 @@ static const char *const hwcap_str[] = {
"bf16",
"dgh",
"rng",
+ "bti",
NULL
};
@@ -311,6 +312,8 @@ static int __init cpuinfo_regs_init(void)
}
return 0;
}
+device_initcall(cpuinfo_regs_init);
+
static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info)
{
unsigned int cpu = smp_processor_id();
@@ -362,6 +365,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
/* Update the 32bit ID registers only if AArch32 is implemented */
if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) {
info->reg_id_dfr0 = read_cpuid(ID_DFR0_EL1);
+ info->reg_id_dfr1 = read_cpuid(ID_DFR1_EL1);
info->reg_id_isar0 = read_cpuid(ID_ISAR0_EL1);
info->reg_id_isar1 = read_cpuid(ID_ISAR1_EL1);
info->reg_id_isar2 = read_cpuid(ID_ISAR2_EL1);
@@ -373,8 +377,11 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
info->reg_id_mmfr1 = read_cpuid(ID_MMFR1_EL1);
info->reg_id_mmfr2 = read_cpuid(ID_MMFR2_EL1);
info->reg_id_mmfr3 = read_cpuid(ID_MMFR3_EL1);
+ info->reg_id_mmfr4 = read_cpuid(ID_MMFR4_EL1);
+ info->reg_id_mmfr5 = read_cpuid(ID_MMFR5_EL1);
info->reg_id_pfr0 = read_cpuid(ID_PFR0_EL1);
info->reg_id_pfr1 = read_cpuid(ID_PFR1_EL1);
+ info->reg_id_pfr2 = read_cpuid(ID_PFR2_EL1);
info->reg_mvfr0 = read_cpuid(MVFR0_EL1);
info->reg_mvfr1 = read_cpuid(MVFR1_EL1);
@@ -403,5 +410,3 @@ void __init cpuinfo_store_boot_cpu(void)
boot_cpu_data = *info;
init_cpu_features(&boot_cpu_data);
}
-
-device_initcall(cpuinfo_regs_init);
diff --git a/arch/arm64/kernel/crash_core.c b/arch/arm64/kernel/crash_core.c
index ca4c3e12d8c5..1f646b07e3e9 100644
--- a/arch/arm64/kernel/crash_core.c
+++ b/arch/arm64/kernel/crash_core.c
@@ -5,6 +5,7 @@
*/
#include <linux/crash_core.h>
+#include <asm/cpufeature.h>
#include <asm/memory.h>
void arch_crash_save_vmcoreinfo(void)
@@ -16,4 +17,7 @@ void arch_crash_save_vmcoreinfo(void)
vmcoreinfo_append_str("NUMBER(PHYS_OFFSET)=0x%llx\n",
PHYS_OFFSET);
vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset());
+ vmcoreinfo_append_str("NUMBER(KERNELPACMASK)=0x%llx\n",
+ system_supports_address_auth() ?
+ ptrauth_kernel_pac_mask() : 0);
}
diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
index 48222a4760c2..15e80c876d46 100644
--- a/arch/arm64/kernel/debug-monitors.c
+++ b/arch/arm64/kernel/debug-monitors.c
@@ -376,15 +376,13 @@ int aarch32_break_handler(struct pt_regs *regs)
}
NOKPROBE_SYMBOL(aarch32_break_handler);
-static int __init debug_traps_init(void)
+void __init debug_traps_init(void)
{
hook_debug_fault_code(DBG_ESR_EVT_HWSS, single_step_handler, SIGTRAP,
TRAP_TRACE, "single-step handler");
hook_debug_fault_code(DBG_ESR_EVT_BRK, brk_handler, SIGTRAP,
TRAP_BRKPT, "ptrace BRK handler");
- return 0;
}
-arch_initcall(debug_traps_init);
/* Re-enable single step for syscall restarting. */
void user_rewind_single_step(struct task_struct *task)
diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S
index 1a03618df0df..0073b24b5d25 100644
--- a/arch/arm64/kernel/efi-entry.S
+++ b/arch/arm64/kernel/efi-entry.S
@@ -14,12 +14,12 @@
SYM_CODE_START(efi_enter_kernel)
/*
- * efi_entry() will have copied the kernel image if necessary and we
+ * efi_pe_entry() will have copied the kernel image if necessary and we
* end up here with device tree address in x1 and the kernel entry
* point stored in x0. Save those values in registers which are
* callee preserved.
*/
- ldr w2, =stext_offset
+ ldr w2, =primary_entry_offset
add x19, x0, x2 // relocated Image entrypoint
mov x20, x1 // DTB address
diff --git a/arch/arm64/kernel/efi-header.S b/arch/arm64/kernel/efi-header.S
index 914999ccaf8a..df67c0f2a077 100644
--- a/arch/arm64/kernel/efi-header.S
+++ b/arch/arm64/kernel/efi-header.S
@@ -27,12 +27,12 @@ optional_header:
.long __initdata_begin - efi_header_end // SizeOfCode
.long __pecoff_data_size // SizeOfInitializedData
.long 0 // SizeOfUninitializedData
- .long __efistub_efi_entry - _head // AddressOfEntryPoint
+ .long __efistub_efi_pe_entry - _head // AddressOfEntryPoint
.long efi_header_end - _head // BaseOfCode
extra_header_fields:
.quad 0 // ImageBase
- .long SZ_4K // SectionAlignment
+ .long SEGMENT_ALIGN // SectionAlignment
.long PECOFF_FILE_ALIGNMENT // FileAlignment
.short 0 // MajorOperatingSystemVersion
.short 0 // MinorOperatingSystemVersion
diff --git a/arch/arm64/kernel/efi-rt-wrapper.S b/arch/arm64/kernel/efi-rt-wrapper.S
index 3fc71106cb2b..75691a2641c1 100644
--- a/arch/arm64/kernel/efi-rt-wrapper.S
+++ b/arch/arm64/kernel/efi-rt-wrapper.S
@@ -5,7 +5,7 @@
#include <linux/linkage.h>
-ENTRY(__efi_rt_asm_wrapper)
+SYM_FUNC_START(__efi_rt_asm_wrapper)
stp x29, x30, [sp, #-32]!
mov x29, sp
@@ -34,5 +34,14 @@ ENTRY(__efi_rt_asm_wrapper)
ldp x29, x30, [sp], #32
b.ne 0f
ret
-0: b efi_handle_corrupted_x18 // tail call
-ENDPROC(__efi_rt_asm_wrapper)
+0:
+ /*
+ * With CONFIG_SHADOW_CALL_STACK, the kernel uses x18 to store a
+ * shadow stack pointer, which we need to restore before returning to
+ * potentially instrumented code. This is safe because the wrapper is
+ * called with preemption disabled and a separate shadow stack is used
+ * for interrupts.
+ */
+ mov x18, x2
+ b efi_handle_corrupted_x18 // tail call
+SYM_FUNC_END(__efi_rt_asm_wrapper)
diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index c839b5bf1904..3dbdf9752b11 100644
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -94,7 +94,7 @@ asmlinkage void notrace el1_sync_handler(struct pt_regs *regs)
break;
default:
el1_inv(regs, esr);
- };
+ }
}
NOKPROBE_SYMBOL(el1_sync_handler);
@@ -188,6 +188,14 @@ static void notrace el0_undef(struct pt_regs *regs)
}
NOKPROBE_SYMBOL(el0_undef);
+static void notrace el0_bti(struct pt_regs *regs)
+{
+ user_exit_irqoff();
+ local_daif_restore(DAIF_PROCCTX);
+ do_bti(regs);
+}
+NOKPROBE_SYMBOL(el0_bti);
+
static void notrace el0_inv(struct pt_regs *regs, unsigned long esr)
{
user_exit_irqoff();
@@ -255,6 +263,9 @@ asmlinkage void notrace el0_sync_handler(struct pt_regs *regs)
case ESR_ELx_EC_UNKNOWN:
el0_undef(regs);
break;
+ case ESR_ELx_EC_BTI:
+ el0_bti(regs);
+ break;
case ESR_ELx_EC_BREAKPT_LOW:
case ESR_ELx_EC_SOFTSTP_LOW:
case ESR_ELx_EC_WATCHPT_LOW:
diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S
index 0f24eae8f3cc..f880dd63ddc3 100644
--- a/arch/arm64/kernel/entry-fpsimd.S
+++ b/arch/arm64/kernel/entry-fpsimd.S
@@ -16,34 +16,34 @@
*
* x0 - pointer to struct fpsimd_state
*/
-ENTRY(fpsimd_save_state)
+SYM_FUNC_START(fpsimd_save_state)
fpsimd_save x0, 8
ret
-ENDPROC(fpsimd_save_state)
+SYM_FUNC_END(fpsimd_save_state)
/*
* Load the FP registers.
*
* x0 - pointer to struct fpsimd_state
*/
-ENTRY(fpsimd_load_state)
+SYM_FUNC_START(fpsimd_load_state)
fpsimd_restore x0, 8
ret
-ENDPROC(fpsimd_load_state)
+SYM_FUNC_END(fpsimd_load_state)
#ifdef CONFIG_ARM64_SVE
-ENTRY(sve_save_state)
+SYM_FUNC_START(sve_save_state)
sve_save 0, x1, 2
ret
-ENDPROC(sve_save_state)
+SYM_FUNC_END(sve_save_state)
-ENTRY(sve_load_state)
+SYM_FUNC_START(sve_load_state)
sve_load 0, x1, x2, 3, x4
ret
-ENDPROC(sve_load_state)
+SYM_FUNC_END(sve_load_state)
-ENTRY(sve_get_vl)
+SYM_FUNC_START(sve_get_vl)
_sve_rdvl 0, 1
ret
-ENDPROC(sve_get_vl)
+SYM_FUNC_END(sve_get_vl)
#endif /* CONFIG_ARM64_SVE */
diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S
index 833d48c9acb5..a338f40e64d3 100644
--- a/arch/arm64/kernel/entry-ftrace.S
+++ b/arch/arm64/kernel/entry-ftrace.S
@@ -23,8 +23,9 @@
*
* ... where <entry> is either ftrace_caller or ftrace_regs_caller.
*
- * Each instrumented function follows the AAPCS, so here x0-x8 and x19-x30 are
- * live, and x9-x18 are safe to clobber.
+ * Each instrumented function follows the AAPCS, so here x0-x8 and x18-x30 are
+ * live (x18 holds the Shadow Call Stack pointer), and x9-x17 are safe to
+ * clobber.
*
* We save the callsite's context into a pt_regs before invoking any ftrace
* callbacks. So that we can get a sensible backtrace, we create a stack record
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index ddcde093c433..5304d193c79d 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -23,6 +23,7 @@
#include <asm/mmu.h>
#include <asm/processor.h>
#include <asm/ptrace.h>
+#include <asm/scs.h>
#include <asm/thread_info.h>
#include <asm/asm-uaccess.h>
#include <asm/unistd.h>
@@ -178,7 +179,9 @@ alternative_cb_end
apply_ssbd 1, x22, x23
- ptrauth_keys_install_kernel tsk, 1, x20, x22, x23
+ ptrauth_keys_install_kernel tsk, x20, x22, x23
+
+ scs_load tsk, x20
.else
add x21, sp, #S_FRAME_SIZE
get_current_task tsk
@@ -343,6 +346,8 @@ alternative_else_nop_endif
msr cntkctl_el1, x1
4:
#endif
+ scs_save tsk, x0
+
/* No kernel C function calls after this as user keys are set. */
ptrauth_keys_install_user tsk, x0, x1, x2
@@ -388,6 +393,9 @@ alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0
.macro irq_stack_entry
mov x19, sp // preserve the original sp
+#ifdef CONFIG_SHADOW_CALL_STACK
+ mov x24, scs_sp // preserve the original shadow stack
+#endif
/*
* Compare sp with the base of the task stack.
@@ -405,15 +413,25 @@ alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0
/* switch to the irq stack */
mov sp, x26
+
+#ifdef CONFIG_SHADOW_CALL_STACK
+ /* also switch to the irq shadow stack */
+ adr_this_cpu scs_sp, irq_shadow_call_stack, x26
+#endif
+
9998:
.endm
/*
- * x19 should be preserved between irq_stack_entry and
- * irq_stack_exit.
+ * The callee-saved regs (x19-x29) should be preserved between
+ * irq_stack_entry and irq_stack_exit, but note that kernel_entry
+ * uses x20-x23 to store data for later use.
*/
.macro irq_stack_exit
mov sp, x19
+#ifdef CONFIG_SHADOW_CALL_STACK
+ mov scs_sp, x24
+#endif
.endm
/* GPRs used by entry code */
@@ -728,20 +746,9 @@ el0_error_naked:
SYM_CODE_END(el0_error)
/*
- * Ok, we need to do extra processing, enter the slow path.
- */
-work_pending:
- mov x0, sp // 'regs'
- bl do_notify_resume
-#ifdef CONFIG_TRACE_IRQFLAGS
- bl trace_hardirqs_on // enabled while in userspace
-#endif
- ldr x1, [tsk, #TSK_TI_FLAGS] // re-check for single-step
- b finish_ret_to_user
-/*
* "slow" syscall return path.
*/
-ret_to_user:
+SYM_CODE_START_LOCAL(ret_to_user)
disable_daif
gic_prio_kentry_setup tmp=x3
ldr x1, [tsk, #TSK_TI_FLAGS]
@@ -753,7 +760,19 @@ finish_ret_to_user:
bl stackleak_erase
#endif
kernel_exit 0
-ENDPROC(ret_to_user)
+
+/*
+ * Ok, we need to do extra processing, enter the slow path.
+ */
+work_pending:
+ mov x0, sp // 'regs'
+ bl do_notify_resume
+#ifdef CONFIG_TRACE_IRQFLAGS
+ bl trace_hardirqs_on // enabled while in userspace
+#endif
+ ldr x1, [tsk, #TSK_TI_FLAGS] // re-check for single-step
+ b finish_ret_to_user
+SYM_CODE_END(ret_to_user)
.popsection // .entry.text
@@ -900,7 +919,9 @@ SYM_FUNC_START(cpu_switch_to)
ldr lr, [x8]
mov sp, x9
msr sp_el0, x1
- ptrauth_keys_install_kernel x1, 1, x8, x9, x10
+ ptrauth_keys_install_kernel x1, x8, x9, x10
+ scs_save x0, x8
+ scs_load x1, x8
ret
SYM_FUNC_END(cpu_switch_to)
NOKPROBE(cpu_switch_to)
@@ -1029,13 +1050,16 @@ SYM_CODE_START(__sdei_asm_handler)
mov x19, x1
+#if defined(CONFIG_VMAP_STACK) || defined(CONFIG_SHADOW_CALL_STACK)
+ ldrb w4, [x19, #SDEI_EVENT_PRIORITY]
+#endif
+
#ifdef CONFIG_VMAP_STACK
/*
* entry.S may have been using sp as a scratch register, find whether
* this is a normal or critical event and switch to the appropriate
* stack for this CPU.
*/
- ldrb w4, [x19, #SDEI_EVENT_PRIORITY]
cbnz w4, 1f
ldr_this_cpu dst=x5, sym=sdei_stack_normal_ptr, tmp=x6
b 2f
@@ -1045,6 +1069,15 @@ SYM_CODE_START(__sdei_asm_handler)
mov sp, x5
#endif
+#ifdef CONFIG_SHADOW_CALL_STACK
+ /* Use a separate shadow call stack for normal and critical events */
+ cbnz w4, 3f
+ adr_this_cpu dst=scs_sp, sym=sdei_shadow_call_stack_normal, tmp=x6
+ b 4f
+3: adr_this_cpu dst=scs_sp, sym=sdei_shadow_call_stack_critical, tmp=x6
+4:
+#endif
+
/*
* We may have interrupted userspace, or a guest, or exit-from or
* return-to either of these. We can't trust sp_el0, restore it.
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 57a91032b4c2..632702146813 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -13,6 +13,7 @@
#include <linux/init.h>
#include <linux/irqchip/arm-gic-v3.h>
+#include <asm/asm_pointer_auth.h>
#include <asm/assembler.h>
#include <asm/boot.h>
#include <asm/ptrace.h>
@@ -27,6 +28,7 @@
#include <asm/pgtable-hwdef.h>
#include <asm/pgtable.h>
#include <asm/page.h>
+#include <asm/scs.h>
#include <asm/smp.h>
#include <asm/sysreg.h>
#include <asm/thread_info.h>
@@ -70,9 +72,9 @@ _head:
* its opcode forms the magic "MZ" signature required by UEFI.
*/
add x13, x18, #0x16
- b stext
+ b primary_entry
#else
- b stext // branch to kernel start, magic
+ b primary_entry // branch to kernel start, magic
.long 0 // reserved
#endif
le64sym _kernel_offset_le // Image load offset from start of RAM, little-endian
@@ -98,14 +100,13 @@ pe_header:
* primary lowlevel boot path:
*
* Register Scope Purpose
- * x21 stext() .. start_kernel() FDT pointer passed at boot in x0
- * x23 stext() .. start_kernel() physical misalignment/KASLR offset
- * x28 __create_page_tables() callee preserved temp register
- * x19/x20 __primary_switch() callee preserved temp registers
- * x24 __primary_switch() .. relocate_kernel()
- * current RELR displacement
+ * x21 primary_entry() .. start_kernel() FDT pointer passed at boot in x0
+ * x23 primary_entry() .. start_kernel() physical misalignment/KASLR offset
+ * x28 __create_page_tables() callee preserved temp register
+ * x19/x20 __primary_switch() callee preserved temp registers
+ * x24 __primary_switch() .. relocate_kernel() current RELR displacement
*/
-SYM_CODE_START(stext)
+SYM_CODE_START(primary_entry)
bl preserve_boot_args
bl el2_setup // Drop to EL1, w0=cpu_boot_mode
adrp x23, __PHYS_OFFSET
@@ -118,10 +119,9 @@ SYM_CODE_START(stext)
* On return, the CPU will be ready for the MMU to be turned on and
* the TCR will have been set.
*/
- mov x0, #ARM64_CPU_BOOT_PRIMARY
bl __cpu_setup // initialise processor
b __primary_switch
-SYM_CODE_END(stext)
+SYM_CODE_END(primary_entry)
/*
* Preserve the arguments passed by the bootloader in x0 .. x3
@@ -394,13 +394,19 @@ SYM_FUNC_START_LOCAL(__create_page_tables)
/*
* Since the page tables have been populated with non-cacheable
- * accesses (MMU disabled), invalidate the idmap and swapper page
- * tables again to remove any speculatively loaded cache lines.
+ * accesses (MMU disabled), invalidate those tables again to
+ * remove any speculatively loaded cache lines.
*/
+ dmb sy
+
adrp x0, idmap_pg_dir
+ adrp x1, idmap_pg_end
+ sub x1, x1, x0
+ bl __inval_dcache_area
+
+ adrp x0, init_pg_dir
adrp x1, init_pg_end
sub x1, x1, x0
- dmb sy
bl __inval_dcache_area
ret x28
@@ -417,6 +423,10 @@ SYM_FUNC_START_LOCAL(__primary_switched)
adr_l x5, init_task
msr sp_el0, x5 // Save thread_info
+#ifdef CONFIG_ARM64_PTR_AUTH
+ __ptrauth_keys_init_cpu x5, x6, x7, x8
+#endif
+
adr_l x8, vectors // load VBAR_EL1 with virtual
msr vbar_el1, x8 // vector table address
isb
@@ -424,6 +434,10 @@ SYM_FUNC_START_LOCAL(__primary_switched)
stp xzr, x30, [sp, #-16]!
mov x29, sp
+#ifdef CONFIG_SHADOW_CALL_STACK
+ adr_l scs_sp, init_shadow_call_stack // Set shadow call stack
+#endif
+
str_l x21, __fdt_pointer, x5 // Save FDT pointer
ldr_l x4, kimage_vaddr // Save the offset between
@@ -717,7 +731,6 @@ SYM_FUNC_START_LOCAL(secondary_startup)
* Common entry point for secondary CPUs.
*/
bl __cpu_secondary_check52bitva
- mov x0, #ARM64_CPU_BOOT_SECONDARY
bl __cpu_setup // initialise processor
adrp x1, swapper_pg_dir
bl __enable_mmu
@@ -737,8 +750,14 @@ SYM_FUNC_START_LOCAL(__secondary_switched)
ldr x2, [x0, #CPU_BOOT_TASK]
cbz x2, __secondary_too_slow
msr sp_el0, x2
+ scs_load x2, x3
mov x29, #0
mov x30, #0
+
+#ifdef CONFIG_ARM64_PTR_AUTH
+ ptrauth_keys_init_cpu x2, x3, x4, x5
+#endif
+
b secondary_start_kernel
SYM_FUNC_END(__secondary_switched)
diff --git a/arch/arm64/kernel/hibernate-asm.S b/arch/arm64/kernel/hibernate-asm.S
index 6532105b3e32..8ccca660034e 100644
--- a/arch/arm64/kernel/hibernate-asm.S
+++ b/arch/arm64/kernel/hibernate-asm.S
@@ -65,7 +65,7 @@
* x5: physical address of a zero page that remains zero after resume
*/
.pushsection ".hibernate_exit.text", "ax"
-ENTRY(swsusp_arch_suspend_exit)
+SYM_CODE_START(swsusp_arch_suspend_exit)
/*
* We execute from ttbr0, change ttbr1 to our copied linear map tables
* with a break-before-make via the zero page
@@ -110,7 +110,7 @@ ENTRY(swsusp_arch_suspend_exit)
cbz x24, 3f /* Do we need to re-initialise EL2? */
hvc #0
3: ret
-ENDPROC(swsusp_arch_suspend_exit)
+SYM_CODE_END(swsusp_arch_suspend_exit)
/*
* Restore the hyp stub.
@@ -119,15 +119,15 @@ ENDPROC(swsusp_arch_suspend_exit)
*
* x24: The physical address of __hyp_stub_vectors
*/
-el1_sync:
+SYM_CODE_START_LOCAL(el1_sync)
msr vbar_el2, x24
eret
-ENDPROC(el1_sync)
+SYM_CODE_END(el1_sync)
.macro invalid_vector label
-\label:
+SYM_CODE_START_LOCAL(\label)
b \label
-ENDPROC(\label)
+SYM_CODE_END(\label)
.endm
invalid_vector el2_sync_invalid
@@ -141,7 +141,7 @@ ENDPROC(\label)
/* el2 vectors - switch el2 here while we restore the memory image. */
.align 11
-ENTRY(hibernate_el2_vectors)
+SYM_CODE_START(hibernate_el2_vectors)
ventry el2_sync_invalid // Synchronous EL2t
ventry el2_irq_invalid // IRQ EL2t
ventry el2_fiq_invalid // FIQ EL2t
@@ -161,6 +161,6 @@ ENTRY(hibernate_el2_vectors)
ventry el1_irq_invalid // IRQ 32-bit EL1
ventry el1_fiq_invalid // FIQ 32-bit EL1
ventry el1_error_invalid // Error 32-bit EL1
-END(hibernate_el2_vectors)
+SYM_CODE_END(hibernate_el2_vectors)
.popsection
diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
index e473ead806ed..160f5881a0b7 100644
--- a/arch/arm64/kernel/hyp-stub.S
+++ b/arch/arm64/kernel/hyp-stub.S
@@ -21,7 +21,7 @@
.align 11
-ENTRY(__hyp_stub_vectors)
+SYM_CODE_START(__hyp_stub_vectors)
ventry el2_sync_invalid // Synchronous EL2t
ventry el2_irq_invalid // IRQ EL2t
ventry el2_fiq_invalid // FIQ EL2t
@@ -41,11 +41,11 @@ ENTRY(__hyp_stub_vectors)
ventry el1_irq_invalid // IRQ 32-bit EL1
ventry el1_fiq_invalid // FIQ 32-bit EL1
ventry el1_error_invalid // Error 32-bit EL1
-ENDPROC(__hyp_stub_vectors)
+SYM_CODE_END(__hyp_stub_vectors)
.align 11
-el1_sync:
+SYM_CODE_START_LOCAL(el1_sync)
cmp x0, #HVC_SET_VECTORS
b.ne 2f
msr vbar_el2, x1
@@ -68,12 +68,12 @@ el1_sync:
9: mov x0, xzr
eret
-ENDPROC(el1_sync)
+SYM_CODE_END(el1_sync)
.macro invalid_vector label
-\label:
+SYM_CODE_START_LOCAL(\label)
b \label
-ENDPROC(\label)
+SYM_CODE_END(\label)
.endm
invalid_vector el2_sync_invalid
@@ -106,15 +106,15 @@ ENDPROC(\label)
* initialisation entry point.
*/
-ENTRY(__hyp_set_vectors)
+SYM_FUNC_START(__hyp_set_vectors)
mov x1, x0
mov x0, #HVC_SET_VECTORS
hvc #0
ret
-ENDPROC(__hyp_set_vectors)
+SYM_FUNC_END(__hyp_set_vectors)
-ENTRY(__hyp_reset_vectors)
+SYM_FUNC_START(__hyp_reset_vectors)
mov x0, #HVC_RESET_VECTORS
hvc #0
ret
-ENDPROC(__hyp_reset_vectors)
+SYM_FUNC_END(__hyp_reset_vectors)
diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h
index 7f06ad93fc95..be0a63ffed23 100644
--- a/arch/arm64/kernel/image-vars.h
+++ b/arch/arm64/kernel/image-vars.h
@@ -13,7 +13,7 @@
#ifdef CONFIG_EFI
__efistub_kernel_size = _edata - _text;
-__efistub_stext_offset = stext - _text;
+__efistub_primary_entry_offset = primary_entry - _text;
/*
diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c
index 4a9e773a177f..684d871ae38d 100644
--- a/arch/arm64/kernel/insn.c
+++ b/arch/arm64/kernel/insn.c
@@ -51,21 +51,33 @@ enum aarch64_insn_encoding_class __kprobes aarch64_get_insn_class(u32 insn)
return aarch64_insn_encoding_class[(insn >> 25) & 0xf];
}
-/* NOP is an alias of HINT */
-bool __kprobes aarch64_insn_is_nop(u32 insn)
+bool __kprobes aarch64_insn_is_steppable_hint(u32 insn)
{
if (!aarch64_insn_is_hint(insn))
return false;
switch (insn & 0xFE0) {
- case AARCH64_INSN_HINT_YIELD:
- case AARCH64_INSN_HINT_WFE:
- case AARCH64_INSN_HINT_WFI:
- case AARCH64_INSN_HINT_SEV:
- case AARCH64_INSN_HINT_SEVL:
- return false;
- default:
+ case AARCH64_INSN_HINT_XPACLRI:
+ case AARCH64_INSN_HINT_PACIA_1716:
+ case AARCH64_INSN_HINT_PACIB_1716:
+ case AARCH64_INSN_HINT_AUTIA_1716:
+ case AARCH64_INSN_HINT_AUTIB_1716:
+ case AARCH64_INSN_HINT_PACIAZ:
+ case AARCH64_INSN_HINT_PACIASP:
+ case AARCH64_INSN_HINT_PACIBZ:
+ case AARCH64_INSN_HINT_PACIBSP:
+ case AARCH64_INSN_HINT_AUTIAZ:
+ case AARCH64_INSN_HINT_AUTIASP:
+ case AARCH64_INSN_HINT_AUTIBZ:
+ case AARCH64_INSN_HINT_AUTIBSP:
+ case AARCH64_INSN_HINT_BTI:
+ case AARCH64_INSN_HINT_BTIC:
+ case AARCH64_INSN_HINT_BTIJ:
+ case AARCH64_INSN_HINT_BTIJC:
+ case AARCH64_INSN_HINT_NOP:
return true;
+ default:
+ return false;
}
}
@@ -574,7 +586,7 @@ u32 aarch64_insn_gen_cond_branch_imm(unsigned long pc, unsigned long addr,
offset >> 2);
}
-u32 __kprobes aarch64_insn_gen_hint(enum aarch64_insn_hint_op op)
+u32 __kprobes aarch64_insn_gen_hint(enum aarch64_insn_hint_cr_op op)
{
return aarch64_insn_get_hint_value() | op;
}
@@ -1535,16 +1547,10 @@ static u32 aarch64_encode_immediate(u64 imm,
u32 insn)
{
unsigned int immr, imms, n, ones, ror, esz, tmp;
- u64 mask = ~0UL;
-
- /* Can't encode full zeroes or full ones */
- if (!imm || !~imm)
- return AARCH64_BREAK_FAULT;
+ u64 mask;
switch (variant) {
case AARCH64_INSN_VARIANT_32BIT:
- if (upper_32_bits(imm))
- return AARCH64_BREAK_FAULT;
esz = 32;
break;
case AARCH64_INSN_VARIANT_64BIT:
@@ -1556,6 +1562,12 @@ static u32 aarch64_encode_immediate(u64 imm,
return AARCH64_BREAK_FAULT;
}
+ mask = GENMASK(esz - 1, 0);
+
+ /* Can't encode full zeroes, full ones, or value wider than the mask */
+ if (!imm || imm == mask || imm & ~mask)
+ return AARCH64_BREAK_FAULT;
+
/*
* Inverse of Replicate(). Try to spot a repeating pattern
* with a pow2 stride.
diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c
index b40c3b0def92..522e6f517ec0 100644
--- a/arch/arm64/kernel/machine_kexec_file.c
+++ b/arch/arm64/kernel/machine_kexec_file.c
@@ -138,12 +138,12 @@ static int setup_dtb(struct kimage *image,
/* add rng-seed */
if (rng_is_initialized()) {
- u8 rng_seed[RNG_SEED_SIZE];
- get_random_bytes(rng_seed, RNG_SEED_SIZE);
- ret = fdt_setprop(dtb, off, FDT_PROP_RNG_SEED, rng_seed,
- RNG_SEED_SIZE);
+ void *rng_seed;
+ ret = fdt_setprop_placeholder(dtb, off, FDT_PROP_RNG_SEED,
+ RNG_SEED_SIZE, &rng_seed);
if (ret)
goto out;
+ get_random_bytes(rng_seed, RNG_SEED_SIZE);
} else {
pr_notice("RNG is not initialised: omitting \"%s\" property\n",
FDT_PROP_RNG_SEED);
@@ -284,7 +284,7 @@ int load_other_segments(struct kimage *image,
image->arch.elf_headers_sz = headers_sz;
pr_debug("Loaded elf core header at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
- image->arch.elf_headers_mem, headers_sz, headers_sz);
+ image->arch.elf_headers_mem, kbuf.bufsz, kbuf.memsz);
}
/* load initrd */
@@ -305,7 +305,7 @@ int load_other_segments(struct kimage *image,
initrd_load_addr = kbuf.mem;
pr_debug("Loaded initrd at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
- initrd_load_addr, initrd_len, initrd_len);
+ initrd_load_addr, kbuf.bufsz, kbuf.memsz);
}
/* load dtb */
@@ -332,7 +332,7 @@ int load_other_segments(struct kimage *image,
image->arch.dtb_mem = kbuf.mem;
pr_debug("Loaded dtb at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
- kbuf.mem, dtb_len, dtb_len);
+ kbuf.mem, kbuf.bufsz, kbuf.memsz);
return 0;
diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c
index 1ef702b0be2d..295d66490584 100644
--- a/arch/arm64/kernel/paravirt.c
+++ b/arch/arm64/kernel/paravirt.c
@@ -120,7 +120,7 @@ static bool has_pv_steal_clock(void)
struct arm_smccc_res res;
/* To detect the presence of PV time support we require SMCCC 1.1+ */
- if (psci_ops.smccc_version < SMCCC_VERSION_1_1)
+ if (arm_smccc_1_1_get_conduit() == SMCCC_CONDUIT_NONE)
return false;
arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
diff --git a/arch/arm64/kernel/probes/decode-insn.c b/arch/arm64/kernel/probes/decode-insn.c
index b78fac9e546c..263d5fba4c8a 100644
--- a/arch/arm64/kernel/probes/decode-insn.c
+++ b/arch/arm64/kernel/probes/decode-insn.c
@@ -46,7 +46,7 @@ static bool __kprobes aarch64_insn_is_steppable(u32 insn)
* except for the NOP case.
*/
if (aarch64_insn_is_hint(insn))
- return aarch64_insn_is_nop(insn);
+ return aarch64_insn_is_steppable_hint(insn);
return true;
}
diff --git a/arch/arm64/kernel/probes/kprobes_trampoline.S b/arch/arm64/kernel/probes/kprobes_trampoline.S
index 45dce03aaeaf..890ca72c5a51 100644
--- a/arch/arm64/kernel/probes/kprobes_trampoline.S
+++ b/arch/arm64/kernel/probes/kprobes_trampoline.S
@@ -61,7 +61,7 @@
ldp x28, x29, [sp, #S_X28]
.endm
-ENTRY(kretprobe_trampoline)
+SYM_CODE_START(kretprobe_trampoline)
sub sp, sp, #S_FRAME_SIZE
save_all_base_regs
@@ -79,4 +79,4 @@ ENTRY(kretprobe_trampoline)
add sp, sp, #S_FRAME_SIZE
ret
-ENDPROC(kretprobe_trampoline)
+SYM_CODE_END(kretprobe_trampoline)
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 56be4cbf771f..eade7807e819 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -11,6 +11,7 @@
#include <linux/compat.h>
#include <linux/efi.h>
+#include <linux/elf.h>
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/sched/debug.h>
@@ -18,6 +19,7 @@
#include <linux/sched/task_stack.h>
#include <linux/kernel.h>
#include <linux/lockdep.h>
+#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/stddef.h>
#include <linux/sysctl.h>
@@ -209,6 +211,15 @@ void machine_restart(char *cmd)
while (1);
}
+#define bstr(suffix, str) [PSR_BTYPE_ ## suffix >> PSR_BTYPE_SHIFT] = str
+static const char *const btypes[] = {
+ bstr(NONE, "--"),
+ bstr( JC, "jc"),
+ bstr( C, "-c"),
+ bstr( J , "j-")
+};
+#undef bstr
+
static void print_pstate(struct pt_regs *regs)
{
u64 pstate = regs->pstate;
@@ -227,7 +238,10 @@ static void print_pstate(struct pt_regs *regs)
pstate & PSR_AA32_I_BIT ? 'I' : 'i',
pstate & PSR_AA32_F_BIT ? 'F' : 'f');
} else {
- printk("pstate: %08llx (%c%c%c%c %c%c%c%c %cPAN %cUAO)\n",
+ const char *btype_str = btypes[(pstate & PSR_BTYPE_MASK) >>
+ PSR_BTYPE_SHIFT];
+
+ printk("pstate: %08llx (%c%c%c%c %c%c%c%c %cPAN %cUAO BTYPE=%s)\n",
pstate,
pstate & PSR_N_BIT ? 'N' : 'n',
pstate & PSR_Z_BIT ? 'Z' : 'z',
@@ -238,7 +252,8 @@ static void print_pstate(struct pt_regs *regs)
pstate & PSR_I_BIT ? 'I' : 'i',
pstate & PSR_F_BIT ? 'F' : 'f',
pstate & PSR_PAN_BIT ? '+' : '-',
- pstate & PSR_UAO_BIT ? '+' : '-');
+ pstate & PSR_UAO_BIT ? '+' : '-',
+ btype_str);
}
}
@@ -655,3 +670,25 @@ asmlinkage void __sched arm64_preempt_schedule_irq(void)
if (system_capabilities_finalized())
preempt_schedule_irq();
}
+
+#ifdef CONFIG_BINFMT_ELF
+int arch_elf_adjust_prot(int prot, const struct arch_elf_state *state,
+ bool has_interp, bool is_interp)
+{
+ /*
+ * For dynamically linked executables the interpreter is
+ * responsible for setting PROT_BTI on everything except
+ * itself.
+ */
+ if (is_interp != has_interp)
+ return prot;
+
+ if (!(state->flags & ARM64_ELF_BTI))
+ return prot;
+
+ if (prot & PROT_EXEC)
+ prot |= PROT_BTI;
+
+ return prot;
+}
+#endif
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index e7b01904f180..76790a5f2a0d 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -1875,7 +1875,7 @@ void syscall_trace_exit(struct pt_regs *regs)
*/
#define SPSR_EL1_AARCH64_RES0_BITS \
(GENMASK_ULL(63, 32) | GENMASK_ULL(27, 25) | GENMASK_ULL(23, 22) | \
- GENMASK_ULL(20, 13) | GENMASK_ULL(11, 10) | GENMASK_ULL(5, 5))
+ GENMASK_ULL(20, 13) | GENMASK_ULL(5, 5))
#define SPSR_EL1_AARCH32_RES0_BITS \
(GENMASK_ULL(63, 32) | GENMASK_ULL(22, 22) | GENMASK_ULL(20, 20))
diff --git a/arch/arm64/kernel/reloc_test_syms.S b/arch/arm64/kernel/reloc_test_syms.S
index 16a34f188f26..c50f45fa29fa 100644
--- a/arch/arm64/kernel/reloc_test_syms.S
+++ b/arch/arm64/kernel/reloc_test_syms.S
@@ -5,81 +5,81 @@
#include <linux/linkage.h>
-ENTRY(absolute_data64)
+SYM_FUNC_START(absolute_data64)
ldr x0, 0f
ret
0: .quad sym64_abs
-ENDPROC(absolute_data64)
+SYM_FUNC_END(absolute_data64)
-ENTRY(absolute_data32)
+SYM_FUNC_START(absolute_data32)
ldr w0, 0f
ret
0: .long sym32_abs
-ENDPROC(absolute_data32)
+SYM_FUNC_END(absolute_data32)
-ENTRY(absolute_data16)
+SYM_FUNC_START(absolute_data16)
adr x0, 0f
ldrh w0, [x0]
ret
0: .short sym16_abs, 0
-ENDPROC(absolute_data16)
+SYM_FUNC_END(absolute_data16)
-ENTRY(signed_movw)
+SYM_FUNC_START(signed_movw)
movz x0, #:abs_g2_s:sym64_abs
movk x0, #:abs_g1_nc:sym64_abs
movk x0, #:abs_g0_nc:sym64_abs
ret
-ENDPROC(signed_movw)
+SYM_FUNC_END(signed_movw)
-ENTRY(unsigned_movw)
+SYM_FUNC_START(unsigned_movw)
movz x0, #:abs_g3:sym64_abs
movk x0, #:abs_g2_nc:sym64_abs
movk x0, #:abs_g1_nc:sym64_abs
movk x0, #:abs_g0_nc:sym64_abs
ret
-ENDPROC(unsigned_movw)
+SYM_FUNC_END(unsigned_movw)
.align 12
.space 0xff8
-ENTRY(relative_adrp)
+SYM_FUNC_START(relative_adrp)
adrp x0, sym64_rel
add x0, x0, #:lo12:sym64_rel
ret
-ENDPROC(relative_adrp)
+SYM_FUNC_END(relative_adrp)
.align 12
.space 0xffc
-ENTRY(relative_adrp_far)
+SYM_FUNC_START(relative_adrp_far)
adrp x0, memstart_addr
add x0, x0, #:lo12:memstart_addr
ret
-ENDPROC(relative_adrp_far)
+SYM_FUNC_END(relative_adrp_far)
-ENTRY(relative_adr)
+SYM_FUNC_START(relative_adr)
adr x0, sym64_rel
ret
-ENDPROC(relative_adr)
+SYM_FUNC_END(relative_adr)
-ENTRY(relative_data64)
+SYM_FUNC_START(relative_data64)
adr x1, 0f
ldr x0, [x1]
add x0, x0, x1
ret
0: .quad sym64_rel - .
-ENDPROC(relative_data64)
+SYM_FUNC_END(relative_data64)
-ENTRY(relative_data32)
+SYM_FUNC_START(relative_data32)
adr x1, 0f
ldr w0, [x1]
add x0, x0, x1
ret
0: .long sym64_rel - .
-ENDPROC(relative_data32)
+SYM_FUNC_END(relative_data32)
-ENTRY(relative_data16)
+SYM_FUNC_START(relative_data16)
adr x1, 0f
ldrsh w0, [x1]
add x0, x0, x1
ret
0: .short sym64_rel - ., 0
-ENDPROC(relative_data16)
+SYM_FUNC_END(relative_data16)
diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S
index c40ce496c78b..542d6edc6806 100644
--- a/arch/arm64/kernel/relocate_kernel.S
+++ b/arch/arm64/kernel/relocate_kernel.S
@@ -26,7 +26,7 @@
* control_code_page, a special page which has been set up to be preserved
* during the copy operation.
*/
-ENTRY(arm64_relocate_new_kernel)
+SYM_CODE_START(arm64_relocate_new_kernel)
/* Setup the list loop variables. */
mov x18, x2 /* x18 = dtb address */
@@ -111,7 +111,7 @@ ENTRY(arm64_relocate_new_kernel)
mov x3, xzr
br x17
-ENDPROC(arm64_relocate_new_kernel)
+SYM_CODE_END(arm64_relocate_new_kernel)
.align 3 /* To keep the 64-bit values below naturally aligned. */
diff --git a/arch/arm64/kernel/scs.c b/arch/arm64/kernel/scs.c
new file mode 100644
index 000000000000..e8f7ff45dd8f
--- /dev/null
+++ b/arch/arm64/kernel/scs.c
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Shadow Call Stack support.
+ *
+ * Copyright (C) 2019 Google LLC
+ */
+
+#include <linux/percpu.h>
+#include <linux/scs.h>
+
+DEFINE_SCS(irq_shadow_call_stack);
+
+#ifdef CONFIG_ARM_SDE_INTERFACE
+DEFINE_SCS(sdei_shadow_call_stack_normal);
+DEFINE_SCS(sdei_shadow_call_stack_critical);
+#endif
diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c
index d6259dac62b6..dab88260b137 100644
--- a/arch/arm64/kernel/sdei.c
+++ b/arch/arm64/kernel/sdei.c
@@ -95,19 +95,7 @@ static bool on_sdei_normal_stack(unsigned long sp, struct stack_info *info)
unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_normal_ptr);
unsigned long high = low + SDEI_STACK_SIZE;
- if (!low)
- return false;
-
- if (sp < low || sp >= high)
- return false;
-
- if (info) {
- info->low = low;
- info->high = high;
- info->type = STACK_TYPE_SDEI_NORMAL;
- }
-
- return true;
+ return on_stack(sp, low, high, STACK_TYPE_SDEI_NORMAL, info);
}
static bool on_sdei_critical_stack(unsigned long sp, struct stack_info *info)
@@ -115,19 +103,7 @@ static bool on_sdei_critical_stack(unsigned long sp, struct stack_info *info)
unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_critical_ptr);
unsigned long high = low + SDEI_STACK_SIZE;
- if (!low)
- return false;
-
- if (sp < low || sp >= high)
- return false;
-
- if (info) {
- info->low = low;
- info->high = high;
- info->type = STACK_TYPE_SDEI_CRITICAL;
- }
-
- return true;
+ return on_stack(sp, low, high, STACK_TYPE_SDEI_CRITICAL, info);
}
bool _on_sdei_stack(unsigned long sp, struct stack_info *info)
@@ -251,22 +227,12 @@ asmlinkage __kprobes notrace unsigned long
__sdei_handler(struct pt_regs *regs, struct sdei_registered_event *arg)
{
unsigned long ret;
- bool do_nmi_exit = false;
- /*
- * nmi_enter() deals with printk() re-entrance and use of RCU when
- * RCU believed this CPU was idle. Because critical events can
- * interrupt normal events, we may already be in_nmi().
- */
- if (!in_nmi()) {
- nmi_enter();
- do_nmi_exit = true;
- }
+ nmi_enter();
ret = _sdei_handler(regs, arg);
- if (do_nmi_exit)
- nmi_exit();
+ nmi_exit();
return ret;
}
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 339882db5a91..801d56cdf701 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -732,6 +732,22 @@ static void setup_return(struct pt_regs *regs, struct k_sigaction *ka,
regs->regs[29] = (unsigned long)&user->next_frame->fp;
regs->pc = (unsigned long)ka->sa.sa_handler;
+ /*
+ * Signal delivery is a (wacky) indirect function call in
+ * userspace, so simulate the same setting of BTYPE as a BLR
+ * <register containing the signal handler entry point>.
+ * Signal delivery to a location in a PROT_BTI guarded page
+ * that is not a function entry point will now trigger a
+ * SIGILL in userspace.
+ *
+ * If the signal handler entry point is not in a PROT_BTI
+ * guarded page, this is harmless.
+ */
+ if (system_supports_bti()) {
+ regs->pstate &= ~PSR_BTYPE_MASK;
+ regs->pstate |= PSR_BTYPE_C;
+ }
+
if (ka->sa.sa_flags & SA_RESTORER)
sigtramp = ka->sa.sa_restorer;
else
diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S
index 7b2f2e650c44..ba40d57757d6 100644
--- a/arch/arm64/kernel/sleep.S
+++ b/arch/arm64/kernel/sleep.S
@@ -62,7 +62,7 @@
*
* x0 = struct sleep_stack_data area
*/
-ENTRY(__cpu_suspend_enter)
+SYM_FUNC_START(__cpu_suspend_enter)
stp x29, lr, [x0, #SLEEP_STACK_DATA_CALLEE_REGS]
stp x19, x20, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+16]
stp x21, x22, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+32]
@@ -95,23 +95,22 @@ ENTRY(__cpu_suspend_enter)
ldp x29, lr, [sp], #16
mov x0, #1
ret
-ENDPROC(__cpu_suspend_enter)
+SYM_FUNC_END(__cpu_suspend_enter)
.pushsection ".idmap.text", "awx"
-ENTRY(cpu_resume)
+SYM_CODE_START(cpu_resume)
bl el2_setup // if in EL2 drop to EL1 cleanly
- mov x0, #ARM64_CPU_RUNTIME
bl __cpu_setup
/* enable the MMU early - so we can access sleep_save_stash by va */
adrp x1, swapper_pg_dir
bl __enable_mmu
ldr x8, =_cpu_resume
br x8
-ENDPROC(cpu_resume)
+SYM_CODE_END(cpu_resume)
.ltorg
.popsection
-ENTRY(_cpu_resume)
+SYM_FUNC_START(_cpu_resume)
mrs x1, mpidr_el1
adr_l x8, mpidr_hash // x8 = struct mpidr_hash virt address
@@ -147,4 +146,4 @@ ENTRY(_cpu_resume)
ldp x29, lr, [x29]
mov x0, #0
ret
-ENDPROC(_cpu_resume)
+SYM_FUNC_END(_cpu_resume)
diff --git a/arch/arm64/kernel/smccc-call.S b/arch/arm64/kernel/smccc-call.S
index 54655273d1e0..1f93809528a4 100644
--- a/arch/arm64/kernel/smccc-call.S
+++ b/arch/arm64/kernel/smccc-call.S
@@ -30,9 +30,9 @@
* unsigned long a6, unsigned long a7, struct arm_smccc_res *res,
* struct arm_smccc_quirk *quirk)
*/
-ENTRY(__arm_smccc_smc)
+SYM_FUNC_START(__arm_smccc_smc)
SMCCC smc
-ENDPROC(__arm_smccc_smc)
+SYM_FUNC_END(__arm_smccc_smc)
EXPORT_SYMBOL(__arm_smccc_smc)
/*
@@ -41,7 +41,7 @@ EXPORT_SYMBOL(__arm_smccc_smc)
* unsigned long a6, unsigned long a7, struct arm_smccc_res *res,
* struct arm_smccc_quirk *quirk)
*/
-ENTRY(__arm_smccc_hvc)
+SYM_FUNC_START(__arm_smccc_hvc)
SMCCC hvc
-ENDPROC(__arm_smccc_hvc)
+SYM_FUNC_END(__arm_smccc_hvc)
EXPORT_SYMBOL(__arm_smccc_hvc)
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index bb813d06114a..4b6f4999d06a 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -65,7 +65,7 @@ EXPORT_PER_CPU_SYMBOL(cpu_number);
*/
struct secondary_data secondary_data;
/* Number of CPUs which aren't online, but looping in kernel text. */
-int cpus_stuck_in_kernel;
+static int cpus_stuck_in_kernel;
enum ipi_msg_type {
IPI_RESCHEDULE,
@@ -114,10 +114,6 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
*/
secondary_data.task = idle;
secondary_data.stack = task_stack_page(idle) + THREAD_SIZE;
-#if defined(CONFIG_ARM64_PTR_AUTH)
- secondary_data.ptrauth_key.apia.lo = idle->thread.keys_kernel.apia.lo;
- secondary_data.ptrauth_key.apia.hi = idle->thread.keys_kernel.apia.hi;
-#endif
update_cpu_boot_status(CPU_MMU_OFF);
__flush_dcache_area(&secondary_data, sizeof(secondary_data));
@@ -140,10 +136,6 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
pr_crit("CPU%u: failed to come online\n", cpu);
secondary_data.task = NULL;
secondary_data.stack = NULL;
-#if defined(CONFIG_ARM64_PTR_AUTH)
- secondary_data.ptrauth_key.apia.lo = 0;
- secondary_data.ptrauth_key.apia.hi = 0;
-#endif
__flush_dcache_area(&secondary_data, sizeof(secondary_data));
status = READ_ONCE(secondary_data.status);
if (status == CPU_MMU_OFF)
@@ -430,7 +422,7 @@ static void __init hyp_mode_check(void)
"CPU: CPUs started in inconsistent modes");
else
pr_info("CPU: All CPU(s) started at EL1\n");
- if (IS_ENABLED(CONFIG_KVM_ARM_HOST))
+ if (IS_ENABLED(CONFIG_KVM))
kvm_compute_layout();
}
diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c
index a12c0c88d345..5f5b868292f5 100644
--- a/arch/arm64/kernel/syscall.c
+++ b/arch/arm64/kernel/syscall.c
@@ -98,6 +98,24 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr,
regs->orig_x0 = regs->regs[0];
regs->syscallno = scno;
+ /*
+ * BTI note:
+ * The architecture does not guarantee that SPSR.BTYPE is zero
+ * on taking an SVC, so we could return to userspace with a
+ * non-zero BTYPE after the syscall.
+ *
+ * This shouldn't matter except when userspace is explicitly
+ * doing something stupid, such as setting PROT_BTI on a page
+ * that lacks conforming BTI/PACIxSP instructions, falling
+ * through from one executable page to another with differing
+ * PROT_BTI, or messing with BTYPE via ptrace: in such cases,
+ * userspace should not be surprised if a SIGILL occurs on
+ * syscall return.
+ *
+ * So, don't touch regs->pstate & PSR_BTYPE_MASK here.
+ * (Similarly for HVC and SMC elsewhere.)
+ */
+
cortex_a76_erratum_1463225_svc_handler();
local_daif_restore(DAIF_PROCCTX);
user_exit();
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index cf402be5c573..d332590f5978 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -272,6 +272,61 @@ void arm64_notify_die(const char *str, struct pt_regs *regs,
}
}
+#ifdef CONFIG_COMPAT
+#define PSTATE_IT_1_0_SHIFT 25
+#define PSTATE_IT_1_0_MASK (0x3 << PSTATE_IT_1_0_SHIFT)
+#define PSTATE_IT_7_2_SHIFT 10
+#define PSTATE_IT_7_2_MASK (0x3f << PSTATE_IT_7_2_SHIFT)
+
+static u32 compat_get_it_state(struct pt_regs *regs)
+{
+ u32 it, pstate = regs->pstate;
+
+ it = (pstate & PSTATE_IT_1_0_MASK) >> PSTATE_IT_1_0_SHIFT;
+ it |= ((pstate & PSTATE_IT_7_2_MASK) >> PSTATE_IT_7_2_SHIFT) << 2;
+
+ return it;
+}
+
+static void compat_set_it_state(struct pt_regs *regs, u32 it)
+{
+ u32 pstate_it;
+
+ pstate_it = (it << PSTATE_IT_1_0_SHIFT) & PSTATE_IT_1_0_MASK;
+ pstate_it |= ((it >> 2) << PSTATE_IT_7_2_SHIFT) & PSTATE_IT_7_2_MASK;
+
+ regs->pstate &= ~PSR_AA32_IT_MASK;
+ regs->pstate |= pstate_it;
+}
+
+static void advance_itstate(struct pt_regs *regs)
+{
+ u32 it;
+
+ /* ARM mode */
+ if (!(regs->pstate & PSR_AA32_T_BIT) ||
+ !(regs->pstate & PSR_AA32_IT_MASK))
+ return;
+
+ it = compat_get_it_state(regs);
+
+ /*
+ * If this is the last instruction of the block, wipe the IT
+ * state. Otherwise advance it.
+ */
+ if (!(it & 7))
+ it = 0;
+ else
+ it = (it & 0xe0) | ((it << 1) & 0x1f);
+
+ compat_set_it_state(regs, it);
+}
+#else
+static void advance_itstate(struct pt_regs *regs)
+{
+}
+#endif
+
void arm64_skip_faulting_instruction(struct pt_regs *regs, unsigned long size)
{
regs->pc += size;
@@ -282,6 +337,11 @@ void arm64_skip_faulting_instruction(struct pt_regs *regs, unsigned long size)
*/
if (user_mode(regs))
user_fastforward_single_step(current);
+
+ if (compat_user_mode(regs))
+ advance_itstate(regs);
+ else
+ regs->pstate &= ~PSR_BTYPE_MASK;
}
static LIST_HEAD(undef_hook);
@@ -411,6 +471,13 @@ void do_undefinstr(struct pt_regs *regs)
}
NOKPROBE_SYMBOL(do_undefinstr);
+void do_bti(struct pt_regs *regs)
+{
+ BUG_ON(!user_mode(regs));
+ force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc);
+}
+NOKPROBE_SYMBOL(do_bti);
+
#define __user_cache_maint(insn, address, res) \
if (address >= user_addr_max()) { \
res = -EFAULT; \
@@ -566,34 +633,7 @@ static const struct sys64_hook sys64_hooks[] = {
{},
};
-
#ifdef CONFIG_COMPAT
-#define PSTATE_IT_1_0_SHIFT 25
-#define PSTATE_IT_1_0_MASK (0x3 << PSTATE_IT_1_0_SHIFT)
-#define PSTATE_IT_7_2_SHIFT 10
-#define PSTATE_IT_7_2_MASK (0x3f << PSTATE_IT_7_2_SHIFT)
-
-static u32 compat_get_it_state(struct pt_regs *regs)
-{
- u32 it, pstate = regs->pstate;
-
- it = (pstate & PSTATE_IT_1_0_MASK) >> PSTATE_IT_1_0_SHIFT;
- it |= ((pstate & PSTATE_IT_7_2_MASK) >> PSTATE_IT_7_2_SHIFT) << 2;
-
- return it;
-}
-
-static void compat_set_it_state(struct pt_regs *regs, u32 it)
-{
- u32 pstate_it;
-
- pstate_it = (it << PSTATE_IT_1_0_SHIFT) & PSTATE_IT_1_0_MASK;
- pstate_it |= ((it >> 2) << PSTATE_IT_7_2_SHIFT) & PSTATE_IT_7_2_MASK;
-
- regs->pstate &= ~PSR_AA32_IT_MASK;
- regs->pstate |= pstate_it;
-}
-
static bool cp15_cond_valid(unsigned int esr, struct pt_regs *regs)
{
int cond;
@@ -614,42 +654,12 @@ static bool cp15_cond_valid(unsigned int esr, struct pt_regs *regs)
return aarch32_opcode_cond_checks[cond](regs->pstate);
}
-static void advance_itstate(struct pt_regs *regs)
-{
- u32 it;
-
- /* ARM mode */
- if (!(regs->pstate & PSR_AA32_T_BIT) ||
- !(regs->pstate & PSR_AA32_IT_MASK))
- return;
-
- it = compat_get_it_state(regs);
-
- /*
- * If this is the last instruction of the block, wipe the IT
- * state. Otherwise advance it.
- */
- if (!(it & 7))
- it = 0;
- else
- it = (it & 0xe0) | ((it << 1) & 0x1f);
-
- compat_set_it_state(regs, it);
-}
-
-static void arm64_compat_skip_faulting_instruction(struct pt_regs *regs,
- unsigned int sz)
-{
- advance_itstate(regs);
- arm64_skip_faulting_instruction(regs, sz);
-}
-
static void compat_cntfrq_read_handler(unsigned int esr, struct pt_regs *regs)
{
int reg = (esr & ESR_ELx_CP15_32_ISS_RT_MASK) >> ESR_ELx_CP15_32_ISS_RT_SHIFT;
pt_regs_write_reg(regs, reg, arch_timer_get_rate());
- arm64_compat_skip_faulting_instruction(regs, 4);
+ arm64_skip_faulting_instruction(regs, 4);
}
static const struct sys64_hook cp15_32_hooks[] = {
@@ -669,7 +679,7 @@ static void compat_cntvct_read_handler(unsigned int esr, struct pt_regs *regs)
pt_regs_write_reg(regs, rt, lower_32_bits(val));
pt_regs_write_reg(regs, rt2, upper_32_bits(val));
- arm64_compat_skip_faulting_instruction(regs, 4);
+ arm64_skip_faulting_instruction(regs, 4);
}
static const struct sys64_hook cp15_64_hooks[] = {
@@ -690,7 +700,7 @@ void do_cp15instr(unsigned int esr, struct pt_regs *regs)
* There is no T16 variant of a CP access, so we
* always advance PC by 4 bytes.
*/
- arm64_compat_skip_faulting_instruction(regs, 4);
+ arm64_skip_faulting_instruction(regs, 4);
return;
}
@@ -753,6 +763,7 @@ static const char *esr_class_str[] = {
[ESR_ELx_EC_CP10_ID] = "CP10 MRC/VMRS",
[ESR_ELx_EC_PAC] = "PAC",
[ESR_ELx_EC_CP14_64] = "CP14 MCRR/MRRC",
+ [ESR_ELx_EC_BTI] = "BTI",
[ESR_ELx_EC_ILL] = "PSTATE.IL",
[ESR_ELx_EC_SVC32] = "SVC (AArch32)",
[ESR_ELx_EC_HVC32] = "HVC (AArch32)",
@@ -906,17 +917,13 @@ bool arm64_is_fatal_ras_serror(struct pt_regs *regs, unsigned int esr)
asmlinkage void do_serror(struct pt_regs *regs, unsigned int esr)
{
- const bool was_in_nmi = in_nmi();
-
- if (!was_in_nmi)
- nmi_enter();
+ nmi_enter();
/* non-RAS errors are not containable */
if (!arm64_is_ras_serror(esr) || arm64_is_fatal_ras_serror(regs, esr))
arm64_serror_panic(regs, esr);
- if (!was_in_nmi)
- nmi_exit();
+ nmi_exit();
}
asmlinkage void enter_from_user_mode(void)
@@ -1047,11 +1054,11 @@ int __init early_brk64(unsigned long addr, unsigned int esr,
return bug_handler(regs, esr) != DBG_HOOK_HANDLED;
}
-/* This registration must happen early, before debug_traps_init(). */
void __init trap_init(void)
{
register_kernel_break_hook(&bug_break_hook);
#ifdef CONFIG_KASAN_SW_TAGS
register_kernel_break_hook(&kasan_break_hook);
#endif
+ debug_traps_init();
}
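
The advance_itstate() helper moved above is self-contained bit manipulation, so it can be exercised outside the kernel. A minimal sketch (arbitrary test input, nothing here is taken from real PSTATE contents) shows the behaviour the comment describes: the top three bits of ITSTATE are preserved while the mask in the low five bits shifts left, and the state is wiped once the low three bits are clear.

#include <stdio.h>
#include <stdint.h>

/* Same advance rule as the hunk above, lifted out for illustration. */
static uint32_t advance(uint32_t it)
{
	if (!(it & 7))
		return 0;
	return (it & 0xe0) | ((it << 1) & 0x1f);
}

int main(void)
{
	uint32_t it = 0x1f;	/* arbitrary non-zero ITSTATE */

	while (it) {
		printf("ITSTATE = 0x%02x\n", it);
		it = advance(it);
	}
	return 0;
}
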
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index 033a48f30dbb..d51a898fd60f 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -33,20 +33,14 @@ extern char vdso_start[], vdso_end[];
extern char vdso32_start[], vdso32_end[];
#endif /* CONFIG_COMPAT_VDSO */
-/* vdso_lookup arch_index */
-enum arch_vdso_type {
- ARM64_VDSO = 0,
+enum vdso_abi {
+ VDSO_ABI_AA64,
#ifdef CONFIG_COMPAT_VDSO
- ARM64_VDSO32 = 1,
+ VDSO_ABI_AA32,
#endif /* CONFIG_COMPAT_VDSO */
};
-#ifdef CONFIG_COMPAT_VDSO
-#define VDSO_TYPES (ARM64_VDSO32 + 1)
-#else
-#define VDSO_TYPES (ARM64_VDSO + 1)
-#endif /* CONFIG_COMPAT_VDSO */
-struct __vdso_abi {
+struct vdso_abi_info {
const char *name;
const char *vdso_code_start;
const char *vdso_code_end;
@@ -57,14 +51,14 @@ struct __vdso_abi {
struct vm_special_mapping *cm;
};
-static struct __vdso_abi vdso_lookup[VDSO_TYPES] __ro_after_init = {
- {
+static struct vdso_abi_info vdso_info[] __ro_after_init = {
+ [VDSO_ABI_AA64] = {
.name = "vdso",
.vdso_code_start = vdso_start,
.vdso_code_end = vdso_end,
},
#ifdef CONFIG_COMPAT_VDSO
- {
+ [VDSO_ABI_AA32] = {
.name = "vdso32",
.vdso_code_start = vdso32_start,
.vdso_code_end = vdso32_end,
@@ -81,13 +75,13 @@ static union {
} vdso_data_store __page_aligned_data;
struct vdso_data *vdso_data = vdso_data_store.data;
-static int __vdso_remap(enum arch_vdso_type arch_index,
+static int __vdso_remap(enum vdso_abi abi,
const struct vm_special_mapping *sm,
struct vm_area_struct *new_vma)
{
unsigned long new_size = new_vma->vm_end - new_vma->vm_start;
- unsigned long vdso_size = vdso_lookup[arch_index].vdso_code_end -
- vdso_lookup[arch_index].vdso_code_start;
+ unsigned long vdso_size = vdso_info[abi].vdso_code_end -
+ vdso_info[abi].vdso_code_start;
if (vdso_size != new_size)
return -EINVAL;
@@ -97,24 +91,24 @@ static int __vdso_remap(enum arch_vdso_type arch_index,
return 0;
}
-static int __vdso_init(enum arch_vdso_type arch_index)
+static int __vdso_init(enum vdso_abi abi)
{
int i;
struct page **vdso_pagelist;
unsigned long pfn;
- if (memcmp(vdso_lookup[arch_index].vdso_code_start, "\177ELF", 4)) {
+ if (memcmp(vdso_info[abi].vdso_code_start, "\177ELF", 4)) {
pr_err("vDSO is not a valid ELF object!\n");
return -EINVAL;
}
- vdso_lookup[arch_index].vdso_pages = (
- vdso_lookup[arch_index].vdso_code_end -
- vdso_lookup[arch_index].vdso_code_start) >>
+ vdso_info[abi].vdso_pages = (
+ vdso_info[abi].vdso_code_end -
+ vdso_info[abi].vdso_code_start) >>
PAGE_SHIFT;
/* Allocate the vDSO pagelist, plus a page for the data. */
- vdso_pagelist = kcalloc(vdso_lookup[arch_index].vdso_pages + 1,
+ vdso_pagelist = kcalloc(vdso_info[abi].vdso_pages + 1,
sizeof(struct page *),
GFP_KERNEL);
if (vdso_pagelist == NULL)
@@ -125,26 +119,27 @@ static int __vdso_init(enum arch_vdso_type arch_index)
/* Grab the vDSO code pages. */
- pfn = sym_to_pfn(vdso_lookup[arch_index].vdso_code_start);
+ pfn = sym_to_pfn(vdso_info[abi].vdso_code_start);
- for (i = 0; i < vdso_lookup[arch_index].vdso_pages; i++)
+ for (i = 0; i < vdso_info[abi].vdso_pages; i++)
vdso_pagelist[i + 1] = pfn_to_page(pfn + i);
- vdso_lookup[arch_index].dm->pages = &vdso_pagelist[0];
- vdso_lookup[arch_index].cm->pages = &vdso_pagelist[1];
+ vdso_info[abi].dm->pages = &vdso_pagelist[0];
+ vdso_info[abi].cm->pages = &vdso_pagelist[1];
return 0;
}
-static int __setup_additional_pages(enum arch_vdso_type arch_index,
+static int __setup_additional_pages(enum vdso_abi abi,
struct mm_struct *mm,
struct linux_binprm *bprm,
int uses_interp)
{
unsigned long vdso_base, vdso_text_len, vdso_mapping_len;
+ unsigned long gp_flags = 0;
void *ret;
- vdso_text_len = vdso_lookup[arch_index].vdso_pages << PAGE_SHIFT;
+ vdso_text_len = vdso_info[abi].vdso_pages << PAGE_SHIFT;
/* Be sure to map the data page */
vdso_mapping_len = vdso_text_len + PAGE_SIZE;
@@ -156,16 +151,19 @@ static int __setup_additional_pages(enum arch_vdso_type arch_index,
ret = _install_special_mapping(mm, vdso_base, PAGE_SIZE,
VM_READ|VM_MAYREAD,
- vdso_lookup[arch_index].dm);
+ vdso_info[abi].dm);
if (IS_ERR(ret))
goto up_fail;
+ if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) && system_supports_bti())
+ gp_flags = VM_ARM64_BTI;
+
vdso_base += PAGE_SIZE;
mm->context.vdso = (void *)vdso_base;
ret = _install_special_mapping(mm, vdso_base, vdso_text_len,
- VM_READ|VM_EXEC|
+ VM_READ|VM_EXEC|gp_flags|
VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
- vdso_lookup[arch_index].cm);
+ vdso_info[abi].cm);
if (IS_ERR(ret))
goto up_fail;
@@ -184,46 +182,42 @@ up_fail:
static int aarch32_vdso_mremap(const struct vm_special_mapping *sm,
struct vm_area_struct *new_vma)
{
- return __vdso_remap(ARM64_VDSO32, sm, new_vma);
+ return __vdso_remap(VDSO_ABI_AA32, sm, new_vma);
}
#endif /* CONFIG_COMPAT_VDSO */
-/*
- * aarch32_vdso_pages:
- * 0 - kuser helpers
- * 1 - sigreturn code
- * or (CONFIG_COMPAT_VDSO):
- * 0 - kuser helpers
- * 1 - vdso data
- * 2 - vdso code
- */
-#define C_VECTORS 0
+enum aarch32_map {
+ AA32_MAP_VECTORS, /* kuser helpers */
#ifdef CONFIG_COMPAT_VDSO
-#define C_VVAR 1
-#define C_VDSO 2
-#define C_PAGES (C_VDSO + 1)
+ AA32_MAP_VVAR,
+ AA32_MAP_VDSO,
#else
-#define C_SIGPAGE 1
-#define C_PAGES (C_SIGPAGE + 1)
-#endif /* CONFIG_COMPAT_VDSO */
-static struct page *aarch32_vdso_pages[C_PAGES] __ro_after_init;
-static struct vm_special_mapping aarch32_vdso_spec[C_PAGES] = {
- {
+ AA32_MAP_SIGPAGE
+#endif
+};
+
+static struct page *aarch32_vectors_page __ro_after_init;
+#ifndef CONFIG_COMPAT_VDSO
+static struct page *aarch32_sig_page __ro_after_init;
+#endif
+
+static struct vm_special_mapping aarch32_vdso_maps[] = {
+ [AA32_MAP_VECTORS] = {
.name = "[vectors]", /* ABI */
- .pages = &aarch32_vdso_pages[C_VECTORS],
+ .pages = &aarch32_vectors_page,
},
#ifdef CONFIG_COMPAT_VDSO
- {
+ [AA32_MAP_VVAR] = {
.name = "[vvar]",
},
- {
+ [AA32_MAP_VDSO] = {
.name = "[vdso]",
.mremap = aarch32_vdso_mremap,
},
#else
- {
+ [AA32_MAP_SIGPAGE] = {
.name = "[sigpage]", /* ABI */
- .pages = &aarch32_vdso_pages[C_SIGPAGE],
+ .pages = &aarch32_sig_page,
},
#endif /* CONFIG_COMPAT_VDSO */
};
@@ -243,8 +237,8 @@ static int aarch32_alloc_kuser_vdso_page(void)
memcpy((void *)(vdso_page + 0x1000 - kuser_sz), __kuser_helper_start,
kuser_sz);
- aarch32_vdso_pages[C_VECTORS] = virt_to_page(vdso_page);
- flush_dcache_page(aarch32_vdso_pages[C_VECTORS]);
+ aarch32_vectors_page = virt_to_page(vdso_page);
+ flush_dcache_page(aarch32_vectors_page);
return 0;
}
@@ -253,10 +247,10 @@ static int __aarch32_alloc_vdso_pages(void)
{
int ret;
- vdso_lookup[ARM64_VDSO32].dm = &aarch32_vdso_spec[C_VVAR];
- vdso_lookup[ARM64_VDSO32].cm = &aarch32_vdso_spec[C_VDSO];
+ vdso_info[VDSO_ABI_AA32].dm = &aarch32_vdso_maps[AA32_MAP_VVAR];
+ vdso_info[VDSO_ABI_AA32].cm = &aarch32_vdso_maps[AA32_MAP_VDSO];
- ret = __vdso_init(ARM64_VDSO32);
+ ret = __vdso_init(VDSO_ABI_AA32);
if (ret)
return ret;
@@ -275,8 +269,8 @@ static int __aarch32_alloc_vdso_pages(void)
return -ENOMEM;
memcpy((void *)sigpage, __aarch32_sigret_code_start, sigret_sz);
- aarch32_vdso_pages[C_SIGPAGE] = virt_to_page(sigpage);
- flush_dcache_page(aarch32_vdso_pages[C_SIGPAGE]);
+ aarch32_sig_page = virt_to_page(sigpage);
+ flush_dcache_page(aarch32_sig_page);
ret = aarch32_alloc_kuser_vdso_page();
if (ret)
@@ -306,7 +300,7 @@ static int aarch32_kuser_helpers_setup(struct mm_struct *mm)
ret = _install_special_mapping(mm, AARCH32_VECTORS_BASE, PAGE_SIZE,
VM_READ | VM_EXEC |
VM_MAYREAD | VM_MAYEXEC,
- &aarch32_vdso_spec[C_VECTORS]);
+ &aarch32_vdso_maps[AA32_MAP_VECTORS]);
return PTR_ERR_OR_ZERO(ret);
}
@@ -330,7 +324,7 @@ static int aarch32_sigreturn_setup(struct mm_struct *mm)
ret = _install_special_mapping(mm, addr, PAGE_SIZE,
VM_READ | VM_EXEC | VM_MAYREAD |
VM_MAYWRITE | VM_MAYEXEC,
- &aarch32_vdso_spec[C_SIGPAGE]);
+ &aarch32_vdso_maps[AA32_MAP_SIGPAGE]);
if (IS_ERR(ret))
goto out;
@@ -354,7 +348,7 @@ int aarch32_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
goto out;
#ifdef CONFIG_COMPAT_VDSO
- ret = __setup_additional_pages(ARM64_VDSO32,
+ ret = __setup_additional_pages(VDSO_ABI_AA32,
mm,
bprm,
uses_interp);
@@ -371,22 +365,19 @@ out:
static int vdso_mremap(const struct vm_special_mapping *sm,
struct vm_area_struct *new_vma)
{
- return __vdso_remap(ARM64_VDSO, sm, new_vma);
+ return __vdso_remap(VDSO_ABI_AA64, sm, new_vma);
}
-/*
- * aarch64_vdso_pages:
- * 0 - vvar
- * 1 - vdso
- */
-#define A_VVAR 0
-#define A_VDSO 1
-#define A_PAGES (A_VDSO + 1)
-static struct vm_special_mapping vdso_spec[A_PAGES] __ro_after_init = {
- {
+enum aarch64_map {
+ AA64_MAP_VVAR,
+ AA64_MAP_VDSO,
+};
+
+static struct vm_special_mapping aarch64_vdso_maps[] __ro_after_init = {
+ [AA64_MAP_VVAR] = {
.name = "[vvar]",
},
- {
+ [AA64_MAP_VDSO] = {
.name = "[vdso]",
.mremap = vdso_mremap,
},
@@ -394,10 +385,10 @@ static struct vm_special_mapping vdso_spec[A_PAGES] __ro_after_init = {
static int __init vdso_init(void)
{
- vdso_lookup[ARM64_VDSO].dm = &vdso_spec[A_VVAR];
- vdso_lookup[ARM64_VDSO].cm = &vdso_spec[A_VDSO];
+ vdso_info[VDSO_ABI_AA64].dm = &aarch64_vdso_maps[AA64_MAP_VVAR];
+ vdso_info[VDSO_ABI_AA64].cm = &aarch64_vdso_maps[AA64_MAP_VDSO];
- return __vdso_init(ARM64_VDSO);
+ return __vdso_init(VDSO_ABI_AA64);
}
arch_initcall(vdso_init);
@@ -410,7 +401,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm,
if (down_write_killable(&mm->mmap_sem))
return -EINTR;
- ret = __setup_additional_pages(ARM64_VDSO,
+ ret = __setup_additional_pages(VDSO_ABI_AA64,
mm,
bprm,
uses_interp);
diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
index 3862cad2410c..556d424c6f52 100644
--- a/arch/arm64/kernel/vdso/Makefile
+++ b/arch/arm64/kernel/vdso/Makefile
@@ -17,15 +17,19 @@ obj-vdso := vgettimeofday.o note.o sigreturn.o
targets := $(obj-vdso) vdso.so vdso.so.dbg
obj-vdso := $(addprefix $(obj)/, $(obj-vdso))
+btildflags-$(CONFIG_ARM64_BTI_KERNEL) += -z force-bti
+
+# -Bsymbolic has been added for consistency with arm, the compat vDSO and
+# potential future proofing if we end up with internal calls to the exported
+# routines, as x86 does (see 6f121e548f83 ("x86, vdso: Reimplement vdso.so
+# preparation in build-time C")).
ldflags-y := -shared -nostdlib -soname=linux-vdso.so.1 --hash-style=sysv \
- --build-id -n -T
+ -Bsymbolic --eh-frame-hdr --build-id -n $(btildflags-y) -T
ccflags-y := -fno-common -fno-builtin -fno-stack-protector -ffixed-x18
ccflags-y += -DDISABLE_BRANCH_PROFILING
-VDSO_LDFLAGS := -Bsymbolic
-
-CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os
+CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS)
KBUILD_CFLAGS += $(DISABLE_LTO)
KASAN_SANITIZE := n
UBSAN_SANITIZE := n
diff --git a/arch/arm64/kernel/vdso/note.S b/arch/arm64/kernel/vdso/note.S
index 0ce6ec75a525..3d4e82290c80 100644
--- a/arch/arm64/kernel/vdso/note.S
+++ b/arch/arm64/kernel/vdso/note.S
@@ -12,9 +12,12 @@
#include <linux/version.h>
#include <linux/elfnote.h>
#include <linux/build-salt.h>
+#include <asm/assembler.h>
ELFNOTE_START(Linux, 0, "a")
.long LINUX_VERSION_CODE
ELFNOTE_END
BUILD_SALT
+
+emit_aarch64_feature_1_and
diff --git a/arch/arm64/kernel/vdso/sigreturn.S b/arch/arm64/kernel/vdso/sigreturn.S
index 12324863d5c2..620a3ef837b7 100644
--- a/arch/arm64/kernel/vdso/sigreturn.S
+++ b/arch/arm64/kernel/vdso/sigreturn.S
@@ -1,7 +1,11 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Sigreturn trampoline for returning from a signal when the SA_RESTORER
- * flag is not set.
+ * flag is not set. It serves primarily as a hall of shame for crappy
+ * unwinders and features an exciting but mysterious NOP instruction.
+ *
+ * It's also fragile as hell, so please think twice before changing anything
+ * in here.
*
* Copyright (C) 2012 ARM Limited
*
@@ -9,18 +13,54 @@
*/
#include <linux/linkage.h>
+#include <asm/assembler.h>
#include <asm/unistd.h>
.text
- nop
-SYM_FUNC_START(__kernel_rt_sigreturn)
+/* Ensure that the mysterious NOP can be associated with a function. */
.cfi_startproc
+
+/*
+ * .cfi_signal_frame causes the corresponding Frame Description Entry in the
+ * .eh_frame section to be annotated as a signal frame. This allows DWARF
+ * unwinders (e.g. libstdc++) to implement _Unwind_GetIPInfo(), which permits
+ * unwinding out of the signal trampoline without the need for the mysterious
+ * NOP.
+ */
.cfi_signal_frame
- .cfi_def_cfa x29, 0
- .cfi_offset x29, 0 * 8
- .cfi_offset x30, 1 * 8
+
+/*
+ * Tell the unwinder where to locate the frame record linking back to the
+ * interrupted context. We don't provide unwind info for registers other
+ * than the frame pointer and the link register here; in practice, this
+ * is sufficient for unwinding in C/C++ based runtimes and the values in
+ * the sigcontext may have been modified by this point anyway. Debuggers
+ * already have baked-in strategies for attempting to unwind out of signals.
+ */
+ .cfi_def_cfa x29, 0
+ .cfi_offset x29, 0 * 8
+ .cfi_offset x30, 1 * 8
+
+/*
+ * This mysterious NOP is required for some unwinders (e.g. libc++) that
+ * unconditionally subtract one from the result of _Unwind_GetIP() in order to
+ * identify the calling function.
+ * Hack borrowed from arch/powerpc/kernel/vdso64/sigtramp.S.
+ */
+ nop // Mysterious NOP
+
+/*
+ * GDB relies on being able to identify the sigreturn instruction sequence to
+ * unwind from signal handlers. We cannot, therefore, use SYM_FUNC_START()
+ * here, as it will emit a BTI C instruction and break the unwinder. Thankfully,
+ * this function is only ever called from a RET and so omitting the landing pad
+ * is perfectly fine.
+ */
+SYM_CODE_START(__kernel_rt_sigreturn)
mov x8, #__NR_rt_sigreturn
svc #0
.cfi_endproc
-SYM_FUNC_END(__kernel_rt_sigreturn)
+SYM_CODE_END(__kernel_rt_sigreturn)
+
+emit_aarch64_feature_1_and
diff --git a/arch/arm64/kernel/vdso/vdso.S b/arch/arm64/kernel/vdso/vdso.S
index d1414fee5274..c4b1990bf2be 100644
--- a/arch/arm64/kernel/vdso/vdso.S
+++ b/arch/arm64/kernel/vdso/vdso.S
@@ -8,6 +8,7 @@
#include <linux/init.h>
#include <linux/linkage.h>
#include <linux/const.h>
+#include <asm/assembler.h>
#include <asm/page.h>
.globl vdso_start, vdso_end
@@ -19,3 +20,5 @@ vdso_start:
vdso_end:
.previous
+
+emit_aarch64_feature_1_and
diff --git a/arch/arm64/kernel/vdso32/sigreturn.S b/arch/arm64/kernel/vdso32/sigreturn.S
index 620524969696..b0091064c3d6 100644
--- a/arch/arm64/kernel/vdso32/sigreturn.S
+++ b/arch/arm64/kernel/vdso32/sigreturn.S
@@ -3,6 +3,9 @@
* This file provides both A32 and T32 versions, in accordance with the
* arm sigreturn code.
*
+ * Please read the comments in arch/arm64/kernel/vdso/sigreturn.S to
+ * understand some of the craziness in here.
+ *
* Copyright (C) 2018 ARM Limited
*/
@@ -17,39 +20,39 @@
.save {r0-r15}
.pad #COMPAT_SIGFRAME_REGS_OFFSET
nop
-SYM_FUNC_START(__kernel_sigreturn_arm)
+SYM_CODE_START(__kernel_sigreturn_arm)
mov r7, #__NR_compat_sigreturn
svc #0
.fnend
-SYM_FUNC_END(__kernel_sigreturn_arm)
+SYM_CODE_END(__kernel_sigreturn_arm)
.fnstart
.save {r0-r15}
.pad #COMPAT_RT_SIGFRAME_REGS_OFFSET
nop
-SYM_FUNC_START(__kernel_rt_sigreturn_arm)
+SYM_CODE_START(__kernel_rt_sigreturn_arm)
mov r7, #__NR_compat_rt_sigreturn
svc #0
.fnend
-SYM_FUNC_END(__kernel_rt_sigreturn_arm)
+SYM_CODE_END(__kernel_rt_sigreturn_arm)
.thumb
.fnstart
.save {r0-r15}
.pad #COMPAT_SIGFRAME_REGS_OFFSET
nop
-SYM_FUNC_START(__kernel_sigreturn_thumb)
+SYM_CODE_START(__kernel_sigreturn_thumb)
mov r7, #__NR_compat_sigreturn
svc #0
.fnend
-SYM_FUNC_END(__kernel_sigreturn_thumb)
+SYM_CODE_END(__kernel_sigreturn_thumb)
.fnstart
.save {r0-r15}
.pad #COMPAT_RT_SIGFRAME_REGS_OFFSET
nop
-SYM_FUNC_START(__kernel_rt_sigreturn_thumb)
+SYM_CODE_START(__kernel_rt_sigreturn_thumb)
mov r7, #__NR_compat_rt_sigreturn
svc #0
.fnend
-SYM_FUNC_END(__kernel_rt_sigreturn_thumb)
+SYM_CODE_END(__kernel_rt_sigreturn_thumb)
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 497f9675071d..3be632177631 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -17,10 +17,6 @@
#include "image.h"
-/* .exit.text needed in case of alternative patching */
-#define ARM_EXIT_KEEP(x) x
-#define ARM_EXIT_DISCARD(x)
-
OUTPUT_ARCH(aarch64)
ENTRY(_text)
@@ -72,8 +68,8 @@ jiffies = jiffies_64;
/*
* The size of the PE/COFF section that covers the kernel image, which
- * runs from stext to _edata, must be a round multiple of the PE/COFF
- * FileAlignment, which we set to its minimum value of 0x200. 'stext'
+ * runs from _stext to _edata, must be a round multiple of the PE/COFF
+ * FileAlignment, which we set to its minimum value of 0x200. '_stext'
* itself is 4 KB aligned, so padding out _edata to a 0x200 aligned
* boundary should be sufficient.
*/
@@ -95,8 +91,6 @@ SECTIONS
* order of matching.
*/
/DISCARD/ : {
- ARM_EXIT_DISCARD(EXIT_TEXT)
- ARM_EXIT_DISCARD(EXIT_DATA)
EXIT_CALL
*(.discard)
*(.discard.*)
@@ -139,6 +133,7 @@ SECTIONS
idmap_pg_dir = .;
. += IDMAP_DIR_SIZE;
+ idmap_pg_end = .;
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
tramp_pg_dir = .;
@@ -161,7 +156,7 @@ SECTIONS
__exittext_begin = .;
.exit.text : {
- ARM_EXIT_KEEP(EXIT_TEXT)
+ EXIT_TEXT
}
__exittext_end = .;
@@ -175,7 +170,7 @@ SECTIONS
*(.altinstr_replacement)
}
- . = ALIGN(PAGE_SIZE);
+ . = ALIGN(SEGMENT_ALIGN);
__inittext_end = .;
__initdata_begin = .;
@@ -188,7 +183,7 @@ SECTIONS
*(.init.rodata.* .init.bss) /* from the EFI stub */
}
.exit.data : {
- ARM_EXIT_KEEP(EXIT_DATA)
+ EXIT_DATA
}
PERCPU_SECTION(L1_CACHE_BYTES)
@@ -246,6 +241,7 @@ SECTIONS
. += INIT_DIR_SIZE;
init_pg_end = .;
+ . = ALIGN(SEGMENT_ALIGN);
__pecoff_data_size = ABSOLUTE(. - __initdata_begin);
_end = .;
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index 449386d76441..f1c1f981482c 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -3,7 +3,6 @@
# KVM configuration
#
-source "virt/kvm/Kconfig"
source "virt/lib/Kconfig"
menuconfig VIRTUALIZATION
@@ -18,7 +17,7 @@ menuconfig VIRTUALIZATION
if VIRTUALIZATION
-config KVM
+menuconfig KVM
bool "Kernel-based Virtual Machine (KVM) support"
depends on OF
# for TASKSTATS/TASK_DELAY_ACCT:
@@ -28,13 +27,11 @@ config KVM
select HAVE_KVM_CPU_RELAX_INTERCEPT
select HAVE_KVM_ARCH_TLB_FLUSH_ALL
select KVM_MMIO
- select KVM_ARM_HOST
select KVM_GENERIC_DIRTYLOG_READ_PROTECT
select SRCU
select KVM_VFIO
select HAVE_KVM_EVENTFD
select HAVE_KVM_IRQFD
- select KVM_ARM_PMU if HW_PERF_EVENTS
select HAVE_KVM_MSI
select HAVE_KVM_IRQCHIP
select HAVE_KVM_IRQ_ROUTING
@@ -45,23 +42,24 @@ config KVM
select TASK_DELAY_ACCT
---help---
Support hosting virtualized guest machines.
- We don't support KVM with 16K page tables yet, due to the multiple
- levels of fake page tables.
If unsure, say N.
-config KVM_ARM_HOST
- bool
- ---help---
- Provides host support for ARM processors.
+if KVM
+
+source "virt/kvm/Kconfig"
config KVM_ARM_PMU
- bool
+ bool "Virtual Performance Monitoring Unit (PMU) support"
+ depends on HW_PERF_EVENTS
+ default y
---help---
Adds support for a virtual Performance Monitoring Unit (PMU) in
virtual machines.
config KVM_INDIRECT_VECTORS
- def_bool KVM && (HARDEN_BRANCH_PREDICTOR || HARDEN_EL2_VECTORS)
+ def_bool HARDEN_BRANCH_PREDICTOR || HARDEN_EL2_VECTORS
+
+endif # KVM
endif # VIRTUALIZATION
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 5ffbdc39e780..8d3d9513cbfe 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -3,37 +3,25 @@
# Makefile for Kernel-based Virtual Machine module
#
-ccflags-y += -I $(srctree)/$(src) -I $(srctree)/virt/kvm/arm/vgic
+ccflags-y += -I $(srctree)/$(src)
KVM=../../../virt/kvm
-obj-$(CONFIG_KVM_ARM_HOST) += kvm.o
-obj-$(CONFIG_KVM_ARM_HOST) += hyp/
+obj-$(CONFIG_KVM) += kvm.o
+obj-$(CONFIG_KVM) += hyp/
-kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o $(KVM)/vfio.o
-kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arm.o $(KVM)/arm/mmu.o $(KVM)/arm/mmio.o
-kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/psci.o $(KVM)/arm/perf.o
-kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hypercalls.o
-kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/pvtime.o
+kvm-y := $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o \
+ $(KVM)/vfio.o $(KVM)/irqchip.o \
+ arm.o mmu.o mmio.o psci.o perf.o hypercalls.o pvtime.o \
+ inject_fault.o regmap.o va_layout.o hyp.o hyp-init.o handle_exit.o \
+ guest.o debug.o reset.o sys_regs.o sys_regs_generic_v8.o \
+ vgic-sys-reg-v3.o fpsimd.o pmu.o \
+ aarch32.o arch_timer.o \
+ vgic/vgic.o vgic/vgic-init.o \
+ vgic/vgic-irqfd.o vgic/vgic-v2.o \
+ vgic/vgic-v3.o vgic/vgic-v4.o \
+ vgic/vgic-mmio.o vgic/vgic-mmio-v2.o \
+ vgic/vgic-mmio-v3.o vgic/vgic-kvm-device.o \
+ vgic/vgic-its.o vgic/vgic-debug.o
-kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o regmap.o va_layout.o
-kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o
-kvm-$(CONFIG_KVM_ARM_HOST) += guest.o debug.o reset.o sys_regs.o sys_regs_generic_v8.o
-kvm-$(CONFIG_KVM_ARM_HOST) += vgic-sys-reg-v3.o fpsimd.o pmu.o
-kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/aarch32.o
-
-kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic.o
-kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-init.o
-kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-irqfd.o
-kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-v2.o
-kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-v3.o
-kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-v4.o
-kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio.o
-kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v2.o
-kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v3.o
-kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-kvm-device.o
-kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-its.o
-kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-debug.o
-kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/irqchip.o
-kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arch_timer.o
-kvm-$(CONFIG_KVM_ARM_PMU) += $(KVM)/arm/pmu.o
+kvm-$(CONFIG_KVM_ARM_PMU) += pmu-emul.o
diff --git a/virt/kvm/arm/aarch32.c b/arch/arm64/kvm/aarch32.c
index 0a356aa91aa1..0a356aa91aa1 100644
--- a/virt/kvm/arm/aarch32.c
+++ b/arch/arm64/kvm/aarch32.c
diff --git a/virt/kvm/arm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
index 93bd59b46848..a1fe0ea3254e 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/arch/arm64/kvm/arch_timer.c
@@ -451,17 +451,7 @@ out:
static void set_cntvoff(u64 cntvoff)
{
- u32 low = lower_32_bits(cntvoff);
- u32 high = upper_32_bits(cntvoff);
-
- /*
- * Since kvm_call_hyp doesn't fully support the ARM PCS especially on
- * 32-bit systems, but rather passes register by register shifted one
- * place (we put the function address in r0/x0), we cannot simply pass
- * a 64-bit value as an argument, but have to split the value in two
- * 32-bit halves.
- */
- kvm_call_hyp(__kvm_timer_set_cntvoff, low, high);
+ kvm_call_hyp(__kvm_timer_set_cntvoff, cntvoff);
}
static inline void set_timer_irq_phys_active(struct arch_timer_context *ctx, bool active)
@@ -571,6 +561,7 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = vcpu_timer(vcpu);
struct timer_map map;
+ struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu);
if (unlikely(!timer->enabled))
return;
@@ -593,7 +584,7 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
if (map.emul_ptimer)
soft_timer_cancel(&map.emul_ptimer->hrtimer);
- if (swait_active(kvm_arch_vcpu_wq(vcpu)))
+ if (rcuwait_active(wait))
kvm_timer_blocking(vcpu);
/*
diff --git a/virt/kvm/arm/arm.c b/arch/arm64/kvm/arm.c
index 48d0ec44ad77..7a57381c05e8 100644
--- a/virt/kvm/arm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -22,7 +22,7 @@
#include <trace/events/kvm.h>
#define CREATE_TRACE_POINTS
-#include "trace.h"
+#include "trace_arm.h"
#include <linux/uaccess.h>
#include <asm/ptrace.h>
@@ -95,6 +95,11 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
return r;
}
+static int kvm_arm_default_max_vcpus(void)
+{
+ return vgic_present ? kvm_vgic_get_max_vcpus() : KVM_MAX_VCPUS;
+}
+
/**
* kvm_arch_init_vm - initializes a VM data structure
* @kvm: pointer to the KVM struct
@@ -128,8 +133,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
kvm->arch.vmid.vmid_gen = 0;
/* The maximum number of VCPUs is limited by the host's GIC model */
- kvm->arch.max_vcpus = vgic_present ?
- kvm_vgic_get_max_vcpus() : KVM_MAX_VCPUS;
+ kvm->arch.max_vcpus = kvm_arm_default_max_vcpus();
return ret;
out_free_stage2_pgd:
@@ -204,10 +208,11 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = num_online_cpus();
break;
case KVM_CAP_MAX_VCPUS:
- r = KVM_MAX_VCPUS;
- break;
case KVM_CAP_MAX_VCPU_ID:
- r = KVM_MAX_VCPU_ID;
+ if (kvm)
+ r = kvm->arch.max_vcpus;
+ else
+ r = kvm_arm_default_max_vcpus();
break;
case KVM_CAP_MSI_DEVID:
if (!kvm)
@@ -455,9 +460,9 @@ void force_vm_exit(const cpumask_t *mask)
*
* The hardware supports a limited set of values with the value zero reserved
* for the host, so we check if an assigned value belongs to a previous
- * generation, which which requires us to assign a new value. If we're the
- * first to use a VMID for the new generation, we must flush necessary caches
- * and TLBs on all CPUs.
+ * generation, which requires us to assign a new value. If we're the first to
+ * use a VMID for the new generation, we must flush necessary caches and TLBs
+ * on all CPUs.
*/
static bool need_new_vmid_gen(struct kvm_vmid *vmid)
{
@@ -579,16 +584,17 @@ void kvm_arm_resume_guest(struct kvm *kvm)
kvm_for_each_vcpu(i, vcpu, kvm) {
vcpu->arch.pause = false;
- swake_up_one(kvm_arch_vcpu_wq(vcpu));
+ rcuwait_wake_up(kvm_arch_vcpu_get_wait(vcpu));
}
}
static void vcpu_req_sleep(struct kvm_vcpu *vcpu)
{
- struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu);
+ struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu);
- swait_event_interruptible_exclusive(*wq, ((!vcpu->arch.power_off) &&
- (!vcpu->arch.pause)));
+ rcuwait_wait_event(wait,
+ (!vcpu->arch.power_off) &&(!vcpu->arch.pause),
+ TASK_INTERRUPTIBLE);
if (vcpu->arch.power_off || vcpu->arch.pause) {
/* Awaken to handle a signal, request we sleep again later. */
@@ -639,7 +645,6 @@ static void check_vcpu_requests(struct kvm_vcpu *vcpu)
/**
* kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code
* @vcpu: The VCPU pointer
- * @run: The kvm_run structure pointer used for userspace state exchange
*
* This function is called through the VCPU_RUN ioctl called from user space. It
* will execute VM code in a loop until the time slice for the process is used
@@ -647,8 +652,9 @@ static void check_vcpu_requests(struct kvm_vcpu *vcpu)
* return with return value 0 and with the kvm_run structure filled in with the
* required data for the requested emulation.
*/
-int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
+int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
{
+ struct kvm_run *run = vcpu->run;
int ret;
if (unlikely(!kvm_vcpu_initialized(vcpu)))
@@ -659,7 +665,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
return ret;
if (run->exit_reason == KVM_EXIT_MMIO) {
- ret = kvm_handle_mmio_return(vcpu, vcpu->run);
+ ret = kvm_handle_mmio_return(vcpu, run);
if (ret)
return ret;
}
@@ -983,8 +989,11 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
/*
* Ensure a rebooted VM will fault in RAM pages and detect if the
* guest MMU is turned off and flush the caches as needed.
+ *
+ * S2FWB enforces all memory accesses to RAM being cacheable, we
+ * ensure that the cache is always coherent.
*/
- if (vcpu->arch.has_run_once)
+ if (vcpu->arch.has_run_once && !cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
stage2_unmap_vm(vcpu->kvm);
vcpu_reset_hcr(vcpu);
@@ -1265,19 +1274,41 @@ static void cpu_init_hyp_mode(void)
{
phys_addr_t pgd_ptr;
unsigned long hyp_stack_ptr;
- unsigned long stack_page;
unsigned long vector_ptr;
+ unsigned long tpidr_el2;
/* Switch from the HYP stub to our own HYP init vector */
__hyp_set_vectors(kvm_get_idmap_vector());
+ /*
+ * Calculate the raw per-cpu offset without a translation from the
+ * kernel's mapping to the linear mapping, and store it in tpidr_el2
+ * so that we can use adr_l to access per-cpu variables in EL2.
+ */
+ tpidr_el2 = ((unsigned long)this_cpu_ptr(&kvm_host_data) -
+ (unsigned long)kvm_ksym_ref(kvm_host_data));
+
pgd_ptr = kvm_mmu_get_httbr();
- stack_page = __this_cpu_read(kvm_arm_hyp_stack_page);
- hyp_stack_ptr = stack_page + PAGE_SIZE;
+ hyp_stack_ptr = __this_cpu_read(kvm_arm_hyp_stack_page) + PAGE_SIZE;
vector_ptr = (unsigned long)kvm_get_hyp_vector();
- __cpu_init_hyp_mode(pgd_ptr, hyp_stack_ptr, vector_ptr);
- __cpu_init_stage2();
+ /*
+ * Call initialization code, and switch to the full blown HYP code.
+ * If the cpucaps haven't been finalized yet, something has gone very
+ * wrong, and hyp will crash and burn when it uses any
+ * cpus_have_const_cap() wrapper.
+ */
+ BUG_ON(!system_capabilities_finalized());
+ __kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr, tpidr_el2);
+
+ /*
+ * Disabling SSBD on a non-VHE system requires us to enable SSBS
+ * at EL2.
+ */
+ if (this_cpu_has_cap(ARM64_SSBS) &&
+ arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) {
+ kvm_call_hyp(__kvm_enable_ssbs);
+ }
}
static void cpu_hyp_reset(void)
@@ -1387,9 +1418,7 @@ static inline void hyp_cpu_pm_exit(void)
static int init_common_resources(void)
{
- kvm_set_ipa_limit();
-
- return 0;
+ return kvm_set_ipa_limit();
}
static int init_subsystems(void)
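
With the KVM_CAP_MAX_VCPUS/KVM_CAP_MAX_VCPU_ID change above, querying the capability on a VM file descriptor now reflects that VM's GIC model rather than the compile-time KVM_MAX_VCPUS. A hedged userspace sketch using the standard KVM ioctls (error handling omitted; the file-descriptor plumbing is illustrative, not part of the patch):

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
	int kvm = open("/dev/kvm", O_RDWR);
	int vm = ioctl(kvm, KVM_CREATE_VM, 0);

	/* System-wide default vs. the limit for this particular VM. */
	int sys_max = ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_MAX_VCPUS);
	int vm_max = ioctl(vm, KVM_CHECK_EXTENSION, KVM_CAP_MAX_VCPUS);

	printf("max vcpus: system %d, this VM %d\n", sys_max, vm_max);
	return 0;
}
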
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 50a279d3ddd7..aea43ec60f37 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -29,20 +29,19 @@
#include "trace.h"
-#define VM_STAT(x) { #x, offsetof(struct kvm, stat.x), KVM_STAT_VM }
-#define VCPU_STAT(x) { #x, offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU }
-
struct kvm_stats_debugfs_item debugfs_entries[] = {
- VCPU_STAT(halt_successful_poll),
- VCPU_STAT(halt_attempted_poll),
- VCPU_STAT(halt_poll_invalid),
- VCPU_STAT(halt_wakeup),
- VCPU_STAT(hvc_exit_stat),
- VCPU_STAT(wfe_exit_stat),
- VCPU_STAT(wfi_exit_stat),
- VCPU_STAT(mmio_exit_user),
- VCPU_STAT(mmio_exit_kernel),
- VCPU_STAT(exits),
+ VCPU_STAT("halt_successful_poll", halt_successful_poll),
+ VCPU_STAT("halt_attempted_poll", halt_attempted_poll),
+ VCPU_STAT("halt_poll_invalid", halt_poll_invalid),
+ VCPU_STAT("halt_wakeup", halt_wakeup),
+ VCPU_STAT("hvc_exit_stat", hvc_exit_stat),
+ VCPU_STAT("wfe_exit_stat", wfe_exit_stat),
+ VCPU_STAT("wfi_exit_stat", wfi_exit_stat),
+ VCPU_STAT("mmio_exit_user", mmio_exit_user),
+ VCPU_STAT("mmio_exit_kernel", mmio_exit_kernel),
+ VCPU_STAT("exits", exits),
+ VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns),
+ VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns),
{ NULL }
};
@@ -267,7 +266,7 @@ static int set_sve_vls(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
/*
* Vector lengths supported by the host can't currently be
* hidden from the guest individually: instead we can only set a
- * maxmium via ZCR_EL2.LEN. So, make sure the available vector
+ * maximum via ZCR_EL2.LEN. So, make sure the available vector
* lengths match the set requested exactly up to the requested
* maximum:
*/
@@ -337,7 +336,7 @@ static int sve_reg_to_region(struct sve_state_reg_region *region,
unsigned int reg_num;
unsigned int reqoffset, reqlen; /* User-requested offset and length */
- unsigned int maxlen; /* Maxmimum permitted length */
+ unsigned int maxlen; /* Maximum permitted length */
size_t sve_state_size;
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index aacfc55de44c..eb194696ef62 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -23,7 +23,7 @@
#include <kvm/arm_hypercalls.h>
#define CREATE_TRACE_POINTS
-#include "trace.h"
+#include "trace_handle_exit.h"
typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *);
diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile
index ea710f674cb6..8c9880783839 100644
--- a/arch/arm64/kvm/hyp/Makefile
+++ b/arch/arm64/kvm/hyp/Makefile
@@ -6,20 +6,10 @@
ccflags-y += -fno-stack-protector -DDISABLE_BRANCH_PROFILING \
$(DISABLE_STACKLEAK_PLUGIN)
-KVM=../../../../virt/kvm
+obj-$(CONFIG_KVM) += hyp.o
-obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v3-sr.o
-obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/timer-sr.o
-obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/aarch32.o
-
-obj-$(CONFIG_KVM_ARM_HOST) += vgic-v2-cpuif-proxy.o
-obj-$(CONFIG_KVM_ARM_HOST) += sysreg-sr.o
-obj-$(CONFIG_KVM_ARM_HOST) += debug-sr.o
-obj-$(CONFIG_KVM_ARM_HOST) += entry.o
-obj-$(CONFIG_KVM_ARM_HOST) += switch.o
-obj-$(CONFIG_KVM_ARM_HOST) += fpsimd.o
-obj-$(CONFIG_KVM_ARM_HOST) += tlb.o
-obj-$(CONFIG_KVM_ARM_HOST) += hyp-entry.o
+hyp-y := vgic-v3-sr.o timer-sr.o aarch32.o vgic-v2-cpuif-proxy.o sysreg-sr.o \
+ debug-sr.o entry.o switch.o fpsimd.o tlb.o hyp-entry.o
# KVM code is run at a different exception code with a different map, so
# compiler instrumentation that inserts callbacks or checks into the code may
diff --git a/virt/kvm/arm/hyp/aarch32.c b/arch/arm64/kvm/hyp/aarch32.c
index 25c0e47d57cb..25c0e47d57cb 100644
--- a/virt/kvm/arm/hyp/aarch32.c
+++ b/arch/arm64/kvm/hyp/aarch32.c
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index 8a1e81a400e0..676b6585e5ae 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -138,7 +138,7 @@ static void __hyp_text __activate_traps_nvhe(struct kvm_vcpu *vcpu)
write_sysreg(val, cptr_el2);
- if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE)) {
+ if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
struct kvm_cpu_context *ctxt = &vcpu->arch.ctxt;
isb();
@@ -181,7 +181,7 @@ static void deactivate_traps_vhe(void)
* above before we can switch to the EL2/EL0 translation regime used by
* the host.
*/
- asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT_VHE));
+ asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT));
write_sysreg(CPACR_EL1_DEFAULT, cpacr_el1);
write_sysreg(vectors, vbar_el1);
@@ -192,7 +192,7 @@ static void __hyp_text __deactivate_traps_nvhe(void)
{
u64 mdcr_el2 = read_sysreg(mdcr_el2);
- if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE)) {
+ if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
u64 val;
/*
@@ -270,8 +270,8 @@ static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu)
static void __hyp_text __hyp_vgic_save_state(struct kvm_vcpu *vcpu)
{
if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) {
- __vgic_v3_save_state(vcpu);
- __vgic_v3_deactivate_traps(vcpu);
+ __vgic_v3_save_state(&vcpu->arch.vgic_cpu.vgic_v3);
+ __vgic_v3_deactivate_traps(&vcpu->arch.vgic_cpu.vgic_v3);
}
}
@@ -279,8 +279,8 @@ static void __hyp_text __hyp_vgic_save_state(struct kvm_vcpu *vcpu)
static void __hyp_text __hyp_vgic_restore_state(struct kvm_vcpu *vcpu)
{
if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) {
- __vgic_v3_activate_traps(vcpu);
- __vgic_v3_restore_state(vcpu);
+ __vgic_v3_activate_traps(&vcpu->arch.vgic_cpu.vgic_v3);
+ __vgic_v3_restore_state(&vcpu->arch.vgic_cpu.vgic_v3);
}
}
diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c
index 6d2df9fe0b5d..ea5d22fbdacf 100644
--- a/arch/arm64/kvm/hyp/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/sysreg-sr.c
@@ -107,7 +107,8 @@ static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
write_sysreg(ctxt->sys_regs[MPIDR_EL1], vmpidr_el2);
write_sysreg(ctxt->sys_regs[CSSELR_EL1], csselr_el1);
- if (!cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE)) {
+ if (has_vhe() ||
+ !cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], SYS_SCTLR);
write_sysreg_el1(ctxt->sys_regs[TCR_EL1], SYS_TCR);
} else if (!ctxt->__hyp_running_vcpu) {
@@ -138,7 +139,8 @@ static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
write_sysreg(ctxt->sys_regs[PAR_EL1], par_el1);
write_sysreg(ctxt->sys_regs[TPIDR_EL1], tpidr_el1);
- if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE) &&
+ if (!has_vhe() &&
+ cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT) &&
ctxt->__hyp_running_vcpu) {
/*
* Must only be done for host registers, hence the context
diff --git a/virt/kvm/arm/hyp/timer-sr.c b/arch/arm64/kvm/hyp/timer-sr.c
index ff76e6845fe4..fb5c0be33223 100644
--- a/virt/kvm/arm/hyp/timer-sr.c
+++ b/arch/arm64/kvm/hyp/timer-sr.c
@@ -10,9 +10,8 @@
#include <asm/kvm_hyp.h>
-void __hyp_text __kvm_timer_set_cntvoff(u32 cntvoff_low, u32 cntvoff_high)
+void __hyp_text __kvm_timer_set_cntvoff(u64 cntvoff)
{
- u64 cntvoff = (u64)cntvoff_high << 32 | cntvoff_low;
write_sysreg(cntvoff, cntvoff_el2);
}
diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c
index ceaddbe4279f..d063a576d511 100644
--- a/arch/arm64/kvm/hyp/tlb.c
+++ b/arch/arm64/kvm/hyp/tlb.c
@@ -23,7 +23,7 @@ static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm,
local_irq_save(cxt->flags);
- if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT_VHE)) {
+ if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
/*
* For CPUs that are affected by ARM errata 1165522 or 1530923,
* we cannot trust stage-1 to be in a correct state at that
@@ -63,7 +63,7 @@ static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm,
static void __hyp_text __tlb_switch_to_guest_nvhe(struct kvm *kvm,
struct tlb_inv_context *cxt)
{
- if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE)) {
+ if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
u64 val;
/*
@@ -79,8 +79,9 @@ static void __hyp_text __tlb_switch_to_guest_nvhe(struct kvm *kvm,
isb();
}
+ /* __load_guest_stage2() includes an ISB for the workaround. */
__load_guest_stage2(kvm);
- isb();
+ asm(ALTERNATIVE("isb", "nop", ARM64_WORKAROUND_SPECULATIVE_AT));
}
static void __hyp_text __tlb_switch_to_guest(struct kvm *kvm,
@@ -103,7 +104,7 @@ static void __hyp_text __tlb_switch_to_host_vhe(struct kvm *kvm,
write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
isb();
- if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT_VHE)) {
+ if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
/* Restore the registers to what they were */
write_sysreg_el1(cxt->tcr, SYS_TCR);
write_sysreg_el1(cxt->sctlr, SYS_SCTLR);
@@ -117,7 +118,7 @@ static void __hyp_text __tlb_switch_to_host_nvhe(struct kvm *kvm,
{
write_sysreg(0, vttbr_el2);
- if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE)) {
+ if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
/* Ensure write of the host VMID */
isb();
/* Restore the host's TCR_EL1 */
diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c
index ccf1fde9836c..10ed539835c1 100644
--- a/virt/kvm/arm/hyp/vgic-v3-sr.c
+++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c
@@ -194,10 +194,9 @@ static u32 __hyp_text __vgic_v3_read_ap1rn(int n)
return val;
}
-void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu)
+void __hyp_text __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if)
{
- struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
- u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs;
+ u64 used_lrs = cpu_if->used_lrs;
/*
* Make sure stores to the GIC via the memory mapped interface
@@ -230,10 +229,9 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu)
}
}
-void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
+void __hyp_text __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if)
{
- struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
- u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs;
+ u64 used_lrs = cpu_if->used_lrs;
int i;
if (used_lrs || cpu_if->its_vpe.its_vm) {
@@ -257,10 +255,8 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
}
}
-void __hyp_text __vgic_v3_activate_traps(struct kvm_vcpu *vcpu)
+void __hyp_text __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if)
{
- struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
-
/*
* VFIQEn is RES1 if ICC_SRE_EL1.SRE is 1. This causes a
* Group0 interrupt (as generated in GICv2 mode) to be
@@ -306,9 +302,8 @@ void __hyp_text __vgic_v3_activate_traps(struct kvm_vcpu *vcpu)
write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2);
}
-void __hyp_text __vgic_v3_deactivate_traps(struct kvm_vcpu *vcpu)
+void __hyp_text __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if)
{
- struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
u64 val;
if (!cpu_if->vgic_sre) {
@@ -333,15 +328,11 @@ void __hyp_text __vgic_v3_deactivate_traps(struct kvm_vcpu *vcpu)
write_gicreg(0, ICH_HCR_EL2);
}
-void __hyp_text __vgic_v3_save_aprs(struct kvm_vcpu *vcpu)
+void __hyp_text __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if)
{
- struct vgic_v3_cpu_if *cpu_if;
u64 val;
u32 nr_pre_bits;
- vcpu = kern_hyp_va(vcpu);
- cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
-
val = read_gicreg(ICH_VTR_EL2);
nr_pre_bits = vtr_to_nr_pre_bits(val);
@@ -370,15 +361,11 @@ void __hyp_text __vgic_v3_save_aprs(struct kvm_vcpu *vcpu)
}
}
-void __hyp_text __vgic_v3_restore_aprs(struct kvm_vcpu *vcpu)
+void __hyp_text __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if)
{
- struct vgic_v3_cpu_if *cpu_if;
u64 val;
u32 nr_pre_bits;
- vcpu = kern_hyp_va(vcpu);
- cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
-
val = read_gicreg(ICH_VTR_EL2);
nr_pre_bits = vtr_to_nr_pre_bits(val);
@@ -431,8 +418,6 @@ void __hyp_text __vgic_v3_write_vmcr(u32 vmcr)
write_gicreg(vmcr, ICH_VMCR_EL2);
}
-#ifdef CONFIG_ARM64
-
static int __hyp_text __vgic_v3_bpr_min(void)
{
/* See Pseudocode for VPriorityGroup */
@@ -453,7 +438,7 @@ static int __hyp_text __vgic_v3_highest_priority_lr(struct kvm_vcpu *vcpu,
u32 vmcr,
u64 *lr_val)
{
- unsigned int used_lrs = vcpu->arch.vgic_cpu.used_lrs;
+ unsigned int used_lrs = vcpu->arch.vgic_cpu.vgic_v3.used_lrs;
u8 priority = GICv3_IDLE_PRIORITY;
int i, lr = -1;
@@ -492,7 +477,7 @@ static int __hyp_text __vgic_v3_highest_priority_lr(struct kvm_vcpu *vcpu,
static int __hyp_text __vgic_v3_find_active_lr(struct kvm_vcpu *vcpu,
int intid, u64 *lr_val)
{
- unsigned int used_lrs = vcpu->arch.vgic_cpu.used_lrs;
+ unsigned int used_lrs = vcpu->arch.vgic_cpu.vgic_v3.used_lrs;
int i;
for (i = 0; i < used_lrs; i++) {
@@ -579,7 +564,7 @@ static u8 __hyp_text __vgic_v3_pri_to_pre(u8 pri, u32 vmcr, int grp)
/*
* The priority value is independent of any of the BPR values, so we
- * normalize it using the minumal BPR value. This guarantees that no
+ * normalize it using the minimal BPR value. This guarantees that no
* matter what the guest does with its BPR, we can always set/get the
* same value of a priority.
*/
@@ -1126,5 +1111,3 @@ int __hyp_text __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu)
return 1;
}
-
-#endif
diff --git a/virt/kvm/arm/hypercalls.c b/arch/arm64/kvm/hypercalls.c
index 550dfa3e53cd..550dfa3e53cd 100644
--- a/virt/kvm/arm/hypercalls.c
+++ b/arch/arm64/kvm/hypercalls.c
diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c
index 6aafc2825c1c..e21fdd93027a 100644
--- a/arch/arm64/kvm/inject_fault.c
+++ b/arch/arm64/kvm/inject_fault.c
@@ -26,28 +26,12 @@ enum exception_type {
except_type_serror = 0x180,
};
-static u64 get_except_vector(struct kvm_vcpu *vcpu, enum exception_type type)
-{
- u64 exc_offset;
-
- switch (*vcpu_cpsr(vcpu) & (PSR_MODE_MASK | PSR_MODE32_BIT)) {
- case PSR_MODE_EL1t:
- exc_offset = CURRENT_EL_SP_EL0_VECTOR;
- break;
- case PSR_MODE_EL1h:
- exc_offset = CURRENT_EL_SP_ELx_VECTOR;
- break;
- case PSR_MODE_EL0t:
- exc_offset = LOWER_EL_AArch64_VECTOR;
- break;
- default:
- exc_offset = LOWER_EL_AArch32_VECTOR;
- }
-
- return vcpu_read_sys_reg(vcpu, VBAR_EL1) + exc_offset + type;
-}
-
/*
+ * This performs the exception entry at a given EL (@target_mode), stashing PC
+ * and PSTATE into ELR and SPSR respectively, and compute the new PC/PSTATE.
+ * The EL passed to this function *must* be a non-secure, privileged mode with
+ * bit 0 being set (PSTATE.SP == 1).
+ *
* When an exception is taken, most PSTATE fields are left unchanged in the
* handler. However, some are explicitly overridden (e.g. M[4:0]). Luckily all
* of the inherited bits have the same position in the AArch64/AArch32 SPSR_ELx
@@ -59,10 +43,35 @@ static u64 get_except_vector(struct kvm_vcpu *vcpu, enum exception_type type)
* Here we manipulate the fields in order of the AArch64 SPSR_ELx layout, from
* MSB to LSB.
*/
-static unsigned long get_except64_pstate(struct kvm_vcpu *vcpu)
+static void enter_exception64(struct kvm_vcpu *vcpu, unsigned long target_mode,
+ enum exception_type type)
{
- unsigned long sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
- unsigned long old, new;
+ unsigned long sctlr, vbar, old, new, mode;
+ u64 exc_offset;
+
+ mode = *vcpu_cpsr(vcpu) & (PSR_MODE_MASK | PSR_MODE32_BIT);
+
+ if (mode == target_mode)
+ exc_offset = CURRENT_EL_SP_ELx_VECTOR;
+ else if ((mode | PSR_MODE_THREAD_BIT) == target_mode)
+ exc_offset = CURRENT_EL_SP_EL0_VECTOR;
+ else if (!(mode & PSR_MODE32_BIT))
+ exc_offset = LOWER_EL_AArch64_VECTOR;
+ else
+ exc_offset = LOWER_EL_AArch32_VECTOR;
+
+ switch (target_mode) {
+ case PSR_MODE_EL1h:
+ vbar = vcpu_read_sys_reg(vcpu, VBAR_EL1);
+ sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
+ vcpu_write_elr_el1(vcpu, *vcpu_pc(vcpu));
+ break;
+ default:
+ /* Don't do that */
+ BUG();
+ }
+
+ *vcpu_pc(vcpu) = vbar + exc_offset + type;
old = *vcpu_cpsr(vcpu);
new = 0;
@@ -105,9 +114,10 @@ static unsigned long get_except64_pstate(struct kvm_vcpu *vcpu)
new |= PSR_I_BIT;
new |= PSR_F_BIT;
- new |= PSR_MODE_EL1h;
+ new |= target_mode;
- return new;
+ *vcpu_cpsr(vcpu) = new;
+ vcpu_write_spsr(vcpu, old);
}
static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr)
@@ -116,11 +126,7 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr
bool is_aarch32 = vcpu_mode_is_32bit(vcpu);
u32 esr = 0;
- vcpu_write_elr_el1(vcpu, *vcpu_pc(vcpu));
- *vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync);
-
- *vcpu_cpsr(vcpu) = get_except64_pstate(vcpu);
- vcpu_write_spsr(vcpu, cpsr);
+ enter_exception64(vcpu, PSR_MODE_EL1h, except_type_sync);
vcpu_write_sys_reg(vcpu, addr, FAR_EL1);
@@ -148,14 +154,9 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr
static void inject_undef64(struct kvm_vcpu *vcpu)
{
- unsigned long cpsr = *vcpu_cpsr(vcpu);
u32 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT);
- vcpu_write_elr_el1(vcpu, *vcpu_pc(vcpu));
- *vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync);
-
- *vcpu_cpsr(vcpu) = get_except64_pstate(vcpu);
- vcpu_write_spsr(vcpu, cpsr);
+ enter_exception64(vcpu, PSR_MODE_EL1h, except_type_sync);
/*
* Build an unknown exception, depending on the instruction
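
enter_exception64() above selects the vector offset purely from the relationship between the current mode and the target mode, then adds the per-type offset from the enum at the top of the file. A small sketch of that arithmetic (the concrete offsets follow the architectural vector-table layout and are restated here for illustration; only except_type_serror's 0x180 is visible in the hunk itself):

/* Illustration of the vector selection: a synchronous exception taken
 * from EL1h and delivered back to EL1h lands at VBAR_EL1 + 0x200.
 */
#define CURRENT_EL_SP_EL0_VECTOR	0x000
#define CURRENT_EL_SP_ELx_VECTOR	0x200
#define LOWER_EL_AArch64_VECTOR		0x400
#define LOWER_EL_AArch32_VECTOR		0x600

#define except_type_sync		0x000

static unsigned long sync_vector_from_el1h(unsigned long vbar_el1)
{
	return vbar_el1 + CURRENT_EL_SP_ELx_VECTOR + except_type_sync;
}
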
diff --git a/virt/kvm/arm/mmio.c b/arch/arm64/kvm/mmio.c
index aedfcff99ac5..4e0366759726 100644
--- a/virt/kvm/arm/mmio.c
+++ b/arch/arm64/kvm/mmio.c
@@ -131,7 +131,7 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
/*
* No valid syndrome? Ask userspace for help if it has
- * voluntered to do so, and bail out otherwise.
+ * volunteered to do so, and bail out otherwise.
*/
if (!kvm_vcpu_dabt_isvalid(vcpu)) {
if (vcpu->kvm->arch.return_nisv_io_abort_to_user) {
diff --git a/virt/kvm/arm/mmu.c b/arch/arm64/kvm/mmu.c
index e3b9ee268823..a1f6bc70c4e4 100644
--- a/virt/kvm/arm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -422,6 +422,9 @@ static void stage2_flush_memslot(struct kvm *kvm,
next = stage2_pgd_addr_end(kvm, addr, end);
if (!stage2_pgd_none(kvm, *pgd))
stage2_flush_puds(kvm, pgd, addr, next);
+
+ if (next != end)
+ cond_resched_lock(&kvm->mmu_lock);
} while (pgd++, addr = next, addr != end);
}
@@ -784,7 +787,7 @@ static int __create_hyp_private_mapping(phys_addr_t phys_addr, size_t size,
mutex_lock(&kvm_hyp_pgd_mutex);
/*
- * This assumes that we we have enough space below the idmap
+ * This assumes that we have enough space below the idmap
* page to allocate our VAs. If not, the check below will
* kick. A potential alternative would be to detect that
* overflow and switch to an allocation above the idmap.
@@ -964,7 +967,7 @@ static void stage2_unmap_memslot(struct kvm *kvm,
* stage2_unmap_vm - Unmap Stage-2 RAM mappings
* @kvm: The struct kvm pointer
*
- * Go through the memregions and unmap any reguler RAM
+ * Go through the memregions and unmap any regular RAM
* backing memory already mapped to the VM.
*/
void stage2_unmap_vm(struct kvm *kvm)
@@ -1372,47 +1375,6 @@ out:
return ret;
}
-static bool transparent_hugepage_adjust(kvm_pfn_t *pfnp, phys_addr_t *ipap)
-{
- kvm_pfn_t pfn = *pfnp;
- gfn_t gfn = *ipap >> PAGE_SHIFT;
-
- if (kvm_is_transparent_hugepage(pfn)) {
- unsigned long mask;
- /*
- * The address we faulted on is backed by a transparent huge
- * page. However, because we map the compound huge page and
- * not the individual tail page, we need to transfer the
- * refcount to the head page. We have to be careful that the
- * THP doesn't start to split while we are adjusting the
- * refcounts.
- *
- * We are sure this doesn't happen, because mmu_notifier_retry
- * was successful and we are holding the mmu_lock, so if this
- * THP is trying to split, it will be blocked in the mmu
- * notifier before touching any of the pages, specifically
- * before being able to call __split_huge_page_refcount().
- *
- * We can therefore safely transfer the refcount from PG_tail
- * to PG_head and switch the pfn from a tail page to the head
- * page accordingly.
- */
- mask = PTRS_PER_PMD - 1;
- VM_BUG_ON((gfn & mask) != (pfn & mask));
- if (pfn & mask) {
- *ipap &= PMD_MASK;
- kvm_release_pfn_clean(pfn);
- pfn &= ~mask;
- kvm_get_pfn(pfn);
- *pfnp = pfn;
- }
-
- return true;
- }
-
- return false;
-}
-
/**
* stage2_wp_ptes - write protect PMD range
* @pmd: pointer to pmd entry
@@ -1607,6 +1569,10 @@ static bool fault_supports_stage2_huge_mapping(struct kvm_memory_slot *memslot,
hva_t uaddr_start, uaddr_end;
size_t size;
+ /* The memslot and the VMA are guaranteed to be aligned to PAGE_SIZE */
+ if (map_size == PAGE_SIZE)
+ return true;
+
size = memslot->npages * PAGE_SIZE;
gpa_start = memslot->base_gfn << PAGE_SHIFT;
@@ -1626,7 +1592,7 @@ static bool fault_supports_stage2_huge_mapping(struct kvm_memory_slot *memslot,
* |abcde|fgh Stage-1 block | Stage-1 block tv|xyz|
* +-----+--------------------+--------------------+---+
*
- * memslot->base_gfn << PAGE_SIZE:
+ * memslot->base_gfn << PAGE_SHIFT:
* +---+--------------------+--------------------+-----+
* |abc|def Stage-2 block | Stage-2 block |tvxyz|
* +---+--------------------+--------------------+-----+
@@ -1656,6 +1622,59 @@ static bool fault_supports_stage2_huge_mapping(struct kvm_memory_slot *memslot,
(hva & ~(map_size - 1)) + map_size <= uaddr_end;
}
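
The tail of fault_supports_stage2_huge_mapping() shown above checks that the map_size-aligned block containing the faulting hva does not run past the end of the userspace mapping. Here is a small stand-alone illustration of that containment test, using hypothetical addresses and an assumed 2MiB block size; it is a simplified model, not the kernel's full check.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Simplified model of the containment check: the map_size-aligned block
 * around hva must not run past the end of the userspace mapping.
 * uaddr_end plays the role of memslot userspace_addr + size.
 */
static bool block_fits(uint64_t hva, uint64_t map_size, uint64_t uaddr_end)
{
	return (hva & ~(map_size - 1)) + map_size <= uaddr_end;
}

int main(void)
{
	uint64_t pmd_size = 2UL << 20;	/* 2MiB block, assuming 4K pages */

	/* Block starting at 0x40000000 fits in a mapping ending at 0x40400000. */
	printf("%d\n", block_fits(0x40012000, pmd_size, 0x40400000));
	/* A mapping ending mid-block at 0x40100000 cannot take the 2MiB block. */
	printf("%d\n", block_fits(0x40012000, pmd_size, 0x40100000));
	return 0;
}
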
+/*
+ * Check if the given hva is backed by a transparent huge page (THP) and
+ * whether it can be mapped using block mapping in stage2. If so, adjust
+ * the stage2 PFN and IPA accordingly. Only PMD_SIZE THPs are currently
+ * supported. This will need to be updated to support other THP sizes.
+ *
+ * Returns the size of the mapping.
+ */
+static unsigned long
+transparent_hugepage_adjust(struct kvm_memory_slot *memslot,
+ unsigned long hva, kvm_pfn_t *pfnp,
+ phys_addr_t *ipap)
+{
+ kvm_pfn_t pfn = *pfnp;
+
+ /*
+ * Make sure the adjustment is done only for THP pages. Also make
+ * sure that the HVA and IPA are sufficiently aligned and that the
+ * block map is contained within the memslot.
+ */
+ if (kvm_is_transparent_hugepage(pfn) &&
+ fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE)) {
+ /*
+ * The address we faulted on is backed by a transparent huge
+ * page. However, because we map the compound huge page and
+ * not the individual tail page, we need to transfer the
+ * refcount to the head page. We have to be careful that the
+ * THP doesn't start to split while we are adjusting the
+ * refcounts.
+ *
+ * We are sure this doesn't happen, because mmu_notifier_retry
+ * was successful and we are holding the mmu_lock, so if this
+ * THP is trying to split, it will be blocked in the mmu
+ * notifier before touching any of the pages, specifically
+ * before being able to call __split_huge_page_refcount().
+ *
+ * We can therefore safely transfer the refcount from PG_tail
+ * to PG_head and switch the pfn from a tail page to the head
+ * page accordingly.
+ */
+ *ipap &= PMD_MASK;
+ kvm_release_pfn_clean(pfn);
+ pfn &= ~(PTRS_PER_PMD - 1);
+ kvm_get_pfn(pfn);
+ *pfnp = pfn;
+
+ return PMD_SIZE;
+ }
+
+ /* Use page mapping if we cannot use block mapping. */
+ return PAGE_SIZE;
+}
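
The arithmetic at the heart of the adjustment is plain masking: the IPA is rounded down to the PMD-sized block and the pfn is rewound to the compound head. A short sketch with stand-in geometry (4K pages, 512 PTEs per PMD) rather than the kernel's actual macros:

#include <stdint.h>
#include <stdio.h>

/* Stand-in geometry for a 4K-page, 2MiB-PMD configuration; illustrative only. */
#define PAGE_SHIFT	12
#define PTRS_PER_PMD	512UL
#define PMD_MASK	(~((PTRS_PER_PMD << PAGE_SHIFT) - 1))

int main(void)
{
	uint64_t pfn = 0x80123;		/* tail page somewhere inside a THP */
	uint64_t ipa = 0x40123000;	/* faulting IPA backed by that page */

	/* Same adjustment as transparent_hugepage_adjust(): align the IPA to
	 * the block and rewind the pfn to the compound head. */
	ipa &= PMD_MASK;
	pfn &= ~(PTRS_PER_PMD - 1);

	printf("head pfn: 0x%llx, block IPA: 0x%llx\n",
	       (unsigned long long)pfn, (unsigned long long)ipa);
	return 0;
}
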
+
static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
struct kvm_memory_slot *memslot, unsigned long hva,
unsigned long fault_status)
@@ -1769,20 +1788,13 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
if (mmu_notifier_retry(kvm, mmu_seq))
goto out_unlock;
- if (vma_pagesize == PAGE_SIZE && !force_pte) {
- /*
- * Only PMD_SIZE transparent hugepages(THP) are
- * currently supported. This code will need to be
- * updated to support other THP sizes.
- *
- * Make sure the host VA and the guest IPA are sufficiently
- * aligned and that the block is contained within the memslot.
- */
- if (fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE) &&
- transparent_hugepage_adjust(&pfn, &fault_ipa))
- vma_pagesize = PMD_SIZE;
- }
-
+ /*
+ * If we are not forced to use page mapping, check if we are
+ * backed by a THP and thus use block mapping if possible.
+ */
+ if (vma_pagesize == PAGE_SIZE && !force_pte)
+ vma_pagesize = transparent_hugepage_adjust(memslot, hva,
+ &pfn, &fault_ipa);
if (writable)
kvm_set_pfn_dirty(pfn);
@@ -2185,11 +2197,11 @@ int kvm_mmu_init(void)
{
int err;
- hyp_idmap_start = kvm_virt_to_phys(__hyp_idmap_text_start);
+ hyp_idmap_start = __pa_symbol(__hyp_idmap_text_start);
hyp_idmap_start = ALIGN_DOWN(hyp_idmap_start, PAGE_SIZE);
- hyp_idmap_end = kvm_virt_to_phys(__hyp_idmap_text_end);
+ hyp_idmap_end = __pa_symbol(__hyp_idmap_text_end);
hyp_idmap_end = ALIGN(hyp_idmap_end, PAGE_SIZE);
- hyp_idmap_vector = kvm_virt_to_phys(__kvm_hyp_init);
+ hyp_idmap_vector = __pa_symbol(__kvm_hyp_init);
/*
* We rely on the linker script to ensure at build time that the HYP
@@ -2262,11 +2274,19 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
{
/*
* At this point memslot has been committed and there is an
- * allocated dirty_bitmap[], dirty pages will be be tracked while the
+ * allocated dirty_bitmap[], dirty pages will be tracked while the
* memory slot is write protected.
*/
- if (change != KVM_MR_DELETE && mem->flags & KVM_MEM_LOG_DIRTY_PAGES)
- kvm_mmu_wp_memory_region(kvm, mem->slot);
+ if (change != KVM_MR_DELETE && mem->flags & KVM_MEM_LOG_DIRTY_PAGES) {
+ /*
+ * If initial-all-set is in effect, we don't need to write-protect
+ * any pages because they're all reported as dirty.
+ * Huge pages and normal pages will be write-protected gradually.
+ */
+ if (!kvm_dirty_log_manual_protect_and_init_set(kvm)) {
+ kvm_mmu_wp_memory_region(kvm, mem->slot);
+ }
+ }
}
int kvm_arch_prepare_memory_region(struct kvm *kvm,
diff --git a/virt/kvm/arm/perf.c b/arch/arm64/kvm/perf.c
index d45b8b9a4415..d45b8b9a4415 100644
--- a/virt/kvm/arm/perf.c
+++ b/arch/arm64/kvm/perf.c
diff --git a/virt/kvm/arm/pmu.c b/arch/arm64/kvm/pmu-emul.c
index f0d0312c0a55..f0d0312c0a55 100644
--- a/virt/kvm/arm/pmu.c
+++ b/arch/arm64/kvm/pmu-emul.c
diff --git a/virt/kvm/arm/psci.c b/arch/arm64/kvm/psci.c
index ae364716ee40..83415e96b589 100644
--- a/virt/kvm/arm/psci.c
+++ b/arch/arm64/kvm/psci.c
@@ -94,7 +94,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
/*
* NOTE: We always update r0 (or x0) because for PSCI v0.1
- * the general puspose registers are undefined upon CPU_ON.
+ * the general purpose registers are undefined upon CPU_ON.
*/
reset_state->r0 = smccc_get_arg3(source_vcpu);
@@ -265,10 +265,10 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
case PSCI_0_2_FN_SYSTEM_OFF:
kvm_psci_system_off(vcpu);
/*
- * We should'nt be going back to guest VCPU after
+ * We shouldn't be going back to guest VCPU after
* receiving SYSTEM_OFF request.
*
- * If user space accidently/deliberately resumes
+ * If user space accidentally/deliberately resumes
* guest VCPU after SYSTEM_OFF request then guest
* VCPU should see internal failure from PSCI return
* value. To achieve this, we preload r0 (or x0) with
diff --git a/virt/kvm/arm/pvtime.c b/arch/arm64/kvm/pvtime.c
index 1e0f4c284888..1e0f4c284888 100644
--- a/virt/kvm/arm/pvtime.c
+++ b/arch/arm64/kvm/pvtime.c
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index 30b7ea680f66..d3b209023727 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -36,23 +36,11 @@ static u32 kvm_ipa_limit;
/*
* ARMv8 Reset Values
*/
-static const struct kvm_regs default_regs_reset = {
- .regs.pstate = (PSR_MODE_EL1h | PSR_A_BIT | PSR_I_BIT |
- PSR_F_BIT | PSR_D_BIT),
-};
+#define VCPU_RESET_PSTATE_EL1 (PSR_MODE_EL1h | PSR_A_BIT | PSR_I_BIT | \
+ PSR_F_BIT | PSR_D_BIT)
-static const struct kvm_regs default_regs_reset32 = {
- .regs.pstate = (PSR_AA32_MODE_SVC | PSR_AA32_A_BIT |
- PSR_AA32_I_BIT | PSR_AA32_F_BIT),
-};
-
-static bool cpu_has_32bit_el1(void)
-{
- u64 pfr0;
-
- pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
- return !!(pfr0 & 0x20);
-}
+#define VCPU_RESET_PSTATE_SVC (PSR_AA32_MODE_SVC | PSR_AA32_A_BIT | \
+ PSR_AA32_I_BIT | PSR_AA32_F_BIT)
/**
* kvm_arch_vm_ioctl_check_extension
@@ -66,7 +54,7 @@ int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext)
switch (ext) {
case KVM_CAP_ARM_EL1_32BIT:
- r = cpu_has_32bit_el1();
+ r = cpus_have_const_cap(ARM64_HAS_32BIT_EL1);
break;
case KVM_CAP_GUEST_DEBUG_HW_BPS:
r = get_num_brps();
@@ -163,7 +151,7 @@ static int kvm_vcpu_finalize_sve(struct kvm_vcpu *vcpu)
vl = vcpu->arch.sve_max_vl;
/*
- * Resposibility for these properties is shared between
+ * Responsibility for these properties is shared between
* kvm_arm_init_arch_resources(), kvm_vcpu_enable_sve() and
* set_sve_vls(). Double-check here just to be sure:
*/
@@ -249,7 +237,7 @@ static int kvm_vcpu_enable_ptrauth(struct kvm_vcpu *vcpu)
* ioctl or as part of handling a request issued by another VCPU in the PSCI
* handling code. In the first case, the VCPU will not be loaded, and in the
* second case the VCPU will be loaded. Because this function operates purely
- * on the memory-backed valus of system registers, we want to do a full put if
+ * on the memory-backed values of system registers, we want to do a full put if
* we were loaded (handling a request) and load the values back at the end of
* the function. Otherwise we leave the state alone. In both cases, we
* disable preemption around the vcpu reset as we would otherwise race with
@@ -257,9 +245,9 @@ static int kvm_vcpu_enable_ptrauth(struct kvm_vcpu *vcpu)
*/
int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
{
- const struct kvm_regs *cpu_reset;
int ret = -EINVAL;
bool loaded;
+ u32 pstate;
/* Reset PMU outside of the non-preemptible section */
kvm_pmu_vcpu_reset(vcpu);
@@ -288,18 +276,19 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
switch (vcpu->arch.target) {
default:
if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) {
- if (!cpu_has_32bit_el1())
+ if (!cpus_have_const_cap(ARM64_HAS_32BIT_EL1))
goto out;
- cpu_reset = &default_regs_reset32;
+ pstate = VCPU_RESET_PSTATE_SVC;
} else {
- cpu_reset = &default_regs_reset;
+ pstate = VCPU_RESET_PSTATE_EL1;
}
break;
}
/* Reset core registers */
- memcpy(vcpu_gp_regs(vcpu), cpu_reset, sizeof(*cpu_reset));
+ memset(vcpu_gp_regs(vcpu), 0, sizeof(*vcpu_gp_regs(vcpu)));
+ vcpu_gp_regs(vcpu)->regs.pstate = pstate;
/* Reset system registers */
kvm_reset_sys_regs(vcpu);
@@ -340,11 +329,50 @@ out:
return ret;
}
-void kvm_set_ipa_limit(void)
+u32 get_kvm_ipa_limit(void)
+{
+ return kvm_ipa_limit;
+}
+
+int kvm_set_ipa_limit(void)
{
- unsigned int ipa_max, pa_max, va_max, parange;
+ unsigned int ipa_max, pa_max, va_max, parange, tgran_2;
+ u64 mmfr0;
+
+ mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
+ parange = cpuid_feature_extract_unsigned_field(mmfr0,
+ ID_AA64MMFR0_PARANGE_SHIFT);
+
+ /*
+ * Check with ARMv8.5-GTG that our PAGE_SIZE is supported at
+ * Stage-2. If not, things will stop very quickly.
+ */
+ switch (PAGE_SIZE) {
+ default:
+ case SZ_4K:
+ tgran_2 = ID_AA64MMFR0_TGRAN4_2_SHIFT;
+ break;
+ case SZ_16K:
+ tgran_2 = ID_AA64MMFR0_TGRAN16_2_SHIFT;
+ break;
+ case SZ_64K:
+ tgran_2 = ID_AA64MMFR0_TGRAN64_2_SHIFT;
+ break;
+ }
+
+ switch (cpuid_feature_extract_unsigned_field(mmfr0, tgran_2)) {
+ default:
+ case 1:
+ kvm_err("PAGE_SIZE not supported at Stage-2, giving up\n");
+ return -EINVAL;
+ case 0:
+ kvm_debug("PAGE_SIZE supported at Stage-2 (default)\n");
+ break;
+ case 2:
+ kvm_debug("PAGE_SIZE supported at Stage-2 (advertised)\n");
+ break;
+ }
- parange = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1) & 0x7;
pa_max = id_aa64mmfr0_parange_to_phys_shift(parange);
/* Clamp the IPA limit to the PA size supported by the kernel */
@@ -357,7 +385,7 @@ void kvm_set_ipa_limit(void)
*
* So clamp the ipa limit further down to limit the number of levels.
* Since we can concatenate upto 16 tables at entry level, we could
- * go upto 4bits above the maximum VA addressible with the current
+ * go upto 4bits above the maximum VA addressable with the current
* number of levels.
*/
va_max = PGDIR_SHIFT + PAGE_SHIFT - 3;
@@ -378,6 +406,8 @@ void kvm_set_ipa_limit(void)
"KVM IPA limit (%d bit) is smaller than default size\n", ipa_max);
kvm_ipa_limit = ipa_max;
kvm_info("IPA Size Limit: %dbits\n", kvm_ipa_limit);
+
+ return 0;
}
/*
@@ -390,7 +420,7 @@ void kvm_set_ipa_limit(void)
*/
int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type)
{
- u64 vtcr = VTCR_EL2_FLAGS;
+ u64 vtcr = VTCR_EL2_FLAGS, mmfr0;
u32 parange, phys_shift;
u8 lvls;
@@ -406,7 +436,9 @@ int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type)
phys_shift = KVM_PHYS_SHIFT;
}
- parange = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1) & 7;
+ mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
+ parange = cpuid_feature_extract_unsigned_field(mmfr0,
+ ID_AA64MMFR0_PARANGE_SHIFT);
if (parange > ID_AA64MMFR0_PARANGE_MAX)
parange = ID_AA64MMFR0_PARANGE_MAX;
vtcr |= parange << VTCR_EL2_PS_SHIFT;
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 51db934702b6..80985439bfb2 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -34,7 +34,7 @@
#include "trace.h"
/*
- * All of this file is extremly similar to the ARM coproc.c, but the
+ * All of this file is extremely similar to the ARM coproc.c, but the
* types are different. My gut feeling is that it should be pretty
* easy to merge, but that would be an ABI breakage -- again. VFP
* would also need to be abstracted.
@@ -64,11 +64,8 @@ static bool write_to_read_only(struct kvm_vcpu *vcpu,
return false;
}
-u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg)
+static bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val)
{
- if (!vcpu->arch.sysregs_loaded_on_cpu)
- goto immediate_read;
-
/*
* System registers listed in the switch are not saved on every
* exit from the guest but are only saved on vcpu_put.
@@ -79,75 +76,92 @@ u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg)
* thread when emulating cross-VCPU communication.
*/
switch (reg) {
- case CSSELR_EL1: return read_sysreg_s(SYS_CSSELR_EL1);
- case SCTLR_EL1: return read_sysreg_s(SYS_SCTLR_EL12);
- case ACTLR_EL1: return read_sysreg_s(SYS_ACTLR_EL1);
- case CPACR_EL1: return read_sysreg_s(SYS_CPACR_EL12);
- case TTBR0_EL1: return read_sysreg_s(SYS_TTBR0_EL12);
- case TTBR1_EL1: return read_sysreg_s(SYS_TTBR1_EL12);
- case TCR_EL1: return read_sysreg_s(SYS_TCR_EL12);
- case ESR_EL1: return read_sysreg_s(SYS_ESR_EL12);
- case AFSR0_EL1: return read_sysreg_s(SYS_AFSR0_EL12);
- case AFSR1_EL1: return read_sysreg_s(SYS_AFSR1_EL12);
- case FAR_EL1: return read_sysreg_s(SYS_FAR_EL12);
- case MAIR_EL1: return read_sysreg_s(SYS_MAIR_EL12);
- case VBAR_EL1: return read_sysreg_s(SYS_VBAR_EL12);
- case CONTEXTIDR_EL1: return read_sysreg_s(SYS_CONTEXTIDR_EL12);
- case TPIDR_EL0: return read_sysreg_s(SYS_TPIDR_EL0);
- case TPIDRRO_EL0: return read_sysreg_s(SYS_TPIDRRO_EL0);
- case TPIDR_EL1: return read_sysreg_s(SYS_TPIDR_EL1);
- case AMAIR_EL1: return read_sysreg_s(SYS_AMAIR_EL12);
- case CNTKCTL_EL1: return read_sysreg_s(SYS_CNTKCTL_EL12);
- case PAR_EL1: return read_sysreg_s(SYS_PAR_EL1);
- case DACR32_EL2: return read_sysreg_s(SYS_DACR32_EL2);
- case IFSR32_EL2: return read_sysreg_s(SYS_IFSR32_EL2);
- case DBGVCR32_EL2: return read_sysreg_s(SYS_DBGVCR32_EL2);
+ case CSSELR_EL1: *val = read_sysreg_s(SYS_CSSELR_EL1); break;
+ case SCTLR_EL1: *val = read_sysreg_s(SYS_SCTLR_EL12); break;
+ case ACTLR_EL1: *val = read_sysreg_s(SYS_ACTLR_EL1); break;
+ case CPACR_EL1: *val = read_sysreg_s(SYS_CPACR_EL12); break;
+ case TTBR0_EL1: *val = read_sysreg_s(SYS_TTBR0_EL12); break;
+ case TTBR1_EL1: *val = read_sysreg_s(SYS_TTBR1_EL12); break;
+ case TCR_EL1: *val = read_sysreg_s(SYS_TCR_EL12); break;
+ case ESR_EL1: *val = read_sysreg_s(SYS_ESR_EL12); break;
+ case AFSR0_EL1: *val = read_sysreg_s(SYS_AFSR0_EL12); break;
+ case AFSR1_EL1: *val = read_sysreg_s(SYS_AFSR1_EL12); break;
+ case FAR_EL1: *val = read_sysreg_s(SYS_FAR_EL12); break;
+ case MAIR_EL1: *val = read_sysreg_s(SYS_MAIR_EL12); break;
+ case VBAR_EL1: *val = read_sysreg_s(SYS_VBAR_EL12); break;
+ case CONTEXTIDR_EL1: *val = read_sysreg_s(SYS_CONTEXTIDR_EL12);break;
+ case TPIDR_EL0: *val = read_sysreg_s(SYS_TPIDR_EL0); break;
+ case TPIDRRO_EL0: *val = read_sysreg_s(SYS_TPIDRRO_EL0); break;
+ case TPIDR_EL1: *val = read_sysreg_s(SYS_TPIDR_EL1); break;
+ case AMAIR_EL1: *val = read_sysreg_s(SYS_AMAIR_EL12); break;
+ case CNTKCTL_EL1: *val = read_sysreg_s(SYS_CNTKCTL_EL12); break;
+ case PAR_EL1: *val = read_sysreg_s(SYS_PAR_EL1); break;
+ case DACR32_EL2: *val = read_sysreg_s(SYS_DACR32_EL2); break;
+ case IFSR32_EL2: *val = read_sysreg_s(SYS_IFSR32_EL2); break;
+ case DBGVCR32_EL2: *val = read_sysreg_s(SYS_DBGVCR32_EL2); break;
+ default: return false;
}
-immediate_read:
- return __vcpu_sys_reg(vcpu, reg);
+ return true;
}
-void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg)
+static bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg)
{
- if (!vcpu->arch.sysregs_loaded_on_cpu)
- goto immediate_write;
-
/*
* System registers listed in the switch are not restored on every
* entry to the guest but are only restored on vcpu_load.
*
* Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but
- * should never be listed below, because the the MPIDR should only be
- * set once, before running the VCPU, and never changed later.
+ * should never be listed below, because the MPIDR should only be set
+ * once, before running the VCPU, and never changed later.
*/
switch (reg) {
- case CSSELR_EL1: write_sysreg_s(val, SYS_CSSELR_EL1); return;
- case SCTLR_EL1: write_sysreg_s(val, SYS_SCTLR_EL12); return;
- case ACTLR_EL1: write_sysreg_s(val, SYS_ACTLR_EL1); return;
- case CPACR_EL1: write_sysreg_s(val, SYS_CPACR_EL12); return;
- case TTBR0_EL1: write_sysreg_s(val, SYS_TTBR0_EL12); return;
- case TTBR1_EL1: write_sysreg_s(val, SYS_TTBR1_EL12); return;
- case TCR_EL1: write_sysreg_s(val, SYS_TCR_EL12); return;
- case ESR_EL1: write_sysreg_s(val, SYS_ESR_EL12); return;
- case AFSR0_EL1: write_sysreg_s(val, SYS_AFSR0_EL12); return;
- case AFSR1_EL1: write_sysreg_s(val, SYS_AFSR1_EL12); return;
- case FAR_EL1: write_sysreg_s(val, SYS_FAR_EL12); return;
- case MAIR_EL1: write_sysreg_s(val, SYS_MAIR_EL12); return;
- case VBAR_EL1: write_sysreg_s(val, SYS_VBAR_EL12); return;
- case CONTEXTIDR_EL1: write_sysreg_s(val, SYS_CONTEXTIDR_EL12); return;
- case TPIDR_EL0: write_sysreg_s(val, SYS_TPIDR_EL0); return;
- case TPIDRRO_EL0: write_sysreg_s(val, SYS_TPIDRRO_EL0); return;
- case TPIDR_EL1: write_sysreg_s(val, SYS_TPIDR_EL1); return;
- case AMAIR_EL1: write_sysreg_s(val, SYS_AMAIR_EL12); return;
- case CNTKCTL_EL1: write_sysreg_s(val, SYS_CNTKCTL_EL12); return;
- case PAR_EL1: write_sysreg_s(val, SYS_PAR_EL1); return;
- case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); return;
- case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); return;
- case DBGVCR32_EL2: write_sysreg_s(val, SYS_DBGVCR32_EL2); return;
+ case CSSELR_EL1: write_sysreg_s(val, SYS_CSSELR_EL1); break;
+ case SCTLR_EL1: write_sysreg_s(val, SYS_SCTLR_EL12); break;
+ case ACTLR_EL1: write_sysreg_s(val, SYS_ACTLR_EL1); break;
+ case CPACR_EL1: write_sysreg_s(val, SYS_CPACR_EL12); break;
+ case TTBR0_EL1: write_sysreg_s(val, SYS_TTBR0_EL12); break;
+ case TTBR1_EL1: write_sysreg_s(val, SYS_TTBR1_EL12); break;
+ case TCR_EL1: write_sysreg_s(val, SYS_TCR_EL12); break;
+ case ESR_EL1: write_sysreg_s(val, SYS_ESR_EL12); break;
+ case AFSR0_EL1: write_sysreg_s(val, SYS_AFSR0_EL12); break;
+ case AFSR1_EL1: write_sysreg_s(val, SYS_AFSR1_EL12); break;
+ case FAR_EL1: write_sysreg_s(val, SYS_FAR_EL12); break;
+ case MAIR_EL1: write_sysreg_s(val, SYS_MAIR_EL12); break;
+ case VBAR_EL1: write_sysreg_s(val, SYS_VBAR_EL12); break;
+ case CONTEXTIDR_EL1: write_sysreg_s(val, SYS_CONTEXTIDR_EL12);break;
+ case TPIDR_EL0: write_sysreg_s(val, SYS_TPIDR_EL0); break;
+ case TPIDRRO_EL0: write_sysreg_s(val, SYS_TPIDRRO_EL0); break;
+ case TPIDR_EL1: write_sysreg_s(val, SYS_TPIDR_EL1); break;
+ case AMAIR_EL1: write_sysreg_s(val, SYS_AMAIR_EL12); break;
+ case CNTKCTL_EL1: write_sysreg_s(val, SYS_CNTKCTL_EL12); break;
+ case PAR_EL1: write_sysreg_s(val, SYS_PAR_EL1); break;
+ case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); break;
+ case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); break;
+ case DBGVCR32_EL2: write_sysreg_s(val, SYS_DBGVCR32_EL2); break;
+ default: return false;
}
-immediate_write:
+ return true;
+}
+
+u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg)
+{
+ u64 val = 0x8badf00d8badf00d;
+
+ if (vcpu->arch.sysregs_loaded_on_cpu &&
+ __vcpu_read_sys_reg_from_cpu(reg, &val))
+ return val;
+
+ return __vcpu_sys_reg(vcpu, reg);
+}
+
+void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg)
+{
+ if (vcpu->arch.sysregs_loaded_on_cpu &&
+ __vcpu_write_sys_reg_to_cpu(val, reg))
+ return;
+
__vcpu_sys_reg(vcpu, reg) = val;
}
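
With the helpers returning a bool, the public accessors reduce to "try the CPU-resident copy, fall back to the memory-backed copy". Below is a self-contained sketch of that shape; the two-register file is hypothetical and only stands in for the real sysreg array.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-ins: two registers, one of which lives "on the CPU". */
enum { REG_A, REG_B, NR_REGS };

static uint64_t hw_reg_a;		/* models a register resident on the CPU */
static uint64_t mem_regs[NR_REGS];	/* models the memory-backed copy */
static bool loaded_on_cpu;		/* models vcpu->arch.sysregs_loaded_on_cpu */

static bool read_from_cpu(int reg, uint64_t *val)
{
	switch (reg) {
	case REG_A:	*val = hw_reg_a; break;
	default:	return false;	/* not kept live on the CPU */
	}
	return true;
}

static uint64_t vcpu_read(int reg)
{
	uint64_t val;

	if (loaded_on_cpu && read_from_cpu(reg, &val))
		return val;
	return mem_regs[reg];		/* fall back to the in-memory copy */
}

int main(void)
{
	hw_reg_a = 0x1111;
	mem_regs[REG_A] = 0xdead;	/* stale memory copy */
	mem_regs[REG_B] = 0x2222;

	loaded_on_cpu = true;
	printf("A=%llx B=%llx\n", (unsigned long long)vcpu_read(REG_A),
	       (unsigned long long)vcpu_read(REG_B));
	return 0;
}
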
@@ -1456,9 +1470,9 @@ static const struct sys_reg_desc sys_reg_descs[] = {
ID_SANITISED(MVFR1_EL1),
ID_SANITISED(MVFR2_EL1),
ID_UNALLOCATED(3,3),
- ID_UNALLOCATED(3,4),
- ID_UNALLOCATED(3,5),
- ID_UNALLOCATED(3,6),
+ ID_SANITISED(ID_PFR2_EL1),
+ ID_HIDDEN(ID_DFR1_EL1),
+ ID_SANITISED(ID_MMFR5_EL1),
ID_UNALLOCATED(3,7),
/* AArch64 ID registers */
@@ -1532,7 +1546,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_PAR_EL1), NULL, reset_unknown, PAR_EL1 },
{ SYS_DESC(SYS_PMINTENSET_EL1), access_pminten, reset_unknown, PMINTENSET_EL1 },
- { SYS_DESC(SYS_PMINTENCLR_EL1), access_pminten, NULL, PMINTENSET_EL1 },
+ { SYS_DESC(SYS_PMINTENCLR_EL1), access_pminten, reset_unknown, PMINTENSET_EL1 },
{ SYS_DESC(SYS_MAIR_EL1), access_vm_reg, reset_unknown, MAIR_EL1 },
{ SYS_DESC(SYS_AMAIR_EL1), access_vm_reg, reset_amair_el1, AMAIR_EL1 },
@@ -1571,8 +1585,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_PMCR_EL0), access_pmcr, reset_pmcr, PMCR_EL0 },
{ SYS_DESC(SYS_PMCNTENSET_EL0), access_pmcnten, reset_unknown, PMCNTENSET_EL0 },
- { SYS_DESC(SYS_PMCNTENCLR_EL0), access_pmcnten, NULL, PMCNTENSET_EL0 },
- { SYS_DESC(SYS_PMOVSCLR_EL0), access_pmovs, NULL, PMOVSSET_EL0 },
+ { SYS_DESC(SYS_PMCNTENCLR_EL0), access_pmcnten, reset_unknown, PMCNTENSET_EL0 },
+ { SYS_DESC(SYS_PMOVSCLR_EL0), access_pmovs, reset_unknown, PMOVSSET_EL0 },
{ SYS_DESC(SYS_PMSWINC_EL0), access_pmswinc, reset_unknown, PMSWINC_EL0 },
{ SYS_DESC(SYS_PMSELR_EL0), access_pmselr, reset_unknown, PMSELR_EL0 },
{ SYS_DESC(SYS_PMCEID0_EL0), access_pmceid },
@@ -2073,12 +2087,37 @@ static const struct sys_reg_desc cp15_64_regs[] = {
{ SYS_DESC(SYS_AARCH32_CNTP_CVAL), access_arch_timer },
};
+static int check_sysreg_table(const struct sys_reg_desc *table, unsigned int n,
+ bool is_32)
+{
+ unsigned int i;
+
+ for (i = 0; i < n; i++) {
+ if (!is_32 && table[i].reg && !table[i].reset) {
+ kvm_err("sys_reg table %p entry %d lacks reset\n",
+ table, i);
+ return 1;
+ }
+
+ if (i && cmp_sys_reg(&table[i-1], &table[i]) >= 0) {
+ kvm_err("sys_reg table %p out of order (%d)\n", table, i - 1);
+ return 1;
+ }
+ }
+
+ return 0;
+}
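
The merged checker enforces two invariants in a single pass: a 64-bit descriptor that names a shadow register must also provide a reset hook, and descriptors must be strictly ordered so the lookup's binary search works. A rough stand-alone model of the same single-pass validation over a hypothetical descriptor array:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Hypothetical, pared-down descriptor: an encoding, an optional backing
 * register index and an optional reset hook. */
struct desc {
	int encoding;
	int reg;
	void (*reset)(void);
};

static void reset_noop(void) { }

static bool table_ok(const struct desc *table, size_t n, bool is_32)
{
	for (size_t i = 0; i < n; i++) {
		if (!is_32 && table[i].reg && !table[i].reset) {
			fprintf(stderr, "entry %zu lacks reset\n", i);
			return false;
		}
		if (i && table[i - 1].encoding >= table[i].encoding) {
			fprintf(stderr, "table out of order at %zu\n", i - 1);
			return false;
		}
	}
	return true;
}

int main(void)
{
	const struct desc good[] = {
		{ .encoding = 1, .reg = 1, .reset = reset_noop },
		{ .encoding = 2, .reg = 0 },	/* no backing reg, reset optional */
	};
	const struct desc bad[] = {
		{ .encoding = 2, .reg = 1, .reset = reset_noop },
		{ .encoding = 1, .reg = 2, .reset = reset_noop },	/* out of order */
	};

	printf("good: %d, bad: %d\n", table_ok(good, 2, false), table_ok(bad, 2, false));
	return 0;
}
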
+
/* Target specific emulation tables */
static struct kvm_sys_reg_target_table *target_tables[KVM_ARM_NUM_TARGETS];
void kvm_register_target_sys_reg_table(unsigned int target,
struct kvm_sys_reg_target_table *table)
{
+ if (check_sysreg_table(table->table64.table, table->table64.num, false) ||
+ check_sysreg_table(table->table32.table, table->table32.num, true))
+ return;
+
target_tables[target] = table;
}
@@ -2364,19 +2403,13 @@ static int emulate_sys_reg(struct kvm_vcpu *vcpu,
}
static void reset_sys_reg_descs(struct kvm_vcpu *vcpu,
- const struct sys_reg_desc *table, size_t num,
- unsigned long *bmap)
+ const struct sys_reg_desc *table, size_t num)
{
unsigned long i;
for (i = 0; i < num; i++)
- if (table[i].reset) {
- int reg = table[i].reg;
-
+ if (table[i].reset)
table[i].reset(vcpu, &table[i]);
- if (reg > 0 && reg < NR_SYS_REGS)
- set_bit(reg, bmap);
- }
}
/**
@@ -2832,32 +2865,18 @@ int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
return write_demux_regids(uindices);
}
-static int check_sysreg_table(const struct sys_reg_desc *table, unsigned int n)
-{
- unsigned int i;
-
- for (i = 1; i < n; i++) {
- if (cmp_sys_reg(&table[i-1], &table[i]) >= 0) {
- kvm_err("sys_reg table %p out of order (%d)\n", table, i - 1);
- return 1;
- }
- }
-
- return 0;
-}
-
void kvm_sys_reg_table_init(void)
{
unsigned int i;
struct sys_reg_desc clidr;
/* Make sure tables are unique and in order. */
- BUG_ON(check_sysreg_table(sys_reg_descs, ARRAY_SIZE(sys_reg_descs)));
- BUG_ON(check_sysreg_table(cp14_regs, ARRAY_SIZE(cp14_regs)));
- BUG_ON(check_sysreg_table(cp14_64_regs, ARRAY_SIZE(cp14_64_regs)));
- BUG_ON(check_sysreg_table(cp15_regs, ARRAY_SIZE(cp15_regs)));
- BUG_ON(check_sysreg_table(cp15_64_regs, ARRAY_SIZE(cp15_64_regs)));
- BUG_ON(check_sysreg_table(invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs)));
+ BUG_ON(check_sysreg_table(sys_reg_descs, ARRAY_SIZE(sys_reg_descs), false));
+ BUG_ON(check_sysreg_table(cp14_regs, ARRAY_SIZE(cp14_regs), true));
+ BUG_ON(check_sysreg_table(cp14_64_regs, ARRAY_SIZE(cp14_64_regs), true));
+ BUG_ON(check_sysreg_table(cp15_regs, ARRAY_SIZE(cp15_regs), true));
+ BUG_ON(check_sysreg_table(cp15_64_regs, ARRAY_SIZE(cp15_64_regs), true));
+ BUG_ON(check_sysreg_table(invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs), false));
/* We abuse the reset function to overwrite the table itself. */
for (i = 0; i < ARRAY_SIZE(invariant_sys_regs); i++)
@@ -2893,17 +2912,10 @@ void kvm_reset_sys_regs(struct kvm_vcpu *vcpu)
{
size_t num;
const struct sys_reg_desc *table;
- DECLARE_BITMAP(bmap, NR_SYS_REGS) = { 0, };
/* Generic chip reset first (so target could override). */
- reset_sys_reg_descs(vcpu, sys_reg_descs, ARRAY_SIZE(sys_reg_descs), bmap);
+ reset_sys_reg_descs(vcpu, sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
table = get_target_table(vcpu->arch.target, true, &num);
- reset_sys_reg_descs(vcpu, table, num, bmap);
-
- for (num = 1; num < NR_SYS_REGS; num++) {
- if (WARN(!test_bit(num, bmap),
- "Didn't reset __vcpu_sys_reg(%zi)\n", num))
- break;
- }
+ reset_sys_reg_descs(vcpu, table, num);
}
diff --git a/arch/arm64/kvm/trace.h b/arch/arm64/kvm/trace.h
index eab91ad0effb..86f9ea47be29 100644
--- a/arch/arm64/kvm/trace.h
+++ b/arch/arm64/kvm/trace.h
@@ -1,216 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0 */
-#if !defined(_TRACE_ARM64_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
+#ifndef _TRACE_ARM64_KVM_H
#define _TRACE_ARM64_KVM_H
-#include <linux/tracepoint.h>
-#include "sys_regs.h"
+#include "trace_arm.h"
+#include "trace_handle_exit.h"
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM kvm
-
-TRACE_EVENT(kvm_wfx_arm64,
- TP_PROTO(unsigned long vcpu_pc, bool is_wfe),
- TP_ARGS(vcpu_pc, is_wfe),
-
- TP_STRUCT__entry(
- __field(unsigned long, vcpu_pc)
- __field(bool, is_wfe)
- ),
-
- TP_fast_assign(
- __entry->vcpu_pc = vcpu_pc;
- __entry->is_wfe = is_wfe;
- ),
-
- TP_printk("guest executed wf%c at: 0x%08lx",
- __entry->is_wfe ? 'e' : 'i', __entry->vcpu_pc)
-);
-
-TRACE_EVENT(kvm_hvc_arm64,
- TP_PROTO(unsigned long vcpu_pc, unsigned long r0, unsigned long imm),
- TP_ARGS(vcpu_pc, r0, imm),
-
- TP_STRUCT__entry(
- __field(unsigned long, vcpu_pc)
- __field(unsigned long, r0)
- __field(unsigned long, imm)
- ),
-
- TP_fast_assign(
- __entry->vcpu_pc = vcpu_pc;
- __entry->r0 = r0;
- __entry->imm = imm;
- ),
-
- TP_printk("HVC at 0x%08lx (r0: 0x%08lx, imm: 0x%lx)",
- __entry->vcpu_pc, __entry->r0, __entry->imm)
-);
-
-TRACE_EVENT(kvm_arm_setup_debug,
- TP_PROTO(struct kvm_vcpu *vcpu, __u32 guest_debug),
- TP_ARGS(vcpu, guest_debug),
-
- TP_STRUCT__entry(
- __field(struct kvm_vcpu *, vcpu)
- __field(__u32, guest_debug)
- ),
-
- TP_fast_assign(
- __entry->vcpu = vcpu;
- __entry->guest_debug = guest_debug;
- ),
-
- TP_printk("vcpu: %p, flags: 0x%08x", __entry->vcpu, __entry->guest_debug)
-);
-
-TRACE_EVENT(kvm_arm_clear_debug,
- TP_PROTO(__u32 guest_debug),
- TP_ARGS(guest_debug),
-
- TP_STRUCT__entry(
- __field(__u32, guest_debug)
- ),
-
- TP_fast_assign(
- __entry->guest_debug = guest_debug;
- ),
-
- TP_printk("flags: 0x%08x", __entry->guest_debug)
-);
-
-TRACE_EVENT(kvm_arm_set_dreg32,
- TP_PROTO(const char *name, __u32 value),
- TP_ARGS(name, value),
-
- TP_STRUCT__entry(
- __field(const char *, name)
- __field(__u32, value)
- ),
-
- TP_fast_assign(
- __entry->name = name;
- __entry->value = value;
- ),
-
- TP_printk("%s: 0x%08x", __entry->name, __entry->value)
-);
-
-TRACE_DEFINE_SIZEOF(__u64);
-
-TRACE_EVENT(kvm_arm_set_regset,
- TP_PROTO(const char *type, int len, __u64 *control, __u64 *value),
- TP_ARGS(type, len, control, value),
- TP_STRUCT__entry(
- __field(const char *, name)
- __field(int, len)
- __array(u64, ctrls, 16)
- __array(u64, values, 16)
- ),
- TP_fast_assign(
- __entry->name = type;
- __entry->len = len;
- memcpy(__entry->ctrls, control, len << 3);
- memcpy(__entry->values, value, len << 3);
- ),
- TP_printk("%d %s CTRL:%s VALUE:%s", __entry->len, __entry->name,
- __print_array(__entry->ctrls, __entry->len, sizeof(__u64)),
- __print_array(__entry->values, __entry->len, sizeof(__u64)))
-);
-
-TRACE_EVENT(trap_reg,
- TP_PROTO(const char *fn, int reg, bool is_write, u64 write_value),
- TP_ARGS(fn, reg, is_write, write_value),
-
- TP_STRUCT__entry(
- __field(const char *, fn)
- __field(int, reg)
- __field(bool, is_write)
- __field(u64, write_value)
- ),
-
- TP_fast_assign(
- __entry->fn = fn;
- __entry->reg = reg;
- __entry->is_write = is_write;
- __entry->write_value = write_value;
- ),
-
- TP_printk("%s %s reg %d (0x%08llx)", __entry->fn, __entry->is_write?"write to":"read from", __entry->reg, __entry->write_value)
-);
-
-TRACE_EVENT(kvm_handle_sys_reg,
- TP_PROTO(unsigned long hsr),
- TP_ARGS(hsr),
-
- TP_STRUCT__entry(
- __field(unsigned long, hsr)
- ),
-
- TP_fast_assign(
- __entry->hsr = hsr;
- ),
-
- TP_printk("HSR 0x%08lx", __entry->hsr)
-);
-
-TRACE_EVENT(kvm_sys_access,
- TP_PROTO(unsigned long vcpu_pc, struct sys_reg_params *params, const struct sys_reg_desc *reg),
- TP_ARGS(vcpu_pc, params, reg),
-
- TP_STRUCT__entry(
- __field(unsigned long, vcpu_pc)
- __field(bool, is_write)
- __field(const char *, name)
- __field(u8, Op0)
- __field(u8, Op1)
- __field(u8, CRn)
- __field(u8, CRm)
- __field(u8, Op2)
- ),
-
- TP_fast_assign(
- __entry->vcpu_pc = vcpu_pc;
- __entry->is_write = params->is_write;
- __entry->name = reg->name;
- __entry->Op0 = reg->Op0;
- __entry->Op0 = reg->Op0;
- __entry->Op1 = reg->Op1;
- __entry->CRn = reg->CRn;
- __entry->CRm = reg->CRm;
- __entry->Op2 = reg->Op2;
- ),
-
- TP_printk("PC: %lx %s (%d,%d,%d,%d,%d) %s",
- __entry->vcpu_pc, __entry->name ?: "UNKN",
- __entry->Op0, __entry->Op1, __entry->CRn,
- __entry->CRm, __entry->Op2,
- __entry->is_write ? "write" : "read")
-);
-
-TRACE_EVENT(kvm_set_guest_debug,
- TP_PROTO(struct kvm_vcpu *vcpu, __u32 guest_debug),
- TP_ARGS(vcpu, guest_debug),
-
- TP_STRUCT__entry(
- __field(struct kvm_vcpu *, vcpu)
- __field(__u32, guest_debug)
- ),
-
- TP_fast_assign(
- __entry->vcpu = vcpu;
- __entry->guest_debug = guest_debug;
- ),
-
- TP_printk("vcpu: %p, flags: 0x%08x", __entry->vcpu, __entry->guest_debug)
-);
-
-
-#endif /* _TRACE_ARM64_KVM_H */
-
-#undef TRACE_INCLUDE_PATH
-#define TRACE_INCLUDE_PATH .
-#undef TRACE_INCLUDE_FILE
-#define TRACE_INCLUDE_FILE trace
-
-/* This part must be outside protection */
-#include <trace/define_trace.h>
+#endif /* _TRACE_ARM64_KVM_H */
diff --git a/virt/kvm/arm/trace.h b/arch/arm64/kvm/trace_arm.h
index cc94ccc68821..4c71270cc097 100644
--- a/virt/kvm/arm/trace.h
+++ b/arch/arm64/kvm/trace_arm.h
@@ -1,10 +1,9 @@
/* SPDX-License-Identifier: GPL-2.0 */
-#if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
-#define _TRACE_KVM_H
+#if !defined(_TRACE_ARM_ARM64_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_ARM_ARM64_KVM_H
#include <kvm/arm_arch_timer.h>
#include <linux/tracepoint.h>
-#include <asm/kvm_arm.h>
#undef TRACE_SYSTEM
#define TRACE_SYSTEM kvm
@@ -368,12 +367,12 @@ TRACE_EVENT(kvm_timer_emulate,
__entry->timer_idx, __entry->should_fire)
);
-#endif /* _TRACE_KVM_H */
+#endif /* _TRACE_ARM_ARM64_KVM_H */
#undef TRACE_INCLUDE_PATH
-#define TRACE_INCLUDE_PATH ../../virt/kvm/arm
+#define TRACE_INCLUDE_PATH .
#undef TRACE_INCLUDE_FILE
-#define TRACE_INCLUDE_FILE trace
+#define TRACE_INCLUDE_FILE trace_arm
/* This part must be outside protection */
#include <trace/define_trace.h>
diff --git a/arch/arm64/kvm/trace_handle_exit.h b/arch/arm64/kvm/trace_handle_exit.h
new file mode 100644
index 000000000000..2c56d1e0f5bd
--- /dev/null
+++ b/arch/arm64/kvm/trace_handle_exit.h
@@ -0,0 +1,215 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#if !defined(_TRACE_HANDLE_EXIT_ARM64_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_HANDLE_EXIT_ARM64_KVM_H
+
+#include <linux/tracepoint.h>
+#include "sys_regs.h"
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kvm
+
+TRACE_EVENT(kvm_wfx_arm64,
+ TP_PROTO(unsigned long vcpu_pc, bool is_wfe),
+ TP_ARGS(vcpu_pc, is_wfe),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, vcpu_pc)
+ __field(bool, is_wfe)
+ ),
+
+ TP_fast_assign(
+ __entry->vcpu_pc = vcpu_pc;
+ __entry->is_wfe = is_wfe;
+ ),
+
+ TP_printk("guest executed wf%c at: 0x%08lx",
+ __entry->is_wfe ? 'e' : 'i', __entry->vcpu_pc)
+);
+
+TRACE_EVENT(kvm_hvc_arm64,
+ TP_PROTO(unsigned long vcpu_pc, unsigned long r0, unsigned long imm),
+ TP_ARGS(vcpu_pc, r0, imm),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, vcpu_pc)
+ __field(unsigned long, r0)
+ __field(unsigned long, imm)
+ ),
+
+ TP_fast_assign(
+ __entry->vcpu_pc = vcpu_pc;
+ __entry->r0 = r0;
+ __entry->imm = imm;
+ ),
+
+ TP_printk("HVC at 0x%08lx (r0: 0x%08lx, imm: 0x%lx)",
+ __entry->vcpu_pc, __entry->r0, __entry->imm)
+);
+
+TRACE_EVENT(kvm_arm_setup_debug,
+ TP_PROTO(struct kvm_vcpu *vcpu, __u32 guest_debug),
+ TP_ARGS(vcpu, guest_debug),
+
+ TP_STRUCT__entry(
+ __field(struct kvm_vcpu *, vcpu)
+ __field(__u32, guest_debug)
+ ),
+
+ TP_fast_assign(
+ __entry->vcpu = vcpu;
+ __entry->guest_debug = guest_debug;
+ ),
+
+ TP_printk("vcpu: %p, flags: 0x%08x", __entry->vcpu, __entry->guest_debug)
+);
+
+TRACE_EVENT(kvm_arm_clear_debug,
+ TP_PROTO(__u32 guest_debug),
+ TP_ARGS(guest_debug),
+
+ TP_STRUCT__entry(
+ __field(__u32, guest_debug)
+ ),
+
+ TP_fast_assign(
+ __entry->guest_debug = guest_debug;
+ ),
+
+ TP_printk("flags: 0x%08x", __entry->guest_debug)
+);
+
+TRACE_EVENT(kvm_arm_set_dreg32,
+ TP_PROTO(const char *name, __u32 value),
+ TP_ARGS(name, value),
+
+ TP_STRUCT__entry(
+ __field(const char *, name)
+ __field(__u32, value)
+ ),
+
+ TP_fast_assign(
+ __entry->name = name;
+ __entry->value = value;
+ ),
+
+ TP_printk("%s: 0x%08x", __entry->name, __entry->value)
+);
+
+TRACE_DEFINE_SIZEOF(__u64);
+
+TRACE_EVENT(kvm_arm_set_regset,
+ TP_PROTO(const char *type, int len, __u64 *control, __u64 *value),
+ TP_ARGS(type, len, control, value),
+ TP_STRUCT__entry(
+ __field(const char *, name)
+ __field(int, len)
+ __array(u64, ctrls, 16)
+ __array(u64, values, 16)
+ ),
+ TP_fast_assign(
+ __entry->name = type;
+ __entry->len = len;
+ memcpy(__entry->ctrls, control, len << 3);
+ memcpy(__entry->values, value, len << 3);
+ ),
+ TP_printk("%d %s CTRL:%s VALUE:%s", __entry->len, __entry->name,
+ __print_array(__entry->ctrls, __entry->len, sizeof(__u64)),
+ __print_array(__entry->values, __entry->len, sizeof(__u64)))
+);
+
+TRACE_EVENT(trap_reg,
+ TP_PROTO(const char *fn, int reg, bool is_write, u64 write_value),
+ TP_ARGS(fn, reg, is_write, write_value),
+
+ TP_STRUCT__entry(
+ __field(const char *, fn)
+ __field(int, reg)
+ __field(bool, is_write)
+ __field(u64, write_value)
+ ),
+
+ TP_fast_assign(
+ __entry->fn = fn;
+ __entry->reg = reg;
+ __entry->is_write = is_write;
+ __entry->write_value = write_value;
+ ),
+
+ TP_printk("%s %s reg %d (0x%08llx)", __entry->fn, __entry->is_write?"write to":"read from", __entry->reg, __entry->write_value)
+);
+
+TRACE_EVENT(kvm_handle_sys_reg,
+ TP_PROTO(unsigned long hsr),
+ TP_ARGS(hsr),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, hsr)
+ ),
+
+ TP_fast_assign(
+ __entry->hsr = hsr;
+ ),
+
+ TP_printk("HSR 0x%08lx", __entry->hsr)
+);
+
+TRACE_EVENT(kvm_sys_access,
+ TP_PROTO(unsigned long vcpu_pc, struct sys_reg_params *params, const struct sys_reg_desc *reg),
+ TP_ARGS(vcpu_pc, params, reg),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, vcpu_pc)
+ __field(bool, is_write)
+ __field(const char *, name)
+ __field(u8, Op0)
+ __field(u8, Op1)
+ __field(u8, CRn)
+ __field(u8, CRm)
+ __field(u8, Op2)
+ ),
+
+ TP_fast_assign(
+ __entry->vcpu_pc = vcpu_pc;
+ __entry->is_write = params->is_write;
+ __entry->name = reg->name;
+ __entry->Op0 = reg->Op0;
+ __entry->Op0 = reg->Op0;
+ __entry->Op1 = reg->Op1;
+ __entry->CRn = reg->CRn;
+ __entry->CRm = reg->CRm;
+ __entry->Op2 = reg->Op2;
+ ),
+
+ TP_printk("PC: %lx %s (%d,%d,%d,%d,%d) %s",
+ __entry->vcpu_pc, __entry->name ?: "UNKN",
+ __entry->Op0, __entry->Op1, __entry->CRn,
+ __entry->CRm, __entry->Op2,
+ __entry->is_write ? "write" : "read")
+);
+
+TRACE_EVENT(kvm_set_guest_debug,
+ TP_PROTO(struct kvm_vcpu *vcpu, __u32 guest_debug),
+ TP_ARGS(vcpu, guest_debug),
+
+ TP_STRUCT__entry(
+ __field(struct kvm_vcpu *, vcpu)
+ __field(__u32, guest_debug)
+ ),
+
+ TP_fast_assign(
+ __entry->vcpu = vcpu;
+ __entry->guest_debug = guest_debug;
+ ),
+
+ TP_printk("vcpu: %p, flags: 0x%08x", __entry->vcpu, __entry->guest_debug)
+);
+
+#endif /* _TRACE_HANDLE_EXIT_ARM64_KVM_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE trace_handle_exit
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/arch/arm64/kvm/vgic-sys-reg-v3.c b/arch/arm64/kvm/vgic-sys-reg-v3.c
index e7d1ea92095d..2f92bdcb1188 100644
--- a/arch/arm64/kvm/vgic-sys-reg-v3.c
+++ b/arch/arm64/kvm/vgic-sys-reg-v3.c
@@ -7,7 +7,7 @@
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <asm/kvm_emulate.h>
-#include "vgic.h"
+#include "vgic/vgic.h"
#include "sys_regs.h"
static bool access_gic_ctlr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
diff --git a/virt/kvm/arm/vgic/trace.h b/arch/arm64/kvm/vgic/trace.h
index 4fd4f6db181b..83c64401a7fc 100644
--- a/virt/kvm/arm/vgic/trace.h
+++ b/arch/arm64/kvm/vgic/trace.h
@@ -30,7 +30,7 @@ TRACE_EVENT(vgic_update_irq_pending,
#endif /* _TRACE_VGIC_H */
#undef TRACE_INCLUDE_PATH
-#define TRACE_INCLUDE_PATH ../../virt/kvm/arm/vgic
+#define TRACE_INCLUDE_PATH ../../arch/arm64/kvm/vgic
#undef TRACE_INCLUDE_FILE
#define TRACE_INCLUDE_FILE trace
diff --git a/virt/kvm/arm/vgic/vgic-debug.c b/arch/arm64/kvm/vgic/vgic-debug.c
index b13a9e3f99dd..b13a9e3f99dd 100644
--- a/virt/kvm/arm/vgic/vgic-debug.c
+++ b/arch/arm64/kvm/vgic/vgic-debug.c
diff --git a/virt/kvm/arm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c
index 32e32d67a127..32e32d67a127 100644
--- a/virt/kvm/arm/vgic/vgic-init.c
+++ b/arch/arm64/kvm/vgic/vgic-init.c
diff --git a/virt/kvm/arm/vgic/vgic-irqfd.c b/arch/arm64/kvm/vgic/vgic-irqfd.c
index d8cdfea5cc96..d8cdfea5cc96 100644
--- a/virt/kvm/arm/vgic/vgic-irqfd.c
+++ b/arch/arm64/kvm/vgic/vgic-irqfd.c
diff --git a/virt/kvm/arm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c
index c012a52b19f5..c012a52b19f5 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/arch/arm64/kvm/vgic/vgic-its.c
diff --git a/virt/kvm/arm/vgic/vgic-kvm-device.c b/arch/arm64/kvm/vgic/vgic-kvm-device.c
index 44419679f91a..44419679f91a 100644
--- a/virt/kvm/arm/vgic/vgic-kvm-device.c
+++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v2.c b/arch/arm64/kvm/vgic/vgic-mmio-v2.c
index a016f07adc28..a016f07adc28 100644
--- a/virt/kvm/arm/vgic/vgic-mmio-v2.c
+++ b/arch/arm64/kvm/vgic/vgic-mmio-v2.c
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
index 89a14ec8b33b..d2339a2b9fb9 100644
--- a/virt/kvm/arm/vgic/vgic-mmio-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
@@ -302,7 +302,7 @@ static unsigned long vgic_v3_uaccess_read_pending(struct kvm_vcpu *vcpu,
* pending state of interrupt is latched in pending_latch variable.
* Userspace will save and restore pending state and line_level
* separately.
- * Refer to Documentation/virt/kvm/devices/arm-vgic-v3.txt
+ * Refer to Documentation/virt/kvm/devices/arm-vgic-v3.rst
* for handling of ISPENDR and ICPENDR.
*/
for (i = 0; i < len * 8; i++) {
diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/arch/arm64/kvm/vgic/vgic-mmio.c
index b2d73fc0d1ef..b2d73fc0d1ef 100644
--- a/virt/kvm/arm/vgic/vgic-mmio.c
+++ b/arch/arm64/kvm/vgic/vgic-mmio.c
diff --git a/virt/kvm/arm/vgic/vgic-mmio.h b/arch/arm64/kvm/vgic/vgic-mmio.h
index fefcca2b14dc..fefcca2b14dc 100644
--- a/virt/kvm/arm/vgic/vgic-mmio.h
+++ b/arch/arm64/kvm/vgic/vgic-mmio.h
diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/arch/arm64/kvm/vgic/vgic-v2.c
index 621cc168fe3f..ebf53a4e1296 100644
--- a/virt/kvm/arm/vgic/vgic-v2.c
+++ b/arch/arm64/kvm/vgic/vgic-v2.c
@@ -56,7 +56,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
cpuif->vgic_hcr &= ~GICH_HCR_UIE;
- for (lr = 0; lr < vgic_cpu->used_lrs; lr++) {
+ for (lr = 0; lr < vgic_cpu->vgic_v2.used_lrs; lr++) {
u32 val = cpuif->vgic_lr[lr];
u32 cpuid, intid = val & GICH_LR_VIRTUALID;
struct vgic_irq *irq;
@@ -120,7 +120,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
vgic_put_irq(vcpu->kvm, irq);
}
- vgic_cpu->used_lrs = 0;
+ cpuif->used_lrs = 0;
}
/*
@@ -427,7 +427,7 @@ out:
static void save_lrs(struct kvm_vcpu *vcpu, void __iomem *base)
{
struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
- u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs;
+ u64 used_lrs = cpu_if->used_lrs;
u64 elrsr;
int i;
@@ -448,7 +448,7 @@ static void save_lrs(struct kvm_vcpu *vcpu, void __iomem *base)
void vgic_v2_save_state(struct kvm_vcpu *vcpu)
{
void __iomem *base = kvm_vgic_global_state.vctrl_base;
- u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs;
+ u64 used_lrs = vcpu->arch.vgic_cpu.vgic_v2.used_lrs;
if (!base)
return;
@@ -463,7 +463,7 @@ void vgic_v2_restore_state(struct kvm_vcpu *vcpu)
{
struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
void __iomem *base = kvm_vgic_global_state.vctrl_base;
- u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs;
+ u64 used_lrs = cpu_if->used_lrs;
int i;
if (!base)
diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c
index 2c9fc13e2c59..76e2d85789ed 100644
--- a/virt/kvm/arm/vgic/vgic-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-v3.c
@@ -39,7 +39,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
cpuif->vgic_hcr &= ~ICH_HCR_UIE;
- for (lr = 0; lr < vgic_cpu->used_lrs; lr++) {
+ for (lr = 0; lr < cpuif->used_lrs; lr++) {
u64 val = cpuif->vgic_lr[lr];
u32 intid, cpuid;
struct vgic_irq *irq;
@@ -111,7 +111,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
vgic_put_irq(vcpu->kvm, irq);
}
- vgic_cpu->used_lrs = 0;
+ cpuif->used_lrs = 0;
}
/* Requires the irq to be locked already */
@@ -587,7 +587,7 @@ int vgic_v3_probe(const struct gic_kvm_info *info)
int ret;
/*
- * The ListRegs field is 5 bits, but there is a architectural
+ * The ListRegs field is 5 bits, but there is an architectural
* maximum of 16 list registers. Just ignore bit 4...
*/
kvm_vgic_global_state.nr_lr = (ich_vtr_el2 & 0xf) + 1;
@@ -630,12 +630,10 @@ int vgic_v3_probe(const struct gic_kvm_info *info)
if (kvm_vgic_global_state.vcpu_base == 0)
kvm_info("disabling GICv2 emulation\n");
-#ifdef CONFIG_ARM64
if (cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM_30115)) {
group0_trap = true;
group1_trap = true;
}
-#endif
if (group0_trap || group1_trap || common_trap) {
kvm_info("GICv3 sysreg trapping enabled ([%s%s%s], reduced performance)\n",
@@ -664,10 +662,10 @@ void vgic_v3_load(struct kvm_vcpu *vcpu)
if (likely(cpu_if->vgic_sre))
kvm_call_hyp(__vgic_v3_write_vmcr, cpu_if->vgic_vmcr);
- kvm_call_hyp(__vgic_v3_restore_aprs, vcpu);
+ kvm_call_hyp(__vgic_v3_restore_aprs, kern_hyp_va(cpu_if));
if (has_vhe())
- __vgic_v3_activate_traps(vcpu);
+ __vgic_v3_activate_traps(cpu_if);
WARN_ON(vgic_v4_load(vcpu));
}
@@ -682,12 +680,14 @@ void vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu)
void vgic_v3_put(struct kvm_vcpu *vcpu)
{
+ struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
+
WARN_ON(vgic_v4_put(vcpu, false));
vgic_v3_vmcr_sync(vcpu);
- kvm_call_hyp(__vgic_v3_save_aprs, vcpu);
+ kvm_call_hyp(__vgic_v3_save_aprs, kern_hyp_va(cpu_if));
if (has_vhe())
- __vgic_v3_deactivate_traps(vcpu);
+ __vgic_v3_deactivate_traps(cpu_if);
}
diff --git a/virt/kvm/arm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c
index 27ac833e5ec7..27ac833e5ec7 100644
--- a/virt/kvm/arm/vgic/vgic-v4.c
+++ b/arch/arm64/kvm/vgic/vgic-v4.c
diff --git a/virt/kvm/arm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c
index 99b02ca730a8..c3643b7f101b 100644
--- a/virt/kvm/arm/vgic/vgic.c
+++ b/arch/arm64/kvm/vgic/vgic.c
@@ -786,6 +786,7 @@ static void vgic_flush_lr_state(struct kvm_vcpu *vcpu)
int count;
bool multi_sgi;
u8 prio = 0xff;
+ int i = 0;
lockdep_assert_held(&vgic_cpu->ap_list_lock);
@@ -827,11 +828,14 @@ static void vgic_flush_lr_state(struct kvm_vcpu *vcpu)
}
}
- vcpu->arch.vgic_cpu.used_lrs = count;
-
/* Nuke remaining LRs */
- for ( ; count < kvm_vgic_global_state.nr_lr; count++)
- vgic_clear_lr(vcpu, count);
+ for (i = count ; i < kvm_vgic_global_state.nr_lr; i++)
+ vgic_clear_lr(vcpu, i);
+
+ if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
+ vcpu->arch.vgic_cpu.vgic_v2.used_lrs = count;
+ else
+ vcpu->arch.vgic_cpu.vgic_v3.used_lrs = count;
}
static inline bool can_access_vgic_from_kernel(void)
@@ -849,13 +853,13 @@ static inline void vgic_save_state(struct kvm_vcpu *vcpu)
if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
vgic_v2_save_state(vcpu);
else
- __vgic_v3_save_state(vcpu);
+ __vgic_v3_save_state(&vcpu->arch.vgic_cpu.vgic_v3);
}
/* Sync back the hardware VGIC state into our emulation after a guest's run. */
void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
{
- struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+ int used_lrs;
/* An empty ap_list_head implies used_lrs == 0 */
if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head))
@@ -864,7 +868,12 @@ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
if (can_access_vgic_from_kernel())
vgic_save_state(vcpu);
- if (vgic_cpu->used_lrs)
+ if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
+ used_lrs = vcpu->arch.vgic_cpu.vgic_v2.used_lrs;
+ else
+ used_lrs = vcpu->arch.vgic_cpu.vgic_v3.used_lrs;
+
+ if (used_lrs)
vgic_fold_lr_state(vcpu);
vgic_prune_ap_list(vcpu);
}
@@ -874,7 +883,7 @@ static inline void vgic_restore_state(struct kvm_vcpu *vcpu)
if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
vgic_v2_restore_state(vcpu);
else
- __vgic_v3_restore_state(vcpu);
+ __vgic_v3_restore_state(&vcpu->arch.vgic_cpu.vgic_v3);
}
/* Flush our emulation state into the GIC hardware before entering the guest. */
diff --git a/virt/kvm/arm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h
index 769e4802645e..64fcd7511110 100644
--- a/virt/kvm/arm/vgic/vgic.h
+++ b/arch/arm64/kvm/vgic/vgic.h
@@ -42,7 +42,7 @@
VGIC_AFFINITY_LEVEL(val, 3))
/*
- * As per Documentation/virt/kvm/devices/arm-vgic-v3.txt,
+ * As per Documentation/virt/kvm/devices/arm-vgic-v3.rst,
* below macros are defined for CPUREG encoding.
*/
#define KVM_REG_ARM_VGIC_SYSREG_OP0_MASK 0x000000000000c000
@@ -63,7 +63,7 @@
KVM_REG_ARM_VGIC_SYSREG_OP2_MASK)
/*
- * As per Documentation/virt/kvm/devices/arm-vgic-its.txt,
+ * As per Documentation/virt/kvm/devices/arm-vgic-its.rst,
* below macros are defined for ITS table entry encoding.
*/
#define KVM_ITS_CTE_VALID_SHIFT 63
diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S
index 8e25e89ad01f..0f8a3a9e3795 100644
--- a/arch/arm64/lib/copy_from_user.S
+++ b/arch/arm64/lib/copy_from_user.S
@@ -20,36 +20,36 @@
* x0 - bytes not copied
*/
- .macro ldrb1 ptr, regB, val
- uao_user_alternative 9998f, ldrb, ldtrb, \ptr, \regB, \val
+ .macro ldrb1 reg, ptr, val
+ uao_user_alternative 9998f, ldrb, ldtrb, \reg, \ptr, \val
.endm
- .macro strb1 ptr, regB, val
- strb \ptr, [\regB], \val
+ .macro strb1 reg, ptr, val
+ strb \reg, [\ptr], \val
.endm
- .macro ldrh1 ptr, regB, val
- uao_user_alternative 9998f, ldrh, ldtrh, \ptr, \regB, \val
+ .macro ldrh1 reg, ptr, val
+ uao_user_alternative 9998f, ldrh, ldtrh, \reg, \ptr, \val
.endm
- .macro strh1 ptr, regB, val
- strh \ptr, [\regB], \val
+ .macro strh1 reg, ptr, val
+ strh \reg, [\ptr], \val
.endm
- .macro ldr1 ptr, regB, val
- uao_user_alternative 9998f, ldr, ldtr, \ptr, \regB, \val
+ .macro ldr1 reg, ptr, val
+ uao_user_alternative 9998f, ldr, ldtr, \reg, \ptr, \val
.endm
- .macro str1 ptr, regB, val
- str \ptr, [\regB], \val
+ .macro str1 reg, ptr, val
+ str \reg, [\ptr], \val
.endm
- .macro ldp1 ptr, regB, regC, val
- uao_ldp 9998f, \ptr, \regB, \regC, \val
+ .macro ldp1 reg1, reg2, ptr, val
+ uao_ldp 9998f, \reg1, \reg2, \ptr, \val
.endm
- .macro stp1 ptr, regB, regC, val
- stp \ptr, \regB, [\regC], \val
+ .macro stp1 reg1, reg2, ptr, val
+ stp \reg1, \reg2, [\ptr], \val
.endm
end .req x5
diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S
index 667139013ed1..80e37ada0ee1 100644
--- a/arch/arm64/lib/copy_in_user.S
+++ b/arch/arm64/lib/copy_in_user.S
@@ -21,36 +21,36 @@
* Returns:
* x0 - bytes not copied
*/
- .macro ldrb1 ptr, regB, val
- uao_user_alternative 9998f, ldrb, ldtrb, \ptr, \regB, \val
+ .macro ldrb1 reg, ptr, val
+ uao_user_alternative 9998f, ldrb, ldtrb, \reg, \ptr, \val
.endm
- .macro strb1 ptr, regB, val
- uao_user_alternative 9998f, strb, sttrb, \ptr, \regB, \val
+ .macro strb1 reg, ptr, val
+ uao_user_alternative 9998f, strb, sttrb, \reg, \ptr, \val
.endm
- .macro ldrh1 ptr, regB, val
- uao_user_alternative 9998f, ldrh, ldtrh, \ptr, \regB, \val
+ .macro ldrh1 reg, ptr, val
+ uao_user_alternative 9998f, ldrh, ldtrh, \reg, \ptr, \val
.endm
- .macro strh1 ptr, regB, val
- uao_user_alternative 9998f, strh, sttrh, \ptr, \regB, \val
+ .macro strh1 reg, ptr, val
+ uao_user_alternative 9998f, strh, sttrh, \reg, \ptr, \val
.endm
- .macro ldr1 ptr, regB, val
- uao_user_alternative 9998f, ldr, ldtr, \ptr, \regB, \val
+ .macro ldr1 reg, ptr, val
+ uao_user_alternative 9998f, ldr, ldtr, \reg, \ptr, \val
.endm
- .macro str1 ptr, regB, val
- uao_user_alternative 9998f, str, sttr, \ptr, \regB, \val
+ .macro str1 reg, ptr, val
+ uao_user_alternative 9998f, str, sttr, \reg, \ptr, \val
.endm
- .macro ldp1 ptr, regB, regC, val
- uao_ldp 9998f, \ptr, \regB, \regC, \val
+ .macro ldp1 reg1, reg2, ptr, val
+ uao_ldp 9998f, \reg1, \reg2, \ptr, \val
.endm
- .macro stp1 ptr, regB, regC, val
- uao_stp 9998f, \ptr, \regB, \regC, \val
+ .macro stp1 reg1, reg2, ptr, val
+ uao_stp 9998f, \reg1, \reg2, \ptr, \val
.endm
end .req x5
diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S
index 1a104d0089f3..4ec59704b8f2 100644
--- a/arch/arm64/lib/copy_to_user.S
+++ b/arch/arm64/lib/copy_to_user.S
@@ -19,36 +19,36 @@
* Returns:
* x0 - bytes not copied
*/
- .macro ldrb1 ptr, regB, val
- ldrb \ptr, [\regB], \val
+ .macro ldrb1 reg, ptr, val
+ ldrb \reg, [\ptr], \val
.endm
- .macro strb1 ptr, regB, val
- uao_user_alternative 9998f, strb, sttrb, \ptr, \regB, \val
+ .macro strb1 reg, ptr, val
+ uao_user_alternative 9998f, strb, sttrb, \reg, \ptr, \val
.endm
- .macro ldrh1 ptr, regB, val
- ldrh \ptr, [\regB], \val
+ .macro ldrh1 reg, ptr, val
+ ldrh \reg, [\ptr], \val
.endm
- .macro strh1 ptr, regB, val
- uao_user_alternative 9998f, strh, sttrh, \ptr, \regB, \val
+ .macro strh1 reg, ptr, val
+ uao_user_alternative 9998f, strh, sttrh, \reg, \ptr, \val
.endm
- .macro ldr1 ptr, regB, val
- ldr \ptr, [\regB], \val
+ .macro ldr1 reg, ptr, val
+ ldr \reg, [\ptr], \val
.endm
- .macro str1 ptr, regB, val
- uao_user_alternative 9998f, str, sttr, \ptr, \regB, \val
+ .macro str1 reg, ptr, val
+ uao_user_alternative 9998f, str, sttr, \reg, \ptr, \val
.endm
- .macro ldp1 ptr, regB, regC, val
- ldp \ptr, \regB, [\regC], \val
+ .macro ldp1 reg1, reg2, ptr, val
+ ldp \reg1, \reg2, [\ptr], \val
.endm
- .macro stp1 ptr, regB, regC, val
- uao_stp 9998f, \ptr, \regB, \regC, \val
+ .macro stp1 reg1, reg2, ptr, val
+ uao_stp 9998f, \reg1, \reg2, \ptr, \val
.endm
end .req x5
diff --git a/arch/arm64/lib/crc32.S b/arch/arm64/lib/crc32.S
index 243e107e9896..0f9e10ecda23 100644
--- a/arch/arm64/lib/crc32.S
+++ b/arch/arm64/lib/crc32.S
@@ -9,7 +9,7 @@
#include <asm/alternative.h>
#include <asm/assembler.h>
- .cpu generic+crc
+ .arch armv8-a+crc
.macro __crc32, c
cmp x2, #16
diff --git a/arch/arm64/lib/memcpy.S b/arch/arm64/lib/memcpy.S
index 9f382adfa88a..e0bf83d556f2 100644
--- a/arch/arm64/lib/memcpy.S
+++ b/arch/arm64/lib/memcpy.S
@@ -24,36 +24,36 @@
* Returns:
* x0 - dest
*/
- .macro ldrb1 ptr, regB, val
- ldrb \ptr, [\regB], \val
+ .macro ldrb1 reg, ptr, val
+ ldrb \reg, [\ptr], \val
.endm
- .macro strb1 ptr, regB, val
- strb \ptr, [\regB], \val
+ .macro strb1 reg, ptr, val
+ strb \reg, [\ptr], \val
.endm
- .macro ldrh1 ptr, regB, val
- ldrh \ptr, [\regB], \val
+ .macro ldrh1 reg, ptr, val
+ ldrh \reg, [\ptr], \val
.endm
- .macro strh1 ptr, regB, val
- strh \ptr, [\regB], \val
+ .macro strh1 reg, ptr, val
+ strh \reg, [\ptr], \val
.endm
- .macro ldr1 ptr, regB, val
- ldr \ptr, [\regB], \val
+ .macro ldr1 reg, ptr, val
+ ldr \reg, [\ptr], \val
.endm
- .macro str1 ptr, regB, val
- str \ptr, [\regB], \val
+ .macro str1 reg, ptr, val
+ str \reg, [\ptr], \val
.endm
- .macro ldp1 ptr, regB, regC, val
- ldp \ptr, \regB, [\regC], \val
+ .macro ldp1 reg1, reg2, ptr, val
+ ldp \reg1, \reg2, [\ptr], \val
.endm
- .macro stp1 ptr, regB, regC, val
- stp \ptr, \regB, [\regC], \val
+ .macro stp1 reg1, reg2, ptr, val
+ stp \reg1, \reg2, [\ptr], \val
.endm
.weak memcpy
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index 9b26f9a88724..d702d60e64da 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -92,6 +92,9 @@ static void set_reserved_asid_bits(void)
bitmap_clear(asid_map, 0, NUM_USER_ASIDS);
}
+#define asid_gen_match(asid) \
+ (!(((asid) ^ atomic64_read(&asid_generation)) >> asid_bits))
+
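
asid_gen_match() works by XORing the stored ASID against the current generation counter and shifting out the low asid_bits: any bits left over mean the generation fields differ. An illustrative model with an assumed 16-bit ASID field:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative model: 16 ASID bits, generation stored above them. */
static const unsigned asid_bits = 16;

static bool asid_gen_match(uint64_t asid, uint64_t generation)
{
	/* Any mismatch above the low asid_bits means a stale generation. */
	return !((asid ^ generation) >> asid_bits);
}

int main(void)
{
	uint64_t generation = 3UL << 16;		/* current generation */
	uint64_t fresh = generation | 0x002a;		/* ASID 42, same generation */
	uint64_t stale = (2UL << 16) | 0x002a;		/* ASID 42, older generation */

	printf("fresh: %d, stale: %d\n", asid_gen_match(fresh, generation),
	       asid_gen_match(stale, generation));
	return 0;
}
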
static void flush_context(void)
{
int i;
@@ -220,8 +223,7 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
* because atomic RmWs are totally ordered for a given location.
*/
old_active_asid = atomic64_read(&per_cpu(active_asids, cpu));
- if (old_active_asid &&
- !((asid ^ atomic64_read(&asid_generation)) >> asid_bits) &&
+ if (old_active_asid && asid_gen_match(asid) &&
atomic64_cmpxchg_relaxed(&per_cpu(active_asids, cpu),
old_active_asid, asid))
goto switch_mm_fastpath;
@@ -229,7 +231,7 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
raw_spin_lock_irqsave(&cpu_asid_lock, flags);
/* Check that our ASID belongs to the current generation. */
asid = atomic64_read(&mm->context.id);
- if ((asid ^ atomic64_read(&asid_generation)) >> asid_bits) {
+ if (!asid_gen_match(asid)) {
asid = new_context(mm);
atomic64_set(&mm->context.id, asid);
}
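The asid_gen_match() macro introduced above folds the generation check that was previously open-coded at both call sites: an ASID belongs to the current generation exactly when XOR-ing it with asid_generation leaves no bits set above the low asid_bits. A minimal standalone sketch of that test, assuming a hypothetical 16-bit ASID width and adding a main() purely for demonstration, could look like:

#include <stdint.h>
#include <stdio.h>

#define ASID_BITS 16	/* hypothetical width; the kernel derives asid_bits at boot */

/* Generation lives above ASID_BITS; the low bits are the per-mm ASID. */
static int asid_gen_match(uint64_t asid, uint64_t asid_generation)
{
	return !((asid ^ asid_generation) >> ASID_BITS);
}

int main(void)
{
	uint64_t generation = 3ULL << ASID_BITS;		/* current generation */
	uint64_t current    = generation | 0x2a;		/* ASID allocated this generation */
	uint64_t stale      = (2ULL << ASID_BITS) | 0x2a;	/* same ASID, older generation */

	/* prints "current=1 stale=0" */
	printf("current=%d stale=%d\n",
	       asid_gen_match(current, generation),
	       asid_gen_match(stale, generation));
	return 0;
}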
diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c
index 860c00ec8bd3..0da020c563e6 100644
--- a/arch/arm64/mm/dump.c
+++ b/arch/arm64/mm/dump.c
@@ -146,6 +146,11 @@ static const struct prot_bits pte_bits[] = {
.set = "UXN",
.clear = " ",
}, {
+ .mask = PTE_GP,
+ .val = PTE_GP,
+ .set = "GP",
+ .clear = " ",
+ }, {
.mask = PTE_ATTRINDX_MASK,
.val = PTE_ATTRINDX(MT_DEVICE_nGnRnE),
.set = "DEVICE/nGnRnE",
@@ -247,7 +252,7 @@ static void note_prot_wx(struct pg_state *st, unsigned long addr)
}
static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
- unsigned long val)
+ u64 val)
{
struct pg_state *st = container_of(pt_st, struct pg_state, ptdump);
static const char units[] = "KMGTPE";
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index c9cedc0432d2..dff2d72b0883 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -635,11 +635,13 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs)
inf = esr_to_fault_info(esr);
- /*
- * Return value ignored as we rely on signal merging.
- * Future patches will make this more robust.
- */
- apei_claim_sea(regs);
+ if (user_mode(regs) && apei_claim_sea(regs) == 0) {
+ /*
+ * APEI claimed this as a firmware-first notification.
+ * Some processing deferred to task_work before ret_to_user().
+ */
+ return 0;
+ }
if (esr & ESR_ELx_FnV)
siaddr = NULL;
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index e42727e3568e..d2df416b840e 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -272,7 +272,7 @@ int pfn_valid(unsigned long pfn)
if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS)
return 0;
- if (!valid_section(__nr_to_section(pfn_to_section_nr(pfn))))
+ if (!valid_section(__pfn_to_section(pfn)))
return 0;
#endif
return memblock_is_map_memory(addr);
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index a374e4f51a62..c299b73dd5e4 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -610,6 +610,22 @@ core_initcall(map_entry_trampoline);
#endif
/*
+ * Open-coded check for BTI, only for use to determine the configuration
+ * for early mappings before the cpufeature code has run.
+ */
+static bool arm64_early_this_cpu_has_bti(void)
+{
+ u64 pfr1;
+
+ if (!IS_ENABLED(CONFIG_ARM64_BTI_KERNEL))
+ return false;
+
+ pfr1 = read_sysreg_s(SYS_ID_AA64PFR1_EL1);
+ return cpuid_feature_extract_unsigned_field(pfr1,
+ ID_AA64PFR1_BT_SHIFT);
+}
+
+/*
* Create fine-grained mappings for the kernel.
*/
static void __init map_kernel(pgd_t *pgdp)
@@ -625,6 +641,14 @@ static void __init map_kernel(pgd_t *pgdp)
pgprot_t text_prot = rodata_enabled ? PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC;
/*
+ * If we have a CPU that supports BTI and a kernel built for
+ * BTI then mark the kernel executable text as guarded pages
+ * now so we don't have to rewrite the page tables later.
+ */
+ if (arm64_early_this_cpu_has_bti())
+ text_prot = __pgprot_modify(text_prot, PTE_GP, PTE_GP);
+
+ /*
* Only rodata will be remapped with different permissions later on,
* all other segments are allowed to use contiguous mappings.
*/
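The early BTI probe added above reads ID_AA64PFR1_EL1 directly because it runs before the cpufeature code has populated the sanitised registers. The sketch below, which is not kernel code, shows the kind of field extraction it relies on; the 4-bit field width, the BT field at shift 0, and the sample register value are assumptions for illustration only.

#include <stdint.h>
#include <stdio.h>

/*
 * ID register fields are 4 bits wide; this mirrors
 * cpuid_feature_extract_unsigned_field() in spirit only.
 */
static unsigned int extract_unsigned_field(uint64_t reg, unsigned int shift)
{
	return (reg >> shift) & 0xf;
}

int main(void)
{
	uint64_t id_aa64pfr1 = 0x1;	/* pretend the BT field (assumed at bits [3:0]) reads 1 */

	if (extract_unsigned_field(id_aa64pfr1, 0))
		puts("CPU advertises BTI; map kernel text with PTE_GP");
	return 0;
}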
diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
index 250c49008d73..bde08090b838 100644
--- a/arch/arm64/mm/pageattr.c
+++ b/arch/arm64/mm/pageattr.c
@@ -126,13 +126,13 @@ int set_memory_nx(unsigned long addr, int numpages)
{
return change_memory_common(addr, numpages,
__pgprot(PTE_PXN),
- __pgprot(0));
+ __pgprot(PTE_MAYBE_GP));
}
int set_memory_x(unsigned long addr, int numpages)
{
return change_memory_common(addr, numpages,
- __pgprot(0),
+ __pgprot(PTE_MAYBE_GP),
__pgprot(PTE_PXN));
}
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 197a9ba2d5ea..b7bebb12a56d 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -58,6 +58,8 @@
* cpu_do_suspend - save CPU registers context
*
* x0: virtual address of context pointer
+ *
+ * This must be kept in sync with struct cpu_suspend_ctx in <asm/suspend.h>.
*/
SYM_FUNC_START(cpu_do_suspend)
mrs x2, tpidr_el0
@@ -82,6 +84,11 @@ alternative_endif
stp x8, x9, [x0, #48]
stp x10, x11, [x0, #64]
stp x12, x13, [x0, #80]
+ /*
+ * Save x18 as it may be used as a platform register, e.g. by shadow
+ * call stack.
+ */
+ str x18, [x0, #96]
ret
SYM_FUNC_END(cpu_do_suspend)
@@ -98,6 +105,13 @@ SYM_FUNC_START(cpu_do_resume)
ldp x9, x10, [x0, #48]
ldp x11, x12, [x0, #64]
ldp x13, x14, [x0, #80]
+ /*
+ * Restore x18, as it may be used as a platform register, and clear
+ * the buffer to minimize the risk of exposure when used for shadow
+ * call stack.
+ */
+ ldr x18, [x0, #96]
+ str xzr, [x0, #96]
msr tpidr_el0, x2
msr tpidrro_el0, x3
msr contextidr_el1, x4
@@ -139,7 +153,7 @@ alternative_if ARM64_HAS_RAS_EXTN
msr_s SYS_DISR_EL1, xzr
alternative_else_nop_endif
- ptrauth_keys_install_kernel x14, 0, x1, x2, x3
+ ptrauth_keys_install_kernel_nosync x14, x1, x2, x3
isb
ret
SYM_FUNC_END(cpu_do_resume)
@@ -386,8 +400,6 @@ SYM_FUNC_END(idmap_kpti_install_ng_mappings)
*
* Initialise the processor for turning the MMU on.
*
- * Input:
- * x0 with a flag ARM64_CPU_BOOT_PRIMARY/ARM64_CPU_BOOT_SECONDARY/ARM64_CPU_RUNTIME.
* Output:
* Return in x0 the value of the SCTLR_EL1 register.
*/
@@ -446,51 +458,9 @@ SYM_FUNC_START(__cpu_setup)
1:
#endif /* CONFIG_ARM64_HW_AFDBM */
msr tcr_el1, x10
- mov x1, x0
/*
* Prepare SCTLR
*/
mov_q x0, SCTLR_EL1_SET
-
-#ifdef CONFIG_ARM64_PTR_AUTH
- /* No ptrauth setup for run time cpus */
- cmp x1, #ARM64_CPU_RUNTIME
- b.eq 3f
-
- /* Check if the CPU supports ptrauth */
- mrs x2, id_aa64isar1_el1
- ubfx x2, x2, #ID_AA64ISAR1_APA_SHIFT, #8
- cbz x2, 3f
-
- /*
- * The primary cpu keys are reset here and can be
- * re-initialised with some proper values later.
- */
- msr_s SYS_APIAKEYLO_EL1, xzr
- msr_s SYS_APIAKEYHI_EL1, xzr
-
- /* Just enable ptrauth for primary cpu */
- cmp x1, #ARM64_CPU_BOOT_PRIMARY
- b.eq 2f
-
- /* if !system_supports_address_auth() then skip enable */
-alternative_if_not ARM64_HAS_ADDRESS_AUTH
- b 3f
-alternative_else_nop_endif
-
- /* Install ptrauth key for secondary cpus */
- adr_l x2, secondary_data
- ldr x3, [x2, #CPU_BOOT_TASK] // get secondary_data.task
- cbz x3, 2f // check for slow booting cpus
- ldp x3, x4, [x2, #CPU_BOOT_PTRAUTH_KEY]
- msr_s SYS_APIAKEYLO_EL1, x3
- msr_s SYS_APIAKEYHI_EL1, x4
-
-2: /* Enable ptrauth instructions */
- ldr x2, =SCTLR_ELx_ENIA | SCTLR_ELx_ENIB | \
- SCTLR_ELx_ENDA | SCTLR_ELx_ENDB
- orr x0, x0, x2
-3:
-#endif
ret // return to head.S
SYM_FUNC_END(__cpu_setup)
diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h
index eb73f9f72c46..cc0cf0f5c7c3 100644
--- a/arch/arm64/net/bpf_jit.h
+++ b/arch/arm64/net/bpf_jit.h
@@ -100,6 +100,14 @@
/* Rd = Rn OP imm12 */
#define A64_ADD_I(sf, Rd, Rn, imm12) A64_ADDSUB_IMM(sf, Rd, Rn, imm12, ADD)
#define A64_SUB_I(sf, Rd, Rn, imm12) A64_ADDSUB_IMM(sf, Rd, Rn, imm12, SUB)
+#define A64_ADDS_I(sf, Rd, Rn, imm12) \
+ A64_ADDSUB_IMM(sf, Rd, Rn, imm12, ADD_SETFLAGS)
+#define A64_SUBS_I(sf, Rd, Rn, imm12) \
+ A64_ADDSUB_IMM(sf, Rd, Rn, imm12, SUB_SETFLAGS)
+/* Rn + imm12; set condition flags */
+#define A64_CMN_I(sf, Rn, imm12) A64_ADDS_I(sf, A64_ZR, Rn, imm12)
+/* Rn - imm12; set condition flags */
+#define A64_CMP_I(sf, Rn, imm12) A64_SUBS_I(sf, A64_ZR, Rn, imm12)
/* Rd = Rn */
#define A64_MOV(sf, Rd, Rn) A64_ADD_I(sf, Rd, Rn, 0)
@@ -189,4 +197,26 @@
/* Rn & Rm; set condition flags */
#define A64_TST(sf, Rn, Rm) A64_ANDS(sf, A64_ZR, Rn, Rm)
+/* Logical (immediate) */
+#define A64_LOGIC_IMM(sf, Rd, Rn, imm, type) ({ \
+ u64 imm64 = (sf) ? (u64)imm : (u64)(u32)imm; \
+ aarch64_insn_gen_logical_immediate(AARCH64_INSN_LOGIC_##type, \
+ A64_VARIANT(sf), Rn, Rd, imm64); \
+})
+/* Rd = Rn OP imm */
+#define A64_AND_I(sf, Rd, Rn, imm) A64_LOGIC_IMM(sf, Rd, Rn, imm, AND)
+#define A64_ORR_I(sf, Rd, Rn, imm) A64_LOGIC_IMM(sf, Rd, Rn, imm, ORR)
+#define A64_EOR_I(sf, Rd, Rn, imm) A64_LOGIC_IMM(sf, Rd, Rn, imm, EOR)
+#define A64_ANDS_I(sf, Rd, Rn, imm) A64_LOGIC_IMM(sf, Rd, Rn, imm, AND_SETFLAGS)
+/* Rn & imm; set condition flags */
+#define A64_TST_I(sf, Rn, imm) A64_ANDS_I(sf, A64_ZR, Rn, imm)
+
+/* HINTs */
+#define A64_HINT(x) aarch64_insn_gen_hint(x)
+
+/* BTI */
+#define A64_BTI_C A64_HINT(AARCH64_INSN_HINT_BTIC)
+#define A64_BTI_J A64_HINT(AARCH64_INSN_HINT_BTIJ)
+#define A64_BTI_JC A64_HINT(AARCH64_INSN_HINT_BTIJC)
+
#endif /* _BPF_JIT_H */
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index cdc79de0c794..3cb25b43b368 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -167,11 +167,21 @@ static inline int epilogue_offset(const struct jit_ctx *ctx)
return to - from;
}
+static bool is_addsub_imm(u32 imm)
+{
+ /* Either imm12 or shifted imm12. */
+ return !(imm & ~0xfff) || !(imm & ~0xfff000);
+}
+
/* Stack must be multiples of 16B */
#define STACK_ALIGN(sz) (((sz) + 15) & ~15)
/* Tail call offset to jump into */
+#if IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)
+#define PROLOGUE_OFFSET 8
+#else
#define PROLOGUE_OFFSET 7
+#endif
static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
{
@@ -208,6 +218,10 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
*
*/
+ /* BTI landing pad */
+ if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL))
+ emit(A64_BTI_C, ctx);
+
/* Save FP and LR registers to stay align with ARM64 AAPCS */
emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx);
emit(A64_MOV(1, A64_FP, A64_SP), ctx);
@@ -230,6 +244,10 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
cur_offset, PROLOGUE_OFFSET);
return -1;
}
+
+ /* BTI landing pad for the tail call, done with a BR */
+ if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL))
+ emit(A64_BTI_J, ctx);
}
ctx->stack_size = STACK_ALIGN(prog->aux->stack_depth);
@@ -356,6 +374,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
const bool isdw = BPF_SIZE(code) == BPF_DW;
u8 jmp_cond, reg;
s32 jmp_offset;
+ u32 a64_insn;
#define check_imm(bits, imm) do { \
if ((((imm) > 0) && ((imm) >> (bits))) || \
@@ -478,28 +497,55 @@ emit_bswap_uxt:
/* dst = dst OP imm */
case BPF_ALU | BPF_ADD | BPF_K:
case BPF_ALU64 | BPF_ADD | BPF_K:
- emit_a64_mov_i(is64, tmp, imm, ctx);
- emit(A64_ADD(is64, dst, dst, tmp), ctx);
+ if (is_addsub_imm(imm)) {
+ emit(A64_ADD_I(is64, dst, dst, imm), ctx);
+ } else if (is_addsub_imm(-imm)) {
+ emit(A64_SUB_I(is64, dst, dst, -imm), ctx);
+ } else {
+ emit_a64_mov_i(is64, tmp, imm, ctx);
+ emit(A64_ADD(is64, dst, dst, tmp), ctx);
+ }
break;
case BPF_ALU | BPF_SUB | BPF_K:
case BPF_ALU64 | BPF_SUB | BPF_K:
- emit_a64_mov_i(is64, tmp, imm, ctx);
- emit(A64_SUB(is64, dst, dst, tmp), ctx);
+ if (is_addsub_imm(imm)) {
+ emit(A64_SUB_I(is64, dst, dst, imm), ctx);
+ } else if (is_addsub_imm(-imm)) {
+ emit(A64_ADD_I(is64, dst, dst, -imm), ctx);
+ } else {
+ emit_a64_mov_i(is64, tmp, imm, ctx);
+ emit(A64_SUB(is64, dst, dst, tmp), ctx);
+ }
break;
case BPF_ALU | BPF_AND | BPF_K:
case BPF_ALU64 | BPF_AND | BPF_K:
- emit_a64_mov_i(is64, tmp, imm, ctx);
- emit(A64_AND(is64, dst, dst, tmp), ctx);
+ a64_insn = A64_AND_I(is64, dst, dst, imm);
+ if (a64_insn != AARCH64_BREAK_FAULT) {
+ emit(a64_insn, ctx);
+ } else {
+ emit_a64_mov_i(is64, tmp, imm, ctx);
+ emit(A64_AND(is64, dst, dst, tmp), ctx);
+ }
break;
case BPF_ALU | BPF_OR | BPF_K:
case BPF_ALU64 | BPF_OR | BPF_K:
- emit_a64_mov_i(is64, tmp, imm, ctx);
- emit(A64_ORR(is64, dst, dst, tmp), ctx);
+ a64_insn = A64_ORR_I(is64, dst, dst, imm);
+ if (a64_insn != AARCH64_BREAK_FAULT) {
+ emit(a64_insn, ctx);
+ } else {
+ emit_a64_mov_i(is64, tmp, imm, ctx);
+ emit(A64_ORR(is64, dst, dst, tmp), ctx);
+ }
break;
case BPF_ALU | BPF_XOR | BPF_K:
case BPF_ALU64 | BPF_XOR | BPF_K:
- emit_a64_mov_i(is64, tmp, imm, ctx);
- emit(A64_EOR(is64, dst, dst, tmp), ctx);
+ a64_insn = A64_EOR_I(is64, dst, dst, imm);
+ if (a64_insn != AARCH64_BREAK_FAULT) {
+ emit(a64_insn, ctx);
+ } else {
+ emit_a64_mov_i(is64, tmp, imm, ctx);
+ emit(A64_EOR(is64, dst, dst, tmp), ctx);
+ }
break;
case BPF_ALU | BPF_MUL | BPF_K:
case BPF_ALU64 | BPF_MUL | BPF_K:
@@ -623,13 +669,24 @@ emit_cond_jmp:
case BPF_JMP32 | BPF_JSLT | BPF_K:
case BPF_JMP32 | BPF_JSGE | BPF_K:
case BPF_JMP32 | BPF_JSLE | BPF_K:
- emit_a64_mov_i(is64, tmp, imm, ctx);
- emit(A64_CMP(is64, dst, tmp), ctx);
+ if (is_addsub_imm(imm)) {
+ emit(A64_CMP_I(is64, dst, imm), ctx);
+ } else if (is_addsub_imm(-imm)) {
+ emit(A64_CMN_I(is64, dst, -imm), ctx);
+ } else {
+ emit_a64_mov_i(is64, tmp, imm, ctx);
+ emit(A64_CMP(is64, dst, tmp), ctx);
+ }
goto emit_cond_jmp;
case BPF_JMP | BPF_JSET | BPF_K:
case BPF_JMP32 | BPF_JSET | BPF_K:
- emit_a64_mov_i(is64, tmp, imm, ctx);
- emit(A64_TST(is64, dst, tmp), ctx);
+ a64_insn = A64_TST_I(is64, dst, imm);
+ if (a64_insn != AARCH64_BREAK_FAULT) {
+ emit(a64_insn, ctx);
+ } else {
+ emit_a64_mov_i(is64, tmp, imm, ctx);
+ emit(A64_TST(is64, dst, tmp), ctx);
+ }
goto emit_cond_jmp;
/* function call */
case BPF_JMP | BPF_CALL:
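The JIT changes above avoid a temporary register whenever the BPF immediate can be encoded directly: is_addsub_imm() accepts a plain 12-bit value or a 12-bit value shifted left by 12, the negated form is tried next, and only then does the code fall back to emit_a64_mov_i(). A small user-space sketch of that classification, where classify() and main() are purely illustrative additions, might read:

#include <stdint.h>
#include <stdio.h>

static int is_addsub_imm(uint32_t imm)
{
	/* Either imm12 or imm12 shifted left by 12. */
	return !(imm & ~0xfffU) || !(imm & ~0xfff000U);
}

static const char *classify(int32_t imm)
{
	if (is_addsub_imm(imm))
		return "ADD/SUB immediate form";
	if (is_addsub_imm(-imm))
		return "negated ADD/SUB immediate form";
	return "load into a temporary register first";
}

int main(void)
{
	printf("%#x: %s\n", 4095, classify(4095));		/* fits in imm12 */
	printf("%d: %s\n", -16, classify(-16));			/* -(-16) fits in imm12 */
	printf("%#x: %s\n", 0x123456, classify(0x123456));	/* needs a temp register */
	return 0;
}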
diff --git a/arch/c6x/lib/checksum.c b/arch/c6x/lib/checksum.c
index 46940844c553..335ca4900808 100644
--- a/arch/c6x/lib/checksum.c
+++ b/arch/c6x/lib/checksum.c
@@ -4,28 +4,6 @@
#include <linux/module.h>
#include <net/checksum.h>
-#include <asm/byteorder.h>
-
-/*
- * copy from fs while checksumming, otherwise like csum_partial
- */
-__wsum
-csum_partial_copy_from_user(const void __user *src, void *dst, int len,
- __wsum sum, int *csum_err)
-{
- int missing;
-
- missing = __copy_from_user(dst, src, len);
- if (missing) {
- memset(dst + len - missing, 0, missing);
- *csum_err = -EFAULT;
- } else
- *csum_err = 0;
-
- return csum_partial(dst, len, sum);
-}
-EXPORT_SYMBOL(csum_partial_copy_from_user);
-
/* These are from csum_64plus.S */
EXPORT_SYMBOL(csum_partial);
EXPORT_SYMBOL(csum_partial_copy);
diff --git a/arch/ia64/include/asm/checksum.h b/arch/ia64/include/asm/checksum.h
index 0ed18bc3f6cf..2a1c64629cdc 100644
--- a/arch/ia64/include/asm/checksum.h
+++ b/arch/ia64/include/asm/checksum.h
@@ -37,16 +37,6 @@ extern __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
*/
extern __wsum csum_partial(const void *buff, int len, __wsum sum);
-/*
- * Same as csum_partial, but copies from src while it checksums.
- *
- * Here it is even more important to align src and dst on a 32-bit (or
- * even better 64-bit) boundary.
- */
-extern __wsum csum_partial_copy_from_user(const void __user *src, void *dst,
- int len, __wsum sum,
- int *errp);
-
extern __wsum csum_partial_copy_nocheck(const void *src, void *dst,
int len, __wsum sum);
diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c
index f69f3fe0532e..a54eacbc61a9 100644
--- a/arch/ia64/kernel/efi.c
+++ b/arch/ia64/kernel/efi.c
@@ -57,12 +57,12 @@ unsigned long hcdp_phys = EFI_INVALID_TABLE_ADDR;
unsigned long sal_systab_phys = EFI_INVALID_TABLE_ADDR;
static const efi_config_table_type_t arch_tables[] __initconst = {
- {ESI_TABLE_GUID, "ESI", &esi_phys},
- {HCDP_TABLE_GUID, "HCDP", &hcdp_phys},
- {MPS_TABLE_GUID, "MPS", &mps_phys},
- {PROCESSOR_ABSTRACTION_LAYER_OVERWRITE_GUID, "PALO", &palo_phys},
- {SAL_SYSTEM_TABLE_GUID, "SALsystab", &sal_systab_phys},
- {NULL_GUID, NULL, 0},
+ {ESI_TABLE_GUID, &esi_phys, "ESI" },
+ {HCDP_TABLE_GUID, &hcdp_phys, "HCDP" },
+ {MPS_TABLE_GUID, &mps_phys, "MPS" },
+ {PROCESSOR_ABSTRACTION_LAYER_OVERWRITE_GUID, &palo_phys, "PALO" },
+ {SAL_SYSTEM_TABLE_GUID, &sal_systab_phys, "SALsystab" },
+ {},
};
extern efi_status_t efi_call_phys (void *, ...);
diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl
index 042911e670b8..49e325b604b3 100644
--- a/arch/ia64/kernel/syscalls/syscall.tbl
+++ b/arch/ia64/kernel/syscalls/syscall.tbl
@@ -358,3 +358,4 @@
# 435 reserved for clone3
437 common openat2 sys_openat2
438 common pidfd_getfd sys_pidfd_getfd
+439 common faccessat2 sys_faccessat2
diff --git a/arch/ia64/lib/csum_partial_copy.c b/arch/ia64/lib/csum_partial_copy.c
index bf9396b1ed32..6e82e0be8040 100644
--- a/arch/ia64/lib/csum_partial_copy.c
+++ b/arch/ia64/lib/csum_partial_copy.c
@@ -12,7 +12,7 @@
#include <linux/types.h>
#include <linux/string.h>
-#include <linux/uaccess.h>
+#include <net/checksum.h>
/*
* XXX Fixme: those 2 inlines are meant for debugging and will go away
@@ -103,39 +103,11 @@ out:
* This is very ugly but temporary. THIS NEEDS SERIOUS ENHANCEMENTS.
* But it's very tricky to get right even in C.
*/
-extern unsigned long do_csum(const unsigned char *, long);
-
-__wsum
-csum_partial_copy_from_user(const void __user *src, void *dst,
- int len, __wsum psum, int *errp)
-{
- unsigned long result;
-
- /* XXX Fixme
- * for now we separate the copy from checksum for obvious
- * alignment difficulties. Look at the Alpha code and you'll be
- * scared.
- */
-
- if (__copy_from_user(dst, src, len) != 0 && errp)
- *errp = -EFAULT;
-
- result = do_csum(dst, len);
-
- /* add in old sum, and carry.. */
- result += (__force u32)psum;
- /* 32+c bits -> 32 bits */
- result = (result & 0xffffffff) + (result >> 32);
- return (__force __wsum)result;
-}
-
-EXPORT_SYMBOL(csum_partial_copy_from_user);
-
__wsum
csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum)
{
- return csum_partial_copy_from_user((__force const void __user *)src,
- dst, len, sum, NULL);
+ memcpy(dst, src, len);
+ return csum_partial(dst, len, sum);
}
EXPORT_SYMBOL(csum_partial_copy_nocheck);
diff --git a/arch/m68k/amiga/config.c b/arch/m68k/amiga/config.c
index c32ab8041cf6..4eb911d64e8d 100644
--- a/arch/m68k/amiga/config.c
+++ b/arch/m68k/amiga/config.c
@@ -221,6 +221,7 @@ static void __init amiga_identify(void)
case AMI_1200:
AMIGAHW_SET(A1200_IDE);
AMIGAHW_SET(PCMCIA);
+ fallthrough;
case AMI_500:
case AMI_500PLUS:
case AMI_1000:
@@ -233,7 +234,7 @@ static void __init amiga_identify(void)
case AMI_3000T:
AMIGAHW_SET(AMBER_FF);
AMIGAHW_SET(MAGIC_REKICK);
- /* fall through */
+ fallthrough;
case AMI_3000PLUS:
AMIGAHW_SET(A3000_SCSI);
AMIGAHW_SET(A3000_CLK);
@@ -242,7 +243,7 @@ static void __init amiga_identify(void)
case AMI_4000T:
AMIGAHW_SET(A4000_SCSI);
- /* fall through */
+ fallthrough;
case AMI_4000:
AMIGAHW_SET(A4000_IDE);
AMIGAHW_SET(A3000_CLK);
@@ -628,7 +629,7 @@ struct savekmsg {
unsigned long magic2; /* SAVEKMSG_MAGIC2 */
unsigned long magicptr; /* address of magic1 */
unsigned long size;
- char data[0];
+ char data[];
};
static struct savekmsg *savekmsg;
diff --git a/arch/m68k/coldfire/device.c b/arch/m68k/coldfire/device.c
index b4103b6bfdeb..9ef4ec0aea00 100644
--- a/arch/m68k/coldfire/device.c
+++ b/arch/m68k/coldfire/device.c
@@ -22,6 +22,7 @@
#include <asm/mcfqspi.h>
#include <linux/platform_data/edma.h>
#include <linux/platform_data/dma-mcf-edma.h>
+#include <linux/platform_data/mmc-esdhc-mcf.h>
/*
* All current ColdFire parts contain from 2, 3, 4 or 10 UARTS.
@@ -551,9 +552,35 @@ static struct platform_device mcf_edma = {
.platform_data = &mcf_edma_data,
}
};
-
#endif /* IS_ENABLED(CONFIG_MCF_EDMA) */
+#if IS_ENABLED(CONFIG_MMC)
+static struct mcf_esdhc_platform_data mcf_esdhc_data = {
+ .max_bus_width = 4,
+ .cd_type = ESDHC_CD_NONE,
+};
+
+static struct resource mcf_esdhc_resources[] = {
+ {
+ .start = MCFSDHC_BASE,
+ .end = MCFSDHC_BASE + MCFSDHC_SIZE - 1,
+ .flags = IORESOURCE_MEM,
+ }, {
+ .start = MCF_IRQ_SDHC,
+ .end = MCF_IRQ_SDHC,
+ .flags = IORESOURCE_IRQ,
+ },
+};
+
+static struct platform_device mcf_esdhc = {
+ .name = "sdhci-esdhc-mcf",
+ .id = 0,
+ .num_resources = ARRAY_SIZE(mcf_esdhc_resources),
+ .resource = mcf_esdhc_resources,
+ .dev.platform_data = &mcf_esdhc_data,
+};
+#endif /* IS_ENABLED(CONFIG_MMC) */
+
static struct platform_device *mcf_devices[] __initdata = {
&mcf_uart,
#if IS_ENABLED(CONFIG_FEC)
@@ -586,6 +613,9 @@ static struct platform_device *mcf_devices[] __initdata = {
#if IS_ENABLED(CONFIG_MCF_EDMA)
&mcf_edma,
#endif
+#if IS_ENABLED(CONFIG_MMC)
+ &mcf_esdhc,
+#endif
};
/*
@@ -614,4 +644,3 @@ static int __init mcf_init_devices(void)
}
arch_initcall(mcf_init_devices);
-
diff --git a/arch/m68k/coldfire/m5441x.c b/arch/m68k/coldfire/m5441x.c
index 5bd24c9b865d..1e5259a652d1 100644
--- a/arch/m68k/coldfire/m5441x.c
+++ b/arch/m68k/coldfire/m5441x.c
@@ -52,7 +52,7 @@ DEFINE_CLK(0, "mcfssi.0", 47, MCF_CLK);
DEFINE_CLK(0, "pll.0", 48, MCF_CLK);
DEFINE_CLK(0, "mcfrng.0", 49, MCF_CLK);
DEFINE_CLK(0, "mcfssi.1", 50, MCF_CLK);
-DEFINE_CLK(0, "mcfsdhc.0", 51, MCF_CLK);
+DEFINE_CLK(0, "sdhci-esdhc-mcf.0", 51, MCF_CLK);
DEFINE_CLK(0, "enet-fec.0", 53, MCF_CLK);
DEFINE_CLK(0, "enet-fec.1", 54, MCF_CLK);
DEFINE_CLK(0, "switch.0", 55, MCF_CLK);
@@ -74,6 +74,10 @@ DEFINE_CLK(1, "mcfpwm.0", 34, MCF_BUSCLK);
DEFINE_CLK(1, "sys.0", 36, MCF_BUSCLK);
DEFINE_CLK(1, "gpio.0", 37, MCF_BUSCLK);
+DEFINE_CLK(2, "ipg.0", 0, MCF_CLK);
+DEFINE_CLK(2, "ahb.0", 1, MCF_CLK);
+DEFINE_CLK(2, "per.0", 2, MCF_CLK);
+
struct clk *mcf_clks[] = {
&__clk_0_2,
&__clk_0_8,
@@ -131,6 +135,11 @@ struct clk *mcf_clks[] = {
&__clk_1_34,
&__clk_1_36,
&__clk_1_37,
+
+ &__clk_2_0,
+ &__clk_2_1,
+ &__clk_2_2,
+
NULL,
};
@@ -151,6 +160,7 @@ static struct clk * const enable_clks[] __initconst = {
&__clk_0_33, /* pit.1 */
&__clk_0_37, /* eport */
&__clk_0_48, /* pll */
+ &__clk_0_51, /* esdhc */
&__clk_1_36, /* CCM/reset module/Power management */
&__clk_1_37, /* gpio */
@@ -194,6 +204,21 @@ static struct clk * const disable_clks[] __initconst = {
&__clk_1_29, /* uart 9 */
};
+static void __clk_enable2(struct clk *clk)
+{
+ __raw_writel(__raw_readl(MCFSDHC_CLK) | (1 << clk->slot), MCFSDHC_CLK);
+}
+
+static void __clk_disable2(struct clk *clk)
+{
+ __raw_writel(__raw_readl(MCFSDHC_CLK) & ~(1 << clk->slot), MCFSDHC_CLK);
+}
+
+struct clk_ops clk_ops2 = {
+ .enable = __clk_enable2,
+ .disable = __clk_disable2,
+};
+
static void __init m5441x_clk_init(void)
{
unsigned i;
diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig
index 5b3a273ae3da..888b75e7fd79 100644
--- a/arch/m68k/configs/amiga_defconfig
+++ b/arch/m68k/configs/amiga_defconfig
@@ -100,7 +100,6 @@ CONFIG_NF_CONNTRACK_SANE=m
CONFIG_NF_CONNTRACK_SIP=m
CONFIG_NF_CONNTRACK_TFTP=m
CONFIG_NF_TABLES=m
-CONFIG_NF_TABLES_SET=m
CONFIG_NF_TABLES_INET=y
CONFIG_NF_TABLES_NETDEV=y
CONFIG_NFT_NUMGEN=m
@@ -381,6 +380,7 @@ CONFIG_IPVLAN=m
CONFIG_IPVTAP=m
CONFIG_VXLAN=m
CONFIG_GENEVE=m
+CONFIG_BAREUDP=m
CONFIG_GTP=m
CONFIG_MACSEC=m
CONFIG_NETCONSOLE=m
@@ -452,6 +452,7 @@ CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_MSM6242=m
CONFIG_RTC_DRV_RP5C01=m
# CONFIG_VIRTIO_MENU is not set
+# CONFIG_VHOST_MENU is not set
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_DAX=m
CONFIG_EXT4_FS=y
@@ -472,6 +473,7 @@ CONFIG_ZISOFS=y
CONFIG_UDF_FS=m
CONFIG_MSDOS_FS=m
CONFIG_VFAT_FS=m
+CONFIG_EXFAT_FS=m
CONFIG_PROC_KCORE=y
CONFIG_PROC_CHILDREN=y
CONFIG_TMPFS=y
@@ -619,9 +621,11 @@ CONFIG_XZ_DEC_TEST=m
CONFIG_STRING_SELFTEST=m
# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
CONFIG_MAGIC_SYSRQ=y
+CONFIG_TEST_LOCKUP=m
CONFIG_WW_MUTEX_SELFTEST=m
CONFIG_EARLY_PRINTK=y
CONFIG_TEST_LIST_SORT=m
+CONFIG_TEST_MIN_HEAP=m
CONFIG_TEST_SORT=m
CONFIG_REED_SOLOMON_TEST=m
CONFIG_ATOMIC64_SELFTEST=m
diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig
index 0bf0907a7c80..45303846b659 100644
--- a/arch/m68k/configs/apollo_defconfig
+++ b/arch/m68k/configs/apollo_defconfig
@@ -96,7 +96,6 @@ CONFIG_NF_CONNTRACK_SANE=m
CONFIG_NF_CONNTRACK_SIP=m
CONFIG_NF_CONNTRACK_TFTP=m
CONFIG_NF_TABLES=m
-CONFIG_NF_TABLES_SET=m
CONFIG_NF_TABLES_INET=y
CONFIG_NF_TABLES_NETDEV=y
CONFIG_NFT_NUMGEN=m
@@ -360,6 +359,7 @@ CONFIG_IPVLAN=m
CONFIG_IPVTAP=m
CONFIG_VXLAN=m
CONFIG_GENEVE=m
+CONFIG_BAREUDP=m
CONFIG_GTP=m
CONFIG_MACSEC=m
CONFIG_NETCONSOLE=m
@@ -408,6 +408,7 @@ CONFIG_RTC_CLASS=y
# CONFIG_RTC_NVMEM is not set
CONFIG_RTC_DRV_GENERIC=m
# CONFIG_VIRTIO_MENU is not set
+# CONFIG_VHOST_MENU is not set
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_DAX=m
CONFIG_EXT4_FS=y
@@ -428,6 +429,7 @@ CONFIG_ZISOFS=y
CONFIG_UDF_FS=m
CONFIG_MSDOS_FS=m
CONFIG_VFAT_FS=m
+CONFIG_EXFAT_FS=m
CONFIG_PROC_KCORE=y
CONFIG_PROC_CHILDREN=y
CONFIG_TMPFS=y
@@ -575,9 +577,11 @@ CONFIG_XZ_DEC_TEST=m
CONFIG_STRING_SELFTEST=m
# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
CONFIG_MAGIC_SYSRQ=y
+CONFIG_TEST_LOCKUP=m
CONFIG_WW_MUTEX_SELFTEST=m
CONFIG_EARLY_PRINTK=y
CONFIG_TEST_LIST_SORT=m
+CONFIG_TEST_MIN_HEAP=m
CONFIG_TEST_SORT=m
CONFIG_REED_SOLOMON_TEST=m
CONFIG_ATOMIC64_SELFTEST=m
diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig
index 876e69292294..de824c1bc3d3 100644
--- a/arch/m68k/configs/atari_defconfig
+++ b/arch/m68k/configs/atari_defconfig
@@ -103,7 +103,6 @@ CONFIG_NF_CONNTRACK_SANE=m
CONFIG_NF_CONNTRACK_SIP=m
CONFIG_NF_CONNTRACK_TFTP=m
CONFIG_NF_TABLES=m
-CONFIG_NF_TABLES_SET=m
CONFIG_NF_TABLES_INET=y
CONFIG_NF_TABLES_NETDEV=y
CONFIG_NFT_NUMGEN=m
@@ -376,6 +375,7 @@ CONFIG_IPVLAN=m
CONFIG_IPVTAP=m
CONFIG_VXLAN=m
CONFIG_GENEVE=m
+CONFIG_BAREUDP=m
CONFIG_GTP=m
CONFIG_MACSEC=m
CONFIG_NETCONSOLE=m
@@ -430,6 +430,7 @@ CONFIG_RTC_CLASS=y
# CONFIG_RTC_NVMEM is not set
CONFIG_RTC_DRV_GENERIC=m
# CONFIG_VIRTIO_MENU is not set
+# CONFIG_VHOST_MENU is not set
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_DAX=m
CONFIG_EXT4_FS=y
@@ -450,6 +451,7 @@ CONFIG_ZISOFS=y
CONFIG_UDF_FS=m
CONFIG_MSDOS_FS=m
CONFIG_VFAT_FS=m
+CONFIG_EXFAT_FS=m
CONFIG_PROC_KCORE=y
CONFIG_PROC_CHILDREN=y
CONFIG_TMPFS=y
@@ -597,9 +599,11 @@ CONFIG_XZ_DEC_TEST=m
CONFIG_STRING_SELFTEST=m
# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
CONFIG_MAGIC_SYSRQ=y
+CONFIG_TEST_LOCKUP=m
CONFIG_WW_MUTEX_SELFTEST=m
CONFIG_EARLY_PRINTK=y
CONFIG_TEST_LIST_SORT=m
+CONFIG_TEST_MIN_HEAP=m
CONFIG_TEST_SORT=m
CONFIG_REED_SOLOMON_TEST=m
CONFIG_ATOMIC64_SELFTEST=m
diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig
index aa59c242e715..071839ca6a59 100644
--- a/arch/m68k/configs/bvme6000_defconfig
+++ b/arch/m68k/configs/bvme6000_defconfig
@@ -93,7 +93,6 @@ CONFIG_NF_CONNTRACK_SANE=m
CONFIG_NF_CONNTRACK_SIP=m
CONFIG_NF_CONNTRACK_TFTP=m
CONFIG_NF_TABLES=m
-CONFIG_NF_TABLES_SET=m
CONFIG_NF_TABLES_INET=y
CONFIG_NF_TABLES_NETDEV=y
CONFIG_NFT_NUMGEN=m
@@ -358,6 +357,7 @@ CONFIG_IPVLAN=m
CONFIG_IPVTAP=m
CONFIG_VXLAN=m
CONFIG_GENEVE=m
+CONFIG_BAREUDP=m
CONFIG_GTP=m
CONFIG_MACSEC=m
CONFIG_NETCONSOLE=m
@@ -401,6 +401,7 @@ CONFIG_RTC_CLASS=y
# CONFIG_RTC_NVMEM is not set
CONFIG_RTC_DRV_GENERIC=m
# CONFIG_VIRTIO_MENU is not set
+# CONFIG_VHOST_MENU is not set
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_DAX=m
CONFIG_EXT4_FS=y
@@ -421,6 +422,7 @@ CONFIG_ZISOFS=y
CONFIG_UDF_FS=m
CONFIG_MSDOS_FS=m
CONFIG_VFAT_FS=m
+CONFIG_EXFAT_FS=m
CONFIG_PROC_KCORE=y
CONFIG_PROC_CHILDREN=y
CONFIG_TMPFS=y
@@ -568,9 +570,11 @@ CONFIG_XZ_DEC_TEST=m
CONFIG_STRING_SELFTEST=m
# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
CONFIG_MAGIC_SYSRQ=y
+CONFIG_TEST_LOCKUP=m
CONFIG_WW_MUTEX_SELFTEST=m
CONFIG_EARLY_PRINTK=y
CONFIG_TEST_LIST_SORT=m
+CONFIG_TEST_MIN_HEAP=m
CONFIG_TEST_SORT=m
CONFIG_REED_SOLOMON_TEST=m
CONFIG_ATOMIC64_SELFTEST=m
diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig
index 308cd93929a9..37ac7b019ec1 100644
--- a/arch/m68k/configs/hp300_defconfig
+++ b/arch/m68k/configs/hp300_defconfig
@@ -95,7 +95,6 @@ CONFIG_NF_CONNTRACK_SANE=m
CONFIG_NF_CONNTRACK_SIP=m
CONFIG_NF_CONNTRACK_TFTP=m
CONFIG_NF_TABLES=m
-CONFIG_NF_TABLES_SET=m
CONFIG_NF_TABLES_INET=y
CONFIG_NF_TABLES_NETDEV=y
CONFIG_NFT_NUMGEN=m
@@ -359,6 +358,7 @@ CONFIG_IPVLAN=m
CONFIG_IPVTAP=m
CONFIG_VXLAN=m
CONFIG_GENEVE=m
+CONFIG_BAREUDP=m
CONFIG_GTP=m
CONFIG_MACSEC=m
CONFIG_NETCONSOLE=m
@@ -410,6 +410,7 @@ CONFIG_RTC_CLASS=y
# CONFIG_RTC_NVMEM is not set
CONFIG_RTC_DRV_GENERIC=m
# CONFIG_VIRTIO_MENU is not set
+# CONFIG_VHOST_MENU is not set
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_DAX=m
CONFIG_EXT4_FS=y
@@ -430,6 +431,7 @@ CONFIG_ZISOFS=y
CONFIG_UDF_FS=m
CONFIG_MSDOS_FS=m
CONFIG_VFAT_FS=m
+CONFIG_EXFAT_FS=m
CONFIG_PROC_KCORE=y
CONFIG_PROC_CHILDREN=y
CONFIG_TMPFS=y
@@ -577,9 +579,11 @@ CONFIG_XZ_DEC_TEST=m
CONFIG_STRING_SELFTEST=m
# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
CONFIG_MAGIC_SYSRQ=y
+CONFIG_TEST_LOCKUP=m
CONFIG_WW_MUTEX_SELFTEST=m
CONFIG_EARLY_PRINTK=y
CONFIG_TEST_LIST_SORT=m
+CONFIG_TEST_MIN_HEAP=m
CONFIG_TEST_SORT=m
CONFIG_REED_SOLOMON_TEST=m
CONFIG_ATOMIC64_SELFTEST=m
diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig
index 0bc210ace870..608779866260 100644
--- a/arch/m68k/configs/mac_defconfig
+++ b/arch/m68k/configs/mac_defconfig
@@ -94,7 +94,6 @@ CONFIG_NF_CONNTRACK_SANE=m
CONFIG_NF_CONNTRACK_SIP=m
CONFIG_NF_CONNTRACK_TFTP=m
CONFIG_NF_TABLES=m
-CONFIG_NF_TABLES_SET=m
CONFIG_NF_TABLES_INET=y
CONFIG_NF_TABLES_NETDEV=y
CONFIG_NFT_NUMGEN=m
@@ -375,6 +374,7 @@ CONFIG_IPVLAN=m
CONFIG_IPVTAP=m
CONFIG_VXLAN=m
CONFIG_GENEVE=m
+CONFIG_BAREUDP=m
CONFIG_GTP=m
CONFIG_MACSEC=m
CONFIG_NETCONSOLE=m
@@ -432,6 +432,7 @@ CONFIG_RTC_CLASS=y
# CONFIG_RTC_NVMEM is not set
CONFIG_RTC_DRV_GENERIC=m
# CONFIG_VIRTIO_MENU is not set
+# CONFIG_VHOST_MENU is not set
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_DAX=m
CONFIG_EXT4_FS=y
@@ -452,6 +453,7 @@ CONFIG_ZISOFS=y
CONFIG_UDF_FS=m
CONFIG_MSDOS_FS=m
CONFIG_VFAT_FS=m
+CONFIG_EXFAT_FS=m
CONFIG_PROC_KCORE=y
CONFIG_PROC_CHILDREN=y
CONFIG_TMPFS=y
@@ -599,9 +601,11 @@ CONFIG_XZ_DEC_TEST=m
CONFIG_STRING_SELFTEST=m
# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
CONFIG_MAGIC_SYSRQ=y
+CONFIG_TEST_LOCKUP=m
CONFIG_WW_MUTEX_SELFTEST=m
CONFIG_EARLY_PRINTK=y
CONFIG_TEST_LIST_SORT=m
+CONFIG_TEST_MIN_HEAP=m
CONFIG_TEST_SORT=m
CONFIG_REED_SOLOMON_TEST=m
CONFIG_ATOMIC64_SELFTEST=m
diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig
index 3b3b832dee80..0abb53c38c20 100644
--- a/arch/m68k/configs/multi_defconfig
+++ b/arch/m68k/configs/multi_defconfig
@@ -114,7 +114,6 @@ CONFIG_NF_CONNTRACK_SANE=m
CONFIG_NF_CONNTRACK_SIP=m
CONFIG_NF_CONNTRACK_TFTP=m
CONFIG_NF_TABLES=m
-CONFIG_NF_TABLES_SET=m
CONFIG_NF_TABLES_INET=y
CONFIG_NF_TABLES_NETDEV=y
CONFIG_NFT_NUMGEN=m
@@ -419,6 +418,7 @@ CONFIG_IPVLAN=m
CONFIG_IPVTAP=m
CONFIG_VXLAN=m
CONFIG_GENEVE=m
+CONFIG_BAREUDP=m
CONFIG_GTP=m
CONFIG_MACSEC=m
CONFIG_NETCONSOLE=m
@@ -518,6 +518,7 @@ CONFIG_RTC_DRV_MSM6242=m
CONFIG_RTC_DRV_RP5C01=m
CONFIG_RTC_DRV_GENERIC=m
# CONFIG_VIRTIO_MENU is not set
+# CONFIG_VHOST_MENU is not set
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_DAX=m
CONFIG_EXT4_FS=y
@@ -538,6 +539,7 @@ CONFIG_ZISOFS=y
CONFIG_UDF_FS=m
CONFIG_MSDOS_FS=m
CONFIG_VFAT_FS=m
+CONFIG_EXFAT_FS=m
CONFIG_PROC_KCORE=y
CONFIG_PROC_CHILDREN=y
CONFIG_TMPFS=y
@@ -685,9 +687,11 @@ CONFIG_XZ_DEC_TEST=m
CONFIG_STRING_SELFTEST=m
# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
CONFIG_MAGIC_SYSRQ=y
+CONFIG_TEST_LOCKUP=m
CONFIG_WW_MUTEX_SELFTEST=m
CONFIG_EARLY_PRINTK=y
CONFIG_TEST_LIST_SORT=m
+CONFIG_TEST_MIN_HEAP=m
CONFIG_TEST_SORT=m
CONFIG_REED_SOLOMON_TEST=m
CONFIG_ATOMIC64_SELFTEST=m
diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig
index e3633c66926f..cb14c234d3ad 100644
--- a/arch/m68k/configs/mvme147_defconfig
+++ b/arch/m68k/configs/mvme147_defconfig
@@ -92,7 +92,6 @@ CONFIG_NF_CONNTRACK_SANE=m
CONFIG_NF_CONNTRACK_SIP=m
CONFIG_NF_CONNTRACK_TFTP=m
CONFIG_NF_TABLES=m
-CONFIG_NF_TABLES_SET=m
CONFIG_NF_TABLES_INET=y
CONFIG_NF_TABLES_NETDEV=y
CONFIG_NFT_NUMGEN=m
@@ -357,6 +356,7 @@ CONFIG_IPVLAN=m
CONFIG_IPVTAP=m
CONFIG_VXLAN=m
CONFIG_GENEVE=m
+CONFIG_BAREUDP=m
CONFIG_GTP=m
CONFIG_MACSEC=m
CONFIG_NETCONSOLE=m
@@ -400,6 +400,7 @@ CONFIG_RTC_CLASS=y
# CONFIG_RTC_NVMEM is not set
CONFIG_RTC_DRV_GENERIC=m
# CONFIG_VIRTIO_MENU is not set
+# CONFIG_VHOST_MENU is not set
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_DAX=m
CONFIG_EXT4_FS=y
@@ -420,6 +421,7 @@ CONFIG_ZISOFS=y
CONFIG_UDF_FS=m
CONFIG_MSDOS_FS=m
CONFIG_VFAT_FS=m
+CONFIG_EXFAT_FS=m
CONFIG_PROC_KCORE=y
CONFIG_PROC_CHILDREN=y
CONFIG_TMPFS=y
@@ -567,9 +569,11 @@ CONFIG_XZ_DEC_TEST=m
CONFIG_STRING_SELFTEST=m
# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
CONFIG_MAGIC_SYSRQ=y
+CONFIG_TEST_LOCKUP=m
CONFIG_WW_MUTEX_SELFTEST=m
CONFIG_EARLY_PRINTK=y
CONFIG_TEST_LIST_SORT=m
+CONFIG_TEST_MIN_HEAP=m
CONFIG_TEST_SORT=m
CONFIG_REED_SOLOMON_TEST=m
CONFIG_ATOMIC64_SELFTEST=m
diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig
index 88b3f7f9f146..e8a1920aded7 100644
--- a/arch/m68k/configs/mvme16x_defconfig
+++ b/arch/m68k/configs/mvme16x_defconfig
@@ -93,7 +93,6 @@ CONFIG_NF_CONNTRACK_SANE=m
CONFIG_NF_CONNTRACK_SIP=m
CONFIG_NF_CONNTRACK_TFTP=m
CONFIG_NF_TABLES=m
-CONFIG_NF_TABLES_SET=m
CONFIG_NF_TABLES_INET=y
CONFIG_NF_TABLES_NETDEV=y
CONFIG_NFT_NUMGEN=m
@@ -358,6 +357,7 @@ CONFIG_IPVLAN=m
CONFIG_IPVTAP=m
CONFIG_VXLAN=m
CONFIG_GENEVE=m
+CONFIG_BAREUDP=m
CONFIG_GTP=m
CONFIG_MACSEC=m
CONFIG_NETCONSOLE=m
@@ -401,6 +401,7 @@ CONFIG_RTC_CLASS=y
# CONFIG_RTC_NVMEM is not set
CONFIG_RTC_DRV_GENERIC=m
# CONFIG_VIRTIO_MENU is not set
+# CONFIG_VHOST_MENU is not set
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_DAX=m
CONFIG_EXT4_FS=y
@@ -421,6 +422,7 @@ CONFIG_ZISOFS=y
CONFIG_UDF_FS=m
CONFIG_MSDOS_FS=m
CONFIG_VFAT_FS=m
+CONFIG_EXFAT_FS=m
CONFIG_PROC_KCORE=y
CONFIG_PROC_CHILDREN=y
CONFIG_TMPFS=y
@@ -568,9 +570,11 @@ CONFIG_XZ_DEC_TEST=m
CONFIG_STRING_SELFTEST=m
# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
CONFIG_MAGIC_SYSRQ=y
+CONFIG_TEST_LOCKUP=m
CONFIG_WW_MUTEX_SELFTEST=m
CONFIG_EARLY_PRINTK=y
CONFIG_TEST_LIST_SORT=m
+CONFIG_TEST_MIN_HEAP=m
CONFIG_TEST_SORT=m
CONFIG_REED_SOLOMON_TEST=m
CONFIG_ATOMIC64_SELFTEST=m
diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig
index 3dd5b536921e..2cbf416fc725 100644
--- a/arch/m68k/configs/q40_defconfig
+++ b/arch/m68k/configs/q40_defconfig
@@ -94,7 +94,6 @@ CONFIG_NF_CONNTRACK_SANE=m
CONFIG_NF_CONNTRACK_SIP=m
CONFIG_NF_CONNTRACK_TFTP=m
CONFIG_NF_TABLES=m
-CONFIG_NF_TABLES_SET=m
CONFIG_NF_TABLES_INET=y
CONFIG_NF_TABLES_NETDEV=y
CONFIG_NFT_NUMGEN=m
@@ -365,6 +364,7 @@ CONFIG_IPVLAN=m
CONFIG_IPVTAP=m
CONFIG_VXLAN=m
CONFIG_GENEVE=m
+CONFIG_BAREUDP=m
CONFIG_GTP=m
CONFIG_MACSEC=m
CONFIG_NETCONSOLE=m
@@ -419,6 +419,7 @@ CONFIG_RTC_CLASS=y
# CONFIG_RTC_NVMEM is not set
CONFIG_RTC_DRV_GENERIC=m
# CONFIG_VIRTIO_MENU is not set
+# CONFIG_VHOST_MENU is not set
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_DAX=m
CONFIG_EXT4_FS=y
@@ -439,6 +440,7 @@ CONFIG_ZISOFS=y
CONFIG_UDF_FS=m
CONFIG_MSDOS_FS=m
CONFIG_VFAT_FS=m
+CONFIG_EXFAT_FS=m
CONFIG_PROC_KCORE=y
CONFIG_PROC_CHILDREN=y
CONFIG_TMPFS=y
@@ -586,9 +588,11 @@ CONFIG_XZ_DEC_TEST=m
CONFIG_STRING_SELFTEST=m
# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
CONFIG_MAGIC_SYSRQ=y
+CONFIG_TEST_LOCKUP=m
CONFIG_WW_MUTEX_SELFTEST=m
CONFIG_EARLY_PRINTK=y
CONFIG_TEST_LIST_SORT=m
+CONFIG_TEST_MIN_HEAP=m
CONFIG_TEST_SORT=m
CONFIG_REED_SOLOMON_TEST=m
CONFIG_ATOMIC64_SELFTEST=m
diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig
index 715e015ed270..fed3cc7abcc4 100644
--- a/arch/m68k/configs/sun3_defconfig
+++ b/arch/m68k/configs/sun3_defconfig
@@ -90,7 +90,6 @@ CONFIG_NF_CONNTRACK_SANE=m
CONFIG_NF_CONNTRACK_SIP=m
CONFIG_NF_CONNTRACK_TFTP=m
CONFIG_NF_TABLES=m
-CONFIG_NF_TABLES_SET=m
CONFIG_NF_TABLES_INET=y
CONFIG_NF_TABLES_NETDEV=y
CONFIG_NFT_NUMGEN=m
@@ -355,6 +354,7 @@ CONFIG_IPVLAN=m
CONFIG_IPVTAP=m
CONFIG_VXLAN=m
CONFIG_GENEVE=m
+CONFIG_BAREUDP=m
CONFIG_GTP=m
CONFIG_MACSEC=m
CONFIG_NETCONSOLE=m
@@ -403,6 +403,7 @@ CONFIG_RTC_CLASS=y
# CONFIG_RTC_NVMEM is not set
CONFIG_RTC_DRV_GENERIC=m
# CONFIG_VIRTIO_MENU is not set
+# CONFIG_VHOST_MENU is not set
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_DAX=m
CONFIG_EXT4_FS=y
@@ -423,6 +424,7 @@ CONFIG_ZISOFS=y
CONFIG_UDF_FS=m
CONFIG_MSDOS_FS=m
CONFIG_VFAT_FS=m
+CONFIG_EXFAT_FS=m
CONFIG_PROC_KCORE=y
CONFIG_PROC_CHILDREN=y
CONFIG_TMPFS=y
@@ -570,8 +572,10 @@ CONFIG_XZ_DEC_TEST=m
CONFIG_STRING_SELFTEST=m
# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
CONFIG_MAGIC_SYSRQ=y
+CONFIG_TEST_LOCKUP=m
CONFIG_WW_MUTEX_SELFTEST=m
CONFIG_TEST_LIST_SORT=m
+CONFIG_TEST_MIN_HEAP=m
CONFIG_TEST_SORT=m
CONFIG_REED_SOLOMON_TEST=m
CONFIG_ATOMIC64_SELFTEST=m
diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig
index f9ff129ac7c2..0954fde256e6 100644
--- a/arch/m68k/configs/sun3x_defconfig
+++ b/arch/m68k/configs/sun3x_defconfig
@@ -90,7 +90,6 @@ CONFIG_NF_CONNTRACK_SANE=m
CONFIG_NF_CONNTRACK_SIP=m
CONFIG_NF_CONNTRACK_TFTP=m
CONFIG_NF_TABLES=m
-CONFIG_NF_TABLES_SET=m
CONFIG_NF_TABLES_INET=y
CONFIG_NF_TABLES_NETDEV=y
CONFIG_NFT_NUMGEN=m
@@ -355,6 +354,7 @@ CONFIG_IPVLAN=m
CONFIG_IPVTAP=m
CONFIG_VXLAN=m
CONFIG_GENEVE=m
+CONFIG_BAREUDP=m
CONFIG_GTP=m
CONFIG_MACSEC=m
CONFIG_NETCONSOLE=m
@@ -402,6 +402,7 @@ CONFIG_RTC_CLASS=y
# CONFIG_RTC_NVMEM is not set
CONFIG_RTC_DRV_GENERIC=m
# CONFIG_VIRTIO_MENU is not set
+# CONFIG_VHOST_MENU is not set
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_DAX=m
CONFIG_EXT4_FS=y
@@ -422,6 +423,7 @@ CONFIG_ZISOFS=y
CONFIG_UDF_FS=m
CONFIG_MSDOS_FS=m
CONFIG_VFAT_FS=m
+CONFIG_EXFAT_FS=m
CONFIG_PROC_KCORE=y
CONFIG_PROC_CHILDREN=y
CONFIG_TMPFS=y
@@ -569,9 +571,11 @@ CONFIG_XZ_DEC_TEST=m
CONFIG_STRING_SELFTEST=m
# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
CONFIG_MAGIC_SYSRQ=y
+CONFIG_TEST_LOCKUP=m
CONFIG_WW_MUTEX_SELFTEST=m
CONFIG_EARLY_PRINTK=y
CONFIG_TEST_LIST_SORT=m
+CONFIG_TEST_MIN_HEAP=m
CONFIG_TEST_SORT=m
CONFIG_REED_SOLOMON_TEST=m
CONFIG_ATOMIC64_SELFTEST=m
diff --git a/arch/m68k/include/asm/checksum.h b/arch/m68k/include/asm/checksum.h
index f9b94e4b94f9..3f2c15d6f18c 100644
--- a/arch/m68k/include/asm/checksum.h
+++ b/arch/m68k/include/asm/checksum.h
@@ -30,7 +30,8 @@ __wsum csum_partial(const void *buff, int len, __wsum sum);
* better 64-bit) boundary
*/
-extern __wsum csum_partial_copy_from_user(const void __user *src,
+#define _HAVE_ARCH_COPY_AND_CSUM_FROM_USER
+extern __wsum csum_and_copy_from_user(const void __user *src,
void *dst,
int len, __wsum sum,
int *csum_err);
diff --git a/arch/m68k/include/asm/floppy.h b/arch/m68k/include/asm/floppy.h
index c3b9ad6732fc..a4d0fea47c6b 100644
--- a/arch/m68k/include/asm/floppy.h
+++ b/arch/m68k/include/asm/floppy.h
@@ -63,21 +63,21 @@ static __inline__ void release_dma_lock(unsigned long flags)
}
-static __inline__ unsigned char fd_inb(int port)
+static __inline__ unsigned char fd_inb(int base, int reg)
{
if(MACH_IS_Q40)
- return inb_p(port);
+ return inb_p(base + reg);
else if(MACH_IS_SUN3X)
- return sun3x_82072_fd_inb(port);
+ return sun3x_82072_fd_inb(base + reg);
return 0;
}
-static __inline__ void fd_outb(unsigned char value, int port)
+static __inline__ void fd_outb(unsigned char value, int base, int reg)
{
if(MACH_IS_Q40)
- outb_p(value, port);
+ outb_p(value, base + reg);
else if(MACH_IS_SUN3X)
- sun3x_82072_fd_outb(value, port);
+ sun3x_82072_fd_outb(value, base + reg);
}
@@ -211,26 +211,27 @@ asmlinkage irqreturn_t floppy_hardint(int irq, void *dev_id)
st=1;
for(lcount=virtual_dma_count, lptr=virtual_dma_addr;
lcount; lcount--, lptr++) {
- st=inb(virtual_dma_port+4) & 0xa0 ;
- if(st != 0xa0)
+ st = inb(virtual_dma_port + FD_STATUS);
+ st &= STATUS_DMA | STATUS_READY;
+ if (st != (STATUS_DMA | STATUS_READY))
break;
if(virtual_dma_mode)
- outb_p(*lptr, virtual_dma_port+5);
+ outb_p(*lptr, virtual_dma_port + FD_DATA);
else
- *lptr = inb_p(virtual_dma_port+5);
+ *lptr = inb_p(virtual_dma_port + FD_DATA);
}
virtual_dma_count = lcount;
virtual_dma_addr = lptr;
- st = inb(virtual_dma_port+4);
+ st = inb(virtual_dma_port + FD_STATUS);
}
#ifdef TRACE_FLPY_INT
calls++;
#endif
- if(st == 0x20)
+ if (st == STATUS_DMA)
return IRQ_HANDLED;
- if(!(st & 0x20)) {
+ if (!(st & STATUS_DMA)) {
virtual_dma_residue += virtual_dma_count;
virtual_dma_count=0;
#ifdef TRACE_FLPY_INT
diff --git a/arch/m68k/include/asm/m5441xsim.h b/arch/m68k/include/asm/m5441xsim.h
index 4892f314ff38..e091e36d3464 100644
--- a/arch/m68k/include/asm/m5441xsim.h
+++ b/arch/m68k/include/asm/m5441xsim.h
@@ -279,6 +279,13 @@
#define MCFGPIO_PIN_MAX 87
/*
+ * Phase Locked Loop (PLL)
+ */
+#define MCF_PLL_CR 0xFC0C0000
+#define MCF_PLL_DR 0xFC0C0004
+#define MCF_PLL_SR 0xFC0C0008
+
+/*
* DSPI module.
*/
#define MCFDSPI_BASE0 0xfc05c000
@@ -298,5 +305,13 @@
#define MCFEDMA_IRQ_INTR16 (MCFINT1_VECBASE + MCFEDMA_EDMA_INTR16)
#define MCFEDMA_IRQ_INTR56 (MCFINT2_VECBASE + MCFEDMA_EDMA_INTR56)
#define MCFEDMA_IRQ_ERR (MCFINT0_VECBASE + MCFINT0_EDMA_ERR)
+/*
+ * esdhc module.
+ */
+#define MCFSDHC_BASE 0xfc0cc000
+#define MCFSDHC_SIZE 256
+#define MCFINT2_SDHC 31
+#define MCF_IRQ_SDHC (MCFINT2_VECBASE + MCFINT2_SDHC)
+#define MCFSDHC_CLK (MCFSDHC_BASE + 0x2c)
#endif /* m5441xsim_h */
diff --git a/arch/m68k/include/asm/mac_via.h b/arch/m68k/include/asm/mac_via.h
index de1470c4d829..1149251ea58d 100644
--- a/arch/m68k/include/asm/mac_via.h
+++ b/arch/m68k/include/asm/mac_via.h
@@ -257,6 +257,7 @@ extern int rbv_present,via_alt_mapping;
struct irq_desc;
+extern void via_l2_flush(int writeback);
extern void via_register_interrupts(void);
extern void via_irq_enable(int);
extern void via_irq_disable(int);
diff --git a/arch/m68k/include/asm/mcfclk.h b/arch/m68k/include/asm/mcfclk.h
index 0aca504fae31..722627e06d66 100644
--- a/arch/m68k/include/asm/mcfclk.h
+++ b/arch/m68k/include/asm/mcfclk.h
@@ -30,6 +30,8 @@ extern struct clk_ops clk_ops0;
extern struct clk_ops clk_ops1;
#endif /* MCFPM_PPMCR1 */
+extern struct clk_ops clk_ops2;
+
#define DEFINE_CLK(clk_bank, clk_name, clk_slot, clk_rate) \
static struct clk __clk_##clk_bank##_##clk_slot = { \
.name = clk_name, \
diff --git a/arch/m68k/include/asm/uaccess_mm.h b/arch/m68k/include/asm/uaccess_mm.h
index 7e85de984df1..9ae9f8d05925 100644
--- a/arch/m68k/include/asm/uaccess_mm.h
+++ b/arch/m68k/include/asm/uaccess_mm.h
@@ -142,7 +142,7 @@ asm volatile ("\n" \
__get_user_asm(__gu_err, x, ptr, u32, l, r, -EFAULT); \
break; \
case 8: { \
- const void *__gu_ptr = (ptr); \
+ const void __user *__gu_ptr = (ptr); \
union { \
u64 l; \
__typeof__(*(ptr)) t; \
diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl
index f4f49fcb76d0..f71b1bbcc198 100644
--- a/arch/m68k/kernel/syscalls/syscall.tbl
+++ b/arch/m68k/kernel/syscalls/syscall.tbl
@@ -437,3 +437,4 @@
435 common clone3 __sys_clone3
437 common openat2 sys_openat2
438 common pidfd_getfd sys_pidfd_getfd
+439 common faccessat2 sys_faccessat2
diff --git a/arch/m68k/lib/checksum.c b/arch/m68k/lib/checksum.c
index 5fa3d392e181..31797be9a3dc 100644
--- a/arch/m68k/lib/checksum.c
+++ b/arch/m68k/lib/checksum.c
@@ -129,7 +129,7 @@ EXPORT_SYMBOL(csum_partial);
*/
__wsum
-csum_partial_copy_from_user(const void __user *src, void *dst,
+csum_and_copy_from_user(const void __user *src, void *dst,
int len, __wsum sum, int *csum_err)
{
/*
@@ -316,7 +316,7 @@ csum_partial_copy_from_user(const void __user *src, void *dst,
return(sum);
}
-EXPORT_SYMBOL(csum_partial_copy_from_user);
+EXPORT_SYMBOL(csum_and_copy_from_user);
/*
diff --git a/arch/m68k/mac/config.c b/arch/m68k/mac/config.c
index 611f73bfc87c..d0126ab01360 100644
--- a/arch/m68k/mac/config.c
+++ b/arch/m68k/mac/config.c
@@ -59,7 +59,6 @@ extern void iop_preinit(void);
extern void iop_init(void);
extern void via_init(void);
extern void via_init_clock(irq_handler_t func);
-extern void via_flush_cache(void);
extern void oss_init(void);
extern void psc_init(void);
extern void baboon_init(void);
@@ -130,21 +129,6 @@ int __init mac_parse_bootinfo(const struct bi_record *record)
return unknown;
}
-/*
- * Flip into 24bit mode for an instant - flushes the L2 cache card. We
- * have to disable interrupts for this. Our IRQ handlers will crap
- * themselves if they take an IRQ in 24bit mode!
- */
-
-static void mac_cache_card_flush(int writeback)
-{
- unsigned long flags;
-
- local_irq_save(flags);
- via_flush_cache();
- local_irq_restore(flags);
-}
-
void __init config_mac(void)
{
if (!MACH_IS_MAC)
@@ -175,9 +159,8 @@ void __init config_mac(void)
* not.
*/
- if (macintosh_config->ident == MAC_MODEL_IICI
- || macintosh_config->ident == MAC_MODEL_IIFX)
- mach_l2_flush = mac_cache_card_flush;
+ if (macintosh_config->ident == MAC_MODEL_IICI)
+ mach_l2_flush = via_l2_flush;
}
diff --git a/arch/m68k/mac/iop.c b/arch/m68k/mac/iop.c
index 9bfa17015768..d3775afb0f07 100644
--- a/arch/m68k/mac/iop.c
+++ b/arch/m68k/mac/iop.c
@@ -299,7 +299,6 @@ void __init iop_init(void)
/*
* Register the interrupt handler for the IOPs.
- * TODO: might be wrong for non-OSS machines. Anyone?
*/
void __init iop_register_interrupts(void)
@@ -566,36 +565,42 @@ irqreturn_t iop_ism_irq(int irq, void *dev_id)
uint iop_num = (uint) dev_id;
volatile struct mac_iop *iop = iop_base[iop_num];
int i,state;
+ u8 events = iop->status_ctrl & (IOP_INT0 | IOP_INT1);
iop_pr_debug("status %02X\n", iop->status_ctrl);
- /* INT0 indicates a state change on an outgoing message channel */
-
- if (iop->status_ctrl & IOP_INT0) {
- iop->status_ctrl = IOP_INT0 | IOP_RUN | IOP_AUTOINC;
- iop_pr_debug("new status %02X, send states", iop->status_ctrl);
- for (i = 0 ; i < NUM_IOP_CHAN ; i++) {
- state = iop_readb(iop, IOP_ADDR_SEND_STATE + i);
- iop_pr_cont(" %02X", state);
- if (state == IOP_MSG_COMPLETE) {
- iop_handle_send(iop_num, i);
+ do {
+ /* INT0 indicates state change on an outgoing message channel */
+ if (events & IOP_INT0) {
+ iop->status_ctrl = IOP_INT0 | IOP_RUN | IOP_AUTOINC;
+ iop_pr_debug("new status %02X, send states",
+ iop->status_ctrl);
+ for (i = 0; i < NUM_IOP_CHAN; i++) {
+ state = iop_readb(iop, IOP_ADDR_SEND_STATE + i);
+ iop_pr_cont(" %02X", state);
+ if (state == IOP_MSG_COMPLETE)
+ iop_handle_send(iop_num, i);
}
+ iop_pr_cont("\n");
}
- iop_pr_cont("\n");
- }
- if (iop->status_ctrl & IOP_INT1) { /* INT1 for incoming msgs */
- iop->status_ctrl = IOP_INT1 | IOP_RUN | IOP_AUTOINC;
- iop_pr_debug("new status %02X, recv states", iop->status_ctrl);
- for (i = 0 ; i < NUM_IOP_CHAN ; i++) {
- state = iop_readb(iop, IOP_ADDR_RECV_STATE + i);
- iop_pr_cont(" %02X", state);
- if (state == IOP_MSG_NEW) {
- iop_handle_recv(iop_num, i);
+ /* INT1 for incoming messages */
+ if (events & IOP_INT1) {
+ iop->status_ctrl = IOP_INT1 | IOP_RUN | IOP_AUTOINC;
+ iop_pr_debug("new status %02X, recv states",
+ iop->status_ctrl);
+ for (i = 0; i < NUM_IOP_CHAN; i++) {
+ state = iop_readb(iop, IOP_ADDR_RECV_STATE + i);
+ iop_pr_cont(" %02X", state);
+ if (state == IOP_MSG_NEW)
+ iop_handle_recv(iop_num, i);
}
+ iop_pr_cont("\n");
}
- iop_pr_cont("\n");
- }
+
+ events = iop->status_ctrl & (IOP_INT0 | IOP_INT1);
+ } while (events);
+
return IRQ_HANDLED;
}
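The reworked IOP interrupt handler above re-reads the INT0/INT1 bits after servicing and loops until neither is set, so an event that arrives while the channels are being walked is not dropped. A toy version of that polling loop, using a fake status register and placeholder bit values rather than the real IOP hardware, is sketched below:

#include <stdio.h>

#define INT0 0x01	/* placeholder bit values, not the real IOP_INT0/IOP_INT1 */
#define INT1 0x02

static unsigned char fake_status = INT0 | INT1;	/* stands in for iop->status_ctrl */

static unsigned char read_events(void)
{
	unsigned char events = fake_status & (INT0 | INT1);

	fake_status = 0;	/* pretend servicing acknowledges the events */
	return events;
}

int main(void)
{
	unsigned char events = read_events();

	do {
		if (events & INT0)
			puts("walk send channels");
		if (events & INT1)
			puts("walk receive channels");
		/* re-sample: anything raised while servicing is handled too */
		events = read_events();
	} while (events);
	return 0;
}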
diff --git a/arch/m68k/mac/via.c b/arch/m68k/mac/via.c
index 3c2cfcb74982..1f0fad2a98a0 100644
--- a/arch/m68k/mac/via.c
+++ b/arch/m68k/mac/via.c
@@ -294,10 +294,14 @@ void via_debug_dump(void)
* the system into 24-bit mode for an instant.
*/
-void via_flush_cache(void)
+void via_l2_flush(int writeback)
{
+ unsigned long flags;
+
+ local_irq_save(flags);
via2[gBufB] &= ~VIA2B_vMode32;
via2[gBufB] |= VIA2B_vMode32;
+ local_irq_restore(flags);
}
/*
diff --git a/arch/m68k/tools/amiga/dmesg.c b/arch/m68k/tools/amiga/dmesg.c
index 7340f5b6cf6d..f8005a7efb0b 100644
--- a/arch/m68k/tools/amiga/dmesg.c
+++ b/arch/m68k/tools/amiga/dmesg.c
@@ -34,7 +34,7 @@ struct savekmsg {
u_long magic2; /* SAVEKMSG_MAGIC2 */
u_long magicptr; /* address of magic1 */
u_long size;
- char data[0];
+ char data[];
};
diff --git a/arch/microblaze/kernel/microblaze_ksyms.c b/arch/microblaze/kernel/microblaze_ksyms.c
index 92e12c2c2ec1..51c43ee5e380 100644
--- a/arch/microblaze/kernel/microblaze_ksyms.c
+++ b/arch/microblaze/kernel/microblaze_ksyms.c
@@ -6,7 +6,6 @@
#include <linux/export.h>
#include <linux/string.h>
-#include <linux/cryptohash.h>
#include <linux/delay.h>
#include <linux/in6.h>
#include <linux/syscalls.h>
diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl
index 4c67b11f9c9e..edacc4561f2b 100644
--- a/arch/microblaze/kernel/syscalls/syscall.tbl
+++ b/arch/microblaze/kernel/syscalls/syscall.tbl
@@ -443,3 +443,4 @@
435 common clone3 sys_clone3
437 common openat2 sys_openat2
438 common pidfd_getfd sys_pidfd_getfd
+439 common faccessat2 sys_faccessat2
diff --git a/arch/mips/Kbuild b/arch/mips/Kbuild
index a8d5e4fcbe53..d5d6ef9bb986 100644
--- a/arch/mips/Kbuild
+++ b/arch/mips/Kbuild
@@ -12,7 +12,7 @@ obj-y := $(platform-y)
# make clean traverses $(obj-) without having included .config, so
# everything ends up here
-obj- := $(platform-)
+obj- := $(platform-y)
# mips object files
# The object files are linked as core-y files would be linked
diff --git a/arch/mips/Kbuild.platforms b/arch/mips/Kbuild.platforms
index a69b272a3ab0..c7368a81fd1e 100644
--- a/arch/mips/Kbuild.platforms
+++ b/arch/mips/Kbuild.platforms
@@ -1,42 +1,44 @@
# SPDX-License-Identifier: GPL-2.0
# All platforms listed in alphabetic order
-platforms += alchemy
-platforms += ar7
-platforms += ath25
-platforms += ath79
-platforms += bcm47xx
-platforms += bcm63xx
-platforms += bmips
-platforms += cavium-octeon
-platforms += cobalt
-platforms += dec
-platforms += emma
-platforms += generic
-platforms += jazz
-platforms += jz4740
-platforms += lantiq
-platforms += lasat
-platforms += loongson2ef
-platforms += loongson32
-platforms += loongson64
-platforms += mti-malta
-platforms += netlogic
-platforms += paravirt
-platforms += pic32
-platforms += pistachio
-platforms += pmcs-msp71xx
-platforms += pnx833x
-platforms += ralink
-platforms += rb532
-platforms += sgi-ip22
-platforms += sgi-ip27
-platforms += sgi-ip30
-platforms += sgi-ip32
-platforms += sibyte
-platforms += sni
-platforms += txx9
-platforms += vr41xx
+platform-$(CONFIG_MIPS_ALCHEMY) += alchemy/
+platform-$(CONFIG_AR7) += ar7/
+platform-$(CONFIG_ATH25) += ath25/
+platform-$(CONFIG_ATH79) += ath79/
+platform-$(CONFIG_BCM47XX) += bcm47xx/
+platform-$(CONFIG_BCM63XX) += bcm63xx/
+platform-$(CONFIG_BMIPS_GENERIC) += bmips/
+platform-$(CONFIG_CAVIUM_OCTEON_SOC) += cavium-octeon/
+platform-$(CONFIG_MIPS_COBALT) += cobalt/
+platform-$(CONFIG_MACH_DECSTATION) += dec/
+platform-$(CONFIG_MIPS_GENERIC) += generic/
+platform-$(CONFIG_MACH_JAZZ) += jazz/
+platform-$(CONFIG_MACH_INGENIC) += jz4740/
+platform-$(CONFIG_LANTIQ) += lantiq/
+platform-$(CONFIG_MACH_LOONGSON2EF) += loongson2ef/
+platform-$(CONFIG_MACH_LOONGSON32) += loongson32/
+platform-$(CONFIG_MACH_LOONGSON64) += loongson64/
+platform-$(CONFIG_MIPS_MALTA) += mti-malta/
+platform-$(CONFIG_NLM_COMMON) += netlogic/
+platform-$(CONFIG_MIPS_PARAVIRT) += paravirt/
+platform-$(CONFIG_PIC32MZDA) += pic32/
+platform-$(CONFIG_MACH_PISTACHIO) += pistachio/
+platform-$(CONFIG_SOC_PNX833X) += pnx833x/
+platform-$(CONFIG_RALINK) += ralink/
+platform-$(CONFIG_MIKROTIK_RB532) += rb532/
+platform-$(CONFIG_SGI_IP22) += sgi-ip22/
+platform-$(CONFIG_SGI_IP27) += sgi-ip27/
+platform-$(CONFIG_SGI_IP28) += sgi-ip22/
+platform-$(CONFIG_SGI_IP30) += sgi-ip30/
+platform-$(CONFIG_SGI_IP32) += sgi-ip32/
+platform-$(CONFIG_SIBYTE_BCM112X) += sibyte/
+platform-$(CONFIG_SIBYTE_SB1250) += sibyte/
+platform-$(CONFIG_SIBYTE_BCM1x55) += sibyte/
+platform-$(CONFIG_SIBYTE_BCM1x80) += sibyte/
+platform-$(CONFIG_SNI_RM) += sni/
+platform-$(CONFIG_MACH_TX39XX) += txx9/
+platform-$(CONFIG_MACH_TX49XX) += txx9/
+platform-$(CONFIG_MACH_VR41XX) += vr41xx/
# include the platform specific files
-include $(patsubst %, $(srctree)/arch/mips/%/Platform, $(platforms))
+include $(patsubst %, $(srctree)/arch/mips/%/Platform, $(platform-y))
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 690718b3701a..b6338e806a4b 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -92,6 +92,9 @@ config MIPS
select SYSCTL_EXCEPTION_TRACE
select VIRT_TO_BUS
+config MIPS_FIXUP_BIGPHYS_ADDR
+ bool
+
menu "Machine selection"
choice
@@ -157,6 +160,7 @@ config MIPS_ALCHEMY
select CSRC_R4K
select IRQ_MIPS_CPU
select DMA_MAYBE_COHERENT # Au1000,1500,1100 aren't, rest is
+ select MIPS_FIXUP_BIGPHYS_ADDR if PCI
select SYS_HAS_CPU_MIPS32_R1
select SYS_SUPPORTS_32BIT_KERNEL
select SYS_SUPPORTS_APM_EMULATION
@@ -427,23 +431,6 @@ config LANTIQ
select ARCH_HAS_RESET_CONTROLLER
select RESET_CONTROLLER
-config LASAT
- bool "LASAT Networks platforms"
- select CEVT_R4K
- select CRC32
- select CSRC_R4K
- select DMA_NONCOHERENT
- select SYS_HAS_EARLY_PRINTK
- select HAVE_PCI
- select IRQ_MIPS_CPU
- select PCI_GT64XXX_PCI0
- select MIPS_NILE4
- select R5000_CPU_SCACHE
- select SYS_HAS_CPU_R5000
- select SYS_SUPPORTS_32BIT_KERNEL
- select SYS_SUPPORTS_64BIT_KERNEL if BROKEN
- select SYS_SUPPORTS_LITTLE_ENDIAN
-
config MACH_LOONGSON32
bool "Loongson 32-bit family of machines"
select SYS_SUPPORTS_ZBOOT
@@ -475,8 +462,10 @@ config MACH_LOONGSON64
select ISA
select I8259
select IRQ_MIPS_CPU
- select NR_CPUS_DEFAULT_4
+ select NO_EXCEPT_FILL
+ select NR_CPUS_DEFAULT_64
select USE_GENERIC_EARLY_PRINTK_8250
+ select PCI_DRIVERS_GENERIC
select SYS_HAS_CPU_LOONGSON64
select SYS_HAS_EARLY_PRINTK
select SYS_SUPPORTS_SMP
@@ -593,13 +582,6 @@ config MACH_PIC32
Microchip PIC32 is a family of general-purpose 32 bit MIPS core
microcontrollers.
-config NEC_MARKEINS
- bool "NEC EMMA2RH Mark-eins board"
- select SOC_EMMA2RH
- select HAVE_PCI
- help
- This enables support for the NEC Electronics Mark-eins boards.
-
config MACH_VR41XX
bool "NEC VR4100 series based machines"
select CEVT_R4K
@@ -621,30 +603,6 @@ config NXP_STB225
help
Support for NXP Semiconductors STB225 Development Board.
-config PMC_MSP
- bool "PMC-Sierra MSP chipsets"
- select CEVT_R4K
- select CSRC_R4K
- select DMA_NONCOHERENT
- select SWAP_IO_SPACE
- select NO_EXCEPT_FILL
- select BOOT_RAW
- select SYS_HAS_CPU_MIPS32_R1
- select SYS_HAS_CPU_MIPS32_R2
- select SYS_SUPPORTS_32BIT_KERNEL
- select SYS_SUPPORTS_BIG_ENDIAN
- select SYS_SUPPORTS_MIPS16
- select IRQ_MIPS_CPU
- select SERIAL_8250
- select SERIAL_8250_CONSOLE
- select USB_EHCI_BIG_ENDIAN_MMIO
- select USB_EHCI_BIG_ENDIAN_DESC
- help
- This adds support for the PMC-Sierra family of Multi-Service
- Processor System-On-A-Chips. These parts include a number
- of integrated peripherals, interfaces and DSPs in addition to
- a variety of MIPS cores.
-
config RALINK
bool "Ralink based machines"
select CEVT_R4K
@@ -1087,10 +1045,8 @@ source "arch/mips/generic/Kconfig"
source "arch/mips/jazz/Kconfig"
source "arch/mips/jz4740/Kconfig"
source "arch/mips/lantiq/Kconfig"
-source "arch/mips/lasat/Kconfig"
source "arch/mips/pic32/Kconfig"
source "arch/mips/pistachio/Kconfig"
-source "arch/mips/pmcs-msp71xx/Kconfig"
source "arch/mips/ralink/Kconfig"
source "arch/mips/sgi-ip27/Kconfig"
source "arch/mips/sibyte/Kconfig"
@@ -1154,6 +1110,7 @@ config CSRC_IOASIC
bool
config CSRC_R4K
+ select CLOCKSOURCE_WATCHDOG if CPU_FREQ
bool
config CSRC_SB1250
@@ -1211,9 +1168,6 @@ config MIPS_BONITO64
config MIPS_MSC
bool
-config MIPS_NILE4
- bool
-
config SYNC_R4K
bool
@@ -1334,18 +1288,6 @@ config PCI_XTALK_BRIDGE
config NO_EXCEPT_FILL
bool
-config SOC_EMMA2RH
- bool
- select CEVT_R4K
- select CSRC_R4K
- select DMA_NONCOHERENT
- select IRQ_MIPS_CPU
- select SWAP_IO_SPACE
- select SYS_HAS_CPU_R5500
- select SYS_SUPPORTS_32BIT_KERNEL
- select SYS_SUPPORTS_64BIT_KERNEL
- select SYS_SUPPORTS_BIG_ENDIAN
-
config SOC_PNX833X
bool
select CEVT_R4K
@@ -1419,9 +1361,6 @@ config MIPS_L1_CACHE_SHIFT
default "4" if MIPS_L1_CACHE_SHIFT_4
default "5"
-config HAVE_STD_PC_SERIAL_PORT
- bool
-
config ARC_CMDLINE_ONLY
bool
@@ -1504,6 +1443,18 @@ config CPU_LOONGSON3_WORKAROUNDS
If unsure, please say Y.
+config CPU_LOONGSON3_CPUCFG_EMULATION
+ bool "Emulate the CPUCFG instruction on older Loongson cores"
+ default y
+ depends on CPU_LOONGSON64
+ help
+ Loongson-3A R4 and newer have the CPUCFG instruction available for
+ userland to query CPU capabilities, much like CPUID on x86. This
+ option provides emulation of the instruction on older Loongson
+ cores, back to Loongson-3A1000.
+
+ If unsure, please say Y.
+
config CPU_LOONGSON2E
bool "Loongson 2E"
depends on SYS_HAS_CPU_LOONGSON2E
@@ -1580,6 +1531,21 @@ config CPU_MIPS32_R2
specific type of processor in your system, choose those that one
otherwise CPU_MIPS32_R1 is a safe bet for any MIPS32 system.
+config CPU_MIPS32_R5
+ bool "MIPS32 Release 5"
+ depends on SYS_HAS_CPU_MIPS32_R5
+ select CPU_HAS_PREFETCH
+ select CPU_SUPPORTS_32BIT_KERNEL
+ select CPU_SUPPORTS_HIGHMEM
+ select CPU_SUPPORTS_MSA
+ select HAVE_KVM
+ select MIPS_O32_FP64_SUPPORT
+ help
+ Choose this option to build a kernel for release 5 or later of the
+ MIPS32 architecture. New MIPS processors, starting with the Warrior
+ family, are based on a MIPS32r5 processor. If you own an older
+ processor, you probably need to select MIPS32r1 or MIPS32r2 instead.
+
config CPU_MIPS32_R6
bool "MIPS32 Release 6"
depends on SYS_HAS_CPU_MIPS32_R6
@@ -1632,6 +1598,23 @@ config CPU_MIPS64_R2
specific type of processor in your system, choose those that one
otherwise CPU_MIPS64_R1 is a safe bet for any MIPS64 system.
+config CPU_MIPS64_R5
+ bool "MIPS64 Release 5"
+ depends on SYS_HAS_CPU_MIPS64_R5
+ select CPU_HAS_PREFETCH
+ select CPU_SUPPORTS_32BIT_KERNEL
+ select CPU_SUPPORTS_64BIT_KERNEL
+ select CPU_SUPPORTS_HIGHMEM
+ select CPU_SUPPORTS_HUGEPAGES
+ select CPU_SUPPORTS_MSA
+ select MIPS_O32_FP64_SUPPORT if 32BIT || MIPS32_O32
+ select HAVE_KVM
+ help
+ Choose this option to build a kernel for release 5 or later of the
+ MIPS64 architecture. This is an intermediate MIPS architecture
+ release partly implementing release 6 features, though no hardware
+ is known to be based on this release.
+
config CPU_MIPS64_R6
bool "MIPS64 Release 6"
depends on SYS_HAS_CPU_MIPS64_R6
@@ -1650,6 +1633,28 @@ config CPU_MIPS64_R6
family, are based on a MIPS64r6 processor. If you own an older
processor, you probably need to select MIPS64r1 or MIPS64r2 instead.
+config CPU_P5600
+ bool "MIPS Warrior P5600"
+ depends on SYS_HAS_CPU_P5600
+ select CPU_HAS_PREFETCH
+ select CPU_SUPPORTS_32BIT_KERNEL
+ select CPU_SUPPORTS_HIGHMEM
+ select CPU_SUPPORTS_MSA
+ select CPU_SUPPORTS_UNCACHED_ACCELERATED
+ select CPU_SUPPORTS_CPUFREQ
+ select CPU_MIPSR2_IRQ_VI
+ select CPU_MIPSR2_IRQ_EI
+ select HAVE_KVM
+ select MIPS_O32_FP64_SUPPORT
+ help
+ Choose this option to build a kernel for the MIPS Warrior P5600 CPU.
+ It's based on the MIPS32r5 ISA with XPA, EVA, dual/quad issue exec
+ pipes, an MMU with a two-level TLB, UCA, MSA and MDU core-level
+ features, plus system-level features such as up to six P5600
+ calculation cores, CM2 with L2 cache, IOCU/IOMMU (though these might
+ be unused depending on the system-specific IP core configuration),
+ GIC, CPC, a virtualisation module, eJTAG and PDtrace.
+
config CPU_R3000
bool "R3000"
depends on SYS_HAS_CPU_R3000
@@ -1826,7 +1831,8 @@ endchoice
config CPU_MIPS32_3_5_FEATURES
bool "MIPS32 Release 3.5 Features"
depends on SYS_HAS_CPU_MIPS32_R3_5
- depends on CPU_MIPS32_R2 || CPU_MIPS32_R6
+ depends on CPU_MIPS32_R2 || CPU_MIPS32_R5 || CPU_MIPS32_R6 || \
+ CPU_P5600
help
Choose this option to build a kernel for release 2 or later of the
MIPS32 architecture including features from the 3.5 release such as
@@ -1846,7 +1852,7 @@ config CPU_MIPS32_3_5_EVA
config CPU_MIPS32_R5_FEATURES
bool "MIPS32 Release 5 Features"
depends on SYS_HAS_CPU_MIPS32_R5
- depends on CPU_MIPS32_R2
+ depends on CPU_MIPS32_R2 || CPU_MIPS32_R5 || CPU_P5600
help
Choose this option to build a kernel for release 2 or later of the
MIPS32 architecture including features from release 5 such as
@@ -2001,6 +2007,10 @@ config SYS_HAS_CPU_MIPS64_R6
bool
select ARCH_HAS_SYNC_DMA_FOR_CPU if DMA_NONCOHERENT
+config SYS_HAS_CPU_P5600
+ bool
+ select ARCH_HAS_SYNC_DMA_FOR_CPU if DMA_NONCOHERENT
+
config SYS_HAS_CPU_R3000
bool
@@ -2084,11 +2094,13 @@ endmenu
#
config CPU_MIPS32
bool
- default y if CPU_MIPS32_R1 || CPU_MIPS32_R2 || CPU_MIPS32_R6
+ default y if CPU_MIPS32_R1 || CPU_MIPS32_R2 || CPU_MIPS32_R5 || \
+ CPU_MIPS32_R6 || CPU_P5600
config CPU_MIPS64
bool
- default y if CPU_MIPS64_R1 || CPU_MIPS64_R2 || CPU_MIPS64_R6
+ default y if CPU_MIPS64_R1 || CPU_MIPS64_R2 || CPU_MIPS64_R5 || \
+ CPU_MIPS64_R6
#
# These indicate the revision of the architecture
@@ -2104,6 +2116,13 @@ config CPU_MIPSR2
select CPU_HAS_DIEI if !CPU_DIEI_BROKEN
select MIPS_SPRAM
+config CPU_MIPSR5
+ bool
+ default y if CPU_MIPS32_R5 || CPU_MIPS64_R5 || CPU_P5600
+ select CPU_HAS_RIXI
+ select CPU_HAS_DIEI if !CPU_DIEI_BROKEN
+ select MIPS_SPRAM
+
config CPU_MIPSR6
bool
default y if CPU_MIPS32_R6 || CPU_MIPS64_R6
@@ -2118,6 +2137,7 @@ config TARGET_ISA_REV
int
default 1 if CPU_MIPSR1
default 2 if CPU_MIPSR2
+ default 5 if CPU_MIPSR5
default 6 if CPU_MIPSR6
default 0
help
@@ -2707,7 +2727,11 @@ config NEED_PER_CPU_EMBED_FIRST_CHUNK
config RELOCATABLE
bool "Relocatable kernel"
- depends on SYS_SUPPORTS_RELOCATABLE && (CPU_MIPS32_R2 || CPU_MIPS64_R2 || CPU_MIPS32_R6 || CPU_MIPS64_R6 || CAVIUM_OCTEON_SOC)
+ depends on SYS_SUPPORTS_RELOCATABLE
+ depends on CPU_MIPS32_R2 || CPU_MIPS64_R2 || \
+ CPU_MIPS32_R5 || CPU_MIPS64_R5 || \
+ CPU_MIPS32_R6 || CPU_MIPS64_R6 || \
+ CPU_P5600 || CAVIUM_OCTEON_SOC
help
This builds a kernel image that retains relocation information
so it can be loaded someplace besides the default 1MB.
@@ -3275,3 +3299,5 @@ endmenu
source "drivers/firmware/Kconfig"
source "arch/mips/kvm/Kconfig"
+
+source "arch/mips/vdso/Kconfig"
diff --git a/arch/mips/Kconfig.debug b/arch/mips/Kconfig.debug
index 93a2974d2ab7..7a8d94cdd493 100644
--- a/arch/mips/Kconfig.debug
+++ b/arch/mips/Kconfig.debug
@@ -148,4 +148,14 @@ config MIPS_CPS_NS16550_SHIFT
form their addresses. That is, log base 2 of the span between
adjacent ns16550 registers in the system.
+config MIPS_CPS_NS16550_WIDTH
+ int "UART Register Width"
+ default 1
+ help
+ ns16550 register width. The UART register IO access method is
+ selected according to this parameter: setting it to 1, 2 or 4 makes
+ the UART registers be accessed with lb/sb, lh/sh or lw/sw
+ instructions respectively. Any other value falls back to lb/sb
+ instructions.
+
endif # MIPS_CPS_NS16550_BOOL
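To illustrate what MIPS_CPS_NS16550_WIDTH selects (the real CPS boot code does this in assembly with lb/sb, lh/sh or lw/sw), here is a hedged C sketch of width-dependent ns16550 register access; the function and parameter names are hypothetical:

#include <stdint.h>

/* Illustration only: read an ns16550 register at the configured width.
 * 'shift' mirrors MIPS_CPS_NS16550_SHIFT, 'width' mirrors the new _WIDTH option. */
static inline uint32_t ns16550_read(volatile void *base, unsigned int reg,
				    unsigned int shift, unsigned int width)
{
	volatile uint8_t *addr = (volatile uint8_t *)base + (reg << shift);

	switch (width) {
	case 2:
		return *(volatile uint16_t *)addr;	/* lh */
	case 4:
		return *(volatile uint32_t *)addr;	/* lw */
	default:
		return *addr;				/* lb, also the fallback */
	}
}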
diff --git a/arch/mips/Makefile b/arch/mips/Makefile
index e1c44aed8156..0d0f29d662c9 100644
--- a/arch/mips/Makefile
+++ b/arch/mips/Makefile
@@ -116,33 +116,8 @@ endif
cflags-y += -ffreestanding
-#
-# We explicitly add the endianness specifier if needed, this allows
-# to compile kernels with a toolchain for the other endianness. We
-# carefully avoid to add it redundantly because gcc 3.3/3.4 complains
-# when fed the toolchain default!
-#
-# Certain gcc versions up to gcc 4.1.1 (probably 4.2-subversion as of
-# 2006-10-10 don't properly change the predefined symbols if -EB / -EL
-# are used, so we kludge that here. A bug has been filed at
-# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=29413.
-#
-# clang doesn't suffer from these issues and our checks against -dumpmachine
-# don't work so well when cross compiling, since without providing --target
-# clang's output will be based upon the build machine. So for clang we simply
-# unconditionally specify -EB or -EL as appropriate.
-#
-ifdef CONFIG_CC_IS_CLANG
cflags-$(CONFIG_CPU_BIG_ENDIAN) += -EB
cflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -EL
-else
-undef-all += -UMIPSEB -U_MIPSEB -U__MIPSEB -U__MIPSEB__
-undef-all += -UMIPSEL -U_MIPSEL -U__MIPSEL -U__MIPSEL__
-predef-be += -DMIPSEB -D_MIPSEB -D__MIPSEB -D__MIPSEB__
-predef-le += -DMIPSEL -D_MIPSEL -D__MIPSEL -D__MIPSEL__
-cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(shell $(CC) -dumpmachine |grep -q 'mips.*el-.*' && echo -EB $(undef-all) $(predef-be))
-cflags-$(CONFIG_CPU_LITTLE_ENDIAN) += $(shell $(CC) -dumpmachine |grep -q 'mips.*el-.*' || echo -EL $(undef-all) $(predef-le))
-endif
cflags-$(CONFIG_SB1XXX_CORELIS) += $(call cc-option,-mno-sched-prolog) \
-fno-omit-frame-pointer
@@ -171,10 +146,13 @@ cflags-$(CONFIG_CPU_R4X00) += -march=r4600 -Wa,--trap
cflags-$(CONFIG_CPU_TX49XX) += -march=r4600 -Wa,--trap
cflags-$(CONFIG_CPU_MIPS32_R1) += -march=mips32 -Wa,--trap
cflags-$(CONFIG_CPU_MIPS32_R2) += -march=mips32r2 -Wa,--trap
+cflags-$(CONFIG_CPU_MIPS32_R5) += -march=mips32r5 -Wa,--trap -modd-spreg
cflags-$(CONFIG_CPU_MIPS32_R6) += -march=mips32r6 -Wa,--trap -modd-spreg
cflags-$(CONFIG_CPU_MIPS64_R1) += -march=mips64 -Wa,--trap
cflags-$(CONFIG_CPU_MIPS64_R2) += -march=mips64r2 -Wa,--trap
+cflags-$(CONFIG_CPU_MIPS64_R5) += -march=mips64r5 -Wa,--trap
cflags-$(CONFIG_CPU_MIPS64_R6) += -march=mips64r6 -Wa,--trap
+cflags-$(CONFIG_CPU_P5600) += -march=p5600 -Wa,--trap -modd-spreg
cflags-$(CONFIG_CPU_R5000) += -march=r5000 -Wa,--trap
cflags-$(CONFIG_CPU_R5500) += $(call cc-option,-march=r5500,-march=r5000) \
-Wa,--trap
@@ -288,12 +266,23 @@ ifdef CONFIG_64BIT
endif
endif
+# When linking a 32-bit executable the LLVM linker cannot cope with a
+# 32-bit load address that has been sign-extended to 64 bits. Simply
+# remove the upper 32 bits then, as it is safe to do so with other
+# linkers.
+ifdef CONFIG_64BIT
+ load-ld = $(load-y)
+else
+ load-ld = $(subst 0xffffffff,0x,$(load-y))
+endif
+
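For example, with the common 32-bit load address 0xffffffff80100000 (a KSEG0 address sign-extended to 64 bits), the substitution yields load-ld = 0x80100000: the linker is handed the plain 32-bit address, while VMLINUX_LOAD_ADDRESS keeps the full sign-extended value for the rest of the build.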
KBUILD_AFLAGS += $(cflags-y)
KBUILD_CFLAGS += $(cflags-y)
-KBUILD_CPPFLAGS += -DVMLINUX_LOAD_ADDRESS=$(load-y)
+KBUILD_CPPFLAGS += -DVMLINUX_LOAD_ADDRESS=$(load-y) -DLINKER_LOAD_ADDRESS=$(load-ld)
KBUILD_CPPFLAGS += -DDATAOFFSET=$(if $(dataoffset-y),$(dataoffset-y),0)
bootvars-y = VMLINUX_LOAD_ADDRESS=$(load-y) \
+ LINKER_LOAD_ADDRESS=$(load-ld) \
VMLINUX_ENTRY_ADDRESS=$(entry-y) \
PLATFORM="$(platform-y)" \
ITS_INPUTS="$(its-y)"
@@ -359,12 +348,6 @@ ifeq ($(shell expr $(zload-y) \< 0xffffffff80000000 2> /dev/null), 0)
bootz-y += uzImage.bin
endif
-ifdef CONFIG_LASAT
-rom.bin rom.sw: vmlinux
- $(Q)$(MAKE) $(build)=arch/mips/lasat/image \
- $(bootvars-y) $@
-endif
-
#
# Some machines like the Indy need 32-bit ELF binaries for booting purposes.
# Other need ECOFF, so we build a 32-bit ELF binary for them which we then
@@ -430,7 +413,6 @@ archclean:
$(Q)$(MAKE) $(clean)=arch/mips/boot
$(Q)$(MAKE) $(clean)=arch/mips/boot/compressed
$(Q)$(MAKE) $(clean)=arch/mips/boot/tools
- $(Q)$(MAKE) $(clean)=arch/mips/lasat
archheaders:
$(Q)$(MAKE) $(build)=arch/mips/kernel/syscalls all
diff --git a/arch/mips/alchemy/Platform b/arch/mips/alchemy/Platform
index 33c9da3b077b..c8cff50b0eda 100644
--- a/arch/mips/alchemy/Platform
+++ b/arch/mips/alchemy/Platform
@@ -15,19 +15,16 @@ load-$(CONFIG_MIPS_DB1XXX) += 0xffffffff80100000
#
# 4G-Systems MTX-1 "MeshCube" wireless router
#
-platform-$(CONFIG_MIPS_MTX1) += alchemy/
load-$(CONFIG_MIPS_MTX1) += 0xffffffff80100000
#
# MyCable eval board
#
-platform-$(CONFIG_MIPS_XXS1500) += alchemy/
load-$(CONFIG_MIPS_XXS1500) += 0xffffffff80100000
#
# Trapeze ITS GRP board
#
-platform-$(CONFIG_MIPS_GPR) += alchemy/
load-$(CONFIG_MIPS_GPR) += 0xffffffff80100000
# boards can specify their own <gpio.h> in one of their include dirs.
diff --git a/arch/mips/alchemy/common/setup.c b/arch/mips/alchemy/common/setup.c
index 7faaa6d593a7..0f60efe0481e 100644
--- a/arch/mips/alchemy/common/setup.c
+++ b/arch/mips/alchemy/common/setup.c
@@ -27,6 +27,7 @@
#include <linux/init.h>
#include <linux/ioport.h>
+#include <linux/mm.h>
#include <asm/dma-coherence.h>
#include <asm/mipsregs.h>
@@ -72,9 +73,9 @@ void __init plat_mem_setup(void)
iomem_resource.end = IOMEM_RESOURCE_END;
}
-#if defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_PCI)
+#ifdef CONFIG_MIPS_FIXUP_BIGPHYS_ADDR
/* This routine should be valid for all Au1x based boards */
-phys_addr_t __fixup_bigphys_addr(phys_addr_t phys_addr, phys_addr_t size)
+phys_addr_t fixup_bigphys_addr(phys_addr_t phys_addr, phys_addr_t size)
{
unsigned long start = ALCHEMY_PCI_MEMWIN_START;
unsigned long end = ALCHEMY_PCI_MEMWIN_END;
@@ -90,5 +91,13 @@ phys_addr_t __fixup_bigphys_addr(phys_addr_t phys_addr, phys_addr_t size)
/* default nop */
return phys_addr;
}
-EXPORT_SYMBOL(__fixup_bigphys_addr);
-#endif
+
+int io_remap_pfn_range(struct vm_area_struct *vma, unsigned long vaddr,
+ unsigned long pfn, unsigned long size, pgprot_t prot)
+{
+ phys_addr_t phys_addr = fixup_bigphys_addr(pfn << PAGE_SHIFT, size);
+
+ return remap_pfn_range(vma, vaddr, phys_addr >> PAGE_SHIFT, size, prot);
+}
+EXPORT_SYMBOL(io_remap_pfn_range);
+#endif /* CONFIG_MIPS_FIXUP_BIGPHYS_ADDR */
diff --git a/arch/mips/alchemy/devboards/db1550.c b/arch/mips/alchemy/devboards/db1550.c
index 3e0c75c0ece0..752b93d91ac9 100644
--- a/arch/mips/alchemy/devboards/db1550.c
+++ b/arch/mips/alchemy/devboards/db1550.c
@@ -225,7 +225,7 @@ static void __init pb1550_nand_setup(void)
case 0: case 2: case 8: case 0xC: case 0xD:
/* x16 NAND Flash */
pb1550_nand_pd.devwidth = 1;
- /* fallthrough */
+ fallthrough;
case 1: case 3: case 9: case 0xE: case 0xF:
/* x8 NAND, already set up */
platform_device_register(&pb1550_nand_dev);
diff --git a/arch/mips/ar7/Platform b/arch/mips/ar7/Platform
index 21f9102d533c..a9257cc01c3c 100644
--- a/arch/mips/ar7/Platform
+++ b/arch/mips/ar7/Platform
@@ -1,6 +1,5 @@
#
# Texas Instruments AR7
#
-platform-$(CONFIG_AR7) += ar7/
cflags-$(CONFIG_AR7) += -I$(srctree)/arch/mips/include/asm/mach-ar7
load-$(CONFIG_AR7) += 0xffffffff94100000
diff --git a/arch/mips/ar7/setup.c b/arch/mips/ar7/setup.c
index b3ffe7c898eb..352d5dbc777c 100644
--- a/arch/mips/ar7/setup.c
+++ b/arch/mips/ar7/setup.c
@@ -57,7 +57,7 @@ const char *get_system_type(void)
case TITAN_CHIP_1060:
return "TI AR7 (TNETV1060)";
}
- /* fall through */
+ fallthrough;
default:
return "TI AR7 (unknown)";
}
diff --git a/arch/mips/ath25/Platform b/arch/mips/ath25/Platform
index ef3f81fa080b..aef098b6f405 100644
--- a/arch/mips/ath25/Platform
+++ b/arch/mips/ath25/Platform
@@ -1,6 +1,5 @@
#
# Atheros AR531X/AR231X WiSoC
#
-platform-$(CONFIG_ATH25) += ath25/
cflags-$(CONFIG_ATH25) += -I$(srctree)/arch/mips/include/asm/mach-ath25
load-$(CONFIG_ATH25) += 0xffffffff80041000
diff --git a/arch/mips/ath79/Platform b/arch/mips/ath79/Platform
index 2bd663647d27..57744472ed2e 100644
--- a/arch/mips/ath79/Platform
+++ b/arch/mips/ath79/Platform
@@ -2,6 +2,5 @@
# Atheros AR71xx/AR724x/AR913x
#
-platform-$(CONFIG_ATH79) += ath79/
cflags-$(CONFIG_ATH79) += -I$(srctree)/arch/mips/include/asm/mach-ath79
load-$(CONFIG_ATH79) = 0xffffffff80060000
diff --git a/arch/mips/ath79/setup.c b/arch/mips/ath79/setup.c
index acb4fd647a30..4b7c066ac88e 100644
--- a/arch/mips/ath79/setup.c
+++ b/arch/mips/ath79/setup.c
@@ -153,8 +153,7 @@ static void __init ath79_detect_sys_type(void)
case REV_ID_MAJOR_QCA9533_V2:
ver = 2;
ath79_soc_rev = 2;
- /* fall through */
-
+ fallthrough;
case REV_ID_MAJOR_QCA9533:
ath79_soc = ATH79_SOC_QCA9533;
chip = "9533";
diff --git a/arch/mips/bcm47xx/Platform b/arch/mips/bcm47xx/Platform
index 70783b75fd9d..833b204fe5da 100644
--- a/arch/mips/bcm47xx/Platform
+++ b/arch/mips/bcm47xx/Platform
@@ -1,7 +1,6 @@
#
# Broadcom BCM47XX boards
#
-platform-$(CONFIG_BCM47XX) += bcm47xx/
cflags-$(CONFIG_BCM47XX) += \
-I$(srctree)/arch/mips/include/asm/mach-bcm47xx
load-$(CONFIG_BCM47XX) := 0xffffffff80001000
diff --git a/arch/mips/bcm63xx/Platform b/arch/mips/bcm63xx/Platform
index 5f86b2fff6de..882dc40f49a2 100644
--- a/arch/mips/bcm63xx/Platform
+++ b/arch/mips/bcm63xx/Platform
@@ -1,7 +1,6 @@
#
# Broadcom BCM63XX boards
#
-platform-$(CONFIG_BCM63XX) += bcm63xx/
cflags-$(CONFIG_BCM63XX) += \
-I$(srctree)/arch/mips/include/asm/mach-bcm63xx/
load-$(CONFIG_BCM63XX) := 0xffffffff80010000
diff --git a/arch/mips/bcm63xx/cpu.c b/arch/mips/bcm63xx/cpu.c
index f61c16f57a97..8e3e199dd35d 100644
--- a/arch/mips/bcm63xx/cpu.c
+++ b/arch/mips/bcm63xx/cpu.c
@@ -304,7 +304,7 @@ void __init bcm63xx_cpu_init(void)
case CPU_BMIPS3300:
if ((read_c0_prid() & PRID_IMP_MASK) != PRID_IMP_BMIPS3300_ALT)
__cpu_name[cpu] = "Broadcom BCM6338";
- /* fall-through */
+ fallthrough;
case CPU_BMIPS32:
chipid_reg = BCM_6345_PERF_BASE;
break;
diff --git a/arch/mips/bcm63xx/dev-flash.c b/arch/mips/bcm63xx/dev-flash.c
index a1093934c616..f9cc015d3dc9 100644
--- a/arch/mips/bcm63xx/dev-flash.c
+++ b/arch/mips/bcm63xx/dev-flash.c
@@ -94,7 +94,7 @@ static int __init bcm63xx_detect_flash_type(void)
case STRAPBUS_6368_BOOT_SEL_PARALLEL:
return BCM63XX_FLASH_TYPE_PARALLEL;
}
- /* fall through */
+ fallthrough;
default:
return -EINVAL;
}
diff --git a/arch/mips/bmips/Platform b/arch/mips/bmips/Platform
index 5f127fd7f4b5..1434ea31ce85 100644
--- a/arch/mips/bmips/Platform
+++ b/arch/mips/bmips/Platform
@@ -1,7 +1,6 @@
#
# Broadcom Generic BMIPS kernel
#
-platform-$(CONFIG_BMIPS_GENERIC) += bmips/
cflags-$(CONFIG_BMIPS_GENERIC) += \
-I$(srctree)/arch/mips/include/asm/mach-bmips/
load-$(CONFIG_BMIPS_GENERIC) := 0xffffffff80010000
diff --git a/arch/mips/boot/compressed/Makefile b/arch/mips/boot/compressed/Makefile
index 0df0ee8a298d..6e56caef69f0 100644
--- a/arch/mips/boot/compressed/Makefile
+++ b/arch/mips/boot/compressed/Makefile
@@ -90,7 +90,7 @@ ifneq ($(zload-y),)
VMLINUZ_LOAD_ADDRESS := $(zload-y)
else
VMLINUZ_LOAD_ADDRESS = $(shell $(obj)/calc_vmlinuz_load_addr \
- $(obj)/vmlinux.bin $(VMLINUX_LOAD_ADDRESS))
+ $(obj)/vmlinux.bin $(LINKER_LOAD_ADDRESS))
endif
UIMAGE_LOADADDR = $(VMLINUZ_LOAD_ADDRESS)
diff --git a/arch/mips/boot/dts/Makefile b/arch/mips/boot/dts/Makefile
index d429a69bfe30..19027129add8 100644
--- a/arch/mips/boot/dts/Makefile
+++ b/arch/mips/boot/dts/Makefile
@@ -1,17 +1,19 @@
# SPDX-License-Identifier: GPL-2.0
-subdir-y += brcm
-subdir-y += cavium-octeon
-subdir-y += img
-subdir-y += ingenic
-subdir-y += lantiq
-subdir-y += loongson
-subdir-y += mscc
-subdir-y += mti
-subdir-y += netlogic
-subdir-y += ni
-subdir-y += pic32
-subdir-y += qca
-subdir-y += ralink
-subdir-y += xilfpga
+subdir-$(CONFIG_BMIPS_GENERIC) += brcm
+subdir-$(CONFIG_CAVIUM_OCTEON_SOC) += cavium-octeon
+subdir-$(CONFIG_MACH_PISTACHIO) += img
+subdir-$(CONFIG_FIT_IMAGE_FDT_BOSTON) += img
+subdir-$(CONFIG_MACH_INGENIC) += ingenic
+subdir-$(CONFIG_LANTIQ) += lantiq
+subdir-$(CONFIG_MACH_LOONGSON64) += loongson
+subdir-$(CONFIG_MSCC_OCELOT) += mscc
+subdir-$(CONFIG_MIPS_MALTA) += mti
+subdir-$(CONFIG_LEGACY_BOARD_SEAD3) += mti
+subdir-$(CONFIG_NLM_XLP_BOARD) += netlogic
+subdir-$(CONFIG_FIT_IMAGE_FDT_NI169445) += ni
+subdir-$(CONFIG_MACH_PIC32) += pic32
+subdir-$(CONFIG_ATH79) += qca
+subdir-$(CONFIG_RALINK) += ralink
+subdir-$(CONFIG_FIT_IMAGE_FDT_XILFPGA) += xilfpga
obj-$(CONFIG_BUILTIN_DTB) := $(addsuffix /, $(subdir-y))
diff --git a/arch/mips/boot/dts/ingenic/ci20.dts b/arch/mips/boot/dts/ingenic/ci20.dts
index db0ca250bd1a..75f5bfbf2c37 100644
--- a/arch/mips/boot/dts/ingenic/ci20.dts
+++ b/arch/mips/boot/dts/ingenic/ci20.dts
@@ -386,6 +386,9 @@
interrupt-parent = <&gpe>;
interrupts = <19 4>;
+
+ nvmem-cells = <&eth0_addr>;
+ nvmem-cell-names = "mac-address";
};
};
diff --git a/arch/mips/boot/dts/ingenic/gcw0.dts b/arch/mips/boot/dts/ingenic/gcw0.dts
index f58d239c2058..8d22828787d8 100644
--- a/arch/mips/boot/dts/ingenic/gcw0.dts
+++ b/arch/mips/boot/dts/ingenic/gcw0.dts
@@ -4,6 +4,10 @@
#include "jz4770.dtsi"
#include <dt-bindings/clock/ingenic,tcu.h>
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/iio/adc/ingenic,adc.h>
+#include <dt-bindings/input/input.h>
+
/ {
compatible = "gcw,zero", "ingenic,jz4770";
model = "GCW Zero";
@@ -15,20 +19,370 @@
serial3 = &uart3;
};
+ memory: memory {
+ device_type = "memory";
+ reg = <0x0 0x10000000>,
+ <0x30000000 0x10000000>;
+ };
+
chosen {
stdout-path = "serial2:57600n8";
};
- board {
- compatible = "simple-bus";
+ vcc: regulator@0 {
+ compatible = "regulator-fixed";
+ regulator-name = "vcc";
+
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-always-on;
+ };
+
+ mmc1_power: regulator@1 {
+ compatible = "regulator-fixed";
+ regulator-name = "mmc1_vcc";
+ gpio = <&gpe 9 0>;
+
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ vin-supply = <&vcc>;
+ };
+
+ headphones_amp: analog-amplifier@0 {
+ compatible = "simple-audio-amplifier";
+ enable-gpios = <&gpf 3 GPIO_ACTIVE_LOW>;
+ enable-delay-ms = <50>;
+
+ VCC-supply = <&ldo5>;
+ sound-name-prefix = "Headphones Amp";
+ };
+
+ speaker_amp: analog-amplifier@1 {
+ compatible = "simple-audio-amplifier";
+ enable-gpios = <&gpf 20 GPIO_ACTIVE_HIGH>;
+
+ VCC-supply = <&ldo5>;
+ sound-name-prefix = "Speaker Amp";
+ };
+
+ sound {
+ compatible = "simple-audio-card";
+
+ simple-audio-card,name = "gcw0-audio";
+ simple-audio-card,format = "i2s";
+
+ simple-audio-card,widgets =
+ "Speaker", "Speaker",
+ "Headphone", "Headphones",
+ "Line", "FM Radio",
+ "Microphone", "Built-in Mic";
+ simple-audio-card,routing =
+ "Headphones Amp INL", "LHPOUT",
+ "Headphones Amp INR", "RHPOUT",
+ "Headphones", "Headphones Amp OUTL",
+ "Headphones", "Headphones Amp OUTR",
+ "Speaker Amp INL", "LOUT",
+ "Speaker Amp INR", "ROUT",
+ "Speaker", "Speaker Amp OUTL",
+ "Speaker", "Speaker Amp OUTR",
+ "LLINEIN", "FM Radio",
+ "RLINEIN", "FM Radio",
+ "Built-in Mic", "MICBIAS",
+ "MIC1P", "Built-in Mic",
+ "MIC1N", "Built-in Mic";
+ simple-audio-card,pin-switches = "Speaker", "Headphones";
+
+ simple-audio-card,hp-det-gpio = <&gpf 21 GPIO_ACTIVE_HIGH>;
+ simple-audio-card,aux-devs = <&speaker_amp>, <&headphones_amp>;
+
+ simple-audio-card,bitclock-master = <&dai_codec>;
+ simple-audio-card,frame-master = <&dai_codec>;
+
+ dai_cpu: simple-audio-card,cpu {
+ sound-dai = <&aic>;
+ };
+
+ dai_codec: simple-audio-card,codec {
+ sound-dai = <&codec>;
+ };
+ };
+
+ rumble {
+ compatible = "pwm-vibrator";
+ pwms = <&pwm 4 2000000 0>;
+ pwm-names = "enable";
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&pins_pwm4>;
+ };
+
+ backlight: backlight {
+ compatible = "pwm-backlight";
+ pwms = <&pwm 1 40000 0>;
+ power-supply = <&vcc>;
+
+ brightness-levels = <0 16 32 48 64 80 96 112 128
+ 144 160 176 192 208 224 240 255>;
+ default-brightness-level = <12>;
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&pins_pwm1>;
+ };
+
+ gpio-keys {
+ compatible = "gpio-keys";
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ autorepeat;
+
+ button@0 {
+ label = "D-pad up";
+ linux,code = <KEY_UP>;
+ linux,can-disable;
+ gpios = <&gpe 21 GPIO_ACTIVE_LOW>;
+ };
+
+ button@1 {
+ label = "D-pad down";
+ linux,code = <KEY_DOWN>;
+ linux,can-disable;
+ gpios = <&gpe 25 GPIO_ACTIVE_LOW>;
+ };
+
+ button@2 {
+ label = "D-pad left";
+ linux,code = <KEY_LEFT>;
+ linux,can-disable;
+ gpios = <&gpe 23 GPIO_ACTIVE_LOW>;
+ };
+
+ button@3 {
+ label = "D-pad right";
+ linux,code = <KEY_RIGHT>;
+ linux,can-disable;
+ gpios = <&gpe 24 GPIO_ACTIVE_LOW>;
+ };
+
+ button@4 {
+ label = "Button A";
+ linux,code = <KEY_LEFTCTRL>;
+ linux,can-disable;
+ gpios = <&gpe 29 GPIO_ACTIVE_LOW>;
+ };
+
+ button@5 {
+ label = "Button B";
+ linux,code = <KEY_LEFTALT>;
+ linux,can-disable;
+ gpios = <&gpe 20 GPIO_ACTIVE_LOW>;
+ };
+
+ button@6 {
+ label = "Button Y";
+ linux,code = <KEY_SPACE>;
+ linux,can-disable;
+ gpios = <&gpe 27 GPIO_ACTIVE_LOW>;
+ };
+
+ button@7 {
+ label = "Button X";
+ linux,code = <KEY_LEFTSHIFT>;
+ linux,can-disable;
+ gpios = <&gpe 28 GPIO_ACTIVE_LOW>;
+ };
+
+ button@8 {
+ label = "Left shoulder button";
+ linux,code = <KEY_TAB>;
+ linux,can-disable;
+ gpios = <&gpb 20 GPIO_ACTIVE_LOW>;
+ };
+
+ button@9 {
+ label = "Right shoulder button";
+ linux,code = <KEY_BACKSPACE>;
+ linux,can-disable;
+ gpios = <&gpe 26 GPIO_ACTIVE_LOW>;
+ };
+
+ button@10 {
+ label = "Start button";
+ linux,code = <KEY_ENTER>;
+ linux,can-disable;
+ gpios = <&gpb 21 GPIO_ACTIVE_LOW>;
+ };
+
+ button@11 {
+ label = "Select button";
+ linux,code = <KEY_ESC>;
+ linux,can-disable;
+ /*
+ * This is the only button that is active high,
+ * since it doubles as BOOT_SEL1.
+ */
+ gpios = <&gpd 18 GPIO_ACTIVE_HIGH>;
+ };
+
+ button@12 {
+ label = "Power slider";
+ linux,code = <KEY_POWER>;
+ linux,can-disable;
+ gpios = <&gpa 30 GPIO_ACTIVE_LOW>;
+ wakeup-source;
+ };
+
+ button@13 {
+ label = "Power hold";
+ linux,code = <KEY_PAUSE>;
+ linux,can-disable;
+ gpios = <&gpf 11 GPIO_ACTIVE_LOW>;
+ };
+ };
+
+ i2c3: i2c-controller@3 {
+ compatible = "i2c-gpio";
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ sda-gpios = <&gpd 5 GPIO_ACTIVE_HIGH>;
+ scl-gpios = <&gpd 4 GPIO_ACTIVE_HIGH>;
+ i2c-gpio,delay-us = <2>; /* 250 kHz */
+
+ act8600: pmic@5a {
+ compatible = "active-semi,act8600";
+ reg = <0x5a>;
+
+ regulators {
+ /* USB OTG */
+ otg_vbus: SUDCDC_REG4 {
+ /*
+ * 5.3V instead of 5.0V to compensate
+ * for the voltage drop of a diode
+ * between the regulator and the
+ * connector.
+ */
+ regulator-min-microvolt = <5300000>;
+ regulator-max-microvolt = <5300000>;
+ inl-supply = <&vcc>;
+ };
+
+ /*
+ * When this is off, there is no sound, but also
+ * no USB networking.
+ */
+ ldo5: LDO5 {
+ regulator-min-microvolt = <2500000>;
+ regulator-max-microvolt = <2500000>;
+ inl-supply = <&vcc>;
+ };
+
+ /* LCD panel and FM radio */
+ ldo6: LDO6 {
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ inl-supply = <&vcc>;
+ };
+
+ /* ??? */
+ LDO7 {
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ /*regulator-always-on;*/
+ inl-supply = <&vcc>;
+ };
+
+ /*
+ * The colors on the LCD are wrong when this is
+ * off, which is strange, since the LCD panel
+ * data sheet only mentions a 3.3V input.
+ */
+ LDO8 {
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-always-on;
+ inl-supply = <&vcc>;
+ };
+
+ /* RTC fixed 3.3V */
+ LDO_REG9 {
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-always-on;
+ inl-supply = <&vcc>;
+ };
+
+ /* Unused fixed 1.2V */
+ LDO_REG10 {
+ inl-supply = <&vcc>;
+ };
+ };
+ };
+ };
+
+ leds {
+ compatible = "gpio-leds";
+
+ led {
+ gpios = <&gpb 30 GPIO_ACTIVE_LOW>;
+ default-state = "on";
+ };
+ };
+
+ spi {
+ compatible = "spi-gpio";
#address-cells = <1>;
- #size-cells = <1>;
- ranges;
+ #size-cells = <0>;
+
+ sck-gpios = <&gpe 15 GPIO_ACTIVE_HIGH>;
+ mosi-gpios = <&gpe 17 GPIO_ACTIVE_HIGH>;
+ cs-gpios = <&gpe 16 GPIO_ACTIVE_HIGH>;
+ num-chipselects = <1>;
+
+ nt39016@0 {
+ compatible = "kingdisplay,kd035g6-54nt";
+ reg = <0>;
+
+ spi-max-frequency = <3125000>;
+ spi-3wire;
+ spi-cs-high;
- otg_phy: otg-phy {
- compatible = "usb-nop-xceiv";
- clocks = <&cgu JZ4770_CLK_OTG_PHY>;
- clock-names = "main_clk";
+ reset-gpios = <&gpe 2 GPIO_ACTIVE_LOW>;
+
+ backlight = <&backlight>;
+ power-supply = <&ldo6>;
+
+ port {
+ panel_input: endpoint {
+ remote-endpoint = <&panel_output>;
+ };
+ };
+ };
+ };
+
+ connector {
+ compatible = "gpio-usb-b-connector", "usb-b-connector";
+ label = "mini-USB";
+ type = "mini";
+
+ /*
+ * USB OTG is not yet working reliably, the ID detection
+ * mechanism tends to fry easily for unknown reasons.
+ * Until this is fixed, disable OTG by not providing the
+ * ID GPIO to the driver.
+ */
+ //id-gpios = <&gpf 18 GPIO_ACTIVE_LOW>;
+
+ vbus-gpios = <&gpb 5 GPIO_ACTIVE_HIGH>;
+ vbus-supply = <&otg_vbus>;
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&pins_otg>;
+
+ port {
+ usb_ep: endpoint {
+ remote-endpoint = <&usb_otg_ep>;
+ };
};
};
};
@@ -37,24 +391,86 @@
clock-frequency = <12000000>;
};
+&pinctrl {
+ pins_lcd: lcd {
+ function = "lcd";
+ groups = "lcd-24bit";
+ };
+
+ pins_uart2: uart2 {
+ function = "uart2";
+ groups = "uart2-data";
+ };
+
+ pins_mmc0: mmc0 {
+ function = "mmc0";
+ groups = "mmc0-1bit-a", "mmc0-4bit-a";
+ };
+
+ pins_mmc1: mmc1 {
+ function = "mmc1";
+ groups = "mmc1-1bit-d", "mmc1-4bit-d";
+ };
+
+ pins_otg: otg {
+ otg-vbus-pin {
+ function = "otg";
+ groups = "otg-vbus";
+ };
+
+ vbus-pin {
+ pins = "PB5";
+ bias-disable;
+ };
+ };
+
+ pins_pwm1: pwm1 {
+ function = "pwm1";
+ groups = "pwm1";
+ };
+
+ pins_pwm4: pwm4 {
+ function = "pwm4";
+ groups = "pwm4";
+ };
+};
+
&uart2 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pins_uart2>;
+
status = "okay";
};
&cgu {
- /* Put high-speed peripherals under PLL1, such that we can change the
+ /*
+ * Put high-speed peripherals under PLL1, such that we can change the
* PLL0 frequency on demand without having to suspend peripherals.
* We use a rate of 432 MHz, which is the least common multiple of
* 27 MHz (required by TV encoder) and 48 MHz (required by USB host).
+ * Put the GPU under PLL0 since we want a higher frequency.
+ * Use the 32 kHz oscillator as the parent of the RTC for a higher
+ * precision.
*/
assigned-clocks =
<&cgu JZ4770_CLK_PLL1>,
- <&cgu JZ4770_CLK_UHC>;
+ <&cgu JZ4770_CLK_GPU>,
+ <&cgu JZ4770_CLK_RTC>,
+ <&cgu JZ4770_CLK_UHC>,
+ <&cgu JZ4770_CLK_LPCLK_MUX>,
+ <&cgu JZ4770_CLK_MMC0_MUX>,
+ <&cgu JZ4770_CLK_MMC1_MUX>;
assigned-clock-parents =
<0>,
+ <&cgu JZ4770_CLK_PLL0>,
+ <&cgu JZ4770_CLK_OSC32K>,
+ <&cgu JZ4770_CLK_PLL1>,
+ <&cgu JZ4770_CLK_PLL1>,
+ <&cgu JZ4770_CLK_PLL1>,
<&cgu JZ4770_CLK_PLL1>;
assigned-clock-rates =
- <432000000>;
+ <432000000>,
+ <600000000>;
};
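(Checking the arithmetic in the comment above: 27 = 3^3 and 48 = 2^4 * 3, so lcm(27, 48) = 2^4 * 3^3 = 432, i.e. 432 MHz is indeed the least common multiple of the two required rates.)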
&uhc {
@@ -63,10 +479,69 @@
};
&tcu {
- /* 750 kHz for the system timer and clocksource */
- assigned-clocks = <&tcu TCU_CLK_TIMER0>, <&tcu TCU_CLK_TIMER2>;
- assigned-clock-rates = <750000>, <750000>;
+ /*
+ * 750 kHz for the system timer and clocksource, 12 MHz for the OST,
+ * and use RTC as the parent for the watchdog clock
+ */
+ assigned-clocks = <&tcu TCU_CLK_TIMER0>, <&tcu TCU_CLK_TIMER2>,
+ <&tcu TCU_CLK_OST>, <&tcu TCU_CLK_WDT>;
+ assigned-clock-parents = <0>, <0>, <0>, <&cgu JZ4770_CLK_RTC>;
+ assigned-clock-rates = <750000>, <750000>, <12000000>;
- /* PWM1 is in use, so reserve channel #2 for the clocksource */
+ /* PWM1 is in use, so use channel #2 for the clocksource */
ingenic,pwm-channels-mask = <0xfa>;
};
+
+&usb_otg {
+ port {
+ usb_otg_ep: endpoint {
+ remote-endpoint = <&usb_ep>;
+ };
+ };
+};
+
+&otg_phy {
+ vcc-supply = <&ldo5>;
+};
+
+&rtc {
+ clocks = <&cgu JZ4770_CLK_RTC>;
+ clock-names = "rtc";
+
+ system-power-controller;
+};
+
+&mmc0 {
+ status = "okay";
+
+ bus-width = <4>;
+ max-frequency = <48000000>;
+ vmmc-supply = <&vcc>;
+ non-removable;
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&pins_mmc0>;
+};
+
+&mmc1 {
+ status = "okay";
+
+ bus-width = <4>;
+ max-frequency = <48000000>;
+ cd-gpios = <&gpb 2 GPIO_ACTIVE_LOW>;
+ vmmc-supply = <&mmc1_power>;
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&pins_mmc1>;
+};
+
+&lcd {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pins_lcd>;
+
+ port {
+ panel_output: endpoint {
+ remote-endpoint = <&panel_input>;
+ };
+ };
+};
diff --git a/arch/mips/boot/dts/ingenic/gcw0_proto.dts b/arch/mips/boot/dts/ingenic/gcw0_proto.dts
new file mode 100644
index 000000000000..02df22f8ae0f
--- /dev/null
+++ b/arch/mips/boot/dts/ingenic/gcw0_proto.dts
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+/dts-v1/;
+
+#include "gcw0.dts"
+
+/ {
+ model = "GCW Zero Prototype";
+};
+
+&memory {
+ /* Prototype has only 256 MiB of RAM */
+ reg = <0x0 0x10000000>;
+};
diff --git a/arch/mips/boot/dts/ingenic/jz4740.dtsi b/arch/mips/boot/dts/ingenic/jz4740.dtsi
index a3301bab9231..1520585c235c 100644
--- a/arch/mips/boot/dts/ingenic/jz4740.dtsi
+++ b/arch/mips/boot/dts/ingenic/jz4740.dtsi
@@ -55,10 +55,10 @@
#clock-cells = <1>;
- clocks = <&cgu JZ4740_CLK_RTC
- &cgu JZ4740_CLK_EXT
- &cgu JZ4740_CLK_PCLK
- &cgu JZ4740_CLK_TCU>;
+ clocks = <&cgu JZ4740_CLK_RTC>,
+ <&cgu JZ4740_CLK_EXT>,
+ <&cgu JZ4740_CLK_PCLK>,
+ <&cgu JZ4740_CLK_TCU>;
clock-names = "rtc", "ext", "pclk", "tcu";
interrupt-controller;
@@ -74,6 +74,20 @@
clocks = <&tcu TCU_CLK_WDT>;
clock-names = "wdt";
};
+
+ pwm: pwm@40 {
+ compatible = "ingenic,jz4740-pwm";
+ reg = <0x40 0x80>;
+
+ #pwm-cells = <3>;
+
+ clocks = <&tcu TCU_CLK_TIMER0>, <&tcu TCU_CLK_TIMER1>,
+ <&tcu TCU_CLK_TIMER2>, <&tcu TCU_CLK_TIMER3>,
+ <&tcu TCU_CLK_TIMER4>, <&tcu TCU_CLK_TIMER5>,
+ <&tcu TCU_CLK_TIMER6>, <&tcu TCU_CLK_TIMER7>;
+ clock-names = "timer0", "timer1", "timer2", "timer3",
+ "timer4", "timer5", "timer6", "timer7";
+ };
};
rtc_dev: rtc@10003000 {
@@ -241,10 +255,10 @@
reg = <0x13010000 0x54>;
#address-cells = <2>;
#size-cells = <1>;
- ranges = <1 0 0x18000000 0x4000000
- 2 0 0x14000000 0x4000000
- 3 0 0x0c000000 0x4000000
- 4 0 0x08000000 0x4000000>;
+ ranges = <1 0 0x18000000 0x4000000>,
+ <2 0 0x14000000 0x4000000>,
+ <3 0 0x0c000000 0x4000000>,
+ <4 0 0x08000000 0x4000000>;
clocks = <&cgu JZ4740_CLK_MCLK>;
};
@@ -258,8 +272,7 @@
dmac: dma-controller@13020000 {
compatible = "ingenic,jz4740-dma";
- reg = <0x13020000 0xbc
- 0x13020300 0x14>;
+ reg = <0x13020000 0xbc>, <0x13020300 0x14>;
#dma-cells = <2>;
interrupt-parent = <&intc>;
diff --git a/arch/mips/boot/dts/ingenic/jz4770.dtsi b/arch/mips/boot/dts/ingenic/jz4770.dtsi
index 0bfb9edff3d0..fa11ac950499 100644
--- a/arch/mips/boot/dts/ingenic/jz4770.dtsi
+++ b/arch/mips/boot/dts/ingenic/jz4770.dtsi
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <dt-bindings/clock/jz4770-cgu.h>
+#include <dt-bindings/clock/ingenic,tcu.h>
/ {
#address-cells = <1>;
@@ -37,13 +38,25 @@
};
cgu: jz4770-cgu@10000000 {
- compatible = "ingenic,jz4770-cgu";
+ compatible = "ingenic,jz4770-cgu", "simple-mfd";
reg = <0x10000000 0x100>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges = <0x0 0x10000000 0x100>;
clocks = <&ext>, <&osc32k>;
clock-names = "ext", "osc32k";
#clock-cells = <1>;
+
+ otg_phy: usb-phy@3c {
+ compatible = "ingenic,jz4770-phy";
+ reg = <0x3c 0x10>;
+
+ clocks = <&cgu JZ4770_CLK_OTG_PHY>;
+
+ #phy-cells = <0>;
+ };
};
tcu: timer@10002000 {
@@ -55,9 +68,9 @@
#clock-cells = <1>;
- clocks = <&cgu JZ4770_CLK_RTC
- &cgu JZ4770_CLK_EXT
- &cgu JZ4770_CLK_PCLK>;
+ clocks = <&cgu JZ4770_CLK_RTC>,
+ <&cgu JZ4770_CLK_EXT>,
+ <&cgu JZ4770_CLK_PCLK>;
clock-names = "rtc", "ext", "pclk";
interrupt-controller;
@@ -65,6 +78,47 @@
interrupt-parent = <&intc>;
interrupts = <27 26 25>;
+
+ watchdog: watchdog@0 {
+ compatible = "ingenic,jz4770-watchdog",
+ "ingenic,jz4740-watchdog";
+ reg = <0x0 0xc>;
+
+ clocks = <&tcu TCU_CLK_WDT>;
+ clock-names = "wdt";
+ };
+
+ pwm: pwm@40 {
+ compatible = "ingenic,jz4770-pwm", "ingenic,jz4740-pwm";
+ reg = <0x40 0x80>;
+
+ #pwm-cells = <3>;
+
+ clocks = <&tcu TCU_CLK_TIMER0>, <&tcu TCU_CLK_TIMER1>,
+ <&tcu TCU_CLK_TIMER2>, <&tcu TCU_CLK_TIMER3>,
+ <&tcu TCU_CLK_TIMER4>, <&tcu TCU_CLK_TIMER5>,
+ <&tcu TCU_CLK_TIMER6>, <&tcu TCU_CLK_TIMER7>;
+ clock-names = "timer0", "timer1", "timer2", "timer3",
+ "timer4", "timer5", "timer6", "timer7";
+ };
+
+ ost: timer@e0 {
+ compatible = "ingenic,jz4770-ost";
+ reg = <0xe0 0x20>;
+
+ clocks = <&tcu TCU_CLK_OST>;
+ clock-names = "ost";
+
+ interrupts = <15>;
+ };
+ };
+
+ rtc: rtc@10003000 {
+ compatible = "ingenic,jz4770-rtc", "ingenic,jz4760-rtc";
+ reg = <0x10003000 0x40>;
+
+ interrupt-parent = <&intc>;
+ interrupts = <32>;
};
pinctrl: pin-controller@10010000 {
@@ -165,6 +219,93 @@
};
};
+ aic: audio-controller@10020000 {
+ compatible = "ingenic,jz4770-i2s";
+ reg = <0x10020000 0x94>;
+
+ #sound-dai-cells = <0>;
+
+ clocks = <&cgu JZ4770_CLK_AIC>, <&cgu JZ4770_CLK_I2S>,
+ <&cgu JZ4770_CLK_EXT>, <&cgu JZ4770_CLK_PLL0>;
+ clock-names = "aic", "i2s", "ext", "pll half";
+
+ interrupt-parent = <&intc>;
+ interrupts = <34>;
+
+ dmas = <&dmac0 25 0xffffffff>, <&dmac0 24 0xffffffff>;
+ dma-names = "rx", "tx";
+ };
+
+ codec: audio-codec@100200a0 {
+ compatible = "ingenic,jz4770-codec";
+ reg = <0x100200a4 0x8>;
+
+ #sound-dai-cells = <0>;
+
+ clocks = <&cgu JZ4770_CLK_AIC>;
+ clock-names = "aic";
+ };
+
+ mmc0: mmc@10021000 {
+ compatible = "ingenic,jz4770-mmc", "ingenic,jz4760-mmc";
+ reg = <0x10021000 0x1000>;
+
+ clocks = <&cgu JZ4770_CLK_MMC0>;
+ clock-names = "mmc";
+
+ interrupt-parent = <&intc>;
+ interrupts = <37>;
+
+ dmas = <&dmac1 27 0xffffffff>, <&dmac1 26 0xffffffff>;
+ dma-names = "rx", "tx";
+
+ cap-sd-highspeed;
+ cap-mmc-highspeed;
+ cap-sdio-irq;
+
+ status = "disabled";
+ };
+
+ mmc1: mmc@10022000 {
+ compatible = "ingenic,jz4770-mmc", "ingenic,jz4760-mmc";
+ reg = <0x10022000 0x1000>;
+
+ clocks = <&cgu JZ4770_CLK_MMC1>;
+ clock-names = "mmc";
+
+ interrupt-parent = <&intc>;
+ interrupts = <36>;
+
+ dmas = <&dmac1 31 0xffffffff>, <&dmac1 30 0xffffffff>;
+ dma-names = "rx", "tx";
+
+ cap-sd-highspeed;
+ cap-mmc-highspeed;
+ cap-sdio-irq;
+
+ status = "disabled";
+ };
+
+ mmc2: mmc@10023000 {
+ compatible = "ingenic,jz4770-mmc", "ingenic,jz4760-mmc";
+ reg = <0x10023000 0x1000>;
+
+ clocks = <&cgu JZ4770_CLK_MMC2>;
+ clock-names = "mmc";
+
+ interrupt-parent = <&intc>;
+ interrupts = <35>;
+
+ dmas = <&dmac1 37 0xffffffff>, <&dmac1 36 0xffffffff>;
+ dma-names = "rx", "tx";
+
+ cap-sd-highspeed;
+ cap-mmc-highspeed;
+ cap-sdio-irq;
+
+ status = "disabled";
+ };
+
uart0: serial@10030000 {
compatible = "ingenic,jz4770-uart";
reg = <0x10030000 0x100>;
@@ -217,34 +358,63 @@
status = "disabled";
};
+ adc: adc@10070000 {
+ compatible = "ingenic,jz4770-adc";
+ reg = <0x10070000 0x30>;
+
+ #io-channel-cells = <1>;
+
+ clocks = <&cgu JZ4770_CLK_ADC>;
+ clock-names = "adc";
+
+ interrupt-parent = <&intc>;
+ interrupts = <18>;
+ };
+
+ gpu: gpu@13040000 {
+ compatible = "vivante,gc";
+ reg = <0x13040000 0x10000>;
+
+ clocks = <&cgu JZ4770_CLK_GPU>,
+ <&cgu JZ4770_CLK_GPU>,
+ <&cgu JZ4770_CLK_GPU>;
+ clock-names = "bus", "core", "shader";
+
+ interrupt-parent = <&intc>;
+ interrupts = <6>;
+ };
+
+ lcd: lcd-controller@13050000 {
+ compatible = "ingenic,jz4770-lcd";
+ reg = <0x13050000 0x300>;
+
+ interrupt-parent = <&intc>;
+ interrupts = <31>;
+
+ clocks = <&cgu JZ4770_CLK_LPCLK_MUX>;
+ clock-names = "lcd_pclk";
+ };
+
dmac0: dma-controller@13420000 {
compatible = "ingenic,jz4770-dma";
- reg = <0x13420000 0xC0
- 0x13420300 0x20>;
+ reg = <0x13420000 0xC0>, <0x13420300 0x20>;
- #dma-cells = <1>;
+ #dma-cells = <2>;
clocks = <&cgu JZ4770_CLK_DMA>;
interrupt-parent = <&intc>;
interrupts = <24>;
-
- /* Disable dmac0 until we have something that uses it */
- status = "disabled";
};
dmac1: dma-controller@13420100 {
compatible = "ingenic,jz4770-dma";
- reg = <0x13420100 0xC0
- 0x13420400 0x20>;
+ reg = <0x13420100 0xC0>, <0x13420400 0x20>;
- #dma-cells = <1>;
+ #dma-cells = <2>;
clocks = <&cgu JZ4770_CLK_DMA>;
interrupt-parent = <&intc>;
interrupts = <23>;
-
- /* Disable dmac1 until we have something that uses it */
- status = "disabled";
};
uhc: uhc@13430000 {
@@ -260,4 +430,29 @@
status = "disabled";
};
+
+ usb_otg: usb@13440000 {
+ compatible = "ingenic,jz4770-musb";
+ reg = <0x13440000 0x10000>;
+
+ clocks = <&cgu JZ4770_CLK_OTG>;
+ clock-names = "udc";
+
+ interrupt-parent = <&intc>;
+ interrupts = <21>;
+ interrupt-names = "mc";
+
+ phys = <&otg_phy>;
+
+ usb-role-switch;
+ };
+
+ rom: memory@1fc00000 {
+ compatible = "mtd-rom";
+ probe-type = "map_rom";
+ reg = <0x1fc00000 0x2000>;
+
+ bank-width = <4>;
+ device-width = <1>;
+ };
};
diff --git a/arch/mips/boot/dts/ingenic/jz4780.dtsi b/arch/mips/boot/dts/ingenic/jz4780.dtsi
index bb89653d16a3..b7f409a7cf5d 100644
--- a/arch/mips/boot/dts/ingenic/jz4780.dtsi
+++ b/arch/mips/boot/dts/ingenic/jz4780.dtsi
@@ -58,9 +58,9 @@
#clock-cells = <1>;
- clocks = <&cgu JZ4780_CLK_RTCLK
- &cgu JZ4780_CLK_EXCLK
- &cgu JZ4780_CLK_PCLK>;
+ clocks = <&cgu JZ4780_CLK_RTCLK>,
+ <&cgu JZ4780_CLK_EXCLK>,
+ <&cgu JZ4780_CLK_PCLK>;
clock-names = "rtc", "ext", "pclk";
interrupt-controller;
@@ -76,6 +76,30 @@
clocks = <&tcu TCU_CLK_WDT>;
clock-names = "wdt";
};
+
+ pwm: pwm@40 {
+ compatible = "ingenic,jz4780-pwm", "ingenic,jz4740-pwm";
+ reg = <0x40 0x80>;
+
+ #pwm-cells = <3>;
+
+ clocks = <&tcu TCU_CLK_TIMER0>, <&tcu TCU_CLK_TIMER1>,
+ <&tcu TCU_CLK_TIMER2>, <&tcu TCU_CLK_TIMER3>,
+ <&tcu TCU_CLK_TIMER4>, <&tcu TCU_CLK_TIMER5>,
+ <&tcu TCU_CLK_TIMER6>, <&tcu TCU_CLK_TIMER7>;
+ clock-names = "timer0", "timer1", "timer2", "timer3",
+ "timer4", "timer5", "timer6", "timer7";
+ };
+
+ ost: timer@e0 {
+ compatible = "ingenic,jz4780-ost", "ingenic,jz4770-ost";
+ reg = <0xe0 0x20>;
+
+ clocks = <&tcu TCU_CLK_OST>;
+ clock-names = "ost";
+
+ interrupts = <15>;
+ };
};
rtc_dev: rtc@10003000 {
@@ -196,8 +220,7 @@
gpio-miso = <&gpe 14 0>;
gpio-sck = <&gpe 15 0>;
gpio-mosi = <&gpe 17 0>;
- cs-gpios = <&gpe 16 0
- &gpe 18 0>;
+ cs-gpios = <&gpe 16 0>, <&gpe 18 0>;
spidev@0 {
compatible = "spidev";
@@ -358,26 +381,40 @@
};
nemc: nemc@13410000 {
- compatible = "ingenic,jz4780-nemc";
+ compatible = "ingenic,jz4780-nemc", "simple-mfd";
reg = <0x13410000 0x10000>;
#address-cells = <2>;
#size-cells = <1>;
- ranges = <1 0 0x1b000000 0x1000000
- 2 0 0x1a000000 0x1000000
- 3 0 0x19000000 0x1000000
- 4 0 0x18000000 0x1000000
- 5 0 0x17000000 0x1000000
- 6 0 0x16000000 0x1000000>;
+ ranges = <0 0 0x13410000 0x10000>,
+ <1 0 0x1b000000 0x1000000>,
+ <2 0 0x1a000000 0x1000000>,
+ <3 0 0x19000000 0x1000000>,
+ <4 0 0x18000000 0x1000000>,
+ <5 0 0x17000000 0x1000000>,
+ <6 0 0x16000000 0x1000000>;
clocks = <&cgu JZ4780_CLK_NEMC>;
status = "disabled";
+
+ efuse: efuse@d0 {
+ reg = <0 0xd0 0x30>;
+ compatible = "ingenic,jz4780-efuse";
+
+ clocks = <&cgu JZ4780_CLK_AHB2>;
+
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ eth0_addr: eth-mac-addr@0x22 {
+ reg = <0x22 0x6>;
+ };
+ };
};
dma: dma@13420000 {
compatible = "ingenic,jz4780-dma";
- reg = <0x13420000 0x400
- 0x13421000 0x40>;
+ reg = <0x13420000 0x400>, <0x13421000 0x40>;
#dma-cells = <2>;
interrupt-parent = <&intc>;
diff --git a/arch/mips/boot/dts/ingenic/x1000.dtsi b/arch/mips/boot/dts/ingenic/x1000.dtsi
index 147f7d5c243a..59a63a0985a8 100644
--- a/arch/mips/boot/dts/ingenic/x1000.dtsi
+++ b/arch/mips/boot/dts/ingenic/x1000.dtsi
@@ -58,9 +58,9 @@
#clock-cells = <1>;
- clocks = <&cgu X1000_CLK_RTCLK
- &cgu X1000_CLK_EXCLK
- &cgu X1000_CLK_PCLK>;
+ clocks = <&cgu X1000_CLK_RTCLK>,
+ <&cgu X1000_CLK_EXCLK>,
+ <&cgu X1000_CLK_PCLK>;
clock-names = "rtc", "ext", "pclk";
interrupt-controller;
@@ -239,8 +239,7 @@
pdma: dma-controller@13420000 {
compatible = "ingenic,x1000-dma";
- reg = <0x13420000 0x400
- 0x13421000 0x40>;
+ reg = <0x13420000 0x400>, <0x13421000 0x40>;
#dma-cells = <2>;
interrupt-parent = <&intc>;
diff --git a/arch/mips/boot/dts/loongson/rs780e-pch.dtsi b/arch/mips/boot/dts/loongson/rs780e-pch.dtsi
index 45c54d555fa4..d0d5d60a8697 100644
--- a/arch/mips/boot/dts/loongson/rs780e-pch.dtsi
+++ b/arch/mips/boot/dts/loongson/rs780e-pch.dtsi
@@ -9,6 +9,18 @@
0 0x40000000 0 0x40000000 0 0x40000000
0xfd 0xfe000000 0xfd 0xfe000000 0 0x2000000 /* PCI Config Space */>;
+ pci@1a000000 {
+ compatible = "loongson,rs780e-pci";
+ device_type = "pci";
+ #address-cells = <3>;
+ #size-cells = <2>;
+
+ reg = <0 0x1a000000 0 0x02000000>;
+
+ ranges = <0x01000000 0 0x00004000 0 0x18004000 0 0x00004000>,
+ <0x02000000 0 0x40000000 0 0x40000000 0 0x40000000>;
+ };
+
isa {
compatible = "isa";
#address-cells = <2>;
@@ -21,6 +33,11 @@
interrupts = <8>;
interrupt-parent = <&htpic>;
};
+
+ acpi@800 {
+ compatible = "loongson,rs780e-acpi";
+ reg = <1 0x800 0x100>;
+ };
};
};
};
diff --git a/arch/mips/boot/dts/mscc/ocelot.dtsi b/arch/mips/boot/dts/mscc/ocelot.dtsi
index 797d336db54d..f94e8a02ed06 100644
--- a/arch/mips/boot/dts/mscc/ocelot.dtsi
+++ b/arch/mips/boot/dts/mscc/ocelot.dtsi
@@ -214,7 +214,7 @@
miim1: miim1 {
pins = "GPIO_14", "GPIO_15";
- function = "miim1";
+ function = "miim";
};
};
diff --git a/arch/mips/boot/dts/qca/ar9331.dtsi b/arch/mips/boot/dts/qca/ar9331.dtsi
index 8f5aed760abb..83b3c0ce135a 100644
--- a/arch/mips/boot/dts/qca/ar9331.dtsi
+++ b/arch/mips/boot/dts/qca/ar9331.dtsi
@@ -59,7 +59,7 @@
#qca,ddr-wb-channel-cells = <1>;
};
- uart: uart@18020000 {
+ uart: serial@18020000 {
compatible = "qca,ar9330-uart";
reg = <0x18020000 0x14>;
diff --git a/arch/mips/boot/dts/qca/ar9331_dpt_module.dts b/arch/mips/boot/dts/qca/ar9331_dpt_module.dts
index 0f2b20044834..7695d326df11 100644
--- a/arch/mips/boot/dts/qca/ar9331_dpt_module.dts
+++ b/arch/mips/boot/dts/qca/ar9331_dpt_module.dts
@@ -3,6 +3,7 @@
#include <dt-bindings/gpio/gpio.h>
#include <dt-bindings/input/input.h>
+#include <dt-bindings/leds/common.h>
#include "ar9331.dtsi"
@@ -22,8 +23,9 @@
leds {
compatible = "gpio-leds";
- system {
- label = "dpt-module:green:system";
+ led-0 {
+ function = LED_FUNCTION_STATUS;
+ color = <LED_COLOR_ID_GREEN>;
gpios = <&gpio 27 GPIO_ACTIVE_LOW>;
default-state = "off";
};
diff --git a/arch/mips/cavium-octeon/Platform b/arch/mips/cavium-octeon/Platform
index 45be853700e6..4adef38dea9d 100644
--- a/arch/mips/cavium-octeon/Platform
+++ b/arch/mips/cavium-octeon/Platform
@@ -1,7 +1,6 @@
#
# Cavium Octeon
#
-platform-$(CONFIG_CAVIUM_OCTEON_SOC) += cavium-octeon/
cflags-$(CONFIG_CAVIUM_OCTEON_SOC) += \
-I$(srctree)/arch/mips/include/asm/mach-cavium-octeon
load-$(CONFIG_CAVIUM_OCTEON_SOC) += 0xffffffff81100000
diff --git a/arch/mips/cavium-octeon/crypto/octeon-md5.c b/arch/mips/cavium-octeon/crypto/octeon-md5.c
index d1ed066e1a17..8c8ea139653e 100644
--- a/arch/mips/cavium-octeon/crypto/octeon-md5.c
+++ b/arch/mips/cavium-octeon/crypto/octeon-md5.c
@@ -25,7 +25,6 @@
#include <linux/module.h>
#include <linux/string.h>
#include <asm/byteorder.h>
-#include <linux/cryptohash.h>
#include <asm/octeon/octeon.h>
#include <crypto/internal/hash.h>
diff --git a/arch/mips/cavium-octeon/executive/cvmx-helper-npi.c b/arch/mips/cavium-octeon/executive/cvmx-helper-npi.c
index cc94cfa545b4..cb210d2ef0c4 100644
--- a/arch/mips/cavium-octeon/executive/cvmx-helper-npi.c
+++ b/arch/mips/cavium-octeon/executive/cvmx-helper-npi.c
@@ -59,18 +59,6 @@ int __cvmx_helper_npi_probe(int interface)
&& !OCTEON_IS_MODEL(OCTEON_CN52XX_PASS1_X))
/* The packet engines didn't exist before pass 2 */
return 4;
-#if 0
- /*
- * Technically CN30XX, CN31XX, and CN50XX contain packet
- * engines, but nobody ever uses them. Since this is the case,
- * we disable them here.
- */
- else if (OCTEON_IS_MODEL(OCTEON_CN31XX)
- || OCTEON_IS_MODEL(OCTEON_CN50XX))
- return 2;
- else if (OCTEON_IS_MODEL(OCTEON_CN30XX))
- return 1;
-#endif
#endif
return 0;
}
diff --git a/arch/mips/cavium-octeon/executive/cvmx-pko.c b/arch/mips/cavium-octeon/executive/cvmx-pko.c
index b077597c668a..b0efc35e95c4 100644
--- a/arch/mips/cavium-octeon/executive/cvmx-pko.c
+++ b/arch/mips/cavium-octeon/executive/cvmx-pko.c
@@ -489,7 +489,7 @@ cvmx_pko_status_t cvmx_pko_config_port(uint64_t port, uint64_t base_queue,
config.s.qos_mask = 0xff;
break;
}
- /* fall through - to the error case, when Pass 1 */
+ fallthrough; /* to the error case, when Pass 1 */
default:
cvmx_dprintf("ERROR: cvmx_pko_config_port: Invalid "
"priority %llu\n",
diff --git a/arch/mips/cavium-octeon/octeon-platform.c b/arch/mips/cavium-octeon/octeon-platform.c
index 51685f893eab..d56e9b9d2e43 100644
--- a/arch/mips/cavium-octeon/octeon-platform.c
+++ b/arch/mips/cavium-octeon/octeon-platform.c
@@ -141,7 +141,7 @@ static void octeon2_usb_clocks_start(struct device *dev)
default:
pr_err("Invalid UCTL clock rate of %u, using 12000000 instead\n",
clock_rate);
- /* Fall through */
+ fallthrough;
case 12000000:
clk_rst_ctl.s.p_refclk_div = 0;
break;
@@ -1116,7 +1116,7 @@ end_led:
new_f[0] = cpu_to_be32(48000000);
fdt_setprop_inplace(initial_boot_params, usbn,
"refclk-frequency", new_f, sizeof(new_f));
- /* Fall through ...*/
+ fallthrough;
case USB_CLOCK_TYPE_REF_12:
/* Missing "refclk-type" defaults to external. */
fdt_nop_property(initial_boot_params, usbn, "refclk-type");
diff --git a/arch/mips/cavium-octeon/octeon-usb.c b/arch/mips/cavium-octeon/octeon-usb.c
index cc88a08bc1f7..1fd85c559700 100644
--- a/arch/mips/cavium-octeon/octeon-usb.c
+++ b/arch/mips/cavium-octeon/octeon-usb.c
@@ -398,7 +398,7 @@ static int dwc3_octeon_clocks_start(struct device *dev, u64 base)
default:
dev_err(dev, "Invalid ref_clk %u, using 100000000 instead\n",
clock_rate);
- /* fall through */
+ fallthrough;
case 100000000:
mpll_mul = 0x19;
if (ref_clk_sel < 2)
diff --git a/arch/mips/cobalt/Platform b/arch/mips/cobalt/Platform
index 34123efd6dfe..4254895ad6f4 100644
--- a/arch/mips/cobalt/Platform
+++ b/arch/mips/cobalt/Platform
@@ -1,6 +1,5 @@
#
# Cobalt Server
#
-platform-$(CONFIG_MIPS_COBALT) += cobalt/
cflags-$(CONFIG_MIPS_COBALT) += -I$(srctree)/arch/mips/include/asm/mach-cobalt
load-$(CONFIG_MIPS_COBALT) += 0xffffffff80080000
diff --git a/arch/mips/configs/ath79_defconfig b/arch/mips/configs/ath79_defconfig
index 3d14d67dc746..96622a2ad333 100644
--- a/arch/mips/configs/ath79_defconfig
+++ b/arch/mips/configs/ath79_defconfig
@@ -46,7 +46,6 @@ CONFIG_MTD_JEDECPROBE=y
CONFIG_MTD_CFI_AMDSTD=y
CONFIG_MTD_COMPLEX_MAPPINGS=y
CONFIG_MTD_PHYSMAP=y
-CONFIG_MTD_M25P80=y
CONFIG_MTD_SPI_NOR=y
CONFIG_NETDEVICES=y
CONFIG_ATH9K=m
diff --git a/arch/mips/configs/ci20_defconfig b/arch/mips/configs/ci20_defconfig
index 0db0088bbc1c..e511fe0243a5 100644
--- a/arch/mips/configs/ci20_defconfig
+++ b/arch/mips/configs/ci20_defconfig
@@ -1,5 +1,4 @@
# CONFIG_LOCALVERSION_AUTO is not set
-CONFIG_MODULES=y
CONFIG_KERNEL_XZ=y
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
@@ -29,6 +28,7 @@ CONFIG_HIGHMEM=y
CONFIG_HZ_100=y
# CONFIG_SECCOMP is not set
# CONFIG_SUSPEND is not set
+CONFIG_MODULES=y
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
# CONFIG_COMPACTION is not set
CONFIG_CMA=y
@@ -38,17 +38,12 @@ CONFIG_UNIX=y
CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_INET_DIAG is not set
# CONFIG_IPV6 is not set
# CONFIG_WIRELESS is not set
CONFIG_DEVTMPFS=y
# CONFIG_FW_LOADER is not set
# CONFIG_ALLOW_DEV_COREDUMP is not set
-CONFIG_DMA_CMA=y
-CONFIG_CMA_SIZE_MBYTES=32
CONFIG_MTD=y
CONFIG_MTD_RAW_NAND=y
CONFIG_MTD_NAND_JZ4780=y
@@ -72,9 +67,8 @@ CONFIG_DM9000_FORCE_SIMPLE_PHY_POLL=y
# CONFIG_NET_VENDOR_VIA is not set
# CONFIG_NET_VENDOR_WIZNET is not set
# CONFIG_WLAN is not set
-# CONFIG_INPUT_KEYBOARD is not set
+CONFIG_KEYBOARD_GPIO=m
# CONFIG_INPUT_MOUSE is not set
-# CONFIG_SERIO is not set
CONFIG_VT_HW_CONSOLE_BINDING=y
CONFIG_LEGACY_PTY_COUNT=2
CONFIG_SERIAL_8250=y
@@ -89,7 +83,7 @@ CONFIG_I2C_JZ4780=y
CONFIG_SPI=y
CONFIG_SPI_GPIO=y
CONFIG_GPIO_SYSFS=y
-CONFIG_KEYBOARD_GPIO=m
+CONFIG_POWER_SUPPLY=y
# CONFIG_HWMON is not set
CONFIG_WATCHDOG=y
CONFIG_JZ4740_WDT=y
@@ -97,17 +91,45 @@ CONFIG_REGULATOR=y
CONFIG_REGULATOR_DEBUG=y
CONFIG_REGULATOR_FIXED_VOLTAGE=y
CONFIG_REGULATOR_ACT8865=y
+CONFIG_RC_CORE=m
+CONFIG_LIRC=y
+CONFIG_RC_DEVICES=y
+CONFIG_IR_GPIO_CIR=m
+CONFIG_IR_GPIO_TX=m
+CONFIG_MEDIA_SUPPORT=m
# CONFIG_VGA_CONSOLE is not set
# CONFIG_HID is not set
# CONFIG_USB_SUPPORT is not set
CONFIG_MMC=y
CONFIG_MMC_JZ4740=y
+CONFIG_NEW_LEDS=y
+CONFIG_LEDS_CLASS=y
+CONFIG_LEDS_GPIO=y
+CONFIG_LEDS_TRIGGERS=y
+CONFIG_LEDS_TRIGGER_TIMER=y
+CONFIG_LEDS_TRIGGER_ONESHOT=y
+CONFIG_LEDS_TRIGGER_MTD=y
+CONFIG_LEDS_TRIGGER_HEARTBEAT=y
+CONFIG_LEDS_TRIGGER_BACKLIGHT=m
+CONFIG_LEDS_TRIGGER_CPU=y
+CONFIG_LEDS_TRIGGER_ACTIVITY=y
+CONFIG_LEDS_TRIGGER_GPIO=y
+CONFIG_LEDS_TRIGGER_DEFAULT_ON=y
+CONFIG_LEDS_TRIGGER_TRANSIENT=y
+CONFIG_LEDS_TRIGGER_CAMERA=m
+CONFIG_LEDS_TRIGGER_PANIC=y
+CONFIG_LEDS_TRIGGER_NETDEV=y
+CONFIG_LEDS_TRIGGER_PATTERN=y
+CONFIG_LEDS_TRIGGER_AUDIO=y
CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_JZ4740=y
CONFIG_DMADEVICES=y
CONFIG_DMA_JZ4780=y
+CONFIG_INGENIC_OST=y
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_MEMORY=y
+CONFIG_PWM=y
+CONFIG_PWM_JZ4740=m
CONFIG_EXT4_FS=y
# CONFIG_DNOTIFY is not set
CONFIG_PROC_KCORE=y
@@ -156,11 +178,13 @@ CONFIG_NLS_ISO8859_15=y
CONFIG_NLS_KOI8_R=y
CONFIG_NLS_KOI8_U=y
CONFIG_NLS_UTF8=y
+CONFIG_DMA_CMA=y
+CONFIG_CMA_SIZE_MBYTES=32
CONFIG_PRINTK_TIME=y
CONFIG_DEBUG_INFO=y
CONFIG_STRIP_ASM_SYMS=y
-CONFIG_DEBUG_FS=y
CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_FS=y
CONFIG_PANIC_ON_OOPS=y
CONFIG_PANIC_TIMEOUT=10
# CONFIG_SCHED_DEBUG is not set
@@ -169,21 +193,3 @@ CONFIG_STACKTRACE=y
# CONFIG_FTRACE is not set
CONFIG_CMDLINE_BOOL=y
CONFIG_CMDLINE="earlycon console=ttyS4,115200 clk_ignore_unused"
-CONFIG_LEDS_CLASS=y
-CONFIG_LEDS_GPIO=y
-CONFIG_LEDS_TRIGGERS=y
-CONFIG_LEDS_TRIGGER_MTD=y
-CONFIG_LEDS_TRIGGER_TIMER=y
-CONFIG_LEDS_TRIGGER_ONESHOT=y
-CONFIG_LEDS_TRIGGER_ONESHOT=y
-CONFIG_LEDS_TRIGGER_HEARTBEAT=y
-CONFIG_LEDS_TRIGGER_BACKLIGHT=m
-CONFIG_LEDS_TRIGGER_CPU=y
-CONFIG_LEDS_TRIGGER_DEFAULT_ON=y
-CONFIG_LEDS_TRIGGER_TRANSIENT=y
-CONFIG_LEDS_TRIGGER_CAMERA=m
-CONFIG_LIRC=y
-CONFIG_MEDIA_SUPPORT=m
-CONFIG_RC_DEVICES=y
-CONFIG_IR_GPIO_CIR=m
-CONFIG_IR_GPIO_TX=m
diff --git a/arch/mips/configs/db1xxx_defconfig b/arch/mips/configs/db1xxx_defconfig
index e6f3e8e3da39..b8bd66300996 100644
--- a/arch/mips/configs/db1xxx_defconfig
+++ b/arch/mips/configs/db1xxx_defconfig
@@ -92,7 +92,6 @@ CONFIG_MTD_CFI=y
CONFIG_MTD_CFI_ADV_OPTIONS=y
CONFIG_MTD_CFI_AMDSTD=y
CONFIG_MTD_PHYSMAP=y
-CONFIG_MTD_M25P80=y
CONFIG_MTD_SST25L=y
CONFIG_MTD_RAW_NAND=y
CONFIG_MTD_NAND_ECC_SW_BCH=y
diff --git a/arch/mips/configs/gcw0_defconfig b/arch/mips/configs/gcw0_defconfig
index a3e3eb3c5a8b..48131cb47e66 100644
--- a/arch/mips/configs/gcw0_defconfig
+++ b/arch/mips/configs/gcw0_defconfig
@@ -1,27 +1,152 @@
+CONFIG_DEFAULT_HOSTNAME="gcw0"
CONFIG_NO_HZ_IDLE=y
CONFIG_HIGH_RES_TIMERS=y
CONFIG_PREEMPT_VOLUNTARY=y
CONFIG_EMBEDDED=y
+CONFIG_PROFILING=y
CONFIG_MACH_INGENIC=y
CONFIG_JZ4770_GCW0=y
CONFIG_HIGHMEM=y
# CONFIG_SECCOMP is not set
-# CONFIG_SUSPEND is not set
+CONFIG_MIPS_RAW_APPENDED_DTB=y
+CONFIG_MIPS_CMDLINE_DTB_EXTEND=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
# CONFIG_BOUNCE is not set
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
CONFIG_INET=y
+CONFIG_CFG80211=y
+CONFIG_MAC80211=m
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_LOOP_MIN_COUNT=0
CONFIG_NETDEVICES=y
+# CONFIG_ETHERNET is not set
+# CONFIG_WLAN_VENDOR_ADMTEK is not set
+# CONFIG_WLAN_VENDOR_ATH is not set
+# CONFIG_WLAN_VENDOR_ATMEL is not set
+# CONFIG_WLAN_VENDOR_BROADCOM is not set
+# CONFIG_WLAN_VENDOR_CISCO is not set
+# CONFIG_WLAN_VENDOR_INTEL is not set
+# CONFIG_WLAN_VENDOR_INTERSIL is not set
+# CONFIG_WLAN_VENDOR_MARVELL is not set
+# CONFIG_WLAN_VENDOR_MEDIATEK is not set
+# CONFIG_WLAN_VENDOR_RALINK is not set
+CONFIG_RTL8192CU=m
+# CONFIG_RTLWIFI_DEBUG is not set
+# CONFIG_WLAN_VENDOR_RSI is not set
+# CONFIG_WLAN_VENDOR_ST is not set
+# CONFIG_WLAN_VENDOR_TI is not set
+# CONFIG_WLAN_VENDOR_ZYDAS is not set
+# CONFIG_WLAN_VENDOR_QUANTENNA is not set
+CONFIG_INPUT_EVDEV=y
+# CONFIG_KEYBOARD_ATKBD is not set
+CONFIG_KEYBOARD_GPIO=y
+# CONFIG_INPUT_MOUSE is not set
+CONFIG_INPUT_JOYSTICK=y
+CONFIG_INPUT_MISC=y
+CONFIG_INPUT_UINPUT=y
+CONFIG_INPUT_PWM_VIBRA=y
+# CONFIG_SERIO is not set
CONFIG_SERIAL_8250=y
# CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_SERIAL_8250_INGENIC=y
+CONFIG_HW_RANDOM=y
+CONFIG_I2C_GPIO=y
+CONFIG_SPI=y
+CONFIG_SPI_GPIO=y
+CONFIG_POWER_SUPPLY=y
+# CONFIG_HWMON is not set
+CONFIG_WATCHDOG=y
+CONFIG_JZ4740_WDT=y
+CONFIG_REGULATOR=y
+CONFIG_REGULATOR_FIXED_VOLTAGE=y
+CONFIG_REGULATOR_ACT8865=y
+CONFIG_DRM=y
+CONFIG_DRM_FBDEV_OVERALLOC=300
+CONFIG_DRM_PANEL_NOVATEK_NT39016=y
+CONFIG_DRM_INGENIC=y
+CONFIG_DRM_ETNAVIV=y
+CONFIG_BACKLIGHT_CLASS_DEVICE=y
+# CONFIG_BACKLIGHT_GENERIC is not set
+CONFIG_BACKLIGHT_PWM=y
+# CONFIG_VGA_CONSOLE is not set
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_LOGO=y
+# CONFIG_LOGO_LINUX_MONO is not set
+# CONFIG_LOGO_LINUX_VGA16 is not set
+CONFIG_SOUND=y
+CONFIG_SND=y
+# CONFIG_SND_SUPPORT_OLD_API is not set
+# CONFIG_SND_PROC_FS is not set
+# CONFIG_SND_DRIVERS is not set
+# CONFIG_SND_SPI is not set
+# CONFIG_SND_MIPS is not set
+# CONFIG_SND_USB is not set
+CONFIG_SND_SOC=y
+CONFIG_SND_JZ4740_SOC_I2S=y
+CONFIG_SND_SOC_JZ4770_CODEC=y
+CONFIG_SND_SOC_SIMPLE_AMPLIFIER=y
+CONFIG_SND_SIMPLE_CARD=y
+CONFIG_USB_CONN_GPIO=y
CONFIG_USB=y
+CONFIG_USB_OTG=y
+CONFIG_USB_OTG_BLACKLIST_HUB=y
CONFIG_USB_OHCI_HCD=y
CONFIG_USB_OHCI_HCD_PLATFORM=y
-CONFIG_NOP_USB_XCEIV=y
-CONFIG_TMPFS=y
+CONFIG_USB_MUSB_HDRC=y
+CONFIG_USB_MUSB_GADGET=y
+CONFIG_USB_MUSB_JZ4740=y
+CONFIG_USB_INVENTRA_DMA=y
+CONFIG_JZ4770_PHY=y
+CONFIG_USB_GADGET=y
+CONFIG_USB_GADGET_VBUS_DRAW=500
+CONFIG_USB_ETH=y
+CONFIG_MMC=y
+# CONFIG_PWRSEQ_EMMC is not set
+# CONFIG_PWRSEQ_SIMPLE is not set
+CONFIG_MMC_JZ4740=y
+CONFIG_NEW_LEDS=y
+CONFIG_LEDS_CLASS=y
+CONFIG_LEDS_GPIO=y
+CONFIG_LEDS_TRIGGERS=y
+CONFIG_LEDS_TRIGGER_TIMER=y
+CONFIG_LEDS_TRIGGER_DEFAULT_ON=y
+CONFIG_LEDS_TRIGGER_PANIC=y
+CONFIG_RTC_CLASS=y
+# CONFIG_RTC_NVMEM is not set
+CONFIG_RTC_DRV_JZ4740=y
+CONFIG_DMADEVICES=y
+CONFIG_DMA_JZ4780=y
+# CONFIG_VIRTIO_MENU is not set
+CONFIG_STAGING=y
+CONFIG_R8188EU=m
+CONFIG_INGENIC_OST=y
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_IIO=y
+CONFIG_IIO_BUFFER=y
+CONFIG_IIO_BUFFER_CB=y
+CONFIG_IIO_KFIFO_BUF=y
+CONFIG_MXC6255=m
+CONFIG_INGENIC_ADC=y
+CONFIG_PWM=y
+CONFIG_PWM_JZ4740=y
+CONFIG_EXT4_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_SQUASHFS=y
+CONFIG_SQUASHFS_FILE_DIRECT=y
+CONFIG_SQUASHFS_DECOMP_MULTI=y
+# CONFIG_NETWORK_FILESYSTEMS is not set
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ISO8859_1=y
+CONFIG_NLS_UTF8=y
+CONFIG_LSM="lockdown,yama,loadpin,safesetid,integrity"
+CONFIG_CRYPTO_ECHAINIV=y
+CONFIG_FONTS=y
+CONFIG_FONT_6x10=y
+CONFIG_DEBUG_FS=y
diff --git a/arch/mips/configs/generic/board-ocelot.config b/arch/mips/configs/generic/board-ocelot.config
index 7626f2a75b03..510709565404 100644
--- a/arch/mips/configs/generic/board-ocelot.config
+++ b/arch/mips/configs/generic/board-ocelot.config
@@ -9,7 +9,6 @@ CONFIG_GENERIC_PHY=y
CONFIG_MTD=y
CONFIG_MTD_CMDLINE_PARTS=y
CONFIG_MTD_BLOCK=y
-CONFIG_MTD_M25P80=y
CONFIG_MTD_RAW_NAND=y
CONFIG_MTD_NAND_PLATFORM=y
CONFIG_MTD_SPI_NOR=y
diff --git a/arch/mips/configs/lasat_defconfig b/arch/mips/configs/lasat_defconfig
deleted file mode 100644
index 00cf461db971..000000000000
--- a/arch/mips/configs/lasat_defconfig
+++ /dev/null
@@ -1,55 +0,0 @@
-CONFIG_SYSVIPC=y
-CONFIG_LOG_BUF_SHIFT=14
-CONFIG_EXPERT=y
-# CONFIG_EPOLL is not set
-# CONFIG_SIGNALFD is not set
-# CONFIG_TIMERFD is not set
-# CONFIG_EVENTFD is not set
-# CONFIG_KALLSYMS is not set
-CONFIG_SLAB=y
-CONFIG_LASAT=y
-CONFIG_PICVUE=y
-CONFIG_PICVUE_PROC=y
-CONFIG_DS1603=y
-CONFIG_LASAT_SYSCTL=y
-CONFIG_HZ_1000=y
-# CONFIG_SECCOMP is not set
-CONFIG_PCI=y
-# CONFIG_BLK_DEV_BSG is not set
-CONFIG_NET=y
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_INET=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_DIAG is not set
-# CONFIG_IPV6 is not set
-CONFIG_MTD=y
-CONFIG_MTD_BLOCK=y
-CONFIG_MTD_CFI=y
-CONFIG_MTD_CFI_AMDSTD=y
-CONFIG_BLK_DEV_SD=y
-CONFIG_ATA=y
-CONFIG_PATA_CMD64X=y
-CONFIG_ATA_GENERIC=y
-CONFIG_PATA_LEGACY=y
-CONFIG_NETDEVICES=y
-CONFIG_PCNET32=y
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
-CONFIG_SERIO_RAW=y
-# CONFIG_VT is not set
-CONFIG_SERIAL_8250=y
-CONFIG_SERIAL_8250_CONSOLE=y
-# CONFIG_SERIAL_8250_PCI is not set
-# CONFIG_HW_RANDOM is not set
-# CONFIG_HWMON is not set
-# CONFIG_USB_SUPPORT is not set
-CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_DNOTIFY is not set
-CONFIG_PROC_KCORE=y
-CONFIG_TMPFS=y
-CONFIG_CONFIGFS_FS=y
-CONFIG_MAGIC_SYSRQ=y
diff --git a/arch/mips/configs/loongson3_defconfig b/arch/mips/configs/loongson3_defconfig
index 51675f5000d6..3d4c7e9996c5 100644
--- a/arch/mips/configs/loongson3_defconfig
+++ b/arch/mips/configs/loongson3_defconfig
@@ -21,6 +21,7 @@ CONFIG_SYSFS_DEPRECATED=y
CONFIG_RELAY=y
CONFIG_BLK_DEV_INITRD=y
CONFIG_EMBEDDED=y
+CONFIG_PERF_EVENTS=y
CONFIG_MACH_LOONGSON64=y
CONFIG_SMP=y
CONFIG_HZ_256=y
@@ -216,6 +217,7 @@ CONFIG_SERIAL_8250_EXTENDED=y
CONFIG_SERIAL_8250_MANY_PORTS=y
CONFIG_SERIAL_8250_SHARE_IRQ=y
CONFIG_SERIAL_8250_RSA=y
+CONFIG_SERIAL_OF_PLATFORM=y
CONFIG_HW_RANDOM=y
CONFIG_RAW_DRIVER=m
CONFIG_I2C_CHARDEV=y
@@ -229,7 +231,7 @@ CONFIG_MEDIA_CAMERA_SUPPORT=y
CONFIG_MEDIA_USB_SUPPORT=y
CONFIG_USB_VIDEO_CLASS=m
CONFIG_DRM=y
-CONFIG_DRM_RADEON=y
+CONFIG_DRM_RADEON=m
CONFIG_FB_RADEON=y
CONFIG_LCD_CLASS_DEVICE=y
CONFIG_LCD_PLATFORM=m
diff --git a/arch/mips/configs/markeins_defconfig b/arch/mips/configs/markeins_defconfig
deleted file mode 100644
index 507ad91b21e7..000000000000
--- a/arch/mips/configs/markeins_defconfig
+++ /dev/null
@@ -1,185 +0,0 @@
-CONFIG_SYSVIPC=y
-CONFIG_POSIX_MQUEUE=y
-CONFIG_PREEMPT=y
-CONFIG_BSD_PROCESS_ACCT=y
-CONFIG_IKCONFIG=y
-CONFIG_IKCONFIG_PROC=y
-CONFIG_LOG_BUF_SHIFT=14
-CONFIG_EXPERT=y
-CONFIG_SLAB=y
-CONFIG_NEC_MARKEINS=y
-CONFIG_HZ_1000=y
-CONFIG_PCI=y
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-CONFIG_MODULE_FORCE_UNLOAD=y
-CONFIG_MODVERSIONS=y
-CONFIG_NET=y
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_NET_KEY=y
-CONFIG_NET_KEY_MIGRATE=y
-CONFIG_INET=y
-CONFIG_IP_MULTICAST=y
-CONFIG_IP_ADVANCED_ROUTER=y
-CONFIG_IP_MULTIPLE_TABLES=y
-CONFIG_IP_ROUTE_MULTIPATH=y
-CONFIG_IP_ROUTE_VERBOSE=y
-CONFIG_IP_PNP=y
-CONFIG_IP_PNP_BOOTP=y
-CONFIG_SYN_COOKIES=y
-CONFIG_INET_XFRM_MODE_TRANSPORT=m
-CONFIG_INET_XFRM_MODE_TUNNEL=m
-CONFIG_INET_XFRM_MODE_BEET=m
-CONFIG_TCP_MD5SIG=y
-CONFIG_IPV6_MIP6=m
-CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m
-CONFIG_IPV6_MULTIPLE_TABLES=y
-CONFIG_IPV6_SUBTREES=y
-CONFIG_NETWORK_SECMARK=y
-CONFIG_NETFILTER=y
-CONFIG_NF_CONNTRACK=m
-CONFIG_NF_CONNTRACK_SECMARK=y
-CONFIG_NF_CONNTRACK_EVENTS=y
-CONFIG_NF_CONNTRACK_AMANDA=m
-CONFIG_NF_CONNTRACK_FTP=m
-CONFIG_NF_CONNTRACK_H323=m
-CONFIG_NF_CONNTRACK_IRC=m
-CONFIG_NF_CONNTRACK_PPTP=m
-CONFIG_NF_CONNTRACK_SANE=m
-CONFIG_NF_CONNTRACK_SIP=m
-CONFIG_NF_CONNTRACK_TFTP=m
-CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
-CONFIG_NETFILTER_XT_TARGET_CONNMARK=m
-CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m
-CONFIG_NETFILTER_XT_TARGET_DSCP=m
-CONFIG_NETFILTER_XT_TARGET_MARK=m
-CONFIG_NETFILTER_XT_TARGET_NFLOG=m
-CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
-CONFIG_NETFILTER_XT_TARGET_SECMARK=m
-CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
-CONFIG_NETFILTER_XT_MATCH_COMMENT=m
-CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m
-CONFIG_NETFILTER_XT_MATCH_CONNMARK=m
-CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
-CONFIG_NETFILTER_XT_MATCH_DCCP=m
-CONFIG_NETFILTER_XT_MATCH_DSCP=m
-CONFIG_NETFILTER_XT_MATCH_ESP=m
-CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m
-CONFIG_NETFILTER_XT_MATCH_HELPER=m
-CONFIG_NETFILTER_XT_MATCH_LENGTH=m
-CONFIG_NETFILTER_XT_MATCH_LIMIT=m
-CONFIG_NETFILTER_XT_MATCH_MAC=m
-CONFIG_NETFILTER_XT_MATCH_MARK=m
-CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m
-CONFIG_NETFILTER_XT_MATCH_POLICY=m
-CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m
-CONFIG_NETFILTER_XT_MATCH_QUOTA=m
-CONFIG_NETFILTER_XT_MATCH_REALM=m
-CONFIG_NETFILTER_XT_MATCH_STATE=m
-CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
-CONFIG_NETFILTER_XT_MATCH_STRING=m
-CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
-CONFIG_IP_NF_IPTABLES=m
-CONFIG_IP_NF_MATCH_AH=m
-CONFIG_IP_NF_MATCH_ECN=m
-CONFIG_IP_NF_MATCH_TTL=m
-CONFIG_IP_NF_FILTER=m
-CONFIG_IP_NF_TARGET_REJECT=m
-CONFIG_IP_NF_MANGLE=m
-CONFIG_IP_NF_TARGET_CLUSTERIP=m
-CONFIG_IP_NF_TARGET_ECN=m
-CONFIG_IP_NF_TARGET_TTL=m
-CONFIG_IP_NF_RAW=m
-CONFIG_IP_NF_ARPTABLES=m
-CONFIG_IP_NF_ARPFILTER=m
-CONFIG_IP_NF_ARP_MANGLE=m
-CONFIG_IP6_NF_IPTABLES=m
-CONFIG_IP6_NF_MATCH_AH=m
-CONFIG_IP6_NF_MATCH_EUI64=m
-CONFIG_IP6_NF_MATCH_FRAG=m
-CONFIG_IP6_NF_MATCH_OPTS=m
-CONFIG_IP6_NF_MATCH_HL=m
-CONFIG_IP6_NF_MATCH_IPV6HEADER=m
-CONFIG_IP6_NF_MATCH_MH=m
-CONFIG_IP6_NF_MATCH_RT=m
-CONFIG_IP6_NF_TARGET_HL=m
-CONFIG_IP6_NF_FILTER=m
-CONFIG_IP6_NF_TARGET_REJECT=m
-CONFIG_IP6_NF_MANGLE=m
-CONFIG_IP6_NF_RAW=m
-CONFIG_FW_LOADER=m
-CONFIG_MTD=y
-CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_BLOCK=y
-CONFIG_MTD_CFI=y
-CONFIG_MTD_CFI_AMDSTD=y
-CONFIG_MTD_PHYSMAP=y
-CONFIG_BLK_DEV_LOOP=m
-CONFIG_BLK_DEV_CRYPTOLOOP=m
-CONFIG_SCSI=m
-# CONFIG_SCSI_PROC_FS is not set
-CONFIG_BLK_DEV_SD=m
-CONFIG_CHR_DEV_SG=m
-CONFIG_SCSI_SCAN_ASYNC=y
-CONFIG_SCSI_AIC94XX=m
-# CONFIG_AIC94XX_DEBUG is not set
-CONFIG_NETDEVICES=y
-CONFIG_TUN=m
-CONFIG_CHELSIO_T3=m
-CONFIG_NATSEMI=y
-CONFIG_QLA3XXX=m
-CONFIG_NETXEN_NIC=m
-CONFIG_PPP=m
-CONFIG_PPP_DEFLATE=m
-CONFIG_PPP_ASYNC=m
-CONFIG_PPP_SYNC_TTY=m
-CONFIG_INPUT_EVDEV=m
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
-# CONFIG_SERIO is not set
-# CONFIG_VT is not set
-CONFIG_SERIAL_8250=y
-CONFIG_SERIAL_8250_CONSOLE=y
-# CONFIG_HW_RANDOM is not set
-CONFIG_I2C=y
-CONFIG_I2C_CHARDEV=y
-CONFIG_I2C_DEBUG_CORE=y
-CONFIG_I2C_DEBUG_BUS=y
-# CONFIG_HID is not set
-CONFIG_EXT2_FS=y
-CONFIG_EXT2_FS_XATTR=y
-CONFIG_EXT2_FS_POSIX_ACL=y
-CONFIG_EXT2_FS_SECURITY=y
-CONFIG_EXT3_FS=m
-CONFIG_XFS_FS=m
-# CONFIG_DNOTIFY is not set
-CONFIG_AUTOFS4_FS=m
-CONFIG_MSDOS_FS=y
-CONFIG_VFAT_FS=y
-CONFIG_NTFS_FS=m
-CONFIG_PROC_KCORE=y
-CONFIG_TMPFS=y
-CONFIG_TMPFS_POSIX_ACL=y
-CONFIG_JFFS2_FS=y
-CONFIG_JFFS2_COMPRESSION_OPTIONS=y
-CONFIG_CRAMFS=y
-CONFIG_NFS_FS=y
-CONFIG_NFS_V4=y
-CONFIG_ROOT_NFS=y
-CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
-CONFIG_NLS_DEFAULT=""
-CONFIG_NLS_CODEPAGE_437=m
-CONFIG_NLS_ASCII=m
-CONFIG_NLS_ISO8859_1=m
-CONFIG_NLS_UTF8=m
-CONFIG_CRYPTO_ECB=m
-CONFIG_CRYPTO_LRW=m
-CONFIG_CRYPTO_PCBC=m
-CONFIG_CRYPTO_HMAC=y
-CONFIG_CRYPTO_XCBC=m
-CONFIG_CRYPTO_CAMELLIA=m
-CONFIG_CRYPTO_FCRYPT=m
-CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttyS0,115200 mem=192m ip=bootp root=/dev/nfs rw"
diff --git a/arch/mips/configs/msp71xx_defconfig b/arch/mips/configs/msp71xx_defconfig
deleted file mode 100644
index 6ad1a2381226..000000000000
--- a/arch/mips/configs/msp71xx_defconfig
+++ /dev/null
@@ -1,77 +0,0 @@
-CONFIG_LOCALVERSION="-pmc"
-# CONFIG_SWAP is not set
-CONFIG_SYSVIPC=y
-CONFIG_PREEMPT=y
-CONFIG_LOG_BUF_SHIFT=14
-CONFIG_EXPERT=y
-# CONFIG_SHMEM is not set
-CONFIG_SLAB=y
-CONFIG_PMC_MSP=y
-CONFIG_PMC_MSP7120_GW=y
-CONFIG_CPU_MIPS32_R2=y
-CONFIG_NR_CPUS=2
-CONFIG_PCI=y
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-CONFIG_MODVERSIONS=y
-CONFIG_NET=y
-CONFIG_UNIX=y
-CONFIG_XFRM_USER=y
-CONFIG_NET_KEY=y
-CONFIG_INET=y
-CONFIG_IP_MULTICAST=y
-CONFIG_IP_PNP=y
-CONFIG_IP_PNP_DHCP=y
-CONFIG_IP_PNP_BOOTP=y
-CONFIG_INET_AH=y
-CONFIG_INET_ESP=y
-CONFIG_INET_IPCOMP=y
-# CONFIG_IPV6 is not set
-CONFIG_NETFILTER=y
-CONFIG_IP_NF_IPTABLES=y
-CONFIG_IP_NF_FILTER=y
-CONFIG_IP_NF_TARGET_REJECT=y
-CONFIG_BRIDGE=y
-# CONFIG_PREVENT_FIRMWARE_BUILD is not set
-# CONFIG_FW_LOADER is not set
-CONFIG_MTD=y
-CONFIG_MTD_BLOCK=y
-CONFIG_MTD_CFI=y
-CONFIG_MTD_CFI_AMDSTD=y
-CONFIG_MTD_RAM=y
-CONFIG_MTD_PMC_MSP_EVM=y
-CONFIG_BLK_DEV_RAM=y
-CONFIG_SCSI=y
-CONFIG_BLK_DEV_SD=y
-CONFIG_NETDEVICES=y
-CONFIG_DUMMY=y
-CONFIG_PPP=y
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
-# CONFIG_SERIO is not set
-# CONFIG_VT is not set
-# CONFIG_LEGACY_PTYS is not set
-# CONFIG_SERIAL_8250_PCI is not set
-CONFIG_SERIAL_8250_NR_UARTS=2
-CONFIG_SERIAL_8250_RUNTIME_UARTS=2
-# CONFIG_HW_RANDOM is not set
-CONFIG_I2C=y
-CONFIG_I2C_CHARDEV=y
-CONFIG_I2C_PMCMSP=y
-# CONFIG_USB_HID is not set
-CONFIG_USB=y
-CONFIG_USB_MON=y
-CONFIG_USB_EHCI_HCD=y
-CONFIG_USB_EHCI_ROOT_HUB_TT=y
-# CONFIG_USB_EHCI_TT_NEWSCHED is not set
-CONFIG_USB_STORAGE=y
-CONFIG_EXT2_FS=y
-# CONFIG_DNOTIFY is not set
-CONFIG_MSDOS_FS=y
-CONFIG_VFAT_FS=y
-CONFIG_JFFS2_FS=y
-CONFIG_SQUASHFS=y
-CONFIG_SQUASHFS_EMBEDDED=y
-CONFIG_NLS_CODEPAGE_437=y
-CONFIG_NLS_ISO8859_1=y
-CONFIG_MAGIC_SYSRQ=y
diff --git a/arch/mips/configs/pistachio_defconfig b/arch/mips/configs/pistachio_defconfig
index 24e07180c57d..b9adf15ebbec 100644
--- a/arch/mips/configs/pistachio_defconfig
+++ b/arch/mips/configs/pistachio_defconfig
@@ -127,7 +127,6 @@ CONFIG_DEBUG_DEVRES=y
CONFIG_CONNECTOR=y
CONFIG_MTD=y
CONFIG_MTD_BLOCK=y
-CONFIG_MTD_M25P80=y
CONFIG_MTD_SPI_NOR=y
CONFIG_MTD_UBI=y
CONFIG_MTD_UBI_BLOCK=y
diff --git a/arch/mips/configs/rt305x_defconfig b/arch/mips/configs/rt305x_defconfig
index 8c2ead53007a..fec5851c164b 100644
--- a/arch/mips/configs/rt305x_defconfig
+++ b/arch/mips/configs/rt305x_defconfig
@@ -76,7 +76,6 @@ CONFIG_MTD_CFI_AMDSTD=y
CONFIG_MTD_COMPLEX_MAPPINGS=y
CONFIG_MTD_PHYSMAP=y
CONFIG_MTD_PHYSMAP_OF=y
-CONFIG_MTD_M25P80=y
CONFIG_MTD_SPI_NOR=y
CONFIG_EEPROM_93CX6=m
CONFIG_SCSI=y
diff --git a/arch/mips/dec/Platform b/arch/mips/dec/Platform
index cf55a6f4e720..c82391e832f9 100644
--- a/arch/mips/dec/Platform
+++ b/arch/mips/dec/Platform
@@ -1,7 +1,6 @@
#
# DECstation family
#
-platform-$(CONFIG_MACH_DECSTATION) += dec/
cflags-$(CONFIG_MACH_DECSTATION) += \
-I$(srctree)/arch/mips/include/asm/mach-dec
libs-$(CONFIG_MACH_DECSTATION) += arch/mips/dec/prom/
diff --git a/arch/mips/dec/int-handler.S b/arch/mips/dec/int-handler.S
index a25ef822e725..ea5b5a83f1e1 100644
--- a/arch/mips/dec/int-handler.S
+++ b/arch/mips/dec/int-handler.S
@@ -304,8 +304,8 @@ spurious:
*/
FEXPORT(dec_intr_unimplemented)
move a1,t0 # cheats way of printing an arg!
- PANIC("Unimplemented cpu interrupt! CP0_CAUSE: 0x%08x");
+ ASM_PANIC("Unimplemented cpu interrupt! CP0_CAUSE: 0x%08x");
FEXPORT(asic_intr_unimplemented)
move a1,t0 # cheats way of printing an arg!
- PANIC("Unimplemented asic interrupt! ASIC ISR: 0x%08x");
+ ASM_PANIC("Unimplemented asic interrupt! ASIC ISR: 0x%08x");
diff --git a/arch/mips/dec/tc.c b/arch/mips/dec/tc.c
index 732027c79834..dba58397668e 100644
--- a/arch/mips/dec/tc.c
+++ b/arch/mips/dec/tc.c
@@ -52,7 +52,7 @@ int __init tc_bus_get_info(struct tc_bus *tbus)
case MACH_DS5900:
tbus->ext_slot_base = 0x20000000;
tbus->ext_slot_size = 0x20000000;
- /* fall through */
+ fallthrough;
case MACH_DS5000_1XX:
tbus->num_tcslots = 3;
break;
diff --git a/arch/mips/emma/Makefile b/arch/mips/emma/Makefile
deleted file mode 100644
index bc03082064ca..000000000000
--- a/arch/mips/emma/Makefile
+++ /dev/null
@@ -1,7 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-obj-$(CONFIG_SOC_EMMA2RH) += common/
-
-#
-# NEC EMMA2RH Mark-eins
-#
-obj-$(CONFIG_NEC_MARKEINS) += markeins/
diff --git a/arch/mips/emma/Platform b/arch/mips/emma/Platform
deleted file mode 100644
index 0282f7f99b88..000000000000
--- a/arch/mips/emma/Platform
+++ /dev/null
@@ -1,4 +0,0 @@
-platform-$(CONFIG_SOC_EMMA2RH) += emma/
-cflags-$(CONFIG_SOC_EMMA2RH) += \
- -I$(srctree)/arch/mips/include/asm/mach-emma2rh
-load-$(CONFIG_NEC_MARKEINS) += 0xffffffff88100000
diff --git a/arch/mips/emma/common/Makefile b/arch/mips/emma/common/Makefile
deleted file mode 100644
index a754abd1beb9..000000000000
--- a/arch/mips/emma/common/Makefile
+++ /dev/null
@@ -1,6 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-or-later
-#
-# Copyright (C) NEC Electronics Corporation 2005-2006
-#
-
-obj-$(CONFIG_NEC_MARKEINS) += prom.o
diff --git a/arch/mips/emma/common/prom.c b/arch/mips/emma/common/prom.c
deleted file mode 100644
index 7c3a6f32beda..000000000000
--- a/arch/mips/emma/common/prom.c
+++ /dev/null
@@ -1,56 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Copyright (C) NEC Electronics Corporation 2004-2006
- *
- * This file is based on the arch/mips/ddb5xxx/common/prom.c
- *
- * Copyright 2001 MontaVista Software Inc.
- */
-#include <linux/init.h>
-#include <linux/mm.h>
-#include <linux/sched.h>
-#include <linux/memblock.h>
-
-#include <asm/addrspace.h>
-#include <asm/bootinfo.h>
-#include <asm/emma/emma2rh.h>
-
-const char *get_system_type(void)
-{
-#ifdef CONFIG_NEC_MARKEINS
- return "NEC EMMA2RH Mark-eins";
-#else
-#error Unknown NEC board
-#endif
-}
-
-/* [jsun@junsun.net] PMON passes arguments in C main() style */
-void __init prom_init(void)
-{
- int argc = fw_arg0;
- char **arg = (char **)fw_arg1;
- int i;
-
- /* if user passes kernel args, ignore the default one */
- if (argc > 1)
- arcs_cmdline[0] = '\0';
-
- /* arg[0] is "g", the rest is boot parameters */
- for (i = 1; i < argc; i++) {
- if (strlen(arcs_cmdline) + strlen(arg[i]) + 1
- >= sizeof(arcs_cmdline))
- break;
- strcat(arcs_cmdline, arg[i]);
- strcat(arcs_cmdline, " ");
- }
-
-#ifdef CONFIG_NEC_MARKEINS
- add_memory_region(0, EMMA2RH_RAM_SIZE, BOOT_MEM_RAM);
-#else
-#error Unknown NEC board
-#endif
-}
-
-void __init prom_free_prom_memory(void)
-{
-}
diff --git a/arch/mips/emma/markeins/Makefile b/arch/mips/emma/markeins/Makefile
deleted file mode 100644
index 8c8649069504..000000000000
--- a/arch/mips/emma/markeins/Makefile
+++ /dev/null
@@ -1,6 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-or-later
-#
-# Copyright (C) NEC Electronics Corporation 2005-2006
-#
-
-obj-$(CONFIG_NEC_MARKEINS) += irq.o setup.o led.o platform.o
diff --git a/arch/mips/emma/markeins/irq.c b/arch/mips/emma/markeins/irq.c
deleted file mode 100644
index 4aebf559be2e..000000000000
--- a/arch/mips/emma/markeins/irq.c
+++ /dev/null
@@ -1,293 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Copyright (C) NEC Electronics Corporation 2004-2006
- *
- * This file is based on the arch/mips/ddb5xxx/ddb5477/irq.c
- *
- * Copyright 2001 MontaVista Software Inc.
- */
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/irq.h>
-#include <linux/types.h>
-#include <linux/ptrace.h>
-#include <linux/delay.h>
-
-#include <asm/irq_cpu.h>
-#include <asm/mipsregs.h>
-#include <asm/addrspace.h>
-#include <asm/bootinfo.h>
-
-#include <asm/emma/emma2rh.h>
-
-static void emma2rh_irq_enable(struct irq_data *d)
-{
- unsigned int irq = d->irq - EMMA2RH_IRQ_BASE;
- u32 reg_value, reg_bitmask, reg_index;
-
- reg_index = EMMA2RH_BHIF_INT_EN_0 +
- (EMMA2RH_BHIF_INT_EN_1 - EMMA2RH_BHIF_INT_EN_0) * (irq / 32);
- reg_value = emma2rh_in32(reg_index);
- reg_bitmask = 0x1 << (irq % 32);
- emma2rh_out32(reg_index, reg_value | reg_bitmask);
-}
-
-static void emma2rh_irq_disable(struct irq_data *d)
-{
- unsigned int irq = d->irq - EMMA2RH_IRQ_BASE;
- u32 reg_value, reg_bitmask, reg_index;
-
- reg_index = EMMA2RH_BHIF_INT_EN_0 +
- (EMMA2RH_BHIF_INT_EN_1 - EMMA2RH_BHIF_INT_EN_0) * (irq / 32);
- reg_value = emma2rh_in32(reg_index);
- reg_bitmask = 0x1 << (irq % 32);
- emma2rh_out32(reg_index, reg_value & ~reg_bitmask);
-}
-
-struct irq_chip emma2rh_irq_controller = {
- .name = "emma2rh_irq",
- .irq_mask = emma2rh_irq_disable,
- .irq_unmask = emma2rh_irq_enable,
-};
-
-void emma2rh_irq_init(void)
-{
- u32 i;
-
- for (i = 0; i < NUM_EMMA2RH_IRQ; i++)
- irq_set_chip_and_handler_name(EMMA2RH_IRQ_BASE + i,
- &emma2rh_irq_controller,
- handle_level_irq, "level");
-}
-
-static void emma2rh_sw_irq_enable(struct irq_data *d)
-{
- unsigned int irq = d->irq - EMMA2RH_SW_IRQ_BASE;
- u32 reg;
-
- reg = emma2rh_in32(EMMA2RH_BHIF_SW_INT_EN);
- reg |= 1 << irq;
- emma2rh_out32(EMMA2RH_BHIF_SW_INT_EN, reg);
-}
-
-static void emma2rh_sw_irq_disable(struct irq_data *d)
-{
- unsigned int irq = d->irq - EMMA2RH_SW_IRQ_BASE;
- u32 reg;
-
- reg = emma2rh_in32(EMMA2RH_BHIF_SW_INT_EN);
- reg &= ~(1 << irq);
- emma2rh_out32(EMMA2RH_BHIF_SW_INT_EN, reg);
-}
-
-struct irq_chip emma2rh_sw_irq_controller = {
- .name = "emma2rh_sw_irq",
- .irq_mask = emma2rh_sw_irq_disable,
- .irq_unmask = emma2rh_sw_irq_enable,
-};
-
-void emma2rh_sw_irq_init(void)
-{
- u32 i;
-
- for (i = 0; i < NUM_EMMA2RH_IRQ_SW; i++)
- irq_set_chip_and_handler_name(EMMA2RH_SW_IRQ_BASE + i,
- &emma2rh_sw_irq_controller,
- handle_level_irq, "level");
-}
-
-static void emma2rh_gpio_irq_enable(struct irq_data *d)
-{
- unsigned int irq = d->irq - EMMA2RH_GPIO_IRQ_BASE;
- u32 reg;
-
- reg = emma2rh_in32(EMMA2RH_GPIO_INT_MASK);
- reg |= 1 << irq;
- emma2rh_out32(EMMA2RH_GPIO_INT_MASK, reg);
-}
-
-static void emma2rh_gpio_irq_disable(struct irq_data *d)
-{
- unsigned int irq = d->irq - EMMA2RH_GPIO_IRQ_BASE;
- u32 reg;
-
- reg = emma2rh_in32(EMMA2RH_GPIO_INT_MASK);
- reg &= ~(1 << irq);
- emma2rh_out32(EMMA2RH_GPIO_INT_MASK, reg);
-}
-
-static void emma2rh_gpio_irq_ack(struct irq_data *d)
-{
- unsigned int irq = d->irq - EMMA2RH_GPIO_IRQ_BASE;
-
- emma2rh_out32(EMMA2RH_GPIO_INT_ST, ~(1 << irq));
-}
-
-static void emma2rh_gpio_irq_mask_ack(struct irq_data *d)
-{
- unsigned int irq = d->irq - EMMA2RH_GPIO_IRQ_BASE;
- u32 reg;
-
- emma2rh_out32(EMMA2RH_GPIO_INT_ST, ~(1 << irq));
-
- reg = emma2rh_in32(EMMA2RH_GPIO_INT_MASK);
- reg &= ~(1 << irq);
- emma2rh_out32(EMMA2RH_GPIO_INT_MASK, reg);
-}
-
-struct irq_chip emma2rh_gpio_irq_controller = {
- .name = "emma2rh_gpio_irq",
- .irq_ack = emma2rh_gpio_irq_ack,
- .irq_mask = emma2rh_gpio_irq_disable,
- .irq_mask_ack = emma2rh_gpio_irq_mask_ack,
- .irq_unmask = emma2rh_gpio_irq_enable,
-};
-
-void emma2rh_gpio_irq_init(void)
-{
- u32 i;
-
- for (i = 0; i < NUM_EMMA2RH_IRQ_GPIO; i++)
- irq_set_chip_and_handler_name(EMMA2RH_GPIO_IRQ_BASE + i,
- &emma2rh_gpio_irq_controller,
- handle_edge_irq, "edge");
-}
-
-/*
- * the first level int-handler will jump here if it is a emma2rh irq
- */
-void emma2rh_irq_dispatch(void)
-{
- u32 intStatus;
- u32 bitmask;
- u32 i;
-
- intStatus = emma2rh_in32(EMMA2RH_BHIF_INT_ST_0) &
- emma2rh_in32(EMMA2RH_BHIF_INT_EN_0);
-
-#ifdef EMMA2RH_SW_CASCADE
- if (intStatus & (1UL << EMMA2RH_SW_CASCADE)) {
- u32 swIntStatus;
- swIntStatus = emma2rh_in32(EMMA2RH_BHIF_SW_INT)
- & emma2rh_in32(EMMA2RH_BHIF_SW_INT_EN);
- for (i = 0, bitmask = 1; i < 32; i++, bitmask <<= 1) {
- if (swIntStatus & bitmask) {
- do_IRQ(EMMA2RH_SW_IRQ_BASE + i);
- return;
- }
- }
- }
- /* Skip S/W interrupt */
- intStatus &= ~(1UL << EMMA2RH_SW_CASCADE);
-#endif
-
- for (i = 0, bitmask = 1; i < 32; i++, bitmask <<= 1) {
- if (intStatus & bitmask) {
- do_IRQ(EMMA2RH_IRQ_BASE + i);
- return;
- }
- }
-
- intStatus = emma2rh_in32(EMMA2RH_BHIF_INT_ST_1) &
- emma2rh_in32(EMMA2RH_BHIF_INT_EN_1);
-
-#ifdef EMMA2RH_GPIO_CASCADE
- if (intStatus & (1UL << (EMMA2RH_GPIO_CASCADE % 32))) {
- u32 gpioIntStatus;
- gpioIntStatus = emma2rh_in32(EMMA2RH_GPIO_INT_ST)
- & emma2rh_in32(EMMA2RH_GPIO_INT_MASK);
- for (i = 0, bitmask = 1; i < 32; i++, bitmask <<= 1) {
- if (gpioIntStatus & bitmask) {
- do_IRQ(EMMA2RH_GPIO_IRQ_BASE + i);
- return;
- }
- }
- }
- /* Skip GPIO interrupt */
- intStatus &= ~(1UL << (EMMA2RH_GPIO_CASCADE % 32));
-#endif
-
- for (i = 32, bitmask = 1; i < 64; i++, bitmask <<= 1) {
- if (intStatus & bitmask) {
- do_IRQ(EMMA2RH_IRQ_BASE + i);
- return;
- }
- }
-
- intStatus = emma2rh_in32(EMMA2RH_BHIF_INT_ST_2) &
- emma2rh_in32(EMMA2RH_BHIF_INT_EN_2);
-
- for (i = 64, bitmask = 1; i < 96; i++, bitmask <<= 1) {
- if (intStatus & bitmask) {
- do_IRQ(EMMA2RH_IRQ_BASE + i);
- return;
- }
- }
-}
-
-void __init arch_init_irq(void)
-{
- u32 reg;
- int irq;
-
- /* by default, interrupts are disabled. */
- emma2rh_out32(EMMA2RH_BHIF_INT_EN_0, 0);
- emma2rh_out32(EMMA2RH_BHIF_INT_EN_1, 0);
- emma2rh_out32(EMMA2RH_BHIF_INT_EN_2, 0);
- emma2rh_out32(EMMA2RH_BHIF_INT1_EN_0, 0);
- emma2rh_out32(EMMA2RH_BHIF_INT1_EN_1, 0);
- emma2rh_out32(EMMA2RH_BHIF_INT1_EN_2, 0);
- emma2rh_out32(EMMA2RH_BHIF_SW_INT_EN, 0);
-
- clear_c0_status(0xff00);
- set_c0_status(0x0400);
-
-#define GPIO_PCI (0xf<<15)
- /* setup GPIO interrupt for PCI interface */
- /* direction input */
- reg = emma2rh_in32(EMMA2RH_GPIO_DIR);
- emma2rh_out32(EMMA2RH_GPIO_DIR, reg & ~GPIO_PCI);
- /* disable interrupt */
- reg = emma2rh_in32(EMMA2RH_GPIO_INT_MASK);
- emma2rh_out32(EMMA2RH_GPIO_INT_MASK, reg & ~GPIO_PCI);
- /* level triggerd */
- reg = emma2rh_in32(EMMA2RH_GPIO_INT_MODE);
- emma2rh_out32(EMMA2RH_GPIO_INT_MODE, reg | GPIO_PCI);
- reg = emma2rh_in32(EMMA2RH_GPIO_INT_CND_A);
- emma2rh_out32(EMMA2RH_GPIO_INT_CND_A, reg & (~GPIO_PCI));
- /* interrupt clear */
- emma2rh_out32(EMMA2RH_GPIO_INT_ST, ~GPIO_PCI);
-
- /* init all controllers */
- emma2rh_irq_init();
- emma2rh_sw_irq_init();
- emma2rh_gpio_irq_init();
- mips_cpu_irq_init();
-
- /* setup cascade interrupts */
- irq = EMMA2RH_IRQ_BASE + EMMA2RH_SW_CASCADE;
- if (request_irq(irq, no_action, IRQF_NO_THREAD, "cascade", NULL))
- pr_err("Failed to request irq %d (cascade)\n", irq);
- irq = EMMA2RH_IRQ_BASE + EMMA2RH_GPIO_CASCADE;
- if (request_irq(irq, no_action, IRQF_NO_THREAD, "cascade", NULL))
- pr_err("Failed to request irq %d (cascade)\n", irq);
- irq = MIPS_CPU_IRQ_BASE + 2;
- if (request_irq(irq, no_action, IRQF_NO_THREAD, "cascade", NULL))
- pr_err("Failed to request irq %d (cascade)\n", irq);
-}
-
-asmlinkage void plat_irq_dispatch(void)
-{
- unsigned int pending = read_c0_status() & read_c0_cause() & ST0_IM;
-
- if (pending & STATUSF_IP7)
- do_IRQ(MIPS_CPU_IRQ_BASE + 7);
- else if (pending & STATUSF_IP2)
- emma2rh_irq_dispatch();
- else if (pending & STATUSF_IP1)
- do_IRQ(MIPS_CPU_IRQ_BASE + 1);
- else if (pending & STATUSF_IP0)
- do_IRQ(MIPS_CPU_IRQ_BASE + 0);
- else
- spurious_interrupt();
-}
diff --git a/arch/mips/emma/markeins/led.c b/arch/mips/emma/markeins/led.c
deleted file mode 100644
index d377542c0ec4..000000000000
--- a/arch/mips/emma/markeins/led.c
+++ /dev/null
@@ -1,44 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Copyright (C) NEC Electronics Corporation 2004-2006
- */
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/string.h>
-#include <asm/emma/emma2rh.h>
-
-const unsigned long clear = 0x20202020;
-
-#define LED_BASE 0xb1400038
-
-void markeins_led_clear(void)
-{
- emma2rh_out32(LED_BASE, clear);
- emma2rh_out32(LED_BASE + 4, clear);
-}
-
-void markeins_led(const char *str)
-{
- int i;
- int len = strlen(str);
-
- markeins_led_clear();
- if (len > 8)
- len = 8;
-
- if (emma2rh_in32(0xb0000800) & (0x1 << 18))
- for (i = 0; i < len; i++)
- emma2rh_out8(LED_BASE + i, str[i]);
- else
- for (i = 0; i < len; i++)
- emma2rh_out8(LED_BASE + (i & 4) + (3 - (i & 3)),
- str[i]);
-}
-
-void markeins_led_hex(u32 val)
-{
- char str[10];
-
- sprintf(str, "%08x", val);
- markeins_led(str);
-}
diff --git a/arch/mips/emma/markeins/platform.c b/arch/mips/emma/markeins/platform.c
deleted file mode 100644
index 97eeb9e8fb2b..000000000000
--- a/arch/mips/emma/markeins/platform.c
+++ /dev/null
@@ -1,199 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Copyright(C) MontaVista Software Inc, 2006
- *
- * Author: dmitry pervushin <dpervushin@ru.mvista.com>
- */
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/ioport.h>
-#include <linux/serial_8250.h>
-#include <linux/mtd/physmap.h>
-
-#include <asm/cpu.h>
-#include <asm/bootinfo.h>
-#include <asm/addrspace.h>
-#include <asm/time.h>
-#include <asm/bcache.h>
-#include <asm/irq.h>
-#include <asm/reboot.h>
-#include <asm/traps.h>
-
-#include <asm/emma/emma2rh.h>
-
-
-#define I2C_EMMA2RH "emma2rh-iic" /* must be in sync with IIC driver */
-
-static struct resource i2c_emma_resources_0[] = {
- {
- .name = NULL,
- .start = EMMA2RH_IRQ_PIIC0,
- .end = EMMA2RH_IRQ_PIIC0,
- .flags = IORESOURCE_IRQ
- }, {
- .name = NULL,
- .start = EMMA2RH_PIIC0_BASE,
- .end = EMMA2RH_PIIC0_BASE + 0x1000,
- .flags = 0
- },
-};
-
-struct resource i2c_emma_resources_1[] = {
- {
- .name = NULL,
- .start = EMMA2RH_IRQ_PIIC1,
- .end = EMMA2RH_IRQ_PIIC1,
- .flags = IORESOURCE_IRQ
- }, {
- .name = NULL,
- .start = EMMA2RH_PIIC1_BASE,
- .end = EMMA2RH_PIIC1_BASE + 0x1000,
- .flags = 0
- },
-};
-
-struct resource i2c_emma_resources_2[] = {
- {
- .name = NULL,
- .start = EMMA2RH_IRQ_PIIC2,
- .end = EMMA2RH_IRQ_PIIC2,
- .flags = IORESOURCE_IRQ
- }, {
- .name = NULL,
- .start = EMMA2RH_PIIC2_BASE,
- .end = EMMA2RH_PIIC2_BASE + 0x1000,
- .flags = 0
- },
-};
-
-struct platform_device i2c_emma_devices[] = {
- [0] = {
- .name = I2C_EMMA2RH,
- .id = 0,
- .resource = i2c_emma_resources_0,
- .num_resources = ARRAY_SIZE(i2c_emma_resources_0),
- },
- [1] = {
- .name = I2C_EMMA2RH,
- .id = 1,
- .resource = i2c_emma_resources_1,
- .num_resources = ARRAY_SIZE(i2c_emma_resources_1),
- },
- [2] = {
- .name = I2C_EMMA2RH,
- .id = 2,
- .resource = i2c_emma_resources_2,
- .num_resources = ARRAY_SIZE(i2c_emma_resources_2),
- },
-};
-
-#define EMMA2RH_SERIAL_CLOCK 18544000
-#define EMMA2RH_SERIAL_FLAGS UPF_BOOT_AUTOCONF | UPF_SKIP_TEST
-
-static struct plat_serial8250_port platform_serial_ports[] = {
- [0] = {
- .membase= (void __iomem*)KSEG1ADDR(EMMA2RH_PFUR0_BASE + 3),
- .mapbase = EMMA2RH_PFUR0_BASE + 3,
- .irq = EMMA2RH_IRQ_PFUR0,
- .uartclk = EMMA2RH_SERIAL_CLOCK,
- .regshift = 4,
- .iotype = UPIO_MEM,
- .flags = EMMA2RH_SERIAL_FLAGS,
- }, [1] = {
- .membase = (void __iomem*)KSEG1ADDR(EMMA2RH_PFUR1_BASE + 3),
- .mapbase = EMMA2RH_PFUR1_BASE + 3,
- .irq = EMMA2RH_IRQ_PFUR1,
- .uartclk = EMMA2RH_SERIAL_CLOCK,
- .regshift = 4,
- .iotype = UPIO_MEM,
- .flags = EMMA2RH_SERIAL_FLAGS,
- }, [2] = {
- .membase = (void __iomem*)KSEG1ADDR(EMMA2RH_PFUR2_BASE + 3),
- .mapbase = EMMA2RH_PFUR2_BASE + 3,
- .irq = EMMA2RH_IRQ_PFUR2,
- .uartclk = EMMA2RH_SERIAL_CLOCK,
- .regshift = 4,
- .iotype = UPIO_MEM,
- .flags = EMMA2RH_SERIAL_FLAGS,
- }, [3] = {
- .flags = 0,
- },
-};
-
-static struct platform_device serial_emma = {
- .name = "serial8250",
- .dev = {
- .platform_data = &platform_serial_ports,
- },
-};
-
-static struct mtd_partition markeins_parts[] = {
- [0] = {
- .name = "RootFS",
- .offset = 0x00000000,
- .size = 0x00c00000,
- },
- [1] = {
- .name = "boot code area",
- .offset = MTDPART_OFS_APPEND,
- .size = 0x00100000,
- },
- [2] = {
- .name = "kernel image",
- .offset = MTDPART_OFS_APPEND,
- .size = 0x00300000,
- },
- [3] = {
- .name = "RootFS2",
- .offset = MTDPART_OFS_APPEND,
- .size = 0x00c00000,
- },
- [4] = {
- .name = "boot code area2",
- .offset = MTDPART_OFS_APPEND,
- .size = 0x00100000,
- },
- [5] = {
- .name = "kernel image2",
- .offset = MTDPART_OFS_APPEND,
- .size = MTDPART_SIZ_FULL,
- },
-};
-
-static struct physmap_flash_data markeins_flash_data = {
- .width = 2,
- .nr_parts = ARRAY_SIZE(markeins_parts),
- .parts = markeins_parts
-};
-
-static struct resource markeins_flash_resource = {
- .start = 0x1e000000,
- .end = 0x02000000,
- .flags = IORESOURCE_MEM
-};
-
-static struct platform_device markeins_flash_device = {
- .name = "physmap-flash",
- .id = 0,
- .dev = {
- .platform_data = &markeins_flash_data,
- },
- .num_resources = 1,
- .resource = &markeins_flash_resource,
-};
-
-static struct platform_device *devices[] = {
- i2c_emma_devices,
- i2c_emma_devices + 1,
- i2c_emma_devices + 2,
- &serial_emma,
- &markeins_flash_device,
-};
-
-static int __init platform_devices_setup(void)
-{
- return platform_add_devices(devices, ARRAY_SIZE(devices));
-}
-
-arch_initcall(platform_devices_setup);
diff --git a/arch/mips/emma/markeins/setup.c b/arch/mips/emma/markeins/setup.c
deleted file mode 100644
index c8a91c2a63bc..000000000000
--- a/arch/mips/emma/markeins/setup.c
+++ /dev/null
@@ -1,115 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Copyright (C) NEC Electronics Corporation 2004-2006
- *
- * This file is based on the arch/mips/ddb5xxx/ddb5477/setup.c.
- *
- * Copyright 2001 MontaVista Software Inc.
- */
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/types.h>
-
-#include <asm/time.h>
-#include <asm/reboot.h>
-
-#include <asm/emma/emma2rh.h>
-
-#define USE_CPU_COUNTER_TIMER /* whether we use cpu counter */
-
-extern void markeins_led(const char *);
-
-static int bus_frequency;
-
-static void markeins_machine_restart(char *command)
-{
- static void (*back_to_prom) (void) = (void (*)(void))0xbfc00000;
-
- printk("cannot EMMA2RH Mark-eins restart.\n");
- markeins_led("restart.");
- back_to_prom();
-}
-
-static void markeins_machine_halt(void)
-{
- printk("EMMA2RH Mark-eins halted.\n");
- markeins_led("halted.");
- while (1) ;
-}
-
-static void markeins_machine_power_off(void)
-{
- markeins_led("poweroff.");
- while (1) ;
-}
-
-static unsigned long __initdata emma2rh_clock[4] = {
- 166500000, 187312500, 199800000, 210600000
-};
-
-static unsigned int __init detect_bus_frequency(unsigned long rtc_base)
-{
- u32 reg;
-
- /* detect from boot strap */
- reg = emma2rh_in32(EMMA2RH_BHIF_STRAP_0);
- reg = (reg >> 4) & 0x3;
-
- return emma2rh_clock[reg];
-}
-
-void __init plat_time_init(void)
-{
- u32 reg;
- if (bus_frequency == 0)
- bus_frequency = detect_bus_frequency(0);
-
- reg = emma2rh_in32(EMMA2RH_BHIF_STRAP_0);
- if ((reg & 0x3) == 0)
- reg = (reg >> 6) & 0x3;
- else {
- reg = emma2rh_in32(EMMA2RH_BHIF_MAIN_CTRL);
- reg = (reg >> 4) & 0x3;
- }
- mips_hpt_frequency = (bus_frequency * (4 + reg)) / 4 / 2;
-}
-
-static void markeins_board_init(void);
-extern void markeins_irq_setup(void);
-
-static inline void __init markeins_sio_setup(void)
-{
-}
-
-void __init plat_mem_setup(void)
-{
- /* initialize board - we don't trust the loader */
- markeins_board_init();
-
- set_io_port_base(KSEG1ADDR(EMMA2RH_PCI_IO_BASE));
-
- _machine_restart = markeins_machine_restart;
- _machine_halt = markeins_machine_halt;
- pm_power_off = markeins_machine_power_off;
-
- /* setup resource limits */
- ioport_resource.start = EMMA2RH_PCI_IO_BASE;
- ioport_resource.end = EMMA2RH_PCI_IO_BASE + EMMA2RH_PCI_IO_SIZE - 1;
- iomem_resource.start = EMMA2RH_IO_BASE;
- iomem_resource.end = EMMA2RH_ROM_BASE - 1;
-
- markeins_sio_setup();
-}
-
-static void __init markeins_board_init(void)
-{
- u32 val;
-
- val = emma2rh_in32(EMMA2RH_PBRD_INT_EN); /* open serial interrupts. */
- emma2rh_out32(EMMA2RH_PBRD_INT_EN, val | 0xaa);
- val = emma2rh_in32(EMMA2RH_PBRD_CLKSEL); /* set serial clocks. */
- emma2rh_out32(EMMA2RH_PBRD_CLKSEL, val | 0x5); /* 18MHz */
- emma2rh_out32(EMMA2RH_PCI_CONTROL, 0);
-
- markeins_led("MVL E2RH");
-}
diff --git a/arch/mips/fw/cfe/cfe_api.c b/arch/mips/fw/cfe/cfe_api.c
index c020b29f561c..0c9c97ab291e 100644
--- a/arch/mips/fw/cfe/cfe_api.c
+++ b/arch/mips/fw/cfe/cfe_api.c
@@ -243,11 +243,6 @@ int cfe_getfwinfo(cfe_fwinfo_t * info)
info->fwi_bootarea_pa = xiocb.plist.xiocb_fwinfo.fwi_bootarea_pa;
info->fwi_bootarea_size =
xiocb.plist.xiocb_fwinfo.fwi_bootarea_size;
-#if 0
- info->fwi_reserved1 = xiocb.plist.xiocb_fwinfo.fwi_reserved1;
- info->fwi_reserved2 = xiocb.plist.xiocb_fwinfo.fwi_reserved2;
- info->fwi_reserved3 = xiocb.plist.xiocb_fwinfo.fwi_reserved3;
-#endif
return 0;
}
diff --git a/arch/mips/generic/Platform b/arch/mips/generic/Platform
index eaa19d189324..53c33cb72974 100644
--- a/arch/mips/generic/Platform
+++ b/arch/mips/generic/Platform
@@ -8,7 +8,6 @@
# option) any later version.
#
-platform-$(CONFIG_MIPS_GENERIC) += generic/
cflags-$(CONFIG_MIPS_GENERIC) += -I$(srctree)/arch/mips/include/asm/mach-generic
load-$(CONFIG_MIPS_GENERIC) += 0xffffffff80100000
all-$(CONFIG_MIPS_GENERIC) := vmlinux.gz.itb
diff --git a/arch/mips/include/asm/asm-eva.h b/arch/mips/include/asm/asm-eva.h
index d80be38c4144..e327ebc76753 100644
--- a/arch/mips/include/asm/asm-eva.h
+++ b/arch/mips/include/asm/asm-eva.h
@@ -180,7 +180,7 @@
#define user_ld(reg, addr) kernel_lw(reg, addr)
#else
#define user_sd(reg, addr) kernel_sd(reg, addr)
-#define user_ld(reg, addr) kernel_sd(reg, addr)
+#define user_ld(reg, addr) kernel_ld(reg, addr)
#endif /* CONFIG_32BIT */
#endif /* CONFIG_EVA */
diff --git a/arch/mips/include/asm/asm.h b/arch/mips/include/asm/asm.h
index c23527ba65d0..3682d1a0bb80 100644
--- a/arch/mips/include/asm/asm.h
+++ b/arch/mips/include/asm/asm.h
@@ -74,10 +74,15 @@ symbol: .insn
.globl symbol; \
symbol = value
-#define PANIC(msg) \
+#define TEXT(msg) \
+ .pushsection .data; \
+8: .asciiz msg; \
+ .popsection;
+
+#define ASM_PANIC(msg) \
.set push; \
.set reorder; \
- PTR_LA a0, 8f; \
+ PTR_LA a0, 8f; \
jal panic; \
9: b 9b; \
.set pop; \
@@ -87,22 +92,17 @@ symbol = value
* Print formatted string
*/
#ifdef CONFIG_PRINTK
-#define PRINT(string) \
+#define ASM_PRINT(string) \
.set push; \
.set reorder; \
- PTR_LA a0, 8f; \
+ PTR_LA a0, 8f; \
jal printk; \
.set pop; \
TEXT(string)
#else
-#define PRINT(string)
+#define ASM_PRINT(string)
#endif
-#define TEXT(msg) \
- .pushsection .data; \
-8: .asciiz msg; \
- .popsection;
-
/*
* Stack alignment
*/
@@ -202,7 +202,9 @@ symbol = value
#define LONG_SRA sra
#define LONG_SRAV srav
+#ifdef __ASSEMBLY__
#define LONG .word
+#endif
#define LONGSIZE 4
#define LONGMASK 3
#define LONGLOG 2
@@ -225,7 +227,9 @@ symbol = value
#define LONG_SRA dsra
#define LONG_SRAV dsrav
+#ifdef __ASSEMBLY__
#define LONG .dword
+#endif
#define LONGSIZE 8
#define LONGMASK 7
#define LONGLOG 3
diff --git a/arch/mips/include/asm/asmmacro.h b/arch/mips/include/asm/asmmacro.h
index 655f40ddb6d1..86f2323ebe6b 100644
--- a/arch/mips/include/asm/asmmacro.h
+++ b/arch/mips/include/asm/asmmacro.h
@@ -44,7 +44,8 @@
.endm
#endif
-#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
+#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR5) || \
+ defined(CONFIG_CPU_MIPSR6)
.macro local_irq_enable reg=t0
ei
irq_enable_hazard
@@ -54,7 +55,7 @@
di
irq_disable_hazard
.endm
-#else
+#else /* !CONFIG_CPU_MIPSR2 && !CONFIG_CPU_MIPSR5 && !CONFIG_CPU_MIPSR6 */
.macro local_irq_enable reg=t0
mfc0 \reg, CP0_STATUS
ori \reg, \reg, 1
@@ -79,7 +80,7 @@
sw \reg, TI_PRE_COUNT($28)
#endif
.endm
-#endif /* CONFIG_CPU_MIPSR2 */
+#endif /* !CONFIG_CPU_MIPSR2 && !CONFIG_CPU_MIPSR5 && !CONFIG_CPU_MIPSR6 */
.macro fpu_save_16even thread tmp=t0
.set push
@@ -131,7 +132,7 @@
.macro fpu_save_double thread status tmp
#if defined(CONFIG_64BIT) || defined(CONFIG_CPU_MIPSR2) || \
- defined(CONFIG_CPU_MIPSR6)
+ defined(CONFIG_CPU_MIPSR5) || defined(CONFIG_CPU_MIPSR6)
sll \tmp, \status, 5
bgez \tmp, 10f
fpu_save_16odd \thread
@@ -190,7 +191,7 @@
.macro fpu_restore_double thread status tmp
#if defined(CONFIG_64BIT) || defined(CONFIG_CPU_MIPSR2) || \
- defined(CONFIG_CPU_MIPSR6)
+ defined(CONFIG_CPU_MIPSR5) || defined(CONFIG_CPU_MIPSR6)
sll \tmp, \status, 5
bgez \tmp, 10f # 16 register mode?
@@ -200,16 +201,17 @@
fpu_restore_16even \thread \tmp
.endm
-#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
+#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR5) || \
+ defined(CONFIG_CPU_MIPSR6)
.macro _EXT rd, rs, p, s
ext \rd, \rs, \p, \s
.endm
-#else /* !CONFIG_CPU_MIPSR2 || !CONFIG_CPU_MIPSR6 */
+#else /* !CONFIG_CPU_MIPSR2 && !CONFIG_CPU_MIPSR5 && !CONFIG_CPU_MIPSR6 */
.macro _EXT rd, rs, p, s
srl \rd, \rs, \p
andi \rd, \rd, (1 << \s) - 1
.endm
-#endif /* !CONFIG_CPU_MIPSR2 || !CONFIG_CPU_MIPSR6 */
+#endif /* !CONFIG_CPU_MIPSR2 && !CONFIG_CPU_MIPSR5 && !CONFIG_CPU_MIPSR6 */
/*
* Temporary until all gas have MT ASE support
diff --git a/arch/mips/include/asm/bootinfo.h b/arch/mips/include/asm/bootinfo.h
index 61727785a247..c3bd9b2d66e4 100644
--- a/arch/mips/include/asm/bootinfo.h
+++ b/arch/mips/include/asm/bootinfo.h
@@ -42,17 +42,6 @@
#define MACH_DS5900 10 /* DECsystem 5900 */
/*
- * Valid machtype for group PMC-MSP
- */
-#define MACH_MSP4200_EVAL 0 /* PMC-Sierra MSP4200 Evaluation */
-#define MACH_MSP4200_GW 1 /* PMC-Sierra MSP4200 Gateway demo */
-#define MACH_MSP4200_FPGA 2 /* PMC-Sierra MSP4200 Emulation */
-#define MACH_MSP7120_EVAL 3 /* PMC-Sierra MSP7120 Evaluation */
-#define MACH_MSP7120_GW 4 /* PMC-Sierra MSP7120 Residential GW */
-#define MACH_MSP7120_FPGA 5 /* PMC-Sierra MSP7120 Emulation */
-#define MACH_MSP_OTHER 255 /* PMC-Sierra unknown board type */
-
-/*
* Valid machtype for group Mikrotik
*/
#define MACH_MIKROTIK_RB532 0 /* Mikrotik RouterBoard 532 */
@@ -121,7 +110,7 @@ extern unsigned long fw_passed_dtb;
#endif
/*
- * Platform memory detection hook called by setup_arch
+ * Platform memory detection hook called by arch_mem_init()
*/
extern void plat_mem_setup(void);
diff --git a/arch/mips/include/asm/branch.h b/arch/mips/include/asm/branch.h
index da80878f2c0d..fa3dcbf56fa9 100644
--- a/arch/mips/include/asm/branch.h
+++ b/arch/mips/include/asm/branch.h
@@ -27,6 +27,9 @@ extern int __MIPS16e_compute_return_epc(struct pt_regs *regs);
#define MM_POOL32A_MINOR_SHIFT 0x6
#define MM_MIPS32_COND_FC 0x30
+int isBranchInstr(struct pt_regs *regs,
+ struct mm_decoded_insn dec_insn, unsigned long *contpc);
+
extern int __mm_isBranchInstr(struct pt_regs *regs,
struct mm_decoded_insn dec_insn, unsigned long *contpc);
diff --git a/arch/mips/include/asm/cacheops.h b/arch/mips/include/asm/cacheops.h
index 8031fbc6b69a..50253efecb56 100644
--- a/arch/mips/include/asm/cacheops.h
+++ b/arch/mips/include/asm/cacheops.h
@@ -48,7 +48,7 @@
* R4000-specific cacheops
*/
#define Create_Dirty_Excl_D (Cache_D | 0x0c)
-#define Fill (Cache_I | 0x14)
+#define Fill_I (Cache_I | 0x14)
#define Hit_Writeback_I (Cache_I | Hit_Writeback)
#define Hit_Writeback_D (Cache_D | Hit_Writeback)
diff --git a/arch/mips/include/asm/clock.h b/arch/mips/include/asm/clock.h
deleted file mode 100644
index 5a8f96ebe5fa..000000000000
--- a/arch/mips/include/asm/clock.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __ASM_MIPS_CLOCK_H
-#define __ASM_MIPS_CLOCK_H
-
-#include <linux/kref.h>
-#include <linux/list.h>
-#include <linux/seq_file.h>
-#include <linux/clk.h>
-
-struct clk;
-
-struct clk_ops {
- void (*init) (struct clk *clk);
- void (*enable) (struct clk *clk);
- void (*disable) (struct clk *clk);
- void (*recalc) (struct clk *clk);
- int (*set_rate) (struct clk *clk, unsigned long rate, int algo_id);
- long (*round_rate) (struct clk *clk, unsigned long rate);
-};
-
-struct clk {
- struct list_head node;
- const char *name;
- int id;
- struct module *owner;
-
- struct clk *parent;
- struct clk_ops *ops;
-
- struct kref kref;
-
- unsigned long rate;
- unsigned long flags;
-};
-
-#define CLK_ALWAYS_ENABLED (1 << 0)
-#define CLK_RATE_PROPAGATES (1 << 1)
-
-int clk_init(void);
-
-int __clk_enable(struct clk *);
-void __clk_disable(struct clk *);
-
-void clk_recalc_rate(struct clk *);
-
-int clk_register(struct clk *);
-void clk_unregister(struct clk *);
-
-#endif /* __ASM_MIPS_CLOCK_H */
diff --git a/arch/mips/include/asm/compiler.h b/arch/mips/include/asm/compiler.h
index f77e99f1722e..a2cb2d2b1c07 100644
--- a/arch/mips/include/asm/compiler.h
+++ b/arch/mips/include/asm/compiler.h
@@ -57,6 +57,11 @@
#define MIPS_ISA_ARCH_LEVEL MIPS_ISA_LEVEL
#define MIPS_ISA_LEVEL_RAW mips64r6
#define MIPS_ISA_ARCH_LEVEL_RAW MIPS_ISA_LEVEL_RAW
+#elif defined(CONFIG_CPU_MIPSR5)
+#define MIPS_ISA_LEVEL "mips64r5"
+#define MIPS_ISA_ARCH_LEVEL MIPS_ISA_LEVEL
+#define MIPS_ISA_LEVEL_RAW mips64r5
+#define MIPS_ISA_ARCH_LEVEL_RAW MIPS_ISA_LEVEL_RAW
#else
/* MIPS64 is a superset of MIPS32 */
#define MIPS_ISA_LEVEL "mips64r2"
diff --git a/arch/mips/include/asm/cpu-features.h b/arch/mips/include/asm/cpu-features.h
index de44c92b1c1f..caecbae4b599 100644
--- a/arch/mips/include/asm/cpu-features.h
+++ b/arch/mips/include/asm/cpu-features.h
@@ -284,14 +284,23 @@
#ifndef cpu_has_mips32r2
# define cpu_has_mips32r2 __isa_range_or_flag(2, 6, MIPS_CPU_ISA_M32R2)
#endif
+#ifndef cpu_has_mips32r5
+# define cpu_has_mips32r5 __isa_range_or_flag(5, 6, MIPS_CPU_ISA_M32R5)
+#endif
#ifndef cpu_has_mips32r6
# define cpu_has_mips32r6 __isa_ge_or_flag(6, MIPS_CPU_ISA_M32R6)
#endif
#ifndef cpu_has_mips64r1
-# define cpu_has_mips64r1 __isa_range_or_flag(1, 6, MIPS_CPU_ISA_M64R1)
+# define cpu_has_mips64r1 (cpu_has_64bits && \
+ __isa_range_or_flag(1, 6, MIPS_CPU_ISA_M64R1))
#endif
#ifndef cpu_has_mips64r2
-# define cpu_has_mips64r2 __isa_range_or_flag(2, 6, MIPS_CPU_ISA_M64R2)
+# define cpu_has_mips64r2 (cpu_has_64bits && \
+ __isa_range_or_flag(2, 6, MIPS_CPU_ISA_M64R2))
+#endif
+#ifndef cpu_has_mips64r5
+# define cpu_has_mips64r5 (cpu_has_64bits && \
+ __isa_range_or_flag(5, 6, MIPS_CPU_ISA_M64R5))
#endif
#ifndef cpu_has_mips64r6
# define cpu_has_mips64r6 __isa_ge_and_flag(6, MIPS_CPU_ISA_M64R6)
@@ -313,19 +322,25 @@
(cpu_has_mips_3 | cpu_has_mips_4_5_64_r2_r6)
#define cpu_has_mips_4_5_64_r2_r6 \
(cpu_has_mips_4_5 | cpu_has_mips64r1 | \
- cpu_has_mips_r2 | cpu_has_mips_r6)
+ cpu_has_mips_r2 | cpu_has_mips_r5 | \
+ cpu_has_mips_r6)
-#define cpu_has_mips32 (cpu_has_mips32r1 | cpu_has_mips32r2 | cpu_has_mips32r6)
-#define cpu_has_mips64 (cpu_has_mips64r1 | cpu_has_mips64r2 | cpu_has_mips64r6)
+#define cpu_has_mips32 (cpu_has_mips32r1 | cpu_has_mips32r2 | \
+ cpu_has_mips32r5 | cpu_has_mips32r6)
+#define cpu_has_mips64 (cpu_has_mips64r1 | cpu_has_mips64r2 | \
+ cpu_has_mips64r5 | cpu_has_mips64r6)
#define cpu_has_mips_r1 (cpu_has_mips32r1 | cpu_has_mips64r1)
#define cpu_has_mips_r2 (cpu_has_mips32r2 | cpu_has_mips64r2)
+#define cpu_has_mips_r5 (cpu_has_mips32r5 | cpu_has_mips64r5)
#define cpu_has_mips_r6 (cpu_has_mips32r6 | cpu_has_mips64r6)
#define cpu_has_mips_r (cpu_has_mips32r1 | cpu_has_mips32r2 | \
- cpu_has_mips32r6 | cpu_has_mips64r1 | \
- cpu_has_mips64r2 | cpu_has_mips64r6)
+ cpu_has_mips32r5 | cpu_has_mips32r6 | \
+ cpu_has_mips64r1 | cpu_has_mips64r2 | \
+ cpu_has_mips64r5 | cpu_has_mips64r6)
-/* MIPSR2 and MIPSR6 have a lot of similarities */
-#define cpu_has_mips_r2_r6 (cpu_has_mips_r2 | cpu_has_mips_r6)
+/* MIPSR2 - MIPSR6 have a lot of similarities */
+#define cpu_has_mips_r2_r6 (cpu_has_mips_r2 | cpu_has_mips_r5 | \
+ cpu_has_mips_r6)
/*
* cpu_has_mips_r2_exec_hazard - return if IHB is required on current processor
@@ -435,9 +450,6 @@
# ifndef cpu_has_64bit_gp_regs
# define cpu_has_64bit_gp_regs 0
# endif
-# ifndef cpu_has_64bit_addresses
-# define cpu_has_64bit_addresses 0
-# endif
# ifndef cpu_vmbits
# define cpu_vmbits 31
# endif
@@ -456,9 +468,6 @@
# ifndef cpu_has_64bit_gp_regs
# define cpu_has_64bit_gp_regs 1
# endif
-# ifndef cpu_has_64bit_addresses
-# define cpu_has_64bit_addresses 1
-# endif
# ifndef cpu_vmbits
# define cpu_vmbits cpu_data[0].vmbits
# define __NEED_VMBITS_PROBE
@@ -620,6 +629,14 @@
# endif
#endif
+#ifndef cpu_has_mm_sysad
+# define cpu_has_mm_sysad __opt(MIPS_CPU_MM_SYSAD)
+#endif
+
+#ifndef cpu_has_mm_full
+# define cpu_has_mm_full __opt(MIPS_CPU_MM_FULL)
+#endif
+
/*
* Guest capabilities
*/
diff --git a/arch/mips/include/asm/cpu-info.h b/arch/mips/include/asm/cpu-info.h
index ed7ffe4e63a3..a600670d00e9 100644
--- a/arch/mips/include/asm/cpu-info.h
+++ b/arch/mips/include/asm/cpu-info.h
@@ -105,6 +105,15 @@ struct cpuinfo_mips {
unsigned int gtoffset_mask;
unsigned int guestid_mask;
unsigned int guestid_cache;
+
+#ifdef CONFIG_CPU_LOONGSON3_CPUCFG_EMULATION
+ /* CPUCFG data for this CPU, synthesized at probe time.
+ *
+ * CPUCFG select 0 is PRId, 4 and above are unimplemented for now.
+ * So the only stored values are for CPUCFG selects 1-3 inclusive.
+ */
+ u32 loongson3_cpucfg_data[3];
+#endif
} __attribute__((aligned(SMP_CACHE_BYTES)));
extern struct cpuinfo_mips cpu_data[];
@@ -142,7 +151,7 @@ struct proc_cpuinfo_notifier_args {
static inline unsigned int cpu_cluster(struct cpuinfo_mips *cpuinfo)
{
/* Optimisation for systems where multiple clusters aren't used */
- if (!IS_ENABLED(CONFIG_CPU_MIPSR6))
+ if (!IS_ENABLED(CONFIG_CPU_MIPSR5) && !IS_ENABLED(CONFIG_CPU_MIPSR6))
return 0;
return (cpuinfo->globalnumber & MIPS_GLOBALNUMBER_CLUSTER) >>
diff --git a/arch/mips/include/asm/cpu-type.h b/arch/mips/include/asm/cpu-type.h
index 49f0061a6051..75a7a382da09 100644
--- a/arch/mips/include/asm/cpu-type.h
+++ b/arch/mips/include/asm/cpu-type.h
@@ -51,13 +51,18 @@ static inline int __pure __get_cpu_type(const int cpu_type)
case CPU_M14KEC:
case CPU_INTERAPTIV:
case CPU_PROAPTIV:
- case CPU_P5600:
+#endif
+
+#ifdef CONFIG_SYS_HAS_CPU_MIPS32_R5
case CPU_M5150:
+ case CPU_P5600:
#endif
#if defined(CONFIG_SYS_HAS_CPU_MIPS32_R2) || \
+ defined(CONFIG_SYS_HAS_CPU_MIPS32_R5) || \
defined(CONFIG_SYS_HAS_CPU_MIPS32_R6) || \
defined(CONFIG_SYS_HAS_CPU_MIPS64_R2) || \
+ defined(CONFIG_SYS_HAS_CPU_MIPS64_R5) || \
defined(CONFIG_SYS_HAS_CPU_MIPS64_R6)
case CPU_QEMU_GENERIC:
#endif
diff --git a/arch/mips/include/asm/cpu.h b/arch/mips/include/asm/cpu.h
index 216a22916740..104a509312b3 100644
--- a/arch/mips/include/asm/cpu.h
+++ b/arch/mips/include/asm/cpu.h
@@ -250,6 +250,10 @@
#define PRID_REV_LOONGSON1C 0x0020 /* Same as Loongson-1B */
#define PRID_REV_LOONGSON2E 0x0002
#define PRID_REV_LOONGSON2F 0x0003
+#define PRID_REV_LOONGSON2K_R1_0 0x0000
+#define PRID_REV_LOONGSON2K_R1_1 0x0001
+#define PRID_REV_LOONGSON2K_R1_2 0x0002
+#define PRID_REV_LOONGSON2K_R1_3 0x0003
#define PRID_REV_LOONGSON3A_R1 0x0005
#define PRID_REV_LOONGSON3B_R1 0x0006
#define PRID_REV_LOONGSON3B_R2 0x0007
@@ -343,14 +347,16 @@ enum cpu_type_enum {
#define MIPS_CPU_ISA_M32R2 0x00000020
#define MIPS_CPU_ISA_M64R1 0x00000040
#define MIPS_CPU_ISA_M64R2 0x00000080
-#define MIPS_CPU_ISA_M32R6 0x00000100
-#define MIPS_CPU_ISA_M64R6 0x00000200
+#define MIPS_CPU_ISA_M32R5 0x00000100
+#define MIPS_CPU_ISA_M64R5 0x00000200
+#define MIPS_CPU_ISA_M32R6 0x00000400
+#define MIPS_CPU_ISA_M64R6 0x00000800
#define MIPS_CPU_ISA_32BIT (MIPS_CPU_ISA_II | MIPS_CPU_ISA_M32R1 | \
- MIPS_CPU_ISA_M32R2 | MIPS_CPU_ISA_M32R6)
+ MIPS_CPU_ISA_M32R2 | MIPS_CPU_ISA_M32R5 | MIPS_CPU_ISA_M32R6)
#define MIPS_CPU_ISA_64BIT (MIPS_CPU_ISA_III | MIPS_CPU_ISA_IV | \
MIPS_CPU_ISA_V | MIPS_CPU_ISA_M64R1 | MIPS_CPU_ISA_M64R2 | \
- MIPS_CPU_ISA_M64R6)
+ MIPS_CPU_ISA_M64R5 | MIPS_CPU_ISA_M64R6)
/*
* CPU Option encodings
@@ -416,7 +422,9 @@ enum cpu_type_enum {
#define MIPS_CPU_MT_PER_TC_PERF_COUNTERS \
BIT_ULL(56) /* CPU has perf counters implemented per TC (MIPSMT ASE) */
#define MIPS_CPU_MMID BIT_ULL(57) /* CPU supports MemoryMapIDs */
-#define MIPS_CPU_MAC_2008_ONLY BIT_ULL(58) /* CPU Only support MAC2008 Fused multiply-add instruction */
+#define MIPS_CPU_MM_SYSAD BIT_ULL(58) /* CPU supports write-through SysAD Valid merge */
+#define MIPS_CPU_MM_FULL BIT_ULL(59) /* CPU supports write-through full merge */
+#define MIPS_CPU_MAC_2008_ONLY BIT_ULL(60) /* CPU Only support MAC2008 Fused multiply-add instruction */
/*
* CPU ASE encodings
diff --git a/arch/mips/include/asm/emma/emma2rh.h b/arch/mips/include/asm/emma/emma2rh.h
deleted file mode 100644
index a25cdb378fe8..000000000000
--- a/arch/mips/include/asm/emma/emma2rh.h
+++ /dev/null
@@ -1,248 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Copyright (C) NEC Electronics Corporation 2005-2006
- *
- * This file based on include/asm-mips/ddb5xxx/ddb5xxx.h
- * Copyright 2001 MontaVista Software Inc.
- */
-#ifndef __ASM_EMMA_EMMA2RH_H
-#define __ASM_EMMA_EMMA2RH_H
-
-#include <irq.h>
-
-/*
- * EMMA2RH registers
- */
-#define REGBASE 0x10000000
-
-#define EMMA2RH_BHIF_STRAP_0 (0x000010+REGBASE)
-#define EMMA2RH_BHIF_INT_ST_0 (0x000030+REGBASE)
-#define EMMA2RH_BHIF_INT_ST_1 (0x000034+REGBASE)
-#define EMMA2RH_BHIF_INT_ST_2 (0x000038+REGBASE)
-#define EMMA2RH_BHIF_INT_EN_0 (0x000040+REGBASE)
-#define EMMA2RH_BHIF_INT_EN_1 (0x000044+REGBASE)
-#define EMMA2RH_BHIF_INT_EN_2 (0x000048+REGBASE)
-#define EMMA2RH_BHIF_INT1_EN_0 (0x000050+REGBASE)
-#define EMMA2RH_BHIF_INT1_EN_1 (0x000054+REGBASE)
-#define EMMA2RH_BHIF_INT1_EN_2 (0x000058+REGBASE)
-#define EMMA2RH_BHIF_SW_INT (0x000070+REGBASE)
-#define EMMA2RH_BHIF_SW_INT_EN (0x000080+REGBASE)
-#define EMMA2RH_BHIF_SW_INT_CLR (0x000090+REGBASE)
-#define EMMA2RH_BHIF_MAIN_CTRL (0x0000b4+REGBASE)
-#define EMMA2RH_BHIF_EXCEPT_VECT_BASE_ADDRESS (0x0000c0+REGBASE)
-#define EMMA2RH_GPIO_DIR (0x110d20+REGBASE)
-#define EMMA2RH_GPIO_INT_ST (0x110d30+REGBASE)
-#define EMMA2RH_GPIO_INT_MASK (0x110d3c+REGBASE)
-#define EMMA2RH_GPIO_INT_MODE (0x110d48+REGBASE)
-#define EMMA2RH_GPIO_INT_CND_A (0x110d54+REGBASE)
-#define EMMA2RH_GPIO_INT_CND_B (0x110d60+REGBASE)
-#define EMMA2RH_PBRD_INT_EN (0x100010+REGBASE)
-#define EMMA2RH_PBRD_CLKSEL (0x100028+REGBASE)
-#define EMMA2RH_PFUR0_BASE (0x101000+REGBASE)
-#define EMMA2RH_PFUR1_BASE (0x102000+REGBASE)
-#define EMMA2RH_PFUR2_BASE (0x103000+REGBASE)
-#define EMMA2RH_PIIC0_BASE (0x107000+REGBASE)
-#define EMMA2RH_PIIC1_BASE (0x108000+REGBASE)
-#define EMMA2RH_PIIC2_BASE (0x109000+REGBASE)
-#define EMMA2RH_PCI_CONTROL (0x200000+REGBASE)
-#define EMMA2RH_PCI_ARBIT_CTR (0x200004+REGBASE)
-#define EMMA2RH_PCI_IWIN0_CTR (0x200010+REGBASE)
-#define EMMA2RH_PCI_IWIN1_CTR (0x200014+REGBASE)
-#define EMMA2RH_PCI_INIT_ESWP (0x200018+REGBASE)
-#define EMMA2RH_PCI_INT (0x200020+REGBASE)
-#define EMMA2RH_PCI_INT_EN (0x200024+REGBASE)
-#define EMMA2RH_PCI_TWIN_CTR (0x200030+REGBASE)
-#define EMMA2RH_PCI_TWIN_BADR (0x200034+REGBASE)
-#define EMMA2RH_PCI_TWIN0_DADR (0x200038+REGBASE)
-#define EMMA2RH_PCI_TWIN1_DADR (0x20003c+REGBASE)
-
-/*
- * Memory map (physical address)
- *
- * Note most of the following address must be properly aligned by the
- * corresponding size. For example, if PCI_IO_SIZE is 16MB, then
- * PCI_IO_BASE must be aligned along 16MB boundary.
- */
-
-/* the actual ram size is detected at run-time */
-#define EMMA2RH_RAM_BASE 0x00000000
-#define EMMA2RH_RAM_SIZE 0x10000000 /* less than 256MB */
-
-#define EMMA2RH_IO_BASE 0x10000000
-#define EMMA2RH_IO_SIZE 0x01000000 /* 16 MB */
-
-#define EMMA2RH_GENERALIO_BASE 0x11000000
-#define EMMA2RH_GENERALIO_SIZE 0x01000000 /* 16 MB */
-
-#define EMMA2RH_PCI_IO_BASE 0x12000000
-#define EMMA2RH_PCI_IO_SIZE 0x02000000 /* 32 MB */
-
-#define EMMA2RH_PCI_MEM_BASE 0x14000000
-#define EMMA2RH_PCI_MEM_SIZE 0x08000000 /* 128 MB */
-
-#define EMMA2RH_ROM_BASE 0x1c000000
-#define EMMA2RH_ROM_SIZE 0x04000000 /* 64 MB */
-
-#define EMMA2RH_PCI_CONFIG_BASE EMMA2RH_PCI_IO_BASE
-#define EMMA2RH_PCI_CONFIG_SIZE EMMA2RH_PCI_IO_SIZE
-
-#define NUM_EMMA2RH_IRQ 96
-
-#define EMMA2RH_IRQ_BASE (MIPS_CPU_IRQ_BASE + 8)
-
-/*
- * emma2rh irq defs
- */
-
-#define EMMA2RH_IRQ_INT(n) (EMMA2RH_IRQ_BASE + (n))
-
-#define EMMA2RH_IRQ_PFUR0 EMMA2RH_IRQ_INT(49)
-#define EMMA2RH_IRQ_PFUR1 EMMA2RH_IRQ_INT(50)
-#define EMMA2RH_IRQ_PFUR2 EMMA2RH_IRQ_INT(51)
-#define EMMA2RH_IRQ_PIIC0 EMMA2RH_IRQ_INT(56)
-#define EMMA2RH_IRQ_PIIC1 EMMA2RH_IRQ_INT(57)
-#define EMMA2RH_IRQ_PIIC2 EMMA2RH_IRQ_INT(58)
-
-/*
- * EMMA2RH Register Access
- */
-
-#define EMMA2RH_BASE (0xa0000000)
-
-static inline void emma2rh_sync(void)
-{
- volatile u32 *p = (volatile u32 *)0xbfc00000;
- (void)(*p);
-}
-
-static inline void emma2rh_out32(u32 offset, u32 val)
-{
- *(volatile u32 *)(EMMA2RH_BASE | offset) = val;
- emma2rh_sync();
-}
-
-static inline u32 emma2rh_in32(u32 offset)
-{
- u32 val = *(volatile u32 *)(EMMA2RH_BASE | offset);
- return val;
-}
-
-static inline void emma2rh_out16(u32 offset, u16 val)
-{
- *(volatile u16 *)(EMMA2RH_BASE | offset) = val;
- emma2rh_sync();
-}
-
-static inline u16 emma2rh_in16(u32 offset)
-{
- u16 val = *(volatile u16 *)(EMMA2RH_BASE | offset);
- return val;
-}
-
-static inline void emma2rh_out8(u32 offset, u8 val)
-{
- *(volatile u8 *)(EMMA2RH_BASE | offset) = val;
- emma2rh_sync();
-}
-
-static inline u8 emma2rh_in8(u32 offset)
-{
- u8 val = *(volatile u8 *)(EMMA2RH_BASE | offset);
- return val;
-}
-
-/**
- * IIC registers map
- **/
-
-/*---------------------------------------------------------------------------*/
-/* CNT - Control register (00H R/W) */
-/*---------------------------------------------------------------------------*/
-#define SPT 0x00000001
-#define STT 0x00000002
-#define ACKE 0x00000004
-#define WTIM 0x00000008
-#define SPIE 0x00000010
-#define WREL 0x00000020
-#define LREL 0x00000040
-#define IICE 0x00000080
-#define CNT_RESERVED 0x000000ff /* reserved bit 0 */
-
-#define I2C_EMMA_START (IICE | STT)
-#define I2C_EMMA_STOP (IICE | SPT)
-#define I2C_EMMA_REPSTART I2C_EMMA_START
-
-/*---------------------------------------------------------------------------*/
-/* STA - Status register (10H Read) */
-/*---------------------------------------------------------------------------*/
-#define MSTS 0x00000080
-#define ALD 0x00000040
-#define EXC 0x00000020
-#define COI 0x00000010
-#define TRC 0x00000008
-#define ACKD 0x00000004
-#define STD 0x00000002
-#define SPD 0x00000001
-
-/*---------------------------------------------------------------------------*/
-/* CSEL - Clock select register (20H R/W) */
-/*---------------------------------------------------------------------------*/
-#define FCL 0x00000080
-#define ND50 0x00000040
-#define CLD 0x00000020
-#define DAD 0x00000010
-#define SMC 0x00000008
-#define DFC 0x00000004
-#define CL 0x00000003
-#define CSEL_RESERVED 0x000000ff /* reserved bit 0 */
-
-#define FAST397 0x0000008b
-#define FAST297 0x0000008a
-#define FAST347 0x0000000b
-#define FAST260 0x0000000a
-#define FAST130 0x00000008
-#define STANDARD108 0x00000083
-#define STANDARD83 0x00000082
-#define STANDARD95 0x00000003
-#define STANDARD73 0x00000002
-#define STANDARD36 0x00000001
-#define STANDARD71 0x00000000
-
-/*---------------------------------------------------------------------------*/
-/* SVA - Slave address register (30H R/W) */
-/*---------------------------------------------------------------------------*/
-#define SVA 0x000000fe
-
-/*---------------------------------------------------------------------------*/
-/* SHR - Shift register (40H R/W) */
-/*---------------------------------------------------------------------------*/
-#define SR 0x000000ff
-
-/*---------------------------------------------------------------------------*/
-/* INT - Interrupt register (50H R/W) */
-/* INTM - Interrupt mask register (60H R/W) */
-/*---------------------------------------------------------------------------*/
-#define INTE0 0x00000001
-
-/***********************************************************************
- * I2C registers
- ***********************************************************************
- */
-#define I2C_EMMA_CNT 0x00
-#define I2C_EMMA_STA 0x10
-#define I2C_EMMA_CSEL 0x20
-#define I2C_EMMA_SVA 0x30
-#define I2C_EMMA_SHR 0x40
-#define I2C_EMMA_INT 0x50
-#define I2C_EMMA_INTM 0x60
-
-/*
- * include the board dependent part
- */
-#ifdef CONFIG_NEC_MARKEINS
-#include <asm/emma/markeins.h>
-#else
-#error "Unknown EMMA2RH board!"
-#endif
-
-#endif /* __ASM_EMMA_EMMA2RH_H */
diff --git a/arch/mips/include/asm/emma/markeins.h b/arch/mips/include/asm/emma/markeins.h
deleted file mode 100644
index 2d7e1339d36f..000000000000
--- a/arch/mips/include/asm/emma/markeins.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Copyright (C) NEC Electronics Corporation 2005-2006
- *
- * This file based on include/asm-mips/ddb5xxx/ddb5xxx.h
- * Copyright 2001 MontaVista Software Inc.
- */
-
-#ifndef MARKEINS_H
-#define MARKEINS_H
-
-#define NUM_EMMA2RH_IRQ_SW 32
-#define NUM_EMMA2RH_IRQ_GPIO 32
-
-#define EMMA2RH_SW_CASCADE (EMMA2RH_IRQ_INT(7) - EMMA2RH_IRQ_INT(0))
-#define EMMA2RH_GPIO_CASCADE (EMMA2RH_IRQ_INT(46) - EMMA2RH_IRQ_INT(0))
-
-#define EMMA2RH_SW_IRQ_BASE (EMMA2RH_IRQ_BASE + NUM_EMMA2RH_IRQ)
-#define EMMA2RH_GPIO_IRQ_BASE (EMMA2RH_SW_IRQ_BASE + NUM_EMMA2RH_IRQ_SW)
-
-#define EMMA2RH_SW_IRQ_INT(n) (EMMA2RH_SW_IRQ_BASE + (n))
-
-#define MARKEINS_PCI_IRQ_INTA EMMA2RH_GPIO_IRQ_BASE+15
-#define MARKEINS_PCI_IRQ_INTB EMMA2RH_GPIO_IRQ_BASE+16
-#define MARKEINS_PCI_IRQ_INTC EMMA2RH_GPIO_IRQ_BASE+17
-#define MARKEINS_PCI_IRQ_INTD EMMA2RH_GPIO_IRQ_BASE+18
-
-#endif /* CONFIG_MARKEINS */
diff --git a/arch/mips/include/asm/fpu.h b/arch/mips/include/asm/fpu.h
index 9476e0498d59..08f9dd6903b7 100644
--- a/arch/mips/include/asm/fpu.h
+++ b/arch/mips/include/asm/fpu.h
@@ -71,12 +71,12 @@ static inline int __enable_fpu(enum fpu_mode mode)
goto fr_common;
case FPU_64BIT:
-#if !(defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6) \
- || defined(CONFIG_64BIT))
+#if !(defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR5) || \
+ defined(CONFIG_CPU_MIPSR6) || defined(CONFIG_64BIT))
/* we only have a 32-bit FPU */
return SIGFPE;
#endif
- /* fall through */
+ fallthrough;
case FPU_32BIT:
if (cpu_has_fre) {
/* clear FRE */
diff --git a/arch/mips/include/asm/fpu_emulator.h b/arch/mips/include/asm/fpu_emulator.h
index bb7c71ffe5b7..f67759e81210 100644
--- a/arch/mips/include/asm/fpu_emulator.h
+++ b/arch/mips/include/asm/fpu_emulator.h
@@ -172,10 +172,6 @@ void force_fcr31_sig(unsigned long fcr31, void __user *fault_addr,
struct task_struct *tsk);
int process_fpemu_return(int sig, void __user *fault_addr,
unsigned long fcr31);
-int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
- unsigned long *contpc);
-int mm_isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
- unsigned long *contpc);
/*
* Mask the FCSR Cause bits according to the Enable bits, observing
diff --git a/arch/mips/include/asm/hazards.h b/arch/mips/include/asm/hazards.h
index a0b92205f933..f855478d12fa 100644
--- a/arch/mips/include/asm/hazards.h
+++ b/arch/mips/include/asm/hazards.h
@@ -22,8 +22,9 @@
/*
* TLB hazards
*/
-#if (defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)) && \
- !defined(CONFIG_CPU_CAVIUM_OCTEON) && !defined(CONFIG_CPU_LOONGSON64)
+#if (defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR5) || \
+ defined(CONFIG_CPU_MIPSR6)) && \
+ !defined(CONFIG_CPU_CAVIUM_OCTEON) && !defined(CONFIG_CPU_LOONGSON64)
/*
* MIPSR2 defines ehb for hazard avoidance
@@ -278,7 +279,8 @@ do { \
#define __disable_fpu_hazard
-#elif defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
+#elif defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR5) || \
+ defined(CONFIG_CPU_MIPSR6)
#define __enable_fpu_hazard \
___ehb
diff --git a/arch/mips/include/asm/io.h b/arch/mips/include/asm/io.h
index cf1f2a4a2418..346fffd9e972 100644
--- a/arch/mips/include/asm/io.h
+++ b/arch/mips/include/asm/io.h
@@ -30,8 +30,6 @@
#include <asm/pgtable-bits.h>
#include <asm/processor.h>
#include <asm/string.h>
-
-#include <ioremap.h>
#include <mangle-port.h>
/*
@@ -153,66 +151,9 @@ static inline void *isa_bus_to_virt(unsigned long address)
*/
#define page_to_phys(page) ((dma_addr_t)page_to_pfn(page) << PAGE_SHIFT)
-extern void __iomem * __ioremap(phys_addr_t offset, phys_addr_t size, unsigned long flags);
-extern void __iounmap(const volatile void __iomem *addr);
-
-static inline void __iomem * __ioremap_mode(phys_addr_t offset, unsigned long size,
- unsigned long flags)
-{
- void __iomem *addr = plat_ioremap(offset, size, flags);
-
- if (addr)
- return addr;
-
-#define __IS_LOW512(addr) (!((phys_addr_t)(addr) & (phys_addr_t) ~0x1fffffffULL))
-
- if (cpu_has_64bit_addresses) {
- u64 base = UNCAC_BASE;
-
- /*
- * R10000 supports a 2 bit uncached attribute therefore
- * UNCAC_BASE may not equal IO_BASE.
- */
- if (flags == _CACHE_UNCACHED)
- base = (u64) IO_BASE;
- return (void __iomem *) (unsigned long) (base + offset);
- } else if (__builtin_constant_p(offset) &&
- __builtin_constant_p(size) && __builtin_constant_p(flags)) {
- phys_addr_t phys_addr, last_addr;
-
- phys_addr = fixup_bigphys_addr(offset, size);
-
- /* Don't allow wraparound or zero size. */
- last_addr = phys_addr + size - 1;
- if (!size || last_addr < phys_addr)
- return NULL;
-
- /*
- * Map uncached objects in the low 512MB of address
- * space using KSEG1.
- */
- if (__IS_LOW512(phys_addr) && __IS_LOW512(last_addr) &&
- flags == _CACHE_UNCACHED)
- return (void __iomem *)
- (unsigned long)CKSEG1ADDR(phys_addr);
- }
-
- return __ioremap(offset, size, flags);
-
-#undef __IS_LOW512
-}
-
-/*
- * ioremap_prot - map bus memory into CPU space
- * @offset: bus address of the memory
- * @size: size of the resource to map
-
- * ioremap_prot gives the caller control over cache coherency attributes (CCA)
- */
-static inline void __iomem *ioremap_prot(phys_addr_t offset,
- unsigned long size, unsigned long prot_val) {
- return __ioremap_mode(offset, size, prot_val & _CACHE_MASK);
-}
+void __iomem *ioremap_prot(phys_addr_t offset, unsigned long size,
+ unsigned long prot_val);
+void iounmap(const volatile void __iomem *addr);
/*
* ioremap - map bus memory into CPU space
@@ -226,7 +167,7 @@ static inline void __iomem *ioremap_prot(phys_addr_t offset,
* address.
*/
#define ioremap(offset, size) \
- __ioremap_mode((offset), (size), _CACHE_UNCACHED)
+ ioremap_prot((offset), (size), _CACHE_UNCACHED)
#define ioremap_uc ioremap
/*
@@ -245,7 +186,7 @@ static inline void __iomem *ioremap_prot(phys_addr_t offset,
* memory-like regions on I/O busses.
*/
#define ioremap_cache(offset, size) \
- __ioremap_mode((offset), (size), _page_cachable_default)
+ ioremap_prot((offset), (size), _page_cachable_default)
/*
* ioremap_wc - map bus memory into CPU space
@@ -266,23 +207,7 @@ static inline void __iomem *ioremap_prot(phys_addr_t offset,
* _CACHE_UNCACHED option (see cpu_probe() method).
*/
#define ioremap_wc(offset, size) \
- __ioremap_mode((offset), (size), boot_cpu_data.writecombine)
-
-static inline void iounmap(const volatile void __iomem *addr)
-{
- if (plat_iounmap(addr))
- return;
-
-#define __IS_KSEG1(addr) (((unsigned long)(addr) & ~0x1fffffffUL) == CKSEG1)
-
- if (cpu_has_64bit_addresses ||
- (__builtin_constant_p(addr) && __IS_KSEG1(addr)))
- return;
-
- __iounmap(addr);
-
-#undef __IS_KSEG1
-}
+ ioremap_prot((offset), (size), boot_cpu_data.writecombine)
#if defined(CONFIG_CPU_CAVIUM_OCTEON) || defined(CONFIG_CPU_LOONGSON64)
#define war_io_reorder_wmb() wmb()
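
Editor's note on the io.h hunks above: the per-platform inline __ioremap_mode()/iounmap() pair is gone, and ioremap(), ioremap_cache() and ioremap_wc() are now thin wrappers that differ only in the cache attribute passed to the out-of-line ioremap_prot(). A minimal driver-side sketch follows; DEV_PHYS_BASE and DEV_REG_SIZE are made-up values, not taken from the patch.

#include <linux/errno.h>
#include <linux/io.h>

#define DEV_PHYS_BASE	0x1f000000UL	/* hypothetical MMIO base */
#define DEV_REG_SIZE	0x1000		/* hypothetical window size */

static void __iomem *dev_regs;

static int dev_map(void)
{
	/* Expands to ioremap_prot(DEV_PHYS_BASE, DEV_REG_SIZE, _CACHE_UNCACHED) */
	dev_regs = ioremap(DEV_PHYS_BASE, DEV_REG_SIZE);
	if (!dev_regs)
		return -ENOMEM;
	return 0;
}

static void dev_unmap(void)
{
	iounmap(dev_regs);	/* now a real function rather than an inline KSEG1 check */
}
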
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index 2c343c346b79..e28b5a946e26 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -174,6 +174,8 @@ struct kvm_vcpu_stat {
#endif
u64 halt_successful_poll;
u64 halt_attempted_poll;
+ u64 halt_poll_success_ns;
+ u64 halt_poll_fail_ns;
u64 halt_poll_invalid;
u64 halt_wakeup;
};
diff --git a/arch/mips/include/asm/lasat/ds1603.h b/arch/mips/include/asm/lasat/ds1603.h
deleted file mode 100644
index ab833be9637d..000000000000
--- a/arch/mips/include/asm/lasat/ds1603.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#include <asm/addrspace.h>
-
-/* Lasat 100 */
-#define DS1603_REG_100 (KSEG1ADDR(0x1c810000))
-#define DS1603_RST_100 (1 << 2)
-#define DS1603_CLK_100 (1 << 0)
-#define DS1603_DATA_SHIFT_100 1
-#define DS1603_DATA_100 (1 << DS1603_DATA_SHIFT_100)
-
-/* Lasat 200 */
-#define DS1603_REG_200 (KSEG1ADDR(0x11000000))
-#define DS1603_RST_200 (1 << 3)
-#define DS1603_CLK_200 (1 << 4)
-#define DS1603_DATA_200 (1 << 5)
-
-#define DS1603_DATA_REG_200 (DS1603_REG_200 + 0x10000)
-#define DS1603_DATA_READ_SHIFT_200 9
-#define DS1603_DATA_READ_200 (1 << DS1603_DATA_READ_SHIFT_200)
diff --git a/arch/mips/include/asm/lasat/eeprom.h b/arch/mips/include/asm/lasat/eeprom.h
deleted file mode 100644
index 24001a5cbb11..000000000000
--- a/arch/mips/include/asm/lasat/eeprom.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#include <asm/addrspace.h>
-
-/* lasat 100 */
-#define AT93C_REG_100 KSEG1ADDR(0x1c810000)
-#define AT93C_RDATA_REG_100 AT93C_REG_100
-#define AT93C_RDATA_SHIFT_100 4
-#define AT93C_WDATA_SHIFT_100 4
-#define AT93C_CS_M_100 (1 << 5)
-#define AT93C_CLK_M_100 (1 << 3)
-
-/* lasat 200 */
-#define AT93C_REG_200 KSEG1ADDR(0x11000000)
-#define AT93C_RDATA_REG_200 (AT93C_REG_200+0x10000)
-#define AT93C_RDATA_SHIFT_200 8
-#define AT93C_WDATA_SHIFT_200 2
-#define AT93C_CS_M_200 (1 << 0)
-#define AT93C_CLK_M_200 (1 << 1)
diff --git a/arch/mips/include/asm/lasat/head.h b/arch/mips/include/asm/lasat/head.h
deleted file mode 100644
index 20b0ecedd4b5..000000000000
--- a/arch/mips/include/asm/lasat/head.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Image header stuff
- */
-#ifndef _HEAD_H
-#define _HEAD_H
-
-#define LASAT_K_MAGIC0_VAL 0xfedeabba
-#define LASAT_K_MAGIC1_VAL 0x00bedead
-
-#ifndef _LANGUAGE_ASSEMBLY
-#include <linux/types.h>
-struct bootloader_header {
- u32 magic[2];
- u32 version;
- u32 image_start;
- u32 image_size;
- u32 kernel_start;
- u32 kernel_entry;
-};
-#endif
-
-#endif /* _HEAD_H */
diff --git a/arch/mips/include/asm/lasat/lasat.h b/arch/mips/include/asm/lasat/lasat.h
deleted file mode 100644
index 483be606960d..000000000000
--- a/arch/mips/include/asm/lasat/lasat.h
+++ /dev/null
@@ -1,245 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * lasat.h
- *
- * Thomas Horsten <thh@lasat.com>
- * Copyright (C) 2000 LASAT Networks A/S.
- *
- * Configuration for LASAT boards, loads the appropriate include files.
- */
-#ifndef _LASAT_H
-#define _LASAT_H
-
-#ifndef _LANGUAGE_ASSEMBLY
-
-extern struct lasat_misc {
- volatile u32 *reset_reg;
- volatile u32 *flash_wp_reg;
- u32 flash_wp_bit;
-} *lasat_misc;
-
-enum lasat_mtdparts {
- LASAT_MTD_BOOTLOADER,
- LASAT_MTD_SERVICE,
- LASAT_MTD_NORMAL,
- LASAT_MTD_CONFIG,
- LASAT_MTD_FS,
- LASAT_MTD_LAST
-};
-
-/*
- * The format of the data record in the EEPROM.
- * See the LASAT Hardware Configuration field specification for a detailed
- * description of the config field.
- */
-#include <linux/types.h>
-
-#define LASAT_EEPROM_VERSION 7
-struct lasat_eeprom_struct {
- unsigned int version;
- unsigned int cfg[3];
- unsigned char hwaddr[6];
- unsigned char print_partno[12];
- unsigned char term0;
- unsigned char print_serial[14];
- unsigned char term1;
- unsigned char prod_partno[12];
- unsigned char term2;
- unsigned char prod_serial[14];
- unsigned char term3;
- unsigned char passwd_hash[16];
- unsigned char pwdnull;
- unsigned char vendid;
- unsigned char ts_ref;
- unsigned char ts_signoff;
- unsigned char reserved[11];
- unsigned char debugaccess;
- unsigned short prid;
- unsigned int serviceflag;
- unsigned int ipaddr;
- unsigned int netmask;
- unsigned int crc32;
-};
-
-struct lasat_eeprom_struct_pre7 {
- unsigned int version;
- unsigned int flags[3];
- unsigned char hwaddr0[6];
- unsigned char hwaddr1[6];
- unsigned char print_partno[9];
- unsigned char term0;
- unsigned char print_serial[14];
- unsigned char term1;
- unsigned char prod_partno[9];
- unsigned char term2;
- unsigned char prod_serial[14];
- unsigned char term3;
- unsigned char passwd_hash[24];
- unsigned char pwdnull;
- unsigned char vendor;
- unsigned char ts_ref;
- unsigned char ts_signoff;
- unsigned char reserved[6];
- unsigned int writecount;
- unsigned int ipaddr;
- unsigned int netmask;
- unsigned int crc32;
-};
-
-/* Configuration descriptor encoding - see the doc for details */
-
-#define LASAT_W0_DSCTYPE(v) (((v)) & 0xf)
-#define LASAT_W0_BMID(v) (((v) >> 0x04) & 0xf)
-#define LASAT_W0_CPUTYPE(v) (((v) >> 0x08) & 0xf)
-#define LASAT_W0_BUSSPEED(v) (((v) >> 0x0c) & 0xf)
-#define LASAT_W0_CPUCLK(v) (((v) >> 0x10) & 0xf)
-#define LASAT_W0_SDRAMBANKSZ(v) (((v) >> 0x14) & 0xf)
-#define LASAT_W0_SDRAMBANKS(v) (((v) >> 0x18) & 0xf)
-#define LASAT_W0_L2CACHE(v) (((v) >> 0x1c) & 0xf)
-
-#define LASAT_W1_EDHAC(v) (((v)) & 0xf)
-#define LASAT_W1_HIFN(v) (((v) >> 0x04) & 0x1)
-#define LASAT_W1_ISDN(v) (((v) >> 0x05) & 0x1)
-#define LASAT_W1_IDE(v) (((v) >> 0x06) & 0x1)
-#define LASAT_W1_HDLC(v) (((v) >> 0x07) & 0x1)
-#define LASAT_W1_USVERSION(v) (((v) >> 0x08) & 0x1)
-#define LASAT_W1_4MACS(v) (((v) >> 0x09) & 0x1)
-#define LASAT_W1_EXTSERIAL(v) (((v) >> 0x0a) & 0x1)
-#define LASAT_W1_FLASHSIZE(v) (((v) >> 0x0c) & 0xf)
-#define LASAT_W1_PCISLOTS(v) (((v) >> 0x10) & 0xf)
-#define LASAT_W1_PCI1OPT(v) (((v) >> 0x14) & 0xf)
-#define LASAT_W1_PCI2OPT(v) (((v) >> 0x18) & 0xf)
-#define LASAT_W1_PCI3OPT(v) (((v) >> 0x1c) & 0xf)
-
-/* Routines specific to LASAT boards */
-
-#define LASAT_BMID_MASQUERADE2 0
-#define LASAT_BMID_MASQUERADEPRO 1
-#define LASAT_BMID_SAFEPIPE25 2
-#define LASAT_BMID_SAFEPIPE50 3
-#define LASAT_BMID_SAFEPIPE100 4
-#define LASAT_BMID_SAFEPIPE5000 5
-#define LASAT_BMID_SAFEPIPE7000 6
-#define LASAT_BMID_SAFEPIPE1000 7
-#if 0
-#define LASAT_BMID_SAFEPIPE30 7
-#define LASAT_BMID_SAFEPIPE5100 8
-#define LASAT_BMID_SAFEPIPE7100 9
-#endif
-#define LASAT_BMID_UNKNOWN 0xf
-#define LASAT_MAX_BMID_NAMES 9 /* no larger than 15! */
-
-#define LASAT_HAS_EDHAC (1 << 0)
-#define LASAT_EDHAC_FAST (1 << 1)
-#define LASAT_HAS_EADI (1 << 2)
-#define LASAT_HAS_HIFN (1 << 3)
-#define LASAT_HAS_ISDN (1 << 4)
-#define LASAT_HAS_LEASEDLINE_IF (1 << 5)
-#define LASAT_HAS_HDC (1 << 6)
-
-#define LASAT_PRID_MASQUERADE2 0
-#define LASAT_PRID_MASQUERADEPRO 1
-#define LASAT_PRID_SAFEPIPE25 2
-#define LASAT_PRID_SAFEPIPE50 3
-#define LASAT_PRID_SAFEPIPE100 4
-#define LASAT_PRID_SAFEPIPE5000 5
-#define LASAT_PRID_SAFEPIPE7000 6
-#define LASAT_PRID_SAFEPIPE30 7
-#define LASAT_PRID_SAFEPIPE5100 8
-#define LASAT_PRID_SAFEPIPE7100 9
-
-#define LASAT_PRID_SAFEPIPE1110 10
-#define LASAT_PRID_SAFEPIPE3020 11
-#define LASAT_PRID_SAFEPIPE3030 12
-#define LASAT_PRID_SAFEPIPE5020 13
-#define LASAT_PRID_SAFEPIPE5030 14
-#define LASAT_PRID_SAFEPIPE1120 15
-#define LASAT_PRID_SAFEPIPE1130 16
-#define LASAT_PRID_SAFEPIPE6010 17
-#define LASAT_PRID_SAFEPIPE6110 18
-#define LASAT_PRID_SAFEPIPE6210 19
-#define LASAT_PRID_SAFEPIPE1020 20
-#define LASAT_PRID_SAFEPIPE1040 21
-#define LASAT_PRID_SAFEPIPE1060 22
-
-struct lasat_info {
- unsigned int li_cpu_hz;
- unsigned int li_bus_hz;
- unsigned int li_bmid;
- unsigned int li_memsize;
- unsigned int li_flash_size;
- unsigned int li_prid;
- unsigned char li_bmstr[16];
- unsigned char li_namestr[32];
- unsigned char li_typestr[16];
- /* Info on the Flash layout */
- unsigned int li_flash_base;
- unsigned long li_flashpart_base[LASAT_MTD_LAST];
- unsigned long li_flashpart_size[LASAT_MTD_LAST];
- struct lasat_eeprom_struct li_eeprom_info;
- unsigned int li_eeprom_upgrade_version;
- unsigned int li_debugaccess;
-};
-
-extern struct lasat_info lasat_board_info;
-
-static inline unsigned long lasat_flash_partition_start(int partno)
-{
- if (partno < 0 || partno >= LASAT_MTD_LAST)
- return 0;
-
- return lasat_board_info.li_flashpart_base[partno];
-}
-
-static inline unsigned long lasat_flash_partition_size(int partno)
-{
- if (partno < 0 || partno >= LASAT_MTD_LAST)
- return 0;
-
- return lasat_board_info.li_flashpart_size[partno];
-}
-
-/* Called from setup() to initialize the global board_info struct */
-extern int lasat_init_board_info(void);
-
-/* Write the modified EEPROM info struct */
-extern void lasat_write_eeprom_info(void);
-
-#define N_MACHTYPES 2
-/* for calibration of delays */
-
-/* the lasat_ndelay function is necessary because it is used at an
- * early stage of the boot process where ndelay is not calibrated.
- * It is used for the bit-banging rtc and eeprom drivers */
-
-#include <linux/delay.h>
-#include <linux/smp.h>
-
-/* calculating with the slowest board with 100 MHz clock */
-#define LASAT_100_DIVIDER 20
-/* All 200's run at 250 MHz clock */
-#define LASAT_200_DIVIDER 8
-
-extern unsigned int lasat_ndelay_divider;
-
-static inline void lasat_ndelay(unsigned int ns)
-{
- __delay(ns / lasat_ndelay_divider);
-}
-
-#define IS_LASAT_200() (current_cpu_data.cputype == CPU_R5000)
-
-#endif /* !defined (_LANGUAGE_ASSEMBLY) */
-
-#define LASAT_SERVICEMODE_MAGIC_1 0xdeadbeef
-#define LASAT_SERVICEMODE_MAGIC_2 0xfedeabba
-
-/* Lasat 100 boards */
-#define LASAT_GT_BASE (KSEG1ADDR(0x14000000))
-
-/* Lasat 200 boards */
-#define Vrc5074_PHYS_BASE 0x1fa00000
-#define Vrc5074_BASE (KSEG1ADDR(Vrc5074_PHYS_BASE))
-#define PCI_WINDOW1 0x1a000000
-
-#endif /* _LASAT_H */
diff --git a/arch/mips/include/asm/lasat/lasatint.h b/arch/mips/include/asm/lasat/lasatint.h
deleted file mode 100644
index b2b346e0ca38..000000000000
--- a/arch/mips/include/asm/lasat/lasatint.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __ASM_LASAT_LASATINT_H
-#define __ASM_LASAT_LASATINT_H
-
-/* lasat 100 */
-#define LASAT_INT_STATUS_REG_100 (KSEG1ADDR(0x1c880000))
-#define LASAT_INT_MASK_REG_100 (KSEG1ADDR(0x1c890000))
-#define LASATINT_MASK_SHIFT_100 0
-
-/* lasat 200 */
-#define LASAT_INT_STATUS_REG_200 (KSEG1ADDR(0x1104003c))
-#define LASAT_INT_MASK_REG_200 (KSEG1ADDR(0x1104003c))
-#define LASATINT_MASK_SHIFT_200 16
-
-#endif /* __ASM_LASAT_LASATINT_H */
diff --git a/arch/mips/include/asm/lasat/picvue.h b/arch/mips/include/asm/lasat/picvue.h
deleted file mode 100644
index 99987c5a4b83..000000000000
--- a/arch/mips/include/asm/lasat/picvue.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* Lasat 100 */
-#define PVC_REG_100 KSEG1ADDR(0x1c820000)
-#define PVC_DATA_SHIFT_100 0
-#define PVC_DATA_M_100 0xFF
-#define PVC_E_100 (1 << 8)
-#define PVC_RW_100 (1 << 9)
-#define PVC_RS_100 (1 << 10)
-
-/* Lasat 200 */
-#define PVC_REG_200 KSEG1ADDR(0x11000000)
-#define PVC_DATA_SHIFT_200 24
-#define PVC_DATA_M_200 (0xFF << PVC_DATA_SHIFT_200)
-#define PVC_E_200 (1 << 16)
-#define PVC_RW_200 (1 << 17)
-#define PVC_RS_200 (1 << 18)
diff --git a/arch/mips/include/asm/lasat/serial.h b/arch/mips/include/asm/lasat/serial.h
deleted file mode 100644
index 7b43d74089d1..000000000000
--- a/arch/mips/include/asm/lasat/serial.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#include <asm/lasat/lasat.h>
-
-/* Lasat 100 boards serial configuration */
-#define LASAT_BASE_BAUD_100 (7372800 / 16)
-#define LASAT_UART_REGS_BASE_100 0x1c8b0000
-#define LASAT_UART_REGS_SHIFT_100 2
-#define LASATINT_UART_100 16
-
-/* * LASAT 200 boards serial configuration */
-#define LASAT_BASE_BAUD_200 (100000000 / 16 / 12)
-#define LASAT_UART_REGS_BASE_200 (Vrc5074_PHYS_BASE + 0x0300)
-#define LASAT_UART_REGS_SHIFT_200 3
-#define LASATINT_UART_200 21
diff --git a/arch/mips/include/asm/maar.h b/arch/mips/include/asm/maar.h
index 6908b93c4ff9..99f1c3e4b11f 100644
--- a/arch/mips/include/asm/maar.h
+++ b/arch/mips/include/asm/maar.h
@@ -32,7 +32,7 @@ unsigned platform_maar_init(unsigned num_pairs);
* @upper: The highest address that the MAAR pair will affect. Must be
* aligned to one byte before a 2^16 byte boundary.
* @attrs: The accessibility attributes to program, eg. MIPS_MAAR_S. The
- * MIPS_MAAR_VL attribute will automatically be set.
+ * MIPS_MAAR_VL/MIPS_MAAR_VH attributes will automatically be set.
*
* Program the pair of MAAR registers specified by idx to apply the attributes
* specified by attrs to the range of addresses from lower to higher.
@@ -48,17 +48,30 @@ static inline void write_maar_pair(unsigned idx, phys_addr_t lower,
/* Automatically set MIPS_MAAR_VL */
attrs |= MIPS_MAAR_VL;
- /* Write the upper address & attributes (only MIPS_MAAR_VL matters) */
+ /*
+ * Write the upper address & attributes (both MIPS_MAAR_VL and
+ * MIPS_MAAR_VH matter)
+ */
write_c0_maari(idx << 1);
back_to_back_c0_hazard();
write_c0_maar(((upper >> 4) & MIPS_MAAR_ADDR) | attrs);
back_to_back_c0_hazard();
+#ifdef CONFIG_XPA
+ upper >>= MIPS_MAARX_ADDR_SHIFT;
+ writex_c0_maar(((upper >> 4) & MIPS_MAARX_ADDR) | MIPS_MAARX_VH);
+ back_to_back_c0_hazard();
+#endif
/* Write the lower address & attributes */
write_c0_maari((idx << 1) | 0x1);
back_to_back_c0_hazard();
write_c0_maar((lower >> 4) | attrs);
back_to_back_c0_hazard();
+#ifdef CONFIG_XPA
+ lower >>= MIPS_MAARX_ADDR_SHIFT;
+ writex_c0_maar(((lower >> 4) & MIPS_MAARX_ADDR) | MIPS_MAARX_VH);
+ back_to_back_c0_hazard();
+#endif
}
/**
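
Editor's note on the maar.h hunk above: with CONFIG_XPA the physical address no longer fits in a single MAAR register, so write_maar_pair() now mirrors the high address bits into the MAARX companion register via writex_c0_maar() and sets MIPS_MAARX_VH alongside MIPS_MAAR_VL. A hedged caller sketch, using the platform_maar_init() hook declared above with an invented 256 MiB range:

#include <asm/maar.h>

unsigned platform_maar_init(unsigned num_pairs)
{
	if (num_pairs < 1)
		return 0;

	/*
	 * Mark 0..256MiB as speculation-safe; the upper bound sits one byte
	 * below a 2^16 boundary as required, and VL (plus VH under XPA) is
	 * set on our behalf by write_maar_pair().
	 */
	write_maar_pair(0, 0x00000000, 0x0fffffff, MIPS_MAAR_S);
	return 1;
}
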
diff --git a/arch/mips/include/asm/mach-ath25/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ath25/cpu-feature-overrides.h
index 95a0b580909d..a54f20d956a2 100644
--- a/arch/mips/include/asm/mach-ath25/cpu-feature-overrides.h
+++ b/arch/mips/include/asm/mach-ath25/cpu-feature-overrides.h
@@ -56,6 +56,5 @@
#define cpu_has_64bits 0
#define cpu_has_64bit_zero_reg 0
#define cpu_has_64bit_gp_regs 0
-#define cpu_has_64bit_addresses 0
#endif /* __ASM_MACH_ATH25_CPU_FEATURE_OVERRIDES_H */
diff --git a/arch/mips/include/asm/mach-ath79/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ath79/cpu-feature-overrides.h
index e7c972fccd9f..79ab3ad9fee8 100644
--- a/arch/mips/include/asm/mach-ath79/cpu-feature-overrides.h
+++ b/arch/mips/include/asm/mach-ath79/cpu-feature-overrides.h
@@ -45,7 +45,6 @@
#define cpu_has_64bits 0
#define cpu_has_64bit_zero_reg 0
#define cpu_has_64bit_gp_regs 0
-#define cpu_has_64bit_addresses 0
#define cpu_dcache_line_size() 32
#define cpu_icache_line_size() 32
diff --git a/arch/mips/include/asm/mach-au1x00/ioremap.h b/arch/mips/include/asm/mach-au1x00/ioremap.h
deleted file mode 100644
index f6877ed8b8d0..000000000000
--- a/arch/mips/include/asm/mach-au1x00/ioremap.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * include/asm-mips/mach-au1x00/ioremap.h
- */
-#ifndef __ASM_MACH_AU1X00_IOREMAP_H
-#define __ASM_MACH_AU1X00_IOREMAP_H
-
-#include <linux/types.h>
-
-#if defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_PCI)
-extern phys_addr_t __fixup_bigphys_addr(phys_addr_t, phys_addr_t);
-#else
-static inline phys_addr_t __fixup_bigphys_addr(phys_addr_t phys_addr, phys_addr_t size)
-{
- return phys_addr;
-}
-#endif
-
-/*
- * Allow physical addresses to be fixed up to help 36-bit peripherals.
- */
-static inline phys_addr_t fixup_bigphys_addr(phys_addr_t phys_addr, phys_addr_t size)
-{
- return __fixup_bigphys_addr(phys_addr, size);
-}
-
-static inline void __iomem *plat_ioremap(phys_addr_t offset, unsigned long size,
- unsigned long flags)
-{
- return NULL;
-}
-
-static inline int plat_iounmap(const volatile void __iomem *addr)
-{
- return 0;
-}
-
-#endif /* __ASM_MACH_AU1X00_IOREMAP_H */
diff --git a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_gpio.h b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_gpio.h
index 8fe88c2251e4..9212429d5edd 100644
--- a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_gpio.h
+++ b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_gpio.h
@@ -13,16 +13,16 @@ static inline unsigned long bcm63xx_gpio_count(void)
case BCM6328_CPU_ID:
return 32;
case BCM3368_CPU_ID:
- case BCM6358_CPU_ID:
return 40;
case BCM6338_CPU_ID:
return 8;
case BCM6345_CPU_ID:
return 16;
- case BCM6362_CPU_ID:
- return 48;
+ case BCM6358_CPU_ID:
case BCM6368_CPU_ID:
return 38;
+ case BCM6362_CPU_ID:
+ return 48;
case BCM6348_CPU_ID:
default:
return 37;
diff --git a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h
index bc3444cd4ef2..9ceb5e72889f 100644
--- a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h
+++ b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h
@@ -1367,8 +1367,8 @@
#define MISC_STRAPBUS_6328_REG 0x240
#define STRAPBUS_6328_FCVO_SHIFT 7
#define STRAPBUS_6328_FCVO_MASK (0x1f << STRAPBUS_6328_FCVO_SHIFT)
-#define STRAPBUS_6328_BOOT_SEL_SERIAL (1 << 28)
-#define STRAPBUS_6328_BOOT_SEL_NAND (0 << 28)
+#define STRAPBUS_6328_BOOT_SEL_SERIAL (1 << 18)
+#define STRAPBUS_6328_BOOT_SEL_NAND (0 << 18)
/*************************************************************************
* _REG relative to RSET_PCIE
diff --git a/arch/mips/include/asm/mach-bcm63xx/ioremap.h b/arch/mips/include/asm/mach-bcm63xx/ioremap.h
index 8cd261ec0a75..73f31825bbf3 100644
--- a/arch/mips/include/asm/mach-bcm63xx/ioremap.h
+++ b/arch/mips/include/asm/mach-bcm63xx/ioremap.h
@@ -4,11 +4,6 @@
#include <bcm63xx_cpu.h>
-static inline phys_addr_t fixup_bigphys_addr(phys_addr_t phys_addr, phys_addr_t size)
-{
- return phys_addr;
-}
-
static inline int is_bcm63xx_internal_registers(phys_addr_t offset)
{
switch (bcm63xx_get_cpu_id()) {
diff --git a/arch/mips/include/asm/mach-bmips/ioremap.h b/arch/mips/include/asm/mach-bmips/ioremap.h
index 52632ebc705f..63b4af9916b6 100644
--- a/arch/mips/include/asm/mach-bmips/ioremap.h
+++ b/arch/mips/include/asm/mach-bmips/ioremap.h
@@ -4,11 +4,6 @@
#include <linux/types.h>
-static inline phys_addr_t fixup_bigphys_addr(phys_addr_t phys_addr, phys_addr_t size)
-{
- return phys_addr;
-}
-
static inline int is_bmips_internal_registers(phys_addr_t offset)
{
if (offset >= 0xfff80000)
diff --git a/arch/mips/include/asm/mach-emma2rh/irq.h b/arch/mips/include/asm/mach-emma2rh/irq.h
deleted file mode 100644
index d32736736bb3..000000000000
--- a/arch/mips/include/asm/mach-emma2rh/irq.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2003 by Ralf Baechle
- */
-#ifndef __ASM_MACH_EMMA2RH_IRQ_H
-#define __ASM_MACH_EMMA2RH_IRQ_H
-
-#define NR_IRQS 256
-
-#include <asm/mach-generic/irq.h>
-
-#endif /* __ASM_MACH_EMMA2RH_IRQ_H */
diff --git a/arch/mips/include/asm/mach-generic/floppy.h b/arch/mips/include/asm/mach-generic/floppy.h
index 9ec2f6a5200b..e3f446d54827 100644
--- a/arch/mips/include/asm/mach-generic/floppy.h
+++ b/arch/mips/include/asm/mach-generic/floppy.h
@@ -26,14 +26,14 @@
/*
* How to access the FDC's registers.
*/
-static inline unsigned char fd_inb(unsigned int port)
+static inline unsigned char fd_inb(unsigned int base, unsigned int reg)
{
- return inb_p(port);
+ return inb_p(base + reg);
}
-static inline void fd_outb(unsigned char value, unsigned int port)
+static inline void fd_outb(unsigned char value, unsigned int base, unsigned int reg)
{
- outb_p(value, port);
+ outb_p(value, base + reg);
}
/*
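
Editor's note on the floppy.h hunks above: fd_inb()/fd_outb() now take the controller base and the register offset as separate arguments instead of a precomputed port. A hedged call-site sketch; FDC_EXAMPLE_BASE is a made-up base, and offsets 4 and 5 are the classic i8272 Main Status Register and Data register.

#define FDC_EXAMPLE_BASE	0x3f0	/* hypothetical controller base */

static unsigned char fdc_read_msr(void)
{
	return fd_inb(FDC_EXAMPLE_BASE, 4);	/* was fd_inb(FDC_EXAMPLE_BASE + 4) */
}

static void fdc_write_data(unsigned char val)
{
	fd_outb(val, FDC_EXAMPLE_BASE, 5);
}
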
diff --git a/arch/mips/include/asm/mach-generic/ioremap.h b/arch/mips/include/asm/mach-generic/ioremap.h
index 4e36ea25ed33..f2442b84545c 100644
--- a/arch/mips/include/asm/mach-generic/ioremap.h
+++ b/arch/mips/include/asm/mach-generic/ioremap.h
@@ -7,15 +7,6 @@
#include <linux/types.h>
-/*
- * Allow physical addresses to be fixed up to help peripherals located
- * outside the low 32-bit range -- generic pass-through version.
- */
-static inline phys_addr_t fixup_bigphys_addr(phys_addr_t phys_addr, phys_addr_t size)
-{
- return phys_addr;
-}
-
static inline void __iomem *plat_ioremap(phys_addr_t offset, unsigned long size,
unsigned long flags)
{
diff --git a/arch/mips/include/asm/mach-generic/irq.h b/arch/mips/include/asm/mach-generic/irq.h
index be546a0f65fa..72ac2c202c55 100644
--- a/arch/mips/include/asm/mach-generic/irq.h
+++ b/arch/mips/include/asm/mach-generic/irq.h
@@ -36,10 +36,4 @@
#endif /* CONFIG_IRQ_MIPS_CPU */
-#ifdef CONFIG_MIPS_GIC
-#ifndef MIPS_GIC_IRQ_BASE
-#define MIPS_GIC_IRQ_BASE (MIPS_CPU_IRQ_BASE + 8)
-#endif
-#endif /* CONFIG_MIPS_GIC */
-
#endif /* __ASM_MACH_GENERIC_IRQ_H */
diff --git a/arch/mips/include/asm/mach-ip27/spaces.h b/arch/mips/include/asm/mach-ip27/spaces.h
index 24d5e31bcfa6..66421e9a6aa6 100644
--- a/arch/mips/include/asm/mach-ip27/spaces.h
+++ b/arch/mips/include/asm/mach-ip27/spaces.h
@@ -10,17 +10,19 @@
#ifndef _ASM_MACH_IP27_SPACES_H
#define _ASM_MACH_IP27_SPACES_H
+#include <linux/const.h>
+
/*
* IP27 uses the R10000's uncached attribute feature. Attribute 3 selects
* uncached memory addressing. Hide the definitions on 32-bit compilation
* of the compat-vdso code.
*/
#ifdef CONFIG_64BIT
-#define HSPEC_BASE 0x9000000000000000
-#define IO_BASE 0x9200000000000000
-#define MSPEC_BASE 0x9400000000000000
-#define UNCAC_BASE 0x9600000000000000
-#define CAC_BASE 0xa800000000000000
+#define HSPEC_BASE _AC(0x9000000000000000, UL)
+#define IO_BASE _AC(0x9200000000000000, UL)
+#define MSPEC_BASE _AC(0x9400000000000000, UL)
+#define UNCAC_BASE _AC(0x9600000000000000, UL)
+#define CAC_BASE _AC(0xa800000000000000, UL)
#endif
#define TO_MSPEC(x) (MSPEC_BASE | ((x) & TO_PHYS_MASK))
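
Editor's note on the IP27 spaces.h hunk above: wrapping the base addresses in _AC(..., UL) lets the same header be included from both C and assembly. Simplified from include/uapi/linux/const.h, the relevant machinery is:

#ifdef __ASSEMBLY__
#define _AC(X, Y)	X		/* assembler: keep the bare constant */
#else
#define __AC(X, Y)	(X##Y)
#define _AC(X, Y)	__AC(X, Y)	/* C: paste the UL suffix on */
#endif

/* So UNCAC_BASE is 0x9600000000000000UL in C and 0x9600000000000000 in .S files. */
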
diff --git a/arch/mips/include/asm/mach-ip30/war.h b/arch/mips/include/asm/mach-ip30/war.h
index a98ba204f183..a1fa0c1f5300 100644
--- a/arch/mips/include/asm/mach-ip30/war.h
+++ b/arch/mips/include/asm/mach-ip30/war.h
@@ -8,19 +8,17 @@
#define R4600_V1_INDEX_ICACHEOP_WAR 0
#define R4600_V1_HIT_CACHEOP_WAR 0
#define R4600_V2_HIT_CACHEOP_WAR 0
-#define MIPS_CACHE_SYNC_WAR 0
#define BCM1250_M3_WAR 0
#define SIBYTE_1956_WAR 0
#define MIPS4K_ICACHE_REFILL_WAR 0
-#define MIPS34K_MISSED_ITLB_WAR 0
-#define R5432_CP0_INTERRUPT_WAR 0
+#define MIPS_CACHE_SYNC_WAR 0
#define TX49XX_ICACHE_INDEX_INV_WAR 0
#define ICACHE_REFILLS_WORKAROUND_WAR 0
-
#ifdef CONFIG_CPU_R10000
#define R10000_LLSC_WAR 1
#else
#define R10000_LLSC_WAR 0
#endif
+#define MIPS34K_MISSED_ITLB_WAR 0
#endif /* __ASM_MIPS_MACH_IP30_WAR_H */
diff --git a/arch/mips/include/asm/mach-jazz/floppy.h b/arch/mips/include/asm/mach-jazz/floppy.h
index 4b86c88a03b7..095000c290e5 100644
--- a/arch/mips/include/asm/mach-jazz/floppy.h
+++ b/arch/mips/include/asm/mach-jazz/floppy.h
@@ -17,19 +17,19 @@
#include <asm/jazzdma.h>
#include <asm/pgtable.h>
-static inline unsigned char fd_inb(unsigned int port)
+static inline unsigned char fd_inb(unsigned int base, unsigned int reg)
{
unsigned char c;
- c = *(volatile unsigned char *) port;
+ c = *(volatile unsigned char *) (base + reg);
udelay(1);
return c;
}
-static inline void fd_outb(unsigned char value, unsigned int port)
+static inline void fd_outb(unsigned char value, unsigned int base, unsigned int reg)
{
- *(volatile unsigned char *) port = value;
+ *(volatile unsigned char *) (base + reg) = value;
}
/*
diff --git a/arch/mips/include/asm/mach-jz4740/base.h b/arch/mips/include/asm/mach-jz4740/base.h
deleted file mode 100644
index 96b2d6674cdb..000000000000
--- a/arch/mips/include/asm/mach-jz4740/base.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __ASM_MACH_JZ4740_BASE_H__
-#define __ASM_MACH_JZ4740_BASE_H__
-
-#define JZ4740_CPM_BASE_ADDR 0x10000000
-#define JZ4740_INTC_BASE_ADDR 0x10001000
-#define JZ4740_WDT_BASE_ADDR 0x10002000
-#define JZ4740_TCU_BASE_ADDR 0x10002010
-#define JZ4740_RTC_BASE_ADDR 0x10003000
-#define JZ4740_GPIO_BASE_ADDR 0x10010000
-#define JZ4740_AIC_BASE_ADDR 0x10020000
-#define JZ4740_MSC_BASE_ADDR 0x10021000
-#define JZ4740_UART0_BASE_ADDR 0x10030000
-#define JZ4740_UART1_BASE_ADDR 0x10031000
-#define JZ4740_I2C_BASE_ADDR 0x10042000
-#define JZ4740_SSI_BASE_ADDR 0x10043000
-#define JZ4740_SADC_BASE_ADDR 0x10070000
-#define JZ4740_EMC_BASE_ADDR 0x13010000
-#define JZ4740_DMAC_BASE_ADDR 0x13020000
-#define JZ4740_UHC_BASE_ADDR 0x13030000
-#define JZ4740_UDC_BASE_ADDR 0x13040000
-#define JZ4740_LCD_BASE_ADDR 0x13050000
-#define JZ4740_SLCD_BASE_ADDR 0x13050000
-#define JZ4740_CIM_BASE_ADDR 0x13060000
-#define JZ4740_IPU_BASE_ADDR 0x13080000
-
-#endif
diff --git a/arch/mips/include/asm/mach-jz4740/dma.h b/arch/mips/include/asm/mach-jz4740/dma.h
deleted file mode 100644
index e5d2a5311a3a..000000000000
--- a/arch/mips/include/asm/mach-jz4740/dma.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Copyright (C) 2010, Lars-Peter Clausen <lars@metafoo.de>
- * JZ7420/JZ4740 DMA definitions
- */
-
-#ifndef __ASM_MACH_JZ4740_DMA_H__
-#define __ASM_MACH_JZ4740_DMA_H__
-
-enum jz4740_dma_request_type {
- JZ4740_DMA_TYPE_AUTO_REQUEST = 8,
- JZ4740_DMA_TYPE_UART_TRANSMIT = 20,
- JZ4740_DMA_TYPE_UART_RECEIVE = 21,
- JZ4740_DMA_TYPE_SPI_TRANSMIT = 22,
- JZ4740_DMA_TYPE_SPI_RECEIVE = 23,
- JZ4740_DMA_TYPE_MMC_TRANSMIT = 26,
- JZ4740_DMA_TYPE_MMC_RECEIVE = 27,
- JZ4740_DMA_TYPE_TCU = 28,
- JZ4740_DMA_TYPE_SADC = 29,
- JZ4740_DMA_TYPE_SLCD = 30,
-};
-
-#endif /* __ASM_JZ4740_DMA_H__ */
diff --git a/arch/mips/include/asm/mach-jz4740/irq.h b/arch/mips/include/asm/mach-jz4740/irq.h
index 09c38eac671a..27c543bd340f 100644
--- a/arch/mips/include/asm/mach-jz4740/irq.h
+++ b/arch/mips/include/asm/mach-jz4740/irq.h
@@ -8,49 +8,6 @@
#define __ASM_MACH_JZ4740_IRQ_H__
#define MIPS_CPU_IRQ_BASE 0
-#define JZ4740_IRQ_BASE 8
-
-#ifdef CONFIG_MACH_JZ4740
-# define NR_INTC_IRQS 32
-#else
-# define NR_INTC_IRQS 64
-#endif
-
-/* 1st-level interrupts */
-#define JZ4740_IRQ(x) (JZ4740_IRQ_BASE + (x))
-#define JZ4740_IRQ_I2C JZ4740_IRQ(1)
-#define JZ4740_IRQ_UHC JZ4740_IRQ(3)
-#define JZ4740_IRQ_UART1 JZ4740_IRQ(8)
-#define JZ4740_IRQ_UART0 JZ4740_IRQ(9)
-#define JZ4740_IRQ_SADC JZ4740_IRQ(12)
-#define JZ4740_IRQ_MSC JZ4740_IRQ(14)
-#define JZ4740_IRQ_RTC JZ4740_IRQ(15)
-#define JZ4740_IRQ_SSI JZ4740_IRQ(16)
-#define JZ4740_IRQ_CIM JZ4740_IRQ(17)
-#define JZ4740_IRQ_AIC JZ4740_IRQ(18)
-#define JZ4740_IRQ_ETH JZ4740_IRQ(19)
-#define JZ4740_IRQ_DMAC JZ4740_IRQ(20)
-#define JZ4740_IRQ_TCU2 JZ4740_IRQ(21)
-#define JZ4740_IRQ_TCU1 JZ4740_IRQ(22)
-#define JZ4740_IRQ_TCU0 JZ4740_IRQ(23)
-#define JZ4740_IRQ_UDC JZ4740_IRQ(24)
-#define JZ4740_IRQ_GPIO3 JZ4740_IRQ(25)
-#define JZ4740_IRQ_GPIO2 JZ4740_IRQ(26)
-#define JZ4740_IRQ_GPIO1 JZ4740_IRQ(27)
-#define JZ4740_IRQ_GPIO0 JZ4740_IRQ(28)
-#define JZ4740_IRQ_IPU JZ4740_IRQ(29)
-#define JZ4740_IRQ_LCD JZ4740_IRQ(30)
-
-#define JZ4780_IRQ_TCU2 JZ4740_IRQ(25)
-
-/* 2nd-level interrupts */
-#define JZ4740_IRQ_DMA(x) (JZ4740_IRQ(NR_INTC_IRQS) + (x))
-
-#define JZ4740_IRQ_INTC_GPIO(x) (JZ4740_IRQ_GPIO0 - (x))
-#define JZ4740_IRQ_GPIO(x) (JZ4740_IRQ(NR_INTC_IRQS + 16) + (x))
-
-#define JZ4740_IRQ_ADC_BASE JZ4740_IRQ(NR_INTC_IRQS + 144)
-
-#define NR_IRQS (JZ4740_IRQ_ADC_BASE + 6)
+#define NR_IRQS 256
#endif
diff --git a/arch/mips/include/asm/mach-jz4740/timer.h b/arch/mips/include/asm/mach-jz4740/timer.h
deleted file mode 100644
index 8a19cfe5bed7..000000000000
--- a/arch/mips/include/asm/mach-jz4740/timer.h
+++ /dev/null
@@ -1,126 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Copyright (C) 2010, Lars-Peter Clausen <lars@metafoo.de>
- * JZ4740 platform timer support
- */
-
-#ifndef __ASM_MACH_JZ4740_TIMER
-#define __ASM_MACH_JZ4740_TIMER
-
-#define JZ_REG_TIMER_STOP 0x0C
-#define JZ_REG_TIMER_STOP_SET 0x1C
-#define JZ_REG_TIMER_STOP_CLEAR 0x2C
-#define JZ_REG_TIMER_ENABLE 0x00
-#define JZ_REG_TIMER_ENABLE_SET 0x04
-#define JZ_REG_TIMER_ENABLE_CLEAR 0x08
-#define JZ_REG_TIMER_FLAG 0x10
-#define JZ_REG_TIMER_FLAG_SET 0x14
-#define JZ_REG_TIMER_FLAG_CLEAR 0x18
-#define JZ_REG_TIMER_MASK 0x20
-#define JZ_REG_TIMER_MASK_SET 0x24
-#define JZ_REG_TIMER_MASK_CLEAR 0x28
-
-#define JZ_REG_TIMER_DFR(x) (((x) * 0x10) + 0x30)
-#define JZ_REG_TIMER_DHR(x) (((x) * 0x10) + 0x34)
-#define JZ_REG_TIMER_CNT(x) (((x) * 0x10) + 0x38)
-#define JZ_REG_TIMER_CTRL(x) (((x) * 0x10) + 0x3C)
-
-#define JZ_TIMER_IRQ_HALF(x) BIT((x) + 0x10)
-#define JZ_TIMER_IRQ_FULL(x) BIT(x)
-
-#define JZ_TIMER_CTRL_PWM_ABBRUPT_SHUTDOWN BIT(9)
-#define JZ_TIMER_CTRL_PWM_ACTIVE_LOW BIT(8)
-#define JZ_TIMER_CTRL_PWM_ENABLE BIT(7)
-#define JZ_TIMER_CTRL_PRESCALE_MASK 0x1c
-#define JZ_TIMER_CTRL_PRESCALE_OFFSET 0x3
-#define JZ_TIMER_CTRL_PRESCALE_1 (0 << 3)
-#define JZ_TIMER_CTRL_PRESCALE_4 (1 << 3)
-#define JZ_TIMER_CTRL_PRESCALE_16 (2 << 3)
-#define JZ_TIMER_CTRL_PRESCALE_64 (3 << 3)
-#define JZ_TIMER_CTRL_PRESCALE_256 (4 << 3)
-#define JZ_TIMER_CTRL_PRESCALE_1024 (5 << 3)
-
-#define JZ_TIMER_CTRL_PRESCALER(x) ((x) << JZ_TIMER_CTRL_PRESCALE_OFFSET)
-
-#define JZ_TIMER_CTRL_SRC_EXT BIT(2)
-#define JZ_TIMER_CTRL_SRC_RTC BIT(1)
-#define JZ_TIMER_CTRL_SRC_PCLK BIT(0)
-
-extern void __iomem *jz4740_timer_base;
-void __init jz4740_timer_init(void);
-
-void jz4740_timer_enable_watchdog(void);
-void jz4740_timer_disable_watchdog(void);
-
-static inline void jz4740_timer_stop(unsigned int timer)
-{
- writel(BIT(timer), jz4740_timer_base + JZ_REG_TIMER_STOP_SET);
-}
-
-static inline void jz4740_timer_start(unsigned int timer)
-{
- writel(BIT(timer), jz4740_timer_base + JZ_REG_TIMER_STOP_CLEAR);
-}
-
-static inline bool jz4740_timer_is_enabled(unsigned int timer)
-{
- return readb(jz4740_timer_base + JZ_REG_TIMER_ENABLE) & BIT(timer);
-}
-
-static inline void jz4740_timer_enable(unsigned int timer)
-{
- writeb(BIT(timer), jz4740_timer_base + JZ_REG_TIMER_ENABLE_SET);
-}
-
-static inline void jz4740_timer_disable(unsigned int timer)
-{
- writeb(BIT(timer), jz4740_timer_base + JZ_REG_TIMER_ENABLE_CLEAR);
-}
-
-static inline void jz4740_timer_set_period(unsigned int timer, uint16_t period)
-{
- writew(period, jz4740_timer_base + JZ_REG_TIMER_DFR(timer));
-}
-
-static inline void jz4740_timer_set_duty(unsigned int timer, uint16_t duty)
-{
- writew(duty, jz4740_timer_base + JZ_REG_TIMER_DHR(timer));
-}
-
-static inline void jz4740_timer_set_count(unsigned int timer, uint16_t count)
-{
- writew(count, jz4740_timer_base + JZ_REG_TIMER_CNT(timer));
-}
-
-static inline uint16_t jz4740_timer_get_count(unsigned int timer)
-{
- return readw(jz4740_timer_base + JZ_REG_TIMER_CNT(timer));
-}
-
-static inline void jz4740_timer_ack_full(unsigned int timer)
-{
- writel(JZ_TIMER_IRQ_FULL(timer), jz4740_timer_base + JZ_REG_TIMER_FLAG_CLEAR);
-}
-
-static inline void jz4740_timer_irq_full_enable(unsigned int timer)
-{
- writel(JZ_TIMER_IRQ_FULL(timer), jz4740_timer_base + JZ_REG_TIMER_FLAG_CLEAR);
- writel(JZ_TIMER_IRQ_FULL(timer), jz4740_timer_base + JZ_REG_TIMER_MASK_CLEAR);
-}
-
-static inline void jz4740_timer_irq_full_disable(unsigned int timer)
-{
- writel(JZ_TIMER_IRQ_FULL(timer), jz4740_timer_base + JZ_REG_TIMER_MASK_SET);
-}
-
-static inline void jz4740_timer_set_ctrl(unsigned int timer, uint16_t ctrl)
-{
- writew(ctrl, jz4740_timer_base + JZ_REG_TIMER_CTRL(timer));
-}
-
-static inline uint16_t jz4740_timer_get_ctrl(unsigned int timer)
-{
- return readw(jz4740_timer_base + JZ_REG_TIMER_CTRL(timer));
-}
-
-#endif
diff --git a/arch/mips/include/asm/mach-lantiq/falcon/cpu-feature-overrides.h b/arch/mips/include/asm/mach-lantiq/falcon/cpu-feature-overrides.h
index f03c1c42dd90..10226976f7b7 100644
--- a/arch/mips/include/asm/mach-lantiq/falcon/cpu-feature-overrides.h
+++ b/arch/mips/include/asm/mach-lantiq/falcon/cpu-feature-overrides.h
@@ -46,7 +46,6 @@
#define cpu_has_64bits 0
#define cpu_has_64bit_zero_reg 0
#define cpu_has_64bit_gp_regs 0
-#define cpu_has_64bit_addresses 0
#define cpu_dcache_line_size() 32
#define cpu_icache_line_size() 32
diff --git a/arch/mips/include/asm/mach-lasat/irq.h b/arch/mips/include/asm/mach-lasat/irq.h
deleted file mode 100644
index e8994921779e..000000000000
--- a/arch/mips/include/asm/mach-lasat/irq.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_MACH_LASAT_IRQ_H
-#define _ASM_MACH_LASAT_IRQ_H
-
-#define LASAT_CASCADE_IRQ (MIPS_CPU_IRQ_BASE + 2)
-
-#define LASAT_IRQ_BASE 8
-#define LASAT_IRQ_END 23
-
-#define NR_IRQS 24
-
-#include <asm/mach-generic/irq.h>
-
-#endif /* _ASM_MACH_LASAT_IRQ_H */
diff --git a/arch/mips/include/asm/mach-lasat/mach-gt64120.h b/arch/mips/include/asm/mach-lasat/mach-gt64120.h
deleted file mode 100644
index 6666a8871a23..000000000000
--- a/arch/mips/include/asm/mach-lasat/mach-gt64120.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * This is a direct copy of the ev96100.h file, with a global
- * search and replace. The numbers are the same.
- *
- * The reason I'm duplicating this is so that the 64120/96100
- * defines won't be confusing in the source code.
- */
-#ifndef _ASM_GT64120_LASAT_GT64120_DEP_H
-#define _ASM_GT64120_LASAT_GT64120_DEP_H
-
-/*
- * GT64120 config space base address on Lasat 100
- */
-#define GT64120_BASE (KSEG1ADDR(0x14000000))
-
-/*
- * PCI Bus allocation
- *
- * (Guessing ...)
- */
-#define GT_PCI_MEM_BASE 0x12000000UL
-#define GT_PCI_MEM_SIZE 0x02000000UL
-#define GT_PCI_IO_BASE 0x10000000UL
-#define GT_PCI_IO_SIZE 0x02000000UL
-#define GT_ISA_IO_BASE PCI_IO_BASE
-
-#endif /* _ASM_GT64120_LASAT_GT64120_DEP_H */
diff --git a/arch/mips/include/asm/mach-loongson2ef/loongson.h b/arch/mips/include/asm/mach-loongson2ef/loongson.h
index 5008af0a1a19..57e571128489 100644
--- a/arch/mips/include/asm/mach-loongson2ef/loongson.h
+++ b/arch/mips/include/asm/mach-loongson2ef/loongson.h
@@ -244,6 +244,7 @@ static inline void do_perfcnt_IRQ(void)
#ifdef CONFIG_CPU_SUPPORTS_CPUFREQ
#include <linux/cpufreq.h>
extern struct cpufreq_frequency_table loongson2_clockmod_table[];
+extern int loongson2_cpu_set_rate(unsigned long rate_khz);
#endif
/*
diff --git a/arch/mips/include/asm/mach-loongson64/boot_param.h b/arch/mips/include/asm/mach-loongson64/boot_param.h
index 2ed483e32d8c..b35be709f9da 100644
--- a/arch/mips/include/asm/mach-loongson64/boot_param.h
+++ b/arch/mips/include/asm/mach-loongson64/boot_param.h
@@ -192,6 +192,11 @@ struct boot_params {
struct efi_reset_system_t reset_system;
};
+enum loongson_bridge_type {
+ LS7A = 1,
+ RS780E = 2
+};
+
struct loongson_system_configuration {
u32 nr_cpus;
u32 nr_nodes;
@@ -200,6 +205,7 @@ struct loongson_system_configuration {
u16 boot_cpu_id;
u16 reserved_cpus_mask;
enum loongson_cpu_type cputype;
+ enum loongson_bridge_type bridgetype;
u64 ht_control_base;
u64 pci_mem_start_addr;
u64 pci_mem_end_addr;
@@ -215,9 +221,14 @@ struct loongson_system_configuration {
u32 nr_sensors;
struct sensor_device sensors[MAX_SENSORS];
u64 workarounds;
+ void (*early_config)(void);
};
extern struct efi_memory_map_loongson *loongson_memmap;
extern struct loongson_system_configuration loongson_sysconf;
+extern u32 node_id_offset;
+extern void ls7a_early_config(void);
+extern void rs780e_early_config(void);
+
#endif
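
Editor's note on the boot_param.h hunks above: they add a bridge type enum, an early_config() callback and per-bridge helpers, but the wiring is not part of this patch. The sketch below is only an assumed dispatch, for illustration, using the names introduced above.

static void __init loongson_bridge_setup(void)
{
	switch (loongson_sysconf.bridgetype) {
	case LS7A:
		loongson_sysconf.early_config = ls7a_early_config;
		break;
	case RS780E:
	default:
		loongson_sysconf.early_config = rs780e_early_config;
		break;
	}

	if (loongson_sysconf.early_config)
		loongson_sysconf.early_config();
}
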
diff --git a/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h b/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h
index 4fab38c743dd..b6e9c99b85a5 100644
--- a/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h
+++ b/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h
@@ -48,5 +48,6 @@
#define cpu_hwrena_impl_bits 0xc0000000
#define cpu_has_mac2008_only 1
#define cpu_has_mips_r2_exec_hazard 0
+#define cpu_has_perf_cntr_intr_bit 0
#endif /* __ASM_MACH_LOONGSON64_CPU_FEATURE_OVERRIDES_H */
diff --git a/arch/mips/include/asm/mach-loongson64/cpucfg-emul.h b/arch/mips/include/asm/mach-loongson64/cpucfg-emul.h
new file mode 100644
index 000000000000..d64af19c210d
--- /dev/null
+++ b/arch/mips/include/asm/mach-loongson64/cpucfg-emul.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_MACH_LOONGSON64_CPUCFG_EMUL_H_
+#define _ASM_MACH_LOONGSON64_CPUCFG_EMUL_H_
+
+#include <asm/cpu-info.h>
+
+#ifdef CONFIG_CPU_LOONGSON3_CPUCFG_EMULATION
+
+#include <loongson_regs.h>
+
+#define LOONGSON_FPREV_MASK 0x7
+
+void loongson3_cpucfg_synthesize_data(struct cpuinfo_mips *c);
+
+static inline bool loongson3_cpucfg_emulation_enabled(struct cpuinfo_mips *c)
+{
+ /* All supported cores have non-zero LOONGSON_CFG1 data. */
+ return c->loongson3_cpucfg_data[0] != 0;
+}
+
+static inline u32 loongson3_cpucfg_read_synthesized(struct cpuinfo_mips *c,
+ __u64 sel)
+{
+ switch (sel) {
+ case LOONGSON_CFG0:
+ return c->processor_id;
+ case LOONGSON_CFG1:
+ case LOONGSON_CFG2:
+ case LOONGSON_CFG3:
+ return c->loongson3_cpucfg_data[sel - 1];
+ case LOONGSON_CFG4:
+ case LOONGSON_CFG5:
+ /* CPUCFG selects 4 and 5 are related to the input clock
+ * signal.
+ *
+ * Unimplemented for now.
+ */
+ return 0;
+ case LOONGSON_CFG6:
+ /* CPUCFG select 6 is for the undocumented Safe Extension. */
+ return 0;
+ case LOONGSON_CFG7:
+ /* CPUCFG select 7 is for the virtualization extension.
+ * We don't know if the two currently known features are
+ * supported on older cores according to the public
+ * documentation, so leave this at zero.
+ */
+ return 0;
+ }
+
+ /*
+ * Return 0 for unrecognized CPUCFG selects, which is real hardware
+ * behavior observed on Loongson 3A R4.
+ */
+ return 0;
+}
+#else
+static inline void loongson3_cpucfg_synthesize_data(struct cpuinfo_mips *c)
+{
+}
+
+static inline bool loongson3_cpucfg_emulation_enabled(struct cpuinfo_mips *c)
+{
+ return false;
+}
+
+static inline u32 loongson3_cpucfg_read_synthesized(struct cpuinfo_mips *c,
+ __u64 sel)
+{
+ return 0;
+}
+#endif
+
+#endif /* _ASM_MACH_LOONGSON64_CPUCFG_EMUL_H_ */
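
Editor's note on the new cpucfg-emul.h above: it synthesizes CPUCFG data for Loongson cores that lack the instruction. A hedged sketch of a consumer follows; the trap plumbing that extracts the select field from the faulting instruction and writes the destination register is elided and assumed.

#include <asm/mach-loongson64/cpucfg-emul.h>

static int emulate_cpucfg(struct cpuinfo_mips *c, __u64 sel, u32 *result)
{
	if (!loongson3_cpucfg_emulation_enabled(c))
		return -1;	/* caller would raise SIGILL or similar */

	*result = loongson3_cpucfg_read_synthesized(c, sel);
	return 0;
}
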
diff --git a/arch/mips/include/asm/mach-loongson64/loongson_regs.h b/arch/mips/include/asm/mach-loongson64/loongson_regs.h
index 363a47a5d26e..83dbb9fdf9c2 100644
--- a/arch/mips/include/asm/mach-loongson64/loongson_regs.h
+++ b/arch/mips/include/asm/mach-loongson64/loongson_regs.h
@@ -67,6 +67,8 @@ static inline u32 read_cpucfg(u32 reg)
#define LOONGSON_CFG1_SFBP BIT(29)
#define LOONGSON_CFG1_CDMAP BIT(30)
+#define LOONGSON_CFG1_FPREV_OFFSET 1
+
#define LOONGSON_CFG2 0x2
#define LOONGSON_CFG2_LEXT1 BIT(0)
#define LOONGSON_CFG2_LEXT2 BIT(1)
@@ -77,12 +79,12 @@ static inline u32 read_cpucfg(u32 reg)
#define LOONGSON_CFG2_LBT3 BIT(6)
#define LOONGSON_CFG2_LBTMMU BIT(7)
#define LOONGSON_CFG2_LPMP BIT(8)
-#define LOONGSON_CFG2_LPMPREV GENMASK(11, 9)
+#define LOONGSON_CFG2_LPMREV GENMASK(11, 9)
#define LOONGSON_CFG2_LAMO BIT(12)
#define LOONGSON_CFG2_LPIXU BIT(13)
-#define LOONGSON_CFG2_LPIXUN BIT(14)
-#define LOONGSON_CFG2_LZVP BIT(15)
-#define LOONGSON_CFG2_LZVREV GENMASK(18, 16)
+#define LOONGSON_CFG2_LPIXNU BIT(14)
+#define LOONGSON_CFG2_LVZP BIT(15)
+#define LOONGSON_CFG2_LVZREV GENMASK(18, 16)
#define LOONGSON_CFG2_LGFTP BIT(19)
#define LOONGSON_CFG2_LGFTPREV GENMASK(22, 20)
#define LOONGSON_CFG2_LLFTP BIT(23)
@@ -90,6 +92,13 @@ static inline u32 read_cpucfg(u32 reg)
#define LOONGSON_CFG2_LCSRP BIT(27)
#define LOONGSON_CFG2_LDISBLIKELY BIT(28)
+#define LOONGSON_CFG2_LPMREV_OFFSET 9
+#define LOONGSON_CFG2_LPM_REV1 (1 << LOONGSON_CFG2_LPMREV_OFFSET)
+#define LOONGSON_CFG2_LPM_REV2 (2 << LOONGSON_CFG2_LPMREV_OFFSET)
+#define LOONGSON_CFG2_LVZREV_OFFSET 16
+#define LOONGSON_CFG2_LVZ_REV1 (1 << LOONGSON_CFG2_LVZREV_OFFSET)
+#define LOONGSON_CFG2_LVZ_REV2 (2 << LOONGSON_CFG2_LVZREV_OFFSET)
+
#define LOONGSON_CFG3 0x3
#define LOONGSON_CFG3_LCAMP BIT(0)
#define LOONGSON_CFG3_LCAMREV GENMASK(3, 1)
@@ -97,6 +106,16 @@ static inline u32 read_cpucfg(u32 reg)
#define LOONGSON_CFG3_LCAMKW GENMASK(19, 12)
#define LOONGSON_CFG3_LCAMVW GENMASK(27, 20)
+#define LOONGSON_CFG3_LCAMREV_OFFSET 1
+#define LOONGSON_CFG3_LCAM_REV1 (1 << LOONGSON_CFG3_LCAMREV_OFFSET)
+#define LOONGSON_CFG3_LCAM_REV2 (2 << LOONGSON_CFG3_LCAMREV_OFFSET)
+#define LOONGSON_CFG3_LCAMNUM_OFFSET 4
+#define LOONGSON_CFG3_LCAMNUM_REV1 (0x3f << LOONGSON_CFG3_LCAMNUM_OFFSET)
+#define LOONGSON_CFG3_LCAMKW_OFFSET 12
+#define LOONGSON_CFG3_LCAMKW_REV1 (0x27 << LOONGSON_CFG3_LCAMKW_OFFSET)
+#define LOONGSON_CFG3_LCAMVW_OFFSET 20
+#define LOONGSON_CFG3_LCAMVW_REV1 (0x3f << LOONGSON_CFG3_LCAMVW_OFFSET)
+
#define LOONGSON_CFG4 0x4
#define LOONGSON_CFG4_CCFREQ GENMASK(31, 0)
@@ -139,7 +158,7 @@ static inline u64 csr_readq(u32 reg)
{
u64 __res;
- /* DWRCSR reg, val */
+ /* DRDCSR reg, val */
__asm__ __volatile__(
"parse_r __res,%0\n\t"
"parse_r reg,%1\n\t"
diff --git a/arch/mips/include/asm/mach-loongson64/mc146818rtc.h b/arch/mips/include/asm/mach-loongson64/mc146818rtc.h
deleted file mode 100644
index ebdccfee50be..000000000000
--- a/arch/mips/include/asm/mach-loongson64/mc146818rtc.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 1998, 2001, 03, 07 by Ralf Baechle (ralf@linux-mips.org)
- *
- * RTC routines for PC style attached Dallas chip.
- */
-#ifndef __ASM_MACH_LOONGSON64_MC146818RTC_H
-#define __ASM_MACH_LOONGSON64_MC146818RTC_H
-
-#include <linux/io.h>
-
-#define RTC_PORT(x) (0x70 + (x))
-#define RTC_IRQ 8
-
-static inline unsigned char CMOS_READ(unsigned long addr)
-{
- outb_p(addr, RTC_PORT(0));
- return inb_p(RTC_PORT(1));
-}
-
-static inline void CMOS_WRITE(unsigned char data, unsigned long addr)
-{
- outb_p(addr, RTC_PORT(0));
- outb_p(data, RTC_PORT(1));
-}
-
-#define RTC_ALWAYS_BCD 0
-
-#ifndef mc146818_decode_year
-#define mc146818_decode_year(year) ((year) < 70 ? (year) + 2000 : (year) + 1970)
-#endif
-
-#endif /* __ASM_MACH_LOONGSON64_MC146818RTC_H */
diff --git a/arch/mips/include/asm/mach-loongson64/spaces.h b/arch/mips/include/asm/mach-loongson64/spaces.h
index e85bc1d9c4f2..3de0ac9d8829 100644
--- a/arch/mips/include/asm/mach-loongson64/spaces.h
+++ b/arch/mips/include/asm/mach-loongson64/spaces.h
@@ -6,5 +6,13 @@
#define CAC_BASE _AC(0x9800000000000000, UL)
#endif /* CONFIG_64BIT */
+/* Skip 128k to trap NULL pointer dereferences */
+#define PCI_IOBASE _AC(0xc000000000000000 + SZ_128K, UL)
+#define PCI_IOSIZE SZ_16M
+#define MAP_BASE (PCI_IOBASE + PCI_IOSIZE)
+
+/* Reserved at the start of PCI_IOBASE for legacy drivers */
+#define MMIO_LOWER_RESERVED 0x10000
+
#include <asm/mach-generic/spaces.h>
#endif
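
Editor's note on the loongson64 spaces.h hunk above: it carves out a dedicated virtual window for port I/O. The sketch below only restates the address arithmetic the generic port accessors perform with PCI_IOBASE, plus the layout implied by the new constants; it adds nothing beyond them.

/* The asm-generic inb()/outb() helpers effectively compute this address. */
static void __iomem *port_to_virt(unsigned long port)
{
	return (void __iomem *)(PCI_IOBASE + port);
}

/*
 * Resulting layout:
 *   0xc000000000000000 .. +128K             left unmapped to trap NULL pointer
 *                                           dereferences (per the comment above)
 *   PCI_IOBASE .. +MMIO_LOWER_RESERVED      first 64K reserved for legacy drivers
 *   ...        .. PCI_IOBASE + PCI_IOSIZE   remainder of the 16M PCI I/O window
 *   MAP_BASE                                presumably the start of the mapped
 *                                           (ioremap/vmalloc) region that follows
 */
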
diff --git a/arch/mips/include/asm/mach-pmcs-msp71xx/cpu-feature-overrides.h b/arch/mips/include/asm/mach-pmcs-msp71xx/cpu-feature-overrides.h
deleted file mode 100644
index 016fa9446ba9..000000000000
--- a/arch/mips/include/asm/mach-pmcs-msp71xx/cpu-feature-overrides.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2003, 04, 07 Ralf Baechle (ralf@linux-mips.org)
- */
-#ifndef __ASM_MACH_MSP71XX_CPU_FEATURE_OVERRIDES_H
-#define __ASM_MACH_MSP71XX_CPU_FEATURE_OVERRIDES_H
-
-#define cpu_has_mips16 1
-#define cpu_has_dsp 1
-/* #define cpu_has_dsp2 ??? - do runtime detection */
-#define cpu_has_mipsmt 1
-#define cpu_has_fpu 0
-
-#define cpu_has_mips32r1 0
-#define cpu_has_mips32r2 1
-#define cpu_has_mips64r1 0
-#define cpu_has_mips64r2 0
-
-#endif /* __ASM_MACH_MSP71XX_CPU_FEATURE_OVERRIDES_H */
diff --git a/arch/mips/include/asm/mach-pmcs-msp71xx/msp_cic_int.h b/arch/mips/include/asm/mach-pmcs-msp71xx/msp_cic_int.h
deleted file mode 100644
index 50de6876e1c9..000000000000
--- a/arch/mips/include/asm/mach-pmcs-msp71xx/msp_cic_int.h
+++ /dev/null
@@ -1,139 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Defines for the MSP interrupt controller.
- *
- * Copyright (C) 1999 MIPS Technologies, Inc. All rights reserved.
- * Author: Carsten Langgaard, carstenl@mips.com
- *
- * ########################################################################
- *
- * ########################################################################
- */
-
-#ifndef _MSP_CIC_INT_H
-#define _MSP_CIC_INT_H
-
-/*
- * The PMC-Sierra CIC interrupts are all centrally managed by the
- * CIC sub-system.
- * We attempt to keep the interrupt numbers as consistent as possible
- * across all of the MSP devices, but some differences will creep in ...
- * The interrupts which are directly forwarded to the MIPS core interrupts
- * are assigned interrupts in the range 0-7, interrupts cascaded through
- * the CIC are assigned interrupts 8-39. The cascade occurs on C_IRQ4
- * (MSP_INT_CIC). Currently we don't really distinguish between VPE1
- * and VPE0 (or thread contexts for that matter). Will have to fix.
- * The PER interrupts are assigned interrupts in the range 40-71.
-*/
-
-
-/*
- * IRQs directly forwarded to the CPU
- */
-#define MSP_MIPS_INTBASE 0
-#define MSP_INT_SW0 0 /* IRQ for swint0, C_SW0 */
-#define MSP_INT_SW1 1 /* IRQ for swint1, C_SW1 */
-#define MSP_INT_MAC0 2 /* IRQ for MAC 0, C_IRQ0 */
-#define MSP_INT_MAC1 3 /* IRQ for MAC 1, C_IRQ1 */
-#define MSP_INT_USB 4 /* IRQ for USB, C_IRQ2 */
-#define MSP_INT_SAR 5 /* IRQ for ADSL2+ SAR, C_IRQ3 */
-#define MSP_INT_CIC 6 /* IRQ for CIC block, C_IRQ4 */
-#define MSP_INT_SEC 7 /* IRQ for Sec engine, C_IRQ5 */
-
-/*
- * IRQs cascaded on CPU interrupt 4 (CAUSE bit 12, C_IRQ4)
- * These defines should be tied to the register definitions for the CIC
- * interrupt routine. For now, just use hard-coded values.
- */
-#define MSP_CIC_INTBASE (MSP_MIPS_INTBASE + 8)
-#define MSP_INT_EXT0 (MSP_CIC_INTBASE + 0)
- /* External interrupt 0 */
-#define MSP_INT_EXT1 (MSP_CIC_INTBASE + 1)
- /* External interrupt 1 */
-#define MSP_INT_EXT2 (MSP_CIC_INTBASE + 2)
- /* External interrupt 2 */
-#define MSP_INT_EXT3 (MSP_CIC_INTBASE + 3)
- /* External interrupt 3 */
-#define MSP_INT_CPUIF (MSP_CIC_INTBASE + 4)
- /* CPU interface interrupt */
-#define MSP_INT_EXT4 (MSP_CIC_INTBASE + 5)
- /* External interrupt 4 */
-#define MSP_INT_CIC_USB (MSP_CIC_INTBASE + 6)
- /* Cascaded IRQ for USB */
-#define MSP_INT_MBOX (MSP_CIC_INTBASE + 7)
- /* Sec engine mailbox IRQ */
-#define MSP_INT_EXT5 (MSP_CIC_INTBASE + 8)
- /* External interrupt 5 */
-#define MSP_INT_TDM (MSP_CIC_INTBASE + 9)
- /* TDM interrupt */
-#define MSP_INT_CIC_MAC0 (MSP_CIC_INTBASE + 10)
- /* Cascaded IRQ for MAC 0 */
-#define MSP_INT_CIC_MAC1 (MSP_CIC_INTBASE + 11)
- /* Cascaded IRQ for MAC 1 */
-#define MSP_INT_CIC_SEC (MSP_CIC_INTBASE + 12)
- /* Cascaded IRQ for sec engine */
-#define MSP_INT_PER (MSP_CIC_INTBASE + 13)
- /* Peripheral interrupt */
-#define MSP_INT_TIMER0 (MSP_CIC_INTBASE + 14)
- /* SLP timer 0 */
-#define MSP_INT_TIMER1 (MSP_CIC_INTBASE + 15)
- /* SLP timer 1 */
-#define MSP_INT_TIMER2 (MSP_CIC_INTBASE + 16)
- /* SLP timer 2 */
-#define MSP_INT_VPE0_TIMER (MSP_CIC_INTBASE + 17)
- /* VPE0 MIPS timer */
-#define MSP_INT_BLKCP (MSP_CIC_INTBASE + 18)
- /* Block Copy */
-#define MSP_INT_UART0 (MSP_CIC_INTBASE + 19)
- /* UART 0 */
-#define MSP_INT_PCI (MSP_CIC_INTBASE + 20)
- /* PCI subsystem */
-#define MSP_INT_EXT6 (MSP_CIC_INTBASE + 21)
- /* External interrupt 5 */
-#define MSP_INT_PCI_MSI (MSP_CIC_INTBASE + 22)
- /* PCI Message Signal */
-#define MSP_INT_CIC_SAR (MSP_CIC_INTBASE + 23)
- /* Cascaded ADSL2+ SAR IRQ */
-#define MSP_INT_DSL (MSP_CIC_INTBASE + 24)
- /* ADSL2+ IRQ */
-#define MSP_INT_CIC_ERR (MSP_CIC_INTBASE + 25)
- /* SLP error condition */
-#define MSP_INT_VPE1_TIMER (MSP_CIC_INTBASE + 26)
- /* VPE1 MIPS timer */
-#define MSP_INT_VPE0_PC (MSP_CIC_INTBASE + 27)
- /* VPE0 Performance counter */
-#define MSP_INT_VPE1_PC (MSP_CIC_INTBASE + 28)
- /* VPE1 Performance counter */
-#define MSP_INT_EXT7 (MSP_CIC_INTBASE + 29)
- /* External interrupt 5 */
-#define MSP_INT_VPE0_SW (MSP_CIC_INTBASE + 30)
- /* VPE0 Software interrupt */
-#define MSP_INT_VPE1_SW (MSP_CIC_INTBASE + 31)
- /* VPE0 Software interrupt */
-
-/*
- * IRQs cascaded on CIC PER interrupt (MSP_INT_PER)
- */
-#define MSP_PER_INTBASE (MSP_CIC_INTBASE + 32)
-/* Reserved 0-1 */
-#define MSP_INT_UART1 (MSP_PER_INTBASE + 2)
- /* UART 1 */
-/* Reserved 3-5 */
-#define MSP_INT_2WIRE (MSP_PER_INTBASE + 6)
- /* 2-wire */
-#define MSP_INT_TM0 (MSP_PER_INTBASE + 7)
- /* Peripheral timer block out 0 */
-#define MSP_INT_TM1 (MSP_PER_INTBASE + 8)
- /* Peripheral timer block out 1 */
-/* Reserved 9 */
-#define MSP_INT_SPRX (MSP_PER_INTBASE + 10)
- /* SPI RX complete */
-#define MSP_INT_SPTX (MSP_PER_INTBASE + 11)
- /* SPI TX complete */
-#define MSP_INT_GPIO (MSP_PER_INTBASE + 12)
- /* GPIO */
-#define MSP_INT_PER_ERR (MSP_PER_INTBASE + 13)
- /* Peripheral error */
-/* Reserved 14-31 */
-
-#endif /* !_MSP_CIC_INT_H */
diff --git a/arch/mips/include/asm/mach-pmcs-msp71xx/msp_gpio_macros.h b/arch/mips/include/asm/mach-pmcs-msp71xx/msp_gpio_macros.h
deleted file mode 100644
index daacebb047c2..000000000000
--- a/arch/mips/include/asm/mach-pmcs-msp71xx/msp_gpio_macros.h
+++ /dev/null
@@ -1,343 +0,0 @@
-/*
- *
- * Macros for external SMP-safe access to the PMC MSP71xx reference
- * board GPIO pins
- *
- * Copyright 2010 PMC-Sierra, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
- * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
- * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
- * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#ifndef __MSP_GPIO_MACROS_H__
-#define __MSP_GPIO_MACROS_H__
-
-#include <msp_regops.h>
-#include <msp_regs.h>
-
-#ifdef CONFIG_PMC_MSP7120_GW
-#define MSP_NUM_GPIOS 20
-#else
-#define MSP_NUM_GPIOS 28
-#endif
-
-/* -- GPIO Enumerations -- */
-enum msp_gpio_data {
- MSP_GPIO_LO = 0,
- MSP_GPIO_HI = 1,
- MSP_GPIO_NONE, /* Special - Means pin is out of range */
- MSP_GPIO_TOGGLE, /* Special - Sets pin to opposite */
-};
-
-enum msp_gpio_mode {
- MSP_GPIO_INPUT = 0x0,
- /* MSP_GPIO_ INTERRUPT = 0x1, Not supported yet */
- MSP_GPIO_UART_INPUT = 0x2, /* Only GPIO 4 or 5 */
- MSP_GPIO_OUTPUT = 0x8,
- MSP_GPIO_UART_OUTPUT = 0x9, /* Only GPIO 2 or 3 */
- MSP_GPIO_PERIF_TIMERA = 0x9, /* Only GPIO 0 or 1 */
- MSP_GPIO_PERIF_TIMERB = 0xa, /* Only GPIO 0 or 1 */
- MSP_GPIO_UNKNOWN = 0xb, /* No such GPIO or mode */
-};
-
-/* -- Static Tables -- */
-
-/* Maps pins to data register */
-static volatile u32 * const MSP_GPIO_DATA_REGISTER[] = {
- /* GPIO 0 and 1 on the first register */
- GPIO_DATA1_REG, GPIO_DATA1_REG,
- /* GPIO 2, 3, 4, and 5 on the second register */
- GPIO_DATA2_REG, GPIO_DATA2_REG, GPIO_DATA2_REG, GPIO_DATA2_REG,
- /* GPIO 6, 7, 8, and 9 on the third register */
- GPIO_DATA3_REG, GPIO_DATA3_REG, GPIO_DATA3_REG, GPIO_DATA3_REG,
- /* GPIO 10, 11, 12, 13, 14, and 15 on the fourth register */
- GPIO_DATA4_REG, GPIO_DATA4_REG, GPIO_DATA4_REG, GPIO_DATA4_REG,
- GPIO_DATA4_REG, GPIO_DATA4_REG,
- /* GPIO 16 - 23 on the first strange EXTENDED register */
- EXTENDED_GPIO1_REG, EXTENDED_GPIO1_REG, EXTENDED_GPIO1_REG,
- EXTENDED_GPIO1_REG, EXTENDED_GPIO1_REG, EXTENDED_GPIO1_REG,
- EXTENDED_GPIO1_REG, EXTENDED_GPIO1_REG,
- /* GPIO 24 - 27 on the second strange EXTENDED register */
- EXTENDED_GPIO2_REG, EXTENDED_GPIO2_REG, EXTENDED_GPIO2_REG,
- EXTENDED_GPIO2_REG,
-};
-
-/* Maps pins to mode register */
-static volatile u32 * const MSP_GPIO_MODE_REGISTER[] = {
- /* GPIO 0 and 1 on the first register */
- GPIO_CFG1_REG, GPIO_CFG1_REG,
- /* GPIO 2, 3, 4, and 5 on the second register */
- GPIO_CFG2_REG, GPIO_CFG2_REG, GPIO_CFG2_REG, GPIO_CFG2_REG,
- /* GPIO 6, 7, 8, and 9 on the third register */
- GPIO_CFG3_REG, GPIO_CFG3_REG, GPIO_CFG3_REG, GPIO_CFG3_REG,
- /* GPIO 10, 11, 12, 13, 14, and 15 on the fourth register */
- GPIO_CFG4_REG, GPIO_CFG4_REG, GPIO_CFG4_REG, GPIO_CFG4_REG,
- GPIO_CFG4_REG, GPIO_CFG4_REG,
- /* GPIO 16 - 23 on the first strange EXTENDED register */
- EXTENDED_GPIO1_REG, EXTENDED_GPIO1_REG, EXTENDED_GPIO1_REG,
- EXTENDED_GPIO1_REG, EXTENDED_GPIO1_REG, EXTENDED_GPIO1_REG,
- EXTENDED_GPIO1_REG, EXTENDED_GPIO1_REG,
- /* GPIO 24 - 27 on the second strange EXTENDED register */
- EXTENDED_GPIO2_REG, EXTENDED_GPIO2_REG, EXTENDED_GPIO2_REG,
- EXTENDED_GPIO2_REG,
-};
-
-/* Maps 'basic' pins to relative offset from 0 per register */
-static int MSP_GPIO_OFFSET[] = {
- /* GPIO 0 and 1 on the first register */
- 0, 0,
- /* GPIO 2, 3, 4, and 5 on the second register */
- 2, 2, 2, 2,
- /* GPIO 6, 7, 8, and 9 on the third register */
- 6, 6, 6, 6,
- /* GPIO 10, 11, 12, 13, 14, and 15 on the fourth register */
- 10, 10, 10, 10, 10, 10,
-};
-
-/* Maps MODE to allowed pin mask */
-static unsigned int MSP_GPIO_MODE_ALLOWED[] = {
- 0xffffffff, /* Mode 0 - INPUT */
- 0x00000, /* Mode 1 - INTERRUPT */
- 0x00030, /* Mode 2 - UART_INPUT (GPIO 4, 5)*/
- 0, 0, 0, 0, 0, /* Modes 3, 4, 5, 6, and 7 are reserved */
- 0xffffffff, /* Mode 8 - OUTPUT */
-	0x0000f,	/* Mode 9 - UART_OUTPUT/
-				   PERIF_TIMERA (GPIO 0, 1, 2, 3) */
-	0x00003,	/* Mode a - PERIF_TIMERB (GPIO 0, 1) */
- 0x00000, /* Mode b - Not really a mode! */
-};
-
-/* -- Bit masks -- */
-
-/* This gives you the 'register relative offset gpio' number */
-#define OFFSET_GPIO_NUMBER(gpio) (gpio - MSP_GPIO_OFFSET[gpio])
-
-/* These take the 'register relative offset gpio' number */
-#define BASIC_DATA_REG_MASK(ogpio) (1 << ogpio)
-#define BASIC_MODE_REG_VALUE(mode, ogpio) \
- (mode << BASIC_MODE_REG_SHIFT(ogpio))
-#define BASIC_MODE_REG_MASK(ogpio) \
- BASIC_MODE_REG_VALUE(0xf, ogpio)
-#define BASIC_MODE_REG_SHIFT(ogpio) (ogpio * 4)
-#define BASIC_MODE_REG_FROM_REG(data, ogpio) \
- ((data & BASIC_MODE_REG_MASK(ogpio)) >> BASIC_MODE_REG_SHIFT(ogpio))
-
-/* These take the actual GPIO number (0 through 15) */
-#define BASIC_DATA_MASK(gpio) \
- BASIC_DATA_REG_MASK(OFFSET_GPIO_NUMBER(gpio))
-#define BASIC_MODE_MASK(gpio) \
- BASIC_MODE_REG_MASK(OFFSET_GPIO_NUMBER(gpio))
-#define BASIC_MODE(mode, gpio) \
- BASIC_MODE_REG_VALUE(mode, OFFSET_GPIO_NUMBER(gpio))
-#define BASIC_MODE_SHIFT(gpio) \
- BASIC_MODE_REG_SHIFT(OFFSET_GPIO_NUMBER(gpio))
-#define BASIC_MODE_FROM_REG(data, gpio) \
- BASIC_MODE_REG_FROM_REG(data, OFFSET_GPIO_NUMBER(gpio))
-
-/*
- * Each extended GPIO register is 32 bits long and is responsible for up to
- * eight GPIOs. The least significant 16 bits contain the set and clear bit
- * pair for each of the GPIOs. The most significant 16 bits contain the
- * disable and enable bit pair for each of the GPIOs. For example, the
- * extended GPIO reg for GPIOs 16-23 is as follows:
- *
- * 31: GPIO23_DISABLE
- * ...
- * 19: GPIO17_DISABLE
- * 18: GPIO17_ENABLE
- * 17: GPIO16_DISABLE
- * 16: GPIO16_ENABLE
- * ...
- * 3: GPIO17_SET
- * 2: GPIO17_CLEAR
- * 1: GPIO16_SET
- * 0: GPIO16_CLEAR
- */
-
-/* This gives the 'register relative offset gpio' number */
-#define EXTENDED_OFFSET_GPIO(gpio) (gpio < 24 ? gpio - 16 : gpio - 24)
-
-/* These take the 'register relative offset gpio' number */
-#define EXTENDED_REG_DISABLE(ogpio) (0x2 << ((ogpio * 2) + 16))
-#define EXTENDED_REG_ENABLE(ogpio) (0x1 << ((ogpio * 2) + 16))
-#define EXTENDED_REG_SET(ogpio) (0x2 << (ogpio * 2))
-#define EXTENDED_REG_CLR(ogpio) (0x1 << (ogpio * 2))
-
-/* These take the actual GPIO number (16 through 27) */
-#define EXTENDED_DISABLE(gpio) \
- EXTENDED_REG_DISABLE(EXTENDED_OFFSET_GPIO(gpio))
-#define EXTENDED_ENABLE(gpio) \
- EXTENDED_REG_ENABLE(EXTENDED_OFFSET_GPIO(gpio))
-#define EXTENDED_SET(gpio) \
- EXTENDED_REG_SET(EXTENDED_OFFSET_GPIO(gpio))
-#define EXTENDED_CLR(gpio) \
- EXTENDED_REG_CLR(EXTENDED_OFFSET_GPIO(gpio))
-
-#define EXTENDED_FULL_MASK (0xffffffff)
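/*
 * Illustration (not part of the original header): for GPIO 17, which lives in
 * the first EXTENDED register, the macros above expand as follows, matching
 * the bit layout documented earlier in this file:
 *
 *   EXTENDED_OFFSET_GPIO(17) == 1
 *   EXTENDED_SET(17)     == 0x2 << 2  == bit 3  (GPIO17_SET)
 *   EXTENDED_CLR(17)     == 0x1 << 2  == bit 2  (GPIO17_CLEAR)
 *   EXTENDED_ENABLE(17)  == 0x1 << 18 == bit 18 (GPIO17_ENABLE)
 *   EXTENDED_DISABLE(17) == 0x2 << 18 == bit 19 (GPIO17_DISABLE)
 */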
-
-/* -- API inline-functions -- */
-
-/*
- * Gets the current value of the specified pin
- */
-static inline enum msp_gpio_data msp_gpio_pin_get(unsigned int gpio)
-{
- u32 pinhi_mask = 0, pinhi_mask2 = 0;
-
- if (gpio >= MSP_NUM_GPIOS)
- return MSP_GPIO_NONE;
-
- if (gpio < 16) {
- pinhi_mask = BASIC_DATA_MASK(gpio);
- } else {
- /*
- * Two cases are possible with the EXTENDED register:
- * - In output mode (ENABLED flag set), check the CLR bit
- * - In input mode (ENABLED flag not set), check the SET bit
- */
- pinhi_mask = EXTENDED_ENABLE(gpio) | EXTENDED_CLR(gpio);
- pinhi_mask2 = EXTENDED_SET(gpio);
- }
- if (((*MSP_GPIO_DATA_REGISTER[gpio] & pinhi_mask) == pinhi_mask) ||
- (*MSP_GPIO_DATA_REGISTER[gpio] & pinhi_mask2))
- return MSP_GPIO_HI;
- else
- return MSP_GPIO_LO;
-}
-
-/* Sets the specified pin to the specified value */
-static inline void msp_gpio_pin_set(enum msp_gpio_data data, unsigned int gpio)
-{
- if (gpio >= MSP_NUM_GPIOS)
- return;
-
- if (gpio < 16) {
- if (data == MSP_GPIO_TOGGLE)
- toggle_reg32(MSP_GPIO_DATA_REGISTER[gpio],
- BASIC_DATA_MASK(gpio));
- else if (data == MSP_GPIO_HI)
- set_reg32(MSP_GPIO_DATA_REGISTER[gpio],
- BASIC_DATA_MASK(gpio));
- else
- clear_reg32(MSP_GPIO_DATA_REGISTER[gpio],
- BASIC_DATA_MASK(gpio));
- } else {
- if (data == MSP_GPIO_TOGGLE) {
- /* Special ugly case:
- * We have to read the CLR bit.
- * If set, we write the CLR bit.
- * If not, we write the SET bit.
- */
- u32 tmpdata;
-
- custom_read_reg32(MSP_GPIO_DATA_REGISTER[gpio],
- tmpdata);
- if (tmpdata & EXTENDED_CLR(gpio))
- tmpdata = EXTENDED_CLR(gpio);
- else
- tmpdata = EXTENDED_SET(gpio);
- custom_write_reg32(MSP_GPIO_DATA_REGISTER[gpio],
- tmpdata);
- } else {
- u32 newdata;
-
- if (data == MSP_GPIO_HI)
- newdata = EXTENDED_SET(gpio);
- else
- newdata = EXTENDED_CLR(gpio);
- set_value_reg32(MSP_GPIO_DATA_REGISTER[gpio],
- EXTENDED_FULL_MASK, newdata);
- }
- }
-}
-
-/* Sets the specified pin high */
-static inline void msp_gpio_pin_hi(unsigned int gpio)
-{
- msp_gpio_pin_set(MSP_GPIO_HI, gpio);
-}
-
-/* Sets the specified pin low */
-static inline void msp_gpio_pin_lo(unsigned int gpio)
-{
- msp_gpio_pin_set(MSP_GPIO_LO, gpio);
-}
-
-/* Sets the specified pin to the opposite value */
-static inline void msp_gpio_pin_toggle(unsigned int gpio)
-{
- msp_gpio_pin_set(MSP_GPIO_TOGGLE, gpio);
-}
-
-/* Gets the mode of the specified pin */
-static inline enum msp_gpio_mode msp_gpio_pin_get_mode(unsigned int gpio)
-{
- enum msp_gpio_mode retval = MSP_GPIO_UNKNOWN;
- uint32_t data;
-
- if (gpio >= MSP_NUM_GPIOS)
- return retval;
-
- data = *MSP_GPIO_MODE_REGISTER[gpio];
-
- if (gpio < 16) {
- retval = BASIC_MODE_FROM_REG(data, gpio);
- } else {
- /* Extended pins can only be either INPUT or OUTPUT */
- if (data & EXTENDED_ENABLE(gpio))
- retval = MSP_GPIO_OUTPUT;
- else
- retval = MSP_GPIO_INPUT;
- }
-
- return retval;
-}
-
-/*
- * Sets the specified mode on the requested pin
- * Returns 0 on success, or -1 if that mode is not allowed on this pin
- */
-static inline int msp_gpio_pin_mode(enum msp_gpio_mode mode, unsigned int gpio)
-{
- u32 modemask, newmode;
-
- if ((1 << gpio) & ~MSP_GPIO_MODE_ALLOWED[mode])
- return -1;
-
- if (gpio >= MSP_NUM_GPIOS)
- return -1;
-
- if (gpio < 16) {
- modemask = BASIC_MODE_MASK(gpio);
- newmode = BASIC_MODE(mode, gpio);
- } else {
- modemask = EXTENDED_FULL_MASK;
- if (mode == MSP_GPIO_INPUT)
- newmode = EXTENDED_DISABLE(gpio);
- else
- newmode = EXTENDED_ENABLE(gpio);
- }
- /* Do the set atomically */
- set_value_reg32(MSP_GPIO_MODE_REGISTER[gpio], modemask, newmode);
-
- return 0;
-}
-
-#endif /* __MSP_GPIO_MACROS_H__ */
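A minimal usage sketch of the GPIO helpers deleted above; the assumption that GPIO 6 drives a board LED is hypothetical and only serves to show the call sequence:

static void example_blink_led(void)
{
	if (msp_gpio_pin_mode(MSP_GPIO_OUTPUT, 6) == 0) {
		msp_gpio_pin_hi(6);	/* drive the pin high */
		msp_gpio_pin_toggle(6);	/* and back low again */
	}
}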
diff --git a/arch/mips/include/asm/mach-pmcs-msp71xx/msp_int.h b/arch/mips/include/asm/mach-pmcs-msp71xx/msp_int.h
deleted file mode 100644
index 55078b40f5b5..000000000000
--- a/arch/mips/include/asm/mach-pmcs-msp71xx/msp_int.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Defines for the MSP interrupt handlers.
- *
- * Copyright (C) 2005, PMC-Sierra, Inc. All rights reserved.
- * Author: Andrew Hughes, Andrew_Hughes@pmc-sierra.com
- *
- * ########################################################################
- *
- * ########################################################################
- */
-
-#ifndef _MSP_INT_H
-#define _MSP_INT_H
-
-/*
- * The PMC-Sierra MSP product line has at least two different interrupt
- * controllers, the SLP register based scheme and the CIC interrupt
- * controller block mechanism. This file distinguishes between them
- * so that devices see a uniform interface.
- */
-
-#if defined(CONFIG_IRQ_MSP_SLP)
- #include "msp_slp_int.h"
-#elif defined(CONFIG_IRQ_MSP_CIC)
- #include "msp_cic_int.h"
-#else
- #error "What sort of interrupt controller does *your* MSP have?"
-#endif
-
-#endif /* !_MSP_INT_H */
diff --git a/arch/mips/include/asm/mach-pmcs-msp71xx/msp_pci.h b/arch/mips/include/asm/mach-pmcs-msp71xx/msp_pci.h
deleted file mode 100644
index 5b2535efceb2..000000000000
--- a/arch/mips/include/asm/mach-pmcs-msp71xx/msp_pci.h
+++ /dev/null
@@ -1,189 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Copyright (c) 2000-2006 PMC-Sierra INC.
- *
- * PMC-SIERRA INC. DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS
- * SOFTWARE.
- */
-
-#ifndef _MSP_PCI_H_
-#define _MSP_PCI_H_
-
-#define MSP_HAS_PCI(ID) (((u32)(ID) <= 0x4236) && ((u32)(ID) >= 0x4220))
-
-/*
- * It is convenient to program the OATRAN register so that
- * Athena virtual address space and PCI address space are
- * the same. This is not a requirement, just a convenience.
- *
- * The only hard restriction on the value of OATRAN is that
- * OATRAN must not be programmed to allow translated memory
- * addresses to fall within the lowest 512MB of
- * PCI address space. This region is hardcoded
- * for use as Athena PCI Host Controller target
- * access memory space to the Athena's SDRAM.
- *
- * Note that OATRAN applies only to memory accesses, not
- * to I/O accesses.
- *
- * To program OATRAN to make Athena virtual address space
- * and PCI address space have the same values, OATRAN
- * is to be programmed to 0xB8000000. The top seven
- * bits of the value mimic the seven bits clipped off
- * by the PCI Host controller.
- *
- * With OATRAN at the said value, when the CPU does
- * an access to its virtual address at, say 0xB900_5000,
- * the address appearing on the PCI bus will be
- * 0xB900_5000.
- * - Michael Penner
- */
-#define MSP_PCI_OATRAN 0xB8000000UL
-
-#define MSP_PCI_SPACE_BASE (MSP_PCI_OATRAN + 0x1002000UL)
-#define MSP_PCI_SPACE_SIZE (0x3000000UL - 0x2000)
-#define MSP_PCI_SPACE_END \
- (MSP_PCI_SPACE_BASE + MSP_PCI_SPACE_SIZE - 1)
-#define MSP_PCI_IOSPACE_BASE (MSP_PCI_OATRAN + 0x1001000UL)
-#define MSP_PCI_IOSPACE_SIZE 0x1000
-#define MSP_PCI_IOSPACE_END \
- (MSP_PCI_IOSPACE_BASE + MSP_PCI_IOSPACE_SIZE - 1)
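/*
 * Illustration (not part of the original header): with OATRAN programmed to
 * 0xB8000000 as described above, the windows work out to
 *   MSP_PCI_SPACE_BASE   == 0xB9002000, MSP_PCI_SPACE_END   == 0xBBFFFFFF
 *   MSP_PCI_IOSPACE_BASE == 0xB9001000, MSP_PCI_IOSPACE_END == 0xB9001FFF
 * so CPU virtual addresses and PCI bus addresses match one-to-one.
 */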
-
-/* IRQ for PCI status interrupts */
-#define PCI_STAT_IRQ 20
-
-#define QFLUSH_REG_1 0xB7F40000
-
-typedef volatile unsigned int pcireg;
-typedef void * volatile ppcireg;
-
-struct pci_block_copy
-{
- pcireg unused1; /* +0x00 */
- pcireg unused2; /* +0x04 */
- ppcireg unused3; /* +0x08 */
- ppcireg unused4; /* +0x0C */
- pcireg unused5; /* +0x10 */
- pcireg unused6; /* +0x14 */
- pcireg unused7; /* +0x18 */
- ppcireg unused8; /* +0x1C */
- ppcireg unused9; /* +0x20 */
- pcireg unusedA; /* +0x24 */
- ppcireg unusedB; /* +0x28 */
- ppcireg unusedC; /* +0x2C */
-};
-
-enum
-{
- config_device_vendor, /* 0 */
- config_status_command, /* 1 */
- config_class_revision, /* 2 */
- config_BIST_header_latency_cache, /* 3 */
- config_BAR0, /* 4 */
- config_BAR1, /* 5 */
- config_BAR2, /* 6 */
- config_not_used7, /* 7 */
- config_not_used8, /* 8 */
- config_not_used9, /* 9 */
- config_CIS, /* 10 */
- config_subsystem, /* 11 */
- config_not_used12, /* 12 */
- config_capabilities, /* 13 */
- config_not_used14, /* 14 */
- config_lat_grant_irq, /* 15 */
- config_message_control,/* 16 */
- config_message_addr, /* 17 */
- config_message_data, /* 18 */
- config_VPD_addr, /* 19 */
- config_VPD_data, /* 20 */
- config_maxregs /* 21 - number of registers */
-};
-
-struct msp_pci_regs
-{
- pcireg hop_unused_00; /* +0x00 */
- pcireg hop_unused_04; /* +0x04 */
- pcireg hop_unused_08; /* +0x08 */
- pcireg hop_unused_0C; /* +0x0C */
- pcireg hop_unused_10; /* +0x10 */
- pcireg hop_unused_14; /* +0x14 */
- pcireg hop_unused_18; /* +0x18 */
- pcireg hop_unused_1C; /* +0x1C */
- pcireg hop_unused_20; /* +0x20 */
- pcireg hop_unused_24; /* +0x24 */
- pcireg hop_unused_28; /* +0x28 */
- pcireg hop_unused_2C; /* +0x2C */
- pcireg hop_unused_30; /* +0x30 */
- pcireg hop_unused_34; /* +0x34 */
- pcireg if_control; /* +0x38 */
- pcireg oatran; /* +0x3C */
- pcireg reset_ctl; /* +0x40 */
- pcireg config_addr; /* +0x44 */
- pcireg hop_unused_48; /* +0x48 */
- pcireg msg_signaled_int_status; /* +0x4C */
- pcireg msg_signaled_int_mask; /* +0x50 */
- pcireg if_status; /* +0x54 */
- pcireg if_mask; /* +0x58 */
- pcireg hop_unused_5C; /* +0x5C */
- pcireg hop_unused_60; /* +0x60 */
- pcireg hop_unused_64; /* +0x64 */
- pcireg hop_unused_68; /* +0x68 */
- pcireg hop_unused_6C; /* +0x6C */
- pcireg hop_unused_70; /* +0x70 */
-
- struct pci_block_copy pci_bc[2] __attribute__((aligned(64)));
-
- pcireg error_hdr1; /* +0xE0 */
- pcireg error_hdr2; /* +0xE4 */
-
- pcireg config[config_maxregs] __attribute__((aligned(256)));
-
-};
-
-#define BPCI_CFGADDR_BUSNUM_SHF 16
-#define BPCI_CFGADDR_FUNCTNUM_SHF 8
-#define BPCI_CFGADDR_REGNUM_SHF 2
-#define BPCI_CFGADDR_ENABLE (1<<31)
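/*
 * Illustration (not part of the original header): a rough sketch of how a
 * configuration-cycle address could be composed from the shifts above. The
 * helper name is hypothetical, and the device-number encoding is not covered
 * by these defines, so the controller's exact layout may differ.
 */
#define EXAMPLE_BPCI_CFGADDR(bus, fn, reg)		\
	(BPCI_CFGADDR_ENABLE |				\
	 ((bus) << BPCI_CFGADDR_BUSNUM_SHF) |		\
	 ((fn) << BPCI_CFGADDR_FUNCTNUM_SHF) |		\
	 ((reg) << BPCI_CFGADDR_REGNUM_SHF))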
-
-#define BPCI_IFCONTROL_RTO (1<<20) /* Retry timeout */
-#define BPCI_IFCONTROL_HCE (1<<16) /* Host configuration enable */
-#define BPCI_IFCONTROL_CTO_SHF 12 /* Shift count for CTO bits */
-#define BPCI_IFCONTROL_SE (1<<5) /* Enable exceptions on errors */
-#define BPCI_IFCONTROL_BIST (1<<4) /* Use BIST in per. mode */
-#define BPCI_IFCONTROL_CAP (1<<3) /* Enable capabilities */
-#define BPCI_IFCONTROL_MMC_SHF 0 /* Shift count for MMC bits */
-
-#define BPCI_IFSTATUS_MGT (1<<8) /* Master Grant timeout */
-#define BPCI_IFSTATUS_MTT (1<<9) /* Master TRDY timeout */
-#define BPCI_IFSTATUS_MRT (1<<10) /* Master retry timeout */
-#define BPCI_IFSTATUS_BC0F (1<<13) /* Block copy 0 fault */
-#define BPCI_IFSTATUS_BC1F (1<<14) /* Block copy 1 fault */
-#define BPCI_IFSTATUS_PCIU (1<<15) /* PCI unable to respond */
-#define BPCI_IFSTATUS_BSIZ (1<<16) /* PCI access with illegal size */
-#define BPCI_IFSTATUS_BADD (1<<17) /* PCI access with illegal addr */
-#define BPCI_IFSTATUS_RTO (1<<18) /* Retry time out */
-#define BPCI_IFSTATUS_SER (1<<19) /* System error */
-#define BPCI_IFSTATUS_PER (1<<20) /* Parity error */
-#define BPCI_IFSTATUS_LCA (1<<21) /* Local CPU abort */
-#define BPCI_IFSTATUS_MEM (1<<22) /* Memory prot. violation */
-#define BPCI_IFSTATUS_ARB (1<<23) /* Arbiter timed out */
-#define BPCI_IFSTATUS_STA (1<<27) /* Signaled target abort */
-#define BPCI_IFSTATUS_TA (1<<28) /* Target abort */
-#define BPCI_IFSTATUS_MA (1<<29) /* Master abort */
-#define BPCI_IFSTATUS_PEI (1<<30) /* Parity error as initiator */
-#define BPCI_IFSTATUS_PET (1<<31) /* Parity error as target */
-
-#define BPCI_RESETCTL_PR (1<<0) /* True if reset asserted */
-#define BPCI_RESETCTL_RT (1<<4) /* Release time */
-#define BPCI_RESETCTL_CT (1<<8) /* Config time */
-#define BPCI_RESETCTL_PE (1<<12) /* PCI enabled */
-#define BPCI_RESETCTL_HM (1<<13) /* PCI host mode */
-#define BPCI_RESETCTL_RI (1<<14) /* PCI reset in */
-
-extern struct msp_pci_regs msp_pci_regs
- __attribute__((section(".register")));
-extern unsigned long msp_pci_config_space
- __attribute__((section(".register")));
-
-#endif /* !_MSP_PCI_H_ */
diff --git a/arch/mips/include/asm/mach-pmcs-msp71xx/msp_prom.h b/arch/mips/include/asm/mach-pmcs-msp71xx/msp_prom.h
deleted file mode 100644
index 4120a01c30a9..000000000000
--- a/arch/mips/include/asm/mach-pmcs-msp71xx/msp_prom.h
+++ /dev/null
@@ -1,159 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * MIPS boards bootprom interface for the Linux kernel.
- *
- * Copyright (C) 2000 MIPS Technologies, Inc. All rights reserved.
- * Author: Carsten Langgaard, carstenl@mips.com
- *
- * ########################################################################
- *
- * ########################################################################
- */
-
-#ifndef _ASM_MSP_PROM_H
-#define _ASM_MSP_PROM_H
-
-#include <linux/types.h>
-
-#define DEVICEID "deviceid"
-#define FEATURES "features"
-#define PROM_ENV "prom_env"
-#define PROM_ENV_FILE "/proc/"PROM_ENV
-#define PROM_ENV_SIZE 256
-
-#define CPU_DEVID_FAMILY 0x0000ff00
-#define CPU_DEVID_REVISION 0x000000ff
-
-#define FPGA_IS_POLO(revision) \
- (((revision >= 0xb0) && (revision < 0xd0)))
-#define FPGA_IS_5000(revision) \
- ((revision >= 0x80) && (revision <= 0x90))
-#define FPGA_IS_ZEUS(revision) ((revision < 0x7f))
-#define FPGA_IS_DUET(revision) \
- (((revision >= 0xa0) && (revision < 0xb0)))
-#define FPGA_IS_MSP4200(revision) ((revision >= 0xd0))
-#define FPGA_IS_MSP7100(revision) ((revision >= 0xd0))
-
-#define MACHINE_TYPE_POLO "POLO"
-#define MACHINE_TYPE_DUET "DUET"
-#define MACHINE_TYPE_ZEUS "ZEUS"
-#define MACHINE_TYPE_MSP2000REVB "MSP2000REVB"
-#define MACHINE_TYPE_MSP5000 "MSP5000"
-#define MACHINE_TYPE_MSP4200 "MSP4200"
-#define MACHINE_TYPE_MSP7120 "MSP7120"
-#define MACHINE_TYPE_MSP7130 "MSP7130"
-#define MACHINE_TYPE_OTHER "OTHER"
-
-#define MACHINE_TYPE_POLO_FPGA "POLO-FPGA"
-#define MACHINE_TYPE_DUET_FPGA "DUET-FPGA"
-#define MACHINE_TYPE_ZEUS_FPGA "ZEUS_FPGA"
-#define MACHINE_TYPE_MSP2000REVB_FPGA "MSP2000REVB-FPGA"
-#define MACHINE_TYPE_MSP5000_FPGA "MSP5000-FPGA"
-#define MACHINE_TYPE_MSP4200_FPGA "MSP4200-FPGA"
-#define MACHINE_TYPE_MSP7100_FPGA "MSP7100-FPGA"
-#define MACHINE_TYPE_OTHER_FPGA "OTHER-FPGA"
-
-/* Device Family definitions */
-#define FAMILY_FPGA 0x0000
-#define FAMILY_ZEUS 0x1000
-#define FAMILY_POLO 0x2000
-#define FAMILY_DUET 0x4000
-#define FAMILY_TRIAD 0x5000
-#define FAMILY_MSP4200 0x4200
-#define FAMILY_MSP4200_FPGA 0x4f00
-#define FAMILY_MSP7100 0x7100
-#define FAMILY_MSP7100_FPGA 0x7f00
-
-/* Device Type definitions */
-#define TYPE_MSP7120 0x7120
-#define TYPE_MSP7130 0x7130
-
-#define ENET_KEY 'E'
-#define ENETTXD_KEY 'e'
-#define PCI_KEY 'P'
-#define PCIMUX_KEY 'p'
-#define SEC_KEY 'S'
-#define SPAD_KEY 'D'
-#define TDM_KEY 'T'
-#define ZSP_KEY 'Z'
-
-#define FEATURE_NOEXIST '-'
-#define FEATURE_EXIST '+'
-
-#define ENET_MII 'M'
-#define ENET_RMII 'R'
-
-#define ENETTXD_FALLING 'F'
-#define ENETTXD_RISING 'R'
-
-#define PCI_HOST 'H'
-#define PCI_PERIPHERAL 'P'
-
-#define PCIMUX_FULL 'F'
-#define PCIMUX_SINGLE 'S'
-
-#define SEC_DUET 'D'
-#define SEC_POLO 'P'
-#define SEC_SLOW 'S'
-#define SEC_TRIAD 'T'
-
-#define SPAD_POLO 'P'
-
-#define TDM_DUET 'D' /* DUET TDMs might exist */
-#define TDM_POLO 'P' /* POLO TDMs might exist */
-#define TDM_TRIAD 'T' /* TRIAD TDMs might exist */
-
-#define ZSP_DUET 'D' /* one DUET zsp engine */
-#define ZSP_TRIAD 'T' /* two TRIAD zsp engines */
-
-extern char *prom_getenv(char *name);
-extern void prom_init_cmdline(void);
-extern void prom_meminit(void);
-extern void prom_fixup_mem_map(unsigned long start_mem,
- unsigned long end_mem);
-
-extern int get_ethernet_addr(char *ethaddr_name, char *ethernet_addr);
-extern unsigned long get_deviceid(void);
-extern char identify_enet(unsigned long interface_num);
-extern char identify_enetTxD(unsigned long interface_num);
-extern char identify_pci(void);
-extern char identify_sec(void);
-extern char identify_spad(void);
-extern char identify_sec(void);
-extern char identify_tdm(void);
-extern char identify_zsp(void);
-extern unsigned long identify_family(void);
-extern unsigned long identify_revision(void);
-
-/*
- * The following macro calls printk and puts the format string
- * into an init section so it can be reclaimed.
- */
-#define ppfinit(f, x...) \
- do { \
- static char _f[] __initdata = KERN_INFO f; \
- printk(_f, ## x); \
- } while (0)
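/*
 * Illustration (not part of the original header): typical use during early
 * boot, assuming the prom interface above has been initialised.
 */
static void __init example_report_device(void)
{
	ppfinit("MSP device id: %08lx\n", get_deviceid());
}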
-
-/* Memory descriptor management. */
-#define PROM_MAX_PMEMBLOCKS 7 /* 6 used */
-
-enum yamon_memtypes {
- yamon_dontuse,
- yamon_prom,
- yamon_free,
-};
-
-struct prom_pmemblock {
- unsigned long base; /* Within KSEG0. */
- unsigned int size; /* In bytes. */
- unsigned int type; /* free or prom memory */
-};
-
-extern int prom_argc;
-extern char **prom_argv;
-extern char **prom_envp;
-extern int *prom_vec;
-extern struct prom_pmemblock *prom_getmdesc(void);
-
-#endif /* !_ASM_MSP_PROM_H */
diff --git a/arch/mips/include/asm/mach-pmcs-msp71xx/msp_regops.h b/arch/mips/include/asm/mach-pmcs-msp71xx/msp_regops.h
deleted file mode 100644
index 90dbe43c8d27..000000000000
--- a/arch/mips/include/asm/mach-pmcs-msp71xx/msp_regops.h
+++ /dev/null
@@ -1,237 +0,0 @@
-/*
- * SMP/VPE-safe functions to access "registers" (see note).
- *
- * NOTES:
- * - These macros use ll/sc instructions, so it is your responsibility to
- * ensure these are available on your platform before including this file.
- * - The MIPS32 spec states that ll/sc results are undefined for uncached
- * accesses. This means they can't be used on HW registers accessed
- * through kseg1. Code which requires these macros for this purpose must
- * front-end the registers with cached memory "registers" and have a single
- * thread update the actual HW registers.
- * - A maximum of 2k of code can be inserted between ll and sc. Every
- *   memory access between the instructions will increase the chance of
- * sc failing and having to loop.
- * - When using custom_read_reg32/custom_write_reg32 only perform the
- * necessary logical operations on the register value in between these
- * two calls. All other logic should be performed before the first call.
- * - There is a bug on the R10000 chips which has a workaround. If you
- * are affected by this bug, make sure to define the symbol 'R10000_LLSC_WAR'
- * to be non-zero. If you are using this header from within linux, you may
- * include <asm/war.h> before including this file to have this defined
- * appropriately for you.
- *
- * Copyright 2005-2007 PMC-Sierra, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
- * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc., 675
- * Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#ifndef __ASM_REGOPS_H__
-#define __ASM_REGOPS_H__
-
-#include <linux/types.h>
-
-#include <asm/compiler.h>
-#include <asm/war.h>
-
-#ifndef R10000_LLSC_WAR
-#define R10000_LLSC_WAR 0
-#endif
-
-#if R10000_LLSC_WAR == 1
-#define __beqz "beqzl "
-#else
-#define __beqz "beqz "
-#endif
-
-#ifndef _LINUX_TYPES_H
-typedef unsigned int u32;
-#endif
-
-/*
- * Sets all the masked bits to the corresponding value bits
- */
-static inline void set_value_reg32(volatile u32 *const addr,
- u32 const mask,
- u32 const value)
-{
- u32 temp;
-
- __asm__ __volatile__(
- " .set push \n"
- " .set arch=r4000 \n"
- "1: ll %0, %1 # set_value_reg32 \n"
- " and %0, %2 \n"
- " or %0, %3 \n"
- " sc %0, %1 \n"
- " "__beqz"%0, 1b \n"
- " nop \n"
- " .set pop \n"
- : "=&r" (temp), "=" GCC_OFF_SMALL_ASM() (*addr)
- : "ir" (~mask), "ir" (value), GCC_OFF_SMALL_ASM() (*addr));
-}
-
-/*
- * Sets all the masked bits to '1'
- */
-static inline void set_reg32(volatile u32 *const addr,
- u32 const mask)
-{
- u32 temp;
-
- __asm__ __volatile__(
- " .set push \n"
- " .set arch=r4000 \n"
- "1: ll %0, %1 # set_reg32 \n"
- " or %0, %2 \n"
- " sc %0, %1 \n"
- " "__beqz"%0, 1b \n"
- " nop \n"
- " .set pop \n"
- : "=&r" (temp), "=" GCC_OFF_SMALL_ASM() (*addr)
- : "ir" (mask), GCC_OFF_SMALL_ASM() (*addr));
-}
-
-/*
- * Sets all the masked bits to '0'
- */
-static inline void clear_reg32(volatile u32 *const addr,
- u32 const mask)
-{
- u32 temp;
-
- __asm__ __volatile__(
- " .set push \n"
- " .set arch=r4000 \n"
- "1: ll %0, %1 # clear_reg32 \n"
- " and %0, %2 \n"
- " sc %0, %1 \n"
- " "__beqz"%0, 1b \n"
- " nop \n"
- " .set pop \n"
- : "=&r" (temp), "=" GCC_OFF_SMALL_ASM() (*addr)
- : "ir" (~mask), GCC_OFF_SMALL_ASM() (*addr));
-}
-
-/*
- * Toggles all masked bits from '0' to '1' and '1' to '0'
- */
-static inline void toggle_reg32(volatile u32 *const addr,
- u32 const mask)
-{
- u32 temp;
-
- __asm__ __volatile__(
- " .set push \n"
- " .set arch=r4000 \n"
- "1: ll %0, %1 # toggle_reg32 \n"
- " xor %0, %2 \n"
- " sc %0, %1 \n"
- " "__beqz"%0, 1b \n"
- " nop \n"
- " .set pop \n"
- : "=&r" (temp), "=" GCC_OFF_SMALL_ASM() (*addr)
- : "ir" (mask), GCC_OFF_SMALL_ASM() (*addr));
-}
-
-/*
- * Read all masked bits; others are returned as '0'
- */
-static inline u32 read_reg32(volatile u32 *const addr,
- u32 const mask)
-{
- u32 temp;
-
- __asm__ __volatile__(
- " .set push \n"
- " .set noreorder \n"
- " lw %0, %1 # read \n"
- " and %0, %2 # mask \n"
- " .set pop \n"
- : "=&r" (temp)
- : "m" (*addr), "ir" (mask));
-
- return temp;
-}
-
-/*
- * blocking_read_reg32 - Read address with blocking load
- *
- * Uncached writes need to be read back to ensure they reach RAM.
- * The returned value must be 'used' to prevent the access from becoming a
- * non-blocking load.
- */
-static inline u32 blocking_read_reg32(volatile u32 *const addr)
-{
- u32 temp;
-
- __asm__ __volatile__(
- " .set push \n"
- " .set noreorder \n"
- " lw %0, %1 # read \n"
- " move %0, %0 # block \n"
- " .set pop \n"
- : "=&r" (temp)
- : "m" (*addr));
-
- return temp;
-}
-
-/*
- * For special strange cases only:
- *
- * If you need custom processing within a ll/sc loop, use the following macros
- * VERY CAREFULLY:
- *
- * u32 tmp; <-- Define a variable to hold the data
- *
- * custom_read_reg32(address, tmp);	<-- Reads the address and puts the value
- *					    in the 'tmp' variable given
- *
- * From here on out, you are (basically) atomic, so don't do anything too
- * fancy!
- * Also, this code may loop if the end of this block fails to write
- * everything back safely due to the other CPU, so do NOT do anything
- * with side-effects!
- *
- * custom_write_reg32(address, tmp); <-- Writes back 'tmp' safely.
- */
-#define custom_read_reg32(address, tmp) \
- __asm__ __volatile__( \
- " .set push \n" \
- " .set arch=r4000 \n" \
- "1: ll %0, %1 #custom_read_reg32 \n" \
- " .set pop \n" \
- : "=r" (tmp), "=" GCC_OFF_SMALL_ASM() (*address) \
- : GCC_OFF_SMALL_ASM() (*address))
-
-#define custom_write_reg32(address, tmp) \
- __asm__ __volatile__( \
- " .set push \n" \
- " .set arch=r4000 \n" \
- " sc %0, %1 #custom_write_reg32 \n" \
- " "__beqz"%0, 1b \n" \
- " nop \n" \
- " .set pop \n" \
- : "=&r" (tmp), "=" GCC_OFF_SMALL_ASM() (*address) \
- : "0" (tmp), GCC_OFF_SMALL_ASM() (*address))
-
-#endif /* __ASM_REGOPS_H__ */
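A minimal usage sketch of the register helpers deleted above, applied to a cached shadow word as the header's notes require (the shadow variable and bit assignments are hypothetical):

static u32 example_shadow_flags;	/* cached memory "register", not a kseg1 HW register */

static void example_update_flags(void)
{
	set_reg32(&example_shadow_flags, 0x1);			/* set bit 0 */
	clear_reg32(&example_shadow_flags, 0x2);		/* clear bit 1 */
	set_value_reg32(&example_shadow_flags, 0xf0, 0x30);	/* set bits 7:4 to 0x3 */
}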
diff --git a/arch/mips/include/asm/mach-pmcs-msp71xx/msp_regs.h b/arch/mips/include/asm/mach-pmcs-msp71xx/msp_regs.h
deleted file mode 100644
index e2ce9be51f3f..000000000000
--- a/arch/mips/include/asm/mach-pmcs-msp71xx/msp_regs.h
+++ /dev/null
@@ -1,652 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Defines for the address space, registers and register configuration
- * (bit masks, access macros etc) for the PMC-Sierra line of MSP products.
- * This file contains address maps for all the devices in the line of
- * products but only has register definitions and configuration masks for
- * registers which aren't definitely associated with any device. Things
- * like clock settings, reset access, the ELB etc. Individual device
- * drivers will reference the appropriate XXX_BASE value defined here
- * and have individual registers offset from that.
- *
- * Copyright (C) 2005-2007 PMC-Sierra, Inc. All rights reserved.
- * Author: Andrew Hughes, Andrew_Hughes@pmc-sierra.com
- *
- * ########################################################################
- *
- * ########################################################################
- */
-
-#include <asm/addrspace.h>
-#include <linux/types.h>
-
-#ifndef _ASM_MSP_REGS_H
-#define _ASM_MSP_REGS_H
-
-/*
- ########################################################################
- # Address space and device base definitions #
- ########################################################################
- */
-
-/*
- ***************************************************************************
- * System Logic and Peripherals (ELB, UART0, etc) device address space *
- ***************************************************************************
- */
-#define MSP_SLP_BASE 0x1c000000
- /* System Logic and Peripherals */
-#define MSP_RST_BASE (MSP_SLP_BASE + 0x10)
- /* System reset register base */
-#define MSP_RST_SIZE 0x0C /* System reset register space */
-
-#define MSP_WTIMER_BASE (MSP_SLP_BASE + 0x04C)
- /* watchdog timer base */
-#define MSP_ITIMER_BASE (MSP_SLP_BASE + 0x054)
- /* internal timer base */
-#define MSP_UART0_BASE (MSP_SLP_BASE + 0x100)
- /* UART0 controller base */
-#define MSP_BCPY_CTRL_BASE (MSP_SLP_BASE + 0x120)
- /* Block Copy controller base */
-#define MSP_BCPY_DESC_BASE (MSP_SLP_BASE + 0x160)
- /* Block Copy descriptor base */
-
-/*
- ***************************************************************************
- * PCI address space *
- ***************************************************************************
- */
-#define MSP_PCI_BASE 0x19000000
-
-/*
- ***************************************************************************
- * MSbus device address space *
- ***************************************************************************
- */
-#define MSP_MSB_BASE 0x18000000
- /* MSbus address start */
-#define MSP_PER_BASE (MSP_MSB_BASE + 0x400000)
- /* Peripheral device registers */
-#define MSP_MAC0_BASE (MSP_MSB_BASE + 0x600000)
- /* MAC A device registers */
-#define MSP_MAC1_BASE (MSP_MSB_BASE + 0x700000)
- /* MAC B device registers */
-#define MSP_MAC_SIZE 0xE0 /* MAC register space */
-
-#define MSP_SEC_BASE (MSP_MSB_BASE + 0x800000)
- /* Security Engine registers */
-#define MSP_MAC2_BASE (MSP_MSB_BASE + 0x900000)
- /* MAC C device registers */
-#define MSP_ADSL2_BASE (MSP_MSB_BASE + 0xA80000)
- /* ADSL2 device registers */
-#define MSP_USB0_BASE (MSP_MSB_BASE + 0xB00000)
- /* USB0 device registers */
-#define MSP_USB1_BASE (MSP_MSB_BASE + 0x300000)
- /* USB1 device registers */
-#define MSP_CPUIF_BASE (MSP_MSB_BASE + 0xC00000)
- /* CPU interface registers */
-
-/* Devices within the MSbus peripheral block */
-#define MSP_UART1_BASE (MSP_PER_BASE + 0x030)
- /* UART1 controller base */
-#define MSP_SPI_BASE (MSP_PER_BASE + 0x058)
- /* SPI/MPI control registers */
-#define MSP_TWI_BASE (MSP_PER_BASE + 0x090)
- /* Two-wire control registers */
-#define MSP_PTIMER_BASE (MSP_PER_BASE + 0x0F0)
- /* Programmable timer control */
-
-/*
- ***************************************************************************
- * Physical Memory configuration address space *
- ***************************************************************************
- */
-#define MSP_MEM_CFG_BASE 0x17f00000
-
-#define MSP_MEM_INDIRECT_CTL_10 0x10
-
-/*
- * Notes:
- * 1) The SPI registers are split into two blocks, one offset from the
- * MSP_SPI_BASE by 0x00 and the other offset from the MSP_SPI_BASE by
- * 0x68. The SPI driver definitions for the register must be aware
- * of this.
- * 2) The block copy engine registers are divided into two regions, one
- * for the control/configuration of the engine proper and one for the
- * values of the descriptors used in the copy process. These have
- * different base defines (CTRL_BASE vs DESC_BASE)
- * 3) These constants are for physical addresses which means that they
- * work correctly with "ioremap" and friends. This means that device
- * drivers will need to remap these addresses using ioremap and perhaps
- * the readw/writew macros. Or they could use the regptr() macro
- * defined below, but the readw/writew calls are the correct thing.
- * 4) The UARTs have an additional status register offset from the base
- * address. This register isn't used in the standard 8250 driver but
- * may be used in other software. Consult the hardware datasheet for
- * offset details.
- * 5) For some unknown reason the security engine (MSP_SEC_BASE) registers
- * start at an offset of 0x84 from the base address but the block of
- * registers before this is reserved for the security engine. The
- * driver will have to be aware of this but it makes the register
- * definitions line up better with the documentation.
- */
-
-/*
- ########################################################################
- # System register definitions. Not associated with a specific device #
- ########################################################################
- */
-
-/*
- * This macro maps the physical register number into uncached space
- * and (for C code) casts it into a u32 pointer so it can be dereferenced.
- * Normally these would be accessed with ioremap and readX/writeX, but
- * these are convenient for a lot of internal kernel code.
- */
-#ifdef __ASSEMBLER__
- #define regptr(addr) (KSEG1ADDR(addr))
-#else
- #define regptr(addr) ((volatile u32 *const)(KSEG1ADDR(addr)))
-#endif
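/*
 * Illustration (not part of the original header): for C code,
 * regptr(0x1c000000) expands to
 *   (volatile u32 *const)(KSEG1ADDR(0x1c000000)) == (volatile u32 *const)0xBC000000
 * i.e. an uncached (kseg1) pointer to the start of the SLP block.
 */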
-
-/*
- ***************************************************************************
- * System Logic and Peripherals (RESET, ELB, etc) registers *
- ***************************************************************************
- */
-
-/* System Control register definitions */
-#define DEV_ID_REG regptr(MSP_SLP_BASE + 0x00)
- /* Device-ID RO */
-#define FWR_ID_REG regptr(MSP_SLP_BASE + 0x04)
- /* Firmware-ID Register RW */
-#define SYS_ID_REG0 regptr(MSP_SLP_BASE + 0x08)
- /* System-ID Register-0 RW */
-#define SYS_ID_REG1 regptr(MSP_SLP_BASE + 0x0C)
- /* System-ID Register-1 RW */
-
-/* System Reset register definitions */
-#define RST_STS_REG regptr(MSP_SLP_BASE + 0x10)
- /* System Reset Status RO */
-#define RST_SET_REG regptr(MSP_SLP_BASE + 0x14)
- /* System Set Reset WO */
-#define RST_CLR_REG regptr(MSP_SLP_BASE + 0x18)
- /* System Clear Reset WO */
-
-/* System Clock Registers */
-#define PCI_SLP_REG regptr(MSP_SLP_BASE + 0x1C)
- /* PCI clock generator RW */
-#define URT_SLP_REG regptr(MSP_SLP_BASE + 0x20)
- /* UART clock generator RW */
-/* reserved (MSP_SLP_BASE + 0x24) */
-/* reserved (MSP_SLP_BASE + 0x28) */
-#define PLL1_SLP_REG regptr(MSP_SLP_BASE + 0x2C)
- /* PLL1 clock generator RW */
-#define PLL0_SLP_REG regptr(MSP_SLP_BASE + 0x30)
- /* PLL0 clock generator RW */
-#define MIPS_SLP_REG regptr(MSP_SLP_BASE + 0x34)
- /* MIPS clock generator RW */
-#define VE_SLP_REG regptr(MSP_SLP_BASE + 0x38)
- /* Voice Eng clock generator RW */
-/* reserved (MSP_SLP_BASE + 0x3C) */
-#define MSB_SLP_REG regptr(MSP_SLP_BASE + 0x40)
- /* MS-Bus clock generator RW */
-#define SMAC_SLP_REG regptr(MSP_SLP_BASE + 0x44)
- /* Sec & MAC clock generator RW */
-#define PERF_SLP_REG regptr(MSP_SLP_BASE + 0x48)
- /* Per & TDM clock generator RW */
-
-/* Interrupt Controller Registers */
-#define SLP_INT_STS_REG regptr(MSP_SLP_BASE + 0x70)
- /* Interrupt status register RW */
-#define SLP_INT_MSK_REG regptr(MSP_SLP_BASE + 0x74)
- /* Interrupt enable/mask RW */
-#define SE_MBOX_REG regptr(MSP_SLP_BASE + 0x78)
- /* Security Engine mailbox RW */
-#define VE_MBOX_REG regptr(MSP_SLP_BASE + 0x7C)
- /* Voice Engine mailbox RW */
-
-/* ELB Controller Registers */
-#define CS0_CNFG_REG regptr(MSP_SLP_BASE + 0x80)
- /* ELB CS0 Configuration Reg */
-#define CS0_ADDR_REG regptr(MSP_SLP_BASE + 0x84)
- /* ELB CS0 Base Address Reg */
-#define CS0_MASK_REG regptr(MSP_SLP_BASE + 0x88)
- /* ELB CS0 Mask Register */
-#define CS0_ACCESS_REG regptr(MSP_SLP_BASE + 0x8C)
- /* ELB CS0 access register */
-
-#define CS1_CNFG_REG regptr(MSP_SLP_BASE + 0x90)
- /* ELB CS1 Configuration Reg */
-#define CS1_ADDR_REG regptr(MSP_SLP_BASE + 0x94)
- /* ELB CS1 Base Address Reg */
-#define CS1_MASK_REG regptr(MSP_SLP_BASE + 0x98)
- /* ELB CS1 Mask Register */
-#define CS1_ACCESS_REG regptr(MSP_SLP_BASE + 0x9C)
- /* ELB CS1 access register */
-
-#define CS2_CNFG_REG regptr(MSP_SLP_BASE + 0xA0)
- /* ELB CS2 Configuration Reg */
-#define CS2_ADDR_REG regptr(MSP_SLP_BASE + 0xA4)
- /* ELB CS2 Base Address Reg */
-#define CS2_MASK_REG regptr(MSP_SLP_BASE + 0xA8)
- /* ELB CS2 Mask Register */
-#define CS2_ACCESS_REG regptr(MSP_SLP_BASE + 0xAC)
- /* ELB CS2 access register */
-
-#define CS3_CNFG_REG regptr(MSP_SLP_BASE + 0xB0)
- /* ELB CS3 Configuration Reg */
-#define CS3_ADDR_REG regptr(MSP_SLP_BASE + 0xB4)
- /* ELB CS3 Base Address Reg */
-#define CS3_MASK_REG regptr(MSP_SLP_BASE + 0xB8)
- /* ELB CS3 Mask Register */
-#define CS3_ACCESS_REG regptr(MSP_SLP_BASE + 0xBC)
- /* ELB CS3 access register */
-
-#define CS4_CNFG_REG regptr(MSP_SLP_BASE + 0xC0)
- /* ELB CS4 Configuration Reg */
-#define CS4_ADDR_REG regptr(MSP_SLP_BASE + 0xC4)
- /* ELB CS4 Base Address Reg */
-#define CS4_MASK_REG regptr(MSP_SLP_BASE + 0xC8)
- /* ELB CS4 Mask Register */
-#define CS4_ACCESS_REG regptr(MSP_SLP_BASE + 0xCC)
- /* ELB CS4 access register */
-
-#define CS5_CNFG_REG regptr(MSP_SLP_BASE + 0xD0)
- /* ELB CS5 Configuration Reg */
-#define CS5_ADDR_REG regptr(MSP_SLP_BASE + 0xD4)
- /* ELB CS5 Base Address Reg */
-#define CS5_MASK_REG regptr(MSP_SLP_BASE + 0xD8)
- /* ELB CS5 Mask Register */
-#define CS5_ACCESS_REG regptr(MSP_SLP_BASE + 0xDC)
- /* ELB CS5 access register */
-
-/* reserved 0xE0 - 0xE8 */
-#define ELB_1PC_EN_REG regptr(MSP_SLP_BASE + 0xEC)
- /* ELB single PC card detect */
-
-/* reserved 0xF0 - 0xF8 */
-#define ELB_CLK_CFG_REG regptr(MSP_SLP_BASE + 0xFC)
- /* SDRAM read/ELB timing Reg */
-
-/* Extended UART status registers */
-#define UART0_STATUS_REG regptr(MSP_UART0_BASE + 0x0c0)
- /* UART Status Register 0 */
-#define UART1_STATUS_REG regptr(MSP_UART1_BASE + 0x170)
- /* UART Status Register 1 */
-
-/* Performance monitoring registers */
-#define PERF_MON_CTRL_REG regptr(MSP_SLP_BASE + 0x140)
- /* Performance monitor control */
-#define PERF_MON_CLR_REG regptr(MSP_SLP_BASE + 0x144)
- /* Performance monitor clear */
-#define PERF_MON_CNTH_REG regptr(MSP_SLP_BASE + 0x148)
- /* Perf monitor counter high */
-#define PERF_MON_CNTL_REG regptr(MSP_SLP_BASE + 0x14C)
- /* Perf monitor counter low */
-
-/* System control registers */
-#define SYS_CTRL_REG regptr(MSP_SLP_BASE + 0x150)
- /* System control register */
-#define SYS_ERR1_REG regptr(MSP_SLP_BASE + 0x154)
- /* System Error status 1 */
-#define SYS_ERR2_REG regptr(MSP_SLP_BASE + 0x158)
- /* System Error status 2 */
-#define SYS_INT_CFG_REG regptr(MSP_SLP_BASE + 0x15C)
- /* System Interrupt config */
-
-/* Voice Engine Memory configuration */
-#define VE_MEM_REG regptr(MSP_SLP_BASE + 0x17C)
- /* Voice engine memory config */
-
-/* CPU/SLP Error Status registers */
-#define CPU_ERR1_REG regptr(MSP_SLP_BASE + 0x180)
- /* CPU/SLP Error status 1 */
-#define CPU_ERR2_REG regptr(MSP_SLP_BASE + 0x184)
-					/* CPU/SLP Error status 2 */
-
-/* Extended GPIO registers */
-#define EXTENDED_GPIO1_REG regptr(MSP_SLP_BASE + 0x188)
-#define EXTENDED_GPIO2_REG regptr(MSP_SLP_BASE + 0x18c)
-#define EXTENDED_GPIO_REG EXTENDED_GPIO1_REG
- /* Backward-compatibility */
-
-/* System Error registers */
-#define SLP_ERR_STS_REG regptr(MSP_SLP_BASE + 0x190)
- /* Int status for SLP errors */
-#define SLP_ERR_MSK_REG regptr(MSP_SLP_BASE + 0x194)
- /* Int mask for SLP errors */
-#define SLP_ELB_ERST_REG regptr(MSP_SLP_BASE + 0x198)
- /* External ELB reset */
-#define SLP_BOOT_STS_REG regptr(MSP_SLP_BASE + 0x19C)
- /* Boot Status */
-
-/* Extended ELB addressing */
-#define CS0_EXT_ADDR_REG regptr(MSP_SLP_BASE + 0x1A0)
- /* CS0 Extended address */
-#define CS1_EXT_ADDR_REG regptr(MSP_SLP_BASE + 0x1A4)
- /* CS1 Extended address */
-#define CS2_EXT_ADDR_REG regptr(MSP_SLP_BASE + 0x1A8)
- /* CS2 Extended address */
-#define CS3_EXT_ADDR_REG regptr(MSP_SLP_BASE + 0x1AC)
- /* CS3 Extended address */
-/* reserved 0x1B0 */
-#define CS5_EXT_ADDR_REG regptr(MSP_SLP_BASE + 0x1B4)
- /* CS5 Extended address */
-
-/* PLL Adjustment registers */
-#define PLL_LOCK_REG regptr(MSP_SLP_BASE + 0x200)
- /* PLL0 lock status */
-#define PLL_ARST_REG regptr(MSP_SLP_BASE + 0x204)
- /* PLL Analog reset status */
-#define PLL0_ADJ_REG regptr(MSP_SLP_BASE + 0x208)
- /* PLL0 Adjustment value */
-#define PLL1_ADJ_REG regptr(MSP_SLP_BASE + 0x20C)
- /* PLL1 Adjustment value */
-
-/*
- ***************************************************************************
- * Peripheral Register definitions *
- ***************************************************************************
- */
-
-/* Peripheral status */
-#define PER_CTRL_REG regptr(MSP_PER_BASE + 0x50)
- /* Peripheral control register */
-#define PER_STS_REG regptr(MSP_PER_BASE + 0x54)
- /* Peripheral status register */
-
-/* SPI/MPI Registers */
-#define SMPI_TX_SZ_REG regptr(MSP_PER_BASE + 0x58)
- /* SPI/MPI Tx Size register */
-#define SMPI_RX_SZ_REG regptr(MSP_PER_BASE + 0x5C)
- /* SPI/MPI Rx Size register */
-#define SMPI_CTL_REG regptr(MSP_PER_BASE + 0x60)
- /* SPI/MPI Control register */
-#define SMPI_MS_REG regptr(MSP_PER_BASE + 0x64)
- /* SPI/MPI Chip Select reg */
-#define SMPI_CORE_DATA_REG regptr(MSP_PER_BASE + 0xC0)
- /* SPI/MPI Core Data reg */
-#define SMPI_CORE_CTRL_REG regptr(MSP_PER_BASE + 0xC4)
- /* SPI/MPI Core Control reg */
-#define SMPI_CORE_STAT_REG regptr(MSP_PER_BASE + 0xC8)
- /* SPI/MPI Core Status reg */
-#define SMPI_CORE_SSEL_REG regptr(MSP_PER_BASE + 0xCC)
- /* SPI/MPI Core Ssel reg */
-#define SMPI_FIFO_REG regptr(MSP_PER_BASE + 0xD0)
- /* SPI/MPI Data FIFO reg */
-
-/* Peripheral Block Error Registers */
-#define PER_ERR_STS_REG regptr(MSP_PER_BASE + 0x70)
- /* Error Bit Status Register */
-#define PER_ERR_MSK_REG regptr(MSP_PER_BASE + 0x74)
- /* Error Bit Mask Register */
-#define PER_HDR1_REG regptr(MSP_PER_BASE + 0x78)
- /* Error Header 1 Register */
-#define PER_HDR2_REG regptr(MSP_PER_BASE + 0x7C)
- /* Error Header 2 Register */
-
-/* Peripheral Block Interrupt Registers */
-#define PER_INT_STS_REG regptr(MSP_PER_BASE + 0x80)
- /* Interrupt status register */
-#define PER_INT_MSK_REG regptr(MSP_PER_BASE + 0x84)
- /* Interrupt Mask Register */
-#define GPIO_INT_STS_REG regptr(MSP_PER_BASE + 0x88)
- /* GPIO interrupt status reg */
-#define GPIO_INT_MSK_REG regptr(MSP_PER_BASE + 0x8C)
- /* GPIO interrupt MASK Reg */
-
-/* POLO GPIO registers */
-#define POLO_GPIO_DAT1_REG regptr(MSP_PER_BASE + 0x0E0)
- /* Polo GPIO[8:0] data reg */
-#define POLO_GPIO_CFG1_REG regptr(MSP_PER_BASE + 0x0E4)
- /* Polo GPIO[7:0] config reg */
-#define POLO_GPIO_CFG2_REG regptr(MSP_PER_BASE + 0x0E8)
- /* Polo GPIO[15:8] config reg */
-#define POLO_GPIO_OD1_REG regptr(MSP_PER_BASE + 0x0EC)
- /* Polo GPIO[31:0] output drive */
-#define POLO_GPIO_CFG3_REG regptr(MSP_PER_BASE + 0x170)
- /* Polo GPIO[23:16] config reg */
-#define POLO_GPIO_DAT2_REG regptr(MSP_PER_BASE + 0x174)
- /* Polo GPIO[15:9] data reg */
-#define POLO_GPIO_DAT3_REG regptr(MSP_PER_BASE + 0x178)
- /* Polo GPIO[23:16] data reg */
-#define POLO_GPIO_DAT4_REG regptr(MSP_PER_BASE + 0x17C)
- /* Polo GPIO[31:24] data reg */
-#define POLO_GPIO_DAT5_REG regptr(MSP_PER_BASE + 0x180)
- /* Polo GPIO[39:32] data reg */
-#define POLO_GPIO_DAT6_REG regptr(MSP_PER_BASE + 0x184)
- /* Polo GPIO[47:40] data reg */
-#define POLO_GPIO_DAT7_REG regptr(MSP_PER_BASE + 0x188)
- /* Polo GPIO[54:48] data reg */
-#define POLO_GPIO_CFG4_REG regptr(MSP_PER_BASE + 0x18C)
- /* Polo GPIO[31:24] config reg */
-#define POLO_GPIO_CFG5_REG regptr(MSP_PER_BASE + 0x190)
- /* Polo GPIO[39:32] config reg */
-#define POLO_GPIO_CFG6_REG regptr(MSP_PER_BASE + 0x194)
- /* Polo GPIO[47:40] config reg */
-#define POLO_GPIO_CFG7_REG regptr(MSP_PER_BASE + 0x198)
- /* Polo GPIO[54:48] config reg */
-#define POLO_GPIO_OD2_REG regptr(MSP_PER_BASE + 0x19C)
- /* Polo GPIO[54:32] output drive */
-
-/* Generic GPIO registers */
-#define GPIO_DATA1_REG regptr(MSP_PER_BASE + 0x170)
- /* GPIO[1:0] data register */
-#define GPIO_DATA2_REG regptr(MSP_PER_BASE + 0x174)
- /* GPIO[5:2] data register */
-#define GPIO_DATA3_REG regptr(MSP_PER_BASE + 0x178)
- /* GPIO[9:6] data register */
-#define GPIO_DATA4_REG regptr(MSP_PER_BASE + 0x17C)
- /* GPIO[15:10] data register */
-#define GPIO_CFG1_REG regptr(MSP_PER_BASE + 0x180)
- /* GPIO[1:0] config register */
-#define GPIO_CFG2_REG regptr(MSP_PER_BASE + 0x184)
- /* GPIO[5:2] config register */
-#define GPIO_CFG3_REG regptr(MSP_PER_BASE + 0x188)
- /* GPIO[9:6] config register */
-#define GPIO_CFG4_REG regptr(MSP_PER_BASE + 0x18C)
- /* GPIO[15:10] config register */
-#define GPIO_OD_REG regptr(MSP_PER_BASE + 0x190)
- /* GPIO[15:0] output drive */
-
-/*
- ***************************************************************************
- * CPU Interface register definitions *
- ***************************************************************************
- */
-#define PCI_FLUSH_REG regptr(MSP_CPUIF_BASE + 0x00)
- /* PCI-SDRAM queue flush trigger */
-#define OCP_ERR1_REG regptr(MSP_CPUIF_BASE + 0x04)
- /* OCP Error Attribute 1 */
-#define OCP_ERR2_REG regptr(MSP_CPUIF_BASE + 0x08)
- /* OCP Error Attribute 2 */
-#define OCP_STS_REG regptr(MSP_CPUIF_BASE + 0x0C)
- /* OCP Error Status */
-#define CPUIF_PM_REG regptr(MSP_CPUIF_BASE + 0x10)
- /* CPU policy configuration */
-#define CPUIF_CFG_REG regptr(MSP_CPUIF_BASE + 0x10)
- /* Misc configuration options */
-
-/* Central Interrupt Controller Registers */
-#define MSP_CIC_BASE (MSP_CPUIF_BASE + 0x8000)
- /* Central Interrupt registers */
-#define CIC_EXT_CFG_REG regptr(MSP_CIC_BASE + 0x00)
- /* External interrupt config */
-#define CIC_STS_REG regptr(MSP_CIC_BASE + 0x04)
- /* CIC Interrupt Status */
-#define CIC_VPE0_MSK_REG regptr(MSP_CIC_BASE + 0x08)
- /* VPE0 Interrupt Mask */
-#define CIC_VPE1_MSK_REG regptr(MSP_CIC_BASE + 0x0C)
- /* VPE1 Interrupt Mask */
-#define CIC_TC0_MSK_REG regptr(MSP_CIC_BASE + 0x10)
- /* Thread Context 0 Int Mask */
-#define CIC_TC1_MSK_REG regptr(MSP_CIC_BASE + 0x14)
- /* Thread Context 1 Int Mask */
-#define CIC_TC2_MSK_REG regptr(MSP_CIC_BASE + 0x18)
- /* Thread Context 2 Int Mask */
-#define CIC_TC3_MSK_REG regptr(MSP_CIC_BASE + 0x18)
- /* Thread Context 3 Int Mask */
-#define CIC_TC4_MSK_REG regptr(MSP_CIC_BASE + 0x18)
- /* Thread Context 4 Int Mask */
-#define CIC_PCIMSI_STS_REG regptr(MSP_CIC_BASE + 0x18)
-#define CIC_PCIMSI_MSK_REG regptr(MSP_CIC_BASE + 0x18)
-#define CIC_PCIFLSH_REG regptr(MSP_CIC_BASE + 0x18)
-#define CIC_VPE0_SWINT_REG regptr(MSP_CIC_BASE + 0x08)
-
-
-/*
- ***************************************************************************
- * Memory controller registers *
- ***************************************************************************
- */
-#define MEM_CFG1_REG regptr(MSP_MEM_CFG_BASE + 0x00)
-#define MEM_SS_ADDR regptr(MSP_MEM_CFG_BASE + 0x00)
-#define MEM_SS_DATA regptr(MSP_MEM_CFG_BASE + 0x04)
-#define MEM_SS_WRITE regptr(MSP_MEM_CFG_BASE + 0x08)
-
-/*
- ***************************************************************************
- * PCI controller registers *
- ***************************************************************************
- */
-#define PCI_BASE_REG regptr(MSP_PCI_BASE + 0x00)
-#define PCI_CONFIG_SPACE_REG regptr(MSP_PCI_BASE + 0x800)
-#define PCI_JTAG_DEVID_REG regptr(MSP_SLP_BASE + 0x13c)
-
-/*
- ########################################################################
- # Register content & macro definitions #
- ########################################################################
- */
-
-/*
- ***************************************************************************
- * DEV_ID defines *
- ***************************************************************************
- */
-#define DEV_ID_PCI_DIS (1 << 26) /* Set if PCI disabled */
-#define DEV_ID_PCI_HOST (1 << 20) /* Set if PCI host */
-#define DEV_ID_SINGLE_PC (1 << 19) /* Set if single PC Card */
-#define DEV_ID_FAMILY (0xff << 8) /* family ID code */
-#define POLO_ZEUS_SUB_FAMILY (0x7 << 16) /* sub family for Polo/Zeus */
-
-#define MSPFPGA_ID (0x00 << 8) /* you are on your own here */
-#define MSP5000_ID (0x50 << 8)
-#define MSP4F00_ID (0x4f << 8) /* FPGA version of MSP4200 */
-#define MSP4E00_ID (0x4f << 8) /* FPGA version of MSP7120 */
-#define MSP4200_ID (0x42 << 8)
-#define MSP4000_ID (0x40 << 8)
-#define MSP2XXX_ID (0x20 << 8)
-#define MSPZEUS_ID (0x10 << 8)
-
-#define MSP2004_SUB_ID (0x0 << 16)
-#define MSP2005_SUB_ID (0x1 << 16)
-#define MSP2006_SUB_ID (0x1 << 16)
-#define MSP2007_SUB_ID (0x2 << 16)
-#define MSP2010_SUB_ID (0x3 << 16)
-#define MSP2015_SUB_ID (0x4 << 16)
-#define MSP2020_SUB_ID (0x5 << 16)
-#define MSP2100_SUB_ID (0x6 << 16)
-
-/*
- ***************************************************************************
- * RESET defines *
- ***************************************************************************
- */
-#define MSP_GR_RST (0x01 << 0) /* Global reset bit */
-#define MSP_MR_RST (0x01 << 1) /* MIPS reset bit */
-#define MSP_PD_RST (0x01 << 2) /* PVC DMA reset bit */
-#define MSP_PP_RST (0x01 << 3) /* PVC reset bit */
-/* reserved */
-#define MSP_EA_RST (0x01 << 6) /* Mac A reset bit */
-#define MSP_EB_RST (0x01 << 7) /* Mac B reset bit */
-#define MSP_SE_RST (0x01 << 8) /* Security Eng reset bit */
-#define MSP_PB_RST (0x01 << 9) /* Per block reset bit */
-#define MSP_EC_RST (0x01 << 10) /* Mac C reset bit */
-#define MSP_TW_RST (0x01 << 11) /* TWI reset bit */
-#define MSP_SPI_RST (0x01 << 12) /* SPI/MPI reset bit */
-#define MSP_U1_RST (0x01 << 13) /* UART1 reset bit */
-#define MSP_U0_RST (0x01 << 14) /* UART0 reset bit */
-
-/*
- ***************************************************************************
- * UART defines *
- ***************************************************************************
- */
-#define MSP_BASE_BAUD 25000000
-#define MSP_UART_REG_LEN 0x20
-
-/*
- ***************************************************************************
- * ELB defines *
- ***************************************************************************
- */
-#define PCCARD_32 0x02 /* Set if is PCCARD 32 (Cardbus) */
-#define SINGLE_PCCARD 0x01 /* Set to enable single PC card */
-
-/*
- ***************************************************************************
- * CIC defines *
- ***************************************************************************
- */
-
-/* CIC_EXT_CFG_REG */
-#define EXT_INT_POL(eirq) (1 << (eirq + 8))
-#define EXT_INT_EDGE(eirq) (1 << eirq)
-
-#define CIC_EXT_SET_TRIGGER_LEVEL(reg, eirq) (reg &= ~EXT_INT_EDGE(eirq))
-#define CIC_EXT_SET_TRIGGER_EDGE(reg, eirq) (reg |= EXT_INT_EDGE(eirq))
-#define CIC_EXT_SET_ACTIVE_HI(reg, eirq) (reg |= EXT_INT_POL(eirq))
-#define CIC_EXT_SET_ACTIVE_LO(reg, eirq) (reg &= ~EXT_INT_POL(eirq))
-#define CIC_EXT_SET_ACTIVE_RISING CIC_EXT_SET_ACTIVE_HI
-#define CIC_EXT_SET_ACTIVE_FALLING CIC_EXT_SET_ACTIVE_LO
-
-#define CIC_EXT_IS_TRIGGER_LEVEL(reg, eirq) \
- ((reg & EXT_INT_EDGE(eirq)) == 0)
-#define CIC_EXT_IS_TRIGGER_EDGE(reg, eirq) (reg & EXT_INT_EDGE(eirq))
-#define CIC_EXT_IS_ACTIVE_HI(reg, eirq) (reg & EXT_INT_POL(eirq))
-#define CIC_EXT_IS_ACTIVE_LO(reg, eirq) \
- ((reg & EXT_INT_POL(eirq)) == 0)
-#define CIC_EXT_IS_ACTIVE_RISING CIC_EXT_IS_ACTIVE_HI
-#define CIC_EXT_IS_ACTIVE_FALLING CIC_EXT_IS_ACTIVE_LO
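/*
 * Illustration (not part of the original header): a sketch of configuring
 * external interrupt 2 as rising-edge triggered with the helpers above.
 */
static inline void example_cic_ext2_rising(void)
{
	u32 cfg = *CIC_EXT_CFG_REG;

	CIC_EXT_SET_TRIGGER_EDGE(cfg, 2);
	CIC_EXT_SET_ACTIVE_RISING(cfg, 2);
	*CIC_EXT_CFG_REG = cfg;
}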
-
-/*
- ***************************************************************************
- * Memory Controller defines *
- ***************************************************************************
- */
-
-/* Indirect memory controller registers */
-#define DDRC_CFG(n) (n)
-#define DDRC_DEBUG(n) (0x04 + n)
-#define DDRC_CTL(n) (0x40 + n)
-
-/* Macro to perform DDRC indirect write */
-#define DDRC_INDIRECT_WRITE(reg, mask, value) \
-({ \
- *MEM_SS_ADDR = (((mask) & 0xf) << 8) | ((reg) & 0xff); \
- *MEM_SS_DATA = (value); \
- *MEM_SS_WRITE = 1; \
-})
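/*
 * Illustration (not part of the original header): a DDRC indirect write to
 * the first config register with all four byte lanes enabled; the wrapper
 * and the register choice are hypothetical.
 */
static inline void example_ddrc_write(u32 value)
{
	DDRC_INDIRECT_WRITE(DDRC_CFG(0), 0xf, value);
}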
-
-/*
- ***************************************************************************
- * SPI/MPI Mode *
- ***************************************************************************
- */
-#define SPI_MPI_RX_BUSY 0x00008000 /* SPI/MPI Receive Busy */
-#define SPI_MPI_FIFO_EMPTY 0x00004000 /* SPI/MPI Fifo Empty */
-#define SPI_MPI_TX_BUSY 0x00002000 /* SPI/MPI Transmit Busy */
-#define SPI_MPI_FIFO_FULL 0x00001000 /* SPI/MPU FIFO full */
-
-/*
- ***************************************************************************
- * SPI/MPI Control Register *
- ***************************************************************************
- */
-#define SPI_MPI_RX_START 0x00000004 /* Start receive command */
-#define SPI_MPI_FLUSH_Q 0x00000002 /* Flush SPI/MPI Queue */
-#define SPI_MPI_TX_START 0x00000001 /* Start Transmit Command */
-
-#endif /* !_ASM_MSP_REGS_H */
diff --git a/arch/mips/include/asm/mach-pmcs-msp71xx/msp_slp_int.h b/arch/mips/include/asm/mach-pmcs-msp71xx/msp_slp_int.h
deleted file mode 100644
index 9a763eb5e5f5..000000000000
--- a/arch/mips/include/asm/mach-pmcs-msp71xx/msp_slp_int.h
+++ /dev/null
@@ -1,129 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Defines for the MSP interrupt controller.
- *
- * Copyright (C) 1999 MIPS Technologies, Inc. All rights reserved.
- * Author: Carsten Langgaard, carstenl@mips.com
- *
- * ########################################################################
- *
- * ########################################################################
- */
-
-#ifndef _MSP_SLP_INT_H
-#define _MSP_SLP_INT_H
-
-/*
- * The PMC-Sierra SLP interrupts are arranged in a 3 level cascaded
- * hierarchical system. The first level are the direct MIPS interrupts
- * and are assigned the interrupt range 0-7. The second level is the SLM
- * interrupt controller and is assigned the range 8-39. The third level
- * comprises the Peripheral block, the PCI block, the PCI MSI block and
- * the SLP. The PCI interrupts and the SLP errors are handled by the
- * relevant subsystems so the core interrupt code needs only concern
- * itself with the Peripheral block. These are assigned interrupts in
- * the range 40-71.
- */
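/*
 * Illustration (not part of the original header): with the bases defined
 * below, the three levels occupy
 *   MSP_MIPS_INTBASE == 0  -> IRQs 0-7   (direct MIPS interrupts)
 *   MSP_SLP_INTBASE  == 8  -> IRQs 8-39  (SLM interrupt controller)
 *   MSP_PER_INTBASE  == 40 -> IRQs 40-71 (Peripheral block)
 * so, for example, MSP_INT_UART1 == 42.
 */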
-
-/*
- * IRQs directly connected to CPU
- */
-#define MSP_MIPS_INTBASE 0
-#define MSP_INT_SW0 0 /* IRQ for swint0, C_SW0 */
-#define MSP_INT_SW1 1 /* IRQ for swint1, C_SW1 */
-#define MSP_INT_MAC0 2 /* IRQ for MAC 0, C_IRQ0 */
-#define MSP_INT_MAC1 3 /* IRQ for MAC 1, C_IRQ1 */
-#define MSP_INT_C_IRQ2 4 /* Wired off, C_IRQ2 */
-#define MSP_INT_VE 5 /* IRQ for Voice Engine, C_IRQ3 */
-#define MSP_INT_SLP 6 /* IRQ for SLM block, C_IRQ4 */
-#define MSP_INT_TIMER 7 /* IRQ for the MIPS timer, C_IRQ5 */
-
-/*
- * IRQs cascaded on CPU interrupt 4 (CAUSE bit 12, C_IRQ4)
- * These defines should be tied to the register definition for the SLM
- * interrupt routine. For now, just use hard-coded values.
- */
-#define MSP_SLP_INTBASE (MSP_MIPS_INTBASE + 8)
-#define MSP_INT_EXT0 (MSP_SLP_INTBASE + 0)
- /* External interrupt 0 */
-#define MSP_INT_EXT1 (MSP_SLP_INTBASE + 1)
- /* External interrupt 1 */
-#define MSP_INT_EXT2 (MSP_SLP_INTBASE + 2)
- /* External interrupt 2 */
-#define MSP_INT_EXT3 (MSP_SLP_INTBASE + 3)
- /* External interrupt 3 */
-/* Reserved 4-7 */
-
-/*
- *************************************************************************
- * DANGER/DANGER/DANGER/DANGER/DANGER/DANGER/DANGER/DANGER/DANGER/DANGER *
- * Some MSP products have this interrupt labelled as Voice and some are *
- * SEC mbox ... *
- *************************************************************************
- */
-#define MSP_INT_SLP_VE (MSP_SLP_INTBASE + 8)
- /* Cascaded IRQ for Voice Engine*/
-#define MSP_INT_SLP_TDM (MSP_SLP_INTBASE + 9)
- /* TDM interrupt */
-#define MSP_INT_SLP_MAC0 (MSP_SLP_INTBASE + 10)
- /* Cascaded IRQ for MAC 0 */
-#define MSP_INT_SLP_MAC1 (MSP_SLP_INTBASE + 11)
- /* Cascaded IRQ for MAC 1 */
-#define MSP_INT_SEC (MSP_SLP_INTBASE + 12)
- /* IRQ for security engine */
-#define MSP_INT_PER (MSP_SLP_INTBASE + 13)
- /* Peripheral interrupt */
-#define MSP_INT_TIMER0 (MSP_SLP_INTBASE + 14)
- /* SLP timer 0 */
-#define MSP_INT_TIMER1 (MSP_SLP_INTBASE + 15)
- /* SLP timer 1 */
-#define MSP_INT_TIMER2 (MSP_SLP_INTBASE + 16)
- /* SLP timer 2 */
-#define MSP_INT_SLP_TIMER (MSP_SLP_INTBASE + 17)
- /* Cascaded MIPS timer */
-#define MSP_INT_BLKCP (MSP_SLP_INTBASE + 18)
- /* Block Copy */
-#define MSP_INT_UART0 (MSP_SLP_INTBASE + 19)
- /* UART 0 */
-#define MSP_INT_PCI (MSP_SLP_INTBASE + 20)
- /* PCI subsystem */
-#define MSP_INT_PCI_DBELL (MSP_SLP_INTBASE + 21)
- /* PCI doorbell */
-#define MSP_INT_PCI_MSI (MSP_SLP_INTBASE + 22)
- /* PCI Message Signal */
-#define MSP_INT_PCI_BC0 (MSP_SLP_INTBASE + 23)
- /* PCI Block Copy 0 */
-#define MSP_INT_PCI_BC1 (MSP_SLP_INTBASE + 24)
- /* PCI Block Copy 1 */
-#define MSP_INT_SLP_ERR (MSP_SLP_INTBASE + 25)
- /* SLP error condition */
-#define MSP_INT_MAC2 (MSP_SLP_INTBASE + 26)
- /* IRQ for MAC2 */
-/* Reserved 27-31 */
-
-/*
- * IRQs cascaded on SLP PER interrupt (MSP_INT_PER)
- */
-#define MSP_PER_INTBASE (MSP_SLP_INTBASE + 32)
-/* Reserved 0-1 */
-#define MSP_INT_UART1 (MSP_PER_INTBASE + 2)
- /* UART 1 */
-/* Reserved 3-5 */
-#define MSP_INT_2WIRE (MSP_PER_INTBASE + 6)
- /* 2-wire */
-#define MSP_INT_TM0 (MSP_PER_INTBASE + 7)
- /* Peripheral timer block out 0 */
-#define MSP_INT_TM1 (MSP_PER_INTBASE + 8)
- /* Peripheral timer block out 1 */
-/* Reserved 9 */
-#define MSP_INT_SPRX (MSP_PER_INTBASE + 10)
- /* SPI RX complete */
-#define MSP_INT_SPTX (MSP_PER_INTBASE + 11)
- /* SPI TX complete */
-#define MSP_INT_GPIO (MSP_PER_INTBASE + 12)
- /* GPIO */
-#define MSP_INT_PER_ERR (MSP_PER_INTBASE + 13)
- /* Peripheral error */
-/* Reserved 14-31 */
-
-#endif /* !_MSP_SLP_INT_H */
diff --git a/arch/mips/include/asm/mach-pmcs-msp71xx/msp_usb.h b/arch/mips/include/asm/mach-pmcs-msp71xx/msp_usb.h
deleted file mode 100644
index 3cc3edb336b6..000000000000
--- a/arch/mips/include/asm/mach-pmcs-msp71xx/msp_usb.h
+++ /dev/null
@@ -1,124 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/******************************************************************
- * Copyright (c) 2000-2007 PMC-Sierra INC.
- *
- * PMC-SIERRA INC. DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS
- * SOFTWARE.
- */
-#ifndef MSP_USB_H_
-#define MSP_USB_H_
-
-#define NUM_USB_DEVS 1
-
-/* Register spaces for USB host 0 */
-#define MSP_USB0_MAB_START (MSP_USB0_BASE + 0x0)
-#define MSP_USB0_MAB_END (MSP_USB0_BASE + 0x17)
-#define MSP_USB0_ID_START (MSP_USB0_BASE + 0x40000)
-#define MSP_USB0_ID_END (MSP_USB0_BASE + 0x4008f)
-#define MSP_USB0_HS_START (MSP_USB0_BASE + 0x40100)
-#define MSP_USB0_HS_END (MSP_USB0_BASE + 0x401FF)
-
-/* Register spaces for USB host 1 */
-#define MSP_USB1_MAB_START (MSP_USB1_BASE + 0x0)
-#define MSP_USB1_MAB_END (MSP_USB1_BASE + 0x17)
-#define MSP_USB1_ID_START (MSP_USB1_BASE + 0x40000)
-#define MSP_USB1_ID_END (MSP_USB1_BASE + 0x4008f)
-#define MSP_USB1_HS_START (MSP_USB1_BASE + 0x40100)
-#define MSP_USB1_HS_END (MSP_USB1_BASE + 0x401ff)
-
-/* USB Identification registers */
-struct msp_usbid_regs {
- u32 id; /* 0x0: Identification register */
- u32 hwgen; /* 0x4: General HW params */
- u32 hwhost; /* 0x8: Host HW params */
- u32 hwdev; /* 0xc: Device HW params */
- u32 hwtxbuf; /* 0x10: Tx buffer HW params */
- u32 hwrxbuf; /* 0x14: Rx buffer HW params */
- u32 reserved[26];
- u32 timer0_load; /* 0x80: General-purpose timer 0 load*/
- u32 timer0_ctrl; /* 0x84: General-purpose timer 0 control */
- u32 timer1_load; /* 0x88: General-purpose timer 1 load*/
- u32 timer1_ctrl; /* 0x8c: General-purpose timer 1 control */
-};
-
-/* MSBus to AMBA registers */
-struct msp_mab_regs {
- u32 isr; /* 0x0: Interrupt status */
- u32 imr; /* 0x4: Interrupt mask */
- u32 thcr0; /* 0x8: Transaction header capture 0 */
- u32 thcr1; /* 0xc: Transaction header capture 1 */
- u32 int_stat; /* 0x10: Interrupt status summary */
- u32 phy_cfg; /* 0x14: USB phy config */
-};
-
-/* EHCI registers */
-struct msp_usbhs_regs {
- u32 hciver; /* 0x0: Version and offset to operational regs */
- u32 hcsparams; /* 0x4: Host control structural parameters */
- u32 hccparams; /* 0x8: Host control capability parameters */
- u32 reserved0[5];
- u32 dciver; /* 0x20: Device interface version */
- u32 dccparams; /* 0x24: Device control capability parameters */
- u32 reserved1[6];
- u32 cmd; /* 0x40: USB command */
- u32 sts; /* 0x44: USB status */
- u32 int_ena; /* 0x48: USB interrupt enable */
- u32 frindex; /* 0x4c: Frame index */
- u32 reserved3;
- union {
- struct {
- u32 flb_addr; /* 0x54: Frame list base address */
- u32 next_async_addr; /* 0x58: next asynchronous addr */
- u32 ttctrl; /* 0x5c: embedded transaction translator
- async buffer status */
- u32 burst_size; /* 0x60: Controller burst size */
- u32 tx_fifo_ctrl; /* 0x64: Tx latency FIFO tuning */
- u32 reserved0[4];
- u32 endpt_nak; /* 0x78: Endpoint NAK */
- u32 endpt_nak_ena; /* 0x7c: Endpoint NAK enable */
- u32 cfg_flag; /* 0x80: Config flag */
- u32 port_sc1; /* 0x84: Port status & control 1 */
- u32 reserved1[7];
- u32 otgsc; /* 0xa4: OTG status & control */
- u32 mode; /* 0xa8: USB controller mode */
- } host;
-
- struct {
- u32 dev_addr; /* 0x54: Device address */
- u32 endpt_list_addr; /* 0x58: Endpoint list address */
- u32 reserved0[7];
- u32 endpt_nak; /* 0x74 */
- u32 endpt_nak_ctrl; /* 0x78 */
- u32 cfg_flag; /* 0x80 */
- u32 port_sc1; /* 0x84: Port status & control 1 */
- u32 reserved[7];
- u32 otgsc; /* 0xa4: OTG status & control */
- u32 mode; /* 0xa8: USB controller mode */
- u32 endpt_setup_stat; /* 0xac */
- u32 endpt_prime; /* 0xb0 */
- u32 endpt_flush; /* 0xb4 */
- u32 endpt_stat; /* 0xb8 */
- u32 endpt_complete; /* 0xbc */
- u32 endpt_ctrl0; /* 0xc0 */
- u32 endpt_ctrl1; /* 0xc4 */
- u32 endpt_ctrl2; /* 0xc8 */
- u32 endpt_ctrl3; /* 0xcc */
- } device;
- } u;
-};
-/*
- * Container for the more-generic platform_device.
- * This exists mainly as a way to map the non-standard register
- * spaces and make them accessible to the USB ISR.
- */
-struct mspusb_device {
- struct msp_mab_regs __iomem *mab_regs;
- struct msp_usbid_regs __iomem *usbid_regs;
- struct msp_usbhs_regs __iomem *usbhs_regs;
- struct platform_device dev;
-};
-
-#define to_mspusb_device(x) container_of((x), struct mspusb_device, dev)
-#define TO_HOST_ID(x) ((x) & 0x3)
-#endif /*MSP_USB_H_*/
diff --git a/arch/mips/include/asm/mach-pmcs-msp71xx/war.h b/arch/mips/include/asm/mach-pmcs-msp71xx/war.h
deleted file mode 100644
index 31c546f58bb5..000000000000
--- a/arch/mips/include/asm/mach-pmcs-msp71xx/war.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2002, 2004, 2007 by Ralf Baechle <ralf@linux-mips.org>
- */
-#ifndef __ASM_MIPS_PMC_SIERRA_WAR_H
-#define __ASM_MIPS_PMC_SIERRA_WAR_H
-
-#define R4600_V1_INDEX_ICACHEOP_WAR 0
-#define R4600_V1_HIT_CACHEOP_WAR 0
-#define R4600_V2_HIT_CACHEOP_WAR 0
-#define BCM1250_M3_WAR 0
-#define SIBYTE_1956_WAR 0
-#define MIPS4K_ICACHE_REFILL_WAR 0
-#define MIPS_CACHE_SYNC_WAR 0
-#define TX49XX_ICACHE_INDEX_INV_WAR 0
-#define ICACHE_REFILLS_WORKAROUND_WAR 0
-#define R10000_LLSC_WAR 0
-#if defined(CONFIG_PMC_MSP7120_EVAL) || defined(CONFIG_PMC_MSP7120_GW) || \
- defined(CONFIG_PMC_MSP7120_FPGA)
-#define MIPS34K_MISSED_ITLB_WAR 1
-#else
-#define MIPS34K_MISSED_ITLB_WAR 0
-#endif
-
-#endif /* __ASM_MIPS_PMC_SIERRA_WAR_H */
diff --git a/arch/mips/include/asm/mach-ralink/mt7620/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ralink/mt7620/cpu-feature-overrides.h
index 6ea5908f0c11..c4579f1705c2 100644
--- a/arch/mips/include/asm/mach-ralink/mt7620/cpu-feature-overrides.h
+++ b/arch/mips/include/asm/mach-ralink/mt7620/cpu-feature-overrides.h
@@ -45,7 +45,6 @@
#define cpu_has_64bits 0
#define cpu_has_64bit_zero_reg 0
#define cpu_has_64bit_gp_regs 0
-#define cpu_has_64bit_addresses 0
#define cpu_dcache_line_size() 32
#define cpu_icache_line_size() 32
diff --git a/arch/mips/include/asm/mach-ralink/mt7621.h b/arch/mips/include/asm/mach-ralink/mt7621.h
index 65483a4681ab..e1af1ba50bd8 100644
--- a/arch/mips/include/asm/mach-ralink/mt7621.h
+++ b/arch/mips/include/asm/mach-ralink/mt7621.h
@@ -31,6 +31,4 @@
#define MT7621_CHIP_NAME0 0x3637544D
#define MT7621_CHIP_NAME1 0x20203132
-#define MIPS_GIC_IRQ_BASE (MIPS_CPU_IRQ_BASE + 8)
-
#endif
diff --git a/arch/mips/include/asm/mach-ralink/mt7621/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ralink/mt7621/cpu-feature-overrides.h
index e06f517b2588..168359a0a58d 100644
--- a/arch/mips/include/asm/mach-ralink/mt7621/cpu-feature-overrides.h
+++ b/arch/mips/include/asm/mach-ralink/mt7621/cpu-feature-overrides.h
@@ -46,7 +46,6 @@
#define cpu_has_64bits 0
#define cpu_has_64bit_zero_reg 0
#define cpu_has_64bit_gp_regs 0
-#define cpu_has_64bit_addresses 0
#define cpu_dcache_line_size() 32
#define cpu_icache_line_size() 32
diff --git a/arch/mips/include/asm/mach-ralink/rt288x/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ralink/rt288x/cpu-feature-overrides.h
index 9c069646d0bd..fdaf8c9182bc 100644
--- a/arch/mips/include/asm/mach-ralink/rt288x/cpu-feature-overrides.h
+++ b/arch/mips/include/asm/mach-ralink/rt288x/cpu-feature-overrides.h
@@ -44,7 +44,6 @@
#define cpu_has_64bits 0
#define cpu_has_64bit_zero_reg 0
#define cpu_has_64bit_gp_regs 0
-#define cpu_has_64bit_addresses 0
#define cpu_dcache_line_size() 16
#define cpu_icache_line_size() 16
diff --git a/arch/mips/include/asm/mach-ralink/rt305x/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ralink/rt305x/cpu-feature-overrides.h
index 2e423fd15384..7a385fe784a6 100644
--- a/arch/mips/include/asm/mach-ralink/rt305x/cpu-feature-overrides.h
+++ b/arch/mips/include/asm/mach-ralink/rt305x/cpu-feature-overrides.h
@@ -44,7 +44,6 @@
#define cpu_has_64bits 0
#define cpu_has_64bit_zero_reg 0
#define cpu_has_64bit_gp_regs 0
-#define cpu_has_64bit_addresses 0
#define cpu_dcache_line_size() 32
#define cpu_icache_line_size() 32
diff --git a/arch/mips/include/asm/mach-ralink/rt3883/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ralink/rt3883/cpu-feature-overrides.h
index 7cee0e232580..0a61910f6521 100644
--- a/arch/mips/include/asm/mach-ralink/rt3883/cpu-feature-overrides.h
+++ b/arch/mips/include/asm/mach-ralink/rt3883/cpu-feature-overrides.h
@@ -43,7 +43,6 @@
#define cpu_has_64bits 0
#define cpu_has_64bit_zero_reg 0
#define cpu_has_64bit_gp_regs 0
-#define cpu_has_64bit_addresses 0
#define cpu_dcache_line_size() 32
#define cpu_icache_line_size() 32
diff --git a/arch/mips/include/asm/mach-rc32434/cpu-feature-overrides.h b/arch/mips/include/asm/mach-rc32434/cpu-feature-overrides.h
index bc46179fdf40..8539ccfb69b7 100644
--- a/arch/mips/include/asm/mach-rc32434/cpu-feature-overrides.h
+++ b/arch/mips/include/asm/mach-rc32434/cpu-feature-overrides.h
@@ -54,7 +54,6 @@
#define cpu_has_64bits 0
#define cpu_has_64bit_zero_reg 0
#define cpu_has_64bit_gp_regs 0
-#define cpu_has_64bit_addresses 0
#define cpu_has_inclusive_pcaches 0
diff --git a/arch/mips/include/asm/mach-rc32434/pci.h b/arch/mips/include/asm/mach-rc32434/pci.h
index 6f40d1515580..9a6eefd12757 100644
--- a/arch/mips/include/asm/mach-rc32434/pci.h
+++ b/arch/mips/include/asm/mach-rc32434/pci.h
@@ -319,9 +319,6 @@ struct pci_msu {
#define PCIM_H_EA 0x3
#define PCIM_H_IA_FIX 0x4
#define PCIM_H_IA_RR 0x5
-#if 0
-#define PCI_ADDR_START 0x13000000
-#endif
#define PCI_ADDR_START 0x50000000
diff --git a/arch/mips/include/asm/mach-tx39xx/ioremap.h b/arch/mips/include/asm/mach-tx39xx/ioremap.h
index 077b3c9971f7..157a7292397e 100644
--- a/arch/mips/include/asm/mach-tx39xx/ioremap.h
+++ b/arch/mips/include/asm/mach-tx39xx/ioremap.h
@@ -7,15 +7,6 @@
#include <linux/types.h>
-/*
- * Allow physical addresses to be fixed up to help peripherals located
- * outside the low 32-bit range -- generic pass-through version.
- */
-static inline phys_addr_t fixup_bigphys_addr(phys_addr_t phys_addr, phys_addr_t size)
-{
- return phys_addr;
-}
-
static inline void __iomem *plat_ioremap(phys_addr_t offset, unsigned long size,
unsigned long flags)
{
diff --git a/arch/mips/include/asm/mach-tx49xx/ioremap.h b/arch/mips/include/asm/mach-tx49xx/ioremap.h
index c6b9e05f44c4..b1f3710acf8e 100644
--- a/arch/mips/include/asm/mach-tx49xx/ioremap.h
+++ b/arch/mips/include/asm/mach-tx49xx/ioremap.h
@@ -7,15 +7,6 @@
#include <linux/types.h>
-/*
- * Allow physical addresses to be fixed up to help peripherals located
- * outside the low 32-bit range -- generic pass-through version.
- */
-static inline phys_addr_t fixup_bigphys_addr(phys_addr_t phys_addr, phys_addr_t size)
-{
- return phys_addr;
-}
-
static inline void __iomem *plat_ioremap(phys_addr_t offset, unsigned long size,
unsigned long flags)
{
diff --git a/arch/mips/include/asm/mach-xilfpga/irq.h b/arch/mips/include/asm/mach-xilfpga/irq.h
deleted file mode 100644
index 15ad29ec1dee..000000000000
--- a/arch/mips/include/asm/mach-xilfpga/irq.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Copyright (C) 2015 Imagination Technologies
- * Author: Zubair Lutfullah Kakakhel <Zubair.Kakakhel@imgtec.com>
- */
-
-#ifndef __MIPS_ASM_MACH_XILFPGA_IRQ_H__
-#define __MIPS_ASM_MACH_XILFPGA_IRQ_H__
-
-#define NR_IRQS 32
-
-#include <asm/mach-generic/irq.h>
-
-#endif /* __MIPS_ASM_MACH_XILFPGA_IRQ_H__ */
diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h
index 796fe47cfd17..796dbb86575b 100644
--- a/arch/mips/include/asm/mipsregs.h
+++ b/arch/mips/include/asm/mipsregs.h
@@ -468,6 +468,7 @@
#define EXCCODE_THREAD 25 /* Thread exceptions (MT) */
#define EXCCODE_DSPDIS 26 /* DSP disabled exception */
#define EXCCODE_GE 27 /* Virtualized guest exception (VZ) */
+#define EXCCODE_CACHEERR 30 /* Parity/ECC occurred on a core */
/* Implementation specific trap codes used by MIPS cores */
#define MIPS_EXCCODE_TLBPAR 16 /* TLB parity error exception */
@@ -563,6 +564,17 @@
#define MIPS_CONF_MT_FTLB (_ULCAST_(4) << 7)
#define MIPS_CONF_AR (_ULCAST_(7) << 10)
#define MIPS_CONF_AT (_ULCAST_(3) << 13)
+#define MIPS_CONF_BE (_ULCAST_(1) << 15)
+#define MIPS_CONF_BM (_ULCAST_(1) << 16)
+#define MIPS_CONF_MM (_ULCAST_(3) << 17)
+#define MIPS_CONF_MM_SYSAD (_ULCAST_(1) << 17)
+#define MIPS_CONF_MM_FULL (_ULCAST_(2) << 17)
+#define MIPS_CONF_SB (_ULCAST_(1) << 21)
+#define MIPS_CONF_UDI (_ULCAST_(1) << 22)
+#define MIPS_CONF_DSP (_ULCAST_(1) << 23)
+#define MIPS_CONF_ISP (_ULCAST_(1) << 24)
+#define MIPS_CONF_KU (_ULCAST_(3) << 25)
+#define MIPS_CONF_K23 (_ULCAST_(3) << 28)
#define MIPS_CONF_M (_ULCAST_(1) << 31)
/*
@@ -674,13 +686,38 @@
#define MIPS_CONF5_CV (_ULCAST_(1) << 29)
#define MIPS_CONF5_K (_ULCAST_(1) << 30)
-#define MIPS_CONF6_SYND (_ULCAST_(1) << 13)
+/* Config6 feature bits for proAptiv/P5600 */
+
+/* Jump register cache prediction disable */
+#define MIPS_CONF6_MTI_JRCD (_ULCAST_(1) << 0)
+/* MIPSr6 extensions enable */
+#define MIPS_CONF6_MTI_R6 (_ULCAST_(1) << 2)
+/* IFU Performance Control */
+#define MIPS_CONF6_MTI_IFUPERFCTL (_ULCAST_(3) << 10)
+#define MIPS_CONF6_MTI_SYND (_ULCAST_(1) << 13)
+/* Sleep state performance counter disable */
+#define MIPS_CONF6_MTI_SPCD (_ULCAST_(1) << 14)
/* proAptiv FTLB on/off bit */
-#define MIPS_CONF6_FTLBEN (_ULCAST_(1) << 15)
-/* Loongson-3 FTLB on/off bit */
-#define MIPS_CONF6_FTLBDIS (_ULCAST_(1) << 22)
+#define MIPS_CONF6_MTI_FTLBEN (_ULCAST_(1) << 15)
+/* Disable load/store bonding */
+#define MIPS_CONF6_MTI_DLSB (_ULCAST_(1) << 21)
/* FTLB probability bits */
-#define MIPS_CONF6_FTLBP_SHIFT (16)
+#define MIPS_CONF6_MTI_FTLBP_SHIFT (16)
+
+/* Config6 feature bits for Loongson-3 */
+
+/* Loongson-3 internal timer bit */
+#define MIPS_CONF6_LOONGSON_INTIMER (_ULCAST_(1) << 6)
+/* Loongson-3 external timer bit */
+#define MIPS_CONF6_LOONGSON_EXTIMER (_ULCAST_(1) << 7)
+/* Loongson-3 SFB on/off bit, STFill in manual */
+#define MIPS_CONF6_LOONGSON_SFBEN (_ULCAST_(1) << 8)
+/* Loongson-3's LL on exclusive cacheline */
+#define MIPS_CONF6_LOONGSON_LLEXC (_ULCAST_(1) << 16)
+/* Loongson-3's SC has a random delay */
+#define MIPS_CONF6_LOONGSON_SCRAND (_ULCAST_(1) << 17)
+/* Loongson-3 FTLB on/off bit, VTLBOnly in manual */
+#define MIPS_CONF6_LOONGSON_FTLBDIS (_ULCAST_(1) << 22)
#define MIPS_CONF7_WII (_ULCAST_(1) << 31)
@@ -753,10 +790,18 @@
/* MAAR bit definitions */
#define MIPS_MAAR_VH (_U64CAST_(1) << 63)
-#define MIPS_MAAR_ADDR ((BIT_ULL(BITS_PER_LONG - 12) - 1) << 12)
+#define MIPS_MAAR_ADDR GENMASK_ULL(55, 12)
#define MIPS_MAAR_ADDR_SHIFT 12
#define MIPS_MAAR_S (_ULCAST_(1) << 1)
#define MIPS_MAAR_VL (_ULCAST_(1) << 0)
+#ifdef CONFIG_XPA
+#define MIPS_MAAR_V (MIPS_MAAR_VH | MIPS_MAAR_VL)
+#else
+#define MIPS_MAAR_V MIPS_MAAR_VL
+#endif
+#define MIPS_MAARX_VH (_ULCAST_(1) << 31)
+#define MIPS_MAARX_ADDR 0xF
+#define MIPS_MAARX_ADDR_SHIFT 32
/* MAARI bit definitions */
#define MIPS_MAARI_INDEX (_ULCAST_(0x3f) << 0)
@@ -997,6 +1042,8 @@
#define LOONGSON_DIAG_ITLB (_ULCAST_(1) << 2)
/* Flush DTLB */
#define LOONGSON_DIAG_DTLB (_ULCAST_(1) << 3)
+/* Allow some CACHE instructions (CACHE0, 1, 3, 21 and 23) in user mode */
+#define LOONGSON_DIAG_UCAC (_ULCAST_(1) << 8)
/* Flush VTLB */
#define LOONGSON_DIAG_VTLB (_ULCAST_(1) << 12)
/* Flush FTLB */
@@ -1717,6 +1764,8 @@ do { \
#define write_c0_lladdr(val) __write_ulong_c0_register($17, 0, val)
#define read_c0_maar() __read_ulong_c0_register($17, 1)
#define write_c0_maar(val) __write_ulong_c0_register($17, 1, val)
+#define readx_c0_maar() __readx_32bit_c0_register($17, 1)
+#define writex_c0_maar(val) __writex_32bit_c0_register($17, 1, val)
#define read_c0_maari() __read_32bit_c0_register($17, 2)
#define write_c0_maari(val) __write_32bit_c0_register($17, 2, val)
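
For context on the MAAR changes above, a minimal, hypothetical sketch of marking one MAAR entry valid using the new MIPS_MAAR_V definition follows; the index, the physical-address packing and the hazard barrier are illustrative assumptions, not taken from this patch.

	/* Hypothetical sketch: select MAAR 'idx' and mark the region it covers
	 * as speculatable and valid. On XPA kernels MIPS_MAAR_V sets VH and VL,
	 * otherwise only VL. The '>> 4' packing of 'pa' into MIPS_MAAR_ADDR is
	 * an assumption about the register layout. */
	write_c0_maari(idx);
	back_to_back_c0_hazard();
	write_c0_maar(((pa >> 4) & MIPS_MAAR_ADDR) | MIPS_MAAR_S | MIPS_MAAR_V);
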
diff --git a/arch/mips/include/asm/nile4.h b/arch/mips/include/asm/nile4.h
deleted file mode 100644
index 9d36b7823603..000000000000
--- a/arch/mips/include/asm/nile4.h
+++ /dev/null
@@ -1,310 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * asm-mips/nile4.h -- NEC Vrc-5074 Nile 4 definitions
- *
- * Copyright (C) 2000 Geert Uytterhoeven <geert@linux-m68k.org>
- * Sony Software Development Center Europe (SDCE), Brussels
- *
- * This file is based on the following documentation:
- *
- * NEC Vrc 5074 System Controller Data Sheet, June 1998
- */
-
-#ifndef _ASM_NILE4_H
-#define _ASM_NILE4_H
-
-#define NILE4_BASE 0xbfa00000
-#define NILE4_SIZE 0x00200000 /* 2 MB */
-
-
- /*
- * Physical Device Address Registers (PDARs)
- */
-
-#define NILE4_SDRAM0 0x0000 /* SDRAM Bank 0 [R/W] */
-#define NILE4_SDRAM1 0x0008 /* SDRAM Bank 1 [R/W] */
-#define NILE4_DCS2 0x0010 /* Device Chip-Select 2 [R/W] */
-#define NILE4_DCS3 0x0018 /* Device Chip-Select 3 [R/W] */
-#define NILE4_DCS4 0x0020 /* Device Chip-Select 4 [R/W] */
-#define NILE4_DCS5 0x0028 /* Device Chip-Select 5 [R/W] */
-#define NILE4_DCS6 0x0030 /* Device Chip-Select 6 [R/W] */
-#define NILE4_DCS7 0x0038 /* Device Chip-Select 7 [R/W] */
-#define NILE4_DCS8 0x0040 /* Device Chip-Select 8 [R/W] */
-#define NILE4_PCIW0 0x0060 /* PCI Address Window 0 [R/W] */
-#define NILE4_PCIW1 0x0068 /* PCI Address Window 1 [R/W] */
-#define NILE4_INTCS 0x0070 /* Controller Internal Registers and Devices */
- /* [R/W] */
-#define NILE4_BOOTCS 0x0078 /* Boot ROM Chip-Select [R/W] */
-
-
- /*
- * CPU Interface Registers
- */
-
-#define NILE4_CPUSTAT 0x0080 /* CPU Status [R/W] */
-#define NILE4_INTCTRL 0x0088 /* Interrupt Control [R/W] */
-#define NILE4_INTSTAT0 0x0090 /* Interrupt Status 0 [R] */
-#define NILE4_INTSTAT1 0x0098 /* Interrupt Status 1 and CPU Interrupt */
- /* Enable [R/W] */
-#define NILE4_INTCLR 0x00A0 /* Interrupt Clear [R/W] */
-#define NILE4_INTPPES 0x00A8 /* PCI Interrupt Control [R/W] */
-
-
- /*
- * Memory-Interface Registers
- */
-
-#define NILE4_MEMCTRL 0x00C0 /* Memory Control */
-#define NILE4_ACSTIME 0x00C8 /* Memory Access Timing [R/W] */
-#define NILE4_CHKERR 0x00D0 /* Memory Check Error Status [R] */
-
-
- /*
- * PCI-Bus Registers
- */
-
-#define NILE4_PCICTRL 0x00E0 /* PCI Control [R/W] */
-#define NILE4_PCIARB 0x00E8 /* PCI Arbiter [R/W] */
-#define NILE4_PCIINIT0 0x00F0 /* PCI Master (Initiator) 0 [R/W] */
-#define NILE4_PCIINIT1 0x00F8 /* PCI Master (Initiator) 1 [R/W] */
-#define NILE4_PCIERR 0x00B8 /* PCI Error [R/W] */
-
-
- /*
- * Local-Bus Registers
- */
-
-#define NILE4_LCNFG 0x0100 /* Local Bus Configuration [R/W] */
-#define NILE4_LCST2 0x0110 /* Local Bus Chip-Select Timing 2 [R/W] */
-#define NILE4_LCST3 0x0118 /* Local Bus Chip-Select Timing 3 [R/W] */
-#define NILE4_LCST4 0x0120 /* Local Bus Chip-Select Timing 4 [R/W] */
-#define NILE4_LCST5 0x0128 /* Local Bus Chip-Select Timing 5 [R/W] */
-#define NILE4_LCST6 0x0130 /* Local Bus Chip-Select Timing 6 [R/W] */
-#define NILE4_LCST7 0x0138 /* Local Bus Chip-Select Timing 7 [R/W] */
-#define NILE4_LCST8 0x0140 /* Local Bus Chip-Select Timing 8 [R/W] */
-#define NILE4_DCSFN 0x0150 /* Device Chip-Select Muxing and Output */
- /* Enables [R/W] */
-#define NILE4_DCSIO 0x0158 /* Device Chip-Selects As I/O Bits [R/W] */
-#define NILE4_BCST 0x0178 /* Local Boot Chip-Select Timing [R/W] */
-
-
- /*
- * DMA Registers
- */
-
-#define NILE4_DMACTRL0 0x0180 /* DMA Control 0 [R/W] */
-#define NILE4_DMASRCA0 0x0188 /* DMA Source Address 0 [R/W] */
-#define NILE4_DMADESA0 0x0190 /* DMA Destination Address 0 [R/W] */
-#define NILE4_DMACTRL1 0x0198 /* DMA Control 1 [R/W] */
-#define NILE4_DMASRCA1 0x01A0 /* DMA Source Address 1 [R/W] */
-#define NILE4_DMADESA1 0x01A8 /* DMA Destination Address 1 [R/W] */
-
-
- /*
- * Timer Registers
- */
-
-#define NILE4_T0CTRL 0x01C0 /* SDRAM Refresh Control [R/W] */
-#define NILE4_T0CNTR 0x01C8 /* SDRAM Refresh Counter [R/W] */
-#define NILE4_T1CTRL 0x01D0 /* CPU-Bus Read Time-Out Control [R/W] */
-#define NILE4_T1CNTR 0x01D8 /* CPU-Bus Read Time-Out Counter [R/W] */
-#define NILE4_T2CTRL 0x01E0 /* General-Purpose Timer Control [R/W] */
-#define NILE4_T2CNTR 0x01E8 /* General-Purpose Timer Counter [R/W] */
-#define NILE4_T3CTRL 0x01F0 /* Watchdog Timer Control [R/W] */
-#define NILE4_T3CNTR 0x01F8 /* Watchdog Timer Counter [R/W] */
-
-
- /*
- * PCI Configuration Space Registers
- */
-
-#define NILE4_PCI_BASE 0x0200
-
-#define NILE4_VID 0x0200 /* PCI Vendor ID [R] */
-#define NILE4_DID 0x0202 /* PCI Device ID [R] */
-#define NILE4_PCICMD 0x0204 /* PCI Command [R/W] */
-#define NILE4_PCISTS 0x0206 /* PCI Status [R/W] */
-#define NILE4_REVID 0x0208 /* PCI Revision ID [R] */
-#define NILE4_CLASS 0x0209 /* PCI Class Code [R] */
-#define NILE4_CLSIZ 0x020C /* PCI Cache Line Size [R/W] */
-#define NILE4_MLTIM 0x020D /* PCI Latency Timer [R/W] */
-#define NILE4_HTYPE 0x020E /* PCI Header Type [R] */
-#define NILE4_BIST 0x020F /* BIST [R] (unimplemented) */
-#define NILE4_BARC 0x0210 /* PCI Base Address Register Control [R/W] */
-#define NILE4_BAR0 0x0218 /* PCI Base Address Register 0 [R/W] */
-#define NILE4_BAR1 0x0220 /* PCI Base Address Register 1 [R/W] */
-#define NILE4_CIS 0x0228 /* PCI Cardbus CIS Pointer [R] */
- /* (unimplemented) */
-#define NILE4_SSVID 0x022C /* PCI Sub-System Vendor ID [R/W] */
-#define NILE4_SSID 0x022E /* PCI Sub-System ID [R/W] */
-#define NILE4_ROM 0x0230 /* Expansion ROM Base Address [R] */
- /* (unimplemented) */
-#define NILE4_INTLIN 0x023C /* PCI Interrupt Line [R/W] */
-#define NILE4_INTPIN 0x023D /* PCI Interrupt Pin [R] */
-#define NILE4_MINGNT 0x023E /* PCI Min_Gnt [R] (unimplemented) */
-#define NILE4_MAXLAT 0x023F /* PCI Max_Lat [R] (unimplemented) */
-#define NILE4_BAR2 0x0240 /* PCI Base Address Register 2 [R/W] */
-#define NILE4_BAR3 0x0248 /* PCI Base Address Register 3 [R/W] */
-#define NILE4_BAR4 0x0250 /* PCI Base Address Register 4 [R/W] */
-#define NILE4_BAR5 0x0258 /* PCI Base Address Register 5 [R/W] */
-#define NILE4_BAR6 0x0260 /* PCI Base Address Register 6 [R/W] */
-#define NILE4_BAR7 0x0268 /* PCI Base Address Register 7 [R/W] */
-#define NILE4_BAR8 0x0270 /* PCI Base Address Register 8 [R/W] */
-#define NILE4_BARB 0x0278 /* PCI Base Address Register BOOT [R/W] */
-
-
- /*
- * Serial-Port Registers
- */
-
-#define NILE4_UART_BASE 0x0300
-
-#define NILE4_UARTRBR 0x0300 /* UART Receiver Data Buffer [R] */
-#define NILE4_UARTTHR 0x0300 /* UART Transmitter Data Holding [W] */
-#define NILE4_UARTIER 0x0308 /* UART Interrupt Enable [R/W] */
-#define NILE4_UARTDLL 0x0300 /* UART Divisor Latch LSB [R/W] */
-#define NILE4_UARTDLM 0x0308 /* UART Divisor Latch MSB [R/W] */
-#define NILE4_UARTIIR 0x0310 /* UART Interrupt ID [R] */
-#define NILE4_UARTFCR 0x0310 /* UART FIFO Control [W] */
-#define NILE4_UARTLCR 0x0318 /* UART Line Control [R/W] */
-#define NILE4_UARTMCR 0x0320 /* UART Modem Control [R/W] */
-#define NILE4_UARTLSR 0x0328 /* UART Line Status [R/W] */
-#define NILE4_UARTMSR 0x0330 /* UART Modem Status [R/W] */
-#define NILE4_UARTSCR 0x0338 /* UART Scratch [R/W] */
-
-#define NILE4_UART_BASE_BAUD 520833 /* 100 MHz / 12 / 16 */
-
-
- /*
- * Interrupt Lines
- */
-
-#define NILE4_INT_CPCE 0 /* CPU-Interface Parity-Error Interrupt */
-#define NILE4_INT_CNTD 1 /* CPU No-Target Decode Interrupt */
-#define NILE4_INT_MCE 2 /* Memory-Check Error Interrupt */
-#define NILE4_INT_DMA 3 /* DMA Controller Interrupt */
-#define NILE4_INT_UART 4 /* UART Interrupt */
-#define NILE4_INT_WDOG 5 /* Watchdog Timer Interrupt */
-#define NILE4_INT_GPT 6 /* General-Purpose Timer Interrupt */
-#define NILE4_INT_LBRTD 7 /* Local-Bus Ready Timer Interrupt */
-#define NILE4_INT_INTA 8 /* PCI Interrupt Signal INTA# */
-#define NILE4_INT_INTB 9 /* PCI Interrupt Signal INTB# */
-#define NILE4_INT_INTC 10 /* PCI Interrupt Signal INTC# */
-#define NILE4_INT_INTD 11 /* PCI Interrupt Signal INTD# */
-#define NILE4_INT_INTE 12 /* PCI Interrupt Signal INTE# (ISA cascade) */
-#define NILE4_INT_RESV 13 /* Reserved */
-#define NILE4_INT_PCIS 14 /* PCI SERR# Interrupt */
-#define NILE4_INT_PCIE 15 /* PCI Internal Error Interrupt */
-
-
- /*
- * Nile 4 Register Access
- */
-
-static inline void nile4_sync(void)
-{
- volatile u32 *p = (volatile u32 *)0xbfc00000;
- (void)(*p);
-}
-
-static inline void nile4_out32(u32 offset, u32 val)
-{
- *(volatile u32 *)(NILE4_BASE+offset) = val;
- nile4_sync();
-}
-
-static inline u32 nile4_in32(u32 offset)
-{
- u32 val = *(volatile u32 *)(NILE4_BASE+offset);
- nile4_sync();
- return val;
-}
-
-static inline void nile4_out16(u32 offset, u16 val)
-{
- *(volatile u16 *)(NILE4_BASE+offset) = val;
- nile4_sync();
-}
-
-static inline u16 nile4_in16(u32 offset)
-{
- u16 val = *(volatile u16 *)(NILE4_BASE+offset);
- nile4_sync();
- return val;
-}
-
-static inline void nile4_out8(u32 offset, u8 val)
-{
- *(volatile u8 *)(NILE4_BASE+offset) = val;
- nile4_sync();
-}
-
-static inline u8 nile4_in8(u32 offset)
-{
- u8 val = *(volatile u8 *)(NILE4_BASE+offset);
- nile4_sync();
- return val;
-}
-
-
- /*
- * Physical Device Address Registers
- */
-
-extern void nile4_set_pdar(u32 pdar, u32 phys, u32 size, int width,
- int on_memory_bus, int visible);
-
-
- /*
- * PCI Master Registers
- */
-
-#define NILE4_PCICMD_IACK 0 /* PCI Interrupt Acknowledge */
-#define NILE4_PCICMD_IO 1 /* PCI I/O Space */
-#define NILE4_PCICMD_MEM 3 /* PCI Memory Space */
-#define NILE4_PCICMD_CFG 5 /* PCI Configuration Space */
-
-
- /*
- * PCI Address Spaces
- *
- * Note that these are multiplexed using PCIINIT[01]!
- */
-
-#define NILE4_PCI_IO_BASE 0xa6000000
-#define NILE4_PCI_MEM_BASE 0xa8000000
-#define NILE4_PCI_CFG_BASE NILE4_PCI_MEM_BASE
-#define NILE4_PCI_IACK_BASE NILE4_PCI_IO_BASE
-
-
-extern void nile4_set_pmr(u32 pmr, u32 type, u32 addr);
-
-
- /*
- * Interrupt Programming
- */
-
-#define NUM_I8259_INTERRUPTS 16
-#define NUM_NILE4_INTERRUPTS 16
-
-#define IRQ_I8259_CASCADE NILE4_INT_INTE
-#define is_i8259_irq(irq) ((irq) < NUM_I8259_INTERRUPTS)
-#define nile4_to_irq(n) ((n)+NUM_I8259_INTERRUPTS)
-#define irq_to_nile4(n) ((n)-NUM_I8259_INTERRUPTS)
-
-extern void nile4_map_irq(int nile4_irq, int cpu_irq);
-extern void nile4_map_irq_all(int cpu_irq);
-extern void nile4_enable_irq(unsigned int nile4_irq);
-extern void nile4_disable_irq(unsigned int nile4_irq);
-extern void nile4_disable_irq_all(void);
-extern u16 nile4_get_irq_stat(int cpu_irq);
-extern void nile4_enable_irq_output(int cpu_irq);
-extern void nile4_disable_irq_output(int cpu_irq);
-extern void nile4_set_pci_irq_polarity(int pci_irq, int high);
-extern void nile4_set_pci_irq_level_or_edge(int pci_irq, int level);
-extern void nile4_clear_irq(int nile4_irq);
-extern void nile4_clear_irq_mask(u32 mask);
-extern u8 nile4_i8259_iack(void);
-extern void nile4_dump_irq_status(void); /* Debug */
-
-#endif
diff --git a/arch/mips/include/asm/octeon/cvmx-sli-defs.h b/arch/mips/include/asm/octeon/cvmx-sli-defs.h
index cbc7cdae1c6a..5ef6c38150f5 100644
--- a/arch/mips/include/asm/octeon/cvmx-sli-defs.h
+++ b/arch/mips/include/asm/octeon/cvmx-sli-defs.h
@@ -46,7 +46,7 @@ static inline uint64_t CVMX_SLI_PCIE_MSI_RCV_FUNC(void)
case OCTEON_CN78XX & OCTEON_FAMILY_MASK:
if (OCTEON_IS_MODEL(OCTEON_CN78XX_PASS1_X))
return 0x0000000000003CB0ull;
- /* Else, fall through */
+ fallthrough;
default:
return 0x0000000000023CB0ull;
}
diff --git a/arch/mips/include/asm/page.h b/arch/mips/include/asm/page.h
index e2f503fc7a84..6a77bc4a6eec 100644
--- a/arch/mips/include/asm/page.h
+++ b/arch/mips/include/asm/page.h
@@ -49,7 +49,7 @@ static inline unsigned int page_size_ftlb(unsigned int mmuextdef)
return 6;
if (PAGE_SIZE > (256 << 10))
return 7; /* reserved */
- /* fall through */
+ fallthrough;
case MIPS_CONF4_MMUEXTDEF_VTLBSIZEEXT:
return (PAGE_SHIFT - 10) / 2;
default:
diff --git a/arch/mips/include/asm/pgtable-64.h b/arch/mips/include/asm/pgtable-64.h
index f92716cfa4f4..ee5dc0c145b9 100644
--- a/arch/mips/include/asm/pgtable-64.h
+++ b/arch/mips/include/asm/pgtable-64.h
@@ -172,6 +172,8 @@
extern pte_t invalid_pte_table[PTRS_PER_PTE];
+#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
+
#ifndef __PAGETABLE_PUD_FOLDED
/*
* For 4-level pagetables we defines these ourselves, for 3-level the
@@ -210,8 +212,6 @@ static inline void p4d_clear(p4d_t *p4dp)
p4d_val(*p4dp) = (unsigned long)invalid_pud_table;
}
-#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
-
static inline unsigned long p4d_page_vaddr(p4d_t p4d)
{
return p4d_val(p4d);
diff --git a/arch/mips/include/asm/pgtable-bits.h b/arch/mips/include/asm/pgtable-bits.h
index 4da79b85c179..e26dc41a8a68 100644
--- a/arch/mips/include/asm/pgtable-bits.h
+++ b/arch/mips/include/asm/pgtable-bits.h
@@ -55,6 +55,9 @@ enum pgtable_bits {
#if defined(CONFIG_ARCH_HAS_PTE_SPECIAL)
_PAGE_SPECIAL_SHIFT,
#endif
+#if defined(CONFIG_HAVE_ARCH_SOFT_DIRTY)
+ _PAGE_SOFT_DIRTY_SHIFT,
+#endif
};
/*
@@ -84,6 +87,9 @@ enum pgtable_bits {
#if defined(CONFIG_ARCH_HAS_PTE_SPECIAL)
_PAGE_SPECIAL_SHIFT,
#endif
+#if defined(CONFIG_HAVE_ARCH_SOFT_DIRTY)
+ _PAGE_SOFT_DIRTY_SHIFT,
+#endif
};
#elif defined(CONFIG_CPU_R3K_TLB)
@@ -99,6 +105,9 @@ enum pgtable_bits {
#if defined(CONFIG_ARCH_HAS_PTE_SPECIAL)
_PAGE_SPECIAL_SHIFT,
#endif
+#if defined(CONFIG_HAVE_ARCH_SOFT_DIRTY)
+ _PAGE_SOFT_DIRTY_SHIFT,
+#endif
/* Used by TLB hardware (placed in EntryLo) */
_PAGE_GLOBAL_SHIFT = 8,
@@ -125,7 +134,9 @@ enum pgtable_bits {
#if defined(CONFIG_ARCH_HAS_PTE_SPECIAL)
_PAGE_SPECIAL_SHIFT,
#endif
-
+#if defined(CONFIG_HAVE_ARCH_SOFT_DIRTY)
+ _PAGE_SOFT_DIRTY_SHIFT,
+#endif
/* Used by TLB hardware (placed in EntryLo*) */
#if defined(CONFIG_CPU_HAS_RIXI)
_PAGE_NO_EXEC_SHIFT,
@@ -152,6 +163,11 @@ enum pgtable_bits {
#else
# define _PAGE_SPECIAL 0
#endif
+#if defined(CONFIG_HAVE_ARCH_SOFT_DIRTY)
+# define _PAGE_SOFT_DIRTY (1 << _PAGE_SOFT_DIRTY_SHIFT)
+#else
+# define _PAGE_SOFT_DIRTY 0
+#endif
/* Used by TLB hardware (placed in EntryLo*) */
#if defined(CONFIG_XPA)
@@ -269,6 +285,6 @@ static inline uint64_t pte_to_entrylo(unsigned long pte_val)
#define __WRITEABLE (_PAGE_SILENT_WRITE | _PAGE_WRITE | _PAGE_MODIFIED)
#define _PAGE_CHG_MASK (_PAGE_ACCESSED | _PAGE_MODIFIED | \
- _PFN_MASK | _CACHE_MASK)
+ _PAGE_SOFT_DIRTY | _PFN_MASK | _CACHE_MASK)
#endif /* _ASM_PGTABLE_BITS_H */
diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index f1801e7a4b15..85b39c9fd09e 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -400,7 +400,7 @@ static inline pte_t pte_mkwrite(pte_t pte)
static inline pte_t pte_mkdirty(pte_t pte)
{
- pte_val(pte) |= _PAGE_MODIFIED;
+ pte_val(pte) |= _PAGE_MODIFIED | _PAGE_SOFT_DIRTY;
if (pte_val(pte) & _PAGE_WRITE)
pte_val(pte) |= _PAGE_SILENT_WRITE;
return pte;
@@ -414,6 +414,8 @@ static inline pte_t pte_mkyoung(pte_t pte)
return pte;
}
+#define pte_sw_mkyoung pte_mkyoung
+
#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
static inline int pte_huge(pte_t pte) { return pte_val(pte) & _PAGE_HUGE; }
@@ -423,6 +425,30 @@ static inline pte_t pte_mkhuge(pte_t pte)
return pte;
}
#endif /* CONFIG_MIPS_HUGE_TLB_SUPPORT */
+
+#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
+static inline bool pte_soft_dirty(pte_t pte)
+{
+ return pte_val(pte) & _PAGE_SOFT_DIRTY;
+}
+#define pte_swp_soft_dirty pte_soft_dirty
+
+static inline pte_t pte_mksoft_dirty(pte_t pte)
+{
+ pte_val(pte) |= _PAGE_SOFT_DIRTY;
+ return pte;
+}
+#define pte_swp_mksoft_dirty pte_mksoft_dirty
+
+static inline pte_t pte_clear_soft_dirty(pte_t pte)
+{
+ pte_val(pte) &= ~(_PAGE_SOFT_DIRTY);
+ return pte;
+}
+#define pte_swp_clear_soft_dirty pte_clear_soft_dirty
+
+#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
+
#endif
/*
@@ -454,6 +480,31 @@ static inline pgprot_t pgprot_writecombine(pgprot_t _prot)
return __pgprot(prot);
}
+static inline void flush_tlb_fix_spurious_fault(struct vm_area_struct *vma,
+ unsigned long address)
+{
+}
+
+#define __HAVE_ARCH_PTE_SAME
+static inline int pte_same(pte_t pte_a, pte_t pte_b)
+{
+ return pte_val(pte_a) == pte_val(pte_b);
+}
+
+#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
+static inline int ptep_set_access_flags(struct vm_area_struct *vma,
+ unsigned long address, pte_t *ptep,
+ pte_t entry, int dirty)
+{
+ if (!pte_same(*ptep, entry))
+ set_pte_at(vma->vm_mm, address, ptep, entry);
+ /*
+ * update_mmu_cache will unconditionally execute, handling both
+ * the case that the PTE changed and the spurious fault case.
+ */
+ return true;
+}
+
/*
* Conversion functions: convert a page and protection to a page entry,
* and a page entry and page directory to the page they refer to.
@@ -481,8 +532,11 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
#else
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
{
- return __pte((pte_val(pte) & _PAGE_CHG_MASK) |
- (pgprot_val(newprot) & ~_PAGE_CHG_MASK));
+ pte_val(pte) &= _PAGE_CHG_MASK;
+ pte_val(pte) |= pgprot_val(newprot) & ~_PAGE_CHG_MASK;
+ if ((pte_val(pte) & _PAGE_ACCESSED) && !(pte_val(pte) & _PAGE_NO_READ))
+ pte_val(pte) |= _PAGE_SILENT_READ;
+ return pte;
}
#endif
@@ -497,6 +551,9 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
__update_tlb(vma, address, pte);
}
+#define __HAVE_ARCH_UPDATE_MMU_TLB
+#define update_mmu_tlb update_mmu_cache
+
static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp)
{
@@ -507,20 +564,17 @@ static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
#define kern_addr_valid(addr) (1)
-#ifdef CONFIG_PHYS_ADDR_T_64BIT
-extern int remap_pfn_range(struct vm_area_struct *vma, unsigned long from, unsigned long pfn, unsigned long size, pgprot_t prot);
-
-static inline int io_remap_pfn_range(struct vm_area_struct *vma,
- unsigned long vaddr,
- unsigned long pfn,
- unsigned long size,
- pgprot_t prot)
-{
- phys_addr_t phys_addr_high = fixup_bigphys_addr(pfn << PAGE_SHIFT, size);
- return remap_pfn_range(vma, vaddr, phys_addr_high >> PAGE_SHIFT, size, prot);
-}
+/*
+ * Allow physical addresses to be fixed up to help 36-bit peripherals.
+ */
+#ifdef CONFIG_MIPS_FIXUP_BIGPHYS_ADDR
+phys_addr_t fixup_bigphys_addr(phys_addr_t addr, phys_addr_t size);
+int io_remap_pfn_range(struct vm_area_struct *vma, unsigned long vaddr,
+ unsigned long pfn, unsigned long size, pgprot_t prot);
#define io_remap_pfn_range io_remap_pfn_range
-#endif
+#else
+#define fixup_bigphys_addr(addr, size) (addr)
+#endif /* CONFIG_MIPS_FIXUP_BIGPHYS_ADDR */
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -579,7 +633,7 @@ static inline pmd_t pmd_mkclean(pmd_t pmd)
static inline pmd_t pmd_mkdirty(pmd_t pmd)
{
- pmd_val(pmd) |= _PAGE_MODIFIED;
+ pmd_val(pmd) |= _PAGE_MODIFIED | _PAGE_SOFT_DIRTY;
if (pmd_val(pmd) & _PAGE_WRITE)
pmd_val(pmd) |= _PAGE_SILENT_WRITE;
@@ -608,6 +662,26 @@ static inline pmd_t pmd_mkyoung(pmd_t pmd)
return pmd;
}
+#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
+static inline int pmd_soft_dirty(pmd_t pmd)
+{
+ return !!(pmd_val(pmd) & _PAGE_SOFT_DIRTY);
+}
+
+static inline pmd_t pmd_mksoft_dirty(pmd_t pmd)
+{
+ pmd_val(pmd) |= _PAGE_SOFT_DIRTY;
+ return pmd;
+}
+
+static inline pmd_t pmd_clear_soft_dirty(pmd_t pmd)
+{
+ pmd_val(pmd) &= ~(_PAGE_SOFT_DIRTY);
+ return pmd;
+}
+
+#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
+
/* Extern to avoid header file madness */
extern pmd_t mk_pmd(struct page *page, pgprot_t prot);
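
For illustration only, a minimal sketch of how the new soft-dirty helpers compose with the existing pte accessors; the mm, addr and ptep below are assumed to come from a caller, and this only builds with CONFIG_HAVE_ARCH_SOFT_DIRTY enabled.

	/* Hypothetical sketch: set the soft-dirty bit on a PTE and write it back. */
	pte_t pte = *ptep;

	pte = pte_mksoft_dirty(pte);		/* sets _PAGE_SOFT_DIRTY */
	WARN_ON(!pte_soft_dirty(pte));		/* the bit reads back as set */
	set_pte_at(mm, addr, ptep, pte);
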
diff --git a/arch/mips/include/asm/smp.h b/arch/mips/include/asm/smp.h
index 7990c1c70471..5d9ff61004ca 100644
--- a/arch/mips/include/asm/smp.h
+++ b/arch/mips/include/asm/smp.h
@@ -125,7 +125,7 @@ static inline void arch_send_call_function_single_ipi(int cpu)
{
extern const struct plat_smp_ops *mp_ops; /* private */
- mp_ops->send_ipi_mask(cpumask_of(cpu), SMP_CALL_FUNCTION);
+ mp_ops->send_ipi_single(cpu, SMP_CALL_FUNCTION);
}
static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask)
diff --git a/arch/mips/include/asm/stackframe.h b/arch/mips/include/asm/stackframe.h
index 4d6ad907ae54..3e8d2aaf96af 100644
--- a/arch/mips/include/asm/stackframe.h
+++ b/arch/mips/include/asm/stackframe.h
@@ -424,7 +424,7 @@
.macro RESTORE_SP_AND_RET docfi=0
RESTORE_SP \docfi
-#ifdef CONFIG_CPU_MIPSR6
+#if defined(CONFIG_CPU_MIPSR5) || defined(CONFIG_CPU_MIPSR6)
eretnc
#else
.set push
diff --git a/arch/mips/include/asm/switch_to.h b/arch/mips/include/asm/switch_to.h
index 09cbe9042828..0b0a93bf83cd 100644
--- a/arch/mips/include/asm/switch_to.h
+++ b/arch/mips/include/asm/switch_to.h
@@ -67,11 +67,11 @@ do { \
#endif
/*
- * Clear LLBit during context switches on MIPSr6 such that eretnc can be used
+ * Clear LLBit during context switches on MIPSr5+ such that eretnc can be used
* unconditionally when returning to userland in entry.S.
*/
-#define __clear_r6_hw_ll_bit() do { \
- if (cpu_has_mips_r6) \
+#define __clear_r5_hw_ll_bit() do { \
+ if (cpu_has_mips_r5 || cpu_has_mips_r6) \
write_c0_lladdr(0); \
} while (0)
@@ -129,7 +129,7 @@ do { \
} \
clear_c0_status(ST0_CU2); \
} \
- __clear_r6_hw_ll_bit(); \
+ __clear_r5_hw_ll_bit(); \
__clear_software_ll_bit(); \
if (cpu_has_userlocal) \
write_c0_userlocal(task_thread_info(next)->tp_value); \
diff --git a/arch/mips/include/asm/unaligned-emul.h b/arch/mips/include/asm/unaligned-emul.h
new file mode 100644
index 000000000000..2022b18944b9
--- /dev/null
+++ b/arch/mips/include/asm/unaligned-emul.h
@@ -0,0 +1,779 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ASM_MIPS_UNALIGNED_EMUL_H
+#define _ASM_MIPS_UNALIGNED_EMUL_H
+
+#include <asm/asm.h>
+
+#ifdef __BIG_ENDIAN
+#define _LoadHW(addr, value, res, type) \
+do { \
+ __asm__ __volatile__ (".set\tnoat\n" \
+ "1:\t"type##_lb("%0", "0(%2)")"\n" \
+ "2:\t"type##_lbu("$1", "1(%2)")"\n\t"\
+ "sll\t%0, 0x8\n\t" \
+ "or\t%0, $1\n\t" \
+ "li\t%1, 0\n" \
+ "3:\t.set\tat\n\t" \
+ ".insn\n\t" \
+ ".section\t.fixup,\"ax\"\n\t" \
+ "4:\tli\t%1, %3\n\t" \
+ "j\t3b\n\t" \
+ ".previous\n\t" \
+ ".section\t__ex_table,\"a\"\n\t" \
+ STR(PTR)"\t1b, 4b\n\t" \
+ STR(PTR)"\t2b, 4b\n\t" \
+ ".previous" \
+ : "=&r" (value), "=r" (res) \
+ : "r" (addr), "i" (-EFAULT)); \
+} while (0)
+
+#ifndef CONFIG_CPU_NO_LOAD_STORE_LR
+#define _LoadW(addr, value, res, type) \
+do { \
+ __asm__ __volatile__ ( \
+ "1:\t"type##_lwl("%0", "(%2)")"\n" \
+ "2:\t"type##_lwr("%0", "3(%2)")"\n\t"\
+ "li\t%1, 0\n" \
+ "3:\n\t" \
+ ".insn\n\t" \
+ ".section\t.fixup,\"ax\"\n\t" \
+ "4:\tli\t%1, %3\n\t" \
+ "j\t3b\n\t" \
+ ".previous\n\t" \
+ ".section\t__ex_table,\"a\"\n\t" \
+ STR(PTR)"\t1b, 4b\n\t" \
+ STR(PTR)"\t2b, 4b\n\t" \
+ ".previous" \
+ : "=&r" (value), "=r" (res) \
+ : "r" (addr), "i" (-EFAULT)); \
+} while (0)
+
+#else /* CONFIG_CPU_NO_LOAD_STORE_LR */
+/* For CPUs without lwl instruction */
+#define _LoadW(addr, value, res, type) \
+do { \
+ __asm__ __volatile__ ( \
+ ".set\tpush\n" \
+ ".set\tnoat\n\t" \
+ "1:"type##_lb("%0", "0(%2)")"\n\t" \
+ "2:"type##_lbu("$1", "1(%2)")"\n\t" \
+ "sll\t%0, 0x8\n\t" \
+ "or\t%0, $1\n\t" \
+ "3:"type##_lbu("$1", "2(%2)")"\n\t" \
+ "sll\t%0, 0x8\n\t" \
+ "or\t%0, $1\n\t" \
+ "4:"type##_lbu("$1", "3(%2)")"\n\t" \
+ "sll\t%0, 0x8\n\t" \
+ "or\t%0, $1\n\t" \
+ "li\t%1, 0\n" \
+ ".set\tpop\n" \
+ "10:\n\t" \
+ ".insn\n\t" \
+ ".section\t.fixup,\"ax\"\n\t" \
+ "11:\tli\t%1, %3\n\t" \
+ "j\t10b\n\t" \
+ ".previous\n\t" \
+ ".section\t__ex_table,\"a\"\n\t" \
+ STR(PTR)"\t1b, 11b\n\t" \
+ STR(PTR)"\t2b, 11b\n\t" \
+ STR(PTR)"\t3b, 11b\n\t" \
+ STR(PTR)"\t4b, 11b\n\t" \
+ ".previous" \
+ : "=&r" (value), "=r" (res) \
+ : "r" (addr), "i" (-EFAULT)); \
+} while (0)
+
+#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */
+
+#define _LoadHWU(addr, value, res, type) \
+do { \
+ __asm__ __volatile__ ( \
+ ".set\tnoat\n" \
+ "1:\t"type##_lbu("%0", "0(%2)")"\n" \
+ "2:\t"type##_lbu("$1", "1(%2)")"\n\t"\
+ "sll\t%0, 0x8\n\t" \
+ "or\t%0, $1\n\t" \
+ "li\t%1, 0\n" \
+ "3:\n\t" \
+ ".insn\n\t" \
+ ".set\tat\n\t" \
+ ".section\t.fixup,\"ax\"\n\t" \
+ "4:\tli\t%1, %3\n\t" \
+ "j\t3b\n\t" \
+ ".previous\n\t" \
+ ".section\t__ex_table,\"a\"\n\t" \
+ STR(PTR)"\t1b, 4b\n\t" \
+ STR(PTR)"\t2b, 4b\n\t" \
+ ".previous" \
+ : "=&r" (value), "=r" (res) \
+ : "r" (addr), "i" (-EFAULT)); \
+} while (0)
+
+#ifndef CONFIG_CPU_NO_LOAD_STORE_LR
+#define _LoadWU(addr, value, res, type) \
+do { \
+ __asm__ __volatile__ ( \
+ "1:\t"type##_lwl("%0", "(%2)")"\n" \
+ "2:\t"type##_lwr("%0", "3(%2)")"\n\t"\
+ "dsll\t%0, %0, 32\n\t" \
+ "dsrl\t%0, %0, 32\n\t" \
+ "li\t%1, 0\n" \
+ "3:\n\t" \
+ ".insn\n\t" \
+ "\t.section\t.fixup,\"ax\"\n\t" \
+ "4:\tli\t%1, %3\n\t" \
+ "j\t3b\n\t" \
+ ".previous\n\t" \
+ ".section\t__ex_table,\"a\"\n\t" \
+ STR(PTR)"\t1b, 4b\n\t" \
+ STR(PTR)"\t2b, 4b\n\t" \
+ ".previous" \
+ : "=&r" (value), "=r" (res) \
+ : "r" (addr), "i" (-EFAULT)); \
+} while (0)
+
+#define _LoadDW(addr, value, res) \
+do { \
+ __asm__ __volatile__ ( \
+ "1:\tldl\t%0, (%2)\n" \
+ "2:\tldr\t%0, 7(%2)\n\t" \
+ "li\t%1, 0\n" \
+ "3:\n\t" \
+ ".insn\n\t" \
+ "\t.section\t.fixup,\"ax\"\n\t" \
+ "4:\tli\t%1, %3\n\t" \
+ "j\t3b\n\t" \
+ ".previous\n\t" \
+ ".section\t__ex_table,\"a\"\n\t" \
+ STR(PTR)"\t1b, 4b\n\t" \
+ STR(PTR)"\t2b, 4b\n\t" \
+ ".previous" \
+ : "=&r" (value), "=r" (res) \
+ : "r" (addr), "i" (-EFAULT)); \
+} while (0)
+
+#else /* CONFIG_CPU_NO_LOAD_STORE_LR */
+/* For CPUs without lwl and ldl instructions */
+#define _LoadWU(addr, value, res, type) \
+do { \
+ __asm__ __volatile__ ( \
+ ".set\tpush\n\t" \
+ ".set\tnoat\n\t" \
+ "1:"type##_lbu("%0", "0(%2)")"\n\t" \
+ "2:"type##_lbu("$1", "1(%2)")"\n\t" \
+ "sll\t%0, 0x8\n\t" \
+ "or\t%0, $1\n\t" \
+ "3:"type##_lbu("$1", "2(%2)")"\n\t" \
+ "sll\t%0, 0x8\n\t" \
+ "or\t%0, $1\n\t" \
+ "4:"type##_lbu("$1", "3(%2)")"\n\t" \
+ "sll\t%0, 0x8\n\t" \
+ "or\t%0, $1\n\t" \
+ "li\t%1, 0\n" \
+ ".set\tpop\n" \
+ "10:\n\t" \
+ ".insn\n\t" \
+ ".section\t.fixup,\"ax\"\n\t" \
+ "11:\tli\t%1, %3\n\t" \
+ "j\t10b\n\t" \
+ ".previous\n\t" \
+ ".section\t__ex_table,\"a\"\n\t" \
+ STR(PTR)"\t1b, 11b\n\t" \
+ STR(PTR)"\t2b, 11b\n\t" \
+ STR(PTR)"\t3b, 11b\n\t" \
+ STR(PTR)"\t4b, 11b\n\t" \
+ ".previous" \
+ : "=&r" (value), "=r" (res) \
+ : "r" (addr), "i" (-EFAULT)); \
+} while (0)
+
+#define _LoadDW(addr, value, res) \
+do { \
+ __asm__ __volatile__ ( \
+ ".set\tpush\n\t" \
+ ".set\tnoat\n\t" \
+ "1:lb\t%0, 0(%2)\n\t" \
+ "2:lbu\t $1, 1(%2)\n\t" \
+ "dsll\t%0, 0x8\n\t" \
+ "or\t%0, $1\n\t" \
+ "3:lbu\t$1, 2(%2)\n\t" \
+ "dsll\t%0, 0x8\n\t" \
+ "or\t%0, $1\n\t" \
+ "4:lbu\t$1, 3(%2)\n\t" \
+ "dsll\t%0, 0x8\n\t" \
+ "or\t%0, $1\n\t" \
+ "5:lbu\t$1, 4(%2)\n\t" \
+ "dsll\t%0, 0x8\n\t" \
+ "or\t%0, $1\n\t" \
+ "6:lbu\t$1, 5(%2)\n\t" \
+ "dsll\t%0, 0x8\n\t" \
+ "or\t%0, $1\n\t" \
+ "7:lbu\t$1, 6(%2)\n\t" \
+ "dsll\t%0, 0x8\n\t" \
+ "or\t%0, $1\n\t" \
+ "8:lbu\t$1, 7(%2)\n\t" \
+ "dsll\t%0, 0x8\n\t" \
+ "or\t%0, $1\n\t" \
+ "li\t%1, 0\n" \
+ ".set\tpop\n\t" \
+ "10:\n\t" \
+ ".insn\n\t" \
+ ".section\t.fixup,\"ax\"\n\t" \
+ "11:\tli\t%1, %3\n\t" \
+ "j\t10b\n\t" \
+ ".previous\n\t" \
+ ".section\t__ex_table,\"a\"\n\t" \
+ STR(PTR)"\t1b, 11b\n\t" \
+ STR(PTR)"\t2b, 11b\n\t" \
+ STR(PTR)"\t3b, 11b\n\t" \
+ STR(PTR)"\t4b, 11b\n\t" \
+ STR(PTR)"\t5b, 11b\n\t" \
+ STR(PTR)"\t6b, 11b\n\t" \
+ STR(PTR)"\t7b, 11b\n\t" \
+ STR(PTR)"\t8b, 11b\n\t" \
+ ".previous" \
+ : "=&r" (value), "=r" (res) \
+ : "r" (addr), "i" (-EFAULT)); \
+} while (0)
+
+#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */
+
+
+#define _StoreHW(addr, value, res, type) \
+do { \
+ __asm__ __volatile__ ( \
+ ".set\tnoat\n" \
+ "1:\t"type##_sb("%1", "1(%2)")"\n" \
+ "srl\t$1, %1, 0x8\n" \
+ "2:\t"type##_sb("$1", "0(%2)")"\n" \
+ ".set\tat\n\t" \
+ "li\t%0, 0\n" \
+ "3:\n\t" \
+ ".insn\n\t" \
+ ".section\t.fixup,\"ax\"\n\t" \
+ "4:\tli\t%0, %3\n\t" \
+ "j\t3b\n\t" \
+ ".previous\n\t" \
+ ".section\t__ex_table,\"a\"\n\t" \
+ STR(PTR)"\t1b, 4b\n\t" \
+ STR(PTR)"\t2b, 4b\n\t" \
+ ".previous" \
+ : "=r" (res) \
+ : "r" (value), "r" (addr), "i" (-EFAULT));\
+} while (0)
+
+#ifndef CONFIG_CPU_NO_LOAD_STORE_LR
+#define _StoreW(addr, value, res, type) \
+do { \
+ __asm__ __volatile__ ( \
+ "1:\t"type##_swl("%1", "(%2)")"\n" \
+ "2:\t"type##_swr("%1", "3(%2)")"\n\t"\
+ "li\t%0, 0\n" \
+ "3:\n\t" \
+ ".insn\n\t" \
+ ".section\t.fixup,\"ax\"\n\t" \
+ "4:\tli\t%0, %3\n\t" \
+ "j\t3b\n\t" \
+ ".previous\n\t" \
+ ".section\t__ex_table,\"a\"\n\t" \
+ STR(PTR)"\t1b, 4b\n\t" \
+ STR(PTR)"\t2b, 4b\n\t" \
+ ".previous" \
+ : "=r" (res) \
+ : "r" (value), "r" (addr), "i" (-EFAULT)); \
+} while (0)
+
+#define _StoreDW(addr, value, res) \
+do { \
+ __asm__ __volatile__ ( \
+ "1:\tsdl\t%1,(%2)\n" \
+ "2:\tsdr\t%1, 7(%2)\n\t" \
+ "li\t%0, 0\n" \
+ "3:\n\t" \
+ ".insn\n\t" \
+ ".section\t.fixup,\"ax\"\n\t" \
+ "4:\tli\t%0, %3\n\t" \
+ "j\t3b\n\t" \
+ ".previous\n\t" \
+ ".section\t__ex_table,\"a\"\n\t" \
+ STR(PTR)"\t1b, 4b\n\t" \
+ STR(PTR)"\t2b, 4b\n\t" \
+ ".previous" \
+ : "=r" (res) \
+ : "r" (value), "r" (addr), "i" (-EFAULT)); \
+} while (0)
+
+#else /* CONFIG_CPU_NO_LOAD_STORE_LR */
+#define _StoreW(addr, value, res, type) \
+do { \
+ __asm__ __volatile__ ( \
+ ".set\tpush\n\t" \
+ ".set\tnoat\n\t" \
+ "1:"type##_sb("%1", "3(%2)")"\n\t" \
+ "srl\t$1, %1, 0x8\n\t" \
+ "2:"type##_sb("$1", "2(%2)")"\n\t" \
+ "srl\t$1, $1, 0x8\n\t" \
+ "3:"type##_sb("$1", "1(%2)")"\n\t" \
+ "srl\t$1, $1, 0x8\n\t" \
+ "4:"type##_sb("$1", "0(%2)")"\n\t" \
+ ".set\tpop\n\t" \
+ "li\t%0, 0\n" \
+ "10:\n\t" \
+ ".insn\n\t" \
+ ".section\t.fixup,\"ax\"\n\t" \
+ "11:\tli\t%0, %3\n\t" \
+ "j\t10b\n\t" \
+ ".previous\n\t" \
+ ".section\t__ex_table,\"a\"\n\t" \
+ STR(PTR)"\t1b, 11b\n\t" \
+ STR(PTR)"\t2b, 11b\n\t" \
+ STR(PTR)"\t3b, 11b\n\t" \
+ STR(PTR)"\t4b, 11b\n\t" \
+ ".previous" \
+ : "=&r" (res) \
+ : "r" (value), "r" (addr), "i" (-EFAULT) \
+ : "memory"); \
+} while (0)
+
+#define _StoreDW(addr, value, res) \
+do { \
+ __asm__ __volatile__ ( \
+ ".set\tpush\n\t" \
+ ".set\tnoat\n\t" \
+ "1:sb\t%1, 7(%2)\n\t" \
+ "dsrl\t$1, %1, 0x8\n\t" \
+ "2:sb\t$1, 6(%2)\n\t" \
+ "dsrl\t$1, $1, 0x8\n\t" \
+ "3:sb\t$1, 5(%2)\n\t" \
+ "dsrl\t$1, $1, 0x8\n\t" \
+ "4:sb\t$1, 4(%2)\n\t" \
+ "dsrl\t$1, $1, 0x8\n\t" \
+ "5:sb\t$1, 3(%2)\n\t" \
+ "dsrl\t$1, $1, 0x8\n\t" \
+ "6:sb\t$1, 2(%2)\n\t" \
+ "dsrl\t$1, $1, 0x8\n\t" \
+ "7:sb\t$1, 1(%2)\n\t" \
+ "dsrl\t$1, $1, 0x8\n\t" \
+ "8:sb\t$1, 0(%2)\n\t" \
+ "dsrl\t$1, $1, 0x8\n\t" \
+ ".set\tpop\n\t" \
+ "li\t%0, 0\n" \
+ "10:\n\t" \
+ ".insn\n\t" \
+ ".section\t.fixup,\"ax\"\n\t" \
+ "11:\tli\t%0, %3\n\t" \
+ "j\t10b\n\t" \
+ ".previous\n\t" \
+ ".section\t__ex_table,\"a\"\n\t" \
+ STR(PTR)"\t1b, 11b\n\t" \
+ STR(PTR)"\t2b, 11b\n\t" \
+ STR(PTR)"\t3b, 11b\n\t" \
+ STR(PTR)"\t4b, 11b\n\t" \
+ STR(PTR)"\t5b, 11b\n\t" \
+ STR(PTR)"\t6b, 11b\n\t" \
+ STR(PTR)"\t7b, 11b\n\t" \
+ STR(PTR)"\t8b, 11b\n\t" \
+ ".previous" \
+ : "=&r" (res) \
+ : "r" (value), "r" (addr), "i" (-EFAULT) \
+ : "memory"); \
+} while (0)
+
+#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */
+
+#else /* __BIG_ENDIAN */
+
+#define _LoadHW(addr, value, res, type) \
+do { \
+ __asm__ __volatile__ (".set\tnoat\n" \
+ "1:\t"type##_lb("%0", "1(%2)")"\n" \
+ "2:\t"type##_lbu("$1", "0(%2)")"\n\t"\
+ "sll\t%0, 0x8\n\t" \
+ "or\t%0, $1\n\t" \
+ "li\t%1, 0\n" \
+ "3:\t.set\tat\n\t" \
+ ".insn\n\t" \
+ ".section\t.fixup,\"ax\"\n\t" \
+ "4:\tli\t%1, %3\n\t" \
+ "j\t3b\n\t" \
+ ".previous\n\t" \
+ ".section\t__ex_table,\"a\"\n\t" \
+ STR(PTR)"\t1b, 4b\n\t" \
+ STR(PTR)"\t2b, 4b\n\t" \
+ ".previous" \
+ : "=&r" (value), "=r" (res) \
+ : "r" (addr), "i" (-EFAULT)); \
+} while (0)
+
+#ifndef CONFIG_CPU_NO_LOAD_STORE_LR
+#define _LoadW(addr, value, res, type) \
+do { \
+ __asm__ __volatile__ ( \
+ "1:\t"type##_lwl("%0", "3(%2)")"\n" \
+ "2:\t"type##_lwr("%0", "(%2)")"\n\t"\
+ "li\t%1, 0\n" \
+ "3:\n\t" \
+ ".insn\n\t" \
+ ".section\t.fixup,\"ax\"\n\t" \
+ "4:\tli\t%1, %3\n\t" \
+ "j\t3b\n\t" \
+ ".previous\n\t" \
+ ".section\t__ex_table,\"a\"\n\t" \
+ STR(PTR)"\t1b, 4b\n\t" \
+ STR(PTR)"\t2b, 4b\n\t" \
+ ".previous" \
+ : "=&r" (value), "=r" (res) \
+ : "r" (addr), "i" (-EFAULT)); \
+} while (0)
+
+#else /* CONFIG_CPU_NO_LOAD_STORE_LR */
+/* For CPUs without lwl instruction */
+#define _LoadW(addr, value, res, type) \
+do { \
+ __asm__ __volatile__ ( \
+ ".set\tpush\n" \
+ ".set\tnoat\n\t" \
+ "1:"type##_lb("%0", "3(%2)")"\n\t" \
+ "2:"type##_lbu("$1", "2(%2)")"\n\t" \
+ "sll\t%0, 0x8\n\t" \
+ "or\t%0, $1\n\t" \
+ "3:"type##_lbu("$1", "1(%2)")"\n\t" \
+ "sll\t%0, 0x8\n\t" \
+ "or\t%0, $1\n\t" \
+ "4:"type##_lbu("$1", "0(%2)")"\n\t" \
+ "sll\t%0, 0x8\n\t" \
+ "or\t%0, $1\n\t" \
+ "li\t%1, 0\n" \
+ ".set\tpop\n" \
+ "10:\n\t" \
+ ".insn\n\t" \
+ ".section\t.fixup,\"ax\"\n\t" \
+ "11:\tli\t%1, %3\n\t" \
+ "j\t10b\n\t" \
+ ".previous\n\t" \
+ ".section\t__ex_table,\"a\"\n\t" \
+ STR(PTR)"\t1b, 11b\n\t" \
+ STR(PTR)"\t2b, 11b\n\t" \
+ STR(PTR)"\t3b, 11b\n\t" \
+ STR(PTR)"\t4b, 11b\n\t" \
+ ".previous" \
+ : "=&r" (value), "=r" (res) \
+ : "r" (addr), "i" (-EFAULT)); \
+} while (0)
+
+#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */
+
+
+#define _LoadHWU(addr, value, res, type) \
+do { \
+ __asm__ __volatile__ ( \
+ ".set\tnoat\n" \
+ "1:\t"type##_lbu("%0", "1(%2)")"\n" \
+ "2:\t"type##_lbu("$1", "0(%2)")"\n\t"\
+ "sll\t%0, 0x8\n\t" \
+ "or\t%0, $1\n\t" \
+ "li\t%1, 0\n" \
+ "3:\n\t" \
+ ".insn\n\t" \
+ ".set\tat\n\t" \
+ ".section\t.fixup,\"ax\"\n\t" \
+ "4:\tli\t%1, %3\n\t" \
+ "j\t3b\n\t" \
+ ".previous\n\t" \
+ ".section\t__ex_table,\"a\"\n\t" \
+ STR(PTR)"\t1b, 4b\n\t" \
+ STR(PTR)"\t2b, 4b\n\t" \
+ ".previous" \
+ : "=&r" (value), "=r" (res) \
+ : "r" (addr), "i" (-EFAULT)); \
+} while (0)
+
+#ifndef CONFIG_CPU_NO_LOAD_STORE_LR
+#define _LoadWU(addr, value, res, type) \
+do { \
+ __asm__ __volatile__ ( \
+ "1:\t"type##_lwl("%0", "3(%2)")"\n" \
+ "2:\t"type##_lwr("%0", "(%2)")"\n\t"\
+ "dsll\t%0, %0, 32\n\t" \
+ "dsrl\t%0, %0, 32\n\t" \
+ "li\t%1, 0\n" \
+ "3:\n\t" \
+ ".insn\n\t" \
+ "\t.section\t.fixup,\"ax\"\n\t" \
+ "4:\tli\t%1, %3\n\t" \
+ "j\t3b\n\t" \
+ ".previous\n\t" \
+ ".section\t__ex_table,\"a\"\n\t" \
+ STR(PTR)"\t1b, 4b\n\t" \
+ STR(PTR)"\t2b, 4b\n\t" \
+ ".previous" \
+ : "=&r" (value), "=r" (res) \
+ : "r" (addr), "i" (-EFAULT)); \
+} while (0)
+
+#define _LoadDW(addr, value, res) \
+do { \
+ __asm__ __volatile__ ( \
+ "1:\tldl\t%0, 7(%2)\n" \
+ "2:\tldr\t%0, (%2)\n\t" \
+ "li\t%1, 0\n" \
+ "3:\n\t" \
+ ".insn\n\t" \
+ "\t.section\t.fixup,\"ax\"\n\t" \
+ "4:\tli\t%1, %3\n\t" \
+ "j\t3b\n\t" \
+ ".previous\n\t" \
+ ".section\t__ex_table,\"a\"\n\t" \
+ STR(PTR)"\t1b, 4b\n\t" \
+ STR(PTR)"\t2b, 4b\n\t" \
+ ".previous" \
+ : "=&r" (value), "=r" (res) \
+ : "r" (addr), "i" (-EFAULT)); \
+} while (0)
+
+#else /* CONFIG_CPU_NO_LOAD_STORE_LR */
+/* For CPUs without lwl and ldl instructions */
+#define _LoadWU(addr, value, res, type) \
+do { \
+ __asm__ __volatile__ ( \
+ ".set\tpush\n\t" \
+ ".set\tnoat\n\t" \
+ "1:"type##_lbu("%0", "3(%2)")"\n\t" \
+ "2:"type##_lbu("$1", "2(%2)")"\n\t" \
+ "sll\t%0, 0x8\n\t" \
+ "or\t%0, $1\n\t" \
+ "3:"type##_lbu("$1", "1(%2)")"\n\t" \
+ "sll\t%0, 0x8\n\t" \
+ "or\t%0, $1\n\t" \
+ "4:"type##_lbu("$1", "0(%2)")"\n\t" \
+ "sll\t%0, 0x8\n\t" \
+ "or\t%0, $1\n\t" \
+ "li\t%1, 0\n" \
+ ".set\tpop\n" \
+ "10:\n\t" \
+ ".insn\n\t" \
+ ".section\t.fixup,\"ax\"\n\t" \
+ "11:\tli\t%1, %3\n\t" \
+ "j\t10b\n\t" \
+ ".previous\n\t" \
+ ".section\t__ex_table,\"a\"\n\t" \
+ STR(PTR)"\t1b, 11b\n\t" \
+ STR(PTR)"\t2b, 11b\n\t" \
+ STR(PTR)"\t3b, 11b\n\t" \
+ STR(PTR)"\t4b, 11b\n\t" \
+ ".previous" \
+ : "=&r" (value), "=r" (res) \
+ : "r" (addr), "i" (-EFAULT)); \
+} while (0)
+
+#define _LoadDW(addr, value, res) \
+do { \
+ __asm__ __volatile__ ( \
+ ".set\tpush\n\t" \
+ ".set\tnoat\n\t" \
+ "1:lb\t%0, 7(%2)\n\t" \
+ "2:lbu\t$1, 6(%2)\n\t" \
+ "dsll\t%0, 0x8\n\t" \
+ "or\t%0, $1\n\t" \
+ "3:lbu\t$1, 5(%2)\n\t" \
+ "dsll\t%0, 0x8\n\t" \
+ "or\t%0, $1\n\t" \
+ "4:lbu\t$1, 4(%2)\n\t" \
+ "dsll\t%0, 0x8\n\t" \
+ "or\t%0, $1\n\t" \
+ "5:lbu\t$1, 3(%2)\n\t" \
+ "dsll\t%0, 0x8\n\t" \
+ "or\t%0, $1\n\t" \
+ "6:lbu\t$1, 2(%2)\n\t" \
+ "dsll\t%0, 0x8\n\t" \
+ "or\t%0, $1\n\t" \
+ "7:lbu\t$1, 1(%2)\n\t" \
+ "dsll\t%0, 0x8\n\t" \
+ "or\t%0, $1\n\t" \
+ "8:lbu\t$1, 0(%2)\n\t" \
+ "dsll\t%0, 0x8\n\t" \
+ "or\t%0, $1\n\t" \
+ "li\t%1, 0\n" \
+ ".set\tpop\n\t" \
+ "10:\n\t" \
+ ".insn\n\t" \
+ ".section\t.fixup,\"ax\"\n\t" \
+ "11:\tli\t%1, %3\n\t" \
+ "j\t10b\n\t" \
+ ".previous\n\t" \
+ ".section\t__ex_table,\"a\"\n\t" \
+ STR(PTR)"\t1b, 11b\n\t" \
+ STR(PTR)"\t2b, 11b\n\t" \
+ STR(PTR)"\t3b, 11b\n\t" \
+ STR(PTR)"\t4b, 11b\n\t" \
+ STR(PTR)"\t5b, 11b\n\t" \
+ STR(PTR)"\t6b, 11b\n\t" \
+ STR(PTR)"\t7b, 11b\n\t" \
+ STR(PTR)"\t8b, 11b\n\t" \
+ ".previous" \
+ : "=&r" (value), "=r" (res) \
+ : "r" (addr), "i" (-EFAULT)); \
+} while (0)
+#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */
+
+#define _StoreHW(addr, value, res, type) \
+do { \
+ __asm__ __volatile__ ( \
+ ".set\tnoat\n" \
+ "1:\t"type##_sb("%1", "0(%2)")"\n" \
+ "srl\t$1,%1, 0x8\n" \
+ "2:\t"type##_sb("$1", "1(%2)")"\n" \
+ ".set\tat\n\t" \
+ "li\t%0, 0\n" \
+ "3:\n\t" \
+ ".insn\n\t" \
+ ".section\t.fixup,\"ax\"\n\t" \
+ "4:\tli\t%0, %3\n\t" \
+ "j\t3b\n\t" \
+ ".previous\n\t" \
+ ".section\t__ex_table,\"a\"\n\t" \
+ STR(PTR)"\t1b, 4b\n\t" \
+ STR(PTR)"\t2b, 4b\n\t" \
+ ".previous" \
+ : "=r" (res) \
+ : "r" (value), "r" (addr), "i" (-EFAULT));\
+} while (0)
+
+#ifndef CONFIG_CPU_NO_LOAD_STORE_LR
+#define _StoreW(addr, value, res, type) \
+do { \
+ __asm__ __volatile__ ( \
+ "1:\t"type##_swl("%1", "3(%2)")"\n" \
+ "2:\t"type##_swr("%1", "(%2)")"\n\t"\
+ "li\t%0, 0\n" \
+ "3:\n\t" \
+ ".insn\n\t" \
+ ".section\t.fixup,\"ax\"\n\t" \
+ "4:\tli\t%0, %3\n\t" \
+ "j\t3b\n\t" \
+ ".previous\n\t" \
+ ".section\t__ex_table,\"a\"\n\t" \
+ STR(PTR)"\t1b, 4b\n\t" \
+ STR(PTR)"\t2b, 4b\n\t" \
+ ".previous" \
+ : "=r" (res) \
+ : "r" (value), "r" (addr), "i" (-EFAULT)); \
+} while (0)
+
+#define _StoreDW(addr, value, res) \
+do { \
+ __asm__ __volatile__ ( \
+ "1:\tsdl\t%1, 7(%2)\n" \
+ "2:\tsdr\t%1, (%2)\n\t" \
+ "li\t%0, 0\n" \
+ "3:\n\t" \
+ ".insn\n\t" \
+ ".section\t.fixup,\"ax\"\n\t" \
+ "4:\tli\t%0, %3\n\t" \
+ "j\t3b\n\t" \
+ ".previous\n\t" \
+ ".section\t__ex_table,\"a\"\n\t" \
+ STR(PTR)"\t1b, 4b\n\t" \
+ STR(PTR)"\t2b, 4b\n\t" \
+ ".previous" \
+ : "=r" (res) \
+ : "r" (value), "r" (addr), "i" (-EFAULT)); \
+} while (0)
+
+#else /* CONFIG_CPU_NO_LOAD_STORE_LR */
+/* For CPUs without swl and sdl instructions */
+#define _StoreW(addr, value, res, type) \
+do { \
+ __asm__ __volatile__ ( \
+ ".set\tpush\n\t" \
+ ".set\tnoat\n\t" \
+ "1:"type##_sb("%1", "0(%2)")"\n\t" \
+ "srl\t$1, %1, 0x8\n\t" \
+ "2:"type##_sb("$1", "1(%2)")"\n\t" \
+ "srl\t$1, $1, 0x8\n\t" \
+ "3:"type##_sb("$1", "2(%2)")"\n\t" \
+ "srl\t$1, $1, 0x8\n\t" \
+ "4:"type##_sb("$1", "3(%2)")"\n\t" \
+ ".set\tpop\n\t" \
+ "li\t%0, 0\n" \
+ "10:\n\t" \
+ ".insn\n\t" \
+ ".section\t.fixup,\"ax\"\n\t" \
+ "11:\tli\t%0, %3\n\t" \
+ "j\t10b\n\t" \
+ ".previous\n\t" \
+ ".section\t__ex_table,\"a\"\n\t" \
+ STR(PTR)"\t1b, 11b\n\t" \
+ STR(PTR)"\t2b, 11b\n\t" \
+ STR(PTR)"\t3b, 11b\n\t" \
+ STR(PTR)"\t4b, 11b\n\t" \
+ ".previous" \
+ : "=&r" (res) \
+ : "r" (value), "r" (addr), "i" (-EFAULT) \
+ : "memory"); \
+} while (0)
+
+#define _StoreDW(addr, value, res) \
+do { \
+ __asm__ __volatile__ ( \
+ ".set\tpush\n\t" \
+ ".set\tnoat\n\t" \
+ "1:sb\t%1, 0(%2)\n\t" \
+ "dsrl\t$1, %1, 0x8\n\t" \
+ "2:sb\t$1, 1(%2)\n\t" \
+ "dsrl\t$1, $1, 0x8\n\t" \
+ "3:sb\t$1, 2(%2)\n\t" \
+ "dsrl\t$1, $1, 0x8\n\t" \
+ "4:sb\t$1, 3(%2)\n\t" \
+ "dsrl\t$1, $1, 0x8\n\t" \
+ "5:sb\t$1, 4(%2)\n\t" \
+ "dsrl\t$1, $1, 0x8\n\t" \
+ "6:sb\t$1, 5(%2)\n\t" \
+ "dsrl\t$1, $1, 0x8\n\t" \
+ "7:sb\t$1, 6(%2)\n\t" \
+ "dsrl\t$1, $1, 0x8\n\t" \
+ "8:sb\t$1, 7(%2)\n\t" \
+ "dsrl\t$1, $1, 0x8\n\t" \
+ ".set\tpop\n\t" \
+ "li\t%0, 0\n" \
+ "10:\n\t" \
+ ".insn\n\t" \
+ ".section\t.fixup,\"ax\"\n\t" \
+ "11:\tli\t%0, %3\n\t" \
+ "j\t10b\n\t" \
+ ".previous\n\t" \
+ ".section\t__ex_table,\"a\"\n\t" \
+ STR(PTR)"\t1b, 11b\n\t" \
+ STR(PTR)"\t2b, 11b\n\t" \
+ STR(PTR)"\t3b, 11b\n\t" \
+ STR(PTR)"\t4b, 11b\n\t" \
+ STR(PTR)"\t5b, 11b\n\t" \
+ STR(PTR)"\t6b, 11b\n\t" \
+ STR(PTR)"\t7b, 11b\n\t" \
+ STR(PTR)"\t8b, 11b\n\t" \
+ ".previous" \
+ : "=&r" (res) \
+ : "r" (value), "r" (addr), "i" (-EFAULT) \
+ : "memory"); \
+} while (0)
+
+#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */
+#endif
+
+#define LoadHWU(addr, value, res) _LoadHWU(addr, value, res, kernel)
+#define LoadHWUE(addr, value, res) _LoadHWU(addr, value, res, user)
+#define LoadWU(addr, value, res) _LoadWU(addr, value, res, kernel)
+#define LoadWUE(addr, value, res) _LoadWU(addr, value, res, user)
+#define LoadHW(addr, value, res) _LoadHW(addr, value, res, kernel)
+#define LoadHWE(addr, value, res) _LoadHW(addr, value, res, user)
+#define LoadW(addr, value, res) _LoadW(addr, value, res, kernel)
+#define LoadWE(addr, value, res) _LoadW(addr, value, res, user)
+#define LoadDW(addr, value, res) _LoadDW(addr, value, res)
+
+#define StoreHW(addr, value, res) _StoreHW(addr, value, res, kernel)
+#define StoreHWE(addr, value, res) _StoreHW(addr, value, res, user)
+#define StoreW(addr, value, res) _StoreW(addr, value, res, kernel)
+#define StoreWE(addr, value, res) _StoreW(addr, value, res, user)
+#define StoreDW(addr, value, res) _StoreDW(addr, value, res)
+
+#endif /* _ASM_MIPS_UNALIGNED_EMUL_H */
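The byte-by-byte _StoreW fallback above is easier to follow outside of inline asm. Below is a minimal user-space sketch, not the kernel macro itself, that performs the same shift-by-eight sequence with plain byte stores (byte order as in the little-endian variant shown in this hunk):

```c
#include <stdint.h>
#include <stdio.h>

/*
 * Illustrative sketch only: emulate an unaligned 32-bit store with four
 * byte stores, mirroring the srl-by-8 sequence of the _StoreW fallback
 * for CPUs without swl/swr (least-significant byte at the lowest address).
 */
static void store_w_bytewise(uint8_t *addr, uint32_t value)
{
	addr[0] = value & 0xff;
	addr[1] = (value >> 8) & 0xff;
	addr[2] = (value >> 16) & 0xff;
	addr[3] = (value >> 24) & 0xff;
}

int main(void)
{
	uint8_t buf[8] = { 0 };

	store_w_bytewise(buf + 1, 0xdeadbeef);	/* deliberately misaligned */
	printf("%02x %02x %02x %02x\n", buf[1], buf[2], buf[3], buf[4]);
	return 0;
}
```

The real macros additionally route each byte store through the exception table so a faulting user address yields -EFAULT instead of an oops.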
diff --git a/arch/mips/include/asm/vermagic.h b/arch/mips/include/asm/vermagic.h
index 24dc3d35161c..4d2dae0c7c57 100644
--- a/arch/mips/include/asm/vermagic.h
+++ b/arch/mips/include/asm/vermagic.h
@@ -8,12 +8,16 @@
#define MODULE_PROC_FAMILY "MIPS32_R1 "
#elif defined CONFIG_CPU_MIPS32_R2
#define MODULE_PROC_FAMILY "MIPS32_R2 "
+#elif defined CONFIG_CPU_MIPS32_R5
+#define MODULE_PROC_FAMILY "MIPS32_R5 "
#elif defined CONFIG_CPU_MIPS32_R6
#define MODULE_PROC_FAMILY "MIPS32_R6 "
#elif defined CONFIG_CPU_MIPS64_R1
#define MODULE_PROC_FAMILY "MIPS64_R1 "
#elif defined CONFIG_CPU_MIPS64_R2
#define MODULE_PROC_FAMILY "MIPS64_R2 "
+#elif defined CONFIG_CPU_MIPS64_R5
+#define MODULE_PROC_FAMILY "MIPS64_R5 "
#elif defined CONFIG_CPU_MIPS64_R6
#define MODULE_PROC_FAMILY "MIPS64_R6 "
#elif defined CONFIG_CPU_R3000
@@ -46,6 +50,8 @@
#define MODULE_PROC_FAMILY "LOONGSON64 "
#elif defined CONFIG_CPU_CAVIUM_OCTEON
#define MODULE_PROC_FAMILY "OCTEON "
+#elif defined CONFIG_CPU_P5600
+#define MODULE_PROC_FAMILY "P5600 "
#elif defined CONFIG_CPU_XLR
#define MODULE_PROC_FAMILY "XLR "
#elif defined CONFIG_CPU_XLP
diff --git a/arch/mips/include/uapi/asm/hwcap.h b/arch/mips/include/uapi/asm/hwcap.h
index 1ade1daa4921..b7e02bdc1985 100644
--- a/arch/mips/include/uapi/asm/hwcap.h
+++ b/arch/mips/include/uapi/asm/hwcap.h
@@ -17,5 +17,6 @@
#define HWCAP_LOONGSON_MMI (1 << 11)
#define HWCAP_LOONGSON_EXT (1 << 12)
#define HWCAP_LOONGSON_EXT2 (1 << 13)
+#define HWCAP_LOONGSON_CPUCFG (1 << 14)
#endif /* _UAPI_ASM_HWCAP_H */
diff --git a/arch/mips/include/uapi/asm/inst.h b/arch/mips/include/uapi/asm/inst.h
index eaa3a80affdf..98f97c85e059 100644
--- a/arch/mips/include/uapi/asm/inst.h
+++ b/arch/mips/include/uapi/asm/inst.h
@@ -989,6 +989,30 @@ struct mm16_r5_format { /* Load/store from stack pointer format */
};
/*
+ * Loongson-3 overridden COP2 instruction formats (32-bit length)
+ */
+struct loongson3_lswc2_format { /* Loongson-3 overridden lwc2/swc2 Load/Store format */
+ __BITFIELD_FIELD(unsigned int opcode : 6,
+ __BITFIELD_FIELD(unsigned int base : 5,
+ __BITFIELD_FIELD(unsigned int rt : 5,
+ __BITFIELD_FIELD(unsigned int fr : 1,
+ __BITFIELD_FIELD(unsigned int offset : 9,
+ __BITFIELD_FIELD(unsigned int ls : 1,
+ __BITFIELD_FIELD(unsigned int rq : 5,
+ ;)))))))
+};
+
+struct loongson3_lsdc2_format { /* Loongson-3 overridden ldc2/sdc2 Load/Store format */
+ __BITFIELD_FIELD(unsigned int opcode : 6,
+ __BITFIELD_FIELD(unsigned int base : 5,
+ __BITFIELD_FIELD(unsigned int rt : 5,
+ __BITFIELD_FIELD(unsigned int index : 5,
+ __BITFIELD_FIELD(unsigned int offset : 8,
+ __BITFIELD_FIELD(unsigned int opcode1 : 3,
+ ;))))))
+};
+
+/*
* MIPS16e instruction formats (16-bit length)
*/
struct m16e_rr {
@@ -1088,6 +1112,8 @@ union mips_instruction {
struct mm16_rb_format mm16_rb_format;
struct mm16_r3_format mm16_r3_format;
struct mm16_r5_format mm16_r5_format;
+ struct loongson3_lswc2_format loongson3_lswc2_format;
+ struct loongson3_lsdc2_format loongson3_lsdc2_format;
};
union mips16e_instruction {
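For readers unfamiliar with __BITFIELD_FIELD, the new Loongson-3 formats can also be decoded with explicit shifts. The helper below is a hypothetical user-space sketch; the field positions are derived from the MSB-first declaration order of loongson3_lsdc2_format above, and the sample word is made up:

```c
#include <stdint.h>
#include <stdio.h>

/* Pull the loongson3_lsdc2_format fields out of a raw instruction word,
 * assuming opcode sits in bits 31:26 and opcode1 in bits 2:0. */
struct lsdc2_fields {
	unsigned int opcode, base, rt, index, offset, opcode1;
};

static struct lsdc2_fields decode_lsdc2(uint32_t insn)
{
	struct lsdc2_fields f;

	f.opcode  = (insn >> 26) & 0x3f;
	f.base    = (insn >> 21) & 0x1f;
	f.rt      = (insn >> 16) & 0x1f;
	f.index   = (insn >> 11) & 0x1f;
	f.offset  = (insn >>  3) & 0xff;
	f.opcode1 = insn & 0x7;
	return f;
}

int main(void)
{
	struct lsdc2_fields f = decode_lsdc2(0xd8000000u);	/* made-up encoding */

	printf("opcode=%u base=%u rt=%u index=%u offset=%u opcode1=%u\n",
	       f.opcode, f.base, f.rt, f.index, f.offset, f.opcode1);
	return 0;
}
```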
diff --git a/arch/mips/jazz/Platform b/arch/mips/jazz/Platform
index 3373788acca1..eb0490ae8b09 100644
--- a/arch/mips/jazz/Platform
+++ b/arch/mips/jazz/Platform
@@ -1,6 +1,5 @@
#
# Acer PICA 61, Mips Magnum 4000 and Olivetti M700.
#
-platform-$(CONFIG_MACH_JAZZ) += jazz/
cflags-$(CONFIG_MACH_JAZZ) += -I$(srctree)/arch/mips/include/asm/mach-jazz
load-$(CONFIG_MACH_JAZZ) += 0xffffffff80080000
diff --git a/arch/mips/jz4740/Kconfig b/arch/mips/jz4740/Kconfig
index 412d2faa3cdf..9c2e8c15bb97 100644
--- a/arch/mips/jz4740/Kconfig
+++ b/arch/mips/jz4740/Kconfig
@@ -2,7 +2,14 @@
choice
prompt "Machine type"
depends on MACH_INGENIC
- default JZ4740_QI_LB60
+ default INGENIC_GENERIC_BOARD
+
+config INGENIC_GENERIC_BOARD
+ bool "Generic board"
+ select MACH_JZ4740
+ select MACH_JZ4770
+ select MACH_JZ4780
+ select MACH_X1000
config JZ4740_QI_LB60
bool "Qi Hardware Ben NanoNote"
diff --git a/arch/mips/jz4740/Makefile b/arch/mips/jz4740/Makefile
index 6de14c0deb4e..f96c0f5eca44 100644
--- a/arch/mips/jz4740/Makefile
+++ b/arch/mips/jz4740/Makefile
@@ -4,11 +4,6 @@
#
# Object file lists.
-
-obj-y += prom.o time.o reset.o setup.o timer.o
+obj-y += setup.o
CFLAGS_setup.o = -I$(src)/../../../scripts/dtc/libfdt
-
-# PM support
-
-obj-$(CONFIG_PM) += pm.o
diff --git a/arch/mips/jz4740/Platform b/arch/mips/jz4740/Platform
index a2a5a85ea1f9..bd35d0621b13 100644
--- a/arch/mips/jz4740/Platform
+++ b/arch/mips/jz4740/Platform
@@ -1,4 +1,3 @@
-platform-$(CONFIG_MACH_INGENIC) += jz4740/
cflags-$(CONFIG_MACH_INGENIC) += -I$(srctree)/arch/mips/include/asm/mach-jz4740
load-$(CONFIG_MACH_INGENIC) += 0xffffffff80010000
zload-$(CONFIG_MACH_INGENIC) += 0xffffffff81000000
diff --git a/arch/mips/jz4740/pm.c b/arch/mips/jz4740/pm.c
deleted file mode 100644
index f9b551f01f42..000000000000
--- a/arch/mips/jz4740/pm.c
+++ /dev/null
@@ -1,34 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Copyright (C) 2010, Lars-Peter Clausen <lars@metafoo.de>
- * JZ4740 SoC power management support
- */
-
-#include <linux/init.h>
-#include <linux/pm.h>
-#include <linux/delay.h>
-#include <linux/suspend.h>
-
-static int jz4740_pm_enter(suspend_state_t state)
-{
- __asm__(".set\tmips3\n\t"
- "wait\n\t"
- ".set\tmips0");
-
-
-
- return 0;
-}
-
-static const struct platform_suspend_ops jz4740_pm_ops = {
- .valid = suspend_valid_only_mem,
- .enter = jz4740_pm_enter,
-};
-
-static int __init jz4740_pm_init(void)
-{
- suspend_set_ops(&jz4740_pm_ops);
- return 0;
-
-}
-late_initcall(jz4740_pm_init);
diff --git a/arch/mips/jz4740/prom.c b/arch/mips/jz4740/prom.c
deleted file mode 100644
index ff4555c3fb15..000000000000
--- a/arch/mips/jz4740/prom.c
+++ /dev/null
@@ -1,19 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Copyright (C) 2010, Lars-Peter Clausen <lars@metafoo.de>
- * JZ4740 SoC prom code
- */
-
-#include <linux/init.h>
-
-#include <asm/bootinfo.h>
-#include <asm/fw/fw.h>
-
-void __init prom_init(void)
-{
- fw_init_cmdline();
-}
-
-void __init prom_free_prom_memory(void)
-{
-}
diff --git a/arch/mips/jz4740/reset.c b/arch/mips/jz4740/reset.c
deleted file mode 100644
index 1f9f02e54085..000000000000
--- a/arch/mips/jz4740/reset.c
+++ /dev/null
@@ -1,24 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Copyright (C) 2010, Lars-Peter Clausen <lars@metafoo.de>
- */
-
-#include <asm/reboot.h>
-
-#include "reset.h"
-
-static void jz4740_halt(void)
-{
- while (1) {
- __asm__(".set push;\n"
- ".set mips3;\n"
- "wait;\n"
- ".set pop;\n"
- );
- }
-}
-
-void jz4740_reset_init(void)
-{
- _machine_halt = jz4740_halt;
-}
diff --git a/arch/mips/jz4740/reset.h b/arch/mips/jz4740/reset.h
deleted file mode 100644
index 4e8746ee9b61..000000000000
--- a/arch/mips/jz4740/reset.h
+++ /dev/null
@@ -1,7 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __MIPS_JZ4740_RESET_H__
-#define __MIPS_JZ4740_RESET_H__
-
-extern void jz4740_reset_init(void);
-
-#endif
diff --git a/arch/mips/jz4740/setup.c b/arch/mips/jz4740/setup.c
index 880c26857aff..61468a87775c 100644
--- a/arch/mips/jz4740/setup.c
+++ b/arch/mips/jz4740/setup.c
@@ -5,17 +5,22 @@
* JZ4740 setup code
*/
+#include <linux/clocksource.h>
#include <linux/init.h>
#include <linux/io.h>
#include <linux/irqchip.h>
#include <linux/kernel.h>
#include <linux/libfdt.h>
+#include <linux/of_clk.h>
#include <linux/of_fdt.h>
+#include <linux/pm.h>
+#include <linux/suspend.h>
#include <asm/bootinfo.h>
+#include <asm/fw/fw.h>
#include <asm/prom.h>
-
-#include "reset.h"
+#include <asm/reboot.h>
+#include <asm/time.h>
#define JZ4740_EMC_BASE_ADDR 0x13010000
@@ -61,8 +66,6 @@ void __init plat_mem_setup(void)
int offset;
void *dtb;
- jz4740_reset_init();
-
if (__dtb_start != __dtb_end)
dtb = __dtb_start;
else
@@ -105,3 +108,56 @@ void __init arch_init_irq(void)
{
irqchip_init();
}
+
+void __init plat_time_init(void)
+{
+ of_clk_init(NULL);
+ timer_probe();
+}
+
+void __init prom_init(void)
+{
+ fw_init_cmdline();
+}
+
+void __init prom_free_prom_memory(void)
+{
+}
+
+static void jz4740_wait_instr(void)
+{
+ __asm__(".set push;\n"
+ ".set mips3;\n"
+ "wait;\n"
+ ".set pop;\n"
+ );
+}
+
+static void jz4740_halt(void)
+{
+ for (;;)
+ jz4740_wait_instr();
+}
+
+static int __maybe_unused jz4740_pm_enter(suspend_state_t state)
+{
+ jz4740_wait_instr();
+
+ return 0;
+}
+
+static const struct platform_suspend_ops jz4740_pm_ops __maybe_unused = {
+ .valid = suspend_valid_only_mem,
+ .enter = jz4740_pm_enter,
+};
+
+static int __init jz4740_pm_init(void)
+{
+ if (IS_ENABLED(CONFIG_PM_SLEEP))
+ suspend_set_ops(&jz4740_pm_ops);
+ _machine_halt = jz4740_halt;
+
+ return 0;
+
+}
+late_initcall(jz4740_pm_init);
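The consolidated setup code above registers the suspend ops only when CONFIG_PM_SLEEP is enabled, but keeps them visible to the compiler via IS_ENABLED() plus __maybe_unused rather than an #ifdef. A toy illustration of that pattern; the macro below is a plain stand-in, not the kernel's kconfig.h implementation:

```c
#include <stdio.h>

/* Stand-in for the kconfig helper: in the kernel, IS_ENABLED() expands from
 * generated autoconf macros; a plain 0/1 define is enough to show the idea. */
#define CONFIG_PM_SLEEP_ON 0
#define IS_ENABLED(option) (option)

static void register_suspend_ops(void)
{
	puts("suspend ops registered");
}

int main(void)
{
	/*
	 * Both branches are always compiled and type-checked; when the option
	 * is off, the dead call is simply discarded, so no #ifdef is needed
	 * around the function (the kernel additionally marks the ops
	 * __maybe_unused to keep the build quiet).
	 */
	if (IS_ENABLED(CONFIG_PM_SLEEP_ON))
		register_suspend_ops();

	puts("halt handler registered");
	return 0;
}
```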
diff --git a/arch/mips/jz4740/time.c b/arch/mips/jz4740/time.c
deleted file mode 100644
index 605a84a250bf..000000000000
--- a/arch/mips/jz4740/time.c
+++ /dev/null
@@ -1,17 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Copyright (C) 2010, Lars-Peter Clausen <lars@metafoo.de>
- * JZ4740 platform time support
- */
-
-#include <linux/clocksource.h>
-#include <linux/of_clk.h>
-
-#include <asm/mach-jz4740/timer.h>
-
-void __init plat_time_init(void)
-{
- of_clk_init(NULL);
- jz4740_timer_init();
- timer_probe();
-}
diff --git a/arch/mips/jz4740/timer.c b/arch/mips/jz4740/timer.c
deleted file mode 100644
index 5c9f82de6a82..000000000000
--- a/arch/mips/jz4740/timer.c
+++ /dev/null
@@ -1,42 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Copyright (C) 2010, Lars-Peter Clausen <lars@metafoo.de>
- * JZ4740 platform timer support
- */
-
-#include <linux/export.h>
-#include <linux/io.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-
-#include <asm/mach-jz4740/base.h>
-#include <asm/mach-jz4740/timer.h>
-
-void __iomem *jz4740_timer_base;
-EXPORT_SYMBOL_GPL(jz4740_timer_base);
-
-void jz4740_timer_enable_watchdog(void)
-{
- writel(BIT(16), jz4740_timer_base + JZ_REG_TIMER_STOP_CLEAR);
-}
-EXPORT_SYMBOL_GPL(jz4740_timer_enable_watchdog);
-
-void jz4740_timer_disable_watchdog(void)
-{
- writel(BIT(16), jz4740_timer_base + JZ_REG_TIMER_STOP_SET);
-}
-EXPORT_SYMBOL_GPL(jz4740_timer_disable_watchdog);
-
-void __init jz4740_timer_init(void)
-{
- jz4740_timer_base = ioremap(JZ4740_TCU_BASE_ADDR, 0x100);
-
- if (!jz4740_timer_base)
- panic("Failed to ioremap timer registers");
-
- /* Disable all timer clocks except for those used as system timers */
- writel(0x000100fc, jz4740_timer_base + JZ_REG_TIMER_STOP_SET);
-
- /* Timer irqs are unmasked by default, mask them */
- writel(0x00ff00ff, jz4740_timer_base + JZ_REG_TIMER_MASK_SET);
-}
diff --git a/arch/mips/kernel/8250-platform.c b/arch/mips/kernel/8250-platform.c
deleted file mode 100644
index 5c6b2ab1f56e..000000000000
--- a/arch/mips/kernel/8250-platform.c
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2007 Ralf Baechle (ralf@linux-mips.org)
- */
-#include <linux/init.h>
-#include <linux/serial_8250.h>
-
-#define PORT(base, int) \
-{ \
- .iobase = base, \
- .irq = int, \
- .uartclk = 1843200, \
- .iotype = UPIO_PORT, \
- .flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST, \
- .regshift = 0, \
-}
-
-static struct plat_serial8250_port uart8250_data[] = {
- PORT(0x3F8, 4),
- PORT(0x2F8, 3),
- PORT(0x3E8, 4),
- PORT(0x2E8, 3),
- { },
-};
-
-static struct platform_device uart8250_device = {
- .name = "serial8250",
- .id = PLAT8250_DEV_PLATFORM,
- .dev = {
- .platform_data = uart8250_data,
- },
-};
-
-static int __init uart8250_init(void)
-{
- return platform_device_register(&uart8250_device);
-}
-
-module_init(uart8250_init);
-
-MODULE_AUTHOR("Ralf Baechle <ralf@linux-mips.org>");
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("Generic 8250 UART probe driver");
diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile
index d6e97df51cfb..8c7a043295ed 100644
--- a/arch/mips/kernel/Makefile
+++ b/arch/mips/kernel/Makefile
@@ -98,8 +98,6 @@ obj-$(CONFIG_MIPSR2_TO_R6_EMULATOR) += mips-r2-to-r6-emul.o
CFLAGS_cpu-bugs64.o = $(shell if $(CC) $(KBUILD_CFLAGS) -Wa,-mdaddi -c -o /dev/null -x c /dev/null >/dev/null 2>&1; then echo "-DHAVE_AS_SET_DADDI"; fi)
-obj-$(CONFIG_HAVE_STD_PC_SERIAL_PORT) += 8250-platform.o
-
obj-$(CONFIG_PERF_EVENTS) += perf_event.o
obj-$(CONFIG_HW_PERF_EVENTS) += perf_event_mipsxx.o
diff --git a/arch/mips/kernel/branch.c b/arch/mips/kernel/branch.c
index 2c38f75d87ff..fb3e203698ea 100644
--- a/arch/mips/kernel/branch.c
+++ b/arch/mips/kernel/branch.c
@@ -90,7 +90,7 @@ int __mm_isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
regs->regs[31] = regs->cp0_epc +
dec_insn.pc_inc +
dec_insn.next_pc_inc;
- /* Fall through */
+ fallthrough;
case mm_bltz_op:
if ((long)regs->regs[insn.mm_i_format.rs] < 0)
*contpc = regs->cp0_epc +
@@ -106,7 +106,7 @@ int __mm_isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
regs->regs[31] = regs->cp0_epc +
dec_insn.pc_inc +
dec_insn.next_pc_inc;
- /* Fall through */
+ fallthrough;
case mm_bgez_op:
if ((long)regs->regs[insn.mm_i_format.rs] >= 0)
*contpc = regs->cp0_epc +
@@ -144,7 +144,7 @@ int __mm_isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
unsigned int bit;
bc_false = 1;
- /* Fall through */
+ fallthrough;
case mm_bc2t_op:
case mm_bc1t_op:
preempt_disable();
@@ -178,7 +178,7 @@ int __mm_isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
case mm_jalrs16_op:
regs->regs[31] = regs->cp0_epc +
dec_insn.pc_inc + dec_insn.next_pc_inc;
- /* Fall through */
+ fallthrough;
case mm_jr16_op:
*contpc = regs->regs[insn.mm_i_format.rs];
return 1;
@@ -239,7 +239,7 @@ int __mm_isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
case mm_jal32_op:
regs->regs[31] = regs->cp0_epc +
dec_insn.pc_inc + dec_insn.next_pc_inc;
- /* Fall through */
+ fallthrough;
case mm_j32_op:
*contpc = regs->cp0_epc + dec_insn.pc_inc;
*contpc >>= 27;
@@ -432,7 +432,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs,
switch (insn.r_format.func) {
case jalr_op:
regs->regs[insn.r_format.rd] = epc + 8;
- /* Fall through */
+ fallthrough;
case jr_op:
if (NO_R6EMU && insn.r_format.func == jr_op)
goto sigill_r2r6;
@@ -451,7 +451,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs,
case bltzl_op:
if (NO_R6EMU)
goto sigill_r2r6;
- /* fall through */
+ fallthrough;
case bltz_op:
if ((long)regs->regs[insn.i_format.rs] < 0) {
epc = epc + 4 + (insn.i_format.simmediate << 2);
@@ -465,7 +465,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs,
case bgezl_op:
if (NO_R6EMU)
goto sigill_r2r6;
- /* fall through */
+ fallthrough;
case bgez_op:
if ((long)regs->regs[insn.i_format.rs] >= 0) {
epc = epc + 4 + (insn.i_format.simmediate << 2);
@@ -561,7 +561,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs,
case jalx_op:
case jal_op:
regs->regs[31] = regs->cp0_epc + 8;
- /* fall through */
+ fallthrough;
case j_op:
epc += 4;
epc >>= 28;
@@ -578,7 +578,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs,
case beql_op:
if (NO_R6EMU)
goto sigill_r2r6;
- /* fall through */
+ fallthrough;
case beq_op:
if (regs->regs[insn.i_format.rs] ==
regs->regs[insn.i_format.rt]) {
@@ -593,7 +593,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs,
case bnel_op:
if (NO_R6EMU)
goto sigill_r2r6;
- /* fall through */
+ fallthrough;
case bne_op:
if (regs->regs[insn.i_format.rs] !=
regs->regs[insn.i_format.rt]) {
@@ -608,7 +608,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs,
case blezl_op: /* not really i_format */
if (!insn.i_format.rt && NO_R6EMU)
goto sigill_r2r6;
- /* fall through */
+ fallthrough;
case blez_op:
/*
* Compact branches for R6 for the
@@ -644,7 +644,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs,
case bgtzl_op:
if (!insn.i_format.rt && NO_R6EMU)
goto sigill_r2r6;
- /* fall through */
+ fallthrough;
case bgtz_op:
/*
* Compact branches for R6 for the
diff --git a/arch/mips/kernel/cevt-r4k.c b/arch/mips/kernel/cevt-r4k.c
index 17a9cbb8b3df..995ad9e69ded 100644
--- a/arch/mips/kernel/cevt-r4k.c
+++ b/arch/mips/kernel/cevt-r4k.c
@@ -8,6 +8,7 @@
*/
#include <linux/clockchips.h>
#include <linux/interrupt.h>
+#include <linux/cpufreq.h>
#include <linux/percpu.h>
#include <linux/smp.h>
#include <linux/irq.h>
@@ -250,6 +251,49 @@ unsigned int __weak get_c0_compare_int(void)
return MIPS_CPU_IRQ_BASE + cp0_compare_irq;
}
+#ifdef CONFIG_CPU_FREQ
+
+static unsigned long mips_ref_freq;
+
+static int r4k_cpufreq_callback(struct notifier_block *nb,
+ unsigned long val, void *data)
+{
+ struct cpufreq_freqs *freq = data;
+ struct clock_event_device *cd;
+ unsigned long rate;
+ int cpu;
+
+ if (!mips_ref_freq)
+ mips_ref_freq = freq->old;
+
+ if (val == CPUFREQ_POSTCHANGE) {
+ rate = cpufreq_scale(mips_hpt_frequency, mips_ref_freq,
+ freq->new);
+
+ for_each_cpu(cpu, freq->policy->cpus) {
+ cd = &per_cpu(mips_clockevent_device, cpu);
+
+ clockevents_update_freq(cd, rate);
+ }
+ }
+
+ return 0;
+}
+
+static struct notifier_block r4k_cpufreq_notifier = {
+ .notifier_call = r4k_cpufreq_callback,
+};
+
+static int __init r4k_register_cpufreq_notifier(void)
+{
+ return cpufreq_register_notifier(&r4k_cpufreq_notifier,
+ CPUFREQ_TRANSITION_NOTIFIER);
+
+}
+core_initcall(r4k_register_cpufreq_notifier);
+
+#endif /* !CONFIG_CPU_FREQ */
+
int r4k_clockevent_init(void)
{
unsigned long flags = IRQF_PERCPU | IRQF_TIMER | IRQF_SHARED;
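The notifier above only rescales the clockevent rate in proportion to the CPU frequency change. A stand-alone sketch of that arithmetic follows; it is not the kernel's cpufreq_scale() itself, whose overflow and precision handling differ:

```c
#include <stdio.h>

/* new_rate = base_rate * new_freq / ref_freq, done in 64 bits to avoid
 * overflow for realistic timer rates. */
static unsigned long scale_rate(unsigned long base_rate,
				unsigned long ref_khz, unsigned long new_khz)
{
	return (unsigned long)(((unsigned long long)base_rate * new_khz) / ref_khz);
}

int main(void)
{
	/* e.g. a 200 MHz Count/Compare timer on a CPU dropping from 800 to 600 MHz */
	printf("%lu\n", scale_rate(200000000UL, 800000UL, 600000UL)); /* 150000000 */
	return 0;
}
```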
diff --git a/arch/mips/kernel/cps-vec-ns16550.S b/arch/mips/kernel/cps-vec-ns16550.S
index d5a67b4ce9f6..30725e1df987 100644
--- a/arch/mips/kernel/cps-vec-ns16550.S
+++ b/arch/mips/kernel/cps-vec-ns16550.S
@@ -14,16 +14,30 @@
#define UART_TX_OFS (UART_TX << CONFIG_MIPS_CPS_NS16550_SHIFT)
#define UART_LSR_OFS (UART_LSR << CONFIG_MIPS_CPS_NS16550_SHIFT)
+#if CONFIG_MIPS_CPS_NS16550_WIDTH == 1
+# define UART_L lb
+# define UART_S sb
+#elif CONFIG_MIPS_CPS_NS16550_WIDTH == 2
+# define UART_L lh
+# define UART_S sh
+#elif CONFIG_MIPS_CPS_NS16550_WIDTH == 4
+# define UART_L lw
+# define UART_S sw
+#else
+# define UART_L lb
+# define UART_S sb
+#endif
+
/**
* _mips_cps_putc() - write a character to the UART
* @a0: ASCII character to write
* @t9: UART base address
*/
LEAF(_mips_cps_putc)
-1: lw t0, UART_LSR_OFS(t9)
+1: UART_L t0, UART_LSR_OFS(t9)
andi t0, t0, UART_LSR_TEMT
beqz t0, 1b
- sb a0, UART_TX_OFS(t9)
+ UART_S a0, UART_TX_OFS(t9)
jr ra
END(_mips_cps_putc)
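In C terms, the new UART_L/UART_S selection amounts to choosing the access width of the poll-then-write sequence at build time. The toy model below stands in for the memory-mapped 8250 registers with a plain array and glosses over the regshift scaling that the assembly applies to each offset:

```c
#include <stdint.h>
#include <stdio.h>

#define UART_TX		0
#define UART_LSR	5
#define UART_LSR_TEMT	0x40
#define NS16550_WIDTH	1	/* 1, 2 or 4, like CONFIG_MIPS_CPS_NS16550_WIDTH */

#if NS16550_WIDTH == 4
typedef uint32_t uart_reg_t;
#elif NS16550_WIDTH == 2
typedef uint16_t uart_reg_t;
#else
typedef uint8_t uart_reg_t;
#endif

static uart_reg_t regs[8] = { [UART_LSR] = UART_LSR_TEMT };	/* TX idle */

static void uart_putc(char c)
{
	while (!(regs[UART_LSR] & UART_LSR_TEMT))
		;			/* spin until the transmitter is empty */
	regs[UART_TX] = (uart_reg_t)c;
}

int main(void)
{
	uart_putc('A');
	printf("TX register now holds 0x%02x\n", (unsigned int)regs[UART_TX]);
	return 0;
}
```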
diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index f21a2304401f..6b93162d7c5a 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -28,6 +28,8 @@
#include <asm/spram.h>
#include <linux/uaccess.h>
+#include <asm/mach-loongson64/cpucfg-emul.h>
+
/* Hardware capabilities */
unsigned int elf_hwcap __read_mostly;
EXPORT_SYMBOL_GPL(elf_hwcap);
@@ -92,6 +94,7 @@ static void cpu_set_fpu_2008(struct cpuinfo_mips *c)
{
if (c->isa_level & (MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M64R1 |
MIPS_CPU_ISA_M32R2 | MIPS_CPU_ISA_M64R2 |
+ MIPS_CPU_ISA_M32R5 | MIPS_CPU_ISA_M64R5 |
MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R6)) {
unsigned long sr, fir, fcsr, fcsr0, fcsr1;
@@ -172,6 +175,7 @@ static void cpu_set_nofpu_2008(struct cpuinfo_mips *c)
case STRICT:
if (c->isa_level & (MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M64R1 |
MIPS_CPU_ISA_M32R2 | MIPS_CPU_ISA_M64R2 |
+ MIPS_CPU_ISA_M32R5 | MIPS_CPU_ISA_M64R5 |
MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R6)) {
c->options |= MIPS_CPU_NAN_2008 | MIPS_CPU_NAN_LEGACY;
} else {
@@ -263,9 +267,11 @@ static void cpu_set_nofpu_id(struct cpuinfo_mips *c)
value = 0;
if (c->isa_level & (MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M64R1 |
MIPS_CPU_ISA_M32R2 | MIPS_CPU_ISA_M64R2 |
+ MIPS_CPU_ISA_M32R5 | MIPS_CPU_ISA_M64R5 |
MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R6))
value |= MIPS_FPIR_D | MIPS_FPIR_S;
if (c->isa_level & (MIPS_CPU_ISA_M32R2 | MIPS_CPU_ISA_M64R2 |
+ MIPS_CPU_ISA_M32R5 | MIPS_CPU_ISA_M64R5 |
MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R6))
value |= MIPS_FPIR_F64 | MIPS_FPIR_L | MIPS_FPIR_W;
if (c->options & MIPS_CPU_NAN_2008)
@@ -286,6 +292,7 @@ static void cpu_set_fpu_opts(struct cpuinfo_mips *c)
if (c->isa_level & (MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M64R1 |
MIPS_CPU_ISA_M32R2 | MIPS_CPU_ISA_M64R2 |
+ MIPS_CPU_ISA_M32R5 | MIPS_CPU_ISA_M64R5 |
MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R6)) {
if (c->fpu_id & MIPS_FPIR_3D)
c->ases |= MIPS_ASE_MIPS3D;
@@ -532,22 +539,26 @@ static inline void cpu_probe_vmbits(struct cpuinfo_mips *c)
static void set_isa(struct cpuinfo_mips *c, unsigned int isa)
{
switch (isa) {
+ case MIPS_CPU_ISA_M64R5:
+ c->isa_level |= MIPS_CPU_ISA_M32R5 | MIPS_CPU_ISA_M64R5;
+ set_elf_base_platform("mips64r5");
+ fallthrough;
case MIPS_CPU_ISA_M64R2:
c->isa_level |= MIPS_CPU_ISA_M32R2 | MIPS_CPU_ISA_M64R2;
set_elf_base_platform("mips64r2");
- /* fall through */
+ fallthrough;
case MIPS_CPU_ISA_M64R1:
c->isa_level |= MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M64R1;
set_elf_base_platform("mips64");
- /* fall through */
+ fallthrough;
case MIPS_CPU_ISA_V:
c->isa_level |= MIPS_CPU_ISA_V;
set_elf_base_platform("mips5");
- /* fall through */
+ fallthrough;
case MIPS_CPU_ISA_IV:
c->isa_level |= MIPS_CPU_ISA_IV;
set_elf_base_platform("mips4");
- /* fall through */
+ fallthrough;
case MIPS_CPU_ISA_III:
c->isa_level |= MIPS_CPU_ISA_II | MIPS_CPU_ISA_III;
set_elf_base_platform("mips3");
@@ -557,20 +568,24 @@ static void set_isa(struct cpuinfo_mips *c, unsigned int isa)
case MIPS_CPU_ISA_M64R6:
c->isa_level |= MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R6;
set_elf_base_platform("mips64r6");
- /* fall through */
+ fallthrough;
case MIPS_CPU_ISA_M32R6:
c->isa_level |= MIPS_CPU_ISA_M32R6;
set_elf_base_platform("mips32r6");
/* Break here so we don't add incompatible ISAs */
break;
+ case MIPS_CPU_ISA_M32R5:
+ c->isa_level |= MIPS_CPU_ISA_M32R5;
+ set_elf_base_platform("mips32r5");
+ fallthrough;
case MIPS_CPU_ISA_M32R2:
c->isa_level |= MIPS_CPU_ISA_M32R2;
set_elf_base_platform("mips32r2");
- /* fall through */
+ fallthrough;
case MIPS_CPU_ISA_M32R1:
c->isa_level |= MIPS_CPU_ISA_M32R1;
set_elf_base_platform("mips32");
- /* fall through */
+ fallthrough;
case MIPS_CPU_ISA_II:
c->isa_level |= MIPS_CPU_ISA_II;
set_elf_base_platform("mips2");
@@ -620,14 +635,14 @@ static int set_ftlb_enable(struct cpuinfo_mips *c, enum ftlb_flags flags)
config = read_c0_config6();
if (flags & FTLB_EN)
- config |= MIPS_CONF6_FTLBEN;
+ config |= MIPS_CONF6_MTI_FTLBEN;
else
- config &= ~MIPS_CONF6_FTLBEN;
+ config &= ~MIPS_CONF6_MTI_FTLBEN;
if (flags & FTLB_SET_PROB) {
- config &= ~(3 << MIPS_CONF6_FTLBP_SHIFT);
+ config &= ~(3 << MIPS_CONF6_MTI_FTLBP_SHIFT);
config |= calculate_ftlb_probability(c)
- << MIPS_CONF6_FTLBP_SHIFT;
+ << MIPS_CONF6_MTI_FTLBP_SHIFT;
}
write_c0_config6(config);
@@ -647,10 +662,10 @@ static int set_ftlb_enable(struct cpuinfo_mips *c, enum ftlb_flags flags)
config = read_c0_config6();
if (flags & FTLB_EN)
/* Enable FTLB */
- write_c0_config6(config & ~MIPS_CONF6_FTLBDIS);
+ write_c0_config6(config & ~MIPS_CONF6_LOONGSON_FTLBDIS);
else
/* Disable FTLB */
- write_c0_config6(config | MIPS_CONF6_FTLBDIS);
+ write_c0_config6(config | MIPS_CONF6_LOONGSON_FTLBDIS);
break;
default:
return 1;
@@ -659,6 +674,52 @@ static int set_ftlb_enable(struct cpuinfo_mips *c, enum ftlb_flags flags)
return 0;
}
+static int mm_config(struct cpuinfo_mips *c)
+{
+ unsigned int config0, update, mm;
+
+ config0 = read_c0_config();
+ mm = config0 & MIPS_CONF_MM;
+
+ /*
+ * It's implementation dependent what type of write-merge is supported
+ * and whether it can be enabled/disabled. If it is settable, let's make
+ * the merging allowed by default. Some platforms might have
+ * write-through caching unsupported. In this case just ignore the
+ * CP0.Config.MM bit field value.
+ */
+ switch (c->cputype) {
+ case CPU_24K:
+ case CPU_34K:
+ case CPU_74K:
+ case CPU_P5600:
+ case CPU_P6600:
+ c->options |= MIPS_CPU_MM_FULL;
+ update = MIPS_CONF_MM_FULL;
+ break;
+ case CPU_1004K:
+ case CPU_1074K:
+ case CPU_INTERAPTIV:
+ case CPU_PROAPTIV:
+ mm = 0;
+ fallthrough;
+ default:
+ update = 0;
+ break;
+ }
+
+ if (update) {
+ config0 = (config0 & ~MIPS_CONF_MM) | update;
+ write_c0_config(config0);
+ } else if (mm == MIPS_CONF_MM_SYSAD) {
+ c->options |= MIPS_CPU_MM_SYSAD;
+ } else if (mm == MIPS_CONF_MM_FULL) {
+ c->options |= MIPS_CPU_MM_FULL;
+ }
+
+ return 0;
+}
+
static inline unsigned int decode_config0(struct cpuinfo_mips *c)
{
unsigned int config0;
@@ -850,7 +911,7 @@ static inline unsigned int decode_config4(struct cpuinfo_mips *c)
MIPS_CONF4_VTLBSIZEEXT_SHIFT) * 0x40;
c->tlbsize = c->tlbsizevtlb;
ftlb_page = MIPS_CONF4_VFTLBPAGESIZE;
- /* fall through */
+ fallthrough;
case MIPS_CONF4_MMUEXTDEF_FTLBSIZEEXT:
if (mips_ftlb_disabled)
break;
@@ -1750,13 +1811,19 @@ static inline void cpu_probe_mips(struct cpuinfo_mips *c, unsigned int cpu)
spram_config();
+ mm_config(c);
+
switch (__get_cpu_type(c->cputype)) {
+ case CPU_M5150:
+ case CPU_P5600:
+ set_isa(c, MIPS_CPU_ISA_M32R5);
+ break;
case CPU_I6500:
c->options |= MIPS_CPU_SHARED_FTLB_ENTRIES;
- /* fall-through */
+ fallthrough;
case CPU_I6400:
c->options |= MIPS_CPU_SHARED_FTLB_RAM;
- /* fall-through */
+ fallthrough;
default:
break;
}
@@ -1932,10 +1999,53 @@ platform:
}
}
+#ifdef CONFIG_CPU_LOONGSON64
+#include <loongson_regs.h>
+
+static inline void decode_cpucfg(struct cpuinfo_mips *c)
+{
+ u32 cfg1 = read_cpucfg(LOONGSON_CFG1);
+ u32 cfg2 = read_cpucfg(LOONGSON_CFG2);
+ u32 cfg3 = read_cpucfg(LOONGSON_CFG3);
+
+ if (cfg1 & LOONGSON_CFG1_MMI)
+ c->ases |= MIPS_ASE_LOONGSON_MMI;
+
+ if (cfg2 & LOONGSON_CFG2_LEXT1)
+ c->ases |= MIPS_ASE_LOONGSON_EXT;
+
+ if (cfg2 & LOONGSON_CFG2_LEXT2)
+ c->ases |= MIPS_ASE_LOONGSON_EXT2;
+
+ if (cfg2 & LOONGSON_CFG2_LSPW)
+ c->options |= MIPS_CPU_LDPTE;
+
+ if (cfg3 & LOONGSON_CFG3_LCAMP)
+ c->ases |= MIPS_ASE_LOONGSON_CAM;
+}
+
static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu)
{
+ decode_configs(c);
+
switch (c->processor_id & PRID_IMP_MASK) {
- case PRID_IMP_LOONGSON_64C: /* Loongson-2/3 */
+ case PRID_IMP_LOONGSON_64R: /* Loongson-64 Reduced */
+ switch (c->processor_id & PRID_REV_MASK) {
+ case PRID_REV_LOONGSON2K_R1_0:
+ case PRID_REV_LOONGSON2K_R1_1:
+ case PRID_REV_LOONGSON2K_R1_2:
+ case PRID_REV_LOONGSON2K_R1_3:
+ c->cputype = CPU_LOONGSON64;
+ __cpu_name[cpu] = "Loongson-2K";
+ set_elf_platform(cpu, "gs264e");
+ set_isa(c, MIPS_CPU_ISA_M64R2);
+ break;
+ }
+ c->writecombine = _CACHE_UNCACHED_ACCELERATED;
+ c->ases |= (MIPS_ASE_LOONGSON_MMI | MIPS_ASE_LOONGSON_EXT |
+ MIPS_ASE_LOONGSON_EXT2);
+ break;
+ case PRID_IMP_LOONGSON_64C: /* Loongson-3 Classic */
switch (c->processor_id & PRID_REV_MASK) {
case PRID_REV_LOONGSON3A_R2_0:
case PRID_REV_LOONGSON3A_R2_1:
@@ -1952,8 +2062,14 @@ static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu)
set_isa(c, MIPS_CPU_ISA_M64R2);
break;
}
-
- decode_configs(c);
+ /*
+ * Loongson-3 Classic did not implement MIPS standard TLBINV
+ * but implemented TLBINVF and EHINV. Since we currently use only
+ * these two features, enable MIPS_CPU_TLBINV as well.
+ *
+ * Also, some early Loongson-3A2000 had the wrong TLB type in their
+ * Config register; we correct that here.
+ */
c->options |= MIPS_CPU_FTLB | MIPS_CPU_TLBINV | MIPS_CPU_LDPTE;
c->writecombine = _CACHE_UNCACHED_ACCELERATED;
c->ases |= (MIPS_ASE_LOONGSON_MMI | MIPS_ASE_LOONGSON_CAM |
@@ -1964,17 +2080,17 @@ static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu)
__cpu_name[cpu] = "ICT Loongson-3";
set_elf_platform(cpu, "loongson3a");
set_isa(c, MIPS_CPU_ISA_M64R2);
- decode_configs(c);
- c->options |= MIPS_CPU_FTLB | MIPS_CPU_TLBINV | MIPS_CPU_LDPTE;
+ decode_cpucfg(c);
c->writecombine = _CACHE_UNCACHED_ACCELERATED;
- c->ases |= (MIPS_ASE_LOONGSON_MMI | MIPS_ASE_LOONGSON_CAM |
- MIPS_ASE_LOONGSON_EXT | MIPS_ASE_LOONGSON_EXT2);
break;
default:
panic("Unknown Loongson Processor ID!");
break;
}
}
+#else
+static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu) { }
+#endif
static inline void cpu_probe_ingenic(struct cpuinfo_mips *c, unsigned int cpu)
{
@@ -2028,7 +2144,7 @@ static inline void cpu_probe_ingenic(struct cpuinfo_mips *c, unsigned int cpu)
default:
break;
}
- /* fall-through */
+ fallthrough;
case PRID_IMP_XBURST_REV2:
c->cputype = CPU_XBURST;
c->writecombine = _CACHE_UNCACHED_ACCELERATED;
@@ -2286,6 +2402,13 @@ void cpu_probe(void)
cpu_probe_vmbits(c);
+ /* Synthesize CPUCFG data if running on Loongson processors;
+ * no-op otherwise.
+ *
+ * This looks at previously probed features, so keep it at the bottom.
+ */
+ loongson3_cpucfg_synthesize_data(c);
+
#ifdef CONFIG_64BIT
if (cpu == 0)
__ua_limit = ~((1ull << cpu_vmbits) - 1);
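The probe paths above repeatedly slice c->processor_id with PRID_*_MASK. A small user-space sketch of that decoding; the mask values follow the usual MIPS PRId layout (company 23:16, implementation 15:8, revision 7:0) and should be treated as assumptions here, as is the sample value:

```c
#include <stdint.h>
#include <stdio.h>

#define PRID_COMP_MASK	0xff0000
#define PRID_IMP_MASK	0xff00
#define PRID_REV_MASK	0xff

int main(void)
{
	uint32_t prid = 0x0014c010;	/* made-up PRId value */

	printf("company=0x%x imp=0x%x rev=0x%x\n",
	       (unsigned int)((prid & PRID_COMP_MASK) >> 16),
	       (unsigned int)((prid & PRID_IMP_MASK) >> 8),
	       (unsigned int)(prid & PRID_REV_MASK));
	return 0;
}
```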
diff --git a/arch/mips/kernel/csrc-r4k.c b/arch/mips/kernel/csrc-r4k.c
index 437dda64fd7a..edc4afc080fa 100644
--- a/arch/mips/kernel/csrc-r4k.c
+++ b/arch/mips/kernel/csrc-r4k.c
@@ -6,6 +6,7 @@
* Copyright (C) 2007 by Ralf Baechle
*/
#include <linux/clocksource.h>
+#include <linux/cpufreq.h>
#include <linux/init.h>
#include <linux/sched_clock.h>
@@ -65,6 +66,45 @@ static bool rdhwr_count_usable(void)
return false;
}
+#ifdef CONFIG_CPU_FREQ
+
+static bool __read_mostly r4k_clock_unstable;
+
+static void r4k_clocksource_unstable(char *reason)
+{
+ if (r4k_clock_unstable)
+ return;
+
+ r4k_clock_unstable = true;
+
+ pr_info("R4K timer is unstable due to %s\n", reason);
+
+ clocksource_mark_unstable(&clocksource_mips);
+}
+
+static int r4k_cpufreq_callback(struct notifier_block *nb,
+ unsigned long val, void *data)
+{
+ if (val == CPUFREQ_POSTCHANGE)
+ r4k_clocksource_unstable("CPU frequency change");
+
+ return 0;
+}
+
+static struct notifier_block r4k_cpufreq_notifier = {
+ .notifier_call = r4k_cpufreq_callback,
+};
+
+static int __init r4k_register_cpufreq_notifier(void)
+{
+ return cpufreq_register_notifier(&r4k_cpufreq_notifier,
+ CPUFREQ_TRANSITION_NOTIFIER);
+
+}
+core_initcall(r4k_register_cpufreq_notifier);
+
+#endif /* !CONFIG_CPU_FREQ */
+
int __init init_r4k_clocksource(void)
{
if (!cpu_has_counter || !mips_hpt_frequency)
diff --git a/arch/mips/kernel/entry.S b/arch/mips/kernel/entry.S
index 4849a48afc0f..4b896f5023ff 100644
--- a/arch/mips/kernel/entry.S
+++ b/arch/mips/kernel/entry.S
@@ -169,8 +169,8 @@ syscall_exit_work:
jal syscall_trace_leave
b resume_userspace
-#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6) || \
- defined(CONFIG_MIPS_MT)
+#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR5) || \
+ defined(CONFIG_CPU_MIPSR6) || defined(CONFIG_MIPS_MT)
/*
* MIPS32R2 Instruction Hazard Barrier - must be called
@@ -183,4 +183,4 @@ LEAF(mips_ihb)
nop
END(mips_ihb)
-#endif /* CONFIG_CPU_MIPSR2 or CONFIG_CPU_MIPSR6 or CONFIG_MIPS_MT */
+#endif /* CONFIG_CPU_MIPSR2 - CONFIG_CPU_MIPSR6 or CONFIG_MIPS_MT */
diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S
index 0a43c9125267..a1b966f3578e 100644
--- a/arch/mips/kernel/genex.S
+++ b/arch/mips/kernel/genex.S
@@ -476,20 +476,20 @@ NESTED(nmi_handler, PT_SIZE, sp)
.endm
.macro __build_clear_fpe
+ CLI
+ TRACE_IRQS_OFF
.set push
/* gas fails to assemble cfc1 for some archs (octeon).*/ \
.set mips1
SET_HARDFLOAT
cfc1 a1, fcr31
.set pop
- CLI
- TRACE_IRQS_OFF
.endm
.macro __build_clear_msa_fpe
- _cfcmsa a1, MSA_CSR
CLI
TRACE_IRQS_OFF
+ _cfcmsa a1, MSA_CSR
.endm
.macro __build_clear_ade
@@ -501,17 +501,17 @@ NESTED(nmi_handler, PT_SIZE, sp)
.macro __BUILD_silent exception
.endm
- /* Gas tries to parse the PRINT argument as a string containing
+ /* Gas tries to parse the ASM_PRINT argument as a string containing
string escapes and emits bogus warnings if it believes to
recognize an unknown escape code. So make the arguments
start with an n and gas will believe \n is ok ... */
.macro __BUILD_verbose nexception
LONG_L a1, PT_EPC(sp)
#ifdef CONFIG_32BIT
- PRINT("Got \nexception at %08lx\012")
+ ASM_PRINT("Got \nexception at %08lx\012")
#endif
#ifdef CONFIG_64BIT
- PRINT("Got \nexception at %016lx\012")
+ ASM_PRINT("Got \nexception at %016lx\012")
#endif
.endm
diff --git a/arch/mips/kernel/head.S b/arch/mips/kernel/head.S
index 351d40fe0859..3b02ffe46304 100644
--- a/arch/mips/kernel/head.S
+++ b/arch/mips/kernel/head.S
@@ -132,6 +132,9 @@ dtb_found:
#endif
MTC0 zero, CP0_CONTEXT # clear context register
+#ifdef CONFIG_64BIT
+ MTC0 zero, CP0_XCONTEXT
+#endif
PTR_LA $28, init_thread_union
/* Set the SP after an empty pt_regs. */
PTR_LI sp, _THREAD_SIZE - 32 - PT_SIZE
diff --git a/arch/mips/kernel/idle.c b/arch/mips/kernel/idle.c
index 37f8e78e2869..5bc3b04693c7 100644
--- a/arch/mips/kernel/idle.c
+++ b/arch/mips/kernel/idle.c
@@ -180,7 +180,8 @@ void __init check_wait(void)
break;
case CPU_LOONGSON64:
if ((c->processor_id & (PRID_IMP_MASK | PRID_REV_MASK)) >=
- (PRID_IMP_LOONGSON_64C | PRID_REV_LOONGSON3A_R2_0))
+ (PRID_IMP_LOONGSON_64C | PRID_REV_LOONGSON3A_R2_0) ||
+ (c->processor_id & PRID_IMP_MASK) == PRID_IMP_LOONGSON_64R)
cpu_wait = r4k_wait;
break;
@@ -201,7 +202,7 @@ void __init check_wait(void)
*/
if (IS_ENABLED(CONFIG_MIPS_EJTAG_FDC_TTY))
break;
- /* fall through */
+ fallthrough;
case CPU_M14KC:
case CPU_M14KEC:
case CPU_24K:
diff --git a/arch/mips/kernel/mips-cm.c b/arch/mips/kernel/mips-cm.c
index cdb93ed91cde..f60af512c877 100644
--- a/arch/mips/kernel/mips-cm.c
+++ b/arch/mips/kernel/mips-cm.c
@@ -114,14 +114,56 @@ static char *cm2_core[8] = {
"Exclusive/OK", "Exclusive/Data"
};
+static char *cm2_l2_type[4] = {
+ [0x0] = "None",
+ [0x1] = "Tag RAM single/double ECC error",
+ [0x2] = "Data RAM single/double ECC error",
+ [0x3] = "WS RAM uncorrectable dirty parity"
+};
+
+static char *cm2_l2_instr[32] = {
+ [0x00] = "L2_NOP",
+ [0x01] = "L2_ERR_CORR",
+ [0x02] = "L2_TAG_INV",
+ [0x03] = "L2_WS_CLEAN",
+ [0x04] = "L2_RD_MDYFY_WR",
+ [0x05] = "L2_WS_MRU",
+ [0x06] = "L2_EVICT_LN2",
+ [0x07] = "0x07",
+ [0x08] = "L2_EVICT",
+ [0x09] = "L2_REFL",
+ [0x0a] = "L2_RD",
+ [0x0b] = "L2_WR",
+ [0x0c] = "L2_EVICT_MRU",
+ [0x0d] = "L2_SYNC",
+ [0x0e] = "L2_REFL_ERR",
+ [0x0f] = "0x0f",
+ [0x10] = "L2_INDX_WB_INV",
+ [0x11] = "L2_INDX_LD_TAG",
+ [0x12] = "L2_INDX_ST_TAG",
+ [0x13] = "L2_INDX_ST_DATA",
+ [0x14] = "L2_INDX_ST_ECC",
+ [0x15] = "0x15",
+ [0x16] = "0x16",
+ [0x17] = "0x17",
+ [0x18] = "L2_FTCH_AND_LCK",
+ [0x19] = "L2_HIT_INV",
+ [0x1a] = "L2_HIT_WB_INV",
+ [0x1b] = "L2_HIT_WB",
+ [0x1c] = "0x1c",
+ [0x1d] = "0x1d",
+ [0x1e] = "0x1e",
+ [0x1f] = "0x1f"
+};
+
static char *cm2_causes[32] = {
"None", "GC_WR_ERR", "GC_RD_ERR", "COH_WR_ERR",
"COH_RD_ERR", "MMIO_WR_ERR", "MMIO_RD_ERR", "0x07",
"0x08", "0x09", "0x0a", "0x0b",
"0x0c", "0x0d", "0x0e", "0x0f",
- "0x10", "0x11", "0x12", "0x13",
- "0x14", "0x15", "0x16", "INTVN_WR_ERR",
- "INTVN_RD_ERR", "0x19", "0x1a", "0x1b",
+ "0x10", "INTVN_WR_ERR", "INTVN_RD_ERR", "0x13",
+ "0x14", "0x15", "0x16", "0x17",
+ "L2_RD_UNCORR", "L2_WR_UNCORR", "L2_CORR", "0x1b",
"0x1c", "0x1d", "0x1e", "0x1f"
};
@@ -360,7 +402,7 @@ void mips_cm_error_report(void)
"CCA=%lu TR=%s MCmd=%s STag=%lu "
"SPort=%lu\n", cca_bits, cm2_tr[tr_bits],
cm2_cmd[cmd_bits], stag_bits, sport_bits);
- } else {
+ } else if (cause < 24) {
/* glob state & sresp together */
unsigned long c3_bits = (cm_error >> 18) & 7;
unsigned long c2_bits = (cm_error >> 15) & 7;
@@ -377,6 +419,22 @@ void mips_cm_error_report(void)
cm2_core[c1_bits], cm2_core[c0_bits],
sc_bit ? "True" : "False",
cm2_cmd[cmd_bits], sport_bits);
+ } else {
+ unsigned long muc_bit = (cm_error >> 23) & 1;
+ unsigned long ins_bits = (cm_error >> 18) & 0x1f;
+ unsigned long arr_bits = (cm_error >> 16) & 3;
+ unsigned long dw_bits = (cm_error >> 12) & 15;
+ unsigned long way_bits = (cm_error >> 9) & 7;
+ unsigned long mway_bit = (cm_error >> 8) & 1;
+ unsigned long syn_bits = (cm_error >> 0) & 0xFF;
+
+ snprintf(buf, sizeof(buf),
+ "Type=%s%s Instr=%s DW=%lu Way=%lu "
+ "MWay=%s Syndrome=0x%02lx",
+ muc_bit ? "Multi-UC " : "",
+ cm2_l2_type[arr_bits],
+ cm2_l2_instr[ins_bits], dw_bits, way_bits,
+ mway_bit ? "True" : "False", syn_bits);
}
pr_err("CM_ERROR=%08llx %s <%s>\n", cm_error,
cm2_causes[cause], buf);
diff --git a/arch/mips/kernel/mips-r2-to-r6-emul.c b/arch/mips/kernel/mips-r2-to-r6-emul.c
index b4d210bfcdae..a39ec755e4c2 100644
--- a/arch/mips/kernel/mips-r2-to-r6-emul.c
+++ b/arch/mips/kernel/mips-r2-to-r6-emul.c
@@ -1109,7 +1109,7 @@ repeat:
err = SIGILL;
break;
}
- /* fall through */
+ fallthrough;
case beql_op:
case bnel_op:
if (delay_slot(regs)) {
diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c
index 128fc9999c56..efce5defcc5c 100644
--- a/arch/mips/kernel/perf_event_mipsxx.c
+++ b/arch/mips/kernel/perf_event_mipsxx.c
@@ -90,6 +90,7 @@ struct mips_pmu {
unsigned int num_counters;
};
+static int counter_bits;
static struct mips_pmu mipspmu;
#define M_PERFCTL_EVENT(event) (((event) << MIPS_PERFCTRL_EVENT_S) & \
@@ -118,6 +119,7 @@ static struct mips_pmu mipspmu;
#define M_PERFCTL_CONFIG_MASK 0x1f
#endif
+#define CNTR_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL<<(n))-1))
#ifdef CONFIG_MIPS_PERF_SHARED_TC_COUNTERS
static DEFINE_RWLOCK(pmuint_rwlock);
@@ -154,6 +156,31 @@ static void pause_local_counters(void);
static irqreturn_t mipsxx_pmu_handle_irq(int, void *);
static int mipsxx_pmu_handle_shared_irq(void);
+/* 0: Not Loongson-3
+ * 1: Loongson-3A1000/3B1000/3B1500
+ * 2: Loongson-3A2000/3A3000
+ * 3: Loongson-3A4000+
+ */
+
+#define LOONGSON_PMU_TYPE0 0
+#define LOONGSON_PMU_TYPE1 1
+#define LOONGSON_PMU_TYPE2 2
+#define LOONGSON_PMU_TYPE3 3
+
+static inline int get_loongson3_pmu_type(void)
+{
+ if (boot_cpu_type() != CPU_LOONGSON64)
+ return LOONGSON_PMU_TYPE0;
+ if ((boot_cpu_data.processor_id & PRID_COMP_MASK) == PRID_COMP_LEGACY)
+ return LOONGSON_PMU_TYPE1;
+ if ((boot_cpu_data.processor_id & PRID_IMP_MASK) == PRID_IMP_LOONGSON_64C)
+ return LOONGSON_PMU_TYPE2;
+ if ((boot_cpu_data.processor_id & PRID_IMP_MASK) == PRID_IMP_LOONGSON_64G)
+ return LOONGSON_PMU_TYPE3;
+
+ return LOONGSON_PMU_TYPE0;
+}
+
static unsigned int mipsxx_pmu_swizzle_perf_idx(unsigned int idx)
{
if (vpe_id() == 1)
@@ -186,17 +213,18 @@ static u64 mipsxx_pmu_read_counter(unsigned int idx)
static u64 mipsxx_pmu_read_counter_64(unsigned int idx)
{
+ u64 mask = CNTR_BIT_MASK(counter_bits);
idx = mipsxx_pmu_swizzle_perf_idx(idx);
switch (idx) {
case 0:
- return read_c0_perfcntr0_64();
+ return read_c0_perfcntr0_64() & mask;
case 1:
- return read_c0_perfcntr1_64();
+ return read_c0_perfcntr1_64() & mask;
case 2:
- return read_c0_perfcntr2_64();
+ return read_c0_perfcntr2_64() & mask;
case 3:
- return read_c0_perfcntr3_64();
+ return read_c0_perfcntr3_64() & mask;
default:
WARN_ONCE(1, "Invalid performance counter number (%d)\n", idx);
return 0;
@@ -225,6 +253,7 @@ static void mipsxx_pmu_write_counter(unsigned int idx, u64 val)
static void mipsxx_pmu_write_counter_64(unsigned int idx, u64 val)
{
+ val &= CNTR_BIT_MASK(counter_bits);
idx = mipsxx_pmu_swizzle_perf_idx(idx);
switch (idx) {
@@ -286,12 +315,16 @@ static int mipsxx_pmu_alloc_counter(struct cpu_hw_events *cpuc,
struct hw_perf_event *hwc)
{
int i;
+ unsigned long cntr_mask;
/*
* We only need to care the counter mask. The range has been
* checked definitely.
*/
- unsigned long cntr_mask = (hwc->event_base >> 8) & 0xffff;
+ if (get_loongson3_pmu_type() == LOONGSON_PMU_TYPE2)
+ cntr_mask = (hwc->event_base >> 10) & 0xffff;
+ else
+ cntr_mask = (hwc->event_base >> 8) & 0xffff;
for (i = mipspmu.num_counters - 1; i >= 0; i--) {
/*
@@ -320,10 +353,16 @@ static void mipsxx_pmu_enable_event(struct hw_perf_event *evt, int idx)
WARN_ON(idx < 0 || idx >= mipspmu.num_counters);
- cpuc->saved_ctrl[idx] = M_PERFCTL_EVENT(evt->event_base & 0xff) |
- (evt->config_base & M_PERFCTL_CONFIG_MASK) |
- /* Make sure interrupt enabled. */
- MIPS_PERFCTRL_IE;
+ if (get_loongson3_pmu_type() == LOONGSON_PMU_TYPE2)
+ cpuc->saved_ctrl[idx] = M_PERFCTL_EVENT(evt->event_base & 0x3ff) |
+ (evt->config_base & M_PERFCTL_CONFIG_MASK) |
+ /* Make sure interrupt enabled. */
+ MIPS_PERFCTRL_IE;
+ else
+ cpuc->saved_ctrl[idx] = M_PERFCTL_EVENT(evt->event_base & 0xff) |
+ (evt->config_base & M_PERFCTL_CONFIG_MASK) |
+ /* Make sure interrupt enabled. */
+ MIPS_PERFCTRL_IE;
if (IS_ENABLED(CONFIG_CPU_BMIPS5000)) {
/* enable the counter for the calling thread */
@@ -396,6 +435,10 @@ static int mipspmu_event_set_period(struct perf_event *event,
local64_set(&hwc->prev_count, mipspmu.overflow - left);
+ if (get_loongson3_pmu_type() == LOONGSON_PMU_TYPE2)
+ mipsxx_pmu_write_control(idx,
+ M_PERFCTL_EVENT(hwc->event_base & 0x3ff));
+
mipspmu.write_counter(idx, mipspmu.overflow - left);
perf_event_update_userpage(event);
@@ -667,8 +710,14 @@ static unsigned int mipspmu_perf_event_encode(const struct mips_perf_event *pev)
(pev->event_id & 0xff);
else
#endif /* CONFIG_MIPS_MT_SMP */
- return ((pev->cntr_mask & 0xffff00) |
- (pev->event_id & 0xff));
+ {
+ if (get_loongson3_pmu_type() == LOONGSON_PMU_TYPE2)
+ return (pev->cntr_mask & 0xfffc00) |
+ (pev->event_id & 0x3ff);
+ else
+ return (pev->cntr_mask & 0xffff00) |
+ (pev->event_id & 0xff);
+ }
}
static const struct mips_perf_event *mipspmu_map_general_event(int idx)
@@ -783,26 +832,104 @@ static int n_counters(void)
return counters;
}
+static void loongson3_reset_counters(void *arg)
+{
+ int counters = (int)(long)arg;
+
+ switch (counters) {
+ case 4:
+ mipsxx_pmu_write_control(3, 0);
+ mipspmu.write_counter(3, 0);
+ mipsxx_pmu_write_control(3, 127<<5);
+ mipspmu.write_counter(3, 0);
+ mipsxx_pmu_write_control(3, 191<<5);
+ mipspmu.write_counter(3, 0);
+ mipsxx_pmu_write_control(3, 255<<5);
+ mipspmu.write_counter(3, 0);
+ mipsxx_pmu_write_control(3, 319<<5);
+ mipspmu.write_counter(3, 0);
+ mipsxx_pmu_write_control(3, 383<<5);
+ mipspmu.write_counter(3, 0);
+ mipsxx_pmu_write_control(3, 575<<5);
+ mipspmu.write_counter(3, 0);
+ fallthrough;
+ case 3:
+ mipsxx_pmu_write_control(2, 0);
+ mipspmu.write_counter(2, 0);
+ mipsxx_pmu_write_control(2, 127<<5);
+ mipspmu.write_counter(2, 0);
+ mipsxx_pmu_write_control(2, 191<<5);
+ mipspmu.write_counter(2, 0);
+ mipsxx_pmu_write_control(2, 255<<5);
+ mipspmu.write_counter(2, 0);
+ mipsxx_pmu_write_control(2, 319<<5);
+ mipspmu.write_counter(2, 0);
+ mipsxx_pmu_write_control(2, 383<<5);
+ mipspmu.write_counter(2, 0);
+ mipsxx_pmu_write_control(2, 575<<5);
+ mipspmu.write_counter(2, 0);
+ fallthrough;
+ case 2:
+ mipsxx_pmu_write_control(1, 0);
+ mipspmu.write_counter(1, 0);
+ mipsxx_pmu_write_control(1, 127<<5);
+ mipspmu.write_counter(1, 0);
+ mipsxx_pmu_write_control(1, 191<<5);
+ mipspmu.write_counter(1, 0);
+ mipsxx_pmu_write_control(1, 255<<5);
+ mipspmu.write_counter(1, 0);
+ mipsxx_pmu_write_control(1, 319<<5);
+ mipspmu.write_counter(1, 0);
+ mipsxx_pmu_write_control(1, 383<<5);
+ mipspmu.write_counter(1, 0);
+ mipsxx_pmu_write_control(1, 575<<5);
+ mipspmu.write_counter(1, 0);
+ fallthrough;
+ case 1:
+ mipsxx_pmu_write_control(0, 0);
+ mipspmu.write_counter(0, 0);
+ mipsxx_pmu_write_control(0, 127<<5);
+ mipspmu.write_counter(0, 0);
+ mipsxx_pmu_write_control(0, 191<<5);
+ mipspmu.write_counter(0, 0);
+ mipsxx_pmu_write_control(0, 255<<5);
+ mipspmu.write_counter(0, 0);
+ mipsxx_pmu_write_control(0, 319<<5);
+ mipspmu.write_counter(0, 0);
+ mipsxx_pmu_write_control(0, 383<<5);
+ mipspmu.write_counter(0, 0);
+ mipsxx_pmu_write_control(0, 575<<5);
+ mipspmu.write_counter(0, 0);
+ break;
+ }
+}
+
static void reset_counters(void *arg)
{
int counters = (int)(long)arg;
+
+ if (get_loongson3_pmu_type() == LOONGSON_PMU_TYPE2) {
+ loongson3_reset_counters(arg);
+ return;
+ }
+
switch (counters) {
case 4:
mipsxx_pmu_write_control(3, 0);
mipspmu.write_counter(3, 0);
- /* fall through */
+ fallthrough;
case 3:
mipsxx_pmu_write_control(2, 0);
mipspmu.write_counter(2, 0);
- /* fall through */
+ fallthrough;
case 2:
mipsxx_pmu_write_control(1, 0);
mipspmu.write_counter(1, 0);
- /* fall through */
+ fallthrough;
case 1:
mipsxx_pmu_write_control(0, 0);
mipspmu.write_counter(0, 0);
- /* fall through */
+ break;
}
}
@@ -834,13 +961,30 @@ static const struct mips_perf_event i6x00_event_map[PERF_COUNT_HW_MAX] = {
[PERF_COUNT_HW_BRANCH_MISSES] = { 0x16, CNTR_EVEN | CNTR_ODD },
};
-static const struct mips_perf_event loongson3_event_map[PERF_COUNT_HW_MAX] = {
+static const struct mips_perf_event loongson3_event_map1[PERF_COUNT_HW_MAX] = {
[PERF_COUNT_HW_CPU_CYCLES] = { 0x00, CNTR_EVEN },
[PERF_COUNT_HW_INSTRUCTIONS] = { 0x00, CNTR_ODD },
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { 0x01, CNTR_EVEN },
[PERF_COUNT_HW_BRANCH_MISSES] = { 0x01, CNTR_ODD },
};
+static const struct mips_perf_event loongson3_event_map2[PERF_COUNT_HW_MAX] = {
+ [PERF_COUNT_HW_CPU_CYCLES] = { 0x80, CNTR_ALL },
+ [PERF_COUNT_HW_INSTRUCTIONS] = { 0x81, CNTR_ALL },
+ [PERF_COUNT_HW_CACHE_MISSES] = { 0x18, CNTR_ALL },
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { 0x94, CNTR_ALL },
+ [PERF_COUNT_HW_BRANCH_MISSES] = { 0x9c, CNTR_ALL },
+};
+
+static const struct mips_perf_event loongson3_event_map3[PERF_COUNT_HW_MAX] = {
+ [PERF_COUNT_HW_CPU_CYCLES] = { 0x00, CNTR_ALL },
+ [PERF_COUNT_HW_INSTRUCTIONS] = { 0x01, CNTR_ALL },
+ [PERF_COUNT_HW_CACHE_REFERENCES] = { 0x1c, CNTR_ALL },
+ [PERF_COUNT_HW_CACHE_MISSES] = { 0x1d, CNTR_ALL },
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { 0x02, CNTR_ALL },
+ [PERF_COUNT_HW_BRANCH_MISSES] = { 0x08, CNTR_ALL },
+};
+
static const struct mips_perf_event octeon_event_map[PERF_COUNT_HW_MAX] = {
[PERF_COUNT_HW_CPU_CYCLES] = { 0x01, CNTR_ALL },
[PERF_COUNT_HW_INSTRUCTIONS] = { 0x03, CNTR_ALL },
@@ -1064,7 +1208,7 @@ static const struct mips_perf_event i6x00_cache_map
},
};
-static const struct mips_perf_event loongson3_cache_map
+static const struct mips_perf_event loongson3_cache_map1
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
@@ -1109,12 +1253,127 @@ static const struct mips_perf_event loongson3_cache_map
[C(BPU)] = {
/* Using the same code for *HW_BRANCH* */
[C(OP_READ)] = {
- [C(RESULT_ACCESS)] = { 0x02, CNTR_EVEN },
- [C(RESULT_MISS)] = { 0x02, CNTR_ODD },
+ [C(RESULT_ACCESS)] = { 0x01, CNTR_EVEN },
+ [C(RESULT_MISS)] = { 0x01, CNTR_ODD },
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = { 0x01, CNTR_EVEN },
+ [C(RESULT_MISS)] = { 0x01, CNTR_ODD },
+ },
+},
+};
+
+static const struct mips_perf_event loongson3_cache_map2
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+[C(L1D)] = {
+ /*
+ * Like some other architectures (e.g. ARM), the performance
+ * counters don't differentiate between read and write
+ * accesses/misses, so this isn't strictly correct, but it's the
+ * best we can do. Writes and reads get combined.
+ */
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = { 0x156, CNTR_ALL },
},
[C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = { 0x02, CNTR_EVEN },
- [C(RESULT_MISS)] = { 0x02, CNTR_ODD },
+ [C(RESULT_ACCESS)] = { 0x155, CNTR_ALL },
+ [C(RESULT_MISS)] = { 0x153, CNTR_ALL },
+ },
+},
+[C(L1I)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_MISS)] = { 0x18, CNTR_ALL },
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_MISS)] = { 0x18, CNTR_ALL },
+ },
+},
+[C(LL)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = { 0x1b6, CNTR_ALL },
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = { 0x1b7, CNTR_ALL },
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = { 0x1bf, CNTR_ALL },
+ },
+},
+[C(DTLB)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_MISS)] = { 0x92, CNTR_ALL },
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_MISS)] = { 0x92, CNTR_ALL },
+ },
+},
+[C(ITLB)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_MISS)] = { 0x1a, CNTR_ALL },
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_MISS)] = { 0x1a, CNTR_ALL },
+ },
+},
+[C(BPU)] = {
+ /* Using the same code for *HW_BRANCH* */
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = { 0x94, CNTR_ALL },
+ [C(RESULT_MISS)] = { 0x9c, CNTR_ALL },
+ },
+},
+};
+
+static const struct mips_perf_event loongson3_cache_map3
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+[C(L1D)] = {
+ /*
+ * Like some other architectures (e.g. ARM), the performance
+ * counters don't differentiate between read and write
+ * accesses/misses, so this isn't strictly correct, but it's the
+ * best we can do. Writes and reads get combined.
+ */
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = { 0x1e, CNTR_ALL },
+ [C(RESULT_MISS)] = { 0x1f, CNTR_ALL },
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = { 0xaa, CNTR_ALL },
+ [C(RESULT_MISS)] = { 0xa9, CNTR_ALL },
+ },
+},
+[C(L1I)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = { 0x1c, CNTR_ALL },
+ [C(RESULT_MISS)] = { 0x1d, CNTR_ALL },
+ },
+},
+[C(LL)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = { 0x2e, CNTR_ALL },
+ [C(RESULT_MISS)] = { 0x2f, CNTR_ALL },
+ },
+},
+[C(DTLB)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = { 0x14, CNTR_ALL },
+ [C(RESULT_MISS)] = { 0x1b, CNTR_ALL },
+ },
+},
+[C(ITLB)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_MISS)] = { 0x1a, CNTR_ALL },
+ },
+},
+[C(BPU)] = {
+ /* Using the same code for *HW_BRANCH* */
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = { 0x02, CNTR_ALL },
+ [C(RESULT_MISS)] = { 0x08, CNTR_ALL },
},
},
};
@@ -1178,7 +1437,6 @@ static const struct mips_perf_event bmips5000_cache_map
},
};
-
static const struct mips_perf_event octeon_cache_map
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
@@ -1512,6 +1770,7 @@ static irqreturn_t mipsxx_pmu_handle_irq(int irq, void *dev)
static const struct mips_perf_event *mipsxx_pmu_map_raw_event(u64 config)
{
/* currently most cores have 7-bit event numbers */
+ int pmu_type;
unsigned int raw_id = config & 0xff;
unsigned int base_id = raw_id & 0x7f;
@@ -1624,8 +1883,33 @@ static const struct mips_perf_event *mipsxx_pmu_map_raw_event(u64 config)
raw_id > 127 ? CNTR_ODD : CNTR_EVEN;
break;
case CPU_LOONGSON64:
- raw_event.cntr_mask = raw_id > 127 ? CNTR_ODD : CNTR_EVEN;
- break;
+ pmu_type = get_loongson3_pmu_type();
+
+ switch (pmu_type) {
+ case LOONGSON_PMU_TYPE1:
+ raw_event.cntr_mask =
+ raw_id > 127 ? CNTR_ODD : CNTR_EVEN;
+ break;
+ case LOONGSON_PMU_TYPE2:
+ base_id = config & 0x3ff;
+ raw_event.cntr_mask = CNTR_ALL;
+
+ if ((base_id >= 1 && base_id < 28) ||
+ (base_id >= 64 && base_id < 90) ||
+ (base_id >= 128 && base_id < 164) ||
+ (base_id >= 192 && base_id < 200) ||
+ (base_id >= 256 && base_id < 274) ||
+ (base_id >= 320 && base_id < 358) ||
+ (base_id >= 384 && base_id < 574))
+ break;
+
+ return ERR_PTR(-EOPNOTSUPP);
+ case LOONGSON_PMU_TYPE3:
+ base_id = raw_id;
+ raw_event.cntr_mask = CNTR_ALL;
+ break;
+ }
+ break;
}
raw_event.event_id = base_id;
@@ -1683,8 +1967,7 @@ static const struct mips_perf_event *xlp_pmu_map_raw_event(u64 config)
static int __init
init_hw_perf_events(void)
{
- int counters, irq;
- int counter_bits;
+ int counters, irq, pmu_type;
pr_info("Performance counters: ");
@@ -1771,8 +2054,25 @@ init_hw_perf_events(void)
break;
case CPU_LOONGSON64:
mipspmu.name = "mips/loongson3";
- mipspmu.general_event_map = &loongson3_event_map;
- mipspmu.cache_event_map = &loongson3_cache_map;
+ pmu_type = get_loongson3_pmu_type();
+
+ switch (pmu_type) {
+ case LOONGSON_PMU_TYPE1:
+ counters = 2;
+ mipspmu.general_event_map = &loongson3_event_map1;
+ mipspmu.cache_event_map = &loongson3_cache_map1;
+ break;
+ case LOONGSON_PMU_TYPE2:
+ counters = 4;
+ mipspmu.general_event_map = &loongson3_event_map2;
+ mipspmu.cache_event_map = &loongson3_cache_map2;
+ break;
+ case LOONGSON_PMU_TYPE3:
+ counters = 4;
+ mipspmu.general_event_map = &loongson3_event_map3;
+ mipspmu.cache_event_map = &loongson3_cache_map3;
+ break;
+ }
break;
case CPU_CAVIUM_OCTEON:
case CPU_CAVIUM_OCTEON_PLUS:
@@ -1803,19 +2103,26 @@ init_hw_perf_events(void)
mipspmu.irq = irq;
if (read_c0_perfctrl0() & MIPS_PERFCTRL_W) {
- mipspmu.max_period = (1ULL << 63) - 1;
- mipspmu.valid_count = (1ULL << 63) - 1;
- mipspmu.overflow = 1ULL << 63;
+ if (get_loongson3_pmu_type() == LOONGSON_PMU_TYPE2) {
+ counter_bits = 48;
+ mipspmu.max_period = (1ULL << 47) - 1;
+ mipspmu.valid_count = (1ULL << 47) - 1;
+ mipspmu.overflow = 1ULL << 47;
+ } else {
+ counter_bits = 64;
+ mipspmu.max_period = (1ULL << 63) - 1;
+ mipspmu.valid_count = (1ULL << 63) - 1;
+ mipspmu.overflow = 1ULL << 63;
+ }
mipspmu.read_counter = mipsxx_pmu_read_counter_64;
mipspmu.write_counter = mipsxx_pmu_write_counter_64;
- counter_bits = 64;
} else {
+ counter_bits = 32;
mipspmu.max_period = (1ULL << 31) - 1;
mipspmu.valid_count = (1ULL << 31) - 1;
mipspmu.overflow = 1ULL << 31;
mipspmu.read_counter = mipsxx_pmu_read_counter;
mipspmu.write_counter = mipsxx_pmu_write_counter;
- counter_bits = 32;
}
on_each_cpu(reset_counters, (void *)(long)counters, 1);
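The Loongson TYPE2 path above reads a 48-bit counter through the 64-bit CP0 accessor and then masks off the upper bits. The sketch below shows what CNTR_BIT_MASK() and the counter_bits masking amount to, with a made-up raw register value:

```c
#include <stdint.h>
#include <stdio.h>
#include <inttypes.h>

#define CNTR_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL << (n)) - 1))

int main(void)
{
	uint64_t raw = 0xffff123456789abcULL;	/* made-up raw counter read */
	int counter_bits = 48;

	printf("masked = 0x%" PRIx64 "\n", raw & CNTR_BIT_MASK(counter_bits));
	/* prints 0x123456789abc: bits above bit 47 are discarded */
	return 0;
}
```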
diff --git a/arch/mips/kernel/proc.c b/arch/mips/kernel/proc.c
index f8d36710cd58..4184d641f05e 100644
--- a/arch/mips/kernel/proc.c
+++ b/arch/mips/kernel/proc.c
@@ -98,12 +98,16 @@ static int show_cpuinfo(struct seq_file *m, void *v)
seq_printf(m, "%s", " mips32r1");
if (cpu_has_mips32r2)
seq_printf(m, "%s", " mips32r2");
+ if (cpu_has_mips32r5)
+ seq_printf(m, "%s", " mips32r5");
if (cpu_has_mips32r6)
seq_printf(m, "%s", " mips32r6");
if (cpu_has_mips64r1)
seq_printf(m, "%s", " mips64r1");
if (cpu_has_mips64r2)
seq_printf(m, "%s", " mips64r2");
+ if (cpu_has_mips64r5)
+ seq_printf(m, "%s", " mips64r5");
if (cpu_has_mips64r6)
seq_printf(m, "%s", " mips64r6");
seq_printf(m, "\n");
diff --git a/arch/mips/kernel/r4k_fpu.S b/arch/mips/kernel/r4k_fpu.S
index 59be5c812aa2..b91e91106475 100644
--- a/arch/mips/kernel/r4k_fpu.S
+++ b/arch/mips/kernel/r4k_fpu.S
@@ -41,7 +41,7 @@
LEAF(_save_fp)
EXPORT_SYMBOL(_save_fp)
#if defined(CONFIG_64BIT) || defined(CONFIG_CPU_MIPSR2) || \
- defined(CONFIG_CPU_MIPSR6)
+ defined(CONFIG_CPU_MIPSR5) || defined(CONFIG_CPU_MIPSR6)
mfc0 t0, CP0_STATUS
#endif
fpu_save_double a0 t0 t1 # clobbers t1
@@ -53,7 +53,7 @@ EXPORT_SYMBOL(_save_fp)
*/
LEAF(_restore_fp)
#if defined(CONFIG_64BIT) || defined(CONFIG_CPU_MIPSR2) || \
- defined(CONFIG_CPU_MIPSR6)
+ defined(CONFIG_CPU_MIPSR5) || defined(CONFIG_CPU_MIPSR6)
mfc0 t0, CP0_STATUS
#endif
fpu_restore_double a0 t0 t1 # clobbers t1
@@ -103,10 +103,10 @@ LEAF(_save_fp_context)
.set pop
#if defined(CONFIG_64BIT) || defined(CONFIG_CPU_MIPSR2) || \
- defined(CONFIG_CPU_MIPSR6)
+ defined(CONFIG_CPU_MIPSR5) || defined(CONFIG_CPU_MIPSR6)
.set push
SET_HARDFLOAT
-#ifdef CONFIG_CPU_MIPSR2
+#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR5)
.set mips32r2
.set fp=64
mfc0 t0, CP0_STATUS
@@ -170,11 +170,11 @@ LEAF(_save_fp_context)
LEAF(_restore_fp_context)
EX lw t1, 0(a1)
-#if defined(CONFIG_64BIT) || defined(CONFIG_CPU_MIPSR2) || \
- defined(CONFIG_CPU_MIPSR6)
+#if defined(CONFIG_64BIT) || defined(CONFIG_CPU_MIPSR2) || \
+ defined(CONFIG_CPU_MIPSR5) || defined(CONFIG_CPU_MIPSR6)
.set push
SET_HARDFLOAT
-#ifdef CONFIG_CPU_MIPSR2
+#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR5)
.set mips32r2
.set fp=64
mfc0 t0, CP0_STATUS
diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
index 41df8221bb8f..50c9a57e0d3a 100644
--- a/arch/mips/kernel/scall64-o32.S
+++ b/arch/mips/kernel/scall64-o32.S
@@ -41,7 +41,7 @@ NESTED(handle_sys, PT_SIZE, sp)
#if 0
SAVE_ALL
move a1, v0
- PRINT("Scall %ld\n")
+ ASM_PRINT("Scall %ld\n")
RESTORE_ALL
#endif
diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
index 10bef8f78e7c..7b537fa2035d 100644
--- a/arch/mips/kernel/setup.c
+++ b/arch/mips/kernel/setup.c
@@ -575,7 +575,7 @@ static int __init bootcmdline_scan_chosen(unsigned long node, const char *uname,
#endif /* CONFIG_OF_EARLY_FLATTREE */
-static void __init bootcmdline_init(char **cmdline_p)
+static void __init bootcmdline_init(void)
{
bool dt_bootargs = false;
@@ -654,13 +654,11 @@ static void __init bootcmdline_init(char **cmdline_p)
*/
static void __init arch_mem_init(char **cmdline_p)
{
- extern void plat_mem_setup(void);
-
/* call board setup routine */
plat_mem_setup();
memblock_set_bottom_up(true);
- bootcmdline_init(cmdline_p);
+ bootcmdline_init();
strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
*cmdline_p = command_line;
@@ -702,7 +700,17 @@ static void __init arch_mem_init(char **cmdline_p)
memblock_reserve(crashk_res.start, resource_size(&crashk_res));
#endif
device_tree_init();
+
+ /*
+ * In order to reduce the possibility of a kernel panic when we fail to
+ * get IO TLB memory under CONFIG_SWIOTLB, it is better to allocate as
+ * little low memory as possible before plat_swiotlb_setup(), so make
+ * sparse_init() use top-down allocation.
+ */
+ memblock_set_bottom_up(false);
sparse_init();
+ memblock_set_bottom_up(true);
+
plat_swiotlb_setup();
dma_contiguous_reserve(PFN_PHYS(max_low_pfn));
@@ -831,7 +839,7 @@ arch_initcall(debugfs_mips);
/* User defined DMA coherency from command line. */
enum coherent_io_user_state coherentio = IO_COHERENCE_DEFAULT;
EXPORT_SYMBOL_GPL(coherentio);
-int hw_coherentio = 0; /* Actual hardware supported DMA coherency setting. */
+int hw_coherentio; /* Actual hardware supported DMA coherency setting. */
static int __init setcoherentio(char *str)
{
diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c
index f6efabcb4e92..a0262729cd4c 100644
--- a/arch/mips/kernel/signal.c
+++ b/arch/mips/kernel/signal.c
@@ -52,7 +52,7 @@ struct sigframe {
/* Matches struct ucontext from its uc_mcontext field onwards */
struct sigcontext sf_sc;
sigset_t sf_mask;
- unsigned long long sf_extcontext[0];
+ unsigned long long sf_extcontext[];
};
struct rt_sigframe {
@@ -824,7 +824,7 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
regs->regs[2] = EINTR;
break;
}
- /* fallthrough */
+ fallthrough;
case ERESTARTNOINTR:
regs->regs[7] = regs->regs[26];
regs->regs[2] = regs->regs[0];
diff --git a/arch/mips/kernel/spram.c b/arch/mips/kernel/spram.c
index 26d355462ace..d5d96214cce5 100644
--- a/arch/mips/kernel/spram.c
+++ b/arch/mips/kernel/spram.c
@@ -209,11 +209,11 @@ void spram_config(void)
case CPU_P6600:
config0 = read_c0_config();
/* FIXME: addresses are Malta specific */
- if (config0 & (1<<24)) {
+ if (config0 & MIPS_CONF_ISP) {
probe_spram("ISPRAM", 0x1c000000,
&ispram_load_tag, &ispram_store_tag);
}
- if (config0 & (1<<23))
+ if (config0 & MIPS_CONF_DSP)
probe_spram("DSPRAM", 0x1c100000,
&dspram_load_tag, &dspram_store_tag);
}
diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl
index 1f9e8ad636cc..f777141f5256 100644
--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
@@ -376,3 +376,4 @@
435 n32 clone3 __sys_clone3
437 n32 openat2 sys_openat2
438 n32 pidfd_getfd sys_pidfd_getfd
+439 n32 faccessat2 sys_faccessat2
diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl
index c0b9d802dbf6..da8c76394e17 100644
--- a/arch/mips/kernel/syscalls/syscall_n64.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n64.tbl
@@ -352,3 +352,4 @@
435 n64 clone3 __sys_clone3
437 n64 openat2 sys_openat2
438 n64 pidfd_getfd sys_pidfd_getfd
+439 n64 faccessat2 sys_faccessat2
diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl
index ac586774c980..13280625d312 100644
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -425,3 +425,4 @@
435 o32 clone3 __sys_clone3
437 o32 openat2 sys_openat2
438 o32 pidfd_getfd sys_pidfd_getfd
+439 o32 faccessat2 sys_faccessat2
diff --git a/arch/mips/kernel/time.c b/arch/mips/kernel/time.c
index 37e9413a393d..caa01457dce6 100644
--- a/arch/mips/kernel/time.c
+++ b/arch/mips/kernel/time.c
@@ -18,12 +18,82 @@
#include <linux/smp.h>
#include <linux/spinlock.h>
#include <linux/export.h>
+#include <linux/cpufreq.h>
+#include <linux/delay.h>
#include <asm/cpu-features.h>
#include <asm/cpu-type.h>
#include <asm/div64.h>
#include <asm/time.h>
+#ifdef CONFIG_CPU_FREQ
+
+static DEFINE_PER_CPU(unsigned long, pcp_lpj_ref);
+static DEFINE_PER_CPU(unsigned long, pcp_lpj_ref_freq);
+static unsigned long glb_lpj_ref;
+static unsigned long glb_lpj_ref_freq;
+
+static int cpufreq_callback(struct notifier_block *nb,
+ unsigned long val, void *data)
+{
+ struct cpufreq_freqs *freq = data;
+ struct cpumask *cpus = freq->policy->cpus;
+ unsigned long lpj;
+ int cpu;
+
+ /*
+	 * Skip the lpj adjustment if the CPU-freq transition is safe for
+	 * the delay loops. (Is this possible?)
+ */
+ if (freq->flags & CPUFREQ_CONST_LOOPS)
+ return NOTIFY_OK;
+
+	/* Save the initial lpj values for future scaling. */
+ if (!glb_lpj_ref) {
+ glb_lpj_ref = boot_cpu_data.udelay_val;
+ glb_lpj_ref_freq = freq->old;
+
+ for_each_online_cpu(cpu) {
+ per_cpu(pcp_lpj_ref, cpu) =
+ cpu_data[cpu].udelay_val;
+ per_cpu(pcp_lpj_ref_freq, cpu) = freq->old;
+ }
+ }
+
+ /*
+ * Adjust global lpj variable and per-CPU udelay_val number in
+ * accordance with the new CPU frequency.
+ */
+ if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
+ (val == CPUFREQ_POSTCHANGE && freq->old > freq->new)) {
+ loops_per_jiffy = cpufreq_scale(glb_lpj_ref,
+ glb_lpj_ref_freq,
+ freq->new);
+
+ for_each_cpu(cpu, cpus) {
+ lpj = cpufreq_scale(per_cpu(pcp_lpj_ref, cpu),
+ per_cpu(pcp_lpj_ref_freq, cpu),
+ freq->new);
+ cpu_data[cpu].udelay_val = (unsigned int)lpj;
+ }
+ }
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block cpufreq_notifier = {
+ .notifier_call = cpufreq_callback,
+};
+
+static int __init register_cpufreq_notifier(void)
+{
+ return cpufreq_register_notifier(&cpufreq_notifier,
+ CPUFREQ_TRANSITION_NOTIFIER);
+}
+core_initcall(register_cpufreq_notifier);
+
+#endif /* CONFIG_CPU_FREQ */
+
/*
* forward reference
*/
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 31968cbd6464..22f805a73921 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -71,6 +71,8 @@
#include <asm/tlbex.h>
#include <asm/uasm.h>
+#include <asm/mach-loongson64/cpucfg-emul.h>
+
extern void check_wait(void);
extern asmlinkage void rollback_handle_int(void);
extern asmlinkage void handle_int(void);
@@ -693,6 +695,48 @@ static int simulate_sync(struct pt_regs *regs, unsigned int opcode)
return -1; /* Must be something else ... */
}
+/*
+ * Loongson-3 CSR instructions emulation
+ */
+
+#ifdef CONFIG_CPU_LOONGSON3_CPUCFG_EMULATION
+
+#define LWC2 0xc8000000
+#define RS BASE
+#define CSR_OPCODE2 0x00000118
+#define CSR_OPCODE2_MASK 0x000007ff
+#define CSR_FUNC_MASK RT
+#define CSR_FUNC_CPUCFG 0x8
+
+static int simulate_loongson3_cpucfg(struct pt_regs *regs,
+ unsigned int opcode)
+{
+ int op = opcode & OPCODE;
+ int op2 = opcode & CSR_OPCODE2_MASK;
+ int csr_func = (opcode & CSR_FUNC_MASK) >> 16;
+
+ if (op == LWC2 && op2 == CSR_OPCODE2 && csr_func == CSR_FUNC_CPUCFG) {
+ int rd = (opcode & RD) >> 11;
+ int rs = (opcode & RS) >> 21;
+ __u64 sel = regs->regs[rs];
+
+ perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);
+
+ /* Do not emulate on unsupported core models. */
+ if (!loongson3_cpucfg_emulation_enabled(&current_cpu_data))
+ return -1;
+
+ regs->regs[rd] = loongson3_cpucfg_read_synthesized(
+ &current_cpu_data, sel);
+
+ return 0;
+ }
+
+ /* Not ours. */
+ return -1;
+}
+#endif /* CONFIG_CPU_LOONGSON3_CPUCFG_EMULATION */
+
asmlinkage void do_ov(struct pt_regs *regs)
{
enum ctx_state prev_state;
@@ -1166,6 +1210,11 @@ no_r2_instr:
if (status < 0)
status = simulate_fp(regs, opcode, old_epc, old31);
+
+#ifdef CONFIG_CPU_LOONGSON3_CPUCFG_EMULATION
+ if (status < 0)
+ status = simulate_loongson3_cpucfg(regs, opcode);
+#endif
} else if (cpu_has_mmips) {
unsigned short mmop[2] = { 0 };
@@ -1401,8 +1450,7 @@ asmlinkage void do_cpu(struct pt_regs *regs)
force_sig(SIGILL);
break;
}
- /* Fall through. */
-
+ fallthrough;
case 1: {
void __user *fault_addr;
unsigned long fcr31;
diff --git a/arch/mips/kernel/unaligned.c b/arch/mips/kernel/unaligned.c
index ca6fc4762d97..0adce604fa44 100644
--- a/arch/mips/kernel/unaligned.c
+++ b/arch/mips/kernel/unaligned.c
@@ -89,12 +89,10 @@
#include <asm/fpu.h>
#include <asm/fpu_emulator.h>
#include <asm/inst.h>
+#include <asm/unaligned-emul.h>
#include <asm/mmu_context.h>
#include <linux/uaccess.h>
-#define STR(x) __STR(x)
-#define __STR(x) #x
-
enum {
UNALIGNED_ACTION_QUIET,
UNALIGNED_ACTION_SIGNAL,
@@ -108,778 +106,6 @@ static u32 unaligned_action;
#endif
extern void show_registers(struct pt_regs *regs);
-#ifdef __BIG_ENDIAN
-#define _LoadHW(addr, value, res, type) \
-do { \
- __asm__ __volatile__ (".set\tnoat\n" \
- "1:\t"type##_lb("%0", "0(%2)")"\n" \
- "2:\t"type##_lbu("$1", "1(%2)")"\n\t"\
- "sll\t%0, 0x8\n\t" \
- "or\t%0, $1\n\t" \
- "li\t%1, 0\n" \
- "3:\t.set\tat\n\t" \
- ".insn\n\t" \
- ".section\t.fixup,\"ax\"\n\t" \
- "4:\tli\t%1, %3\n\t" \
- "j\t3b\n\t" \
- ".previous\n\t" \
- ".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 4b\n\t" \
- STR(PTR)"\t2b, 4b\n\t" \
- ".previous" \
- : "=&r" (value), "=r" (res) \
- : "r" (addr), "i" (-EFAULT)); \
-} while(0)
-
-#ifndef CONFIG_CPU_NO_LOAD_STORE_LR
-#define _LoadW(addr, value, res, type) \
-do { \
- __asm__ __volatile__ ( \
- "1:\t"type##_lwl("%0", "(%2)")"\n" \
- "2:\t"type##_lwr("%0", "3(%2)")"\n\t"\
- "li\t%1, 0\n" \
- "3:\n\t" \
- ".insn\n\t" \
- ".section\t.fixup,\"ax\"\n\t" \
- "4:\tli\t%1, %3\n\t" \
- "j\t3b\n\t" \
- ".previous\n\t" \
- ".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 4b\n\t" \
- STR(PTR)"\t2b, 4b\n\t" \
- ".previous" \
- : "=&r" (value), "=r" (res) \
- : "r" (addr), "i" (-EFAULT)); \
-} while(0)
-
-#else /* CONFIG_CPU_NO_LOAD_STORE_LR */
-/* For CPUs without lwl instruction */
-#define _LoadW(addr, value, res, type) \
-do { \
- __asm__ __volatile__ ( \
- ".set\tpush\n" \
- ".set\tnoat\n\t" \
- "1:"type##_lb("%0", "0(%2)")"\n\t" \
- "2:"type##_lbu("$1", "1(%2)")"\n\t" \
- "sll\t%0, 0x8\n\t" \
- "or\t%0, $1\n\t" \
- "3:"type##_lbu("$1", "2(%2)")"\n\t" \
- "sll\t%0, 0x8\n\t" \
- "or\t%0, $1\n\t" \
- "4:"type##_lbu("$1", "3(%2)")"\n\t" \
- "sll\t%0, 0x8\n\t" \
- "or\t%0, $1\n\t" \
- "li\t%1, 0\n" \
- ".set\tpop\n" \
- "10:\n\t" \
- ".insn\n\t" \
- ".section\t.fixup,\"ax\"\n\t" \
- "11:\tli\t%1, %3\n\t" \
- "j\t10b\n\t" \
- ".previous\n\t" \
- ".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 11b\n\t" \
- STR(PTR)"\t2b, 11b\n\t" \
- STR(PTR)"\t3b, 11b\n\t" \
- STR(PTR)"\t4b, 11b\n\t" \
- ".previous" \
- : "=&r" (value), "=r" (res) \
- : "r" (addr), "i" (-EFAULT)); \
-} while(0)
-
-#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */
-
-#define _LoadHWU(addr, value, res, type) \
-do { \
- __asm__ __volatile__ ( \
- ".set\tnoat\n" \
- "1:\t"type##_lbu("%0", "0(%2)")"\n" \
- "2:\t"type##_lbu("$1", "1(%2)")"\n\t"\
- "sll\t%0, 0x8\n\t" \
- "or\t%0, $1\n\t" \
- "li\t%1, 0\n" \
- "3:\n\t" \
- ".insn\n\t" \
- ".set\tat\n\t" \
- ".section\t.fixup,\"ax\"\n\t" \
- "4:\tli\t%1, %3\n\t" \
- "j\t3b\n\t" \
- ".previous\n\t" \
- ".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 4b\n\t" \
- STR(PTR)"\t2b, 4b\n\t" \
- ".previous" \
- : "=&r" (value), "=r" (res) \
- : "r" (addr), "i" (-EFAULT)); \
-} while(0)
-
-#ifndef CONFIG_CPU_NO_LOAD_STORE_LR
-#define _LoadWU(addr, value, res, type) \
-do { \
- __asm__ __volatile__ ( \
- "1:\t"type##_lwl("%0", "(%2)")"\n" \
- "2:\t"type##_lwr("%0", "3(%2)")"\n\t"\
- "dsll\t%0, %0, 32\n\t" \
- "dsrl\t%0, %0, 32\n\t" \
- "li\t%1, 0\n" \
- "3:\n\t" \
- ".insn\n\t" \
- "\t.section\t.fixup,\"ax\"\n\t" \
- "4:\tli\t%1, %3\n\t" \
- "j\t3b\n\t" \
- ".previous\n\t" \
- ".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 4b\n\t" \
- STR(PTR)"\t2b, 4b\n\t" \
- ".previous" \
- : "=&r" (value), "=r" (res) \
- : "r" (addr), "i" (-EFAULT)); \
-} while(0)
-
-#define _LoadDW(addr, value, res) \
-do { \
- __asm__ __volatile__ ( \
- "1:\tldl\t%0, (%2)\n" \
- "2:\tldr\t%0, 7(%2)\n\t" \
- "li\t%1, 0\n" \
- "3:\n\t" \
- ".insn\n\t" \
- "\t.section\t.fixup,\"ax\"\n\t" \
- "4:\tli\t%1, %3\n\t" \
- "j\t3b\n\t" \
- ".previous\n\t" \
- ".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 4b\n\t" \
- STR(PTR)"\t2b, 4b\n\t" \
- ".previous" \
- : "=&r" (value), "=r" (res) \
- : "r" (addr), "i" (-EFAULT)); \
-} while(0)
-
-#else /* CONFIG_CPU_NO_LOAD_STORE_LR */
-/* For CPUs without lwl and ldl instructions */
-#define _LoadWU(addr, value, res, type) \
-do { \
- __asm__ __volatile__ ( \
- ".set\tpush\n\t" \
- ".set\tnoat\n\t" \
- "1:"type##_lbu("%0", "0(%2)")"\n\t" \
- "2:"type##_lbu("$1", "1(%2)")"\n\t" \
- "sll\t%0, 0x8\n\t" \
- "or\t%0, $1\n\t" \
- "3:"type##_lbu("$1", "2(%2)")"\n\t" \
- "sll\t%0, 0x8\n\t" \
- "or\t%0, $1\n\t" \
- "4:"type##_lbu("$1", "3(%2)")"\n\t" \
- "sll\t%0, 0x8\n\t" \
- "or\t%0, $1\n\t" \
- "li\t%1, 0\n" \
- ".set\tpop\n" \
- "10:\n\t" \
- ".insn\n\t" \
- ".section\t.fixup,\"ax\"\n\t" \
- "11:\tli\t%1, %3\n\t" \
- "j\t10b\n\t" \
- ".previous\n\t" \
- ".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 11b\n\t" \
- STR(PTR)"\t2b, 11b\n\t" \
- STR(PTR)"\t3b, 11b\n\t" \
- STR(PTR)"\t4b, 11b\n\t" \
- ".previous" \
- : "=&r" (value), "=r" (res) \
- : "r" (addr), "i" (-EFAULT)); \
-} while(0)
-
-#define _LoadDW(addr, value, res) \
-do { \
- __asm__ __volatile__ ( \
- ".set\tpush\n\t" \
- ".set\tnoat\n\t" \
- "1:lb\t%0, 0(%2)\n\t" \
- "2:lbu\t $1, 1(%2)\n\t" \
- "dsll\t%0, 0x8\n\t" \
- "or\t%0, $1\n\t" \
- "3:lbu\t$1, 2(%2)\n\t" \
- "dsll\t%0, 0x8\n\t" \
- "or\t%0, $1\n\t" \
- "4:lbu\t$1, 3(%2)\n\t" \
- "dsll\t%0, 0x8\n\t" \
- "or\t%0, $1\n\t" \
- "5:lbu\t$1, 4(%2)\n\t" \
- "dsll\t%0, 0x8\n\t" \
- "or\t%0, $1\n\t" \
- "6:lbu\t$1, 5(%2)\n\t" \
- "dsll\t%0, 0x8\n\t" \
- "or\t%0, $1\n\t" \
- "7:lbu\t$1, 6(%2)\n\t" \
- "dsll\t%0, 0x8\n\t" \
- "or\t%0, $1\n\t" \
- "8:lbu\t$1, 7(%2)\n\t" \
- "dsll\t%0, 0x8\n\t" \
- "or\t%0, $1\n\t" \
- "li\t%1, 0\n" \
- ".set\tpop\n\t" \
- "10:\n\t" \
- ".insn\n\t" \
- ".section\t.fixup,\"ax\"\n\t" \
- "11:\tli\t%1, %3\n\t" \
- "j\t10b\n\t" \
- ".previous\n\t" \
- ".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 11b\n\t" \
- STR(PTR)"\t2b, 11b\n\t" \
- STR(PTR)"\t3b, 11b\n\t" \
- STR(PTR)"\t4b, 11b\n\t" \
- STR(PTR)"\t5b, 11b\n\t" \
- STR(PTR)"\t6b, 11b\n\t" \
- STR(PTR)"\t7b, 11b\n\t" \
- STR(PTR)"\t8b, 11b\n\t" \
- ".previous" \
- : "=&r" (value), "=r" (res) \
- : "r" (addr), "i" (-EFAULT)); \
-} while(0)
-
-#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */
-
-
-#define _StoreHW(addr, value, res, type) \
-do { \
- __asm__ __volatile__ ( \
- ".set\tnoat\n" \
- "1:\t"type##_sb("%1", "1(%2)")"\n" \
- "srl\t$1, %1, 0x8\n" \
- "2:\t"type##_sb("$1", "0(%2)")"\n" \
- ".set\tat\n\t" \
- "li\t%0, 0\n" \
- "3:\n\t" \
- ".insn\n\t" \
- ".section\t.fixup,\"ax\"\n\t" \
- "4:\tli\t%0, %3\n\t" \
- "j\t3b\n\t" \
- ".previous\n\t" \
- ".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 4b\n\t" \
- STR(PTR)"\t2b, 4b\n\t" \
- ".previous" \
- : "=r" (res) \
- : "r" (value), "r" (addr), "i" (-EFAULT));\
-} while(0)
-
-#ifndef CONFIG_CPU_NO_LOAD_STORE_LR
-#define _StoreW(addr, value, res, type) \
-do { \
- __asm__ __volatile__ ( \
- "1:\t"type##_swl("%1", "(%2)")"\n" \
- "2:\t"type##_swr("%1", "3(%2)")"\n\t"\
- "li\t%0, 0\n" \
- "3:\n\t" \
- ".insn\n\t" \
- ".section\t.fixup,\"ax\"\n\t" \
- "4:\tli\t%0, %3\n\t" \
- "j\t3b\n\t" \
- ".previous\n\t" \
- ".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 4b\n\t" \
- STR(PTR)"\t2b, 4b\n\t" \
- ".previous" \
- : "=r" (res) \
- : "r" (value), "r" (addr), "i" (-EFAULT)); \
-} while(0)
-
-#define _StoreDW(addr, value, res) \
-do { \
- __asm__ __volatile__ ( \
- "1:\tsdl\t%1,(%2)\n" \
- "2:\tsdr\t%1, 7(%2)\n\t" \
- "li\t%0, 0\n" \
- "3:\n\t" \
- ".insn\n\t" \
- ".section\t.fixup,\"ax\"\n\t" \
- "4:\tli\t%0, %3\n\t" \
- "j\t3b\n\t" \
- ".previous\n\t" \
- ".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 4b\n\t" \
- STR(PTR)"\t2b, 4b\n\t" \
- ".previous" \
- : "=r" (res) \
- : "r" (value), "r" (addr), "i" (-EFAULT)); \
-} while(0)
-
-#else /* CONFIG_CPU_NO_LOAD_STORE_LR */
-#define _StoreW(addr, value, res, type) \
-do { \
- __asm__ __volatile__ ( \
- ".set\tpush\n\t" \
- ".set\tnoat\n\t" \
- "1:"type##_sb("%1", "3(%2)")"\n\t" \
- "srl\t$1, %1, 0x8\n\t" \
- "2:"type##_sb("$1", "2(%2)")"\n\t" \
- "srl\t$1, $1, 0x8\n\t" \
- "3:"type##_sb("$1", "1(%2)")"\n\t" \
- "srl\t$1, $1, 0x8\n\t" \
- "4:"type##_sb("$1", "0(%2)")"\n\t" \
- ".set\tpop\n\t" \
- "li\t%0, 0\n" \
- "10:\n\t" \
- ".insn\n\t" \
- ".section\t.fixup,\"ax\"\n\t" \
- "11:\tli\t%0, %3\n\t" \
- "j\t10b\n\t" \
- ".previous\n\t" \
- ".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 11b\n\t" \
- STR(PTR)"\t2b, 11b\n\t" \
- STR(PTR)"\t3b, 11b\n\t" \
- STR(PTR)"\t4b, 11b\n\t" \
- ".previous" \
- : "=&r" (res) \
- : "r" (value), "r" (addr), "i" (-EFAULT) \
- : "memory"); \
-} while(0)
-
-#define _StoreDW(addr, value, res) \
-do { \
- __asm__ __volatile__ ( \
- ".set\tpush\n\t" \
- ".set\tnoat\n\t" \
- "1:sb\t%1, 7(%2)\n\t" \
- "dsrl\t$1, %1, 0x8\n\t" \
- "2:sb\t$1, 6(%2)\n\t" \
- "dsrl\t$1, $1, 0x8\n\t" \
- "3:sb\t$1, 5(%2)\n\t" \
- "dsrl\t$1, $1, 0x8\n\t" \
- "4:sb\t$1, 4(%2)\n\t" \
- "dsrl\t$1, $1, 0x8\n\t" \
- "5:sb\t$1, 3(%2)\n\t" \
- "dsrl\t$1, $1, 0x8\n\t" \
- "6:sb\t$1, 2(%2)\n\t" \
- "dsrl\t$1, $1, 0x8\n\t" \
- "7:sb\t$1, 1(%2)\n\t" \
- "dsrl\t$1, $1, 0x8\n\t" \
- "8:sb\t$1, 0(%2)\n\t" \
- "dsrl\t$1, $1, 0x8\n\t" \
- ".set\tpop\n\t" \
- "li\t%0, 0\n" \
- "10:\n\t" \
- ".insn\n\t" \
- ".section\t.fixup,\"ax\"\n\t" \
- "11:\tli\t%0, %3\n\t" \
- "j\t10b\n\t" \
- ".previous\n\t" \
- ".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 11b\n\t" \
- STR(PTR)"\t2b, 11b\n\t" \
- STR(PTR)"\t3b, 11b\n\t" \
- STR(PTR)"\t4b, 11b\n\t" \
- STR(PTR)"\t5b, 11b\n\t" \
- STR(PTR)"\t6b, 11b\n\t" \
- STR(PTR)"\t7b, 11b\n\t" \
- STR(PTR)"\t8b, 11b\n\t" \
- ".previous" \
- : "=&r" (res) \
- : "r" (value), "r" (addr), "i" (-EFAULT) \
- : "memory"); \
-} while(0)
-
-#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */
-
-#else /* __BIG_ENDIAN */
-
-#define _LoadHW(addr, value, res, type) \
-do { \
- __asm__ __volatile__ (".set\tnoat\n" \
- "1:\t"type##_lb("%0", "1(%2)")"\n" \
- "2:\t"type##_lbu("$1", "0(%2)")"\n\t"\
- "sll\t%0, 0x8\n\t" \
- "or\t%0, $1\n\t" \
- "li\t%1, 0\n" \
- "3:\t.set\tat\n\t" \
- ".insn\n\t" \
- ".section\t.fixup,\"ax\"\n\t" \
- "4:\tli\t%1, %3\n\t" \
- "j\t3b\n\t" \
- ".previous\n\t" \
- ".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 4b\n\t" \
- STR(PTR)"\t2b, 4b\n\t" \
- ".previous" \
- : "=&r" (value), "=r" (res) \
- : "r" (addr), "i" (-EFAULT)); \
-} while(0)
-
-#ifndef CONFIG_CPU_NO_LOAD_STORE_LR
-#define _LoadW(addr, value, res, type) \
-do { \
- __asm__ __volatile__ ( \
- "1:\t"type##_lwl("%0", "3(%2)")"\n" \
- "2:\t"type##_lwr("%0", "(%2)")"\n\t"\
- "li\t%1, 0\n" \
- "3:\n\t" \
- ".insn\n\t" \
- ".section\t.fixup,\"ax\"\n\t" \
- "4:\tli\t%1, %3\n\t" \
- "j\t3b\n\t" \
- ".previous\n\t" \
- ".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 4b\n\t" \
- STR(PTR)"\t2b, 4b\n\t" \
- ".previous" \
- : "=&r" (value), "=r" (res) \
- : "r" (addr), "i" (-EFAULT)); \
-} while(0)
-
-#else /* CONFIG_CPU_NO_LOAD_STORE_LR */
-/* For CPUs without lwl instruction */
-#define _LoadW(addr, value, res, type) \
-do { \
- __asm__ __volatile__ ( \
- ".set\tpush\n" \
- ".set\tnoat\n\t" \
- "1:"type##_lb("%0", "3(%2)")"\n\t" \
- "2:"type##_lbu("$1", "2(%2)")"\n\t" \
- "sll\t%0, 0x8\n\t" \
- "or\t%0, $1\n\t" \
- "3:"type##_lbu("$1", "1(%2)")"\n\t" \
- "sll\t%0, 0x8\n\t" \
- "or\t%0, $1\n\t" \
- "4:"type##_lbu("$1", "0(%2)")"\n\t" \
- "sll\t%0, 0x8\n\t" \
- "or\t%0, $1\n\t" \
- "li\t%1, 0\n" \
- ".set\tpop\n" \
- "10:\n\t" \
- ".insn\n\t" \
- ".section\t.fixup,\"ax\"\n\t" \
- "11:\tli\t%1, %3\n\t" \
- "j\t10b\n\t" \
- ".previous\n\t" \
- ".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 11b\n\t" \
- STR(PTR)"\t2b, 11b\n\t" \
- STR(PTR)"\t3b, 11b\n\t" \
- STR(PTR)"\t4b, 11b\n\t" \
- ".previous" \
- : "=&r" (value), "=r" (res) \
- : "r" (addr), "i" (-EFAULT)); \
-} while(0)
-
-#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */
-
-
-#define _LoadHWU(addr, value, res, type) \
-do { \
- __asm__ __volatile__ ( \
- ".set\tnoat\n" \
- "1:\t"type##_lbu("%0", "1(%2)")"\n" \
- "2:\t"type##_lbu("$1", "0(%2)")"\n\t"\
- "sll\t%0, 0x8\n\t" \
- "or\t%0, $1\n\t" \
- "li\t%1, 0\n" \
- "3:\n\t" \
- ".insn\n\t" \
- ".set\tat\n\t" \
- ".section\t.fixup,\"ax\"\n\t" \
- "4:\tli\t%1, %3\n\t" \
- "j\t3b\n\t" \
- ".previous\n\t" \
- ".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 4b\n\t" \
- STR(PTR)"\t2b, 4b\n\t" \
- ".previous" \
- : "=&r" (value), "=r" (res) \
- : "r" (addr), "i" (-EFAULT)); \
-} while(0)
-
-#ifndef CONFIG_CPU_NO_LOAD_STORE_LR
-#define _LoadWU(addr, value, res, type) \
-do { \
- __asm__ __volatile__ ( \
- "1:\t"type##_lwl("%0", "3(%2)")"\n" \
- "2:\t"type##_lwr("%0", "(%2)")"\n\t"\
- "dsll\t%0, %0, 32\n\t" \
- "dsrl\t%0, %0, 32\n\t" \
- "li\t%1, 0\n" \
- "3:\n\t" \
- ".insn\n\t" \
- "\t.section\t.fixup,\"ax\"\n\t" \
- "4:\tli\t%1, %3\n\t" \
- "j\t3b\n\t" \
- ".previous\n\t" \
- ".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 4b\n\t" \
- STR(PTR)"\t2b, 4b\n\t" \
- ".previous" \
- : "=&r" (value), "=r" (res) \
- : "r" (addr), "i" (-EFAULT)); \
-} while(0)
-
-#define _LoadDW(addr, value, res) \
-do { \
- __asm__ __volatile__ ( \
- "1:\tldl\t%0, 7(%2)\n" \
- "2:\tldr\t%0, (%2)\n\t" \
- "li\t%1, 0\n" \
- "3:\n\t" \
- ".insn\n\t" \
- "\t.section\t.fixup,\"ax\"\n\t" \
- "4:\tli\t%1, %3\n\t" \
- "j\t3b\n\t" \
- ".previous\n\t" \
- ".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 4b\n\t" \
- STR(PTR)"\t2b, 4b\n\t" \
- ".previous" \
- : "=&r" (value), "=r" (res) \
- : "r" (addr), "i" (-EFAULT)); \
-} while(0)
-
-#else /* CONFIG_CPU_NO_LOAD_STORE_LR */
-/* For CPUs without lwl and ldl instructions */
-#define _LoadWU(addr, value, res, type) \
-do { \
- __asm__ __volatile__ ( \
- ".set\tpush\n\t" \
- ".set\tnoat\n\t" \
- "1:"type##_lbu("%0", "3(%2)")"\n\t" \
- "2:"type##_lbu("$1", "2(%2)")"\n\t" \
- "sll\t%0, 0x8\n\t" \
- "or\t%0, $1\n\t" \
- "3:"type##_lbu("$1", "1(%2)")"\n\t" \
- "sll\t%0, 0x8\n\t" \
- "or\t%0, $1\n\t" \
- "4:"type##_lbu("$1", "0(%2)")"\n\t" \
- "sll\t%0, 0x8\n\t" \
- "or\t%0, $1\n\t" \
- "li\t%1, 0\n" \
- ".set\tpop\n" \
- "10:\n\t" \
- ".insn\n\t" \
- ".section\t.fixup,\"ax\"\n\t" \
- "11:\tli\t%1, %3\n\t" \
- "j\t10b\n\t" \
- ".previous\n\t" \
- ".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 11b\n\t" \
- STR(PTR)"\t2b, 11b\n\t" \
- STR(PTR)"\t3b, 11b\n\t" \
- STR(PTR)"\t4b, 11b\n\t" \
- ".previous" \
- : "=&r" (value), "=r" (res) \
- : "r" (addr), "i" (-EFAULT)); \
-} while(0)
-
-#define _LoadDW(addr, value, res) \
-do { \
- __asm__ __volatile__ ( \
- ".set\tpush\n\t" \
- ".set\tnoat\n\t" \
- "1:lb\t%0, 7(%2)\n\t" \
- "2:lbu\t$1, 6(%2)\n\t" \
- "dsll\t%0, 0x8\n\t" \
- "or\t%0, $1\n\t" \
- "3:lbu\t$1, 5(%2)\n\t" \
- "dsll\t%0, 0x8\n\t" \
- "or\t%0, $1\n\t" \
- "4:lbu\t$1, 4(%2)\n\t" \
- "dsll\t%0, 0x8\n\t" \
- "or\t%0, $1\n\t" \
- "5:lbu\t$1, 3(%2)\n\t" \
- "dsll\t%0, 0x8\n\t" \
- "or\t%0, $1\n\t" \
- "6:lbu\t$1, 2(%2)\n\t" \
- "dsll\t%0, 0x8\n\t" \
- "or\t%0, $1\n\t" \
- "7:lbu\t$1, 1(%2)\n\t" \
- "dsll\t%0, 0x8\n\t" \
- "or\t%0, $1\n\t" \
- "8:lbu\t$1, 0(%2)\n\t" \
- "dsll\t%0, 0x8\n\t" \
- "or\t%0, $1\n\t" \
- "li\t%1, 0\n" \
- ".set\tpop\n\t" \
- "10:\n\t" \
- ".insn\n\t" \
- ".section\t.fixup,\"ax\"\n\t" \
- "11:\tli\t%1, %3\n\t" \
- "j\t10b\n\t" \
- ".previous\n\t" \
- ".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 11b\n\t" \
- STR(PTR)"\t2b, 11b\n\t" \
- STR(PTR)"\t3b, 11b\n\t" \
- STR(PTR)"\t4b, 11b\n\t" \
- STR(PTR)"\t5b, 11b\n\t" \
- STR(PTR)"\t6b, 11b\n\t" \
- STR(PTR)"\t7b, 11b\n\t" \
- STR(PTR)"\t8b, 11b\n\t" \
- ".previous" \
- : "=&r" (value), "=r" (res) \
- : "r" (addr), "i" (-EFAULT)); \
-} while(0)
-#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */
-
-#define _StoreHW(addr, value, res, type) \
-do { \
- __asm__ __volatile__ ( \
- ".set\tnoat\n" \
- "1:\t"type##_sb("%1", "0(%2)")"\n" \
- "srl\t$1,%1, 0x8\n" \
- "2:\t"type##_sb("$1", "1(%2)")"\n" \
- ".set\tat\n\t" \
- "li\t%0, 0\n" \
- "3:\n\t" \
- ".insn\n\t" \
- ".section\t.fixup,\"ax\"\n\t" \
- "4:\tli\t%0, %3\n\t" \
- "j\t3b\n\t" \
- ".previous\n\t" \
- ".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 4b\n\t" \
- STR(PTR)"\t2b, 4b\n\t" \
- ".previous" \
- : "=r" (res) \
- : "r" (value), "r" (addr), "i" (-EFAULT));\
-} while(0)
-
-#ifndef CONFIG_CPU_NO_LOAD_STORE_LR
-#define _StoreW(addr, value, res, type) \
-do { \
- __asm__ __volatile__ ( \
- "1:\t"type##_swl("%1", "3(%2)")"\n" \
- "2:\t"type##_swr("%1", "(%2)")"\n\t"\
- "li\t%0, 0\n" \
- "3:\n\t" \
- ".insn\n\t" \
- ".section\t.fixup,\"ax\"\n\t" \
- "4:\tli\t%0, %3\n\t" \
- "j\t3b\n\t" \
- ".previous\n\t" \
- ".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 4b\n\t" \
- STR(PTR)"\t2b, 4b\n\t" \
- ".previous" \
- : "=r" (res) \
- : "r" (value), "r" (addr), "i" (-EFAULT)); \
-} while(0)
-
-#define _StoreDW(addr, value, res) \
-do { \
- __asm__ __volatile__ ( \
- "1:\tsdl\t%1, 7(%2)\n" \
- "2:\tsdr\t%1, (%2)\n\t" \
- "li\t%0, 0\n" \
- "3:\n\t" \
- ".insn\n\t" \
- ".section\t.fixup,\"ax\"\n\t" \
- "4:\tli\t%0, %3\n\t" \
- "j\t3b\n\t" \
- ".previous\n\t" \
- ".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 4b\n\t" \
- STR(PTR)"\t2b, 4b\n\t" \
- ".previous" \
- : "=r" (res) \
- : "r" (value), "r" (addr), "i" (-EFAULT)); \
-} while(0)
-
-#else /* CONFIG_CPU_NO_LOAD_STORE_LR */
-/* For CPUs without swl and sdl instructions */
-#define _StoreW(addr, value, res, type) \
-do { \
- __asm__ __volatile__ ( \
- ".set\tpush\n\t" \
- ".set\tnoat\n\t" \
- "1:"type##_sb("%1", "0(%2)")"\n\t" \
- "srl\t$1, %1, 0x8\n\t" \
- "2:"type##_sb("$1", "1(%2)")"\n\t" \
- "srl\t$1, $1, 0x8\n\t" \
- "3:"type##_sb("$1", "2(%2)")"\n\t" \
- "srl\t$1, $1, 0x8\n\t" \
- "4:"type##_sb("$1", "3(%2)")"\n\t" \
- ".set\tpop\n\t" \
- "li\t%0, 0\n" \
- "10:\n\t" \
- ".insn\n\t" \
- ".section\t.fixup,\"ax\"\n\t" \
- "11:\tli\t%0, %3\n\t" \
- "j\t10b\n\t" \
- ".previous\n\t" \
- ".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 11b\n\t" \
- STR(PTR)"\t2b, 11b\n\t" \
- STR(PTR)"\t3b, 11b\n\t" \
- STR(PTR)"\t4b, 11b\n\t" \
- ".previous" \
- : "=&r" (res) \
- : "r" (value), "r" (addr), "i" (-EFAULT) \
- : "memory"); \
-} while(0)
-
-#define _StoreDW(addr, value, res) \
-do { \
- __asm__ __volatile__ ( \
- ".set\tpush\n\t" \
- ".set\tnoat\n\t" \
- "1:sb\t%1, 0(%2)\n\t" \
- "dsrl\t$1, %1, 0x8\n\t" \
- "2:sb\t$1, 1(%2)\n\t" \
- "dsrl\t$1, $1, 0x8\n\t" \
- "3:sb\t$1, 2(%2)\n\t" \
- "dsrl\t$1, $1, 0x8\n\t" \
- "4:sb\t$1, 3(%2)\n\t" \
- "dsrl\t$1, $1, 0x8\n\t" \
- "5:sb\t$1, 4(%2)\n\t" \
- "dsrl\t$1, $1, 0x8\n\t" \
- "6:sb\t$1, 5(%2)\n\t" \
- "dsrl\t$1, $1, 0x8\n\t" \
- "7:sb\t$1, 6(%2)\n\t" \
- "dsrl\t$1, $1, 0x8\n\t" \
- "8:sb\t$1, 7(%2)\n\t" \
- "dsrl\t$1, $1, 0x8\n\t" \
- ".set\tpop\n\t" \
- "li\t%0, 0\n" \
- "10:\n\t" \
- ".insn\n\t" \
- ".section\t.fixup,\"ax\"\n\t" \
- "11:\tli\t%0, %3\n\t" \
- "j\t10b\n\t" \
- ".previous\n\t" \
- ".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 11b\n\t" \
- STR(PTR)"\t2b, 11b\n\t" \
- STR(PTR)"\t3b, 11b\n\t" \
- STR(PTR)"\t4b, 11b\n\t" \
- STR(PTR)"\t5b, 11b\n\t" \
- STR(PTR)"\t6b, 11b\n\t" \
- STR(PTR)"\t7b, 11b\n\t" \
- STR(PTR)"\t8b, 11b\n\t" \
- ".previous" \
- : "=&r" (res) \
- : "r" (value), "r" (addr), "i" (-EFAULT) \
- : "memory"); \
-} while(0)
-
-#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */
-#endif
-
-#define LoadHWU(addr, value, res) _LoadHWU(addr, value, res, kernel)
-#define LoadHWUE(addr, value, res) _LoadHWU(addr, value, res, user)
-#define LoadWU(addr, value, res) _LoadWU(addr, value, res, kernel)
-#define LoadWUE(addr, value, res) _LoadWU(addr, value, res, user)
-#define LoadHW(addr, value, res) _LoadHW(addr, value, res, kernel)
-#define LoadHWE(addr, value, res) _LoadHW(addr, value, res, user)
-#define LoadW(addr, value, res) _LoadW(addr, value, res, kernel)
-#define LoadWE(addr, value, res) _LoadW(addr, value, res, user)
-#define LoadDW(addr, value, res) _LoadDW(addr, value, res)
-
-#define StoreHW(addr, value, res) _StoreHW(addr, value, res, kernel)
-#define StoreHWE(addr, value, res) _StoreHW(addr, value, res, user)
-#define StoreW(addr, value, res) _StoreW(addr, value, res, kernel)
-#define StoreWE(addr, value, res) _StoreW(addr, value, res, user)
-#define StoreDW(addr, value, res) _StoreDW(addr, value, res)
-
static void emulate_load_store_insn(struct pt_regs *regs,
void __user *addr, unsigned int __user *pc)
{
diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S
index a5f00ec73ea6..f185a85a27c1 100644
--- a/arch/mips/kernel/vmlinux.lds.S
+++ b/arch/mips/kernel/vmlinux.lds.S
@@ -55,7 +55,7 @@ SECTIONS
/* . = 0xa800000000300000; */
. = 0xffffffff80300000;
#endif
- . = VMLINUX_LOAD_ADDRESS;
+ . = LINKER_LOAD_ADDRESS;
/* read-only */
_text = .; /* Text and read-only data */
.text : {
diff --git a/arch/mips/kernel/watch.c b/arch/mips/kernel/watch.c
index ba73b4077668..c9263b95cb2e 100644
--- a/arch/mips/kernel/watch.c
+++ b/arch/mips/kernel/watch.c
@@ -27,15 +27,15 @@ void mips_install_watch_registers(struct task_struct *t)
case 4:
write_c0_watchlo3(watches->watchlo[3]);
write_c0_watchhi3(watchhi | watches->watchhi[3]);
- /* fall through */
+ fallthrough;
case 3:
write_c0_watchlo2(watches->watchlo[2]);
write_c0_watchhi2(watchhi | watches->watchhi[2]);
- /* fall through */
+ fallthrough;
case 2:
write_c0_watchlo1(watches->watchlo[1]);
write_c0_watchhi1(watchhi | watches->watchhi[1]);
- /* fall through */
+ fallthrough;
case 1:
write_c0_watchlo0(watches->watchlo[0]);
write_c0_watchhi0(watchhi | watches->watchhi[0]);
@@ -58,13 +58,13 @@ void mips_read_watch_registers(void)
BUG();
case 4:
watches->watchhi[3] = (read_c0_watchhi3() & watchhi_mask);
- /* fall through */
+ fallthrough;
case 3:
watches->watchhi[2] = (read_c0_watchhi2() & watchhi_mask);
- /* fall through */
+ fallthrough;
case 2:
watches->watchhi[1] = (read_c0_watchhi1() & watchhi_mask);
- /* fall through */
+ fallthrough;
case 1:
watches->watchhi[0] = (read_c0_watchhi0() & watchhi_mask);
}
@@ -91,25 +91,25 @@ void mips_clear_watch_registers(void)
BUG();
case 8:
write_c0_watchlo7(0);
- /* fall through */
+ fallthrough;
case 7:
write_c0_watchlo6(0);
- /* fall through */
+ fallthrough;
case 6:
write_c0_watchlo5(0);
- /* fall through */
+ fallthrough;
case 5:
write_c0_watchlo4(0);
- /* fall through */
+ fallthrough;
case 4:
write_c0_watchlo3(0);
- /* fall through */
+ fallthrough;
case 3:
write_c0_watchlo2(0);
- /* fall through */
+ fallthrough;
case 2:
write_c0_watchlo1(0);
- /* fall through */
+ fallthrough;
case 1:
write_c0_watchlo0(0);
}
diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c
index 754094b40a75..7ccf9b096783 100644
--- a/arch/mips/kvm/emulate.c
+++ b/arch/mips/kvm/emulate.c
@@ -64,7 +64,7 @@ static int kvm_compute_return_epc(struct kvm_vcpu *vcpu, unsigned long instpc,
switch (insn.r_format.func) {
case jalr_op:
arch->gprs[insn.r_format.rd] = epc + 8;
- /* Fall through */
+ fallthrough;
case jr_op:
nextpc = arch->gprs[insn.r_format.rs];
break;
@@ -140,7 +140,7 @@ static int kvm_compute_return_epc(struct kvm_vcpu *vcpu, unsigned long instpc,
/* These are unconditional and in j_format. */
case jal_op:
arch->gprs[31] = instpc + 8;
- /* fall through */
+ fallthrough;
case j_op:
epc += 4;
epc >>= 28;
@@ -1724,14 +1724,14 @@ enum emulation_result kvm_mips_emulate_load(union mips_instruction inst,
case lhu_op:
vcpu->mmio_needed = 1; /* unsigned */
- /* fall through */
+ fallthrough;
case lh_op:
run->mmio.len = 2;
break;
case lbu_op:
vcpu->mmio_needed = 1; /* unsigned */
- /* fall through */
+ fallthrough;
case lb_op:
run->mmio.len = 1;
break;
@@ -1790,7 +1790,7 @@ static enum emulation_result kvm_mips_guest_cache_op(int (*fn)(unsigned long),
return EMULATE_EXCEPT;
default:
break;
- };
+ }
}
}
@@ -1965,7 +1965,7 @@ enum emulation_result kvm_mips_emulate_inst(u32 cause, u32 *opc,
break;
default:
goto unknown;
- };
+ }
break;
unknown:
#endif
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index 8f05dd0a0f4e..3b0148c99c0d 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -39,40 +39,41 @@
#define VECTORSPACING 0x100 /* for EI/VI mode */
#endif
-#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x)
struct kvm_stats_debugfs_item debugfs_entries[] = {
- { "wait", VCPU_STAT(wait_exits), KVM_STAT_VCPU },
- { "cache", VCPU_STAT(cache_exits), KVM_STAT_VCPU },
- { "signal", VCPU_STAT(signal_exits), KVM_STAT_VCPU },
- { "interrupt", VCPU_STAT(int_exits), KVM_STAT_VCPU },
- { "cop_unusable", VCPU_STAT(cop_unusable_exits), KVM_STAT_VCPU },
- { "tlbmod", VCPU_STAT(tlbmod_exits), KVM_STAT_VCPU },
- { "tlbmiss_ld", VCPU_STAT(tlbmiss_ld_exits), KVM_STAT_VCPU },
- { "tlbmiss_st", VCPU_STAT(tlbmiss_st_exits), KVM_STAT_VCPU },
- { "addrerr_st", VCPU_STAT(addrerr_st_exits), KVM_STAT_VCPU },
- { "addrerr_ld", VCPU_STAT(addrerr_ld_exits), KVM_STAT_VCPU },
- { "syscall", VCPU_STAT(syscall_exits), KVM_STAT_VCPU },
- { "resvd_inst", VCPU_STAT(resvd_inst_exits), KVM_STAT_VCPU },
- { "break_inst", VCPU_STAT(break_inst_exits), KVM_STAT_VCPU },
- { "trap_inst", VCPU_STAT(trap_inst_exits), KVM_STAT_VCPU },
- { "msa_fpe", VCPU_STAT(msa_fpe_exits), KVM_STAT_VCPU },
- { "fpe", VCPU_STAT(fpe_exits), KVM_STAT_VCPU },
- { "msa_disabled", VCPU_STAT(msa_disabled_exits), KVM_STAT_VCPU },
- { "flush_dcache", VCPU_STAT(flush_dcache_exits), KVM_STAT_VCPU },
+ VCPU_STAT("wait", wait_exits),
+ VCPU_STAT("cache", cache_exits),
+ VCPU_STAT("signal", signal_exits),
+ VCPU_STAT("interrupt", int_exits),
+ VCPU_STAT("cop_unusable", cop_unusable_exits),
+ VCPU_STAT("tlbmod", tlbmod_exits),
+ VCPU_STAT("tlbmiss_ld", tlbmiss_ld_exits),
+ VCPU_STAT("tlbmiss_st", tlbmiss_st_exits),
+ VCPU_STAT("addrerr_st", addrerr_st_exits),
+ VCPU_STAT("addrerr_ld", addrerr_ld_exits),
+ VCPU_STAT("syscall", syscall_exits),
+ VCPU_STAT("resvd_inst", resvd_inst_exits),
+ VCPU_STAT("break_inst", break_inst_exits),
+ VCPU_STAT("trap_inst", trap_inst_exits),
+ VCPU_STAT("msa_fpe", msa_fpe_exits),
+ VCPU_STAT("fpe", fpe_exits),
+ VCPU_STAT("msa_disabled", msa_disabled_exits),
+ VCPU_STAT("flush_dcache", flush_dcache_exits),
#ifdef CONFIG_KVM_MIPS_VZ
- { "vz_gpsi", VCPU_STAT(vz_gpsi_exits), KVM_STAT_VCPU },
- { "vz_gsfc", VCPU_STAT(vz_gsfc_exits), KVM_STAT_VCPU },
- { "vz_hc", VCPU_STAT(vz_hc_exits), KVM_STAT_VCPU },
- { "vz_grr", VCPU_STAT(vz_grr_exits), KVM_STAT_VCPU },
- { "vz_gva", VCPU_STAT(vz_gva_exits), KVM_STAT_VCPU },
- { "vz_ghfc", VCPU_STAT(vz_ghfc_exits), KVM_STAT_VCPU },
- { "vz_gpa", VCPU_STAT(vz_gpa_exits), KVM_STAT_VCPU },
- { "vz_resvd", VCPU_STAT(vz_resvd_exits), KVM_STAT_VCPU },
+ VCPU_STAT("vz_gpsi", vz_gpsi_exits),
+ VCPU_STAT("vz_gsfc", vz_gsfc_exits),
+ VCPU_STAT("vz_hc", vz_hc_exits),
+ VCPU_STAT("vz_grr", vz_grr_exits),
+ VCPU_STAT("vz_gva", vz_gva_exits),
+ VCPU_STAT("vz_ghfc", vz_ghfc_exits),
+ VCPU_STAT("vz_gpa", vz_gpa_exits),
+ VCPU_STAT("vz_resvd", vz_resvd_exits),
#endif
- { "halt_successful_poll", VCPU_STAT(halt_successful_poll), KVM_STAT_VCPU },
- { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll), KVM_STAT_VCPU },
- { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid), KVM_STAT_VCPU },
- { "halt_wakeup", VCPU_STAT(halt_wakeup), KVM_STAT_VCPU },
+ VCPU_STAT("halt_successful_poll", halt_successful_poll),
+ VCPU_STAT("halt_attempted_poll", halt_attempted_poll),
+ VCPU_STAT("halt_poll_invalid", halt_poll_invalid),
+ VCPU_STAT("halt_wakeup", halt_wakeup),
+ VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns),
+ VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns),
{NULL}
};
@@ -80,13 +81,13 @@ bool kvm_trace_guest_mode_change;
int kvm_guest_mode_change_trace_reg(void)
{
- kvm_trace_guest_mode_change = 1;
+ kvm_trace_guest_mode_change = true;
return 0;
}
void kvm_guest_mode_change_trace_unreg(void)
{
- kvm_trace_guest_mode_change = 0;
+ kvm_trace_guest_mode_change = false;
}
/*
@@ -284,8 +285,7 @@ static enum hrtimer_restart kvm_mips_comparecount_wakeup(struct hrtimer *timer)
kvm_mips_callbacks->queue_timer_int(vcpu);
vcpu->arch.wait = 0;
- if (swq_has_sleeper(&vcpu->wq))
- swake_up_one(&vcpu->wq);
+ rcuwait_wake_up(&vcpu->wait);
return kvm_mips_count_timeout(vcpu);
}
@@ -439,8 +439,9 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
return -ENOIOCTLCMD;
}
-int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
+int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
{
+ struct kvm_run *run = vcpu->run;
int r = -EINTR;
vcpu_load(vcpu);
@@ -511,8 +512,7 @@ int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
dvcpu->arch.wait = 0;
- if (swq_has_sleeper(&dvcpu->wq))
- swake_up_one(&dvcpu->wq);
+ rcuwait_wake_up(&dvcpu->wait);
return 0;
}
diff --git a/arch/mips/kvm/tlb.c b/arch/mips/kvm/tlb.c
index 7cd92166a0b9..5d436c5216cc 100644
--- a/arch/mips/kvm/tlb.c
+++ b/arch/mips/kvm/tlb.c
@@ -469,7 +469,7 @@ void kvm_vz_local_flush_guesttlb_all(void)
cvmmemctl2 |= CVMMEMCTL2_INHIBITTS;
write_c0_cvmmemctl2(cvmmemctl2);
break;
- };
+ }
/* Invalidate guest entries in guest TLB */
write_gc0_entrylo0(0);
@@ -486,7 +486,7 @@ void kvm_vz_local_flush_guesttlb_all(void)
if (cvmmemctl2) {
cvmmemctl2 &= ~CVMMEMCTL2_INHIBITTS;
write_c0_cvmmemctl2(cvmmemctl2);
- };
+ }
write_gc0_index(old_index);
write_gc0_entryhi(old_entryhi);
diff --git a/arch/mips/kvm/vz.c b/arch/mips/kvm/vz.c
index dde20887a70d..51f51009a53f 100644
--- a/arch/mips/kvm/vz.c
+++ b/arch/mips/kvm/vz.c
@@ -1118,7 +1118,7 @@ static enum emulation_result kvm_vz_gpsi_cache(union mips_instruction inst,
break;
default:
break;
- };
+ }
kvm_err("@ %#lx/%#lx CACHE (cache: %#x, op: %#x, base[%d]: %#lx, offset: %#x\n",
curr_pc, vcpu->arch.gprs[31], cache, op, base, arch->gprs[base],
@@ -1183,7 +1183,7 @@ static enum emulation_result kvm_trap_vz_handle_gpsi(u32 cause, u32 *opc,
trace_kvm_hwr(vcpu, KVM_TRACE_RDHWR,
KVM_TRACE_HWR(rd, sel), 0);
goto unknown;
- };
+ }
trace_kvm_hwr(vcpu, KVM_TRACE_RDHWR,
KVM_TRACE_HWR(rd, sel), arch->gprs[rt]);
@@ -1192,7 +1192,7 @@ static enum emulation_result kvm_trap_vz_handle_gpsi(u32 cause, u32 *opc,
break;
default:
goto unknown;
- };
+ }
break;
unknown:
@@ -1946,7 +1946,7 @@ static int kvm_vz_get_one_reg(struct kvm_vcpu *vcpu,
default:
*v = (long)kvm_read_c0_guest_prid(cop0);
break;
- };
+ }
break;
case KVM_REG_MIPS_CP0_EBASE:
*v = kvm_vz_read_gc0_ebase();
@@ -2185,7 +2185,7 @@ static int kvm_vz_set_one_reg(struct kvm_vcpu *vcpu,
default:
kvm_write_c0_guest_prid(cop0, v);
break;
- };
+ }
break;
case KVM_REG_MIPS_CP0_EBASE:
kvm_vz_write_gc0_ebase(v);
@@ -2980,7 +2980,7 @@ static int kvm_vz_vcpu_setup(struct kvm_vcpu *vcpu)
*/
/* PageGrain */
- if (cpu_has_mips_r6)
+ if (cpu_has_mips_r5 || cpu_has_mips_r6)
kvm_write_sw_gc0_pagegrain(cop0, PG_RIE | PG_XIE | PG_IEC);
/* Wired */
if (cpu_has_mips_r6)
@@ -2988,7 +2988,7 @@ static int kvm_vz_vcpu_setup(struct kvm_vcpu *vcpu)
read_gc0_wired() & MIPSR6_WIRED_LIMIT);
/* Status */
kvm_write_sw_gc0_status(cop0, ST0_BEV | ST0_ERL);
- if (cpu_has_mips_r6)
+ if (cpu_has_mips_r5 || cpu_has_mips_r6)
kvm_change_sw_gc0_status(cop0, ST0_FR, read_gc0_status());
/* IntCtl */
kvm_write_sw_gc0_intctl(cop0, read_gc0_intctl() &
@@ -3086,7 +3086,7 @@ static int kvm_vz_vcpu_setup(struct kvm_vcpu *vcpu)
}
/* reset HTW registers */
- if (cpu_guest_has_htw && cpu_has_mips_r6) {
+ if (cpu_guest_has_htw && (cpu_has_mips_r5 || cpu_has_mips_r6)) {
/* PWField */
kvm_write_sw_gc0_pwfield(cop0, 0x0c30c302);
/* PWSize */
diff --git a/arch/mips/lantiq/Platform b/arch/mips/lantiq/Platform
index b3ec49838fd7..0bc9c0fbd431 100644
--- a/arch/mips/lantiq/Platform
+++ b/arch/mips/lantiq/Platform
@@ -2,7 +2,6 @@
# Lantiq
#
-platform-$(CONFIG_LANTIQ) += lantiq/
cflags-$(CONFIG_LANTIQ) += -I$(srctree)/arch/mips/include/asm/mach-lantiq
load-$(CONFIG_LANTIQ) = 0xffffffff80002000
cflags-$(CONFIG_SOC_TYPE_XWAY) += -I$(srctree)/arch/mips/include/asm/mach-lantiq/xway
diff --git a/arch/mips/lasat/Kconfig b/arch/mips/lasat/Kconfig
deleted file mode 100644
index 11b89e94b835..000000000000
--- a/arch/mips/lasat/Kconfig
+++ /dev/null
@@ -1,16 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-config PICVUE
- tristate "PICVUE LCD display driver"
- depends on LASAT
-
-config PICVUE_PROC
- tristate "PICVUE LCD display driver /proc interface"
- depends on PICVUE && PROC_FS
-
-config DS1603
- bool "DS1603 RTC driver"
- depends on LASAT
-
-config LASAT_SYSCTL
- bool "LASAT sysctl interface"
- depends on LASAT
diff --git a/arch/mips/lasat/Makefile b/arch/mips/lasat/Makefile
deleted file mode 100644
index 1789b227ef20..000000000000
--- a/arch/mips/lasat/Makefile
+++ /dev/null
@@ -1,15 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-#
-# Makefile for the LASAT specific kernel interface routines under Linux.
-#
-
-obj-y += reset.o setup.o prom.o lasat_board.o \
- at93c.o interrupt.o serial.o
-
-obj-$(CONFIG_LASAT_SYSCTL) += sysctl.o
-obj-$(CONFIG_DS1603) += ds1603.o
-obj-$(CONFIG_PICVUE) += picvue.o
-obj-$(CONFIG_PICVUE_PROC) += picvue_proc.o
-
-clean:
- make -C image clean
diff --git a/arch/mips/lasat/Platform b/arch/mips/lasat/Platform
deleted file mode 100644
index 760252828bf1..000000000000
--- a/arch/mips/lasat/Platform
+++ /dev/null
@@ -1,7 +0,0 @@
-#
-# LASAT platforms
-#
-platform-$(CONFIG_LASAT) += lasat/
-cflags-$(CONFIG_LASAT) += \
- -I$(srctree)/arch/mips/include/asm/mach-lasat
-load-$(CONFIG_LASAT) += 0xffffffff80000000
diff --git a/arch/mips/lasat/at93c.c b/arch/mips/lasat/at93c.c
deleted file mode 100644
index f895fe94b937..000000000000
--- a/arch/mips/lasat/at93c.c
+++ /dev/null
@@ -1,148 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Atmel AT93C46 serial eeprom driver
- *
- * Brian Murphy <brian.murphy@eicon.com>
- *
- */
-#include <linux/kernel.h>
-#include <linux/delay.h>
-#include <asm/lasat/lasat.h>
-
-#include "at93c.h"
-
-#define AT93C_ADDR_SHIFT 7
-#define AT93C_ADDR_MAX ((1 << AT93C_ADDR_SHIFT) - 1)
-#define AT93C_RCMD (0x6 << AT93C_ADDR_SHIFT)
-#define AT93C_WCMD (0x5 << AT93C_ADDR_SHIFT)
-#define AT93C_WENCMD 0x260
-#define AT93C_WDSCMD 0x200
-
-struct at93c_defs *at93c;
-
-static void at93c_reg_write(u32 val)
-{
- *at93c->reg = val;
-}
-
-static u32 at93c_reg_read(void)
-{
- u32 tmp = *at93c->reg;
- return tmp;
-}
-
-static u32 at93c_datareg_read(void)
-{
- u32 tmp = *at93c->rdata_reg;
- return tmp;
-}
-
-static void at93c_cycle_clk(u32 data)
-{
- at93c_reg_write(data | at93c->clk);
- lasat_ndelay(250);
- at93c_reg_write(data & ~at93c->clk);
- lasat_ndelay(250);
-}
-
-static void at93c_write_databit(u8 bit)
-{
- u32 data = at93c_reg_read();
- if (bit)
- data |= 1 << at93c->wdata_shift;
- else
- data &= ~(1 << at93c->wdata_shift);
-
- at93c_reg_write(data);
- lasat_ndelay(100);
- at93c_cycle_clk(data);
-}
-
-static unsigned int at93c_read_databit(void)
-{
- u32 data;
-
- at93c_cycle_clk(at93c_reg_read());
- data = (at93c_datareg_read() >> at93c->rdata_shift) & 1;
- return data;
-}
-
-static u8 at93c_read_byte(void)
-{
- int i;
- u8 data = 0;
-
- for (i = 0; i <= 7; i++) {
- data <<= 1;
- data |= at93c_read_databit();
- }
- return data;
-}
-
-static void at93c_write_bits(u32 data, int size)
-{
- int i;
- int shift = size - 1;
- u32 mask = (1 << shift);
-
- for (i = 0; i < size; i++) {
- at93c_write_databit((data & mask) >> shift);
- data <<= 1;
- }
-}
-
-static void at93c_init_op(void)
-{
- at93c_reg_write((at93c_reg_read() | at93c->cs) &
- ~at93c->clk & ~(1 << at93c->rdata_shift));
- lasat_ndelay(50);
-}
-
-static void at93c_end_op(void)
-{
- at93c_reg_write(at93c_reg_read() & ~at93c->cs);
- lasat_ndelay(250);
-}
-
-static void at93c_wait(void)
-{
- at93c_init_op();
- while (!at93c_read_databit())
- ;
- at93c_end_op();
-};
-
-static void at93c_disable_wp(void)
-{
- at93c_init_op();
- at93c_write_bits(AT93C_WENCMD, 10);
- at93c_end_op();
-}
-
-static void at93c_enable_wp(void)
-{
- at93c_init_op();
- at93c_write_bits(AT93C_WDSCMD, 10);
- at93c_end_op();
-}
-
-u8 at93c_read(u8 addr)
-{
- u8 byte;
- at93c_init_op();
- at93c_write_bits((addr & AT93C_ADDR_MAX)|AT93C_RCMD, 10);
- byte = at93c_read_byte();
- at93c_end_op();
- return byte;
-}
-
-void at93c_write(u8 addr, u8 data)
-{
- at93c_disable_wp();
- at93c_init_op();
- at93c_write_bits((addr & AT93C_ADDR_MAX)|AT93C_WCMD, 10);
- at93c_write_bits(data, 8);
- at93c_end_op();
- at93c_wait();
- at93c_enable_wp();
-}
diff --git a/arch/mips/lasat/at93c.h b/arch/mips/lasat/at93c.h
deleted file mode 100644
index 7a99a02d81d0..000000000000
--- a/arch/mips/lasat/at93c.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Atmel AT93C46 serial eeprom driver
- *
- * Brian Murphy <brian.murphy@eicon.com>
- *
- */
-
-extern struct at93c_defs {
- volatile u32 *reg;
- volatile u32 *rdata_reg;
- int rdata_shift;
- int wdata_shift;
- u32 cs;
- u32 clk;
-} *at93c;
-
-u8 at93c_read(u8 addr);
-void at93c_write(u8 addr, u8 data);
diff --git a/arch/mips/lasat/ds1603.c b/arch/mips/lasat/ds1603.c
deleted file mode 100644
index e6ce39fefa78..000000000000
--- a/arch/mips/lasat/ds1603.c
+++ /dev/null
@@ -1,190 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Dallas Semiconductors 1603 RTC driver
- *
- * Brian Murphy <brian@murphy.dk>
- *
- */
-#include <linux/kernel.h>
-#include <asm/lasat/lasat.h>
-#include <linux/delay.h>
-#include <asm/lasat/ds1603.h>
-#include <asm/time.h>
-
-#include "ds1603.h"
-
-#define READ_TIME_CMD 0x81
-#define SET_TIME_CMD 0x80
-#define TRIMMER_SET_CMD 0xC0
-#define TRIMMER_VALUE_MASK 0x38
-#define TRIMMER_SHIFT 3
-
-struct ds_defs *ds1603;
-
-/* HW specific register functions */
-static void rtc_reg_write(unsigned long val)
-{
- *ds1603->reg = val;
-}
-
-static unsigned long rtc_reg_read(void)
-{
- unsigned long tmp = *ds1603->reg;
- return tmp;
-}
-
-static unsigned long rtc_datareg_read(void)
-{
- unsigned long tmp = *ds1603->data_reg;
- return tmp;
-}
-
-static void rtc_nrst_high(void)
-{
- rtc_reg_write(rtc_reg_read() | ds1603->rst);
-}
-
-static void rtc_nrst_low(void)
-{
- rtc_reg_write(rtc_reg_read() & ~ds1603->rst);
-}
-
-static void rtc_cycle_clock(unsigned long data)
-{
- data |= ds1603->clk;
- rtc_reg_write(data);
- lasat_ndelay(250);
- if (ds1603->data_reversed)
- data &= ~ds1603->data;
- else
- data |= ds1603->data;
- data &= ~ds1603->clk;
- rtc_reg_write(data);
- lasat_ndelay(250 + ds1603->huge_delay);
-}
-
-static void rtc_write_databit(unsigned int bit)
-{
- unsigned long data = rtc_reg_read();
- if (ds1603->data_reversed)
- bit = !bit;
- if (bit)
- data |= ds1603->data;
- else
- data &= ~ds1603->data;
-
- rtc_reg_write(data);
- lasat_ndelay(50 + ds1603->huge_delay);
- rtc_cycle_clock(data);
-}
-
-static unsigned int rtc_read_databit(void)
-{
- unsigned int data;
-
- data = (rtc_datareg_read() & (1 << ds1603->data_read_shift))
- >> ds1603->data_read_shift;
- rtc_cycle_clock(rtc_reg_read());
- return data;
-}
-
-static void rtc_write_byte(unsigned int byte)
-{
- int i;
-
- for (i = 0; i <= 7; i++) {
- rtc_write_databit(byte & 1L);
- byte >>= 1;
- }
-}
-
-static void rtc_write_word(unsigned long word)
-{
- int i;
-
- for (i = 0; i <= 31; i++) {
- rtc_write_databit(word & 1L);
- word >>= 1;
- }
-}
-
-static unsigned long rtc_read_word(void)
-{
- int i;
- unsigned long word = 0;
- unsigned long shift = 0;
-
- for (i = 0; i <= 31; i++) {
- word |= rtc_read_databit() << shift;
- shift++;
- }
- return word;
-}
-
-static void rtc_init_op(void)
-{
- rtc_nrst_high();
-
- rtc_reg_write(rtc_reg_read() & ~ds1603->clk);
-
- lasat_ndelay(50);
-}
-
-static void rtc_end_op(void)
-{
- rtc_nrst_low();
- lasat_ndelay(1000);
-}
-
-void read_persistent_clock64(struct timespec64 *ts)
-{
- unsigned long word;
- unsigned long flags;
-
- spin_lock_irqsave(&rtc_lock, flags);
- rtc_init_op();
- rtc_write_byte(READ_TIME_CMD);
- word = rtc_read_word();
- rtc_end_op();
- spin_unlock_irqrestore(&rtc_lock, flags);
-
- ts->tv_sec = word;
- ts->tv_nsec = 0;
-}
-
-int update_persistent_clock64(struct timespec64 now)
-{
- time64_t time = now.tv_sec;
- unsigned long flags;
-
- spin_lock_irqsave(&rtc_lock, flags);
- rtc_init_op();
- rtc_write_byte(SET_TIME_CMD);
- /*
- * Due to the hardware limitation, we cast to 'unsigned long' type,
- * so it will overflow in year 2106 on 32-bit machine.
- */
- rtc_write_word((unsigned long)time);
- rtc_end_op();
- spin_unlock_irqrestore(&rtc_lock, flags);
-
- return 0;
-}
-
-void ds1603_set_trimmer(unsigned int trimval)
-{
- rtc_init_op();
- rtc_write_byte(((trimval << TRIMMER_SHIFT) & TRIMMER_VALUE_MASK)
- | (TRIMMER_SET_CMD));
- rtc_end_op();
-}
-
-void ds1603_disable(void)
-{
- ds1603_set_trimmer(TRIMMER_DISABLE_RTC);
-}
-
-void ds1603_enable(void)
-{
- ds1603_set_trimmer(TRIMMER_DEFAULT);
-}
diff --git a/arch/mips/lasat/ds1603.h b/arch/mips/lasat/ds1603.h
deleted file mode 100644
index 00987d3bdc21..000000000000
--- a/arch/mips/lasat/ds1603.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Dallas Semiconductors 1603 RTC driver
- *
- * Brian Murphy <brian@murphy.dk>
- *
- */
-#ifndef __DS1603_H
-#define __DS1603_H
-
-struct ds_defs {
- volatile u32 *reg;
- volatile u32 *data_reg;
- u32 rst;
- u32 clk;
- u32 data;
- u32 data_read_shift;
- char data_reversed;
- u32 huge_delay;
-};
-
-extern struct ds_defs *ds1603;
-
-void ds1603_set_trimmer(unsigned int);
-void ds1603_enable(void);
-void ds1603_disable(void);
-void ds1603_init(struct ds_defs *);
-
-#define TRIMMER_DEFAULT 3
-#define TRIMMER_DISABLE_RTC 0
-
-#endif
diff --git a/arch/mips/lasat/image/Makefile b/arch/mips/lasat/image/Makefile
deleted file mode 100644
index 78ce4cff1012..000000000000
--- a/arch/mips/lasat/image/Makefile
+++ /dev/null
@@ -1,53 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-#
-# MAKEFILE FOR THE MIPS LINUX BOOTLOADER AND ROM DEBUGGER
-#
-# i-data Networks
-#
-# Author: Thomas Horsten <thh@i-data.com>
-#
-
-ifndef Version
- Version = "$(USER)-test"
-endif
-
-MKLASATIMG = mklasatimg
-MKLASATIMG_ARCH = mq2,mqpro,sp100,sp200
-KERNEL_IMAGE = vmlinux
-
-LDSCRIPT= -L$(srctree)/$(src) -Tromscript.normal
-
-HEAD_DEFINES := -D_kernel_start=$(VMLINUX_LOAD_ADDRESS) \
- -D_kernel_entry=$(VMLINUX_ENTRY_ADDRESS) \
- -D VERSION="\"$(Version)\"" \
- -D TIMESTAMP=$(shell date +%s)
-
-$(obj)/head.o: $(obj)/head.S $(KERNEL_IMAGE)
- $(CC) -fno-pic $(HEAD_DEFINES) $(LINUXINCLUDE) -c -o $@ $<
-
-OBJECTS = head.o kImage.o
-
-rom.sw: $(obj)/rom.sw
-rom.bin: $(obj)/rom.bin
-
-$(obj)/rom.sw: $(obj)/rom.bin
- $(MKLASATIMG) -o $@ -k $^ -m $(MKLASATIMG_ARCH)
-
-$(obj)/rom.bin: $(obj)/rom
- $(OBJCOPY) -O binary -S $^ $@
-
-# Rule to make the bootloader
-$(obj)/rom: $(addprefix $(obj)/,$(OBJECTS))
- $(LD) $(KBUILD_LDFLAGS) $(LDSCRIPT) -o $@ $^
-
-$(obj)/%.o: $(obj)/%.gz
- $(LD) -r -o $@ -b binary $<
-
-$(obj)/%.gz: $(obj)/%.bin
- gzip -cf -9 $< > $@
-
-$(obj)/kImage.bin: $(KERNEL_IMAGE)
- $(OBJCOPY) -O binary -S $^ $@
-
-clean:
- rm -f rom rom.bin rom.sw kImage.bin kImage.o
diff --git a/arch/mips/lasat/image/head.S b/arch/mips/lasat/image/head.S
deleted file mode 100644
index 1a27312d4c2e..000000000000
--- a/arch/mips/lasat/image/head.S
+++ /dev/null
@@ -1,32 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#include <asm/lasat/head.h>
-
- .text
- .section .text..start, "ax"
- .set noreorder
- .set mips3
-
- /* Magic words identifying a software image */
- .word LASAT_K_MAGIC0_VAL
- .word LASAT_K_MAGIC1_VAL
-
- /* Image header version */
- .word 0x00000002
-
- /* image start and size */
- .word _image_start
- .word _image_size
-
- /* start of kernel and entrypoint in uncompressed image */
- .word _kernel_start
- .word _kernel_entry
-
- /* Here we have room for future flags */
-
- .org 0x40
-reldate:
- .word TIMESTAMP
-
- .org 0x50
-release:
- .string VERSION
diff --git a/arch/mips/lasat/image/romscript.normal b/arch/mips/lasat/image/romscript.normal
deleted file mode 100644
index 0864c963e188..000000000000
--- a/arch/mips/lasat/image/romscript.normal
+++ /dev/null
@@ -1,23 +0,0 @@
-OUTPUT_ARCH(mips)
-
-SECTIONS
-{
- .text :
- {
- *(.text..start)
- }
-
- /* Data in ROM */
-
- .data ALIGN(0x10) :
- {
- *(.data)
- }
- _image_start = ADDR(.data);
- _image_size = SIZEOF(.data);
-
- .other :
- {
- *(.*)
- }
-}
diff --git a/arch/mips/lasat/interrupt.c b/arch/mips/lasat/interrupt.c
deleted file mode 100644
index 7965bbd0d319..000000000000
--- a/arch/mips/lasat/interrupt.c
+++ /dev/null
@@ -1,119 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Carsten Langgaard, carstenl@mips.com
- * Copyright (C) 1999,2000 MIPS Technologies, Inc. All rights reserved.
- *
- * Routines for generic manipulation of the interrupts found on the
- * Lasat boards.
- */
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/irq.h>
-
-#include <asm/irq_cpu.h>
-#include <asm/lasat/lasat.h>
-#include <asm/lasat/lasatint.h>
-
-#include <irq.h>
-
-static volatile int *lasat_int_status;
-static volatile int *lasat_int_mask;
-static volatile int lasat_int_mask_shift;
-
-void disable_lasat_irq(struct irq_data *d)
-{
- unsigned int irq_nr = d->irq - LASAT_IRQ_BASE;
-
- *lasat_int_mask &= ~(1 << irq_nr) << lasat_int_mask_shift;
-}
-
-void enable_lasat_irq(struct irq_data *d)
-{
- unsigned int irq_nr = d->irq - LASAT_IRQ_BASE;
-
- *lasat_int_mask |= (1 << irq_nr) << lasat_int_mask_shift;
-}
-
-static struct irq_chip lasat_irq_type = {
- .name = "Lasat",
- .irq_mask = disable_lasat_irq,
- .irq_unmask = enable_lasat_irq,
-};
-
-static inline int ls1bit32(unsigned int x)
-{
- int b = 31, s;
-
- s = 16; if (x << 16 == 0) s = 0; b -= s; x <<= s;
- s = 8; if (x << 8 == 0) s = 0; b -= s; x <<= s;
- s = 4; if (x << 4 == 0) s = 0; b -= s; x <<= s;
- s = 2; if (x << 2 == 0) s = 0; b -= s; x <<= s;
- s = 1; if (x << 1 == 0) s = 0; b -= s;
-
- return b;
-}
-
-static unsigned long (*get_int_status)(void);
-
-static unsigned long get_int_status_100(void)
-{
- return *lasat_int_status & *lasat_int_mask;
-}
-
-static unsigned long get_int_status_200(void)
-{
- unsigned long int_status;
-
- int_status = *lasat_int_status;
- int_status &= (int_status >> LASATINT_MASK_SHIFT_200) & 0xffff;
- return int_status;
-}
-
-asmlinkage void plat_irq_dispatch(void)
-{
- unsigned long int_status;
- unsigned int cause = read_c0_cause();
- int irq;
-
- if (cause & CAUSEF_IP7) { /* R4000 count / compare IRQ */
- do_IRQ(7);
- return;
- }
-
- int_status = get_int_status();
-
- /* if int_status == 0, then the interrupt has already been cleared */
- if (int_status) {
- irq = LASAT_IRQ_BASE + ls1bit32(int_status);
-
- do_IRQ(irq);
- }
-}
-
-void __init arch_init_irq(void)
-{
- int irq = LASAT_CASCADE_IRQ;
- int i;
-
- if (IS_LASAT_200()) {
- lasat_int_status = (void *)LASAT_INT_STATUS_REG_200;
- lasat_int_mask = (void *)LASAT_INT_MASK_REG_200;
- lasat_int_mask_shift = LASATINT_MASK_SHIFT_200;
- get_int_status = get_int_status_200;
- *lasat_int_mask &= 0xffff;
- } else {
- lasat_int_status = (void *)LASAT_INT_STATUS_REG_100;
- lasat_int_mask = (void *)LASAT_INT_MASK_REG_100;
- lasat_int_mask_shift = LASATINT_MASK_SHIFT_100;
- get_int_status = get_int_status_100;
- *lasat_int_mask = 0;
- }
-
- mips_cpu_irq_init();
-
- for (i = LASAT_IRQ_BASE; i <= LASAT_IRQ_END; i++)
- irq_set_chip_and_handler(i, &lasat_irq_type, handle_level_irq);
-
- if (request_irq(irq, no_action, IRQF_NO_THREAD, "cascade", NULL))
- pr_err("Failed to request irq %d (cascade)\n", irq);
-}
diff --git a/arch/mips/lasat/lasat_board.c b/arch/mips/lasat/lasat_board.c
deleted file mode 100644
index 80e1ba541148..000000000000
--- a/arch/mips/lasat/lasat_board.c
+++ /dev/null
@@ -1,268 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Thomas Horsten <thh@lasat.com>
- * Copyright (C) 2000 LASAT Networks A/S.
- *
- * Routines specific to the LASAT boards
- */
-#include <linux/types.h>
-#include <linux/crc32.h>
-#include <asm/lasat/lasat.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/ctype.h>
-#include <linux/mutex.h>
-#include <asm/addrspace.h>
-#include "at93c.h"
-/* New model description table */
-#include "lasat_models.h"
-
-static DEFINE_MUTEX(lasat_eeprom_mutex);
-
-#define EEPROM_CRC(data, len) (~crc32(~0, data, len))
-
-struct lasat_info lasat_board_info;
-
-int EEPROMRead(unsigned int pos, unsigned char *data, int len)
-{
- int i;
-
- for (i = 0; i < len; i++)
- *data++ = at93c_read(pos++);
-
- return 0;
-}
-
-int EEPROMWrite(unsigned int pos, unsigned char *data, int len)
-{
- int i;
-
- for (i = 0; i < len; i++)
- at93c_write(pos++, *data++);
-
- return 0;
-}
-
-static void init_flash_sizes(void)
-{
- unsigned long *lb = lasat_board_info.li_flashpart_base;
- unsigned long *ls = lasat_board_info.li_flashpart_size;
- int i;
-
- ls[LASAT_MTD_BOOTLOADER] = 0x40000;
- ls[LASAT_MTD_SERVICE] = 0xC0000;
- ls[LASAT_MTD_NORMAL] = 0x100000;
-
- if (!IS_LASAT_200()) {
- lasat_board_info.li_flash_base = 0x1e000000;
-
- lb[LASAT_MTD_BOOTLOADER] = 0x1e400000;
-
- if (lasat_board_info.li_flash_size > 0x200000) {
- ls[LASAT_MTD_CONFIG] = 0x100000;
- ls[LASAT_MTD_FS] = 0x500000;
- }
- } else {
- lasat_board_info.li_flash_base = 0x10000000;
-
- if (lasat_board_info.li_flash_size < 0x1000000) {
- lb[LASAT_MTD_BOOTLOADER] = 0x10000000;
- ls[LASAT_MTD_CONFIG] = 0x100000;
- if (lasat_board_info.li_flash_size >= 0x400000)
- ls[LASAT_MTD_FS] =
- lasat_board_info.li_flash_size - 0x300000;
- }
- }
-
- for (i = 1; i < LASAT_MTD_LAST; i++)
- lb[i] = lb[i-1] + ls[i-1];
-}
-
-int lasat_init_board_info(void)
-{
- int c;
- unsigned long crc;
- unsigned long cfg0, cfg1;
- const struct product_info *ppi;
- int i_n_base_models = N_BASE_MODELS;
- const char * const * i_txt_base_models = txt_base_models;
- int i_n_prids = N_PRIDS;
-
- memset(&lasat_board_info, 0, sizeof(lasat_board_info));
-
- /* First read the EEPROM info */
- EEPROMRead(0, (unsigned char *)&lasat_board_info.li_eeprom_info,
- sizeof(struct lasat_eeprom_struct));
-
- /* Check the CRC */
- crc = EEPROM_CRC((unsigned char *)(&lasat_board_info.li_eeprom_info),
- sizeof(struct lasat_eeprom_struct) - 4);
-
- if (crc != lasat_board_info.li_eeprom_info.crc32) {
- printk(KERN_WARNING "WARNING...\nWARNING...\nEEPROM CRC does "
- "not match calculated, attempting to soldier on...\n");
- }
-
- if (lasat_board_info.li_eeprom_info.version != LASAT_EEPROM_VERSION) {
- printk(KERN_WARNING "WARNING...\nWARNING...\nEEPROM version "
- "%d, wanted version %d, attempting to soldier on...\n",
- (unsigned int)lasat_board_info.li_eeprom_info.version,
- LASAT_EEPROM_VERSION);
- }
-
- cfg0 = lasat_board_info.li_eeprom_info.cfg[0];
- cfg1 = lasat_board_info.li_eeprom_info.cfg[1];
-
- if (LASAT_W0_DSCTYPE(cfg0) != 1) {
- printk(KERN_WARNING "WARNING...\nWARNING...\n"
- "Invalid configuration read from EEPROM, attempting to "
- "soldier on...");
- }
- /* We have a valid configuration */
-
- switch (LASAT_W0_SDRAMBANKSZ(cfg0)) {
- case 0:
- lasat_board_info.li_memsize = 0x0800000;
- break;
- case 1:
- lasat_board_info.li_memsize = 0x1000000;
- break;
- case 2:
- lasat_board_info.li_memsize = 0x2000000;
- break;
- case 3:
- lasat_board_info.li_memsize = 0x4000000;
- break;
- case 4:
- lasat_board_info.li_memsize = 0x8000000;
- break;
- default:
- lasat_board_info.li_memsize = 0;
- }
-
- switch (LASAT_W0_SDRAMBANKS(cfg0)) {
- case 0:
- break;
- case 1:
- lasat_board_info.li_memsize *= 2;
- break;
- default:
- break;
- }
-
- switch (LASAT_W0_BUSSPEED(cfg0)) {
- case 0x0:
- lasat_board_info.li_bus_hz = 60000000;
- break;
- case 0x1:
- lasat_board_info.li_bus_hz = 66000000;
- break;
- case 0x2:
- lasat_board_info.li_bus_hz = 66666667;
- break;
- case 0x3:
- lasat_board_info.li_bus_hz = 80000000;
- break;
- case 0x4:
- lasat_board_info.li_bus_hz = 83333333;
- break;
- case 0x5:
- lasat_board_info.li_bus_hz = 100000000;
- break;
- }
-
- switch (LASAT_W0_CPUCLK(cfg0)) {
- case 0x0:
- lasat_board_info.li_cpu_hz =
- lasat_board_info.li_bus_hz;
- break;
- case 0x1:
- lasat_board_info.li_cpu_hz =
- lasat_board_info.li_bus_hz +
- (lasat_board_info.li_bus_hz >> 1);
- break;
- case 0x2:
- lasat_board_info.li_cpu_hz =
- lasat_board_info.li_bus_hz +
- lasat_board_info.li_bus_hz;
- break;
- case 0x3:
- lasat_board_info.li_cpu_hz =
- lasat_board_info.li_bus_hz +
- lasat_board_info.li_bus_hz +
- (lasat_board_info.li_bus_hz >> 1);
- break;
- case 0x4:
- lasat_board_info.li_cpu_hz =
- lasat_board_info.li_bus_hz +
- lasat_board_info.li_bus_hz +
- lasat_board_info.li_bus_hz;
- break;
- }
-
- /* Flash size */
- switch (LASAT_W1_FLASHSIZE(cfg1)) {
- case 0:
- lasat_board_info.li_flash_size = 0x200000;
- break;
- case 1:
- lasat_board_info.li_flash_size = 0x400000;
- break;
- case 2:
- lasat_board_info.li_flash_size = 0x800000;
- break;
- case 3:
- lasat_board_info.li_flash_size = 0x1000000;
- break;
- case 4:
- lasat_board_info.li_flash_size = 0x2000000;
- break;
- }
-
- init_flash_sizes();
-
- lasat_board_info.li_bmid = LASAT_W0_BMID(cfg0);
- lasat_board_info.li_prid = lasat_board_info.li_eeprom_info.prid;
- if (lasat_board_info.li_prid == 0xffff || lasat_board_info.li_prid == 0)
- lasat_board_info.li_prid = lasat_board_info.li_bmid;
-
- /* Base model stuff */
- if (lasat_board_info.li_bmid > i_n_base_models)
- lasat_board_info.li_bmid = i_n_base_models;
- strcpy(lasat_board_info.li_bmstr,
- i_txt_base_models[lasat_board_info.li_bmid]);
-
- /* Product ID dependent values */
- c = lasat_board_info.li_prid;
- if (c >= i_n_prids) {
- strcpy(lasat_board_info.li_namestr, "Unknown Model");
- strcpy(lasat_board_info.li_typestr, "Unknown Type");
- } else {
- ppi = &vendor_info_table[0].vi_product_info[c];
- strcpy(lasat_board_info.li_namestr, ppi->pi_name);
- if (ppi->pi_type)
- strcpy(lasat_board_info.li_typestr, ppi->pi_type);
- else
- sprintf(lasat_board_info.li_typestr, "%d", 10 * c);
- }
-
- return 0;
-}
-
-void lasat_write_eeprom_info(void)
-{
- unsigned long crc;
-
- mutex_lock(&lasat_eeprom_mutex);
-
- /* Generate the CRC */
- crc = EEPROM_CRC((unsigned char *)(&lasat_board_info.li_eeprom_info),
- sizeof(struct lasat_eeprom_struct) - 4);
- lasat_board_info.li_eeprom_info.crc32 = crc;
-
- /* Write the EEPROM info */
- EEPROMWrite(0, (unsigned char *)&lasat_board_info.li_eeprom_info,
- sizeof(struct lasat_eeprom_struct));
-
- mutex_unlock(&lasat_eeprom_mutex);
-}
diff --git a/arch/mips/lasat/lasat_models.h b/arch/mips/lasat/lasat_models.h
deleted file mode 100644
index 474e57342484..000000000000
--- a/arch/mips/lasat/lasat_models.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Model description tables
- */
-#include <linux/kernel.h>
-
-struct product_info {
- const char *pi_name;
- const char *pi_type;
-};
-
-struct vendor_info {
- const char *vi_name;
- const struct product_info *vi_product_info;
-};
-
-/*
- * Base models
- */
-static const char * const txt_base_models[] = {
- "MQ 2", "MQ Pro", "SP 25", "SP 50", "SP 100", "SP 5000", "SP 7000",
- "SP 1000", "Unknown"
-};
-#define N_BASE_MODELS (ARRAY_SIZE(txt_base_models) - 1)
-
-/*
- * Eicon Networks
- */
-static const char txt_en_mq[] = "Masquerade";
-static const char txt_en_sp[] = "Safepipe";
-
-static const struct product_info product_info_eicon[] = {
- { txt_en_mq, "II" }, /* 0 */
- { txt_en_mq, "Pro" }, /* 1 */
- { txt_en_sp, "25" }, /* 2 */
- { txt_en_sp, "50" }, /* 3 */
- { txt_en_sp, "100" }, /* 4 */
- { txt_en_sp, "5000" }, /* 5 */
- { txt_en_sp, "7000" }, /* 6 */
- { txt_en_sp, "30" }, /* 7 */
- { txt_en_sp, "5100" }, /* 8 */
- { txt_en_sp, "7100" }, /* 9 */
- { txt_en_sp, "1110" }, /* 10 */
- { txt_en_sp, "3020" }, /* 11 */
- { txt_en_sp, "3030" }, /* 12 */
- { txt_en_sp, "5020" }, /* 13 */
- { txt_en_sp, "5030" }, /* 14 */
- { txt_en_sp, "1120" }, /* 15 */
- { txt_en_sp, "1130" }, /* 16 */
- { txt_en_sp, "6010" }, /* 17 */
- { txt_en_sp, "6110" }, /* 18 */
- { txt_en_sp, "6210" }, /* 19 */
- { txt_en_sp, "1020" }, /* 20 */
- { txt_en_sp, "1040" }, /* 21 */
- { txt_en_sp, "1050" }, /* 22 */
- { txt_en_sp, "1060" }, /* 23 */
-};
-
-#define N_PRIDS ARRAY_SIZE(product_info_eicon)
-
-/*
- * The vendor table
- */
-static struct vendor_info const vendor_info_table[] = {
- { "Eicon Networks", product_info_eicon },
-};
-
-#define N_VENDORS ARRAY_SIZE(vendor_info_table)
diff --git a/arch/mips/lasat/picvue.c b/arch/mips/lasat/picvue.c
deleted file mode 100644
index 08298ccf5ccf..000000000000
--- a/arch/mips/lasat/picvue.c
+++ /dev/null
@@ -1,242 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Picvue PVC160206 display driver
- *
- * Brian Murphy <brian@murphy.dk>
- *
- */
-#include <linux/kernel.h>
-#include <linux/delay.h>
-#include <asm/bootinfo.h>
-#include <asm/lasat/lasat.h>
-#include <linux/module.h>
-#include <linux/errno.h>
-#include <linux/string.h>
-
-#include "picvue.h"
-
-#define PVC_BUSY 0x80
-#define PVC_NLINES 2
-#define PVC_DISPMEM 80
-#define PVC_LINELEN PVC_DISPMEM / PVC_NLINES
-
-struct pvc_defs *picvue;
-
-static void pvc_reg_write(u32 val)
-{
- *picvue->reg = val;
-}
-
-static u32 pvc_reg_read(void)
-{
- u32 tmp = *picvue->reg;
- return tmp;
-}
-
-static void pvc_write_byte(u32 data, u8 byte)
-{
- data |= picvue->e;
- pvc_reg_write(data);
- data &= ~picvue->data_mask;
- data |= byte << picvue->data_shift;
- pvc_reg_write(data);
- ndelay(220);
- pvc_reg_write(data & ~picvue->e);
- ndelay(220);
-}
-
-static u8 pvc_read_byte(u32 data)
-{
- u8 byte;
-
- data |= picvue->e;
- pvc_reg_write(data);
- ndelay(220);
- byte = (pvc_reg_read() & picvue->data_mask) >> picvue->data_shift;
- data &= ~picvue->e;
- pvc_reg_write(data);
- ndelay(220);
- return byte;
-}
-
-static u8 pvc_read_data(void)
-{
- u32 data = pvc_reg_read();
- u8 byte;
- data |= picvue->rw;
- data &= ~picvue->rs;
- pvc_reg_write(data);
- ndelay(40);
- byte = pvc_read_byte(data);
- data |= picvue->rs;
- pvc_reg_write(data);
- return byte;
-}
-
-#define TIMEOUT 1000
-static int pvc_wait(void)
-{
- int i = TIMEOUT;
- int err = 0;
-
- while ((pvc_read_data() & PVC_BUSY) && i)
- i--;
- if (i == 0)
- err = -ETIME;
-
- return err;
-}
-
-#define MODE_INST 0
-#define MODE_DATA 1
-static void pvc_write(u8 byte, int mode)
-{
- u32 data = pvc_reg_read();
- data &= ~picvue->rw;
- if (mode == MODE_DATA)
- data |= picvue->rs;
- else
- data &= ~picvue->rs;
- pvc_reg_write(data);
- ndelay(40);
- pvc_write_byte(data, byte);
- if (mode == MODE_DATA)
- data &= ~picvue->rs;
- else
- data |= picvue->rs;
- pvc_reg_write(data);
- pvc_wait();
-}
-
-void pvc_write_string(const unsigned char *str, u8 addr, int line)
-{
- int i = 0;
-
- if (line > 0 && (PVC_NLINES > 1))
- addr += 0x40 * line;
- pvc_write(0x80 | addr, MODE_INST);
-
- while (*str != 0 && i < PVC_LINELEN) {
- pvc_write(*str++, MODE_DATA);
- i++;
- }
-}
-
-void pvc_write_string_centered(const unsigned char *str, int line)
-{
- int len = strlen(str);
- u8 addr;
-
- if (len > PVC_VISIBLE_CHARS)
- addr = 0;
- else
- addr = (PVC_VISIBLE_CHARS - strlen(str))/2;
-
- pvc_write_string(str, addr, line);
-}
-
-void pvc_dump_string(const unsigned char *str)
-{
- int len = strlen(str);
-
- pvc_write_string(str, 0, 0);
- if (len > PVC_VISIBLE_CHARS)
- pvc_write_string(&str[PVC_VISIBLE_CHARS], 0, 1);
-}
-
-#define BM_SIZE 8
-#define MAX_PROGRAMMABLE_CHARS 8
-int pvc_program_cg(int charnum, u8 bitmap[BM_SIZE])
-{
- int i;
- int addr;
-
- if (charnum > MAX_PROGRAMMABLE_CHARS)
- return -ENOENT;
-
- addr = charnum * 8;
- pvc_write(0x40 | addr, MODE_INST);
-
- for (i = 0; i < BM_SIZE; i++)
- pvc_write(bitmap[i], MODE_DATA);
- return 0;
-}
-
-#define FUNC_SET_CMD 0x20
-#define EIGHT_BYTE (1 << 4)
-#define FOUR_BYTE 0
-#define TWO_LINES (1 << 3)
-#define ONE_LINE 0
-#define LARGE_FONT (1 << 2)
-#define SMALL_FONT 0
-
-static void pvc_funcset(u8 cmd)
-{
- pvc_write(FUNC_SET_CMD | (cmd & (EIGHT_BYTE|TWO_LINES|LARGE_FONT)),
- MODE_INST);
-}
-
-#define ENTRYMODE_CMD 0x4
-#define AUTO_INC (1 << 1)
-#define AUTO_DEC 0
-#define CURSOR_FOLLOWS_DISP (1 << 0)
-
-static void pvc_entrymode(u8 cmd)
-{
- pvc_write(ENTRYMODE_CMD | (cmd & (AUTO_INC|CURSOR_FOLLOWS_DISP)),
- MODE_INST);
-}
-
-#define DISP_CNT_CMD 0x08
-#define DISP_OFF 0
-#define DISP_ON (1 << 2)
-#define CUR_ON (1 << 1)
-#define CUR_BLINK (1 << 0)
-void pvc_dispcnt(u8 cmd)
-{
- pvc_write(DISP_CNT_CMD | (cmd & (DISP_ON|CUR_ON|CUR_BLINK)), MODE_INST);
-}
-
-#define MOVE_CMD 0x10
-#define DISPLAY (1 << 3)
-#define CURSOR 0
-#define RIGHT (1 << 2)
-#define LEFT 0
-void pvc_move(u8 cmd)
-{
- pvc_write(MOVE_CMD | (cmd & (DISPLAY|RIGHT)), MODE_INST);
-}
-
-#define CLEAR_CMD 0x1
-void pvc_clear(void)
-{
- pvc_write(CLEAR_CMD, MODE_INST);
-}
-
-#define HOME_CMD 0x2
-void pvc_home(void)
-{
- pvc_write(HOME_CMD, MODE_INST);
-}
-
-int pvc_init(void)
-{
- u8 cmd = EIGHT_BYTE;
-
- if (PVC_NLINES == 2)
- cmd |= (SMALL_FONT|TWO_LINES);
- else
- cmd |= (LARGE_FONT|ONE_LINE);
- pvc_funcset(cmd);
- pvc_dispcnt(DISP_ON);
- pvc_entrymode(AUTO_INC);
-
- pvc_clear();
- pvc_write_string_centered("Display", 0);
- pvc_write_string_centered("Initialized", 1);
-
- return 0;
-}
-
-module_init(pvc_init);
-MODULE_LICENSE("GPL");
diff --git a/arch/mips/lasat/picvue.h b/arch/mips/lasat/picvue.h
deleted file mode 100644
index 161d3bf50811..000000000000
--- a/arch/mips/lasat/picvue.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Picvue PVC160206 display driver
- *
- * Brian Murphy <brian.murphy@eicon.com>
- *
- */
-struct pvc_defs {
- volatile u32 *reg;
- u32 data_shift;
- u32 data_mask;
- u32 e;
- u32 rw;
- u32 rs;
-};
-
-extern struct pvc_defs *picvue;
-
-#define PVC_NLINES 2
-#define PVC_DISPMEM 80
-#define PVC_LINELEN PVC_DISPMEM / PVC_NLINES
-#define PVC_VISIBLE_CHARS 16
-
-void pvc_write_string(const unsigned char *str, u8 addr, int line);
-void pvc_write_string_centered(const unsigned char *str, int line);
-void pvc_dump_string(const unsigned char *str);
-
-#define BM_SIZE 8
-#define MAX_PROGRAMMABLE_CHARS 8
-int pvc_program_cg(int charnum, u8 bitmap[BM_SIZE]);
-
-void pvc_dispcnt(u8 cmd);
-#define DISP_OFF 0
-#define DISP_ON (1 << 2)
-#define CUR_ON (1 << 1)
-#define CUR_BLINK (1 << 0)
-
-void pvc_move(u8 cmd);
-#define DISPLAY (1 << 3)
-#define CURSOR 0
-#define RIGHT (1 << 2)
-#define LEFT 0
-
-void pvc_clear(void);
-void pvc_home(void);
diff --git a/arch/mips/lasat/picvue_proc.c b/arch/mips/lasat/picvue_proc.c
deleted file mode 100644
index 61c033494af5..000000000000
--- a/arch/mips/lasat/picvue_proc.c
+++ /dev/null
@@ -1,208 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Picvue PVC160206 display driver
- *
- * Brian Murphy <brian.murphy@eicon.com>
- *
- */
-#include <linux/bug.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/errno.h>
-
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <linux/interrupt.h>
-
-#include <linux/timer.h>
-#include <linux/mutex.h>
-#include <linux/uaccess.h>
-
-#include "picvue.h"
-
-static DEFINE_MUTEX(pvc_mutex);
-static char pvc_lines[PVC_NLINES][PVC_LINELEN+1];
-static int pvc_linedata[PVC_NLINES];
-static char *pvc_linename[PVC_NLINES] = {"line1", "line2"};
-#define DISPLAY_DIR_NAME "display"
-static int scroll_dir, scroll_interval;
-
-static struct timer_list timer;
-
-static void pvc_display(unsigned long data)
-{
- int i;
-
- pvc_clear();
- for (i = 0; i < PVC_NLINES; i++)
- pvc_write_string(pvc_lines[i], 0, i);
-}
-
-static DECLARE_TASKLET(pvc_display_tasklet, &pvc_display, 0);
-
-static int pvc_line_proc_show(struct seq_file *m, void *v)
-{
- int lineno = *(int *)m->private;
-
- if (lineno < 0 || lineno >= PVC_NLINES) {
- printk(KERN_WARNING "proc_read_line: invalid lineno %d\n", lineno);
- return 0;
- }
-
- mutex_lock(&pvc_mutex);
- seq_printf(m, "%s\n", pvc_lines[lineno]);
- mutex_unlock(&pvc_mutex);
-
- return 0;
-}
-
-static int pvc_line_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, pvc_line_proc_show, PDE_DATA(inode));
-}
-
-static ssize_t pvc_line_proc_write(struct file *file, const char __user *buf,
- size_t count, loff_t *pos)
-{
- int lineno = *(int *)PDE_DATA(file_inode(file));
- char kbuf[PVC_LINELEN];
- size_t len;
-
- BUG_ON(lineno < 0 || lineno >= PVC_NLINES);
-
- len = min(count, sizeof(kbuf) - 1);
- if (copy_from_user(kbuf, buf, len))
- return -EFAULT;
- kbuf[len] = '\0';
-
- if (len > 0 && kbuf[len - 1] == '\n')
- len--;
-
- mutex_lock(&pvc_mutex);
- strncpy(pvc_lines[lineno], kbuf, len);
- pvc_lines[lineno][len] = '\0';
- mutex_unlock(&pvc_mutex);
-
- tasklet_schedule(&pvc_display_tasklet);
-
- return count;
-}
-
-static const struct proc_ops pvc_line_proc_ops = {
- .proc_open = pvc_line_proc_open,
- .proc_read = seq_read,
- .proc_lseek = seq_lseek,
- .proc_release = single_release,
- .proc_write = pvc_line_proc_write,
-};
-
-static ssize_t pvc_scroll_proc_write(struct file *file, const char __user *buf,
- size_t count, loff_t *pos)
-{
- char kbuf[42];
- size_t len;
- int cmd;
-
- len = min(count, sizeof(kbuf) - 1);
- if (copy_from_user(kbuf, buf, len))
- return -EFAULT;
- kbuf[len] = '\0';
-
- cmd = simple_strtol(kbuf, NULL, 10);
-
- mutex_lock(&pvc_mutex);
- if (scroll_interval != 0)
- del_timer(&timer);
-
- if (cmd == 0) {
- scroll_dir = 0;
- scroll_interval = 0;
- } else {
- if (cmd < 0) {
- scroll_dir = -1;
- scroll_interval = -cmd;
- } else {
- scroll_dir = 1;
- scroll_interval = cmd;
- }
- add_timer(&timer);
- }
- mutex_unlock(&pvc_mutex);
-
- return count;
-}
-
-static int pvc_scroll_proc_show(struct seq_file *m, void *v)
-{
- mutex_lock(&pvc_mutex);
- seq_printf(m, "%d\n", scroll_dir * scroll_interval);
- mutex_unlock(&pvc_mutex);
-
- return 0;
-}
-
-static int pvc_scroll_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, pvc_scroll_proc_show, NULL);
-}
-
-static const struct proc_ops pvc_scroll_proc_ops = {
- .proc_open = pvc_scroll_proc_open,
- .proc_read = seq_read,
- .proc_lseek = seq_lseek,
- .proc_release = single_release,
- .proc_write = pvc_scroll_proc_write,
-};
-
-void pvc_proc_timerfunc(struct timer_list *unused)
-{
- if (scroll_dir < 0)
- pvc_move(DISPLAY|RIGHT);
- else if (scroll_dir > 0)
- pvc_move(DISPLAY|LEFT);
-
- timer.expires = jiffies + scroll_interval;
- add_timer(&timer);
-}
-
-static void pvc_proc_cleanup(void)
-{
- remove_proc_subtree(DISPLAY_DIR_NAME, NULL);
- del_timer_sync(&timer);
-}
-
-static int __init pvc_proc_init(void)
-{
- struct proc_dir_entry *dir, *proc_entry;
- int i;
-
- dir = proc_mkdir(DISPLAY_DIR_NAME, NULL);
- if (dir == NULL)
- goto error;
-
- for (i = 0; i < PVC_NLINES; i++) {
- strcpy(pvc_lines[i], "");
- pvc_linedata[i] = i;
- }
- for (i = 0; i < PVC_NLINES; i++) {
- proc_entry = proc_create_data(pvc_linename[i], 0644, dir,
- &pvc_line_proc_ops, &pvc_linedata[i]);
- if (proc_entry == NULL)
- goto error;
- }
- proc_entry = proc_create("scroll", 0644, dir, &pvc_scroll_proc_ops);
- if (proc_entry == NULL)
- goto error;
-
- timer_setup(&timer, pvc_proc_timerfunc, 0);
-
- return 0;
-error:
- pvc_proc_cleanup();
- return -ENOMEM;
-}
-
-module_init(pvc_proc_init);
-module_exit(pvc_proc_cleanup);
-MODULE_LICENSE("GPL");
diff --git a/arch/mips/lasat/prom.c b/arch/mips/lasat/prom.c
deleted file mode 100644
index 5ce1407de2d5..000000000000
--- a/arch/mips/lasat/prom.c
+++ /dev/null
@@ -1,126 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * PROM interface routines.
- */
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/string.h>
-#include <linux/ctype.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/memblock.h>
-#include <linux/ioport.h>
-#include <asm/bootinfo.h>
-#include <asm/lasat/lasat.h>
-#include <asm/cpu.h>
-#include <asm/setup.h>
-
-#include "at93c.h"
-#include <asm/lasat/eeprom.h>
-#include "prom.h"
-
-#define RESET_VECTOR 0xbfc00000
-#define PROM_JUMP_TABLE_ENTRY(n) (*((u32 *)(RESET_VECTOR + 0x20) + n))
-#define PROM_DISPLAY_ADDR PROM_JUMP_TABLE_ENTRY(0)
-#define PROM_PUTC_ADDR PROM_JUMP_TABLE_ENTRY(1)
-#define PROM_MONITOR_ADDR PROM_JUMP_TABLE_ENTRY(2)
-
-static void null_prom_display(const char *string, int pos, int clear)
-{
-}
-
-static void null_prom_monitor(void)
-{
-}
-
-static void null_prom_putc(char c)
-{
-}
-
-/* these are functions provided by the bootloader */
-static void (*__prom_putc)(char c) = null_prom_putc;
-
-void prom_putchar(char c)
-{
- __prom_putc(c);
-}
-
-void (*prom_display)(const char *string, int pos, int clear) =
- null_prom_display;
-void (*prom_monitor)(void) = null_prom_monitor;
-
-unsigned int lasat_ndelay_divider;
-
-static void setup_prom_vectors(void)
-{
- u32 version = *(u32 *)(RESET_VECTOR + 0x90);
-
- if (version >= 307) {
- prom_display = (void *)PROM_DISPLAY_ADDR;
- __prom_putc = (void *)PROM_PUTC_ADDR;
- prom_monitor = (void *)PROM_MONITOR_ADDR;
- }
- printk(KERN_DEBUG "prom vectors set up\n");
-}
-
-static struct at93c_defs at93c_defs[N_MACHTYPES] = {
- {
- .reg = (void *)AT93C_REG_100,
- .rdata_reg = (void *)AT93C_RDATA_REG_100,
- .rdata_shift = AT93C_RDATA_SHIFT_100,
- .wdata_shift = AT93C_WDATA_SHIFT_100,
- .cs = AT93C_CS_M_100,
- .clk = AT93C_CLK_M_100
- }, {
- .reg = (void *)AT93C_REG_200,
- .rdata_reg = (void *)AT93C_RDATA_REG_200,
- .rdata_shift = AT93C_RDATA_SHIFT_200,
- .wdata_shift = AT93C_WDATA_SHIFT_200,
- .cs = AT93C_CS_M_200,
- .clk = AT93C_CLK_M_200
- },
-};
-
-void __init prom_init(void)
-{
- int argc = fw_arg0;
- char **argv = (char **) fw_arg1;
-
- setup_prom_vectors();
-
- if (IS_LASAT_200()) {
- printk(KERN_INFO "LASAT 200 board\n");
- lasat_ndelay_divider = LASAT_200_DIVIDER;
- at93c = &at93c_defs[1];
- } else {
- printk(KERN_INFO "LASAT 100 board\n");
- lasat_ndelay_divider = LASAT_100_DIVIDER;
- at93c = &at93c_defs[0];
- }
-
- lasat_init_board_info(); /* Read info from EEPROM */
-
- /* Get the command line */
- if (argc > 0) {
- strncpy(arcs_cmdline, argv[0], COMMAND_LINE_SIZE-1);
- arcs_cmdline[COMMAND_LINE_SIZE-1] = '\0';
- }
-
- /* Set the I/O base address */
- set_io_port_base(KSEG1);
-
- /* Set memory regions */
- ioport_resource.start = 0;
- ioport_resource.end = 0xffffffff; /* Wrong, fixme. */
-
- add_memory_region(0, lasat_board_info.li_memsize, BOOT_MEM_RAM);
-}
-
-void __init prom_free_prom_memory(void)
-{
-}
-
-const char *get_system_type(void)
-{
- return lasat_board_info.li_bmstr;
-}
diff --git a/arch/mips/lasat/prom.h b/arch/mips/lasat/prom.h
deleted file mode 100644
index 3d1df853e9d3..000000000000
--- a/arch/mips/lasat/prom.h
+++ /dev/null
@@ -1,8 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __PROM_H
-#define __PROM_H
-
-extern void (*prom_display)(const char *string, int pos, int clear);
-extern void (*prom_monitor)(void);
-
-#endif /* __PROM_H */
diff --git a/arch/mips/lasat/reset.c b/arch/mips/lasat/reset.c
deleted file mode 100644
index 7c516ed9af15..000000000000
--- a/arch/mips/lasat/reset.c
+++ /dev/null
@@ -1,48 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Thomas Horsten <thh@lasat.com>
- * Copyright (C) 2000 LASAT Networks A/S.
- *
- * Reset the LASAT board.
- */
-#include <linux/kernel.h>
-#include <linux/pm.h>
-
-#include <asm/reboot.h>
-#include <asm/lasat/lasat.h>
-
-#include "picvue.h"
-#include "prom.h"
-
-static void lasat_machine_restart(char *command);
-static void lasat_machine_halt(void);
-
-/* Used to set machine to boot in service mode via /proc interface */
-int lasat_boot_to_service;
-
-static void lasat_machine_restart(char *command)
-{
- local_irq_disable();
-
- if (lasat_boot_to_service) {
- *(volatile unsigned int *)0xa0000024 = 0xdeadbeef;
- *(volatile unsigned int *)0xa00000fc = 0xfedeabba;
- }
- *lasat_misc->reset_reg = 0xbedead;
- for (;;) ;
-}
-
-static void lasat_machine_halt(void)
-{
- local_irq_disable();
-
- prom_monitor();
- for (;;) ;
-}
-
-void lasat_reboot_setup(void)
-{
- _machine_restart = lasat_machine_restart;
- _machine_halt = lasat_machine_halt;
- pm_power_off = lasat_machine_halt;
-}
diff --git a/arch/mips/lasat/serial.c b/arch/mips/lasat/serial.c
deleted file mode 100644
index 16b242713420..000000000000
--- a/arch/mips/lasat/serial.c
+++ /dev/null
@@ -1,80 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Registration of Lasat UART platform device.
- *
- * Copyright (C) 2007 Brian Murphy <brian@murphy.dk>
- */
-#include <linux/errno.h>
-#include <linux/init.h>
-#include <linux/ioport.h>
-#include <linux/platform_device.h>
-#include <linux/serial_8250.h>
-
-#include <asm/lasat/lasat.h>
-#include <asm/lasat/serial.h>
-
-static struct resource lasat_serial_res[2] __initdata;
-
-static struct plat_serial8250_port lasat_serial8250_port[] = {
- {
- .iotype = UPIO_MEM,
- .flags = UPF_IOREMAP | UPF_BOOT_AUTOCONF |
- UPF_SKIP_TEST,
- },
- {},
-};
-
-static __init int lasat_uart_add(void)
-{
- struct platform_device *pdev;
- int retval;
-
- pdev = platform_device_alloc("serial8250", -1);
- if (!pdev)
- return -ENOMEM;
-
- if (!IS_LASAT_200()) {
- lasat_serial_res[0].start = KSEG1ADDR(LASAT_UART_REGS_BASE_100);
- lasat_serial_res[0].end = lasat_serial_res[0].start + LASAT_UART_REGS_SHIFT_100 * 8 - 1;
- lasat_serial_res[0].flags = IORESOURCE_MEM;
- lasat_serial_res[1].start = LASATINT_UART_100;
- lasat_serial_res[1].end = LASATINT_UART_100;
- lasat_serial_res[1].flags = IORESOURCE_IRQ;
-
- lasat_serial8250_port[0].mapbase = LASAT_UART_REGS_BASE_100;
- lasat_serial8250_port[0].uartclk = LASAT_BASE_BAUD_100 * 16;
- lasat_serial8250_port[0].regshift = LASAT_UART_REGS_SHIFT_100;
- lasat_serial8250_port[0].irq = LASATINT_UART_100;
- } else {
- lasat_serial_res[0].start = KSEG1ADDR(LASAT_UART_REGS_BASE_200);
- lasat_serial_res[0].end = lasat_serial_res[0].start + LASAT_UART_REGS_SHIFT_200 * 8 - 1;
- lasat_serial_res[0].flags = IORESOURCE_MEM;
- lasat_serial_res[1].start = LASATINT_UART_200;
- lasat_serial_res[1].end = LASATINT_UART_200;
- lasat_serial_res[1].flags = IORESOURCE_IRQ;
-
- lasat_serial8250_port[0].mapbase = LASAT_UART_REGS_BASE_200;
- lasat_serial8250_port[0].uartclk = LASAT_BASE_BAUD_200 * 16;
- lasat_serial8250_port[0].regshift = LASAT_UART_REGS_SHIFT_200;
- lasat_serial8250_port[0].irq = LASATINT_UART_200;
- }
-
- pdev->id = PLAT8250_DEV_PLATFORM;
- pdev->dev.platform_data = lasat_serial8250_port;
-
- retval = platform_device_add_resources(pdev, lasat_serial_res, ARRAY_SIZE(lasat_serial_res));
- if (retval)
- goto err_free_device;
-
- retval = platform_device_add(pdev);
- if (retval)
- goto err_free_device;
-
- return 0;
-
-err_free_device:
- platform_device_put(pdev);
-
- return retval;
-}
-device_initcall(lasat_uart_add);
diff --git a/arch/mips/lasat/setup.c b/arch/mips/lasat/setup.c
deleted file mode 100644
index 0743243fd86d..000000000000
--- a/arch/mips/lasat/setup.c
+++ /dev/null
@@ -1,141 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Carsten Langgaard, carstenl@mips.com
- * Copyright (C) 1999 MIPS Technologies, Inc. All rights reserved.
- *
- * Thomas Horsten <thh@lasat.com>
- * Copyright (C) 2000 LASAT Networks A/S.
- *
- * Brian Murphy <brian@murphy.dk>
- *
- * Lasat specific setup.
- */
-#include <linux/init.h>
-#include <linux/sched.h>
-#include <linux/pci.h>
-#include <linux/interrupt.h>
-#include <linux/tty.h>
-
-#include <asm/time.h>
-#include <asm/cpu.h>
-#include <asm/bootinfo.h>
-#include <asm/irq.h>
-#include <asm/lasat/lasat.h>
-#include <asm/lasat/serial.h>
-
-#ifdef CONFIG_PICVUE
-#include <linux/notifier.h>
-#endif
-
-#include "ds1603.h"
-#include <asm/lasat/ds1603.h>
-#include <asm/lasat/picvue.h>
-#include <asm/lasat/eeprom.h>
-
-#include "prom.h"
-
-int lasat_command_line;
-void lasatint_init(void);
-
-extern void lasat_reboot_setup(void);
-extern void pcisetup(void);
-extern void edhac_init(void *, void *, void *);
-extern void addrflt_init(void);
-
-struct lasat_misc lasat_misc_info[N_MACHTYPES] = {
- {
- .reset_reg = (void *)KSEG1ADDR(0x1c840000),
- .flash_wp_reg = (void *)KSEG1ADDR(0x1c800000), 2
- }, {
- .reset_reg = (void *)KSEG1ADDR(0x11080000),
- .flash_wp_reg = (void *)KSEG1ADDR(0x11000000), 6
- }
-};
-
-struct lasat_misc *lasat_misc;
-
-#ifdef CONFIG_DS1603
-static struct ds_defs ds_defs[N_MACHTYPES] = {
- { (void *)DS1603_REG_100, (void *)DS1603_REG_100,
- DS1603_RST_100, DS1603_CLK_100, DS1603_DATA_100,
- DS1603_DATA_SHIFT_100, 0, 0 },
- { (void *)DS1603_REG_200, (void *)DS1603_DATA_REG_200,
- DS1603_RST_200, DS1603_CLK_200, DS1603_DATA_200,
- DS1603_DATA_READ_SHIFT_200, 1, 2000 }
-};
-#endif
-
-#ifdef CONFIG_PICVUE
-#include "picvue.h"
-static struct pvc_defs pvc_defs[N_MACHTYPES] = {
- { (void *)PVC_REG_100, PVC_DATA_SHIFT_100, PVC_DATA_M_100,
- PVC_E_100, PVC_RW_100, PVC_RS_100 },
- { (void *)PVC_REG_200, PVC_DATA_SHIFT_200, PVC_DATA_M_200,
- PVC_E_200, PVC_RW_200, PVC_RS_200 }
-};
-#endif
-
-static int lasat_panic_display(struct notifier_block *this,
- unsigned long event, void *ptr)
-{
-#ifdef CONFIG_PICVUE
- unsigned char *string = ptr;
- if (string == NULL)
- string = "Kernel Panic";
- pvc_dump_string(string);
-#endif
- return NOTIFY_DONE;
-}
-
-static int lasat_panic_prom_monitor(struct notifier_block *this,
- unsigned long event, void *ptr)
-{
- prom_monitor();
- return NOTIFY_DONE;
-}
-
-static struct notifier_block lasat_panic_block[] =
-{
- {
- .notifier_call = lasat_panic_display,
- .priority = INT_MAX
- }, {
- .notifier_call = lasat_panic_prom_monitor,
- .priority = INT_MIN
- }
-};
-
-void __init plat_time_init(void)
-{
- mips_hpt_frequency = lasat_board_info.li_cpu_hz / 2;
-
- change_c0_status(ST0_IM, IE_IRQ0);
-}
-
-void __init plat_mem_setup(void)
-{
- int i;
- int lasat_type = IS_LASAT_200() ? 1 : 0;
-
- lasat_misc = &lasat_misc_info[lasat_type];
-#ifdef CONFIG_PICVUE
- picvue = &pvc_defs[lasat_type];
-#endif
-
- /* Set up panic notifier */
- for (i = 0; i < ARRAY_SIZE(lasat_panic_block); i++)
- atomic_notifier_chain_register(&panic_notifier_list,
- &lasat_panic_block[i]);
-
- lasat_reboot_setup();
-
-#ifdef CONFIG_DS1603
- ds1603 = &ds_defs[lasat_type];
-#endif
-
-#ifdef DYNAMIC_SERIAL_INIT
- serial_init();
-#endif
-
- pr_info("Lasat specific initialization complete\n");
-}
diff --git a/arch/mips/lasat/sysctl.c b/arch/mips/lasat/sysctl.c
deleted file mode 100644
index 2119541a5b8b..000000000000
--- a/arch/mips/lasat/sysctl.c
+++ /dev/null
@@ -1,265 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Thomas Horsten <thh@lasat.com>
- * Copyright (C) 2000 LASAT Networks A/S.
- *
- * Routines specific to the LASAT boards
- */
-#include <linux/types.h>
-#include <asm/lasat/lasat.h>
-
-#include <linux/sysctl.h>
-#include <linux/stddef.h>
-#include <linux/init.h>
-#include <linux/fs.h>
-#include <linux/ctype.h>
-#include <linux/string.h>
-#include <linux/net.h>
-#include <linux/inet.h>
-#include <linux/uaccess.h>
-
-#include <asm/time.h>
-
-#ifdef CONFIG_DS1603
-#include "ds1603.h"
-#endif
-
-
-/* And the same for proc */
-int proc_dolasatstring(struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos)
-{
- int r;
-
- r = proc_dostring(table, write, buffer, lenp, ppos);
- if ((!write) || r)
- return r;
-
- lasat_write_eeprom_info();
-
- return 0;
-}
-
-#ifdef CONFIG_DS1603
-static int rtctmp;
-
-/* proc function to read/write RealTime Clock */
-int proc_dolasatrtc(struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos)
-{
- struct timespec64 ts;
- int r;
-
- if (!write) {
- read_persistent_clock64(&ts);
- rtctmp = ts.tv_sec;
- /* check for time < 0 and set to 0 */
- if (rtctmp < 0)
- rtctmp = 0;
- }
- r = proc_dointvec(table, write, buffer, lenp, ppos);
- if (r)
- return r;
-
- if (write) {
- /*
- * Due to the RTC hardware limitation, we can not actually
- * use the full 64-bit range here.
- */
- ts.tv_sec = rtctmp;
- ts.tv_nsec = 0;
-
- update_persistent_clock64(ts);
- }
-
- return 0;
-}
-#endif
-
-#ifdef CONFIG_INET
-int proc_lasat_ip(struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos)
-{
- unsigned int ip;
- char *p, c;
- int len;
- char ipbuf[32];
-
- if (!table->data || !table->maxlen || !*lenp ||
- (*ppos && !write)) {
- *lenp = 0;
- return 0;
- }
-
- if (write) {
- len = 0;
- p = buffer;
- while (len < *lenp) {
- c = *p;
- p++;
- if (c == 0 || c == '\n')
- break;
- len++;
- }
- if (len >= sizeof(ipbuf)-1)
- len = sizeof(ipbuf) - 1;
- memcpy(ipbuf, buffer, len);
- ipbuf[len] = 0;
- *ppos += *lenp;
- /* Now see if we can convert it to a valid IP */
- ip = in_aton(ipbuf);
- *(unsigned int *)(table->data) = ip;
- lasat_write_eeprom_info();
- } else {
- ip = *(unsigned int *)(table->data);
- sprintf(ipbuf, "%d.%d.%d.%d",
- (ip) & 0xff,
- (ip >> 8) & 0xff,
- (ip >> 16) & 0xff,
- (ip >> 24) & 0xff);
- len = strlen(ipbuf);
- if (len > *lenp)
- len = *lenp;
- if (len)
- memcpy(buffer, ipbuf, len);
- if (len < *lenp) {
- *((char *)buffer + len) = '\n';
- len++;
- }
- *lenp = len;
- *ppos += len;
- }
-
- return 0;
-}
-#endif
-
-int proc_lasat_prid(struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos)
-{
- int r;
-
- r = proc_dointvec(table, write, buffer, lenp, ppos);
- if (r < 0)
- return r;
- if (write) {
- lasat_board_info.li_eeprom_info.prid =
- lasat_board_info.li_prid;
- lasat_write_eeprom_info();
- lasat_init_board_info();
- }
- return 0;
-}
-
-extern int lasat_boot_to_service;
-
-static struct ctl_table lasat_table[] = {
- {
- .procname = "cpu-hz",
- .data = &lasat_board_info.li_cpu_hz,
- .maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "bus-hz",
- .data = &lasat_board_info.li_bus_hz,
- .maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "bmid",
- .data = &lasat_board_info.li_bmid,
- .maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "prid",
- .data = &lasat_board_info.li_prid,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_lasat_prid,
- },
-#ifdef CONFIG_INET
- {
- .procname = "ipaddr",
- .data = &lasat_board_info.li_eeprom_info.ipaddr,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_lasat_ip,
- },
- {
- .procname = "netmask",
- .data = &lasat_board_info.li_eeprom_info.netmask,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_lasat_ip,
- },
-#endif
- {
- .procname = "passwd_hash",
- .data = &lasat_board_info.li_eeprom_info.passwd_hash,
- .maxlen =
- sizeof(lasat_board_info.li_eeprom_info.passwd_hash),
- .mode = 0600,
- .proc_handler = proc_dolasatstring,
- },
- {
- .procname = "boot-service",
- .data = &lasat_boot_to_service,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
-#ifdef CONFIG_DS1603
- {
- .procname = "rtc",
- .data = &rtctmp,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dolasatrtc,
- },
-#endif
- {
- .procname = "namestr",
- .data = &lasat_board_info.li_namestr,
- .maxlen = sizeof(lasat_board_info.li_namestr),
- .mode = 0444,
- .proc_handler = proc_dostring,
- },
- {
- .procname = "typestr",
- .data = &lasat_board_info.li_typestr,
- .maxlen = sizeof(lasat_board_info.li_typestr),
- .mode = 0444,
- .proc_handler = proc_dostring,
- },
- {}
-};
-
-static struct ctl_table lasat_root_table[] = {
- {
- .procname = "lasat",
- .mode = 0555,
- .child = lasat_table
- },
- {}
-};
-
-static int __init lasat_register_sysctl(void)
-{
- struct ctl_table_header *lasat_table_header;
-
- lasat_table_header =
- register_sysctl_table(lasat_root_table);
- if (!lasat_table_header) {
- printk(KERN_ERR "Unable to register LASAT sysctl\n");
- return -ENOMEM;
- }
-
- return 0;
-}
-
-arch_initcall(lasat_register_sysctl);
diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S
index fda7b57b826e..87fda0713b84 100644
--- a/arch/mips/lib/csum_partial.S
+++ b/arch/mips/lib/csum_partial.S
@@ -279,7 +279,8 @@ EXPORT_SYMBOL(csum_partial)
#endif
/* odd buffer alignment? */
-#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_LOONGSON64)
+#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR5) || \
+ defined(CONFIG_CPU_LOONGSON64)
.set push
.set arch=mips32r2
wsbh v1, sum
@@ -732,7 +733,8 @@ EXPORT_SYMBOL(csum_partial)
addu sum, v1
#endif
-#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_LOONGSON64)
+#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR5) || \
+ defined(CONFIG_CPU_LOONGSON64)
.set push
.set arch=mips32r2
wsbh v1, sum
diff --git a/arch/mips/loongson2ef/Kconfig b/arch/mips/loongson2ef/Kconfig
index 595dd48e1e4d..96dc6eba4310 100644
--- a/arch/mips/loongson2ef/Kconfig
+++ b/arch/mips/loongson2ef/Kconfig
@@ -46,7 +46,6 @@ config LEMOTE_MACH2F
select CSRC_R4K if ! MIPS_EXTERNAL_TIMER
select DMA_NONCOHERENT
select GENERIC_ISA_DMA_SUPPORT_BROKEN
- select HAVE_CLK
select FORCE_PCI
select I8259
select IRQ_MIPS_CPU
diff --git a/arch/mips/loongson2ef/Platform b/arch/mips/loongson2ef/Platform
index 3aca42963f35..cdad3c1a9a18 100644
--- a/arch/mips/loongson2ef/Platform
+++ b/arch/mips/loongson2ef/Platform
@@ -26,7 +26,6 @@ endif
# Loongson Machines' Support
#
-platform-$(CONFIG_MACH_LOONGSON2EF) += loongson2ef/
cflags-$(CONFIG_MACH_LOONGSON2EF) += -I$(srctree)/arch/mips/include/asm/mach-loongson2ef -mno-branch-likely
load-$(CONFIG_LEMOTE_FULOONG2E) += 0xffffffff80100000
load-$(CONFIG_LEMOTE_MACH2F) += 0xffffffff80200000
diff --git a/arch/mips/loongson2ef/common/init.c b/arch/mips/loongson2ef/common/init.c
index 45512178be77..ce3f02f75e2a 100644
--- a/arch/mips/loongson2ef/common/init.c
+++ b/arch/mips/loongson2ef/common/init.c
@@ -19,10 +19,10 @@ unsigned long __maybe_unused _loongson_addrwincfg_base;
static void __init mips_nmi_setup(void)
{
void *base;
- extern char except_vec_nmi;
+ extern char except_vec_nmi[];
base = (void *)(CAC_BASE + 0x380);
- memcpy(base, &except_vec_nmi, 0x80);
+ memcpy(base, except_vec_nmi, 0x80);
flush_icache_range((unsigned long)base, (unsigned long)base + 0x80);
}
diff --git a/arch/mips/loongson2ef/lemote-2f/clock.c b/arch/mips/loongson2ef/lemote-2f/clock.c
index 414f282c8ab5..850b6b9f8f15 100644
--- a/arch/mips/loongson2ef/lemote-2f/clock.c
+++ b/arch/mips/loongson2ef/lemote-2f/clock.c
@@ -6,22 +6,12 @@
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*/
-#include <linux/clk.h>
#include <linux/cpufreq.h>
#include <linux/errno.h>
#include <linux/export.h>
-#include <linux/list.h>
-#include <linux/mutex.h>
-#include <linux/spinlock.h>
-#include <asm/clock.h>
#include <asm/mach-loongson2ef/loongson.h>
-static LIST_HEAD(clock_list);
-static DEFINE_SPINLOCK(clock_lock);
-static DEFINE_MUTEX(clock_list_sem);
-
-/* Minimum CLK support */
enum {
DC_ZERO, DC_25PT = 2, DC_37PT, DC_50PT, DC_62PT, DC_75PT,
DC_87PT, DC_DISABLE, DC_RESV
@@ -41,103 +31,21 @@ struct cpufreq_frequency_table loongson2_clockmod_table[] = {
};
EXPORT_SYMBOL_GPL(loongson2_clockmod_table);
-static struct clk cpu_clk = {
- .name = "cpu_clk",
- .flags = CLK_ALWAYS_ENABLED | CLK_RATE_PROPAGATES,
- .rate = 800000000,
-};
-
-struct clk *clk_get(struct device *dev, const char *id)
-{
- return &cpu_clk;
-}
-EXPORT_SYMBOL(clk_get);
-
-static void propagate_rate(struct clk *clk)
-{
- struct clk *clkp;
-
- list_for_each_entry(clkp, &clock_list, node) {
- if (likely(clkp->parent != clk))
- continue;
- if (likely(clkp->ops && clkp->ops->recalc))
- clkp->ops->recalc(clkp);
- if (unlikely(clkp->flags & CLK_RATE_PROPAGATES))
- propagate_rate(clkp);
- }
-}
-
-int clk_enable(struct clk *clk)
-{
- return 0;
-}
-EXPORT_SYMBOL(clk_enable);
-
-void clk_disable(struct clk *clk)
+int loongson2_cpu_set_rate(unsigned long rate_khz)
{
-}
-EXPORT_SYMBOL(clk_disable);
-
-unsigned long clk_get_rate(struct clk *clk)
-{
- if (!clk)
- return 0;
-
- return (unsigned long)clk->rate;
-}
-EXPORT_SYMBOL(clk_get_rate);
-
-void clk_put(struct clk *clk)
-{
-}
-EXPORT_SYMBOL(clk_put);
-
-int clk_set_rate(struct clk *clk, unsigned long rate)
-{
- unsigned int rate_khz = rate / 1000;
struct cpufreq_frequency_table *pos;
- int ret = 0;
int regval;
- if (likely(clk->ops && clk->ops->set_rate)) {
- unsigned long flags;
-
- spin_lock_irqsave(&clock_lock, flags);
- ret = clk->ops->set_rate(clk, rate, 0);
- spin_unlock_irqrestore(&clock_lock, flags);
- }
-
- if (unlikely(clk->flags & CLK_RATE_PROPAGATES))
- propagate_rate(clk);
-
cpufreq_for_each_valid_entry(pos, loongson2_clockmod_table)
if (rate_khz == pos->frequency)
break;
if (rate_khz != pos->frequency)
return -ENOTSUPP;
- clk->rate = rate;
-
regval = readl(LOONGSON_CHIPCFG);
regval = (regval & ~0x7) | (pos->driver_data - 1);
writel(regval, LOONGSON_CHIPCFG);
- return ret;
-}
-EXPORT_SYMBOL_GPL(clk_set_rate);
-
-long clk_round_rate(struct clk *clk, unsigned long rate)
-{
- if (likely(clk->ops && clk->ops->round_rate)) {
- unsigned long flags, rounded;
-
- spin_lock_irqsave(&clock_lock, flags);
- rounded = clk->ops->round_rate(clk, rate);
- spin_unlock_irqrestore(&clock_lock, flags);
-
- return rounded;
- }
-
- return rate;
+ return 0;
}
-EXPORT_SYMBOL_GPL(clk_round_rate);
+EXPORT_SYMBOL_GPL(loongson2_cpu_set_rate);
diff --git a/arch/mips/loongson32/Platform b/arch/mips/loongson32/Platform
index 7f8e342f1ef5..3b9673e7a2fa 100644
--- a/arch/mips/loongson32/Platform
+++ b/arch/mips/loongson32/Platform
@@ -1,4 +1,3 @@
cflags-$(CONFIG_CPU_LOONGSON32) += -march=mips32r2 -Wa,--trap
-platform-$(CONFIG_MACH_LOONGSON32) += loongson32/
cflags-$(CONFIG_MACH_LOONGSON32) += -I$(srctree)/arch/mips/include/asm/mach-loongson32
load-$(CONFIG_CPU_LOONGSON32) += 0xffffffff80200000
diff --git a/arch/mips/loongson64/Kconfig b/arch/mips/loongson64/Kconfig
index 48b29c198acf..517f1f8e81fb 100644
--- a/arch/mips/loongson64/Kconfig
+++ b/arch/mips/loongson64/Kconfig
@@ -4,18 +4,12 @@ if MACH_LOONGSON64
config RS780_HPET
bool "RS780/SBX00 HPET Timer"
depends on MACH_LOONGSON64
+ depends on BROKEN
select MIPS_EXTERNAL_TIMER
help
This option enables the hpet timer of AMD RS780/SBX00.
- If you want to enable the Loongson3 CPUFreq Driver, Please enable
- this option at first, otherwise, You will get wrong system time.
-
- If unsure, say Yes.
-
-
-config LOONGSON_MC146818
- bool
- default n
+ Note: This driver is doing some dangerous hack. Please only enable
+ it on RS780E systems.
endif # MACH_LOONGSON64
diff --git a/arch/mips/loongson64/Makefile b/arch/mips/loongson64/Makefile
index b7f40b179c71..39c06f52b08f 100644
--- a/arch/mips/loongson64/Makefile
+++ b/arch/mips/loongson64/Makefile
@@ -2,12 +2,12 @@
#
# Makefile for Loongson-3 family machines
#
-obj-$(CONFIG_MACH_LOONGSON64) += cop2-ex.o platform.o acpi_init.o dma.o \
+obj-$(CONFIG_MACH_LOONGSON64) += cop2-ex.o platform.o dma.o \
setup.o init.o env.o time.o reset.o \
obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_NUMA) += numa.o
obj-$(CONFIG_RS780_HPET) += hpet.o
-obj-$(CONFIG_PCI) += pci.o
-obj-$(CONFIG_LOONGSON_MC146818) += rtc.o
obj-$(CONFIG_SUSPEND) += pm.o
+obj-$(CONFIG_PCI_QUIRKS) += vbios_quirk.o
+obj-$(CONFIG_CPU_LOONGSON3_CPUCFG_EMULATION) += cpucfg-emul.o
diff --git a/arch/mips/loongson64/Platform b/arch/mips/loongson64/Platform
index d5eb94c9edb4..ec42c5085905 100644
--- a/arch/mips/loongson64/Platform
+++ b/arch/mips/loongson64/Platform
@@ -55,6 +55,5 @@ cflags-y += $(call cc-option,-mno-loongson-mmi)
# Loongson Machines' Support
#
-platform-$(CONFIG_MACH_LOONGSON64) += loongson64/
cflags-$(CONFIG_MACH_LOONGSON64) += -I$(srctree)/arch/mips/include/asm/mach-loongson64 -mno-branch-likely
load-$(CONFIG_CPU_LOONGSON64) += 0xffffffff80200000
diff --git a/arch/mips/loongson64/cop2-ex.c b/arch/mips/loongson64/cop2-ex.c
index 9efdfe430ff0..f130f62129b8 100644
--- a/arch/mips/loongson64/cop2-ex.c
+++ b/arch/mips/loongson64/cop2-ex.c
@@ -14,17 +14,30 @@
#include <linux/sched.h>
#include <linux/notifier.h>
#include <linux/ptrace.h>
+#include <linux/uaccess.h>
+#include <linux/sched/signal.h>
#include <asm/fpu.h>
#include <asm/cop2.h>
+#include <asm/inst.h>
+#include <asm/branch.h>
#include <asm/current.h>
#include <asm/mipsregs.h>
+#include <asm/unaligned-emul.h>
static int loongson_cu2_call(struct notifier_block *nfb, unsigned long action,
void *data)
{
- int fpu_owned;
+ unsigned int res, fpu_owned;
+ unsigned long ra, value, value_next;
+ union mips_instruction insn;
int fr = !test_thread_flag(TIF_32BIT_FPREGS);
+ struct pt_regs *regs = (struct pt_regs *)data;
+ void __user *addr = (void __user *)regs->cp0_badvaddr;
+ unsigned int __user *pc = (unsigned int __user *)exception_epc(regs);
+
+ ra = regs->regs[31];
+ __get_user(insn.word, pc);
switch (action) {
case CU2_EXCEPTION:
@@ -49,9 +62,284 @@ static int loongson_cu2_call(struct notifier_block *nfb, unsigned long action,
preempt_enable();
return NOTIFY_STOP; /* Don't call default notifier */
+
+ case CU2_LWC2_OP:
+ if (insn.loongson3_lswc2_format.ls == 0)
+ goto sigbus;
+
+ if (insn.loongson3_lswc2_format.fr == 0) { /* gslq */
+ if (!access_ok(addr, 16))
+ goto sigbus;
+
+ LoadDW(addr, value, res);
+ if (res)
+ goto fault;
+
+ LoadDW(addr + 8, value_next, res);
+ if (res)
+ goto fault;
+
+ regs->regs[insn.loongson3_lswc2_format.rt] = value;
+ regs->regs[insn.loongson3_lswc2_format.rq] = value_next;
+ compute_return_epc(regs);
+ } else { /* gslqc1 */
+ if (!access_ok(addr, 16))
+ goto sigbus;
+
+ lose_fpu(1);
+ LoadDW(addr, value, res);
+ if (res)
+ goto fault;
+
+ LoadDW(addr + 8, value_next, res);
+ if (res)
+ goto fault;
+
+ set_fpr64(current->thread.fpu.fpr,
+ insn.loongson3_lswc2_format.rt, value);
+ set_fpr64(current->thread.fpu.fpr,
+ insn.loongson3_lswc2_format.rq, value_next);
+ compute_return_epc(regs);
+ own_fpu(1);
+ }
+ return NOTIFY_STOP; /* Don't call default notifier */
+
+ case CU2_SWC2_OP:
+ if (insn.loongson3_lswc2_format.ls == 0)
+ goto sigbus;
+
+ if (insn.loongson3_lswc2_format.fr == 0) { /* gssq */
+ if (!access_ok(addr, 16))
+ goto sigbus;
+
+ /* write upper 8 bytes first */
+ value_next = regs->regs[insn.loongson3_lswc2_format.rq];
+
+ StoreDW(addr + 8, value_next, res);
+ if (res)
+ goto fault;
+ value = regs->regs[insn.loongson3_lswc2_format.rt];
+
+ StoreDW(addr, value, res);
+ if (res)
+ goto fault;
+
+ compute_return_epc(regs);
+ } else { /* gssqc1 */
+ if (!access_ok(addr, 16))
+ goto sigbus;
+
+ lose_fpu(1);
+ value_next = get_fpr64(current->thread.fpu.fpr,
+ insn.loongson3_lswc2_format.rq);
+
+ StoreDW(addr + 8, value_next, res);
+ if (res)
+ goto fault;
+
+ value = get_fpr64(current->thread.fpu.fpr,
+ insn.loongson3_lswc2_format.rt);
+
+ StoreDW(addr, value, res);
+ if (res)
+ goto fault;
+
+ compute_return_epc(regs);
+ own_fpu(1);
+ }
+ return NOTIFY_STOP; /* Don't call default notifier */
+
+ case CU2_LDC2_OP:
+ switch (insn.loongson3_lsdc2_format.opcode1) {
+ /*
+ * Loongson-3 overridden ldc2 instructions.
+ * opcode1 instruction
+ * 0x1 gslhx: load 2 bytes to GPR
+ * 0x2 gslwx: load 4 bytes to GPR
+ * 0x3 gsldx: load 8 bytes to GPR
+ * 0x6 gslwxc1: load 4 bytes to FPR
+ * 0x7 gsldxc1: load 8 bytes to FPR
+ */
+ case 0x1:
+ if (!access_ok(addr, 2))
+ goto sigbus;
+
+ LoadHW(addr, value, res);
+ if (res)
+ goto fault;
+
+ compute_return_epc(regs);
+ regs->regs[insn.loongson3_lsdc2_format.rt] = value;
+ break;
+ case 0x2:
+ if (!access_ok(addr, 4))
+ goto sigbus;
+
+ LoadW(addr, value, res);
+ if (res)
+ goto fault;
+
+ compute_return_epc(regs);
+ regs->regs[insn.loongson3_lsdc2_format.rt] = value;
+ break;
+ case 0x3:
+ if (!access_ok(addr, 8))
+ goto sigbus;
+
+ LoadDW(addr, value, res);
+ if (res)
+ goto fault;
+
+ compute_return_epc(regs);
+ regs->regs[insn.loongson3_lsdc2_format.rt] = value;
+ break;
+ case 0x6:
+ die_if_kernel("Unaligned FP access in kernel code", regs);
+ BUG_ON(!used_math());
+ if (!access_ok(addr, 4))
+ goto sigbus;
+
+ lose_fpu(1);
+ LoadW(addr, value, res);
+ if (res)
+ goto fault;
+
+ set_fpr64(current->thread.fpu.fpr,
+ insn.loongson3_lsdc2_format.rt, value);
+ compute_return_epc(regs);
+ own_fpu(1);
+
+ break;
+ case 0x7:
+ die_if_kernel("Unaligned FP access in kernel code", regs);
+ BUG_ON(!used_math());
+ if (!access_ok(addr, 8))
+ goto sigbus;
+
+ lose_fpu(1);
+ LoadDW(addr, value, res);
+ if (res)
+ goto fault;
+
+ set_fpr64(current->thread.fpu.fpr,
+ insn.loongson3_lsdc2_format.rt, value);
+ compute_return_epc(regs);
+ own_fpu(1);
+ break;
+
+ }
+ return NOTIFY_STOP; /* Don't call default notifier */
+
+ case CU2_SDC2_OP:
+ switch (insn.loongson3_lsdc2_format.opcode1) {
+ /*
+ * Loongson-3 overridden sdc2 instructions.
+ * opcode1 instruction
+ * 0x1 gsshx: store 2 bytes from GPR
+ * 0x2 gsswx: store 4 bytes from GPR
+ * 0x3 gssdx: store 8 bytes from GPR
+ * 0x6 gsswxc1: store 4 bytes from FPR
+ * 0x7 gssdxc1: store 8 bytes from FPR
+ */
+ case 0x1:
+ if (!access_ok(addr, 2))
+ goto sigbus;
+
+ compute_return_epc(regs);
+ value = regs->regs[insn.loongson3_lsdc2_format.rt];
+
+ StoreHW(addr, value, res);
+ if (res)
+ goto fault;
+
+ break;
+ case 0x2:
+ if (!access_ok(addr, 4))
+ goto sigbus;
+
+ compute_return_epc(regs);
+ value = regs->regs[insn.loongson3_lsdc2_format.rt];
+
+ StoreW(addr, value, res);
+ if (res)
+ goto fault;
+
+ break;
+ case 0x3:
+ if (!access_ok(addr, 8))
+ goto sigbus;
+
+ compute_return_epc(regs);
+ value = regs->regs[insn.loongson3_lsdc2_format.rt];
+
+ StoreDW(addr, value, res);
+ if (res)
+ goto fault;
+
+ break;
+
+ case 0x6:
+ die_if_kernel("Unaligned FP access in kernel code", regs);
+ BUG_ON(!used_math());
+
+ if (!access_ok(addr, 4))
+ goto sigbus;
+
+ lose_fpu(1);
+ value = get_fpr64(current->thread.fpu.fpr,
+ insn.loongson3_lsdc2_format.rt);
+
+ StoreW(addr, value, res);
+ if (res)
+ goto fault;
+
+ compute_return_epc(regs);
+ own_fpu(1);
+
+ break;
+ case 0x7:
+ die_if_kernel("Unaligned FP access in kernel code", regs);
+ BUG_ON(!used_math());
+
+ if (!access_ok(addr, 8))
+ goto sigbus;
+
+ lose_fpu(1);
+ value = get_fpr64(current->thread.fpu.fpr,
+ insn.loongson3_lsdc2_format.rt);
+
+ StoreDW(addr, value, res);
+ if (res)
+ goto fault;
+
+ compute_return_epc(regs);
+ own_fpu(1);
+
+ break;
+ }
+ return NOTIFY_STOP; /* Don't call default notifier */
}
return NOTIFY_OK; /* Let default notifier send signals */
+
+fault:
+ /* roll back jump/branch */
+ regs->regs[31] = ra;
+ regs->cp0_epc = (unsigned long)pc;
+ /* Did we have an exception handler installed? */
+ if (fixup_exception(regs))
+ return NOTIFY_STOP; /* Don't call default notifier */
+
+ die_if_kernel("Unhandled kernel unaligned access", regs);
+ force_sig(SIGSEGV);
+
+ return NOTIFY_STOP; /* Don't call default notifier */
+
+sigbus:
+ die_if_kernel("Unhandled kernel unaligned access", regs);
+ force_sig(SIGBUS);
+
+ return NOTIFY_STOP; /* Don't call default notifier */
}
static int __init loongson_cu2_setup(void)
diff --git a/arch/mips/loongson64/cpucfg-emul.c b/arch/mips/loongson64/cpucfg-emul.c
new file mode 100644
index 000000000000..cd619b47ba1f
--- /dev/null
+++ b/arch/mips/loongson64/cpucfg-emul.c
@@ -0,0 +1,227 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/smp.h>
+#include <linux/types.h>
+#include <asm/cpu.h>
+#include <asm/cpu-info.h>
+#include <asm/elf.h>
+
+#include <loongson_regs.h>
+#include <cpucfg-emul.h>
+
+static bool is_loongson(struct cpuinfo_mips *c)
+{
+ switch (c->processor_id & PRID_COMP_MASK) {
+ case PRID_COMP_LEGACY:
+ return ((c->processor_id & PRID_IMP_MASK) ==
+ PRID_IMP_LOONGSON_64C);
+
+ case PRID_COMP_LOONGSON:
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+static u32 get_loongson_fprev(struct cpuinfo_mips *c)
+{
+ return c->fpu_id & LOONGSON_FPREV_MASK;
+}
+
+static bool cpu_has_uca(void)
+{
+ u32 diag = read_c0_diag();
+ u32 new_diag;
+
+ if (diag & LOONGSON_DIAG_UCAC)
+ /* UCA is already enabled. */
+ return true;
+
+ /* See if UCAC bit can be flipped on. This should be safe. */
+ new_diag = diag | LOONGSON_DIAG_UCAC;
+ write_c0_diag(new_diag);
+ new_diag = read_c0_diag();
+ write_c0_diag(diag);
+
+ return (new_diag & LOONGSON_DIAG_UCAC) != 0;
+}
+
+static void probe_uca(struct cpuinfo_mips *c)
+{
+ if (cpu_has_uca())
+ c->loongson3_cpucfg_data[0] |= LOONGSON_CFG1_LSUCA;
+}
+
+static void decode_loongson_config6(struct cpuinfo_mips *c)
+{
+ u32 config6 = read_c0_config6();
+
+ if (config6 & MIPS_CONF6_LOONGSON_SFBEN)
+ c->loongson3_cpucfg_data[0] |= LOONGSON_CFG1_SFBP;
+ if (config6 & MIPS_CONF6_LOONGSON_LLEXC)
+ c->loongson3_cpucfg_data[0] |= LOONGSON_CFG1_LLEXC;
+ if (config6 & MIPS_CONF6_LOONGSON_SCRAND)
+ c->loongson3_cpucfg_data[0] |= LOONGSON_CFG1_SCRAND;
+}
+
+static void patch_cpucfg_sel1(struct cpuinfo_mips *c)
+{
+ u64 ases = c->ases;
+ u64 options = c->options;
+ u32 data = c->loongson3_cpucfg_data[0];
+
+ if (options & MIPS_CPU_FPU) {
+ data |= LOONGSON_CFG1_FP;
+ data |= get_loongson_fprev(c) << LOONGSON_CFG1_FPREV_OFFSET;
+ }
+ if (ases & MIPS_ASE_LOONGSON_MMI)
+ data |= LOONGSON_CFG1_MMI;
+ if (ases & MIPS_ASE_MSA)
+ data |= LOONGSON_CFG1_MSA1;
+
+ c->loongson3_cpucfg_data[0] = data;
+}
+
+static void patch_cpucfg_sel2(struct cpuinfo_mips *c)
+{
+ u64 ases = c->ases;
+ u64 options = c->options;
+ u32 data = c->loongson3_cpucfg_data[1];
+
+ if (ases & MIPS_ASE_LOONGSON_EXT)
+ data |= LOONGSON_CFG2_LEXT1;
+ if (ases & MIPS_ASE_LOONGSON_EXT2)
+ data |= LOONGSON_CFG2_LEXT2;
+ if (options & MIPS_CPU_LDPTE)
+ data |= LOONGSON_CFG2_LSPW;
+
+ if (ases & MIPS_ASE_VZ)
+ data |= LOONGSON_CFG2_LVZP;
+ else
+ data &= ~LOONGSON_CFG2_LVZREV;
+
+ c->loongson3_cpucfg_data[1] = data;
+}
+
+static void patch_cpucfg_sel3(struct cpuinfo_mips *c)
+{
+ u64 ases = c->ases;
+ u32 data = c->loongson3_cpucfg_data[2];
+
+ if (ases & MIPS_ASE_LOONGSON_CAM) {
+ data |= LOONGSON_CFG3_LCAMP;
+ } else {
+ data &= ~LOONGSON_CFG3_LCAMREV;
+ data &= ~LOONGSON_CFG3_LCAMNUM;
+ data &= ~LOONGSON_CFG3_LCAMKW;
+ data &= ~LOONGSON_CFG3_LCAMVW;
+ }
+
+ c->loongson3_cpucfg_data[2] = data;
+}
+
+void loongson3_cpucfg_synthesize_data(struct cpuinfo_mips *c)
+{
+ /* Only engage the logic on Loongson processors. */
+ if (!is_loongson(c))
+ return;
+
+ /* CPUs with CPUCFG support don't need to synthesize anything. */
+ if (cpu_has_cfg())
+ goto have_cpucfg_now;
+
+ c->loongson3_cpucfg_data[0] = 0;
+ c->loongson3_cpucfg_data[1] = 0;
+ c->loongson3_cpucfg_data[2] = 0;
+
+ /* Add CPUCFG features non-discoverable otherwise. */
+ switch (c->processor_id & (PRID_IMP_MASK | PRID_REV_MASK)) {
+ case PRID_IMP_LOONGSON_64R | PRID_REV_LOONGSON2K_R1_0:
+ case PRID_IMP_LOONGSON_64R | PRID_REV_LOONGSON2K_R1_1:
+ case PRID_IMP_LOONGSON_64R | PRID_REV_LOONGSON2K_R1_2:
+ case PRID_IMP_LOONGSON_64R | PRID_REV_LOONGSON2K_R1_3:
+ decode_loongson_config6(c);
+ probe_uca(c);
+
+ c->loongson3_cpucfg_data[0] |= (LOONGSON_CFG1_LSLDR0 |
+ LOONGSON_CFG1_LSSYNCI | LOONGSON_CFG1_LLSYNC |
+ LOONGSON_CFG1_TGTSYNC);
+ c->loongson3_cpucfg_data[1] |= (LOONGSON_CFG2_LBT1 |
+ LOONGSON_CFG2_LBT2 | LOONGSON_CFG2_LPMP |
+ LOONGSON_CFG2_LPM_REV2);
+ c->loongson3_cpucfg_data[2] = 0;
+ break;
+
+ case PRID_IMP_LOONGSON_64C | PRID_REV_LOONGSON3A_R1:
+ c->loongson3_cpucfg_data[0] |= (LOONGSON_CFG1_LSLDR0 |
+ LOONGSON_CFG1_LSSYNCI | LOONGSON_CFG1_LSUCA |
+ LOONGSON_CFG1_LLSYNC | LOONGSON_CFG1_TGTSYNC);
+ c->loongson3_cpucfg_data[1] |= (LOONGSON_CFG2_LBT1 |
+ LOONGSON_CFG2_LPMP | LOONGSON_CFG2_LPM_REV1);
+ c->loongson3_cpucfg_data[2] |= (
+ LOONGSON_CFG3_LCAM_REV1 |
+ LOONGSON_CFG3_LCAMNUM_REV1 |
+ LOONGSON_CFG3_LCAMKW_REV1 |
+ LOONGSON_CFG3_LCAMVW_REV1);
+ break;
+
+ case PRID_IMP_LOONGSON_64C | PRID_REV_LOONGSON3B_R1:
+ case PRID_IMP_LOONGSON_64C | PRID_REV_LOONGSON3B_R2:
+ c->loongson3_cpucfg_data[0] |= (LOONGSON_CFG1_LSLDR0 |
+ LOONGSON_CFG1_LSSYNCI | LOONGSON_CFG1_LSUCA |
+ LOONGSON_CFG1_LLSYNC | LOONGSON_CFG1_TGTSYNC);
+ c->loongson3_cpucfg_data[1] |= (LOONGSON_CFG2_LBT1 |
+ LOONGSON_CFG2_LPMP | LOONGSON_CFG2_LPM_REV1);
+ c->loongson3_cpucfg_data[2] |= (
+ LOONGSON_CFG3_LCAM_REV1 |
+ LOONGSON_CFG3_LCAMNUM_REV1 |
+ LOONGSON_CFG3_LCAMKW_REV1 |
+ LOONGSON_CFG3_LCAMVW_REV1);
+ break;
+
+ case PRID_IMP_LOONGSON_64C | PRID_REV_LOONGSON3A_R2_0:
+ case PRID_IMP_LOONGSON_64C | PRID_REV_LOONGSON3A_R2_1:
+ case PRID_IMP_LOONGSON_64C | PRID_REV_LOONGSON3A_R3_0:
+ case PRID_IMP_LOONGSON_64C | PRID_REV_LOONGSON3A_R3_1:
+ decode_loongson_config6(c);
+ probe_uca(c);
+
+ c->loongson3_cpucfg_data[0] |= (LOONGSON_CFG1_CNT64 |
+ LOONGSON_CFG1_LSLDR0 | LOONGSON_CFG1_LSPREF |
+ LOONGSON_CFG1_LSPREFX | LOONGSON_CFG1_LSSYNCI |
+ LOONGSON_CFG1_LLSYNC | LOONGSON_CFG1_TGTSYNC);
+ c->loongson3_cpucfg_data[1] |= (LOONGSON_CFG2_LBT1 |
+ LOONGSON_CFG2_LBT2 | LOONGSON_CFG2_LBTMMU |
+ LOONGSON_CFG2_LPMP | LOONGSON_CFG2_LPM_REV1 |
+ LOONGSON_CFG2_LVZ_REV1);
+ c->loongson3_cpucfg_data[2] |= (LOONGSON_CFG3_LCAM_REV1 |
+ LOONGSON_CFG3_LCAMNUM_REV1 |
+ LOONGSON_CFG3_LCAMKW_REV1 |
+ LOONGSON_CFG3_LCAMVW_REV1);
+ break;
+
+ default:
+ /* It is possible that some future Loongson cores still do
+ * not have CPUCFG, so do not emulate anything for these
+ * cores.
+ */
+ return;
+ }
+
+ /* This feature is set by firmware, but all known Loongson-64 systems
+ * are configured this way.
+ */
+ c->loongson3_cpucfg_data[0] |= LOONGSON_CFG1_CDMAP;
+
+ /* Patch in dynamically probed bits. */
+ patch_cpucfg_sel1(c);
+ patch_cpucfg_sel2(c);
+ patch_cpucfg_sel3(c);
+
+have_cpucfg_now:
+ /* We have usable CPUCFG now, emulated or not.
+ * Announce CPUCFG availability to userspace via hwcap.
+ */
+ elf_hwcap |= HWCAP_LOONGSON_CPUCFG;
+}
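The new cpucfg-emul.c detects UCA support by trying to flip the UCAC bit in the Coprocessor 0 diagnostic register, reading it back, and then restoring the original value. Below is a hedged, self-contained user-space sketch of that probe-by-readback idiom; the diag register is mocked as a plain variable here, and the bit position and writable mask are illustrative assumptions, not the kernel's definitions.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define LOONGSON_DIAG_UCAC (1u << 8)	/* assumed bit position, for illustration only */

/* Mocked coprocessor-0 diag register; the kernel uses read_c0_diag()/write_c0_diag(). */
static uint32_t mock_c0_diag;

static uint32_t read_c0_diag(void)        { return mock_c0_diag; }
static void     write_c0_diag(uint32_t v) { mock_c0_diag = v & 0x1ffu; /* pretend the bit is writable */ }

/* Probe whether UCAC can be enabled: set it, read back, then restore the old value. */
static bool cpu_has_uca(void)
{
	uint32_t diag = read_c0_diag();
	uint32_t new_diag;

	if (diag & LOONGSON_DIAG_UCAC)
		return true;			/* already enabled */

	new_diag = diag | LOONGSON_DIAG_UCAC;	/* try to flip the bit on */
	write_c0_diag(new_diag);
	new_diag = read_c0_diag();		/* did the write stick? */
	write_c0_diag(diag);			/* restore the original state */

	return (new_diag & LOONGSON_DIAG_UCAC) != 0;
}

int main(void)
{
	printf("UCA supported: %d\n", cpu_has_uca());
	return 0;
}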
diff --git a/arch/mips/loongson64/dma.c b/arch/mips/loongson64/dma.c
index 5e86635f71db..dbfe6e82fddd 100644
--- a/arch/mips/loongson64/dma.c
+++ b/arch/mips/loongson64/dma.c
@@ -2,21 +2,24 @@
#include <linux/dma-direct.h>
#include <linux/init.h>
#include <linux/swiotlb.h>
+#include <boot_param.h>
dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr)
{
/* We extract 2bit node id (bit 44~47, only bit 44~45 used now) from
* Loongson-3's 48bit address space and embed it into 40bit */
long nid = (paddr >> 44) & 0x3;
- return ((nid << 44) ^ paddr) | (nid << 37);
+
+ return ((nid << 44) ^ paddr) | (nid << node_id_offset);
}
phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr)
{
/* We extract 2bit node id (bit 44~47, only bit 44~45 used now) from
* Loongson-3's 48bit address space and embed it into 40bit */
- long nid = (daddr >> 37) & 0x3;
- return ((nid << 37) ^ daddr) | (nid << 44);
+ long nid = (daddr >> node_id_offset) & 0x3;
+
+ return ((nid << node_id_offset) ^ daddr) | (nid << 44);
}
void __init plat_swiotlb_setup(void)
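The dma.c change parameterizes where the 2-bit node id is re-embedded in the DMA address (bit 37 on RS780E, a firmware-derived offset on LS7A). The following is a minimal stand-alone sketch of that translation and its round trip, using node_id_offset = 37 purely as an assumed example value; the function names here are illustrative, not the kernel entry points.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Assumed example value; the kernel derives this per bridge type (37 on RS780E). */
static unsigned int node_id_offset = 37;

/* Move the 2-bit node id from bits 44..45 of the CPU physical address
 * down to node_id_offset in the narrower DMA address, and back again.
 */
static uint64_t phys_to_dma(uint64_t paddr)
{
	uint64_t nid = (paddr >> 44) & 0x3;

	return ((nid << 44) ^ paddr) | (nid << node_id_offset);
}

static uint64_t dma_to_phys(uint64_t daddr)
{
	uint64_t nid = (daddr >> node_id_offset) & 0x3;

	return ((nid << node_id_offset) ^ daddr) | (nid << 44);
}

int main(void)
{
	uint64_t paddr = (2ULL << 44) | 0x123456;	/* node 2, small offset within the node */
	uint64_t daddr = phys_to_dma(paddr);

	assert(dma_to_phys(daddr) == paddr);		/* round trip preserves the address */
	printf("paddr=%#llx -> daddr=%#llx\n",
	       (unsigned long long)paddr, (unsigned long long)daddr);
	return 0;
}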
diff --git a/arch/mips/loongson64/env.c b/arch/mips/loongson64/env.c
index 2554ef11170d..d11bc346bbca 100644
--- a/arch/mips/loongson64/env.c
+++ b/arch/mips/loongson64/env.c
@@ -14,12 +14,15 @@
* Author: Wu Zhangjin, wuzhangjin@gmail.com
*/
#include <linux/export.h>
+#include <linux/pci_ids.h>
#include <asm/bootinfo.h>
#include <loongson.h>
#include <boot_param.h>
#include <builtin_dtbs.h>
#include <workarounds.h>
+#define HOST_BRIDGE_CONFIG_ADDR ((void __iomem *)TO_UNCAC(0x1a000000))
+
u32 cpu_clock_freq;
EXPORT_SYMBOL(cpu_clock_freq);
struct efi_memory_map_loongson *loongson_memmap;
@@ -43,6 +46,8 @@ void __init prom_init_env(void)
struct system_loongson *esys;
struct efi_cpuinfo_loongson *ecpu;
struct irq_source_routing_table *eirq_source;
+ u32 id;
+ u16 vendor, device;
/* firmware arguments are initialized in head.S */
boot_p = (struct boot_params *)fw_arg2;
@@ -178,4 +183,19 @@ void __init prom_init_env(void)
memcpy(loongson_sysconf.sensors, esys->sensors,
sizeof(struct sensor_device) * loongson_sysconf.nr_sensors);
pr_info("CpuClock = %u\n", cpu_clock_freq);
+
+ /* Read the ID of PCI host bridge to detect bridge type */
+ id = readl(HOST_BRIDGE_CONFIG_ADDR);
+ vendor = id & 0xffff;
+ device = (id >> 16) & 0xffff;
+
+ if (vendor == PCI_VENDOR_ID_LOONGSON && device == 0x7a00) {
+ pr_info("The bridge chip is LS7A\n");
+ loongson_sysconf.bridgetype = LS7A;
+ loongson_sysconf.early_config = ls7a_early_config;
+ } else {
+ pr_info("The bridge chip is RS780E or SR5690\n");
+ loongson_sysconf.bridgetype = RS780E;
+ loongson_sysconf.early_config = rs780e_early_config;
+ }
}
diff --git a/arch/mips/loongson64/init.c b/arch/mips/loongson64/init.c
index da38944471f4..59ddadace83f 100644
--- a/arch/mips/loongson64/init.c
+++ b/arch/mips/loongson64/init.c
@@ -5,6 +5,7 @@
*/
#include <linux/irqchip.h>
+#include <linux/logic_pio.h>
#include <linux/memblock.h>
#include <asm/bootinfo.h>
#include <asm/traps.h>
@@ -13,25 +14,41 @@
#include <asm/fw/fw.h>
#include <loongson.h>
+#include <boot_param.h>
+
+#define NODE_ID_OFFSET_ADDR ((void __iomem *)TO_UNCAC(0x1001041c))
+
+u32 node_id_offset;
static void __init mips_nmi_setup(void)
{
void *base;
- extern char except_vec_nmi;
+ extern char except_vec_nmi[];
base = (void *)(CAC_BASE + 0x380);
- memcpy(base, &except_vec_nmi, 0x80);
+ memcpy(base, except_vec_nmi, 0x80);
flush_icache_range((unsigned long)base, (unsigned long)base + 0x80);
}
+void ls7a_early_config(void)
+{
+ node_id_offset = ((readl(NODE_ID_OFFSET_ADDR) >> 8) & 0x1f) + 36;
+}
+
+void rs780e_early_config(void)
+{
+ node_id_offset = 37;
+}
+
void __init prom_init(void)
{
fw_init_cmdline();
prom_init_env();
/* init base address of io space */
- set_io_port_base((unsigned long)
- ioremap(LOONGSON_PCIIO_BASE, LOONGSON_PCIIO_SIZE));
+ set_io_port_base(PCI_IOBASE);
+
+ loongson_sysconf.early_config();
prom_init_numa_memory();
@@ -46,7 +63,45 @@ void __init prom_free_prom_memory(void)
{
}
+static __init void reserve_pio_range(void)
+{
+ struct logic_pio_hwaddr *range;
+
+ range = kzalloc(sizeof(*range), GFP_ATOMIC);
+ if (!range)
+ return;
+
+ range->fwnode = &of_root->fwnode;
+ range->size = MMIO_LOWER_RESERVED;
+ range->hw_start = LOONGSON_PCIIO_BASE;
+ range->flags = LOGIC_PIO_CPU_MMIO;
+
+ if (logic_pio_register_range(range)) {
+ pr_err("Failed to reserve PIO range for legacy ISA\n");
+ goto free_range;
+ }
+
+ if (WARN(range->io_start != 0,
+ "Reserved PIO range does not start from 0\n"))
+ goto unregister;
+
+ /*
+ * The i8259 accesses I/O space, so the mapping must be done here.
+ * Remove this once all drivers can be managed by logic_pio.
+ */
+ ioremap_page_range(PCI_IOBASE, PCI_IOBASE + MMIO_LOWER_RESERVED,
+ LOONGSON_PCIIO_BASE,
+ pgprot_device(PAGE_KERNEL));
+
+ return;
+unregister:
+ logic_pio_unregister_range(range);
+free_range:
+ kfree(range);
+}
+
void __init arch_init_irq(void)
{
+ reserve_pio_range();
irqchip_init();
}
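Editor's aside: ls7a_early_config() above derives node_id_offset from bits 8..12 of a bridge register plus a base of 36; the patch does not spell out what the field means, so the sketch below only reproduces the arithmetic (the register value and names are made up):

	#include <stdio.h>
	#include <stdint.h>

	static unsigned int demo_node_id_offset(uint32_t node_cfg_reg)
	{
		/* same arithmetic as ls7a_early_config() */
		return ((node_cfg_reg >> 8) & 0x1f) + 36;
	}

	int main(void)
	{
		/* a register whose bits 8..12 hold 1 yields the familiar offset 37 */
		printf("%u\n", demo_node_id_offset(0x0100));
		return 0;
	}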
diff --git a/arch/mips/loongson64/pci.c b/arch/mips/loongson64/pci.c
deleted file mode 100644
index e84ae20c3290..000000000000
--- a/arch/mips/loongson64/pci.c
+++ /dev/null
@@ -1,51 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Copyright (C) 2007 Lemote, Inc. & Institute of Computing Technology
- * Author: Fuxin Zhang, zhangfx@lemote.com
- */
-#include <linux/pci.h>
-
-#include <pci.h>
-#include <loongson.h>
-#include <boot_param.h>
-
-static struct resource loongson_pci_mem_resource = {
- .name = "pci memory space",
- .start = LOONGSON_PCI_MEM_START,
- .end = LOONGSON_PCI_MEM_END,
- .flags = IORESOURCE_MEM,
-};
-
-static struct resource loongson_pci_io_resource = {
- .name = "pci io space",
- .start = LOONGSON_PCI_IO_START,
- .end = IO_SPACE_LIMIT,
- .flags = IORESOURCE_IO,
-};
-
-static struct pci_controller loongson_pci_controller = {
- .pci_ops = &loongson_pci_ops,
- .io_resource = &loongson_pci_io_resource,
- .mem_resource = &loongson_pci_mem_resource,
- .mem_offset = 0x00000000UL,
- .io_offset = 0x00000000UL,
-};
-
-
-extern int sbx00_acpi_init(void);
-
-static int __init pcibios_init(void)
-{
-
- loongson_pci_controller.io_map_base = mips_io_port_base;
- loongson_pci_mem_resource.start = loongson_sysconf.pci_mem_start_addr;
- loongson_pci_mem_resource.end = loongson_sysconf.pci_mem_end_addr;
-
- register_pci_controller(&loongson_pci_controller);
-
- sbx00_acpi_init();
-
- return 0;
-}
-
-arch_initcall(pcibios_init);
diff --git a/arch/mips/loongson64/rtc.c b/arch/mips/loongson64/rtc.c
deleted file mode 100644
index 8d7628c0f513..000000000000
--- a/arch/mips/loongson64/rtc.c
+++ /dev/null
@@ -1,39 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Lemote Fuloong platform support
- *
- * Copyright(c) 2010 Arnaud Patard <apatard@mandriva.com>
- */
-
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/platform_device.h>
-#include <linux/mc146818rtc.h>
-
-static struct resource loongson_rtc_resources[] = {
- {
- .start = RTC_PORT(0),
- .end = RTC_PORT(1),
- .flags = IORESOURCE_IO,
- }, {
- .start = RTC_IRQ,
- .end = RTC_IRQ,
- .flags = IORESOURCE_IRQ,
- }
-};
-
-static struct platform_device loongson_rtc_device = {
- .name = "rtc_cmos",
- .id = -1,
- .resource = loongson_rtc_resources,
- .num_resources = ARRAY_SIZE(loongson_rtc_resources),
-};
-
-
-static int __init loongson_rtc_platform_init(void)
-{
- platform_device_register(&loongson_rtc_device);
- return 0;
-}
-
-device_initcall(loongson_rtc_platform_init);
diff --git a/arch/mips/loongson64/smp.c b/arch/mips/loongson64/smp.c
index e1fe8bbb377d..e744e1bee49e 100644
--- a/arch/mips/loongson64/smp.c
+++ b/arch/mips/loongson64/smp.c
@@ -15,7 +15,6 @@
#include <linux/kexec.h>
#include <asm/processor.h>
#include <asm/time.h>
-#include <asm/clock.h>
#include <asm/tlbflush.h>
#include <asm/cacheflush.h>
#include <loongson.h>
diff --git a/arch/mips/loongson64/time.c b/arch/mips/loongson64/time.c
index 1245f22cec84..91e842b58365 100644
--- a/arch/mips/loongson64/time.c
+++ b/arch/mips/loongson64/time.c
@@ -6,7 +6,7 @@
* Copyright (C) 2009 Lemote Inc.
* Author: Wu Zhangjin, wuzhangjin@gmail.com
*/
-#include <asm/mc146818-time.h>
+
#include <asm/time.h>
#include <asm/hpet.h>
@@ -21,9 +21,3 @@ void __init plat_time_init(void)
setup_hpet_timer();
#endif
}
-
-void read_persistent_clock64(struct timespec64 *ts)
-{
- ts->tv_sec = mc146818_get_cmos_time();
- ts->tv_nsec = 0;
-}
diff --git a/arch/mips/loongson64/vbios_quirk.c b/arch/mips/loongson64/vbios_quirk.c
new file mode 100644
index 000000000000..9a29e94d3db1
--- /dev/null
+++ b/arch/mips/loongson64/vbios_quirk.c
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include <linux/pci.h>
+#include <loongson.h>
+
+static void pci_fixup_radeon(struct pci_dev *pdev)
+{
+ struct resource *res = &pdev->resource[PCI_ROM_RESOURCE];
+
+ if (res->start)
+ return;
+
+ if (!loongson_sysconf.vgabios_addr)
+ return;
+
+ pci_disable_rom(pdev);
+ if (res->parent)
+ release_resource(res);
+
+ res->start = virt_to_phys((void *) loongson_sysconf.vgabios_addr);
+ res->end = res->start + 256*1024 - 1;
+ res->flags = IORESOURCE_MEM | IORESOURCE_ROM_SHADOW |
+ IORESOURCE_PCI_FIXED;
+
+ dev_info(&pdev->dev, "BAR %d: assigned %pR for Radeon ROM\n",
+ PCI_ROM_RESOURCE, res);
+}
+DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_ATI, 0x9615,
+ PCI_CLASS_DISPLAY_VGA, 8, pci_fixup_radeon);
diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c
index 9701c89e7e14..587cf1d115e8 100644
--- a/arch/mips/math-emu/cp1emu.c
+++ b/arch/mips/math-emu/cp1emu.c
@@ -439,7 +439,7 @@ int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
regs->cp0_epc + dec_insn.pc_inc +
dec_insn.next_pc_inc;
}
- /* fall through */
+ fallthrough;
case jr_op:
/* For R6, JR already emulated in jalr_op */
if (NO_R6EMU && insn.r_format.func == jr_op)
@@ -459,11 +459,11 @@ int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
regs->regs[31] = regs->cp0_epc +
dec_insn.pc_inc +
dec_insn.next_pc_inc;
- /* fall through */
+ fallthrough;
case bltzl_op:
if (NO_R6EMU)
break;
- /* fall through */
+ fallthrough;
case bltz_op:
if ((long)regs->regs[insn.i_format.rs] < 0)
*contpc = regs->cp0_epc +
@@ -483,11 +483,11 @@ int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
regs->regs[31] = regs->cp0_epc +
dec_insn.pc_inc +
dec_insn.next_pc_inc;
- /* fall through */
+ fallthrough;
case bgezl_op:
if (NO_R6EMU)
break;
- /* fall through */
+ fallthrough;
case bgez_op:
if ((long)regs->regs[insn.i_format.rs] >= 0)
*contpc = regs->cp0_epc +
@@ -502,12 +502,12 @@ int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
break;
case jalx_op:
set_isa16_mode(bit);
- /* fall through */
+ fallthrough;
case jal_op:
regs->regs[31] = regs->cp0_epc +
dec_insn.pc_inc +
dec_insn.next_pc_inc;
- /* fall through */
+ fallthrough;
case j_op:
*contpc = regs->cp0_epc + dec_insn.pc_inc;
*contpc >>= 28;
@@ -519,7 +519,7 @@ int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
case beql_op:
if (NO_R6EMU)
break;
- /* fall through */
+ fallthrough;
case beq_op:
if (regs->regs[insn.i_format.rs] ==
regs->regs[insn.i_format.rt])
@@ -534,7 +534,7 @@ int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
case bnel_op:
if (NO_R6EMU)
break;
- /* fall through */
+ fallthrough;
case bne_op:
if (regs->regs[insn.i_format.rs] !=
regs->regs[insn.i_format.rt])
@@ -549,7 +549,7 @@ int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
case blezl_op:
if (!insn.i_format.rt && NO_R6EMU)
break;
- /* fall through */
+ fallthrough;
case blez_op:
/*
@@ -587,7 +587,7 @@ int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
case bgtzl_op:
if (!insn.i_format.rt && NO_R6EMU)
break;
- /* fall through */
+ fallthrough;
case bgtz_op:
/*
* Compact branches for R6 for the
@@ -725,7 +725,7 @@ int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
return 1;
}
/* R2/R6 compatible cop1 instruction */
- /* fall through */
+ fallthrough;
case cop2_op:
case cop1x_op:
if (insn.i_format.rs == bc_op) {
@@ -1217,14 +1217,14 @@ emul:
case bcfl_op:
if (cpu_has_mips_2_3_4_5_r)
likely = 1;
- /* fall through */
+ fallthrough;
case bcf_op:
cond = !cond;
break;
case bctl_op:
if (cpu_has_mips_2_3_4_5_r)
likely = 1;
- /* fall through */
+ fallthrough;
case bct_op:
break;
}
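Editor's aside: the mechanical change in this and the following math-emu files replaces "/* fall through */" comments with the kernel's fallthrough pseudo-keyword, which expands to the compiler's fallthrough attribute (or a no-op on compilers without it) so -Wimplicit-fallthrough treats the missing break as intentional. A standalone sketch of the pattern, using a simplified local definition of the macro rather than the kernel's:

	#include <stdio.h>

	#ifndef __has_attribute
	#define __has_attribute(x) 0
	#endif

	#if __has_attribute(__fallthrough__)
	#define fallthrough	__attribute__((__fallthrough__))
	#else
	#define fallthrough	do {} while (0)	/* fallthrough */
	#endif

	static int decode(int op)
	{
		int flags = 0;

		switch (op) {
		case 2:
			flags |= 0x2;	/* op 2 does extra work ... */
			fallthrough;	/* ... then shares the op 1 path */
		case 1:
			flags |= 0x1;
			break;
		default:
			break;
		}
		return flags;
	}

	int main(void)
	{
		printf("%d %d\n", decode(1), decode(2));	/* prints "1 3" */
		return 0;
	}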
diff --git a/arch/mips/math-emu/dp_add.c b/arch/mips/math-emu/dp_add.c
index a8f98b8157f5..78504736be9e 100644
--- a/arch/mips/math-emu/dp_add.c
+++ b/arch/mips/math-emu/dp_add.c
@@ -92,8 +92,7 @@ union ieee754dp ieee754dp_add(union ieee754dp x, union ieee754dp y)
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
DPDNORMX;
- /* fall through */
-
+ fallthrough;
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
DPDNORMY;
break;
diff --git a/arch/mips/math-emu/dp_div.c b/arch/mips/math-emu/dp_div.c
index 2b682e930e39..ac1ecc46248d 100644
--- a/arch/mips/math-emu/dp_div.c
+++ b/arch/mips/math-emu/dp_div.c
@@ -91,8 +91,7 @@ union ieee754dp ieee754dp_div(union ieee754dp x, union ieee754dp y)
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
DPDNORMX;
- /* fall through */
-
+ fallthrough;
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
DPDNORMY;
break;
diff --git a/arch/mips/math-emu/dp_fmax.c b/arch/mips/math-emu/dp_fmax.c
index 3eda9ff7b491..126ec90bb4c7 100644
--- a/arch/mips/math-emu/dp_fmax.c
+++ b/arch/mips/math-emu/dp_fmax.c
@@ -93,8 +93,7 @@ union ieee754dp ieee754dp_fmax(union ieee754dp x, union ieee754dp y)
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
DPDNORMX;
- /* fall through */
-
+ fallthrough;
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
DPDNORMY;
break;
@@ -222,8 +221,7 @@ union ieee754dp ieee754dp_fmaxa(union ieee754dp x, union ieee754dp y)
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
DPDNORMX;
- /* fall through */
-
+ fallthrough;
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
DPDNORMY;
break;
diff --git a/arch/mips/math-emu/dp_fmin.c b/arch/mips/math-emu/dp_fmin.c
index b3594a1704a7..35ded4c45989 100644
--- a/arch/mips/math-emu/dp_fmin.c
+++ b/arch/mips/math-emu/dp_fmin.c
@@ -93,8 +93,7 @@ union ieee754dp ieee754dp_fmin(union ieee754dp x, union ieee754dp y)
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
DPDNORMX;
- /* fall through */
-
+ fallthrough;
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
DPDNORMY;
break;
@@ -222,8 +221,7 @@ union ieee754dp ieee754dp_fmina(union ieee754dp x, union ieee754dp y)
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
DPDNORMX;
- /* fall through */
-
+ fallthrough;
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
DPDNORMY;
break;
diff --git a/arch/mips/math-emu/dp_maddf.c b/arch/mips/math-emu/dp_maddf.c
index e24ef374d828..931e66f683ca 100644
--- a/arch/mips/math-emu/dp_maddf.c
+++ b/arch/mips/math-emu/dp_maddf.c
@@ -150,8 +150,7 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
DPDNORMX;
- /* fall through */
-
+ fallthrough;
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
if (zc == IEEE754_CLASS_INF)
return ieee754dp_inf(zs);
diff --git a/arch/mips/math-emu/dp_mul.c b/arch/mips/math-emu/dp_mul.c
index e8a97d26472a..8a671bb7af12 100644
--- a/arch/mips/math-emu/dp_mul.c
+++ b/arch/mips/math-emu/dp_mul.c
@@ -89,8 +89,7 @@ union ieee754dp ieee754dp_mul(union ieee754dp x, union ieee754dp y)
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
DPDNORMX;
- /* fall through */
-
+ fallthrough;
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
DPDNORMY;
break;
diff --git a/arch/mips/math-emu/dp_sqrt.c b/arch/mips/math-emu/dp_sqrt.c
index 06be390ba79a..1ee38f8242fd 100644
--- a/arch/mips/math-emu/dp_sqrt.c
+++ b/arch/mips/math-emu/dp_sqrt.c
@@ -52,8 +52,7 @@ union ieee754dp ieee754dp_sqrt(union ieee754dp x)
case IEEE754_CLASS_DNORM:
DPDNORMX;
- /* fall through */
-
+ fallthrough;
case IEEE754_CLASS_NORM:
if (xs) {
/* sqrt(-x) = Nan */
@@ -130,7 +129,7 @@ union ieee754dp ieee754dp_sqrt(union ieee754dp x)
switch (oldcsr.rm) {
case FPU_CSR_RU:
y.bits += 1;
- /* fall through */
+ fallthrough;
case FPU_CSR_RN:
t.bits += 1;
break;
diff --git a/arch/mips/math-emu/dp_sub.c b/arch/mips/math-emu/dp_sub.c
index f08aecefceff..08474ad2a64e 100644
--- a/arch/mips/math-emu/dp_sub.c
+++ b/arch/mips/math-emu/dp_sub.c
@@ -94,8 +94,7 @@ union ieee754dp ieee754dp_sub(union ieee754dp x, union ieee754dp y)
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
DPDNORMX;
- /* fall through */
-
+ fallthrough;
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
/* normalize ym,ye */
DPDNORMY;
diff --git a/arch/mips/math-emu/sp_add.c b/arch/mips/math-emu/sp_add.c
index 9af3ec7302fb..715cd0534301 100644
--- a/arch/mips/math-emu/sp_add.c
+++ b/arch/mips/math-emu/sp_add.c
@@ -92,8 +92,7 @@ union ieee754sp ieee754sp_add(union ieee754sp x, union ieee754sp y)
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
SPDNORMX;
- /* fall through */
-
+ fallthrough;
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
SPDNORMY;
break;
diff --git a/arch/mips/math-emu/sp_div.c b/arch/mips/math-emu/sp_div.c
index fcc285f3b48d..2bfa266fdc76 100644
--- a/arch/mips/math-emu/sp_div.c
+++ b/arch/mips/math-emu/sp_div.c
@@ -91,8 +91,7 @@ union ieee754sp ieee754sp_div(union ieee754sp x, union ieee754sp y)
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
SPDNORMX;
- /* fall through */
-
+ fallthrough;
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
SPDNORMY;
break;
diff --git a/arch/mips/math-emu/sp_fdp.c b/arch/mips/math-emu/sp_fdp.c
index 9f1456109aa8..56417497c88e 100644
--- a/arch/mips/math-emu/sp_fdp.c
+++ b/arch/mips/math-emu/sp_fdp.c
@@ -34,8 +34,7 @@ union ieee754sp ieee754sp_fdp(union ieee754dp x)
case IEEE754_CLASS_SNAN:
x = ieee754dp_nanxcpt(x);
EXPLODEXDP;
- /* fall through */
-
+ fallthrough;
case IEEE754_CLASS_QNAN:
y = ieee754sp_nan_fdp(xs, xm);
if (!ieee754_csr.nan2008) {
diff --git a/arch/mips/math-emu/sp_fmax.c b/arch/mips/math-emu/sp_fmax.c
index 4ce1d1f8b499..3fb16a1df3b8 100644
--- a/arch/mips/math-emu/sp_fmax.c
+++ b/arch/mips/math-emu/sp_fmax.c
@@ -93,8 +93,7 @@ union ieee754sp ieee754sp_fmax(union ieee754sp x, union ieee754sp y)
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
SPDNORMX;
- /* fall through */
-
+ fallthrough;
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
SPDNORMY;
break;
@@ -222,8 +221,7 @@ union ieee754sp ieee754sp_fmaxa(union ieee754sp x, union ieee754sp y)
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
SPDNORMX;
- /* fall through */
-
+ fallthrough;
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
SPDNORMY;
break;
diff --git a/arch/mips/math-emu/sp_fmin.c b/arch/mips/math-emu/sp_fmin.c
index 7ad867fd7de2..ad2599d4a892 100644
--- a/arch/mips/math-emu/sp_fmin.c
+++ b/arch/mips/math-emu/sp_fmin.c
@@ -93,8 +93,7 @@ union ieee754sp ieee754sp_fmin(union ieee754sp x, union ieee754sp y)
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
SPDNORMX;
- /* fall through */
-
+ fallthrough;
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
SPDNORMY;
break;
@@ -222,8 +221,7 @@ union ieee754sp ieee754sp_fmina(union ieee754sp x, union ieee754sp y)
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
SPDNORMX;
- /* fall through */
-
+ fallthrough;
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
SPDNORMY;
break;
diff --git a/arch/mips/math-emu/sp_maddf.c b/arch/mips/math-emu/sp_maddf.c
index 1b85b1a527ac..473ee222d90c 100644
--- a/arch/mips/math-emu/sp_maddf.c
+++ b/arch/mips/math-emu/sp_maddf.c
@@ -119,8 +119,7 @@ static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
SPDNORMX;
- /* fall through */
-
+ fallthrough;
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
if (zc == IEEE754_CLASS_INF)
return ieee754sp_inf(zs);
diff --git a/arch/mips/math-emu/sp_mul.c b/arch/mips/math-emu/sp_mul.c
index ded17e28e8bc..26cfd63025e9 100644
--- a/arch/mips/math-emu/sp_mul.c
+++ b/arch/mips/math-emu/sp_mul.c
@@ -89,8 +89,7 @@ union ieee754sp ieee754sp_mul(union ieee754sp x, union ieee754sp y)
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
SPDNORMX;
- /* fall through */
-
+ fallthrough;
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
SPDNORMY;
break;
diff --git a/arch/mips/math-emu/sp_sub.c b/arch/mips/math-emu/sp_sub.c
index f3d26a1f162c..16c8e9ae63ed 100644
--- a/arch/mips/math-emu/sp_sub.c
+++ b/arch/mips/math-emu/sp_sub.c
@@ -94,8 +94,7 @@ union ieee754sp ieee754sp_sub(union ieee754sp x, union ieee754sp y)
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
SPDNORMX;
- /* fall through */
-
+ fallthrough;
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
SPDNORMY;
break;
diff --git a/arch/mips/mm/Makefile b/arch/mips/mm/Makefile
index 46f483e952c8..865926a37775 100644
--- a/arch/mips/mm/Makefile
+++ b/arch/mips/mm/Makefile
@@ -23,7 +23,7 @@ obj-y += uasm-mips.o
endif
obj-$(CONFIG_32BIT) += ioremap.o pgtable-32.o
-obj-$(CONFIG_64BIT) += pgtable-64.o
+obj-$(CONFIG_64BIT) += ioremap64.o pgtable-64.o
obj-$(CONFIG_HIGHMEM) += highmem.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
obj-$(CONFIG_DMA_NONCOHERENT) += dma-noncoherent.o
diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c
index 36a311348739..6fb83ac7c475 100644
--- a/arch/mips/mm/c-r4k.c
+++ b/arch/mips/mm/c-r4k.c
@@ -1049,7 +1049,7 @@ static inline void rm7k_erratum31(void)
"cache\t%1, 0x3000(%0)\n\t"
".set pop\n"
:
- : "r" (addr), "i" (Index_Store_Tag_I), "i" (Fill));
+ : "r" (addr), "i" (Index_Store_Tag_I), "i" (Fill_I));
}
}
@@ -1073,12 +1073,12 @@ static inline int alias_74k_erratum(struct cpuinfo_mips *c)
if (rev <= PRID_REV_ENCODE_332(2, 4, 0))
present = 1;
if (rev == PRID_REV_ENCODE_332(2, 4, 0))
- write_c0_config6(read_c0_config6() | MIPS_CONF6_SYND);
+ write_c0_config6(read_c0_config6() | MIPS_CONF6_MTI_SYND);
break;
case PRID_IMP_1074K:
if (rev <= PRID_REV_ENCODE_332(1, 1, 0)) {
present = 1;
- write_c0_config6(read_c0_config6() | MIPS_CONF6_SYND);
+ write_c0_config6(read_c0_config6() | MIPS_CONF6_MTI_SYND);
}
break;
default:
@@ -1200,7 +1200,7 @@ static void probe_pcache(void)
case CPU_VR4133:
write_c0_config(config & ~VR41_CONF_P4K);
- /* fall through */
+ fallthrough;
case CPU_VR4131:
/* Workaround for cache instruction bug of VR4131 */
if (c->processor_id == 0x0c80U || c->processor_id == 0x0c81U ||
@@ -1303,7 +1303,8 @@ static void probe_pcache(void)
c->dcache.linesz;
c->dcache.waybit = 0;
if ((c->processor_id & (PRID_IMP_MASK | PRID_REV_MASK)) >=
- (PRID_IMP_LOONGSON_64C | PRID_REV_LOONGSON3A_R2_0))
+ (PRID_IMP_LOONGSON_64C | PRID_REV_LOONGSON3A_R2_0) ||
+ (c->processor_id & PRID_IMP_MASK) == PRID_IMP_LOONGSON_64R)
c->options |= MIPS_CPU_PREFETCH;
break;
@@ -1425,7 +1426,7 @@ static void probe_pcache(void)
case CPU_74K:
case CPU_1074K:
has_74k_erratum = alias_74k_erratum(c);
- /* Fall through. */
+ fallthrough;
case CPU_M14KC:
case CPU_M14KEC:
case CPU_24K:
@@ -1449,7 +1450,7 @@ static void probe_pcache(void)
c->dcache.flags |= MIPS_CACHE_PINDEX;
break;
}
- /* fall through */
+ fallthrough;
default:
if (has_74k_erratum || c->dcache.waysize > PAGE_SIZE)
c->dcache.flags |= MIPS_CACHE_ALIASES;
@@ -1629,8 +1630,13 @@ static void __init loongson3_sc_init(void)
scache_size = c->scache.sets *
c->scache.ways *
c->scache.linesz;
- /* Loongson-3 has 4 cores, 1MB scache for each. scaches are shared */
- scache_size *= 4;
+
+ /* Loongson-3 has 4 scache banks, while Loongson-2K has only 2 banks */
+ if ((c->processor_id & PRID_IMP_MASK) == PRID_IMP_LOONGSON_64R)
+ scache_size *= 2;
+ else
+ scache_size *= 4;
+
c->scache.waybit = 0;
c->scache.waysize = scache_size / c->scache.ways;
pr_info("Unified secondary cache %ldkB %s, linesize %d bytes.\n",
@@ -1703,9 +1709,10 @@ static void setup_scache(void)
return;
default:
- if (c->isa_level & (MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M32R2 |
- MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R1 |
- MIPS_CPU_ISA_M64R2 | MIPS_CPU_ISA_M64R6)) {
+ if (c->isa_level & (MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M64R1 |
+ MIPS_CPU_ISA_M32R2 | MIPS_CPU_ISA_M64R2 |
+ MIPS_CPU_ISA_M32R5 | MIPS_CPU_ISA_M64R5 |
+ MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R6)) {
#ifdef CONFIG_MIPS_CPU_SCACHE
if (mips_sc_init ()) {
scache_size = c->scache.ways * c->scache.sets * c->scache.linesz;
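Editor's aside: the sizing tweak above multiplies the probed per-bank size (sets * ways * linesz) by the number of shared scache banks, now 2 for Loongson-2K (PRID_IMP_LOONGSON_64R) instead of the fixed 4. A quick arithmetic sketch with illustrative, not real-hardware, numbers:

	#include <stdio.h>

	int main(void)
	{
		unsigned long sets = 1024, ways = 16, linesz = 64;	/* illustrative only */
		unsigned long bank_size = sets * ways * linesz;		/* 1 MiB per bank */
		int is_loongson_2k = 0;					/* PRID_IMP_LOONGSON_64R parts */
		unsigned long scache_size = bank_size * (is_loongson_2k ? 2 : 4);

		printf("Unified secondary cache %lukB\n", scache_size >> 10);	/* 4096kB */
		return 0;
	}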
diff --git a/arch/mips/mm/cache.c b/arch/mips/mm/cache.c
index 33b409391ddb..ad6df1cea866 100644
--- a/arch/mips/mm/cache.c
+++ b/arch/mips/mm/cache.c
@@ -36,7 +36,6 @@ EXPORT_SYMBOL_GPL(flush_icache_range);
void (*local_flush_icache_range)(unsigned long start, unsigned long end);
EXPORT_SYMBOL_GPL(local_flush_icache_range);
void (*__flush_icache_user_range)(unsigned long start, unsigned long end);
-EXPORT_SYMBOL_GPL(__flush_icache_user_range);
void (*__local_flush_icache_user_range)(unsigned long start, unsigned long end);
EXPORT_SYMBOL_GPL(__local_flush_icache_user_range);
diff --git a/arch/mips/mm/dma-noncoherent.c b/arch/mips/mm/dma-noncoherent.c
index fcea92d95d86..563c2c0d0c81 100644
--- a/arch/mips/mm/dma-noncoherent.c
+++ b/arch/mips/mm/dma-noncoherent.c
@@ -33,6 +33,7 @@ static inline bool cpu_needs_post_dma_flush(void)
case CPU_R10000:
case CPU_R12000:
case CPU_BMIPS5000:
+ case CPU_LOONGSON2EF:
return true;
default:
/*
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index 79684000de0e..620ebfa45ec1 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -358,17 +358,23 @@ void maar_init(void)
write_c0_maari(i);
back_to_back_c0_hazard();
upper = read_c0_maar();
+#ifdef CONFIG_XPA
+ upper |= (phys_addr_t)readx_c0_maar() << MIPS_MAARX_ADDR_SHIFT;
+#endif
write_c0_maari(i + 1);
back_to_back_c0_hazard();
lower = read_c0_maar();
+#ifdef CONFIG_XPA
+ lower |= (phys_addr_t)readx_c0_maar() << MIPS_MAARX_ADDR_SHIFT;
+#endif
attr = lower & upper;
lower = (lower & MIPS_MAAR_ADDR) << 4;
upper = ((upper & MIPS_MAAR_ADDR) << 4) | 0xffff;
pr_info(" [%d]: ", i / 2);
- if (!(attr & MIPS_MAAR_VL)) {
+ if ((attr & MIPS_MAAR_V) != MIPS_MAAR_V) {
pr_cont("disabled\n");
continue;
}
diff --git a/arch/mips/mm/ioremap.c b/arch/mips/mm/ioremap.c
index 8317f337a86e..b6dad2fd5575 100644
--- a/arch/mips/mm/ioremap.c
+++ b/arch/mips/mm/ioremap.c
@@ -14,94 +14,13 @@
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mm_types.h>
+#include <linux/io.h>
#include <asm/cacheflush.h>
-#include <asm/io.h>
#include <asm/tlbflush.h>
+#include <ioremap.h>
-static inline void remap_area_pte(pte_t * pte, unsigned long address,
- phys_addr_t size, phys_addr_t phys_addr, unsigned long flags)
-{
- phys_addr_t end;
- unsigned long pfn;
- pgprot_t pgprot = __pgprot(_PAGE_GLOBAL | _PAGE_PRESENT | __READABLE
- | __WRITEABLE | flags);
-
- address &= ~PMD_MASK;
- end = address + size;
- if (end > PMD_SIZE)
- end = PMD_SIZE;
- BUG_ON(address >= end);
- pfn = phys_addr >> PAGE_SHIFT;
- do {
- if (!pte_none(*pte)) {
- printk("remap_area_pte: page already exists\n");
- BUG();
- }
- set_pte(pte, pfn_pte(pfn, pgprot));
- address += PAGE_SIZE;
- pfn++;
- pte++;
- } while (address && (address < end));
-}
-
-static inline int remap_area_pmd(pmd_t * pmd, unsigned long address,
- phys_addr_t size, phys_addr_t phys_addr, unsigned long flags)
-{
- phys_addr_t end;
-
- address &= ~PGDIR_MASK;
- end = address + size;
- if (end > PGDIR_SIZE)
- end = PGDIR_SIZE;
- phys_addr -= address;
- BUG_ON(address >= end);
- do {
- pte_t * pte = pte_alloc_kernel(pmd, address);
- if (!pte)
- return -ENOMEM;
- remap_area_pte(pte, address, end - address, address + phys_addr, flags);
- address = (address + PMD_SIZE) & PMD_MASK;
- pmd++;
- } while (address && (address < end));
- return 0;
-}
-
-static int remap_area_pages(unsigned long address, phys_addr_t phys_addr,
- phys_addr_t size, unsigned long flags)
-{
- int error;
- pgd_t * dir;
- unsigned long end = address + size;
-
- phys_addr -= address;
- dir = pgd_offset(&init_mm, address);
- flush_cache_all();
- BUG_ON(address >= end);
- do {
- p4d_t *p4d;
- pud_t *pud;
- pmd_t *pmd;
-
- error = -ENOMEM;
- p4d = p4d_alloc(&init_mm, dir, address);
- if (!p4d)
- break;
- pud = pud_alloc(&init_mm, p4d, address);
- if (!pud)
- break;
- pmd = pmd_alloc(&init_mm, pud, address);
- if (!pmd)
- break;
- if (remap_area_pmd(pmd, address, end - address,
- phys_addr + address, flags))
- break;
- error = 0;
- address = (address + PGDIR_SIZE) & PGDIR_MASK;
- dir++;
- } while (address && (address < end));
- flush_tlb_all();
- return error;
-}
+#define IS_LOW512(addr) (!((phys_addr_t)(addr) & (phys_addr_t) ~0x1fffffffULL))
+#define IS_KSEG1(addr) (((unsigned long)(addr) & ~0x1fffffffUL) == CKSEG1)
static int __ioremap_check_ram(unsigned long start_pfn, unsigned long nr_pages,
void *arg)
@@ -118,27 +37,25 @@ static int __ioremap_check_ram(unsigned long start_pfn, unsigned long nr_pages,
}
/*
- * Generic mapping function (not visible outside):
- */
-
-/*
- * Remap an arbitrary physical address space into the kernel virtual
- * address space. Needed when the kernel wants to access high addresses
- * directly.
+ * ioremap_prot - map bus memory into CPU space
+ * @phys_addr: bus address of the memory
+ * @size: size of the resource to map
*
- * NOTE! We need to allow non-page-aligned mappings too: we will obviously
- * have to convert them into an offset in a page-aligned mapping, but the
- * caller shouldn't need to know that small detail.
+ * ioremap_prot gives the caller control over cache coherency attributes (CCA)
*/
-
-#define IS_LOW512(addr) (!((phys_addr_t)(addr) & (phys_addr_t) ~0x1fffffffULL))
-
-void __iomem * __ioremap(phys_addr_t phys_addr, phys_addr_t size, unsigned long flags)
+void __iomem *ioremap_prot(phys_addr_t phys_addr, unsigned long size,
+ unsigned long prot_val)
{
+ unsigned long flags = prot_val & _CACHE_MASK;
unsigned long offset, pfn, last_pfn;
- struct vm_struct * area;
+ struct vm_struct *area;
phys_addr_t last_addr;
- void * addr;
+ unsigned long vaddr;
+ void __iomem *cpu_addr;
+
+ cpu_addr = plat_ioremap(phys_addr, size, flags);
+ if (cpu_addr)
+ return cpu_addr;
phys_addr = fixup_bigphys_addr(phys_addr, size);
@@ -181,30 +98,22 @@ void __iomem * __ioremap(phys_addr_t phys_addr, phys_addr_t size, unsigned long
area = get_vm_area(size, VM_IOREMAP);
if (!area)
return NULL;
- addr = area->addr;
- if (remap_area_pages((unsigned long) addr, phys_addr, size, flags)) {
- vunmap(addr);
+ vaddr = (unsigned long)area->addr;
+
+ flags |= _PAGE_GLOBAL | _PAGE_PRESENT | __READABLE | __WRITEABLE;
+ if (ioremap_page_range(vaddr, vaddr + size, phys_addr,
+ __pgprot(flags))) {
+ free_vm_area(area);
return NULL;
}
- return (void __iomem *) (offset + (char *)addr);
+ return (void __iomem *)(vaddr + offset);
}
+EXPORT_SYMBOL(ioremap_prot);
-#define IS_KSEG1(addr) (((unsigned long)(addr) & ~0x1fffffffUL) == CKSEG1)
-
-void __iounmap(const volatile void __iomem *addr)
+void iounmap(const volatile void __iomem *addr)
{
- struct vm_struct *p;
-
- if (IS_KSEG1(addr))
- return;
-
- p = remove_vm_area((void *) (PAGE_MASK & (unsigned long __force) addr));
- if (!p)
- printk(KERN_ERR "iounmap: bad address %p\n", addr);
-
- kfree(p);
+ if (!plat_iounmap(addr) && !IS_KSEG1(addr))
+ vunmap((void *)((unsigned long)addr & PAGE_MASK));
}
-
-EXPORT_SYMBOL(__ioremap);
-EXPORT_SYMBOL(__iounmap);
+EXPORT_SYMBOL(iounmap);
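Editor's aside: with the rewrite above, the 32-bit ioremap_prot() builds its mapping through the generic ioremap_page_range() helper instead of the removed hand-rolled page-table walkers, while plat_ioremap()/plat_iounmap() still get first refusal. Callers are unchanged; a hedged driver-style sketch, where the device address and register offset are made up:

	#include <linux/io.h>
	#include <linux/kernel.h>
	#include <linux/module.h>

	#define DEMO_PHYS_BASE	0x1fe00000UL	/* hypothetical device registers */
	#define DEMO_REG_STATUS	0x04		/* hypothetical register offset */

	static void __iomem *demo_regs;

	static int __init demo_init(void)
	{
		/* now backed by ioremap_page_range() on 32-bit MIPS */
		demo_regs = ioremap(DEMO_PHYS_BASE, 0x100);
		if (!demo_regs)
			return -ENOMEM;

		pr_info("demo status: %#x\n", readl(demo_regs + DEMO_REG_STATUS));
		return 0;
	}

	static void __exit demo_exit(void)
	{
		iounmap(demo_regs);
	}

	module_init(demo_init);
	module_exit(demo_exit);
	MODULE_LICENSE("GPL");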
diff --git a/arch/mips/mm/ioremap64.c b/arch/mips/mm/ioremap64.c
new file mode 100644
index 000000000000..15e7820d6a5f
--- /dev/null
+++ b/arch/mips/mm/ioremap64.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/io.h>
+#include <ioremap.h>
+
+void __iomem *ioremap_prot(phys_addr_t offset, unsigned long size,
+ unsigned long prot_val)
+{
+ unsigned long flags = prot_val & _CACHE_MASK;
+ u64 base = (flags == _CACHE_UNCACHED ? IO_BASE : UNCAC_BASE);
+ void __iomem *addr;
+
+ addr = plat_ioremap(offset, size, flags);
+ if (!addr)
+ addr = (void __iomem *)(unsigned long)(base + offset);
+ return addr;
+}
+EXPORT_SYMBOL(ioremap_prot);
+
+void iounmap(const volatile void __iomem *addr)
+{
+ plat_iounmap(addr);
+}
+EXPORT_SYMBOL(iounmap);
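Editor's aside: on 64-bit kernels the new ioremap64.c avoids page tables entirely; unless plat_ioremap() claims the range, the returned pointer is simply a fixed virtual window base plus the physical offset. An arithmetic-only sketch, with window bases that are assumptions for illustration (the real IO_BASE/UNCAC_BASE come from the platform headers):

	#include <stdio.h>
	#include <stdint.h>

	#define DEMO_UNCAC_BASE	0x9000000000000000ULL	/* assumed uncached window */
	#define DEMO_CAC_BASE	0x9800000000000000ULL	/* assumed cached window */

	static uint64_t demo_ioremap(uint64_t phys, int uncached)
	{
		return (uncached ? DEMO_UNCAC_BASE : DEMO_CAC_BASE) + phys;
	}

	int main(void)
	{
		printf("%#llx\n", (unsigned long long)demo_ioremap(0x1fe00120, 1));
		return 0;
	}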
diff --git a/arch/mips/mm/sc-mips.c b/arch/mips/mm/sc-mips.c
index dbdbfe5d8408..eedad47df24f 100644
--- a/arch/mips/mm/sc-mips.c
+++ b/arch/mips/mm/sc-mips.c
@@ -194,9 +194,10 @@ static inline int __init mips_sc_probe(void)
return mips_sc_probe_cm3();
/* Ignore anything but MIPSxx processors */
- if (!(c->isa_level & (MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M32R2 |
- MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R1 |
- MIPS_CPU_ISA_M64R2 | MIPS_CPU_ISA_M64R6)))
+ if (!(c->isa_level & (MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M64R1 |
+ MIPS_CPU_ISA_M32R2 | MIPS_CPU_ISA_M64R2 |
+ MIPS_CPU_ISA_M32R5 | MIPS_CPU_ISA_M64R5 |
+ MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R6)))
return 0;
/* Does this MIPS32/MIPS64 CPU have a config2 register? */
diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
index da407cdc2135..38c204204529 100644
--- a/arch/mips/mm/tlbex.c
+++ b/arch/mips/mm/tlbex.c
@@ -576,7 +576,7 @@ void build_tlb_write_entry(u32 **p, struct uasm_label **l,
case CPU_R5500:
if (m4kc_tlbp_war())
uasm_i_nop(p);
- /* fall through */
+ fallthrough;
case CPU_ALCHEMY:
tlbw(p);
break;
diff --git a/arch/mips/mti-malta/Platform b/arch/mips/mti-malta/Platform
index 2cc72c9b38e3..41e0d2a2d325 100644
--- a/arch/mips/mti-malta/Platform
+++ b/arch/mips/mti-malta/Platform
@@ -1,7 +1,6 @@
#
# MIPS Malta board
#
-platform-$(CONFIG_MIPS_MALTA) += mti-malta/
cflags-$(CONFIG_MIPS_MALTA) += -I$(srctree)/arch/mips/include/asm/mach-malta
ifdef CONFIG_KVM_GUEST
load-$(CONFIG_MIPS_MALTA) += 0x0000000040100000
diff --git a/arch/mips/mti-malta/malta-init.c b/arch/mips/mti-malta/malta-init.c
index ff2c1d809538..893af377aacc 100644
--- a/arch/mips/mti-malta/malta-init.c
+++ b/arch/mips/mti-malta/malta-init.c
@@ -90,24 +90,24 @@ static void __init console_config(void)
static void __init mips_nmi_setup(void)
{
void *base;
- extern char except_vec_nmi;
+ extern char except_vec_nmi[];
base = cpu_has_veic ?
(void *)(CAC_BASE + 0xa80) :
(void *)(CAC_BASE + 0x380);
- memcpy(base, &except_vec_nmi, 0x80);
+ memcpy(base, except_vec_nmi, 0x80);
flush_icache_range((unsigned long)base, (unsigned long)base + 0x80);
}
static void __init mips_ejtag_setup(void)
{
void *base;
- extern char except_vec_ejtag_debug;
+ extern char except_vec_ejtag_debug[];
base = cpu_has_veic ?
(void *)(CAC_BASE + 0xa00) :
(void *)(CAC_BASE + 0x300);
- memcpy(base, &except_vec_ejtag_debug, 0x80);
+ memcpy(base, except_vec_ejtag_debug, 0x80);
flush_icache_range((unsigned long)base, (unsigned long)base + 0x80);
}
diff --git a/arch/mips/netlogic/Platform b/arch/mips/netlogic/Platform
index fb8eb4c0c6ec..4195a097f5f2 100644
--- a/arch/mips/netlogic/Platform
+++ b/arch/mips/netlogic/Platform
@@ -13,5 +13,4 @@ cflags-$(CONFIG_CPU_XLP) += $(call cc-option,-march=xlp,-march=mips64r2)
#
# NETLOGIC processor support
#
-platform-$(CONFIG_NLM_COMMON) += netlogic/
load-$(CONFIG_NLM_COMMON) += 0xffffffff80100000
diff --git a/arch/mips/netlogic/xlr/fmn.c b/arch/mips/netlogic/xlr/fmn.c
index d7db1533889a..f90303f31967 100644
--- a/arch/mips/netlogic/xlr/fmn.c
+++ b/arch/mips/netlogic/xlr/fmn.c
@@ -103,7 +103,7 @@ static irqreturn_t fmn_message_handler(int irq, void *data)
mflags = nlm_cop2_enable_irqsave();
}
}
- };
+ }
/* Enable message ring intr, to any thread in core */
nlm_fmn_setup_intr(irq, (1 << nlm_threads_per_core) - 1);
nlm_cop2_disable_irqrestore(mflags);
diff --git a/arch/mips/oprofile/common.c b/arch/mips/oprofile/common.c
index 03db268cba5c..d3996c4c6440 100644
--- a/arch/mips/oprofile/common.c
+++ b/arch/mips/oprofile/common.c
@@ -110,7 +110,7 @@ int __init oprofile_arch_init(struct oprofile_operations *ops)
case CPU_LOONGSON64:
lmodel = &op_model_loongson3_ops;
break;
- };
+ }
/*
* Always set the backtrace. This allows unsupported CPU types to still
diff --git a/arch/mips/oprofile/op_model_mipsxx.c b/arch/mips/oprofile/op_model_mipsxx.c
index a537bf98912c..1493c49ca47a 100644
--- a/arch/mips/oprofile/op_model_mipsxx.c
+++ b/arch/mips/oprofile/op_model_mipsxx.c
@@ -172,15 +172,15 @@ static void mipsxx_cpu_setup(void *args)
case 4:
w_c0_perfctrl3(0);
w_c0_perfcntr3(reg.counter[3]);
- /* fall through */
+ fallthrough;
case 3:
w_c0_perfctrl2(0);
w_c0_perfcntr2(reg.counter[2]);
- /* fall through */
+ fallthrough;
case 2:
w_c0_perfctrl1(0);
w_c0_perfcntr1(reg.counter[1]);
- /* fall through */
+ fallthrough;
case 1:
w_c0_perfctrl0(0);
w_c0_perfcntr0(reg.counter[0]);
@@ -198,13 +198,13 @@ static void mipsxx_cpu_start(void *args)
switch (counters) {
case 4:
w_c0_perfctrl3(WHAT | reg.control[3]);
- /* fall through */
+ fallthrough;
case 3:
w_c0_perfctrl2(WHAT | reg.control[2]);
- /* fall through */
+ fallthrough;
case 2:
w_c0_perfctrl1(WHAT | reg.control[1]);
- /* fall through */
+ fallthrough;
case 1:
w_c0_perfctrl0(WHAT | reg.control[0]);
}
@@ -221,13 +221,13 @@ static void mipsxx_cpu_stop(void *args)
switch (counters) {
case 4:
w_c0_perfctrl3(0);
- /* fall through */
+ fallthrough;
case 3:
w_c0_perfctrl2(0);
- /* fall through */
+ fallthrough;
case 2:
w_c0_perfctrl1(0);
- /* fall through */
+ fallthrough;
case 1:
w_c0_perfctrl0(0);
}
@@ -245,7 +245,7 @@ static int mipsxx_perfcount_handler(void)
switch (counters) {
#define HANDLE_COUNTER(n) \
- /* fall through */ \
+ fallthrough; \
case n + 1: \
control = r_c0_perfctrl ## n(); \
counter = r_c0_perfcntr ## n(); \
@@ -307,15 +307,15 @@ static void reset_counters(void *arg)
case 4:
w_c0_perfctrl3(0);
w_c0_perfcntr3(0);
- /* fall through */
+ fallthrough;
case 3:
w_c0_perfctrl2(0);
w_c0_perfcntr2(0);
- /* fall through */
+ fallthrough;
case 2:
w_c0_perfctrl1(0);
w_c0_perfcntr1(0);
- /* fall through */
+ fallthrough;
case 1:
w_c0_perfctrl0(0);
w_c0_perfcntr0(0);
diff --git a/arch/mips/paravirt/Platform b/arch/mips/paravirt/Platform
index 7e76ef25ea17..0b857580dfdd 100644
--- a/arch/mips/paravirt/Platform
+++ b/arch/mips/paravirt/Platform
@@ -1,7 +1,6 @@
#
# Generic para-virtualized guest.
#
-platform-$(CONFIG_MIPS_PARAVIRT) += paravirt/
cflags-$(CONFIG_MIPS_PARAVIRT) += \
-I$(srctree)/arch/mips/include/asm/mach-paravirt
diff --git a/arch/mips/pci/Makefile b/arch/mips/pci/Makefile
index 342ce10ef593..0f68d6849978 100644
--- a/arch/mips/pci/Makefile
+++ b/arch/mips/pci/Makefile
@@ -13,10 +13,8 @@ obj-$(CONFIG_PCI_DRIVERS_GENERIC)+= pci-generic.o
obj-$(CONFIG_MIPS_BONITO64) += ops-bonito64.o
obj-$(CONFIG_PCI_GT64XXX_PCI0) += ops-gt64xxx_pci0.o
obj-$(CONFIG_MIPS_MSC) += ops-msc.o
-obj-$(CONFIG_MIPS_NILE4) += ops-nile4.o
obj-$(CONFIG_SOC_TX3927) += ops-tx3927.o
obj-$(CONFIG_PCI_VR41XX) += ops-vr41xx.o pci-vr41xx.o
-obj-$(CONFIG_NEC_MARKEINS) += ops-emma2rh.o pci-emma2rh.o fixup-emma2rh.o
obj-$(CONFIG_PCI_TX4927) += ops-tx4927.o
obj-$(CONFIG_BCM47XX) += pci-bcm47xx.o
obj-$(CONFIG_BCM63XX) += pci-bcm63xx.o fixup-bcm63xx.o \
@@ -31,15 +29,10 @@ obj-$(CONFIG_PCI_XTALK_BRIDGE) += pci-xtalk-bridge.o
# These are still pretty much in the old state, watch, go blind.
#
obj-$(CONFIG_ATH79) += fixup-ath79.o
-obj-$(CONFIG_LASAT) += pci-lasat.o
obj-$(CONFIG_MIPS_COBALT) += fixup-cobalt.o
obj-$(CONFIG_LEMOTE_FULOONG2E) += fixup-fuloong2e.o ops-loongson2.o
obj-$(CONFIG_LEMOTE_MACH2F) += fixup-lemote2f.o ops-loongson2.o
-obj-$(CONFIG_MACH_LOONGSON64) += fixup-loongson3.o ops-loongson3.o
obj-$(CONFIG_MIPS_MALTA) += fixup-malta.o pci-malta.o
-obj-$(CONFIG_PMC_MSP7120_GW) += fixup-pmcmsp.o ops-pmcmsp.o
-obj-$(CONFIG_PMC_MSP7120_EVAL) += fixup-pmcmsp.o ops-pmcmsp.o
-obj-$(CONFIG_PMC_MSP7120_FPGA) += fixup-pmcmsp.o ops-pmcmsp.o
obj-$(CONFIG_SGI_IP27) += pci-ip27.o
obj-$(CONFIG_SGI_IP32) += fixup-ip32.o ops-mace.o pci-ip32.o
obj-$(CONFIG_SIBYTE_SB1250) += fixup-sb1250.o pci-sb1250.o
diff --git a/arch/mips/pci/fixup-emma2rh.c b/arch/mips/pci/fixup-emma2rh.c
deleted file mode 100644
index 2541f9bc12de..000000000000
--- a/arch/mips/pci/fixup-emma2rh.c
+++ /dev/null
@@ -1,84 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Copyright (C) NEC Electronics Corporation 2004-2006
- *
- * This file is based on the arch/mips/ddb5xxx/ddb5477/pci.c
- *
- * Copyright 2001 MontaVista Software Inc.
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/pci.h>
-
-#include <asm/bootinfo.h>
-
-#include <asm/emma/emma2rh.h>
-
-#define EMMA2RH_PCI_HOST_SLOT 0x09
-#define EMMA2RH_USB_SLOT 0x03
-#define PCI_DEVICE_ID_NEC_EMMA2RH 0x014b /* EMMA2RH PCI Host */
-
-/*
- * we fix up irqs based on the slot number.
- * The first entry is at AD:11.
- * Fortunately this works because, although we have two pci buses,
- * they all have different slot numbers (except for rockhopper slot 20
- * which is handled below).
- *
- */
-
-#define MAX_SLOT_NUM 10
-static unsigned char irq_map[][5] = {
- [3] = {0, MARKEINS_PCI_IRQ_INTB, MARKEINS_PCI_IRQ_INTC,
- MARKEINS_PCI_IRQ_INTD, 0,},
- [4] = {0, MARKEINS_PCI_IRQ_INTA, 0, 0, 0,},
- [5] = {0, 0, 0, 0, 0,},
- [6] = {0, MARKEINS_PCI_IRQ_INTC, MARKEINS_PCI_IRQ_INTD,
- MARKEINS_PCI_IRQ_INTA, MARKEINS_PCI_IRQ_INTB,},
-};
-
-static void nec_usb_controller_fixup(struct pci_dev *dev)
-{
- if (PCI_SLOT(dev->devfn) == EMMA2RH_USB_SLOT)
- /* on board USB controller configuration */
- pci_write_config_dword(dev, 0xe4, 1 << 5);
-}
-
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NEC, PCI_DEVICE_ID_NEC_USB,
- nec_usb_controller_fixup);
-
-/*
- * Prevent the PCI layer from seeing the resources allocated to this device
- * if it is the host bridge by marking it as such. These resources are of
- * no consequence to the PCI layer (they are handled elsewhere).
- */
-static void emma2rh_pci_host_fixup(struct pci_dev *dev)
-{
- int i;
-
- if (PCI_SLOT(dev->devfn) == EMMA2RH_PCI_HOST_SLOT) {
- dev->class &= 0xff;
- dev->class |= PCI_CLASS_BRIDGE_HOST << 8;
- for (i = 0; i < PCI_NUM_RESOURCES; i++) {
- dev->resource[i].start = 0;
- dev->resource[i].end = 0;
- dev->resource[i].flags = 0;
- }
- }
-}
-
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NEC, PCI_DEVICE_ID_NEC_EMMA2RH,
- emma2rh_pci_host_fixup);
-
-int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
-{
- return irq_map[slot][pin];
-}
-
-/* Do platform specific device initialization at pci_enable_device() time */
-int pcibios_plat_dev_init(struct pci_dev *dev)
-{
- return 0;
-}
diff --git a/arch/mips/pci/fixup-loongson3.c b/arch/mips/pci/fixup-loongson3.c
deleted file mode 100644
index 8a741c2c6685..000000000000
--- a/arch/mips/pci/fixup-loongson3.c
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * fixup-loongson3.c
- *
- * Copyright (C) 2012 Lemote, Inc.
- * Author: Xiang Yu, xiangy@lemote.com
- * Chen Huacai, chenhc@lemote.com
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
- * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
- * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
- * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-#include <linux/pci.h>
-#include <boot_param.h>
-
-static void print_fixup_info(const struct pci_dev *pdev)
-{
- dev_info(&pdev->dev, "Device %x:%x, irq %d\n",
- pdev->vendor, pdev->device, pdev->irq);
-}
-
-int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
-{
- print_fixup_info(dev);
- return dev->irq;
-}
-
-static void pci_fixup_radeon(struct pci_dev *pdev)
-{
- struct resource *res = &pdev->resource[PCI_ROM_RESOURCE];
-
- if (res->start)
- return;
-
- if (!loongson_sysconf.vgabios_addr)
- return;
-
- pci_disable_rom(pdev);
- if (res->parent)
- release_resource(res);
-
- res->start = virt_to_phys((void *) loongson_sysconf.vgabios_addr);
- res->end = res->start + 256*1024 - 1;
- res->flags = IORESOURCE_MEM | IORESOURCE_ROM_SHADOW |
- IORESOURCE_PCI_FIXED;
-
- dev_info(&pdev->dev, "BAR %d: assigned %pR for Radeon ROM\n",
- PCI_ROM_RESOURCE, res);
-}
-
-DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_ATI, PCI_ANY_ID,
- PCI_CLASS_DISPLAY_VGA, 8, pci_fixup_radeon);
-
-/* Do platform specific device initialization at pci_enable_device() time */
-int pcibios_plat_dev_init(struct pci_dev *dev)
-{
- return 0;
-}
diff --git a/arch/mips/pci/fixup-pmcmsp.c b/arch/mips/pci/fixup-pmcmsp.c
deleted file mode 100644
index 4ad2ef02087b..000000000000
--- a/arch/mips/pci/fixup-pmcmsp.c
+++ /dev/null
@@ -1,216 +0,0 @@
-/*
- * PMC-Sierra MSP board specific pci fixups.
- *
- * Copyright 2001 MontaVista Software Inc.
- * Copyright 2005-2007 PMC-Sierra, Inc
- *
- * Author: MontaVista Software, Inc.
- * ppopov@mvista.com or source@mvista.com
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
- * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
- * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
- * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#ifdef CONFIG_PCI
-
-#include <linux/types.h>
-#include <linux/pci.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-
-#include <asm/byteorder.h>
-
-#include <msp_pci.h>
-#include <msp_cic_int.h>
-
-/* PCI interrupt pins */
-#define IRQ4 MSP_INT_EXT4
-#define IRQ5 MSP_INT_EXT5
-#define IRQ6 MSP_INT_EXT6
-
-#if defined(CONFIG_PMC_MSP7120_GW)
-/* Garibaldi Board IRQ wiring to PCI slots */
-static char irq_tab[][5] = {
- /* INTA INTB INTC INTD */
- {0, 0, 0, 0, 0 }, /* (AD[0]): Unused */
- {0, 0, 0, 0, 0 }, /* (AD[1]): Unused */
- {0, 0, 0, 0, 0 }, /* (AD[2]): Unused */
- {0, 0, 0, 0, 0 }, /* (AD[3]): Unused */
- {0, 0, 0, 0, 0 }, /* (AD[4]): Unused */
- {0, 0, 0, 0, 0 }, /* (AD[5]): Unused */
- {0, 0, 0, 0, 0 }, /* (AD[6]): Unused */
- {0, 0, 0, 0, 0 }, /* (AD[7]): Unused */
- {0, 0, 0, 0, 0 }, /* (AD[8]): Unused */
- {0, 0, 0, 0, 0 }, /* (AD[9]): Unused */
- {0, 0, 0, 0, 0 }, /* 0 (AD[10]): Unused */
- {0, 0, 0, 0, 0 }, /* 1 (AD[11]): Unused */
- {0, 0, 0, 0, 0 }, /* 2 (AD[12]): Unused */
- {0, 0, 0, 0, 0 }, /* 3 (AD[13]): Unused */
- {0, 0, 0, 0, 0 }, /* 4 (AD[14]): Unused */
- {0, 0, 0, 0, 0 }, /* 5 (AD[15]): Unused */
- {0, 0, 0, 0, 0 }, /* 6 (AD[16]): Unused */
- {0, 0, 0, 0, 0 }, /* 7 (AD[17]): Unused */
- {0, 0, 0, 0, 0 }, /* 8 (AD[18]): Unused */
- {0, 0, 0, 0, 0 }, /* 9 (AD[19]): Unused */
- {0, 0, 0, 0, 0 }, /* 10 (AD[20]): Unused */
- {0, 0, 0, 0, 0 }, /* 11 (AD[21]): Unused */
- {0, 0, 0, 0, 0 }, /* 12 (AD[22]): Unused */
- {0, 0, 0, 0, 0 }, /* 13 (AD[23]): Unused */
- {0, 0, 0, 0, 0 }, /* 14 (AD[24]): Unused */
- {0, 0, 0, 0, 0 }, /* 15 (AD[25]): Unused */
- {0, 0, 0, 0, 0 }, /* 16 (AD[26]): Unused */
- {0, 0, 0, 0, 0 }, /* 17 (AD[27]): Unused */
- {0, IRQ4, IRQ4, 0, 0 }, /* 18 (AD[28]): slot 0 */
- {0, 0, 0, 0, 0 }, /* 19 (AD[29]): Unused */
- {0, IRQ5, IRQ5, 0, 0 }, /* 20 (AD[30]): slot 1 */
- {0, IRQ6, IRQ6, 0, 0 } /* 21 (AD[31]): slot 2 */
-};
-
-#elif defined(CONFIG_PMC_MSP7120_EVAL)
-
-/* MSP7120 Eval Board IRQ wiring to PCI slots */
-static char irq_tab[][5] = {
- /* INTA INTB INTC INTD */
- {0, 0, 0, 0, 0 }, /* (AD[0]): Unused */
- {0, 0, 0, 0, 0 }, /* (AD[1]): Unused */
- {0, 0, 0, 0, 0 }, /* (AD[2]): Unused */
- {0, 0, 0, 0, 0 }, /* (AD[3]): Unused */
- {0, 0, 0, 0, 0 }, /* (AD[4]): Unused */
- {0, 0, 0, 0, 0 }, /* (AD[5]): Unused */
- {0, 0, 0, 0, 0 }, /* (AD[6]): Unused */
- {0, 0, 0, 0, 0 }, /* (AD[7]): Unused */
- {0, 0, 0, 0, 0 }, /* (AD[8]): Unused */
- {0, 0, 0, 0, 0 }, /* (AD[9]): Unused */
- {0, 0, 0, 0, 0 }, /* 0 (AD[10]): Unused */
- {0, 0, 0, 0, 0 }, /* 1 (AD[11]): Unused */
- {0, 0, 0, 0, 0 }, /* 2 (AD[12]): Unused */
- {0, 0, 0, 0, 0 }, /* 3 (AD[13]): Unused */
- {0, 0, 0, 0, 0 }, /* 4 (AD[14]): Unused */
- {0, 0, 0, 0, 0 }, /* 5 (AD[15]): Unused */
- {0, IRQ6, IRQ6, 0, 0 }, /* 6 (AD[16]): slot 3 (mini) */
- {0, IRQ5, IRQ5, 0, 0 }, /* 7 (AD[17]): slot 2 (mini) */
- {0, IRQ4, IRQ4, IRQ4, IRQ4}, /* 8 (AD[18]): slot 0 (PCI) */
- {0, IRQ5, IRQ5, IRQ5, IRQ5}, /* 9 (AD[19]): slot 1 (PCI) */
- {0, 0, 0, 0, 0 }, /* 10 (AD[20]): Unused */
- {0, 0, 0, 0, 0 }, /* 11 (AD[21]): Unused */
- {0, 0, 0, 0, 0 }, /* 12 (AD[22]): Unused */
- {0, 0, 0, 0, 0 }, /* 13 (AD[23]): Unused */
- {0, 0, 0, 0, 0 }, /* 14 (AD[24]): Unused */
- {0, 0, 0, 0, 0 }, /* 15 (AD[25]): Unused */
- {0, 0, 0, 0, 0 }, /* 16 (AD[26]): Unused */
- {0, 0, 0, 0, 0 }, /* 17 (AD[27]): Unused */
- {0, 0, 0, 0, 0 }, /* 18 (AD[28]): Unused */
- {0, 0, 0, 0, 0 }, /* 19 (AD[29]): Unused */
- {0, 0, 0, 0, 0 }, /* 20 (AD[30]): Unused */
- {0, 0, 0, 0, 0 } /* 21 (AD[31]): Unused */
-};
-
-#else
-
-/* Unknown board -- don't assign any IRQs */
-static char irq_tab[][5] = {
- /* INTA INTB INTC INTD */
- {0, 0, 0, 0, 0 }, /* (AD[0]): Unused */
- {0, 0, 0, 0, 0 }, /* (AD[1]): Unused */
- {0, 0, 0, 0, 0 }, /* (AD[2]): Unused */
- {0, 0, 0, 0, 0 }, /* (AD[3]): Unused */
- {0, 0, 0, 0, 0 }, /* (AD[4]): Unused */
- {0, 0, 0, 0, 0 }, /* (AD[5]): Unused */
- {0, 0, 0, 0, 0 }, /* (AD[6]): Unused */
- {0, 0, 0, 0, 0 }, /* (AD[7]): Unused */
- {0, 0, 0, 0, 0 }, /* (AD[8]): Unused */
- {0, 0, 0, 0, 0 }, /* (AD[9]): Unused */
- {0, 0, 0, 0, 0 }, /* 0 (AD[10]): Unused */
- {0, 0, 0, 0, 0 }, /* 1 (AD[11]): Unused */
- {0, 0, 0, 0, 0 }, /* 2 (AD[12]): Unused */
- {0, 0, 0, 0, 0 }, /* 3 (AD[13]): Unused */
- {0, 0, 0, 0, 0 }, /* 4 (AD[14]): Unused */
- {0, 0, 0, 0, 0 }, /* 5 (AD[15]): Unused */
- {0, 0, 0, 0, 0 }, /* 6 (AD[16]): Unused */
- {0, 0, 0, 0, 0 }, /* 7 (AD[17]): Unused */
- {0, 0, 0, 0, 0 }, /* 8 (AD[18]): Unused */
- {0, 0, 0, 0, 0 }, /* 9 (AD[19]): Unused */
- {0, 0, 0, 0, 0 }, /* 10 (AD[20]): Unused */
- {0, 0, 0, 0, 0 }, /* 11 (AD[21]): Unused */
- {0, 0, 0, 0, 0 }, /* 12 (AD[22]): Unused */
- {0, 0, 0, 0, 0 }, /* 13 (AD[23]): Unused */
- {0, 0, 0, 0, 0 }, /* 14 (AD[24]): Unused */
- {0, 0, 0, 0, 0 }, /* 15 (AD[25]): Unused */
- {0, 0, 0, 0, 0 }, /* 16 (AD[26]): Unused */
- {0, 0, 0, 0, 0 }, /* 17 (AD[27]): Unused */
- {0, 0, 0, 0, 0 }, /* 18 (AD[28]): Unused */
- {0, 0, 0, 0, 0 }, /* 19 (AD[29]): Unused */
- {0, 0, 0, 0, 0 }, /* 20 (AD[30]): Unused */
- {0, 0, 0, 0, 0 } /* 21 (AD[31]): Unused */
-};
-#endif
-
-/*****************************************************************************
- *
- * FUNCTION: pcibios_plat_dev_init
- * _________________________________________________________________________
- *
- * DESCRIPTION: Perform platform specific device initialization at
- * pci_enable_device() time.
- * None are needed for the MSP7120 PCI Controller.
- *
- * INPUTS: dev - structure describing the PCI device
- *
- * OUTPUTS: none
- *
- * RETURNS: PCIBIOS_SUCCESSFUL
- *
- ****************************************************************************/
-int pcibios_plat_dev_init(struct pci_dev *dev)
-{
- return PCIBIOS_SUCCESSFUL;
-}
-
-/*****************************************************************************
- *
- * FUNCTION: pcibios_map_irq
- * _________________________________________________________________________
- *
- * DESCRIPTION: Perform board supplied PCI IRQ mapping routine.
- *
- * INPUTS: dev - unused
- * slot - PCI slot. Identified by which bit of the AD[] bus
- * drives the IDSEL line. AD[10] is 0, AD[31] is
- * slot 21.
- * pin - numbered using the scheme of the PCI_INTERRUPT_PIN
- * field of the config header.
- *
- * OUTPUTS: none
- *
- * RETURNS: IRQ number
- *
- ****************************************************************************/
-int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
-{
-#if !defined(CONFIG_PMC_MSP7120_GW) && !defined(CONFIG_PMC_MSP7120_EVAL)
- printk(KERN_WARNING "PCI: unknown board, no PCI IRQs assigned.\n");
-#endif
- printk(KERN_WARNING "PCI: irq_tab returned %d for slot=%d pin=%d\n",
- irq_tab[slot][pin], slot, pin);
-
- return irq_tab[slot][pin];
-}
-
-#endif /* CONFIG_PCI */
diff --git a/arch/mips/pci/fixup-sni.c b/arch/mips/pci/fixup-sni.c
index adb9a58641e8..de012f8bd8c3 100644
--- a/arch/mips/pci/fixup-sni.c
+++ b/arch/mips/pci/fixup-sni.c
@@ -151,8 +151,7 @@ int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
case SNI_BRD_PCI_MTOWER:
if (is_rm300_revd())
return irq_tab_rm300d[slot][pin];
- /* fall through */
-
+ fallthrough;
case SNI_BRD_PCI_DESKTOP:
return irq_tab_rm200[slot][pin];
diff --git a/arch/mips/pci/ops-bcm63xx.c b/arch/mips/pci/ops-bcm63xx.c
index 925c72348fb6..dc6dc2741272 100644
--- a/arch/mips/pci/ops-bcm63xx.c
+++ b/arch/mips/pci/ops-bcm63xx.c
@@ -474,7 +474,7 @@ static int bcm63xx_pcie_can_access(struct pci_bus *bus, int devfn)
if (PCI_SLOT(devfn) == 0)
return bcm_pcie_readl(PCIE_DLSTATUS_REG)
& DLSTATUS_PHYLINKUP;
- /* else, fall through */
+ fallthrough;
default:
return false;
}
diff --git a/arch/mips/pci/ops-emma2rh.c b/arch/mips/pci/ops-emma2rh.c
deleted file mode 100644
index 65f47344536c..000000000000
--- a/arch/mips/pci/ops-emma2rh.c
+++ /dev/null
@@ -1,167 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Copyright (C) NEC Electronics Corporation 2004-2006
- *
- * This file is based on the arch/mips/pci/ops-vr41xx.c
- *
- * Copyright 2001 MontaVista Software Inc.
- */
-
-#include <linux/pci.h>
-#include <linux/kernel.h>
-#include <linux/types.h>
-
-#include <asm/addrspace.h>
-
-#include <asm/emma/emma2rh.h>
-
-#define RTABORT (0x1<<9)
-#define RMABORT (0x1<<10)
-#define EMMA2RH_PCI_SLOT_NUM 9 /* 0000:09.0 is final PCI device */
-
-/*
- * access config space
- */
-
-static int check_args(struct pci_bus *bus, u32 devfn, u32 * bus_num)
-{
- /* check if the bus is top-level */
- if (bus->parent != NULL)
- *bus_num = bus->number;
- else
- *bus_num = 0;
-
- if (*bus_num == 0) {
- /* Type 0 */
- if (PCI_SLOT(devfn) >= 10)
- return PCIBIOS_DEVICE_NOT_FOUND;
- } else {
- /* Type 1 */
- if ((*bus_num >= 64) || (PCI_SLOT(devfn) >= 16))
- return PCIBIOS_DEVICE_NOT_FOUND;
- }
- return 0;
-}
-
-static inline int set_pci_configuration_address(unsigned char bus_num,
- unsigned int devfn, int where)
-{
- u32 config_win0;
-
- emma2rh_out32(EMMA2RH_PCI_INT, ~RMABORT);
- if (bus_num == 0)
- /*
- * Type 0 configuration
- */
- config_win0 = (1 << (22 + PCI_SLOT(devfn))) | (5 << 9);
- else
- /*
- * Type 1 configuration
- */
- config_win0 = (bus_num << 26) | (PCI_SLOT(devfn) << 22) |
- (1 << 15) | (5 << 9);
-
- emma2rh_out32(EMMA2RH_PCI_IWIN0_CTR, config_win0);
-
- return 0;
-}
-
-static int pci_config_read(struct pci_bus *bus, unsigned int devfn, int where,
- int size, uint32_t * val)
-{
- u32 bus_num;
- u32 base = KSEG1ADDR(EMMA2RH_PCI_CONFIG_BASE);
- u32 backup_win0;
- u32 data;
-
- *val = 0xffffffffU;
-
- if (check_args(bus, devfn, &bus_num) == PCIBIOS_DEVICE_NOT_FOUND)
- return PCIBIOS_DEVICE_NOT_FOUND;
-
- backup_win0 = emma2rh_in32(EMMA2RH_PCI_IWIN0_CTR);
-
- if (set_pci_configuration_address(bus_num, devfn, where) < 0)
- return PCIBIOS_DEVICE_NOT_FOUND;
-
- data =
- *(volatile u32 *)(base + (PCI_FUNC(devfn) << 8) +
- (where & 0xfffffffc));
-
- switch (size) {
- case 1:
- *val = (data >> ((where & 3) << 3)) & 0xffU;
- break;
- case 2:
- *val = (data >> ((where & 2) << 3)) & 0xffffU;
- break;
- case 4:
- *val = data;
- break;
- default:
- emma2rh_out32(EMMA2RH_PCI_IWIN0_CTR, backup_win0);
- return PCIBIOS_FUNC_NOT_SUPPORTED;
- }
-
- emma2rh_out32(EMMA2RH_PCI_IWIN0_CTR, backup_win0);
-
- if (emma2rh_in32(EMMA2RH_PCI_INT) & RMABORT)
- return PCIBIOS_DEVICE_NOT_FOUND;
-
- return PCIBIOS_SUCCESSFUL;
-}
-
-static int pci_config_write(struct pci_bus *bus, unsigned int devfn, int where,
- int size, u32 val)
-{
- u32 bus_num;
- u32 base = KSEG1ADDR(EMMA2RH_PCI_CONFIG_BASE);
- u32 backup_win0;
- u32 data;
- int shift;
-
- if (check_args(bus, devfn, &bus_num) == PCIBIOS_DEVICE_NOT_FOUND)
- return PCIBIOS_DEVICE_NOT_FOUND;
-
- backup_win0 = emma2rh_in32(EMMA2RH_PCI_IWIN0_CTR);
-
- if (set_pci_configuration_address(bus_num, devfn, where) < 0)
- return PCIBIOS_DEVICE_NOT_FOUND;
-
- /* read modify write */
- data =
- *(volatile u32 *)(base + (PCI_FUNC(devfn) << 8) +
- (where & 0xfffffffc));
-
- switch (size) {
- case 1:
- shift = (where & 3) << 3;
- data &= ~(0xffU << shift);
- data |= ((val & 0xffU) << shift);
- break;
- case 2:
- shift = (where & 2) << 3;
- data &= ~(0xffffU << shift);
- data |= ((val & 0xffffU) << shift);
- break;
- case 4:
- data = val;
- break;
- default:
- emma2rh_out32(EMMA2RH_PCI_IWIN0_CTR, backup_win0);
- return PCIBIOS_FUNC_NOT_SUPPORTED;
- }
- *(volatile u32 *)(base + (PCI_FUNC(devfn) << 8) +
- (where & 0xfffffffc)) = data;
-
- emma2rh_out32(EMMA2RH_PCI_IWIN0_CTR, backup_win0);
- if (emma2rh_in32(EMMA2RH_PCI_INT) & RMABORT)
- return PCIBIOS_DEVICE_NOT_FOUND;
-
- return PCIBIOS_SUCCESSFUL;
-}
-
-struct pci_ops emma2rh_pci_ops = {
- .read = pci_config_read,
- .write = pci_config_write,
-};
diff --git a/arch/mips/pci/ops-loongson3.c b/arch/mips/pci/ops-loongson3.c
deleted file mode 100644
index 2f6ad36bdea6..000000000000
--- a/arch/mips/pci/ops-loongson3.c
+++ /dev/null
@@ -1,116 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/types.h>
-#include <linux/pci.h>
-#include <linux/kernel.h>
-
-#include <asm/mips-boards/bonito64.h>
-
-#include <loongson.h>
-
-#define PCI_ACCESS_READ 0
-#define PCI_ACCESS_WRITE 1
-
-#define HT1LO_PCICFG_BASE 0x1a000000
-#define HT1LO_PCICFG_BASE_TP1 0x1b000000
-
-static int loongson3_pci_config_access(unsigned char access_type,
- struct pci_bus *bus, unsigned int devfn,
- int where, u32 *data)
-{
- unsigned char busnum = bus->number;
- int function = PCI_FUNC(devfn);
- int device = PCI_SLOT(devfn);
- int reg = where & ~3;
- void *addrp;
- u64 addr;
-
- if (where < PCI_CFG_SPACE_SIZE) { /* standard config */
- addr = (busnum << 16) | (device << 11) | (function << 8) | reg;
- if (busnum == 0) {
- if (device > 31)
- return PCIBIOS_DEVICE_NOT_FOUND;
- addrp = (void *)TO_UNCAC(HT1LO_PCICFG_BASE | addr);
- } else {
- addrp = (void *)TO_UNCAC(HT1LO_PCICFG_BASE_TP1 | addr);
- }
- } else if (where < PCI_CFG_SPACE_EXP_SIZE) { /* extended config */
- struct pci_dev *rootdev;
-
- rootdev = pci_get_domain_bus_and_slot(0, 0, 0);
- if (!rootdev)
- return PCIBIOS_DEVICE_NOT_FOUND;
-
- addr = pci_resource_start(rootdev, 3);
- if (!addr)
- return PCIBIOS_DEVICE_NOT_FOUND;
-
- addr |= busnum << 20 | device << 15 | function << 12 | reg;
- addrp = (void *)TO_UNCAC(addr);
- } else {
- return PCIBIOS_DEVICE_NOT_FOUND;
- }
-
- if (access_type == PCI_ACCESS_WRITE)
- writel(*data, addrp);
- else {
- *data = readl(addrp);
- if (*data == 0xffffffff) {
- *data = -1;
- return PCIBIOS_DEVICE_NOT_FOUND;
- }
- }
- return PCIBIOS_SUCCESSFUL;
-}
-
-static int loongson3_pci_pcibios_read(struct pci_bus *bus, unsigned int devfn,
- int where, int size, u32 *val)
-{
- u32 data = 0;
- int ret = loongson3_pci_config_access(PCI_ACCESS_READ,
- bus, devfn, where, &data);
-
- if (ret != PCIBIOS_SUCCESSFUL)
- return ret;
-
- if (size == 1)
- *val = (data >> ((where & 3) << 3)) & 0xff;
- else if (size == 2)
- *val = (data >> ((where & 3) << 3)) & 0xffff;
- else
- *val = data;
-
- return PCIBIOS_SUCCESSFUL;
-}
-
-static int loongson3_pci_pcibios_write(struct pci_bus *bus, unsigned int devfn,
- int where, int size, u32 val)
-{
- u32 data = 0;
- int ret;
-
- if (size == 4)
- data = val;
- else {
- ret = loongson3_pci_config_access(PCI_ACCESS_READ,
- bus, devfn, where, &data);
- if (ret != PCIBIOS_SUCCESSFUL)
- return ret;
-
- if (size == 1)
- data = (data & ~(0xff << ((where & 3) << 3))) |
- (val << ((where & 3) << 3));
- else if (size == 2)
- data = (data & ~(0xffff << ((where & 3) << 3))) |
- (val << ((where & 3) << 3));
- }
-
- ret = loongson3_pci_config_access(PCI_ACCESS_WRITE,
- bus, devfn, where, &data);
-
- return ret;
-}
-
-struct pci_ops loongson_pci_ops = {
- .read = loongson3_pci_pcibios_read,
- .write = loongson3_pci_pcibios_write
-};
diff --git a/arch/mips/pci/ops-nile4.c b/arch/mips/pci/ops-nile4.c
deleted file mode 100644
index b00658d19116..000000000000
--- a/arch/mips/pci/ops-nile4.c
+++ /dev/null
@@ -1,136 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <asm/bootinfo.h>
-
-#include <asm/lasat/lasat.h>
-#include <asm/nile4.h>
-
-#define PCI_ACCESS_READ 0
-#define PCI_ACCESS_WRITE 1
-
-#define LO(reg) (reg / 4)
-#define HI(reg) (reg / 4 + 1)
-
-volatile unsigned long *const vrc_pciregs = (void *) Vrc5074_BASE;
-
-static int nile4_pcibios_config_access(unsigned char access_type,
- struct pci_bus *bus, unsigned int devfn, int where, u32 *val)
-{
- unsigned char busnum = bus->number;
- u32 adr, mask, err;
-
- if ((busnum == 0) && (PCI_SLOT(devfn) > 8))
- /* The addressing scheme chosen leaves room for just
- * 8 devices on the first busnum (besides the PCI
- * controller itself) */
- return PCIBIOS_DEVICE_NOT_FOUND;
-
- if ((busnum == 0) && (devfn == PCI_DEVFN(0, 0))) {
- /* Access controller registers directly */
- if (access_type == PCI_ACCESS_WRITE) {
- vrc_pciregs[(0x200 + where) >> 2] = *val;
- } else {
- *val = vrc_pciregs[(0x200 + where) >> 2];
- }
- return PCIBIOS_SUCCESSFUL;
- }
-
- /* Temporarily map PCI Window 1 to config space */
- mask = vrc_pciregs[LO(NILE4_PCIINIT1)];
- vrc_pciregs[LO(NILE4_PCIINIT1)] = 0x0000001a | (busnum ? 0x200 : 0);
-
- /* Clear PCI Error register. This also clears the Error Type
- * bits in the Control register */
- vrc_pciregs[LO(NILE4_PCIERR)] = 0;
- vrc_pciregs[HI(NILE4_PCIERR)] = 0;
-
- /* Setup address */
- if (busnum == 0)
- adr =
- KSEG1ADDR(PCI_WINDOW1) +
- ((1 << (PCI_SLOT(devfn) + 15)) | (PCI_FUNC(devfn) << 8)
- | (where & ~3));
- else
- adr = KSEG1ADDR(PCI_WINDOW1) | (busnum << 16) | (devfn << 8) |
- (where & ~3);
-
- if (access_type == PCI_ACCESS_WRITE)
- *(u32 *) adr = *val;
- else
- *val = *(u32 *) adr;
-
- /* Check for master or target abort */
- err = (vrc_pciregs[HI(NILE4_PCICTRL)] >> 5) & 0x7;
-
- /* Restore PCI Window 1 */
- vrc_pciregs[LO(NILE4_PCIINIT1)] = mask;
-
- if (err)
- return PCIBIOS_DEVICE_NOT_FOUND;
-
- return PCIBIOS_SUCCESSFUL;
-}
-
-static int nile4_pcibios_read(struct pci_bus *bus, unsigned int devfn,
- int where, int size, u32 *val)
-{
- u32 data = 0;
- int err;
-
- if ((size == 2) && (where & 1))
- return PCIBIOS_BAD_REGISTER_NUMBER;
- else if ((size == 4) && (where & 3))
- return PCIBIOS_BAD_REGISTER_NUMBER;
-
- err = nile4_pcibios_config_access(PCI_ACCESS_READ, bus, devfn, where,
- &data);
- if (err)
- return err;
-
- if (size == 1)
- *val = (data >> ((where & 3) << 3)) & 0xff;
- else if (size == 2)
- *val = (data >> ((where & 3) << 3)) & 0xffff;
- else
- *val = data;
-
- return PCIBIOS_SUCCESSFUL;
-}
-
-static int nile4_pcibios_write(struct pci_bus *bus, unsigned int devfn,
- int where, int size, u32 val)
-{
- u32 data = 0;
- int err;
-
- if ((size == 2) && (where & 1))
- return PCIBIOS_BAD_REGISTER_NUMBER;
- else if ((size == 4) && (where & 3))
- return PCIBIOS_BAD_REGISTER_NUMBER;
-
- err = nile4_pcibios_config_access(PCI_ACCESS_READ, bus, devfn, where,
- &data);
- if (err)
- return err;
-
- if (size == 1)
- data = (data & ~(0xff << ((where & 3) << 3))) |
- (val << ((where & 3) << 3));
- else if (size == 2)
- data = (data & ~(0xffff << ((where & 3) << 3))) |
- (val << ((where & 3) << 3));
- else
- data = val;
-
- if (nile4_pcibios_config_access
- (PCI_ACCESS_WRITE, bus, devfn, where, &data))
- return -1;
-
- return PCIBIOS_SUCCESSFUL;
-}
-
-struct pci_ops nile4_pci_ops = {
- .read = nile4_pcibios_read,
- .write = nile4_pcibios_write,
-};
diff --git a/arch/mips/pci/ops-pmcmsp.c b/arch/mips/pci/ops-pmcmsp.c
deleted file mode 100644
index ad5dd711c575..000000000000
--- a/arch/mips/pci/ops-pmcmsp.c
+++ /dev/null
@@ -1,944 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * PMC-Sierra MSP board specific pci_ops
- *
- * Copyright 2001 MontaVista Software Inc.
- * Copyright 2005-2007 PMC-Sierra, Inc
- *
- * Author: Jun Sun, jsun@mvista.com or jsun@junsun.net
- *
- * Much of the code is derived from the original DDB5074 port by
- * Geert Uytterhoeven <geert@linux-m68k.org>
- */
-
-#define PCI_COUNTERS 1
-
-#include <linux/types.h>
-#include <linux/pci.h>
-#include <linux/interrupt.h>
-
-#if defined(CONFIG_PROC_FS) && defined(PCI_COUNTERS)
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#endif /* CONFIG_PROC_FS && PCI_COUNTERS */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-
-#include <asm/byteorder.h>
-#if defined(CONFIG_PMC_MSP7120_GW) || defined(CONFIG_PMC_MSP7120_EVAL)
-#include <asm/mipsmtregs.h>
-#endif
-
-#include <msp_prom.h>
-#include <msp_cic_int.h>
-#include <msp_pci.h>
-#include <msp_regs.h>
-#include <msp_regops.h>
-
-#define PCI_ACCESS_READ 0
-#define PCI_ACCESS_WRITE 1
-
-#if defined(CONFIG_PROC_FS) && defined(PCI_COUNTERS)
-static char proc_init;
-extern struct proc_dir_entry *proc_bus_pci_dir;
-unsigned int pci_int_count[32];
-
-static void pci_proc_init(void);
-
-/*****************************************************************************
- *
- * FUNCTION: show_msp_pci_counts
- * _________________________________________________________________________
- *
- * DESCRIPTION: Prints the count of how many times each PCI
- *  interrupt has been asserted. Can be invoked via the
- *  /proc filesystem.
- *
- * INPUTS: m - synthetic file construction data
- * v - iterator
- *
- * RETURNS: 0 or error
- *
- ****************************************************************************/
-static int show_msp_pci_counts(struct seq_file *m, void *v)
-{
- int i;
- unsigned int intcount, total = 0;
-
- for (i = 0; i < 32; ++i) {
- intcount = pci_int_count[i];
- if (intcount != 0) {
- seq_printf(m, "[%d] = %u\n", i, intcount);
- total += intcount;
- }
- }
-
- seq_printf(m, "total = %u\n", total);
- return 0;
-}
-
-/*****************************************************************************
- *
- * FUNCTION: gen_pci_cfg_wr_show
- * _________________________________________________________________________
- *
- * DESCRIPTION: Generates a configuration write cycle for debug purposes.
- * The IDSEL line asserted and location and data written are
- * immaterial. Just want to be able to prove that a
- * configuration write can be correctly generated on the
- * PCI bus. Intent is that this function be invocable from
- * the /proc filesystem.
- *
- * INPUTS: m - synthetic file construction data
- * v - iterator
- *
- * RETURNS: 0 or error
- *
- ****************************************************************************/
-static int gen_pci_cfg_wr_show(struct seq_file *m, void *v)
-{
- unsigned char where = 0; /* Write to static Device/Vendor ID */
- unsigned char bus_num = 0; /* Bus 0 */
- unsigned char dev_fn = 0xF; /* Arbitrary device number */
- u32 wr_data = 0xFF00AA00; /* Arbitrary data */
- struct msp_pci_regs *preg = (void *)PCI_BASE_REG;
- unsigned long value;
- int intr;
-
- seq_puts(m, "PMC MSP PCI: Beginning\n");
-
- if (proc_init == 0) {
- pci_proc_init();
- proc_init = ~0;
- }
-
- seq_puts(m, "PMC MSP PCI: Before Cfg Wr\n");
-
- /*
- * Generate PCI Configuration Write Cycle
- */
-
- /* Clear cause register bits */
- preg->if_status = ~(BPCI_IFSTATUS_BC0F | BPCI_IFSTATUS_BC1F);
-
- /* Setup address that is to appear on PCI bus */
- preg->config_addr = BPCI_CFGADDR_ENABLE |
- (bus_num << BPCI_CFGADDR_BUSNUM_SHF) |
- (dev_fn << BPCI_CFGADDR_FUNCTNUM_SHF) |
- (where & 0xFC);
-
- value = cpu_to_le32(wr_data);
-
- /* Launch the PCI configuration write cycle */
- *PCI_CONFIG_SPACE_REG = value;
-
- /*
- * Check if the PCI configuration cycle (rd or wr) succeeded, by
- * checking the status bits for errors like master or target abort.
- */
- intr = preg->if_status;
-
- seq_puts(m, "PMC MSP PCI: After Cfg Wr\n");
- return 0;
-}
-
-/*****************************************************************************
- *
- * FUNCTION: pci_proc_init
- * _________________________________________________________________________
- *
- * DESCRIPTION: Create entries in the /proc filesystem for debug access.
- *
- * INPUTS: none
- *
- * OUTPUTS: none
- *
- * RETURNS: none
- *
- ****************************************************************************/
-static void pci_proc_init(void)
-{
- proc_create_single("pmc_msp_pci_rd_cnt", 0, NULL, show_msp_pci_counts);
- proc_create_single("pmc_msp_pci_cfg_wr", 0, NULL, gen_pci_cfg_wr_show);
-}
-#endif /* CONFIG_PROC_FS && PCI_COUNTERS */
-
-/*****************************************************************************
- *
- * STRUCT: pci_io_resource
- * _________________________________________________________________________
- *
- * DESCRIPTION: Defines the address range that pciauto() will use to
- * assign to the I/O BARs of PCI devices.
- *
- * Use the start and end addresses of the MSP7120 PCI Host
- * Controller I/O space, in the form that they appear on the
- * PCI bus AFTER MSP7120 has performed address translation.
- *
- * For I/O accesses, MSP7120 ignores OATRAN and maps I/O
- * accesses into the bottom 0xFFF region of address space,
- * so that is the range to put into the pci_io_resource
- * struct.
- *
- * In MSP4200, the start address was 0x04 instead of the
- * expected 0x00. Will just assume there was a good reason
- * for this!
- *
- * NOTES: Linux, by default, will assign I/O space to the lowest
- * region of address space. Since MSP7120 and Linux,
- * by default, have no offset in between how they map, the
- * io_offset element of pci_controller struct should be set
- * to zero.
- * ELEMENTS:
- * name - String used for a meaningful name.
- *
- * start - Start address of MSP7120's I/O space, as MSP7120 presents
- * the address on the PCI bus.
- *
- * end - End address of MSP7120's I/O space, as MSP7120 presents
- * the address on the PCI bus.
- *
- * flags - Attributes indicating the type of resource. In this case,
- * indicate I/O space.
- *
- ****************************************************************************/
-static struct resource pci_io_resource = {
- .name = "pci IO space",
- .start = 0x04,
- .end = 0x0FFF,
- .flags = IORESOURCE_IO /* I/O space */
-};
-
-/*****************************************************************************
- *
- * STRUCT: pci_mem_resource
- * _________________________________________________________________________
- *
- * DESCRIPTION: Defines the address range that pciauto() will use to
- * assign to the memory BARs of PCI devices.
- *
- * The .start and .end values are dependent upon how address
- * translation is performed by the OATRAN register.
- *
- * The values to use for .start and .end are the values
- * in the form they appear on the PCI bus AFTER MSP7120 has
- * performed OATRAN address translation.
- *
- * ELEMENTS:
- * name - String used for a meaningful name.
- *
- * start - Start address of MSP7120's memory space, as MSP7120 presents
- * the address on the PCI bus.
- *
- * end - End address of MSP7120's memory space, as MSP7120 presents
- * the address on the PCI bus.
- *
- * flags - Attributes indicating the type of resource. In this case,
- * indicate memory space.
- *
- ****************************************************************************/
-static struct resource pci_mem_resource = {
- .name = "pci memory space",
- .start = MSP_PCI_SPACE_BASE,
- .end = MSP_PCI_SPACE_END,
- .flags = IORESOURCE_MEM /* memory space */
-};
-
-/*****************************************************************************
- *
- * FUNCTION: bpci_interrupt
- * _________________________________________________________________________
- *
- * DESCRIPTION: PCI status interrupt handler. Updates the count of how
- * many times each status bit has been set, then clears
- * the status bits. If the appropriate macros are defined,
- * these counts can be viewed via the /proc filesystem.
- *
- * INPUTS: irq - unused
- * dev_id - unused
- * pt_regs - unused
- *
- * OUTPUTS: none
- *
- * RETURNS: IRQ_HANDLED - success
- *
- ****************************************************************************/
-static irqreturn_t bpci_interrupt(int irq, void *dev_id)
-{
- struct msp_pci_regs *preg = (void *)PCI_BASE_REG;
- unsigned int stat = preg->if_status;
-
-#if defined(CONFIG_PROC_FS) && defined(PCI_COUNTERS)
- int i;
- for (i = 0; i < 32; ++i) {
- if ((1 << i) & stat)
- ++pci_int_count[i];
- }
-#endif /* PROC_FS && PCI_COUNTERS */
-
- /* printk("PCI ISR: Status=%08X\n", stat); */
-
- /* write to clear all asserted interrupts */
- preg->if_status = stat;
-
- return IRQ_HANDLED;
-}
-
-/*****************************************************************************
- *
- * FUNCTION: msp_pcibios_config_access
- * _________________________________________________________________________
- *
- * DESCRIPTION: Performs a PCI configuration access (rd or wr), then
- * checks that the access succeeded by querying MSP7120's
- * PCI status bits.
- *
- * INPUTS:
- * access_type - kind of PCI configuration cycle to perform
- * (read or write). Legal values are
- * PCI_ACCESS_WRITE and PCI_ACCESS_READ.
- *
- * bus - pointer to the bus number of the device to
- * be targeted for the configuration cycle.
- * The only element of the pci_bus structure
- * used is bus->number. This argument determines
- * if the configuration access will be Type 0 or
- * Type 1. Since MSP7120 assumes itself to be the
- * PCI Host, any non-zero bus->number generates
- * a Type 1 access.
- *
- * devfn - this is an 8-bit field. The lower three bits
- * specify the function number of the device to
- * be targeted for the configuration cycle, with
- * all three-bit combinations being legal. The
- * upper five bits specify the device number,
- * with legal values being 10 to 31.
- *
- * where - address within the Configuration Header
- * space to access.
- *
- * data - for write accesses, contains the data to
- * write.
- *
- * OUTPUTS:
- * data - for read accesses, contains the value read.
- *
- * RETURNS: PCIBIOS_SUCCESSFUL - success
- * -1 - access failure
- *
- ****************************************************************************/
-int msp_pcibios_config_access(unsigned char access_type,
- struct pci_bus *bus,
- unsigned int devfn,
- unsigned char where,
- u32 *data)
-{
- struct msp_pci_regs *preg = (void *)PCI_BASE_REG;
- unsigned char bus_num = bus->number;
- unsigned char dev_fn = (unsigned char)devfn;
- unsigned long intr;
- unsigned long value;
- static char pciirqflag;
- int ret;
-#if defined(CONFIG_PMC_MSP7120_GW) || defined(CONFIG_PMC_MSP7120_EVAL)
- unsigned int vpe_status;
-#endif
-
-#if defined(CONFIG_PROC_FS) && defined(PCI_COUNTERS)
- if (proc_init == 0) {
- pci_proc_init();
- proc_init = ~0;
- }
-#endif /* CONFIG_PROC_FS && PCI_COUNTERS */
-
- /*
- * The first time this function is invoked, allocate
- * an interrupt line for PCI host status interrupts. The
- * allocation assigns an interrupt handler to the interrupt.
- */
- if (pciirqflag == 0) {
- ret = request_irq(MSP_INT_PCI,/* Hardcoded internal MSP7120 wiring */
- bpci_interrupt,
- IRQF_SHARED,
- "PMC MSP PCI Host",
- preg);
- if (ret != 0)
- return ret;
- pciirqflag = ~0;
- }
-
-#if defined(CONFIG_PMC_MSP7120_GW) || defined(CONFIG_PMC_MSP7120_EVAL)
- vpe_status = dvpe();
-#endif
-
- /*
- * Clear PCI cause register bits.
- *
- * In Polo, the PCI Host had a dedicated DMA called the
- * Block Copy (not to be confused with the general purpose Block
- * Copy Engine block). There appear to have been special interrupts
- * for this Block Copy, called Block Copy 0 Fault (BC0F) and
- * Block Copy 1 Fault (BC1F). MSP4200 and MSP7120 don't have this
- * dedicated Block Copy block, so these two interrupts are now
- * marked reserved. In case the Block Copy is resurrected in a
- * future design, maintain the code that treats these two interrupts
- * specially.
- *
- * Write to clear all interrupts in the PCI status register, aside
- * from BC0F and BC1F.
- */
- preg->if_status = ~(BPCI_IFSTATUS_BC0F | BPCI_IFSTATUS_BC1F);
-
- /* Setup address that is to appear on PCI bus */
- preg->config_addr = BPCI_CFGADDR_ENABLE |
- (bus_num << BPCI_CFGADDR_BUSNUM_SHF) |
- (dev_fn << BPCI_CFGADDR_FUNCTNUM_SHF) |
- (where & 0xFC);
-
- /* IF access is a PCI configuration write */
- if (access_type == PCI_ACCESS_WRITE) {
- value = cpu_to_le32(*data);
- *PCI_CONFIG_SPACE_REG = value;
- } else {
- /* ELSE access is a PCI configuration read */
- value = le32_to_cpu(*PCI_CONFIG_SPACE_REG);
- *data = value;
- }
-
- /*
- * Check if the PCI configuration cycle (rd or wr) succeeded, by
- * checking the status bits for errors like master or target abort.
- */
- intr = preg->if_status;
-
- /* Clear config access */
- preg->config_addr = 0;
-
- /* IF error occurred */
- if (intr & ~(BPCI_IFSTATUS_BC0F | BPCI_IFSTATUS_BC1F)) {
- /* Clear status bits */
- preg->if_status = ~(BPCI_IFSTATUS_BC0F | BPCI_IFSTATUS_BC1F);
-
-#if defined(CONFIG_PMC_MSP7120_GW) || defined(CONFIG_PMC_MSP7120_EVAL)
- evpe(vpe_status);
-#endif
-
- return -1;
- }
-
-#if defined(CONFIG_PMC_MSP7120_GW) || defined(CONFIG_PMC_MSP7120_EVAL)
- evpe(vpe_status);
-#endif
-
- return PCIBIOS_SUCCESSFUL;
-}
-
-/*****************************************************************************
- *
- * FUNCTION: msp_pcibios_read_config_byte
- * _________________________________________________________________________
- *
- * DESCRIPTION: Read a byte from PCI configuration address space.
- * Since the hardware can't address 8 bit chunks
- * directly, read a 32-bit chunk, then mask off extraneous
- * bits.
- *
- * INPUTS bus - structure containing attributes for the PCI bus
- * that the read is destined for.
- * devfn - device/function combination that the read is
- * destined for.
- * where - register within the Configuration Header space
- * to access.
- *
- * OUTPUTS val - read data
- *
- * RETURNS: PCIBIOS_SUCCESSFUL - success
- * -1 - read access failure
- *
- ****************************************************************************/
-static int
-msp_pcibios_read_config_byte(struct pci_bus *bus,
- unsigned int devfn,
- int where,
- u32 *val)
-{
- u32 data = 0;
-
- /*
- * If the config access did not complete normally (e.g., underwent
- * master abort) do the PCI compliant thing, which is to supply an
- * all ones value.
- */
- if (msp_pcibios_config_access(PCI_ACCESS_READ, bus, devfn,
- where, &data)) {
- *val = 0xFFFFFFFF;
- return -1;
- }
-
- *val = (data >> ((where & 3) << 3)) & 0x0ff;
-
- return PCIBIOS_SUCCESSFUL;
-}
-
-/*****************************************************************************
- *
- * FUNCTION: msp_pcibios_read_config_word
- * _________________________________________________________________________
- *
- * DESCRIPTION: Read a word (16 bits) from PCI configuration address space.
- * Since the hardware can't address 16 bit chunks
- * directly, read a 32-bit chunk, then mask off extraneous
- * bits.
- *
- * INPUTS bus - structure containing attributes for the PCI bus
- * that the read is destined for.
- * devfn - device/function combination that the read is
- * destined for.
- * where - register within the Configuration Header space
- * to access.
- *
- * OUTPUTS val - read data
- *
- * RETURNS: PCIBIOS_SUCCESSFUL - success
- * PCIBIOS_BAD_REGISTER_NUMBER - bad register address
- * -1 - read access failure
- *
- ****************************************************************************/
-static int
-msp_pcibios_read_config_word(struct pci_bus *bus,
- unsigned int devfn,
- int where,
- u32 *val)
-{
- u32 data = 0;
-
- /* if (where & 1) */ /* Commented out non-compliant code.
- * Should allow word access to configuration
- * registers, with only exception being when
- * the word access would wrap around into
- * the next dword.
- */
- if ((where & 3) == 3) {
- *val = 0xFFFFFFFF;
- return PCIBIOS_BAD_REGISTER_NUMBER;
- }
-
- /*
- * If the config access did not complete normally (e.g., underwent
- * master abort) do the PCI compliant thing, which is to supply an
- * all ones value.
- */
- if (msp_pcibios_config_access(PCI_ACCESS_READ, bus, devfn,
- where, &data)) {
- *val = 0xFFFFFFFF;
- return -1;
- }
-
- *val = (data >> ((where & 3) << 3)) & 0x0ffff;
-
- return PCIBIOS_SUCCESSFUL;
-}
-
-/*****************************************************************************
- *
- * FUNCTION: msp_pcibios_read_config_dword
- * _________________________________________________________________________
- *
- * DESCRIPTION: Read a double word (32 bits) from PCI configuration
- * address space.
- *
- * INPUTS bus - structure containing attributes for the PCI bus
- * that the read is destined for.
- * devfn - device/function combination that the read is
- * destined for.
- * where - register within the Configuration Header space
- * to access.
- *
- * OUTPUTS val - read data
- *
- * RETURNS: PCIBIOS_SUCCESSFUL - success
- * PCIBIOS_BAD_REGISTER_NUMBER - bad register address
- * -1 - read access failure
- *
- ****************************************************************************/
-static int
-msp_pcibios_read_config_dword(struct pci_bus *bus,
- unsigned int devfn,
- int where,
- u32 *val)
-{
- u32 data = 0;
-
- /* Address must be dword aligned. */
- if (where & 3) {
- *val = 0xFFFFFFFF;
- return PCIBIOS_BAD_REGISTER_NUMBER;
- }
-
- /*
- * If the config access did not complete normally (e.g., underwent
- * master abort) do the PCI compliant thing, which is to supply an
- * all ones value.
- */
- if (msp_pcibios_config_access(PCI_ACCESS_READ, bus, devfn,
- where, &data)) {
- *val = 0xFFFFFFFF;
- return -1;
- }
-
- *val = data;
-
- return PCIBIOS_SUCCESSFUL;
-}
-
-/*****************************************************************************
- *
- * FUNCTION: msp_pcibios_write_config_byte
- * _________________________________________________________________________
- *
- * DESCRIPTION: Write a byte to PCI configuration address space.
- * Since the hardware can't address 8 bit chunks
- * directly, a read-modify-write is performed.
- *
- * INPUTS bus - structure containing attributes for the PCI bus
- * that the write is destined for.
- * devfn - device/function combination that the write is
- * destined for.
- * where - register within the Configuration Header space
- * to access.
- * val - value to write
- *
- * OUTPUTS none
- *
- * RETURNS: PCIBIOS_SUCCESSFUL - success
- * -1 - write access failure
- *
- ****************************************************************************/
-static int
-msp_pcibios_write_config_byte(struct pci_bus *bus,
- unsigned int devfn,
- int where,
- u8 val)
-{
- u32 data = 0;
-
- /* read config space */
- if (msp_pcibios_config_access(PCI_ACCESS_READ, bus, devfn,
- where, &data))
- return -1;
-
- /* modify the byte within the dword */
- data = (data & ~(0xff << ((where & 3) << 3))) |
- (val << ((where & 3) << 3));
-
- /* write back the full dword */
- if (msp_pcibios_config_access(PCI_ACCESS_WRITE, bus, devfn,
- where, &data))
- return -1;
-
- return PCIBIOS_SUCCESSFUL;
-}
-
-/*****************************************************************************
- *
- * FUNCTION: msp_pcibios_write_config_word
- * _________________________________________________________________________
- *
- * DESCRIPTION: Write a word (16-bits) to PCI configuration address space.
- * Since the hardware can't address 16 bit chunks
- * directly, a read-modify-write is performed.
- *
- * INPUTS bus - structure containing attributes for the PCI bus
- * that the write is destined for.
- * devfn - device/function combination that the write is
- * destined for.
- * where - register within the Configuration Header space
- * to access.
- * val - value to write
- *
- * OUTPUTS none
- *
- * RETURNS: PCIBIOS_SUCCESSFUL - success
- * PCIBIOS_BAD_REGISTER_NUMBER - bad register address
- * -1 - write access failure
- *
- ****************************************************************************/
-static int
-msp_pcibios_write_config_word(struct pci_bus *bus,
- unsigned int devfn,
- int where,
- u16 val)
-{
- u32 data = 0;
-
- /* Fixed non-compliance: if (where & 1) */
- if ((where & 3) == 3)
- return PCIBIOS_BAD_REGISTER_NUMBER;
-
- /* read config space */
- if (msp_pcibios_config_access(PCI_ACCESS_READ, bus, devfn,
- where, &data))
- return -1;
-
- /* modify the word within the dword */
- data = (data & ~(0xffff << ((where & 3) << 3))) |
- (val << ((where & 3) << 3));
-
- /* write back the full dword */
- if (msp_pcibios_config_access(PCI_ACCESS_WRITE, bus, devfn,
- where, &data))
- return -1;
-
- return PCIBIOS_SUCCESSFUL;
-}
-
-/*****************************************************************************
- *
- * FUNCTION: msp_pcibios_write_config_dword
- * _________________________________________________________________________
- *
- * DESCRIPTION: Write a double word (32-bits) to PCI configuration address
- * space.
- *
- * INPUTS bus - structure containing attributes for the PCI bus
- * that the write is destined for.
- * devfn - device/function combination that the write is
- * destined for.
- * where - register within the Configuration Header space
- * to access.
- * val - value to write
- *
- * OUTPUTS none
- *
- * RETURNS: PCIBIOS_SUCCESSFUL - success
- * PCIBIOS_BAD_REGISTER_NUMBER - bad register address
- * -1 - write access failure
- *
- ****************************************************************************/
-static int
-msp_pcibios_write_config_dword(struct pci_bus *bus,
- unsigned int devfn,
- int where,
- u32 val)
-{
- /* check that address is dword aligned */
- if (where & 3)
- return PCIBIOS_BAD_REGISTER_NUMBER;
-
- /* perform write */
- if (msp_pcibios_config_access(PCI_ACCESS_WRITE, bus, devfn,
- where, &val))
- return -1;
-
- return PCIBIOS_SUCCESSFUL;
-}
-
-/*****************************************************************************
- *
- * FUNCTION: msp_pcibios_read_config
- * _________________________________________________________________________
- *
- * DESCRIPTION: Interface the PCI configuration read request with
- * the appropriate function, based on how many bytes
- * the read request is.
- *
- * INPUTS bus - structure containing attributes for the PCI bus
- * that the write is destined for.
- * devfn - device/function combination that the write is
- * destined for.
- * where - register within the Configuration Header space
- * to access.
- * size - in units of bytes, should be 1, 2, or 4.
- *
- * OUTPUTS val - value read, with any extraneous bytes masked
- * to zero.
- *
- * RETURNS: PCIBIOS_SUCCESSFUL - success
- * -1 - failure
- *
- ****************************************************************************/
-int
-msp_pcibios_read_config(struct pci_bus *bus,
- unsigned int devfn,
- int where,
- int size,
- u32 *val)
-{
- if (size == 1) {
- if (msp_pcibios_read_config_byte(bus, devfn, where, val)) {
- return -1;
- }
- } else if (size == 2) {
- if (msp_pcibios_read_config_word(bus, devfn, where, val)) {
- return -1;
- }
- } else if (size == 4) {
- if (msp_pcibios_read_config_dword(bus, devfn, where, val)) {
- return -1;
- }
- } else {
- *val = 0xFFFFFFFF;
- return -1;
- }
-
- return PCIBIOS_SUCCESSFUL;
-}
-
-/*****************************************************************************
- *
- * FUNCTION: msp_pcibios_write_config
- * _________________________________________________________________________
- *
- * DESCRIPTION: Interface the PCI configuration write request with
- * the appropriate function, based on how many bytes
- * the write request is.
- *
- * INPUTS bus - structure containing attributes for the PCI bus
- * that the write is destined for.
- * devfn - device/function combination that the write is
- * destined for.
- * where - register within the Configuration Header space
- * to access.
- * size - in units of bytes, should be 1, 2, or 4.
- * val - value to write
- *
- * OUTPUTS: none
- *
- * RETURNS: PCIBIOS_SUCCESSFUL - success
- * -1 - failure
- *
- ****************************************************************************/
-int
-msp_pcibios_write_config(struct pci_bus *bus,
- unsigned int devfn,
- int where,
- int size,
- u32 val)
-{
- if (size == 1) {
- if (msp_pcibios_write_config_byte(bus, devfn,
- where, (u8)(0xFF & val))) {
- return -1;
- }
- } else if (size == 2) {
- if (msp_pcibios_write_config_word(bus, devfn,
- where, (u16)(0xFFFF & val))) {
- return -1;
- }
- } else if (size == 4) {
- if (msp_pcibios_write_config_dword(bus, devfn, where, val)) {
- return -1;
- }
- } else {
- return -1;
- }
-
- return PCIBIOS_SUCCESSFUL;
-}
-
-/*****************************************************************************
- *
- * STRUCTURE: msp_pci_ops
- * _________________________________________________________________________
- *
- * DESCRIPTION: structure to abstract the hardware specific PCI
- * configuration accesses.
- *
- * ELEMENTS:
- * read - function for Linux to generate PCI Configuration reads.
- * write - function for Linux to generate PCI Configuration writes.
- *
- ****************************************************************************/
-struct pci_ops msp_pci_ops = {
- .read = msp_pcibios_read_config,
- .write = msp_pcibios_write_config
-};
-
-/*****************************************************************************
- *
- * STRUCTURE: msp_pci_controller
- * _________________________________________________________________________
- *
- * Describes the attributes of the MSP7120 PCI Host Controller
- *
- * ELEMENTS:
- * pci_ops - abstracts the hardware specific PCI configuration
- * accesses.
- *
- * mem_resource - address range pciauto() uses to assign to PCI device
- * memory BARs.
- *
- * mem_offset - offset between how MSP7120 outbound PCI memory
- * transaction addresses appear on the PCI bus and how Linux
- * wants to configure memory BARs of the PCI devices.
- * MSP7120 does nothing funky, so just set to zero.
- *
- * io_resource - address range pciauto() uses to assign to PCI device
- * I/O BARs.
- *
- * io_offset - offset between how MSP7120 outbound PCI I/O
- * transaction addresses appear on the PCI bus and how
- * Linux defaults to configure I/O BARs of the PCI devices.
- * MSP7120 maps outbound I/O accesses into the bottom
- * 4K of PCI address space (and ignores OATRAN).
- * Since the Linux default is to configure I/O BARs to the
- * bottom 4K, no special offset is needed. Just set to zero.
- *
- ****************************************************************************/
-static struct pci_controller msp_pci_controller = {
- .pci_ops = &msp_pci_ops,
- .mem_resource = &pci_mem_resource,
- .mem_offset = 0,
- .io_map_base = MSP_PCI_IOSPACE_BASE,
- .io_resource = &pci_io_resource,
- .io_offset = 0
-};
-
-/*****************************************************************************
- *
- * FUNCTION: msp_pci_init
- * _________________________________________________________________________
- *
- * DESCRIPTION: Initialize the PCI Host Controller and register it with
- * Linux so Linux can seize control of the PCI bus.
- *
- ****************************************************************************/
-void __init msp_pci_init(void)
-{
- struct msp_pci_regs *preg = (void *)PCI_BASE_REG;
- u32 id;
-
- /* Extract Device ID */
- id = read_reg32(PCI_JTAG_DEVID_REG, 0xFFFF) >> 12;
-
- /* Check if JTAG ID identifies MSP7120 */
- if (!MSP_HAS_PCI(id)) {
- printk(KERN_WARNING "PCI: No PCI; id reads as %x\n", id);
- goto no_pci;
- }
-
- /*
- * Enable flushing of the PCI-SDRAM queue upon a read
- * of the SDRAM's Memory Configuration Register.
- */
- *(unsigned long *)QFLUSH_REG_1 = 3;
-
- /* Configure PCI Host Controller. */
- preg->if_status = ~0; /* Clear cause register bits */
- preg->config_addr = 0; /* Clear config access */
- preg->oatran = MSP_PCI_OATRAN; /* PCI outbound addr translation */
- preg->if_mask = 0xF8BF87C0; /* Enable all PCI status interrupts */
-
- /* configure so inb(), outb(), and family are functional */
- set_io_port_base(MSP_PCI_IOSPACE_BASE);
-
- /* Tell Linux the details of the MSP7120 PCI Host Controller */
- register_pci_controller(&msp_pci_controller);
-
- return;
-
-no_pci:
- /* Disable PCI channel */
- printk(KERN_WARNING "PCI: no host PCI bus detected\n");
-}
diff --git a/arch/mips/pci/pci-alchemy.c b/arch/mips/pci/pci-alchemy.c
index 01a2af8215c8..7285b5667568 100644
--- a/arch/mips/pci/pci-alchemy.c
+++ b/arch/mips/pci/pci-alchemy.c
@@ -52,7 +52,7 @@ struct alchemy_pci_context {
static struct alchemy_pci_context *__alchemy_pci_ctx;
-/* IO/MEM resources for PCI. Keep the memres in sync with __fixup_bigphys_addr
+/* IO/MEM resources for PCI. Keep the memres in sync with fixup_bigphys_addr
* in arch/mips/alchemy/common/setup.c
*/
static struct resource alchemy_pci_def_memres = {
diff --git a/arch/mips/pci/pci-emma2rh.c b/arch/mips/pci/pci-emma2rh.c
deleted file mode 100644
index 156091a3e341..000000000000
--- a/arch/mips/pci/pci-emma2rh.c
+++ /dev/null
@@ -1,72 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Copyright (C) NEC Electronics Corporation 2004-2006
- *
- * This file is based on the arch/mips/ddb5xxx/ddb5477/pci.c
- *
- * Copyright 2001 MontaVista Software Inc.
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/pci.h>
-
-#include <asm/bootinfo.h>
-
-#include <asm/emma/emma2rh.h>
-
-static struct resource pci_io_resource = {
- .name = "pci IO space",
- .start = EMMA2RH_PCI_IO_BASE,
- .end = EMMA2RH_PCI_IO_BASE + EMMA2RH_PCI_IO_SIZE - 1,
- .flags = IORESOURCE_IO,
-};
-
-static struct resource pci_mem_resource = {
- .name = "pci memory space",
- .start = EMMA2RH_PCI_MEM_BASE,
- .end = EMMA2RH_PCI_MEM_BASE + EMMA2RH_PCI_MEM_SIZE - 1,
- .flags = IORESOURCE_MEM,
-};
-
-extern struct pci_ops emma2rh_pci_ops;
-
-static struct pci_controller emma2rh_pci_controller = {
- .pci_ops = &emma2rh_pci_ops,
- .mem_resource = &pci_mem_resource,
- .io_resource = &pci_io_resource,
- .mem_offset = -0x04000000,
- .io_offset = 0,
-};
-
-static void __init emma2rh_pci_init(void)
-{
- /* setup PCI interface */
- emma2rh_out32(EMMA2RH_PCI_ARBIT_CTR, 0x70f);
-
- emma2rh_out32(EMMA2RH_PCI_IWIN0_CTR, 0x80000a18);
- emma2rh_out32(EMMA2RH_PCI_CONFIG_BASE + PCI_COMMAND,
- PCI_STATUS_DEVSEL_MEDIUM | PCI_STATUS_CAP_LIST |
- PCI_COMMAND_MASTER | PCI_COMMAND_MEMORY);
- emma2rh_out32(EMMA2RH_PCI_CONFIG_BASE + PCI_BASE_ADDRESS_0, 0x10000000);
- emma2rh_out32(EMMA2RH_PCI_CONFIG_BASE + PCI_BASE_ADDRESS_1, 0x00000000);
-
- emma2rh_out32(EMMA2RH_PCI_IWIN0_CTR, 0x12000000 | 0x218);
- emma2rh_out32(EMMA2RH_PCI_IWIN1_CTR, 0x18000000 | 0x600);
- emma2rh_out32(EMMA2RH_PCI_INIT_ESWP, 0x00000200);
-
- emma2rh_out32(EMMA2RH_PCI_TWIN_CTR, 0x00009200);
- emma2rh_out32(EMMA2RH_PCI_TWIN_BADR, 0x00000000);
- emma2rh_out32(EMMA2RH_PCI_TWIN0_DADR, 0x00000000);
- emma2rh_out32(EMMA2RH_PCI_TWIN1_DADR, 0x00000000);
-}
-
-static int __init emma2rh_pci_setup(void)
-{
- emma2rh_pci_init();
- register_pci_controller(&emma2rh_pci_controller);
- return 0;
-}
-
-arch_initcall(emma2rh_pci_setup);
diff --git a/arch/mips/pci/pci-lasat.c b/arch/mips/pci/pci-lasat.c
deleted file mode 100644
index 47f4ee6bbb3b..000000000000
--- a/arch/mips/pci/pci-lasat.c
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2000, 2001, 04 Keith M Wesolowski
- */
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/types.h>
-
-#include <asm/lasat/lasat.h>
-
-#include <irq.h>
-
-extern struct pci_ops nile4_pci_ops;
-extern struct pci_ops gt64xxx_pci0_ops;
-static struct resource lasat_pci_mem_resource = {
- .name = "LASAT PCI MEM",
- .start = 0x18000000,
- .end = 0x19ffffff,
- .flags = IORESOURCE_MEM,
-};
-
-static struct resource lasat_pci_io_resource = {
- .name = "LASAT PCI IO",
- .start = 0x1a000000,
- .end = 0x1bffffff,
- .flags = IORESOURCE_IO,
-};
-
-static struct pci_controller lasat_pci_controller = {
- .mem_resource = &lasat_pci_mem_resource,
- .io_resource = &lasat_pci_io_resource,
-};
-
-static int __init lasat_pci_setup(void)
-{
- printk(KERN_DEBUG "PCI: starting\n");
-
- if (IS_LASAT_200())
- lasat_pci_controller.pci_ops = &nile4_pci_ops;
- else
- lasat_pci_controller.pci_ops = &gt64xxx_pci0_ops;
-
- register_pci_controller(&lasat_pci_controller);
-
- return 0;
-}
-
-arch_initcall(lasat_pci_setup);
-
-#define LASAT_IRQ_ETH1 (LASAT_IRQ_BASE + 0)
-#define LASAT_IRQ_ETH0 (LASAT_IRQ_BASE + 1)
-#define LASAT_IRQ_HDC (LASAT_IRQ_BASE + 2)
-#define LASAT_IRQ_COMP (LASAT_IRQ_BASE + 3)
-#define LASAT_IRQ_HDLC (LASAT_IRQ_BASE + 4)
-#define LASAT_IRQ_PCIA (LASAT_IRQ_BASE + 5)
-#define LASAT_IRQ_PCIB (LASAT_IRQ_BASE + 6)
-#define LASAT_IRQ_PCIC (LASAT_IRQ_BASE + 7)
-#define LASAT_IRQ_PCID (LASAT_IRQ_BASE + 8)
-
-int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
-{
- switch (slot) {
- case 1:
- case 2:
- case 3:
- return LASAT_IRQ_PCIA + (((slot-1) + (pin-1)) % 4);
- case 4:
- return LASAT_IRQ_ETH1; /* Ethernet 1 (LAN 2) */
- case 5:
- return LASAT_IRQ_ETH0; /* Ethernet 0 (LAN 1) */
- case 6:
- return LASAT_IRQ_HDC; /* IDE controller */
- default:
- return 0xff; /* Illegal */
- }
-
- return -1;
-}
-
-/* Do platform specific device initialization at pci_enable_device() time */
-int pcibios_plat_dev_init(struct pci_dev *dev)
-{
- return 0;
-}
diff --git a/arch/mips/pic32/Platform b/arch/mips/pic32/Platform
index cd2084f44507..1e92e52a137b 100644
--- a/arch/mips/pic32/Platform
+++ b/arch/mips/pic32/Platform
@@ -1,7 +1,6 @@
#
# PIC32MZDA
#
-platform-$(CONFIG_PIC32MZDA) += pic32/
cflags-$(CONFIG_PIC32MZDA) += -I$(srctree)/arch/mips/include/asm/mach-pic32
load-$(CONFIG_PIC32MZDA) += 0xffffffff88000000
all-$(CONFIG_PIC32MZDA) := $(COMPRESSION_FNAME).bin
diff --git a/arch/mips/pistachio/Platform b/arch/mips/pistachio/Platform
index c3592b374ad2..f73a1a929965 100644
--- a/arch/mips/pistachio/Platform
+++ b/arch/mips/pistachio/Platform
@@ -1,7 +1,6 @@
#
# IMG Pistachio SoC
#
-platform-$(CONFIG_MACH_PISTACHIO) += pistachio/
cflags-$(CONFIG_MACH_PISTACHIO) += \
-I$(srctree)/arch/mips/include/asm/mach-pistachio
load-$(CONFIG_MACH_PISTACHIO) += 0xffffffff80400000
diff --git a/arch/mips/pistachio/init.c b/arch/mips/pistachio/init.c
index a09a5da38e6b..558995ed6fe8 100644
--- a/arch/mips/pistachio/init.c
+++ b/arch/mips/pistachio/init.c
@@ -83,12 +83,12 @@ phys_addr_t mips_cdmm_phys_base(void)
static void __init mips_nmi_setup(void)
{
void *base;
- extern char except_vec_nmi;
+ extern char except_vec_nmi[];
base = cpu_has_veic ?
(void *)(CAC_BASE + 0xa80) :
(void *)(CAC_BASE + 0x380);
- memcpy(base, &except_vec_nmi, 0x80);
+ memcpy(base, except_vec_nmi, 0x80);
flush_icache_range((unsigned long)base,
(unsigned long)base + 0x80);
}
@@ -96,12 +96,12 @@ static void __init mips_nmi_setup(void)
static void __init mips_ejtag_setup(void)
{
void *base;
- extern char except_vec_ejtag_debug;
+ extern char except_vec_ejtag_debug[];
base = cpu_has_veic ?
(void *)(CAC_BASE + 0xa00) :
(void *)(CAC_BASE + 0x300);
- memcpy(base, &except_vec_ejtag_debug, 0x80);
+ memcpy(base, except_vec_ejtag_debug, 0x80);
flush_icache_range((unsigned long)base,
(unsigned long)base + 0x80);
}
diff --git a/arch/mips/pmcs-msp71xx/Kconfig b/arch/mips/pmcs-msp71xx/Kconfig
deleted file mode 100644
index b185b7620c97..000000000000
--- a/arch/mips/pmcs-msp71xx/Kconfig
+++ /dev/null
@@ -1,50 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-choice
- prompt "PMC-Sierra MSP SOC type"
- depends on PMC_MSP
-
-config PMC_MSP4200_EVAL
- bool "PMC-Sierra MSP4200 Eval Board"
- select IRQ_MSP_SLP
- select HAVE_PCI
- select MIPS_L1_CACHE_SHIFT_4
-
-config PMC_MSP4200_GW
- bool "PMC-Sierra MSP4200 VoIP Gateway"
- select IRQ_MSP_SLP
- select HAVE_PCI
-
-config PMC_MSP7120_EVAL
- bool "PMC-Sierra MSP7120 Eval Board"
- select SYS_SUPPORTS_MULTITHREADING
- select IRQ_MSP_CIC
- select HAVE_PCI
-
-config PMC_MSP7120_GW
- bool "PMC-Sierra MSP7120 Residential Gateway"
- select SYS_SUPPORTS_MULTITHREADING
- select IRQ_MSP_CIC
- select HAVE_PCI
- select MSP_HAS_USB
- select MSP_ETH
-
-config PMC_MSP7120_FPGA
- bool "PMC-Sierra MSP7120 FPGA"
- select SYS_SUPPORTS_MULTITHREADING
- select IRQ_MSP_CIC
- select HAVE_PCI
-
-endchoice
-
-config MSP_HAS_USB
- bool
- depends on PMC_MSP
-
-config MSP_ETH
- bool
- select MSP_HAS_MAC
- depends on PMC_MSP
-
-config MSP_HAS_MAC
- bool
- depends on PMC_MSP
diff --git a/arch/mips/pmcs-msp71xx/Makefile b/arch/mips/pmcs-msp71xx/Makefile
deleted file mode 100644
index c040bd6ed62d..000000000000
--- a/arch/mips/pmcs-msp71xx/Makefile
+++ /dev/null
@@ -1,13 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-#
-# Makefile for the PMC-Sierra MSP SOCs
-#
-obj-y += msp_prom.o msp_setup.o msp_irq.o \
- msp_time.o msp_serial.o msp_elb.o
-obj-$(CONFIG_PMC_MSP7120_GW) += msp_hwbutton.o
-obj-$(CONFIG_IRQ_MSP_SLP) += msp_irq_slp.o
-obj-$(CONFIG_IRQ_MSP_CIC) += msp_irq_cic.o msp_irq_per.o
-obj-$(CONFIG_PCI) += msp_pci.o
-obj-$(CONFIG_MSP_HAS_MAC) += msp_eth.o
-obj-$(CONFIG_MSP_HAS_USB) += msp_usb.o
-obj-$(CONFIG_MIPS_MT_SMP) += msp_smp.o
diff --git a/arch/mips/pmcs-msp71xx/Platform b/arch/mips/pmcs-msp71xx/Platform
deleted file mode 100644
index 7af0734a5007..000000000000
--- a/arch/mips/pmcs-msp71xx/Platform
+++ /dev/null
@@ -1,7 +0,0 @@
-#
-# PMC-Sierra MSP SOCs
-#
-platform-$(CONFIG_PMC_MSP) += pmcs-msp71xx/
-cflags-$(CONFIG_PMC_MSP) += -I$(srctree)/arch/mips/include/asm/mach-pmcs-msp71xx \
- -mno-branch-likely
-load-$(CONFIG_PMC_MSP) += 0xffffffff80100000
diff --git a/arch/mips/pmcs-msp71xx/msp_elb.c b/arch/mips/pmcs-msp71xx/msp_elb.c
deleted file mode 100644
index 3e9641007216..000000000000
--- a/arch/mips/pmcs-msp71xx/msp_elb.c
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Sets up the proper Chip Select configuration registers. It is assumed that
- * PMON sets up the ADDR and MASK registers properly.
- *
- * Copyright 2005-2006 PMC-Sierra, Inc.
- * Author: Marc St-Jean, Marc_St-Jean@pmc-sierra.com
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
- * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
- * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
- * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <msp_regs.h>
-
-static int __init msp_elb_setup(void)
-{
-#if defined(CONFIG_PMC_MSP7120_GW) \
- || defined(CONFIG_PMC_MSP7120_EVAL)
- /*
- * Force all CNFG to be identical and equal to CS0,
- * according to OPS doc
- */
- *CS1_CNFG_REG = *CS2_CNFG_REG = *CS3_CNFG_REG = *CS0_CNFG_REG;
-#endif
- return 0;
-}
-
-subsys_initcall(msp_elb_setup);
diff --git a/arch/mips/pmcs-msp71xx/msp_eth.c b/arch/mips/pmcs-msp71xx/msp_eth.c
deleted file mode 100644
index 15679b427f44..000000000000
--- a/arch/mips/pmcs-msp71xx/msp_eth.c
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * The setup file for ethernet related hardware on PMC-Sierra MSP processors.
- *
- * Copyright 2010 PMC-Sierra, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
- * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
- * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
- * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/ioport.h>
-#include <linux/platform_device.h>
-#include <linux/delay.h>
-#include <msp_regs.h>
-#include <msp_int.h>
-#include <msp_gpio_macros.h>
-
-
-#define MSP_ETHERNET_GPIO0 14
-#define MSP_ETHERNET_GPIO1 15
-#define MSP_ETHERNET_GPIO2 16
-
-#define MSP_ETH_ID "pmc_mspeth"
-#define MSP_ETH_SIZE 0xE0
-static struct resource msp_eth0_resources[] = {
- [0] = {
- .start = MSP_MAC0_BASE,
- .end = MSP_MAC0_BASE + MSP_ETH_SIZE - 1,
- .flags = IORESOURCE_MEM,
- },
- [1] = {
- .start = MSP_INT_MAC0,
- .end = MSP_INT_MAC0,
- .flags = IORESOURCE_IRQ,
- },
-};
-
-static struct resource msp_eth1_resources[] = {
- [0] = {
- .start = MSP_MAC1_BASE,
- .end = MSP_MAC1_BASE + MSP_ETH_SIZE - 1,
- .flags = IORESOURCE_MEM,
- },
- [1] = {
- .start = MSP_INT_MAC1,
- .end = MSP_INT_MAC1,
- .flags = IORESOURCE_IRQ,
- },
-};
-
-
-
-static struct platform_device mspeth_device[] = {
- [0] = {
- .name = MSP_ETH_ID,
- .id = 0,
- .num_resources = ARRAY_SIZE(msp_eth0_resources),
- .resource = msp_eth0_resources,
- },
- [1] = {
- .name = MSP_ETH_ID,
- .id = 1,
- .num_resources = ARRAY_SIZE(msp_eth1_resources),
- .resource = msp_eth1_resources,
- },
-
-};
-#define msp_eth_devs mspeth_device
-
-int __init msp_eth_setup(void)
-{
- int i, ret = 0;
-
- /* Configure the GPIO and take the ethernet PHY out of reset */
- msp_gpio_pin_mode(MSP_GPIO_OUTPUT, MSP_ETHERNET_GPIO0);
- msp_gpio_pin_hi(MSP_ETHERNET_GPIO0);
-
- for (i = 0; i < ARRAY_SIZE(msp_eth_devs); i++) {
- ret = platform_device_register(&msp_eth_devs[i]);
- printk(KERN_INFO "device: %d, return value = %d\n", i, ret);
- if (ret) {
- platform_device_unregister(&msp_eth_devs[i]);
- break;
- }
- }
-
- if (ret)
- printk(KERN_WARNING "Could not initialize "
- "MSPETH device structures.\n");
-
- return ret;
-}
-subsys_initcall(msp_eth_setup);
diff --git a/arch/mips/pmcs-msp71xx/msp_hwbutton.c b/arch/mips/pmcs-msp71xx/msp_hwbutton.c
deleted file mode 100644
index bb57ed9ea2bd..000000000000
--- a/arch/mips/pmcs-msp71xx/msp_hwbutton.c
+++ /dev/null
@@ -1,165 +0,0 @@
-/*
- * Sets up interrupt handlers for various hardware switches which are
- * connected to interrupt lines.
- *
- * Copyright 2005-2007 PMC-Sierra, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
- * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
- * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
- * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/interrupt.h>
-
-#include <msp_int.h>
-#include <msp_regs.h>
-#include <msp_regops.h>
-
-/* For hwbutton_interrupt->initial_state */
-#define HWBUTTON_HI 0x1
-#define HWBUTTON_LO 0x2
-
-/*
- * This struct describes a hardware button
- */
-struct hwbutton_interrupt {
- char *name; /* Name of button */
- int irq; /* Actual LINUX IRQ */
- int eirq; /* Extended IRQ number (0-7) */
- int initial_state; /* The "normal" state of the switch */
- void (*handle_hi)(void *); /* Handler: switch input has gone HI */
- void (*handle_lo)(void *); /* Handler: switch input has gone LO */
- void *data; /* Optional data to pass to handler */
-};
-
-#ifdef CONFIG_PMC_MSP7120_GW
-extern void msp_restart(char *);
-
-static void softreset_push(void *data)
-{
- printk(KERN_WARNING "SOFTRESET switch was pushed\n");
-
- /*
- * In the future you could move this to the release handler,
- * timing the difference between the 'push' and 'release', and only
- * doing this ungraceful restart if the button has been down for
- * a certain amount of time; otherwise doing a graceful restart.
- */
-
- msp_restart(NULL);
-}
-
-static void softreset_release(void *data)
-{
- printk(KERN_WARNING "SOFTRESET switch was released\n");
-
- /* Do nothing */
-}
-
-static void standby_on(void *data)
-{
- printk(KERN_WARNING "STANDBY switch was set to ON (not implemented)\n");
-
- /* TODO: Put board in standby mode */
-}
-
-static void standby_off(void *data)
-{
- printk(KERN_WARNING
- "STANDBY switch was set to OFF (not implemented)\n");
-
- /* TODO: Take out of standby mode */
-}
-
-static struct hwbutton_interrupt softreset_sw = {
- .name = "Softreset button",
- .irq = MSP_INT_EXT0,
- .eirq = 0,
- .initial_state = HWBUTTON_HI,
- .handle_hi = softreset_release,
- .handle_lo = softreset_push,
- .data = NULL,
-};
-
-static struct hwbutton_interrupt standby_sw = {
- .name = "Standby switch",
- .irq = MSP_INT_EXT1,
- .eirq = 1,
- .initial_state = HWBUTTON_HI,
- .handle_hi = standby_off,
- .handle_lo = standby_on,
- .data = NULL,
-};
-#endif /* CONFIG_PMC_MSP7120_GW */
-
-static irqreturn_t hwbutton_handler(int irq, void *data)
-{
- struct hwbutton_interrupt *hirq = data;
- unsigned long cic_ext = *CIC_EXT_CFG_REG;
-
- if (CIC_EXT_IS_ACTIVE_HI(cic_ext, hirq->eirq)) {
- /* Interrupt: pin is now HI */
- CIC_EXT_SET_ACTIVE_LO(cic_ext, hirq->eirq);
- hirq->handle_hi(hirq->data);
- } else {
- /* Interrupt: pin is now LO */
- CIC_EXT_SET_ACTIVE_HI(cic_ext, hirq->eirq);
- hirq->handle_lo(hirq->data);
- }
-
- /*
- * Invert the POLARITY of this level interrupt to ack the interrupt.
- * Thus the next state change will invoke the opposite message.
- */
- *CIC_EXT_CFG_REG = cic_ext;
-
- return IRQ_HANDLED;
-}
-
-static int msp_hwbutton_register(struct hwbutton_interrupt *hirq)
-{
- unsigned long cic_ext;
-
- if (hirq->handle_hi == NULL || hirq->handle_lo == NULL)
- return -EINVAL;
-
- cic_ext = *CIC_EXT_CFG_REG;
- CIC_EXT_SET_TRIGGER_LEVEL(cic_ext, hirq->eirq);
- if (hirq->initial_state == HWBUTTON_HI)
- CIC_EXT_SET_ACTIVE_LO(cic_ext, hirq->eirq);
- else
- CIC_EXT_SET_ACTIVE_HI(cic_ext, hirq->eirq);
- *CIC_EXT_CFG_REG = cic_ext;
-
- return request_irq(hirq->irq, hwbutton_handler, 0,
- hirq->name, hirq);
-}
-
-static int __init msp_hwbutton_setup(void)
-{
-#ifdef CONFIG_PMC_MSP7120_GW
- msp_hwbutton_register(&softreset_sw);
- msp_hwbutton_register(&standby_sw);
-#endif
- return 0;
-}
-
-subsys_initcall(msp_hwbutton_setup);
diff --git a/arch/mips/pmcs-msp71xx/msp_irq.c b/arch/mips/pmcs-msp71xx/msp_irq.c
deleted file mode 100644
index d525cc931d89..000000000000
--- a/arch/mips/pmcs-msp71xx/msp_irq.c
+++ /dev/null
@@ -1,155 +0,0 @@
-/*
- * IRQ vector handles
- *
- * Copyright (C) 1995, 1996, 1997, 2003 by Ralf Baechle
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/irq.h>
-#include <linux/interrupt.h>
-#include <linux/ptrace.h>
-#include <linux/time.h>
-
-#include <asm/irq_cpu.h>
-#include <asm/setup.h>
-
-#include <msp_int.h>
-
-/* SLP based systems */
-extern void msp_slp_irq_init(void);
-extern void msp_slp_irq_dispatch(void);
-
-/* CIC based systems */
-extern void msp_cic_irq_init(void);
-extern void msp_cic_irq_dispatch(void);
-
-/* VSMP support init */
-extern void msp_vsmp_int_init(void);
-
-/* vectored interrupt implementation */
-
-/* SW0/1 interrupts are used for SMP */
-static inline void mac0_int_dispatch(void) { do_IRQ(MSP_INT_MAC0); }
-static inline void mac1_int_dispatch(void) { do_IRQ(MSP_INT_MAC1); }
-static inline void mac2_int_dispatch(void) { do_IRQ(MSP_INT_SAR); }
-static inline void usb_int_dispatch(void) { do_IRQ(MSP_INT_USB); }
-static inline void sec_int_dispatch(void) { do_IRQ(MSP_INT_SEC); }
-
-/*
- * The PMC-Sierra MSP interrupts are arranged in a 3 level cascaded
- * hierarchical system. The first level consists of the direct MIPS interrupts
- * and is assigned the interrupt range 0-7. The second level is the SLM
- * interrupt controller and is assigned the range 8-39. The third level
- * comprises the Peripheral block, the PCI block, the PCI MSI block and
- * the SLP. The PCI interrupts and the SLP errors are handled by the
- * relevant subsystems, so the core interrupt code need only concern
- * itself with the Peripheral block. These are assigned interrupts in
- * the range 40-71.
- */
-
-asmlinkage void plat_irq_dispatch(void)
-{
- u32 pending;
-
- pending = read_c0_status() & read_c0_cause();
-
- /*
- * jump to the correct interrupt routine
- * These are arranged in priority order and the timer
- * comes first!
- */
-
-#ifdef CONFIG_IRQ_MSP_CIC /* break out the CIC stuff for now */
- if (pending & C_IRQ4) /* do the peripherals first, that's the timer */
- msp_cic_irq_dispatch();
-
- else if (pending & C_IRQ0)
- do_IRQ(MSP_INT_MAC0);
-
- else if (pending & C_IRQ1)
- do_IRQ(MSP_INT_MAC1);
-
- else if (pending & C_IRQ2)
- do_IRQ(MSP_INT_USB);
-
- else if (pending & C_IRQ3)
- do_IRQ(MSP_INT_SAR);
-
- else if (pending & C_IRQ5)
- do_IRQ(MSP_INT_SEC);
-
-#else
- if (pending & C_IRQ5)
- do_IRQ(MSP_INT_TIMER);
-
- else if (pending & C_IRQ0)
- do_IRQ(MSP_INT_MAC0);
-
- else if (pending & C_IRQ1)
- do_IRQ(MSP_INT_MAC1);
-
- else if (pending & C_IRQ3)
- do_IRQ(MSP_INT_VE);
-
- else if (pending & C_IRQ4)
- msp_slp_irq_dispatch();
-#endif
-
- else if (pending & C_SW0) /* do software after hardware */
- do_IRQ(MSP_INT_SW0);
-
- else if (pending & C_SW1)
- do_IRQ(MSP_INT_SW1);
-}
-
-void __init arch_init_irq(void)
-{
- /* assume we'll be using vectored interrupt mode except in UP mode */
-#ifdef CONFIG_MIPS_MT
- BUG_ON(!cpu_has_vint);
-#endif
- /* initialize the 1st-level CPU based interrupt controller */
- mips_cpu_irq_init();
-
-#ifdef CONFIG_IRQ_MSP_CIC
- msp_cic_irq_init();
-#ifdef CONFIG_MIPS_MT
- set_vi_handler(MSP_INT_CIC, msp_cic_irq_dispatch);
- set_vi_handler(MSP_INT_MAC0, mac0_int_dispatch);
- set_vi_handler(MSP_INT_MAC1, mac1_int_dispatch);
- set_vi_handler(MSP_INT_SAR, mac2_int_dispatch);
- set_vi_handler(MSP_INT_USB, usb_int_dispatch);
- set_vi_handler(MSP_INT_SEC, sec_int_dispatch);
-#ifdef CONFIG_MIPS_MT_SMP
- msp_vsmp_int_init();
-#endif /* CONFIG_MIPS_MT_SMP */
-#endif /* CONFIG_MIPS_MT */
- /* setup the cascaded interrupts */
- if (request_irq(MSP_INT_CIC, no_action, IRQF_NO_THREAD,
- "MSP CIC cascade", NULL))
- pr_err("Failed to register MSP CIC cascade interrupt\n");
- if (request_irq(MSP_INT_PER, no_action, IRQF_NO_THREAD,
- "MSP PER cascade", NULL))
- pr_err("Failed to register MSP PER cascade interrupt\n");
-
-#else
- /*
- * Setup the 2nd-level SLP register based interrupt controller.
- * VSMP support is not enabled for SLP.
- */
- msp_slp_irq_init();
-
- /* setup the cascaded SLP/PER interrupts */
- if (request_irq(MSP_INT_SLP, no_action, IRQF_NO_THREAD,
- "MSP CIC cascade", NULL))
- pr_err("Failed to register MSP CIC cascade interrupt\n");
- if (request_irq(MSP_INT_PER, no_action, IRQF_NO_THREAD,
- "MSP PER cascade", NULL))
- pr_err("Failed to register MSP PER cascade interrupt\n");
-#endif
-}
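
A note on the dispatch logic removed above: plat_irq_dispatch() services exactly one pending source per invocation, walking the cause bits in a fixed priority order with the timer first. A minimal user-space sketch of that idea follows; the bit masks, IRQ numbers and do_IRQ() stub are illustrative stand-ins, not the real MSP definitions.

    #include <stdio.h>

    /* illustrative stand-ins for the MIPS cause-register bits */
    #define C_IRQ5 (1u << 15)   /* timer - highest priority */
    #define C_IRQ0 (1u << 10)   /* MAC0 */
    #define C_IRQ1 (1u << 11)   /* MAC1 */

    static void do_IRQ(int irq) { printf("dispatching irq %d\n", irq); }

    /* service exactly one pending source, highest priority first */
    static void dispatch(unsigned int pending)
    {
        if (pending & C_IRQ5)
            do_IRQ(7);          /* timer */
        else if (pending & C_IRQ0)
            do_IRQ(2);
        else if (pending & C_IRQ1)
            do_IRQ(3);
    }

    int main(void)
    {
        /* the timer is serviced even though MAC0 is pending as well */
        dispatch(C_IRQ5 | C_IRQ0);
        return 0;
    }
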
diff --git a/arch/mips/pmcs-msp71xx/msp_irq_cic.c b/arch/mips/pmcs-msp71xx/msp_irq_cic.c
deleted file mode 100644
index 0706010cc99f..000000000000
--- a/arch/mips/pmcs-msp71xx/msp_irq_cic.c
+++ /dev/null
@@ -1,208 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Copyright 2010 PMC-Sierra, Inc, derived from irq_cpu.c
- *
- * This file defines the irq handler for MSP CIC subsystem interrupts.
- */
-
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/kernel.h>
-#include <linux/bitops.h>
-#include <linux/irq.h>
-
-#include <asm/mipsregs.h>
-
-#include <msp_cic_int.h>
-#include <msp_regs.h>
-
-/*
- * External API
- */
-extern void msp_per_irq_init(void);
-extern void msp_per_irq_dispatch(void);
-
-
-/*
- * Convenience Macro. Should be somewhere generic.
- */
-#define get_current_vpe() \
- ((read_c0_tcbind() >> TCBIND_CURVPE_SHIFT) & TCBIND_CURVPE)
-
-#ifdef CONFIG_SMP
-
-#define LOCK_VPE(flags, mtflags) \
-do { \
- local_irq_save(flags); \
- mtflags = dmt(); \
-} while (0)
-
-#define UNLOCK_VPE(flags, mtflags) \
-do { \
- emt(mtflags); \
- local_irq_restore(flags);\
-} while (0)
-
-#define LOCK_CORE(flags, mtflags) \
-do { \
- local_irq_save(flags); \
- mtflags = dvpe(); \
-} while (0)
-
-#define UNLOCK_CORE(flags, mtflags) \
-do { \
- evpe(mtflags); \
- local_irq_restore(flags);\
-} while (0)
-
-#else
-
-#define LOCK_VPE(flags, mtflags)
-#define UNLOCK_VPE(flags, mtflags)
-#endif
-
-/* ensure writes to cic are completed */
-static inline void cic_wmb(void)
-{
- const volatile void __iomem *cic_mem = CIC_VPE0_MSK_REG;
- volatile u32 dummy_read;
-
- wmb();
- dummy_read = __raw_readl(cic_mem);
- dummy_read++;
-}
-
-static void unmask_cic_irq(struct irq_data *d)
-{
- volatile u32 *cic_msk_reg = CIC_VPE0_MSK_REG;
- int vpe;
-#ifdef CONFIG_SMP
- unsigned int mtflags;
- unsigned long flags;
-
- /*
- * Make sure we have IRQ affinity. It may have changed while
- * we were processing the IRQ.
- */
- if (!cpumask_test_cpu(smp_processor_id(),
- irq_data_get_affinity_mask(d)))
- return;
-#endif
-
- vpe = get_current_vpe();
- LOCK_VPE(flags, mtflags);
- cic_msk_reg[vpe] |= (1 << (d->irq - MSP_CIC_INTBASE));
- UNLOCK_VPE(flags, mtflags);
- cic_wmb();
-}
-
-static void mask_cic_irq(struct irq_data *d)
-{
- volatile u32 *cic_msk_reg = CIC_VPE0_MSK_REG;
- int vpe = get_current_vpe();
-#ifdef CONFIG_SMP
- unsigned long flags, mtflags;
-#endif
- LOCK_VPE(flags, mtflags);
- cic_msk_reg[vpe] &= ~(1 << (d->irq - MSP_CIC_INTBASE));
- UNLOCK_VPE(flags, mtflags);
- cic_wmb();
-}
-static void msp_cic_irq_ack(struct irq_data *d)
-{
- mask_cic_irq(d);
- /*
- * Only really necessary for 18, 16-14 and sometimes 3:0
- * (since these can be edge sensitive) but it doesn't
- * hurt for the others
- */
- *CIC_STS_REG = (1 << (d->irq - MSP_CIC_INTBASE));
-}
-
-/* Note: Limiting to VSMP. */
-
-#ifdef CONFIG_MIPS_MT_SMP
-static int msp_cic_irq_set_affinity(struct irq_data *d,
- const struct cpumask *cpumask, bool force)
-{
- int cpu;
- unsigned long flags;
- unsigned int mtflags;
- unsigned long imask = (1 << (d->irq - MSP_CIC_INTBASE));
- volatile u32 *cic_mask = (volatile u32 *)CIC_VPE0_MSK_REG;
-
- /* timer balancing should be disabled in kernel code */
- BUG_ON(d->irq == MSP_INT_VPE0_TIMER || d->irq == MSP_INT_VPE1_TIMER);
-
- LOCK_CORE(flags, mtflags);
- /* enable if any of each VPE's TCs require this IRQ */
- for_each_online_cpu(cpu) {
- if (cpumask_test_cpu(cpu, cpumask))
- cic_mask[cpu] |= imask;
- else
- cic_mask[cpu] &= ~imask;
-
- }
-
- UNLOCK_CORE(flags, mtflags);
- return 0;
-
-}
-#endif
-
-static struct irq_chip msp_cic_irq_controller = {
- .name = "MSP_CIC",
- .irq_mask = mask_cic_irq,
- .irq_mask_ack = msp_cic_irq_ack,
- .irq_unmask = unmask_cic_irq,
- .irq_ack = msp_cic_irq_ack,
-#ifdef CONFIG_MIPS_MT_SMP
- .irq_set_affinity = msp_cic_irq_set_affinity,
-#endif
-};
-
-void __init msp_cic_irq_init(void)
-{
- int i;
- /* Mask/clear interrupts. */
- *CIC_VPE0_MSK_REG = 0x00000000;
- *CIC_VPE1_MSK_REG = 0x00000000;
- *CIC_STS_REG = 0xFFFFFFFF;
- /*
- * The MSP7120 RG and EVBD boards use IRQ[6:4] for PCI.
- * These inputs map to EXT_INT_POL[6:4] inside the CIC.
- * They are to be active low, level sensitive.
- */
- *CIC_EXT_CFG_REG &= 0xFFFF8F8F;
-
- /* initialize all the IRQ descriptors */
- for (i = MSP_CIC_INTBASE ; i < MSP_CIC_INTBASE + 32 ; i++) {
- irq_set_chip_and_handler(i, &msp_cic_irq_controller,
- handle_level_irq);
- }
-
- /* Initialize the PER interrupt sub-system */
- msp_per_irq_init();
-}
-
-/* CIC masked by CIC vector processing before dispatch called */
-void msp_cic_irq_dispatch(void)
-{
- volatile u32 *cic_msk_reg = (volatile u32 *)CIC_VPE0_MSK_REG;
- u32 cic_mask;
- u32 pending;
- int cic_status = *CIC_STS_REG;
- cic_mask = cic_msk_reg[get_current_vpe()];
- pending = cic_status & cic_mask;
- if (pending & (1 << (MSP_INT_VPE0_TIMER - MSP_CIC_INTBASE))) {
- do_IRQ(MSP_INT_VPE0_TIMER);
- } else if (pending & (1 << (MSP_INT_VPE1_TIMER - MSP_CIC_INTBASE))) {
- do_IRQ(MSP_INT_VPE1_TIMER);
- } else if (pending & (1 << (MSP_INT_PER - MSP_CIC_INTBASE))) {
- msp_per_irq_dispatch();
- } else if (pending) {
- do_IRQ(ffs(pending) + MSP_CIC_INTBASE - 1);
- } else {
- spurious_interrupt();
- }
-}
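
The fall-through case in msp_cic_irq_dispatch() above turns a pending-mask bit back into a Linux IRQ number with ffs(): ffs() returns the 1-based position of the lowest set bit, so the bit index is ffs(pending) - 1 and the IRQ is that index plus the controller's interrupt base. A small stand-alone sketch (the base value of 8 is assumed here purely for illustration):

    #include <stdio.h>
    #include <strings.h>            /* ffs() */

    #define EXAMPLE_INTBASE 8       /* assumed controller base, illustration only */

    int main(void)
    {
        unsigned int pending = 0x28;    /* bits 3 and 5 pending */

        /* lowest pending bit is bit 3, so this prints "irq 11" */
        printf("irq %d\n", ffs(pending) + EXAMPLE_INTBASE - 1);
        return 0;
    }
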
diff --git a/arch/mips/pmcs-msp71xx/msp_irq_per.c b/arch/mips/pmcs-msp71xx/msp_irq_per.c
deleted file mode 100644
index b284412b2923..000000000000
--- a/arch/mips/pmcs-msp71xx/msp_irq_per.c
+++ /dev/null
@@ -1,127 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Copyright 2010 PMC-Sierra, Inc, derived from irq_cpu.c
- *
- * This file defines the irq handler for MSP PER subsystem interrupts.
- */
-
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/kernel.h>
-#include <linux/spinlock.h>
-#include <linux/bitops.h>
-
-#include <asm/mipsregs.h>
-
-#include <msp_cic_int.h>
-#include <msp_regs.h>
-
-
-/*
- * Convenience Macro. Should be somewhere generic.
- */
-#define get_current_vpe() \
- ((read_c0_tcbind() >> TCBIND_CURVPE_SHIFT) & TCBIND_CURVPE)
-
-#ifdef CONFIG_SMP
-/*
- * The PER registers must be protected from concurrent access.
- */
-
-static DEFINE_SPINLOCK(per_lock);
-#endif
-
-/* ensure writes to per are completed */
-
-static inline void per_wmb(void)
-{
- const volatile void __iomem *per_mem = PER_INT_MSK_REG;
- volatile u32 dummy_read;
-
- wmb();
- dummy_read = __raw_readl(per_mem);
- dummy_read++;
-}
-
-static inline void unmask_per_irq(struct irq_data *d)
-{
-#ifdef CONFIG_SMP
- unsigned long flags;
- spin_lock_irqsave(&per_lock, flags);
- *PER_INT_MSK_REG |= (1 << (d->irq - MSP_PER_INTBASE));
- spin_unlock_irqrestore(&per_lock, flags);
-#else
- *PER_INT_MSK_REG |= (1 << (d->irq - MSP_PER_INTBASE));
-#endif
- per_wmb();
-}
-
-static inline void mask_per_irq(struct irq_data *d)
-{
-#ifdef CONFIG_SMP
- unsigned long flags;
- spin_lock_irqsave(&per_lock, flags);
- *PER_INT_MSK_REG &= ~(1 << (d->irq - MSP_PER_INTBASE));
- spin_unlock_irqrestore(&per_lock, flags);
-#else
- *PER_INT_MSK_REG &= ~(1 << (d->irq - MSP_PER_INTBASE));
-#endif
- per_wmb();
-}
-
-static inline void msp_per_irq_ack(struct irq_data *d)
-{
- mask_per_irq(d);
- /*
- * In the PER interrupt controller, only bits 11 and 10
- * are write-to-clear, (SPI TX complete, SPI RX complete).
- * It does nothing for any others.
- */
- *PER_INT_STS_REG = (1 << (d->irq - MSP_PER_INTBASE));
-}
-
-#ifdef CONFIG_SMP
-static int msp_per_irq_set_affinity(struct irq_data *d,
- const struct cpumask *affinity, bool force)
-{
- /* WTF is this doing ????? */
- unmask_per_irq(d);
- return 0;
-}
-#endif
-
-static struct irq_chip msp_per_irq_controller = {
- .name = "MSP_PER",
- .irq_enable = unmask_per_irq,
- .irq_disable = mask_per_irq,
- .irq_ack = msp_per_irq_ack,
-#ifdef CONFIG_SMP
- .irq_set_affinity = msp_per_irq_set_affinity,
-#endif
-};
-
-void __init msp_per_irq_init(void)
-{
- int i;
- /* Mask/clear interrupts. */
- *PER_INT_MSK_REG = 0x00000000;
- *PER_INT_STS_REG = 0xFFFFFFFF;
- /* initialize all the IRQ descriptors */
- for (i = MSP_PER_INTBASE; i < MSP_PER_INTBASE + 32; i++) {
- irq_set_chip(i, &msp_per_irq_controller);
- }
-}
-
-void msp_per_irq_dispatch(void)
-{
- u32 per_mask = *PER_INT_MSK_REG;
- u32 per_status = *PER_INT_STS_REG;
- u32 pending;
-
- pending = per_status & per_mask;
- if (pending) {
- do_IRQ(ffs(pending) + MSP_PER_INTBASE - 1);
- } else {
- spurious_interrupt();
- }
-}
diff --git a/arch/mips/pmcs-msp71xx/msp_irq_slp.c b/arch/mips/pmcs-msp71xx/msp_irq_slp.c
deleted file mode 100644
index 097a5fd3b06b..000000000000
--- a/arch/mips/pmcs-msp71xx/msp_irq_slp.c
+++ /dev/null
@@ -1,102 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * This file defines the irq handler for MSP SLM subsystem interrupts.
- *
- * Copyright 2005-2006 PMC-Sierra, Inc, derived from irq_cpu.c
- * Author: Andrew Hughes, Andrew_Hughes@pmc-sierra.com
- */
-
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/kernel.h>
-#include <linux/bitops.h>
-
-#include <asm/mipsregs.h>
-
-#include <msp_slp_int.h>
-#include <msp_regs.h>
-
-static inline void unmask_msp_slp_irq(struct irq_data *d)
-{
- unsigned int irq = d->irq;
-
- /* check for PER interrupt range */
- if (irq < MSP_PER_INTBASE)
- *SLP_INT_MSK_REG |= (1 << (irq - MSP_SLP_INTBASE));
- else
- *PER_INT_MSK_REG |= (1 << (irq - MSP_PER_INTBASE));
-}
-
-static inline void mask_msp_slp_irq(struct irq_data *d)
-{
- unsigned int irq = d->irq;
-
- /* check for PER interrupt range */
- if (irq < MSP_PER_INTBASE)
- *SLP_INT_MSK_REG &= ~(1 << (irq - MSP_SLP_INTBASE));
- else
- *PER_INT_MSK_REG &= ~(1 << (irq - MSP_PER_INTBASE));
-}
-
-/*
- * While we ack the interrupt, interrupts are disabled and thus we don't need
- * to deal with concurrency issues. Same for msp_slp_irq_end.
- */
-static inline void ack_msp_slp_irq(struct irq_data *d)
-{
- unsigned int irq = d->irq;
-
- /* check for PER interrupt range */
- if (irq < MSP_PER_INTBASE)
- *SLP_INT_STS_REG = (1 << (irq - MSP_SLP_INTBASE));
- else
- *PER_INT_STS_REG = (1 << (irq - MSP_PER_INTBASE));
-}
-
-static struct irq_chip msp_slp_irq_controller = {
- .name = "MSP_SLP",
- .irq_ack = ack_msp_slp_irq,
- .irq_mask = mask_msp_slp_irq,
- .irq_unmask = unmask_msp_slp_irq,
-};
-
-void __init msp_slp_irq_init(void)
-{
- int i;
-
- /* Mask/clear interrupts. */
- *SLP_INT_MSK_REG = 0x00000000;
- *PER_INT_MSK_REG = 0x00000000;
- *SLP_INT_STS_REG = 0xFFFFFFFF;
- *PER_INT_STS_REG = 0xFFFFFFFF;
-
- /* initialize all the IRQ descriptors */
- for (i = MSP_SLP_INTBASE; i < MSP_PER_INTBASE + 32; i++)
- irq_set_chip_and_handler(i, &msp_slp_irq_controller,
- handle_level_irq);
-}
-
-void msp_slp_irq_dispatch(void)
-{
- u32 pending;
- int intbase;
-
- intbase = MSP_SLP_INTBASE;
- pending = *SLP_INT_STS_REG & *SLP_INT_MSK_REG;
-
- /* check for PER interrupt */
- if (pending == (1 << (MSP_INT_PER - MSP_SLP_INTBASE))) {
- intbase = MSP_PER_INTBASE;
- pending = *PER_INT_STS_REG & *PER_INT_MSK_REG;
- }
-
- /* check for spurious interrupt */
- if (pending == 0x00000000) {
- printk(KERN_ERR "Spurious %s interrupt?\n",
- (intbase == MSP_SLP_INTBASE) ? "SLP" : "PER");
- return;
- }
-
- /* dispatch the irq */
- do_IRQ(ffs(pending) + intbase - 1);
-}
diff --git a/arch/mips/pmcs-msp71xx/msp_pci.c b/arch/mips/pmcs-msp71xx/msp_pci.c
deleted file mode 100644
index 428dea23c35c..000000000000
--- a/arch/mips/pmcs-msp71xx/msp_pci.c
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * The setup file for PCI related hardware on PMC-Sierra MSP processors.
- *
- * Copyright 2005-2006 PMC-Sierra, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
- * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
- * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
- * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include <linux/init.h>
-
-#include <msp_prom.h>
-#include <msp_regs.h>
-
-extern void msp_pci_init(void);
-
-static int __init msp_pci_setup(void)
-{
-#if 0 /* Linux 2.6 initialization code to be completed */
- if (getdeviceid() & DEV_ID_SINGLE_PC) {
- /* If single card mode */
- slmRegs *sreg = (slmRegs *) SREG_BASE;
-
- sreg->single_pc_enable = SINGLE_PCCARD;
- }
-#endif
-
- msp_pci_init();
-
- return 0;
-}
-
-subsys_initcall(msp_pci_setup);
diff --git a/arch/mips/pmcs-msp71xx/msp_prom.c b/arch/mips/pmcs-msp71xx/msp_prom.c
deleted file mode 100644
index 800a21b8b8b0..000000000000
--- a/arch/mips/pmcs-msp71xx/msp_prom.c
+++ /dev/null
@@ -1,513 +0,0 @@
-/*
- * BRIEF MODULE DESCRIPTION
- * PROM library initialisation code, assuming a version of
- * pmon is the boot code.
- *
- * Copyright 2000,2001 MontaVista Software Inc.
- * Author: MontaVista Software, Inc.
- * ppopov@mvista.com or source@mvista.com
- *
- * This file was derived from Carsten Langgaard's
- * arch/mips/mips-boards/xx files.
- *
- * Carsten Langgaard, carstenl@mips.com
- * Copyright (C) 1999,2000 MIPS Technologies, Inc. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
- * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
- * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
- * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include <linux/export.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/string.h>
-#include <linux/interrupt.h>
-#include <linux/mm.h>
-#include <linux/slab.h>
-
-#include <asm/addrspace.h>
-#include <asm/bootinfo.h>
-#include <asm-generic/sections.h>
-#include <asm/page.h>
-
-#include <msp_prom.h>
-#include <msp_regs.h>
-
-/* global PROM environment variables and pointers */
-int prom_argc;
-char **prom_argv, **prom_envp;
-int *prom_vec;
-
-/* debug flag */
-int init_debug = 1;
-
-/* memory blocks */
-struct prom_pmemblock mdesc[PROM_MAX_PMEMBLOCKS];
-
-#define MAX_PROM_MEM 5
-static phys_addr_t prom_mem_base[MAX_PROM_MEM] __initdata;
-static phys_addr_t prom_mem_size[MAX_PROM_MEM] __initdata;
-static unsigned int nr_prom_mem __initdata;
-
-/* default feature sets */
-static char msp_default_features[] =
-#if defined(CONFIG_PMC_MSP4200_EVAL) \
- || defined(CONFIG_PMC_MSP4200_GW)
- "ERER";
-#elif defined(CONFIG_PMC_MSP7120_EVAL) \
- || defined(CONFIG_PMC_MSP7120_GW)
- "EMEMSP";
-#elif defined(CONFIG_PMC_MSP7120_FPGA)
- "EMEM";
-#endif
-
-/* conversion functions */
-static inline unsigned char str2hexnum(unsigned char c)
-{
- if (c >= '0' && c <= '9')
- return c - '0';
- if (c >= 'a' && c <= 'f')
- return c - 'a' + 10;
- return 0; /* foo */
-}
-
-int str2eaddr(unsigned char *ea, unsigned char *str)
-{
- int index = 0;
- unsigned char num = 0;
-
- while (*str != '\0') {
- if ((*str == '.') || (*str == ':')) {
- ea[index++] = num;
- num = 0;
- str++;
- } else {
- num = num << 4;
- num |= str2hexnum(*str++);
- }
- }
-
- if (index == 5) {
- ea[index++] = num;
- return 0;
- } else
- return -1;
-}
-EXPORT_SYMBOL(str2eaddr);
-
-static inline unsigned long str2hex(unsigned char *str)
-{
- int value = 0;
-
- while (*str) {
- value = value << 4;
- value |= str2hexnum(*str++);
- }
-
- return value;
-}
-
-/* function to query the system information */
-const char *get_system_type(void)
-{
-#if defined(CONFIG_PMC_MSP4200_EVAL)
- return "PMC-Sierra MSP4200 Eval Board";
-#elif defined(CONFIG_PMC_MSP4200_GW)
- return "PMC-Sierra MSP4200 VoIP Gateway";
-#elif defined(CONFIG_PMC_MSP7120_EVAL)
- return "PMC-Sierra MSP7120 Eval Board";
-#elif defined(CONFIG_PMC_MSP7120_GW)
- return "PMC-Sierra MSP7120 Residential Gateway";
-#elif defined(CONFIG_PMC_MSP7120_FPGA)
- return "PMC-Sierra MSP7120 FPGA";
-#else
- #error "What is the type of *your* MSP?"
-#endif
-}
-
-int get_ethernet_addr(char *ethaddr_name, char *ethernet_addr)
-{
- char *ethaddr_str;
-
- ethaddr_str = prom_getenv(ethaddr_name);
- if (!ethaddr_str) {
- printk(KERN_WARNING "%s not set in boot prom\n", ethaddr_name);
- return -1;
- }
-
- if (str2eaddr(ethernet_addr, ethaddr_str) == -1) {
- printk(KERN_WARNING "%s badly formatted-<%s>\n",
- ethaddr_name, ethaddr_str);
- return -1;
- }
-
- if (init_debug > 1) {
- int i;
- printk(KERN_DEBUG "get_ethernet_addr: for %s ", ethaddr_name);
- for (i = 0; i < 5; i++)
- printk(KERN_DEBUG "%02x:",
- (unsigned char)*(ethernet_addr+i));
- printk(KERN_DEBUG "%02x\n", *(ethernet_addr+i));
- }
-
- return 0;
-}
-EXPORT_SYMBOL(get_ethernet_addr);
-
-static char *get_features(void)
-{
- char *feature = prom_getenv(FEATURES);
-
- if (feature == NULL) {
- /* default features based on MACHINE_TYPE */
- feature = msp_default_features;
- }
-
- return feature;
-}
-
-static char test_feature(char c)
-{
- char *feature = get_features();
-
- while (*feature) {
- if (*feature++ == c)
- return *feature;
- feature++;
- }
-
- return FEATURE_NOEXIST;
-}
-
-unsigned long get_deviceid(void)
-{
- char *deviceid = prom_getenv(DEVICEID);
-
- if (deviceid == NULL)
- return *DEV_ID_REG;
- else
- return str2hex(deviceid);
-}
-
-char identify_pci(void)
-{
- return test_feature(PCI_KEY);
-}
-EXPORT_SYMBOL(identify_pci);
-
-char identify_pcimux(void)
-{
- return test_feature(PCIMUX_KEY);
-}
-
-char identify_sec(void)
-{
- return test_feature(SEC_KEY);
-}
-EXPORT_SYMBOL(identify_sec);
-
-char identify_spad(void)
-{
- return test_feature(SPAD_KEY);
-}
-EXPORT_SYMBOL(identify_spad);
-
-char identify_tdm(void)
-{
- return test_feature(TDM_KEY);
-}
-EXPORT_SYMBOL(identify_tdm);
-
-char identify_zsp(void)
-{
- return test_feature(ZSP_KEY);
-}
-EXPORT_SYMBOL(identify_zsp);
-
-static char identify_enetfeature(char key, unsigned long interface_num)
-{
- char *feature = get_features();
-
- while (*feature) {
- if (*feature++ == key && interface_num-- == 0)
- return *feature;
- feature++;
- }
-
- return FEATURE_NOEXIST;
-}
-
-char identify_enet(unsigned long interface_num)
-{
- return identify_enetfeature(ENET_KEY, interface_num);
-}
-EXPORT_SYMBOL(identify_enet);
-
-char identify_enetTxD(unsigned long interface_num)
-{
- return identify_enetfeature(ENETTXD_KEY, interface_num);
-}
-EXPORT_SYMBOL(identify_enetTxD);
-
-unsigned long identify_family(void)
-{
- unsigned long deviceid;
-
- deviceid = get_deviceid();
-
- return deviceid & CPU_DEVID_FAMILY;
-}
-EXPORT_SYMBOL(identify_family);
-
-unsigned long identify_revision(void)
-{
- unsigned long deviceid;
-
- deviceid = get_deviceid();
-
- return deviceid & CPU_DEVID_REVISION;
-}
-EXPORT_SYMBOL(identify_revision);
-
-/* PROM environment functions */
-char *prom_getenv(char *env_name)
-{
- /*
- * Return a pointer to the given environment variable. prom_envp
- * points to a null terminated array of pointers to variables.
- * Environment variables are stored in the form of "memsize=64"
- */
-
- char **var = prom_envp;
- int i = strlen(env_name);
-
- while (*var) {
- if (strncmp(env_name, *var, i) == 0) {
- return *var + strlen(env_name) + 1;
- }
- var++;
- }
-
- return NULL;
-}
-
-/* PROM commandline functions */
-void __init prom_init_cmdline(void)
-{
- char *cp;
- int actr;
-
- actr = 1; /* Always ignore argv[0] */
-
- cp = &(arcs_cmdline[0]);
- while (actr < prom_argc) {
- strcpy(cp, prom_argv[actr]);
- cp += strlen(prom_argv[actr]);
- *cp++ = ' ';
- actr++;
- }
- if (cp != &(arcs_cmdline[0])) /* get rid of trailing space */
- --cp;
- *cp = '\0';
-}
-
-/* memory allocation functions */
-static int __init prom_memtype_classify(unsigned int type)
-{
- switch (type) {
- case yamon_free:
- return BOOT_MEM_RAM;
- case yamon_prom:
- return BOOT_MEM_ROM_DATA;
- default:
- return BOOT_MEM_RESERVED;
- }
-}
-
-void __init prom_meminit(void)
-{
- struct prom_pmemblock *p;
-
- p = prom_getmdesc();
-
- while (p->size) {
- long type;
- unsigned long base, size;
-
- type = prom_memtype_classify(p->type);
- base = p->base;
- size = p->size;
-
- add_memory_region(base, size, type);
- p++;
-
- if (type == BOOT_MEM_ROM_DATA) {
- if (nr_prom_mem >= MAX_PROM_MEM) {
- pr_err("Too many ROM DATA regions");
- continue;
- }
- prom_mem_base[nr_prom_mem] = base;
- prom_mem_size[nr_prom_mem] = size;
- nr_prom_mem++;
- }
- }
-}
-
-void __init prom_free_prom_memory(void)
-{
- int argc;
- char **argv;
- char **envp;
- char *ptr;
- int len = 0;
- int i;
-
- /*
- * preserve environment variables and command line from pmon/bbload
- * first preserve the command line
- */
- for (argc = 0; argc < prom_argc; argc++) {
- len += sizeof(char *); /* length of pointer */
- len += strlen(prom_argv[argc]) + 1; /* length of string */
- }
- len += sizeof(char *); /* plus length of null pointer */
-
- argv = kmalloc(len, GFP_KERNEL);
- ptr = (char *) &argv[prom_argc + 1]; /* strings follow array */
-
- for (argc = 0; argc < prom_argc; argc++) {
- argv[argc] = ptr;
- strcpy(ptr, prom_argv[argc]);
- ptr += strlen(prom_argv[argc]) + 1;
- }
- argv[prom_argc] = NULL; /* end array with null pointer */
- prom_argv = argv;
-
- /* next preserve the environment variables */
- len = 0;
- i = 0;
- for (envp = prom_envp; *envp != NULL; envp++) {
- i++; /* count number of environment variables */
- len += sizeof(char *); /* length of pointer */
- len += strlen(*envp) + 1; /* length of string */
- }
- len += sizeof(char *); /* plus length of null pointer */
-
- envp = kmalloc(len, GFP_KERNEL);
- ptr = (char *) &envp[i+1];
-
- for (argc = 0; argc < i; argc++) {
- envp[argc] = ptr;
- strcpy(ptr, prom_envp[argc]);
- ptr += strlen(prom_envp[argc]) + 1;
- }
- envp[i] = NULL; /* end array with null pointer */
- prom_envp = envp;
-
- for (i = 0; i < nr_prom_mem; i++) {
- free_init_pages("prom memory",
- prom_mem_base[i], prom_mem_base[i] + prom_mem_size[i]);
- }
-}
-
-struct prom_pmemblock *__init prom_getmdesc(void)
-{
- static char memsz_env[] __initdata = "memsize";
- static char heaptop_env[] __initdata = "heaptop";
- char *str;
- unsigned int memsize;
- unsigned int heaptop;
- int i;
-
- str = prom_getenv(memsz_env);
- if (!str) {
- ppfinit("memsize not set in boot prom, "
- "set to default (32Mb)\n");
- memsize = 0x02000000;
- } else {
- memsize = simple_strtol(str, NULL, 0);
-
- if (memsize == 0) {
- /* if memsize is a bad size, use reasonable default */
- memsize = 0x02000000;
- }
-
- /* convert to physical address (removing caching bits, etc) */
- memsize = CPHYSADDR(memsize);
- }
-
- str = prom_getenv(heaptop_env);
- if (!str) {
- heaptop = CPHYSADDR((u32)&_text);
- ppfinit("heaptop not set in boot prom, "
- "set to default 0x%08x\n", heaptop);
- } else {
- heaptop = simple_strtol(str, NULL, 16);
- if (heaptop == 0) {
- /* heaptop conversion bad, might have 0xValue */
- heaptop = simple_strtol(str, NULL, 0);
-
- if (heaptop == 0) {
- /* heaptop still bad, use reasonable default */
- heaptop = CPHYSADDR((u32)&_text);
- }
- }
-
- /* convert to physical address (removing caching bits, etc) */
- heaptop = CPHYSADDR((u32)heaptop);
- }
-
- /* the base region */
- i = 0;
- mdesc[i].type = BOOT_MEM_RESERVED;
- mdesc[i].base = 0x00000000;
- mdesc[i].size = PAGE_ALIGN(0x300 + 0x80);
- /* jtag interrupt vector + sizeof vector */
-
- /* PMON data */
- if (heaptop > mdesc[i].base + mdesc[i].size) {
- i++; /* 1 */
- mdesc[i].type = BOOT_MEM_ROM_DATA;
- mdesc[i].base = mdesc[i-1].base + mdesc[i-1].size;
- mdesc[i].size = heaptop - mdesc[i].base;
- }
-
- /* end of PMON data to start of kernel -- probably zero .. */
- if (heaptop != CPHYSADDR((u32)_text)) {
- i++; /* 2 */
- mdesc[i].type = BOOT_MEM_RAM;
- mdesc[i].base = heaptop;
- mdesc[i].size = CPHYSADDR((u32)_text) - mdesc[i].base;
- }
-
- /* kernel proper */
- i++; /* 3 */
- mdesc[i].type = BOOT_MEM_RESERVED;
- mdesc[i].base = CPHYSADDR((u32)_text);
- mdesc[i].size = CPHYSADDR(PAGE_ALIGN((u32)_end)) - mdesc[i].base;
-
- /* Remainder of RAM -- under memsize */
- i++; /* 4 */
- mdesc[i].type = yamon_free;
- mdesc[i].base = mdesc[i-1].base + mdesc[i-1].size;
- mdesc[i].size = memsize - mdesc[i].base;
-
- return &mdesc[0];
-}
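
prom_getenv() in the file above walks a NULL-terminated array of "name=value" strings and returns a pointer just past the variable name. The same lookup idiom as a self-contained sketch, with an explicit check for the '=' separator (the removed code relies on the PROM always supplying it):

    #include <stdio.h>
    #include <string.h>

    /* environment laid out the way the boot monitor hands it over */
    static char *example_envp[] = { "memsize=0x02000000", "clkfreqhz=400000000", NULL };

    static char *env_lookup(const char *name)
    {
        size_t n = strlen(name);
        char **var;

        for (var = example_envp; *var; var++) {
            /* match the name and require '=' immediately after it */
            if (strncmp(name, *var, n) == 0 && (*var)[n] == '=')
                return *var + n + 1;
        }
        return NULL;
    }

    int main(void)
    {
        printf("memsize=%s\n", env_lookup("memsize"));
        return 0;
    }
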
diff --git a/arch/mips/pmcs-msp71xx/msp_serial.c b/arch/mips/pmcs-msp71xx/msp_serial.c
deleted file mode 100644
index 940c684f6921..000000000000
--- a/arch/mips/pmcs-msp71xx/msp_serial.c
+++ /dev/null
@@ -1,154 +0,0 @@
-/*
- * The setup file for serial related hardware on PMC-Sierra MSP processors.
- *
- * Copyright 2005 PMC-Sierra, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
- * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
- * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
- * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include <linux/serial.h>
-#include <linux/serial_core.h>
-#include <linux/serial_reg.h>
-#include <linux/slab.h>
-
-#include <asm/bootinfo.h>
-#include <asm/io.h>
-#include <asm/processor.h>
-#include <asm/serial.h>
-#include <linux/serial_8250.h>
-
-#include <msp_prom.h>
-#include <msp_int.h>
-#include <msp_regs.h>
-
-struct msp_uart_data {
- int last_lcr;
-};
-
-static void msp_serial_out(struct uart_port *p, int offset, int value)
-{
- struct msp_uart_data *d = p->private_data;
-
- if (offset == UART_LCR)
- d->last_lcr = value;
-
- offset <<= p->regshift;
- writeb(value, p->membase + offset);
-}
-
-static unsigned int msp_serial_in(struct uart_port *p, int offset)
-{
- offset <<= p->regshift;
-
- return readb(p->membase + offset);
-}
-
-static int msp_serial_handle_irq(struct uart_port *p)
-{
- struct msp_uart_data *d = p->private_data;
- unsigned int iir = readb(p->membase + (UART_IIR << p->regshift));
-
- if (serial8250_handle_irq(p, iir)) {
- return 1;
- } else if ((iir & UART_IIR_BUSY) == UART_IIR_BUSY) {
- /*
- * The DesignWare APB UART has a Busy Detect (0x07) interrupt
- * meaning an LCR write attempt occurred while the UART was
- * busy. The interrupt must be cleared by reading the UART
- * status register (USR) and the LCR re-written.
- *
- * Note: MSP reserves 0x20 bytes of address space for the UART
- * and the USR is mapped in a separate block at an offset of
- * 0xc0 from the start of the UART.
- */
- (void)readb(p->membase + 0xc0);
- writeb(d->last_lcr, p->membase + (UART_LCR << p->regshift));
-
- return 1;
- }
-
- return 0;
-}
-
-void __init msp_serial_setup(void)
-{
- char *s;
- char *endp;
- struct uart_port up;
- unsigned int uartclk;
-
- memset(&up, 0, sizeof(up));
-
- /* Check if clock was specified in environment */
- s = prom_getenv("uartfreqhz");
- if(!(s && *s && (uartclk = simple_strtoul(s, &endp, 10)) && *endp == 0))
- uartclk = MSP_BASE_BAUD;
- ppfinit("UART clock set to %d\n", uartclk);
-
- /* Initialize first serial port */
- up.mapbase = MSP_UART0_BASE;
- up.membase = ioremap(up.mapbase, MSP_UART_REG_LEN);
- up.irq = MSP_INT_UART0;
- up.uartclk = uartclk;
- up.regshift = 2;
- up.iotype = UPIO_MEM;
- up.flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST;
- up.type = PORT_16550A;
- up.line = 0;
- up.serial_out = msp_serial_out;
- up.serial_in = msp_serial_in;
- up.handle_irq = msp_serial_handle_irq;
- up.private_data = kzalloc(sizeof(struct msp_uart_data), GFP_KERNEL);
- if (!up.private_data) {
- pr_err("failed to allocate uart private data\n");
- return;
- }
- if (early_serial_setup(&up)) {
- kfree(up.private_data);
- pr_err("Early serial init of port 0 failed\n");
- }
-
- /* Initialize the second serial port, if one exists */
- switch (mips_machtype) {
- case MACH_MSP4200_EVAL:
- case MACH_MSP4200_GW:
- case MACH_MSP4200_FPGA:
- case MACH_MSP7120_EVAL:
- case MACH_MSP7120_GW:
- case MACH_MSP7120_FPGA:
- /* Enable UART1 on MSP4200 and MSP7120 */
- *GPIO_CFG2_REG = 0x00002299;
- break;
-
- default:
- return; /* No second serial port, good-bye. */
- }
-
- up.mapbase = MSP_UART1_BASE;
- up.membase = ioremap(up.mapbase, MSP_UART_REG_LEN);
- up.irq = MSP_INT_UART1;
- up.line = 1;
- up.private_data = (void*)UART1_STATUS_REG;
- if (early_serial_setup(&up)) {
- kfree(up.private_data);
- pr_err("Early serial init of port 1 failed\n");
- }
-}
diff --git a/arch/mips/pmcs-msp71xx/msp_setup.c b/arch/mips/pmcs-msp71xx/msp_setup.c
deleted file mode 100644
index d1e59cec116e..000000000000
--- a/arch/mips/pmcs-msp71xx/msp_setup.c
+++ /dev/null
@@ -1,228 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * The generic setup file for PMC-Sierra MSP processors
- *
- * Copyright 2005-2007 PMC-Sierra, Inc,
- * Author: Jun Sun, jsun@mvista.com or jsun@junsun.net
- */
-
-#include <linux/delay.h>
-
-#include <asm/bootinfo.h>
-#include <asm/cacheflush.h>
-#include <asm/idle.h>
-#include <asm/r4kcache.h>
-#include <asm/reboot.h>
-#include <asm/smp-ops.h>
-#include <asm/time.h>
-
-#include <msp_prom.h>
-#include <msp_regs.h>
-
-#if defined(CONFIG_PMC_MSP7120_GW)
-#include <msp_regops.h>
-#define MSP_BOARD_RESET_GPIO 9
-#endif
-
-extern void msp_serial_setup(void);
-
-#if defined(CONFIG_PMC_MSP7120_EVAL) || \
- defined(CONFIG_PMC_MSP7120_GW) || \
- defined(CONFIG_PMC_MSP7120_FPGA)
-/*
- * Performs the reset for MSP7120-based boards
- */
-void msp7120_reset(void)
-{
- void *start, *end, *iptr;
-
- /* Disable all interrupts */
- local_irq_disable();
-#ifdef CONFIG_SYS_SUPPORTS_MULTITHREADING
- dvpe();
-#endif
-
- /* Cache the reset code of this function */
- __asm__ __volatile__ (
- " .set push \n"
- " .set arch=r4000 \n"
- " la %0,startpoint \n"
- " la %1,endpoint \n"
- " .set pop \n"
- : "=r" (start), "=r" (end)
- :
- );
-
- for (iptr = (void *)((unsigned int)start & ~(L1_CACHE_BYTES - 1));
- iptr < end; iptr += L1_CACHE_BYTES)
- cache_op(Fill, iptr);
-
- __asm__ __volatile__ (
- "startpoint: \n"
- );
-
- /* Put the DDRC into self-refresh mode */
- DDRC_INDIRECT_WRITE(DDRC_CTL(10), 0xb, 1 << 16);
-
- /*
- * IMPORTANT!
- * DO NOT do anything from here on out that might even
- * think about fetching from RAM - i.e., don't call any
- * non-inlined functions, and be VERY sure that any inline
- * functions you do call do NOT access any sort of RAM
- * anywhere!
- */
-
- /* Wait a bit for the DDRC to settle */
- mdelay(125);
-
-#if defined(CONFIG_PMC_MSP7120_GW)
- /*
- * Set GPIO 9 HI, (tied to board reset logic)
- * GPIO 9 is the 4th GPIO of register 3
- *
- * NOTE: We cannot use the higher-level msp_gpio_mode()/out()
- * as the GPIO char driver may not be enabled and it would look up
- * data in RAM!
- */
- set_value_reg32(GPIO_CFG3_REG, 0xf000, 0x8000);
- set_reg32(GPIO_DATA3_REG, 8);
-
- /*
- * In case GPIO9 doesn't reset the board (jumper configurable!)
- * fallback to device reset below.
- */
-#endif
- /* Set bit 1 of the MSP7120 reset register */
- *RST_SET_REG = 0x00000001;
-
- __asm__ __volatile__ (
- "endpoint: \n"
- );
-}
-#endif
-
-void msp_restart(char *command)
-{
- printk(KERN_WARNING "Now rebooting .......\n");
-
-#if defined(CONFIG_PMC_MSP7120_EVAL) || \
- defined(CONFIG_PMC_MSP7120_GW) || \
- defined(CONFIG_PMC_MSP7120_FPGA)
- msp7120_reset();
-#else
- /* No chip-specific reset code, just jump to the ROM reset vector */
- set_c0_status(ST0_BEV | ST0_ERL);
- change_c0_config(CONF_CM_CMASK, CONF_CM_UNCACHED);
- __flush_cache_all();
- write_c0_wired(0);
-
- __asm__ __volatile__("jr\t%0"::"r"(0xbfc00000));
-#endif
-}
-
-void msp_halt(void)
-{
- printk(KERN_WARNING "\n** You can safely turn off the power\n");
- while (1)
- /* If possible call official function to get CPU WARs */
- if (cpu_wait)
- (*cpu_wait)();
- else
- __asm__(".set\tmips3\n\t" "wait\n\t" ".set\tmips0");
-}
-
-void msp_power_off(void)
-{
- msp_halt();
-}
-
-void __init plat_mem_setup(void)
-{
- _machine_restart = msp_restart;
- _machine_halt = msp_halt;
- pm_power_off = msp_power_off;
-}
-
-void __init prom_init(void)
-{
- unsigned long family;
- unsigned long revision;
-
- prom_argc = fw_arg0;
- prom_argv = (char **)fw_arg1;
- prom_envp = (char **)fw_arg2;
-
- /*
- * Someday we can use this with PMON2000 so the
- * platform can call prom routines for output etc. without
- * having to use grody hacks. For now it's unused.
- *
- * struct callvectors *cv = (struct callvectors *) fw_arg3;
- */
- family = identify_family();
- revision = identify_revision();
-
- switch (family) {
- case FAMILY_FPGA:
- if (FPGA_IS_MSP4200(revision)) {
- /* Old-style revision ID */
- mips_machtype = MACH_MSP4200_FPGA;
- } else {
- mips_machtype = MACH_MSP_OTHER;
- }
- break;
-
- case FAMILY_MSP4200:
-#if defined(CONFIG_PMC_MSP4200_EVAL)
- mips_machtype = MACH_MSP4200_EVAL;
-#elif defined(CONFIG_PMC_MSP4200_GW)
- mips_machtype = MACH_MSP4200_GW;
-#else
- mips_machtype = MACH_MSP_OTHER;
-#endif
- break;
-
- case FAMILY_MSP4200_FPGA:
- mips_machtype = MACH_MSP4200_FPGA;
- break;
-
- case FAMILY_MSP7100:
-#if defined(CONFIG_PMC_MSP7120_EVAL)
- mips_machtype = MACH_MSP7120_EVAL;
-#elif defined(CONFIG_PMC_MSP7120_GW)
- mips_machtype = MACH_MSP7120_GW;
-#else
- mips_machtype = MACH_MSP_OTHER;
-#endif
- break;
-
- case FAMILY_MSP7100_FPGA:
- mips_machtype = MACH_MSP7120_FPGA;
- break;
-
- default:
- /* we don't recognize the machine */
- mips_machtype = MACH_UNKNOWN;
- panic("***Bogosity factor five***, exiting");
- break;
- }
-
- prom_init_cmdline();
-
- prom_meminit();
-
- /*
- * Sub-system setup follows.
- * Setup functions can either be called here or using the
- * subsys_initcall mechanism (i.e. see msp_pci_setup). The
- * order in which they are called can be changed by using the
- * link order in arch/mips/pmc-sierra/msp71xx/Makefile.
- *
- * NOTE: Please keep sub-system specific initialization code
- * in separate specific files.
- */
- msp_serial_setup();
-
- register_vsmp_smp_ops();
-}
diff --git a/arch/mips/pmcs-msp71xx/msp_smp.c b/arch/mips/pmcs-msp71xx/msp_smp.c
deleted file mode 100644
index 00092e2924ec..000000000000
--- a/arch/mips/pmcs-msp71xx/msp_smp.c
+++ /dev/null
@@ -1,56 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2000, 2001, 2004 MIPS Technologies, Inc.
- * Copyright (C) 2001 Ralf Baechle
- * Copyright (C) 2010 PMC-Sierra, Inc.
- *
- * VSMP support for MSP platforms. Derived from Malta VSMP support.
- */
-#include <linux/smp.h>
-#include <linux/interrupt.h>
-
-#include <asm/setup.h>
-
-#ifdef CONFIG_MIPS_MT_SMP
-#define MIPS_CPU_IPI_RESCHED_IRQ 0 /* SW int 0 for resched */
-#define MIPS_CPU_IPI_CALL_IRQ 1 /* SW int 1 for call */
-
-
-static void ipi_resched_dispatch(void)
-{
- do_IRQ(MIPS_CPU_IPI_RESCHED_IRQ);
-}
-
-static void ipi_call_dispatch(void)
-{
- do_IRQ(MIPS_CPU_IPI_CALL_IRQ);
-}
-
-static irqreturn_t ipi_resched_interrupt(int irq, void *dev_id)
-{
- return IRQ_HANDLED;
-}
-
-static irqreturn_t ipi_call_interrupt(int irq, void *dev_id)
-{
- generic_smp_call_function_interrupt();
-
- return IRQ_HANDLED;
-}
-
-void __init arch_init_ipiirq(int irq, const char *name, irq_handler_t handler)
-{
- if (request_irq(irq, handler, IRQF_PERCPU, name, NULL))
- pr_err("Failed to request irq %d (%s)\n", irq, name);
- irq_set_handler(irq, handle_percpu_irq);
-}
-
-void __init msp_vsmp_int_init(void)
-{
- set_vi_handler(MIPS_CPU_IPI_RESCHED_IRQ, ipi_resched_dispatch);
- set_vi_handler(MIPS_CPU_IPI_CALL_IRQ, ipi_call_dispatch);
- arch_init_ipiirq(MIPS_CPU_IPI_RESCHED_IRQ, "IPI_resched",
- ipi_resched_interrupt);
- arch_init_ipiirq(MIPS_CPU_IPI_CALL_IRQ, "IPI_call", ipi_call_interrupt);
-}
-#endif /* CONFIG_MIPS_MT_SMP */
diff --git a/arch/mips/pmcs-msp71xx/msp_time.c b/arch/mips/pmcs-msp71xx/msp_time.c
deleted file mode 100644
index 9c629829f447..000000000000
--- a/arch/mips/pmcs-msp71xx/msp_time.c
+++ /dev/null
@@ -1,90 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Setting up the clock on MSP SOCs. No RTC typically.
- *
- * Carsten Langgaard, carstenl@mips.com
- * Copyright (C) 1999,2000 MIPS Technologies, Inc. All rights reserved.
- *
- * ########################################################################
- *
- * ########################################################################
- */
-
-#include <linux/init.h>
-#include <linux/kernel_stat.h>
-#include <linux/sched.h>
-#include <linux/spinlock.h>
-#include <linux/ptrace.h>
-
-#include <asm/cevt-r4k.h>
-#include <asm/mipsregs.h>
-#include <asm/time.h>
-
-#include <msp_prom.h>
-#include <msp_int.h>
-#include <msp_regs.h>
-
-#define get_current_vpe() \
- ((read_c0_tcbind() >> TCBIND_CURVPE_SHIFT) & TCBIND_CURVPE)
-
-static int tim_installed;
-
-void __init plat_time_init(void)
-{
- char *endp, *s;
- unsigned long cpu_rate = 0;
-
- if (cpu_rate == 0) {
- s = prom_getenv("clkfreqhz");
- cpu_rate = simple_strtoul(s, &endp, 10);
- if (endp != NULL && *endp != 0) {
- printk(KERN_ERR
- "Clock rate in Hz parse error: %s\n", s);
- cpu_rate = 0;
- }
- }
-
- if (cpu_rate == 0) {
- s = prom_getenv("clkfreq");
- cpu_rate = 1000 * simple_strtoul(s, &endp, 10);
- if (endp != NULL && *endp != 0) {
- printk(KERN_ERR
- "Clock rate in MHz parse error: %s\n", s);
- cpu_rate = 0;
- }
- }
-
- if (cpu_rate == 0) {
-#if defined(CONFIG_PMC_MSP7120_EVAL) \
- || defined(CONFIG_PMC_MSP7120_GW)
- cpu_rate = 400000000;
-#elif defined(CONFIG_PMC_MSP7120_FPGA)
- cpu_rate = 25000000;
-#else
- cpu_rate = 150000000;
-#endif
- printk(KERN_ERR
- "Failed to determine CPU clock rate, "
- "assuming %ld hz ...\n", cpu_rate);
- }
-
- printk(KERN_WARNING "Clock rate set to %ld\n", cpu_rate);
-
- /* timer frequency is 1/2 clock rate */
- mips_hpt_frequency = cpu_rate/2;
-}
-
-unsigned int get_c0_compare_int(void)
-{
- unsigned long flags = IRQF_PERCPU | IRQF_TIMER | IRQF_SHARED;
-
- /* MIPS_MT modes may want timer for second VPE */
- if ((get_current_vpe()) && !tim_installed) {
- if (request_irq(MSP_INT_VPE1_TIMER, c0_compare_interrupt, flags,
- "timer", c0_compare_interrupt))
- pr_err("Failed to register timer interrupt\n");
- tim_installed++;
- }
-
- return get_current_vpe() ? MSP_INT_VPE1_TIMER : MSP_INT_VPE0_TIMER;
-}
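
plat_time_init() above accepts the clock rate either from "clkfreqhz" (in Hz) or from "clkfreq" (scaled by 1000 in the code), and treats any trailing characters left by the string conversion as a parse error. The end-pointer check looks like this as a stand-alone sketch:

    #include <stdio.h>
    #include <stdlib.h>

    static unsigned long parse_rate(const char *s, unsigned long scale)
    {
        char *endp;
        unsigned long val = strtoul(s, &endp, 10);

        /* any trailing junk invalidates the value, as in the removed code */
        if (*endp != '\0')
            return 0;
        return val * scale;
    }

    int main(void)
    {
        printf("%lu\n", parse_rate("400000000", 1));   /* "clkfreqhz" style */
        printf("%lu\n", parse_rate("400000", 1000));   /* "clkfreq" style  */
        printf("%lu\n", parse_rate("400MHz", 1));      /* parse error -> 0 */
        return 0;
    }
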
diff --git a/arch/mips/pmcs-msp71xx/msp_usb.c b/arch/mips/pmcs-msp71xx/msp_usb.c
deleted file mode 100644
index d38ac70b5a2e..000000000000
--- a/arch/mips/pmcs-msp71xx/msp_usb.c
+++ /dev/null
@@ -1,173 +0,0 @@
-/*
- * The setup file for USB related hardware on PMC-Sierra MSP processors.
- *
- * Copyright 2006 PMC-Sierra, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
- * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
- * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
- * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-#if defined(CONFIG_USB_EHCI_HCD) || defined(CONFIG_USB_GADGET)
-
-#include <linux/init.h>
-#include <linux/ioport.h>
-#include <linux/platform_device.h>
-
-#include <asm/mipsregs.h>
-
-#include <msp_regs.h>
-#include <msp_int.h>
-#include <msp_prom.h>
-#include <msp_usb.h>
-
-
-#if defined(CONFIG_USB_EHCI_HCD)
-static struct resource msp_usbhost0_resources[] = {
- [0] = { /* EHCI-HS operational and capabilities registers */
- .start = MSP_USB0_HS_START,
- .end = MSP_USB0_HS_END,
- .flags = IORESOURCE_MEM,
- },
- [1] = {
- .start = MSP_INT_USB,
- .end = MSP_INT_USB,
- .flags = IORESOURCE_IRQ,
- },
- [2] = { /* MSBus-to-AMBA bridge register space */
- .start = MSP_USB0_MAB_START,
- .end = MSP_USB0_MAB_END,
- .flags = IORESOURCE_MEM,
- },
- [3] = { /* Identification and general hardware parameters */
- .start = MSP_USB0_ID_START,
- .end = MSP_USB0_ID_END,
- .flags = IORESOURCE_MEM,
- },
-};
-
-static u64 msp_usbhost0_dma_mask = 0xffffffffUL;
-
-static struct mspusb_device msp_usbhost0_device = {
- .dev = {
- .name = "pmcmsp-ehci",
- .id = 0,
- .dev = {
- .dma_mask = &msp_usbhost0_dma_mask,
- .coherent_dma_mask = 0xffffffffUL,
- },
- .num_resources = ARRAY_SIZE(msp_usbhost0_resources),
- .resource = msp_usbhost0_resources,
- },
-};
-#endif /* CONFIG_USB_EHCI_HCD */
-
-#if defined(CONFIG_USB_GADGET)
-static struct resource msp_usbdev0_resources[] = {
- [0] = { /* EHCI-HS operational and capabilities registers */
- .start = MSP_USB0_HS_START,
- .end = MSP_USB0_HS_END,
- .flags = IORESOURCE_MEM,
- },
- [1] = {
- .start = MSP_INT_USB,
- .end = MSP_INT_USB,
- .flags = IORESOURCE_IRQ,
- },
- [2] = { /* MSBus-to-AMBA bridge register space */
- .start = MSP_USB0_MAB_START,
- .end = MSP_USB0_MAB_END,
- .flags = IORESOURCE_MEM,
- },
- [3] = { /* Identification and general hardware parameters */
- .start = MSP_USB0_ID_START,
- .end = MSP_USB0_ID_END,
- .flags = IORESOURCE_MEM,
- },
-};
-
-static u64 msp_usbdev_dma_mask = 0xffffffffUL;
-
-/* This may need to be converted to a mspusb_device, too. */
-static struct mspusb_device msp_usbdev0_device = {
- .dev = {
- .name = "msp71xx_udc",
- .id = 0,
- .dev = {
- .dma_mask = &msp_usbdev_dma_mask,
- .coherent_dma_mask = 0xffffffffUL,
- },
- .num_resources = ARRAY_SIZE(msp_usbdev0_resources),
- .resource = msp_usbdev0_resources,
- },
-};
-#endif /* CONFIG_USB_GADGET */
-
-static int __init msp_usb_setup(void)
-{
- char *strp;
- char envstr[32];
- struct platform_device *msp_devs[NUM_USB_DEVS];
- unsigned int val;
-
- /* construct environment name usbmode */
- /* set usbmode <host/device> as pmon environment var */
- /*
- * Could this perhaps be integrated into the "features" env var?
- * Use the features key "U", and follow with "H" for host-mode,
- * "D" for device-mode. If it works for Ethernet, why not USB...
- * -- hammtrev, 2007/03/22
- */
- snprintf(&envstr[0], sizeof(envstr), "usbmode");
-
- /* set default host mode */
- val = 1;
-
- /* get environment string */
- strp = prom_getenv(&envstr[0]);
- if (strp) {
- /* compare string */
- if (!strcmp(strp, "device"))
- val = 0;
- }
-
- if (val) {
-#if defined(CONFIG_USB_EHCI_HCD)
- msp_devs[0] = &msp_usbhost0_device.dev;
- ppfinit("platform add USB HOST done %s.\n", msp_devs[0]->name);
-#else
- ppfinit("%s: echi_hcd not supported\n", __FILE__);
-#endif /* CONFIG_USB_EHCI_HCD */
- } else {
-#if defined(CONFIG_USB_GADGET)
- /* get device mode structure */
- msp_devs[0] = &msp_usbdev0_device.dev;
- ppfinit("platform add USB DEVICE done %s.\n"
- , msp_devs[0]->name);
-#else
- ppfinit("%s: usb_gadget not supported\n", __FILE__);
-#endif /* CONFIG_USB_GADGET */
- }
- /* add device */
- platform_add_devices(msp_devs, ARRAY_SIZE(msp_devs));
-
- return 0;
-}
-
-subsys_initcall(msp_usb_setup);
-#endif /* CONFIG_USB_EHCI_HCD || CONFIG_USB_GADGET */
diff --git a/arch/mips/pnx833x/Platform b/arch/mips/pnx833x/Platform
index 287260669551..e5286a49fc3e 100644
--- a/arch/mips/pnx833x/Platform
+++ b/arch/mips/pnx833x/Platform
@@ -1,5 +1,4 @@
# NXP STB225
-platform-$(CONFIG_SOC_PNX833X) += pnx833x/
cflags-$(CONFIG_SOC_PNX833X) += -I$(srctree)/arch/mips/include/asm/mach-pnx833x
load-$(CONFIG_NXP_STB220) += 0xffffffff80001000
load-$(CONFIG_NXP_STB225) += 0xffffffff80001000
diff --git a/arch/mips/ralink/Platform b/arch/mips/ralink/Platform
index 6095fcc334f4..02ee0791481d 100644
--- a/arch/mips/ralink/Platform
+++ b/arch/mips/ralink/Platform
@@ -1,7 +1,6 @@
#
# Ralink SoC common stuff
#
-core-$(CONFIG_RALINK) += arch/mips/ralink/
cflags-$(CONFIG_RALINK) += -I$(srctree)/arch/mips/include/asm/mach-ralink
#
diff --git a/arch/mips/ralink/bootrom.c b/arch/mips/ralink/bootrom.c
index 88bcce59beeb..94ca8379b83c 100644
--- a/arch/mips/ralink/bootrom.c
+++ b/arch/mips/ralink/bootrom.c
@@ -31,7 +31,7 @@ static const struct file_operations bootrom_file_ops = {
.release = single_release,
};
-static int bootrom_setup(void)
+static int __init bootrom_setup(void)
{
debugfs_create_file("bootrom", 0444, NULL, NULL, &bootrom_file_ops);
return 0;
diff --git a/arch/mips/ralink/mt7621.c b/arch/mips/ralink/mt7621.c
index 0accb80db709..ca0ac607b0f3 100644
--- a/arch/mips/ralink/mt7621.c
+++ b/arch/mips/ralink/mt7621.c
@@ -20,11 +20,6 @@
#include "common.h"
-#define SYSC_REG_SYSCFG 0x10
-#define SYSC_REG_CPLL_CLKCFG0 0x2c
-#define SYSC_REG_CUR_CLK_STS 0x44
-#define CPU_CLK_SEL (BIT(30) | BIT(31))
-
#define MT7621_GPIO_MODE_UART1 1
#define MT7621_GPIO_MODE_I2C 2
#define MT7621_GPIO_MODE_UART3_MASK 0x3
@@ -115,44 +110,6 @@ phys_addr_t mips_cpc_default_phys_base(void)
panic("Cannot detect cpc address");
}
-void __init ralink_clk_init(void)
-{
- int cpu_fdiv = 0;
- int cpu_ffrac = 0;
- int fbdiv = 0;
- u32 clk_sts, syscfg;
- u8 clk_sel = 0, xtal_mode;
- u32 cpu_clk;
-
- if ((rt_sysc_r32(SYSC_REG_CPLL_CLKCFG0) & CPU_CLK_SEL) != 0)
- clk_sel = 1;
-
- switch (clk_sel) {
- case 0:
- clk_sts = rt_sysc_r32(SYSC_REG_CUR_CLK_STS);
- cpu_fdiv = ((clk_sts >> 8) & 0x1F);
- cpu_ffrac = (clk_sts & 0x1F);
- cpu_clk = (500 * cpu_ffrac / cpu_fdiv) * 1000 * 1000;
- break;
-
- case 1:
- fbdiv = ((rt_sysc_r32(0x648) >> 4) & 0x7F) + 1;
- syscfg = rt_sysc_r32(SYSC_REG_SYSCFG);
- xtal_mode = (syscfg >> 6) & 0x7;
- if (xtal_mode >= 6) {
- /* 25Mhz Xtal */
- cpu_clk = 25 * fbdiv * 1000 * 1000;
- } else if (xtal_mode >= 3) {
- /* 40Mhz Xtal */
- cpu_clk = 40 * fbdiv * 1000 * 1000;
- } else {
- /* 20Mhz Xtal */
- cpu_clk = 20 * fbdiv * 1000 * 1000;
- }
- break;
- }
-}
-
void __init ralink_of_remap(void)
{
rt_sysc_membase = plat_of_remap_node("mtk,mt7621-sysc");
diff --git a/arch/mips/rb532/Platform b/arch/mips/rb532/Platform
index aeec45a7cbb3..12eaa8790b3e 100644
--- a/arch/mips/rb532/Platform
+++ b/arch/mips/rb532/Platform
@@ -1,7 +1,6 @@
#
# Routerboard 532
#
-platform-$(CONFIG_MIKROTIK_RB532) += rb532/
cflags-$(CONFIG_MIKROTIK_RB532) += \
-I$(srctree)/arch/mips/include/asm/mach-rc32434
load-$(CONFIG_MIKROTIK_RB532) += 0xffffffff80101000
diff --git a/arch/mips/sgi-ip22/Platform b/arch/mips/sgi-ip22/Platform
index e8f6b3a42a48..62fa30bb959e 100644
--- a/arch/mips/sgi-ip22/Platform
+++ b/arch/mips/sgi-ip22/Platform
@@ -7,7 +7,6 @@
# current variable will break so for 64-bit kernels we have to raise the start
# address by 8kb.
#
-platform-$(CONFIG_SGI_IP22) += sgi-ip22/
cflags-$(CONFIG_SGI_IP22) += -I$(srctree)/arch/mips/include/asm/mach-ip22
ifdef CONFIG_32BIT
load-$(CONFIG_SGI_IP22) += 0xffffffff88002000
@@ -29,6 +28,5 @@ ifdef CONFIG_SGI_IP28
$(error gcc doesn't support needed option -mr10k-cache-barrier=store)
endif
endif
-platform-$(CONFIG_SGI_IP28) += sgi-ip22/
cflags-$(CONFIG_SGI_IP28) += -mr10k-cache-barrier=store -I$(srctree)/arch/mips/include/asm/mach-ip28
load-$(CONFIG_SGI_IP28) += 0xa800000020004000
diff --git a/arch/mips/sgi-ip27/Platform b/arch/mips/sgi-ip27/Platform
index 1fb9c2ea7c8f..e734ee6abd44 100644
--- a/arch/mips/sgi-ip27/Platform
+++ b/arch/mips/sgi-ip27/Platform
@@ -5,8 +5,6 @@
# symmon, 0xc00000000001c000 for production kernels. Note that the value must
# be 16kb aligned or the handling of the current variable will break.
#
-ifdef CONFIG_SGI_IP27
-platform-$(CONFIG_SGI_IP27) += sgi-ip27/
cflags-$(CONFIG_SGI_IP27) += -I$(srctree)/arch/mips/include/asm/mach-ip27
ifdef CONFIG_MAPPED_KERNEL
load-$(CONFIG_SGI_IP27) += 0xc00000004001c000
@@ -16,4 +14,3 @@ else
load-$(CONFIG_SGI_IP27) += 0xa80000000001c000
OBJCOPYFLAGS := --change-addresses=0x57ffffff80000000
endif
-endif
diff --git a/arch/mips/sgi-ip27/ip27-timer.c b/arch/mips/sgi-ip27/ip27-timer.c
index 61f3565f3645..c0e33632bc37 100644
--- a/arch/mips/sgi-ip27/ip27-timer.c
+++ b/arch/mips/sgi-ip27/ip27-timer.c
@@ -21,7 +21,6 @@
#include <asm/time.h>
#include <asm/pgtable.h>
#include <asm/sgialib.h>
-#include <asm/sn/ioc3.h>
#include <asm/sn/klconfig.h>
#include <asm/sn/arch.h>
#include <asm/sn/addrs.h>
@@ -29,14 +28,6 @@
#include "ip27-common.h"
-#define TICK_SIZE (tick_nsec / 1000)
-
-/* Includes for ioc3_init(). */
-#include <asm/sn/types.h>
-#include <asm/pci/bridge.h>
-
-#include "ip27-common.h"
-
static int rt_next_event(unsigned long delta, struct clock_event_device *evt)
{
unsigned int cpu = smp_processor_id();
diff --git a/arch/mips/sgi-ip30/Platform b/arch/mips/sgi-ip30/Platform
index 2b5695c2049a..f6f11517e091 100644
--- a/arch/mips/sgi-ip30/Platform
+++ b/arch/mips/sgi-ip30/Platform
@@ -1,8 +1,5 @@
#
# SGI-IP30 (Octane/Octane2)
#
-ifdef CONFIG_SGI_IP30
-platform-$(CONFIG_SGI_IP30) += sgi-ip30/
cflags-$(CONFIG_SGI_IP30) += -I$(srctree)/arch/mips/include/asm/mach-ip30
load-$(CONFIG_SGI_IP30) += 0xa800000020004000
-endif
diff --git a/arch/mips/sgi-ip32/Platform b/arch/mips/sgi-ip32/Platform
index 0fea556f3641..f58a7a02b4ca 100644
--- a/arch/mips/sgi-ip32/Platform
+++ b/arch/mips/sgi-ip32/Platform
@@ -6,6 +6,5 @@
# a multiple of the kernel stack size or the handling of the current variable
# will break.
#
-platform-$(CONFIG_SGI_IP32) += sgi-ip32/
cflags-$(CONFIG_SGI_IP32) += -I$(srctree)/arch/mips/include/asm/mach-ip32
load-$(CONFIG_SGI_IP32) += 0xffffffff80004000
diff --git a/arch/mips/sibyte/Platform b/arch/mips/sibyte/Platform
index af117330ce14..65b2225b76b2 100644
--- a/arch/mips/sibyte/Platform
+++ b/arch/mips/sibyte/Platform
@@ -1,10 +1,6 @@
#
# These are all rather similar so we consider them a single platform
#
-platform-$(CONFIG_SIBYTE_BCM112X) += sibyte/
-platform-$(CONFIG_SIBYTE_SB1250) += sibyte/
-platform-$(CONFIG_SIBYTE_BCM1x55) += sibyte/
-platform-$(CONFIG_SIBYTE_BCM1x80) += sibyte/
#
# Sibyte SB1250 / BCM1480 family of SOCs
diff --git a/arch/mips/sni/Platform b/arch/mips/sni/Platform
index 2644a9d63c0f..b0b3dde0bef8 100644
--- a/arch/mips/sni/Platform
+++ b/arch/mips/sni/Platform
@@ -1,7 +1,6 @@
#
# SNI RM
#
-platform-$(CONFIG_SNI_RM) += sni/
cflags-$(CONFIG_SNI_RM) += -I$(srctree)/arch/mips/include/asm/mach-rm
ifdef CONFIG_CPU_LITTLE_ENDIAN
load-$(CONFIG_SNI_RM) += 0xffffffff80600000
diff --git a/arch/mips/tools/elf-entry.c b/arch/mips/tools/elf-entry.c
index adde79ce7fc0..dbd14ff05b4c 100644
--- a/arch/mips/tools/elf-entry.c
+++ b/arch/mips/tools/elf-entry.c
@@ -51,11 +51,14 @@ int main(int argc, const char *argv[])
nread = fread(&hdr, 1, sizeof(hdr), file);
if (nread != sizeof(hdr)) {
perror("Unable to read input file");
+ fclose(file);
return EXIT_FAILURE;
}
- if (memcmp(hdr.ehdr32.e_ident, ELFMAG, SELFMAG))
+ if (memcmp(hdr.ehdr32.e_ident, ELFMAG, SELFMAG)) {
+ fclose(file);
die("Input is not an ELF\n");
+ }
switch (hdr.ehdr32.e_ident[EI_CLASS]) {
case ELFCLASS32:
@@ -67,6 +70,7 @@ int main(int argc, const char *argv[])
entry = be32toh(hdr.ehdr32.e_entry);
break;
default:
+ fclose(file);
die("Invalid ELF encoding\n");
}
@@ -83,14 +87,17 @@ int main(int argc, const char *argv[])
entry = be64toh(hdr.ehdr64.e_entry);
break;
default:
+ fclose(file);
die("Invalid ELF encoding\n");
}
break;
default:
+ fclose(file);
die("Invalid ELF class\n");
}
printf("0x%016" PRIx64 "\n", entry);
+ fclose(file);
return EXIT_SUCCESS;
}
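
The elf-entry.c hunk above plugs a file-handle leak by adding fclose() on every early-return path. An alternative is the single-exit layout sketched below; this is an illustration only, not how elf-entry.c is structured (its die() helper terminates the process directly):

    #include <stdio.h>
    #include <stdlib.h>

    int main(int argc, const char *argv[])
    {
        FILE *file;
        unsigned char hdr[64];
        int ret = EXIT_FAILURE;

        if (argc < 2) {
            fprintf(stderr, "usage: elf-entry <elf-file>\n");
            return EXIT_FAILURE;
        }

        file = fopen(argv[1], "rb");
        if (!file) {
            perror("Unable to open input file");
            return EXIT_FAILURE;
        }

        if (fread(hdr, 1, sizeof(hdr), file) != sizeof(hdr)) {
            perror("Unable to read input file");
            goto out;
        }

        /* ...header validation and entry-point extraction would go here... */
        ret = EXIT_SUCCESS;
    out:
        /* single cleanup point: every path after fopen() funnels through here */
        fclose(file);
        return ret;
    }
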
diff --git a/arch/mips/tools/loongson3-llsc-check.c b/arch/mips/tools/loongson3-llsc-check.c
index 0ebddd0ae46f..bdbc7b4324ec 100644
--- a/arch/mips/tools/loongson3-llsc-check.c
+++ b/arch/mips/tools/loongson3-llsc-check.c
@@ -303,5 +303,7 @@ out_munmap:
out_close:
close(vmlinux_fd);
out_ret:
+ fprintf(stdout, "loongson3-llsc-check returns %s\n",
+ status ? "failure" : "success");
return status;
}
diff --git a/arch/mips/txx9/Kconfig b/arch/mips/txx9/Kconfig
index 9a22a182b7a4..85c4c121c71f 100644
--- a/arch/mips/txx9/Kconfig
+++ b/arch/mips/txx9/Kconfig
@@ -58,7 +58,7 @@ config TOSHIBA_RBTX4939
config SOC_TX3927
bool
select CEVT_TXX9
- select HAS_TXX9_SERIAL
+ imply HAS_TXX9_SERIAL
select HAVE_PCI
select IRQ_TXX9
select GPIO_TXX9
@@ -66,30 +66,30 @@ config SOC_TX3927
config SOC_TX4927
bool
select CEVT_TXX9
- select HAS_TXX9_SERIAL
+ imply HAS_TXX9_SERIAL
select HAVE_PCI
select IRQ_TXX9
select PCI_TX4927
select GPIO_TXX9
- select HAS_TXX9_ACLC
+ imply HAS_TXX9_ACLC
config SOC_TX4938
bool
select CEVT_TXX9
- select HAS_TXX9_SERIAL
+ imply HAS_TXX9_SERIAL
select HAVE_PCI
select IRQ_TXX9
select PCI_TX4927
select GPIO_TXX9
- select HAS_TXX9_ACLC
+ imply HAS_TXX9_ACLC
config SOC_TX4939
bool
select CEVT_TXX9
- select HAS_TXX9_SERIAL
+ imply HAS_TXX9_SERIAL
select HAVE_PCI
select PCI_TX4927
- select HAS_TXX9_ACLC
+ imply HAS_TXX9_ACLC
config TXX9_7SEGLED
bool
diff --git a/arch/mips/txx9/Platform b/arch/mips/txx9/Platform
index a176d1fd5799..7f4429ba22eb 100644
--- a/arch/mips/txx9/Platform
+++ b/arch/mips/txx9/Platform
@@ -1,6 +1,3 @@
-platform-$(CONFIG_MACH_TX39XX) += txx9/
-platform-$(CONFIG_MACH_TX49XX) += txx9/
-
cflags-$(CONFIG_MACH_TX39XX) += \
-I$(srctree)/arch/mips/include/asm/mach-tx39xx
cflags-$(CONFIG_MACH_TX49XX) += \
diff --git a/arch/mips/vdso/Kconfig b/arch/mips/vdso/Kconfig
new file mode 100644
index 000000000000..7aec721398d5
--- /dev/null
+++ b/arch/mips/vdso/Kconfig
@@ -0,0 +1,18 @@
+# For the pre-R6 code in arch/mips/vdso/vdso.h for locating
+# the base address of VDSO, the linker will emit a R_MIPS_PC32
+# relocation in binutils > 2.25 but it will fail with older versions
+# because that relocation is not supported for that symbol. As a result
+# of which we are forced to disable the VDSO symbols when building
+# with < 2.25 binutils on pre-R6 kernels. For more references on why we
+# can't use other methods to get the base address of VDSO please refer to
+# the comments on that file.
+#
+# GCC (at least up to version 9.2) appears to emit function calls that make use
+# of the GOT when targeting microMIPS, which we can't use in the VDSO due to
+# the lack of relocations. As such, we disable the VDSO for microMIPS builds.
+
+config MIPS_LD_CAN_LINK_VDSO
+ def_bool LD_VERSION >= 225000000 || LD_IS_LLD
+
+config MIPS_DISABLE_VDSO
+ def_bool CPU_MICROMIPS || (!CPU_MIPSR6 && !MIPS_LD_CAN_LINK_VDSO)
diff --git a/arch/mips/vdso/Makefile b/arch/mips/vdso/Makefile
index d7fe8408603e..2e64c7600eea 100644
--- a/arch/mips/vdso/Makefile
+++ b/arch/mips/vdso/Makefile
@@ -52,44 +52,17 @@ endif
CFLAGS_REMOVE_vgettimeofday.o = -pg
-DISABLE_VDSO := n
-
-#
-# For the pre-R6 code in arch/mips/vdso/vdso.h for locating
-# the base address of VDSO, the linker will emit a R_MIPS_PC32
-# relocation in binutils > 2.25 but it will fail with older versions
-# because that relocation is not supported for that symbol. As a result
-# of which we are forced to disable the VDSO symbols when building
-# with < 2.25 binutils on pre-R6 kernels. For more references on why we
-# can't use other methods to get the base address of VDSO please refer to
-# the comments on that file.
-#
-ifndef CONFIG_CPU_MIPSR6
- ifeq ($(call ld-ifversion, -lt, 225000000, y),y)
+ifdef CONFIG_MIPS_DISABLE_VDSO
+ ifndef CONFIG_MIPS_LD_CAN_LINK_VDSO
$(warning MIPS VDSO requires binutils >= 2.25)
- DISABLE_VDSO := y
endif
-endif
-
-#
-# GCC (at least up to version 9.2) appears to emit function calls that make use
-# of the GOT when targeting microMIPS, which we can't use in the VDSO due to
-# the lack of relocations. As such, we disable the VDSO for microMIPS builds.
-#
-ifdef CONFIG_CPU_MICROMIPS
- DISABLE_VDSO := y
-endif
-
-ifeq ($(DISABLE_VDSO),y)
obj-vdso-y := $(filter-out vgettimeofday.o, $(obj-vdso-y))
- ccflags-vdso += -DDISABLE_MIPS_VDSO
endif
# VDSO linker flags.
-VDSO_LDFLAGS := \
- -Wl,-Bsymbolic -Wl,--no-undefined -Wl,-soname=linux-vdso.so.1 \
- $(addprefix -Wl$(comma),$(filter -E%,$(KBUILD_CFLAGS))) \
- -nostdlib -shared -Wl,--hash-style=sysv -Wl,--build-id
+ldflags-y := -Bsymbolic --no-undefined -soname=linux-vdso.so.1 \
+ $(filter -E%,$(KBUILD_CFLAGS)) -nostdlib -shared \
+ -G 0 --eh-frame-hdr --hash-style=sysv --build-id -T
CFLAGS_REMOVE_vdso.o = -pg
@@ -108,11 +81,7 @@ quiet_cmd_vdso_mips_check = VDSOCHK $@
#
quiet_cmd_vdsold_and_vdso_check = LD $@
- cmd_vdsold_and_vdso_check = $(cmd_vdsold); $(cmd_vdso_check); $(cmd_vdso_mips_check)
-
-quiet_cmd_vdsold = VDSO $@
- cmd_vdsold = $(CC) $(c_flags) $(VDSO_LDFLAGS) \
- -Wl,-T $(filter %.lds,$^) $(filter %.o,$^) -o $@
+ cmd_vdsold_and_vdso_check = $(cmd_ld); $(cmd_vdso_check); $(cmd_vdso_mips_check)
quiet_cmd_vdsoas_o_S = AS $@
cmd_vdsoas_o_S = $(CC) $(a_flags) -c -o $@ $<
diff --git a/arch/mips/vdso/vdso.lds.S b/arch/mips/vdso/vdso.lds.S
index da4627430aba..d90b65724d78 100644
--- a/arch/mips/vdso/vdso.lds.S
+++ b/arch/mips/vdso/vdso.lds.S
@@ -91,7 +91,7 @@ PHDRS
VERSION
{
LINUX_2.6 {
-#ifndef DISABLE_MIPS_VDSO
+#ifndef CONFIG_MIPS_DISABLE_VDSO
global:
__vdso_clock_gettime;
__vdso_gettimeofday;
diff --git a/arch/mips/vr41xx/Makefile b/arch/mips/vr41xx/Makefile
new file mode 100644
index 000000000000..765020d5ee4d
--- /dev/null
+++ b/arch/mips/vr41xx/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+obj-$(CONFIG_MACH_VR41XX) += common/
+obj-$(CONFIG_CASIO_E55) += casio-e55/
+obj-$(CONFIG_IBM_WORKPAD) += ibm-workpad/
diff --git a/arch/mips/vr41xx/Platform b/arch/mips/vr41xx/Platform
index b6c8d5c08ddb..3f593a3e5678 100644
--- a/arch/mips/vr41xx/Platform
+++ b/arch/mips/vr41xx/Platform
@@ -1,19 +1,16 @@
#
# NEC VR4100 series based machines
#
-platform-$(CONFIG_MACH_VR41XX) += vr41xx/common/
cflags-$(CONFIG_MACH_VR41XX) += -I$(srctree)/arch/mips/include/asm/mach-vr41xx
#
# CASIO CASSIPEIA E-55/65 (VR4111)
#
-platform-$(CONFIG_CASIO_E55) += vr41xx/casio-e55/
load-$(CONFIG_CASIO_E55) += 0xffffffff80004000
#
# IBM WorkPad z50 (VR4121)
#
-platform-$(CONFIG_IBM_WORKPAD) += vr41xx/ibm-workpad/
load-$(CONFIG_IBM_WORKPAD) += 0xffffffff80004000
#
diff --git a/arch/nios2/include/asm/checksum.h b/arch/nios2/include/asm/checksum.h
index 703c5ee63421..ec39698d3bea 100644
--- a/arch/nios2/include/asm/checksum.h
+++ b/arch/nios2/include/asm/checksum.h
@@ -14,8 +14,6 @@
extern __wsum csum_partial(const void *buff, int len, __wsum sum);
extern __wsum csum_partial_copy(const void *src, void *dst, int len,
__wsum sum);
-extern __wsum csum_partial_copy_from_user(const void __user *src, void *dst,
- int len, __wsum sum, int *csum_err);
#define csum_partial_copy_nocheck(src, dst, len, sum) \
csum_partial_copy((src), (dst), (len), (sum))
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index 3801a2ef9bca..92128f9164ce 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -68,7 +68,7 @@ config PARISC
The PA-RISC microprocessor is designed by Hewlett-Packard and used
in many of their workstations & servers (HP9000 700 and 800 series,
and later HP3000 series). The PA-RISC Linux project home page is
- at <http://www.parisc-linux.org/>.
+ at <https://parisc.wiki.kernel.org>.
config CPU_BIG_ENDIAN
def_bool y
diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile
index 628cd8bb7ad8..fadbbd010337 100644
--- a/arch/parisc/Makefile
+++ b/arch/parisc/Makefile
@@ -21,8 +21,6 @@ KBUILD_IMAGE := vmlinuz
NM = sh $(srctree)/arch/parisc/nm
CHECKFLAGS += -D__hppa__=1
-LIBGCC = $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name)
-export LIBGCC
ifdef CONFIG_64BIT
UTS_MACHINE := parisc64
@@ -110,6 +108,8 @@ cflags-$(CONFIG_PA8X00) += -march=2.0 -mschedule=8000
head-y := arch/parisc/kernel/head.o
KBUILD_CFLAGS += $(cflags-y)
+LIBGCC := $(shell $(CC) -print-libgcc-file-name)
+export LIBGCC
kernel-y := mm/ kernel/ math-emu/
diff --git a/arch/parisc/boot/compressed/Makefile b/arch/parisc/boot/compressed/Makefile
index 1e5879c6a752..dff453687530 100644
--- a/arch/parisc/boot/compressed/Makefile
+++ b/arch/parisc/boot/compressed/Makefile
@@ -16,6 +16,7 @@ targets += real2.S firmware.c
KBUILD_CFLAGS := -D__KERNEL__ -O2 -DBOOTLOADER
KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
+KBUILD_CFLAGS += -fno-strict-aliasing
KBUILD_CFLAGS += $(cflags-y) -fno-delete-null-pointer-checks -fno-builtin-printf
KBUILD_CFLAGS += -fno-PIE -mno-space-regs -mdisable-fpregs -Os
ifndef CONFIG_64BIT
diff --git a/arch/parisc/include/asm/checksum.h b/arch/parisc/include/asm/checksum.h
index c1c22819a04d..fe8c63b2d2c3 100644
--- a/arch/parisc/include/asm/checksum.h
+++ b/arch/parisc/include/asm/checksum.h
@@ -27,13 +27,6 @@ extern __wsum csum_partial(const void *, int, __wsum);
extern __wsum csum_partial_copy_nocheck(const void *, void *, int, __wsum);
/*
- * this is a new version of the above that records errors it finds in *errp,
- * but continues and zeros the rest of the buffer.
- */
-extern __wsum csum_partial_copy_from_user(const void __user *src,
- void *dst, int len, __wsum sum, int *errp);
-
-/*
* Optimized for IP headers, which always checksum on 4 octet boundaries.
*
* Written by Randolph Chung <tausq@debian.org>, and then mucked with by
diff --git a/arch/parisc/include/asm/floppy.h b/arch/parisc/include/asm/floppy.h
index 09b6f4c1687e..762cfe7778c0 100644
--- a/arch/parisc/include/asm/floppy.h
+++ b/arch/parisc/include/asm/floppy.h
@@ -29,8 +29,8 @@
#define CSW fd_routine[can_use_virtual_dma & 1]
-#define fd_inb(port) readb(port)
-#define fd_outb(value, port) writeb(value, port)
+#define fd_inb(base, reg) readb((base) + (reg))
+#define fd_outb(value, base, reg) writeb(value, (base) + (reg))
#define fd_request_dma() CSW._request_dma(FLOPPY_DMA,"floppy")
#define fd_free_dma() CSW._free_dma(FLOPPY_DMA)
@@ -75,27 +75,28 @@ static void floppy_hardint(int irq, void *dev_id, struct pt_regs * regs)
register char *lptr = virtual_dma_addr;
for (lcount = virtual_dma_count; lcount; lcount--) {
- st = fd_inb(virtual_dma_port+4) & 0xa0 ;
- if (st != 0xa0)
+ st = fd_inb(virtual_dma_port, FD_STATUS);
+ st &= STATUS_DMA | STATUS_READY;
+ if (st != (STATUS_DMA | STATUS_READY))
break;
if (virtual_dma_mode) {
- fd_outb(*lptr, virtual_dma_port+5);
+ fd_outb(*lptr, virtual_dma_port, FD_DATA);
} else {
- *lptr = fd_inb(virtual_dma_port+5);
+ *lptr = fd_inb(virtual_dma_port, FD_DATA);
}
lptr++;
}
virtual_dma_count = lcount;
virtual_dma_addr = lptr;
- st = fd_inb(virtual_dma_port+4);
+ st = fd_inb(virtual_dma_port, FD_STATUS);
}
#ifdef TRACE_FLPY_INT
calls++;
#endif
- if (st == 0x20)
+ if (st == STATUS_DMA)
return;
- if (!(st & 0x20)) {
+ if (!(st & STATUS_DMA)) {
virtual_dma_residue += virtual_dma_count;
virtual_dma_count = 0;
#ifdef TRACE_FLPY_INT
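
The two-argument fd_inb()/fd_outb() form replaces the open-coded "port + 4" / "port + 5" arithmetic and the raw 0xa0/0x20 masks with the register names from <uapi/linux/fdreg.h>. A small user-space simulation of the status test; the constants are repeated here only so the sketch stands alone and follow the conventional PC FDC layout:

        #include <stdio.h>

        /* Values as conventionally defined in <uapi/linux/fdreg.h>. */
        #define FD_STATUS    4
        #define FD_DATA      5
        #define STATUS_DMA   0x20
        #define STATUS_READY 0x80

        /* Stand-in for the platform readb() MMIO accessor. */
        static unsigned char readb_sim(const unsigned char *addr) { return *addr; }
        #define fd_inb(base, reg) readb_sim((base) + (reg))

        int main(void)
        {
                unsigned char regs[8] = { 0 };
                unsigned char st;

                regs[FD_STATUS] = STATUS_DMA | STATUS_READY;   /* 0xa0, the value the old code tested */

                st = fd_inb(regs, FD_STATUS) & (STATUS_DMA | STATUS_READY);
                printf("ready for PIO transfer: %s\n",
                       st == (STATUS_DMA | STATUS_READY) ? "yes" : "no");
                return 0;
        }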
diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h
index 9832c73a7021..cd7df48dc874 100644
--- a/arch/parisc/include/asm/pgtable.h
+++ b/arch/parisc/include/asm/pgtable.h
@@ -93,10 +93,8 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
#define set_pte_at(mm, addr, ptep, pteval) \
do { \
- pte_t old_pte; \
unsigned long flags; \
spin_lock_irqsave(pgd_spinlock((mm)->pgd), flags);\
- old_pte = *ptep; \
set_pte(ptep, pteval); \
purge_tlb_entries(mm, addr); \
spin_unlock_irqrestore(pgd_spinlock((mm)->pgd), flags);\
diff --git a/arch/parisc/kernel/firmware.c b/arch/parisc/kernel/firmware.c
index 1d976f2ebff0..665b70086685 100644
--- a/arch/parisc/kernel/firmware.c
+++ b/arch/parisc/kernel/firmware.c
@@ -4,7 +4,8 @@
*
* PDC == Processor Dependent Code
*
- * See http://www.parisc-linux.org/documentation/index.html
+ * See PDC documentation at
+ * https://parisc.wiki.kernel.org/index.php/Technical_Documentation
* for documentation describing the entry points and calling
* conventions defined below.
*
diff --git a/arch/parisc/kernel/hardware.c b/arch/parisc/kernel/hardware.c
index 98c5203c1ab0..17161e72ea29 100644
--- a/arch/parisc/kernel/hardware.c
+++ b/arch/parisc/kernel/hardware.c
@@ -6,7 +6,8 @@
*
* Based on the document "PA-RISC 1.1 I/O Firmware Architecture
* Reference Specification", March 7, 1999, version 0.96. This
- * is available at http://parisc-linux.org/documentation/
+ * is available at
+ * https://parisc.wiki.kernel.org/index.php/Technical_Documentation
*
* Copyright 1999 by Alex deVries <alex@onefishtwo.ca>
* and copyright 1999 The Puffin Group Inc.
diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c
index 1c50093e2ebe..fac18c623d16 100644
--- a/arch/parisc/kernel/module.c
+++ b/arch/parisc/kernel/module.c
@@ -3,9 +3,9 @@
*
* The best reference for this stuff is probably the Processor-
* Specific ELF Supplement for PA-RISC:
- * http://ftp.parisc-linux.org/docs/arch/elf-pa-hp.pdf
+ * https://parisc.wiki.kernel.org/index.php/File:Elf-pa-hp.pdf
*
- * Linux/PA-RISC Project (http://www.parisc-linux.org/)
+ * Linux/PA-RISC Project
* Copyright (C) 2003 Randolph Chung <tausq at debian . org>
* Copyright (C) 2008 Helge Deller <deller@gmx.de>
*
diff --git a/arch/parisc/kernel/perf.c b/arch/parisc/kernel/perf.c
index e1a8fee3ad49..d46b6709ec56 100644
--- a/arch/parisc/kernel/perf.c
+++ b/arch/parisc/kernel/perf.c
@@ -300,7 +300,7 @@ static ssize_t perf_write(struct file *file, const char __user *buf,
else
return -EFAULT;
- if (!capable(CAP_SYS_ADMIN))
+ if (!perfmon_capable())
return -EACCES;
if (count != sizeof(uint32_t))
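
perfmon_capable() is the helper introduced alongside CAP_PERFMON in this cycle; to the best of my reading it reduces to the check below, so existing CAP_SYS_ADMIN users keep working while the narrower capability becomes sufficient. This is a sketch of its assumed shape, not a copy; see include/linux/perf_event.h for the authoritative definition:

        /* Assumed shape of the helper. */
        static inline bool perfmon_capable(void)
        {
                return capable(CAP_PERFMON) || capable(CAP_SYS_ADMIN);
        }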
diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl
index 52a15f5cd130..5a758fa6ec52 100644
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -435,3 +435,4 @@
435 common clone3 sys_clone3_wrapper
437 common openat2 sys_openat2
438 common pidfd_getfd sys_pidfd_getfd
+439 common faccessat2 sys_faccessat2
diff --git a/arch/parisc/lib/checksum.c b/arch/parisc/lib/checksum.c
index 256322c7b648..c6f161583549 100644
--- a/arch/parisc/lib/checksum.c
+++ b/arch/parisc/lib/checksum.c
@@ -123,23 +123,3 @@ __wsum csum_partial_copy_nocheck(const void *src, void *dst,
return sum;
}
EXPORT_SYMBOL(csum_partial_copy_nocheck);
-
-/*
- * Copy from userspace and compute checksum. If we catch an exception
- * then zero the rest of the buffer.
- */
-__wsum csum_partial_copy_from_user(const void __user *src,
- void *dst, int len,
- __wsum sum, int *err_ptr)
-{
- int missing;
-
- missing = copy_from_user(dst, src, len);
- if (missing) {
- memset(dst + len - missing, 0, missing);
- *err_ptr = -EFAULT;
- }
-
- return csum_partial(dst, len, sum);
-}
-EXPORT_SYMBOL(csum_partial_copy_from_user);
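
With the arch-specific csum_partial_copy_from_user() gone, parisc relies on the generic copy-then-checksum path. For readers unfamiliar with the arithmetic, here is a small standalone simulation of the 16-bit ones'-complement sum that csum_partial() computes; it assumes a big-endian word interpretation and is illustrative only, not the kernel implementation:

        #include <stdint.h>
        #include <stddef.h>
        #include <stdio.h>

        /* Fold a 32-bit accumulator into a 16-bit ones'-complement checksum. */
        static uint16_t csum_fold32(uint32_t sum)
        {
                sum = (sum & 0xffff) + (sum >> 16);
                sum = (sum & 0xffff) + (sum >> 16);
                return (uint16_t)~sum;
        }

        /* Accumulate 16-bit big-endian words; odd trailing byte is padded. */
        static uint32_t csum_partial_sim(const uint8_t *buf, size_t len, uint32_t sum)
        {
                size_t i;

                for (i = 0; i + 1 < len; i += 2)
                        sum += (uint32_t)buf[i] << 8 | buf[i + 1];
                if (len & 1)
                        sum += (uint32_t)buf[len - 1] << 8;
                return sum;
        }

        int main(void)
        {
                uint8_t data[] = { 0x45, 0x00, 0x00, 0x3c, 0x1c, 0x46 };

                printf("folded checksum: 0x%04x\n",
                       (unsigned)csum_fold32(csum_partial_sim(data, sizeof(data), 0)));
                return 0;
        }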
diff --git a/arch/powerpc/crypto/md5-glue.c b/arch/powerpc/crypto/md5-glue.c
index 7d1bf2fcf668..c24f605033bd 100644
--- a/arch/powerpc/crypto/md5-glue.c
+++ b/arch/powerpc/crypto/md5-glue.c
@@ -11,7 +11,6 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
-#include <linux/cryptohash.h>
#include <linux/types.h>
#include <crypto/md5.h>
#include <asm/byteorder.h>
diff --git a/arch/powerpc/crypto/sha1-spe-glue.c b/arch/powerpc/crypto/sha1-spe-glue.c
index 6379990bd604..cb57be4ada61 100644
--- a/arch/powerpc/crypto/sha1-spe-glue.c
+++ b/arch/powerpc/crypto/sha1-spe-glue.c
@@ -11,7 +11,6 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
-#include <linux/cryptohash.h>
#include <linux/types.h>
#include <crypto/sha.h>
#include <asm/byteorder.h>
diff --git a/arch/powerpc/crypto/sha1.c b/arch/powerpc/crypto/sha1.c
index 7b43fc352089..b40dc50a6908 100644
--- a/arch/powerpc/crypto/sha1.c
+++ b/arch/powerpc/crypto/sha1.c
@@ -16,14 +16,13 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
-#include <linux/cryptohash.h>
#include <linux/types.h>
#include <crypto/sha.h>
#include <asm/byteorder.h>
-extern void powerpc_sha_transform(u32 *state, const u8 *src, u32 *temp);
+void powerpc_sha_transform(u32 *state, const u8 *src);
-static int sha1_init(struct shash_desc *desc)
+static int powerpc_sha1_init(struct shash_desc *desc)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
@@ -34,8 +33,8 @@ static int sha1_init(struct shash_desc *desc)
return 0;
}
-static int sha1_update(struct shash_desc *desc, const u8 *data,
- unsigned int len)
+static int powerpc_sha1_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
unsigned int partial, done;
@@ -47,7 +46,6 @@ static int sha1_update(struct shash_desc *desc, const u8 *data,
src = data;
if ((partial + len) > 63) {
- u32 temp[SHA_WORKSPACE_WORDS];
if (partial) {
done = -partial;
@@ -56,12 +54,11 @@ static int sha1_update(struct shash_desc *desc, const u8 *data,
}
do {
- powerpc_sha_transform(sctx->state, src, temp);
+ powerpc_sha_transform(sctx->state, src);
done += 64;
src = data + done;
} while (done + 63 < len);
- memzero_explicit(temp, sizeof(temp));
partial = 0;
}
memcpy(sctx->buffer + partial, src, len - done);
@@ -71,7 +68,7 @@ static int sha1_update(struct shash_desc *desc, const u8 *data,
/* Add padding and return the message digest. */
-static int sha1_final(struct shash_desc *desc, u8 *out)
+static int powerpc_sha1_final(struct shash_desc *desc, u8 *out)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
__be32 *dst = (__be32 *)out;
@@ -84,10 +81,10 @@ static int sha1_final(struct shash_desc *desc, u8 *out)
/* Pad out to 56 mod 64 */
index = sctx->count & 0x3f;
padlen = (index < 56) ? (56 - index) : ((64+56) - index);
- sha1_update(desc, padding, padlen);
+ powerpc_sha1_update(desc, padding, padlen);
/* Append length */
- sha1_update(desc, (const u8 *)&bits, sizeof(bits));
+ powerpc_sha1_update(desc, (const u8 *)&bits, sizeof(bits));
/* Store state in digest */
for (i = 0; i < 5; i++)
@@ -99,7 +96,7 @@ static int sha1_final(struct shash_desc *desc, u8 *out)
return 0;
}
-static int sha1_export(struct shash_desc *desc, void *out)
+static int powerpc_sha1_export(struct shash_desc *desc, void *out)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
@@ -107,7 +104,7 @@ static int sha1_export(struct shash_desc *desc, void *out)
return 0;
}
-static int sha1_import(struct shash_desc *desc, const void *in)
+static int powerpc_sha1_import(struct shash_desc *desc, const void *in)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
@@ -117,11 +114,11 @@ static int sha1_import(struct shash_desc *desc, const void *in)
static struct shash_alg alg = {
.digestsize = SHA1_DIGEST_SIZE,
- .init = sha1_init,
- .update = sha1_update,
- .final = sha1_final,
- .export = sha1_export,
- .import = sha1_import,
+ .init = powerpc_sha1_init,
+ .update = powerpc_sha1_update,
+ .final = powerpc_sha1_final,
+ .export = powerpc_sha1_export,
+ .import = powerpc_sha1_import,
.descsize = sizeof(struct sha1_state),
.statesize = sizeof(struct sha1_state),
.base = {
diff --git a/arch/powerpc/crypto/sha256-spe-glue.c b/arch/powerpc/crypto/sha256-spe-glue.c
index 84939e563b81..ceb0b6c980b3 100644
--- a/arch/powerpc/crypto/sha256-spe-glue.c
+++ b/arch/powerpc/crypto/sha256-spe-glue.c
@@ -12,7 +12,6 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
-#include <linux/cryptohash.h>
#include <linux/types.h>
#include <crypto/sha.h>
#include <asm/byteorder.h>
diff --git a/arch/powerpc/include/asm/floppy.h b/arch/powerpc/include/asm/floppy.h
index 167c44b58848..7af9a68fd949 100644
--- a/arch/powerpc/include/asm/floppy.h
+++ b/arch/powerpc/include/asm/floppy.h
@@ -13,8 +13,8 @@
#include <asm/machdep.h>
-#define fd_inb(port) inb_p(port)
-#define fd_outb(value,port) outb_p(value,port)
+#define fd_inb(base, reg) inb_p((base) + (reg))
+#define fd_outb(value, base, reg) outb_p(value, (base) + (reg))
#define fd_enable_dma() enable_dma(FLOPPY_DMA)
#define fd_disable_dma() fd_ops->_disable_dma(FLOPPY_DMA)
@@ -61,21 +61,22 @@ static irqreturn_t floppy_hardint(int irq, void *dev_id)
st = 1;
for (lcount=virtual_dma_count, lptr=virtual_dma_addr;
lcount; lcount--, lptr++) {
- st=inb(virtual_dma_port+4) & 0xa0 ;
- if (st != 0xa0)
+ st = inb(virtual_dma_port + FD_STATUS);
+ st &= STATUS_DMA | STATUS_READY;
+ if (st != (STATUS_DMA | STATUS_READY))
break;
if (virtual_dma_mode)
- outb_p(*lptr, virtual_dma_port+5);
+ outb_p(*lptr, virtual_dma_port + FD_DATA);
else
- *lptr = inb_p(virtual_dma_port+5);
+ *lptr = inb_p(virtual_dma_port + FD_DATA);
}
virtual_dma_count = lcount;
virtual_dma_addr = lptr;
- st = inb(virtual_dma_port+4);
+ st = inb(virtual_dma_port + FD_STATUS);
- if (st == 0x20)
+ if (st == STATUS_DMA)
return IRQ_HANDLED;
- if (!(st & 0x20)) {
+ if (!(st & STATUS_DMA)) {
virtual_dma_residue += virtual_dma_count;
virtual_dma_count=0;
doing_vdma = 0;
diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h
index 635969b5b58e..13f90dd03450 100644
--- a/arch/powerpc/include/asm/io.h
+++ b/arch/powerpc/include/asm/io.h
@@ -699,10 +699,6 @@ static inline void iosync(void)
*
* * iounmap undoes such a mapping and can be hooked
*
- * * __ioremap_at (and the pending __iounmap_at) are low level functions to
- * create hand-made mappings for use only by the PCI code and cannot
- * currently be hooked. Must be page aligned.
- *
* * __ioremap_caller is the same as above but takes an explicit caller
* reference rather than using __builtin_return_address(0)
*
@@ -719,6 +715,8 @@ void __iomem *ioremap_coherent(phys_addr_t address, unsigned long size);
extern void iounmap(volatile void __iomem *addr);
+void __iomem *ioremap_phb(phys_addr_t paddr, unsigned long size);
+
int early_ioremap_range(unsigned long ea, phys_addr_t pa,
unsigned long size, pgprot_t prot);
void __iomem *do_ioremap(phys_addr_t pa, phys_addr_t offset, unsigned long size,
@@ -727,10 +725,6 @@ void __iomem *do_ioremap(phys_addr_t pa, phys_addr_t offset, unsigned long size,
extern void __iomem *__ioremap_caller(phys_addr_t, unsigned long size,
pgprot_t prot, void *caller);
-extern void __iomem * __ioremap_at(phys_addr_t pa, void *ea,
- unsigned long size, pgprot_t prot);
-extern void __iounmap_at(void *ea, unsigned long size);
-
/*
* When CONFIG_PPC_INDIRECT_PIO is set, we use the generic iomap implementation
* which needs some additional definitions here. They basically allow PIO
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 506e4df2d730..6e5d85ba588d 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -78,7 +78,7 @@ struct kvmppc_vcore {
struct kvm_vcpu *runnable_threads[MAX_SMT_THREADS];
struct list_head preempt_list;
spinlock_t lock;
- struct swait_queue_head wq;
+ struct rcuwait wait;
spinlock_t stoltb_lock; /* protects stolen_tb and preempt_tb */
u64 stolen_tb;
u64 preempt_tb;
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 1dc63101ffe1..337047ba4a56 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -751,7 +751,7 @@ struct kvm_vcpu_arch {
u8 irq_pending; /* Used by XIVE to signal pending guest irqs */
u32 last_inst;
- struct swait_queue_head *wqp;
+ struct rcuwait *waitp;
struct kvmppc_vcore *vcore;
int ret;
int trap;
diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
index 69f4cb3b7c56..b92e81b256e5 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -66,7 +66,7 @@ struct pci_controller {
void __iomem *io_base_virt;
#ifdef CONFIG_PPC64
- void *io_base_alloc;
+ void __iomem *io_base_alloc;
#endif
resource_size_t io_base_phys;
resource_size_t pci_io_size;
diff --git a/arch/powerpc/include/uapi/asm/kvm_para.h b/arch/powerpc/include/uapi/asm/kvm_para.h
index be48c2215fa2..a809b1b44ddf 100644
--- a/arch/powerpc/include/uapi/asm/kvm_para.h
+++ b/arch/powerpc/include/uapi/asm/kvm_para.h
@@ -31,7 +31,7 @@
* Struct fields are always 32 or 64 bit aligned, depending on them being 32
* or 64 bit wide respectively.
*
- * See Documentation/virt/kvm/ppc-pv.txt
+ * See Documentation/virt/kvm/ppc-pv.rst
*/
struct kvm_vcpu_arch_shared {
__u64 scratch1;
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 1f1169856dc8..112d150354b2 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -748,9 +748,8 @@ void do_IRQ(struct pt_regs *regs)
static void *__init alloc_vm_stack(void)
{
- return __vmalloc_node_range(THREAD_SIZE, THREAD_ALIGN, VMALLOC_START,
- VMALLOC_END, THREADINFO_GFP, PAGE_KERNEL,
- 0, NUMA_NO_NODE, (void*)_RET_IP_);
+ return __vmalloc_node(THREAD_SIZE, THREAD_ALIGN, THREADINFO_GFP,
+ NUMA_NO_NODE, (void *)_RET_IP_);
}
static void __init vmap_irqstack_init(void)
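
The stack allocation now goes through __vmalloc_node() instead of spelling out the full VMALLOC_START/VMALLOC_END range. Assumed prototype of the helper as exported in this series (see include/linux/vmalloc.h), shown so the five arguments above can be matched up:

        /* Assumed prototype; listed for reference only. */
        void *__vmalloc_node(unsigned long size, unsigned long align,
                             gfp_t gfp_mask, int node, const void *caller);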
diff --git a/arch/powerpc/kernel/isa-bridge.c b/arch/powerpc/kernel/isa-bridge.c
index 773671b512df..2257d24e6a26 100644
--- a/arch/powerpc/kernel/isa-bridge.c
+++ b/arch/powerpc/kernel/isa-bridge.c
@@ -18,6 +18,7 @@
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/notifier.h>
+#include <linux/vmalloc.h>
#include <asm/processor.h>
#include <asm/io.h>
@@ -38,6 +39,22 @@ EXPORT_SYMBOL_GPL(isa_bridge_pcidev);
#define ISA_SPACE_MASK 0x1
#define ISA_SPACE_IO 0x1
+static void remap_isa_base(phys_addr_t pa, unsigned long size)
+{
+ WARN_ON_ONCE(ISA_IO_BASE & ~PAGE_MASK);
+ WARN_ON_ONCE(pa & ~PAGE_MASK);
+ WARN_ON_ONCE(size & ~PAGE_MASK);
+
+ if (slab_is_available()) {
+ if (ioremap_page_range(ISA_IO_BASE, ISA_IO_BASE + size, pa,
+ pgprot_noncached(PAGE_KERNEL)))
+ unmap_kernel_range(ISA_IO_BASE, size);
+ } else {
+ early_ioremap_range(ISA_IO_BASE, pa, size,
+ pgprot_noncached(PAGE_KERNEL));
+ }
+}
+
static void pci_process_ISA_OF_ranges(struct device_node *isa_node,
unsigned long phb_io_base_phys)
{
@@ -105,15 +122,13 @@ static void pci_process_ISA_OF_ranges(struct device_node *isa_node,
if (size > 0x10000)
size = 0x10000;
- __ioremap_at(phb_io_base_phys, (void *)ISA_IO_BASE,
- size, pgprot_noncached(PAGE_KERNEL));
+ remap_isa_base(phb_io_base_phys, size);
return;
inval_range:
printk(KERN_ERR "no ISA IO ranges or unexpected isa range, "
"mapping 64k\n");
- __ioremap_at(phb_io_base_phys, (void *)ISA_IO_BASE,
- 0x10000, pgprot_noncached(PAGE_KERNEL));
+ remap_isa_base(phb_io_base_phys, 0x10000);
}
@@ -248,8 +263,7 @@ void __init isa_bridge_init_non_pci(struct device_node *np)
* and map it
*/
isa_io_base = ISA_IO_BASE;
- __ioremap_at(pbase, (void *)ISA_IO_BASE,
- size, pgprot_noncached(PAGE_KERNEL));
+ remap_isa_base(pbase, size);
pr_debug("ISA: Non-PCI bridge is %pOF\n", np);
}
@@ -297,7 +311,7 @@ static void isa_bridge_remove(void)
isa_bridge_pcidev = NULL;
/* Unmap the ISA area */
- __iounmap_at((void *)ISA_IO_BASE, 0x10000);
+ unmap_kernel_range(ISA_IO_BASE, 0x10000);
}
/**
diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c
index fb4f61096613..0cd1c88bfc8b 100644
--- a/arch/powerpc/kernel/nvram_64.c
+++ b/arch/powerpc/kernel/nvram_64.c
@@ -655,9 +655,7 @@ static void oops_to_nvram(struct kmsg_dumper *dumper,
int rc = -1;
switch (reason) {
- case KMSG_DUMP_RESTART:
- case KMSG_DUMP_HALT:
- case KMSG_DUMP_POWEROFF:
+ case KMSG_DUMP_SHUTDOWN:
/* These are almost always orderly shutdowns. */
return;
case KMSG_DUMP_OOPS:
diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c
index f83d1f69b1dd..d9ac980c398c 100644
--- a/arch/powerpc/kernel/pci_64.c
+++ b/arch/powerpc/kernel/pci_64.c
@@ -109,23 +109,47 @@ int pcibios_unmap_io_space(struct pci_bus *bus)
/* Get the host bridge */
hose = pci_bus_to_host(bus);
- /* Check if we have IOs allocated */
- if (hose->io_base_alloc == NULL)
- return 0;
-
pr_debug("IO unmapping for PHB %pOF\n", hose->dn);
pr_debug(" alloc=0x%p\n", hose->io_base_alloc);
- /* This is a PHB, we fully unmap the IO area */
- vunmap(hose->io_base_alloc);
-
+ iounmap(hose->io_base_alloc);
return 0;
}
EXPORT_SYMBOL_GPL(pcibios_unmap_io_space);
-static int pcibios_map_phb_io_space(struct pci_controller *hose)
+void __iomem *ioremap_phb(phys_addr_t paddr, unsigned long size)
{
struct vm_struct *area;
+ unsigned long addr;
+
+ WARN_ON_ONCE(paddr & ~PAGE_MASK);
+ WARN_ON_ONCE(size & ~PAGE_MASK);
+
+ /*
+ * Let's allocate some IO space for that guy. We don't pass VM_IOREMAP
+ * because we don't care about alignment tricks that the core does in
+ * that case. Maybe we should, due to stupid cards with incomplete
+ * address decoding, but I'd rather not deal with those outside of the
+ * reserved 64K legacy region.
+ */
+ area = __get_vm_area_caller(size, 0, PHB_IO_BASE, PHB_IO_END,
+ __builtin_return_address(0));
+ if (!area)
+ return NULL;
+
+ addr = (unsigned long)area->addr;
+ if (ioremap_page_range(addr, addr + size, paddr,
+ pgprot_noncached(PAGE_KERNEL))) {
+ unmap_kernel_range(addr, size);
+ return NULL;
+ }
+
+ return (void __iomem *)addr;
+}
+EXPORT_SYMBOL_GPL(ioremap_phb);
+
+static int pcibios_map_phb_io_space(struct pci_controller *hose)
+{
unsigned long phys_page;
unsigned long size_page;
unsigned long io_virt_offset;
@@ -146,12 +170,11 @@ static int pcibios_map_phb_io_space(struct pci_controller *hose)
* with incomplete address decoding but I'd rather not deal with
* those outside of the reserved 64K legacy region.
*/
- area = __get_vm_area(size_page, 0, PHB_IO_BASE, PHB_IO_END);
- if (area == NULL)
+ hose->io_base_alloc = ioremap_phb(phys_page, size_page);
+ if (!hose->io_base_alloc)
return -ENOMEM;
- hose->io_base_alloc = area->addr;
- hose->io_base_virt = (void __iomem *)(area->addr +
- hose->io_base_phys - phys_page);
+ hose->io_base_virt = hose->io_base_alloc +
+ hose->io_base_phys - phys_page;
pr_debug("IO mapping for PHB %pOF\n", hose->dn);
pr_debug(" phys=0x%016llx, virt=0x%p (alloc=0x%p)\n",
@@ -159,11 +182,6 @@ static int pcibios_map_phb_io_space(struct pci_controller *hose)
pr_debug(" size=0x%016llx (alloc=0x%016lx)\n",
hose->pci_io_size, size_page);
- /* Establish the mapping */
- if (__ioremap_at(phys_page, area->addr, size_page,
- pgprot_noncached(PAGE_KERNEL)) == NULL)
- return -ENOMEM;
-
/* Fixup hose IO resource */
io_virt_offset = pcibios_io_space_offset(hose);
hose->io_resource.start += io_virt_offset;
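
ioremap_phb() packages the old two-step __get_vm_area()/__ioremap_at() dance and is exported for modular users. A hedged usage sketch, under the assumption that the physical window is page aligned, mirroring the struct pci_controller fields used above:

        /* Sketch only; alignment and sizing are assumed to match what
         * pcibios_map_phb_io_space() does above. */
        static int map_phb_io(struct pci_controller *hose, phys_addr_t phys_page,
                              unsigned long size_page)
        {
                hose->io_base_alloc = ioremap_phb(phys_page, size_page);
                if (!hose->io_base_alloc)
                        return -ENOMEM;
                return 0;
        }

        static void unmap_phb_io(struct pci_controller *hose)
        {
                /* tears down both the page tables and the vmalloc area */
                iounmap(hose->io_base_alloc);
        }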
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index 220ae11555f2..f833a3190822 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -527,3 +527,4 @@
435 spu clone3 sys_ni_syscall
437 common openat2 sys_openat2
438 common pidfd_getfd sys_pidfd_getfd
+439 common faccessat2 sys_faccessat2
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 3fca22276bb1..b44dd75de517 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -441,15 +441,9 @@ nonrecoverable:
void system_reset_exception(struct pt_regs *regs)
{
unsigned long hsrr0, hsrr1;
- bool nested = in_nmi();
bool saved_hsrrs = false;
- /*
- * Avoid crashes in case of nested NMI exceptions. Recoverability
- * is determined by RI and in_nmi
- */
- if (!nested)
- nmi_enter();
+ nmi_enter();
/*
* System reset can interrupt code where HSRRs are live and MSR[RI]=1.
@@ -521,8 +515,7 @@ out:
mtspr(SPRN_HSRR1, hsrr1);
}
- if (!nested)
- nmi_exit();
+ nmi_exit();
/* What should we do here? We could issue a shutdown or hard reset. */
}
@@ -823,9 +816,8 @@ int machine_check_generic(struct pt_regs *regs)
void machine_check_exception(struct pt_regs *regs)
{
int recover = 0;
- bool nested = in_nmi();
- if (!nested)
- nmi_enter();
+
+ nmi_enter();
__this_cpu_inc(irq_stat.mce_exceptions);
@@ -851,8 +843,7 @@ void machine_check_exception(struct pt_regs *regs)
if (check_io_access(regs))
goto bail;
- if (!nested)
- nmi_exit();
+ nmi_exit();
die("Machine check", regs, SIGBUS);
@@ -863,8 +854,7 @@ void machine_check_exception(struct pt_regs *regs)
return;
bail:
- if (!nested)
- nmi_exit();
+ nmi_exit();
}
void SMIException(struct pt_regs *regs)
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index 31a0f201fb6f..a1706b63b82d 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -90,6 +90,7 @@ SECTIONS
#ifdef CONFIG_PPC64
*(.tramp.ftrace.text);
#endif
+ NOINSTR_TEXT
SCHED_TEXT
CPUIDLE_TEXT
LOCK_TEXT
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 5690a1f9b976..37508a356f28 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -36,41 +36,38 @@
#include "book3s.h"
#include "trace.h"
-#define VM_STAT(x, ...) offsetof(struct kvm, stat.x), KVM_STAT_VM, ## __VA_ARGS__
-#define VCPU_STAT(x, ...) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU, ## __VA_ARGS__
-
/* #define EXIT_DEBUG */
struct kvm_stats_debugfs_item debugfs_entries[] = {
- { "exits", VCPU_STAT(sum_exits) },
- { "mmio", VCPU_STAT(mmio_exits) },
- { "sig", VCPU_STAT(signal_exits) },
- { "sysc", VCPU_STAT(syscall_exits) },
- { "inst_emu", VCPU_STAT(emulated_inst_exits) },
- { "dec", VCPU_STAT(dec_exits) },
- { "ext_intr", VCPU_STAT(ext_intr_exits) },
- { "queue_intr", VCPU_STAT(queue_intr) },
- { "halt_poll_success_ns", VCPU_STAT(halt_poll_success_ns) },
- { "halt_poll_fail_ns", VCPU_STAT(halt_poll_fail_ns) },
- { "halt_wait_ns", VCPU_STAT(halt_wait_ns) },
- { "halt_successful_poll", VCPU_STAT(halt_successful_poll), },
- { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll), },
- { "halt_successful_wait", VCPU_STAT(halt_successful_wait) },
- { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
- { "halt_wakeup", VCPU_STAT(halt_wakeup) },
- { "pf_storage", VCPU_STAT(pf_storage) },
- { "sp_storage", VCPU_STAT(sp_storage) },
- { "pf_instruc", VCPU_STAT(pf_instruc) },
- { "sp_instruc", VCPU_STAT(sp_instruc) },
- { "ld", VCPU_STAT(ld) },
- { "ld_slow", VCPU_STAT(ld_slow) },
- { "st", VCPU_STAT(st) },
- { "st_slow", VCPU_STAT(st_slow) },
- { "pthru_all", VCPU_STAT(pthru_all) },
- { "pthru_host", VCPU_STAT(pthru_host) },
- { "pthru_bad_aff", VCPU_STAT(pthru_bad_aff) },
- { "largepages_2M", VM_STAT(num_2M_pages, .mode = 0444) },
- { "largepages_1G", VM_STAT(num_1G_pages, .mode = 0444) },
+ VCPU_STAT("exits", sum_exits),
+ VCPU_STAT("mmio", mmio_exits),
+ VCPU_STAT("sig", signal_exits),
+ VCPU_STAT("sysc", syscall_exits),
+ VCPU_STAT("inst_emu", emulated_inst_exits),
+ VCPU_STAT("dec", dec_exits),
+ VCPU_STAT("ext_intr", ext_intr_exits),
+ VCPU_STAT("queue_intr", queue_intr),
+ VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns),
+ VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns),
+ VCPU_STAT("halt_wait_ns", halt_wait_ns),
+ VCPU_STAT("halt_successful_poll", halt_successful_poll),
+ VCPU_STAT("halt_attempted_poll", halt_attempted_poll),
+ VCPU_STAT("halt_successful_wait", halt_successful_wait),
+ VCPU_STAT("halt_poll_invalid", halt_poll_invalid),
+ VCPU_STAT("halt_wakeup", halt_wakeup),
+ VCPU_STAT("pf_storage", pf_storage),
+ VCPU_STAT("sp_storage", sp_storage),
+ VCPU_STAT("pf_instruc", pf_instruc),
+ VCPU_STAT("sp_instruc", sp_instruc),
+ VCPU_STAT("ld", ld),
+ VCPU_STAT("ld_slow", ld_slow),
+ VCPU_STAT("st", st),
+ VCPU_STAT("st_slow", st_slow),
+ VCPU_STAT("pthru_all", pthru_all),
+ VCPU_STAT("pthru_host", pthru_host),
+ VCPU_STAT("pthru_bad_aff", pthru_bad_aff),
+ VM_STAT("largepages_2M", num_2M_pages, .mode = 0444),
+ VM_STAT("largepages_1G", num_1G_pages, .mode = 0444),
{ NULL }
};
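
The per-architecture VM_STAT/VCPU_STAT wrappers are dropped in favour of generic macros that also carry the debugfs name. Their assumed shape in the common KVM headers of this series, shown only to make the table entries above readable:

        /* Assumed definitions (see include/linux/kvm_host.h in this series). */
        #define VM_STAT(n, x, ...) \
                { n, offsetof(struct kvm, stat.x), KVM_STAT_VM, ## __VA_ARGS__ }
        #define VCPU_STAT(n, x, ...) \
                { n, offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU, ## __VA_ARGS__ }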
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 93493f0cbfe8..7f59c47a5b9d 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -230,13 +230,11 @@ static bool kvmppc_ipi_thread(int cpu)
static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
{
int cpu;
- struct swait_queue_head *wqp;
+ struct rcuwait *waitp;
- wqp = kvm_arch_vcpu_wq(vcpu);
- if (swq_has_sleeper(wqp)) {
- swake_up_one(wqp);
+ waitp = kvm_arch_vcpu_get_wait(vcpu);
+ if (rcuwait_wake_up(waitp))
++vcpu->stat.halt_wakeup;
- }
cpu = READ_ONCE(vcpu->arch.thread_cpu);
if (cpu >= 0 && kvmppc_ipi_thread(cpu))
@@ -2125,7 +2123,7 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int id)
spin_lock_init(&vcore->lock);
spin_lock_init(&vcore->stoltb_lock);
- init_swait_queue_head(&vcore->wq);
+ rcuwait_init(&vcore->wait);
vcore->preempt_tb = TB_NIL;
vcore->lpcr = kvm->arch.lpcr;
vcore->first_vcpuid = id;
@@ -3784,7 +3782,6 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
ktime_t cur, start_poll, start_wait;
int do_sleep = 1;
u64 block_ns;
- DECLARE_SWAITQUEUE(wait);
/* Poll for pending exceptions and ceded state */
cur = start_poll = ktime_get();
@@ -3812,10 +3809,10 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
}
}
- prepare_to_swait_exclusive(&vc->wq, &wait, TASK_INTERRUPTIBLE);
-
+ prepare_to_rcuwait(&vc->wait);
+ set_current_state(TASK_INTERRUPTIBLE);
if (kvmppc_vcore_check_block(vc)) {
- finish_swait(&vc->wq, &wait);
+ finish_rcuwait(&vc->wait);
do_sleep = 0;
/* If we polled, count this as a successful poll */
if (vc->halt_poll_ns)
@@ -3829,7 +3826,7 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
trace_kvmppc_vcore_blocked(vc, 0);
spin_unlock(&vc->lock);
schedule();
- finish_swait(&vc->wq, &wait);
+ finish_rcuwait(&vc->wait);
spin_lock(&vc->lock);
vc->vcore_state = VCORE_INACTIVE;
trace_kvmppc_vcore_blocked(vc, 1);
@@ -3940,7 +3937,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
kvmppc_start_thread(vcpu, vc);
trace_kvm_guest_enter(vcpu);
} else if (vc->vcore_state == VCORE_SLEEPING) {
- swake_up_one(&vc->wq);
+ rcuwait_wake_up(&vc->wait);
}
}
@@ -4279,7 +4276,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
}
user_vrsave = mfspr(SPRN_VRSAVE);
- vcpu->arch.wqp = &vcpu->arch.vcore->wq;
+ vcpu->arch.waitp = &vcpu->arch.vcore->wait;
vcpu->arch.pgdir = kvm->mm->pgd;
vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
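
The swait queue in the vcore is replaced by an rcuwait, which pairs a single sleeper with lock-free wakeups. A hedged sketch of the sleep/wake idiom the conversion relies on; the real condition checks and vcore locking in kvmppc_vcore_blocked() are omitted:

        /* Sketch of the rcuwait pairing, not the actual book3s_hv code. */
        static void vcore_sleep(struct kvmppc_vcore *vc)
        {
                prepare_to_rcuwait(&vc->wait);
                set_current_state(TASK_INTERRUPTIBLE);
                if (!kvmppc_vcore_check_block(vc))
                        schedule();             /* woken by rcuwait_wake_up() */
                finish_rcuwait(&vc->wait);      /* also restores TASK_RUNNING */
        }

        static bool vcore_kick(struct kvmppc_vcore *vc)
        {
                /* true when a sleeper was actually woken (counted as halt_wakeup) */
                return rcuwait_wake_up(&vc->wait);
        }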
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 6c18ea88fd25..888afe8d35cc 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -35,29 +35,28 @@
unsigned long kvmppc_booke_handlers;
-#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
-#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
-
struct kvm_stats_debugfs_item debugfs_entries[] = {
- { "mmio", VCPU_STAT(mmio_exits) },
- { "sig", VCPU_STAT(signal_exits) },
- { "itlb_r", VCPU_STAT(itlb_real_miss_exits) },
- { "itlb_v", VCPU_STAT(itlb_virt_miss_exits) },
- { "dtlb_r", VCPU_STAT(dtlb_real_miss_exits) },
- { "dtlb_v", VCPU_STAT(dtlb_virt_miss_exits) },
- { "sysc", VCPU_STAT(syscall_exits) },
- { "isi", VCPU_STAT(isi_exits) },
- { "dsi", VCPU_STAT(dsi_exits) },
- { "inst_emu", VCPU_STAT(emulated_inst_exits) },
- { "dec", VCPU_STAT(dec_exits) },
- { "ext_intr", VCPU_STAT(ext_intr_exits) },
- { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
- { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
- { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
- { "halt_wakeup", VCPU_STAT(halt_wakeup) },
- { "doorbell", VCPU_STAT(dbell_exits) },
- { "guest doorbell", VCPU_STAT(gdbell_exits) },
- { "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
+ VCPU_STAT("mmio", mmio_exits),
+ VCPU_STAT("sig", signal_exits),
+ VCPU_STAT("itlb_r", itlb_real_miss_exits),
+ VCPU_STAT("itlb_v", itlb_virt_miss_exits),
+ VCPU_STAT("dtlb_r", dtlb_real_miss_exits),
+ VCPU_STAT("dtlb_v", dtlb_virt_miss_exits),
+ VCPU_STAT("sysc", syscall_exits),
+ VCPU_STAT("isi", isi_exits),
+ VCPU_STAT("dsi", dsi_exits),
+ VCPU_STAT("inst_emu", emulated_inst_exits),
+ VCPU_STAT("dec", dec_exits),
+ VCPU_STAT("ext_intr", ext_intr_exits),
+ VCPU_STAT("halt_successful_poll", halt_successful_poll),
+ VCPU_STAT("halt_attempted_poll", halt_attempted_poll),
+ VCPU_STAT("halt_poll_invalid", halt_poll_invalid),
+ VCPU_STAT("halt_wakeup", halt_wakeup),
+ VCPU_STAT("doorbell", dbell_exits),
+ VCPU_STAT("guest doorbell", gdbell_exits),
+ VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns),
+ VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns),
+ VM_STAT("remote_tlb_flush", remote_tlb_flush),
{ NULL }
};
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index ad2f172c26a6..27ccff612903 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -752,7 +752,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
if (err)
goto out_vcpu_uninit;
- vcpu->arch.wqp = &vcpu->wq;
+ vcpu->arch.waitp = &vcpu->wait;
kvmppc_create_vcpu_debugfs(vcpu, vcpu->vcpu_id);
return 0;
@@ -1765,8 +1765,9 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
return r;
}
-int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
+int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
{
+ struct kvm_run *run = vcpu->run;
int r;
vcpu_load(vcpu);
diff --git a/arch/powerpc/mm/ioremap_64.c b/arch/powerpc/mm/ioremap_64.c
index 50a99d9684f7..ba5cbb0d66bd 100644
--- a/arch/powerpc/mm/ioremap_64.c
+++ b/arch/powerpc/mm/ioremap_64.c
@@ -4,56 +4,6 @@
#include <linux/slab.h>
#include <linux/vmalloc.h>
-/**
- * Low level function to establish the page tables for an IO mapping
- */
-void __iomem *__ioremap_at(phys_addr_t pa, void *ea, unsigned long size, pgprot_t prot)
-{
- int ret;
- unsigned long va = (unsigned long)ea;
-
- /* We don't support the 4K PFN hack with ioremap */
- if (pgprot_val(prot) & H_PAGE_4K_PFN)
- return NULL;
-
- if ((ea + size) >= (void *)IOREMAP_END) {
- pr_warn("Outside the supported range\n");
- return NULL;
- }
-
- WARN_ON(pa & ~PAGE_MASK);
- WARN_ON(((unsigned long)ea) & ~PAGE_MASK);
- WARN_ON(size & ~PAGE_MASK);
-
- if (slab_is_available()) {
- ret = ioremap_page_range(va, va + size, pa, prot);
- if (ret)
- unmap_kernel_range(va, size);
- } else {
- ret = early_ioremap_range(va, pa, size, prot);
- }
-
- if (ret)
- return NULL;
-
- return (void __iomem *)ea;
-}
-EXPORT_SYMBOL(__ioremap_at);
-
-/**
- * Low level function to tear down the page tables for an IO mapping. This is
- * used for mappings that are manipulated manually, like partial unmapping of
- * PCI IOs or ISA space.
- */
-void __iounmap_at(void *ea, unsigned long size)
-{
- WARN_ON(((unsigned long)ea) & ~PAGE_MASK);
- WARN_ON(size & ~PAGE_MASK);
-
- unmap_kernel_range((unsigned long)ea, size);
-}
-EXPORT_SYMBOL(__iounmap_at);
-
void __iomem *__ioremap_caller(phys_addr_t addr, unsigned long size,
pgprot_t prot, void *caller)
{
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index eb82dda884e5..0edcfd0b491d 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -976,7 +976,7 @@ static int thread_imc_event_init(struct perf_event *event)
if (event->attr.type != event->pmu->type)
return -ENOENT;
- if (!capable(CAP_SYS_ADMIN))
+ if (!perfmon_capable())
return -EACCES;
/* Sampling not supported */
@@ -1412,7 +1412,7 @@ static int trace_imc_event_init(struct perf_event *event)
if (event->attr.type != event->pmu->type)
return -ENOENT;
- if (!capable(CAP_SYS_ADMIN))
+ if (!perfmon_capable())
return -EACCES;
/* Return if this is a counting event */
diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c
index 8b3296b62f65..3b75e8f60609 100644
--- a/arch/powerpc/platforms/cell/spufs/coredump.c
+++ b/arch/powerpc/platforms/cell/spufs/coredump.c
@@ -21,22 +21,6 @@
#include "spufs.h"
-static ssize_t do_coredump_read(int num, struct spu_context *ctx, void *buffer,
- size_t size, loff_t *off)
-{
- u64 data;
- int ret;
-
- if (spufs_coredump_read[num].read)
- return spufs_coredump_read[num].read(ctx, buffer, size, off);
-
- data = spufs_coredump_read[num].get(ctx);
- ret = snprintf(buffer, size, "0x%.16llx", data);
- if (ret >= size)
- return size;
- return ++ret; /* count trailing NULL */
-}
-
static int spufs_ctx_note_size(struct spu_context *ctx, int dfd)
{
int i, sz, total = 0;
@@ -118,58 +102,43 @@ int spufs_coredump_extra_notes_size(void)
static int spufs_arch_write_note(struct spu_context *ctx, int i,
struct coredump_params *cprm, int dfd)
{
- loff_t pos = 0;
- int sz, rc, total = 0;
- const int bufsz = PAGE_SIZE;
- char *name;
- char fullname[80], *buf;
+ size_t sz = spufs_coredump_read[i].size;
+ char fullname[80];
struct elf_note en;
- size_t skip;
-
- buf = (void *)get_zeroed_page(GFP_KERNEL);
- if (!buf)
- return -ENOMEM;
+ size_t ret;
- name = spufs_coredump_read[i].name;
- sz = spufs_coredump_read[i].size;
-
- sprintf(fullname, "SPU/%d/%s", dfd, name);
+ sprintf(fullname, "SPU/%d/%s", dfd, spufs_coredump_read[i].name);
en.n_namesz = strlen(fullname) + 1;
en.n_descsz = sz;
en.n_type = NT_SPU;
if (!dump_emit(cprm, &en, sizeof(en)))
- goto Eio;
-
+ return -EIO;
if (!dump_emit(cprm, fullname, en.n_namesz))
- goto Eio;
-
+ return -EIO;
if (!dump_align(cprm, 4))
- goto Eio;
-
- do {
- rc = do_coredump_read(i, ctx, buf, bufsz, &pos);
- if (rc > 0) {
- if (!dump_emit(cprm, buf, rc))
- goto Eio;
- total += rc;
- }
- } while (rc == bufsz && total < sz);
-
- if (rc < 0)
- goto out;
-
- skip = roundup(cprm->pos - total + sz, 4) - cprm->pos;
- if (!dump_skip(cprm, skip))
- goto Eio;
-
- rc = 0;
-out:
- free_page((unsigned long)buf);
- return rc;
-Eio:
- free_page((unsigned long)buf);
- return -EIO;
+ return -EIO;
+
+ if (spufs_coredump_read[i].dump) {
+ ret = spufs_coredump_read[i].dump(ctx, cprm);
+ if (ret < 0)
+ return ret;
+ } else {
+ char buf[32];
+
+ ret = snprintf(buf, sizeof(buf), "0x%.16llx",
+ spufs_coredump_read[i].get(ctx));
+ if (ret >= sizeof(buf))
+ return sizeof(buf);
+
+ /* count the trailing NULL: */
+ if (!dump_emit(cprm, buf, ret + 1))
+ return -EIO;
+ }
+
+ if (!dump_skip(cprm, roundup(cprm->pos - ret + sz, 4) - cprm->pos))
+ return -EIO;
+ return 0;
}
int spufs_coredump_extra_notes_write(struct coredump_params *cprm)
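
The table walked by spufs_arch_write_note() (spufs_coredump_read[], rebuilt at the end of file.c below) now carries a .dump callback that writes straight into the coredump instead of a seekable read routine. The struct lives in spufs.h; its assumed post-patch shape, for orientation only:

        /* Assumed layout; consult arch/powerpc/platforms/cell/spufs/spufs.h. */
        struct spufs_coredump_reader {
                char *name;
                ssize_t (*dump)(struct spu_context *ctx,
                                struct coredump_params *cprm);
                u64 (*get)(struct spu_context *ctx);
                size_t size;
        };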
diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c
index c0f950a3f4e1..e44427c24585 100644
--- a/arch/powerpc/platforms/cell/spufs/file.c
+++ b/arch/powerpc/platforms/cell/spufs/file.c
@@ -9,6 +9,7 @@
#undef DEBUG
+#include <linux/coredump.h>
#include <linux/fs.h>
#include <linux/ioctl.h>
#include <linux/export.h>
@@ -129,6 +130,14 @@ out:
return ret;
}
+static ssize_t spufs_dump_emit(struct coredump_params *cprm, void *buf,
+ size_t size)
+{
+ if (!dump_emit(cprm, buf, size))
+ return -EIO;
+ return size;
+}
+
#define DEFINE_SPUFS_SIMPLE_ATTRIBUTE(__fops, __get, __set, __fmt) \
static int __fops ## _open(struct inode *inode, struct file *file) \
{ \
@@ -172,12 +181,9 @@ spufs_mem_release(struct inode *inode, struct file *file)
}
static ssize_t
-__spufs_mem_read(struct spu_context *ctx, char __user *buffer,
- size_t size, loff_t *pos)
+spufs_mem_dump(struct spu_context *ctx, struct coredump_params *cprm)
{
- char *local_store = ctx->ops->get_ls(ctx);
- return simple_read_from_buffer(buffer, size, pos, local_store,
- LS_SIZE);
+ return spufs_dump_emit(cprm, ctx->ops->get_ls(ctx), LS_SIZE);
}
static ssize_t
@@ -190,7 +196,8 @@ spufs_mem_read(struct file *file, char __user *buffer,
ret = spu_acquire(ctx);
if (ret)
return ret;
- ret = __spufs_mem_read(ctx, buffer, size, pos);
+ ret = simple_read_from_buffer(buffer, size, pos, ctx->ops->get_ls(ctx),
+ LS_SIZE);
spu_release(ctx);
return ret;
@@ -459,12 +466,10 @@ spufs_regs_open(struct inode *inode, struct file *file)
}
static ssize_t
-__spufs_regs_read(struct spu_context *ctx, char __user *buffer,
- size_t size, loff_t *pos)
+spufs_regs_dump(struct spu_context *ctx, struct coredump_params *cprm)
{
- struct spu_lscsa *lscsa = ctx->csa.lscsa;
- return simple_read_from_buffer(buffer, size, pos,
- lscsa->gprs, sizeof lscsa->gprs);
+ return spufs_dump_emit(cprm, ctx->csa.lscsa->gprs,
+ sizeof(ctx->csa.lscsa->gprs));
}
static ssize_t
@@ -482,7 +487,8 @@ spufs_regs_read(struct file *file, char __user *buffer,
ret = spu_acquire_saved(ctx);
if (ret)
return ret;
- ret = __spufs_regs_read(ctx, buffer, size, pos);
+ ret = simple_read_from_buffer(buffer, size, pos, ctx->csa.lscsa->gprs,
+ sizeof(ctx->csa.lscsa->gprs));
spu_release_saved(ctx);
return ret;
}
@@ -517,12 +523,10 @@ static const struct file_operations spufs_regs_fops = {
};
static ssize_t
-__spufs_fpcr_read(struct spu_context *ctx, char __user * buffer,
- size_t size, loff_t * pos)
+spufs_fpcr_dump(struct spu_context *ctx, struct coredump_params *cprm)
{
- struct spu_lscsa *lscsa = ctx->csa.lscsa;
- return simple_read_from_buffer(buffer, size, pos,
- &lscsa->fpcr, sizeof(lscsa->fpcr));
+ return spufs_dump_emit(cprm, &ctx->csa.lscsa->fpcr,
+ sizeof(ctx->csa.lscsa->fpcr));
}
static ssize_t
@@ -535,7 +539,8 @@ spufs_fpcr_read(struct file *file, char __user * buffer,
ret = spu_acquire_saved(ctx);
if (ret)
return ret;
- ret = __spufs_fpcr_read(ctx, buffer, size, pos);
+ ret = simple_read_from_buffer(buffer, size, pos, &ctx->csa.lscsa->fpcr,
+ sizeof(ctx->csa.lscsa->fpcr));
spu_release_saved(ctx);
return ret;
}
@@ -590,17 +595,12 @@ static ssize_t spufs_mbox_read(struct file *file, char __user *buf,
size_t len, loff_t *pos)
{
struct spu_context *ctx = file->private_data;
- u32 mbox_data, __user *udata;
+ u32 mbox_data, __user *udata = (void __user *)buf;
ssize_t count;
if (len < 4)
return -EINVAL;
- if (!access_ok(buf, len))
- return -EFAULT;
-
- udata = (void __user *)buf;
-
count = spu_acquire(ctx);
if (count)
return count;
@@ -616,7 +616,7 @@ static ssize_t spufs_mbox_read(struct file *file, char __user *buf,
* but still need to return the data we have
* read successfully so far.
*/
- ret = __put_user(mbox_data, udata);
+ ret = put_user(mbox_data, udata);
if (ret) {
if (!count)
count = -EFAULT;
@@ -698,17 +698,12 @@ static ssize_t spufs_ibox_read(struct file *file, char __user *buf,
size_t len, loff_t *pos)
{
struct spu_context *ctx = file->private_data;
- u32 ibox_data, __user *udata;
+ u32 ibox_data, __user *udata = (void __user *)buf;
ssize_t count;
if (len < 4)
return -EINVAL;
- if (!access_ok(buf, len))
- return -EFAULT;
-
- udata = (void __user *)buf;
-
count = spu_acquire(ctx);
if (count)
goto out;
@@ -727,7 +722,7 @@ static ssize_t spufs_ibox_read(struct file *file, char __user *buf,
}
/* if we can't write at all, return -EFAULT */
- count = __put_user(ibox_data, udata);
+ count = put_user(ibox_data, udata);
if (count)
goto out_unlock;
@@ -741,7 +736,7 @@ static ssize_t spufs_ibox_read(struct file *file, char __user *buf,
* but still need to return the data we have
* read successfully so far.
*/
- ret = __put_user(ibox_data, udata);
+ ret = put_user(ibox_data, udata);
if (ret)
break;
}
@@ -836,17 +831,13 @@ static ssize_t spufs_wbox_write(struct file *file, const char __user *buf,
size_t len, loff_t *pos)
{
struct spu_context *ctx = file->private_data;
- u32 wbox_data, __user *udata;
+ u32 wbox_data, __user *udata = (void __user *)buf;
ssize_t count;
if (len < 4)
return -EINVAL;
- udata = (void __user *)buf;
- if (!access_ok(buf, len))
- return -EFAULT;
-
- if (__get_user(wbox_data, udata))
+ if (get_user(wbox_data, udata))
return -EFAULT;
count = spu_acquire(ctx);
@@ -873,7 +864,7 @@ static ssize_t spufs_wbox_write(struct file *file, const char __user *buf,
/* write as much as possible */
for (count = 4, udata++; (count + 4) <= len; count += 4, udata++) {
int ret;
- ret = __get_user(wbox_data, udata);
+ ret = get_user(wbox_data, udata);
if (ret)
break;
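
Dropping the manual access_ok() calls and moving from __put_user()/__get_user() to the checked put_user()/get_user() keeps the same semantics, because the checked variants perform the range check themselves. Conceptually (the real per-architecture uaccess implementations are considerably more involved):

        /* Conceptual sketch only, not a real uaccess implementation. */
        #define put_user_sketch(x, ptr)                                         \
        ({                                                                      \
                access_ok(ptr, sizeof(*(ptr))) ? __put_user(x, ptr) : -EFAULT;  \
        })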
@@ -967,28 +958,26 @@ spufs_signal1_release(struct inode *inode, struct file *file)
return 0;
}
-static ssize_t __spufs_signal1_read(struct spu_context *ctx, char __user *buf,
- size_t len, loff_t *pos)
+static ssize_t spufs_signal1_dump(struct spu_context *ctx,
+ struct coredump_params *cprm)
{
- int ret = 0;
- u32 data;
+ if (!ctx->csa.spu_chnlcnt_RW[3])
+ return 0;
+ return spufs_dump_emit(cprm, &ctx->csa.spu_chnldata_RW[3],
+ sizeof(ctx->csa.spu_chnldata_RW[3]));
+}
- if (len < 4)
+static ssize_t __spufs_signal1_read(struct spu_context *ctx, char __user *buf,
+ size_t len)
+{
+ if (len < sizeof(ctx->csa.spu_chnldata_RW[3]))
return -EINVAL;
-
- if (ctx->csa.spu_chnlcnt_RW[3]) {
- data = ctx->csa.spu_chnldata_RW[3];
- ret = 4;
- }
-
- if (!ret)
- goto out;
-
- if (copy_to_user(buf, &data, 4))
+ if (!ctx->csa.spu_chnlcnt_RW[3])
+ return 0;
+ if (copy_to_user(buf, &ctx->csa.spu_chnldata_RW[3],
+ sizeof(ctx->csa.spu_chnldata_RW[3])))
return -EFAULT;
-
-out:
- return ret;
+ return sizeof(ctx->csa.spu_chnldata_RW[3]);
}
static ssize_t spufs_signal1_read(struct file *file, char __user *buf,
@@ -1000,7 +989,7 @@ static ssize_t spufs_signal1_read(struct file *file, char __user *buf,
ret = spu_acquire_saved(ctx);
if (ret)
return ret;
- ret = __spufs_signal1_read(ctx, buf, len, pos);
+ ret = __spufs_signal1_read(ctx, buf, len);
spu_release_saved(ctx);
return ret;
@@ -1104,28 +1093,26 @@ spufs_signal2_release(struct inode *inode, struct file *file)
return 0;
}
-static ssize_t __spufs_signal2_read(struct spu_context *ctx, char __user *buf,
- size_t len, loff_t *pos)
+static ssize_t spufs_signal2_dump(struct spu_context *ctx,
+ struct coredump_params *cprm)
{
- int ret = 0;
- u32 data;
+ if (!ctx->csa.spu_chnlcnt_RW[4])
+ return 0;
+ return spufs_dump_emit(cprm, &ctx->csa.spu_chnldata_RW[4],
+ sizeof(ctx->csa.spu_chnldata_RW[4]));
+}
- if (len < 4)
+static ssize_t __spufs_signal2_read(struct spu_context *ctx, char __user *buf,
+ size_t len)
+{
+ if (len < sizeof(ctx->csa.spu_chnldata_RW[4]))
return -EINVAL;
-
- if (ctx->csa.spu_chnlcnt_RW[4]) {
- data = ctx->csa.spu_chnldata_RW[4];
- ret = 4;
- }
-
- if (!ret)
- goto out;
-
- if (copy_to_user(buf, &data, 4))
+ if (!ctx->csa.spu_chnlcnt_RW[4])
+ return 0;
+ if (copy_to_user(buf, &ctx->csa.spu_chnldata_RW[4],
+ sizeof(ctx->csa.spu_chnldata_RW[4])))
return -EFAULT;
-
-out:
- return ret;
+ return sizeof(ctx->csa.spu_chnldata_RW[4]);
}
static ssize_t spufs_signal2_read(struct file *file, char __user *buf,
@@ -1137,7 +1124,7 @@ static ssize_t spufs_signal2_read(struct file *file, char __user *buf,
ret = spu_acquire_saved(ctx);
if (ret)
return ret;
- ret = __spufs_signal2_read(ctx, buf, len, pos);
+ ret = __spufs_signal2_read(ctx, buf, len);
spu_release_saved(ctx);
return ret;
@@ -1961,38 +1948,36 @@ static const struct file_operations spufs_caps_fops = {
.release = single_release,
};
-static ssize_t __spufs_mbox_info_read(struct spu_context *ctx,
- char __user *buf, size_t len, loff_t *pos)
+static ssize_t spufs_mbox_info_dump(struct spu_context *ctx,
+ struct coredump_params *cprm)
{
- u32 data;
-
- /* EOF if there's no entry in the mbox */
if (!(ctx->csa.prob.mb_stat_R & 0x0000ff))
return 0;
-
- data = ctx->csa.prob.pu_mb_R;
-
- return simple_read_from_buffer(buf, len, pos, &data, sizeof data);
+ return spufs_dump_emit(cprm, &ctx->csa.prob.pu_mb_R,
+ sizeof(ctx->csa.prob.pu_mb_R));
}
static ssize_t spufs_mbox_info_read(struct file *file, char __user *buf,
size_t len, loff_t *pos)
{
- int ret;
struct spu_context *ctx = file->private_data;
-
- if (!access_ok(buf, len))
- return -EFAULT;
+ u32 stat, data;
+ int ret;
ret = spu_acquire_saved(ctx);
if (ret)
return ret;
spin_lock(&ctx->csa.register_lock);
- ret = __spufs_mbox_info_read(ctx, buf, len, pos);
+ stat = ctx->csa.prob.mb_stat_R;
+ data = ctx->csa.prob.pu_mb_R;
spin_unlock(&ctx->csa.register_lock);
spu_release_saved(ctx);
- return ret;
+ /* EOF if there's no entry in the mbox */
+ if (!(stat & 0x0000ff))
+ return 0;
+
+ return simple_read_from_buffer(buf, len, pos, &data, sizeof(data));
}
static const struct file_operations spufs_mbox_info_fops = {
@@ -2001,38 +1986,36 @@ static const struct file_operations spufs_mbox_info_fops = {
.llseek = generic_file_llseek,
};
-static ssize_t __spufs_ibox_info_read(struct spu_context *ctx,
- char __user *buf, size_t len, loff_t *pos)
+static ssize_t spufs_ibox_info_dump(struct spu_context *ctx,
+ struct coredump_params *cprm)
{
- u32 data;
-
- /* EOF if there's no entry in the ibox */
if (!(ctx->csa.prob.mb_stat_R & 0xff0000))
return 0;
-
- data = ctx->csa.priv2.puint_mb_R;
-
- return simple_read_from_buffer(buf, len, pos, &data, sizeof data);
+ return spufs_dump_emit(cprm, &ctx->csa.priv2.puint_mb_R,
+ sizeof(ctx->csa.priv2.puint_mb_R));
}
static ssize_t spufs_ibox_info_read(struct file *file, char __user *buf,
size_t len, loff_t *pos)
{
struct spu_context *ctx = file->private_data;
+ u32 stat, data;
int ret;
- if (!access_ok(buf, len))
- return -EFAULT;
-
ret = spu_acquire_saved(ctx);
if (ret)
return ret;
spin_lock(&ctx->csa.register_lock);
- ret = __spufs_ibox_info_read(ctx, buf, len, pos);
+ stat = ctx->csa.prob.mb_stat_R;
+ data = ctx->csa.priv2.puint_mb_R;
spin_unlock(&ctx->csa.register_lock);
spu_release_saved(ctx);
- return ret;
+ /* EOF if there's no entry in the ibox */
+ if (!(stat & 0xff0000))
+ return 0;
+
+ return simple_read_from_buffer(buf, len, pos, &data, sizeof(data));
}
static const struct file_operations spufs_ibox_info_fops = {
@@ -2041,41 +2024,36 @@ static const struct file_operations spufs_ibox_info_fops = {
.llseek = generic_file_llseek,
};
-static ssize_t __spufs_wbox_info_read(struct spu_context *ctx,
- char __user *buf, size_t len, loff_t *pos)
+static size_t spufs_wbox_info_cnt(struct spu_context *ctx)
{
- int i, cnt;
- u32 data[4];
- u32 wbox_stat;
-
- wbox_stat = ctx->csa.prob.mb_stat_R;
- cnt = 4 - ((wbox_stat & 0x00ff00) >> 8);
- for (i = 0; i < cnt; i++) {
- data[i] = ctx->csa.spu_mailbox_data[i];
- }
+ return (4 - ((ctx->csa.prob.mb_stat_R & 0x00ff00) >> 8)) * sizeof(u32);
+}
- return simple_read_from_buffer(buf, len, pos, &data,
- cnt * sizeof(u32));
+static ssize_t spufs_wbox_info_dump(struct spu_context *ctx,
+ struct coredump_params *cprm)
+{
+ return spufs_dump_emit(cprm, &ctx->csa.spu_mailbox_data,
+ spufs_wbox_info_cnt(ctx));
}
static ssize_t spufs_wbox_info_read(struct file *file, char __user *buf,
size_t len, loff_t *pos)
{
struct spu_context *ctx = file->private_data;
- int ret;
-
- if (!access_ok(buf, len))
- return -EFAULT;
+ u32 data[ARRAY_SIZE(ctx->csa.spu_mailbox_data)];
+ int ret, count;
ret = spu_acquire_saved(ctx);
if (ret)
return ret;
spin_lock(&ctx->csa.register_lock);
- ret = __spufs_wbox_info_read(ctx, buf, len, pos);
+ count = spufs_wbox_info_cnt(ctx);
+ memcpy(&data, &ctx->csa.spu_mailbox_data, sizeof(data));
spin_unlock(&ctx->csa.register_lock);
spu_release_saved(ctx);
- return ret;
+ return simple_read_from_buffer(buf, len, pos, &data,
+ count * sizeof(u32));
}
static const struct file_operations spufs_wbox_info_fops = {
@@ -2084,50 +2062,53 @@ static const struct file_operations spufs_wbox_info_fops = {
.llseek = generic_file_llseek,
};
-static ssize_t __spufs_dma_info_read(struct spu_context *ctx,
- char __user *buf, size_t len, loff_t *pos)
+static void spufs_get_dma_info(struct spu_context *ctx,
+ struct spu_dma_info *info)
{
- struct spu_dma_info info;
- struct mfc_cq_sr *qp, *spuqp;
int i;
- info.dma_info_type = ctx->csa.priv2.spu_tag_status_query_RW;
- info.dma_info_mask = ctx->csa.lscsa->tag_mask.slot[0];
- info.dma_info_status = ctx->csa.spu_chnldata_RW[24];
- info.dma_info_stall_and_notify = ctx->csa.spu_chnldata_RW[25];
- info.dma_info_atomic_command_status = ctx->csa.spu_chnldata_RW[27];
+ info->dma_info_type = ctx->csa.priv2.spu_tag_status_query_RW;
+ info->dma_info_mask = ctx->csa.lscsa->tag_mask.slot[0];
+ info->dma_info_status = ctx->csa.spu_chnldata_RW[24];
+ info->dma_info_stall_and_notify = ctx->csa.spu_chnldata_RW[25];
+ info->dma_info_atomic_command_status = ctx->csa.spu_chnldata_RW[27];
for (i = 0; i < 16; i++) {
- qp = &info.dma_info_command_data[i];
- spuqp = &ctx->csa.priv2.spuq[i];
+ struct mfc_cq_sr *qp = &info->dma_info_command_data[i];
+ struct mfc_cq_sr *spuqp = &ctx->csa.priv2.spuq[i];
qp->mfc_cq_data0_RW = spuqp->mfc_cq_data0_RW;
qp->mfc_cq_data1_RW = spuqp->mfc_cq_data1_RW;
qp->mfc_cq_data2_RW = spuqp->mfc_cq_data2_RW;
qp->mfc_cq_data3_RW = spuqp->mfc_cq_data3_RW;
}
+}
- return simple_read_from_buffer(buf, len, pos, &info,
- sizeof info);
+static ssize_t spufs_dma_info_dump(struct spu_context *ctx,
+ struct coredump_params *cprm)
+{
+ struct spu_dma_info info;
+
+ spufs_get_dma_info(ctx, &info);
+ return spufs_dump_emit(cprm, &info, sizeof(info));
}
static ssize_t spufs_dma_info_read(struct file *file, char __user *buf,
size_t len, loff_t *pos)
{
struct spu_context *ctx = file->private_data;
+ struct spu_dma_info info;
int ret;
- if (!access_ok(buf, len))
- return -EFAULT;
-
ret = spu_acquire_saved(ctx);
if (ret)
return ret;
spin_lock(&ctx->csa.register_lock);
- ret = __spufs_dma_info_read(ctx, buf, len, pos);
+ spufs_get_dma_info(ctx, &info);
spin_unlock(&ctx->csa.register_lock);
spu_release_saved(ctx);
- return ret;
+ return simple_read_from_buffer(buf, len, pos, &info,
+ sizeof(info));
}
static const struct file_operations spufs_dma_info_fops = {
@@ -2136,52 +2117,55 @@ static const struct file_operations spufs_dma_info_fops = {
.llseek = no_llseek,
};
-static ssize_t __spufs_proxydma_info_read(struct spu_context *ctx,
- char __user *buf, size_t len, loff_t *pos)
+static void spufs_get_proxydma_info(struct spu_context *ctx,
+ struct spu_proxydma_info *info)
{
- struct spu_proxydma_info info;
- struct mfc_cq_sr *qp, *puqp;
- int ret = sizeof info;
int i;
- if (len < ret)
- return -EINVAL;
-
- if (!access_ok(buf, len))
- return -EFAULT;
+ info->proxydma_info_type = ctx->csa.prob.dma_querytype_RW;
+ info->proxydma_info_mask = ctx->csa.prob.dma_querymask_RW;
+ info->proxydma_info_status = ctx->csa.prob.dma_tagstatus_R;
- info.proxydma_info_type = ctx->csa.prob.dma_querytype_RW;
- info.proxydma_info_mask = ctx->csa.prob.dma_querymask_RW;
- info.proxydma_info_status = ctx->csa.prob.dma_tagstatus_R;
for (i = 0; i < 8; i++) {
- qp = &info.proxydma_info_command_data[i];
- puqp = &ctx->csa.priv2.puq[i];
+ struct mfc_cq_sr *qp = &info->proxydma_info_command_data[i];
+ struct mfc_cq_sr *puqp = &ctx->csa.priv2.puq[i];
qp->mfc_cq_data0_RW = puqp->mfc_cq_data0_RW;
qp->mfc_cq_data1_RW = puqp->mfc_cq_data1_RW;
qp->mfc_cq_data2_RW = puqp->mfc_cq_data2_RW;
qp->mfc_cq_data3_RW = puqp->mfc_cq_data3_RW;
}
+}
- return simple_read_from_buffer(buf, len, pos, &info,
- sizeof info);
+static ssize_t spufs_proxydma_info_dump(struct spu_context *ctx,
+ struct coredump_params *cprm)
+{
+ struct spu_proxydma_info info;
+
+ spufs_get_proxydma_info(ctx, &info);
+ return spufs_dump_emit(cprm, &info, sizeof(info));
}
static ssize_t spufs_proxydma_info_read(struct file *file, char __user *buf,
size_t len, loff_t *pos)
{
struct spu_context *ctx = file->private_data;
+ struct spu_proxydma_info info;
int ret;
+ if (len < sizeof(info))
+ return -EINVAL;
+
ret = spu_acquire_saved(ctx);
if (ret)
return ret;
spin_lock(&ctx->csa.register_lock);
- ret = __spufs_proxydma_info_read(ctx, buf, len, pos);
+ spufs_get_proxydma_info(ctx, &info);
spin_unlock(&ctx->csa.register_lock);
spu_release_saved(ctx);
- return ret;
+ return simple_read_from_buffer(buf, len, pos, &info,
+ sizeof(info));
}
static const struct file_operations spufs_proxydma_info_fops = {
@@ -2625,23 +2609,23 @@ const struct spufs_tree_descr spufs_dir_debug_contents[] = {
};
const struct spufs_coredump_reader spufs_coredump_read[] = {
- { "regs", __spufs_regs_read, NULL, sizeof(struct spu_reg128[128])},
- { "fpcr", __spufs_fpcr_read, NULL, sizeof(struct spu_reg128) },
+ { "regs", spufs_regs_dump, NULL, sizeof(struct spu_reg128[128])},
+ { "fpcr", spufs_fpcr_dump, NULL, sizeof(struct spu_reg128) },
{ "lslr", NULL, spufs_lslr_get, 19 },
{ "decr", NULL, spufs_decr_get, 19 },
{ "decr_status", NULL, spufs_decr_status_get, 19 },
- { "mem", __spufs_mem_read, NULL, LS_SIZE, },
- { "signal1", __spufs_signal1_read, NULL, sizeof(u32) },
+ { "mem", spufs_mem_dump, NULL, LS_SIZE, },
+ { "signal1", spufs_signal1_dump, NULL, sizeof(u32) },
{ "signal1_type", NULL, spufs_signal1_type_get, 19 },
- { "signal2", __spufs_signal2_read, NULL, sizeof(u32) },
+ { "signal2", spufs_signal2_dump, NULL, sizeof(u32) },
{ "signal2_type", NULL, spufs_signal2_type_get, 19 },
{ "event_mask", NULL, spufs_event_mask_get, 19 },
{ "event_status", NULL, spufs_event_status_get, 19 },
- { "mbox_info", __spufs_mbox_info_read, NULL, sizeof(u32) },
- { "ibox_info", __spufs_ibox_info_read, NULL, sizeof(u32) },
- { "wbox_info", __spufs_wbox_info_read, NULL, 4 * sizeof(u32)},
- { "dma_info", __spufs_dma_info_read, NULL, sizeof(struct spu_dma_info)},
- { "proxydma_info", __spufs_proxydma_info_read,
+ { "mbox_info", spufs_mbox_info_dump, NULL, sizeof(u32) },
+ { "ibox_info", spufs_ibox_info_dump, NULL, sizeof(u32) },
+ { "wbox_info", spufs_wbox_info_dump, NULL, 4 * sizeof(u32)},
+ { "dma_info", spufs_dma_info_dump, NULL, sizeof(struct spu_dma_info)},
+ { "proxydma_info", spufs_proxydma_info_dump,
NULL, sizeof(struct spu_proxydma_info)},
{ "object-id", NULL, spufs_object_id_get, 19 },
{ "npc", NULL, spufs_npc_get, 19 },
diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h
index 413c89afe112..1ba4d884febf 100644
--- a/arch/powerpc/platforms/cell/spufs/spufs.h
+++ b/arch/powerpc/platforms/cell/spufs/spufs.h
@@ -337,8 +337,7 @@ void spufs_dma_callback(struct spu *spu, int type);
extern struct spu_coredump_calls spufs_coredump_calls;
struct spufs_coredump_reader {
char *name;
- ssize_t (*read)(struct spu_context *ctx,
- char __user *buffer, size_t size, loff_t *pos);
+ ssize_t (*dump)(struct spu_context *ctx, struct coredump_params *cprm);
u64 (*get)(struct spu_context *ctx);
size_t size;
};
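The spufs refactoring above follows one pattern throughout: each file gains a spufs_*_dump() helper built on spufs_dump_emit() for the coredump path, while the read() path snapshots the saved state under csa.register_lock and calls simple_read_from_buffer() only after the locks are dropped, since copying to user space may fault. A minimal sketch of that read-side shape, with spufs_foo_info_read() and foo_data as hypothetical stand-ins:

	static ssize_t spufs_foo_info_read(struct file *file, char __user *buf,
					   size_t len, loff_t *pos)
	{
		struct spu_context *ctx = file->private_data;
		u32 data;	/* local snapshot, filled under the lock */
		int ret;

		ret = spu_acquire_saved(ctx);
		if (ret)
			return ret;
		spin_lock(&ctx->csa.register_lock);
		data = ctx->csa.foo_data;	/* hypothetical saved-state field */
		spin_unlock(&ctx->csa.register_lock);
		spu_release_saved(ctx);

		/* may fault or sleep, so it runs outside the spinlock */
		return simple_read_from_buffer(buf, len, pos, &data, sizeof(data));
	}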
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index 13e251699346..b2ba3e95bda7 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -167,7 +167,6 @@ static void pnv_smp_cpu_kill_self(void)
/* Standard hot unplug procedure */
idle_task_exit();
- current->active_mm = NULL; /* for sanity */
cpu = smp_processor_id();
DBG("CPU%d offline\n", cpu);
generic_set_cpu_dead(cpu);
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 35b60035b6b0..d50706ea1c94 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -473,9 +473,9 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
#define PAGE_SHARED __pgprot(0)
#define PAGE_KERNEL __pgprot(0)
#define swapper_pg_dir NULL
+#define TASK_SIZE 0xffffffffUL
#define VMALLOC_START 0
-
-#define TASK_SIZE 0xffffffffUL
+#define VMALLOC_END TASK_SIZE
static inline void __kernel_map_pages(struct page *page, int numpages, int enable) {}
diff --git a/arch/riscv/mm/ptdump.c b/arch/riscv/mm/ptdump.c
index 7eab76a93106..070505d79b06 100644
--- a/arch/riscv/mm/ptdump.c
+++ b/arch/riscv/mm/ptdump.c
@@ -204,7 +204,7 @@ static void note_prot_wx(struct pg_state *st, unsigned long addr)
}
static void note_page(struct ptdump_state *pt_st, unsigned long addr,
- int level, unsigned long val)
+ int level, u64 val)
{
struct pg_state *st = container_of(pt_st, struct pg_state, ptdump);
u64 pa = PFN_PHYS(pte_pfn(__pte(val)));
diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c
index 7c15542d3685..698b1e6d3c14 100644
--- a/arch/s390/crypto/sha1_s390.c
+++ b/arch/s390/crypto/sha1_s390.c
@@ -27,7 +27,7 @@
#include "sha.h"
-static int sha1_init(struct shash_desc *desc)
+static int s390_sha1_init(struct shash_desc *desc)
{
struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
@@ -42,7 +42,7 @@ static int sha1_init(struct shash_desc *desc)
return 0;
}
-static int sha1_export(struct shash_desc *desc, void *out)
+static int s390_sha1_export(struct shash_desc *desc, void *out)
{
struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
struct sha1_state *octx = out;
@@ -53,7 +53,7 @@ static int sha1_export(struct shash_desc *desc, void *out)
return 0;
}
-static int sha1_import(struct shash_desc *desc, const void *in)
+static int s390_sha1_import(struct shash_desc *desc, const void *in)
{
struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
const struct sha1_state *ictx = in;
@@ -67,11 +67,11 @@ static int sha1_import(struct shash_desc *desc, const void *in)
static struct shash_alg alg = {
.digestsize = SHA1_DIGEST_SIZE,
- .init = sha1_init,
+ .init = s390_sha1_init,
.update = s390_sha_update,
.final = s390_sha_final,
- .export = sha1_export,
- .import = sha1_import,
+ .export = s390_sha1_export,
+ .import = s390_sha1_import,
.descsize = sizeof(struct s390_sha_ctx),
.statesize = sizeof(struct sha1_state),
.base = {
diff --git a/arch/s390/include/asm/checksum.h b/arch/s390/include/asm/checksum.h
index 91e376b0d28c..6d01c96aeb5c 100644
--- a/arch/s390/include/asm/checksum.h
+++ b/arch/s390/include/asm/checksum.h
@@ -39,25 +39,6 @@ csum_partial(const void *buff, int len, __wsum sum)
return sum;
}
-/*
- * the same as csum_partial_copy, but copies from user space.
- *
- * here even more important to align src and dst on a 32-bit (or even
- * better 64-bit) boundary
- *
- * Copy from userspace and compute checksum.
- */
-static inline __wsum
-csum_partial_copy_from_user(const void __user *src, void *dst,
- int len, __wsum sum,
- int *err_ptr)
-{
- if (unlikely(copy_from_user(dst, src, len)))
- *err_ptr = -EFAULT;
- return csum_partial(dst, len, sum);
-}
-
-
static inline __wsum
csum_partial_copy_nocheck (const void *src, void *dst, int len, __wsum sum)
{
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index d6bcd34f3ec3..3d554887794e 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -375,6 +375,8 @@ struct kvm_vcpu_stat {
u64 halt_poll_invalid;
u64 halt_no_poll_steal;
u64 halt_wakeup;
+ u64 halt_poll_success_ns;
+ u64 halt_poll_fail_ns;
u64 instruction_lctl;
u64 instruction_lctlg;
u64 instruction_stctl;
@@ -971,7 +973,7 @@ struct kvm_arch_async_pf {
unsigned long pfault_token;
};
-bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu);
+bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu);
void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work);
@@ -982,6 +984,8 @@ void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work);
+static inline void kvm_arch_async_page_present_queued(struct kvm_vcpu *vcpu) {}
+
void kvm_arch_crypto_clear_masks(struct kvm *kvm);
void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
unsigned long *aqm, unsigned long *adm);
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 36445dd40fdb..0f0b140b5558 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -305,12 +305,9 @@ void *restart_stack __section(.data);
unsigned long stack_alloc(void)
{
#ifdef CONFIG_VMAP_STACK
- return (unsigned long)
- __vmalloc_node_range(THREAD_SIZE, THREAD_SIZE,
- VMALLOC_START, VMALLOC_END,
- THREADINFO_GFP,
- PAGE_KERNEL, 0, NUMA_NO_NODE,
- __builtin_return_address(0));
+ return (unsigned long)__vmalloc_node(THREAD_SIZE, THREAD_SIZE,
+ THREADINFO_GFP, NUMA_NO_NODE,
+ __builtin_return_address(0));
#else
return __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER);
#endif
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index bd7bd3581a0f..bfdcb7633957 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -440,3 +440,4 @@
435 common clone3 sys_clone3 sys_clone3
437 common openat2 sys_openat2 sys_openat2
438 common pidfd_getfd sys_pidfd_getfd sys_pidfd_getfd
+439 common faccessat2 sys_faccessat2 sys_faccessat2
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index bfb481134994..a4d4ca2769bd 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -3082,7 +3082,7 @@ static enum hrtimer_restart gisa_vcpu_kicker(struct hrtimer *timer)
__airqs_kick_single_vcpu(kvm, pending_mask);
hrtimer_forward_now(timer, ns_to_ktime(gi->expires));
return HRTIMER_RESTART;
- };
+ }
return HRTIMER_NORESTART;
}
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index d05bb040fd42..06bde4bad205 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -57,110 +57,109 @@
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
(KVM_MAX_VCPUS + LOCAL_IRQS))
-#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
-#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
-
struct kvm_stats_debugfs_item debugfs_entries[] = {
- { "userspace_handled", VCPU_STAT(exit_userspace) },
- { "exit_null", VCPU_STAT(exit_null) },
- { "exit_validity", VCPU_STAT(exit_validity) },
- { "exit_stop_request", VCPU_STAT(exit_stop_request) },
- { "exit_external_request", VCPU_STAT(exit_external_request) },
- { "exit_io_request", VCPU_STAT(exit_io_request) },
- { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
- { "exit_instruction", VCPU_STAT(exit_instruction) },
- { "exit_pei", VCPU_STAT(exit_pei) },
- { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
- { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
- { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
- { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
- { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
- { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
- { "halt_no_poll_steal", VCPU_STAT(halt_no_poll_steal) },
- { "halt_wakeup", VCPU_STAT(halt_wakeup) },
- { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
- { "instruction_lctl", VCPU_STAT(instruction_lctl) },
- { "instruction_stctl", VCPU_STAT(instruction_stctl) },
- { "instruction_stctg", VCPU_STAT(instruction_stctg) },
- { "deliver_ckc", VCPU_STAT(deliver_ckc) },
- { "deliver_cputm", VCPU_STAT(deliver_cputm) },
- { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
- { "deliver_external_call", VCPU_STAT(deliver_external_call) },
- { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
- { "deliver_virtio", VCPU_STAT(deliver_virtio) },
- { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
- { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
- { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
- { "deliver_program", VCPU_STAT(deliver_program) },
- { "deliver_io", VCPU_STAT(deliver_io) },
- { "deliver_machine_check", VCPU_STAT(deliver_machine_check) },
- { "exit_wait_state", VCPU_STAT(exit_wait_state) },
- { "inject_ckc", VCPU_STAT(inject_ckc) },
- { "inject_cputm", VCPU_STAT(inject_cputm) },
- { "inject_external_call", VCPU_STAT(inject_external_call) },
- { "inject_float_mchk", VM_STAT(inject_float_mchk) },
- { "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) },
- { "inject_io", VM_STAT(inject_io) },
- { "inject_mchk", VCPU_STAT(inject_mchk) },
- { "inject_pfault_done", VM_STAT(inject_pfault_done) },
- { "inject_program", VCPU_STAT(inject_program) },
- { "inject_restart", VCPU_STAT(inject_restart) },
- { "inject_service_signal", VM_STAT(inject_service_signal) },
- { "inject_set_prefix", VCPU_STAT(inject_set_prefix) },
- { "inject_stop_signal", VCPU_STAT(inject_stop_signal) },
- { "inject_pfault_init", VCPU_STAT(inject_pfault_init) },
- { "inject_virtio", VM_STAT(inject_virtio) },
- { "instruction_epsw", VCPU_STAT(instruction_epsw) },
- { "instruction_gs", VCPU_STAT(instruction_gs) },
- { "instruction_io_other", VCPU_STAT(instruction_io_other) },
- { "instruction_lpsw", VCPU_STAT(instruction_lpsw) },
- { "instruction_lpswe", VCPU_STAT(instruction_lpswe) },
- { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
- { "instruction_ptff", VCPU_STAT(instruction_ptff) },
- { "instruction_stidp", VCPU_STAT(instruction_stidp) },
- { "instruction_sck", VCPU_STAT(instruction_sck) },
- { "instruction_sckpf", VCPU_STAT(instruction_sckpf) },
- { "instruction_spx", VCPU_STAT(instruction_spx) },
- { "instruction_stpx", VCPU_STAT(instruction_stpx) },
- { "instruction_stap", VCPU_STAT(instruction_stap) },
- { "instruction_iske", VCPU_STAT(instruction_iske) },
- { "instruction_ri", VCPU_STAT(instruction_ri) },
- { "instruction_rrbe", VCPU_STAT(instruction_rrbe) },
- { "instruction_sske", VCPU_STAT(instruction_sske) },
- { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
- { "instruction_essa", VCPU_STAT(instruction_essa) },
- { "instruction_stsi", VCPU_STAT(instruction_stsi) },
- { "instruction_stfl", VCPU_STAT(instruction_stfl) },
- { "instruction_tb", VCPU_STAT(instruction_tb) },
- { "instruction_tpi", VCPU_STAT(instruction_tpi) },
- { "instruction_tprot", VCPU_STAT(instruction_tprot) },
- { "instruction_tsch", VCPU_STAT(instruction_tsch) },
- { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
- { "instruction_sie", VCPU_STAT(instruction_sie) },
- { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
- { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
- { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
- { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
- { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
- { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
- { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
- { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
- { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
- { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
- { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
- { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
- { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
- { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
- { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
- { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
- { "instruction_diag_10", VCPU_STAT(diagnose_10) },
- { "instruction_diag_44", VCPU_STAT(diagnose_44) },
- { "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
- { "diag_9c_ignored", VCPU_STAT(diagnose_9c_ignored) },
- { "instruction_diag_258", VCPU_STAT(diagnose_258) },
- { "instruction_diag_308", VCPU_STAT(diagnose_308) },
- { "instruction_diag_500", VCPU_STAT(diagnose_500) },
- { "instruction_diag_other", VCPU_STAT(diagnose_other) },
+ VCPU_STAT("userspace_handled", exit_userspace),
+ VCPU_STAT("exit_null", exit_null),
+ VCPU_STAT("exit_validity", exit_validity),
+ VCPU_STAT("exit_stop_request", exit_stop_request),
+ VCPU_STAT("exit_external_request", exit_external_request),
+ VCPU_STAT("exit_io_request", exit_io_request),
+ VCPU_STAT("exit_external_interrupt", exit_external_interrupt),
+ VCPU_STAT("exit_instruction", exit_instruction),
+ VCPU_STAT("exit_pei", exit_pei),
+ VCPU_STAT("exit_program_interruption", exit_program_interruption),
+ VCPU_STAT("exit_instr_and_program_int", exit_instr_and_program),
+ VCPU_STAT("exit_operation_exception", exit_operation_exception),
+ VCPU_STAT("halt_successful_poll", halt_successful_poll),
+ VCPU_STAT("halt_attempted_poll", halt_attempted_poll),
+ VCPU_STAT("halt_poll_invalid", halt_poll_invalid),
+ VCPU_STAT("halt_no_poll_steal", halt_no_poll_steal),
+ VCPU_STAT("halt_wakeup", halt_wakeup),
+ VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns),
+ VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns),
+ VCPU_STAT("instruction_lctlg", instruction_lctlg),
+ VCPU_STAT("instruction_lctl", instruction_lctl),
+ VCPU_STAT("instruction_stctl", instruction_stctl),
+ VCPU_STAT("instruction_stctg", instruction_stctg),
+ VCPU_STAT("deliver_ckc", deliver_ckc),
+ VCPU_STAT("deliver_cputm", deliver_cputm),
+ VCPU_STAT("deliver_emergency_signal", deliver_emergency_signal),
+ VCPU_STAT("deliver_external_call", deliver_external_call),
+ VCPU_STAT("deliver_service_signal", deliver_service_signal),
+ VCPU_STAT("deliver_virtio", deliver_virtio),
+ VCPU_STAT("deliver_stop_signal", deliver_stop_signal),
+ VCPU_STAT("deliver_prefix_signal", deliver_prefix_signal),
+ VCPU_STAT("deliver_restart_signal", deliver_restart_signal),
+ VCPU_STAT("deliver_program", deliver_program),
+ VCPU_STAT("deliver_io", deliver_io),
+ VCPU_STAT("deliver_machine_check", deliver_machine_check),
+ VCPU_STAT("exit_wait_state", exit_wait_state),
+ VCPU_STAT("inject_ckc", inject_ckc),
+ VCPU_STAT("inject_cputm", inject_cputm),
+ VCPU_STAT("inject_external_call", inject_external_call),
+ VM_STAT("inject_float_mchk", inject_float_mchk),
+ VCPU_STAT("inject_emergency_signal", inject_emergency_signal),
+ VM_STAT("inject_io", inject_io),
+ VCPU_STAT("inject_mchk", inject_mchk),
+ VM_STAT("inject_pfault_done", inject_pfault_done),
+ VCPU_STAT("inject_program", inject_program),
+ VCPU_STAT("inject_restart", inject_restart),
+ VM_STAT("inject_service_signal", inject_service_signal),
+ VCPU_STAT("inject_set_prefix", inject_set_prefix),
+ VCPU_STAT("inject_stop_signal", inject_stop_signal),
+ VCPU_STAT("inject_pfault_init", inject_pfault_init),
+ VM_STAT("inject_virtio", inject_virtio),
+ VCPU_STAT("instruction_epsw", instruction_epsw),
+ VCPU_STAT("instruction_gs", instruction_gs),
+ VCPU_STAT("instruction_io_other", instruction_io_other),
+ VCPU_STAT("instruction_lpsw", instruction_lpsw),
+ VCPU_STAT("instruction_lpswe", instruction_lpswe),
+ VCPU_STAT("instruction_pfmf", instruction_pfmf),
+ VCPU_STAT("instruction_ptff", instruction_ptff),
+ VCPU_STAT("instruction_stidp", instruction_stidp),
+ VCPU_STAT("instruction_sck", instruction_sck),
+ VCPU_STAT("instruction_sckpf", instruction_sckpf),
+ VCPU_STAT("instruction_spx", instruction_spx),
+ VCPU_STAT("instruction_stpx", instruction_stpx),
+ VCPU_STAT("instruction_stap", instruction_stap),
+ VCPU_STAT("instruction_iske", instruction_iske),
+ VCPU_STAT("instruction_ri", instruction_ri),
+ VCPU_STAT("instruction_rrbe", instruction_rrbe),
+ VCPU_STAT("instruction_sske", instruction_sske),
+ VCPU_STAT("instruction_ipte_interlock", instruction_ipte_interlock),
+ VCPU_STAT("instruction_essa", instruction_essa),
+ VCPU_STAT("instruction_stsi", instruction_stsi),
+ VCPU_STAT("instruction_stfl", instruction_stfl),
+ VCPU_STAT("instruction_tb", instruction_tb),
+ VCPU_STAT("instruction_tpi", instruction_tpi),
+ VCPU_STAT("instruction_tprot", instruction_tprot),
+ VCPU_STAT("instruction_tsch", instruction_tsch),
+ VCPU_STAT("instruction_sthyi", instruction_sthyi),
+ VCPU_STAT("instruction_sie", instruction_sie),
+ VCPU_STAT("instruction_sigp_sense", instruction_sigp_sense),
+ VCPU_STAT("instruction_sigp_sense_running", instruction_sigp_sense_running),
+ VCPU_STAT("instruction_sigp_external_call", instruction_sigp_external_call),
+ VCPU_STAT("instruction_sigp_emergency", instruction_sigp_emergency),
+ VCPU_STAT("instruction_sigp_cond_emergency", instruction_sigp_cond_emergency),
+ VCPU_STAT("instruction_sigp_start", instruction_sigp_start),
+ VCPU_STAT("instruction_sigp_stop", instruction_sigp_stop),
+ VCPU_STAT("instruction_sigp_stop_store_status", instruction_sigp_stop_store_status),
+ VCPU_STAT("instruction_sigp_store_status", instruction_sigp_store_status),
+ VCPU_STAT("instruction_sigp_store_adtl_status", instruction_sigp_store_adtl_status),
+ VCPU_STAT("instruction_sigp_set_arch", instruction_sigp_arch),
+ VCPU_STAT("instruction_sigp_set_prefix", instruction_sigp_prefix),
+ VCPU_STAT("instruction_sigp_restart", instruction_sigp_restart),
+ VCPU_STAT("instruction_sigp_cpu_reset", instruction_sigp_cpu_reset),
+ VCPU_STAT("instruction_sigp_init_cpu_reset", instruction_sigp_init_cpu_reset),
+ VCPU_STAT("instruction_sigp_unknown", instruction_sigp_unknown),
+ VCPU_STAT("instruction_diag_10", diagnose_10),
+ VCPU_STAT("instruction_diag_44", diagnose_44),
+ VCPU_STAT("instruction_diag_9c", diagnose_9c),
+ VCPU_STAT("diag_9c_ignored", diagnose_9c_ignored),
+ VCPU_STAT("instruction_diag_258", diagnose_258),
+ VCPU_STAT("instruction_diag_308", diagnose_308),
+ VCPU_STAT("instruction_diag_500", diagnose_500),
+ VCPU_STAT("instruction_diag_other", diagnose_other),
{ NULL }
};
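The table above switches from the locally defined offsetof() macros (deleted at the top of this hunk) to the VCPU_STAT()/VM_STAT() helpers shared by all KVM architectures. Their approximate shape in include/linux/kvm_host.h in this series, shown here as a sketch rather than a verbatim copy:

	#define VM_STAT(n, x, ...) \
		{ n, offsetof(struct kvm, stat.x), KVM_STAT_VM, ## __VA_ARGS__ }
	#define VCPU_STAT(n, x, ...) \
		{ n, offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU, ## __VA_ARGS__ }

so an entry such as VCPU_STAT("halt_wakeup", halt_wakeup) still resolves to the same { name, offset, KVM_STAT_VCPU } triple as before, only the macro now carries the debugfs name as well.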
@@ -3944,7 +3943,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
/* s390 will always inject the page directly */
}
-bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
+bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
{
/*
* s390 will always inject the page directly,
@@ -4337,8 +4336,9 @@ static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
store_regs_fmt2(vcpu, kvm_run);
}
-int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
{
+ struct kvm_run *kvm_run = vcpu->run;
int rc;
if (kvm_run->immediate_exit)
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index 4f6c22d72072..ef05b4e167fb 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -1000,8 +1000,6 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
handle_last_fault(vcpu, vsie_page);
- if (need_resched())
- schedule();
if (test_cpu_flag(CIF_MCCK_PENDING))
s390_handle_mcck();
@@ -1185,6 +1183,7 @@ static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
kvm_s390_vcpu_has_irq(vcpu, 0) ||
kvm_s390_vcpu_sie_inhibited(vcpu))
break;
+ cond_resched();
}
if (rc == -EFAULT) {
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 1a95d8809cc3..4b6903fbba4a 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -788,19 +788,19 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap,
unsigned long gaddr, int level)
{
const int asce_type = gmap->asce & _ASCE_TYPE_MASK;
- unsigned long *table;
+ unsigned long *table = gmap->table;
- if ((gmap->asce & _ASCE_TYPE_MASK) + 4 < (level * 4))
- return NULL;
if (gmap_is_shadow(gmap) && gmap->removed)
return NULL;
+ if (WARN_ON_ONCE(level > (asce_type >> 2) + 1))
+ return NULL;
+
if (asce_type != _ASCE_TYPE_REGION1 &&
gaddr & (-1UL << (31 + (asce_type >> 2) * 11)))
return NULL;
- table = gmap->table;
- switch (gmap->asce & _ASCE_TYPE_MASK) {
+ switch (asce_type) {
case _ASCE_TYPE_REGION1:
table += (gaddr & _REGION1_INDEX) >> _REGION1_SHIFT;
if (level == 4)
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index b4f0e37b83eb..97656d20b9ea 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -71,7 +71,6 @@ config SUPERH32
select HAVE_FUNCTION_TRACER
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_DYNAMIC_FTRACE
- select HAVE_FTRACE_NMI_ENTER if DYNAMIC_FTRACE
select ARCH_WANT_IPC_PARSE_VERSION
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_ARCH_KGDB
diff --git a/arch/sh/include/asm/checksum_32.h b/arch/sh/include/asm/checksum_32.h
index 36b84cfd3f67..91571a42e44e 100644
--- a/arch/sh/include/asm/checksum_32.h
+++ b/arch/sh/include/asm/checksum_32.h
@@ -48,12 +48,17 @@ __wsum csum_partial_copy_nocheck(const void *src, void *dst,
return csum_partial_copy_generic(src, dst, len, sum, NULL, NULL);
}
+#define _HAVE_ARCH_COPY_AND_CSUM_FROM_USER
static inline
-__wsum csum_partial_copy_from_user(const void __user *src, void *dst,
+__wsum csum_and_copy_from_user(const void __user *src, void *dst,
int len, __wsum sum, int *err_ptr)
{
- return csum_partial_copy_generic((__force const void *)src, dst,
+ if (access_ok(src, len))
+ return csum_partial_copy_generic((__force const void *)src, dst,
len, sum, err_ptr, NULL);
+ if (len)
+ *err_ptr = -EFAULT;
+ return sum;
}
/*
diff --git a/arch/sh/kernel/cpu/sh4/sq.c b/arch/sh/kernel/cpu/sh4/sq.c
index 934ff84844fa..d432164b23b7 100644
--- a/arch/sh/kernel/cpu/sh4/sq.c
+++ b/arch/sh/kernel/cpu/sh4/sq.c
@@ -103,7 +103,8 @@ static int __sq_remap(struct sq_mapping *map, pgprot_t prot)
#if defined(CONFIG_MMU)
struct vm_struct *vma;
- vma = __get_vm_area(map->size, VM_ALLOC, map->sq_addr, SQ_ADDRMAX);
+ vma = __get_vm_area_caller(map->size, VM_ALLOC, map->sq_addr,
+ SQ_ADDRMAX, __builtin_return_address(0));
if (!vma)
return -ENOMEM;
diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl
index c7a30fcd135f..acc35daa1b79 100644
--- a/arch/sh/kernel/syscalls/syscall.tbl
+++ b/arch/sh/kernel/syscalls/syscall.tbl
@@ -440,3 +440,4 @@
# 435 reserved for clone3
437 common openat2 sys_openat2
438 common pidfd_getfd sys_pidfd_getfd
+439 common faccessat2 sys_faccessat2
diff --git a/arch/sh/kernel/traps.c b/arch/sh/kernel/traps.c
index 63cf17bc760d..2130381c9d57 100644
--- a/arch/sh/kernel/traps.c
+++ b/arch/sh/kernel/traps.c
@@ -170,11 +170,21 @@ BUILD_TRAP_HANDLER(bug)
force_sig(SIGTRAP);
}
+#ifdef CONFIG_DYNAMIC_FTRACE
+extern void arch_ftrace_nmi_enter(void);
+extern void arch_ftrace_nmi_exit(void);
+#else
+static inline void arch_ftrace_nmi_enter(void) { }
+static inline void arch_ftrace_nmi_exit(void) { }
+#endif
+
BUILD_TRAP_HANDLER(nmi)
{
unsigned int cpu = smp_processor_id();
TRAP_HANDLER_DECL;
+ arch_ftrace_nmi_enter();
+
nmi_enter();
nmi_count(cpu)++;
@@ -190,4 +200,6 @@ BUILD_TRAP_HANDLER(nmi)
}
nmi_exit();
+
+ arch_ftrace_nmi_exit();
}
diff --git a/arch/sparc/crypto/md5_glue.c b/arch/sparc/crypto/md5_glue.c
index 14f6c15be6ae..111283fe837e 100644
--- a/arch/sparc/crypto/md5_glue.c
+++ b/arch/sparc/crypto/md5_glue.c
@@ -18,7 +18,6 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
-#include <linux/cryptohash.h>
#include <linux/types.h>
#include <crypto/md5.h>
diff --git a/arch/sparc/crypto/sha1_glue.c b/arch/sparc/crypto/sha1_glue.c
index 7c1666304441..dc017782be52 100644
--- a/arch/sparc/crypto/sha1_glue.c
+++ b/arch/sparc/crypto/sha1_glue.c
@@ -15,7 +15,6 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
-#include <linux/cryptohash.h>
#include <linux/types.h>
#include <crypto/sha.h>
diff --git a/arch/sparc/crypto/sha256_glue.c b/arch/sparc/crypto/sha256_glue.c
index f403ce9ba6e4..286bc8ecf15b 100644
--- a/arch/sparc/crypto/sha256_glue.c
+++ b/arch/sparc/crypto/sha256_glue.c
@@ -15,7 +15,6 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
-#include <linux/cryptohash.h>
#include <linux/types.h>
#include <crypto/sha.h>
diff --git a/arch/sparc/crypto/sha512_glue.c b/arch/sparc/crypto/sha512_glue.c
index a3b532e43c07..3b2ca732ff7a 100644
--- a/arch/sparc/crypto/sha512_glue.c
+++ b/arch/sparc/crypto/sha512_glue.c
@@ -14,7 +14,6 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
-#include <linux/cryptohash.h>
#include <linux/types.h>
#include <crypto/sha.h>
diff --git a/arch/sparc/include/asm/checksum.h b/arch/sparc/include/asm/checksum.h
index c3be56e2e768..a6256cb6fc5c 100644
--- a/arch/sparc/include/asm/checksum.h
+++ b/arch/sparc/include/asm/checksum.h
@@ -1,6 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef ___ASM_SPARC_CHECKSUM_H
#define ___ASM_SPARC_CHECKSUM_H
+#define _HAVE_ARCH_COPY_AND_CSUM_FROM_USER
#if defined(__sparc__) && defined(__arch64__)
#include <asm/checksum_64.h>
#else
diff --git a/arch/sparc/include/asm/checksum_32.h b/arch/sparc/include/asm/checksum_32.h
index 5fc98d80b03b..479a0b812af5 100644
--- a/arch/sparc/include/asm/checksum_32.h
+++ b/arch/sparc/include/asm/checksum_32.h
@@ -60,7 +60,7 @@ csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum)
}
static inline __wsum
-csum_partial_copy_from_user(const void __user *src, void *dst, int len,
+csum_and_copy_from_user(const void __user *src, void *dst, int len,
__wsum sum, int *err)
{
register unsigned long ret asm("o0") = (unsigned long)src;
@@ -68,6 +68,12 @@ csum_partial_copy_from_user(const void __user *src, void *dst, int len,
register int l asm("g1") = len;
register __wsum s asm("g7") = sum;
+ if (unlikely(!access_ok(src, len))) {
+ if (len)
+ *err = -EFAULT;
+ return sum;
+ }
+
__asm__ __volatile__ (
".section __ex_table,#alloc\n\t"
".align 4\n\t"
@@ -83,8 +89,10 @@ csum_partial_copy_from_user(const void __user *src, void *dst, int len,
return (__force __wsum)ret;
}
+#define HAVE_CSUM_COPY_USER
+
static inline __wsum
-csum_partial_copy_to_user(const void *src, void __user *dst, int len,
+csum_and_copy_to_user(const void *src, void __user *dst, int len,
__wsum sum, int *err)
{
if (!access_ok(dst, len)) {
@@ -113,9 +121,6 @@ csum_partial_copy_to_user(const void *src, void __user *dst, int len,
}
}
-#define HAVE_CSUM_COPY_USER
-#define csum_and_copy_to_user csum_partial_copy_to_user
-
/* ihl is always 5 or greater, almost always is 5, and iph is word aligned
* the majority of the time.
*/
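The s390, sh and sparc checksum changes in this series share one shape: csum_partial_copy_from_user() becomes csum_and_copy_from_user(), the helper performs its own access_ok() check, and _HAVE_ARCH_COPY_AND_CSUM_FROM_USER / HAVE_CSUM_COPY_USER tell the generic code to use it. A sketch of that shape, where arch_csum_copy_generic() is a hypothetical stand-in for the per-arch copy-and-checksum primitive (csum_partial_copy_generic() on sh, inline asm on sparc):

	#define _HAVE_ARCH_COPY_AND_CSUM_FROM_USER
	static inline __wsum
	csum_and_copy_from_user(const void __user *src, void *dst, int len,
				__wsum sum, int *err_ptr)
	{
		/* the generic caller no longer checks access_ok(); the helper must */
		if (unlikely(!access_ok(src, len))) {
			if (len)
				*err_ptr = -EFAULT;
			return sum;	/* nothing copied, running sum unchanged */
		}
		return arch_csum_copy_generic(src, dst, len, sum, err_ptr);
	}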
diff --git a/arch/sparc/include/asm/checksum_64.h b/arch/sparc/include/asm/checksum_64.h
index e52450930e4e..0fa4433f5662 100644
--- a/arch/sparc/include/asm/checksum_64.h
+++ b/arch/sparc/include/asm/checksum_64.h
@@ -46,7 +46,7 @@ long __csum_partial_copy_from_user(const void __user *src,
__wsum sum);
static inline __wsum
-csum_partial_copy_from_user(const void __user *src,
+csum_and_copy_from_user(const void __user *src,
void *dst, int len,
__wsum sum, int *err)
{
diff --git a/arch/sparc/include/asm/floppy_32.h b/arch/sparc/include/asm/floppy_32.h
index b519acf4383d..946dbcbf3a83 100644
--- a/arch/sparc/include/asm/floppy_32.h
+++ b/arch/sparc/include/asm/floppy_32.h
@@ -59,8 +59,8 @@ struct sun_floppy_ops {
static struct sun_floppy_ops sun_fdops;
-#define fd_inb(port) sun_fdops.fd_inb(port)
-#define fd_outb(value,port) sun_fdops.fd_outb(value,port)
+#define fd_inb(base, reg) sun_fdops.fd_inb(reg)
+#define fd_outb(value, base, reg) sun_fdops.fd_outb(value, reg)
#define fd_enable_dma() sun_fd_enable_dma()
#define fd_disable_dma() sun_fd_disable_dma()
#define fd_request_dma() (0) /* nothing... */
@@ -114,15 +114,15 @@ static unsigned char sun_read_dir(void)
static unsigned char sun_82072_fd_inb(int port)
{
udelay(5);
- switch(port & 7) {
+ switch (port) {
default:
printk("floppy: Asked to read unknown port %d\n", port);
panic("floppy: Port bolixed.");
- case 4: /* FD_STATUS */
+ case FD_STATUS:
return sun_fdc->status_82072 & ~STATUS_DMA;
- case 5: /* FD_DATA */
+ case FD_DATA:
return sun_fdc->data_82072;
- case 7: /* FD_DIR */
+ case FD_DIR:
return sun_read_dir();
}
panic("sun_82072_fd_inb: How did I get here?");
@@ -131,20 +131,20 @@ static unsigned char sun_82072_fd_inb(int port)
static void sun_82072_fd_outb(unsigned char value, int port)
{
udelay(5);
- switch(port & 7) {
+ switch (port) {
default:
printk("floppy: Asked to write to unknown port %d\n", port);
panic("floppy: Port bolixed.");
- case 2: /* FD_DOR */
+ case FD_DOR:
sun_set_dor(value, 0);
break;
- case 5: /* FD_DATA */
+ case FD_DATA:
sun_fdc->data_82072 = value;
break;
- case 7: /* FD_DCR */
+ case FD_DCR:
sun_fdc->dcr_82072 = value;
break;
- case 4: /* FD_STATUS */
+ case FD_DSR:
sun_fdc->status_82072 = value;
break;
}
@@ -154,23 +154,23 @@ static void sun_82072_fd_outb(unsigned char value, int port)
static unsigned char sun_82077_fd_inb(int port)
{
udelay(5);
- switch(port & 7) {
+ switch (port) {
default:
printk("floppy: Asked to read unknown port %d\n", port);
panic("floppy: Port bolixed.");
- case 0: /* FD_STATUS_0 */
+ case FD_SRA:
return sun_fdc->status1_82077;
- case 1: /* FD_STATUS_1 */
+ case FD_SRB:
return sun_fdc->status2_82077;
- case 2: /* FD_DOR */
+ case FD_DOR:
return sun_fdc->dor_82077;
- case 3: /* FD_TDR */
+ case FD_TDR:
return sun_fdc->tapectl_82077;
- case 4: /* FD_STATUS */
+ case FD_STATUS:
return sun_fdc->status_82077 & ~STATUS_DMA;
- case 5: /* FD_DATA */
+ case FD_DATA:
return sun_fdc->data_82077;
- case 7: /* FD_DIR */
+ case FD_DIR:
return sun_read_dir();
}
panic("sun_82077_fd_inb: How did I get here?");
@@ -179,23 +179,23 @@ static unsigned char sun_82077_fd_inb(int port)
static void sun_82077_fd_outb(unsigned char value, int port)
{
udelay(5);
- switch(port & 7) {
+ switch (port) {
default:
printk("floppy: Asked to write to unknown port %d\n", port);
panic("floppy: Port bolixed.");
- case 2: /* FD_DOR */
+ case FD_DOR:
sun_set_dor(value, 1);
break;
- case 5: /* FD_DATA */
+ case FD_DATA:
sun_fdc->data_82077 = value;
break;
- case 7: /* FD_DCR */
+ case FD_DCR:
sun_fdc->dcr_82077 = value;
break;
- case 4: /* FD_STATUS */
+ case FD_DSR:
sun_fdc->status_82077 = value;
break;
- case 3: /* FD_TDR */
+ case FD_TDR:
sun_fdc->tapectl_82077 = value;
break;
}
diff --git a/arch/sparc/include/asm/floppy_64.h b/arch/sparc/include/asm/floppy_64.h
index 3729fc35ba83..070c8c1f5c8f 100644
--- a/arch/sparc/include/asm/floppy_64.h
+++ b/arch/sparc/include/asm/floppy_64.h
@@ -47,8 +47,9 @@ unsigned long fdc_status;
static struct platform_device *floppy_op = NULL;
struct sun_floppy_ops {
- unsigned char (*fd_inb) (unsigned long port);
- void (*fd_outb) (unsigned char value, unsigned long port);
+ unsigned char (*fd_inb) (unsigned long port, unsigned int reg);
+ void (*fd_outb) (unsigned char value, unsigned long base,
+ unsigned int reg);
void (*fd_enable_dma) (void);
void (*fd_disable_dma) (void);
void (*fd_set_dma_mode) (int);
@@ -62,8 +63,8 @@ struct sun_floppy_ops {
static struct sun_floppy_ops sun_fdops;
-#define fd_inb(port) sun_fdops.fd_inb(port)
-#define fd_outb(value,port) sun_fdops.fd_outb(value,port)
+#define fd_inb(base, reg) sun_fdops.fd_inb(base, reg)
+#define fd_outb(value, base, reg) sun_fdops.fd_outb(value, base, reg)
#define fd_enable_dma() sun_fdops.fd_enable_dma()
#define fd_disable_dma() sun_fdops.fd_disable_dma()
#define fd_request_dma() (0) /* nothing... */
@@ -97,42 +98,43 @@ static int sun_floppy_types[2] = { 0, 0 };
/* No 64k boundary crossing problems on the Sparc. */
#define CROSS_64KB(a,s) (0)
-static unsigned char sun_82077_fd_inb(unsigned long port)
+static unsigned char sun_82077_fd_inb(unsigned long base, unsigned int reg)
{
udelay(5);
- switch(port & 7) {
+ switch (reg) {
default:
- printk("floppy: Asked to read unknown port %lx\n", port);
+ printk("floppy: Asked to read unknown port %x\n", reg);
panic("floppy: Port bolixed.");
- case 4: /* FD_STATUS */
+ case FD_STATUS:
return sbus_readb(&sun_fdc->status_82077) & ~STATUS_DMA;
- case 5: /* FD_DATA */
+ case FD_DATA:
return sbus_readb(&sun_fdc->data_82077);
- case 7: /* FD_DIR */
+ case FD_DIR:
/* XXX: Is DCL on 0x80 in sun4m? */
return sbus_readb(&sun_fdc->dir_82077);
}
panic("sun_82072_fd_inb: How did I get here?");
}
-static void sun_82077_fd_outb(unsigned char value, unsigned long port)
+static void sun_82077_fd_outb(unsigned char value, unsigned long base,
+ unsigned int reg)
{
udelay(5);
- switch(port & 7) {
+ switch (reg) {
default:
- printk("floppy: Asked to write to unknown port %lx\n", port);
+ printk("floppy: Asked to write to unknown port %x\n", reg);
panic("floppy: Port bolixed.");
- case 2: /* FD_DOR */
+ case FD_DOR:
/* Happily, the 82077 has a real DOR register. */
sbus_writeb(value, &sun_fdc->dor_82077);
break;
- case 5: /* FD_DATA */
+ case FD_DATA:
sbus_writeb(value, &sun_fdc->data_82077);
break;
- case 7: /* FD_DCR */
+ case FD_DCR:
sbus_writeb(value, &sun_fdc->dcr_82077);
break;
- case 4: /* FD_STATUS */
+ case FD_DSR:
sbus_writeb(value, &sun_fdc->status_82077);
break;
}
@@ -298,19 +300,21 @@ static struct sun_pci_dma_op sun_pci_dma_pending = { -1U, 0, 0, NULL};
irqreturn_t floppy_interrupt(int irq, void *dev_id);
-static unsigned char sun_pci_fd_inb(unsigned long port)
+static unsigned char sun_pci_fd_inb(unsigned long base, unsigned int reg)
{
udelay(5);
- return inb(port);
+ return inb(base + reg);
}
-static void sun_pci_fd_outb(unsigned char val, unsigned long port)
+static void sun_pci_fd_outb(unsigned char val, unsigned long base,
+ unsigned int reg)
{
udelay(5);
- outb(val, port);
+ outb(val, base + reg);
}
-static void sun_pci_fd_broken_outb(unsigned char val, unsigned long port)
+static void sun_pci_fd_broken_outb(unsigned char val, unsigned long base,
+ unsigned int reg)
{
udelay(5);
/*
@@ -320,16 +324,17 @@ static void sun_pci_fd_broken_outb(unsigned char val, unsigned long port)
* this does not hurt correct hardware like the AXmp.
* (Eddie, Sep 12 1998).
*/
- if (port == ((unsigned long)sun_fdc) + 2) {
+ if (reg == FD_DOR) {
if (((val & 0x03) == sun_pci_broken_drive) && (val & 0x20)) {
val |= 0x10;
}
}
- outb(val, port);
+ outb(val, base + reg);
}
#ifdef PCI_FDC_SWAP_DRIVES
-static void sun_pci_fd_lde_broken_outb(unsigned char val, unsigned long port)
+static void sun_pci_fd_lde_broken_outb(unsigned char val, unsigned long base,
+ unsigned int reg)
{
udelay(5);
/*
@@ -339,13 +344,13 @@ static void sun_pci_fd_lde_broken_outb(unsigned char val, unsigned long port)
* this does not hurt correct hardware like the AXmp.
* (Eddie, Sep 12 1998).
*/
- if (port == ((unsigned long)sun_fdc) + 2) {
+ if (reg == FD_DOR) {
if (((val & 0x03) == sun_pci_broken_drive) && (val & 0x10)) {
val &= ~(0x03);
val |= 0x21;
}
}
- outb(val, port);
+ outb(val, base + reg);
}
#endif /* PCI_FDC_SWAP_DRIVES */
diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl
index f13615ecdecc..8004a276cb74 100644
--- a/arch/sparc/kernel/syscalls/syscall.tbl
+++ b/arch/sparc/kernel/syscalls/syscall.tbl
@@ -483,3 +483,4 @@
# 435 reserved for clone3
437 common openat2 sys_openat2
438 common pidfd_getfd sys_pidfd_getfd
+439 common faccessat2 sys_faccessat2
diff --git a/arch/unicore32/kernel/ksyms.c b/arch/unicore32/kernel/ksyms.c
index f4b84872d640..731445008932 100644
--- a/arch/unicore32/kernel/ksyms.c
+++ b/arch/unicore32/kernel/ksyms.c
@@ -9,7 +9,6 @@
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/string.h>
-#include <linux/cryptohash.h>
#include <linux/delay.h>
#include <linux/in6.h>
#include <linux/syscalls.h>
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 2d3f963fd6f1..70c668d976cc 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -60,6 +60,7 @@ config X86
select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
select ARCH_HAS_DEBUG_VIRTUAL
select ARCH_HAS_DEVMEM_IS_ALLOWED
+ select ARCH_HAS_EARLY_DEBUG if KGDB
select ARCH_HAS_ELF_RANDOMIZE
select ARCH_HAS_FAST_MULTIPLIER
select ARCH_HAS_FILTER_PGPROT
@@ -92,6 +93,7 @@ config X86
select ARCH_USE_BUILTIN_BSWAP
select ARCH_USE_QUEUED_RWLOCKS
select ARCH_USE_QUEUED_SPINLOCKS
+ select ARCH_USE_SYM_ANNOTATIONS
select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
select ARCH_WANT_DEFAULT_BPF_JIT if X86_64
select ARCH_WANTS_DYNAMIC_TASK_STRUCT
@@ -596,7 +598,7 @@ config X86_INTEL_MID
select I2C
select DW_APB_TIMER
select APB_TIMER
- select INTEL_SCU_IPC
+ select INTEL_SCU_PCI
select MFD_INTEL_MSIC
---help---
Select to build a kernel capable of supporting Intel MID (Mobile
@@ -1611,19 +1613,10 @@ config NODES_SHIFT
Specify the maximum number of NUMA Nodes available on the target
system. Increases memory reserved to accommodate various tables.
-config ARCH_HAVE_MEMORY_PRESENT
- def_bool y
- depends on X86_32 && DISCONTIGMEM
-
config ARCH_FLATMEM_ENABLE
def_bool y
depends on X86_32 && !NUMA
-config ARCH_DISCONTIGMEM_ENABLE
- def_bool n
- depends on NUMA && X86_32
- depends on BROKEN
-
config ARCH_SPARSEMEM_ENABLE
def_bool y
depends on X86_64 || NUMA || X86_32 || X86_32_NON_STANDARD
@@ -1888,10 +1881,10 @@ config X86_UMIP
results are dummy.
config X86_INTEL_MEMORY_PROTECTION_KEYS
- prompt "Intel Memory Protection Keys"
+ prompt "Memory Protection Keys"
def_bool y
# Note: only available in 64-bit mode
- depends on CPU_SUP_INTEL && X86_64
+ depends on X86_64 && (CPU_SUP_INTEL || CPU_SUP_AMD)
select ARCH_USES_HIGH_VMA_FLAGS
select ARCH_HAS_PKEYS
---help---
diff --git a/arch/x86/Kconfig.assembler b/arch/x86/Kconfig.assembler
index 13de0db38d4e..26b8c08e2fc4 100644
--- a/arch/x86/Kconfig.assembler
+++ b/arch/x86/Kconfig.assembler
@@ -15,3 +15,7 @@ config AS_SHA256_NI
def_bool $(as-instr,sha256msg1 %xmm0$(comma)%xmm1)
help
Supported by binutils >= 2.24 and LLVM integrated assembler
+config AS_TPAUSE
+ def_bool $(as-instr,tpause %ecx)
+ help
+ Supported by binutils >= 2.31.1 and LLVM integrated assembler >= V7
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 2e74690b028a..f909d3ce36e6 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -99,15 +99,6 @@ config DEBUG_WX
If in doubt, say "Y".
-config DOUBLEFAULT
- default y
- bool "Enable doublefault exception handler" if EXPERT && X86_32
- ---help---
- This option allows trapping of rare doublefault exceptions that
- would otherwise cause a system to silently reboot. Disabling this
- option saves about 4k and might cause you much additional grey
- hair.
-
config DEBUG_TLBFLUSH
bool "Set upper limit of TLB entries to flush one-by-one"
depends on DEBUG_KERNEL
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index b65ec63c7db7..00e378de8bc0 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -246,7 +246,7 @@ drivers-$(CONFIG_FB) += arch/x86/video/
boot := arch/x86/boot
-BOOT_TARGETS = bzlilo bzdisk fdimage fdimage144 fdimage288 isoimage
+BOOT_TARGETS = bzdisk fdimage fdimage144 fdimage288 isoimage
PHONY += bzImage $(BOOT_TARGETS)
@@ -267,8 +267,8 @@ endif
$(BOOT_TARGETS): vmlinux
$(Q)$(MAKE) $(build)=$(boot) $@
-PHONY += install
-install:
+PHONY += install bzlilo
+install bzlilo:
$(Q)$(MAKE) $(build)=$(boot) $@
PHONY += vdso_install
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index e17be90ab312..4c5355684321 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -57,11 +57,10 @@ $(obj)/cpu.o: $(obj)/cpustr.h
quiet_cmd_cpustr = CPUSTR $@
cmd_cpustr = $(obj)/mkcpustr > $@
-targets += cpustr.h
$(obj)/cpustr.h: $(obj)/mkcpustr FORCE
$(call if_changed,cpustr)
endif
-clean-files += cpustr.h
+targets += cpustr.h
# ---------------------------------------------------------------------------
@@ -129,6 +128,8 @@ quiet_cmd_genimage = GENIMAGE $3
cmd_genimage = sh $(srctree)/$(src)/genimage.sh $2 $3 $(obj)/bzImage \
$(obj)/mtools.conf '$(image_cmdline)' $(FDINITRD)
+PHONY += bzdisk fdimage fdimage144 fdimage288 isoimage bzlilo install
+
# This requires write access to /dev/fd0
bzdisk: $(obj)/bzImage $(obj)/mtools.conf
$(call cmd,genimage,bzdisk,/dev/fd0)
@@ -146,7 +147,7 @@ isoimage: $(obj)/bzImage
$(call cmd,genimage,isoimage,$(obj)/image.iso)
@$(kecho) 'Kernel: $(obj)/image.iso is ready'
-bzlilo: $(obj)/bzImage
+bzlilo:
if [ -f $(INSTALL_PATH)/vmlinuz ]; then mv $(INSTALL_PATH)/vmlinuz $(INSTALL_PATH)/vmlinuz.old; fi
if [ -f $(INSTALL_PATH)/System.map ]; then mv $(INSTALL_PATH)/System.map $(INSTALL_PATH)/System.old; fi
cat $(obj)/bzImage > $(INSTALL_PATH)/vmlinuz
diff --git a/arch/x86/boot/compressed/acpi.c b/arch/x86/boot/compressed/acpi.c
index ef2ad7253cd5..8bcbcee54aa1 100644
--- a/arch/x86/boot/compressed/acpi.c
+++ b/arch/x86/boot/compressed/acpi.c
@@ -280,9 +280,9 @@ acpi_physical_address get_rsdp_addr(void)
*/
#define MAX_ADDR_LEN 19
-static acpi_physical_address get_cmdline_acpi_rsdp(void)
+static unsigned long get_cmdline_acpi_rsdp(void)
{
- acpi_physical_address addr = 0;
+ unsigned long addr = 0;
#ifdef CONFIG_KEXEC
char val[MAX_ADDR_LEN] = { };
@@ -292,7 +292,7 @@ static acpi_physical_address get_cmdline_acpi_rsdp(void)
if (ret < 0)
return 0;
- if (kstrtoull(val, 16, &addr))
+ if (boot_kstrtoul(val, 16, &addr))
return 0;
#endif
return addr;
@@ -314,7 +314,6 @@ static unsigned long get_acpi_srat_table(void)
* different ideas about whether to trust a command-line parameter.
*/
rsdp = (struct acpi_table_rsdp *)get_cmdline_acpi_rsdp();
-
if (!rsdp)
rsdp = (struct acpi_table_rsdp *)(long)
boot_params->acpi_rsdp_addr;
diff --git a/arch/x86/boot/compressed/efi_thunk_64.S b/arch/x86/boot/compressed/efi_thunk_64.S
index 2b2049259619..c4bb0f9363f5 100644
--- a/arch/x86/boot/compressed/efi_thunk_64.S
+++ b/arch/x86/boot/compressed/efi_thunk_64.S
@@ -28,8 +28,6 @@ SYM_FUNC_START(__efi64_thunk)
push %rbx
leaq 1f(%rip), %rbp
- leaq efi_gdt64(%rip), %rbx
- movl %ebx, 2(%rbx) /* Fixup the gdt base address */
movl %ds, %eax
push %rax
@@ -48,7 +46,8 @@ SYM_FUNC_START(__efi64_thunk)
movl %r8d, 0xc(%rsp)
movl %r9d, 0x10(%rsp)
- sgdt 0x14(%rsp)
+ leaq 0x14(%rsp), %rbx
+ sgdt (%rbx)
/*
* Switch to gdt with 32-bit segments. This is the firmware GDT
@@ -68,8 +67,7 @@ SYM_FUNC_START(__efi64_thunk)
pushq %rax
lretq
-1: lgdt 0x14(%rsp)
- addq $32, %rsp
+1: addq $32, %rsp
movq %rdi, %rax
pop %rbx
@@ -175,14 +173,3 @@ SYM_DATA_END(efi32_boot_cs)
SYM_DATA_START(efi32_boot_ds)
.word 0
SYM_DATA_END(efi32_boot_ds)
-
-SYM_DATA_START(efi_gdt64)
- .word efi_gdt64_end - efi_gdt64
- .long 0 /* Filled out by user */
- .word 0
- .quad 0x0000000000000000 /* NULL descriptor */
- .quad 0x00af9a000000ffff /* __KERNEL_CS */
- .quad 0x00cf92000000ffff /* __KERNEL_DS */
- .quad 0x0080890000000000 /* TS descriptor */
- .quad 0x0000000000000000 /* TS continued */
-SYM_DATA_END_LABEL(efi_gdt64, SYM_L_LOCAL, efi_gdt64_end)
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index ab3307036ba4..03557f2174bf 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -49,16 +49,17 @@
* Position Independent Executable (PIE) so that linker won't optimize
* R_386_GOT32X relocation to its fixed symbol address. Older
* linkers generate R_386_32 relocations against locally defined symbols,
- * _bss, _ebss, _got and _egot, in PIE. It isn't wrong, just less
+ * _bss, _ebss, _got, _egot and _end, in PIE. It isn't wrong, just less
* optimal than R_386_RELATIVE. But the x86 kernel fails to properly handle
* R_386_32 relocations when relocating the kernel. To generate
- * R_386_RELATIVE relocations, we mark _bss, _ebss, _got and _egot as
+ * R_386_RELATIVE relocations, we mark _bss, _ebss, _got, _egot and _end as
* hidden:
*/
.hidden _bss
.hidden _ebss
.hidden _got
.hidden _egot
+ .hidden _end
__HEAD
SYM_FUNC_START(startup_32)
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 4f7e6b84be07..e821a7d7d5c4 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -42,6 +42,7 @@
.hidden _ebss
.hidden _got
.hidden _egot
+ .hidden _end
__HEAD
.code32
@@ -393,6 +394,14 @@ SYM_CODE_START(startup_64)
addq %rax, 2(%rax)
lgdt (%rax)
+ /* Reload CS so IRET returns to a CS actually in the GDT */
+ pushq $__KERNEL_CS
+ leaq .Lon_kernel_cs(%rip), %rax
+ pushq %rax
+ lretq
+
+.Lon_kernel_cs:
+
/*
* paging_prepare() sets up the trampoline and checks if we need to
* enable 5-level paging.
diff --git a/arch/x86/boot/compressed/vmlinux.lds.S b/arch/x86/boot/compressed/vmlinux.lds.S
index 508cfa6828c5..8f1025d1f681 100644
--- a/arch/x86/boot/compressed/vmlinux.lds.S
+++ b/arch/x86/boot/compressed/vmlinux.lds.S
@@ -52,6 +52,7 @@ SECTIONS
_data = . ;
*(.data)
*(.data.*)
+ *(.bss.efistub)
_edata = . ;
}
. = ALIGN(L1_CACHE_BYTES);
@@ -73,4 +74,6 @@ SECTIONS
#endif
. = ALIGN(PAGE_SIZE); /* keep ZO size page aligned */
_end = .;
+
+ DISCARDS
}
diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c
index 8272a4492844..8a3fff9128bb 100644
--- a/arch/x86/boot/string.c
+++ b/arch/x86/boot/string.c
@@ -117,7 +117,6 @@ static unsigned int simple_guess_base(const char *cp)
* @endp: A pointer to the end of the parsed string will be placed here
* @base: The number base to use
*/
-
unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base)
{
unsigned long long result = 0;
@@ -335,3 +334,45 @@ int kstrtoull(const char *s, unsigned int base, unsigned long long *res)
s++;
return _kstrtoull(s, base, res);
}
+
+static int _kstrtoul(const char *s, unsigned int base, unsigned long *res)
+{
+ unsigned long long tmp;
+ int rv;
+
+ rv = kstrtoull(s, base, &tmp);
+ if (rv < 0)
+ return rv;
+ if (tmp != (unsigned long)tmp)
+ return -ERANGE;
+ *res = tmp;
+ return 0;
+}
+
+/**
+ * kstrtoul - convert a string to an unsigned long
+ * @s: The start of the string. The string must be null-terminated, and may also
+ * include a single newline before its terminating null. The first character
+ * may also be a plus sign, but not a minus sign.
+ * @base: The number base to use. The maximum supported base is 16. If base is
+ * given as 0, then the base of the string is automatically detected with the
+ * conventional semantics - If it begins with 0x the number will be parsed as a
+ * hexadecimal (case insensitive), if it otherwise begins with 0, it will be
+ * parsed as an octal number. Otherwise it will be parsed as a decimal.
+ * @res: Where to write the result of the conversion on success.
+ *
+ * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error.
+ * Used as a replacement for the simple_strtoull.
+ */
+int boot_kstrtoul(const char *s, unsigned int base, unsigned long *res)
+{
+ /*
+ * We want to shortcut function call, but
+ * __builtin_types_compatible_p(unsigned long, unsigned long long) = 0.
+ */
+ if (sizeof(unsigned long) == sizeof(unsigned long long) &&
+ __alignof__(unsigned long) == __alignof__(unsigned long long))
+ return kstrtoull(s, base, (unsigned long long *)res);
+ else
+ return _kstrtoul(s, base, res);
+}
diff --git a/arch/x86/boot/string.h b/arch/x86/boot/string.h
index 38d8f2f5e47e..995f7b7ad512 100644
--- a/arch/x86/boot/string.h
+++ b/arch/x86/boot/string.h
@@ -30,4 +30,5 @@ extern unsigned long long simple_strtoull(const char *cp, char **endp,
unsigned int base);
int kstrtoull(const char *s, unsigned int base, unsigned long long *res);
+int boot_kstrtoul(const char *s, unsigned int base, unsigned long *res);
#endif /* BOOT_STRING_H */
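boot_kstrtoul() gives the decompressor a strict string-to-unsigned-long parser; get_cmdline_acpi_rsdp() above is its one caller in this series, feeding it the acpi_rsdp= value in base 16. A minimal usage sketch with a made-up value:

	unsigned long addr = 0;

	/* "0xe0000" stands in for a string found via cmdline_find_option() */
	if (boot_kstrtoul("0xe0000", 16, &addr))
		addr = 0;	/* -EINVAL or -ERANGE: treat as not provided */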
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index cad6e1bfa7d5..54e7d15dbd0d 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -2758,7 +2758,7 @@ SYM_FUNC_START(aesni_xts_crypt8)
pxor INC, STATE4
movdqu IV, 0x30(OUTP)
- CALL_NOSPEC %r11
+ CALL_NOSPEC r11
movdqu 0x00(OUTP), INC
pxor INC, STATE1
@@ -2803,7 +2803,7 @@ SYM_FUNC_START(aesni_xts_crypt8)
_aesni_gf128mul_x_ble()
movups IV, (IVP)
- CALL_NOSPEC %r11
+ CALL_NOSPEC r11
movdqu 0x40(OUTP), INC
pxor INC, STATE1
diff --git a/arch/x86/crypto/camellia-aesni-avx-asm_64.S b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
index d01ddd73de65..ecc0a9a905c4 100644
--- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S
+++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
@@ -1228,7 +1228,7 @@ SYM_FUNC_START_LOCAL(camellia_xts_crypt_16way)
vpxor 14 * 16(%rax), %xmm15, %xmm14;
vpxor 15 * 16(%rax), %xmm15, %xmm15;
- CALL_NOSPEC %r9;
+ CALL_NOSPEC r9;
addq $(16 * 16), %rsp;
diff --git a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
index 563ef6e83cdd..0907243c501c 100644
--- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
+++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
@@ -1339,7 +1339,7 @@ SYM_FUNC_START_LOCAL(camellia_xts_crypt_32way)
vpxor 14 * 32(%rax), %ymm15, %ymm14;
vpxor 15 * 32(%rax), %ymm15, %ymm15;
- CALL_NOSPEC %r9;
+ CALL_NOSPEC r9;
addq $(16 * 32), %rsp;
diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
index 0e6690e3618c..8501ec4532f4 100644
--- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
+++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
@@ -75,7 +75,7 @@
.text
SYM_FUNC_START(crc_pcl)
-#define bufp %rdi
+#define bufp rdi
#define bufp_dw %edi
#define bufp_w %di
#define bufp_b %dil
@@ -105,9 +105,9 @@ SYM_FUNC_START(crc_pcl)
## 1) ALIGN:
################################################################
- mov bufp, bufptmp # rdi = *buf
- neg bufp
- and $7, bufp # calculate the unalignment amount of
+ mov %bufp, bufptmp # rdi = *buf
+ neg %bufp
+ and $7, %bufp # calculate the unalignment amount of
# the address
je proc_block # Skip if aligned
@@ -123,13 +123,13 @@ SYM_FUNC_START(crc_pcl)
do_align:
#### Calculate CRC of unaligned bytes of the buffer (if any)
movq (bufptmp), tmp # load a quadward from the buffer
- add bufp, bufptmp # align buffer pointer for quadword
+ add %bufp, bufptmp # align buffer pointer for quadword
# processing
- sub bufp, len # update buffer length
+ sub %bufp, len # update buffer length
align_loop:
crc32b %bl, crc_init_dw # compute crc32 of 1-byte
shr $8, tmp # get next byte
- dec bufp
+ dec %bufp
jne align_loop
proc_block:
@@ -169,10 +169,10 @@ continue_block:
xor crc2, crc2
## branch into array
- lea jump_table(%rip), bufp
- movzxw (bufp, %rax, 2), len
- lea crc_array(%rip), bufp
- lea (bufp, len, 1), bufp
+ lea jump_table(%rip), %bufp
+ movzxw (%bufp, %rax, 2), len
+ lea crc_array(%rip), %bufp
+ lea (%bufp, len, 1), %bufp
JMP_NOSPEC bufp
################################################################
@@ -218,9 +218,9 @@ LABEL crc_ %i
## 4) Combine three results:
################################################################
- lea (K_table-8)(%rip), bufp # first entry is for idx 1
+ lea (K_table-8)(%rip), %bufp # first entry is for idx 1
shlq $3, %rax # rax *= 8
- pmovzxdq (bufp,%rax), %xmm0 # 2 consts: K1:K2
+ pmovzxdq (%bufp,%rax), %xmm0 # 2 consts: K1:K2
leal (%eax,%eax,2), %eax # rax *= 3 (total *24)
subq %rax, tmp # tmp -= rax*24
diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c
index a801ffc10cbb..18200135603f 100644
--- a/arch/x86/crypto/sha1_ssse3_glue.c
+++ b/arch/x86/crypto/sha1_ssse3_glue.c
@@ -21,7 +21,6 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
-#include <linux/cryptohash.h>
#include <linux/types.h>
#include <crypto/sha.h>
#include <crypto/sha1_base.h>
diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c
index 6394b5fe8db6..dd06249229e1 100644
--- a/arch/x86/crypto/sha256_ssse3_glue.c
+++ b/arch/x86/crypto/sha256_ssse3_glue.c
@@ -34,7 +34,6 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
-#include <linux/cryptohash.h>
#include <linux/types.h>
#include <crypto/sha.h>
#include <crypto/sha256_base.h>
diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c
index 82cc1b3ced1d..b0b05c93409e 100644
--- a/arch/x86/crypto/sha512_ssse3_glue.c
+++ b/arch/x86/crypto/sha512_ssse3_glue.c
@@ -32,7 +32,6 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
-#include <linux/cryptohash.h>
#include <linux/string.h>
#include <linux/types.h>
#include <crypto/sha.h>
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index b67bae7091d7..a5eed844e948 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -816,7 +816,7 @@ SYM_CODE_START(ret_from_fork)
/* kernel thread */
1: movl %edi, %eax
- CALL_NOSPEC %ebx
+ CALL_NOSPEC ebx
/*
* A kernel thread is allowed to return here after successfully
* calling do_execve(). Exit to userspace to complete the execve()
@@ -1501,7 +1501,7 @@ SYM_CODE_START_LOCAL_NOALIGN(common_exception_read_cr2)
TRACE_IRQS_OFF
movl %esp, %eax # pt_regs pointer
- CALL_NOSPEC %edi
+ CALL_NOSPEC edi
jmp ret_from_exception
SYM_CODE_END(common_exception_read_cr2)
@@ -1522,7 +1522,7 @@ SYM_CODE_START_LOCAL_NOALIGN(common_exception)
TRACE_IRQS_OFF
movl %esp, %eax # pt_regs pointer
- CALL_NOSPEC %edi
+ CALL_NOSPEC edi
jmp ret_from_exception
SYM_CODE_END(common_exception)
@@ -1536,7 +1536,6 @@ SYM_CODE_START(debug)
jmp common_exception
SYM_CODE_END(debug)
-#ifdef CONFIG_DOUBLEFAULT
SYM_CODE_START(double_fault)
1:
/*
@@ -1576,7 +1575,6 @@ SYM_CODE_START(double_fault)
hlt
jmp 1b
SYM_CODE_END(double_fault)
-#endif
/*
* NMI is doubly nasty. It can happen on the first instruction of
@@ -1693,14 +1691,6 @@ SYM_CODE_START(general_protection)
jmp common_exception
SYM_CODE_END(general_protection)
-#ifdef CONFIG_KVM_GUEST
-SYM_CODE_START(async_page_fault)
- ASM_CLAC
- pushl $do_async_page_fault
- jmp common_exception_read_cr2
-SYM_CODE_END(async_page_fault)
-#endif
-
SYM_CODE_START(rewind_stack_do_exit)
/* Prevent any naive code from trying to unwind to our caller. */
xorl %ebp, %ebp
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 3063aa9090f9..eead1e2bebd5 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -348,7 +348,7 @@ SYM_CODE_START(ret_from_fork)
/* kernel thread */
UNWIND_HINT_EMPTY
movq %r12, %rdi
- CALL_NOSPEC %rbx
+ CALL_NOSPEC rbx
/*
* A kernel thread is allowed to return here after successfully
* calling do_execve(). Exit to userspace to complete the execve()
@@ -1202,10 +1202,6 @@ idtentry xendebug do_debug has_error_code=0
idtentry general_protection do_general_protection has_error_code=1
idtentry page_fault do_page_fault has_error_code=1 read_cr2=1
-#ifdef CONFIG_KVM_GUEST
-idtentry async_page_fault do_async_page_fault has_error_code=1 read_cr2=1
-#endif
-
#ifdef CONFIG_X86_MCE
idtentry machine_check do_mce has_error_code=0 paranoid=1
#endif
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 54581ac671b4..d8f8a1a69ed1 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -442,3 +442,4 @@
435 i386 clone3 sys_clone3
437 i386 openat2 sys_openat2
438 i386 pidfd_getfd sys_pidfd_getfd
+439 i386 faccessat2 sys_faccessat2
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 37b844f839bc..78847b32e137 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -359,6 +359,7 @@
435 common clone3 sys_clone3
437 common openat2 sys_openat2
438 common pidfd_getfd sys_pidfd_getfd
+439 common faccessat2 sys_faccessat2
#
# x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index 433a1259f61d..54e03ab26ff3 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -24,6 +24,8 @@ VDSO32-$(CONFIG_IA32_EMULATION) := y
# files to link into the vdso
vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o
+vobjs32-y := vdso32/note.o vdso32/system_call.o vdso32/sigreturn.o
+vobjs32-y += vdso32/vclock_gettime.o
# files to link into kernel
obj-y += vma.o
@@ -37,10 +39,12 @@ vdso_img-$(VDSO32-y) += 32
obj-$(VDSO32-y) += vdso32-setup.o
vobjs := $(foreach F,$(vobjs-y),$(obj)/$F)
+vobjs32 := $(foreach F,$(vobjs32-y),$(obj)/$F)
$(obj)/vdso.o: $(obj)/vdso.so
targets += vdso.lds $(vobjs-y)
+targets += vdso32/vdso32.lds $(vobjs32-y)
# Build the vDSO image C files and link them in.
vdso_img_objs := $(vdso_img-y:%=vdso-image-%.o)
@@ -130,10 +134,6 @@ $(obj)/vdsox32.so.dbg: $(obj)/vdsox32.lds $(vobjx32s) FORCE
CPPFLAGS_vdso32/vdso32.lds = $(CPPFLAGS_vdso.lds)
VDSO_LDFLAGS_vdso32.lds = -m elf_i386 -soname linux-gate.so.1
-targets += vdso32/vdso32.lds
-targets += vdso32/note.o vdso32/system_call.o vdso32/sigreturn.o
-targets += vdso32/vclock_gettime.o
-
KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS)) -DBUILD_VDSO
$(obj)/vdso32.so.dbg: KBUILD_AFLAGS = $(KBUILD_AFLAGS_32)
$(obj)/vdso32.so.dbg: asflags-$(CONFIG_X86_64) += -m32
@@ -158,12 +158,7 @@ endif
$(obj)/vdso32.so.dbg: KBUILD_CFLAGS = $(KBUILD_CFLAGS_32)
-$(obj)/vdso32.so.dbg: FORCE \
- $(obj)/vdso32/vdso32.lds \
- $(obj)/vdso32/vclock_gettime.o \
- $(obj)/vdso32/note.o \
- $(obj)/vdso32/system_call.o \
- $(obj)/vdso32/sigreturn.o
+$(obj)/vdso32.so.dbg: $(obj)/vdso32/vdso32.lds $(vobjs32) FORCE
$(call if_changed,vdso_and_check)
#
diff --git a/arch/x86/entry/vdso/vdso2c.c b/arch/x86/entry/vdso/vdso2c.c
index 3842873b3ae3..7380908045c7 100644
--- a/arch/x86/entry/vdso/vdso2c.c
+++ b/arch/x86/entry/vdso/vdso2c.c
@@ -187,7 +187,7 @@ static void map_input(const char *name, void **addr, size_t *len, int prot)
int fd = open(name, O_RDONLY);
if (fd == -1)
- err(1, "%s", name);
+ err(1, "open(%s)", name);
tmp_len = lseek(fd, 0, SEEK_END);
if (tmp_len == (off_t)-1)
@@ -240,7 +240,7 @@ int main(int argc, char **argv)
outfilename = argv[3];
outfile = fopen(outfilename, "w");
if (!outfile)
- err(1, "%s", argv[2]);
+ err(1, "fopen(%s)", outfilename);
go(raw_addr, raw_len, stripped_addr, stripped_len, outfile, name);
diff --git a/arch/x86/entry/vdso/vdso2c.h b/arch/x86/entry/vdso/vdso2c.h
index a20b134de2a8..6f46e11ce539 100644
--- a/arch/x86/entry/vdso/vdso2c.h
+++ b/arch/x86/entry/vdso/vdso2c.h
@@ -13,8 +13,7 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
unsigned long load_size = -1; /* Work around bogus warning */
unsigned long mapping_size;
ELF(Ehdr) *hdr = (ELF(Ehdr) *)raw_addr;
- int i;
- unsigned long j;
+ unsigned long i, syms_nr;
ELF(Shdr) *symtab_hdr = NULL, *strtab_hdr, *secstrings_hdr,
*alt_sec = NULL;
ELF(Dyn) *dyn = 0, *dyn_end = 0;
@@ -86,11 +85,10 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
strtab_hdr = raw_addr + GET_LE(&hdr->e_shoff) +
GET_LE(&hdr->e_shentsize) * GET_LE(&symtab_hdr->sh_link);
+ syms_nr = GET_LE(&symtab_hdr->sh_size) / GET_LE(&symtab_hdr->sh_entsize);
/* Walk the symbol table */
- for (i = 0;
- i < GET_LE(&symtab_hdr->sh_size) / GET_LE(&symtab_hdr->sh_entsize);
- i++) {
- int k;
+ for (i = 0; i < syms_nr; i++) {
+ unsigned int k;
ELF(Sym) *sym = raw_addr + GET_LE(&symtab_hdr->sh_offset) +
GET_LE(&symtab_hdr->sh_entsize) * i;
const char *sym_name = raw_addr +
@@ -150,11 +148,11 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
fprintf(outfile,
"static unsigned char raw_data[%lu] __ro_after_init __aligned(PAGE_SIZE) = {",
mapping_size);
- for (j = 0; j < stripped_len; j++) {
- if (j % 10 == 0)
+ for (i = 0; i < stripped_len; i++) {
+ if (i % 10 == 0)
fprintf(outfile, "\n\t");
fprintf(outfile, "0x%02X, ",
- (int)((unsigned char *)stripped_addr)[j]);
+ (int)((unsigned char *)stripped_addr)[i]);
}
fprintf(outfile, "\n};\n\n");
diff --git a/arch/x86/events/Kconfig b/arch/x86/events/Kconfig
index 9a7a1446cb3a..4a809c6cbd2f 100644
--- a/arch/x86/events/Kconfig
+++ b/arch/x86/events/Kconfig
@@ -10,11 +10,11 @@ config PERF_EVENTS_INTEL_UNCORE
available on NehalemEX and more modern processors.
config PERF_EVENTS_INTEL_RAPL
- tristate "Intel rapl performance events"
- depends on PERF_EVENTS && CPU_SUP_INTEL && PCI
+ tristate "Intel/AMD rapl performance events"
+ depends on PERF_EVENTS && (CPU_SUP_INTEL || CPU_SUP_AMD) && PCI
default y
---help---
- Include support for Intel rapl performance events for power
+ Include support for Intel and AMD rapl performance events for power
monitoring on modern processors.
config PERF_EVENTS_INTEL_CSTATE
diff --git a/arch/x86/events/Makefile b/arch/x86/events/Makefile
index 9e07f554333f..12c42eba77ec 100644
--- a/arch/x86/events/Makefile
+++ b/arch/x86/events/Makefile
@@ -1,5 +1,8 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-y += core.o probe.o
+obj-$(CONFIG_PERF_EVENTS_INTEL_RAPL) += rapl.o
obj-y += amd/
obj-$(CONFIG_X86_LOCAL_APIC) += msr.o
obj-$(CONFIG_CPU_SUP_INTEL) += intel/
+obj-$(CONFIG_CPU_SUP_CENTAUR) += zhaoxin/
+obj-$(CONFIG_CPU_SUP_ZHAOXIN) += zhaoxin/
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index a619763e96e1..9e63ee50b19a 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -1839,6 +1839,10 @@ static int __init init_hw_perf_events(void)
err = amd_pmu_init();
x86_pmu.name = "HYGON";
break;
+ case X86_VENDOR_ZHAOXIN:
+ case X86_VENDOR_CENTAUR:
+ err = zhaoxin_pmu_init();
+ break;
default:
err = -ENOTSUPP;
}
diff --git a/arch/x86/events/intel/Makefile b/arch/x86/events/intel/Makefile
index 3468b0c1dc7c..e67a5886336c 100644
--- a/arch/x86/events/intel/Makefile
+++ b/arch/x86/events/intel/Makefile
@@ -2,8 +2,6 @@
obj-$(CONFIG_CPU_SUP_INTEL) += core.o bts.o
obj-$(CONFIG_CPU_SUP_INTEL) += ds.o knc.o
obj-$(CONFIG_CPU_SUP_INTEL) += lbr.o p4.o p6.o pt.o
-obj-$(CONFIG_PERF_EVENTS_INTEL_RAPL) += intel-rapl-perf.o
-intel-rapl-perf-objs := rapl.o
obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += intel-uncore.o
intel-uncore-objs := uncore.o uncore_nhmex.o uncore_snb.o uncore_snbep.o
obj-$(CONFIG_PERF_EVENTS_INTEL_CSTATE) += intel-cstate.o
diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c
index 6a3b599ee0fe..731dd8d0dbb1 100644
--- a/arch/x86/events/intel/bts.c
+++ b/arch/x86/events/intel/bts.c
@@ -58,7 +58,7 @@ struct bts_buffer {
local_t head;
unsigned long end;
void **data_pages;
- struct bts_phys buf[0];
+ struct bts_phys buf[];
};
static struct pmu bts_pmu;
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 332954cccece..ca35c8b5ee10 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -1892,8 +1892,8 @@ static __initconst const u64 tnt_hw_cache_extra_regs
static struct extra_reg intel_tnt_extra_regs[] __read_mostly = {
/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
- INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffffff9fffull, RSP_0),
- INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0xffffff9fffull, RSP_1),
+ INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x800ff0ffffff9fffull, RSP_0),
+ INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0xff0ffffff9fffull, RSP_1),
EVENT_EXTRA_END
};
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index 1db7a51d9792..e94af4a54d0d 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -226,8 +226,6 @@ static int __init pt_pmu_hw_init(void)
pt_pmu.vmx = true;
}
- attrs = NULL;
-
for (i = 0; i < PT_CPUID_LEAVES; i++) {
cpuid_count(20, i,
&pt_pmu.caps[CPUID_EAX + i*PT_CPUID_REGS_NUM],
diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h
index 0da4a4605536..b469ddd45515 100644
--- a/arch/x86/events/intel/uncore.h
+++ b/arch/x86/events/intel/uncore.h
@@ -130,7 +130,7 @@ struct intel_uncore_box {
struct list_head list;
struct list_head active_list;
void __iomem *io_addr;
- struct intel_uncore_extra_reg shared_regs[0];
+ struct intel_uncore_extra_reg shared_regs[];
};
/* CFL uncore 8th cbox MSRs */
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index f1cd1ca1a77b..e17a3d8a47ed 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -618,6 +618,7 @@ struct x86_pmu {
/* PMI handler bits */
unsigned int late_ack :1,
+ enabled_ack :1,
counter_freezing :1;
/*
* sysfs attrs
@@ -1133,3 +1134,12 @@ static inline int is_ht_workaround_enabled(void)
return 0;
}
#endif /* CONFIG_CPU_SUP_INTEL */
+
+#if ((defined CONFIG_CPU_SUP_CENTAUR) || (defined CONFIG_CPU_SUP_ZHAOXIN))
+int zhaoxin_pmu_init(void);
+#else
+static inline int zhaoxin_pmu_init(void)
+{
+ return 0;
+}
+#endif /* CONFIG_CPU_SUP_CENTAUR or CONFIG_CPU_SUP_ZHAOXIN */
diff --git a/arch/x86/events/probe.c b/arch/x86/events/probe.c
index c2ede2f3b277..136a1e847254 100644
--- a/arch/x86/events/probe.c
+++ b/arch/x86/events/probe.c
@@ -10,6 +10,11 @@ not_visible(struct kobject *kobj, struct attribute *attr, int i)
return 0;
}
+/*
+ * Accepts an msr[] array with unpopulated entries as long as either
+ * msr[i].msr is 0 or msr[i].grp is NULL. Note that the default sysfs
+ * visibility is visible when group->is_visible callback is set.
+ */
unsigned long
perf_msr_probe(struct perf_msr *msr, int cnt, bool zero, void *data)
{
@@ -24,8 +29,16 @@ perf_msr_probe(struct perf_msr *msr, int cnt, bool zero, void *data)
if (!msr[bit].no_check) {
struct attribute_group *grp = msr[bit].grp;
+ /* skip entry with no group */
+ if (!grp)
+ continue;
+
grp->is_visible = not_visible;
+ /* skip unpopulated entry */
+ if (!msr[bit].msr)
+ continue;
+
if (msr[bit].test && !msr[bit].test(bit, data))
continue;
/* Virt sucks; you cannot tell if a R/O MSR is present :/ */
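
With the two new skips above, perf_msr_probe() tolerates sparsely populated tables. A minimal sketch of such a table (array and variable names are illustrative; the field order matches the initializers used elsewhere in this patch):

    static struct perf_msr example_msrs[PERF_RAPL_MAX] = {
            /* only the package counter is described; all other entries
             * stay zeroed and are now skipped instead of dereferenced */
            [PERF_RAPL_PKG] = { MSR_PKG_ENERGY_STATUS, &rapl_events_pkg_group, test_msr },
    };

    /* probe only what the table describes; bits for absent entries stay clear */
    cntr_mask = perf_msr_probe(example_msrs, PERF_RAPL_MAX, false, (void *)&events_bitmap);
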
diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/rapl.c
index a5dbd25852cb..0f2bf59f4354 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/rapl.c
@@ -1,11 +1,14 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Support Intel RAPL energy consumption counters
+ * Support Intel/AMD RAPL energy consumption counters
* Copyright (C) 2013 Google, Inc., Stephane Eranian
*
* Intel RAPL interface is specified in the IA-32 Manual Vol3b
* section 14.7.1 (September 2013)
*
+ * AMD RAPL interface for Fam17h is described in the public PPR:
+ * https://bugzilla.kernel.org/show_bug.cgi?id=206537
+ *
* RAPL provides more controls than just reporting energy consumption
* however here we only expose the 3 energy consumption free running
* counters (pp0, pkg, dram).
@@ -58,8 +61,8 @@
#include <linux/nospec.h>
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
-#include "../perf_event.h"
-#include "../probe.h"
+#include "perf_event.h"
+#include "probe.h"
MODULE_LICENSE("GPL");
@@ -128,7 +131,9 @@ struct rapl_pmus {
};
struct rapl_model {
+ struct perf_msr *rapl_msrs;
unsigned long events;
+ unsigned int msr_power_unit;
bool apply_quirk;
};
@@ -138,7 +143,7 @@ static struct rapl_pmus *rapl_pmus;
static cpumask_t rapl_cpu_mask;
static unsigned int rapl_cntr_mask;
static u64 rapl_timer_ms;
-static struct perf_msr rapl_msrs[];
+static struct perf_msr *rapl_msrs;
static inline struct rapl_pmu *cpu_to_rapl_pmu(unsigned int cpu)
{
@@ -455,9 +460,16 @@ static struct attribute *rapl_events_cores[] = {
NULL,
};
+static umode_t
+rapl_not_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+ return 0;
+}
+
static struct attribute_group rapl_events_cores_group = {
.name = "events",
.attrs = rapl_events_cores,
+ .is_visible = rapl_not_visible,
};
static struct attribute *rapl_events_pkg[] = {
@@ -470,6 +482,7 @@ static struct attribute *rapl_events_pkg[] = {
static struct attribute_group rapl_events_pkg_group = {
.name = "events",
.attrs = rapl_events_pkg,
+ .is_visible = rapl_not_visible,
};
static struct attribute *rapl_events_ram[] = {
@@ -482,6 +495,7 @@ static struct attribute *rapl_events_ram[] = {
static struct attribute_group rapl_events_ram_group = {
.name = "events",
.attrs = rapl_events_ram,
+ .is_visible = rapl_not_visible,
};
static struct attribute *rapl_events_gpu[] = {
@@ -494,6 +508,7 @@ static struct attribute *rapl_events_gpu[] = {
static struct attribute_group rapl_events_gpu_group = {
.name = "events",
.attrs = rapl_events_gpu,
+ .is_visible = rapl_not_visible,
};
static struct attribute *rapl_events_psys[] = {
@@ -506,6 +521,7 @@ static struct attribute *rapl_events_psys[] = {
static struct attribute_group rapl_events_psys_group = {
.name = "events",
.attrs = rapl_events_psys,
+ .is_visible = rapl_not_visible,
};
static bool test_msr(int idx, void *data)
@@ -513,7 +529,7 @@ static bool test_msr(int idx, void *data)
return test_bit(idx, (unsigned long *) data);
}
-static struct perf_msr rapl_msrs[] = {
+static struct perf_msr intel_rapl_msrs[] = {
[PERF_RAPL_PP0] = { MSR_PP0_ENERGY_STATUS, &rapl_events_cores_group, test_msr },
[PERF_RAPL_PKG] = { MSR_PKG_ENERGY_STATUS, &rapl_events_pkg_group, test_msr },
[PERF_RAPL_RAM] = { MSR_DRAM_ENERGY_STATUS, &rapl_events_ram_group, test_msr },
@@ -521,6 +537,16 @@ static struct perf_msr rapl_msrs[] = {
[PERF_RAPL_PSYS] = { MSR_PLATFORM_ENERGY_STATUS, &rapl_events_psys_group, test_msr },
};
+/*
+ * Force to PERF_RAPL_MAX size due to:
+ * - perf_msr_probe(PERF_RAPL_MAX)
+ * - want to use the same event codes across both architectures
+ */
+static struct perf_msr amd_rapl_msrs[PERF_RAPL_MAX] = {
+ [PERF_RAPL_PKG] = { MSR_AMD_PKG_ENERGY_STATUS, &rapl_events_pkg_group, test_msr },
+};
+
+
static int rapl_cpu_offline(unsigned int cpu)
{
struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
@@ -575,13 +601,13 @@ static int rapl_cpu_online(unsigned int cpu)
return 0;
}
-static int rapl_check_hw_unit(bool apply_quirk)
+static int rapl_check_hw_unit(struct rapl_model *rm)
{
u64 msr_rapl_power_unit_bits;
int i;
/* protect rdmsrl() to handle virtualization */
- if (rdmsrl_safe(MSR_RAPL_POWER_UNIT, &msr_rapl_power_unit_bits))
+ if (rdmsrl_safe(rm->msr_power_unit, &msr_rapl_power_unit_bits))
return -1;
for (i = 0; i < NR_RAPL_DOMAINS; i++)
rapl_hw_unit[i] = (msr_rapl_power_unit_bits >> 8) & 0x1FULL;
@@ -592,7 +618,7 @@ static int rapl_check_hw_unit(bool apply_quirk)
* "Intel Xeon Processor E5-1600 and E5-2600 v3 Product Families, V2
* of 2. Datasheet, September 2014, Reference Number: 330784-001 "
*/
- if (apply_quirk)
+ if (rm->apply_quirk)
rapl_hw_unit[PERF_RAPL_RAM] = 16;
/*
@@ -673,6 +699,8 @@ static struct rapl_model model_snb = {
BIT(PERF_RAPL_PKG) |
BIT(PERF_RAPL_PP1),
.apply_quirk = false,
+ .msr_power_unit = MSR_RAPL_POWER_UNIT,
+ .rapl_msrs = intel_rapl_msrs,
};
static struct rapl_model model_snbep = {
@@ -680,6 +708,8 @@ static struct rapl_model model_snbep = {
BIT(PERF_RAPL_PKG) |
BIT(PERF_RAPL_RAM),
.apply_quirk = false,
+ .msr_power_unit = MSR_RAPL_POWER_UNIT,
+ .rapl_msrs = intel_rapl_msrs,
};
static struct rapl_model model_hsw = {
@@ -688,6 +718,8 @@ static struct rapl_model model_hsw = {
BIT(PERF_RAPL_RAM) |
BIT(PERF_RAPL_PP1),
.apply_quirk = false,
+ .msr_power_unit = MSR_RAPL_POWER_UNIT,
+ .rapl_msrs = intel_rapl_msrs,
};
static struct rapl_model model_hsx = {
@@ -695,12 +727,16 @@ static struct rapl_model model_hsx = {
BIT(PERF_RAPL_PKG) |
BIT(PERF_RAPL_RAM),
.apply_quirk = true,
+ .msr_power_unit = MSR_RAPL_POWER_UNIT,
+ .rapl_msrs = intel_rapl_msrs,
};
static struct rapl_model model_knl = {
.events = BIT(PERF_RAPL_PKG) |
BIT(PERF_RAPL_RAM),
.apply_quirk = true,
+ .msr_power_unit = MSR_RAPL_POWER_UNIT,
+ .rapl_msrs = intel_rapl_msrs,
};
static struct rapl_model model_skl = {
@@ -710,6 +746,15 @@ static struct rapl_model model_skl = {
BIT(PERF_RAPL_PP1) |
BIT(PERF_RAPL_PSYS),
.apply_quirk = false,
+ .msr_power_unit = MSR_RAPL_POWER_UNIT,
+ .rapl_msrs = intel_rapl_msrs,
+};
+
+static struct rapl_model model_amd_fam17h = {
+ .events = BIT(PERF_RAPL_PKG),
+ .apply_quirk = false,
+ .msr_power_unit = MSR_AMD_RAPL_POWER_UNIT,
+ .rapl_msrs = amd_rapl_msrs,
};
static const struct x86_cpu_id rapl_model_match[] __initconst = {
@@ -738,8 +783,11 @@ static const struct x86_cpu_id rapl_model_match[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS, &model_hsw),
X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, &model_skl),
X86_MATCH_INTEL_FAM6_MODEL(ICELAKE, &model_skl),
+ X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &model_hsx),
+ X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &model_hsx),
X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L, &model_skl),
X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE, &model_skl),
+ X86_MATCH_VENDOR_FAM(AMD, 0x17, &model_amd_fam17h),
{},
};
MODULE_DEVICE_TABLE(x86cpu, rapl_model_match);
@@ -755,10 +803,13 @@ static int __init rapl_pmu_init(void)
return -ENODEV;
rm = (struct rapl_model *) id->driver_data;
+
+ rapl_msrs = rm->rapl_msrs;
+
rapl_cntr_mask = perf_msr_probe(rapl_msrs, PERF_RAPL_MAX,
false, (void *) &rm->events);
- ret = rapl_check_hw_unit(rm->apply_quirk);
+ ret = rapl_check_hw_unit(rm);
if (ret)
return ret;
diff --git a/arch/x86/events/zhaoxin/Makefile b/arch/x86/events/zhaoxin/Makefile
new file mode 100644
index 000000000000..642c1174d662
--- /dev/null
+++ b/arch/x86/events/zhaoxin/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-y += core.o
diff --git a/arch/x86/events/zhaoxin/core.c b/arch/x86/events/zhaoxin/core.c
new file mode 100644
index 000000000000..898fa1ae9ceb
--- /dev/null
+++ b/arch/x86/events/zhaoxin/core.c
@@ -0,0 +1,613 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Zhaoxin PMU; like Intel Architectural PerfMon-v2
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/stddef.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+#include <linux/nmi.h>
+
+#include <asm/cpufeature.h>
+#include <asm/hardirq.h>
+#include <asm/apic.h>
+
+#include "../perf_event.h"
+
+/*
+ * Zhaoxin PerfMon, used on zxc and later.
+ */
+static u64 zx_pmon_event_map[PERF_COUNT_HW_MAX] __read_mostly = {
+
+ [PERF_COUNT_HW_CPU_CYCLES] = 0x0082,
+ [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0515,
+ [PERF_COUNT_HW_CACHE_MISSES] = 0x051a,
+ [PERF_COUNT_HW_BUS_CYCLES] = 0x0083,
+};
+
+static struct event_constraint zxc_event_constraints[] __read_mostly = {
+
+ FIXED_EVENT_CONSTRAINT(0x0082, 1), /* unhalted core clock cycles */
+ EVENT_CONSTRAINT_END
+};
+
+static struct event_constraint zxd_event_constraints[] __read_mostly = {
+
+ FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* retired instructions */
+ FIXED_EVENT_CONSTRAINT(0x0082, 1), /* unhalted core clock cycles */
+ FIXED_EVENT_CONSTRAINT(0x0083, 2), /* unhalted bus clock cycles */
+ EVENT_CONSTRAINT_END
+};
+
+static __initconst const u64 zxd_hw_cache_event_ids
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+[C(L1D)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0x0042,
+ [C(RESULT_MISS)] = 0x0538,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = 0x0043,
+ [C(RESULT_MISS)] = 0x0562,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+},
+[C(L1I)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0x0300,
+ [C(RESULT_MISS)] = 0x0301,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = 0x030a,
+ [C(RESULT_MISS)] = 0x030b,
+ },
+},
+[C(LL)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+},
+[C(DTLB)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0x0042,
+ [C(RESULT_MISS)] = 0x052c,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = 0x0043,
+ [C(RESULT_MISS)] = 0x0530,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = 0x0564,
+ [C(RESULT_MISS)] = 0x0565,
+ },
+},
+[C(ITLB)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0x00c0,
+ [C(RESULT_MISS)] = 0x0534,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+},
+[C(BPU)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0x0700,
+ [C(RESULT_MISS)] = 0x0709,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+},
+[C(NODE)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+},
+};
+
+static __initconst const u64 zxe_hw_cache_event_ids
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+[C(L1D)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0x0568,
+ [C(RESULT_MISS)] = 0x054b,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = 0x0669,
+ [C(RESULT_MISS)] = 0x0562,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+},
+[C(L1I)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0x0300,
+ [C(RESULT_MISS)] = 0x0301,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = 0x030a,
+ [C(RESULT_MISS)] = 0x030b,
+ },
+},
+[C(LL)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0x0,
+ [C(RESULT_MISS)] = 0x0,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = 0x0,
+ [C(RESULT_MISS)] = 0x0,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = 0x0,
+ [C(RESULT_MISS)] = 0x0,
+ },
+},
+[C(DTLB)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0x0568,
+ [C(RESULT_MISS)] = 0x052c,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = 0x0669,
+ [C(RESULT_MISS)] = 0x0530,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = 0x0564,
+ [C(RESULT_MISS)] = 0x0565,
+ },
+},
+[C(ITLB)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0x00c0,
+ [C(RESULT_MISS)] = 0x0534,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+},
+[C(BPU)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0x0028,
+ [C(RESULT_MISS)] = 0x0029,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+},
+[C(NODE)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+},
+};
+
+static void zhaoxin_pmu_disable_all(void)
+{
+ wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+}
+
+static void zhaoxin_pmu_enable_all(int added)
+{
+ wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
+}
+
+static inline u64 zhaoxin_pmu_get_status(void)
+{
+ u64 status;
+
+ rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
+
+ return status;
+}
+
+static inline void zhaoxin_pmu_ack_status(u64 ack)
+{
+ wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
+}
+
+static inline void zxc_pmu_ack_status(u64 ack)
+{
+ /*
+ * ZXC needs global control enabled in order to clear status bits.
+ */
+ zhaoxin_pmu_enable_all(0);
+ zhaoxin_pmu_ack_status(ack);
+ zhaoxin_pmu_disable_all();
+}
+
+static void zhaoxin_pmu_disable_fixed(struct hw_perf_event *hwc)
+{
+ int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
+ u64 ctrl_val, mask;
+
+ mask = 0xfULL << (idx * 4);
+
+ rdmsrl(hwc->config_base, ctrl_val);
+ ctrl_val &= ~mask;
+ wrmsrl(hwc->config_base, ctrl_val);
+}
+
+static void zhaoxin_pmu_disable_event(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
+ zhaoxin_pmu_disable_fixed(hwc);
+ return;
+ }
+
+ x86_pmu_disable_event(event);
+}
+
+static void zhaoxin_pmu_enable_fixed(struct hw_perf_event *hwc)
+{
+ int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
+ u64 ctrl_val, bits, mask;
+
+ /*
+ * Enable IRQ generation (0x8),
+ * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
+ * if requested:
+ */
+ bits = 0x8ULL;
+ if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
+ bits |= 0x2;
+ if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
+ bits |= 0x1;
+
+ bits <<= (idx * 4);
+ mask = 0xfULL << (idx * 4);
+
+ rdmsrl(hwc->config_base, ctrl_val);
+ ctrl_val &= ~mask;
+ ctrl_val |= bits;
+ wrmsrl(hwc->config_base, ctrl_val);
+}
+
+static void zhaoxin_pmu_enable_event(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
+ zhaoxin_pmu_enable_fixed(hwc);
+ return;
+ }
+
+ __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
+}
+
+/*
+ * This handler is triggered by the local APIC, so the APIC IRQ handling
+ * rules apply:
+ */
+static int zhaoxin_pmu_handle_irq(struct pt_regs *regs)
+{
+ struct perf_sample_data data;
+ struct cpu_hw_events *cpuc;
+ int handled = 0;
+ u64 status;
+ int bit;
+
+ cpuc = this_cpu_ptr(&cpu_hw_events);
+ apic_write(APIC_LVTPC, APIC_DM_NMI);
+ zhaoxin_pmu_disable_all();
+ status = zhaoxin_pmu_get_status();
+ if (!status)
+ goto done;
+
+again:
+ if (x86_pmu.enabled_ack)
+ zxc_pmu_ack_status(status);
+ else
+ zhaoxin_pmu_ack_status(status);
+
+ inc_irq_stat(apic_perf_irqs);
+
+ /*
+ * CondChgd bit 63 doesn't mean any overflow status. Ignore
+ * and clear the bit.
+ */
+ if (__test_and_clear_bit(63, (unsigned long *)&status)) {
+ if (!status)
+ goto done;
+ }
+
+ for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
+ struct perf_event *event = cpuc->events[bit];
+
+ handled++;
+
+ if (!test_bit(bit, cpuc->active_mask))
+ continue;
+
+ x86_perf_event_update(event);
+ perf_sample_data_init(&data, 0, event->hw.last_period);
+
+ if (!x86_perf_event_set_period(event))
+ continue;
+
+ if (perf_event_overflow(event, &data, regs))
+ x86_pmu_stop(event, 0);
+ }
+
+ /*
+ * Repeat if there is more work to be done:
+ */
+ status = zhaoxin_pmu_get_status();
+ if (status)
+ goto again;
+
+done:
+ zhaoxin_pmu_enable_all(0);
+ return handled;
+}
+
+static u64 zhaoxin_pmu_event_map(int hw_event)
+{
+ return zx_pmon_event_map[hw_event];
+}
+
+static struct event_constraint *
+zhaoxin_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+ struct perf_event *event)
+{
+ struct event_constraint *c;
+
+ if (x86_pmu.event_constraints) {
+ for_each_event_constraint(c, x86_pmu.event_constraints) {
+ if ((event->hw.config & c->cmask) == c->code)
+ return c;
+ }
+ }
+
+ return &unconstrained;
+}
+
+PMU_FORMAT_ATTR(event, "config:0-7");
+PMU_FORMAT_ATTR(umask, "config:8-15");
+PMU_FORMAT_ATTR(edge, "config:18");
+PMU_FORMAT_ATTR(inv, "config:23");
+PMU_FORMAT_ATTR(cmask, "config:24-31");
+
+static struct attribute *zx_arch_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ &format_attr_edge.attr,
+ &format_attr_inv.attr,
+ &format_attr_cmask.attr,
+ NULL,
+};
+
+static ssize_t zhaoxin_event_sysfs_show(char *page, u64 config)
+{
+ u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT);
+
+ return x86_event_sysfs_show(page, config, event);
+}
+
+static const struct x86_pmu zhaoxin_pmu __initconst = {
+ .name = "zhaoxin",
+ .handle_irq = zhaoxin_pmu_handle_irq,
+ .disable_all = zhaoxin_pmu_disable_all,
+ .enable_all = zhaoxin_pmu_enable_all,
+ .enable = zhaoxin_pmu_enable_event,
+ .disable = zhaoxin_pmu_disable_event,
+ .hw_config = x86_pmu_hw_config,
+ .schedule_events = x86_schedule_events,
+ .eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
+ .perfctr = MSR_ARCH_PERFMON_PERFCTR0,
+ .event_map = zhaoxin_pmu_event_map,
+ .max_events = ARRAY_SIZE(zx_pmon_event_map),
+ .apic = 1,
+ /*
+ * For zxd/zxe, reads and writes of the PMCx MSRs are 48 bits wide.
+ */
+ .max_period = (1ULL << 47) - 1,
+ .get_event_constraints = zhaoxin_get_event_constraints,
+
+ .format_attrs = zx_arch_formats_attr,
+ .events_sysfs_show = zhaoxin_event_sysfs_show,
+};
+
+static const struct { int id; char *name; } zx_arch_events_map[] __initconst = {
+ { PERF_COUNT_HW_CPU_CYCLES, "cpu cycles" },
+ { PERF_COUNT_HW_INSTRUCTIONS, "instructions" },
+ { PERF_COUNT_HW_BUS_CYCLES, "bus cycles" },
+ { PERF_COUNT_HW_CACHE_REFERENCES, "cache references" },
+ { PERF_COUNT_HW_CACHE_MISSES, "cache misses" },
+ { PERF_COUNT_HW_BRANCH_INSTRUCTIONS, "branch instructions" },
+ { PERF_COUNT_HW_BRANCH_MISSES, "branch misses" },
+};
+
+static __init void zhaoxin_arch_events_quirk(void)
+{
+ int bit;
+
+ /* disable events that are reported as not present by cpuid */
+ for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(zx_arch_events_map)) {
+ zx_pmon_event_map[zx_arch_events_map[bit].id] = 0;
+ pr_warn("CPUID marked event: \'%s\' unavailable\n",
+ zx_arch_events_map[bit].name);
+ }
+}
+
+__init int zhaoxin_pmu_init(void)
+{
+ union cpuid10_edx edx;
+ union cpuid10_eax eax;
+ union cpuid10_ebx ebx;
+ struct event_constraint *c;
+ unsigned int unused;
+ int version;
+
+ pr_info("Welcome to zhaoxin pmu!\n");
+
+ /*
+ * Check whether the Architectural PerfMon supports
+ * hw_event or not.
+ */
+ cpuid(10, &eax.full, &ebx.full, &unused, &edx.full);
+
+ if (eax.split.mask_length < ARCH_PERFMON_EVENTS_COUNT - 1)
+ return -ENODEV;
+
+ version = eax.split.version_id;
+ if (version != 2)
+ return -ENODEV;
+
+ x86_pmu = zhaoxin_pmu;
+ pr_info("Version check pass!\n");
+
+ x86_pmu.version = version;
+ x86_pmu.num_counters = eax.split.num_counters;
+ x86_pmu.cntval_bits = eax.split.bit_width;
+ x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1;
+ x86_pmu.events_maskl = ebx.full;
+ x86_pmu.events_mask_len = eax.split.mask_length;
+
+ x86_pmu.num_counters_fixed = edx.split.num_counters_fixed;
+ x86_add_quirk(zhaoxin_arch_events_quirk);
+
+ switch (boot_cpu_data.x86) {
+ case 0x06:
+ if (boot_cpu_data.x86_model == 0x0f || boot_cpu_data.x86_model == 0x19) {
+
+ x86_pmu.max_period = x86_pmu.cntval_mask >> 1;
+
+ /* Clearing status works only if the global control is enabled on zxc. */
+ x86_pmu.enabled_ack = 1;
+
+ x86_pmu.event_constraints = zxc_event_constraints;
+ zx_pmon_event_map[PERF_COUNT_HW_INSTRUCTIONS] = 0;
+ zx_pmon_event_map[PERF_COUNT_HW_CACHE_REFERENCES] = 0;
+ zx_pmon_event_map[PERF_COUNT_HW_CACHE_MISSES] = 0;
+ zx_pmon_event_map[PERF_COUNT_HW_BUS_CYCLES] = 0;
+
+ pr_cont("ZXC events, ");
+ break;
+ }
+ return -ENODEV;
+
+ case 0x07:
+ zx_pmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
+ X86_CONFIG(.event = 0x01, .umask = 0x01, .inv = 0x01, .cmask = 0x01);
+
+ zx_pmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
+ X86_CONFIG(.event = 0x0f, .umask = 0x04, .inv = 0, .cmask = 0);
+
+ switch (boot_cpu_data.x86_model) {
+ case 0x1b:
+ memcpy(hw_cache_event_ids, zxd_hw_cache_event_ids,
+ sizeof(hw_cache_event_ids));
+
+ x86_pmu.event_constraints = zxd_event_constraints;
+
+ zx_pmon_event_map[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x0700;
+ zx_pmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x0709;
+
+ pr_cont("ZXD events, ");
+ break;
+ case 0x3b:
+ memcpy(hw_cache_event_ids, zxe_hw_cache_event_ids,
+ sizeof(hw_cache_event_ids));
+
+ x86_pmu.event_constraints = zxd_event_constraints;
+
+ zx_pmon_event_map[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x0028;
+ zx_pmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x0029;
+
+ pr_cont("ZXE events, ");
+ break;
+ default:
+ return -ENODEV;
+ }
+ break;
+
+ default:
+ return -ENODEV;
+ }
+
+ x86_pmu.intel_ctrl = (1 << (x86_pmu.num_counters)) - 1;
+ x86_pmu.intel_ctrl |= ((1LL << x86_pmu.num_counters_fixed)-1) << INTEL_PMC_IDX_FIXED;
+
+ if (x86_pmu.event_constraints) {
+ for_each_event_constraint(c, x86_pmu.event_constraints) {
+ c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
+ c->weight += x86_pmu.num_counters;
+ }
+ }
+
+ return 0;
+}
+
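
For reference, the global-control mask assembled at the end of zhaoxin_pmu_init() sets one bit per general-purpose counter plus one bit per fixed counter starting at INTEL_PMC_IDX_FIXED (bit 32). A standalone sketch with assumed counts of 4 general and 3 fixed counters (the real values come from CPUID leaf 0xa at boot):

    unsigned long long ctrl;

    ctrl  = (1ULL << 4) - 1;            /* 0x000000000000000f: GP counters 0-3 */
    ctrl |= ((1ULL << 3) - 1) << 32;    /* 0x0000000700000000: fixed counters 0-2 */
    /* ctrl == 0x000000070000000f, the value zhaoxin_pmu_enable_all() writes
     * to MSR_CORE_PERF_GLOBAL_CTRL */
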
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index acf76b466db6..e2137070386a 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -97,8 +97,7 @@ static int hv_cpu_init(unsigned int cpu)
* not be stopped in the case of CPU offlining and the VM will hang.
*/
if (!*hvp) {
- *hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL | __GFP_ZERO,
- PAGE_KERNEL);
+ *hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL | __GFP_ZERO);
}
if (*hvp) {
@@ -379,7 +378,7 @@ void __init hyperv_init(void)
guest_id = generate_guest_id(0, LINUX_VERSION_CODE, 0);
wrmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id);
- hv_hypercall_pg = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL_RX);
+ hv_hypercall_pg = vmalloc_exec(PAGE_SIZE);
if (hv_hypercall_pg == NULL) {
wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0);
goto remove_cpuhp_state;
diff --git a/arch/x86/ia32/audit.c b/arch/x86/ia32/audit.c
index 3d21eab7aaed..6efe6cb3768a 100644
--- a/arch/x86/ia32/audit.c
+++ b/arch/x86/ia32/audit.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <asm/unistd_32.h>
+#include <asm/audit.h>
unsigned ia32_dir_class[] = {
#include <asm-generic/audit_dir_write.h>
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index f9d8804144d0..81cf22398cd1 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -350,7 +350,7 @@ int ia32_setup_rt_frame(int sig, struct ksignal *ksig,
unsafe_put_user(*(__u64 *)set, (__u64 *)&frame->uc.uc_sigmask, Efault);
user_access_end();
- if (__copy_siginfo_to_user32(&frame->info, &ksig->info, false))
+ if (__copy_siginfo_to_user32(&frame->info, &ksig->info))
return -EFAULT;
/* Set up registers for signal handler */
diff --git a/arch/x86/include/asm/GEN-for-each-reg.h b/arch/x86/include/asm/GEN-for-each-reg.h
new file mode 100644
index 000000000000..1b07fb102c4e
--- /dev/null
+++ b/arch/x86/include/asm/GEN-for-each-reg.h
@@ -0,0 +1,25 @@
+#ifdef CONFIG_64BIT
+GEN(rax)
+GEN(rbx)
+GEN(rcx)
+GEN(rdx)
+GEN(rsi)
+GEN(rdi)
+GEN(rbp)
+GEN(r8)
+GEN(r9)
+GEN(r10)
+GEN(r11)
+GEN(r12)
+GEN(r13)
+GEN(r14)
+GEN(r15)
+#else
+GEN(eax)
+GEN(ebx)
+GEN(ecx)
+GEN(edx)
+GEN(esi)
+GEN(edi)
+GEN(ebp)
+#endif
diff --git a/arch/x86/include/asm/apb_timer.h b/arch/x86/include/asm/apb_timer.h
index 99bb207fc04c..87ce8e963215 100644
--- a/arch/x86/include/asm/apb_timer.h
+++ b/arch/x86/include/asm/apb_timer.h
@@ -25,11 +25,7 @@
#define APBT_MIN_FREQ 1000000
#define APBT_MMAP_SIZE 1024
-#define APBT_DEV_USED 1
-
extern void apbt_time_init(void);
-extern unsigned long apbt_quick_calibrate(void);
-extern int arch_setup_apbt_irqs(int irq, int trigger, int mask, int cpu);
extern void apbt_setup_secondary_clock(void);
extern struct sfi_timer_table_entry *sfi_get_mtmr(int hint);
@@ -38,7 +34,6 @@ extern int sfi_mtimer_num;
#else /* CONFIG_APB_TIMER */
-static inline unsigned long apbt_quick_calibrate(void) {return 0; }
static inline void apbt_time_init(void) { }
#endif
diff --git a/arch/x86/include/asm/archrandom.h b/arch/x86/include/asm/archrandom.h
index 7a4bb1bd4bdb..ebc248e49549 100644
--- a/arch/x86/include/asm/archrandom.h
+++ b/arch/x86/include/asm/archrandom.h
@@ -15,16 +15,6 @@
#define RDRAND_RETRY_LOOPS 10
-#define RDRAND_INT ".byte 0x0f,0xc7,0xf0"
-#define RDSEED_INT ".byte 0x0f,0xc7,0xf8"
-#ifdef CONFIG_X86_64
-# define RDRAND_LONG ".byte 0x48,0x0f,0xc7,0xf0"
-# define RDSEED_LONG ".byte 0x48,0x0f,0xc7,0xf8"
-#else
-# define RDRAND_LONG RDRAND_INT
-# define RDSEED_LONG RDSEED_INT
-#endif
-
/* Unconditional execution of RDRAND and RDSEED */
static inline bool __must_check rdrand_long(unsigned long *v)
@@ -32,9 +22,9 @@ static inline bool __must_check rdrand_long(unsigned long *v)
bool ok;
unsigned int retry = RDRAND_RETRY_LOOPS;
do {
- asm volatile(RDRAND_LONG
+ asm volatile("rdrand %[out]"
CC_SET(c)
- : CC_OUT(c) (ok), "=a" (*v));
+ : CC_OUT(c) (ok), [out] "=r" (*v));
if (ok)
return true;
} while (--retry);
@@ -46,9 +36,9 @@ static inline bool __must_check rdrand_int(unsigned int *v)
bool ok;
unsigned int retry = RDRAND_RETRY_LOOPS;
do {
- asm volatile(RDRAND_INT
+ asm volatile("rdrand %[out]"
CC_SET(c)
- : CC_OUT(c) (ok), "=a" (*v));
+ : CC_OUT(c) (ok), [out] "=r" (*v));
if (ok)
return true;
} while (--retry);
@@ -58,18 +48,18 @@ static inline bool __must_check rdrand_int(unsigned int *v)
static inline bool __must_check rdseed_long(unsigned long *v)
{
bool ok;
- asm volatile(RDSEED_LONG
+ asm volatile("rdseed %[out]"
CC_SET(c)
- : CC_OUT(c) (ok), "=a" (*v));
+ : CC_OUT(c) (ok), [out] "=r" (*v));
return ok;
}
static inline bool __must_check rdseed_int(unsigned int *v)
{
bool ok;
- asm volatile(RDSEED_INT
+ asm volatile("rdseed %[out]"
CC_SET(c)
- : CC_OUT(c) (ok), "=a" (*v));
+ : CC_OUT(c) (ok), [out] "=r" (*v));
return ok;
}
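
Assuming a binutils new enough to know the mnemonics, the hand-written .byte encodings above can go away, and relaxing the output constraint from "=a" to a named "=r" operand lets the compiler pick any register instead of forcing rAX. A freestanding sketch of the same shape, using a plain setc in place of the kernel's CC_SET()/CC_OUT() helpers (illustrative only):

    static inline int try_rdrand(unsigned long *v)
    {
            unsigned char ok;

            /* rdrand sets CF on success; capture it into 'ok' */
            asm volatile("rdrand %[out]; setc %[ok]"
                         : [out] "=r" (*v), [ok] "=qm" (ok));
            return ok;
    }
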
diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h
index ce92c4acc913..9bf2620ce817 100644
--- a/arch/x86/include/asm/asm-prototypes.h
+++ b/arch/x86/include/asm/asm-prototypes.h
@@ -17,24 +17,19 @@ extern void cmpxchg8b_emu(void);
#endif
#ifdef CONFIG_RETPOLINE
-#ifdef CONFIG_X86_32
-#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_e ## reg(void);
-#else
-#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_r ## reg(void);
-INDIRECT_THUNK(8)
-INDIRECT_THUNK(9)
-INDIRECT_THUNK(10)
-INDIRECT_THUNK(11)
-INDIRECT_THUNK(12)
-INDIRECT_THUNK(13)
-INDIRECT_THUNK(14)
-INDIRECT_THUNK(15)
-#endif
-INDIRECT_THUNK(ax)
-INDIRECT_THUNK(bx)
-INDIRECT_THUNK(cx)
-INDIRECT_THUNK(dx)
-INDIRECT_THUNK(si)
-INDIRECT_THUNK(di)
-INDIRECT_THUNK(bp)
+
+#define DECL_INDIRECT_THUNK(reg) \
+ extern asmlinkage void __x86_indirect_thunk_ ## reg (void);
+
+#define DECL_RETPOLINE(reg) \
+ extern asmlinkage void __x86_retpoline_ ## reg (void);
+
+#undef GEN
+#define GEN(reg) DECL_INDIRECT_THUNK(reg)
+#include <asm/GEN-for-each-reg.h>
+
+#undef GEN
+#define GEN(reg) DECL_RETPOLINE(reg)
+#include <asm/GEN-for-each-reg.h>
+
#endif /* CONFIG_RETPOLINE */
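
The header above is meant to be included repeatedly, with GEN() redefined before each inclusion, so one register list stamps out every per-register declaration. The same X-macro technique in a self-contained sketch (file and symbol names are hypothetical):

    /* regs.h - one GEN() invocation per register; intentionally no include guard */
    GEN(rax)
    GEN(rbx)
    GEN(rcx)

    /* consumer: first pass declares the thunks, second pass builds a table */
    #define GEN(reg) void thunk_ ## reg (void);
    #include "regs.h"
    #undef GEN

    #define GEN(reg) { #reg, thunk_ ## reg },
    static const struct { const char *name; void (*fn)(void); } thunks[] = {
    #include "regs.h"
    };
    #undef GEN
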
diff --git a/arch/x86/include/asm/audit.h b/arch/x86/include/asm/audit.h
new file mode 100644
index 000000000000..36aec57ea7a3
--- /dev/null
+++ b/arch/x86/include/asm/audit.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_AUDIT_H
+#define _ASM_X86_AUDIT_H
+
+int ia32_classify_syscall(unsigned int syscall);
+
+#endif /* _ASM_X86_AUDIT_H */
diff --git a/arch/x86/include/asm/checksum.h b/arch/x86/include/asm/checksum.h
index d79d1e622dcf..0ada98d5d09f 100644
--- a/arch/x86/include/asm/checksum.h
+++ b/arch/x86/include/asm/checksum.h
@@ -1,4 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
+#define _HAVE_ARCH_COPY_AND_CSUM_FROM_USER 1
+#define HAVE_CSUM_COPY_USER
#ifdef CONFIG_X86_32
# include <asm/checksum_32.h>
#else
diff --git a/arch/x86/include/asm/checksum_32.h b/arch/x86/include/asm/checksum_32.h
index f57b94e02c57..11624c8a9d8d 100644
--- a/arch/x86/include/asm/checksum_32.h
+++ b/arch/x86/include/asm/checksum_32.h
@@ -44,18 +44,21 @@ static inline __wsum csum_partial_copy_nocheck(const void *src, void *dst,
return csum_partial_copy_generic(src, dst, len, sum, NULL, NULL);
}
-static inline __wsum csum_partial_copy_from_user(const void __user *src,
- void *dst,
- int len, __wsum sum,
- int *err_ptr)
+static inline __wsum csum_and_copy_from_user(const void __user *src,
+ void *dst, int len,
+ __wsum sum, int *err_ptr)
{
__wsum ret;
might_sleep();
- stac();
+ if (!user_access_begin(src, len)) {
+ if (len)
+ *err_ptr = -EFAULT;
+ return sum;
+ }
ret = csum_partial_copy_generic((__force void *)src, dst,
len, sum, err_ptr, NULL);
- clac();
+ user_access_end();
return ret;
}
@@ -173,7 +176,6 @@ static inline __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
/*
* Copy and checksum to user
*/
-#define HAVE_CSUM_COPY_USER
static inline __wsum csum_and_copy_to_user(const void *src,
void __user *dst,
int len, __wsum sum,
@@ -182,11 +184,10 @@ static inline __wsum csum_and_copy_to_user(const void *src,
__wsum ret;
might_sleep();
- if (access_ok(dst, len)) {
- stac();
+ if (user_access_begin(dst, len)) {
ret = csum_partial_copy_generic(src, (__force void *)dst,
len, sum, NULL, err_ptr);
- clac();
+ user_access_end();
return ret;
}
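
Both conversions above follow the same shape: user_access_begin() replaces the bare stac() and also performs the access_ok() range check, and user_access_end() replaces clac(). The general pattern, as a sketch rather than the csum code itself:

    if (!user_access_begin(uptr, len))
            return -EFAULT;         /* range not accessible from user space */
    /* raw user accesses (e.g. unsafe_get_user()) or a generic copy routine go here */
    user_access_end();
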
diff --git a/arch/x86/include/asm/checksum_64.h b/arch/x86/include/asm/checksum_64.h
index 3ec6d3267cf9..0a289b87e872 100644
--- a/arch/x86/include/asm/checksum_64.h
+++ b/arch/x86/include/asm/checksum_64.h
@@ -129,27 +129,19 @@ static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr,
*/
extern __wsum csum_partial(const void *buff, int len, __wsum sum);
-#define _HAVE_ARCH_COPY_AND_CSUM_FROM_USER 1
-#define HAVE_CSUM_COPY_USER 1
-
-
/* Do not call this directly. Use the wrappers below */
extern __visible __wsum csum_partial_copy_generic(const void *src, const void *dst,
int len, __wsum sum,
int *src_err_ptr, int *dst_err_ptr);
-extern __wsum csum_partial_copy_from_user(const void __user *src, void *dst,
+extern __wsum csum_and_copy_from_user(const void __user *src, void *dst,
int len, __wsum isum, int *errp);
-extern __wsum csum_partial_copy_to_user(const void *src, void __user *dst,
+extern __wsum csum_and_copy_to_user(const void *src, void __user *dst,
int len, __wsum isum, int *errp);
extern __wsum csum_partial_copy_nocheck(const void *src, void *dst,
int len, __wsum sum);
-/* Old names. To be removed. */
-#define csum_and_copy_to_user csum_partial_copy_to_user
-#define csum_and_copy_from_user csum_partial_copy_from_user
-
/**
* ip_compute_csum - Compute an 16bit IP checksum.
* @buff: buffer address.
diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
index 52e9f3480f69..d4edf281fff4 100644
--- a/arch/x86/include/asm/compat.h
+++ b/arch/x86/include/asm/compat.h
@@ -214,7 +214,11 @@ static inline bool in_compat_syscall(void)
#endif
struct compat_siginfo;
-int __copy_siginfo_to_user32(struct compat_siginfo __user *to,
- const kernel_siginfo_t *from, bool x32_ABI);
+
+#ifdef CONFIG_X86_X32_ABI
+int copy_siginfo_to_user32(struct compat_siginfo __user *to,
+ const kernel_siginfo_t *from);
+#define copy_siginfo_to_user32 copy_siginfo_to_user32
+#endif /* CONFIG_X86_X32_ABI */
#endif /* _ASM_X86_COMPAT_H */
diff --git a/arch/x86/include/asm/cpu_device_id.h b/arch/x86/include/asm/cpu_device_id.h
index cf3d621c6892..eb8fcede9e3b 100644
--- a/arch/x86/include/asm/cpu_device_id.h
+++ b/arch/x86/include/asm/cpu_device_id.h
@@ -20,12 +20,14 @@
#define X86_CENTAUR_FAM6_C7_D 0xd
#define X86_CENTAUR_FAM6_NANO 0xf
+#define X86_STEPPINGS(mins, maxs) GENMASK(maxs, mins)
/**
- * X86_MATCH_VENDOR_FAM_MODEL_FEATURE - Base macro for CPU matching
+ * X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE - Base macro for CPU matching
* @_vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY
* The name is expanded to X86_VENDOR_@_vendor
* @_family: The family number or X86_FAMILY_ANY
* @_model: The model number, model constant or X86_MODEL_ANY
+ * @_steppings: Bitmask for steppings, stepping constant or X86_STEPPING_ANY
* @_feature: A X86_FEATURE bit or X86_FEATURE_ANY
* @_data: Driver specific data or NULL. The internal storage
* format is unsigned long. The supplied value, pointer
@@ -37,16 +39,35 @@
* into another macro at the usage site for good reasons, then please
* start this local macro with X86_MATCH to allow easy grepping.
*/
-#define X86_MATCH_VENDOR_FAM_MODEL_FEATURE(_vendor, _family, _model, \
- _feature, _data) { \
+#define X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(_vendor, _family, _model, \
+ _steppings, _feature, _data) { \
.vendor = X86_VENDOR_##_vendor, \
.family = _family, \
.model = _model, \
+ .steppings = _steppings, \
.feature = _feature, \
.driver_data = (unsigned long) _data \
}
/**
+ * X86_MATCH_VENDOR_FAM_MODEL_FEATURE - Macro for CPU matching
+ * @_vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY
+ * The name is expanded to X86_VENDOR_@_vendor
+ * @_family: The family number or X86_FAMILY_ANY
+ * @_model: The model number, model constant or X86_MODEL_ANY
+ * @_feature: A X86_FEATURE bit or X86_FEATURE_ANY
+ * @_data: Driver specific data or NULL. The internal storage
+ * format is unsigned long. The supplied value, pointer
+ * etc. is cast to unsigned long internally.
+ *
+ * The steppings argument of X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE() is
+ * set to X86_STEPPING_ANY so that all steppings match.
+ */
+#define X86_MATCH_VENDOR_FAM_MODEL_FEATURE(vendor, family, model, feature, data) \
+ X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(vendor, family, model, \
+ X86_STEPPING_ANY, feature, data)
+
+/**
* X86_MATCH_VENDOR_FAM_FEATURE - Macro for matching vendor, family and CPU feature
* @vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY
* The name is expanded to X86_VENDOR_@vendor
@@ -139,6 +160,10 @@
#define X86_MATCH_INTEL_FAM6_MODEL(model, data) \
X86_MATCH_VENDOR_FAM_MODEL(INTEL, 6, INTEL_FAM6_##model, data)
+#define X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(model, steppings, data) \
+ X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(INTEL, 6, INTEL_FAM6_##model, \
+ steppings, X86_FEATURE_ANY, data)
+
/*
* Match specific microcode revisions.
*
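
A driver table using the new stepping-aware matching could look like the sketch below (the model, stepping range and driver data are illustrative, not part of this patch):

    static const struct x86_cpu_id my_ids[] = {
            /* match Skylake, steppings 0x0 through 0x5 only */
            X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(SKYLAKE, X86_STEPPINGS(0x0, 0x5), &my_driver_data),
            { }
    };

Here X86_STEPPINGS(0x0, 0x5) expands to GENMASK(5, 0), i.e. one mask bit per permitted stepping, while X86_STEPPING_ANY keeps the old match-everything behaviour.
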
diff --git a/arch/x86/include/asm/delay.h b/arch/x86/include/asm/delay.h
index de9e7841f953..630891d25819 100644
--- a/arch/x86/include/asm/delay.h
+++ b/arch/x86/include/asm/delay.h
@@ -3,8 +3,10 @@
#define _ASM_X86_DELAY_H
#include <asm-generic/delay.h>
+#include <linux/init.h>
-void use_tsc_delay(void);
+void __init use_tsc_delay(void);
+void __init use_tpause_delay(void);
void use_mwaitx_delay(void);
#endif /* _ASM_X86_DELAY_H */
diff --git a/arch/x86/include/asm/doublefault.h b/arch/x86/include/asm/doublefault.h
index af9a14ac8962..54a6e4a2e132 100644
--- a/arch/x86/include/asm/doublefault.h
+++ b/arch/x86/include/asm/doublefault.h
@@ -2,7 +2,7 @@
#ifndef _ASM_X86_DOUBLEFAULT_H
#define _ASM_X86_DOUBLEFAULT_H
-#if defined(CONFIG_X86_32) && defined(CONFIG_DOUBLEFAULT)
+#ifdef CONFIG_X86_32
extern void doublefault_init_cpu_tss(void);
#else
static inline void doublefault_init_cpu_tss(void)
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 8391c115c0ec..89dcc7aa7e2c 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -9,6 +9,7 @@
#include <asm/nospec-branch.h>
#include <asm/mmu_context.h>
#include <linux/build_bug.h>
+#include <linux/kernel.h>
extern unsigned long efi_fw_vendor, efi_config_table;
@@ -225,14 +226,21 @@ efi_status_t efi_set_virtual_address_map(unsigned long memory_map_size,
/* arch specific definitions used by the stub code */
-__attribute_const__ bool efi_is_64bit(void);
+#ifdef CONFIG_EFI_MIXED
+
+#define ARCH_HAS_EFISTUB_WRAPPERS
+
+static inline bool efi_is_64bit(void)
+{
+ extern const bool efi_is64;
+
+ return efi_is64;
+}
static inline bool efi_is_native(void)
{
if (!IS_ENABLED(CONFIG_X86_64))
return true;
- if (!IS_ENABLED(CONFIG_EFI_MIXED))
- return true;
return efi_is_64bit();
}
@@ -286,6 +294,15 @@ static inline u32 efi64_convert_status(efi_status_t status)
#define __efi64_argmap_allocate_pool(type, size, buffer) \
((type), (size), efi64_zero_upper(buffer))
+#define __efi64_argmap_create_event(type, tpl, f, c, event) \
+ ((type), (tpl), (f), (c), efi64_zero_upper(event))
+
+#define __efi64_argmap_set_timer(event, type, time) \
+ ((event), (type), lower_32_bits(time), upper_32_bits(time))
+
+#define __efi64_argmap_wait_for_event(num, event, index) \
+ ((num), (event), efi64_zero_upper(index))
+
#define __efi64_argmap_handle_protocol(handle, protocol, interface) \
((handle), (protocol), efi64_zero_upper(interface))
@@ -307,6 +324,10 @@ static inline u32 efi64_convert_status(efi_status_t status)
#define __efi64_argmap_load_file(protocol, path, policy, bufsize, buf) \
((protocol), (path), (policy), efi64_zero_upper(bufsize), (buf))
+/* Graphics Output Protocol */
+#define __efi64_argmap_query_mode(gop, mode, size, info) \
+ ((gop), (mode), efi64_zero_upper(size), efi64_zero_upper(info))
+
/*
* The macros below handle the plumbing for the argument mapping. To add a
* mapping for a specific EFI method, simply define a macro
@@ -335,15 +356,26 @@ static inline u32 efi64_convert_status(efi_status_t status)
#define efi_bs_call(func, ...) \
(efi_is_native() \
- ? efi_system_table()->boottime->func(__VA_ARGS__) \
- : __efi64_thunk_map(efi_table_attr(efi_system_table(), \
- boottime), func, __VA_ARGS__))
+ ? efi_system_table->boottime->func(__VA_ARGS__) \
+ : __efi64_thunk_map(efi_table_attr(efi_system_table, \
+ boottime), \
+ func, __VA_ARGS__))
#define efi_rt_call(func, ...) \
(efi_is_native() \
- ? efi_system_table()->runtime->func(__VA_ARGS__) \
- : __efi64_thunk_map(efi_table_attr(efi_system_table(), \
- runtime), func, __VA_ARGS__))
+ ? efi_system_table->runtime->func(__VA_ARGS__) \
+ : __efi64_thunk_map(efi_table_attr(efi_system_table, \
+ runtime), \
+ func, __VA_ARGS__))
+
+#else /* CONFIG_EFI_MIXED */
+
+static inline bool efi_is_64bit(void)
+{
+ return IS_ENABLED(CONFIG_X86_64);
+}
+
+#endif /* CONFIG_EFI_MIXED */
extern bool efi_reboot_required(void);
extern bool efi_is_table_address(unsigned long phys_addr);
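
Following the convention shown in the hunk above, wiring up mixed-mode support for a further boot service is just one more mapping macro; a hypothetical example for a call that takes a 64-bit value and returns a pointer might be:

    /* hypothetical service, shown only to illustrate the argmap convention */
    #define __efi64_argmap_frob_widget(handle, size, out) \
            ((handle), lower_32_bits(size), upper_32_bits(size), efi64_zero_upper(out))

lower_32_bits()/upper_32_bits() come from linux/kernel.h, which this patch now includes, and efi64_zero_upper() clears the upper half of the output slot so the 32-bit firmware's result can be read back intact.
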
diff --git a/arch/x86/include/asm/floppy.h b/arch/x86/include/asm/floppy.h
index 7ec59edde154..d43717b423cb 100644
--- a/arch/x86/include/asm/floppy.h
+++ b/arch/x86/include/asm/floppy.h
@@ -31,8 +31,8 @@
#define CSW fd_routine[can_use_virtual_dma & 1]
-#define fd_inb(port) inb_p(port)
-#define fd_outb(value, port) outb_p(value, port)
+#define fd_inb(base, reg) inb_p((base) + (reg))
+#define fd_outb(value, base, reg) outb_p(value, (base) + (reg))
#define fd_request_dma() CSW._request_dma(FLOPPY_DMA, "floppy")
#define fd_free_dma() CSW._free_dma(FLOPPY_DMA)
@@ -77,25 +77,26 @@ static irqreturn_t floppy_hardint(int irq, void *dev_id)
st = 1;
for (lcount = virtual_dma_count, lptr = virtual_dma_addr;
lcount; lcount--, lptr++) {
- st = inb(virtual_dma_port + 4) & 0xa0;
- if (st != 0xa0)
+ st = inb(virtual_dma_port + FD_STATUS);
+ st &= STATUS_DMA | STATUS_READY;
+ if (st != (STATUS_DMA | STATUS_READY))
break;
if (virtual_dma_mode)
- outb_p(*lptr, virtual_dma_port + 5);
+ outb_p(*lptr, virtual_dma_port + FD_DATA);
else
- *lptr = inb_p(virtual_dma_port + 5);
+ *lptr = inb_p(virtual_dma_port + FD_DATA);
}
virtual_dma_count = lcount;
virtual_dma_addr = lptr;
- st = inb(virtual_dma_port + 4);
+ st = inb(virtual_dma_port + FD_STATUS);
}
#ifdef TRACE_FLPY_INT
calls++;
#endif
- if (st == 0x20)
+ if (st == STATUS_DMA)
return IRQ_HANDLED;
- if (!(st & 0x20)) {
+ if (!(st & STATUS_DMA)) {
virtual_dma_residue += virtual_dma_count;
virtual_dma_count = 0;
#ifdef TRACE_FLPY_INT
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 44c48e34d799..42159f45bf9c 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -31,7 +31,8 @@ extern void fpu__save(struct fpu *fpu);
extern int fpu__restore_sig(void __user *buf, int ia32_frame);
extern void fpu__drop(struct fpu *fpu);
extern int fpu__copy(struct task_struct *dst, struct task_struct *src);
-extern void fpu__clear(struct fpu *fpu);
+extern void fpu__clear_user_states(struct fpu *fpu);
+extern void fpu__clear_all(struct fpu *fpu);
extern int fpu__exception_code(struct fpu *fpu, int trap_nr);
extern int dump_fpu(struct pt_regs *ptregs, struct user_i387_struct *fpstate);
@@ -92,7 +93,7 @@ static inline void fpstate_init_xstate(struct xregs_state *xsave)
* XRSTORS requires these bits set in xcomp_bv, or it will
* trigger #GP:
*/
- xsave->header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT | xfeatures_mask;
+ xsave->header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT | xfeatures_mask_all;
}
static inline void fpstate_init_fxstate(struct fxregs_state *fx)
@@ -399,7 +400,10 @@ static inline int copy_kernel_to_xregs_err(struct xregs_state *xstate, u64 mask)
u32 hmask = mask >> 32;
int err;
- XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
+ if (static_cpu_has(X86_FEATURE_XSAVES))
+ XSTATE_OP(XRSTORS, xstate, lmask, hmask, err);
+ else
+ XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
return err;
}
diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index c6136d79f8c0..422d8369012a 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -21,19 +21,29 @@
#define XSAVE_YMM_SIZE 256
#define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET)
-/* Supervisor features */
-#define XFEATURE_MASK_SUPERVISOR (XFEATURE_MASK_PT)
-
-/* All currently supported features */
-#define XCNTXT_MASK (XFEATURE_MASK_FP | \
- XFEATURE_MASK_SSE | \
- XFEATURE_MASK_YMM | \
- XFEATURE_MASK_OPMASK | \
- XFEATURE_MASK_ZMM_Hi256 | \
- XFEATURE_MASK_Hi16_ZMM | \
- XFEATURE_MASK_PKRU | \
- XFEATURE_MASK_BNDREGS | \
- XFEATURE_MASK_BNDCSR)
+/* All currently supported user features */
+#define XFEATURE_MASK_USER_SUPPORTED (XFEATURE_MASK_FP | \
+ XFEATURE_MASK_SSE | \
+ XFEATURE_MASK_YMM | \
+ XFEATURE_MASK_OPMASK | \
+ XFEATURE_MASK_ZMM_Hi256 | \
+ XFEATURE_MASK_Hi16_ZMM | \
+ XFEATURE_MASK_PKRU | \
+ XFEATURE_MASK_BNDREGS | \
+ XFEATURE_MASK_BNDCSR)
+
+/* All currently supported supervisor features */
+#define XFEATURE_MASK_SUPERVISOR_SUPPORTED (0)
+
+/*
+ * Unsupported supervisor features. When a supervisor feature in this mask is
+ * supported in the future, move it to the supported supervisor feature mask.
+ */
+#define XFEATURE_MASK_SUPERVISOR_UNSUPPORTED (XFEATURE_MASK_PT)
+
+/* All supervisor states including supported and unsupported states. */
+#define XFEATURE_MASK_SUPERVISOR_ALL (XFEATURE_MASK_SUPERVISOR_SUPPORTED | \
+ XFEATURE_MASK_SUPERVISOR_UNSUPPORTED)
#ifdef CONFIG_X86_64
#define REX_PREFIX "0x48, "
@@ -41,7 +51,18 @@
#define REX_PREFIX
#endif
-extern u64 xfeatures_mask;
+extern u64 xfeatures_mask_all;
+
+static inline u64 xfeatures_mask_supervisor(void)
+{
+ return xfeatures_mask_all & XFEATURE_MASK_SUPERVISOR_SUPPORTED;
+}
+
+static inline u64 xfeatures_mask_user(void)
+{
+ return xfeatures_mask_all & XFEATURE_MASK_USER_SUPPORTED;
+}
+
extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
extern void __init update_regset_xstate_info(unsigned int size,
@@ -54,8 +75,9 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int of
int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf);
int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf);
+void copy_supervisor_to_kernel(struct xregs_state *xsave);
/* Validate an xstate header supplied by userspace (ptrace or sigreturn) */
-extern int validate_xstate_header(const struct xstate_header *hdr);
+int validate_user_xstate_header(const struct xstate_header *hdr);
#endif
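As a quick illustration of how the new helpers partition the single xfeatures_mask_all value (a sketch only; the actual mask contents are established from CPUID at boot):

	u64 all  = xfeatures_mask_all;		/* everything the kernel enabled */
	u64 user = xfeatures_mask_user();	/* all & XFEATURE_MASK_USER_SUPPORTED */
	u64 sup  = xfeatures_mask_supervisor();	/* all & XFEATURE_MASK_SUPERVISOR_SUPPORTED, currently 0 */

	/* XFEATURE_MASK_SUPERVISOR_ALL additionally covers the unsupported
	 * supervisor bits (XFEATURE_MASK_PT) for code that must mask them out. */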
diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index 29336574d0bc..7a4d2062385c 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -11,17 +11,6 @@
#include <linux/types.h>
#include <asm/page.h>
-
-/*
- * While not explicitly listed in the TLFS, Hyper-V always runs with a page size
- * of 4096. These definitions are used when communicating with Hyper-V using
- * guest physical pages and guest physical page addresses, since the guest page
- * size may not be 4096 on all architectures.
- */
-#define HV_HYP_PAGE_SHIFT 12
-#define HV_HYP_PAGE_SIZE BIT(HV_HYP_PAGE_SHIFT)
-#define HV_HYP_PAGE_MASK (~(HV_HYP_PAGE_SIZE - 1))
-
/*
* The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent
* is set by CPUID(HvCpuIdFunctionVersionAndFeatures).
@@ -39,78 +28,41 @@
#define HYPERV_CPUID_MAX 0x4000ffff
/*
- * Feature identification. EAX indicates which features are available
- * to the partition based upon the current partition privileges.
- * These are HYPERV_CPUID_FEATURES.EAX bits.
+ * Aliases for Group A features that have X64 in the name.
+ * On x86/x64 these are HYPERV_CPUID_FEATURES.EAX bits.
*/
-/* VP Runtime (HV_X64_MSR_VP_RUNTIME) available */
-#define HV_X64_MSR_VP_RUNTIME_AVAILABLE BIT(0)
-/* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
-#define HV_MSR_TIME_REF_COUNT_AVAILABLE BIT(1)
-/*
- * Basic SynIC MSRs (HV_X64_MSR_SCONTROL through HV_X64_MSR_EOM
- * and HV_X64_MSR_SINT0 through HV_X64_MSR_SINT15) available
- */
-#define HV_X64_MSR_SYNIC_AVAILABLE BIT(2)
-/*
- * Synthetic Timer MSRs (HV_X64_MSR_STIMER0_CONFIG through
- * HV_X64_MSR_STIMER3_COUNT) available
- */
-#define HV_MSR_SYNTIMER_AVAILABLE BIT(3)
-/*
- * APIC access MSRs (HV_X64_MSR_EOI, HV_X64_MSR_ICR and HV_X64_MSR_TPR)
- * are available
- */
-#define HV_X64_MSR_APIC_ACCESS_AVAILABLE BIT(4)
-/* Hypercall MSRs (HV_X64_MSR_GUEST_OS_ID and HV_X64_MSR_HYPERCALL) available*/
-#define HV_X64_MSR_HYPERCALL_AVAILABLE BIT(5)
-/* Access virtual processor index MSR (HV_X64_MSR_VP_INDEX) available*/
-#define HV_X64_MSR_VP_INDEX_AVAILABLE BIT(6)
-/* Virtual system reset MSR (HV_X64_MSR_RESET) is available*/
-#define HV_X64_MSR_RESET_AVAILABLE BIT(7)
-/*
- * Access statistics pages MSRs (HV_X64_MSR_STATS_PARTITION_RETAIL_PAGE,
- * HV_X64_MSR_STATS_PARTITION_INTERNAL_PAGE, HV_X64_MSR_STATS_VP_RETAIL_PAGE,
- * HV_X64_MSR_STATS_VP_INTERNAL_PAGE) available
- */
-#define HV_X64_MSR_STAT_PAGES_AVAILABLE BIT(8)
-/* Partition reference TSC MSR is available */
-#define HV_MSR_REFERENCE_TSC_AVAILABLE BIT(9)
-/* Partition Guest IDLE MSR is available */
-#define HV_X64_MSR_GUEST_IDLE_AVAILABLE BIT(10)
-/*
- * There is a single feature flag that signifies if the partition has access
- * to MSRs with local APIC and TSC frequencies.
- */
-#define HV_X64_ACCESS_FREQUENCY_MSRS BIT(11)
-/* AccessReenlightenmentControls privilege */
-#define HV_X64_ACCESS_REENLIGHTENMENT BIT(13)
-/* AccessTscInvariantControls privilege */
-#define HV_X64_ACCESS_TSC_INVARIANT BIT(15)
+#define HV_X64_MSR_VP_RUNTIME_AVAILABLE \
+ HV_MSR_VP_RUNTIME_AVAILABLE
+#define HV_X64_MSR_SYNIC_AVAILABLE \
+ HV_MSR_SYNIC_AVAILABLE
+#define HV_X64_MSR_APIC_ACCESS_AVAILABLE \
+ HV_MSR_APIC_ACCESS_AVAILABLE
+#define HV_X64_MSR_HYPERCALL_AVAILABLE \
+ HV_MSR_HYPERCALL_AVAILABLE
+#define HV_X64_MSR_VP_INDEX_AVAILABLE \
+ HV_MSR_VP_INDEX_AVAILABLE
+#define HV_X64_MSR_RESET_AVAILABLE \
+ HV_MSR_RESET_AVAILABLE
+#define HV_X64_MSR_GUEST_IDLE_AVAILABLE \
+ HV_MSR_GUEST_IDLE_AVAILABLE
+#define HV_X64_ACCESS_FREQUENCY_MSRS \
+ HV_ACCESS_FREQUENCY_MSRS
+#define HV_X64_ACCESS_REENLIGHTENMENT \
+ HV_ACCESS_REENLIGHTENMENT
+#define HV_X64_ACCESS_TSC_INVARIANT \
+ HV_ACCESS_TSC_INVARIANT
/*
- * Feature identification: indicates which flags were specified at partition
- * creation. The format is the same as the partition creation flag structure
- * defined in section Partition Creation Flags.
- * These are HYPERV_CPUID_FEATURES.EBX bits.
+ * Aliases for Group B features that have X64 in the name.
+ * On x86/x64 these are HYPERV_CPUID_FEATURES.EBX bits.
*/
-#define HV_X64_CREATE_PARTITIONS BIT(0)
-#define HV_X64_ACCESS_PARTITION_ID BIT(1)
-#define HV_X64_ACCESS_MEMORY_POOL BIT(2)
-#define HV_X64_ADJUST_MESSAGE_BUFFERS BIT(3)
-#define HV_X64_POST_MESSAGES BIT(4)
-#define HV_X64_SIGNAL_EVENTS BIT(5)
-#define HV_X64_CREATE_PORT BIT(6)
-#define HV_X64_CONNECT_PORT BIT(7)
-#define HV_X64_ACCESS_STATS BIT(8)
-#define HV_X64_DEBUGGING BIT(11)
-#define HV_X64_CPU_POWER_MANAGEMENT BIT(12)
+#define HV_X64_POST_MESSAGES HV_POST_MESSAGES
+#define HV_X64_SIGNAL_EVENTS HV_SIGNAL_EVENTS
/*
- * Feature identification. EDX indicates which miscellaneous features
- * are available to the partition.
- * These are HYPERV_CPUID_FEATURES.EDX bits.
+ * Group D Features. The bit assignments are custom to each architecture.
+ * On x86/x64 these are HYPERV_CPUID_FEATURES.EDX bits.
*/
/* The MWAIT instruction is available (per section MONITOR / MWAIT) */
#define HV_X64_MWAIT_AVAILABLE BIT(0)
@@ -131,6 +83,8 @@
#define HV_FEATURE_FREQUENCY_MSRS_AVAILABLE BIT(8)
/* Crash MSR available */
#define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE BIT(10)
+/* Support for debug MSRs available */
+#define HV_FEATURE_DEBUG_MSRS_AVAILABLE BIT(11)
/* stimer Direct Mode is available */
#define HV_STIMER_DIRECT_MODE_AVAILABLE BIT(19)
@@ -187,7 +141,7 @@
* processor, except for virtual processors that are reported as sibling SMT
* threads.
*/
-#define HV_X64_NO_NONARCH_CORESHARING BIT(18)
+#define HV_X64_NO_NONARCH_CORESHARING BIT(18)
/* Nested features. These are HYPERV_CPUID_NESTED_FEATURES.EAX bits. */
#define HV_X64_NESTED_DIRECT_FLUSH BIT(17)
@@ -295,43 +249,6 @@ union hv_x64_msr_hypercall_contents {
} __packed;
};
-/*
- * TSC page layout.
- */
-struct ms_hyperv_tsc_page {
- volatile u32 tsc_sequence;
- u32 reserved1;
- volatile u64 tsc_scale;
- volatile s64 tsc_offset;
- u64 reserved2[509];
-} __packed;
-
-/*
- * The guest OS needs to register the guest ID with the hypervisor.
- * The guest ID is a 64 bit entity and the structure of this ID is
- * specified in the Hyper-V specification:
- *
- * msdn.microsoft.com/en-us/library/windows/hardware/ff542653%28v=vs.85%29.aspx
- *
- * While the current guideline does not specify how Linux guest ID(s)
- * need to be generated, our plan is to publish the guidelines for
- * Linux and other guest operating systems that currently are hosted
- * on Hyper-V. The implementation here conforms to this yet
- * unpublished guidelines.
- *
- *
- * Bit(s)
- * 63 - Indicates if the OS is Open Source or not; 1 is Open Source
- * 62:56 - Os Type; Linux is 0x100
- * 55:48 - Distro specific identification
- * 47:16 - Linux kernel version number
- * 15:0 - Distro specific identification
- *
- *
- */
-
-#define HV_LINUX_VENDOR_ID 0x8100
-
struct hv_reenlightenment_control {
__u64 vector:8;
__u64 reserved1:8;
@@ -355,31 +272,12 @@ struct hv_tsc_emulation_status {
#define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_MASK \
(~((1ull << HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT) - 1))
-/*
- * Crash notification (HV_X64_MSR_CRASH_CTL) flags.
- */
-#define HV_CRASH_CTL_CRASH_NOTIFY_MSG BIT_ULL(62)
-#define HV_CRASH_CTL_CRASH_NOTIFY BIT_ULL(63)
#define HV_X64_MSR_CRASH_PARAMS \
(1 + (HV_X64_MSR_CRASH_P4 - HV_X64_MSR_CRASH_P0))
#define HV_IPI_LOW_VECTOR 0x10
#define HV_IPI_HIGH_VECTOR 0xff
-/* Declare the various hypercall operations. */
-#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE 0x0002
-#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST 0x0003
-#define HVCALL_NOTIFY_LONG_SPIN_WAIT 0x0008
-#define HVCALL_SEND_IPI 0x000b
-#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX 0x0013
-#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX 0x0014
-#define HVCALL_SEND_IPI_EX 0x0015
-#define HVCALL_POST_MESSAGE 0x005c
-#define HVCALL_SIGNAL_EVENT 0x005d
-#define HVCALL_RETARGET_INTERRUPT 0x007e
-#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af
-#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0
-
#define HV_X64_MSR_VP_ASSIST_PAGE_ENABLE 0x00000001
#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT 12
#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK \
@@ -391,75 +289,6 @@ struct hv_tsc_emulation_status {
#define HV_X64_MSR_TSC_REFERENCE_ENABLE 0x00000001
#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT 12
-#define HV_PROCESSOR_POWER_STATE_C0 0
-#define HV_PROCESSOR_POWER_STATE_C1 1
-#define HV_PROCESSOR_POWER_STATE_C2 2
-#define HV_PROCESSOR_POWER_STATE_C3 3
-
-#define HV_FLUSH_ALL_PROCESSORS BIT(0)
-#define HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES BIT(1)
-#define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY BIT(2)
-#define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT BIT(3)
-
-enum HV_GENERIC_SET_FORMAT {
- HV_GENERIC_SET_SPARSE_4K,
- HV_GENERIC_SET_ALL,
-};
-
-#define HV_PARTITION_ID_SELF ((u64)-1)
-
-#define HV_HYPERCALL_RESULT_MASK GENMASK_ULL(15, 0)
-#define HV_HYPERCALL_FAST_BIT BIT(16)
-#define HV_HYPERCALL_VARHEAD_OFFSET 17
-#define HV_HYPERCALL_REP_COMP_OFFSET 32
-#define HV_HYPERCALL_REP_COMP_MASK GENMASK_ULL(43, 32)
-#define HV_HYPERCALL_REP_START_OFFSET 48
-#define HV_HYPERCALL_REP_START_MASK GENMASK_ULL(59, 48)
-
-/* hypercall status code */
-#define HV_STATUS_SUCCESS 0
-#define HV_STATUS_INVALID_HYPERCALL_CODE 2
-#define HV_STATUS_INVALID_HYPERCALL_INPUT 3
-#define HV_STATUS_INVALID_ALIGNMENT 4
-#define HV_STATUS_INVALID_PARAMETER 5
-#define HV_STATUS_INSUFFICIENT_MEMORY 11
-#define HV_STATUS_INVALID_PORT_ID 17
-#define HV_STATUS_INVALID_CONNECTION_ID 18
-#define HV_STATUS_INSUFFICIENT_BUFFERS 19
-
-/*
- * The Hyper-V TimeRefCount register and the TSC
- * page provide a guest VM clock with 100ns tick rate
- */
-#define HV_CLOCK_HZ (NSEC_PER_SEC/100)
-
-typedef struct _HV_REFERENCE_TSC_PAGE {
- __u32 tsc_sequence;
- __u32 res1;
- __u64 tsc_scale;
- __s64 tsc_offset;
-} __packed HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
-
-/* Define the number of synthetic interrupt sources. */
-#define HV_SYNIC_SINT_COUNT (16)
-/* Define the expected SynIC version. */
-#define HV_SYNIC_VERSION_1 (0x1)
-/* Valid SynIC vectors are 16-255. */
-#define HV_SYNIC_FIRST_VALID_VECTOR (16)
-
-#define HV_SYNIC_CONTROL_ENABLE (1ULL << 0)
-#define HV_SYNIC_SIMP_ENABLE (1ULL << 0)
-#define HV_SYNIC_SIEFP_ENABLE (1ULL << 0)
-#define HV_SYNIC_SINT_MASKED (1ULL << 16)
-#define HV_SYNIC_SINT_AUTO_EOI (1ULL << 17)
-#define HV_SYNIC_SINT_VECTOR_MASK (0xFF)
-
-#define HV_SYNIC_STIMER_COUNT (4)
-
-/* Define synthetic interrupt controller message constants. */
-#define HV_MESSAGE_SIZE (256)
-#define HV_MESSAGE_PAYLOAD_BYTE_COUNT (240)
-#define HV_MESSAGE_PAYLOAD_QWORD_COUNT (30)
/* Define hypervisor message types. */
enum hv_message_type {
@@ -470,76 +299,25 @@ enum hv_message_type {
HVMSG_GPA_INTERCEPT = 0x80000001,
/* Timer notification messages. */
- HVMSG_TIMER_EXPIRED = 0x80000010,
+ HVMSG_TIMER_EXPIRED = 0x80000010,
/* Error messages. */
HVMSG_INVALID_VP_REGISTER_VALUE = 0x80000020,
HVMSG_UNRECOVERABLE_EXCEPTION = 0x80000021,
- HVMSG_UNSUPPORTED_FEATURE = 0x80000022,
+ HVMSG_UNSUPPORTED_FEATURE = 0x80000022,
/* Trace buffer complete messages. */
HVMSG_EVENTLOG_BUFFERCOMPLETE = 0x80000040,
/* Platform-specific processor intercept messages. */
- HVMSG_X64_IOPORT_INTERCEPT = 0x80010000,
+ HVMSG_X64_IOPORT_INTERCEPT = 0x80010000,
HVMSG_X64_MSR_INTERCEPT = 0x80010001,
- HVMSG_X64_CPUID_INTERCEPT = 0x80010002,
+ HVMSG_X64_CPUID_INTERCEPT = 0x80010002,
HVMSG_X64_EXCEPTION_INTERCEPT = 0x80010003,
- HVMSG_X64_APIC_EOI = 0x80010004,
- HVMSG_X64_LEGACY_FP_ERROR = 0x80010005
-};
-
-/* Define synthetic interrupt controller message flags. */
-union hv_message_flags {
- __u8 asu8;
- struct {
- __u8 msg_pending:1;
- __u8 reserved:7;
- } __packed;
-};
-
-/* Define port identifier type. */
-union hv_port_id {
- __u32 asu32;
- struct {
- __u32 id:24;
- __u32 reserved:8;
- } __packed u;
+ HVMSG_X64_APIC_EOI = 0x80010004,
+ HVMSG_X64_LEGACY_FP_ERROR = 0x80010005
};
-/* Define synthetic interrupt controller message header. */
-struct hv_message_header {
- __u32 message_type;
- __u8 payload_size;
- union hv_message_flags message_flags;
- __u8 reserved[2];
- union {
- __u64 sender;
- union hv_port_id port;
- };
-} __packed;
-
-/* Define synthetic interrupt controller message format. */
-struct hv_message {
- struct hv_message_header header;
- union {
- __u64 payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT];
- } u;
-} __packed;
-
-/* Define the synthetic interrupt message page layout. */
-struct hv_message_page {
- struct hv_message sint_message[HV_SYNIC_SINT_COUNT];
-} __packed;
-
-/* Define timer message payload structure. */
-struct hv_timer_message_payload {
- __u32 timer_index;
- __u32 reserved;
- __u64 expiration_time; /* When the timer expired */
- __u64 delivery_time; /* When the message was delivered */
-} __packed;
-
struct hv_nested_enlightenments_control {
struct {
__u32 directhypercall:1;
@@ -767,187 +545,11 @@ struct hv_enlightened_vmcs {
#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL 0xFFFF
-/* Define synthetic interrupt controller flag constants. */
-#define HV_EVENT_FLAGS_COUNT (256 * 8)
-#define HV_EVENT_FLAGS_LONG_COUNT (256 / sizeof(unsigned long))
-
-/*
- * Synthetic timer configuration.
- */
-union hv_stimer_config {
- u64 as_uint64;
- struct {
- u64 enable:1;
- u64 periodic:1;
- u64 lazy:1;
- u64 auto_enable:1;
- u64 apic_vector:8;
- u64 direct_mode:1;
- u64 reserved_z0:3;
- u64 sintx:4;
- u64 reserved_z1:44;
- } __packed;
-};
-
-
-/* Define the synthetic interrupt controller event flags format. */
-union hv_synic_event_flags {
- unsigned long flags[HV_EVENT_FLAGS_LONG_COUNT];
-};
-
-/* Define SynIC control register. */
-union hv_synic_scontrol {
- u64 as_uint64;
- struct {
- u64 enable:1;
- u64 reserved:63;
- } __packed;
-};
-
-/* Define synthetic interrupt source. */
-union hv_synic_sint {
- u64 as_uint64;
- struct {
- u64 vector:8;
- u64 reserved1:8;
- u64 masked:1;
- u64 auto_eoi:1;
- u64 polling:1;
- u64 reserved2:45;
- } __packed;
-};
-
-/* Define the format of the SIMP register */
-union hv_synic_simp {
- u64 as_uint64;
- struct {
- u64 simp_enabled:1;
- u64 preserved:11;
- u64 base_simp_gpa:52;
- } __packed;
-};
-
-/* Define the format of the SIEFP register */
-union hv_synic_siefp {
- u64 as_uint64;
- struct {
- u64 siefp_enabled:1;
- u64 preserved:11;
- u64 base_siefp_gpa:52;
- } __packed;
-};
-
-struct hv_vpset {
- u64 format;
- u64 valid_bank_mask;
- u64 bank_contents[];
-} __packed;
-
-/* HvCallSendSyntheticClusterIpi hypercall */
-struct hv_send_ipi {
- u32 vector;
- u32 reserved;
- u64 cpu_mask;
-} __packed;
-
-/* HvCallSendSyntheticClusterIpiEx hypercall */
-struct hv_send_ipi_ex {
- u32 vector;
- u32 reserved;
- struct hv_vpset vp_set;
-} __packed;
-
-/* HvFlushGuestPhysicalAddressSpace hypercalls */
-struct hv_guest_mapping_flush {
- u64 address_space;
- u64 flags;
-} __packed;
-
-/*
- * HV_MAX_FLUSH_PAGES = "additional_pages" + 1. It's limited
- * by the bitwidth of "additional_pages" in union hv_gpa_page_range.
- */
-#define HV_MAX_FLUSH_PAGES (2048)
-
-/* HvFlushGuestPhysicalAddressList hypercall */
-union hv_gpa_page_range {
- u64 address_space;
- struct {
- u64 additional_pages:11;
- u64 largepage:1;
- u64 basepfn:52;
- } page;
-};
-
-/*
- * All input flush parameters should be in single page. The max flush
- * count is equal with how many entries of union hv_gpa_page_range can
- * be populated into the input parameter page.
- */
-#define HV_MAX_FLUSH_REP_COUNT ((HV_HYP_PAGE_SIZE - 2 * sizeof(u64)) / \
- sizeof(union hv_gpa_page_range))
-
-struct hv_guest_mapping_flush_list {
- u64 address_space;
- u64 flags;
- union hv_gpa_page_range gpa_list[HV_MAX_FLUSH_REP_COUNT];
-};
-
-/* HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls */
-struct hv_tlb_flush {
- u64 address_space;
- u64 flags;
- u64 processor_mask;
- u64 gva_list[];
-} __packed;
-
-/* HvFlushVirtualAddressSpaceEx, HvFlushVirtualAddressListEx hypercalls */
-struct hv_tlb_flush_ex {
- u64 address_space;
- u64 flags;
- struct hv_vpset hv_vp_set;
- u64 gva_list[];
-} __packed;
-
struct hv_partition_assist_pg {
u32 tlb_lock_count;
};
-union hv_msi_entry {
- u64 as_uint64;
- struct {
- u32 address;
- u32 data;
- } __packed;
-};
-
-struct hv_interrupt_entry {
- u32 source; /* 1 for MSI(-X) */
- u32 reserved1;
- union hv_msi_entry msi_entry;
-} __packed;
-/*
- * flags for hv_device_interrupt_target.flags
- */
-#define HV_DEVICE_INTERRUPT_TARGET_MULTICAST 1
-#define HV_DEVICE_INTERRUPT_TARGET_PROCESSOR_SET 2
-
-struct hv_device_interrupt_target {
- u32 vector;
- u32 flags;
- union {
- u64 vp_mask;
- struct hv_vpset vp_set;
- };
-} __packed;
+#include <asm-generic/hyperv-tlfs.h>
-/* HvRetargetDeviceInterrupt hypercall */
-struct hv_retarget_device_interrupt {
- u64 partition_id; /* use "self" */
- u64 device_id;
- struct hv_interrupt_entry int_entry;
- u64 reserved2;
- struct hv_device_interrupt_target int_target;
-} __packed __aligned(8);
#endif
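The HV_X64_* names above are now thin aliases over the architecture-neutral definitions pulled in from asm-generic/hyperv-tlfs.h, so existing x86 call sites keep compiling while generic code can use the HV_* spellings. A hedged sketch of a feature test either way (setup_hypercall_page() is a hypothetical helper; ms_hyperv.features holding HYPERV_CPUID_FEATURES EAX is assumed from the mshyperv code):

	if (ms_hyperv.features & HV_MSR_HYPERCALL_AVAILABLE)		/* generic name */
		setup_hypercall_page();

	if (ms_hyperv.features & HV_X64_MSR_HYPERCALL_AVAILABLE)	/* x86 alias, same bit */
		setup_hypercall_page();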
diff --git a/arch/x86/include/asm/intel-mid.h b/arch/x86/include/asm/intel-mid.h
index 8e5af119dc2d..de58391bdee0 100644
--- a/arch/x86/include/asm/intel-mid.h
+++ b/arch/x86/include/asm/intel-mid.h
@@ -88,11 +88,17 @@ static inline bool intel_mid_has_msic(void)
return (intel_mid_identify_cpu() == INTEL_MID_CPU_CHIP_PENWELL);
}
+extern void intel_scu_devices_create(void);
+extern void intel_scu_devices_destroy(void);
+
#else /* !CONFIG_X86_INTEL_MID */
#define intel_mid_identify_cpu() 0
#define intel_mid_has_msic() 0
+static inline void intel_scu_devices_create(void) { }
+static inline void intel_scu_devices_destroy(void) { }
+
#endif /* !CONFIG_X86_INTEL_MID */
enum intel_mid_timer_options {
@@ -115,9 +121,6 @@ extern enum intel_mid_timer_options intel_mid_timer_options;
#define SFI_MTMR_MAX_NUM 8
#define SFI_MRTC_MAX 8
-extern void intel_scu_devices_create(void);
-extern void intel_scu_devices_destroy(void);
-
/* VRTC timer */
#define MRST_VRTC_MAP_SZ 1024
/* #define MRST_VRTC_PGOFFSET 0xc00 */
diff --git a/arch/x86/include/asm/intel_pmc_ipc.h b/arch/x86/include/asm/intel_pmc_ipc.h
deleted file mode 100644
index e6da1ce26256..000000000000
--- a/arch/x86/include/asm/intel_pmc_ipc.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_INTEL_PMC_IPC_H_
-#define _ASM_X86_INTEL_PMC_IPC_H_
-
-/* Commands */
-#define PMC_IPC_PMIC_ACCESS 0xFF
-#define PMC_IPC_PMIC_ACCESS_READ 0x0
-#define PMC_IPC_PMIC_ACCESS_WRITE 0x1
-#define PMC_IPC_USB_PWR_CTRL 0xF0
-#define PMC_IPC_PMIC_BLACKLIST_SEL 0xEF
-#define PMC_IPC_PHY_CONFIG 0xEE
-#define PMC_IPC_NORTHPEAK_CTRL 0xED
-#define PMC_IPC_PM_DEBUG 0xEC
-#define PMC_IPC_PMC_TELEMTRY 0xEB
-#define PMC_IPC_PMC_FW_MSG_CTRL 0xEA
-
-/* IPC return code */
-#define IPC_ERR_NONE 0
-#define IPC_ERR_CMD_NOT_SUPPORTED 1
-#define IPC_ERR_CMD_NOT_SERVICED 2
-#define IPC_ERR_UNABLE_TO_SERVICE 3
-#define IPC_ERR_CMD_INVALID 4
-#define IPC_ERR_CMD_FAILED 5
-#define IPC_ERR_EMSECURITY 6
-#define IPC_ERR_UNSIGNEDKERNEL 7
-
-/* GCR reg offsets from gcr base*/
-#define PMC_GCR_PMC_CFG_REG 0x08
-#define PMC_GCR_TELEM_DEEP_S0IX_REG 0x78
-#define PMC_GCR_TELEM_SHLW_S0IX_REG 0x80
-
-#if IS_ENABLED(CONFIG_INTEL_PMC_IPC)
-
-int intel_pmc_ipc_command(u32 cmd, u32 sub, u8 *in, u32 inlen,
- u32 *out, u32 outlen);
-int intel_pmc_s0ix_counter_read(u64 *data);
-int intel_pmc_gcr_read64(u32 offset, u64 *data);
-
-#else
-
-static inline int intel_pmc_ipc_command(u32 cmd, u32 sub, u8 *in, u32 inlen,
- u32 *out, u32 outlen)
-{
- return -EINVAL;
-}
-
-static inline int intel_pmc_s0ix_counter_read(u64 *data)
-{
- return -EINVAL;
-}
-
-static inline int intel_pmc_gcr_read64(u32 offset, u64 *data)
-{
- return -EINVAL;
-}
-
-#endif /*CONFIG_INTEL_PMC_IPC*/
-
-#endif
diff --git a/arch/x86/include/asm/intel_scu_ipc.h b/arch/x86/include/asm/intel_scu_ipc.h
index 2a1442ba6e78..11d457af68c5 100644
--- a/arch/x86/include/asm/intel_scu_ipc.h
+++ b/arch/x86/include/asm/intel_scu_ipc.h
@@ -2,61 +2,69 @@
#ifndef _ASM_X86_INTEL_SCU_IPC_H_
#define _ASM_X86_INTEL_SCU_IPC_H_
-#include <linux/notifier.h>
-
-#define IPCMSG_INDIRECT_READ 0x02
-#define IPCMSG_INDIRECT_WRITE 0x05
-
-#define IPCMSG_COLD_OFF 0x80 /* Only for Tangier */
-
-#define IPCMSG_WARM_RESET 0xF0
-#define IPCMSG_COLD_RESET 0xF1
-#define IPCMSG_SOFT_RESET 0xF2
-#define IPCMSG_COLD_BOOT 0xF3
-
-#define IPCMSG_VRTC 0xFA /* Set vRTC device */
- /* Command id associated with message IPCMSG_VRTC */
- #define IPC_CMD_VRTC_SETTIME 1 /* Set time */
- #define IPC_CMD_VRTC_SETALARM 2 /* Set alarm */
-
-/* Read single register */
-int intel_scu_ipc_ioread8(u16 addr, u8 *data);
-
-/* Read a vector */
-int intel_scu_ipc_readv(u16 *addr, u8 *data, int len);
-
-/* Write single register */
-int intel_scu_ipc_iowrite8(u16 addr, u8 data);
-
-/* Write a vector */
-int intel_scu_ipc_writev(u16 *addr, u8 *data, int len);
-
-/* Update single register based on the mask */
-int intel_scu_ipc_update_register(u16 addr, u8 data, u8 mask);
-
-/* Issue commands to the SCU with or without data */
-int intel_scu_ipc_simple_command(int cmd, int sub);
-int intel_scu_ipc_command(int cmd, int sub, u32 *in, int inlen,
- u32 *out, int outlen);
-
-extern struct blocking_notifier_head intel_scu_notifier;
-
-static inline void intel_scu_notifier_add(struct notifier_block *nb)
-{
- blocking_notifier_chain_register(&intel_scu_notifier, nb);
-}
-
-static inline void intel_scu_notifier_remove(struct notifier_block *nb)
-{
- blocking_notifier_chain_unregister(&intel_scu_notifier, nb);
-}
-
-static inline int intel_scu_notifier_post(unsigned long v, void *p)
+#include <linux/ioport.h>
+
+struct device;
+struct intel_scu_ipc_dev;
+
+/**
+ * struct intel_scu_ipc_data - Data used to configure SCU IPC
+ * @mem: Base address of SCU IPC MMIO registers
+ * @irq: The IRQ number used for SCU (optional)
+ */
+struct intel_scu_ipc_data {
+ struct resource mem;
+ int irq;
+};
+
+struct intel_scu_ipc_dev *
+__intel_scu_ipc_register(struct device *parent,
+ const struct intel_scu_ipc_data *scu_data,
+ struct module *owner);
+
+#define intel_scu_ipc_register(parent, scu_data) \
+ __intel_scu_ipc_register(parent, scu_data, THIS_MODULE)
+
+void intel_scu_ipc_unregister(struct intel_scu_ipc_dev *scu);
+
+struct intel_scu_ipc_dev *
+__devm_intel_scu_ipc_register(struct device *parent,
+ const struct intel_scu_ipc_data *scu_data,
+ struct module *owner);
+
+#define devm_intel_scu_ipc_register(parent, scu_data) \
+ __devm_intel_scu_ipc_register(parent, scu_data, THIS_MODULE)
+
+struct intel_scu_ipc_dev *intel_scu_ipc_dev_get(void);
+void intel_scu_ipc_dev_put(struct intel_scu_ipc_dev *scu);
+struct intel_scu_ipc_dev *devm_intel_scu_ipc_dev_get(struct device *dev);
+
+int intel_scu_ipc_dev_ioread8(struct intel_scu_ipc_dev *scu, u16 addr,
+ u8 *data);
+int intel_scu_ipc_dev_iowrite8(struct intel_scu_ipc_dev *scu, u16 addr,
+ u8 data);
+int intel_scu_ipc_dev_readv(struct intel_scu_ipc_dev *scu, u16 *addr,
+ u8 *data, size_t len);
+int intel_scu_ipc_dev_writev(struct intel_scu_ipc_dev *scu, u16 *addr,
+ u8 *data, size_t len);
+
+int intel_scu_ipc_dev_update(struct intel_scu_ipc_dev *scu, u16 addr,
+ u8 data, u8 mask);
+
+int intel_scu_ipc_dev_simple_command(struct intel_scu_ipc_dev *scu, int cmd,
+ int sub);
+int intel_scu_ipc_dev_command_with_size(struct intel_scu_ipc_dev *scu, int cmd,
+ int sub, const void *in, size_t inlen,
+ size_t size, void *out, size_t outlen);
+
+static inline int intel_scu_ipc_dev_command(struct intel_scu_ipc_dev *scu, int cmd,
+ int sub, const void *in, size_t inlen,
+ void *out, size_t outlen)
{
- return blocking_notifier_call_chain(&intel_scu_notifier, v, p);
+ return intel_scu_ipc_dev_command_with_size(scu, cmd, sub, in, inlen,
+ inlen, out, outlen);
}
-#define SCU_AVAILABLE 1
-#define SCU_DOWN 2
+#include <asm/intel_scu_ipc_legacy.h>
#endif
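A minimal consumer sketch for the new handle-based SCU IPC API; the register address, payload and the deferral policy are assumptions for illustration, not part of this header:

	struct intel_scu_ipc_dev *scu;
	u8 val;
	int ret;

	scu = intel_scu_ipc_dev_get();		/* or devm_intel_scu_ipc_dev_get(dev) */
	if (!scu)
		return -EPROBE_DEFER;		/* assumption: NULL means no SCU registered yet */

	ret = intel_scu_ipc_dev_ioread8(scu, 0x20, &val);	/* 0x20 is a made-up address */
	intel_scu_ipc_dev_put(scu);
	return ret;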
diff --git a/arch/x86/include/asm/intel_scu_ipc_legacy.h b/arch/x86/include/asm/intel_scu_ipc_legacy.h
new file mode 100644
index 000000000000..4cf13fecb673
--- /dev/null
+++ b/arch/x86/include/asm/intel_scu_ipc_legacy.h
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_INTEL_SCU_IPC_LEGACY_H_
+#define _ASM_X86_INTEL_SCU_IPC_LEGACY_H_
+
+#include <linux/notifier.h>
+
+#define IPCMSG_INDIRECT_READ 0x02
+#define IPCMSG_INDIRECT_WRITE 0x05
+
+#define IPCMSG_COLD_OFF 0x80 /* Only for Tangier */
+
+#define IPCMSG_WARM_RESET 0xF0
+#define IPCMSG_COLD_RESET 0xF1
+#define IPCMSG_SOFT_RESET 0xF2
+#define IPCMSG_COLD_BOOT 0xF3
+
+#define IPCMSG_VRTC 0xFA /* Set vRTC device */
+/* Command id associated with message IPCMSG_VRTC */
+#define IPC_CMD_VRTC_SETTIME 1 /* Set time */
+#define IPC_CMD_VRTC_SETALARM 2 /* Set alarm */
+
+/* Don't call these in new code - they will be removed eventually */
+
+/* Read single register */
+static inline int intel_scu_ipc_ioread8(u16 addr, u8 *data)
+{
+ return intel_scu_ipc_dev_ioread8(NULL, addr, data);
+}
+
+/* Read a vector */
+static inline int intel_scu_ipc_readv(u16 *addr, u8 *data, int len)
+{
+ return intel_scu_ipc_dev_readv(NULL, addr, data, len);
+}
+
+/* Write single register */
+static inline int intel_scu_ipc_iowrite8(u16 addr, u8 data)
+{
+ return intel_scu_ipc_dev_iowrite8(NULL, addr, data);
+}
+
+/* Write a vector */
+static inline int intel_scu_ipc_writev(u16 *addr, u8 *data, int len)
+{
+ return intel_scu_ipc_dev_writev(NULL, addr, data, len);
+}
+
+/* Update single register based on the mask */
+static inline int intel_scu_ipc_update_register(u16 addr, u8 data, u8 mask)
+{
+ return intel_scu_ipc_dev_update(NULL, addr, data, mask);
+}
+
+/* Issue commands to the SCU with or without data */
+static inline int intel_scu_ipc_simple_command(int cmd, int sub)
+{
+ return intel_scu_ipc_dev_simple_command(NULL, cmd, sub);
+}
+
+static inline int intel_scu_ipc_command(int cmd, int sub, u32 *in, int inlen,
+ u32 *out, int outlen)
+{
+ /* New API takes both inlen and outlen as bytes so convert here */
+ size_t inbytes = inlen * sizeof(u32);
+ size_t outbytes = outlen * sizeof(u32);
+
+ return intel_scu_ipc_dev_command_with_size(NULL, cmd, sub, in, inbytes,
+ inlen, out, outbytes);
+}
+
+extern struct blocking_notifier_head intel_scu_notifier;
+
+static inline void intel_scu_notifier_add(struct notifier_block *nb)
+{
+ blocking_notifier_chain_register(&intel_scu_notifier, nb);
+}
+
+static inline void intel_scu_notifier_remove(struct notifier_block *nb)
+{
+ blocking_notifier_chain_unregister(&intel_scu_notifier, nb);
+}
+
+static inline int intel_scu_notifier_post(unsigned long v, void *p)
+{
+ return blocking_notifier_call_chain(&intel_scu_notifier, v, p);
+}
+
+#define SCU_AVAILABLE 1
+#define SCU_DOWN 2
+
+#endif
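A worked example of the size conversion done by the legacy intel_scu_ipc_command() wrapper above: lengths stay in dwords for the caller and are converted to bytes for the new API, with the dword count passed separately as the command size. The command choice and payload here are made up:

	u32 in[2] = { 0x1234, 0x5678 };		/* example payload only */
	u32 out[1];

	/* Legacy call, lengths in dwords: */
	intel_scu_ipc_command(IPCMSG_VRTC, IPC_CMD_VRTC_SETTIME, in, 2, out, 1);

	/* ...is forwarded as:
	 * intel_scu_ipc_dev_command_with_size(NULL, cmd, sub, in, 8, 2, out, 4);
	 * i.e. 8 input bytes, command size 2 (dwords), 4 output bytes.
	 */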
diff --git a/arch/x86/include/asm/intel_telemetry.h b/arch/x86/include/asm/intel_telemetry.h
index 2f77e31a1283..8046e70dfd7c 100644
--- a/arch/x86/include/asm/intel_telemetry.h
+++ b/arch/x86/include/asm/intel_telemetry.h
@@ -10,6 +10,8 @@
#define TELEM_MAX_EVENTS_SRAM 28
#define TELEM_MAX_OS_ALLOCATED_EVENTS 20
+#include <asm/intel_scu_ipc.h>
+
enum telemetry_unit {
TELEM_PSS = 0,
TELEM_IOSS,
@@ -51,6 +53,8 @@ struct telemetry_plt_config {
struct telemetry_unit_config ioss_config;
struct mutex telem_trace_lock;
struct mutex telem_lock;
+ struct intel_pmc_dev *pmc;
+ struct intel_scu_ipc_dev *scu;
bool telem_in_use;
};
@@ -92,7 +96,7 @@ int telemetry_set_pltdata(const struct telemetry_core_ops *ops,
int telemetry_clear_pltdata(void);
-int telemetry_pltconfig_valid(void);
+struct telemetry_plt_config *telemetry_get_pltdata(void);
int telemetry_get_evtname(enum telemetry_unit telem_unit,
const char **name, int len);
diff --git a/arch/x86/include/asm/invpcid.h b/arch/x86/include/asm/invpcid.h
index 989cfa86de85..734482afbf81 100644
--- a/arch/x86/include/asm/invpcid.h
+++ b/arch/x86/include/asm/invpcid.h
@@ -12,12 +12,9 @@ static inline void __invpcid(unsigned long pcid, unsigned long addr,
* stale TLB entries and, especially if we're flushing global
* mappings, we don't want the compiler to reorder any subsequent
* memory accesses before the TLB flush.
- *
- * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and
- * invpcid (%rcx), %rax in long mode.
*/
- asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01"
- : : "m" (desc), "a" (type), "c" (&desc) : "memory");
+ asm volatile("invpcid %[desc], %[type]"
+ :: [desc] "m" (desc), [type] "r" (type) : "memory");
}
#define INVPCID_TYPE_INDIV_ADDR 0
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 0a6b35353fc7..1da5858501ca 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -83,6 +83,10 @@
#define KVM_REQ_GET_VMCS12_PAGES KVM_ARCH_REQ(24)
#define KVM_REQ_APICV_UPDATE \
KVM_ARCH_REQ_FLAGS(25, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_TLB_FLUSH_CURRENT KVM_ARCH_REQ(26)
+#define KVM_REQ_HV_TLB_FLUSH \
+ KVM_ARCH_REQ_FLAGS(27, KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_APF_READY KVM_ARCH_REQ(28)
#define CR0_RESERVED_BITS \
(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
@@ -107,15 +111,8 @@
#define UNMAPPED_GVA (~(gpa_t)0)
/* KVM Hugepage definitions for x86 */
-enum {
- PT_PAGE_TABLE_LEVEL = 1,
- PT_DIRECTORY_LEVEL = 2,
- PT_PDPE_LEVEL = 3,
- /* set max level to the biggest one */
- PT_MAX_HUGEPAGE_LEVEL = PT_PDPE_LEVEL,
-};
-#define KVM_NR_PAGE_SIZES (PT_MAX_HUGEPAGE_LEVEL - \
- PT_PAGE_TABLE_LEVEL + 1)
+#define KVM_MAX_HUGEPAGE_LEVEL PG_LEVEL_1G
+#define KVM_NR_PAGE_SIZES (KVM_MAX_HUGEPAGE_LEVEL - PG_LEVEL_4K + 1)
#define KVM_HPAGE_GFN_SHIFT(x) (((x) - 1) * 9)
#define KVM_HPAGE_SHIFT(x) (PAGE_SHIFT + KVM_HPAGE_GFN_SHIFT(x))
#define KVM_HPAGE_SIZE(x) (1UL << KVM_HPAGE_SHIFT(x))
@@ -124,7 +121,7 @@ enum {
static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
{
- /* KVM_HPAGE_GFN_SHIFT(PT_PAGE_TABLE_LEVEL) must be 0. */
+ /* KVM_HPAGE_GFN_SHIFT(PG_LEVEL_4K) must be 0. */
return (gfn >> KVM_HPAGE_GFN_SHIFT(level)) -
(base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
}
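	/* Illustrative expansion of the renamed hugepage macros, assuming the
	 * usual x86 enum values PG_LEVEL_4K = 1, PG_LEVEL_2M = 2, PG_LEVEL_1G = 3.
	 * KVM_HPAGE_GFN_SHIFT(x) expands to ((x) - 1) * 9, so:
	 *
	 *   KVM_HPAGE_GFN_SHIFT(PG_LEVEL_2M) ==  9
	 *   KVM_HPAGE_SHIFT(PG_LEVEL_2M)     == 21  ->  KVM_HPAGE_SIZE == 2 MiB
	 *   KVM_HPAGE_SHIFT(PG_LEVEL_1G)     == 30  ->  KVM_HPAGE_SIZE == 1 GiB
	 *
	 * matching what the old PT_DIRECTORY_LEVEL / PT_PDPE_LEVEL names produced.
	 */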
@@ -164,9 +161,13 @@ enum kvm_reg {
NR_VCPU_REGS,
VCPU_EXREG_PDPTR = NR_VCPU_REGS,
+ VCPU_EXREG_CR0,
VCPU_EXREG_CR3,
+ VCPU_EXREG_CR4,
VCPU_EXREG_RFLAGS,
VCPU_EXREG_SEGMENTS,
+ VCPU_EXREG_EXIT_INFO_1,
+ VCPU_EXREG_EXIT_INFO_2,
};
enum {
@@ -182,8 +183,10 @@ enum {
enum exit_fastpath_completion {
EXIT_FASTPATH_NONE,
- EXIT_FASTPATH_SKIP_EMUL_INS,
+ EXIT_FASTPATH_REENTER_GUEST,
+ EXIT_FASTPATH_EXIT_HANDLED,
};
+typedef enum exit_fastpath_completion fastpath_t;
struct x86_emulate_ctxt;
struct x86_exception;
@@ -372,12 +375,12 @@ struct rsvd_bits_validate {
};
struct kvm_mmu_root_info {
- gpa_t cr3;
+ gpa_t pgd;
hpa_t hpa;
};
#define KVM_MMU_ROOT_INFO_INVALID \
- ((struct kvm_mmu_root_info) { .cr3 = INVALID_PAGE, .hpa = INVALID_PAGE })
+ ((struct kvm_mmu_root_info) { .pgd = INVALID_PAGE, .hpa = INVALID_PAGE })
#define KVM_MMU_NUM_PREV_ROOTS 3
@@ -403,7 +406,7 @@ struct kvm_mmu {
void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
u64 *spte, const void *pte);
hpa_t root_hpa;
- gpa_t root_cr3;
+ gpa_t root_pgd;
union kvm_mmu_role mmu_role;
u8 root_level;
u8 shadow_root_level;
@@ -598,6 +601,7 @@ struct kvm_vcpu_arch {
u64 ia32_xss;
u64 microcode_version;
u64 arch_capabilities;
+ u64 perf_capabilities;
/*
* Paging state of the vcpu
@@ -650,7 +654,6 @@ struct kvm_vcpu_arch {
u64 xcr0;
u64 guest_supported_xcr0;
- u32 guest_xstate_size;
struct kvm_pio_request pio;
void *pio_data;
@@ -680,6 +683,7 @@ struct kvm_vcpu_arch {
struct kvm_cpuid_entry2 cpuid_entries[KVM_MAX_CPUID_ENTRIES];
int maxphyaddr;
+ int tdp_level;
/* emulate context */
@@ -703,6 +707,7 @@ struct kvm_vcpu_arch {
struct gfn_to_pfn_cache cache;
} st;
+ u64 l1_tsc_offset;
u64 tsc_offset;
u64 last_guest_tsc;
u64 last_host_tsc;
@@ -762,14 +767,17 @@ struct kvm_vcpu_arch {
struct {
bool halted;
- gfn_t gfns[roundup_pow_of_two(ASYNC_PF_PER_VCPU)];
+ gfn_t gfns[ASYNC_PF_PER_VCPU];
struct gfn_to_hva_cache data;
- u64 msr_val;
+ u64 msr_en_val; /* MSR_KVM_ASYNC_PF_EN */
+ u64 msr_int_val; /* MSR_KVM_ASYNC_PF_INT */
+ u16 vec;
u32 id;
bool send_user_only;
- u32 host_apf_reason;
+ u32 host_apf_flags;
unsigned long nested_apf_token;
bool delivery_as_pf_vmexit;
+ bool pageready_pending;
} apf;
/* OSVW MSRs (AMD only) */
@@ -855,6 +863,18 @@ struct kvm_apic_map {
struct kvm_lapic *phys_map[];
};
+/* Hyper-V synthetic debugger (SynDbg)*/
+struct kvm_hv_syndbg {
+ struct {
+ u64 control;
+ u64 status;
+ u64 send_page;
+ u64 recv_page;
+ u64 pending_page;
+ } control;
+ u64 options;
+};
+
/* Hyper-V emulation context */
struct kvm_hv {
struct mutex hv_lock;
@@ -866,7 +886,7 @@ struct kvm_hv {
u64 hv_crash_param[HV_X64_MSR_CRASH_PARAMS];
u64 hv_crash_ctl;
- HV_REFERENCE_TSC_PAGE tsc_ref;
+ struct ms_hyperv_tsc_page tsc_ref;
struct idr conn_to_evt;
@@ -878,6 +898,7 @@ struct kvm_hv {
atomic_t num_mismatched_vp_indexes;
struct hv_partition_assist_pg *hv_pa_pg;
+ struct kvm_hv_syndbg hv_syndbg;
};
enum kvm_irqchip_mode {
@@ -1028,6 +1049,8 @@ struct kvm_vcpu_stat {
u64 irq_injections;
u64 nmi_injections;
u64 req_event;
+ u64 halt_poll_success_ns;
+ u64 halt_poll_fail_ns;
};
struct x86_instruction_info;
@@ -1059,7 +1082,7 @@ struct kvm_x86_ops {
void (*hardware_disable)(void);
void (*hardware_unsetup)(void);
bool (*cpu_has_accelerated_tpr)(void);
- bool (*has_emulated_msr)(int index);
+ bool (*has_emulated_msr)(u32 index);
void (*cpuid_update)(struct kvm_vcpu *vcpu);
unsigned int vm_size;
@@ -1085,8 +1108,6 @@ struct kvm_x86_ops {
void (*set_segment)(struct kvm_vcpu *vcpu,
struct kvm_segment *var, int seg);
void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l);
- void (*decache_cr0_guest_bits)(struct kvm_vcpu *vcpu);
- void (*decache_cr4_guest_bits)(struct kvm_vcpu *vcpu);
void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0);
int (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4);
void (*set_efer)(struct kvm_vcpu *vcpu, u64 efer);
@@ -1100,7 +1121,8 @@ struct kvm_x86_ops {
unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
- void (*tlb_flush)(struct kvm_vcpu *vcpu, bool invalidate_gpa);
+ void (*tlb_flush_all)(struct kvm_vcpu *vcpu);
+ void (*tlb_flush_current)(struct kvm_vcpu *vcpu);
int (*tlb_remote_flush)(struct kvm *kvm);
int (*tlb_remote_flush_with_range)(struct kvm *kvm,
struct kvm_tlb_range *range);
@@ -1113,7 +1135,13 @@ struct kvm_x86_ops {
*/
void (*tlb_flush_gva)(struct kvm_vcpu *vcpu, gva_t addr);
- void (*run)(struct kvm_vcpu *vcpu);
+ /*
+ * Flush any TLB entries created by the guest. Like tlb_flush_gva(),
+ * does not need to flush GPA->HPA mappings.
+ */
+ void (*tlb_flush_guest)(struct kvm_vcpu *vcpu);
+
+ enum exit_fastpath_completion (*run)(struct kvm_vcpu *vcpu);
int (*handle_exit)(struct kvm_vcpu *vcpu,
enum exit_fastpath_completion exit_fastpath);
int (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
@@ -1126,8 +1154,8 @@ struct kvm_x86_ops {
void (*set_nmi)(struct kvm_vcpu *vcpu);
void (*queue_exception)(struct kvm_vcpu *vcpu);
void (*cancel_injection)(struct kvm_vcpu *vcpu);
- int (*interrupt_allowed)(struct kvm_vcpu *vcpu);
- int (*nmi_allowed)(struct kvm_vcpu *vcpu);
+ int (*interrupt_allowed)(struct kvm_vcpu *vcpu, bool for_injection);
+ int (*nmi_allowed)(struct kvm_vcpu *vcpu, bool for_injection);
bool (*get_nmi_mask)(struct kvm_vcpu *vcpu);
void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked);
void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
@@ -1141,7 +1169,7 @@ struct kvm_x86_ops {
bool (*guest_apic_has_interrupt)(struct kvm_vcpu *vcpu);
void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
void (*set_virtual_apic_mode)(struct kvm_vcpu *vcpu);
- void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa);
+ void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu);
int (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
@@ -1153,7 +1181,6 @@ struct kvm_x86_ops {
bool (*has_wbinvd_exit)(void);
- u64 (*read_l1_tsc_offset)(struct kvm_vcpu *vcpu);
/* Returns actual tsc_offset set in active VMCS */
u64 (*write_l1_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
@@ -1163,10 +1190,8 @@ struct kvm_x86_ops {
struct x86_instruction_info *info,
enum x86_intercept_stage stage,
struct x86_exception *exception);
- void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu,
- enum exit_fastpath_completion *exit_fastpath);
+ void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu);
- int (*check_nested_events)(struct kvm_vcpu *vcpu);
void (*request_immediate_exit)(struct kvm_vcpu *vcpu);
void (*sched_in)(struct kvm_vcpu *kvm, int cpu);
@@ -1199,6 +1224,7 @@ struct kvm_x86_ops {
/* pmu operations of sub-arch */
const struct kvm_pmu_ops *pmu_ops;
+ const struct kvm_x86_nested_ops *nested_ops;
/*
* Architecture specific hooks for vCPU blocking due to
@@ -1226,18 +1252,10 @@ struct kvm_x86_ops {
void (*setup_mce)(struct kvm_vcpu *vcpu);
- int (*get_nested_state)(struct kvm_vcpu *vcpu,
- struct kvm_nested_state __user *user_kvm_nested_state,
- unsigned user_data_size);
- int (*set_nested_state)(struct kvm_vcpu *vcpu,
- struct kvm_nested_state __user *user_kvm_nested_state,
- struct kvm_nested_state *kvm_state);
- bool (*get_vmcs12_pages)(struct kvm_vcpu *vcpu);
-
- int (*smi_allowed)(struct kvm_vcpu *vcpu);
+ int (*smi_allowed)(struct kvm_vcpu *vcpu, bool for_injection);
int (*pre_enter_smm)(struct kvm_vcpu *vcpu, char *smstate);
int (*pre_leave_smm)(struct kvm_vcpu *vcpu, const char *smstate);
- int (*enable_smi_window)(struct kvm_vcpu *vcpu);
+ void (*enable_smi_window)(struct kvm_vcpu *vcpu);
int (*mem_enc_op)(struct kvm *kvm, void __user *argp);
int (*mem_enc_reg_region)(struct kvm *kvm, struct kvm_enc_region *argp);
@@ -1245,14 +1263,28 @@ struct kvm_x86_ops {
int (*get_msr_feature)(struct kvm_msr_entry *entry);
- int (*nested_enable_evmcs)(struct kvm_vcpu *vcpu,
- uint16_t *vmcs_version);
- uint16_t (*nested_get_evmcs_version)(struct kvm_vcpu *vcpu);
-
bool (*need_emulation_on_page_fault)(struct kvm_vcpu *vcpu);
bool (*apic_init_signal_blocked)(struct kvm_vcpu *vcpu);
int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu);
+
+ void (*migrate_timers)(struct kvm_vcpu *vcpu);
+};
+
+struct kvm_x86_nested_ops {
+ int (*check_events)(struct kvm_vcpu *vcpu);
+ bool (*hv_timer_pending)(struct kvm_vcpu *vcpu);
+ int (*get_state)(struct kvm_vcpu *vcpu,
+ struct kvm_nested_state __user *user_kvm_nested_state,
+ unsigned user_data_size);
+ int (*set_state)(struct kvm_vcpu *vcpu,
+ struct kvm_nested_state __user *user_kvm_nested_state,
+ struct kvm_nested_state *kvm_state);
+ bool (*get_vmcs12_pages)(struct kvm_vcpu *vcpu);
+
+ int (*enable_evmcs)(struct kvm_vcpu *vcpu,
+ uint16_t *vmcs_version);
+ uint16_t (*get_evmcs_version)(struct kvm_vcpu *vcpu);
};
struct kvm_x86_init_ops {
@@ -1279,8 +1311,7 @@ extern struct kmem_cache *x86_fpu_cache;
#define __KVM_HAVE_ARCH_VM_ALLOC
static inline struct kvm *kvm_arch_alloc_vm(void)
{
- return __vmalloc(kvm_x86_ops.vm_size,
- GFP_KERNEL_ACCOUNT | __GFP_ZERO, PAGE_KERNEL);
+ return __vmalloc(kvm_x86_ops.vm_size, GFP_KERNEL_ACCOUNT | __GFP_ZERO);
}
void kvm_arch_free_vm(struct kvm *kvm);
@@ -1452,6 +1483,8 @@ void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr, unsigned long pay
void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr);
void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault);
+bool kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu,
+ struct x86_exception *fault);
int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
gfn_t gfn, void *data, int offset, int len,
u32 access);
@@ -1479,6 +1512,8 @@ void kvm_pic_clear_all(struct kvm_pic *pic, int irq_source_id);
void kvm_inject_nmi(struct kvm_vcpu *vcpu);
+void kvm_update_dr7(struct kvm_vcpu *vcpu);
+
int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn);
int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva);
void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
@@ -1509,8 +1544,11 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code,
void *insn, int insn_len);
void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva);
+void kvm_mmu_invalidate_gva(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+ gva_t gva, hpa_t root_hpa);
void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid);
-void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t new_cr3, bool skip_tlb_flush);
+void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd, bool skip_tlb_flush,
+ bool skip_mmu_sync);
void kvm_configure_mmu(bool enable_tdp, int tdp_page_level);
@@ -1574,8 +1612,6 @@ enum {
};
#define HF_GIF_MASK (1 << 0)
-#define HF_HIF_MASK (1 << 1)
-#define HF_VINTR_MASK (1 << 2)
#define HF_NMI_MASK (1 << 3)
#define HF_IRET_MASK (1 << 4)
#define HF_GUEST_MASK (1 << 5) /* VCPU is in guest-mode */
@@ -1641,7 +1677,8 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work);
void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work);
-bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu);
+void kvm_arch_async_page_present_queued(struct kvm_vcpu *vcpu);
+bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu);
extern bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn);
int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu);
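With the nested-virtualization callbacks split out of kvm_x86_ops into the new kvm_x86_nested_ops table, a vendor module would group them separately and point .nested_ops at its table. A hedged sketch with a subset of the fields; every function name below is hypothetical:

	static struct kvm_x86_nested_ops example_nested_ops = {
		.check_events		= example_check_nested_events,
		.hv_timer_pending	= example_nested_hv_timer_pending,
		.get_state		= example_get_nested_state,
		.set_state		= example_set_nested_state,
		.get_vmcs12_pages	= example_get_vmcs12_pages,
	};

	/* and, in the module's kvm_x86_ops instance: .nested_ops = &example_nested_ops, */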
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 9b4df6eaa11a..57fd1966c4ea 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -88,11 +88,21 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1,
bool kvm_para_available(void);
unsigned int kvm_arch_para_features(void);
unsigned int kvm_arch_para_hints(void);
-void kvm_async_pf_task_wait(u32 token, int interrupt_kernel);
+void kvm_async_pf_task_wait_schedule(u32 token);
void kvm_async_pf_task_wake(u32 token);
-u32 kvm_read_and_reset_pf_reason(void);
-extern void kvm_disable_steal_time(void);
-void do_async_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address);
+u32 kvm_read_and_reset_apf_flags(void);
+void kvm_disable_steal_time(void);
+bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token);
+
+DECLARE_STATIC_KEY_FALSE(kvm_async_pf_enabled);
+
+static __always_inline bool kvm_handle_async_pf(struct pt_regs *regs, u32 token)
+{
+ if (static_branch_unlikely(&kvm_async_pf_enabled))
+ return __kvm_handle_async_pf(regs, token);
+ else
+ return false;
+}
#ifdef CONFIG_PARAVIRT_SPINLOCKS
void __init kvm_spinlock_init(void);
@@ -103,7 +113,7 @@ static inline void kvm_spinlock_init(void)
#endif /* CONFIG_PARAVIRT_SPINLOCKS */
#else /* CONFIG_KVM_GUEST */
-#define kvm_async_pf_task_wait(T, I) do {} while(0)
+#define kvm_async_pf_task_wait_schedule(T) do {} while(0)
#define kvm_async_pf_task_wake(T) do {} while(0)
static inline bool kvm_para_available(void)
@@ -121,7 +131,7 @@ static inline unsigned int kvm_arch_para_hints(void)
return 0;
}
-static inline u32 kvm_read_and_reset_pf_reason(void)
+static inline u32 kvm_read_and_reset_apf_flags(void)
{
return 0;
}
@@ -130,6 +140,11 @@ static inline void kvm_disable_steal_time(void)
{
return;
}
+
+static inline bool kvm_handle_async_pf(struct pt_regs *regs, u32 token)
+{
+ return false;
+}
#endif
#endif /* _ASM_X86_KVM_PARA_H */
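The static key lets the page-fault path test for async PF with a single patched branch rather than an unconditional call. A sketch of a call site; how the token is obtained is outside this header and not shown:

	/* Early in the #PF handler: */
	if (kvm_handle_async_pf(regs, token))
		return;		/* consumed as an async-PF wait/wake notification */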
diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h
index 73d8dd14dda2..2d4515e8b7df 100644
--- a/arch/x86/include/asm/mmzone_32.h
+++ b/arch/x86/include/asm/mmzone_32.h
@@ -14,43 +14,4 @@ extern struct pglist_data *node_data[];
#define NODE_DATA(nid) (node_data[nid])
#endif /* CONFIG_NUMA */
-#ifdef CONFIG_DISCONTIGMEM
-
-/*
- * generic node memory support, the following assumptions apply:
- *
- * 1) memory comes in 64Mb contiguous chunks which are either present or not
- * 2) we will not have more than 64Gb in total
- *
- * for now assume that 64Gb is max amount of RAM for whole system
- * 64Gb / 4096bytes/page = 16777216 pages
- */
-#define MAX_NR_PAGES 16777216
-#define MAX_SECTIONS 1024
-#define PAGES_PER_SECTION (MAX_NR_PAGES/MAX_SECTIONS)
-
-extern s8 physnode_map[];
-
-static inline int pfn_to_nid(unsigned long pfn)
-{
-#ifdef CONFIG_NUMA
- return((int) physnode_map[(pfn) / PAGES_PER_SECTION]);
-#else
- return 0;
-#endif
-}
-
-static inline int pfn_valid(int pfn)
-{
- int nid = pfn_to_nid(pfn);
-
- if (nid >= 0)
- return (pfn < node_end_pfn(nid));
- return 0;
-}
-
-#define early_pfn_valid(pfn) pfn_valid((pfn))
-
-#endif /* CONFIG_DISCONTIGMEM */
-
#endif /* _ASM_X86_MMZONE_32_H */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 12c9684d59ba..ef452b817f44 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -301,6 +301,9 @@
#define MSR_PP1_ENERGY_STATUS 0x00000641
#define MSR_PP1_POLICY 0x00000642
+#define MSR_AMD_PKG_ENERGY_STATUS 0xc001029b
+#define MSR_AMD_RAPL_POWER_UNIT 0xc0010299
+
/* Config TDP MSRs */
#define MSR_CONFIG_TDP_NOMINAL 0x00000648
#define MSR_CONFIG_TDP_LEVEL_1 0x00000649
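The two new AMD RAPL MSRs read like any other MSR; a minimal sketch (decoding the energy unit from MSR_AMD_RAPL_POWER_UNIT is left out here):

	u64 unit, energy;

	rdmsrl(MSR_AMD_RAPL_POWER_UNIT, unit);
	rdmsrl(MSR_AMD_PKG_ENERGY_STATUS, energy);	/* raw counter; scale by the energy unit in 'unit' */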
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index b809f117f3f4..73d997aa2966 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -20,8 +20,10 @@
#define MWAIT_ECX_INTERRUPT_BREAK 0x1
#define MWAITX_ECX_TIMER_ENABLE BIT(1)
-#define MWAITX_MAX_LOOPS ((u32)-1)
+#define MWAITX_MAX_WAIT_CYCLES UINT_MAX
#define MWAITX_DISABLE_CSTATES 0xf0
+#define TPAUSE_C01_STATE 1
+#define TPAUSE_C02_STATE 0
u32 get_umwait_control_msr(void);
@@ -122,4 +124,24 @@ static inline void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
current_clr_polling();
}
+/*
+ * Caller can specify whether to enter C0.1 (low latency, less
+ * power saving) or C0.2 state (saves more power, but longer wakeup
+ * latency). This may be overridden by the IA32_UMWAIT_CONTROL MSR
+ * which can force requests for C0.2 to be downgraded to C0.1.
+ */
+static inline void __tpause(u32 ecx, u32 edx, u32 eax)
+{
+ /* "tpause %ecx, %edx, %eax;" */
+ #ifdef CONFIG_AS_TPAUSE
+ asm volatile("tpause %%ecx\n"
+ :
+ : "c"(ecx), "d"(edx), "a"(eax));
+ #else
+ asm volatile(".byte 0x66, 0x0f, 0xae, 0xf1\t\n"
+ :
+ : "c"(ecx), "d"(edx), "a"(eax));
+ #endif
+}
+
#endif /* _ASM_X86_MWAIT_H */
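A hedged usage sketch for the new helper: TPAUSE takes an explicit TSC deadline in edx:eax, so a caller would typically compute the deadline itself. The delay computation below is an assumption, not part of this header:

	u64 deadline = rdtsc() + cycles_to_wait;	/* cycles_to_wait chosen by the caller */

	__tpause(TPAUSE_C02_STATE, upper_32_bits(deadline), lower_32_bits(deadline));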
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 7e9a281e2660..d52d1aacdd97 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -4,20 +4,13 @@
#define _ASM_X86_NOSPEC_BRANCH_H_
#include <linux/static_key.h>
+#include <linux/frame.h>
#include <asm/alternative.h>
#include <asm/alternative-asm.h>
#include <asm/cpufeatures.h>
#include <asm/msr-index.h>
-
-/*
- * This should be used immediately before a retpoline alternative. It tells
- * objtool where the retpolines are so that it can make sense of the control
- * flow by just reading the original instruction(s) and ignoring the
- * alternatives.
- */
-#define ANNOTATE_NOSPEC_ALTERNATIVE \
- ANNOTATE_IGNORE_ALTERNATIVE
+#include <asm/unwind_hints.h>
/*
* Fill the CPU return stack buffer.
@@ -46,21 +39,25 @@
#define __FILL_RETURN_BUFFER(reg, nr, sp) \
mov $(nr/2), reg; \
771: \
+ ANNOTATE_INTRA_FUNCTION_CALL; \
call 772f; \
773: /* speculation trap */ \
+ UNWIND_HINT_EMPTY; \
pause; \
lfence; \
jmp 773b; \
772: \
+ ANNOTATE_INTRA_FUNCTION_CALL; \
call 774f; \
775: /* speculation trap */ \
+ UNWIND_HINT_EMPTY; \
pause; \
lfence; \
jmp 775b; \
774: \
+ add $(BITS_PER_LONG/8) * 2, sp; \
dec reg; \
- jnz 771b; \
- add $(BITS_PER_LONG/8) * nr, sp;
+ jnz 771b;
#ifdef __ASSEMBLY__
@@ -77,57 +74,27 @@
.endm
/*
- * These are the bare retpoline primitives for indirect jmp and call.
- * Do not use these directly; they only exist to make the ALTERNATIVE
- * invocation below less ugly.
- */
-.macro RETPOLINE_JMP reg:req
- call .Ldo_rop_\@
-.Lspec_trap_\@:
- pause
- lfence
- jmp .Lspec_trap_\@
-.Ldo_rop_\@:
- mov \reg, (%_ASM_SP)
- ret
-.endm
-
-/*
- * This is a wrapper around RETPOLINE_JMP so the called function in reg
- * returns to the instruction after the macro.
- */
-.macro RETPOLINE_CALL reg:req
- jmp .Ldo_call_\@
-.Ldo_retpoline_jmp_\@:
- RETPOLINE_JMP \reg
-.Ldo_call_\@:
- call .Ldo_retpoline_jmp_\@
-.endm
-
-/*
* JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple
* indirect jmp/call which may be susceptible to the Spectre variant 2
* attack.
*/
.macro JMP_NOSPEC reg:req
#ifdef CONFIG_RETPOLINE
- ANNOTATE_NOSPEC_ALTERNATIVE
- ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *\reg), \
- __stringify(RETPOLINE_JMP \reg), X86_FEATURE_RETPOLINE, \
- __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *\reg), X86_FEATURE_RETPOLINE_AMD
+ ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \
+ __stringify(jmp __x86_retpoline_\reg), X86_FEATURE_RETPOLINE, \
+ __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), X86_FEATURE_RETPOLINE_AMD
#else
- jmp *\reg
+ jmp *%\reg
#endif
.endm
.macro CALL_NOSPEC reg:req
#ifdef CONFIG_RETPOLINE
- ANNOTATE_NOSPEC_ALTERNATIVE
- ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *\reg), \
- __stringify(RETPOLINE_CALL \reg), X86_FEATURE_RETPOLINE,\
- __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *\reg), X86_FEATURE_RETPOLINE_AMD
+ ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *%\reg), \
+ __stringify(call __x86_retpoline_\reg), X86_FEATURE_RETPOLINE, \
+ __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *%\reg), X86_FEATURE_RETPOLINE_AMD
#else
- call *\reg
+ call *%\reg
#endif
.endm
@@ -137,10 +104,8 @@
*/
.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
#ifdef CONFIG_RETPOLINE
- ANNOTATE_NOSPEC_ALTERNATIVE
- ALTERNATIVE "jmp .Lskip_rsb_\@", \
- __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)) \
- \ftr
+ ALTERNATIVE "jmp .Lskip_rsb_\@", "", \ftr
+ __FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)
.Lskip_rsb_\@:
#endif
.endm
@@ -161,16 +126,16 @@
* which is ensured when CONFIG_RETPOLINE is defined.
*/
# define CALL_NOSPEC \
- ANNOTATE_NOSPEC_ALTERNATIVE \
ALTERNATIVE_2( \
ANNOTATE_RETPOLINE_SAFE \
"call *%[thunk_target]\n", \
- "call __x86_indirect_thunk_%V[thunk_target]\n", \
+ "call __x86_retpoline_%V[thunk_target]\n", \
X86_FEATURE_RETPOLINE, \
"lfence;\n" \
ANNOTATE_RETPOLINE_SAFE \
"call *%[thunk_target]\n", \
X86_FEATURE_RETPOLINE_AMD)
+
# define THUNK_TARGET(addr) [thunk_target] "r" (addr)
#else /* CONFIG_X86_32 */
@@ -180,7 +145,6 @@
* here, anyway.
*/
# define CALL_NOSPEC \
- ANNOTATE_NOSPEC_ALTERNATIVE \
ALTERNATIVE_2( \
ANNOTATE_RETPOLINE_SAFE \
"call *%[thunk_target]\n", \
diff --git a/arch/x86/include/asm/orc_types.h b/arch/x86/include/asm/orc_types.h
index 6e060907c163..d25534940bde 100644
--- a/arch/x86/include/asm/orc_types.h
+++ b/arch/x86/include/asm/orc_types.h
@@ -58,8 +58,7 @@
#define ORC_TYPE_CALL 0
#define ORC_TYPE_REGS 1
#define ORC_TYPE_REGS_IRET 2
-#define UNWIND_HINT_TYPE_SAVE 3
-#define UNWIND_HINT_TYPE_RESTORE 4
+#define UNWIND_HINT_TYPE_RET_OFFSET 3
#ifndef __ASSEMBLY__
/*
diff --git a/arch/x86/include/asm/pgtable-2level_types.h b/arch/x86/include/asm/pgtable-2level_types.h
index 6deb6cd236e3..7f6ccff0ba72 100644
--- a/arch/x86/include/asm/pgtable-2level_types.h
+++ b/arch/x86/include/asm/pgtable-2level_types.h
@@ -20,6 +20,8 @@ typedef union {
#define SHARED_KERNEL_PMD 0
+#define ARCH_PAGE_TABLE_SYNC_MASK PGTBL_PMD_MODIFIED
+
/*
* traditional i386 two-level paging structure:
*/
diff --git a/arch/x86/include/asm/pgtable-3level_types.h b/arch/x86/include/asm/pgtable-3level_types.h
index 33845d36897c..80fbb4a9ed87 100644
--- a/arch/x86/include/asm/pgtable-3level_types.h
+++ b/arch/x86/include/asm/pgtable-3level_types.h
@@ -27,6 +27,8 @@ typedef union {
#define SHARED_KERNEL_PMD (!static_cpu_has(X86_FEATURE_PTI))
#endif
+#define ARCH_PAGE_TABLE_SYNC_MASK (SHARED_KERNEL_PMD ? 0 : PGTBL_PMD_MODIFIED)
+
/*
* PGDIR_SHIFT determines what a top-level page table entry can map
*/
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
index 0dca7f7aeff2..be7b19646897 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -66,8 +66,7 @@ do { \
#endif /* !__ASSEMBLY__ */
/*
- * kern_addr_valid() is (1) for FLATMEM and (0) for
- * SPARSEMEM and DISCONTIGMEM
+ * kern_addr_valid() is (1) for FLATMEM and (0) for SPARSEMEM
*/
#ifdef CONFIG_FLATMEM
#define kern_addr_valid(addr) (1)
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 52e5f5f2240d..8f63efb2a2cc 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -159,4 +159,6 @@ extern unsigned int ptrs_per_p4d;
#define PGD_KERNEL_START ((PAGE_SIZE / 2) / sizeof(pgd_t))
+#define ARCH_PAGE_TABLE_SYNC_MASK (pgtable_l5_enabled() ? PGTBL_PGD_MODIFIED : PGTBL_P4D_MODIFIED)
+
#endif /* _ASM_X86_PGTABLE_64_DEFS_H */
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index b6606fe6cfdf..2e7c442cc618 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -194,7 +194,6 @@ enum page_cache_mode {
#define _PAGE_TABLE_NOENC (__PP|__RW|_USR|___A| 0|___D| 0| 0)
#define _PAGE_TABLE (__PP|__RW|_USR|___A| 0|___D| 0| 0| _ENC)
#define __PAGE_KERNEL_RO (__PP| 0| 0|___A|__NX|___D| 0|___G)
-#define __PAGE_KERNEL_RX (__PP| 0| 0|___A| 0|___D| 0|___G)
#define __PAGE_KERNEL_NOCACHE (__PP|__RW| 0|___A|__NX|___D| 0|___G| __NC)
#define __PAGE_KERNEL_VVAR (__PP| 0|_USR|___A|__NX|___D| 0|___G)
#define __PAGE_KERNEL_LARGE (__PP|__RW| 0|___A|__NX|___D|_PSE|___G)
@@ -220,7 +219,6 @@ enum page_cache_mode {
#define PAGE_KERNEL_RO __pgprot_mask(__PAGE_KERNEL_RO | _ENC)
#define PAGE_KERNEL_EXEC __pgprot_mask(__PAGE_KERNEL_EXEC | _ENC)
#define PAGE_KERNEL_EXEC_NOENC __pgprot_mask(__PAGE_KERNEL_EXEC | 0)
-#define PAGE_KERNEL_RX __pgprot_mask(__PAGE_KERNEL_RX | _ENC)
#define PAGE_KERNEL_NOCACHE __pgprot_mask(__PAGE_KERNEL_NOCACHE | _ENC)
#define PAGE_KERNEL_LARGE __pgprot_mask(__PAGE_KERNEL_LARGE | _ENC)
#define PAGE_KERNEL_LARGE_EXEC __pgprot_mask(__PAGE_KERNEL_LARGE_EXEC | _ENC)
@@ -284,6 +282,12 @@ typedef struct pgprot { pgprotval_t pgprot; } pgprot_t;
typedef struct { pgdval_t pgd; } pgd_t;
+static inline pgprot_t pgprot_nx(pgprot_t prot)
+{
+ return __pgprot(pgprot_val(prot) | _PAGE_NX);
+}
+#define pgprot_nx pgprot_nx
+
#ifdef CONFIG_X86_PAE
/*
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 3bcf27caf6c9..29ee0c088009 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -113,9 +113,10 @@ struct cpuinfo_x86 {
/* in KB - valid for CPUS which support this call: */
unsigned int x86_cache_size;
int x86_cache_alignment; /* In bytes */
- /* Cache QoS architectural values: */
+ /* Cache QoS architectural values, valid only on the BSP: */
int x86_cache_max_rmid; /* max index */
int x86_cache_occ_scale; /* scale to bytes */
+ int x86_cache_mbm_width_offset;
int x86_power;
unsigned long loops_per_jiffy;
/* cpuid returned max cores value: */
@@ -727,7 +728,6 @@ static inline void sync_core(void)
unsigned int tmp;
asm volatile (
- UNWIND_HINT_SAVE
"mov %%ss, %0\n\t"
"pushq %q0\n\t"
"pushq %%rsp\n\t"
@@ -737,7 +737,6 @@ static inline void sync_core(void)
"pushq %q0\n\t"
"pushq $1f\n\t"
"iretq\n\t"
- UNWIND_HINT_RESTORE
"1:"
: "=&r" (tmp), ASM_CALL_CONSTRAINT : : "cc", "memory");
#endif
diff --git a/arch/x86/include/asm/resctrl_sched.h b/arch/x86/include/asm/resctrl.h
index f6b7fe2833cc..07603064df8f 100644
--- a/arch/x86/include/asm/resctrl_sched.h
+++ b/arch/x86/include/asm/resctrl.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_RESCTRL_SCHED_H
-#define _ASM_X86_RESCTRL_SCHED_H
+#ifndef _ASM_X86_RESCTRL_H
+#define _ASM_X86_RESCTRL_H
#ifdef CONFIG_X86_CPU_RESCTRL
@@ -84,10 +84,13 @@ static inline void resctrl_sched_in(void)
__resctrl_sched_in();
}
+void resctrl_cpu_detect(struct cpuinfo_x86 *c);
+
#else
static inline void resctrl_sched_in(void) {}
+static inline void resctrl_cpu_detect(struct cpuinfo_x86 *c) {}
#endif /* CONFIG_X86_CPU_RESCTRL */
-#endif /* _ASM_X86_RESCTRL_SCHED_H */
+#endif /* _ASM_X86_RESCTRL_H */
diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h
index 27c47d183f4b..8b58d6975d5d 100644
--- a/arch/x86/include/asm/smap.h
+++ b/arch/x86/include/asm/smap.h
@@ -57,8 +57,10 @@ static __always_inline unsigned long smap_save(void)
{
unsigned long flags;
- asm volatile (ALTERNATIVE("", "pushf; pop %0; " __ASM_CLAC,
- X86_FEATURE_SMAP)
+ asm volatile ("# smap_save\n\t"
+ ALTERNATIVE("jmp 1f", "", X86_FEATURE_SMAP)
+ "pushf; pop %0; " __ASM_CLAC "\n\t"
+ "1:"
: "=rm" (flags) : : "memory", "cc");
return flags;
@@ -66,7 +68,10 @@ static __always_inline unsigned long smap_save(void)
static __always_inline void smap_restore(unsigned long flags)
{
- asm volatile (ALTERNATIVE("", "push %0; popf", X86_FEATURE_SMAP)
+ asm volatile ("# smap_restore\n\t"
+ ALTERNATIVE("jmp 1f", "", X86_FEATURE_SMAP)
+ "push %0; popf\n\t"
+ "1:"
: : "g" (flags) : "memory", "cc");
}
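
The two SMAP helpers above now always emit the flag-save/CLAC (and push/popf) sequence and let ALTERNATIVE() patch in only a "jmp 1f" that skips it on CPUs without SMAP, so the asm operands stay tied to a single statement. A rough userspace analogue of the same "skip the slow path unless the feature is present" shape, with a plain runtime flag standing in for boot-time code patching:

    #include <stdbool.h>
    #include <stdio.h>

    /* Stand-in for the boot-time SMAP detection; in the kernel this is a
     * patched-in jump, not a branch evaluated on every call. */
    static bool cpu_has_smap;

    static unsigned long smap_save_like(void)
    {
        unsigned long flags;

        if (!cpu_has_smap)      /* corresponds to the patched "jmp 1f" */
            return 0;

        flags = 1;              /* pretend we saved EFLAGS and ran CLAC */
        return flags;
    }

    int main(void)
    {
        cpu_has_smap = true;
        printf("saved: %lu\n", smap_save_like());
        return 0;
    }
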
diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h
index bf3e34b25afc..323db6c5852a 100644
--- a/arch/x86/include/asm/spinlock_types.h
+++ b/arch/x86/include/asm/spinlock_types.h
@@ -3,29 +3,7 @@
#define _ASM_X86_SPINLOCK_TYPES_H
#include <linux/types.h>
-
-#ifdef CONFIG_PARAVIRT_SPINLOCKS
-#define __TICKET_LOCK_INC 2
-#define TICKET_SLOWPATH_FLAG ((__ticket_t)1)
-#else
-#define __TICKET_LOCK_INC 1
-#define TICKET_SLOWPATH_FLAG ((__ticket_t)0)
-#endif
-
-#if (CONFIG_NR_CPUS < (256 / __TICKET_LOCK_INC))
-typedef u8 __ticket_t;
-typedef u16 __ticketpair_t;
-#else
-typedef u16 __ticket_t;
-typedef u32 __ticketpair_t;
-#endif
-
-#define TICKET_LOCK_INC ((__ticket_t)__TICKET_LOCK_INC)
-
-#define TICKET_SHIFT (sizeof(__ticket_t) * 8)
-
#include <asm-generic/qspinlock_types.h>
-
#include <asm-generic/qrwlock_types.h>
#endif /* _ASM_X86_SPINLOCK_TYPES_H */
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 6ece8561ba66..8a1f5382a4ea 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -96,7 +96,6 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
u8 reserved_6[8]; /* Offset 0xe8 */
u64 avic_logical_id; /* Offset 0xf0 */
u64 avic_physical_id; /* Offset 0xf8 */
- u8 reserved_7[768];
};
@@ -203,8 +202,16 @@ struct __attribute__ ((__packed__)) vmcb_save_area {
u64 last_excp_to;
};
+
+static inline void __unused_size_checks(void)
+{
+ BUILD_BUG_ON(sizeof(struct vmcb_save_area) != 0x298);
+ BUILD_BUG_ON(sizeof(struct vmcb_control_area) != 256);
+}
+
struct __attribute__ ((__packed__)) vmcb {
struct vmcb_control_area control;
+ u8 reserved_control[1024 - sizeof(struct vmcb_control_area)];
struct vmcb_save_area save;
};
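
The VMCB control area loses its trailing reserved_7[] pad; instead the sizes of both areas are pinned with BUILD_BUG_ON() and the save area is placed at offset 1024 through an explicit reserved_control[] gap. A small standalone C sketch of the same layout-checking idea (field contents are elided; only the sizes and the 1024-byte offset are taken from the hunk):

    #include <stdint.h>
    #include <stddef.h>

    /* Illustrative layout only; the real VMCB fields are omitted. */
    struct control_area {
        uint8_t bytes[256];
    } __attribute__((packed));

    struct save_area {
        uint8_t bytes[0x298];
    } __attribute__((packed));

    /* Same idea as the BUILD_BUG_ON()s: catch layout drift at compile time. */
    _Static_assert(sizeof(struct control_area) == 256, "control area size");
    _Static_assert(sizeof(struct save_area) == 0x298, "save area size");

    /* Pad explicitly so the save area starts at offset 1024, as the hunk
     * does with reserved_control[] instead of a trailing reserved field. */
    struct vmcb_like {
        struct control_area control;
        uint8_t reserved_control[1024 - sizeof(struct control_area)];
        struct save_area save;
    } __attribute__((packed));

    _Static_assert(offsetof(struct vmcb_like, save) == 1024, "save offset");

    int main(void) { return 0; }
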
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index 0e059b73437b..9f69cc497f4b 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -12,27 +12,6 @@ struct task_struct *__switch_to_asm(struct task_struct *prev,
__visible struct task_struct *__switch_to(struct task_struct *prev,
struct task_struct *next);
-/* This runs on the previous thread's stack. */
-static inline void prepare_switch_to(struct task_struct *next)
-{
-#ifdef CONFIG_VMAP_STACK
- /*
- * If we switch to a stack that has a top-level paging entry
- * that is not present in the current mm, the resulting #PF will
- * will be promoted to a double-fault and we'll panic. Probe
- * the new stack now so that vmalloc_fault can fix up the page
- * tables if needed. This can only happen if we use a stack
- * in vmap space.
- *
- * We assume that the stack is aligned so that it never spans
- * more than one top-level paging entry.
- *
- * To minimize cache pollution, just follow the stack pointer.
- */
- READ_ONCE(*(unsigned char *)next->thread.sp);
-#endif
-}
-
asmlinkage void ret_from_fork(void);
/*
@@ -67,8 +46,6 @@ struct fork_frame {
#define switch_to(prev, next, last) \
do { \
- prepare_switch_to(next); \
- \
((last) = __switch_to_asm((prev), (next))); \
} while (0)
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index c26a7e1d8a2c..2ae904bf25e4 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -69,9 +69,7 @@ dotraplinkage void do_overflow(struct pt_regs *regs, long error_code);
dotraplinkage void do_bounds(struct pt_regs *regs, long error_code);
dotraplinkage void do_invalid_op(struct pt_regs *regs, long error_code);
dotraplinkage void do_device_not_available(struct pt_regs *regs, long error_code);
-#if defined(CONFIG_X86_64) || defined(CONFIG_DOUBLEFAULT)
dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsigned long cr2);
-#endif
dotraplinkage void do_coprocessor_segment_overrun(struct pt_regs *regs, long error_code);
dotraplinkage void do_invalid_TSS(struct pt_regs *regs, long error_code);
dotraplinkage void do_segment_not_present(struct pt_regs *regs, long error_code);
@@ -118,11 +116,6 @@ void smp_spurious_interrupt(struct pt_regs *regs);
void smp_error_interrupt(struct pt_regs *regs);
asmlinkage void smp_irq_move_cleanup_interrupt(void);
-extern void ist_enter(struct pt_regs *regs);
-extern void ist_exit(struct pt_regs *regs);
-extern void ist_begin_non_atomic(struct pt_regs *regs);
-extern void ist_end_non_atomic(void);
-
#ifdef CONFIG_VMAP_STACK
void __noreturn handle_stack_overflow(const char *message,
struct pt_regs *regs,
diff --git a/arch/x86/include/asm/unwind_hints.h b/arch/x86/include/asm/unwind_hints.h
index f5e2eb12cb71..7d903fdb3f43 100644
--- a/arch/x86/include/asm/unwind_hints.h
+++ b/arch/x86/include/asm/unwind_hints.h
@@ -86,32 +86,15 @@
UNWIND_HINT sp_offset=\sp_offset
.endm
-.macro UNWIND_HINT_SAVE
- UNWIND_HINT type=UNWIND_HINT_TYPE_SAVE
-.endm
-
-.macro UNWIND_HINT_RESTORE
- UNWIND_HINT type=UNWIND_HINT_TYPE_RESTORE
+/*
+ * RET_OFFSET: Used on instructions that terminate a function; mostly RETURN
+ * and sibling calls. On these, sp_offset denotes the expected offset from
+ * initial_func_cfi.
+ */
+.macro UNWIND_HINT_RET_OFFSET sp_offset=8
+ UNWIND_HINT type=UNWIND_HINT_TYPE_RET_OFFSET sp_offset=\sp_offset
.endm
-#else /* !__ASSEMBLY__ */
-
-#define UNWIND_HINT(sp_reg, sp_offset, type, end) \
- "987: \n\t" \
- ".pushsection .discard.unwind_hints\n\t" \
- /* struct unwind_hint */ \
- ".long 987b - .\n\t" \
- ".short " __stringify(sp_offset) "\n\t" \
- ".byte " __stringify(sp_reg) "\n\t" \
- ".byte " __stringify(type) "\n\t" \
- ".byte " __stringify(end) "\n\t" \
- ".balign 4 \n\t" \
- ".popsection\n\t"
-
-#define UNWIND_HINT_SAVE UNWIND_HINT(0, 0, UNWIND_HINT_TYPE_SAVE, 0)
-
-#define UNWIND_HINT_RESTORE UNWIND_HINT(0, 0, UNWIND_HINT_TYPE_RESTORE, 0)
-
#endif /* __ASSEMBLY__ */
#endif /* _ASM_X86_UNWIND_HINTS_H */
diff --git a/arch/x86/include/asm/uv/bios.h b/arch/x86/include/asm/uv/bios.h
index 389174eaec79..2fcc3ac12e76 100644
--- a/arch/x86/include/asm/uv/bios.h
+++ b/arch/x86/include/asm/uv/bios.h
@@ -123,12 +123,6 @@ enum uv_memprotect {
UV_MEMPROT_ALLOW_RW
};
-/*
- * bios calls have 6 parameters
- */
-extern s64 uv_bios_call(enum uv_bios_cmd, u64, u64, u64, u64, u64);
-extern s64 uv_bios_call_irqsave(enum uv_bios_cmd, u64, u64, u64, u64, u64);
-
extern s64 uv_bios_get_sn_info(int, int *, long *, long *, long *, long *);
extern s64 uv_bios_freq_base(u64, u64 *);
extern int uv_bios_mq_watchlist_alloc(unsigned long, unsigned int,
@@ -146,7 +140,6 @@ extern long sn_partition_id;
extern long sn_coherency_id;
extern long sn_region_size;
extern long system_serial_number;
-#define uv_partition_coherence_id() (sn_coherency_id)
extern struct kobject *sgi_uv_kobj; /* /sys/firmware/sgi_uv */
diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h
index 45ea95ce79b4..ae587ce544f4 100644
--- a/arch/x86/include/asm/uv/uv.h
+++ b/arch/x86/include/asm/uv/uv.h
@@ -31,7 +31,6 @@ static inline bool is_early_uv_system(void)
}
extern int is_uv_system(void);
extern int is_uv_hubbed(int uvtype);
-extern int is_uv_hubless(int uvtype);
extern void uv_cpu_init(void);
extern void uv_nmi_init(void);
extern void uv_system_init(void);
@@ -44,7 +43,6 @@ static inline enum uv_system_type get_uv_system_type(void) { return UV_NONE; }
static inline bool is_early_uv_system(void) { return 0; }
static inline int is_uv_system(void) { return 0; }
static inline int is_uv_hubbed(int uv) { return 0; }
-static inline int is_uv_hubless(int uv) { return 0; }
static inline void uv_cpu_init(void) { }
static inline void uv_system_init(void) { }
static inline const struct cpumask *
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index 950cd1395d5d..60ca0afdeaf9 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -219,20 +219,6 @@ static inline struct uv_hub_info_s *uv_cpu_hub_info(int cpu)
return (struct uv_hub_info_s *)uv_cpu_info_per(cpu)->p_uv_hub_info;
}
-#define UV_HUB_INFO_VERSION 0x7150
-extern int uv_hub_info_version(void);
-static inline int uv_hub_info_check(int version)
-{
- if (uv_hub_info_version() == version)
- return 0;
-
- pr_crit("UV: uv_hub_info version(%x) mismatch, expecting(%x)\n",
- uv_hub_info_version(), version);
-
- BUG(); /* Catastrophic - cannot continue on unknown UV system */
-}
-#define _uv_hub_info_check() uv_hub_info_check(UV_HUB_INFO_VERSION)
-
/*
* HUB revision ranges for each UV HUB architecture.
* This is a software convention - NOT the hardware revision numbers in
@@ -244,51 +230,32 @@ static inline int uv_hub_info_check(int version)
#define UV4_HUB_REVISION_BASE 7
#define UV4A_HUB_REVISION_BASE 8 /* UV4 (fixed) rev 2 */
-/* WARNING: UVx_HUB_IS_SUPPORTED defines are deprecated and will be removed */
static inline int is_uv1_hub(void)
{
-#ifdef UV1_HUB_IS_SUPPORTED
return is_uv_hubbed(uv(1));
-#else
- return 0;
-#endif
}
static inline int is_uv2_hub(void)
{
-#ifdef UV2_HUB_IS_SUPPORTED
return is_uv_hubbed(uv(2));
-#else
- return 0;
-#endif
}
static inline int is_uv3_hub(void)
{
-#ifdef UV3_HUB_IS_SUPPORTED
return is_uv_hubbed(uv(3));
-#else
- return 0;
-#endif
}
/* First test "is UV4A", then "is UV4" */
static inline int is_uv4a_hub(void)
{
-#ifdef UV4A_HUB_IS_SUPPORTED
if (is_uv_hubbed(uv(4)))
return (uv_hub_info->hub_revision == UV4A_HUB_REVISION_BASE);
-#endif
return 0;
}
static inline int is_uv4_hub(void)
{
-#ifdef UV4_HUB_IS_SUPPORTED
return is_uv_hubbed(uv(4));
-#else
- return 0;
-#endif
}
static inline int is_uvx_hub(void)
@@ -692,7 +659,6 @@ static inline int uv_cpu_blade_processor_id(int cpu)
{
return uv_cpu_info_per(cpu)->blade_cpu_id;
}
-#define _uv_cpu_blade_processor_id 1 /* indicate function available */
/* Blade number to Node number (UV1..UV4 is 1:1) */
static inline int uv_blade_to_node(int blade)
@@ -856,26 +822,6 @@ static inline void uv_set_cpu_scir_bits(int cpu, unsigned char value)
}
extern unsigned int uv_apicid_hibits;
-static unsigned long uv_hub_ipi_value(int apicid, int vector, int mode)
-{
- apicid |= uv_apicid_hibits;
- return (1UL << UVH_IPI_INT_SEND_SHFT) |
- ((apicid) << UVH_IPI_INT_APIC_ID_SHFT) |
- (mode << UVH_IPI_INT_DELIVERY_MODE_SHFT) |
- (vector << UVH_IPI_INT_VECTOR_SHFT);
-}
-
-static inline void uv_hub_send_ipi(int pnode, int apicid, int vector)
-{
- unsigned long val;
- unsigned long dmode = dest_Fixed;
-
- if (vector == NMI_VECTOR)
- dmode = dest_NMI;
-
- val = uv_hub_ipi_value(apicid, vector, dmode);
- uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
-}
/*
* Get the minimum revision number of the hub chips within the partition.
diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h
index 62c79e26a59a..9ee5ed6e8b34 100644
--- a/arch/x86/include/asm/uv/uv_mmrs.h
+++ b/arch/x86/include/asm/uv/uv_mmrs.h
@@ -99,13 +99,6 @@
#define UV3_HUB_PART_NUMBER_X 0x4321
#define UV4_HUB_PART_NUMBER 0x99a1
-/* Compat: Indicate which UV Hubs are supported. */
-#define UV1_HUB_IS_SUPPORTED 1
-#define UV2_HUB_IS_SUPPORTED 1
-#define UV3_HUB_IS_SUPPORTED 1
-#define UV4_HUB_IS_SUPPORTED 1
-#define UV4A_HUB_IS_SUPPORTED 1
-
/* Error function to catch undefined references */
extern unsigned long uv_undefined(char *str);
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 5e090d1f03f8..cd7de4b401fe 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -527,10 +527,12 @@ struct vmx_msr_entry {
/*
* Exit Qualifications for entry failure during or after loading guest state
*/
-#define ENTRY_FAIL_DEFAULT 0
-#define ENTRY_FAIL_PDPTE 2
-#define ENTRY_FAIL_NMI 3
-#define ENTRY_FAIL_VMCS_LINK_PTR 4
+enum vm_entry_failure_code {
+ ENTRY_FAIL_DEFAULT = 0,
+ ENTRY_FAIL_PDPTE = 2,
+ ENTRY_FAIL_NMI = 3,
+ ENTRY_FAIL_VMCS_LINK_PTR = 4,
+};
/*
* Exit Qualifications for EPT Violations
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 96d9cd208610..6807153c0410 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -50,14 +50,12 @@ struct x86_init_resources {
* @pre_vector_init: init code to run before interrupt vectors
* are set up.
* @intr_init: interrupt init code
- * @trap_init: platform specific trap setup
* @intr_mode_select: interrupt delivery mode selection
* @intr_mode_init: interrupt delivery mode setup
*/
struct x86_init_irqs {
void (*pre_vector_init)(void);
void (*intr_init)(void);
- void (*trap_init)(void);
void (*intr_mode_select)(void);
void (*intr_mode_init)(void);
};
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 3f3f780c8c65..17c5a038f42d 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -385,32 +385,48 @@ struct kvm_sync_regs {
#define KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT (1 << 4)
#define KVM_STATE_NESTED_FORMAT_VMX 0
-#define KVM_STATE_NESTED_FORMAT_SVM 1 /* unused */
+#define KVM_STATE_NESTED_FORMAT_SVM 1
#define KVM_STATE_NESTED_GUEST_MODE 0x00000001
#define KVM_STATE_NESTED_RUN_PENDING 0x00000002
#define KVM_STATE_NESTED_EVMCS 0x00000004
#define KVM_STATE_NESTED_MTF_PENDING 0x00000008
+#define KVM_STATE_NESTED_GIF_SET 0x00000100
#define KVM_STATE_NESTED_SMM_GUEST_MODE 0x00000001
#define KVM_STATE_NESTED_SMM_VMXON 0x00000002
#define KVM_STATE_NESTED_VMX_VMCS_SIZE 0x1000
+#define KVM_STATE_NESTED_SVM_VMCB_SIZE 0x1000
+
+#define KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE 0x00000001
+
struct kvm_vmx_nested_state_data {
__u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
__u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
};
struct kvm_vmx_nested_state_hdr {
+ __u32 flags;
__u64 vmxon_pa;
__u64 vmcs12_pa;
+ __u64 preemption_timer_deadline;
struct {
__u16 flags;
} smm;
};
+struct kvm_svm_nested_state_data {
+ /* Save area only used if KVM_STATE_NESTED_RUN_PENDING. */
+ __u8 vmcb12[KVM_STATE_NESTED_SVM_VMCB_SIZE];
+};
+
+struct kvm_svm_nested_state_hdr {
+ __u64 vmcb_pa;
+};
+
/* for KVM_CAP_NESTED_STATE */
struct kvm_nested_state {
__u16 flags;
@@ -419,6 +435,7 @@ struct kvm_nested_state {
union {
struct kvm_vmx_nested_state_hdr vmx;
+ struct kvm_svm_nested_state_hdr svm;
/* Pad the header to 128 bytes. */
__u8 pad[120];
@@ -431,6 +448,7 @@ struct kvm_nested_state {
*/
union {
struct kvm_vmx_nested_state_data vmx[0];
+ struct kvm_svm_nested_state_data svm[0];
} data;
};
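
KVM_CAP_NESTED_STATE now carries SVM as well as VMX: a format-specific header lives in a padded union and the variable-sized VMCB12/VMCS12 payload follows it. A simplified userspace sketch of that header-plus-flexible-payload layout (struct names and the field subset are illustrative, not the uapi definitions):

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* Illustrative stand-ins for the uapi structures. */
    struct vmx_hdr { uint64_t vmxon_pa, vmcs12_pa; };
    struct svm_hdr { uint64_t vmcb_pa; };

    struct nested_state_like {
        uint16_t flags;
        uint16_t format;        /* 0 = VMX, 1 = SVM, as in the hunk */
        uint32_t size;          /* total size: header plus payload */
        union {
            struct vmx_hdr vmx;
            struct svm_hdr svm;
            uint8_t pad[120];   /* keep the header size fixed */
        } hdr;
        uint8_t data[];         /* format-specific payload, e.g. VMCB12 */
    };

    int main(void)
    {
        size_t payload = 0x1000;        /* one VMCB12-sized page */
        struct nested_state_like *s = calloc(1, sizeof(*s) + payload);

        if (!s)
            return 1;
        s->format = 1;                  /* SVM */
        s->hdr.svm.vmcb_pa = 0x1000;
        s->size = (uint32_t)(sizeof(*s) + payload);
        printf("total size: %u bytes\n", s->size);
        free(s);
        return 0;
    }
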
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
index 2a8e0b6b9805..812e9b4c1114 100644
--- a/arch/x86/include/uapi/asm/kvm_para.h
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -31,6 +31,7 @@
#define KVM_FEATURE_PV_SEND_IPI 11
#define KVM_FEATURE_POLL_CONTROL 12
#define KVM_FEATURE_PV_SCHED_YIELD 13
+#define KVM_FEATURE_ASYNC_PF_INT 14
#define KVM_HINTS_REALTIME 0
@@ -50,6 +51,8 @@
#define MSR_KVM_STEAL_TIME 0x4b564d03
#define MSR_KVM_PV_EOI_EN 0x4b564d04
#define MSR_KVM_POLL_CONTROL 0x4b564d05
+#define MSR_KVM_ASYNC_PF_INT 0x4b564d06
+#define MSR_KVM_ASYNC_PF_ACK 0x4b564d07
struct kvm_steal_time {
__u64 steal;
@@ -81,6 +84,11 @@ struct kvm_clock_pairing {
#define KVM_ASYNC_PF_ENABLED (1 << 0)
#define KVM_ASYNC_PF_SEND_ALWAYS (1 << 1)
#define KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT (1 << 2)
+#define KVM_ASYNC_PF_DELIVERY_AS_INT (1 << 3)
+
+/* MSR_KVM_ASYNC_PF_INT */
+#define KVM_ASYNC_PF_VEC_MASK GENMASK(7, 0)
+
/* Operations for KVM_HC_MMU_OP */
#define KVM_MMU_OP_WRITE_PTE 1
@@ -112,8 +120,13 @@ struct kvm_mmu_op_release_pt {
#define KVM_PV_REASON_PAGE_READY 2
struct kvm_vcpu_pv_apf_data {
- __u32 reason;
- __u8 pad[60];
+ /* Used for 'page not present' events delivered via #PF */
+ __u32 flags;
+
+ /* Used for 'page ready' events delivered via interrupt notification */
+ __u32 token;
+
+ __u8 pad[56];
__u32 enabled;
};
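
The shared async-PF slot is split: 'flags' keeps the #PF-delivered "page not present" signalling while the new 'token' word carries interrupt-delivered "page ready" notifications, with the pad shrunk so the overall size is unchanged. A minimal sketch that just pins that layout (field names follow the hunk; the size check is illustrative):

    #include <stdint.h>

    struct pv_apf_data_like {
        uint32_t flags;         /* 'page not present' events via #PF */
        uint32_t token;         /* 'page ready' events via interrupt */
        uint8_t  pad[56];       /* was pad[60] before 'token' existed */
        uint32_t enabled;
    };

    /* Total size stays the same as the old flags + pad[60] + enabled layout. */
    _Static_assert(sizeof(struct pv_apf_data_like) == 68, "layout changed");

    int main(void) { return 0; }
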
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index e95b72ec19bc..b8ff9e8ac0d5 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -150,6 +150,9 @@
{ EXIT_REASON_UMWAIT, "UMWAIT" }, \
{ EXIT_REASON_TPAUSE, "TPAUSE" }
+#define VMX_EXIT_REASON_FLAGS \
+ { VMX_EXIT_REASONS_FAILED_VMENTRY, "FAILED_VMENTRY" }
+
#define VMX_ABORT_SAVE_GUEST_MSR_FAIL 1
#define VMX_ABORT_LOAD_HOST_PDPTE_FAIL 2
#define VMX_ABORT_LOAD_HOST_MSR_FAIL 4
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index ba89cabe5fcf..2a7c3afa62e2 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -102,9 +102,7 @@ obj-$(CONFIG_KEXEC_FILE) += kexec-bzimage64.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
obj-y += kprobes/
obj-$(CONFIG_MODULES) += module.o
-ifeq ($(CONFIG_X86_32),y)
-obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o
-endif
+obj-$(CONFIG_X86_32) += doublefault_32.o
obj-$(CONFIG_KGDB) += kgdb.o
obj-$(CONFIG_VM86) += vm86_32.o
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c
index fe698f96617c..263eeaddb0aa 100644
--- a/arch/x86/kernel/apb_timer.c
+++ b/arch/x86/kernel/apb_timer.c
@@ -345,56 +345,3 @@ out_noapbt:
apb_timer_block_enabled = 0;
panic("failed to enable APB timer\n");
}
-
-/* called before apb_timer_enable, use early map */
-unsigned long apbt_quick_calibrate(void)
-{
- int i, scale;
- u64 old, new;
- u64 t1, t2;
- unsigned long khz = 0;
- u32 loop, shift;
-
- apbt_set_mapping();
- dw_apb_clocksource_start(clocksource_apbt);
-
- /* check if the timer can count down, otherwise return */
- old = dw_apb_clocksource_read(clocksource_apbt);
- i = 10000;
- while (--i) {
- if (old != dw_apb_clocksource_read(clocksource_apbt))
- break;
- }
- if (!i)
- goto failed;
-
- /* count 16 ms */
- loop = (apbt_freq / 1000) << 4;
-
- /* restart the timer to ensure it won't get to 0 in the calibration */
- dw_apb_clocksource_start(clocksource_apbt);
-
- old = dw_apb_clocksource_read(clocksource_apbt);
- old += loop;
-
- t1 = rdtsc();
-
- do {
- new = dw_apb_clocksource_read(clocksource_apbt);
- } while (new < old);
-
- t2 = rdtsc();
-
- shift = 5;
- if (unlikely(loop >> shift == 0)) {
- printk(KERN_INFO
- "APBT TSC calibration failed, not enough resolution\n");
- return 0;
- }
- scale = (int)div_u64((t2 - t1), loop >> shift);
- khz = (scale * (apbt_freq / 1000)) >> shift;
- printk(KERN_INFO "TSC freq calculated by APB timer is %lu khz\n", khz);
- return khz;
-failed:
- return 0;
-}
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index e53dda210cd7..4b1d31be50b4 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -544,46 +544,20 @@ static struct clock_event_device lapic_clockevent = {
};
static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
-static __init u32 hsx_deadline_rev(void)
-{
- switch (boot_cpu_data.x86_stepping) {
- case 0x02: return 0x3a; /* EP */
- case 0x04: return 0x0f; /* EX */
- }
-
- return ~0U;
-}
-
-static __init u32 bdx_deadline_rev(void)
-{
- switch (boot_cpu_data.x86_stepping) {
- case 0x02: return 0x00000011;
- case 0x03: return 0x0700000e;
- case 0x04: return 0x0f00000c;
- case 0x05: return 0x0e000003;
- }
-
- return ~0U;
-}
-
-static __init u32 skx_deadline_rev(void)
-{
- switch (boot_cpu_data.x86_stepping) {
- case 0x03: return 0x01000136;
- case 0x04: return 0x02000014;
- }
+static const struct x86_cpu_id deadline_match[] __initconst = {
+ X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(HASWELL_X, X86_STEPPINGS(0x2, 0x2), 0x3a), /* EP */
+ X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(HASWELL_X, X86_STEPPINGS(0x4, 0x4), 0x0f), /* EX */
- if (boot_cpu_data.x86_stepping > 4)
- return 0;
+ X86_MATCH_INTEL_FAM6_MODEL( BROADWELL_X, 0x0b000020),
- return ~0U;
-}
+ X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(BROADWELL_D, X86_STEPPINGS(0x2, 0x2), 0x00000011),
+ X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(BROADWELL_D, X86_STEPPINGS(0x3, 0x3), 0x0700000e),
+ X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(BROADWELL_D, X86_STEPPINGS(0x4, 0x4), 0x0f00000c),
+ X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(BROADWELL_D, X86_STEPPINGS(0x5, 0x5), 0x0e000003),
-static const struct x86_cpu_id deadline_match[] __initconst = {
- X86_MATCH_INTEL_FAM6_MODEL( HASWELL_X, &hsx_deadline_rev),
- X86_MATCH_INTEL_FAM6_MODEL( BROADWELL_X, 0x0b000020),
- X86_MATCH_INTEL_FAM6_MODEL( BROADWELL_D, &bdx_deadline_rev),
- X86_MATCH_INTEL_FAM6_MODEL( SKYLAKE_X, &skx_deadline_rev),
+ X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(SKYLAKE_X, X86_STEPPINGS(0x3, 0x3), 0x01000136),
+ X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(SKYLAKE_X, X86_STEPPINGS(0x4, 0x4), 0x02000014),
+ X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(SKYLAKE_X, X86_STEPPINGS(0x5, 0xf), 0),
X86_MATCH_INTEL_FAM6_MODEL( HASWELL, 0x22),
X86_MATCH_INTEL_FAM6_MODEL( HASWELL_L, 0x20),
@@ -615,14 +589,7 @@ static __init bool apic_validate_deadline_timer(void)
if (!m)
return true;
- /*
- * Function pointers will have the MSB set due to address layout,
- * immediate revisions will not.
- */
- if ((long)m->driver_data < 0)
- rev = ((u32 (*)(void))(m->driver_data))();
- else
- rev = (u32)m->driver_data;
+ rev = (u32)m->driver_data;
if (boot_cpu_data.microcode >= rev)
return true;
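
The per-model callback functions are gone: each (model, stepping range) pair now gets its own table entry whose driver_data is the minimum good microcode revision, via X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(). A rough equivalent of how a stepping range becomes a bitmask that a single entry can match against (the macro internals here are an assumption; only the min/max-to-bitmask idea is shown):

    #include <stdint.h>
    #include <stdio.h>

    /* Rough equivalent of X86_STEPPINGS(min, max): a bitmask with bits
     * min..max set, so one table entry can cover a stepping range. */
    static uint16_t steppings_mask(unsigned int min, unsigned int max)
    {
        return (uint16_t)(((1u << (max - min + 1)) - 1) << min);
    }

    int main(void)
    {
        unsigned int stepping = 3;
        uint16_t mask = steppings_mask(0x2, 0x4);

        if (mask & (1u << stepping))
            printf("stepping %u matches this table entry\n", stepping);
        return 0;
    }
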
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 913c88617848..ce61e3e7d399 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -154,19 +154,6 @@ static inline bool mp_is_legacy_irq(int irq)
return irq >= 0 && irq < nr_legacy_irqs();
}
-/*
- * Initialize all legacy IRQs and all pins on the first IOAPIC
- * if we have legacy interrupt controller. Kernel boot option "pirq="
- * may rely on non-legacy pins on the first IOAPIC.
- */
-static inline int mp_init_irq_at_boot(int ioapic, int irq)
-{
- if (!nr_legacy_irqs())
- return 0;
-
- return ioapic == 0 || mp_is_legacy_irq(irq);
-}
-
static inline struct irq_domain *mp_ioapic_irqdomain(int ioapic)
{
return ioapics[ioapic].irqdomain;
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index ad53b2abc859..69e70ed0f5e6 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -30,8 +30,6 @@ static enum uv_system_type uv_system_type;
static int uv_hubbed_system;
static int uv_hubless_system;
static u64 gru_start_paddr, gru_end_paddr;
-static u64 gru_dist_base, gru_first_node_paddr = -1LL, gru_last_node_paddr;
-static u64 gru_dist_lmask, gru_dist_umask;
static union uvh_apicid uvh_apicid;
/* Unpack OEM/TABLE ID's to be NULL terminated strings */
@@ -48,11 +46,9 @@ static struct {
unsigned int gnode_shift;
} uv_cpuid;
-int uv_min_hub_revision_id;
-EXPORT_SYMBOL_GPL(uv_min_hub_revision_id);
+static int uv_min_hub_revision_id;
unsigned int uv_apicid_hibits;
-EXPORT_SYMBOL_GPL(uv_apicid_hibits);
static struct apic apic_x2apic_uv_x;
static struct uv_hub_info_s uv_hub_info_node0;
@@ -85,20 +81,7 @@ static unsigned long __init uv_early_read_mmr(unsigned long addr)
static inline bool is_GRU_range(u64 start, u64 end)
{
- if (gru_dist_base) {
- u64 su = start & gru_dist_umask; /* Upper (incl pnode) bits */
- u64 sl = start & gru_dist_lmask; /* Base offset bits */
- u64 eu = end & gru_dist_umask;
- u64 el = end & gru_dist_lmask;
-
- /* Must reside completely within a single GRU range: */
- return (sl == gru_dist_base && el == gru_dist_base &&
- su >= gru_first_node_paddr &&
- su <= gru_last_node_paddr &&
- eu == su);
- } else {
- return start >= gru_start_paddr && end <= gru_end_paddr;
- }
+ return start >= gru_start_paddr && end <= gru_end_paddr;
}
static bool uv_is_untracked_pat_range(u64 start, u64 end)
@@ -385,11 +368,10 @@ int is_uv_hubbed(int uvtype)
}
EXPORT_SYMBOL_GPL(is_uv_hubbed);
-int is_uv_hubless(int uvtype)
+static int is_uv_hubless(int uvtype)
{
return (uv_hubless_system & uvtype);
}
-EXPORT_SYMBOL_GPL(is_uv_hubless);
void **__uv_hub_info_list;
EXPORT_SYMBOL_GPL(__uv_hub_info_list);
@@ -417,12 +399,6 @@ static __initdata struct uv_gam_range_s *_gr_table;
#define SOCK_EMPTY ((unsigned short)~0)
-extern int uv_hub_info_version(void)
-{
- return UV_HUB_INFO_VERSION;
-}
-EXPORT_SYMBOL(uv_hub_info_version);
-
/* Default UV memory block size is 2GB */
static unsigned long mem_block_size __initdata = (2UL << 30);
@@ -590,12 +566,21 @@ static int uv_wakeup_secondary(int phys_apicid, unsigned long start_rip)
static void uv_send_IPI_one(int cpu, int vector)
{
- unsigned long apicid;
- int pnode;
+ unsigned long apicid = per_cpu(x86_cpu_to_apicid, cpu);
+ int pnode = uv_apicid_to_pnode(apicid);
+ unsigned long dmode, val;
+
+ if (vector == NMI_VECTOR)
+ dmode = dest_NMI;
+ else
+ dmode = dest_Fixed;
+
+ val = (1UL << UVH_IPI_INT_SEND_SHFT) |
+ ((apicid | uv_apicid_hibits) << UVH_IPI_INT_APIC_ID_SHFT) |
+ (dmode << UVH_IPI_INT_DELIVERY_MODE_SHFT) |
+ (vector << UVH_IPI_INT_VECTOR_SHFT);
- apicid = per_cpu(x86_cpu_to_apicid, cpu);
- pnode = uv_apicid_to_pnode(apicid);
- uv_hub_send_ipi(pnode, apicid, vector);
+ uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
}
static void uv_send_IPI_mask(const struct cpumask *mask, int vector)
@@ -797,42 +782,6 @@ static __init void map_high(char *id, unsigned long base, int pshift, int bshift
init_extra_mapping_wb(paddr, bytes);
}
-static __init void map_gru_distributed(unsigned long c)
-{
- union uvh_rh_gam_gru_overlay_config_mmr_u gru;
- u64 paddr;
- unsigned long bytes;
- int nid;
-
- gru.v = c;
-
- /* Only base bits 42:28 relevant in dist mode */
- gru_dist_base = gru.v & 0x000007fff0000000UL;
- if (!gru_dist_base) {
- pr_info("UV: Map GRU_DIST base address NULL\n");
- return;
- }
-
- bytes = 1UL << UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT;
- gru_dist_lmask = ((1UL << uv_hub_info->m_val) - 1) & ~(bytes - 1);
- gru_dist_umask = ~((1UL << uv_hub_info->m_val) - 1);
- gru_dist_base &= gru_dist_lmask; /* Clear bits above M */
-
- for_each_online_node(nid) {
- paddr = ((u64)uv_node_to_pnode(nid) << uv_hub_info->m_val) |
- gru_dist_base;
- init_extra_mapping_wb(paddr, bytes);
- gru_first_node_paddr = min(paddr, gru_first_node_paddr);
- gru_last_node_paddr = max(paddr, gru_last_node_paddr);
- }
-
- /* Save upper (63:M) bits of address only for is_GRU_range */
- gru_first_node_paddr &= gru_dist_umask;
- gru_last_node_paddr &= gru_dist_umask;
-
- pr_debug("UV: Map GRU_DIST base 0x%016llx 0x%016llx - 0x%016llx\n", gru_dist_base, gru_first_node_paddr, gru_last_node_paddr);
-}
-
static __init void map_gru_high(int max_pnode)
{
union uvh_rh_gam_gru_overlay_config_mmr_u gru;
@@ -846,12 +795,6 @@ static __init void map_gru_high(int max_pnode)
return;
}
- /* Only UV3 has distributed GRU mode */
- if (is_uv3_hub() && gru.s3.mode) {
- map_gru_distributed(gru.v);
- return;
- }
-
base = (gru.v & mask) >> shift;
map_high("GRU", base, shift, shift, max_pnode, map_wb);
gru_start_paddr = ((u64)base << shift);
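
With uv_hub_send_ipi()/uv_hub_ipi_value() removed from uv_hub.h, uv_send_IPI_one() assembles the UVH_IPI_INT value itself from the APIC id, delivery mode and vector. A standalone sketch of that field packing (the shift constants below are placeholders, not the real UVH_IPI_INT_* values):

    #include <stdint.h>
    #include <stdio.h>

    /* Placeholder shifts; the real ones come from UVH_IPI_INT_*_SHFT. */
    #define SEND_SHIFT     63
    #define APIC_ID_SHIFT  32
    #define DMODE_SHIFT     8
    #define VECTOR_SHIFT    0

    static uint64_t uv_ipi_val(uint64_t apicid, uint64_t dmode, uint64_t vector)
    {
        return (1ULL << SEND_SHIFT) |
               (apicid << APIC_ID_SHIFT) |
               (dmode  << DMODE_SHIFT) |
               (vector << VECTOR_SHIFT);
    }

    int main(void)
    {
        /* Fixed delivery mode (0 here as a stand-in), vector 0xf2. */
        printf("%#llx\n", (unsigned long long)uv_ipi_val(0x12, 0, 0xf2));
        return 0;
    }
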
diff --git a/arch/x86/kernel/audit_64.c b/arch/x86/kernel/audit_64.c
index e1efe44ebefc..83d9cad4e68b 100644
--- a/arch/x86/kernel/audit_64.c
+++ b/arch/x86/kernel/audit_64.c
@@ -3,6 +3,7 @@
#include <linux/types.h>
#include <linux/audit.h>
#include <asm/unistd.h>
+#include <asm/audit.h>
static unsigned dir_class[] = {
#include <asm-generic/audit_dir_write.h>
@@ -41,7 +42,6 @@ int audit_classify_arch(int arch)
int audit_classify_syscall(int abi, unsigned syscall)
{
#ifdef CONFIG_IA32_EMULATION
- extern int ia32_classify_syscall(unsigned);
if (abi == AUDIT_ARCH_I386)
return ia32_classify_syscall(syscall);
#endif
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 547ad7bbf0e0..d4806eac9325 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -18,6 +18,7 @@
#include <asm/pci-direct.h>
#include <asm/delay.h>
#include <asm/debugreg.h>
+#include <asm/resctrl.h>
#ifdef CONFIG_X86_64
# include <asm/mmconfig.h>
@@ -597,6 +598,8 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
x86_amd_ls_cfg_ssbd_mask = 1ULL << bit;
}
}
+
+ resctrl_cpu_detect(c);
}
static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
@@ -1142,8 +1145,7 @@ static const int amd_erratum_383[] =
/* #1054: Instructions Retired Performance Counter May Be Inaccurate */
static const int amd_erratum_1054[] =
- AMD_OSVW_ERRATUM(0, AMD_MODEL_RANGE(0x17, 0, 0, 0x2f, 0xf));
-
+ AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0, 0, 0x2f, 0xf));
static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum)
{
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index bed0cb83fe24..d07809286b95 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -854,30 +854,6 @@ static void init_speculation_control(struct cpuinfo_x86 *c)
}
}
-static void init_cqm(struct cpuinfo_x86 *c)
-{
- if (!cpu_has(c, X86_FEATURE_CQM_LLC)) {
- c->x86_cache_max_rmid = -1;
- c->x86_cache_occ_scale = -1;
- return;
- }
-
- /* will be overridden if occupancy monitoring exists */
- c->x86_cache_max_rmid = cpuid_ebx(0xf);
-
- if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC) ||
- cpu_has(c, X86_FEATURE_CQM_MBM_TOTAL) ||
- cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL)) {
- u32 eax, ebx, ecx, edx;
-
- /* QoS sub-leaf, EAX=0Fh, ECX=1 */
- cpuid_count(0xf, 1, &eax, &ebx, &ecx, &edx);
-
- c->x86_cache_max_rmid = ecx;
- c->x86_cache_occ_scale = ebx;
- }
-}
-
void get_cpu_cap(struct cpuinfo_x86 *c)
{
u32 eax, ebx, ecx, edx;
@@ -945,7 +921,6 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
init_scattered_cpuid_features(c);
init_speculation_control(c);
- init_cqm(c);
/*
* Clear/Set all flags overridden by options, after probe.
@@ -1377,20 +1352,6 @@ static void generic_identify(struct cpuinfo_x86 *c)
#endif
}
-static void x86_init_cache_qos(struct cpuinfo_x86 *c)
-{
- /*
- * The heavy lifting of max_rmid and cache_occ_scale are handled
- * in get_cpu_cap(). Here we just set the max_rmid for the boot_cpu
- * in case CQM bits really aren't there in this CPU.
- */
- if (c != &boot_cpu_data) {
- boot_cpu_data.x86_cache_max_rmid =
- min(boot_cpu_data.x86_cache_max_rmid,
- c->x86_cache_max_rmid);
- }
-}
-
/*
* Validate that ACPI/mptables have the same information about the
* effective APIC id and update the package map.
@@ -1503,7 +1464,6 @@ static void identify_cpu(struct cpuinfo_x86 *c)
#endif
x86_init_rdrand(c);
- x86_init_cache_qos(c);
setup_pku(c);
/*
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index a19a680542ce..166d7c355896 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -22,6 +22,7 @@
#include <asm/cpu_device_id.h>
#include <asm/cmdline.h>
#include <asm/traps.h>
+#include <asm/resctrl.h>
#ifdef CONFIG_X86_64
#include <linux/topology.h>
@@ -322,6 +323,11 @@ static void early_init_intel(struct cpuinfo_x86 *c)
detect_ht_early(c);
}
+static void bsp_init_intel(struct cpuinfo_x86 *c)
+{
+ resctrl_cpu_detect(c);
+}
+
#ifdef CONFIG_X86_32
/*
* Early probe support logic for ppro memory erratum #50
@@ -961,6 +967,7 @@ static const struct cpu_dev intel_cpu_dev = {
#endif
.c_detect_tlb = intel_detect_tlb,
.c_early_init = early_init_intel,
+ .c_bsp_init = bsp_init_intel,
.c_init = init_intel,
.c_x86_vendor = X86_VENDOR_INTEL,
};
diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c
index d3482eb43ff3..ad6776081e60 100644
--- a/arch/x86/kernel/cpu/match.c
+++ b/arch/x86/kernel/cpu/match.c
@@ -39,13 +39,18 @@ const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match)
const struct x86_cpu_id *m;
struct cpuinfo_x86 *c = &boot_cpu_data;
- for (m = match; m->vendor | m->family | m->model | m->feature; m++) {
+ for (m = match;
+ m->vendor | m->family | m->model | m->steppings | m->feature;
+ m++) {
if (m->vendor != X86_VENDOR_ANY && c->x86_vendor != m->vendor)
continue;
if (m->family != X86_FAMILY_ANY && c->x86 != m->family)
continue;
if (m->model != X86_MODEL_ANY && c->x86_model != m->model)
continue;
+ if (m->steppings != X86_STEPPING_ANY &&
+ !(BIT(c->x86_stepping) & m->steppings))
+ continue;
if (m->feature != X86_FEATURE_ANY && !cpu_has(c, m->feature))
continue;
return m;
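
x86_match_cpu() gains a steppings check: a zero mask keeps the old "any stepping" behaviour, a non-zero mask is a bitmap of acceptable stepping values. A simplified mirror of the updated loop (vendor and feature checks elided; the types are invented for the sketch):

    #include <stdint.h>
    #include <stdio.h>

    #define ANY 0   /* stand-in for the X86_*_ANY wildcards */

    struct cpu_id_like {
        uint16_t vendor, family, model, steppings, feature;
    };

    struct cpu_like {
        uint16_t vendor, family, model, stepping;
    };

    static const struct cpu_id_like *
    match_cpu(const struct cpu_id_like *tbl, const struct cpu_like *c)
    {
        const struct cpu_id_like *m;

        /* An all-zero entry terminates the table, as in the kernel loop. */
        for (m = tbl; m->vendor | m->family | m->model | m->steppings | m->feature; m++) {
            if (m->family != ANY && m->family != c->family)
                continue;
            if (m->model != ANY && m->model != c->model)
                continue;
            if (m->steppings != ANY && !((1u << c->stepping) & m->steppings))
                continue;
            return m;
        }
        return NULL;
    }

    int main(void)
    {
        static const struct cpu_id_like table[] = {
            { .family = 6, .model = 85, .steppings = 1u << 4 },
            { 0 }   /* terminator */
        };
        struct cpu_like c = { .family = 6, .model = 85, .stepping = 4 };

        printf("%s\n", match_cpu(table, &c) ? "hit" : "miss");
        return 0;
    }
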
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 54165f3569e8..e9265e2f28c9 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -42,6 +42,8 @@
#include <linux/export.h>
#include <linux/jump_label.h>
#include <linux/set_memory.h>
+#include <linux/task_work.h>
+#include <linux/hardirq.h>
#include <asm/intel-family.h>
#include <asm/processor.h>
@@ -1086,23 +1088,6 @@ static void mce_clear_state(unsigned long *toclear)
}
}
-static int do_memory_failure(struct mce *m)
-{
- int flags = MF_ACTION_REQUIRED;
- int ret;
-
- pr_err("Uncorrected hardware memory error in user-access at %llx", m->addr);
- if (!(m->mcgstatus & MCG_STATUS_RIPV))
- flags |= MF_MUST_KILL;
- ret = memory_failure(m->addr >> PAGE_SHIFT, flags);
- if (ret)
- pr_err("Memory error not recovered");
- else
- set_mce_nospec(m->addr >> PAGE_SHIFT);
- return ret;
-}
-
-
/*
* Cases where we avoid rendezvous handler timeout:
* 1) If this CPU is offline.
@@ -1204,6 +1189,29 @@ static void __mc_scan_banks(struct mce *m, struct mce *final,
*m = *final;
}
+static void kill_me_now(struct callback_head *ch)
+{
+ force_sig(SIGBUS);
+}
+
+static void kill_me_maybe(struct callback_head *cb)
+{
+ struct task_struct *p = container_of(cb, struct task_struct, mce_kill_me);
+ int flags = MF_ACTION_REQUIRED;
+
+ pr_err("Uncorrected hardware memory error in user-access at %llx", p->mce_addr);
+ if (!(p->mce_status & MCG_STATUS_RIPV))
+ flags |= MF_MUST_KILL;
+
+ if (!memory_failure(p->mce_addr >> PAGE_SHIFT, flags)) {
+ set_mce_nospec(p->mce_addr >> PAGE_SHIFT);
+ return;
+ }
+
+ pr_err("Memory error not recovered");
+ kill_me_now(cb);
+}
+
/*
* The actual machine check handler. This only handles real
* exceptions when something got corrupted coming in through int 18.
@@ -1222,7 +1230,7 @@ static void __mc_scan_banks(struct mce *m, struct mce *final,
* backing the user stack, tracing that reads the user stack will cause
* potentially infinite recursion.
*/
-void notrace do_machine_check(struct pt_regs *regs, long error_code)
+void noinstr do_machine_check(struct pt_regs *regs, long error_code)
{
DECLARE_BITMAP(valid_banks, MAX_NR_BANKS);
DECLARE_BITMAP(toclear, MAX_NR_BANKS);
@@ -1259,7 +1267,7 @@ void notrace do_machine_check(struct pt_regs *regs, long error_code)
if (__mc_check_crashing_cpu(cpu))
return;
- ist_enter(regs);
+ nmi_enter();
this_cpu_inc(mce_exception_count);
@@ -1352,23 +1360,24 @@ void notrace do_machine_check(struct pt_regs *regs, long error_code)
/* Fault was in user mode and we need to take some action */
if ((m.cs & 3) == 3) {
- ist_begin_non_atomic(regs);
- local_irq_enable();
-
- if (kill_it || do_memory_failure(&m))
- force_sig(SIGBUS);
- local_irq_disable();
- ist_end_non_atomic();
+ /* If this triggers there is no way to recover. Die hard. */
+ BUG_ON(!on_thread_stack() || !user_mode(regs));
+
+ current->mce_addr = m.addr;
+ current->mce_status = m.mcgstatus;
+ current->mce_kill_me.func = kill_me_maybe;
+ if (kill_it)
+ current->mce_kill_me.func = kill_me_now;
+ task_work_add(current, &current->mce_kill_me, true);
} else {
if (!fixup_exception(regs, X86_TRAP_MC, error_code, 0))
mce_panic("Failed kernel mode recovery", &m, msg);
}
out_ist:
- ist_exit(regs);
+ nmi_exit();
}
EXPORT_SYMBOL_GPL(do_machine_check);
-NOKPROBE_SYMBOL(do_machine_check);
#ifndef CONFIG_MEMORY_FAILURE
int memory_failure(unsigned long pfn, int flags)
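
Instead of dropping to a non-atomic section inside the #MC handler, do_machine_check() now only records the fault and queues kill_me_maybe()/kill_me_now() through task_work_add(); the memory_failure() call and any SIGBUS happen later in the task's own context. A toy illustration of that "queue a callback, run it outside the exception handler" shape (plain function pointers stand in for task_work):

    #include <stdio.h>

    struct callback {
        void (*func)(struct callback *cb);
    };

    static void kill_me_now_like(struct callback *cb)
    {
        (void)cb;
        printf("send SIGBUS immediately\n");
    }

    static void kill_me_maybe_like(struct callback *cb)
    {
        (void)cb;
        printf("try memory_failure(), SIGBUS only if recovery fails\n");
    }

    int main(void)
    {
        struct callback cb = { .func = kill_me_maybe_like };
        int kill_it = 0;        /* set when the error is not recoverable */

        if (kill_it)
            cb.func = kill_me_now_like;

        /* "task_work": in the kernel this runs on return to user mode. */
        cb.func(&cb);
        return 0;
    }
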
diff --git a/arch/x86/kernel/cpu/mce/p5.c b/arch/x86/kernel/cpu/mce/p5.c
index 4ae6df556526..5ee94aa1b766 100644
--- a/arch/x86/kernel/cpu/mce/p5.c
+++ b/arch/x86/kernel/cpu/mce/p5.c
@@ -7,6 +7,7 @@
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/smp.h>
+#include <linux/hardirq.h>
#include <asm/processor.h>
#include <asm/traps.h>
@@ -24,7 +25,7 @@ static void pentium_machine_check(struct pt_regs *regs, long error_code)
{
u32 loaddr, hi, lotype;
- ist_enter(regs);
+ nmi_enter();
rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi);
rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi);
@@ -39,7 +40,7 @@ static void pentium_machine_check(struct pt_regs *regs, long error_code)
add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
- ist_exit(regs);
+ nmi_exit();
}
/* Set up machine check reporting for processors with Intel style MCE: */
diff --git a/arch/x86/kernel/cpu/mce/winchip.c b/arch/x86/kernel/cpu/mce/winchip.c
index a30ea13cccc2..b3938c195365 100644
--- a/arch/x86/kernel/cpu/mce/winchip.c
+++ b/arch/x86/kernel/cpu/mce/winchip.c
@@ -6,6 +6,7 @@
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/types.h>
+#include <linux/hardirq.h>
#include <asm/processor.h>
#include <asm/traps.h>
@@ -18,12 +19,12 @@
/* Machine check handler for WinChip C6: */
static void winchip_machine_check(struct pt_regs *regs, long error_code)
{
- ist_enter(regs);
+ nmi_enter();
pr_emerg("CPU0: Machine Check Exception.\n");
add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
- ist_exit(regs);
+ nmi_exit();
}
/* Set up machine check reporting on the Winchip C6 series */
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 7019d4b2df0c..baec68b7e010 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -545,8 +545,7 @@ static int __wait_for_cpus(atomic_t *t, long long timeout)
/*
* Returns:
* < 0 - on error
- * 0 - no update done
- * 1 - microcode was updated
+ * 0 - success (no update done or microcode was updated)
*/
static int __reload_late(void *info)
{
@@ -573,11 +572,11 @@ static int __reload_late(void *info)
else
goto wait_for_siblings;
- if (err > UCODE_NFOUND) {
- pr_warn("Error reloading microcode on CPU %d\n", cpu);
+ if (err >= UCODE_NFOUND) {
+ if (err == UCODE_ERROR)
+ pr_warn("Error reloading microcode on CPU %d\n", cpu);
+
ret = -1;
- } else if (err == UCODE_UPDATED || err == UCODE_OK) {
- ret = 1;
}
wait_for_siblings:
@@ -608,7 +607,7 @@ static int microcode_reload_late(void)
atomic_set(&late_cpus_out, 0);
ret = stop_machine_cpuslocked(__reload_late, NULL, cpu_online_mask);
- if (ret > 0)
+ if (ret == 0)
microcode_check();
pr_info("Reload completed, microcode revision: 0x%x\n", boot_cpu_data.microcode);
@@ -649,7 +648,7 @@ static ssize_t reload_store(struct device *dev,
put:
put_online_cpus();
- if (ret >= 0)
+ if (ret == 0)
ret = size;
return ret;
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index 9556930cd8c1..a5ee607a3b89 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -63,6 +63,10 @@ static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
case 15:
return msr - MSR_P4_BPU_PERFCTR0;
}
+ fallthrough;
+ case X86_VENDOR_ZHAOXIN:
+ case X86_VENDOR_CENTAUR:
+ return msr - MSR_ARCH_PERFMON_PERFCTR0;
}
return 0;
}
@@ -92,6 +96,10 @@ static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
case 15:
return msr - MSR_P4_BSU_ESCR0;
}
+ fallthrough;
+ case X86_VENDOR_ZHAOXIN:
+ case X86_VENDOR_CENTAUR:
+ return msr - MSR_ARCH_PERFMON_EVENTSEL0;
}
return 0;
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index d8cc5223b7ce..12f967c6b603 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -22,7 +22,7 @@
#include <linux/cpuhotplug.h>
#include <asm/intel-family.h>
-#include <asm/resctrl_sched.h>
+#include <asm/resctrl.h>
#include "internal.h"
/* Mutex to protect rdtgroup access. */
@@ -958,6 +958,36 @@ static __init void rdt_init_res_defs(void)
static enum cpuhp_state rdt_online;
+/* Runs once on the BSP during boot. */
+void resctrl_cpu_detect(struct cpuinfo_x86 *c)
+{
+ if (!cpu_has(c, X86_FEATURE_CQM_LLC)) {
+ c->x86_cache_max_rmid = -1;
+ c->x86_cache_occ_scale = -1;
+ c->x86_cache_mbm_width_offset = -1;
+ return;
+ }
+
+ /* will be overridden if occupancy monitoring exists */
+ c->x86_cache_max_rmid = cpuid_ebx(0xf);
+
+ if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC) ||
+ cpu_has(c, X86_FEATURE_CQM_MBM_TOTAL) ||
+ cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL)) {
+ u32 eax, ebx, ecx, edx;
+
+ /* QoS sub-leaf, EAX=0Fh, ECX=1 */
+ cpuid_count(0xf, 1, &eax, &ebx, &ecx, &edx);
+
+ c->x86_cache_max_rmid = ecx;
+ c->x86_cache_occ_scale = ebx;
+ if (c->x86_vendor == X86_VENDOR_INTEL)
+ c->x86_cache_mbm_width_offset = eax & 0xff;
+ else
+ c->x86_cache_mbm_width_offset = -1;
+ }
+}
+
static int __init resctrl_late_init(void)
{
struct rdt_resource *r;
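
resctrl_cpu_detect() takes over the old init_cqm() CPUID probing and additionally reads the MBM counter-width offset from EAX[7:0] of leaf 0xF, sub-leaf 1. A userspace peek at the same leaf (x86 only; uses the GCC/clang cpuid.h helper):

    #include <stdio.h>
    #include <cpuid.h>      /* GCC/clang built-in helper, x86 only */

    int main(void)
    {
        unsigned int eax, ebx, ecx, edx;

        /* CPUID leaf 0xF, sub-leaf 1: L3 QoS monitoring parameters.
         * ECX = max RMID, EBX = occupancy scale factor, and on Intel
         * EAX[7:0] = MBM counter width offset from the 24-bit base. */
        if (!__get_cpuid_count(0xf, 1, &eax, &ebx, &ecx, &edx)) {
            printf("CPUID leaf 0xF not supported on this CPU\n");
            return 0;
        }

        printf("max_rmid=%u occ_scale=%u mbm_width_offset=%u\n",
               ecx, ebx, eax & 0xff);
        return 0;
    }
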
diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
index 055c8613b531..934c8fb8a64a 100644
--- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
+++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
@@ -495,14 +495,16 @@ int rdtgroup_schemata_show(struct kernfs_open_file *of,
return ret;
}
-void mon_event_read(struct rmid_read *rr, struct rdt_domain *d,
- struct rdtgroup *rdtgrp, int evtid, int first)
+void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
+ struct rdt_domain *d, struct rdtgroup *rdtgrp,
+ int evtid, int first)
{
/*
* setup the parameters to send to the IPI to read the data.
*/
rr->rgrp = rdtgrp;
rr->evtid = evtid;
+ rr->r = r;
rr->d = d;
rr->val = 0;
rr->first = first;
@@ -539,7 +541,7 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg)
goto out;
}
- mon_event_read(&rr, d, rdtgrp, evtid, false);
+ mon_event_read(&rr, r, d, rdtgrp, evtid, false);
if (rr.val & RMID_VAL_ERROR)
seq_puts(m, "Error\n");
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index 3dd13f3a8b23..f20a47d120b1 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -31,7 +31,7 @@
#define CQM_LIMBOCHECK_INTERVAL 1000
-#define MBM_CNTR_WIDTH 24
+#define MBM_CNTR_WIDTH_BASE 24
#define MBM_OVERFLOW_INTERVAL 1000
#define MAX_MBA_BW 100u
#define MBA_IS_LINEAR 0x4
@@ -40,6 +40,12 @@
#define RMID_VAL_ERROR BIT_ULL(63)
#define RMID_VAL_UNAVAIL BIT_ULL(62)
+/*
+ * With the above fields in use 62 bits remain in MSR_IA32_QM_CTR for
+ * data to be returned. The counter width is discovered from the hardware
+ * as an offset from MBM_CNTR_WIDTH_BASE.
+ */
+#define MBM_CNTR_WIDTH_OFFSET_MAX (62 - MBM_CNTR_WIDTH_BASE)
struct rdt_fs_context {
@@ -87,6 +93,7 @@ union mon_data_bits {
struct rmid_read {
struct rdtgroup *rgrp;
+ struct rdt_resource *r;
struct rdt_domain *d;
int evtid;
bool first;
@@ -460,6 +467,7 @@ struct rdt_resource {
struct list_head evt_list;
int num_rmid;
unsigned int mon_scale;
+ unsigned int mbm_width;
unsigned long fflags;
};
@@ -587,8 +595,9 @@ void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
unsigned int dom_id);
void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
struct rdt_domain *d);
-void mon_event_read(struct rmid_read *rr, struct rdt_domain *d,
- struct rdtgroup *rdtgrp, int evtid, int first);
+void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
+ struct rdt_domain *d, struct rdtgroup *rdtgrp,
+ int evtid, int first);
void mbm_setup_overflow_handler(struct rdt_domain *dom,
unsigned long delay_ms);
void mbm_handle_overflow(struct work_struct *work);
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index 773124b0e18a..837d7d012b7b 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -214,9 +214,9 @@ void free_rmid(u32 rmid)
list_add_tail(&entry->list, &rmid_free_lru);
}
-static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr)
+static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width)
{
- u64 shift = 64 - MBM_CNTR_WIDTH, chunks;
+ u64 shift = 64 - width, chunks;
chunks = (cur_msr << shift) - (prev_msr << shift);
return chunks >>= shift;
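
mbm_overflow_count() is parameterised on the counter width so the wraparound arithmetic also works with the wider counters advertised through the new offset. The trick is unchanged: shift both samples so the counter's top bit lands at bit 63, subtract, shift back. A standalone version with a worked wraparound example:

    #include <stdint.h>
    #include <stdio.h>

    /* Unsigned wraparound in the top 'width' bits yields the number of
     * chunks counted even if the hardware counter rolled over between
     * the two reads. */
    static uint64_t overflow_count(uint64_t prev, uint64_t cur, unsigned int width)
    {
        uint64_t shift = 64 - width;

        return ((cur << shift) - (prev << shift)) >> shift;
    }

    int main(void)
    {
        unsigned int width = 24;                    /* MBM_CNTR_WIDTH_BASE */
        uint64_t prev = 0xfffff0, cur = 0x000010;   /* 24-bit counter wrapped */

        /* Prints 32: 0x10 chunks to the wrap point plus 0x10 after it. */
        printf("chunks: %llu\n",
               (unsigned long long)overflow_count(prev, cur, width));
        return 0;
    }
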
@@ -256,7 +256,7 @@ static int __mon_event_count(u32 rmid, struct rmid_read *rr)
return 0;
}
- chunks = mbm_overflow_count(m->prev_msr, tval);
+ chunks = mbm_overflow_count(m->prev_msr, tval, rr->r->mbm_width);
m->chunks += chunks;
m->prev_msr = tval;
@@ -278,7 +278,7 @@ static void mbm_bw_count(u32 rmid, struct rmid_read *rr)
if (tval & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
return;
- chunks = mbm_overflow_count(m->prev_bw_msr, tval);
+ chunks = mbm_overflow_count(m->prev_bw_msr, tval, rr->r->mbm_width);
m->chunks_bw += chunks;
m->chunks = m->chunks_bw;
cur_bw = (chunks * r->mon_scale) >> 20;
@@ -433,11 +433,12 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
}
}
-static void mbm_update(struct rdt_domain *d, int rmid)
+static void mbm_update(struct rdt_resource *r, struct rdt_domain *d, int rmid)
{
struct rmid_read rr;
rr.first = false;
+ rr.r = r;
rr.d = d;
/*
@@ -510,6 +511,7 @@ void mbm_handle_overflow(struct work_struct *work)
struct rdtgroup *prgrp, *crgrp;
int cpu = smp_processor_id();
struct list_head *head;
+ struct rdt_resource *r;
struct rdt_domain *d;
mutex_lock(&rdtgroup_mutex);
@@ -517,16 +519,18 @@ void mbm_handle_overflow(struct work_struct *work)
if (!static_branch_likely(&rdt_mon_enable_key))
goto out_unlock;
- d = get_domain_from_cpu(cpu, &rdt_resources_all[RDT_RESOURCE_L3]);
+ r = &rdt_resources_all[RDT_RESOURCE_L3];
+
+ d = get_domain_from_cpu(cpu, r);
if (!d)
goto out_unlock;
list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
- mbm_update(d, prgrp->mon.rmid);
+ mbm_update(r, d, prgrp->mon.rmid);
head = &prgrp->mon.crdtgrp_list;
list_for_each_entry(crgrp, head, mon.crdtgrp_list)
- mbm_update(d, crgrp->mon.rmid);
+ mbm_update(r, d, crgrp->mon.rmid);
if (is_mba_sc(NULL))
update_mba_bw(prgrp, d);
@@ -614,11 +618,18 @@ static void l3_mon_evt_init(struct rdt_resource *r)
int rdt_get_mon_l3_config(struct rdt_resource *r)
{
+ unsigned int mbm_offset = boot_cpu_data.x86_cache_mbm_width_offset;
unsigned int cl_size = boot_cpu_data.x86_cache_size;
int ret;
r->mon_scale = boot_cpu_data.x86_cache_occ_scale;
r->num_rmid = boot_cpu_data.x86_cache_max_rmid + 1;
+ r->mbm_width = MBM_CNTR_WIDTH_BASE;
+
+ if (mbm_offset > 0 && mbm_offset <= MBM_CNTR_WIDTH_OFFSET_MAX)
+ r->mbm_width += mbm_offset;
+ else if (mbm_offset > MBM_CNTR_WIDTH_OFFSET_MAX)
+ pr_warn("Ignoring impossible MBM counter offset\n");
/*
* A reasonable upper limit on the max threshold is the number
diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
index d7623e1b927d..4bd28b388a1a 100644
--- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
+++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
@@ -24,7 +24,7 @@
#include <asm/cacheflush.h>
#include <asm/intel-family.h>
-#include <asm/resctrl_sched.h>
+#include <asm/resctrl.h>
#include <asm/perf_event.h>
#include "../../events/perf_event.h" /* For X86_CONFIG() */
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 5a359d9fcc05..d7cb5ab0d1f0 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -29,7 +29,7 @@
#include <uapi/linux/magic.h>
-#include <asm/resctrl_sched.h>
+#include <asm/resctrl.h>
#include "internal.h"
DEFINE_STATIC_KEY_FALSE(rdt_enable_key);
@@ -2472,7 +2472,7 @@ static int mkdir_mondata_subdir(struct kernfs_node *parent_kn,
goto out_destroy;
if (is_mbm_event(mevt->evtid))
- mon_event_read(&rr, d, prgrp, mevt->evtid, true);
+ mon_event_read(&rr, r, d, prgrp, mevt->evtid, true);
}
kernfs_activate(kn);
return 0;
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index 8e3a8fedfa4d..722fd712e1cf 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -87,7 +87,6 @@ static bool in_softirq_stack(unsigned long *stack, struct stack_info *info)
static bool in_doublefault_stack(unsigned long *stack, struct stack_info *info)
{
-#ifdef CONFIG_DOUBLEFAULT
struct cpu_entry_area *cea = get_cpu_entry_area(raw_smp_processor_id());
struct doublefault_stack *ss = &cea->doublefault_stack;
@@ -103,9 +102,6 @@ static bool in_doublefault_stack(unsigned long *stack, struct stack_info *info)
info->next_sp = (unsigned long *)this_cpu_read(cpu_tss_rw.x86_tss.sp);
return true;
-#else
- return false;
-#endif
}
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index c5399e80c59c..4d13c57f370a 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -910,14 +910,6 @@ static int __init parse_memmap_one(char *p)
return -EINVAL;
if (!strncmp(p, "exactmap", 8)) {
-#ifdef CONFIG_CRASH_DUMP
- /*
- * If we are doing a crash dump, we still need to know
- * the real memory size before the original memory map is
- * reset.
- */
- saved_max_pfn = e820__end_of_ram_pfn();
-#endif
e820_table->nr_entries = 0;
userdef = 1;
return 0;
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index 9b33904251a9..93fbdff2974f 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -15,12 +15,9 @@
#include <xen/hvc-console.h>
#include <asm/pci-direct.h>
#include <asm/fixmap.h>
-#include <asm/intel-mid.h>
#include <asm/pgtable.h>
#include <linux/usb/ehci_def.h>
#include <linux/usb/xhci-dbgp.h>
-#include <linux/efi.h>
-#include <asm/efi.h>
#include <asm/pci_x86.h>
/* Simple VGA output */
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 12c70840980e..06c818967bb6 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -291,15 +291,13 @@ void fpu__drop(struct fpu *fpu)
}
/*
- * Clear FPU registers by setting them up from
- * the init fpstate:
+ * Clear FPU registers by setting them up from the init fpstate.
+ * Caller must do fpregs_[un]lock() around it.
*/
-static inline void copy_init_fpstate_to_fpregs(void)
+static inline void copy_init_fpstate_to_fpregs(u64 features_mask)
{
- fpregs_lock();
-
if (use_xsave())
- copy_kernel_to_xregs(&init_fpstate.xsave, -1);
+ copy_kernel_to_xregs(&init_fpstate.xsave, features_mask);
else if (static_cpu_has(X86_FEATURE_FXSR))
copy_kernel_to_fxregs(&init_fpstate.fxsave);
else
@@ -307,9 +305,6 @@ static inline void copy_init_fpstate_to_fpregs(void)
if (boot_cpu_has(X86_FEATURE_OSPKE))
copy_init_pkru_to_fpregs();
-
- fpregs_mark_activate();
- fpregs_unlock();
}
/*
@@ -318,18 +313,40 @@ static inline void copy_init_fpstate_to_fpregs(void)
* Called by sys_execve(), by the signal handler code and by various
* error paths.
*/
-void fpu__clear(struct fpu *fpu)
+static void fpu__clear(struct fpu *fpu, bool user_only)
{
- WARN_ON_FPU(fpu != &current->thread.fpu); /* Almost certainly an anomaly */
+ WARN_ON_FPU(fpu != &current->thread.fpu);
- fpu__drop(fpu);
+ if (!static_cpu_has(X86_FEATURE_FPU)) {
+ fpu__drop(fpu);
+ fpu__initialize(fpu);
+ return;
+ }
- /*
- * Make sure fpstate is cleared and initialized.
- */
- fpu__initialize(fpu);
- if (static_cpu_has(X86_FEATURE_FPU))
- copy_init_fpstate_to_fpregs();
+ fpregs_lock();
+
+ if (user_only) {
+ if (!fpregs_state_valid(fpu, smp_processor_id()) &&
+ xfeatures_mask_supervisor())
+ copy_kernel_to_xregs(&fpu->state.xsave,
+ xfeatures_mask_supervisor());
+ copy_init_fpstate_to_fpregs(xfeatures_mask_user());
+ } else {
+ copy_init_fpstate_to_fpregs(xfeatures_mask_all);
+ }
+
+ fpregs_mark_activate();
+ fpregs_unlock();
+}
+
+void fpu__clear_user_states(struct fpu *fpu)
+{
+ fpu__clear(fpu, true);
+}
+
+void fpu__clear_all(struct fpu *fpu)
+{
+ fpu__clear(fpu, false);
}
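
fpu__clear() is split into fpu__clear_user_states() and fpu__clear_all(): the signal and execve paths reinitialise only user-visible xfeatures (restoring supervisor state from the kernel buffer first when needed), while the full variant resets everything. A trivial sketch of the mask selection alone (the mask values are made up; the real ones come from xfeatures_mask_user()/xfeatures_mask_all):

    #include <stdint.h>
    #include <stdio.h>

    /* Illustrative feature masks, not the real xfeatures bits. */
    #define USER_FEATURES       0x00ffULL
    #define SUPERVISOR_FEATURES 0xff00ULL

    /* Which features get re-initialised from init_fpstate. */
    static uint64_t clear_features(int user_only)
    {
        return user_only ? USER_FEATURES
                         : (USER_FEATURES | SUPERVISOR_FEATURES);
    }

    int main(void)
    {
        printf("signal/execve path: %#llx\n",
               (unsigned long long)clear_features(1));
        printf("full reset path:    %#llx\n",
               (unsigned long long)clear_features(0));
        return 0;
    }
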
/*
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 6ce7e0a23268..61ddc3a5e5c2 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -224,7 +224,8 @@ static void __init fpu__init_system_xstate_size_legacy(void)
*/
u64 __init fpu__get_supported_xfeatures_mask(void)
{
- return XCNTXT_MASK;
+ return XFEATURE_MASK_USER_SUPPORTED |
+ XFEATURE_MASK_SUPERVISOR_SUPPORTED;
}
/* Legacy code to initialize eager fpu mode. */
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index d652b939ccfb..bd1d0649f8ce 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -139,7 +139,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
} else {
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
if (!ret)
- ret = validate_xstate_header(&xsave->header);
+ ret = validate_user_xstate_header(&xsave->header);
}
/*
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 400a05e1c1c5..9393a445d73c 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -211,9 +211,9 @@ retry:
}
static inline void
-sanitize_restored_xstate(union fpregs_state *state,
- struct user_i387_ia32_struct *ia32_env,
- u64 xfeatures, int fx_only)
+sanitize_restored_user_xstate(union fpregs_state *state,
+ struct user_i387_ia32_struct *ia32_env,
+ u64 user_xfeatures, int fx_only)
{
struct xregs_state *xsave = &state->xsave;
struct xstate_header *header = &xsave->header;
@@ -226,13 +226,22 @@ sanitize_restored_xstate(union fpregs_state *state,
*/
/*
- * Init the state that is not present in the memory
- * layout and not enabled by the OS.
+ * 'user_xfeatures' might have bits clear which are
+ * set in header->xfeatures. This represents features that
+ * were in init state prior to a signal delivery, and need
+ * to be reset back to the init state. Clear any user
+ * feature bits which are set in the kernel buffer to get
+ * them back to the init state.
+ *
+ * Supervisor state is unchanged by input from userspace.
+ * Ensure supervisor state bits stay set and supervisor
+ * state is not modified.
*/
if (fx_only)
header->xfeatures = XFEATURE_MASK_FPSSE;
else
- header->xfeatures &= xfeatures;
+ header->xfeatures &= user_xfeatures |
+ xfeatures_mask_supervisor();
}
if (use_fxsr()) {
@@ -252,16 +261,24 @@ sanitize_restored_xstate(union fpregs_state *state,
*/
static int copy_user_to_fpregs_zeroing(void __user *buf, u64 xbv, int fx_only)
{
+ u64 init_bv;
+ int r;
+
if (use_xsave()) {
if (fx_only) {
- u64 init_bv = xfeatures_mask & ~XFEATURE_MASK_FPSSE;
- copy_kernel_to_xregs(&init_fpstate.xsave, init_bv);
- return copy_user_to_fxregs(buf);
+ init_bv = xfeatures_mask_user() & ~XFEATURE_MASK_FPSSE;
+
+ r = copy_user_to_fxregs(buf);
+ if (!r)
+ copy_kernel_to_xregs(&init_fpstate.xsave, init_bv);
+ return r;
} else {
- u64 init_bv = xfeatures_mask & ~xbv;
- if (unlikely(init_bv))
+ init_bv = xfeatures_mask_user() & ~xbv;
+
+ r = copy_user_to_xregs(buf, xbv);
+ if (!r && unlikely(init_bv))
copy_kernel_to_xregs(&init_fpstate.xsave, init_bv);
- return copy_user_to_xregs(buf, xbv);
+ return r;
}
} else if (use_fxsr()) {
return copy_user_to_fxregs(buf);
@@ -277,7 +294,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
struct task_struct *tsk = current;
struct fpu *fpu = &tsk->thread.fpu;
struct user_i387_ia32_struct env;
- u64 xfeatures = 0;
+ u64 user_xfeatures = 0;
int fx_only = 0;
int ret = 0;
@@ -285,7 +302,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
IS_ENABLED(CONFIG_IA32_EMULATION));
if (!buf) {
- fpu__clear(fpu);
+ fpu__clear_user_states(fpu);
return 0;
}
@@ -310,32 +327,14 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
trace_x86_fpu_xstate_check_failed(fpu);
} else {
state_size = fx_sw_user.xstate_size;
- xfeatures = fx_sw_user.xfeatures;
+ user_xfeatures = fx_sw_user.xfeatures;
}
}
- /*
- * The current state of the FPU registers does not matter. By setting
-	 * TIF_NEED_FPU_LOAD unconditionally it is ensured that our xstate
- * is not modified on context switch and that the xstate is considered
- * to be loaded again on return to userland (overriding last_cpu avoids
- * the optimisation).
- */
- set_thread_flag(TIF_NEED_FPU_LOAD);
- __fpu_invalidate_fpregs_state(fpu);
-
if ((unsigned long)buf_fx % 64)
fx_only = 1;
- /*
- * For 32-bit frames with fxstate, copy the fxstate so it can be
- * reconstructed later.
- */
- if (ia32_fxstate) {
- ret = __copy_from_user(&env, buf, sizeof(env));
- if (ret)
- goto err_out;
- envp = &env;
- } else {
+
+ if (!ia32_fxstate) {
/*
* Attempt to restore the FPU registers directly from user
* memory. For that to succeed, the user access cannot cause
@@ -345,20 +344,65 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
*/
fpregs_lock();
pagefault_disable();
- ret = copy_user_to_fpregs_zeroing(buf_fx, xfeatures, fx_only);
+ ret = copy_user_to_fpregs_zeroing(buf_fx, user_xfeatures, fx_only);
pagefault_enable();
if (!ret) {
+
+ /*
+ * Restore supervisor states: previous context switch
+		 * etc. has done XSAVES and saved the supervisor states
+ * in the kernel buffer from which they can be restored
+ * now.
+ *
+ * We cannot do a single XRSTORS here - which would
+ * be nice - because the rest of the FPU registers are
+ * being restored from a user buffer directly. The
+ * single XRSTORS happens below, when the user buffer
+ * has been copied to the kernel one.
+ */
+ if (test_thread_flag(TIF_NEED_FPU_LOAD) &&
+ xfeatures_mask_supervisor())
+ copy_kernel_to_xregs(&fpu->state.xsave,
+ xfeatures_mask_supervisor());
fpregs_mark_activate();
fpregs_unlock();
return 0;
}
- fpregs_deactivate(fpu);
fpregs_unlock();
+ } else {
+ /*
+ * For 32-bit frames with fxstate, copy the fxstate so it can
+ * be reconstructed later.
+ */
+ ret = __copy_from_user(&env, buf, sizeof(env));
+ if (ret)
+ goto err_out;
+ envp = &env;
}
+ /*
+ * By setting TIF_NEED_FPU_LOAD it is ensured that our xstate is
+ * not modified on context switch and that the xstate is considered
+ * to be loaded again on return to userland (overriding last_cpu avoids
+ * the optimisation).
+ */
+ fpregs_lock();
+
+ if (!test_thread_flag(TIF_NEED_FPU_LOAD)) {
+
+ /*
+ * Supervisor states are not modified by user space input. Save
+ * current supervisor states first and invalidate the FPU regs.
+ */
+ if (xfeatures_mask_supervisor())
+ copy_supervisor_to_kernel(&fpu->state.xsave);
+ set_thread_flag(TIF_NEED_FPU_LOAD);
+ }
+ __fpu_invalidate_fpregs_state(fpu);
+ fpregs_unlock();
if (use_xsave() && !fx_only) {
- u64 init_bv = xfeatures_mask & ~xfeatures;
+ u64 init_bv = xfeatures_mask_user() & ~user_xfeatures;
if (using_compacted_format()) {
ret = copy_user_to_xstate(&fpu->state.xsave, buf_fx);
@@ -366,17 +410,24 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
ret = __copy_from_user(&fpu->state.xsave, buf_fx, state_size);
if (!ret && state_size > offsetof(struct xregs_state, header))
- ret = validate_xstate_header(&fpu->state.xsave.header);
+ ret = validate_user_xstate_header(&fpu->state.xsave.header);
}
if (ret)
goto err_out;
- sanitize_restored_xstate(&fpu->state, envp, xfeatures, fx_only);
+ sanitize_restored_user_xstate(&fpu->state, envp, user_xfeatures,
+ fx_only);
fpregs_lock();
if (unlikely(init_bv))
copy_kernel_to_xregs(&init_fpstate.xsave, init_bv);
- ret = copy_kernel_to_xregs_err(&fpu->state.xsave, xfeatures);
+
+ /*
+ * Restore previously saved supervisor xstates along with
+ * copied-in user xstates.
+ */
+ ret = copy_kernel_to_xregs_err(&fpu->state.xsave,
+ user_xfeatures | xfeatures_mask_supervisor());
} else if (use_fxsr()) {
ret = __copy_from_user(&fpu->state.fxsave, buf_fx, state_size);
@@ -385,11 +436,14 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
goto err_out;
}
- sanitize_restored_xstate(&fpu->state, envp, xfeatures, fx_only);
+ sanitize_restored_user_xstate(&fpu->state, envp, user_xfeatures,
+ fx_only);
fpregs_lock();
if (use_xsave()) {
- u64 init_bv = xfeatures_mask & ~XFEATURE_MASK_FPSSE;
+ u64 init_bv;
+
+ init_bv = xfeatures_mask_user() & ~XFEATURE_MASK_FPSSE;
copy_kernel_to_xregs(&init_fpstate.xsave, init_bv);
}
@@ -410,7 +464,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
err_out:
if (ret)
- fpu__clear(fpu);
+ fpu__clear_user_states(fpu);
return ret;
}
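For clarity, here is a small worked example of the mask composition used in the restore path above; the feature names are hypothetical, only the arithmetic is taken from this hunk.

/*
 * Worked example (hypothetical feature bits):
 *
 *   user_xfeatures              = FP | SSE        (from the signal frame)
 *   xfeatures_mask_user()       = FP | SSE | AVX
 *   xfeatures_mask_supervisor() = S               (some supervisor bit)
 *
 *   init_bv = xfeatures_mask_user() & ~user_xfeatures
 *           = AVX                                 -> re-initialized from init_fpstate
 *
 *   XRSTORS mask = user_xfeatures | xfeatures_mask_supervisor()
 *                = FP | SSE | S                   -> user frame plus the
 *                                                    previously saved
 *                                                    supervisor state
 */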
@@ -465,7 +519,7 @@ void fpu__init_prepare_fx_sw_frame(void)
fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1;
fx_sw_reserved.extended_size = size;
- fx_sw_reserved.xfeatures = xfeatures_mask;
+ fx_sw_reserved.xfeatures = xfeatures_mask_user();
fx_sw_reserved.xstate_size = fpu_user_xstate_size;
if (IS_ENABLED(CONFIG_IA32_EMULATION) ||
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 6a54e83d5589..bda2e5eaca0e 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -54,13 +54,15 @@ static short xsave_cpuid_features[] __initdata = {
};
/*
- * Mask of xstate features supported by the CPU and the kernel:
+ * This represents the full set of bits that should ever be set in a kernel
+ * XSAVE buffer, both supervisor and user xstates.
*/
-u64 xfeatures_mask __read_mostly;
+u64 xfeatures_mask_all __read_mostly;
static unsigned int xstate_offsets[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = -1};
static unsigned int xstate_sizes[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = -1};
static unsigned int xstate_comp_offsets[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = -1};
+static unsigned int xstate_supervisor_only_offsets[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = -1};
/*
* The XSAVE area of kernel can be in standard or compacted format;
@@ -76,7 +78,7 @@ unsigned int fpu_user_xstate_size;
*/
int cpu_has_xfeatures(u64 xfeatures_needed, const char **feature_name)
{
- u64 xfeatures_missing = xfeatures_needed & ~xfeatures_mask;
+ u64 xfeatures_missing = xfeatures_needed & ~xfeatures_mask_all;
if (unlikely(feature_name)) {
long xfeature_idx, max_idx;
@@ -150,7 +152,7 @@ void fpstate_sanitize_xstate(struct fpu *fpu)
* None of the feature bits are in init state. So nothing else
* to do for us, as the memory layout is up to date.
*/
- if ((xfeatures & xfeatures_mask) == xfeatures_mask)
+ if ((xfeatures & xfeatures_mask_all) == xfeatures_mask_all)
return;
/*
@@ -177,7 +179,7 @@ void fpstate_sanitize_xstate(struct fpu *fpu)
* in a special way already:
*/
feature_bit = 0x2;
- xfeatures = (xfeatures_mask & ~xfeatures) >> 2;
+ xfeatures = (xfeatures_mask_user() & ~xfeatures) >> 2;
/*
* Update all the remaining memory layouts according to their
@@ -205,30 +207,39 @@ void fpstate_sanitize_xstate(struct fpu *fpu)
*/
void fpu__init_cpu_xstate(void)
{
- if (!boot_cpu_has(X86_FEATURE_XSAVE) || !xfeatures_mask)
+ u64 unsup_bits;
+
+ if (!boot_cpu_has(X86_FEATURE_XSAVE) || !xfeatures_mask_all)
return;
/*
- * Make it clear that XSAVES supervisor states are not yet
- * implemented should anyone expect it to work by changing
- * bits in XFEATURE_MASK_* macros and XCR0.
+ * Unsupported supervisor xstates should not be found in
+ * the xfeatures mask.
*/
- WARN_ONCE((xfeatures_mask & XFEATURE_MASK_SUPERVISOR),
- "x86/fpu: XSAVES supervisor states are not yet implemented.\n");
+ unsup_bits = xfeatures_mask_all & XFEATURE_MASK_SUPERVISOR_UNSUPPORTED;
+ WARN_ONCE(unsup_bits, "x86/fpu: Found unsupported supervisor xstates: 0x%llx\n",
+ unsup_bits);
- xfeatures_mask &= ~XFEATURE_MASK_SUPERVISOR;
+ xfeatures_mask_all &= ~XFEATURE_MASK_SUPERVISOR_UNSUPPORTED;
cr4_set_bits(X86_CR4_OSXSAVE);
- xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask);
+
+ /*
+ * XCR_XFEATURE_ENABLED_MASK (aka. XCR0) sets user features
+ * managed by XSAVE{C, OPT, S} and XRSTOR{S}. Only XSAVE user
+ * states can be set here.
+ */
+ xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask_user());
+
+ /*
+ * MSR_IA32_XSS sets supervisor states managed by XSAVES.
+ */
+ if (boot_cpu_has(X86_FEATURE_XSAVES))
+ wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor());
}
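The xfeatures_mask_user() and xfeatures_mask_supervisor() helpers used above are not shown in this diff; a minimal sketch of what they are assumed to look like (the *_SUPPORTED mask names are assumptions, not taken from this patch):

static inline u64 xfeatures_mask_supervisor(void)
{
	/* Assumed: supervisor (IA32_XSS) subset of the combined mask. */
	return xfeatures_mask_all & XFEATURE_MASK_SUPERVISOR_SUPPORTED;
}

static inline u64 xfeatures_mask_user(void)
{
	/* Assumed: user (XCR0) subset of the combined mask. */
	return xfeatures_mask_all & XFEATURE_MASK_USER_SUPPORTED;
}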
-/*
- * Note that in the future we will likely need a pair of
- * functions here: one for user xstates and the other for
- * system xstates. For now, they are the same.
- */
-static int xfeature_enabled(enum xfeature xfeature)
+static bool xfeature_enabled(enum xfeature xfeature)
{
- return !!(xfeatures_mask & (1UL << xfeature));
+ return xfeatures_mask_all & BIT_ULL(xfeature);
}
/*
@@ -383,6 +394,33 @@ static void __init setup_xstate_comp_offsets(void)
}
/*
+ * Setup offsets of a supervisor-state-only XSAVES buffer:
+ *
+ * The offsets stored in xstate_comp_offsets[] only work for one specific
+ * value of the Requested Feature BitMap (RFBM). In cases where a different
+ * RFBM value is used, a different set of offsets is required. This set of
+ * offsets is for when RFBM=xfeatures_mask_supervisor().
+ */
+static void __init setup_supervisor_only_offsets(void)
+{
+ unsigned int next_offset;
+ int i;
+
+ next_offset = FXSAVE_SIZE + XSAVE_HDR_SIZE;
+
+ for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
+ if (!xfeature_enabled(i) || !xfeature_is_supervisor(i))
+ continue;
+
+ if (xfeature_is_aligned(i))
+ next_offset = ALIGN(next_offset, 64);
+
+ xstate_supervisor_only_offsets[i] = next_offset;
+ next_offset += xstate_sizes[i];
+ }
+}
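To illustrate why the extra offset table is needed (sizes are hypothetical; only the 512-byte FXSAVE area and 64-byte XSAVE header come from the code above):

/*
 * With RFBM = user | supervisor, a supervisor feature S is compacted in
 * after every enabled user extended state:
 *
 *   xstate_comp_offsets[S]            = 512 + 64 + sum(sizes of enabled
 *                                       extended states below S)
 *
 * With RFBM = supervisor only, the same feature starts right after the
 * legacy area and header (plus 64-byte alignment if required):
 *
 *   xstate_supervisor_only_offsets[S] = 512 + 64
 *
 * The two offsets differ, which is why copy_supervisor_to_kernel() has to
 * memmove() each supervisor state back into its normal location.
 */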
+
+/*
* Print out xstate component offsets and sizes
*/
static void __init print_xstate_offset_size(void)
@@ -415,7 +453,7 @@ static void __init setup_init_fpu_buf(void)
if (boot_cpu_has(X86_FEATURE_XSAVES))
init_fpstate.xsave.header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT |
- xfeatures_mask;
+ xfeatures_mask_all;
/*
* Init all the features state with header.xfeatures being 0x0
@@ -438,7 +476,7 @@ static int xfeature_uncompacted_offset(int xfeature_nr)
* format. Checking a supervisor state's uncompacted offset is
* an error.
*/
- if (XFEATURE_MASK_SUPERVISOR & BIT_ULL(xfeature_nr)) {
+ if (XFEATURE_MASK_SUPERVISOR_ALL & BIT_ULL(xfeature_nr)) {
WARN_ONCE(1, "No fixed offset for xstate %d\n", xfeature_nr);
return -1;
}
@@ -472,10 +510,10 @@ int using_compacted_format(void)
}
/* Validate an xstate header supplied by userspace (ptrace or sigreturn) */
-int validate_xstate_header(const struct xstate_header *hdr)
+int validate_user_xstate_header(const struct xstate_header *hdr)
{
/* No unknown or supervisor features may be set */
- if (hdr->xfeatures & (~xfeatures_mask | XFEATURE_MASK_SUPERVISOR))
+ if (hdr->xfeatures & ~xfeatures_mask_user())
return -EINVAL;
/* Userspace must use the uncompacted format */
@@ -610,15 +648,12 @@ static void do_extra_xstate_size_checks(void)
/*
- * Get total size of enabled xstates in XCR0/xfeatures_mask.
+ * Get total size of enabled xstates in XCR0 | IA32_XSS.
*
* Note the SDM's wording here. "sub-function 0" only enumerates
* the size of the *user* states. If we use it to size a buffer
* that we use 'XSAVES' on, we could potentially overflow the
* buffer because 'XSAVES' saves system states too.
- *
- * Note that we do not currently set any bits on IA32_XSS so
- * 'XCR0 | IA32_XSS == XCR0' for now.
*/
static unsigned int __init get_xsaves_size(void)
{
@@ -700,7 +735,7 @@ static int __init init_xstate_size(void)
*/
static void fpu__init_disable_system_xstate(void)
{
- xfeatures_mask = 0;
+ xfeatures_mask_all = 0;
cr4_clear_bits(X86_CR4_OSXSAVE);
setup_clear_cpu_cap(X86_FEATURE_XSAVE);
}
@@ -735,16 +770,26 @@ void __init fpu__init_system_xstate(void)
return;
}
+ /*
+ * Find user xstates supported by the processor.
+ */
cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
- xfeatures_mask = eax + ((u64)edx << 32);
+ xfeatures_mask_all = eax + ((u64)edx << 32);
- if ((xfeatures_mask & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) {
+ /*
+ * Find supervisor xstates supported by the processor.
+ */
+ cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx);
+ xfeatures_mask_all |= ecx + ((u64)edx << 32);
+
+ if ((xfeatures_mask_user() & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) {
/*
* This indicates that something really unexpected happened
* with the enumeration. Disable XSAVE and try to continue
* booting without it. This is too early to BUG().
*/
- pr_err("x86/fpu: FP/SSE not present amongst the CPU's xstate features: 0x%llx.\n", xfeatures_mask);
+ pr_err("x86/fpu: FP/SSE not present amongst the CPU's xstate features: 0x%llx.\n",
+ xfeatures_mask_all);
goto out_disable;
}
@@ -753,10 +798,10 @@ void __init fpu__init_system_xstate(void)
*/
for (i = 0; i < ARRAY_SIZE(xsave_cpuid_features); i++) {
if (!boot_cpu_has(xsave_cpuid_features[i]))
- xfeatures_mask &= ~BIT(i);
+ xfeatures_mask_all &= ~BIT_ULL(i);
}
- xfeatures_mask &= fpu__get_supported_xfeatures_mask();
+ xfeatures_mask_all &= fpu__get_supported_xfeatures_mask();
/* Enable xstate instructions to be able to continue with initialization: */
fpu__init_cpu_xstate();
@@ -768,15 +813,16 @@ void __init fpu__init_system_xstate(void)
* Update info used for ptrace frames; use standard-format size and no
* supervisor xstates:
*/
- update_regset_xstate_info(fpu_user_xstate_size, xfeatures_mask & ~XFEATURE_MASK_SUPERVISOR);
+ update_regset_xstate_info(fpu_user_xstate_size, xfeatures_mask_user());
fpu__init_prepare_fx_sw_frame();
setup_init_fpu_buf();
setup_xstate_comp_offsets();
+ setup_supervisor_only_offsets();
print_xstate_offset_size();
pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n",
- xfeatures_mask,
+ xfeatures_mask_all,
fpu_kernel_xstate_size,
boot_cpu_has(X86_FEATURE_XSAVES) ? "compacted" : "standard");
return;
@@ -795,7 +841,14 @@ void fpu__resume_cpu(void)
* Restore XCR0 on xsave capable CPUs:
*/
if (boot_cpu_has(X86_FEATURE_XSAVE))
- xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask);
+ xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask_user());
+
+ /*
+ * Restore IA32_XSS. The same CPUID bit enumerates support
+ * of XSAVES and MSR_IA32_XSS.
+ */
+ if (boot_cpu_has(X86_FEATURE_XSAVES))
+ wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor());
}
/*
@@ -840,10 +893,9 @@ void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
/*
* We should not ever be requesting features that we
- * have not enabled. Remember that xfeatures_mask is
- * what we write to the XCR0 register.
+ * have not enabled.
*/
- WARN_ONCE(!(xfeatures_mask & BIT_ULL(xfeature_nr)),
+ WARN_ONCE(!(xfeatures_mask_all & BIT_ULL(xfeature_nr)),
"get of unsupported state");
/*
* This assumes the last 'xsave*' instruction to
@@ -1010,7 +1062,7 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int of
*/
memset(&header, 0, sizeof(header));
header.xfeatures = xsave->header.xfeatures;
- header.xfeatures &= ~XFEATURE_MASK_SUPERVISOR;
+ header.xfeatures &= xfeatures_mask_user();
if (header.xfeatures & XFEATURE_MASK_FP)
copy_part(0, off_mxcsr,
@@ -1090,7 +1142,7 @@ int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned i
*/
memset(&header, 0, sizeof(header));
header.xfeatures = xsave->header.xfeatures;
- header.xfeatures &= ~XFEATURE_MASK_SUPERVISOR;
+ header.xfeatures &= xfeatures_mask_user();
/*
* Copy xregs_state->header:
@@ -1157,7 +1209,7 @@ int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf)
memcpy(&hdr, kbuf + offset, size);
- if (validate_xstate_header(&hdr))
+ if (validate_user_xstate_header(&hdr))
return -EINVAL;
for (i = 0; i < XFEATURE_MAX; i++) {
@@ -1183,7 +1235,7 @@ int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf)
* The state that came in from userspace was user-state only.
* Mask all the user states out of 'xfeatures':
*/
- xsave->header.xfeatures &= XFEATURE_MASK_SUPERVISOR;
+ xsave->header.xfeatures &= XFEATURE_MASK_SUPERVISOR_ALL;
/*
* Add back in the features that came in from userspace:
@@ -1211,7 +1263,7 @@ int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf)
if (__copy_from_user(&hdr, ubuf + offset, size))
return -EFAULT;
- if (validate_xstate_header(&hdr))
+ if (validate_user_xstate_header(&hdr))
return -EINVAL;
for (i = 0; i < XFEATURE_MAX; i++) {
@@ -1239,7 +1291,7 @@ int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf)
* The state that came in from userspace was user-state only.
* Mask all the user states out of 'xfeatures':
*/
- xsave->header.xfeatures &= XFEATURE_MASK_SUPERVISOR;
+ xsave->header.xfeatures &= XFEATURE_MASK_SUPERVISOR_ALL;
/*
* Add back in the features that came in from userspace:
@@ -1249,6 +1301,61 @@ int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf)
return 0;
}
+/*
+ * Save only supervisor states to the kernel buffer. This blows away all
+ * old states, and is intended to be used only in __fpu__restore_sig(), where
+ * user states are restored from the user buffer.
+ */
+void copy_supervisor_to_kernel(struct xregs_state *xstate)
+{
+ struct xstate_header *header;
+ u64 max_bit, min_bit;
+ u32 lmask, hmask;
+ int err, i;
+
+ if (WARN_ON(!boot_cpu_has(X86_FEATURE_XSAVES)))
+ return;
+
+ if (!xfeatures_mask_supervisor())
+ return;
+
+ max_bit = __fls(xfeatures_mask_supervisor());
+ min_bit = __ffs(xfeatures_mask_supervisor());
+
+ lmask = xfeatures_mask_supervisor();
+ hmask = xfeatures_mask_supervisor() >> 32;
+ XSTATE_OP(XSAVES, xstate, lmask, hmask, err);
+
+ /* We should never fault when copying to a kernel buffer: */
+ if (WARN_ON_FPU(err))
+ return;
+
+ /*
+ * At this point, the buffer has only supervisor states and must be
+ * converted back to normal kernel format.
+ */
+ header = &xstate->header;
+ header->xcomp_bv |= xfeatures_mask_all;
+
+ /*
+ * This only moves states up in the buffer. Start with
+ * the last state and move backwards so that states are
+ * not overwritten until after they are moved. Note:
+ * memmove() allows overlapping src/dst buffers.
+ */
+ for (i = max_bit; i >= min_bit; i--) {
+ u8 *xbuf = (u8 *)xstate;
+
+ if (!((header->xfeatures >> i) & 1))
+ continue;
+
+ /* Move xfeature 'i' into its normal location */
+ memmove(xbuf + xstate_comp_offsets[i],
+ xbuf + xstate_supervisor_only_offsets[i],
+ xstate_sizes[i]);
+ }
+}
+
#ifdef CONFIG_PROC_PID_ARCH_STATUS
/*
 * Report the amount of time elapsed in milliseconds since last AVX512
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index b0e641793be4..c84d28e90a58 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -282,7 +282,8 @@ static inline void tramp_free(void *tramp) { }
/* Defined as markers to the end of the ftrace default trampolines */
extern void ftrace_regs_caller_end(void);
-extern void ftrace_epilogue(void);
+extern void ftrace_regs_caller_ret(void);
+extern void ftrace_caller_end(void);
extern void ftrace_caller_op_ptr(void);
extern void ftrace_regs_caller_op_ptr(void);
@@ -334,7 +335,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
call_offset = (unsigned long)ftrace_regs_call;
} else {
start_offset = (unsigned long)ftrace_caller;
- end_offset = (unsigned long)ftrace_epilogue;
+ end_offset = (unsigned long)ftrace_caller_end;
op_offset = (unsigned long)ftrace_caller_op_ptr;
call_offset = (unsigned long)ftrace_call;
}
@@ -366,6 +367,13 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
if (WARN_ON(ret < 0))
goto fail;
+ if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) {
+ ip = trampoline + (ftrace_regs_caller_ret - ftrace_regs_caller);
+ ret = probe_kernel_read(ip, (void *)retq, RET_SIZE);
+ if (WARN_ON(ret < 0))
+ goto fail;
+ }
+
/*
* The address of the ftrace_ops that is used for this trampoline
* is stored at the end of the trampoline. This will be used to
@@ -433,7 +441,7 @@ void set_ftrace_ops_ro(void)
end_offset = (unsigned long)ftrace_regs_caller_end;
} else {
start_offset = (unsigned long)ftrace_caller;
- end_offset = (unsigned long)ftrace_epilogue;
+ end_offset = (unsigned long)ftrace_caller_end;
}
size = end_offset - start_offset;
size = size + RET_SIZE + sizeof(void *);
diff --git a/arch/x86/kernel/ftrace_32.S b/arch/x86/kernel/ftrace_32.S
index e8a9f8370112..e405fe1a8bf4 100644
--- a/arch/x86/kernel/ftrace_32.S
+++ b/arch/x86/kernel/ftrace_32.S
@@ -189,5 +189,5 @@ return_to_handler:
movl %eax, %ecx
popl %edx
popl %eax
- JMP_NOSPEC %ecx
+ JMP_NOSPEC ecx
#endif
diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S
index 369e61faacfe..aa5d28aeb31e 100644
--- a/arch/x86/kernel/ftrace_64.S
+++ b/arch/x86/kernel/ftrace_64.S
@@ -23,7 +23,7 @@
#endif /* CONFIG_FRAME_POINTER */
/* Size of stack used to save mcount regs in save_mcount_regs */
-#define MCOUNT_REG_SIZE (SS+8 + MCOUNT_FRAME_SIZE)
+#define MCOUNT_REG_SIZE (FRAME_SIZE + MCOUNT_FRAME_SIZE)
/*
* gcc -pg option adds a call to 'mcount' in most functions.
@@ -77,7 +77,7 @@
/*
* We add enough stack to save all regs.
*/
- subq $(MCOUNT_REG_SIZE - MCOUNT_FRAME_SIZE), %rsp
+ subq $(FRAME_SIZE), %rsp
movq %rax, RAX(%rsp)
movq %rcx, RCX(%rsp)
movq %rdx, RDX(%rsp)
@@ -157,8 +157,12 @@ SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL)
* think twice before adding any new code or changing the
* layout here.
*/
-SYM_INNER_LABEL(ftrace_epilogue, SYM_L_GLOBAL)
+SYM_INNER_LABEL(ftrace_caller_end, SYM_L_GLOBAL)
+ jmp ftrace_epilogue
+SYM_FUNC_END(ftrace_caller);
+
+SYM_FUNC_START(ftrace_epilogue)
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
SYM_INNER_LABEL(ftrace_graph_call, SYM_L_GLOBAL)
jmp ftrace_stub
@@ -170,14 +174,12 @@ SYM_INNER_LABEL(ftrace_graph_call, SYM_L_GLOBAL)
*/
SYM_INNER_LABEL_ALIGN(ftrace_stub, SYM_L_WEAK)
retq
-SYM_FUNC_END(ftrace_caller)
+SYM_FUNC_END(ftrace_epilogue)
SYM_FUNC_START(ftrace_regs_caller)
/* Save the current flags before any operations that can change them */
pushfq
- UNWIND_HINT_SAVE
-
/* added 8 bytes to save flags */
save_mcount_regs 8
/* save_mcount_regs fills in first two parameters */
@@ -233,10 +235,13 @@ SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL)
movq ORIG_RAX(%rsp), %rax
movq %rax, MCOUNT_REG_SIZE-8(%rsp)
- /* If ORIG_RAX is anything but zero, make this a call to that */
+ /*
+ * If ORIG_RAX is anything but zero, make this a call to that.
+ * See arch_ftrace_set_direct_caller().
+ */
movq ORIG_RAX(%rsp), %rax
- cmpq $0, %rax
- je 1f
+ testq %rax, %rax
+ jz 1f
/* Swap the flags with orig_rax */
movq MCOUNT_REG_SIZE(%rsp), %rdi
@@ -244,20 +249,14 @@ SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL)
movq %rax, MCOUNT_REG_SIZE(%rsp)
restore_mcount_regs 8
+ /* Restore flags */
+ popfq
- jmp 2f
+SYM_INNER_LABEL(ftrace_regs_caller_ret, SYM_L_GLOBAL);
+ UNWIND_HINT_RET_OFFSET
+ jmp ftrace_epilogue
1: restore_mcount_regs
-
-
-2:
- /*
- * The stack layout is nondetermistic here, depending on which path was
- * taken. This confuses objtool and ORC, rightfully so. For now,
- * pretend the stack always looks like the non-direct case.
- */
- UNWIND_HINT_RESTORE
-
/* Restore flags */
popfq
@@ -268,7 +267,6 @@ SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL)
* to the return.
*/
SYM_INNER_LABEL(ftrace_regs_caller_end, SYM_L_GLOBAL)
-
jmp ftrace_epilogue
SYM_FUNC_END(ftrace_regs_caller)
@@ -303,7 +301,7 @@ trace:
* function tracing is enabled.
*/
movq ftrace_trace_function, %r8
- CALL_NOSPEC %r8
+ CALL_NOSPEC r8
restore_mcount_regs
jmp fgraph_trace
@@ -340,6 +338,6 @@ SYM_CODE_START(return_to_handler)
movq 8(%rsp), %rdx
movq (%rsp), %rax
addq $24, %rsp
- JMP_NOSPEC %rdi
+ JMP_NOSPEC rdi
SYM_CODE_END(return_to_handler)
#endif
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 12df3a4abfdd..6b32ab009c19 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -43,7 +43,7 @@ static int map_irq_stack(unsigned int cpu)
pages[i] = pfn_to_page(pa >> PAGE_SHIFT);
}
- va = vmap(pages, IRQ_STACK_SIZE / PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL);
+ va = vmap(pages, IRQ_STACK_SIZE / PAGE_SIZE, VM_MAP, PAGE_KERNEL);
if (!va)
return -ENOMEM;
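The change above is a type fix: the third argument of vmap() is a VM_* flags word, not a GFP mask. As a reminder, the signature being satisfied here (a sketch for reference, not part of this diff):

void *vmap(struct page **pages, unsigned int count,
	   unsigned long flags, pgprot_t prot);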
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 6efe0410fb72..d6f22a3a1f7d 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -35,6 +35,8 @@
#include <asm/tlb.h>
#include <asm/cpuidle_haltpoll.h>
+DEFINE_STATIC_KEY_FALSE(kvm_async_pf_enabled);
+
static int kvmapf = 1;
static int __init parse_no_kvmapf(char *arg)
@@ -73,7 +75,6 @@ struct kvm_task_sleep_node {
struct swait_queue_head wq;
u32 token;
int cpu;
- bool halted;
};
static struct kvm_task_sleep_head {
@@ -96,77 +97,64 @@ static struct kvm_task_sleep_node *_find_apf_task(struct kvm_task_sleep_head *b,
return NULL;
}
-/*
- * @interrupt_kernel: Is this called from a routine which interrupts the kernel
- * (other than user space)?
- */
-void kvm_async_pf_task_wait(u32 token, int interrupt_kernel)
+static bool kvm_async_pf_queue_task(u32 token, struct kvm_task_sleep_node *n)
{
u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS);
struct kvm_task_sleep_head *b = &async_pf_sleepers[key];
- struct kvm_task_sleep_node n, *e;
- DECLARE_SWAITQUEUE(wait);
-
- rcu_irq_enter();
+ struct kvm_task_sleep_node *e;
raw_spin_lock(&b->lock);
e = _find_apf_task(b, token);
if (e) {
/* dummy entry exist -> wake up was delivered ahead of PF */
hlist_del(&e->link);
- kfree(e);
raw_spin_unlock(&b->lock);
-
- rcu_irq_exit();
- return;
+ kfree(e);
+ return false;
}
- n.token = token;
- n.cpu = smp_processor_id();
- n.halted = is_idle_task(current) ||
- (IS_ENABLED(CONFIG_PREEMPT_COUNT)
- ? preempt_count() > 1 || rcu_preempt_depth()
- : interrupt_kernel);
- init_swait_queue_head(&n.wq);
- hlist_add_head(&n.link, &b->list);
+ n->token = token;
+ n->cpu = smp_processor_id();
+ init_swait_queue_head(&n->wq);
+ hlist_add_head(&n->link, &b->list);
raw_spin_unlock(&b->lock);
+ return true;
+}
+
+/*
+ * kvm_async_pf_task_wait_schedule - Wait for an async page fault to be handled
+ * @token: Token to identify the sleep node entry
+ *
+ * Invoked from the async pagefault handling code or from the VM exit page
+ * fault handler. In both cases RCU is watching.
+ */
+void kvm_async_pf_task_wait_schedule(u32 token)
+{
+ struct kvm_task_sleep_node n;
+ DECLARE_SWAITQUEUE(wait);
+
+ lockdep_assert_irqs_disabled();
+
+ if (!kvm_async_pf_queue_task(token, &n))
+ return;
for (;;) {
- if (!n.halted)
- prepare_to_swait_exclusive(&n.wq, &wait, TASK_UNINTERRUPTIBLE);
+ prepare_to_swait_exclusive(&n.wq, &wait, TASK_UNINTERRUPTIBLE);
if (hlist_unhashed(&n.link))
break;
- rcu_irq_exit();
-
- if (!n.halted) {
- local_irq_enable();
- schedule();
- local_irq_disable();
- } else {
- /*
- * We cannot reschedule. So halt.
- */
- native_safe_halt();
- local_irq_disable();
- }
-
- rcu_irq_enter();
+ local_irq_enable();
+ schedule();
+ local_irq_disable();
}
- if (!n.halted)
- finish_swait(&n.wq, &wait);
-
- rcu_irq_exit();
- return;
+ finish_swait(&n.wq, &wait);
}
-EXPORT_SYMBOL_GPL(kvm_async_pf_task_wait);
+EXPORT_SYMBOL_GPL(kvm_async_pf_task_wait_schedule);
static void apf_task_wake_one(struct kvm_task_sleep_node *n)
{
hlist_del_init(&n->link);
- if (n->halted)
- smp_send_reschedule(n->cpu);
- else if (swq_has_sleeper(&n->wq))
+ if (swq_has_sleeper(&n->wq))
swake_up_one(&n->wq);
}
@@ -175,12 +163,13 @@ static void apf_task_wake_all(void)
int i;
for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++) {
- struct hlist_node *p, *next;
struct kvm_task_sleep_head *b = &async_pf_sleepers[i];
+ struct kvm_task_sleep_node *n;
+ struct hlist_node *p, *next;
+
raw_spin_lock(&b->lock);
hlist_for_each_safe(p, next, &b->list) {
- struct kvm_task_sleep_node *n =
- hlist_entry(p, typeof(*n), link);
+ n = hlist_entry(p, typeof(*n), link);
if (n->cpu == smp_processor_id())
apf_task_wake_one(n);
}
@@ -221,46 +210,61 @@ again:
n->cpu = smp_processor_id();
init_swait_queue_head(&n->wq);
hlist_add_head(&n->link, &b->list);
- } else
+ } else {
apf_task_wake_one(n);
+ }
raw_spin_unlock(&b->lock);
return;
}
EXPORT_SYMBOL_GPL(kvm_async_pf_task_wake);
-u32 kvm_read_and_reset_pf_reason(void)
+u32 kvm_read_and_reset_apf_flags(void)
{
- u32 reason = 0;
+ u32 flags = 0;
if (__this_cpu_read(apf_reason.enabled)) {
- reason = __this_cpu_read(apf_reason.reason);
- __this_cpu_write(apf_reason.reason, 0);
+ flags = __this_cpu_read(apf_reason.flags);
+ __this_cpu_write(apf_reason.flags, 0);
}
- return reason;
+ return flags;
}
-EXPORT_SYMBOL_GPL(kvm_read_and_reset_pf_reason);
-NOKPROBE_SYMBOL(kvm_read_and_reset_pf_reason);
+EXPORT_SYMBOL_GPL(kvm_read_and_reset_apf_flags);
+NOKPROBE_SYMBOL(kvm_read_and_reset_apf_flags);
-dotraplinkage void
-do_async_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address)
+bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token)
{
- switch (kvm_read_and_reset_pf_reason()) {
- default:
- do_page_fault(regs, error_code, address);
- break;
+ u32 reason = kvm_read_and_reset_apf_flags();
+
+ switch (reason) {
case KVM_PV_REASON_PAGE_NOT_PRESENT:
- /* page is swapped out by the host. */
- kvm_async_pf_task_wait((u32)address, !user_mode(regs));
- break;
case KVM_PV_REASON_PAGE_READY:
+ break;
+ default:
+ return false;
+ }
+
+ /*
+ * If the host managed to inject an async #PF into an interrupt
+ * disabled region, then die hard as this is not going to end well
+ * and the host side is seriously broken.
+ */
+ if (unlikely(!(regs->flags & X86_EFLAGS_IF)))
+ panic("Host injected async #PF in interrupt disabled region\n");
+
+ if (reason == KVM_PV_REASON_PAGE_NOT_PRESENT) {
+ if (unlikely(!(user_mode(regs))))
+ panic("Host injected async #PF in kernel mode\n");
+ /* Page is swapped out by the host. */
+ kvm_async_pf_task_wait_schedule(token);
+ } else {
rcu_irq_enter();
- kvm_async_pf_task_wake((u32)address);
+ kvm_async_pf_task_wake(token);
rcu_irq_exit();
- break;
}
+ return true;
}
-NOKPROBE_SYMBOL(do_async_page_fault);
+NOKPROBE_SYMBOL(__kvm_handle_async_pf);
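The kvm_async_pf_enabled static key introduced above is presumably consumed by a small wrapper on the #PF path; a minimal sketch under that assumption (the wrapper name and placement are assumptions, not taken from this diff):

static __always_inline bool kvm_handle_async_pf(struct pt_regs *regs, u32 token)
{
	/* Assumed fast path: do nothing unless async PF was enabled. */
	if (static_branch_unlikely(&kvm_async_pf_enabled))
		return __kvm_handle_async_pf(regs, token);

	return false;
}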
static void __init paravirt_ops_setup(void)
{
@@ -306,11 +310,11 @@ static notrace void kvm_guest_apic_eoi_write(u32 reg, u32 val)
static void kvm_guest_cpu_init(void)
{
if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF) && kvmapf) {
- u64 pa = slow_virt_to_phys(this_cpu_ptr(&apf_reason));
+ u64 pa;
-#ifdef CONFIG_PREEMPTION
- pa |= KVM_ASYNC_PF_SEND_ALWAYS;
-#endif
+ WARN_ON_ONCE(!static_branch_likely(&kvm_async_pf_enabled));
+
+ pa = slow_virt_to_phys(this_cpu_ptr(&apf_reason));
pa |= KVM_ASYNC_PF_ENABLED;
if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_VMEXIT))
@@ -318,12 +322,12 @@ static void kvm_guest_cpu_init(void)
wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
__this_cpu_write(apf_reason.enabled, 1);
- printk(KERN_INFO"KVM setup async PF for cpu %d\n",
- smp_processor_id());
+ pr_info("KVM setup async PF for cpu %d\n", smp_processor_id());
}
if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) {
unsigned long pa;
+
/* Size alignment is implied but just to make it explicit. */
BUILD_BUG_ON(__alignof__(kvm_apic_eoi) < 4);
__this_cpu_write(kvm_apic_eoi, 0);
@@ -344,8 +348,7 @@ static void kvm_pv_disable_apf(void)
wrmsrl(MSR_KVM_ASYNC_PF_EN, 0);
__this_cpu_write(apf_reason.enabled, 0);
- printk(KERN_INFO"Unregister pv shared memory for cpu %d\n",
- smp_processor_id());
+ pr_info("Unregister pv shared memory for cpu %d\n", smp_processor_id());
}
static void kvm_pv_guest_cpu_reboot(void *unused)
@@ -592,12 +595,6 @@ static int kvm_cpu_down_prepare(unsigned int cpu)
}
#endif
-static void __init kvm_apf_trap_init(void)
-{
- update_intr_gate(X86_TRAP_PF, async_page_fault);
-}
-
-
static void kvm_flush_tlb_others(const struct cpumask *cpumask,
const struct flush_tlb_info *info)
{
@@ -632,8 +629,6 @@ static void __init kvm_guest_init(void)
register_reboot_notifier(&kvm_pv_reboot_nb);
for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++)
raw_spin_lock_init(&async_pf_sleepers[i].lock);
- if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF))
- x86_init.irqs.trap_init = kvm_apf_trap_init;
if (kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
has_steal_clock = 1;
@@ -649,6 +644,9 @@ static void __init kvm_guest_init(void)
if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
apic_set_eoi_write(kvm_guest_apic_eoi_write);
+ if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF) && kvmapf)
+ static_branch_enable(&kvm_async_pf_enabled);
+
#ifdef CONFIG_SMP
smp_ops.smp_prepare_cpus = kvm_smp_prepare_cpus;
smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 6407ea21fa1b..bdcc5146de96 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -25,10 +25,6 @@
#include <linux/atomic.h>
#include <linux/sched/clock.h>
-#if defined(CONFIG_EDAC)
-#include <linux/edac.h>
-#endif
-
#include <asm/cpu_entry_area.h>
#include <asm/traps.h>
#include <asm/mach_traps.h>
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 35638f1c5791..ce6cd220f722 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -191,7 +191,7 @@ void flush_thread(void)
flush_ptrace_hw_breakpoint(tsk);
memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
- fpu__clear(&tsk->thread.fpu);
+ fpu__clear_all(&tsk->thread.fpu);
}
void disable_TSC(void)
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 954b013cc585..538d4e8d6589 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -52,7 +52,7 @@
#include <asm/debugreg.h>
#include <asm/switch_to.h>
#include <asm/vm86.h>
-#include <asm/resctrl_sched.h>
+#include <asm/resctrl.h>
#include <asm/proto.h>
#include "process.h"
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 5ef9d8f25b0e..0c169a5687e1 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -52,7 +52,7 @@
#include <asm/switch_to.h>
#include <asm/xen/hypervisor.h>
#include <asm/vdso.h>
-#include <asm/resctrl_sched.h>
+#include <asm/resctrl.h>
#include <asm/unistd.h>
#include <asm/fsgsbase.h>
#ifdef CONFIG_IA32_EMULATION
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 4b3fa6cd3106..a3767e74c758 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -237,6 +237,9 @@ static u64 __init get_ramdisk_image(void)
ramdisk_image |= (u64)boot_params.ext_ramdisk_image << 32;
+ if (ramdisk_image == 0)
+ ramdisk_image = phys_initrd_start;
+
return ramdisk_image;
}
static u64 __init get_ramdisk_size(void)
@@ -245,6 +248,9 @@ static u64 __init get_ramdisk_size(void)
ramdisk_size |= (u64)boot_params.ext_ramdisk_size << 32;
+ if (ramdisk_size == 0)
+ ramdisk_size = phys_initrd_size;
+
return ramdisk_size;
}
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index e6d7894ad127..fd945ce78554 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -287,9 +287,9 @@ void __init setup_per_cpu_areas(void)
/*
* Sync back kernel address range again. We already did this in
* setup_arch(), but percpu data also needs to be available in
- * the smpboot asm. We can't reliably pick up percpu mappings
- * using vmalloc_fault(), because exception dispatch needs
- * percpu data.
+ * the smpboot asm and arch_sync_kernel_mappings() doesn't sync to
+ * swapper_pg_dir on 32-bit. The per-cpu mappings need to be available
+ * there too.
*
* FIXME: Can the later sync in setup_cpu_entry_areas() replace
* this call?
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 83b74fb38c8f..399f97abee02 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -37,6 +37,7 @@
#include <asm/vm86.h>
#ifdef CONFIG_X86_64
+#include <linux/compat.h>
#include <asm/proto.h>
#include <asm/ia32_unistd.h>
#endif /* CONFIG_X86_64 */
@@ -511,6 +512,31 @@ Efault:
}
#endif /* CONFIG_X86_32 */
+#ifdef CONFIG_X86_X32_ABI
+static int x32_copy_siginfo_to_user(struct compat_siginfo __user *to,
+ const struct kernel_siginfo *from)
+{
+ struct compat_siginfo new;
+
+ copy_siginfo_to_external32(&new, from);
+ if (from->si_signo == SIGCHLD) {
+ new._sifields._sigchld_x32._utime = from->si_utime;
+ new._sifields._sigchld_x32._stime = from->si_stime;
+ }
+ if (copy_to_user(to, &new, sizeof(struct compat_siginfo)))
+ return -EFAULT;
+ return 0;
+}
+
+int copy_siginfo_to_user32(struct compat_siginfo __user *to,
+ const struct kernel_siginfo *from)
+{
+ if (in_x32_syscall())
+ return x32_copy_siginfo_to_user(to, from);
+ return __copy_siginfo_to_user32(to, from);
+}
+#endif /* CONFIG_X86_X32_ABI */
+
static int x32_setup_rt_frame(struct ksignal *ksig,
compat_sigset_t *set,
struct pt_regs *regs)
@@ -543,7 +569,7 @@ static int x32_setup_rt_frame(struct ksignal *ksig,
user_access_end();
if (ksig->ka.sa.sa_flags & SA_SIGINFO) {
- if (__copy_siginfo_to_user32(&frame->info, &ksig->info, true))
+ if (x32_copy_siginfo_to_user(&frame->info, &ksig->info))
return -EFAULT;
}
@@ -732,7 +758,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
/*
* Ensure the signal handler starts with the new fpu state.
*/
- fpu__clear(fpu);
+ fpu__clear_user_states(fpu);
}
signal_setup_done(failed, ksig, stepping);
}
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 2f24c334a938..2467f3dd35d3 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1384,12 +1384,12 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
speculative_store_bypass_ht_init();
}
-void arch_enable_nonboot_cpus_begin(void)
+void arch_thaw_secondary_cpus_begin(void)
{
set_mtrr_aps_delayed_init();
}
-void arch_enable_nonboot_cpus_end(void)
+void arch_thaw_secondary_cpus_end(void)
{
mtrr_aps_init();
}
@@ -1857,24 +1857,25 @@ static bool slv_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq)
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
-#define ICPU(model) \
- {X86_VENDOR_INTEL, 6, model, X86_FEATURE_APERFMPERF, 0}
+#define X86_MATCH(model) \
+ X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6, \
+ INTEL_FAM6_##model, X86_FEATURE_APERFMPERF, NULL)
static const struct x86_cpu_id has_knl_turbo_ratio_limits[] = {
- ICPU(INTEL_FAM6_XEON_PHI_KNL),
- ICPU(INTEL_FAM6_XEON_PHI_KNM),
+ X86_MATCH(XEON_PHI_KNL),
+ X86_MATCH(XEON_PHI_KNM),
{}
};
static const struct x86_cpu_id has_skx_turbo_ratio_limits[] = {
- ICPU(INTEL_FAM6_SKYLAKE_X),
+ X86_MATCH(SKYLAKE_X),
{}
};
static const struct x86_cpu_id has_glm_turbo_ratio_limits[] = {
- ICPU(INTEL_FAM6_ATOM_GOLDMONT),
- ICPU(INTEL_FAM6_ATOM_GOLDMONT_D),
- ICPU(INTEL_FAM6_ATOM_GOLDMONT_PLUS),
+ X86_MATCH(ATOM_GOLDMONT),
+ X86_MATCH(ATOM_GOLDMONT_D),
+ X86_MATCH(ATOM_GOLDMONT_PLUS),
{}
};
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
index b89f6ac6a0c0..b2942b2dbfcf 100644
--- a/arch/x86/kernel/tboot.c
+++ b/arch/x86/kernel/tboot.c
@@ -35,8 +35,7 @@
#include "../realmode/rm/wakeup.h"
/* Global pointer to shared data; NULL means no measured launch. */
-struct tboot *tboot __read_mostly;
-EXPORT_SYMBOL(tboot);
+static struct tboot *tboot __read_mostly;
/* timeout for APs (in secs) to enter wait-for-SIPI state during shutdown */
#define AP_WAIT_TIMEOUT 1
@@ -46,6 +45,11 @@ EXPORT_SYMBOL(tboot);
static u8 tboot_uuid[16] __initdata = TBOOT_UUID;
+bool tboot_enabled(void)
+{
+ return tboot != NULL;
+}
+
void __init tboot_probe(void)
{
/* Look for valid page-aligned address for shared page. */
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index 106e7f87f534..371a6b348e44 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -103,6 +103,9 @@ static __init void x86_late_time_init(void)
*/
x86_init.irqs.intr_mode_init();
tsc_init();
+
+ if (static_cpu_has(X86_FEATURE_WAITPKG))
+ use_tpause_delay();
}
/*
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index d54cffdc7cac..4cc541051994 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -37,10 +37,12 @@
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/io.h>
+#include <linux/hardirq.h>
+#include <linux/atomic.h>
+
#include <asm/stacktrace.h>
#include <asm/processor.h>
#include <asm/debugreg.h>
-#include <linux/atomic.h>
#include <asm/text-patching.h>
#include <asm/ftrace.h>
#include <asm/traps.h>
@@ -82,78 +84,6 @@ static inline void cond_local_irq_disable(struct pt_regs *regs)
local_irq_disable();
}
-/*
- * In IST context, we explicitly disable preemption. This serves two
- * purposes: it makes it much less likely that we would accidentally
- * schedule in IST context and it will force a warning if we somehow
- * manage to schedule by accident.
- */
-void ist_enter(struct pt_regs *regs)
-{
- if (user_mode(regs)) {
- RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
- } else {
- /*
- * We might have interrupted pretty much anything. In
- * fact, if we're a machine check, we can even interrupt
- * NMI processing. We don't want in_nmi() to return true,
- * but we need to notify RCU.
- */
- rcu_nmi_enter();
- }
-
- preempt_disable();
-
- /* This code is a bit fragile. Test it. */
- RCU_LOCKDEP_WARN(!rcu_is_watching(), "ist_enter didn't work");
-}
-NOKPROBE_SYMBOL(ist_enter);
-
-void ist_exit(struct pt_regs *regs)
-{
- preempt_enable_no_resched();
-
- if (!user_mode(regs))
- rcu_nmi_exit();
-}
-
-/**
- * ist_begin_non_atomic() - begin a non-atomic section in an IST exception
- * @regs: regs passed to the IST exception handler
- *
- * IST exception handlers normally cannot schedule. As a special
- * exception, if the exception interrupted userspace code (i.e.
- * user_mode(regs) would return true) and the exception was not
- * a double fault, it can be safe to schedule. ist_begin_non_atomic()
- * begins a non-atomic section within an ist_enter()/ist_exit() region.
- * Callers are responsible for enabling interrupts themselves inside
- * the non-atomic section, and callers must call ist_end_non_atomic()
- * before ist_exit().
- */
-void ist_begin_non_atomic(struct pt_regs *regs)
-{
- BUG_ON(!user_mode(regs));
-
- /*
- * Sanity check: we need to be on the normal thread stack. This
- * will catch asm bugs and any attempt to use ist_preempt_enable
- * from double_fault.
- */
- BUG_ON(!on_thread_stack());
-
- preempt_enable_no_resched();
-}
-
-/**
- * ist_end_non_atomic() - begin a non-atomic section in an IST exception
- *
- * Ends a non-atomic section started with ist_begin_non_atomic().
- */
-void ist_end_non_atomic(void)
-{
- preempt_disable();
-}
-
int is_valid_bugaddr(unsigned long addr)
{
unsigned short ud;
@@ -326,7 +256,6 @@ __visible void __noreturn handle_stack_overflow(const char *message,
}
#endif
-#if defined(CONFIG_X86_64) || defined(CONFIG_DOUBLEFAULT)
/*
* Runs on an IST stack for x86_64 and on a special task stack for x86_32.
*
@@ -363,7 +292,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsign
* The net result is that our #GP handler will think that we
* entered from usermode with the bad user context.
*
- * No need for ist_enter here because we don't use RCU.
+ * No need for nmi_enter() here because we don't use RCU.
*/
if (((long)regs->sp >> P4D_SHIFT) == ESPFIX_PGD_ENTRY &&
regs->cs == __KERNEL_CS &&
@@ -398,7 +327,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsign
}
#endif
- ist_enter(regs);
+ nmi_enter();
notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV);
tsk->thread.error_code = error_code;
@@ -450,7 +379,6 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsign
die("double fault", regs, error_code);
panic("Machine halted.");
}
-#endif
dotraplinkage void do_bounds(struct pt_regs *regs, long error_code)
{
@@ -592,19 +520,13 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
return;
/*
- * Unlike any other non-IST entry, we can be called from a kprobe in
- * non-CONTEXT_KERNEL kernel mode or even during context tracking
- * state changes. Make sure that we wake up RCU even if we're coming
- * from kernel code.
- *
- * This means that we can't schedule even if we came from a
- * preemptible kernel context. That's okay.
+ * Unlike any other non-IST entry, we can be called from pretty much
+ * any location in the kernel through kprobes -- text_poke() will most
+ * likely be handled by poke_int3_handler() above. This means this
+ * handler is effectively NMI-like.
*/
- if (!user_mode(regs)) {
- rcu_nmi_enter();
- preempt_disable();
- }
- RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
+ if (!user_mode(regs))
+ nmi_enter();
#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
@@ -626,10 +548,8 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
cond_local_irq_disable(regs);
exit:
- if (!user_mode(regs)) {
- preempt_enable_no_resched();
- rcu_nmi_exit();
- }
+ if (!user_mode(regs))
+ nmi_exit();
}
NOKPROBE_SYMBOL(do_int3);
@@ -733,7 +653,7 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
unsigned long dr6;
int si_code;
- ist_enter(regs);
+ nmi_enter();
get_debugreg(dr6, 6);
/*
@@ -826,7 +746,7 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
debug_stack_usage_dec();
exit:
- ist_exit(regs);
+ nmi_exit();
}
NOKPROBE_SYMBOL(do_debug);
@@ -983,7 +903,5 @@ void __init trap_init(void)
idt_setup_ist_traps();
- x86_init.irqs.trap_init();
-
idt_setup_debugidt_traps();
}
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index fdd4c1078632..49d925043171 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -41,6 +41,7 @@ EXPORT_SYMBOL(tsc_khz);
* TSC can be unstable due to cpufreq or due to unsynced TSCs
*/
static int __read_mostly tsc_unstable;
+static unsigned int __initdata tsc_early_khz;
static DEFINE_STATIC_KEY_FALSE(__use_tsc);
@@ -59,6 +60,12 @@ struct cyc2ns {
static DEFINE_PER_CPU_ALIGNED(struct cyc2ns, cyc2ns);
+static int __init tsc_early_khz_setup(char *buf)
+{
+ return kstrtouint(buf, 0, &tsc_early_khz);
+}
+early_param("tsc_early_khz", tsc_early_khz_setup);
+
__always_inline void cyc2ns_read_begin(struct cyc2ns_data *data)
{
int seq, idx;
@@ -1412,7 +1419,10 @@ static bool __init determine_cpu_tsc_frequencies(bool early)
if (early) {
cpu_khz = x86_platform.calibrate_cpu();
- tsc_khz = x86_platform.calibrate_tsc();
+ if (tsc_early_khz)
+ tsc_khz = tsc_early_khz;
+ else
+ tsc_khz = x86_platform.calibrate_tsc();
} else {
/* We should not be here with non-native cpu calibration */
WARN_ON(x86_platform.calibrate_cpu != native_calibrate_cpu);
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 85f1a90c55cd..123f1c1f1788 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -79,7 +79,6 @@ struct x86_init_ops x86_init __initdata = {
.irqs = {
.pre_vector_init = init_ISA_irqs,
.intr_init = native_init_IRQ,
- .trap_init = x86_init_noop,
.intr_mode_select = apic_intr_mode_select,
.intr_mode_init = apic_intr_mode_init
},
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 901cd1fdecd9..253b8e875ccd 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -86,12 +86,10 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
best = kvm_find_cpuid_entry(vcpu, 0xD, 0);
if (!best) {
vcpu->arch.guest_supported_xcr0 = 0;
- vcpu->arch.guest_xstate_size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
} else {
vcpu->arch.guest_supported_xcr0 =
(best->eax | ((u64)best->edx << 32)) & supported_xcr0;
- vcpu->arch.guest_xstate_size = best->ebx =
- xstate_required_size(vcpu->arch.xcr0, false);
+ best->ebx = xstate_required_size(vcpu->arch.xcr0, false);
}
best = kvm_find_cpuid_entry(vcpu, 0xD, 1);
@@ -124,8 +122,9 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
MSR_IA32_MISC_ENABLE_MWAIT);
}
- /* Update physical-address width */
+ /* Note, maxphyaddr must be updated before tdp_level. */
vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
+ vcpu->arch.tdp_level = kvm_x86_ops.get_tdp_level(vcpu);
kvm_mmu_reset_context(vcpu);
kvm_pmu_refresh(vcpu);
@@ -297,7 +296,7 @@ void kvm_set_cpu_caps(void)
F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64, MONITOR */ |
0 /* DS-CPL, VMX, SMX, EST */ |
0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ |
- F(FMA) | F(CX16) | 0 /* xTPR Update, PDCM */ |
+ F(FMA) | F(CX16) | 0 /* xTPR Update */ | F(PDCM) |
F(PCID) | 0 /* Reserved, DCA */ | F(XMM4_1) |
F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
0 /* Reserved*/ | F(AES) | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX) |
@@ -712,7 +711,8 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
(1 << KVM_FEATURE_ASYNC_PF_VMEXIT) |
(1 << KVM_FEATURE_PV_SEND_IPI) |
(1 << KVM_FEATURE_POLL_CONTROL) |
- (1 << KVM_FEATURE_PV_SCHED_YIELD);
+ (1 << KVM_FEATURE_PV_SCHED_YIELD) |
+ (1 << KVM_FEATURE_ASYNC_PF_INT);
if (sched_info_on())
entry->eax |= (1 << KVM_FEATURE_STEAL_TIME);
@@ -728,6 +728,9 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
cpuid_entry_override(entry, CPUID_8000_0001_EDX);
cpuid_entry_override(entry, CPUID_8000_0001_ECX);
break;
+ case 0x80000006:
+ /* L2 cache and TLB: pass through host info. */
+ break;
case 0x80000007: /* Advanced power management */
/* invariant TSC is CPUID.80000007H:EDX[8] */
entry->edx &= (1 << 8);
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index 63a70f6a3df3..05434cd9342f 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -303,4 +303,9 @@ static __always_inline void kvm_cpu_cap_check_and_set(unsigned int x86_feature)
kvm_cpu_cap_set(x86_feature);
}
+static inline bool page_address_valid(struct kvm_vcpu *vcpu, gpa_t gpa)
+{
+ return PAGE_ALIGNED(gpa) && !(gpa >> cpuid_maxphyaddr(vcpu));
+}
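A short worked example of the new helper; the 48-bit MAXPHYADDR is an arbitrary illustrative value:

/*
 * With cpuid_maxphyaddr(vcpu) == 48:
 *
 *   gpa = 0x00007f0000001000  -> page aligned, gpa >> 48 == 0  -> valid
 *   gpa = 0x00007f0000001234  -> not page aligned              -> invalid
 *   gpa = 0x0001000000000000  -> gpa >> 48 != 0                -> invalid
 */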
+
#endif
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index bddaba9c68dd..de5476f8683e 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -5798,6 +5798,8 @@ writeback:
}
ctxt->eip = ctxt->_eip;
+ if (ctxt->mode != X86EMUL_MODE_PROT64)
+ ctxt->eip = (u32)ctxt->_eip;
done:
if (rc == X86EMUL_PROPAGATE_FAULT) {
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 54d4b98b49e1..238b78e069fe 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -21,6 +21,7 @@
#include "x86.h"
#include "lapic.h"
#include "ioapic.h"
+#include "cpuid.h"
#include "hyperv.h"
#include <linux/cpu.h>
@@ -266,6 +267,123 @@ static int synic_set_msr(struct kvm_vcpu_hv_synic *synic,
return ret;
}
+static bool kvm_hv_is_syndbg_enabled(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpuid_entry2 *entry;
+
+ entry = kvm_find_cpuid_entry(vcpu,
+ HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES,
+ 0);
+ if (!entry)
+ return false;
+
+ return entry->eax & HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING;
+}
+
+static int kvm_hv_syndbg_complete_userspace(struct kvm_vcpu *vcpu)
+{
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_hv *hv = &kvm->arch.hyperv;
+
+ if (vcpu->run->hyperv.u.syndbg.msr == HV_X64_MSR_SYNDBG_CONTROL)
+ hv->hv_syndbg.control.status =
+ vcpu->run->hyperv.u.syndbg.status;
+ return 1;
+}
+
+static void syndbg_exit(struct kvm_vcpu *vcpu, u32 msr)
+{
+ struct kvm_hv_syndbg *syndbg = vcpu_to_hv_syndbg(vcpu);
+ struct kvm_vcpu_hv *hv_vcpu = &vcpu->arch.hyperv;
+
+ hv_vcpu->exit.type = KVM_EXIT_HYPERV_SYNDBG;
+ hv_vcpu->exit.u.syndbg.msr = msr;
+ hv_vcpu->exit.u.syndbg.control = syndbg->control.control;
+ hv_vcpu->exit.u.syndbg.send_page = syndbg->control.send_page;
+ hv_vcpu->exit.u.syndbg.recv_page = syndbg->control.recv_page;
+ hv_vcpu->exit.u.syndbg.pending_page = syndbg->control.pending_page;
+ vcpu->arch.complete_userspace_io =
+ kvm_hv_syndbg_complete_userspace;
+
+ kvm_make_request(KVM_REQ_HV_EXIT, vcpu);
+}
+
+static int syndbg_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
+{
+ struct kvm_hv_syndbg *syndbg = vcpu_to_hv_syndbg(vcpu);
+
+ if (!kvm_hv_is_syndbg_enabled(vcpu) && !host)
+ return 1;
+
+ trace_kvm_hv_syndbg_set_msr(vcpu->vcpu_id,
+ vcpu_to_hv_vcpu(vcpu)->vp_index, msr, data);
+ switch (msr) {
+ case HV_X64_MSR_SYNDBG_CONTROL:
+ syndbg->control.control = data;
+ if (!host)
+ syndbg_exit(vcpu, msr);
+ break;
+ case HV_X64_MSR_SYNDBG_STATUS:
+ syndbg->control.status = data;
+ break;
+ case HV_X64_MSR_SYNDBG_SEND_BUFFER:
+ syndbg->control.send_page = data;
+ break;
+ case HV_X64_MSR_SYNDBG_RECV_BUFFER:
+ syndbg->control.recv_page = data;
+ break;
+ case HV_X64_MSR_SYNDBG_PENDING_BUFFER:
+ syndbg->control.pending_page = data;
+ if (!host)
+ syndbg_exit(vcpu, msr);
+ break;
+ case HV_X64_MSR_SYNDBG_OPTIONS:
+ syndbg->options = data;
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int syndbg_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host)
+{
+ struct kvm_hv_syndbg *syndbg = vcpu_to_hv_syndbg(vcpu);
+
+ if (!kvm_hv_is_syndbg_enabled(vcpu) && !host)
+ return 1;
+
+ switch (msr) {
+ case HV_X64_MSR_SYNDBG_CONTROL:
+ *pdata = syndbg->control.control;
+ break;
+ case HV_X64_MSR_SYNDBG_STATUS:
+ *pdata = syndbg->control.status;
+ break;
+ case HV_X64_MSR_SYNDBG_SEND_BUFFER:
+ *pdata = syndbg->control.send_page;
+ break;
+ case HV_X64_MSR_SYNDBG_RECV_BUFFER:
+ *pdata = syndbg->control.recv_page;
+ break;
+ case HV_X64_MSR_SYNDBG_PENDING_BUFFER:
+ *pdata = syndbg->control.pending_page;
+ break;
+ case HV_X64_MSR_SYNDBG_OPTIONS:
+ *pdata = syndbg->options;
+ break;
+ default:
+ break;
+ }
+
+ trace_kvm_hv_syndbg_get_msr(vcpu->vcpu_id,
+ vcpu_to_hv_vcpu(vcpu)->vp_index, msr,
+ *pdata);
+
+ return 0;
+}
+
static int synic_get_msr(struct kvm_vcpu_hv_synic *synic, u32 msr, u64 *pdata,
bool host)
{
@@ -800,6 +918,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
case HV_X64_MSR_TSC_EMULATION_CONTROL:
case HV_X64_MSR_TSC_EMULATION_STATUS:
+ case HV_X64_MSR_SYNDBG_OPTIONS:
+ case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER:
r = true;
break;
}
@@ -900,7 +1020,7 @@ static int kvm_hv_msr_set_crash_data(struct kvm_vcpu *vcpu,
* These two equivalencies are implemented in this function.
*/
static bool compute_tsc_page_parameters(struct pvclock_vcpu_time_info *hv_clock,
- HV_REFERENCE_TSC_PAGE *tsc_ref)
+ struct ms_hyperv_tsc_page *tsc_ref)
{
u64 max_mul;
@@ -941,7 +1061,7 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm,
u64 gfn;
BUILD_BUG_ON(sizeof(tsc_seq) != sizeof(hv->tsc_ref.tsc_sequence));
- BUILD_BUG_ON(offsetof(HV_REFERENCE_TSC_PAGE, tsc_sequence) != 0);
+ BUILD_BUG_ON(offsetof(struct ms_hyperv_tsc_page, tsc_sequence) != 0);
if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE))
return;
@@ -1061,6 +1181,9 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
if (!host)
return 1;
break;
+ case HV_X64_MSR_SYNDBG_OPTIONS:
+ case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER:
+ return syndbg_set_msr(vcpu, msr, data, host);
default:
vcpu_unimpl(vcpu, "Hyper-V unhandled wrmsr: 0x%x data 0x%llx\n",
msr, data);
@@ -1190,7 +1313,8 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
return 0;
}
-static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
+static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata,
+ bool host)
{
u64 data = 0;
struct kvm *kvm = vcpu->kvm;
@@ -1227,6 +1351,9 @@ static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
case HV_X64_MSR_TSC_EMULATION_STATUS:
data = hv->hv_tsc_emulation_status;
break;
+ case HV_X64_MSR_SYNDBG_OPTIONS:
+ case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER:
+ return syndbg_get_msr(vcpu, msr, pdata, host);
default:
vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
return 1;
@@ -1316,7 +1443,7 @@ int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host)
int r;
mutex_lock(&vcpu->kvm->arch.hyperv.hv_lock);
- r = kvm_hv_get_msr_pw(vcpu, msr, pdata);
+ r = kvm_hv_get_msr_pw(vcpu, msr, pdata, host);
mutex_unlock(&vcpu->kvm->arch.hyperv.hv_lock);
return r;
} else
@@ -1425,8 +1552,7 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa,
* vcpu->arch.cr3 may not be up-to-date for running vCPUs so we can't
* analyze it here, flush TLB regardless of the specified address space.
*/
- kvm_make_vcpus_request_mask(kvm,
- KVM_REQ_TLB_FLUSH | KVM_REQUEST_NO_WAKEUP,
+ kvm_make_vcpus_request_mask(kvm, KVM_REQ_HV_TLB_FLUSH,
NULL, vcpu_mask, &hv_vcpu->tlb_flush);
ret_success:
@@ -1530,7 +1656,7 @@ ret_success:
bool kvm_hv_hypercall_enabled(struct kvm *kvm)
{
- return READ_ONCE(kvm->arch.hyperv.hv_hypercall) & HV_X64_MSR_HYPERCALL_ENABLE;
+ return READ_ONCE(kvm->arch.hyperv.hv_guest_os_id) != 0;
}
static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result)
@@ -1709,6 +1835,34 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
}
ret = kvm_hv_send_ipi(vcpu, ingpa, outgpa, true, false);
break;
+ case HVCALL_POST_DEBUG_DATA:
+ case HVCALL_RETRIEVE_DEBUG_DATA:
+ if (unlikely(fast)) {
+ ret = HV_STATUS_INVALID_PARAMETER;
+ break;
+ }
+ fallthrough;
+ case HVCALL_RESET_DEBUG_SESSION: {
+ struct kvm_hv_syndbg *syndbg = vcpu_to_hv_syndbg(vcpu);
+
+ if (!kvm_hv_is_syndbg_enabled(vcpu)) {
+ ret = HV_STATUS_INVALID_HYPERCALL_CODE;
+ break;
+ }
+
+ if (!(syndbg->options & HV_X64_SYNDBG_OPTION_USE_HCALLS)) {
+ ret = HV_STATUS_OPERATION_DENIED;
+ break;
+ }
+ vcpu->run->exit_reason = KVM_EXIT_HYPERV;
+ vcpu->run->hyperv.type = KVM_EXIT_HYPERV_HCALL;
+ vcpu->run->hyperv.u.hcall.input = param;
+ vcpu->run->hyperv.u.hcall.params[0] = ingpa;
+ vcpu->run->hyperv.u.hcall.params[1] = outgpa;
+ vcpu->arch.complete_userspace_io =
+ kvm_hv_hypercall_complete_userspace;
+ return 0;
+ }
default:
ret = HV_STATUS_INVALID_HYPERCALL_CODE;
break;
@@ -1796,12 +1950,15 @@ int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
{ .function = HYPERV_CPUID_FEATURES },
{ .function = HYPERV_CPUID_ENLIGHTMENT_INFO },
{ .function = HYPERV_CPUID_IMPLEMENT_LIMITS },
+ { .function = HYPERV_CPUID_SYNDBG_VENDOR_AND_MAX_FUNCTIONS },
+ { .function = HYPERV_CPUID_SYNDBG_INTERFACE },
+ { .function = HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES },
{ .function = HYPERV_CPUID_NESTED_FEATURES },
};
int i, nent = ARRAY_SIZE(cpuid_entries);
- if (kvm_x86_ops.nested_get_evmcs_version)
- evmcs_ver = kvm_x86_ops.nested_get_evmcs_version(vcpu);
+ if (kvm_x86_ops.nested_ops->get_evmcs_version)
+ evmcs_ver = kvm_x86_ops.nested_ops->get_evmcs_version(vcpu);
/* Skip NESTED_FEATURES if eVMCS is not supported */
if (!evmcs_ver)
@@ -1821,7 +1978,7 @@ int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
case HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS:
memcpy(signature, "Linux KVM Hv", 12);
- ent->eax = HYPERV_CPUID_NESTED_FEATURES;
+ ent->eax = HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES;
ent->ebx = signature[0];
ent->ecx = signature[1];
ent->edx = signature[2];
@@ -1860,6 +2017,10 @@ int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
ent->edx |= HV_FEATURE_FREQUENCY_MSRS_AVAILABLE;
ent->edx |= HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE;
+ ent->ebx |= HV_DEBUGGING;
+ ent->edx |= HV_X64_GUEST_DEBUGGING_AVAILABLE;
+ ent->edx |= HV_FEATURE_DEBUG_MSRS_AVAILABLE;
+
/*
* Direct Synthetic timers only make sense with in-kernel
* LAPIC
@@ -1903,6 +2064,24 @@ int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
break;
+ case HYPERV_CPUID_SYNDBG_VENDOR_AND_MAX_FUNCTIONS:
+ memcpy(signature, "Linux KVM Hv", 12);
+
+ ent->eax = 0;
+ ent->ebx = signature[0];
+ ent->ecx = signature[1];
+ ent->edx = signature[2];
+ break;
+
+ case HYPERV_CPUID_SYNDBG_INTERFACE:
+ memcpy(signature, "VS#1\0\0\0\0\0\0\0\0", 12);
+ ent->eax = signature[0];
+ break;
+
+ case HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES:
+ ent->eax |= HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING;
+ break;
+
default:
break;
}
diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
index 757cb578101c..e68c6c2e9649 100644
--- a/arch/x86/kvm/hyperv.h
+++ b/arch/x86/kvm/hyperv.h
@@ -23,6 +23,33 @@
#include <linux/kvm_host.h>
+/*
+ * The #defines related to the synthetic debugger are required by KDNet, but
+ * they are not documented in the Hyper-V TLFS because the synthetic debugger
+ * functionality has been deprecated and is subject to removal in future
+ * versions of Windows.
+ */
+#define HYPERV_CPUID_SYNDBG_VENDOR_AND_MAX_FUNCTIONS 0x40000080
+#define HYPERV_CPUID_SYNDBG_INTERFACE 0x40000081
+#define HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES 0x40000082
+
+/*
+ * Hyper-V synthetic debugger platform capabilities
+ * These are HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES.EAX bits.
+ */
+#define HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING BIT(1)
+
+/* Hyper-V Synthetic debug options MSR */
+#define HV_X64_MSR_SYNDBG_CONTROL 0x400000F1
+#define HV_X64_MSR_SYNDBG_STATUS 0x400000F2
+#define HV_X64_MSR_SYNDBG_SEND_BUFFER 0x400000F3
+#define HV_X64_MSR_SYNDBG_RECV_BUFFER 0x400000F4
+#define HV_X64_MSR_SYNDBG_PENDING_BUFFER 0x400000F5
+#define HV_X64_MSR_SYNDBG_OPTIONS 0x400000FF
+
+/* Hyper-V HV_X64_MSR_SYNDBG_OPTIONS bits */
+#define HV_X64_SYNDBG_OPTION_USE_HCALLS BIT(2)
+
static inline struct kvm_vcpu_hv *vcpu_to_hv_vcpu(struct kvm_vcpu *vcpu)
{
return &vcpu->arch.hyperv;
@@ -46,6 +73,11 @@ static inline struct kvm_vcpu *synic_to_vcpu(struct kvm_vcpu_hv_synic *synic)
return hv_vcpu_to_vcpu(container_of(synic, struct kvm_vcpu_hv, synic));
}
+static inline struct kvm_hv_syndbg *vcpu_to_hv_syndbg(struct kvm_vcpu *vcpu)
+{
+ return &vcpu->kvm->arch.hyperv.hv_syndbg;
+}
+
int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host);
int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host);
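As a point of reference, below is a minimal user-space sketch of how a VMM might consume the KVM_EXIT_HYPERV_HCALL exit produced by the HVCALL_POST_DEBUG_DATA / HVCALL_RETRIEVE_DEBUG_DATA / HVCALL_RESET_DEBUG_SESSION hunk above. It is not part of this patch: the hcall.result field and the zero success status are assumptions, while input and params[] mirror exactly what kvm_hv_hypercall() stores before returning to user space.

#include <linux/kvm.h>
#include <stdint.h>
#include <stdio.h>

/* Called from the VMM's run loop after ioctl(vcpu_fd, KVM_RUN, 0) returns. */
static void handle_hv_hcall(struct kvm_run *run)
{
	if (run->exit_reason != KVM_EXIT_HYPERV ||
	    run->hyperv.type != KVM_EXIT_HYPERV_HCALL)
		return;

	uint64_t input   = run->hyperv.u.hcall.input;     /* hypercall input value     */
	uint64_t in_gpa  = run->hyperv.u.hcall.params[0]; /* ingpa stored by the hunk  */
	uint64_t out_gpa = run->hyperv.u.hcall.params[1]; /* outgpa stored by the hunk */

	fprintf(stderr, "syndbg hcall %#llx in=%#llx out=%#llx\n",
		(unsigned long long)(input & 0xffff),
		(unsigned long long)in_gpa, (unsigned long long)out_gpa);

	/* Assumed completion protocol: hand a success status (0) back to KVM. */
	run->hyperv.u.hcall.result = 0;
}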
diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
index 2fb2e3c80724..660401700075 100644
--- a/arch/x86/kvm/ioapic.h
+++ b/arch/x86/kvm/ioapic.h
@@ -3,8 +3,8 @@
#define __KVM_IO_APIC_H
#include <linux/kvm_host.h>
-
#include <kvm/iodev.h>
+#include "irq.h"
struct kvm;
struct kvm_vcpu;
@@ -108,11 +108,7 @@ do { \
static inline int ioapic_in_kernel(struct kvm *kvm)
{
- int mode = kvm->arch.irqchip_mode;
-
- /* Matches smp_wmb() when setting irqchip_mode */
- smp_rmb();
- return mode == KVM_IRQCHIP_KERNEL;
+ return irqchip_kernel(kvm);
}
void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index e330e7d125f7..99d118ffc67d 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -83,6 +83,7 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v)
return kvm_apic_has_interrupt(v) != -1; /* LAPIC */
}
+EXPORT_SYMBOL_GPL(kvm_cpu_has_injectable_intr);
/*
* check if there is pending interrupt without
@@ -159,6 +160,8 @@ void __kvm_migrate_timers(struct kvm_vcpu *vcpu)
{
__kvm_migrate_apic_timer(vcpu);
__kvm_migrate_pit_timer(vcpu);
+ if (kvm_x86_ops.migrate_timers)
+ kvm_x86_ops.migrate_timers(vcpu);
}
bool kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args)
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index f173ab6b407e..9b64abf9b3f1 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -16,7 +16,6 @@
#include <linux/spinlock.h>
#include <kvm/iodev.h>
-#include "ioapic.h"
#include "lapic.h"
#define PIC_NUM_PINS 16
@@ -66,15 +65,6 @@ void kvm_pic_destroy(struct kvm *kvm);
int kvm_pic_read_irq(struct kvm *kvm);
void kvm_pic_update_irq(struct kvm_pic *s);
-static inline int pic_in_kernel(struct kvm *kvm)
-{
- int mode = kvm->arch.irqchip_mode;
-
- /* Matches smp_wmb() when setting irqchip_mode */
- smp_rmb();
- return mode == KVM_IRQCHIP_KERNEL;
-}
-
static inline int irqchip_split(struct kvm *kvm)
{
int mode = kvm->arch.irqchip_mode;
@@ -93,6 +83,11 @@ static inline int irqchip_kernel(struct kvm *kvm)
return mode == KVM_IRQCHIP_KERNEL;
}
+static inline int pic_in_kernel(struct kvm *kvm)
+{
+ return irqchip_kernel(kvm);
+}
+
static inline int irqchip_in_kernel(struct kvm *kvm)
{
int mode = kvm->arch.irqchip_mode;
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
index 62558b9bdda7..ff2d0e9ca3bc 100644
--- a/arch/x86/kvm/kvm_cache_regs.h
+++ b/arch/x86/kvm/kvm_cache_regs.h
@@ -116,8 +116,9 @@ static inline u64 kvm_pdptr_read(struct kvm_vcpu *vcpu, int index)
static inline ulong kvm_read_cr0_bits(struct kvm_vcpu *vcpu, ulong mask)
{
ulong tmask = mask & KVM_POSSIBLE_CR0_GUEST_BITS;
- if (tmask & vcpu->arch.cr0_guest_owned_bits)
- kvm_x86_ops.decache_cr0_guest_bits(vcpu);
+ if ((tmask & vcpu->arch.cr0_guest_owned_bits) &&
+ !kvm_register_is_available(vcpu, VCPU_EXREG_CR0))
+ kvm_x86_ops.cache_reg(vcpu, VCPU_EXREG_CR0);
return vcpu->arch.cr0 & mask;
}
@@ -129,8 +130,9 @@ static inline ulong kvm_read_cr0(struct kvm_vcpu *vcpu)
static inline ulong kvm_read_cr4_bits(struct kvm_vcpu *vcpu, ulong mask)
{
ulong tmask = mask & KVM_POSSIBLE_CR4_GUEST_BITS;
- if (tmask & vcpu->arch.cr4_guest_owned_bits)
- kvm_x86_ops.decache_cr4_guest_bits(vcpu);
+ if ((tmask & vcpu->arch.cr4_guest_owned_bits) &&
+ !kvm_register_is_available(vcpu, VCPU_EXREG_CR4))
+ kvm_x86_ops.cache_reg(vcpu, VCPU_EXREG_CR4);
return vcpu->arch.cr4 & mask;
}
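The kvm_cache_regs.h hunk above replaces the unconditional decache hooks with a lazy refresh: the vendor cache_reg() callback runs only when a requested bit is guest-owned and the register has not yet been marked available for this exit. A self-contained toy model of that pattern follows; every name and value in it is illustrative rather than KVM's own.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct toy_vcpu {
	uint64_t cr0;			/* cached CR0 value                   */
	uint64_t cr0_guest_owned_bits;	/* bits the guest may flip unnoticed  */
	bool     cr0_available;		/* cache already refreshed this exit? */
};

/* Stand-in for kvm_x86_ops.cache_reg(vcpu, VCPU_EXREG_CR0). */
static void refresh_cr0(struct toy_vcpu *v)
{
	v->cr0 = 0x80050033;	/* pretend this came from VMREAD */
	v->cr0_available = true;
}

static uint64_t read_cr0_bits(struct toy_vcpu *v, uint64_t mask)
{
	/* Go to hardware only if a requested bit is guest-owned and stale. */
	if ((mask & v->cr0_guest_owned_bits) && !v->cr0_available)
		refresh_cr0(v);
	return v->cr0 & mask;
}

int main(void)
{
	struct toy_vcpu v = { .cr0_guest_owned_bits = 1ULL << 3 };	/* CR0.TS guest-owned */

	printf("CR0.TS = %llu\n",
	       (unsigned long long)(read_cr0_bits(&v, 1ULL << 3) >> 3));
	return 0;
}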
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 9af25c97612a..34a7e0533dad 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -36,6 +36,7 @@
#include <linux/jump_label.h>
#include "kvm_cache_regs.h"
#include "irq.h"
+#include "ioapic.h"
#include "trace.h"
#include "x86.h"
#include "cpuid.h"
@@ -110,11 +111,18 @@ static inline u32 kvm_x2apic_id(struct kvm_lapic *apic)
return apic->vcpu->vcpu_id;
}
-bool kvm_can_post_timer_interrupt(struct kvm_vcpu *vcpu)
+static bool kvm_can_post_timer_interrupt(struct kvm_vcpu *vcpu)
{
return pi_inject_timer && kvm_vcpu_apicv_active(vcpu);
}
-EXPORT_SYMBOL_GPL(kvm_can_post_timer_interrupt);
+
+bool kvm_can_use_hv_timer(struct kvm_vcpu *vcpu)
+{
+ return kvm_x86_ops.set_hv_timer
+ && !(kvm_mwait_in_guest(vcpu->kvm) ||
+ kvm_can_post_timer_interrupt(vcpu));
+}
+EXPORT_SYMBOL_GPL(kvm_can_use_hv_timer);
static bool kvm_use_posted_timer_interrupt(struct kvm_vcpu *vcpu)
{
@@ -1593,7 +1601,7 @@ static void kvm_apic_inject_pending_timer_irqs(struct kvm_lapic *apic)
}
}
-static void apic_timer_expired(struct kvm_lapic *apic)
+static void apic_timer_expired(struct kvm_lapic *apic, bool from_timer_fn)
{
struct kvm_vcpu *vcpu = apic->vcpu;
struct kvm_timer *ktimer = &apic->lapic_timer;
@@ -1604,6 +1612,12 @@ static void apic_timer_expired(struct kvm_lapic *apic)
if (apic_lvtt_tscdeadline(apic) || ktimer->hv_timer_in_use)
ktimer->expired_tscdeadline = ktimer->tscdeadline;
+ if (!from_timer_fn && vcpu->arch.apicv_active) {
+ WARN_ON(kvm_get_running_vcpu() != vcpu);
+ kvm_apic_inject_pending_timer_irqs(apic);
+ return;
+ }
+
if (kvm_use_posted_timer_interrupt(apic->vcpu)) {
if (apic->lapic_timer.timer_advance_ns)
__kvm_wait_lapic_expire(vcpu);
@@ -1643,18 +1657,23 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic)
expire = ktime_sub_ns(expire, ktimer->timer_advance_ns);
hrtimer_start(&ktimer->timer, expire, HRTIMER_MODE_ABS_HARD);
} else
- apic_timer_expired(apic);
+ apic_timer_expired(apic, false);
local_irq_restore(flags);
}
+static inline u64 tmict_to_ns(struct kvm_lapic *apic, u32 tmict)
+{
+ return (u64)tmict * APIC_BUS_CYCLE_NS * (u64)apic->divide_count;
+}
+
static void update_target_expiration(struct kvm_lapic *apic, uint32_t old_divisor)
{
ktime_t now, remaining;
u64 ns_remaining_old, ns_remaining_new;
- apic->lapic_timer.period = (u64)kvm_lapic_get_reg(apic, APIC_TMICT)
- * APIC_BUS_CYCLE_NS * apic->divide_count;
+ apic->lapic_timer.period =
+ tmict_to_ns(apic, kvm_lapic_get_reg(apic, APIC_TMICT));
limit_periodic_timer_frequency(apic);
now = ktime_get();
@@ -1672,14 +1691,15 @@ static void update_target_expiration(struct kvm_lapic *apic, uint32_t old_diviso
apic->lapic_timer.target_expiration = ktime_add_ns(now, ns_remaining_new);
}
-static bool set_target_expiration(struct kvm_lapic *apic)
+static bool set_target_expiration(struct kvm_lapic *apic, u32 count_reg)
{
ktime_t now;
u64 tscl = rdtsc();
+ s64 deadline;
now = ktime_get();
- apic->lapic_timer.period = (u64)kvm_lapic_get_reg(apic, APIC_TMICT)
- * APIC_BUS_CYCLE_NS * apic->divide_count;
+ apic->lapic_timer.period =
+ tmict_to_ns(apic, kvm_lapic_get_reg(apic, APIC_TMICT));
if (!apic->lapic_timer.period) {
apic->lapic_timer.tscdeadline = 0;
@@ -1687,10 +1707,32 @@ static bool set_target_expiration(struct kvm_lapic *apic)
}
limit_periodic_timer_frequency(apic);
+ deadline = apic->lapic_timer.period;
+
+ if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) {
+ if (unlikely(count_reg != APIC_TMICT)) {
+ deadline = tmict_to_ns(apic,
+ kvm_lapic_get_reg(apic, count_reg));
+ if (unlikely(deadline <= 0))
+ deadline = apic->lapic_timer.period;
+ else if (unlikely(deadline > apic->lapic_timer.period)) {
+ pr_info_ratelimited(
+ "kvm: vcpu %i: requested lapic timer restore with "
+ "starting count register %#x=%u (%lld ns) > initial count (%lld ns). "
+ "Using initial count to start timer.\n",
+ apic->vcpu->vcpu_id,
+ count_reg,
+ kvm_lapic_get_reg(apic, count_reg),
+ deadline, apic->lapic_timer.period);
+ kvm_lapic_set_reg(apic, count_reg, 0);
+ deadline = apic->lapic_timer.period;
+ }
+ }
+ }
apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
- nsec_to_cycles(apic->vcpu, apic->lapic_timer.period);
- apic->lapic_timer.target_expiration = ktime_add_ns(now, apic->lapic_timer.period);
+ nsec_to_cycles(apic->vcpu, deadline);
+ apic->lapic_timer.target_expiration = ktime_add_ns(now, deadline);
return true;
}
@@ -1723,7 +1765,7 @@ static void start_sw_period(struct kvm_lapic *apic)
if (ktime_after(ktime_get(),
apic->lapic_timer.target_expiration)) {
- apic_timer_expired(apic);
+ apic_timer_expired(apic, false);
if (apic_lvtt_oneshot(apic))
return;
@@ -1760,7 +1802,7 @@ static bool start_hv_timer(struct kvm_lapic *apic)
bool expired;
WARN_ON(preemptible());
- if (!kvm_x86_ops.set_hv_timer)
+ if (!kvm_can_use_hv_timer(vcpu))
return false;
if (!ktimer->tscdeadline)
@@ -1785,7 +1827,7 @@ static bool start_hv_timer(struct kvm_lapic *apic)
if (atomic_read(&ktimer->pending)) {
cancel_hv_timer(apic);
} else if (expired) {
- apic_timer_expired(apic);
+ apic_timer_expired(apic, false);
cancel_hv_timer(apic);
}
}
@@ -1833,9 +1875,9 @@ void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
/* If the preempt notifier has already run, it also called apic_timer_expired */
if (!apic->lapic_timer.hv_timer_in_use)
goto out;
- WARN_ON(swait_active(&vcpu->wq));
+ WARN_ON(rcuwait_active(&vcpu->wait));
cancel_hv_timer(apic);
- apic_timer_expired(apic);
+ apic_timer_expired(apic, false);
if (apic_lvtt_period(apic) && apic->lapic_timer.period) {
advance_periodic_target_expiration(apic);
@@ -1872,17 +1914,22 @@ void kvm_lapic_restart_hv_timer(struct kvm_vcpu *vcpu)
restart_apic_timer(apic);
}
-static void start_apic_timer(struct kvm_lapic *apic)
+static void __start_apic_timer(struct kvm_lapic *apic, u32 count_reg)
{
atomic_set(&apic->lapic_timer.pending, 0);
if ((apic_lvtt_period(apic) || apic_lvtt_oneshot(apic))
- && !set_target_expiration(apic))
+ && !set_target_expiration(apic, count_reg))
return;
restart_apic_timer(apic);
}
+static void start_apic_timer(struct kvm_lapic *apic)
+{
+ __start_apic_timer(apic, APIC_TMICT);
+}
+
static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
{
bool lvt0_in_nmi_mode = apic_lvt_nmi_mode(lvt0_val);
@@ -2336,7 +2383,7 @@ static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic, lapic_timer);
- apic_timer_expired(apic);
+ apic_timer_expired(apic, true);
if (lapic_is_periodic(apic)) {
advance_periodic_target_expiration(apic);
@@ -2493,6 +2540,14 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
{
memcpy(s->regs, vcpu->arch.apic->regs, sizeof(*s));
+
+ /*
+ * Get calculated timer current count for remaining timer period (if
+ * any) and store it in the returned register set.
+ */
+ __kvm_lapic_set_reg(s->regs, APIC_TMCCT,
+ __apic_read(vcpu->arch.apic, APIC_TMCCT));
+
return kvm_apic_state_fixup(vcpu, s, false);
}
@@ -2520,7 +2575,7 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
apic_update_lvtt(apic);
apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
update_divide_count(apic);
- start_apic_timer(apic);
+ __start_apic_timer(apic, APIC_TMCCT);
kvm_apic_update_apicv(vcpu);
apic->highest_isr_cache = -1;
if (vcpu->arch.apicv_active) {
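The lapic.c changes above make KVM_GET_LAPIC return a computed APIC_TMCCT and make KVM_SET_LAPIC restart the timer from that register, so a saved and restored guest resumes with only the remaining period instead of a freshly armed one. The stand-alone model below reproduces the tmict_to_ns() arithmetic; APIC_BUS_CYCLE_NS is assumed to be 1 as in KVM's lapic.h, and the register values are invented for the example.

#include <stdint.h>
#include <stdio.h>

#define APIC_BUS_CYCLE_NS 1ULL		/* assumed to match KVM's constant */

/* Mirror of tmict_to_ns(): count-register ticks -> nanoseconds. */
static uint64_t tmict_to_ns(uint32_t count, uint32_t divide_count)
{
	return (uint64_t)count * APIC_BUS_CYCLE_NS * divide_count;
}

int main(void)
{
	uint32_t divide = 16;		/* divide configuration register       */
	uint32_t tmict  = 1000000;	/* initial count programmed by guest   */
	uint32_t tmcct  = 250000;	/* current count captured at save time */

	/* __start_apic_timer(apic, APIC_TMICT) arms a full period ...        */
	printf("full period: %llu ns\n", (unsigned long long)tmict_to_ns(tmict, divide));
	/* ... restore passes APIC_TMCCT, i.e. only what was left to run.     */
	printf("on restore : %llu ns\n", (unsigned long long)tmict_to_ns(tmcct, divide));
	return 0;
}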
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index a0ffb4331418..754f29beb83e 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -161,9 +161,14 @@ static inline u32 kvm_lapic_get_reg(struct kvm_lapic *apic, int reg_off)
return *((u32 *) (apic->regs + reg_off));
}
+static inline void __kvm_lapic_set_reg(char *regs, int reg_off, u32 val)
+{
+ *((u32 *) (regs + reg_off)) = val;
+}
+
static inline void kvm_lapic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val)
{
- *((u32 *) (apic->regs + reg_off)) = val;
+ __kvm_lapic_set_reg(apic->regs, reg_off, val);
}
extern struct static_key kvm_no_apic_vcpu;
@@ -245,7 +250,7 @@ void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu);
void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu);
bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu);
void kvm_lapic_restart_hv_timer(struct kvm_vcpu *vcpu);
-bool kvm_can_post_timer_interrupt(struct kvm_vcpu *vcpu);
+bool kvm_can_use_hv_timer(struct kvm_vcpu *vcpu);
static inline enum lapic_mode kvm_apic_mode(u64 apic_base)
{
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 8a3b1bce722a..0ad06bfe2c2c 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -51,13 +51,13 @@ static inline u64 rsvd_bits(int s, int e)
return ((1ULL << (e - s + 1)) - 1) << s;
}
-void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value, u64 access_mask);
+void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 access_mask);
void
reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context);
void kvm_init_mmu(struct kvm_vcpu *vcpu, bool reset_roots);
-void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu);
+void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, u32 cr0, u32 cr4, u32 efer);
void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
bool accessed_dirty, gpa_t new_eptp);
bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 8071952e9cf2..fdd05c233308 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -16,6 +16,7 @@
*/
#include "irq.h"
+#include "ioapic.h"
#include "mmu.h"
#include "x86.h"
#include "kvm_cache_regs.h"
@@ -78,6 +79,9 @@ module_param_cb(nx_huge_pages_recovery_ratio, &nx_huge_pages_recovery_ratio_ops,
&nx_huge_pages_recovery_ratio, 0644);
__MODULE_PARM_TYPE(nx_huge_pages_recovery_ratio, "uint");
+static bool __read_mostly force_flush_and_sync_on_reuse;
+module_param_named(flush_on_reuse, force_flush_and_sync_on_reuse, bool, 0644);
+
/*
* When setting this variable to true it enables Two-Dimensional-Paging
* where the hardware walks 2 page tables:
@@ -244,7 +248,6 @@ static u64 __read_mostly shadow_x_mask; /* mutual exclusive with nx_mask */
static u64 __read_mostly shadow_user_mask;
static u64 __read_mostly shadow_accessed_mask;
static u64 __read_mostly shadow_dirty_mask;
-static u64 __read_mostly shadow_mmio_mask;
static u64 __read_mostly shadow_mmio_value;
static u64 __read_mostly shadow_mmio_access_mask;
static u64 __read_mostly shadow_present_mask;
@@ -331,19 +334,19 @@ static void kvm_flush_remote_tlbs_with_address(struct kvm *kvm,
kvm_flush_remote_tlbs_with_range(kvm, &range);
}
-void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value, u64 access_mask)
+void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 access_mask)
{
BUG_ON((u64)(unsigned)access_mask != access_mask);
- BUG_ON((mmio_mask & mmio_value) != mmio_value);
+ WARN_ON(mmio_value & (shadow_nonpresent_or_rsvd_mask << shadow_nonpresent_or_rsvd_mask_len));
+ WARN_ON(mmio_value & shadow_nonpresent_or_rsvd_lower_gfn_mask);
shadow_mmio_value = mmio_value | SPTE_MMIO_MASK;
- shadow_mmio_mask = mmio_mask | SPTE_SPECIAL_MASK;
shadow_mmio_access_mask = access_mask;
}
EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask);
static bool is_mmio_spte(u64 spte)
{
- return (spte & shadow_mmio_mask) == shadow_mmio_value;
+ return (spte & SPTE_SPECIAL_MASK) == SPTE_MMIO_MASK;
}
static inline bool sp_ad_disabled(struct kvm_mmu_page *sp)
@@ -566,7 +569,6 @@ static void kvm_mmu_reset_all_pte_masks(void)
shadow_dirty_mask = 0;
shadow_nx_mask = 0;
shadow_x_mask = 0;
- shadow_mmio_mask = 0;
shadow_present_mask = 0;
shadow_acc_track_mask = 0;
@@ -583,16 +585,15 @@ static void kvm_mmu_reset_all_pte_masks(void)
* the most significant bits of legal physical address space.
*/
shadow_nonpresent_or_rsvd_mask = 0;
- low_phys_bits = boot_cpu_data.x86_cache_bits;
- if (boot_cpu_data.x86_cache_bits <
- 52 - shadow_nonpresent_or_rsvd_mask_len) {
+ low_phys_bits = boot_cpu_data.x86_phys_bits;
+ if (boot_cpu_has_bug(X86_BUG_L1TF) &&
+ !WARN_ON_ONCE(boot_cpu_data.x86_cache_bits >=
+ 52 - shadow_nonpresent_or_rsvd_mask_len)) {
+ low_phys_bits = boot_cpu_data.x86_cache_bits
+ - shadow_nonpresent_or_rsvd_mask_len;
shadow_nonpresent_or_rsvd_mask =
- rsvd_bits(boot_cpu_data.x86_cache_bits -
- shadow_nonpresent_or_rsvd_mask_len,
- boot_cpu_data.x86_cache_bits - 1);
- low_phys_bits -= shadow_nonpresent_or_rsvd_mask_len;
- } else
- WARN_ON_ONCE(boot_cpu_has_bug(X86_BUG_L1TF));
+ rsvd_bits(low_phys_bits, boot_cpu_data.x86_cache_bits - 1);
+ }
shadow_nonpresent_or_rsvd_lower_gfn_mask =
GENMASK_ULL(low_phys_bits - 1, PAGE_SHIFT);
@@ -620,7 +621,7 @@ static int is_large_pte(u64 pte)
static int is_last_spte(u64 pte, int level)
{
- if (level == PT_PAGE_TABLE_LEVEL)
+ if (level == PG_LEVEL_4K)
return 1;
if (is_large_pte(pte))
return 1;
@@ -1196,7 +1197,7 @@ static void update_gfn_disallow_lpage_count(struct kvm_memory_slot *slot,
struct kvm_lpage_info *linfo;
int i;
- for (i = PT_DIRECTORY_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) {
+ for (i = PG_LEVEL_2M; i <= KVM_MAX_HUGEPAGE_LEVEL; ++i) {
linfo = lpage_info_slot(gfn, slot, i);
linfo->disallow_lpage += count;
WARN_ON(linfo->disallow_lpage < 0);
@@ -1225,7 +1226,7 @@ static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
slot = __gfn_to_memslot(slots, gfn);
/* the non-leaf shadow pages are keeping readonly. */
- if (sp->role.level > PT_PAGE_TABLE_LEVEL)
+ if (sp->role.level > PG_LEVEL_4K)
return kvm_slot_page_track_add_page(kvm, slot, gfn,
KVM_PAGE_TRACK_WRITE);
@@ -1253,7 +1254,7 @@ static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
gfn = sp->gfn;
slots = kvm_memslots_for_spte_role(kvm, sp->role);
slot = __gfn_to_memslot(slots, gfn);
- if (sp->role.level > PT_PAGE_TABLE_LEVEL)
+ if (sp->role.level > PG_LEVEL_4K)
return kvm_slot_page_track_remove_page(kvm, slot, gfn,
KVM_PAGE_TRACK_WRITE);
@@ -1398,7 +1399,7 @@ static struct kvm_rmap_head *__gfn_to_rmap(gfn_t gfn, int level,
unsigned long idx;
idx = gfn_to_index(gfn, slot->base_gfn, level);
- return &slot->arch.rmap[level - PT_PAGE_TABLE_LEVEL][idx];
+ return &slot->arch.rmap[level - PG_LEVEL_4K][idx];
}
static struct kvm_rmap_head *gfn_to_rmap(struct kvm *kvm, gfn_t gfn,
@@ -1529,8 +1530,7 @@ static void drop_spte(struct kvm *kvm, u64 *sptep)
static bool __drop_large_spte(struct kvm *kvm, u64 *sptep)
{
if (is_large_pte(*sptep)) {
- WARN_ON(page_header(__pa(sptep))->role.level ==
- PT_PAGE_TABLE_LEVEL);
+ WARN_ON(page_header(__pa(sptep))->role.level == PG_LEVEL_4K);
drop_spte(kvm, sptep);
--kvm->stat.lpages;
return true;
@@ -1682,7 +1682,7 @@ static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
while (mask) {
rmap_head = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask),
- PT_PAGE_TABLE_LEVEL, slot);
+ PG_LEVEL_4K, slot);
__rmap_write_protect(kvm, rmap_head, false);
/* clear the first set bit */
@@ -1708,7 +1708,7 @@ void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm,
while (mask) {
rmap_head = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask),
- PT_PAGE_TABLE_LEVEL, slot);
+ PG_LEVEL_4K, slot);
__rmap_clear_dirty(kvm, rmap_head);
/* clear the first set bit */
@@ -1760,7 +1760,7 @@ bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
int i;
bool write_protected = false;
- for (i = PT_PAGE_TABLE_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) {
+ for (i = PG_LEVEL_4K; i <= KVM_MAX_HUGEPAGE_LEVEL; ++i) {
rmap_head = __gfn_to_rmap(gfn, i, slot);
write_protected |= __rmap_write_protect(kvm, rmap_head, true);
}
@@ -1948,8 +1948,8 @@ static int kvm_handle_hva_range(struct kvm *kvm,
gfn_start = hva_to_gfn_memslot(hva_start, memslot);
gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);
- for_each_slot_rmap_range(memslot, PT_PAGE_TABLE_LEVEL,
- PT_MAX_HUGEPAGE_LEVEL,
+ for_each_slot_rmap_range(memslot, PG_LEVEL_4K,
+ KVM_MAX_HUGEPAGE_LEVEL,
gfn_start, gfn_end - 1,
&iterator)
ret |= handler(kvm, iterator.rmap, memslot,
@@ -2153,10 +2153,6 @@ static int nonpaging_sync_page(struct kvm_vcpu *vcpu,
return 0;
}
-static void nonpaging_invlpg(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root)
-{
-}
-
static void nonpaging_update_pte(struct kvm_vcpu *vcpu,
struct kvm_mmu_page *sp, u64 *spte,
const void *pte)
@@ -2313,7 +2309,7 @@ static void kvm_mmu_flush_or_zap(struct kvm_vcpu *vcpu,
return;
if (local_flush)
- kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+ kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
}
#ifdef CONFIG_KVM_MMU_AUDIT
@@ -2347,7 +2343,7 @@ static bool kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn,
if (!s->unsync)
continue;
- WARN_ON(s->role.level != PT_PAGE_TABLE_LEVEL);
+ WARN_ON(s->role.level != PG_LEVEL_4K);
ret |= kvm_sync_page(vcpu, s, invalid_list);
}
@@ -2376,7 +2372,7 @@ static int mmu_pages_next(struct kvm_mmu_pages *pvec,
int level = sp->role.level;
parents->idx[level-1] = idx;
- if (level == PT_PAGE_TABLE_LEVEL)
+ if (level == PG_LEVEL_4K)
break;
parents->parent[level-2] = sp;
@@ -2398,7 +2394,7 @@ static int mmu_pages_first(struct kvm_mmu_pages *pvec,
sp = pvec->page[0].sp;
level = sp->role.level;
- WARN_ON(level == PT_PAGE_TABLE_LEVEL);
+ WARN_ON(level == PG_LEVEL_4K);
parents->parent[level-2] = sp;
@@ -2520,11 +2516,11 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
break;
WARN_ON(!list_empty(&invalid_list));
- kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+ kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
}
if (sp->unsync_children)
- kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
+ kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
__clear_sp_write_flooding_count(sp);
trace_kvm_mmu_get_page(sp, false);
@@ -2546,11 +2542,10 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
* be inconsistent with guest page table.
*/
account_shadowed(vcpu->kvm, sp);
- if (level == PT_PAGE_TABLE_LEVEL &&
- rmap_write_protect(vcpu, gfn))
+ if (level == PG_LEVEL_4K && rmap_write_protect(vcpu, gfn))
kvm_flush_remote_tlbs_with_address(vcpu->kvm, gfn, 1);
- if (level > PT_PAGE_TABLE_LEVEL && need_sync)
+ if (level > PG_LEVEL_4K && need_sync)
flush |= kvm_sync_pages(vcpu, gfn, &invalid_list);
}
clear_page(sp->spt);
@@ -2601,7 +2596,7 @@ static void shadow_walk_init(struct kvm_shadow_walk_iterator *iterator,
static bool shadow_walk_okay(struct kvm_shadow_walk_iterator *iterator)
{
- if (iterator->level < PT_PAGE_TABLE_LEVEL)
+ if (iterator->level < PG_LEVEL_4K)
return false;
iterator->index = SHADOW_PT_INDEX(iterator->addr, iterator->level);
@@ -2722,7 +2717,7 @@ static int mmu_zap_unsync_children(struct kvm *kvm,
struct mmu_page_path parents;
struct kvm_mmu_pages pages;
- if (parent->role.level == PT_PAGE_TABLE_LEVEL)
+ if (parent->role.level == PG_LEVEL_4K)
return 0;
while (mmu_unsync_walk(parent, &pages)) {
@@ -2921,7 +2916,7 @@ static bool mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
if (sp->unsync)
continue;
- WARN_ON(sp->role.level != PT_PAGE_TABLE_LEVEL);
+ WARN_ON(sp->role.level != PG_LEVEL_4K);
kvm_unsync_page(vcpu, sp);
}
@@ -3020,7 +3015,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
if (!speculative)
spte |= spte_shadow_accessed_mask(spte);
- if (level > PT_PAGE_TABLE_LEVEL && (pte_access & ACC_EXEC_MASK) &&
+ if (level > PG_LEVEL_4K && (pte_access & ACC_EXEC_MASK) &&
is_nx_huge_page_enabled()) {
pte_access &= ~ACC_EXEC_MASK;
}
@@ -3033,7 +3028,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
if (pte_access & ACC_USER_MASK)
spte |= shadow_user_mask;
- if (level > PT_PAGE_TABLE_LEVEL)
+ if (level > PG_LEVEL_4K)
spte |= PT_PAGE_SIZE_MASK;
if (tdp_enabled)
spte |= kvm_x86_ops.get_mt_mask(vcpu, gfn,
@@ -3103,8 +3098,7 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
* If we overwrite a PTE page pointer with a 2MB PMD, unlink
* the parent of the now unreachable PTE.
*/
- if (level > PT_PAGE_TABLE_LEVEL &&
- !is_large_pte(*sptep)) {
+ if (level > PG_LEVEL_4K && !is_large_pte(*sptep)) {
struct kvm_mmu_page *child;
u64 pte = *sptep;
@@ -3125,7 +3119,7 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
if (set_spte_ret & SET_SPTE_WRITE_PROTECTED_PT) {
if (write_fault)
ret = RET_PF_EMULATE;
- kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+ kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
}
if (set_spte_ret & SET_SPTE_NEED_REMOTE_TLB_FLUSH || flush)
@@ -3228,7 +3222,7 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep)
if (sp_ad_disabled(sp))
return;
- if (sp->role.level > PT_PAGE_TABLE_LEVEL)
+ if (sp->role.level > PG_LEVEL_4K)
return;
__direct_pte_prefetch(vcpu, sp, sptep);
@@ -3241,12 +3235,8 @@ static int host_pfn_mapping_level(struct kvm_vcpu *vcpu, gfn_t gfn,
pte_t *pte;
int level;
- BUILD_BUG_ON(PT_PAGE_TABLE_LEVEL != (int)PG_LEVEL_4K ||
- PT_DIRECTORY_LEVEL != (int)PG_LEVEL_2M ||
- PT_PDPE_LEVEL != (int)PG_LEVEL_1G);
-
if (!PageCompound(pfn_to_page(pfn)) && !kvm_is_zone_device_pfn(pfn))
- return PT_PAGE_TABLE_LEVEL;
+ return PG_LEVEL_4K;
/*
* Note, using the already-retrieved memslot and __gfn_to_hva_memslot()
@@ -3260,7 +3250,7 @@ static int host_pfn_mapping_level(struct kvm_vcpu *vcpu, gfn_t gfn,
pte = lookup_address_in_mm(vcpu->kvm->mm, hva, &level);
if (unlikely(!pte))
- return PT_PAGE_TABLE_LEVEL;
+ return PG_LEVEL_4K;
return level;
}
@@ -3274,28 +3264,28 @@ static int kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, gfn_t gfn,
kvm_pfn_t mask;
int level;
- if (unlikely(max_level == PT_PAGE_TABLE_LEVEL))
- return PT_PAGE_TABLE_LEVEL;
+ if (unlikely(max_level == PG_LEVEL_4K))
+ return PG_LEVEL_4K;
if (is_error_noslot_pfn(pfn) || kvm_is_reserved_pfn(pfn))
- return PT_PAGE_TABLE_LEVEL;
+ return PG_LEVEL_4K;
slot = gfn_to_memslot_dirty_bitmap(vcpu, gfn, true);
if (!slot)
- return PT_PAGE_TABLE_LEVEL;
+ return PG_LEVEL_4K;
max_level = min(max_level, max_page_level);
- for ( ; max_level > PT_PAGE_TABLE_LEVEL; max_level--) {
+ for ( ; max_level > PG_LEVEL_4K; max_level--) {
linfo = lpage_info_slot(gfn, slot, max_level);
if (!linfo->disallow_lpage)
break;
}
- if (max_level == PT_PAGE_TABLE_LEVEL)
- return PT_PAGE_TABLE_LEVEL;
+ if (max_level == PG_LEVEL_4K)
+ return PG_LEVEL_4K;
level = host_pfn_mapping_level(vcpu, gfn, pfn, slot);
- if (level == PT_PAGE_TABLE_LEVEL)
+ if (level == PG_LEVEL_4K)
return level;
level = min(level, max_level);
@@ -3317,7 +3307,7 @@ static void disallowed_hugepage_adjust(struct kvm_shadow_walk_iterator it,
int level = *levelp;
u64 spte = *it.sptep;
- if (it.level == level && level > PT_PAGE_TABLE_LEVEL &&
+ if (it.level == level && level > PG_LEVEL_4K &&
is_nx_huge_page_enabled() &&
is_shadow_present_pte(spte) &&
!is_large_pte(spte)) {
@@ -3574,7 +3564,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
*
* See the comments in kvm_arch_commit_memory_region().
*/
- if (sp->role.level > PT_PAGE_TABLE_LEVEL)
+ if (sp->role.level > PG_LEVEL_4K)
break;
}
@@ -3586,7 +3576,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
/*
* Currently, fast page fault only works for direct mapping
* since the gfn is not stable for indirect shadow page. See
- * Documentation/virt/kvm/locking.txt to get more detail.
+ * Documentation/virt/kvm/locking.rst to get more detail.
*/
fault_handled = fast_pf_fix_direct_spte(vcpu, sp,
iterator.sptep, spte,
@@ -3666,7 +3656,7 @@ void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
&invalid_list);
mmu->root_hpa = INVALID_PAGE;
}
- mmu->root_cr3 = 0;
+ mmu->root_pgd = 0;
}
kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
@@ -3686,58 +3676,64 @@ static int mmu_check_root(struct kvm_vcpu *vcpu, gfn_t root_gfn)
return ret;
}
-static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
+static hpa_t mmu_alloc_root(struct kvm_vcpu *vcpu, gfn_t gfn, gva_t gva,
+ u8 level, bool direct)
{
struct kvm_mmu_page *sp;
+
+ spin_lock(&vcpu->kvm->mmu_lock);
+
+ if (make_mmu_pages_available(vcpu)) {
+ spin_unlock(&vcpu->kvm->mmu_lock);
+ return INVALID_PAGE;
+ }
+ sp = kvm_mmu_get_page(vcpu, gfn, gva, level, direct, ACC_ALL);
+ ++sp->root_count;
+
+ spin_unlock(&vcpu->kvm->mmu_lock);
+ return __pa(sp->spt);
+}
+
+static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
+{
+ u8 shadow_root_level = vcpu->arch.mmu->shadow_root_level;
+ hpa_t root;
unsigned i;
- if (vcpu->arch.mmu->shadow_root_level >= PT64_ROOT_4LEVEL) {
- spin_lock(&vcpu->kvm->mmu_lock);
- if(make_mmu_pages_available(vcpu) < 0) {
- spin_unlock(&vcpu->kvm->mmu_lock);
+ if (shadow_root_level >= PT64_ROOT_4LEVEL) {
+ root = mmu_alloc_root(vcpu, 0, 0, shadow_root_level, true);
+ if (!VALID_PAGE(root))
return -ENOSPC;
- }
- sp = kvm_mmu_get_page(vcpu, 0, 0,
- vcpu->arch.mmu->shadow_root_level, 1, ACC_ALL);
- ++sp->root_count;
- spin_unlock(&vcpu->kvm->mmu_lock);
- vcpu->arch.mmu->root_hpa = __pa(sp->spt);
- } else if (vcpu->arch.mmu->shadow_root_level == PT32E_ROOT_LEVEL) {
+ vcpu->arch.mmu->root_hpa = root;
+ } else if (shadow_root_level == PT32E_ROOT_LEVEL) {
for (i = 0; i < 4; ++i) {
- hpa_t root = vcpu->arch.mmu->pae_root[i];
+ MMU_WARN_ON(VALID_PAGE(vcpu->arch.mmu->pae_root[i]));
- MMU_WARN_ON(VALID_PAGE(root));
- spin_lock(&vcpu->kvm->mmu_lock);
- if (make_mmu_pages_available(vcpu) < 0) {
- spin_unlock(&vcpu->kvm->mmu_lock);
+ root = mmu_alloc_root(vcpu, i << (30 - PAGE_SHIFT),
+ i << 30, PT32_ROOT_LEVEL, true);
+ if (!VALID_PAGE(root))
return -ENOSPC;
- }
- sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT),
- i << 30, PT32_ROOT_LEVEL, 1, ACC_ALL);
- root = __pa(sp->spt);
- ++sp->root_count;
- spin_unlock(&vcpu->kvm->mmu_lock);
vcpu->arch.mmu->pae_root[i] = root | PT_PRESENT_MASK;
}
vcpu->arch.mmu->root_hpa = __pa(vcpu->arch.mmu->pae_root);
} else
BUG();
- /* root_cr3 is ignored for direct MMUs. */
- vcpu->arch.mmu->root_cr3 = 0;
+ /* root_pgd is ignored for direct MMUs. */
+ vcpu->arch.mmu->root_pgd = 0;
return 0;
}
static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
{
- struct kvm_mmu_page *sp;
u64 pdptr, pm_mask;
- gfn_t root_gfn, root_cr3;
+ gfn_t root_gfn, root_pgd;
+ hpa_t root;
int i;
- root_cr3 = vcpu->arch.mmu->get_guest_pgd(vcpu);
- root_gfn = root_cr3 >> PAGE_SHIFT;
+ root_pgd = vcpu->arch.mmu->get_guest_pgd(vcpu);
+ root_gfn = root_pgd >> PAGE_SHIFT;
if (mmu_check_root(vcpu, root_gfn))
return 1;
@@ -3747,22 +3743,14 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
* write-protect the guests page table root.
*/
if (vcpu->arch.mmu->root_level >= PT64_ROOT_4LEVEL) {
- hpa_t root = vcpu->arch.mmu->root_hpa;
-
- MMU_WARN_ON(VALID_PAGE(root));
+ MMU_WARN_ON(VALID_PAGE(vcpu->arch.mmu->root_hpa));
- spin_lock(&vcpu->kvm->mmu_lock);
- if (make_mmu_pages_available(vcpu) < 0) {
- spin_unlock(&vcpu->kvm->mmu_lock);
+ root = mmu_alloc_root(vcpu, root_gfn, 0,
+ vcpu->arch.mmu->shadow_root_level, false);
+ if (!VALID_PAGE(root))
return -ENOSPC;
- }
- sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
- vcpu->arch.mmu->shadow_root_level, 0, ACC_ALL);
- root = __pa(sp->spt);
- ++sp->root_count;
- spin_unlock(&vcpu->kvm->mmu_lock);
vcpu->arch.mmu->root_hpa = root;
- goto set_root_cr3;
+ goto set_root_pgd;
}
/*
@@ -3775,9 +3763,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
pm_mask |= PT_ACCESSED_MASK | PT_WRITABLE_MASK | PT_USER_MASK;
for (i = 0; i < 4; ++i) {
- hpa_t root = vcpu->arch.mmu->pae_root[i];
-
- MMU_WARN_ON(VALID_PAGE(root));
+ MMU_WARN_ON(VALID_PAGE(vcpu->arch.mmu->pae_root[i]));
if (vcpu->arch.mmu->root_level == PT32E_ROOT_LEVEL) {
pdptr = vcpu->arch.mmu->get_pdptr(vcpu, i);
if (!(pdptr & PT_PRESENT_MASK)) {
@@ -3788,17 +3774,11 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
if (mmu_check_root(vcpu, root_gfn))
return 1;
}
- spin_lock(&vcpu->kvm->mmu_lock);
- if (make_mmu_pages_available(vcpu) < 0) {
- spin_unlock(&vcpu->kvm->mmu_lock);
- return -ENOSPC;
- }
- sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, PT32_ROOT_LEVEL,
- 0, ACC_ALL);
- root = __pa(sp->spt);
- ++sp->root_count;
- spin_unlock(&vcpu->kvm->mmu_lock);
+ root = mmu_alloc_root(vcpu, root_gfn, i << 30,
+ PT32_ROOT_LEVEL, false);
+ if (!VALID_PAGE(root))
+ return -ENOSPC;
vcpu->arch.mmu->pae_root[i] = root | pm_mask;
}
vcpu->arch.mmu->root_hpa = __pa(vcpu->arch.mmu->pae_root);
@@ -3828,8 +3808,8 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
vcpu->arch.mmu->root_hpa = __pa(vcpu->arch.mmu->lm_root);
}
-set_root_cr3:
- vcpu->arch.mmu->root_cr3 = root_cr3;
+set_root_pgd:
+ vcpu->arch.mmu->root_pgd = root_pgd;
return 0;
}
@@ -4083,18 +4063,16 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
gpa_t cr2_or_gpa, kvm_pfn_t *pfn, bool write,
bool *writable)
{
- struct kvm_memory_slot *slot;
+ struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
bool async;
- /*
- * Don't expose private memslots to L2.
- */
- if (is_guest_mode(vcpu) && !kvm_is_visible_gfn(vcpu->kvm, gfn)) {
+ /* Don't expose private memslots to L2. */
+ if (is_guest_mode(vcpu) && !kvm_is_visible_memslot(slot)) {
*pfn = KVM_PFN_NOSLOT;
+ *writable = false;
return false;
}
- slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
async = false;
*pfn = __gfn_to_pfn_memslot(slot, gfn, false, &async, write, writable);
if (!async)
@@ -4135,7 +4113,7 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
return r;
if (lpage_disallowed)
- max_level = PT_PAGE_TABLE_LEVEL;
+ max_level = PG_LEVEL_4K;
if (fast_page_fault(vcpu, gpa, error_code))
return RET_PF_RETRY;
@@ -4171,7 +4149,7 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa,
/* This path builds a PAE pagetable, we can map 2mb pages at maximum. */
return direct_page_fault(vcpu, gpa & PAGE_MASK, error_code, prefault,
- PT_DIRECTORY_LEVEL, false);
+ PG_LEVEL_2M, false);
}
int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
@@ -4186,7 +4164,7 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
#endif
vcpu->arch.l1tf_flush_l1d = true;
- switch (vcpu->arch.apf.host_apf_reason) {
+ switch (vcpu->arch.apf.host_apf_flags) {
default:
trace_kvm_page_fault(fault_address, error_code);
@@ -4196,13 +4174,13 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
insn_len);
break;
case KVM_PV_REASON_PAGE_NOT_PRESENT:
- vcpu->arch.apf.host_apf_reason = 0;
+ vcpu->arch.apf.host_apf_flags = 0;
local_irq_disable();
- kvm_async_pf_task_wait(fault_address, 0);
+ kvm_async_pf_task_wait_schedule(fault_address);
local_irq_enable();
break;
case KVM_PV_REASON_PAGE_READY:
- vcpu->arch.apf.host_apf_reason = 0;
+ vcpu->arch.apf.host_apf_flags = 0;
local_irq_disable();
kvm_async_pf_task_wake(fault_address);
local_irq_enable();
@@ -4217,8 +4195,8 @@ int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
{
int max_level;
- for (max_level = PT_MAX_HUGEPAGE_LEVEL;
- max_level > PT_PAGE_TABLE_LEVEL;
+ for (max_level = KVM_MAX_HUGEPAGE_LEVEL;
+ max_level > PG_LEVEL_4K;
max_level--) {
int page_num = KVM_PAGES_PER_HPAGE(max_level);
gfn_t base = (gpa >> PAGE_SHIFT) & ~(page_num - 1);
@@ -4237,7 +4215,7 @@ static void nonpaging_init_context(struct kvm_vcpu *vcpu,
context->page_fault = nonpaging_page_fault;
context->gva_to_gpa = nonpaging_gva_to_gpa;
context->sync_page = nonpaging_sync_page;
- context->invlpg = nonpaging_invlpg;
+ context->invlpg = NULL;
context->update_pte = nonpaging_update_pte;
context->root_level = 0;
context->shadow_root_level = PT32E_ROOT_LEVEL;
@@ -4245,51 +4223,50 @@ static void nonpaging_init_context(struct kvm_vcpu *vcpu,
context->nx = false;
}
-static inline bool is_root_usable(struct kvm_mmu_root_info *root, gpa_t cr3,
+static inline bool is_root_usable(struct kvm_mmu_root_info *root, gpa_t pgd,
union kvm_mmu_page_role role)
{
- return (role.direct || cr3 == root->cr3) &&
+ return (role.direct || pgd == root->pgd) &&
VALID_PAGE(root->hpa) && page_header(root->hpa) &&
role.word == page_header(root->hpa)->role.word;
}
/*
- * Find out if a previously cached root matching the new CR3/role is available.
+ * Find out if a previously cached root matching the new pgd/role is available.
* The current root is also inserted into the cache.
* If a matching root was found, it is assigned to kvm_mmu->root_hpa and true is
* returned.
* Otherwise, the LRU root from the cache is assigned to kvm_mmu->root_hpa and
* false is returned. This root should now be freed by the caller.
*/
-static bool cached_root_available(struct kvm_vcpu *vcpu, gpa_t new_cr3,
+static bool cached_root_available(struct kvm_vcpu *vcpu, gpa_t new_pgd,
union kvm_mmu_page_role new_role)
{
uint i;
struct kvm_mmu_root_info root;
struct kvm_mmu *mmu = vcpu->arch.mmu;
- root.cr3 = mmu->root_cr3;
+ root.pgd = mmu->root_pgd;
root.hpa = mmu->root_hpa;
- if (is_root_usable(&root, new_cr3, new_role))
+ if (is_root_usable(&root, new_pgd, new_role))
return true;
for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
swap(root, mmu->prev_roots[i]);
- if (is_root_usable(&root, new_cr3, new_role))
+ if (is_root_usable(&root, new_pgd, new_role))
break;
}
mmu->root_hpa = root.hpa;
- mmu->root_cr3 = root.cr3;
+ mmu->root_pgd = root.pgd;
return i < KVM_MMU_NUM_PREV_ROOTS;
}
-static bool fast_cr3_switch(struct kvm_vcpu *vcpu, gpa_t new_cr3,
- union kvm_mmu_page_role new_role,
- bool skip_tlb_flush)
+static bool fast_pgd_switch(struct kvm_vcpu *vcpu, gpa_t new_pgd,
+ union kvm_mmu_page_role new_role)
{
struct kvm_mmu *mmu = vcpu->arch.mmu;
@@ -4299,70 +4276,59 @@ static bool fast_cr3_switch(struct kvm_vcpu *vcpu, gpa_t new_cr3,
* later if necessary.
*/
if (mmu->shadow_root_level >= PT64_ROOT_4LEVEL &&
- mmu->root_level >= PT64_ROOT_4LEVEL) {
- if (mmu_check_root(vcpu, new_cr3 >> PAGE_SHIFT))
- return false;
-
- if (cached_root_available(vcpu, new_cr3, new_role)) {
- /*
- * It is possible that the cached previous root page is
- * obsolete because of a change in the MMU generation
- * number. However, changing the generation number is
- * accompanied by KVM_REQ_MMU_RELOAD, which will free
- * the root set here and allocate a new one.
- */
- kvm_make_request(KVM_REQ_LOAD_MMU_PGD, vcpu);
- if (!skip_tlb_flush) {
- kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
- kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
- }
-
- /*
- * The last MMIO access's GVA and GPA are cached in the
- * VCPU. When switching to a new CR3, that GVA->GPA
- * mapping may no longer be valid. So clear any cached
- * MMIO info even when we don't need to sync the shadow
- * page tables.
- */
- vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY);
-
- __clear_sp_write_flooding_count(
- page_header(mmu->root_hpa));
-
- return true;
- }
- }
+ mmu->root_level >= PT64_ROOT_4LEVEL)
+ return !mmu_check_root(vcpu, new_pgd >> PAGE_SHIFT) &&
+ cached_root_available(vcpu, new_pgd, new_role);
return false;
}
-static void __kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t new_cr3,
+static void __kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd,
union kvm_mmu_page_role new_role,
- bool skip_tlb_flush)
+ bool skip_tlb_flush, bool skip_mmu_sync)
{
- if (!fast_cr3_switch(vcpu, new_cr3, new_role, skip_tlb_flush))
- kvm_mmu_free_roots(vcpu, vcpu->arch.mmu,
- KVM_MMU_ROOT_CURRENT);
+ if (!fast_pgd_switch(vcpu, new_pgd, new_role)) {
+ kvm_mmu_free_roots(vcpu, vcpu->arch.mmu, KVM_MMU_ROOT_CURRENT);
+ return;
+ }
+
+ /*
+ * It's possible that the cached previous root page is obsolete because
+ * of a change in the MMU generation number. However, changing the
+ * generation number is accompanied by KVM_REQ_MMU_RELOAD, which will
+ * free the root set here and allocate a new one.
+ */
+ kvm_make_request(KVM_REQ_LOAD_MMU_PGD, vcpu);
+
+ if (!skip_mmu_sync || force_flush_and_sync_on_reuse)
+ kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
+ if (!skip_tlb_flush || force_flush_and_sync_on_reuse)
+ kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
+
+ /*
+ * The last MMIO access's GVA and GPA are cached in the VCPU. When
+ * switching to a new CR3, that GVA->GPA mapping may no longer be
+ * valid. So clear any cached MMIO info even when we don't need to sync
+ * the shadow page tables.
+ */
+ vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY);
+
+ __clear_sp_write_flooding_count(page_header(vcpu->arch.mmu->root_hpa));
}
-void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t new_cr3, bool skip_tlb_flush)
+void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd, bool skip_tlb_flush,
+ bool skip_mmu_sync)
{
- __kvm_mmu_new_cr3(vcpu, new_cr3, kvm_mmu_calc_root_page_role(vcpu),
- skip_tlb_flush);
+ __kvm_mmu_new_pgd(vcpu, new_pgd, kvm_mmu_calc_root_page_role(vcpu),
+ skip_tlb_flush, skip_mmu_sync);
}
-EXPORT_SYMBOL_GPL(kvm_mmu_new_cr3);
+EXPORT_SYMBOL_GPL(kvm_mmu_new_pgd);
static unsigned long get_cr3(struct kvm_vcpu *vcpu)
{
return kvm_read_cr3(vcpu);
}
-static void inject_page_fault(struct kvm_vcpu *vcpu,
- struct x86_exception *fault)
-{
- vcpu->arch.mmu->inject_page_fault(vcpu, fault);
-}
-
static bool sync_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn,
unsigned int access, int *nr_present)
{
@@ -4391,11 +4357,11 @@ static inline bool is_last_gpte(struct kvm_mmu *mmu,
gpte &= level - mmu->last_nonleaf_level;
/*
- * PT_PAGE_TABLE_LEVEL always terminates. The RHS has bit 7 set
- * iff level <= PT_PAGE_TABLE_LEVEL, which for our purpose means
- * level == PT_PAGE_TABLE_LEVEL; set PT_PAGE_SIZE_MASK in gpte then.
+ * PG_LEVEL_4K always terminates. The RHS has bit 7 set
+ * iff level <= PG_LEVEL_4K, which for our purpose means
+ * level == PG_LEVEL_4K; set PT_PAGE_SIZE_MASK in gpte then.
*/
- gpte |= level - PT_PAGE_TABLE_LEVEL - 1;
+ gpte |= level - PG_LEVEL_4K - 1;
return gpte & PT_PAGE_SIZE_MASK;
}
@@ -4909,7 +4875,7 @@ kvm_calc_tdp_mmu_root_page_role(struct kvm_vcpu *vcpu, bool base_only)
union kvm_mmu_role role = kvm_calc_mmu_role_common(vcpu, base_only);
role.base.ad_disabled = (shadow_accessed_mask == 0);
- role.base.level = kvm_x86_ops.get_tdp_level(vcpu);
+ role.base.level = vcpu->arch.tdp_level;
role.base.direct = true;
role.base.gpte_is_8_bytes = true;
@@ -4928,9 +4894,9 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
context->mmu_role.as_u64 = new_role.as_u64;
context->page_fault = kvm_tdp_page_fault;
context->sync_page = nonpaging_sync_page;
- context->invlpg = nonpaging_invlpg;
+ context->invlpg = NULL;
context->update_pte = nonpaging_update_pte;
- context->shadow_root_level = kvm_x86_ops.get_tdp_level(vcpu);
+ context->shadow_root_level = vcpu->arch.tdp_level;
context->direct_map = true;
context->get_guest_pgd = get_cr3;
context->get_pdptr = kvm_pdptr_read;
@@ -4986,7 +4952,7 @@ kvm_calc_shadow_mmu_root_page_role(struct kvm_vcpu *vcpu, bool base_only)
return role;
}
-void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
+void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, u32 cr0, u32 cr4, u32 efer)
{
struct kvm_mmu *context = vcpu->arch.mmu;
union kvm_mmu_role new_role =
@@ -4995,11 +4961,11 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
if (new_role.as_u64 == context->mmu_role.as_u64)
return;
- if (!is_paging(vcpu))
+ if (!(cr0 & X86_CR0_PG))
nonpaging_init_context(vcpu, context);
- else if (is_long_mode(vcpu))
+ else if (efer & EFER_LMA)
paging64_init_context(vcpu, context);
- else if (is_pae(vcpu))
+ else if (cr4 & X86_CR4_PAE)
paging32E_init_context(vcpu, context);
else
paging32_init_context(vcpu, context);
@@ -5047,7 +5013,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
kvm_calc_shadow_ept_root_page_role(vcpu, accessed_dirty,
execonly, level);
- __kvm_mmu_new_cr3(vcpu, new_eptp, new_role.base, false);
+ __kvm_mmu_new_pgd(vcpu, new_eptp, new_role.base, true, true);
if (new_role.as_u64 == context->mmu_role.as_u64)
return;
@@ -5077,7 +5043,11 @@ static void init_kvm_softmmu(struct kvm_vcpu *vcpu)
{
struct kvm_mmu *context = vcpu->arch.mmu;
- kvm_init_shadow_mmu(vcpu);
+ kvm_init_shadow_mmu(vcpu,
+ kvm_read_cr0_bits(vcpu, X86_CR0_PG),
+ kvm_read_cr4_bits(vcpu, X86_CR4_PAE),
+ vcpu->arch.efer);
+
context->get_guest_pgd = get_cr3;
context->get_pdptr = kvm_pdptr_read;
context->inject_page_fault = kvm_inject_page_fault;
@@ -5097,6 +5067,12 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
g_context->inject_page_fault = kvm_inject_page_fault;
/*
+ * L2 page tables are never shadowed, so there is no need to sync
+ * SPTEs.
+ */
+ g_context->invlpg = NULL;
+
+ /*
* Note that arch.mmu->gva_to_gpa translates l2_gpa to l1_gpa using
* L1's nested page tables (e.g. EPT12). The nested translation
* of l2_gva to l1_gpa is done by arch.nested_mmu.gva_to_gpa using
@@ -5183,7 +5159,7 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu)
if (r)
goto out;
kvm_mmu_load_pgd(vcpu);
- kvm_x86_ops.tlb_flush(vcpu, true);
+ kvm_x86_ops.tlb_flush_current(vcpu);
out:
return r;
}
@@ -5202,7 +5178,7 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
struct kvm_mmu_page *sp, u64 *spte,
const void *new)
{
- if (sp->role.level != PT_PAGE_TABLE_LEVEL) {
+ if (sp->role.level != PG_LEVEL_4K) {
++vcpu->kvm->stat.mmu_pde_zapped;
return;
}
@@ -5260,7 +5236,7 @@ static bool detect_write_flooding(struct kvm_mmu_page *sp)
* Skip write-flooding detected for the sp whose level is 1, because
* it can become unsync, then the guest page is not write-protected.
*/
- if (sp->role.level == PT_PAGE_TABLE_LEVEL)
+ if (sp->role.level == PG_LEVEL_4K)
return false;
atomic_inc(&sp->write_flooding_count);
@@ -5497,37 +5473,54 @@ emulate:
}
EXPORT_SYMBOL_GPL(kvm_mmu_page_fault);
-void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
+void kvm_mmu_invalidate_gva(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+ gva_t gva, hpa_t root_hpa)
{
- struct kvm_mmu *mmu = vcpu->arch.mmu;
int i;
- /* INVLPG on a * non-canonical address is a NOP according to the SDM. */
- if (is_noncanonical_address(gva, vcpu))
+ /* It's actually a GPA for vcpu->arch.guest_mmu. */
+ if (mmu != &vcpu->arch.guest_mmu) {
+ /* INVLPG on a non-canonical address is a NOP according to the SDM. */
+ if (is_noncanonical_address(gva, vcpu))
+ return;
+
+ kvm_x86_ops.tlb_flush_gva(vcpu, gva);
+ }
+
+ if (!mmu->invlpg)
return;
- mmu->invlpg(vcpu, gva, mmu->root_hpa);
+ if (root_hpa == INVALID_PAGE) {
+ mmu->invlpg(vcpu, gva, mmu->root_hpa);
- /*
- * INVLPG is required to invalidate any global mappings for the VA,
- * irrespective of PCID. Since it would take us roughly similar amount
- * of work to determine whether any of the prev_root mappings of the VA
- * is marked global, or to just sync it blindly, so we might as well
- * just always sync it.
- *
- * Mappings not reachable via the current cr3 or the prev_roots will be
- * synced when switching to that cr3, so nothing needs to be done here
- * for them.
- */
- for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
- if (VALID_PAGE(mmu->prev_roots[i].hpa))
- mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa);
+ /*
+ * INVLPG is required to invalidate any global mappings for the VA,
+ * irrespective of PCID. Since it would take us roughly similar amount
+ * of work to determine whether any of the prev_root mappings of the VA
+ * is marked global, or to just sync it blindly, so we might as well
+ * just always sync it.
+ *
+ * Mappings not reachable via the current cr3 or the prev_roots will be
+ * synced when switching to that cr3, so nothing needs to be done here
+ * for them.
+ */
+ for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
+ if (VALID_PAGE(mmu->prev_roots[i].hpa))
+ mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa);
+ } else {
+ mmu->invlpg(vcpu, gva, root_hpa);
+ }
+}
+EXPORT_SYMBOL_GPL(kvm_mmu_invalidate_gva);
- kvm_x86_ops.tlb_flush_gva(vcpu, gva);
+void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
+{
+ kvm_mmu_invalidate_gva(vcpu, vcpu->arch.mmu, gva, INVALID_PAGE);
++vcpu->stat.invlpg;
}
EXPORT_SYMBOL_GPL(kvm_mmu_invlpg);
+
void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid)
{
struct kvm_mmu *mmu = vcpu->arch.mmu;
@@ -5541,7 +5534,7 @@ void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid)
for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
if (VALID_PAGE(mmu->prev_roots[i].hpa) &&
- pcid == kvm_get_pcid(vcpu, mmu->prev_roots[i].cr3)) {
+ pcid == kvm_get_pcid(vcpu, mmu->prev_roots[i].pgd)) {
mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa);
tlb_flush = true;
}
@@ -5574,9 +5567,9 @@ void kvm_configure_mmu(bool enable_tdp, int tdp_page_level)
if (tdp_enabled)
max_page_level = tdp_page_level;
else if (boot_cpu_has(X86_FEATURE_GBPAGES))
- max_page_level = PT_PDPE_LEVEL;
+ max_page_level = PG_LEVEL_1G;
else
- max_page_level = PT_DIRECTORY_LEVEL;
+ max_page_level = PG_LEVEL_2M;
}
EXPORT_SYMBOL_GPL(kvm_configure_mmu);
@@ -5632,24 +5625,24 @@ static __always_inline bool
slot_handle_all_level(struct kvm *kvm, struct kvm_memory_slot *memslot,
slot_level_handler fn, bool lock_flush_tlb)
{
- return slot_handle_level(kvm, memslot, fn, PT_PAGE_TABLE_LEVEL,
- PT_MAX_HUGEPAGE_LEVEL, lock_flush_tlb);
+ return slot_handle_level(kvm, memslot, fn, PG_LEVEL_4K,
+ KVM_MAX_HUGEPAGE_LEVEL, lock_flush_tlb);
}
static __always_inline bool
slot_handle_large_level(struct kvm *kvm, struct kvm_memory_slot *memslot,
slot_level_handler fn, bool lock_flush_tlb)
{
- return slot_handle_level(kvm, memslot, fn, PT_PAGE_TABLE_LEVEL + 1,
- PT_MAX_HUGEPAGE_LEVEL, lock_flush_tlb);
+ return slot_handle_level(kvm, memslot, fn, PG_LEVEL_4K + 1,
+ KVM_MAX_HUGEPAGE_LEVEL, lock_flush_tlb);
}
static __always_inline bool
slot_handle_leaf(struct kvm *kvm, struct kvm_memory_slot *memslot,
slot_level_handler fn, bool lock_flush_tlb)
{
- return slot_handle_level(kvm, memslot, fn, PT_PAGE_TABLE_LEVEL,
- PT_PAGE_TABLE_LEVEL, lock_flush_tlb);
+ return slot_handle_level(kvm, memslot, fn, PG_LEVEL_4K,
+ PG_LEVEL_4K, lock_flush_tlb);
}
static void free_mmu_pages(struct kvm_mmu *mmu)
@@ -5672,7 +5665,7 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
* SVM's 32-bit NPT support, TDP paging doesn't use PAE paging and can
* skip allocating the PDP table.
*/
- if (tdp_enabled && kvm_x86_ops.get_tdp_level(vcpu) > PT32E_ROOT_LEVEL)
+ if (tdp_enabled && vcpu->arch.tdp_level > PT32E_ROOT_LEVEL)
return 0;
page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_DMA32);
@@ -5695,13 +5688,13 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu)
vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
vcpu->arch.root_mmu.root_hpa = INVALID_PAGE;
- vcpu->arch.root_mmu.root_cr3 = 0;
+ vcpu->arch.root_mmu.root_pgd = 0;
vcpu->arch.root_mmu.translate_gpa = translate_gpa;
for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
vcpu->arch.root_mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID;
vcpu->arch.guest_mmu.root_hpa = INVALID_PAGE;
- vcpu->arch.guest_mmu.root_cr3 = 0;
+ vcpu->arch.guest_mmu.root_pgd = 0;
vcpu->arch.guest_mmu.translate_gpa = translate_gpa;
for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
vcpu->arch.guest_mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID;
@@ -5859,7 +5852,8 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
continue;
slot_handle_level_range(kvm, memslot, kvm_zap_rmapp,
- PT_PAGE_TABLE_LEVEL, PT_MAX_HUGEPAGE_LEVEL,
+ PG_LEVEL_4K,
+ KVM_MAX_HUGEPAGE_LEVEL,
start, end - 1, true);
}
}
@@ -5881,7 +5875,7 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
spin_lock(&kvm->mmu_lock);
flush = slot_handle_level(kvm, memslot, slot_rmap_write_protect,
- start_level, PT_MAX_HUGEPAGE_LEVEL, false);
+ start_level, KVM_MAX_HUGEPAGE_LEVEL, false);
spin_unlock(&kvm->mmu_lock);
/*
@@ -6142,27 +6136,18 @@ static void kvm_set_mmio_spte_mask(void)
u64 mask;
/*
- * Set the reserved bits and the present bit of an paging-structure
- * entry to generate page fault with PFER.RSV = 1.
+ * Set a reserved PA bit in MMIO SPTEs to generate page faults with
+ * PFEC.RSVD=1 on MMIO accesses. 64-bit PTEs (PAE, x86-64, and EPT
+ * paging) support a maximum of 52 bits of PA, i.e. if the CPU supports
+ * 52-bit physical addresses then there are no reserved PA bits in the
+ * PTEs and so the reserved PA approach must be disabled.
*/
+ if (shadow_phys_bits < 52)
+ mask = BIT_ULL(51) | PT_PRESENT_MASK;
+ else
+ mask = 0;
- /*
- * Mask the uppermost physical address bit, which would be reserved as
- * long as the supported physical address width is less than 52.
- */
- mask = 1ull << 51;
-
- /* Set the present bit. */
- mask |= 1ull;
-
- /*
- * If reserved bit is not supported, clear the present bit to disable
- * mmio page fault.
- */
- if (shadow_phys_bits == 52)
- mask &= ~1ull;
-
- kvm_mmu_set_mmio_spte_mask(mask, mask, ACC_WRITE_MASK | ACC_USER_MASK);
+ kvm_mmu_set_mmio_spte_mask(mask, ACC_WRITE_MASK | ACC_USER_MASK);
}
static bool get_nx_auto_mode(void)
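kvm_set_mmio_spte_mask() above now derives the MMIO mask from the physical-address width alone: below 52 bits, physical-address bit 51 is reserved, so a present SPTE with that bit set is guaranteed to fault with PFEC.RSVD=1; with a full 52 bits there is no reserved PA bit left and the trick must be disabled. A stand-alone sketch of that decision, using local stand-ins for the kernel's BIT_ULL() and PT_PRESENT_MASK macros:

#include <stdint.h>
#include <stdio.h>

#define BIT_ULL(n)	(1ULL << (n))
#define PT_PRESENT_MASK	BIT_ULL(0)

/* Same decision as the rewritten kvm_set_mmio_spte_mask() above. */
static uint64_t mmio_spte_mask(unsigned int shadow_phys_bits)
{
	if (shadow_phys_bits < 52)
		return BIT_ULL(51) | PT_PRESENT_MASK;	/* reserved PA bit + present */
	return 0;					/* no reserved PA bits exist */
}

int main(void)
{
	printf("46-bit host: %#llx\n", (unsigned long long)mmio_spte_mask(46));
	printf("52-bit host: %#llx\n", (unsigned long long)mmio_spte_mask(52));
	return 0;
}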
diff --git a/arch/x86/kvm/mmu/page_track.c b/arch/x86/kvm/mmu/page_track.c
index ddc1ec3bdacd..a7bcde34d1f2 100644
--- a/arch/x86/kvm/mmu/page_track.c
+++ b/arch/x86/kvm/mmu/page_track.c
@@ -61,7 +61,7 @@ static void update_gfn_track(struct kvm_memory_slot *slot, gfn_t gfn,
{
int index, val;
- index = gfn_to_index(gfn, slot->base_gfn, PT_PAGE_TABLE_LEVEL);
+ index = gfn_to_index(gfn, slot->base_gfn, PG_LEVEL_4K);
val = slot->arch.gfn_track[mode][index];
@@ -151,7 +151,7 @@ bool kvm_page_track_is_active(struct kvm_vcpu *vcpu, gfn_t gfn,
if (!slot)
return false;
- index = gfn_to_index(gfn, slot->base_gfn, PT_PAGE_TABLE_LEVEL);
+ index = gfn_to_index(gfn, slot->base_gfn, PG_LEVEL_4K);
return !!READ_ONCE(slot->arch.gfn_track[mode][index]);
}
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index 9bdf9b7d9a96..38c576495048 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -75,7 +75,7 @@
#define PT_GUEST_ACCESSED_MASK (1 << PT_GUEST_ACCESSED_SHIFT)
#define gpte_to_gfn_lvl FNAME(gpte_to_gfn_lvl)
-#define gpte_to_gfn(pte) gpte_to_gfn_lvl((pte), PT_PAGE_TABLE_LEVEL)
+#define gpte_to_gfn(pte) gpte_to_gfn_lvl((pte), PG_LEVEL_4K)
/*
* The guest_walker structure emulates the behavior of the hardware page
@@ -198,7 +198,7 @@ static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu,
!(gpte & PT_GUEST_ACCESSED_MASK))
goto no_present;
- if (FNAME(is_rsvd_bits_set)(vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL))
+ if (FNAME(is_rsvd_bits_set)(vcpu->arch.mmu, gpte, PG_LEVEL_4K))
goto no_present;
return false;
@@ -436,7 +436,7 @@ retry_walk:
gfn = gpte_to_gfn_lvl(pte, walker->level);
gfn += (addr & PT_LVL_OFFSET_MASK(walker->level)) >> PAGE_SHIFT;
- if (PTTYPE == 32 && walker->level == PT_DIRECTORY_LEVEL && is_cpuid_PSE36())
+ if (PTTYPE == 32 && walker->level > PG_LEVEL_4K && is_cpuid_PSE36())
gfn += pse36_gfn_delta(pte);
real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(gfn), access, &walker->fault);
@@ -552,7 +552,7 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
* we call mmu_set_spte() with host_writable = true because
* pte_prefetch_gfn_to_pfn always gets a writable pfn.
*/
- mmu_set_spte(vcpu, spte, pte_access, 0, PT_PAGE_TABLE_LEVEL, gfn, pfn,
+ mmu_set_spte(vcpu, spte, pte_access, 0, PG_LEVEL_4K, gfn, pfn,
true, true);
kvm_release_pfn_clean(pfn);
@@ -575,7 +575,7 @@ static bool FNAME(gpte_changed)(struct kvm_vcpu *vcpu,
u64 mask;
int r, index;
- if (level == PT_PAGE_TABLE_LEVEL) {
+ if (level == PG_LEVEL_4K) {
mask = PTE_PREFETCH_NUM * sizeof(pt_element_t) - 1;
base_gpa = pte_gpa & ~mask;
index = (pte_gpa - base_gpa) / sizeof(pt_element_t);
@@ -600,7 +600,7 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,
sp = page_header(__pa(sptep));
- if (sp->role.level > PT_PAGE_TABLE_LEVEL)
+ if (sp->role.level > PG_LEVEL_4K)
return;
if (sp->role.direct)
@@ -812,7 +812,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gpa_t addr, u32 error_code,
if (!r) {
pgprintk("%s: guest page fault\n", __func__);
if (!prefault)
- inject_page_fault(vcpu, &walker.fault);
+ kvm_inject_emulated_page_fault(vcpu, &walker.fault);
return RET_PF_RETRY;
}
@@ -828,7 +828,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gpa_t addr, u32 error_code,
&walker, user_fault, &vcpu->arch.write_fault_to_shadow_pgtable);
if (lpage_disallowed || is_self_change_mapping)
- max_level = PT_PAGE_TABLE_LEVEL;
+ max_level = PG_LEVEL_4K;
else
max_level = walker.level;
@@ -884,7 +884,7 @@ static gpa_t FNAME(get_level1_sp_gpa)(struct kvm_mmu_page *sp)
{
int offset = 0;
- WARN_ON(sp->role.level != PT_PAGE_TABLE_LEVEL);
+ WARN_ON(sp->role.level != PG_LEVEL_4K);
if (PTTYPE == 32)
offset = sp->role.quadrant << PT64_LEVEL_BITS;
@@ -1070,7 +1070,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
host_writable = sp->spt[i] & SPTE_HOST_WRITEABLE;
set_spte_ret |= set_spte(vcpu, &sp->spt[i],
- pte_access, PT_PAGE_TABLE_LEVEL,
+ pte_access, PG_LEVEL_4K,
gfn, spte_to_pfn(sp->spt[i]),
true, false, host_writable);
}
diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c
index ca39f62aabc6..9d2844f87f6d 100644
--- a/arch/x86/kvm/mmu_audit.c
+++ b/arch/x86/kvm/mmu_audit.c
@@ -100,7 +100,7 @@ static void audit_mappings(struct kvm_vcpu *vcpu, u64 *sptep, int level)
sp = page_header(__pa(sptep));
if (sp->unsync) {
- if (level != PT_PAGE_TABLE_LEVEL) {
+ if (level != PG_LEVEL_4K) {
audit_printk(vcpu->kvm, "unsync sp: %p "
"level = %d\n", sp, level);
return;
@@ -176,7 +176,7 @@ static void check_mappings_rmap(struct kvm *kvm, struct kvm_mmu_page *sp)
{
int i;
- if (sp->role.level != PT_PAGE_TABLE_LEVEL)
+ if (sp->role.level != PG_LEVEL_4K)
return;
for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
@@ -200,7 +200,7 @@ static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp)
slots = kvm_memslots_for_spte_role(kvm, sp->role);
slot = __gfn_to_memslot(slots, sp->gfn);
- rmap_head = __gfn_to_rmap(sp->gfn, PT_PAGE_TABLE_LEVEL, slot);
+ rmap_head = __gfn_to_rmap(sp->gfn, PG_LEVEL_4K, slot);
for_each_rmap_spte(rmap_head, &iter, sptep) {
if (is_writable_pte(*sptep))
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index a5078841bdac..b86346903f2e 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -397,9 +397,9 @@ static void kvm_pmu_mark_pmc_in_use(struct kvm_vcpu *vcpu, u32 msr)
__set_bit(pmc->idx, pmu->pmc_in_use);
}
-int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
+int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
- return kvm_x86_ops.pmu_ops->get_msr(vcpu, msr, data);
+ return kvm_x86_ops.pmu_ops->get_msr(vcpu, msr_info);
}
int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index a6c78a797cb1..ab85eed8a6cc 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -32,7 +32,7 @@ struct kvm_pmu_ops {
struct kvm_pmc *(*msr_idx_to_pmc)(struct kvm_vcpu *vcpu, u32 msr);
int (*is_valid_rdpmc_ecx)(struct kvm_vcpu *vcpu, unsigned int idx);
bool (*is_valid_msr)(struct kvm_vcpu *vcpu, u32 msr);
- int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
+ int (*get_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
void (*refresh)(struct kvm_vcpu *vcpu);
void (*init)(struct kvm_vcpu *vcpu);
@@ -147,7 +147,7 @@ void kvm_pmu_handle_event(struct kvm_vcpu *vcpu);
int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data);
int kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx);
bool kvm_pmu_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr);
-int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
+int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
void kvm_pmu_refresh(struct kvm_vcpu *vcpu);
void kvm_pmu_reset(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 9a2a62e5afeb..8a6db11dcb43 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -25,6 +25,8 @@
#include "trace.h"
#include "mmu.h"
#include "x86.h"
+#include "cpuid.h"
+#include "lapic.h"
#include "svm.h"
static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
@@ -59,7 +61,7 @@ static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index)
{
struct vcpu_svm *svm = to_svm(vcpu);
- u64 cr3 = svm->nested.nested_cr3;
+ u64 cr3 = svm->nested.ctl.nested_cr3;
u64 pdpte;
int ret;
@@ -74,19 +76,22 @@ static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
- return svm->nested.nested_cr3;
+ return svm->nested.ctl.nested_cr3;
}
static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
{
+ struct vcpu_svm *svm = to_svm(vcpu);
+ struct vmcb *hsave = svm->nested.hsave;
+
WARN_ON(mmu_is_nested(vcpu));
vcpu->arch.mmu = &vcpu->arch.guest_mmu;
- kvm_init_shadow_mmu(vcpu);
+ kvm_init_shadow_mmu(vcpu, X86_CR0_PG, hsave->save.cr4, hsave->save.efer);
vcpu->arch.mmu->get_guest_pgd = nested_svm_get_tdp_cr3;
vcpu->arch.mmu->get_pdptr = nested_svm_get_tdp_pdptr;
vcpu->arch.mmu->inject_page_fault = nested_svm_inject_npf_exit;
- vcpu->arch.mmu->shadow_root_level = kvm_x86_ops.get_tdp_level(vcpu);
+ vcpu->arch.mmu->shadow_root_level = vcpu->arch.tdp_level;
reset_shadow_zero_bits_mask(vcpu, vcpu->arch.mmu);
vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu;
}
@@ -99,8 +104,7 @@ static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu)
void recalc_intercepts(struct vcpu_svm *svm)
{
- struct vmcb_control_area *c, *h;
- struct nested_state *g;
+ struct vmcb_control_area *c, *h, *g;
mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
@@ -109,14 +113,16 @@ void recalc_intercepts(struct vcpu_svm *svm)
c = &svm->vmcb->control;
h = &svm->nested.hsave->control;
- g = &svm->nested;
+ g = &svm->nested.ctl;
+
+ svm->nested.host_intercept_exceptions = h->intercept_exceptions;
c->intercept_cr = h->intercept_cr;
c->intercept_dr = h->intercept_dr;
c->intercept_exceptions = h->intercept_exceptions;
c->intercept = h->intercept;
- if (svm->vcpu.arch.hflags & HF_VINTR_MASK) {
+ if (g->int_ctl & V_INTR_MASKING_MASK) {
/* We only want the cr8 intercept bits of L1 */
c->intercept_cr &= ~(1U << INTERCEPT_CR8_READ);
c->intercept_cr &= ~(1U << INTERCEPT_CR8_WRITE);
@@ -138,11 +144,9 @@ void recalc_intercepts(struct vcpu_svm *svm)
c->intercept |= g->intercept;
}
-static void copy_vmcb_control_area(struct vmcb *dst_vmcb, struct vmcb *from_vmcb)
+static void copy_vmcb_control_area(struct vmcb_control_area *dst,
+ struct vmcb_control_area *from)
{
- struct vmcb_control_area *dst = &dst_vmcb->control;
- struct vmcb_control_area *from = &from_vmcb->control;
-
dst->intercept_cr = from->intercept_cr;
dst->intercept_dr = from->intercept_dr;
dst->intercept_exceptions = from->intercept_exceptions;
@@ -150,7 +154,7 @@ static void copy_vmcb_control_area(struct vmcb *dst_vmcb, struct vmcb *from_vmcb
dst->iopm_base_pa = from->iopm_base_pa;
dst->msrpm_base_pa = from->msrpm_base_pa;
dst->tsc_offset = from->tsc_offset;
- dst->asid = from->asid;
+ /* asid not copied, it is handled manually for svm->vmcb. */
dst->tlb_ctl = from->tlb_ctl;
dst->int_ctl = from->int_ctl;
dst->int_vector = from->int_vector;
@@ -179,7 +183,7 @@ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
*/
int i;
- if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT)))
+ if (!(svm->nested.ctl.intercept & (1ULL << INTERCEPT_MSR_PROT)))
return true;
for (i = 0; i < MSRPM_OFFSETS; i++) {
@@ -190,7 +194,7 @@ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
break;
p = msrpm_offsets[i];
- offset = svm->nested.vmcb_msrpm + (p * 4);
+ offset = svm->nested.ctl.msrpm_base_pa + (p * 4);
if (kvm_vcpu_read_guest(&svm->vcpu, offset, &value, 4))
return false;
@@ -203,41 +207,111 @@ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
return true;
}
-static bool nested_vmcb_checks(struct vmcb *vmcb)
+static bool nested_vmcb_check_controls(struct vmcb_control_area *control)
{
- if ((vmcb->save.efer & EFER_SVME) == 0)
+ if ((control->intercept & (1ULL << INTERCEPT_VMRUN)) == 0)
return false;
- if ((vmcb->control.intercept & (1ULL << INTERCEPT_VMRUN)) == 0)
+ if (control->asid == 0)
return false;
- if (vmcb->control.asid == 0)
- return false;
-
- if ((vmcb->control.nested_ctl & SVM_NESTED_CTL_NP_ENABLE) &&
+ if ((control->nested_ctl & SVM_NESTED_CTL_NP_ENABLE) &&
!npt_enabled)
return false;
return true;
}
-void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
- struct vmcb *nested_vmcb, struct kvm_host_map *map)
+static bool nested_vmcb_checks(struct vmcb *vmcb)
{
- bool evaluate_pending_interrupts =
- is_intercept(svm, INTERCEPT_VINTR) ||
- is_intercept(svm, INTERCEPT_IRET);
+ if ((vmcb->save.efer & EFER_SVME) == 0)
+ return false;
- if (kvm_get_rflags(&svm->vcpu) & X86_EFLAGS_IF)
- svm->vcpu.arch.hflags |= HF_HIF_MASK;
- else
- svm->vcpu.arch.hflags &= ~HF_HIF_MASK;
+ if (((vmcb->save.cr0 & X86_CR0_CD) == 0) &&
+ (vmcb->save.cr0 & X86_CR0_NW))
+ return false;
- if (nested_vmcb->control.nested_ctl & SVM_NESTED_CTL_NP_ENABLE) {
- svm->nested.nested_cr3 = nested_vmcb->control.nested_cr3;
- nested_svm_init_mmu_context(&svm->vcpu);
+ return nested_vmcb_check_controls(&vmcb->control);
+}
+
+static void load_nested_vmcb_control(struct vcpu_svm *svm,
+ struct vmcb_control_area *control)
+{
+ copy_vmcb_control_area(&svm->nested.ctl, control);
+
+	/* Copy it here because nested_vmcb_check_controls will check it. */
+ svm->nested.ctl.asid = control->asid;
+ svm->nested.ctl.msrpm_base_pa &= ~0x0fffULL;
+ svm->nested.ctl.iopm_base_pa &= ~0x0fffULL;
+}
+
+/*
+ * Synchronize fields that are written by the processor, so that
+ * they can be copied back into the nested_vmcb.
+ */
+void sync_nested_vmcb_control(struct vcpu_svm *svm)
+{
+ u32 mask;
+ svm->nested.ctl.event_inj = svm->vmcb->control.event_inj;
+ svm->nested.ctl.event_inj_err = svm->vmcb->control.event_inj_err;
+
+ /* Only a few fields of int_ctl are written by the processor. */
+ mask = V_IRQ_MASK | V_TPR_MASK;
+ if (!(svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK) &&
+ is_intercept(svm, SVM_EXIT_VINTR)) {
+ /*
+ * In order to request an interrupt window, L0 is usurping
+ * svm->vmcb->control.int_ctl and possibly setting V_IRQ
+ * even if it was clear in L1's VMCB. Restoring it would be
+ * wrong. However, in this case V_IRQ will remain true until
+ * interrupt_window_interception calls svm_clear_vintr and
+ * restores int_ctl. We can just leave it aside.
+ */
+ mask &= ~V_IRQ_MASK;
}
+ svm->nested.ctl.int_ctl &= ~mask;
+ svm->nested.ctl.int_ctl |= svm->vmcb->control.int_ctl & mask;
+}
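
Note: sync_nested_vmcb_control() above copies back only the int_ctl bits the processor is allowed to change. The masked-merge idiom it relies on, as a minimal sketch (the bit positions in main() are invented for the demonstration, not quoted from the SVM headers):

/* Illustrative sketch only -- not part of the patch. */
#include <stdint.h>
#include <stdio.h>

/* Keep the bits selected by @mask from the hardware value, the rest from the cached L1 value. */
static uint32_t merge_int_ctl(uint32_t cached, uint32_t hw, uint32_t mask)
{
	return (cached & ~mask) | (hw & mask);
}

int main(void)
{
	const uint32_t v_irq = 1u << 8, v_tpr = 0xf;	/* hypothetical positions */

	/* Cached L1 value 0x0, hardware wrote V_IRQ plus a TPR of 5. */
	printf("%#x\n", merge_int_ctl(0x0, (1u << 8) | 0x5, v_irq | v_tpr));	/* 0x105 */
	return 0;
}
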
+
+/*
+ * Transfer any event that L0 or L1 wanted to inject into L2 to
+ * EXIT_INT_INFO.
+ */
+static void nested_vmcb_save_pending_event(struct vcpu_svm *svm,
+ struct vmcb *nested_vmcb)
+{
+ struct kvm_vcpu *vcpu = &svm->vcpu;
+ u32 exit_int_info = 0;
+ unsigned int nr;
+
+ if (vcpu->arch.exception.injected) {
+ nr = vcpu->arch.exception.nr;
+ exit_int_info = nr | SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_EXEPT;
+
+ if (vcpu->arch.exception.has_error_code) {
+ exit_int_info |= SVM_EVTINJ_VALID_ERR;
+ nested_vmcb->control.exit_int_info_err =
+ vcpu->arch.exception.error_code;
+ }
+
+ } else if (vcpu->arch.nmi_injected) {
+ exit_int_info = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI;
+ } else if (vcpu->arch.interrupt.injected) {
+ nr = vcpu->arch.interrupt.nr;
+ exit_int_info = nr | SVM_EVTINJ_VALID;
+
+ if (vcpu->arch.interrupt.soft)
+ exit_int_info |= SVM_EVTINJ_TYPE_SOFT;
+ else
+ exit_int_info |= SVM_EVTINJ_TYPE_INTR;
+ }
+
+ nested_vmcb->control.exit_int_info = exit_int_info;
+}
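
Note: nested_vmcb_save_pending_event() above re-encodes a still-pending event as EXITINTINFO so L1 can re-inject it. A rough sketch of the encoding; the field layout used here (vector in bits 7:0, type in bits 10:8, error-code-valid in bit 11, valid in bit 31) is assumed from the SVM_EVTINJ_* convention rather than copied from the headers:

/* Illustrative sketch only -- not part of the patch. */
#include <stdint.h>
#include <stdio.h>

#define EVT_TYPE_SHIFT	8
#define EVT_TYPE_EXEPT	(3u << EVT_TYPE_SHIFT)
#define EVT_VALID_ERR	(1u << 11)
#define EVT_VALID	(1u << 31)

static uint32_t encode_exception(unsigned int vector, int has_error_code)
{
	uint32_t info = vector | EVT_TYPE_EXEPT | EVT_VALID;

	if (has_error_code)
		info |= EVT_VALID_ERR;
	return info;
}

int main(void)
{
	printf("#PF with error code: %#x\n", encode_exception(14, 1));	/* 0x80000b0e */
	return 0;
}
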
+
+static void nested_prepare_vmcb_save(struct vcpu_svm *svm, struct vmcb *nested_vmcb)
+{
/* Load the nested guest state */
svm->vmcb->save.es = nested_vmcb->save.es;
svm->vmcb->save.cs = nested_vmcb->save.cs;
@@ -249,14 +323,7 @@ void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
svm_set_efer(&svm->vcpu, nested_vmcb->save.efer);
svm_set_cr0(&svm->vcpu, nested_vmcb->save.cr0);
svm_set_cr4(&svm->vcpu, nested_vmcb->save.cr4);
- if (npt_enabled) {
- svm->vmcb->save.cr3 = nested_vmcb->save.cr3;
- svm->vcpu.arch.cr3 = nested_vmcb->save.cr3;
- } else
- (void)kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3);
-
- /* Guest paging mode is active - reset mmu */
- kvm_mmu_reset_context(&svm->vcpu);
+ (void)kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3);
svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2;
kvm_rax_write(&svm->vcpu, nested_vmcb->save.rax);
@@ -270,38 +337,34 @@ void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
svm->vmcb->save.dr7 = nested_vmcb->save.dr7;
svm->vcpu.arch.dr6 = nested_vmcb->save.dr6;
svm->vmcb->save.cpl = nested_vmcb->save.cpl;
+}
- svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa & ~0x0fffULL;
- svm->nested.vmcb_iopm = nested_vmcb->control.iopm_base_pa & ~0x0fffULL;
+static void nested_prepare_vmcb_control(struct vcpu_svm *svm)
+{
+ const u32 mask = V_INTR_MASKING_MASK | V_GIF_ENABLE_MASK | V_GIF_MASK;
+ if (svm->nested.ctl.nested_ctl & SVM_NESTED_CTL_NP_ENABLE)
+ nested_svm_init_mmu_context(&svm->vcpu);
- /* cache intercepts */
- svm->nested.intercept_cr = nested_vmcb->control.intercept_cr;
- svm->nested.intercept_dr = nested_vmcb->control.intercept_dr;
- svm->nested.intercept_exceptions = nested_vmcb->control.intercept_exceptions;
- svm->nested.intercept = nested_vmcb->control.intercept;
+ /* Guest paging mode is active - reset mmu */
+ kvm_mmu_reset_context(&svm->vcpu);
- svm_flush_tlb(&svm->vcpu, true);
- svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK;
- if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK)
- svm->vcpu.arch.hflags |= HF_VINTR_MASK;
- else
- svm->vcpu.arch.hflags &= ~HF_VINTR_MASK;
+ svm_flush_tlb(&svm->vcpu);
- svm->vcpu.arch.tsc_offset += nested_vmcb->control.tsc_offset;
- svm->vmcb->control.tsc_offset = svm->vcpu.arch.tsc_offset;
+ svm->vmcb->control.tsc_offset = svm->vcpu.arch.tsc_offset =
+ svm->vcpu.arch.l1_tsc_offset + svm->nested.ctl.tsc_offset;
- svm->vmcb->control.virt_ext = nested_vmcb->control.virt_ext;
- svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
- svm->vmcb->control.int_state = nested_vmcb->control.int_state;
- svm->vmcb->control.event_inj = nested_vmcb->control.event_inj;
- svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err;
+ svm->vmcb->control.int_ctl =
+ (svm->nested.ctl.int_ctl & ~mask) |
+ (svm->nested.hsave->control.int_ctl & mask);
- svm->vmcb->control.pause_filter_count =
- nested_vmcb->control.pause_filter_count;
- svm->vmcb->control.pause_filter_thresh =
- nested_vmcb->control.pause_filter_thresh;
+ svm->vmcb->control.virt_ext = svm->nested.ctl.virt_ext;
+ svm->vmcb->control.int_vector = svm->nested.ctl.int_vector;
+ svm->vmcb->control.int_state = svm->nested.ctl.int_state;
+ svm->vmcb->control.event_inj = svm->nested.ctl.event_inj;
+ svm->vmcb->control.event_inj_err = svm->nested.ctl.event_inj_err;
- kvm_vcpu_unmap(&svm->vcpu, map, true);
+ svm->vmcb->control.pause_filter_count = svm->nested.ctl.pause_filter_count;
+ svm->vmcb->control.pause_filter_thresh = svm->nested.ctl.pause_filter_thresh;
/* Enter Guest-Mode */
enter_guest_mode(&svm->vcpu);
@@ -312,25 +375,18 @@ void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
*/
recalc_intercepts(svm);
- svm->nested.vmcb = vmcb_gpa;
+ mark_all_dirty(svm->vmcb);
+}
- /*
- * If L1 had a pending IRQ/NMI before executing VMRUN,
- * which wasn't delivered because it was disallowed (e.g.
- * interrupts disabled), L0 needs to evaluate if this pending
- * event should cause an exit from L2 to L1 or be delivered
- * directly to L2.
- *
- * Usually this would be handled by the processor noticing an
- * IRQ/NMI window request. However, VMRUN can unblock interrupts
- * by implicitly setting GIF, so force L0 to perform pending event
- * evaluation by requesting a KVM_REQ_EVENT.
- */
- enable_gif(svm);
- if (unlikely(evaluate_pending_interrupts))
- kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
+void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
+ struct vmcb *nested_vmcb)
+{
+ svm->nested.vmcb = vmcb_gpa;
+ load_nested_vmcb_control(svm, &nested_vmcb->control);
+ nested_prepare_vmcb_save(svm, nested_vmcb);
+ nested_prepare_vmcb_control(svm);
- mark_all_dirty(svm->vmcb);
+ svm_set_gif(svm, true);
}
int nested_svm_vmrun(struct vcpu_svm *svm)
@@ -342,8 +398,12 @@ int nested_svm_vmrun(struct vcpu_svm *svm)
struct kvm_host_map map;
u64 vmcb_gpa;
- vmcb_gpa = svm->vmcb->save.rax;
+ if (is_smm(&svm->vcpu)) {
+ kvm_queue_exception(&svm->vcpu, UD_VECTOR);
+ return 1;
+ }
+ vmcb_gpa = svm->vmcb->save.rax;
ret = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(vmcb_gpa), &map);
if (ret == -EINVAL) {
kvm_inject_gp(&svm->vcpu, 0);
@@ -361,10 +421,7 @@ int nested_svm_vmrun(struct vcpu_svm *svm)
nested_vmcb->control.exit_code_hi = 0;
nested_vmcb->control.exit_info_1 = 0;
nested_vmcb->control.exit_info_2 = 0;
-
- kvm_vcpu_unmap(&svm->vcpu, &map, true);
-
- return ret;
+ goto out;
}
trace_kvm_nested_vmrun(svm->vmcb->save.rip, vmcb_gpa,
@@ -404,9 +461,10 @@ int nested_svm_vmrun(struct vcpu_svm *svm)
else
hsave->save.cr3 = kvm_read_cr3(&svm->vcpu);
- copy_vmcb_control_area(hsave, vmcb);
+ copy_vmcb_control_area(&hsave->control, &vmcb->control);
- enter_svm_guest_mode(svm, vmcb_gpa, nested_vmcb, &map);
+ svm->nested.nested_run_pending = 1;
+ enter_svm_guest_mode(svm, vmcb_gpa, nested_vmcb);
if (!nested_svm_vmrun_msrpm(svm)) {
svm->vmcb->control.exit_code = SVM_EXIT_ERR;
@@ -417,6 +475,9 @@ int nested_svm_vmrun(struct vcpu_svm *svm)
nested_svm_vmexit(svm);
}
+out:
+ kvm_vcpu_unmap(&svm->vcpu, &map, true);
+
return ret;
}
@@ -444,13 +505,6 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
struct vmcb *vmcb = svm->vmcb;
struct kvm_host_map map;
- trace_kvm_nested_vmexit_inject(vmcb->control.exit_code,
- vmcb->control.exit_info_1,
- vmcb->control.exit_info_2,
- vmcb->control.exit_int_info,
- vmcb->control.exit_int_info_err,
- KVM_ISA_SVM);
-
rc = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(svm->nested.vmcb), &map);
if (rc) {
if (rc == -EINVAL)
@@ -463,9 +517,13 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
/* Exit Guest-Mode */
leave_guest_mode(&svm->vcpu);
svm->nested.vmcb = 0;
+ WARN_ON_ONCE(svm->nested.nested_run_pending);
+
+ /* in case we halted in L2 */
+ svm->vcpu.arch.mp_state = KVM_MP_STATE_RUNNABLE;
/* Give the current vmcb to the guest */
- disable_gif(svm);
+ svm_set_gif(svm, false);
nested_vmcb->save.es = vmcb->save.es;
nested_vmcb->save.cs = vmcb->save.cs;
@@ -479,62 +537,42 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
nested_vmcb->save.cr2 = vmcb->save.cr2;
nested_vmcb->save.cr4 = svm->vcpu.arch.cr4;
nested_vmcb->save.rflags = kvm_get_rflags(&svm->vcpu);
- nested_vmcb->save.rip = vmcb->save.rip;
- nested_vmcb->save.rsp = vmcb->save.rsp;
- nested_vmcb->save.rax = vmcb->save.rax;
+ nested_vmcb->save.rip = kvm_rip_read(&svm->vcpu);
+ nested_vmcb->save.rsp = kvm_rsp_read(&svm->vcpu);
+ nested_vmcb->save.rax = kvm_rax_read(&svm->vcpu);
nested_vmcb->save.dr7 = vmcb->save.dr7;
nested_vmcb->save.dr6 = svm->vcpu.arch.dr6;
nested_vmcb->save.cpl = vmcb->save.cpl;
- nested_vmcb->control.int_ctl = vmcb->control.int_ctl;
- nested_vmcb->control.int_vector = vmcb->control.int_vector;
nested_vmcb->control.int_state = vmcb->control.int_state;
nested_vmcb->control.exit_code = vmcb->control.exit_code;
nested_vmcb->control.exit_code_hi = vmcb->control.exit_code_hi;
nested_vmcb->control.exit_info_1 = vmcb->control.exit_info_1;
nested_vmcb->control.exit_info_2 = vmcb->control.exit_info_2;
- nested_vmcb->control.exit_int_info = vmcb->control.exit_int_info;
- nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err;
+
+ if (nested_vmcb->control.exit_code != SVM_EXIT_ERR)
+ nested_vmcb_save_pending_event(svm, nested_vmcb);
if (svm->nrips_enabled)
nested_vmcb->control.next_rip = vmcb->control.next_rip;
- /*
- * If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have
- * to make sure that we do not lose injected events. So check event_inj
- * here and copy it to exit_int_info if it is valid.
- * Exit_int_info and event_inj can't be both valid because the case
- * below only happens on a VMRUN instruction intercept which has
- * no valid exit_int_info set.
- */
- if (vmcb->control.event_inj & SVM_EVTINJ_VALID) {
- struct vmcb_control_area *nc = &nested_vmcb->control;
-
- nc->exit_int_info = vmcb->control.event_inj;
- nc->exit_int_info_err = vmcb->control.event_inj_err;
- }
-
- nested_vmcb->control.tlb_ctl = 0;
- nested_vmcb->control.event_inj = 0;
- nested_vmcb->control.event_inj_err = 0;
+ nested_vmcb->control.int_ctl = svm->nested.ctl.int_ctl;
+ nested_vmcb->control.tlb_ctl = svm->nested.ctl.tlb_ctl;
+ nested_vmcb->control.event_inj = svm->nested.ctl.event_inj;
+ nested_vmcb->control.event_inj_err = svm->nested.ctl.event_inj_err;
nested_vmcb->control.pause_filter_count =
svm->vmcb->control.pause_filter_count;
nested_vmcb->control.pause_filter_thresh =
svm->vmcb->control.pause_filter_thresh;
- /* We always set V_INTR_MASKING and remember the old value in hflags */
- if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
- nested_vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
-
/* Restore the original control entries */
- copy_vmcb_control_area(vmcb, hsave);
+ copy_vmcb_control_area(&vmcb->control, &hsave->control);
- svm->vcpu.arch.tsc_offset = svm->vmcb->control.tsc_offset;
- kvm_clear_exception_queue(&svm->vcpu);
- kvm_clear_interrupt_queue(&svm->vcpu);
+ svm->vmcb->control.tsc_offset = svm->vcpu.arch.tsc_offset =
+ svm->vcpu.arch.l1_tsc_offset;
- svm->nested.nested_cr3 = 0;
+ svm->nested.ctl.nested_cr3 = 0;
/* Restore selected save entries */
svm->vmcb->save.es = hsave->save.es;
@@ -562,6 +600,13 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
mark_all_dirty(svm->vmcb);
+ trace_kvm_nested_vmexit_inject(nested_vmcb->control.exit_code,
+ nested_vmcb->control.exit_info_1,
+ nested_vmcb->control.exit_info_2,
+ nested_vmcb->control.exit_int_info,
+ nested_vmcb->control.exit_int_info_err,
+ KVM_ISA_SVM);
+
kvm_vcpu_unmap(&svm->vcpu, &map, true);
nested_svm_uninit_mmu_context(&svm->vcpu);
@@ -579,12 +624,28 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
return 0;
}
+/*
+ * Forcibly leave nested mode in order to be able to reset the VCPU later on.
+ */
+void svm_leave_nested(struct vcpu_svm *svm)
+{
+ if (is_guest_mode(&svm->vcpu)) {
+ struct vmcb *hsave = svm->nested.hsave;
+ struct vmcb *vmcb = svm->vmcb;
+
+ svm->nested.nested_run_pending = 0;
+ leave_guest_mode(&svm->vcpu);
+ copy_vmcb_control_area(&vmcb->control, &hsave->control);
+ nested_svm_uninit_mmu_context(&svm->vcpu);
+ }
+}
+
static int nested_svm_exit_handled_msr(struct vcpu_svm *svm)
{
u32 offset, msr, value;
int write, mask;
- if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT)))
+ if (!(svm->nested.ctl.intercept & (1ULL << INTERCEPT_MSR_PROT)))
return NESTED_EXIT_HOST;
msr = svm->vcpu.arch.regs[VCPU_REGS_RCX];
@@ -598,56 +659,12 @@ static int nested_svm_exit_handled_msr(struct vcpu_svm *svm)
/* Offset is in 32 bit units but need in 8 bit units */
offset *= 4;
- if (kvm_vcpu_read_guest(&svm->vcpu, svm->nested.vmcb_msrpm + offset, &value, 4))
+ if (kvm_vcpu_read_guest(&svm->vcpu, svm->nested.ctl.msrpm_base_pa + offset, &value, 4))
return NESTED_EXIT_DONE;
return (value & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
}
-/* DB exceptions for our internal use must not cause vmexit */
-static int nested_svm_intercept_db(struct vcpu_svm *svm)
-{
- unsigned long dr6 = svm->vmcb->save.dr6;
-
- /* Always catch it and pass it to userspace if debugging. */
- if (svm->vcpu.guest_debug &
- (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
- return NESTED_EXIT_HOST;
-
- /* if we're not singlestepping, it's not ours */
- if (!svm->nmi_singlestep)
- goto reflected_db;
-
- /* if it's not a singlestep exception, it's not ours */
- if (!(dr6 & DR6_BS))
- goto reflected_db;
-
- /* if the guest is singlestepping, it should get the vmexit */
- if (svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF) {
- disable_nmi_singlestep(svm);
- goto reflected_db;
- }
-
- /* it's ours, the nested hypervisor must not see this one */
- return NESTED_EXIT_HOST;
-
-reflected_db:
- /*
- * Synchronize guest DR6 here just like in kvm_deliver_exception_payload;
- * it will be moved into the nested VMCB by nested_svm_vmexit. Once
- * exceptions will be moved to svm_check_nested_events, all this stuff
- * will just go away and we could just return NESTED_EXIT_HOST
- * unconditionally. db_interception will queue the exception, which
- * will be processed by svm_check_nested_events if a nested vmexit is
- * required, and we will just use kvm_deliver_exception_payload to copy
- * the payload to DR6 before vmexit.
- */
- WARN_ON(svm->vcpu.arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT);
- svm->vcpu.arch.dr6 &= ~(DR_TRAP_BITS | DR6_RTM);
- svm->vcpu.arch.dr6 |= dr6 & ~DR6_FIXED_1;
- return NESTED_EXIT_DONE;
-}
-
static int nested_svm_intercept_ioio(struct vcpu_svm *svm)
{
unsigned port, size, iopm_len;
@@ -655,13 +672,13 @@ static int nested_svm_intercept_ioio(struct vcpu_svm *svm)
u8 start_bit;
u64 gpa;
- if (!(svm->nested.intercept & (1ULL << INTERCEPT_IOIO_PROT)))
+ if (!(svm->nested.ctl.intercept & (1ULL << INTERCEPT_IOIO_PROT)))
return NESTED_EXIT_HOST;
port = svm->vmcb->control.exit_info_1 >> 16;
size = (svm->vmcb->control.exit_info_1 & SVM_IOIO_SIZE_MASK) >>
SVM_IOIO_SIZE_SHIFT;
- gpa = svm->nested.vmcb_iopm + (port / 8);
+ gpa = svm->nested.ctl.iopm_base_pa + (port / 8);
start_bit = port % 8;
iopm_len = (start_bit + size > 8) ? 2 : 1;
mask = (0xf >> (4 - size)) << start_bit;
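
Note on the IOPM lookup in the context above: one permission bit per I/O port, an access of 1-4 bytes may straddle a byte boundary, and the test mask is built from the access size and the port's bit position within its byte. A standalone sketch of the same arithmetic, not part of the patch:

/* Illustrative sketch only -- not part of the patch. */
#include <stdint.h>
#include <stdio.h>

static void iopm_lookup(unsigned int port, unsigned int size)
{
	uint64_t offset = port / 8;			/* byte offset into the IOPM */
	unsigned int start_bit = port % 8;
	unsigned int len = (start_bit + size > 8) ? 2 : 1;
	unsigned int mask = (0xf >> (4 - size)) << start_bit;

	printf("port %#x size %u -> offset %llu, read %u byte(s), mask %#x\n",
	       port, size, (unsigned long long)offset, len, mask);
}

int main(void)
{
	iopm_lookup(0x3f8, 1);	/* fits within one IOPM byte */
	iopm_lookup(0x3f7, 2);	/* straddles a byte boundary: two bytes are read */
	return 0;
}
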
@@ -687,31 +704,23 @@ static int nested_svm_intercept(struct vcpu_svm *svm)
break;
case SVM_EXIT_READ_CR0 ... SVM_EXIT_WRITE_CR8: {
u32 bit = 1U << (exit_code - SVM_EXIT_READ_CR0);
- if (svm->nested.intercept_cr & bit)
+ if (svm->nested.ctl.intercept_cr & bit)
vmexit = NESTED_EXIT_DONE;
break;
}
case SVM_EXIT_READ_DR0 ... SVM_EXIT_WRITE_DR7: {
u32 bit = 1U << (exit_code - SVM_EXIT_READ_DR0);
- if (svm->nested.intercept_dr & bit)
+ if (svm->nested.ctl.intercept_dr & bit)
vmexit = NESTED_EXIT_DONE;
break;
}
case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
- u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE);
- if (svm->nested.intercept_exceptions & excp_bits) {
- if (exit_code == SVM_EXIT_EXCP_BASE + DB_VECTOR)
- vmexit = nested_svm_intercept_db(svm);
- else if (exit_code == SVM_EXIT_EXCP_BASE + BP_VECTOR &&
- svm->vcpu.guest_debug & KVM_GUESTDBG_USE_SW_BP)
- vmexit = NESTED_EXIT_HOST;
- else
- vmexit = NESTED_EXIT_DONE;
- }
- /* async page fault always cause vmexit */
- else if ((exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) &&
- svm->vcpu.arch.exception.nested_apf != 0)
- vmexit = NESTED_EXIT_DONE;
+ /*
+ * Host-intercepted exceptions have been checked already in
+ * nested_svm_exit_special. There is nothing to do here,
+ * the vmexit is injected by svm_check_nested_events.
+ */
+ vmexit = NESTED_EXIT_DONE;
break;
}
case SVM_EXIT_ERR: {
@@ -720,7 +729,7 @@ static int nested_svm_intercept(struct vcpu_svm *svm)
}
default: {
u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR);
- if (svm->nested.intercept & exit_bits)
+ if (svm->nested.ctl.intercept & exit_bits)
vmexit = NESTED_EXIT_DONE;
}
}
@@ -756,62 +765,140 @@ int nested_svm_check_permissions(struct vcpu_svm *svm)
return 0;
}
-int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
- bool has_error_code, u32 error_code)
+static bool nested_exit_on_exception(struct vcpu_svm *svm)
{
- int vmexit;
+ unsigned int nr = svm->vcpu.arch.exception.nr;
- if (!is_guest_mode(&svm->vcpu))
- return 0;
+ return (svm->nested.ctl.intercept_exceptions & (1 << nr));
+}
- vmexit = nested_svm_intercept(svm);
- if (vmexit != NESTED_EXIT_DONE)
- return 0;
+static void nested_svm_inject_exception_vmexit(struct vcpu_svm *svm)
+{
+ unsigned int nr = svm->vcpu.arch.exception.nr;
svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr;
svm->vmcb->control.exit_code_hi = 0;
- svm->vmcb->control.exit_info_1 = error_code;
+
+ if (svm->vcpu.arch.exception.has_error_code)
+ svm->vmcb->control.exit_info_1 = svm->vcpu.arch.exception.error_code;
/*
* EXITINFO2 is undefined for all exception intercepts other
* than #PF.
*/
- if (svm->vcpu.arch.exception.nested_apf)
- svm->vmcb->control.exit_info_2 = svm->vcpu.arch.apf.nested_apf_token;
- else if (svm->vcpu.arch.exception.has_payload)
- svm->vmcb->control.exit_info_2 = svm->vcpu.arch.exception.payload;
- else
- svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
+ if (nr == PF_VECTOR) {
+ if (svm->vcpu.arch.exception.nested_apf)
+ svm->vmcb->control.exit_info_2 = svm->vcpu.arch.apf.nested_apf_token;
+ else if (svm->vcpu.arch.exception.has_payload)
+ svm->vmcb->control.exit_info_2 = svm->vcpu.arch.exception.payload;
+ else
+ svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
+ } else if (nr == DB_VECTOR) {
+ /* See inject_pending_event. */
+ kvm_deliver_exception_payload(&svm->vcpu);
+ if (svm->vcpu.arch.dr7 & DR7_GD) {
+ svm->vcpu.arch.dr7 &= ~DR7_GD;
+ kvm_update_dr7(&svm->vcpu);
+ }
+ } else
+ WARN_ON(svm->vcpu.arch.exception.has_payload);
- svm->nested.exit_required = true;
- return vmexit;
+ nested_svm_vmexit(svm);
+}
+
+static void nested_svm_smi(struct vcpu_svm *svm)
+{
+ svm->vmcb->control.exit_code = SVM_EXIT_SMI;
+ svm->vmcb->control.exit_info_1 = 0;
+ svm->vmcb->control.exit_info_2 = 0;
+
+ nested_svm_vmexit(svm);
+}
+
+static void nested_svm_nmi(struct vcpu_svm *svm)
+{
+ svm->vmcb->control.exit_code = SVM_EXIT_NMI;
+ svm->vmcb->control.exit_info_1 = 0;
+ svm->vmcb->control.exit_info_2 = 0;
+
+ nested_svm_vmexit(svm);
}
static void nested_svm_intr(struct vcpu_svm *svm)
{
+ trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip);
+
svm->vmcb->control.exit_code = SVM_EXIT_INTR;
svm->vmcb->control.exit_info_1 = 0;
svm->vmcb->control.exit_info_2 = 0;
- /* nested_svm_vmexit this gets called afterwards from handle_exit */
- svm->nested.exit_required = true;
- trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip);
+ nested_svm_vmexit(svm);
}
-static bool nested_exit_on_intr(struct vcpu_svm *svm)
+static inline bool nested_exit_on_init(struct vcpu_svm *svm)
{
- return (svm->nested.intercept & 1ULL);
+ return (svm->nested.ctl.intercept & (1ULL << INTERCEPT_INIT));
+}
+
+static void nested_svm_init(struct vcpu_svm *svm)
+{
+ svm->vmcb->control.exit_code = SVM_EXIT_INIT;
+ svm->vmcb->control.exit_info_1 = 0;
+ svm->vmcb->control.exit_info_2 = 0;
+
+ nested_svm_vmexit(svm);
}
-int svm_check_nested_events(struct kvm_vcpu *vcpu)
+
+static int svm_check_nested_events(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
bool block_nested_events =
- kvm_event_needs_reinjection(vcpu) || svm->nested.exit_required;
+ kvm_event_needs_reinjection(vcpu) || svm->nested.nested_run_pending;
+ struct kvm_lapic *apic = vcpu->arch.apic;
+
+ if (lapic_in_kernel(vcpu) &&
+ test_bit(KVM_APIC_INIT, &apic->pending_events)) {
+ if (block_nested_events)
+ return -EBUSY;
+ if (!nested_exit_on_init(svm))
+ return 0;
+ nested_svm_init(svm);
+ return 0;
+ }
+
+ if (vcpu->arch.exception.pending) {
+ if (block_nested_events)
+ return -EBUSY;
+ if (!nested_exit_on_exception(svm))
+ return 0;
+ nested_svm_inject_exception_vmexit(svm);
+ return 0;
+ }
+
+ if (vcpu->arch.smi_pending && !svm_smi_blocked(vcpu)) {
+ if (block_nested_events)
+ return -EBUSY;
+ if (!nested_exit_on_smi(svm))
+ return 0;
+ nested_svm_smi(svm);
+ return 0;
+ }
+
+ if (vcpu->arch.nmi_pending && !svm_nmi_blocked(vcpu)) {
+ if (block_nested_events)
+ return -EBUSY;
+ if (!nested_exit_on_nmi(svm))
+ return 0;
+ nested_svm_nmi(svm);
+ return 0;
+ }
- if (kvm_cpu_has_interrupt(vcpu) && nested_exit_on_intr(svm)) {
+ if (kvm_cpu_has_interrupt(vcpu) && !svm_interrupt_blocked(vcpu)) {
if (block_nested_events)
return -EBUSY;
+ if (!nested_exit_on_intr(svm))
+ return 0;
nested_svm_intr(svm);
return 0;
}
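
Note: svm_check_nested_events() now walks pending events in a fixed priority order (INIT, exception, SMI, NMI, interrupt), returning -EBUSY while injection must wait and synthesizing a nested vmexit only when L1 asked to intercept the event. A compact userspace model of that flow, with the decision inputs reduced to booleans (this is a sketch of the shape of the logic, not KVM code):

/* Illustrative sketch only -- not part of the patch. */
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

struct pending { bool init, exception, smi, nmi, intr; };

static int check_events(const struct pending *p, bool block, bool l1_intercepts)
{
	const struct { bool pending; const char *name; } order[] = {
		{ p->init, "INIT" }, { p->exception, "#EXC" },
		{ p->smi, "SMI" }, { p->nmi, "NMI" }, { p->intr, "INTR" },
	};
	unsigned int i;

	for (i = 0; i < sizeof(order) / sizeof(order[0]); i++) {
		if (!order[i].pending)
			continue;
		if (block)
			return -EBUSY;		/* retry after the pending VMRUN/injection */
		if (l1_intercepts)
			printf("synthesize nested vmexit for %s\n", order[i].name);
		return 0;			/* otherwise the event is delivered to L2 */
	}
	return 0;
}

int main(void)
{
	struct pending p = { .nmi = true, .intr = true };

	return check_events(&p, false, true);	/* the NMI wins over the interrupt */
}
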
@@ -826,21 +913,170 @@ int nested_svm_exit_special(struct vcpu_svm *svm)
switch (exit_code) {
case SVM_EXIT_INTR:
case SVM_EXIT_NMI:
- case SVM_EXIT_EXCP_BASE + MC_VECTOR:
- return NESTED_EXIT_HOST;
case SVM_EXIT_NPF:
- /* For now we are always handling NPFs when using them */
- if (npt_enabled)
+ return NESTED_EXIT_HOST;
+ case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
+ u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE);
+
+ if (get_host_vmcb(svm)->control.intercept_exceptions & excp_bits)
return NESTED_EXIT_HOST;
- break;
- case SVM_EXIT_EXCP_BASE + PF_VECTOR:
- /* When we're shadowing, trap PFs, but not async PF */
- if (!npt_enabled && svm->vcpu.arch.apf.host_apf_reason == 0)
+ else if (exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR &&
+ svm->vcpu.arch.apf.host_apf_flags)
+ /* Trap async PF even if not shadowing */
return NESTED_EXIT_HOST;
break;
+ }
default:
break;
}
return NESTED_EXIT_CONTINUE;
}
+
+static int svm_get_nested_state(struct kvm_vcpu *vcpu,
+ struct kvm_nested_state __user *user_kvm_nested_state,
+ u32 user_data_size)
+{
+ struct vcpu_svm *svm;
+ struct kvm_nested_state kvm_state = {
+ .flags = 0,
+ .format = KVM_STATE_NESTED_FORMAT_SVM,
+ .size = sizeof(kvm_state),
+ };
+ struct vmcb __user *user_vmcb = (struct vmcb __user *)
+ &user_kvm_nested_state->data.svm[0];
+
+ if (!vcpu)
+ return kvm_state.size + KVM_STATE_NESTED_SVM_VMCB_SIZE;
+
+ svm = to_svm(vcpu);
+
+ if (user_data_size < kvm_state.size)
+ goto out;
+
+ /* First fill in the header and copy it out. */
+ if (is_guest_mode(vcpu)) {
+ kvm_state.hdr.svm.vmcb_pa = svm->nested.vmcb;
+ kvm_state.size += KVM_STATE_NESTED_SVM_VMCB_SIZE;
+ kvm_state.flags |= KVM_STATE_NESTED_GUEST_MODE;
+
+ if (svm->nested.nested_run_pending)
+ kvm_state.flags |= KVM_STATE_NESTED_RUN_PENDING;
+ }
+
+ if (gif_set(svm))
+ kvm_state.flags |= KVM_STATE_NESTED_GIF_SET;
+
+ if (copy_to_user(user_kvm_nested_state, &kvm_state, sizeof(kvm_state)))
+ return -EFAULT;
+
+ if (!is_guest_mode(vcpu))
+ goto out;
+
+ /*
+ * Copy over the full size of the VMCB rather than just the size
+ * of the structs.
+ */
+ if (clear_user(user_vmcb, KVM_STATE_NESTED_SVM_VMCB_SIZE))
+ return -EFAULT;
+ if (copy_to_user(&user_vmcb->control, &svm->nested.ctl,
+ sizeof(user_vmcb->control)))
+ return -EFAULT;
+ if (copy_to_user(&user_vmcb->save, &svm->nested.hsave->save,
+ sizeof(user_vmcb->save)))
+ return -EFAULT;
+
+out:
+ return kvm_state.size;
+}
+
+static int svm_set_nested_state(struct kvm_vcpu *vcpu,
+ struct kvm_nested_state __user *user_kvm_nested_state,
+ struct kvm_nested_state *kvm_state)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+ struct vmcb *hsave = svm->nested.hsave;
+ struct vmcb __user *user_vmcb = (struct vmcb __user *)
+ &user_kvm_nested_state->data.svm[0];
+ struct vmcb_control_area ctl;
+ struct vmcb_save_area save;
+ u32 cr0;
+
+ if (kvm_state->format != KVM_STATE_NESTED_FORMAT_SVM)
+ return -EINVAL;
+
+ if (kvm_state->flags & ~(KVM_STATE_NESTED_GUEST_MODE |
+ KVM_STATE_NESTED_RUN_PENDING |
+ KVM_STATE_NESTED_GIF_SET))
+ return -EINVAL;
+
+ /*
+ * If in guest mode, vcpu->arch.efer actually refers to the L2 guest's
+ * EFER.SVME, but EFER.SVME still has to be 1 for VMRUN to succeed.
+ */
+ if (!(vcpu->arch.efer & EFER_SVME)) {
+ /* GIF=1 and no guest mode are required if SVME=0. */
+ if (kvm_state->flags != KVM_STATE_NESTED_GIF_SET)
+ return -EINVAL;
+ }
+
+ /* SMM temporarily disables SVM, so we cannot be in guest mode. */
+ if (is_smm(vcpu) && (kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE))
+ return -EINVAL;
+
+ if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE)) {
+ svm_leave_nested(svm);
+ goto out_set_gif;
+ }
+
+ if (!page_address_valid(vcpu, kvm_state->hdr.svm.vmcb_pa))
+ return -EINVAL;
+ if (kvm_state->size < sizeof(*kvm_state) + KVM_STATE_NESTED_SVM_VMCB_SIZE)
+ return -EINVAL;
+ if (copy_from_user(&ctl, &user_vmcb->control, sizeof(ctl)))
+ return -EFAULT;
+ if (copy_from_user(&save, &user_vmcb->save, sizeof(save)))
+ return -EFAULT;
+
+ if (!nested_vmcb_check_controls(&ctl))
+ return -EINVAL;
+
+ /*
+ * Processor state contains L2 state. Check that it is
+ * valid for guest mode (see nested_vmcb_checks).
+ */
+ cr0 = kvm_read_cr0(vcpu);
+ if (((cr0 & X86_CR0_CD) == 0) && (cr0 & X86_CR0_NW))
+ return -EINVAL;
+
+ /*
+ * Validate host state saved from before VMRUN (see
+ * nested_svm_check_permissions).
+ * TODO: validate reserved bits for all saved state.
+ */
+ if (!(save.cr0 & X86_CR0_PG))
+ return -EINVAL;
+
+ /*
+ * All checks done, we can enter guest mode. L1 control fields
+ * come from the nested save state. Guest state is already
+ * in the registers, the save area of the nested state instead
+ * contains saved L1 state.
+ */
+ copy_vmcb_control_area(&hsave->control, &svm->vmcb->control);
+ hsave->save = save;
+
+ svm->nested.vmcb = kvm_state->hdr.svm.vmcb_pa;
+ load_nested_vmcb_control(svm, &ctl);
+ nested_prepare_vmcb_control(svm);
+
+out_set_gif:
+ svm_set_gif(svm, !!(kvm_state->flags & KVM_STATE_NESTED_GIF_SET));
+ return 0;
+}
+
+struct kvm_x86_nested_ops svm_nested_ops = {
+ .check_events = svm_check_nested_events,
+ .get_state = svm_get_nested_state,
+ .set_state = svm_set_nested_state,
+};
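
Note: svm_nested_ops gathers the nested-virtualization callbacks (event checks, migration state save/restore) into one table that common x86 code can call without knowing which vendor module is loaded. The pattern, reduced to a minimal example (names below are invented, not KVM symbols):

/* Illustrative sketch only -- not part of the patch. */
#include <stdio.h>

struct nested_ops {
	int (*check_events)(void *vcpu);
};

static int demo_check_events(void *vcpu)
{
	(void)vcpu;
	printf("vendor-specific check_events called\n");
	return 0;
}

/* Common code only sees the table; the vendor module fills it in. */
static const struct nested_ops demo_nested_ops = {
	.check_events = demo_check_events,
};

int main(void)
{
	return demo_nested_ops.check_events(NULL);
}
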
diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
index ce0b10fe5e2b..035da07500e8 100644
--- a/arch/x86/kvm/svm/pmu.c
+++ b/arch/x86/kvm/svm/pmu.c
@@ -215,21 +215,22 @@ static struct kvm_pmc *amd_msr_idx_to_pmc(struct kvm_vcpu *vcpu, u32 msr)
return pmc;
}
-static int amd_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
+static int amd_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
struct kvm_pmc *pmc;
+ u32 msr = msr_info->index;
/* MSR_PERFCTRn */
pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_COUNTER);
if (pmc) {
- *data = pmc_read_counter(pmc);
+ msr_info->data = pmc_read_counter(pmc);
return 0;
}
/* MSR_EVNTSELn */
pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_EVNTSEL);
if (pmc) {
- *data = pmc->eventsel;
+ msr_info->data = pmc->eventsel;
return 0;
}
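
Note: amd_pmu_get_msr() now receives the same struct msr_data descriptor as the write path and fills msr_info->data instead of an out-pointer. A trimmed stand-in for that calling convention (the struct keeps only the fields this sketch needs, and the MSR number is just an example):

/* Illustrative sketch only -- not part of the patch. */
#include <stdint.h>
#include <stdio.h>

struct msr_data {
	uint32_t index;		/* which MSR to read */
	uint64_t data;		/* filled in by the read handler */
};

static int demo_get_msr(struct msr_data *msr_info)
{
	/* Pretend the requested counter currently reads 42. */
	msr_info->data = 42;
	return 0;
}

int main(void)
{
	struct msr_data m = { .index = 0xc0010004 };	/* example index only */

	demo_get_msr(&m);
	printf("MSR %#x -> %llu\n", m.index, (unsigned long long)m.data);
	return 0;
}
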
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 89f7f3aebd31..5573a97f1520 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -336,8 +336,7 @@ static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,
/* Avoid using vmalloc for smaller buffers. */
size = npages * sizeof(struct page *);
if (size > PAGE_SIZE)
- pages = __vmalloc(size, GFP_KERNEL_ACCOUNT | __GFP_ZERO,
- PAGE_KERNEL);
+ pages = __vmalloc(size, GFP_KERNEL_ACCOUNT | __GFP_ZERO);
else
pages = kmalloc(size, GFP_KERNEL_ACCOUNT);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index a862c768fd54..9e333b91ff78 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -33,6 +33,7 @@
#include <asm/debugreg.h>
#include <asm/kvm_para.h>
#include <asm/irq_remapping.h>
+#include <asm/mce.h>
#include <asm/spec-ctrl.h>
#include <asm/cpu_device_id.h>
@@ -264,6 +265,7 @@ static int get_npt_level(struct kvm_vcpu *vcpu)
void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
{
+ struct vcpu_svm *svm = to_svm(vcpu);
vcpu->arch.efer = efer;
if (!npt_enabled) {
@@ -274,8 +276,13 @@ void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
efer &= ~EFER_LME;
}
- to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME;
- mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR);
+ if (!(efer & EFER_SVME)) {
+ svm_leave_nested(svm);
+ svm_set_gif(svm, true);
+ }
+
+ svm->vmcb->save.efer = efer | EFER_SVME;
+ mark_dirty(svm->vmcb, VMCB_CR);
}
static int is_external_interrupt(u32 info)
@@ -318,9 +325,6 @@ static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
if (!kvm_emulate_instruction(vcpu, EMULTYPE_SKIP))
return 0;
} else {
- if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE)
- pr_err("%s: ip 0x%lx next 0x%llx\n",
- __func__, kvm_rip_read(vcpu), svm->next_rip);
kvm_rip_write(vcpu, svm->next_rip);
}
svm_set_interrupt_shadow(vcpu, 0);
@@ -333,17 +337,8 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu)
struct vcpu_svm *svm = to_svm(vcpu);
unsigned nr = vcpu->arch.exception.nr;
bool has_error_code = vcpu->arch.exception.has_error_code;
- bool reinject = vcpu->arch.exception.injected;
u32 error_code = vcpu->arch.exception.error_code;
- /*
- * If we are within a nested VM we'd better #VMEXIT and let the guest
- * handle the exception
- */
- if (!reinject &&
- nested_svm_check_exception(svm, nr, has_error_code, error_code))
- return;
-
kvm_deliver_exception_payload(&svm->vcpu);
if (nr == BP_VECTOR && !nrips) {
@@ -780,7 +775,7 @@ static __init void svm_adjust_mmio_mask(void)
*/
mask = (mask_bit < 52) ? rsvd_bits(mask_bit, 51) | PT_PRESENT_MASK : 0;
- kvm_mmu_set_mmio_spte_mask(mask, mask, PT_WRITABLE_MASK | PT_USER_MASK);
+ kvm_mmu_set_mmio_spte_mask(mask, PT_WRITABLE_MASK | PT_USER_MASK);
}
static void svm_hardware_teardown(void)
@@ -890,7 +885,7 @@ static __init int svm_hardware_setup(void)
if (npt_enabled && !npt)
npt_enabled = false;
- kvm_configure_mmu(npt_enabled, PT_PDPE_LEVEL);
+ kvm_configure_mmu(npt_enabled, PG_LEVEL_1G);
pr_info("kvm: Nested Paging %sabled\n", npt_enabled ? "en" : "dis");
if (nrips) {
@@ -953,16 +948,6 @@ static void init_sys_seg(struct vmcb_seg *seg, uint32_t type)
seg->base = 0;
}
-static u64 svm_read_l1_tsc_offset(struct kvm_vcpu *vcpu)
-{
- struct vcpu_svm *svm = to_svm(vcpu);
-
- if (is_guest_mode(vcpu))
- return svm->nested.hsave->control.tsc_offset;
-
- return vcpu->arch.tsc_offset;
-}
-
static u64 svm_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -1208,6 +1193,7 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
svm->avic_is_running = true;
svm->nested.hsave = page_address(hsave_page);
+ clear_page(svm->nested.hsave);
svm->msrpm = page_address(msrpm_pages);
svm_vcpu_init_msrpm(svm->msrpm);
@@ -1364,12 +1350,13 @@ static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
}
}
-static inline void svm_enable_vintr(struct vcpu_svm *svm)
+static void svm_set_vintr(struct vcpu_svm *svm)
{
struct vmcb_control_area *control;
/* The following fields are ignored when AVIC is enabled */
WARN_ON(kvm_vcpu_apicv_active(&svm->vcpu));
+ set_intercept(svm, INTERCEPT_VINTR);
/*
* This is just a dummy VINTR to actually cause a vmexit to happen.
@@ -1383,18 +1370,19 @@ static inline void svm_enable_vintr(struct vcpu_svm *svm)
mark_dirty(svm->vmcb, VMCB_INTR);
}
-static void svm_set_vintr(struct vcpu_svm *svm)
-{
- set_intercept(svm, INTERCEPT_VINTR);
- if (is_intercept(svm, INTERCEPT_VINTR))
- svm_enable_vintr(svm);
-}
-
static void svm_clear_vintr(struct vcpu_svm *svm)
{
+ const u32 mask = V_TPR_MASK | V_GIF_ENABLE_MASK | V_GIF_MASK | V_INTR_MASKING_MASK;
clr_intercept(svm, INTERCEPT_VINTR);
- svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
+ /* Drop int_ctl fields related to VINTR injection. */
+ svm->vmcb->control.int_ctl &= mask;
+ if (is_guest_mode(&svm->vcpu)) {
+ WARN_ON((svm->vmcb->control.int_ctl & V_TPR_MASK) !=
+ (svm->nested.ctl.int_ctl & V_TPR_MASK));
+ svm->vmcb->control.int_ctl |= svm->nested.ctl.int_ctl & ~mask;
+ }
+
mark_dirty(svm->vmcb, VMCB_INTR);
}
@@ -1533,14 +1521,6 @@ static void svm_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
mark_dirty(svm->vmcb, VMCB_DT);
}
-static void svm_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
-{
-}
-
-static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
-{
-}
-
static void update_cr0_intercept(struct vcpu_svm *svm)
{
ulong gcr0 = svm->vcpu.arch.cr0;
@@ -1603,7 +1583,7 @@ int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
return 1;
if (npt_enabled && ((old_cr4 ^ cr4) & X86_CR4_PGE))
- svm_flush_tlb(vcpu, true);
+ svm_flush_tlb(vcpu);
vcpu->arch.cr4 = cr4;
if (!npt_enabled)
@@ -1842,6 +1822,25 @@ static bool is_erratum_383(void)
return true;
}
+/*
+ * Trigger machine check on the host. We assume all the MSRs are already set up
+ * by the CPU and that we still run on the same CPU as the MCE occurred on.
+ * We pass a fake environment to the machine check handler because we want
+ * the guest to be always treated like user space, no matter what context
+ * it used internally.
+ */
+static void kvm_machine_check(void)
+{
+#if defined(CONFIG_X86_MCE)
+ struct pt_regs regs = {
+ .cs = 3, /* Fake ring 3 no matter what the guest ran on */
+ .flags = X86_EFLAGS_IF,
+ };
+
+ do_machine_check(&regs, 0);
+#endif
+}
+
static void svm_handle_mce(struct vcpu_svm *svm)
{
if (is_erratum_383()) {
@@ -1860,11 +1859,7 @@ static void svm_handle_mce(struct vcpu_svm *svm)
* On an #MC intercept the MCE handler is not called automatically in
* the host. So do it by hand here.
*/
- asm volatile (
- "int $0x12\n");
- /* not sure if we ever come back to this point */
-
- return;
+ kvm_machine_check();
}
static int mc_interception(struct vcpu_svm *svm)
@@ -1993,6 +1988,38 @@ static int vmrun_interception(struct vcpu_svm *svm)
return nested_svm_vmrun(svm);
}
+void svm_set_gif(struct vcpu_svm *svm, bool value)
+{
+ if (value) {
+ /*
+ * If VGIF is enabled, the STGI intercept is only added to
+ * detect the opening of the SMI/NMI window; remove it now.
+ * Likewise, clear the VINTR intercept, we will set it
+ * again while processing KVM_REQ_EVENT if needed.
+ */
+ if (vgif_enabled(svm))
+ clr_intercept(svm, INTERCEPT_STGI);
+ if (is_intercept(svm, SVM_EXIT_VINTR))
+ svm_clear_vintr(svm);
+
+ enable_gif(svm);
+ if (svm->vcpu.arch.smi_pending ||
+ svm->vcpu.arch.nmi_pending ||
+ kvm_cpu_has_injectable_intr(&svm->vcpu))
+ kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
+ } else {
+ disable_gif(svm);
+
+ /*
+ * After a CLGI no interrupts should come. But if vGIF is
+ * in use, we still rely on the VINTR intercept (rather than
+ * STGI) to detect an open interrupt window.
+ */
+ if (!vgif_enabled(svm))
+ svm_clear_vintr(svm);
+ }
+}
+
static int stgi_interception(struct vcpu_svm *svm)
{
int ret;
@@ -2000,18 +2027,8 @@ static int stgi_interception(struct vcpu_svm *svm)
if (nested_svm_check_permissions(svm))
return 1;
- /*
- * If VGIF is enabled, the STGI intercept is only added to
- * detect the opening of the SMI/NMI window; remove it now.
- */
- if (vgif_enabled(svm))
- clr_intercept(svm, INTERCEPT_STGI);
-
ret = kvm_skip_emulated_instruction(&svm->vcpu);
- kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
-
- enable_gif(svm);
-
+ svm_set_gif(svm, true);
return ret;
}
@@ -2023,13 +2040,7 @@ static int clgi_interception(struct vcpu_svm *svm)
return 1;
ret = kvm_skip_emulated_instruction(&svm->vcpu);
-
- disable_gif(svm);
-
- /* After a CLGI no interrupts should come */
- if (!kvm_vcpu_apicv_active(&svm->vcpu))
- svm_clear_vintr(svm);
-
+ svm_set_gif(svm, false);
return ret;
}
@@ -2193,7 +2204,7 @@ static bool check_selective_cr0_intercepted(struct vcpu_svm *svm,
bool ret = false;
u64 intercept;
- intercept = svm->nested.intercept;
+ intercept = svm->nested.ctl.intercept;
if (!is_guest_mode(&svm->vcpu) ||
(!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0))))
@@ -2671,8 +2682,6 @@ static int interrupt_window_interception(struct vcpu_svm *svm)
*/
svm_toggle_avic_for_irq_window(&svm->vcpu, true);
- svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
- mark_dirty(svm->vmcb, VMCB_INTR);
++svm->vcpu.stat.irq_window_exits;
return 1;
}
@@ -2898,8 +2907,7 @@ static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
*info2 = control->exit_info_2;
}
-static int handle_exit(struct kvm_vcpu *vcpu,
- enum exit_fastpath_completion exit_fastpath)
+static int handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
{
struct vcpu_svm *svm = to_svm(vcpu);
struct kvm_run *kvm_run = vcpu->run;
@@ -2912,12 +2920,7 @@ static int handle_exit(struct kvm_vcpu *vcpu,
if (npt_enabled)
vcpu->arch.cr3 = svm->vmcb->save.cr3;
- if (unlikely(svm->nested.exit_required)) {
- nested_svm_vmexit(svm);
- svm->nested.exit_required = false;
-
- return 1;
- }
+ svm_complete_interrupts(svm);
if (is_guest_mode(vcpu)) {
int vmexit;
@@ -2938,8 +2941,6 @@ static int handle_exit(struct kvm_vcpu *vcpu,
return 1;
}
- svm_complete_interrupts(svm);
-
if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) {
kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
kvm_run->fail_entry.hardware_entry_failure_reason
@@ -2957,10 +2958,10 @@ static int handle_exit(struct kvm_vcpu *vcpu,
__func__, svm->vmcb->control.exit_int_info,
exit_code);
- if (exit_fastpath == EXIT_FASTPATH_SKIP_EMUL_INS) {
- kvm_skip_emulated_instruction(vcpu);
+ if (exit_fastpath != EXIT_FASTPATH_NONE)
return 1;
- } else if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
+
+ if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
|| !svm_exit_handlers[exit_code]) {
vcpu_unimpl(vcpu, "svm: unexpected exit reason 0x%x\n", exit_code);
dump_vmcb(vcpu);
@@ -3049,18 +3050,37 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
}
-static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
+bool svm_nmi_blocked(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
struct vmcb *vmcb = svm->vmcb;
- int ret;
- ret = !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) &&
- !(svm->vcpu.arch.hflags & HF_NMI_MASK);
- ret = ret && gif_set(svm) && nested_svm_nmi(svm);
+ bool ret;
+
+ if (!gif_set(svm))
+ return true;
+
+ if (is_guest_mode(vcpu) && nested_exit_on_nmi(svm))
+ return false;
+
+ ret = (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) ||
+ (svm->vcpu.arch.hflags & HF_NMI_MASK);
return ret;
}
+static int svm_nmi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+ if (svm->nested.nested_run_pending)
+ return -EBUSY;
+
+ /* An NMI must not be injected into L2 if it's supposed to VM-Exit. */
+ if (for_injection && is_guest_mode(vcpu) && nested_exit_on_nmi(svm))
+ return -EBUSY;
+
+ return !svm_nmi_blocked(vcpu);
+}
+
static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -3081,19 +3101,46 @@ static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
}
}
-static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
+bool svm_interrupt_blocked(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
struct vmcb *vmcb = svm->vmcb;
- if (!gif_set(svm) ||
- (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK))
- return 0;
+ if (!gif_set(svm))
+ return true;
- if (is_guest_mode(vcpu) && (svm->vcpu.arch.hflags & HF_VINTR_MASK))
- return !!(svm->vcpu.arch.hflags & HF_HIF_MASK);
- else
- return !!(kvm_get_rflags(vcpu) & X86_EFLAGS_IF);
+ if (is_guest_mode(vcpu)) {
+ /* As long as interrupts are being delivered... */
+ if ((svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK)
+ ? !(svm->nested.hsave->save.rflags & X86_EFLAGS_IF)
+ : !(kvm_get_rflags(vcpu) & X86_EFLAGS_IF))
+ return true;
+
+ /* ... vmexits aren't blocked by the interrupt shadow */
+ if (nested_exit_on_intr(svm))
+ return false;
+ } else {
+ if (!(kvm_get_rflags(vcpu) & X86_EFLAGS_IF))
+ return true;
+ }
+
+ return (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK);
+}
+
+static int svm_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+ if (svm->nested.nested_run_pending)
+ return -EBUSY;
+
+ /*
+ * An IRQ must not be injected into L2 if it's supposed to VM-Exit,
+ * e.g. if the IRQ arrived asynchronously after checking nested events.
+ */
+ if (for_injection && is_guest_mode(vcpu) && nested_exit_on_intr(svm))
+ return -EBUSY;
+
+ return !svm_interrupt_blocked(vcpu);
}
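
Note: the *_blocked()/*_allowed() split above (and the NMI/SMI variants earlier) follows one pattern: the blocked predicate answers whether the event could be taken right now, while the allowed wrapper additionally reports -EBUSY while a nested VMRUN is still pending so nothing is injected in between. A small sketch of that tri-state return, not part of the patch:

/* Illustrative sketch only -- not part of the patch. */
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

static int event_allowed(bool nested_run_pending, bool blocked)
{
	if (nested_run_pending)
		return -EBUSY;	/* retry after the pending VMRUN completes */
	return !blocked;	/* 1 = inject now, 0 = keep it pending */
}

int main(void)
{
	printf("%d %d %d\n",
	       event_allowed(true, false),	/* -EBUSY */
	       event_allowed(false, true),	/* 0 */
	       event_allowed(false, false));	/* 1 */
	return 0;
}
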
static void enable_irq_window(struct kvm_vcpu *vcpu)
@@ -3134,9 +3181,6 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu)
return; /* STGI will cause a vm exit */
}
- if (svm->nested.exit_required)
- return; /* we're not going to run the guest yet */
-
/*
* Something prevents NMI from been injected. Single step over possible
* problem (IRET or exception injection or interrupt shadow)
@@ -3156,10 +3200,17 @@ static int svm_set_identity_map_addr(struct kvm *kvm, u64 ident_addr)
return 0;
}
-void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
+void svm_flush_tlb(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
+ /*
+ * Flush only the current ASID even if the TLB flush was invoked via
+ * kvm_flush_remote_tlbs(). Although flushing remote TLBs requires all
+ * ASIDs to be flushed, KVM uses a single ASID for L1 and L2, and
+ * unconditionally does a TLB flush on both nested VM-Enter and nested
+ * VM-Exit (via kvm_mmu_reset_context()).
+ */
if (static_cpu_has(X86_FEATURE_FLUSHBYASID))
svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID;
else
@@ -3279,10 +3330,21 @@ static void svm_cancel_injection(struct kvm_vcpu *vcpu)
svm_complete_interrupts(svm);
}
+static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
+{
+ if (!is_guest_mode(vcpu) &&
+ to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_MSR &&
+ to_svm(vcpu)->vmcb->control.exit_info_1)
+ return handle_fastpath_set_msr_irqoff(vcpu);
+
+ return EXIT_FASTPATH_NONE;
+}
+
void __svm_vcpu_run(unsigned long vmcb_pa, unsigned long *regs);
-static void svm_vcpu_run(struct kvm_vcpu *vcpu)
+static fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
{
+ fastpath_t exit_fastpath;
struct vcpu_svm *svm = to_svm(vcpu);
svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
@@ -3290,13 +3352,6 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP];
/*
- * A vmexit emulation is required before the vcpu can be executed
- * again.
- */
- if (unlikely(svm->nested.exit_required))
- return;
-
- /*
* Disable singlestep if we're injecting an interrupt/exception.
* We don't want our modified rflags to be pushed on the stack where
* we might not be able to easily reset them if we disabled NMI
@@ -3387,6 +3442,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
stgi();
/* Any pending NMI will happen here */
+ exit_fastpath = svm_exit_handlers_fastpath(vcpu);
if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
kvm_after_interrupt(&svm->vcpu);
@@ -3394,12 +3450,17 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
sync_cr8_to_lapic(vcpu);
svm->next_rip = 0;
+ if (is_guest_mode(&svm->vcpu)) {
+ sync_nested_vmcb_control(svm);
+ svm->nested.nested_run_pending = 0;
+ }
svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;
/* if exit due to PF check for async PF */
if (svm->vmcb->control.exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR)
- svm->vcpu.arch.apf.host_apf_reason = kvm_read_and_reset_pf_reason();
+ svm->vcpu.arch.apf.host_apf_flags =
+ kvm_read_and_reset_apf_flags();
if (npt_enabled) {
vcpu->arch.regs_avail &= ~(1 << VCPU_EXREG_PDPTR);
@@ -3415,12 +3476,12 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
svm_handle_mce(svm);
mark_all_clean(svm->vmcb);
+ return exit_fastpath;
}
static void svm_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned long root)
{
struct vcpu_svm *svm = to_svm(vcpu);
- bool update_guest_cr3 = true;
unsigned long cr3;
cr3 = __sme_set(root);
@@ -3429,18 +3490,13 @@ static void svm_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned long root)
mark_dirty(svm->vmcb, VMCB_NPT);
/* Loading L2's CR3 is handled by enter_svm_guest_mode. */
- if (is_guest_mode(vcpu))
- update_guest_cr3 = false;
- else if (test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail))
- cr3 = vcpu->arch.cr3;
- else /* CR3 is already up-to-date. */
- update_guest_cr3 = false;
+ if (!test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail))
+ return;
+ cr3 = vcpu->arch.cr3;
}
- if (update_guest_cr3) {
- svm->vmcb->save.cr3 = cr3;
- mark_dirty(svm->vmcb, VMCB_CR);
- }
+ svm->vmcb->save.cr3 = cr3;
+ mark_dirty(svm->vmcb, VMCB_CR);
}
static int is_disabled(void)
@@ -3475,7 +3531,7 @@ static bool svm_cpu_has_accelerated_tpr(void)
return false;
}
-static bool svm_has_emulated_msr(int index)
+static bool svm_has_emulated_msr(u32 index)
{
switch (index) {
case MSR_IA32_MCG_EXT_CTL:
@@ -3628,7 +3684,7 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu,
info->intercept == x86_intercept_clts)
break;
- intercept = svm->nested.intercept;
+ intercept = svm->nested.ctl.intercept;
if (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0)))
break;
@@ -3716,13 +3772,8 @@ out:
return ret;
}
-static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu,
- enum exit_fastpath_completion *exit_fastpath)
+static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu)
{
- if (!is_guest_mode(vcpu) &&
- to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_MSR &&
- to_svm(vcpu)->vmcb->control.exit_info_1)
- *exit_fastpath = handle_fastpath_set_msr_irqoff(vcpu);
}
static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
@@ -3737,23 +3788,28 @@ static void svm_setup_mce(struct kvm_vcpu *vcpu)
vcpu->arch.mcg_cap &= 0x1ff;
}
-static int svm_smi_allowed(struct kvm_vcpu *vcpu)
+bool svm_smi_blocked(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
/* Per APM Vol.2 15.22.2 "Response to SMI" */
if (!gif_set(svm))
- return 0;
+ return true;
- if (is_guest_mode(&svm->vcpu) &&
- svm->nested.intercept & (1ULL << INTERCEPT_SMI)) {
- /* TODO: Might need to set exit_info_1 and exit_info_2 here */
- svm->vmcb->control.exit_code = SVM_EXIT_SMI;
- svm->nested.exit_required = true;
- return 0;
- }
+ return is_smm(vcpu);
+}
- return 1;
+static int svm_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+ if (svm->nested.nested_run_pending)
+ return -EBUSY;
+
+ /* An SMI must not be injected into L2 if it's supposed to VM-Exit. */
+ if (for_injection && is_guest_mode(vcpu) && nested_exit_on_smi(svm))
+ return -EBUSY;
+
+ return !svm_smi_blocked(vcpu);
}
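Note on the reworked ->smi_allowed() contract: it is tri-state rather than a plain 0/1. A negative value tells the caller to retry once the pending nested VMRUN has completed, zero means the SMI is blocked, and a positive value means it can be delivered now. A minimal caller sketch under that assumption (the x86.c call site is not part of this hunk, so the surrounding names are hypothetical):

/* Hypothetical caller; only the ->smi_allowed() contract is taken from this patch. */
static int try_inject_smi_example(struct kvm_vcpu *vcpu)
{
	int r = kvm_x86_ops.smi_allowed(vcpu, /*for_injection=*/true);

	if (r < 0)
		return r;	/* -EBUSY: retry after the pending VMRUN completes */
	if (!r)
		return 0;	/* blocked: open an SMI window via enable_smi_window() */

	/* ...emulate SMM entry and inject the SMI here... */
	return 1;
}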
static int svm_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
@@ -3793,12 +3849,13 @@ static int svm_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
if (kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(vmcb), &map) == -EINVAL)
return 1;
nested_vmcb = map.hva;
- enter_svm_guest_mode(svm, vmcb, nested_vmcb, &map);
+ enter_svm_guest_mode(svm, vmcb, nested_vmcb);
+ kvm_vcpu_unmap(&svm->vcpu, &map, true);
}
return 0;
}
-static int enable_smi_window(struct kvm_vcpu *vcpu)
+static void enable_smi_window(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -3806,9 +3863,9 @@ static int enable_smi_window(struct kvm_vcpu *vcpu)
if (vgif_enabled(svm))
set_intercept(svm, INTERCEPT_STGI);
/* STGI will cause a vm exit */
- return 1;
+ } else {
+ /* We must be in SMM; RSM will cause a vmexit anyway. */
}
- return 0;
}
static bool svm_need_emulation_on_page_fault(struct kvm_vcpu *vcpu)
@@ -3819,6 +3876,13 @@ static bool svm_need_emulation_on_page_fault(struct kvm_vcpu *vcpu)
bool is_user = svm_get_cpl(vcpu) == 3;
/*
+ * If RIP is invalid, go ahead with emulation which will cause an
+ * internal error exit.
+ */
+ if (!kvm_vcpu_gfn_to_memslot(vcpu, kvm_rip_read(vcpu) >> PAGE_SHIFT))
+ return true;
+
+ /*
* Detect and workaround Errata 1096 Fam_17h_00_0Fh.
*
* Errata:
@@ -3876,9 +3940,9 @@ static bool svm_apic_init_signal_blocked(struct kvm_vcpu *vcpu)
/*
* TODO: Last condition latch INIT signals on vCPU when
* vCPU is in guest-mode and vmcb12 defines intercept on INIT.
- * To properly emulate the INIT intercept, SVM should implement
- * kvm_x86_ops.check_nested_events() and call nested_svm_vmexit()
- * there if an INIT signal is pending.
+ * To properly emulate the INIT intercept,
+ * svm_check_nested_events() should call nested_svm_vmexit()
+ * if an INIT signal is pending.
*/
return !gif_set(svm) ||
(svm->vmcb->control.intercept & (1ULL << INTERCEPT_INIT));
@@ -3932,8 +3996,6 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.set_segment = svm_set_segment,
.get_cpl = svm_get_cpl,
.get_cs_db_l_bits = kvm_get_cs_db_l_bits,
- .decache_cr0_guest_bits = svm_decache_cr0_guest_bits,
- .decache_cr4_guest_bits = svm_decache_cr4_guest_bits,
.set_cr0 = svm_set_cr0,
.set_cr4 = svm_set_cr4,
.set_efer = svm_set_efer,
@@ -3947,8 +4009,10 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.get_rflags = svm_get_rflags,
.set_rflags = svm_set_rflags,
- .tlb_flush = svm_flush_tlb,
+ .tlb_flush_all = svm_flush_tlb,
+ .tlb_flush_current = svm_flush_tlb,
.tlb_flush_gva = svm_flush_tlb_gva,
+ .tlb_flush_guest = svm_flush_tlb,
.run = svm_vcpu_run,
.handle_exit = handle_exit,
@@ -3989,7 +4053,6 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.has_wbinvd_exit = svm_has_wbinvd_exit,
- .read_l1_tsc_offset = svm_read_l1_tsc_offset,
.write_l1_tsc_offset = svm_write_l1_tsc_offset,
.load_mmu_pgd = svm_load_mmu_pgd,
@@ -4002,6 +4065,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.sched_in = svm_sched_in,
.pmu_ops = &amd_pmu_ops,
+ .nested_ops = &svm_nested_ops,
+
.deliver_posted_interrupt = svm_deliver_avic_intr,
.dy_apicv_has_pending_interrupt = svm_dy_apicv_has_pending_interrupt,
.update_pi_irte = svm_update_pi_irte,
@@ -4016,14 +4081,9 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.mem_enc_reg_region = svm_register_enc_region,
.mem_enc_unreg_region = svm_unregister_enc_region,
- .nested_enable_evmcs = NULL,
- .nested_get_evmcs_version = NULL,
-
.need_emulation_on_page_fault = svm_need_emulation_on_page_fault,
.apic_init_signal_blocked = svm_apic_init_signal_blocked,
-
- .check_nested_events = svm_check_nested_events,
};
static struct kvm_x86_init_ops svm_init_ops __initdata = {
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index df3474f4fb02..6ac4c00a5d82 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -86,25 +86,17 @@ struct nested_state {
u64 hsave_msr;
u64 vm_cr_msr;
u64 vmcb;
+ u32 host_intercept_exceptions;
/* These are the merged vectors */
u32 *msrpm;
- /* gpa pointers to the real vectors */
- u64 vmcb_msrpm;
- u64 vmcb_iopm;
+ /* A VMRUN has started but has not yet been performed, so
+ * we cannot inject a nested vmexit yet. */
+ bool nested_run_pending;
- /* A VMEXIT is required but not yet emulated */
- bool exit_required;
-
- /* cache for intercepts of the guest */
- u32 intercept_cr;
- u32 intercept_dr;
- u32 intercept_exceptions;
- u64 intercept;
-
- /* Nested Paging related state */
- u64 nested_cr3;
+ /* cache for control fields of the guest */
+ struct vmcb_control_area ctl;
};
struct vcpu_svm {
@@ -360,8 +352,12 @@ u32 svm_msrpm_offset(u32 msr);
void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer);
void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
-void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa);
+void svm_flush_tlb(struct kvm_vcpu *vcpu);
void disable_nmi_singlestep(struct vcpu_svm *svm);
+bool svm_smi_blocked(struct kvm_vcpu *vcpu);
+bool svm_nmi_blocked(struct kvm_vcpu *vcpu);
+bool svm_interrupt_blocked(struct kvm_vcpu *vcpu);
+void svm_set_gif(struct vcpu_svm *svm, bool value);
/* nested.c */
@@ -369,28 +365,31 @@ void disable_nmi_singlestep(struct vcpu_svm *svm);
#define NESTED_EXIT_DONE 1 /* Exit caused nested vmexit */
#define NESTED_EXIT_CONTINUE 2 /* Further checks needed */
-/* This function returns true if it is save to enable the nmi window */
-static inline bool nested_svm_nmi(struct vcpu_svm *svm)
+static inline bool svm_nested_virtualize_tpr(struct kvm_vcpu *vcpu)
{
- if (!is_guest_mode(&svm->vcpu))
- return true;
+ struct vcpu_svm *svm = to_svm(vcpu);
- if (!(svm->nested.intercept & (1ULL << INTERCEPT_NMI)))
- return true;
+ return is_guest_mode(vcpu) && (svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK);
+}
- svm->vmcb->control.exit_code = SVM_EXIT_NMI;
- svm->nested.exit_required = true;
+static inline bool nested_exit_on_smi(struct vcpu_svm *svm)
+{
+ return (svm->nested.ctl.intercept & (1ULL << INTERCEPT_SMI));
+}
- return false;
+static inline bool nested_exit_on_intr(struct vcpu_svm *svm)
+{
+ return (svm->nested.ctl.intercept & (1ULL << INTERCEPT_INTR));
}
-static inline bool svm_nested_virtualize_tpr(struct kvm_vcpu *vcpu)
+static inline bool nested_exit_on_nmi(struct vcpu_svm *svm)
{
- return is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK);
+ return (svm->nested.ctl.intercept & (1ULL << INTERCEPT_NMI));
}
void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
- struct vmcb *nested_vmcb, struct kvm_host_map *map);
+ struct vmcb *nested_vmcb);
+void svm_leave_nested(struct vcpu_svm *svm);
int nested_svm_vmrun(struct vcpu_svm *svm);
void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb);
int nested_svm_vmexit(struct vcpu_svm *svm);
@@ -398,8 +397,10 @@ int nested_svm_exit_handled(struct vcpu_svm *svm);
int nested_svm_check_permissions(struct vcpu_svm *svm);
int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
bool has_error_code, u32 error_code);
-int svm_check_nested_events(struct kvm_vcpu *vcpu);
int nested_svm_exit_special(struct vcpu_svm *svm);
+void sync_nested_vmcb_control(struct vcpu_svm *svm);
+
+extern struct kvm_x86_nested_ops svm_nested_ops;
/* avic.c */
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 249062f24b94..b66432b015d2 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -225,6 +225,14 @@ TRACE_EVENT(kvm_apic,
#define KVM_ISA_VMX 1
#define KVM_ISA_SVM 2
+#define kvm_print_exit_reason(exit_reason, isa) \
+ (isa == KVM_ISA_VMX) ? \
+ __print_symbolic(exit_reason & 0xffff, VMX_EXIT_REASONS) : \
+ __print_symbolic(exit_reason, SVM_EXIT_REASONS), \
+ (isa == KVM_ISA_VMX && exit_reason & ~0xffff) ? " " : "", \
+ (isa == KVM_ISA_VMX) ? \
+ __print_flags(exit_reason & ~0xffff, " ", VMX_EXIT_REASON_FLAGS) : ""
+
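The macro above expands to three comma-separated values, which is why the TP_printk formats below switch to "%s%s%s": the decoded exit reason, an optional separator, and (VMX only) the flag bits above bit 15. A standalone sketch of the same expansion, using plain strings in place of the kernel's __print_symbolic()/__print_flags() helpers (all strings here are illustrative):

#include <stdio.h>

/* Userspace sketch, not kernel code: mirrors the three-string expansion. */
static void print_exit_reason_example(unsigned int exit_reason, int is_vmx)
{
	int has_flags = is_vmx && (exit_reason & ~0xffffu);
	const char *reason = is_vmx ? "EXTERNAL_INTERRUPT" : "intr";
	const char *sep    = has_flags ? " " : "";
	const char *flags  = has_flags ? "FAILED_VMENTRY" : "";

	printf("reason %s%s%s\n", reason, sep, flags);
}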
/*
* Tracepoint for kvm guest exit:
*/
@@ -250,12 +258,10 @@ TRACE_EVENT(kvm_exit,
&__entry->info2);
),
- TP_printk("vcpu %u reason %s rip 0x%lx info %llx %llx",
+ TP_printk("vcpu %u reason %s%s%s rip 0x%lx info %llx %llx",
__entry->vcpu_id,
- (__entry->isa == KVM_ISA_VMX) ?
- __print_symbolic(__entry->exit_reason, VMX_EXIT_REASONS) :
- __print_symbolic(__entry->exit_reason, SVM_EXIT_REASONS),
- __entry->guest_rip, __entry->info1, __entry->info2)
+ kvm_print_exit_reason(__entry->exit_reason, __entry->isa),
+ __entry->guest_rip, __entry->info1, __entry->info2)
);
/*
@@ -588,12 +594,10 @@ TRACE_EVENT(kvm_nested_vmexit,
__entry->exit_int_info_err = exit_int_info_err;
__entry->isa = isa;
),
- TP_printk("rip: 0x%016llx reason: %s ext_inf1: 0x%016llx "
+ TP_printk("rip: 0x%016llx reason: %s%s%s ext_inf1: 0x%016llx "
"ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x",
__entry->rip,
- (__entry->isa == KVM_ISA_VMX) ?
- __print_symbolic(__entry->exit_code, VMX_EXIT_REASONS) :
- __print_symbolic(__entry->exit_code, SVM_EXIT_REASONS),
+ kvm_print_exit_reason(__entry->exit_code, __entry->isa),
__entry->exit_info1, __entry->exit_info2,
__entry->exit_int_info, __entry->exit_int_info_err)
);
@@ -626,13 +630,11 @@ TRACE_EVENT(kvm_nested_vmexit_inject,
__entry->isa = isa;
),
- TP_printk("reason: %s ext_inf1: 0x%016llx "
+ TP_printk("reason: %s%s%s ext_inf1: 0x%016llx "
"ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x",
- (__entry->isa == KVM_ISA_VMX) ?
- __print_symbolic(__entry->exit_code, VMX_EXIT_REASONS) :
- __print_symbolic(__entry->exit_code, SVM_EXIT_REASONS),
- __entry->exit_info1, __entry->exit_info2,
- __entry->exit_int_info, __entry->exit_int_info_err)
+ kvm_print_exit_reason(__entry->exit_code, __entry->isa),
+ __entry->exit_info1, __entry->exit_info2,
+ __entry->exit_int_info, __entry->exit_int_info_err)
);
/*
@@ -1539,6 +1541,57 @@ TRACE_EVENT(kvm_nested_vmenter_failed,
__print_symbolic(__entry->err, VMX_VMENTER_INSTRUCTION_ERRORS))
);
+/*
+ * Tracepoint for syndbg_set_msr.
+ */
+TRACE_EVENT(kvm_hv_syndbg_set_msr,
+ TP_PROTO(int vcpu_id, u32 vp_index, u32 msr, u64 data),
+ TP_ARGS(vcpu_id, vp_index, msr, data),
+
+ TP_STRUCT__entry(
+ __field(int, vcpu_id)
+ __field(u32, vp_index)
+ __field(u32, msr)
+ __field(u64, data)
+ ),
+
+ TP_fast_assign(
+ __entry->vcpu_id = vcpu_id;
+ __entry->vp_index = vp_index;
+ __entry->msr = msr;
+ __entry->data = data;
+ ),
+
+ TP_printk("vcpu_id %d vp_index %u msr 0x%x data 0x%llx",
+ __entry->vcpu_id, __entry->vp_index, __entry->msr,
+ __entry->data)
+);
+
+/*
+ * Tracepoint for syndbg_get_msr.
+ */
+TRACE_EVENT(kvm_hv_syndbg_get_msr,
+ TP_PROTO(int vcpu_id, u32 vp_index, u32 msr, u64 data),
+ TP_ARGS(vcpu_id, vp_index, msr, data),
+
+ TP_STRUCT__entry(
+ __field(int, vcpu_id)
+ __field(u32, vp_index)
+ __field(u32, msr)
+ __field(u64, data)
+ ),
+
+ TP_fast_assign(
+ __entry->vcpu_id = vcpu_id;
+ __entry->vp_index = vp_index;
+ __entry->msr = msr;
+ __entry->data = data;
+ ),
+
+ TP_printk("vcpu_id %d vp_index %u msr 0x%x data 0x%llx",
+ __entry->vcpu_id, __entry->vp_index, __entry->msr,
+ __entry->data)
+);
#endif /* _TRACE_KVM_H */
#undef TRACE_INCLUDE_PATH
diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
index 8903475f751e..4bbd8b448d22 100644
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@ -18,6 +18,8 @@ extern int __read_mostly pt_mode;
#define PT_MODE_SYSTEM 0
#define PT_MODE_HOST_GUEST 1
+#define PMU_CAP_FW_WRITES (1ULL << 13)
+
struct nested_vmx_msrs {
/*
* We only store the "true" versions of the VMX capability MSRs. We
@@ -367,4 +369,13 @@ static inline bool vmx_pt_mode_is_host_guest(void)
return pt_mode == PT_MODE_HOST_GUEST;
}
+static inline u64 vmx_get_perf_capabilities(void)
+{
+ /*
+ * Since counters are virtualized, KVM would support full
+ * width counting unconditionally, even if the host lacks it.
+ */
+ return PMU_CAP_FW_WRITES;
+}
+
#endif /* __KVM_X86_VMX_CAPS_H */
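For context, PMU_CAP_FW_WRITES is bit 13 of IA32_PERF_CAPABILITIES, the "full-width write" capability: with it set, the general-purpose counters can be written through the 64-bit IA32_A_PMCx aliases instead of the legacy 32-bit sign-extended interface. KVM can advertise it unconditionally because the counters are fully virtualized. A small sketch of how a consumer might test the bit (helper name is made up):

#include <stdbool.h>
#include <stdint.h>

/* Sketch only: checks the full-width-write bit in a PERF_CAPABILITIES value. */
static bool has_full_width_counter_writes(uint64_t perf_capabilities)
{
	return perf_capabilities & (1ULL << 13);	/* PMU_CAP_FW_WRITES */
}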
diff --git a/arch/x86/kvm/vmx/evmcs.c b/arch/x86/kvm/vmx/evmcs.c
index 303813423c3e..e5325bd0f304 100644
--- a/arch/x86/kvm/vmx/evmcs.c
+++ b/arch/x86/kvm/vmx/evmcs.c
@@ -4,6 +4,7 @@
#include <linux/smp.h>
#include "../hyperv.h"
+#include "../cpuid.h"
#include "evmcs.h"
#include "vmcs.h"
#include "vmx.h"
@@ -160,14 +161,6 @@ const struct evmcs_field vmcs_field_to_evmcs_1[] = {
HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL),
EVMCS1_FIELD(VM_ENTRY_MSR_LOAD_ADDR, vm_entry_msr_load_addr,
HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL),
- EVMCS1_FIELD(CR3_TARGET_VALUE0, cr3_target_value0,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL),
- EVMCS1_FIELD(CR3_TARGET_VALUE1, cr3_target_value1,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL),
- EVMCS1_FIELD(CR3_TARGET_VALUE2, cr3_target_value2,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL),
- EVMCS1_FIELD(CR3_TARGET_VALUE3, cr3_target_value3,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL),
/* 32 bit rw */
EVMCS1_FIELD(TPR_THRESHOLD, tpr_threshold,
@@ -334,17 +327,18 @@ bool nested_enlightened_vmentry(struct kvm_vcpu *vcpu, u64 *evmcs_gpa)
uint16_t nested_get_evmcs_version(struct kvm_vcpu *vcpu)
{
- struct vcpu_vmx *vmx = to_vmx(vcpu);
- /*
- * vmcs_version represents the range of supported Enlightened VMCS
- * versions: lower 8 bits is the minimal version, higher 8 bits is the
- * maximum supported version. KVM supports versions from 1 to
- * KVM_EVMCS_VERSION.
- */
- if (vmx->nested.enlightened_vmcs_enabled)
- return (KVM_EVMCS_VERSION << 8) | 1;
-
- return 0;
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ /*
+ * vmcs_version represents the range of supported Enlightened VMCS
+ * versions: lower 8 bits is the minimal version, higher 8 bits is the
+ * maximum supported version. KVM supports versions from 1 to
+ * KVM_EVMCS_VERSION.
+ */
+ if (kvm_cpu_cap_get(X86_FEATURE_VMX) &&
+ vmx->nested.enlightened_vmcs_enabled)
+ return (KVM_EVMCS_VERSION << 8) | 1;
+
+ return 0;
}
void nested_evmcs_filter_control_msr(u32 msr_index, u64 *pdata)
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index e44f33c82332..9c74a732b08d 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -303,11 +303,11 @@ static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
cpu = get_cpu();
prev = vmx->loaded_vmcs;
vmx->loaded_vmcs = vmcs;
- vmx_vcpu_load_vmcs(vcpu, cpu);
+ vmx_vcpu_load_vmcs(vcpu, cpu, prev);
vmx_sync_vmcs_host_state(vmx, prev);
put_cpu();
- vmx_segment_cache_clear(vmx);
+ vmx_register_cache_reset(vcpu);
}
/*
@@ -328,19 +328,19 @@ static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
{
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
struct vcpu_vmx *vmx = to_vmx(vcpu);
- u32 exit_reason;
+ u32 vm_exit_reason;
unsigned long exit_qualification = vcpu->arch.exit_qualification;
if (vmx->nested.pml_full) {
- exit_reason = EXIT_REASON_PML_FULL;
+ vm_exit_reason = EXIT_REASON_PML_FULL;
vmx->nested.pml_full = false;
exit_qualification &= INTR_INFO_UNBLOCK_NMI;
} else if (fault->error_code & PFERR_RSVD_MASK)
- exit_reason = EXIT_REASON_EPT_MISCONFIG;
+ vm_exit_reason = EXIT_REASON_EPT_MISCONFIG;
else
- exit_reason = EXIT_REASON_EPT_VIOLATION;
+ vm_exit_reason = EXIT_REASON_EPT_VIOLATION;
- nested_vmx_vmexit(vcpu, exit_reason, 0, exit_qualification);
+ nested_vmx_vmexit(vcpu, vm_exit_reason, 0, exit_qualification);
vmcs12->guest_physical_address = fault->address;
}
@@ -437,11 +437,6 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
}
}
-static bool page_address_valid(struct kvm_vcpu *vcpu, gpa_t gpa)
-{
- return PAGE_ALIGNED(gpa) && !(gpa >> cpuid_maxphyaddr(vcpu));
-}
-
static int nested_vmx_check_io_bitmap_controls(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12)
{
@@ -698,11 +693,6 @@ static bool nested_exit_intr_ack_set(struct kvm_vcpu *vcpu)
VM_EXIT_ACK_INTR_ON_EXIT;
}
-static bool nested_exit_on_nmi(struct kvm_vcpu *vcpu)
-{
- return nested_cpu_has_nmi_exiting(get_vmcs12(vcpu));
-}
-
static int nested_vmx_check_apic_access_controls(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12)
{
@@ -927,6 +917,7 @@ static u32 nested_vmx_load_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
}
return 0;
fail:
+ /* Note, max_msr_list_size is at most 4096, i.e. this can't wrap. */
return i + 1;
}
@@ -1074,34 +1065,81 @@ static bool nested_cr3_valid(struct kvm_vcpu *vcpu, unsigned long val)
}
/*
+ * Returns true if the MMU needs to be sync'd on nested VM-Enter/VM-Exit.
+ * tl;dr: the MMU needs a sync if L0 is using shadow paging and L1 didn't
+ * enable VPID for L2 (implying it expects a TLB flush on VMX transitions).
+ * Here's why.
+ *
+ * If EPT is enabled by L0 a sync is never needed:
+ * - if it is disabled by L1, then L0 is not shadowing L1 or L2 PTEs, so there
+ * cannot be unsync'd SPTEs for either L1 or L2.
+ *
+ * - if it is also enabled by L1, then L0 doesn't need to sync on VM-Enter,
+ * as VM-Enter isn't required to invalidate guest-physical mappings
+ * (irrespective of VPID), i.e. L1 can't rely on the (virtual) CPU to flush
+ * stale guest-physical mappings for L2 from the TLB. And as above, L0 isn't
+ * shadowing L1 PTEs so there are no unsync'd SPTEs to sync on VM-Exit.
+ *
+ * If EPT is disabled by L0:
+ * - if VPID is enabled by L1 (for L2), the situation is similar to when L1
+ * enables EPT: L0 doesn't need to sync as VM-Enter and VM-Exit aren't
+ * required to invalidate linear mappings (EPT is disabled so there are
+ * no combined or guest-physical mappings), i.e. L1 can't rely on the
+ * (virtual) CPU to flush stale linear mappings for either L2 or itself (L1).
+ *
+ * - however if VPID is disabled by L1, then a sync is needed as L1 expects all
+ * linear mappings (EPT is disabled so there are no combined or guest-physical
+ * mappings) to be invalidated on both VM-Enter and VM-Exit.
+ *
+ * Note, this logic is subtly different than nested_has_guest_tlb_tag(), which
+ * additionally checks that L2 has been assigned a VPID (when EPT is disabled).
+ * Whether or not L2 has been assigned a VPID by L0 is irrelevant with respect
+ * to L1's expectations, e.g. L0 needs to invalidate hardware TLB entries if L2
+ * doesn't have a unique VPID to prevent reusing L1's entries (assuming L1 has
+ * been assigned a VPID), but L0 doesn't need to do an MMU sync because L1
+ * doesn't expect stale (virtual) TLB entries to be flushed, i.e. L1 doesn't
+ * know that L0 will flush the TLB and so L1 will do INVVPID as needed to flush
+ * stale TLB entries, at which point L0 will sync L2's MMU.
+ */
+static bool nested_vmx_transition_mmu_sync(struct kvm_vcpu *vcpu)
+{
+ return !enable_ept && !nested_cpu_has_vpid(get_vmcs12(vcpu));
+}
+
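A compact restatement of the rule above, as a sketch rather than kernel code (the helper name is made up):

#include <stdbool.h>

/*
 *   L0 EPT | L1 VPID for L2 | MMU sync on nested transition?
 *   -------+----------------+-------------------------------
 *    on    |      any       |              no
 *    off   |      on        |              no
 *    off   |      off       |              yes
 */
static bool needs_mmu_sync_example(bool l0_ept, bool l1_vpid_for_l2)
{
	return !l0_ept && !l1_vpid_for_l2;
}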
+/*
* Load guest's/host's cr3 at nested entry/exit. @nested_ept is true if we are
* emulating VM-Entry into a guest with EPT enabled. On failure, the expected
* Exit Qualification (for a VM-Entry consistency check VM-Exit) is assigned to
* @entry_failure_code.
*/
static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool nested_ept,
- u32 *entry_failure_code)
+ enum vm_entry_failure_code *entry_failure_code)
{
- if (cr3 != kvm_read_cr3(vcpu) || (!nested_ept && pdptrs_changed(vcpu))) {
- if (CC(!nested_cr3_valid(vcpu, cr3))) {
- *entry_failure_code = ENTRY_FAIL_DEFAULT;
- return -EINVAL;
- }
+ if (CC(!nested_cr3_valid(vcpu, cr3))) {
+ *entry_failure_code = ENTRY_FAIL_DEFAULT;
+ return -EINVAL;
+ }
- /*
- * If PAE paging and EPT are both on, CR3 is not used by the CPU and
- * must not be dereferenced.
- */
- if (is_pae_paging(vcpu) && !nested_ept) {
- if (CC(!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))) {
- *entry_failure_code = ENTRY_FAIL_PDPTE;
- return -EINVAL;
- }
+ /*
+ * If PAE paging and EPT are both on, CR3 is not used by the CPU and
+ * must not be dereferenced.
+ */
+ if (!nested_ept && is_pae_paging(vcpu) &&
+ (cr3 != kvm_read_cr3(vcpu) || pdptrs_changed(vcpu))) {
+ if (CC(!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))) {
+ *entry_failure_code = ENTRY_FAIL_PDPTE;
+ return -EINVAL;
}
}
+ /*
+ * Unconditionally skip the TLB flush on fast CR3 switch, all TLB
+ * flushes are handled by nested_vmx_transition_tlb_flush(). See
+ * nested_vmx_transition_mmu_sync for details on skipping the MMU sync.
+ */
if (!nested_ept)
- kvm_mmu_new_cr3(vcpu, cr3, false);
+ kvm_mmu_new_pgd(vcpu, cr3, true,
+ !nested_vmx_transition_mmu_sync(vcpu));
vcpu->arch.cr3 = cr3;
kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
@@ -1132,11 +1170,48 @@ static bool nested_has_guest_tlb_tag(struct kvm_vcpu *vcpu)
(nested_cpu_has_vpid(vmcs12) && to_vmx(vcpu)->nested.vpid02);
}
-static u16 nested_get_vpid02(struct kvm_vcpu *vcpu)
+static void nested_vmx_transition_tlb_flush(struct kvm_vcpu *vcpu,
+ struct vmcs12 *vmcs12,
+ bool is_vmenter)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- return vmx->nested.vpid02 ? vmx->nested.vpid02 : vmx->vpid;
+ /*
+ * If VPID is disabled, linear and combined mappings are flushed on
+ * VM-Enter/VM-Exit, and guest-physical mappings are valid only for
+ * their associated EPTP.
+ */
+ if (!enable_vpid)
+ return;
+
+ /*
+ * If vmcs12 doesn't use VPID, L1 expects linear and combined mappings
+ * for *all* contexts to be flushed on VM-Enter/VM-Exit.
+ *
+ * If VPID is enabled and used by vmcs12, but L2 does not have a unique
+ * TLB tag (ASID), i.e. EPT is disabled and KVM was unable to allocate
+ * a VPID for L2, flush the current context as the effective ASID is
+ * common to both L1 and L2.
+ *
+ * Defer the flush so that it runs after vmcs02.EPTP has been set by
+ * KVM_REQ_LOAD_MMU_PGD (if nested EPT is enabled) and to avoid
+ * redundant flushes further down the nested pipeline.
+ *
+ * If a TLB flush isn't required due to any of the above, and vpid12 is
+ * changing, then the new "virtual" VPID (vpid12) will reuse the same
+ * "real" VPID (vpid02), and so needs to be sync'd. There is no direct
+ * mapping between vpid02 and vpid12, vpid02 is per-vCPU and reused for
+ * all nested vCPUs.
+ */
+ if (!nested_cpu_has_vpid(vmcs12)) {
+ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+ } else if (!nested_has_guest_tlb_tag(vcpu)) {
+ kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
+ } else if (is_vmenter &&
+ vmcs12->virtual_processor_id != vmx->nested.last_vpid) {
+ vmx->nested.last_vpid = vmcs12->virtual_processor_id;
+ vpid_sync_context(nested_get_vpid02(vcpu));
+ }
}
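The ladder above can be summarized as follows; this is an illustrative sketch (the helper name and enum are made up), not kernel code:

#include <stdbool.h>

enum nested_flush_action { FLUSH_NONE, FLUSH_ALL, FLUSH_CURRENT, SYNC_VPID02 };

/* Sketch: mirrors the decision order of nested_vmx_transition_tlb_flush(). */
static enum nested_flush_action
nested_flush_action_example(bool enable_vpid, bool vmcs12_has_vpid,
			    bool l2_has_guest_tlb_tag, bool vpid12_changed,
			    bool is_vmenter)
{
	if (!enable_vpid)
		return FLUSH_NONE;		/* HW flushes on every transition */
	if (!vmcs12_has_vpid)
		return FLUSH_ALL;		/* KVM_REQ_TLB_FLUSH */
	if (!l2_has_guest_tlb_tag)
		return FLUSH_CURRENT;		/* KVM_REQ_TLB_FLUSH_CURRENT */
	if (is_vmenter && vpid12_changed)
		return SYNC_VPID02;		/* vpid_sync_context(vpid02) */
	return FLUSH_NONE;
}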
static bool is_bitwise_subset(u64 superset, u64 subset, u64 mask)
@@ -1700,10 +1775,6 @@ static int copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx)
* vmcs12->vm_exit_msr_store_addr = evmcs->vm_exit_msr_store_addr;
* vmcs12->vm_exit_msr_load_addr = evmcs->vm_exit_msr_load_addr;
* vmcs12->vm_entry_msr_load_addr = evmcs->vm_entry_msr_load_addr;
- * vmcs12->cr3_target_value0 = evmcs->cr3_target_value0;
- * vmcs12->cr3_target_value1 = evmcs->cr3_target_value1;
- * vmcs12->cr3_target_value2 = evmcs->cr3_target_value2;
- * vmcs12->cr3_target_value3 = evmcs->cr3_target_value3;
* vmcs12->page_fault_error_code_mask =
* evmcs->page_fault_error_code_mask;
* vmcs12->page_fault_error_code_match =
@@ -1777,10 +1848,6 @@ static int copy_vmcs12_to_enlightened(struct vcpu_vmx *vmx)
* evmcs->vm_exit_msr_store_addr = vmcs12->vm_exit_msr_store_addr;
* evmcs->vm_exit_msr_load_addr = vmcs12->vm_exit_msr_load_addr;
* evmcs->vm_entry_msr_load_addr = vmcs12->vm_entry_msr_load_addr;
- * evmcs->cr3_target_value0 = vmcs12->cr3_target_value0;
- * evmcs->cr3_target_value1 = vmcs12->cr3_target_value1;
- * evmcs->cr3_target_value2 = vmcs12->cr3_target_value2;
- * evmcs->cr3_target_value3 = vmcs12->cr3_target_value3;
* evmcs->tpr_threshold = vmcs12->tpr_threshold;
* evmcs->virtual_processor_id = vmcs12->virtual_processor_id;
* evmcs->exception_bitmap = vmcs12->exception_bitmap;
@@ -2020,9 +2087,25 @@ static enum hrtimer_restart vmx_preemption_timer_fn(struct hrtimer *timer)
return HRTIMER_NORESTART;
}
-static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu)
+static u64 vmx_calc_preemption_timer_value(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+
+ u64 l1_scaled_tsc = kvm_read_l1_tsc(vcpu, rdtsc()) >>
+ VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE;
+
+ if (!vmx->nested.has_preemption_timer_deadline) {
+ vmx->nested.preemption_timer_deadline =
+ vmcs12->vmx_preemption_timer_value + l1_scaled_tsc;
+ vmx->nested.has_preemption_timer_deadline = true;
+ }
+ return vmx->nested.preemption_timer_deadline - l1_scaled_tsc;
+}
+
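A worked example of the deadline arithmetic above, with illustrative numbers and the emulated rate shift of 5 (VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE):

#include <stdint.h>

/*
 * First VM-Enter with L1 TSC 0x1000 and vmcs12 timer value 0x80 records
 * deadline = 0x80 + (0x1000 >> 5) = 0x100. A later re-entry (e.g. after a
 * save/restore) with L1 TSC 0x1400 re-arms the timer for
 * 0x100 - (0x1400 >> 5) = 0x60 ticks, so the guest never observes the
 * timer restarting from its full value.
 */
static uint64_t remaining_preemption_ticks(uint64_t deadline,
					   uint64_t l1_scaled_tsc)
{
	return deadline - l1_scaled_tsc;
}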
+static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu,
+ u64 preemption_timeout)
{
- u64 preemption_timeout = get_vmcs12(vcpu)->vmx_preemption_timer_value;
struct vcpu_vmx *vmx = to_vmx(vcpu);
/*
@@ -2041,7 +2124,8 @@ static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu)
preemption_timeout *= 1000000;
do_div(preemption_timeout, vcpu->arch.virtual_tsc_khz);
hrtimer_start(&vmx->nested.preemption_timer,
- ns_to_ktime(preemption_timeout), HRTIMER_MODE_REL);
+ ktime_add_ns(ktime_get(), preemption_timeout),
+ HRTIMER_MODE_ABS_PINNED);
}
static u64 nested_vmx_calc_efer(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
@@ -2398,7 +2482,7 @@ static void prepare_vmcs02_rare(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
* is assigned to entry_failure_code on failure.
*/
static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
- u32 *entry_failure_code)
+ enum vm_entry_failure_code *entry_failure_code)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct hv_enlightened_vmcs *hv_evmcs = vmx->nested.hv_evmcs;
@@ -2447,32 +2531,7 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
if (kvm_has_tsc_control)
decache_tsc_multiplier(vmx);
- if (enable_vpid) {
- /*
- * There is no direct mapping between vpid02 and vpid12, the
- * vpid02 is per-vCPU for L0 and reused while the value of
- * vpid12 is changed w/ one invvpid during nested vmentry.
- * The vpid12 is allocated by L1 for L2, so it will not
- * influence global bitmap(for vpid01 and vpid02 allocation)
- * even if spawn a lot of nested vCPUs.
- */
- if (nested_cpu_has_vpid(vmcs12) && nested_has_guest_tlb_tag(vcpu)) {
- if (vmcs12->virtual_processor_id != vmx->nested.last_vpid) {
- vmx->nested.last_vpid = vmcs12->virtual_processor_id;
- __vmx_flush_tlb(vcpu, nested_get_vpid02(vcpu), false);
- }
- } else {
- /*
- * If L1 use EPT, then L0 needs to execute INVEPT on
- * EPTP02 instead of EPTP01. Therefore, delay TLB
- * flush until vmcs02->eptp is fully updated by
- * KVM_REQ_LOAD_MMU_PGD. Note that this assumes
- * KVM_REQ_TLB_FLUSH is evaluated after
- * KVM_REQ_LOAD_MMU_PGD in vcpu_enter_guest().
- */
- kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
- }
- }
+ nested_vmx_transition_tlb_flush(vcpu, vmcs12, true);
if (nested_cpu_has_ept(vmcs12))
nested_ept_init_mmu_context(vcpu);
@@ -2883,11 +2942,11 @@ static int nested_check_guest_non_reg_state(struct vmcs12 *vmcs12)
static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12,
- u32 *exit_qual)
+ enum vm_entry_failure_code *entry_failure_code)
{
bool ia32e;
- *exit_qual = ENTRY_FAIL_DEFAULT;
+ *entry_failure_code = ENTRY_FAIL_DEFAULT;
if (CC(!nested_guest_cr0_valid(vcpu, vmcs12->guest_cr0)) ||
CC(!nested_guest_cr4_valid(vcpu, vmcs12->guest_cr4)))
@@ -2902,7 +2961,7 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
return -EINVAL;
if (nested_vmx_check_vmcs_link_ptr(vcpu, vmcs12)) {
- *exit_qual = ENTRY_FAIL_VMCS_LINK_PTR;
+ *entry_failure_code = ENTRY_FAIL_VMCS_LINK_PTR;
return -EINVAL;
}
@@ -3194,9 +3253,12 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+ enum vm_entry_failure_code entry_failure_code;
bool evaluate_pending_interrupts;
- u32 exit_reason = EXIT_REASON_INVALID_STATE;
- u32 exit_qual;
+ u32 exit_reason, failed_index;
+
+ if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
+ kvm_vcpu_flush_tlb_current(vcpu);
evaluate_pending_interrupts = exec_controls_get(vmx) &
(CPU_BASED_INTR_WINDOW_EXITING | CPU_BASED_NMI_WINDOW_EXITING);
@@ -3241,24 +3303,33 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
return NVMX_VMENTRY_VMFAIL;
}
- if (nested_vmx_check_guest_state(vcpu, vmcs12, &exit_qual))
+ if (nested_vmx_check_guest_state(vcpu, vmcs12,
+ &entry_failure_code)) {
+ exit_reason = EXIT_REASON_INVALID_STATE;
+ vmcs12->exit_qualification = entry_failure_code;
goto vmentry_fail_vmexit;
+ }
}
enter_guest_mode(vcpu);
if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING)
vcpu->arch.tsc_offset += vmcs12->tsc_offset;
- if (prepare_vmcs02(vcpu, vmcs12, &exit_qual))
+ if (prepare_vmcs02(vcpu, vmcs12, &entry_failure_code)) {
+ exit_reason = EXIT_REASON_INVALID_STATE;
+ vmcs12->exit_qualification = entry_failure_code;
goto vmentry_fail_vmexit_guest_mode;
+ }
if (from_vmentry) {
- exit_reason = EXIT_REASON_MSR_LOAD_FAIL;
- exit_qual = nested_vmx_load_msr(vcpu,
- vmcs12->vm_entry_msr_load_addr,
- vmcs12->vm_entry_msr_load_count);
- if (exit_qual)
+ failed_index = nested_vmx_load_msr(vcpu,
+ vmcs12->vm_entry_msr_load_addr,
+ vmcs12->vm_entry_msr_load_count);
+ if (failed_index) {
+ exit_reason = EXIT_REASON_MSR_LOAD_FAIL;
+ vmcs12->exit_qualification = failed_index;
goto vmentry_fail_vmexit_guest_mode;
+ }
} else {
/*
* The MMU is not initialized to point at the right entities yet and
@@ -3293,8 +3364,10 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
* the timer.
*/
vmx->nested.preemption_timer_expired = false;
- if (nested_cpu_has_preemption_timer(vmcs12))
- vmx_start_preemption_timer(vcpu);
+ if (nested_cpu_has_preemption_timer(vmcs12)) {
+ u64 timer_value = vmx_calc_preemption_timer_value(vcpu);
+ vmx_start_preemption_timer(vcpu, timer_value);
+ }
/*
* Note no nested_vmx_succeed or nested_vmx_fail here. At this point
@@ -3322,7 +3395,6 @@ vmentry_fail_vmexit:
load_vmcs12_host_state(vcpu, vmcs12);
vmcs12->vm_exit_reason = exit_reason | VMX_EXIT_REASONS_FAILED_VMENTRY;
- vmcs12->exit_qualification = exit_qual;
if (enable_shadow_vmcs || vmx->nested.hv_evmcs)
vmx->nested.need_vmcs12_to_shadow_sync = true;
return NVMX_VMENTRY_VMEXIT;
@@ -3403,6 +3475,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
* the nested entry.
*/
vmx->nested.nested_run_pending = 1;
+ vmx->nested.has_preemption_timer_deadline = false;
status = nested_vmx_enter_non_root_mode(vcpu, true);
if (unlikely(status != NVMX_VMENTRY_SUCCESS))
goto vmentry_failed;
@@ -3632,6 +3705,12 @@ static void nested_vmx_update_pending_dbg(struct kvm_vcpu *vcpu)
vcpu->arch.exception.payload);
}
+static bool nested_vmx_preemption_timer_pending(struct kvm_vcpu *vcpu)
+{
+ return nested_cpu_has_preemption_timer(get_vmcs12(vcpu)) &&
+ to_vmx(vcpu)->nested.preemption_timer_expired;
+}
+
static int vmx_check_nested_events(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -3661,11 +3740,11 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu)
/*
* Process any exceptions that are not debug traps before MTF.
*/
- if (vcpu->arch.exception.pending &&
- !vmx_pending_dbg_trap(vcpu) &&
- nested_vmx_check_exception(vcpu, &exit_qual)) {
+ if (vcpu->arch.exception.pending && !vmx_pending_dbg_trap(vcpu)) {
if (block_nested_events)
return -EBUSY;
+ if (!nested_vmx_check_exception(vcpu, &exit_qual))
+ goto no_vmexit;
nested_vmx_inject_exception_vmexit(vcpu, exit_qual);
return 0;
}
@@ -3678,25 +3757,34 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu)
return 0;
}
- if (vcpu->arch.exception.pending &&
- nested_vmx_check_exception(vcpu, &exit_qual)) {
+ if (vcpu->arch.exception.pending) {
if (block_nested_events)
return -EBUSY;
+ if (!nested_vmx_check_exception(vcpu, &exit_qual))
+ goto no_vmexit;
nested_vmx_inject_exception_vmexit(vcpu, exit_qual);
return 0;
}
- if (nested_cpu_has_preemption_timer(get_vmcs12(vcpu)) &&
- vmx->nested.preemption_timer_expired) {
+ if (nested_vmx_preemption_timer_pending(vcpu)) {
if (block_nested_events)
return -EBUSY;
nested_vmx_vmexit(vcpu, EXIT_REASON_PREEMPTION_TIMER, 0, 0);
return 0;
}
- if (vcpu->arch.nmi_pending && nested_exit_on_nmi(vcpu)) {
+ if (vcpu->arch.smi_pending && !is_smm(vcpu)) {
+ if (block_nested_events)
+ return -EBUSY;
+ goto no_vmexit;
+ }
+
+ if (vcpu->arch.nmi_pending && !vmx_nmi_blocked(vcpu)) {
if (block_nested_events)
return -EBUSY;
+ if (!nested_exit_on_nmi(vcpu))
+ goto no_vmexit;
+
nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
NMI_VECTOR | INTR_TYPE_NMI_INTR |
INTR_INFO_VALID_MASK, 0);
@@ -3709,13 +3797,16 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu)
return 0;
}
- if (kvm_cpu_has_interrupt(vcpu) && nested_exit_on_intr(vcpu)) {
+ if (kvm_cpu_has_interrupt(vcpu) && !vmx_interrupt_blocked(vcpu)) {
if (block_nested_events)
return -EBUSY;
+ if (!nested_exit_on_intr(vcpu))
+ goto no_vmexit;
nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0);
return 0;
}
+no_vmexit:
vmx_complete_nested_posted_interrupt(vcpu);
return 0;
}
@@ -3842,12 +3933,12 @@ static void copy_vmcs02_to_vmcs12_rare(struct kvm_vcpu *vcpu,
cpu = get_cpu();
vmx->loaded_vmcs = &vmx->nested.vmcs02;
- vmx_vcpu_load(&vmx->vcpu, cpu);
+ vmx_vcpu_load_vmcs(vcpu, cpu, &vmx->vmcs01);
sync_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
vmx->loaded_vmcs = &vmx->vmcs01;
- vmx_vcpu_load(&vmx->vcpu, cpu);
+ vmx_vcpu_load_vmcs(vcpu, cpu, &vmx->nested.vmcs02);
put_cpu();
}
@@ -3876,10 +3967,6 @@ static void sync_vmcs02_to_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
vmcs12->guest_cs_ar_bytes = vmcs_read32(GUEST_CS_AR_BYTES);
vmcs12->guest_ss_ar_bytes = vmcs_read32(GUEST_SS_AR_BYTES);
- vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS);
- vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP);
- vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP);
-
vmcs12->guest_interruptibility_info =
vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
@@ -3889,9 +3976,10 @@ static void sync_vmcs02_to_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
vmcs12->guest_activity_state = GUEST_ACTIVITY_ACTIVE;
if (nested_cpu_has_preemption_timer(vmcs12) &&
- vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)
- vmcs12->vmx_preemption_timer_value =
- vmx_get_preemption_timer_value(vcpu);
+ vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER &&
+ !vmx->nested.nested_run_pending)
+ vmcs12->vmx_preemption_timer_value =
+ vmx_get_preemption_timer_value(vcpu);
/*
* In some cases (usually, nested EPT), L2 is allowed to change its
@@ -3939,11 +4027,11 @@ static void sync_vmcs02_to_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
* which already writes to vmcs12 directly.
*/
static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
- u32 exit_reason, u32 exit_intr_info,
+ u32 vm_exit_reason, u32 exit_intr_info,
unsigned long exit_qualification)
{
/* update exit information fields: */
- vmcs12->vm_exit_reason = exit_reason;
+ vmcs12->vm_exit_reason = vm_exit_reason;
vmcs12->exit_qualification = exit_qualification;
vmcs12->vm_exit_intr_info = exit_intr_info;
@@ -3998,8 +4086,8 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12)
{
+ enum vm_entry_failure_code ignored;
struct kvm_segment seg;
- u32 entry_failure_code;
if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER)
vcpu->arch.efer = vmcs12->host_ia32_efer;
@@ -4034,30 +4122,13 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
* Only PDPTE load can fail as the value of cr3 was checked on entry and
* couldn't have changed.
*/
- if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, &entry_failure_code))
+ if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, &ignored))
nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_PDPTE_FAIL);
if (!enable_ept)
vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault;
- /*
- * If vmcs01 doesn't use VPID, CPU flushes TLB on every
- * VMEntry/VMExit. Thus, no need to flush TLB.
- *
- * If vmcs12 doesn't use VPID, L1 expects TLB to be
- * flushed on every VMEntry/VMExit.
- *
- * Otherwise, we can preserve TLB entries as long as we are
- * able to tag L1 TLB entries differently than L2 TLB entries.
- *
- * If vmcs12 uses EPT, we need to execute this flush on EPTP01
- * and therefore we request the TLB flush to happen only after VMCS EPTP
- * has been set by KVM_REQ_LOAD_MMU_PGD.
- */
- if (enable_vpid &&
- (!nested_cpu_has_vpid(vmcs12) || !nested_has_guest_tlb_tag(vcpu))) {
- kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
- }
+ nested_vmx_transition_tlb_flush(vcpu, vmcs12, false);
vmcs_write32(GUEST_SYSENTER_CS, vmcs12->host_ia32_sysenter_cs);
vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->host_ia32_sysenter_esp);
@@ -4204,7 +4275,7 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu)
* VMFail, like everything else we just need to ensure our
* software model is up-to-date.
*/
- if (enable_ept)
+ if (enable_ept && is_pae_paging(vcpu))
ept_save_pdptrs(vcpu);
kvm_mmu_reset_context(vcpu);
@@ -4272,7 +4343,7 @@ vmabort:
* and modify vmcs12 to make it see what it would expect to see there if
* L2 was its real guest. Must only be called when in L2 (is_guest_mode())
*/
-void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
+void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
u32 exit_intr_info, unsigned long exit_qualification)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -4281,6 +4352,10 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
/* trying to cancel vmlaunch/vmresume is a bug */
WARN_ON_ONCE(vmx->nested.nested_run_pending);
+ /* Service the TLB flush request for L2 before switching to L1. */
+ if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
+ kvm_vcpu_flush_tlb_current(vcpu);
+
leave_guest_mode(vcpu);
if (nested_cpu_has_preemption_timer(vmcs12))
@@ -4292,9 +4367,9 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
if (likely(!vmx->fail)) {
sync_vmcs02_to_vmcs12(vcpu, vmcs12);
- if (exit_reason != -1)
- prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info,
- exit_qualification);
+ if (vm_exit_reason != -1)
+ prepare_vmcs12(vcpu, vmcs12, vm_exit_reason,
+ exit_intr_info, exit_qualification);
/*
* Must happen outside of sync_vmcs02_to_vmcs12() as it will
@@ -4344,20 +4419,20 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
kvm_vcpu_unmap(vcpu, &vmx->nested.pi_desc_map, true);
vmx->nested.pi_desc = NULL;
- /*
- * We are now running in L2, mmu_notifier will force to reload the
- * page's hpa for L2 vmcs. Need to reload it for L1 before entering L1.
- */
- kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
+ if (vmx->nested.reload_vmcs01_apic_access_page) {
+ vmx->nested.reload_vmcs01_apic_access_page = false;
+ kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
+ }
- if ((exit_reason != -1) && (enable_shadow_vmcs || vmx->nested.hv_evmcs))
+ if ((vm_exit_reason != -1) &&
+ (enable_shadow_vmcs || vmx->nested.hv_evmcs))
vmx->nested.need_vmcs12_to_shadow_sync = true;
/* in case we halted in L2 */
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
if (likely(!vmx->fail)) {
- if (exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT &&
+ if ((u16)vm_exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT &&
nested_exit_intr_ack_set(vcpu)) {
int irq = kvm_cpu_get_interrupt(vcpu);
WARN_ON(irq < 0);
@@ -4365,7 +4440,7 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR;
}
- if (exit_reason != -1)
+ if (vm_exit_reason != -1)
trace_kvm_nested_vmexit_inject(vmcs12->vm_exit_reason,
vmcs12->exit_qualification,
vmcs12->idt_vectoring_info_field,
@@ -4554,13 +4629,13 @@ static int nested_vmx_get_vmptr(struct kvm_vcpu *vcpu, gpa_t *vmpointer)
gva_t gva;
struct x86_exception e;
- if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
+ if (get_vmx_mem_address(vcpu, vmx_get_exit_qual(vcpu),
vmcs_read32(VMX_INSTRUCTION_INFO), false,
sizeof(*vmpointer), &gva))
return 1;
if (kvm_read_guest_virt(vcpu, gva, vmpointer, sizeof(*vmpointer), &e)) {
- kvm_inject_page_fault(vcpu, &e);
+ kvm_inject_emulated_page_fault(vcpu, &e);
return 1;
}
@@ -4614,7 +4689,7 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu)
goto out_shadow_vmcs;
hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC,
- HRTIMER_MODE_REL_PINNED);
+ HRTIMER_MODE_ABS_PINNED);
vmx->nested.preemption_timer.function = vmx_preemption_timer_fn;
vmx->nested.vpid02 = allocate_vpid();
@@ -4819,7 +4894,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
{
struct vmcs12 *vmcs12 = is_guest_mode(vcpu) ? get_shadow_vmcs12(vcpu)
: get_vmcs12(vcpu);
- unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+ unsigned long exit_qualification = vmx_get_exit_qual(vcpu);
u32 instr_info = vmcs_read32(VMX_INSTRUCTION_INFO);
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct x86_exception e;
@@ -4869,7 +4944,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
return 1;
/* _system ok, nested_vmx_check_permission has verified cpl=0 */
if (kvm_write_guest_virt_system(vcpu, gva, &value, len, &e)) {
- kvm_inject_page_fault(vcpu, &e);
+ kvm_inject_emulated_page_fault(vcpu, &e);
return 1;
}
}
@@ -4905,7 +4980,7 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
{
struct vmcs12 *vmcs12 = is_guest_mode(vcpu) ? get_shadow_vmcs12(vcpu)
: get_vmcs12(vcpu);
- unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+ unsigned long exit_qualification = vmx_get_exit_qual(vcpu);
u32 instr_info = vmcs_read32(VMX_INSTRUCTION_INFO);
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct x86_exception e;
@@ -4943,7 +5018,7 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
instr_info, false, len, &gva))
return 1;
if (kvm_read_guest_virt(vcpu, gva, &value, len, &e)) {
- kvm_inject_page_fault(vcpu, &e);
+ kvm_inject_emulated_page_fault(vcpu, &e);
return 1;
}
}
@@ -5090,7 +5165,7 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
/* Emulate the VMPTRST instruction */
static int handle_vmptrst(struct kvm_vcpu *vcpu)
{
- unsigned long exit_qual = vmcs_readl(EXIT_QUALIFICATION);
+ unsigned long exit_qual = vmx_get_exit_qual(vcpu);
u32 instr_info = vmcs_read32(VMX_INSTRUCTION_INFO);
gpa_t current_vmptr = to_vmx(vcpu)->nested.current_vmptr;
struct x86_exception e;
@@ -5108,23 +5183,33 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu)
/* *_system ok, nested_vmx_check_permission has verified cpl=0 */
if (kvm_write_guest_virt_system(vcpu, gva, (void *)&current_vmptr,
sizeof(gpa_t), &e)) {
- kvm_inject_page_fault(vcpu, &e);
+ kvm_inject_emulated_page_fault(vcpu, &e);
return 1;
}
return nested_vmx_succeed(vcpu);
}
+#define EPTP_PA_MASK GENMASK_ULL(51, 12)
+
+static bool nested_ept_root_matches(hpa_t root_hpa, u64 root_eptp, u64 eptp)
+{
+ return VALID_PAGE(root_hpa) &&
+ ((root_eptp & EPTP_PA_MASK) == (eptp & EPTP_PA_MASK));
+}
+
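Illustration of the masked comparison above: EPTPs that differ only in the low attribute bits (memory type, page-walk length, accessed/dirty enable) name the same root table and must match for single-context INVEPT. Sketch only; the mask value restates GENMASK_ULL(51, 12):

#include <stdbool.h>
#include <stdint.h>

#define EPTP_PA_MASK_EXAMPLE 0x000ffffffffff000ULL	/* bits 51:12 */

/* e.g. same_ept_root(0x12345605eULL, 0x12345601eULL) is true. */
static bool same_ept_root(uint64_t eptp_a, uint64_t eptp_b)
{
	return (eptp_a & EPTP_PA_MASK_EXAMPLE) ==
	       (eptp_b & EPTP_PA_MASK_EXAMPLE);
}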
/* Emulate the INVEPT instruction */
static int handle_invept(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
u32 vmx_instruction_info, types;
- unsigned long type;
+ unsigned long type, roots_to_free;
+ struct kvm_mmu *mmu;
gva_t gva;
struct x86_exception e;
struct {
u64 eptp, gpa;
} operand;
+ int i;
if (!(vmx->nested.msrs.secondary_ctls_high &
SECONDARY_EXEC_ENABLE_EPT) ||
@@ -5148,27 +5233,49 @@ static int handle_invept(struct kvm_vcpu *vcpu)
/* According to the Intel VMX instruction reference, the memory
* operand is read even if it isn't needed (e.g., for type==global)
*/
- if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
+ if (get_vmx_mem_address(vcpu, vmx_get_exit_qual(vcpu),
vmx_instruction_info, false, sizeof(operand), &gva))
return 1;
if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) {
- kvm_inject_page_fault(vcpu, &e);
+ kvm_inject_emulated_page_fault(vcpu, &e);
return 1;
}
- switch (type) {
- case VMX_EPT_EXTENT_GLOBAL:
- case VMX_EPT_EXTENT_CONTEXT:
/*
- * TODO: Sync the necessary shadow EPT roots here, rather than
- * at the next emulated VM-entry.
+ * Nested EPT roots are always held through guest_mmu,
+ * not root_mmu.
*/
+ mmu = &vcpu->arch.guest_mmu;
+
+ switch (type) {
+ case VMX_EPT_EXTENT_CONTEXT:
+ if (!nested_vmx_check_eptp(vcpu, operand.eptp))
+ return nested_vmx_failValid(vcpu,
+ VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
+
+ roots_to_free = 0;
+ if (nested_ept_root_matches(mmu->root_hpa, mmu->root_pgd,
+ operand.eptp))
+ roots_to_free |= KVM_MMU_ROOT_CURRENT;
+
+ for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
+ if (nested_ept_root_matches(mmu->prev_roots[i].hpa,
+ mmu->prev_roots[i].pgd,
+ operand.eptp))
+ roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);
+ }
+ break;
+ case VMX_EPT_EXTENT_GLOBAL:
+ roots_to_free = KVM_MMU_ROOTS_ALL;
break;
default:
BUG();
break;
}
+ if (roots_to_free)
+ kvm_mmu_free_roots(vcpu, mmu, roots_to_free);
+
return nested_vmx_succeed(vcpu);
}
@@ -5208,11 +5315,11 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
/* according to the intel vmx instruction reference, the memory
* operand is read even if it isn't needed (e.g., for type==global)
*/
- if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
+ if (get_vmx_mem_address(vcpu, vmx_get_exit_qual(vcpu),
vmx_instruction_info, false, sizeof(operand), &gva))
return 1;
if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) {
- kvm_inject_page_fault(vcpu, &e);
+ kvm_inject_emulated_page_fault(vcpu, &e);
return 1;
}
if (operand.vpid >> 16)
@@ -5226,27 +5333,37 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
is_noncanonical_address(operand.gla, vcpu))
return nested_vmx_failValid(vcpu,
VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
- if (cpu_has_vmx_invvpid_individual_addr()) {
- __invvpid(VMX_VPID_EXTENT_INDIVIDUAL_ADDR,
- vpid02, operand.gla);
- } else
- __vmx_flush_tlb(vcpu, vpid02, false);
+ vpid_sync_vcpu_addr(vpid02, operand.gla);
break;
case VMX_VPID_EXTENT_SINGLE_CONTEXT:
case VMX_VPID_EXTENT_SINGLE_NON_GLOBAL:
if (!operand.vpid)
return nested_vmx_failValid(vcpu,
VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
- __vmx_flush_tlb(vcpu, vpid02, false);
+ vpid_sync_context(vpid02);
break;
case VMX_VPID_EXTENT_ALL_CONTEXT:
- __vmx_flush_tlb(vcpu, vpid02, false);
+ vpid_sync_context(vpid02);
break;
default:
WARN_ON_ONCE(1);
return kvm_skip_emulated_instruction(vcpu);
}
+ /*
+ * Sync the shadow page tables if EPT is disabled, as L1 is invalidating
+ * linear mappings for L2 (tagged with L2's VPID). Free all roots as
+ * VPIDs are not tracked in the MMU role.
+ *
+ * Note, this operates on root_mmu, not guest_mmu, as L1 and L2 share
+ * an MMU when EPT is disabled.
+ *
+ * TODO: sync only the affected SPTEs for INDIVIDUAL_ADDR.
+ */
+ if (!enable_ept)
+ kvm_mmu_free_roots(vcpu, &vcpu->arch.root_mmu,
+ KVM_MMU_ROOTS_ALL);
+
return nested_vmx_succeed(vcpu);
}
@@ -5327,8 +5444,8 @@ static int handle_vmfunc(struct kvm_vcpu *vcpu)
fail:
nested_vmx_vmexit(vcpu, vmx->exit_reason,
- vmcs_read32(VM_EXIT_INTR_INFO),
- vmcs_readl(EXIT_QUALIFICATION));
+ vmx_get_intr_info(vcpu),
+ vmx_get_exit_qual(vcpu));
return 1;
}
@@ -5379,7 +5496,7 @@ static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu,
if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
return nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING);
- exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+ exit_qualification = vmx_get_exit_qual(vcpu);
port = exit_qualification >> 16;
size = (exit_qualification & 7) + 1;
@@ -5433,7 +5550,7 @@ static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu,
static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12)
{
- unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+ unsigned long exit_qualification = vmx_get_exit_qual(vcpu);
int cr = exit_qualification & 15;
int reg;
unsigned long val;
@@ -5449,15 +5566,6 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu,
return true;
break;
case 3:
- if ((vmcs12->cr3_target_count >= 1 &&
- vmcs12->cr3_target_value0 == val) ||
- (vmcs12->cr3_target_count >= 2 &&
- vmcs12->cr3_target_value1 == val) ||
- (vmcs12->cr3_target_count >= 3 &&
- vmcs12->cr3_target_value2 == val) ||
- (vmcs12->cr3_target_count >= 4 &&
- vmcs12->cr3_target_value3 == val))
- return false;
if (nested_cpu_has(vmcs12, CPU_BASED_CR3_LOAD_EXITING))
return true;
break;
@@ -5551,49 +5659,85 @@ static bool nested_vmx_exit_handled_mtf(struct vmcs12 *vmcs12)
}
/*
- * Return true if we should exit from L2 to L1 to handle an exit, or false if we
- * should handle it ourselves in L0 (and then continue L2). Only call this
- * when in is_guest_mode (L2).
+ * Return true if L0 wants to handle an exit from L2 regardless of whether or not
+ * L1 wants the exit. Only call this when in is_guest_mode (L2).
*/
-bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason)
+static bool nested_vmx_l0_wants_exit(struct kvm_vcpu *vcpu, u32 exit_reason)
{
- u32 intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
- struct vcpu_vmx *vmx = to_vmx(vcpu);
- struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
-
- WARN_ON_ONCE(vmx->nested.nested_run_pending);
-
- if (unlikely(vmx->fail)) {
- trace_kvm_nested_vmenter_failed(
- "hardware VM-instruction error: ",
- vmcs_read32(VM_INSTRUCTION_ERROR));
- return true;
- }
-
- trace_kvm_nested_vmexit(kvm_rip_read(vcpu), exit_reason,
- vmcs_readl(EXIT_QUALIFICATION),
- vmx->idt_vectoring_info,
- intr_info,
- vmcs_read32(VM_EXIT_INTR_ERROR_CODE),
- KVM_ISA_VMX);
+ u32 intr_info;
switch (exit_reason) {
case EXIT_REASON_EXCEPTION_NMI:
+ intr_info = vmx_get_intr_info(vcpu);
if (is_nmi(intr_info))
- return false;
+ return true;
else if (is_page_fault(intr_info))
- return !vmx->vcpu.arch.apf.host_apf_reason && enable_ept;
+ return vcpu->arch.apf.host_apf_flags || !enable_ept;
else if (is_debug(intr_info) &&
vcpu->guest_debug &
(KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
- return false;
+ return true;
else if (is_breakpoint(intr_info) &&
vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
- return false;
+ return true;
+ return false;
+ case EXIT_REASON_EXTERNAL_INTERRUPT:
+ return true;
+ case EXIT_REASON_MCE_DURING_VMENTRY:
+ return true;
+ case EXIT_REASON_EPT_VIOLATION:
+ /*
+ * L0 always deals with the EPT violation. If nested EPT is
+ * used, and the nested mmu code discovers that the address is
+ * missing in the guest EPT table (EPT12), the EPT violation
+ * will be injected with nested_ept_inject_page_fault()
+ */
+ return true;
+ case EXIT_REASON_EPT_MISCONFIG:
+ /*
+ * L2 never directly uses L1's EPT, but rather L0's own EPT
+ * table (shadow on EPT) or a merged EPT table that L0 built
+ * (EPT on EPT). So any problems with the structure of the
+ * table are L0's fault.
+ */
+ return true;
+ case EXIT_REASON_PREEMPTION_TIMER:
+ return true;
+ case EXIT_REASON_PML_FULL:
+ /* We emulate PML support to L1. */
+ return true;
+ case EXIT_REASON_VMFUNC:
+ /* VM functions are emulated through L2->L0 vmexits. */
+ return true;
+ case EXIT_REASON_ENCLS:
+ /* SGX is never exposed to L1 */
+ return true;
+ default:
+ break;
+ }
+ return false;
+}
+
+/*
+ * Return true if L1 wants to intercept an exit from L2. Only call this when in
+ * is_guest_mode (L2).
+ */
+static bool nested_vmx_l1_wants_exit(struct kvm_vcpu *vcpu, u32 exit_reason)
+{
+ struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+ u32 intr_info;
+
+ switch (exit_reason) {
+ case EXIT_REASON_EXCEPTION_NMI:
+ intr_info = vmx_get_intr_info(vcpu);
+ if (is_nmi(intr_info))
+ return true;
+ else if (is_page_fault(intr_info))
+ return true;
return vmcs12->exception_bitmap &
(1u << (intr_info & INTR_INFO_VECTOR_MASK));
case EXIT_REASON_EXTERNAL_INTERRUPT:
- return false;
+ return nested_exit_on_intr(vcpu);
case EXIT_REASON_TRIPLE_FAULT:
return true;
case EXIT_REASON_INTERRUPT_WINDOW:
@@ -5658,7 +5802,7 @@ bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason)
nested_cpu_has2(vmcs12,
SECONDARY_EXEC_PAUSE_LOOP_EXITING);
case EXIT_REASON_MCE_DURING_VMENTRY:
- return false;
+ return true;
case EXIT_REASON_TPR_BELOW_THRESHOLD:
return nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW);
case EXIT_REASON_APIC_ACCESS:
@@ -5670,22 +5814,6 @@ bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason)
* delivery" only come from vmcs12.
*/
return true;
- case EXIT_REASON_EPT_VIOLATION:
- /*
- * L0 always deals with the EPT violation. If nested EPT is
- * used, and the nested mmu code discovers that the address is
- * missing in the guest EPT table (EPT12), the EPT violation
- * will be injected with nested_ept_inject_page_fault()
- */
- return false;
- case EXIT_REASON_EPT_MISCONFIG:
- /*
- * L2 never uses directly L1's EPT, but rather L0's own EPT
- * table (shadow on EPT) or a merged EPT table that L0 built
- * (EPT on EPT). So any problems with the structure of the
- * table is L0's fault.
- */
- return false;
case EXIT_REASON_INVPCID:
return
nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_INVPCID) &&
@@ -5702,17 +5830,6 @@ bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason)
* the XSS exit bitmap in vmcs12.
*/
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
- case EXIT_REASON_PREEMPTION_TIMER:
- return false;
- case EXIT_REASON_PML_FULL:
- /* We emulate PML support to L1. */
- return false;
- case EXIT_REASON_VMFUNC:
- /* VM functions are emulated through L2->L0 vmexits. */
- return false;
- case EXIT_REASON_ENCLS:
- /* SGX is never exposed to L1 */
- return false;
case EXIT_REASON_UMWAIT:
case EXIT_REASON_TPAUSE:
return nested_cpu_has2(vmcs12,
@@ -5722,6 +5839,67 @@ bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason)
}
}
+/*
+ * Conditionally reflect a VM-Exit into L1. Returns %true if the VM-Exit was
+ * reflected into L1.
+ */
+bool nested_vmx_reflect_vmexit(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ u32 exit_reason = vmx->exit_reason;
+ unsigned long exit_qual;
+ u32 exit_intr_info;
+
+ WARN_ON_ONCE(vmx->nested.nested_run_pending);
+
+ /*
+ * Late nested VM-Fail shares the same flow as nested VM-Exit since KVM
+ * has already loaded L2's state.
+ */
+ if (unlikely(vmx->fail)) {
+ trace_kvm_nested_vmenter_failed(
+ "hardware VM-instruction error: ",
+ vmcs_read32(VM_INSTRUCTION_ERROR));
+ exit_intr_info = 0;
+ exit_qual = 0;
+ goto reflect_vmexit;
+ }
+
+ exit_intr_info = vmx_get_intr_info(vcpu);
+ exit_qual = vmx_get_exit_qual(vcpu);
+
+ trace_kvm_nested_vmexit(kvm_rip_read(vcpu), exit_reason, exit_qual,
+ vmx->idt_vectoring_info, exit_intr_info,
+ vmcs_read32(VM_EXIT_INTR_ERROR_CODE),
+ KVM_ISA_VMX);
+
+ /* If L0 (KVM) wants the exit, it trumps L1's desires. */
+ if (nested_vmx_l0_wants_exit(vcpu, exit_reason))
+ return false;
+
+ /* If L1 doesn't want the exit, handle it in L0. */
+ if (!nested_vmx_l1_wants_exit(vcpu, exit_reason))
+ return false;
+
+ /*
+ * vmcs.VM_EXIT_INTR_INFO is only valid for EXCEPTION_NMI exits. For
+ * EXTERNAL_INTERRUPT, the value for vmcs12->vm_exit_intr_info would
+ * need to be synthesized by querying the in-kernel LAPIC, but external
+ * interrupts are never reflected to L1 so it's a non-issue.
+ */
+ if ((exit_intr_info &
+ (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) ==
+ (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) {
+ struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+
+ vmcs12->vm_exit_intr_error_code =
+ vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
+ }
+
+reflect_vmexit:
+ nested_vmx_vmexit(vcpu, exit_reason, exit_intr_info, exit_qual);
+ return true;
+}
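A sketch of the call-site shape this helper implies in L0's exit handler (assumed; the vmx.c side of the change is not shown in this hunk):

/* Sketch only: reflect first, fall back to L0's own exit dispatch. */
static int handle_exit_example(struct kvm_vcpu *vcpu)
{
	if (is_guest_mode(vcpu) && nested_vmx_reflect_vmexit(vcpu))
		return 1;	/* exit was forwarded to L1 */

	/* ...continue with L0's exit_reason dispatch... */
	return 1;
}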
static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
struct kvm_nested_state __user *user_kvm_nested_state,
@@ -5733,8 +5911,10 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
.flags = 0,
.format = KVM_STATE_NESTED_FORMAT_VMX,
.size = sizeof(kvm_state),
+ .hdr.vmx.flags = 0,
.hdr.vmx.vmxon_pa = -1ull,
.hdr.vmx.vmcs12_pa = -1ull,
+ .hdr.vmx.preemption_timer_deadline = 0,
};
struct kvm_vmx_nested_state_data __user *user_vmx_nested_state =
&user_kvm_nested_state->data.vmx[0];
@@ -5776,6 +5956,14 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
if (vmx->nested.mtf_pending)
kvm_state.flags |= KVM_STATE_NESTED_MTF_PENDING;
+
+ if (nested_cpu_has_preemption_timer(vmcs12) &&
+ vmx->nested.has_preemption_timer_deadline) {
+ kvm_state.hdr.vmx.flags |=
+ KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE;
+ kvm_state.hdr.vmx.preemption_timer_deadline =
+ vmx->nested.preemption_timer_deadline;
+ }
}
}
@@ -5821,7 +6009,6 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
get_shadow_vmcs12(vcpu), VMCS12_SIZE))
return -EFAULT;
}
-
out:
return kvm_state.size;
}
@@ -5844,7 +6031,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct vmcs12 *vmcs12;
- u32 exit_qual;
+ enum vm_entry_failure_code ignored;
struct kvm_vmx_nested_state_data __user *user_vmx_nested_state =
&user_kvm_nested_state->data.vmx[0];
int ret;
@@ -5983,9 +6170,15 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
goto error_guest_mode;
}
+ if (kvm_state->hdr.vmx.flags & KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE) {
+ vmx->nested.has_preemption_timer_deadline = true;
+ vmx->nested.preemption_timer_deadline =
+ kvm_state->hdr.vmx.preemption_timer_deadline;
+ }
+
if (nested_vmx_check_controls(vcpu, vmcs12) ||
nested_vmx_check_host_state(vcpu, vmcs12) ||
- nested_vmx_check_guest_state(vcpu, vmcs12, &exit_qual))
+ nested_vmx_check_guest_state(vcpu, vmcs12, &ignored))
goto error_guest_mode;
vmx->nested.dirty_vmcs12 = true;
@@ -6031,7 +6224,7 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps)
* reason is that if one of these bits is necessary, it will appear
* in vmcs01 and prepare_vmcs02, when it bitwise-or's the control
* fields of vmcs01 and vmcs02, will turn these bits off - and
- * nested_vmx_exit_reflected() will not pass related exits to L1.
+ * nested_vmx_l1_wants_exit() will not pass related exits to L1.
* These rules have exceptions below.
*/
@@ -6259,8 +6452,7 @@ void nested_vmx_hardware_unsetup(void)
}
}
-__init int nested_vmx_hardware_setup(struct kvm_x86_ops *ops,
- int (*exit_handlers[])(struct kvm_vcpu *))
+__init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *))
{
int i;
@@ -6296,12 +6488,15 @@ __init int nested_vmx_hardware_setup(struct kvm_x86_ops *ops,
exit_handlers[EXIT_REASON_INVVPID] = handle_invvpid;
exit_handlers[EXIT_REASON_VMFUNC] = handle_vmfunc;
- ops->check_nested_events = vmx_check_nested_events;
- ops->get_nested_state = vmx_get_nested_state;
- ops->set_nested_state = vmx_set_nested_state;
- ops->get_vmcs12_pages = nested_get_vmcs12_pages;
- ops->nested_enable_evmcs = nested_enable_evmcs;
- ops->nested_get_evmcs_version = nested_get_evmcs_version;
-
return 0;
}
+
+struct kvm_x86_nested_ops vmx_nested_ops = {
+ .check_events = vmx_check_nested_events,
+ .hv_timer_pending = nested_vmx_preemption_timer_pending,
+ .get_state = vmx_get_nested_state,
+ .set_state = vmx_set_nested_state,
+ .get_vmcs12_pages = nested_get_vmcs12_pages,
+ .enable_evmcs = nested_enable_evmcs,
+ .get_evmcs_version = nested_get_evmcs_version,
+};
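
The new nested_vmx_reflect_vmexit() above folds the old two-step reflection check into one ordered decision: L0's interest in an exit always wins, and only exits that L1 explicitly asked for are forwarded. A minimal user-space sketch of that ordering follows; the predicate names echo the patch, but the exit reasons and the policies behind them are made up purely for illustration.

#include <stdbool.h>
#include <stdio.h>

/*
 * Toy model of the reflection order: an exit goes to L1 only if L0
 * does not want to handle it itself and L1 asked for it.  The exit
 * reasons and the two policies are illustrative, not the kernel's.
 */
enum toy_exit { TOY_EXIT_IO, TOY_EXIT_HLT, TOY_EXIT_EPT_VIOLATION };

static bool l0_wants_exit(enum toy_exit reason)
{
        /* pretend L0 always handles EPT violations itself */
        return reason == TOY_EXIT_EPT_VIOLATION;
}

static bool l1_wants_exit(enum toy_exit reason)
{
        /* pretend vmcs12 enables HLT exiting but not I/O exiting */
        return reason == TOY_EXIT_HLT;
}

static bool reflect_to_l1(enum toy_exit reason)
{
        if (l0_wants_exit(reason))
                return false;   /* L0's desires trump L1's */
        if (!l1_wants_exit(reason))
                return false;   /* L1 didn't ask for it, handle in L0 */
        return true;
}

int main(void)
{
        printf("IO  -> %d\n", reflect_to_l1(TOY_EXIT_IO));            /* 0 */
        printf("HLT -> %d\n", reflect_to_l1(TOY_EXIT_HLT));           /* 1 */
        printf("EPT -> %d\n", reflect_to_l1(TOY_EXIT_EPT_VIOLATION)); /* 0 */
        return 0;
}
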
diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h
index ac56aefa49e3..758bccc26cf9 100644
--- a/arch/x86/kvm/vmx/nested.h
+++ b/arch/x86/kvm/vmx/nested.h
@@ -19,14 +19,13 @@ enum nvmx_vmentry_status {
void vmx_leave_nested(struct kvm_vcpu *vcpu);
void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps);
void nested_vmx_hardware_unsetup(void);
-__init int nested_vmx_hardware_setup(struct kvm_x86_ops *ops,
- int (*exit_handlers[])(struct kvm_vcpu *));
+__init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *));
void nested_vmx_set_vmcs_shadowing_bitmap(void);
void nested_vmx_free_vcpu(struct kvm_vcpu *vcpu);
enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
bool from_vmentry);
-bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason);
-void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
+bool nested_vmx_reflect_vmexit(struct kvm_vcpu *vcpu);
+void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
u32 exit_intr_info, unsigned long exit_qualification);
void nested_sync_vmcs12_to_shadow(struct kvm_vcpu *vcpu);
int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
@@ -62,6 +61,13 @@ static inline int vmx_has_valid_vmcs12(struct kvm_vcpu *vcpu)
vmx->nested.hv_evmcs;
}
+static inline u16 nested_get_vpid02(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+ return vmx->nested.vpid02 ? vmx->nested.vpid02 : vmx->vpid;
+}
+
static inline unsigned long nested_ept_get_eptp(struct kvm_vcpu *vcpu)
{
/* return the page table to be shadowed - in our case, EPT12 */
@@ -74,34 +80,6 @@ static inline bool nested_ept_ad_enabled(struct kvm_vcpu *vcpu)
}
/*
- * Reflect a VM Exit into L1.
- */
-static inline int nested_vmx_reflect_vmexit(struct kvm_vcpu *vcpu,
- u32 exit_reason)
-{
- u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
-
- /*
- * At this point, the exit interruption info in exit_intr_info
- * is only valid for EXCEPTION_NMI exits. For EXTERNAL_INTERRUPT
- * we need to query the in-kernel LAPIC.
- */
- WARN_ON(exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT);
- if ((exit_intr_info &
- (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) ==
- (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) {
- struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
-
- vmcs12->vm_exit_intr_error_code =
- vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
- }
-
- nested_vmx_vmexit(vcpu, exit_reason, exit_intr_info,
- vmcs_readl(EXIT_QUALIFICATION));
- return 1;
-}
-
-/*
* Return the cr0 value that a nested guest would read. This is a combination
* of the real cr0 used to run the guest (guest_cr0), and the bits shadowed by
* its hypervisor (cr0_read_shadow).
@@ -246,6 +224,11 @@ static inline bool nested_cpu_has_save_preemption_timer(struct vmcs12 *vmcs12)
VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
}
+static inline bool nested_exit_on_nmi(struct kvm_vcpu *vcpu)
+{
+ return nested_cpu_has_nmi_exiting(get_vmcs12(vcpu));
+}
+
/*
* In nested virtualization, check if L1 asked to exit on external interrupts.
* For most existing hypervisors, this will always return true.
@@ -299,4 +282,6 @@ static inline bool nested_cr4_valid(struct kvm_vcpu *vcpu, unsigned long val)
#define nested_guest_cr4_valid nested_cr4_valid
#define nested_host_cr4_valid nested_cr4_valid
+extern struct kvm_x86_nested_ops vmx_nested_ops;
+
#endif /* __KVM_X86_VMX_NESTED_H */
diff --git a/arch/x86/kvm/vmx/ops.h b/arch/x86/kvm/vmx/ops.h
index 19717d0a1100..5f1ac002b4b6 100644
--- a/arch/x86/kvm/vmx/ops.h
+++ b/arch/x86/kvm/vmx/ops.h
@@ -268,42 +268,38 @@ static inline void __invept(unsigned long ext, u64 eptp, gpa_t gpa)
vmx_asm2(invept, "r"(ext), "m"(operand), ext, eptp, gpa);
}
-static inline bool vpid_sync_vcpu_addr(int vpid, gva_t addr)
-{
- if (vpid == 0)
- return true;
-
- if (cpu_has_vmx_invvpid_individual_addr()) {
- __invvpid(VMX_VPID_EXTENT_INDIVIDUAL_ADDR, vpid, addr);
- return true;
- }
-
- return false;
-}
-
static inline void vpid_sync_vcpu_single(int vpid)
{
if (vpid == 0)
return;
- if (cpu_has_vmx_invvpid_single())
- __invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vpid, 0);
+ __invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vpid, 0);
}
static inline void vpid_sync_vcpu_global(void)
{
- if (cpu_has_vmx_invvpid_global())
- __invvpid(VMX_VPID_EXTENT_ALL_CONTEXT, 0, 0);
+ __invvpid(VMX_VPID_EXTENT_ALL_CONTEXT, 0, 0);
}
static inline void vpid_sync_context(int vpid)
{
if (cpu_has_vmx_invvpid_single())
vpid_sync_vcpu_single(vpid);
- else
+ else if (vpid != 0)
vpid_sync_vcpu_global();
}
+static inline void vpid_sync_vcpu_addr(int vpid, gva_t addr)
+{
+ if (vpid == 0)
+ return;
+
+ if (cpu_has_vmx_invvpid_individual_addr())
+ __invvpid(VMX_VPID_EXTENT_INDIVIDUAL_ADDR, vpid, addr);
+ else
+ vpid_sync_context(vpid);
+}
+
static inline void ept_sync_global(void)
{
__invept(VMX_EPT_EXTENT_GLOBAL, 0, 0);
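
The reworked vpid_sync_*() helpers turn the boolean-returning vpid_sync_vcpu_addr() into a self-contained fallback ladder: individual-address INVVPID when the CPU supports it, otherwise single-context, otherwise all-context, and nothing at all for vpid 0 (VM-Enter/VM-Exit already flush in that case). A rough stand-alone model of that ladder, with the has_* flags standing in for the cpu_has_vmx_invvpid_*() capability checks:

#include <stdbool.h>
#include <stdio.h>

static bool has_individual_addr;
static bool has_single_context;

static void sync_context(int vpid)
{
        if (has_single_context)
                printf("INVVPID single-context, vpid=%d\n", vpid);
        else if (vpid != 0)
                printf("INVVPID all-context\n");
}

static void sync_vcpu_addr(int vpid, unsigned long addr)
{
        if (vpid == 0)
                return;         /* flushed implicitly on VM-Enter/VM-Exit */
        if (has_individual_addr)
                printf("INVVPID individual-addr, vpid=%d addr=%#lx\n", vpid, addr);
        else
                sync_context(vpid);     /* fall back to a wider flush */
}

int main(void)
{
        has_individual_addr = false;
        has_single_context = true;
        sync_vcpu_addr(5, 0x1000);      /* falls back to single-context */
        sync_vcpu_addr(0, 0x1000);      /* no-op, vpid 0 */
        return 0;
}
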
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 7c857737b438..d33d890b605f 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -18,6 +18,8 @@
#include "nested.h"
#include "pmu.h"
+#define MSR_PMC_FULL_WIDTH_BIT (MSR_IA32_PMC0 - MSR_IA32_PERFCTR0)
+
static struct kvm_event_hw_type_mapping intel_arch_events[] = {
/* Index must match CPUID 0x0A.EBX bit vector */
[0] = { 0x3c, 0x00, PERF_COUNT_HW_CPU_CYCLES },
@@ -150,6 +152,22 @@ static struct kvm_pmc *intel_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu,
return &counters[array_index_nospec(idx, num_counters)];
}
+static inline bool fw_writes_is_enabled(struct kvm_vcpu *vcpu)
+{
+ if (!guest_cpuid_has(vcpu, X86_FEATURE_PDCM))
+ return false;
+
+ return vcpu->arch.perf_capabilities & PMU_CAP_FW_WRITES;
+}
+
+static inline struct kvm_pmc *get_fw_gp_pmc(struct kvm_pmu *pmu, u32 msr)
+{
+ if (!fw_writes_is_enabled(pmu_to_vcpu(pmu)))
+ return NULL;
+
+ return get_gp_pmc(pmu, msr, MSR_IA32_PMC0);
+}
+
static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
{
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
@@ -162,10 +180,13 @@ static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
ret = pmu->version > 1;
break;
+ case MSR_IA32_PERF_CAPABILITIES:
+ ret = guest_cpuid_has(vcpu, X86_FEATURE_PDCM);
+ break;
default:
ret = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0) ||
get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0) ||
- get_fixed_pmc(pmu, msr);
+ get_fixed_pmc(pmu, msr) || get_fw_gp_pmc(pmu, msr);
break;
}
@@ -184,35 +205,45 @@ static struct kvm_pmc *intel_msr_idx_to_pmc(struct kvm_vcpu *vcpu, u32 msr)
return pmc;
}
-static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
+static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
struct kvm_pmc *pmc;
+ u32 msr = msr_info->index;
switch (msr) {
case MSR_CORE_PERF_FIXED_CTR_CTRL:
- *data = pmu->fixed_ctr_ctrl;
+ msr_info->data = pmu->fixed_ctr_ctrl;
return 0;
case MSR_CORE_PERF_GLOBAL_STATUS:
- *data = pmu->global_status;
+ msr_info->data = pmu->global_status;
return 0;
case MSR_CORE_PERF_GLOBAL_CTRL:
- *data = pmu->global_ctrl;
+ msr_info->data = pmu->global_ctrl;
return 0;
case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
- *data = pmu->global_ovf_ctrl;
+ msr_info->data = pmu->global_ovf_ctrl;
+ return 0;
+ case MSR_IA32_PERF_CAPABILITIES:
+ if (!msr_info->host_initiated &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_PDCM))
+ return 1;
+ msr_info->data = vcpu->arch.perf_capabilities;
return 0;
default:
- if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0))) {
+ if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
+ (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
u64 val = pmc_read_counter(pmc);
- *data = val & pmu->counter_bitmask[KVM_PMC_GP];
+ msr_info->data =
+ val & pmu->counter_bitmask[KVM_PMC_GP];
return 0;
} else if ((pmc = get_fixed_pmc(pmu, msr))) {
u64 val = pmc_read_counter(pmc);
- *data = val & pmu->counter_bitmask[KVM_PMC_FIXED];
+ msr_info->data =
+ val & pmu->counter_bitmask[KVM_PMC_FIXED];
return 0;
} else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) {
- *data = pmc->eventsel;
+ msr_info->data = pmc->eventsel;
return 0;
}
}
@@ -258,9 +289,22 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 0;
}
break;
+ case MSR_IA32_PERF_CAPABILITIES:
+ if (!msr_info->host_initiated)
+ return 1;
+ if (guest_cpuid_has(vcpu, X86_FEATURE_PDCM) ?
+ (data & ~vmx_get_perf_capabilities()) : data)
+ return 1;
+ vcpu->arch.perf_capabilities = data;
+ return 0;
default:
- if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0))) {
- if (!msr_info->host_initiated)
+ if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
+ (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
+ if ((msr & MSR_PMC_FULL_WIDTH_BIT) &&
+ (data & ~pmu->counter_bitmask[KVM_PMC_GP]))
+ return 1;
+ if (!msr_info->host_initiated &&
+ !(msr & MSR_PMC_FULL_WIDTH_BIT))
data = (s64)(s32)data;
pmc->counter += data - pmc_read_counter(pmc);
if (pmc->perf_event)
@@ -300,6 +344,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
pmu->counter_bitmask[KVM_PMC_FIXED] = 0;
pmu->version = 0;
pmu->reserved_bits = 0xffffffff00200000ull;
+ vcpu->arch.perf_capabilities = 0;
entry = kvm_find_cpuid_entry(vcpu, 0xa, 0);
if (!entry)
@@ -312,6 +357,8 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
return;
perf_get_x86_pmu_capability(&x86_pmu);
+ if (guest_cpuid_has(vcpu, X86_FEATURE_PDCM))
+ vcpu->arch.perf_capabilities = vmx_get_perf_capabilities();
pmu->nr_arch_gp_counters = min_t(int, eax.split.num_counters,
x86_pmu.num_counters_gp);
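
The full-width counter support keys off a single bit: MSR_PMC_FULL_WIDTH_BIT is the numeric distance between the IA32_PMC0 and IA32_PERFCTR0 MSR ranges, so once an MSR has matched a GP counter, that bit alone distinguishes a legacy (sign-extending) write from a full-width one. A small stand-alone illustration of the arithmetic, using the architectural MSR numbers:

#include <stdint.h>
#include <stdio.h>

#define MSR_IA32_PERFCTR0       0x000000c1u     /* legacy GP counter base  */
#define MSR_IA32_PMC0           0x000004c1u     /* full-width alias base   */
#define PMC_FULL_WIDTH_BIT      (MSR_IA32_PMC0 - MSR_IA32_PERFCTR0)

int main(void)
{
        uint32_t legacy = MSR_IA32_PERFCTR0 + 2;        /* IA32_PERFCTR2 */
        uint32_t full   = MSR_IA32_PMC0 + 2;            /* IA32_PMC2     */

        /* the two ranges differ only in bit 10 (0x400) */
        printf("full-width bit   = %#x\n", PMC_FULL_WIDTH_BIT);
        printf("legacy is full?  = %d\n", !!(legacy & PMC_FULL_WIDTH_BIT));     /* 0 */
        printf("alias  is full?  = %d\n", !!(full   & PMC_FULL_WIDTH_BIT));     /* 1 */
        return 0;
}
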
diff --git a/arch/x86/kvm/vmx/vmcs.h b/arch/x86/kvm/vmx/vmcs.h
index 481ad879197b..5c0ff80b85c0 100644
--- a/arch/x86/kvm/vmx/vmcs.h
+++ b/arch/x86/kvm/vmx/vmcs.h
@@ -19,7 +19,7 @@ struct vmcs_hdr {
struct vmcs {
struct vmcs_hdr hdr;
u32 abort;
- char data[0];
+ char data[];
};
DECLARE_PER_CPU(struct vmcs *, current_vmcs);
diff --git a/arch/x86/kvm/vmx/vmcs12.c b/arch/x86/kvm/vmx/vmcs12.c
index 53dfb401316d..c8e51c004f78 100644
--- a/arch/x86/kvm/vmx/vmcs12.c
+++ b/arch/x86/kvm/vmx/vmcs12.c
@@ -115,10 +115,6 @@ const unsigned short vmcs_field_to_offset_table[] = {
FIELD(CR4_GUEST_HOST_MASK, cr4_guest_host_mask),
FIELD(CR0_READ_SHADOW, cr0_read_shadow),
FIELD(CR4_READ_SHADOW, cr4_read_shadow),
- FIELD(CR3_TARGET_VALUE0, cr3_target_value0),
- FIELD(CR3_TARGET_VALUE1, cr3_target_value1),
- FIELD(CR3_TARGET_VALUE2, cr3_target_value2),
- FIELD(CR3_TARGET_VALUE3, cr3_target_value3),
FIELD(EXIT_QUALIFICATION, exit_qualification),
FIELD(GUEST_LINEAR_ADDRESS, guest_linear_address),
FIELD(GUEST_CR0, guest_cr0),
diff --git a/arch/x86/kvm/vmx/vmcs12.h b/arch/x86/kvm/vmx/vmcs12.h
index d0c6df373f67..80232daf00ff 100644
--- a/arch/x86/kvm/vmx/vmcs12.h
+++ b/arch/x86/kvm/vmx/vmcs12.h
@@ -80,10 +80,7 @@ struct __packed vmcs12 {
natural_width cr4_guest_host_mask;
natural_width cr0_read_shadow;
natural_width cr4_read_shadow;
- natural_width cr3_target_value0;
- natural_width cr3_target_value1;
- natural_width cr3_target_value2;
- natural_width cr3_target_value3;
+ natural_width dead_space[4]; /* Last remnants of cr3_target_value[0-3]. */
natural_width exit_qualification;
natural_width guest_linear_address;
natural_width guest_cr0;
@@ -263,10 +260,7 @@ static inline void vmx_check_vmcs12_offsets(void)
CHECK_OFFSET(cr4_guest_host_mask, 352);
CHECK_OFFSET(cr0_read_shadow, 360);
CHECK_OFFSET(cr4_read_shadow, 368);
- CHECK_OFFSET(cr3_target_value0, 376);
- CHECK_OFFSET(cr3_target_value1, 384);
- CHECK_OFFSET(cr3_target_value2, 392);
- CHECK_OFFSET(cr3_target_value3, 400);
+ CHECK_OFFSET(dead_space, 376);
CHECK_OFFSET(exit_qualification, 408);
CHECK_OFFSET(guest_linear_address, 416);
CHECK_OFFSET(guest_cr0, 424);
diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index 51d1a82742fd..e0a182cb3cdd 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -166,13 +166,13 @@ SYM_FUNC_START(__vmx_vcpu_run)
mov WORD_SIZE(%_ASM_SP), %_ASM_AX
/* Save all guest registers, including RAX from the stack */
- __ASM_SIZE(pop) VCPU_RAX(%_ASM_AX)
- mov %_ASM_CX, VCPU_RCX(%_ASM_AX)
- mov %_ASM_DX, VCPU_RDX(%_ASM_AX)
- mov %_ASM_BX, VCPU_RBX(%_ASM_AX)
- mov %_ASM_BP, VCPU_RBP(%_ASM_AX)
- mov %_ASM_SI, VCPU_RSI(%_ASM_AX)
- mov %_ASM_DI, VCPU_RDI(%_ASM_AX)
+ pop VCPU_RAX(%_ASM_AX)
+ mov %_ASM_CX, VCPU_RCX(%_ASM_AX)
+ mov %_ASM_DX, VCPU_RDX(%_ASM_AX)
+ mov %_ASM_BX, VCPU_RBX(%_ASM_AX)
+ mov %_ASM_BP, VCPU_RBP(%_ASM_AX)
+ mov %_ASM_SI, VCPU_RSI(%_ASM_AX)
+ mov %_ASM_DI, VCPU_RDI(%_ASM_AX)
#ifdef CONFIG_X86_64
mov %r8, VCPU_R8 (%_ASM_AX)
mov %r9, VCPU_R9 (%_ASM_AX)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 89c766fad889..170cc76a581f 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -437,6 +437,11 @@ static const struct kvm_vmx_segment_field {
VMX_SEGMENT_FIELD(LDTR),
};
+static inline void vmx_segment_cache_clear(struct vcpu_vmx *vmx)
+{
+ vmx->segment_cache.bitmask = 0;
+}
+
static unsigned long host_idt_base;
/*
@@ -1306,10 +1311,12 @@ after_clear_sn:
pi_set_on(pi_desc);
}
-void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu)
+void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu,
+ struct loaded_vmcs *buddy)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
bool already_loaded = vmx->loaded_vmcs->cpu == cpu;
+ struct vmcs *prev;
if (!already_loaded) {
loaded_vmcs_clear(vmx->loaded_vmcs);
@@ -1328,16 +1335,28 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu)
local_irq_enable();
}
- if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) {
+ prev = per_cpu(current_vmcs, cpu);
+ if (prev != vmx->loaded_vmcs->vmcs) {
per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs;
vmcs_load(vmx->loaded_vmcs->vmcs);
- indirect_branch_prediction_barrier();
+
+ /*
+ * No indirect branch prediction barrier needed when switching
+ * the active VMCS within a guest, e.g. on nested VM-Enter.
+ * The L1 VMM can protect itself with retpolines, IBPB or IBRS.
+ */
+ if (!buddy || WARN_ON_ONCE(buddy->vmcs != prev))
+ indirect_branch_prediction_barrier();
}
if (!already_loaded) {
void *gdt = get_current_gdt_ro();
unsigned long sysenter_esp;
+ /*
+ * Flush all EPTP/VPID contexts, the new pCPU may have stale
+ * TLB entries from its previous association with the vCPU.
+ */
kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
/*
@@ -1364,11 +1383,11 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu)
* Switches to specified vcpu, until a matching vcpu_put(), but assumes
* vcpu mutex is already taken.
*/
-void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- vmx_vcpu_load_vmcs(vcpu, cpu);
+ vmx_vcpu_load_vmcs(vcpu, cpu, NULL);
vmx_vcpu_pi_load(vcpu, cpu);
@@ -1546,7 +1565,7 @@ static int vmx_rtit_ctl_check(struct kvm_vcpu *vcpu, u64 data)
static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
{
- unsigned long rip;
+ unsigned long rip, orig_rip;
/*
* Using VMCS.VM_EXIT_INSTRUCTION_LEN on EPT misconfig depends on
@@ -1558,8 +1577,17 @@ static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
*/
if (!static_cpu_has(X86_FEATURE_HYPERVISOR) ||
to_vmx(vcpu)->exit_reason != EXIT_REASON_EPT_MISCONFIG) {
- rip = kvm_rip_read(vcpu);
- rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
+ orig_rip = kvm_rip_read(vcpu);
+ rip = orig_rip + vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
+#ifdef CONFIG_X86_64
+ /*
+ * We need to mask out the high 32 bits of RIP if not in 64-bit
+ * mode, but just finding out that we are in 64-bit mode is
+ * quite expensive. Only do it if there was a carry.
+ */
+ if (unlikely(((rip ^ orig_rip) >> 31) == 3) && !is_64_bit_mode(vcpu))
+ rip = (u32)rip;
+#endif
kvm_rip_write(vcpu, rip);
} else {
if (!kvm_emulate_instruction(vcpu, EMULTYPE_SKIP))
@@ -1712,17 +1740,6 @@ static void setup_msrs(struct vcpu_vmx *vmx)
vmx_update_msr_bitmap(&vmx->vcpu);
}
-static u64 vmx_read_l1_tsc_offset(struct kvm_vcpu *vcpu)
-{
- struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
-
- if (is_guest_mode(vcpu) &&
- (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING))
- return vcpu->arch.tsc_offset - vmcs12->tsc_offset;
-
- return vcpu->arch.tsc_offset;
-}
-
static u64 vmx_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
{
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
@@ -1771,6 +1788,9 @@ static int vmx_get_msr_feature(struct kvm_msr_entry *msr)
if (!nested)
return 1;
return vmx_get_vmx_msr(&vmcs_config.nested, msr->index, &msr->data);
+ case MSR_IA32_PERF_CAPABILITIES:
+ msr->data = vmx_get_perf_capabilities();
+ return 0;
default:
return 1;
}
@@ -1926,6 +1946,16 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 0;
}
+static u64 nested_vmx_truncate_sysenter_addr(struct kvm_vcpu *vcpu,
+ u64 data)
+{
+#ifdef CONFIG_X86_64
+ if (!guest_cpuid_has(vcpu, X86_FEATURE_LM))
+ return (u32)data;
+#endif
+ return (unsigned long)data;
+}
+
/*
* Writes msr value into the appropriate "register".
* Returns 0 on success, non-0 otherwise.
@@ -1963,13 +1993,17 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
vmcs_write32(GUEST_SYSENTER_CS, data);
break;
case MSR_IA32_SYSENTER_EIP:
- if (is_guest_mode(vcpu))
+ if (is_guest_mode(vcpu)) {
+ data = nested_vmx_truncate_sysenter_addr(vcpu, data);
get_vmcs12(vcpu)->guest_sysenter_eip = data;
+ }
vmcs_writel(GUEST_SYSENTER_EIP, data);
break;
case MSR_IA32_SYSENTER_ESP:
- if (is_guest_mode(vcpu))
+ if (is_guest_mode(vcpu)) {
+ data = nested_vmx_truncate_sysenter_addr(vcpu, data);
get_vmcs12(vcpu)->guest_sysenter_esp = data;
+ }
vmcs_writel(GUEST_SYSENTER_ESP, data);
break;
case MSR_IA32_DEBUGCTLMSR:
@@ -2187,6 +2221,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
{
+ unsigned long guest_owned_bits;
+
kvm_register_mark_available(vcpu, reg);
switch (reg) {
@@ -2200,10 +2236,22 @@ static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
if (enable_ept)
ept_save_pdptrs(vcpu);
break;
+ case VCPU_EXREG_CR0:
+ guest_owned_bits = vcpu->arch.cr0_guest_owned_bits;
+
+ vcpu->arch.cr0 &= ~guest_owned_bits;
+ vcpu->arch.cr0 |= vmcs_readl(GUEST_CR0) & guest_owned_bits;
+ break;
case VCPU_EXREG_CR3:
if (enable_unrestricted_guest || (enable_ept && is_paging(vcpu)))
vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
break;
+ case VCPU_EXREG_CR4:
+ guest_owned_bits = vcpu->arch.cr4_guest_owned_bits;
+
+ vcpu->arch.cr4 &= ~guest_owned_bits;
+ vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & guest_owned_bits;
+ break;
default:
WARN_ON_ONCE(1);
break;
@@ -2837,34 +2885,64 @@ static void exit_lmode(struct kvm_vcpu *vcpu)
#endif
-static void vmx_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t addr)
+static void vmx_flush_tlb_all(struct kvm_vcpu *vcpu)
{
- int vpid = to_vmx(vcpu)->vpid;
-
- if (!vpid_sync_vcpu_addr(vpid, addr))
- vpid_sync_context(vpid);
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
/*
- * If VPIDs are not supported or enabled, then the above is a no-op.
- * But we don't really need a TLB flush in that case anyway, because
- * each VM entry/exit includes an implicit flush when VPID is 0.
+ * INVEPT must be issued when EPT is enabled, irrespective of VPID, as
+ * the CPU is not required to invalidate guest-physical mappings on
+ * VM-Entry, even if VPID is disabled. Guest-physical mappings are
+ * associated with the root EPT structure and not any particular VPID
+ * (INVVPID also isn't required to invalidate guest-physical mappings).
*/
+ if (enable_ept) {
+ ept_sync_global();
+ } else if (enable_vpid) {
+ if (cpu_has_vmx_invvpid_global()) {
+ vpid_sync_vcpu_global();
+ } else {
+ vpid_sync_vcpu_single(vmx->vpid);
+ vpid_sync_vcpu_single(vmx->nested.vpid02);
+ }
+ }
}
-static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
+static void vmx_flush_tlb_current(struct kvm_vcpu *vcpu)
{
- ulong cr0_guest_owned_bits = vcpu->arch.cr0_guest_owned_bits;
+ u64 root_hpa = vcpu->arch.mmu->root_hpa;
- vcpu->arch.cr0 &= ~cr0_guest_owned_bits;
- vcpu->arch.cr0 |= vmcs_readl(GUEST_CR0) & cr0_guest_owned_bits;
+ /* No flush required if the current context is invalid. */
+ if (!VALID_PAGE(root_hpa))
+ return;
+
+ if (enable_ept)
+ ept_sync_context(construct_eptp(vcpu, root_hpa));
+ else if (!is_guest_mode(vcpu))
+ vpid_sync_context(to_vmx(vcpu)->vpid);
+ else
+ vpid_sync_context(nested_get_vpid02(vcpu));
}
-static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
+static void vmx_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t addr)
{
- ulong cr4_guest_owned_bits = vcpu->arch.cr4_guest_owned_bits;
+ /*
+ * vpid_sync_vcpu_addr() is a nop if vmx->vpid==0, see the comment in
+ * vmx_flush_tlb_guest() for an explanation of why this is ok.
+ */
+ vpid_sync_vcpu_addr(to_vmx(vcpu)->vpid, addr);
+}
- vcpu->arch.cr4 &= ~cr4_guest_owned_bits;
- vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & cr4_guest_owned_bits;
+static void vmx_flush_tlb_guest(struct kvm_vcpu *vcpu)
+{
+ /*
+ * vpid_sync_context() is a nop if vmx->vpid==0, e.g. if enable_vpid==0
+ * or a vpid couldn't be allocated for this vCPU. VM-Enter and VM-Exit
+ * are required to flush GVA->{G,H}PA mappings from the TLB if vpid is
+ * disabled (VM-Enter with vpid enabled and vpid==0 is disallowed),
+ * i.e. no explicit INVVPID is necessary.
+ */
+ vpid_sync_context(to_vmx(vcpu)->vpid);
}
static void ept_load_pdptrs(struct kvm_vcpu *vcpu)
@@ -2886,12 +2964,13 @@ void ept_save_pdptrs(struct kvm_vcpu *vcpu)
{
struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
- if (is_pae_paging(vcpu)) {
- mmu->pdptrs[0] = vmcs_read64(GUEST_PDPTR0);
- mmu->pdptrs[1] = vmcs_read64(GUEST_PDPTR1);
- mmu->pdptrs[2] = vmcs_read64(GUEST_PDPTR2);
- mmu->pdptrs[3] = vmcs_read64(GUEST_PDPTR3);
- }
+ if (WARN_ON_ONCE(!is_pae_paging(vcpu)))
+ return;
+
+ mmu->pdptrs[0] = vmcs_read64(GUEST_PDPTR0);
+ mmu->pdptrs[1] = vmcs_read64(GUEST_PDPTR1);
+ mmu->pdptrs[2] = vmcs_read64(GUEST_PDPTR2);
+ mmu->pdptrs[3] = vmcs_read64(GUEST_PDPTR3);
kvm_register_mark_dirty(vcpu, VCPU_EXREG_PDPTR);
}
@@ -2955,20 +3034,27 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
vmcs_writel(CR0_READ_SHADOW, cr0);
vmcs_writel(GUEST_CR0, hw_cr0);
vcpu->arch.cr0 = cr0;
+ kvm_register_mark_available(vcpu, VCPU_EXREG_CR0);
/* depends on vcpu->arch.cr0 to be set to a new value */
vmx->emulation_required = emulation_required(vcpu);
}
-static int get_ept_level(struct kvm_vcpu *vcpu)
+static int vmx_get_tdp_level(struct kvm_vcpu *vcpu)
{
- if (is_guest_mode(vcpu) && nested_cpu_has_ept(get_vmcs12(vcpu)))
- return vmx_eptp_page_walk_level(nested_ept_get_eptp(vcpu));
if (cpu_has_vmx_ept_5levels() && (cpuid_maxphyaddr(vcpu) > 48))
return 5;
return 4;
}
+static int get_ept_level(struct kvm_vcpu *vcpu)
+{
+ if (is_guest_mode(vcpu) && nested_cpu_has_ept(get_vmcs12(vcpu)))
+ return vmx_eptp_page_walk_level(nested_ept_get_eptp(vcpu));
+
+ return vmx_get_tdp_level(vcpu);
+}
+
u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa)
{
u64 eptp = VMX_EPTP_MT_WB;
@@ -2983,16 +3069,15 @@ u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa)
return eptp;
}
-void vmx_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned long cr3)
+void vmx_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned long pgd)
{
struct kvm *kvm = vcpu->kvm;
bool update_guest_cr3 = true;
unsigned long guest_cr3;
u64 eptp;
- guest_cr3 = cr3;
if (enable_ept) {
- eptp = construct_eptp(vcpu, cr3);
+ eptp = construct_eptp(vcpu, pgd);
vmcs_write64(EPT_POINTER, eptp);
if (kvm_x86_ops.tlb_remote_flush) {
@@ -3003,16 +3088,15 @@ void vmx_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned long cr3)
spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock);
}
- /* Loading vmcs02.GUEST_CR3 is handled by nested VM-Enter. */
- if (is_guest_mode(vcpu))
- update_guest_cr3 = false;
- else if (!enable_unrestricted_guest && !is_paging(vcpu))
+ if (!enable_unrestricted_guest && !is_paging(vcpu))
guest_cr3 = to_kvm_vmx(kvm)->ept_identity_map_addr;
else if (test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail))
guest_cr3 = vcpu->arch.cr3;
else /* vmcs01.GUEST_CR3 is already up-to-date. */
update_guest_cr3 = false;
ept_load_pdptrs(vcpu);
+ } else {
+ guest_cr3 = pgd;
}
if (update_guest_cr3)
@@ -3063,6 +3147,7 @@ int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
return 1;
vcpu->arch.cr4 = cr4;
+ kvm_register_mark_available(vcpu, VCPU_EXREG_CR4);
if (!enable_unrestricted_guest) {
if (enable_ept) {
@@ -3851,7 +3936,8 @@ static int vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
if (pi_test_and_set_on(&vmx->pi_desc))
return 0;
- if (!kvm_vcpu_trigger_posted_interrupt(vcpu, false))
+ if (vcpu != kvm_get_running_vcpu() &&
+ !kvm_vcpu_trigger_posted_interrupt(vcpu, false))
kvm_vcpu_kick(vcpu);
return 0;
@@ -4147,8 +4233,7 @@ static void ept_set_mmio_spte_mask(void)
* EPT Misconfigurations can be generated if the value of bits 2:0
* of an EPT paging-structure entry is 110b (write/execute).
*/
- kvm_mmu_set_mmio_spte_mask(VMX_EPT_RWX_MASK,
- VMX_EPT_MISCONFIG_WX_VALUE, 0);
+ kvm_mmu_set_mmio_spte_mask(VMX_EPT_MISCONFIG_WX_VALUE, 0);
}
#define VMX_XSS_EXIT_BITMAP 0
@@ -4453,31 +4538,54 @@ void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
}
}
-static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
+bool vmx_nmi_blocked(struct kvm_vcpu *vcpu)
{
- if (to_vmx(vcpu)->nested.nested_run_pending)
- return 0;
+ if (is_guest_mode(vcpu) && nested_exit_on_nmi(vcpu))
+ return false;
- if (!enable_vnmi &&
- to_vmx(vcpu)->loaded_vmcs->soft_vnmi_blocked)
- return 0;
+ if (!enable_vnmi && to_vmx(vcpu)->loaded_vmcs->soft_vnmi_blocked)
+ return true;
- return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
- (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI
- | GUEST_INTR_STATE_NMI));
+ return (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
+ (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI |
+ GUEST_INTR_STATE_NMI));
}
-static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
+static int vmx_nmi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
{
if (to_vmx(vcpu)->nested.nested_run_pending)
- return false;
+ return -EBUSY;
+
+ /* An NMI must not be injected into L2 if it's supposed to VM-Exit. */
+ if (for_injection && is_guest_mode(vcpu) && nested_exit_on_nmi(vcpu))
+ return -EBUSY;
+
+ return !vmx_nmi_blocked(vcpu);
+}
+bool vmx_interrupt_blocked(struct kvm_vcpu *vcpu)
+{
if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu))
- return true;
+ return false;
- return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
- !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
- (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS));
+ return !(vmx_get_rflags(vcpu) & X86_EFLAGS_IF) ||
+ (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
+ (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS));
+}
+
+static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection)
+{
+ if (to_vmx(vcpu)->nested.nested_run_pending)
+ return -EBUSY;
+
+ /*
+ * An IRQ must not be injected into L2 if it's supposed to VM-Exit,
+ * e.g. if the IRQ arrived asynchronously after checking nested events.
+ */
+ if (for_injection && is_guest_mode(vcpu) && nested_exit_on_intr(vcpu))
+ return -EBUSY;
+
+ return !vmx_interrupt_blocked(vcpu);
}
static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
@@ -4518,10 +4626,8 @@ static bool rmode_exception(struct kvm_vcpu *vcpu, int vec)
return false;
/* fall through */
case DB_VECTOR:
- if (vcpu->guest_debug &
- (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
- return false;
- /* fall through */
+ return !(vcpu->guest_debug &
+ (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP));
case DE_VECTOR:
case OF_VECTOR:
case BR_VECTOR:
@@ -4616,7 +4722,7 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
u32 vect_info;
vect_info = vmx->idt_vectoring_info;
- intr_info = vmx->exit_intr_info;
+ intr_info = vmx_get_intr_info(vcpu);
if (is_machine_check(intr_info) || is_nmi(intr_info))
return 1; /* handled by handle_exception_nmi_irqoff() */
@@ -4660,9 +4766,9 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
}
if (is_page_fault(intr_info)) {
- cr2 = vmcs_readl(EXIT_QUALIFICATION);
+ cr2 = vmx_get_exit_qual(vcpu);
/* EPT won't cause page fault directly */
- WARN_ON_ONCE(!vcpu->arch.apf.host_apf_reason && enable_ept);
+ WARN_ON_ONCE(!vcpu->arch.apf.host_apf_flags && enable_ept);
return kvm_handle_page_fault(vcpu, error_code, cr2, NULL, 0);
}
@@ -4673,7 +4779,7 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
switch (ex_no) {
case DB_VECTOR:
- dr6 = vmcs_readl(EXIT_QUALIFICATION);
+ dr6 = vmx_get_exit_qual(vcpu);
if (!(vcpu->guest_debug &
(KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
if (is_icebp(intr_info))
@@ -4740,7 +4846,7 @@ static int handle_io(struct kvm_vcpu *vcpu)
int size, in, string;
unsigned port;
- exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+ exit_qualification = vmx_get_exit_qual(vcpu);
string = (exit_qualification & 16) != 0;
++vcpu->stat.io_exits;
@@ -4831,7 +4937,7 @@ static int handle_cr(struct kvm_vcpu *vcpu)
int err;
int ret;
- exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+ exit_qualification = vmx_get_exit_qual(vcpu);
cr = exit_qualification & 15;
reg = (exit_qualification >> 8) & 15;
switch ((exit_qualification >> 4) & 3) {
@@ -4908,7 +5014,7 @@ static int handle_dr(struct kvm_vcpu *vcpu)
unsigned long exit_qualification;
int dr, dr7, reg;
- exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+ exit_qualification = vmx_get_exit_qual(vcpu);
dr = exit_qualification & DEBUG_REG_ACCESS_NUM;
/* First, if DR does not exist, trigger UD */
@@ -5010,7 +5116,7 @@ static int handle_invd(struct kvm_vcpu *vcpu)
static int handle_invlpg(struct kvm_vcpu *vcpu)
{
- unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+ unsigned long exit_qualification = vmx_get_exit_qual(vcpu);
kvm_mmu_invlpg(vcpu, exit_qualification);
return kvm_skip_emulated_instruction(vcpu);
@@ -5042,7 +5148,7 @@ static int handle_xsetbv(struct kvm_vcpu *vcpu)
static int handle_apic_access(struct kvm_vcpu *vcpu)
{
if (likely(fasteoi)) {
- unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+ unsigned long exit_qualification = vmx_get_exit_qual(vcpu);
int access_type, offset;
access_type = exit_qualification & APIC_ACCESS_TYPE;
@@ -5063,7 +5169,7 @@ static int handle_apic_access(struct kvm_vcpu *vcpu)
static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu)
{
- unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+ unsigned long exit_qualification = vmx_get_exit_qual(vcpu);
int vector = exit_qualification & 0xff;
/* EOI-induced VM exit is trap-like and thus no need to adjust IP */
@@ -5073,7 +5179,7 @@ static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu)
static int handle_apic_write(struct kvm_vcpu *vcpu)
{
- unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+ unsigned long exit_qualification = vmx_get_exit_qual(vcpu);
u32 offset = exit_qualification & 0xfff;
/* APIC-write VM exit is trap-like and thus no need to adjust IP */
@@ -5094,7 +5200,7 @@ static int handle_task_switch(struct kvm_vcpu *vcpu)
idt_index = (vmx->idt_vectoring_info & VECTORING_INFO_VECTOR_MASK);
type = (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK);
- exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+ exit_qualification = vmx_get_exit_qual(vcpu);
reason = (u32)exit_qualification >> 30;
if (reason == TASK_SWITCH_GATE && idt_v) {
@@ -5144,7 +5250,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
gpa_t gpa;
u64 error_code;
- exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+ exit_qualification = vmx_get_exit_qual(vcpu);
/*
* EPT violation happened while executing iret from NMI,
@@ -5216,18 +5322,11 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
bool intr_window_requested;
unsigned count = 130;
- /*
- * We should never reach the point where we are emulating L2
- * due to invalid guest state as that means we incorrectly
- * allowed a nested VMEntry with an invalid vmcs12.
- */
- WARN_ON_ONCE(vmx->emulation_required && vmx->nested.nested_run_pending);
-
intr_window_requested = exec_controls_get(vmx) &
CPU_BASED_INTR_WINDOW_EXITING;
while (vmx->emulation_required && count-- != 0) {
- if (intr_window_requested && vmx_interrupt_allowed(vcpu))
+ if (intr_window_requested && !vmx_interrupt_blocked(vcpu))
return handle_interrupt_window(&vmx->vcpu);
if (kvm_test_request(KVM_REQ_EVENT, vcpu))
@@ -5404,13 +5503,13 @@ static int handle_invpcid(struct kvm_vcpu *vcpu)
/* According to the Intel instruction reference, the memory operand
* is read even if it isn't needed (e.g., for type==all)
*/
- if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
+ if (get_vmx_mem_address(vcpu, vmx_get_exit_qual(vcpu),
vmx_instruction_info, false,
sizeof(operand), &gva))
return 1;
if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) {
- kvm_inject_page_fault(vcpu, &e);
+ kvm_inject_emulated_page_fault(vcpu, &e);
return 1;
}
@@ -5439,11 +5538,11 @@ static int handle_invpcid(struct kvm_vcpu *vcpu)
if (kvm_get_active_pcid(vcpu) == operand.pcid) {
kvm_mmu_sync_roots(vcpu);
- kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+ kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
}
for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
- if (kvm_get_pcid(vcpu, vcpu->arch.mmu->prev_roots[i].cr3)
+ if (kvm_get_pcid(vcpu, vcpu->arch.mmu->prev_roots[i].pgd)
== operand.pcid)
roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);
@@ -5480,7 +5579,7 @@ static int handle_pml_full(struct kvm_vcpu *vcpu)
trace_kvm_pml_full(vcpu->vcpu_id);
- exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+ exit_qualification = vmx_get_exit_qual(vcpu);
/*
* PML buffer FULL happened while executing iret from NMI,
@@ -5499,14 +5598,22 @@ static int handle_pml_full(struct kvm_vcpu *vcpu)
return 1;
}
-static int handle_preemption_timer(struct kvm_vcpu *vcpu)
+static fastpath_t handle_fastpath_preemption_timer(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
if (!vmx->req_immediate_exit &&
- !unlikely(vmx->loaded_vmcs->hv_timer_soft_disabled))
+ !unlikely(vmx->loaded_vmcs->hv_timer_soft_disabled)) {
kvm_lapic_expired_hv_timer(vcpu);
+ return EXIT_FASTPATH_REENTER_GUEST;
+ }
+
+ return EXIT_FASTPATH_NONE;
+}
+static int handle_preemption_timer(struct kvm_vcpu *vcpu)
+{
+ handle_fastpath_preemption_timer(vcpu);
return 1;
}
@@ -5594,8 +5701,8 @@ static const int kvm_vmx_max_exit_handlers =
static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
{
- *info1 = vmcs_readl(EXIT_QUALIFICATION);
- *info2 = vmcs_read32(VM_EXIT_INTR_INFO);
+ *info1 = vmx_get_exit_qual(vcpu);
+ *info2 = vmx_get_intr_info(vcpu);
}
static void vmx_destroy_pml_buffer(struct vcpu_vmx *vmx)
@@ -5677,7 +5784,6 @@ void dump_vmcs(void)
u32 cpu_based_exec_ctrl, pin_based_exec_ctrl, secondary_exec_control;
unsigned long cr4;
u64 efer;
- int i, n;
if (!dump_invalid_vmcs) {
pr_warn_ratelimited("set kvm_intel.dump_invalid_vmcs=1 to dump internal KVM state.\n");
@@ -5814,14 +5920,6 @@ void dump_vmcs(void)
pr_err("PostedIntrVec = 0x%02x\n", vmcs_read16(POSTED_INTR_NV));
if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT))
pr_err("EPT pointer = 0x%016llx\n", vmcs_read64(EPT_POINTER));
- n = vmcs_read32(CR3_TARGET_COUNT);
- for (i = 0; i + 1 < n; i += 4)
- pr_err("CR3 target%u=%016lx target%u=%016lx\n",
- i, vmcs_readl(CR3_TARGET_VALUE0 + i * 2),
- i + 1, vmcs_readl(CR3_TARGET_VALUE0 + i * 2 + 2));
- if (i < n)
- pr_err("CR3 target%u=%016lx\n",
- i, vmcs_readl(CR3_TARGET_VALUE0 + i * 2));
if (secondary_exec_control & SECONDARY_EXEC_PAUSE_LOOP_EXITING)
pr_err("PLE Gap=%08x Window=%08x\n",
vmcs_read32(PLE_GAP), vmcs_read32(PLE_WINDOW));
@@ -5834,15 +5932,12 @@ void dump_vmcs(void)
* The guest has exited. See if we can fix it or if we need userspace
* assistance.
*/
-static int vmx_handle_exit(struct kvm_vcpu *vcpu,
- enum exit_fastpath_completion exit_fastpath)
+static int vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
u32 exit_reason = vmx->exit_reason;
u32 vectoring_info = vmx->idt_vectoring_info;
- trace_kvm_exit(exit_reason, vcpu, KVM_ISA_VMX);
-
/*
* Flush logged GPAs PML buffer, this will make dirty_bitmap more
* updated. Another good is, in kvm_vm_ioctl_get_dirty_log, before
@@ -5853,6 +5948,14 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu,
if (enable_pml)
vmx_flush_pml_buffer(vcpu);
+ /*
+ * We should never reach this point with a pending nested VM-Enter, and
+ * more specifically emulation of L2 due to invalid guest state (see
+ * below) should never happen as that means we incorrectly allowed a
+ * nested VM-Enter with an invalid vmcs12.
+ */
+ WARN_ON_ONCE(vmx->nested.nested_run_pending);
+
/* If guest state is invalid, start emulating */
if (vmx->emulation_required)
return handle_invalid_guest_state(vcpu);
@@ -5871,8 +5974,8 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu,
*/
nested_mark_vmcs12_pages_dirty(vcpu);
- if (nested_vmx_exit_reflected(vcpu, exit_reason))
- return nested_vmx_reflect_vmexit(vcpu, exit_reason);
+ if (nested_vmx_reflect_vmexit(vcpu))
+ return 1;
}
if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) {
@@ -5919,7 +6022,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu,
if (unlikely(!enable_vnmi &&
vmx->loaded_vmcs->soft_vnmi_blocked)) {
- if (vmx_interrupt_allowed(vcpu)) {
+ if (!vmx_interrupt_blocked(vcpu)) {
vmx->loaded_vmcs->soft_vnmi_blocked = 0;
} else if (vmx->loaded_vmcs->vnmi_blocked_time > 1000000000LL &&
vcpu->arch.nmi_pending) {
@@ -5936,10 +6039,8 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu,
}
}
- if (exit_fastpath == EXIT_FASTPATH_SKIP_EMUL_INS) {
- kvm_skip_emulated_instruction(vcpu);
+ if (exit_fastpath != EXIT_FASTPATH_NONE)
return 1;
- }
if (exit_reason >= kvm_vmx_max_exit_handlers)
goto unexpected_vmexit;
@@ -6093,7 +6194,15 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
if (flexpriority_enabled) {
sec_exec_control |=
SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
- vmx_flush_tlb(vcpu, true);
+ kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
+
+ /*
+ * Flush the TLB, reloading the APIC access page will
+ * only do so if its physical address has changed, but
+ * the guest may have inserted a non-APIC mapping into
+ * the TLB while the APIC access page was disabled.
+ */
+ kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
}
break;
case LAPIC_MODE_X2APIC:
@@ -6107,12 +6216,32 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
vmx_update_msr_bitmap(vcpu);
}
-static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa)
+static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu)
{
- if (!is_guest_mode(vcpu)) {
- vmcs_write64(APIC_ACCESS_ADDR, hpa);
- vmx_flush_tlb(vcpu, true);
+ struct page *page;
+
+ /* Defer reload until vmcs01 is the current VMCS. */
+ if (is_guest_mode(vcpu)) {
+ to_vmx(vcpu)->nested.reload_vmcs01_apic_access_page = true;
+ return;
}
+
+ if (!(secondary_exec_controls_get(to_vmx(vcpu)) &
+ SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
+ return;
+
+ page = gfn_to_page(vcpu->kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
+ if (is_error_page(page))
+ return;
+
+ vmcs_write64(APIC_ACCESS_ADDR, page_to_phys(page));
+ vmx_flush_tlb_current(vcpu);
+
+ /*
+ * Do not pin apic access page in memory, the MMU notifier
+ * will call us again if it is migrated or swapped out.
+ */
+ put_page(page);
}
static void vmx_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr)
@@ -6230,16 +6359,16 @@ static void vmx_apicv_post_state_restore(struct kvm_vcpu *vcpu)
static void handle_exception_nmi_irqoff(struct vcpu_vmx *vmx)
{
- vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
+ u32 intr_info = vmx_get_intr_info(&vmx->vcpu);
/* if exit due to PF check for async PF */
- if (is_page_fault(vmx->exit_intr_info)) {
- vmx->vcpu.arch.apf.host_apf_reason = kvm_read_and_reset_pf_reason();
+ if (is_page_fault(intr_info)) {
+ vmx->vcpu.arch.apf.host_apf_flags = kvm_read_and_reset_apf_flags();
/* Handle machine checks before interrupts are enabled */
- } else if (is_machine_check(vmx->exit_intr_info)) {
+ } else if (is_machine_check(intr_info)) {
kvm_machine_check();
/* We need to handle NMIs before interrupts are enabled */
- } else if (is_nmi(vmx->exit_intr_info)) {
+ } else if (is_nmi(intr_info)) {
kvm_before_interrupt(&vmx->vcpu);
asm("int $2");
kvm_after_interrupt(&vmx->vcpu);
@@ -6254,9 +6383,8 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
unsigned long tmp;
#endif
gate_desc *desc;
- u32 intr_info;
+ u32 intr_info = vmx_get_intr_info(vcpu);
- intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
if (WARN_ONCE(!is_external_intr(intr_info),
"KVM: unexpected VM-Exit interrupt info: 0x%x", intr_info))
return;
@@ -6269,13 +6397,13 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
asm volatile(
#ifdef CONFIG_X86_64
- "mov %%" _ASM_SP ", %[sp]\n\t"
- "and $0xfffffffffffffff0, %%" _ASM_SP "\n\t"
- "push $%c[ss]\n\t"
+ "mov %%rsp, %[sp]\n\t"
+ "and $-16, %%rsp\n\t"
+ "push %[ss]\n\t"
"push %[sp]\n\t"
#endif
"pushf\n\t"
- __ASM_SIZE(push) " $%c[cs]\n\t"
+ "push %[cs]\n\t"
CALL_NOSPEC
:
#ifdef CONFIG_X86_64
@@ -6284,7 +6412,9 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
ASM_CALL_CONSTRAINT
:
[thunk_target]"r"(entry),
+#ifdef CONFIG_X86_64
[ss]"i"(__KERNEL_DS),
+#endif
[cs]"i"(__KERNEL_CS)
);
@@ -6292,8 +6422,7 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
}
STACK_FRAME_NON_STANDARD(handle_external_interrupt_irqoff);
-static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu,
- enum exit_fastpath_completion *exit_fastpath)
+static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -6301,12 +6430,9 @@ static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu,
handle_external_interrupt_irqoff(vcpu);
else if (vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI)
handle_exception_nmi_irqoff(vmx);
- else if (!is_guest_mode(vcpu) &&
- vmx->exit_reason == EXIT_REASON_MSR_WRITE)
- *exit_fastpath = handle_fastpath_set_msr_irqoff(vcpu);
}
-static bool vmx_has_emulated_msr(int index)
+static bool vmx_has_emulated_msr(u32 index)
{
switch (index) {
case MSR_IA32_SMBASE:
@@ -6337,11 +6463,8 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
if (enable_vnmi) {
if (vmx->loaded_vmcs->nmi_known_unmasked)
return;
- /*
- * Can't use vmx->exit_intr_info since we're not sure what
- * the exit reason is.
- */
- exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
+
+ exit_intr_info = vmx_get_intr_info(&vmx->vcpu);
unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
/*
@@ -6508,13 +6631,27 @@ void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp)
}
}
+static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
+{
+ switch (to_vmx(vcpu)->exit_reason) {
+ case EXIT_REASON_MSR_WRITE:
+ return handle_fastpath_set_msr_irqoff(vcpu);
+ case EXIT_REASON_PREEMPTION_TIMER:
+ return handle_fastpath_preemption_timer(vcpu);
+ default:
+ return EXIT_FASTPATH_NONE;
+ }
+}
+
bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched);
-static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
+static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
{
+ fastpath_t exit_fastpath;
struct vcpu_vmx *vmx = to_vmx(vcpu);
unsigned long cr3, cr4;
+reenter_guest:
/* Record the guest's net vcpu time for enforced NMI injections. */
if (unlikely(!enable_vnmi &&
vmx->loaded_vmcs->soft_vnmi_blocked))
@@ -6523,7 +6660,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
/* Don't enter VMX if guest state is invalid, let the exit handler
start emulation until we arrive back to a valid state */
if (vmx->emulation_required)
- return;
+ return EXIT_FASTPATH_NONE;
if (vmx->ple_window_dirty) {
vmx->ple_window_dirty = false;
@@ -6643,12 +6780,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
loadsegment(es, __USER_DS);
#endif
- vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)
- | (1 << VCPU_EXREG_RFLAGS)
- | (1 << VCPU_EXREG_PDPTR)
- | (1 << VCPU_EXREG_SEGMENTS)
- | (1 << VCPU_EXREG_CR3));
- vcpu->arch.regs_dirty = 0;
+ vmx_register_cache_reset(vcpu);
pt_guest_exit(vmx);
@@ -6657,18 +6789,45 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
vmx->nested.nested_run_pending = 0;
vmx->idt_vectoring_info = 0;
- vmx->exit_reason = vmx->fail ? 0xdead : vmcs_read32(VM_EXIT_REASON);
- if ((u16)vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY)
+ if (unlikely(vmx->fail)) {
+ vmx->exit_reason = 0xdead;
+ return EXIT_FASTPATH_NONE;
+ }
+
+ vmx->exit_reason = vmcs_read32(VM_EXIT_REASON);
+ if (unlikely((u16)vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY))
kvm_machine_check();
- if (vmx->fail || (vmx->exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY))
- return;
+ trace_kvm_exit(vmx->exit_reason, vcpu, KVM_ISA_VMX);
+
+ if (unlikely(vmx->exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY))
+ return EXIT_FASTPATH_NONE;
vmx->loaded_vmcs->launched = 1;
vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
vmx_recover_nmi_blocking(vmx);
vmx_complete_interrupts(vmx);
+
+ if (is_guest_mode(vcpu))
+ return EXIT_FASTPATH_NONE;
+
+ exit_fastpath = vmx_exit_handlers_fastpath(vcpu);
+ if (exit_fastpath == EXIT_FASTPATH_REENTER_GUEST) {
+ if (!kvm_vcpu_exit_request(vcpu)) {
+ /*
+ * FIXME: this goto should be a loop in vcpu_enter_guest,
+ * but it would incur the cost of a retpoline for now.
+ * Revisit once static calls are available.
+ */
+ if (vcpu->arch.apicv_active)
+ vmx_sync_pir_to_irr(vcpu);
+ goto reenter_guest;
+ }
+ exit_fastpath = EXIT_FASTPATH_EXIT_HANDLED;
+ }
+
+ return exit_fastpath;
}
static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
@@ -7138,6 +7297,9 @@ static __init void vmx_set_cpu_caps(void)
/* CPUID 0x80000001 */
if (!cpu_has_vmx_rdtscp())
kvm_cpu_cap_clear(X86_FEATURE_RDTSCP);
+
+ if (vmx_waitpkg_supported())
+ kvm_cpu_cap_check_and_set(X86_FEATURE_WAITPKG);
}
static void vmx_request_immediate_exit(struct kvm_vcpu *vcpu)
@@ -7253,10 +7415,6 @@ static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc,
u64 tscl, guest_tscl, delta_tsc, lapic_timer_advance_cycles;
struct kvm_timer *ktimer = &vcpu->arch.apic->lapic_timer;
- if (kvm_mwait_in_guest(vcpu->kvm) ||
- kvm_can_post_timer_interrupt(vcpu))
- return -EOPNOTSUPP;
-
vmx = to_vmx(vcpu);
tscl = rdtsc();
guest_tscl = kvm_read_l1_tsc(vcpu, tscl);
@@ -7599,12 +7757,12 @@ static void vmx_setup_mce(struct kvm_vcpu *vcpu)
~FEAT_CTL_LMCE_ENABLED;
}
-static int vmx_smi_allowed(struct kvm_vcpu *vcpu)
+static int vmx_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
{
/* we need a nested vmexit to enter SMM, postpone if run is pending */
if (to_vmx(vcpu)->nested.nested_run_pending)
- return 0;
- return 1;
+ return -EBUSY;
+ return !is_smm(vcpu);
}
static int vmx_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
@@ -7641,9 +7799,9 @@ static int vmx_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
return 0;
}
-static int enable_smi_window(struct kvm_vcpu *vcpu)
+static void enable_smi_window(struct kvm_vcpu *vcpu)
{
- return 0;
+ /* RSM will cause a vmexit anyway. */
}
static bool vmx_need_emulation_on_page_fault(struct kvm_vcpu *vcpu)
@@ -7656,6 +7814,16 @@ static bool vmx_apic_init_signal_blocked(struct kvm_vcpu *vcpu)
return to_vmx(vcpu)->nested.vmxon;
}
+static void vmx_migrate_timers(struct kvm_vcpu *vcpu)
+{
+ if (is_guest_mode(vcpu)) {
+ struct hrtimer *timer = &to_vmx(vcpu)->nested.preemption_timer;
+
+ if (hrtimer_try_to_cancel(timer) == 1)
+ hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED);
+ }
+}
+
static void hardware_unsetup(void)
{
if (nested)
@@ -7700,8 +7868,6 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.set_segment = vmx_set_segment,
.get_cpl = vmx_get_cpl,
.get_cs_db_l_bits = vmx_get_cs_db_l_bits,
- .decache_cr0_guest_bits = vmx_decache_cr0_guest_bits,
- .decache_cr4_guest_bits = vmx_decache_cr4_guest_bits,
.set_cr0 = vmx_set_cr0,
.set_cr4 = vmx_set_cr4,
.set_efer = vmx_set_efer,
@@ -7715,8 +7881,10 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.get_rflags = vmx_get_rflags,
.set_rflags = vmx_set_rflags,
- .tlb_flush = vmx_flush_tlb,
+ .tlb_flush_all = vmx_flush_tlb_all,
+ .tlb_flush_current = vmx_flush_tlb_current,
.tlb_flush_gva = vmx_flush_tlb_gva,
+ .tlb_flush_guest = vmx_flush_tlb_guest,
.run = vmx_vcpu_run,
.handle_exit = vmx_handle_exit,
@@ -7751,7 +7919,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.set_tss_addr = vmx_set_tss_addr,
.set_identity_map_addr = vmx_set_identity_map_addr,
- .get_tdp_level = get_ept_level,
+ .get_tdp_level = vmx_get_tdp_level,
.get_mt_mask = vmx_get_mt_mask,
.get_exit_info = vmx_get_exit_info,
@@ -7760,7 +7928,6 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.has_wbinvd_exit = cpu_has_vmx_wbinvd_exit,
- .read_l1_tsc_offset = vmx_read_l1_tsc_offset,
.write_l1_tsc_offset = vmx_write_l1_tsc_offset,
.load_mmu_pgd = vmx_load_mmu_pgd,
@@ -7782,6 +7949,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.post_block = vmx_post_block,
.pmu_ops = &intel_pmu_ops,
+ .nested_ops = &vmx_nested_ops,
.update_pi_irte = vmx_update_pi_irte,
@@ -7797,14 +7965,9 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.pre_leave_smm = vmx_pre_leave_smm,
.enable_smi_window = enable_smi_window,
- .check_nested_events = NULL,
- .get_nested_state = NULL,
- .set_nested_state = NULL,
- .get_vmcs12_pages = NULL,
- .nested_enable_evmcs = NULL,
- .nested_get_evmcs_version = NULL,
.need_emulation_on_page_fault = vmx_need_emulation_on_page_fault,
.apic_init_signal_blocked = vmx_apic_init_signal_blocked,
+ .migrate_timers = vmx_migrate_timers,
};
static __init int hardware_setup(void)
@@ -7903,11 +8066,11 @@ static __init int hardware_setup(void)
if (!enable_ept)
ept_lpage_level = 0;
else if (cpu_has_vmx_ept_1g_page())
- ept_lpage_level = PT_PDPE_LEVEL;
+ ept_lpage_level = PG_LEVEL_1G;
else if (cpu_has_vmx_ept_2m_page())
- ept_lpage_level = PT_DIRECTORY_LEVEL;
+ ept_lpage_level = PG_LEVEL_2M;
else
- ept_lpage_level = PT_PAGE_TABLE_LEVEL;
+ ept_lpage_level = PG_LEVEL_4K;
kvm_configure_mmu(enable_ept, ept_lpage_level);
/*
@@ -7967,8 +8130,7 @@ static __init int hardware_setup(void)
nested_vmx_setup_ctls_msrs(&vmcs_config.nested,
vmx_capability.ept);
- r = nested_vmx_hardware_setup(&vmx_x86_ops,
- kvm_vmx_exit_handlers);
+ r = nested_vmx_hardware_setup(kvm_vmx_exit_handlers);
if (r)
return r;
}
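
vmx_vcpu_run() now returns a fastpath verdict and can loop straight back into the guest for a handful of exit reasons (MSR writes, the preemption timer) when nothing else demands the slow path. A toy model of that control flow follows; the names echo the patch, but the stream of exits and the pending-request check are heavily simplified.

#include <stdbool.h>
#include <stdio.h>

enum fastpath { FASTPATH_NONE, FASTPATH_REENTER_GUEST, FASTPATH_EXIT_HANDLED };
enum exit_reason { EXIT_MSR_WRITE, EXIT_PREEMPTION_TIMER, EXIT_IO };

static enum fastpath fastpath_handler(enum exit_reason reason)
{
        switch (reason) {
        case EXIT_MSR_WRITE:
        case EXIT_PREEMPTION_TIMER:
                return FASTPATH_REENTER_GUEST;  /* handled without the slow path */
        default:
                return FASTPATH_NONE;           /* needs full exit handling */
        }
}

int main(void)
{
        /* a pretend stream of consecutive VM-Exits from one run of the vCPU */
        enum exit_reason exits[] = { EXIT_MSR_WRITE, EXIT_PREEMPTION_TIMER, EXIT_IO };
        enum fastpath fp = FASTPATH_NONE;
        bool exit_request_pending = false;

        for (unsigned int i = 0; i < sizeof(exits) / sizeof(exits[0]); i++) {
                printf("VM-Exit %u, reason %d\n", i, exits[i]);
                fp = fastpath_handler(exits[i]);
                if (fp == FASTPATH_REENTER_GUEST && !exit_request_pending)
                        continue;       /* the "goto reenter_guest" case */
                break;                  /* hand the exit to the slow path */
        }
        printf("final fastpath verdict = %d\n", fp);
        return 0;
}
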
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index aab9df55336e..672c28f17e49 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -8,6 +8,7 @@
#include <asm/intel_pt.h>
#include "capabilities.h"
+#include "kvm_cache_regs.h"
#include "ops.h"
#include "vmcs.h"
@@ -136,6 +137,7 @@ struct nested_vmx {
bool vmcs02_initialized;
bool change_vmcs01_virtual_apic_mode;
+ bool reload_vmcs01_apic_access_page;
/*
* Enlightened VMCS has been enabled. It does not mean that L1 has to
@@ -167,6 +169,8 @@ struct nested_vmx {
u16 posted_intr_nv;
struct hrtimer preemption_timer;
+ u64 preemption_timer_deadline;
+ bool has_preemption_timer_deadline;
bool preemption_timer_expired;
/* to migrate it to L2 if VM_ENTRY_LOAD_DEBUG_CONTROLS is off */
@@ -208,6 +212,7 @@ struct vcpu_vmx {
*/
bool guest_state_loaded;
+ unsigned long exit_qualification;
u32 exit_intr_info;
u32 idt_vectoring_info;
ulong rflags;
@@ -317,8 +322,8 @@ struct kvm_vmx {
};
bool nested_vmx_allowed(struct kvm_vcpu *vcpu);
-void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu);
-void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
+void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu,
+ struct loaded_vmcs *buddy);
int allocate_vpid(void);
void free_vpid(int vpid);
void vmx_set_constant_host_state(struct vcpu_vmx *vmx);
@@ -341,6 +346,8 @@ void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa);
void update_exception_bitmap(struct kvm_vcpu *vcpu);
void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu);
+bool vmx_nmi_blocked(struct kvm_vcpu *vcpu);
+bool vmx_interrupt_blocked(struct kvm_vcpu *vcpu);
bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu);
void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked);
void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu);
@@ -441,9 +448,18 @@ BUILD_CONTROLS_SHADOW(pin, PIN_BASED_VM_EXEC_CONTROL)
BUILD_CONTROLS_SHADOW(exec, CPU_BASED_VM_EXEC_CONTROL)
BUILD_CONTROLS_SHADOW(secondary_exec, SECONDARY_VM_EXEC_CONTROL)
-static inline void vmx_segment_cache_clear(struct vcpu_vmx *vmx)
+static inline void vmx_register_cache_reset(struct kvm_vcpu *vcpu)
{
- vmx->segment_cache.bitmask = 0;
+ vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)
+ | (1 << VCPU_EXREG_RFLAGS)
+ | (1 << VCPU_EXREG_PDPTR)
+ | (1 << VCPU_EXREG_SEGMENTS)
+ | (1 << VCPU_EXREG_CR0)
+ | (1 << VCPU_EXREG_CR3)
+ | (1 << VCPU_EXREG_CR4)
+ | (1 << VCPU_EXREG_EXIT_INFO_1)
+ | (1 << VCPU_EXREG_EXIT_INFO_2));
+ vcpu->arch.regs_dirty = 0;
}
static inline u32 vmx_vmentry_ctrl(void)
@@ -486,6 +502,28 @@ static inline struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
return &(to_vmx(vcpu)->pi_desc);
}
+static inline unsigned long vmx_get_exit_qual(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+ if (!kvm_register_is_available(vcpu, VCPU_EXREG_EXIT_INFO_1)) {
+ kvm_register_mark_available(vcpu, VCPU_EXREG_EXIT_INFO_1);
+ vmx->exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+ }
+ return vmx->exit_qualification;
+}
+
+static inline u32 vmx_get_intr_info(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+ if (!kvm_register_is_available(vcpu, VCPU_EXREG_EXIT_INFO_2)) {
+ kvm_register_mark_available(vcpu, VCPU_EXREG_EXIT_INFO_2);
+ vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
+ }
+ return vmx->exit_intr_info;
+}
+
struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags);
void free_vmcs(struct vmcs *vmcs);
int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs);
@@ -500,24 +538,6 @@ static inline struct vmcs *alloc_vmcs(bool shadow)
u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa);
-static inline void __vmx_flush_tlb(struct kvm_vcpu *vcpu, int vpid,
- bool invalidate_gpa)
-{
- if (enable_ept && (invalidate_gpa || !enable_vpid)) {
- if (!VALID_PAGE(vcpu->arch.mmu->root_hpa))
- return;
- ept_sync_context(construct_eptp(vcpu,
- vcpu->arch.mmu->root_hpa));
- } else {
- vpid_sync_context(vpid);
- }
-}
-
-static inline void vmx_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
-{
- __vmx_flush_tlb(vcpu, to_vmx(vcpu)->vpid, invalidate_gpa);
-}
-
static inline void decache_tsc_multiplier(struct vcpu_vmx *vmx)
{
vmx->current_tsc_ratio = vmx->vcpu.arch.tsc_scaling_ratio;
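
The vmx.h changes make the exit qualification and interrupt info lazily cached registers: vmx_register_cache_reset() marks them stale on every VM-Exit, and the first reader pays for the VMREAD while later readers reuse the cached value. A toy model of that caching pattern, with vmread32() as a stub for vmcs_read32() and made-up field numbers:

#include <stdint.h>
#include <stdio.h>

enum { EXIT_INFO_QUAL = 0, EXIT_INFO_INTR = 1 };

static uint32_t avail_mask;     /* which fields are valid for this exit */
static uint32_t cached[2];

static uint32_t vmread32(int field)
{
        printf("VMREAD field %d\n", field);     /* pretend this is expensive */
        return field == EXIT_INFO_QUAL ? 0x84 : 0x80000b0e;
}

static uint32_t get_exit_info(int field)
{
        if (!(avail_mask & (1u << field))) {
                cached[field] = vmread32(field);
                avail_mask |= 1u << field;      /* mark available */
        }
        return cached[field];
}

static void register_cache_reset(void)
{
        avail_mask = 0;         /* new VM-Exit: everything is stale again */
}

int main(void)
{
        register_cache_reset();
        get_exit_info(EXIT_INFO_INTR);  /* first read does the VMREAD */
        get_exit_info(EXIT_INFO_INTR);  /* second read hits the cache */
        register_cache_reset();
        get_exit_info(EXIT_INFO_INTR);  /* next exit reads again */
        return 0;
}
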
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c17e6eb9ad43..9e41b5135340 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -18,6 +18,7 @@
#include <linux/kvm_host.h>
#include "irq.h"
+#include "ioapic.h"
#include "mmu.h"
#include "i8254.h"
#include "tss.h"
@@ -97,9 +98,6 @@ static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE);
static u64 __read_mostly cr4_reserved_bits = CR4_RESERVED_BITS;
-#define VM_STAT(x, ...) offsetof(struct kvm, stat.x), KVM_STAT_VM, ## __VA_ARGS__
-#define VCPU_STAT(x, ...) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU, ## __VA_ARGS__
-
#define KVM_X2APIC_API_VALID_FLAGS (KVM_X2APIC_API_USE_32BIT_IDS | \
KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK)
@@ -194,45 +192,46 @@ u64 __read_mostly supported_xss;
EXPORT_SYMBOL_GPL(supported_xss);
struct kvm_stats_debugfs_item debugfs_entries[] = {
- { "pf_fixed", VCPU_STAT(pf_fixed) },
- { "pf_guest", VCPU_STAT(pf_guest) },
- { "tlb_flush", VCPU_STAT(tlb_flush) },
- { "invlpg", VCPU_STAT(invlpg) },
- { "exits", VCPU_STAT(exits) },
- { "io_exits", VCPU_STAT(io_exits) },
- { "mmio_exits", VCPU_STAT(mmio_exits) },
- { "signal_exits", VCPU_STAT(signal_exits) },
- { "irq_window", VCPU_STAT(irq_window_exits) },
- { "nmi_window", VCPU_STAT(nmi_window_exits) },
- { "halt_exits", VCPU_STAT(halt_exits) },
- { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
- { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
- { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
- { "halt_wakeup", VCPU_STAT(halt_wakeup) },
- { "hypercalls", VCPU_STAT(hypercalls) },
- { "request_irq", VCPU_STAT(request_irq_exits) },
- { "irq_exits", VCPU_STAT(irq_exits) },
- { "host_state_reload", VCPU_STAT(host_state_reload) },
- { "fpu_reload", VCPU_STAT(fpu_reload) },
- { "insn_emulation", VCPU_STAT(insn_emulation) },
- { "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) },
- { "irq_injections", VCPU_STAT(irq_injections) },
- { "nmi_injections", VCPU_STAT(nmi_injections) },
- { "req_event", VCPU_STAT(req_event) },
- { "l1d_flush", VCPU_STAT(l1d_flush) },
- { "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) },
- { "mmu_pte_write", VM_STAT(mmu_pte_write) },
- { "mmu_pte_updated", VM_STAT(mmu_pte_updated) },
- { "mmu_pde_zapped", VM_STAT(mmu_pde_zapped) },
- { "mmu_flooded", VM_STAT(mmu_flooded) },
- { "mmu_recycled", VM_STAT(mmu_recycled) },
- { "mmu_cache_miss", VM_STAT(mmu_cache_miss) },
- { "mmu_unsync", VM_STAT(mmu_unsync) },
- { "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
- { "largepages", VM_STAT(lpages, .mode = 0444) },
- { "nx_largepages_splitted", VM_STAT(nx_lpage_splits, .mode = 0444) },
- { "max_mmu_page_hash_collisions",
- VM_STAT(max_mmu_page_hash_collisions) },
+ VCPU_STAT("pf_fixed", pf_fixed),
+ VCPU_STAT("pf_guest", pf_guest),
+ VCPU_STAT("tlb_flush", tlb_flush),
+ VCPU_STAT("invlpg", invlpg),
+ VCPU_STAT("exits", exits),
+ VCPU_STAT("io_exits", io_exits),
+ VCPU_STAT("mmio_exits", mmio_exits),
+ VCPU_STAT("signal_exits", signal_exits),
+ VCPU_STAT("irq_window", irq_window_exits),
+ VCPU_STAT("nmi_window", nmi_window_exits),
+ VCPU_STAT("halt_exits", halt_exits),
+ VCPU_STAT("halt_successful_poll", halt_successful_poll),
+ VCPU_STAT("halt_attempted_poll", halt_attempted_poll),
+ VCPU_STAT("halt_poll_invalid", halt_poll_invalid),
+ VCPU_STAT("halt_wakeup", halt_wakeup),
+ VCPU_STAT("hypercalls", hypercalls),
+ VCPU_STAT("request_irq", request_irq_exits),
+ VCPU_STAT("irq_exits", irq_exits),
+ VCPU_STAT("host_state_reload", host_state_reload),
+ VCPU_STAT("fpu_reload", fpu_reload),
+ VCPU_STAT("insn_emulation", insn_emulation),
+ VCPU_STAT("insn_emulation_fail", insn_emulation_fail),
+ VCPU_STAT("irq_injections", irq_injections),
+ VCPU_STAT("nmi_injections", nmi_injections),
+ VCPU_STAT("req_event", req_event),
+ VCPU_STAT("l1d_flush", l1d_flush),
+ VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns),
+ VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns),
+ VM_STAT("mmu_shadow_zapped", mmu_shadow_zapped),
+ VM_STAT("mmu_pte_write", mmu_pte_write),
+ VM_STAT("mmu_pte_updated", mmu_pte_updated),
+ VM_STAT("mmu_pde_zapped", mmu_pde_zapped),
+ VM_STAT("mmu_flooded", mmu_flooded),
+ VM_STAT("mmu_recycled", mmu_recycled),
+ VM_STAT("mmu_cache_miss", mmu_cache_miss),
+ VM_STAT("mmu_unsync", mmu_unsync),
+ VM_STAT("remote_tlb_flush", remote_tlb_flush),
+ VM_STAT("largepages", lpages, .mode = 0444),
+ VM_STAT("nx_largepages_splitted", nx_lpage_splits, .mode = 0444),
+ VM_STAT("max_mmu_page_hash_collisions", max_mmu_page_hash_collisions),
{ NULL }
};
@@ -261,7 +260,7 @@ static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt);
static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu)
{
int i;
- for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU); i++)
+ for (i = 0; i < ASYNC_PF_PER_VCPU; i++)
vcpu->arch.apf.gfns[i] = ~0;
}
@@ -612,15 +611,28 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
}
EXPORT_SYMBOL_GPL(kvm_inject_page_fault);
-static bool kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
+bool kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu,
+ struct x86_exception *fault)
{
- if (mmu_is_nested(vcpu) && !fault->nested_page_fault)
- vcpu->arch.nested_mmu.inject_page_fault(vcpu, fault);
- else
- vcpu->arch.mmu->inject_page_fault(vcpu, fault);
+ struct kvm_mmu *fault_mmu;
+ WARN_ON_ONCE(fault->vector != PF_VECTOR);
+
+ fault_mmu = fault->nested_page_fault ? vcpu->arch.mmu :
+ vcpu->arch.walk_mmu;
+
+ /*
+ * Invalidate the TLB entry for the faulting address, if one exists;
+ * otherwise the access will fault indefinitely (this also matches
+ * hardware behavior).
+ */
+ if ((fault->error_code & PFERR_PRESENT_MASK) &&
+ !(fault->error_code & PFERR_RSVD_MASK))
+ kvm_mmu_invalidate_gva(vcpu, fault_mmu, fault->address,
+ fault_mmu->root_hpa);
+ fault_mmu->inject_page_fault(vcpu, fault);
return fault->nested_page_fault;
}
+EXPORT_SYMBOL_GPL(kvm_inject_emulated_page_fault);
void kvm_inject_nmi(struct kvm_vcpu *vcpu)
{
@@ -1008,7 +1020,7 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) {
if (!skip_tlb_flush) {
kvm_mmu_sync_roots(vcpu);
- kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+ kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
}
return 0;
}
@@ -1020,7 +1032,7 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
return 1;
- kvm_mmu_new_cr3(vcpu, cr3, skip_tlb_flush);
+ kvm_mmu_new_pgd(vcpu, cr3, skip_tlb_flush, skip_tlb_flush);
vcpu->arch.cr3 = cr3;
kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
@@ -1060,7 +1072,7 @@ static void kvm_update_dr0123(struct kvm_vcpu *vcpu)
}
}
-static void kvm_update_dr7(struct kvm_vcpu *vcpu)
+void kvm_update_dr7(struct kvm_vcpu *vcpu)
{
unsigned long dr7;
@@ -1073,6 +1085,7 @@ static void kvm_update_dr7(struct kvm_vcpu *vcpu)
if (dr7 & DR7_BP_EN_MASK)
vcpu->arch.switch_db_regs |= KVM_DEBUGREG_BP_ENABLED;
}
+EXPORT_SYMBOL_GPL(kvm_update_dr7);
static u64 kvm_dr6_fixed(struct kvm_vcpu *vcpu)
{
@@ -1233,13 +1246,18 @@ static const u32 emulated_msrs_all[] = {
HV_X64_MSR_VP_ASSIST_PAGE,
HV_X64_MSR_REENLIGHTENMENT_CONTROL, HV_X64_MSR_TSC_EMULATION_CONTROL,
HV_X64_MSR_TSC_EMULATION_STATUS,
+ HV_X64_MSR_SYNDBG_OPTIONS,
+ HV_X64_MSR_SYNDBG_CONTROL, HV_X64_MSR_SYNDBG_STATUS,
+ HV_X64_MSR_SYNDBG_SEND_BUFFER, HV_X64_MSR_SYNDBG_RECV_BUFFER,
+ HV_X64_MSR_SYNDBG_PENDING_BUFFER,
MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
- MSR_KVM_PV_EOI_EN,
+ MSR_KVM_PV_EOI_EN, MSR_KVM_ASYNC_PF_INT, MSR_KVM_ASYNC_PF_ACK,
MSR_IA32_TSC_ADJUST,
MSR_IA32_TSCDEADLINE,
MSR_IA32_ARCH_CAPABILITIES,
+ MSR_IA32_PERF_CAPABILITIES,
MSR_IA32_MISC_ENABLE,
MSR_IA32_MCG_STATUS,
MSR_IA32_MCG_CTL,
@@ -1306,6 +1324,7 @@ static const u32 msr_based_features_all[] = {
MSR_F10H_DECFG,
MSR_IA32_UCODE_REV,
MSR_IA32_ARCH_CAPABILITIES,
+ MSR_IA32_PERF_CAPABILITIES,
};
static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all)];
@@ -1564,6 +1583,13 @@ int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr);
+bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu)
+{
+ return vcpu->mode == EXITING_GUEST_MODE || kvm_request_pending(vcpu) ||
+ need_resched() || signal_pending(current);
+}
+EXPORT_SYMBOL_GPL(kvm_vcpu_exit_request);
+
/*
* The fast path for frequent and performance sensitive wrmsr emulation,
* i.e. the sending of IPI, sending IPI early in the VM-Exit flow reduces
@@ -1592,27 +1618,44 @@ static int handle_fastpath_set_x2apic_icr_irqoff(struct kvm_vcpu *vcpu, u64 data
return 1;
}
-enum exit_fastpath_completion handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu)
+static int handle_fastpath_set_tscdeadline(struct kvm_vcpu *vcpu, u64 data)
+{
+ if (!kvm_can_use_hv_timer(vcpu))
+ return 1;
+
+ kvm_set_lapic_tscdeadline_msr(vcpu, data);
+ return 0;
+}
+
+fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu)
{
u32 msr = kvm_rcx_read(vcpu);
u64 data;
- int ret = 0;
+ fastpath_t ret = EXIT_FASTPATH_NONE;
switch (msr) {
case APIC_BASE_MSR + (APIC_ICR >> 4):
data = kvm_read_edx_eax(vcpu);
- ret = handle_fastpath_set_x2apic_icr_irqoff(vcpu, data);
+ if (!handle_fastpath_set_x2apic_icr_irqoff(vcpu, data)) {
+ kvm_skip_emulated_instruction(vcpu);
+ ret = EXIT_FASTPATH_EXIT_HANDLED;
+ }
+ break;
+ case MSR_IA32_TSCDEADLINE:
+ data = kvm_read_edx_eax(vcpu);
+ if (!handle_fastpath_set_tscdeadline(vcpu, data)) {
+ kvm_skip_emulated_instruction(vcpu);
+ ret = EXIT_FASTPATH_REENTER_GUEST;
+ }
break;
default:
- return EXIT_FASTPATH_NONE;
+ break;
}
- if (!ret) {
+ if (ret != EXIT_FASTPATH_NONE)
trace_kvm_msr_write(msr, data);
- return EXIT_FASTPATH_SKIP_EMUL_INS;
- }
- return EXIT_FASTPATH_NONE;
+ return ret;
}
EXPORT_SYMBOL_GPL(handle_fastpath_set_msr_irqoff);
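The return type change above is why the fastpath handler now reports one of three outcomes instead of a plain int. A small, self-contained model of the contract those values imply for the caller; the dispatch below is schematic and does not mirror the kernel's actual control flow:

#include <stdio.h>

typedef enum {
	EXIT_FASTPATH_NONE,		/* fall through to the full exit handler */
	EXIT_FASTPATH_REENTER_GUEST,	/* resume the guest without a full exit */
	EXIT_FASTPATH_EXIT_HANDLED,	/* exit handled, skip the slow path */
} fastpath_t;

static const char *act_on(fastpath_t fp)
{
	switch (fp) {
	case EXIT_FASTPATH_REENTER_GUEST:
		return "re-enter the guest immediately";
	case EXIT_FASTPATH_EXIT_HANDLED:
		return "skip the normal exit handler";
	default:
		return "run the normal exit handler";
	}
}

int main(void)
{
	printf("TSC deadline write: %s\n", act_on(EXIT_FASTPATH_REENTER_GUEST));
	printf("x2APIC ICR write:  %s\n", act_on(EXIT_FASTPATH_EXIT_HANDLED));
	return 0;
}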
@@ -1901,7 +1944,7 @@ static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu)
static void update_ia32_tsc_adjust_msr(struct kvm_vcpu *vcpu, s64 offset)
{
- u64 curr_offset = kvm_x86_ops.read_l1_tsc_offset(vcpu);
+ u64 curr_offset = vcpu->arch.l1_tsc_offset;
vcpu->arch.ia32_tsc_adjust_msr += offset - curr_offset;
}
@@ -1943,14 +1986,13 @@ static u64 kvm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
{
- u64 tsc_offset = kvm_x86_ops.read_l1_tsc_offset(vcpu);
-
- return tsc_offset + kvm_scale_tsc(vcpu, host_tsc);
+ return vcpu->arch.l1_tsc_offset + kvm_scale_tsc(vcpu, host_tsc);
}
EXPORT_SYMBOL_GPL(kvm_read_l1_tsc);
static void kvm_vcpu_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
{
+ vcpu->arch.l1_tsc_offset = offset;
vcpu->arch.tsc_offset = kvm_x86_ops.write_l1_tsc_offset(vcpu, offset);
}
@@ -2075,7 +2117,7 @@ EXPORT_SYMBOL_GPL(kvm_write_tsc);
static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu,
s64 adjustment)
{
- u64 tsc_offset = kvm_x86_ops.read_l1_tsc_offset(vcpu);
+ u64 tsc_offset = vcpu->arch.l1_tsc_offset;
kvm_vcpu_write_tsc_offset(vcpu, tsc_offset + adjustment);
}
@@ -2637,29 +2679,54 @@ out:
return r;
}
+static inline bool kvm_pv_async_pf_enabled(struct kvm_vcpu *vcpu)
+{
+ u64 mask = KVM_ASYNC_PF_ENABLED | KVM_ASYNC_PF_DELIVERY_AS_INT;
+
+ return (vcpu->arch.apf.msr_en_val & mask) == mask;
+}
+
static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
{
gpa_t gpa = data & ~0x3f;
- /* Bits 3:5 are reserved, Should be zero */
- if (data & 0x38)
+ /* Bits 4:5 are reserved, should be zero */
+ if (data & 0x30)
return 1;
- vcpu->arch.apf.msr_val = data;
+ vcpu->arch.apf.msr_en_val = data;
- if (!(data & KVM_ASYNC_PF_ENABLED)) {
+ if (!kvm_pv_async_pf_enabled(vcpu)) {
kvm_clear_async_pf_completion_queue(vcpu);
kvm_async_pf_hash_reset(vcpu);
return 0;
}
if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apf.data, gpa,
- sizeof(u32)))
+ sizeof(u64)))
return 1;
vcpu->arch.apf.send_user_only = !(data & KVM_ASYNC_PF_SEND_ALWAYS);
vcpu->arch.apf.delivery_as_pf_vmexit = data & KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT;
+
kvm_async_pf_wakeup_all(vcpu);
+
+ return 0;
+}
+
+static int kvm_pv_enable_async_pf_int(struct kvm_vcpu *vcpu, u64 data)
+{
+ /* Bits 8-63 are reserved */
+ if (data >> 8)
+ return 1;
+
+ if (!lapic_in_kernel(vcpu))
+ return 1;
+
+ vcpu->arch.apf.msr_int_val = data;
+
+ vcpu->arch.apf.vec = data & KVM_ASYNC_PF_VEC_MASK;
+
return 0;
}
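The split into MSR_KVM_ASYNC_PF_EN and MSR_KVM_ASYNC_PF_INT means a guest arms interrupt-based delivery with two MSR writes. A userspace sketch of the values it would compose; the MSR indices and flag bits restate the series' uapi definitions and should be treated as assumptions here:

#include <stdint.h>
#include <stdio.h>

#define MSR_KVM_ASYNC_PF_EN		0x4b564d02
#define MSR_KVM_ASYNC_PF_INT		0x4b564d06
#define KVM_ASYNC_PF_ENABLED		(1ULL << 0)
#define KVM_ASYNC_PF_DELIVERY_AS_INT	(1ULL << 3)

int main(void)
{
	uint64_t apf_area_gpa = 0x12340;	/* 64-byte aligned shared area */
	uint8_t vector = 0xec;			/* hypothetical interrupt vector */

	/* Vector goes into the ..._INT MSR; bits 8-63 must be zero. */
	uint64_t int_val = vector;
	/* Enable plus interrupt delivery go into the ..._EN MSR. */
	uint64_t en_val = apf_area_gpa | KVM_ASYNC_PF_ENABLED |
			  KVM_ASYNC_PF_DELIVERY_AS_INT;

	printf("wrmsr(%#x, %#llx)\n", MSR_KVM_ASYNC_PF_INT,
	       (unsigned long long)int_val);
	printf("wrmsr(%#x, %#llx)\n", MSR_KVM_ASYNC_PF_EN,
	       (unsigned long long)en_val);
	return 0;
}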
@@ -2669,10 +2736,16 @@ static void kvmclock_reset(struct kvm_vcpu *vcpu)
vcpu->arch.time = 0;
}
-static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
+static void kvm_vcpu_flush_tlb_all(struct kvm_vcpu *vcpu)
+{
+ ++vcpu->stat.tlb_flush;
+ kvm_x86_ops.tlb_flush_all(vcpu);
+}
+
+static void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu)
{
++vcpu->stat.tlb_flush;
- kvm_x86_ops.tlb_flush(vcpu, invalidate_gpa);
+ kvm_x86_ops.tlb_flush_guest(vcpu);
}
static void record_steal_time(struct kvm_vcpu *vcpu)
@@ -2698,7 +2771,7 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
trace_kvm_pv_tlb_flush(vcpu->vcpu_id,
st->preempted & KVM_VCPU_FLUSH_TLB);
if (xchg(&st->preempted, 0) & KVM_VCPU_FLUSH_TLB)
- kvm_vcpu_flush_tlb(vcpu, false);
+ kvm_vcpu_flush_tlb_guest(vcpu);
vcpu->arch.st.preempted = 0;
@@ -2875,6 +2948,16 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (kvm_pv_enable_async_pf(vcpu, data))
return 1;
break;
+ case MSR_KVM_ASYNC_PF_INT:
+ if (kvm_pv_enable_async_pf_int(vcpu, data))
+ return 1;
+ break;
+ case MSR_KVM_ASYNC_PF_ACK:
+ if (data & 0x1) {
+ vcpu->arch.apf.pageready_pending = false;
+ kvm_check_async_pf_completion(vcpu);
+ }
+ break;
case MSR_KVM_STEAL_TIME:
if (unlikely(!sched_info_on()))
@@ -2932,6 +3015,8 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
*/
break;
case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
+ case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER:
+ case HV_X64_MSR_SYNDBG_OPTIONS:
case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
case HV_X64_MSR_CRASH_CTL:
case HV_X64_MSR_STIMER0_CONFIG ... HV_X64_MSR_STIMER3_COUNT:
@@ -3071,7 +3156,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1:
case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL1:
if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
- return kvm_pmu_get_msr(vcpu, msr_info->index, &msr_info->data);
+ return kvm_pmu_get_msr(vcpu, msr_info);
msr_info->data = 0;
break;
case MSR_IA32_UCODE_REV:
@@ -3149,7 +3234,13 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = vcpu->arch.time;
break;
case MSR_KVM_ASYNC_PF_EN:
- msr_info->data = vcpu->arch.apf.msr_val;
+ msr_info->data = vcpu->arch.apf.msr_en_val;
+ break;
+ case MSR_KVM_ASYNC_PF_INT:
+ msr_info->data = vcpu->arch.apf.msr_int_val;
+ break;
+ case MSR_KVM_ASYNC_PF_ACK:
+ msr_info->data = 0;
break;
case MSR_KVM_STEAL_TIME:
msr_info->data = vcpu->arch.st.msr_val;
@@ -3187,6 +3278,8 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = 0x20000000;
break;
case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
+ case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER:
+ case HV_X64_MSR_SYNDBG_OPTIONS:
case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
case HV_X64_MSR_CRASH_CTL:
case HV_X64_MSR_STIMER0_CONFIG ... HV_X64_MSR_STIMER3_COUNT:
@@ -3233,7 +3326,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
break;
default:
if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
- return kvm_pmu_get_msr(vcpu, msr_info->index, &msr_info->data);
+ return kvm_pmu_get_msr(vcpu, msr_info);
if (!ignore_msrs) {
vcpu_debug_ratelimited(vcpu, "unhandled rdmsr: 0x%x\n",
msr_info->index);
@@ -3363,6 +3456,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_X86_ROBUST_SINGLESTEP:
case KVM_CAP_XSAVE:
case KVM_CAP_ASYNC_PF:
+ case KVM_CAP_ASYNC_PF_INT:
case KVM_CAP_GET_TSC_KHZ:
case KVM_CAP_KVMCLOCK_CTRL:
case KVM_CAP_READONLY_MEM:
@@ -3431,14 +3525,14 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = KVM_X2APIC_API_VALID_FLAGS;
break;
case KVM_CAP_NESTED_STATE:
- r = kvm_x86_ops.get_nested_state ?
- kvm_x86_ops.get_nested_state(NULL, NULL, 0) : 0;
+ r = kvm_x86_ops.nested_ops->get_state ?
+ kvm_x86_ops.nested_ops->get_state(NULL, NULL, 0) : 0;
break;
case KVM_CAP_HYPERV_DIRECT_TLBFLUSH:
r = kvm_x86_ops.enable_direct_tlbflush != NULL;
break;
case KVM_CAP_HYPERV_ENLIGHTENED_VMCS:
- r = kvm_x86_ops.nested_enable_evmcs != NULL;
+ r = kvm_x86_ops.nested_ops->enable_evmcs != NULL;
break;
default:
break;
@@ -4226,9 +4320,9 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
return kvm_hv_activate_synic(vcpu, cap->cap ==
KVM_CAP_HYPERV_SYNIC2);
case KVM_CAP_HYPERV_ENLIGHTENED_VMCS:
- if (!kvm_x86_ops.nested_enable_evmcs)
+ if (!kvm_x86_ops.nested_ops->enable_evmcs)
return -ENOTTY;
- r = kvm_x86_ops.nested_enable_evmcs(vcpu, &vmcs_version);
+ r = kvm_x86_ops.nested_ops->enable_evmcs(vcpu, &vmcs_version);
if (!r) {
user_ptr = (void __user *)(uintptr_t)cap->args[0];
if (copy_to_user(user_ptr, &vmcs_version,
@@ -4543,7 +4637,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
u32 user_data_size;
r = -EINVAL;
- if (!kvm_x86_ops.get_nested_state)
+ if (!kvm_x86_ops.nested_ops->get_state)
break;
BUILD_BUG_ON(sizeof(user_data_size) != sizeof(user_kvm_nested_state->size));
@@ -4551,8 +4645,8 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
if (get_user(user_data_size, &user_kvm_nested_state->size))
break;
- r = kvm_x86_ops.get_nested_state(vcpu, user_kvm_nested_state,
- user_data_size);
+ r = kvm_x86_ops.nested_ops->get_state(vcpu, user_kvm_nested_state,
+ user_data_size);
if (r < 0)
break;
@@ -4573,7 +4667,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
int idx;
r = -EINVAL;
- if (!kvm_x86_ops.set_nested_state)
+ if (!kvm_x86_ops.nested_ops->set_state)
break;
r = -EFAULT;
@@ -4586,7 +4680,8 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
if (kvm_state.flags &
~(KVM_STATE_NESTED_RUN_PENDING | KVM_STATE_NESTED_GUEST_MODE
- | KVM_STATE_NESTED_EVMCS))
+ | KVM_STATE_NESTED_EVMCS | KVM_STATE_NESTED_MTF_PENDING
+ | KVM_STATE_NESTED_GIF_SET))
break;
/* nested_run_pending implies guest_mode. */
@@ -4595,7 +4690,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
break;
idx = srcu_read_lock(&vcpu->kvm->srcu);
- r = kvm_x86_ops.set_nested_state(vcpu, user_kvm_nested_state, &kvm_state);
+ r = kvm_x86_ops.nested_ops->set_state(vcpu, user_kvm_nested_state, &kvm_state);
srcu_read_unlock(&vcpu->kvm->srcu, idx);
break;
}
@@ -5242,6 +5337,10 @@ static void kvm_init_msr_list(void)
if (!kvm_cpu_cap_has(X86_FEATURE_RDTSCP))
continue;
break;
+ case MSR_IA32_UMWAIT_CONTROL:
+ if (!kvm_cpu_cap_has(X86_FEATURE_WAITPKG))
+ continue;
+ break;
case MSR_IA32_RTIT_CTL:
case MSR_IA32_RTIT_STATUS:
if (!kvm_cpu_cap_has(X86_FEATURE_INTEL_PT))
@@ -5259,7 +5358,7 @@ static void kvm_init_msr_list(void)
!intel_pt_validate_hw_cap(PT_CAP_single_range_output)))
continue;
break;
- case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B: {
+ case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B:
if (!kvm_cpu_cap_has(X86_FEATURE_INTEL_PT) ||
msrs_to_save_all[i] - MSR_IA32_RTIT_ADDR0_A >=
intel_pt_validate_hw_cap(PT_CAP_num_address_ranges) * 2)
@@ -5274,7 +5373,7 @@ static void kvm_init_msr_list(void)
if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_EVENTSEL0 >=
min(INTEL_PMC_MAX_GENERIC, x86_pmu.num_counters_gp))
continue;
- }
+ break;
default:
break;
}
@@ -6403,7 +6502,7 @@ static bool inject_emulated_exception(struct kvm_vcpu *vcpu)
{
struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
if (ctxt->exception.vector == PF_VECTOR)
- return kvm_propagate_fault(vcpu, &ctxt->exception);
+ return kvm_inject_emulated_page_fault(vcpu, &ctxt->exception);
if (ctxt->exception.error_code_valid)
kvm_queue_exception_e(vcpu, ctxt->exception.vector,
@@ -7669,14 +7768,17 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu)
kvm_x86_ops.update_cr8_intercept(vcpu, tpr, max_irr);
}
-static int inject_pending_event(struct kvm_vcpu *vcpu)
+static void inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit)
{
int r;
+ bool can_inject = true;
/* try to reinject previous events if any */
- if (vcpu->arch.exception.injected)
+ if (vcpu->arch.exception.injected) {
kvm_x86_ops.queue_exception(vcpu);
+ can_inject = false;
+ }
/*
* Do not inject an NMI or interrupt if there is a pending
* exception. Exceptions and interrupts are recognized at
@@ -7692,22 +7794,28 @@ static int inject_pending_event(struct kvm_vcpu *vcpu)
* fully complete the previous instruction.
*/
else if (!vcpu->arch.exception.pending) {
- if (vcpu->arch.nmi_injected)
+ if (vcpu->arch.nmi_injected) {
kvm_x86_ops.set_nmi(vcpu);
- else if (vcpu->arch.interrupt.injected)
+ can_inject = false;
+ } else if (vcpu->arch.interrupt.injected) {
kvm_x86_ops.set_irq(vcpu);
+ can_inject = false;
+ }
}
+ WARN_ON_ONCE(vcpu->arch.exception.injected &&
+ vcpu->arch.exception.pending);
+
/*
* Call check_nested_events() even if we reinjected a previous event
* in order for caller to determine if it should require immediate-exit
* from L2 to L1 due to pending L1 events which require exit
* from L2 to L1.
*/
- if (is_guest_mode(vcpu) && kvm_x86_ops.check_nested_events) {
- r = kvm_x86_ops.check_nested_events(vcpu);
- if (r != 0)
- return r;
+ if (is_guest_mode(vcpu)) {
+ r = kvm_x86_ops.nested_ops->check_events(vcpu);
+ if (r < 0)
+ goto busy;
}
/* try to inject new event if pending */
@@ -7716,7 +7824,6 @@ static int inject_pending_event(struct kvm_vcpu *vcpu)
vcpu->arch.exception.has_error_code,
vcpu->arch.exception.error_code);
- WARN_ON_ONCE(vcpu->arch.exception.injected);
vcpu->arch.exception.pending = false;
vcpu->arch.exception.injected = true;
@@ -7725,16 +7832,6 @@ static int inject_pending_event(struct kvm_vcpu *vcpu)
X86_EFLAGS_RF);
if (vcpu->arch.exception.nr == DB_VECTOR) {
- /*
- * This code assumes that nSVM doesn't use
- * check_nested_events(). If it does, the
- * DR6/DR7 changes should happen before L1
- * gets a #VMEXIT for an intercepted #DB in
- * L2. (Under VMX, on the other hand, the
- * DR6/DR7 changes should not happen in the
- * event of a VM-exit to L1 for an intercepted
- * #DB in L2.)
- */
kvm_deliver_exception_payload(vcpu);
if (vcpu->arch.dr7 & DR7_GD) {
vcpu->arch.dr7 &= ~DR7_GD;
@@ -7743,42 +7840,72 @@ static int inject_pending_event(struct kvm_vcpu *vcpu)
}
kvm_x86_ops.queue_exception(vcpu);
+ can_inject = false;
}
- /* Don't consider new event if we re-injected an event */
- if (kvm_event_needs_reinjection(vcpu))
- return 0;
+ /*
+ * Finally, inject interrupt events. If an event cannot be injected
+ * due to architectural conditions (e.g. IF=0) a window-open exit
+ * will re-request KVM_REQ_EVENT. Sometimes however an event is pending
+ * and can architecturally be injected, but we cannot do it right now:
+ * an interrupt could have arrived just now and we have to inject it
+ * as a vmexit, or there could already be an event in the queue, which is
+ * indicated by can_inject. In that case we request an immediate exit
+ * in order to make progress and get back here for another iteration.
+ * The kvm_x86_ops hooks communicate this by returning -EBUSY.
+ */
+ if (vcpu->arch.smi_pending) {
+ r = can_inject ? kvm_x86_ops.smi_allowed(vcpu, true) : -EBUSY;
+ if (r < 0)
+ goto busy;
+ if (r) {
+ vcpu->arch.smi_pending = false;
+ ++vcpu->arch.smi_count;
+ enter_smm(vcpu);
+ can_inject = false;
+ } else
+ kvm_x86_ops.enable_smi_window(vcpu);
+ }
- if (vcpu->arch.smi_pending && !is_smm(vcpu) &&
- kvm_x86_ops.smi_allowed(vcpu)) {
- vcpu->arch.smi_pending = false;
- ++vcpu->arch.smi_count;
- enter_smm(vcpu);
- } else if (vcpu->arch.nmi_pending && kvm_x86_ops.nmi_allowed(vcpu)) {
- --vcpu->arch.nmi_pending;
- vcpu->arch.nmi_injected = true;
- kvm_x86_ops.set_nmi(vcpu);
- } else if (kvm_cpu_has_injectable_intr(vcpu)) {
- /*
- * Because interrupts can be injected asynchronously, we are
- * calling check_nested_events again here to avoid a race condition.
- * See https://lkml.org/lkml/2014/7/2/60 for discussion about this
- * proposal and current concerns. Perhaps we should be setting
- * KVM_REQ_EVENT only on certain events and not unconditionally?
- */
- if (is_guest_mode(vcpu) && kvm_x86_ops.check_nested_events) {
- r = kvm_x86_ops.check_nested_events(vcpu);
- if (r != 0)
- return r;
+ if (vcpu->arch.nmi_pending) {
+ r = can_inject ? kvm_x86_ops.nmi_allowed(vcpu, true) : -EBUSY;
+ if (r < 0)
+ goto busy;
+ if (r) {
+ --vcpu->arch.nmi_pending;
+ vcpu->arch.nmi_injected = true;
+ kvm_x86_ops.set_nmi(vcpu);
+ can_inject = false;
+ WARN_ON(kvm_x86_ops.nmi_allowed(vcpu, true) < 0);
}
- if (kvm_x86_ops.interrupt_allowed(vcpu)) {
- kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu),
- false);
+ if (vcpu->arch.nmi_pending)
+ kvm_x86_ops.enable_nmi_window(vcpu);
+ }
+
+ if (kvm_cpu_has_injectable_intr(vcpu)) {
+ r = can_inject ? kvm_x86_ops.interrupt_allowed(vcpu, true) : -EBUSY;
+ if (r < 0)
+ goto busy;
+ if (r) {
+ kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu), false);
kvm_x86_ops.set_irq(vcpu);
+ WARN_ON(kvm_x86_ops.interrupt_allowed(vcpu, true) < 0);
}
+ if (kvm_cpu_has_injectable_intr(vcpu))
+ kvm_x86_ops.enable_irq_window(vcpu);
}
- return 0;
+ if (is_guest_mode(vcpu) &&
+ kvm_x86_ops.nested_ops->hv_timer_pending &&
+ kvm_x86_ops.nested_ops->hv_timer_pending(vcpu))
+ *req_immediate_exit = true;
+
+ WARN_ON(vcpu->arch.exception.pending);
+ return;
+
+busy:
+ *req_immediate_exit = true;
+ return;
}
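The same three-way pattern is repeated above for SMI, NMI and external interrupts: ask the vendor hook whether injection is allowed, treat a negative return as "busy, request an immediate exit", and open the corresponding window if the event stays pending. A self-contained toy model of one event class; every name in it is illustrative rather than kernel API:

#include <stdbool.h>
#include <stdio.h>

static bool event_pending = true;	/* e.g. vcpu->arch.nmi_pending */
static bool window_requested;

static int event_allowed(bool for_injection)
{
	(void)for_injection;
	return 1;			/* 1: allowed, 0: blocked, <0: busy */
}

static void inject_one_class(bool *can_inject, bool *req_immediate_exit)
{
	int r;

	if (!event_pending)
		return;

	/* Busy: architecturally injectable, but something else is queued. */
	r = *can_inject ? event_allowed(true) : -16 /* -EBUSY */;
	if (r < 0) {
		*req_immediate_exit = true;	/* retry on the next VM entry */
		return;
	}
	if (r) {
		event_pending = false;		/* consume and inject the event */
		*can_inject = false;		/* one injection per entry */
	}
	if (event_pending)
		window_requested = true;	/* ask for a window-open exit */
}

int main(void)
{
	bool can_inject = true, req_immediate_exit = false;

	inject_one_class(&can_inject, &req_immediate_exit);
	printf("injected=%d immediate_exit=%d window=%d\n",
	       !event_pending, req_immediate_exit, window_requested);
	return 0;
}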
static void process_nmi(struct kvm_vcpu *vcpu)
@@ -8169,24 +8296,13 @@ int kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
{
- struct page *page = NULL;
-
if (!lapic_in_kernel(vcpu))
return;
if (!kvm_x86_ops.set_apic_access_page_addr)
return;
- page = gfn_to_page(vcpu->kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
- if (is_error_page(page))
- return;
- kvm_x86_ops.set_apic_access_page_addr(vcpu, page_to_phys(page));
-
- /*
- * Do not pin apic access page in memory, the MMU notifier
- * will call us again if it is migrated or swapped out.
- */
- put_page(page);
+ kvm_x86_ops.set_apic_access_page_addr(vcpu);
}
void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu)
@@ -8206,13 +8322,13 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
bool req_int_win =
dm_request_for_irq_injection(vcpu) &&
kvm_cpu_accept_dm_intr(vcpu);
- enum exit_fastpath_completion exit_fastpath = EXIT_FASTPATH_NONE;
+ fastpath_t exit_fastpath;
bool req_immediate_exit = false;
if (kvm_request_pending(vcpu)) {
if (kvm_check_request(KVM_REQ_GET_VMCS12_PAGES, vcpu)) {
- if (unlikely(!kvm_x86_ops.get_vmcs12_pages(vcpu))) {
+ if (unlikely(!kvm_x86_ops.nested_ops->get_vmcs12_pages(vcpu))) {
r = 0;
goto out;
}
@@ -8234,8 +8350,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
kvm_mmu_sync_roots(vcpu);
if (kvm_check_request(KVM_REQ_LOAD_MMU_PGD, vcpu))
kvm_mmu_load_pgd(vcpu);
- if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
- kvm_vcpu_flush_tlb(vcpu, true);
+ if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
+ kvm_vcpu_flush_tlb_all(vcpu);
+
+ /* Flushing all ASIDs flushes the current ASID... */
+ kvm_clear_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
+ }
+ if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
+ kvm_vcpu_flush_tlb_current(vcpu);
+ if (kvm_check_request(KVM_REQ_HV_TLB_FLUSH, vcpu))
+ kvm_vcpu_flush_tlb_guest(vcpu);
+
if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;
r = 0;
@@ -8308,6 +8433,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
kvm_hv_process_stimers(vcpu);
if (kvm_check_request(KVM_REQ_APICV_UPDATE, vcpu))
kvm_vcpu_update_apicv(vcpu);
+ if (kvm_check_request(KVM_REQ_APF_READY, vcpu))
+ kvm_check_async_pf_completion(vcpu);
}
if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
@@ -8318,32 +8445,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
goto out;
}
- if (inject_pending_event(vcpu) != 0)
- req_immediate_exit = true;
- else {
- /* Enable SMI/NMI/IRQ window open exits if needed.
- *
- * SMIs have three cases:
- * 1) They can be nested, and then there is nothing to
- * do here because RSM will cause a vmexit anyway.
- * 2) There is an ISA-specific reason why SMI cannot be
- * injected, and the moment when this changes can be
- * intercepted.
- * 3) Or the SMI can be pending because
- * inject_pending_event has completed the injection
- * of an IRQ or NMI from the previous vmexit, and
- * then we request an immediate exit to inject the
- * SMI.
- */
- if (vcpu->arch.smi_pending && !is_smm(vcpu))
- if (!kvm_x86_ops.enable_smi_window(vcpu))
- req_immediate_exit = true;
- if (vcpu->arch.nmi_pending)
- kvm_x86_ops.enable_nmi_window(vcpu);
- if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
- kvm_x86_ops.enable_irq_window(vcpu);
- WARN_ON(vcpu->arch.exception.pending);
- }
+ inject_pending_event(vcpu, &req_immediate_exit);
+ if (req_int_win)
+ kvm_x86_ops.enable_irq_window(vcpu);
if (kvm_lapic_enabled(vcpu)) {
update_cr8_intercept(vcpu);
@@ -8391,8 +8495,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
if (kvm_lapic_enabled(vcpu) && vcpu->arch.apicv_active)
kvm_x86_ops.sync_pir_to_irr(vcpu);
- if (vcpu->mode == EXITING_GUEST_MODE || kvm_request_pending(vcpu)
- || need_resched() || signal_pending(current)) {
+ if (kvm_vcpu_exit_request(vcpu)) {
vcpu->mode = OUTSIDE_GUEST_MODE;
smp_wmb();
local_irq_enable();
@@ -8424,7 +8527,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD;
}
- kvm_x86_ops.run(vcpu);
+ exit_fastpath = kvm_x86_ops.run(vcpu);
/*
* Do this here before restoring debug registers on the host. And
@@ -8455,7 +8558,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
vcpu->mode = OUTSIDE_GUEST_MODE;
smp_wmb();
- kvm_x86_ops.handle_exit_irqoff(vcpu, &exit_fastpath);
+ kvm_x86_ops.handle_exit_irqoff(vcpu);
/*
* Consume any pending interrupts, including the possible source of
@@ -8502,6 +8605,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
return r;
cancel_injection:
+ if (req_immediate_exit)
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
kvm_x86_ops.cancel_injection(vcpu);
if (unlikely(vcpu->arch.apic_attention))
kvm_lapic_sync_from_vapic(vcpu);
@@ -8544,8 +8649,8 @@ static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
static inline bool kvm_vcpu_running(struct kvm_vcpu *vcpu)
{
- if (is_guest_mode(vcpu) && kvm_x86_ops.check_nested_events)
- kvm_x86_ops.check_nested_events(vcpu);
+ if (is_guest_mode(vcpu))
+ kvm_x86_ops.nested_ops->check_events(vcpu);
return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
!vcpu->arch.apf.halted);
@@ -8581,8 +8686,6 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
break;
}
- kvm_check_async_pf_completion(vcpu);
-
if (signal_pending(current)) {
r = -EINTR;
vcpu->run->exit_reason = KVM_EXIT_INTR;
@@ -8727,8 +8830,9 @@ static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
trace_kvm_fpu(0);
}
-int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
{
+ struct kvm_run *kvm_run = vcpu->run;
int r;
vcpu_load(vcpu);
@@ -8746,18 +8850,18 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
r = -EAGAIN;
if (signal_pending(current)) {
r = -EINTR;
- vcpu->run->exit_reason = KVM_EXIT_INTR;
+ kvm_run->exit_reason = KVM_EXIT_INTR;
++vcpu->stat.signal_exits;
}
goto out;
}
- if (vcpu->run->kvm_valid_regs & ~KVM_SYNC_X86_VALID_FIELDS) {
+ if (kvm_run->kvm_valid_regs & ~KVM_SYNC_X86_VALID_FIELDS) {
r = -EINVAL;
goto out;
}
- if (vcpu->run->kvm_dirty_regs) {
+ if (kvm_run->kvm_dirty_regs) {
r = sync_regs(vcpu);
if (r != 0)
goto out;
@@ -8787,7 +8891,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
out:
kvm_put_guest_fpu(vcpu);
- if (vcpu->run->kvm_valid_regs)
+ if (kvm_run->kvm_valid_regs)
store_regs(vcpu);
post_kvm_run_save(vcpu);
kvm_sigset_deactivate(vcpu);
@@ -9379,9 +9483,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
}
fx_init(vcpu);
- vcpu->arch.guest_xstate_size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
-
vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
+ vcpu->arch.tdp_level = kvm_x86_ops.get_tdp_level(vcpu);
vcpu->arch.pat = MSR_IA32_CR_PAT_DEFAULT;
@@ -9502,7 +9605,8 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
vcpu->arch.cr2 = 0;
kvm_make_request(KVM_REQ_EVENT, vcpu);
- vcpu->arch.apf.msr_val = 0;
+ vcpu->arch.apf.msr_en_val = 0;
+ vcpu->arch.apf.msr_int_val = 0;
vcpu->arch.st.msr_val = 0;
kvmclock_reset(vcpu);
@@ -10040,7 +10144,7 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
{
/* Still write protect RO slot */
if (new->flags & KVM_MEM_READONLY) {
- kvm_mmu_slot_remove_write_access(kvm, new, PT_PAGE_TABLE_LEVEL);
+ kvm_mmu_slot_remove_write_access(kvm, new, PG_LEVEL_4K);
return;
}
@@ -10080,7 +10184,7 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
} else {
int level =
kvm_dirty_log_manual_protect_and_init_set(kvm) ?
- PT_DIRECTORY_LEVEL : PT_PAGE_TABLE_LEVEL;
+ PG_LEVEL_2M : PG_LEVEL_4K;
/*
* If we're with initial-all-set, we don't need
@@ -10182,11 +10286,12 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
if (kvm_test_request(KVM_REQ_NMI, vcpu) ||
(vcpu->arch.nmi_pending &&
- kvm_x86_ops.nmi_allowed(vcpu)))
+ kvm_x86_ops.nmi_allowed(vcpu, false)))
return true;
if (kvm_test_request(KVM_REQ_SMI, vcpu) ||
- (vcpu->arch.smi_pending && !is_smm(vcpu)))
+ (vcpu->arch.smi_pending &&
+ kvm_x86_ops.smi_allowed(vcpu, false)))
return true;
if (kvm_arch_interrupt_allowed(vcpu) &&
@@ -10197,6 +10302,11 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
if (kvm_hv_has_stimer_pending(vcpu))
return true;
+ if (is_guest_mode(vcpu) &&
+ kvm_x86_ops.nested_ops->hv_timer_pending &&
+ kvm_x86_ops.nested_ops->hv_timer_pending(vcpu))
+ return true;
+
return false;
}
@@ -10233,7 +10343,7 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
{
- return kvm_x86_ops.interrupt_allowed(vcpu);
+ return kvm_x86_ops.interrupt_allowed(vcpu, false);
}
unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu)
@@ -10298,12 +10408,14 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
static inline u32 kvm_async_pf_hash_fn(gfn_t gfn)
{
+ BUILD_BUG_ON(!is_power_of_2(ASYNC_PF_PER_VCPU));
+
return hash_32(gfn & 0xffffffff, order_base_2(ASYNC_PF_PER_VCPU));
}
static inline u32 kvm_async_pf_next_probe(u32 key)
{
- return (key + 1) & (roundup_pow_of_two(ASYNC_PF_PER_VCPU) - 1);
+ return (key + 1) & (ASYNC_PF_PER_VCPU - 1);
}
static void kvm_add_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
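The gfn tracking touched above is a small open-addressed hash table with linear probing; making ASYNC_PF_PER_VCPU a power of two lets the probe step wrap with a mask instead of a division. A userspace model of the probing (the table size and the hash mix below are illustrative, not the kernel's hash_32()):

#include <stdint.h>
#include <stdio.h>

#define ASYNC_PF_PER_VCPU 64			/* must be a power of two */

static uint64_t gfns[ASYNC_PF_PER_VCPU];

static uint32_t hash_fn(uint64_t gfn)
{
	/* Stand-in for hash_32(gfn, order_base_2(ASYNC_PF_PER_VCPU)). */
	return (uint32_t)(gfn * 2654435761u) % ASYNC_PF_PER_VCPU;
}

static uint32_t next_probe(uint32_t key)
{
	/* Power-of-two table: wrap with a mask, no division needed. */
	return (key + 1) & (ASYNC_PF_PER_VCPU - 1);
}

static void add_gfn(uint64_t gfn)
{
	uint32_t key = hash_fn(gfn);

	while (gfns[key] != ~0ULL)		/* linear probing on collision */
		key = next_probe(key);
	gfns[key] = gfn;
}

int main(void)
{
	for (int i = 0; i < ASYNC_PF_PER_VCPU; i++)
		gfns[i] = ~0ULL;		/* ~0 marks an empty slot */

	add_gfn(0x1234);
	add_gfn(0x1234 + ASYNC_PF_PER_VCPU);	/* same bucket, probes to key+1 */
	printf("first slot: %u\n", hash_fn(0x1234));
	return 0;
}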
@@ -10321,7 +10433,7 @@ static u32 kvm_async_pf_gfn_slot(struct kvm_vcpu *vcpu, gfn_t gfn)
int i;
u32 key = kvm_async_pf_hash_fn(gfn);
- for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU) &&
+ for (i = 0; i < ASYNC_PF_PER_VCPU &&
(vcpu->arch.apf.gfns[key] != gfn &&
vcpu->arch.apf.gfns[key] != ~0); i++)
key = kvm_async_pf_next_probe(key);
@@ -10339,6 +10451,10 @@ static void kvm_del_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
u32 i, j, k;
i = j = kvm_async_pf_gfn_slot(vcpu, gfn);
+
+ if (WARN_ON_ONCE(vcpu->arch.apf.gfns[i] != gfn))
+ return;
+
while (true) {
vcpu->arch.apf.gfns[i] = ~0;
do {
@@ -10357,18 +10473,32 @@ static void kvm_del_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
}
}
-static int apf_put_user(struct kvm_vcpu *vcpu, u32 val)
+static inline int apf_put_user_notpresent(struct kvm_vcpu *vcpu)
{
+ u32 reason = KVM_PV_REASON_PAGE_NOT_PRESENT;
- return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, &val,
- sizeof(val));
+ return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, &reason,
+ sizeof(reason));
}
-static int apf_get_user(struct kvm_vcpu *vcpu, u32 *val)
+static inline int apf_put_user_ready(struct kvm_vcpu *vcpu, u32 token)
{
+ unsigned int offset = offsetof(struct kvm_vcpu_pv_apf_data, token);
- return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, val,
- sizeof(u32));
+ return kvm_write_guest_offset_cached(vcpu->kvm, &vcpu->arch.apf.data,
+ &token, offset, sizeof(token));
+}
+
+static inline bool apf_pageready_slot_free(struct kvm_vcpu *vcpu)
+{
+ unsigned int offset = offsetof(struct kvm_vcpu_pv_apf_data, token);
+ u32 val;
+
+ if (kvm_read_guest_offset_cached(vcpu->kvm, &vcpu->arch.apf.data,
+ &val, offset, sizeof(val)))
+ return false;
+
+ return !val;
}
static bool kvm_can_deliver_async_pf(struct kvm_vcpu *vcpu)
@@ -10376,9 +10506,8 @@ static bool kvm_can_deliver_async_pf(struct kvm_vcpu *vcpu)
if (!vcpu->arch.apf.delivery_as_pf_vmexit && is_guest_mode(vcpu))
return false;
- if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) ||
- (vcpu->arch.apf.send_user_only &&
- kvm_x86_ops.get_cpl(vcpu) == 0))
+ if (!kvm_pv_async_pf_enabled(vcpu) ||
+ (vcpu->arch.apf.send_user_only && kvm_x86_ops.get_cpl(vcpu) == 0))
return false;
return true;
@@ -10398,7 +10527,7 @@ bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu)
* If interrupts are off we cannot even use an artificial
* halt state.
*/
- return kvm_x86_ops.interrupt_allowed(vcpu);
+ return kvm_arch_interrupt_allowed(vcpu);
}
void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
@@ -10410,7 +10539,7 @@ void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
kvm_add_async_pf_gfn(vcpu, work->arch.gfn);
if (kvm_can_deliver_async_pf(vcpu) &&
- !apf_put_user(vcpu, KVM_PV_REASON_PAGE_NOT_PRESENT)) {
+ !apf_put_user_notpresent(vcpu)) {
fault.vector = PF_VECTOR;
fault.error_code_valid = true;
fault.error_code = 0;
@@ -10434,8 +10563,10 @@ void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work)
{
- struct x86_exception fault;
- u32 val;
+ struct kvm_lapic_irq irq = {
+ .delivery_mode = APIC_DM_FIXED,
+ .vector = vcpu->arch.apf.vec
+ };
if (work->wakeup_all)
work->arch.token = ~0; /* broadcast wakeup */
@@ -10443,39 +10574,29 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
kvm_del_async_pf_gfn(vcpu, work->arch.gfn);
trace_kvm_async_pf_ready(work->arch.token, work->cr2_or_gpa);
- if (vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED &&
- !apf_get_user(vcpu, &val)) {
- if (val == KVM_PV_REASON_PAGE_NOT_PRESENT &&
- vcpu->arch.exception.pending &&
- vcpu->arch.exception.nr == PF_VECTOR &&
- !apf_put_user(vcpu, 0)) {
- vcpu->arch.exception.injected = false;
- vcpu->arch.exception.pending = false;
- vcpu->arch.exception.nr = 0;
- vcpu->arch.exception.has_error_code = false;
- vcpu->arch.exception.error_code = 0;
- vcpu->arch.exception.has_payload = false;
- vcpu->arch.exception.payload = 0;
- } else if (!apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) {
- fault.vector = PF_VECTOR;
- fault.error_code_valid = true;
- fault.error_code = 0;
- fault.nested_page_fault = false;
- fault.address = work->arch.token;
- fault.async_page_fault = true;
- kvm_inject_page_fault(vcpu, &fault);
- }
+ if (kvm_pv_async_pf_enabled(vcpu) &&
+ !apf_put_user_ready(vcpu, work->arch.token)) {
+ vcpu->arch.apf.pageready_pending = true;
+ kvm_apic_set_irq(vcpu, &irq, NULL);
}
+
vcpu->arch.apf.halted = false;
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
}
-bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
+void kvm_arch_async_page_present_queued(struct kvm_vcpu *vcpu)
+{
+ kvm_make_request(KVM_REQ_APF_READY, vcpu);
+ if (!vcpu->arch.apf.pageready_pending)
+ kvm_vcpu_kick(vcpu);
+}
+
+bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
{
- if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED))
+ if (!kvm_pv_async_pf_enabled(vcpu))
return true;
else
- return kvm_can_do_async_pf(vcpu);
+ return apf_pageready_slot_free(vcpu);
}
void kvm_arch_start_assignment(struct kvm *kvm)
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index b968acc0516f..6eb62e97e59f 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -125,6 +125,12 @@ static inline bool mmu_is_nested(struct kvm_vcpu *vcpu)
return vcpu->arch.walk_mmu == &vcpu->arch.nested_mmu;
}
+static inline void kvm_vcpu_flush_tlb_current(struct kvm_vcpu *vcpu)
+{
+ ++vcpu->stat.tlb_flush;
+ kvm_x86_ops.tlb_flush_current(vcpu);
+}
+
static inline int is_pae(struct kvm_vcpu *vcpu)
{
return kvm_read_cr4_bits(vcpu, X86_CR4_PAE);
@@ -268,7 +274,7 @@ bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn,
bool kvm_vector_hashing_enabled(void);
int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
int emulation_type, void *insn, int insn_len);
-enum exit_fastpath_completion handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu);
+fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu);
extern u64 host_xcr0;
extern u64 supported_xcr0;
@@ -358,5 +364,6 @@ static inline bool kvm_dr7_valid(u64 data)
void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu);
void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu);
u64 kvm_spec_ctrl_valid_bits(struct kvm_vcpu *vcpu);
+bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu);
#endif
diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S
index 4742e8fa7ee7..d1d768912368 100644
--- a/arch/x86/lib/checksum_32.S
+++ b/arch/x86/lib/checksum_32.S
@@ -153,7 +153,7 @@ SYM_FUNC_START(csum_partial)
negl %ebx
lea 45f(%ebx,%ebx,2), %ebx
testl %esi, %esi
- JMP_NOSPEC %ebx
+ JMP_NOSPEC ebx
# Handle 2-byte-aligned regions
20: addw (%esi), %ax
@@ -436,7 +436,7 @@ SYM_FUNC_START(csum_partial_copy_generic)
andl $-32,%edx
lea 3f(%ebx,%ebx), %ebx
testl %esi, %esi
- JMP_NOSPEC %ebx
+ JMP_NOSPEC ebx
1: addl $64,%esi
addl $64,%edi
SRC(movb -32(%edx),%bl) ; SRC(movb (%edx),%bl)
diff --git a/arch/x86/lib/csum-wrappers_64.c b/arch/x86/lib/csum-wrappers_64.c
index c66c8b00f236..ee63d7576fd2 100644
--- a/arch/x86/lib/csum-wrappers_64.c
+++ b/arch/x86/lib/csum-wrappers_64.c
@@ -10,7 +10,7 @@
#include <asm/smap.h>
/**
- * csum_partial_copy_from_user - Copy and checksum from user space.
+ * csum_and_copy_from_user - Copy and checksum from user space.
* @src: source address (user space)
* @dst: destination address
* @len: number of bytes to be copied.
@@ -21,13 +21,13 @@
* src and dst are best aligned to 64bits.
*/
__wsum
-csum_partial_copy_from_user(const void __user *src, void *dst,
+csum_and_copy_from_user(const void __user *src, void *dst,
int len, __wsum isum, int *errp)
{
might_sleep();
*errp = 0;
- if (!likely(access_ok(src, len)))
+ if (!user_access_begin(src, len))
goto out_err;
/*
@@ -42,8 +42,7 @@ csum_partial_copy_from_user(const void __user *src, void *dst,
while (((unsigned long)src & 6) && len >= 2) {
__u16 val16;
- if (__get_user(val16, (const __u16 __user *)src))
- goto out_err;
+ unsafe_get_user(val16, (const __u16 __user *)src, out);
*(__u16 *)dst = val16;
isum = (__force __wsum)add32_with_carry(
@@ -53,25 +52,26 @@ csum_partial_copy_from_user(const void __user *src, void *dst,
len -= 2;
}
}
- stac();
isum = csum_partial_copy_generic((__force const void *)src,
dst, len, isum, errp, NULL);
- clac();
+ user_access_end();
if (unlikely(*errp))
goto out_err;
return isum;
+out:
+ user_access_end();
out_err:
*errp = -EFAULT;
memset(dst, 0, len);
return isum;
}
-EXPORT_SYMBOL(csum_partial_copy_from_user);
+EXPORT_SYMBOL(csum_and_copy_from_user);
/**
- * csum_partial_copy_to_user - Copy and checksum to user space.
+ * csum_and_copy_to_user - Copy and checksum to user space.
* @src: source address
* @dst: destination address (user space)
* @len: number of bytes to be copied.
@@ -82,14 +82,14 @@ EXPORT_SYMBOL(csum_partial_copy_from_user);
* src and dst are best aligned to 64bits.
*/
__wsum
-csum_partial_copy_to_user(const void *src, void __user *dst,
+csum_and_copy_to_user(const void *src, void __user *dst,
int len, __wsum isum, int *errp)
{
__wsum ret;
might_sleep();
- if (unlikely(!access_ok(dst, len))) {
+ if (!user_access_begin(dst, len)) {
*errp = -EFAULT;
return 0;
}
@@ -100,9 +100,7 @@ csum_partial_copy_to_user(const void *src, void __user *dst,
isum = (__force __wsum)add32_with_carry(
(__force unsigned)isum, val16);
- *errp = __put_user(val16, (__u16 __user *)dst);
- if (*errp)
- return isum;
+ unsafe_put_user(val16, (__u16 __user *)dst, out);
src += 2;
dst += 2;
len -= 2;
@@ -110,13 +108,16 @@ csum_partial_copy_to_user(const void *src, void __user *dst,
}
*errp = 0;
- stac();
ret = csum_partial_copy_generic(src, (void __force *)dst,
len, isum, NULL, errp);
- clac();
+ user_access_end();
return ret;
+out:
+ user_access_end();
+ *errp = -EFAULT;
+ return isum;
}
-EXPORT_SYMBOL(csum_partial_copy_to_user);
+EXPORT_SYMBOL(csum_and_copy_to_user);
/**
* csum_partial_copy_nocheck - Copy and checksum.
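The rewrite above follows the standard pattern for batched user access: open a window with user_access_begin(), use the unsafe_*_user() helpers with a fault label inside it, and close the window with user_access_end() on every exit path. A kernel-style sketch of that shape, assuming the usual uaccess API and trimmed of everything else:

static int read_two_u16(const void __user *src, u16 *a, u16 *b)
{
	if (!user_access_begin(src, 2 * sizeof(u16)))
		return -EFAULT;

	/* A fault jumps to the label instead of returning an error code. */
	unsafe_get_user(*a, (const u16 __user *)src, out);
	unsafe_get_user(*b, (const u16 __user *)src + 1, out);

	user_access_end();		/* close the window on success... */
	return 0;
out:
	user_access_end();		/* ...and on the fault path as well */
	return -EFAULT;
}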
diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
index c126571e5e2e..65d15df6212d 100644
--- a/arch/x86/lib/delay.c
+++ b/arch/x86/lib/delay.c
@@ -27,9 +27,20 @@
# include <asm/smp.h>
#endif
+static void delay_loop(u64 __loops);
+
+/*
+ * Calibration and selection of the delay mechanism happens only once
+ * during boot.
+ */
+static void (*delay_fn)(u64) __ro_after_init = delay_loop;
+static void (*delay_halt_fn)(u64 start, u64 cycles) __ro_after_init;
+
/* simple loop based delay: */
-static void delay_loop(unsigned long loops)
+static void delay_loop(u64 __loops)
{
+ unsigned long loops = (unsigned long)__loops;
+
asm volatile(
" test %0,%0 \n"
" jz 3f \n"
@@ -49,9 +60,9 @@ static void delay_loop(unsigned long loops)
}
/* TSC based delay: */
-static void delay_tsc(unsigned long __loops)
+static void delay_tsc(u64 cycles)
{
- u64 bclock, now, loops = __loops;
+ u64 bclock, now;
int cpu;
preempt_disable();
@@ -59,7 +70,7 @@ static void delay_tsc(unsigned long __loops)
bclock = rdtsc_ordered();
for (;;) {
now = rdtsc_ordered();
- if ((now - bclock) >= loops)
+ if ((now - bclock) >= cycles)
break;
/* Allow RT tasks to run */
@@ -77,7 +88,7 @@ static void delay_tsc(unsigned long __loops)
* counter for this CPU.
*/
if (unlikely(cpu != smp_processor_id())) {
- loops -= (now - bclock);
+ cycles -= (now - bclock);
cpu = smp_processor_id();
bclock = rdtsc_ordered();
}
@@ -86,65 +97,96 @@ static void delay_tsc(unsigned long __loops)
}
/*
+ * On Intel the TPAUSE instruction waits until any of:
+ * 1) the TSC counter exceeds the value provided in EDX:EAX
+ * 2) global timeout in IA32_UMWAIT_CONTROL is exceeded
+ * 3) an external interrupt occurs
+ */
+static void delay_halt_tpause(u64 start, u64 cycles)
+{
+ u64 until = start + cycles;
+ u32 eax, edx;
+
+ eax = lower_32_bits(until);
+ edx = upper_32_bits(until);
+
+ /*
+ * Hard code the deeper (C0.2) sleep state because exit latency is
+ * small compared to the "microseconds" that usleep() will delay.
+ */
+ __tpause(TPAUSE_C02_STATE, edx, eax);
+}
+
+/*
* On some AMD platforms, MWAITX has a configurable 32-bit timer, that
- * counts with TSC frequency. The input value is the loop of the
- * counter, it will exit when the timer expires.
+ * counts with TSC frequency. The input value is the number of TSC cycles
+ * to wait. MWAITX will also exit when the timer expires.
*/
-static void delay_mwaitx(unsigned long __loops)
+static void delay_halt_mwaitx(u64 unused, u64 cycles)
{
- u64 start, end, delay, loops = __loops;
+ u64 delay;
+
+ delay = min_t(u64, MWAITX_MAX_WAIT_CYCLES, cycles);
+ /*
+ * Use cpu_tss_rw as a cacheline-aligned, seldom accessed per-cpu
+ * variable as the monitor target.
+ */
+ __monitorx(raw_cpu_ptr(&cpu_tss_rw), 0, 0);
+
+ /*
+ * AMD, like Intel, supports the EAX hint; EAX=0xf means do not enter
+ * any deep C-state, and it is used here in delay() to minimize wakeup
+ * latency.
+ */
+ __mwaitx(MWAITX_DISABLE_CSTATES, delay, MWAITX_ECX_TIMER_ENABLE);
+}
+
+/*
+ * Call a vendor specific function to delay for a given amount of time. Because
+ * these functions may return earlier than requested, check for actual elapsed
+ * time and call again until done.
+ */
+static void delay_halt(u64 __cycles)
+{
+ u64 start, end, cycles = __cycles;
/*
* Timer value of 0 causes MWAITX to wait indefinitely, unless there
* is a store on the memory monitored by MONITORX.
*/
- if (loops == 0)
+ if (!cycles)
return;
start = rdtsc_ordered();
for (;;) {
- delay = min_t(u64, MWAITX_MAX_LOOPS, loops);
-
- /*
- * Use cpu_tss_rw as a cacheline-aligned, seldomly
- * accessed per-cpu variable as the monitor target.
- */
- __monitorx(raw_cpu_ptr(&cpu_tss_rw), 0, 0);
-
- /*
- * AMD, like Intel's MWAIT version, supports the EAX hint and
- * EAX=0xf0 means, do not enter any deep C-state and we use it
- * here in delay() to minimize wakeup latency.
- */
- __mwaitx(MWAITX_DISABLE_CSTATES, delay, MWAITX_ECX_TIMER_ENABLE);
-
+ delay_halt_fn(start, cycles);
end = rdtsc_ordered();
- if (loops <= end - start)
+ if (cycles <= end - start)
break;
- loops -= end - start;
-
+ cycles -= end - start;
start = end;
}
}
-/*
- * Since we calibrate only once at boot, this
- * function should be set once at boot and not changed
- */
-static void (*delay_fn)(unsigned long) = delay_loop;
-
-void use_tsc_delay(void)
+void __init use_tsc_delay(void)
{
if (delay_fn == delay_loop)
delay_fn = delay_tsc;
}
+void __init use_tpause_delay(void)
+{
+ delay_halt_fn = delay_halt_tpause;
+ delay_fn = delay_halt;
+}
+
void use_mwaitx_delay(void)
{
- delay_fn = delay_mwaitx;
+ delay_halt_fn = delay_halt_mwaitx;
+ delay_fn = delay_halt;
}
int read_current_timer(unsigned long *timer_val)
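With the refactor above, delay() is driven by a boot-selected function pointer: delay_loop by default, delay_tsc once the TSC is calibrated, and delay_halt with a vendor halt primitive (TPAUSE or MWAITX) that is retried until the requested number of cycles has elapsed. A sketch of the selection this enables; the actual TPAUSE call site is assumed to live in tsc_init():

static void __init select_delay_mechanism(void)
{
	if (boot_cpu_has(X86_FEATURE_WAITPKG))
		use_tpause_delay();	/* delay_fn = delay_halt, halt via TPAUSE */
	else
		use_tsc_delay();	/* delay_fn = delay_tsc */

	/* AMD parts with MWAITX instead call use_mwaitx_delay() from their
	 * CPU setup code, which routes delay_halt() to MWAITX. */
}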
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index 363ec132df7e..b4c43a9b1483 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -7,15 +7,31 @@
#include <asm/alternative-asm.h>
#include <asm/export.h>
#include <asm/nospec-branch.h>
+#include <asm/unwind_hints.h>
+#include <asm/frame.h>
.macro THUNK reg
.section .text.__x86.indirect_thunk
+ .align 32
SYM_FUNC_START(__x86_indirect_thunk_\reg)
- CFI_STARTPROC
- JMP_NOSPEC %\reg
- CFI_ENDPROC
+ JMP_NOSPEC \reg
SYM_FUNC_END(__x86_indirect_thunk_\reg)
+
+SYM_FUNC_START_NOALIGN(__x86_retpoline_\reg)
+ ANNOTATE_INTRA_FUNCTION_CALL
+ call .Ldo_rop_\@
+.Lspec_trap_\@:
+ UNWIND_HINT_EMPTY
+ pause
+ lfence
+ jmp .Lspec_trap_\@
+.Ldo_rop_\@:
+ mov %\reg, (%_ASM_SP)
+ UNWIND_HINT_RET_OFFSET
+ ret
+SYM_FUNC_END(__x86_retpoline_\reg)
+
.endm
/*
@@ -24,25 +40,24 @@ SYM_FUNC_END(__x86_indirect_thunk_\reg)
* only see one instance of "__x86_indirect_thunk_\reg" rather
* than one per register with the correct names. So we do it
* the simple and nasty way...
+ *
+ * Worse, you can only have a single EXPORT_SYMBOL per line,
+ * and CPP can't insert newlines, so we have to repeat everything
+ * at least twice.
*/
-#define __EXPORT_THUNK(sym) _ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym)
-#define EXPORT_THUNK(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg)
-#define GENERATE_THUNK(reg) THUNK reg ; EXPORT_THUNK(reg)
-
-GENERATE_THUNK(_ASM_AX)
-GENERATE_THUNK(_ASM_BX)
-GENERATE_THUNK(_ASM_CX)
-GENERATE_THUNK(_ASM_DX)
-GENERATE_THUNK(_ASM_SI)
-GENERATE_THUNK(_ASM_DI)
-GENERATE_THUNK(_ASM_BP)
-#ifdef CONFIG_64BIT
-GENERATE_THUNK(r8)
-GENERATE_THUNK(r9)
-GENERATE_THUNK(r10)
-GENERATE_THUNK(r11)
-GENERATE_THUNK(r12)
-GENERATE_THUNK(r13)
-GENERATE_THUNK(r14)
-GENERATE_THUNK(r15)
-#endif
+
+#define __EXPORT_THUNK(sym) _ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym)
+#define EXPORT_THUNK(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg)
+#define EXPORT_RETPOLINE(reg) __EXPORT_THUNK(__x86_retpoline_ ## reg)
+
+#undef GEN
+#define GEN(reg) THUNK reg
+#include <asm/GEN-for-each-reg.h>
+
+#undef GEN
+#define GEN(reg) EXPORT_THUNK(reg)
+#include <asm/GEN-for-each-reg.h>
+
+#undef GEN
+#define GEN(reg) EXPORT_RETPOLINE(reg)
+#include <asm/GEN-for-each-reg.h>
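The GEN-for-each-reg.h trick above replaces the hand-rolled GENERATE_THUNK list: the header expands a GEN() macro once per general-purpose register and is included repeatedly with a different GEN definition each time. A paraphrase of how that composes (the header contents shown are a sketch, not the verbatim file):

/* asm/GEN-for-each-reg.h, roughly: */
GEN(rax)
GEN(rbx)
GEN(rcx)
/* ... one line per GPR, r8-r15 only under CONFIG_64BIT ... */

/* each user redefines GEN and re-includes the header: */
#undef GEN
#define GEN(reg) THUNK reg
#include <asm/GEN-for-each-reg.h>	/* emits one thunk per register */

#undef GEN
#define GEN(reg) EXPORT_THUNK(reg)
#include <asm/GEN-for-each-reg.h>	/* exports each thunk symbol */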
diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
index 56f9189bbadb..5199d8a1daf1 100644
--- a/arch/x86/mm/cpu_entry_area.c
+++ b/arch/x86/mm/cpu_entry_area.c
@@ -17,7 +17,7 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(struct exception_stacks, exception_stacks);
DEFINE_PER_CPU(struct cea_exception_stacks*, cea_exception_stacks);
#endif
-#if defined(CONFIG_X86_32) && defined(CONFIG_DOUBLEFAULT)
+#ifdef CONFIG_X86_32
DECLARE_PER_CPU_PAGE_ALIGNED(struct doublefault_stack, doublefault_stack);
#endif
@@ -114,12 +114,10 @@ static void __init percpu_setup_exception_stacks(unsigned int cpu)
#else
static inline void percpu_setup_exception_stacks(unsigned int cpu)
{
-#ifdef CONFIG_DOUBLEFAULT
struct cpu_entry_area *cea = get_cpu_entry_area(cpu);
cea_map_percpu_pages(&cea->doublefault_stack,
&per_cpu(doublefault_stack, cpu), 1, PAGE_KERNEL);
-#endif
}
#endif
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 69309cd56fdf..ea9010113f69 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -249,10 +249,22 @@ static void note_wx(struct pg_state *st, unsigned long addr)
(void *)st->start_address);
}
-static inline pgprotval_t effective_prot(pgprotval_t prot1, pgprotval_t prot2)
+static void effective_prot(struct ptdump_state *pt_st, int level, u64 val)
{
- return (prot1 & prot2 & (_PAGE_USER | _PAGE_RW)) |
- ((prot1 | prot2) & _PAGE_NX);
+ struct pg_state *st = container_of(pt_st, struct pg_state, ptdump);
+ pgprotval_t prot = val & PTE_FLAGS_MASK;
+ pgprotval_t effective;
+
+ if (level > 0) {
+ pgprotval_t higher_prot = st->prot_levels[level - 1];
+
+ effective = (higher_prot & prot & (_PAGE_USER | _PAGE_RW)) |
+ ((higher_prot | prot) & _PAGE_NX);
+ } else {
+ effective = prot;
+ }
+
+ st->prot_levels[level] = effective;
}
/*
@@ -261,7 +273,7 @@ static inline pgprotval_t effective_prot(pgprotval_t prot1, pgprotval_t prot2)
* print what we collected so far.
*/
static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
- unsigned long val)
+ u64 val)
{
struct pg_state *st = container_of(pt_st, struct pg_state, ptdump);
pgprotval_t new_prot, new_eff;
@@ -270,16 +282,10 @@ static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
struct seq_file *m = st->seq;
new_prot = val & PTE_FLAGS_MASK;
-
- if (level > 0) {
- new_eff = effective_prot(st->prot_levels[level - 1],
- new_prot);
- } else {
- new_eff = new_prot;
- }
-
- if (level >= 0)
- st->prot_levels[level] = new_eff;
+ if (!val)
+ new_eff = 0;
+ else
+ new_eff = st->prot_levels[level];
/*
* If we have a "break" in the series, we need to flush the state that
@@ -374,6 +380,7 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m,
struct pg_state st = {
.ptdump = {
.note_page = note_page,
+ .effective_prot = effective_prot,
.range = ptdump_ranges
},
.level = -1,
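The callback above folds the parent level's effective protection into the current level: _PAGE_USER and _PAGE_RW only survive if granted at every level, while _PAGE_NX from any level makes the mapping non-executable. A small userspace model of that rule with illustrative bit values:

#include <stdint.h>
#include <stdio.h>

#define P_RW	(1ULL << 1)
#define P_USER	(1ULL << 2)
#define P_NX	(1ULL << 63)

static uint64_t effective(uint64_t parent, uint64_t prot)
{
	return (parent & prot & (P_USER | P_RW)) | ((parent | prot) & P_NX);
}

int main(void)
{
	/* PMD grants user+rw, PTE grants rw and sets NX: result is rw, NX. */
	uint64_t eff = effective(P_USER | P_RW, P_RW | P_NX);

	printf("user=%d rw=%d nx=%d\n",
	       !!(eff & P_USER), !!(eff & P_RW), !!(eff & P_NX));
	return 0;
}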
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index a51df516b87b..c5437f2964ee 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -30,6 +30,7 @@
#include <asm/desc.h> /* store_idt(), ... */
#include <asm/cpu_entry_area.h> /* exception stack */
#include <asm/pgtable_areas.h> /* VMALLOC_START, ... */
+#include <asm/kvm_para.h> /* kvm_handle_async_pf */
#define CREATE_TRACE_POINTS
#include <asm/trace/exceptions.h>
@@ -190,16 +191,13 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
return pmd_k;
}
-static void vmalloc_sync(void)
+void arch_sync_kernel_mappings(unsigned long start, unsigned long end)
{
- unsigned long address;
-
- if (SHARED_KERNEL_PMD)
- return;
+ unsigned long addr;
- for (address = VMALLOC_START & PMD_MASK;
- address >= TASK_SIZE_MAX && address < VMALLOC_END;
- address += PMD_SIZE) {
+ for (addr = start & PMD_MASK;
+ addr >= TASK_SIZE_MAX && addr < VMALLOC_END;
+ addr += PMD_SIZE) {
struct page *page;
spin_lock(&pgd_lock);
@@ -210,61 +208,13 @@ static void vmalloc_sync(void)
pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
spin_lock(pgt_lock);
- vmalloc_sync_one(page_address(page), address);
+ vmalloc_sync_one(page_address(page), addr);
spin_unlock(pgt_lock);
}
spin_unlock(&pgd_lock);
}
}
-void vmalloc_sync_mappings(void)
-{
- vmalloc_sync();
-}
-
-void vmalloc_sync_unmappings(void)
-{
- vmalloc_sync();
-}
-
-/*
- * 32-bit:
- *
- * Handle a fault on the vmalloc or module mapping area
- */
-static noinline int vmalloc_fault(unsigned long address)
-{
- unsigned long pgd_paddr;
- pmd_t *pmd_k;
- pte_t *pte_k;
-
- /* Make sure we are in vmalloc area: */
- if (!(address >= VMALLOC_START && address < VMALLOC_END))
- return -1;
-
- /*
- * Synchronize this task's top level page-table
- * with the 'reference' page table.
- *
- * Do _not_ use "current" here. We might be inside
- * an interrupt in the middle of a task switch..
- */
- pgd_paddr = read_cr3_pa();
- pmd_k = vmalloc_sync_one(__va(pgd_paddr), address);
- if (!pmd_k)
- return -1;
-
- if (pmd_large(*pmd_k))
- return 0;
-
- pte_k = pte_offset_kernel(pmd_k, address);
- if (!pte_present(*pte_k))
- return -1;
-
- return 0;
-}
-NOKPROBE_SYMBOL(vmalloc_fault);
-
/*
* Did it hit the DOS screen memory VA from vm86 mode?
*/
@@ -329,96 +279,6 @@ out:
#else /* CONFIG_X86_64: */
-void vmalloc_sync_mappings(void)
-{
- /*
- * 64-bit mappings might allocate new p4d/pud pages
- * that need to be propagated to all tasks' PGDs.
- */
- sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END);
-}
-
-void vmalloc_sync_unmappings(void)
-{
- /*
- * Unmappings never allocate or free p4d/pud pages.
- * No work is required here.
- */
-}
-
-/*
- * 64-bit:
- *
- * Handle a fault on the vmalloc area
- */
-static noinline int vmalloc_fault(unsigned long address)
-{
- pgd_t *pgd, *pgd_k;
- p4d_t *p4d, *p4d_k;
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
-
- /* Make sure we are in vmalloc area: */
- if (!(address >= VMALLOC_START && address < VMALLOC_END))
- return -1;
-
- /*
- * Copy kernel mappings over when needed. This can also
- * happen within a race in page table update. In the later
- * case just flush:
- */
- pgd = (pgd_t *)__va(read_cr3_pa()) + pgd_index(address);
- pgd_k = pgd_offset_k(address);
- if (pgd_none(*pgd_k))
- return -1;
-
- if (pgtable_l5_enabled()) {
- if (pgd_none(*pgd)) {
- set_pgd(pgd, *pgd_k);
- arch_flush_lazy_mmu_mode();
- } else {
- BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_k));
- }
- }
-
- /* With 4-level paging, copying happens on the p4d level. */
- p4d = p4d_offset(pgd, address);
- p4d_k = p4d_offset(pgd_k, address);
- if (p4d_none(*p4d_k))
- return -1;
-
- if (p4d_none(*p4d) && !pgtable_l5_enabled()) {
- set_p4d(p4d, *p4d_k);
- arch_flush_lazy_mmu_mode();
- } else {
- BUG_ON(p4d_pfn(*p4d) != p4d_pfn(*p4d_k));
- }
-
- BUILD_BUG_ON(CONFIG_PGTABLE_LEVELS < 4);
-
- pud = pud_offset(p4d, address);
- if (pud_none(*pud))
- return -1;
-
- if (pud_large(*pud))
- return 0;
-
- pmd = pmd_offset(pud, address);
- if (pmd_none(*pmd))
- return -1;
-
- if (pmd_large(*pmd))
- return 0;
-
- pte = pte_offset_kernel(pmd, address);
- if (!pte_present(*pte))
- return -1;
-
- return 0;
-}
-NOKPROBE_SYMBOL(vmalloc_fault);
-
#ifdef CONFIG_CPU_SUP_AMD
static const char errata93_warning[] =
KERN_ERR
@@ -1257,29 +1117,6 @@ do_kern_addr_fault(struct pt_regs *regs, unsigned long hw_error_code,
*/
WARN_ON_ONCE(hw_error_code & X86_PF_PK);
- /*
- * We can fault-in kernel-space virtual memory on-demand. The
- * 'reference' page table is init_mm.pgd.
- *
- * NOTE! We MUST NOT take any locks for this case. We may
- * be in an interrupt or a critical region, and should
- * only copy the information from the master page table,
- * nothing more.
- *
- * Before doing this on-demand faulting, ensure that the
- * fault is not any of the following:
- * 1. A fault on a PTE with a reserved bit set.
- * 2. A fault caused by a user-mode access. (Do not demand-
- * fault kernel memory due to user-mode accesses).
- * 3. A fault caused by a page-level protection violation.
- * (A demand fault would be on a non-present page which
- * would have X86_PF_PROT==0).
- */
- if (!(hw_error_code & (X86_PF_RSVD | X86_PF_USER | X86_PF_PROT))) {
- if (vmalloc_fault(address) >= 0)
- return;
- }
-
/* Was the fault spurious, caused by lazy TLB invalidation? */
if (spurious_kernel_fault(hw_error_code, address))
return;
@@ -1523,6 +1360,24 @@ do_page_fault(struct pt_regs *regs, unsigned long hw_error_code,
unsigned long address)
{
prefetchw(&current->mm->mmap_sem);
+ /*
+ * KVM has two types of events that are, logically, interrupts, but
+ * are unfortunately delivered using the #PF vector. These events are
+ * "you just accessed valid memory, but the host doesn't have it right
+ * now, so I'll put you to sleep if you continue" and "that memory
+ * you tried to access earlier is available now."
+ *
+ * We are relying on the interrupted context being sane (valid RSP,
+ * relevant locks not held, etc.), which is fine as long as the
+ * interrupted context had IF=1. We are also relying on the KVM
+ * async pf type field and CR2 being read consistently instead of
+ * getting values from real and async page faults mixed up.
+ *
+ * Fingers crossed.
+ */
+ if (kvm_handle_async_pf(regs, (u32)address))
+ return;
+
trace_page_fault_entries(regs, hw_error_code, address);
if (unlikely(kmmio_fault(regs, address)))
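The hunk above diverts KVM's async page-fault notifications out of the normal #PF path before any other handling. A minimal sketch of what such a demultiplexer does is below; the per-CPU reason word and its handling are simplified stand-ins for the real shared apf data in arch/x86/kernel/kvm.c, and the sleep/wake steps are left as comments rather than real helpers.

/* Simplified sketch of an async-#PF demultiplexer; not the real helper. */
static DEFINE_PER_CPU(u32, demo_apf_reason);	/* written by the hypervisor */

static bool demo_handle_async_pf(struct pt_regs *regs, u32 token)
{
	u32 reason = this_cpu_read(demo_apf_reason);

	if (!reason)
		return false;	/* genuine page fault: fall through to do_page_fault() */

	this_cpu_write(demo_apf_reason, 0);

	if (reason == KVM_PV_REASON_PAGE_NOT_PRESENT) {
		/* host is still paging the memory in: park this task on @token */
	} else {
		/* KVM_PV_REASON_PAGE_READY: wake whoever slept on @token earlier */
	}
	return true;		/* event consumed, skip the normal fault path */
}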
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 1bba16c5742b..a573a3e63f02 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -121,8 +121,6 @@ __ref void *alloc_low_pages(unsigned int num)
} else {
pfn = pgt_buf_end;
pgt_buf_end += num;
- printk(KERN_DEBUG "BRK [%#010lx, %#010lx] PGTABLE\n",
- pfn << PAGE_SHIFT, (pgt_buf_end << PAGE_SHIFT) - 1);
}
for (i = 0; i < num; i++) {
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 8b5f73f5e207..96274a90c5ff 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -218,6 +218,11 @@ void sync_global_pgds(unsigned long start, unsigned long end)
sync_global_pgds_l4(start, end);
}
+void arch_sync_kernel_mappings(unsigned long start, unsigned long end)
+{
+ sync_global_pgds(start, end);
+}
+
/*
* NOTE: This function is marked __ref because it calls __init function
* (alloc_bootmem_pages). It's safe to do it ONLY when after_bootmem == 0.
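With vmalloc_fault() gone, the replacement model is that generic mm code synchronizes the init_mm page tables eagerly whenever it allocates a level covered by the architecture's sync mask. A rough sketch of that caller-side pattern follows, assuming the pgtbl_mod_mask / ARCH_PAGE_TABLE_SYNC_MASK plumbing introduced alongside this change; it is illustrative, not the exact vmalloc code.

/* Sketch of the generic caller side (assumed shape, not a verbatim copy). */
static void demo_vmap_populate(unsigned long start, unsigned long end)
{
	pgtbl_mod_mask mask = 0;

	/*
	 * ... walk p4d/pud/pmd/pte for [start, end), allocating missing
	 * levels and OR-ing the matching PGTBL_*_MODIFIED bit into @mask ...
	 */

	if (mask & ARCH_PAGE_TABLE_SYNC_MASK)
		arch_sync_kernel_mappings(start, end);
}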
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index cb91eccc4960..c90c20904a60 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -18,7 +18,9 @@
#include <linux/sched/signal.h>
#include <linux/sched/mm.h>
#include <linux/compat.h>
+#include <linux/elf-randomize.h>
#include <asm/elf.h>
+#include <asm/io.h>
#include "physaddr.h"
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index f2bd3d61e16b..104544359d69 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -27,40 +27,6 @@
#include "numa_internal.h"
-#ifdef CONFIG_DISCONTIGMEM
-/*
- * 4) physnode_map - the mapping between a pfn and owning node
- * physnode_map keeps track of the physical memory layout of a generic
- * numa node on a 64Mb break (each element of the array will
- * represent 64Mb of memory and will be marked by the node id. so,
- * if the first gig is on node 0, and the second gig is on node 1
- * physnode_map will contain:
- *
- * physnode_map[0-15] = 0;
- * physnode_map[16-31] = 1;
- * physnode_map[32- ] = -1;
- */
-s8 physnode_map[MAX_SECTIONS] __read_mostly = { [0 ... (MAX_SECTIONS - 1)] = -1};
-EXPORT_SYMBOL(physnode_map);
-
-void memory_present(int nid, unsigned long start, unsigned long end)
-{
- unsigned long pfn;
-
- printk(KERN_INFO "Node: %d, start_pfn: %lx, end_pfn: %lx\n",
- nid, start, end);
- printk(KERN_DEBUG " Setting physnode_map array to node %d for pfns:\n", nid);
- printk(KERN_DEBUG " ");
- start = round_down(start, PAGES_PER_SECTION);
- end = round_up(end, PAGES_PER_SECTION);
- for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) {
- physnode_map[pfn / PAGES_PER_SECTION] = nid;
- printk(KERN_CONT "%lx ", pfn);
- }
- printk(KERN_CONT "\n");
-}
-#endif
-
extern unsigned long highend_pfn, highstart_pfn;
void __init initmem_init(void)
diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
index 843aa10a4cb6..da0fb17a1a36 100644
--- a/arch/x86/mm/pti.c
+++ b/arch/x86/mm/pti.c
@@ -448,13 +448,7 @@ static void __init pti_clone_user_shared(void)
* the sp1 and sp2 slots.
*
* This is done for all possible CPUs during boot to ensure
- * that it's propagated to all mms. If we were to add one of
- * these mappings during CPU hotplug, we would need to take
- * some measure to make sure that every mm that subsequently
- * ran on that CPU would have the relevant PGD entry in its
- * pagetables. The usual vmalloc_fault() mechanism would not
- * work for page faults taken in entry_SYSCALL_64 before RSP
- * is set up.
+ * that it's propagated to all mms.
*/
unsigned long va = (unsigned long)&per_cpu(cpu_tss_rw, cpu);
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 66f96f21a7b6..f3fe261e5936 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -161,34 +161,6 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next,
local_irq_restore(flags);
}
-static void sync_current_stack_to_mm(struct mm_struct *mm)
-{
- unsigned long sp = current_stack_pointer;
- pgd_t *pgd = pgd_offset(mm, sp);
-
- if (pgtable_l5_enabled()) {
- if (unlikely(pgd_none(*pgd))) {
- pgd_t *pgd_ref = pgd_offset_k(sp);
-
- set_pgd(pgd, *pgd_ref);
- }
- } else {
- /*
- * "pgd" is faked. The top level entries are "p4d"s, so sync
- * the p4d. This compiles to approximately the same code as
- * the 5-level case.
- */
- p4d_t *p4d = p4d_offset(pgd, sp);
-
- if (unlikely(p4d_none(*p4d))) {
- pgd_t *pgd_ref = pgd_offset_k(sp);
- p4d_t *p4d_ref = p4d_offset(pgd_ref, sp);
-
- set_p4d(p4d, *p4d_ref);
- }
- }
-}
-
static inline unsigned long mm_mangle_tif_spec_ib(struct task_struct *next)
{
unsigned long next_tif = task_thread_info(next)->flags;
@@ -377,15 +349,6 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
*/
cond_ibpb(tsk);
- if (IS_ENABLED(CONFIG_VMAP_STACK)) {
- /*
- * If our current stack is in vmalloc space and isn't
- * mapped in the new pgd, we'll double-fault. Forcibly
- * map it.
- */
- sync_current_stack_to_mm(next);
- }
-
/*
* Stop remote flushes for the previous mm.
* Skip kernel threads; we never send init_mm TLB flushing IPIs,
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 1aae5302501d..e966115d105c 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -62,12 +62,12 @@ static unsigned long efi_runtime, efi_nr_tables;
unsigned long efi_fw_vendor, efi_config_table;
static const efi_config_table_type_t arch_tables[] __initconst = {
- {EFI_PROPERTIES_TABLE_GUID, "PROP", &prop_phys},
- {UGA_IO_PROTOCOL_GUID, "UGA", &uga_phys},
+ {EFI_PROPERTIES_TABLE_GUID, &prop_phys, "PROP" },
+ {UGA_IO_PROTOCOL_GUID, &uga_phys, "UGA" },
#ifdef CONFIG_X86_UV
- {UV_SYSTEM_TABLE_GUID, "UVsystab", &uv_systab_phys},
+ {UV_SYSTEM_TABLE_GUID, &uv_systab_phys, "UVsystab" },
#endif
- {NULL_GUID, NULL, NULL},
+ {},
};
static const unsigned long * const efi_tables[] = {
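The reordered initializers above suggest the matching struct now carries the result pointer before the short name; a sketch of the assumed layout (not quoted from efi.h):

/* Assumed field order behind the reordered initializers (sketch only). */
typedef struct {
	efi_guid_t	guid;
	unsigned long	*ptr;
	const char	name[16];
} demo_efi_config_table_type_t;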
diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S
index 15da118f04f0..90380a17ab23 100644
--- a/arch/x86/platform/efi/efi_stub_64.S
+++ b/arch/x86/platform/efi/efi_stub_64.S
@@ -21,7 +21,7 @@ SYM_FUNC_START(__efi_call)
mov %r8, %r9
mov %rcx, %r8
mov %rsi, %rcx
- CALL_NOSPEC %rdi
+ CALL_NOSPEC rdi
leave
ret
SYM_FUNC_END(__efi_call)
diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c
index c60255da5a6c..4494589a288a 100644
--- a/arch/x86/platform/uv/bios_uv.c
+++ b/arch/x86/platform/uv/bios_uv.c
@@ -45,7 +45,8 @@ static s64 __uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
return ret;
}
-s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5)
+static s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4,
+ u64 a5)
{
s64 ret;
@@ -57,10 +58,9 @@ s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5)
return ret;
}
-EXPORT_SYMBOL_GPL(uv_bios_call);
-s64 uv_bios_call_irqsave(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
- u64 a4, u64 a5)
+static s64 uv_bios_call_irqsave(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
+ u64 a4, u64 a5)
{
unsigned long bios_flags;
s64 ret;
@@ -77,18 +77,13 @@ s64 uv_bios_call_irqsave(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
return ret;
}
-
long sn_partition_id;
EXPORT_SYMBOL_GPL(sn_partition_id);
long sn_coherency_id;
-EXPORT_SYMBOL_GPL(sn_coherency_id);
long sn_region_size;
EXPORT_SYMBOL_GPL(sn_region_size);
long system_serial_number;
-EXPORT_SYMBOL_GPL(system_serial_number);
int uv_type;
-EXPORT_SYMBOL_GPL(uv_type);
-
s64 uv_bios_get_sn_info(int fc, int *uvtype, long *partid, long *coher,
long *region, long *ssn)
@@ -115,7 +110,6 @@ s64 uv_bios_get_sn_info(int fc, int *uvtype, long *partid, long *coher,
*ssn = v1;
return ret;
}
-EXPORT_SYMBOL_GPL(uv_bios_get_sn_info);
int
uv_bios_mq_watchlist_alloc(unsigned long addr, unsigned int mq_size,
@@ -166,7 +160,6 @@ s64 uv_bios_freq_base(u64 clock_type, u64 *ticks_per_second)
return uv_bios_call(UV_BIOS_FREQ_BASE, clock_type,
(u64)ticks_per_second, 0, 0, 0);
}
-EXPORT_SYMBOL_GPL(uv_bios_freq_base);
/*
* uv_bios_set_legacy_vga_target - Set Legacy VGA I/O Target
@@ -185,7 +178,6 @@ int uv_bios_set_legacy_vga_target(bool decode, int domain, int bus)
return uv_bios_call(UV_BIOS_SET_LEGACY_VGA_TARGET,
(u64)decode, (u64)domain, (u64)bus, 0, 0);
}
-EXPORT_SYMBOL_GPL(uv_bios_set_legacy_vga_target);
int uv_bios_init(void)
{
diff --git a/arch/x86/platform/uv/uv_sysfs.c b/arch/x86/platform/uv/uv_sysfs.c
index 62214731fea5..266773e2fb37 100644
--- a/arch/x86/platform/uv/uv_sysfs.c
+++ b/arch/x86/platform/uv/uv_sysfs.c
@@ -21,7 +21,7 @@ static ssize_t partition_id_show(struct kobject *kobj,
static ssize_t coherence_id_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
- return snprintf(buf, PAGE_SIZE, "%ld\n", uv_partition_coherence_id());
+ return snprintf(buf, PAGE_SIZE, "%ld\n", sn_coherency_id);
}
static struct kobj_attribute partition_id_attr =
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index aaff9ed7ff45..fc3b757afb2c 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -307,7 +307,7 @@ int hibernate_resume_nonboot_cpu_disable(void)
if (ret)
return ret;
smp_ops.play_dead = resume_play_dead;
- ret = disable_nonboot_cpus();
+ ret = freeze_secondary_cpus(0);
smp_ops.play_dead = play_dead;
return ret;
}
diff --git a/arch/x86/um/asm/checksum.h b/arch/x86/um/asm/checksum.h
index 2a56cac64687..ff6bba2c8ab6 100644
--- a/arch/x86/um/asm/checksum.h
+++ b/arch/x86/um/asm/checksum.h
@@ -36,26 +36,6 @@ __wsum csum_partial_copy_nocheck(const void *src, void *dst,
return csum_partial(dst, len, sum);
}
-/*
- * the same as csum_partial, but copies from src while it
- * checksums, and handles user-space pointer exceptions correctly, when needed.
- *
- * here even more important to align src and dst on a 32-bit (or even
- * better 64-bit) boundary
- */
-
-static __inline__
-__wsum csum_partial_copy_from_user(const void __user *src, void *dst,
- int len, __wsum sum, int *err_ptr)
-{
- if (copy_from_user(dst, src, len)) {
- *err_ptr = -EFAULT;
- return (__force __wsum)-1;
- }
-
- return csum_partial(dst, len, sum);
-}
-
/**
* csum_fold - Fold and invert a 32bit checksum.
* sum: 32bit unfolded sum
diff --git a/arch/x86/xen/efi.c b/arch/x86/xen/efi.c
index 1abe455d926a..205a9bc981b0 100644
--- a/arch/x86/xen/efi.c
+++ b/arch/x86/xen/efi.c
@@ -29,7 +29,7 @@ static efi_system_table_t efi_systab_xen __initdata = {
.fw_vendor = EFI_INVALID_TABLE_ADDR, /* Initialized later. */
.fw_revision = 0, /* Initialized later. */
.con_in_handle = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */
- .con_in = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */
+ .con_in = NULL, /* Not used under Xen. */
.con_out_handle = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */
.con_out = NULL, /* Not used under Xen. */
.stderr_handle = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */
diff --git a/arch/xtensa/include/asm/checksum.h b/arch/xtensa/include/asm/checksum.h
index 8b687176ad72..d8292cc9ebdf 100644
--- a/arch/xtensa/include/asm/checksum.h
+++ b/arch/xtensa/include/asm/checksum.h
@@ -44,8 +44,6 @@ asmlinkage __wsum csum_partial_copy_generic(const void *src, void *dst,
/*
* Note: when you get a NULL pointer exception here this means someone
* passed in an incorrect kernel address to one of these functions.
- *
- * If you use these functions directly please don't forget the access_ok().
*/
static inline
__wsum csum_partial_copy_nocheck(const void *src, void *dst,
@@ -54,12 +52,17 @@ __wsum csum_partial_copy_nocheck(const void *src, void *dst,
return csum_partial_copy_generic(src, dst, len, sum, NULL, NULL);
}
+#define _HAVE_ARCH_COPY_AND_CSUM_FROM_USER
static inline
-__wsum csum_partial_copy_from_user(const void __user *src, void *dst,
+__wsum csum_and_copy_from_user(const void __user *src, void *dst,
int len, __wsum sum, int *err_ptr)
{
- return csum_partial_copy_generic((__force const void *)src, dst,
+ if (access_ok(dst, len))
+ return csum_partial_copy_generic((__force const void *)src, dst,
len, sum, err_ptr, NULL);
+ if (len)
+ *err_ptr = -EFAULT;
+ return sum;
}
/*
diff --git a/arch/xtensa/include/asm/uaccess.h b/arch/xtensa/include/asm/uaccess.h
index 47b7702aaa40..e57f0d0a88d8 100644
--- a/arch/xtensa/include/asm/uaccess.h
+++ b/arch/xtensa/include/asm/uaccess.h
@@ -84,7 +84,7 @@ extern long __put_user_bad(void);
#define __put_user_check(x, ptr, size) \
({ \
long __pu_err = -EFAULT; \
- __typeof__(*(ptr)) *__pu_addr = (ptr); \
+ __typeof__(*(ptr)) __user *__pu_addr = (ptr); \
if (access_ok(__pu_addr, size)) \
__put_user_size((x), __pu_addr, (size), __pu_err); \
__pu_err; \
@@ -180,11 +180,11 @@ __asm__ __volatile__( \
#define __get_user_check(x, ptr, size) \
({ \
long __gu_err = -EFAULT; \
- const __typeof__(*(ptr)) *__gu_addr = (ptr); \
+ const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \
if (access_ok(__gu_addr, size)) \
__get_user_size((x), __gu_addr, (size), __gu_err); \
else \
- (x) = 0; \
+ (x) = (__typeof__(*(ptr)))0; \
__gu_err; \
})
@@ -202,13 +202,15 @@ do { \
u64 __x; \
if (unlikely(__copy_from_user(&__x, ptr, 8))) { \
retval = -EFAULT; \
- (x) = 0; \
+ (x) = (__typeof__(*(ptr)))0; \
} else { \
- (x) = *(__force __typeof__((ptr)))&__x; \
+ (x) = *(__force __typeof__(*(ptr)) *)&__x; \
} \
break; \
} \
- default: (x) = 0; __get_user_bad(); \
+ default: \
+ (x) = (__typeof__(*(ptr)))0; \
+ __get_user_bad(); \
} \
} while (0)
@@ -270,15 +272,15 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n)
*/
static inline unsigned long
-__xtensa_clear_user(void *addr, unsigned long size)
+__xtensa_clear_user(void __user *addr, unsigned long size)
{
- if (!__memset(addr, 0, size))
+ if (!__memset((void __force *)addr, 0, size))
return size;
return 0;
}
static inline unsigned long
-clear_user(void *addr, unsigned long size)
+clear_user(void __user *addr, unsigned long size)
{
if (access_ok(addr, size))
return __xtensa_clear_user(addr, size);
@@ -290,10 +292,10 @@ clear_user(void *addr, unsigned long size)
#ifndef CONFIG_GENERIC_STRNCPY_FROM_USER
-extern long __strncpy_user(char *, const char *, long);
+extern long __strncpy_user(char *dst, const char __user *src, long count);
static inline long
-strncpy_from_user(char *dst, const char *src, long count)
+strncpy_from_user(char *dst, const char __user *src, long count)
{
if (access_ok(src, 1))
return __strncpy_user(dst, src, count);
@@ -306,13 +308,11 @@ long strncpy_from_user(char *dst, const char *src, long count);
/*
* Return the size of a string (including the ending 0!)
*/
-extern long __strnlen_user(const char *, long);
+extern long __strnlen_user(const char __user *str, long len);
-static inline long strnlen_user(const char *str, long len)
+static inline long strnlen_user(const char __user *str, long len)
{
- unsigned long top = __kernel_ok ? ~0UL : TASK_SIZE - 1;
-
- if ((unsigned long)str > top)
+ if (!access_ok(str, 1))
return 0;
return __strnlen_user(str, len);
}
diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S
index 06fbb0a171f1..fae33ddcaebb 100644
--- a/arch/xtensa/kernel/entry.S
+++ b/arch/xtensa/kernel/entry.S
@@ -959,14 +959,14 @@ ENDPROC(unrecoverable_exception)
* of the proper size instead.
*
* This algorithm simply backs out the register changes started by the user
- * excpetion handler, makes it appear that we have started a window underflow
+ * exception handler, makes it appear that we have started a window underflow
* by rotating the window back and then setting the old window base (OWB) in
* the 'ps' register with the rolled back window base. The 'movsp' instruction
* will be re-executed and this time since the next window frames is in the
* active AR registers it won't cause an exception.
*
* If the WindowUnderflow code gets a TLB miss the page will get mapped
- * the the partial windeowUnderflow will be handeled in the double exception
+ * the partial WindowUnderflow will be handled in the double exception
* handler.
*
* Entry condition:
diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl
index 85a9ab1bc04d..69d0d73876b3 100644
--- a/arch/xtensa/kernel/syscalls/syscall.tbl
+++ b/arch/xtensa/kernel/syscalls/syscall.tbl
@@ -408,3 +408,4 @@
435 common clone3 sys_clone3
437 common openat2 sys_openat2
438 common pidfd_getfd sys_pidfd_getfd
+439 common faccessat2 sys_faccessat2
diff --git a/block/Kconfig b/block/Kconfig
index 3bc76bb113a0..9382a4acefc3 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -146,6 +146,7 @@ config BLK_CGROUP_IOLATENCY
config BLK_CGROUP_IOCOST
bool "Enable support for cost model based cgroup IO controller"
depends on BLK_CGROUP=y
+ select BLK_RQ_IO_DATA_LEN
select BLK_RQ_ALLOC_TIME
---help---
Enabling this option enables the .weight interface for cost
@@ -185,6 +186,23 @@ config BLK_SED_OPAL
Enabling this option enables users to setup/unlock/lock
Locking ranges for SED devices using the Opal protocol.
+config BLK_INLINE_ENCRYPTION
+ bool "Enable inline encryption support in block layer"
+ help
+ Build the blk-crypto subsystem. Enabling this lets the
+ block layer handle encryption, so users can take
+ advantage of inline encryption hardware if present.
+
+config BLK_INLINE_ENCRYPTION_FALLBACK
+ bool "Enable crypto API fallback for blk-crypto"
+ depends on BLK_INLINE_ENCRYPTION
+ select CRYPTO
+ select CRYPTO_SKCIPHER
+ help
+ Enabling this lets the block layer handle inline encryption
+ by falling back to the kernel crypto API when inline
+ encryption hardware is not present.
+
menu "Partition Types"
source "block/partitions/Kconfig"
diff --git a/block/Makefile b/block/Makefile
index 206b96e9387f..78719169fb2a 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -36,3 +36,5 @@ obj-$(CONFIG_BLK_DEBUG_FS) += blk-mq-debugfs.o
obj-$(CONFIG_BLK_DEBUG_FS_ZONED)+= blk-mq-debugfs-zoned.o
obj-$(CONFIG_BLK_SED_OPAL) += sed-opal.o
obj-$(CONFIG_BLK_PM) += blk-pm.o
+obj-$(CONFIG_BLK_INLINE_ENCRYPTION) += keyslot-manager.o blk-crypto.o
+obj-$(CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK) += blk-crypto-fallback.o
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 3d411716d7ee..50c8f034c01c 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -6073,7 +6073,7 @@ static struct bfq_queue *bfq_get_bfqq_handle_split(struct bfq_data *bfqd,
* comments on bfq_init_rq for the reason behind this delayed
* preparation.
*/
-static void bfq_prepare_request(struct request *rq, struct bio *bio)
+static void bfq_prepare_request(struct request *rq)
{
/*
* Regardless of whether we have an icq attached, we have to
diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index bf62c25cde8f..3579ac0f6ec1 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -42,6 +42,9 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
struct bio_set *bs = bio->bi_pool;
unsigned inline_vecs;
+ if (WARN_ON_ONCE(bio_has_crypt_ctx(bio)))
+ return ERR_PTR(-EOPNOTSUPP);
+
if (!bs || !mempool_initialized(&bs->bio_integrity_pool)) {
bip = kmalloc(struct_size(bip, bip_inline_vecs, nr_vecs), gfp_mask);
inline_vecs = nr_vecs;
diff --git a/block/bio.c b/block/bio.c
index 21cbaa6a1c20..5235da6434aa 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -18,6 +18,7 @@
#include <linux/blk-cgroup.h>
#include <linux/highmem.h>
#include <linux/sched/sysctl.h>
+#include <linux/blk-crypto.h>
#include <trace/events/block.h>
#include "blk.h"
@@ -237,6 +238,8 @@ void bio_uninit(struct bio *bio)
if (bio_integrity(bio))
bio_integrity_free(bio);
+
+ bio_crypt_free_ctx(bio);
}
EXPORT_SYMBOL(bio_uninit);
@@ -708,6 +711,8 @@ struct bio *bio_clone_fast(struct bio *bio, gfp_t gfp_mask, struct bio_set *bs)
__bio_clone_fast(b, bio);
+ bio_crypt_clone(b, bio, gfp_mask);
+
if (bio_integrity(bio)) {
int ret;
@@ -748,9 +753,14 @@ static inline bool page_is_mergeable(const struct bio_vec *bv,
return true;
}
-static bool bio_try_merge_pc_page(struct request_queue *q, struct bio *bio,
- struct page *page, unsigned len, unsigned offset,
- bool *same_page)
+/*
+ * Try to merge a page into a segment, while obeying the hardware segment
+ * size limit. This is not for normal read/write bios, but for passthrough
+ * or Zone Append operations that we can't split.
+ */
+static bool bio_try_merge_hw_seg(struct request_queue *q, struct bio *bio,
+ struct page *page, unsigned len,
+ unsigned offset, bool *same_page)
{
struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
unsigned long mask = queue_segment_boundary(q);
@@ -765,38 +775,32 @@ static bool bio_try_merge_pc_page(struct request_queue *q, struct bio *bio,
}
/**
- * __bio_add_pc_page - attempt to add page to passthrough bio
- * @q: the target queue
- * @bio: destination bio
- * @page: page to add
- * @len: vec entry length
- * @offset: vec entry offset
- * @same_page: return if the merge happen inside the same page
- *
- * Attempt to add a page to the bio_vec maplist. This can fail for a
- * number of reasons, such as the bio being full or target block device
- * limitations. The target block device must allow bio's up to PAGE_SIZE,
- * so it is always possible to add a single page to an empty bio.
+ * bio_add_hw_page - attempt to add a page to a bio with hw constraints
+ * @q: the target queue
+ * @bio: destination bio
+ * @page: page to add
+ * @len: vec entry length
+ * @offset: vec entry offset
+ * @max_sectors: maximum number of sectors that can be added
+ * @same_page: return if the segment has been merged inside the same page
*
- * This should only be used by passthrough bios.
+ * Add a page to a bio while respecting the hardware max_sectors, max_segment
+ * and gap limitations.
*/
-int __bio_add_pc_page(struct request_queue *q, struct bio *bio,
+int bio_add_hw_page(struct request_queue *q, struct bio *bio,
struct page *page, unsigned int len, unsigned int offset,
- bool *same_page)
+ unsigned int max_sectors, bool *same_page)
{
struct bio_vec *bvec;
- /*
- * cloned bio must not modify vec list
- */
- if (unlikely(bio_flagged(bio, BIO_CLONED)))
+ if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
return 0;
- if (((bio->bi_iter.bi_size + len) >> 9) > queue_max_hw_sectors(q))
+ if (((bio->bi_iter.bi_size + len) >> 9) > max_sectors)
return 0;
if (bio->bi_vcnt > 0) {
- if (bio_try_merge_pc_page(q, bio, page, len, offset, same_page))
+ if (bio_try_merge_hw_seg(q, bio, page, len, offset, same_page))
return len;
/*
@@ -823,11 +827,27 @@ int __bio_add_pc_page(struct request_queue *q, struct bio *bio,
return len;
}
+/**
+ * bio_add_pc_page - attempt to add page to passthrough bio
+ * @q: the target queue
+ * @bio: destination bio
+ * @page: page to add
+ * @len: vec entry length
+ * @offset: vec entry offset
+ *
+ * Attempt to add a page to the bio_vec maplist. This can fail for a
+ * number of reasons, such as the bio being full or target block device
+ * limitations. The target block device must allow bio's up to PAGE_SIZE,
+ * so it is always possible to add a single page to an empty bio.
+ *
+ * This should only be used by passthrough bios.
+ */
int bio_add_pc_page(struct request_queue *q, struct bio *bio,
struct page *page, unsigned int len, unsigned int offset)
{
bool same_page = false;
- return __bio_add_pc_page(q, bio, page, len, offset, &same_page);
+ return bio_add_hw_page(q, bio, page, len, offset,
+ queue_max_hw_sectors(q), &same_page);
}
EXPORT_SYMBOL(bio_add_pc_page);
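As the wrapper above shows, the old passthrough helper and the new zone-append path differ only in the sector budget handed to bio_add_hw_page(). A small sketch using the names from this hunk; treat it as illustrative, since bio_add_hw_page() stays internal to the block layer.

/* Sketch: the two call sites differ only in the sector budget passed down. */
static int demo_add_pc_page(struct request_queue *q, struct bio *bio,
			    struct page *page, unsigned int len,
			    unsigned int off)
{
	bool same_page = false;

	return bio_add_hw_page(q, bio, page, len, off,
			       queue_max_hw_sectors(q), &same_page);
}

static int demo_add_zone_append_page(struct request_queue *q, struct bio *bio,
				     struct page *page, unsigned int len,
				     unsigned int off)
{
	bool same_page = false;

	return bio_add_hw_page(q, bio, page, len, off,
			       queue_max_zone_append_sectors(q), &same_page);
}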
@@ -936,6 +956,7 @@ void bio_release_pages(struct bio *bio, bool mark_dirty)
put_page(bvec->bv_page);
}
}
+EXPORT_SYMBOL_GPL(bio_release_pages);
static int __bio_iov_bvec_add_pages(struct bio *bio, struct iov_iter *iter)
{
@@ -1010,6 +1031,50 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
return 0;
}
+static int __bio_iov_append_get_pages(struct bio *bio, struct iov_iter *iter)
+{
+ unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt;
+ unsigned short entries_left = bio->bi_max_vecs - bio->bi_vcnt;
+ struct request_queue *q = bio->bi_disk->queue;
+ unsigned int max_append_sectors = queue_max_zone_append_sectors(q);
+ struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt;
+ struct page **pages = (struct page **)bv;
+ ssize_t size, left;
+ unsigned len, i;
+ size_t offset;
+
+ if (WARN_ON_ONCE(!max_append_sectors))
+ return 0;
+
+ /*
+ * Move page array up in the allocated memory for the bio vecs as far as
+ * possible so that we can start filling biovecs from the beginning
+ * without overwriting the temporary page array.
+ */
+ BUILD_BUG_ON(PAGE_PTRS_PER_BVEC < 2);
+ pages += entries_left * (PAGE_PTRS_PER_BVEC - 1);
+
+ size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset);
+ if (unlikely(size <= 0))
+ return size ? size : -EFAULT;
+
+ for (left = size, i = 0; left > 0; left -= len, i++) {
+ struct page *page = pages[i];
+ bool same_page = false;
+
+ len = min_t(size_t, PAGE_SIZE - offset, left);
+ if (bio_add_hw_page(q, bio, page, len, offset,
+ max_append_sectors, &same_page) != len)
+ return -EINVAL;
+ if (same_page)
+ put_page(page);
+ offset = 0;
+ }
+
+ iov_iter_advance(iter, size);
+ return 0;
+}
+
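The pointer-packing trick used above deserves a short worked example: with 64-bit pointers a struct bio_vec is 16 bytes, so each free bvec slot has room for PAGE_PTRS_PER_BVEC == 2 page pointers, and parking the temporary array in the upper half of the free slots guarantees that filling bi_io_vec[i] never overwrites pages[i]. A condensed sketch of the same arithmetic:

/* Condensed sketch of the bvec/page-pointer packing (assumptions above). */
struct page **pages = (struct page **)(bio->bi_io_vec + bio->bi_vcnt);
unsigned short entries_left = bio->bi_max_vecs - bio->bi_vcnt;

/* skip entries_left * (2 - 1) pointers so pages[] sits in the upper half */
pages += entries_left * (PAGE_PTRS_PER_BVEC - 1);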
/**
* bio_iov_iter_get_pages - add user or kernel pages to a bio
* @bio: bio to add pages to
@@ -1039,16 +1104,23 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
return -EINVAL;
do {
- if (is_bvec)
- ret = __bio_iov_bvec_add_pages(bio, iter);
- else
- ret = __bio_iov_iter_get_pages(bio, iter);
+ if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
+ if (WARN_ON_ONCE(is_bvec))
+ return -EINVAL;
+ ret = __bio_iov_append_get_pages(bio, iter);
+ } else {
+ if (is_bvec)
+ ret = __bio_iov_bvec_add_pages(bio, iter);
+ else
+ ret = __bio_iov_iter_get_pages(bio, iter);
+ }
} while (!ret && iov_iter_count(iter) && !bio_full(bio, 0));
if (is_bvec)
bio_set_flag(bio, BIO_NO_PAGE_REF);
return bio->bi_vcnt ? 0 : ret;
}
+EXPORT_SYMBOL_GPL(bio_iov_iter_get_pages);
static void submit_bio_wait_endio(struct bio *bio)
{
@@ -1105,6 +1177,7 @@ void bio_advance(struct bio *bio, unsigned bytes)
if (bio_integrity(bio))
bio_integrity_advance(bio, bytes);
+ bio_crypt_advance(bio, bytes);
bio_advance_iter(bio, &bio->bi_iter, bytes);
}
EXPORT_SYMBOL(bio_advance);
@@ -1303,55 +1376,6 @@ defer:
schedule_work(&bio_dirty_work);
}
-void update_io_ticks(struct hd_struct *part, unsigned long now, bool end)
-{
- unsigned long stamp;
-again:
- stamp = READ_ONCE(part->stamp);
- if (unlikely(stamp != now)) {
- if (likely(cmpxchg(&part->stamp, stamp, now) == stamp)) {
- __part_stat_add(part, io_ticks, end ? now - stamp : 1);
- }
- }
- if (part->partno) {
- part = &part_to_disk(part)->part0;
- goto again;
- }
-}
-
-void generic_start_io_acct(struct request_queue *q, int op,
- unsigned long sectors, struct hd_struct *part)
-{
- const int sgrp = op_stat_group(op);
-
- part_stat_lock();
-
- update_io_ticks(part, jiffies, false);
- part_stat_inc(part, ios[sgrp]);
- part_stat_add(part, sectors[sgrp], sectors);
- part_inc_in_flight(q, part, op_is_write(op));
-
- part_stat_unlock();
-}
-EXPORT_SYMBOL(generic_start_io_acct);
-
-void generic_end_io_acct(struct request_queue *q, int req_op,
- struct hd_struct *part, unsigned long start_time)
-{
- unsigned long now = jiffies;
- unsigned long duration = now - start_time;
- const int sgrp = op_stat_group(req_op);
-
- part_stat_lock();
-
- update_io_ticks(part, now, true);
- part_stat_add(part, nsecs[sgrp], jiffies_to_nsecs(duration));
- part_dec_in_flight(q, part, op_is_write(req_op));
-
- part_stat_unlock();
-}
-EXPORT_SYMBOL(generic_end_io_acct);
-
static inline bool bio_remaining_done(struct bio *bio)
{
/*
@@ -1445,6 +1469,10 @@ struct bio *bio_split(struct bio *bio, int sectors,
BUG_ON(sectors <= 0);
BUG_ON(sectors >= bio_sectors(bio));
+ /* Zone append commands cannot be split */
+ if (WARN_ON_ONCE(bio_op(bio) == REQ_OP_ZONE_APPEND))
+ return NULL;
+
split = bio_clone_fast(bio, gfp, bs);
if (!split)
return NULL;
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 930212c1a512..0ecc897b225c 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1530,6 +1530,10 @@ static void blkcg_scale_delay(struct blkcg_gq *blkg, u64 now)
{
u64 old = atomic64_read(&blkg->delay_start);
+ /* negative use_delay means no scaling, see blkcg_set_delay() */
+ if (atomic_read(&blkg->use_delay) < 0)
+ return;
+
/*
* We only want to scale down every second. The idea here is that we
* want to delay people for min(delay_nsec, NSEC_PER_SEC) in a certain
@@ -1717,6 +1721,8 @@ void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay)
*/
void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta)
{
+ if (WARN_ON_ONCE(atomic_read(&blkg->use_delay) < 0))
+ return;
blkcg_scale_delay(blkg, now);
atomic64_add(delta, &blkg->delay_nsec);
}
diff --git a/block/blk-core.c b/block/blk-core.c
index 9bfaee050c82..03252af8c82c 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -20,6 +20,7 @@
#include <linux/blk-mq.h>
#include <linux/highmem.h>
#include <linux/mm.h>
+#include <linux/pagemap.h>
#include <linux/kernel_stat.h>
#include <linux/string.h>
#include <linux/init.h>
@@ -38,6 +39,8 @@
#include <linux/debugfs.h>
#include <linux/bpf.h>
#include <linux/psi.h>
+#include <linux/sched/sysctl.h>
+#include <linux/blk-crypto.h>
#define CREATE_TRACE_POINTS
#include <trace/events/block.h>
@@ -120,6 +123,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
rq->start_time_ns = ktime_get_ns();
rq->part = NULL;
refcount_set(&rq->ref, 1);
+ blk_crypto_rq_set_defaults(rq);
}
EXPORT_SYMBOL(blk_rq_init);
@@ -135,6 +139,7 @@ static const char *const blk_op_name[] = {
REQ_OP_NAME(ZONE_OPEN),
REQ_OP_NAME(ZONE_CLOSE),
REQ_OP_NAME(ZONE_FINISH),
+ REQ_OP_NAME(ZONE_APPEND),
REQ_OP_NAME(WRITE_SAME),
REQ_OP_NAME(WRITE_ZEROES),
REQ_OP_NAME(SCSI_IN),
@@ -240,6 +245,17 @@ static void req_bio_endio(struct request *rq, struct bio *bio,
bio_advance(bio, nbytes);
+ if (req_op(rq) == REQ_OP_ZONE_APPEND && error == BLK_STS_OK) {
+ /*
+ * Partial zone append completions cannot be supported as the
+ * BIO fragments may end up not being written sequentially.
+ */
+ if (bio->bi_iter.bi_size)
+ bio->bi_status = BLK_STS_IOERR;
+ else
+ bio->bi_iter.bi_sector = rq->__sector;
+ }
+
/* don't actually finish bio if it's part of flush sequence */
if (bio->bi_iter.bi_size == 0 && !(rq->rq_flags & RQF_FLUSH_SEQ))
bio_endio(bio);
@@ -440,6 +456,23 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
}
}
+static inline int bio_queue_enter(struct bio *bio)
+{
+ struct request_queue *q = bio->bi_disk->queue;
+ bool nowait = bio->bi_opf & REQ_NOWAIT;
+ int ret;
+
+ ret = blk_queue_enter(q, nowait ? BLK_MQ_REQ_NOWAIT : 0);
+ if (unlikely(ret)) {
+ if (nowait && !blk_queue_dying(q))
+ bio_wouldblock_error(bio);
+ else
+ bio_io_error(bio);
+ }
+
+ return ret;
+}
+
void blk_queue_exit(struct request_queue *q)
{
percpu_ref_put(&q->q_usage_counter);
@@ -484,7 +517,7 @@ struct request_queue *__blk_alloc_queue(int node_id)
if (ret)
goto fail_id;
- q->backing_dev_info = bdi_alloc_node(GFP_KERNEL, node_id);
+ q->backing_dev_info = bdi_alloc(node_id);
if (!q->backing_dev_info)
goto fail_split;
@@ -494,7 +527,6 @@ struct request_queue *__blk_alloc_queue(int node_id)
q->backing_dev_info->ra_pages = VM_READAHEAD_PAGES;
q->backing_dev_info->capabilities = BDI_CAP_CGROUP_WRITEBACK;
- q->backing_dev_info->name = "block";
q->node = node_id;
timer_setup(&q->backing_dev_info->laptop_mode_wb_timer,
@@ -605,6 +637,16 @@ void blk_put_request(struct request *req)
}
EXPORT_SYMBOL(blk_put_request);
+static void blk_account_io_merge_bio(struct request *req)
+{
+ if (!blk_do_io_stat(req))
+ return;
+
+ part_stat_lock();
+ part_stat_inc(req->part, merges[op_stat_group(req_op(req))]);
+ part_stat_unlock();
+}
+
bool bio_attempt_back_merge(struct request *req, struct bio *bio,
unsigned int nr_segs)
{
@@ -623,7 +665,9 @@ bool bio_attempt_back_merge(struct request *req, struct bio *bio,
req->biotail = bio;
req->__data_len += bio->bi_iter.bi_size;
- blk_account_io_start(req, false);
+ bio_crypt_free_ctx(bio);
+
+ blk_account_io_merge_bio(req);
return true;
}
@@ -647,7 +691,9 @@ bool bio_attempt_front_merge(struct request *req, struct bio *bio,
req->__sector = bio->bi_iter.bi_sector;
req->__data_len += bio->bi_iter.bi_size;
- blk_account_io_start(req, false);
+ bio_crypt_do_front_merge(req, bio);
+
+ blk_account_io_merge_bio(req);
return true;
}
@@ -669,7 +715,7 @@ bool bio_attempt_discard_merge(struct request_queue *q, struct request *req,
req->__data_len += bio->bi_iter.bi_size;
req->nr_phys_segments = segments + 1;
- blk_account_io_start(req, false);
+ blk_account_io_merge_bio(req);
return true;
no_merge:
req_set_nomerge(q, req);
@@ -871,6 +917,41 @@ out:
return ret;
}
+/*
+ * Check write append to a zoned block device.
+ */
+static inline blk_status_t blk_check_zone_append(struct request_queue *q,
+ struct bio *bio)
+{
+ sector_t pos = bio->bi_iter.bi_sector;
+ int nr_sectors = bio_sectors(bio);
+
+ /* Only applicable to zoned block devices */
+ if (!blk_queue_is_zoned(q))
+ return BLK_STS_NOTSUPP;
+
+ /* The bio sector must point to the start of a sequential zone */
+ if (pos & (blk_queue_zone_sectors(q) - 1) ||
+ !blk_queue_zone_is_seq(q, pos))
+ return BLK_STS_IOERR;
+
+ /*
+ * Not allowed to cross zone boundaries. Otherwise, the BIO will be
+ * split and could result in non-contiguous sectors being written in
+ * different zones.
+ */
+ if (nr_sectors > q->limits.chunk_sectors)
+ return BLK_STS_IOERR;
+
+ /* Make sure the BIO is small enough and will not get split */
+ if (nr_sectors > q->limits.max_zone_append_sectors)
+ return BLK_STS_IOERR;
+
+ bio->bi_opf |= REQ_NOMERGE;
+
+ return BLK_STS_OK;
+}
+
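Putting the pieces together: a zone-append write targets the start of a sequential zone, must fit within max_zone_append_sectors, and learns the actual write location from bi_iter.bi_sector at completion (see the req_bio_endio() change earlier in this patch). A hedged, synchronous usage sketch:

/* Sketch of a single-page zone append (assumed usage, synchronous for brevity). */
static int demo_zone_append_page(struct block_device *bdev, sector_t zone_start,
				 struct page *page, sector_t *written)
{
	struct bio *bio = bio_alloc(GFP_KERNEL, 1);
	int ret;

	bio_set_dev(bio, bdev);
	bio->bi_opf = REQ_OP_ZONE_APPEND | REQ_SYNC;
	bio->bi_iter.bi_sector = zone_start;	/* start of the zone, not the write pointer */
	bio_add_page(bio, page, PAGE_SIZE, 0);

	ret = submit_bio_wait(bio);
	if (!ret)
		*written = bio->bi_iter.bi_sector;	/* where the data actually landed */
	bio_put(bio);
	return ret;
}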
static noinline_for_stack bool
generic_make_request_checks(struct bio *bio)
{
@@ -940,6 +1021,11 @@ generic_make_request_checks(struct bio *bio)
if (!q->limits.max_write_same_sectors)
goto not_supported;
break;
+ case REQ_OP_ZONE_APPEND:
+ status = blk_check_zone_append(q, bio);
+ if (status != BLK_STS_OK)
+ goto end_io;
+ break;
case REQ_OP_ZONE_RESET:
case REQ_OP_ZONE_OPEN:
case REQ_OP_ZONE_CLOSE:
@@ -960,12 +1046,13 @@ generic_make_request_checks(struct bio *bio)
}
/*
- * Various block parts want %current->io_context and lazy ioc
- * allocation ends up trading a lot of pain for a small amount of
- * memory. Just allocate it upfront. This may fail and block
- * layer knows how to live with it.
+ * Various block parts want %current->io_context, so allocate it up
+ * front rather than dealing with lots of pain to allocate it only
+ * where needed. This may fail and the block layer knows how to live
+ * with it.
*/
- create_io_context(GFP_ATOMIC, q->node);
+ if (unlikely(!current->io_context))
+ create_task_io_context(current, GFP_ATOMIC, q->node);
if (!blkcg_bio_issue_check(q, bio))
return false;
@@ -987,29 +1074,28 @@ end_io:
return false;
}
+static blk_qc_t do_make_request(struct bio *bio)
+{
+ struct request_queue *q = bio->bi_disk->queue;
+ blk_qc_t ret = BLK_QC_T_NONE;
+
+ if (blk_crypto_bio_prep(&bio)) {
+ if (!q->make_request_fn)
+ return blk_mq_make_request(q, bio);
+ ret = q->make_request_fn(q, bio);
+ }
+ blk_queue_exit(q);
+ return ret;
+}
+
/**
- * generic_make_request - hand a buffer to its device driver for I/O
+ * generic_make_request - re-submit a bio to the block device layer for I/O
* @bio: The bio describing the location in memory and on the device.
*
- * generic_make_request() is used to make I/O requests of block
- * devices. It is passed a &struct bio, which describes the I/O that needs
- * to be done.
- *
- * generic_make_request() does not return any status. The
- * success/failure status of the request, along with notification of
- * completion, is delivered asynchronously through the bio->bi_end_io
- * function described (one day) else where.
- *
- * The caller of generic_make_request must make sure that bi_io_vec
- * are set to describe the memory buffer, and that bi_dev and bi_sector are
- * set to describe the device address, and the
- * bi_end_io and optionally bi_private are set to describe how
- * completion notification should be signaled.
- *
- * generic_make_request and the drivers it calls may use bi_next if this
- * bio happens to be merged with someone else, and may resubmit the bio to
- * a lower device by calling into generic_make_request recursively, which
- * means the bio should NOT be touched after the call to ->make_request_fn.
+ * This is a version of submit_bio() that shall only be used for I/O that is
+ * resubmitted to lower level drivers by stacking block drivers. All file
+ * systems and other upper level users of the block layer should use
+ * submit_bio() instead.
*/
blk_qc_t generic_make_request(struct bio *bio)
{
@@ -1060,18 +1146,14 @@ blk_qc_t generic_make_request(struct bio *bio)
current->bio_list = bio_list_on_stack;
do {
struct request_queue *q = bio->bi_disk->queue;
- blk_mq_req_flags_t flags = bio->bi_opf & REQ_NOWAIT ?
- BLK_MQ_REQ_NOWAIT : 0;
- if (likely(blk_queue_enter(q, flags) == 0)) {
+ if (likely(bio_queue_enter(bio) == 0)) {
struct bio_list lower, same;
/* Create a fresh bio_list for all subordinate requests */
bio_list_on_stack[1] = bio_list_on_stack[0];
bio_list_init(&bio_list_on_stack[0]);
- ret = q->make_request_fn(q, bio);
-
- blk_queue_exit(q);
+ ret = do_make_request(bio);
/* sort new bios into those for a lower level
* and those for the same level
@@ -1087,12 +1169,6 @@ blk_qc_t generic_make_request(struct bio *bio)
bio_list_merge(&bio_list_on_stack[0], &lower);
bio_list_merge(&bio_list_on_stack[0], &same);
bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]);
- } else {
- if (unlikely(!blk_queue_dying(q) &&
- (bio->bi_opf & REQ_NOWAIT)))
- bio_wouldblock_error(bio);
- else
- bio_io_error(bio);
}
bio = bio_list_pop(&bio_list_on_stack[0]);
} while (bio);
@@ -1109,30 +1185,25 @@ EXPORT_SYMBOL(generic_make_request);
*
* This function behaves like generic_make_request(), but does not protect
* against recursion. Must only be used if the called driver is known
- * to not call generic_make_request (or direct_make_request) again from
- * its make_request function. (Calling direct_make_request again from
- * a workqueue is perfectly fine as that doesn't recurse).
+ * to be blk-mq based.
*/
blk_qc_t direct_make_request(struct bio *bio)
{
struct request_queue *q = bio->bi_disk->queue;
- bool nowait = bio->bi_opf & REQ_NOWAIT;
- blk_qc_t ret;
+ if (WARN_ON_ONCE(q->make_request_fn)) {
+ bio_io_error(bio);
+ return BLK_QC_T_NONE;
+ }
if (!generic_make_request_checks(bio))
return BLK_QC_T_NONE;
-
- if (unlikely(blk_queue_enter(q, nowait ? BLK_MQ_REQ_NOWAIT : 0))) {
- if (nowait && !blk_queue_dying(q))
- bio_wouldblock_error(bio);
- else
- bio_io_error(bio);
+ if (unlikely(bio_queue_enter(bio)))
+ return BLK_QC_T_NONE;
+ if (!blk_crypto_bio_prep(&bio)) {
+ blk_queue_exit(q);
return BLK_QC_T_NONE;
}
-
- ret = q->make_request_fn(q, bio);
- blk_queue_exit(q);
- return ret;
+ return blk_mq_make_request(q, bio);
}
EXPORT_SYMBOL_GPL(direct_make_request);
@@ -1140,17 +1211,17 @@ EXPORT_SYMBOL_GPL(direct_make_request);
* submit_bio - submit a bio to the block device layer for I/O
* @bio: The &struct bio which describes the I/O
*
- * submit_bio() is very similar in purpose to generic_make_request(), and
- * uses that function to do most of the work. Both are fairly rough
- * interfaces; @bio must be presetup and ready for I/O.
+ * submit_bio() is used to submit I/O requests to block devices. It is passed a
+ * fully set up &struct bio that describes the I/O that needs to be done. The
+ * bio will be sent to the device described by the bi_disk and bi_partno fields.
*
+ * The success/failure status of the request, along with notification of
+ * completion, is delivered asynchronously through the ->bi_end_io() callback
+ * in @bio. The bio must NOT be touched by the caller until ->bi_end_io() has
+ * been called.
*/
blk_qc_t submit_bio(struct bio *bio)
{
- bool workingset_read = false;
- unsigned long pflags;
- blk_qc_t ret;
-
if (blkcg_punt_bio_submit(bio))
return BLK_QC_T_NONE;
@@ -1169,8 +1240,6 @@ blk_qc_t submit_bio(struct bio *bio)
if (op_is_write(bio_op(bio))) {
count_vm_events(PGPGOUT, count);
} else {
- if (bio_flagged(bio, BIO_WORKINGSET))
- workingset_read = true;
task_io_account_read(bio->bi_iter.bi_size);
count_vm_events(PGPGIN, count);
}
@@ -1186,20 +1255,24 @@ blk_qc_t submit_bio(struct bio *bio)
}
/*
- * If we're reading data that is part of the userspace
- * workingset, count submission time as memory stall. When the
- * device is congested, or the submitting cgroup IO-throttled,
- * submission can be a significant part of overall IO time.
+ * If we're reading data that is part of the userspace workingset, count
+ * submission time as memory stall. When the device is congested, or
+ * the submitting cgroup IO-throttled, submission can be a significant
+ * part of overall IO time.
*/
- if (workingset_read)
- psi_memstall_enter(&pflags);
-
- ret = generic_make_request(bio);
+ if (unlikely(bio_op(bio) == REQ_OP_READ &&
+ bio_flagged(bio, BIO_WORKINGSET))) {
+ unsigned long pflags;
+ blk_qc_t ret;
- if (workingset_read)
+ psi_memstall_enter(&pflags);
+ ret = generic_make_request(bio);
psi_memstall_leave(&pflags);
- return ret;
+ return ret;
+ }
+
+ return generic_make_request(bio);
}
EXPORT_SYMBOL(submit_bio);
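A minimal sketch of the asynchronous contract documented above: the caller fully sets up the bio, hands it to submit_bio(), and leaves it alone until ->bi_end_io() fires. Device, sector and page here are placeholders.

static void demo_read_end_io(struct bio *bio)
{
	complete(bio->bi_private);
}

static int demo_read_page(struct block_device *bdev, sector_t sector,
			  struct page *page)
{
	DECLARE_COMPLETION_ONSTACK(done);
	struct bio *bio = bio_alloc(GFP_KERNEL, 1);
	int ret;

	bio_set_dev(bio, bdev);
	bio->bi_opf = REQ_OP_READ;
	bio->bi_iter.bi_sector = sector;
	bio_add_page(bio, page, PAGE_SIZE, 0);
	bio->bi_private = &done;
	bio->bi_end_io = demo_read_end_io;

	submit_bio(bio);		/* do not touch the bio until ->bi_end_io() runs */
	wait_for_completion(&done);
	ret = blk_status_to_errno(bio->bi_status);
	bio_put(bio);
	return ret;
}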
@@ -1260,8 +1333,11 @@ blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *
should_fail_request(&rq->rq_disk->part0, blk_rq_bytes(rq)))
return BLK_STS_IOERR;
+ if (blk_crypto_insert_cloned_request(rq))
+ return BLK_STS_IOERR;
+
if (blk_queue_io_stat(q))
- blk_account_io_start(rq, true);
+ blk_account_io_start(rq);
/*
* Since we have a scheduler attached on the top device,
@@ -1313,7 +1389,22 @@ unsigned int blk_rq_err_bytes(const struct request *rq)
}
EXPORT_SYMBOL_GPL(blk_rq_err_bytes);
-void blk_account_io_completion(struct request *req, unsigned int bytes)
+static void update_io_ticks(struct hd_struct *part, unsigned long now, bool end)
+{
+ unsigned long stamp;
+again:
+ stamp = READ_ONCE(part->stamp);
+ if (unlikely(stamp != now)) {
+ if (likely(cmpxchg(&part->stamp, stamp, now) == stamp))
+ __part_stat_add(part, io_ticks, end ? now - stamp : 1);
+ }
+ if (part->partno) {
+ part = &part_to_disk(part)->part0;
+ goto again;
+ }
+}
+
+static void blk_account_io_completion(struct request *req, unsigned int bytes)
{
if (req->part && blk_do_io_stat(req)) {
const int sgrp = op_stat_group(req_op(req));
@@ -1344,48 +1435,57 @@ void blk_account_io_done(struct request *req, u64 now)
update_io_ticks(part, jiffies, true);
part_stat_inc(part, ios[sgrp]);
part_stat_add(part, nsecs[sgrp], now - req->start_time_ns);
- part_dec_in_flight(req->q, part, rq_data_dir(req));
+ part_stat_unlock();
hd_struct_put(part);
- part_stat_unlock();
}
}
-void blk_account_io_start(struct request *rq, bool new_io)
+void blk_account_io_start(struct request *rq)
{
- struct hd_struct *part;
- int rw = rq_data_dir(rq);
-
if (!blk_do_io_stat(rq))
return;
+ rq->part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));
+
part_stat_lock();
+ update_io_ticks(rq->part, jiffies, false);
+ part_stat_unlock();
+}
- if (!new_io) {
- part = rq->part;
- part_stat_inc(part, merges[rw]);
- } else {
- part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));
- if (!hd_struct_try_get(part)) {
- /*
- * The partition is already being removed,
- * the request will be accounted on the disk only
- *
- * We take a reference on disk->part0 although that
- * partition will never be deleted, so we can treat
- * it as any other partition.
- */
- part = &rq->rq_disk->part0;
- hd_struct_get(part);
- }
- part_inc_in_flight(rq->q, part, rw);
- rq->part = part;
- }
+unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors,
+ unsigned int op)
+{
+ struct hd_struct *part = &disk->part0;
+ const int sgrp = op_stat_group(op);
+ unsigned long now = READ_ONCE(jiffies);
+
+ part_stat_lock();
+ update_io_ticks(part, now, false);
+ part_stat_inc(part, ios[sgrp]);
+ part_stat_add(part, sectors[sgrp], sectors);
+ part_stat_local_inc(part, in_flight[op_is_write(op)]);
+ part_stat_unlock();
- update_io_ticks(part, jiffies, false);
+ return now;
+}
+EXPORT_SYMBOL(disk_start_io_acct);
+
+void disk_end_io_acct(struct gendisk *disk, unsigned int op,
+ unsigned long start_time)
+{
+ struct hd_struct *part = &disk->part0;
+ const int sgrp = op_stat_group(op);
+ unsigned long now = READ_ONCE(jiffies);
+ unsigned long duration = now - start_time;
+ part_stat_lock();
+ update_io_ticks(part, now, true);
+ part_stat_add(part, nsecs[sgrp], jiffies_to_nsecs(duration));
+ part_stat_local_dec(part, in_flight[op_is_write(op)]);
part_stat_unlock();
}
+EXPORT_SYMBOL(disk_end_io_acct);
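The pair above replaces generic_start_io_acct()/generic_end_io_acct() for bio-based drivers and accounts against the whole disk (part0). A sketch of the intended call pattern; the driver names are invented.

/* Sketch of the intended usage in a bio-based driver (names assumed). */
static blk_qc_t demo_make_request(struct request_queue *q, struct bio *bio)
{
	struct gendisk *disk = bio->bi_disk;
	unsigned int op = bio_op(bio);
	unsigned long start;

	start = disk_start_io_acct(disk, bio_sectors(bio), op);

	/* ... perform or complete the actual transfer ... */

	disk_end_io_acct(disk, op, start);
	bio_endio(bio);
	return BLK_QC_T_NONE;
}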
/*
* Steal bios from a request and add them to a bio list.
@@ -1635,7 +1735,9 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
}
rq->nr_phys_segments = rq_src->nr_phys_segments;
rq->ioprio = rq_src->ioprio;
- rq->extra_len = rq_src->extra_len;
+
+ if (rq->bio)
+ blk_crypto_rq_bio_prep(rq, rq->bio, gfp_mask);
return 0;
@@ -1777,6 +1879,18 @@ void blk_finish_plug(struct blk_plug *plug)
}
EXPORT_SYMBOL(blk_finish_plug);
+void blk_io_schedule(void)
+{
+ /* Prevent hang_check timer from firing at us during very long I/O */
+ unsigned long timeout = sysctl_hung_task_timeout_secs * HZ / 2;
+
+ if (timeout)
+ io_schedule_timeout(timeout);
+ else
+ io_schedule();
+}
+EXPORT_SYMBOL_GPL(blk_io_schedule);
+
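Callers that used to sleep with io_schedule() while waiting on long-running I/O can switch to the helper above so the wait is chopped into chunks shorter than the hung-task timeout. A hedged sketch; the context struct is invented.

struct demo_io_ctx {
	bool io_done;	/* set by the completion path */
};

static void demo_wait_for_io(struct demo_io_ctx *ctx)
{
	for (;;) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (READ_ONCE(ctx->io_done))
			break;
		blk_io_schedule();	/* bounded by half the hung-task timeout */
	}
	__set_current_state(TASK_RUNNING);
}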
int __init blk_dev_init(void)
{
BUILD_BUG_ON(REQ_OP_LAST >= (1 << REQ_OP_BITS));
diff --git a/block/blk-crypto-fallback.c b/block/blk-crypto-fallback.c
new file mode 100644
index 000000000000..6e49688a2d80
--- /dev/null
+++ b/block/blk-crypto-fallback.c
@@ -0,0 +1,657 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2019 Google LLC
+ */
+
+/*
+ * Refer to Documentation/block/inline-encryption.rst for detailed explanation.
+ */
+
+#define pr_fmt(fmt) "blk-crypto-fallback: " fmt
+
+#include <crypto/skcipher.h>
+#include <linux/blk-cgroup.h>
+#include <linux/blk-crypto.h>
+#include <linux/blkdev.h>
+#include <linux/crypto.h>
+#include <linux/keyslot-manager.h>
+#include <linux/mempool.h>
+#include <linux/module.h>
+#include <linux/random.h>
+
+#include "blk-crypto-internal.h"
+
+static unsigned int num_prealloc_bounce_pg = 32;
+module_param(num_prealloc_bounce_pg, uint, 0);
+MODULE_PARM_DESC(num_prealloc_bounce_pg,
+ "Number of preallocated bounce pages for the blk-crypto crypto API fallback");
+
+static unsigned int blk_crypto_num_keyslots = 100;
+module_param_named(num_keyslots, blk_crypto_num_keyslots, uint, 0);
+MODULE_PARM_DESC(num_keyslots,
+ "Number of keyslots for the blk-crypto crypto API fallback");
+
+static unsigned int num_prealloc_fallback_crypt_ctxs = 128;
+module_param(num_prealloc_fallback_crypt_ctxs, uint, 0);
+MODULE_PARM_DESC(num_prealloc_fallback_crypt_ctxs,
+ "Number of preallocated bio fallback crypto contexts for blk-crypto to use during crypto API fallback");
+
+struct bio_fallback_crypt_ctx {
+ struct bio_crypt_ctx crypt_ctx;
+ /*
+ * Copy of the bvec_iter when this bio was submitted.
+ * We only want to en/decrypt the part of the bio as described by the
+ * bvec_iter upon submission because the bio might be split before being
+ * resubmitted.
+ */
+ struct bvec_iter crypt_iter;
+ union {
+ struct {
+ struct work_struct work;
+ struct bio *bio;
+ };
+ struct {
+ void *bi_private_orig;
+ bio_end_io_t *bi_end_io_orig;
+ };
+ };
+};
+
+static struct kmem_cache *bio_fallback_crypt_ctx_cache;
+static mempool_t *bio_fallback_crypt_ctx_pool;
+
+/*
+ * Allocating a crypto tfm during I/O can deadlock, so we have to preallocate
+ * all of a mode's tfms when that mode starts being used. Since each mode may
+ * need all the keyslots at some point, each mode needs its own tfm for each
+ * keyslot; thus, a keyslot may contain tfms for multiple modes. However, to
+ * match the behavior of real inline encryption hardware (which only supports a
+ * single encryption context per keyslot), we only allow one tfm per keyslot to
+ * be used at a time - the rest of the unused tfms have their keys cleared.
+ */
+static DEFINE_MUTEX(tfms_init_lock);
+static bool tfms_inited[BLK_ENCRYPTION_MODE_MAX];
+
+static struct blk_crypto_keyslot {
+ enum blk_crypto_mode_num crypto_mode;
+ struct crypto_skcipher *tfms[BLK_ENCRYPTION_MODE_MAX];
+} *blk_crypto_keyslots;
+
+static struct blk_keyslot_manager blk_crypto_ksm;
+static struct workqueue_struct *blk_crypto_wq;
+static mempool_t *blk_crypto_bounce_page_pool;
+
+/*
+ * This is the key we set when evicting a keyslot. This *should* be the all 0's
+ * key, but AES-XTS rejects that key, so we use some random bytes instead.
+ */
+static u8 blank_key[BLK_CRYPTO_MAX_KEY_SIZE];
+
+static void blk_crypto_evict_keyslot(unsigned int slot)
+{
+ struct blk_crypto_keyslot *slotp = &blk_crypto_keyslots[slot];
+ enum blk_crypto_mode_num crypto_mode = slotp->crypto_mode;
+ int err;
+
+ WARN_ON(slotp->crypto_mode == BLK_ENCRYPTION_MODE_INVALID);
+
+ /* Clear the key in the skcipher */
+ err = crypto_skcipher_setkey(slotp->tfms[crypto_mode], blank_key,
+ blk_crypto_modes[crypto_mode].keysize);
+ WARN_ON(err);
+ slotp->crypto_mode = BLK_ENCRYPTION_MODE_INVALID;
+}
+
+static int blk_crypto_keyslot_program(struct blk_keyslot_manager *ksm,
+ const struct blk_crypto_key *key,
+ unsigned int slot)
+{
+ struct blk_crypto_keyslot *slotp = &blk_crypto_keyslots[slot];
+ const enum blk_crypto_mode_num crypto_mode =
+ key->crypto_cfg.crypto_mode;
+ int err;
+
+ if (crypto_mode != slotp->crypto_mode &&
+ slotp->crypto_mode != BLK_ENCRYPTION_MODE_INVALID)
+ blk_crypto_evict_keyslot(slot);
+
+ slotp->crypto_mode = crypto_mode;
+ err = crypto_skcipher_setkey(slotp->tfms[crypto_mode], key->raw,
+ key->size);
+ if (err) {
+ blk_crypto_evict_keyslot(slot);
+ return err;
+ }
+ return 0;
+}
+
+static int blk_crypto_keyslot_evict(struct blk_keyslot_manager *ksm,
+ const struct blk_crypto_key *key,
+ unsigned int slot)
+{
+ blk_crypto_evict_keyslot(slot);
+ return 0;
+}
+
+/*
+ * The crypto API fallback KSM ops - only used for a bio when it specifies a
+ * blk_crypto_key that was not supported by the device's inline encryption
+ * hardware.
+ */
+static const struct blk_ksm_ll_ops blk_crypto_ksm_ll_ops = {
+ .keyslot_program = blk_crypto_keyslot_program,
+ .keyslot_evict = blk_crypto_keyslot_evict,
+};
+
+static void blk_crypto_fallback_encrypt_endio(struct bio *enc_bio)
+{
+ struct bio *src_bio = enc_bio->bi_private;
+ int i;
+
+ for (i = 0; i < enc_bio->bi_vcnt; i++)
+ mempool_free(enc_bio->bi_io_vec[i].bv_page,
+ blk_crypto_bounce_page_pool);
+
+ src_bio->bi_status = enc_bio->bi_status;
+
+ bio_put(enc_bio);
+ bio_endio(src_bio);
+}
+
+static struct bio *blk_crypto_clone_bio(struct bio *bio_src)
+{
+ struct bvec_iter iter;
+ struct bio_vec bv;
+ struct bio *bio;
+
+ bio = bio_alloc_bioset(GFP_NOIO, bio_segments(bio_src), NULL);
+ if (!bio)
+ return NULL;
+ bio->bi_disk = bio_src->bi_disk;
+ bio->bi_opf = bio_src->bi_opf;
+ bio->bi_ioprio = bio_src->bi_ioprio;
+ bio->bi_write_hint = bio_src->bi_write_hint;
+ bio->bi_iter.bi_sector = bio_src->bi_iter.bi_sector;
+ bio->bi_iter.bi_size = bio_src->bi_iter.bi_size;
+
+ bio_for_each_segment(bv, bio_src, iter)
+ bio->bi_io_vec[bio->bi_vcnt++] = bv;
+
+ bio_clone_blkg_association(bio, bio_src);
+ blkcg_bio_issue_init(bio);
+
+ return bio;
+}
+
+static bool blk_crypto_alloc_cipher_req(struct blk_ksm_keyslot *slot,
+ struct skcipher_request **ciph_req_ret,
+ struct crypto_wait *wait)
+{
+ struct skcipher_request *ciph_req;
+ const struct blk_crypto_keyslot *slotp;
+ int keyslot_idx = blk_ksm_get_slot_idx(slot);
+
+ slotp = &blk_crypto_keyslots[keyslot_idx];
+ ciph_req = skcipher_request_alloc(slotp->tfms[slotp->crypto_mode],
+ GFP_NOIO);
+ if (!ciph_req)
+ return false;
+
+ skcipher_request_set_callback(ciph_req,
+ CRYPTO_TFM_REQ_MAY_BACKLOG |
+ CRYPTO_TFM_REQ_MAY_SLEEP,
+ crypto_req_done, wait);
+ *ciph_req_ret = ciph_req;
+
+ return true;
+}
+
+static bool blk_crypto_split_bio_if_needed(struct bio **bio_ptr)
+{
+ struct bio *bio = *bio_ptr;
+ unsigned int i = 0;
+ unsigned int num_sectors = 0;
+ struct bio_vec bv;
+ struct bvec_iter iter;
+
+ bio_for_each_segment(bv, bio, iter) {
+ num_sectors += bv.bv_len >> SECTOR_SHIFT;
+ if (++i == BIO_MAX_PAGES)
+ break;
+ }
+ if (num_sectors < bio_sectors(bio)) {
+ struct bio *split_bio;
+
+ split_bio = bio_split(bio, num_sectors, GFP_NOIO, NULL);
+ if (!split_bio) {
+ bio->bi_status = BLK_STS_RESOURCE;
+ return false;
+ }
+ bio_chain(split_bio, bio);
+ generic_make_request(bio);
+ *bio_ptr = split_bio;
+ }
+
+ return true;
+}
+
+union blk_crypto_iv {
+ __le64 dun[BLK_CRYPTO_DUN_ARRAY_SIZE];
+ u8 bytes[BLK_CRYPTO_MAX_IV_SIZE];
+};
+
+static void blk_crypto_dun_to_iv(const u64 dun[BLK_CRYPTO_DUN_ARRAY_SIZE],
+ union blk_crypto_iv *iv)
+{
+ int i;
+
+ for (i = 0; i < BLK_CRYPTO_DUN_ARRAY_SIZE; i++)
+ iv->dun[i] = cpu_to_le64(dun[i]);
+}
+
+/*
+ * The crypto API fallback's encryption routine.
+ * Allocate a bounce bio for encryption, encrypt the input bio using the
+ * crypto API, and replace *bio_ptr with the bounce bio. May split the input
+ * bio if it's too large. Returns true on success. Returns false and sets
+ * bio->bi_status on error.
+ */
+static bool blk_crypto_fallback_encrypt_bio(struct bio **bio_ptr)
+{
+ struct bio *src_bio, *enc_bio;
+ struct bio_crypt_ctx *bc;
+ struct blk_ksm_keyslot *slot;
+ int data_unit_size;
+ struct skcipher_request *ciph_req = NULL;
+ DECLARE_CRYPTO_WAIT(wait);
+ u64 curr_dun[BLK_CRYPTO_DUN_ARRAY_SIZE];
+ struct scatterlist src, dst;
+ union blk_crypto_iv iv;
+ unsigned int i, j;
+ bool ret = false;
+ blk_status_t blk_st;
+
+ /* Split the bio if it's too big for single page bvec */
+ if (!blk_crypto_split_bio_if_needed(bio_ptr))
+ return false;
+
+ src_bio = *bio_ptr;
+ bc = src_bio->bi_crypt_context;
+ data_unit_size = bc->bc_key->crypto_cfg.data_unit_size;
+
+ /* Allocate bounce bio for encryption */
+ enc_bio = blk_crypto_clone_bio(src_bio);
+ if (!enc_bio) {
+ src_bio->bi_status = BLK_STS_RESOURCE;
+ return false;
+ }
+
+ /*
+ * Use the crypto API fallback keyslot manager to get a crypto_skcipher
+ * for the algorithm and key specified for this bio.
+ */
+ blk_st = blk_ksm_get_slot_for_key(&blk_crypto_ksm, bc->bc_key, &slot);
+ if (blk_st != BLK_STS_OK) {
+ src_bio->bi_status = blk_st;
+ goto out_put_enc_bio;
+ }
+
+ /* and then allocate an skcipher_request for it */
+ if (!blk_crypto_alloc_cipher_req(slot, &ciph_req, &wait)) {
+ src_bio->bi_status = BLK_STS_RESOURCE;
+ goto out_release_keyslot;
+ }
+
+ memcpy(curr_dun, bc->bc_dun, sizeof(curr_dun));
+ sg_init_table(&src, 1);
+ sg_init_table(&dst, 1);
+
+ skcipher_request_set_crypt(ciph_req, &src, &dst, data_unit_size,
+ iv.bytes);
+
+ /* Encrypt each page in the bounce bio */
+ for (i = 0; i < enc_bio->bi_vcnt; i++) {
+ struct bio_vec *enc_bvec = &enc_bio->bi_io_vec[i];
+ struct page *plaintext_page = enc_bvec->bv_page;
+ struct page *ciphertext_page =
+ mempool_alloc(blk_crypto_bounce_page_pool, GFP_NOIO);
+
+ enc_bvec->bv_page = ciphertext_page;
+
+ if (!ciphertext_page) {
+ src_bio->bi_status = BLK_STS_RESOURCE;
+ goto out_free_bounce_pages;
+ }
+
+ sg_set_page(&src, plaintext_page, data_unit_size,
+ enc_bvec->bv_offset);
+ sg_set_page(&dst, ciphertext_page, data_unit_size,
+ enc_bvec->bv_offset);
+
+ /* Encrypt each data unit in this page */
+ for (j = 0; j < enc_bvec->bv_len; j += data_unit_size) {
+ blk_crypto_dun_to_iv(curr_dun, &iv);
+ if (crypto_wait_req(crypto_skcipher_encrypt(ciph_req),
+ &wait)) {
+ i++;
+ src_bio->bi_status = BLK_STS_IOERR;
+ goto out_free_bounce_pages;
+ }
+ bio_crypt_dun_increment(curr_dun, 1);
+ src.offset += data_unit_size;
+ dst.offset += data_unit_size;
+ }
+ }
+
+ enc_bio->bi_private = src_bio;
+ enc_bio->bi_end_io = blk_crypto_fallback_encrypt_endio;
+ *bio_ptr = enc_bio;
+ ret = true;
+
+ enc_bio = NULL;
+ goto out_free_ciph_req;
+
+out_free_bounce_pages:
+ while (i > 0)
+ mempool_free(enc_bio->bi_io_vec[--i].bv_page,
+ blk_crypto_bounce_page_pool);
+out_free_ciph_req:
+ skcipher_request_free(ciph_req);
+out_release_keyslot:
+ blk_ksm_put_slot(slot);
+out_put_enc_bio:
+ if (enc_bio)
+ bio_put(enc_bio);
+
+ return ret;
+}
+
+/*
+ * The crypto API fallback's main decryption routine.
+ * Decrypts input bio in place, and calls bio_endio on the bio.
+ */
+static void blk_crypto_fallback_decrypt_bio(struct work_struct *work)
+{
+ struct bio_fallback_crypt_ctx *f_ctx =
+ container_of(work, struct bio_fallback_crypt_ctx, work);
+ struct bio *bio = f_ctx->bio;
+ struct bio_crypt_ctx *bc = &f_ctx->crypt_ctx;
+ struct blk_ksm_keyslot *slot;
+ struct skcipher_request *ciph_req = NULL;
+ DECLARE_CRYPTO_WAIT(wait);
+ u64 curr_dun[BLK_CRYPTO_DUN_ARRAY_SIZE];
+ union blk_crypto_iv iv;
+ struct scatterlist sg;
+ struct bio_vec bv;
+ struct bvec_iter iter;
+ const int data_unit_size = bc->bc_key->crypto_cfg.data_unit_size;
+ unsigned int i;
+ blk_status_t blk_st;
+
+ /*
+ * Use the crypto API fallback keyslot manager to get a crypto_skcipher
+ * for the algorithm and key specified for this bio.
+ */
+ blk_st = blk_ksm_get_slot_for_key(&blk_crypto_ksm, bc->bc_key, &slot);
+ if (blk_st != BLK_STS_OK) {
+ bio->bi_status = blk_st;
+ goto out_no_keyslot;
+ }
+
+ /* and then allocate an skcipher_request for it */
+ if (!blk_crypto_alloc_cipher_req(slot, &ciph_req, &wait)) {
+ bio->bi_status = BLK_STS_RESOURCE;
+ goto out;
+ }
+
+ memcpy(curr_dun, bc->bc_dun, sizeof(curr_dun));
+ sg_init_table(&sg, 1);
+ skcipher_request_set_crypt(ciph_req, &sg, &sg, data_unit_size,
+ iv.bytes);
+
+ /* Decrypt each segment in the bio */
+ __bio_for_each_segment(bv, bio, iter, f_ctx->crypt_iter) {
+ struct page *page = bv.bv_page;
+
+ sg_set_page(&sg, page, data_unit_size, bv.bv_offset);
+
+ /* Decrypt each data unit in the segment */
+ for (i = 0; i < bv.bv_len; i += data_unit_size) {
+ blk_crypto_dun_to_iv(curr_dun, &iv);
+ if (crypto_wait_req(crypto_skcipher_decrypt(ciph_req),
+ &wait)) {
+ bio->bi_status = BLK_STS_IOERR;
+ goto out;
+ }
+ bio_crypt_dun_increment(curr_dun, 1);
+ sg.offset += data_unit_size;
+ }
+ }
+
+out:
+ skcipher_request_free(ciph_req);
+ blk_ksm_put_slot(slot);
+out_no_keyslot:
+ mempool_free(f_ctx, bio_fallback_crypt_ctx_pool);
+ bio_endio(bio);
+}
+
+/**
+ * blk_crypto_fallback_decrypt_endio - queue bio for fallback decryption
+ *
+ * @bio: the bio to queue
+ *
+ * Restore bi_private and bi_end_io, and queue the bio for decryption onto a
+ * workqueue, since this function will be called from an atomic context.
+ */
+static void blk_crypto_fallback_decrypt_endio(struct bio *bio)
+{
+ struct bio_fallback_crypt_ctx *f_ctx = bio->bi_private;
+
+ bio->bi_private = f_ctx->bi_private_orig;
+ bio->bi_end_io = f_ctx->bi_end_io_orig;
+
+ /* If there was an IO error, don't queue for decrypt. */
+ if (bio->bi_status) {
+ mempool_free(f_ctx, bio_fallback_crypt_ctx_pool);
+ bio_endio(bio);
+ return;
+ }
+
+ INIT_WORK(&f_ctx->work, blk_crypto_fallback_decrypt_bio);
+ f_ctx->bio = bio;
+ queue_work(blk_crypto_wq, &f_ctx->work);
+}
+
+/**
+ * blk_crypto_fallback_bio_prep - Prepare a bio to use fallback en/decryption
+ *
+ * @bio_ptr: pointer to the bio to prepare
+ *
+ * If the bio is a WRITE operation, this splits the bio into two parts if it's
+ * too big (see blk_crypto_split_bio_if_needed). It then allocates a bounce bio
+ * for the first part, encrypts it, and updates *bio_ptr to point to the bounce
+ * bio.
+ *
+ * For a READ operation, we mark the bio for decryption by using bi_private and
+ * bi_end_io.
+ *
+ * In either case, this function will make the bio look like a regular bio (i.e.
+ * as if no encryption context was ever specified) for the purposes of the rest
+ * of the stack except for blk-integrity (blk-integrity and blk-crypto are not
+ * currently supported together).
+ *
+ * Return: true on success. Sets bio->bi_status and returns false on error.
+ */
+bool blk_crypto_fallback_bio_prep(struct bio **bio_ptr)
+{
+ struct bio *bio = *bio_ptr;
+ struct bio_crypt_ctx *bc = bio->bi_crypt_context;
+ struct bio_fallback_crypt_ctx *f_ctx;
+
+ if (WARN_ON_ONCE(!tfms_inited[bc->bc_key->crypto_cfg.crypto_mode])) {
+ /* User didn't call blk_crypto_start_using_key() first */
+ bio->bi_status = BLK_STS_IOERR;
+ return false;
+ }
+
+ if (!blk_ksm_crypto_cfg_supported(&blk_crypto_ksm,
+ &bc->bc_key->crypto_cfg)) {
+ bio->bi_status = BLK_STS_NOTSUPP;
+ return false;
+ }
+
+ if (bio_data_dir(bio) == WRITE)
+ return blk_crypto_fallback_encrypt_bio(bio_ptr);
+
+ /*
+ * bio READ case: Set up a f_ctx in the bio's bi_private and set the
+ * bi_end_io appropriately to trigger decryption when the bio is ended.
+ */
+ f_ctx = mempool_alloc(bio_fallback_crypt_ctx_pool, GFP_NOIO);
+ f_ctx->crypt_ctx = *bc;
+ f_ctx->crypt_iter = bio->bi_iter;
+ f_ctx->bi_private_orig = bio->bi_private;
+ f_ctx->bi_end_io_orig = bio->bi_end_io;
+ bio->bi_private = (void *)f_ctx;
+ bio->bi_end_io = blk_crypto_fallback_decrypt_endio;
+ bio_crypt_free_ctx(bio);
+
+ return true;
+}
+
+int blk_crypto_fallback_evict_key(const struct blk_crypto_key *key)
+{
+ return blk_ksm_evict_key(&blk_crypto_ksm, key);
+}
+
+static bool blk_crypto_fallback_inited;
+static int blk_crypto_fallback_init(void)
+{
+ int i;
+ int err;
+
+ if (blk_crypto_fallback_inited)
+ return 0;
+
+ prandom_bytes(blank_key, BLK_CRYPTO_MAX_KEY_SIZE);
+
+ err = blk_ksm_init(&blk_crypto_ksm, blk_crypto_num_keyslots);
+ if (err)
+ goto out;
+ err = -ENOMEM;
+
+ blk_crypto_ksm.ksm_ll_ops = blk_crypto_ksm_ll_ops;
+ blk_crypto_ksm.max_dun_bytes_supported = BLK_CRYPTO_MAX_IV_SIZE;
+
+ /* All blk-crypto modes have a crypto API fallback. */
+ for (i = 0; i < BLK_ENCRYPTION_MODE_MAX; i++)
+ blk_crypto_ksm.crypto_modes_supported[i] = 0xFFFFFFFF;
+ blk_crypto_ksm.crypto_modes_supported[BLK_ENCRYPTION_MODE_INVALID] = 0;
+
+ blk_crypto_wq = alloc_workqueue("blk_crypto_wq",
+ WQ_UNBOUND | WQ_HIGHPRI |
+ WQ_MEM_RECLAIM, num_online_cpus());
+ if (!blk_crypto_wq)
+ goto fail_free_ksm;
+
+ blk_crypto_keyslots = kcalloc(blk_crypto_num_keyslots,
+ sizeof(blk_crypto_keyslots[0]),
+ GFP_KERNEL);
+ if (!blk_crypto_keyslots)
+ goto fail_free_wq;
+
+ blk_crypto_bounce_page_pool =
+ mempool_create_page_pool(num_prealloc_bounce_pg, 0);
+ if (!blk_crypto_bounce_page_pool)
+ goto fail_free_keyslots;
+
+ bio_fallback_crypt_ctx_cache = KMEM_CACHE(bio_fallback_crypt_ctx, 0);
+ if (!bio_fallback_crypt_ctx_cache)
+ goto fail_free_bounce_page_pool;
+
+ bio_fallback_crypt_ctx_pool =
+ mempool_create_slab_pool(num_prealloc_fallback_crypt_ctxs,
+ bio_fallback_crypt_ctx_cache);
+ if (!bio_fallback_crypt_ctx_pool)
+ goto fail_free_crypt_ctx_cache;
+
+ blk_crypto_fallback_inited = true;
+
+ return 0;
+fail_free_crypt_ctx_cache:
+ kmem_cache_destroy(bio_fallback_crypt_ctx_cache);
+fail_free_bounce_page_pool:
+ mempool_destroy(blk_crypto_bounce_page_pool);
+fail_free_keyslots:
+ kfree(blk_crypto_keyslots);
+fail_free_wq:
+ destroy_workqueue(blk_crypto_wq);
+fail_free_ksm:
+ blk_ksm_destroy(&blk_crypto_ksm);
+out:
+ return err;
+}
+
+/*
+ * Prepare blk-crypto-fallback for the specified crypto mode.
+ * Returns -ENOPKG if the needed crypto API support is missing.
+ */
+int blk_crypto_fallback_start_using_mode(enum blk_crypto_mode_num mode_num)
+{
+ const char *cipher_str = blk_crypto_modes[mode_num].cipher_str;
+ struct blk_crypto_keyslot *slotp;
+ unsigned int i;
+ int err = 0;
+
+ /*
+ * Fast path
+ * Ensure that updates to blk_crypto_keyslots[i].tfms[mode_num]
+ * for each i are visible before we try to access them.
+ */
+ if (likely(smp_load_acquire(&tfms_inited[mode_num])))
+ return 0;
+
+ mutex_lock(&tfms_init_lock);
+ if (tfms_inited[mode_num])
+ goto out;
+
+ err = blk_crypto_fallback_init();
+ if (err)
+ goto out;
+
+ for (i = 0; i < blk_crypto_num_keyslots; i++) {
+ slotp = &blk_crypto_keyslots[i];
+ slotp->tfms[mode_num] = crypto_alloc_skcipher(cipher_str, 0, 0);
+ if (IS_ERR(slotp->tfms[mode_num])) {
+ err = PTR_ERR(slotp->tfms[mode_num]);
+ if (err == -ENOENT) {
+ pr_warn_once("Missing crypto API support for \"%s\"\n",
+ cipher_str);
+ err = -ENOPKG;
+ }
+ slotp->tfms[mode_num] = NULL;
+ goto out_free_tfms;
+ }
+
+ crypto_skcipher_set_flags(slotp->tfms[mode_num],
+ CRYPTO_TFM_REQ_FORBID_WEAK_KEYS);
+ }
+
+ /*
+ * Ensure that updates to blk_crypto_keyslots[i].tfms[mode_num]
+ * for each i are visible before we set tfms_inited[mode_num].
+ */
+ smp_store_release(&tfms_inited[mode_num], true);
+ goto out;
+
+out_free_tfms:
+ for (i = 0; i < blk_crypto_num_keyslots; i++) {
+ slotp = &blk_crypto_keyslots[i];
+ crypto_free_skcipher(slotp->tfms[mode_num]);
+ slotp->tfms[mode_num] = NULL;
+ }
+out:
+ mutex_unlock(&tfms_init_lock);
+ return err;
+}
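The tfms_inited[] fast path above relies on a one-time-publish pattern: the writer fully initialises blk_crypto_keyslots[i].tfms[mode_num] under tfms_init_lock and only then does smp_store_release(), so a reader whose smp_load_acquire() sees the flag is also guaranteed to see the initialised tfms. A rough stand-alone C11 analogue of that ordering follows; the names (shared_config, config_ready, publish_config) are illustrative only, not kernel APIs.

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>

static int shared_config;                 /* stands in for the tfms[] pointers */
static _Atomic bool config_ready;         /* stands in for tfms_inited[mode_num] */
static pthread_mutex_t init_lock = PTHREAD_MUTEX_INITIALIZER;

/* Writer: do all the initialisation, then publish with release semantics. */
static void publish_config(void)
{
        pthread_mutex_lock(&init_lock);
        if (!atomic_load_explicit(&config_ready, memory_order_relaxed)) {
                shared_config = 42;       /* the "real" initialisation work */
                atomic_store_explicit(&config_ready, true, memory_order_release);
        }
        pthread_mutex_unlock(&init_lock);
}

/*
 * Reader fast path: the acquire load pairs with the release store above, so
 * observing config_ready == true implies shared_config is fully visible too.
 */
static int read_config_or_fail(void)
{
        if (atomic_load_explicit(&config_ready, memory_order_acquire))
                return shared_config;
        return -1;                        /* not initialised yet */
}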
diff --git a/block/blk-crypto-internal.h b/block/blk-crypto-internal.h
new file mode 100644
index 000000000000..d2b0f565d83c
--- /dev/null
+++ b/block/blk-crypto-internal.h
@@ -0,0 +1,201 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2019 Google LLC
+ */
+
+#ifndef __LINUX_BLK_CRYPTO_INTERNAL_H
+#define __LINUX_BLK_CRYPTO_INTERNAL_H
+
+#include <linux/bio.h>
+#include <linux/blkdev.h>
+
+/* Represents a crypto mode supported by blk-crypto */
+struct blk_crypto_mode {
+ const char *cipher_str; /* crypto API name (for fallback case) */
+ unsigned int keysize; /* key size in bytes */
+ unsigned int ivsize; /* iv size in bytes */
+};
+
+extern const struct blk_crypto_mode blk_crypto_modes[];
+
+#ifdef CONFIG_BLK_INLINE_ENCRYPTION
+
+void bio_crypt_dun_increment(u64 dun[BLK_CRYPTO_DUN_ARRAY_SIZE],
+ unsigned int inc);
+
+bool bio_crypt_rq_ctx_compatible(struct request *rq, struct bio *bio);
+
+bool bio_crypt_ctx_mergeable(struct bio_crypt_ctx *bc1, unsigned int bc1_bytes,
+ struct bio_crypt_ctx *bc2);
+
+static inline bool bio_crypt_ctx_back_mergeable(struct request *req,
+ struct bio *bio)
+{
+ return bio_crypt_ctx_mergeable(req->crypt_ctx, blk_rq_bytes(req),
+ bio->bi_crypt_context);
+}
+
+static inline bool bio_crypt_ctx_front_mergeable(struct request *req,
+ struct bio *bio)
+{
+ return bio_crypt_ctx_mergeable(bio->bi_crypt_context,
+ bio->bi_iter.bi_size, req->crypt_ctx);
+}
+
+static inline bool bio_crypt_ctx_merge_rq(struct request *req,
+ struct request *next)
+{
+ return bio_crypt_ctx_mergeable(req->crypt_ctx, blk_rq_bytes(req),
+ next->crypt_ctx);
+}
+
+static inline void blk_crypto_rq_set_defaults(struct request *rq)
+{
+ rq->crypt_ctx = NULL;
+ rq->crypt_keyslot = NULL;
+}
+
+static inline bool blk_crypto_rq_is_encrypted(struct request *rq)
+{
+ return rq->crypt_ctx;
+}
+
+#else /* CONFIG_BLK_INLINE_ENCRYPTION */
+
+static inline bool bio_crypt_rq_ctx_compatible(struct request *rq,
+ struct bio *bio)
+{
+ return true;
+}
+
+static inline bool bio_crypt_ctx_front_mergeable(struct request *req,
+ struct bio *bio)
+{
+ return true;
+}
+
+static inline bool bio_crypt_ctx_back_mergeable(struct request *req,
+ struct bio *bio)
+{
+ return true;
+}
+
+static inline bool bio_crypt_ctx_merge_rq(struct request *req,
+ struct request *next)
+{
+ return true;
+}
+
+static inline void blk_crypto_rq_set_defaults(struct request *rq) { }
+
+static inline bool blk_crypto_rq_is_encrypted(struct request *rq)
+{
+ return false;
+}
+
+#endif /* CONFIG_BLK_INLINE_ENCRYPTION */
+
+void __bio_crypt_advance(struct bio *bio, unsigned int bytes);
+static inline void bio_crypt_advance(struct bio *bio, unsigned int bytes)
+{
+ if (bio_has_crypt_ctx(bio))
+ __bio_crypt_advance(bio, bytes);
+}
+
+void __bio_crypt_free_ctx(struct bio *bio);
+static inline void bio_crypt_free_ctx(struct bio *bio)
+{
+ if (bio_has_crypt_ctx(bio))
+ __bio_crypt_free_ctx(bio);
+}
+
+static inline void bio_crypt_do_front_merge(struct request *rq,
+ struct bio *bio)
+{
+#ifdef CONFIG_BLK_INLINE_ENCRYPTION
+ if (bio_has_crypt_ctx(bio))
+ memcpy(rq->crypt_ctx->bc_dun, bio->bi_crypt_context->bc_dun,
+ sizeof(rq->crypt_ctx->bc_dun));
+#endif
+}
+
+bool __blk_crypto_bio_prep(struct bio **bio_ptr);
+static inline bool blk_crypto_bio_prep(struct bio **bio_ptr)
+{
+ if (bio_has_crypt_ctx(*bio_ptr))
+ return __blk_crypto_bio_prep(bio_ptr);
+ return true;
+}
+
+blk_status_t __blk_crypto_init_request(struct request *rq);
+static inline blk_status_t blk_crypto_init_request(struct request *rq)
+{
+ if (blk_crypto_rq_is_encrypted(rq))
+ return __blk_crypto_init_request(rq);
+ return BLK_STS_OK;
+}
+
+void __blk_crypto_free_request(struct request *rq);
+static inline void blk_crypto_free_request(struct request *rq)
+{
+ if (blk_crypto_rq_is_encrypted(rq))
+ __blk_crypto_free_request(rq);
+}
+
+void __blk_crypto_rq_bio_prep(struct request *rq, struct bio *bio,
+ gfp_t gfp_mask);
+static inline void blk_crypto_rq_bio_prep(struct request *rq, struct bio *bio,
+ gfp_t gfp_mask)
+{
+ if (bio_has_crypt_ctx(bio))
+ __blk_crypto_rq_bio_prep(rq, bio, gfp_mask);
+}
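The wrappers in this header all follow the same shape: an out-of-line __helper() does the real work, and a static inline guard checks a cheap predicate (bio_has_crypt_ctx(), blk_crypto_rq_is_encrypted()) so that IO which never uses inline encryption pays only a pointer test. A minimal sketch of that idiom, using hypothetical names (widget and needs_frob are not kernel symbols):

#include <stdbool.h>

struct widget {
        bool needs_frob;
        /* other fields elided */
};

/* Out-of-line slow path; only referenced when the predicate holds. */
void __frob_widget(struct widget *w);

/* Inline fast path: callers that never set ->needs_frob pay only this test. */
static inline void frob_widget(struct widget *w)
{
        if (w->needs_frob)
                __frob_widget(w);
}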
+
+/**
+ * blk_crypto_insert_cloned_request - Prepare a cloned request to be inserted
+ * into a request queue.
+ * @rq: the request being queued
+ *
+ * Return: BLK_STS_OK on success, nonzero on error.
+ */
+static inline blk_status_t blk_crypto_insert_cloned_request(struct request *rq)
+{
+ if (blk_crypto_rq_is_encrypted(rq))
+ return blk_crypto_init_request(rq);
+ return BLK_STS_OK;
+}
+
+#ifdef CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK
+
+int blk_crypto_fallback_start_using_mode(enum blk_crypto_mode_num mode_num);
+
+bool blk_crypto_fallback_bio_prep(struct bio **bio_ptr);
+
+int blk_crypto_fallback_evict_key(const struct blk_crypto_key *key);
+
+#else /* CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK */
+
+static inline int
+blk_crypto_fallback_start_using_mode(enum blk_crypto_mode_num mode_num)
+{
+ pr_warn_once("crypto API fallback is disabled\n");
+ return -ENOPKG;
+}
+
+static inline bool blk_crypto_fallback_bio_prep(struct bio **bio_ptr)
+{
+ pr_warn_once("crypto API fallback disabled; failing request.\n");
+ (*bio_ptr)->bi_status = BLK_STS_NOTSUPP;
+ return false;
+}
+
+static inline int
+blk_crypto_fallback_evict_key(const struct blk_crypto_key *key)
+{
+ return 0;
+}
+
+#endif /* CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK */
+
+#endif /* __LINUX_BLK_CRYPTO_INTERNAL_H */
diff --git a/block/blk-crypto.c b/block/blk-crypto.c
new file mode 100644
index 000000000000..6533c9b36ab8
--- /dev/null
+++ b/block/blk-crypto.c
@@ -0,0 +1,404 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2019 Google LLC
+ */
+
+/*
+ * Refer to Documentation/block/inline-encryption.rst for detailed explanation.
+ */
+
+#define pr_fmt(fmt) "blk-crypto: " fmt
+
+#include <linux/bio.h>
+#include <linux/blkdev.h>
+#include <linux/keyslot-manager.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+#include "blk-crypto-internal.h"
+
+const struct blk_crypto_mode blk_crypto_modes[] = {
+ [BLK_ENCRYPTION_MODE_AES_256_XTS] = {
+ .cipher_str = "xts(aes)",
+ .keysize = 64,
+ .ivsize = 16,
+ },
+ [BLK_ENCRYPTION_MODE_AES_128_CBC_ESSIV] = {
+ .cipher_str = "essiv(cbc(aes),sha256)",
+ .keysize = 16,
+ .ivsize = 16,
+ },
+ [BLK_ENCRYPTION_MODE_ADIANTUM] = {
+ .cipher_str = "adiantum(xchacha12,aes)",
+ .keysize = 32,
+ .ivsize = 32,
+ },
+};
+
+/*
+ * This number needs to be at least (the number of threads doing IO
+ * concurrently) * (maximum recursive depth of a bio), so that we don't
+ * deadlock on crypt_ctx allocations. The default is chosen to be the same
+ * as the default number of post read contexts in both EXT4 and F2FS.
+ */
+static int num_prealloc_crypt_ctxs = 128;
+
+module_param(num_prealloc_crypt_ctxs, int, 0444);
+MODULE_PARM_DESC(num_prealloc_crypt_ctxs,
+ "Number of bio crypto contexts to preallocate");
+
+static struct kmem_cache *bio_crypt_ctx_cache;
+static mempool_t *bio_crypt_ctx_pool;
+
+static int __init bio_crypt_ctx_init(void)
+{
+ size_t i;
+
+ bio_crypt_ctx_cache = KMEM_CACHE(bio_crypt_ctx, 0);
+ if (!bio_crypt_ctx_cache)
+ goto out_no_mem;
+
+ bio_crypt_ctx_pool = mempool_create_slab_pool(num_prealloc_crypt_ctxs,
+ bio_crypt_ctx_cache);
+ if (!bio_crypt_ctx_pool)
+ goto out_no_mem;
+
+ /* This is assumed in various places. */
+ BUILD_BUG_ON(BLK_ENCRYPTION_MODE_INVALID != 0);
+
+ /* Sanity check that no algorithm exceeds the defined limits. */
+ for (i = 0; i < BLK_ENCRYPTION_MODE_MAX; i++) {
+ BUG_ON(blk_crypto_modes[i].keysize > BLK_CRYPTO_MAX_KEY_SIZE);
+ BUG_ON(blk_crypto_modes[i].ivsize > BLK_CRYPTO_MAX_IV_SIZE);
+ }
+
+ return 0;
+out_no_mem:
+ panic("Failed to allocate mem for bio crypt ctxs\n");
+}
+subsys_initcall(bio_crypt_ctx_init);
+
+void bio_crypt_set_ctx(struct bio *bio, const struct blk_crypto_key *key,
+ const u64 dun[BLK_CRYPTO_DUN_ARRAY_SIZE], gfp_t gfp_mask)
+{
+ struct bio_crypt_ctx *bc = mempool_alloc(bio_crypt_ctx_pool, gfp_mask);
+
+ bc->bc_key = key;
+ memcpy(bc->bc_dun, dun, sizeof(bc->bc_dun));
+
+ bio->bi_crypt_context = bc;
+}
+
+void __bio_crypt_free_ctx(struct bio *bio)
+{
+ mempool_free(bio->bi_crypt_context, bio_crypt_ctx_pool);
+ bio->bi_crypt_context = NULL;
+}
+
+void __bio_crypt_clone(struct bio *dst, struct bio *src, gfp_t gfp_mask)
+{
+ dst->bi_crypt_context = mempool_alloc(bio_crypt_ctx_pool, gfp_mask);
+ *dst->bi_crypt_context = *src->bi_crypt_context;
+}
+EXPORT_SYMBOL_GPL(__bio_crypt_clone);
+
+/* Increments @dun by @inc, treating @dun as a multi-limb integer. */
+void bio_crypt_dun_increment(u64 dun[BLK_CRYPTO_DUN_ARRAY_SIZE],
+ unsigned int inc)
+{
+ int i;
+
+ for (i = 0; inc && i < BLK_CRYPTO_DUN_ARRAY_SIZE; i++) {
+ dun[i] += inc;
+ /*
+ * If the addition in this limb overflowed, then we need to
+ * carry 1 into the next limb. Else the carry is 0.
+ */
+ if (dun[i] < inc)
+ inc = 1;
+ else
+ inc = 0;
+ }
+}
+
+void __bio_crypt_advance(struct bio *bio, unsigned int bytes)
+{
+ struct bio_crypt_ctx *bc = bio->bi_crypt_context;
+
+ bio_crypt_dun_increment(bc->bc_dun,
+ bytes >> bc->bc_key->data_unit_size_bits);
+}
+
+/*
+ * Returns true if @bc->bc_dun plus @bytes converted to data units is equal to
+ * @next_dun, treating the DUNs as multi-limb integers.
+ */
+bool bio_crypt_dun_is_contiguous(const struct bio_crypt_ctx *bc,
+ unsigned int bytes,
+ const u64 next_dun[BLK_CRYPTO_DUN_ARRAY_SIZE])
+{
+ int i;
+ unsigned int carry = bytes >> bc->bc_key->data_unit_size_bits;
+
+ for (i = 0; i < BLK_CRYPTO_DUN_ARRAY_SIZE; i++) {
+ if (bc->bc_dun[i] + carry != next_dun[i])
+ return false;
+ /*
+ * If the addition in this limb overflowed, then we need to
+ * carry 1 into the next limb. Else the carry is 0.
+ */
+ if ((bc->bc_dun[i] + carry) < carry)
+ carry = 1;
+ else
+ carry = 0;
+ }
+
+ /* If the DUN wrapped through 0, don't treat it as contiguous. */
+ return carry == 0;
+}
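Both helpers treat the DUN as a little-endian multi-limb integer: bio_crypt_dun_increment() propagates the carry limb by limb, and bio_crypt_dun_is_contiguous() checks that advancing by the bio's size in data units lands exactly on the next context's DUN. A small user-space re-implementation of the carry logic, for illustration only (DUN_LIMBS stands in for BLK_CRYPTO_DUN_ARRAY_SIZE):

#include <stdint.h>
#include <stdio.h>

#define DUN_LIMBS 4     /* stand-in for BLK_CRYPTO_DUN_ARRAY_SIZE */

/* Same carry-propagation idea as bio_crypt_dun_increment(). */
static void dun_increment(uint64_t dun[DUN_LIMBS], unsigned int inc)
{
        for (int i = 0; inc && i < DUN_LIMBS; i++) {
                dun[i] += inc;
                /* carry into the next limb iff this limb wrapped around */
                inc = (dun[i] < inc) ? 1 : 0;
        }
}

int main(void)
{
        /* limb 0 is all-ones, so adding 1 wraps it and carries into limb 1 */
        uint64_t dun[DUN_LIMBS] = { UINT64_MAX, 5, 0, 0 };

        dun_increment(dun, 1);
        printf("dun = { %llu, %llu, %llu, %llu }\n",
               (unsigned long long)dun[0], (unsigned long long)dun[1],
               (unsigned long long)dun[2], (unsigned long long)dun[3]);
        /* prints: dun = { 0, 6, 0, 0 } */
        return 0;
}

The same wrap-through-zero case is what makes bio_crypt_dun_is_contiguous() refuse to treat two contexts as contiguous.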
+
+/*
+ * Checks that two bio crypt contexts are compatible - i.e. that
+ * they are mergeable except for data_unit_num continuity.
+ */
+static bool bio_crypt_ctx_compatible(struct bio_crypt_ctx *bc1,
+ struct bio_crypt_ctx *bc2)
+{
+ if (!bc1)
+ return !bc2;
+
+ return bc2 && bc1->bc_key == bc2->bc_key;
+}
+
+bool bio_crypt_rq_ctx_compatible(struct request *rq, struct bio *bio)
+{
+ return bio_crypt_ctx_compatible(rq->crypt_ctx, bio->bi_crypt_context);
+}
+
+/*
+ * Checks that two bio crypt contexts are compatible, and also
+ * that their data_unit_nums are continuous (and can hence be merged)
+ * in the order @bc1 followed by @bc2.
+ */
+bool bio_crypt_ctx_mergeable(struct bio_crypt_ctx *bc1, unsigned int bc1_bytes,
+ struct bio_crypt_ctx *bc2)
+{
+ if (!bio_crypt_ctx_compatible(bc1, bc2))
+ return false;
+
+ return !bc1 || bio_crypt_dun_is_contiguous(bc1, bc1_bytes, bc2->bc_dun);
+}
+
+/* Check that all I/O segments are data unit aligned. */
+static bool bio_crypt_check_alignment(struct bio *bio)
+{
+ const unsigned int data_unit_size =
+ bio->bi_crypt_context->bc_key->crypto_cfg.data_unit_size;
+ struct bvec_iter iter;
+ struct bio_vec bv;
+
+ bio_for_each_segment(bv, bio, iter) {
+ if (!IS_ALIGNED(bv.bv_len | bv.bv_offset, data_unit_size))
+ return false;
+ }
+
+ return true;
+}
+
+blk_status_t __blk_crypto_init_request(struct request *rq)
+{
+ return blk_ksm_get_slot_for_key(rq->q->ksm, rq->crypt_ctx->bc_key,
+ &rq->crypt_keyslot);
+}
+
+/**
+ * __blk_crypto_free_request - Uninitialize the crypto fields of a request.
+ *
+ * @rq: The request whose crypto fields to uninitialize.
+ *
+ * Completely uninitializes the crypto fields of a request. If a keyslot has
+ * been programmed into some inline encryption hardware, that keyslot is
+ * released. The rq->crypt_ctx is also freed.
+ */
+void __blk_crypto_free_request(struct request *rq)
+{
+ blk_ksm_put_slot(rq->crypt_keyslot);
+ mempool_free(rq->crypt_ctx, bio_crypt_ctx_pool);
+ blk_crypto_rq_set_defaults(rq);
+}
+
+/**
+ * __blk_crypto_bio_prep - Prepare bio for inline encryption
+ *
+ * @bio_ptr: pointer to original bio pointer
+ *
+ * If the bio crypt context provided for the bio is supported by the underlying
+ * device's inline encryption hardware, do nothing.
+ *
+ * Otherwise, try to perform en/decryption for this bio by falling back to the
+ * kernel crypto API. When the crypto API fallback is used for encryption,
+ * blk-crypto may choose to split the bio into two - the first one that will
+ * continue to be processed and the second one that will be resubmitted via
+ * generic_make_request. A bounce bio will be allocated to encrypt the contents
+ * of the aforementioned "first one", and *bio_ptr will be updated to this
+ * bounce bio.
+ *
+ * Caller must ensure bio has bio_crypt_ctx.
+ *
+ * Return: true on success; false on error (and bio->bi_status will be set
+ * appropriately, and bio_endio() will have been called so bio
+ * submission should abort).
+ */
+bool __blk_crypto_bio_prep(struct bio **bio_ptr)
+{
+ struct bio *bio = *bio_ptr;
+ const struct blk_crypto_key *bc_key = bio->bi_crypt_context->bc_key;
+
+ /* Error if bio has no data. */
+ if (WARN_ON_ONCE(!bio_has_data(bio))) {
+ bio->bi_status = BLK_STS_IOERR;
+ goto fail;
+ }
+
+ if (!bio_crypt_check_alignment(bio)) {
+ bio->bi_status = BLK_STS_IOERR;
+ goto fail;
+ }
+
+ /*
+ * Success if device supports the encryption context, or if we succeeded
+ * in falling back to the crypto API.
+ */
+ if (blk_ksm_crypto_cfg_supported(bio->bi_disk->queue->ksm,
+ &bc_key->crypto_cfg))
+ return true;
+
+ if (blk_crypto_fallback_bio_prep(bio_ptr))
+ return true;
+fail:
+ bio_endio(*bio_ptr);
+ return false;
+}
+
+/**
+ * __blk_crypto_rq_bio_prep - Prepare a request's crypt_ctx when its first bio
+ * is inserted
+ *
+ * @rq: The request to prepare
+ * @bio: The first bio being inserted into the request
+ * @gfp_mask: gfp mask
+ */
+void __blk_crypto_rq_bio_prep(struct request *rq, struct bio *bio,
+ gfp_t gfp_mask)
+{
+ if (!rq->crypt_ctx)
+ rq->crypt_ctx = mempool_alloc(bio_crypt_ctx_pool, gfp_mask);
+ *rq->crypt_ctx = *bio->bi_crypt_context;
+}
+
+/**
+ * blk_crypto_init_key() - Prepare a key for use with blk-crypto
+ * @blk_key: Pointer to the blk_crypto_key to initialize.
+ * @raw_key: Pointer to the raw key. Must be the correct length for the chosen
+ * @crypto_mode; see blk_crypto_modes[].
+ * @crypto_mode: identifier for the encryption algorithm to use
+ * @dun_bytes: number of bytes that will be used to specify the DUN when this
+ * key is used
+ * @data_unit_size: the data unit size to use for en/decryption
+ *
+ * Return: 0 on success, -errno on failure. The caller is responsible for
+ * zeroizing both blk_key and raw_key when done with them.
+ */
+int blk_crypto_init_key(struct blk_crypto_key *blk_key, const u8 *raw_key,
+ enum blk_crypto_mode_num crypto_mode,
+ unsigned int dun_bytes,
+ unsigned int data_unit_size)
+{
+ const struct blk_crypto_mode *mode;
+
+ memset(blk_key, 0, sizeof(*blk_key));
+
+ if (crypto_mode >= ARRAY_SIZE(blk_crypto_modes))
+ return -EINVAL;
+
+ mode = &blk_crypto_modes[crypto_mode];
+ if (mode->keysize == 0)
+ return -EINVAL;
+
+ if (dun_bytes == 0 || dun_bytes > BLK_CRYPTO_MAX_IV_SIZE)
+ return -EINVAL;
+
+ if (!is_power_of_2(data_unit_size))
+ return -EINVAL;
+
+ blk_key->crypto_cfg.crypto_mode = crypto_mode;
+ blk_key->crypto_cfg.dun_bytes = dun_bytes;
+ blk_key->crypto_cfg.data_unit_size = data_unit_size;
+ blk_key->data_unit_size_bits = ilog2(data_unit_size);
+ blk_key->size = mode->keysize;
+ memcpy(blk_key->raw, raw_key, mode->keysize);
+
+ return 0;
+}
+
+/*
+ * Check if bios with @cfg can be en/decrypted by blk-crypto (i.e. either the
+ * request queue they're submitted to supports inline crypto, or the
+ * blk-crypto-fallback is enabled and supports the cfg).
+ */
+bool blk_crypto_config_supported(struct request_queue *q,
+ const struct blk_crypto_config *cfg)
+{
+ return IS_ENABLED(CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK) ||
+ blk_ksm_crypto_cfg_supported(q->ksm, cfg);
+}
+
+/**
+ * blk_crypto_start_using_key() - Start using a blk_crypto_key on a device
+ * @key: A key to use on the device
+ * @q: the request queue for the device
+ *
+ * Upper layers must call this function to ensure that either the hardware
+ * supports the key's crypto settings, or the crypto API fallback has transforms
+ * for the needed mode allocated and ready to go. This function may allocate
+ * an skcipher, and *should not* be called from the data path, since that might
+ * cause a deadlock.
+ *
+ * Return: 0 on success; -ENOPKG if the hardware doesn't support the key and
+ * blk-crypto-fallback is either disabled or the needed algorithm
+ * is disabled in the crypto API; or another -errno code.
+ */
+int blk_crypto_start_using_key(const struct blk_crypto_key *key,
+ struct request_queue *q)
+{
+ if (blk_ksm_crypto_cfg_supported(q->ksm, &key->crypto_cfg))
+ return 0;
+ return blk_crypto_fallback_start_using_mode(key->crypto_cfg.crypto_mode);
+}
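To show how these pieces fit together, here is a hedged sketch of how an upper layer might drive the API end to end. The mode, DUN width, data unit size, helper name and error handling are illustrative assumptions rather than a sequence taken from this patch, and <linux/blk-crypto.h> is assumed to carry the public declarations.

#include <linux/blkdev.h>
#include <linux/blk-crypto.h>   /* assumed home of the public declarations */
#include <linux/string.h>

/* Illustrative only: the caller already owns @q, a filled-in @bio and a raw key. */
static int example_submit_encrypted(struct request_queue *q, struct bio *bio,
                                    const u8 raw_key[BLK_CRYPTO_MAX_KEY_SIZE])
{
        struct blk_crypto_key key;
        u64 dun[BLK_CRYPTO_DUN_ARRAY_SIZE] = { 0 };     /* start at data unit 0 */
        int err;

        /* Example parameters: AES-256-XTS, 8-byte DUNs, 4096-byte data units. */
        err = blk_crypto_init_key(&key, raw_key, BLK_ENCRYPTION_MODE_AES_256_XTS,
                                  8, 4096);
        if (err)
                return err;

        /* Make sure the hardware or the crypto API fallback can handle the key. */
        err = blk_crypto_start_using_key(&key, q);
        if (err)
                goto out;

        /* The key must outlive the IO; waiting for completion keeps that true here. */
        bio_crypt_set_ctx(bio, &key, dun, GFP_NOIO);
        err = submit_bio_wait(bio);

        /* No in-flight IO uses the key anymore, so it can be evicted everywhere. */
        blk_crypto_evict_key(q, &key);
out:
        memzero_explicit(&key, sizeof(key));    /* caller is responsible for zeroizing */
        return err;
}

Note that blk_crypto_evict_key() below falls back to blk_crypto_fallback_evict_key() when the queue's hardware never held the key.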
+
+/**
+ * blk_crypto_evict_key() - Evict a key from any inline encryption hardware
+ * it may have been programmed into
+ * @q: The request queue whose associated inline encryption hardware this key
+ * might have been programmed into
+ * @key: The key to evict
+ *
+ * Upper layers (filesystems) must call this function to ensure that a key is
+ * evicted from any hardware that it might have been programmed into. The key
+ * must not be in use by any in-flight IO when this function is called.
+ *
+ * Return: 0 on success or if the key is not present in the q's ksm; -err on error.
+ */
+int blk_crypto_evict_key(struct request_queue *q,
+ const struct blk_crypto_key *key)
+{
+ if (blk_ksm_crypto_cfg_supported(q->ksm, &key->crypto_cfg))
+ return blk_ksm_evict_key(q->ksm, key);
+
+ /*
+ * If the request queue's associated inline encryption hardware didn't
+ * have support for the key, then the key might have been programmed
+ * into the fallback keyslot manager, so try to evict from there.
+ */
+ return blk_crypto_fallback_evict_key(key);
+}
diff --git a/block/blk-exec.c b/block/blk-exec.c
index e20a852ae432..85324d53d072 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -55,7 +55,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
rq->rq_disk = bd_disk;
rq->end_io = done;
- blk_account_io_start(rq, true);
+ blk_account_io_start(rq);
/*
* don't check dying flag for MQ because the request won't
diff --git a/block/blk-flush.c b/block/blk-flush.c
index c7f396e3d5e2..15ae0155ec07 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -258,7 +258,6 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error)
blk_flush_complete_seq(rq, fq, seq, error);
}
- fq->flush_queue_delayed = 0;
spin_unlock_irqrestore(&fq->mq_flush_lock, flags);
}
@@ -433,41 +432,20 @@ void blk_insert_flush(struct request *rq)
* blkdev_issue_flush - queue a flush
* @bdev: blockdev to issue flush for
* @gfp_mask: memory allocation flags (for bio_alloc)
- * @error_sector: error sector
*
* Description:
- * Issue a flush for the block device in question. Caller can supply
- * room for storing the error offset in case of a flush error, if they
- * wish to.
+ * Issue a flush for the block device in question.
*/
-int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
- sector_t *error_sector)
+int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask)
{
- struct request_queue *q;
struct bio *bio;
int ret = 0;
- if (bdev->bd_disk == NULL)
- return -ENXIO;
-
- q = bdev_get_queue(bdev);
- if (!q)
- return -ENXIO;
-
bio = bio_alloc(gfp_mask, 0);
bio_set_dev(bio, bdev);
bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
ret = submit_bio_wait(bio);
-
- /*
- * The driver must store the error location in ->bi_sector, if
- * it supports it. For non-stacked drivers, this should be
- * copied from blk_rq_pos(rq).
- */
- if (error_sector)
- *error_sector = bio->bi_iter.bi_sector;
-
bio_put(bio);
return ret;
}
diff --git a/block/blk-integrity.c b/block/blk-integrity.c
index ff1070edbb40..c03705cbb9c9 100644
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c
@@ -409,6 +409,13 @@ void blk_integrity_register(struct gendisk *disk, struct blk_integrity *template
bi->tag_size = template->tag_size;
disk->queue->backing_dev_info->capabilities |= BDI_CAP_STABLE_WRITES;
+
+#ifdef CONFIG_BLK_INLINE_ENCRYPTION
+ if (disk->queue->ksm) {
+ pr_warn("blk-integrity: Integrity and hardware inline encryption are not supported together. Disabling hardware inline encryption.\n");
+ blk_ksm_unregister(disk->queue);
+ }
+#endif
}
EXPORT_SYMBOL(blk_integrity_register);
diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index 7c1fe605d0d6..8ac4aad66ebc 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -260,6 +260,7 @@ enum {
VTIME_PER_SEC_SHIFT = 37,
VTIME_PER_SEC = 1LLU << VTIME_PER_SEC_SHIFT,
VTIME_PER_USEC = VTIME_PER_SEC / USEC_PER_SEC,
+ VTIME_PER_NSEC = VTIME_PER_SEC / NSEC_PER_SEC,
/* bound vrate adjustments within two orders of magnitude */
VRATE_MIN_PPM = 10000, /* 1% */
@@ -1206,14 +1207,14 @@ static enum hrtimer_restart iocg_waitq_timer_fn(struct hrtimer *timer)
return HRTIMER_NORESTART;
}
-static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now, u64 cost)
+static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now)
{
struct ioc *ioc = iocg->ioc;
struct blkcg_gq *blkg = iocg_to_blkg(iocg);
u64 vtime = atomic64_read(&iocg->vtime);
u64 vmargin = ioc->margin_us * now->vrate;
u64 margin_ns = ioc->margin_us * NSEC_PER_USEC;
- u64 expires, oexpires;
+ u64 delta_ns, expires, oexpires;
u32 hw_inuse;
lockdep_assert_held(&iocg->waitq.lock);
@@ -1236,15 +1237,10 @@ static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now, u64 cost)
return false;
/* use delay */
- if (cost) {
- u64 cost_ns = DIV64_U64_ROUND_UP(cost * NSEC_PER_USEC,
- now->vrate);
- blkcg_add_delay(blkg, now->now_ns, cost_ns);
- }
- blkcg_use_delay(blkg);
-
- expires = now->now_ns + DIV64_U64_ROUND_UP(vtime - now->vnow,
- now->vrate) * NSEC_PER_USEC;
+ delta_ns = DIV64_U64_ROUND_UP(vtime - now->vnow,
+ now->vrate) * NSEC_PER_USEC;
+ blkcg_set_delay(blkg, delta_ns);
+ expires = now->now_ns + delta_ns;
/* if already active and close enough, don't bother */
oexpires = ktime_to_ns(hrtimer_get_softexpires(&iocg->delay_timer));
@@ -1265,7 +1261,7 @@ static enum hrtimer_restart iocg_delay_timer_fn(struct hrtimer *timer)
spin_lock_irqsave(&iocg->waitq.lock, flags);
ioc_now(iocg->ioc, &now);
- iocg_kick_delay(iocg, &now, 0);
+ iocg_kick_delay(iocg, &now);
spin_unlock_irqrestore(&iocg->waitq.lock, flags);
return HRTIMER_NORESTART;
@@ -1383,7 +1379,7 @@ static void ioc_timer_fn(struct timer_list *timer)
if (waitqueue_active(&iocg->waitq) || iocg->abs_vdebt) {
/* might be oversleeping vtime / hweight changes, kick */
iocg_kick_waitq(iocg, &now);
- iocg_kick_delay(iocg, &now, 0);
+ iocg_kick_delay(iocg, &now);
} else if (iocg_is_idle(iocg)) {
/* no waiter and idle, deactivate */
iocg->last_inuse = iocg->inuse;
@@ -1543,19 +1539,39 @@ skip_surplus_transfers:
if (rq_wait_pct > RQ_WAIT_BUSY_PCT ||
missed_ppm[READ] > ppm_rthr ||
missed_ppm[WRITE] > ppm_wthr) {
+ /* clearly missing QoS targets, slow down vrate */
ioc->busy_level = max(ioc->busy_level, 0);
ioc->busy_level++;
} else if (rq_wait_pct <= RQ_WAIT_BUSY_PCT * UNBUSY_THR_PCT / 100 &&
missed_ppm[READ] <= ppm_rthr * UNBUSY_THR_PCT / 100 &&
missed_ppm[WRITE] <= ppm_wthr * UNBUSY_THR_PCT / 100) {
- /* take action iff there is contention */
- if (nr_shortages && !nr_lagging) {
+ /* QoS targets are being met with >25% margin */
+ if (nr_shortages) {
+ /*
+ * We're throttling while the device has spare
+ * capacity. If vrate was being slowed down, stop.
+ */
ioc->busy_level = min(ioc->busy_level, 0);
- /* redistribute surpluses first */
- if (!nr_surpluses)
+
+ /*
+ * If there are IOs spanning multiple periods, wait
+ * them out before pushing the device harder. If
+ * there are surpluses, let redistribution work it
+ * out first.
+ */
+ if (!nr_lagging && !nr_surpluses)
ioc->busy_level--;
+ } else {
+ /*
+ * Nobody is being throttled and the users aren't
+ * issuing enough IOs to saturate the device. We
+ * simply don't know how close the device is to
+ * saturation. Coast.
+ */
+ ioc->busy_level = 0;
}
} else {
+ /* inside the hysteresis margin, we're good */
ioc->busy_level = 0;
}
@@ -1678,6 +1694,31 @@ static u64 calc_vtime_cost(struct bio *bio, struct ioc_gq *iocg, bool is_merge)
return cost;
}
+static void calc_size_vtime_cost_builtin(struct request *rq, struct ioc *ioc,
+ u64 *costp)
+{
+ unsigned int pages = blk_rq_stats_sectors(rq) >> IOC_SECT_TO_PAGE_SHIFT;
+
+ switch (req_op(rq)) {
+ case REQ_OP_READ:
+ *costp = pages * ioc->params.lcoefs[LCOEF_RPAGE];
+ break;
+ case REQ_OP_WRITE:
+ *costp = pages * ioc->params.lcoefs[LCOEF_WPAGE];
+ break;
+ default:
+ *costp = 0;
+ }
+}
+
+static u64 calc_size_vtime_cost(struct request *rq, struct ioc *ioc)
+{
+ u64 cost;
+
+ calc_size_vtime_cost_builtin(rq, ioc, &cost);
+ return cost;
+}
+
static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio)
{
struct blkcg_gq *blkg = bio->bi_blkg;
@@ -1762,7 +1803,7 @@ static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio)
*/
if (bio_issue_as_root_blkg(bio) || fatal_signal_pending(current)) {
iocg->abs_vdebt += abs_cost;
- if (iocg_kick_delay(iocg, &now, cost))
+ if (iocg_kick_delay(iocg, &now))
blkcg_schedule_throttle(rqos->q,
(bio->bi_opf & REQ_SWAP) == REQ_SWAP);
spin_unlock_irq(&iocg->waitq.lock);
@@ -1850,7 +1891,7 @@ static void ioc_rqos_merge(struct rq_qos *rqos, struct request *rq,
spin_lock_irqsave(&iocg->waitq.lock, flags);
if (likely(!list_empty(&iocg->active_list))) {
iocg->abs_vdebt += abs_cost;
- iocg_kick_delay(iocg, &now, cost);
+ iocg_kick_delay(iocg, &now);
} else {
iocg_commit_bio(iocg, bio, cost);
}
@@ -1868,7 +1909,7 @@ static void ioc_rqos_done_bio(struct rq_qos *rqos, struct bio *bio)
static void ioc_rqos_done(struct rq_qos *rqos, struct request *rq)
{
struct ioc *ioc = rqos_to_ioc(rqos);
- u64 on_q_ns, rq_wait_ns;
+ u64 on_q_ns, rq_wait_ns, size_nsec;
int pidx, rw;
if (!ioc->enabled || !rq->alloc_time_ns || !rq->start_time_ns)
@@ -1889,8 +1930,10 @@ static void ioc_rqos_done(struct rq_qos *rqos, struct request *rq)
on_q_ns = ktime_get_ns() - rq->alloc_time_ns;
rq_wait_ns = rq->start_time_ns - rq->alloc_time_ns;
+ size_nsec = div64_u64(calc_size_vtime_cost(rq, ioc), VTIME_PER_NSEC);
- if (on_q_ns <= ioc->params.qos[pidx] * NSEC_PER_USEC)
+ if (on_q_ns <= size_nsec ||
+ on_q_ns - size_nsec <= ioc->params.qos[pidx] * NSEC_PER_USEC)
this_cpu_inc(ioc->pcpu_stat->missed[rw].nr_met);
else
this_cpu_inc(ioc->pcpu_stat->missed[rw].nr_missed);
@@ -2297,6 +2340,7 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
spin_lock_irq(&ioc->lock);
if (enable) {
+ blk_stat_enable_accounting(ioc->rqos.q);
blk_queue_flag_set(QUEUE_FLAG_RQ_ALLOC_TIME, ioc->rqos.q);
ioc->enabled = true;
} else {
diff --git a/block/blk-map.c b/block/blk-map.c
index b72c361911a4..6e804892d5ec 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -257,6 +257,7 @@ out_bmd:
static struct bio *bio_map_user_iov(struct request_queue *q,
struct iov_iter *iter, gfp_t gfp_mask)
{
+ unsigned int max_sectors = queue_max_hw_sectors(q);
int j;
struct bio *bio;
int ret;
@@ -294,8 +295,8 @@ static struct bio *bio_map_user_iov(struct request_queue *q,
if (n > bytes)
n = bytes;
- if (!__bio_add_pc_page(q, bio, page, n, offs,
- &same_page)) {
+ if (!bio_add_hw_page(q, bio, page, n, offs,
+ max_sectors, &same_page)) {
if (same_page)
put_page(page);
break;
@@ -549,6 +550,7 @@ int blk_rq_append_bio(struct request *rq, struct bio **bio)
rq->biotail->bi_next = *bio;
rq->biotail = *bio;
rq->__data_len += (*bio)->bi_iter.bi_size;
+ bio_crypt_free_ctx(*bio);
}
return 0;
@@ -654,8 +656,6 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
bio = rq->bio;
} while (iov_iter_count(&i));
- if (!bio_flagged(bio, BIO_USER_MAPPED))
- rq->rq_flags |= RQF_COPY_USER;
return 0;
unmap_rq:
@@ -731,7 +731,6 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
{
int reading = rq_data_dir(rq) == READ;
unsigned long addr = (unsigned long) kbuf;
- int do_copy = 0;
struct bio *bio, *orig_bio;
int ret;
@@ -740,8 +739,7 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
if (!len || !kbuf)
return -EINVAL;
- do_copy = !blk_rq_aligned(q, addr, len) || object_is_on_stack(kbuf);
- if (do_copy)
+ if (!blk_rq_aligned(q, addr, len) || object_is_on_stack(kbuf))
bio = bio_copy_kern(q, kbuf, len, gfp_mask, reading);
else
bio = bio_map_kern(q, kbuf, len, gfp_mask);
@@ -752,9 +750,6 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
bio->bi_opf &= ~REQ_OP_MASK;
bio->bi_opf |= req_op(rq);
- if (do_copy)
- rq->rq_flags |= RQF_COPY_USER;
-
orig_bio = bio;
ret = blk_rq_append_bio(rq, &bio);
if (unlikely(ret)) {
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 1534ed736363..f0b0bae075a0 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -336,16 +336,6 @@ void __blk_queue_split(struct request_queue *q, struct bio **bio,
/* there isn't chance to merge the splitted bio */
split->bi_opf |= REQ_NOMERGE;
- /*
- * Since we're recursing into make_request here, ensure
- * that we mark this bio as already having entered the queue.
- * If not, and the queue is going away, we can get stuck
- * forever on waiting for the queue reference to drop. But
- * that will never happen, as we're already holding a
- * reference to it.
- */
- bio_set_flag(*bio, BIO_QUEUE_ENTERED);
-
bio_chain(split, *bio);
trace_block_split(q, split, (*bio)->bi_iter.bi_sector);
generic_make_request(*bio);
@@ -519,44 +509,20 @@ static int __blk_bios_map_sg(struct request_queue *q, struct bio *bio,
* map a request to scatterlist, return number of sg entries setup. Caller
* must make sure sg can hold rq->nr_phys_segments entries
*/
-int blk_rq_map_sg(struct request_queue *q, struct request *rq,
- struct scatterlist *sglist)
+int __blk_rq_map_sg(struct request_queue *q, struct request *rq,
+ struct scatterlist *sglist, struct scatterlist **last_sg)
{
- struct scatterlist *sg = NULL;
int nsegs = 0;
if (rq->rq_flags & RQF_SPECIAL_PAYLOAD)
- nsegs = __blk_bvec_map_sg(rq->special_vec, sglist, &sg);
+ nsegs = __blk_bvec_map_sg(rq->special_vec, sglist, last_sg);
else if (rq->bio && bio_op(rq->bio) == REQ_OP_WRITE_SAME)
- nsegs = __blk_bvec_map_sg(bio_iovec(rq->bio), sglist, &sg);
+ nsegs = __blk_bvec_map_sg(bio_iovec(rq->bio), sglist, last_sg);
else if (rq->bio)
- nsegs = __blk_bios_map_sg(q, rq->bio, sglist, &sg);
-
- if (unlikely(rq->rq_flags & RQF_COPY_USER) &&
- (blk_rq_bytes(rq) & q->dma_pad_mask)) {
- unsigned int pad_len =
- (q->dma_pad_mask & ~blk_rq_bytes(rq)) + 1;
-
- sg->length += pad_len;
- rq->extra_len += pad_len;
- }
+ nsegs = __blk_bios_map_sg(q, rq->bio, sglist, last_sg);
- if (q->dma_drain_size && q->dma_drain_needed(rq)) {
- if (op_is_write(req_op(rq)))
- memset(q->dma_drain_buffer, 0, q->dma_drain_size);
-
- sg_unmark_end(sg);
- sg = sg_next(sg);
- sg_set_page(sg, virt_to_page(q->dma_drain_buffer),
- q->dma_drain_size,
- ((unsigned long)q->dma_drain_buffer) &
- (PAGE_SIZE - 1));
- nsegs++;
- rq->extra_len += q->dma_drain_size;
- }
-
- if (sg)
- sg_mark_end(sg);
+ if (*last_sg)
+ sg_mark_end(*last_sg);
/*
* Something must have been wrong if the figured number of
@@ -566,7 +532,7 @@ int blk_rq_map_sg(struct request_queue *q, struct request *rq,
return nsegs;
}
-EXPORT_SYMBOL(blk_rq_map_sg);
+EXPORT_SYMBOL(__blk_rq_map_sg);
static inline int ll_new_hw_segment(struct request *req, struct bio *bio,
unsigned int nr_phys_segs)
@@ -596,6 +562,8 @@ int ll_back_merge_fn(struct request *req, struct bio *bio, unsigned int nr_segs)
if (blk_integrity_rq(req) &&
integrity_req_gap_back_merge(req, bio))
return 0;
+ if (!bio_crypt_ctx_back_mergeable(req, bio))
+ return 0;
if (blk_rq_sectors(req) + bio_sectors(bio) >
blk_rq_get_max_sectors(req, blk_rq_pos(req))) {
req_set_nomerge(req->q, req);
@@ -612,6 +580,8 @@ int ll_front_merge_fn(struct request *req, struct bio *bio, unsigned int nr_segs
if (blk_integrity_rq(req) &&
integrity_req_gap_front_merge(req, bio))
return 0;
+ if (!bio_crypt_ctx_front_mergeable(req, bio))
+ return 0;
if (blk_rq_sectors(req) + bio_sectors(bio) >
blk_rq_get_max_sectors(req, bio->bi_iter.bi_sector)) {
req_set_nomerge(req->q, req);
@@ -661,6 +631,9 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
if (blk_integrity_merge_rq(q, req, next) == false)
return 0;
+ if (!bio_crypt_ctx_merge_rq(req, next))
+ return 0;
+
/* Merge is OK... */
req->nr_phys_segments = total_phys_segments;
return 1;
@@ -696,20 +669,17 @@ void blk_rq_set_mixed_merge(struct request *rq)
rq->rq_flags |= RQF_MIXED_MERGE;
}
-static void blk_account_io_merge(struct request *req)
+static void blk_account_io_merge_request(struct request *req)
{
if (blk_do_io_stat(req)) {
- struct hd_struct *part;
-
part_stat_lock();
- part = req->part;
-
- part_dec_in_flight(req->q, part, rq_data_dir(req));
-
- hd_struct_put(part);
+ part_stat_inc(req->part, merges[op_stat_group(req_op(req))]);
part_stat_unlock();
+
+ hd_struct_put(req->part);
}
}
+
/*
* Two cases of handling DISCARD merge:
* If max_discard_segments > 1, the driver takes every bio
@@ -821,7 +791,7 @@ static struct request *attempt_merge(struct request_queue *q,
/*
* 'next' is going away, so update stats accordingly
*/
- blk_account_io_merge(next);
+ blk_account_io_merge_request(next);
/*
* ownership of bio passed from next to req, return 'next' for
@@ -885,6 +855,10 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
if (blk_integrity_merge_bio(rq->q, rq, bio) == false)
return false;
+ /* Only merge if the crypt contexts are compatible */
+ if (!bio_crypt_rq_ctx_compatible(rq, bio))
+ return false;
+
/* must be using the same buffer */
if (req_op(rq) == REQ_OP_WRITE_SAME &&
!blk_write_same_mergeable(rq->bio, bio))
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index b3f2ba483992..15df3a36e9fa 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -213,6 +213,7 @@ static const char *const hctx_state_name[] = {
HCTX_STATE_NAME(STOPPED),
HCTX_STATE_NAME(TAG_ACTIVE),
HCTX_STATE_NAME(SCHED_RESTART),
+ HCTX_STATE_NAME(INACTIVE),
};
#undef HCTX_STATE_NAME
@@ -239,6 +240,7 @@ static const char *const hctx_flag_name[] = {
HCTX_FLAG_NAME(TAG_SHARED),
HCTX_FLAG_NAME(BLOCKING),
HCTX_FLAG_NAME(NO_SCHED),
+ HCTX_FLAG_NAME(STACKING),
};
#undef HCTX_FLAG_NAME
@@ -292,7 +294,6 @@ static const char *const rqf_name[] = {
RQF_NAME(MQ_INFLIGHT),
RQF_NAME(DONTPREP),
RQF_NAME(PREEMPT),
- RQF_NAME(COPY_USER),
RQF_NAME(FAILED),
RQF_NAME(QUIET),
RQF_NAME(ELVPRIV),
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 74cedea56034..fdcc2c1dd178 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -80,16 +80,22 @@ void blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx)
blk_mq_run_hw_queue(hctx, true);
}
+#define BLK_MQ_BUDGET_DELAY 3 /* ms units */
+
/*
* Only SCSI implements .get_budget and .put_budget, and SCSI restarts
* its queue by itself in its completion handler, so we don't need to
* restart queue if .get_budget() returns BLK_STS_NO_RESOURCE.
+ *
+ * Returns -EAGAIN if hctx->dispatch was found non-empty and run_work has to
+ * be run again. This is necessary to avoid starving flushes.
*/
-static void blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
+static int blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
{
struct request_queue *q = hctx->queue;
struct elevator_queue *e = q->elevator;
LIST_HEAD(rq_list);
+ int ret = 0;
do {
struct request *rq;
@@ -97,12 +103,25 @@ static void blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
if (e->type->ops.has_work && !e->type->ops.has_work(hctx))
break;
+ if (!list_empty_careful(&hctx->dispatch)) {
+ ret = -EAGAIN;
+ break;
+ }
+
if (!blk_mq_get_dispatch_budget(hctx))
break;
rq = e->type->ops.dispatch_request(hctx);
if (!rq) {
blk_mq_put_dispatch_budget(hctx);
+ /*
+ * We're releasing without dispatching. Holding the
+ * budget could have blocked any "hctx"s with the
+ * same queue and if we didn't dispatch then there's
+ * no guarantee anyone will kick the queue. Kick it
+ * ourselves.
+ */
+ blk_mq_delay_run_hw_queues(q, BLK_MQ_BUDGET_DELAY);
break;
}
@@ -113,6 +132,8 @@ static void blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
*/
list_add(&rq->queuelist, &rq_list);
} while (blk_mq_dispatch_rq_list(q, &rq_list, true));
+
+ return ret;
}
static struct blk_mq_ctx *blk_mq_next_ctx(struct blk_mq_hw_ctx *hctx,
@@ -130,16 +151,25 @@ static struct blk_mq_ctx *blk_mq_next_ctx(struct blk_mq_hw_ctx *hctx,
* Only SCSI implements .get_budget and .put_budget, and SCSI restarts
* its queue by itself in its completion handler, so we don't need to
* restart queue if .get_budget() returns BLK_STS_NO_RESOURCE.
+ *
+ * Returns -EAGAIN if hctx->dispatch was found non-empty and run_work has to
+ * be run again. This is necessary to avoid starving flushes.
*/
-static void blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx)
+static int blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx)
{
struct request_queue *q = hctx->queue;
LIST_HEAD(rq_list);
struct blk_mq_ctx *ctx = READ_ONCE(hctx->dispatch_from);
+ int ret = 0;
do {
struct request *rq;
+ if (!list_empty_careful(&hctx->dispatch)) {
+ ret = -EAGAIN;
+ break;
+ }
+
if (!sbitmap_any_bit_set(&hctx->ctx_map))
break;
@@ -149,6 +179,14 @@ static void blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx)
rq = blk_mq_dequeue_from_ctx(hctx, ctx);
if (!rq) {
blk_mq_put_dispatch_budget(hctx);
+ /*
+ * We're releasing without dispatching. Holding the
+ * budget could have blocked any "hctx"s with the
+ * same queue and if we didn't dispatch then there's
+ * no guarantee anyone will kick the queue. Kick it
+ * ourselves.
+ */
+ blk_mq_delay_run_hw_queues(q, BLK_MQ_BUDGET_DELAY);
break;
}
@@ -165,21 +203,17 @@ static void blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx)
} while (blk_mq_dispatch_rq_list(q, &rq_list, true));
WRITE_ONCE(hctx->dispatch_from, ctx);
+ return ret;
}
-void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
+static int __blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
{
struct request_queue *q = hctx->queue;
struct elevator_queue *e = q->elevator;
const bool has_sched_dispatch = e && e->type->ops.dispatch_request;
+ int ret = 0;
LIST_HEAD(rq_list);
- /* RCU or SRCU read lock is needed before checking quiesced flag */
- if (unlikely(blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)))
- return;
-
- hctx->run++;
-
/*
* If we have previous entries on our dispatch list, grab them first for
* more fair dispatch.
@@ -208,19 +242,41 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
blk_mq_sched_mark_restart_hctx(hctx);
if (blk_mq_dispatch_rq_list(q, &rq_list, false)) {
if (has_sched_dispatch)
- blk_mq_do_dispatch_sched(hctx);
+ ret = blk_mq_do_dispatch_sched(hctx);
else
- blk_mq_do_dispatch_ctx(hctx);
+ ret = blk_mq_do_dispatch_ctx(hctx);
}
} else if (has_sched_dispatch) {
- blk_mq_do_dispatch_sched(hctx);
+ ret = blk_mq_do_dispatch_sched(hctx);
} else if (hctx->dispatch_busy) {
/* dequeue request one by one from sw queue if queue is busy */
- blk_mq_do_dispatch_ctx(hctx);
+ ret = blk_mq_do_dispatch_ctx(hctx);
} else {
blk_mq_flush_busy_ctxs(hctx, &rq_list);
blk_mq_dispatch_rq_list(q, &rq_list, false);
}
+
+ return ret;
+}
+
+void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
+{
+ struct request_queue *q = hctx->queue;
+
+ /* RCU or SRCU read lock is needed before checking quiesced flag */
+ if (unlikely(blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)))
+ return;
+
+ hctx->run++;
+
+ /*
+ * A return of -EAGAIN is an indication that hctx->dispatch is not
+ * empty and we must run again in order to avoid starving flushes.
+ */
+ if (__blk_mq_sched_dispatch_requests(hctx) == -EAGAIN) {
+ if (__blk_mq_sched_dispatch_requests(hctx) == -EAGAIN)
+ blk_mq_run_hw_queue(hctx, true);
+ }
}
bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 586c9d6e904a..96a39d0724a2 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -92,7 +92,7 @@ static int __blk_mq_get_tag(struct blk_mq_alloc_data *data,
{
if (!(data->flags & BLK_MQ_REQ_INTERNAL) &&
!hctx_may_queue(data->hctx, bt))
- return -1;
+ return BLK_MQ_NO_TAG;
if (data->shallow_depth)
return __sbitmap_queue_get_shallow(bt, data->shallow_depth);
else
@@ -111,7 +111,7 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
if (data->flags & BLK_MQ_REQ_RESERVED) {
if (unlikely(!tags->nr_reserved_tags)) {
WARN_ON_ONCE(1);
- return BLK_MQ_TAG_FAIL;
+ return BLK_MQ_NO_TAG;
}
bt = &tags->breserved_tags;
tag_offset = 0;
@@ -121,11 +121,11 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
}
tag = __blk_mq_get_tag(data, bt);
- if (tag != -1)
+ if (tag != BLK_MQ_NO_TAG)
goto found_tag;
if (data->flags & BLK_MQ_REQ_NOWAIT)
- return BLK_MQ_TAG_FAIL;
+ return BLK_MQ_NO_TAG;
ws = bt_wait_ptr(bt, data->hctx);
do {
@@ -143,13 +143,13 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
* as running the queue may also have found completions.
*/
tag = __blk_mq_get_tag(data, bt);
- if (tag != -1)
+ if (tag != BLK_MQ_NO_TAG)
break;
sbitmap_prepare_to_wait(bt, ws, &wait, TASK_UNINTERRUPTIBLE);
tag = __blk_mq_get_tag(data, bt);
- if (tag != -1)
+ if (tag != BLK_MQ_NO_TAG)
break;
bt_prev = bt;
@@ -180,6 +180,14 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
sbitmap_finish_wait(bt, ws, &wait);
found_tag:
+ /*
+ * Give up this allocation if the hctx is inactive. The caller will
+ * retry on an active hctx.
+ */
+ if (unlikely(test_bit(BLK_MQ_S_INACTIVE, &data->hctx->state))) {
+ blk_mq_put_tag(tags, data->ctx, tag + tag_offset);
+ return BLK_MQ_NO_TAG;
+ }
return tag + tag_offset;
}
@@ -256,14 +264,17 @@ struct bt_tags_iter_data {
struct blk_mq_tags *tags;
busy_tag_iter_fn *fn;
void *data;
- bool reserved;
+ unsigned int flags;
};
+#define BT_TAG_ITER_RESERVED (1 << 0)
+#define BT_TAG_ITER_STARTED (1 << 1)
+
static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
{
struct bt_tags_iter_data *iter_data = data;
struct blk_mq_tags *tags = iter_data->tags;
- bool reserved = iter_data->reserved;
+ bool reserved = iter_data->flags & BT_TAG_ITER_RESERVED;
struct request *rq;
if (!reserved)
@@ -274,10 +285,12 @@ static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
* test and set the bit before assining ->rqs[].
*/
rq = tags->rqs[bitnr];
- if (rq && blk_mq_request_started(rq))
- return iter_data->fn(rq, iter_data->data, reserved);
-
- return true;
+ if (!rq)
+ return true;
+ if ((iter_data->flags & BT_TAG_ITER_STARTED) &&
+ !blk_mq_request_started(rq))
+ return true;
+ return iter_data->fn(rq, iter_data->data, reserved);
}
/**
@@ -290,39 +303,47 @@ static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
* @reserved) where rq is a pointer to a request. Return true
* to continue iterating tags, false to stop.
* @data: Will be passed as second argument to @fn.
- * @reserved: Indicates whether @bt is the breserved_tags member or the
- * bitmap_tags member of struct blk_mq_tags.
+ * @flags: BT_TAG_ITER_*
*/
static void bt_tags_for_each(struct blk_mq_tags *tags, struct sbitmap_queue *bt,
- busy_tag_iter_fn *fn, void *data, bool reserved)
+ busy_tag_iter_fn *fn, void *data, unsigned int flags)
{
struct bt_tags_iter_data iter_data = {
.tags = tags,
.fn = fn,
.data = data,
- .reserved = reserved,
+ .flags = flags,
};
if (tags->rqs)
sbitmap_for_each_set(&bt->sb, bt_tags_iter, &iter_data);
}
+static void __blk_mq_all_tag_iter(struct blk_mq_tags *tags,
+ busy_tag_iter_fn *fn, void *priv, unsigned int flags)
+{
+ WARN_ON_ONCE(flags & BT_TAG_ITER_RESERVED);
+
+ if (tags->nr_reserved_tags)
+ bt_tags_for_each(tags, &tags->breserved_tags, fn, priv,
+ flags | BT_TAG_ITER_RESERVED);
+ bt_tags_for_each(tags, &tags->bitmap_tags, fn, priv, flags);
+}
+
/**
- * blk_mq_all_tag_busy_iter - iterate over all started requests in a tag map
+ * blk_mq_all_tag_iter - iterate over all requests in a tag map
* @tags: Tag map to iterate over.
- * @fn: Pointer to the function that will be called for each started
+ * @fn: Pointer to the function that will be called for each
* request. @fn will be called as follows: @fn(rq, @priv,
* reserved) where rq is a pointer to a request. 'reserved'
* indicates whether or not @rq is a reserved request. Return
* true to continue iterating tags, false to stop.
* @priv: Will be passed as second argument to @fn.
*/
-static void blk_mq_all_tag_busy_iter(struct blk_mq_tags *tags,
- busy_tag_iter_fn *fn, void *priv)
+void blk_mq_all_tag_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn,
+ void *priv)
{
- if (tags->nr_reserved_tags)
- bt_tags_for_each(tags, &tags->breserved_tags, fn, priv, true);
- bt_tags_for_each(tags, &tags->bitmap_tags, fn, priv, false);
+ return __blk_mq_all_tag_iter(tags, fn, priv, 0);
}
/**
@@ -342,7 +363,8 @@ void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
for (i = 0; i < tagset->nr_hw_queues; i++) {
if (tagset->tags && tagset->tags[i])
- blk_mq_all_tag_busy_iter(tagset->tags[i], fn, priv);
+ __blk_mq_all_tag_iter(tagset->tags[i], fn, priv,
+ BT_TAG_ITER_STARTED);
}
}
EXPORT_SYMBOL(blk_mq_tagset_busy_iter);
diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h
index 2b8321efb682..d38e48f2a0a4 100644
--- a/block/blk-mq-tag.h
+++ b/block/blk-mq-tag.h
@@ -34,6 +34,8 @@ extern int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx,
extern void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool);
void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn,
void *priv);
+void blk_mq_all_tag_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn,
+ void *priv);
static inline struct sbq_wait_state *bt_wait_ptr(struct sbitmap_queue *bt,
struct blk_mq_hw_ctx *hctx)
@@ -44,9 +46,9 @@ static inline struct sbq_wait_state *bt_wait_ptr(struct sbitmap_queue *bt,
}
enum {
- BLK_MQ_TAG_FAIL = -1U,
+ BLK_MQ_NO_TAG = -1U,
BLK_MQ_TAG_MIN = 1,
- BLK_MQ_TAG_MAX = BLK_MQ_TAG_FAIL - 1,
+ BLK_MQ_TAG_MAX = BLK_MQ_NO_TAG - 1,
};
extern bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index a7785df2c944..9a36ac1c1fa1 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -26,6 +26,7 @@
#include <linux/delay.h>
#include <linux/crash_dump.h>
#include <linux/prefetch.h>
+#include <linux/blk-crypto.h>
#include <trace/events/block.h>
@@ -270,14 +271,14 @@ static inline bool blk_mq_need_time_stamp(struct request *rq)
}
static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
- unsigned int tag, unsigned int op, u64 alloc_time_ns)
+ unsigned int tag, u64 alloc_time_ns)
{
struct blk_mq_tags *tags = blk_mq_tags_from_data(data);
struct request *rq = tags->static_rqs[tag];
req_flags_t rq_flags = 0;
if (data->flags & BLK_MQ_REQ_INTERNAL) {
- rq->tag = -1;
+ rq->tag = BLK_MQ_NO_TAG;
rq->internal_tag = tag;
} else {
if (data->hctx->flags & BLK_MQ_F_TAG_SHARED) {
@@ -285,7 +286,7 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
atomic_inc(&data->hctx->nr_active);
}
rq->tag = tag;
- rq->internal_tag = -1;
+ rq->internal_tag = BLK_MQ_NO_TAG;
data->hctx->tags->rqs[rq->tag] = rq;
}
@@ -294,7 +295,7 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
rq->mq_ctx = data->ctx;
rq->mq_hctx = data->hctx;
rq->rq_flags = rq_flags;
- rq->cmd_flags = op;
+ rq->cmd_flags = data->cmd_flags;
if (data->flags & BLK_MQ_REQ_PREEMPT)
rq->rq_flags |= RQF_PREEMPT;
if (blk_queue_io_stat(data->q))
@@ -317,8 +318,8 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
#if defined(CONFIG_BLK_DEV_INTEGRITY)
rq->nr_integrity_segments = 0;
#endif
+ blk_crypto_rq_set_defaults(rq);
/* tag was already set */
- rq->extra_len = 0;
WRITE_ONCE(rq->deadline, 0);
rq->timeout = 0;
@@ -326,35 +327,37 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
rq->end_io = NULL;
rq->end_io_data = NULL;
- data->ctx->rq_dispatched[op_is_sync(op)]++;
+ data->ctx->rq_dispatched[op_is_sync(data->cmd_flags)]++;
refcount_set(&rq->ref, 1);
+
+ if (!op_is_flush(data->cmd_flags)) {
+ struct elevator_queue *e = data->q->elevator;
+
+ rq->elv.icq = NULL;
+ if (e && e->type->ops.prepare_request) {
+ if (e->type->icq_cache)
+ blk_mq_sched_assign_ioc(rq);
+
+ e->type->ops.prepare_request(rq);
+ rq->rq_flags |= RQF_ELVPRIV;
+ }
+ }
+
+ data->hctx->queued++;
return rq;
}
-static struct request *blk_mq_get_request(struct request_queue *q,
- struct bio *bio,
- struct blk_mq_alloc_data *data)
+static struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data)
{
+ struct request_queue *q = data->q;
struct elevator_queue *e = q->elevator;
- struct request *rq;
- unsigned int tag;
- bool clear_ctx_on_error = false;
u64 alloc_time_ns = 0;
-
- blk_queue_enter_live(q);
+ unsigned int tag;
/* alloc_time includes depth and tag waits */
if (blk_queue_rq_alloc_time(q))
alloc_time_ns = ktime_get_ns();
- data->q = q;
- if (likely(!data->ctx)) {
- data->ctx = blk_mq_get_ctx(q);
- clear_ctx_on_error = true;
- }
- if (likely(!data->hctx))
- data->hctx = blk_mq_map_queue(q, data->cmd_flags,
- data->ctx);
if (data->cmd_flags & REQ_NOWAIT)
data->flags |= BLK_MQ_REQ_NOWAIT;
@@ -370,37 +373,43 @@ static struct request *blk_mq_get_request(struct request_queue *q,
e->type->ops.limit_depth &&
!(data->flags & BLK_MQ_REQ_RESERVED))
e->type->ops.limit_depth(data->cmd_flags, data);
- } else {
- blk_mq_tag_busy(data->hctx);
}
- tag = blk_mq_get_tag(data);
- if (tag == BLK_MQ_TAG_FAIL) {
- if (clear_ctx_on_error)
- data->ctx = NULL;
- blk_queue_exit(q);
- return NULL;
- }
+retry:
+ data->ctx = blk_mq_get_ctx(q);
+ data->hctx = blk_mq_map_queue(q, data->cmd_flags, data->ctx);
+ if (!(data->flags & BLK_MQ_REQ_INTERNAL))
+ blk_mq_tag_busy(data->hctx);
- rq = blk_mq_rq_ctx_init(data, tag, data->cmd_flags, alloc_time_ns);
- if (!op_is_flush(data->cmd_flags)) {
- rq->elv.icq = NULL;
- if (e && e->type->ops.prepare_request) {
- if (e->type->icq_cache)
- blk_mq_sched_assign_ioc(rq);
+ /*
+ * Waiting allocations only fail because of an inactive hctx. In that
+ * case just retry the hctx assignment and tag allocation as CPU hotplug
+ * should have migrated us to an online CPU by now.
+ */
+ tag = blk_mq_get_tag(data);
+ if (tag == BLK_MQ_NO_TAG) {
+ if (data->flags & BLK_MQ_REQ_NOWAIT)
+ return NULL;
- e->type->ops.prepare_request(rq, bio);
- rq->rq_flags |= RQF_ELVPRIV;
- }
+ /*
+ * Give up the CPU and sleep for a short time to ensure that
+ * threads using a realtime scheduling class are migrated off
+ * the CPU, and thus off the hctx that is going away.
+ */
+ msleep(3);
+ goto retry;
}
- data->hctx->queued++;
- return rq;
+ return blk_mq_rq_ctx_init(data, tag, alloc_time_ns);
}
struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op,
blk_mq_req_flags_t flags)
{
- struct blk_mq_alloc_data alloc_data = { .flags = flags, .cmd_flags = op };
+ struct blk_mq_alloc_data data = {
+ .q = q,
+ .flags = flags,
+ .cmd_flags = op,
+ };
struct request *rq;
int ret;
@@ -408,34 +417,43 @@ struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op,
if (ret)
return ERR_PTR(ret);
- rq = blk_mq_get_request(q, NULL, &alloc_data);
- blk_queue_exit(q);
-
+ rq = __blk_mq_alloc_request(&data);
if (!rq)
- return ERR_PTR(-EWOULDBLOCK);
-
+ goto out_queue_exit;
rq->__data_len = 0;
rq->__sector = (sector_t) -1;
rq->bio = rq->biotail = NULL;
return rq;
+out_queue_exit:
+ blk_queue_exit(q);
+ return ERR_PTR(-EWOULDBLOCK);
}
EXPORT_SYMBOL(blk_mq_alloc_request);
struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
unsigned int op, blk_mq_req_flags_t flags, unsigned int hctx_idx)
{
- struct blk_mq_alloc_data alloc_data = { .flags = flags, .cmd_flags = op };
- struct request *rq;
+ struct blk_mq_alloc_data data = {
+ .q = q,
+ .flags = flags,
+ .cmd_flags = op,
+ };
+ u64 alloc_time_ns = 0;
unsigned int cpu;
+ unsigned int tag;
int ret;
+ /* alloc_time includes depth and tag waits */
+ if (blk_queue_rq_alloc_time(q))
+ alloc_time_ns = ktime_get_ns();
+
/*
* If the tag allocator sleeps we could get an allocation for a
* different hardware context. No need to complicate the low level
* allocator for this for the rare use case of a command tied to
* a specific queue.
*/
- if (WARN_ON_ONCE(!(flags & BLK_MQ_REQ_NOWAIT)))
+ if (WARN_ON_ONCE(!(flags & (BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_RESERVED))))
return ERR_PTR(-EINVAL);
if (hctx_idx >= q->nr_hw_queues)
@@ -449,21 +467,27 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
* Check if the hardware context is actually mapped to anything.
* If not tell the caller that it should skip this queue.
*/
- alloc_data.hctx = q->queue_hw_ctx[hctx_idx];
- if (!blk_mq_hw_queue_mapped(alloc_data.hctx)) {
- blk_queue_exit(q);
- return ERR_PTR(-EXDEV);
- }
- cpu = cpumask_first_and(alloc_data.hctx->cpumask, cpu_online_mask);
- alloc_data.ctx = __blk_mq_get_ctx(q, cpu);
-
- rq = blk_mq_get_request(q, NULL, &alloc_data);
- blk_queue_exit(q);
+ ret = -EXDEV;
+ data.hctx = q->queue_hw_ctx[hctx_idx];
+ if (!blk_mq_hw_queue_mapped(data.hctx))
+ goto out_queue_exit;
+ cpu = cpumask_first_and(data.hctx->cpumask, cpu_online_mask);
+ data.ctx = __blk_mq_get_ctx(q, cpu);
+
+ if (q->elevator)
+ data.flags |= BLK_MQ_REQ_INTERNAL;
+ else
+ blk_mq_tag_busy(data.hctx);
- if (!rq)
- return ERR_PTR(-EWOULDBLOCK);
+ ret = -EWOULDBLOCK;
+ tag = blk_mq_get_tag(&data);
+ if (tag == BLK_MQ_NO_TAG)
+ goto out_queue_exit;
+ return blk_mq_rq_ctx_init(&data, tag, alloc_time_ns);
- return rq;
+out_queue_exit:
+ blk_queue_exit(q);
+ return ERR_PTR(ret);
}
EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx);
@@ -474,11 +498,12 @@ static void __blk_mq_free_request(struct request *rq)
struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
const int sched_tag = rq->internal_tag;
+ blk_crypto_free_request(rq);
blk_pm_mark_last_busy(rq);
rq->mq_hctx = NULL;
- if (rq->tag != -1)
+ if (rq->tag != BLK_MQ_NO_TAG)
blk_mq_put_tag(hctx->tags, ctx, rq->tag);
- if (sched_tag != -1)
+ if (sched_tag != BLK_MQ_NO_TAG)
blk_mq_put_tag(hctx->sched_tags, ctx, sched_tag);
blk_mq_sched_restart(hctx);
blk_queue_exit(q);
@@ -527,7 +552,7 @@ inline void __blk_mq_end_request(struct request *rq, blk_status_t error)
blk_stat_add(rq, now);
}
- if (rq->internal_tag != -1)
+ if (rq->internal_tag != BLK_MQ_NO_TAG)
blk_mq_sched_completed_request(rq, now);
blk_account_io_done(rq, now);
@@ -557,7 +582,17 @@ static void __blk_mq_complete_request_remote(void *data)
q->mq_ops->complete(rq);
}
-static void __blk_mq_complete_request(struct request *rq)
+/**
+ * blk_mq_force_complete_rq() - Force complete the request, bypassing any error
+ * injection that could drop the completion.
+ * @rq: Request to be force completed
+ *
+ * Drivers should use blk_mq_complete_request() to complete requests in their
+ * normal IO path. For timeout error recovery, drivers may call this forced
+ * completion routine after they've reclaimed timed-out requests to bypass
+ * potential subsequent fake timeouts.
+ */
+void blk_mq_force_complete_rq(struct request *rq)
{
struct blk_mq_ctx *ctx = rq->mq_ctx;
struct request_queue *q = rq->q;
@@ -603,6 +638,7 @@ static void __blk_mq_complete_request(struct request *rq)
}
put_cpu();
}
+EXPORT_SYMBOL_GPL(blk_mq_force_complete_rq);
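For illustration, a minimal sketch (not taken from this patch) of how a driver's timeout handler might use the newly exported helper; example_timeout_handler is a hypothetical blk_mq_ops->timeout implementation.

static enum blk_eh_timer_return example_timeout_handler(struct request *rq,
							 bool reserved)
{
	/*
	 * Hypothetical recovery path: the controller was reset and this
	 * timed-out request has been reclaimed, so force the completion to
	 * bypass a subsequent fake timeout from error injection.
	 */
	blk_mq_force_complete_rq(rq);
	return BLK_EH_DONE;
}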
static void hctx_unlock(struct blk_mq_hw_ctx *hctx, int srcu_idx)
__releases(hctx->srcu)
@@ -636,7 +672,7 @@ bool blk_mq_complete_request(struct request *rq)
{
if (unlikely(blk_should_fake_timeout(rq->q)))
return false;
- __blk_mq_complete_request(rq);
+ blk_mq_force_complete_rq(rq);
return true;
}
EXPORT_SYMBOL(blk_mq_complete_request);
@@ -667,15 +703,6 @@ void blk_mq_start_request(struct request *rq)
blk_add_timer(rq);
WRITE_ONCE(rq->state, MQ_RQ_IN_FLIGHT);
- if (q->dma_drain_size && blk_rq_bytes(rq)) {
- /*
- * Make sure space for the drain appears. We know we can do
- * this because max_hw_segments has been adjusted to be one
- * fewer than the device can handle.
- */
- rq->nr_phys_segments++;
- }
-
#ifdef CONFIG_BLK_DEV_INTEGRITY
if (blk_integrity_rq(rq) && req_op(rq) == REQ_OP_WRITE)
q->integrity.profile->prepare_fn(rq);
@@ -695,8 +722,6 @@ static void __blk_mq_requeue_request(struct request *rq)
if (blk_mq_request_started(rq)) {
WRITE_ONCE(rq->state, MQ_RQ_IDLE);
rq->rq_flags &= ~RQF_TIMED_OUT;
- if (q->dma_drain_size && blk_rq_bytes(rq))
- rq->nr_phys_segments--;
}
}
@@ -1037,7 +1062,7 @@ bool blk_mq_get_driver_tag(struct request *rq)
};
bool shared;
- if (rq->tag != -1)
+ if (rq->tag != BLK_MQ_NO_TAG)
return true;
if (blk_mq_tag_is_reserved(data.hctx->sched_tags, rq->internal_tag))
@@ -1053,7 +1078,7 @@ bool blk_mq_get_driver_tag(struct request *rq)
data.hctx->tags->rqs[rq->tag] = rq;
}
- return rq->tag != -1;
+ return rq->tag != BLK_MQ_NO_TAG;
}
static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode,
@@ -1195,6 +1220,19 @@ static void blk_mq_handle_dev_resource(struct request *rq,
__blk_mq_requeue_request(rq);
}
+static void blk_mq_handle_zone_resource(struct request *rq,
+ struct list_head *zone_list)
+{
+ /*
+ * If we end up here it is because we cannot dispatch a request to a
+ * specific zone because of LLD-level zone-write locking or another
+ * zone-related resource not being available. In this case, set the
+ * request aside in zone_list so it can be retried later.
+ */
+ list_add(&rq->queuelist, zone_list);
+ __blk_mq_requeue_request(rq);
+}
+
/*
* Returns true if we did some work AND can potentially do more.
*/
@@ -1206,6 +1244,8 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
bool no_tag = false;
int errors, queued;
blk_status_t ret = BLK_STS_OK;
+ bool no_budget_avail = false;
+ LIST_HEAD(zone_list);
if (list_empty(list))
return false;
@@ -1224,6 +1264,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
hctx = rq->mq_hctx;
if (!got_budget && !blk_mq_get_dispatch_budget(hctx)) {
blk_mq_put_driver_tag(rq);
+ no_budget_avail = true;
break;
}
@@ -1266,6 +1307,16 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) {
blk_mq_handle_dev_resource(rq, list);
break;
+ } else if (ret == BLK_STS_ZONE_RESOURCE) {
+ /*
+ * Move the request to zone_list and keep going through
+ * the dispatch list to find more requests the drive can
+ * accept.
+ */
+ blk_mq_handle_zone_resource(rq, &zone_list);
+ if (list_empty(list))
+ break;
+ continue;
}
if (unlikely(ret != BLK_STS_OK)) {
@@ -1277,6 +1328,9 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
queued++;
} while (!list_empty(list));
+ if (!list_empty(&zone_list))
+ list_splice_tail_init(&zone_list, list);
+
hctx->dispatched[queued_to_index(queued)]++;
/*
@@ -1320,13 +1374,15 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
*
* If driver returns BLK_STS_RESOURCE and SCHED_RESTART
* bit is set, run queue after a delay to avoid IO stalls
- * that could otherwise occur if the queue is idle.
+ * that could otherwise occur if the queue is idle. We'll do the
+ * same if we couldn't get budget and SCHED_RESTART is set.
*/
needs_restart = blk_mq_sched_needs_restart(hctx);
if (!needs_restart ||
(no_tag && list_empty_careful(&hctx->dispatch_wait.entry)))
blk_mq_run_hw_queue(hctx, true);
- else if (needs_restart && (ret == BLK_STS_RESOURCE))
+ else if (needs_restart && (ret == BLK_STS_RESOURCE ||
+ no_budget_avail))
blk_mq_delay_run_hw_queue(hctx, BLK_MQ_RESOURCE_DELAY);
blk_mq_update_dispatch_busy(hctx, true);
@@ -1542,6 +1598,25 @@ void blk_mq_run_hw_queues(struct request_queue *q, bool async)
EXPORT_SYMBOL(blk_mq_run_hw_queues);
/**
+ * blk_mq_delay_run_hw_queues - Run all hardware queues asynchronously.
+ * @q: Pointer to the request queue to run.
+ * @msecs: Milliseconds of delay to wait before running the queues.
+ */
+void blk_mq_delay_run_hw_queues(struct request_queue *q, unsigned long msecs)
+{
+ struct blk_mq_hw_ctx *hctx;
+ int i;
+
+ queue_for_each_hw_ctx(q, hctx, i) {
+ if (blk_mq_hctx_stopped(hctx))
+ continue;
+
+ blk_mq_delay_run_hw_queue(hctx, msecs);
+ }
+}
+EXPORT_SYMBOL(blk_mq_delay_run_hw_queues);
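As a usage sketch (hypothetical, not from this patch), a driver that ran out of an internal resource during dispatch could ask for all hardware queues to be retried shortly:

static void example_kick_queues(struct request_queue *q)
{
	/* Retry dispatch on every hardware queue after roughly 100ms. */
	blk_mq_delay_run_hw_queues(q, 100);
}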
+
+/**
* blk_mq_queue_stopped() - check whether one or more hctxs have been stopped
* @q: request queue.
*
@@ -1782,8 +1857,9 @@ static void blk_mq_bio_to_request(struct request *rq, struct bio *bio,
rq->__sector = bio->bi_iter.bi_sector;
rq->write_hint = bio->bi_write_hint;
blk_rq_bio_prep(rq, bio, nr_segs);
+ blk_crypto_rq_bio_prep(rq, bio, GFP_NOIO);
- blk_account_io_start(rq, true);
+ blk_account_io_start(rq);
}
static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx,
@@ -1973,39 +2049,42 @@ static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq)
*
* Returns: Request queue cookie.
*/
-static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
+blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
{
const int is_sync = op_is_sync(bio->bi_opf);
const int is_flush_fua = op_is_flush(bio->bi_opf);
- struct blk_mq_alloc_data data = { .flags = 0};
+ struct blk_mq_alloc_data data = {
+ .q = q,
+ };
struct request *rq;
struct blk_plug *plug;
struct request *same_queue_rq = NULL;
unsigned int nr_segs;
blk_qc_t cookie;
+ blk_status_t ret;
blk_queue_bounce(q, &bio);
__blk_queue_split(q, &bio, &nr_segs);
if (!bio_integrity_prep(bio))
- return BLK_QC_T_NONE;
+ goto queue_exit;
if (!is_flush_fua && !blk_queue_nomerges(q) &&
blk_attempt_plug_merge(q, bio, nr_segs, &same_queue_rq))
- return BLK_QC_T_NONE;
+ goto queue_exit;
if (blk_mq_sched_bio_merge(q, bio, nr_segs))
- return BLK_QC_T_NONE;
+ goto queue_exit;
rq_qos_throttle(q, bio);
data.cmd_flags = bio->bi_opf;
- rq = blk_mq_get_request(q, bio, &data);
+ rq = __blk_mq_alloc_request(&data);
if (unlikely(!rq)) {
rq_qos_cleanup(q, bio);
if (bio->bi_opf & REQ_NOWAIT)
bio_wouldblock_error(bio);
- return BLK_QC_T_NONE;
+ goto queue_exit;
}
trace_block_getrq(q, bio, bio->bi_opf);
@@ -2016,6 +2095,14 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
blk_mq_bio_to_request(rq, bio, nr_segs);
+ ret = blk_crypto_init_request(rq);
+ if (ret != BLK_STS_OK) {
+ bio->bi_status = ret;
+ bio_endio(bio);
+ blk_mq_free_request(rq);
+ return BLK_QC_T_NONE;
+ }
+
plug = blk_mq_plug(q, bio);
if (unlikely(is_flush_fua)) {
/* Bypass scheduler for flush requests */
@@ -2084,7 +2171,11 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
}
return cookie;
+queue_exit:
+ blk_queue_exit(q);
+ return BLK_QC_T_NONE;
}
+EXPORT_SYMBOL_GPL(blk_mq_make_request); /* only for request based dm */
void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
unsigned int hctx_idx)
@@ -2260,6 +2351,86 @@ fail:
return -ENOMEM;
}
+struct rq_iter_data {
+ struct blk_mq_hw_ctx *hctx;
+ bool has_rq;
+};
+
+static bool blk_mq_has_request(struct request *rq, void *data, bool reserved)
+{
+ struct rq_iter_data *iter_data = data;
+
+ if (rq->mq_hctx != iter_data->hctx)
+ return true;
+ iter_data->has_rq = true;
+ return false;
+}
+
+static bool blk_mq_hctx_has_requests(struct blk_mq_hw_ctx *hctx)
+{
+ struct blk_mq_tags *tags = hctx->sched_tags ?
+ hctx->sched_tags : hctx->tags;
+ struct rq_iter_data data = {
+ .hctx = hctx,
+ };
+
+ blk_mq_all_tag_iter(tags, blk_mq_has_request, &data);
+ return data.has_rq;
+}
+
+static inline bool blk_mq_last_cpu_in_hctx(unsigned int cpu,
+ struct blk_mq_hw_ctx *hctx)
+{
+ if (cpumask_next_and(-1, hctx->cpumask, cpu_online_mask) != cpu)
+ return false;
+ if (cpumask_next_and(cpu, hctx->cpumask, cpu_online_mask) < nr_cpu_ids)
+ return false;
+ return true;
+}
+
+static int blk_mq_hctx_notify_offline(unsigned int cpu, struct hlist_node *node)
+{
+ struct blk_mq_hw_ctx *hctx = hlist_entry_safe(node,
+ struct blk_mq_hw_ctx, cpuhp_online);
+
+ if (!cpumask_test_cpu(cpu, hctx->cpumask) ||
+ !blk_mq_last_cpu_in_hctx(cpu, hctx))
+ return 0;
+
+ /*
+ * Prevent new requests from being allocated on the current hctx.
+ *
+ * The smp_mb__after_atomic() pairs with the implied barrier in
+ * test_and_set_bit_lock() in sbitmap_get(), ensuring the inactive flag
+ * is seen once we return from the tag allocator.
+ */
+ set_bit(BLK_MQ_S_INACTIVE, &hctx->state);
+ smp_mb__after_atomic();
+
+ /*
+ * Try to grab a reference to the queue and wait for any outstanding
+ * requests. If we could not grab a reference the queue has been
+ * frozen and there are no requests.
+ */
+ if (percpu_ref_tryget(&hctx->queue->q_usage_counter)) {
+ while (blk_mq_hctx_has_requests(hctx))
+ msleep(5);
+ percpu_ref_put(&hctx->queue->q_usage_counter);
+ }
+
+ return 0;
+}
+
+static int blk_mq_hctx_notify_online(unsigned int cpu, struct hlist_node *node)
+{
+ struct blk_mq_hw_ctx *hctx = hlist_entry_safe(node,
+ struct blk_mq_hw_ctx, cpuhp_online);
+
+ if (cpumask_test_cpu(cpu, hctx->cpumask))
+ clear_bit(BLK_MQ_S_INACTIVE, &hctx->state);
+ return 0;
+}
+
/*
* 'cpu' is going away. splice any existing rq_list entries from this
* software queue to the hw queue dispatch list, and ensure that it
@@ -2273,6 +2444,9 @@ static int blk_mq_hctx_notify_dead(unsigned int cpu, struct hlist_node *node)
enum hctx_type type;
hctx = hlist_entry_safe(node, struct blk_mq_hw_ctx, cpuhp_dead);
+ if (!cpumask_test_cpu(cpu, hctx->cpumask))
+ return 0;
+
ctx = __blk_mq_get_ctx(hctx->queue, cpu);
type = hctx->type;
@@ -2296,6 +2470,9 @@ static int blk_mq_hctx_notify_dead(unsigned int cpu, struct hlist_node *node)
static void blk_mq_remove_cpuhp(struct blk_mq_hw_ctx *hctx)
{
+ if (!(hctx->flags & BLK_MQ_F_STACKING))
+ cpuhp_state_remove_instance_nocalls(CPUHP_AP_BLK_MQ_ONLINE,
+ &hctx->cpuhp_online);
cpuhp_state_remove_instance_nocalls(CPUHP_BLK_MQ_DEAD,
&hctx->cpuhp_dead);
}
@@ -2355,6 +2532,9 @@ static int blk_mq_init_hctx(struct request_queue *q,
{
hctx->queue_num = hctx_idx;
+ if (!(hctx->flags & BLK_MQ_F_STACKING))
+ cpuhp_state_add_instance_nocalls(CPUHP_AP_BLK_MQ_ONLINE,
+ &hctx->cpuhp_online);
cpuhp_state_add_instance_nocalls(CPUHP_BLK_MQ_DEAD, &hctx->cpuhp_dead);
hctx->tags = set->tags[hctx_idx];
@@ -2473,7 +2653,8 @@ static void blk_mq_init_cpu_queues(struct request_queue *q,
}
}
-static bool __blk_mq_alloc_rq_map(struct blk_mq_tag_set *set, int hctx_idx)
+static bool __blk_mq_alloc_map_and_request(struct blk_mq_tag_set *set,
+ int hctx_idx)
{
int ret = 0;
@@ -2521,18 +2702,6 @@ static void blk_mq_map_swqueue(struct request_queue *q)
* If the cpu isn't present, the cpu is mapped to first hctx.
*/
for_each_possible_cpu(i) {
- hctx_idx = set->map[HCTX_TYPE_DEFAULT].mq_map[i];
- /* unmapped hw queue can be remapped after CPU topo changed */
- if (!set->tags[hctx_idx] &&
- !__blk_mq_alloc_rq_map(set, hctx_idx)) {
- /*
- * If tags initialization fail for some hctx,
- * that hctx won't be brought online. In this
- * case, remap the current ctx to hctx[0] which
- * is guaranteed to always have tags allocated
- */
- set->map[HCTX_TYPE_DEFAULT].mq_map[i] = 0;
- }
ctx = per_cpu_ptr(q->queue_ctx, i);
for (j = 0; j < set->nr_maps; j++) {
@@ -2541,6 +2710,18 @@ static void blk_mq_map_swqueue(struct request_queue *q)
HCTX_TYPE_DEFAULT, i);
continue;
}
+ hctx_idx = set->map[j].mq_map[i];
+ /* unmapped hw queue can be remapped after CPU topo changed */
+ if (!set->tags[hctx_idx] &&
+ !__blk_mq_alloc_map_and_request(set, hctx_idx)) {
+ /*
+ * If tags initialization fails for some hctx,
+ * that hctx won't be brought online. In this
+ * case, remap the current ctx to hctx[0], which
+ * is guaranteed to always have tags allocated.
+ */
+ set->map[j].mq_map[i] = 0;
+ }
hctx = blk_mq_map_queue_type(q, j, i);
ctx->hctxs[j] = hctx;
@@ -2944,7 +3125,6 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
INIT_LIST_HEAD(&q->requeue_list);
spin_lock_init(&q->requeue_lock);
- q->make_request_fn = blk_mq_make_request;
q->nr_requests = set->queue_depth;
/*
@@ -2988,14 +3168,14 @@ static int __blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
int i;
for (i = 0; i < set->nr_hw_queues; i++)
- if (!__blk_mq_alloc_rq_map(set, i))
+ if (!__blk_mq_alloc_map_and_request(set, i))
goto out_unwind;
return 0;
out_unwind:
while (--i >= 0)
- blk_mq_free_rq_map(set->tags[i]);
+ blk_mq_free_map_and_requests(set, i);
return -ENOMEM;
}
@@ -3005,7 +3185,7 @@ out_unwind:
* may reduce the depth asked for, if memory is tight. set->queue_depth
* will be updated to reflect the allocated depth.
*/
-static int blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
+static int blk_mq_alloc_map_and_requests(struct blk_mq_tag_set *set)
{
unsigned int depth;
int err;
@@ -3165,7 +3345,7 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
if (ret)
goto out_free_mq_map;
- ret = blk_mq_alloc_rq_maps(set);
+ ret = blk_mq_alloc_map_and_requests(set);
if (ret)
goto out_free_mq_map;
@@ -3347,14 +3527,14 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
blk_mq_sysfs_unregister(q);
}
+ prev_nr_hw_queues = set->nr_hw_queues;
if (blk_mq_realloc_tag_set_tags(set, set->nr_hw_queues, nr_hw_queues) <
0)
goto reregister;
- prev_nr_hw_queues = set->nr_hw_queues;
set->nr_hw_queues = nr_hw_queues;
- blk_mq_update_queue_map(set);
fallback:
+ blk_mq_update_queue_map(set);
list_for_each_entry(q, &set->tag_list, tag_set_list) {
blk_mq_realloc_hw_ctxs(set, q);
if (q->nr_hw_queues != set->nr_hw_queues) {
@@ -3609,6 +3789,9 @@ static int __init blk_mq_init(void)
{
cpuhp_setup_state_multi(CPUHP_BLK_MQ_DEAD, "block/mq:dead", NULL,
blk_mq_hctx_notify_dead);
+ cpuhp_setup_state_multi(CPUHP_AP_BLK_MQ_ONLINE, "block/mq:online",
+ blk_mq_hctx_notify_online,
+ blk_mq_hctx_notify_offline);
return 0;
}
subsys_initcall(blk_mq_init);
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 10bfdfb494fa..a139b0631817 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -201,7 +201,7 @@ static inline void __blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx,
struct request *rq)
{
blk_mq_put_tag(hctx->tags, rq->mq_ctx, rq->tag);
- rq->tag = -1;
+ rq->tag = BLK_MQ_NO_TAG;
if (rq->rq_flags & RQF_MQ_INFLIGHT) {
rq->rq_flags &= ~RQF_MQ_INFLIGHT;
@@ -211,7 +211,7 @@ static inline void __blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx,
static inline void blk_mq_put_driver_tag(struct request *rq)
{
- if (rq->tag == -1 || rq->internal_tag == -1)
+ if (rq->tag == BLK_MQ_NO_TAG || rq->internal_tag == BLK_MQ_NO_TAG)
return;
__blk_mq_put_driver_tag(rq->mq_hctx, rq);
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 14397b4c4b53..9a2c23cd9700 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -48,6 +48,7 @@ void blk_set_default_limits(struct queue_limits *lim)
lim->chunk_sectors = 0;
lim->max_write_same_sectors = 0;
lim->max_write_zeroes_sectors = 0;
+ lim->max_zone_append_sectors = 0;
lim->max_discard_sectors = 0;
lim->max_hw_discard_sectors = 0;
lim->discard_granularity = 0;
@@ -83,6 +84,7 @@ void blk_set_stacking_limits(struct queue_limits *lim)
lim->max_dev_sectors = UINT_MAX;
lim->max_write_same_sectors = UINT_MAX;
lim->max_write_zeroes_sectors = UINT_MAX;
+ lim->max_zone_append_sectors = UINT_MAX;
}
EXPORT_SYMBOL(blk_set_stacking_limits);
@@ -222,6 +224,33 @@ void blk_queue_max_write_zeroes_sectors(struct request_queue *q,
EXPORT_SYMBOL(blk_queue_max_write_zeroes_sectors);
/**
+ * blk_queue_max_zone_append_sectors - set max sectors for a single zone append
+ * @q: the request queue for the device
+ * @max_zone_append_sectors: maximum number of sectors to write per command
+ **/
+void blk_queue_max_zone_append_sectors(struct request_queue *q,
+ unsigned int max_zone_append_sectors)
+{
+ unsigned int max_sectors;
+
+ if (WARN_ON(!blk_queue_is_zoned(q)))
+ return;
+
+ max_sectors = min(q->limits.max_hw_sectors, max_zone_append_sectors);
+ max_sectors = min(q->limits.chunk_sectors, max_sectors);
+
+ /*
+ * Warn about driver bugs that would result in a max_zone_append_sectors
+ * limit of 0: a 0 argument, the chunk_sectors limit (zone size) not set,
+ * or the max_hw_sectors limit not set.
+ */
+ WARN_ON(!max_sectors);
+
+ q->limits.max_zone_append_sectors = max_sectors;
+}
+EXPORT_SYMBOL_GPL(blk_queue_max_zone_append_sectors);
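A hedged sketch of how a zoned driver might advertise the new limit during queue setup; it assumes the queue has already been marked zoned, and sets chunk_sectors and max_hw_sectors first so the WARN_ON above cannot fire. The example_* names are placeholders.

static void example_setup_zoned_limits(struct request_queue *q,
				       unsigned int zone_sectors,
				       unsigned int max_hw_sectors)
{
	blk_queue_chunk_sectors(q, zone_sectors);
	blk_queue_max_hw_sectors(q, max_hw_sectors);
	/* Allow zone append commands spanning up to a full zone. */
	blk_queue_max_zone_append_sectors(q, zone_sectors);
}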
+
+/**
* blk_queue_max_segments - set max hw segments for a request for this queue
* @q: the request queue for the device
* @max_segments: max number of segments
@@ -470,6 +499,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
b->max_write_same_sectors);
t->max_write_zeroes_sectors = min(t->max_write_zeroes_sectors,
b->max_write_zeroes_sectors);
+ t->max_zone_append_sectors = min(t->max_zone_append_sectors,
+ b->max_zone_append_sectors);
t->bounce_pfn = min_not_zero(t->bounce_pfn, b->bounce_pfn);
t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask,
@@ -652,43 +683,6 @@ void blk_queue_update_dma_pad(struct request_queue *q, unsigned int mask)
EXPORT_SYMBOL(blk_queue_update_dma_pad);
/**
- * blk_queue_dma_drain - Set up a drain buffer for excess dma.
- * @q: the request queue for the device
- * @dma_drain_needed: fn which returns non-zero if drain is necessary
- * @buf: physically contiguous buffer
- * @size: size of the buffer in bytes
- *
- * Some devices have excess DMA problems and can't simply discard (or
- * zero fill) the unwanted piece of the transfer. They have to have a
- * real area of memory to transfer it into. The use case for this is
- * ATAPI devices in DMA mode. If the packet command causes a transfer
- * bigger than the transfer size some HBAs will lock up if there
- * aren't DMA elements to contain the excess transfer. What this API
- * does is adjust the queue so that the buf is always appended
- * silently to the scatterlist.
- *
- * Note: This routine adjusts max_hw_segments to make room for appending
- * the drain buffer. If you call blk_queue_max_segments() after calling
- * this routine, you must set the limit to one fewer than your device
- * can support otherwise there won't be room for the drain buffer.
- */
-int blk_queue_dma_drain(struct request_queue *q,
- dma_drain_needed_fn *dma_drain_needed,
- void *buf, unsigned int size)
-{
- if (queue_max_segments(q) < 2)
- return -EINVAL;
- /* make room for appending the drain */
- blk_queue_max_segments(q, queue_max_segments(q) - 1);
- q->dma_drain_needed = dma_drain_needed;
- q->dma_drain_buffer = buf;
- q->dma_drain_size = size;
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(blk_queue_dma_drain);
-
-/**
* blk_queue_segment_boundary - set boundary rules for segment merging
* @q: the request queue for the device
* @mask: the memory boundary mask
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index fca9b158f4a0..02643e149d5e 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -218,6 +218,13 @@ static ssize_t queue_write_zeroes_max_show(struct request_queue *q, char *page)
(unsigned long long)q->limits.max_write_zeroes_sectors << 9);
}
+static ssize_t queue_zone_append_max_show(struct request_queue *q, char *page)
+{
+ unsigned long long max_sectors = q->limits.max_zone_append_sectors;
+
+ return sprintf(page, "%llu\n", max_sectors << SECTOR_SHIFT);
+}
+
static ssize_t
queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
{
@@ -639,6 +646,11 @@ static struct queue_sysfs_entry queue_write_zeroes_max_entry = {
.show = queue_write_zeroes_max_show,
};
+static struct queue_sysfs_entry queue_zone_append_max_entry = {
+ .attr = {.name = "zone_append_max_bytes", .mode = 0444 },
+ .show = queue_zone_append_max_show,
+};
+
static struct queue_sysfs_entry queue_nonrot_entry = {
.attr = {.name = "rotational", .mode = 0644 },
.show = queue_show_nonrot,
@@ -749,6 +761,7 @@ static struct attribute *queue_attrs[] = {
&queue_discard_zeroes_data_entry.attr,
&queue_write_same_max_entry.attr,
&queue_write_zeroes_max_entry.attr,
+ &queue_zone_append_max_entry.attr,
&queue_nonrot_entry.attr,
&queue_zoned_entry.attr,
&queue_nr_zones_entry.attr,
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 98233c9c65a8..209fdd8939fb 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -2358,69 +2358,6 @@ void blk_throtl_bio_endio(struct bio *bio)
}
#endif
-/*
- * Dispatch all bios from all children tg's queued on @parent_sq. On
- * return, @parent_sq is guaranteed to not have any active children tg's
- * and all bios from previously active tg's are on @parent_sq->bio_lists[].
- */
-static void tg_drain_bios(struct throtl_service_queue *parent_sq)
-{
- struct throtl_grp *tg;
-
- while ((tg = throtl_rb_first(parent_sq))) {
- struct throtl_service_queue *sq = &tg->service_queue;
- struct bio *bio;
-
- throtl_dequeue_tg(tg);
-
- while ((bio = throtl_peek_queued(&sq->queued[READ])))
- tg_dispatch_one_bio(tg, bio_data_dir(bio));
- while ((bio = throtl_peek_queued(&sq->queued[WRITE])))
- tg_dispatch_one_bio(tg, bio_data_dir(bio));
- }
-}
-
-/**
- * blk_throtl_drain - drain throttled bios
- * @q: request_queue to drain throttled bios for
- *
- * Dispatch all currently throttled bios on @q through ->make_request_fn().
- */
-void blk_throtl_drain(struct request_queue *q)
- __releases(&q->queue_lock) __acquires(&q->queue_lock)
-{
- struct throtl_data *td = q->td;
- struct blkcg_gq *blkg;
- struct cgroup_subsys_state *pos_css;
- struct bio *bio;
- int rw;
-
- rcu_read_lock();
-
- /*
- * Drain each tg while doing post-order walk on the blkg tree, so
- * that all bios are propagated to td->service_queue. It'd be
- * better to walk service_queue tree directly but blkg walk is
- * easier.
- */
- blkg_for_each_descendant_post(blkg, pos_css, td->queue->root_blkg)
- tg_drain_bios(&blkg_to_tg(blkg)->service_queue);
-
- /* finally, transfer bios from top-level tg's into the td */
- tg_drain_bios(&td->service_queue);
-
- rcu_read_unlock();
- spin_unlock_irq(&q->queue_lock);
-
- /* all bios now should be in td->service_queue, issue them */
- for (rw = READ; rw <= WRITE; rw++)
- while ((bio = throtl_pop_queued(&td->service_queue.queued[rw],
- NULL)))
- generic_make_request(bio);
-
- spin_lock_irq(&q->queue_lock);
-}
-
int blk_throtl_init(struct request_queue *q)
{
struct throtl_data *td;
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index 9cb082f38b93..0fa615eefd52 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -405,7 +405,7 @@ static void wb_timer_fn(struct blk_stat_callback *cb)
rwb_arm_timer(rwb);
}
-static void __wbt_update_limits(struct rq_wb *rwb)
+static void wbt_update_limits(struct rq_wb *rwb)
{
struct rq_depth *rqd = &rwb->rq_depth;
@@ -418,14 +418,6 @@ static void __wbt_update_limits(struct rq_wb *rwb)
rwb_wake_all(rwb);
}
-void wbt_update_limits(struct request_queue *q)
-{
- struct rq_qos *rqos = wbt_rq_qos(q);
- if (!rqos)
- return;
- __wbt_update_limits(RQWB(rqos));
-}
-
u64 wbt_get_min_lat(struct request_queue *q)
{
struct rq_qos *rqos = wbt_rq_qos(q);
@@ -441,7 +433,7 @@ void wbt_set_min_lat(struct request_queue *q, u64 val)
return;
RQWB(rqos)->min_lat_nsec = val;
RQWB(rqos)->enable_state = WBT_STATE_ON_MANUAL;
- __wbt_update_limits(RQWB(rqos));
+ wbt_update_limits(RQWB(rqos));
}
@@ -685,7 +677,7 @@ static int wbt_data_dir(const struct request *rq)
static void wbt_queue_depth_changed(struct rq_qos *rqos)
{
RQWB(rqos)->rq_depth.queue_depth = blk_queue_depth(rqos->q);
- __wbt_update_limits(RQWB(rqos));
+ wbt_update_limits(RQWB(rqos));
}
static void wbt_exit(struct rq_qos *rqos)
@@ -843,7 +835,7 @@ int wbt_init(struct request_queue *q)
rwb->enable_state = WBT_STATE_ON_DEFAULT;
rwb->wc = 1;
rwb->rq_depth.default_depth = RWB_DEF_DEPTH;
- __wbt_update_limits(rwb);
+ wbt_update_limits(rwb);
/*
* Assign rwb and add the stats callback.
diff --git a/block/blk-wbt.h b/block/blk-wbt.h
index 8e4e37660971..16bdc85b8df9 100644
--- a/block/blk-wbt.h
+++ b/block/blk-wbt.h
@@ -88,7 +88,6 @@ static inline unsigned int wbt_inflight(struct rq_wb *rwb)
#ifdef CONFIG_BLK_WBT
int wbt_init(struct request_queue *);
-void wbt_update_limits(struct request_queue *);
void wbt_disable_default(struct request_queue *);
void wbt_enable_default(struct request_queue *);
@@ -108,9 +107,6 @@ static inline int wbt_init(struct request_queue *q)
{
return -EINVAL;
}
-static inline void wbt_update_limits(struct request_queue *q)
-{
-}
static inline void wbt_disable_default(struct request_queue *q)
{
}
diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index f87956e0dcaf..23831fa8701d 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -82,6 +82,20 @@ bool blk_req_needs_zone_write_lock(struct request *rq)
}
EXPORT_SYMBOL_GPL(blk_req_needs_zone_write_lock);
+bool blk_req_zone_write_trylock(struct request *rq)
+{
+ unsigned int zno = blk_rq_zone_no(rq);
+
+ if (test_and_set_bit(zno, rq->q->seq_zones_wlock))
+ return false;
+
+ WARN_ON_ONCE(rq->rq_flags & RQF_ZONE_WRITE_LOCKED);
+ rq->rq_flags |= RQF_ZONE_WRITE_LOCKED;
+
+ return true;
+}
+EXPORT_SYMBOL_GPL(blk_req_zone_write_trylock);
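As a hedged sketch (not part of the patch), a low-level driver that serializes zone writes itself could use the new non-blocking variant in its dispatch path and report BLK_STS_ZONE_RESOURCE on contention, matching the handling added to blk_mq_dispatch_rq_list() above; example_submit() is hypothetical.

static blk_status_t example_queue_rq(struct blk_mq_hw_ctx *hctx,
				     const struct blk_mq_queue_data *bd)
{
	struct request *rq = bd->rq;

	/* A write to a zone that is already locked is retried later. */
	if (blk_req_needs_zone_write_lock(rq) &&
	    !blk_req_zone_write_trylock(rq))
		return BLK_STS_ZONE_RESOURCE;

	return example_submit(rq);
}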
+
void __blk_req_zone_write_lock(struct request *rq)
{
if (WARN_ON_ONCE(test_and_set_bit(blk_rq_zone_no(rq),
@@ -457,14 +471,19 @@ static int blk_revalidate_zone_cb(struct blk_zone *zone, unsigned int idx,
/**
* blk_revalidate_disk_zones - (re)allocate and initialize zone bitmaps
* @disk: Target disk
+ * @update_driver_data: Callback to update driver data on the frozen disk
*
 * Helper function for low-level device drivers to (re)allocate and initialize
 * a disk request queue's zone bitmaps. This function should normally be called
 * within the disk ->revalidate method for blk-mq based drivers. For BIO based
 * drivers only q->nr_zones needs to be updated so that the sysfs exposed value
 * is correct.
+ * If the @update_driver_data callback function is not NULL, the callback is
+ * executed with the device request queue frozen after all zones have been
+ * checked.
*/
-int blk_revalidate_disk_zones(struct gendisk *disk)
+int blk_revalidate_disk_zones(struct gendisk *disk,
+ void (*update_driver_data)(struct gendisk *disk))
{
struct request_queue *q = disk->queue;
struct blk_revalidate_zone_args args = {
@@ -498,6 +517,8 @@ int blk_revalidate_disk_zones(struct gendisk *disk)
q->nr_zones = args.nr_zones;
swap(q->seq_zones_wlock, args.seq_zones_wlock);
swap(q->conv_zones_bitmap, args.conv_zones_bitmap);
+ if (update_driver_data)
+ update_driver_data(disk);
ret = 0;
} else {
pr_warn("%s: failed to revalidate zones\n", disk->disk_name);
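A hedged sketch of a low-level driver adopting the new callback parameter; example_update_zone_info() stands in for whatever driver-private zone bookkeeping needs refreshing while the queue is frozen.

static void example_update_zone_info(struct gendisk *disk)
{
	/* Called with the request queue frozen, after all zones were checked. */
}

static int example_revalidate_zones(struct gendisk *disk)
{
	return blk_revalidate_disk_zones(disk, example_update_zone_info);
}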
diff --git a/block/blk.h b/block/blk.h
index 0a94ec68af32..aa16e524dc35 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -5,7 +5,9 @@
#include <linux/idr.h>
#include <linux/blk-mq.h>
#include <linux/part_stat.h>
+#include <linux/blk-crypto.h>
#include <xen/xen.h>
+#include "blk-crypto-internal.h"
#include "blk-mq.h"
#include "blk-mq-sched.h"
@@ -17,7 +19,6 @@ extern struct dentry *blk_debugfs_root;
#endif
struct blk_flush_queue {
- unsigned int flush_queue_delayed:1;
unsigned int flush_pending_idx:1;
unsigned int flush_running_idx:1;
blk_status_t rq_status;
@@ -62,17 +63,6 @@ void blk_free_flush_queue(struct blk_flush_queue *q);
void blk_freeze_queue(struct request_queue *q);
-static inline void blk_queue_enter_live(struct request_queue *q)
-{
- /*
- * Given that running in generic_make_request() context
- * guarantees that a live reference against q_usage_counter has
- * been established, further references under that same context
- * need not check that the queue has been frozen (marked dead).
- */
- percpu_ref_get(&q->q_usage_counter);
-}
-
static inline bool biovec_phys_mergeable(struct request_queue *q,
struct bio_vec *vec1, struct bio_vec *vec2)
{
@@ -195,8 +185,7 @@ bool bio_attempt_discard_merge(struct request_queue *q, struct request *req,
bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
unsigned int nr_segs, struct request **same_queue_rq);
-void blk_account_io_start(struct request *req, bool new_io);
-void blk_account_io_completion(struct request *req, unsigned int bytes);
+void blk_account_io_start(struct request *req);
void blk_account_io_done(struct request *req, u64 now);
/*
@@ -303,36 +292,14 @@ void ioc_clear_queue(struct request_queue *q);
int create_task_io_context(struct task_struct *task, gfp_t gfp_mask, int node);
-/**
- * create_io_context - try to create task->io_context
- * @gfp_mask: allocation mask
- * @node: allocation node
- *
- * If %current->io_context is %NULL, allocate a new io_context and install
- * it. Returns the current %current->io_context which may be %NULL if
- * allocation failed.
- *
- * Note that this function can't be called with IRQ disabled because
- * task_lock which protects %current->io_context is IRQ-unsafe.
- */
-static inline struct io_context *create_io_context(gfp_t gfp_mask, int node)
-{
- WARN_ON_ONCE(irqs_disabled());
- if (unlikely(!current->io_context))
- create_task_io_context(current, gfp_mask, node);
- return current->io_context;
-}
-
/*
* Internal throttling interface
*/
#ifdef CONFIG_BLK_DEV_THROTTLING
-extern void blk_throtl_drain(struct request_queue *q);
extern int blk_throtl_init(struct request_queue *q);
extern void blk_throtl_exit(struct request_queue *q);
extern void blk_throtl_register_queue(struct request_queue *q);
#else /* CONFIG_BLK_DEV_THROTTLING */
-static inline void blk_throtl_drain(struct request_queue *q) { }
static inline int blk_throtl_init(struct request_queue *q) { return 0; }
static inline void blk_throtl_exit(struct request_queue *q) { }
static inline void blk_throtl_register_queue(struct request_queue *q) { }
@@ -375,11 +342,6 @@ void blk_queue_free_zone_bitmaps(struct request_queue *q);
static inline void blk_queue_free_zone_bitmaps(struct request_queue *q) {}
#endif
-void part_dec_in_flight(struct request_queue *q, struct hd_struct *part,
- int rw);
-void part_inc_in_flight(struct request_queue *q, struct hd_struct *part,
- int rw);
-void update_io_ticks(struct hd_struct *part, unsigned long now, bool end);
struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector);
int blk_alloc_devt(struct hd_struct *part, dev_t *devt);
@@ -389,44 +351,32 @@ char *disk_name(struct gendisk *hd, int partno, char *buf);
#define ADDPART_FLAG_NONE 0
#define ADDPART_FLAG_RAID 1
#define ADDPART_FLAG_WHOLEDISK 2
-struct hd_struct *__must_check add_partition(struct gendisk *disk, int partno,
- sector_t start, sector_t len, int flags,
- struct partition_meta_info *info);
-void __delete_partition(struct percpu_ref *ref);
-void delete_partition(struct gendisk *disk, int partno);
+void delete_partition(struct gendisk *disk, struct hd_struct *part);
+int bdev_add_partition(struct block_device *bdev, int partno,
+ sector_t start, sector_t length);
+int bdev_del_partition(struct block_device *bdev, int partno);
+int bdev_resize_partition(struct block_device *bdev, int partno,
+ sector_t start, sector_t length);
int disk_expand_part_tbl(struct gendisk *disk, int target);
+int hd_ref_init(struct hd_struct *part);
-static inline int hd_ref_init(struct hd_struct *part)
-{
- if (percpu_ref_init(&part->ref, __delete_partition, 0,
- GFP_KERNEL))
- return -ENOMEM;
- return 0;
-}
-
-static inline void hd_struct_get(struct hd_struct *part)
-{
- percpu_ref_get(&part->ref);
-}
-
+/* no need to get/put refcount of part0 */
static inline int hd_struct_try_get(struct hd_struct *part)
{
- return percpu_ref_tryget_live(&part->ref);
+ if (part->partno)
+ return percpu_ref_tryget_live(&part->ref);
+ return 1;
}
static inline void hd_struct_put(struct hd_struct *part)
{
- percpu_ref_put(&part->ref);
-}
-
-static inline void hd_struct_kill(struct hd_struct *part)
-{
- percpu_ref_kill(&part->ref);
+ if (part->partno)
+ percpu_ref_put(&part->ref);
}
static inline void hd_free_part(struct hd_struct *part)
{
- free_part_stats(part);
+ free_percpu(part->dkstats);
kfree(part->info);
percpu_ref_exit(&part->ref);
}
@@ -484,8 +434,8 @@ static inline void part_nr_sects_write(struct hd_struct *part, sector_t size)
struct request_queue *__blk_alloc_queue(int node_id);
-int __bio_add_pc_page(struct request_queue *q, struct bio *bio,
+int bio_add_hw_page(struct request_queue *q, struct bio *bio,
struct page *page, unsigned int len, unsigned int offset,
- bool *same_page);
+ unsigned int max_sectors, bool *same_page);
#endif /* BLK_INTERNAL_H */
diff --git a/block/bounce.c b/block/bounce.c
index f8ed677a1bf7..c3aaed070124 100644
--- a/block/bounce.c
+++ b/block/bounce.c
@@ -267,6 +267,8 @@ static struct bio *bounce_clone_bio(struct bio *bio_src, gfp_t gfp_mask,
break;
}
+ bio_crypt_clone(bio, bio_src, gfp_mask);
+
if (bio_integrity(bio_src)) {
int ret;
diff --git a/block/genhd.c b/block/genhd.c
index 06b642b23a07..1a7659327664 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -92,7 +92,6 @@ const char *bdevname(struct block_device *bdev, char *buf)
}
EXPORT_SYMBOL(bdevname);
-#ifdef CONFIG_SMP
static void part_stat_read_all(struct hd_struct *part, struct disk_stats *stat)
{
int cpu;
@@ -112,44 +111,13 @@ static void part_stat_read_all(struct hd_struct *part, struct disk_stats *stat)
stat->io_ticks += ptr->io_ticks;
}
}
-#else /* CONFIG_SMP */
-static void part_stat_read_all(struct hd_struct *part, struct disk_stats *stat)
-{
- memcpy(stat, &part->dkstats, sizeof(struct disk_stats));
-}
-#endif /* CONFIG_SMP */
-
-void part_inc_in_flight(struct request_queue *q, struct hd_struct *part, int rw)
-{
- if (queue_is_mq(q))
- return;
-
- part_stat_local_inc(part, in_flight[rw]);
- if (part->partno)
- part_stat_local_inc(&part_to_disk(part)->part0, in_flight[rw]);
-}
-
-void part_dec_in_flight(struct request_queue *q, struct hd_struct *part, int rw)
-{
- if (queue_is_mq(q))
- return;
-
- part_stat_local_dec(part, in_flight[rw]);
- if (part->partno)
- part_stat_local_dec(&part_to_disk(part)->part0, in_flight[rw]);
-}
static unsigned int part_in_flight(struct request_queue *q,
struct hd_struct *part)
{
+ unsigned int inflight = 0;
int cpu;
- unsigned int inflight;
- if (queue_is_mq(q)) {
- return blk_mq_in_flight(q, part);
- }
-
- inflight = 0;
for_each_possible_cpu(cpu) {
inflight += part_stat_local_read_cpu(part, in_flight[0], cpu) +
part_stat_local_read_cpu(part, in_flight[1], cpu);
@@ -165,11 +133,6 @@ static void part_in_flight_rw(struct request_queue *q, struct hd_struct *part,
{
int cpu;
- if (queue_is_mq(q)) {
- blk_mq_in_flight_rw(q, part, inflight);
- return;
- }
-
inflight[0] = 0;
inflight[1] = 0;
for_each_possible_cpu(cpu) {
@@ -344,11 +307,13 @@ static inline int sector_in_part(struct hd_struct *part, sector_t sector)
* primarily used for stats accounting.
*
* CONTEXT:
- * RCU read locked. The returned partition pointer is valid only
- * while preemption is disabled.
+ * RCU read locked. The returned partition pointer is always valid
+ * because its refcount is grabbed, except for part0, whose lifetime
+ * is the same as the disk's.
*
* RETURNS:
* Found partition on success, part0 is returned if no partition matches
+ * or the matched partition is being deleted.
*/
struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector)
{
@@ -356,21 +321,33 @@ struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector)
struct hd_struct *part;
int i;
+ rcu_read_lock();
ptbl = rcu_dereference(disk->part_tbl);
part = rcu_dereference(ptbl->last_lookup);
- if (part && sector_in_part(part, sector))
- return part;
+ if (part && sector_in_part(part, sector) && hd_struct_try_get(part))
+ goto out_unlock;
for (i = 1; i < ptbl->len; i++) {
part = rcu_dereference(ptbl->part[i]);
if (part && sector_in_part(part, sector)) {
+ /*
+ * Only a live partition can be cached for lookup, so a
+ * use-after-free on a cached partition that is being
+ * deleted is avoided.
+ */
+ if (!hd_struct_try_get(part))
+ break;
rcu_assign_pointer(ptbl->last_lookup, part);
- return part;
+ goto out_unlock;
}
}
- return &disk->part0;
+
+ part = &disk->part0;
+out_unlock:
+ rcu_read_unlock();
+ return part;
}
/**
@@ -840,13 +817,15 @@ static void __device_add_disk(struct device *parent, struct gendisk *disk,
disk->flags |= GENHD_FL_SUPPRESS_PARTITION_INFO;
disk->flags |= GENHD_FL_NO_PART_SCAN;
} else {
+ struct backing_dev_info *bdi = disk->queue->backing_dev_info;
+ struct device *dev = disk_to_dev(disk);
int ret;
/* Register BDI before referencing it from bdev */
- disk_to_dev(disk)->devt = devt;
- ret = bdi_register_owner(disk->queue->backing_dev_info,
- disk_to_dev(disk));
+ dev->devt = devt;
+ ret = bdi_register(bdi, "%u:%u", MAJOR(devt), MINOR(devt));
WARN_ON(ret);
+ bdi_set_owner(bdi, dev);
blk_register_region(disk_devt(disk), disk->minors, NULL,
exact_match, exact_lock, disk);
}
@@ -878,6 +857,25 @@ void device_add_disk_no_queue_reg(struct device *parent, struct gendisk *disk)
}
EXPORT_SYMBOL(device_add_disk_no_queue_reg);
+static void invalidate_partition(struct gendisk *disk, int partno)
+{
+ struct block_device *bdev;
+
+ bdev = bdget_disk(disk, partno);
+ if (!bdev)
+ return;
+
+ fsync_bdev(bdev);
+ __invalidate_device(bdev, true);
+
+ /*
+ * Unhash the bdev inode for this device so that it gets evicted as soon
+ * as the last inode reference is dropped.
+ */
+ remove_inode_hash(bdev->bd_inode);
+ bdput(bdev);
+}
+
void del_gendisk(struct gendisk *disk)
{
struct disk_part_iter piter;
@@ -896,13 +894,11 @@ void del_gendisk(struct gendisk *disk)
DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE);
while ((part = disk_part_iter_next(&piter))) {
invalidate_partition(disk, part->partno);
- bdev_unhash_inode(part_devt(part));
- delete_partition(disk, part->partno);
+ delete_partition(disk, part);
}
disk_part_iter_exit(&piter);
invalidate_partition(disk, 0);
- bdev_unhash_inode(disk_devt(disk));
set_capacity(disk, 0);
disk->flags &= ~GENHD_FL_UP;
up_write(&disk->lookup_sem);
@@ -1279,7 +1275,10 @@ ssize_t part_stat_show(struct device *dev,
unsigned int inflight;
part_stat_read_all(p, &stat);
- inflight = part_in_flight(q, p);
+ if (queue_is_mq(q))
+ inflight = blk_mq_in_flight(q, p);
+ else
+ inflight = part_in_flight(q, p);
return sprintf(buf,
"%8lu %8lu %8llu %8u "
@@ -1318,7 +1317,11 @@ ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr,
struct request_queue *q = part_to_disk(p)->queue;
unsigned int inflight[2];
- part_in_flight_rw(q, p, inflight);
+ if (queue_is_mq(q))
+ blk_mq_in_flight_rw(q, p, inflight);
+ else
+ part_in_flight_rw(q, p, inflight);
+
return sprintf(buf, "%8u %8u\n", inflight[0], inflight[1]);
}
@@ -1573,7 +1576,10 @@ static int diskstats_show(struct seq_file *seqf, void *v)
disk_part_iter_init(&piter, gp, DISK_PITER_INCL_EMPTY_PART0);
while ((hd = disk_part_iter_next(&piter))) {
part_stat_read_all(hd, &stat);
- inflight = part_in_flight(gp->queue, hd);
+ if (queue_is_mq(gp->queue))
+ inflight = blk_mq_in_flight(gp->queue, hd);
+ else
+ inflight = part_in_flight(gp->queue, hd);
seq_printf(seqf, "%4d %7d %s "
"%lu %lu %lu %u "
@@ -1680,14 +1686,15 @@ struct gendisk *__alloc_disk_node(int minors, int node_id)
disk = kzalloc_node(sizeof(struct gendisk), GFP_KERNEL, node_id);
if (disk) {
- if (!init_part_stats(&disk->part0)) {
+ disk->part0.dkstats = alloc_percpu(struct disk_stats);
+ if (!disk->part0.dkstats) {
kfree(disk);
return NULL;
}
init_rwsem(&disk->lookup_sem);
disk->node_id = node_id;
if (disk_expand_part_tbl(disk, 0)) {
- free_part_stats(&disk->part0);
+ free_percpu(disk->part0.dkstats);
kfree(disk);
return NULL;
}
@@ -1703,7 +1710,7 @@ struct gendisk *__alloc_disk_node(int minors, int node_id)
* TODO: Ideally set_capacity() and get_capacity() should be
* converted to make use of bd_mutex and sequence counters.
*/
- seqcount_init(&disk->part0.nr_sects_seq);
+ hd_sects_seq_init(&disk->part0);
if (hd_ref_init(&disk->part0)) {
hd_free_part(&disk->part0);
kfree(disk);
@@ -1806,20 +1813,6 @@ int bdev_read_only(struct block_device *bdev)
EXPORT_SYMBOL(bdev_read_only);
-int invalidate_partition(struct gendisk *disk, int partno)
-{
- int res = 0;
- struct block_device *bdev = bdget_disk(disk, partno);
- if (bdev) {
- fsync_bdev(bdev);
- res = __invalidate_device(bdev, true);
- bdput(bdev);
- }
- return res;
-}
-
-EXPORT_SYMBOL(invalidate_partition);
-
/*
* Disk events - monitor disk events like media change and eject request.
*/
diff --git a/block/ioctl.c b/block/ioctl.c
index 6e827de1a4c4..bdb3bbb253d9 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -16,142 +16,44 @@
static int blkpg_do_ioctl(struct block_device *bdev,
struct blkpg_partition __user *upart, int op)
{
- struct block_device *bdevp;
- struct gendisk *disk;
- struct hd_struct *part, *lpart;
struct blkpg_partition p;
- struct disk_part_iter piter;
long long start, length;
- int partno;
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
if (copy_from_user(&p, upart, sizeof(struct blkpg_partition)))
return -EFAULT;
- disk = bdev->bd_disk;
if (bdev != bdev->bd_contains)
return -EINVAL;
- partno = p.pno;
- if (partno <= 0)
+
+ if (p.pno <= 0)
return -EINVAL;
+
+ if (op == BLKPG_DEL_PARTITION)
+ return bdev_del_partition(bdev, p.pno);
+
+ start = p.start >> SECTOR_SHIFT;
+ length = p.length >> SECTOR_SHIFT;
+
+ /* check for fit in a hd_struct */
+ if (sizeof(sector_t) < sizeof(long long)) {
+ long pstart = start, plength = length;
+
+ if (pstart != start || plength != length || pstart < 0 ||
+ plength < 0 || p.pno > 65535)
+ return -EINVAL;
+ }
+
switch (op) {
- case BLKPG_ADD_PARTITION:
- start = p.start >> 9;
- length = p.length >> 9;
- /* check for fit in a hd_struct */
- if (sizeof(sector_t) == sizeof(long) &&
- sizeof(long long) > sizeof(long)) {
- long pstart = start, plength = length;
- if (pstart != start || plength != length
- || pstart < 0 || plength < 0 || partno > 65535)
- return -EINVAL;
- }
- /* check if partition is aligned to blocksize */
- if (p.start & (bdev_logical_block_size(bdev) - 1))
- return -EINVAL;
-
- mutex_lock(&bdev->bd_mutex);
-
- /* overlap? */
- disk_part_iter_init(&piter, disk,
- DISK_PITER_INCL_EMPTY);
- while ((part = disk_part_iter_next(&piter))) {
- if (!(start + length <= part->start_sect ||
- start >= part->start_sect + part->nr_sects)) {
- disk_part_iter_exit(&piter);
- mutex_unlock(&bdev->bd_mutex);
- return -EBUSY;
- }
- }
- disk_part_iter_exit(&piter);
-
- /* all seems OK */
- part = add_partition(disk, partno, start, length,
- ADDPART_FLAG_NONE, NULL);
- mutex_unlock(&bdev->bd_mutex);
- return PTR_ERR_OR_ZERO(part);
- case BLKPG_DEL_PARTITION:
- part = disk_get_part(disk, partno);
- if (!part)
- return -ENXIO;
-
- bdevp = bdget(part_devt(part));
- disk_put_part(part);
- if (!bdevp)
- return -ENOMEM;
-
- mutex_lock(&bdevp->bd_mutex);
- if (bdevp->bd_openers) {
- mutex_unlock(&bdevp->bd_mutex);
- bdput(bdevp);
- return -EBUSY;
- }
- /* all seems OK */
- fsync_bdev(bdevp);
- invalidate_bdev(bdevp);
-
- mutex_lock_nested(&bdev->bd_mutex, 1);
- delete_partition(disk, partno);
- mutex_unlock(&bdev->bd_mutex);
- mutex_unlock(&bdevp->bd_mutex);
- bdput(bdevp);
-
- return 0;
- case BLKPG_RESIZE_PARTITION:
- start = p.start >> 9;
- /* new length of partition in bytes */
- length = p.length >> 9;
- /* check for fit in a hd_struct */
- if (sizeof(sector_t) == sizeof(long) &&
- sizeof(long long) > sizeof(long)) {
- long pstart = start, plength = length;
- if (pstart != start || plength != length
- || pstart < 0 || plength < 0)
- return -EINVAL;
- }
- part = disk_get_part(disk, partno);
- if (!part)
- return -ENXIO;
- bdevp = bdget(part_devt(part));
- if (!bdevp) {
- disk_put_part(part);
- return -ENOMEM;
- }
- mutex_lock(&bdevp->bd_mutex);
- mutex_lock_nested(&bdev->bd_mutex, 1);
- if (start != part->start_sect) {
- mutex_unlock(&bdevp->bd_mutex);
- mutex_unlock(&bdev->bd_mutex);
- bdput(bdevp);
- disk_put_part(part);
- return -EINVAL;
- }
- /* overlap? */
- disk_part_iter_init(&piter, disk,
- DISK_PITER_INCL_EMPTY);
- while ((lpart = disk_part_iter_next(&piter))) {
- if (lpart->partno != partno &&
- !(start + length <= lpart->start_sect ||
- start >= lpart->start_sect + lpart->nr_sects)
- ) {
- disk_part_iter_exit(&piter);
- mutex_unlock(&bdevp->bd_mutex);
- mutex_unlock(&bdev->bd_mutex);
- bdput(bdevp);
- disk_put_part(part);
- return -EBUSY;
- }
- }
- disk_part_iter_exit(&piter);
- part_nr_sects_write(part, (sector_t)length);
- i_size_write(bdevp->bd_inode, p.length);
- mutex_unlock(&bdevp->bd_mutex);
- mutex_unlock(&bdev->bd_mutex);
- bdput(bdevp);
- disk_put_part(part);
- return 0;
- default:
+ case BLKPG_ADD_PARTITION:
+ /* check if partition is aligned to blocksize */
+ if (p.start & (bdev_logical_block_size(bdev) - 1))
return -EINVAL;
+ return bdev_add_partition(bdev, p.pno, start, length);
+ case BLKPG_RESIZE_PARTITION:
+ return bdev_resize_partition(bdev, p.pno, start, length);
+ default:
+ return -EINVAL;
}
}
@@ -302,12 +204,12 @@ static int put_u64(u64 __user *argp, u64 val)
}
#ifdef CONFIG_COMPAT
-static int compat_put_long(compat_long_t *argp, long val)
+static int compat_put_long(compat_long_t __user *argp, long val)
{
return put_user(val, argp);
}
-static int compat_put_ulong(compat_ulong_t *argp, compat_ulong_t val)
+static int compat_put_ulong(compat_ulong_t __user *argp, compat_ulong_t val)
{
return put_user(val, argp);
}
diff --git a/block/keyslot-manager.c b/block/keyslot-manager.c
new file mode 100644
index 000000000000..c2ef41b3147b
--- /dev/null
+++ b/block/keyslot-manager.c
@@ -0,0 +1,397 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2019 Google LLC
+ */
+
+/**
+ * DOC: The Keyslot Manager
+ *
+ * Many devices with inline encryption support have a limited number of "slots"
+ * into which encryption contexts may be programmed, and requests can be tagged
+ * with a slot number to specify the key to use for en/decryption.
+ *
+ * As the number of slots is limited, and programming keys is expensive on
+ * many inline encryption devices, we don't want to program the same key into
+ * multiple slots - if multiple requests are using the same key, we want to
+ * program just one slot with that key and use that slot for all requests.
+ *
+ * The keyslot manager tracks which key occupies each slot and how many
+ * in-flight users each slot has, and also acts as an abstraction between
+ * the inline encryption hardware and the upper layers.
+ *
+ * Lower layer devices will set up a keyslot manager in their request queue
+ * and tell it how to perform device specific operations like programming/
+ * evicting keys from keyslots.
+ *
+ * Upper layers will call blk_ksm_get_slot_for_key() to program a
+ * key into some slot in the inline encryption hardware.
+ */
+
+#define pr_fmt(fmt) "blk-crypto: " fmt
+
+#include <linux/keyslot-manager.h>
+#include <linux/atomic.h>
+#include <linux/mutex.h>
+#include <linux/pm_runtime.h>
+#include <linux/wait.h>
+#include <linux/blkdev.h>
+
+struct blk_ksm_keyslot {
+ atomic_t slot_refs;
+ struct list_head idle_slot_node;
+ struct hlist_node hash_node;
+ const struct blk_crypto_key *key;
+ struct blk_keyslot_manager *ksm;
+};
+
+static inline void blk_ksm_hw_enter(struct blk_keyslot_manager *ksm)
+{
+ /*
+ * Calling into the driver requires ksm->lock held and the device
+ * resumed. But we must resume the device first, since that can acquire
+ * and release ksm->lock via blk_ksm_reprogram_all_keys().
+ */
+ if (ksm->dev)
+ pm_runtime_get_sync(ksm->dev);
+ down_write(&ksm->lock);
+}
+
+static inline void blk_ksm_hw_exit(struct blk_keyslot_manager *ksm)
+{
+ up_write(&ksm->lock);
+ if (ksm->dev)
+ pm_runtime_put_sync(ksm->dev);
+}
+
+/**
+ * blk_ksm_init() - Initialize a keyslot manager
+ * @ksm: The keyslot_manager to initialize.
+ * @num_slots: The number of key slots to manage.
+ *
+ * Allocate memory for keyslots and initialize a keyslot manager. Called by
+ * e.g. storage drivers to set up a keyslot manager in their request_queue.
+ *
+ * Return: 0 on success, or else a negative error code.
+ */
+int blk_ksm_init(struct blk_keyslot_manager *ksm, unsigned int num_slots)
+{
+ unsigned int slot;
+ unsigned int i;
+ unsigned int slot_hashtable_size;
+
+ memset(ksm, 0, sizeof(*ksm));
+
+ if (num_slots == 0)
+ return -EINVAL;
+
+ ksm->slots = kvcalloc(num_slots, sizeof(ksm->slots[0]), GFP_KERNEL);
+ if (!ksm->slots)
+ return -ENOMEM;
+
+ ksm->num_slots = num_slots;
+
+ init_rwsem(&ksm->lock);
+
+ init_waitqueue_head(&ksm->idle_slots_wait_queue);
+ INIT_LIST_HEAD(&ksm->idle_slots);
+
+ for (slot = 0; slot < num_slots; slot++) {
+ ksm->slots[slot].ksm = ksm;
+ list_add_tail(&ksm->slots[slot].idle_slot_node,
+ &ksm->idle_slots);
+ }
+
+ spin_lock_init(&ksm->idle_slots_lock);
+
+ slot_hashtable_size = roundup_pow_of_two(num_slots);
+ ksm->log_slot_ht_size = ilog2(slot_hashtable_size);
+ ksm->slot_hashtable = kvmalloc_array(slot_hashtable_size,
+ sizeof(ksm->slot_hashtable[0]),
+ GFP_KERNEL);
+ if (!ksm->slot_hashtable)
+ goto err_destroy_ksm;
+ for (i = 0; i < slot_hashtable_size; i++)
+ INIT_HLIST_HEAD(&ksm->slot_hashtable[i]);
+
+ return 0;
+
+err_destroy_ksm:
+ blk_ksm_destroy(ksm);
+ return -ENOMEM;
+}
+EXPORT_SYMBOL_GPL(blk_ksm_init);
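+
+/*
+ * A rough driver-side setup sketch (illustration only; the callbacks, slot
+ * count, mode constant and request_queue below are placeholders supplied by
+ * the driver, not definitions from this file):
+ *
+ *	err = blk_ksm_init(&my_ksm, num_hw_keyslots);
+ *	if (err)
+ *		return err;
+ *	my_ksm.ksm_ll_ops.keyslot_program = my_keyslot_program;
+ *	my_ksm.ksm_ll_ops.keyslot_evict = my_keyslot_evict;
+ *	my_ksm.max_dun_bytes_supported = 8;
+ *	// bitmask of supported data unit sizes, as checked in
+ *	// blk_ksm_crypto_cfg_supported()
+ *	my_ksm.crypto_modes_supported[MY_CRYPTO_MODE] = 4096;
+ *	blk_ksm_register(&my_ksm, q);
+ */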
+
+static inline struct hlist_head *
+blk_ksm_hash_bucket_for_key(struct blk_keyslot_manager *ksm,
+ const struct blk_crypto_key *key)
+{
+ return &ksm->slot_hashtable[hash_ptr(key, ksm->log_slot_ht_size)];
+}
+
+static void blk_ksm_remove_slot_from_lru_list(struct blk_ksm_keyslot *slot)
+{
+ struct blk_keyslot_manager *ksm = slot->ksm;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ksm->idle_slots_lock, flags);
+ list_del(&slot->idle_slot_node);
+ spin_unlock_irqrestore(&ksm->idle_slots_lock, flags);
+}
+
+static struct blk_ksm_keyslot *blk_ksm_find_keyslot(
+ struct blk_keyslot_manager *ksm,
+ const struct blk_crypto_key *key)
+{
+ const struct hlist_head *head = blk_ksm_hash_bucket_for_key(ksm, key);
+ struct blk_ksm_keyslot *slotp;
+
+ hlist_for_each_entry(slotp, head, hash_node) {
+ if (slotp->key == key)
+ return slotp;
+ }
+ return NULL;
+}
+
+static struct blk_ksm_keyslot *blk_ksm_find_and_grab_keyslot(
+ struct blk_keyslot_manager *ksm,
+ const struct blk_crypto_key *key)
+{
+ struct blk_ksm_keyslot *slot;
+
+ slot = blk_ksm_find_keyslot(ksm, key);
+ if (!slot)
+ return NULL;
+ if (atomic_inc_return(&slot->slot_refs) == 1) {
+ /* Took first reference to this slot; remove it from LRU list */
+ blk_ksm_remove_slot_from_lru_list(slot);
+ }
+ return slot;
+}
+
+unsigned int blk_ksm_get_slot_idx(struct blk_ksm_keyslot *slot)
+{
+ return slot - slot->ksm->slots;
+}
+EXPORT_SYMBOL_GPL(blk_ksm_get_slot_idx);
+
+/**
+ * blk_ksm_get_slot_for_key() - Program a key into a keyslot.
+ * @ksm: The keyslot manager to program the key into.
+ * @key: Pointer to the key object to program, including the raw key, crypto
+ * mode, and data unit size.
+ * @slot_ptr: Where to store a pointer to the allocated keyslot.
+ *
+ * Get a keyslot that's been programmed with the specified key. If one already
+ * exists, return it with incremented refcount. Otherwise, wait for a keyslot
+ * to become idle and program it.
+ *
+ * Context: Process context. Takes and releases ksm->lock.
+ * Return: BLK_STS_OK on success (and *slot_ptr is set to the allocated
+ *	   keyslot), or some other blk_status_t otherwise (and *slot_ptr is
+ *	   set to NULL).
+ */
+blk_status_t blk_ksm_get_slot_for_key(struct blk_keyslot_manager *ksm,
+ const struct blk_crypto_key *key,
+ struct blk_ksm_keyslot **slot_ptr)
+{
+ struct blk_ksm_keyslot *slot;
+ int slot_idx;
+ int err;
+
+ *slot_ptr = NULL;
+ down_read(&ksm->lock);
+ slot = blk_ksm_find_and_grab_keyslot(ksm, key);
+ up_read(&ksm->lock);
+ if (slot)
+ goto success;
+
+ for (;;) {
+ blk_ksm_hw_enter(ksm);
+ slot = blk_ksm_find_and_grab_keyslot(ksm, key);
+ if (slot) {
+ blk_ksm_hw_exit(ksm);
+ goto success;
+ }
+
+ /*
+ * If we're here, that means there wasn't a slot that was
+ * already programmed with the key. So try to program it.
+ */
+ if (!list_empty(&ksm->idle_slots))
+ break;
+
+ blk_ksm_hw_exit(ksm);
+ wait_event(ksm->idle_slots_wait_queue,
+ !list_empty(&ksm->idle_slots));
+ }
+
+ slot = list_first_entry(&ksm->idle_slots, struct blk_ksm_keyslot,
+ idle_slot_node);
+ slot_idx = blk_ksm_get_slot_idx(slot);
+
+ err = ksm->ksm_ll_ops.keyslot_program(ksm, key, slot_idx);
+ if (err) {
+ wake_up(&ksm->idle_slots_wait_queue);
+ blk_ksm_hw_exit(ksm);
+ return errno_to_blk_status(err);
+ }
+
+ /* Move this slot to the hash list for the new key. */
+ if (slot->key)
+ hlist_del(&slot->hash_node);
+ slot->key = key;
+ hlist_add_head(&slot->hash_node, blk_ksm_hash_bucket_for_key(ksm, key));
+
+ atomic_set(&slot->slot_refs, 1);
+
+ blk_ksm_remove_slot_from_lru_list(slot);
+
+ blk_ksm_hw_exit(ksm);
+success:
+ *slot_ptr = slot;
+ return BLK_STS_OK;
+}
+
+/**
+ * blk_ksm_put_slot() - Release a reference to a slot
+ * @slot: The keyslot to release the reference of.
+ *
+ * Context: Any context.
+ */
+void blk_ksm_put_slot(struct blk_ksm_keyslot *slot)
+{
+ struct blk_keyslot_manager *ksm;
+ unsigned long flags;
+
+ if (!slot)
+ return;
+
+ ksm = slot->ksm;
+
+ if (atomic_dec_and_lock_irqsave(&slot->slot_refs,
+ &ksm->idle_slots_lock, flags)) {
+ list_add_tail(&slot->idle_slot_node, &ksm->idle_slots);
+ spin_unlock_irqrestore(&ksm->idle_slots_lock, flags);
+ wake_up(&ksm->idle_slots_wait_queue);
+ }
+}
+
+/**
+ * blk_ksm_crypto_cfg_supported() - Find out if a crypto configuration is
+ * supported by a ksm.
+ * @ksm: The keyslot manager to check
+ * @cfg: The crypto configuration to check for.
+ *
+ * Check whether @cfg's crypto mode, data unit size, and number of DUN bytes
+ * are all supported.
+ *
+ * Return: Whether or not this ksm supports the specified crypto config.
+ */
+bool blk_ksm_crypto_cfg_supported(struct blk_keyslot_manager *ksm,
+ const struct blk_crypto_config *cfg)
+{
+ if (!ksm)
+ return false;
+ if (!(ksm->crypto_modes_supported[cfg->crypto_mode] &
+ cfg->data_unit_size))
+ return false;
+ if (ksm->max_dun_bytes_supported < cfg->dun_bytes)
+ return false;
+ return true;
+}
+
+/**
+ * blk_ksm_evict_key() - Evict a key from the lower layer device.
+ * @ksm: The keyslot manager to evict from
+ * @key: The key to evict
+ *
+ * Find the keyslot that the specified key was programmed into, and evict that
+ * slot from the lower layer device. The slot must not be in use by any
+ * in-flight IO when this function is called.
+ *
+ * Context: Process context. Takes and releases ksm->lock.
+ * Return: 0 on success or if there's no keyslot with the specified key, -EBUSY
+ * if the keyslot is still in use, or another -errno value on other
+ * error.
+ */
+int blk_ksm_evict_key(struct blk_keyslot_manager *ksm,
+ const struct blk_crypto_key *key)
+{
+ struct blk_ksm_keyslot *slot;
+ int err = 0;
+
+ blk_ksm_hw_enter(ksm);
+ slot = blk_ksm_find_keyslot(ksm, key);
+ if (!slot)
+ goto out_unlock;
+
+ if (WARN_ON_ONCE(atomic_read(&slot->slot_refs) != 0)) {
+ err = -EBUSY;
+ goto out_unlock;
+ }
+ err = ksm->ksm_ll_ops.keyslot_evict(ksm, key,
+ blk_ksm_get_slot_idx(slot));
+ if (err)
+ goto out_unlock;
+
+ hlist_del(&slot->hash_node);
+ slot->key = NULL;
+ err = 0;
+out_unlock:
+ blk_ksm_hw_exit(ksm);
+ return err;
+}
+
+/**
+ * blk_ksm_reprogram_all_keys() - Re-program all keyslots.
+ * @ksm: The keyslot manager
+ *
+ * Re-program all keyslots that are supposed to have a key programmed. This is
+ * intended only for use by drivers for hardware that loses its keys on reset.
+ *
+ * Context: Process context. Takes and releases ksm->lock.
+ */
+void blk_ksm_reprogram_all_keys(struct blk_keyslot_manager *ksm)
+{
+ unsigned int slot;
+
+ /* This is for device initialization, so don't resume the device */
+ down_write(&ksm->lock);
+ for (slot = 0; slot < ksm->num_slots; slot++) {
+ const struct blk_crypto_key *key = ksm->slots[slot].key;
+ int err;
+
+ if (!key)
+ continue;
+
+ err = ksm->ksm_ll_ops.keyslot_program(ksm, key, slot);
+ WARN_ON(err);
+ }
+ up_write(&ksm->lock);
+}
+EXPORT_SYMBOL_GPL(blk_ksm_reprogram_all_keys);
+
+void blk_ksm_destroy(struct blk_keyslot_manager *ksm)
+{
+ if (!ksm)
+ return;
+ kvfree(ksm->slot_hashtable);
+ memzero_explicit(ksm->slots, sizeof(ksm->slots[0]) * ksm->num_slots);
+ kvfree(ksm->slots);
+ memzero_explicit(ksm, sizeof(*ksm));
+}
+EXPORT_SYMBOL_GPL(blk_ksm_destroy);
+
+bool blk_ksm_register(struct blk_keyslot_manager *ksm, struct request_queue *q)
+{
+ if (blk_integrity_queue_supports_integrity(q)) {
+ pr_warn("Integrity and hardware inline encryption are not supported together. Disabling hardware inline encryption.\n");
+ return false;
+ }
+ q->ksm = ksm;
+ return true;
+}
+EXPORT_SYMBOL_GPL(blk_ksm_register);
+
+void blk_ksm_unregister(struct request_queue *q)
+{
+ q->ksm = NULL;
+}
diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c
index 34dcea0ef637..a38c5ab103d1 100644
--- a/block/kyber-iosched.c
+++ b/block/kyber-iosched.c
@@ -579,7 +579,7 @@ static bool kyber_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio,
return merged;
}
-static void kyber_prepare_request(struct request *rq, struct bio *bio)
+static void kyber_prepare_request(struct request *rq)
{
rq_set_domain_token(rq, -1);
}
diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index b490f47fd553..b57470e154c8 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -541,7 +541,7 @@ static void dd_insert_requests(struct blk_mq_hw_ctx *hctx,
* Nothing to do here. This is defined only to ensure that .finish_request
* method is called upon request completion.
*/
-static void dd_prepare_request(struct request *rq, struct bio *bio)
+static void dd_prepare_request(struct request *rq)
{
}
diff --git a/block/partitions/core.c b/block/partitions/core.c
index 9ef48a8cff86..78951e33b2d7 100644
--- a/block/partitions/core.c
+++ b/block/partitions/core.c
@@ -274,10 +274,10 @@ struct device_type part_type = {
.uevent = part_uevent,
};
-static void delete_partition_work_fn(struct work_struct *work)
+static void hd_struct_free_work(struct work_struct *work)
{
- struct hd_struct *part = container_of(to_rcu_work(work), struct hd_struct,
- rcu_work);
+ struct hd_struct *part =
+ container_of(to_rcu_work(work), struct hd_struct, rcu_work);
part->start_sect = 0;
part->nr_sects = 0;
@@ -285,32 +285,42 @@ static void delete_partition_work_fn(struct work_struct *work)
put_device(part_to_dev(part));
}
-void __delete_partition(struct percpu_ref *ref)
+static void hd_struct_free(struct percpu_ref *ref)
{
struct hd_struct *part = container_of(ref, struct hd_struct, ref);
- INIT_RCU_WORK(&part->rcu_work, delete_partition_work_fn);
+ struct gendisk *disk = part_to_disk(part);
+ struct disk_part_tbl *ptbl =
+ rcu_dereference_protected(disk->part_tbl, 1);
+
+ rcu_assign_pointer(ptbl->last_lookup, NULL);
+ put_device(disk_to_dev(disk));
+
+ INIT_RCU_WORK(&part->rcu_work, hd_struct_free_work);
queue_rcu_work(system_wq, &part->rcu_work);
}
+int hd_ref_init(struct hd_struct *part)
+{
+ if (percpu_ref_init(&part->ref, hd_struct_free, 0, GFP_KERNEL))
+ return -ENOMEM;
+ return 0;
+}
+
/*
* Must be called either with bd_mutex held, before a disk can be opened or
* after all disk users are gone.
*/
-void delete_partition(struct gendisk *disk, int partno)
+void delete_partition(struct gendisk *disk, struct hd_struct *part)
{
struct disk_part_tbl *ptbl =
rcu_dereference_protected(disk->part_tbl, 1);
- struct hd_struct *part;
-
- if (partno >= ptbl->len)
- return;
-
- part = rcu_dereference_protected(ptbl->part[partno], 1);
- if (!part)
- return;
- rcu_assign_pointer(ptbl->part[partno], NULL);
- rcu_assign_pointer(ptbl->last_lookup, NULL);
+ /*
+ * ->part_tbl is referenced in this part's release handler, so
+ * we have to hold the disk device
+ */
+ get_device(disk_to_dev(part_to_disk(part)));
+ rcu_assign_pointer(ptbl->part[part->partno], NULL);
kobject_put(part->holder_dir);
device_del(part_to_dev(part));
@@ -321,7 +331,7 @@ void delete_partition(struct gendisk *disk, int partno)
* "in-use" until we really free the gendisk.
*/
blk_invalidate_devt(part_devt(part));
- hd_struct_kill(part);
+ percpu_ref_kill(&part->ref);
}
static ssize_t whole_disk_show(struct device *dev,
@@ -335,7 +345,7 @@ static DEVICE_ATTR(whole_disk, 0444, whole_disk_show, NULL);
* Must be called either with bd_mutex held, before a disk can be opened or
* after all disk users are gone.
*/
-struct hd_struct *add_partition(struct gendisk *disk, int partno,
+static struct hd_struct *add_partition(struct gendisk *disk, int partno,
sector_t start, sector_t len, int flags,
struct partition_meta_info *info)
{
@@ -377,12 +387,13 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
if (!p)
return ERR_PTR(-EBUSY);
- if (!init_part_stats(p)) {
+ p->dkstats = alloc_percpu(struct disk_stats);
+ if (!p->dkstats) {
err = -ENOMEM;
goto out_free;
}
- seqcount_init(&p->nr_sects_seq);
+ hd_sects_seq_init(p);
pdev = part_to_dev(p);
p->start_sect = start;
@@ -458,7 +469,7 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
out_free_info:
kfree(p->info);
out_free_stats:
- free_part_stats(p);
+ free_percpu(p->dkstats);
out_free:
kfree(p);
return ERR_PTR(err);
@@ -472,6 +483,121 @@ out_put:
return ERR_PTR(err);
}
+static bool partition_overlaps(struct gendisk *disk, sector_t start,
+ sector_t length, int skip_partno)
+{
+ struct disk_part_iter piter;
+ struct hd_struct *part;
+ bool overlap = false;
+
+ disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY);
+ while ((part = disk_part_iter_next(&piter))) {
+ if (part->partno == skip_partno ||
+ start >= part->start_sect + part->nr_sects ||
+ start + length <= part->start_sect)
+ continue;
+ overlap = true;
+ break;
+ }
+
+ disk_part_iter_exit(&piter);
+ return overlap;
+}
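+
+/*
+ * Worked example (illustration only): with an existing partition covering
+ * sectors [2048, 3072) and a request of start = 1024, length = 2048, neither
+ * start >= 3072 nor start + length <= 2048 holds, so the ranges overlap and
+ * the function returns true.
+ */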
+
+int bdev_add_partition(struct block_device *bdev, int partno,
+ sector_t start, sector_t length)
+{
+ struct hd_struct *part;
+
+ mutex_lock(&bdev->bd_mutex);
+ if (partition_overlaps(bdev->bd_disk, start, length, -1)) {
+ mutex_unlock(&bdev->bd_mutex);
+ return -EBUSY;
+ }
+
+ part = add_partition(bdev->bd_disk, partno, start, length,
+ ADDPART_FLAG_NONE, NULL);
+ mutex_unlock(&bdev->bd_mutex);
+ return PTR_ERR_OR_ZERO(part);
+}
+
+int bdev_del_partition(struct block_device *bdev, int partno)
+{
+ struct block_device *bdevp;
+ struct hd_struct *part;
+ int ret = 0;
+
+ part = disk_get_part(bdev->bd_disk, partno);
+ if (!part)
+ return -ENXIO;
+
+ ret = -ENOMEM;
+ bdevp = bdget(part_devt(part));
+ if (!bdevp)
+ goto out_put_part;
+
+ mutex_lock(&bdevp->bd_mutex);
+
+ ret = -EBUSY;
+ if (bdevp->bd_openers)
+ goto out_unlock;
+
+ sync_blockdev(bdevp);
+ invalidate_bdev(bdevp);
+
+ mutex_lock_nested(&bdev->bd_mutex, 1);
+ delete_partition(bdev->bd_disk, part);
+ mutex_unlock(&bdev->bd_mutex);
+
+ ret = 0;
+out_unlock:
+ mutex_unlock(&bdevp->bd_mutex);
+ bdput(bdevp);
+out_put_part:
+ disk_put_part(part);
+ return ret;
+}
+
+int bdev_resize_partition(struct block_device *bdev, int partno,
+ sector_t start, sector_t length)
+{
+ struct block_device *bdevp;
+ struct hd_struct *part;
+ int ret = 0;
+
+ part = disk_get_part(bdev->bd_disk, partno);
+ if (!part)
+ return -ENXIO;
+
+ ret = -ENOMEM;
+ bdevp = bdget(part_devt(part));
+ if (!bdevp)
+ goto out_put_part;
+
+ mutex_lock(&bdevp->bd_mutex);
+ mutex_lock_nested(&bdev->bd_mutex, 1);
+
+ ret = -EINVAL;
+ if (start != part->start_sect)
+ goto out_unlock;
+
+ ret = -EBUSY;
+ if (partition_overlaps(bdev->bd_disk, start, length, partno))
+ goto out_unlock;
+
+ part_nr_sects_write(part, (sector_t)length);
+ i_size_write(bdevp->bd_inode, length << SECTOR_SHIFT);
+
+ ret = 0;
+out_unlock:
+ mutex_unlock(&bdevp->bd_mutex);
+ mutex_unlock(&bdev->bd_mutex);
+ bdput(bdevp);
+out_put_part:
+ disk_put_part(part);
+ return ret;
+}
+
static bool disk_unlock_native_capacity(struct gendisk *disk)
{
const struct block_device_operations *bdops = disk->fops;
@@ -488,27 +614,30 @@ static bool disk_unlock_native_capacity(struct gendisk *disk)
}
}
-int blk_drop_partitions(struct gendisk *disk, struct block_device *bdev)
+int blk_drop_partitions(struct block_device *bdev)
{
struct disk_part_iter piter;
struct hd_struct *part;
- int res;
- if (!disk_part_scan_enabled(disk))
+ if (!disk_part_scan_enabled(bdev->bd_disk))
return 0;
if (bdev->bd_part_count)
return -EBUSY;
- res = invalidate_partition(disk, 0);
- if (res)
- return res;
- disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY);
+ sync_blockdev(bdev);
+ invalidate_bdev(bdev);
+
+ disk_part_iter_init(&piter, bdev->bd_disk, DISK_PITER_INCL_EMPTY);
while ((part = disk_part_iter_next(&piter)))
- delete_partition(disk, part->partno);
+ delete_partition(bdev->bd_disk, part);
disk_part_iter_exit(&piter);
return 0;
}
+#ifdef CONFIG_S390
+/* for historic reasons in the DASD driver */
+EXPORT_SYMBOL_GPL(blk_drop_partitions);
+#endif
static bool blk_add_partition(struct gendisk *disk, struct block_device *bdev,
struct parsed_partitions *state, int p)
diff --git a/block/partitions/ibm.c b/block/partitions/ibm.c
index 073faa6a69b8..d6e18df9c53c 100644
--- a/block/partitions/ibm.c
+++ b/block/partitions/ibm.c
@@ -13,10 +13,11 @@
#include <asm/ebcdic.h>
#include <linux/uaccess.h>
#include <asm/vtoc.h>
+#include <linux/module.h>
+#include <linux/dasd_mod.h>
#include "check.h"
-
union label_t {
struct vtoc_volume_label_cdl vol;
struct vtoc_volume_label_ldl lnx;
@@ -288,7 +289,9 @@ static int find_cms1_partitions(struct parsed_partitions *state,
*/
int ibm_partition(struct parsed_partitions *state)
{
+ int (*fn)(struct gendisk *disk, dasd_information2_t *info);
struct block_device *bdev = state->bdev;
+ struct gendisk *disk = bdev->bd_disk;
int blocksize, res;
loff_t i_size, offset, size;
dasd_information2_t *info;
@@ -299,24 +302,31 @@ int ibm_partition(struct parsed_partitions *state)
union label_t *label;
res = 0;
+ if (!disk->fops->getgeo)
+ goto out_exit;
+ fn = symbol_get(dasd_biodasdinfo);
+ if (!fn)
+ goto out_exit;
blocksize = bdev_logical_block_size(bdev);
if (blocksize <= 0)
- goto out_exit;
+ goto out_symbol;
i_size = i_size_read(bdev->bd_inode);
if (i_size == 0)
- goto out_exit;
+ goto out_symbol;
info = kmalloc(sizeof(dasd_information2_t), GFP_KERNEL);
if (info == NULL)
- goto out_exit;
+ goto out_symbol;
geo = kmalloc(sizeof(struct hd_geometry), GFP_KERNEL);
if (geo == NULL)
goto out_nogeo;
label = kmalloc(sizeof(union label_t), GFP_KERNEL);
if (label == NULL)
goto out_nolab;
- if (ioctl_by_bdev(bdev, HDIO_GETGEO, (unsigned long)geo) != 0)
+ /* set start if not filled by getgeo function e.g. virtblk */
+ geo->start = get_start_sect(bdev);
+ if (disk->fops->getgeo(bdev, geo))
goto out_freeall;
- if (ioctl_by_bdev(bdev, BIODASDINFO2, (unsigned long)info) != 0) {
+ if (fn(disk, info)) {
kfree(info);
info = NULL;
}
@@ -359,6 +369,8 @@ out_nolab:
kfree(geo);
out_nogeo:
kfree(info);
+out_symbol:
+ symbol_put(dasd_biodasdinfo);
out_exit:
return res;
}
diff --git a/crypto/Kconfig b/crypto/Kconfig
index c24a47406f8f..d5daf35431e3 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -370,7 +370,6 @@ config CRYPTO_CFB
config CRYPTO_CTR
tristate "CTR support"
select CRYPTO_SKCIPHER
- select CRYPTO_SEQIV
select CRYPTO_MANAGER
help
CTR: Counter mode
@@ -1820,7 +1819,7 @@ config CRYPTO_DRBG_HASH
config CRYPTO_DRBG_CTR
bool "Enable CTR DRBG"
select CRYPTO_AES
- depends on CRYPTO_CTR
+ select CRYPTO_CTR
help
Enable the CTR DRBG variant as defined in NIST SP800-90A.
diff --git a/crypto/algapi.c b/crypto/algapi.c
index 69605e21af92..92abdf675992 100644
--- a/crypto/algapi.c
+++ b/crypto/algapi.c
@@ -403,7 +403,7 @@ static void crypto_wait_for_test(struct crypto_larval *larval)
err = wait_for_completion_killable(&larval->completion);
WARN_ON(err);
if (!err)
- crypto_probing_notify(CRYPTO_MSG_ALG_LOADED, larval);
+ crypto_notify(CRYPTO_MSG_ALG_LOADED, larval);
out:
crypto_larval_kill(&larval->alg);
@@ -716,17 +716,27 @@ EXPORT_SYMBOL_GPL(crypto_drop_spawn);
static struct crypto_alg *crypto_spawn_alg(struct crypto_spawn *spawn)
{
- struct crypto_alg *alg;
+ struct crypto_alg *alg = ERR_PTR(-EAGAIN);
+ struct crypto_alg *target;
+ bool shoot = false;
down_read(&crypto_alg_sem);
- alg = spawn->alg;
- if (!spawn->dead && !crypto_mod_get(alg)) {
- alg->cra_flags |= CRYPTO_ALG_DYING;
- alg = NULL;
+ if (!spawn->dead) {
+ alg = spawn->alg;
+ if (!crypto_mod_get(alg)) {
+ target = crypto_alg_get(alg);
+ shoot = true;
+ alg = ERR_PTR(-EAGAIN);
+ }
}
up_read(&crypto_alg_sem);
- return alg ?: ERR_PTR(-EAGAIN);
+ if (shoot) {
+ crypto_shoot_alg(target);
+ crypto_alg_put(target);
+ }
+
+ return alg;
}
struct crypto_tfm *crypto_spawn_tfm(struct crypto_spawn *spawn, u32 type,
@@ -904,6 +914,14 @@ out:
}
EXPORT_SYMBOL_GPL(crypto_enqueue_request);
+void crypto_enqueue_request_head(struct crypto_queue *queue,
+ struct crypto_async_request *request)
+{
+ queue->qlen++;
+ list_add(&request->list, &queue->list);
+}
+EXPORT_SYMBOL_GPL(crypto_enqueue_request_head);
+
struct crypto_async_request *crypto_dequeue_request(struct crypto_queue *queue)
{
struct list_head *request;
diff --git a/crypto/algif_rng.c b/crypto/algif_rng.c
index 22df3799a17b..087c0ad09d38 100644
--- a/crypto/algif_rng.c
+++ b/crypto/algif_rng.c
@@ -61,7 +61,7 @@ static int rng_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
struct sock *sk = sock->sk;
struct alg_sock *ask = alg_sk(sk);
struct rng_ctx *ctx = ask->private;
- int err = -EFAULT;
+ int err;
int genlen = 0;
u8 result[MAXSIZE];
diff --git a/crypto/api.c b/crypto/api.c
index 7d71a9b10e5f..edcf690800d4 100644
--- a/crypto/api.c
+++ b/crypto/api.c
@@ -333,12 +333,13 @@ static unsigned int crypto_ctxsize(struct crypto_alg *alg, u32 type, u32 mask)
return len;
}
-static void crypto_shoot_alg(struct crypto_alg *alg)
+void crypto_shoot_alg(struct crypto_alg *alg)
{
down_write(&crypto_alg_sem);
alg->cra_flags |= CRYPTO_ALG_DYING;
up_write(&crypto_alg_sem);
}
+EXPORT_SYMBOL_GPL(crypto_shoot_alg);
struct crypto_tfm *__crypto_alloc_tfm(struct crypto_alg *alg, u32 type,
u32 mask)
diff --git a/crypto/blake2b_generic.c b/crypto/blake2b_generic.c
index 1d262374fa4e..0ffd8d92e308 100644
--- a/crypto/blake2b_generic.c
+++ b/crypto/blake2b_generic.c
@@ -129,7 +129,9 @@ static void blake2b_compress(struct blake2b_state *S,
ROUND(9);
ROUND(10);
ROUND(11);
-
+#ifdef CONFIG_CC_IS_CLANG
+#pragma nounroll /* https://bugs.llvm.org/show_bug.cgi?id=45803 */
+#endif
for (i = 0; i < 8; ++i)
S->h[i] = S->h[i] ^ v[i] ^ v[i + 8];
}
diff --git a/crypto/crypto_engine.c b/crypto/crypto_engine.c
index eb029ff1e05a..3655d9d3f5df 100644
--- a/crypto/crypto_engine.c
+++ b/crypto/crypto_engine.c
@@ -22,32 +22,36 @@
* @err: error number
*/
static void crypto_finalize_request(struct crypto_engine *engine,
- struct crypto_async_request *req, int err)
+ struct crypto_async_request *req, int err)
{
unsigned long flags;
- bool finalize_cur_req = false;
+ bool finalize_req = false;
int ret;
struct crypto_engine_ctx *enginectx;
- spin_lock_irqsave(&engine->queue_lock, flags);
- if (engine->cur_req == req)
- finalize_cur_req = true;
- spin_unlock_irqrestore(&engine->queue_lock, flags);
+ /*
+	 * If the hardware cannot enqueue more requests and the retry
+	 * mechanism is not supported, make sure we are completing the
+	 * current request.
+ */
+ if (!engine->retry_support) {
+ spin_lock_irqsave(&engine->queue_lock, flags);
+ if (engine->cur_req == req) {
+ finalize_req = true;
+ engine->cur_req = NULL;
+ }
+ spin_unlock_irqrestore(&engine->queue_lock, flags);
+ }
- if (finalize_cur_req) {
+ if (finalize_req || engine->retry_support) {
enginectx = crypto_tfm_ctx(req->tfm);
- if (engine->cur_req_prepared &&
+ if (enginectx->op.prepare_request &&
enginectx->op.unprepare_request) {
ret = enginectx->op.unprepare_request(engine, req);
if (ret)
dev_err(engine->dev, "failed to unprepare request\n");
}
- spin_lock_irqsave(&engine->queue_lock, flags);
- engine->cur_req = NULL;
- engine->cur_req_prepared = false;
- spin_unlock_irqrestore(&engine->queue_lock, flags);
}
-
req->complete(req, err);
kthread_queue_work(engine->kworker, &engine->pump_requests);
@@ -74,7 +78,7 @@ static void crypto_pump_requests(struct crypto_engine *engine,
spin_lock_irqsave(&engine->queue_lock, flags);
/* Make sure we are not already running a request */
- if (engine->cur_req)
+ if (!engine->retry_support && engine->cur_req)
goto out;
/* If another context is idling then defer */
@@ -108,13 +112,21 @@ static void crypto_pump_requests(struct crypto_engine *engine,
goto out;
}
+start_request:
 	/* Get the first request from the engine queue to handle */
backlog = crypto_get_backlog(&engine->queue);
async_req = crypto_dequeue_request(&engine->queue);
if (!async_req)
goto out;
- engine->cur_req = async_req;
+ /*
+ * If hardware doesn't support the retry mechanism,
+ * keep track of the request we are processing now.
+ * We'll need it on completion (crypto_finalize_request).
+ */
+ if (!engine->retry_support)
+ engine->cur_req = async_req;
+
if (backlog)
backlog->complete(backlog, -EINPROGRESS);
@@ -130,7 +142,7 @@ static void crypto_pump_requests(struct crypto_engine *engine,
ret = engine->prepare_crypt_hardware(engine);
if (ret) {
dev_err(engine->dev, "failed to prepare crypt hardware\n");
- goto req_err;
+ goto req_err_2;
}
}
@@ -141,28 +153,90 @@ static void crypto_pump_requests(struct crypto_engine *engine,
if (ret) {
dev_err(engine->dev, "failed to prepare request: %d\n",
ret);
- goto req_err;
+ goto req_err_2;
}
- engine->cur_req_prepared = true;
}
if (!enginectx->op.do_one_request) {
dev_err(engine->dev, "failed to do request\n");
ret = -EINVAL;
- goto req_err;
+ goto req_err_1;
}
+
ret = enginectx->op.do_one_request(engine, async_req);
- if (ret) {
- dev_err(engine->dev, "Failed to do one request from queue: %d\n", ret);
- goto req_err;
+
+ /* Request unsuccessfully executed by hardware */
+ if (ret < 0) {
+ /*
+		 * If the hardware queue is full (-ENOSPC), requeue the request
+		 * regardless of the backlog flag. Otherwise, unprepare and
+		 * complete the request.
+ */
+ if (!engine->retry_support ||
+ (ret != -ENOSPC)) {
+ dev_err(engine->dev,
+ "Failed to do one request from queue: %d\n",
+ ret);
+ goto req_err_1;
+ }
+ /*
+		 * If the retry mechanism is supported, unprepare the current
+		 * request and enqueue it back into the crypto-engine queue.
+ */
+ if (enginectx->op.unprepare_request) {
+ ret = enginectx->op.unprepare_request(engine,
+ async_req);
+ if (ret)
+ dev_err(engine->dev,
+ "failed to unprepare request\n");
+ }
+ spin_lock_irqsave(&engine->queue_lock, flags);
+ /*
+		 * If the hardware was unable to execute the request, enqueue
+		 * it back at the front of the crypto-engine queue to keep the
+		 * order of requests.
+ */
+ crypto_enqueue_request_head(&engine->queue, async_req);
+
+ kthread_queue_work(engine->kworker, &engine->pump_requests);
+ goto out;
}
- return;
-req_err:
- crypto_finalize_request(engine, async_req, ret);
+ goto retry;
+
+req_err_1:
+ if (enginectx->op.unprepare_request) {
+ ret = enginectx->op.unprepare_request(engine, async_req);
+ if (ret)
+ dev_err(engine->dev, "failed to unprepare request\n");
+ }
+
+req_err_2:
+ async_req->complete(async_req, ret);
+
+retry:
+ /* If retry mechanism is supported, send new requests to engine */
+ if (engine->retry_support) {
+ spin_lock_irqsave(&engine->queue_lock, flags);
+ goto start_request;
+ }
return;
out:
spin_unlock_irqrestore(&engine->queue_lock, flags);
+
+ /*
+	 * Batching requests is possible only if the hardware can enqueue
+	 * multiple requests.
+ */
+ if (engine->do_batch_requests) {
+ ret = engine->do_batch_requests(engine);
+ if (ret)
+ dev_err(engine->dev, "failed to do batch requests: %d\n",
+ ret);
+ }
+
+ return;
}
static void crypto_pump_work(struct kthread_work *work)
@@ -386,15 +460,27 @@ int crypto_engine_stop(struct crypto_engine *engine)
EXPORT_SYMBOL_GPL(crypto_engine_stop);
/**
- * crypto_engine_alloc_init - allocate crypto hardware engine structure and
- * initialize it.
+ * crypto_engine_alloc_init_and_set - allocate crypto hardware engine structure
+ * and initialize it by setting the maximum number of entries in the software
+ * crypto-engine queue.
* @dev: the device attached with one hardware engine
+ * @retry_support: whether hardware has support for retry mechanism
+ * @cbk_do_batch: pointer to a callback function to be invoked when executing
+ *                a batch of requests.
+ * This has the form:
+ * callback(struct crypto_engine *engine)
+ * where:
+ * @engine: the crypto engine structure.
* @rt: whether this queue is set to run as a realtime task
+ * @qlen: maximum size of the crypto-engine queue
*
* This must be called from context that can sleep.
* Return: the crypto engine structure on success, else NULL.
*/
-struct crypto_engine *crypto_engine_alloc_init(struct device *dev, bool rt)
+struct crypto_engine *crypto_engine_alloc_init_and_set(struct device *dev,
+ bool retry_support,
+ int (*cbk_do_batch)(struct crypto_engine *engine),
+ bool rt, int qlen)
{
struct sched_param param = { .sched_priority = MAX_RT_PRIO / 2 };
struct crypto_engine *engine;
@@ -411,12 +497,18 @@ struct crypto_engine *crypto_engine_alloc_init(struct device *dev, bool rt)
engine->running = false;
engine->busy = false;
engine->idling = false;
- engine->cur_req_prepared = false;
+ engine->retry_support = retry_support;
engine->priv_data = dev;
+ /*
+	 * Batching requests is possible only if the hardware supports the
+	 * retry mechanism.
+ */
+ engine->do_batch_requests = retry_support ? cbk_do_batch : NULL;
+
snprintf(engine->name, sizeof(engine->name),
"%s-engine", dev_name(dev));
- crypto_init_queue(&engine->queue, CRYPTO_ENGINE_MAX_QLEN);
+ crypto_init_queue(&engine->queue, qlen);
spin_lock_init(&engine->queue_lock);
engine->kworker = kthread_create_worker(0, "%s", engine->name);
@@ -433,6 +525,22 @@ struct crypto_engine *crypto_engine_alloc_init(struct device *dev, bool rt)
return engine;
}
+EXPORT_SYMBOL_GPL(crypto_engine_alloc_init_and_set);
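+
+/*
+ * Rough usage sketch (the names and queue length are placeholders, not part
+ * of this API): a driver whose hardware queue holds several requests and
+ * returns -ENOSPC when full could allocate its engine as
+ *
+ *	engine = crypto_engine_alloc_init_and_set(dev, true, my_do_batch,
+ *						   false, 128);
+ *
+ * while drivers without such support keep using crypto_engine_alloc_init(),
+ * which is now a wrapper passing retry_support = false and
+ * CRYPTO_ENGINE_MAX_QLEN.
+ */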
+
+/**
+ * crypto_engine_alloc_init - allocate crypto hardware engine structure and
+ * initialize it.
+ * @dev: the device attached with one hardware engine
+ * @rt: whether this queue is set to run as a realtime task
+ *
+ * This must be called from context that can sleep.
+ * Return: the crypto engine structure on success, else NULL.
+ */
+struct crypto_engine *crypto_engine_alloc_init(struct device *dev, bool rt)
+{
+ return crypto_engine_alloc_init_and_set(dev, false, NULL, rt,
+ CRYPTO_ENGINE_MAX_QLEN);
+}
EXPORT_SYMBOL_GPL(crypto_engine_alloc_init);
/**
diff --git a/crypto/drbg.c b/crypto/drbg.c
index b6929eb5f565..37526eb8c5d5 100644
--- a/crypto/drbg.c
+++ b/crypto/drbg.c
@@ -1087,10 +1087,6 @@ static void drbg_async_seed(struct work_struct *work)
if (ret)
goto unlock;
- /* If nonblocking pool is initialized, deactivate Jitter RNG */
- crypto_free_rng(drbg->jent);
- drbg->jent = NULL;
-
/* Set seeded to false so that if __drbg_seed fails the
* next generate call will trigger a reseed.
*/
@@ -1168,7 +1164,23 @@ static int drbg_seed(struct drbg_state *drbg, struct drbg_string *pers,
entropylen);
if (ret) {
pr_devel("DRBG: jent failed with %d\n", ret);
- goto out;
+
+ /*
+ * Do not treat the transient failure of the
+ * Jitter RNG as an error that needs to be
+ * reported. The combined number of the
+ * maximum reseed threshold times the maximum
+ * number of Jitter RNG transient errors is
+ * less than the reseed threshold required by
+ * SP800-90A allowing us to treat the
+ * transient errors as such.
+ *
+ * However, we mandate that at least the first
+ * seeding operation must succeed with the
+ * Jitter RNG.
+ */
+ if (!reseed || ret != -EAGAIN)
+ goto out;
}
drbg_string_fill(&data1, entropy, entropylen * 2);
@@ -1294,8 +1306,10 @@ static inline int drbg_alloc_state(struct drbg_state *drbg)
if (IS_ENABLED(CONFIG_CRYPTO_FIPS)) {
drbg->prev = kzalloc(drbg_sec_strength(drbg->core->flags),
GFP_KERNEL);
- if (!drbg->prev)
+ if (!drbg->prev) {
+ ret = -ENOMEM;
goto fini;
+ }
drbg->fips_primed = false;
}
@@ -1492,6 +1506,8 @@ static int drbg_prepare_hrng(struct drbg_state *drbg)
if (list_empty(&drbg->test_data.list))
return 0;
+ drbg->jent = crypto_alloc_rng("jitterentropy_rng", 0, 0);
+
INIT_WORK(&drbg->seed_work, drbg_async_seed);
drbg->random_ready.owner = THIS_MODULE;
@@ -1512,8 +1528,6 @@ static int drbg_prepare_hrng(struct drbg_state *drbg)
return err;
}
- drbg->jent = crypto_alloc_rng("jitterentropy_rng", 0, 0);
-
/*
* Require frequent reseeds until the seed source is fully
* initialized.
diff --git a/crypto/essiv.c b/crypto/essiv.c
index 465a89c9d1ef..a7f45dbc4ee2 100644
--- a/crypto/essiv.c
+++ b/crypto/essiv.c
@@ -66,7 +66,6 @@ static int essiv_skcipher_setkey(struct crypto_skcipher *tfm,
const u8 *key, unsigned int keylen)
{
struct essiv_tfm_ctx *tctx = crypto_skcipher_ctx(tfm);
- SHASH_DESC_ON_STACK(desc, tctx->hash);
u8 salt[HASH_MAX_DIGESTSIZE];
int err;
@@ -78,8 +77,7 @@ static int essiv_skcipher_setkey(struct crypto_skcipher *tfm,
if (err)
return err;
- desc->tfm = tctx->hash;
- err = crypto_shash_digest(desc, key, keylen, salt);
+ err = crypto_shash_tfm_digest(tctx->hash, key, keylen, salt);
if (err)
return err;
diff --git a/crypto/internal.h b/crypto/internal.h
index d5ebc60c5143..ff06a3bd1ca1 100644
--- a/crypto/internal.h
+++ b/crypto/internal.h
@@ -65,6 +65,7 @@ void crypto_alg_tested(const char *name, int err);
void crypto_remove_spawns(struct crypto_alg *alg, struct list_head *list,
struct crypto_alg *nalg);
void crypto_remove_final(struct list_head *list);
+void crypto_shoot_alg(struct crypto_alg *alg);
struct crypto_tfm *__crypto_alloc_tfm(struct crypto_alg *alg, u32 type,
u32 mask);
void *crypto_create_tfm(struct crypto_alg *alg,
diff --git a/crypto/jitterentropy-kcapi.c b/crypto/jitterentropy-kcapi.c
index a5ce8f96790f..b43684c0dade 100644
--- a/crypto/jitterentropy-kcapi.c
+++ b/crypto/jitterentropy-kcapi.c
@@ -108,6 +108,7 @@ void jent_get_nstime(__u64 *out)
struct jitterentropy {
spinlock_t jent_lock;
struct rand_data *entropy_collector;
+ unsigned int reset_cnt;
};
static int jent_kcapi_init(struct crypto_tfm *tfm)
@@ -142,7 +143,33 @@ static int jent_kcapi_random(struct crypto_rng *tfm,
int ret = 0;
spin_lock(&rng->jent_lock);
+
+ /* Return a permanent error in case we had too many resets in a row. */
+ if (rng->reset_cnt > (1<<10)) {
+ ret = -EFAULT;
+ goto out;
+ }
+
ret = jent_read_entropy(rng->entropy_collector, rdata, dlen);
+
+ /* Reset RNG in case of health failures */
+ if (ret < -1) {
+ pr_warn_ratelimited("Reset Jitter RNG due to health test failure: %s failure\n",
+ (ret == -2) ? "Repetition Count Test" :
+ "Adaptive Proportion Test");
+
+ rng->reset_cnt++;
+
+ ret = -EAGAIN;
+ } else {
+ rng->reset_cnt = 0;
+
+ /* Convert the Jitter RNG error into a usable error code */
+ if (ret == -1)
+ ret = -EINVAL;
+ }
+
+out:
spin_unlock(&rng->jent_lock);
return ret;
diff --git a/crypto/jitterentropy.c b/crypto/jitterentropy.c
index 042157f0d28b..57f4a1ac738b 100644
--- a/crypto/jitterentropy.c
+++ b/crypto/jitterentropy.c
@@ -2,7 +2,7 @@
* Non-physical true random number generator based on timing jitter --
* Jitter RNG standalone code.
*
- * Copyright Stephan Mueller <smueller@chronox.de>, 2015 - 2019
+ * Copyright Stephan Mueller <smueller@chronox.de>, 2015 - 2020
*
* Design
* ======
@@ -47,7 +47,7 @@
/*
* This Jitterentropy RNG is based on the jitterentropy library
- * version 2.1.2 provided at http://www.chronox.de/jent.html
+ * version 2.2.0 provided at http://www.chronox.de/jent.html
*/
#ifdef __OPTIMIZE__
@@ -83,6 +83,22 @@ struct rand_data {
unsigned int memblocksize; /* Size of one memory block in bytes */
unsigned int memaccessloops; /* Number of memory accesses per random
* bit generation */
+
+ /* Repetition Count Test */
+ int rct_count; /* Number of stuck values */
+
+ /* Adaptive Proportion Test for a significance level of 2^-30 */
+#define JENT_APT_CUTOFF 325 /* Taken from SP800-90B sec 4.4.2 */
+#define JENT_APT_WINDOW_SIZE 512 /* Data window size */
+ /* LSB of time stamp to process */
+#define JENT_APT_LSB 16
+#define JENT_APT_WORD_MASK (JENT_APT_LSB - 1)
+ unsigned int apt_observations; /* Number of collected observations */
+ unsigned int apt_count; /* APT counter */
+ unsigned int apt_base; /* APT base reference */
+ unsigned int apt_base_set:1; /* APT base reference set? */
+
+ unsigned int health_failure:1; /* Permanent health failure */
};
/* Flags that can be used to initialize the RNG */
@@ -98,12 +114,201 @@ struct rand_data {
* variations (2nd derivation of time is
* zero). */
#define JENT_ESTUCK 8 /* Too many stuck results during init. */
+#define JENT_EHEALTH 9 /* Health test failed during initialization */
+#define JENT_ERCT 10 /* RCT failed during initialization */
+
+#include "jitterentropy.h"
/***************************************************************************
- * Helper functions
+ * Adaptive Proportion Test
+ *
+ * This test complies with SP800-90B section 4.4.2.
***************************************************************************/
-#include "jitterentropy.h"
+/**
+ * Reset the APT counter
+ *
+ * @ec [in] Reference to entropy collector
+ */
+static void jent_apt_reset(struct rand_data *ec, unsigned int delta_masked)
+{
+ /* Reset APT counter */
+ ec->apt_count = 0;
+ ec->apt_base = delta_masked;
+ ec->apt_observations = 0;
+}
+
+/**
+ * Insert a new entropy event into APT
+ *
+ * @ec [in] Reference to entropy collector
+ * @delta_masked [in] Masked time delta to process
+ */
+static void jent_apt_insert(struct rand_data *ec, unsigned int delta_masked)
+{
+ /* Initialize the base reference */
+ if (!ec->apt_base_set) {
+ ec->apt_base = delta_masked;
+ ec->apt_base_set = 1;
+ return;
+ }
+
+ if (delta_masked == ec->apt_base) {
+ ec->apt_count++;
+
+ if (ec->apt_count >= JENT_APT_CUTOFF)
+ ec->health_failure = 1;
+ }
+
+ ec->apt_observations++;
+
+ if (ec->apt_observations >= JENT_APT_WINDOW_SIZE)
+ jent_apt_reset(ec, delta_masked);
+}
+
+/***************************************************************************
+ * Stuck Test and its use as Repetition Count Test
+ *
+ * The Jitter RNG uses an enhanced version of the Repetition Count Test
+ * (RCT) specified in SP800-90B section 4.4.1. Instead of counting identical
+ * back-to-back values, the input to the RCT is the counting of the stuck
+ * values during the generation of one Jitter RNG output block.
+ *
+ * The RCT is applied with an alpha of 2^{-30} compliant to FIPS 140-2 IG 9.8.
+ *
+ * During the counting operation, the Jitter RNG always calculates the RCT
+ * cut-off value of C. If that value exceeds the allowed cut-off value,
+ * the Jitter RNG output block will be calculated completely but discarded at
+ * the end. The caller of the Jitter RNG is informed with an error code.
+ ***************************************************************************/
+
+/**
+ * Repetition Count Test as defined in SP800-90B section 4.4.1
+ *
+ * @ec [in] Reference to entropy collector
+ * @stuck [in] Indicator whether the value is stuck
+ */
+static void jent_rct_insert(struct rand_data *ec, int stuck)
+{
+ /*
+ * If we have a count less than zero, a previous RCT round identified
+ * a failure. We will not overwrite it.
+ */
+ if (ec->rct_count < 0)
+ return;
+
+ if (stuck) {
+ ec->rct_count++;
+
+ /*
+ * The cutoff value is based on the following consideration:
+ * alpha = 2^-30 as recommended in FIPS 140-2 IG 9.8.
+ * In addition, we require an entropy value H of 1/OSR as this
+ * is the minimum entropy required to provide full entropy.
+ * Note, we collect 64 * OSR deltas for inserting them into
+ * the entropy pool which should then have (close to) 64 bits
+ * of entropy.
+ *
+ * Note, ec->rct_count (which equals to value B in the pseudo
+ * code of SP800-90B section 4.4.1) starts with zero. Hence
+ * we need to subtract one from the cutoff value as calculated
+ * following SP800-90B.
+ */
+ if ((unsigned int)ec->rct_count >= (31 * ec->osr)) {
+ ec->rct_count = -1;
+ ec->health_failure = 1;
+ }
+ } else {
+ ec->rct_count = 0;
+ }
+}
+
+/**
+ * Is there an RCT health test failure?
+ *
+ * @ec [in] Reference to entropy collector
+ *
+ * @return
+ * 0 No health test failure
+ * 1 Permanent health test failure
+ */
+static int jent_rct_failure(struct rand_data *ec)
+{
+ if (ec->rct_count < 0)
+ return 1;
+ return 0;
+}
+
+static inline __u64 jent_delta(__u64 prev, __u64 next)
+{
+#define JENT_UINT64_MAX (__u64)(~((__u64) 0))
+ return (prev < next) ? (next - prev) :
+ (JENT_UINT64_MAX - prev + 1 + next);
+}
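+
+/*
+ * Worked example (illustration only): if the timer wrapped between two reads,
+ * e.g. prev = JENT_UINT64_MAX - 1 and next = 1, the result is
+ * JENT_UINT64_MAX - prev + 1 + next = 1 + 1 + 1 = 3, i.e. the three timer
+ * ticks that actually elapsed across the wrap.
+ */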
+
+/**
+ * Stuck test by checking the:
+ * 1st derivative of the jitter measurement (time delta)
+ * 2nd derivative of the jitter measurement (delta of time deltas)
+ * 3rd derivative of the jitter measurement (delta of delta of time deltas)
+ *
+ * All values must always be non-zero.
+ *
+ * @ec [in] Reference to entropy collector
+ * @current_delta [in] Jitter time delta
+ *
+ * @return
+ * 0 jitter measurement not stuck (good bit)
+ * 1 jitter measurement stuck (reject bit)
+ */
+static int jent_stuck(struct rand_data *ec, __u64 current_delta)
+{
+ __u64 delta2 = jent_delta(ec->last_delta, current_delta);
+ __u64 delta3 = jent_delta(ec->last_delta2, delta2);
+ unsigned int delta_masked = current_delta & JENT_APT_WORD_MASK;
+
+ ec->last_delta = current_delta;
+ ec->last_delta2 = delta2;
+
+ /*
+ * Insert the result of the comparison of two back-to-back time
+ * deltas.
+ */
+ jent_apt_insert(ec, delta_masked);
+
+ if (!current_delta || !delta2 || !delta3) {
+ /* RCT with a stuck bit */
+ jent_rct_insert(ec, 1);
+ return 1;
+ }
+
+ /* RCT with a non-stuck bit */
+ jent_rct_insert(ec, 0);
+
+ return 0;
+}
+
+/**
+ * Report any health test failures
+ *
+ * @ec [in] Reference to entropy collector
+ *
+ * @return
+ * 0 No health test failure
+ * 1 Permanent health test failure
+ */
+static int jent_health_failure(struct rand_data *ec)
+{
+ /* Test is only enabled in FIPS mode */
+ if (!jent_fips_enabled())
+ return 0;
+
+ return ec->health_failure;
+}
+
+/***************************************************************************
+ * Noise sources
+ ***************************************************************************/
/**
* Update of the loop count used for the next round of
@@ -148,10 +353,6 @@ static __u64 jent_loop_shuffle(struct rand_data *ec,
return (shuffle + (1<<min));
}
-/***************************************************************************
- * Noise sources
- ***************************************************************************/
-
/**
* CPU Jitter noise source -- this is the noise source based on the CPU
* execution time jitter
@@ -166,18 +367,19 @@ static __u64 jent_loop_shuffle(struct rand_data *ec,
* the CPU execution time jitter. Any change to the loop in this function
* implies that careful retesting must be done.
*
- * Input:
- * @ec entropy collector struct
- * @time time stamp to be injected
- * @loop_cnt if a value not equal to 0 is set, use the given value as number of
- * loops to perform the folding
+ * @ec [in] entropy collector struct
+ * @time [in] time stamp to be injected
+ * @loop_cnt [in] if a value not equal to 0 is set, use the given value as
+ * number of loops to perform the folding
+ * @stuck [in] Is the time stamp identified as stuck?
*
* Output:
* updated ec->data
*
* @return Number of loops the folding operation is performed
*/
-static __u64 jent_lfsr_time(struct rand_data *ec, __u64 time, __u64 loop_cnt)
+static void jent_lfsr_time(struct rand_data *ec, __u64 time, __u64 loop_cnt,
+ int stuck)
{
unsigned int i;
__u64 j = 0;
@@ -220,9 +422,17 @@ static __u64 jent_lfsr_time(struct rand_data *ec, __u64 time, __u64 loop_cnt)
new ^= tmp;
}
}
- ec->data = new;
- return fold_loop_cnt;
+ /*
+ * If the time stamp is stuck, do not finally insert the value into
+ * the entropy pool. Although this operation should not do any harm
+ * even when the time stamp has no entropy, SP800-90B requires that
+ * any conditioning operation (SP800-90B considers the LFSR to be a
+	 * conditioning operation) have an identical amount of input data
+	 * according to section 3.1.5.
+ */
+ if (!stuck)
+ ec->data = new;
}
/**
@@ -243,16 +453,13 @@ static __u64 jent_lfsr_time(struct rand_data *ec, __u64 time, __u64 loop_cnt)
* to reliably access either L3 or memory, the ec->mem memory must be quite
* large which is usually not desirable.
*
- * Input:
- * @ec Reference to the entropy collector with the memory access data -- if
- * the reference to the memory block to be accessed is NULL, this noise
- * source is disabled
- * @loop_cnt if a value not equal to 0 is set, use the given value as number of
- * loops to perform the folding
- *
- * @return Number of memory access operations
+ * @ec [in] Reference to the entropy collector with the memory access data -- if
+ * the reference to the memory block to be accessed is NULL, this noise
+ * source is disabled
+ * @loop_cnt [in] if a value not equal to 0 is set, use the given value as the
+ *		  number of memory access loops to perform
*/
-static unsigned int jent_memaccess(struct rand_data *ec, __u64 loop_cnt)
+static void jent_memaccess(struct rand_data *ec, __u64 loop_cnt)
{
unsigned int wrap = 0;
__u64 i = 0;
@@ -262,7 +469,7 @@ static unsigned int jent_memaccess(struct rand_data *ec, __u64 loop_cnt)
jent_loop_shuffle(ec, MAX_ACC_LOOP_BIT, MIN_ACC_LOOP_BIT);
if (NULL == ec || NULL == ec->mem)
- return 0;
+ return;
wrap = ec->memblocksize * ec->memblocks;
/*
@@ -288,43 +495,11 @@ static unsigned int jent_memaccess(struct rand_data *ec, __u64 loop_cnt)
ec->memlocation = ec->memlocation + ec->memblocksize - 1;
ec->memlocation = ec->memlocation % wrap;
}
- return i;
}
/***************************************************************************
* Start of entropy processing logic
***************************************************************************/
-
-/**
- * Stuck test by checking the:
- * 1st derivation of the jitter measurement (time delta)
- * 2nd derivation of the jitter measurement (delta of time deltas)
- * 3rd derivation of the jitter measurement (delta of delta of time deltas)
- *
- * All values must always be non-zero.
- *
- * Input:
- * @ec Reference to entropy collector
- * @current_delta Jitter time delta
- *
- * @return
- * 0 jitter measurement not stuck (good bit)
- * 1 jitter measurement stuck (reject bit)
- */
-static int jent_stuck(struct rand_data *ec, __u64 current_delta)
-{
- __s64 delta2 = ec->last_delta - current_delta;
- __s64 delta3 = delta2 - ec->last_delta2;
-
- ec->last_delta = current_delta;
- ec->last_delta2 = delta2;
-
- if (!current_delta || !delta2 || !delta3)
- return 1;
-
- return 0;
-}
-
/**
* This is the heart of the entropy generation: calculate time deltas and
* use the CPU jitter in the time deltas. The jitter is injected into the
@@ -334,8 +509,7 @@ static int jent_stuck(struct rand_data *ec, __u64 current_delta)
* of this function! This can be done by calling this function
* and not using its result.
*
- * Input:
- * @entropy_collector Reference to entropy collector
+ * @ec [in] Reference to entropy collector
*
* @return result of stuck test
*/
@@ -343,6 +517,7 @@ static int jent_measure_jitter(struct rand_data *ec)
{
__u64 time = 0;
__u64 current_delta = 0;
+ int stuck;
/* Invoke one noise source before time measurement to add variations */
jent_memaccess(ec, 0);
@@ -352,22 +527,23 @@ static int jent_measure_jitter(struct rand_data *ec)
* invocation to measure the timing variations
*/
jent_get_nstime(&time);
- current_delta = time - ec->prev_time;
+ current_delta = jent_delta(ec->prev_time, time);
ec->prev_time = time;
+ /* Check whether we have a stuck measurement. */
+ stuck = jent_stuck(ec, current_delta);
+
/* Now call the next noise sources which also injects the data */
- jent_lfsr_time(ec, current_delta, 0);
+ jent_lfsr_time(ec, current_delta, 0, stuck);
- /* Check whether we have a stuck measurement. */
- return jent_stuck(ec, current_delta);
+ return stuck;
}
/**
* Generator of one 64 bit random number
* Function fills rand_data->data
*
- * Input:
- * @ec Reference to entropy collector
+ * @ec [in] Reference to entropy collector
*/
static void jent_gen_entropy(struct rand_data *ec)
{
@@ -391,31 +567,6 @@ static void jent_gen_entropy(struct rand_data *ec)
}
/**
- * The continuous test required by FIPS 140-2 -- the function automatically
- * primes the test if needed.
- *
- * Return:
- * returns normally if FIPS test passed
- * panics the kernel if FIPS test failed
- */
-static void jent_fips_test(struct rand_data *ec)
-{
- if (!jent_fips_enabled())
- return;
-
- /* prime the FIPS test */
- if (!ec->old_data) {
- ec->old_data = ec->data;
- jent_gen_entropy(ec);
- }
-
- if (ec->data == ec->old_data)
- jent_panic("jitterentropy: Duplicate output detected\n");
-
- ec->old_data = ec->data;
-}
-
-/**
* Entry function: Obtain entropy for the caller.
*
* This function invokes the entropy gathering logic as often to generate
@@ -425,17 +576,18 @@ static void jent_fips_test(struct rand_data *ec)
* This function truncates the last 64 bit entropy value output to the exact
* size specified by the caller.
*
- * Input:
- * @ec Reference to entropy collector
- * @data pointer to buffer for storing random data -- buffer must already
- * exist
- * @len size of the buffer, specifying also the requested number of random
- * in bytes
+ * @ec [in] Reference to entropy collector
+ * @data [in] pointer to buffer for storing random data -- buffer must already
+ * exist
+ * @len [in] size of the buffer, specifying also the requested number of
+ *	     random bytes
*
* @return 0 when request is fulfilled or an error
*
* The following error codes can occur:
* -1 entropy_collector is NULL
+ * -2 RCT failed
+ * -3 APT test failed
*/
int jent_read_entropy(struct rand_data *ec, unsigned char *data,
unsigned int len)
@@ -449,7 +601,42 @@ int jent_read_entropy(struct rand_data *ec, unsigned char *data,
unsigned int tocopy;
jent_gen_entropy(ec);
- jent_fips_test(ec);
+
+ if (jent_health_failure(ec)) {
+ int ret;
+
+ if (jent_rct_failure(ec))
+ ret = -2;
+ else
+ ret = -3;
+
+ /*
+ * Re-initialize the noise source
+ *
+ * If the health test fails, the Jitter RNG remains
+ * in failure state and will return a health failure
+ * during next invocation.
+ */
+ if (jent_entropy_init())
+ return ret;
+
+ /* Set APT to initial state */
+ jent_apt_reset(ec, 0);
+ ec->apt_base_set = 0;
+
+ /* Set RCT to initial state */
+ ec->rct_count = 0;
+
+ /* Re-enable Jitter RNG */
+ ec->health_failure = 0;
+
+ /*
+ * Return the health test failure status to the
+ * caller as the generated value is not appropriate.
+ */
+ return ret;
+ }
+
if ((DATA_SIZE_BITS / 8) < len)
tocopy = (DATA_SIZE_BITS / 8);
else
@@ -513,11 +700,15 @@ int jent_entropy_init(void)
int i;
__u64 delta_sum = 0;
__u64 old_delta = 0;
+ unsigned int nonstuck = 0;
int time_backwards = 0;
int count_mod = 0;
int count_stuck = 0;
struct rand_data ec = { 0 };
+ /* Required for RCT */
+ ec.osr = 1;
+
/* We could perform statistical tests here, but the problem is
* that we only have a few loop counts to do testing. These
* loop counts may show some slight skew and we produce
@@ -539,8 +730,10 @@ int jent_entropy_init(void)
/*
* TESTLOOPCOUNT needs some loops to identify edge systems. 100 is
* definitely too little.
+ *
+ * SP800-90B requires at least 1024 initial test cycles.
*/
-#define TESTLOOPCOUNT 300
+#define TESTLOOPCOUNT 1024
#define CLEARCACHE 100
for (i = 0; (TESTLOOPCOUNT + CLEARCACHE) > i; i++) {
__u64 time = 0;
@@ -552,13 +745,13 @@ int jent_entropy_init(void)
/* Invoke core entropy collection logic */
jent_get_nstime(&time);
ec.prev_time = time;
- jent_lfsr_time(&ec, time, 0);
+ jent_lfsr_time(&ec, time, 0, 0);
jent_get_nstime(&time2);
/* test whether timer works */
if (!time || !time2)
return JENT_ENOTIME;
- delta = time2 - time;
+ delta = jent_delta(time, time2);
/*
* test whether timer is fine grained enough to provide
* delta even when called shortly after each other -- this
@@ -581,6 +774,28 @@ int jent_entropy_init(void)
if (stuck)
count_stuck++;
+ else {
+ nonstuck++;
+
+ /*
+ * Ensure that the APT succeeded.
+ *
+ * With the check below that count_stuck must be less
+ * than 10% of the overall generated raw entropy values
+ * it is guaranteed that the APT is invoked at
+ * floor((TESTLOOPCOUNT * 0.9) / 64) == 14 times.
+ */
+ if ((nonstuck % JENT_APT_WINDOW_SIZE) == 0) {
+ jent_apt_reset(&ec,
+ delta & JENT_APT_WORD_MASK);
+ if (jent_health_failure(&ec))
+ return JENT_EHEALTH;
+ }
+ }
+
+ /* Validate RCT */
+ if (jent_rct_failure(&ec))
+ return JENT_ERCT;
/* test whether we have an increasing timer */
if (!(time2 > time))
diff --git a/crypto/lrw.c b/crypto/lrw.c
index 3c734b81b3a2..5b07a7c09296 100644
--- a/crypto/lrw.c
+++ b/crypto/lrw.c
@@ -287,7 +287,7 @@ static void exit_tfm(struct crypto_skcipher *tfm)
crypto_free_skcipher(ctx->child);
}
-static void free_inst(struct skcipher_instance *inst)
+static void crypto_lrw_free(struct skcipher_instance *inst)
{
crypto_drop_skcipher(skcipher_instance_ctx(inst));
kfree(inst);
@@ -400,12 +400,12 @@ static int create(struct crypto_template *tmpl, struct rtattr **tb)
inst->alg.encrypt = encrypt;
inst->alg.decrypt = decrypt;
- inst->free = free_inst;
+ inst->free = crypto_lrw_free;
err = skcipher_register_instance(tmpl, inst);
if (err) {
err_free_inst:
- free_inst(inst);
+ crypto_lrw_free(inst);
}
return err;
}
diff --git a/crypto/sha1_generic.c b/crypto/sha1_generic.c
index 7c57b844c382..1d43472fecbd 100644
--- a/crypto/sha1_generic.c
+++ b/crypto/sha1_generic.c
@@ -15,7 +15,6 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
-#include <linux/cryptohash.h>
#include <linux/types.h>
#include <crypto/sha.h>
#include <crypto/sha1_base.h>
@@ -31,10 +30,10 @@ EXPORT_SYMBOL_GPL(sha1_zero_message_hash);
static void sha1_generic_block_fn(struct sha1_state *sst, u8 const *src,
int blocks)
{
- u32 temp[SHA_WORKSPACE_WORDS];
+ u32 temp[SHA1_WORKSPACE_WORDS];
while (blocks--) {
- sha_transform(sst->state, src, temp);
+ sha1_transform(sst->state, src, temp);
src += SHA1_BLOCK_SIZE;
}
memzero_explicit(temp, sizeof(temp));
diff --git a/crypto/sha256_generic.c b/crypto/sha256_generic.c
index f2d7095d4f2d..88156e3e2a33 100644
--- a/crypto/sha256_generic.c
+++ b/crypto/sha256_generic.c
@@ -35,27 +35,31 @@ EXPORT_SYMBOL_GPL(sha256_zero_message_hash);
static int crypto_sha256_init(struct shash_desc *desc)
{
- return sha256_init(shash_desc_ctx(desc));
+ sha256_init(shash_desc_ctx(desc));
+ return 0;
}
static int crypto_sha224_init(struct shash_desc *desc)
{
- return sha224_init(shash_desc_ctx(desc));
+ sha224_init(shash_desc_ctx(desc));
+ return 0;
}
int crypto_sha256_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
- return sha256_update(shash_desc_ctx(desc), data, len);
+ sha256_update(shash_desc_ctx(desc), data, len);
+ return 0;
}
EXPORT_SYMBOL(crypto_sha256_update);
static int crypto_sha256_final(struct shash_desc *desc, u8 *out)
{
if (crypto_shash_digestsize(desc->tfm) == SHA224_DIGEST_SIZE)
- return sha224_final(shash_desc_ctx(desc), out);
+ sha224_final(shash_desc_ctx(desc), out);
else
- return sha256_final(shash_desc_ctx(desc), out);
+ sha256_final(shash_desc_ctx(desc), out);
+ return 0;
}
int crypto_sha256_finup(struct shash_desc *desc, const u8 *data,
diff --git a/crypto/shash.c b/crypto/shash.c
index c075b26c2a1d..e6a4b5f39b8c 100644
--- a/crypto/shash.c
+++ b/crypto/shash.c
@@ -206,6 +206,22 @@ int crypto_shash_digest(struct shash_desc *desc, const u8 *data,
}
EXPORT_SYMBOL_GPL(crypto_shash_digest);
+int crypto_shash_tfm_digest(struct crypto_shash *tfm, const u8 *data,
+ unsigned int len, u8 *out)
+{
+ SHASH_DESC_ON_STACK(desc, tfm);
+ int err;
+
+ desc->tfm = tfm;
+
+ err = crypto_shash_digest(desc, data, len, out);
+
+ shash_desc_zero(desc);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(crypto_shash_tfm_digest);
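+
+/*
+ * Sketch of the intended use (mirroring the essiv.c conversion in this
+ * series): callers that only need a one-shot digest can replace
+ *
+ *	SHASH_DESC_ON_STACK(desc, tfm);
+ *	desc->tfm = tfm;
+ *	err = crypto_shash_digest(desc, data, len, out);
+ *
+ * with a single call:
+ *
+ *	err = crypto_shash_tfm_digest(tfm, data, len, out);
+ */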
+
static int shash_default_export(struct shash_desc *desc, void *out)
{
memcpy(out, shash_desc_ctx(desc), crypto_shash_descsize(desc->tfm));
diff --git a/crypto/xts.c b/crypto/xts.c
index 6d8cea94b3cf..3565f3b863a6 100644
--- a/crypto/xts.c
+++ b/crypto/xts.c
@@ -322,7 +322,7 @@ static void exit_tfm(struct crypto_skcipher *tfm)
crypto_free_cipher(ctx->tweak);
}
-static void free_inst(struct skcipher_instance *inst)
+static void crypto_xts_free(struct skcipher_instance *inst)
{
crypto_drop_skcipher(skcipher_instance_ctx(inst));
kfree(inst);
@@ -434,12 +434,12 @@ static int create(struct crypto_template *tmpl, struct rtattr **tb)
inst->alg.encrypt = encrypt;
inst->alg.decrypt = decrypt;
- inst->free = free_inst;
+ inst->free = crypto_xts_free;
err = skcipher_register_instance(tmpl, inst);
if (err) {
err_free_inst:
- free_inst(inst);
+ crypto_xts_free(inst);
}
return err;
}
diff --git a/drivers/acpi/acpi_dbg.c b/drivers/acpi/acpi_dbg.c
index 7a265c2171c0..6041974c7627 100644
--- a/drivers/acpi/acpi_dbg.c
+++ b/drivers/acpi/acpi_dbg.c
@@ -745,7 +745,7 @@ static const struct acpi_debugger_ops acpi_aml_debugger = {
.notify_command_complete = acpi_aml_notify_command_complete,
};
-int __init acpi_aml_init(void)
+static int __init acpi_aml_init(void)
{
int ret;
@@ -771,7 +771,7 @@ int __init acpi_aml_init(void)
return 0;
}
-void __exit acpi_aml_exit(void)
+static void __exit acpi_aml_exit(void)
{
if (acpi_aml_initialized) {
acpi_unregister_debugger(&acpi_aml_debugger);
diff --git a/drivers/acpi/acpi_lpit.c b/drivers/acpi/acpi_lpit.c
index 953437a216f6..48e5059d67ca 100644
--- a/drivers/acpi/acpi_lpit.c
+++ b/drivers/acpi/acpi_lpit.c
@@ -151,10 +151,11 @@ void acpi_init_lpit(void)
struct acpi_table_lpit *lpit;
status = acpi_get_table(ACPI_SIG_LPIT, 0, (struct acpi_table_header **)&lpit);
-
if (ACPI_FAILURE(status))
return;
lpit_process((u64)lpit + sizeof(*lpit),
(u64)lpit + lpit->header.length);
+
+ acpi_put_table((struct acpi_table_header *)lpit);
}
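Several of the ACPI hunks in this series converge on the same pattern: a successful acpi_get_table() is paired with acpi_put_table() once the mapped table is no longer needed (the IORT code further down is the deliberate exception, since it keeps the mapping for runtime use). A hedged, stand-alone sketch of the pattern, with the table signature and the "consume" step as placeholders:

	#include <linux/acpi.h>

	static void example_table_user(void)
	{
		struct acpi_table_header *table;
		acpi_status status;

		status = acpi_get_table(ACPI_SIG_LPIT, 0, &table);
		if (ACPI_FAILURE(status))
			return;

		/* ... parse or copy whatever is needed from the mapped table ... */

		acpi_put_table(table);	/* drop the mapping/reference */
	}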
diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c
index dee999938213..5e2bfbcf526f 100644
--- a/drivers/acpi/acpi_lpss.c
+++ b/drivers/acpi/acpi_lpss.c
@@ -1041,7 +1041,7 @@ static int acpi_lpss_do_suspend_late(struct device *dev)
{
int ret;
- if (dev_pm_smart_suspend_and_suspended(dev))
+ if (dev_pm_skip_suspend(dev))
return 0;
ret = pm_generic_suspend_late(dev);
@@ -1093,6 +1093,9 @@ static int acpi_lpss_resume_early(struct device *dev)
if (pdata->dev_desc->resume_from_noirq)
return 0;
+ if (dev_pm_skip_resume(dev))
+ return 0;
+
return acpi_lpss_do_resume_early(dev);
}
@@ -1102,12 +1105,9 @@ static int acpi_lpss_resume_noirq(struct device *dev)
int ret;
/* Follow acpi_subsys_resume_noirq(). */
- if (dev_pm_may_skip_resume(dev))
+ if (dev_pm_skip_resume(dev))
return 0;
- if (dev_pm_smart_suspend_and_suspended(dev))
- pm_runtime_set_active(dev);
-
ret = pm_generic_resume_noirq(dev);
if (ret)
return ret;
@@ -1169,7 +1169,7 @@ static int acpi_lpss_poweroff_late(struct device *dev)
{
struct lpss_private_data *pdata = acpi_driver_data(ACPI_COMPANION(dev));
- if (dev_pm_smart_suspend_and_suspended(dev))
+ if (dev_pm_skip_suspend(dev))
return 0;
if (pdata->dev_desc->resume_from_noirq)
@@ -1182,7 +1182,7 @@ static int acpi_lpss_poweroff_noirq(struct device *dev)
{
struct lpss_private_data *pdata = acpi_driver_data(ACPI_COMPANION(dev));
- if (dev_pm_smart_suspend_and_suspended(dev))
+ if (dev_pm_skip_suspend(dev))
return 0;
if (pdata->dev_desc->resume_from_noirq) {
diff --git a/drivers/acpi/acpi_tad.c b/drivers/acpi/acpi_tad.c
index 33a4bcdaa4d7..7d45cce0c3c1 100644
--- a/drivers/acpi/acpi_tad.c
+++ b/drivers/acpi/acpi_tad.c
@@ -624,7 +624,7 @@ static int acpi_tad_probe(struct platform_device *pdev)
*/
device_init_wakeup(dev, true);
dev_pm_set_driver_flags(dev, DPM_FLAG_SMART_SUSPEND |
- DPM_FLAG_LEAVE_SUSPENDED);
+ DPM_FLAG_MAY_SKIP_RESUME);
/*
* The platform bus type layer tells the ACPI PM domain to power up the
* device, so set its runtime PM status to "active".
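After this series the driver-visible opt-in is DPM_FLAG_MAY_SKIP_RESUME (replacing DPM_FLAG_LEAVE_SUSPENDED), typically combined with DPM_FLAG_SMART_SUSPEND as in the acpi_tad hunk above. A hedged probe-time sketch, with the probe function and device purely illustrative:

	static int example_probe(struct platform_device *pdev)
	{
		/*
		 * Allow the PM core to leave a runtime-suspended device
		 * suspended across system sleep and to skip resuming it
		 * when nothing requires it to be resumed.
		 */
		dev_pm_set_driver_flags(&pdev->dev,
					DPM_FLAG_SMART_SUSPEND |
					DPM_FLAG_MAY_SKIP_RESUME);
		return 0;
	}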
diff --git a/drivers/acpi/acpi_watchdog.c b/drivers/acpi/acpi_watchdog.c
index 6e9ec6e3fe47..5c1e9ea43123 100644
--- a/drivers/acpi/acpi_watchdog.c
+++ b/drivers/acpi/acpi_watchdog.c
@@ -73,6 +73,7 @@ static const struct acpi_table_wdat *acpi_watchdog_get_wdat(void)
}
if (acpi_watchdog_uses_rtc(wdat)) {
+ acpi_put_table((struct acpi_table_header *)wdat);
pr_info("Skipping WDAT on this system because it uses RTC SRAM\n");
return NULL;
}
@@ -117,12 +118,12 @@ void __init acpi_watchdog_init(void)
/* Watchdog disabled by BIOS */
if (!(wdat->flags & ACPI_WDAT_ENABLED))
- return;
+ goto fail_put_wdat;
/* Skip legacy PCI WDT devices */
if (wdat->pci_segment != 0xff || wdat->pci_bus != 0xff ||
wdat->pci_device != 0xff || wdat->pci_function != 0xff)
- return;
+ goto fail_put_wdat;
INIT_LIST_HEAD(&resource_list);
@@ -188,4 +189,6 @@ void __init acpi_watchdog_init(void)
fail_free_resource_list:
resource_list_free(&resource_list);
+fail_put_wdat:
+ acpi_put_table((struct acpi_table_header *)wdat);
}
diff --git a/drivers/acpi/acpica/acglobal.h b/drivers/acpi/acpica/acglobal.h
index 38ffa2c0a496..1030a0ce1599 100644
--- a/drivers/acpi/acpica/acglobal.h
+++ b/drivers/acpi/acpica/acglobal.h
@@ -290,6 +290,7 @@ ACPI_GLOBAL(struct acpi_external_file *, acpi_gbl_external_file_list);
#ifdef ACPI_DEBUGGER
ACPI_INIT_GLOBAL(u8, acpi_gbl_abort_method, FALSE);
ACPI_INIT_GLOBAL(acpi_thread_id, acpi_gbl_db_thread_id, ACPI_INVALID_THREAD_ID);
+ACPI_INIT_GLOBAL(u32, acpi_gbl_next_cmd_num, 1);
ACPI_GLOBAL(u8, acpi_gbl_db_opt_no_ini_methods);
ACPI_GLOBAL(u8, acpi_gbl_db_opt_no_region_support);
diff --git a/drivers/acpi/acpica/acpredef.h b/drivers/acpi/acpica/acpredef.h
index cd0f5df0ea23..2cbb56652f1c 100644
--- a/drivers/acpi/acpica/acpredef.h
+++ b/drivers/acpi/acpica/acpredef.h
@@ -640,10 +640,10 @@ const union acpi_predefined_info acpi_gbl_predefined_methods[] = {
{{"_NIC", METHOD_0ARGS, /* ACPI 6.3 */
METHOD_RETURNS(ACPI_RTYPE_BUFFER)}},
- {{"_NIG", METHOD_1ARGS(ACPI_TYPE_BUFFER), /* ACPI 6.3 */
+ {{"_NIG", METHOD_0ARGS, /* ACPI 6.3 */
METHOD_RETURNS(ACPI_RTYPE_BUFFER)}},
- {{"_NIH", METHOD_0ARGS, /* ACPI 6.3 */
+ {{"_NIH", METHOD_1ARGS(ACPI_TYPE_BUFFER), /* ACPI 6.3 */
METHOD_RETURNS(ACPI_RTYPE_BUFFER)}},
{{"_NTT", METHOD_0ARGS,
diff --git a/drivers/acpi/acpica/dbhistry.c b/drivers/acpi/acpica/dbhistry.c
index bb9600b867ee..f5fba14461a6 100644
--- a/drivers/acpi/acpica/dbhistry.c
+++ b/drivers/acpi/acpica/dbhistry.c
@@ -27,7 +27,6 @@ static HISTORY_INFO acpi_gbl_history_buffer[HISTORY_SIZE];
static u16 acpi_gbl_lo_history = 0;
static u16 acpi_gbl_num_history = 0;
static u16 acpi_gbl_next_history_index = 0;
-u32 acpi_gbl_next_cmd_num = 1;
/*******************************************************************************
*
diff --git a/drivers/acpi/acpica/dsfield.c b/drivers/acpi/acpica/dsfield.c
index c901f5aec739..fa768b3a989e 100644
--- a/drivers/acpi/acpica/dsfield.c
+++ b/drivers/acpi/acpica/dsfield.c
@@ -177,7 +177,10 @@ acpi_ds_create_buffer_field(union acpi_parse_object *op,
arg->common.value.string, ACPI_TYPE_ANY,
ACPI_IMODE_LOAD_PASS1, flags,
walk_state, &node);
- if (ACPI_FAILURE(status)) {
+ if ((walk_state->parse_flags & ACPI_PARSE_DISASSEMBLE)
+ && status == AE_ALREADY_EXISTS) {
+ status = AE_OK;
+ } else if (ACPI_FAILURE(status)) {
ACPI_ERROR_NAMESPACE(walk_state->scope_info,
arg->common.value.string, status);
return_ACPI_STATUS(status);
@@ -514,13 +517,20 @@ acpi_ds_create_field(union acpi_parse_object *op,
info.region_node = region_node;
status = acpi_ds_get_field_names(&info, walk_state, arg->common.next);
+ if (ACPI_FAILURE(status)) {
+ return_ACPI_STATUS(status);
+ }
+
if (info.region_node->object->region.space_id ==
- ACPI_ADR_SPACE_PLATFORM_COMM
- && !(region_node->object->field.internal_pcc_buffer =
- ACPI_ALLOCATE_ZEROED(info.region_node->object->region.
- length))) {
- return_ACPI_STATUS(AE_NO_MEMORY);
+ ACPI_ADR_SPACE_PLATFORM_COMM) {
+ region_node->object->field.internal_pcc_buffer =
+ ACPI_ALLOCATE_ZEROED(info.region_node->object->region.
+ length);
+ if (!region_node->object->field.internal_pcc_buffer) {
+ return_ACPI_STATUS(AE_NO_MEMORY);
+ }
}
+
return_ACPI_STATUS(status);
}
diff --git a/drivers/acpi/acpica/exfield.c b/drivers/acpi/acpica/exfield.c
index e85eb31e5075..3323a2ba6a31 100644
--- a/drivers/acpi/acpica/exfield.c
+++ b/drivers/acpi/acpica/exfield.c
@@ -22,7 +22,7 @@ ACPI_MODULE_NAME("exfield")
*/
#define ACPI_INVALID_PROTOCOL_ID 0x80
#define ACPI_MAX_PROTOCOL_ID 0x0F
-const u8 acpi_protocol_lengths[] = {
+static const u8 acpi_protocol_lengths[] = {
ACPI_INVALID_PROTOCOL_ID, /* 0 - reserved */
ACPI_INVALID_PROTOCOL_ID, /* 1 - reserved */
0x00, /* 2 - ATTRIB_QUICK */
diff --git a/drivers/acpi/apei/bert.c b/drivers/acpi/apei/bert.c
index 1155fb9dcc3a..19e50fcbf4d6 100644
--- a/drivers/acpi/apei/bert.c
+++ b/drivers/acpi/apei/bert.c
@@ -119,7 +119,7 @@ static int __init bert_init(void)
rc = bert_check_table(bert_tab);
if (rc) {
pr_err(FW_BUG "table invalid.\n");
- return rc;
+ goto out_put_bert_tab;
}
region_len = bert_tab->region_length;
@@ -127,7 +127,7 @@ static int __init bert_init(void)
rc = apei_resources_add(&bert_resources, bert_tab->address,
region_len, true);
if (rc)
- return rc;
+ goto out_put_bert_tab;
rc = apei_resources_request(&bert_resources, "APEI BERT");
if (rc)
goto out_fini;
@@ -142,6 +142,8 @@ static int __init bert_init(void)
apei_resources_release(&bert_resources);
out_fini:
apei_resources_fini(&bert_resources);
+out_put_bert_tab:
+ acpi_put_table((struct acpi_table_header *)bert_tab);
return rc;
}
diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c
index 086373f8ccb1..133156759551 100644
--- a/drivers/acpi/apei/einj.c
+++ b/drivers/acpi/apei/einj.c
@@ -692,7 +692,7 @@ static int __init einj_init(void)
rc = einj_check_table(einj_tab);
if (rc) {
pr_warn(FW_BUG "Invalid EINJ table.\n");
- return -EINVAL;
+ goto err_put_table;
}
rc = -ENOMEM;
@@ -760,6 +760,8 @@ err_release:
err_fini:
apei_resources_fini(&einj_resources);
debugfs_remove_recursive(einj_debug_dir);
+err_put_table:
+ acpi_put_table((struct acpi_table_header *)einj_tab);
return rc;
}
@@ -780,6 +782,7 @@ static void __exit einj_exit(void)
apei_resources_release(&einj_resources);
apei_resources_fini(&einj_resources);
debugfs_remove_recursive(einj_debug_dir);
+ acpi_put_table((struct acpi_table_header *)einj_tab);
}
module_init(einj_init);
diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c
index 2015a0967cbb..2e0b0fcad960 100644
--- a/drivers/acpi/apei/erst.c
+++ b/drivers/acpi/apei/erst.c
@@ -1122,7 +1122,7 @@ static int __init erst_init(void)
rc = erst_check_table(erst_tab);
if (rc) {
pr_err(FW_BUG "ERST table is invalid.\n");
- goto err;
+ goto err_put_erst_tab;
}
apei_resources_init(&erst_resources);
@@ -1196,6 +1196,8 @@ err_release:
apei_resources_release(&erst_resources);
err_fini:
apei_resources_fini(&erst_resources);
+err_put_erst_tab:
+ acpi_put_table((struct acpi_table_header *)erst_tab);
err:
erst_disable = 1;
return rc;
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 24c9642e8fc7..81bf71b10d44 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -40,6 +40,7 @@
#include <linux/sched/clock.h>
#include <linux/uuid.h>
#include <linux/ras.h>
+#include <linux/task_work.h>
#include <acpi/actbl1.h>
#include <acpi/ghes.h>
@@ -167,12 +168,6 @@ int ghes_estatus_pool_init(int num_ghes)
if (!addr)
goto err_pool_alloc;
- /*
- * New allocation must be visible in all pgd before it can be found by
- * an NMI allocating from the pool.
- */
- vmalloc_sync_mappings();
-
rc = gen_pool_add(ghes_estatus_pool, addr, PAGE_ALIGN(len), -1);
if (rc)
goto err_pool_add;
@@ -414,23 +409,46 @@ static void ghes_clear_estatus(struct ghes *ghes,
ghes_ack_error(ghes->generic_v2);
}
-static void ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, int sev)
+/*
+ * Called as task_work before returning to user-space.
+ * Ensure any queued work has been done before we return to the context that
+ * triggered the notification.
+ */
+static void ghes_kick_task_work(struct callback_head *head)
+{
+ struct acpi_hest_generic_status *estatus;
+ struct ghes_estatus_node *estatus_node;
+ u32 node_len;
+
+ estatus_node = container_of(head, struct ghes_estatus_node, task_work);
+ if (IS_ENABLED(CONFIG_ACPI_APEI_MEMORY_FAILURE))
+ memory_failure_queue_kick(estatus_node->task_work_cpu);
+
+ estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
+ node_len = GHES_ESTATUS_NODE_LEN(cper_estatus_len(estatus));
+ gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node, node_len);
+}
+
+static bool ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata,
+ int sev)
{
-#ifdef CONFIG_ACPI_APEI_MEMORY_FAILURE
unsigned long pfn;
int flags = -1;
int sec_sev = ghes_severity(gdata->error_severity);
struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);
+ if (!IS_ENABLED(CONFIG_ACPI_APEI_MEMORY_FAILURE))
+ return false;
+
if (!(mem_err->validation_bits & CPER_MEM_VALID_PA))
- return;
+ return false;
pfn = mem_err->physical_addr >> PAGE_SHIFT;
if (!pfn_valid(pfn)) {
pr_warn_ratelimited(FW_WARN GHES_PFX
"Invalid address in generic error data: %#llx\n",
mem_err->physical_addr);
- return;
+ return false;
}
/* iff following two events can be handled properly by now */
@@ -440,9 +458,12 @@ static void ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, int
if (sev == GHES_SEV_RECOVERABLE && sec_sev == GHES_SEV_RECOVERABLE)
flags = 0;
- if (flags != -1)
+ if (flags != -1) {
memory_failure_queue(pfn, flags);
-#endif
+ return true;
+ }
+
+ return false;
}
/*
@@ -490,7 +511,7 @@ static void ghes_handle_aer(struct acpi_hest_generic_data *gdata)
#endif
}
-static void ghes_do_proc(struct ghes *ghes,
+static bool ghes_do_proc(struct ghes *ghes,
const struct acpi_hest_generic_status *estatus)
{
int sev, sec_sev;
@@ -498,6 +519,7 @@ static void ghes_do_proc(struct ghes *ghes,
guid_t *sec_type;
const guid_t *fru_id = &guid_null;
char *fru_text = "";
+ bool queued = false;
sev = ghes_severity(estatus->error_severity);
apei_estatus_for_each_section(estatus, gdata) {
@@ -515,7 +537,7 @@ static void ghes_do_proc(struct ghes *ghes,
ghes_edac_report_mem_error(sev, mem_err);
arch_apei_report_mem_error(sev, mem_err);
- ghes_handle_memory_failure(gdata, sev);
+ queued = ghes_handle_memory_failure(gdata, sev);
}
else if (guid_equal(sec_type, &CPER_SEC_PCIE)) {
ghes_handle_aer(gdata);
@@ -532,6 +554,8 @@ static void ghes_do_proc(struct ghes *ghes,
gdata->error_data_length);
}
}
+
+ return queued;
}
static void __ghes_print_estatus(const char *pfx,
@@ -827,7 +851,9 @@ static void ghes_proc_in_irq(struct irq_work *irq_work)
struct ghes_estatus_node *estatus_node;
struct acpi_hest_generic *generic;
struct acpi_hest_generic_status *estatus;
+ bool task_work_pending;
u32 len, node_len;
+ int ret;
llnode = llist_del_all(&ghes_estatus_llist);
/*
@@ -842,14 +868,26 @@ static void ghes_proc_in_irq(struct irq_work *irq_work)
estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
len = cper_estatus_len(estatus);
node_len = GHES_ESTATUS_NODE_LEN(len);
- ghes_do_proc(estatus_node->ghes, estatus);
+ task_work_pending = ghes_do_proc(estatus_node->ghes, estatus);
if (!ghes_estatus_cached(estatus)) {
generic = estatus_node->generic;
if (ghes_print_estatus(NULL, generic, estatus))
ghes_estatus_cache_add(generic, estatus);
}
- gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node,
- node_len);
+
+ if (task_work_pending && current->mm != &init_mm) {
+ estatus_node->task_work.func = ghes_kick_task_work;
+ estatus_node->task_work_cpu = smp_processor_id();
+ ret = task_work_add(current, &estatus_node->task_work,
+ true);
+ if (ret)
+ estatus_node->task_work.func = NULL;
+ }
+
+ if (!estatus_node->task_work.func)
+ gen_pool_free(ghes_estatus_pool,
+ (unsigned long)estatus_node, node_len);
+
llnode = next;
}
}
@@ -909,6 +947,7 @@ static int ghes_in_nmi_queue_one_entry(struct ghes *ghes,
estatus_node->ghes = ghes;
estatus_node->generic = ghes->generic;
+ estatus_node->task_work.func = NULL;
estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
if (__ghes_read_estatus(estatus, buf_paddr, fixmap_idx, len)) {
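The ghes.c rework defers memory-failure handling to process context with task_work, so it runs before the interrupted task re-enters user space. A hedged sketch of the underlying pattern (function names are illustrative; it assumes the three-argument bool form of task_work_add() used by this series):

	#include <linux/task_work.h>

	static void example_task_work_cb(struct callback_head *head)
	{
		/* Runs in process context just before return to user space;
		 * sleeping and taking mutexes is allowed here. */
	}

	static int example_defer_to_current(struct callback_head *work)
	{
		init_task_work(work, example_task_work_cb);

		/* A non-zero return means the task is exiting and the callback
		 * will never run, so the caller must clean up itself. */
		return task_work_add(current, work, true);
	}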
diff --git a/drivers/acpi/apei/hest.c b/drivers/acpi/apei/hest.c
index 822402480f7d..953a2fae8b15 100644
--- a/drivers/acpi/apei/hest.c
+++ b/drivers/acpi/apei/hest.c
@@ -243,8 +243,8 @@ void __init acpi_hest_init(void)
} else if (ACPI_FAILURE(status)) {
const char *msg = acpi_format_exception(status);
pr_err(HEST_PFX "Failed to get table, %s\n", msg);
- rc = -EINVAL;
- goto err;
+ hest_disable = HEST_DISABLED;
+ return;
}
rc = apei_hest_parse(hest_parse_cmc, NULL);
@@ -266,4 +266,5 @@ void __init acpi_hest_init(void)
return;
err:
hest_disable = HEST_DISABLED;
+ acpi_put_table((struct acpi_table_header *)hest_tab);
}
diff --git a/drivers/acpi/arm64/gtdt.c b/drivers/acpi/arm64/gtdt.c
index 01962c63a711..f2d0e5915dab 100644
--- a/drivers/acpi/arm64/gtdt.c
+++ b/drivers/acpi/arm64/gtdt.c
@@ -394,7 +394,7 @@ static int __init gtdt_sbsa_gwdt_init(void)
*/
ret = acpi_gtdt_init(table, &timer_count);
if (ret || !timer_count)
- return ret;
+ goto out_put_gtdt;
for_each_platform_timer(platform_timer) {
if (is_non_secure_watchdog(platform_timer)) {
@@ -408,6 +408,8 @@ static int __init gtdt_sbsa_gwdt_init(void)
if (gwdt_count)
pr_info("found %d SBSA generic Watchdog(s).\n", gwdt_count);
+out_put_gtdt:
+ acpi_put_table(table);
return ret;
}
diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index 7d04424189df..28a6b387e80e 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -299,61 +299,8 @@ out:
return status;
}
-struct iort_workaround_oem_info {
- char oem_id[ACPI_OEM_ID_SIZE + 1];
- char oem_table_id[ACPI_OEM_TABLE_ID_SIZE + 1];
- u32 oem_revision;
-};
-
-static bool apply_id_count_workaround;
-
-static struct iort_workaround_oem_info wa_info[] __initdata = {
- {
- .oem_id = "HISI ",
- .oem_table_id = "HIP07 ",
- .oem_revision = 0,
- }, {
- .oem_id = "HISI ",
- .oem_table_id = "HIP08 ",
- .oem_revision = 0,
- }
-};
-
-static void __init
-iort_check_id_count_workaround(struct acpi_table_header *tbl)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(wa_info); i++) {
- if (!memcmp(wa_info[i].oem_id, tbl->oem_id, ACPI_OEM_ID_SIZE) &&
- !memcmp(wa_info[i].oem_table_id, tbl->oem_table_id, ACPI_OEM_TABLE_ID_SIZE) &&
- wa_info[i].oem_revision == tbl->oem_revision) {
- apply_id_count_workaround = true;
- pr_warn(FW_BUG "ID count for ID mapping entry is wrong, applying workaround\n");
- break;
- }
- }
-}
-
-static inline u32 iort_get_map_max(struct acpi_iort_id_mapping *map)
-{
- u32 map_max = map->input_base + map->id_count;
-
- /*
- * The IORT specification revision D (Section 3, table 4, page 9) says
- * Number of IDs = The number of IDs in the range minus one, but the
- * IORT code ignored the "minus one", and some firmware did that too,
- * so apply a workaround here to keep compatible with both the spec
- * compliant and non-spec compliant firmwares.
- */
- if (apply_id_count_workaround)
- map_max--;
-
- return map_max;
-}
-
static int iort_id_map(struct acpi_iort_id_mapping *map, u8 type, u32 rid_in,
- u32 *rid_out)
+ u32 *rid_out, bool check_overlap)
{
/* Single mapping does not care for input id */
if (map->flags & ACPI_IORT_ID_SINGLE_MAPPING) {
@@ -368,10 +315,37 @@ static int iort_id_map(struct acpi_iort_id_mapping *map, u8 type, u32 rid_in,
return -ENXIO;
}
- if (rid_in < map->input_base || rid_in > iort_get_map_max(map))
+ if (rid_in < map->input_base ||
+ (rid_in > map->input_base + map->id_count))
return -ENXIO;
+ if (check_overlap) {
+ /*
+ * We already found a mapping for this input ID at the end of
+ * another region. If it coincides with the start of this
+ * region, we assume the prior match was due to the off-by-1
+ * issue mentioned below, and allow it to be superseded.
+ * Otherwise, things are *really* broken, and we just disregard
+ * duplicate matches entirely to retain compatibility.
+ */
+ pr_err(FW_BUG "[map %p] conflicting mapping for input ID 0x%x\n",
+ map, rid_in);
+ if (rid_in != map->input_base)
+ return -ENXIO;
+
+ pr_err(FW_BUG "applying workaround.\n");
+ }
+
*rid_out = map->output_base + (rid_in - map->input_base);
+
+ /*
+ * Due to confusion regarding the meaning of the id_count field (which
+ * carries the number of IDs *minus 1*), we may have to disregard this
+ * match if it is at the end of the range, and overlaps with the start
+ * of another one.
+ */
+ if (map->id_count > 0 && rid_in == map->input_base + map->id_count)
+ return -EAGAIN;
return 0;
}
@@ -414,6 +388,7 @@ static struct acpi_iort_node *iort_node_get_id(struct acpi_iort_node *node,
static int iort_get_id_mapping_index(struct acpi_iort_node *node)
{
struct acpi_iort_smmu_v3 *smmu;
+ struct acpi_iort_pmcg *pmcg;
switch (node->type) {
case ACPI_IORT_NODE_SMMU_V3:
@@ -441,6 +416,10 @@ static int iort_get_id_mapping_index(struct acpi_iort_node *node)
return smmu->id_mapping_index;
case ACPI_IORT_NODE_PMCG:
+ pmcg = (struct acpi_iort_pmcg *)node->node_data;
+ if (pmcg->overflow_gsiv || node->mapping_count == 0)
+ return -EINVAL;
+
return 0;
default:
return -EINVAL;
@@ -456,7 +435,8 @@ static struct acpi_iort_node *iort_node_map_id(struct acpi_iort_node *node,
/* Parse the ID mapping tree to find specified node type */
while (node) {
struct acpi_iort_id_mapping *map;
- int i, index;
+ int i, index, rc = 0;
+ u32 out_ref = 0, map_id = id;
if (IORT_TYPE_MASK(node->type) & type_mask) {
if (id_out)
@@ -490,15 +470,18 @@ static struct acpi_iort_node *iort_node_map_id(struct acpi_iort_node *node,
if (i == index)
continue;
- if (!iort_id_map(map, node->type, id, &id))
+ rc = iort_id_map(map, node->type, map_id, &id, out_ref);
+ if (!rc)
break;
+ if (rc == -EAGAIN)
+ out_ref = map->output_reference;
}
- if (i == node->mapping_count)
+ if (i == node->mapping_count && !out_ref)
goto fail_map;
node = ACPI_ADD_PTR(struct acpi_iort_node, iort_table,
- map->output_reference);
+ rc ? out_ref : map->output_reference);
}
fail_map:
@@ -789,15 +772,6 @@ void acpi_configure_pmsi_domain(struct device *dev)
dev_set_msi_domain(dev, msi_domain);
}
-static int __maybe_unused __get_pci_rid(struct pci_dev *pdev, u16 alias,
- void *data)
-{
- u32 *rid = data;
-
- *rid = alias;
- return 0;
-}
-
#ifdef CONFIG_IOMMU_API
static struct acpi_iort_node *iort_get_msi_resv_iommu(struct device *dev)
{
@@ -1148,13 +1122,10 @@ void iort_dma_setup(struct device *dev, u64 *dma_addr, u64 *dma_size)
else
size = 1ULL << 32;
- if (dev_is_pci(dev)) {
- ret = acpi_dma_get_range(dev, &dmaaddr, &offset, &size);
- if (ret == -ENODEV)
- ret = rc_dma_get_range(dev, &size);
- } else {
- ret = nc_dma_get_range(dev, &size);
- }
+ ret = acpi_dma_get_range(dev, &dmaaddr, &offset, &size);
+ if (ret == -ENODEV)
+ ret = dev_is_pci(dev) ? rc_dma_get_range(dev, &size)
+ : nc_dma_get_range(dev, &size);
if (!ret) {
/*
@@ -1692,6 +1663,10 @@ void __init acpi_iort_init(void)
{
acpi_status status;
+ /* iort_table will be used at runtime after the iort init,
+ * so we don't need to call acpi_put_table() to release
+ * the IORT table mapping.
+ */
status = acpi_get_table(ACPI_SIG_IORT, 0, &iort_table);
if (ACPI_FAILURE(status)) {
if (status != AE_NOT_FOUND) {
@@ -1703,6 +1678,5 @@ void __init acpi_iort_init(void)
return;
}
- iort_check_id_count_workaround(iort_table);
iort_init_platform_devices();
}
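To make the off-by-one handling above concrete: suppose non-compliant firmware describes mapping A with input_base 0x0 and id_count 0x100 (where the spec-compliant value would be 0xff), and mapping B with input_base 0x100. An input ID of 0x100 first matches the very end of A, so iort_id_map() records that translation but returns -EAGAIN; the walk then reaches B with check_overlap set, and because 0x100 equals B's input_base the earlier end-of-range match is treated as the off-by-one artifact and B's translation supersedes it. If no second mapping matches, the saved output_reference from A is still followed, preserving behaviour for tables that rely on the old, non-compliant interpretation.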
diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c
index 78cfc70cb320..3c35e57dd854 100644
--- a/drivers/acpi/button.c
+++ b/drivers/acpi/button.c
@@ -24,7 +24,6 @@
#define PREFIX "ACPI: "
#define ACPI_BUTTON_CLASS "button"
-#define ACPI_BUTTON_FILE_INFO "info"
#define ACPI_BUTTON_FILE_STATE "state"
#define ACPI_BUTTON_TYPE_UNKNOWN 0x00
#define ACPI_BUTTON_NOTIFY_STATUS 0x80
diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c
index 8b2e89c20c11..7a99b19bb893 100644
--- a/drivers/acpi/cppc_acpi.c
+++ b/drivers/acpi/cppc_acpi.c
@@ -350,7 +350,7 @@ static void cppc_chan_tx_done(struct mbox_client *cl, void *msg, int ret)
*(u16 *)msg, ret);
}
-struct mbox_client cppc_mbox_cl = {
+static struct mbox_client cppc_mbox_cl = {
.tx_done = cppc_chan_tx_done,
.knows_txdone = true,
};
@@ -597,7 +597,7 @@ bool __weak cpc_ffh_supported(void)
*
* Return: 0 for success, errno for failure
*/
-int pcc_data_alloc(int pcc_ss_id)
+static int pcc_data_alloc(int pcc_ss_id)
{
if (pcc_ss_id < 0 || pcc_ss_id >= MAX_PCC_SUBSPACES)
return -EINVAL;
@@ -846,6 +846,7 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr)
"acpi_cppc");
if (ret) {
per_cpu(cpc_desc_ptr, pr->id) = NULL;
+ kobject_put(&cpc_ptr->kobj);
goto out_free;
}
diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c
index 5832bc10aca8..b44b12a931e7 100644
--- a/drivers/acpi/device_pm.c
+++ b/drivers/acpi/device_pm.c
@@ -1084,7 +1084,7 @@ int acpi_subsys_suspend_late(struct device *dev)
{
int ret;
- if (dev_pm_smart_suspend_and_suspended(dev))
+ if (dev_pm_skip_suspend(dev))
return 0;
ret = pm_generic_suspend_late(dev);
@@ -1100,10 +1100,8 @@ int acpi_subsys_suspend_noirq(struct device *dev)
{
int ret;
- if (dev_pm_smart_suspend_and_suspended(dev)) {
- dev->power.may_skip_resume = true;
+ if (dev_pm_skip_suspend(dev))
return 0;
- }
ret = pm_generic_suspend_noirq(dev);
if (ret)
@@ -1116,8 +1114,8 @@ int acpi_subsys_suspend_noirq(struct device *dev)
* acpi_subsys_complete() to take care of fixing up the device's state
* anyway, if need be.
*/
- dev->power.may_skip_resume = device_may_wakeup(dev) ||
- !device_can_wakeup(dev);
+ if (device_can_wakeup(dev) && !device_may_wakeup(dev))
+ dev->power.may_skip_resume = false;
return 0;
}
@@ -1129,17 +1127,9 @@ EXPORT_SYMBOL_GPL(acpi_subsys_suspend_noirq);
*/
static int acpi_subsys_resume_noirq(struct device *dev)
{
- if (dev_pm_may_skip_resume(dev))
+ if (dev_pm_skip_resume(dev))
return 0;
- /*
- * Devices with DPM_FLAG_SMART_SUSPEND may be left in runtime suspend
- * during system suspend, so update their runtime PM status to "active"
- * as they will be put into D0 going forward.
- */
- if (dev_pm_smart_suspend_and_suspended(dev))
- pm_runtime_set_active(dev);
-
return pm_generic_resume_noirq(dev);
}
@@ -1153,7 +1143,12 @@ static int acpi_subsys_resume_noirq(struct device *dev)
*/
static int acpi_subsys_resume_early(struct device *dev)
{
- int ret = acpi_dev_resume(dev);
+ int ret;
+
+ if (dev_pm_skip_resume(dev))
+ return 0;
+
+ ret = acpi_dev_resume(dev);
return ret ? ret : pm_generic_resume_early(dev);
}
@@ -1218,7 +1213,7 @@ static int acpi_subsys_poweroff_late(struct device *dev)
{
int ret;
- if (dev_pm_smart_suspend_and_suspended(dev))
+ if (dev_pm_skip_suspend(dev))
return 0;
ret = pm_generic_poweroff_late(dev);
@@ -1234,7 +1229,7 @@ static int acpi_subsys_poweroff_late(struct device *dev)
*/
static int acpi_subsys_poweroff_noirq(struct device *dev)
{
- if (dev_pm_smart_suspend_and_suspended(dev))
+ if (dev_pm_skip_suspend(dev))
return 0;
return pm_generic_poweroff_noirq(dev);
diff --git a/drivers/acpi/dptf/dptf_power.c b/drivers/acpi/dptf/dptf_power.c
index e4e8b75d39f0..5fab7e350db8 100644
--- a/drivers/acpi/dptf/dptf_power.c
+++ b/drivers/acpi/dptf/dptf_power.c
@@ -10,12 +10,19 @@
#include <linux/platform_device.h>
/*
- * Presentation of attributes which are defined for INT3407. They are:
+ * Presentation of attributes which are defined for INT3407 and INT3532.
+ * They are:
* PMAX : Maximum platform power
* PSRC : Platform power source
* ARTG : Adapter rating
* CTYP : Charger type
* PBSS : Battery steady power
+ * PROP : Rest of worst-case platform power
+ * PBSS : Power Battery Steady State
+ * RBHF : High Frequency Impedance
+ * VBNL : Instantaneous No-Load Voltage
+ * CMPP : Current Discharge Capability
*/
#define DPTF_POWER_SHOW(name, object) \
static ssize_t name##_show(struct device *dev,\
@@ -39,12 +46,42 @@ DPTF_POWER_SHOW(platform_power_source, PSRC)
DPTF_POWER_SHOW(adapter_rating_mw, ARTG)
DPTF_POWER_SHOW(battery_steady_power_mw, PBSS)
DPTF_POWER_SHOW(charger_type, CTYP)
+DPTF_POWER_SHOW(rest_of_platform_power_mw, PROP)
+DPTF_POWER_SHOW(max_steady_state_power_mw, PBSS)
+DPTF_POWER_SHOW(high_freq_impedance_mohm, RBHF)
+DPTF_POWER_SHOW(no_load_voltage_mv, VBNL)
+DPTF_POWER_SHOW(current_discharge_capbility_ma, CMPP);
static DEVICE_ATTR_RO(max_platform_power_mw);
static DEVICE_ATTR_RO(platform_power_source);
static DEVICE_ATTR_RO(adapter_rating_mw);
static DEVICE_ATTR_RO(battery_steady_power_mw);
static DEVICE_ATTR_RO(charger_type);
+static DEVICE_ATTR_RO(rest_of_platform_power_mw);
+static DEVICE_ATTR_RO(max_steady_state_power_mw);
+static DEVICE_ATTR_RO(high_freq_impedance_mohm);
+static DEVICE_ATTR_RO(no_load_voltage_mv);
+static DEVICE_ATTR_RO(current_discharge_capbility_ma);
+
+static ssize_t prochot_confirm_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct acpi_device *acpi_dev = dev_get_drvdata(dev);
+ acpi_status status;
+ int seq_no;
+
+ if (kstrtouint(buf, 0, &seq_no) < 0)
+ return -EINVAL;
+
+ status = acpi_execute_simple_method(acpi_dev->handle, "PBOK", seq_no);
+ if (ACPI_SUCCESS(status))
+ return count;
+
+ return -EINVAL;
+}
+
+static DEVICE_ATTR_WO(prochot_confirm);
static struct attribute *dptf_power_attrs[] = {
&dev_attr_max_platform_power_mw.attr,
@@ -52,6 +89,8 @@ static struct attribute *dptf_power_attrs[] = {
&dev_attr_adapter_rating_mw.attr,
&dev_attr_battery_steady_power_mw.attr,
&dev_attr_charger_type.attr,
+ &dev_attr_rest_of_platform_power_mw.attr,
+ &dev_attr_prochot_confirm.attr,
NULL
};
@@ -60,10 +99,79 @@ static const struct attribute_group dptf_power_attribute_group = {
.name = "dptf_power"
};
+static struct attribute *dptf_battery_attrs[] = {
+ &dev_attr_max_platform_power_mw.attr,
+ &dev_attr_max_steady_state_power_mw.attr,
+ &dev_attr_high_freq_impedance_mohm.attr,
+ &dev_attr_no_load_voltage_mv.attr,
+ &dev_attr_current_discharge_capbility_ma.attr,
+ NULL
+};
+
+static const struct attribute_group dptf_battery_attribute_group = {
+ .attrs = dptf_battery_attrs,
+ .name = "dptf_battery"
+};
+
+#define MAX_POWER_CHANGED 0x80
+#define POWER_STATE_CHANGED 0x81
+#define STEADY_STATE_POWER_CHANGED 0x83
+#define POWER_PROP_CHANGE_EVENT 0x84
+#define IMPEDANCED_CHNGED 0x85
+#define VOLTAGE_CURRENT_CHANGED 0x86
+
+static long long dptf_participant_type(acpi_handle handle)
+{
+ unsigned long long ptype;
+ acpi_status status;
+
+ status = acpi_evaluate_integer(handle, "PTYP", NULL, &ptype);
+ if (ACPI_FAILURE(status))
+ return -ENODEV;
+
+ return ptype;
+}
+
+static void dptf_power_notify(acpi_handle handle, u32 event, void *data)
+{
+ struct platform_device *pdev = data;
+ char *attr;
+
+ switch (event) {
+ case POWER_STATE_CHANGED:
+ attr = "platform_power_source";
+ break;
+ case POWER_PROP_CHANGE_EVENT:
+ attr = "rest_of_platform_power_mw";
+ break;
+ case MAX_POWER_CHANGED:
+ attr = "max_platform_power_mw";
+ break;
+ case STEADY_STATE_POWER_CHANGED:
+ attr = "max_steady_state_power_mw";
+ break;
+ case VOLTAGE_CURRENT_CHANGED:
+ attr = "no_load_voltage_mv";
+ break;
+ default:
+ dev_err(&pdev->dev, "Unsupported event [0x%x]\n", event);
+ return;
+ }
+
+ /*
+ * Notify that an attribute is changed, so that user space can read
+ * again.
+ */
+ if (dptf_participant_type(handle) == 0x0CULL)
+ sysfs_notify(&pdev->dev.kobj, "dptf_battery", attr);
+ else
+ sysfs_notify(&pdev->dev.kobj, "dptf_power", attr);
+}
+
static int dptf_power_add(struct platform_device *pdev)
{
+ const struct attribute_group *attr_group;
struct acpi_device *acpi_dev;
- acpi_status status;
unsigned long long ptype;
int result;
@@ -71,17 +179,29 @@ static int dptf_power_add(struct platform_device *pdev)
if (!acpi_dev)
return -ENODEV;
- status = acpi_evaluate_integer(acpi_dev->handle, "PTYP", NULL, &ptype);
- if (ACPI_FAILURE(status))
+ ptype = dptf_participant_type(acpi_dev->handle);
+ if (ptype == 0x11)
+ attr_group = &dptf_power_attribute_group;
+ else if (ptype == 0x0C)
+ attr_group = &dptf_battery_attribute_group;
+ else
return -ENODEV;
- if (ptype != 0x11)
- return -ENODEV;
+ result = acpi_install_notify_handler(acpi_dev->handle,
+ ACPI_DEVICE_NOTIFY,
+ dptf_power_notify,
+ (void *)pdev);
+ if (result)
+ return result;
result = sysfs_create_group(&pdev->dev.kobj,
- &dptf_power_attribute_group);
- if (result)
+ attr_group);
+ if (result) {
+ acpi_remove_notify_handler(acpi_dev->handle,
+ ACPI_DEVICE_NOTIFY,
+ dptf_power_notify);
return result;
+ }
platform_set_drvdata(pdev, acpi_dev);
@@ -90,14 +210,23 @@ static int dptf_power_add(struct platform_device *pdev)
static int dptf_power_remove(struct platform_device *pdev)
{
+ struct acpi_device *acpi_dev = platform_get_drvdata(pdev);
+
+ acpi_remove_notify_handler(acpi_dev->handle,
+ ACPI_DEVICE_NOTIFY,
+ dptf_power_notify);
- sysfs_remove_group(&pdev->dev.kobj, &dptf_power_attribute_group);
+ if (dptf_participant_type(acpi_dev->handle) == 0x0CULL)
+ sysfs_remove_group(&pdev->dev.kobj, &dptf_battery_attribute_group);
+ else
+ sysfs_remove_group(&pdev->dev.kobj, &dptf_power_attribute_group);
return 0;
}
static const struct acpi_device_id int3407_device_ids[] = {
{"INT3407", 0},
+ {"INT3532", 0},
{"INTC1047", 0},
{"", 0},
};
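The new dptf_power_notify() handler signals attribute changes with sysfs_notify(), which user space can consume by polling the attribute for POLLPRI. A hedged user-space sketch; the sysfs path is an assumption based on the INT3407 participant and the "dptf_power" group name above:

	#include <fcntl.h>
	#include <poll.h>
	#include <unistd.h>

	int main(void)
	{
		const char *path =
			"/sys/bus/platform/devices/INT3407:00/dptf_power/platform_power_source";
		char buf[32];
		int fd = open(path, O_RDONLY);
		struct pollfd pfd = { .fd = fd, .events = POLLPRI | POLLERR };

		if (fd < 0)
			return 1;

		read(fd, buf, sizeof(buf));	/* initial read arms the notification */
		poll(&pfd, 1, -1);		/* returns when sysfs_notify() fires */
		lseek(fd, 0, SEEK_SET);
		read(fd, buf, sizeof(buf));	/* fetch the updated value */
		close(fd);
		return 0;
	}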
diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c
index 1af2125e17d5..04ce2b96c3da 100644
--- a/drivers/acpi/ec.c
+++ b/drivers/acpi/ec.c
@@ -34,7 +34,6 @@
#define ACPI_EC_CLASS "embedded_controller"
#define ACPI_EC_DEVICE_NAME "Embedded Controller"
-#define ACPI_EC_FILE_INFO "info"
/* EC status register */
#define ACPI_EC_FLAG_OBF 0x01 /* Output buffer full */
@@ -1783,13 +1782,14 @@ static void __init acpi_ec_ecdt_start(void)
return;
status = acpi_get_handle(NULL, ecdt_ptr->id, &handle);
- if (ACPI_FAILURE(status))
- return;
+ if (ACPI_SUCCESS(status)) {
+ boot_ec->handle = handle;
- boot_ec->handle = handle;
+ /* Add a special ACPI device object to represent the boot EC. */
+ acpi_bus_register_early_device(ACPI_BUS_TYPE_ECDT_EC);
+ }
- /* Add a special ACPI device object to represent the boot EC. */
- acpi_bus_register_early_device(ACPI_BUS_TYPE_ECDT_EC);
+ acpi_put_table((struct acpi_table_header *)ecdt_ptr);
}
/*
@@ -1891,12 +1891,12 @@ void __init acpi_ec_ecdt_probe(void)
* Asus X50GL:
* https://bugzilla.kernel.org/show_bug.cgi?id=11880
*/
- return;
+ goto out;
}
ec = acpi_ec_alloc();
if (!ec)
- return;
+ goto out;
if (EC_FLAGS_CORRECT_ECDT) {
ec->command_addr = ecdt_ptr->data.address;
@@ -1922,13 +1922,16 @@ void __init acpi_ec_ecdt_probe(void)
ret = acpi_ec_setup(ec, NULL);
if (ret) {
acpi_ec_free(ec);
- return;
+ goto out;
}
boot_ec = ec;
boot_ec_is_ecdt = true;
pr_info("Boot ECDT EC used to handle transactions\n");
+
+out:
+ acpi_put_table((struct acpi_table_header *)ecdt_ptr);
}
#ifdef CONFIG_PM_SLEEP
@@ -2017,7 +2020,7 @@ bool acpi_ec_dispatch_gpe(void)
*/
ret = acpi_dispatch_gpe(NULL, first_ec->gpe);
if (ret == ACPI_INTERRUPT_HANDLED) {
- pm_pr_dbg("EC GPE dispatched\n");
+ pm_pr_dbg("ACPI EC GPE dispatched\n");
/* Flush the event and query workqueues. */
acpi_ec_flush_work();
diff --git a/drivers/acpi/evged.c b/drivers/acpi/evged.c
index aba0d0027586..ccd900690b6f 100644
--- a/drivers/acpi/evged.c
+++ b/drivers/acpi/evged.c
@@ -79,6 +79,8 @@ static acpi_status acpi_ged_request_interrupt(struct acpi_resource *ares,
struct resource r;
struct acpi_resource_irq *p = &ares->data.irq;
struct acpi_resource_extended_irq *pext = &ares->data.extended_irq;
+ char ev_name[5];
+ u8 trigger;
if (ares->type == ACPI_RESOURCE_TYPE_END_TAG)
return AE_OK;
@@ -87,14 +89,28 @@ static acpi_status acpi_ged_request_interrupt(struct acpi_resource *ares,
dev_err(dev, "unable to parse IRQ resource\n");
return AE_ERROR;
}
- if (ares->type == ACPI_RESOURCE_TYPE_IRQ)
+ if (ares->type == ACPI_RESOURCE_TYPE_IRQ) {
gsi = p->interrupts[0];
- else
+ trigger = p->triggering;
+ } else {
gsi = pext->interrupts[0];
+ trigger = pext->triggering;
+ }
irq = r.start;
- if (ACPI_FAILURE(acpi_get_handle(handle, "_EVT", &evt_handle))) {
+ switch (gsi) {
+ case 0 ... 255:
+ sprintf(ev_name, "_%c%02hhX",
+ trigger == ACPI_EDGE_SENSITIVE ? 'E' : 'L', gsi);
+
+ if (ACPI_SUCCESS(acpi_get_handle(handle, ev_name, &evt_handle)))
+ break;
+ /* fall through */
+ default:
+ if (ACPI_SUCCESS(acpi_get_handle(handle, "_EVT", &evt_handle)))
+ break;
+
dev_err(dev, "cannot locate _EVT method\n");
return AE_ERROR;
}
diff --git a/drivers/acpi/pci_link.c b/drivers/acpi/pci_link.c
index ed3d2182cf2c..606da5d77ad3 100644
--- a/drivers/acpi/pci_link.c
+++ b/drivers/acpi/pci_link.c
@@ -31,8 +31,6 @@
ACPI_MODULE_NAME("pci_link");
#define ACPI_PCI_LINK_CLASS "pci_irq_routing"
#define ACPI_PCI_LINK_DEVICE_NAME "PCI Interrupt Link"
-#define ACPI_PCI_LINK_FILE_INFO "info"
-#define ACPI_PCI_LINK_FILE_STATUS "state"
#define ACPI_PCI_LINK_MAX_POSSIBLE 16
static int acpi_pci_link_add(struct acpi_device *device,
diff --git a/drivers/acpi/pmic/intel_pmic_chtdc_ti.c b/drivers/acpi/pmic/intel_pmic_chtdc_ti.c
index 7ccd7d9660bc..a5101b07611a 100644
--- a/drivers/acpi/pmic/intel_pmic_chtdc_ti.c
+++ b/drivers/acpi/pmic/intel_pmic_chtdc_ti.c
@@ -102,6 +102,7 @@ static struct intel_pmic_opregion_data chtdc_ti_pmic_opregion_data = {
.power_table_count = ARRAY_SIZE(chtdc_ti_power_table),
.thermal_table = chtdc_ti_thermal_table,
.thermal_table_count = ARRAY_SIZE(chtdc_ti_thermal_table),
+ .pmic_i2c_address = 0x5e,
};
static int chtdc_ti_pmic_opregion_probe(struct platform_device *pdev)
diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c
index fe1e7bc91a5e..837b875d075e 100644
--- a/drivers/acpi/power.c
+++ b/drivers/acpi/power.c
@@ -36,8 +36,6 @@
ACPI_MODULE_NAME("power");
#define ACPI_POWER_CLASS "power_resource"
#define ACPI_POWER_DEVICE_NAME "Power Resource"
-#define ACPI_POWER_FILE_INFO "info"
-#define ACPI_POWER_FILE_STATUS "state"
#define ACPI_POWER_RESOURCE_STATE_OFF 0x00
#define ACPI_POWER_RESOURCE_STATE_ON 0x01
#define ACPI_POWER_RESOURCE_STATE_UNKNOWN 0xFF
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index dcc289e30166..75534c5b5433 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -308,11 +308,7 @@ static int acpi_processor_get_power_info_cst(struct acpi_processor *pr)
if (ret)
return ret;
- /*
- * It is expected that there will be at least 2 states, C1 and
- * something else (C2 or C3), so fail if that is not the case.
- */
- if (pr->power.count < 2)
+ if (!pr->power.count)
return -EFAULT;
pr->flags.has_cst = 1;
@@ -468,8 +464,7 @@ static int acpi_processor_get_cstate_info(struct acpi_processor *pr)
for (i = 1; i < ACPI_PROCESSOR_MAX_POWER; i++) {
if (pr->power.states[i].valid) {
pr->power.count = i;
- if (pr->power.states[i].type >= ACPI_STATE_C2)
- pr->flags.power = 1;
+ pr->flags.power = 1;
}
}
diff --git a/drivers/acpi/sbs.c b/drivers/acpi/sbs.c
index 6e88224f60f0..f158b8c30113 100644
--- a/drivers/acpi/sbs.c
+++ b/drivers/acpi/sbs.c
@@ -28,9 +28,6 @@
#define ACPI_SBS_CLASS "sbs"
#define ACPI_AC_CLASS "ac_adapter"
#define ACPI_SBS_DEVICE_NAME "Smart Battery System"
-#define ACPI_SBS_FILE_INFO "info"
-#define ACPI_SBS_FILE_STATE "state"
-#define ACPI_SBS_FILE_ALARM "alarm"
#define ACPI_BATTERY_DIR_NAME "BAT%i"
#define ACPI_AC_DIR_NAME "AC0"
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 6d3448895382..5287ab98b8c1 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -2157,10 +2157,13 @@ static void __init acpi_get_spcr_uart_addr(void)
status = acpi_get_table(ACPI_SIG_SPCR, 0,
(struct acpi_table_header **)&spcr_ptr);
- if (ACPI_SUCCESS(status))
- spcr_uart_addr = spcr_ptr->serial_port.address;
- else
- printk(KERN_WARNING PREFIX "STAO table present, but SPCR is missing\n");
+ if (ACPI_FAILURE(status)) {
+ pr_warn(PREFIX "STAO table present, but SPCR is missing\n");
+ return;
+ }
+
+ spcr_uart_addr = spcr_ptr->serial_port.address;
+ acpi_put_table((struct acpi_table_header *)spcr_ptr);
}
static bool acpi_scan_initialized;
@@ -2196,10 +2199,12 @@ int __init acpi_scan_init(void)
(struct acpi_table_header **)&stao_ptr);
if (ACPI_SUCCESS(status)) {
if (stao_ptr->header.length > sizeof(struct acpi_table_stao))
- printk(KERN_INFO PREFIX "STAO Name List not yet supported.");
+ pr_info(PREFIX "STAO Name List not yet supported.\n");
if (stao_ptr->ignore_uart)
acpi_get_spcr_uart_addr();
+
+ acpi_put_table((struct acpi_table_header *)stao_ptr);
}
acpi_gpe_apply_masked_gpes();
diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index fd9d4e8318e9..aff13bf4d947 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -992,23 +992,31 @@ static bool acpi_s2idle_wake(void)
* wakeup is pending anyway and the SCI is not the source of
* it).
*/
- if (irqd_is_wakeup_armed(irq_get_irq_data(acpi_sci_irq)))
+ if (irqd_is_wakeup_armed(irq_get_irq_data(acpi_sci_irq))) {
+ pm_pr_dbg("Wakeup unrelated to ACPI SCI\n");
return true;
+ }
/*
* If the status bit of any enabled fixed event is set, the
* wakeup is regarded as valid.
*/
- if (acpi_any_fixed_event_status_set())
+ if (acpi_any_fixed_event_status_set()) {
+ pm_pr_dbg("ACPI fixed event wakeup\n");
return true;
+ }
/* Check wakeups from drivers sharing the SCI. */
- if (acpi_check_wakeup_handlers())
+ if (acpi_check_wakeup_handlers()) {
+ pm_pr_dbg("ACPI custom handler wakeup\n");
return true;
+ }
/* Check non-EC GPE wakeups and dispatch the EC GPE. */
- if (acpi_ec_dispatch_gpe())
+ if (acpi_ec_dispatch_gpe()) {
+ pm_pr_dbg("ACPI non-EC GPE wakeup\n");
return true;
+ }
/*
* Cancel the SCI wakeup and process all pending events in case
@@ -1027,8 +1035,10 @@ static bool acpi_s2idle_wake(void)
* are pending here, they must be resulting from the processing
* of EC events above or coming from somewhere else.
*/
- if (pm_wakeup_pending())
+ if (pm_wakeup_pending()) {
+ pm_pr_dbg("Wakeup after ACPI Notify sync\n");
return true;
+ }
rearm_wake_irq(acpi_sci_irq);
}
@@ -1280,8 +1290,10 @@ static void acpi_sleep_hibernate_setup(void)
return;
acpi_get_table(ACPI_SIG_FACS, 1, (struct acpi_table_header **)&facs);
- if (facs)
+ if (facs) {
s4_hardware_signature = facs->hardware_signature;
+ acpi_put_table((struct acpi_table_header *)facs);
+ }
}
#else /* !CONFIG_HIBERNATION */
static inline void acpi_sleep_hibernate_setup(void) {}
diff --git a/drivers/acpi/sysfs.c b/drivers/acpi/sysfs.c
index c60d2c6d31d6..3a89909b50a6 100644
--- a/drivers/acpi/sysfs.c
+++ b/drivers/acpi/sysfs.c
@@ -993,8 +993,10 @@ void acpi_sysfs_add_hotplug_profile(struct acpi_hotplug_profile *hotplug,
error = kobject_init_and_add(&hotplug->kobj,
&acpi_hotplug_profile_ktype, hotplug_kobj, "%s", name);
- if (error)
+ if (error) {
+ kobject_put(&hotplug->kobj);
goto err_out;
+ }
kobject_uevent(&hotplug->kobj, KOBJ_ADD);
return;
diff --git a/drivers/acpi/utils.c b/drivers/acpi/utils.c
index 804ac0df58ec..838b719ec7ce 100644
--- a/drivers/acpi/utils.c
+++ b/drivers/acpi/utils.c
@@ -606,6 +606,31 @@ acpi_status acpi_evaluate_lck(acpi_handle handle, int lock)
}
/**
+ * acpi_evaluate_reg: Evaluate _REG method to register OpRegion presence
+ * @handle: ACPI device handle
+ * @space_id: ACPI address space id to register OpRegion presence for
+ * @function: Parameter to pass to _REG one of ACPI_REG_CONNECT or
+ * ACPI_REG_DISCONNECT
+ *
+ * Evaluate device's _REG method to register OpRegion presence.
+ */
+acpi_status acpi_evaluate_reg(acpi_handle handle, u8 space_id, u32 function)
+{
+ struct acpi_object_list arg_list;
+ union acpi_object params[2];
+
+ params[0].type = ACPI_TYPE_INTEGER;
+ params[0].integer.value = space_id;
+ params[1].type = ACPI_TYPE_INTEGER;
+ params[1].integer.value = function;
+ arg_list.count = 2;
+ arg_list.pointer = params;
+
+ return acpi_evaluate_object(handle, "_REG", &arg_list, NULL);
+}
+EXPORT_SYMBOL(acpi_evaluate_reg);
+
+/**
* acpi_evaluate_dsm - evaluate device's _DSM method
* @handle: ACPI device handle
* @guid: GUID of requested functions, should be 16 bytes
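A hedged caller sketch for the new acpi_evaluate_reg() helper, announcing an EC operation-region handler to AML; the wrapper function, the address-space choice and the error reporting are illustrative only:

	static void example_reg_connect(struct acpi_device *adev)
	{
		acpi_status status;

		status = acpi_evaluate_reg(adev->handle, ACPI_ADR_SPACE_EC,
					   ACPI_REG_CONNECT);
		if (ACPI_FAILURE(status))
			dev_warn(&adev->dev, "_REG(EC, CONNECT) failed\n");
	}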
diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c
index b4994e50608d..2499d7e3c710 100644
--- a/drivers/acpi/video_detect.c
+++ b/drivers/acpi/video_detect.c
@@ -361,6 +361,16 @@ static const struct dmi_system_id video_detect_dmi_table[] = {
DMI_MATCH(DMI_BOARD_NAME, "JV50"),
},
},
+ {
+ /* https://bugzilla.kernel.org/show_bug.cgi?id=207835 */
+ .callback = video_detect_force_native,
+ .ident = "Acer TravelMate 5735Z",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate 5735Z"),
+ DMI_MATCH(DMI_BOARD_NAME, "BA51_MV"),
+ },
+ },
/*
* Desktops which falsely report a backlight and which our heuristics
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index beca5f91bb4c..69361ec43db5 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -5209,7 +5209,7 @@ void ata_link_init(struct ata_port *ap, struct ata_link *link, int pmp)
* sata_link_init_spd - Initialize link->sata_spd_limit
* @link: Link to configure sata_spd_limit for
*
- * Initialize @link->[hw_]sata_spd_limit to the currently
+ * Initialize ``link->[hw_]sata_spd_limit`` to the currently
* configured value.
*
* LOCKING:
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 36e588d88b95..435781a16875 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -649,7 +649,7 @@ static void ata_qc_set_pc_nbytes(struct ata_queued_cmd *qc)
{
struct scsi_cmnd *scmd = qc->scsicmd;
- qc->extrabytes = scmd->request->extra_len;
+ qc->extrabytes = scmd->extra_len;
qc->nbytes = scsi_bufflen(scmd) + qc->extrabytes;
}
@@ -1017,16 +1017,11 @@ void ata_scsi_sdev_config(struct scsi_device *sdev)
* RETURNS:
* 1 if ATAPI DMA draining is needed; otherwise, 0.
*/
-static int atapi_drain_needed(struct request *rq)
+bool ata_scsi_dma_need_drain(struct request *rq)
{
- if (likely(!blk_rq_is_passthrough(rq)))
- return 0;
-
- if (!blk_rq_bytes(rq) || op_is_write(req_op(rq)))
- return 0;
-
return atapi_cmd_type(scsi_req(rq)->cmd[0]) == ATAPI_MISC;
}
+EXPORT_SYMBOL_GPL(ata_scsi_dma_need_drain);
int ata_scsi_dev_config(struct scsi_device *sdev, struct ata_device *dev)
{
@@ -1039,21 +1034,21 @@ int ata_scsi_dev_config(struct scsi_device *sdev, struct ata_device *dev)
blk_queue_max_hw_sectors(q, dev->max_sectors);
if (dev->class == ATA_DEV_ATAPI) {
- void *buf;
-
sdev->sector_size = ATA_SECT_SIZE;
/* set DMA padding */
blk_queue_update_dma_pad(q, ATA_DMA_PAD_SZ - 1);
- /* configure draining */
- buf = kmalloc(ATAPI_MAX_DRAIN, q->bounce_gfp | GFP_KERNEL);
- if (!buf) {
+ /* make room for appending the drain */
+ blk_queue_max_segments(q, queue_max_segments(q) - 1);
+
+ sdev->dma_drain_len = ATAPI_MAX_DRAIN;
+ sdev->dma_drain_buf = kmalloc(sdev->dma_drain_len,
+ q->bounce_gfp | GFP_KERNEL);
+ if (!sdev->dma_drain_buf) {
ata_dev_err(dev, "drain buffer allocation failed\n");
return -ENOMEM;
}
-
- blk_queue_dma_drain(q, atapi_drain_needed, buf, ATAPI_MAX_DRAIN);
} else {
sdev->sector_size = ata_id_logical_sector_size(dev->id);
sdev->manage_start_stop = 1;
@@ -1135,7 +1130,6 @@ EXPORT_SYMBOL_GPL(ata_scsi_slave_config);
void ata_scsi_slave_destroy(struct scsi_device *sdev)
{
struct ata_port *ap = ata_shost_to_port(sdev->host);
- struct request_queue *q = sdev->request_queue;
unsigned long flags;
struct ata_device *dev;
@@ -1152,9 +1146,7 @@ void ata_scsi_slave_destroy(struct scsi_device *sdev)
}
spin_unlock_irqrestore(ap->lock, flags);
- kfree(q->dma_drain_buffer);
- q->dma_drain_buffer = NULL;
- q->dma_drain_size = 0;
+ kfree(sdev->dma_drain_buf);
}
EXPORT_SYMBOL_GPL(ata_scsi_slave_destroy);
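With draining moved out of libata and into the SCSI midlayer, a libata SCSI host template is expected to publish the new hook roughly as below; a hedged sketch assuming the dma_need_drain member added to scsi_host_template in the same series (the template name and the selection of other callbacks are illustrative):

	static struct scsi_host_template example_ata_sht = {
		.module			= THIS_MODULE,
		.name			= "example-ata",
		.queuecommand		= ata_scsi_queuecmd,
		.dma_need_drain		= ata_scsi_dma_need_drain,
		.slave_configure	= ata_scsi_slave_config,
		.slave_destroy		= ata_scsi_slave_destroy,
	};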
diff --git a/drivers/base/core.c b/drivers/base/core.c
index 0cad34f1eede..de808c5a187b 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -1393,7 +1393,7 @@ static void device_release(struct kobject *kobj)
else if (dev->class && dev->class->dev_release)
dev->class->dev_release(dev);
else
- WARN(1, KERN_ERR "Device '%s' does not have a release() function, it is broken and must be fixed. See Documentation/kobject.txt.\n",
+ WARN(1, KERN_ERR "Device '%s' does not have a release() function, it is broken and must be fixed. See Documentation/core-api/kobject.rst.\n",
dev_name(dev));
kfree(p);
}
@@ -3213,40 +3213,6 @@ error:
}
/**
- * device_create_vargs - creates a device and registers it with sysfs
- * @class: pointer to the struct class that this device should be registered to
- * @parent: pointer to the parent struct device of this new device, if any
- * @devt: the dev_t for the char device to be added
- * @drvdata: the data to be added to the device for callbacks
- * @fmt: string for the device's name
- * @args: va_list for the device's name
- *
- * This function can be used by char device classes. A struct device
- * will be created in sysfs, registered to the specified class.
- *
- * A "dev" file will be created, showing the dev_t for the device, if
- * the dev_t is not 0,0.
- * If a pointer to a parent struct device is passed in, the newly created
- * struct device will be a child of that device in sysfs.
- * The pointer to the struct device will be returned from the call.
- * Any further sysfs files that might be required can be created using this
- * pointer.
- *
- * Returns &struct device pointer on success, or ERR_PTR() on error.
- *
- * Note: the struct class passed to this function must have previously
- * been created with a call to class_create().
- */
-struct device *device_create_vargs(struct class *class, struct device *parent,
- dev_t devt, void *drvdata, const char *fmt,
- va_list args)
-{
- return device_create_groups_vargs(class, parent, devt, drvdata, NULL,
- fmt, args);
-}
-EXPORT_SYMBOL_GPL(device_create_vargs);
-
-/**
* device_create - creates a device and registers it with sysfs
* @class: pointer to the struct class that this device should be registered to
* @parent: pointer to the parent struct device of this new device, if any
@@ -3277,7 +3243,8 @@ struct device *device_create(struct class *class, struct device *parent,
struct device *dev;
va_start(vargs, fmt);
- dev = device_create_vargs(class, parent, devt, drvdata, fmt, vargs);
+ dev = device_create_groups_vargs(class, parent, devt, drvdata, NULL,
+ fmt, vargs);
va_end(vargs);
return dev;
}
@@ -3915,6 +3882,7 @@ void set_secondary_fwnode(struct device *dev, struct fwnode_handle *fwnode)
else
dev->fwnode = fwnode;
}
+EXPORT_SYMBOL_GPL(set_secondary_fwnode);
/**
* device_set_of_node_from_dev - reuse device-tree node of another device
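With device_create_vargs() removed above, the remaining entry point for the same functionality is device_create(), which forwards to the same internal helper. A hedged migration sketch, with the class, parent, devt and name as placeholders:

	struct device *dev;

	dev = device_create(example_class, NULL, devt, drvdata,
			    "example%d", minor);
	if (IS_ERR(dev))
		return PTR_ERR(dev);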
diff --git a/drivers/base/node.c b/drivers/base/node.c
index 10d7e818e118..5b02f69769e8 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -415,6 +415,9 @@ static ssize_t node_read_meminfo(struct device *dev,
"Node %d AnonPages: %8lu kB\n"
"Node %d Shmem: %8lu kB\n"
"Node %d KernelStack: %8lu kB\n"
+#ifdef CONFIG_SHADOW_CALL_STACK
+ "Node %d ShadowCallStack:%8lu kB\n"
+#endif
"Node %d PageTables: %8lu kB\n"
"Node %d NFS_Unstable: %8lu kB\n"
"Node %d Bounce: %8lu kB\n"
@@ -438,8 +441,11 @@ static ssize_t node_read_meminfo(struct device *dev,
nid, K(node_page_state(pgdat, NR_ANON_MAPPED)),
nid, K(i.sharedram),
nid, sum_zone_node_page_state(nid, NR_KERNEL_STACK_KB),
+#ifdef CONFIG_SHADOW_CALL_STACK
+ nid, sum_zone_node_page_state(nid, NR_KERNEL_SCS_KB),
+#endif
nid, K(sum_zone_node_page_state(nid, NR_PAGETABLE)),
- nid, K(node_page_state(pgdat, NR_UNSTABLE_NFS)),
+ nid, 0UL,
nid, K(sum_zone_node_page_state(nid, NR_BOUNCE)),
nid, K(node_page_state(pgdat, NR_WRITEBACK_TEMP)),
nid, K(sreclaimable +
diff --git a/drivers/base/platform-msi.c b/drivers/base/platform-msi.c
index 8da314b81eab..c4a17e5edf8b 100644
--- a/drivers/base/platform-msi.c
+++ b/drivers/base/platform-msi.c
@@ -387,7 +387,7 @@ void platform_msi_domain_free(struct irq_domain *domain, unsigned int virq,
*
* @domain: The platform-msi domain
* @virq: The base irq from which to perform the allocate operation
- * @nvec: How many interrupts to free from @virq
+ * @nr_irqs: How many interrupts to free from @virq
*
* Return 0 on success, or an error code on failure. Must be called
* with irq_domain_mutex held (which can only be done as part of a
diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index b27d0f6c18c9..615c6b06b427 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -147,7 +147,8 @@ EXPORT_SYMBOL_GPL(devm_platform_ioremap_resource_byname);
* request_irq() APIs. This is the same as platform_get_irq(), except that it
* does not print an error message if an IRQ can not be obtained.
*
- * Example:
+ * For example::
+ *
* int irq = platform_get_irq_optional(pdev, 0);
* if (irq < 0)
* return irq;
@@ -226,7 +227,8 @@ EXPORT_SYMBOL_GPL(platform_get_irq_optional);
* IRQ fails. Device drivers should check the return value for errors so as to
* not pass a negative integer value to the request_irq() APIs.
*
- * Example:
+ * For example::
+ *
* int irq = platform_get_irq(pdev, 0);
* if (irq < 0)
* return irq;
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 0e07e17c2def..bb98b813554f 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -562,72 +562,26 @@ static void dpm_watchdog_clear(struct dpm_watchdog *wd)
/*------------------------- Resume routines -------------------------*/
/**
- * suspend_event - Return a "suspend" message for given "resume" one.
- * @resume_msg: PM message representing a system-wide resume transition.
- */
-static pm_message_t suspend_event(pm_message_t resume_msg)
-{
- switch (resume_msg.event) {
- case PM_EVENT_RESUME:
- return PMSG_SUSPEND;
- case PM_EVENT_THAW:
- case PM_EVENT_RESTORE:
- return PMSG_FREEZE;
- case PM_EVENT_RECOVER:
- return PMSG_HIBERNATE;
- }
- return PMSG_ON;
-}
-
-/**
- * dev_pm_may_skip_resume - System-wide device resume optimization check.
+ * dev_pm_skip_resume - System-wide device resume optimization check.
* @dev: Target device.
*
- * Checks whether or not the device may be left in suspend after a system-wide
- * transition to the working state.
+ * Return:
+ * - %false if the transition under way is RESTORE.
+ * - Return value of dev_pm_skip_suspend() if the transition under way is THAW.
+ * - The logical negation of %power.must_resume otherwise (that is, when the
+ * transition under way is RESUME).
*/
-bool dev_pm_may_skip_resume(struct device *dev)
+bool dev_pm_skip_resume(struct device *dev)
{
- return !dev->power.must_resume && pm_transition.event != PM_EVENT_RESTORE;
-}
-
-static pm_callback_t dpm_subsys_resume_noirq_cb(struct device *dev,
- pm_message_t state,
- const char **info_p)
-{
- pm_callback_t callback;
- const char *info;
-
- if (dev->pm_domain) {
- info = "noirq power domain ";
- callback = pm_noirq_op(&dev->pm_domain->ops, state);
- } else if (dev->type && dev->type->pm) {
- info = "noirq type ";
- callback = pm_noirq_op(dev->type->pm, state);
- } else if (dev->class && dev->class->pm) {
- info = "noirq class ";
- callback = pm_noirq_op(dev->class->pm, state);
- } else if (dev->bus && dev->bus->pm) {
- info = "noirq bus ";
- callback = pm_noirq_op(dev->bus->pm, state);
- } else {
- return NULL;
- }
+ if (pm_transition.event == PM_EVENT_RESTORE)
+ return false;
- if (info_p)
- *info_p = info;
+ if (pm_transition.event == PM_EVENT_THAW)
+ return dev_pm_skip_suspend(dev);
- return callback;
+ return !dev->power.must_resume;
}
-static pm_callback_t dpm_subsys_suspend_noirq_cb(struct device *dev,
- pm_message_t state,
- const char **info_p);
-
-static pm_callback_t dpm_subsys_suspend_late_cb(struct device *dev,
- pm_message_t state,
- const char **info_p);
-
/**
* device_resume_noirq - Execute a "noirq resume" callback for given device.
* @dev: Device to handle.
@@ -639,8 +593,8 @@ static pm_callback_t dpm_subsys_suspend_late_cb(struct device *dev,
*/
static int device_resume_noirq(struct device *dev, pm_message_t state, bool async)
{
- pm_callback_t callback;
- const char *info;
+ pm_callback_t callback = NULL;
+ const char *info = NULL;
bool skip_resume;
int error = 0;
@@ -656,37 +610,41 @@ static int device_resume_noirq(struct device *dev, pm_message_t state, bool asyn
if (!dpm_wait_for_superior(dev, async))
goto Out;
- skip_resume = dev_pm_may_skip_resume(dev);
+ skip_resume = dev_pm_skip_resume(dev);
+ /*
+ * If the driver callback is skipped below or by the middle layer
+ * callback and device_resume_early() also skips the driver callback for
+ * this device later, it needs to appear as "suspended" to PM-runtime,
+ * so change its status accordingly.
+ *
+ * Otherwise, the device is going to be resumed, so set its PM-runtime
+ * status to "active", but do that only if DPM_FLAG_SMART_SUSPEND is set
+ * to avoid confusing drivers that don't use it.
+ */
+ if (skip_resume)
+ pm_runtime_set_suspended(dev);
+ else if (dev_pm_skip_suspend(dev))
+ pm_runtime_set_active(dev);
- callback = dpm_subsys_resume_noirq_cb(dev, state, &info);
+ if (dev->pm_domain) {
+ info = "noirq power domain ";
+ callback = pm_noirq_op(&dev->pm_domain->ops, state);
+ } else if (dev->type && dev->type->pm) {
+ info = "noirq type ";
+ callback = pm_noirq_op(dev->type->pm, state);
+ } else if (dev->class && dev->class->pm) {
+ info = "noirq class ";
+ callback = pm_noirq_op(dev->class->pm, state);
+ } else if (dev->bus && dev->bus->pm) {
+ info = "noirq bus ";
+ callback = pm_noirq_op(dev->bus->pm, state);
+ }
if (callback)
goto Run;
if (skip_resume)
goto Skip;
- if (dev_pm_smart_suspend_and_suspended(dev)) {
- pm_message_t suspend_msg = suspend_event(state);
-
- /*
- * If "freeze" callbacks have been skipped during a transition
- * related to hibernation, the subsequent "thaw" callbacks must
- * be skipped too or bad things may happen. Otherwise, resume
- * callbacks are going to be run for the device, so its runtime
- * PM status must be changed to reflect the new state after the
- * transition under way.
- */
- if (!dpm_subsys_suspend_late_cb(dev, suspend_msg, NULL) &&
- !dpm_subsys_suspend_noirq_cb(dev, suspend_msg, NULL)) {
- if (state.event == PM_EVENT_THAW) {
- skip_resume = true;
- goto Skip;
- } else {
- pm_runtime_set_active(dev);
- }
- }
- }
-
if (dev->driver && dev->driver->pm) {
info = "noirq driver ";
callback = pm_noirq_op(dev->driver->pm, state);
@@ -698,20 +656,6 @@ Run:
Skip:
dev->power.is_noirq_suspended = false;
- if (skip_resume) {
- /* Make the next phases of resume skip the device. */
- dev->power.is_late_suspended = false;
- dev->power.is_suspended = false;
- /*
- * The device is going to be left in suspend, but it might not
- * have been in runtime suspend before the system suspended, so
- * its runtime PM status needs to be updated to avoid confusing
- * the runtime PM framework when runtime PM is enabled for the
- * device again.
- */
- pm_runtime_set_suspended(dev);
- }
-
Out:
complete_all(&dev->power.completion);
TRACE_RESUME(error);
@@ -810,35 +754,6 @@ void dpm_resume_noirq(pm_message_t state)
cpuidle_resume();
}
-static pm_callback_t dpm_subsys_resume_early_cb(struct device *dev,
- pm_message_t state,
- const char **info_p)
-{
- pm_callback_t callback;
- const char *info;
-
- if (dev->pm_domain) {
- info = "early power domain ";
- callback = pm_late_early_op(&dev->pm_domain->ops, state);
- } else if (dev->type && dev->type->pm) {
- info = "early type ";
- callback = pm_late_early_op(dev->type->pm, state);
- } else if (dev->class && dev->class->pm) {
- info = "early class ";
- callback = pm_late_early_op(dev->class->pm, state);
- } else if (dev->bus && dev->bus->pm) {
- info = "early bus ";
- callback = pm_late_early_op(dev->bus->pm, state);
- } else {
- return NULL;
- }
-
- if (info_p)
- *info_p = info;
-
- return callback;
-}
-
/**
* device_resume_early - Execute an "early resume" callback for given device.
* @dev: Device to handle.
@@ -849,8 +764,8 @@ static pm_callback_t dpm_subsys_resume_early_cb(struct device *dev,
*/
static int device_resume_early(struct device *dev, pm_message_t state, bool async)
{
- pm_callback_t callback;
- const char *info;
+ pm_callback_t callback = NULL;
+ const char *info = NULL;
int error = 0;
TRACE_DEVICE(dev);
@@ -865,17 +780,37 @@ static int device_resume_early(struct device *dev, pm_message_t state, bool asyn
if (!dpm_wait_for_superior(dev, async))
goto Out;
- callback = dpm_subsys_resume_early_cb(dev, state, &info);
+ if (dev->pm_domain) {
+ info = "early power domain ";
+ callback = pm_late_early_op(&dev->pm_domain->ops, state);
+ } else if (dev->type && dev->type->pm) {
+ info = "early type ";
+ callback = pm_late_early_op(dev->type->pm, state);
+ } else if (dev->class && dev->class->pm) {
+ info = "early class ";
+ callback = pm_late_early_op(dev->class->pm, state);
+ } else if (dev->bus && dev->bus->pm) {
+ info = "early bus ";
+ callback = pm_late_early_op(dev->bus->pm, state);
+ }
+ if (callback)
+ goto Run;
+
+ if (dev_pm_skip_resume(dev))
+ goto Skip;
- if (!callback && dev->driver && dev->driver->pm) {
+ if (dev->driver && dev->driver->pm) {
info = "early driver ";
callback = pm_late_early_op(dev->driver->pm, state);
}
+Run:
error = dpm_run_callback(callback, dev, state, info);
+
+Skip:
dev->power.is_late_suspended = false;
- Out:
+Out:
TRACE_RESUME(error);
pm_runtime_enable(dev);
@@ -1245,61 +1180,6 @@ static void dpm_superior_set_must_resume(struct device *dev)
device_links_read_unlock(idx);
}
-static pm_callback_t dpm_subsys_suspend_noirq_cb(struct device *dev,
- pm_message_t state,
- const char **info_p)
-{
- pm_callback_t callback;
- const char *info;
-
- if (dev->pm_domain) {
- info = "noirq power domain ";
- callback = pm_noirq_op(&dev->pm_domain->ops, state);
- } else if (dev->type && dev->type->pm) {
- info = "noirq type ";
- callback = pm_noirq_op(dev->type->pm, state);
- } else if (dev->class && dev->class->pm) {
- info = "noirq class ";
- callback = pm_noirq_op(dev->class->pm, state);
- } else if (dev->bus && dev->bus->pm) {
- info = "noirq bus ";
- callback = pm_noirq_op(dev->bus->pm, state);
- } else {
- return NULL;
- }
-
- if (info_p)
- *info_p = info;
-
- return callback;
-}
-
-static bool device_must_resume(struct device *dev, pm_message_t state,
- bool no_subsys_suspend_noirq)
-{
- pm_message_t resume_msg = resume_event(state);
-
- /*
- * If all of the device driver's "noirq", "late" and "early" callbacks
- * are invoked directly by the core, the decision to allow the device to
- * stay in suspend can be based on its current runtime PM status and its
- * wakeup settings.
- */
- if (no_subsys_suspend_noirq &&
- !dpm_subsys_suspend_late_cb(dev, state, NULL) &&
- !dpm_subsys_resume_early_cb(dev, resume_msg, NULL) &&
- !dpm_subsys_resume_noirq_cb(dev, resume_msg, NULL))
- return !pm_runtime_status_suspended(dev) &&
- (resume_msg.event != PM_EVENT_RESUME ||
- (device_can_wakeup(dev) && !device_may_wakeup(dev)));
-
- /*
- * The only safe strategy here is to require that if the device may not
- * be left in suspend, resume callbacks must be invoked for it.
- */
- return !dev->power.may_skip_resume;
-}
-
/**
* __device_suspend_noirq - Execute a "noirq suspend" callback for given device.
* @dev: Device to handle.
@@ -1311,9 +1191,8 @@ static bool device_must_resume(struct device *dev, pm_message_t state,
*/
static int __device_suspend_noirq(struct device *dev, pm_message_t state, bool async)
{
- pm_callback_t callback;
- const char *info;
- bool no_subsys_cb = false;
+ pm_callback_t callback = NULL;
+ const char *info = NULL;
int error = 0;
TRACE_DEVICE(dev);
@@ -1327,13 +1206,23 @@ static int __device_suspend_noirq(struct device *dev, pm_message_t state, bool a
if (dev->power.syscore || dev->power.direct_complete)
goto Complete;
- callback = dpm_subsys_suspend_noirq_cb(dev, state, &info);
+ if (dev->pm_domain) {
+ info = "noirq power domain ";
+ callback = pm_noirq_op(&dev->pm_domain->ops, state);
+ } else if (dev->type && dev->type->pm) {
+ info = "noirq type ";
+ callback = pm_noirq_op(dev->type->pm, state);
+ } else if (dev->class && dev->class->pm) {
+ info = "noirq class ";
+ callback = pm_noirq_op(dev->class->pm, state);
+ } else if (dev->bus && dev->bus->pm) {
+ info = "noirq bus ";
+ callback = pm_noirq_op(dev->bus->pm, state);
+ }
if (callback)
goto Run;
- no_subsys_cb = !dpm_subsys_suspend_late_cb(dev, state, NULL);
-
- if (dev_pm_smart_suspend_and_suspended(dev) && no_subsys_cb)
+ if (dev_pm_skip_suspend(dev))
goto Skip;
if (dev->driver && dev->driver->pm) {
@@ -1351,13 +1240,16 @@ Run:
Skip:
dev->power.is_noirq_suspended = true;
- if (dev_pm_test_driver_flags(dev, DPM_FLAG_LEAVE_SUSPENDED)) {
- dev->power.must_resume = dev->power.must_resume ||
- atomic_read(&dev->power.usage_count) > 1 ||
- device_must_resume(dev, state, no_subsys_cb);
- } else {
+ /*
+ * Skipping the resume of devices that were in use right before the
+ * system suspend (as indicated by their PM-runtime usage counters)
+ * would be suboptimal. Also resume them if skipping their resume is not
+ * allowed.
+ */
+ if (atomic_read(&dev->power.usage_count) > 1 ||
+ !(dev_pm_test_driver_flags(dev, DPM_FLAG_MAY_SKIP_RESUME) &&
+ dev->power.may_skip_resume))
dev->power.must_resume = true;
- }
if (dev->power.must_resume)
dpm_superior_set_must_resume(dev);
@@ -1474,35 +1366,6 @@ static void dpm_propagate_wakeup_to_parent(struct device *dev)
spin_unlock_irq(&parent->power.lock);
}
-static pm_callback_t dpm_subsys_suspend_late_cb(struct device *dev,
- pm_message_t state,
- const char **info_p)
-{
- pm_callback_t callback;
- const char *info;
-
- if (dev->pm_domain) {
- info = "late power domain ";
- callback = pm_late_early_op(&dev->pm_domain->ops, state);
- } else if (dev->type && dev->type->pm) {
- info = "late type ";
- callback = pm_late_early_op(dev->type->pm, state);
- } else if (dev->class && dev->class->pm) {
- info = "late class ";
- callback = pm_late_early_op(dev->class->pm, state);
- } else if (dev->bus && dev->bus->pm) {
- info = "late bus ";
- callback = pm_late_early_op(dev->bus->pm, state);
- } else {
- return NULL;
- }
-
- if (info_p)
- *info_p = info;
-
- return callback;
-}
-
/**
* __device_suspend_late - Execute a "late suspend" callback for given device.
* @dev: Device to handle.
@@ -1513,8 +1376,8 @@ static pm_callback_t dpm_subsys_suspend_late_cb(struct device *dev,
*/
static int __device_suspend_late(struct device *dev, pm_message_t state, bool async)
{
- pm_callback_t callback;
- const char *info;
+ pm_callback_t callback = NULL;
+ const char *info = NULL;
int error = 0;
TRACE_DEVICE(dev);
@@ -1535,12 +1398,23 @@ static int __device_suspend_late(struct device *dev, pm_message_t state, bool as
if (dev->power.syscore || dev->power.direct_complete)
goto Complete;
- callback = dpm_subsys_suspend_late_cb(dev, state, &info);
+ if (dev->pm_domain) {
+ info = "late power domain ";
+ callback = pm_late_early_op(&dev->pm_domain->ops, state);
+ } else if (dev->type && dev->type->pm) {
+ info = "late type ";
+ callback = pm_late_early_op(dev->type->pm, state);
+ } else if (dev->class && dev->class->pm) {
+ info = "late class ";
+ callback = pm_late_early_op(dev->class->pm, state);
+ } else if (dev->bus && dev->bus->pm) {
+ info = "late bus ";
+ callback = pm_late_early_op(dev->bus->pm, state);
+ }
if (callback)
goto Run;
- if (dev_pm_smart_suspend_and_suspended(dev) &&
- !dpm_subsys_suspend_noirq_cb(dev, state, NULL))
+ if (dev_pm_skip_suspend(dev))
goto Skip;
if (dev->driver && dev->driver->pm) {
@@ -1766,7 +1640,7 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async)
dev->power.direct_complete = false;
}
- dev->power.may_skip_resume = false;
+ dev->power.may_skip_resume = true;
dev->power.must_resume = false;
dpm_watchdog_set(&wd, dev);
@@ -1970,7 +1844,7 @@ unlock:
spin_lock_irq(&dev->power.lock);
dev->power.direct_complete = state.event == PM_EVENT_SUSPEND &&
(ret > 0 || dev->power.no_pm_callbacks) &&
- !dev_pm_test_driver_flags(dev, DPM_FLAG_NEVER_SKIP);
+ !dev_pm_test_driver_flags(dev, DPM_FLAG_NO_DIRECT_COMPLETE);
spin_unlock_irq(&dev->power.lock);
return 0;
}
@@ -2128,7 +2002,7 @@ void device_pm_check_callbacks(struct device *dev)
spin_unlock_irq(&dev->power.lock);
}
-bool dev_pm_smart_suspend_and_suspended(struct device *dev)
+bool dev_pm_skip_suspend(struct device *dev)
{
return dev_pm_test_driver_flags(dev, DPM_FLAG_SMART_SUSPEND) &&
pm_runtime_status_suspended(dev);
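The main.c rework above replaces dev_pm_smart_suspend_and_suspended() with dev_pm_skip_suspend(), drops the dpm_subsys_*_cb() indirection, and keys resume skipping off power.may_skip_resume together with the renamed DPM_FLAG_MAY_SKIP_RESUME. A minimal sketch of a driver opting into the new behaviour follows; the foo driver and its probe function are hypothetical, only the flag names and dev_pm_set_driver_flags() come from the API touched here.

#include <linux/platform_device.h>
#include <linux/pm.h>
#include <linux/pm_runtime.h>

/* Hypothetical driver probe opting into suspend/resume skipping. */
static int foo_probe(struct platform_device *pdev)
{
	struct device *dev = &pdev->dev;

	/*
	 * DPM_FLAG_SMART_SUSPEND makes dev_pm_skip_suspend() return true for
	 * a runtime-suspended device, so its late/noirq suspend callbacks are
	 * skipped; DPM_FLAG_MAY_SKIP_RESUME allows the core to leave it
	 * suspended across system resume unless power.must_resume gets set.
	 */
	dev_pm_set_driver_flags(dev, DPM_FLAG_SMART_SUSPEND |
				     DPM_FLAG_MAY_SKIP_RESUME);

	pm_runtime_enable(dev);
	return 0;
}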
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index 99c7da112c95..9f62790f644c 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -523,13 +523,11 @@ static int rpm_suspend(struct device *dev, int rpmflags)
repeat:
retval = rpm_check_suspend_allowed(dev);
-
if (retval < 0)
- ; /* Conditions are wrong. */
+ goto out; /* Conditions are wrong. */
/* Synchronous suspends are not allowed in the RPM_RESUMING state. */
- else if (dev->power.runtime_status == RPM_RESUMING &&
- !(rpmflags & RPM_ASYNC))
+ if (dev->power.runtime_status == RPM_RESUMING && !(rpmflags & RPM_ASYNC))
retval = -EAGAIN;
if (retval)
goto out;
diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c
index 2b99fe1eb207..24d25cf8ab14 100644
--- a/drivers/base/power/sysfs.c
+++ b/drivers/base/power/sysfs.c
@@ -666,7 +666,7 @@ int dpm_sysfs_add(struct device *dev)
if (rc)
return rc;
- if (pm_runtime_callbacks_present(dev)) {
+ if (!pm_runtime_has_no_callbacks(dev)) {
rc = sysfs_merge_group(&dev->kobj, &pm_runtime_attr_group);
if (rc)
goto err_out;
@@ -709,7 +709,7 @@ int dpm_sysfs_change_owner(struct device *dev, kuid_t kuid, kgid_t kgid)
if (rc)
return rc;
- if (pm_runtime_callbacks_present(dev)) {
+ if (!pm_runtime_has_no_callbacks(dev)) {
rc = sysfs_group_change_owner(
&dev->kobj, &pm_runtime_attr_group, kuid, kgid);
if (rc)
diff --git a/drivers/base/regmap/regmap-debugfs.c b/drivers/base/regmap/regmap-debugfs.c
index e72843fe41df..089e5dc7144a 100644
--- a/drivers/base/regmap/regmap-debugfs.c
+++ b/drivers/base/regmap/regmap-debugfs.c
@@ -227,6 +227,9 @@ static ssize_t regmap_read_debugfs(struct regmap *map, unsigned int from,
if (*ppos < 0 || !count)
return -EINVAL;
+ if (count > (PAGE_SIZE << (MAX_ORDER - 1)))
+ count = PAGE_SIZE << (MAX_ORDER - 1);
+
buf = kmalloc(count, GFP_KERNEL);
if (!buf)
return -ENOMEM;
@@ -371,6 +374,9 @@ static ssize_t regmap_reg_ranges_read_file(struct file *file,
if (*ppos < 0 || !count)
return -EINVAL;
+ if (count > (PAGE_SIZE << (MAX_ORDER - 1)))
+ count = PAGE_SIZE << (MAX_ORDER - 1);
+
buf = kmalloc(count, GFP_KERNEL);
if (!buf)
return -ENOMEM;
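Both regmap-debugfs hunks clamp a user-controlled read size to the largest allocation kmalloc() can satisfy before allocating the buffer. A sketch of the same guard in a generic debugfs read handler, assuming a hypothetical foo attribute; only the clamp mirrors the change above.

#include <linux/debugfs.h>
#include <linux/slab.h>

static ssize_t foo_read(struct file *file, char __user *user_buf,
			size_t count, loff_t *ppos)
{
	char *buf;
	int len;
	ssize_t ret;

	if (*ppos < 0 || !count)
		return -EINVAL;

	/* kmalloc() cannot serve more than PAGE_SIZE << (MAX_ORDER - 1). */
	if (count > (PAGE_SIZE << (MAX_ORDER - 1)))
		count = PAGE_SIZE << (MAX_ORDER - 1);

	buf = kmalloc(count, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	/* Contents elided; the point is the clamp before the allocation. */
	len = scnprintf(buf, count, "hello from foo\n");
	ret = simple_read_from_buffer(user_buf, count, ppos, buf, len);

	kfree(buf);
	return ret;
}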
diff --git a/drivers/base/regmap/regmap-i2c.c b/drivers/base/regmap/regmap-i2c.c
index 008f8da69d97..62b95a9212ae 100644
--- a/drivers/base/regmap/regmap-i2c.c
+++ b/drivers/base/regmap/regmap-i2c.c
@@ -246,6 +246,63 @@ static const struct regmap_bus regmap_i2c_smbus_i2c_block = {
.max_raw_write = I2C_SMBUS_BLOCK_MAX,
};
+static int regmap_i2c_smbus_i2c_write_reg16(void *context, const void *data,
+ size_t count)
+{
+ struct device *dev = context;
+ struct i2c_client *i2c = to_i2c_client(dev);
+
+ if (count < 2)
+ return -EINVAL;
+
+ count--;
+ return i2c_smbus_write_i2c_block_data(i2c, ((u8 *)data)[0], count,
+ (u8 *)data + 1);
+}
+
+static int regmap_i2c_smbus_i2c_read_reg16(void *context, const void *reg,
+ size_t reg_size, void *val,
+ size_t val_size)
+{
+ struct device *dev = context;
+ struct i2c_client *i2c = to_i2c_client(dev);
+ int ret, count, len = val_size;
+
+ if (reg_size != 2)
+ return -EINVAL;
+
+ ret = i2c_smbus_write_byte_data(i2c, ((u16 *)reg)[0] & 0xff,
+ ((u16 *)reg)[0] >> 8);
+ if (ret < 0)
+ return ret;
+
+ count = 0;
+ do {
+ /* Current Address Read */
+ ret = i2c_smbus_read_byte(i2c);
+ if (ret < 0)
+ break;
+
+ *((u8 *)val++) = ret;
+ count++;
+ len--;
+ } while (len > 0);
+
+ if (count == val_size)
+ return 0;
+ else if (ret < 0)
+ return ret;
+ else
+ return -EIO;
+}
+
+static const struct regmap_bus regmap_i2c_smbus_i2c_block_reg16 = {
+ .write = regmap_i2c_smbus_i2c_write_reg16,
+ .read = regmap_i2c_smbus_i2c_read_reg16,
+ .max_raw_read = I2C_SMBUS_BLOCK_MAX,
+ .max_raw_write = I2C_SMBUS_BLOCK_MAX,
+};
+
static const struct regmap_bus *regmap_get_i2c_bus(struct i2c_client *i2c,
const struct regmap_config *config)
{
@@ -255,6 +312,10 @@ static const struct regmap_bus *regmap_get_i2c_bus(struct i2c_client *i2c,
i2c_check_functionality(i2c->adapter,
I2C_FUNC_SMBUS_I2C_BLOCK))
return &regmap_i2c_smbus_i2c_block;
+ else if (config->val_bits == 8 && config->reg_bits == 16 &&
+ i2c_check_functionality(i2c->adapter,
+ I2C_FUNC_SMBUS_I2C_BLOCK))
+ return &regmap_i2c_smbus_i2c_block_reg16;
else if (config->val_bits == 16 && config->reg_bits == 8 &&
i2c_check_functionality(i2c->adapter,
I2C_FUNC_SMBUS_WORD_DATA))
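regmap_get_i2c_bus() now also selects the new regmap_i2c_smbus_i2c_block_reg16 bus when a client asks for 16-bit registers holding 8-bit values on an adapter that only supports SMBus I2C block transfers. A sketch of a configuration that would take this path, assuming a hypothetical foo client; regmap_config and devm_regmap_init_i2c() are existing API.

#include <linux/i2c.h>
#include <linux/regmap.h>

static const struct regmap_config foo_regmap_config = {
	.reg_bits = 16,		/* 16-bit register addresses ... */
	.val_bits = 8,		/* ... holding 8-bit values */
	.max_register = 0x01ff,
};

static int foo_setup_regmap(struct i2c_client *client, struct regmap **map)
{
	/* Ends up in regmap_get_i2c_bus() and picks the reg16 SMBus bus. */
	*map = devm_regmap_init_i2c(client, &foo_regmap_config);
	if (IS_ERR(*map))
		return PTR_ERR(*map);

	return 0;
}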
diff --git a/drivers/base/regmap/regmap-irq.c b/drivers/base/regmap/regmap-irq.c
index 3d64c9331a82..4340e1d268b6 100644
--- a/drivers/base/regmap/regmap-irq.c
+++ b/drivers/base/regmap/regmap-irq.c
@@ -541,8 +541,9 @@ static const struct irq_domain_ops regmap_domain_ops = {
};
/**
- * regmap_add_irq_chip() - Use standard regmap IRQ controller handling
+ * regmap_add_irq_chip_np() - Use standard regmap IRQ controller handling
*
+ * @np: The device_node to which the IRQ domain should be added.
* @map: The regmap for the device.
* @irq: The IRQ the device uses to signal interrupts.
* @irq_flags: The IRQF_ flags to use for the primary interrupt.
@@ -556,9 +557,10 @@ static const struct irq_domain_ops regmap_domain_ops = {
* register cache. The chip driver is responsible for restoring the
* register values used by the IRQ controller over suspend and resume.
*/
-int regmap_add_irq_chip(struct regmap *map, int irq, int irq_flags,
- int irq_base, const struct regmap_irq_chip *chip,
- struct regmap_irq_chip_data **data)
+int regmap_add_irq_chip_np(struct device_node *np, struct regmap *map, int irq,
+ int irq_flags, int irq_base,
+ const struct regmap_irq_chip *chip,
+ struct regmap_irq_chip_data **data)
{
struct regmap_irq_chip_data *d;
int i;
@@ -769,12 +771,10 @@ int regmap_add_irq_chip(struct regmap *map, int irq, int irq_flags,
}
if (irq_base)
- d->domain = irq_domain_add_legacy(map->dev->of_node,
- chip->num_irqs, irq_base, 0,
- &regmap_domain_ops, d);
+ d->domain = irq_domain_add_legacy(np, chip->num_irqs, irq_base,
+ 0, &regmap_domain_ops, d);
else
- d->domain = irq_domain_add_linear(map->dev->of_node,
- chip->num_irqs,
+ d->domain = irq_domain_add_linear(np, chip->num_irqs,
&regmap_domain_ops, d);
if (!d->domain) {
dev_err(map->dev, "Failed to create IRQ domain\n");
@@ -808,6 +808,30 @@ err_alloc:
kfree(d);
return ret;
}
+EXPORT_SYMBOL_GPL(regmap_add_irq_chip_np);
+
+/**
+ * regmap_add_irq_chip() - Use standard regmap IRQ controller handling
+ *
+ * @map: The regmap for the device.
+ * @irq: The IRQ the device uses to signal interrupts.
+ * @irq_flags: The IRQF_ flags to use for the primary interrupt.
+ * @irq_base: Allocate at specific IRQ number if irq_base > 0.
+ * @chip: Configuration for the interrupt controller.
+ * @data: Runtime data structure for the controller, allocated on success.
+ *
+ * Returns 0 on success or an errno on failure.
+ *
+ * This is the same as regmap_add_irq_chip_np, except that the device
+ * node of the regmap is used.
+ */
+int regmap_add_irq_chip(struct regmap *map, int irq, int irq_flags,
+ int irq_base, const struct regmap_irq_chip *chip,
+ struct regmap_irq_chip_data **data)
+{
+ return regmap_add_irq_chip_np(map->dev->of_node, map, irq, irq_flags,
+ irq_base, chip, data);
+}
EXPORT_SYMBOL_GPL(regmap_add_irq_chip);
/**
@@ -875,9 +899,10 @@ static int devm_regmap_irq_chip_match(struct device *dev, void *res, void *data)
}
/**
- * devm_regmap_add_irq_chip() - Resource manager regmap_add_irq_chip()
+ * devm_regmap_add_irq_chip_np() - Resource manager regmap_add_irq_chip_np()
*
* @dev: The device pointer to which the irq_chip belongs.
+ * @np: The device_node to which the IRQ domain should be added.
* @map: The regmap for the device.
* @irq: The IRQ the device uses to signal interrupts
* @irq_flags: The IRQF_ flags to use for the primary interrupt.
@@ -890,10 +915,11 @@ static int devm_regmap_irq_chip_match(struct device *dev, void *res, void *data)
* The &regmap_irq_chip_data will be automatically released when the device is
* unbound.
*/
-int devm_regmap_add_irq_chip(struct device *dev, struct regmap *map, int irq,
- int irq_flags, int irq_base,
- const struct regmap_irq_chip *chip,
- struct regmap_irq_chip_data **data)
+int devm_regmap_add_irq_chip_np(struct device *dev, struct device_node *np,
+ struct regmap *map, int irq, int irq_flags,
+ int irq_base,
+ const struct regmap_irq_chip *chip,
+ struct regmap_irq_chip_data **data)
{
struct regmap_irq_chip_data **ptr, *d;
int ret;
@@ -903,8 +929,8 @@ int devm_regmap_add_irq_chip(struct device *dev, struct regmap *map, int irq,
if (!ptr)
return -ENOMEM;
- ret = regmap_add_irq_chip(map, irq, irq_flags, irq_base,
- chip, &d);
+ ret = regmap_add_irq_chip_np(np, map, irq, irq_flags, irq_base,
+ chip, &d);
if (ret < 0) {
devres_free(ptr);
return ret;
@@ -915,6 +941,32 @@ int devm_regmap_add_irq_chip(struct device *dev, struct regmap *map, int irq,
*data = d;
return 0;
}
+EXPORT_SYMBOL_GPL(devm_regmap_add_irq_chip_np);
+
+/**
+ * devm_regmap_add_irq_chip() - Resource manager regmap_add_irq_chip()
+ *
+ * @dev: The device pointer to which the irq_chip belongs.
+ * @map: The regmap for the device.
+ * @irq: The IRQ the device uses to signal interrupts
+ * @irq_flags: The IRQF_ flags to use for the primary interrupt.
+ * @irq_base: Allocate at specific IRQ number if irq_base > 0.
+ * @chip: Configuration for the interrupt controller.
+ * @data: Runtime data structure for the controller, allocated on success
+ *
+ * Returns 0 on success or an errno on failure.
+ *
+ * The &regmap_irq_chip_data will be automatically released when the device is
+ * unbound.
+ */
+int devm_regmap_add_irq_chip(struct device *dev, struct regmap *map, int irq,
+ int irq_flags, int irq_base,
+ const struct regmap_irq_chip *chip,
+ struct regmap_irq_chip_data **data)
+{
+ return devm_regmap_add_irq_chip_np(dev, map->dev->of_node, map, irq,
+ irq_flags, irq_base, chip, data);
+}
EXPORT_SYMBOL_GPL(devm_regmap_add_irq_chip);
/**
diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c
index 4ad5c5adc0a3..c472f624382d 100644
--- a/drivers/base/regmap/regmap.c
+++ b/drivers/base/regmap/regmap.c
@@ -827,6 +827,7 @@ struct regmap *__regmap_init(struct device *dev,
} else if (!bus->read || !bus->write) {
map->reg_read = _regmap_bus_reg_read;
map->reg_write = _regmap_bus_reg_write;
+ map->reg_update_bits = bus->reg_update_bits;
map->defer_caching = false;
goto skip_format_initialization;
diff --git a/drivers/base/swnode.c b/drivers/base/swnode.c
index de8d3543e8fe..2079937ddb51 100644
--- a/drivers/base/swnode.c
+++ b/drivers/base/swnode.c
@@ -727,6 +727,54 @@ void software_node_unregister_nodes(const struct software_node *nodes)
EXPORT_SYMBOL_GPL(software_node_unregister_nodes);
/**
+ * software_node_register_node_group - Register a group of software nodes
+ * @node_group: NULL terminated array of software node pointers to be registered
+ *
+ * Register multiple software nodes at once.
+ */
+int software_node_register_node_group(const struct software_node **node_group)
+{
+ unsigned int i;
+ int ret;
+
+ if (!node_group)
+ return 0;
+
+ for (i = 0; node_group[i]; i++) {
+ ret = software_node_register(node_group[i]);
+ if (ret) {
+ software_node_unregister_node_group(node_group);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(software_node_register_node_group);
+
+/**
+ * software_node_unregister_node_group - Unregister a group of software nodes
+ * @node_group: NULL terminated array of software node pointers to be unregistered
+ *
+ * Unregister multiple software nodes at once.
+ */
+void software_node_unregister_node_group(const struct software_node **node_group)
+{
+ struct swnode *swnode;
+ unsigned int i;
+
+ if (!node_group)
+ return;
+
+ for (i = 0; node_group[i]; i++) {
+ swnode = software_node_to_swnode(node_group[i]);
+ if (swnode)
+ fwnode_remove_software_node(&swnode->fwnode);
+ }
+}
+EXPORT_SYMBOL_GPL(software_node_unregister_node_group);
+
+/**
* software_node_register - Register static software node
* @node: The software node to be registered
*/
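software_node_register_node_group() walks a NULL-terminated array and unwinds the already-registered nodes on failure, so callers no longer have to open-code that loop. A usage sketch with made-up node names and properties; only the two group helpers are the new API.

#include <linux/module.h>
#include <linux/property.h>

static const struct property_entry foo_props[] = {
	PROPERTY_ENTRY_U32("reg", 0),
	{ }
};

static const struct software_node foo_node = {
	.name = "foo",
	.properties = foo_props,
};

static const struct software_node foo_child_node = {
	.name = "foo-child",
	.parent = &foo_node,
};

/* NULL-terminated; parents must be listed before their children. */
static const struct software_node *foo_node_group[] = {
	&foo_node,
	&foo_child_node,
	NULL
};

static int __init foo_nodes_init(void)
{
	return software_node_register_node_group(foo_node_group);
}
module_init(foo_nodes_init);

static void __exit foo_nodes_exit(void)
{
	software_node_unregister_node_group(foo_node_group);
}
module_exit(foo_nodes_exit);

MODULE_LICENSE("GPL");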
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index a27804d71e12..5ca7216e9e01 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -407,7 +407,6 @@ aoeblk_gdalloc(void *vp)
WARN_ON(d->gd);
WARN_ON(d->flags & DEVFL_UP);
blk_queue_max_hw_sectors(q, BLK_DEF_MAX_SECTORS);
- q->backing_dev_info->name = "aoe";
q->backing_dev_info->ra_pages = READ_AHEAD / PAGE_SIZE;
d->bufpool = mp;
d->blkq = gd->queue = q;
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index 15e99697234a..df53dca5d02c 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -396,9 +396,7 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want)
bytes = sizeof(struct page *)*want;
new_pages = kzalloc(bytes, GFP_NOIO | __GFP_NOWARN);
if (!new_pages) {
- new_pages = __vmalloc(bytes,
- GFP_NOIO | __GFP_ZERO,
- PAGE_KERNEL);
+ new_pages = __vmalloc(bytes, GFP_NOIO | __GFP_ZERO);
if (!new_pages)
return NULL;
}
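Only the __vmalloc() calling convention changes in this hunk: the pgprot argument is gone and mappings are always PAGE_KERNEL. A sketch of the same kmalloc-then-vmalloc fallback, plus the matching kvfree(), outside drbd; the helper names are hypothetical.

#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

static struct page **foo_alloc_page_array(unsigned long want)
{
	size_t bytes = array_size(want, sizeof(struct page *));
	struct page **pages;

	pages = kzalloc(bytes, GFP_NOIO | __GFP_NOWARN);
	if (!pages)	/* two-argument __vmalloc(): no pgprot anymore */
		pages = __vmalloc(bytes, GFP_NOIO | __GFP_ZERO);

	return pages;	/* may be NULL, caller must check */
}

static void foo_free_page_array(struct page **pages)
{
	kvfree(pages);	/* handles both kmalloc()ed and vmalloc()ed memory */
}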
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 840c3aef3c5c..c80a2f1c3c2a 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -21,24 +21,6 @@
static bool drbd_may_do_local_read(struct drbd_device *device, sector_t sector, int size);
-/* Update disk stats at start of I/O request */
-static void _drbd_start_io_acct(struct drbd_device *device, struct drbd_request *req)
-{
- struct request_queue *q = device->rq_queue;
-
- generic_start_io_acct(q, bio_op(req->master_bio),
- req->i.size >> 9, &device->vdisk->part0);
-}
-
-/* Update disk stats when completing request upwards */
-static void _drbd_end_io_acct(struct drbd_device *device, struct drbd_request *req)
-{
- struct request_queue *q = device->rq_queue;
-
- generic_end_io_acct(q, bio_op(req->master_bio),
- &device->vdisk->part0, req->start_jif);
-}
-
static struct drbd_request *drbd_req_new(struct drbd_device *device, struct bio *bio_src)
{
struct drbd_request *req;
@@ -263,7 +245,7 @@ void drbd_req_complete(struct drbd_request *req, struct bio_and_error *m)
start_new_tl_epoch(first_peer_device(device)->connection);
/* Update disk stats */
- _drbd_end_io_acct(device, req);
+ bio_end_io_acct(req->master_bio, req->start_jif);
/* If READ failed,
* have it be pushed back to the retry work queue,
@@ -1222,16 +1204,15 @@ drbd_request_prepare(struct drbd_device *device, struct bio *bio, unsigned long
bio_endio(bio);
return ERR_PTR(-ENOMEM);
}
- req->start_jif = start_jif;
+
+ /* Update disk stats */
+ req->start_jif = bio_start_io_acct(req->master_bio);
if (!get_ldev(device)) {
bio_put(req->private_bio);
req->private_bio = NULL;
}
- /* Update disk stats */
- _drbd_start_io_acct(device, req);
-
/* process discards always from our submitter thread */
if (bio_op(bio) == REQ_OP_WRITE_ZEROES ||
bio_op(bio) == REQ_OP_DISCARD)
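drbd's private wrappers around generic_start_io_acct()/generic_end_io_acct() are replaced by the block layer's bio_start_io_acct() and bio_end_io_acct(), which take the bio directly and return/accept the start time. A sketch of the pattern in a bio-based driver, assuming hypothetical foo structures; the two accounting helpers are the ones used in the hunk.

#include <linux/bio.h>
#include <linux/blkdev.h>

struct foo_io {
	struct bio	*bio;
	unsigned long	start_jif;	/* returned by bio_start_io_acct() */
};

static void foo_submit(struct foo_io *io, struct bio *bio)
{
	io->bio = bio;
	io->start_jif = bio_start_io_acct(bio);	/* open the accounting window */
	/* ... queue the work that actually services the bio ... */
}

static void foo_complete(struct foo_io *io, blk_status_t status)
{
	bio_end_io_acct(io->bio, io->start_jif);	/* close the window */
	io->bio->bi_status = status;
	bio_endio(io->bio);
}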
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index c3daa64cb52c..3e9db22db2a8 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -337,8 +337,7 @@ static bool initialized;
/*
* globals used by 'result()'
*/
-#define MAX_REPLIES 16
-static unsigned char reply_buffer[MAX_REPLIES];
+static unsigned char reply_buffer[FD_RAW_REPLY_SIZE];
static int inr; /* size of reply buffer, when called from interrupt */
#define ST0 0
#define ST1 1
@@ -595,12 +594,12 @@ static unsigned char in_sector_offset; /* offset within physical sector,
static inline unsigned char fdc_inb(int fdc, int reg)
{
- return fd_inb(fdc_state[fdc].address + reg);
+ return fd_inb(fdc_state[fdc].address, reg);
}
static inline void fdc_outb(unsigned char value, int fdc, int reg)
{
- fd_outb(value, fdc_state[fdc].address + reg);
+ fd_outb(value, fdc_state[fdc].address, reg);
}
static inline bool drive_no_geom(int drive)
@@ -668,16 +667,12 @@ static struct output_log {
static int output_log_pos;
-#define current_reqD -1
#define MAXTIMEOUT -2
static void __reschedule_timeout(int drive, const char *message)
{
unsigned long delay;
- if (drive == current_reqD)
- drive = current_drive;
-
if (drive < 0 || drive >= N_DRIVE) {
delay = 20UL * HZ;
drive = 0;
@@ -827,59 +822,70 @@ static int set_dor(int fdc, char mask, char data)
return olddor;
}
-static void twaddle(void)
+static void twaddle(int fdc, int drive)
{
- if (drive_params[current_drive].select_delay)
+ if (drive_params[drive].select_delay)
return;
- fdc_outb(fdc_state[current_fdc].dor & ~(0x10 << UNIT(current_drive)),
- current_fdc, FD_DOR);
- fdc_outb(fdc_state[current_fdc].dor, current_fdc, FD_DOR);
- drive_state[current_drive].select_date = jiffies;
+ fdc_outb(fdc_state[fdc].dor & ~(0x10 << UNIT(drive)),
+ fdc, FD_DOR);
+ fdc_outb(fdc_state[fdc].dor, fdc, FD_DOR);
+ drive_state[drive].select_date = jiffies;
}
/*
- * Reset all driver information about the current fdc.
+ * Reset all driver information about the specified fdc.
* This is needed after a reset, and after a raw command.
*/
-static void reset_fdc_info(int mode)
+static void reset_fdc_info(int fdc, int mode)
{
int drive;
- fdc_state[current_fdc].spec1 = fdc_state[current_fdc].spec2 = -1;
- fdc_state[current_fdc].need_configure = 1;
- fdc_state[current_fdc].perp_mode = 1;
- fdc_state[current_fdc].rawcmd = 0;
+ fdc_state[fdc].spec1 = fdc_state[fdc].spec2 = -1;
+ fdc_state[fdc].need_configure = 1;
+ fdc_state[fdc].perp_mode = 1;
+ fdc_state[fdc].rawcmd = 0;
for (drive = 0; drive < N_DRIVE; drive++)
- if (FDC(drive) == current_fdc &&
+ if (FDC(drive) == fdc &&
(mode || drive_state[drive].track != NEED_1_RECAL))
drive_state[drive].track = NEED_2_RECAL;
}
-/* selects the fdc and drive, and enables the fdc's input/dma. */
+/*
+ * selects the fdc and drive, and enables the fdc's input/dma.
+ * Both current_drive and current_fdc are changed to match the new drive.
+ */
static void set_fdc(int drive)
{
- unsigned int new_fdc = current_fdc;
+ unsigned int fdc;
- if (drive >= 0 && drive < N_DRIVE) {
- new_fdc = FDC(drive);
- current_drive = drive;
+ if (drive < 0 || drive >= N_DRIVE) {
+ pr_info("bad drive value %d\n", drive);
+ return;
}
- if (new_fdc >= N_FDC) {
+
+ fdc = FDC(drive);
+ if (fdc >= N_FDC) {
pr_info("bad fdc value\n");
return;
}
- current_fdc = new_fdc;
- set_dor(current_fdc, ~0, 8);
+
+ set_dor(fdc, ~0, 8);
#if N_FDC > 1
- set_dor(1 - current_fdc, ~8, 0);
+ set_dor(1 - fdc, ~8, 0);
#endif
- if (fdc_state[current_fdc].rawcmd == 2)
- reset_fdc_info(1);
- if (fdc_inb(current_fdc, FD_STATUS) != STATUS_READY)
- fdc_state[current_fdc].reset = 1;
+ if (fdc_state[fdc].rawcmd == 2)
+ reset_fdc_info(fdc, 1);
+ if (fdc_inb(fdc, FD_STATUS) != STATUS_READY)
+ fdc_state[fdc].reset = 1;
+
+ current_drive = drive;
+ current_fdc = fdc;
}
-/* locks the driver */
+/*
+ * locks the driver.
+ * Both current_drive and current_fdc are changed to match the new drive.
+ */
static int lock_fdc(int drive)
{
if (WARN(atomic_read(&usage_count) == 0,
@@ -1062,12 +1068,9 @@ static void setup_DMA(void)
unsigned long f;
if (raw_cmd->length == 0) {
- int i;
-
- pr_info("zero dma transfer size:");
- for (i = 0; i < raw_cmd->cmd_count; i++)
- pr_cont("%x,", raw_cmd->cmd[i]);
- pr_cont("\n");
+ print_hex_dump(KERN_INFO, "zero dma transfer size: ",
+ DUMP_PREFIX_NONE, 16, 1,
+ raw_cmd->fullcmd, raw_cmd->cmd_count, false);
cont->done(0);
fdc_state[current_fdc].reset = 1;
return;
@@ -1104,62 +1107,62 @@ static void setup_DMA(void)
#endif
}
-static void show_floppy(void);
+static void show_floppy(int fdc);
/* waits until the fdc becomes ready */
-static int wait_til_ready(void)
+static int wait_til_ready(int fdc)
{
int status;
int counter;
- if (fdc_state[current_fdc].reset)
+ if (fdc_state[fdc].reset)
return -1;
for (counter = 0; counter < 10000; counter++) {
- status = fdc_inb(current_fdc, FD_STATUS);
+ status = fdc_inb(fdc, FD_STATUS);
if (status & STATUS_READY)
return status;
}
if (initialized) {
- DPRINT("Getstatus times out (%x) on fdc %d\n", status, current_fdc);
- show_floppy();
+ DPRINT("Getstatus times out (%x) on fdc %d\n", status, fdc);
+ show_floppy(fdc);
}
- fdc_state[current_fdc].reset = 1;
+ fdc_state[fdc].reset = 1;
return -1;
}
/* sends a command byte to the fdc */
-static int output_byte(char byte)
+static int output_byte(int fdc, char byte)
{
- int status = wait_til_ready();
+ int status = wait_til_ready(fdc);
if (status < 0)
return -1;
if (is_ready_state(status)) {
- fdc_outb(byte, current_fdc, FD_DATA);
+ fdc_outb(byte, fdc, FD_DATA);
output_log[output_log_pos].data = byte;
output_log[output_log_pos].status = status;
output_log[output_log_pos].jiffies = jiffies;
output_log_pos = (output_log_pos + 1) % OLOGSIZE;
return 0;
}
- fdc_state[current_fdc].reset = 1;
+ fdc_state[fdc].reset = 1;
if (initialized) {
DPRINT("Unable to send byte %x to FDC. Fdc=%x Status=%x\n",
- byte, current_fdc, status);
- show_floppy();
+ byte, fdc, status);
+ show_floppy(fdc);
}
return -1;
}
/* gets the response from the fdc */
-static int result(void)
+static int result(int fdc)
{
int i;
int status = 0;
- for (i = 0; i < MAX_REPLIES; i++) {
- status = wait_til_ready();
+ for (i = 0; i < FD_RAW_REPLY_SIZE; i++) {
+ status = wait_til_ready(fdc);
if (status < 0)
break;
status &= STATUS_DIR | STATUS_READY | STATUS_BUSY | STATUS_DMA;
@@ -1169,24 +1172,24 @@ static int result(void)
return i;
}
if (status == (STATUS_DIR | STATUS_READY | STATUS_BUSY))
- reply_buffer[i] = fdc_inb(current_fdc, FD_DATA);
+ reply_buffer[i] = fdc_inb(fdc, FD_DATA);
else
break;
}
if (initialized) {
DPRINT("get result error. Fdc=%d Last status=%x Read bytes=%d\n",
- current_fdc, status, i);
- show_floppy();
+ fdc, status, i);
+ show_floppy(fdc);
}
- fdc_state[current_fdc].reset = 1;
+ fdc_state[fdc].reset = 1;
return -1;
}
#define MORE_OUTPUT -2
/* does the fdc need more output? */
-static int need_more_output(void)
+static int need_more_output(int fdc)
{
- int status = wait_til_ready();
+ int status = wait_til_ready(fdc);
if (status < 0)
return -1;
@@ -1194,13 +1197,13 @@ static int need_more_output(void)
if (is_ready_state(status))
return MORE_OUTPUT;
- return result();
+ return result(fdc);
}
/* Set perpendicular mode as required, based on data rate, if supported.
* 82077 Now tested. 1Mbps data rate only possible with 82077-1.
*/
-static void perpendicular_mode(void)
+static void perpendicular_mode(int fdc)
{
unsigned char perp_mode;
@@ -1215,7 +1218,7 @@ static void perpendicular_mode(void)
default:
DPRINT("Invalid data rate for perpendicular mode!\n");
cont->done(0);
- fdc_state[current_fdc].reset = 1;
+ fdc_state[fdc].reset = 1;
/*
* convenient way to return to
* redo without too much hassle
@@ -1226,12 +1229,12 @@ static void perpendicular_mode(void)
} else
perp_mode = 0;
- if (fdc_state[current_fdc].perp_mode == perp_mode)
+ if (fdc_state[fdc].perp_mode == perp_mode)
return;
- if (fdc_state[current_fdc].version >= FDC_82077_ORIG) {
- output_byte(FD_PERPENDICULAR);
- output_byte(perp_mode);
- fdc_state[current_fdc].perp_mode = perp_mode;
+ if (fdc_state[fdc].version >= FDC_82077_ORIG) {
+ output_byte(fdc, FD_PERPENDICULAR);
+ output_byte(fdc, perp_mode);
+ fdc_state[fdc].perp_mode = perp_mode;
} else if (perp_mode) {
DPRINT("perpendicular mode not supported by this FDC.\n");
}
@@ -1240,16 +1243,15 @@ static void perpendicular_mode(void)
static int fifo_depth = 0xa;
static int no_fifo;
-static int fdc_configure(void)
+static int fdc_configure(int fdc)
{
/* Turn on FIFO */
- output_byte(FD_CONFIGURE);
- if (need_more_output() != MORE_OUTPUT)
+ output_byte(fdc, FD_CONFIGURE);
+ if (need_more_output(fdc) != MORE_OUTPUT)
return 0;
- output_byte(0);
- output_byte(0x10 | (no_fifo & 0x20) | (fifo_depth & 0xf));
- output_byte(0); /* pre-compensation from track
- 0 upwards */
+ output_byte(fdc, 0);
+ output_byte(fdc, 0x10 | (no_fifo & 0x20) | (fifo_depth & 0xf));
+ output_byte(fdc, 0); /* pre-compensation from track 0 upwards */
return 1;
}
@@ -1274,7 +1276,7 @@ static int fdc_configure(void)
*
* These values are rounded up to the next highest available delay time.
*/
-static void fdc_specify(void)
+static void fdc_specify(int fdc, int drive)
{
unsigned char spec1;
unsigned char spec2;
@@ -1286,10 +1288,10 @@ static void fdc_specify(void)
int hlt_max_code = 0x7f;
int hut_max_code = 0xf;
- if (fdc_state[current_fdc].need_configure &&
- fdc_state[current_fdc].version >= FDC_82072A) {
- fdc_configure();
- fdc_state[current_fdc].need_configure = 0;
+ if (fdc_state[fdc].need_configure &&
+ fdc_state[fdc].version >= FDC_82072A) {
+ fdc_configure(fdc);
+ fdc_state[fdc].need_configure = 0;
}
switch (raw_cmd->rate & 0x03) {
@@ -1298,13 +1300,13 @@ static void fdc_specify(void)
break;
case 1:
dtr = 300;
- if (fdc_state[current_fdc].version >= FDC_82078) {
+ if (fdc_state[fdc].version >= FDC_82078) {
/* chose the default rate table, not the one
* where 1 = 2 Mbps */
- output_byte(FD_DRIVESPEC);
- if (need_more_output() == MORE_OUTPUT) {
- output_byte(UNIT(current_drive));
- output_byte(0xc0);
+ output_byte(fdc, FD_DRIVESPEC);
+ if (need_more_output(fdc) == MORE_OUTPUT) {
+ output_byte(fdc, UNIT(drive));
+ output_byte(fdc, 0xc0);
}
}
break;
@@ -1313,14 +1315,14 @@ static void fdc_specify(void)
break;
}
- if (fdc_state[current_fdc].version >= FDC_82072) {
+ if (fdc_state[fdc].version >= FDC_82072) {
scale_dtr = dtr;
hlt_max_code = 0x00; /* 0==256msec*dtr0/dtr (not linear!) */
hut_max_code = 0x0; /* 0==256msec*dtr0/dtr (not linear!) */
}
/* Convert step rate from microseconds to milliseconds and 4 bits */
- srt = 16 - DIV_ROUND_UP(drive_params[current_drive].srt * scale_dtr / 1000,
+ srt = 16 - DIV_ROUND_UP(drive_params[drive].srt * scale_dtr / 1000,
NOMINAL_DTR);
if (slow_floppy)
srt = srt / 4;
@@ -1328,14 +1330,14 @@ static void fdc_specify(void)
SUPBOUND(srt, 0xf);
INFBOUND(srt, 0);
- hlt = DIV_ROUND_UP(drive_params[current_drive].hlt * scale_dtr / 2,
+ hlt = DIV_ROUND_UP(drive_params[drive].hlt * scale_dtr / 2,
NOMINAL_DTR);
if (hlt < 0x01)
hlt = 0x01;
else if (hlt > 0x7f)
hlt = hlt_max_code;
- hut = DIV_ROUND_UP(drive_params[current_drive].hut * scale_dtr / 16,
+ hut = DIV_ROUND_UP(drive_params[drive].hut * scale_dtr / 16,
NOMINAL_DTR);
if (hut < 0x1)
hut = 0x1;
@@ -1346,12 +1348,12 @@ static void fdc_specify(void)
spec2 = (hlt << 1) | (use_virtual_dma & 1);
/* If these parameters did not change, just return with success */
- if (fdc_state[current_fdc].spec1 != spec1 ||
- fdc_state[current_fdc].spec2 != spec2) {
+ if (fdc_state[fdc].spec1 != spec1 ||
+ fdc_state[fdc].spec2 != spec2) {
/* Go ahead and set spec1 and spec2 */
- output_byte(FD_SPECIFY);
- output_byte(fdc_state[current_fdc].spec1 = spec1);
- output_byte(fdc_state[current_fdc].spec2 = spec2);
+ output_byte(fdc, FD_SPECIFY);
+ output_byte(fdc, fdc_state[fdc].spec1 = spec1);
+ output_byte(fdc, fdc_state[fdc].spec2 = spec2);
}
} /* fdc_specify */
@@ -1513,7 +1515,7 @@ static void setup_rw_floppy(void)
r = 0;
for (i = 0; i < raw_cmd->cmd_count; i++)
- r |= output_byte(raw_cmd->cmd[i]);
+ r |= output_byte(current_fdc, raw_cmd->fullcmd[i]);
debugt(__func__, "rw_command");
@@ -1524,7 +1526,7 @@ static void setup_rw_floppy(void)
}
if (!(flags & FD_RAW_INTR)) {
- inr = result();
+ inr = result(current_fdc);
cont->interrupt();
} else if (flags & FD_RAW_NEED_DISK)
fd_watchdog();
@@ -1562,29 +1564,29 @@ static void seek_interrupt(void)
floppy_ready();
}
-static void check_wp(void)
+static void check_wp(int fdc, int drive)
{
- if (test_bit(FD_VERIFY_BIT, &drive_state[current_drive].flags)) {
+ if (test_bit(FD_VERIFY_BIT, &drive_state[drive].flags)) {
/* check write protection */
- output_byte(FD_GETSTATUS);
- output_byte(UNIT(current_drive));
- if (result() != 1) {
- fdc_state[current_fdc].reset = 1;
+ output_byte(fdc, FD_GETSTATUS);
+ output_byte(fdc, UNIT(drive));
+ if (result(fdc) != 1) {
+ fdc_state[fdc].reset = 1;
return;
}
- clear_bit(FD_VERIFY_BIT, &drive_state[current_drive].flags);
+ clear_bit(FD_VERIFY_BIT, &drive_state[drive].flags);
clear_bit(FD_NEED_TWADDLE_BIT,
- &drive_state[current_drive].flags);
- debug_dcl(drive_params[current_drive].flags,
+ &drive_state[drive].flags);
+ debug_dcl(drive_params[drive].flags,
"checking whether disk is write protected\n");
- debug_dcl(drive_params[current_drive].flags, "wp=%x\n",
+ debug_dcl(drive_params[drive].flags, "wp=%x\n",
reply_buffer[ST3] & 0x40);
if (!(reply_buffer[ST3] & 0x40))
set_bit(FD_DISK_WRITABLE_BIT,
- &drive_state[current_drive].flags);
+ &drive_state[drive].flags);
else
clear_bit(FD_DISK_WRITABLE_BIT,
- &drive_state[current_drive].flags);
+ &drive_state[drive].flags);
}
}
@@ -1628,7 +1630,7 @@ static void seek_floppy(void)
track = 1;
}
} else {
- check_wp();
+ check_wp(current_fdc, current_drive);
if (raw_cmd->track != drive_state[current_drive].track &&
(raw_cmd->flags & FD_RAW_NEED_SEEK))
track = raw_cmd->track;
@@ -1639,9 +1641,9 @@ static void seek_floppy(void)
}
do_floppy = seek_interrupt;
- output_byte(FD_SEEK);
- output_byte(UNIT(current_drive));
- if (output_byte(track) < 0) {
+ output_byte(current_fdc, FD_SEEK);
+ output_byte(current_fdc, UNIT(current_drive));
+ if (output_byte(current_fdc, track) < 0) {
reset_fdc();
return;
}
@@ -1742,14 +1744,14 @@ irqreturn_t floppy_interrupt(int irq, void *dev_id)
do_print = !handler && print_unex && initialized;
- inr = result();
+ inr = result(current_fdc);
if (do_print)
print_result("unexpected interrupt", inr);
if (inr == 0) {
int max_sensei = 4;
do {
- output_byte(FD_SENSEI);
- inr = result();
+ output_byte(current_fdc, FD_SENSEI);
+ inr = result(current_fdc);
if (do_print)
print_result("sensei", inr);
max_sensei--;
@@ -1771,8 +1773,8 @@ static void recalibrate_floppy(void)
{
debugt(__func__, "");
do_floppy = recal_interrupt;
- output_byte(FD_RECALIBRATE);
- if (output_byte(UNIT(current_drive)) < 0)
+ output_byte(current_fdc, FD_RECALIBRATE);
+ if (output_byte(current_fdc, UNIT(current_drive)) < 0)
reset_fdc();
}
@@ -1782,7 +1784,7 @@ static void recalibrate_floppy(void)
static void reset_interrupt(void)
{
debugt(__func__, "");
- result(); /* get the status ready for set_fdc */
+ result(current_fdc); /* get the status ready for set_fdc */
if (fdc_state[current_fdc].reset) {
pr_info("reset set in interrupt, calling %ps\n", cont->error);
cont->error(); /* a reset just after a reset. BAD! */
@@ -1792,7 +1794,9 @@ static void reset_interrupt(void)
/*
* reset is done by pulling bit 2 of DOR low for a while (old FDCs),
- * or by setting the self clearing bit 7 of STATUS (newer FDCs)
+ * or by setting the self clearing bit 7 of STATUS (newer FDCs).
+ * This WILL trigger an interrupt, causing the current cont's ->redo()
+ * handler to be called via reset_interrupt().
*/
static void reset_fdc(void)
{
@@ -1800,7 +1804,7 @@ static void reset_fdc(void)
do_floppy = reset_interrupt;
fdc_state[current_fdc].reset = 0;
- reset_fdc_info(0);
+ reset_fdc_info(current_fdc, 0);
/* Pseudo-DMA may intercept 'reset finished' interrupt. */
/* Irrelevant for systems with true DMA (i386). */
@@ -1819,7 +1823,7 @@ static void reset_fdc(void)
}
}
-static void show_floppy(void)
+static void show_floppy(int fdc)
{
int i;
@@ -1842,7 +1846,7 @@ static void show_floppy(void)
print_hex_dump(KERN_INFO, "", DUMP_PREFIX_NONE, 16, 1,
reply_buffer, resultsize, true);
- pr_info("status=%x\n", fdc_inb(current_fdc, FD_STATUS));
+ pr_info("status=%x\n", fdc_inb(fdc, FD_STATUS));
pr_info("fdc_busy=%lu\n", fdc_busy);
if (do_floppy)
pr_info("do_floppy=%ps\n", do_floppy);
@@ -1868,7 +1872,7 @@ static void floppy_shutdown(struct work_struct *arg)
unsigned long flags;
if (initialized)
- show_floppy();
+ show_floppy(current_fdc);
cancel_activity();
flags = claim_dma_lock();
@@ -1934,7 +1938,7 @@ static void floppy_ready(void)
"calling disk change from floppy_ready\n");
if (!(raw_cmd->flags & FD_RAW_NO_MOTOR) &&
disk_change(current_drive) && !drive_params[current_drive].select_delay)
- twaddle(); /* this clears the dcl on certain
+ twaddle(current_fdc, current_drive); /* this clears the dcl on certain
* drive/controller combinations */
#ifdef fd_chose_dma_mode
@@ -1946,20 +1950,20 @@ static void floppy_ready(void)
#endif
if (raw_cmd->flags & (FD_RAW_NEED_SEEK | FD_RAW_NEED_DISK)) {
- perpendicular_mode();
- fdc_specify(); /* must be done here because of hut, hlt ... */
+ perpendicular_mode(current_fdc);
+ fdc_specify(current_fdc, current_drive); /* must be done here because of hut, hlt ... */
seek_floppy();
} else {
if ((raw_cmd->flags & FD_RAW_READ) ||
(raw_cmd->flags & FD_RAW_WRITE))
- fdc_specify();
+ fdc_specify(current_fdc, current_drive);
setup_rw_floppy();
}
}
static void floppy_start(void)
{
- reschedule_timeout(current_reqD, "floppy start");
+ reschedule_timeout(current_drive, "floppy start");
scandrives();
debug_dcl(drive_params[current_drive].flags,
@@ -2004,6 +2008,9 @@ static const struct cont_t intr_cont = {
.done = (done_f)empty
};
+/* schedules handler, waiting for completion. May be interrupted, will then
+ * return -EINTR, in which case the driver will automatically be unlocked.
+ */
static int wait_til_done(void (*handler)(void), bool interruptible)
{
int ret;
@@ -2059,18 +2066,19 @@ static void success_and_wakeup(void)
* ==========================
*/
-static int next_valid_format(void)
+static int next_valid_format(int drive)
{
int probed_format;
- probed_format = drive_state[current_drive].probed_format;
+ probed_format = drive_state[drive].probed_format;
while (1) {
- if (probed_format >= 8 || !drive_params[current_drive].autodetect[probed_format]) {
- drive_state[current_drive].probed_format = 0;
+ if (probed_format >= FD_AUTODETECT_SIZE ||
+ !drive_params[drive].autodetect[probed_format]) {
+ drive_state[drive].probed_format = 0;
return 1;
}
- if (floppy_type[drive_params[current_drive].autodetect[probed_format]].sect) {
- drive_state[current_drive].probed_format = probed_format;
+ if (floppy_type[drive_params[drive].autodetect[probed_format]].sect) {
+ drive_state[drive].probed_format = probed_format;
return 0;
}
probed_format++;
@@ -2083,7 +2091,7 @@ static void bad_flp_intr(void)
if (probing) {
drive_state[current_drive].probed_format++;
- if (!next_valid_format())
+ if (!next_valid_format(current_drive))
return;
}
err_count = ++(*errors);
@@ -2843,6 +2851,9 @@ static int set_next_request(void)
return current_req != NULL;
}
+/* Starts or continues processing request. Will automatically unlock the
+ * driver at end of request.
+ */
static void redo_fd_request(void)
{
int drive;
@@ -2867,7 +2878,7 @@ do_request:
}
drive = (long)current_req->rq_disk->private_data;
set_fdc(drive);
- reschedule_timeout(current_reqD, "redo fd request");
+ reschedule_timeout(current_drive, "redo fd request");
set_floppy(drive);
raw_cmd = &default_raw_cmd;
@@ -2885,7 +2896,7 @@ do_request:
if (!_floppy) { /* Autodetection */
if (!probing) {
drive_state[current_drive].probed_format = 0;
- if (next_valid_format()) {
+ if (next_valid_format(current_drive)) {
DPRINT("no autodetectable formats\n");
_floppy = NULL;
request_done(0);
@@ -2904,7 +2915,7 @@ do_request:
}
if (test_bit(FD_NEED_TWADDLE_BIT, &drive_state[current_drive].flags))
- twaddle();
+ twaddle(current_fdc, current_drive);
schedule_bh(floppy_start);
debugt(__func__, "queue fd request");
return;
@@ -2917,6 +2928,7 @@ static const struct cont_t rw_cont = {
.done = request_done
};
+/* schedule the request and automatically unlock the driver on completion */
static void process_fd_request(void)
{
cont = &rw_cont;
@@ -2938,17 +2950,17 @@ static blk_status_t floppy_queue_rq(struct blk_mq_hw_ctx *hctx,
(unsigned long long) current_req->cmd_flags))
return BLK_STS_IOERR;
- spin_lock_irq(&floppy_lock);
- list_add_tail(&bd->rq->queuelist, &floppy_reqs);
- spin_unlock_irq(&floppy_lock);
-
if (test_and_set_bit(0, &fdc_busy)) {
/* fdc busy, this new request will be treated when the
current one is done */
is_alive(__func__, "old request running");
- return BLK_STS_OK;
+ return BLK_STS_RESOURCE;
}
+ spin_lock_irq(&floppy_lock);
+ list_add_tail(&bd->rq->queuelist, &floppy_reqs);
+ spin_unlock_irq(&floppy_lock);
+
command_status = FD_COMMAND_NONE;
__reschedule_timeout(MAXTIMEOUT, "fd_request");
set_fdc(0);
@@ -2996,6 +3008,10 @@ static const struct cont_t reset_cont = {
.done = generic_done
};
+/*
+ * Resets the FDC connected to drive <drive>.
+ * Both current_drive and current_fdc are changed to match the new drive.
+ */
static int user_reset_fdc(int drive, int arg, bool interruptible)
{
int ret;
@@ -3006,6 +3022,9 @@ static int user_reset_fdc(int drive, int arg, bool interruptible)
if (arg == FD_RESET_ALWAYS)
fdc_state[current_fdc].reset = 1;
if (fdc_state[current_fdc].reset) {
+ /* note: reset_fdc will take care of unlocking the driver
+ * on completion.
+ */
cont = &reset_cont;
ret = wait_til_done(reset_fdc, interruptible);
if (ret == -EINTR)
@@ -3059,7 +3078,7 @@ static void raw_cmd_done(int flag)
raw_cmd->flags |= FD_RAW_HARDFAILURE;
} else {
raw_cmd->reply_count = inr;
- if (raw_cmd->reply_count > MAX_REPLIES)
+ if (raw_cmd->reply_count > FD_RAW_REPLY_SIZE)
raw_cmd->reply_count = 0;
for (i = 0; i < raw_cmd->reply_count; i++)
raw_cmd->reply[i] = reply_buffer[i];
@@ -3170,18 +3189,10 @@ loop:
if (ret)
return -EFAULT;
param += sizeof(struct floppy_raw_cmd);
- if (ptr->cmd_count > 33)
- /* the command may now also take up the space
- * initially intended for the reply & the
- * reply count. Needed for long 82078 commands
- * such as RESTORE, which takes ... 17 command
- * bytes. Murphy's law #137: When you reserve
- * 16 bytes for a structure, you'll one day
- * discover that you really need 17...
- */
+ if (ptr->cmd_count > FD_RAW_CMD_FULLSIZE)
return -EINVAL;
- for (i = 0; i < 16; i++)
+ for (i = 0; i < FD_RAW_REPLY_SIZE; i++)
ptr->reply[i] = 0;
ptr->resultcode = 0;
@@ -3423,13 +3434,13 @@ static int fd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
return 0;
}
-static bool valid_floppy_drive_params(const short autodetect[8],
+static bool valid_floppy_drive_params(const short autodetect[FD_AUTODETECT_SIZE],
int native_format)
{
size_t floppy_type_size = ARRAY_SIZE(floppy_type);
size_t i = 0;
- for (i = 0; i < 8; ++i) {
+ for (i = 0; i < FD_AUTODETECT_SIZE; ++i) {
if (autodetect[i] < 0 ||
autodetect[i] >= floppy_type_size)
return false;
@@ -3610,7 +3621,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
case FDTWADDLE:
if (lock_fdc(drive))
return -EINTR;
- twaddle();
+ twaddle(current_fdc, current_drive);
process_fd_request();
return 0;
default:
@@ -3654,7 +3665,7 @@ struct compat_floppy_drive_params {
struct floppy_max_errors max_errors;
char flags;
char read_track;
- short autodetect[8];
+ short autodetect[FD_AUTODETECT_SIZE];
compat_int_t checkfreq;
compat_int_t native_format;
};
@@ -4298,79 +4309,79 @@ static const struct block_device_operations floppy_fops = {
/* Determine the floppy disk controller type */
/* This routine was written by David C. Niemi */
-static char __init get_fdc_version(void)
+static char __init get_fdc_version(int fdc)
{
int r;
- output_byte(FD_DUMPREGS); /* 82072 and better know DUMPREGS */
- if (fdc_state[current_fdc].reset)
+ output_byte(fdc, FD_DUMPREGS); /* 82072 and better know DUMPREGS */
+ if (fdc_state[fdc].reset)
return FDC_NONE;
- r = result();
+ r = result(fdc);
if (r <= 0x00)
return FDC_NONE; /* No FDC present ??? */
if ((r == 1) && (reply_buffer[0] == 0x80)) {
- pr_info("FDC %d is an 8272A\n", current_fdc);
+ pr_info("FDC %d is an 8272A\n", fdc);
return FDC_8272A; /* 8272a/765 don't know DUMPREGS */
}
if (r != 10) {
pr_info("FDC %d init: DUMPREGS: unexpected return of %d bytes.\n",
- current_fdc, r);
+ fdc, r);
return FDC_UNKNOWN;
}
- if (!fdc_configure()) {
- pr_info("FDC %d is an 82072\n", current_fdc);
+ if (!fdc_configure(fdc)) {
+ pr_info("FDC %d is an 82072\n", fdc);
return FDC_82072; /* 82072 doesn't know CONFIGURE */
}
- output_byte(FD_PERPENDICULAR);
- if (need_more_output() == MORE_OUTPUT) {
- output_byte(0);
+ output_byte(fdc, FD_PERPENDICULAR);
+ if (need_more_output(fdc) == MORE_OUTPUT) {
+ output_byte(fdc, 0);
} else {
- pr_info("FDC %d is an 82072A\n", current_fdc);
+ pr_info("FDC %d is an 82072A\n", fdc);
return FDC_82072A; /* 82072A as found on Sparcs. */
}
- output_byte(FD_UNLOCK);
- r = result();
+ output_byte(fdc, FD_UNLOCK);
+ r = result(fdc);
if ((r == 1) && (reply_buffer[0] == 0x80)) {
- pr_info("FDC %d is a pre-1991 82077\n", current_fdc);
+ pr_info("FDC %d is a pre-1991 82077\n", fdc);
return FDC_82077_ORIG; /* Pre-1991 82077, doesn't know
* LOCK/UNLOCK */
}
if ((r != 1) || (reply_buffer[0] != 0x00)) {
pr_info("FDC %d init: UNLOCK: unexpected return of %d bytes.\n",
- current_fdc, r);
+ fdc, r);
return FDC_UNKNOWN;
}
- output_byte(FD_PARTID);
- r = result();
+ output_byte(fdc, FD_PARTID);
+ r = result(fdc);
if (r != 1) {
pr_info("FDC %d init: PARTID: unexpected return of %d bytes.\n",
- current_fdc, r);
+ fdc, r);
return FDC_UNKNOWN;
}
if (reply_buffer[0] == 0x80) {
- pr_info("FDC %d is a post-1991 82077\n", current_fdc);
+ pr_info("FDC %d is a post-1991 82077\n", fdc);
return FDC_82077; /* Revised 82077AA passes all the tests */
}
switch (reply_buffer[0] >> 5) {
case 0x0:
/* Either a 82078-1 or a 82078SL running at 5Volt */
- pr_info("FDC %d is an 82078.\n", current_fdc);
+ pr_info("FDC %d is an 82078.\n", fdc);
return FDC_82078;
case 0x1:
- pr_info("FDC %d is a 44pin 82078\n", current_fdc);
+ pr_info("FDC %d is a 44pin 82078\n", fdc);
return FDC_82078;
case 0x2:
- pr_info("FDC %d is a S82078B\n", current_fdc);
+ pr_info("FDC %d is a S82078B\n", fdc);
return FDC_S82078B;
case 0x3:
- pr_info("FDC %d is a National Semiconductor PC87306\n", current_fdc);
+ pr_info("FDC %d is a National Semiconductor PC87306\n", fdc);
return FDC_87306;
default:
pr_info("FDC %d init: 82078 variant with unknown PARTID=%d.\n",
- current_fdc, reply_buffer[0] >> 5);
+ fdc, reply_buffer[0] >> 5);
return FDC_82078_UNKN;
}
} /* get_fdc_version */
@@ -4534,11 +4545,13 @@ static void floppy_device_release(struct device *dev)
static int floppy_resume(struct device *dev)
{
int fdc;
+ int saved_drive;
+ saved_drive = current_drive;
for (fdc = 0; fdc < N_FDC; fdc++)
if (fdc_state[fdc].address != -1)
- user_reset_fdc(-1, FD_RESET_ALWAYS, false);
-
+ user_reset_fdc(REVDRIVE(fdc, 0), FD_RESET_ALWAYS, false);
+ set_fdc(saved_drive);
return 0;
}
@@ -4646,16 +4659,15 @@ static int __init do_floppy_init(void)
config_types();
for (i = 0; i < N_FDC; i++) {
- current_fdc = i;
- memset(&fdc_state[current_fdc], 0, sizeof(*fdc_state));
- fdc_state[current_fdc].dtr = -1;
- fdc_state[current_fdc].dor = 0x4;
+ memset(&fdc_state[i], 0, sizeof(*fdc_state));
+ fdc_state[i].dtr = -1;
+ fdc_state[i].dor = 0x4;
#if defined(__sparc__) || defined(__mc68000__)
/*sparcs/sun3x don't have a DOR reset which we can fall back on to */
#ifdef __mc68000__
if (MACH_IS_SUN3X)
#endif
- fdc_state[current_fdc].version = FDC_82072A;
+ fdc_state[i].version = FDC_82072A;
#endif
}
@@ -4697,30 +4709,29 @@ static int __init do_floppy_init(void)
msleep(10);
for (i = 0; i < N_FDC; i++) {
- current_fdc = i;
- fdc_state[current_fdc].driver_version = FD_DRIVER_VERSION;
+ fdc_state[i].driver_version = FD_DRIVER_VERSION;
for (unit = 0; unit < 4; unit++)
- fdc_state[current_fdc].track[unit] = 0;
- if (fdc_state[current_fdc].address == -1)
+ fdc_state[i].track[unit] = 0;
+ if (fdc_state[i].address == -1)
continue;
- fdc_state[current_fdc].rawcmd = 2;
- if (user_reset_fdc(-1, FD_RESET_ALWAYS, false)) {
+ fdc_state[i].rawcmd = 2;
+ if (user_reset_fdc(REVDRIVE(i, 0), FD_RESET_ALWAYS, false)) {
/* free ioports reserved by floppy_grab_irq_and_dma() */
- floppy_release_regions(current_fdc);
- fdc_state[current_fdc].address = -1;
- fdc_state[current_fdc].version = FDC_NONE;
+ floppy_release_regions(i);
+ fdc_state[i].address = -1;
+ fdc_state[i].version = FDC_NONE;
continue;
}
/* Try to determine the floppy controller type */
- fdc_state[current_fdc].version = get_fdc_version();
- if (fdc_state[current_fdc].version == FDC_NONE) {
+ fdc_state[i].version = get_fdc_version(i);
+ if (fdc_state[i].version == FDC_NONE) {
/* free ioports reserved by floppy_grab_irq_and_dma() */
- floppy_release_regions(current_fdc);
- fdc_state[current_fdc].address = -1;
+ floppy_release_regions(i);
+ fdc_state[i].address = -1;
continue;
}
if (can_use_virtual_dma == 2 &&
- fdc_state[current_fdc].version < FDC_82072A)
+ fdc_state[i].version < FDC_82072A)
can_use_virtual_dma = 0;
have_no_fdc = 0;
@@ -4728,7 +4739,7 @@ static int __init do_floppy_init(void)
* properly, so force a reset for the standard FDC clones,
* to avoid interrupt garbage.
*/
- user_reset_fdc(-1, FD_RESET_ALWAYS, false);
+ user_reset_fdc(REVDRIVE(i, 0), FD_RESET_ALWAYS, false);
}
current_fdc = 0;
cancel_delayed_work(&fd_timeout);
@@ -4855,6 +4866,8 @@ static void floppy_release_regions(int fdc)
static int floppy_grab_irq_and_dma(void)
{
+ int fdc;
+
if (atomic_inc_return(&usage_count) > 1)
return 0;
@@ -4882,24 +4895,24 @@ static int floppy_grab_irq_and_dma(void)
}
}
- for (current_fdc = 0; current_fdc < N_FDC; current_fdc++) {
- if (fdc_state[current_fdc].address != -1) {
- if (floppy_request_regions(current_fdc))
+ for (fdc = 0; fdc < N_FDC; fdc++) {
+ if (fdc_state[fdc].address != -1) {
+ if (floppy_request_regions(fdc))
goto cleanup;
}
}
- for (current_fdc = 0; current_fdc < N_FDC; current_fdc++) {
- if (fdc_state[current_fdc].address != -1) {
- reset_fdc_info(1);
- fdc_outb(fdc_state[current_fdc].dor, current_fdc, FD_DOR);
+ for (fdc = 0; fdc < N_FDC; fdc++) {
+ if (fdc_state[fdc].address != -1) {
+ reset_fdc_info(fdc, 1);
+ fdc_outb(fdc_state[fdc].dor, fdc, FD_DOR);
}
}
- current_fdc = 0;
+
set_dor(0, ~0, 8); /* avoid immediate interrupt */
- for (current_fdc = 0; current_fdc < N_FDC; current_fdc++)
- if (fdc_state[current_fdc].address != -1)
- fdc_outb(fdc_state[current_fdc].dor, current_fdc, FD_DOR);
+ for (fdc = 0; fdc < N_FDC; fdc++)
+ if (fdc_state[fdc].address != -1)
+ fdc_outb(fdc_state[fdc].dor, fdc, FD_DOR);
/*
* The driver will try and free resources and relies on us
* to know if they were allocated or not.
@@ -4910,15 +4923,16 @@ static int floppy_grab_irq_and_dma(void)
cleanup:
fd_free_irq();
fd_free_dma();
- while (--current_fdc >= 0)
- floppy_release_regions(current_fdc);
+ while (--fdc >= 0)
+ floppy_release_regions(fdc);
+ current_fdc = 0;
atomic_dec(&usage_count);
return -1;
}
static void floppy_release_irq_and_dma(void)
{
- int old_fdc;
+ int fdc;
#ifndef __sparc__
int drive;
#endif
@@ -4959,11 +4973,9 @@ static void floppy_release_irq_and_dma(void)
pr_info("auxiliary floppy timer still active\n");
if (work_pending(&floppy_work))
pr_info("work still pending\n");
- old_fdc = current_fdc;
- for (current_fdc = 0; current_fdc < N_FDC; current_fdc++)
- if (fdc_state[current_fdc].address != -1)
- floppy_release_regions(current_fdc);
- current_fdc = old_fdc;
+ for (fdc = 0; fdc < N_FDC; fdc++)
+ if (fdc_state[fdc].address != -1)
+ floppy_release_regions(fdc);
}
#ifdef MODULE
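A recurring theme in the floppy changes is that output_byte(), result(), wait_til_ready() and friends now take the FDC (and, where needed, the drive) explicitly instead of reading the current_fdc/current_drive globals. A sketch of a helper written against the new convention; the function itself is hypothetical, while output_byte(), result(), reply_buffer and FD_SENSEI are the driver symbols touched above.

/*
 * Hypothetical helper following the reworked calling convention: the FDC to
 * talk to is passed in rather than taken from current_fdc.
 */
static int fdc_sense_interrupt(int fdc, unsigned char *st0, unsigned char *pcn)
{
	if (output_byte(fdc, FD_SENSEI) < 0)
		return -EIO;
	if (result(fdc) < 2)		/* SENSE INTERRUPT returns ST0 and PCN */
		return -EIO;

	*st0 = reply_buffer[0];
	*pcn = reply_buffer[1];
	return 0;
}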
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index da693e6a834e..2e96d8b8758b 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -228,26 +228,36 @@ static void __loop_update_dio(struct loop_device *lo, bool dio)
blk_mq_unfreeze_queue(lo->lo_queue);
}
+/**
+ * loop_validate_block_size() - validates the passed in block size
+ * @bsize: size to validate
+ */
static int
-figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit)
+loop_validate_block_size(unsigned short bsize)
{
- loff_t size = get_size(offset, sizelimit, lo->lo_backing_file);
- sector_t x = (sector_t)size;
- struct block_device *bdev = lo->lo_device;
+ if (bsize < 512 || bsize > PAGE_SIZE || !is_power_of_2(bsize))
+ return -EINVAL;
- if (unlikely((loff_t)x != size))
- return -EFBIG;
- if (lo->lo_offset != offset)
- lo->lo_offset = offset;
- if (lo->lo_sizelimit != sizelimit)
- lo->lo_sizelimit = sizelimit;
- set_capacity(lo->lo_disk, x);
- bd_set_size(bdev, (loff_t)get_capacity(bdev->bd_disk) << 9);
- /* let user-space know about the new size */
- kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE);
return 0;
}
+/**
+ * loop_set_size() - sets device size and notifies userspace
+ * @lo: struct loop_device to set the size for
+ * @size: new size of the loop device
+ *
+ * Callers must validate that the size passed into this function fits into
+ * a sector_t, e.g. using loop_validate_size()
+ */
+static void loop_set_size(struct loop_device *lo, loff_t size)
+{
+ struct block_device *bdev = lo->lo_device;
+
+ bd_set_size(bdev, size << SECTOR_SHIFT);
+
+ set_capacity_revalidate_and_notify(lo->lo_disk, size, false);
+}
+
static inline int
lo_do_transfer(struct loop_device *lo, int cmd,
struct page *rpage, unsigned roffs,
@@ -634,8 +644,8 @@ static int do_req_filebacked(struct loop_device *lo, struct request *rq)
static inline void loop_update_dio(struct loop_device *lo)
{
- __loop_update_dio(lo, io_is_direct(lo->lo_backing_file) |
- lo->use_dio);
+ __loop_update_dio(lo, (lo->lo_backing_file->f_flags & O_DIRECT) |
+ lo->use_dio);
}
static void loop_reread_partitions(struct loop_device *lo,
@@ -919,7 +929,7 @@ static void loop_unprepare_queue(struct loop_device *lo)
static int loop_kthread_worker_fn(void *worker_ptr)
{
- current->flags |= PF_LESS_THROTTLE | PF_MEMALLOC_NOIO;
+ current->flags |= PF_LOCAL_THROTTLE | PF_MEMALLOC_NOIO;
return kthread_worker_fn(worker_ptr);
}
@@ -952,23 +962,125 @@ static void loop_update_rotational(struct loop_device *lo)
blk_queue_flag_clear(QUEUE_FLAG_NONROT, q);
}
-static int loop_set_fd(struct loop_device *lo, fmode_t mode,
- struct block_device *bdev, unsigned int arg)
+static int
+loop_release_xfer(struct loop_device *lo)
+{
+ int err = 0;
+ struct loop_func_table *xfer = lo->lo_encryption;
+
+ if (xfer) {
+ if (xfer->release)
+ err = xfer->release(lo);
+ lo->transfer = NULL;
+ lo->lo_encryption = NULL;
+ module_put(xfer->owner);
+ }
+ return err;
+}
+
+static int
+loop_init_xfer(struct loop_device *lo, struct loop_func_table *xfer,
+ const struct loop_info64 *i)
+{
+ int err = 0;
+
+ if (xfer) {
+ struct module *owner = xfer->owner;
+
+ if (!try_module_get(owner))
+ return -EINVAL;
+ if (xfer->init)
+ err = xfer->init(lo, i);
+ if (err)
+ module_put(owner);
+ else
+ lo->lo_encryption = xfer;
+ }
+ return err;
+}
+
+/**
+ * loop_set_status_from_info - configure device from loop_info
+ * @lo: struct loop_device to configure
+ * @info: struct loop_info64 to configure the device with
+ *
+ * Configures the loop device parameters according to the passed
+ * in loop_info64 configuration.
+ */
+static int
+loop_set_status_from_info(struct loop_device *lo,
+ const struct loop_info64 *info)
+{
+ int err;
+ struct loop_func_table *xfer;
+ kuid_t uid = current_uid();
+
+ if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE)
+ return -EINVAL;
+
+ err = loop_release_xfer(lo);
+ if (err)
+ return err;
+
+ if (info->lo_encrypt_type) {
+ unsigned int type = info->lo_encrypt_type;
+
+ if (type >= MAX_LO_CRYPT)
+ return -EINVAL;
+ xfer = xfer_funcs[type];
+ if (xfer == NULL)
+ return -EINVAL;
+ } else
+ xfer = NULL;
+
+ err = loop_init_xfer(lo, xfer, info);
+ if (err)
+ return err;
+
+ lo->lo_offset = info->lo_offset;
+ lo->lo_sizelimit = info->lo_sizelimit;
+ memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE);
+ memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE);
+ lo->lo_file_name[LO_NAME_SIZE-1] = 0;
+ lo->lo_crypt_name[LO_NAME_SIZE-1] = 0;
+
+ if (!xfer)
+ xfer = &none_funcs;
+ lo->transfer = xfer->transfer;
+ lo->ioctl = xfer->ioctl;
+
+ lo->lo_flags = info->lo_flags;
+
+ lo->lo_encrypt_key_size = info->lo_encrypt_key_size;
+ lo->lo_init[0] = info->lo_init[0];
+ lo->lo_init[1] = info->lo_init[1];
+ if (info->lo_encrypt_key_size) {
+ memcpy(lo->lo_encrypt_key, info->lo_encrypt_key,
+ info->lo_encrypt_key_size);
+ lo->lo_key_owner = uid;
+ }
+
+ return 0;
+}
+
+static int loop_configure(struct loop_device *lo, fmode_t mode,
+ struct block_device *bdev,
+ const struct loop_config *config)
{
struct file *file;
struct inode *inode;
struct address_space *mapping;
struct block_device *claimed_bdev = NULL;
- int lo_flags = 0;
int error;
loff_t size;
bool partscan;
+ unsigned short bsize;
/* This is safe, since we have a reference from open(). */
__module_get(THIS_MODULE);
error = -EBADF;
- file = fget(arg);
+ file = fget(config->fd);
if (!file)
goto out;
@@ -977,7 +1089,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
* here to avoid changing device under exclusive owner.
*/
if (!(mode & FMODE_EXCL)) {
- claimed_bdev = bd_start_claiming(bdev, loop_set_fd);
+ claimed_bdev = bd_start_claiming(bdev, loop_configure);
if (IS_ERR(claimed_bdev)) {
error = PTR_ERR(claimed_bdev);
goto out_putf;
@@ -999,52 +1111,58 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
mapping = file->f_mapping;
inode = mapping->host;
+ size = get_loop_size(lo, file);
+
+ if ((config->info.lo_flags & ~LOOP_CONFIGURE_SETTABLE_FLAGS) != 0) {
+ error = -EINVAL;
+ goto out_unlock;
+ }
+
+ if (config->block_size) {
+ error = loop_validate_block_size(config->block_size);
+ if (error)
+ goto out_unlock;
+ }
+
+ error = loop_set_status_from_info(lo, &config->info);
+ if (error)
+ goto out_unlock;
+
if (!(file->f_mode & FMODE_WRITE) || !(mode & FMODE_WRITE) ||
!file->f_op->write_iter)
- lo_flags |= LO_FLAGS_READ_ONLY;
+ lo->lo_flags |= LO_FLAGS_READ_ONLY;
- error = -EFBIG;
- size = get_loop_size(lo, file);
- if ((loff_t)(sector_t)size != size)
- goto out_unlock;
error = loop_prepare_queue(lo);
if (error)
goto out_unlock;
- error = 0;
-
- set_device_ro(bdev, (lo_flags & LO_FLAGS_READ_ONLY) != 0);
+ set_device_ro(bdev, (lo->lo_flags & LO_FLAGS_READ_ONLY) != 0);
- lo->use_dio = false;
+ lo->use_dio = lo->lo_flags & LO_FLAGS_DIRECT_IO;
lo->lo_device = bdev;
- lo->lo_flags = lo_flags;
lo->lo_backing_file = file;
- lo->transfer = NULL;
- lo->ioctl = NULL;
- lo->lo_sizelimit = 0;
lo->old_gfp_mask = mapping_gfp_mask(mapping);
mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
- if (!(lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync)
+ if (!(lo->lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync)
blk_queue_write_cache(lo->lo_queue, true, false);
- if (io_is_direct(lo->lo_backing_file) && inode->i_sb->s_bdev) {
+ if (config->block_size)
+ bsize = config->block_size;
+ else if ((lo->lo_backing_file->f_flags & O_DIRECT) && inode->i_sb->s_bdev)
/* In case of direct I/O, match underlying block size */
- unsigned short bsize = bdev_logical_block_size(
- inode->i_sb->s_bdev);
+ bsize = bdev_logical_block_size(inode->i_sb->s_bdev);
+ else
+ bsize = 512;
- blk_queue_logical_block_size(lo->lo_queue, bsize);
- blk_queue_physical_block_size(lo->lo_queue, bsize);
- blk_queue_io_min(lo->lo_queue, bsize);
- }
+ blk_queue_logical_block_size(lo->lo_queue, bsize);
+ blk_queue_physical_block_size(lo->lo_queue, bsize);
+ blk_queue_io_min(lo->lo_queue, bsize);
loop_update_rotational(lo);
loop_update_dio(lo);
- set_capacity(lo->lo_disk, size);
- bd_set_size(bdev, size << 9);
loop_sysfs_init(lo);
- /* let user-space know about the new size */
- kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE);
+ loop_set_size(lo, size);
set_blocksize(bdev, S_ISBLK(inode->i_mode) ?
block_size(inode->i_bdev) : PAGE_SIZE);
@@ -1062,14 +1180,14 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
if (partscan)
loop_reread_partitions(lo, bdev);
if (claimed_bdev)
- bd_abort_claiming(bdev, claimed_bdev, loop_set_fd);
+ bd_abort_claiming(bdev, claimed_bdev, loop_configure);
return 0;
out_unlock:
mutex_unlock(&loop_ctl_mutex);
out_bdev:
if (claimed_bdev)
- bd_abort_claiming(bdev, claimed_bdev, loop_set_fd);
+ bd_abort_claiming(bdev, claimed_bdev, loop_configure);
out_putf:
fput(file);
out:
@@ -1078,43 +1196,6 @@ out:
return error;
}
-static int
-loop_release_xfer(struct loop_device *lo)
-{
- int err = 0;
- struct loop_func_table *xfer = lo->lo_encryption;
-
- if (xfer) {
- if (xfer->release)
- err = xfer->release(lo);
- lo->transfer = NULL;
- lo->lo_encryption = NULL;
- module_put(xfer->owner);
- }
- return err;
-}
-
-static int
-loop_init_xfer(struct loop_device *lo, struct loop_func_table *xfer,
- const struct loop_info64 *i)
-{
- int err = 0;
-
- if (xfer) {
- struct module *owner = xfer->owner;
-
- if (!try_module_get(owner))
- return -EINVAL;
- if (xfer->init)
- err = xfer->init(lo, i);
- if (err)
- module_put(owner);
- else
- lo->lo_encryption = xfer;
- }
- return err;
-}
-
static int __loop_clr_fd(struct loop_device *lo, bool release)
{
struct file *filp = NULL;
@@ -1263,10 +1344,11 @@ static int
loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
{
int err;
- struct loop_func_table *xfer;
- kuid_t uid = current_uid();
struct block_device *bdev;
+ kuid_t uid = current_uid();
+ int prev_lo_flags;
bool partscan = false;
+ bool size_changed = false;
err = mutex_lock_killable(&loop_ctl_mutex);
if (err)
@@ -1281,13 +1363,10 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
err = -ENXIO;
goto out_unlock;
}
- if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE) {
- err = -EINVAL;
- goto out_unlock;
- }
if (lo->lo_offset != info->lo_offset ||
lo->lo_sizelimit != info->lo_sizelimit) {
+ size_changed = true;
sync_blockdev(lo->lo_device);
kill_bdev(lo->lo_device);
}
@@ -1295,79 +1374,44 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
/* I/O need to be drained during transfer transition */
blk_mq_freeze_queue(lo->lo_queue);
- err = loop_release_xfer(lo);
- if (err)
+ if (size_changed && lo->lo_device->bd_inode->i_mapping->nrpages) {
+ /* If any pages were dirtied after kill_bdev(), try again */
+ err = -EAGAIN;
+ pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n",
+ __func__, lo->lo_number, lo->lo_file_name,
+ lo->lo_device->bd_inode->i_mapping->nrpages);
goto out_unfreeze;
+ }
- if (info->lo_encrypt_type) {
- unsigned int type = info->lo_encrypt_type;
-
- if (type >= MAX_LO_CRYPT) {
- err = -EINVAL;
- goto out_unfreeze;
- }
- xfer = xfer_funcs[type];
- if (xfer == NULL) {
- err = -EINVAL;
- goto out_unfreeze;
- }
- } else
- xfer = NULL;
+ prev_lo_flags = lo->lo_flags;
- err = loop_init_xfer(lo, xfer, info);
+ err = loop_set_status_from_info(lo, info);
if (err)
goto out_unfreeze;
- if (lo->lo_offset != info->lo_offset ||
- lo->lo_sizelimit != info->lo_sizelimit) {
- /* kill_bdev should have truncated all the pages */
- if (lo->lo_device->bd_inode->i_mapping->nrpages) {
- err = -EAGAIN;
- pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n",
- __func__, lo->lo_number, lo->lo_file_name,
- lo->lo_device->bd_inode->i_mapping->nrpages);
- goto out_unfreeze;
- }
- if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) {
- err = -EFBIG;
- goto out_unfreeze;
- }
+ /* Mask out flags that can't be set using LOOP_SET_STATUS. */
+ lo->lo_flags &= ~LOOP_SET_STATUS_SETTABLE_FLAGS;
+ /* For those flags, use the previous values instead */
+ lo->lo_flags |= prev_lo_flags & ~LOOP_SET_STATUS_SETTABLE_FLAGS;
+ /* For flags that can't be cleared, use previous values too */
+ lo->lo_flags |= prev_lo_flags & ~LOOP_SET_STATUS_CLEARABLE_FLAGS;
+
+ if (size_changed) {
+ loff_t new_size = get_size(lo->lo_offset, lo->lo_sizelimit,
+ lo->lo_backing_file);
+ loop_set_size(lo, new_size);
}
loop_config_discard(lo);
- memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE);
- memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE);
- lo->lo_file_name[LO_NAME_SIZE-1] = 0;
- lo->lo_crypt_name[LO_NAME_SIZE-1] = 0;
-
- if (!xfer)
- xfer = &none_funcs;
- lo->transfer = xfer->transfer;
- lo->ioctl = xfer->ioctl;
-
- if ((lo->lo_flags & LO_FLAGS_AUTOCLEAR) !=
- (info->lo_flags & LO_FLAGS_AUTOCLEAR))
- lo->lo_flags ^= LO_FLAGS_AUTOCLEAR;
-
- lo->lo_encrypt_key_size = info->lo_encrypt_key_size;
- lo->lo_init[0] = info->lo_init[0];
- lo->lo_init[1] = info->lo_init[1];
- if (info->lo_encrypt_key_size) {
- memcpy(lo->lo_encrypt_key, info->lo_encrypt_key,
- info->lo_encrypt_key_size);
- lo->lo_key_owner = uid;
- }
-
/* update dio if lo_offset or transfer is changed */
__loop_update_dio(lo, lo->use_dio);
out_unfreeze:
blk_mq_unfreeze_queue(lo->lo_queue);
- if (!err && (info->lo_flags & LO_FLAGS_PARTSCAN) &&
- !(lo->lo_flags & LO_FLAGS_PARTSCAN)) {
- lo->lo_flags |= LO_FLAGS_PARTSCAN;
+ if (!err && (lo->lo_flags & LO_FLAGS_PARTSCAN) &&
+ !(prev_lo_flags & LO_FLAGS_PARTSCAN)) {
lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN;
bdev = lo->lo_device;
partscan = true;
@@ -1531,10 +1575,15 @@ loop_get_status64(struct loop_device *lo, struct loop_info64 __user *arg) {
static int loop_set_capacity(struct loop_device *lo)
{
+ loff_t size;
+
if (unlikely(lo->lo_state != Lo_bound))
return -ENXIO;
- return figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit);
+ size = get_loop_size(lo, lo->lo_backing_file);
+ loop_set_size(lo, size);
+
+ return 0;
}
static int loop_set_dio(struct loop_device *lo, unsigned long arg)
@@ -1558,8 +1607,9 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg)
if (lo->lo_state != Lo_bound)
return -ENXIO;
- if (arg < 512 || arg > PAGE_SIZE || !is_power_of_2(arg))
- return -EINVAL;
+ err = loop_validate_block_size(arg);
+ if (err)
+ return err;
if (lo->lo_queue->limits.logical_block_size == arg)
return 0;
@@ -1617,11 +1667,31 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode,
unsigned int cmd, unsigned long arg)
{
struct loop_device *lo = bdev->bd_disk->private_data;
+ void __user *argp = (void __user *) arg;
int err;
switch (cmd) {
- case LOOP_SET_FD:
- return loop_set_fd(lo, mode, bdev, arg);
+ case LOOP_SET_FD: {
+ /*
+ * Legacy case - pass in a zeroed out struct loop_config with
+ * only the file descriptor set, which corresponds to the
+ * default parameters we'd have used otherwise.
+ */
+ struct loop_config config;
+
+ memset(&config, 0, sizeof(config));
+ config.fd = arg;
+
+ return loop_configure(lo, mode, bdev, &config);
+ }
+ case LOOP_CONFIGURE: {
+ struct loop_config config;
+
+ if (copy_from_user(&config, argp, sizeof(config)))
+ return -EFAULT;
+
+ return loop_configure(lo, mode, bdev, &config);
+ }
case LOOP_CHANGE_FD:
return loop_change_fd(lo, bdev, arg);
case LOOP_CLR_FD:
@@ -1629,21 +1699,19 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode,
case LOOP_SET_STATUS:
err = -EPERM;
if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) {
- err = loop_set_status_old(lo,
- (struct loop_info __user *)arg);
+ err = loop_set_status_old(lo, argp);
}
break;
case LOOP_GET_STATUS:
- return loop_get_status_old(lo, (struct loop_info __user *) arg);
+ return loop_get_status_old(lo, argp);
case LOOP_SET_STATUS64:
err = -EPERM;
if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) {
- err = loop_set_status64(lo,
- (struct loop_info64 __user *) arg);
+ err = loop_set_status64(lo, argp);
}
break;
case LOOP_GET_STATUS64:
- return loop_get_status64(lo, (struct loop_info64 __user *) arg);
+ return loop_get_status64(lo, argp);
case LOOP_SET_CAPACITY:
case LOOP_SET_DIRECT_IO:
case LOOP_SET_BLOCK_SIZE:
@@ -1795,6 +1863,7 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode,
case LOOP_CLR_FD:
case LOOP_GET_STATUS64:
case LOOP_SET_STATUS64:
+ case LOOP_CONFIGURE:
arg = (unsigned long) compat_ptr(arg);
/* fall through */
case LOOP_SET_FD:
@@ -2037,7 +2106,7 @@ static int loop_add(struct loop_device **l, int i)
lo->tag_set.queue_depth = 128;
lo->tag_set.numa_node = NUMA_NO_NODE;
lo->tag_set.cmd_size = sizeof(struct loop_cmd);
- lo->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
+ lo->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_STACKING;
lo->tag_set.driver_data = lo;
err = blk_mq_alloc_tag_set(&lo->tag_set);
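Taken together, the loop.c changes fold LOOP_SET_FD, LOOP_SET_STATUS64 and LOOP_SET_BLOCK_SIZE into a single LOOP_CONFIGURE ioctl built around struct loop_config, with loop_validate_block_size() and loop_set_status_from_info() shared between the old and new paths. A rough userspace sketch of the combined call, assuming a uapi <linux/loop.h> that carries the struct loop_config and LOOP_CONFIGURE definitions added by this series (error handling trimmed):

#define _GNU_SOURCE
#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/loop.h>

/* Attach 'path' to an already-open /dev/loopN fd in one ioctl. */
static int loop_attach(int loop_fd, const char *path)
{
	struct loop_config cfg;
	int backing_fd = open(path, O_RDWR | O_DIRECT);

	if (backing_fd < 0)
		return -1;

	memset(&cfg, 0, sizeof(cfg));
	cfg.fd = backing_fd;
	cfg.block_size = 4096;			/* checked by loop_validate_block_size() */
	cfg.info.lo_flags = LO_FLAGS_DIRECT_IO;	/* must stay within LOOP_CONFIGURE_SETTABLE_FLAGS */

	return ioctl(loop_fd, LOOP_CONFIGURE, &cfg);
}

The legacy LOOP_SET_FD path now simply builds a zeroed loop_config with only ->fd filled in, so both ioctls funnel through loop_configure().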
diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c
index ce9e33603a4d..87b31f9ca362 100644
--- a/drivers/block/null_blk_main.c
+++ b/drivers/block/null_blk_main.c
@@ -1250,8 +1250,34 @@ static inline blk_status_t null_handle_memory_backed(struct nullb_cmd *cmd,
return errno_to_blk_status(err);
}
+static void nullb_zero_read_cmd_buffer(struct nullb_cmd *cmd)
+{
+ struct nullb_device *dev = cmd->nq->dev;
+ struct bio *bio;
+
+ if (dev->memory_backed)
+ return;
+
+ if (dev->queue_mode == NULL_Q_BIO && bio_op(cmd->bio) == REQ_OP_READ) {
+ zero_fill_bio(cmd->bio);
+ } else if (req_op(cmd->rq) == REQ_OP_READ) {
+ __rq_for_each_bio(bio, cmd->rq)
+ zero_fill_bio(bio);
+ }
+}
+
static inline void nullb_complete_cmd(struct nullb_cmd *cmd)
{
+ /*
+ * Since root privileges are required to configure the null_blk
+ * driver, it is fine that this driver does not initialize the
+ * data buffers of read commands. Zero-initialize these buffers
+ * anyway if KMSAN is enabled to prevent KMSAN from complaining
+ * about null_blk not initializing read data buffers.
+ */
+ if (IS_ENABLED(CONFIG_KMSAN))
+ nullb_zero_read_cmd_buffer(cmd);
+
/* Complete IO by inline, softirq or timer */
switch (cmd->nq->dev->irqmode) {
case NULL_IRQ_SOFTIRQ:
@@ -1397,7 +1423,7 @@ static bool should_requeue_request(struct request *rq)
static enum blk_eh_timer_return null_timeout_rq(struct request *rq, bool res)
{
pr_info("rq %p timed out\n", rq);
- blk_mq_complete_request(rq);
+ blk_mq_force_complete_rq(rq);
return BLK_EH_DONE;
}
diff --git a/drivers/block/null_blk_zoned.c b/drivers/block/null_blk_zoned.c
index ed5458f2d367..cc47606d8ffe 100644
--- a/drivers/block/null_blk_zoned.c
+++ b/drivers/block/null_blk_zoned.c
@@ -74,13 +74,20 @@ int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q)
int null_register_zoned_dev(struct nullb *nullb)
{
+ struct nullb_device *dev = nullb->dev;
struct request_queue *q = nullb->q;
- if (queue_is_mq(q))
- return blk_revalidate_disk_zones(nullb->disk);
+ if (queue_is_mq(q)) {
+ int ret = blk_revalidate_disk_zones(nullb->disk, NULL);
+
+ if (ret)
+ return ret;
+ } else {
+ blk_queue_chunk_sectors(q, dev->zone_size_sects);
+ q->nr_zones = blkdev_nr_zones(nullb->disk);
+ }
- blk_queue_chunk_sectors(q, nullb->dev->zone_size_sects);
- q->nr_zones = blkdev_nr_zones(nullb->disk);
+ blk_queue_max_zone_append_sectors(q, dev->zone_size_sects);
return 0;
}
@@ -142,7 +149,7 @@ size_t null_zone_valid_read_len(struct nullb *nullb,
}
static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
- unsigned int nr_sectors)
+ unsigned int nr_sectors, bool append)
{
struct nullb_device *dev = cmd->nq->dev;
unsigned int zno = null_zone_no(dev, sector);
@@ -162,9 +169,21 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
case BLK_ZONE_COND_IMP_OPEN:
case BLK_ZONE_COND_EXP_OPEN:
case BLK_ZONE_COND_CLOSED:
- /* Writes must be at the write pointer position */
- if (sector != zone->wp)
+ /*
+ * Regular writes must be at the write pointer position.
+ * Zone append writes are automatically issued at the write
+ * pointer and the position is returned using the request or BIO
+ * sector.
+ */
+ if (append) {
+ sector = zone->wp;
+ if (cmd->bio)
+ cmd->bio->bi_iter.bi_sector = sector;
+ else
+ cmd->rq->__sector = sector;
+ } else if (sector != zone->wp) {
return BLK_STS_IOERR;
+ }
if (zone->cond != BLK_ZONE_COND_EXP_OPEN)
zone->cond = BLK_ZONE_COND_IMP_OPEN;
@@ -246,7 +265,9 @@ blk_status_t null_process_zoned_cmd(struct nullb_cmd *cmd, enum req_opf op,
{
switch (op) {
case REQ_OP_WRITE:
- return null_zone_write(cmd, sector, nr_sectors);
+ return null_zone_write(cmd, sector, nr_sectors, false);
+ case REQ_OP_ZONE_APPEND:
+ return null_zone_write(cmd, sector, nr_sectors, true);
case REQ_OP_ZONE_RESET:
case REQ_OP_ZONE_RESET_ALL:
case REQ_OP_ZONE_OPEN:
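For REQ_OP_ZONE_APPEND the emulation above moves the write to the zone's current write pointer and stores the chosen sector back into the request or BIO, which is how the block layer hands the written location to the submitter; blk_queue_max_zone_append_sectors() caps a single append at one zone worth of sectors. A hedged sketch of the submitter side, assuming the bio_add_zone_append_page() helper introduced alongside REQ_OP_ZONE_APPEND (names are illustrative, not from the patch):

static void my_zone_append_end_io(struct bio *bio)
{
	/* For REQ_OP_ZONE_APPEND the actual write location is reported
	 * back in bi_sector at completion time.
	 */
	sector_t written = bio->bi_iter.bi_sector;

	pr_info("append landed at sector %llu\n", (unsigned long long)written);
	bio_put(bio);
}

static void my_submit_zone_append(struct block_device *bdev,
				  sector_t zone_start, struct page *page)
{
	struct bio *bio = bio_alloc(GFP_KERNEL, 1);

	bio_set_dev(bio, bdev);
	bio->bi_iter.bi_sector = zone_start;	/* start sector of the target zone */
	bio->bi_opf = REQ_OP_ZONE_APPEND | REQ_SYNC;
	bio->bi_end_io = my_zone_append_end_io;
	bio_add_zone_append_page(bio, page, PAGE_SIZE, 0);
	submit_bio(bio);
}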
diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c
index cda5cf917e9a..5124eca90e83 100644
--- a/drivers/block/paride/pcd.c
+++ b/drivers/block/paride/pcd.c
@@ -1032,7 +1032,7 @@ static int __init pcd_init(void)
for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) {
if (cd->present) {
- register_cdrom(&cd->info);
+ register_cdrom(cd->disk, &cd->info);
cd->disk->private_data = cd;
add_disk(cd->disk);
}
diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c
index 8ffa8260dcaf..3ba07ab30c84 100644
--- a/drivers/block/rsxx/dev.c
+++ b/drivers/block/rsxx/dev.c
@@ -96,20 +96,6 @@ static const struct block_device_operations rsxx_fops = {
.ioctl = rsxx_blkdev_ioctl,
};
-static void disk_stats_start(struct rsxx_cardinfo *card, struct bio *bio)
-{
- generic_start_io_acct(card->queue, bio_op(bio), bio_sectors(bio),
- &card->gendisk->part0);
-}
-
-static void disk_stats_complete(struct rsxx_cardinfo *card,
- struct bio *bio,
- unsigned long start_time)
-{
- generic_end_io_acct(card->queue, bio_op(bio),
- &card->gendisk->part0, start_time);
-}
-
static void bio_dma_done_cb(struct rsxx_cardinfo *card,
void *cb_data,
unsigned int error)
@@ -121,7 +107,7 @@ static void bio_dma_done_cb(struct rsxx_cardinfo *card,
if (atomic_dec_and_test(&meta->pending_dmas)) {
if (!card->eeh_state && card->gendisk)
- disk_stats_complete(card, meta->bio, meta->start_time);
+ bio_end_io_acct(meta->bio, meta->start_time);
if (atomic_read(&meta->error))
bio_io_error(meta->bio);
@@ -167,10 +153,9 @@ static blk_qc_t rsxx_make_request(struct request_queue *q, struct bio *bio)
bio_meta->bio = bio;
atomic_set(&bio_meta->error, 0);
atomic_set(&bio_meta->pending_dmas, 0);
- bio_meta->start_time = jiffies;
if (!unlikely(card->halt))
- disk_stats_start(card, bio);
+ bio_meta->start_time = bio_start_io_acct(bio);
dev_dbg(CARD_TO_DEV(card), "BIO[%c]: meta: %p addr8: x%llx size: %d\n",
bio_data_dir(bio) ? 'W' : 'R', bio_meta,
diff --git a/drivers/block/swim.c b/drivers/block/swim.c
index 4c297f69171d..dd34504382e5 100644
--- a/drivers/block/swim.c
+++ b/drivers/block/swim.c
@@ -327,7 +327,7 @@ static inline void swim_motor(struct swim __iomem *base,
swim_select(base, RELAX);
if (swim_readbit(base, MOTOR_ON))
break;
- current->state = TASK_INTERRUPTIBLE;
+ set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(1);
}
} else if (action == OFF) {
@@ -346,7 +346,7 @@ static inline void swim_eject(struct swim __iomem *base)
swim_select(base, RELAX);
if (!swim_readbit(base, DISK_IN))
break;
- current->state = TASK_INTERRUPTIBLE;
+ set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(1);
}
swim_select(base, RELAX);
@@ -370,7 +370,7 @@ static inline int swim_step(struct swim __iomem *base)
for (wait = 0; wait < HZ; wait++) {
- current->state = TASK_INTERRUPTIBLE;
+ set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(1);
swim_select(base, RELAX);
diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c
index 1a8564a79d8d..5ee8e3fae551 100644
--- a/drivers/block/zram/zcomp.c
+++ b/drivers/block/zram/zcomp.c
@@ -37,19 +37,16 @@ static void zcomp_strm_free(struct zcomp_strm *zstrm)
if (!IS_ERR_OR_NULL(zstrm->tfm))
crypto_free_comp(zstrm->tfm);
free_pages((unsigned long)zstrm->buffer, 1);
- kfree(zstrm);
+ zstrm->tfm = NULL;
+ zstrm->buffer = NULL;
}
/*
- * allocate new zcomp_strm structure with ->tfm initialized by
- * backend, return NULL on error
+ * Initialize zcomp_strm structure with ->tfm initialized by backend, and
+ * ->buffer. Return a negative value on error.
*/
-static struct zcomp_strm *zcomp_strm_alloc(struct zcomp *comp)
+static int zcomp_strm_init(struct zcomp_strm *zstrm, struct zcomp *comp)
{
- struct zcomp_strm *zstrm = kmalloc(sizeof(*zstrm), GFP_KERNEL);
- if (!zstrm)
- return NULL;
-
zstrm->tfm = crypto_alloc_comp(comp->name, 0, 0);
/*
* allocate 2 pages. 1 for compressed data, plus 1 extra for the
@@ -58,9 +55,9 @@ static struct zcomp_strm *zcomp_strm_alloc(struct zcomp *comp)
zstrm->buffer = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 1);
if (IS_ERR_OR_NULL(zstrm->tfm) || !zstrm->buffer) {
zcomp_strm_free(zstrm);
- zstrm = NULL;
+ return -ENOMEM;
}
- return zstrm;
+ return 0;
}
bool zcomp_available_algorithm(const char *comp)
@@ -113,12 +110,13 @@ ssize_t zcomp_available_show(const char *comp, char *buf)
struct zcomp_strm *zcomp_stream_get(struct zcomp *comp)
{
- return *get_cpu_ptr(comp->stream);
+ local_lock(&comp->stream->lock);
+ return this_cpu_ptr(comp->stream);
}
void zcomp_stream_put(struct zcomp *comp)
{
- put_cpu_ptr(comp->stream);
+ local_unlock(&comp->stream->lock);
}
int zcomp_compress(struct zcomp_strm *zstrm,
@@ -159,17 +157,15 @@ int zcomp_cpu_up_prepare(unsigned int cpu, struct hlist_node *node)
{
struct zcomp *comp = hlist_entry(node, struct zcomp, node);
struct zcomp_strm *zstrm;
+ int ret;
- if (WARN_ON(*per_cpu_ptr(comp->stream, cpu)))
- return 0;
+ zstrm = per_cpu_ptr(comp->stream, cpu);
+ local_lock_init(&zstrm->lock);
- zstrm = zcomp_strm_alloc(comp);
- if (IS_ERR_OR_NULL(zstrm)) {
+ ret = zcomp_strm_init(zstrm, comp);
+ if (ret)
pr_err("Can't allocate a compression stream\n");
- return -ENOMEM;
- }
- *per_cpu_ptr(comp->stream, cpu) = zstrm;
- return 0;
+ return ret;
}
int zcomp_cpu_dead(unsigned int cpu, struct hlist_node *node)
@@ -177,10 +173,8 @@ int zcomp_cpu_dead(unsigned int cpu, struct hlist_node *node)
struct zcomp *comp = hlist_entry(node, struct zcomp, node);
struct zcomp_strm *zstrm;
- zstrm = *per_cpu_ptr(comp->stream, cpu);
- if (!IS_ERR_OR_NULL(zstrm))
- zcomp_strm_free(zstrm);
- *per_cpu_ptr(comp->stream, cpu) = NULL;
+ zstrm = per_cpu_ptr(comp->stream, cpu);
+ zcomp_strm_free(zstrm);
return 0;
}
@@ -188,7 +182,7 @@ static int zcomp_init(struct zcomp *comp)
{
int ret;
- comp->stream = alloc_percpu(struct zcomp_strm *);
+ comp->stream = alloc_percpu(struct zcomp_strm);
if (!comp->stream)
return -ENOMEM;
diff --git a/drivers/block/zram/zcomp.h b/drivers/block/zram/zcomp.h
index 1806475b919d..40f6420f4b2e 100644
--- a/drivers/block/zram/zcomp.h
+++ b/drivers/block/zram/zcomp.h
@@ -5,8 +5,11 @@
#ifndef _ZCOMP_H_
#define _ZCOMP_H_
+#include <linux/local_lock.h>
struct zcomp_strm {
+ /* The members ->buffer and ->tfm are protected by ->lock. */
+ local_lock_t lock;
/* compression/decompression buffer */
void *buffer;
struct crypto_comp *tfm;
@@ -14,7 +17,7 @@ struct zcomp_strm {
/* dynamic per-device compression frontend */
struct zcomp {
- struct zcomp_strm * __percpu *stream;
+ struct zcomp_strm __percpu *stream;
const char *name;
struct hlist_node node;
};
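The zcomp rework above embeds struct zcomp_strm directly in the per-CPU allocation and serializes access to ->buffer/->tfm with the new local_lock_t, replacing the implicit protection that get_cpu_ptr()/put_cpu_ptr() used to provide; on PREEMPT_RT the local_lock becomes a real lock, which is why the protected region is made explicit. A stripped-down sketch of the same per-CPU plus local_lock pattern, mirroring the calls used in the diff (illustrative names):

#include <linux/cpumask.h>
#include <linux/local_lock.h>
#include <linux/percpu.h>

struct my_stream {
	local_lock_t lock;	/* protects scratch on this CPU */
	void *scratch;
};

static struct my_stream __percpu *streams;

static int my_streams_init(void)
{
	int cpu;

	streams = alloc_percpu(struct my_stream);
	if (!streams)
		return -ENOMEM;
	for_each_possible_cpu(cpu)
		local_lock_init(&per_cpu_ptr(streams, cpu)->lock);
	return 0;
}

static struct my_stream *my_stream_get(void)
{
	local_lock(&streams->lock);	/* pins us to this CPU's stream */
	return this_cpu_ptr(streams);
}

static void my_stream_put(void)
{
	local_unlock(&streams->lock);
}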
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index ebb234f36909..6e2ad90b17a3 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -1510,13 +1510,8 @@ static void zram_bio_discard(struct zram *zram, u32 index,
static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
int offset, unsigned int op, struct bio *bio)
{
- unsigned long start_time = jiffies;
- struct request_queue *q = zram->disk->queue;
int ret;
- generic_start_io_acct(q, op, bvec->bv_len >> SECTOR_SHIFT,
- &zram->disk->part0);
-
if (!op_is_write(op)) {
atomic64_inc(&zram->stats.num_reads);
ret = zram_bvec_read(zram, bvec, index, offset, bio);
@@ -1526,8 +1521,6 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
ret = zram_bvec_write(zram, bvec, index, offset, bio);
}
- generic_end_io_acct(q, op, &zram->disk->part0, start_time);
-
zram_slot_lock(zram, index);
zram_accessed(zram, index);
zram_slot_unlock(zram, index);
@@ -1548,6 +1541,7 @@ static void __zram_make_request(struct zram *zram, struct bio *bio)
u32 index;
struct bio_vec bvec;
struct bvec_iter iter;
+ unsigned long start_time;
index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
offset = (bio->bi_iter.bi_sector &
@@ -1563,6 +1557,7 @@ static void __zram_make_request(struct zram *zram, struct bio *bio)
break;
}
+ start_time = bio_start_io_acct(bio);
bio_for_each_segment(bvec, bio, iter) {
struct bio_vec bv = bvec;
unsigned int unwritten = bvec.bv_len;
@@ -1571,8 +1566,10 @@ static void __zram_make_request(struct zram *zram, struct bio *bio)
bv.bv_len = min_t(unsigned int, PAGE_SIZE - offset,
unwritten);
if (zram_bvec_rw(zram, &bv, index, offset,
- bio_op(bio), bio) < 0)
- goto out;
+ bio_op(bio), bio) < 0) {
+ bio->bi_status = BLK_STS_IOERR;
+ break;
+ }
bv.bv_offset += bv.bv_len;
unwritten -= bv.bv_len;
@@ -1580,12 +1577,8 @@ static void __zram_make_request(struct zram *zram, struct bio *bio)
update_position(&index, &offset, &bv);
} while (unwritten);
}
-
+ bio_end_io_acct(bio, start_time);
bio_endio(bio);
- return;
-
-out:
- bio_io_error(bio);
}
/*
@@ -1633,6 +1626,7 @@ static int zram_rw_page(struct block_device *bdev, sector_t sector,
u32 index;
struct zram *zram;
struct bio_vec bv;
+ unsigned long start_time;
if (PageTransHuge(page))
return -ENOTSUPP;
@@ -1651,7 +1645,9 @@ static int zram_rw_page(struct block_device *bdev, sector_t sector,
bv.bv_len = PAGE_SIZE;
bv.bv_offset = 0;
+ start_time = disk_start_io_acct(bdev->bd_disk, SECTORS_PER_PAGE, op);
ret = zram_bvec_rw(zram, &bv, index, offset, op, NULL);
+ disk_end_io_acct(bdev->bd_disk, op, start_time);
out:
/*
* If I/O fails, just return error(ie, non-zero) without
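The rsxx and zram hunks switch per-request statistics from generic_start_io_acct()/generic_end_io_acct() to the new bio accounting helpers, and zram now accounts once per bio rather than once per segment. A condensed sketch of the bio-based pattern (hypothetical driver names; process_bio() stands in for the real per-segment loop):

static blk_qc_t my_make_request(struct request_queue *q, struct bio *bio)
{
	unsigned long start_time = bio_start_io_acct(bio);	/* begin in-flight accounting */

	if (process_bio(bio) < 0)
		bio->bi_status = BLK_STS_IOERR;	/* record the error, still end accounting */

	bio_end_io_acct(bio, start_time);
	bio_endio(bio);
	return BLK_QC_T_NONE;
}

For the rw_page path, which has no bio, the zram hunk uses disk_start_io_acct()/disk_end_io_acct() with an explicit sector count and operation instead.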
diff --git a/drivers/bluetooth/btmrvl_sdio.c b/drivers/bluetooth/btmrvl_sdio.c
index 0f3a020703ab..a296f8526433 100644
--- a/drivers/bluetooth/btmrvl_sdio.c
+++ b/drivers/bluetooth/btmrvl_sdio.c
@@ -355,31 +355,31 @@ static const struct btmrvl_sdio_device btmrvl_sdio_sd8997 = {
static const struct sdio_device_id btmrvl_sdio_ids[] = {
/* Marvell SD8688 Bluetooth device */
- { SDIO_DEVICE(SDIO_VENDOR_ID_MARVELL, 0x9105),
+ { SDIO_DEVICE(SDIO_VENDOR_ID_MARVELL, SDIO_DEVICE_ID_MARVELL_8688_BT),
.driver_data = (unsigned long)&btmrvl_sdio_sd8688 },
/* Marvell SD8787 Bluetooth device */
- { SDIO_DEVICE(SDIO_VENDOR_ID_MARVELL, 0x911A),
+ { SDIO_DEVICE(SDIO_VENDOR_ID_MARVELL, SDIO_DEVICE_ID_MARVELL_8787_BT),
.driver_data = (unsigned long)&btmrvl_sdio_sd8787 },
/* Marvell SD8787 Bluetooth AMP device */
- { SDIO_DEVICE(SDIO_VENDOR_ID_MARVELL, 0x911B),
+ { SDIO_DEVICE(SDIO_VENDOR_ID_MARVELL, SDIO_DEVICE_ID_MARVELL_8787_BT_AMP),
.driver_data = (unsigned long)&btmrvl_sdio_sd8787 },
/* Marvell SD8797 Bluetooth device */
- { SDIO_DEVICE(SDIO_VENDOR_ID_MARVELL, 0x912A),
+ { SDIO_DEVICE(SDIO_VENDOR_ID_MARVELL, SDIO_DEVICE_ID_MARVELL_8797_BT),
.driver_data = (unsigned long)&btmrvl_sdio_sd8797 },
/* Marvell SD8887 Bluetooth device */
- { SDIO_DEVICE(SDIO_VENDOR_ID_MARVELL, 0x9136),
+ { SDIO_DEVICE(SDIO_VENDOR_ID_MARVELL, SDIO_DEVICE_ID_MARVELL_8887_BT),
.driver_data = (unsigned long)&btmrvl_sdio_sd8887 },
/* Marvell SD8897 Bluetooth device */
- { SDIO_DEVICE(SDIO_VENDOR_ID_MARVELL, 0x912E),
+ { SDIO_DEVICE(SDIO_VENDOR_ID_MARVELL, SDIO_DEVICE_ID_MARVELL_8897_BT),
.driver_data = (unsigned long)&btmrvl_sdio_sd8897 },
/* Marvell SD8977 Bluetooth device */
- { SDIO_DEVICE(SDIO_VENDOR_ID_MARVELL, 0x9146),
+ { SDIO_DEVICE(SDIO_VENDOR_ID_MARVELL, SDIO_DEVICE_ID_MARVELL_8977_BT),
.driver_data = (unsigned long)&btmrvl_sdio_sd8977 },
/* Marvell SD8987 Bluetooth device */
- { SDIO_DEVICE(SDIO_VENDOR_ID_MARVELL, 0x914A),
+ { SDIO_DEVICE(SDIO_VENDOR_ID_MARVELL, SDIO_DEVICE_ID_MARVELL_8987_BT),
.driver_data = (unsigned long)&btmrvl_sdio_sd8987 },
/* Marvell SD8997 Bluetooth device */
- { SDIO_DEVICE(SDIO_VENDOR_ID_MARVELL, 0x9142),
+ { SDIO_DEVICE(SDIO_VENDOR_ID_MARVELL, SDIO_DEVICE_ID_MARVELL_8997_BT),
.driver_data = (unsigned long)&btmrvl_sdio_sd8997 },
{ } /* Terminating entry */
diff --git a/drivers/bluetooth/btmtksdio.c b/drivers/bluetooth/btmtksdio.c
index 519788c442ca..bff095be2f97 100644
--- a/drivers/bluetooth/btmtksdio.c
+++ b/drivers/bluetooth/btmtksdio.c
@@ -51,9 +51,9 @@ static const struct btmtksdio_data mt7668_data = {
};
static const struct sdio_device_id btmtksdio_table[] = {
- {SDIO_DEVICE(SDIO_VENDOR_ID_MEDIATEK, 0x7663),
+ {SDIO_DEVICE(SDIO_VENDOR_ID_MEDIATEK, SDIO_DEVICE_ID_MEDIATEK_MT7663),
.driver_data = (kernel_ulong_t)&mt7663_data },
- {SDIO_DEVICE(SDIO_VENDOR_ID_MEDIATEK, 0x7668),
+ {SDIO_DEVICE(SDIO_VENDOR_ID_MEDIATEK, SDIO_DEVICE_ID_MEDIATEK_MT7668),
.driver_data = (kernel_ulong_t)&mt7668_data },
{ } /* Terminating entry */
};
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index e3bbe108eb54..a0a7ae705de8 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -586,7 +586,7 @@ static int cdrom_mrw_set_lba_space(struct cdrom_device_info *cdi, int space)
return 0;
}
-int register_cdrom(struct cdrom_device_info *cdi)
+int register_cdrom(struct gendisk *disk, struct cdrom_device_info *cdi)
{
static char banner_printed;
const struct cdrom_device_ops *cdo = cdi->ops;
@@ -601,6 +601,9 @@ int register_cdrom(struct cdrom_device_info *cdi)
cdrom_sysctl_register();
}
+ cdi->disk = disk;
+ disk->cdi = cdi;
+
ENSURE(cdo, drive_status, CDC_DRIVE_STATUS);
if (cdo->check_events == NULL && cdo->media_changed == NULL)
WARN_ON_ONCE(cdo->capability & (CDC_MEDIA_CHANGED | CDC_SELECT_DISC));
@@ -2292,37 +2295,46 @@ retry:
return cdrom_read_cdda_old(cdi, ubuf, lba, nframes);
}
-static int cdrom_ioctl_multisession(struct cdrom_device_info *cdi,
- void __user *argp)
+int cdrom_multisession(struct cdrom_device_info *cdi,
+ struct cdrom_multisession *info)
{
- struct cdrom_multisession ms_info;
u8 requested_format;
int ret;
- cd_dbg(CD_DO_IOCTL, "entering CDROMMULTISESSION\n");
-
if (!(cdi->ops->capability & CDC_MULTI_SESSION))
return -ENOSYS;
- if (copy_from_user(&ms_info, argp, sizeof(ms_info)))
- return -EFAULT;
-
- requested_format = ms_info.addr_format;
+ requested_format = info->addr_format;
if (requested_format != CDROM_MSF && requested_format != CDROM_LBA)
return -EINVAL;
- ms_info.addr_format = CDROM_LBA;
+ info->addr_format = CDROM_LBA;
- ret = cdi->ops->get_last_session(cdi, &ms_info);
- if (ret)
- return ret;
+ ret = cdi->ops->get_last_session(cdi, info);
+ if (!ret)
+ sanitize_format(&info->addr, &info->addr_format,
+ requested_format);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(cdrom_multisession);
- sanitize_format(&ms_info.addr, &ms_info.addr_format, requested_format);
+static int cdrom_ioctl_multisession(struct cdrom_device_info *cdi,
+ void __user *argp)
+{
+ struct cdrom_multisession info;
+ int ret;
+
+ cd_dbg(CD_DO_IOCTL, "entering CDROMMULTISESSION\n");
- if (copy_to_user(argp, &ms_info, sizeof(ms_info)))
+ if (copy_from_user(&info, argp, sizeof(info)))
+ return -EFAULT;
+ ret = cdrom_multisession(cdi, &info);
+ if (ret)
+ return ret;
+ if (copy_to_user(argp, &info, sizeof(info)))
return -EFAULT;
cd_dbg(CD_DO_IOCTL, "CDROMMULTISESSION successful\n");
- return 0;
+ return ret;
}
static int cdrom_ioctl_eject(struct cdrom_device_info *cdi)
@@ -2663,32 +2675,37 @@ static int cdrom_ioctl_read_tochdr(struct cdrom_device_info *cdi,
return 0;
}
+int cdrom_read_tocentry(struct cdrom_device_info *cdi,
+ struct cdrom_tocentry *entry)
+{
+ u8 requested_format = entry->cdte_format;
+ int ret;
+
+ if (requested_format != CDROM_MSF && requested_format != CDROM_LBA)
+ return -EINVAL;
+
+ /* make interface to low-level uniform */
+ entry->cdte_format = CDROM_MSF;
+ ret = cdi->ops->audio_ioctl(cdi, CDROMREADTOCENTRY, entry);
+ if (!ret)
+ sanitize_format(&entry->cdte_addr, &entry->cdte_format,
+ requested_format);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(cdrom_read_tocentry);
+
static int cdrom_ioctl_read_tocentry(struct cdrom_device_info *cdi,
void __user *argp)
{
struct cdrom_tocentry entry;
- u8 requested_format;
int ret;
- /* cd_dbg(CD_DO_IOCTL, "entering CDROMREADTOCENTRY\n"); */
-
if (copy_from_user(&entry, argp, sizeof(entry)))
return -EFAULT;
-
- requested_format = entry.cdte_format;
- if (requested_format != CDROM_MSF && requested_format != CDROM_LBA)
- return -EINVAL;
- /* make interface to low-level uniform */
- entry.cdte_format = CDROM_MSF;
- ret = cdi->ops->audio_ioctl(cdi, CDROMREADTOCENTRY, &entry);
- if (ret)
- return ret;
- sanitize_format(&entry.cdte_addr, &entry.cdte_format, requested_format);
-
- if (copy_to_user(argp, &entry, sizeof(entry)))
+ ret = cdrom_read_tocentry(cdi, &entry);
+ if (!ret && copy_to_user(argp, &entry, sizeof(entry)))
return -EFAULT;
- /* cd_dbg(CD_DO_IOCTL, "CDROMREADTOCENTRY successful\n"); */
- return 0;
+ return ret;
}
static int cdrom_ioctl_play_msf(struct cdrom_device_info *cdi,
diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c
index c51292c2a131..09b0cd292720 100644
--- a/drivers/cdrom/gdrom.c
+++ b/drivers/cdrom/gdrom.c
@@ -770,7 +770,7 @@ static int probe_gdrom(struct platform_device *devptr)
goto probe_fail_no_disk;
}
probe_gdrom_setupdisk();
- if (register_cdrom(gd.cd_info)) {
+ if (register_cdrom(gd.disk, gd.cd_info)) {
err = -ENODEV;
goto probe_fail_cdrom_register;
}
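register_cdrom() now takes the gendisk so the cdrom core and the disk can find each other, and cdrom_multisession()/cdrom_read_tocentry() are exported for in-kernel callers that previously had to go through the user-copying ioctl wrappers. A hedged sketch of an in-kernel user of the new TOC helper (illustrative function, not part of the patch):

/* Read the lead-out entry of the disc behind 'cdi' in LBA format. */
static int my_read_leadout(struct cdrom_device_info *cdi, int *lba)
{
	struct cdrom_tocentry entry = {
		.cdte_track  = CDROM_LEADOUT,
		.cdte_format = CDROM_LBA,	/* CDROM_MSF is also accepted */
	};
	int ret;

	ret = cdrom_read_tocentry(cdi, &entry);
	if (ret)
		return ret;

	*lba = entry.cdte_addr.lba;
	return 0;
}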
diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c
index 66a62d17a3f5..4b34a5195c65 100644
--- a/drivers/char/agp/intel-gtt.c
+++ b/drivers/char/agp/intel-gtt.c
@@ -846,6 +846,7 @@ void intel_gtt_insert_page(dma_addr_t addr,
unsigned int flags)
{
intel_private.driver->write_entry(addr, pg, flags);
+ readl(intel_private.gtt + pg);
if (intel_private.driver->chipset_flush)
intel_private.driver->chipset_flush();
}
@@ -871,7 +872,7 @@ void intel_gtt_insert_sg_entries(struct sg_table *st,
j++;
}
}
- wmb();
+ readl(intel_private.gtt + j - 1);
if (intel_private.driver->chipset_flush)
intel_private.driver->chipset_flush();
}
@@ -1105,6 +1106,7 @@ static void i9xx_cleanup(void)
static void i9xx_chipset_flush(void)
{
+ wmb();
if (intel_private.i9xx_flush_page)
writel(1, intel_private.i9xx_flush_page);
}
@@ -1405,13 +1407,16 @@ int intel_gmch_probe(struct pci_dev *bridge_pdev, struct pci_dev *gpu_pdev,
dev_info(&bridge_pdev->dev, "Intel %s Chipset\n", intel_gtt_chipsets[i].name);
- mask = intel_private.driver->dma_mask_size;
- if (pci_set_dma_mask(intel_private.pcidev, DMA_BIT_MASK(mask)))
- dev_err(&intel_private.pcidev->dev,
- "set gfx device dma mask %d-bit failed!\n", mask);
- else
- pci_set_consistent_dma_mask(intel_private.pcidev,
- DMA_BIT_MASK(mask));
+ if (bridge) {
+ mask = intel_private.driver->dma_mask_size;
+ if (pci_set_dma_mask(intel_private.pcidev, DMA_BIT_MASK(mask)))
+ dev_err(&intel_private.pcidev->dev,
+ "set gfx device dma mask %d-bit failed!\n",
+ mask);
+ else
+ pci_set_consistent_dma_mask(intel_private.pcidev,
+ DMA_BIT_MASK(mask));
+ }
if (intel_gtt_init() != 0) {
intel_gmch_remove();
diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig
index 9bc46da8d77a..ac00d78ee9cc 100644
--- a/drivers/char/hw_random/Kconfig
+++ b/drivers/char/hw_random/Kconfig
@@ -474,6 +474,19 @@ config HW_RANDOM_KEYSTONE
help
This option enables Keystone's hardware random generator.
+config HW_RANDOM_CCTRNG
+ tristate "Arm CryptoCell True Random Number Generator support"
+ depends on HAS_IOMEM && OF
+ help
+ Say 'Y' to enable the True Random Number Generator driver for the
+ Arm TrustZone CryptoCell family of processors.
+ Currently the CryptoCell 713 and 703 are supported.
+	  The driver is supported only on SoCs where a Trusted Execution
+ Environment is not used.
+ Choose 'M' to compile this driver as a module. The module
+ will be called cctrng.
+ If unsure, say 'N'.
+
endif # HW_RANDOM
config UML_RANDOM
diff --git a/drivers/char/hw_random/Makefile b/drivers/char/hw_random/Makefile
index a7801b49ce6c..2c6724735345 100644
--- a/drivers/char/hw_random/Makefile
+++ b/drivers/char/hw_random/Makefile
@@ -41,3 +41,4 @@ obj-$(CONFIG_HW_RANDOM_S390) += s390-trng.o
obj-$(CONFIG_HW_RANDOM_KEYSTONE) += ks-sa-rng.o
obj-$(CONFIG_HW_RANDOM_OPTEE) += optee-rng.o
obj-$(CONFIG_HW_RANDOM_NPCM) += npcm-rng.o
+obj-$(CONFIG_HW_RANDOM_CCTRNG) += cctrng.o
diff --git a/drivers/char/hw_random/cctrng.c b/drivers/char/hw_random/cctrng.c
new file mode 100644
index 000000000000..619148fb2dc9
--- /dev/null
+++ b/drivers/char/hw_random/cctrng.c
@@ -0,0 +1,735 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2019-2020 ARM Limited or its affiliates. */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/clk.h>
+#include <linux/hw_random.h>
+#include <linux/io.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/interrupt.h>
+#include <linux/irqreturn.h>
+#include <linux/workqueue.h>
+#include <linux/circ_buf.h>
+#include <linux/completion.h>
+#include <linux/of.h>
+#include <linux/bitfield.h>
+#include <linux/fips.h>
+
+#include "cctrng.h"
+
+#define CC_REG_LOW(name) (name ## _BIT_SHIFT)
+#define CC_REG_HIGH(name) (CC_REG_LOW(name) + name ## _BIT_SIZE - 1)
+#define CC_GENMASK(name) GENMASK(CC_REG_HIGH(name), CC_REG_LOW(name))
+
+#define CC_REG_FLD_GET(reg_name, fld_name, reg_val) \
+ (FIELD_GET(CC_GENMASK(CC_ ## reg_name ## _ ## fld_name), reg_val))
+
+#define CC_HW_RESET_LOOP_COUNT 10
+#define CC_TRNG_SUSPEND_TIMEOUT 3000
+
+/* data circular buffer in words must be:
+ * - of a power-of-2 size (limitation of circ_buf.h macros)
+ * - at least 6, the size generated in the EHR according to HW implementation
+ */
+#define CCTRNG_DATA_BUF_WORDS 32
+
+/* The timeout for the TRNG operation should be calculated with the formula:
+ * Timeout = EHR_NUM * VN_COEFF * EHR_LENGTH * SAMPLE_CNT * SCALE_VALUE
+ * where:
+ * - SAMPLE_CNT is an input value from the characterisation process
+ * - all the rest are constants
+ */
+#define EHR_NUM 1
+#define VN_COEFF 4
+#define EHR_LENGTH CC_TRNG_EHR_IN_BITS
+#define SCALE_VALUE 2
+#define CCTRNG_TIMEOUT(smpl_cnt) \
+ (EHR_NUM * VN_COEFF * EHR_LENGTH * smpl_cnt * SCALE_VALUE)
+
+struct cctrng_drvdata {
+ struct platform_device *pdev;
+ void __iomem *cc_base;
+ struct clk *clk;
+ struct hwrng rng;
+ u32 active_rosc;
+ /* Sampling interval for each ring oscillator:
+ * count of ring oscillator cycles between consecutive bits sampling.
+ * Value of 0 indicates non-valid rosc
+ */
+ u32 smpl_ratio[CC_TRNG_NUM_OF_ROSCS];
+
+ u32 data_buf[CCTRNG_DATA_BUF_WORDS];
+ struct circ_buf circ;
+ struct work_struct compwork;
+ struct work_struct startwork;
+
+ /* pending_hw - 1 when HW is pending, 0 when it is idle */
+ atomic_t pending_hw;
+
+ /* protects against multiple concurrent consumers of data_buf */
+ spinlock_t read_lock;
+};
+
+
+/* functions for write/read CC registers */
+static inline void cc_iowrite(struct cctrng_drvdata *drvdata, u32 reg, u32 val)
+{
+ iowrite32(val, (drvdata->cc_base + reg));
+}
+static inline u32 cc_ioread(struct cctrng_drvdata *drvdata, u32 reg)
+{
+ return ioread32(drvdata->cc_base + reg);
+}
+
+
+static int cc_trng_pm_get(struct device *dev)
+{
+ int rc = 0;
+
+ rc = pm_runtime_get_sync(dev);
+
+ /* pm_runtime_get_sync() can return 1 as a valid return code */
+ return (rc == 1 ? 0 : rc);
+}
+
+static void cc_trng_pm_put_suspend(struct device *dev)
+{
+ int rc = 0;
+
+ pm_runtime_mark_last_busy(dev);
+ rc = pm_runtime_put_autosuspend(dev);
+ if (rc)
+ dev_err(dev, "pm_runtime_put_autosuspend returned %x\n", rc);
+}
+
+static int cc_trng_pm_init(struct cctrng_drvdata *drvdata)
+{
+ struct device *dev = &(drvdata->pdev->dev);
+
+ /* must be before the enabling to avoid redundant suspending */
+ pm_runtime_set_autosuspend_delay(dev, CC_TRNG_SUSPEND_TIMEOUT);
+ pm_runtime_use_autosuspend(dev);
+ /* set us as active - note we won't do PM ops until cc_trng_pm_go()! */
+ return pm_runtime_set_active(dev);
+}
+
+static void cc_trng_pm_go(struct cctrng_drvdata *drvdata)
+{
+ struct device *dev = &(drvdata->pdev->dev);
+
+	/* enable the PM module */
+ pm_runtime_enable(dev);
+}
+
+static void cc_trng_pm_fini(struct cctrng_drvdata *drvdata)
+{
+ struct device *dev = &(drvdata->pdev->dev);
+
+ pm_runtime_disable(dev);
+}
+
+
+static inline int cc_trng_parse_sampling_ratio(struct cctrng_drvdata *drvdata)
+{
+ struct device *dev = &(drvdata->pdev->dev);
+ struct device_node *np = drvdata->pdev->dev.of_node;
+ int rc;
+ int i;
+ /* ret will be set to 0 if at least one rosc has (sampling ratio > 0) */
+ int ret = -EINVAL;
+
+ rc = of_property_read_u32_array(np, "arm,rosc-ratio",
+ drvdata->smpl_ratio,
+ CC_TRNG_NUM_OF_ROSCS);
+ if (rc) {
+ /* arm,rosc-ratio was not found in device tree */
+ return rc;
+ }
+
+ /* verify that at least one rosc has (sampling ratio > 0) */
+ for (i = 0; i < CC_TRNG_NUM_OF_ROSCS; ++i) {
+ dev_dbg(dev, "rosc %d sampling ratio %u",
+ i, drvdata->smpl_ratio[i]);
+
+ if (drvdata->smpl_ratio[i] > 0)
+ ret = 0;
+ }
+
+ return ret;
+}
+
+static int cc_trng_change_rosc(struct cctrng_drvdata *drvdata)
+{
+ struct device *dev = &(drvdata->pdev->dev);
+
+ dev_dbg(dev, "cctrng change rosc (was %d)\n", drvdata->active_rosc);
+ drvdata->active_rosc += 1;
+
+ while (drvdata->active_rosc < CC_TRNG_NUM_OF_ROSCS) {
+ if (drvdata->smpl_ratio[drvdata->active_rosc] > 0)
+ return 0;
+
+ drvdata->active_rosc += 1;
+ }
+ return -EINVAL;
+}
+
+
+static void cc_trng_enable_rnd_source(struct cctrng_drvdata *drvdata)
+{
+ u32 max_cycles;
+
+ /* Set watchdog threshold to maximal allowed time (in CPU cycles) */
+ max_cycles = CCTRNG_TIMEOUT(drvdata->smpl_ratio[drvdata->active_rosc]);
+ cc_iowrite(drvdata, CC_RNG_WATCHDOG_VAL_REG_OFFSET, max_cycles);
+
+ /* enable the RND source */
+ cc_iowrite(drvdata, CC_RND_SOURCE_ENABLE_REG_OFFSET, 0x1);
+
+ /* unmask RNG interrupts */
+ cc_iowrite(drvdata, CC_RNG_IMR_REG_OFFSET, (u32)~CC_RNG_INT_MASK);
+}
+
+
+/* increase circular data buffer index (head/tail) */
+static inline void circ_idx_inc(int *idx, int bytes)
+{
+ *idx += (bytes + 3) >> 2;
+ *idx &= (CCTRNG_DATA_BUF_WORDS - 1);
+}
+
+static inline size_t circ_buf_space(struct cctrng_drvdata *drvdata)
+{
+ return CIRC_SPACE(drvdata->circ.head,
+ drvdata->circ.tail, CCTRNG_DATA_BUF_WORDS);
+
+}
+
+static int cctrng_read(struct hwrng *rng, void *data, size_t max, bool wait)
+{
+ /* current implementation ignores "wait" */
+
+ struct cctrng_drvdata *drvdata = (struct cctrng_drvdata *)rng->priv;
+ struct device *dev = &(drvdata->pdev->dev);
+ u32 *buf = (u32 *)drvdata->circ.buf;
+ size_t copied = 0;
+ size_t cnt_w;
+ size_t size;
+ size_t left;
+
+ if (!spin_trylock(&drvdata->read_lock)) {
+ /* concurrent consumers from data_buf cannot be served */
+ dev_dbg_ratelimited(dev, "unable to hold lock\n");
+ return 0;
+ }
+
+ /* copy till end of data buffer (without wrap back) */
+ cnt_w = CIRC_CNT_TO_END(drvdata->circ.head,
+ drvdata->circ.tail, CCTRNG_DATA_BUF_WORDS);
+ size = min((cnt_w<<2), max);
+ memcpy(data, &(buf[drvdata->circ.tail]), size);
+ copied = size;
+ circ_idx_inc(&drvdata->circ.tail, size);
+ /* copy rest of data in data buffer */
+ left = max - copied;
+ if (left > 0) {
+ cnt_w = CIRC_CNT(drvdata->circ.head,
+ drvdata->circ.tail, CCTRNG_DATA_BUF_WORDS);
+ size = min((cnt_w<<2), left);
+ memcpy(data, &(buf[drvdata->circ.tail]), size);
+ copied += size;
+ circ_idx_inc(&drvdata->circ.tail, size);
+ }
+
+ spin_unlock(&drvdata->read_lock);
+
+ if (circ_buf_space(drvdata) >= CC_TRNG_EHR_IN_WORDS) {
+ if (atomic_cmpxchg(&drvdata->pending_hw, 0, 1) == 0) {
+ /* re-check space in buffer to avoid potential race */
+ if (circ_buf_space(drvdata) >= CC_TRNG_EHR_IN_WORDS) {
+ /* increment device's usage counter */
+ int rc = cc_trng_pm_get(dev);
+
+ if (rc) {
+ dev_err(dev,
+ "cc_trng_pm_get returned %x\n",
+ rc);
+ return rc;
+ }
+
+ /* schedule execution of deferred work handler
+ * for filling of data buffer
+ */
+ schedule_work(&drvdata->startwork);
+ } else {
+ atomic_set(&drvdata->pending_hw, 0);
+ }
+ }
+ }
+
+ return copied;
+}
+
+static void cc_trng_hw_trigger(struct cctrng_drvdata *drvdata)
+{
+ u32 tmp_smpl_cnt = 0;
+ struct device *dev = &(drvdata->pdev->dev);
+
+ dev_dbg(dev, "cctrng hw trigger.\n");
+
+ /* enable the HW RND clock */
+ cc_iowrite(drvdata, CC_RNG_CLK_ENABLE_REG_OFFSET, 0x1);
+
+ /* do software reset */
+ cc_iowrite(drvdata, CC_RNG_SW_RESET_REG_OFFSET, 0x1);
+ /* in order to verify that the reset has completed,
+ * the sample count needs to be verified
+ */
+ do {
+ /* enable the HW RND clock */
+ cc_iowrite(drvdata, CC_RNG_CLK_ENABLE_REG_OFFSET, 0x1);
+
+ /* set sampling ratio (rng_clocks) between consecutive bits */
+ cc_iowrite(drvdata, CC_SAMPLE_CNT1_REG_OFFSET,
+ drvdata->smpl_ratio[drvdata->active_rosc]);
+
+ /* read the sampling ratio */
+ tmp_smpl_cnt = cc_ioread(drvdata, CC_SAMPLE_CNT1_REG_OFFSET);
+
+ } while (tmp_smpl_cnt != drvdata->smpl_ratio[drvdata->active_rosc]);
+
+ /* disable the RND source for setting new parameters in HW */
+ cc_iowrite(drvdata, CC_RND_SOURCE_ENABLE_REG_OFFSET, 0);
+
+ cc_iowrite(drvdata, CC_RNG_ICR_REG_OFFSET, 0xFFFFFFFF);
+
+ cc_iowrite(drvdata, CC_TRNG_CONFIG_REG_OFFSET, drvdata->active_rosc);
+
+ /* Debug Control register: set to 0 - no bypasses */
+ cc_iowrite(drvdata, CC_TRNG_DEBUG_CONTROL_REG_OFFSET, 0);
+
+ cc_trng_enable_rnd_source(drvdata);
+}
+
+static void cc_trng_compwork_handler(struct work_struct *w)
+{
+ u32 isr = 0;
+ u32 ehr_valid = 0;
+ struct cctrng_drvdata *drvdata =
+ container_of(w, struct cctrng_drvdata, compwork);
+ struct device *dev = &(drvdata->pdev->dev);
+ int i;
+
+ /* stop DMA and the RNG source */
+ cc_iowrite(drvdata, CC_RNG_DMA_ENABLE_REG_OFFSET, 0);
+ cc_iowrite(drvdata, CC_RND_SOURCE_ENABLE_REG_OFFSET, 0);
+
+ /* read RNG_ISR and check for errors */
+ isr = cc_ioread(drvdata, CC_RNG_ISR_REG_OFFSET);
+ ehr_valid = CC_REG_FLD_GET(RNG_ISR, EHR_VALID, isr);
+ dev_dbg(dev, "Got RNG_ISR=0x%08X (EHR_VALID=%u)\n", isr, ehr_valid);
+
+ if (fips_enabled && CC_REG_FLD_GET(RNG_ISR, CRNGT_ERR, isr)) {
+ fips_fail_notify();
+ /* FIPS error is fatal */
+ panic("Got HW CRNGT error while fips is enabled!\n");
+ }
+
+ /* Clear all pending RNG interrupts */
+ cc_iowrite(drvdata, CC_RNG_ICR_REG_OFFSET, isr);
+
+
+ if (!ehr_valid) {
+ /* in case of AUTOCORR/TIMEOUT error, try the next ROSC */
+ if (CC_REG_FLD_GET(RNG_ISR, AUTOCORR_ERR, isr) ||
+ CC_REG_FLD_GET(RNG_ISR, WATCHDOG, isr)) {
+ dev_dbg(dev, "cctrng autocorr/timeout error.\n");
+ goto next_rosc;
+ }
+
+ /* in case of VN error, ignore it */
+ }
+
+ /* read EHR data from registers */
+ for (i = 0; i < CC_TRNG_EHR_IN_WORDS; i++) {
+ /* calc word ptr in data_buf */
+ u32 *buf = (u32 *)drvdata->circ.buf;
+
+ buf[drvdata->circ.head] = cc_ioread(drvdata,
+ CC_EHR_DATA_0_REG_OFFSET + (i*sizeof(u32)));
+
+ * EHR_DATA registers are cleared on read. If a zero value was
+ * returned, restart the entropy collection.
+ */
+ if (buf[drvdata->circ.head] == 0) {
+ dev_dbg(dev, "Got 0 value in EHR. active_rosc %u\n",
+ drvdata->active_rosc);
+ goto next_rosc;
+ }
+
+ circ_idx_inc(&drvdata->circ.head, 1<<2);
+ }
+
+ atomic_set(&drvdata->pending_hw, 0);
+
+ /* continue to fill data buffer if needed */
+ if (circ_buf_space(drvdata) >= CC_TRNG_EHR_IN_WORDS) {
+ if (atomic_cmpxchg(&drvdata->pending_hw, 0, 1) == 0) {
+ /* Re-enable rnd source */
+ cc_trng_enable_rnd_source(drvdata);
+ return;
+ }
+ }
+
+ cc_trng_pm_put_suspend(dev);
+
+ dev_dbg(dev, "compwork handler done\n");
+ return;
+
+next_rosc:
+ if ((circ_buf_space(drvdata) >= CC_TRNG_EHR_IN_WORDS) &&
+ (cc_trng_change_rosc(drvdata) == 0)) {
+ /* trigger trng hw with next rosc */
+ cc_trng_hw_trigger(drvdata);
+ } else {
+ atomic_set(&drvdata->pending_hw, 0);
+ cc_trng_pm_put_suspend(dev);
+ }
+}
+
+static irqreturn_t cc_isr(int irq, void *dev_id)
+{
+ struct cctrng_drvdata *drvdata = (struct cctrng_drvdata *)dev_id;
+ struct device *dev = &(drvdata->pdev->dev);
+ u32 irr;
+
+ /* if driver suspended return, probably shared interrupt */
+ if (pm_runtime_suspended(dev))
+ return IRQ_NONE;
+
+ /* read the interrupt status */
+ irr = cc_ioread(drvdata, CC_HOST_RGF_IRR_REG_OFFSET);
+ dev_dbg(dev, "Got IRR=0x%08X\n", irr);
+
+ if (irr == 0) /* Probably shared interrupt line */
+ return IRQ_NONE;
+
+ /* clear interrupt - must be before processing events */
+ cc_iowrite(drvdata, CC_HOST_RGF_ICR_REG_OFFSET, irr);
+
+ /* RNG interrupt - most probable */
+ if (irr & CC_HOST_RNG_IRQ_MASK) {
+ /* Mask RNG interrupts - will be unmasked in deferred work */
+ cc_iowrite(drvdata, CC_RNG_IMR_REG_OFFSET, 0xFFFFFFFF);
+
+ /* We clear RNG interrupt here,
+ * so it does not fire again once we unmask RNG interrupts.
+ */
+ cc_iowrite(drvdata, CC_HOST_RGF_ICR_REG_OFFSET,
+ CC_HOST_RNG_IRQ_MASK);
+
+ irr &= ~CC_HOST_RNG_IRQ_MASK;
+
+ /* schedule execution of deferred work handler */
+ schedule_work(&drvdata->compwork);
+ }
+
+ if (irr) {
+ dev_dbg_ratelimited(dev,
+ "IRR includes unknown cause bits (0x%08X)\n",
+ irr);
+ /* Just warning */
+ }
+
+ return IRQ_HANDLED;
+}
+
+static void cc_trng_startwork_handler(struct work_struct *w)
+{
+ struct cctrng_drvdata *drvdata =
+ container_of(w, struct cctrng_drvdata, startwork);
+
+ drvdata->active_rosc = 0;
+ cc_trng_hw_trigger(drvdata);
+}
+
+
+static int cc_trng_clk_init(struct cctrng_drvdata *drvdata)
+{
+ struct clk *clk;
+ struct device *dev = &(drvdata->pdev->dev);
+ int rc = 0;
+
+ clk = devm_clk_get_optional(dev, NULL);
+ if (IS_ERR(clk)) {
+ if (PTR_ERR(clk) != -EPROBE_DEFER)
+ dev_err(dev, "Error getting clock: %pe\n", clk);
+ return PTR_ERR(clk);
+ }
+ drvdata->clk = clk;
+
+ rc = clk_prepare_enable(drvdata->clk);
+ if (rc) {
+ dev_err(dev, "Failed to enable clock\n");
+ return rc;
+ }
+
+ return 0;
+}
+
+static void cc_trng_clk_fini(struct cctrng_drvdata *drvdata)
+{
+ clk_disable_unprepare(drvdata->clk);
+}
+
+
+static int cctrng_probe(struct platform_device *pdev)
+{
+ struct resource *req_mem_cc_regs = NULL;
+ struct cctrng_drvdata *drvdata;
+ struct device *dev = &pdev->dev;
+ int rc = 0;
+ u32 val;
+ int irq;
+
+ drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL);
+ if (!drvdata)
+ return -ENOMEM;
+
+ drvdata->rng.name = devm_kstrdup(dev, dev_name(dev), GFP_KERNEL);
+ if (!drvdata->rng.name)
+ return -ENOMEM;
+
+ drvdata->rng.read = cctrng_read;
+ drvdata->rng.priv = (unsigned long)drvdata;
+ drvdata->rng.quality = CC_TRNG_QUALITY;
+
+ platform_set_drvdata(pdev, drvdata);
+ drvdata->pdev = pdev;
+
+ drvdata->circ.buf = (char *)drvdata->data_buf;
+
+ /* Get device resources */
+ /* First CC registers space */
+ req_mem_cc_regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ /* Map registers space */
+ drvdata->cc_base = devm_ioremap_resource(dev, req_mem_cc_regs);
+ if (IS_ERR(drvdata->cc_base)) {
+ dev_err(dev, "Failed to ioremap registers");
+ return PTR_ERR(drvdata->cc_base);
+ }
+
+ dev_dbg(dev, "Got MEM resource (%s): %pR\n", req_mem_cc_regs->name,
+ req_mem_cc_regs);
+ dev_dbg(dev, "CC registers mapped from %pa to 0x%p\n",
+ &req_mem_cc_regs->start, drvdata->cc_base);
+
+ /* Then IRQ */
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0) {
+ dev_err(dev, "Failed getting IRQ resource\n");
+ return irq;
+ }
+
+ /* parse sampling rate from device tree */
+ rc = cc_trng_parse_sampling_ratio(drvdata);
+ if (rc) {
+ dev_err(dev, "Failed to get legal sampling ratio for rosc\n");
+ return rc;
+ }
+
+ rc = cc_trng_clk_init(drvdata);
+ if (rc) {
+ dev_err(dev, "cc_trng_clk_init failed\n");
+ return rc;
+ }
+
+ INIT_WORK(&drvdata->compwork, cc_trng_compwork_handler);
+ INIT_WORK(&drvdata->startwork, cc_trng_startwork_handler);
+ spin_lock_init(&drvdata->read_lock);
+
+ /* register the driver isr function */
+ rc = devm_request_irq(dev, irq, cc_isr, IRQF_SHARED, "cctrng", drvdata);
+ if (rc) {
+ dev_err(dev, "Could not register to interrupt %d\n", irq);
+ goto post_clk_err;
+ }
+ dev_dbg(dev, "Registered to IRQ: %d\n", irq);
+
+ /* Clear all pending interrupts */
+ val = cc_ioread(drvdata, CC_HOST_RGF_IRR_REG_OFFSET);
+ dev_dbg(dev, "IRR=0x%08X\n", val);
+ cc_iowrite(drvdata, CC_HOST_RGF_ICR_REG_OFFSET, val);
+
+ /* unmask HOST RNG interrupt */
+ cc_iowrite(drvdata, CC_HOST_RGF_IMR_REG_OFFSET,
+ cc_ioread(drvdata, CC_HOST_RGF_IMR_REG_OFFSET) &
+ ~CC_HOST_RNG_IRQ_MASK);
+
+ /* init PM */
+ rc = cc_trng_pm_init(drvdata);
+ if (rc) {
+ dev_err(dev, "cc_trng_pm_init failed\n");
+ goto post_clk_err;
+ }
+
+ /* increment device's usage counter */
+ rc = cc_trng_pm_get(dev);
+ if (rc) {
+ dev_err(dev, "cc_trng_pm_get returned %x\n", rc);
+ goto post_pm_err;
+ }
+
+ /* set pending_hw to verify that HW won't be triggered from read */
+ atomic_set(&drvdata->pending_hw, 1);
+
+ /* registration of the hwrng device */
+ rc = hwrng_register(&drvdata->rng);
+ if (rc) {
+ dev_err(dev, "Could not register hwrng device.\n");
+ goto post_pm_err;
+ }
+
+ /* trigger HW to start generate data */
+ drvdata->active_rosc = 0;
+ cc_trng_hw_trigger(drvdata);
+
+ /* All set, we can allow auto-suspend */
+ cc_trng_pm_go(drvdata);
+
+ dev_info(dev, "ARM cctrng device initialized\n");
+
+ return 0;
+
+post_pm_err:
+ cc_trng_pm_fini(drvdata);
+
+post_clk_err:
+ cc_trng_clk_fini(drvdata);
+
+ return rc;
+}
+
+static int cctrng_remove(struct platform_device *pdev)
+{
+ struct cctrng_drvdata *drvdata = platform_get_drvdata(pdev);
+ struct device *dev = &pdev->dev;
+
+ dev_dbg(dev, "Releasing cctrng resources...\n");
+
+ hwrng_unregister(&drvdata->rng);
+
+ cc_trng_pm_fini(drvdata);
+
+ cc_trng_clk_fini(drvdata);
+
+ dev_info(dev, "ARM cctrng device terminated\n");
+
+ return 0;
+}
+
+static int __maybe_unused cctrng_suspend(struct device *dev)
+{
+ struct cctrng_drvdata *drvdata = dev_get_drvdata(dev);
+
+ dev_dbg(dev, "set HOST_POWER_DOWN_EN\n");
+ cc_iowrite(drvdata, CC_HOST_POWER_DOWN_EN_REG_OFFSET,
+ POWER_DOWN_ENABLE);
+
+ clk_disable_unprepare(drvdata->clk);
+
+ return 0;
+}
+
+static bool cctrng_wait_for_reset_completion(struct cctrng_drvdata *drvdata)
+{
+ unsigned int val;
+ unsigned int i;
+
+ for (i = 0; i < CC_HW_RESET_LOOP_COUNT; i++) {
+ /* in cc7x3 NVM_IS_IDLE indicates that CC reset is
+ * completed and device is fully functional
+ */
+ val = cc_ioread(drvdata, CC_NVM_IS_IDLE_REG_OFFSET);
+ if (val & BIT(CC_NVM_IS_IDLE_VALUE_BIT_SHIFT)) {
+ /* HW indicates reset completed */
+ return true;
+ }
+ /* allow other processes to be scheduled on the processor */
+ schedule();
+ }
+ /* reset not completed */
+ return false;
+}
+
+static int __maybe_unused cctrng_resume(struct device *dev)
+{
+ struct cctrng_drvdata *drvdata = dev_get_drvdata(dev);
+ int rc;
+
+ dev_dbg(dev, "unset HOST_POWER_DOWN_EN\n");
+ /* Enables the device source clk */
+ rc = clk_prepare_enable(drvdata->clk);
+ if (rc) {
+ dev_err(dev, "failed getting clock back on. We're toast.\n");
+ return rc;
+ }
+
+ /* wait for Cryptocell reset completion */
+ if (!cctrng_wait_for_reset_completion(drvdata)) {
+ dev_err(dev, "Cryptocell reset not completed");
+ return -EBUSY;
+ }
+
+ /* unmask HOST RNG interrupt */
+ cc_iowrite(drvdata, CC_HOST_RGF_IMR_REG_OFFSET,
+ cc_ioread(drvdata, CC_HOST_RGF_IMR_REG_OFFSET) &
+ ~CC_HOST_RNG_IRQ_MASK);
+
+ cc_iowrite(drvdata, CC_HOST_POWER_DOWN_EN_REG_OFFSET,
+ POWER_DOWN_DISABLE);
+
+ return 0;
+}
+
+static UNIVERSAL_DEV_PM_OPS(cctrng_pm, cctrng_suspend, cctrng_resume, NULL);
+
+static const struct of_device_id arm_cctrng_dt_match[] = {
+ { .compatible = "arm,cryptocell-713-trng", },
+ { .compatible = "arm,cryptocell-703-trng", },
+ {},
+};
+MODULE_DEVICE_TABLE(of, arm_cctrng_dt_match);
+
+static struct platform_driver cctrng_driver = {
+ .driver = {
+ .name = "cctrng",
+ .of_match_table = arm_cctrng_dt_match,
+ .pm = &cctrng_pm,
+ },
+ .probe = cctrng_probe,
+ .remove = cctrng_remove,
+};
+
+static int __init cctrng_mod_init(void)
+{
+ /* Compile time assertion checks */
+ BUILD_BUG_ON(CCTRNG_DATA_BUF_WORDS < 6);
+ BUILD_BUG_ON((CCTRNG_DATA_BUF_WORDS & (CCTRNG_DATA_BUF_WORDS-1)) != 0);
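+ /*
+  * Note: the checks above ensure the circular data buffer can hold at
+  * least one full EHR read (CC_TRNG_EHR_IN_WORDS = 6 words) and that its
+  * size is a power of two, presumably so read/write offsets can wrap
+  * with simple masking.
+  */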
+
+ return platform_driver_register(&cctrng_driver);
+}
+module_init(cctrng_mod_init);
+
+static void __exit cctrng_mod_exit(void)
+{
+ platform_driver_unregister(&cctrng_driver);
+}
+module_exit(cctrng_mod_exit);
+
+/* Module description */
+MODULE_DESCRIPTION("ARM CryptoCell TRNG Driver");
+MODULE_AUTHOR("ARM");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/char/hw_random/cctrng.h b/drivers/char/hw_random/cctrng.h
new file mode 100644
index 000000000000..1f2fde95adcb
--- /dev/null
+++ b/drivers/char/hw_random/cctrng.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2019-2020 ARM Limited or its affiliates. */
+
+#include <linux/bitops.h>
+
+#define POWER_DOWN_ENABLE 0x01
+#define POWER_DOWN_DISABLE 0x00
+
+/* hwrng quality: bits of true entropy per 1024 bits of input */
+#define CC_TRNG_QUALITY 1024
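+/* (a value of 1024 tells the hwrng core to credit the output as full entropy) */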
+
+/* CryptoCell TRNG HW definitions */
+#define CC_TRNG_NUM_OF_ROSCS 4
+/* The number of words generated in the entropy holding register (EHR):
+ * 6 words (192 bits) according to the HW implementation
+ */
+#define CC_TRNG_EHR_IN_WORDS 6
+#define CC_TRNG_EHR_IN_BITS (CC_TRNG_EHR_IN_WORDS * BITS_PER_TYPE(u32))
+
+#define CC_HOST_RNG_IRQ_MASK BIT(CC_HOST_RGF_IRR_RNG_INT_BIT_SHIFT)
+
+/* RNG interrupt mask */
+#define CC_RNG_INT_MASK (BIT(CC_RNG_IMR_EHR_VALID_INT_MASK_BIT_SHIFT) | \
+ BIT(CC_RNG_IMR_AUTOCORR_ERR_INT_MASK_BIT_SHIFT) | \
+ BIT(CC_RNG_IMR_CRNGT_ERR_INT_MASK_BIT_SHIFT) | \
+ BIT(CC_RNG_IMR_VN_ERR_INT_MASK_BIT_SHIFT) | \
+ BIT(CC_RNG_IMR_WATCHDOG_INT_MASK_BIT_SHIFT))
+
+// --------------------------------------
+// BLOCK: RNG
+// --------------------------------------
+#define CC_RNG_IMR_REG_OFFSET 0x0100UL
+#define CC_RNG_IMR_EHR_VALID_INT_MASK_BIT_SHIFT 0x0UL
+#define CC_RNG_IMR_AUTOCORR_ERR_INT_MASK_BIT_SHIFT 0x1UL
+#define CC_RNG_IMR_CRNGT_ERR_INT_MASK_BIT_SHIFT 0x2UL
+#define CC_RNG_IMR_VN_ERR_INT_MASK_BIT_SHIFT 0x3UL
+#define CC_RNG_IMR_WATCHDOG_INT_MASK_BIT_SHIFT 0x4UL
+#define CC_RNG_ISR_REG_OFFSET 0x0104UL
+#define CC_RNG_ISR_EHR_VALID_BIT_SHIFT 0x0UL
+#define CC_RNG_ISR_EHR_VALID_BIT_SIZE 0x1UL
+#define CC_RNG_ISR_AUTOCORR_ERR_BIT_SHIFT 0x1UL
+#define CC_RNG_ISR_AUTOCORR_ERR_BIT_SIZE 0x1UL
+#define CC_RNG_ISR_CRNGT_ERR_BIT_SHIFT 0x2UL
+#define CC_RNG_ISR_CRNGT_ERR_BIT_SIZE 0x1UL
+#define CC_RNG_ISR_WATCHDOG_BIT_SHIFT 0x4UL
+#define CC_RNG_ISR_WATCHDOG_BIT_SIZE 0x1UL
+#define CC_RNG_ICR_REG_OFFSET 0x0108UL
+#define CC_TRNG_CONFIG_REG_OFFSET 0x010CUL
+#define CC_EHR_DATA_0_REG_OFFSET 0x0114UL
+#define CC_RND_SOURCE_ENABLE_REG_OFFSET 0x012CUL
+#define CC_SAMPLE_CNT1_REG_OFFSET 0x0130UL
+#define CC_TRNG_DEBUG_CONTROL_REG_OFFSET 0x0138UL
+#define CC_RNG_SW_RESET_REG_OFFSET 0x0140UL
+#define CC_RNG_CLK_ENABLE_REG_OFFSET 0x01C4UL
+#define CC_RNG_DMA_ENABLE_REG_OFFSET 0x01C8UL
+#define CC_RNG_WATCHDOG_VAL_REG_OFFSET 0x01D8UL
+// --------------------------------------
+// BLOCK: SEC_HOST_RGF
+// --------------------------------------
+#define CC_HOST_RGF_IRR_REG_OFFSET 0x0A00UL
+#define CC_HOST_RGF_IRR_RNG_INT_BIT_SHIFT 0xAUL
+#define CC_HOST_RGF_IMR_REG_OFFSET 0x0A04UL
+#define CC_HOST_RGF_ICR_REG_OFFSET 0x0A08UL
+
+#define CC_HOST_POWER_DOWN_EN_REG_OFFSET 0x0A78UL
+
+// --------------------------------------
+// BLOCK: NVM
+// --------------------------------------
+#define CC_NVM_IS_IDLE_REG_OFFSET 0x0F10UL
+#define CC_NVM_IS_IDLE_VALUE_BIT_SHIFT 0x0UL
+#define CC_NVM_IS_IDLE_VALUE_BIT_SIZE 0x1UL
diff --git a/drivers/char/hw_random/omap-rng.c b/drivers/char/hw_random/omap-rng.c
index 65952393e1bb..7290c603fcb8 100644
--- a/drivers/char/hw_random/omap-rng.c
+++ b/drivers/char/hw_random/omap-rng.c
@@ -392,11 +392,8 @@ static int of_get_omap_rng_device_details(struct omap_rng_dev *priv,
if (of_device_is_compatible(dev->of_node, "ti,omap4-rng") ||
of_device_is_compatible(dev->of_node, "inside-secure,safexcel-eip76")) {
irq = platform_get_irq(pdev, 0);
- if (irq < 0) {
- dev_err(dev, "%s: error getting IRQ resource - %d\n",
- __func__, irq);
+ if (irq < 0)
return irq;
- }
err = devm_request_irq(dev, irq, omap4_rng_irq,
IRQF_TRIGGER_NONE, dev_name(dev), priv);
diff --git a/drivers/char/hw_random/optee-rng.c b/drivers/char/hw_random/optee-rng.c
index ddfbabaa5f8f..49b2e02537dd 100644
--- a/drivers/char/hw_random/optee-rng.c
+++ b/drivers/char/hw_random/optee-rng.c
@@ -226,7 +226,7 @@ static int optee_rng_probe(struct device *dev)
return -ENODEV;
/* Open session with hwrng Trusted App */
- memcpy(sess_arg.uuid, rng_device->id.uuid.b, TEE_IOCTL_UUID_LEN);
+ export_uuid(sess_arg.uuid, &rng_device->id.uuid);
sess_arg.clnt_login = TEE_IOCTL_LOGIN_PUBLIC;
sess_arg.num_params = 0;
diff --git a/drivers/char/hw_random/xgene-rng.c b/drivers/char/hw_random/xgene-rng.c
index d7516a446987..008e6db9ce01 100644
--- a/drivers/char/hw_random/xgene-rng.c
+++ b/drivers/char/hw_random/xgene-rng.c
@@ -328,10 +328,8 @@ static int xgene_rng_probe(struct platform_device *pdev)
return PTR_ERR(ctx->csr_base);
rc = platform_get_irq(pdev, 0);
- if (rc < 0) {
- dev_err(&pdev->dev, "No IRQ resource\n");
+ if (rc < 0)
return rc;
- }
ctx->irq = rc;
dev_dbg(&pdev->dev, "APM X-Gene RNG BASE %p ALARM IRQ %d",
diff --git a/drivers/char/ipmi/Kconfig b/drivers/char/ipmi/Kconfig
index 7dc2c3ec4051..07847d9a459a 100644
--- a/drivers/char/ipmi/Kconfig
+++ b/drivers/char/ipmi/Kconfig
@@ -14,7 +14,7 @@ menuconfig IPMI_HANDLER
IPMI is a standard for managing sensors (temperature,
voltage, etc.) in a system.
- See <file:Documentation/IPMI.txt> for more details on the driver.
+ See <file:Documentation/driver-api/ipmi.rst> for more details on the driver.
If unsure, say N.
diff --git a/drivers/char/ipmi/ipmi_si_hotmod.c b/drivers/char/ipmi/ipmi_si_hotmod.c
index 42a925f8cf69..4fbb4e18bae2 100644
--- a/drivers/char/ipmi/ipmi_si_hotmod.c
+++ b/drivers/char/ipmi/ipmi_si_hotmod.c
@@ -18,7 +18,7 @@ static int hotmod_handler(const char *val, const struct kernel_param *kp);
module_param_call(hotmod, hotmod_handler, NULL, NULL, 0200);
MODULE_PARM_DESC(hotmod, "Add and remove interfaces. See"
- " Documentation/IPMI.txt in the kernel sources for the"
+ " Documentation/driver-api/ipmi.rst in the kernel sources for the"
" gory details.");
/*
diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
index c7cc8538b84a..77b8d551ae7f 100644
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -968,7 +968,7 @@ static inline bool ipmi_thread_busy_wait(enum si_sm_result smi_result,
* that are not BT and do not have interrupts. It starts spinning
* when an operation is complete or until max_busy tells it to stop
* (if that is enabled). See the paragraph on kimid_max_busy_us in
- * Documentation/IPMI.txt for details.
+ * Documentation/driver-api/ipmi.rst for details.
*/
static int ipmi_thread(void *data)
{
diff --git a/drivers/char/nvram.c b/drivers/char/nvram.c
index 4667844eee69..8206412d25ba 100644
--- a/drivers/char/nvram.c
+++ b/drivers/char/nvram.c
@@ -232,8 +232,6 @@ static ssize_t nvram_misc_read(struct file *file, char __user *buf,
ssize_t ret;
- if (!access_ok(buf, count))
- return -EFAULT;
if (*ppos >= nvram_size)
return 0;
@@ -264,8 +262,6 @@ static ssize_t nvram_misc_write(struct file *file, const char __user *buf,
char *tmp;
ssize_t ret;
- if (!access_ok(buf, count))
- return -EFAULT;
if (*ppos >= nvram_size)
return 0;
diff --git a/drivers/char/pcmcia/cm4000_cs.c b/drivers/char/pcmcia/cm4000_cs.c
index 4edb4174a1e2..89681f07bc78 100644
--- a/drivers/char/pcmcia/cm4000_cs.c
+++ b/drivers/char/pcmcia/cm4000_cs.c
@@ -1404,7 +1404,6 @@ static long cmm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
unsigned int iobase = dev->p_dev->resource[0]->start;
struct inode *inode = file_inode(filp);
struct pcmcia_device *link;
- int size;
int rc;
void __user *argp = (void __user *)arg;
#ifdef CM4000_DEBUG
@@ -1441,19 +1440,6 @@ static long cmm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
DEBUGP(4, dev, "iocnr mismatch\n");
goto out;
}
- size = _IOC_SIZE(cmd);
- rc = -EFAULT;
- DEBUGP(4, dev, "iocdir=%.4x iocr=%.4x iocw=%.4x iocsize=%d cmd=%.4x\n",
- _IOC_DIR(cmd), _IOC_READ, _IOC_WRITE, size, cmd);
-
- if (_IOC_DIR(cmd) & _IOC_READ) {
- if (!access_ok(argp, size))
- goto out;
- }
- if (_IOC_DIR(cmd) & _IOC_WRITE) {
- if (!access_ok(argp, size))
- goto out;
- }
rc = 0;
switch (cmd) {
diff --git a/drivers/char/random.c b/drivers/char/random.c
index 1e0db78b83ba..a7cf6aa65908 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -327,7 +327,6 @@
#include <linux/spinlock.h>
#include <linux/kthread.h>
#include <linux/percpu.h>
-#include <linux/cryptohash.h>
#include <linux/fips.h>
#include <linux/ptrace.h>
#include <linux/workqueue.h>
@@ -337,6 +336,7 @@
#include <linux/completion.h>
#include <linux/uuid.h>
#include <crypto/chacha.h>
+#include <crypto/sha.h>
#include <asm/processor.h>
#include <linux/uaccess.h>
@@ -1397,14 +1397,14 @@ static void extract_buf(struct entropy_store *r, __u8 *out)
__u32 w[5];
unsigned long l[LONGS(20)];
} hash;
- __u32 workspace[SHA_WORKSPACE_WORDS];
+ __u32 workspace[SHA1_WORKSPACE_WORDS];
unsigned long flags;
/*
* If we have an architectural hardware random number
* generator, use it for SHA's initial vector
*/
- sha_init(hash.w);
+ sha1_init(hash.w);
for (i = 0; i < LONGS(20); i++) {
unsigned long v;
if (!arch_get_random_long(&v))
@@ -1415,7 +1415,7 @@ static void extract_buf(struct entropy_store *r, __u8 *out)
/* Generate a hash across the pool, 16 words (512 bits) at a time */
spin_lock_irqsave(&r->lock, flags);
for (i = 0; i < r->poolinfo->poolwords; i += 16)
- sha_transform(hash.w, (__u8 *)(r->pool + i), workspace);
+ sha1_transform(hash.w, (__u8 *)(r->pool + i), workspace);
/*
* We mix the hash back into the pool to prevent backtracking
diff --git a/drivers/char/tpm/eventlog/tpm2.c b/drivers/char/tpm/eventlog/tpm2.c
index e741b1157525..37a05800980c 100644
--- a/drivers/char/tpm/eventlog/tpm2.c
+++ b/drivers/char/tpm/eventlog/tpm2.c
@@ -51,8 +51,7 @@ static void *tpm2_bios_measurements_start(struct seq_file *m, loff_t *pos)
int i;
event_header = addr;
- size = sizeof(struct tcg_pcr_event) - sizeof(event_header->event)
- + event_header->event_size;
+ size = struct_size(event_header, event, event_header->event_size);
if (*pos == 0) {
if (addr + size < limit) {
@@ -98,8 +97,8 @@ static void *tpm2_bios_measurements_next(struct seq_file *m, void *v,
event_header = log->bios_event_log;
if (v == SEQ_START_TOKEN) {
- event_size = sizeof(struct tcg_pcr_event) -
- sizeof(event_header->event) + event_header->event_size;
+ event_size = struct_size(event_header, event,
+ event_header->event_size);
marker = event_header;
} else {
event = v;
@@ -136,9 +135,8 @@ static int tpm2_binary_bios_measurements_show(struct seq_file *m, void *v)
size_t size;
if (v == SEQ_START_TOKEN) {
- size = sizeof(struct tcg_pcr_event) -
- sizeof(event_header->event) + event_header->event_size;
-
+ size = struct_size(event_header, event,
+ event_header->event_size);
temp_ptr = event_header;
if (size > 0)
diff --git a/drivers/char/tpm/tpm_ftpm_tee.c b/drivers/char/tpm/tpm_ftpm_tee.c
index 22bf553ccf9d..2491a2cb54a2 100644
--- a/drivers/char/tpm/tpm_ftpm_tee.c
+++ b/drivers/char/tpm/tpm_ftpm_tee.c
@@ -241,7 +241,7 @@ static int ftpm_tee_probe(struct platform_device *pdev)
/* Open a session with fTPM TA */
memset(&sess_arg, 0, sizeof(sess_arg));
- memcpy(sess_arg.uuid, ftpm_ta_uuid.b, TEE_IOCTL_UUID_LEN);
+ export_uuid(sess_arg.uuid, &ftpm_ta_uuid);
sess_arg.clnt_login = TEE_IOCTL_LOGIN_PUBLIC;
sess_arg.num_params = 0;
diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index 3cbaec925606..00c5e3acee46 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -871,7 +871,7 @@ static int pipe_to_sg(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
return 0;
/* Try lock this page */
- if (pipe_buf_steal(pipe, buf) == 0) {
+ if (pipe_buf_try_steal(pipe, buf)) {
/* Get reference and unlock page for moving */
get_page(buf->page);
unlock_page(buf->page);
diff --git a/drivers/clk/clk-qoriq.c b/drivers/clk/clk-qoriq.c
index d5946f7486d6..374afcab89af 100644
--- a/drivers/clk/clk-qoriq.c
+++ b/drivers/clk/clk-qoriq.c
@@ -95,6 +95,7 @@ struct clockgen {
};
static struct clockgen clockgen;
+static bool add_cpufreq_dev __initdata;
static void cg_out(struct clockgen *cg, u32 val, u32 __iomem *reg)
{
@@ -1019,7 +1020,7 @@ static void __init create_muxes(struct clockgen *cg)
}
}
-static void __init clockgen_init(struct device_node *np);
+static void __init _clockgen_init(struct device_node *np, bool legacy);
/*
* Legacy nodes may get probed before the parent clockgen node.
@@ -1030,7 +1031,7 @@ static void __init clockgen_init(struct device_node *np);
static void __init legacy_init_clockgen(struct device_node *np)
{
if (!clockgen.node)
- clockgen_init(of_get_parent(np));
+ _clockgen_init(of_get_parent(np), true);
}
/* Legacy node */
@@ -1447,7 +1448,7 @@ static bool __init has_erratum_a4510(void)
}
#endif
-static void __init clockgen_init(struct device_node *np)
+static void __init _clockgen_init(struct device_node *np, bool legacy)
{
int i, ret;
bool is_old_ls1021a = false;
@@ -1516,12 +1517,35 @@ static void __init clockgen_init(struct device_node *np)
__func__, np, ret);
}
+ /* Don't create cpufreq device for legacy clockgen blocks */
+ add_cpufreq_dev = !legacy;
+
return;
err:
iounmap(clockgen.regs);
clockgen.regs = NULL;
}
+static void __init clockgen_init(struct device_node *np)
+{
+ _clockgen_init(np, false);
+}
+
+static int __init clockgen_cpufreq_init(void)
+{
+ struct platform_device *pdev;
+
+ if (add_cpufreq_dev) {
+ pdev = platform_device_register_simple("qoriq-cpufreq", -1,
+ NULL, 0);
+ if (IS_ERR(pdev))
+ pr_err("Couldn't register qoriq-cpufreq err=%ld\n",
+ PTR_ERR(pdev));
+ }
+ return 0;
+}
+device_initcall(clockgen_cpufreq_init);
+
CLK_OF_DECLARE(qoriq_clockgen_1, "fsl,qoriq-clockgen-1.0", clockgen_init);
CLK_OF_DECLARE(qoriq_clockgen_2, "fsl,qoriq-clockgen-2.0", clockgen_init);
CLK_OF_DECLARE(qoriq_clockgen_b4420, "fsl,b4420-clockgen", clockgen_init);
diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index 2dfb30b963c4..407f6919604c 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -114,7 +114,11 @@ static int clk_pm_runtime_get(struct clk_core *core)
return 0;
ret = pm_runtime_get_sync(core->dev);
- return ret < 0 ? ret : 0;
+ if (ret < 0) {
+ pm_runtime_put_noidle(core->dev);
+ return ret;
+ }
+ return 0;
}
static void clk_pm_runtime_put(struct clk_core *core)
diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
index f2142e6bbea3..91418381fcd4 100644
--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig
@@ -120,7 +120,6 @@ config OWL_TIMER
config RDA_TIMER
bool "RDA timer driver" if COMPILE_TEST
- depends on GENERIC_CLOCKEVENTS
select CLKSRC_MMIO
select TIMER_OF
help
@@ -562,16 +561,16 @@ config CLKSRC_VERSATILE
bool "ARM Versatile (Express) reference platforms clock source" if COMPILE_TEST
depends on GENERIC_SCHED_CLOCK && !ARCH_USES_GETTIMEOFFSET
select TIMER_OF
- default y if MFD_VEXPRESS_SYSREG
+ default y if (ARCH_VEXPRESS || ARCH_VERSATILE) && ARM
help
This option enables clock source based on free running
counter available in the "System Registers" block of
- ARM Versatile, RealView and Versatile Express reference
- platforms.
+ ARM Versatile and Versatile Express reference platforms.
config CLKSRC_MIPS_GIC
bool
depends on MIPS_GIC
+ select CLOCKSOURCE_WATCHDOG
select TIMER_OF
config CLKSRC_TANGO_XTAL
@@ -709,6 +708,7 @@ config MICROCHIP_PIT64B
bool "Microchip PIT64B support"
depends on OF || COMPILE_TEST
select CLKSRC_MMIO
+ select TIMER_OF
help
This option enables Microchip PIT64B timer for Atmel
based system. It supports the oneshot, the periodic
diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile
index 641ba5383ab5..bdda1a2e4097 100644
--- a/drivers/clocksource/Makefile
+++ b/drivers/clocksource/Makefile
@@ -18,6 +18,7 @@ obj-$(CONFIG_CLKSRC_MMIO) += mmio.o
obj-$(CONFIG_DAVINCI_TIMER) += timer-davinci.o
obj-$(CONFIG_DIGICOLOR_TIMER) += timer-digicolor.o
obj-$(CONFIG_OMAP_DM_TIMER) += timer-ti-dm.o
+obj-$(CONFIG_OMAP_DM_TIMER) += timer-ti-dm-systimer.o
obj-$(CONFIG_DW_APB_TIMER) += dw_apb_timer.o
obj-$(CONFIG_DW_APB_TIMER_OF) += dw_apb_timer_of.o
obj-$(CONFIG_FTTMR010_TIMER) += timer-fttmr010.o
diff --git a/drivers/clocksource/arc_timer.c b/drivers/clocksource/arc_timer.c
index b29b5a75333e..de93dd1a8c7b 100644
--- a/drivers/clocksource/arc_timer.c
+++ b/drivers/clocksource/arc_timer.c
@@ -334,10 +334,8 @@ static int __init arc_clockevent_setup(struct device_node *node)
}
ret = arc_get_timer_clk(node);
- if (ret) {
- pr_err("clockevent: missing clk\n");
+ if (ret)
return ret;
- }
/* Needs apriori irq_set_percpu_devid() done in intc map function */
ret = request_percpu_irq(arc_timer_irq, timer_irq_handler,
diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index 2204a444e801..ecf7b7db2d05 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -1588,10 +1588,8 @@ static int __init arch_timer_acpi_init(struct acpi_table_header *table)
arch_timers_present |= ARCH_TIMER_TYPE_CP15;
ret = acpi_gtdt_init(table, &platform_timer_count);
- if (ret) {
- pr_err("Failed to init GTDT table.\n");
+ if (ret)
return ret;
- }
arch_timer_ppi[ARCH_TIMER_PHYS_NONSECURE_PPI] =
acpi_gtdt_map_ppi(ARCH_TIMER_PHYS_NONSECURE_PPI);
diff --git a/drivers/clocksource/dw_apb_timer.c b/drivers/clocksource/dw_apb_timer.c
index b207a77b0831..f5f24a95ee82 100644
--- a/drivers/clocksource/dw_apb_timer.c
+++ b/drivers/clocksource/dw_apb_timer.c
@@ -222,7 +222,8 @@ static int apbt_next_event(unsigned long delta,
/**
* dw_apb_clockevent_init() - use an APB timer as a clock_event_device
*
- * @cpu: The CPU the events will be targeted at.
+ * @cpu: The CPU the events will be targeted at or -1 if CPU affiliation
+ * isn't required.
* @name: The name used for the timer and the IRQ for it.
* @rating: The rating to give the timer.
* @base: I/O base for the timer registers.
@@ -257,7 +258,7 @@ dw_apb_clockevent_init(int cpu, const char *name, unsigned rating,
dw_ced->ced.max_delta_ticks = 0x7fffffff;
dw_ced->ced.min_delta_ns = clockevent_delta2ns(5000, &dw_ced->ced);
dw_ced->ced.min_delta_ticks = 5000;
- dw_ced->ced.cpumask = cpumask_of(cpu);
+ dw_ced->ced.cpumask = cpu < 0 ? cpu_possible_mask : cpumask_of(cpu);
dw_ced->ced.features = CLOCK_EVT_FEAT_PERIODIC |
CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_DYNIRQ;
dw_ced->ced.set_state_shutdown = apbt_shutdown;
diff --git a/drivers/clocksource/dw_apb_timer_of.c b/drivers/clocksource/dw_apb_timer_of.c
index 8c28b127759f..ab3ddebe8344 100644
--- a/drivers/clocksource/dw_apb_timer_of.c
+++ b/drivers/clocksource/dw_apb_timer_of.c
@@ -73,7 +73,7 @@ static void __init add_clockevent(struct device_node *event_timer)
timer_get_base_and_rate(event_timer, &iobase, &rate);
- ced = dw_apb_clockevent_init(0, event_timer->name, 300, iobase, irq,
+ ced = dw_apb_clockevent_init(-1, event_timer->name, 300, iobase, irq,
rate);
if (!ced)
panic("Unable to initialise clockevent device");
@@ -147,10 +147,6 @@ static int num_called;
static int __init dw_apb_timer_init(struct device_node *timer)
{
switch (num_called) {
- case 0:
- pr_debug("%s: found clockevent timer\n", __func__);
- add_clockevent(timer);
- break;
case 1:
pr_debug("%s: found clocksource timer\n", __func__);
add_clocksource(timer);
@@ -161,6 +157,8 @@ static int __init dw_apb_timer_init(struct device_node *timer)
#endif
break;
default:
+ pr_debug("%s: found clockevent timer\n", __func__);
+ add_clockevent(timer);
break;
}
diff --git a/drivers/clocksource/mips-gic-timer.c b/drivers/clocksource/mips-gic-timer.c
index 8b5f8ae723cb..be4175f415ba 100644
--- a/drivers/clocksource/mips-gic-timer.c
+++ b/drivers/clocksource/mips-gic-timer.c
@@ -16,6 +16,7 @@
#include <linux/notifier.h>
#include <linux/of_irq.h>
#include <linux/percpu.h>
+#include <linux/sched_clock.h>
#include <linux/smp.h>
#include <linux/time.h>
#include <asm/mips-cps.h>
@@ -23,14 +24,14 @@
static DEFINE_PER_CPU(struct clock_event_device, gic_clockevent_device);
static int gic_timer_irq;
static unsigned int gic_frequency;
+static bool __read_mostly gic_clock_unstable;
-static u64 notrace gic_read_count(void)
+static void gic_clocksource_unstable(char *reason);
+
+static u64 notrace gic_read_count_2x32(void)
{
unsigned int hi, hi2, lo;
- if (mips_cm_is64)
- return read_gic_counter();
-
do {
hi = read_gic_counter_32h();
lo = read_gic_counter_32l();
@@ -40,6 +41,19 @@ static u64 notrace gic_read_count(void)
return (((u64) hi) << 32) + lo;
}
+static u64 notrace gic_read_count_64(void)
+{
+ return read_gic_counter();
+}
+
+static u64 notrace gic_read_count(void)
+{
+ if (mips_cm_is64)
+ return gic_read_count_64();
+
+ return gic_read_count_2x32();
+}
+
static int gic_next_event(unsigned long delta, struct clock_event_device *evt)
{
int cpu = cpumask_first(evt->cpumask);
@@ -114,8 +128,10 @@ static int gic_clk_notifier(struct notifier_block *nb, unsigned long action,
{
struct clk_notifier_data *cnd = data;
- if (action == POST_RATE_CHANGE)
+ if (action == POST_RATE_CHANGE) {
+ gic_clocksource_unstable("ref clock rate change");
on_each_cpu(gic_update_frequency, (void *)cnd->new_rate, 1);
+ }
return NOTIFY_OK;
}
@@ -161,6 +177,18 @@ static struct clocksource gic_clocksource = {
.vdso_clock_mode = VDSO_CLOCKMODE_GIC,
};
+static void gic_clocksource_unstable(char *reason)
+{
+ if (gic_clock_unstable)
+ return;
+
+ gic_clock_unstable = true;
+
+ pr_info("GIC timer is unstable due to %s\n", reason);
+
+ clocksource_mark_unstable(&gic_clocksource);
+}
+
static int __init __gic_clocksource_init(void)
{
unsigned int count_width;
@@ -228,6 +256,18 @@ static int __init gic_clocksource_of_init(struct device_node *node)
/* And finally start the counter */
clear_gic_config(GIC_CONFIG_COUNTSTOP);
+ /*
+ * It's safe to use the MIPS GIC timer as a sched clock source only if
+ * its ticks are stable, which is true either on platforms with a stable
+ * CPU frequency or on platforms with CM3, where CPU frequency changes
+ * are performed by the CPC core clocks divider.
+ */
+ if (mips_cm_revision() >= CM_REV_CM3 || !IS_ENABLED(CONFIG_CPU_FREQ)) {
+ sched_clock_register(mips_cm_is64 ?
+ gic_read_count_64 : gic_read_count_2x32,
+ 64, gic_frequency);
+ }
+
return 0;
}
TIMER_OF_DECLARE(mips_gic_timer, "mti,gic-timer",
diff --git a/drivers/clocksource/timer-atmel-st.c b/drivers/clocksource/timer-atmel-st.c
index ab0aabfae5f0..73e8aee445da 100644
--- a/drivers/clocksource/timer-atmel-st.c
+++ b/drivers/clocksource/timer-atmel-st.c
@@ -139,7 +139,6 @@ static int
clkevt32k_next_event(unsigned long delta, struct clock_event_device *dev)
{
u32 alm;
- int status = 0;
unsigned int val;
BUG_ON(delta < 2);
@@ -163,7 +162,7 @@ clkevt32k_next_event(unsigned long delta, struct clock_event_device *dev)
alm += delta;
regmap_write(regmap_st, AT91_ST_RTAR, alm);
- return status;
+ return 0;
}
static struct clock_event_device clkevt = {
diff --git a/drivers/clocksource/timer-davinci.c b/drivers/clocksource/timer-davinci.c
index e421946a91c5..bb4eee31ae08 100644
--- a/drivers/clocksource/timer-davinci.c
+++ b/drivers/clocksource/timer-davinci.c
@@ -18,7 +18,7 @@
#include <clocksource/timer-davinci.h>
#undef pr_fmt
-#define pr_fmt(fmt) "%s: " fmt "\n", __func__
+#define pr_fmt(fmt) "%s: " fmt, __func__
#define DAVINCI_TIMER_REG_TIM12 0x10
#define DAVINCI_TIMER_REG_TIM34 0x14
@@ -250,31 +250,29 @@ int __init davinci_timer_register(struct clk *clk,
rv = clk_prepare_enable(clk);
if (rv) {
- pr_err("Unable to prepare and enable the timer clock");
+ pr_err("Unable to prepare and enable the timer clock\n");
return rv;
}
if (!request_mem_region(timer_cfg->reg.start,
resource_size(&timer_cfg->reg),
"davinci-timer")) {
- pr_err("Unable to request memory region");
+ pr_err("Unable to request memory region\n");
return -EBUSY;
}
base = ioremap(timer_cfg->reg.start, resource_size(&timer_cfg->reg));
if (!base) {
- pr_err("Unable to map the register range");
+ pr_err("Unable to map the register range\n");
return -ENOMEM;
}
davinci_timer_init(base);
tick_rate = clk_get_rate(clk);
- clockevent = kzalloc(sizeof(*clockevent), GFP_KERNEL | __GFP_NOFAIL);
- if (!clockevent) {
- pr_err("Error allocating memory for clockevent data");
+ clockevent = kzalloc(sizeof(*clockevent), GFP_KERNEL);
+ if (!clockevent)
return -ENOMEM;
- }
clockevent->dev.name = "tim12";
clockevent->dev.features = CLOCK_EVT_FEAT_ONESHOT;
@@ -298,7 +296,7 @@ int __init davinci_timer_register(struct clk *clk,
davinci_timer_irq_timer, IRQF_TIMER,
"clockevent/tim12", clockevent);
if (rv) {
- pr_err("Unable to request the clockevent interrupt");
+ pr_err("Unable to request the clockevent interrupt\n");
return rv;
}
@@ -325,7 +323,7 @@ int __init davinci_timer_register(struct clk *clk,
rv = clocksource_register_hz(&davinci_clocksource.dev, tick_rate);
if (rv) {
- pr_err("Unable to register clocksource");
+ pr_err("Unable to register clocksource\n");
return rv;
}
@@ -343,20 +341,20 @@ static int __init of_davinci_timer_register(struct device_node *np)
rv = of_address_to_resource(np, 0, &timer_cfg.reg);
if (rv) {
- pr_err("Unable to get the register range for timer");
+ pr_err("Unable to get the register range for timer\n");
return rv;
}
rv = of_irq_to_resource_table(np, timer_cfg.irq,
DAVINCI_TIMER_NUM_IRQS);
if (rv != DAVINCI_TIMER_NUM_IRQS) {
- pr_err("Unable to get the interrupts for timer");
+ pr_err("Unable to get the interrupts for timer\n");
return rv;
}
clk = of_clk_get(np, 0);
if (IS_ERR(clk)) {
- pr_err("Unable to get the timer clock");
+ pr_err("Unable to get the timer clock\n");
return PTR_ERR(clk);
}
diff --git a/drivers/clocksource/timer-imx-tpm.c b/drivers/clocksource/timer-imx-tpm.c
index 6334a35fdc2f..2cdc077a39f5 100644
--- a/drivers/clocksource/timer-imx-tpm.c
+++ b/drivers/clocksource/timer-imx-tpm.c
@@ -61,17 +61,19 @@ static inline void tpm_irq_acknowledge(void)
writel(TPM_STATUS_CH0F, timer_base + TPM_STATUS);
}
-static struct delay_timer tpm_delay_timer;
-
static inline unsigned long tpm_read_counter(void)
{
return readl(timer_base + TPM_CNT);
}
+#if defined(CONFIG_ARM)
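+/* the struct delay_timer API (register_current_timer_delay) is arm32-only */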
+static struct delay_timer tpm_delay_timer;
+
static unsigned long tpm_read_current_timer(void)
{
return tpm_read_counter();
}
+#endif
static u64 notrace tpm_read_sched_clock(void)
{
@@ -144,9 +146,11 @@ static struct timer_of to_tpm = {
static int __init tpm_clocksource_init(void)
{
+#if defined(CONFIG_ARM)
tpm_delay_timer.read_current_timer = &tpm_read_current_timer;
tpm_delay_timer.freq = timer_of_rate(&to_tpm) >> 3;
register_current_timer_delay(&tpm_delay_timer);
+#endif
sched_clock_register(tpm_read_sched_clock, counter_width,
timer_of_rate(&to_tpm) >> 3);
diff --git a/drivers/clocksource/timer-ti-32k.c b/drivers/clocksource/timer-ti-32k.c
index abd5f158d6e2..ae12bbf3d68c 100644
--- a/drivers/clocksource/timer-ti-32k.c
+++ b/drivers/clocksource/timer-ti-32k.c
@@ -24,6 +24,7 @@
* Copyright (C) 2015 Texas Instruments Incorporated - http://www.ti.com
*/
+#include <linux/clk.h>
#include <linux/init.h>
#include <linux/time.h>
#include <linux/sched_clock.h>
@@ -76,6 +77,49 @@ static u64 notrace omap_32k_read_sched_clock(void)
return ti_32k_read_cycles(&ti_32k_timer.cs);
}
+static void __init ti_32k_timer_enable_clock(struct device_node *np,
+ const char *name)
+{
+ struct clk *clock;
+ int error;
+
+ clock = of_clk_get_by_name(np->parent, name);
+ if (IS_ERR(clock)) {
+ /* Only some SoCs have a separate interface clock */
+ if (PTR_ERR(clock) == -EINVAL && !strncmp("ick", name, 3))
+ return;
+
+ pr_warn("%s: could not get clock %s %li\n",
+ __func__, name, PTR_ERR(clock));
+ return;
+ }
+
+ error = clk_prepare_enable(clock);
+ if (error) {
+ pr_warn("%s: could not enable %s: %i\n",
+ __func__, name, error);
+ return;
+ }
+}
+
+static void __init ti_32k_timer_module_init(struct device_node *np,
+ void __iomem *base)
+{
+ void __iomem *sysc = base + 4;
+
+ if (!of_device_is_compatible(np->parent, "ti,sysc"))
+ return;
+
+ ti_32k_timer_enable_clock(np, "fck");
+ ti_32k_timer_enable_clock(np, "ick");
+
+ /*
+ * Force idle module as wkup domain is active with MPU.
+ * No need to tag the module disabled for ti-sysc probe.
+ */
+ writel_relaxed(0, sysc);
+}
+
static int __init ti_32k_timer_init(struct device_node *np)
{
int ret;
@@ -90,6 +134,7 @@ static int __init ti_32k_timer_init(struct device_node *np)
ti_32k_timer.cs.flags |= CLOCK_SOURCE_SUSPEND_NONSTOP;
ti_32k_timer.counter = ti_32k_timer.base;
+ ti_32k_timer_module_init(np, ti_32k_timer.base);
/*
* 32k sync Counter IP register offsets vary between the highlander
@@ -104,6 +149,8 @@ static int __init ti_32k_timer_init(struct device_node *np)
else
ti_32k_timer.counter += OMAP2_32KSYNCNT_CR_OFF_LOW;
+ pr_info("OMAP clocksource: 32k_counter at 32768 Hz\n");
+
ret = clocksource_register_hz(&ti_32k_timer.cs, 32768);
if (ret) {
pr_err("32k_counter: can't register clocksource\n");
@@ -111,7 +158,6 @@ static int __init ti_32k_timer_init(struct device_node *np)
}
sched_clock_register(omap_32k_read_sched_clock, 32, 32768);
- pr_info("OMAP clocksource: 32k_counter at 32768 Hz\n");
return 0;
}
diff --git a/drivers/clocksource/timer-ti-dm-systimer.c b/drivers/clocksource/timer-ti-dm-systimer.c
new file mode 100644
index 000000000000..6fd1f219a512
--- /dev/null
+++ b/drivers/clocksource/timer-ti-dm-systimer.c
@@ -0,0 +1,727 @@
+// SPDX-License-Identifier: GPL-2.0+
+#include <linux/clk.h>
+#include <linux/clocksource.h>
+#include <linux/clockchips.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/err.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/sched_clock.h>
+
+#include <linux/clk/clk-conf.h>
+
+#include <clocksource/timer-ti-dm.h>
+#include <dt-bindings/bus/ti-sysc.h>
+
+/* For type1, set SYSC_OMAP2_CLOCKACTIVITY for fck off on idle, l4 clock on */
+#define DMTIMER_TYPE1_ENABLE ((1 << 9) | (SYSC_IDLE_SMART << 3) | \
+ SYSC_OMAP2_ENAWAKEUP | SYSC_OMAP2_AUTOIDLE)
+
+#define DMTIMER_TYPE2_ENABLE (SYSC_IDLE_SMART_WKUP << 2)
+#define DMTIMER_RESET_WAIT 100000
+
+#define DMTIMER_INST_DONT_CARE ~0U
+
+static int counter_32k;
+static u32 clocksource;
+static u32 clockevent;
+
+/*
+ * Subset of the timer registers we use. Note that the register offsets
+ * depend on the timer revision detected.
+ */
+struct dmtimer_systimer {
+ void __iomem *base;
+ u8 sysc;
+ u8 irq_stat;
+ u8 irq_ena;
+ u8 pend;
+ u8 load;
+ u8 counter;
+ u8 ctrl;
+ u8 wakeup;
+ u8 ifctrl;
+ unsigned long rate;
+};
+
+struct dmtimer_clockevent {
+ struct clock_event_device dev;
+ struct dmtimer_systimer t;
+ u32 period;
+};
+
+struct dmtimer_clocksource {
+ struct clocksource dev;
+ struct dmtimer_systimer t;
+ unsigned int loadval;
+};
+
+/* Assumes v1 ip if bits [31:16] are zero */
+static bool dmtimer_systimer_revision1(struct dmtimer_systimer *t)
+{
+ u32 tidr = readl_relaxed(t->base);
+
+ return !(tidr >> 16);
+}
+
+static int __init dmtimer_systimer_type1_reset(struct dmtimer_systimer *t)
+{
+ void __iomem *syss = t->base + OMAP_TIMER_V1_SYS_STAT_OFFSET;
+ int ret;
+ u32 l;
+
+ writel_relaxed(BIT(1) | BIT(2), t->base + t->ifctrl);
+ ret = readl_poll_timeout_atomic(syss, l, l & BIT(0), 100,
+ DMTIMER_RESET_WAIT);
+
+ return ret;
+}
+
+/* Note we must use io_base instead of func_base for type2 OCP regs */
+static int __init dmtimer_systimer_type2_reset(struct dmtimer_systimer *t)
+{
+ void __iomem *sysc = t->base + t->sysc;
+ u32 l;
+
+ l = readl_relaxed(sysc);
+ l |= BIT(0);
+ writel_relaxed(l, sysc);
+
+ return readl_poll_timeout_atomic(sysc, l, !(l & BIT(0)), 100,
+ DMTIMER_RESET_WAIT);
+}
+
+static int __init dmtimer_systimer_reset(struct dmtimer_systimer *t)
+{
+ int ret;
+
+ if (dmtimer_systimer_revision1(t))
+ ret = dmtimer_systimer_type1_reset(t);
+ else
+ ret = dmtimer_systimer_type2_reset(t);
+ if (ret < 0) {
+ pr_err("%s failed with %i\n", __func__, ret);
+
+ return ret;
+ }
+
+ return 0;
+}
+
+static const struct of_device_id counter_match_table[] = {
+ { .compatible = "ti,omap-counter32k" },
+ { /* Sentinel */ },
+};
+
+/*
+ * Check if the SoC also has a usable working 32 KiHz counter. The 32 KiHz
+ * counter is handled by timer-ti-32k, but we need to detect it as it
+ * affects the preferred dmtimer system timer configuration. There is
+ * typically no use for a dmtimer clocksource if the 32 KiHz counter is
+ * present, except on am437x as described below.
+ */
+static void __init dmtimer_systimer_check_counter32k(void)
+{
+ struct device_node *np;
+
+ if (counter_32k)
+ return;
+
+ np = of_find_matching_node(NULL, counter_match_table);
+ if (!np) {
+ counter_32k = -ENODEV;
+
+ return;
+ }
+
+ if (of_device_is_available(np))
+ counter_32k = 1;
+ else
+ counter_32k = -ENODEV;
+
+ of_node_put(np);
+}
+
+static const struct of_device_id dmtimer_match_table[] = {
+ { .compatible = "ti,omap2420-timer", },
+ { .compatible = "ti,omap3430-timer", },
+ { .compatible = "ti,omap4430-timer", },
+ { .compatible = "ti,omap5430-timer", },
+ { .compatible = "ti,am335x-timer", },
+ { .compatible = "ti,am335x-timer-1ms", },
+ { .compatible = "ti,dm814-timer", },
+ { .compatible = "ti,dm816-timer", },
+ { /* Sentinel */ },
+};
+
+/*
+ * Checks that system timers are configured to not reset and idle during
+ * the generic timer-ti-dm device driver probe, and that the system timer
+ * source clocks are properly configured. Also, let's not hog any DSP- and
+ * PWM-capable timers unnecessarily as system timers.
+ */
+static bool __init dmtimer_is_preferred(struct device_node *np)
+{
+ if (!of_device_is_available(np))
+ return false;
+
+ if (!of_property_read_bool(np->parent,
+ "ti,no-reset-on-init"))
+ return false;
+
+ if (!of_property_read_bool(np->parent, "ti,no-idle"))
+ return false;
+
+ /* Secure gptimer12 is always clocked with a fixed source */
+ if (!of_property_read_bool(np, "ti,timer-secure")) {
+ if (!of_property_read_bool(np, "assigned-clocks"))
+ return false;
+
+ if (!of_property_read_bool(np, "assigned-clock-parents"))
+ return false;
+ }
+
+ if (of_property_read_bool(np, "ti,timer-dsp"))
+ return false;
+
+ if (of_property_read_bool(np, "ti,timer-pwm"))
+ return false;
+
+ return true;
+}
+
+/*
+ * Finds the first available usable always-on timer, and assigns it to either
+ * the clockevent or the clocksource depending on whether the counter_32k is
+ * available on the SoC.
+ *
+ * Some omap3 boards with an unreliable oscillator must not use the counter_32k
+ * or dmtimer1 with the 32 KiHz source. Additionally, boards with an unreliable
+ * oscillator should really mark counter_32k as disabled and delete the dmtimer1
+ * ti,always-on property, but let's not count on it. For these quirky cases,
+ * we prefer using the always-on secure dmtimer12 with the internal 32 KiHz
+ * clock as the clocksource, and any available dmtimer as clockevent.
+ *
+ * For am437x, we are using am335x style dmtimer clocksource. It is unclear
+ * if this quirk handling is really needed, but let's change it separately
+ * based on testing as it might cause side effects.
+ */
+static void __init dmtimer_systimer_assign_alwon(void)
+{
+ struct device_node *np;
+ u32 pa = 0;
+ bool quirk_unreliable_oscillator = false;
+
+ /* Quirk unreliable 32 KiHz oscillator with incomplete dts */
+ if (of_machine_is_compatible("ti,omap3-beagle") ||
+ of_machine_is_compatible("timll,omap3-devkit8000")) {
+ quirk_unreliable_oscillator = true;
+ counter_32k = -ENODEV;
+ }
+
+ /* Quirk am437x using am335x style dmtimer clocksource */
+ if (of_machine_is_compatible("ti,am43"))
+ counter_32k = -ENODEV;
+
+ for_each_matching_node(np, dmtimer_match_table) {
+ if (!dmtimer_is_preferred(np))
+ continue;
+
+ if (of_property_read_bool(np, "ti,timer-alwon")) {
+ const __be32 *addr;
+
+ addr = of_get_address(np, 0, NULL, NULL);
+ pa = of_translate_address(np, addr);
+ if (pa) {
+ /* Quirky omap3 boards must use dmtimer12 */
+ if (quirk_unreliable_oscillator &&
+ pa == 0x48318000)
+ continue;
+
+ of_node_put(np);
+ break;
+ }
+ }
+ }
+
+ /* Usually no need for a dmtimer clocksource if we have counter_32k */
+ if (counter_32k >= 0) {
+ clockevent = pa;
+ clocksource = 0;
+ } else {
+ clocksource = pa;
+ clockevent = DMTIMER_INST_DONT_CARE;
+ }
+}
+
+/* Finds the first usable dmtimer, used for the don't care case */
+static u32 __init dmtimer_systimer_find_first_available(void)
+{
+ struct device_node *np;
+ const __be32 *addr;
+ u32 pa = 0;
+
+ for_each_matching_node(np, dmtimer_match_table) {
+ if (!dmtimer_is_preferred(np))
+ continue;
+
+ addr = of_get_address(np, 0, NULL, NULL);
+ pa = of_translate_address(np, addr);
+ if (pa) {
+ if (pa == clocksource || pa == clockevent) {
+ pa = 0;
+ continue;
+ }
+
+ of_node_put(np);
+ break;
+ }
+ }
+
+ return pa;
+}
+
+/* Selects the best clocksource and clockevent to use */
+static void __init dmtimer_systimer_select_best(void)
+{
+ dmtimer_systimer_check_counter32k();
+ dmtimer_systimer_assign_alwon();
+
+ if (clockevent == DMTIMER_INST_DONT_CARE)
+ clockevent = dmtimer_systimer_find_first_available();
+
+ pr_debug("%s: counter_32k: %i clocksource: %08x clockevent: %08x\n",
+ __func__, counter_32k, clocksource, clockevent);
+}
+
+/* Interface clocks are only available on some SoCs variants */
+static int __init dmtimer_systimer_init_clock(struct device_node *np,
+ const char *name,
+ unsigned long *rate)
+{
+ struct clk *clock;
+ unsigned long r;
+ int error;
+
+ clock = of_clk_get_by_name(np, name);
+ if ((PTR_ERR(clock) == -EINVAL) && !strncmp(name, "ick", 3))
+ return 0;
+ else if (IS_ERR(clock))
+ return PTR_ERR(clock);
+
+ error = clk_prepare_enable(clock);
+ if (error)
+ return error;
+
+ r = clk_get_rate(clock);
+ if (!r)
+ return -ENODEV;
+
+ *rate = r;
+
+ return 0;
+}
+
+static void dmtimer_systimer_enable(struct dmtimer_systimer *t)
+{
+ u32 val;
+
+ if (dmtimer_systimer_revision1(t))
+ val = DMTIMER_TYPE1_ENABLE;
+ else
+ val = DMTIMER_TYPE2_ENABLE;
+
+ writel_relaxed(val, t->base + t->sysc);
+}
+
+static void dmtimer_systimer_disable(struct dmtimer_systimer *t)
+{
+ writel_relaxed(0, t->base + t->sysc);
+}
+
+static int __init dmtimer_systimer_setup(struct device_node *np,
+ struct dmtimer_systimer *t)
+{
+ unsigned long rate;
+ u8 regbase;
+ int error;
+
+ if (!of_device_is_compatible(np->parent, "ti,sysc"))
+ return -EINVAL;
+
+ t->base = of_iomap(np, 0);
+ if (!t->base)
+ return -ENXIO;
+
+ /*
+ * Enable optional assigned-clock-parents configured at the timer
+ * node level. For regular device drivers, this is done automatically
+ * by bus related code such as platform_drv_probe().
+ */
+ error = of_clk_set_defaults(np, false);
+ if (error < 0)
+ pr_err("%s: clock source init failed: %i\n", __func__, error);
+
+ /* For ti-sysc, we have timer clocks at the parent module level */
+ error = dmtimer_systimer_init_clock(np->parent, "fck", &rate);
+ if (error)
+ goto err_unmap;
+
+ t->rate = rate;
+
+ error = dmtimer_systimer_init_clock(np->parent, "ick", &rate);
+ if (error)
+ goto err_unmap;
+
+ if (dmtimer_systimer_revision1(t)) {
+ t->irq_stat = OMAP_TIMER_V1_STAT_OFFSET;
+ t->irq_ena = OMAP_TIMER_V1_INT_EN_OFFSET;
+ t->pend = _OMAP_TIMER_WRITE_PEND_OFFSET;
+ regbase = 0;
+ } else {
+ t->irq_stat = OMAP_TIMER_V2_IRQSTATUS;
+ t->irq_ena = OMAP_TIMER_V2_IRQENABLE_SET;
+ regbase = OMAP_TIMER_V2_FUNC_OFFSET;
+ t->pend = regbase + _OMAP_TIMER_WRITE_PEND_OFFSET;
+ }
+
+ t->sysc = OMAP_TIMER_OCP_CFG_OFFSET;
+ t->load = regbase + _OMAP_TIMER_LOAD_OFFSET;
+ t->counter = regbase + _OMAP_TIMER_COUNTER_OFFSET;
+ t->ctrl = regbase + _OMAP_TIMER_CTRL_OFFSET;
+ t->wakeup = regbase + _OMAP_TIMER_WAKEUP_EN_OFFSET;
+ t->ifctrl = regbase + _OMAP_TIMER_IF_CTRL_OFFSET;
+
+ dmtimer_systimer_enable(t);
+ dmtimer_systimer_reset(t);
+ pr_debug("dmtimer rev %08x sysc %08x\n", readl_relaxed(t->base),
+ readl_relaxed(t->base + t->sysc));
+
+ return 0;
+
+err_unmap:
+ iounmap(t->base);
+
+ return error;
+}
+
+/* Clockevent */
+static struct dmtimer_clockevent *
+to_dmtimer_clockevent(struct clock_event_device *clockevent)
+{
+ return container_of(clockevent, struct dmtimer_clockevent, dev);
+}
+
+static irqreturn_t dmtimer_clockevent_interrupt(int irq, void *data)
+{
+ struct dmtimer_clockevent *clkevt = data;
+ struct dmtimer_systimer *t = &clkevt->t;
+
+ writel_relaxed(OMAP_TIMER_INT_OVERFLOW, t->base + t->irq_stat);
+ clkevt->dev.event_handler(&clkevt->dev);
+
+ return IRQ_HANDLED;
+}
+
+static int dmtimer_set_next_event(unsigned long cycles,
+ struct clock_event_device *evt)
+{
+ struct dmtimer_clockevent *clkevt = to_dmtimer_clockevent(evt);
+ struct dmtimer_systimer *t = &clkevt->t;
+ void __iomem *pend = t->base + t->pend;
+
+ writel_relaxed(0xffffffff - cycles, t->base + t->counter);
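+ /* dmtimer register writes are posted; wait for the TCRR write to land */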
+ while (readl_relaxed(pend) & WP_TCRR)
+ cpu_relax();
+
+ writel_relaxed(OMAP_TIMER_CTRL_ST, t->base + t->ctrl);
+ while (readl_relaxed(pend) & WP_TCLR)
+ cpu_relax();
+
+ return 0;
+}
+
+static int dmtimer_clockevent_shutdown(struct clock_event_device *evt)
+{
+ struct dmtimer_clockevent *clkevt = to_dmtimer_clockevent(evt);
+ struct dmtimer_systimer *t = &clkevt->t;
+ void __iomem *ctrl = t->base + t->ctrl;
+ u32 l;
+
+ l = readl_relaxed(ctrl);
+ if (l & OMAP_TIMER_CTRL_ST) {
+ l &= ~BIT(0);
+ writel_relaxed(l, ctrl);
+ /* Flush posted write */
+ l = readl_relaxed(ctrl);
+ /* Wait for functional clock period x 3.5 */
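+ /* e.g. ~107 us when the functional clock is the 32768 Hz source */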
+ udelay(3500000 / t->rate + 1);
+ }
+ writel_relaxed(OMAP_TIMER_INT_OVERFLOW, t->base + t->irq_stat);
+
+ return 0;
+}
+
+static int dmtimer_set_periodic(struct clock_event_device *evt)
+{
+ struct dmtimer_clockevent *clkevt = to_dmtimer_clockevent(evt);
+ struct dmtimer_systimer *t = &clkevt->t;
+ void __iomem *pend = t->base + t->pend;
+
+ dmtimer_clockevent_shutdown(evt);
+
+ /* Looks like we need to first set the load value separately */
+ writel_relaxed(clkevt->period, t->base + t->load);
+ while (readl_relaxed(pend) & WP_TLDR)
+ cpu_relax();
+
+ writel_relaxed(clkevt->period, t->base + t->counter);
+ while (readl_relaxed(pend) & WP_TCRR)
+ cpu_relax();
+
+ writel_relaxed(OMAP_TIMER_CTRL_AR | OMAP_TIMER_CTRL_ST,
+ t->base + t->ctrl);
+ while (readl_relaxed(pend) & WP_TCLR)
+ cpu_relax();
+
+ return 0;
+}
+
+static void omap_clockevent_idle(struct clock_event_device *evt)
+{
+ struct dmtimer_clockevent *clkevt = to_dmtimer_clockevent(evt);
+ struct dmtimer_systimer *t = &clkevt->t;
+
+ dmtimer_systimer_disable(t);
+}
+
+static void omap_clockevent_unidle(struct clock_event_device *evt)
+{
+ struct dmtimer_clockevent *clkevt = to_dmtimer_clockevent(evt);
+ struct dmtimer_systimer *t = &clkevt->t;
+
+ dmtimer_systimer_enable(t);
+ writel_relaxed(OMAP_TIMER_INT_OVERFLOW, t->base + t->irq_ena);
+ writel_relaxed(OMAP_TIMER_INT_OVERFLOW, t->base + t->wakeup);
+}
+
+static int __init dmtimer_clockevent_init(struct device_node *np)
+{
+ struct dmtimer_clockevent *clkevt;
+ struct clock_event_device *dev;
+ struct dmtimer_systimer *t;
+ int error;
+
+ clkevt = kzalloc(sizeof(*clkevt), GFP_KERNEL);
+ if (!clkevt)
+ return -ENOMEM;
+
+ t = &clkevt->t;
+ dev = &clkevt->dev;
+
+ /*
+ * We mostly use cpuidle_coupled with ARM local timers for runtime,
+ * so there's probably no use for CLOCK_EVT_FEAT_DYNIRQ here.
+ */
+ dev->features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT;
+ dev->rating = 300;
+ dev->set_next_event = dmtimer_set_next_event;
+ dev->set_state_shutdown = dmtimer_clockevent_shutdown;
+ dev->set_state_periodic = dmtimer_set_periodic;
+ dev->set_state_oneshot = dmtimer_clockevent_shutdown;
+ dev->tick_resume = dmtimer_clockevent_shutdown;
+ dev->cpumask = cpu_possible_mask;
+
+ dev->irq = irq_of_parse_and_map(np, 0);
+ if (!dev->irq) {
+ error = -ENXIO;
+ goto err_out_free;
+ }
+
+ error = dmtimer_systimer_setup(np, &clkevt->t);
+ if (error)
+ goto err_out_free;
+
+ clkevt->period = 0xffffffff - DIV_ROUND_CLOSEST(t->rate, HZ);
+
+ /*
+ * For clock-event timers we never read the timer counter and
+ * so we are not impacted by errata i103 and i767. Therefore,
+ * we can safely ignore these errata for clock-event timers.
+ */
+ writel_relaxed(OMAP_TIMER_CTRL_POSTED, t->base + t->ifctrl);
+
+ error = request_irq(dev->irq, dmtimer_clockevent_interrupt,
+ IRQF_TIMER, "clockevent", clkevt);
+ if (error)
+ goto err_out_unmap;
+
+ writel_relaxed(OMAP_TIMER_INT_OVERFLOW, t->base + t->irq_ena);
+ writel_relaxed(OMAP_TIMER_INT_OVERFLOW, t->base + t->wakeup);
+
+ pr_info("TI gptimer clockevent: %s%lu Hz at %pOF\n",
+ of_find_property(np, "ti,timer-alwon", NULL) ?
+ "always-on " : "", t->rate, np->parent);
+
+ clockevents_config_and_register(dev, t->rate,
+ 3, /* Timer internal resynch latency */
+ 0xffffffff);
+
+ if (of_device_is_compatible(np, "ti,am33xx") ||
+ of_device_is_compatible(np, "ti,am43")) {
+ dev->suspend = omap_clockevent_idle;
+ dev->resume = omap_clockevent_unidle;
+ }
+
+ return 0;
+
+err_out_unmap:
+ iounmap(t->base);
+
+err_out_free:
+ kfree(clkevt);
+
+ return error;
+}
+
+/* Clocksource */
+static struct dmtimer_clocksource *
+to_dmtimer_clocksource(struct clocksource *cs)
+{
+ return container_of(cs, struct dmtimer_clocksource, dev);
+}
+
+static u64 dmtimer_clocksource_read_cycles(struct clocksource *cs)
+{
+ struct dmtimer_clocksource *clksrc = to_dmtimer_clocksource(cs);
+ struct dmtimer_systimer *t = &clksrc->t;
+
+ return (u64)readl_relaxed(t->base + t->counter);
+}
+
+static void __iomem *dmtimer_sched_clock_counter;
+
+static u64 notrace dmtimer_read_sched_clock(void)
+{
+ return readl_relaxed(dmtimer_sched_clock_counter);
+}
+
+static void dmtimer_clocksource_suspend(struct clocksource *cs)
+{
+ struct dmtimer_clocksource *clksrc = to_dmtimer_clocksource(cs);
+ struct dmtimer_systimer *t = &clksrc->t;
+
+ clksrc->loadval = readl_relaxed(t->base + t->counter);
+ dmtimer_systimer_disable(t);
+}
+
+static void dmtimer_clocksource_resume(struct clocksource *cs)
+{
+ struct dmtimer_clocksource *clksrc = to_dmtimer_clocksource(cs);
+ struct dmtimer_systimer *t = &clksrc->t;
+
+ dmtimer_systimer_enable(t);
+ writel_relaxed(clksrc->loadval, t->base + t->counter);
+ writel_relaxed(OMAP_TIMER_CTRL_ST | OMAP_TIMER_CTRL_AR,
+ t->base + t->ctrl);
+}
+
+static int __init dmtimer_clocksource_init(struct device_node *np)
+{
+ struct dmtimer_clocksource *clksrc;
+ struct dmtimer_systimer *t;
+ struct clocksource *dev;
+ int error;
+
+ clksrc = kzalloc(sizeof(*clksrc), GFP_KERNEL);
+ if (!clksrc)
+ return -ENOMEM;
+
+ dev = &clksrc->dev;
+ t = &clksrc->t;
+
+ error = dmtimer_systimer_setup(np, t);
+ if (error)
+ goto err_out_free;
+
+ dev->name = "dmtimer";
+ dev->rating = 300;
+ dev->read = dmtimer_clocksource_read_cycles;
+ dev->mask = CLOCKSOURCE_MASK(32);
+ dev->flags = CLOCK_SOURCE_IS_CONTINUOUS;
+
+ if (of_device_is_compatible(np, "ti,am33xx") ||
+ of_device_is_compatible(np, "ti,am43")) {
+ dev->suspend = dmtimer_clocksource_suspend;
+ dev->resume = dmtimer_clocksource_resume;
+ }
+
+ writel_relaxed(0, t->base + t->counter);
+ writel_relaxed(OMAP_TIMER_CTRL_ST | OMAP_TIMER_CTRL_AR,
+ t->base + t->ctrl);
+
+ pr_info("TI gptimer clocksource: %s%pOF\n",
+ of_find_property(np, "ti,timer-alwon", NULL) ?
+ "always-on " : "", np->parent);
+
+ if (!dmtimer_sched_clock_counter) {
+ dmtimer_sched_clock_counter = t->base + t->counter;
+ sched_clock_register(dmtimer_read_sched_clock, 32, t->rate);
+ }
+
+ if (clocksource_register_hz(dev, t->rate))
+ pr_err("Could not register clocksource %pOF\n", np);
+
+ return 0;
+
+err_out_free:
+ kfree(clksrc);
+
+ return -ENODEV;
+}
+
+/*
+ * To distinguish between a clocksource and a clockevent, we assume the device
+ * tree has no interrupts configured for a clocksource timer.
+ */
+static int __init dmtimer_systimer_init(struct device_node *np)
+{
+ const __be32 *addr;
+ u32 pa;
+
+ /* One time init for the preferred timer configuration */
+ if (!clocksource && !clockevent)
+ dmtimer_systimer_select_best();
+
+ if (!clocksource && !clockevent) {
+ pr_err("%s: unable to detect system timers, update dtb?\n",
+ __func__);
+
+ return -EINVAL;
+ }
+
+ addr = of_get_address(np, 0, NULL, NULL);
+ pa = of_translate_address(np, addr);
+ if (!pa)
+ return -EINVAL;
+
+ if (counter_32k <= 0 && clocksource == pa)
+ return dmtimer_clocksource_init(np);
+
+ if (clockevent == pa)
+ return dmtimer_clockevent_init(np);
+
+ return 0;
+}
+
+TIMER_OF_DECLARE(systimer_omap2, "ti,omap2420-timer", dmtimer_systimer_init);
+TIMER_OF_DECLARE(systimer_omap3, "ti,omap3430-timer", dmtimer_systimer_init);
+TIMER_OF_DECLARE(systimer_omap4, "ti,omap4430-timer", dmtimer_systimer_init);
+TIMER_OF_DECLARE(systimer_omap5, "ti,omap5430-timer", dmtimer_systimer_init);
+TIMER_OF_DECLARE(systimer_am33x, "ti,am335x-timer", dmtimer_systimer_init);
+TIMER_OF_DECLARE(systimer_am3ms, "ti,am335x-timer-1ms", dmtimer_systimer_init);
+TIMER_OF_DECLARE(systimer_dm814, "ti,dm814-timer", dmtimer_systimer_init);
+TIMER_OF_DECLARE(systimer_dm816, "ti,dm816-timer", dmtimer_systimer_init);
diff --git a/drivers/clocksource/timer-ti-dm.c b/drivers/clocksource/timer-ti-dm.c
index 2531eab3d6d7..60aff087947a 100644
--- a/drivers/clocksource/timer-ti-dm.c
+++ b/drivers/clocksource/timer-ti-dm.c
@@ -258,9 +258,7 @@ static int omap_dm_timer_prepare(struct omap_dm_timer *timer)
__omap_dm_timer_enable_posted(timer);
omap_dm_timer_disable(timer);
- rc = omap_dm_timer_set_source(timer, OMAP_TIMER_SRC_32_KHZ);
-
- return rc;
+ return 0;
}
static inline u32 omap_dm_timer_reserved_systimer(int id)
diff --git a/drivers/clocksource/timer-versatile.c b/drivers/clocksource/timer-versatile.c
index e4ebb656d005..f5d017b31afa 100644
--- a/drivers/clocksource/timer-versatile.c
+++ b/drivers/clocksource/timer-versatile.c
@@ -6,6 +6,7 @@
#include <linux/clocksource.h>
#include <linux/io.h>
+#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/sched_clock.h>
@@ -22,6 +23,8 @@ static int __init versatile_sched_clock_init(struct device_node *node)
{
void __iomem *base = of_iomap(node, 0);
+ of_node_clear_flag(node, OF_POPULATED);
+
if (!base)
return -ENXIO;
diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c
index d58ce664da84..646ad385e490 100644
--- a/drivers/connector/cn_proc.c
+++ b/drivers/connector/cn_proc.c
@@ -18,6 +18,7 @@
#include <linux/pid_namespace.h>
#include <linux/cn_proc.h>
+#include <linux/local_lock.h>
/*
* Size of a cn_msg followed by a proc_event structure. Since the
@@ -38,25 +39,31 @@ static inline struct cn_msg *buffer_to_cn_msg(__u8 *buffer)
static atomic_t proc_event_num_listeners = ATOMIC_INIT(0);
static struct cb_id cn_proc_event_id = { CN_IDX_PROC, CN_VAL_PROC };
-/* proc_event_counts is used as the sequence number of the netlink message */
-static DEFINE_PER_CPU(__u32, proc_event_counts) = { 0 };
+/* local_event.count is used as the sequence number of the netlink message */
+struct local_event {
+ local_lock_t lock;
+ __u32 count;
+};
+static DEFINE_PER_CPU(struct local_event, local_event) = {
+ .lock = INIT_LOCAL_LOCK(lock),
+};
static inline void send_msg(struct cn_msg *msg)
{
- preempt_disable();
+ local_lock(&local_event.lock);
- msg->seq = __this_cpu_inc_return(proc_event_counts) - 1;
+ msg->seq = __this_cpu_inc_return(local_event.count) - 1;
((struct proc_event *)msg->data)->cpu = smp_processor_id();
/*
- * Preemption remains disabled during send to ensure the messages are
- * ordered according to their sequence numbers.
+ * local_lock() disables preemption during send to ensure the messages
+ * are ordered according to their sequence numbers.
*
* If cn_netlink_send() fails, the data is not sent.
*/
cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_NOWAIT);
- preempt_enable();
+ local_unlock(&local_event.lock);
}
void proc_fork_connector(struct task_struct *task)
diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
index c3e6bd59e920..e91750132552 100644
--- a/drivers/cpufreq/Kconfig
+++ b/drivers/cpufreq/Kconfig
@@ -323,7 +323,8 @@ endif
config QORIQ_CPUFREQ
tristate "CPU frequency scaling driver for Freescale QorIQ SoCs"
- depends on OF && COMMON_CLK && (PPC_E500MC || ARM || ARM64)
+ depends on OF && COMMON_CLK
+ depends on PPC_E500MC || SOC_LS1021A || ARCH_LAYERSCAPE || COMPILE_TEST
select CLK_QORIQ
help
This adds the CPUFreq driver support for Freescale QorIQ SoCs
diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm
index 15c1a1231516..9481292981f0 100644
--- a/drivers/cpufreq/Kconfig.arm
+++ b/drivers/cpufreq/Kconfig.arm
@@ -317,6 +317,7 @@ config ARM_TEGRA186_CPUFREQ
config ARM_TI_CPUFREQ
bool "Texas Instruments CPUFreq support"
depends on ARCH_OMAP2PLUS
+ default ARCH_OMAP2PLUS
help
This driver enables valid OPPs on the running platform based on
values contained within the SoC in use. Enable this in order to
diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c
index cb9db16bea61..e8e20fef400b 100644
--- a/drivers/cpufreq/cpufreq-dt-platdev.c
+++ b/drivers/cpufreq/cpufreq-dt-platdev.c
@@ -53,6 +53,7 @@ static const struct of_device_id whitelist[] __initconst = {
{ .compatible = "renesas,r7s72100", },
{ .compatible = "renesas,r8a73a4", },
{ .compatible = "renesas,r8a7740", },
+ { .compatible = "renesas,r8a7742", },
{ .compatible = "renesas,r8a7743", },
{ .compatible = "renesas,r8a7744", },
{ .compatible = "renesas,r8a7745", },
@@ -105,6 +106,7 @@ static const struct of_device_id blacklist[] __initconst = {
{ .compatible = "calxeda,highbank", },
{ .compatible = "calxeda,ecx-2000", },
+ { .compatible = "fsl,imx7ulp", },
{ .compatible = "fsl,imx7d", },
{ .compatible = "fsl,imx8mq", },
{ .compatible = "fsl,imx8mm", },
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 045f9fe157ce..d03f250f68e4 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -2535,26 +2535,27 @@ EXPORT_SYMBOL_GPL(cpufreq_update_limits);
static int cpufreq_boost_set_sw(int state)
{
struct cpufreq_policy *policy;
- int ret = -EINVAL;
for_each_active_policy(policy) {
+ int ret;
+
if (!policy->freq_table)
- continue;
+ return -ENXIO;
ret = cpufreq_frequency_table_cpuinfo(policy,
policy->freq_table);
if (ret) {
pr_err("%s: Policy frequency update failed\n",
__func__);
- break;
+ return ret;
}
ret = freq_qos_update_request(policy->max_freq_req, policy->max);
if (ret < 0)
- break;
+ return ret;
}
- return ret;
+ return 0;
}
int cpufreq_boost_trigger_state(int state)
diff --git a/drivers/cpufreq/imx-cpufreq-dt.c b/drivers/cpufreq/imx-cpufreq-dt.c
index de206d2745fe..3fe9125156b4 100644
--- a/drivers/cpufreq/imx-cpufreq-dt.c
+++ b/drivers/cpufreq/imx-cpufreq-dt.c
@@ -3,7 +3,9 @@
* Copyright 2019 NXP
*/
+#include <linux/clk.h>
#include <linux/cpu.h>
+#include <linux/cpufreq.h>
#include <linux/err.h>
#include <linux/init.h>
#include <linux/kernel.h>
@@ -12,8 +14,11 @@
#include <linux/of.h>
#include <linux/platform_device.h>
#include <linux/pm_opp.h>
+#include <linux/regulator/consumer.h>
#include <linux/slab.h>
+#include "cpufreq-dt.h"
+
#define OCOTP_CFG3_SPEED_GRADE_SHIFT 8
#define OCOTP_CFG3_SPEED_GRADE_MASK (0x3 << 8)
#define IMX8MN_OCOTP_CFG3_SPEED_GRADE_MASK (0xf << 8)
@@ -22,20 +27,92 @@
#define IMX8MP_OCOTP_CFG3_MKT_SEGMENT_SHIFT 5
#define IMX8MP_OCOTP_CFG3_MKT_SEGMENT_MASK (0x3 << 5)
+#define IMX7ULP_MAX_RUN_FREQ 528000
+
/* cpufreq-dt device registered by imx-cpufreq-dt */
static struct platform_device *cpufreq_dt_pdev;
static struct opp_table *cpufreq_opp_table;
+static struct device *cpu_dev;
+
+enum IMX7ULP_CPUFREQ_CLKS {
+ ARM,
+ CORE,
+ SCS_SEL,
+ HSRUN_CORE,
+ HSRUN_SCS_SEL,
+ FIRC,
+};
+
+static struct clk_bulk_data imx7ulp_clks[] = {
+ { .id = "arm" },
+ { .id = "core" },
+ { .id = "scs_sel" },
+ { .id = "hsrun_core" },
+ { .id = "hsrun_scs_sel" },
+ { .id = "firc" },
+};
+
+static unsigned int imx7ulp_get_intermediate(struct cpufreq_policy *policy,
+ unsigned int index)
+{
+ return clk_get_rate(imx7ulp_clks[FIRC].clk);
+}
+
+static int imx7ulp_target_intermediate(struct cpufreq_policy *policy,
+ unsigned int index)
+{
+ unsigned int newfreq = policy->freq_table[index].frequency;
+
+ clk_set_parent(imx7ulp_clks[SCS_SEL].clk, imx7ulp_clks[FIRC].clk);
+ clk_set_parent(imx7ulp_clks[HSRUN_SCS_SEL].clk, imx7ulp_clks[FIRC].clk);
+
+ if (newfreq > IMX7ULP_MAX_RUN_FREQ)
+ clk_set_parent(imx7ulp_clks[ARM].clk,
+ imx7ulp_clks[HSRUN_CORE].clk);
+ else
+ clk_set_parent(imx7ulp_clks[ARM].clk, imx7ulp_clks[CORE].clk);
+
+ return 0;
+}
+
+static struct cpufreq_dt_platform_data imx7ulp_data = {
+ .target_intermediate = imx7ulp_target_intermediate,
+ .get_intermediate = imx7ulp_get_intermediate,
+};
static int imx_cpufreq_dt_probe(struct platform_device *pdev)
{
- struct device *cpu_dev = get_cpu_device(0);
+ struct platform_device *dt_pdev;
u32 cell_value, supported_hw[2];
int speed_grade, mkt_segment;
int ret;
+ cpu_dev = get_cpu_device(0);
+
if (!of_find_property(cpu_dev->of_node, "cpu-supply", NULL))
return -ENODEV;
+ if (of_machine_is_compatible("fsl,imx7ulp")) {
+ ret = clk_bulk_get(cpu_dev, ARRAY_SIZE(imx7ulp_clks),
+ imx7ulp_clks);
+ if (ret)
+ return ret;
+
+ dt_pdev = platform_device_register_data(NULL, "cpufreq-dt",
+ -1, &imx7ulp_data,
+ sizeof(imx7ulp_data));
+ if (IS_ERR(dt_pdev)) {
+ clk_bulk_put(ARRAY_SIZE(imx7ulp_clks), imx7ulp_clks);
+ ret = PTR_ERR(dt_pdev);
+ dev_err(&pdev->dev, "Failed to register cpufreq-dt: %d\n", ret);
+ return ret;
+ }
+
+ cpufreq_dt_pdev = dt_pdev;
+
+ return 0;
+ }
+
ret = nvmem_cell_read_u32(cpu_dev, "speed_grade", &cell_value);
if (ret)
return ret;
@@ -98,7 +175,10 @@ static int imx_cpufreq_dt_probe(struct platform_device *pdev)
static int imx_cpufreq_dt_remove(struct platform_device *pdev)
{
platform_device_unregister(cpufreq_dt_pdev);
- dev_pm_opp_put_supported_hw(cpufreq_opp_table);
+ if (!of_machine_is_compatible("fsl,imx7ulp"))
+ dev_pm_opp_put_supported_hw(cpufreq_opp_table);
+ else
+ clk_bulk_put(ARRAY_SIZE(imx7ulp_clks), imx7ulp_clks);
return 0;
}
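The imx7ulp branch above leans on the clk_bulk API: one call looks up every named clock in the array and fills in the .clk handles, and one call releases them all. A minimal sketch of that pattern; the demo_* names and the "core"/"bus" clock ids are made up for illustration rather than the imx7ulp set.

#include <linux/clk.h>
#include <linux/device.h>
#include <linux/kernel.h>

static struct clk_bulk_data demo_clks[] = {
	{ .id = "core" },
	{ .id = "bus" },
};

static int demo_report_core_rate(struct device *dev)
{
	int ret;

	/* Looks up "core" and "bus" on dev; fills .clk on success. */
	ret = clk_bulk_get(dev, ARRAY_SIZE(demo_clks), demo_clks);
	if (ret)
		return ret;

	dev_info(dev, "core clock: %lu Hz\n", clk_get_rate(demo_clks[0].clk));

	/* Releases every clock acquired by clk_bulk_get(). */
	clk_bulk_put(ARRAY_SIZE(demo_clks), demo_clks);
	return 0;
}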
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 4d3429b2058f..8e23a698ce04 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -2771,6 +2771,8 @@ static int __init intel_pstate_init(void)
pr_info("Invalid MSRs\n");
return -ENODEV;
}
+ /* Without HWP start in the passive mode. */
+ default_driver = &intel_cpufreq;
hwp_cpu_matched:
/*
@@ -2816,7 +2818,6 @@ static int __init intel_pstate_setup(char *str)
if (!strcmp(str, "disable")) {
no_load = 1;
} else if (!strcmp(str, "passive")) {
- pr_info("Passive mode enabled\n");
default_driver = &intel_cpufreq;
no_hwp = 1;
}
diff --git a/drivers/cpufreq/loongson2_cpufreq.c b/drivers/cpufreq/loongson2_cpufreq.c
index 909f40fbcde2..d05e761d9572 100644
--- a/drivers/cpufreq/loongson2_cpufreq.c
+++ b/drivers/cpufreq/loongson2_cpufreq.c
@@ -20,7 +20,6 @@
#include <linux/delay.h>
#include <linux/platform_device.h>
-#include <asm/clock.h>
#include <asm/idle.h>
#include <asm/mach-loongson2ef/loongson.h>
@@ -58,29 +57,20 @@ static int loongson2_cpufreq_target(struct cpufreq_policy *policy,
loongson2_clockmod_table[index].driver_data) / 8;
/* setting the cpu frequency */
- clk_set_rate(policy->clk, freq * 1000);
+ loongson2_cpu_set_rate(freq);
return 0;
}
static int loongson2_cpufreq_cpu_init(struct cpufreq_policy *policy)
{
- struct clk *cpuclk;
int i;
unsigned long rate;
int ret;
- cpuclk = clk_get(NULL, "cpu_clk");
- if (IS_ERR(cpuclk)) {
- pr_err("couldn't get CPU clk\n");
- return PTR_ERR(cpuclk);
- }
-
rate = cpu_clock_freq / 1000;
- if (!rate) {
- clk_put(cpuclk);
+ if (!rate)
return -EINVAL;
- }
/* clock table init */
for (i = 2;
@@ -88,20 +78,16 @@ static int loongson2_cpufreq_cpu_init(struct cpufreq_policy *policy)
i++)
loongson2_clockmod_table[i].frequency = (rate * i) / 8;
- ret = clk_set_rate(cpuclk, rate * 1000);
- if (ret) {
- clk_put(cpuclk);
+ ret = loongson2_cpu_set_rate(rate);
+ if (ret)
return ret;
- }
- policy->clk = cpuclk;
cpufreq_generic_init(policy, &loongson2_clockmod_table[0], 0);
return 0;
}
static int loongson2_cpufreq_exit(struct cpufreq_policy *policy)
{
- clk_put(policy->clk);
return 0;
}
diff --git a/drivers/cpufreq/qcom-cpufreq-nvmem.c b/drivers/cpufreq/qcom-cpufreq-nvmem.c
index a1b8238872a2..d06b37822c3d 100644
--- a/drivers/cpufreq/qcom-cpufreq-nvmem.c
+++ b/drivers/cpufreq/qcom-cpufreq-nvmem.c
@@ -277,7 +277,7 @@ static int qcom_cpufreq_probe(struct platform_device *pdev)
if (!np)
return -ENOENT;
- ret = of_device_is_compatible(np, "operating-points-v2-qcom-cpu");
+ ret = of_device_is_compatible(np, "operating-points-v2-kryo-cpu");
if (!ret) {
of_node_put(np);
return -ENOENT;
diff --git a/drivers/cpufreq/qoriq-cpufreq.c b/drivers/cpufreq/qoriq-cpufreq.c
index 8e436dc75c8b..6b6b20da2bcf 100644
--- a/drivers/cpufreq/qoriq-cpufreq.c
+++ b/drivers/cpufreq/qoriq-cpufreq.c
@@ -18,6 +18,7 @@
#include <linux/of.h>
#include <linux/slab.h>
#include <linux/smp.h>
+#include <linux/platform_device.h>
/**
* struct cpu_data
@@ -29,12 +30,6 @@ struct cpu_data {
struct cpufreq_frequency_table *table;
};
-/*
- * Don't use cpufreq on this SoC -- used when the SoC would have otherwise
- * matched a more generic compatible.
- */
-#define SOC_BLACKLIST 1
-
/**
* struct soc_data - SoC specific data
* @flags: SOC_xxx
@@ -264,64 +259,51 @@ static struct cpufreq_driver qoriq_cpufreq_driver = {
.attr = cpufreq_generic_attr,
};
-static const struct soc_data blacklist = {
- .flags = SOC_BLACKLIST,
-};
-
-static const struct of_device_id node_matches[] __initconst = {
+static const struct of_device_id qoriq_cpufreq_blacklist[] = {
/* e6500 cannot use cpufreq due to erratum A-008083 */
- { .compatible = "fsl,b4420-clockgen", &blacklist },
- { .compatible = "fsl,b4860-clockgen", &blacklist },
- { .compatible = "fsl,t2080-clockgen", &blacklist },
- { .compatible = "fsl,t4240-clockgen", &blacklist },
-
- { .compatible = "fsl,ls1012a-clockgen", },
- { .compatible = "fsl,ls1021a-clockgen", },
- { .compatible = "fsl,ls1028a-clockgen", },
- { .compatible = "fsl,ls1043a-clockgen", },
- { .compatible = "fsl,ls1046a-clockgen", },
- { .compatible = "fsl,ls1088a-clockgen", },
- { .compatible = "fsl,ls2080a-clockgen", },
- { .compatible = "fsl,lx2160a-clockgen", },
- { .compatible = "fsl,p4080-clockgen", },
- { .compatible = "fsl,qoriq-clockgen-1.0", },
- { .compatible = "fsl,qoriq-clockgen-2.0", },
+ { .compatible = "fsl,b4420-clockgen", },
+ { .compatible = "fsl,b4860-clockgen", },
+ { .compatible = "fsl,t2080-clockgen", },
+ { .compatible = "fsl,t4240-clockgen", },
{}
};
-static int __init qoriq_cpufreq_init(void)
+static int qoriq_cpufreq_probe(struct platform_device *pdev)
{
int ret;
- struct device_node *np;
- const struct of_device_id *match;
- const struct soc_data *data;
-
- np = of_find_matching_node(NULL, node_matches);
- if (!np)
- return -ENODEV;
-
- match = of_match_node(node_matches, np);
- data = match->data;
-
- of_node_put(np);
+ struct device_node *np;
- if (data && data->flags & SOC_BLACKLIST)
+ np = of_find_matching_node(NULL, qoriq_cpufreq_blacklist);
+ if (np) {
+ dev_info(&pdev->dev, "Disabling due to erratum A-008083\n");
return -ENODEV;
+ }
ret = cpufreq_register_driver(&qoriq_cpufreq_driver);
- if (!ret)
- pr_info("Freescale QorIQ CPU frequency scaling driver\n");
+ if (ret)
+ return ret;
- return ret;
+ dev_info(&pdev->dev, "Freescale QorIQ CPU frequency scaling driver\n");
+ return 0;
}
-module_init(qoriq_cpufreq_init);
-static void __exit qoriq_cpufreq_exit(void)
+static int qoriq_cpufreq_remove(struct platform_device *pdev)
{
cpufreq_unregister_driver(&qoriq_cpufreq_driver);
+
+ return 0;
}
-module_exit(qoriq_cpufreq_exit);
+static struct platform_driver qoriq_cpufreq_platform_driver = {
+ .driver = {
+ .name = "qoriq-cpufreq",
+ },
+ .probe = qoriq_cpufreq_probe,
+ .remove = qoriq_cpufreq_remove,
+};
+module_platform_driver(qoriq_cpufreq_platform_driver);
+
+MODULE_ALIAS("platform:qoriq-cpufreq");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Tang Yuantian <Yuantian.Tang@freescale.com>");
MODULE_DESCRIPTION("cpufreq driver for Freescale QorIQ series SoCs");
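Because the module_init()/module_exit() pair above is replaced by a platform driver, the driver now only probes once something creates a platform device named "qoriq-cpufreq"; that registration is expected to come from the clockgen clock code in a companion change, which is not part of this hunk. A sketch of what such a registration could look like, with a hypothetical demo_add_qoriq_cpufreq_device() helper:

#include <linux/err.h>
#include <linux/platform_device.h>

static int demo_add_qoriq_cpufreq_device(void)
{
	struct platform_device *pdev;

	/* Creates the device that qoriq_cpufreq_platform_driver binds to. */
	pdev = platform_device_register_simple("qoriq-cpufreq", -1, NULL, 0);

	return PTR_ERR_OR_ZERO(pdev);
}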
diff --git a/drivers/cpuidle/Kconfig.arm b/drivers/cpuidle/Kconfig.arm
index 99a2d72ac02b..51a7e89085c0 100644
--- a/drivers/cpuidle/Kconfig.arm
+++ b/drivers/cpuidle/Kconfig.arm
@@ -94,3 +94,16 @@ config ARM_TEGRA_CPUIDLE
select ARM_CPU_SUSPEND
help
Select this to enable cpuidle for NVIDIA Tegra20/30/114/124 SoCs.
+
+config ARM_QCOM_SPM_CPUIDLE
+ bool "CPU Idle Driver for Qualcomm Subsystem Power Manager (SPM)"
+ depends on (ARCH_QCOM || COMPILE_TEST) && !ARM64
+ select ARM_CPU_SUSPEND
+ select CPU_IDLE_MULTIPLE_DRIVERS
+ select DT_IDLE_STATES
+ select QCOM_SCM
+ help
+ Select this to enable cpuidle for Qualcomm processors.
+ The Subsystem Power Manager (SPM) controls low power modes for the
+ CPU and L2 cores. It interfaces with various system drivers to put
+ the cores in low power modes.
diff --git a/drivers/cpuidle/Makefile b/drivers/cpuidle/Makefile
index 55a464f6a78b..f07800cbb43f 100644
--- a/drivers/cpuidle/Makefile
+++ b/drivers/cpuidle/Makefile
@@ -25,6 +25,7 @@ obj-$(CONFIG_ARM_PSCI_CPUIDLE) += cpuidle_psci.o
cpuidle_psci-y := cpuidle-psci.o
cpuidle_psci-$(CONFIG_PM_GENERIC_DOMAINS_OF) += cpuidle-psci-domain.o
obj-$(CONFIG_ARM_TEGRA_CPUIDLE) += cpuidle-tegra.o
+obj-$(CONFIG_ARM_QCOM_SPM_CPUIDLE) += cpuidle-qcom-spm.o
###############################################################################
# MIPS drivers
diff --git a/drivers/cpuidle/cpuidle-psci.c b/drivers/cpuidle/cpuidle-psci.c
index bae9140a65a5..d0fb585073c6 100644
--- a/drivers/cpuidle/cpuidle-psci.c
+++ b/drivers/cpuidle/cpuidle-psci.c
@@ -58,6 +58,10 @@ static int psci_enter_domain_idle_state(struct cpuidle_device *dev,
u32 state;
int ret;
+ ret = cpu_pm_enter();
+ if (ret)
+ return -1;
+
/* Do runtime PM to manage a hierarchical CPU topology. */
pm_runtime_put_sync_suspend(pd_dev);
@@ -65,10 +69,12 @@ static int psci_enter_domain_idle_state(struct cpuidle_device *dev,
if (!state)
state = states[idx];
- ret = psci_enter_state(idx, state);
+ ret = psci_cpu_suspend_enter(state) ? -1 : idx;
pm_runtime_get_sync(pd_dev);
+ cpu_pm_exit();
+
/* Clear the domain state to start fresh when back from idle. */
psci_set_domain_state(0);
return ret;
diff --git a/drivers/soc/qcom/spm.c b/drivers/cpuidle/cpuidle-qcom-spm.c
index 8e10e02c6aa5..adf91a6e4d7d 100644
--- a/drivers/soc/qcom/spm.c
+++ b/drivers/cpuidle/cpuidle-qcom-spm.c
@@ -19,10 +19,11 @@
#include <linux/cpu_pm.h>
#include <linux/qcom_scm.h>
-#include <asm/cpuidle.h>
#include <asm/proc-fns.h>
#include <asm/suspend.h>
+#include "dt_idle_states.h"
+
#define MAX_PMIC_DATA 2
#define MAX_SEQ_DATA 64
#define SPM_CTL_INDEX 0x7f
@@ -62,6 +63,7 @@ struct spm_reg_data {
};
struct spm_driver_data {
+ struct cpuidle_driver cpuidle_driver;
void __iomem *reg_base;
const struct spm_reg_data *reg_data;
};
@@ -107,11 +109,6 @@ static const struct spm_reg_data spm_reg_8064_cpu = {
.start_index[PM_SLEEP_MODE_SPC] = 2,
};
-static DEFINE_PER_CPU(struct spm_driver_data *, cpu_spm_drv);
-
-typedef int (*idle_fn)(void);
-static DEFINE_PER_CPU(idle_fn*, qcom_idle_ops);
-
static inline void spm_register_write(struct spm_driver_data *drv,
enum spm_reg reg, u32 val)
{
@@ -172,10 +169,9 @@ static int qcom_pm_collapse(unsigned long int unused)
return -1;
}
-static int qcom_cpu_spc(void)
+static int qcom_cpu_spc(struct spm_driver_data *drv)
{
int ret;
- struct spm_driver_data *drv = __this_cpu_read(cpu_spm_drv);
spm_set_low_power_mode(drv, PM_SLEEP_MODE_SPC);
ret = cpu_suspend(0, qcom_pm_collapse);
@@ -190,94 +186,49 @@ static int qcom_cpu_spc(void)
return ret;
}
-static int qcom_idle_enter(unsigned long index)
+static int spm_enter_idle_state(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int idx)
{
- return __this_cpu_read(qcom_idle_ops)[index]();
+ struct spm_driver_data *data = container_of(drv, struct spm_driver_data,
+ cpuidle_driver);
+
+ return CPU_PM_CPU_IDLE_ENTER_PARAM(qcom_cpu_spc, idx, data);
}
-static const struct of_device_id qcom_idle_state_match[] __initconst = {
- { .compatible = "qcom,idle-state-spc", .data = qcom_cpu_spc },
+static struct cpuidle_driver qcom_spm_idle_driver = {
+ .name = "qcom_spm",
+ .owner = THIS_MODULE,
+ .states[0] = {
+ .enter = spm_enter_idle_state,
+ .exit_latency = 1,
+ .target_residency = 1,
+ .power_usage = UINT_MAX,
+ .name = "WFI",
+ .desc = "ARM WFI",
+ }
+};
+
+static const struct of_device_id qcom_idle_state_match[] = {
+ { .compatible = "qcom,idle-state-spc", .data = spm_enter_idle_state },
{ },
};
-static int __init qcom_cpuidle_init(struct device_node *cpu_node, int cpu)
+static int spm_cpuidle_init(struct cpuidle_driver *drv, int cpu)
{
- const struct of_device_id *match_id;
- struct device_node *state_node;
- int i;
- int state_count = 1;
- idle_fn idle_fns[CPUIDLE_STATE_MAX];
- idle_fn *fns;
- cpumask_t mask;
- bool use_scm_power_down = false;
-
- if (!qcom_scm_is_available())
- return -EPROBE_DEFER;
-
- for (i = 0; ; i++) {
- state_node = of_parse_phandle(cpu_node, "cpu-idle-states", i);
- if (!state_node)
- break;
-
- if (!of_device_is_available(state_node))
- continue;
-
- if (i == CPUIDLE_STATE_MAX) {
- pr_warn("%s: cpuidle states reached max possible\n",
- __func__);
- break;
- }
-
- match_id = of_match_node(qcom_idle_state_match, state_node);
- if (!match_id)
- return -ENODEV;
-
- idle_fns[state_count] = match_id->data;
-
- /* Check if any of the states allow power down */
- if (match_id->data == qcom_cpu_spc)
- use_scm_power_down = true;
-
- state_count++;
- }
-
- if (state_count == 1)
- goto check_spm;
-
- fns = devm_kcalloc(get_cpu_device(cpu), state_count, sizeof(*fns),
- GFP_KERNEL);
- if (!fns)
- return -ENOMEM;
-
- for (i = 1; i < state_count; i++)
- fns[i] = idle_fns[i];
+ int ret;
- if (use_scm_power_down) {
- /* We have atleast one power down mode */
- cpumask_clear(&mask);
- cpumask_set_cpu(cpu, &mask);
- qcom_scm_set_warm_boot_addr(cpu_resume_arm, &mask);
- }
+ memcpy(drv, &qcom_spm_idle_driver, sizeof(*drv));
+ drv->cpumask = (struct cpumask *)cpumask_of(cpu);
- per_cpu(qcom_idle_ops, cpu) = fns;
+ /* Parse idle states from device tree */
+ ret = dt_init_idle_driver(drv, qcom_idle_state_match, 1);
+ if (ret <= 0)
+ return ret ? : -ENODEV;
- /*
- * SPM probe for the cpu should have happened by now, if the
- * SPM device does not exist, return -ENXIO to indicate that the
- * cpu does not support idle states.
- */
-check_spm:
- return per_cpu(cpu_spm_drv, cpu) ? 0 : -ENXIO;
+ /* We have at least one power down mode */
+ return qcom_scm_set_warm_boot_addr(cpu_resume_arm, drv->cpumask);
}
-static const struct cpuidle_ops qcom_cpuidle_ops __initconst = {
- .suspend = qcom_idle_enter,
- .init = qcom_cpuidle_init,
-};
-
-CPUIDLE_METHOD_OF_DECLARE(qcom_idle_v1, "qcom,kpss-acc-v1", &qcom_cpuidle_ops);
-CPUIDLE_METHOD_OF_DECLARE(qcom_idle_v2, "qcom,kpss-acc-v2", &qcom_cpuidle_ops);
-
static struct spm_driver_data *spm_get_drv(struct platform_device *pdev,
int *spm_cpu)
{
@@ -323,11 +274,15 @@ static int spm_dev_probe(struct platform_device *pdev)
struct resource *res;
const struct of_device_id *match_id;
void __iomem *addr;
- int cpu;
+ int cpu, ret;
+
+ if (!qcom_scm_is_available())
+ return -EPROBE_DEFER;
drv = spm_get_drv(pdev, &cpu);
if (!drv)
return -EINVAL;
+ platform_set_drvdata(pdev, drv);
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
drv->reg_base = devm_ioremap_resource(&pdev->dev, res);
@@ -340,6 +295,10 @@ static int spm_dev_probe(struct platform_device *pdev)
drv->reg_data = match_id->data;
+ ret = spm_cpuidle_init(&drv->cpuidle_driver, cpu);
+ if (ret)
+ return ret;
+
/* Write the SPM sequences first.. */
addr = drv->reg_base + drv->reg_data->reg_offset[SPM_REG_SEQ_ENTRY];
__iowrite32_copy(addr, drv->reg_data->seq,
@@ -362,13 +321,20 @@ static int spm_dev_probe(struct platform_device *pdev)
/* Set up Standby as the default low power mode */
spm_set_low_power_mode(drv, PM_SLEEP_MODE_STBY);
- per_cpu(cpu_spm_drv, cpu) = drv;
+ return cpuidle_register(&drv->cpuidle_driver, NULL);
+}
+
+static int spm_dev_remove(struct platform_device *pdev)
+{
+ struct spm_driver_data *drv = platform_get_drvdata(pdev);
+ cpuidle_unregister(&drv->cpuidle_driver);
return 0;
}
static struct platform_driver spm_driver = {
.probe = spm_dev_probe,
+ .remove = spm_dev_remove,
.driver = {
.name = "saw",
.of_match_table = spm_match_table,
diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c
index cdeedbf02646..091d1caceb41 100644
--- a/drivers/cpuidle/sysfs.c
+++ b/drivers/cpuidle/sysfs.c
@@ -18,14 +18,6 @@
#include "cpuidle.h"
-static unsigned int sysfs_switch;
-static int __init cpuidle_sysfs_setup(char *unused)
-{
- sysfs_switch = 1;
- return 1;
-}
-__setup("cpuidle_sysfs_switch", cpuidle_sysfs_setup);
-
static ssize_t show_available_governors(struct device *dev,
struct device_attribute *attr,
char *buf)
@@ -35,10 +27,10 @@ static ssize_t show_available_governors(struct device *dev,
mutex_lock(&cpuidle_lock);
list_for_each_entry(tmp, &cpuidle_governors, governor_list) {
- if (i >= (ssize_t) ((PAGE_SIZE/sizeof(char)) -
- CPUIDLE_NAME_LEN - 2))
+ if (i >= (ssize_t) (PAGE_SIZE - (CPUIDLE_NAME_LEN + 2)))
goto out;
- i += scnprintf(&buf[i], CPUIDLE_NAME_LEN, "%s ", tmp->name);
+
+ i += scnprintf(&buf[i], CPUIDLE_NAME_LEN + 1, "%s ", tmp->name);
}
out:
@@ -85,58 +77,43 @@ static ssize_t store_current_governor(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
{
- char gov_name[CPUIDLE_NAME_LEN];
- int ret = -EINVAL;
- size_t len = count;
+ char gov_name[CPUIDLE_NAME_LEN + 1];
+ int ret;
struct cpuidle_governor *gov;
- if (!len || len >= sizeof(gov_name))
+ ret = sscanf(buf, "%" __stringify(CPUIDLE_NAME_LEN) "s", gov_name);
+ if (ret != 1)
return -EINVAL;
- memcpy(gov_name, buf, len);
- gov_name[len] = '\0';
- if (gov_name[len - 1] == '\n')
- gov_name[--len] = '\0';
-
mutex_lock(&cpuidle_lock);
-
+ ret = -EINVAL;
list_for_each_entry(gov, &cpuidle_governors, governor_list) {
- if (strlen(gov->name) == len && !strcmp(gov->name, gov_name)) {
+ if (!strncmp(gov->name, gov_name, CPUIDLE_NAME_LEN)) {
ret = cpuidle_switch_governor(gov);
break;
}
}
-
mutex_unlock(&cpuidle_lock);
- if (ret)
- return ret;
- else
- return count;
+ return ret ? ret : count;
}
-static DEVICE_ATTR(current_driver, 0444, show_current_driver, NULL);
-static DEVICE_ATTR(current_governor_ro, 0444, show_current_governor, NULL);
-
-static struct attribute *cpuidle_default_attrs[] = {
- &dev_attr_current_driver.attr,
- &dev_attr_current_governor_ro.attr,
- NULL
-};
-
static DEVICE_ATTR(available_governors, 0444, show_available_governors, NULL);
+static DEVICE_ATTR(current_driver, 0444, show_current_driver, NULL);
static DEVICE_ATTR(current_governor, 0644, show_current_governor,
- store_current_governor);
+ store_current_governor);
+static DEVICE_ATTR(current_governor_ro, 0444, show_current_governor, NULL);
-static struct attribute *cpuidle_switch_attrs[] = {
+static struct attribute *cpuidle_attrs[] = {
&dev_attr_available_governors.attr,
&dev_attr_current_driver.attr,
&dev_attr_current_governor.attr,
+ &dev_attr_current_governor_ro.attr,
NULL
};
static struct attribute_group cpuidle_attr_group = {
- .attrs = cpuidle_default_attrs,
+ .attrs = cpuidle_attrs,
.name = "cpuidle",
};
@@ -146,9 +123,6 @@ static struct attribute_group cpuidle_attr_group = {
*/
int cpuidle_add_interface(struct device *dev)
{
- if (sysfs_switch)
- cpuidle_attr_group.attrs = cpuidle_switch_attrs;
-
return sysfs_create_group(&dev->kobj, &cpuidle_attr_group);
}
@@ -167,11 +141,6 @@ struct cpuidle_attr {
ssize_t (*store)(struct cpuidle_device *, const char *, size_t count);
};
-#define define_one_ro(_name, show) \
- static struct cpuidle_attr attr_##_name = __ATTR(_name, 0444, show, NULL)
-#define define_one_rw(_name, show, store) \
- static struct cpuidle_attr attr_##_name = __ATTR(_name, 0644, show, store)
-
#define attr_to_cpuidleattr(a) container_of(a, struct cpuidle_attr, attr)
struct cpuidle_device_kobj {
@@ -431,12 +400,12 @@ static inline void cpuidle_remove_s2idle_attr_group(struct cpuidle_state_kobj *k
#define attr_to_stateattr(a) container_of(a, struct cpuidle_state_attr, attr)
static ssize_t cpuidle_state_show(struct kobject *kobj, struct attribute *attr,
- char * buf)
+ char *buf)
{
int ret = -EIO;
struct cpuidle_state *state = kobj_to_state(kobj);
struct cpuidle_state_usage *state_usage = kobj_to_state_usage(kobj);
- struct cpuidle_state_attr * cattr = attr_to_stateattr(attr);
+ struct cpuidle_state_attr *cattr = attr_to_stateattr(attr);
if (cattr->show)
ret = cattr->show(state, state_usage, buf);
@@ -515,7 +484,7 @@ static int cpuidle_add_state_sysfs(struct cpuidle_device *device)
ret = kobject_init_and_add(&kobj->kobj, &ktype_state_cpuidle,
&kdev->kobj, "state%d", i);
if (ret) {
- kfree(kobj);
+ kobject_put(&kobj->kobj);
goto error_state;
}
cpuidle_add_s2idle_attr_group(kobj);
@@ -646,7 +615,7 @@ static int cpuidle_add_driver_sysfs(struct cpuidle_device *dev)
ret = kobject_init_and_add(&kdrv->kobj, &ktype_driver_cpuidle,
&kdev->kobj, "driver");
if (ret) {
- kfree(kdrv);
+ kobject_put(&kdrv->kobj);
return ret;
}
@@ -740,7 +709,7 @@ int cpuidle_add_sysfs(struct cpuidle_device *dev)
error = kobject_init_and_add(&kdev->kobj, &ktype_cpuidle, &cpu_dev->kobj,
"cpuidle");
if (error) {
- kfree(kdev);
+ kobject_put(&kdev->kobj);
return error;
}
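The rewritten store_current_governor() above bounds sscanf() by pasting the buffer size into the conversion specifier at build time, so the copy can never overflow gov_name. A standalone sketch of the same trick in plain userspace C, with local macros standing in for <linux/stringify.h> and a made-up NAME_LEN:

#include <stdio.h>

#define __stringify_1(x)	#x
#define __stringify(x)		__stringify_1(x)

#define NAME_LEN 16

int main(void)
{
	char name[NAME_LEN + 1];

	/* "%" __stringify(NAME_LEN) "s" pastes together as "%16s". */
	if (sscanf("menu\n", "%" __stringify(NAME_LEN) "s", name) == 1)
		printf("governor: %s\n", name);

	return 0;
}

The %s conversion also stops at whitespace, which is why the old manual trimming of the trailing newline is no longer needed.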
diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c
index a5fd8975f3d3..a6abb701bfc6 100644
--- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c
+++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c
@@ -8,7 +8,7 @@
* This file adds support for AES cipher with 128,192,256 bits keysize in
* CBC and ECB mode.
*
- * You could find a link for the datasheet in Documentation/arm/sunxi/README
+ * You could find a link for the datasheet in Documentation/arm/sunxi.rst
*/
#include <linux/crypto.h>
diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c
index 3e4e4bbda34c..b957061424a1 100644
--- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c
+++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c
@@ -7,7 +7,7 @@
*
* Core file which registers crypto algorithms supported by the CryptoEngine.
*
- * You could find a link for the datasheet in Documentation/arm/sunxi/README
+ * You could find a link for the datasheet in Documentation/arm/sunxi.rst
*/
#include <linux/clk.h>
#include <linux/crypto.h>
diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c
index 84d52fc3a2da..c89cb2ee2496 100644
--- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c
+++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c
@@ -8,7 +8,7 @@
* This file adds support for AES cipher with 128,192,256 bits keysize in
* CBC and ECB mode.
*
- * You could find a link for the datasheet in Documentation/arm/sunxi/README
+ * You could find a link for the datasheet in Documentation/arm/sunxi.rst
*/
#include <linux/crypto.h>
diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c
index 6b301afffd11..5d9d0fedcb06 100644
--- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c
+++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c
@@ -7,7 +7,7 @@
*
* Core file which registers crypto algorithms supported by the SecuritySystem
*
- * You could find a link for the datasheet in Documentation/arm/sunxi/README
+ * You could find a link for the datasheet in Documentation/arm/sunxi.rst
*/
#include <linux/clk.h>
#include <linux/crypto.h>
@@ -537,10 +537,8 @@ static int sun8i_ss_probe(struct platform_device *pdev)
return err;
irq = platform_get_irq(pdev, 0);
- if (irq < 0) {
- dev_err(ss->dev, "Cannot get SecuritySystem IRQ\n");
+ if (irq < 0)
return irq;
- }
ss->reset = devm_reset_control_get(&pdev->dev, NULL);
if (IS_ERR(ss->reset)) {
diff --git a/drivers/crypto/amlogic/amlogic-gxl-core.c b/drivers/crypto/amlogic/amlogic-gxl-core.c
index 9d4ead2f7ebb..411857fad8ba 100644
--- a/drivers/crypto/amlogic/amlogic-gxl-core.c
+++ b/drivers/crypto/amlogic/amlogic-gxl-core.c
@@ -253,10 +253,8 @@ static int meson_crypto_probe(struct platform_device *pdev)
mc->irqs = devm_kcalloc(mc->dev, MAXFLOW, sizeof(int), GFP_KERNEL);
for (i = 0; i < MAXFLOW; i++) {
mc->irqs[i] = platform_get_irq(pdev, i);
- if (mc->irqs[i] < 0) {
- dev_err(mc->dev, "Cannot get IRQ for flow %d\n", i);
+ if (mc->irqs[i] < 0)
return mc->irqs[i];
- }
err = devm_request_irq(&pdev->dev, mc->irqs[i], meson_irq_handler, 0,
"gxl-crypto", mc);
diff --git a/drivers/crypto/atmel-sha.c b/drivers/crypto/atmel-sha.c
index e536e2a6bbd8..75ccf41a7cb9 100644
--- a/drivers/crypto/atmel-sha.c
+++ b/drivers/crypto/atmel-sha.c
@@ -31,7 +31,6 @@
#include <linux/of_device.h>
#include <linux/delay.h>
#include <linux/crypto.h>
-#include <linux/cryptohash.h>
#include <crypto/scatterwalk.h>
#include <crypto/algapi.h>
#include <crypto/sha.h>
diff --git a/drivers/crypto/axis/artpec6_crypto.c b/drivers/crypto/axis/artpec6_crypto.c
index fcf1effc7661..62ba0325a618 100644
--- a/drivers/crypto/axis/artpec6_crypto.c
+++ b/drivers/crypto/axis/artpec6_crypto.c
@@ -2239,16 +2239,12 @@ artpec6_crypto_hash_set_key(struct crypto_ahash *tfm,
blocksize = crypto_tfm_alg_blocksize(crypto_ahash_tfm(tfm));
if (keylen > blocksize) {
- SHASH_DESC_ON_STACK(hdesc, tfm_ctx->child_hash);
-
- hdesc->tfm = tfm_ctx->child_hash;
-
tfm_ctx->hmac_key_length = blocksize;
- ret = crypto_shash_digest(hdesc, key, keylen,
- tfm_ctx->hmac_key);
+
+ ret = crypto_shash_tfm_digest(tfm_ctx->child_hash, key, keylen,
+ tfm_ctx->hmac_key);
if (ret)
return ret;
-
} else {
memcpy(tfm_ctx->hmac_key, key, keylen);
tfm_ctx->hmac_key_length = keylen;
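This hunk, like the ccp and ccree changes further down, replaces the SHASH_DESC_ON_STACK() + crypto_shash_digest() pair with crypto_shash_tfm_digest(), which sets up and wipes the hash descriptor internally. A minimal sketch of the new call; the demo_sha256_digest() helper and the choice of sha256 are only examples.

#include <crypto/hash.h>
#include <linux/err.h>

static int demo_sha256_digest(const u8 *data, unsigned int len, u8 *out)
{
	struct crypto_shash *tfm;
	int ret;

	tfm = crypto_alloc_shash("sha256", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	/* One-shot digest; out must hold crypto_shash_digestsize(tfm) bytes. */
	ret = crypto_shash_tfm_digest(tfm, data, len, out);

	crypto_free_shash(tfm);
	return ret;
}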
diff --git a/drivers/crypto/bcm/cipher.c b/drivers/crypto/bcm/cipher.c
index c8b9408541a9..a353217a0d33 100644
--- a/drivers/crypto/bcm/cipher.c
+++ b/drivers/crypto/bcm/cipher.c
@@ -308,9 +308,9 @@ static int handle_skcipher_req(struct iproc_reqctx_s *rctx)
container_of(areq, struct skcipher_request, base);
struct iproc_ctx_s *ctx = rctx->ctx;
struct spu_cipher_parms cipher_parms;
- int err = 0;
- unsigned int chunksize = 0; /* Num bytes of request to submit */
- int remaining = 0; /* Bytes of request still to process */
+ int err;
+ unsigned int chunksize; /* Num bytes of request to submit */
+ int remaining; /* Bytes of request still to process */
int chunk_start; /* Beginning of data for current SPU msg */
/* IV or ctr value to use in this SPU msg */
@@ -698,7 +698,7 @@ static int handle_ahash_req(struct iproc_reqctx_s *rctx)
/* number of bytes still to be hashed in this req */
unsigned int nbytes_to_hash = 0;
- int err = 0;
+ int err;
unsigned int chunksize = 0; /* length of hash carry + new data */
/*
* length of new data, not from hash carry, to be submitted in
@@ -1664,7 +1664,7 @@ static void spu_rx_callback(struct mbox_client *cl, void *msg)
struct spu_hw *spu = &iproc_priv.spu;
struct brcm_message *mssg = msg;
struct iproc_reqctx_s *rctx;
- int err = 0;
+ int err;
rctx = mssg->ctx;
if (unlikely(!rctx)) {
@@ -1967,7 +1967,7 @@ static int ahash_enqueue(struct ahash_request *req)
struct iproc_reqctx_s *rctx = ahash_request_ctx(req);
struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
struct iproc_ctx_s *ctx = crypto_ahash_ctx(tfm);
- int err = 0;
+ int err;
const char *alg_name;
flow_log("ahash_enqueue() nbytes:%u\n", req->nbytes);
@@ -2299,7 +2299,7 @@ ahash_finup_exit:
static int ahash_digest(struct ahash_request *req)
{
- int err = 0;
+ int err;
flow_log("ahash_digest() nbytes:%u\n", req->nbytes);
@@ -4436,7 +4436,7 @@ static int spu_mb_init(struct device *dev)
for (i = 0; i < iproc_priv.spu.num_chan; i++) {
iproc_priv.mbox[i] = mbox_request_channel(mcl, i);
if (IS_ERR(iproc_priv.mbox[i])) {
- err = (int)PTR_ERR(iproc_priv.mbox[i]);
+ err = PTR_ERR(iproc_priv.mbox[i]);
dev_err(dev,
"Mbox channel %d request failed with err %d",
i, err);
@@ -4717,21 +4717,20 @@ static int spu_dt_read(struct platform_device *pdev)
matched_spu_type = of_device_get_match_data(dev);
if (!matched_spu_type) {
- dev_err(&pdev->dev, "Failed to match device\n");
+ dev_err(dev, "Failed to match device\n");
return -ENODEV;
}
spu->spu_type = matched_spu_type->type;
spu->spu_subtype = matched_spu_type->subtype;
- i = 0;
for (i = 0; (i < MAX_SPUS) && ((spu_ctrl_regs =
platform_get_resource(pdev, IORESOURCE_MEM, i)) != NULL); i++) {
spu->reg_vbase[i] = devm_ioremap_resource(dev, spu_ctrl_regs);
if (IS_ERR(spu->reg_vbase[i])) {
err = PTR_ERR(spu->reg_vbase[i]);
- dev_err(&pdev->dev, "Failed to map registers: %d\n",
+ dev_err(dev, "Failed to map registers: %d\n",
err);
spu->reg_vbase[i] = NULL;
return err;
@@ -4747,7 +4746,7 @@ static int bcm_spu_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
struct spu_hw *spu = &iproc_priv.spu;
- int err = 0;
+ int err;
iproc_priv.pdev = pdev;
platform_set_drvdata(iproc_priv.pdev,
@@ -4757,7 +4756,7 @@ static int bcm_spu_probe(struct platform_device *pdev)
if (err < 0)
goto failure;
- err = spu_mb_init(&pdev->dev);
+ err = spu_mb_init(dev);
if (err < 0)
goto failure;
@@ -4766,7 +4765,7 @@ static int bcm_spu_probe(struct platform_device *pdev)
else if (spu->spu_type == SPU_TYPE_SPU2)
iproc_priv.bcm_hdr_len = 0;
- spu_functions_register(&pdev->dev, spu->spu_type, spu->spu_subtype);
+ spu_functions_register(dev, spu->spu_type, spu->spu_subtype);
spu_counters_init();
diff --git a/drivers/crypto/cavium/nitrox/nitrox_main.c b/drivers/crypto/cavium/nitrox/nitrox_main.c
index e91be9b8b083..788c6607078b 100644
--- a/drivers/crypto/cavium/nitrox/nitrox_main.c
+++ b/drivers/crypto/cavium/nitrox/nitrox_main.c
@@ -346,7 +346,7 @@ static void nitrox_pf_sw_cleanup(struct nitrox_device *ndev)
}
/**
- * nitrox_bist_check - Check NITORX BIST registers status
+ * nitrox_bist_check - Check NITROX BIST registers status
* @ndev: NITROX device
*/
static int nitrox_bist_check(struct nitrox_device *ndev)
diff --git a/drivers/crypto/ccp/Kconfig b/drivers/crypto/ccp/Kconfig
index e0a8bd15aa74..32268e239bf1 100644
--- a/drivers/crypto/ccp/Kconfig
+++ b/drivers/crypto/ccp/Kconfig
@@ -10,10 +10,9 @@ config CRYPTO_DEV_CCP_DD
config CRYPTO_DEV_SP_CCP
bool "Cryptographic Coprocessor device"
default y
- depends on CRYPTO_DEV_CCP_DD
+ depends on CRYPTO_DEV_CCP_DD && DMADEVICES
select HW_RANDOM
select DMA_ENGINE
- select DMADEVICES
select CRYPTO_SHA1
select CRYPTO_SHA256
help
diff --git a/drivers/crypto/ccp/ccp-crypto-sha.c b/drivers/crypto/ccp/ccp-crypto-sha.c
index 474e6f1a6a84..b0cc2bd73af8 100644
--- a/drivers/crypto/ccp/ccp-crypto-sha.c
+++ b/drivers/crypto/ccp/ccp-crypto-sha.c
@@ -272,9 +272,6 @@ static int ccp_sha_setkey(struct crypto_ahash *tfm, const u8 *key,
{
struct ccp_ctx *ctx = crypto_tfm_ctx(crypto_ahash_tfm(tfm));
struct crypto_shash *shash = ctx->u.sha.hmac_tfm;
-
- SHASH_DESC_ON_STACK(sdesc, shash);
-
unsigned int block_size = crypto_shash_blocksize(shash);
unsigned int digest_size = crypto_shash_digestsize(shash);
int i, ret;
@@ -289,10 +286,8 @@ static int ccp_sha_setkey(struct crypto_ahash *tfm, const u8 *key,
if (key_len > block_size) {
/* Must hash the input key */
- sdesc->tfm = shash;
-
- ret = crypto_shash_digest(sdesc, key, key_len,
- ctx->u.sha.key);
+ ret = crypto_shash_tfm_digest(shash, key, key_len,
+ ctx->u.sha.key);
if (ret)
return -EINVAL;
diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c
index 896f190b9a50..a2426334be61 100644
--- a/drivers/crypto/ccp/sev-dev.c
+++ b/drivers/crypto/ccp/sev-dev.c
@@ -20,6 +20,7 @@
#include <linux/hw_random.h>
#include <linux/ccp.h>
#include <linux/firmware.h>
+#include <linux/gfp.h>
#include <asm/smp.h>
@@ -44,6 +45,14 @@ MODULE_PARM_DESC(psp_probe_timeout, " default timeout value, in seconds, during
static bool psp_dead;
static int psp_timeout;
+/* Trusted Memory Region (TMR):
+ * The TMR is a 1MB area that must be 1MB aligned. Use the page allocator
+ * to allocate the memory, which will return aligned memory for the specified
+ * allocation order.
+ */
+#define SEV_ES_TMR_SIZE (1024 * 1024)
+static void *sev_es_tmr;
+
static inline bool sev_version_greater_or_equal(u8 maj, u8 min)
{
struct sev_device *sev = psp_master->sev_data;
@@ -214,6 +223,20 @@ static int __sev_platform_init_locked(int *error)
if (sev->state == SEV_STATE_INIT)
return 0;
+ if (sev_es_tmr) {
+ u64 tmr_pa;
+
+ /*
+ * Do not include the encryption mask on the physical
+ * address of the TMR (firmware should clear it anyway).
+ */
+ tmr_pa = __pa(sev_es_tmr);
+
+ sev->init_cmd_buf.flags |= SEV_INIT_FLAGS_SEV_ES;
+ sev->init_cmd_buf.tmr_address = tmr_pa;
+ sev->init_cmd_buf.tmr_len = SEV_ES_TMR_SIZE;
+ }
+
rc = __sev_do_cmd_locked(SEV_CMD_INIT, &sev->init_cmd_buf, error);
if (rc)
return rc;
@@ -371,8 +394,7 @@ static int sev_ioctl_do_pek_csr(struct sev_issue_cmd *argp, bool writable)
goto cmd;
/* allocate a physically contiguous buffer to store the CSR blob */
- if (!access_ok(input.address, input.length) ||
- input.length > SEV_FW_BLOB_MAX_SIZE) {
+ if (input.length > SEV_FW_BLOB_MAX_SIZE) {
ret = -EFAULT;
goto e_free;
}
@@ -609,12 +631,6 @@ static int sev_ioctl_do_get_id2(struct sev_issue_cmd *argp)
if (copy_from_user(&input, (void __user *)argp->data, sizeof(input)))
return -EFAULT;
- /* Check if we have write access to the userspace buffer */
- if (input.address &&
- input.length &&
- !access_ok(input.address, input.length))
- return -EFAULT;
-
data = kzalloc(sizeof(*data), GFP_KERNEL);
if (!data)
return -ENOMEM;
@@ -730,15 +746,13 @@ static int sev_ioctl_do_pdh_export(struct sev_issue_cmd *argp, bool writable)
goto cmd;
/* Allocate a physically contiguous buffer to store the PDH blob. */
- if ((input.pdh_cert_len > SEV_FW_BLOB_MAX_SIZE) ||
- !access_ok(input.pdh_cert_address, input.pdh_cert_len)) {
+ if (input.pdh_cert_len > SEV_FW_BLOB_MAX_SIZE) {
ret = -EFAULT;
goto e_free;
}
/* Allocate a physically contiguous buffer to store the cert chain blob. */
- if ((input.cert_chain_len > SEV_FW_BLOB_MAX_SIZE) ||
- !access_ok(input.cert_chain_address, input.cert_chain_len)) {
+ if (input.cert_chain_len > SEV_FW_BLOB_MAX_SIZE) {
ret = -EFAULT;
goto e_free;
}
@@ -1012,6 +1026,7 @@ EXPORT_SYMBOL_GPL(sev_issue_cmd_external_user);
void sev_pci_init(void)
{
struct sev_device *sev = psp_master->sev_data;
+ struct page *tmr_page;
int error, rc;
if (!sev)
@@ -1041,6 +1056,16 @@ void sev_pci_init(void)
sev_update_firmware(sev->dev) == 0)
sev_get_api_version();
+ /* Obtain the TMR memory area for SEV-ES use */
+ tmr_page = alloc_pages(GFP_KERNEL, get_order(SEV_ES_TMR_SIZE));
+ if (tmr_page) {
+ sev_es_tmr = page_address(tmr_page);
+ } else {
+ sev_es_tmr = NULL;
+ dev_warn(sev->dev,
+ "SEV: TMR allocation failed, SEV-ES support unavailable\n");
+ }
+
/* Initialize the platform */
rc = sev_platform_init(&error);
if (rc && (error == SEV_RET_SECURE_DATA_INVALID)) {
@@ -1075,4 +1100,13 @@ void sev_pci_exit(void)
return;
sev_platform_shutdown(NULL);
+
+ if (sev_es_tmr) {
+ /* The TMR area was encrypted, flush it from the cache */
+ wbinvd_on_all_cpus();
+
+ free_pages((unsigned long)sev_es_tmr,
+ get_order(SEV_ES_TMR_SIZE));
+ sev_es_tmr = NULL;
+ }
}
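The TMR comment above relies on a property of the buddy allocator: an order-n allocation starts on a 2^n-page boundary, so a 1MB allocation (order 8 with 4KiB pages) is automatically 1MB aligned. A small sketch of that reasoning, assuming 4KiB pages; the demo_alloc_1m_aligned() helper is hypothetical.

#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/sizes.h>

static void *demo_alloc_1m_aligned(void)
{
	/* get_order(SZ_1M) == 8 with 4 KiB pages: 256 contiguous pages. */
	struct page *page = alloc_pages(GFP_KERNEL, get_order(SZ_1M));
	void *va;

	if (!page)
		return NULL;

	va = page_address(page);
	/* Order-n blocks from the buddy allocator are 2^n-page aligned. */
	WARN_ON(!IS_ALIGNED(__pa(va), SZ_1M));

	return va;
}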
diff --git a/drivers/crypto/ccree/cc_cipher.c b/drivers/crypto/ccree/cc_cipher.c
index a84335328f37..872ea3ff1c6b 100644
--- a/drivers/crypto/ccree/cc_cipher.c
+++ b/drivers/crypto/ccree/cc_cipher.c
@@ -427,12 +427,9 @@ static int cc_cipher_setkey(struct crypto_skcipher *sktfm, const u8 *key,
int key_len = keylen >> 1;
int err;
- SHASH_DESC_ON_STACK(desc, ctx_p->shash_tfm);
-
- desc->tfm = ctx_p->shash_tfm;
-
- err = crypto_shash_digest(desc, ctx_p->user.key, key_len,
- ctx_p->user.key + key_len);
+ err = crypto_shash_tfm_digest(ctx_p->shash_tfm,
+ ctx_p->user.key, key_len,
+ ctx_p->user.key + key_len);
if (err) {
dev_err(dev, "Failed to hash ESSIV key.\n");
return err;
diff --git a/drivers/crypto/ccree/cc_debugfs.c b/drivers/crypto/ccree/cc_debugfs.c
index c454afce7781..7083767602fc 100644
--- a/drivers/crypto/ccree/cc_debugfs.c
+++ b/drivers/crypto/ccree/cc_debugfs.c
@@ -26,7 +26,7 @@ static struct debugfs_reg32 ver_sig_regs[] = {
{ .name = "VERSION" }, /* Must be 1st */
};
-static struct debugfs_reg32 pid_cid_regs[] = {
+static const struct debugfs_reg32 pid_cid_regs[] = {
CC_DEBUG_REG(PERIPHERAL_ID_0),
CC_DEBUG_REG(PERIPHERAL_ID_1),
CC_DEBUG_REG(PERIPHERAL_ID_2),
@@ -38,7 +38,7 @@ static struct debugfs_reg32 pid_cid_regs[] = {
CC_DEBUG_REG(COMPONENT_ID_3),
};
-static struct debugfs_reg32 debug_regs[] = {
+static const struct debugfs_reg32 debug_regs[] = {
CC_DEBUG_REG(HOST_IRR),
CC_DEBUG_REG(HOST_POWER_DOWN_EN),
CC_DEBUG_REG(AXIM_MON_ERR),
diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c
index 2080b2ec6639..f26a7a15551a 100644
--- a/drivers/crypto/chelsio/chcr_algo.c
+++ b/drivers/crypto/chelsio/chcr_algo.c
@@ -44,7 +44,6 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/crypto.h>
-#include <linux/cryptohash.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/highmem.h>
@@ -1779,7 +1778,7 @@ static int chcr_ahash_final(struct ahash_request *req)
struct uld_ctx *u_ctx = ULD_CTX(h_ctx(rtfm));
struct chcr_context *ctx = h_ctx(rtfm);
u8 bs = crypto_tfm_alg_blocksize(crypto_ahash_tfm(rtfm));
- int error = -EINVAL;
+ int error;
unsigned int cpu;
cpu = get_cpu();
diff --git a/drivers/crypto/chelsio/chcr_ipsec.c b/drivers/crypto/chelsio/chcr_ipsec.c
index 3a10f51ad6fd..967babd67a51 100644
--- a/drivers/crypto/chelsio/chcr_ipsec.c
+++ b/drivers/crypto/chelsio/chcr_ipsec.c
@@ -40,7 +40,6 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/crypto.h>
-#include <linux/cryptohash.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/highmem.h>
diff --git a/drivers/crypto/hisilicon/Kconfig b/drivers/crypto/hisilicon/Kconfig
index f09c6cf7823e..9c3b3ca815e6 100644
--- a/drivers/crypto/hisilicon/Kconfig
+++ b/drivers/crypto/hisilicon/Kconfig
@@ -29,6 +29,7 @@ config CRYPTO_DEV_HISI_SEC2
depends on PCI && PCI_MSI
depends on UACCE || UACCE=n
depends on ARM64 || (COMPILE_TEST && 64BIT)
+ depends on ACPI
help
Support for HiSilicon SEC Engine of version 2 in crypto subsystem.
It provides AES, SM4, and 3DES algorithms with ECB
@@ -42,6 +43,7 @@ config CRYPTO_DEV_HISI_QM
depends on ARM64 || COMPILE_TEST
depends on PCI && PCI_MSI
depends on UACCE || UACCE=n
+ depends on ACPI
help
HiSilicon accelerator engines use a common queue management
interface. Specific engine drivers may use this module.
@@ -52,6 +54,7 @@ config CRYPTO_DEV_HISI_ZIP
depends on ARM64 || (COMPILE_TEST && 64BIT)
depends on !CPU_BIG_ENDIAN || COMPILE_TEST
depends on UACCE || UACCE=n
+ depends on ACPI
select CRYPTO_DEV_HISI_QM
help
Support for HiSilicon ZIP Driver
@@ -61,6 +64,7 @@ config CRYPTO_DEV_HISI_HPRE
depends on PCI && PCI_MSI
depends on UACCE || UACCE=n
depends on ARM64 || (COMPILE_TEST && 64BIT)
+ depends on ACPI
select CRYPTO_DEV_HISI_QM
select CRYPTO_DH
select CRYPTO_RSA
diff --git a/drivers/crypto/hisilicon/hpre/hpre.h b/drivers/crypto/hisilicon/hpre/hpre.h
index 03d512ec6336..ed730d173e95 100644
--- a/drivers/crypto/hisilicon/hpre/hpre.h
+++ b/drivers/crypto/hisilicon/hpre/hpre.h
@@ -25,6 +25,17 @@ enum hpre_ctrl_dbgfs_file {
HPRE_DEBUG_FILE_NUM,
};
+enum hpre_dfx_dbgfs_file {
+ HPRE_SEND_CNT,
+ HPRE_RECV_CNT,
+ HPRE_SEND_FAIL_CNT,
+ HPRE_SEND_BUSY_CNT,
+ HPRE_OVER_THRHLD_CNT,
+ HPRE_OVERTIME_THRHLD,
+ HPRE_INVALID_REQ_CNT,
+ HPRE_DFX_FILE_NUM
+};
+
#define HPRE_DEBUGFS_FILE_NUM (HPRE_DEBUG_FILE_NUM + HPRE_CLUSTERS_NUM - 1)
struct hpre_debugfs_file {
@@ -34,6 +45,11 @@ struct hpre_debugfs_file {
struct hpre_debug *debug;
};
+struct hpre_dfx {
+ atomic64_t value;
+ enum hpre_dfx_dbgfs_file type;
+};
+
/*
* One HPRE controller has one PF and multiple VFs, some global configurations
* which PF has need this structure.
@@ -41,13 +57,13 @@ struct hpre_debugfs_file {
*/
struct hpre_debug {
struct dentry *debug_root;
+ struct hpre_dfx dfx[HPRE_DFX_FILE_NUM];
struct hpre_debugfs_file files[HPRE_DEBUGFS_FILE_NUM];
};
struct hpre {
struct hisi_qm qm;
struct hpre_debug debug;
- u32 num_vfs;
unsigned long status;
};
diff --git a/drivers/crypto/hisilicon/hpre/hpre_crypto.c b/drivers/crypto/hisilicon/hpre/hpre_crypto.c
index 65425250b2e9..7b5cb27d473d 100644
--- a/drivers/crypto/hisilicon/hpre/hpre_crypto.c
+++ b/drivers/crypto/hisilicon/hpre/hpre_crypto.c
@@ -10,6 +10,7 @@
#include <linux/dma-mapping.h>
#include <linux/fips.h>
#include <linux/module.h>
+#include <linux/time.h>
#include "hpre.h"
struct hpre_ctx;
@@ -32,6 +33,9 @@ struct hpre_ctx;
#define HPRE_SQE_DONE_SHIFT 30
#define HPRE_DH_MAX_P_SZ 512
+#define HPRE_DFX_SEC_TO_US 1000000
+#define HPRE_DFX_US_TO_NS 1000
+
typedef void (*hpre_cb)(struct hpre_ctx *ctx, void *sqe);
struct hpre_rsa_ctx {
@@ -68,6 +72,7 @@ struct hpre_dh_ctx {
struct hpre_ctx {
struct hisi_qp *qp;
struct hpre_asym_request **req_list;
+ struct hpre *hpre;
spinlock_t req_lock;
unsigned int key_sz;
bool crt_g2_mode;
@@ -90,6 +95,7 @@ struct hpre_asym_request {
int err;
int req_id;
hpre_cb cb;
+ struct timespec64 req_time;
};
static DEFINE_MUTEX(hpre_alg_lock);
@@ -119,6 +125,7 @@ static void hpre_free_req_id(struct hpre_ctx *ctx, int req_id)
static int hpre_add_req_to_ctx(struct hpre_asym_request *hpre_req)
{
struct hpre_ctx *ctx;
+ struct hpre_dfx *dfx;
int id;
ctx = hpre_req->ctx;
@@ -129,6 +136,10 @@ static int hpre_add_req_to_ctx(struct hpre_asym_request *hpre_req)
ctx->req_list[id] = hpre_req;
hpre_req->req_id = id;
+ dfx = ctx->hpre->debug.dfx;
+ if (atomic64_read(&dfx[HPRE_OVERTIME_THRHLD].value))
+ ktime_get_ts64(&hpre_req->req_time);
+
return id;
}
@@ -309,12 +320,16 @@ static int hpre_alg_res_post_hf(struct hpre_ctx *ctx, struct hpre_sqe *sqe,
static int hpre_ctx_set(struct hpre_ctx *ctx, struct hisi_qp *qp, int qlen)
{
+ struct hpre *hpre;
+
if (!ctx || !qp || qlen < 0)
return -EINVAL;
spin_lock_init(&ctx->req_lock);
ctx->qp = qp;
+ hpre = container_of(ctx->qp->qm, struct hpre, qm);
+ ctx->hpre = hpre;
ctx->req_list = kcalloc(qlen, sizeof(void *), GFP_KERNEL);
if (!ctx->req_list)
return -ENOMEM;
@@ -337,38 +352,80 @@ static void hpre_ctx_clear(struct hpre_ctx *ctx, bool is_clear_all)
ctx->key_sz = 0;
}
+static bool hpre_is_bd_timeout(struct hpre_asym_request *req,
+ u64 overtime_thrhld)
+{
+ struct timespec64 reply_time;
+ u64 time_use_us;
+
+ ktime_get_ts64(&reply_time);
+ time_use_us = (reply_time.tv_sec - req->req_time.tv_sec) *
+ HPRE_DFX_SEC_TO_US +
+ (reply_time.tv_nsec - req->req_time.tv_nsec) /
+ HPRE_DFX_US_TO_NS;
+
+ if (time_use_us <= overtime_thrhld)
+ return false;
+
+ return true;
+}
+
static void hpre_dh_cb(struct hpre_ctx *ctx, void *resp)
{
+ struct hpre_dfx *dfx = ctx->hpre->debug.dfx;
struct hpre_asym_request *req;
struct kpp_request *areq;
+ u64 overtime_thrhld;
int ret;
ret = hpre_alg_res_post_hf(ctx, resp, (void **)&req);
areq = req->areq.dh;
areq->dst_len = ctx->key_sz;
+
+ overtime_thrhld = atomic64_read(&dfx[HPRE_OVERTIME_THRHLD].value);
+ if (overtime_thrhld && hpre_is_bd_timeout(req, overtime_thrhld))
+ atomic64_inc(&dfx[HPRE_OVER_THRHLD_CNT].value);
+
hpre_hw_data_clr_all(ctx, req, areq->dst, areq->src);
kpp_request_complete(areq, ret);
+ atomic64_inc(&dfx[HPRE_RECV_CNT].value);
}
static void hpre_rsa_cb(struct hpre_ctx *ctx, void *resp)
{
+ struct hpre_dfx *dfx = ctx->hpre->debug.dfx;
struct hpre_asym_request *req;
struct akcipher_request *areq;
+ u64 overtime_thrhld;
int ret;
ret = hpre_alg_res_post_hf(ctx, resp, (void **)&req);
+
+ overtime_thrhld = atomic64_read(&dfx[HPRE_OVERTIME_THRHLD].value);
+ if (overtime_thrhld && hpre_is_bd_timeout(req, overtime_thrhld))
+ atomic64_inc(&dfx[HPRE_OVER_THRHLD_CNT].value);
+
areq = req->areq.rsa;
areq->dst_len = ctx->key_sz;
hpre_hw_data_clr_all(ctx, req, areq->dst, areq->src);
akcipher_request_complete(areq, ret);
+ atomic64_inc(&dfx[HPRE_RECV_CNT].value);
}
static void hpre_alg_cb(struct hisi_qp *qp, void *resp)
{
struct hpre_ctx *ctx = qp->qp_ctx;
+ struct hpre_dfx *dfx = ctx->hpre->debug.dfx;
struct hpre_sqe *sqe = resp;
+ struct hpre_asym_request *req = ctx->req_list[le16_to_cpu(sqe->tag)];
- ctx->req_list[le16_to_cpu(sqe->tag)]->cb(ctx, resp);
+
+ if (unlikely(!req)) {
+ atomic64_inc(&dfx[HPRE_INVALID_REQ_CNT].value);
+ return;
+ }
+
+ req->cb(ctx, resp);
}
static int hpre_ctx_init(struct hpre_ctx *ctx)
@@ -436,6 +493,29 @@ static int hpre_msg_request_set(struct hpre_ctx *ctx, void *req, bool is_rsa)
return 0;
}
+static int hpre_send(struct hpre_ctx *ctx, struct hpre_sqe *msg)
+{
+ struct hpre_dfx *dfx = ctx->hpre->debug.dfx;
+ int ctr = 0;
+ int ret;
+
+ do {
+ atomic64_inc(&dfx[HPRE_SEND_CNT].value);
+ ret = hisi_qp_send(ctx->qp, msg);
+ if (ret != -EBUSY)
+ break;
+ atomic64_inc(&dfx[HPRE_SEND_BUSY_CNT].value);
+ } while (ctr++ < HPRE_TRY_SEND_TIMES);
+
+ if (likely(!ret))
+ return ret;
+
+ if (ret != -EBUSY)
+ atomic64_inc(&dfx[HPRE_SEND_FAIL_CNT].value);
+
+ return ret;
+}
+
#ifdef CONFIG_CRYPTO_DH
static int hpre_dh_compute_value(struct kpp_request *req)
{
@@ -444,7 +524,6 @@ static int hpre_dh_compute_value(struct kpp_request *req)
void *tmp = kpp_request_ctx(req);
struct hpre_asym_request *hpre_req = PTR_ALIGN(tmp, HPRE_ALIGN_SZ);
struct hpre_sqe *msg = &hpre_req->req;
- int ctr = 0;
int ret;
ret = hpre_msg_request_set(ctx, req, false);
@@ -465,11 +544,9 @@ static int hpre_dh_compute_value(struct kpp_request *req)
msg->dw0 = cpu_to_le32(le32_to_cpu(msg->dw0) | HPRE_ALG_DH_G2);
else
msg->dw0 = cpu_to_le32(le32_to_cpu(msg->dw0) | HPRE_ALG_DH);
- do {
- ret = hisi_qp_send(ctx->qp, msg);
- } while (ret == -EBUSY && ctr++ < HPRE_TRY_SEND_TIMES);
/* success */
+ ret = hpre_send(ctx, msg);
if (likely(!ret))
return -EINPROGRESS;
@@ -647,7 +724,6 @@ static int hpre_rsa_enc(struct akcipher_request *req)
void *tmp = akcipher_request_ctx(req);
struct hpre_asym_request *hpre_req = PTR_ALIGN(tmp, HPRE_ALIGN_SZ);
struct hpre_sqe *msg = &hpre_req->req;
- int ctr = 0;
int ret;
/* For 512 and 1536 bits key size, use soft tfm instead */
@@ -677,11 +753,8 @@ static int hpre_rsa_enc(struct akcipher_request *req)
if (unlikely(ret))
goto clear_all;
- do {
- ret = hisi_qp_send(ctx->qp, msg);
- } while (ret == -EBUSY && ctr++ < HPRE_TRY_SEND_TIMES);
-
/* success */
+ ret = hpre_send(ctx, msg);
if (likely(!ret))
return -EINPROGRESS;
@@ -699,7 +772,6 @@ static int hpre_rsa_dec(struct akcipher_request *req)
void *tmp = akcipher_request_ctx(req);
struct hpre_asym_request *hpre_req = PTR_ALIGN(tmp, HPRE_ALIGN_SZ);
struct hpre_sqe *msg = &hpre_req->req;
- int ctr = 0;
int ret;
/* For 512 and 1536 bits key size, use soft tfm instead */
@@ -736,11 +808,8 @@ static int hpre_rsa_dec(struct akcipher_request *req)
if (unlikely(ret))
goto clear_all;
- do {
- ret = hisi_qp_send(ctx->qp, msg);
- } while (ret == -EBUSY && ctr++ < HPRE_TRY_SEND_TIMES);
-
/* success */
+ ret = hpre_send(ctx, msg);
if (likely(!ret))
return -EINPROGRESS;
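hpre_is_bd_timeout() above turns a timespec64 delta into microseconds before comparing it with the user-set overtime threshold. A standalone sketch of that conversion using the same constants; the demo_* names are placeholders. For example, start 10.000200 s and end 10.000900 s comes out as 700 us.

#include <linux/time64.h>
#include <linux/types.h>

#define DEMO_SEC_TO_US	1000000
#define DEMO_US_TO_NS	1000

static u64 demo_elapsed_us(const struct timespec64 *start,
			   const struct timespec64 *end)
{
	return (end->tv_sec - start->tv_sec) * DEMO_SEC_TO_US +
	       (end->tv_nsec - start->tv_nsec) / DEMO_US_TO_NS;
}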
diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c
index 88be53bf4a38..a3ee127a70e3 100644
--- a/drivers/crypto/hisilicon/hpre/hpre_main.c
+++ b/drivers/crypto/hisilicon/hpre/hpre_main.c
@@ -59,10 +59,6 @@
#define HPRE_HAC_ECC2_CNT 0x301a08
#define HPRE_HAC_INT_STATUS 0x301800
#define HPRE_HAC_SOURCE_INT 0x301600
-#define MASTER_GLOBAL_CTRL_SHUTDOWN 1
-#define MASTER_TRANS_RETURN_RW 3
-#define HPRE_MASTER_TRANS_RETURN 0x300150
-#define HPRE_MASTER_GLOBAL_CTRL 0x300000
#define HPRE_CLSTR_ADDR_INTRVL 0x1000
#define HPRE_CLUSTER_INQURY 0x100
#define HPRE_CLSTR_ADDR_INQRY_RSLT 0x104
@@ -80,7 +76,16 @@
#define HPRE_BD_USR_MASK 0x3
#define HPRE_CLUSTER_CORE_MASK 0xf
+#define HPRE_AM_OOO_SHUTDOWN_ENB 0x301044
+#define HPRE_AM_OOO_SHUTDOWN_ENABLE BIT(0)
+#define HPRE_WR_MSI_PORT BIT(2)
+
+#define HPRE_CORE_ECC_2BIT_ERR BIT(1)
+#define HPRE_OOO_ECC_2BIT_ERR BIT(5)
+
#define HPRE_VIA_MSI_DSM 1
+#define HPRE_SQE_MASK_OFFSET 8
+#define HPRE_SQE_MASK_LEN 24
static struct hisi_qm_list hpre_devices;
static const char hpre_name[] = "hisi_hpre";
@@ -131,7 +136,7 @@ static const u64 hpre_cluster_offsets[] = {
HPRE_CLSTR_BASE + HPRE_CLUSTER3 * HPRE_CLSTR_ADDR_INTRVL,
};
-static struct debugfs_reg32 hpre_cluster_dfx_regs[] = {
+static const struct debugfs_reg32 hpre_cluster_dfx_regs[] = {
{"CORES_EN_STATUS ", HPRE_CORE_EN_OFFSET},
{"CORES_INI_CFG ", HPRE_CORE_INI_CFG_OFFSET},
{"CORES_INI_STATUS ", HPRE_CORE_INI_STATUS_OFFSET},
@@ -139,7 +144,7 @@ static struct debugfs_reg32 hpre_cluster_dfx_regs[] = {
{"CORES_IS_SCHD ", HPRE_CORE_IS_SCHD_OFFSET},
};
-static struct debugfs_reg32 hpre_com_dfx_regs[] = {
+static const struct debugfs_reg32 hpre_com_dfx_regs[] = {
{"READ_CLR_EN ", HPRE_CTRL_CNT_CLR_CE},
{"AXQOS ", HPRE_VFG_AXQOS},
{"AWUSR_CFG ", HPRE_AWUSR_FP_CFG},
@@ -156,44 +161,38 @@ static struct debugfs_reg32 hpre_com_dfx_regs[] = {
{"INT_STATUS ", HPRE_INT_STATUS},
};
-static int hpre_pf_q_num_set(const char *val, const struct kernel_param *kp)
-{
- struct pci_dev *pdev;
- u32 n, q_num;
- u8 rev_id;
- int ret;
-
- if (!val)
- return -EINVAL;
-
- pdev = pci_get_device(PCI_VENDOR_ID_HUAWEI, HPRE_PCI_DEVICE_ID, NULL);
- if (!pdev) {
- q_num = HPRE_QUEUE_NUM_V2;
- pr_info("No device found currently, suppose queue number is %d\n",
- q_num);
- } else {
- rev_id = pdev->revision;
- if (rev_id != QM_HW_V2)
- return -EINVAL;
-
- q_num = HPRE_QUEUE_NUM_V2;
- }
-
- ret = kstrtou32(val, 10, &n);
- if (ret != 0 || n == 0 || n > q_num)
- return -EINVAL;
+static const char *hpre_dfx_files[HPRE_DFX_FILE_NUM] = {
+ "send_cnt",
+ "recv_cnt",
+ "send_fail_cnt",
+ "send_busy_cnt",
+ "over_thrhld_cnt",
+ "overtime_thrhld",
+ "invalid_req_cnt"
+};
- return param_set_int(val, kp);
+static int pf_q_num_set(const char *val, const struct kernel_param *kp)
+{
+ return q_num_set(val, kp, HPRE_PCI_DEVICE_ID);
}
static const struct kernel_param_ops hpre_pf_q_num_ops = {
- .set = hpre_pf_q_num_set,
+ .set = pf_q_num_set,
.get = param_get_int,
};
-static u32 hpre_pf_q_num = HPRE_PF_DEF_Q_NUM;
-module_param_cb(hpre_pf_q_num, &hpre_pf_q_num_ops, &hpre_pf_q_num, 0444);
-MODULE_PARM_DESC(hpre_pf_q_num, "Number of queues in PF of CS(1-1024)");
+static u32 pf_q_num = HPRE_PF_DEF_Q_NUM;
+module_param_cb(pf_q_num, &hpre_pf_q_num_ops, &pf_q_num, 0444);
+MODULE_PARM_DESC(pf_q_num, "Number of queues in PF of CS(1-1024)");
+
+static const struct kernel_param_ops vfs_num_ops = {
+ .set = vfs_num_set,
+ .get = param_get_int,
+};
+
+static u32 vfs_num;
+module_param_cb(vfs_num, &vfs_num_ops, &vfs_num, 0444);
+MODULE_PARM_DESC(vfs_num, "Number of VFs to enable(1-63), 0(default)");
struct hisi_qp *hpre_create_qp(void)
{
@@ -232,9 +231,8 @@ static int hpre_cfg_by_dsm(struct hisi_qm *qm)
return 0;
}
-static int hpre_set_user_domain_and_cache(struct hpre *hpre)
+static int hpre_set_user_domain_and_cache(struct hisi_qm *qm)
{
- struct hisi_qm *qm = &hpre->qm;
struct device *dev = &qm->pdev->dev;
unsigned long offset;
int ret, i;
@@ -324,17 +322,34 @@ static void hpre_cnt_regs_clear(struct hisi_qm *qm)
static void hpre_hw_error_disable(struct hisi_qm *qm)
{
+ u32 val;
+
/* disable hpre hw error interrupts */
writel(HPRE_CORE_INT_DISABLE, qm->io_base + HPRE_INT_MASK);
+
+ /* disable HPRE block master OOO when m-bit error occur */
+ val = readl(qm->io_base + HPRE_AM_OOO_SHUTDOWN_ENB);
+ val &= ~HPRE_AM_OOO_SHUTDOWN_ENABLE;
+ writel(val, qm->io_base + HPRE_AM_OOO_SHUTDOWN_ENB);
}
static void hpre_hw_error_enable(struct hisi_qm *qm)
{
+ u32 val;
+
+ /* clear HPRE hw error source if having */
+ writel(HPRE_CORE_INT_DISABLE, qm->io_base + HPRE_HAC_SOURCE_INT);
+
/* enable hpre hw error interrupts */
writel(HPRE_CORE_INT_ENABLE, qm->io_base + HPRE_INT_MASK);
writel(HPRE_HAC_RAS_CE_ENABLE, qm->io_base + HPRE_RAS_CE_ENB);
writel(HPRE_HAC_RAS_NFE_ENABLE, qm->io_base + HPRE_RAS_NFE_ENB);
writel(HPRE_HAC_RAS_FE_ENABLE, qm->io_base + HPRE_RAS_FE_ENB);
+
+ /* enable HPRE block master OOO when m-bit error occur */
+ val = readl(qm->io_base + HPRE_AM_OOO_SHUTDOWN_ENB);
+ val |= HPRE_AM_OOO_SHUTDOWN_ENABLE;
+ writel(val, qm->io_base + HPRE_AM_OOO_SHUTDOWN_ENB);
}
static inline struct hisi_qm *hpre_file_to_qm(struct hpre_debugfs_file *file)
@@ -354,9 +369,7 @@ static u32 hpre_current_qm_read(struct hpre_debugfs_file *file)
static int hpre_current_qm_write(struct hpre_debugfs_file *file, u32 val)
{
struct hisi_qm *qm = hpre_file_to_qm(file);
- struct hpre_debug *debug = file->debug;
- struct hpre *hpre = container_of(debug, struct hpre, debug);
- u32 num_vfs = hpre->num_vfs;
+ u32 num_vfs = qm->vfs_num;
u32 vfq_num, tmp;
@@ -523,6 +536,33 @@ static const struct file_operations hpre_ctrl_debug_fops = {
.write = hpre_ctrl_debug_write,
};
+static int hpre_debugfs_atomic64_get(void *data, u64 *val)
+{
+ struct hpre_dfx *dfx_item = data;
+
+ *val = atomic64_read(&dfx_item->value);
+
+ return 0;
+}
+
+static int hpre_debugfs_atomic64_set(void *data, u64 val)
+{
+ struct hpre_dfx *dfx_item = data;
+ struct hpre_dfx *hpre_dfx = dfx_item - HPRE_OVERTIME_THRHLD;
+
+ if (val)
+ return -EINVAL;
+
+ if (dfx_item->type == HPRE_OVERTIME_THRHLD)
+ atomic64_set(&hpre_dfx[HPRE_OVER_THRHLD_CNT].value, 0);
+ atomic64_set(&dfx_item->value, val);
+
+ return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(hpre_atomic64_ops, hpre_debugfs_atomic64_get,
+ hpre_debugfs_atomic64_set, "%llu\n");
+
static int hpre_create_debugfs_file(struct hpre_debug *dbg, struct dentry *dir,
enum hpre_ctrl_dbgfs_file type, int indx)
{
@@ -620,6 +660,22 @@ static int hpre_ctrl_debug_init(struct hpre_debug *debug)
return hpre_cluster_debugfs_init(debug);
}
+static void hpre_dfx_debug_init(struct hpre_debug *debug)
+{
+ struct hpre *hpre = container_of(debug, struct hpre, debug);
+ struct hpre_dfx *dfx = hpre->debug.dfx;
+ struct hisi_qm *qm = &hpre->qm;
+ struct dentry *parent;
+ int i;
+
+ parent = debugfs_create_dir("hpre_dfx", qm->debug.debug_root);
+ for (i = 0; i < HPRE_DFX_FILE_NUM; i++) {
+ dfx[i].type = i;
+ debugfs_create_file(hpre_dfx_files[i], 0644, parent, &dfx[i],
+ &hpre_atomic64_ops);
+ }
+}
+
static int hpre_debugfs_init(struct hpre *hpre)
{
struct hisi_qm *qm = &hpre->qm;
@@ -629,6 +685,8 @@ static int hpre_debugfs_init(struct hpre *hpre)
dir = debugfs_create_dir(dev_name(dev), hpre_debugfs_root);
qm->debug.debug_root = dir;
+ qm->debug.sqe_mask_offset = HPRE_SQE_MASK_OFFSET;
+ qm->debug.sqe_mask_len = HPRE_SQE_MASK_LEN;
ret = hisi_qm_debug_init(qm);
if (ret)
@@ -640,6 +698,9 @@ static int hpre_debugfs_init(struct hpre *hpre)
if (ret)
goto failed_to_create;
}
+
+ hpre_dfx_debug_init(&hpre->debug);
+
return 0;
failed_to_create:
@@ -654,32 +715,27 @@ static void hpre_debugfs_exit(struct hpre *hpre)
debugfs_remove_recursive(qm->debug.debug_root);
}
-static int hpre_qm_pre_init(struct hisi_qm *qm, struct pci_dev *pdev)
+static int hpre_qm_init(struct hisi_qm *qm, struct pci_dev *pdev)
{
- enum qm_hw_ver rev_id;
-
- rev_id = hisi_qm_get_hw_version(pdev);
- if (rev_id < 0)
- return -ENODEV;
-
- if (rev_id == QM_HW_V1) {
+ if (pdev->revision == QM_HW_V1) {
pci_warn(pdev, "HPRE version 1 is not supported!\n");
return -EINVAL;
}
qm->pdev = pdev;
- qm->ver = rev_id;
+ qm->ver = pdev->revision;
qm->sqe_size = HPRE_SQE_SIZE;
qm->dev_name = hpre_name;
+
qm->fun_type = (pdev->device == HPRE_PCI_DEVICE_ID) ?
- QM_HW_PF : QM_HW_VF;
- if (pdev->is_physfn) {
+ QM_HW_PF : QM_HW_VF;
+ if (qm->fun_type == QM_HW_PF) {
qm->qp_base = HPRE_PF_DEF_Q_BASE;
- qm->qp_num = hpre_pf_q_num;
+ qm->qp_num = pf_q_num;
+ qm->qm_list = &hpre_devices;
}
- qm->use_dma_api = true;
- return 0;
+ return hisi_qm_init(qm);
}
static void hpre_log_hw_error(struct hisi_qm *qm, u32 err_sts)
@@ -693,8 +749,6 @@ static void hpre_log_hw_error(struct hisi_qm *qm, u32 err_sts)
err->msg, err->int_msk);
err++;
}
-
- writel(err_sts, qm->io_base + HPRE_HAC_SOURCE_INT);
}
static u32 hpre_get_hw_err_status(struct hisi_qm *qm)
@@ -702,16 +756,38 @@ static u32 hpre_get_hw_err_status(struct hisi_qm *qm)
return readl(qm->io_base + HPRE_HAC_INT_STATUS);
}
+static void hpre_clear_hw_err_status(struct hisi_qm *qm, u32 err_sts)
+{
+ writel(err_sts, qm->io_base + HPRE_HAC_SOURCE_INT);
+}
+
+static void hpre_open_axi_master_ooo(struct hisi_qm *qm)
+{
+ u32 value;
+
+ value = readl(qm->io_base + HPRE_AM_OOO_SHUTDOWN_ENB);
+ writel(value & ~HPRE_AM_OOO_SHUTDOWN_ENABLE,
+ HPRE_ADDR(qm, HPRE_AM_OOO_SHUTDOWN_ENB));
+ writel(value | HPRE_AM_OOO_SHUTDOWN_ENABLE,
+ HPRE_ADDR(qm, HPRE_AM_OOO_SHUTDOWN_ENB));
+}
+
static const struct hisi_qm_err_ini hpre_err_ini = {
+ .hw_init = hpre_set_user_domain_and_cache,
.hw_err_enable = hpre_hw_error_enable,
.hw_err_disable = hpre_hw_error_disable,
.get_dev_hw_err_status = hpre_get_hw_err_status,
+ .clear_dev_hw_err_status = hpre_clear_hw_err_status,
.log_dev_hw_err = hpre_log_hw_error,
+ .open_axi_master_ooo = hpre_open_axi_master_ooo,
.err_info = {
.ce = QM_BASE_CE,
.nfe = QM_BASE_NFE | QM_ACC_DO_TASK_TIMEOUT,
.fe = 0,
- .msi = QM_DB_RANDOM_INVALID,
+ .ecc_2bits_mask = HPRE_CORE_ECC_2BIT_ERR |
+ HPRE_OOO_ECC_2BIT_ERR,
+ .msi_wr_port = HPRE_WR_MSI_PORT,
+ .acpi_rst = "HRST",
}
};
@@ -722,7 +798,7 @@ static int hpre_pf_probe_init(struct hpre *hpre)
qm->ctrl_qp_num = HPRE_QUEUE_NUM_V2;
- ret = hpre_set_user_domain_and_cache(hpre);
+ ret = hpre_set_user_domain_and_cache(qm);
if (ret)
return ret;
@@ -732,6 +808,20 @@ static int hpre_pf_probe_init(struct hpre *hpre)
return 0;
}
+static int hpre_probe_init(struct hpre *hpre)
+{
+ struct hisi_qm *qm = &hpre->qm;
+ int ret;
+
+ if (qm->fun_type == QM_HW_PF) {
+ ret = hpre_pf_probe_init(hpre);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
static int hpre_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
struct hisi_qm *qm;
@@ -742,26 +832,17 @@ static int hpre_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (!hpre)
return -ENOMEM;
- pci_set_drvdata(pdev, hpre);
-
qm = &hpre->qm;
- ret = hpre_qm_pre_init(qm, pdev);
- if (ret)
- return ret;
-
- ret = hisi_qm_init(qm);
- if (ret)
+ ret = hpre_qm_init(qm, pdev);
+ if (ret) {
+ pci_err(pdev, "Failed to init HPRE QM (%d)!\n", ret);
return ret;
+ }
- if (pdev->is_physfn) {
- ret = hpre_pf_probe_init(hpre);
- if (ret)
- goto err_with_qm_init;
- } else if (qm->fun_type == QM_HW_VF && qm->ver == QM_HW_V2) {
- /* v2 starts to support get vft by mailbox */
- ret = hisi_qm_get_vft(qm, &qm->qp_base, &qm->qp_num);
- if (ret)
- goto err_with_qm_init;
+ ret = hpre_probe_init(hpre);
+ if (ret) {
+ pci_err(pdev, "Failed to probe (%d)!\n", ret);
+ goto err_with_qm_init;
}
ret = hisi_qm_start(qm);
@@ -779,8 +860,18 @@ static int hpre_probe(struct pci_dev *pdev, const struct pci_device_id *id)
pci_err(pdev, "fail to register algs to crypto!\n");
goto err_with_qm_start;
}
+
+ if (qm->fun_type == QM_HW_PF && vfs_num) {
+ ret = hisi_qm_sriov_enable(pdev, vfs_num);
+ if (ret < 0)
+ goto err_with_crypto_register;
+ }
+
return 0;
+err_with_crypto_register:
+ hpre_algs_unregister();
+
err_with_qm_start:
hisi_qm_del_from_list(qm, &hpre_devices);
hisi_qm_stop(qm);
@@ -794,107 +885,6 @@ err_with_qm_init:
return ret;
}
-static int hpre_vf_q_assign(struct hpre *hpre, int num_vfs)
-{
- struct hisi_qm *qm = &hpre->qm;
- u32 qp_num = qm->qp_num;
- int q_num, remain_q_num, i;
- u32 q_base = qp_num;
- int ret;
-
- if (!num_vfs)
- return -EINVAL;
-
- remain_q_num = qm->ctrl_qp_num - qp_num;
-
- /* If remaining queues are not enough, return error. */
- if (remain_q_num < num_vfs)
- return -EINVAL;
-
- q_num = remain_q_num / num_vfs;
- for (i = 1; i <= num_vfs; i++) {
- if (i == num_vfs)
- q_num += remain_q_num % num_vfs;
- ret = hisi_qm_set_vft(qm, i, q_base, (u32)q_num);
- if (ret)
- return ret;
- q_base += q_num;
- }
-
- return 0;
-}
-
-static int hpre_clear_vft_config(struct hpre *hpre)
-{
- struct hisi_qm *qm = &hpre->qm;
- u32 num_vfs = hpre->num_vfs;
- int ret;
- u32 i;
-
- for (i = 1; i <= num_vfs; i++) {
- ret = hisi_qm_set_vft(qm, i, 0, 0);
- if (ret)
- return ret;
- }
- hpre->num_vfs = 0;
-
- return 0;
-}
-
-static int hpre_sriov_enable(struct pci_dev *pdev, int max_vfs)
-{
- struct hpre *hpre = pci_get_drvdata(pdev);
- int pre_existing_vfs, num_vfs, ret;
-
- pre_existing_vfs = pci_num_vf(pdev);
- if (pre_existing_vfs) {
- pci_err(pdev,
- "Can't enable VF. Please disable pre-enabled VFs!\n");
- return 0;
- }
-
- num_vfs = min_t(int, max_vfs, HPRE_VF_NUM);
- ret = hpre_vf_q_assign(hpre, num_vfs);
- if (ret) {
- pci_err(pdev, "Can't assign queues for VF!\n");
- return ret;
- }
-
- hpre->num_vfs = num_vfs;
-
- ret = pci_enable_sriov(pdev, num_vfs);
- if (ret) {
- pci_err(pdev, "Can't enable VF!\n");
- hpre_clear_vft_config(hpre);
- return ret;
- }
-
- return num_vfs;
-}
-
-static int hpre_sriov_disable(struct pci_dev *pdev)
-{
- struct hpre *hpre = pci_get_drvdata(pdev);
-
- if (pci_vfs_assigned(pdev)) {
- pci_err(pdev, "Failed to disable VFs while VFs are assigned!\n");
- return -EPERM;
- }
-
- /* remove in hpre_pci_driver will be called to free VF resources */
- pci_disable_sriov(pdev);
-
- return hpre_clear_vft_config(hpre);
-}
-
-static int hpre_sriov_configure(struct pci_dev *pdev, int num_vfs)
-{
- if (num_vfs)
- return hpre_sriov_enable(pdev, num_vfs);
- else
- return hpre_sriov_disable(pdev);
-}
-
static void hpre_remove(struct pci_dev *pdev)
{
struct hpre *hpre = pci_get_drvdata(pdev);
@@ -903,8 +893,8 @@ static void hpre_remove(struct pci_dev *pdev)
hpre_algs_unregister();
hisi_qm_del_from_list(qm, &hpre_devices);
- if (qm->fun_type == QM_HW_PF && hpre->num_vfs != 0) {
- ret = hpre_sriov_disable(pdev);
+ if (qm->fun_type == QM_HW_PF && qm->vfs_num) {
+ ret = hisi_qm_sriov_disable(pdev);
if (ret) {
pci_err(pdev, "Disable SRIOV fail!\n");
return;
@@ -924,6 +914,9 @@ static void hpre_remove(struct pci_dev *pdev)
static const struct pci_error_handlers hpre_err_handler = {
.error_detected = hisi_qm_dev_err_detected,
+ .slot_reset = hisi_qm_dev_slot_reset,
+ .reset_prepare = hisi_qm_reset_prepare,
+ .reset_done = hisi_qm_reset_done,
};
static struct pci_driver hpre_pci_driver = {
@@ -931,7 +924,7 @@ static struct pci_driver hpre_pci_driver = {
.id_table = hpre_dev_ids,
.probe = hpre_probe,
.remove = hpre_remove,
- .sriov_configure = hpre_sriov_configure,
+ .sriov_configure = hisi_qm_sriov_configure,
.err_handler = &hpre_err_handler,
};
diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index f795fb557630..9bb263cec6c3 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -1,9 +1,12 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 HiSilicon Limited. */
#include <asm/page.h>
+#include <linux/acpi.h>
+#include <linux/aer.h>
#include <linux/bitmap.h>
#include <linux/debugfs.h>
#include <linux/dma-mapping.h>
+#include <linux/idr.h>
#include <linux/io.h>
#include <linux/irqreturn.h>
#include <linux/log2.h>
@@ -53,6 +56,7 @@
#define QM_SQ_TYPE_SHIFT 8
#define QM_SQ_TYPE_MASK GENMASK(3, 0)
+#define QM_SQ_TAIL_IDX(sqc) ((le16_to_cpu((sqc)->w11) >> 6) & 0x1)
/* cqc shift */
#define QM_CQ_HOP_NUM_SHIFT 0
@@ -64,6 +68,7 @@
#define QM_CQE_PHASE(cqe) (le16_to_cpu((cqe)->w7) & 0x1)
#define QM_QC_CQE_SIZE 4
+#define QM_CQ_TAIL_IDX(cqc) ((le16_to_cpu((cqc)->w11) >> 6) & 0x1)
/* eqc shift */
#define QM_EQE_AEQE_SIZE (2UL << 12)
@@ -122,9 +127,11 @@
#define QM_DFX_CNT_CLR_CE 0x100118
#define QM_ABNORMAL_INT_SOURCE 0x100000
+#define QM_ABNORMAL_INT_SOURCE_CLR GENMASK(12, 0)
#define QM_ABNORMAL_INT_MASK 0x100004
#define QM_ABNORMAL_INT_MASK_VALUE 0x1fff
#define QM_ABNORMAL_INT_STATUS 0x100008
+#define QM_ABNORMAL_INT_SET 0x10000c
#define QM_ABNORMAL_INF00 0x100010
#define QM_FIFO_OVERFLOW_TYPE 0xc0
#define QM_FIFO_OVERFLOW_TYPE_SHIFT 6
@@ -140,6 +147,27 @@
#define QM_RAS_CE_TIMES_PER_IRQ 1
#define QM_RAS_MSI_INT_SEL 0x1040f4
+#define QM_DEV_RESET_FLAG 0
+#define QM_RESET_WAIT_TIMEOUT 400
+#define QM_PEH_VENDOR_ID 0x1000d8
+#define ACC_VENDOR_ID_VALUE 0x5a5a
+#define QM_PEH_DFX_INFO0 0x1000fc
+#define ACC_PEH_SRIOV_CTRL_VF_MSE_SHIFT 3
+#define ACC_PEH_MSI_DISABLE GENMASK(31, 0)
+#define ACC_MASTER_GLOBAL_CTRL_SHUTDOWN 0x1
+#define ACC_MASTER_TRANS_RETURN_RW 3
+#define ACC_MASTER_TRANS_RETURN 0x300150
+#define ACC_MASTER_GLOBAL_CTRL 0x300000
+#define ACC_AM_CFG_PORT_WR_EN 0x30001c
+#define QM_RAS_NFE_MBIT_DISABLE ~QM_ECC_MBIT
+#define ACC_AM_ROB_ECC_INT_STS 0x300104
+#define ACC_ROB_ECC_ERR_MULTPL BIT(1)
+
+#define POLL_PERIOD 10
+#define POLL_TIMEOUT 1000
+#define WAIT_PERIOD_US_MAX 200
+#define WAIT_PERIOD_US_MIN 100
+#define MAX_WAIT_COUNTS 1000
#define QM_CACHE_WB_START 0x204
#define QM_CACHE_WB_DONE 0x208
@@ -147,7 +175,12 @@
#define QM_SQE_DATA_ALIGN_MASK GENMASK(6, 0)
#define QMC_ALIGN(sz) ALIGN(sz, 32)
+#define QM_DBG_READ_LEN 256
+#define QM_DBG_WRITE_LEN 1024
#define QM_DBG_TMP_BUF_LEN 22
+#define QM_PCI_COMMAND_INVALID ~0
+
+#define QM_SQE_ADDR_MASK GENMASK(7, 0)
#define QM_MK_CQC_DW3_V1(hop_num, pg_sz, buf_sz, cqe_sz) \
(((hop_num) << QM_CQ_HOP_NUM_SHIFT) | \
@@ -190,6 +223,12 @@ enum vft_type {
CQC_VFT,
};
+enum acc_err_result {
+ ACC_ERR_NONE,
+ ACC_ERR_NEED_RESET,
+ ACC_ERR_RECOVERED,
+};
+
struct qm_cqe {
__le32 rsvd0;
__le16 cmd_id;
@@ -284,10 +323,22 @@ struct hisi_qm_hw_ops {
u8 cmd, u16 index, u8 priority);
u32 (*get_irq_num)(struct hisi_qm *qm);
int (*debug_init)(struct hisi_qm *qm);
- void (*hw_error_init)(struct hisi_qm *qm, u32 ce, u32 nfe, u32 fe,
- u32 msi);
+ void (*hw_error_init)(struct hisi_qm *qm, u32 ce, u32 nfe, u32 fe);
void (*hw_error_uninit)(struct hisi_qm *qm);
- pci_ers_result_t (*hw_error_handle)(struct hisi_qm *qm);
+ enum acc_err_result (*hw_error_handle)(struct hisi_qm *qm);
+};
+
+struct qm_dfx_item {
+ const char *name;
+ u32 offset;
+};
+
+static struct qm_dfx_item qm_dfx_files[] = {
+ {"err_irq", offsetof(struct qm_dfx, err_irq_cnt)},
+ {"aeq_irq", offsetof(struct qm_dfx, aeq_irq_cnt)},
+ {"abnormal_irq", offsetof(struct qm_dfx, abnormal_irq_cnt)},
+ {"create_qp_err", offsetof(struct qm_dfx, create_qp_err_cnt)},
+ {"mb_err", offsetof(struct qm_dfx, mb_err_cnt)},
};
static const char * const qm_debug_file_name[] = {
@@ -325,6 +376,93 @@ static const char * const qm_fifo_overflow[] = {
"cq", "eq", "aeq",
};
+static const char * const qm_s[] = {
+ "init", "start", "close", "stop",
+};
+
+static const char * const qp_s[] = {
+ "none", "init", "start", "stop", "close",
+};
+
+static bool qm_avail_state(struct hisi_qm *qm, enum qm_state new)
+{
+ enum qm_state curr = atomic_read(&qm->status.flags);
+ bool avail = false;
+
+ switch (curr) {
+ case QM_INIT:
+ if (new == QM_START || new == QM_CLOSE)
+ avail = true;
+ break;
+ case QM_START:
+ if (new == QM_STOP)
+ avail = true;
+ break;
+ case QM_STOP:
+ if (new == QM_CLOSE || new == QM_START)
+ avail = true;
+ break;
+ default:
+ break;
+ }
+
+ dev_dbg(&qm->pdev->dev, "change qm state from %s to %s\n",
+ qm_s[curr], qm_s[new]);
+
+ if (!avail)
+ dev_warn(&qm->pdev->dev, "Can not change qm state from %s to %s\n",
+ qm_s[curr], qm_s[new]);
+
+ return avail;
+}
+
+static bool qm_qp_avail_state(struct hisi_qm *qm, struct hisi_qp *qp,
+ enum qp_state new)
+{
+ enum qm_state qm_curr = atomic_read(&qm->status.flags);
+ enum qp_state qp_curr = 0;
+ bool avail = false;
+
+ if (qp)
+ qp_curr = atomic_read(&qp->qp_status.flags);
+
+ switch (new) {
+ case QP_INIT:
+ if (qm_curr == QM_START || qm_curr == QM_INIT)
+ avail = true;
+ break;
+ case QP_START:
+ if ((qm_curr == QM_START && qp_curr == QP_INIT) ||
+ (qm_curr == QM_START && qp_curr == QP_STOP))
+ avail = true;
+ break;
+ case QP_STOP:
+ if ((qm_curr == QM_START && qp_curr == QP_START) ||
+ (qp_curr == QP_INIT))
+ avail = true;
+ break;
+ case QP_CLOSE:
+ if ((qm_curr == QM_START && qp_curr == QP_INIT) ||
+ (qm_curr == QM_START && qp_curr == QP_STOP) ||
+ (qm_curr == QM_STOP && qp_curr == QP_STOP) ||
+ (qm_curr == QM_STOP && qp_curr == QP_INIT))
+ avail = true;
+ break;
+ default:
+ break;
+ }
+
+ dev_dbg(&qm->pdev->dev, "change qp state from %s to %s in QM %s\n",
+ qp_s[qp_curr], qp_s[new], qm_s[qm_curr]);
+
+ if (!avail)
+ dev_warn(&qm->pdev->dev,
+ "Can not change qp state from %s to %s in QM %s\n",
+ qp_s[qp_curr], qp_s[new], qm_s[qm_curr]);
+
+ return avail;
+}
+
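A minimal sketch of how these availability checks are intended to be used around a lifecycle transition; it mirrors the pattern hisi_qm_start() follows later in this patch:

	down_write(&qm->qps_lock);
	if (!qm_avail_state(qm, QM_START)) {
		up_write(&qm->qps_lock);
		return -EPERM;
	}
	/* ... perform the actual start work ... */
	atomic_set(&qm->status.flags, QM_START);
	up_write(&qm->qps_lock);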
/* return 0 mailbox ready, -ETIMEDOUT hardware timeout */
static int qm_wait_mb_ready(struct hisi_qm *qm)
{
@@ -393,6 +531,8 @@ static int qm_mb(struct hisi_qm *qm, u8 cmd, dma_addr_t dma_addr, u16 queue,
busy_unlock:
mutex_unlock(&qm->mailbox_lock);
+ if (ret)
+ atomic64_inc(&qm->debug.dfx.mb_err_cnt);
return ret;
}
@@ -460,7 +600,7 @@ static struct hisi_qp *qm_to_hisi_qp(struct hisi_qm *qm, struct qm_eqe *eqe)
{
u16 cqn = le32_to_cpu(eqe->dw0) & QM_EQE_CQN_MASK;
- return qm->qp_array[cqn];
+ return &qm->qp_array[cqn];
}
static void qm_cq_head_update(struct hisi_qp *qp)
@@ -510,8 +650,7 @@ static void qm_work_process(struct work_struct *work)
while (QM_EQE_PHASE(eqe) == qm->status.eqc_phase) {
eqe_num++;
qp = qm_to_hisi_qp(qm, eqe);
- if (qp)
- qm_poll_qp(qp, qm);
+ qm_poll_qp(qp, qm);
if (qm->status.eq_head == QM_Q_DEPTH - 1) {
qm->status.eqc_phase = !qm->status.eqc_phase;
@@ -551,6 +690,7 @@ static irqreturn_t qm_irq(int irq, void *data)
if (readl(qm->io_base + QM_VF_EQ_INT_SOURCE))
return do_qm_irq(irq, data);
+ atomic64_inc(&qm->debug.dfx.err_irq_cnt);
dev_err(&qm->pdev->dev, "invalid int source\n");
qm_db(qm, 0, QM_DOORBELL_CMD_EQ, qm->status.eq_head, 0);
@@ -563,6 +703,7 @@ static irqreturn_t qm_aeq_irq(int irq, void *data)
struct qm_aeqe *aeqe = qm->aeqe + qm->status.aeq_head;
u32 type;
+ atomic64_inc(&qm->debug.dfx.aeq_irq_cnt);
if (!readl(qm->io_base + QM_VF_AEQ_INT_SOURCE))
return IRQ_NONE;
@@ -590,79 +731,20 @@ static irqreturn_t qm_aeq_irq(int irq, void *data)
return IRQ_HANDLED;
}
-static irqreturn_t qm_abnormal_irq(int irq, void *data)
-{
- const struct hisi_qm_hw_error *err = qm_hw_error;
- struct hisi_qm *qm = data;
- struct device *dev = &qm->pdev->dev;
- u32 error_status, tmp;
-
- /* read err sts */
- tmp = readl(qm->io_base + QM_ABNORMAL_INT_STATUS);
- error_status = qm->msi_mask & tmp;
-
- while (err->msg) {
- if (err->int_msk & error_status)
- dev_err(dev, "%s [error status=0x%x] found\n",
- err->msg, err->int_msk);
-
- err++;
- }
-
- /* clear err sts */
- writel(error_status, qm->io_base + QM_ABNORMAL_INT_SOURCE);
-
- return IRQ_HANDLED;
-}
-
-static int qm_irq_register(struct hisi_qm *qm)
-{
- struct pci_dev *pdev = qm->pdev;
- int ret;
-
- ret = request_irq(pci_irq_vector(pdev, QM_EQ_EVENT_IRQ_VECTOR),
- qm_irq, IRQF_SHARED, qm->dev_name, qm);
- if (ret)
- return ret;
-
- if (qm->ver == QM_HW_V2) {
- ret = request_irq(pci_irq_vector(pdev, QM_AEQ_EVENT_IRQ_VECTOR),
- qm_aeq_irq, IRQF_SHARED, qm->dev_name, qm);
- if (ret)
- goto err_aeq_irq;
-
- if (qm->fun_type == QM_HW_PF) {
- ret = request_irq(pci_irq_vector(pdev,
- QM_ABNORMAL_EVENT_IRQ_VECTOR),
- qm_abnormal_irq, IRQF_SHARED,
- qm->dev_name, qm);
- if (ret)
- goto err_abonormal_irq;
- }
- }
-
- return 0;
-
-err_abonormal_irq:
- free_irq(pci_irq_vector(pdev, QM_AEQ_EVENT_IRQ_VECTOR), qm);
-err_aeq_irq:
- free_irq(pci_irq_vector(pdev, QM_EQ_EVENT_IRQ_VECTOR), qm);
- return ret;
-}
-
static void qm_irq_unregister(struct hisi_qm *qm)
{
struct pci_dev *pdev = qm->pdev;
free_irq(pci_irq_vector(pdev, QM_EQ_EVENT_IRQ_VECTOR), qm);
- if (qm->ver == QM_HW_V2) {
- free_irq(pci_irq_vector(pdev, QM_AEQ_EVENT_IRQ_VECTOR), qm);
+ if (qm->ver == QM_HW_V1)
+ return;
+
+ free_irq(pci_irq_vector(pdev, QM_AEQ_EVENT_IRQ_VECTOR), qm);
- if (qm->fun_type == QM_HW_PF)
- free_irq(pci_irq_vector(pdev,
- QM_ABNORMAL_EVENT_IRQ_VECTOR), qm);
- }
+ if (qm->fun_type == QM_HW_PF)
+ free_irq(pci_irq_vector(pdev,
+ QM_ABNORMAL_EVENT_IRQ_VECTOR), qm);
}
static void qm_init_qp_status(struct hisi_qp *qp)
@@ -672,7 +754,7 @@ static void qm_init_qp_status(struct hisi_qp *qp)
qp_status->sq_tail = 0;
qp_status->cq_head = 0;
qp_status->cqc_phase = true;
- qp_status->flags = 0;
+ atomic_set(&qp_status->flags, 0);
}
static void qm_vft_data_cfg(struct hisi_qm *qm, enum vft_type type, u32 base,
@@ -683,36 +765,26 @@ static void qm_vft_data_cfg(struct hisi_qm *qm, enum vft_type type, u32 base,
if (number > 0) {
switch (type) {
case SQC_VFT:
- switch (qm->ver) {
- case QM_HW_V1:
+ if (qm->ver == QM_HW_V1) {
tmp = QM_SQC_VFT_BUF_SIZE |
QM_SQC_VFT_SQC_SIZE |
QM_SQC_VFT_INDEX_NUMBER |
QM_SQC_VFT_VALID |
(u64)base << QM_SQC_VFT_START_SQN_SHIFT;
- break;
- case QM_HW_V2:
+ } else {
tmp = (u64)base << QM_SQC_VFT_START_SQN_SHIFT |
QM_SQC_VFT_VALID |
(u64)(number - 1) << QM_SQC_VFT_SQN_SHIFT;
- break;
- case QM_HW_UNKNOWN:
- break;
}
break;
case CQC_VFT:
- switch (qm->ver) {
- case QM_HW_V1:
+ if (qm->ver == QM_HW_V1) {
tmp = QM_CQC_VFT_BUF_SIZE |
QM_CQC_VFT_SQC_SIZE |
QM_CQC_VFT_INDEX_NUMBER |
QM_CQC_VFT_VALID;
- break;
- case QM_HW_V2:
+ } else {
tmp = QM_CQC_VFT_VALID;
- break;
- case QM_HW_UNKNOWN:
- break;
}
break;
}
@@ -986,6 +1058,473 @@ static const struct file_operations qm_regs_fops = {
.release = single_release,
};
+static ssize_t qm_cmd_read(struct file *filp, char __user *buffer,
+ size_t count, loff_t *pos)
+{
+ char buf[QM_DBG_READ_LEN];
+ int len;
+
+ if (*pos)
+ return 0;
+
+ if (count < QM_DBG_READ_LEN)
+ return -ENOSPC;
+
+ len = snprintf(buf, QM_DBG_READ_LEN, "%s\n",
+ "Please echo help to cmd to get help information");
+
+ if (copy_to_user(buffer, buf, len))
+ return -EFAULT;
+
+ return (*pos = len);
+}
+
+static void *qm_ctx_alloc(struct hisi_qm *qm, size_t ctx_size,
+ dma_addr_t *dma_addr)
+{
+ struct device *dev = &qm->pdev->dev;
+ void *ctx_addr;
+
+ ctx_addr = kzalloc(ctx_size, GFP_KERNEL);
+ if (!ctx_addr)
+ return ERR_PTR(-ENOMEM);
+
+ *dma_addr = dma_map_single(dev, ctx_addr, ctx_size, DMA_FROM_DEVICE);
+ if (dma_mapping_error(dev, *dma_addr)) {
+ dev_err(dev, "DMA mapping error!\n");
+ kfree(ctx_addr);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ return ctx_addr;
+}
+
+static void qm_ctx_free(struct hisi_qm *qm, size_t ctx_size,
+ const void *ctx_addr, dma_addr_t *dma_addr)
+{
+ struct device *dev = &qm->pdev->dev;
+
+ dma_unmap_single(dev, *dma_addr, ctx_size, DMA_FROM_DEVICE);
+ kfree(ctx_addr);
+}
+
+static int dump_show(struct hisi_qm *qm, void *info,
+ unsigned int info_size, char *info_name)
+{
+ struct device *dev = &qm->pdev->dev;
+ u8 *info_buf, *info_curr = info;
+ u32 i;
+#define BYTE_PER_DW 4
+
+ info_buf = kzalloc(info_size, GFP_KERNEL);
+ if (!info_buf)
+ return -ENOMEM;
+
+ for (i = 0; i < info_size; i++, info_curr++) {
+ if (i % BYTE_PER_DW == 0)
+ info_buf[i + 3UL] = *info_curr;
+ else if (i % BYTE_PER_DW == 1)
+ info_buf[i + 1UL] = *info_curr;
+ else if (i % BYTE_PER_DW == 2)
+ info_buf[i - 1] = *info_curr;
+ else if (i % BYTE_PER_DW == 3)
+ info_buf[i - 3] = *info_curr;
+ }
+
+ dev_info(dev, "%s DUMP\n", info_name);
+ for (i = 0; i < info_size; i += BYTE_PER_DW) {
+ pr_info("DW%d: %02X%02X %02X%02X\n", i / BYTE_PER_DW,
+ info_buf[i], info_buf[i + 1UL],
+ info_buf[i + 2UL], info_buf[i + 3UL]);
+ }
+
+ kfree(info_buf);
+
+ return 0;
+}
+
+static int qm_dump_sqc_raw(struct hisi_qm *qm, dma_addr_t dma_addr, u16 qp_id)
+{
+ return qm_mb(qm, QM_MB_CMD_SQC, dma_addr, qp_id, 1);
+}
+
+static int qm_dump_cqc_raw(struct hisi_qm *qm, dma_addr_t dma_addr, u16 qp_id)
+{
+ return qm_mb(qm, QM_MB_CMD_CQC, dma_addr, qp_id, 1);
+}
+
+static int qm_sqc_dump(struct hisi_qm *qm, const char *s)
+{
+ struct device *dev = &qm->pdev->dev;
+ struct qm_sqc *sqc, *sqc_curr;
+ dma_addr_t sqc_dma;
+ u32 qp_id;
+ int ret;
+
+ if (!s)
+ return -EINVAL;
+
+ ret = kstrtou32(s, 0, &qp_id);
+ if (ret || qp_id >= qm->qp_num) {
+ dev_err(dev, "Please input qp num (0-%d)", qm->qp_num - 1);
+ return -EINVAL;
+ }
+
+ sqc = qm_ctx_alloc(qm, sizeof(*sqc), &sqc_dma);
+ if (IS_ERR(sqc))
+ return PTR_ERR(sqc);
+
+ ret = qm_dump_sqc_raw(qm, sqc_dma, qp_id);
+ if (ret) {
+ down_read(&qm->qps_lock);
+ if (qm->sqc) {
+ sqc_curr = qm->sqc + qp_id;
+
+ ret = dump_show(qm, sqc_curr, sizeof(*sqc),
+ "SOFT SQC");
+ if (ret)
+ dev_info(dev, "Show soft sqc failed!\n");
+ }
+ up_read(&qm->qps_lock);
+
+ goto err_free_ctx;
+ }
+
+ ret = dump_show(qm, sqc, sizeof(*sqc), "SQC");
+ if (ret)
+ dev_info(dev, "Show hw sqc failed!\n");
+
+err_free_ctx:
+ qm_ctx_free(qm, sizeof(*sqc), sqc, &sqc_dma);
+ return ret;
+}
+
+static int qm_cqc_dump(struct hisi_qm *qm, const char *s)
+{
+ struct device *dev = &qm->pdev->dev;
+ struct qm_cqc *cqc, *cqc_curr;
+ dma_addr_t cqc_dma;
+ u32 qp_id;
+ int ret;
+
+ if (!s)
+ return -EINVAL;
+
+ ret = kstrtou32(s, 0, &qp_id);
+ if (ret || qp_id >= qm->qp_num) {
+ dev_err(dev, "Please input qp num (0-%d)", qm->qp_num - 1);
+ return -EINVAL;
+ }
+
+ cqc = qm_ctx_alloc(qm, sizeof(*cqc), &cqc_dma);
+ if (IS_ERR(cqc))
+ return PTR_ERR(cqc);
+
+ ret = qm_dump_cqc_raw(qm, cqc_dma, qp_id);
+ if (ret) {
+ down_read(&qm->qps_lock);
+ if (qm->cqc) {
+ cqc_curr = qm->cqc + qp_id;
+
+ ret = dump_show(qm, cqc_curr, sizeof(*cqc),
+ "SOFT CQC");
+ if (ret)
+ dev_info(dev, "Show soft cqc failed!\n");
+ }
+ up_read(&qm->qps_lock);
+
+ goto err_free_ctx;
+ }
+
+ ret = dump_show(qm, cqc, sizeof(*cqc), "CQC");
+ if (ret)
+ dev_info(dev, "Show hw cqc failed!\n");
+
+err_free_ctx:
+ qm_ctx_free(qm, sizeof(*cqc), cqc, &cqc_dma);
+ return ret;
+}
+
+static int qm_eqc_aeqc_dump(struct hisi_qm *qm, char *s, size_t size,
+ int cmd, char *name)
+{
+ struct device *dev = &qm->pdev->dev;
+ dma_addr_t xeqc_dma;
+ void *xeqc;
+ int ret;
+
+ if (strsep(&s, " ")) {
+ dev_err(dev, "Please do not input extra characters!\n");
+ return -EINVAL;
+ }
+
+ xeqc = qm_ctx_alloc(qm, size, &xeqc_dma);
+ if (IS_ERR(xeqc))
+ return PTR_ERR(xeqc);
+
+ ret = qm_mb(qm, cmd, xeqc_dma, 0, 1);
+ if (ret)
+ goto err_free_ctx;
+
+ ret = dump_show(qm, xeqc, size, name);
+ if (ret)
+ dev_info(dev, "Show hw %s failed!\n", name);
+
+err_free_ctx:
+ qm_ctx_free(qm, size, xeqc, &xeqc_dma);
+ return ret;
+}
+
+static int q_dump_param_parse(struct hisi_qm *qm, char *s,
+ u32 *e_id, u32 *q_id)
+{
+ struct device *dev = &qm->pdev->dev;
+ unsigned int qp_num = qm->qp_num;
+ char *presult;
+ int ret;
+
+ presult = strsep(&s, " ");
+ if (!presult) {
+ dev_err(dev, "Please input qp number!\n");
+ return -EINVAL;
+ }
+
+ ret = kstrtou32(presult, 0, q_id);
+ if (ret || *q_id >= qp_num) {
+ dev_err(dev, "Please input qp num (0-%d)", qp_num - 1);
+ return -EINVAL;
+ }
+
+ presult = strsep(&s, " ");
+ if (!presult) {
+ dev_err(dev, "Please input sqe number!\n");
+ return -EINVAL;
+ }
+
+ ret = kstrtou32(presult, 0, e_id);
+ if (ret || *e_id >= QM_Q_DEPTH) {
+ dev_err(dev, "Please input sqe num (0-%d)", QM_Q_DEPTH - 1);
+ return -EINVAL;
+ }
+
+ if (strsep(&s, " ")) {
+ dev_err(dev, "Please do not input extra characters!\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int qm_sq_dump(struct hisi_qm *qm, char *s)
+{
+ struct device *dev = &qm->pdev->dev;
+ void *sqe, *sqe_curr;
+ struct hisi_qp *qp;
+ u32 qp_id, sqe_id;
+ int ret;
+
+ ret = q_dump_param_parse(qm, s, &sqe_id, &qp_id);
+ if (ret)
+ return ret;
+
+ sqe = kzalloc(qm->sqe_size * QM_Q_DEPTH, GFP_KERNEL);
+ if (!sqe)
+ return -ENOMEM;
+
+ qp = &qm->qp_array[qp_id];
+ memcpy(sqe, qp->sqe, qm->sqe_size * QM_Q_DEPTH);
+ sqe_curr = sqe + (u32)(sqe_id * qm->sqe_size);
+ memset(sqe_curr + qm->debug.sqe_mask_offset, QM_SQE_ADDR_MASK,
+ qm->debug.sqe_mask_len);
+
+ ret = dump_show(qm, sqe_curr, qm->sqe_size, "SQE");
+ if (ret)
+ dev_info(dev, "Show sqe failed!\n");
+
+ kfree(sqe);
+
+ return ret;
+}
+
+static int qm_cq_dump(struct hisi_qm *qm, char *s)
+{
+ struct device *dev = &qm->pdev->dev;
+ struct qm_cqe *cqe_curr;
+ struct hisi_qp *qp;
+ u32 qp_id, cqe_id;
+ int ret;
+
+ ret = q_dump_param_parse(qm, s, &cqe_id, &qp_id);
+ if (ret)
+ return ret;
+
+ qp = &qm->qp_array[qp_id];
+ cqe_curr = qp->cqe + cqe_id;
+ ret = dump_show(qm, cqe_curr, sizeof(struct qm_cqe), "CQE");
+ if (ret)
+ dev_info(dev, "Show cqe failed!\n");
+
+ return ret;
+}
+
+static int qm_eq_aeq_dump(struct hisi_qm *qm, const char *s,
+ size_t size, char *name)
+{
+ struct device *dev = &qm->pdev->dev;
+ void *xeqe;
+ u32 xeqe_id;
+ int ret;
+
+ if (!s)
+ return -EINVAL;
+
+ ret = kstrtou32(s, 0, &xeqe_id);
+ if (ret || xeqe_id >= QM_Q_DEPTH) {
+ dev_err(dev, "Please input aeqe num (0-%d)", QM_Q_DEPTH - 1);
+ return -EINVAL;
+ }
+
+ down_read(&qm->qps_lock);
+
+ if (qm->eqe && !strcmp(name, "EQE")) {
+ xeqe = qm->eqe + xeqe_id;
+ } else if (qm->aeqe && !strcmp(name, "AEQE")) {
+ xeqe = qm->aeqe + xeqe_id;
+ } else {
+ ret = -EINVAL;
+ goto err_unlock;
+ }
+
+ ret = dump_show(qm, xeqe, size, name);
+ if (ret)
+ dev_info(dev, "Show %s failed!\n", name);
+
+err_unlock:
+ up_read(&qm->qps_lock);
+ return ret;
+}
+
+static int qm_dbg_help(struct hisi_qm *qm, char *s)
+{
+ struct device *dev = &qm->pdev->dev;
+
+ if (strsep(&s, " ")) {
+ dev_err(dev, "Please do not input extra characters!\n");
+ return -EINVAL;
+ }
+
+ dev_info(dev, "available commands:\n");
+ dev_info(dev, "sqc <num>\n");
+ dev_info(dev, "cqc <num>\n");
+ dev_info(dev, "eqc\n");
+ dev_info(dev, "aeqc\n");
+ dev_info(dev, "sq <num> <e>\n");
+ dev_info(dev, "cq <num> <e>\n");
+ dev_info(dev, "eq <e>\n");
+ dev_info(dev, "aeq <e>\n");
+
+ return 0;
+}
+
+static int qm_cmd_write_dump(struct hisi_qm *qm, const char *cmd_buf)
+{
+ struct device *dev = &qm->pdev->dev;
+ char *presult, *s;
+ int ret;
+
+ s = kstrdup(cmd_buf, GFP_KERNEL);
+ if (!s)
+ return -ENOMEM;
+
+ presult = strsep(&s, " ");
+ if (!presult) {
+ kfree(s);
+ return -EINVAL;
+ }
+
+ if (!strcmp(presult, "sqc"))
+ ret = qm_sqc_dump(qm, s);
+ else if (!strcmp(presult, "cqc"))
+ ret = qm_cqc_dump(qm, s);
+ else if (!strcmp(presult, "eqc"))
+ ret = qm_eqc_aeqc_dump(qm, s, sizeof(struct qm_eqc),
+ QM_MB_CMD_EQC, "EQC");
+ else if (!strcmp(presult, "aeqc"))
+ ret = qm_eqc_aeqc_dump(qm, s, sizeof(struct qm_aeqc),
+ QM_MB_CMD_AEQC, "AEQC");
+ else if (!strcmp(presult, "sq"))
+ ret = qm_sq_dump(qm, s);
+ else if (!strcmp(presult, "cq"))
+ ret = qm_cq_dump(qm, s);
+ else if (!strcmp(presult, "eq"))
+ ret = qm_eq_aeq_dump(qm, s, sizeof(struct qm_eqe), "EQE");
+ else if (!strcmp(presult, "aeq"))
+ ret = qm_eq_aeq_dump(qm, s, sizeof(struct qm_aeqe), "AEQE");
+ else if (!strcmp(presult, "help"))
+ ret = qm_dbg_help(qm, s);
+ else
+ ret = -EINVAL;
+
+ if (ret)
+ dev_info(dev, "Please echo help\n");
+
+ kfree(s);
+
+ return ret;
+}
+
+static ssize_t qm_cmd_write(struct file *filp, const char __user *buffer,
+ size_t count, loff_t *pos)
+{
+ struct hisi_qm *qm = filp->private_data;
+ char *cmd_buf, *cmd_buf_tmp;
+ int ret;
+
+ if (*pos)
+ return 0;
+
+ /* Check whether the instance is being reset. */
+ if (unlikely(atomic_read(&qm->status.flags) == QM_STOP))
+ return 0;
+
+ if (count > QM_DBG_WRITE_LEN)
+ return -ENOSPC;
+
+ cmd_buf = kzalloc(count + 1, GFP_KERNEL);
+ if (!cmd_buf)
+ return -ENOMEM;
+
+ if (copy_from_user(cmd_buf, buffer, count)) {
+ kfree(cmd_buf);
+ return -EFAULT;
+ }
+
+ cmd_buf[count] = '\0';
+
+ cmd_buf_tmp = strchr(cmd_buf, '\n');
+ if (cmd_buf_tmp) {
+ *cmd_buf_tmp = '\0';
+ count = cmd_buf_tmp - cmd_buf + 1;
+ }
+
+ ret = qm_cmd_write_dump(qm, cmd_buf);
+ if (ret) {
+ kfree(cmd_buf);
+ return ret;
+ }
+
+ kfree(cmd_buf);
+
+ return count;
+}
+
+static const struct file_operations qm_cmd_fops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .read = qm_cmd_read,
+ .write = qm_cmd_write,
+};
+
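For reference, a rough usage sketch for the new debugfs command interface (the path is an assumption, it depends on the driver's debugfs root and the device's BDF):

	/*
	 * echo "help"  > /sys/kernel/debug/hpre/<bdf>/qm/cmd
	 * echo "sqc 0" > /sys/kernel/debug/hpre/<bdf>/qm/cmd
	 *
	 * Each command dumps the requested context via dev_info(), so the
	 * output lands in the kernel log; reading the file only returns a
	 * hint to echo "help".
	 */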
static int qm_create_debugfs_file(struct hisi_qm *qm, enum qm_debug_file index)
{
struct dentry *qm_d = qm->debug.qm_d;
@@ -1001,20 +1540,21 @@ static int qm_create_debugfs_file(struct hisi_qm *qm, enum qm_debug_file index)
return 0;
}
-static void qm_hw_error_init_v1(struct hisi_qm *qm, u32 ce, u32 nfe, u32 fe,
- u32 msi)
+static void qm_hw_error_init_v1(struct hisi_qm *qm, u32 ce, u32 nfe, u32 fe)
{
writel(QM_ABNORMAL_INT_MASK_VALUE, qm->io_base + QM_ABNORMAL_INT_MASK);
}
-static void qm_hw_error_init_v2(struct hisi_qm *qm, u32 ce, u32 nfe, u32 fe,
- u32 msi)
+static void qm_hw_error_init_v2(struct hisi_qm *qm, u32 ce, u32 nfe, u32 fe)
{
- u32 irq_enable = ce | nfe | fe | msi;
+ u32 irq_enable = ce | nfe | fe;
u32 irq_unmask = ~irq_enable;
qm->error_mask = ce | nfe | fe;
- qm->msi_mask = msi;
+
+ /* clear QM hw residual error source */
+ writel(QM_ABNORMAL_INT_SOURCE_CLR,
+ qm->io_base + QM_ABNORMAL_INT_SOURCE);
/* configure error type */
writel(ce, qm->io_base + QM_RAS_CE_ENABLE);
@@ -1022,9 +1562,6 @@ static void qm_hw_error_init_v2(struct hisi_qm *qm, u32 ce, u32 nfe, u32 fe,
writel(nfe, qm->io_base + QM_RAS_NFE_ENABLE);
writel(fe, qm->io_base + QM_RAS_FE_ENABLE);
- /* use RAS irq default, so only set QM_RAS_MSI_INT_SEL for MSI */
- writel(msi, qm->io_base + QM_RAS_MSI_INT_SEL);
-
irq_unmask &= readl(qm->io_base + QM_ABNORMAL_INT_MASK);
writel(irq_unmask, qm->io_base + QM_ABNORMAL_INT_MASK);
}
@@ -1071,7 +1608,7 @@ static void qm_log_hw_error(struct hisi_qm *qm, u32 error_status)
}
}
-static pci_ers_result_t qm_hw_error_handle_v2(struct hisi_qm *qm)
+static enum acc_err_result qm_hw_error_handle_v2(struct hisi_qm *qm)
{
u32 error_status, tmp;
@@ -1080,15 +1617,20 @@ static pci_ers_result_t qm_hw_error_handle_v2(struct hisi_qm *qm)
error_status = qm->error_mask & tmp;
if (error_status) {
- qm_log_hw_error(qm, error_status);
+ if (error_status & QM_ECC_MBIT)
+ qm->err_status.is_qm_ecc_mbit = true;
- /* clear err sts */
- writel(error_status, qm->io_base + QM_ABNORMAL_INT_SOURCE);
+ qm_log_hw_error(qm, error_status);
+ if (error_status == QM_DB_RANDOM_INVALID) {
+ writel(error_status, qm->io_base +
+ QM_ABNORMAL_INT_SOURCE);
+ return ACC_ERR_RECOVERED;
+ }
- return PCI_ERS_RESULT_NEED_RESET;
+ return ACC_ERR_NEED_RESET;
}
- return PCI_ERS_RESULT_RECOVERED;
+ return ACC_ERR_RECOVERED;
}
static const struct hisi_qm_hw_ops qm_hw_ops_v1 = {
@@ -1117,68 +1659,61 @@ static void *qm_get_avail_sqe(struct hisi_qp *qp)
return qp->sqe + sq_tail * qp->qm->sqe_size;
}
-/**
- * hisi_qm_create_qp() - Create a queue pair from qm.
- * @qm: The qm we create a qp from.
- * @alg_type: Accelerator specific algorithm type in sqc.
- *
- * return created qp, -EBUSY if all qps in qm allocated, -ENOMEM if allocating
- * qp memory fails.
- */
-struct hisi_qp *hisi_qm_create_qp(struct hisi_qm *qm, u8 alg_type)
+static struct hisi_qp *qm_create_qp_nolock(struct hisi_qm *qm, u8 alg_type)
{
struct device *dev = &qm->pdev->dev;
struct hisi_qp *qp;
- int qp_id, ret;
-
- qp = kzalloc(sizeof(*qp), GFP_KERNEL);
- if (!qp)
- return ERR_PTR(-ENOMEM);
+ int qp_id;
- write_lock(&qm->qps_lock);
+ if (!qm_qp_avail_state(qm, NULL, QP_INIT))
+ return ERR_PTR(-EPERM);
- qp_id = find_first_zero_bit(qm->qp_bitmap, qm->qp_num);
- if (qp_id >= qm->qp_num) {
- write_unlock(&qm->qps_lock);
- dev_info(&qm->pdev->dev, "QM all queues are busy!\n");
- ret = -EBUSY;
- goto err_free_qp;
+ if (qm->qp_in_used == qm->qp_num) {
+ dev_info_ratelimited(dev, "All %u queues of QM are busy!\n",
+ qm->qp_num);
+ atomic64_inc(&qm->debug.dfx.create_qp_err_cnt);
+ return ERR_PTR(-EBUSY);
}
- set_bit(qp_id, qm->qp_bitmap);
- qm->qp_array[qp_id] = qp;
- qm->qp_in_used++;
- write_unlock(&qm->qps_lock);
-
- qp->qm = qm;
+ qp_id = idr_alloc_cyclic(&qm->qp_idr, NULL, 0, qm->qp_num, GFP_ATOMIC);
+ if (qp_id < 0) {
+ dev_info_ratelimited(dev, "All %u queues of QM are busy!\n",
+ qm->qp_num);
+ atomic64_inc(&qm->debug.dfx.create_qp_err_cnt);
+ return ERR_PTR(-EBUSY);
+ }
- if (qm->use_dma_api) {
- qp->qdma.size = qm->sqe_size * QM_Q_DEPTH +
- sizeof(struct qm_cqe) * QM_Q_DEPTH;
- qp->qdma.va = dma_alloc_coherent(dev, qp->qdma.size,
- &qp->qdma.dma, GFP_KERNEL);
- if (!qp->qdma.va) {
- ret = -ENOMEM;
- goto err_clear_bit;
- }
+ qp = &qm->qp_array[qp_id];
- dev_dbg(dev, "allocate qp dma buf(va=%pK, dma=%pad, size=%zx)\n",
- qp->qdma.va, &qp->qdma.dma, qp->qdma.size);
- }
+ memset(qp->cqe, 0, sizeof(struct qm_cqe) * QM_Q_DEPTH);
+ qp->event_cb = NULL;
+ qp->req_cb = NULL;
qp->qp_id = qp_id;
qp->alg_type = alg_type;
+ qm->qp_in_used++;
+ atomic_set(&qp->qp_status.flags, QP_INIT);
return qp;
+}
+
+/**
+ * hisi_qm_create_qp() - Create a queue pair from qm.
+ * @qm: The qm we create a qp from.
+ * @alg_type: Accelerator specific algorithm type in sqc.
+ *
+ * return created qp, ERR_PTR(-EPERM) if the qm state does not allow it,
+ * ERR_PTR(-EBUSY) if all qps in the qm are already allocated.
+ */
+struct hisi_qp *hisi_qm_create_qp(struct hisi_qm *qm, u8 alg_type)
+{
+ struct hisi_qp *qp;
-err_clear_bit:
- write_lock(&qm->qps_lock);
- qm->qp_array[qp_id] = NULL;
- clear_bit(qp_id, qm->qp_bitmap);
- write_unlock(&qm->qps_lock);
-err_free_qp:
- kfree(qp);
- return ERR_PTR(ret);
+ down_write(&qm->qps_lock);
+ qp = qm_create_qp_nolock(qm, alg_type);
+ up_write(&qm->qps_lock);
+
+ return qp;
}
EXPORT_SYMBOL_GPL(hisi_qm_create_qp);
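Taken together with hisi_qm_start_qp(), hisi_qp_send(), hisi_qm_stop_qp() and hisi_qm_release_qp() below, a caller-side sketch of the qp lifecycle looks roughly like this (error handling abbreviated, the sqe layout is accelerator specific):

	struct hisi_qp *qp;
	int ret;

	qp = hisi_qm_create_qp(qm, alg_type);
	if (IS_ERR(qp))
		return PTR_ERR(qp);

	ret = hisi_qm_start_qp(qp, 0);		/* arg carries the pasid */
	if (ret)
		goto out_release;

	ret = hisi_qp_send(qp, sqe);		/* completions arrive via qp->req_cb */

	hisi_qm_stop_qp(qp);
out_release:
	hisi_qm_release_qp(qp);
	return ret;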
@@ -1191,19 +1726,18 @@ EXPORT_SYMBOL_GPL(hisi_qm_create_qp);
void hisi_qm_release_qp(struct hisi_qp *qp)
{
struct hisi_qm *qm = qp->qm;
- struct qm_dma *qdma = &qp->qdma;
- struct device *dev = &qm->pdev->dev;
- if (qm->use_dma_api && qdma->va)
- dma_free_coherent(dev, qdma->size, qdma->va, qdma->dma);
+ down_write(&qm->qps_lock);
+
+ if (!qm_qp_avail_state(qm, qp, QP_CLOSE)) {
+ up_write(&qm->qps_lock);
+ return;
+ }
- write_lock(&qm->qps_lock);
- qm->qp_array[qp->qp_id] = NULL;
- clear_bit(qp->qp_id, qm->qp_bitmap);
qm->qp_in_used--;
- write_unlock(&qm->qps_lock);
+ idr_remove(&qm->qp_idr, qp->qp_id);
- kfree(qp);
+ up_write(&qm->qps_lock);
}
EXPORT_SYMBOL_GPL(hisi_qm_release_qp);
@@ -1234,7 +1768,7 @@ static int qm_qp_ctx_cfg(struct hisi_qp *qp, int qp_id, int pasid)
if (ver == QM_HW_V1) {
sqc->dw3 = cpu_to_le32(QM_MK_SQC_DW3_V1(0, 0, 0, qm->sqe_size));
sqc->w8 = cpu_to_le16(QM_Q_DEPTH - 1);
- } else if (ver == QM_HW_V2) {
+ } else {
sqc->dw3 = cpu_to_le32(QM_MK_SQC_DW3_V2(qm->sqe_size));
sqc->w8 = 0; /* rand_qc */
}
@@ -1261,7 +1795,7 @@ static int qm_qp_ctx_cfg(struct hisi_qp *qp, int qp_id, int pasid)
if (ver == QM_HW_V1) {
cqc->dw3 = cpu_to_le32(QM_MK_CQC_DW3_V1(0, 0, 0, 4));
cqc->w8 = cpu_to_le16(QM_Q_DEPTH - 1);
- } else if (ver == QM_HW_V2) {
+ } else {
cqc->dw3 = cpu_to_le32(QM_MK_CQC_DW3_V2(4));
cqc->w8 = 0;
}
@@ -1274,6 +1808,27 @@ static int qm_qp_ctx_cfg(struct hisi_qp *qp, int qp_id, int pasid)
return ret;
}
+static int qm_start_qp_nolock(struct hisi_qp *qp, unsigned long arg)
+{
+ struct hisi_qm *qm = qp->qm;
+ struct device *dev = &qm->pdev->dev;
+ int qp_id = qp->qp_id;
+ int pasid = arg;
+ int ret;
+
+ if (!qm_qp_avail_state(qm, qp, QP_START))
+ return -EPERM;
+
+ ret = qm_qp_ctx_cfg(qp, qp_id, pasid);
+ if (ret)
+ return ret;
+
+ atomic_set(&qp->qp_status.flags, QP_START);
+ dev_dbg(dev, "queue %d started\n", qp_id);
+
+ return 0;
+}
+
/**
* hisi_qm_start_qp() - Start a qp into running.
* @qp: The qp we want to start to run.
@@ -1285,48 +1840,112 @@ static int qm_qp_ctx_cfg(struct hisi_qp *qp, int qp_id, int pasid)
int hisi_qm_start_qp(struct hisi_qp *qp, unsigned long arg)
{
struct hisi_qm *qm = qp->qm;
- struct device *dev = &qm->pdev->dev;
- enum qm_hw_ver ver = qm->ver;
- int qp_id = qp->qp_id;
- int pasid = arg;
- size_t off = 0;
int ret;
-#define QP_INIT_BUF(qp, type, size) do { \
- (qp)->type = ((qp)->qdma.va + (off)); \
- (qp)->type##_dma = (qp)->qdma.dma + (off); \
- off += (size); \
-} while (0)
+ down_write(&qm->qps_lock);
+ ret = qm_start_qp_nolock(qp, arg);
+ up_write(&qm->qps_lock);
- if (!qp->qdma.dma) {
- dev_err(dev, "cannot get qm dma buffer\n");
- return -EINVAL;
+ return ret;
+}
+EXPORT_SYMBOL_GPL(hisi_qm_start_qp);
+
+/**
+ * Check whether the queue has been drained by comparing the tail pointers
+ * of the sq and cq.
+ */
+static int qm_drain_qp(struct hisi_qp *qp)
+{
+ size_t size = sizeof(struct qm_sqc) + sizeof(struct qm_cqc);
+ struct hisi_qm *qm = qp->qm;
+ struct device *dev = &qm->pdev->dev;
+ struct qm_sqc *sqc;
+ struct qm_cqc *cqc;
+ dma_addr_t dma_addr;
+ int ret = 0, i = 0;
+ void *addr;
+
+ /*
+ * No need to drain the queue if an ECC multi-bit error has occurred,
+ * because the master OOO is blocked in that case.
+ */
+ if (qm->err_status.is_qm_ecc_mbit || qm->err_status.is_dev_ecc_mbit)
+ return 0;
+
+ addr = qm_ctx_alloc(qm, size, &dma_addr);
+ if (IS_ERR(addr)) {
+ dev_err(dev, "Failed to alloc ctx for sqc and cqc!\n");
+ return -ENOMEM;
}
- /* sq need 128 bytes alignment */
- if (qp->qdma.dma & QM_SQE_DATA_ALIGN_MASK) {
- dev_err(dev, "qm sq is not aligned to 128 byte\n");
- return -EINVAL;
+ while (++i) {
+ ret = qm_dump_sqc_raw(qm, dma_addr, qp->qp_id);
+ if (ret) {
+ dev_err_ratelimited(dev, "Failed to dump sqc!\n");
+ break;
+ }
+ sqc = addr;
+
+ ret = qm_dump_cqc_raw(qm, (dma_addr + sizeof(struct qm_sqc)),
+ qp->qp_id);
+ if (ret) {
+ dev_err_ratelimited(dev, "Failed to dump cqc!\n");
+ break;
+ }
+ cqc = addr + sizeof(struct qm_sqc);
+
+ if ((sqc->tail == cqc->tail) &&
+ (QM_SQ_TAIL_IDX(sqc) == QM_CQ_TAIL_IDX(cqc)))
+ break;
+
+ if (i == MAX_WAIT_COUNTS) {
+ dev_err(dev, "Fail to empty queue %u!\n", qp->qp_id);
+ ret = -EBUSY;
+ break;
+ }
+
+ usleep_range(WAIT_PERIOD_US_MIN, WAIT_PERIOD_US_MAX);
}
- QP_INIT_BUF(qp, sqe, qm->sqe_size * QM_Q_DEPTH);
- QP_INIT_BUF(qp, cqe, sizeof(struct qm_cqe) * QM_Q_DEPTH);
+ qm_ctx_free(qm, size, addr, &dma_addr);
- dev_dbg(dev, "init qp buffer(v%d):\n"
- " sqe (%pK, %lx)\n"
- " cqe (%pK, %lx)\n",
- ver, qp->sqe, (unsigned long)qp->sqe_dma,
- qp->cqe, (unsigned long)qp->cqe_dma);
+ return ret;
+}
- ret = qm_qp_ctx_cfg(qp, qp_id, pasid);
+static int qm_stop_qp_nolock(struct hisi_qp *qp)
+{
+ struct device *dev = &qp->qm->pdev->dev;
+ int ret;
+
+ /*
+ * Stopping and releasing a qp is allowed during reset. If the qp was
+ * already stopped by the reset flow but is now being released anyway,
+ * clear the is_resetting flag so that this qp will not be restarted
+ * after the reset.
+ */
+ if (atomic_read(&qp->qp_status.flags) == QP_STOP) {
+ qp->is_resetting = false;
+ return 0;
+ }
+
+ if (!qm_qp_avail_state(qp->qm, qp, QP_STOP))
+ return -EPERM;
+
+ atomic_set(&qp->qp_status.flags, QP_STOP);
+
+ ret = qm_drain_qp(qp);
if (ret)
- return ret;
+ dev_err(dev, "Failed to drain out data for stopping!\n");
- dev_dbg(dev, "queue %d started\n", qp_id);
+ if (qp->qm->wq)
+ flush_workqueue(qp->qm->wq);
+ else
+ flush_work(&qp->qm->work);
+
+ dev_dbg(dev, "stop queue %u!", qp->qp_id);
return 0;
}
-EXPORT_SYMBOL_GPL(hisi_qm_start_qp);
/**
* hisi_qm_stop_qp() - Stop a qp in qm.
@@ -1336,27 +1955,13 @@ EXPORT_SYMBOL_GPL(hisi_qm_start_qp);
*/
int hisi_qm_stop_qp(struct hisi_qp *qp)
{
- struct device *dev = &qp->qm->pdev->dev;
- int i = 0;
-
- /* it is stopped */
- if (test_bit(QP_STOP, &qp->qp_status.flags))
- return 0;
-
- while (atomic_read(&qp->qp_status.used)) {
- i++;
- msleep(20);
- if (i == 10) {
- dev_err(dev, "Cannot drain out data for stopping, Force to stop!\n");
- return 0;
- }
- }
-
- set_bit(QP_STOP, &qp->qp_status.flags);
+ int ret;
- dev_dbg(dev, "stop queue %u!", qp->qp_id);
+ down_write(&qp->qm->qps_lock);
+ ret = qm_stop_qp_nolock(qp);
+ up_write(&qp->qm->qps_lock);
- return 0;
+ return ret;
}
EXPORT_SYMBOL_GPL(hisi_qm_stop_qp);
@@ -1367,6 +1972,13 @@ EXPORT_SYMBOL_GPL(hisi_qm_stop_qp);
*
* This function will return -EBUSY if qp is currently full, and -EAGAIN
* if qp related qm is resetting.
+ *
+ * Note: This function may run concurrently with qm_irq_thread and ACC reset.
+ * It has no race with qm_irq_thread. However, an ACC reset may happen during
+ * hisi_qp_send; no lock is taken here for performance reasons. In that case
+ * the current qm_db may fail to send, or the sent sqe may not be received.
+ * The QM sync/async receive functions should handle such error sqes, and the
+ * ACC reset-done handler should clear the used sqes to 0.
*/
int hisi_qp_send(struct hisi_qp *qp, const void *msg)
{
@@ -1375,7 +1987,9 @@ int hisi_qp_send(struct hisi_qp *qp, const void *msg)
u16 sq_tail_next = (sq_tail + 1) % QM_Q_DEPTH;
void *sqe = qm_get_avail_sqe(qp);
- if (unlikely(test_bit(QP_STOP, &qp->qp_status.flags))) {
+ if (unlikely(atomic_read(&qp->qp_status.flags) == QP_STOP ||
+ atomic_read(&qp->qm->status.flags) == QM_STOP ||
+ qp->is_resetting)) {
dev_info(&qp->qm->pdev->dev, "QP is stopped or resetting\n");
return -EAGAIN;
}
@@ -1397,12 +2011,13 @@ static void hisi_qm_cache_wb(struct hisi_qm *qm)
{
unsigned int val;
- if (qm->ver == QM_HW_V2) {
- writel(0x1, qm->io_base + QM_CACHE_WB_START);
- if (readl_relaxed_poll_timeout(qm->io_base + QM_CACHE_WB_DONE,
- val, val & BIT(0), 10, 1000))
- dev_err(&qm->pdev->dev, "QM writeback sqc cache fail!\n");
- }
+ if (qm->ver == QM_HW_V1)
+ return;
+
+ writel(0x1, qm->io_base + QM_CACHE_WB_START);
+ if (readl_relaxed_poll_timeout(qm->io_base + QM_CACHE_WB_DONE,
+ val, val & BIT(0), 10, 1000))
+ dev_err(&qm->pdev->dev, "QM writeback sqc cache fail!\n");
}
static void qm_qp_event_notifier(struct hisi_qp *qp)
@@ -1412,16 +2027,7 @@ static void qm_qp_event_notifier(struct hisi_qp *qp)
static int hisi_qm_get_available_instances(struct uacce_device *uacce)
{
- int i, ret;
- struct hisi_qm *qm = uacce->priv;
-
- read_lock(&qm->qps_lock);
- for (i = 0, ret = 0; i < qm->qp_num; i++)
- if (!qm->qp_array[i])
- ret++;
- read_unlock(&qm->qps_lock);
-
- return ret;
+ return hisi_qm_get_free_qp_num(uacce->priv);
}
static int hisi_qm_uacce_get_queue(struct uacce_device *uacce,
@@ -1468,12 +2074,12 @@ static int hisi_qm_uacce_mmap(struct uacce_queue *q,
switch (qfr->type) {
case UACCE_QFRT_MMIO:
- if (qm->ver == QM_HW_V2) {
- if (sz > PAGE_SIZE * (QM_DOORBELL_PAGE_NR +
- QM_DOORBELL_SQ_CQ_BASE_V2 / PAGE_SIZE))
+ if (qm->ver == QM_HW_V1) {
+ if (sz > PAGE_SIZE * QM_DOORBELL_PAGE_NR)
return -EINVAL;
} else {
- if (sz > PAGE_SIZE * QM_DOORBELL_PAGE_NR)
+ if (sz > PAGE_SIZE * (QM_DOORBELL_PAGE_NR +
+ QM_DOORBELL_SQ_CQ_BASE_V2 / PAGE_SIZE))
return -EINVAL;
}
@@ -1519,9 +2125,9 @@ static int qm_set_sqctype(struct uacce_queue *q, u16 type)
struct hisi_qm *qm = q->uacce->priv;
struct hisi_qp *qp = q->priv;
- write_lock(&qm->qps_lock);
+ down_write(&qm->qps_lock);
qp->alg_type = type;
- write_unlock(&qm->qps_lock);
+ up_write(&qm->qps_lock);
return 0;
}
@@ -1623,107 +2229,121 @@ int hisi_qm_get_free_qp_num(struct hisi_qm *qm)
{
int ret;
- read_lock(&qm->qps_lock);
+ down_read(&qm->qps_lock);
ret = qm->qp_num - qm->qp_in_used;
- read_unlock(&qm->qps_lock);
+ up_read(&qm->qps_lock);
return ret;
}
EXPORT_SYMBOL_GPL(hisi_qm_get_free_qp_num);
-/**
- * hisi_qm_init() - Initialize configures about qm.
- * @qm: The qm needing init.
- *
- * This function init qm, then we can call hisi_qm_start to put qm into work.
- */
-int hisi_qm_init(struct hisi_qm *qm)
+static void hisi_qp_memory_uninit(struct hisi_qm *qm, int num)
{
- struct pci_dev *pdev = qm->pdev;
- struct device *dev = &pdev->dev;
- unsigned int num_vec;
- int ret;
+ struct device *dev = &qm->pdev->dev;
+ struct qm_dma *qdma;
+ int i;
- switch (qm->ver) {
- case QM_HW_V1:
- qm->ops = &qm_hw_ops_v1;
- break;
- case QM_HW_V2:
- qm->ops = &qm_hw_ops_v2;
- break;
- default:
- return -EINVAL;
+ for (i = num - 1; i >= 0; i--) {
+ qdma = &qm->qp_array[i].qdma;
+ dma_free_coherent(dev, qdma->size, qdma->va, qdma->dma);
}
- ret = qm_alloc_uacce(qm);
- if (ret < 0)
- dev_warn(&pdev->dev, "fail to alloc uacce (%d)\n", ret);
+ kfree(qm->qp_array);
+}
- ret = pci_enable_device_mem(pdev);
- if (ret < 0) {
- dev_err(&pdev->dev, "Failed to enable device mem!\n");
- goto err_remove_uacce;
- }
+static int hisi_qp_memory_init(struct hisi_qm *qm, size_t dma_size, int id)
+{
+ struct device *dev = &qm->pdev->dev;
+ size_t off = qm->sqe_size * QM_Q_DEPTH;
+ struct hisi_qp *qp;
- ret = pci_request_mem_regions(pdev, qm->dev_name);
- if (ret < 0) {
- dev_err(&pdev->dev, "Failed to request mem regions!\n");
- goto err_disable_pcidev;
- }
+ qp = &qm->qp_array[id];
+ qp->qdma.va = dma_alloc_coherent(dev, dma_size, &qp->qdma.dma,
+ GFP_KERNEL);
+ if (!qp->qdma.va)
+ return -ENOMEM;
- qm->phys_base = pci_resource_start(pdev, PCI_BAR_2);
- qm->phys_size = pci_resource_len(qm->pdev, PCI_BAR_2);
- qm->io_base = ioremap(qm->phys_base, qm->phys_size);
- if (!qm->io_base) {
- ret = -EIO;
- goto err_release_mem_regions;
- }
+ qp->sqe = qp->qdma.va;
+ qp->sqe_dma = qp->qdma.dma;
+ qp->cqe = qp->qdma.va + off;
+ qp->cqe_dma = qp->qdma.dma + off;
+ qp->qdma.size = dma_size;
+ qp->qm = qm;
+ qp->qp_id = id;
- ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
- if (ret < 0)
- goto err_iounmap;
- pci_set_master(pdev);
+ return 0;
+}
- if (!qm->ops->get_irq_num) {
- ret = -EOPNOTSUPP;
- goto err_iounmap;
+static int hisi_qm_memory_init(struct hisi_qm *qm)
+{
+ struct device *dev = &qm->pdev->dev;
+ size_t qp_dma_size, off = 0;
+ int i, ret = 0;
+
+#define QM_INIT_BUF(qm, type, num) do { \
+ (qm)->type = ((qm)->qdma.va + (off)); \
+ (qm)->type##_dma = (qm)->qdma.dma + (off); \
+ off += QMC_ALIGN(sizeof(struct qm_##type) * (num)); \
+} while (0)
+
+ idr_init(&qm->qp_idr);
+ qm->qdma.size = QMC_ALIGN(sizeof(struct qm_eqe) * QM_Q_DEPTH) +
+ QMC_ALIGN(sizeof(struct qm_aeqe) * QM_Q_DEPTH) +
+ QMC_ALIGN(sizeof(struct qm_sqc) * qm->qp_num) +
+ QMC_ALIGN(sizeof(struct qm_cqc) * qm->qp_num);
+ qm->qdma.va = dma_alloc_coherent(dev, qm->qdma.size, &qm->qdma.dma,
+ GFP_ATOMIC);
+ dev_dbg(dev, "allocate qm dma buf size=%zx)\n", qm->qdma.size);
+ if (!qm->qdma.va)
+ return -ENOMEM;
+
+ QM_INIT_BUF(qm, eqe, QM_Q_DEPTH);
+ QM_INIT_BUF(qm, aeqe, QM_Q_DEPTH);
+ QM_INIT_BUF(qm, sqc, qm->qp_num);
+ QM_INIT_BUF(qm, cqc, qm->qp_num);
+
+ qm->qp_array = kcalloc(qm->qp_num, sizeof(struct hisi_qp), GFP_KERNEL);
+ if (!qm->qp_array) {
+ ret = -ENOMEM;
+ goto err_alloc_qp_array;
}
- num_vec = qm->ops->get_irq_num(qm);
- ret = pci_alloc_irq_vectors(pdev, num_vec, num_vec, PCI_IRQ_MSI);
- if (ret < 0) {
- dev_err(dev, "Failed to enable MSI vectors!\n");
- goto err_iounmap;
+
+ /* one more page for device or qp statuses */
+ qp_dma_size = qm->sqe_size * QM_Q_DEPTH +
+ sizeof(struct qm_cqe) * QM_Q_DEPTH;
+ qp_dma_size = PAGE_ALIGN(qp_dma_size);
+ for (i = 0; i < qm->qp_num; i++) {
+ ret = hisi_qp_memory_init(qm, qp_dma_size, i);
+ if (ret)
+ goto err_init_qp_mem;
+
+ dev_dbg(dev, "allocate qp dma buf size=%zx)\n", qp_dma_size);
}
- ret = qm_irq_register(qm);
- if (ret)
- goto err_free_irq_vectors;
+ return ret;
- qm->qp_in_used = 0;
- mutex_init(&qm->mailbox_lock);
- rwlock_init(&qm->qps_lock);
- INIT_WORK(&qm->work, qm_work_process);
+err_init_qp_mem:
+ hisi_qp_memory_uninit(qm, i);
+err_alloc_qp_array:
+ dma_free_coherent(dev, qm->qdma.size, qm->qdma.va, qm->qdma.dma);
- dev_dbg(dev, "init qm %s with %s\n", pdev->is_physfn ? "pf" : "vf",
- qm->use_dma_api ? "dma api" : "iommu api");
+ return ret;
+}
- return 0;
+static void hisi_qm_pre_init(struct hisi_qm *qm)
+{
+ struct pci_dev *pdev = qm->pdev;
-err_free_irq_vectors:
- pci_free_irq_vectors(pdev);
-err_iounmap:
- iounmap(qm->io_base);
-err_release_mem_regions:
- pci_release_mem_regions(pdev);
-err_disable_pcidev:
- pci_disable_device(pdev);
-err_remove_uacce:
- uacce_remove(qm->uacce);
- qm->uacce = NULL;
+ if (qm->ver == QM_HW_V1)
+ qm->ops = &qm_hw_ops_v1;
+ else
+ qm->ops = &qm_hw_ops_v2;
- return ret;
+ pci_set_drvdata(pdev, qm);
+ mutex_init(&qm->mailbox_lock);
+ init_rwsem(&qm->qps_lock);
+ qm->qp_in_used = 0;
}
-EXPORT_SYMBOL_GPL(hisi_qm_init);
/**
* hisi_qm_uninit() - Uninitialize qm.
@@ -1736,10 +2356,20 @@ void hisi_qm_uninit(struct hisi_qm *qm)
struct pci_dev *pdev = qm->pdev;
struct device *dev = &pdev->dev;
+ down_write(&qm->qps_lock);
+
+ if (!qm_avail_state(qm, QM_CLOSE)) {
+ up_write(&qm->qps_lock);
+ return;
+ }
+
uacce_remove(qm->uacce);
qm->uacce = NULL;
- if (qm->use_dma_api && qm->qdma.va) {
+ hisi_qp_memory_uninit(qm, qm->qp_num);
+ idr_destroy(&qm->qp_idr);
+
+ if (qm->qdma.va) {
hisi_qm_cache_wb(qm);
dma_free_coherent(dev, qm->qdma.size,
qm->qdma.va, qm->qdma.dma);
@@ -1751,6 +2381,8 @@ void hisi_qm_uninit(struct hisi_qm *qm)
iounmap(qm->io_base);
pci_release_mem_regions(pdev);
pci_disable_device(pdev);
+
+ up_write(&qm->qps_lock);
}
EXPORT_SYMBOL_GPL(hisi_qm_uninit);
@@ -1781,12 +2413,6 @@ int hisi_qm_get_vft(struct hisi_qm *qm, u32 *base, u32 *number)
EXPORT_SYMBOL_GPL(hisi_qm_get_vft);
/**
- * hisi_qm_set_vft() - Set "virtual function table" for a qm.
- * @fun_num: Number of operated function.
- * @qm: The qm in which to set vft, alway in a PF.
- * @base: The base number of queue in vft.
- * @number: The number of queues in vft. 0 means invalid vft.
- *
* This function is always called in the PF driver; it is used to assign queues
* among PF and VFs.
*
@@ -1794,7 +2420,7 @@ EXPORT_SYMBOL_GPL(hisi_qm_get_vft);
* Assign queues A~B to VF: hisi_qm_set_vft(qm, 2, A, B - A + 1)
* (VF function number 0x2)
*/
-int hisi_qm_set_vft(struct hisi_qm *qm, u32 fun_num, u32 base,
+static int hisi_qm_set_vft(struct hisi_qm *qm, u32 fun_num, u32 base,
u32 number)
{
u32 max_q_num = qm->ctrl_qp_num;
@@ -1805,7 +2431,6 @@ int hisi_qm_set_vft(struct hisi_qm *qm, u32 fun_num, u32 base,
return qm_set_sqc_cqc_vft(qm, fun_num, base, number);
}
-EXPORT_SYMBOL_GPL(hisi_qm_set_vft);
static void qm_init_eq_aeq_status(struct hisi_qm *qm)
{
@@ -1872,22 +2497,10 @@ static int qm_eq_ctx_cfg(struct hisi_qm *qm)
static int __hisi_qm_start(struct hisi_qm *qm)
{
- struct pci_dev *pdev = qm->pdev;
- struct device *dev = &pdev->dev;
- size_t off = 0;
int ret;
-#define QM_INIT_BUF(qm, type, num) do { \
- (qm)->type = ((qm)->qdma.va + (off)); \
- (qm)->type##_dma = (qm)->qdma.dma + (off); \
- off += QMC_ALIGN(sizeof(struct qm_##type) * (num)); \
-} while (0)
-
WARN_ON(!qm->qdma.dma);
- if (qm->qp_num == 0)
- return -EINVAL;
-
if (qm->fun_type == QM_HW_PF) {
ret = qm_dev_mem_reset(qm);
if (ret)
@@ -1898,21 +2511,6 @@ static int __hisi_qm_start(struct hisi_qm *qm)
return ret;
}
- QM_INIT_BUF(qm, eqe, QM_Q_DEPTH);
- QM_INIT_BUF(qm, aeqe, QM_Q_DEPTH);
- QM_INIT_BUF(qm, sqc, qm->qp_num);
- QM_INIT_BUF(qm, cqc, qm->qp_num);
-
- dev_dbg(dev, "init qm buffer:\n"
- " eqe (%pK, %lx)\n"
- " aeqe (%pK, %lx)\n"
- " sqc (%pK, %lx)\n"
- " cqc (%pK, %lx)\n",
- qm->eqe, (unsigned long)qm->eqe_dma,
- qm->aeqe, (unsigned long)qm->aeqe_dma,
- qm->sqc, (unsigned long)qm->sqc_dma,
- qm->cqc, (unsigned long)qm->cqc_dma);
-
ret = qm_eq_ctx_cfg(qm);
if (ret)
return ret;
@@ -1940,43 +2538,102 @@ static int __hisi_qm_start(struct hisi_qm *qm)
int hisi_qm_start(struct hisi_qm *qm)
{
struct device *dev = &qm->pdev->dev;
+ int ret = 0;
+
+ down_write(&qm->qps_lock);
+
+ if (!qm_avail_state(qm, QM_START)) {
+ up_write(&qm->qps_lock);
+ return -EPERM;
+ }
dev_dbg(dev, "qm start with %d queue pairs\n", qm->qp_num);
if (!qm->qp_num) {
dev_err(dev, "qp_num should not be 0\n");
- return -EINVAL;
+ ret = -EINVAL;
+ goto err_unlock;
}
- if (!qm->qp_bitmap) {
- qm->qp_bitmap = devm_kcalloc(dev, BITS_TO_LONGS(qm->qp_num),
- sizeof(long), GFP_KERNEL);
- qm->qp_array = devm_kcalloc(dev, qm->qp_num,
- sizeof(struct hisi_qp *),
- GFP_KERNEL);
- if (!qm->qp_bitmap || !qm->qp_array)
- return -ENOMEM;
+ ret = __hisi_qm_start(qm);
+ if (!ret)
+ atomic_set(&qm->status.flags, QM_START);
+
+err_unlock:
+ up_write(&qm->qps_lock);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(hisi_qm_start);
+
+static int qm_restart(struct hisi_qm *qm)
+{
+ struct device *dev = &qm->pdev->dev;
+ struct hisi_qp *qp;
+ int ret, i;
+
+ ret = hisi_qm_start(qm);
+ if (ret < 0)
+ return ret;
+
+ down_write(&qm->qps_lock);
+ for (i = 0; i < qm->qp_num; i++) {
+ qp = &qm->qp_array[i];
+ if (atomic_read(&qp->qp_status.flags) == QP_STOP &&
+ qp->is_resetting == true) {
+ ret = qm_start_qp_nolock(qp, 0);
+ if (ret < 0) {
+ dev_err(dev, "Failed to start qp%d!\n", i);
+
+ up_write(&qm->qps_lock);
+ return ret;
+ }
+ qp->is_resetting = false;
+ }
}
+ up_write(&qm->qps_lock);
- if (!qm->use_dma_api) {
- dev_dbg(&qm->pdev->dev, "qm delay start\n");
- return 0;
- } else if (!qm->qdma.va) {
- qm->qdma.size = QMC_ALIGN(sizeof(struct qm_eqe) * QM_Q_DEPTH) +
- QMC_ALIGN(sizeof(struct qm_aeqe) * QM_Q_DEPTH) +
- QMC_ALIGN(sizeof(struct qm_sqc) * qm->qp_num) +
- QMC_ALIGN(sizeof(struct qm_cqc) * qm->qp_num);
- qm->qdma.va = dma_alloc_coherent(dev, qm->qdma.size,
- &qm->qdma.dma, GFP_KERNEL);
- dev_dbg(dev, "allocate qm dma buf(va=%pK, dma=%pad, size=%zx)\n",
- qm->qdma.va, &qm->qdma.dma, qm->qdma.size);
- if (!qm->qdma.va)
- return -ENOMEM;
+ return 0;
+}
+
+/* Stop started qps in reset flow */
+static int qm_stop_started_qp(struct hisi_qm *qm)
+{
+ struct device *dev = &qm->pdev->dev;
+ struct hisi_qp *qp;
+ int i, ret;
+
+ for (i = 0; i < qm->qp_num; i++) {
+ qp = &qm->qp_array[i];
+ if (qp && atomic_read(&qp->qp_status.flags) == QP_START) {
+ qp->is_resetting = true;
+ ret = qm_stop_qp_nolock(qp);
+ if (ret < 0) {
+ dev_err(dev, "Failed to stop qp%d!\n", i);
+ return ret;
+ }
+ }
}
- return __hisi_qm_start(qm);
+ return 0;
+}
+
+/**
+ * Clear the memory of all queues in a qm. The accelerator reset flow can
+ * use this to wipe the queues.
+ */
+static void qm_clear_queues(struct hisi_qm *qm)
+{
+ struct hisi_qp *qp;
+ int i;
+
+ for (i = 0; i < qm->qp_num; i++) {
+ qp = &qm->qp_array[i];
+ if (qp->is_resetting)
+ memset(qp->qdma.va, 0, qp->qdma.size);
+ }
+
+ memset(qm->qdma.va, 0, qm->qdma.size);
}
-EXPORT_SYMBOL_GPL(hisi_qm_start);
/**
* hisi_qm_stop() - Stop a qm.
@@ -1988,43 +2645,98 @@ EXPORT_SYMBOL_GPL(hisi_qm_start);
*/
int hisi_qm_stop(struct hisi_qm *qm)
{
- struct device *dev;
- struct hisi_qp *qp;
- int ret = 0, i;
+ struct device *dev = &qm->pdev->dev;
+ int ret = 0;
- if (!qm || !qm->pdev) {
- WARN_ON(1);
- return -EINVAL;
+ down_write(&qm->qps_lock);
+
+ if (!qm_avail_state(qm, QM_STOP)) {
+ ret = -EPERM;
+ goto err_unlock;
}
- dev = &qm->pdev->dev;
+ if (qm->status.stop_reason == QM_SOFT_RESET ||
+ qm->status.stop_reason == QM_FLR) {
+ ret = qm_stop_started_qp(qm);
+ if (ret < 0) {
+ dev_err(dev, "Failed to stop started qp!\n");
+ goto err_unlock;
+ }
+ }
/* Mask eq and aeq irq */
writel(0x1, qm->io_base + QM_VF_EQ_INT_MASK);
writel(0x1, qm->io_base + QM_VF_AEQ_INT_MASK);
- /* Stop all qps belong to this qm */
- for (i = 0; i < qm->qp_num; i++) {
- qp = qm->qp_array[i];
- if (qp) {
- ret = hisi_qm_stop_qp(qp);
- if (ret < 0) {
- dev_err(dev, "Failed to stop qp%d!\n", i);
- return -EBUSY;
- }
- }
- }
-
if (qm->fun_type == QM_HW_PF) {
ret = hisi_qm_set_vft(qm, 0, 0, 0);
- if (ret < 0)
+ if (ret < 0) {
dev_err(dev, "Failed to set vft!\n");
+ ret = -EBUSY;
+ goto err_unlock;
+ }
}
+ qm_clear_queues(qm);
+ atomic_set(&qm->status.flags, QM_STOP);
+
+err_unlock:
+ up_write(&qm->qps_lock);
return ret;
}
EXPORT_SYMBOL_GPL(hisi_qm_stop);
+static ssize_t qm_status_read(struct file *filp, char __user *buffer,
+ size_t count, loff_t *pos)
+{
+ struct hisi_qm *qm = filp->private_data;
+ char buf[QM_DBG_READ_LEN];
+ int val, cp_len, len;
+
+ if (*pos)
+ return 0;
+
+ if (count < QM_DBG_READ_LEN)
+ return -ENOSPC;
+
+ val = atomic_read(&qm->status.flags);
+ len = snprintf(buf, QM_DBG_READ_LEN, "%s\n", qm_s[val]);
+ if (!len)
+ return -EFAULT;
+
+ cp_len = copy_to_user(buffer, buf, len);
+ if (cp_len)
+ return -EFAULT;
+
+ return (*pos = len);
+}
+
+static const struct file_operations qm_status_fops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .read = qm_status_read,
+};
+
+static int qm_debugfs_atomic64_set(void *data, u64 val)
+{
+ if (val)
+ return -EINVAL;
+
+ atomic64_set((atomic64_t *)data, 0);
+
+ return 0;
+}
+
+static int qm_debugfs_atomic64_get(void *data, u64 *val)
+{
+ *val = atomic64_read((atomic64_t *)data);
+
+ return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(qm_atomic64_ops, qm_debugfs_atomic64_get,
+ qm_debugfs_atomic64_set, "%llu\n");
+
/**
* hisi_qm_debug_init() - Initialize qm related debugfs files.
* @qm: The qm for which we want to add debugfs files.
@@ -2033,7 +2745,9 @@ EXPORT_SYMBOL_GPL(hisi_qm_stop);
*/
int hisi_qm_debug_init(struct hisi_qm *qm)
{
+ struct qm_dfx *dfx = &qm->debug.dfx;
struct dentry *qm_d;
+ void *data;
int i, ret;
qm_d = debugfs_create_dir("qm", qm->debug.debug_root);
@@ -2047,7 +2761,20 @@ int hisi_qm_debug_init(struct hisi_qm *qm)
goto failed_to_create;
}
- debugfs_create_file("qm_regs", 0444, qm->debug.qm_d, qm, &qm_regs_fops);
+ debugfs_create_file("regs", 0444, qm->debug.qm_d, qm, &qm_regs_fops);
+
+ debugfs_create_file("cmd", 0444, qm->debug.qm_d, qm, &qm_cmd_fops);
+
+ debugfs_create_file("status", 0444, qm->debug.qm_d, qm,
+ &qm_status_fops);
+ for (i = 0; i < ARRAY_SIZE(qm_dfx_files); i++) {
+ data = (atomic64_t *)((uintptr_t)dfx + qm_dfx_files[i].offset);
+ debugfs_create_file(qm_dfx_files[i].name,
+ 0644,
+ qm_d,
+ data,
+ &qm_atomic64_ops);
+ }
return 0;
@@ -2095,8 +2822,7 @@ static void qm_hw_error_init(struct hisi_qm *qm)
return;
}
- qm->ops->hw_error_init(qm, err_info->ce, err_info->nfe,
- err_info->fe, err_info->msi);
+ qm->ops->hw_error_init(qm, err_info->ce, err_info->nfe, err_info->fe);
}
static void qm_hw_error_uninit(struct hisi_qm *qm)
@@ -2109,36 +2835,17 @@ static void qm_hw_error_uninit(struct hisi_qm *qm)
qm->ops->hw_error_uninit(qm);
}
-static pci_ers_result_t qm_hw_error_handle(struct hisi_qm *qm)
+static enum acc_err_result qm_hw_error_handle(struct hisi_qm *qm)
{
if (!qm->ops->hw_error_handle) {
dev_err(&qm->pdev->dev, "QM doesn't support hw error report!\n");
- return PCI_ERS_RESULT_NONE;
+ return ACC_ERR_NONE;
}
return qm->ops->hw_error_handle(qm);
}
/**
- * hisi_qm_get_hw_version() - Get hardware version of a qm.
- * @pdev: The device which hardware version we want to get.
- *
- * This function gets the hardware version of a qm. Return QM_HW_UNKNOWN
- * if the hardware version is not supported.
- */
-enum qm_hw_ver hisi_qm_get_hw_version(struct pci_dev *pdev)
-{
- switch (pdev->revision) {
- case QM_HW_V1:
- case QM_HW_V2:
- return pdev->revision;
- default:
- return QM_HW_UNKNOWN;
- }
-}
-EXPORT_SYMBOL_GPL(hisi_qm_get_hw_version);
-
-/**
* hisi_qm_dev_err_init() - Initialize device error configuration.
* @qm: The qm for which we want to do error initialization.
*
@@ -2299,34 +3006,163 @@ err:
}
EXPORT_SYMBOL_GPL(hisi_qm_alloc_qps_node);
-static pci_ers_result_t qm_dev_err_handle(struct hisi_qm *qm)
+static int qm_vf_q_assign(struct hisi_qm *qm, u32 num_vfs)
+{
+ u32 remain_q_num, q_num, i, j;
+ u32 q_base = qm->qp_num;
+ int ret;
+
+ if (!num_vfs)
+ return -EINVAL;
+
+ remain_q_num = qm->ctrl_qp_num - qm->qp_num;
+
+ /* If the remaining queues are not enough, return an error. */
+ if (qm->ctrl_qp_num < qm->qp_num || remain_q_num < num_vfs)
+ return -EINVAL;
+
+ q_num = remain_q_num / num_vfs;
+ for (i = 1; i <= num_vfs; i++) {
+ if (i == num_vfs)
+ q_num += remain_q_num % num_vfs;
+ ret = hisi_qm_set_vft(qm, i, q_base, q_num);
+ if (ret) {
+ for (j = i; j > 0; j--)
+ hisi_qm_set_vft(qm, j, 0, 0);
+ return ret;
+ }
+ q_base += q_num;
+ }
+
+ return 0;
+}
+
+static int qm_clear_vft_config(struct hisi_qm *qm)
+{
+ int ret;
+ u32 i;
+
+ for (i = 1; i <= qm->vfs_num; i++) {
+ ret = hisi_qm_set_vft(qm, i, 0, 0);
+ if (ret)
+ return ret;
+ }
+ qm->vfs_num = 0;
+
+ return 0;
+}
+
+/**
+ * hisi_qm_sriov_enable() - enable virtual functions
+ * @pdev: the PCIe device
+ * @max_vfs: the number of virtual functions to enable
+ *
+ * Returns the number of enabled VFs. If VFs are already enabled or max_vfs
+ * is more than the total number of VFs the device supports, failure is
+ * returned.
+ */
+int hisi_qm_sriov_enable(struct pci_dev *pdev, int max_vfs)
+{
+ struct hisi_qm *qm = pci_get_drvdata(pdev);
+ int pre_existing_vfs, num_vfs, total_vfs, ret;
+
+ total_vfs = pci_sriov_get_totalvfs(pdev);
+ pre_existing_vfs = pci_num_vf(pdev);
+ if (pre_existing_vfs) {
+ pci_err(pdev, "%d VFs already enabled. Please disable pre-enabled VFs!\n",
+ pre_existing_vfs);
+ return 0;
+ }
+
+ num_vfs = min_t(int, max_vfs, total_vfs);
+ ret = qm_vf_q_assign(qm, num_vfs);
+ if (ret) {
+ pci_err(pdev, "Can't assign queues for VF!\n");
+ return ret;
+ }
+
+ qm->vfs_num = num_vfs;
+
+ ret = pci_enable_sriov(pdev, num_vfs);
+ if (ret) {
+ pci_err(pdev, "Can't enable VF!\n");
+ qm_clear_vft_config(qm);
+ return ret;
+ }
+
+ pci_info(pdev, "VF enabled, vfs_num(=%d)!\n", num_vfs);
+
+ return num_vfs;
+}
+EXPORT_SYMBOL_GPL(hisi_qm_sriov_enable);
+
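A minimal sketch of the probe-time call, mirroring the SEC driver change later in this patch (vfs_num here stands for that driver's module parameter, and the label is the caller's own unwind path):

        /* Hypothetical probe-time hook; vfs_num is the driver's module parameter. */
        if (qm->fun_type == QM_HW_PF && vfs_num) {
                ret = hisi_qm_sriov_enable(pdev, vfs_num);
                if (ret < 0)
                        goto err_crypto_unregister;
        }
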
+/**
+ * hisi_qm_sriov_disable - disable virtual functions
+ * @pdev: the PCI device
+ *
+ * Return failure if there are VFs assigned already.
+ */
+int hisi_qm_sriov_disable(struct pci_dev *pdev)
+{
+ struct hisi_qm *qm = pci_get_drvdata(pdev);
+
+ if (pci_vfs_assigned(pdev)) {
+ pci_err(pdev, "Failed to disable VFs as VFs are assigned!\n");
+ return -EPERM;
+ }
+
+ /* remove() of the accelerator's PCI driver will be called to free VF resources */
+ pci_disable_sriov(pdev);
+ return qm_clear_vft_config(qm);
+}
+EXPORT_SYMBOL_GPL(hisi_qm_sriov_disable);
+
+/**
+ * hisi_qm_sriov_configure - configure the number of VFs
+ * @pdev: The PCI device
+ * @num_vfs: The number of VFs to enable
+ *
+ * Enable SR-IOV according to num_vfs, 0 means disable.
+ */
+int hisi_qm_sriov_configure(struct pci_dev *pdev, int num_vfs)
+{
+ if (num_vfs == 0)
+ return hisi_qm_sriov_disable(pdev);
+ else
+ return hisi_qm_sriov_enable(pdev, num_vfs);
+}
+EXPORT_SYMBOL_GPL(hisi_qm_sriov_configure);
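With this helper exported, a driver can plug it straight into its pci_driver as the sriov_configure callback; a minimal sketch, mirroring the SEC driver change later in this patch (the example_* names are placeholders):

        static struct pci_driver example_acc_driver = {
                .name            = "hisi_example_acc",   /* placeholder driver name */
                .id_table        = example_dev_ids,
                .probe           = example_probe,
                .remove          = example_remove,
                .sriov_configure = hisi_qm_sriov_configure,
        };
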
+
+static enum acc_err_result qm_dev_err_handle(struct hisi_qm *qm)
{
u32 err_sts;
if (!qm->err_ini->get_dev_hw_err_status) {
dev_err(&qm->pdev->dev, "Device doesn't support get hw error status!\n");
- return PCI_ERS_RESULT_NONE;
+ return ACC_ERR_NONE;
}
/* get device hardware error status */
err_sts = qm->err_ini->get_dev_hw_err_status(qm);
if (err_sts) {
+ if (err_sts & qm->err_ini->err_info.ecc_2bits_mask)
+ qm->err_status.is_dev_ecc_mbit = true;
+
if (!qm->err_ini->log_dev_hw_err) {
dev_err(&qm->pdev->dev, "Device doesn't support log hw error!\n");
- return PCI_ERS_RESULT_NEED_RESET;
+ return ACC_ERR_NEED_RESET;
}
qm->err_ini->log_dev_hw_err(qm, err_sts);
- return PCI_ERS_RESULT_NEED_RESET;
+ return ACC_ERR_NEED_RESET;
}
- return PCI_ERS_RESULT_RECOVERED;
+ return ACC_ERR_RECOVERED;
}
-static pci_ers_result_t qm_process_dev_error(struct pci_dev *pdev)
+static enum acc_err_result qm_process_dev_error(struct hisi_qm *qm)
{
- struct hisi_qm *qm = pci_get_drvdata(pdev);
- pci_ers_result_t qm_ret, dev_ret;
+ enum acc_err_result qm_ret, dev_ret;
/* log qm error */
qm_ret = qm_hw_error_handle(qm);
@@ -2334,9 +3170,9 @@ static pci_ers_result_t qm_process_dev_error(struct pci_dev *pdev)
/* log device error */
dev_ret = qm_dev_err_handle(qm);
- return (qm_ret == PCI_ERS_RESULT_NEED_RESET ||
- dev_ret == PCI_ERS_RESULT_NEED_RESET) ?
- PCI_ERS_RESULT_NEED_RESET : PCI_ERS_RESULT_RECOVERED;
+ return (qm_ret == ACC_ERR_NEED_RESET ||
+ dev_ret == ACC_ERR_NEED_RESET) ?
+ ACC_ERR_NEED_RESET : ACC_ERR_RECOVERED;
}
/**
@@ -2350,6 +3186,9 @@ static pci_ers_result_t qm_process_dev_error(struct pci_dev *pdev)
pci_ers_result_t hisi_qm_dev_err_detected(struct pci_dev *pdev,
pci_channel_state_t state)
{
+ struct hisi_qm *qm = pci_get_drvdata(pdev);
+ enum acc_err_result ret;
+
if (pdev->is_virtfn)
return PCI_ERS_RESULT_NONE;
@@ -2357,10 +3196,756 @@ pci_ers_result_t hisi_qm_dev_err_detected(struct pci_dev *pdev,
if (state == pci_channel_io_perm_failure)
return PCI_ERS_RESULT_DISCONNECT;
- return qm_process_dev_error(pdev);
+ ret = qm_process_dev_error(qm);
+ if (ret == ACC_ERR_NEED_RESET)
+ return PCI_ERS_RESULT_NEED_RESET;
+
+ return PCI_ERS_RESULT_RECOVERED;
}
EXPORT_SYMBOL_GPL(hisi_qm_dev_err_detected);
+static int qm_get_hw_error_status(struct hisi_qm *qm)
+{
+ return readl(qm->io_base + QM_ABNORMAL_INT_STATUS);
+}
+
+static int qm_check_req_recv(struct hisi_qm *qm)
+{
+ struct pci_dev *pdev = qm->pdev;
+ int ret;
+ u32 val;
+
+ writel(ACC_VENDOR_ID_VALUE, qm->io_base + QM_PEH_VENDOR_ID);
+ ret = readl_relaxed_poll_timeout(qm->io_base + QM_PEH_VENDOR_ID, val,
+ (val == ACC_VENDOR_ID_VALUE),
+ POLL_PERIOD, POLL_TIMEOUT);
+ if (ret) {
+ dev_err(&pdev->dev, "Fails to read QM reg!\n");
+ return ret;
+ }
+
+ writel(PCI_VENDOR_ID_HUAWEI, qm->io_base + QM_PEH_VENDOR_ID);
+ ret = readl_relaxed_poll_timeout(qm->io_base + QM_PEH_VENDOR_ID, val,
+ (val == PCI_VENDOR_ID_HUAWEI),
+ POLL_PERIOD, POLL_TIMEOUT);
+ if (ret)
+ dev_err(&pdev->dev, "Fails to read QM reg in the second time!\n");
+
+ return ret;
+}
+
+static int qm_set_pf_mse(struct hisi_qm *qm, bool set)
+{
+ struct pci_dev *pdev = qm->pdev;
+ u16 cmd;
+ int i;
+
+ pci_read_config_word(pdev, PCI_COMMAND, &cmd);
+ if (set)
+ cmd |= PCI_COMMAND_MEMORY;
+ else
+ cmd &= ~PCI_COMMAND_MEMORY;
+
+ pci_write_config_word(pdev, PCI_COMMAND, cmd);
+ for (i = 0; i < MAX_WAIT_COUNTS; i++) {
+ pci_read_config_word(pdev, PCI_COMMAND, &cmd);
+ if (set == ((cmd & PCI_COMMAND_MEMORY) >> 1))
+ return 0;
+
+ udelay(1);
+ }
+
+ return -ETIMEDOUT;
+}
+
+static int qm_set_vf_mse(struct hisi_qm *qm, bool set)
+{
+ struct pci_dev *pdev = qm->pdev;
+ u16 sriov_ctrl;
+ int pos;
+ int i;
+
+ pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV);
+ pci_read_config_word(pdev, pos + PCI_SRIOV_CTRL, &sriov_ctrl);
+ if (set)
+ sriov_ctrl |= PCI_SRIOV_CTRL_MSE;
+ else
+ sriov_ctrl &= ~PCI_SRIOV_CTRL_MSE;
+ pci_write_config_word(pdev, pos + PCI_SRIOV_CTRL, sriov_ctrl);
+
+ for (i = 0; i < MAX_WAIT_COUNTS; i++) {
+ pci_read_config_word(pdev, pos + PCI_SRIOV_CTRL, &sriov_ctrl);
+ if (set == (sriov_ctrl & PCI_SRIOV_CTRL_MSE) >>
+ ACC_PEH_SRIOV_CTRL_VF_MSE_SHIFT)
+ return 0;
+
+ udelay(1);
+ }
+
+ return -ETIMEDOUT;
+}
+
+static int qm_set_msi(struct hisi_qm *qm, bool set)
+{
+ struct pci_dev *pdev = qm->pdev;
+
+ if (set) {
+ pci_write_config_dword(pdev, pdev->msi_cap + PCI_MSI_MASK_64,
+ 0);
+ } else {
+ pci_write_config_dword(pdev, pdev->msi_cap + PCI_MSI_MASK_64,
+ ACC_PEH_MSI_DISABLE);
+ if (qm->err_status.is_qm_ecc_mbit ||
+ qm->err_status.is_dev_ecc_mbit)
+ return 0;
+
+ mdelay(1);
+ if (readl(qm->io_base + QM_PEH_DFX_INFO0))
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+static int qm_vf_reset_prepare(struct hisi_qm *qm)
+{
+ struct hisi_qm_list *qm_list = qm->qm_list;
+ int stop_reason = qm->status.stop_reason;
+ struct pci_dev *pdev = qm->pdev;
+ struct pci_dev *virtfn;
+ struct hisi_qm *vf_qm;
+ int ret = 0;
+
+ mutex_lock(&qm_list->lock);
+ list_for_each_entry(vf_qm, &qm_list->list, list) {
+ virtfn = vf_qm->pdev;
+ if (virtfn == pdev)
+ continue;
+
+ if (pci_physfn(virtfn) == pdev) {
+ vf_qm->status.stop_reason = stop_reason;
+ ret = hisi_qm_stop(vf_qm);
+ if (ret)
+ goto stop_fail;
+ }
+ }
+
+stop_fail:
+ mutex_unlock(&qm_list->lock);
+ return ret;
+}
+
+static int qm_reset_prepare_ready(struct hisi_qm *qm)
+{
+ struct pci_dev *pdev = qm->pdev;
+ struct hisi_qm *pf_qm = pci_get_drvdata(pci_physfn(pdev));
+ int delay = 0;
+
+ /* All reset requests need to be queued for processing */
+ while (test_and_set_bit(QM_DEV_RESET_FLAG, &pf_qm->reset_flag)) {
+ msleep(++delay);
+ if (delay > QM_RESET_WAIT_TIMEOUT)
+ return -EBUSY;
+ }
+
+ return 0;
+}
+
+static int qm_controller_reset_prepare(struct hisi_qm *qm)
+{
+ struct pci_dev *pdev = qm->pdev;
+ int ret;
+
+ ret = qm_reset_prepare_ready(qm);
+ if (ret) {
+ pci_err(pdev, "Controller reset not ready!\n");
+ return ret;
+ }
+
+ if (qm->vfs_num) {
+ ret = qm_vf_reset_prepare(qm);
+ if (ret) {
+ pci_err(pdev, "Fails to stop VFs!\n");
+ return ret;
+ }
+ }
+
+ qm->status.stop_reason = QM_SOFT_RESET;
+ ret = hisi_qm_stop(qm);
+ if (ret) {
+ pci_err(pdev, "Fails to stop QM!\n");
+ return ret;
+ }
+
+ return 0;
+}
+
+static void qm_dev_ecc_mbit_handle(struct hisi_qm *qm)
+{
+ u32 nfe_enb = 0;
+
+ if (!qm->err_status.is_dev_ecc_mbit &&
+ qm->err_status.is_qm_ecc_mbit &&
+ qm->err_ini->close_axi_master_ooo) {
+
+ qm->err_ini->close_axi_master_ooo(qm);
+
+ } else if (qm->err_status.is_dev_ecc_mbit &&
+ !qm->err_status.is_qm_ecc_mbit &&
+ !qm->err_ini->close_axi_master_ooo) {
+
+ nfe_enb = readl(qm->io_base + QM_RAS_NFE_ENABLE);
+ writel(nfe_enb & QM_RAS_NFE_MBIT_DISABLE,
+ qm->io_base + QM_RAS_NFE_ENABLE);
+ writel(QM_ECC_MBIT, qm->io_base + QM_ABNORMAL_INT_SET);
+ }
+}
+
+static int qm_soft_reset(struct hisi_qm *qm)
+{
+ struct pci_dev *pdev = qm->pdev;
+ int ret;
+ u32 val;
+
+ /* Ensure all doorbells and mailboxes received by QM */
+ ret = qm_check_req_recv(qm);
+ if (ret)
+ return ret;
+
+ if (qm->vfs_num) {
+ ret = qm_set_vf_mse(qm, false);
+ if (ret) {
+ pci_err(pdev, "Fails to disable vf MSE bit.\n");
+ return ret;
+ }
+ }
+
+ ret = qm_set_msi(qm, false);
+ if (ret) {
+ pci_err(pdev, "Fails to disable PEH MSI bit.\n");
+ return ret;
+ }
+
+ qm_dev_ecc_mbit_handle(qm);
+
+ /* OOO register set and check */
+ writel(ACC_MASTER_GLOBAL_CTRL_SHUTDOWN,
+ qm->io_base + ACC_MASTER_GLOBAL_CTRL);
+
+ /* If bus lock, reset chip */
+ ret = readl_relaxed_poll_timeout(qm->io_base + ACC_MASTER_TRANS_RETURN,
+ val,
+ (val == ACC_MASTER_TRANS_RETURN_RW),
+ POLL_PERIOD, POLL_TIMEOUT);
+ if (ret) {
+ pci_emerg(pdev, "Bus lock! Please reset system.\n");
+ return ret;
+ }
+
+ ret = qm_set_pf_mse(qm, false);
+ if (ret) {
+ pci_err(pdev, "Fails to disable pf MSE bit.\n");
+ return ret;
+ }
+
+ /* The reset related sub-control registers are not in PCI BAR */
+ if (ACPI_HANDLE(&pdev->dev)) {
+ unsigned long long value = 0;
+ acpi_status s;
+
+ s = acpi_evaluate_integer(ACPI_HANDLE(&pdev->dev),
+ qm->err_ini->err_info.acpi_rst,
+ NULL, &value);
+ if (ACPI_FAILURE(s)) {
+ pci_err(pdev, "NO controller reset method!\n");
+ return -EIO;
+ }
+
+ if (value) {
+ pci_err(pdev, "Reset step %llu failed!\n", value);
+ return -EIO;
+ }
+ } else {
+ pci_err(pdev, "No reset method!\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int qm_vf_reset_done(struct hisi_qm *qm)
+{
+ struct hisi_qm_list *qm_list = qm->qm_list;
+ struct pci_dev *pdev = qm->pdev;
+ struct pci_dev *virtfn;
+ struct hisi_qm *vf_qm;
+ int ret = 0;
+
+ mutex_lock(&qm_list->lock);
+ list_for_each_entry(vf_qm, &qm_list->list, list) {
+ virtfn = vf_qm->pdev;
+ if (virtfn == pdev)
+ continue;
+
+ if (pci_physfn(virtfn) == pdev) {
+ ret = qm_restart(vf_qm);
+ if (ret)
+ goto restart_fail;
+ }
+ }
+
+restart_fail:
+ mutex_unlock(&qm_list->lock);
+ return ret;
+}
+
+static int qm_get_dev_err_status(struct hisi_qm *qm)
+{
+ return qm->err_ini->get_dev_hw_err_status(qm);
+}
+
+static int qm_dev_hw_init(struct hisi_qm *qm)
+{
+ return qm->err_ini->hw_init(qm);
+}
+
+static void qm_restart_prepare(struct hisi_qm *qm)
+{
+ u32 value;
+
+ if (!qm->err_status.is_qm_ecc_mbit &&
+ !qm->err_status.is_dev_ecc_mbit)
+ return;
+
+ /* temporarily close the OOO port used for PEH to write out MSI */
+ value = readl(qm->io_base + ACC_AM_CFG_PORT_WR_EN);
+ writel(value & ~qm->err_ini->err_info.msi_wr_port,
+ qm->io_base + ACC_AM_CFG_PORT_WR_EN);
+
+ /* clear dev ecc 2bit error source if there is one */
+ value = qm_get_dev_err_status(qm) &
+ qm->err_ini->err_info.ecc_2bits_mask;
+ if (value && qm->err_ini->clear_dev_hw_err_status)
+ qm->err_ini->clear_dev_hw_err_status(qm, value);
+
+ /* clear QM ecc mbit error source */
+ writel(QM_ECC_MBIT, qm->io_base + QM_ABNORMAL_INT_SOURCE);
+
+ /* clear AM Reorder Buffer ecc mbit source */
+ writel(ACC_ROB_ECC_ERR_MULTPL, qm->io_base + ACC_AM_ROB_ECC_INT_STS);
+
+ if (qm->err_ini->open_axi_master_ooo)
+ qm->err_ini->open_axi_master_ooo(qm);
+}
+
+static void qm_restart_done(struct hisi_qm *qm)
+{
+ u32 value;
+
+ if (!qm->err_status.is_qm_ecc_mbit &&
+ !qm->err_status.is_dev_ecc_mbit)
+ return;
+
+ /* open the OOO port for PEH to write out MSI */
+ value = readl(qm->io_base + ACC_AM_CFG_PORT_WR_EN);
+ value |= qm->err_ini->err_info.msi_wr_port;
+ writel(value, qm->io_base + ACC_AM_CFG_PORT_WR_EN);
+
+ qm->err_status.is_qm_ecc_mbit = false;
+ qm->err_status.is_dev_ecc_mbit = false;
+}
+
+static int qm_controller_reset_done(struct hisi_qm *qm)
+{
+ struct pci_dev *pdev = qm->pdev;
+ int ret;
+
+ ret = qm_set_msi(qm, true);
+ if (ret) {
+ pci_err(pdev, "Fails to enable PEH MSI bit!\n");
+ return ret;
+ }
+
+ ret = qm_set_pf_mse(qm, true);
+ if (ret) {
+ pci_err(pdev, "Fails to enable pf MSE bit!\n");
+ return ret;
+ }
+
+ if (qm->vfs_num) {
+ ret = qm_set_vf_mse(qm, true);
+ if (ret) {
+ pci_err(pdev, "Fails to enable vf MSE bit!\n");
+ return ret;
+ }
+ }
+
+ ret = qm_dev_hw_init(qm);
+ if (ret) {
+ pci_err(pdev, "Failed to init device\n");
+ return ret;
+ }
+
+ qm_restart_prepare(qm);
+
+ ret = qm_restart(qm);
+ if (ret) {
+ pci_err(pdev, "Failed to start QM!\n");
+ return ret;
+ }
+
+ if (qm->vfs_num) {
+ ret = qm_vf_q_assign(qm, qm->vfs_num);
+ if (ret) {
+ pci_err(pdev, "Failed to assign queue!\n");
+ return ret;
+ }
+ }
+
+ ret = qm_vf_reset_done(qm);
+ if (ret) {
+ pci_err(pdev, "Failed to start VFs!\n");
+ return -EPERM;
+ }
+
+ hisi_qm_dev_err_init(qm);
+ qm_restart_done(qm);
+
+ clear_bit(QM_DEV_RESET_FLAG, &qm->reset_flag);
+
+ return 0;
+}
+
+static int qm_controller_reset(struct hisi_qm *qm)
+{
+ struct pci_dev *pdev = qm->pdev;
+ int ret;
+
+ pci_info(pdev, "Controller resetting...\n");
+
+ ret = qm_controller_reset_prepare(qm);
+ if (ret)
+ return ret;
+
+ ret = qm_soft_reset(qm);
+ if (ret) {
+ pci_err(pdev, "Controller reset failed (%d)\n", ret);
+ return ret;
+ }
+
+ ret = qm_controller_reset_done(qm);
+ if (ret)
+ return ret;
+
+ pci_info(pdev, "Controller reset complete\n");
+
+ return 0;
+}
+
+/**
+ * hisi_qm_dev_slot_reset() - slot reset
+ * @pdev: the PCIe device
+ *
+ * This function offers the QM-related PCIe device reset interface. Drivers
+ * that use the QM can use this function as slot_reset in their struct
+ * pci_error_handlers.
+ */
+pci_ers_result_t hisi_qm_dev_slot_reset(struct pci_dev *pdev)
+{
+ struct hisi_qm *qm = pci_get_drvdata(pdev);
+ int ret;
+
+ if (pdev->is_virtfn)
+ return PCI_ERS_RESULT_RECOVERED;
+
+ pci_aer_clear_nonfatal_status(pdev);
+
+ /* reset pcie device controller */
+ ret = qm_controller_reset(qm);
+ if (ret) {
+ pci_err(pdev, "Controller reset failed (%d)\n", ret);
+ return PCI_ERS_RESULT_DISCONNECT;
+ }
+
+ return PCI_ERS_RESULT_RECOVERED;
+}
+EXPORT_SYMBOL_GPL(hisi_qm_dev_slot_reset);
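Together with hisi_qm_dev_err_detected() and the FLR hooks added further down, a driver can now reuse the whole QM error-handling path; a minimal sketch of the wiring, mirroring the SEC driver change later in this patch:

        static const struct pci_error_handlers example_err_handler = {
                .error_detected = hisi_qm_dev_err_detected,
                .slot_reset     = hisi_qm_dev_slot_reset,
                .reset_prepare  = hisi_qm_reset_prepare,
                .reset_done     = hisi_qm_reset_done,
        };
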
+
+/* check whether the interrupt is caused by an ecc-mbit error */
+static int qm_check_dev_error(struct hisi_qm *qm)
+{
+ int ret;
+
+ if (qm->fun_type == QM_HW_VF)
+ return 0;
+
+ ret = qm_get_hw_error_status(qm) & QM_ECC_MBIT;
+ if (ret)
+ return ret;
+
+ return (qm_get_dev_err_status(qm) &
+ qm->err_ini->err_info.ecc_2bits_mask);
+}
+
+void hisi_qm_reset_prepare(struct pci_dev *pdev)
+{
+ struct hisi_qm *pf_qm = pci_get_drvdata(pci_physfn(pdev));
+ struct hisi_qm *qm = pci_get_drvdata(pdev);
+ u32 delay = 0;
+ int ret;
+
+ hisi_qm_dev_err_uninit(pf_qm);
+
+ /*
+ * Check whether there is an ECC mbit error. If one has occurred, wait
+ * for the soft reset to fix it.
+ */
+ while (qm_check_dev_error(pf_qm)) {
+ msleep(++delay);
+ if (delay > QM_RESET_WAIT_TIMEOUT)
+ return;
+ }
+
+ ret = qm_reset_prepare_ready(qm);
+ if (ret) {
+ pci_err(pdev, "FLR not ready!\n");
+ return;
+ }
+
+ if (qm->vfs_num) {
+ ret = qm_vf_reset_prepare(qm);
+ if (ret) {
+ pci_err(pdev, "Failed to prepare reset, ret = %d.\n",
+ ret);
+ return;
+ }
+ }
+
+ ret = hisi_qm_stop(qm);
+ if (ret) {
+ pci_err(pdev, "Failed to stop QM, ret = %d.\n", ret);
+ return;
+ }
+
+ pci_info(pdev, "FLR resetting...\n");
+}
+EXPORT_SYMBOL_GPL(hisi_qm_reset_prepare);
+
+static bool qm_flr_reset_complete(struct pci_dev *pdev)
+{
+ struct pci_dev *pf_pdev = pci_physfn(pdev);
+ struct hisi_qm *qm = pci_get_drvdata(pf_pdev);
+ u32 id;
+
+ pci_read_config_dword(qm->pdev, PCI_COMMAND, &id);
+ if (id == QM_PCI_COMMAND_INVALID) {
+ pci_err(pdev, "Device can not be used!\n");
+ return false;
+ }
+
+ clear_bit(QM_DEV_RESET_FLAG, &qm->reset_flag);
+
+ return true;
+}
+
+void hisi_qm_reset_done(struct pci_dev *pdev)
+{
+ struct hisi_qm *pf_qm = pci_get_drvdata(pci_physfn(pdev));
+ struct hisi_qm *qm = pci_get_drvdata(pdev);
+ int ret;
+
+ hisi_qm_dev_err_init(pf_qm);
+
+ ret = qm_restart(qm);
+ if (ret) {
+ pci_err(pdev, "Failed to start QM, ret = %d.\n", ret);
+ goto flr_done;
+ }
+
+ if (qm->fun_type == QM_HW_PF) {
+ ret = qm_dev_hw_init(qm);
+ if (ret) {
+ pci_err(pdev, "Failed to init PF, ret = %d.\n", ret);
+ goto flr_done;
+ }
+
+ if (!qm->vfs_num)
+ goto flr_done;
+
+ ret = qm_vf_q_assign(qm, qm->vfs_num);
+ if (ret) {
+ pci_err(pdev, "Failed to assign VFs, ret = %d.\n", ret);
+ goto flr_done;
+ }
+
+ ret = qm_vf_reset_done(qm);
+ if (ret) {
+ pci_err(pdev, "Failed to start VFs, ret = %d.\n", ret);
+ goto flr_done;
+ }
+ }
+
+flr_done:
+ if (qm_flr_reset_complete(pdev))
+ pci_info(pdev, "FLR reset complete\n");
+}
+EXPORT_SYMBOL_GPL(hisi_qm_reset_done);
+
+static irqreturn_t qm_abnormal_irq(int irq, void *data)
+{
+ struct hisi_qm *qm = data;
+ enum acc_err_result ret;
+
+ atomic64_inc(&qm->debug.dfx.abnormal_irq_cnt);
+ ret = qm_process_dev_error(qm);
+ if (ret == ACC_ERR_NEED_RESET)
+ schedule_work(&qm->rst_work);
+
+ return IRQ_HANDLED;
+}
+
+static int qm_irq_register(struct hisi_qm *qm)
+{
+ struct pci_dev *pdev = qm->pdev;
+ int ret;
+
+ ret = request_irq(pci_irq_vector(pdev, QM_EQ_EVENT_IRQ_VECTOR),
+ qm_irq, IRQF_SHARED, qm->dev_name, qm);
+ if (ret)
+ return ret;
+
+ if (qm->ver != QM_HW_V1) {
+ ret = request_irq(pci_irq_vector(pdev, QM_AEQ_EVENT_IRQ_VECTOR),
+ qm_aeq_irq, IRQF_SHARED, qm->dev_name, qm);
+ if (ret)
+ goto err_aeq_irq;
+
+ if (qm->fun_type == QM_HW_PF) {
+ ret = request_irq(pci_irq_vector(pdev,
+ QM_ABNORMAL_EVENT_IRQ_VECTOR),
+ qm_abnormal_irq, IRQF_SHARED,
+ qm->dev_name, qm);
+ if (ret)
+ goto err_abnormal_irq;
+ }
+ }
+
+ return 0;
+
+err_abnormal_irq:
+ free_irq(pci_irq_vector(pdev, QM_AEQ_EVENT_IRQ_VECTOR), qm);
+err_aeq_irq:
+ free_irq(pci_irq_vector(pdev, QM_EQ_EVENT_IRQ_VECTOR), qm);
+ return ret;
+}
+
+static void hisi_qm_controller_reset(struct work_struct *rst_work)
+{
+ struct hisi_qm *qm = container_of(rst_work, struct hisi_qm, rst_work);
+ int ret;
+
+ /* reset pcie device controller */
+ ret = qm_controller_reset(qm);
+ if (ret)
+ dev_err(&qm->pdev->dev, "controller reset failed (%d)\n", ret);
+}
+
+/**
+ * hisi_qm_init() - Initialize the configuration of a qm.
+ * @qm: The qm to be initialized.
+ *
+ * This function initializes the qm; afterwards hisi_qm_start() can be called
+ * to put the qm to work.
+ */
+int hisi_qm_init(struct hisi_qm *qm)
+{
+ struct pci_dev *pdev = qm->pdev;
+ struct device *dev = &pdev->dev;
+ unsigned int num_vec;
+ int ret;
+
+ hisi_qm_pre_init(qm);
+
+ ret = qm_alloc_uacce(qm);
+ if (ret < 0)
+ dev_warn(&pdev->dev, "fail to alloc uacce (%d)\n", ret);
+
+ ret = pci_enable_device_mem(pdev);
+ if (ret < 0) {
+ dev_err(&pdev->dev, "Failed to enable device mem!\n");
+ goto err_remove_uacce;
+ }
+
+ ret = pci_request_mem_regions(pdev, qm->dev_name);
+ if (ret < 0) {
+ dev_err(&pdev->dev, "Failed to request mem regions!\n");
+ goto err_disable_pcidev;
+ }
+
+ qm->phys_base = pci_resource_start(pdev, PCI_BAR_2);
+ qm->phys_size = pci_resource_len(qm->pdev, PCI_BAR_2);
+ qm->io_base = ioremap(qm->phys_base, qm->phys_size);
+ if (!qm->io_base) {
+ ret = -EIO;
+ goto err_release_mem_regions;
+ }
+
+ ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
+ if (ret < 0)
+ goto err_iounmap;
+ pci_set_master(pdev);
+
+ if (!qm->ops->get_irq_num) {
+ ret = -EOPNOTSUPP;
+ goto err_iounmap;
+ }
+ num_vec = qm->ops->get_irq_num(qm);
+ ret = pci_alloc_irq_vectors(pdev, num_vec, num_vec, PCI_IRQ_MSI);
+ if (ret < 0) {
+ dev_err(dev, "Failed to enable MSI vectors!\n");
+ goto err_iounmap;
+ }
+
+ ret = qm_irq_register(qm);
+ if (ret)
+ goto err_free_irq_vectors;
+
+ if (qm->fun_type == QM_HW_VF && qm->ver != QM_HW_V1) {
+ /* v2 starts to support get vft by mailbox */
+ ret = hisi_qm_get_vft(qm, &qm->qp_base, &qm->qp_num);
+ if (ret)
+ goto err_irq_unregister;
+ }
+
+ ret = hisi_qm_memory_init(qm);
+ if (ret)
+ goto err_irq_unregister;
+
+ INIT_WORK(&qm->work, qm_work_process);
+ if (qm->fun_type == QM_HW_PF)
+ INIT_WORK(&qm->rst_work, hisi_qm_controller_reset);
+
+ atomic_set(&qm->status.flags, QM_INIT);
+
+ return 0;
+
+err_irq_unregister:
+ qm_irq_unregister(qm);
+err_free_irq_vectors:
+ pci_free_irq_vectors(pdev);
+err_iounmap:
+ iounmap(qm->io_base);
+err_release_mem_regions:
+ pci_release_mem_regions(pdev);
+err_disable_pcidev:
+ pci_disable_device(pdev);
+err_remove_uacce:
+ uacce_remove(qm->uacce);
+ qm->uacce = NULL;
+ return ret;
+}
+EXPORT_SYMBOL_GPL(hisi_qm_init);
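A minimal sketch of a PF probe flow built on this export, on the assumption that hisi_qm_start() is called next as the kernel-doc above describes; the example_* names, SQE size and queue count are placeholders, and a real driver fills in more fields (qp_num from its own defaults, qm_list, err_ini, ...) before starting the qm:

        static int example_probe(struct pci_dev *pdev, const struct pci_device_id *id)
        {
                struct hisi_qm *qm;
                int ret;

                qm = devm_kzalloc(&pdev->dev, sizeof(*qm), GFP_KERNEL);
                if (!qm)
                        return -ENOMEM;

                qm->pdev = pdev;
                qm->ver = pdev->revision;
                qm->sqe_size = 128;                     /* engine-specific SQE size, placeholder */
                qm->dev_name = "hisi_example_acc";      /* placeholder name */
                qm->fun_type = QM_HW_PF;
                qm->qp_base = 0;
                qm->qp_num = 64;                        /* placeholder queue count */

                ret = hisi_qm_init(qm);
                if (ret)
                        return ret;

                ret = hisi_qm_start(qm);
                if (ret)
                        hisi_qm_uninit(qm);

                return ret;
        }
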
+
+
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Zhou Wang <wangzhou1@hisilicon.com>");
MODULE_DESCRIPTION("HiSilicon Accelerator queue manager driver");
diff --git a/drivers/crypto/hisilicon/qm.h b/drivers/crypto/hisilicon/qm.h
index ec5b6f48db6c..0a351de8d838 100644
--- a/drivers/crypto/hisilicon/qm.h
+++ b/drivers/crypto/hisilicon/qm.h
@@ -8,6 +8,10 @@
#include <linux/module.h>
#include <linux/pci.h>
+#define QM_QNUM_V1 4096
+#define QM_QNUM_V2 1024
+#define QM_MAX_VFS_NUM_V2 63
+
/* qm user domain */
#define QM_ARUSER_M_CFG_1 0x100088
#define AXUSER_SNOOP_ENABLE BIT(30)
@@ -70,7 +74,7 @@
#define QM_BASE_NFE (QM_AXI_RRESP | QM_AXI_BRESP | QM_ECC_MBIT | \
QM_ACC_GET_TASK_TIMEOUT | QM_DB_TIMEOUT | \
- QM_OF_FIFO_OF)
+ QM_OF_FIFO_OF | QM_DB_RANDOM_INVALID)
#define QM_BASE_CE QM_ECC_1BIT
#define QM_Q_DEPTH 1024
@@ -80,14 +84,31 @@
/* page number for queue file region */
#define QM_DOORBELL_PAGE_NR 1
+enum qm_stop_reason {
+ QM_NORMAL,
+ QM_SOFT_RESET,
+ QM_FLR,
+};
+
+enum qm_state {
+ QM_INIT = 0,
+ QM_START,
+ QM_CLOSE,
+ QM_STOP,
+};
+
enum qp_state {
+ QP_INIT = 1,
+ QP_START,
QP_STOP,
+ QP_CLOSE,
};
enum qm_hw_ver {
QM_HW_UNKNOWN = -1,
QM_HW_V1 = 0x20,
QM_HW_V2 = 0x21,
+ QM_HW_V3 = 0x30,
};
enum qm_fun_type {
@@ -101,6 +122,14 @@ enum qm_debug_file {
DEBUG_FILE_NUM,
};
+struct qm_dfx {
+ atomic64_t err_irq_cnt;
+ atomic64_t aeq_irq_cnt;
+ atomic64_t abnormal_irq_cnt;
+ atomic64_t create_qp_err_cnt;
+ atomic64_t mb_err_cnt;
+};
+
struct debugfs_file {
enum qm_debug_file index;
struct mutex lock;
@@ -109,6 +138,9 @@ struct debugfs_file {
struct qm_debug {
u32 curr_qm_qp_num;
+ u32 sqe_mask_offset;
+ u32 sqe_mask_len;
+ struct qm_dfx dfx;
struct dentry *debug_root;
struct dentry *qm_d;
struct debugfs_file files[DEBUG_FILE_NUM];
@@ -125,22 +157,34 @@ struct hisi_qm_status {
bool eqc_phase;
u32 aeq_head;
bool aeqc_phase;
- unsigned long flags;
+ atomic_t flags;
+ int stop_reason;
};
struct hisi_qm;
struct hisi_qm_err_info {
+ char *acpi_rst;
+ u32 msi_wr_port;
+ u32 ecc_2bits_mask;
u32 ce;
u32 nfe;
u32 fe;
- u32 msi;
+};
+
+struct hisi_qm_err_status {
+ u32 is_qm_ecc_mbit;
+ u32 is_dev_ecc_mbit;
};
struct hisi_qm_err_ini {
+ int (*hw_init)(struct hisi_qm *qm);
void (*hw_err_enable)(struct hisi_qm *qm);
void (*hw_err_disable)(struct hisi_qm *qm);
u32 (*get_dev_hw_err_status)(struct hisi_qm *qm);
+ void (*clear_dev_hw_err_status)(struct hisi_qm *qm, u32 err_sts);
+ void (*open_axi_master_ooo)(struct hisi_qm *qm);
+ void (*close_axi_master_ooo)(struct hisi_qm *qm);
void (*log_dev_hw_err)(struct hisi_qm *qm, u32 err_sts);
struct hisi_qm_err_info err_info;
};
@@ -161,7 +205,9 @@ struct hisi_qm {
u32 qp_num;
u32 qp_in_used;
u32 ctrl_qp_num;
+ u32 vfs_num;
struct list_head list;
+ struct hisi_qm_list *qm_list;
struct qm_dma qdma;
struct qm_sqc *sqc;
@@ -175,10 +221,12 @@ struct hisi_qm {
struct hisi_qm_status status;
const struct hisi_qm_err_ini *err_ini;
+ struct hisi_qm_err_status err_status;
+ unsigned long reset_flag;
- rwlock_t qps_lock;
- unsigned long *qp_bitmap;
- struct hisi_qp **qp_array;
+ struct rw_semaphore qps_lock;
+ struct idr qp_idr;
+ struct hisi_qp *qp_array;
struct mutex mailbox_lock;
@@ -187,13 +235,12 @@ struct hisi_qm {
struct qm_debug debug;
u32 error_mask;
- u32 msi_mask;
struct workqueue_struct *wq;
struct work_struct work;
+ struct work_struct rst_work;
const char *algs;
- bool use_dma_api;
bool use_sva;
resource_size_t phys_base;
resource_size_t phys_size;
@@ -205,7 +252,7 @@ struct hisi_qp_status {
u16 sq_tail;
u16 cq_head;
bool cqc_phase;
- unsigned long flags;
+ atomic_t flags;
};
struct hisi_qp_ops {
@@ -230,10 +277,58 @@ struct hisi_qp {
void (*event_cb)(struct hisi_qp *qp);
struct hisi_qm *qm;
+ bool is_resetting;
u16 pasid;
struct uacce_queue *uacce_q;
};
+static inline int q_num_set(const char *val, const struct kernel_param *kp,
+ unsigned int device)
+{
+ struct pci_dev *pdev = pci_get_device(PCI_VENDOR_ID_HUAWEI,
+ device, NULL);
+ u32 n, q_num;
+ int ret;
+
+ if (!val)
+ return -EINVAL;
+
+ if (!pdev) {
+ q_num = min_t(u32, QM_QNUM_V1, QM_QNUM_V2);
+ pr_info("No device found currently, suppose queue number is %d\n",
+ q_num);
+ } else {
+ if (pdev->revision == QM_HW_V1)
+ q_num = QM_QNUM_V1;
+ else
+ q_num = QM_QNUM_V2;
+ }
+
+ ret = kstrtou32(val, 10, &n);
+ if (ret || !n || n > q_num)
+ return -EINVAL;
+
+ return param_set_int(val, kp);
+}
+
+static inline int vfs_num_set(const char *val, const struct kernel_param *kp)
+{
+ u32 n;
+ int ret;
+
+ if (!val)
+ return -EINVAL;
+
+ ret = kstrtou32(val, 10, &n);
+ if (ret < 0)
+ return ret;
+
+ if (n > QM_MAX_VFS_NUM_V2)
+ return -EINVAL;
+
+ return param_set_int(val, kp);
+}
+
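These two inline helpers let each accelerator driver validate its pf_q_num and vfs_num module parameters in one line; a minimal sketch of the wiring, mirroring the SEC and ZIP changes later in this patch (EXAMPLE_PF_PCI_DEVICE_ID and example_* are placeholders):

        static int example_pf_q_num_set(const char *val, const struct kernel_param *kp)
        {
                return q_num_set(val, kp, EXAMPLE_PF_PCI_DEVICE_ID);
        }

        static const struct kernel_param_ops example_vfs_num_ops = {
                .set = vfs_num_set,
                .get = param_get_int,
        };

        static u32 vfs_num;
        module_param_cb(vfs_num, &example_vfs_num_ops, &vfs_num, 0444);
        MODULE_PARM_DESC(vfs_num, "Number of VFs to enable (1-63), 0 (default)");
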
static inline void hisi_qm_init_list(struct hisi_qm_list *qm_list)
{
INIT_LIST_HEAD(&qm_list->list);
@@ -267,14 +362,19 @@ void hisi_qm_release_qp(struct hisi_qp *qp);
int hisi_qp_send(struct hisi_qp *qp, const void *msg);
int hisi_qm_get_free_qp_num(struct hisi_qm *qm);
int hisi_qm_get_vft(struct hisi_qm *qm, u32 *base, u32 *number);
-int hisi_qm_set_vft(struct hisi_qm *qm, u32 fun_num, u32 base, u32 number);
int hisi_qm_debug_init(struct hisi_qm *qm);
enum qm_hw_ver hisi_qm_get_hw_version(struct pci_dev *pdev);
void hisi_qm_debug_regs_clear(struct hisi_qm *qm);
+int hisi_qm_sriov_enable(struct pci_dev *pdev, int max_vfs);
+int hisi_qm_sriov_disable(struct pci_dev *pdev);
+int hisi_qm_sriov_configure(struct pci_dev *pdev, int num_vfs);
void hisi_qm_dev_err_init(struct hisi_qm *qm);
void hisi_qm_dev_err_uninit(struct hisi_qm *qm);
pci_ers_result_t hisi_qm_dev_err_detected(struct pci_dev *pdev,
pci_channel_state_t state);
+pci_ers_result_t hisi_qm_dev_slot_reset(struct pci_dev *pdev);
+void hisi_qm_reset_prepare(struct pci_dev *pdev);
+void hisi_qm_reset_done(struct pci_dev *pdev);
struct hisi_acc_sgl_pool;
struct hisi_acc_hw_sgl *hisi_acc_sg_buf_map_to_hw_sgl(struct device *dev,
diff --git a/drivers/crypto/hisilicon/sec2/sec.h b/drivers/crypto/hisilicon/sec2/sec.h
index 3598fa17beb2..7b64aca704d6 100644
--- a/drivers/crypto/hisilicon/sec2/sec.h
+++ b/drivers/crypto/hisilicon/sec2/sec.h
@@ -160,6 +160,10 @@ struct sec_debug_file {
struct sec_dfx {
atomic64_t send_cnt;
atomic64_t recv_cnt;
+ atomic64_t send_busy_cnt;
+ atomic64_t err_bd_cnt;
+ atomic64_t invalid_req_cnt;
+ atomic64_t done_flag_cnt;
};
struct sec_debug {
@@ -172,7 +176,6 @@ struct sec_dev {
struct sec_debug debug;
u32 ctx_q_num;
bool iommu_used;
- u32 num_vfs;
unsigned long status;
};
diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c
index 7f1c6a31b82f..64614a9bdf21 100644
--- a/drivers/crypto/hisilicon/sec2/sec_crypto.c
+++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c
@@ -148,6 +148,7 @@ static int sec_aead_verify(struct sec_req *req)
static void sec_req_cb(struct hisi_qp *qp, void *resp)
{
struct sec_qp_ctx *qp_ctx = qp->qp_ctx;
+ struct sec_dfx *dfx = &qp_ctx->ctx->sec->debug.dfx;
struct sec_sqe *bd = resp;
struct sec_ctx *ctx;
struct sec_req *req;
@@ -157,11 +158,16 @@ static void sec_req_cb(struct hisi_qp *qp, void *resp)
type = bd->type_cipher_auth & SEC_TYPE_MASK;
if (unlikely(type != SEC_BD_TYPE2)) {
+ atomic64_inc(&dfx->err_bd_cnt);
pr_err("err bd type [%d]\n", type);
return;
}
req = qp_ctx->req_list[le16_to_cpu(bd->type2.tag)];
+ if (unlikely(!req)) {
+ atomic64_inc(&dfx->invalid_req_cnt);
+ return;
+ }
req->err_type = bd->type2.error_type;
ctx = req->ctx;
done = le16_to_cpu(bd->type2.done_flag) & SEC_DONE_MASK;
@@ -174,12 +180,13 @@ static void sec_req_cb(struct hisi_qp *qp, void *resp)
"err_type[%d],done[%d],flag[%d]\n",
req->err_type, done, flag);
err = -EIO;
+ atomic64_inc(&dfx->done_flag_cnt);
}
if (ctx->alg_type == SEC_AEAD && !req->c_req.encrypt)
err = sec_aead_verify(req);
- atomic64_inc(&ctx->sec->debug.dfx.recv_cnt);
+ atomic64_inc(&dfx->recv_cnt);
ctx->req_op->buf_unmap(ctx, req);
@@ -200,10 +207,12 @@ static int sec_bd_send(struct sec_ctx *ctx, struct sec_req *req)
return -ENOBUFS;
if (!ret) {
- if (req->fake_busy)
+ if (req->fake_busy) {
+ atomic64_inc(&ctx->sec->debug.dfx.send_busy_cnt);
ret = -EBUSY;
- else
+ } else {
ret = -EINPROGRESS;
+ }
}
return ret;
@@ -832,7 +841,6 @@ static int sec_aead_auth_set_key(struct sec_auth_ctx *ctx,
struct crypto_authenc_keys *keys)
{
struct crypto_shash *hash_tfm = ctx->hash_tfm;
- SHASH_DESC_ON_STACK(shash, hash_tfm);
int blocksize, ret;
if (!keys->authkeylen) {
@@ -842,8 +850,8 @@ static int sec_aead_auth_set_key(struct sec_auth_ctx *ctx,
blocksize = crypto_shash_blocksize(hash_tfm);
if (keys->authkeylen > blocksize) {
- ret = crypto_shash_digest(shash, keys->authkey,
- keys->authkeylen, ctx->a_key);
+ ret = crypto_shash_tfm_digest(hash_tfm, keys->authkey,
+ keys->authkeylen, ctx->a_key);
if (ret) {
pr_err("hisi_sec2: aead auth digest error!\n");
return -EINVAL;
diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c
index 1f54ebe164b6..a4cb58b54b25 100644
--- a/drivers/crypto/hisilicon/sec2/sec_main.c
+++ b/drivers/crypto/hisilicon/sec2/sec_main.c
@@ -80,6 +80,9 @@
#define SEC_VF_CNT_MASK 0xffffffc0
#define SEC_DBGFS_VAL_MAX_LEN 20
+#define SEC_SQE_MASK_OFFSET 64
+#define SEC_SQE_MASK_LEN 48
+
#define SEC_ADDR(qm, offset) ((qm)->io_base + (offset) + \
SEC_ENGINE_PF_CFG_OFF + SEC_ACC_COMMON_REG_OFF)
@@ -88,6 +91,11 @@ struct sec_hw_error {
const char *msg;
};
+struct sec_dfx_item {
+ const char *name;
+ u32 offset;
+};
+
static const char sec_name[] = "hisi_sec2";
static struct dentry *sec_debugfs_root;
static struct hisi_qm_list sec_devices;
@@ -110,7 +118,16 @@ static const char * const sec_dbg_file_name[] = {
[SEC_CLEAR_ENABLE] = "clear_enable",
};
-static struct debugfs_reg32 sec_dfx_regs[] = {
+static struct sec_dfx_item sec_dfx_labels[] = {
+ {"send_cnt", offsetof(struct sec_dfx, send_cnt)},
+ {"recv_cnt", offsetof(struct sec_dfx, recv_cnt)},
+ {"send_busy_cnt", offsetof(struct sec_dfx, send_busy_cnt)},
+ {"err_bd_cnt", offsetof(struct sec_dfx, err_bd_cnt)},
+ {"invalid_req_cnt", offsetof(struct sec_dfx, invalid_req_cnt)},
+ {"done_flag_cnt", offsetof(struct sec_dfx, done_flag_cnt)},
+};
+
+static const struct debugfs_reg32 sec_dfx_regs[] = {
{"SEC_PF_ABNORMAL_INT_SOURCE ", 0x301010},
{"SEC_SAA_EN ", 0x301270},
{"SEC_BD_LATENCY_MIN ", 0x301600},
@@ -136,45 +153,14 @@ static struct debugfs_reg32 sec_dfx_regs[] = {
static int sec_pf_q_num_set(const char *val, const struct kernel_param *kp)
{
- struct pci_dev *pdev;
- u32 n, q_num;
- u8 rev_id;
- int ret;
-
- if (!val)
- return -EINVAL;
-
- pdev = pci_get_device(PCI_VENDOR_ID_HUAWEI,
- SEC_PF_PCI_DEVICE_ID, NULL);
- if (!pdev) {
- q_num = min_t(u32, SEC_QUEUE_NUM_V1, SEC_QUEUE_NUM_V2);
- pr_info("No device, suppose queue number is %d!\n", q_num);
- } else {
- rev_id = pdev->revision;
-
- switch (rev_id) {
- case QM_HW_V1:
- q_num = SEC_QUEUE_NUM_V1;
- break;
- case QM_HW_V2:
- q_num = SEC_QUEUE_NUM_V2;
- break;
- default:
- return -EINVAL;
- }
- }
-
- ret = kstrtou32(val, 10, &n);
- if (ret || !n || n > q_num)
- return -EINVAL;
-
- return param_set_int(val, kp);
+ return q_num_set(val, kp, SEC_PF_PCI_DEVICE_ID);
}
static const struct kernel_param_ops sec_pf_q_num_ops = {
.set = sec_pf_q_num_set,
.get = param_get_int,
};
+
static u32 pf_q_num = SEC_PF_DEF_Q_NUM;
module_param_cb(pf_q_num, &sec_pf_q_num_ops, &pf_q_num, 0444);
MODULE_PARM_DESC(pf_q_num, "Number of queues in PF(v1 0-4096, v2 0-1024)");
@@ -207,6 +193,15 @@ static u32 ctx_q_num = SEC_CTX_Q_NUM_DEF;
module_param_cb(ctx_q_num, &sec_ctx_q_num_ops, &ctx_q_num, 0444);
MODULE_PARM_DESC(ctx_q_num, "Queue num in ctx (24 default, 2, 4, ..., 32)");
+static const struct kernel_param_ops vfs_num_ops = {
+ .set = vfs_num_set,
+ .get = param_get_int,
+};
+
+static u32 vfs_num;
+module_param_cb(vfs_num, &vfs_num_ops, &vfs_num, 0444);
+MODULE_PARM_DESC(vfs_num, "Number of VFs to enable(1-63), 0(default)");
+
void sec_destroy_qps(struct hisi_qp **qps, int qp_num)
{
hisi_qm_free_qps(qps, qp_num);
@@ -240,9 +235,8 @@ static const struct pci_device_id sec_dev_ids[] = {
};
MODULE_DEVICE_TABLE(pci, sec_dev_ids);
-static u8 sec_get_endian(struct sec_dev *sec)
+static u8 sec_get_endian(struct hisi_qm *qm)
{
- struct hisi_qm *qm = &sec->qm;
u32 reg;
/*
@@ -270,9 +264,8 @@ static u8 sec_get_endian(struct sec_dev *sec)
return SEC_64BE;
}
-static int sec_engine_init(struct sec_dev *sec)
+static int sec_engine_init(struct hisi_qm *qm)
{
- struct hisi_qm *qm = &sec->qm;
int ret;
u32 reg;
@@ -315,7 +308,7 @@ static int sec_engine_init(struct sec_dev *sec)
/* config endian */
reg = readl_relaxed(SEC_ADDR(qm, SEC_CONTROL_REG));
- reg |= sec_get_endian(sec);
+ reg |= sec_get_endian(qm);
writel_relaxed(reg, SEC_ADDR(qm, SEC_CONTROL_REG));
/* Enable sm4 xts mode multiple iv */
@@ -325,10 +318,8 @@ static int sec_engine_init(struct sec_dev *sec)
return 0;
}
-static int sec_set_user_domain_and_cache(struct sec_dev *sec)
+static int sec_set_user_domain_and_cache(struct hisi_qm *qm)
{
- struct hisi_qm *qm = &sec->qm;
-
/* qm user domain */
writel(AXUSER_BASE, qm->io_base + QM_ARUSER_M_CFG_1);
writel(ARUSER_M_CFG_ENABLE, qm->io_base + QM_ARUSER_M_CFG_ENABLE);
@@ -349,7 +340,7 @@ static int sec_set_user_domain_and_cache(struct sec_dev *sec)
CQC_CACHE_WB_ENABLE | FIELD_PREP(SQC_CACHE_WB_THRD, 1) |
FIELD_PREP(CQC_CACHE_WB_THRD, 1), qm->io_base + QM_CACHE_CTL);
- return sec_engine_init(sec);
+ return sec_engine_init(qm);
}
/* sec_debug_regs_clear() - clear the sec debug regs */
@@ -424,23 +415,22 @@ static u32 sec_current_qm_read(struct sec_debug_file *file)
static int sec_current_qm_write(struct sec_debug_file *file, u32 val)
{
struct hisi_qm *qm = file->qm;
- struct sec_dev *sec = container_of(qm, struct sec_dev, qm);
u32 vfq_num;
u32 tmp;
- if (val > sec->num_vfs)
+ if (val > qm->vfs_num)
return -EINVAL;
/* According PF or VF Dev ID to calculation curr_qm_qp_num and store */
if (!val) {
qm->debug.curr_qm_qp_num = qm->qp_num;
} else {
- vfq_num = (qm->ctrl_qp_num - qm->qp_num) / sec->num_vfs;
+ vfq_num = (qm->ctrl_qp_num - qm->qp_num) / qm->vfs_num;
- if (val == sec->num_vfs)
+ if (val == qm->vfs_num)
qm->debug.curr_qm_qp_num =
qm->ctrl_qp_num - qm->qp_num -
- (sec->num_vfs - 1) * vfq_num;
+ (qm->vfs_num - 1) * vfq_num;
else
qm->debug.curr_qm_qp_num = vfq_num;
}
@@ -570,10 +560,22 @@ static const struct file_operations sec_dbg_fops = {
static int sec_debugfs_atomic64_get(void *data, u64 *val)
{
*val = atomic64_read((atomic64_t *)data);
+
+ return 0;
+}
+
+static int sec_debugfs_atomic64_set(void *data, u64 val)
+{
+ if (val)
+ return -EINVAL;
+
+ atomic64_set((atomic64_t *)data, 0);
+
return 0;
}
+
DEFINE_DEBUGFS_ATTRIBUTE(sec_atomic64_ops, sec_debugfs_atomic64_get,
- NULL, "%lld\n");
+ sec_debugfs_atomic64_set, "%lld\n");
static int sec_core_debug_init(struct sec_dev *sec)
{
@@ -582,6 +584,7 @@ static int sec_core_debug_init(struct sec_dev *sec)
struct sec_dfx *dfx = &sec->debug.dfx;
struct debugfs_regset32 *regset;
struct dentry *tmp_d;
+ int i;
tmp_d = debugfs_create_dir("sec_dfx", sec->qm.debug.debug_root);
@@ -593,13 +596,15 @@ static int sec_core_debug_init(struct sec_dev *sec)
regset->nregs = ARRAY_SIZE(sec_dfx_regs);
regset->base = qm->io_base;
- debugfs_create_regset32("regs", 0444, tmp_d, regset);
-
- debugfs_create_file("send_cnt", 0444, tmp_d,
- &dfx->send_cnt, &sec_atomic64_ops);
+ if (qm->pdev->device == SEC_PF_PCI_DEVICE_ID)
+ debugfs_create_regset32("regs", 0444, tmp_d, regset);
- debugfs_create_file("recv_cnt", 0444, tmp_d,
- &dfx->recv_cnt, &sec_atomic64_ops);
+ for (i = 0; i < ARRAY_SIZE(sec_dfx_labels); i++) {
+ atomic64_t *data = (atomic64_t *)((uintptr_t)dfx +
+ sec_dfx_labels[i].offset);
+ debugfs_create_file(sec_dfx_labels[i].name, 0644,
+ tmp_d, data, &sec_atomic64_ops);
+ }
return 0;
}
@@ -630,6 +635,9 @@ static int sec_debugfs_init(struct sec_dev *sec)
qm->debug.debug_root = debugfs_create_dir(dev_name(dev),
sec_debugfs_root);
+
+ qm->debug.sqe_mask_offset = SEC_SQE_MASK_OFFSET;
+ qm->debug.sqe_mask_len = SEC_SQE_MASK_LEN;
ret = hisi_qm_debug_init(qm);
if (ret)
goto failed_to_create;
@@ -675,8 +683,6 @@ static void sec_log_hw_error(struct hisi_qm *qm, u32 err_sts)
}
errs++;
}
-
- writel(err_sts, qm->io_base + SEC_CORE_INT_SOURCE);
}
static u32 sec_get_hw_err_status(struct hisi_qm *qm)
@@ -684,17 +690,36 @@ static u32 sec_get_hw_err_status(struct hisi_qm *qm)
return readl(qm->io_base + SEC_CORE_INT_STATUS);
}
+static void sec_clear_hw_err_status(struct hisi_qm *qm, u32 err_sts)
+{
+ writel(err_sts, qm->io_base + SEC_CORE_INT_SOURCE);
+}
+
+static void sec_open_axi_master_ooo(struct hisi_qm *qm)
+{
+ u32 val;
+
+ val = readl(SEC_ADDR(qm, SEC_CONTROL_REG));
+ writel(val & SEC_AXI_SHUTDOWN_DISABLE, SEC_ADDR(qm, SEC_CONTROL_REG));
+ writel(val | SEC_AXI_SHUTDOWN_ENABLE, SEC_ADDR(qm, SEC_CONTROL_REG));
+}
+
static const struct hisi_qm_err_ini sec_err_ini = {
+ .hw_init = sec_set_user_domain_and_cache,
.hw_err_enable = sec_hw_error_enable,
.hw_err_disable = sec_hw_error_disable,
.get_dev_hw_err_status = sec_get_hw_err_status,
+ .clear_dev_hw_err_status = sec_clear_hw_err_status,
.log_dev_hw_err = sec_log_hw_error,
+ .open_axi_master_ooo = sec_open_axi_master_ooo,
.err_info = {
.ce = QM_BASE_CE,
.nfe = QM_BASE_NFE | QM_ACC_DO_TASK_TIMEOUT |
QM_ACC_WB_NOT_READY_TIMEOUT,
.fe = 0,
- .msi = QM_DB_RANDOM_INVALID,
+ .ecc_2bits_mask = SEC_CORE_INT_STATUS_M_ECC,
+ .msi_wr_port = BIT(0),
+ .acpi_rst = "SRST",
}
};
@@ -703,22 +728,14 @@ static int sec_pf_probe_init(struct sec_dev *sec)
struct hisi_qm *qm = &sec->qm;
int ret;
- switch (qm->ver) {
- case QM_HW_V1:
+ if (qm->ver == QM_HW_V1)
qm->ctrl_qp_num = SEC_QUEUE_NUM_V1;
- break;
-
- case QM_HW_V2:
+ else
qm->ctrl_qp_num = SEC_QUEUE_NUM_V2;
- break;
-
- default:
- return -EINVAL;
- }
qm->err_ini = &sec_err_ini;
- ret = sec_set_user_domain_and_cache(sec);
+ ret = sec_set_user_domain_and_cache(qm);
if (ret)
return ret;
@@ -730,32 +747,30 @@ static int sec_pf_probe_init(struct sec_dev *sec)
static int sec_qm_init(struct hisi_qm *qm, struct pci_dev *pdev)
{
- enum qm_hw_ver rev_id;
-
- rev_id = hisi_qm_get_hw_version(pdev);
- if (rev_id == QM_HW_UNKNOWN)
- return -ENODEV;
+ int ret;
qm->pdev = pdev;
- qm->ver = rev_id;
-
+ qm->ver = pdev->revision;
qm->sqe_size = SEC_SQE_SIZE;
qm->dev_name = sec_name;
+
qm->fun_type = (pdev->device == SEC_PF_PCI_DEVICE_ID) ?
QM_HW_PF : QM_HW_VF;
- qm->use_dma_api = true;
-
- return hisi_qm_init(qm);
-}
-
-static void sec_qm_uninit(struct hisi_qm *qm)
-{
- hisi_qm_uninit(qm);
-}
-
-static int sec_probe_init(struct hisi_qm *qm, struct sec_dev *sec)
-{
- int ret;
+ if (qm->fun_type == QM_HW_PF) {
+ qm->qp_base = SEC_PF_DEF_Q_BASE;
+ qm->qp_num = pf_q_num;
+ qm->debug.curr_qm_qp_num = pf_q_num;
+ qm->qm_list = &sec_devices;
+ } else if (qm->fun_type == QM_HW_VF && qm->ver == QM_HW_V1) {
+ /*
+ * there is no way to get the qm configuration from within a VM on
+ * v1 hardware, so currently the PF is forced to use SEC_PF_DEF_Q_NUM
+ * and only one VF is triggered on v1 hardware.
+ * v2 hardware has no such problem.
+ */
+ qm->qp_base = SEC_PF_DEF_Q_NUM;
+ qm->qp_num = SEC_QUEUE_NUM_V1 - SEC_PF_DEF_Q_NUM;
+ }
/*
* WQ_HIGHPRI: SEC request must be low delayed,
@@ -763,47 +778,38 @@ static int sec_probe_init(struct hisi_qm *qm, struct sec_dev *sec)
* WQ_UNBOUND: SEC task is likely with long
* running CPU intensive workloads.
*/
- qm->wq = alloc_workqueue("%s", WQ_HIGHPRI |
- WQ_MEM_RECLAIM | WQ_UNBOUND, num_online_cpus(),
- pci_name(qm->pdev));
+ qm->wq = alloc_workqueue("%s", WQ_HIGHPRI | WQ_MEM_RECLAIM |
+ WQ_UNBOUND, num_online_cpus(),
+ pci_name(qm->pdev));
if (!qm->wq) {
pci_err(qm->pdev, "fail to alloc workqueue\n");
return -ENOMEM;
}
- if (qm->fun_type == QM_HW_PF) {
- qm->qp_base = SEC_PF_DEF_Q_BASE;
- qm->qp_num = pf_q_num;
- qm->debug.curr_qm_qp_num = pf_q_num;
+ ret = hisi_qm_init(qm);
+ if (ret)
+ destroy_workqueue(qm->wq);
+
+ return ret;
+}
+static void sec_qm_uninit(struct hisi_qm *qm)
+{
+ hisi_qm_uninit(qm);
+}
+
+static int sec_probe_init(struct sec_dev *sec)
+{
+ struct hisi_qm *qm = &sec->qm;
+ int ret;
+
+ if (qm->fun_type == QM_HW_PF) {
ret = sec_pf_probe_init(sec);
if (ret)
- goto err_probe_uninit;
- } else if (qm->fun_type == QM_HW_VF) {
- /*
- * have no way to get qm configure in VM in v1 hardware,
- * so currently force PF to uses SEC_PF_DEF_Q_NUM, and force
- * to trigger only one VF in v1 hardware.
- * v2 hardware has no such problem.
- */
- if (qm->ver == QM_HW_V1) {
- qm->qp_base = SEC_PF_DEF_Q_NUM;
- qm->qp_num = SEC_QUEUE_NUM_V1 - SEC_PF_DEF_Q_NUM;
- } else if (qm->ver == QM_HW_V2) {
- /* v2 starts to support get vft by mailbox */
- ret = hisi_qm_get_vft(qm, &qm->qp_base, &qm->qp_num);
- if (ret)
- goto err_probe_uninit;
- }
- } else {
- ret = -ENODEV;
- goto err_probe_uninit;
+ return ret;
}
return 0;
-err_probe_uninit:
- destroy_workqueue(qm->wq);
- return ret;
}
static void sec_probe_uninit(struct hisi_qm *qm)
@@ -840,20 +846,17 @@ static int sec_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (!sec)
return -ENOMEM;
- pci_set_drvdata(pdev, sec);
-
- sec->ctx_q_num = ctx_q_num;
- sec_iommu_used_check(sec);
-
qm = &sec->qm;
-
ret = sec_qm_init(qm, pdev);
if (ret) {
- pci_err(pdev, "Failed to pre init qm!\n");
+ pci_err(pdev, "Failed to init SEC QM (%d)!\n", ret);
return ret;
}
- ret = sec_probe_init(qm, sec);
+ sec->ctx_q_num = ctx_q_num;
+ sec_iommu_used_check(sec);
+
+ ret = sec_probe_init(sec);
if (ret) {
pci_err(pdev, "Failed to probe!\n");
goto err_qm_uninit;
@@ -877,8 +880,17 @@ static int sec_probe(struct pci_dev *pdev, const struct pci_device_id *id)
goto err_remove_from_list;
}
+ if (qm->fun_type == QM_HW_PF && vfs_num) {
+ ret = hisi_qm_sriov_enable(pdev, vfs_num);
+ if (ret < 0)
+ goto err_crypto_unregister;
+ }
+
return 0;
+err_crypto_unregister:
+ sec_unregister_from_crypto();
+
err_remove_from_list:
hisi_qm_del_from_list(qm, &sec_devices);
sec_debugfs_exit(sec);
@@ -893,110 +905,6 @@ err_qm_uninit:
return ret;
}
-/* now we only support equal assignment */
-static int sec_vf_q_assign(struct sec_dev *sec, u32 num_vfs)
-{
- struct hisi_qm *qm = &sec->qm;
- u32 qp_num = qm->qp_num;
- u32 q_base = qp_num;
- u32 q_num, remain_q_num;
- int i, j, ret;
-
- if (!num_vfs)
- return -EINVAL;
-
- remain_q_num = qm->ctrl_qp_num - qp_num;
- q_num = remain_q_num / num_vfs;
-
- for (i = 1; i <= num_vfs; i++) {
- if (i == num_vfs)
- q_num += remain_q_num % num_vfs;
- ret = hisi_qm_set_vft(qm, i, q_base, q_num);
- if (ret) {
- for (j = i; j > 0; j--)
- hisi_qm_set_vft(qm, j, 0, 0);
- return ret;
- }
- q_base += q_num;
- }
-
- return 0;
-}
-
-static int sec_clear_vft_config(struct sec_dev *sec)
-{
- struct hisi_qm *qm = &sec->qm;
- u32 num_vfs = sec->num_vfs;
- int ret;
- u32 i;
-
- for (i = 1; i <= num_vfs; i++) {
- ret = hisi_qm_set_vft(qm, i, 0, 0);
- if (ret)
- return ret;
- }
-
- sec->num_vfs = 0;
-
- return 0;
-}
-
-static int sec_sriov_enable(struct pci_dev *pdev, int max_vfs)
-{
- struct sec_dev *sec = pci_get_drvdata(pdev);
- int pre_existing_vfs, ret;
- u32 num_vfs;
-
- pre_existing_vfs = pci_num_vf(pdev);
-
- if (pre_existing_vfs) {
- pci_err(pdev, "Can't enable VF. Please disable at first!\n");
- return 0;
- }
-
- num_vfs = min_t(u32, max_vfs, SEC_VF_NUM);
-
- ret = sec_vf_q_assign(sec, num_vfs);
- if (ret) {
- pci_err(pdev, "Can't assign queues for VF!\n");
- return ret;
- }
-
- sec->num_vfs = num_vfs;
-
- ret = pci_enable_sriov(pdev, num_vfs);
- if (ret) {
- pci_err(pdev, "Can't enable VF!\n");
- sec_clear_vft_config(sec);
- return ret;
- }
-
- return num_vfs;
-}
-
-static int sec_sriov_disable(struct pci_dev *pdev)
-{
- struct sec_dev *sec = pci_get_drvdata(pdev);
-
- if (pci_vfs_assigned(pdev)) {
- pci_err(pdev, "Can't disable VFs while VFs are assigned!\n");
- return -EPERM;
- }
-
- /* remove in sec_pci_driver will be called to free VF resources */
- pci_disable_sriov(pdev);
-
- return sec_clear_vft_config(sec);
-}
-
-static int sec_sriov_configure(struct pci_dev *pdev, int num_vfs)
-{
- if (num_vfs)
- return sec_sriov_enable(pdev, num_vfs);
- else
- return sec_sriov_disable(pdev);
-}
-
static void sec_remove(struct pci_dev *pdev)
{
struct sec_dev *sec = pci_get_drvdata(pdev);
@@ -1006,8 +914,8 @@ static void sec_remove(struct pci_dev *pdev)
hisi_qm_del_from_list(qm, &sec_devices);
- if (qm->fun_type == QM_HW_PF && sec->num_vfs)
- (void)sec_sriov_disable(pdev);
+ if (qm->fun_type == QM_HW_PF && qm->vfs_num)
+ hisi_qm_sriov_disable(pdev);
sec_debugfs_exit(sec);
@@ -1023,6 +931,9 @@ static void sec_remove(struct pci_dev *pdev)
static const struct pci_error_handlers sec_err_handler = {
.error_detected = hisi_qm_dev_err_detected,
+ .slot_reset = hisi_qm_dev_slot_reset,
+ .reset_prepare = hisi_qm_reset_prepare,
+ .reset_done = hisi_qm_reset_done,
};
static struct pci_driver sec_pci_driver = {
@@ -1031,7 +942,7 @@ static struct pci_driver sec_pci_driver = {
.probe = sec_probe,
.remove = sec_remove,
.err_handler = &sec_err_handler,
- .sriov_configure = sec_sriov_configure,
+ .sriov_configure = hisi_qm_sriov_configure,
};
static void sec_register_debugfs(void)
diff --git a/drivers/crypto/hisilicon/zip/zip.h b/drivers/crypto/hisilicon/zip/zip.h
index 82dc6f867171..f3ed4c0e5493 100644
--- a/drivers/crypto/hisilicon/zip/zip.h
+++ b/drivers/crypto/hisilicon/zip/zip.h
@@ -28,12 +28,20 @@ enum hisi_zip_error_type {
HZIP_NC_ERR = 0x0d,
};
+struct hisi_zip_dfx {
+ atomic64_t send_cnt;
+ atomic64_t recv_cnt;
+ atomic64_t send_busy_cnt;
+ atomic64_t err_bd_cnt;
+};
+
struct hisi_zip_ctrl;
struct hisi_zip {
struct hisi_qm qm;
struct list_head list;
struct hisi_zip_ctrl *ctrl;
+ struct hisi_zip_dfx dfx;
};
struct hisi_zip_sqe {
diff --git a/drivers/crypto/hisilicon/zip/zip_crypto.c b/drivers/crypto/hisilicon/zip/zip_crypto.c
index 369ec3220574..c73707c2e539 100644
--- a/drivers/crypto/hisilicon/zip/zip_crypto.c
+++ b/drivers/crypto/hisilicon/zip/zip_crypto.c
@@ -64,7 +64,6 @@ struct hisi_zip_req_q {
struct hisi_zip_qp_ctx {
struct hisi_qp *qp;
- struct hisi_zip_sqe zip_sqe;
struct hisi_zip_req_q req_q;
struct hisi_acc_sgl_pool *sgl_pool;
struct hisi_zip *zip_dev;
@@ -333,6 +332,7 @@ static void hisi_zip_acomp_cb(struct hisi_qp *qp, void *data)
{
struct hisi_zip_sqe *sqe = data;
struct hisi_zip_qp_ctx *qp_ctx = qp->qp_ctx;
+ struct hisi_zip_dfx *dfx = &qp_ctx->zip_dev->dfx;
struct hisi_zip_req_q *req_q = &qp_ctx->req_q;
struct hisi_zip_req *req = req_q->q + sqe->tag;
struct acomp_req *acomp_req = req->req;
@@ -340,12 +340,14 @@ static void hisi_zip_acomp_cb(struct hisi_qp *qp, void *data)
u32 status, dlen, head_size;
int err = 0;
+ atomic64_inc(&dfx->recv_cnt);
status = sqe->dw3 & HZIP_BD_STATUS_M;
if (status != 0 && status != HZIP_NC_ERR) {
dev_err(dev, "%scompress fail in qp%u: %u, output: %u\n",
(qp->alg_type == 0) ? "" : "de", qp->qp_id, status,
sqe->produced);
+ atomic64_inc(&dfx->err_bd_cnt);
err = -EIO;
}
dlen = sqe->produced;
@@ -484,11 +486,12 @@ static struct hisi_zip_req *hisi_zip_create_req(struct acomp_req *req,
static int hisi_zip_do_work(struct hisi_zip_req *req,
struct hisi_zip_qp_ctx *qp_ctx)
{
- struct hisi_zip_sqe *zip_sqe = &qp_ctx->zip_sqe;
struct acomp_req *a_req = req->req;
struct hisi_qp *qp = qp_ctx->qp;
struct device *dev = &qp->qm->pdev->dev;
struct hisi_acc_sgl_pool *pool = qp_ctx->sgl_pool;
+ struct hisi_zip_dfx *dfx = &qp_ctx->zip_dev->dfx;
+ struct hisi_zip_sqe zip_sqe;
dma_addr_t input;
dma_addr_t output;
int ret;
@@ -511,15 +514,18 @@ static int hisi_zip_do_work(struct hisi_zip_req *req,
}
req->dma_dst = output;
- hisi_zip_fill_sqe(zip_sqe, qp->req_type, input, output, a_req->slen,
+ hisi_zip_fill_sqe(&zip_sqe, qp->req_type, input, output, a_req->slen,
a_req->dlen, req->sskip, req->dskip);
- hisi_zip_config_buf_type(zip_sqe, HZIP_SGL);
- hisi_zip_config_tag(zip_sqe, req->req_id);
+ hisi_zip_config_buf_type(&zip_sqe, HZIP_SGL);
+ hisi_zip_config_tag(&zip_sqe, req->req_id);
/* send command to start a task */
- ret = hisi_qp_send(qp, zip_sqe);
- if (ret < 0)
+ atomic64_inc(&dfx->send_cnt);
+ ret = hisi_qp_send(qp, &zip_sqe);
+ if (ret < 0) {
+ atomic64_inc(&dfx->send_busy_cnt);
goto err_unmap_output;
+ }
return -EINPROGRESS;
diff --git a/drivers/crypto/hisilicon/zip/zip_main.c b/drivers/crypto/hisilicon/zip/zip_main.c
index fcc85d2dbd07..2229a21ae7c8 100644
--- a/drivers/crypto/hisilicon/zip/zip_main.c
+++ b/drivers/crypto/hisilicon/zip/zip_main.c
@@ -62,6 +62,7 @@
#define HZIP_CORE_INT_SOURCE 0x3010A0
#define HZIP_CORE_INT_MASK_REG 0x3010A4
+#define HZIP_CORE_INT_SET 0x3010A8
#define HZIP_CORE_INT_STATUS 0x3010AC
#define HZIP_CORE_INT_STATUS_M_ECC BIT(1)
#define HZIP_CORE_SRAM_ECC_ERR_INFO 0x301148
@@ -83,8 +84,13 @@
#define HZIP_SOFT_CTRL_CNT_CLR_CE 0x301000
#define SOFT_CTRL_CNT_CLR_CE_BIT BIT(0)
+#define HZIP_SOFT_CTRL_ZIP_CONTROL 0x30100C
+#define HZIP_AXI_SHUTDOWN_ENABLE BIT(14)
+#define HZIP_WR_PORT BIT(11)
#define HZIP_BUF_SIZE 22
+#define HZIP_SQE_MASK_OFFSET 64
+#define HZIP_SQE_MASK_LEN 48
static const char hisi_zip_name[] = "hisi_zip";
static struct dentry *hzip_debugfs_root;
@@ -95,6 +101,18 @@ struct hisi_zip_hw_error {
const char *msg;
};
+struct zip_dfx_item {
+ const char *name;
+ u32 offset;
+};
+
+static struct zip_dfx_item zip_dfx_files[] = {
+ {"send_cnt", offsetof(struct hisi_zip_dfx, send_cnt)},
+ {"recv_cnt", offsetof(struct hisi_zip_dfx, recv_cnt)},
+ {"send_busy_cnt", offsetof(struct hisi_zip_dfx, send_busy_cnt)},
+ {"err_bd_cnt", offsetof(struct hisi_zip_dfx, err_bd_cnt)},
+};
+
static const struct hisi_zip_hw_error zip_hw_error[] = {
{ .int_msk = BIT(0), .msg = "zip_ecc_1bitt_err" },
{ .int_msk = BIT(1), .msg = "zip_ecc_2bit_err" },
@@ -134,7 +152,6 @@ struct ctrl_debug_file {
* Just relevant for PF.
*/
struct hisi_zip_ctrl {
- u32 num_vfs;
struct hisi_zip *hisi_zip;
struct dentry *debug_root;
struct ctrl_debug_file files[HZIP_DEBUG_FILE_NUM];
@@ -162,7 +179,7 @@ static const u64 core_offsets[] = {
[HZIP_DECOMP_CORE5] = 0x309000,
};
-static struct debugfs_reg32 hzip_dfx_regs[] = {
+static const struct debugfs_reg32 hzip_dfx_regs[] = {
{"HZIP_GET_BD_NUM ", 0x00ull},
{"HZIP_GET_RIGHT_BD ", 0x04ull},
{"HZIP_GET_ERROR_BD ", 0x08ull},
@@ -189,38 +206,7 @@ static struct debugfs_reg32 hzip_dfx_regs[] = {
static int pf_q_num_set(const char *val, const struct kernel_param *kp)
{
- struct pci_dev *pdev = pci_get_device(PCI_VENDOR_ID_HUAWEI,
- PCI_DEVICE_ID_ZIP_PF, NULL);
- u32 n, q_num;
- u8 rev_id;
- int ret;
-
- if (!val)
- return -EINVAL;
-
- if (!pdev) {
- q_num = min_t(u32, HZIP_QUEUE_NUM_V1, HZIP_QUEUE_NUM_V2);
- pr_info("No device found currently, suppose queue number is %d\n",
- q_num);
- } else {
- rev_id = pdev->revision;
- switch (rev_id) {
- case QM_HW_V1:
- q_num = HZIP_QUEUE_NUM_V1;
- break;
- case QM_HW_V2:
- q_num = HZIP_QUEUE_NUM_V2;
- break;
- default:
- return -EINVAL;
- }
- }
-
- ret = kstrtou32(val, 10, &n);
- if (ret != 0 || n > q_num || n == 0)
- return -EINVAL;
-
- return param_set_int(val, kp);
+ return q_num_set(val, kp, PCI_DEVICE_ID_ZIP_PF);
}
static const struct kernel_param_ops pf_q_num_ops = {
@@ -232,9 +218,14 @@ static u32 pf_q_num = HZIP_PF_DEF_Q_NUM;
module_param_cb(pf_q_num, &pf_q_num_ops, &pf_q_num, 0444);
MODULE_PARM_DESC(pf_q_num, "Number of queues in PF(v1 1-4096, v2 1-1024)");
+static const struct kernel_param_ops vfs_num_ops = {
+ .set = vfs_num_set,
+ .get = param_get_int,
+};
+
static u32 vfs_num;
-module_param(vfs_num, uint, 0444);
-MODULE_PARM_DESC(vfs_num, "Number of VFs to enable(1-63)");
+module_param_cb(vfs_num, &vfs_num_ops, &vfs_num, 0444);
+MODULE_PARM_DESC(vfs_num, "Number of VFs to enable(1-63), 0(default)");
static const struct pci_device_id hisi_zip_dev_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, PCI_DEVICE_ID_ZIP_PF) },
@@ -250,9 +241,9 @@ int zip_create_qps(struct hisi_qp **qps, int qp_num)
return hisi_qm_alloc_qps_node(&zip_devices, qp_num, 0, node, qps);
}
-static void hisi_zip_set_user_domain_and_cache(struct hisi_zip *hisi_zip)
+static int hisi_zip_set_user_domain_and_cache(struct hisi_qm *qm)
{
- void __iomem *base = hisi_zip->qm.io_base;
+ void __iomem *base = qm->io_base;
/* qm user domain */
writel(AXUSER_BASE, base + QM_ARUSER_M_CFG_1);
@@ -279,7 +270,7 @@ static void hisi_zip_set_user_domain_and_cache(struct hisi_zip *hisi_zip)
writel(AXUSER_BASE, base + HZIP_SGL_RUSER_32_63);
writel(AXUSER_BASE, base + HZIP_BD_WUSER_32_63);
- if (hisi_zip->qm.use_sva) {
+ if (qm->use_sva) {
writel(AXUSER_BASE | AXUSER_SSV, base + HZIP_DATA_RUSER_32_63);
writel(AXUSER_BASE | AXUSER_SSV, base + HZIP_DATA_WUSER_32_63);
} else {
@@ -295,10 +286,14 @@ static void hisi_zip_set_user_domain_and_cache(struct hisi_zip *hisi_zip)
writel(SQC_CACHE_ENABLE | CQC_CACHE_ENABLE | SQC_CACHE_WB_ENABLE |
CQC_CACHE_WB_ENABLE | FIELD_PREP(SQC_CACHE_WB_THRD, 1) |
FIELD_PREP(CQC_CACHE_WB_THRD, 1), base + QM_CACHE_CTL);
+
+ return 0;
}
static void hisi_zip_hw_error_enable(struct hisi_qm *qm)
{
+ u32 val;
+
if (qm->ver == QM_HW_V1) {
writel(HZIP_CORE_INT_MASK_ALL,
qm->io_base + HZIP_CORE_INT_MASK_REG);
@@ -317,12 +312,24 @@ static void hisi_zip_hw_error_enable(struct hisi_qm *qm)
/* enable ZIP hw error interrupts */
writel(0, qm->io_base + HZIP_CORE_INT_MASK_REG);
+
+ /* enable ZIP block master OOO when m-bit error occur */
+ val = readl(qm->io_base + HZIP_SOFT_CTRL_ZIP_CONTROL);
+ val = val | HZIP_AXI_SHUTDOWN_ENABLE;
+ writel(val, qm->io_base + HZIP_SOFT_CTRL_ZIP_CONTROL);
}
static void hisi_zip_hw_error_disable(struct hisi_qm *qm)
{
+ u32 val;
+
/* disable ZIP hw error interrupts */
writel(HZIP_CORE_INT_MASK_ALL, qm->io_base + HZIP_CORE_INT_MASK_REG);
+
+ /* disable ZIP block master OOO when m-bit error occur */
+ val = readl(qm->io_base + HZIP_SOFT_CTRL_ZIP_CONTROL);
+ val = val & ~HZIP_AXI_SHUTDOWN_ENABLE;
+ writel(val, qm->io_base + HZIP_SOFT_CTRL_ZIP_CONTROL);
}
static inline struct hisi_qm *file_to_qm(struct ctrl_debug_file *file)
@@ -342,21 +349,20 @@ static u32 current_qm_read(struct ctrl_debug_file *file)
static int current_qm_write(struct ctrl_debug_file *file, u32 val)
{
struct hisi_qm *qm = file_to_qm(file);
- struct hisi_zip_ctrl *ctrl = file->ctrl;
u32 vfq_num;
u32 tmp;
- if (val > ctrl->num_vfs)
+ if (val > qm->vfs_num)
return -EINVAL;
/* Calculate curr_qm_qp_num and store */
if (val == 0) {
qm->debug.curr_qm_qp_num = qm->qp_num;
} else {
- vfq_num = (qm->ctrl_qp_num - qm->qp_num) / ctrl->num_vfs;
- if (val == ctrl->num_vfs)
+ vfq_num = (qm->ctrl_qp_num - qm->qp_num) / qm->vfs_num;
+ if (val == qm->vfs_num)
qm->debug.curr_qm_qp_num = qm->ctrl_qp_num -
- qm->qp_num - (ctrl->num_vfs - 1) * vfq_num;
+ qm->qp_num - (qm->vfs_num - 1) * vfq_num;
else
qm->debug.curr_qm_qp_num = vfq_num;
}
@@ -477,6 +483,27 @@ static const struct file_operations ctrl_debug_fops = {
.write = ctrl_debug_write,
};
+
+static int zip_debugfs_atomic64_set(void *data, u64 val)
+{
+ if (val)
+ return -EINVAL;
+
+ atomic64_set((atomic64_t *)data, 0);
+
+ return 0;
+}
+
+static int zip_debugfs_atomic64_get(void *data, u64 *val)
+{
+ *val = atomic64_read((atomic64_t *)data);
+
+ return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(zip_atomic64_ops, zip_debugfs_atomic64_get,
+ zip_debugfs_atomic64_set, "%llu\n");
+
static int hisi_zip_core_debug_init(struct hisi_zip_ctrl *ctrl)
{
struct hisi_zip *hisi_zip = ctrl->hisi_zip;
@@ -508,6 +535,25 @@ static int hisi_zip_core_debug_init(struct hisi_zip_ctrl *ctrl)
return 0;
}
+static void hisi_zip_dfx_debug_init(struct hisi_qm *qm)
+{
+ struct hisi_zip *zip = container_of(qm, struct hisi_zip, qm);
+ struct hisi_zip_dfx *dfx = &zip->dfx;
+ struct dentry *tmp_dir;
+ void *data;
+ int i;
+
+ tmp_dir = debugfs_create_dir("zip_dfx", qm->debug.debug_root);
+ for (i = 0; i < ARRAY_SIZE(zip_dfx_files); i++) {
+ data = (atomic64_t *)((uintptr_t)dfx + zip_dfx_files[i].offset);
+ debugfs_create_file(zip_dfx_files[i].name,
+ 0644,
+ tmp_dir,
+ data,
+ &zip_atomic64_ops);
+ }
+}
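The zip_dfx_files[] table walked above is defined elsewhere in the driver and is not part of this hunk; the sketch below only illustrates the offset-table pattern it relies on, and the struct and entry names are assumptions rather than the driver's actual definitions.

/* Hypothetical sketch of an offset table pairing debugfs files with counters. */
struct hisi_zip_dfx_sketch {
	atomic64_t send_cnt;			/* assumed counter names */
	atomic64_t recv_cnt;
	atomic64_t err_bd_cnt;
};

struct zip_dfx_file_sketch {
	const char *name;
	u32 offset;				/* offset of an atomic64_t in the dfx struct */
};

static const struct zip_dfx_file_sketch zip_dfx_files_sketch[] = {
	{ "send_cnt",	offsetof(struct hisi_zip_dfx_sketch, send_cnt) },
	{ "recv_cnt",	offsetof(struct hisi_zip_dfx_sketch, recv_cnt) },
	{ "err_bd_cnt",	offsetof(struct hisi_zip_dfx_sketch, err_bd_cnt) },
};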
+
static int hisi_zip_ctrl_debug_init(struct hisi_zip_ctrl *ctrl)
{
int i;
@@ -534,6 +580,8 @@ static int hisi_zip_debugfs_init(struct hisi_zip *hisi_zip)
dev_d = debugfs_create_dir(dev_name(dev), hzip_debugfs_root);
+ qm->debug.sqe_mask_offset = HZIP_SQE_MASK_OFFSET;
+ qm->debug.sqe_mask_len = HZIP_SQE_MASK_LEN;
qm->debug.debug_root = dev_d;
ret = hisi_qm_debug_init(qm);
if (ret)
@@ -546,6 +594,8 @@ static int hisi_zip_debugfs_init(struct hisi_zip *hisi_zip)
goto failed_to_create;
}
+ hisi_zip_dfx_debug_init(qm);
+
return 0;
failed_to_create:
@@ -598,8 +648,6 @@ static void hisi_zip_log_hw_error(struct hisi_qm *qm, u32 err_sts)
}
err++;
}
-
- writel(err_sts, qm->io_base + HZIP_CORE_INT_SOURCE);
}
static u32 hisi_zip_get_hw_err_status(struct hisi_qm *qm)
@@ -607,17 +655,55 @@ static u32 hisi_zip_get_hw_err_status(struct hisi_qm *qm)
return readl(qm->io_base + HZIP_CORE_INT_STATUS);
}
+static void hisi_zip_clear_hw_err_status(struct hisi_qm *qm, u32 err_sts)
+{
+ writel(err_sts, qm->io_base + HZIP_CORE_INT_SOURCE);
+}
+
+static void hisi_zip_open_axi_master_ooo(struct hisi_qm *qm)
+{
+ u32 val;
+
+ val = readl(qm->io_base + HZIP_SOFT_CTRL_ZIP_CONTROL);
+
+ writel(val & ~HZIP_AXI_SHUTDOWN_ENABLE,
+ qm->io_base + HZIP_SOFT_CTRL_ZIP_CONTROL);
+
+ writel(val | HZIP_AXI_SHUTDOWN_ENABLE,
+ qm->io_base + HZIP_SOFT_CTRL_ZIP_CONTROL);
+}
+
+static void hisi_zip_close_axi_master_ooo(struct hisi_qm *qm)
+{
+ u32 nfe_enb;
+
+ /* Disable ECC Mbit error report. */
+ nfe_enb = readl(qm->io_base + HZIP_CORE_INT_RAS_NFE_ENB);
+ writel(nfe_enb & ~HZIP_CORE_INT_STATUS_M_ECC,
+ qm->io_base + HZIP_CORE_INT_RAS_NFE_ENB);
+
+ /* Inject a zip ECC Mbit error to block master OOO. */
+ writel(HZIP_CORE_INT_STATUS_M_ECC,
+ qm->io_base + HZIP_CORE_INT_SET);
+}
+
static const struct hisi_qm_err_ini hisi_zip_err_ini = {
+ .hw_init = hisi_zip_set_user_domain_and_cache,
.hw_err_enable = hisi_zip_hw_error_enable,
.hw_err_disable = hisi_zip_hw_error_disable,
.get_dev_hw_err_status = hisi_zip_get_hw_err_status,
+ .clear_dev_hw_err_status = hisi_zip_clear_hw_err_status,
.log_dev_hw_err = hisi_zip_log_hw_error,
+ .open_axi_master_ooo = hisi_zip_open_axi_master_ooo,
+ .close_axi_master_ooo = hisi_zip_close_axi_master_ooo,
.err_info = {
.ce = QM_BASE_CE,
.nfe = QM_BASE_NFE |
QM_ACC_WB_NOT_READY_TIMEOUT,
.fe = 0,
- .msi = QM_DB_RANDOM_INVALID,
+ .ecc_2bits_mask = HZIP_CORE_INT_STATUS_M_ECC,
+ .msi_wr_port = HZIP_WR_PORT,
+ .acpi_rst = "ZRST",
}
};
@@ -633,177 +719,85 @@ static int hisi_zip_pf_probe_init(struct hisi_zip *hisi_zip)
hisi_zip->ctrl = ctrl;
ctrl->hisi_zip = hisi_zip;
- switch (qm->ver) {
- case QM_HW_V1:
+ if (qm->ver == QM_HW_V1)
qm->ctrl_qp_num = HZIP_QUEUE_NUM_V1;
- break;
-
- case QM_HW_V2:
+ else
qm->ctrl_qp_num = HZIP_QUEUE_NUM_V2;
- break;
-
- default:
- return -EINVAL;
- }
qm->err_ini = &hisi_zip_err_ini;
- hisi_zip_set_user_domain_and_cache(hisi_zip);
+ hisi_zip_set_user_domain_and_cache(qm);
hisi_qm_dev_err_init(qm);
hisi_zip_debug_regs_clear(hisi_zip);
return 0;
}
-/* Currently we only support equal assignment */
-static int hisi_zip_vf_q_assign(struct hisi_zip *hisi_zip, int num_vfs)
+static int hisi_zip_qm_init(struct hisi_qm *qm, struct pci_dev *pdev)
{
- struct hisi_qm *qm = &hisi_zip->qm;
- u32 qp_num = qm->qp_num;
- u32 q_base = qp_num;
- u32 q_num, remain_q_num, i;
- int ret;
-
- if (!num_vfs)
- return -EINVAL;
-
- remain_q_num = qm->ctrl_qp_num - qp_num;
- if (remain_q_num < num_vfs)
- return -EINVAL;
+ qm->pdev = pdev;
+ qm->ver = pdev->revision;
+ qm->algs = "zlib\ngzip";
+ qm->sqe_size = HZIP_SQE_SIZE;
+ qm->dev_name = hisi_zip_name;
- q_num = remain_q_num / num_vfs;
- for (i = 1; i <= num_vfs; i++) {
- if (i == num_vfs)
- q_num += remain_q_num % num_vfs;
- ret = hisi_qm_set_vft(qm, i, q_base, q_num);
- if (ret)
- return ret;
- q_base += q_num;
+ qm->fun_type = (pdev->device == PCI_DEVICE_ID_ZIP_PF) ?
+ QM_HW_PF : QM_HW_VF;
+ if (qm->fun_type == QM_HW_PF) {
+ qm->qp_base = HZIP_PF_DEF_Q_BASE;
+ qm->qp_num = pf_q_num;
+ qm->qm_list = &zip_devices;
+ } else if (qm->fun_type == QM_HW_VF && qm->ver == QM_HW_V1) {
+ /*
+ * There is no way to get the qm configuration from inside a VM on
+ * v1 hardware, so force the PF to use HZIP_PF_DEF_Q_NUM and allow
+ * only one VF to be enabled on v1 hardware.
+ *
+ * v2 hardware has no such problem.
+ */
+ qm->qp_base = HZIP_PF_DEF_Q_NUM;
+ qm->qp_num = HZIP_QUEUE_NUM_V1 - HZIP_PF_DEF_Q_NUM;
}
- return 0;
+ return hisi_qm_init(qm);
}
-static int hisi_zip_clear_vft_config(struct hisi_zip *hisi_zip)
+static int hisi_zip_probe_init(struct hisi_zip *hisi_zip)
{
- struct hisi_zip_ctrl *ctrl = hisi_zip->ctrl;
struct hisi_qm *qm = &hisi_zip->qm;
- u32 i, num_vfs = ctrl->num_vfs;
int ret;
- for (i = 1; i <= num_vfs; i++) {
- ret = hisi_qm_set_vft(qm, i, 0, 0);
+ if (qm->fun_type == QM_HW_PF) {
+ ret = hisi_zip_pf_probe_init(hisi_zip);
if (ret)
return ret;
}
- ctrl->num_vfs = 0;
-
return 0;
}
-static int hisi_zip_sriov_enable(struct pci_dev *pdev, int max_vfs)
-{
- struct hisi_zip *hisi_zip = pci_get_drvdata(pdev);
- int pre_existing_vfs, num_vfs, ret;
-
- pre_existing_vfs = pci_num_vf(pdev);
-
- if (pre_existing_vfs) {
- dev_err(&pdev->dev,
- "Can't enable VF. Please disable pre-enabled VFs!\n");
- return 0;
- }
-
- num_vfs = min_t(int, max_vfs, HZIP_VF_NUM);
-
- ret = hisi_zip_vf_q_assign(hisi_zip, num_vfs);
- if (ret) {
- dev_err(&pdev->dev, "Can't assign queues for VF!\n");
- return ret;
- }
-
- hisi_zip->ctrl->num_vfs = num_vfs;
-
- ret = pci_enable_sriov(pdev, num_vfs);
- if (ret) {
- dev_err(&pdev->dev, "Can't enable VF!\n");
- hisi_zip_clear_vft_config(hisi_zip);
- return ret;
- }
-
- return num_vfs;
-}
-
-static int hisi_zip_sriov_disable(struct pci_dev *pdev)
-{
- struct hisi_zip *hisi_zip = pci_get_drvdata(pdev);
-
- if (pci_vfs_assigned(pdev)) {
- dev_err(&pdev->dev,
- "Can't disable VFs while VFs are assigned!\n");
- return -EPERM;
- }
-
- /* remove in hisi_zip_pci_driver will be called to free VF resources */
- pci_disable_sriov(pdev);
-
- return hisi_zip_clear_vft_config(hisi_zip);
-}
-
static int hisi_zip_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
struct hisi_zip *hisi_zip;
- enum qm_hw_ver rev_id;
struct hisi_qm *qm;
int ret;
- rev_id = hisi_qm_get_hw_version(pdev);
- if (rev_id == QM_HW_UNKNOWN)
- return -EINVAL;
-
hisi_zip = devm_kzalloc(&pdev->dev, sizeof(*hisi_zip), GFP_KERNEL);
if (!hisi_zip)
return -ENOMEM;
- pci_set_drvdata(pdev, hisi_zip);
qm = &hisi_zip->qm;
- qm->use_dma_api = true;
- qm->pdev = pdev;
- qm->ver = rev_id;
- qm->algs = "zlib\ngzip";
- qm->sqe_size = HZIP_SQE_SIZE;
- qm->dev_name = hisi_zip_name;
- qm->fun_type = (pdev->device == PCI_DEVICE_ID_ZIP_PF) ? QM_HW_PF :
- QM_HW_VF;
- ret = hisi_qm_init(qm);
+ ret = hisi_zip_qm_init(qm, pdev);
if (ret) {
- dev_err(&pdev->dev, "Failed to init qm!\n");
+ pci_err(pdev, "Failed to init ZIP QM (%d)!\n", ret);
return ret;
}
- if (qm->fun_type == QM_HW_PF) {
- ret = hisi_zip_pf_probe_init(hisi_zip);
- if (ret)
- return ret;
-
- qm->qp_base = HZIP_PF_DEF_Q_BASE;
- qm->qp_num = pf_q_num;
- } else if (qm->fun_type == QM_HW_VF) {
- /*
- * have no way to get qm configure in VM in v1 hardware,
- * so currently force PF to uses HZIP_PF_DEF_Q_NUM, and force
- * to trigger only one VF in v1 hardware.
- *
- * v2 hardware has no such problem.
- */
- if (qm->ver == QM_HW_V1) {
- qm->qp_base = HZIP_PF_DEF_Q_NUM;
- qm->qp_num = HZIP_QUEUE_NUM_V1 - HZIP_PF_DEF_Q_NUM;
- } else if (qm->ver == QM_HW_V2)
- /* v2 starts to support get vft by mailbox */
- hisi_qm_get_vft(qm, &qm->qp_base, &qm->qp_num);
+ ret = hisi_zip_probe_init(hisi_zip);
+ if (ret) {
+ pci_err(pdev, "Failed to probe (%d)!\n", ret);
+ goto err_qm_uninit;
}
ret = hisi_qm_start(qm);
@@ -823,7 +817,7 @@ static int hisi_zip_probe(struct pci_dev *pdev, const struct pci_device_id *id)
}
if (qm->fun_type == QM_HW_PF && vfs_num > 0) {
- ret = hisi_zip_sriov_enable(pdev, vfs_num);
+ ret = hisi_qm_sriov_enable(pdev, vfs_num);
if (ret < 0)
goto err_remove_from_list;
}
@@ -836,15 +830,8 @@ err_remove_from_list:
hisi_qm_stop(qm);
err_qm_uninit:
hisi_qm_uninit(qm);
- return ret;
-}
-static int hisi_zip_sriov_configure(struct pci_dev *pdev, int num_vfs)
-{
- if (num_vfs == 0)
- return hisi_zip_sriov_disable(pdev);
- else
- return hisi_zip_sriov_enable(pdev, num_vfs);
+ return ret;
}
static void hisi_zip_remove(struct pci_dev *pdev)
@@ -852,8 +839,8 @@ static void hisi_zip_remove(struct pci_dev *pdev)
struct hisi_zip *hisi_zip = pci_get_drvdata(pdev);
struct hisi_qm *qm = &hisi_zip->qm;
- if (qm->fun_type == QM_HW_PF && hisi_zip->ctrl->num_vfs != 0)
- hisi_zip_sriov_disable(pdev);
+ if (qm->fun_type == QM_HW_PF && qm->vfs_num)
+ hisi_qm_sriov_disable(pdev);
hisi_zip_debugfs_exit(hisi_zip);
hisi_qm_stop(qm);
@@ -865,6 +852,9 @@ static void hisi_zip_remove(struct pci_dev *pdev)
static const struct pci_error_handlers hisi_zip_err_handler = {
.error_detected = hisi_qm_dev_err_detected,
+ .slot_reset = hisi_qm_dev_slot_reset,
+ .reset_prepare = hisi_qm_reset_prepare,
+ .reset_done = hisi_qm_reset_done,
};
static struct pci_driver hisi_zip_pci_driver = {
@@ -873,7 +863,7 @@ static struct pci_driver hisi_zip_pci_driver = {
.probe = hisi_zip_probe,
.remove = hisi_zip_remove,
.sriov_configure = IS_ENABLED(CONFIG_PCI_IOV) ?
- hisi_zip_sriov_configure : NULL,
+ hisi_qm_sriov_configure : NULL,
.err_handler = &hisi_zip_err_handler,
};
diff --git a/drivers/crypto/marvell/octeontx/otx_cptpf_main.c b/drivers/crypto/marvell/octeontx/otx_cptpf_main.c
index 200fb3303db0..34bb3063eb70 100644
--- a/drivers/crypto/marvell/octeontx/otx_cptpf_main.c
+++ b/drivers/crypto/marvell/octeontx/otx_cptpf_main.c
@@ -79,13 +79,13 @@ static int otx_cpt_device_init(struct otx_cpt_device *cpt)
/* Check BIST status */
bist = (u64)otx_cpt_check_bist_status(cpt);
if (bist) {
- dev_err(dev, "RAM BIST failed with code 0x%llx", bist);
+ dev_err(dev, "RAM BIST failed with code 0x%llx\n", bist);
return -ENODEV;
}
bist = otx_cpt_check_exe_bist_status(cpt);
if (bist) {
- dev_err(dev, "Engine BIST failed with code 0x%llx", bist);
+ dev_err(dev, "Engine BIST failed with code 0x%llx\n", bist);
return -ENODEV;
}
diff --git a/drivers/crypto/marvell/octeontx/otx_cptpf_mbox.c b/drivers/crypto/marvell/octeontx/otx_cptpf_mbox.c
index a6774232e9a3..a9e3de65875a 100644
--- a/drivers/crypto/marvell/octeontx/otx_cptpf_mbox.c
+++ b/drivers/crypto/marvell/octeontx/otx_cptpf_mbox.c
@@ -63,11 +63,11 @@ static void dump_mbox_msg(struct otx_cpt_mbox *mbox_msg, int vf_id)
hex_dump_to_buffer(mbox_msg, sizeof(struct otx_cpt_mbox), 16, 8,
raw_data_str, OTX_CPT_MAX_MBOX_DATA_STR_SIZE, false);
if (vf_id >= 0)
- pr_debug("MBOX opcode %s received from VF%d raw_data %s",
+ pr_debug("MBOX opcode %s received from VF%d raw_data %s\n",
get_mbox_opcode_str(mbox_msg->msg), vf_id,
raw_data_str);
else
- pr_debug("MBOX opcode %s received from PF raw_data %s",
+ pr_debug("MBOX opcode %s received from PF raw_data %s\n",
get_mbox_opcode_str(mbox_msg->msg), raw_data_str);
}
@@ -140,20 +140,20 @@ static int otx_cpt_bind_vq_to_grp(struct otx_cpt_device *cpt, u8 q, u8 grp)
struct otx_cpt_ucode *ucode;
if (q >= cpt->max_vfs) {
- dev_err(dev, "Requested queue %d is > than maximum avail %d",
+ dev_err(dev, "Requested queue %d is > than maximum avail %d\n",
q, cpt->max_vfs);
return -EINVAL;
}
if (grp >= OTX_CPT_MAX_ENGINE_GROUPS) {
- dev_err(dev, "Requested group %d is > than maximum avail %d",
+ dev_err(dev, "Requested group %d is > than maximum avail %d\n",
grp, OTX_CPT_MAX_ENGINE_GROUPS);
return -EINVAL;
}
eng_grp = &cpt->eng_grps.grp[grp];
if (!eng_grp->is_enabled) {
- dev_err(dev, "Requested engine group %d is disabled", grp);
+ dev_err(dev, "Requested engine group %d is disabled\n", grp);
return -EINVAL;
}
@@ -212,7 +212,7 @@ static void otx_cpt_handle_mbox_intr(struct otx_cpt_device *cpt, int vf)
vftype = otx_cpt_bind_vq_to_grp(cpt, vf, (u8)mbx.data);
if ((vftype != OTX_CPT_AE_TYPES) &&
(vftype != OTX_CPT_SE_TYPES)) {
- dev_err(dev, "VF%d binding to eng group %llu failed",
+ dev_err(dev, "VF%d binding to eng group %llu failed\n",
vf, mbx.data);
otx_cptpf_mbox_send_nack(cpt, vf, &mbx);
} else {
diff --git a/drivers/crypto/marvell/octeontx/otx_cptpf_ucode.c b/drivers/crypto/marvell/octeontx/otx_cptpf_ucode.c
index d04baa319592..fec8f3b9b112 100644
--- a/drivers/crypto/marvell/octeontx/otx_cptpf_ucode.c
+++ b/drivers/crypto/marvell/octeontx/otx_cptpf_ucode.c
@@ -62,7 +62,7 @@ static struct otx_cpt_bitmap get_cores_bmap(struct device *dev,
int i;
if (eng_grp->g->engs_num > OTX_CPT_MAX_ENGINES) {
- dev_err(dev, "unsupported number of engines %d on octeontx",
+ dev_err(dev, "unsupported number of engines %d on octeontx\n",
eng_grp->g->engs_num);
return bmap;
}
@@ -78,7 +78,7 @@ static struct otx_cpt_bitmap get_cores_bmap(struct device *dev,
}
if (!found)
- dev_err(dev, "No engines reserved for engine group %d",
+ dev_err(dev, "No engines reserved for engine group %d\n",
eng_grp->idx);
return bmap;
}
@@ -306,7 +306,7 @@ static int process_tar_file(struct device *dev,
ucode_size = ntohl(ucode_hdr->code_length) * 2;
if (!ucode_size || (size < round_up(ucode_size, 16) +
sizeof(struct otx_cpt_ucode_hdr) + OTX_CPT_UCODE_SIGN_LEN)) {
- dev_err(dev, "Ucode %s invalid size", filename);
+ dev_err(dev, "Ucode %s invalid size\n", filename);
return -EINVAL;
}
@@ -379,18 +379,18 @@ static void print_tar_dbg_info(struct tar_arch_info_t *tar_arch,
{
struct tar_ucode_info_t *curr;
- pr_debug("Tar archive filename %s", tar_filename);
- pr_debug("Tar archive pointer %p, size %ld", tar_arch->fw->data,
+ pr_debug("Tar archive filename %s\n", tar_filename);
+ pr_debug("Tar archive pointer %p, size %ld\n", tar_arch->fw->data,
tar_arch->fw->size);
list_for_each_entry(curr, &tar_arch->ucodes, list) {
- pr_debug("Ucode filename %s", curr->ucode.filename);
- pr_debug("Ucode version string %s", curr->ucode.ver_str);
- pr_debug("Ucode version %d.%d.%d.%d",
+ pr_debug("Ucode filename %s\n", curr->ucode.filename);
+ pr_debug("Ucode version string %s\n", curr->ucode.ver_str);
+ pr_debug("Ucode version %d.%d.%d.%d\n",
curr->ucode.ver_num.nn, curr->ucode.ver_num.xx,
curr->ucode.ver_num.yy, curr->ucode.ver_num.zz);
- pr_debug("Ucode type (%d) %s", curr->ucode.type,
+ pr_debug("Ucode type (%d) %s\n", curr->ucode.type,
get_ucode_type_str(curr->ucode.type));
- pr_debug("Ucode size %d", curr->ucode.size);
+ pr_debug("Ucode size %d\n", curr->ucode.size);
pr_debug("Ucode ptr %p\n", curr->ucode_ptr);
}
}
@@ -417,14 +417,14 @@ static struct tar_arch_info_t *load_tar_archive(struct device *dev,
goto release_tar_arch;
if (tar_arch->fw->size < TAR_BLOCK_LEN) {
- dev_err(dev, "Invalid tar archive %s ", tar_filename);
+ dev_err(dev, "Invalid tar archive %s\n", tar_filename);
goto release_tar_arch;
}
tar_size = tar_arch->fw->size;
tar_blk = (struct tar_blk_t *) tar_arch->fw->data;
if (strncmp(tar_blk->hdr.magic, TAR_MAGIC, TAR_MAGIC_LEN - 1)) {
- dev_err(dev, "Unsupported format of tar archive %s",
+ dev_err(dev, "Unsupported format of tar archive %s\n",
tar_filename);
goto release_tar_arch;
}
@@ -437,7 +437,7 @@ static struct tar_arch_info_t *load_tar_archive(struct device *dev,
if (tar_offs + cur_size > tar_size ||
tar_offs + 2*TAR_BLOCK_LEN > tar_size) {
- dev_err(dev, "Invalid tar archive %s ", tar_filename);
+ dev_err(dev, "Invalid tar archive %s\n", tar_filename);
goto release_tar_arch;
}
@@ -458,7 +458,7 @@ static struct tar_arch_info_t *load_tar_archive(struct device *dev,
/* Check for the end of the archive */
if (tar_offs + 2*TAR_BLOCK_LEN > tar_size) {
- dev_err(dev, "Invalid tar archive %s ", tar_filename);
+ dev_err(dev, "Invalid tar archive %s\n", tar_filename);
goto release_tar_arch;
}
@@ -563,13 +563,13 @@ static void print_engs_info(struct otx_cpt_eng_grp_info *eng_grp,
static void print_ucode_dbg_info(struct otx_cpt_ucode *ucode)
{
- pr_debug("Ucode info");
- pr_debug("Ucode version string %s", ucode->ver_str);
- pr_debug("Ucode version %d.%d.%d.%d", ucode->ver_num.nn,
+ pr_debug("Ucode info\n");
+ pr_debug("Ucode version string %s\n", ucode->ver_str);
+ pr_debug("Ucode version %d.%d.%d.%d\n", ucode->ver_num.nn,
ucode->ver_num.xx, ucode->ver_num.yy, ucode->ver_num.zz);
- pr_debug("Ucode type %s", get_ucode_type_str(ucode->type));
- pr_debug("Ucode size %d", ucode->size);
- pr_debug("Ucode virt address %16.16llx", (u64)ucode->align_va);
+ pr_debug("Ucode type %s\n", get_ucode_type_str(ucode->type));
+ pr_debug("Ucode size %d\n", ucode->size);
+ pr_debug("Ucode virt address %16.16llx\n", (u64)ucode->align_va);
pr_debug("Ucode phys address %16.16llx\n", ucode->align_dma);
}
@@ -600,19 +600,19 @@ static void print_dbg_info(struct device *dev,
u32 mask[4];
int i, j;
- pr_debug("Engine groups global info");
- pr_debug("max SE %d, max AE %d",
+ pr_debug("Engine groups global info\n");
+ pr_debug("max SE %d, max AE %d\n",
eng_grps->avail.max_se_cnt, eng_grps->avail.max_ae_cnt);
- pr_debug("free SE %d", eng_grps->avail.se_cnt);
- pr_debug("free AE %d", eng_grps->avail.ae_cnt);
+ pr_debug("free SE %d\n", eng_grps->avail.se_cnt);
+ pr_debug("free AE %d\n", eng_grps->avail.ae_cnt);
for (i = 0; i < OTX_CPT_MAX_ENGINE_GROUPS; i++) {
grp = &eng_grps->grp[i];
- pr_debug("engine_group%d, state %s", i, grp->is_enabled ?
+ pr_debug("engine_group%d, state %s\n", i, grp->is_enabled ?
"enabled" : "disabled");
if (grp->is_enabled) {
mirrored_grp = &eng_grps->grp[grp->mirror.idx];
- pr_debug("Ucode0 filename %s, version %s",
+ pr_debug("Ucode0 filename %s, version %s\n",
grp->mirror.is_ena ?
mirrored_grp->ucode[0].filename :
grp->ucode[0].filename,
@@ -626,18 +626,18 @@ static void print_dbg_info(struct device *dev,
if (engs->type) {
print_engs_info(grp, engs_info,
2*OTX_CPT_UCODE_NAME_LENGTH, j);
- pr_debug("Slot%d: %s", j, engs_info);
+ pr_debug("Slot%d: %s\n", j, engs_info);
bitmap_to_arr32(mask, engs->bmap,
eng_grps->engs_num);
- pr_debug("Mask: %8.8x %8.8x %8.8x %8.8x",
+ pr_debug("Mask: %8.8x %8.8x %8.8x %8.8x\n",
mask[3], mask[2], mask[1], mask[0]);
} else
- pr_debug("Slot%d not used", j);
+ pr_debug("Slot%d not used\n", j);
}
if (grp->is_enabled) {
cpt_print_engines_mask(grp, dev, engs_mask,
OTX_CPT_UCODE_NAME_LENGTH);
- pr_debug("Cmask: %s", engs_mask);
+ pr_debug("Cmask: %s\n", engs_mask);
}
}
}
@@ -766,7 +766,7 @@ static int check_engines_availability(struct device *dev,
if (avail_cnt < req_eng->count) {
dev_err(dev,
- "Error available %s engines %d < than requested %d",
+ "Error available %s engines %d < than requested %d\n",
get_eng_type_str(req_eng->type),
avail_cnt, req_eng->count);
return -EBUSY;
@@ -867,7 +867,7 @@ static int copy_ucode_to_dma_mem(struct device *dev,
OTX_CPT_UCODE_ALIGNMENT,
&ucode->dma, GFP_KERNEL);
if (!ucode->va) {
- dev_err(dev, "Unable to allocate space for microcode");
+ dev_err(dev, "Unable to allocate space for microcode\n");
return -ENOMEM;
}
ucode->align_va = PTR_ALIGN(ucode->va, OTX_CPT_UCODE_ALIGNMENT);
@@ -905,15 +905,15 @@ static int ucode_load(struct device *dev, struct otx_cpt_ucode *ucode,
ucode->size = ntohl(ucode_hdr->code_length) * 2;
if (!ucode->size || (fw->size < round_up(ucode->size, 16)
+ sizeof(struct otx_cpt_ucode_hdr) + OTX_CPT_UCODE_SIGN_LEN)) {
- dev_err(dev, "Ucode %s invalid size", ucode_filename);
+ dev_err(dev, "Ucode %s invalid size\n", ucode_filename);
ret = -EINVAL;
goto release_fw;
}
ret = get_ucode_type(ucode_hdr, &ucode->type);
if (ret) {
- dev_err(dev, "Microcode %s unknown type 0x%x", ucode->filename,
- ucode->type);
+ dev_err(dev, "Microcode %s unknown type 0x%x\n",
+ ucode->filename, ucode->type);
goto release_fw;
}
@@ -1083,7 +1083,7 @@ static int eng_grp_update_masks(struct device *dev,
break;
default:
- dev_err(dev, "Invalid engine type %d", engs->type);
+ dev_err(dev, "Invalid engine type %d\n", engs->type);
return -EINVAL;
}
@@ -1142,13 +1142,14 @@ static int delete_engine_group(struct device *dev,
return -EINVAL;
if (eng_grp->mirror.ref_count) {
- dev_err(dev, "Can't delete engine_group%d as it is used by:",
+ dev_err(dev, "Can't delete engine_group%d as it is used by engine_group(s):",
eng_grp->idx);
for (i = 0; i < OTX_CPT_MAX_ENGINE_GROUPS; i++) {
if (eng_grp->g->grp[i].mirror.is_ena &&
eng_grp->g->grp[i].mirror.idx == eng_grp->idx)
- dev_err(dev, "engine_group%d", i);
+ pr_cont(" %d", i);
}
+ pr_cont("\n");
return -EINVAL;
}
@@ -1182,7 +1183,7 @@ static int validate_1_ucode_scenario(struct device *dev,
if (!otx_cpt_uc_supports_eng_type(&eng_grp->ucode[0],
engs[i].type)) {
dev_err(dev,
- "Microcode %s does not support %s engines",
+ "Microcode %s does not support %s engines\n",
eng_grp->ucode[0].filename,
get_eng_type_str(engs[i].type));
return -EINVAL;
@@ -1220,7 +1221,7 @@ static int create_engine_group(struct device *dev,
/* Validate if requested engine types are supported by this device */
for (i = 0; i < engs_cnt; i++)
if (!dev_supports_eng_type(eng_grps, engs[i].type)) {
- dev_err(dev, "Device does not support %s engines",
+ dev_err(dev, "Device does not support %s engines\n",
get_eng_type_str(engs[i].type));
return -EPERM;
}
@@ -1228,7 +1229,7 @@ static int create_engine_group(struct device *dev,
/* Find engine group which is not used */
eng_grp = find_unused_eng_grp(eng_grps);
if (!eng_grp) {
- dev_err(dev, "Error all engine groups are being used");
+ dev_err(dev, "Error all engine groups are being used\n");
return -ENOSPC;
}
@@ -1298,11 +1299,11 @@ static int create_engine_group(struct device *dev,
eng_grp->is_enabled = true;
if (eng_grp->mirror.is_ena)
dev_info(dev,
- "Engine_group%d: reuse microcode %s from group %d",
+ "Engine_group%d: reuse microcode %s from group %d\n",
eng_grp->idx, mirrored_eng_grp->ucode[0].ver_str,
mirrored_eng_grp->idx);
else
- dev_info(dev, "Engine_group%d: microcode loaded %s",
+ dev_info(dev, "Engine_group%d: microcode loaded %s\n",
eng_grp->idx, eng_grp->ucode[0].ver_str);
return 0;
@@ -1412,14 +1413,14 @@ static ssize_t ucode_load_store(struct device *dev,
} else {
if (del_grp_idx < 0 ||
del_grp_idx >= OTX_CPT_MAX_ENGINE_GROUPS) {
- dev_err(dev, "Invalid engine group index %d",
+ dev_err(dev, "Invalid engine group index %d\n",
del_grp_idx);
ret = -EINVAL;
return ret;
}
if (!eng_grps->grp[del_grp_idx].is_enabled) {
- dev_err(dev, "Error engine_group%d is not configured",
+ dev_err(dev, "Error engine_group%d is not configured\n",
del_grp_idx);
ret = -EINVAL;
return ret;
@@ -1568,7 +1569,7 @@ void otx_cpt_disable_all_cores(struct otx_cpt_device *cpt)
udelay(CSR_DELAY);
reg = readq(cpt->reg_base + OTX_CPT_PF_EXEC_BUSY);
if (timeout--) {
- dev_warn(&cpt->pdev->dev, "Cores still busy");
+ dev_warn(&cpt->pdev->dev, "Cores still busy\n");
break;
}
}
@@ -1626,7 +1627,7 @@ int otx_cpt_init_eng_grps(struct pci_dev *pdev,
eng_grps->avail.max_ae_cnt;
if (eng_grps->engs_num > OTX_CPT_MAX_ENGINES) {
dev_err(&pdev->dev,
- "Number of engines %d > than max supported %d",
+ "Number of engines %d > than max supported %d\n",
eng_grps->engs_num, OTX_CPT_MAX_ENGINES);
ret = -EINVAL;
goto err;
diff --git a/drivers/crypto/marvell/octeontx/otx_cptvf_algs.c b/drivers/crypto/marvell/octeontx/otx_cptvf_algs.c
index 06202bcffb33..60e744f680d3 100644
--- a/drivers/crypto/marvell/octeontx/otx_cptvf_algs.c
+++ b/drivers/crypto/marvell/octeontx/otx_cptvf_algs.c
@@ -1660,7 +1660,7 @@ int otx_cpt_crypto_init(struct pci_dev *pdev, struct module *mod,
case OTX_CPT_SE_TYPES:
count = atomic_read(&se_devices.count);
if (count >= CPT_MAX_VF_NUM) {
- dev_err(&pdev->dev, "No space to add a new device");
+ dev_err(&pdev->dev, "No space to add a new device\n");
ret = -ENOSPC;
goto err;
}
@@ -1687,7 +1687,7 @@ int otx_cpt_crypto_init(struct pci_dev *pdev, struct module *mod,
case OTX_CPT_AE_TYPES:
count = atomic_read(&ae_devices.count);
if (count >= CPT_MAX_VF_NUM) {
- dev_err(&pdev->dev, "No space to a add new device");
+ dev_err(&pdev->dev, "No space to a add new device\n");
ret = -ENOSPC;
goto err;
}
@@ -1728,7 +1728,7 @@ void otx_cpt_crypto_exit(struct pci_dev *pdev, struct module *mod,
}
if (!dev_found) {
- dev_err(&pdev->dev, "%s device not found", __func__);
+ dev_err(&pdev->dev, "%s device not found\n", __func__);
goto exit;
}
diff --git a/drivers/crypto/marvell/octeontx/otx_cptvf_main.c b/drivers/crypto/marvell/octeontx/otx_cptvf_main.c
index a91860b5dc77..ce3168327a39 100644
--- a/drivers/crypto/marvell/octeontx/otx_cptvf_main.c
+++ b/drivers/crypto/marvell/octeontx/otx_cptvf_main.c
@@ -584,7 +584,7 @@ static irqreturn_t cptvf_done_intr_handler(int __always_unused irq,
cptvf_write_vq_done_ack(cptvf, intr);
wqe = get_cptvf_vq_wqe(cptvf, 0);
if (unlikely(!wqe)) {
- dev_err(&pdev->dev, "No work to schedule for VF (%d)",
+ dev_err(&pdev->dev, "No work to schedule for VF (%d)\n",
cptvf->vfid);
return IRQ_NONE;
}
@@ -602,7 +602,7 @@ static void cptvf_set_irq_affinity(struct otx_cptvf *cptvf, int vec)
if (!zalloc_cpumask_var(&cptvf->affinity_mask[vec],
GFP_KERNEL)) {
dev_err(&pdev->dev,
- "Allocation failed for affinity_mask for VF %d",
+ "Allocation failed for affinity_mask for VF %d\n",
cptvf->vfid);
return;
}
@@ -691,7 +691,7 @@ static ssize_t vf_engine_group_store(struct device *dev,
return -EINVAL;
if (val >= OTX_CPT_MAX_ENGINE_GROUPS) {
- dev_err(dev, "Engine group >= than max available groups %d",
+ dev_err(dev, "Engine group >= than max available groups %d\n",
OTX_CPT_MAX_ENGINE_GROUPS);
return -EINVAL;
}
@@ -837,7 +837,7 @@ static int otx_cptvf_probe(struct pci_dev *pdev,
cptvf_misc_intr_handler, 0, "CPT VF misc intr",
cptvf);
if (err) {
- dev_err(dev, "Failed to request misc irq");
+ dev_err(dev, "Failed to request misc irq\n");
goto free_vectors;
}
@@ -854,7 +854,7 @@ static int otx_cptvf_probe(struct pci_dev *pdev,
cptvf->cqinfo.qchunksize = OTX_CPT_CMD_QCHUNK_SIZE;
err = cptvf_sw_init(cptvf, OTX_CPT_CMD_QLEN, OTX_CPT_NUM_QS_PER_VF);
if (err) {
- dev_err(dev, "cptvf_sw_init() failed");
+ dev_err(dev, "cptvf_sw_init() failed\n");
goto free_misc_irq;
}
/* Convey VQ LEN to PF */
@@ -946,7 +946,7 @@ static void otx_cptvf_remove(struct pci_dev *pdev)
/* Convey DOWN to PF */
if (otx_cptvf_send_vf_down(cptvf)) {
- dev_err(&pdev->dev, "PF not responding to DOWN msg");
+ dev_err(&pdev->dev, "PF not responding to DOWN msg\n");
} else {
sysfs_remove_group(&pdev->dev.kobj, &otx_cptvf_sysfs_group);
otx_cpt_crypto_exit(pdev, THIS_MODULE, cptvf->vftype);
diff --git a/drivers/crypto/marvell/octeontx/otx_cptvf_reqmgr.c b/drivers/crypto/marvell/octeontx/otx_cptvf_reqmgr.c
index df839b880354..239195cccf93 100644
--- a/drivers/crypto/marvell/octeontx/otx_cptvf_reqmgr.c
+++ b/drivers/crypto/marvell/octeontx/otx_cptvf_reqmgr.c
@@ -314,7 +314,7 @@ static int process_request(struct pci_dev *pdev, struct otx_cpt_req_info *req,
GFP_ATOMIC;
ret = setup_sgio_list(pdev, &info, req, gfp);
if (unlikely(ret)) {
- dev_err(&pdev->dev, "Setting up SG list failed");
+ dev_err(&pdev->dev, "Setting up SG list failed\n");
goto request_cleanup;
}
cpt_req->dlen = info->dlen;
@@ -410,17 +410,17 @@ int otx_cpt_do_request(struct pci_dev *pdev, struct otx_cpt_req_info *req,
struct otx_cptvf *cptvf = pci_get_drvdata(pdev);
if (!otx_cpt_device_ready(cptvf)) {
- dev_err(&pdev->dev, "CPT Device is not ready");
+ dev_err(&pdev->dev, "CPT Device is not ready\n");
return -ENODEV;
}
if ((cptvf->vftype == OTX_CPT_SE_TYPES) && (!req->ctrl.s.se_req)) {
- dev_err(&pdev->dev, "CPTVF-%d of SE TYPE got AE request",
+ dev_err(&pdev->dev, "CPTVF-%d of SE TYPE got AE request\n",
cptvf->vfid);
return -EINVAL;
} else if ((cptvf->vftype == OTX_CPT_AE_TYPES) &&
(req->ctrl.s.se_req)) {
- dev_err(&pdev->dev, "CPTVF-%d of AE TYPE got SE request",
+ dev_err(&pdev->dev, "CPTVF-%d of AE TYPE got SE request\n",
cptvf->vfid);
return -EINVAL;
}
@@ -461,7 +461,7 @@ static int cpt_process_ccode(struct pci_dev *pdev,
/* check for timeout */
if (time_after_eq(jiffies, cpt_info->time_in +
OTX_CPT_COMMAND_TIMEOUT * HZ))
- dev_warn(&pdev->dev, "Request timed out 0x%p", req);
+ dev_warn(&pdev->dev, "Request timed out 0x%p\n", req);
else if (cpt_info->extra_time < OTX_CPT_TIME_IN_RESET_COUNT) {
cpt_info->time_in = jiffies;
cpt_info->extra_time++;
diff --git a/drivers/crypto/mediatek/mtk-sha.c b/drivers/crypto/mediatek/mtk-sha.c
index bd6309e57ab8..da3f0b8814aa 100644
--- a/drivers/crypto/mediatek/mtk-sha.c
+++ b/drivers/crypto/mediatek/mtk-sha.c
@@ -805,12 +805,9 @@ static int mtk_sha_setkey(struct crypto_ahash *tfm, const u8 *key,
size_t ds = crypto_shash_digestsize(bctx->shash);
int err, i;
- SHASH_DESC_ON_STACK(shash, bctx->shash);
-
- shash->tfm = bctx->shash;
-
if (keylen > bs) {
- err = crypto_shash_digest(shash, key, keylen, bctx->ipad);
+ err = crypto_shash_tfm_digest(bctx->shash, key, keylen,
+ bctx->ipad);
if (err)
return err;
keylen = ds;
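The mtk-sha hunk above, and the n2, omap-sham and s5p-sss hunks below, all make the same substitution: the open-coded SHASH_DESC_ON_STACK plus crypto_shash_digest() sequence becomes a single crypto_shash_tfm_digest() call. Roughly, the helper behaves like the sketch below; this is shown for orientation only and is not necessarily the exact in-tree implementation.

#include <crypto/hash.h>

/* Rough equivalent of crypto_shash_tfm_digest(), for orientation only. */
static int shash_tfm_digest_sketch(struct crypto_shash *tfm, const u8 *data,
				   unsigned int len, u8 *out)
{
	SHASH_DESC_ON_STACK(desc, tfm);
	int err;

	desc->tfm = tfm;
	err = crypto_shash_digest(desc, data, len, out);
	shash_desc_zero(desc);

	return err;
}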
diff --git a/drivers/crypto/n2_core.c b/drivers/crypto/n2_core.c
index f5c468f2cc82..6a828bbecea4 100644
--- a/drivers/crypto/n2_core.c
+++ b/drivers/crypto/n2_core.c
@@ -462,7 +462,6 @@ static int n2_hmac_async_setkey(struct crypto_ahash *tfm, const u8 *key,
struct n2_hmac_ctx *ctx = crypto_ahash_ctx(tfm);
struct crypto_shash *child_shash = ctx->child_shash;
struct crypto_ahash *fallback_tfm;
- SHASH_DESC_ON_STACK(shash, child_shash);
int err, bs, ds;
fallback_tfm = ctx->base.fallback_tfm;
@@ -470,14 +469,12 @@ static int n2_hmac_async_setkey(struct crypto_ahash *tfm, const u8 *key,
if (err)
return err;
- shash->tfm = child_shash;
-
bs = crypto_shash_blocksize(child_shash);
ds = crypto_shash_digestsize(child_shash);
BUG_ON(ds > N2_HASH_KEY_MAX);
if (keylen > bs) {
- err = crypto_shash_digest(shash, key, keylen,
- ctx->hash_key);
+ err = crypto_shash_tfm_digest(child_shash, key, keylen,
+ ctx->hash_key);
if (err)
return err;
keylen = ds;
diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c
index e4072cd38585..063ad5d03f33 100644
--- a/drivers/crypto/omap-sham.c
+++ b/drivers/crypto/omap-sham.c
@@ -33,7 +33,6 @@
#include <linux/of_irq.h>
#include <linux/delay.h>
#include <linux/crypto.h>
-#include <linux/cryptohash.h>
#include <crypto/scatterwalk.h>
#include <crypto/algapi.h>
#include <crypto/sha.h>
@@ -1245,16 +1244,6 @@ static int omap_sham_update(struct ahash_request *req)
return omap_sham_enqueue(req, OP_UPDATE);
}
-static int omap_sham_shash_digest(struct crypto_shash *tfm, u32 flags,
- const u8 *data, unsigned int len, u8 *out)
-{
- SHASH_DESC_ON_STACK(shash, tfm);
-
- shash->tfm = tfm;
-
- return crypto_shash_digest(shash, data, len, out);
-}
-
static int omap_sham_final_shash(struct ahash_request *req)
{
struct omap_sham_ctx *tctx = crypto_tfm_ctx(req->base.tfm);
@@ -1270,9 +1259,8 @@ static int omap_sham_final_shash(struct ahash_request *req)
!test_bit(FLAGS_AUTO_XOR, &ctx->dd->flags))
offset = get_block_size(ctx);
- return omap_sham_shash_digest(tctx->fallback, req->base.flags,
- ctx->buffer + offset,
- ctx->bufcnt - offset, req->result);
+ return crypto_shash_tfm_digest(tctx->fallback, ctx->buffer + offset,
+ ctx->bufcnt - offset, req->result);
}
static int omap_sham_final(struct ahash_request *req)
@@ -1351,9 +1339,8 @@ static int omap_sham_setkey(struct crypto_ahash *tfm, const u8 *key,
return err;
if (keylen > bs) {
- err = omap_sham_shash_digest(bctx->shash,
- crypto_shash_get_flags(bctx->shash),
- key, keylen, bctx->ipad);
+ err = crypto_shash_tfm_digest(bctx->shash, key, keylen,
+ bctx->ipad);
if (err)
return err;
keylen = ds;
diff --git a/drivers/crypto/s5p-sss.c b/drivers/crypto/s5p-sss.c
index 2a16800d2579..341433fbcc4a 100644
--- a/drivers/crypto/s5p-sss.c
+++ b/drivers/crypto/s5p-sss.c
@@ -1521,37 +1521,6 @@ static int s5p_hash_update(struct ahash_request *req)
}
/**
- * s5p_hash_shash_digest() - calculate shash digest
- * @tfm: crypto transformation
- * @flags: tfm flags
- * @data: input data
- * @len: length of data
- * @out: output buffer
- */
-static int s5p_hash_shash_digest(struct crypto_shash *tfm, u32 flags,
- const u8 *data, unsigned int len, u8 *out)
-{
- SHASH_DESC_ON_STACK(shash, tfm);
-
- shash->tfm = tfm;
-
- return crypto_shash_digest(shash, data, len, out);
-}
-
-/**
- * s5p_hash_final_shash() - calculate shash digest
- * @req: AHASH request
- */
-static int s5p_hash_final_shash(struct ahash_request *req)
-{
- struct s5p_hash_ctx *tctx = crypto_tfm_ctx(req->base.tfm);
- struct s5p_hash_reqctx *ctx = ahash_request_ctx(req);
-
- return s5p_hash_shash_digest(tctx->fallback, req->base.flags,
- ctx->buffer, ctx->bufcnt, req->result);
-}
-
-/**
* s5p_hash_final() - close up hash and calculate digest
* @req: AHASH request
*
@@ -1582,8 +1551,12 @@ static int s5p_hash_final(struct ahash_request *req)
if (ctx->error)
return -EINVAL; /* uncompleted hash is not needed */
- if (!ctx->digcnt && ctx->bufcnt < BUFLEN)
- return s5p_hash_final_shash(req);
+ if (!ctx->digcnt && ctx->bufcnt < BUFLEN) {
+ struct s5p_hash_ctx *tctx = crypto_tfm_ctx(req->base.tfm);
+
+ return crypto_shash_tfm_digest(tctx->fallback, ctx->buffer,
+ ctx->bufcnt, req->result);
+ }
return s5p_hash_enqueue(req, false); /* HASH_OP_FINAL */
}
diff --git a/drivers/crypto/stm32/stm32-crc32.c b/drivers/crypto/stm32/stm32-crc32.c
index 8e92e4ac79f1..3ba41148c2a4 100644
--- a/drivers/crypto/stm32/stm32-crc32.c
+++ b/drivers/crypto/stm32/stm32-crc32.c
@@ -28,18 +28,23 @@
/* Registers values */
#define CRC_CR_RESET BIT(0)
-#define CRC_CR_REVERSE (BIT(7) | BIT(6) | BIT(5))
-#define CRC_INIT_DEFAULT 0xFFFFFFFF
+#define CRC_CR_REV_IN_WORD (BIT(6) | BIT(5))
+#define CRC_CR_REV_IN_BYTE BIT(5)
+#define CRC_CR_REV_OUT BIT(7)
+#define CRC32C_INIT_DEFAULT 0xFFFFFFFF
#define CRC_AUTOSUSPEND_DELAY 50
+static unsigned int burst_size;
+module_param(burst_size, uint, 0644);
+MODULE_PARM_DESC(burst_size, "Select burst byte size (0 unlimited)");
+
struct stm32_crc {
struct list_head list;
struct device *dev;
void __iomem *regs;
struct clk *clk;
- u8 pending_data[sizeof(u32)];
- size_t nb_pending_bytes;
+ spinlock_t lock;
};
struct stm32_crc_list {
@@ -59,14 +64,13 @@ struct stm32_crc_ctx {
struct stm32_crc_desc_ctx {
u32 partial; /* crc32c: partial in first 4 bytes of that struct */
- struct stm32_crc *crc;
};
static int stm32_crc32_cra_init(struct crypto_tfm *tfm)
{
struct stm32_crc_ctx *mctx = crypto_tfm_ctx(tfm);
- mctx->key = CRC_INIT_DEFAULT;
+ mctx->key = 0;
mctx->poly = CRC32_POLY_LE;
return 0;
}
@@ -75,7 +79,7 @@ static int stm32_crc32c_cra_init(struct crypto_tfm *tfm)
{
struct stm32_crc_ctx *mctx = crypto_tfm_ctx(tfm);
- mctx->key = CRC_INIT_DEFAULT;
+ mctx->key = CRC32C_INIT_DEFAULT;
mctx->poly = CRC32C_POLY_LE;
return 0;
}
@@ -92,87 +96,135 @@ static int stm32_crc_setkey(struct crypto_shash *tfm, const u8 *key,
return 0;
}
+static struct stm32_crc *stm32_crc_get_next_crc(void)
+{
+ struct stm32_crc *crc;
+
+ spin_lock_bh(&crc_list.lock);
+ crc = list_first_entry(&crc_list.dev_list, struct stm32_crc, list);
+ if (crc)
+ list_move_tail(&crc->list, &crc_list.dev_list);
+ spin_unlock_bh(&crc_list.lock);
+
+ return crc;
+}
+
static int stm32_crc_init(struct shash_desc *desc)
{
struct stm32_crc_desc_ctx *ctx = shash_desc_ctx(desc);
struct stm32_crc_ctx *mctx = crypto_shash_ctx(desc->tfm);
struct stm32_crc *crc;
+ unsigned long flags;
- spin_lock_bh(&crc_list.lock);
- list_for_each_entry(crc, &crc_list.dev_list, list) {
- ctx->crc = crc;
- break;
- }
- spin_unlock_bh(&crc_list.lock);
+ crc = stm32_crc_get_next_crc();
+ if (!crc)
+ return -ENODEV;
+
+ pm_runtime_get_sync(crc->dev);
- pm_runtime_get_sync(ctx->crc->dev);
+ spin_lock_irqsave(&crc->lock, flags);
/* Reset, set key, poly and configure in bit reverse mode */
- writel_relaxed(bitrev32(mctx->key), ctx->crc->regs + CRC_INIT);
- writel_relaxed(bitrev32(mctx->poly), ctx->crc->regs + CRC_POL);
- writel_relaxed(CRC_CR_RESET | CRC_CR_REVERSE, ctx->crc->regs + CRC_CR);
+ writel_relaxed(bitrev32(mctx->key), crc->regs + CRC_INIT);
+ writel_relaxed(bitrev32(mctx->poly), crc->regs + CRC_POL);
+ writel_relaxed(CRC_CR_RESET | CRC_CR_REV_IN_WORD | CRC_CR_REV_OUT,
+ crc->regs + CRC_CR);
/* Store partial result */
- ctx->partial = readl_relaxed(ctx->crc->regs + CRC_DR);
- ctx->crc->nb_pending_bytes = 0;
+ ctx->partial = readl_relaxed(crc->regs + CRC_DR);
- pm_runtime_mark_last_busy(ctx->crc->dev);
- pm_runtime_put_autosuspend(ctx->crc->dev);
+ spin_unlock_irqrestore(&crc->lock, flags);
+
+ pm_runtime_mark_last_busy(crc->dev);
+ pm_runtime_put_autosuspend(crc->dev);
return 0;
}
-static int stm32_crc_update(struct shash_desc *desc, const u8 *d8,
- unsigned int length)
+static int burst_update(struct shash_desc *desc, const u8 *d8,
+ size_t length)
{
struct stm32_crc_desc_ctx *ctx = shash_desc_ctx(desc);
- struct stm32_crc *crc = ctx->crc;
- u32 *d32;
- unsigned int i;
+ struct stm32_crc_ctx *mctx = crypto_shash_ctx(desc->tfm);
+ struct stm32_crc *crc;
+ unsigned long flags;
+
+ crc = stm32_crc_get_next_crc();
+ if (!crc)
+ return -ENODEV;
pm_runtime_get_sync(crc->dev);
- if (unlikely(crc->nb_pending_bytes)) {
- while (crc->nb_pending_bytes != sizeof(u32) && length) {
- /* Fill in pending data */
- crc->pending_data[crc->nb_pending_bytes++] = *(d8++);
+ spin_lock_irqsave(&crc->lock, flags);
+
+ /*
+ * Restore the CRC previously calculated for this context as the
+ * init value and restore the polynomial configuration. Configure
+ * the input register for word data and the output register for
+ * bit-reversed data.
+ */
+ writel_relaxed(bitrev32(ctx->partial), crc->regs + CRC_INIT);
+ writel_relaxed(bitrev32(mctx->poly), crc->regs + CRC_POL);
+ writel_relaxed(CRC_CR_RESET | CRC_CR_REV_IN_WORD | CRC_CR_REV_OUT,
+ crc->regs + CRC_CR);
+
+ if (d8 != PTR_ALIGN(d8, sizeof(u32))) {
+ /* Configure for byte data */
+ writel_relaxed(CRC_CR_REV_IN_BYTE | CRC_CR_REV_OUT,
+ crc->regs + CRC_CR);
+ while (d8 != PTR_ALIGN(d8, sizeof(u32)) && length) {
+ writeb_relaxed(*d8++, crc->regs + CRC_DR);
length--;
}
-
- if (crc->nb_pending_bytes == sizeof(u32)) {
- /* Process completed pending data */
- writel_relaxed(*(u32 *)crc->pending_data,
- crc->regs + CRC_DR);
- crc->nb_pending_bytes = 0;
- }
+ /* Configure for word data */
+ writel_relaxed(CRC_CR_REV_IN_WORD | CRC_CR_REV_OUT,
+ crc->regs + CRC_CR);
}
- d32 = (u32 *)d8;
- for (i = 0; i < length >> 2; i++)
- /* Process 32 bits data */
- writel_relaxed(*(d32++), crc->regs + CRC_DR);
+ for (; length >= sizeof(u32); d8 += sizeof(u32), length -= sizeof(u32))
+ writel_relaxed(*((u32 *)d8), crc->regs + CRC_DR);
+
+ if (length) {
+ /* Configure for byte data */
+ writel_relaxed(CRC_CR_REV_IN_BYTE | CRC_CR_REV_OUT,
+ crc->regs + CRC_CR);
+ while (length--)
+ writeb_relaxed(*d8++, crc->regs + CRC_DR);
+ }
/* Store partial result */
ctx->partial = readl_relaxed(crc->regs + CRC_DR);
+ spin_unlock_irqrestore(&crc->lock, flags);
+
pm_runtime_mark_last_busy(crc->dev);
pm_runtime_put_autosuspend(crc->dev);
- /* Check for pending data (non 32 bits) */
- length &= 3;
- if (likely(!length))
- return 0;
+ return 0;
+}
- if ((crc->nb_pending_bytes + length) >= sizeof(u32)) {
- /* Shall not happen */
- dev_err(crc->dev, "Pending data overflow\n");
- return -EINVAL;
- }
+static int stm32_crc_update(struct shash_desc *desc, const u8 *d8,
+ unsigned int length)
+{
+ const unsigned int burst_sz = burst_size;
+ unsigned int rem_sz;
+ const u8 *cur;
+ size_t size;
+ int ret;
- d8 = (const u8 *)d32;
- for (i = 0; i < length; i++)
- /* Store pending data */
- crc->pending_data[crc->nb_pending_bytes++] = *(d8++);
+ if (!burst_sz)
+ return burst_update(desc, d8, length);
+
+ /* Digest the leading bytes that are not 32-bit aligned in the first loop pass */
+ size = min(length,
+ burst_sz + (unsigned int)d8 - ALIGN_DOWN((unsigned int)d8,
+ sizeof(u32)));
+ for (rem_sz = length, cur = d8; rem_sz;
+ rem_sz -= size, cur += size, size = min(rem_sz, burst_sz)) {
+ ret = burst_update(desc, cur, size);
+ if (ret)
+ return ret;
+ }
return 0;
}
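The first-pass size above is chosen so that the unaligned prefix of the buffer is consumed together with the first burst, leaving every later chunk 32-bit aligned and at most burst_sz bytes long. A stand-alone sketch of that arithmetic with assumed values (pointer 2 bytes past a word boundary, burst_size of 32, length of 100):

#include <stdint.h>
#include <stdio.h>

#define ALIGN_DOWN_4(x)	((x) & ~(uintptr_t)3)
#define MIN(a, b)	((a) < (b) ? (a) : (b))

int main(void)
{
	uintptr_t d8 = 0x1002;			/* assumed: 2 bytes past a word boundary */
	size_t length = 100, burst_sz = 32;	/* assumed burst_size module parameter */
	size_t size = MIN(length, burst_sz + (d8 - ALIGN_DOWN_4(d8)));

	for (size_t rem = length; rem;
	     rem -= size, d8 += size, size = MIN(rem, burst_sz))
		printf("burst_update(%zu bytes at %#lx)\n",
		       size, (unsigned long)d8);

	/* prints chunks of 34, 32, 32 and 2 bytes; every chunk after the first starts aligned */
	return 0;
}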
@@ -202,6 +254,8 @@ static int stm32_crc_digest(struct shash_desc *desc, const u8 *data,
return stm32_crc_init(desc) ?: stm32_crc_finup(desc, data, length, out);
}
+static unsigned int refcnt;
+static DEFINE_MUTEX(refcnt_lock);
static struct shash_alg algs[] = {
/* CRC-32 */
{
@@ -284,20 +338,29 @@ static int stm32_crc_probe(struct platform_device *pdev)
pm_runtime_get_noresume(dev);
pm_runtime_set_active(dev);
+ pm_runtime_irq_safe(dev);
pm_runtime_enable(dev);
+ spin_lock_init(&crc->lock);
+
platform_set_drvdata(pdev, crc);
spin_lock(&crc_list.lock);
list_add(&crc->list, &crc_list.dev_list);
spin_unlock(&crc_list.lock);
- ret = crypto_register_shashes(algs, ARRAY_SIZE(algs));
- if (ret) {
- dev_err(dev, "Failed to register\n");
- clk_disable_unprepare(crc->clk);
- return ret;
+ mutex_lock(&refcnt_lock);
+ if (!refcnt) {
+ ret = crypto_register_shashes(algs, ARRAY_SIZE(algs));
+ if (ret) {
+ mutex_unlock(&refcnt_lock);
+ dev_err(dev, "Failed to register\n");
+ clk_disable_unprepare(crc->clk);
+ return ret;
+ }
}
+ refcnt++;
+ mutex_unlock(&refcnt_lock);
dev_info(dev, "Initialized\n");
@@ -318,7 +381,10 @@ static int stm32_crc_remove(struct platform_device *pdev)
list_del(&crc->list);
spin_unlock(&crc_list.lock);
- crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
+ mutex_lock(&refcnt_lock);
+ if (!--refcnt)
+ crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
+ mutex_unlock(&refcnt_lock);
pm_runtime_disable(crc->dev);
pm_runtime_put_noidle(crc->dev);
@@ -328,34 +394,60 @@ static int stm32_crc_remove(struct platform_device *pdev)
return 0;
}
-#ifdef CONFIG_PM
-static int stm32_crc_runtime_suspend(struct device *dev)
+static int __maybe_unused stm32_crc_suspend(struct device *dev)
{
struct stm32_crc *crc = dev_get_drvdata(dev);
+ int ret;
- clk_disable_unprepare(crc->clk);
+ ret = pm_runtime_force_suspend(dev);
+ if (ret)
+ return ret;
+
+ clk_unprepare(crc->clk);
return 0;
}
-static int stm32_crc_runtime_resume(struct device *dev)
+static int __maybe_unused stm32_crc_resume(struct device *dev)
{
struct stm32_crc *crc = dev_get_drvdata(dev);
int ret;
- ret = clk_prepare_enable(crc->clk);
+ ret = clk_prepare(crc->clk);
if (ret) {
- dev_err(crc->dev, "Failed to prepare_enable clock\n");
+ dev_err(crc->dev, "Failed to prepare clock\n");
+ return ret;
+ }
+
+ return pm_runtime_force_resume(dev);
+}
+
+static int __maybe_unused stm32_crc_runtime_suspend(struct device *dev)
+{
+ struct stm32_crc *crc = dev_get_drvdata(dev);
+
+ clk_disable(crc->clk);
+
+ return 0;
+}
+
+static int __maybe_unused stm32_crc_runtime_resume(struct device *dev)
+{
+ struct stm32_crc *crc = dev_get_drvdata(dev);
+ int ret;
+
+ ret = clk_enable(crc->clk);
+ if (ret) {
+ dev_err(crc->dev, "Failed to enable clock\n");
return ret;
}
return 0;
}
-#endif
static const struct dev_pm_ops stm32_crc_pm_ops = {
- SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
- pm_runtime_force_resume)
+ SET_SYSTEM_SLEEP_PM_OPS(stm32_crc_suspend,
+ stm32_crc_resume)
SET_RUNTIME_PM_OPS(stm32_crc_runtime_suspend,
stm32_crc_runtime_resume, NULL)
};
diff --git a/drivers/crypto/stm32/stm32-hash.c b/drivers/crypto/stm32/stm32-hash.c
index 167b80eec437..03c5e6683805 100644
--- a/drivers/crypto/stm32/stm32-hash.c
+++ b/drivers/crypto/stm32/stm32-hash.c
@@ -507,6 +507,7 @@ static int stm32_hash_hmac_dma_send(struct stm32_hash_dev *hdev)
static int stm32_hash_dma_init(struct stm32_hash_dev *hdev)
{
struct dma_slave_config dma_conf;
+ struct dma_chan *chan;
int err;
memset(&dma_conf, 0, sizeof(dma_conf));
@@ -518,11 +519,11 @@ static int stm32_hash_dma_init(struct stm32_hash_dev *hdev)
dma_conf.dst_maxburst = hdev->dma_maxburst;
dma_conf.device_fc = false;
- hdev->dma_lch = dma_request_chan(hdev->dev, "in");
- if (IS_ERR(hdev->dma_lch)) {
- dev_err(hdev->dev, "Couldn't acquire a slave DMA channel.\n");
- return PTR_ERR(hdev->dma_lch);
- }
+ chan = dma_request_chan(hdev->dev, "in");
+ if (IS_ERR(chan))
+ return PTR_ERR(chan);
+
+ hdev->dma_lch = chan;
err = dmaengine_slave_config(hdev->dma_lch, &dma_conf);
if (err) {
@@ -1463,8 +1464,11 @@ static int stm32_hash_probe(struct platform_device *pdev)
hdev->clk = devm_clk_get(&pdev->dev, NULL);
if (IS_ERR(hdev->clk)) {
- dev_err(dev, "failed to get clock for hash (%lu)\n",
- PTR_ERR(hdev->clk));
+ if (PTR_ERR(hdev->clk) != -EPROBE_DEFER) {
+ dev_err(dev, "failed to get clock for hash (%lu)\n",
+ PTR_ERR(hdev->clk));
+ }
+
return PTR_ERR(hdev->clk);
}
@@ -1482,7 +1486,12 @@ static int stm32_hash_probe(struct platform_device *pdev)
pm_runtime_enable(dev);
hdev->rst = devm_reset_control_get(&pdev->dev, NULL);
- if (!IS_ERR(hdev->rst)) {
+ if (IS_ERR(hdev->rst)) {
+ if (PTR_ERR(hdev->rst) == -EPROBE_DEFER) {
+ ret = -EPROBE_DEFER;
+ goto err_reset;
+ }
+ } else {
reset_control_assert(hdev->rst);
udelay(2);
reset_control_deassert(hdev->rst);
@@ -1493,8 +1502,15 @@ static int stm32_hash_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, hdev);
ret = stm32_hash_dma_init(hdev);
- if (ret)
+ switch (ret) {
+ case 0:
+ break;
+ case -ENOENT:
dev_dbg(dev, "DMA mode not available\n");
+ break;
+ default:
+ goto err_dma;
+ }
spin_lock(&stm32_hash.lock);
list_add_tail(&hdev->list, &stm32_hash.dev_list);
@@ -1532,10 +1548,10 @@ err_engine:
spin_lock(&stm32_hash.lock);
list_del(&hdev->list);
spin_unlock(&stm32_hash.lock);
-
+err_dma:
if (hdev->dma_lch)
dma_release_channel(hdev->dma_lch);
-
+err_reset:
pm_runtime_disable(dev);
pm_runtime_put_noidle(dev);
diff --git a/drivers/dax/device.c b/drivers/dax/device.c
index 1af823b2fe6b..4c0af2eb7e19 100644
--- a/drivers/dax/device.c
+++ b/drivers/dax/device.c
@@ -377,6 +377,7 @@ static int dax_open(struct inode *inode, struct file *filp)
inode->i_mapping->a_ops = &dev_dax_aops;
filp->f_mapping = inode->i_mapping;
filp->f_wb_err = filemap_sample_wb_err(filp->f_mapping);
+ filp->f_sb_err = file_sample_sb_err(filp);
filp->private_data = dev_dax;
inode->i_flags = S_DAX;
diff --git a/drivers/devfreq/Kconfig b/drivers/devfreq/Kconfig
index 0b1df12e0f21..37dc40d1fcfb 100644
--- a/drivers/devfreq/Kconfig
+++ b/drivers/devfreq/Kconfig
@@ -91,6 +91,14 @@ config ARM_EXYNOS_BUS_DEVFREQ
and adjusts the operating frequencies and voltages with OPP support.
This does not yet operate with optimal voltages.
+config ARM_IMX_BUS_DEVFREQ
+ tristate "i.MX Generic Bus DEVFREQ Driver"
+ depends on ARCH_MXC || COMPILE_TEST
+ select DEVFREQ_GOV_USERSPACE
+ help
+ This adds the generic DEVFREQ driver for i.MX interconnects. It
+ allows adjusting NIC/NOC frequency.
+
config ARM_IMX8M_DDRC_DEVFREQ
tristate "i.MX8M DDRC DEVFREQ Driver"
depends on (ARCH_MXC && HAVE_ARM_SMCCC) || \
diff --git a/drivers/devfreq/Makefile b/drivers/devfreq/Makefile
index 3eb4d5e6635c..3ca1ad0ecb97 100644
--- a/drivers/devfreq/Makefile
+++ b/drivers/devfreq/Makefile
@@ -9,6 +9,7 @@ obj-$(CONFIG_DEVFREQ_GOV_PASSIVE) += governor_passive.o
# DEVFREQ Drivers
obj-$(CONFIG_ARM_EXYNOS_BUS_DEVFREQ) += exynos-bus.o
+obj-$(CONFIG_ARM_IMX_BUS_DEVFREQ) += imx-bus.o
obj-$(CONFIG_ARM_IMX8M_DDRC_DEVFREQ) += imx8m-ddrc.o
obj-$(CONFIG_ARM_RK3399_DMC_DEVFREQ) += rk3399_dmc.o
obj-$(CONFIG_ARM_TEGRA_DEVFREQ) += tegra30-devfreq.o
diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
index 6fecd11dafdd..52b9c3e141f3 100644
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -60,12 +60,12 @@ static struct devfreq *find_device_devfreq(struct device *dev)
{
struct devfreq *tmp_devfreq;
+ lockdep_assert_held(&devfreq_list_lock);
+
if (IS_ERR_OR_NULL(dev)) {
pr_err("DEVFREQ: %s: Invalid parameters\n", __func__);
return ERR_PTR(-EINVAL);
}
- WARN(!mutex_is_locked(&devfreq_list_lock),
- "devfreq_list_lock must be locked.");
list_for_each_entry(tmp_devfreq, &devfreq_list, node) {
if (tmp_devfreq->dev.parent == dev)
@@ -258,12 +258,12 @@ static struct devfreq_governor *find_devfreq_governor(const char *name)
{
struct devfreq_governor *tmp_governor;
+ lockdep_assert_held(&devfreq_list_lock);
+
if (IS_ERR_OR_NULL(name)) {
pr_err("DEVFREQ: %s: Invalid parameters\n", __func__);
return ERR_PTR(-EINVAL);
}
- WARN(!mutex_is_locked(&devfreq_list_lock),
- "devfreq_list_lock must be locked.");
list_for_each_entry(tmp_governor, &devfreq_governor_list, node) {
if (!strncmp(tmp_governor->name, name, DEVFREQ_NAME_LEN))
@@ -289,12 +289,12 @@ static struct devfreq_governor *try_then_request_governor(const char *name)
struct devfreq_governor *governor;
int err = 0;
+ lockdep_assert_held(&devfreq_list_lock);
+
if (IS_ERR_OR_NULL(name)) {
pr_err("DEVFREQ: %s: Invalid parameters\n", __func__);
return ERR_PTR(-EINVAL);
}
- WARN(!mutex_is_locked(&devfreq_list_lock),
- "devfreq_list_lock must be locked.");
governor = find_devfreq_governor(name);
if (IS_ERR(governor)) {
@@ -392,10 +392,7 @@ int update_devfreq(struct devfreq *devfreq)
int err = 0;
u32 flags = 0;
- if (!mutex_is_locked(&devfreq->lock)) {
- WARN(true, "devfreq->lock must be locked by the caller.\n");
- return -EINVAL;
- }
+ lockdep_assert_held(&devfreq->lock);
if (!devfreq->governor)
return -EINVAL;
@@ -768,7 +765,7 @@ struct devfreq *devfreq_add_device(struct device *dev,
devfreq->dev.release = devfreq_dev_release;
INIT_LIST_HEAD(&devfreq->node);
devfreq->profile = profile;
- strncpy(devfreq->governor_name, governor_name, DEVFREQ_NAME_LEN);
+ strscpy(devfreq->governor_name, governor_name, DEVFREQ_NAME_LEN);
devfreq->previous_freq = profile->initial_freq;
devfreq->last_status.current_frequency = profile->initial_freq;
devfreq->data = data;
diff --git a/drivers/devfreq/imx-bus.c b/drivers/devfreq/imx-bus.c
new file mode 100644
index 000000000000..4f38455ad742
--- /dev/null
+++ b/drivers/devfreq/imx-bus.c
@@ -0,0 +1,179 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2019 NXP
+ */
+
+#include <linux/clk.h>
+#include <linux/devfreq.h>
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/pm_opp.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+struct imx_bus {
+ struct devfreq_dev_profile profile;
+ struct devfreq *devfreq;
+ struct clk *clk;
+ struct platform_device *icc_pdev;
+};
+
+static int imx_bus_target(struct device *dev,
+ unsigned long *freq, u32 flags)
+{
+ struct dev_pm_opp *new_opp;
+ int ret;
+
+ new_opp = devfreq_recommended_opp(dev, freq, flags);
+ if (IS_ERR(new_opp)) {
+ ret = PTR_ERR(new_opp);
+ dev_err(dev, "failed to get recommended opp: %d\n", ret);
+ return ret;
+ }
+ dev_pm_opp_put(new_opp);
+
+ return dev_pm_opp_set_rate(dev, *freq);
+}
+
+static int imx_bus_get_cur_freq(struct device *dev, unsigned long *freq)
+{
+ struct imx_bus *priv = dev_get_drvdata(dev);
+
+ *freq = clk_get_rate(priv->clk);
+
+ return 0;
+}
+
+static int imx_bus_get_dev_status(struct device *dev,
+ struct devfreq_dev_status *stat)
+{
+ struct imx_bus *priv = dev_get_drvdata(dev);
+
+ stat->busy_time = 0;
+ stat->total_time = 0;
+ stat->current_frequency = clk_get_rate(priv->clk);
+
+ return 0;
+}
+
+static void imx_bus_exit(struct device *dev)
+{
+ struct imx_bus *priv = dev_get_drvdata(dev);
+
+ dev_pm_opp_of_remove_table(dev);
+ platform_device_unregister(priv->icc_pdev);
+}
+
+/* imx_bus_init_icc() - register matching icc provider if required */
+static int imx_bus_init_icc(struct device *dev)
+{
+ struct imx_bus *priv = dev_get_drvdata(dev);
+ const char *icc_driver_name;
+
+ if (!of_get_property(dev->of_node, "#interconnect-cells", 0))
+ return 0;
+ if (!IS_ENABLED(CONFIG_INTERCONNECT_IMX)) {
+ dev_warn(dev, "imx interconnect drivers disabled\n");
+ return 0;
+ }
+
+ icc_driver_name = of_device_get_match_data(dev);
+ if (!icc_driver_name) {
+ dev_err(dev, "unknown interconnect driver\n");
+ return 0;
+ }
+
+ priv->icc_pdev = platform_device_register_data(
+ dev, icc_driver_name, -1, NULL, 0);
+ if (IS_ERR(priv->icc_pdev)) {
+ dev_err(dev, "failed to register icc provider %s: %ld\n",
+ icc_driver_name, PTR_ERR(priv->icc_pdev));
+ return PTR_ERR(priv->icc_pdev);
+ }
+
+ return 0;
+}
+
+static int imx_bus_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct imx_bus *priv;
+ const char *gov = DEVFREQ_GOV_USERSPACE;
+ int ret;
+
+ priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ /*
+ * Fetch the clock to adjust but don't explicitly enable.
+ *
+ * For the imx bus clock, clk_set_rate() is safe whether the clock is
+ * on or off, and some peripheral side-buses might be off unless
+ * enabled by drivers for devices on those specific buses.
+ *
+ * Rate adjustment on a disabled bus clock just takes effect later.
+ */
+ priv->clk = devm_clk_get(dev, NULL);
+ if (IS_ERR(priv->clk)) {
+ ret = PTR_ERR(priv->clk);
+ dev_err(dev, "failed to fetch clk: %d\n", ret);
+ return ret;
+ }
+ platform_set_drvdata(pdev, priv);
+
+ ret = dev_pm_opp_of_add_table(dev);
+ if (ret < 0) {
+ dev_err(dev, "failed to get OPP table\n");
+ return ret;
+ }
+
+ priv->profile.polling_ms = 1000;
+ priv->profile.target = imx_bus_target;
+ priv->profile.get_dev_status = imx_bus_get_dev_status;
+ priv->profile.exit = imx_bus_exit;
+ priv->profile.get_cur_freq = imx_bus_get_cur_freq;
+ priv->profile.initial_freq = clk_get_rate(priv->clk);
+
+ priv->devfreq = devm_devfreq_add_device(dev, &priv->profile,
+ gov, NULL);
+ if (IS_ERR(priv->devfreq)) {
+ ret = PTR_ERR(priv->devfreq);
+ dev_err(dev, "failed to add devfreq device: %d\n", ret);
+ goto err;
+ }
+
+ ret = imx_bus_init_icc(dev);
+ if (ret)
+ goto err;
+
+ return 0;
+
+err:
+ dev_pm_opp_of_remove_table(dev);
+ return ret;
+}
+
+static const struct of_device_id imx_bus_of_match[] = {
+ { .compatible = "fsl,imx8mq-noc", .data = "imx8mq-interconnect", },
+ { .compatible = "fsl,imx8mm-noc", .data = "imx8mm-interconnect", },
+ { .compatible = "fsl,imx8mn-noc", .data = "imx8mn-interconnect", },
+ { .compatible = "fsl,imx8m-noc", },
+ { .compatible = "fsl,imx8m-nic", },
+ { /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, imx_bus_of_match);
+
+static struct platform_driver imx_bus_platdrv = {
+ .probe = imx_bus_probe,
+ .driver = {
+ .name = "imx-bus-devfreq",
+ .of_match_table = of_match_ptr(imx_bus_of_match),
+ },
+};
+module_platform_driver(imx_bus_platdrv);
+
+MODULE_DESCRIPTION("Generic i.MX bus frequency scaling driver");
+MODULE_AUTHOR("Leonard Crestez <leonard.crestez@nxp.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/devfreq/tegra30-devfreq.c b/drivers/devfreq/tegra30-devfreq.c
index 28b2c7ca416e..e94a27804c20 100644
--- a/drivers/devfreq/tegra30-devfreq.c
+++ b/drivers/devfreq/tegra30-devfreq.c
@@ -420,7 +420,7 @@ tegra_actmon_cpufreq_contribution(struct tegra_devfreq *tegra,
static_cpu_emc_freq = actmon_cpu_to_emc_rate(tegra, cpu_freq);
- if (dev_freq >= static_cpu_emc_freq)
+ if (dev_freq + actmon_dev->boost_freq >= static_cpu_emc_freq)
return 0;
return static_cpu_emc_freq;
@@ -807,10 +807,9 @@ static int tegra_devfreq_probe(struct platform_device *pdev)
}
err = platform_get_irq(pdev, 0);
- if (err < 0) {
- dev_err(&pdev->dev, "Failed to get IRQ: %d\n", err);
+ if (err < 0)
return err;
- }
+
tegra->irq = err;
irq_set_status_flags(tegra->irq, IRQ_NOAUTOEN);
diff --git a/drivers/dma-buf/Makefile b/drivers/dma-buf/Makefile
index 9c190026bfab..995e05f609ff 100644
--- a/drivers/dma-buf/Makefile
+++ b/drivers/dma-buf/Makefile
@@ -9,6 +9,7 @@ obj-$(CONFIG_UDMABUF) += udmabuf.o
dmabuf_selftests-y := \
selftest.o \
- st-dma-fence.o
+ st-dma-fence.o \
+ st-dma-fence-chain.o
obj-$(CONFIG_DMABUF_SELFTESTS) += dmabuf_selftests.o
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index 07df88f2e305..01ce125f8e8d 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -691,6 +691,8 @@ dma_buf_dynamic_attach(struct dma_buf *dmabuf, struct device *dev,
attach->dev = dev;
attach->dmabuf = dmabuf;
+ if (importer_ops)
+ attach->peer2peer = importer_ops->allow_peer2peer;
attach->importer_ops = importer_ops;
attach->importer_priv = importer_priv;
diff --git a/drivers/dma-buf/dma-fence-chain.c b/drivers/dma-buf/dma-fence-chain.c
index 44a741677d25..c435bbba851c 100644
--- a/drivers/dma-buf/dma-fence-chain.c
+++ b/drivers/dma-buf/dma-fence-chain.c
@@ -62,7 +62,8 @@ struct dma_fence *dma_fence_chain_walk(struct dma_fence *fence)
replacement = NULL;
}
- tmp = cmpxchg((void **)&chain->prev, (void *)prev, (void *)replacement);
+ tmp = cmpxchg((struct dma_fence __force **)&chain->prev,
+ prev, replacement);
if (tmp == prev)
dma_fence_put(tmp);
else
@@ -98,6 +99,12 @@ int dma_fence_chain_find_seqno(struct dma_fence **pfence, uint64_t seqno)
return -EINVAL;
dma_fence_chain_for_each(*pfence, &chain->base) {
+ if ((*pfence)->seqno < seqno) { /* already signaled */
+ dma_fence_put(*pfence);
+ *pfence = NULL;
+ break;
+ }
+
if ((*pfence)->context != chain->base.context ||
to_dma_fence_chain(*pfence)->prev_seqno < seqno)
break;
@@ -221,6 +228,7 @@ EXPORT_SYMBOL(dma_fence_chain_ops);
* @chain: the chain node to initialize
* @prev: the previous fence
* @fence: the current fence
+ * @seqno: the sequence number (syncpt) of the fence within the chain
*
* Initialize a new chain node and either start a new chain or add the node to
* the existing chain of the previous fence.
diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
index 052a41e2451c..90edf2b281b0 100644
--- a/drivers/dma-buf/dma-fence.c
+++ b/drivers/dma-buf/dma-fence.c
@@ -106,7 +106,7 @@ EXPORT_SYMBOL(dma_fence_get_stub);
u64 dma_fence_context_alloc(unsigned num)
{
WARN_ON(!num);
- return atomic64_add_return(num, &dma_fence_context_counter) - num;
+ return atomic64_fetch_add(num, &dma_fence_context_counter);
}
EXPORT_SYMBOL(dma_fence_context_alloc);
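
Both forms return the first context number of the newly reserved block; atomic64_fetch_add() yields the pre-increment value directly instead of subtracting num from the post-increment value. A small userspace sketch of the equivalence (illustrative only, using C11 atomics rather than the kernel helpers):

#include <assert.h>
#include <stdatomic.h>

static atomic_ullong counter = 1;

int main(void)
{
	/* "add_return" style: post-increment value minus the increment */
	unsigned long long a = (atomic_fetch_add(&counter, 4) + 4) - 4;
	/* "fetch_add" style: the pre-increment value directly */
	unsigned long long b = atomic_fetch_add(&counter, 4);

	assert(a == 1 && b == 5);
	return 0;
}
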
diff --git a/drivers/dma-buf/selftests.h b/drivers/dma-buf/selftests.h
index 5320386f02e5..55918ef9adab 100644
--- a/drivers/dma-buf/selftests.h
+++ b/drivers/dma-buf/selftests.h
@@ -11,3 +11,4 @@
*/
selftest(sanitycheck, __sanitycheck__) /* keep first (igt selfcheck) */
selftest(dma_fence, dma_fence)
+selftest(dma_fence_chain, dma_fence_chain)
diff --git a/drivers/dma-buf/st-dma-fence-chain.c b/drivers/dma-buf/st-dma-fence-chain.c
new file mode 100644
index 000000000000..5d45ba7ba3cd
--- /dev/null
+++ b/drivers/dma-buf/st-dma-fence-chain.c
@@ -0,0 +1,715 @@
+// SPDX-License-Identifier: MIT
+
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include <linux/delay.h>
+#include <linux/dma-fence.h>
+#include <linux/dma-fence-chain.h>
+#include <linux/kernel.h>
+#include <linux/kthread.h>
+#include <linux/mm.h>
+#include <linux/sched/signal.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/random.h>
+
+#include "selftest.h"
+
+#define CHAIN_SZ (4 << 10)
+
+static struct kmem_cache *slab_fences;
+
+static inline struct mock_fence {
+ struct dma_fence base;
+ spinlock_t lock;
+} *to_mock_fence(struct dma_fence *f) {
+ return container_of(f, struct mock_fence, base);
+}
+
+static const char *mock_name(struct dma_fence *f)
+{
+ return "mock";
+}
+
+static void mock_fence_release(struct dma_fence *f)
+{
+ kmem_cache_free(slab_fences, to_mock_fence(f));
+}
+
+static const struct dma_fence_ops mock_ops = {
+ .get_driver_name = mock_name,
+ .get_timeline_name = mock_name,
+ .release = mock_fence_release,
+};
+
+static struct dma_fence *mock_fence(void)
+{
+ struct mock_fence *f;
+
+ f = kmem_cache_alloc(slab_fences, GFP_KERNEL);
+ if (!f)
+ return NULL;
+
+ spin_lock_init(&f->lock);
+ dma_fence_init(&f->base, &mock_ops, &f->lock, 0, 0);
+
+ return &f->base;
+}
+
+static inline struct mock_chain {
+ struct dma_fence_chain base;
+} *to_mock_chain(struct dma_fence *f) {
+ return container_of(f, struct mock_chain, base.base);
+}
+
+static struct dma_fence *mock_chain(struct dma_fence *prev,
+ struct dma_fence *fence,
+ u64 seqno)
+{
+ struct mock_chain *f;
+
+ f = kmalloc(sizeof(*f), GFP_KERNEL);
+ if (!f)
+ return NULL;
+
+ dma_fence_chain_init(&f->base,
+ dma_fence_get(prev),
+ dma_fence_get(fence),
+ seqno);
+
+ return &f->base.base;
+}
+
+static int sanitycheck(void *arg)
+{
+ struct dma_fence *f, *chain;
+ int err = 0;
+
+ f = mock_fence();
+ if (!f)
+ return -ENOMEM;
+
+ chain = mock_chain(NULL, f, 1);
+ if (!chain)
+ err = -ENOMEM;
+
+ dma_fence_signal(f);
+ dma_fence_put(f);
+
+ dma_fence_put(chain);
+
+ return err;
+}
+
+struct fence_chains {
+ unsigned int chain_length;
+ struct dma_fence **fences;
+ struct dma_fence **chains;
+
+ struct dma_fence *tail;
+};
+
+static uint64_t seqno_inc(unsigned int i)
+{
+ return i + 1;
+}
+
+static int fence_chains_init(struct fence_chains *fc, unsigned int count,
+ uint64_t (*seqno_fn)(unsigned int))
+{
+ unsigned int i;
+ int err = 0;
+
+ fc->chains = kvmalloc_array(count, sizeof(*fc->chains),
+ GFP_KERNEL | __GFP_ZERO);
+ if (!fc->chains)
+ return -ENOMEM;
+
+ fc->fences = kvmalloc_array(count, sizeof(*fc->fences),
+ GFP_KERNEL | __GFP_ZERO);
+ if (!fc->fences) {
+ err = -ENOMEM;
+ goto err_chains;
+ }
+
+ fc->tail = NULL;
+ for (i = 0; i < count; i++) {
+ fc->fences[i] = mock_fence();
+ if (!fc->fences[i]) {
+ err = -ENOMEM;
+ goto unwind;
+ }
+
+ fc->chains[i] = mock_chain(fc->tail,
+ fc->fences[i],
+ seqno_fn(i));
+ if (!fc->chains[i]) {
+ err = -ENOMEM;
+ goto unwind;
+ }
+
+ fc->tail = fc->chains[i];
+ }
+
+ fc->chain_length = i;
+ return 0;
+
+unwind:
+ for (i = 0; i < count; i++) {
+ dma_fence_put(fc->fences[i]);
+ dma_fence_put(fc->chains[i]);
+ }
+ kvfree(fc->fences);
+err_chains:
+ kvfree(fc->chains);
+ return err;
+}
+
+static void fence_chains_fini(struct fence_chains *fc)
+{
+ unsigned int i;
+
+ for (i = 0; i < fc->chain_length; i++) {
+ dma_fence_signal(fc->fences[i]);
+ dma_fence_put(fc->fences[i]);
+ }
+ kvfree(fc->fences);
+
+ for (i = 0; i < fc->chain_length; i++)
+ dma_fence_put(fc->chains[i]);
+ kvfree(fc->chains);
+}
+
+static int find_seqno(void *arg)
+{
+ struct fence_chains fc;
+ struct dma_fence *fence;
+ int err;
+ int i;
+
+ err = fence_chains_init(&fc, 64, seqno_inc);
+ if (err)
+ return err;
+
+ fence = dma_fence_get(fc.tail);
+ err = dma_fence_chain_find_seqno(&fence, 0);
+ dma_fence_put(fence);
+ if (err) {
+ pr_err("Reported %d for find_seqno(0)!\n", err);
+ goto err;
+ }
+
+ for (i = 0; i < fc.chain_length; i++) {
+ fence = dma_fence_get(fc.tail);
+ err = dma_fence_chain_find_seqno(&fence, i + 1);
+ dma_fence_put(fence);
+ if (err) {
+ pr_err("Reported %d for find_seqno(%d:%d)!\n",
+ err, fc.chain_length + 1, i + 1);
+ goto err;
+ }
+ if (fence != fc.chains[i]) {
+ pr_err("Incorrect fence reported by find_seqno(%d:%d)\n",
+ fc.chain_length + 1, i + 1);
+ err = -EINVAL;
+ goto err;
+ }
+
+ dma_fence_get(fence);
+ err = dma_fence_chain_find_seqno(&fence, i + 1);
+ dma_fence_put(fence);
+ if (err) {
+ pr_err("Error reported for finding self\n");
+ goto err;
+ }
+ if (fence != fc.chains[i]) {
+ pr_err("Incorrect fence reported by find self\n");
+ err = -EINVAL;
+ goto err;
+ }
+
+ dma_fence_get(fence);
+ err = dma_fence_chain_find_seqno(&fence, i + 2);
+ dma_fence_put(fence);
+ if (!err) {
+ pr_err("Error not reported for future fence: find_seqno(%d:%d)!\n",
+ i + 1, i + 2);
+ err = -EINVAL;
+ goto err;
+ }
+
+ dma_fence_get(fence);
+ err = dma_fence_chain_find_seqno(&fence, i);
+ dma_fence_put(fence);
+ if (err) {
+ pr_err("Error reported for previous fence!\n");
+ goto err;
+ }
+ if (i > 0 && fence != fc.chains[i - 1]) {
+ pr_err("Incorrect fence reported by find_seqno(%d:%d)\n",
+ i + 1, i);
+ err = -EINVAL;
+ goto err;
+ }
+ }
+
+err:
+ fence_chains_fini(&fc);
+ return err;
+}
+
+static int find_signaled(void *arg)
+{
+ struct fence_chains fc;
+ struct dma_fence *fence;
+ int err;
+
+ err = fence_chains_init(&fc, 2, seqno_inc);
+ if (err)
+ return err;
+
+ dma_fence_signal(fc.fences[0]);
+
+ fence = dma_fence_get(fc.tail);
+ err = dma_fence_chain_find_seqno(&fence, 1);
+ dma_fence_put(fence);
+ if (err) {
+ pr_err("Reported %d for find_seqno()!\n", err);
+ goto err;
+ }
+
+ if (fence && fence != fc.chains[0]) {
+ pr_err("Incorrect chain-fence.seqno:%lld reported for completed seqno:1\n",
+ fence->seqno);
+
+ dma_fence_get(fence);
+ err = dma_fence_chain_find_seqno(&fence, 1);
+ dma_fence_put(fence);
+ if (err)
+ pr_err("Reported %d for finding self!\n", err);
+
+ err = -EINVAL;
+ }
+
+err:
+ fence_chains_fini(&fc);
+ return err;
+}
+
+static int find_out_of_order(void *arg)
+{
+ struct fence_chains fc;
+ struct dma_fence *fence;
+ int err;
+
+ err = fence_chains_init(&fc, 3, seqno_inc);
+ if (err)
+ return err;
+
+ dma_fence_signal(fc.fences[1]);
+
+ fence = dma_fence_get(fc.tail);
+ err = dma_fence_chain_find_seqno(&fence, 2);
+ dma_fence_put(fence);
+ if (err) {
+ pr_err("Reported %d for find_seqno()!\n", err);
+ goto err;
+ }
+
+ if (fence && fence != fc.chains[1]) {
+ pr_err("Incorrect chain-fence.seqno:%lld reported for completed seqno:2\n",
+ fence->seqno);
+
+ dma_fence_get(fence);
+ err = dma_fence_chain_find_seqno(&fence, 2);
+ dma_fence_put(fence);
+ if (err)
+ pr_err("Reported %d for finding self!\n", err);
+
+ err = -EINVAL;
+ }
+
+err:
+ fence_chains_fini(&fc);
+ return err;
+}
+
+static uint64_t seqno_inc2(unsigned int i)
+{
+ return 2 * i + 2;
+}
+
+static int find_gap(void *arg)
+{
+ struct fence_chains fc;
+ struct dma_fence *fence;
+ int err;
+ int i;
+
+ err = fence_chains_init(&fc, 64, seqno_inc2);
+ if (err)
+ return err;
+
+ for (i = 0; i < fc.chain_length; i++) {
+ fence = dma_fence_get(fc.tail);
+ err = dma_fence_chain_find_seqno(&fence, 2 * i + 1);
+ dma_fence_put(fence);
+ if (err) {
+ pr_err("Reported %d for find_seqno(%d:%d)!\n",
+ err, fc.chain_length + 1, 2 * i + 1);
+ goto err;
+ }
+ if (fence != fc.chains[i]) {
+ pr_err("Incorrect fence.seqno:%lld reported by find_seqno(%d:%d)\n",
+ fence->seqno,
+ fc.chain_length + 1,
+ 2 * i + 1);
+ err = -EINVAL;
+ goto err;
+ }
+
+ dma_fence_get(fence);
+ err = dma_fence_chain_find_seqno(&fence, 2 * i + 2);
+ dma_fence_put(fence);
+ if (err) {
+ pr_err("Error reported for finding self\n");
+ goto err;
+ }
+ if (fence != fc.chains[i]) {
+ pr_err("Incorrect fence reported by find self\n");
+ err = -EINVAL;
+ goto err;
+ }
+ }
+
+err:
+ fence_chains_fini(&fc);
+ return err;
+}
+
+struct find_race {
+ struct fence_chains fc;
+ atomic_t children;
+};
+
+static int __find_race(void *arg)
+{
+ struct find_race *data = arg;
+ int err = 0;
+
+ while (!kthread_should_stop()) {
+ struct dma_fence *fence = dma_fence_get(data->fc.tail);
+ int seqno;
+
+ seqno = prandom_u32_max(data->fc.chain_length) + 1;
+
+ err = dma_fence_chain_find_seqno(&fence, seqno);
+ if (err) {
+ pr_err("Failed to find fence seqno:%d\n",
+ seqno);
+ dma_fence_put(fence);
+ break;
+ }
+ if (!fence)
+ goto signal;
+
+ err = dma_fence_chain_find_seqno(&fence, seqno);
+ if (err) {
+ pr_err("Reported an invalid fence for find-self:%d\n",
+ seqno);
+ dma_fence_put(fence);
+ break;
+ }
+
+ if (fence->seqno < seqno) {
+ pr_err("Reported an earlier fence.seqno:%lld for seqno:%d\n",
+ fence->seqno, seqno);
+ err = -EINVAL;
+ dma_fence_put(fence);
+ break;
+ }
+
+ dma_fence_put(fence);
+
+signal:
+ seqno = prandom_u32_max(data->fc.chain_length - 1);
+ dma_fence_signal(data->fc.fences[seqno]);
+ cond_resched();
+ }
+
+ if (atomic_dec_and_test(&data->children))
+ wake_up_var(&data->children);
+ return err;
+}
+
+static int find_race(void *arg)
+{
+ struct find_race data;
+ int ncpus = num_online_cpus();
+ struct task_struct **threads;
+ unsigned long count;
+ int err;
+ int i;
+
+ err = fence_chains_init(&data.fc, CHAIN_SZ, seqno_inc);
+ if (err)
+ return err;
+
+ threads = kmalloc_array(ncpus, sizeof(*threads), GFP_KERNEL);
+ if (!threads) {
+ err = -ENOMEM;
+ goto err;
+ }
+
+ atomic_set(&data.children, 0);
+ for (i = 0; i < ncpus; i++) {
+ threads[i] = kthread_run(__find_race, &data, "dmabuf/%d", i);
+ if (IS_ERR(threads[i])) {
+ ncpus = i;
+ break;
+ }
+ atomic_inc(&data.children);
+ get_task_struct(threads[i]);
+ }
+
+ wait_var_event_timeout(&data.children,
+ !atomic_read(&data.children),
+ 5 * HZ);
+
+ for (i = 0; i < ncpus; i++) {
+ int ret;
+
+ ret = kthread_stop(threads[i]);
+ if (ret && !err)
+ err = ret;
+ put_task_struct(threads[i]);
+ }
+ kfree(threads);
+
+ count = 0;
+ for (i = 0; i < data.fc.chain_length; i++)
+ if (dma_fence_is_signaled(data.fc.fences[i]))
+ count++;
+ pr_info("Completed %lu cycles\n", count);
+
+err:
+ fence_chains_fini(&data.fc);
+ return err;
+}
+
+static int signal_forward(void *arg)
+{
+ struct fence_chains fc;
+ int err;
+ int i;
+
+ err = fence_chains_init(&fc, 64, seqno_inc);
+ if (err)
+ return err;
+
+ for (i = 0; i < fc.chain_length; i++) {
+ dma_fence_signal(fc.fences[i]);
+
+ if (!dma_fence_is_signaled(fc.chains[i])) {
+ pr_err("chain[%d] not signaled!\n", i);
+ err = -EINVAL;
+ goto err;
+ }
+
+ if (i + 1 < fc.chain_length &&
+ dma_fence_is_signaled(fc.chains[i + 1])) {
+ pr_err("chain[%d] is signaled!\n", i);
+ err = -EINVAL;
+ goto err;
+ }
+ }
+
+err:
+ fence_chains_fini(&fc);
+ return err;
+}
+
+static int signal_backward(void *arg)
+{
+ struct fence_chains fc;
+ int err;
+ int i;
+
+ err = fence_chains_init(&fc, 64, seqno_inc);
+ if (err)
+ return err;
+
+ for (i = fc.chain_length; i--; ) {
+ dma_fence_signal(fc.fences[i]);
+
+ if (i > 0 && dma_fence_is_signaled(fc.chains[i])) {
+ pr_err("chain[%d] is signaled!\n", i);
+ err = -EINVAL;
+ goto err;
+ }
+ }
+
+ for (i = 0; i < fc.chain_length; i++) {
+ if (!dma_fence_is_signaled(fc.chains[i])) {
+ pr_err("chain[%d] was not signaled!\n", i);
+ err = -EINVAL;
+ goto err;
+ }
+ }
+
+err:
+ fence_chains_fini(&fc);
+ return err;
+}
+
+static int __wait_fence_chains(void *arg)
+{
+ struct fence_chains *fc = arg;
+
+ if (dma_fence_wait(fc->tail, false))
+ return -EIO;
+
+ return 0;
+}
+
+static int wait_forward(void *arg)
+{
+ struct fence_chains fc;
+ struct task_struct *tsk;
+ int err;
+ int i;
+
+ err = fence_chains_init(&fc, CHAIN_SZ, seqno_inc);
+ if (err)
+ return err;
+
+ tsk = kthread_run(__wait_fence_chains, &fc, "dmabuf/wait");
+ if (IS_ERR(tsk)) {
+ err = PTR_ERR(tsk);
+ goto err;
+ }
+ get_task_struct(tsk);
+ yield_to(tsk, true);
+
+ for (i = 0; i < fc.chain_length; i++)
+ dma_fence_signal(fc.fences[i]);
+
+ err = kthread_stop(tsk);
+ put_task_struct(tsk);
+
+err:
+ fence_chains_fini(&fc);
+ return err;
+}
+
+static int wait_backward(void *arg)
+{
+ struct fence_chains fc;
+ struct task_struct *tsk;
+ int err;
+ int i;
+
+ err = fence_chains_init(&fc, CHAIN_SZ, seqno_inc);
+ if (err)
+ return err;
+
+ tsk = kthread_run(__wait_fence_chains, &fc, "dmabuf/wait");
+ if (IS_ERR(tsk)) {
+ err = PTR_ERR(tsk);
+ goto err;
+ }
+ get_task_struct(tsk);
+ yield_to(tsk, true);
+
+ for (i = fc.chain_length; i--; )
+ dma_fence_signal(fc.fences[i]);
+
+ err = kthread_stop(tsk);
+ put_task_struct(tsk);
+
+err:
+ fence_chains_fini(&fc);
+ return err;
+}
+
+static void randomise_fences(struct fence_chains *fc)
+{
+ unsigned int count = fc->chain_length;
+
+ /* Fisher-Yates shuffle courtesy of Knuth */
+ while (--count) {
+ unsigned int swp;
+
+ swp = prandom_u32_max(count + 1);
+ if (swp == count)
+ continue;
+
+ swap(fc->fences[count], fc->fences[swp]);
+ }
+}
+
+static int wait_random(void *arg)
+{
+ struct fence_chains fc;
+ struct task_struct *tsk;
+ int err;
+ int i;
+
+ err = fence_chains_init(&fc, CHAIN_SZ, seqno_inc);
+ if (err)
+ return err;
+
+ randomise_fences(&fc);
+
+ tsk = kthread_run(__wait_fence_chains, &fc, "dmabuf/wait");
+ if (IS_ERR(tsk)) {
+ err = PTR_ERR(tsk);
+ goto err;
+ }
+ get_task_struct(tsk);
+ yield_to(tsk, true);
+
+ for (i = 0; i < fc.chain_length; i++)
+ dma_fence_signal(fc.fences[i]);
+
+ err = kthread_stop(tsk);
+ put_task_struct(tsk);
+
+err:
+ fence_chains_fini(&fc);
+ return err;
+}
+
+int dma_fence_chain(void)
+{
+ static const struct subtest tests[] = {
+ SUBTEST(sanitycheck),
+ SUBTEST(find_seqno),
+ SUBTEST(find_signaled),
+ SUBTEST(find_out_of_order),
+ SUBTEST(find_gap),
+ SUBTEST(find_race),
+ SUBTEST(signal_forward),
+ SUBTEST(signal_backward),
+ SUBTEST(wait_forward),
+ SUBTEST(wait_backward),
+ SUBTEST(wait_random),
+ };
+ int ret;
+
+ pr_info("sizeof(dma_fence_chain)=%zu\n",
+ sizeof(struct dma_fence_chain));
+
+ slab_fences = KMEM_CACHE(mock_fence,
+ SLAB_TYPESAFE_BY_RCU |
+ SLAB_HWCACHE_ALIGN);
+ if (!slab_fences)
+ return -ENOMEM;
+
+ ret = subtests(tests, NULL);
+
+ kmem_cache_destroy(slab_fences);
+ return ret;
+}
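
For context on what the selftests above exercise, a sketch of the typical caller pattern for dma_fence_chain_find_seqno(); lookup_point and syncobj_fence are hypothetical names, and error handling is reduced to the essentials:

static struct dma_fence *lookup_point(struct dma_fence *syncobj_fence, u64 point)
{
	struct dma_fence *fence = dma_fence_get(syncobj_fence);
	int err;

	err = dma_fence_chain_find_seqno(&fence, point);
	if (err) {
		/* the requested point lies beyond the tail of the chain */
		dma_fence_put(fence);
		return ERR_PTR(err);
	}

	/* NULL here means the point has already signalled (see find_signaled above) */
	return fence;
}
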
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index f91f3bc1e0b2..9cf7cc1f3f72 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -3403,7 +3403,7 @@ static const struct attribute_group *amd64_edac_attr_groups[] = {
static int hw_info_get(struct amd64_pvt *pvt)
{
u16 pci_id1, pci_id2;
- int ret = -EINVAL;
+ int ret;
if (pvt->fam >= 0x17) {
pvt->umc = kcalloc(fam_type->max_mcs, sizeof(struct amd64_umc), GFP_KERNEL);
diff --git a/drivers/edac/amd8131_edac.c b/drivers/edac/amd8131_edac.c
index 93c82bc17493..169353710982 100644
--- a/drivers/edac/amd8131_edac.c
+++ b/drivers/edac/amd8131_edac.c
@@ -44,14 +44,6 @@ static void edac_pci_write_dword(struct pci_dev *dev, int reg, u32 val32)
" PCI Access Write Error at 0x%x\n", reg);
}
-static char * const bridge_str[] = {
- [NORTH_A] = "NORTH A",
- [NORTH_B] = "NORTH B",
- [SOUTH_A] = "SOUTH A",
- [SOUTH_B] = "SOUTH B",
- [NO_BRIDGE] = "NO BRIDGE",
-};
-
/* Support up to two AMD8131 chipsets on a platform */
static struct amd8131_dev_info amd8131_devices[] = {
{
diff --git a/drivers/edac/armada_xp_edac.c b/drivers/edac/armada_xp_edac.c
index a7502ebe9bdc..e3e757513d1b 100644
--- a/drivers/edac/armada_xp_edac.c
+++ b/drivers/edac/armada_xp_edac.c
@@ -78,7 +78,7 @@ struct axp_mc_drvdata {
char msg[128];
};
-/* derived from "DRAM Address Multiplexing" in the ARAMDA XP Functional Spec */
+/* derived from "DRAM Address Multiplexing" in the ARMADA XP Functional Spec */
static uint32_t axp_mc_calc_address(struct axp_mc_drvdata *drvdata,
uint8_t cs, uint8_t bank, uint16_t row,
uint16_t col)
@@ -160,12 +160,12 @@ static void axp_mc_check(struct mem_ctl_info *mci)
if (cnt_sbe)
cnt_sbe--;
else
- dev_warn(mci->pdev, "inconsistent SBE count detected");
+ dev_warn(mci->pdev, "inconsistent SBE count detected\n");
} else {
if (cnt_dbe)
cnt_dbe--;
else
- dev_warn(mci->pdev, "inconsistent DBE count detected");
+ dev_warn(mci->pdev, "inconsistent DBE count detected\n");
}
/* report earlier errors */
@@ -304,7 +304,7 @@ static int axp_mc_probe(struct platform_device *pdev)
config = readl(base + SDRAM_CONFIG_REG);
if (!(config & SDRAM_CONFIG_ECC_MASK)) {
- dev_warn(&pdev->dev, "SDRAM ECC is not enabled");
+ dev_warn(&pdev->dev, "SDRAM ECC is not enabled\n");
return -EINVAL;
}
@@ -532,9 +532,9 @@ static int aurora_l2_probe(struct platform_device *pdev)
l2x0_aux_ctrl = readl(base + L2X0_AUX_CTRL);
if (!(l2x0_aux_ctrl & AURORA_ACR_PARITY_EN))
- dev_warn(&pdev->dev, "tag parity is not enabled");
+ dev_warn(&pdev->dev, "tag parity is not enabled\n");
if (!(l2x0_aux_ctrl & AURORA_ACR_ECC_EN))
- dev_warn(&pdev->dev, "data ECC is not enabled");
+ dev_warn(&pdev->dev, "data ECC is not enabled\n");
dci = edac_device_alloc_ctl_info(sizeof(*drvdata),
"cpu", 1, "L", 1, 2, NULL, 0, 0);
@@ -618,7 +618,7 @@ static int __init armada_xp_edac_init(void)
res = platform_register_drivers(drivers, ARRAY_SIZE(drivers));
if (res)
- pr_warn("Aramda XP EDAC drivers fail to register\n");
+ pr_warn("Armada XP EDAC drivers fail to register\n");
return 0;
}
diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c
index df08de963d10..9b0044cd21cd 100644
--- a/drivers/edac/i10nm_base.c
+++ b/drivers/edac/i10nm_base.c
@@ -122,10 +122,22 @@ static int i10nm_get_all_munits(void)
return 0;
}
+static struct res_config i10nm_cfg0 = {
+ .type = I10NM,
+ .decs_did = 0x3452,
+ .busno_cfg_offset = 0xcc,
+};
+
+static struct res_config i10nm_cfg1 = {
+ .type = I10NM,
+ .decs_did = 0x3452,
+ .busno_cfg_offset = 0xd0,
+};
+
static const struct x86_cpu_id i10nm_cpuids[] = {
- X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, NULL),
- X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, NULL),
- X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, NULL),
+ X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &i10nm_cfg0),
+ X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &i10nm_cfg0),
+ X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &i10nm_cfg1),
{}
};
MODULE_DEVICE_TABLE(x86cpu, i10nm_cpuids);
@@ -161,7 +173,7 @@ static int i10nm_get_dimm_config(struct mem_ctl_info *mci)
mtr, mcddrtcfg, imc->mc, i, j);
if (IS_DIMM_PRESENT(mtr))
- ndimms += skx_get_dimm_info(mtr, 0, dimm,
+ ndimms += skx_get_dimm_info(mtr, 0, 0, dimm,
imc, i, j);
else if (IS_NVDIMM_PRESENT(mcddrtcfg, j))
ndimms += skx_get_nvdimm_info(dimm, imc, i, j,
@@ -234,6 +246,7 @@ static int __init i10nm_init(void)
{
u8 mc = 0, src_id = 0, node_id = 0;
const struct x86_cpu_id *id;
+ struct res_config *cfg;
const char *owner;
struct skx_dev *d;
int rc, i, off[3] = {0xd0, 0xc8, 0xcc};
@@ -249,11 +262,17 @@ static int __init i10nm_init(void)
if (!id)
return -ENODEV;
+ cfg = (struct res_config *)id->driver_data;
+
+ /* Newer steppings have different offset for ATOM_TREMONT_D/ICELAKE_X */
+ if (boot_cpu_data.x86_stepping >= 4)
+ cfg->busno_cfg_offset = 0xd0;
+
rc = skx_get_hi_lo(0x09a2, off, &tolm, &tohm);
if (rc)
return rc;
- rc = skx_get_all_bus_mappings(0x3452, 0xcc, I10NM, &i10nm_edac_list);
+ rc = skx_get_all_bus_mappings(cfg, &i10nm_edac_list);
if (rc < 0)
goto fail;
if (rc == 0) {
diff --git a/drivers/edac/skx_base.c b/drivers/edac/skx_base.c
index 46a3a3440f5e..b907a0f4ece6 100644
--- a/drivers/edac/skx_base.c
+++ b/drivers/edac/skx_base.c
@@ -157,33 +157,35 @@ fail:
return -ENODEV;
}
+static struct res_config skx_cfg = {
+ .type = SKX,
+ .decs_did = 0x2016,
+ .busno_cfg_offset = 0xcc,
+};
+
static const struct x86_cpu_id skx_cpuids[] = {
- X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, NULL),
+ X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &skx_cfg),
{ }
};
MODULE_DEVICE_TABLE(x86cpu, skx_cpuids);
-#define SKX_GET_MTMTR(dev, reg) \
- pci_read_config_dword((dev), 0x87c, &(reg))
-
-static bool skx_check_ecc(struct pci_dev *pdev)
+static bool skx_check_ecc(u32 mcmtr)
{
- u32 mtmtr;
-
- SKX_GET_MTMTR(pdev, mtmtr);
-
- return !!GET_BITFIELD(mtmtr, 2, 2);
+ return !!GET_BITFIELD(mcmtr, 2, 2);
}
static int skx_get_dimm_config(struct mem_ctl_info *mci)
{
struct skx_pvt *pvt = mci->pvt_info;
+ u32 mtr, mcmtr, amap, mcddrtcfg;
struct skx_imc *imc = pvt->imc;
- u32 mtr, amap, mcddrtcfg;
struct dimm_info *dimm;
int i, j;
int ndimms;
+ /* Only the mcmtr on the first channel is effective */
+ pci_read_config_dword(imc->chan[0].cdev, 0x87c, &mcmtr);
+
for (i = 0; i < SKX_NUM_CHANNELS; i++) {
ndimms = 0;
pci_read_config_dword(imc->chan[i].cdev, 0x8C, &amap);
@@ -193,14 +195,14 @@ static int skx_get_dimm_config(struct mem_ctl_info *mci)
pci_read_config_dword(imc->chan[i].cdev,
0x80 + 4 * j, &mtr);
if (IS_DIMM_PRESENT(mtr)) {
- ndimms += skx_get_dimm_info(mtr, amap, dimm, imc, i, j);
+ ndimms += skx_get_dimm_info(mtr, mcmtr, amap, dimm, imc, i, j);
} else if (IS_NVDIMM_PRESENT(mcddrtcfg, j)) {
ndimms += skx_get_nvdimm_info(dimm, imc, i, j,
EDAC_MOD_STR);
nvdimm_count++;
}
}
- if (ndimms && !skx_check_ecc(imc->chan[0].cdev)) {
+ if (ndimms && !skx_check_ecc(mcmtr)) {
skx_printk(KERN_ERR, "ECC is disabled on imc %d\n", imc->mc);
return -ENODEV;
}
@@ -641,6 +643,7 @@ static inline void teardown_skx_debug(void) {}
static int __init skx_init(void)
{
const struct x86_cpu_id *id;
+ struct res_config *cfg;
const struct munit *m;
const char *owner;
int rc = 0, i, off[3] = {0xd0, 0xd4, 0xd8};
@@ -657,11 +660,13 @@ static int __init skx_init(void)
if (!id)
return -ENODEV;
+ cfg = (struct res_config *)id->driver_data;
+
rc = skx_get_hi_lo(0x2034, off, &skx_tolm, &skx_tohm);
if (rc)
return rc;
- rc = skx_get_all_bus_mappings(0x2016, 0xcc, SKX, &skx_edac_list);
+ rc = skx_get_all_bus_mappings(cfg, &skx_edac_list);
if (rc < 0)
goto fail;
if (rc == 0) {
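
The match tables above hang a per-model res_config off ->driver_data; at init time it is retrieved with x86_match_cpu(), as in this reduced sketch (get_cfg is a hypothetical helper, not part of the patch):

static struct res_config *get_cfg(void)
{
	const struct x86_cpu_id *id = x86_match_cpu(skx_cpuids);

	/* NULL means we are not running on a supported CPU model */
	return id ? (struct res_config *)id->driver_data : NULL;
}
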
diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c
index 99bbaf629b8d..46be1a77bd1d 100644
--- a/drivers/edac/skx_common.c
+++ b/drivers/edac/skx_common.c
@@ -197,12 +197,11 @@ static int get_width(u32 mtr)
}
/*
- * We use the per-socket device @did to count how many sockets are present,
+ * We use the per-socket device @cfg->did to count how many sockets are present,
 * and to determine which PCI buses are associated with each socket. Allocate
* and build the full list of all the skx_dev structures that we need here.
*/
-int skx_get_all_bus_mappings(unsigned int did, int off, enum type type,
- struct list_head **list)
+int skx_get_all_bus_mappings(struct res_config *cfg, struct list_head **list)
{
struct pci_dev *pdev, *prev;
struct skx_dev *d;
@@ -211,7 +210,7 @@ int skx_get_all_bus_mappings(unsigned int did, int off, enum type type,
prev = NULL;
for (;;) {
- pdev = pci_get_device(PCI_VENDOR_ID_INTEL, did, prev);
+ pdev = pci_get_device(PCI_VENDOR_ID_INTEL, cfg->decs_did, prev);
if (!pdev)
break;
ndev++;
@@ -221,7 +220,7 @@ int skx_get_all_bus_mappings(unsigned int did, int off, enum type type,
return -ENOMEM;
}
- if (pci_read_config_dword(pdev, off, &reg)) {
+ if (pci_read_config_dword(pdev, cfg->busno_cfg_offset, &reg)) {
kfree(d);
pci_dev_put(pdev);
skx_printk(KERN_ERR, "Failed to read bus idx\n");
@@ -230,7 +229,7 @@ int skx_get_all_bus_mappings(unsigned int did, int off, enum type type,
d->bus[0] = GET_BITFIELD(reg, 0, 7);
d->bus[1] = GET_BITFIELD(reg, 8, 15);
- if (type == SKX) {
+ if (cfg->type == SKX) {
d->seg = pci_domain_nr(pdev->bus);
d->bus[2] = GET_BITFIELD(reg, 16, 23);
d->bus[3] = GET_BITFIELD(reg, 24, 31);
@@ -304,7 +303,7 @@ static int skx_get_dimm_attr(u32 reg, int lobit, int hibit, int add,
#define numrow(reg) skx_get_dimm_attr(reg, 2, 4, 12, 1, 6, "rows")
#define numcol(reg) skx_get_dimm_attr(reg, 0, 1, 10, 0, 2, "cols")
-int skx_get_dimm_info(u32 mtr, u32 amap, struct dimm_info *dimm,
+int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm,
struct skx_imc *imc, int chan, int dimmno)
{
int banks = 16, ranks, rows, cols, npages;
@@ -324,8 +323,8 @@ int skx_get_dimm_info(u32 mtr, u32 amap, struct dimm_info *dimm,
imc->mc, chan, dimmno, size, npages,
banks, 1 << ranks, rows, cols);
- imc->chan[chan].dimms[dimmno].close_pg = GET_BITFIELD(mtr, 0, 0);
- imc->chan[chan].dimms[dimmno].bank_xor_enable = GET_BITFIELD(mtr, 9, 9);
+ imc->chan[chan].dimms[dimmno].close_pg = GET_BITFIELD(mcmtr, 0, 0);
+ imc->chan[chan].dimms[dimmno].bank_xor_enable = GET_BITFIELD(mcmtr, 9, 9);
imc->chan[chan].dimms[dimmno].fine_grain_bank = GET_BITFIELD(amap, 0, 0);
imc->chan[chan].dimms[dimmno].rowbits = rows;
imc->chan[chan].dimms[dimmno].colbits = cols;
diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h
index 60d1ea669afd..78f8c1de0b71 100644
--- a/drivers/edac/skx_common.h
+++ b/drivers/edac/skx_common.h
@@ -112,6 +112,14 @@ struct decoded_addr {
int bank_group;
};
+struct res_config {
+ enum type type;
+ /* Configuration agent device ID */
+ unsigned int decs_did;
+ /* Default bus number configuration register offset */
+ int busno_cfg_offset;
+};
+
typedef int (*get_dimm_config_f)(struct mem_ctl_info *mci);
typedef bool (*skx_decode_f)(struct decoded_addr *res);
typedef void (*skx_show_retry_log_f)(struct decoded_addr *res, char *msg, int len);
@@ -123,12 +131,11 @@ void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log);
int skx_get_src_id(struct skx_dev *d, int off, u8 *id);
int skx_get_node_id(struct skx_dev *d, u8 *id);
-int skx_get_all_bus_mappings(unsigned int did, int off, enum type,
- struct list_head **list);
+int skx_get_all_bus_mappings(struct res_config *cfg, struct list_head **list);
int skx_get_hi_lo(unsigned int did, int off[], u64 *tolm, u64 *tohm);
-int skx_get_dimm_info(u32 mtr, u32 amap, struct dimm_info *dimm,
+int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm,
struct skx_imc *imc, int chan, int dimmno);
int skx_get_nvdimm_info(struct dimm_info *dimm, struct skx_imc *imc,
diff --git a/drivers/edac/thunderx_edac.c b/drivers/edac/thunderx_edac.c
index 34be60fe6892..4af9744cc6d0 100644
--- a/drivers/edac/thunderx_edac.c
+++ b/drivers/edac/thunderx_edac.c
@@ -1278,7 +1278,7 @@ OCX_DEBUGFS_ATTR(lne23_badcnt, OCX_LNE_BAD_CNT(23));
OCX_DEBUGFS_ATTR(com_int, OCX_COM_INT_W1S);
-struct debugfs_entry *ocx_dfs_ents[] = {
+static struct debugfs_entry *ocx_dfs_ents[] = {
&debugfs_tlk0_ecc_ctl,
&debugfs_tlk1_ecc_ctl,
&debugfs_tlk2_ecc_ctl,
@@ -1919,19 +1919,19 @@ err_free:
L2C_DEBUGFS_ATTR(tad_int, L2C_TAD_INT_W1S);
-struct debugfs_entry *l2c_tad_dfs_ents[] = {
+static struct debugfs_entry *l2c_tad_dfs_ents[] = {
&debugfs_tad_int,
};
L2C_DEBUGFS_ATTR(cbc_int, L2C_CBC_INT_W1S);
-struct debugfs_entry *l2c_cbc_dfs_ents[] = {
+static struct debugfs_entry *l2c_cbc_dfs_ents[] = {
&debugfs_cbc_int,
};
L2C_DEBUGFS_ATTR(mci_int, L2C_MCI_INT_W1S);
-struct debugfs_entry *l2c_mci_dfs_ents[] = {
+static struct debugfs_entry *l2c_mci_dfs_ents[] = {
&debugfs_mci_int,
};
diff --git a/drivers/edac/xgene_edac.c b/drivers/edac/xgene_edac.c
index e4a1032ba0b5..1d2c27a00a4a 100644
--- a/drivers/edac/xgene_edac.c
+++ b/drivers/edac/xgene_edac.c
@@ -1349,7 +1349,6 @@ static int xgene_edac_l3_remove(struct xgene_edac_dev_ctx *l3)
#define WORD_ALIGNED_ERR_MASK BIT(28)
#define PAGE_ACCESS_ERR_MASK BIT(27)
#define WRITE_ACCESS_MASK BIT(26)
-#define RBERRADDR_RD(src) ((src) & 0x03FFFFFF)
static const char * const soc_mem_err_v1[] = {
"10GbE0",
@@ -1483,13 +1482,11 @@ static void xgene_edac_rb_report(struct edac_device_ctl_info *edac_dev)
return;
if (reg & STICKYERR_MASK) {
bool write;
- u32 address;
dev_err(edac_dev->dev, "IOB bus access error(s)\n");
if (regmap_read(ctx->edac->rb_map, RBEIR, &reg))
return;
write = reg & WRITE_ACCESS_MASK ? 1 : 0;
- address = RBERRADDR_RD(reg);
if (reg & AGENT_OFFLINE_ERR_MASK)
dev_err(edac_dev->dev,
"IOB bus %s access to offline agent error\n",
diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c
index 6e291d8f3a27..c7ea4f2d5ca6 100644
--- a/drivers/firewire/core-cdev.c
+++ b/drivers/firewire/core-cdev.c
@@ -1081,8 +1081,6 @@ static int ioctl_queue_iso(struct client *client, union ioctl_arg *arg)
return -EINVAL;
p = (struct fw_cdev_iso_packet __user *)u64_to_uptr(a->packets);
- if (!access_ok(p, a->size))
- return -EFAULT;
end = (void __user *)p + a->size;
count = 0;
@@ -1120,7 +1118,7 @@ static int ioctl_queue_iso(struct client *client, union ioctl_arg *arg)
&p->header[transmit_header_bytes / 4];
if (next > end)
return -EINVAL;
- if (__copy_from_user
+ if (copy_from_user
(u.packet.header, p->header, transmit_header_bytes))
return -EFAULT;
if (u.packet.skip && ctx->type == FW_ISO_CONTEXT_TRANSMIT &&
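
The hunk above can drop the explicit access_ok() because copy_from_user(), unlike the double-underscore variant, validates the user range itself. A reduced sketch of the resulting pattern (fetch_header is a hypothetical helper):

static int fetch_header(void *dst, const void __user *src, size_t len)
{
	/* copy_from_user() performs the access_ok() check internally */
	if (copy_from_user(dst, src, len))
		return -EFAULT;

	return 0;
}
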
diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig
index 8007d4aa76dc..4843e94713a4 100644
--- a/drivers/firmware/Kconfig
+++ b/drivers/firmware/Kconfig
@@ -295,15 +295,13 @@ config TURRIS_MOX_RWTM
other manufacturing data and also utilize the Entropy Bit Generator
for hardware random number generation.
-config HAVE_ARM_SMCCC
- bool
-
-source "drivers/firmware/psci/Kconfig"
source "drivers/firmware/broadcom/Kconfig"
source "drivers/firmware/google/Kconfig"
source "drivers/firmware/efi/Kconfig"
source "drivers/firmware/imx/Kconfig"
source "drivers/firmware/meson/Kconfig"
+source "drivers/firmware/psci/Kconfig"
+source "drivers/firmware/smccc/Kconfig"
source "drivers/firmware/tegra/Kconfig"
source "drivers/firmware/xilinx/Kconfig"
diff --git a/drivers/firmware/Makefile b/drivers/firmware/Makefile
index e9fb838af4df..99510be9f5ed 100644
--- a/drivers/firmware/Makefile
+++ b/drivers/firmware/Makefile
@@ -23,12 +23,13 @@ obj-$(CONFIG_TRUSTED_FOUNDATIONS) += trusted_foundations.o
obj-$(CONFIG_TURRIS_MOX_RWTM) += turris-mox-rwtm.o
obj-$(CONFIG_ARM_SCMI_PROTOCOL) += arm_scmi/
-obj-y += psci/
obj-y += broadcom/
obj-y += meson/
obj-$(CONFIG_GOOGLE_FIRMWARE) += google/
obj-$(CONFIG_EFI) += efi/
obj-$(CONFIG_UEFI_CPER) += efi/
obj-y += imx/
+obj-y += psci/
+obj-y += smccc/
obj-y += tegra/
obj-y += xilinx/
diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c
index 334c8be0c11f..e7e36aab2386 100644
--- a/drivers/firmware/arm_sdei.c
+++ b/drivers/firmware/arm_sdei.c
@@ -429,7 +429,6 @@ int sdei_event_enable(u32 event_num)
return err;
}
-EXPORT_SYMBOL(sdei_event_enable);
static int sdei_api_event_disable(u32 event_num)
{
@@ -471,7 +470,6 @@ int sdei_event_disable(u32 event_num)
return err;
}
-EXPORT_SYMBOL(sdei_event_disable);
static int sdei_api_event_unregister(u32 event_num)
{
@@ -533,7 +531,6 @@ int sdei_event_unregister(u32 event_num)
return err;
}
-EXPORT_SYMBOL(sdei_event_unregister);
/*
* unregister events, but don't destroy them as they are re-registered by
@@ -643,7 +640,6 @@ int sdei_event_register(u32 event_num, sdei_event_callback *cb, void *arg)
return err;
}
-EXPORT_SYMBOL(sdei_event_register);
static int sdei_reregister_event_llocked(struct sdei_event *event)
{
@@ -1079,26 +1075,9 @@ static struct platform_driver sdei_driver = {
.probe = sdei_probe,
};
-static bool __init sdei_present_dt(void)
-{
- struct device_node *np, *fw_np;
-
- fw_np = of_find_node_by_name(NULL, "firmware");
- if (!fw_np)
- return false;
-
- np = of_find_matching_node(fw_np, sdei_of_match);
- if (!np)
- return false;
- of_node_put(np);
-
- return true;
-}
-
static bool __init sdei_present_acpi(void)
{
acpi_status status;
- struct platform_device *pdev;
struct acpi_table_header *sdei_table_header;
if (acpi_disabled)
@@ -1113,20 +1092,26 @@ static bool __init sdei_present_acpi(void)
if (ACPI_FAILURE(status))
return false;
- pdev = platform_device_register_simple(sdei_driver.driver.name, 0, NULL,
- 0);
- if (IS_ERR(pdev))
- return false;
+ acpi_put_table(sdei_table_header);
return true;
}
static int __init sdei_init(void)
{
- if (sdei_present_dt() || sdei_present_acpi())
- platform_driver_register(&sdei_driver);
+ int ret = platform_driver_register(&sdei_driver);
- return 0;
+ if (!ret && sdei_present_acpi()) {
+ struct platform_device *pdev;
+
+ pdev = platform_device_register_simple(sdei_driver.driver.name,
+ 0, NULL, 0);
+ if (IS_ERR(pdev))
+ pr_info("Failed to register ACPI:SDEI platform device %ld\n",
+ PTR_ERR(pdev));
+ }
+
+ return ret;
}
/*
@@ -1143,6 +1128,14 @@ int sdei_event_handler(struct pt_regs *regs,
mm_segment_t orig_addr_limit;
u32 event_num = arg->event_num;
+ /*
+	 * Save/restore 'fs'.
+	 * The architecture's entry code saves/restores 'fs' when taking an
+ * exception from the kernel. This ensures addr_limit isn't inherited
+ * if you interrupted something that allowed the uaccess routines to
+ * access kernel memory.
+ * Do the same here because this doesn't come via the same entry code.
+ */
orig_addr_limit = get_fs();
set_fs(USER_DS);
diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig
index 613828d3f106..6b38f9e5d203 100644
--- a/drivers/firmware/efi/Kconfig
+++ b/drivers/firmware/efi/Kconfig
@@ -106,12 +106,12 @@ config EFI_PARAMS_FROM_FDT
config EFI_RUNTIME_WRAPPERS
bool
-config EFI_ARMSTUB
+config EFI_GENERIC_STUB
bool
config EFI_ARMSTUB_DTB_LOADER
bool "Enable the DTB loader"
- depends on EFI_ARMSTUB
+ depends on EFI_GENERIC_STUB
default y
help
Select this config option to add support for the dtb= command
@@ -124,6 +124,17 @@ config EFI_ARMSTUB_DTB_LOADER
functionality for bootloaders that do not have such support
this option is necessary.
+config EFI_GENERIC_STUB_INITRD_CMDLINE_LOADER
+ bool "Enable the command line initrd loader" if !X86
+ depends on EFI_STUB && (EFI_GENERIC_STUB || X86)
+ default y
+ help
+ Select this config option to add support for the initrd= command
+ line parameter, allowing an initrd that resides on the same volume
+ as the kernel image to be loaded into memory.
+
+ This method is deprecated.
+
config EFI_BOOTLOADER_CONTROL
tristate "EFI Bootloader Control"
depends on EFI_VARS
diff --git a/drivers/firmware/efi/arm-init.c b/drivers/firmware/efi/arm-init.c
index 9e5e62f5f94d..c697e70ca7e7 100644
--- a/drivers/firmware/efi/arm-init.c
+++ b/drivers/firmware/efi/arm-init.c
@@ -54,8 +54,8 @@ static phys_addr_t __init efi_to_phys(unsigned long addr)
static __initdata unsigned long screen_info_table = EFI_INVALID_TABLE_ADDR;
static const efi_config_table_type_t arch_tables[] __initconst = {
- {LINUX_EFI_ARM_SCREEN_INFO_TABLE_GUID, NULL, &screen_info_table},
- {NULL_GUID, NULL, NULL}
+ {LINUX_EFI_ARM_SCREEN_INFO_TABLE_GUID, &screen_info_table},
+ {}
};
static void __init init_screen_info(void)
diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index 4e3055238f31..9357d6b6e87c 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -499,21 +499,21 @@ void __init efi_mem_reserve(phys_addr_t addr, u64 size)
}
static const efi_config_table_type_t common_tables[] __initconst = {
- {ACPI_20_TABLE_GUID, "ACPI 2.0", &efi.acpi20},
- {ACPI_TABLE_GUID, "ACPI", &efi.acpi},
- {SMBIOS_TABLE_GUID, "SMBIOS", &efi.smbios},
- {SMBIOS3_TABLE_GUID, "SMBIOS 3.0", &efi.smbios3},
- {EFI_SYSTEM_RESOURCE_TABLE_GUID, "ESRT", &efi.esrt},
- {EFI_MEMORY_ATTRIBUTES_TABLE_GUID, "MEMATTR", &efi_mem_attr_table},
- {LINUX_EFI_RANDOM_SEED_TABLE_GUID, "RNG", &efi_rng_seed},
- {LINUX_EFI_TPM_EVENT_LOG_GUID, "TPMEventLog", &efi.tpm_log},
- {LINUX_EFI_TPM_FINAL_LOG_GUID, "TPMFinalLog", &efi.tpm_final_log},
- {LINUX_EFI_MEMRESERVE_TABLE_GUID, "MEMRESERVE", &mem_reserve},
- {EFI_RT_PROPERTIES_TABLE_GUID, "RTPROP", &rt_prop},
+ {ACPI_20_TABLE_GUID, &efi.acpi20, "ACPI 2.0" },
+ {ACPI_TABLE_GUID, &efi.acpi, "ACPI" },
+ {SMBIOS_TABLE_GUID, &efi.smbios, "SMBIOS" },
+ {SMBIOS3_TABLE_GUID, &efi.smbios3, "SMBIOS 3.0" },
+ {EFI_SYSTEM_RESOURCE_TABLE_GUID, &efi.esrt, "ESRT" },
+ {EFI_MEMORY_ATTRIBUTES_TABLE_GUID, &efi_mem_attr_table, "MEMATTR" },
+ {LINUX_EFI_RANDOM_SEED_TABLE_GUID, &efi_rng_seed, "RNG" },
+ {LINUX_EFI_TPM_EVENT_LOG_GUID, &efi.tpm_log, "TPMEventLog" },
+ {LINUX_EFI_TPM_FINAL_LOG_GUID, &efi.tpm_final_log, "TPMFinalLog" },
+ {LINUX_EFI_MEMRESERVE_TABLE_GUID, &mem_reserve, "MEMRESERVE" },
+ {EFI_RT_PROPERTIES_TABLE_GUID, &rt_prop, "RTPROP" },
#ifdef CONFIG_EFI_RCI2_TABLE
- {DELLEMC_EFI_RCI2_TABLE_GUID, NULL, &rci2_table_phys},
+ {DELLEMC_EFI_RCI2_TABLE_GUID, &rci2_table_phys },
#endif
- {NULL_GUID, NULL, NULL},
+ {},
};
static __init int match_config_table(const efi_guid_t *guid,
@@ -522,15 +522,13 @@ static __init int match_config_table(const efi_guid_t *guid,
{
int i;
- if (table_types) {
- for (i = 0; efi_guidcmp(table_types[i].guid, NULL_GUID); i++) {
- if (!efi_guidcmp(*guid, table_types[i].guid)) {
- *(table_types[i].ptr) = table;
- if (table_types[i].name)
- pr_cont(" %s=0x%lx ",
- table_types[i].name, table);
- return 1;
- }
+ for (i = 0; efi_guidcmp(table_types[i].guid, NULL_GUID); i++) {
+ if (!efi_guidcmp(*guid, table_types[i].guid)) {
+ *(table_types[i].ptr) = table;
+ if (table_types[i].name[0])
+ pr_cont("%s=0x%lx ",
+ table_types[i].name, table);
+ return 1;
}
}
@@ -567,7 +565,7 @@ int __init efi_config_parse_tables(const efi_config_table_t *config_tables,
table = tbl32[i].table;
}
- if (!match_config_table(guid, table, common_tables))
+ if (!match_config_table(guid, table, common_tables) && arch_tables)
match_config_table(guid, table, arch_tables);
}
pr_cont("\n");
diff --git a/drivers/firmware/efi/efivars.c b/drivers/firmware/efi/efivars.c
index 78ad1ba8c987..26528a46d99e 100644
--- a/drivers/firmware/efi/efivars.c
+++ b/drivers/firmware/efi/efivars.c
@@ -522,8 +522,10 @@ efivar_create_sysfs_entry(struct efivar_entry *new_var)
ret = kobject_init_and_add(&new_var->kobj, &efivar_ktype,
NULL, "%s", short_name);
kfree(short_name);
- if (ret)
+ if (ret) {
+ kobject_put(&new_var->kobj);
return ret;
+ }
kobject_uevent(&new_var->kobj, KOBJ_ADD);
if (efivar_entry_add(new_var, &efivar_sysfs_list)) {
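
The fix above follows the documented kobject rule: once kobject_init_and_add() has been called, even a failure leaves an initialised kobject whose reference must be dropped with kobject_put() so that the ktype's release() frees the containing object. A generic sketch of the pattern (struct my_entry and register_entry are hypothetical):

static int register_entry(struct my_entry *e, struct kobj_type *ktype,
			  struct kobject *parent)
{
	int ret = kobject_init_and_add(&e->kobj, ktype, parent, "%s", e->name);

	if (ret) {
		/* do not kfree(e) here; release() runs via the final put */
		kobject_put(&e->kobj);
		return ret;
	}

	kobject_uevent(&e->kobj, KOBJ_ADD);
	return 0;
}
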
diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile
index 094eabdecfe6..cce4a7436052 100644
--- a/drivers/firmware/efi/libstub/Makefile
+++ b/drivers/firmware/efi/libstub/Makefile
@@ -7,7 +7,7 @@
#
cflags-$(CONFIG_X86_32) := -march=i386
cflags-$(CONFIG_X86_64) := -mcmodel=small
-cflags-$(CONFIG_X86) += -m$(BITS) -D__KERNEL__ -O2 \
+cflags-$(CONFIG_X86) += -m$(BITS) -D__KERNEL__ \
-fPIC -fno-strict-aliasing -mno-red-zone \
-mno-mmx -mno-sse -fshort-wchar \
-Wno-pointer-sign \
@@ -23,15 +23,19 @@ cflags-$(CONFIG_ARM) := $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) \
-fno-builtin -fpic \
$(call cc-option,-mno-single-pic-base)
-cflags-$(CONFIG_EFI_ARMSTUB) += -I$(srctree)/scripts/dtc/libfdt
+cflags-$(CONFIG_EFI_GENERIC_STUB) += -I$(srctree)/scripts/dtc/libfdt
-KBUILD_CFLAGS := $(cflags-y) -DDISABLE_BRANCH_PROFILING \
+KBUILD_CFLAGS := $(cflags-y) -Os -DDISABLE_BRANCH_PROFILING \
-include $(srctree)/drivers/firmware/efi/libstub/hidden.h \
-D__NO_FORTIFY \
$(call cc-option,-ffreestanding) \
$(call cc-option,-fno-stack-protector) \
+ $(call cc-option,-fno-addrsig) \
-D__DISABLE_EXPORTS
+# remove SCS flags from all objects in this directory
+KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_SCS), $(KBUILD_CFLAGS))
+
GCOV_PROFILE := n
KASAN_SANITIZE := n
UBSAN_SANITIZE := n
@@ -42,16 +46,17 @@ KCOV_INSTRUMENT := n
lib-y := efi-stub-helper.o gop.o secureboot.o tpm.o \
file.o mem.o random.o randomalloc.o pci.o \
- skip_spaces.o lib-cmdline.o lib-ctype.o
+ skip_spaces.o lib-cmdline.o lib-ctype.o \
+ alignedmem.o relocate.o vsprintf.o
# include the stub's generic dependencies from lib/ when building for ARM/arm64
-arm-deps-y := fdt_rw.c fdt_ro.c fdt_wip.c fdt.c fdt_empty_tree.c fdt_sw.c
+efi-deps-y := fdt_rw.c fdt_ro.c fdt_wip.c fdt.c fdt_empty_tree.c fdt_sw.c
$(obj)/lib-%.o: $(srctree)/lib/%.c FORCE
$(call if_changed_rule,cc_o_c)
-lib-$(CONFIG_EFI_ARMSTUB) += arm-stub.o fdt.o string.o \
- $(patsubst %.c,lib-%.o,$(arm-deps-y))
+lib-$(CONFIG_EFI_GENERIC_STUB) += efi-stub.o fdt.o string.o \
+ $(patsubst %.c,lib-%.o,$(efi-deps-y))
lib-$(CONFIG_ARM) += arm32-stub.o
lib-$(CONFIG_ARM64) += arm64-stub.o
@@ -60,6 +65,25 @@ CFLAGS_arm32-stub.o := -DTEXT_OFFSET=$(TEXT_OFFSET)
CFLAGS_arm64-stub.o := -DTEXT_OFFSET=$(TEXT_OFFSET)
#
+# For x86, bootloaders like systemd-boot or grub-efi do not zero-initialize the
+# .bss section, so the .bss section of the EFI stub needs to be included in the
+# .data section of the compressed kernel to ensure initialization. Rename the
+# .bss section here so it's easy to pick out in the linker script.
+#
+STUBCOPY_FLAGS-$(CONFIG_X86) += --rename-section .bss=.bss.efistub,load,alloc
+STUBCOPY_RELOC-$(CONFIG_X86_32) := R_386_32
+STUBCOPY_RELOC-$(CONFIG_X86_64) := R_X86_64_64
+
+#
+# ARM discards the .data section because it disallows r/w data in the
+# decompressor. So move our .data to .data.efistub and .bss to .bss.efistub,
+# which are preserved explicitly by the decompressor linker script.
+#
+STUBCOPY_FLAGS-$(CONFIG_ARM) += --rename-section .data=.data.efistub \
+ --rename-section .bss=.bss.efistub,load,alloc
+STUBCOPY_RELOC-$(CONFIG_ARM) := R_ARM_ABS
+
+#
# arm64 puts the stub in the kernel proper, which will unnecessarily retain all
# code indefinitely unless it is annotated as __init/__initdata/__initconst etc.
# So let's apply the __init annotations at the section level, by prefixing
@@ -73,8 +97,8 @@ CFLAGS_arm64-stub.o := -DTEXT_OFFSET=$(TEXT_OFFSET)
# a verification pass to see if any absolute relocations exist in any of the
# object files.
#
-extra-$(CONFIG_EFI_ARMSTUB) := $(lib-y)
-lib-$(CONFIG_EFI_ARMSTUB) := $(patsubst %.o,%.stub.o,$(lib-y))
+extra-y := $(lib-y)
+lib-y := $(patsubst %.o,%.stub.o,$(lib-y))
STUBCOPY_FLAGS-$(CONFIG_ARM64) += --prefix-alloc-sections=.init \
--prefix-symbols=__efistub_
@@ -97,11 +121,3 @@ quiet_cmd_stubcopy = STUBCPY $@
/bin/false; \
fi; \
$(OBJCOPY) $(STUBCOPY_FLAGS-y) $< $@
-
-#
-# ARM discards the .data section because it disallows r/w data in the
-# decompressor. So move our .data to .data.efistub, which is preserved
-# explicitly by the decompressor linker script.
-#
-STUBCOPY_FLAGS-$(CONFIG_ARM) += --rename-section .data=.data.efistub
-STUBCOPY_RELOC-$(CONFIG_ARM) := R_ARM_ABS
diff --git a/drivers/firmware/efi/libstub/alignedmem.c b/drivers/firmware/efi/libstub/alignedmem.c
new file mode 100644
index 000000000000..cc89c4d6196f
--- /dev/null
+++ b/drivers/firmware/efi/libstub/alignedmem.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/efi.h>
+#include <asm/efi.h>
+
+#include "efistub.h"
+
+/**
+ * efi_allocate_pages_aligned() - Allocate memory pages
+ * @size: minimum number of bytes to allocate
+ * @addr: On return the address of the first allocated page. The first
+ * allocated page has alignment EFI_ALLOC_ALIGN which is an
+ * architecture dependent multiple of the page size.
+ * @max: the address that the last allocated memory page shall not
+ * exceed
+ * @align: minimum alignment of the base of the allocation
+ *
+ * Allocate pages as EFI_LOADER_DATA. The allocated pages are aligned according
+ * to @align, which should be >= EFI_ALLOC_ALIGN. The last allocated page will
+ * not exceed the address given by @max.
+ *
+ * Return: status code
+ */
+efi_status_t efi_allocate_pages_aligned(unsigned long size, unsigned long *addr,
+ unsigned long max, unsigned long align)
+{
+ efi_physical_addr_t alloc_addr;
+ efi_status_t status;
+ int slack;
+
+ if (align < EFI_ALLOC_ALIGN)
+ align = EFI_ALLOC_ALIGN;
+
+ alloc_addr = ALIGN_DOWN(max + 1, align) - 1;
+ size = round_up(size, EFI_ALLOC_ALIGN);
+ slack = align / EFI_PAGE_SIZE - 1;
+
+ status = efi_bs_call(allocate_pages, EFI_ALLOCATE_MAX_ADDRESS,
+ EFI_LOADER_DATA, size / EFI_PAGE_SIZE + slack,
+ &alloc_addr);
+ if (status != EFI_SUCCESS)
+ return status;
+
+ *addr = ALIGN((unsigned long)alloc_addr, align);
+
+ if (slack > 0) {
+ int l = (alloc_addr % align) / EFI_PAGE_SIZE;
+
+ if (l) {
+ efi_bs_call(free_pages, alloc_addr, slack - l + 1);
+ slack = l - 1;
+ }
+ if (slack)
+ efi_bs_call(free_pages, *addr + size, slack);
+ }
+ return EFI_SUCCESS;
+}
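
A worked example of the slack-trimming arithmetic above, reproduced in plain C for illustration (assumes 4 KiB EFI pages and a 64 KiB alignment request; the addresses are made up):

#include <assert.h>

#define PAGE		0x1000UL
#define ALIGN_UP(x, a)	(((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	unsigned long align = 0x10000UL;		/* 64 KiB */
	unsigned long slack = align / PAGE - 1;		/* 15 extra pages requested */
	unsigned long alloc_addr = 0x80013000UL;	/* firmware picked 3 pages past a boundary */
	unsigned long addr = ALIGN_UP(alloc_addr, align);
	unsigned long l = (alloc_addr % align) / PAGE;

	assert(addr == 0x80020000UL);
	assert(slack - l + 1 == 13);	/* leading pages freed below addr */
	assert(l - 1 == 2);		/* trailing pages freed past addr + size */
	return 0;
}
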
diff --git a/drivers/firmware/efi/libstub/arm32-stub.c b/drivers/firmware/efi/libstub/arm32-stub.c
index 7826553af2ba..40243f524556 100644
--- a/drivers/firmware/efi/libstub/arm32-stub.c
+++ b/drivers/firmware/efi/libstub/arm32-stub.c
@@ -18,7 +18,7 @@ efi_status_t check_platform_features(void)
/* LPAE kernels need compatible hardware */
block = cpuid_feature_extract(CPUID_EXT_MMFR0, 0);
if (block < 5) {
- pr_efi_err("This LPAE kernel is not supported by your CPU\n");
+ efi_err("This LPAE kernel is not supported by your CPU\n");
return EFI_UNSUPPORTED;
}
return EFI_SUCCESS;
@@ -120,7 +120,7 @@ static efi_status_t reserve_kernel_base(unsigned long dram_base,
*/
status = efi_get_memory_map(&map);
if (status != EFI_SUCCESS) {
- pr_efi_err("reserve_kernel_base(): Unable to retrieve memory map.\n");
+ efi_err("reserve_kernel_base(): Unable to retrieve memory map.\n");
return status;
}
@@ -162,7 +162,7 @@ static efi_status_t reserve_kernel_base(unsigned long dram_base,
(end - start) / EFI_PAGE_SIZE,
&start);
if (status != EFI_SUCCESS) {
- pr_efi_err("reserve_kernel_base(): alloc failed.\n");
+ efi_err("reserve_kernel_base(): alloc failed.\n");
goto out;
}
break;
@@ -199,14 +199,8 @@ efi_status_t handle_kernel_image(unsigned long *image_addr,
unsigned long kernel_base;
efi_status_t status;
- /*
- * Verify that the DRAM base address is compatible with the ARM
- * boot protocol, which determines the base of DRAM by masking
- * off the low 27 bits of the address at which the zImage is
- * loaded. These assumptions are made by the decompressor,
- * before any memory map is available.
- */
- kernel_base = round_up(dram_base, SZ_128M);
+ /* use a 16 MiB aligned base for the decompressed kernel */
+ kernel_base = round_up(dram_base, SZ_16M) + TEXT_OFFSET;
/*
* Note that some platforms (notably, the Raspberry Pi 2) put
@@ -215,41 +209,14 @@ efi_status_t handle_kernel_image(unsigned long *image_addr,
* base of the kernel image is only partially used at the moment.
* (Up to 5 pages are used for the swapper page tables)
*/
- kernel_base += TEXT_OFFSET - 5 * PAGE_SIZE;
-
- status = reserve_kernel_base(kernel_base, reserve_addr, reserve_size);
- if (status != EFI_SUCCESS) {
- pr_efi_err("Unable to allocate memory for uncompressed kernel.\n");
- return status;
- }
-
- /*
- * Relocate the zImage, so that it appears in the lowest 128 MB
- * memory window.
- */
- *image_addr = (unsigned long)image->image_base;
- *image_size = image->image_size;
- status = efi_relocate_kernel(image_addr, *image_size, *image_size,
- kernel_base + MAX_UNCOMP_KERNEL_SIZE, 0, 0);
+ status = reserve_kernel_base(kernel_base - 5 * PAGE_SIZE, reserve_addr,
+ reserve_size);
if (status != EFI_SUCCESS) {
- pr_efi_err("Failed to relocate kernel.\n");
- efi_free(*reserve_size, *reserve_addr);
- *reserve_size = 0;
+ efi_err("Unable to allocate memory for uncompressed kernel.\n");
return status;
}
- /*
- * Check to see if we were able to allocate memory low enough
- * in memory. The kernel determines the base of DRAM from the
- * address at which the zImage is loaded.
- */
- if (*image_addr + *image_size > dram_base + ZIMAGE_OFFSET_LIMIT) {
- pr_efi_err("Failed to relocate kernel, no low memory available.\n");
- efi_free(*reserve_size, *reserve_addr);
- *reserve_size = 0;
- efi_free(*image_size, *image_addr);
- *image_size = 0;
- return EFI_LOAD_ERROR;
- }
+ *image_addr = kernel_base;
+ *image_size = 0;
return EFI_SUCCESS;
}
diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c
index fc9f8ab533a7..7f6a57dec513 100644
--- a/drivers/firmware/efi/libstub/arm64-stub.c
+++ b/drivers/firmware/efi/libstub/arm64-stub.c
@@ -26,14 +26,23 @@ efi_status_t check_platform_features(void)
tg = (read_cpuid(ID_AA64MMFR0_EL1) >> ID_AA64MMFR0_TGRAN_SHIFT) & 0xf;
if (tg != ID_AA64MMFR0_TGRAN_SUPPORTED) {
if (IS_ENABLED(CONFIG_ARM64_64K_PAGES))
- pr_efi_err("This 64 KB granular kernel is not supported by your CPU\n");
+ efi_err("This 64 KB granular kernel is not supported by your CPU\n");
else
- pr_efi_err("This 16 KB granular kernel is not supported by your CPU\n");
+ efi_err("This 16 KB granular kernel is not supported by your CPU\n");
return EFI_UNSUPPORTED;
}
return EFI_SUCCESS;
}
+/*
+ * Relocatable kernels can fix up the misalignment with respect to
+ * MIN_KIMG_ALIGN, so they only require a minimum alignment of EFI_KIMG_ALIGN
+ * (which accounts for the alignment of statically allocated objects such as
+ * the swapper stack.)
+ */
+static const u64 min_kimg_align = IS_ENABLED(CONFIG_RELOCATABLE) ? EFI_KIMG_ALIGN
+ : MIN_KIMG_ALIGN;
+
efi_status_t handle_kernel_image(unsigned long *image_addr,
unsigned long *image_size,
unsigned long *reserve_addr,
@@ -43,106 +52,63 @@ efi_status_t handle_kernel_image(unsigned long *image_addr,
{
efi_status_t status;
unsigned long kernel_size, kernel_memsize = 0;
- unsigned long preferred_offset;
- u64 phys_seed = 0;
+ u32 phys_seed = 0;
if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
- if (!nokaslr()) {
+ if (!efi_nokaslr) {
status = efi_get_random_bytes(sizeof(phys_seed),
(u8 *)&phys_seed);
if (status == EFI_NOT_FOUND) {
- pr_efi("EFI_RNG_PROTOCOL unavailable, no randomness supplied\n");
+ efi_info("EFI_RNG_PROTOCOL unavailable, no randomness supplied\n");
} else if (status != EFI_SUCCESS) {
- pr_efi_err("efi_get_random_bytes() failed\n");
+ efi_err("efi_get_random_bytes() failed\n");
return status;
}
} else {
- pr_efi("KASLR disabled on kernel command line\n");
+ efi_info("KASLR disabled on kernel command line\n");
}
}
- /*
- * The preferred offset of the kernel Image is TEXT_OFFSET bytes beyond
- * a 2 MB aligned base, which itself may be lower than dram_base, as
- * long as the resulting offset equals or exceeds it.
- */
- preferred_offset = round_down(dram_base, MIN_KIMG_ALIGN) + TEXT_OFFSET;
- if (preferred_offset < dram_base)
- preferred_offset += MIN_KIMG_ALIGN;
+ if (image->image_base != _text)
+ efi_err("FIRMWARE BUG: efi_loaded_image_t::image_base has bogus value\n");
kernel_size = _edata - _text;
kernel_memsize = kernel_size + (_end - _edata);
+ *reserve_size = kernel_memsize + TEXT_OFFSET % min_kimg_align;
if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && phys_seed != 0) {
/*
- * Produce a displacement in the interval [0, MIN_KIMG_ALIGN)
- * that doesn't violate this kernel's de-facto alignment
- * constraints.
- */
- u32 mask = (MIN_KIMG_ALIGN - 1) & ~(EFI_KIMG_ALIGN - 1);
- u32 offset = (phys_seed >> 32) & mask;
-
- /*
- * With CONFIG_RANDOMIZE_TEXT_OFFSET=y, TEXT_OFFSET may not
- * be a multiple of EFI_KIMG_ALIGN, and we must ensure that
- * we preserve the misalignment of 'offset' relative to
- * EFI_KIMG_ALIGN so that statically allocated objects whose
- * alignment exceeds PAGE_SIZE appear correctly aligned in
- * memory.
- */
- offset |= TEXT_OFFSET % EFI_KIMG_ALIGN;
-
- /*
* If KASLR is enabled, and we have some randomness available,
* locate the kernel at a randomized offset in physical memory.
*/
- *reserve_size = kernel_memsize + offset;
- status = efi_random_alloc(*reserve_size,
- MIN_KIMG_ALIGN, reserve_addr,
- (u32)phys_seed);
-
- *image_addr = *reserve_addr + offset;
+ status = efi_random_alloc(*reserve_size, min_kimg_align,
+ reserve_addr, phys_seed);
} else {
- /*
- * Else, try a straight allocation at the preferred offset.
- * This will work around the issue where, if dram_base == 0x0,
- * efi_low_alloc() refuses to allocate at 0x0 (to prevent the
- * address of the allocation to be mistaken for a FAIL return
- * value or a NULL pointer). It will also ensure that, on
- * platforms where the [dram_base, dram_base + TEXT_OFFSET)
- * interval is partially occupied by the firmware (like on APM
- * Mustang), we can still place the kernel at the address
- * 'dram_base + TEXT_OFFSET'.
- */
- *image_addr = (unsigned long)_text;
- if (*image_addr == preferred_offset)
- return EFI_SUCCESS;
-
- *image_addr = *reserve_addr = preferred_offset;
- *reserve_size = round_up(kernel_memsize, EFI_ALLOC_ALIGN);
-
- status = efi_bs_call(allocate_pages, EFI_ALLOCATE_ADDRESS,
- EFI_LOADER_DATA,
- *reserve_size / EFI_PAGE_SIZE,
- (efi_physical_addr_t *)reserve_addr);
+ status = EFI_OUT_OF_RESOURCES;
}
if (status != EFI_SUCCESS) {
- *reserve_size = kernel_memsize + TEXT_OFFSET;
- status = efi_low_alloc(*reserve_size,
- MIN_KIMG_ALIGN, reserve_addr);
+ if (IS_ALIGNED((u64)_text - TEXT_OFFSET, min_kimg_align)) {
+ /*
+ * Just execute from wherever we were loaded by the
+ * UEFI PE/COFF loader if the alignment is suitable.
+ */
+ *image_addr = (u64)_text;
+ *reserve_size = 0;
+ return EFI_SUCCESS;
+ }
+
+ status = efi_allocate_pages_aligned(*reserve_size, reserve_addr,
+ ULONG_MAX, min_kimg_align);
if (status != EFI_SUCCESS) {
- pr_efi_err("Failed to relocate kernel\n");
+ efi_err("Failed to relocate kernel\n");
*reserve_size = 0;
return status;
}
- *image_addr = *reserve_addr + TEXT_OFFSET;
}
- if (image->image_base != _text)
- pr_efi_err("FIRMWARE BUG: efi_loaded_image_t::image_base has bogus value\n");
-
+ *image_addr = *reserve_addr + TEXT_OFFSET % min_kimg_align;
memcpy((void *)*image_addr, _text, kernel_size);
return EFI_SUCCESS;
diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c
index 9f34c7242939..89f075275300 100644
--- a/drivers/firmware/efi/libstub/efi-stub-helper.c
+++ b/drivers/firmware/efi/libstub/efi-stub-helper.c
@@ -7,60 +7,151 @@
* Copyright 2011 Intel Corporation; author Matt Fleming
*/
+#include <stdarg.h>
+
+#include <linux/ctype.h>
#include <linux/efi.h>
+#include <linux/kernel.h>
+#include <linux/printk.h> /* For CONSOLE_LOGLEVEL_* */
#include <asm/efi.h>
+#include <asm/setup.h>
#include "efistub.h"
-static bool __efistub_global efi_nochunk;
-static bool __efistub_global efi_nokaslr;
-static bool __efistub_global efi_noinitrd;
-static bool __efistub_global efi_quiet;
-static bool __efistub_global efi_novamap;
-static bool __efistub_global efi_nosoftreserve;
-static bool __efistub_global efi_disable_pci_dma =
- IS_ENABLED(CONFIG_EFI_DISABLE_PCI_DMA);
+bool efi_nochunk;
+bool efi_nokaslr;
+bool efi_noinitrd;
+int efi_loglevel = CONSOLE_LOGLEVEL_DEFAULT;
+bool efi_novamap;
-bool __pure nochunk(void)
-{
- return efi_nochunk;
-}
-bool __pure nokaslr(void)
-{
- return efi_nokaslr;
-}
-bool __pure noinitrd(void)
+static bool efi_nosoftreserve;
+static bool efi_disable_pci_dma = IS_ENABLED(CONFIG_EFI_DISABLE_PCI_DMA);
+
+bool __pure __efi_soft_reserve_enabled(void)
{
- return efi_noinitrd;
+ return !efi_nosoftreserve;
}
-bool __pure is_quiet(void)
+
+void efi_char16_puts(efi_char16_t *str)
{
- return efi_quiet;
+ efi_call_proto(efi_table_attr(efi_system_table, con_out),
+ output_string, str);
}
-bool __pure novamap(void)
+
+static
+u32 utf8_to_utf32(const u8 **s8)
{
- return efi_novamap;
+ u32 c32;
+ u8 c0, cx;
+ size_t clen, i;
+
+ c0 = cx = *(*s8)++;
+ /*
+ * The position of the most-significant 0 bit gives us the length of
+ * a multi-octet encoding.
+ */
+ for (clen = 0; cx & 0x80; ++clen)
+ cx <<= 1;
+ /*
+ * If the 0 bit is in position 8, this is a valid single-octet
+ * encoding. If the 0 bit is in position 7 or positions 1-3, the
+ * encoding is invalid.
+ * In either case, we just return the first octet.
+ */
+ if (clen < 2 || clen > 4)
+ return c0;
+ /* Get the bits from the first octet. */
+ c32 = cx >> clen--;
+ for (i = 0; i < clen; ++i) {
+ /* Trailing octets must have 10 in most significant bits. */
+ cx = (*s8)[i] ^ 0x80;
+ if (cx & 0xc0)
+ return c0;
+ c32 = (c32 << 6) | cx;
+ }
+ /*
+ * Check for validity:
+ * - The character must be in the Unicode range.
+ * - It must not be a surrogate.
+ * - It must be encoded using the correct number of octets.
+ */
+ if (c32 > 0x10ffff ||
+ (c32 & 0xf800) == 0xd800 ||
+ clen != (c32 >= 0x80) + (c32 >= 0x800) + (c32 >= 0x10000))
+ return c0;
+ *s8 += clen;
+ return c32;
}
-bool __pure __efi_soft_reserve_enabled(void)
+
+void efi_puts(const char *str)
{
- return !efi_nosoftreserve;
+ efi_char16_t buf[128];
+ size_t pos = 0, lim = ARRAY_SIZE(buf);
+ const u8 *s8 = (const u8 *)str;
+ u32 c32;
+
+ while (*s8) {
+ if (*s8 == '\n')
+ buf[pos++] = L'\r';
+ c32 = utf8_to_utf32(&s8);
+ if (c32 < 0x10000) {
+ /* Characters in plane 0 use a single word. */
+ buf[pos++] = c32;
+ } else {
+ /*
+ * Characters in other planes encode into a surrogate
+ * pair.
+ */
+ buf[pos++] = (0xd800 - (0x10000 >> 10)) + (c32 >> 10);
+ buf[pos++] = 0xdc00 + (c32 & 0x3ff);
+ }
+ if (*s8 == '\0' || pos >= lim - 2) {
+ buf[pos] = L'\0';
+ efi_char16_puts(buf);
+ pos = 0;
+ }
+ }
}
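
Code points above U+FFFF do not fit in one UTF-16 word and are emitted as a surrogate pair; the constant 0xd800 - (0x10000 >> 10) is the conventional lead-surrogate offset 0xd7c0. A standalone check of that arithmetic (not part of the patch) for the supplementary-plane character U+1F600:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint32_t c32 = 0x1f600;
        uint16_t hi, lo;

        hi = (0xd800 - (0x10000 >> 10)) + (c32 >> 10);  /* 0xd7c0 + 0x7d */
        lo = 0xdc00 + (c32 & 0x3ff);

        printf("U+%05X -> 0x%04X 0x%04X\n", (unsigned)c32, (unsigned)hi, (unsigned)lo);
        assert(hi == 0xd83d && lo == 0xde00);

        /* Round-trip back to the original code point. */
        assert((((uint32_t)hi - 0xd800) << 10) + (lo - 0xdc00) + 0x10000 == c32);
        return 0;
}
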
-void efi_printk(char *str)
+int efi_printk(const char *fmt, ...)
{
- char *s8;
+ char printf_buf[256];
+ va_list args;
+ int printed;
+ int loglevel = printk_get_level(fmt);
+
+ switch (loglevel) {
+ case '0' ... '9':
+ loglevel -= '0';
+ break;
+ default:
+ /*
+ * Use loglevel -1 for cases where we just want to print to
+ * the screen.
+ */
+ loglevel = -1;
+ break;
+ }
- for (s8 = str; *s8; s8++) {
- efi_char16_t ch[2] = { 0 };
+ if (loglevel >= efi_loglevel)
+ return 0;
- ch[0] = *s8;
- if (*s8 == '\n') {
- efi_char16_t nl[2] = { '\r', 0 };
- efi_char16_printk(nl);
- }
+ if (loglevel >= 0)
+ efi_puts("EFI stub: ");
+
+ fmt = printk_skip_level(fmt);
+
+ va_start(args, fmt);
+ printed = vsnprintf(printf_buf, sizeof(printf_buf), fmt, args);
+ va_end(args);
- efi_char16_printk(ch);
+ efi_puts(printf_buf);
+ if (printed >= sizeof(printf_buf)) {
+ efi_puts("[Message truncated]\n");
+ return -1;
}
+
+ return printed;
}
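
efi_printk() now reuses the printk convention: a "\001<digit>" header encodes the message level, and a message is dropped unless its level is strictly below efi_loglevel, so KERN_DEBUG output only appears once "efi=debug" raises the threshold, while "quiet" leaves only KERN_ERR and more severe messages (plus unannotated ones) visible. A standalone model of just that filtering (not part of the patch; the levels 7, 4 and 10 match the usual CONSOLE_LOGLEVEL_DEFAULT, _QUIET and _DEBUG values, though the default is Kconfig dependent):

#include <stdio.h>

static int msg_level(const char *fmt)
{
        /* printk-style header: '\001' followed by the level digit */
        if (fmt[0] == '\001' && fmt[1] >= '0' && fmt[1] <= '9')
                return fmt[1] - '0';
        return -1;              /* unannotated: always printed */
}

static int would_print(const char *fmt, int efi_loglevel)
{
        return msg_level(fmt) < efi_loglevel;
}

int main(void)
{
        printf("INFO  under default(7): %d\n", would_print("\0016info\n", 7));  /* 1 */
        printf("DEBUG under default(7): %d\n", would_print("\0017dbg\n", 7));   /* 0 */
        printf("INFO  under quiet(4):   %d\n", would_print("\0016info\n", 4));  /* 0 */
        printf("ERR   under quiet(4):   %d\n", would_print("\0013err\n", 4));   /* 1 */
        printf("DEBUG under debug(10):  %d\n", would_print("\0017dbg\n", 10));  /* 1 */
        return 0;
}
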
/*
@@ -91,7 +182,7 @@ efi_status_t efi_parse_options(char const *cmdline)
if (!strcmp(param, "nokaslr")) {
efi_nokaslr = true;
} else if (!strcmp(param, "quiet")) {
- efi_quiet = true;
+ efi_loglevel = CONSOLE_LOGLEVEL_QUIET;
} else if (!strcmp(param, "noinitrd")) {
efi_noinitrd = true;
} else if (!strcmp(param, "efi") && val) {
@@ -105,6 +196,11 @@ efi_status_t efi_parse_options(char const *cmdline)
efi_disable_pci_dma = true;
if (parse_option_str(val, "no_disable_early_pci_dma"))
efi_disable_pci_dma = false;
+ if (parse_option_str(val, "debug"))
+ efi_loglevel = CONSOLE_LOGLEVEL_DEBUG;
+ } else if (!strcmp(param, "video") &&
+ val && strstarts(val, "efifb:")) {
+ efi_parse_option_graphics(val + strlen("efifb:"));
}
}
efi_bs_call(free_pool, buf);
@@ -112,97 +208,79 @@ efi_status_t efi_parse_options(char const *cmdline)
}
/*
- * Get the number of UTF-8 bytes corresponding to an UTF-16 character.
- * This overestimates for surrogates, but that is okay.
- */
-static int efi_utf8_bytes(u16 c)
-{
- return 1 + (c >= 0x80) + (c >= 0x800);
-}
-
-/*
- * Convert an UTF-16 string, not necessarily null terminated, to UTF-8.
- */
-static u8 *efi_utf16_to_utf8(u8 *dst, const u16 *src, int n)
-{
- unsigned int c;
-
- while (n--) {
- c = *src++;
- if (n && c >= 0xd800 && c <= 0xdbff &&
- *src >= 0xdc00 && *src <= 0xdfff) {
- c = 0x10000 + ((c & 0x3ff) << 10) + (*src & 0x3ff);
- src++;
- n--;
- }
- if (c >= 0xd800 && c <= 0xdfff)
- c = 0xfffd; /* Unmatched surrogate */
- if (c < 0x80) {
- *dst++ = c;
- continue;
- }
- if (c < 0x800) {
- *dst++ = 0xc0 + (c >> 6);
- goto t1;
- }
- if (c < 0x10000) {
- *dst++ = 0xe0 + (c >> 12);
- goto t2;
- }
- *dst++ = 0xf0 + (c >> 18);
- *dst++ = 0x80 + ((c >> 12) & 0x3f);
- t2:
- *dst++ = 0x80 + ((c >> 6) & 0x3f);
- t1:
- *dst++ = 0x80 + (c & 0x3f);
- }
-
- return dst;
-}
-
-/*
  * Convert the Unicode UEFI command line to ASCII to pass to the kernel.
  * Size of memory allocated is returned in *cmd_line_len.
* Returns NULL on error.
*/
-char *efi_convert_cmdline(efi_loaded_image_t *image,
- int *cmd_line_len, unsigned long max_addr)
+char *efi_convert_cmdline(efi_loaded_image_t *image, int *cmd_line_len)
{
const u16 *s2;
- u8 *s1 = NULL;
unsigned long cmdline_addr = 0;
- int load_options_chars = efi_table_attr(image, load_options_size) / 2;
+ int options_chars = efi_table_attr(image, load_options_size) / 2;
const u16 *options = efi_table_attr(image, load_options);
- int options_bytes = 0; /* UTF-8 bytes */
- int options_chars = 0; /* UTF-16 chars */
+ int options_bytes = 0, safe_options_bytes = 0; /* UTF-8 bytes */
+ bool in_quote = false;
efi_status_t status;
- u16 zero = 0;
if (options) {
s2 = options;
- while (*s2 && *s2 != '\n'
- && options_chars < load_options_chars) {
- options_bytes += efi_utf8_bytes(*s2++);
- options_chars++;
+ while (options_bytes < COMMAND_LINE_SIZE && options_chars--) {
+ u16 c = *s2++;
+
+ if (c < 0x80) {
+ if (c == L'\0' || c == L'\n')
+ break;
+ if (c == L'"')
+ in_quote = !in_quote;
+ else if (!in_quote && isspace((char)c))
+ safe_options_bytes = options_bytes;
+
+ options_bytes++;
+ continue;
+ }
+
+ /*
+ * Get the number of UTF-8 bytes corresponding to a
+ * UTF-16 character.
+ * The first part handles everything in the BMP.
+ */
+ options_bytes += 2 + (c >= 0x800);
+ /*
+ * Add one more byte for valid surrogate pairs. Invalid
+ * surrogates will be replaced with 0xfffd and take up
+ * only 3 bytes.
+ */
+ if ((c & 0xfc00) == 0xd800) {
+ /*
+ * If the very last word is a high surrogate,
+ * we must ignore it since we can't access the
+ * low surrogate.
+ */
+ if (!options_chars) {
+ options_bytes -= 3;
+ } else if ((*s2 & 0xfc00) == 0xdc00) {
+ options_bytes++;
+ options_chars--;
+ s2++;
+ }
+ }
+ }
+ if (options_bytes >= COMMAND_LINE_SIZE) {
+ options_bytes = safe_options_bytes;
+ efi_err("Command line is too long: truncated to %d bytes\n",
+ options_bytes);
}
- }
-
- if (!options_chars) {
- /* No command line options, so return empty string*/
- options = &zero;
}
options_bytes++; /* NUL termination */
- status = efi_allocate_pages(options_bytes, &cmdline_addr, max_addr);
+ status = efi_bs_call(allocate_pool, EFI_LOADER_DATA, options_bytes,
+ (void **)&cmdline_addr);
if (status != EFI_SUCCESS)
return NULL;
- s1 = (u8 *)cmdline_addr;
- s2 = (const u16 *)options;
-
- s1 = efi_utf16_to_utf8(s1, s2, options_chars);
- *s1 = '\0';
+ snprintf((char *)cmdline_addr, options_bytes, "%.*ls",
+ options_bytes - 1, options);
*cmd_line_len = options_bytes;
return (char *)cmdline_addr;
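
The counting pass above sizes the UTF-8 output, remembers the last whitespace seen outside double quotes, and falls back to that position when the converted line would reach COMMAND_LINE_SIZE, so truncation lands on an option boundary rather than mid-token; the actual conversion is then left to snprintf's "%.*ls". A host-side sketch of just the truncation bookkeeping for a plain ASCII line (not part of the patch; the limit is shrunk to 16 bytes to force the case):

#include <ctype.h>
#include <stdio.h>

#define CMDLINE_MAX 16

int main(void)
{
        const char *opts = "root=/dev/sda1 console=\"ttyS0 115200\" quiet";
        size_t bytes = 0, safe_bytes = 0;
        int in_quote = 0;

        while (bytes < CMDLINE_MAX && opts[bytes]) {
                char c = opts[bytes];

                if (c == '"')
                        in_quote = !in_quote;
                else if (!in_quote && isspace((unsigned char)c))
                        safe_bytes = bytes;     /* last safe break so far */
                bytes++;
        }

        if (bytes >= CMDLINE_MAX)
                bytes = safe_bytes;             /* drop the partial last token */

        printf("kept %zu bytes: \"%.*s\"\n", bytes, (int)bytes, opts);
        return 0;
}
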
@@ -285,8 +363,8 @@ fail:
void *get_efi_config_table(efi_guid_t guid)
{
- unsigned long tables = efi_table_attr(efi_system_table(), tables);
- int nr_tables = efi_table_attr(efi_system_table(), nr_tables);
+ unsigned long tables = efi_table_attr(efi_system_table, tables);
+ int nr_tables = efi_table_attr(efi_system_table, nr_tables);
int i;
for (i = 0; i < nr_tables; i++) {
@@ -301,12 +379,6 @@ void *get_efi_config_table(efi_guid_t guid)
return NULL;
}
-void efi_char16_printk(efi_char16_t *str)
-{
- efi_call_proto(efi_table_attr(efi_system_table(), con_out),
- output_string, str);
-}
-
/*
* The LINUX_EFI_INITRD_MEDIA_GUID vendor media device path below provides a way
* for the firmware or bootloader to expose the initrd data directly to the stub
@@ -348,6 +420,7 @@ static const struct {
* %EFI_OUT_OF_RESOURCES if memory allocation failed
* %EFI_LOAD_ERROR in all other cases
*/
+static
efi_status_t efi_load_initrd_dev_path(unsigned long *load_addr,
unsigned long *load_size,
unsigned long max)
@@ -360,9 +433,6 @@ efi_status_t efi_load_initrd_dev_path(unsigned long *load_addr,
efi_handle_t handle;
efi_status_t status;
- if (!load_addr || !load_size)
- return EFI_INVALID_PARAMETER;
-
dp = (efi_device_path_protocol_t *)&initrd_dev_path;
status = efi_bs_call(locate_device_path, &lf2_proto_guid, &dp, &handle);
if (status != EFI_SUCCESS)
@@ -392,3 +462,80 @@ efi_status_t efi_load_initrd_dev_path(unsigned long *load_addr,
*load_size = initrd_size;
return EFI_SUCCESS;
}
+
+static
+efi_status_t efi_load_initrd_cmdline(efi_loaded_image_t *image,
+ unsigned long *load_addr,
+ unsigned long *load_size,
+ unsigned long soft_limit,
+ unsigned long hard_limit)
+{
+ if (!IS_ENABLED(CONFIG_EFI_GENERIC_STUB_INITRD_CMDLINE_LOADER) ||
+ (IS_ENABLED(CONFIG_X86) && (!efi_is_native() || image == NULL))) {
+ *load_addr = *load_size = 0;
+ return EFI_SUCCESS;
+ }
+
+ return handle_cmdline_files(image, L"initrd=", sizeof(L"initrd=") - 2,
+ soft_limit, hard_limit,
+ load_addr, load_size);
+}
+
+efi_status_t efi_load_initrd(efi_loaded_image_t *image,
+ unsigned long *load_addr,
+ unsigned long *load_size,
+ unsigned long soft_limit,
+ unsigned long hard_limit)
+{
+ efi_status_t status;
+
+ if (!load_addr || !load_size)
+ return EFI_INVALID_PARAMETER;
+
+ status = efi_load_initrd_dev_path(load_addr, load_size, hard_limit);
+ if (status == EFI_SUCCESS) {
+ efi_info("Loaded initrd from LINUX_EFI_INITRD_MEDIA_GUID device path\n");
+ } else if (status == EFI_NOT_FOUND) {
+ status = efi_load_initrd_cmdline(image, load_addr, load_size,
+ soft_limit, hard_limit);
+ if (status == EFI_SUCCESS && *load_size > 0)
+ efi_info("Loaded initrd from command line option\n");
+ }
+
+ return status;
+}
+
+efi_status_t efi_wait_for_key(unsigned long usec, efi_input_key_t *key)
+{
+ efi_event_t events[2], timer;
+ unsigned long index;
+ efi_simple_text_input_protocol_t *con_in;
+ efi_status_t status;
+
+ con_in = efi_table_attr(efi_system_table, con_in);
+ if (!con_in)
+ return EFI_UNSUPPORTED;
+ efi_set_event_at(events, 0, efi_table_attr(con_in, wait_for_key));
+
+ status = efi_bs_call(create_event, EFI_EVT_TIMER, 0, NULL, NULL, &timer);
+ if (status != EFI_SUCCESS)
+ return status;
+
+ status = efi_bs_call(set_timer, timer, EfiTimerRelative,
+ EFI_100NSEC_PER_USEC * usec);
+ if (status != EFI_SUCCESS)
+ return status;
+ efi_set_event_at(events, 1, timer);
+
+ status = efi_bs_call(wait_for_event, 2, events, &index);
+ if (status == EFI_SUCCESS) {
+ if (index == 0)
+ status = efi_call_proto(con_in, read_keystroke, key);
+ else
+ status = EFI_TIMEOUT;
+ }
+
+ efi_bs_call(close_event, timer);
+
+ return status;
+}
diff --git a/drivers/firmware/efi/libstub/arm-stub.c b/drivers/firmware/efi/libstub/efi-stub.c
index 48161b1dd098..e97370bdfdb0 100644
--- a/drivers/firmware/efi/libstub/arm-stub.c
+++ b/drivers/firmware/efi/libstub/efi-stub.c
@@ -36,14 +36,9 @@
#endif
static u64 virtmap_base = EFI_RT_VIRTUAL_BASE;
-static bool __efistub_global flat_va_mapping;
+static bool flat_va_mapping;
-static efi_system_table_t *__efistub_global sys_table;
-
-__pure efi_system_table_t *efi_system_table(void)
-{
- return sys_table;
-}
+const efi_system_table_t *efi_system_table;
static struct screen_info *setup_graphics(void)
{
@@ -69,7 +64,7 @@ static struct screen_info *setup_graphics(void)
return si;
}
-void install_memreserve_table(void)
+static void install_memreserve_table(void)
{
struct linux_efi_memreserve *rsv;
efi_guid_t memreserve_table_guid = LINUX_EFI_MEMRESERVE_TABLE_GUID;
@@ -78,7 +73,7 @@ void install_memreserve_table(void)
status = efi_bs_call(allocate_pool, EFI_LOADER_DATA, sizeof(*rsv),
(void **)&rsv);
if (status != EFI_SUCCESS) {
- pr_efi_err("Failed to allocate memreserve entry!\n");
+ efi_err("Failed to allocate memreserve entry!\n");
return;
}
@@ -89,7 +84,7 @@ void install_memreserve_table(void)
status = efi_bs_call(install_configuration_table,
&memreserve_table_guid, rsv);
if (status != EFI_SUCCESS)
- pr_efi_err("Failed to install memreserve config table!\n");
+ efi_err("Failed to install memreserve config table!\n");
}
static unsigned long get_dram_base(void)
@@ -149,7 +144,8 @@ asmlinkage void __noreturn efi_enter_kernel(unsigned long entrypoint,
  * for both architectures, with the arch-specific code provided in the
* handle_kernel_image() function.
*/
-efi_status_t efi_entry(efi_handle_t handle, efi_system_table_t *sys_table_arg)
+efi_status_t __efiapi efi_pe_entry(efi_handle_t handle,
+ efi_system_table_t *sys_table_arg)
{
efi_loaded_image_t *image;
efi_status_t status;
@@ -171,10 +167,10 @@ efi_status_t efi_entry(efi_handle_t handle, efi_system_table_t *sys_table_arg)
efi_properties_table_t *prop_tbl;
unsigned long max_addr;
- sys_table = sys_table_arg;
+ efi_system_table = sys_table_arg;
/* Check if we were booted by the EFI firmware */
- if (sys_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) {
+ if (efi_system_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) {
status = EFI_INVALID_PARAMETER;
goto fail;
}
@@ -188,16 +184,16 @@ efi_status_t efi_entry(efi_handle_t handle, efi_system_table_t *sys_table_arg)
* information about the running image, such as size and the command
* line.
*/
- status = sys_table->boottime->handle_protocol(handle,
+ status = efi_system_table->boottime->handle_protocol(handle,
&loaded_image_proto, (void *)&image);
if (status != EFI_SUCCESS) {
- pr_efi_err("Failed to get loaded image protocol\n");
+ efi_err("Failed to get loaded image protocol\n");
goto fail;
}
dram_base = get_dram_base();
if (dram_base == EFI_ERROR) {
- pr_efi_err("Failed to find DRAM base\n");
+ efi_err("Failed to find DRAM base\n");
status = EFI_LOAD_ERROR;
goto fail;
}
@@ -207,22 +203,32 @@ efi_status_t efi_entry(efi_handle_t handle, efi_system_table_t *sys_table_arg)
* protocol. We are going to copy the command line into the
* device tree, so this can be allocated anywhere.
*/
- cmdline_ptr = efi_convert_cmdline(image, &cmdline_size, ULONG_MAX);
+ cmdline_ptr = efi_convert_cmdline(image, &cmdline_size);
if (!cmdline_ptr) {
- pr_efi_err("getting command line via LOADED_IMAGE_PROTOCOL\n");
+ efi_err("getting command line via LOADED_IMAGE_PROTOCOL\n");
status = EFI_OUT_OF_RESOURCES;
goto fail;
}
if (IS_ENABLED(CONFIG_CMDLINE_EXTEND) ||
IS_ENABLED(CONFIG_CMDLINE_FORCE) ||
- cmdline_size == 0)
- efi_parse_options(CONFIG_CMDLINE);
+ cmdline_size == 0) {
+ status = efi_parse_options(CONFIG_CMDLINE);
+ if (status != EFI_SUCCESS) {
+ efi_err("Failed to parse options\n");
+ goto fail_free_cmdline;
+ }
+ }
- if (!IS_ENABLED(CONFIG_CMDLINE_FORCE) && cmdline_size > 0)
- efi_parse_options(cmdline_ptr);
+ if (!IS_ENABLED(CONFIG_CMDLINE_FORCE) && cmdline_size > 0) {
+ status = efi_parse_options(cmdline_ptr);
+ if (status != EFI_SUCCESS) {
+ efi_err("Failed to parse options\n");
+ goto fail_free_cmdline;
+ }
+ }
- pr_efi("Booting Linux Kernel...\n");
+ efi_info("Booting Linux Kernel...\n");
si = setup_graphics();
@@ -231,8 +237,8 @@ efi_status_t efi_entry(efi_handle_t handle, efi_system_table_t *sys_table_arg)
&reserve_size,
dram_base, image);
if (status != EFI_SUCCESS) {
- pr_efi_err("Failed to relocate kernel\n");
- goto fail_free_cmdline;
+ efi_err("Failed to relocate kernel\n");
+ goto fail_free_screeninfo;
}
efi_retrieve_tpm2_eventlog();
@@ -250,42 +256,34 @@ efi_status_t efi_entry(efi_handle_t handle, efi_system_table_t *sys_table_arg)
if (!IS_ENABLED(CONFIG_EFI_ARMSTUB_DTB_LOADER) ||
secure_boot != efi_secureboot_mode_disabled) {
if (strstr(cmdline_ptr, "dtb="))
- pr_efi("Ignoring DTB from command line.\n");
+ efi_err("Ignoring DTB from command line.\n");
} else {
status = efi_load_dtb(image, &fdt_addr, &fdt_size);
if (status != EFI_SUCCESS) {
- pr_efi_err("Failed to load device tree!\n");
+ efi_err("Failed to load device tree!\n");
goto fail_free_image;
}
}
if (fdt_addr) {
- pr_efi("Using DTB from command line\n");
+ efi_info("Using DTB from command line\n");
} else {
/* Look for a device tree configuration table entry. */
fdt_addr = (uintptr_t)get_fdt(&fdt_size);
if (fdt_addr)
- pr_efi("Using DTB from configuration table\n");
+ efi_info("Using DTB from configuration table\n");
}
if (!fdt_addr)
- pr_efi("Generating empty DTB\n");
+ efi_info("Generating empty DTB\n");
- if (!noinitrd()) {
+ if (!efi_noinitrd) {
max_addr = efi_get_max_initrd_addr(dram_base, image_addr);
- status = efi_load_initrd_dev_path(&initrd_addr, &initrd_size,
- max_addr);
- if (status == EFI_SUCCESS) {
- pr_efi("Loaded initrd from LINUX_EFI_INITRD_MEDIA_GUID device path\n");
- } else if (status == EFI_NOT_FOUND) {
- status = efi_load_initrd(image, &initrd_addr, &initrd_size,
- ULONG_MAX, max_addr);
- if (status == EFI_SUCCESS && initrd_size > 0)
- pr_efi("Loaded initrd from command line option\n");
- }
+ status = efi_load_initrd(image, &initrd_addr, &initrd_size,
+ ULONG_MAX, max_addr);
if (status != EFI_SUCCESS)
- pr_efi_err("Failed to load initrd!\n");
+ efi_err("Failed to load initrd!\n");
}
efi_random_get_seed();
@@ -303,7 +301,7 @@ efi_status_t efi_entry(efi_handle_t handle, efi_system_table_t *sys_table_arg)
EFI_PROPERTIES_RUNTIME_MEMORY_PROTECTION_NON_EXECUTABLE_PE_DATA);
/* hibernation expects the runtime regions to stay in the same place */
- if (!IS_ENABLED(CONFIG_HIBERNATION) && !nokaslr() && !flat_va_mapping) {
+ if (!IS_ENABLED(CONFIG_HIBERNATION) && !efi_nokaslr && !flat_va_mapping) {
/*
* Randomize the base of the UEFI runtime services region.
* Preserve the 2 MB alignment of the region by taking a
@@ -335,7 +333,7 @@ efi_status_t efi_entry(efi_handle_t handle, efi_system_table_t *sys_table_arg)
/* not reached */
fail_free_initrd:
- pr_efi_err("Failed to update FDT and exit boot services\n");
+ efi_err("Failed to update FDT and exit boot services\n");
efi_free(initrd_size, initrd_addr);
efi_free(fdt_size, fdt_addr);
@@ -343,9 +341,10 @@ fail_free_initrd:
fail_free_image:
efi_free(image_size, image_addr);
efi_free(reserve_size, reserve_addr);
-fail_free_cmdline:
+fail_free_screeninfo:
free_screen_info(si);
- efi_free(cmdline_size, (unsigned long)cmdline_ptr);
+fail_free_cmdline:
+ efi_bs_call(free_pool, cmdline_ptr);
fail:
return status;
}
@@ -376,7 +375,7 @@ void efi_get_virtmap(efi_memory_desc_t *memory_map, unsigned long map_size,
size = in->num_pages * EFI_PAGE_SIZE;
in->virt_addr = in->phys_addr;
- if (novamap()) {
+ if (efi_novamap) {
continue;
}
diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h
index 62943992f02f..bcd8c0a785f0 100644
--- a/drivers/firmware/efi/libstub/efistub.h
+++ b/drivers/firmware/efi/libstub/efistub.h
@@ -3,6 +3,13 @@
#ifndef _DRIVERS_FIRMWARE_EFI_EFISTUB_H
#define _DRIVERS_FIRMWARE_EFI_EFISTUB_H
+#include <linux/compiler.h>
+#include <linux/efi.h>
+#include <linux/kernel.h>
+#include <linux/kern_levels.h>
+#include <linux/types.h>
+#include <asm/efi.h>
+
/* error code which can't be mistaken for valid address */
#define EFI_ERROR (~0UL)
@@ -25,25 +32,33 @@
#define EFI_ALLOC_ALIGN EFI_PAGE_SIZE
#endif
-#if defined(CONFIG_ARM) || defined(CONFIG_X86)
-#define __efistub_global __section(.data)
-#else
-#define __efistub_global
-#endif
+extern bool efi_nochunk;
+extern bool efi_nokaslr;
+extern bool efi_noinitrd;
+extern int efi_loglevel;
+extern bool efi_novamap;
-extern bool __pure nochunk(void);
-extern bool __pure nokaslr(void);
-extern bool __pure noinitrd(void);
-extern bool __pure is_quiet(void);
-extern bool __pure novamap(void);
+extern const efi_system_table_t *efi_system_table;
-extern __pure efi_system_table_t *efi_system_table(void);
+efi_status_t __efiapi efi_pe_entry(efi_handle_t handle,
+ efi_system_table_t *sys_table_arg);
-#define pr_efi(msg) do { \
- if (!is_quiet()) efi_printk("EFI stub: "msg); \
-} while (0)
+#ifndef ARCH_HAS_EFISTUB_WRAPPERS
-#define pr_efi_err(msg) efi_printk("EFI stub: ERROR: "msg)
+#define efi_is_native() (true)
+#define efi_bs_call(func, ...) efi_system_table->boottime->func(__VA_ARGS__)
+#define efi_rt_call(func, ...) efi_system_table->runtime->func(__VA_ARGS__)
+#define efi_table_attr(inst, attr) (inst->attr)
+#define efi_call_proto(inst, func, ...) inst->func(inst, ##__VA_ARGS__)
+
+#endif
+
+#define efi_info(fmt, ...) \
+ efi_printk(KERN_INFO fmt, ##__VA_ARGS__)
+#define efi_err(fmt, ...) \
+ efi_printk(KERN_ERR "ERROR: " fmt, ##__VA_ARGS__)
+#define efi_debug(fmt, ...) \
+ efi_printk(KERN_DEBUG "DEBUG: " fmt, ##__VA_ARGS__)
/* Helper macros for the usual case of using simple C variables: */
#ifndef fdt_setprop_inplace_var
@@ -77,6 +92,13 @@ extern __pure efi_system_table_t *efi_system_table(void);
((handle = efi_get_handle_at((array), i)) || true); \
i++)
+static inline
+void efi_set_u64_split(u64 data, u32 *lo, u32 *hi)
+{
+ *lo = lower_32_bits(data);
+ *hi = upper_32_bits(data);
+}
+
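
efi_set_u64_split() just breaks a 64-bit value into a low/high pair of 32-bit fields; the GOP code later in this patch uses it to split the frame buffer address into screen_info's lfb_base and ext_lfb_base. A trivial standalone equivalent (not part of the patch), for a frame buffer that happens to live above 4 GiB:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

static void set_u64_split(uint64_t data, uint32_t *lo, uint32_t *hi)
{
        *lo = (uint32_t)data;           /* lower_32_bits() */
        *hi = (uint32_t)(data >> 32);   /* upper_32_bits() */
}

int main(void)
{
        uint32_t lfb_base, ext_lfb_base;

        set_u64_split(0x4000000000ULL, &lfb_base, &ext_lfb_base);
        /* prints lfb_base=0x00000000 ext_lfb_base=0x00000040 */
        printf("lfb_base=0x%08" PRIx32 " ext_lfb_base=0x%08" PRIx32 "\n",
               lfb_base, ext_lfb_base);
        return 0;
}
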
/*
* Allocation types for calls to boottime->allocate_pages.
*/
@@ -93,6 +115,16 @@ extern __pure efi_system_table_t *efi_system_table(void);
#define EFI_LOCATE_BY_PROTOCOL 2
/*
+ * boottime->stall takes the time period in microseconds
+ */
+#define EFI_USEC_PER_SEC 1000000
+
+/*
+ * boottime->set_timer takes the time in 100ns units
+ */
+#define EFI_100NSEC_PER_USEC ((u64)10)
+
+/*
* An efi_boot_memmap is used by efi_get_memory_map() to return the
* EFI memory map in a dynamically allocated buffer.
*
@@ -116,6 +148,39 @@ struct efi_boot_memmap {
typedef struct efi_generic_dev_path efi_device_path_protocol_t;
+typedef void *efi_event_t;
+/* Note that notifications won't work in mixed mode */
+typedef void (__efiapi *efi_event_notify_t)(efi_event_t, void *);
+
+#define EFI_EVT_TIMER 0x80000000U
+#define EFI_EVT_RUNTIME 0x40000000U
+#define EFI_EVT_NOTIFY_WAIT 0x00000100U
+#define EFI_EVT_NOTIFY_SIGNAL 0x00000200U
+
+/*
+ * boottime->wait_for_event takes an array of events as input.
+ * Provide a helper to set it up correctly for mixed mode.
+ */
+static inline
+void efi_set_event_at(efi_event_t *events, size_t idx, efi_event_t event)
+{
+ if (efi_is_native())
+ events[idx] = event;
+ else
+ ((u32 *)events)[idx] = (u32)(unsigned long)event;
+}
+
+#define EFI_TPL_APPLICATION 4
+#define EFI_TPL_CALLBACK 8
+#define EFI_TPL_NOTIFY 16
+#define EFI_TPL_HIGH_LEVEL 31
+
+typedef enum {
+ EfiTimerCancel,
+ EfiTimerPeriodic,
+ EfiTimerRelative
+} EFI_TIMER_DELAY;
+
/*
* EFI Boot Services table
*/
@@ -134,11 +199,16 @@ union efi_boot_services {
efi_status_t (__efiapi *allocate_pool)(int, unsigned long,
void **);
efi_status_t (__efiapi *free_pool)(void *);
- void *create_event;
- void *set_timer;
- void *wait_for_event;
+ efi_status_t (__efiapi *create_event)(u32, unsigned long,
+ efi_event_notify_t, void *,
+ efi_event_t *);
+ efi_status_t (__efiapi *set_timer)(efi_event_t,
+ EFI_TIMER_DELAY, u64);
+ efi_status_t (__efiapi *wait_for_event)(unsigned long,
+ efi_event_t *,
+ unsigned long *);
void *signal_event;
- void *close_event;
+ efi_status_t (__efiapi *close_event)(efi_event_t);
void *check_event;
void *install_protocol_interface;
void *reinstall_protocol_interface;
@@ -165,7 +235,7 @@ union efi_boot_services {
efi_status_t (__efiapi *exit_boot_services)(efi_handle_t,
unsigned long);
void *get_next_monotonic_count;
- void *stall;
+ efi_status_t (__efiapi *stall)(unsigned long);
void *set_watchdog_timer;
void *connect_controller;
efi_status_t (__efiapi *disconnect_controller)(efi_handle_t,
@@ -250,6 +320,27 @@ union efi_uga_draw_protocol {
} mixed_mode;
};
+typedef struct {
+ u16 scan_code;
+ efi_char16_t unicode_char;
+} efi_input_key_t;
+
+union efi_simple_text_input_protocol {
+ struct {
+ void *reset;
+ efi_status_t (__efiapi *read_keystroke)(efi_simple_text_input_protocol_t *,
+ efi_input_key_t *);
+ efi_event_t wait_for_key;
+ };
+ struct {
+ u32 reset;
+ u32 read_keystroke;
+ u32 wait_for_key;
+ } mixed_mode;
+};
+
+efi_status_t efi_wait_for_key(unsigned long usec, efi_input_key_t *key);
+
union efi_simple_text_output_protocol {
struct {
void *reset;
@@ -311,8 +402,10 @@ typedef union efi_graphics_output_protocol efi_graphics_output_protocol_t;
union efi_graphics_output_protocol {
struct {
- void *query_mode;
- void *set_mode;
+ efi_status_t (__efiapi *query_mode)(efi_graphics_output_protocol_t *,
+ u32, unsigned long *,
+ efi_graphics_output_mode_info_t **);
+ efi_status_t (__efiapi *set_mode) (efi_graphics_output_protocol_t *, u32);
void *blt;
efi_graphics_output_protocol_mode_t *mode;
};
@@ -600,8 +693,6 @@ efi_status_t efi_exit_boot_services(void *handle,
void *priv,
efi_exit_boot_map_processing priv_func);
-void efi_char16_printk(efi_char16_t *);
-
efi_status_t allocate_new_fdt_and_exit_boot(void *handle,
unsigned long *new_fdt_addr,
unsigned long max_addr,
@@ -625,33 +716,24 @@ efi_status_t check_platform_features(void);
void *get_efi_config_table(efi_guid_t guid);
-void efi_printk(char *str);
+/* NOTE: These functions do not print a trailing newline after the string */
+void efi_char16_puts(efi_char16_t *);
+void efi_puts(const char *str);
+
+__printf(1, 2) int efi_printk(char const *fmt, ...);
void efi_free(unsigned long size, unsigned long addr);
-char *efi_convert_cmdline(efi_loaded_image_t *image, int *cmd_line_len,
- unsigned long max_addr);
+char *efi_convert_cmdline(efi_loaded_image_t *image, int *cmd_line_len);
efi_status_t efi_get_memory_map(struct efi_boot_memmap *map);
-efi_status_t efi_low_alloc_above(unsigned long size, unsigned long align,
- unsigned long *addr, unsigned long min);
-
-static inline
-efi_status_t efi_low_alloc(unsigned long size, unsigned long align,
- unsigned long *addr)
-{
- /*
- * Don't allocate at 0x0. It will confuse code that
- * checks pointers against NULL. Skip the first 8
- * bytes so we start at a nice even number.
- */
- return efi_low_alloc_above(size, align, addr, 0x8);
-}
-
efi_status_t efi_allocate_pages(unsigned long size, unsigned long *addr,
unsigned long max);
+efi_status_t efi_allocate_pages_aligned(unsigned long size, unsigned long *addr,
+ unsigned long max, unsigned long align);
+
efi_status_t efi_relocate_kernel(unsigned long *image_addr,
unsigned long image_size,
unsigned long alloc_size,
@@ -661,12 +743,27 @@ efi_status_t efi_relocate_kernel(unsigned long *image_addr,
efi_status_t efi_parse_options(char const *cmdline);
+void efi_parse_option_graphics(char *option);
+
efi_status_t efi_setup_gop(struct screen_info *si, efi_guid_t *proto,
unsigned long size);
-efi_status_t efi_load_dtb(efi_loaded_image_t *image,
- unsigned long *load_addr,
- unsigned long *load_size);
+efi_status_t handle_cmdline_files(efi_loaded_image_t *image,
+ const efi_char16_t *optstr,
+ int optstr_size,
+ unsigned long soft_limit,
+ unsigned long hard_limit,
+ unsigned long *load_addr,
+ unsigned long *load_size);
+
+
+static inline efi_status_t efi_load_dtb(efi_loaded_image_t *image,
+ unsigned long *load_addr,
+ unsigned long *load_size)
+{
+ return handle_cmdline_files(image, L"dtb=", sizeof(L"dtb=") - 2,
+ ULONG_MAX, ULONG_MAX, load_addr, load_size);
+}
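
The optstr_size passed to handle_cmdline_files() is the option prefix length in bytes of UTF-16, i.e. the sizeof() of the wide literal minus its two-byte terminator; inside the stub L"" literals are 16 bits wide because the objects are built with -fshort-wchar. On an ordinary host the same numbers can be reproduced with C11 u"" literals, as in this standalone sketch (not part of the patch):

#include <stdio.h>
#include <uchar.h>

int main(void)
{
        /* 5 char16_t units including the terminator -> 10 bytes. */
        printf("sizeof(u\"dtb=\")        = %zu\n", sizeof(u"dtb="));
        /* The length handed to handle_cmdline_files(): option bytes without the NUL. */
        printf("sizeof(u\"dtb=\") - 2    = %zu\n", sizeof(u"dtb=") - 2);
        /* Same idea for the initrd= option used by efi_load_initrd_cmdline(). */
        printf("sizeof(u\"initrd=\") - 2 = %zu\n", sizeof(u"initrd=") - 2);
        return 0;
}
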
efi_status_t efi_load_initrd(efi_loaded_image_t *image,
unsigned long *load_addr,
@@ -674,8 +771,4 @@ efi_status_t efi_load_initrd(efi_loaded_image_t *image,
unsigned long soft_limit,
unsigned long hard_limit);
-efi_status_t efi_load_initrd_dev_path(unsigned long *load_addr,
- unsigned long *load_size,
- unsigned long max);
-
#endif
diff --git a/drivers/firmware/efi/libstub/fdt.c b/drivers/firmware/efi/libstub/fdt.c
index 46cffac7a5f1..11ecf3c4640e 100644
--- a/drivers/firmware/efi/libstub/fdt.c
+++ b/drivers/firmware/efi/libstub/fdt.c
@@ -39,7 +39,7 @@ static efi_status_t update_fdt(void *orig_fdt, unsigned long orig_fdt_size,
/* Do some checks on provided FDT, if it exists: */
if (orig_fdt) {
if (fdt_check_header(orig_fdt)) {
- pr_efi_err("Device Tree header not valid!\n");
+ efi_err("Device Tree header not valid!\n");
return EFI_LOAD_ERROR;
}
/*
@@ -47,7 +47,7 @@ static efi_status_t update_fdt(void *orig_fdt, unsigned long orig_fdt_size,
* configuration table:
*/
if (orig_fdt_size && fdt_totalsize(orig_fdt) > orig_fdt_size) {
- pr_efi_err("Truncated device tree! foo!\n");
+ efi_err("Truncated device tree! foo!\n");
return EFI_LOAD_ERROR;
}
}
@@ -110,7 +110,7 @@ static efi_status_t update_fdt(void *orig_fdt, unsigned long orig_fdt_size,
/* Add FDT entries for EFI runtime services in chosen node. */
node = fdt_subnode_offset(fdt, 0, "chosen");
- fdt_val64 = cpu_to_fdt64((u64)(unsigned long)efi_system_table());
+ fdt_val64 = cpu_to_fdt64((u64)(unsigned long)efi_system_table);
status = fdt_setprop_var(fdt, node, "linux,uefi-system-table", fdt_val64);
if (status)
@@ -270,16 +270,16 @@ efi_status_t allocate_new_fdt_and_exit_boot(void *handle,
*/
status = efi_get_memory_map(&map);
if (status != EFI_SUCCESS) {
- pr_efi_err("Unable to retrieve UEFI memory map.\n");
+ efi_err("Unable to retrieve UEFI memory map.\n");
return status;
}
- pr_efi("Exiting boot services and installing virtual address map...\n");
+ efi_info("Exiting boot services and installing virtual address map...\n");
map.map = &memory_map;
status = efi_allocate_pages(MAX_FDT_SIZE, new_fdt_addr, max_addr);
if (status != EFI_SUCCESS) {
- pr_efi_err("Unable to allocate memory for new device tree.\n");
+ efi_err("Unable to allocate memory for new device tree.\n");
goto fail;
}
@@ -296,7 +296,7 @@ efi_status_t allocate_new_fdt_and_exit_boot(void *handle,
initrd_addr, initrd_size);
if (status != EFI_SUCCESS) {
- pr_efi_err("Unable to construct new device tree.\n");
+ efi_err("Unable to construct new device tree.\n");
goto fail_free_new_fdt;
}
@@ -310,11 +310,11 @@ efi_status_t allocate_new_fdt_and_exit_boot(void *handle,
if (status == EFI_SUCCESS) {
efi_set_virtual_address_map_t *svam;
- if (novamap())
+ if (efi_novamap)
return EFI_SUCCESS;
/* Install the new virtual address map */
- svam = efi_system_table()->runtime->set_virtual_address_map;
+ svam = efi_system_table->runtime->set_virtual_address_map;
status = svam(runtime_entry_count * desc_size, desc_size,
desc_ver, runtime_map);
@@ -342,13 +342,13 @@ efi_status_t allocate_new_fdt_and_exit_boot(void *handle,
return EFI_SUCCESS;
}
- pr_efi_err("Exit boot services failed.\n");
+ efi_err("Exit boot services failed.\n");
fail_free_new_fdt:
efi_free(MAX_FDT_SIZE, *new_fdt_addr);
fail:
- efi_system_table()->boottime->free_pool(runtime_map);
+ efi_system_table->boottime->free_pool(runtime_map);
return EFI_LOAD_ERROR;
}
@@ -363,7 +363,7 @@ void *get_fdt(unsigned long *fdt_size)
return NULL;
if (fdt_check_header(fdt) != 0) {
- pr_efi_err("Invalid header detected on UEFI supplied FDT, ignoring ...\n");
+ efi_err("Invalid header detected on UEFI supplied FDT, ignoring ...\n");
return NULL;
}
*fdt_size = fdt_totalsize(fdt);
diff --git a/drivers/firmware/efi/libstub/file.c b/drivers/firmware/efi/libstub/file.c
index ea66b1f16a79..2005e33b33d5 100644
--- a/drivers/firmware/efi/libstub/file.c
+++ b/drivers/firmware/efi/libstub/file.c
@@ -46,16 +46,14 @@ static efi_status_t efi_open_file(efi_file_protocol_t *volume,
status = volume->open(volume, &fh, fi->filename, EFI_FILE_MODE_READ, 0);
if (status != EFI_SUCCESS) {
- pr_efi_err("Failed to open file: ");
- efi_char16_printk(fi->filename);
- efi_printk("\n");
+ efi_err("Failed to open file: %ls\n", fi->filename);
return status;
}
info_sz = sizeof(struct finfo);
status = fh->get_info(fh, &info_guid, &info_sz, fi);
if (status != EFI_SUCCESS) {
- pr_efi_err("Failed to get file info\n");
+ efi_err("Failed to get file info\n");
fh->close(fh);
return status;
}
@@ -75,13 +73,13 @@ static efi_status_t efi_open_volume(efi_loaded_image_t *image,
status = efi_bs_call(handle_protocol, image->device_handle, &fs_proto,
(void **)&io);
if (status != EFI_SUCCESS) {
- pr_efi_err("Failed to handle fs_proto\n");
+ efi_err("Failed to handle fs_proto\n");
return status;
}
status = io->open_volume(io, fh);
if (status != EFI_SUCCESS)
- pr_efi_err("Failed to open volume\n");
+ efi_err("Failed to open volume\n");
return status;
}
@@ -121,13 +119,13 @@ static int find_file_option(const efi_char16_t *cmdline, int cmdline_len,
* We only support loading a file from the same filesystem as
* the kernel image.
*/
-static efi_status_t handle_cmdline_files(efi_loaded_image_t *image,
- const efi_char16_t *optstr,
- int optstr_size,
- unsigned long soft_limit,
- unsigned long hard_limit,
- unsigned long *load_addr,
- unsigned long *load_size)
+efi_status_t handle_cmdline_files(efi_loaded_image_t *image,
+ const efi_char16_t *optstr,
+ int optstr_size,
+ unsigned long soft_limit,
+ unsigned long hard_limit,
+ unsigned long *load_addr,
+ unsigned long *load_size)
{
const efi_char16_t *cmdline = image->load_options;
int cmdline_len = image->load_options_size / 2;
@@ -142,7 +140,7 @@ static efi_status_t handle_cmdline_files(efi_loaded_image_t *image,
if (!load_addr || !load_size)
return EFI_INVALID_PARAMETER;
- if (IS_ENABLED(CONFIG_X86) && !nochunk())
+ if (IS_ENABLED(CONFIG_X86) && !efi_nochunk)
efi_chunk_size = EFI_READ_CHUNK_SIZE;
alloc_addr = alloc_size = 0;
@@ -191,7 +189,7 @@ static efi_status_t handle_cmdline_files(efi_loaded_image_t *image,
&alloc_addr,
hard_limit);
if (status != EFI_SUCCESS) {
- pr_efi_err("Failed to allocate memory for files\n");
+ efi_err("Failed to allocate memory for files\n");
goto err_close_file;
}
@@ -215,7 +213,7 @@ static efi_status_t handle_cmdline_files(efi_loaded_image_t *image,
status = file->read(file, &chunksize, addr);
if (status != EFI_SUCCESS) {
- pr_efi_err("Failed to read file\n");
+ efi_err("Failed to read file\n");
goto err_close_file;
}
addr += chunksize;
@@ -239,21 +237,3 @@ err_close_volume:
efi_free(alloc_size, alloc_addr);
return status;
}
-
-efi_status_t efi_load_dtb(efi_loaded_image_t *image,
- unsigned long *load_addr,
- unsigned long *load_size)
-{
- return handle_cmdline_files(image, L"dtb=", sizeof(L"dtb=") - 2,
- ULONG_MAX, ULONG_MAX, load_addr, load_size);
-}
-
-efi_status_t efi_load_initrd(efi_loaded_image_t *image,
- unsigned long *load_addr,
- unsigned long *load_size,
- unsigned long soft_limit,
- unsigned long hard_limit)
-{
- return handle_cmdline_files(image, L"initrd=", sizeof(L"initrd=") - 2,
- soft_limit, hard_limit, load_addr, load_size);
-}
diff --git a/drivers/firmware/efi/libstub/gop.c b/drivers/firmware/efi/libstub/gop.c
index 55e6b3f286fe..ea5da307d542 100644
--- a/drivers/firmware/efi/libstub/gop.c
+++ b/drivers/firmware/efi/libstub/gop.c
@@ -5,169 +5,546 @@
*
* ----------------------------------------------------------------------- */
+#include <linux/bitops.h>
+#include <linux/ctype.h>
#include <linux/efi.h>
#include <linux/screen_info.h>
+#include <linux/string.h>
#include <asm/efi.h>
#include <asm/setup.h>
#include "efistub.h"
-static void find_bits(unsigned long mask, u8 *pos, u8 *size)
+enum efi_cmdline_option {
+ EFI_CMDLINE_NONE,
+ EFI_CMDLINE_MODE_NUM,
+ EFI_CMDLINE_RES,
+ EFI_CMDLINE_AUTO,
+ EFI_CMDLINE_LIST
+};
+
+static struct {
+ enum efi_cmdline_option option;
+ union {
+ u32 mode;
+ struct {
+ u32 width, height;
+ int format;
+ u8 depth;
+ } res;
+ };
+} cmdline = { .option = EFI_CMDLINE_NONE };
+
+static bool parse_modenum(char *option, char **next)
+{
+ u32 m;
+
+ if (!strstarts(option, "mode="))
+ return false;
+ option += strlen("mode=");
+ m = simple_strtoull(option, &option, 0);
+ if (*option && *option++ != ',')
+ return false;
+ cmdline.option = EFI_CMDLINE_MODE_NUM;
+ cmdline.mode = m;
+
+ *next = option;
+ return true;
+}
+
+static bool parse_res(char *option, char **next)
+{
+ u32 w, h, d = 0;
+ int pf = -1;
+
+ if (!isdigit(*option))
+ return false;
+ w = simple_strtoull(option, &option, 10);
+ if (*option++ != 'x' || !isdigit(*option))
+ return false;
+ h = simple_strtoull(option, &option, 10);
+ if (*option == '-') {
+ option++;
+ if (strstarts(option, "rgb")) {
+ option += strlen("rgb");
+ pf = PIXEL_RGB_RESERVED_8BIT_PER_COLOR;
+ } else if (strstarts(option, "bgr")) {
+ option += strlen("bgr");
+ pf = PIXEL_BGR_RESERVED_8BIT_PER_COLOR;
+ } else if (isdigit(*option))
+ d = simple_strtoull(option, &option, 10);
+ else
+ return false;
+ }
+ if (*option && *option++ != ',')
+ return false;
+ cmdline.option = EFI_CMDLINE_RES;
+ cmdline.res.width = w;
+ cmdline.res.height = h;
+ cmdline.res.format = pf;
+ cmdline.res.depth = d;
+
+ *next = option;
+ return true;
+}
+
+static bool parse_auto(char *option, char **next)
+{
+ if (!strstarts(option, "auto"))
+ return false;
+ option += strlen("auto");
+ if (*option && *option++ != ',')
+ return false;
+ cmdline.option = EFI_CMDLINE_AUTO;
+
+ *next = option;
+ return true;
+}
+
+static bool parse_list(char *option, char **next)
{
- u8 first, len;
+ if (!strstarts(option, "list"))
+ return false;
+ option += strlen("list");
+ if (*option && *option++ != ',')
+ return false;
+ cmdline.option = EFI_CMDLINE_LIST;
+
+ *next = option;
+ return true;
+}
+
+void efi_parse_option_graphics(char *option)
+{
+ while (*option) {
+ if (parse_modenum(option, &option))
+ continue;
+ if (parse_res(option, &option))
+ continue;
+ if (parse_auto(option, &option))
+ continue;
+ if (parse_list(option, &option))
+ continue;
+
+ while (*option && *option++ != ',')
+ ;
+ }
+}
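
Each comma-separated element of a video=efifb:... argument is matched against the four forms handled above: mode=<n>, <width>x<height>[-(rgb|bgr|<bpp>)], auto and list, and anything unrecognised is skipped up to the next comma. A minimal host-side parse of the resolution form only (not part of the patch), assuming an input such as "1024x768-32":

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
        const char *s = "1024x768-32";
        char *end;
        unsigned long w, h, d = 0;

        w = strtoul(s, &end, 10);
        if (*end++ != 'x')
                return 1;
        h = strtoul(end, &end, 10);
        if (*end == '-')
                d = strtoul(end + 1, &end, 10);

        printf("width=%lu height=%lu depth=%lu\n", w, h, d);    /* 1024 768 32 */
        return 0;
}
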
+
+static u32 choose_mode_modenum(efi_graphics_output_protocol_t *gop)
+{
+ efi_status_t status;
+
+ efi_graphics_output_protocol_mode_t *mode;
+ efi_graphics_output_mode_info_t *info;
+ unsigned long info_size;
+
+ u32 max_mode, cur_mode;
+ int pf;
+
+ mode = efi_table_attr(gop, mode);
+
+ cur_mode = efi_table_attr(mode, mode);
+ if (cmdline.mode == cur_mode)
+ return cur_mode;
+
+ max_mode = efi_table_attr(mode, max_mode);
+ if (cmdline.mode >= max_mode) {
+ efi_err("Requested mode is invalid\n");
+ return cur_mode;
+ }
+
+ status = efi_call_proto(gop, query_mode, cmdline.mode,
+ &info_size, &info);
+ if (status != EFI_SUCCESS) {
+ efi_err("Couldn't get mode information\n");
+ return cur_mode;
+ }
+
+ pf = info->pixel_format;
+
+ efi_bs_call(free_pool, info);
+
+ if (pf == PIXEL_BLT_ONLY || pf >= PIXEL_FORMAT_MAX) {
+ efi_err("Invalid PixelFormat\n");
+ return cur_mode;
+ }
+
+ return cmdline.mode;
+}
+
+static u8 pixel_bpp(int pixel_format, efi_pixel_bitmask_t pixel_info)
+{
+ if (pixel_format == PIXEL_BIT_MASK) {
+ u32 mask = pixel_info.red_mask | pixel_info.green_mask |
+ pixel_info.blue_mask | pixel_info.reserved_mask;
+ if (!mask)
+ return 0;
+ return __fls(mask) - __ffs(mask) + 1;
+ } else
+ return 32;
+}
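
For PIXEL_BIT_MASK modes the reported depth is the span from the lowest to the highest set bit across the four channel masks combined. A standalone sketch of the same calculation (not part of the patch), using the compiler builtins that typically back __ffs()/__fls(), for an 8:8:8 layout with no reserved bits:

#include <stdint.h>
#include <stdio.h>

static unsigned int bpp_from_masks(uint32_t r, uint32_t g, uint32_t b, uint32_t resv)
{
        uint32_t mask = r | g | b | resv;

        if (!mask)
                return 0;
        /* highest set bit - lowest set bit + 1 */
        return (31 - __builtin_clz(mask)) - __builtin_ctz(mask) + 1;
}

int main(void)
{
        /* red 0xff0000, green 0x00ff00, blue 0x0000ff -> 24 bpp */
        printf("depth = %u\n", bpp_from_masks(0xff0000, 0x00ff00, 0x0000ff, 0));
        return 0;
}
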
+
+static u32 choose_mode_res(efi_graphics_output_protocol_t *gop)
+{
+ efi_status_t status;
+
+ efi_graphics_output_protocol_mode_t *mode;
+ efi_graphics_output_mode_info_t *info;
+ unsigned long info_size;
+
+ u32 max_mode, cur_mode;
+ int pf;
+ efi_pixel_bitmask_t pi;
+ u32 m, w, h;
+
+ mode = efi_table_attr(gop, mode);
+
+ cur_mode = efi_table_attr(mode, mode);
+ info = efi_table_attr(mode, info);
+ pf = info->pixel_format;
+ pi = info->pixel_information;
+ w = info->horizontal_resolution;
+ h = info->vertical_resolution;
+
+ if (w == cmdline.res.width && h == cmdline.res.height &&
+ (cmdline.res.format < 0 || cmdline.res.format == pf) &&
+ (!cmdline.res.depth || cmdline.res.depth == pixel_bpp(pf, pi)))
+ return cur_mode;
+
+ max_mode = efi_table_attr(mode, max_mode);
+
+ for (m = 0; m < max_mode; m++) {
+ if (m == cur_mode)
+ continue;
+
+ status = efi_call_proto(gop, query_mode, m,
+ &info_size, &info);
+ if (status != EFI_SUCCESS)
+ continue;
+
+ pf = info->pixel_format;
+ pi = info->pixel_information;
+ w = info->horizontal_resolution;
+ h = info->vertical_resolution;
+
+ efi_bs_call(free_pool, info);
+
+ if (pf == PIXEL_BLT_ONLY || pf >= PIXEL_FORMAT_MAX)
+ continue;
+ if (w == cmdline.res.width && h == cmdline.res.height &&
+ (cmdline.res.format < 0 || cmdline.res.format == pf) &&
+ (!cmdline.res.depth || cmdline.res.depth == pixel_bpp(pf, pi)))
+ return m;
+ }
+
+ efi_err("Couldn't find requested mode\n");
+
+ return cur_mode;
+}
+
+static u32 choose_mode_auto(efi_graphics_output_protocol_t *gop)
+{
+ efi_status_t status;
+
+ efi_graphics_output_protocol_mode_t *mode;
+ efi_graphics_output_mode_info_t *info;
+ unsigned long info_size;
+
+ u32 max_mode, cur_mode, best_mode, area;
+ u8 depth;
+ int pf;
+ efi_pixel_bitmask_t pi;
+ u32 m, w, h, a;
+ u8 d;
+
+ mode = efi_table_attr(gop, mode);
+
+ cur_mode = efi_table_attr(mode, mode);
+ max_mode = efi_table_attr(mode, max_mode);
- first = 0;
- len = 0;
+ info = efi_table_attr(mode, info);
- if (mask) {
- while (!(mask & 0x1)) {
- mask = mask >> 1;
- first++;
+ pf = info->pixel_format;
+ pi = info->pixel_information;
+ w = info->horizontal_resolution;
+ h = info->vertical_resolution;
+
+ best_mode = cur_mode;
+ area = w * h;
+ depth = pixel_bpp(pf, pi);
+
+ for (m = 0; m < max_mode; m++) {
+ if (m == cur_mode)
+ continue;
+
+ status = efi_call_proto(gop, query_mode, m,
+ &info_size, &info);
+ if (status != EFI_SUCCESS)
+ continue;
+
+ pf = info->pixel_format;
+ pi = info->pixel_information;
+ w = info->horizontal_resolution;
+ h = info->vertical_resolution;
+
+ efi_bs_call(free_pool, info);
+
+ if (pf == PIXEL_BLT_ONLY || pf >= PIXEL_FORMAT_MAX)
+ continue;
+ a = w * h;
+ if (a < area)
+ continue;
+ d = pixel_bpp(pf, pi);
+ if (a > area || d > depth) {
+ best_mode = m;
+ area = a;
+ depth = d;
}
+ }
+
+ return best_mode;
+}
+
+static u32 choose_mode_list(efi_graphics_output_protocol_t *gop)
+{
+ efi_status_t status;
+
+ efi_graphics_output_protocol_mode_t *mode;
+ efi_graphics_output_mode_info_t *info;
+ unsigned long info_size;
+
+ u32 max_mode, cur_mode;
+ int pf;
+ efi_pixel_bitmask_t pi;
+ u32 m, w, h;
+ u8 d;
+ const char *dstr;
+ bool valid;
+ efi_input_key_t key;
- while (mask & 0x1) {
- mask = mask >> 1;
- len++;
+ mode = efi_table_attr(gop, mode);
+
+ cur_mode = efi_table_attr(mode, mode);
+ max_mode = efi_table_attr(mode, max_mode);
+
+ efi_printk("Available graphics modes are 0-%u\n", max_mode-1);
+ efi_puts(" * = current mode\n"
+ " - = unusable mode\n");
+ for (m = 0; m < max_mode; m++) {
+ status = efi_call_proto(gop, query_mode, m,
+ &info_size, &info);
+ if (status != EFI_SUCCESS)
+ continue;
+
+ pf = info->pixel_format;
+ pi = info->pixel_information;
+ w = info->horizontal_resolution;
+ h = info->vertical_resolution;
+
+ efi_bs_call(free_pool, info);
+
+ valid = !(pf == PIXEL_BLT_ONLY || pf >= PIXEL_FORMAT_MAX);
+ d = 0;
+ switch (pf) {
+ case PIXEL_RGB_RESERVED_8BIT_PER_COLOR:
+ dstr = "rgb";
+ break;
+ case PIXEL_BGR_RESERVED_8BIT_PER_COLOR:
+ dstr = "bgr";
+ break;
+ case PIXEL_BIT_MASK:
+ dstr = "";
+ d = pixel_bpp(pf, pi);
+ break;
+ case PIXEL_BLT_ONLY:
+ dstr = "blt";
+ break;
+ default:
+ dstr = "xxx";
+ break;
}
+
+ efi_printk("Mode %3u %c%c: Resolution %ux%u-%s%.0hhu\n",
+ m,
+ m == cur_mode ? '*' : ' ',
+ !valid ? '-' : ' ',
+ w, h, dstr, d);
+ }
+
+ efi_puts("\nPress any key to continue (or wait 10 seconds)\n");
+ status = efi_wait_for_key(10 * EFI_USEC_PER_SEC, &key);
+ if (status != EFI_SUCCESS && status != EFI_TIMEOUT) {
+ efi_err("Unable to read key, continuing in 10 seconds\n");
+ efi_bs_call(stall, 10 * EFI_USEC_PER_SEC);
+ }
+
+ return cur_mode;
+}
+
+static void set_mode(efi_graphics_output_protocol_t *gop)
+{
+ efi_graphics_output_protocol_mode_t *mode;
+ u32 cur_mode, new_mode;
+
+ switch (cmdline.option) {
+ case EFI_CMDLINE_MODE_NUM:
+ new_mode = choose_mode_modenum(gop);
+ break;
+ case EFI_CMDLINE_RES:
+ new_mode = choose_mode_res(gop);
+ break;
+ case EFI_CMDLINE_AUTO:
+ new_mode = choose_mode_auto(gop);
+ break;
+ case EFI_CMDLINE_LIST:
+ new_mode = choose_mode_list(gop);
+ break;
+ default:
+ return;
+ }
+
+ mode = efi_table_attr(gop, mode);
+ cur_mode = efi_table_attr(mode, mode);
+
+ if (new_mode == cur_mode)
+ return;
+
+ if (efi_call_proto(gop, set_mode, new_mode) != EFI_SUCCESS)
+ efi_err("Failed to set requested mode\n");
+}
+
+static void find_bits(u32 mask, u8 *pos, u8 *size)
+{
+ if (!mask) {
+ *pos = *size = 0;
+ return;
}
- *pos = first;
- *size = len;
+ /* UEFI spec guarantees that the set bits are contiguous */
+ *pos = __ffs(mask);
+ *size = __fls(mask) - *pos + 1;
}
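
find_bits() now relies on the UEFI guarantee that each channel mask is one contiguous run of bits, so the field position and width fall out of the lowest and highest set bit. A standalone sketch (not part of the patch) of what it would report for a hypothetical RGB565 mode:

#include <stdint.h>
#include <stdio.h>

static void find_bits(uint32_t mask, uint8_t *pos, uint8_t *size)
{
        if (!mask) {
                *pos = *size = 0;
                return;
        }
        /* set bits are contiguous, per the UEFI spec */
        *pos = __builtin_ctz(mask);
        *size = (31 - __builtin_clz(mask)) - *pos + 1;
}

int main(void)
{
        static const struct { const char *name; uint32_t mask; } ch[] = {
                { "red",   0xf800 }, { "green", 0x07e0 },
                { "blue",  0x001f }, { "rsvd",  0x0000 },
        };
        uint8_t pos, size;
        unsigned int i;

        for (i = 0; i < 4; i++) {
                find_bits(ch[i].mask, &pos, &size);
                printf("%-5s pos=%2u size=%u\n", ch[i].name,
                       (unsigned)pos, (unsigned)size);
        }
        return 0;
}
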
static void
setup_pixel_info(struct screen_info *si, u32 pixels_per_scan_line,
efi_pixel_bitmask_t pixel_info, int pixel_format)
{
- if (pixel_format == PIXEL_RGB_RESERVED_8BIT_PER_COLOR) {
- si->lfb_depth = 32;
- si->lfb_linelength = pixels_per_scan_line * 4;
- si->red_size = 8;
- si->red_pos = 0;
- si->green_size = 8;
- si->green_pos = 8;
- si->blue_size = 8;
- si->blue_pos = 16;
- si->rsvd_size = 8;
- si->rsvd_pos = 24;
- } else if (pixel_format == PIXEL_BGR_RESERVED_8BIT_PER_COLOR) {
- si->lfb_depth = 32;
- si->lfb_linelength = pixels_per_scan_line * 4;
- si->red_size = 8;
- si->red_pos = 16;
- si->green_size = 8;
- si->green_pos = 8;
- si->blue_size = 8;
- si->blue_pos = 0;
- si->rsvd_size = 8;
- si->rsvd_pos = 24;
- } else if (pixel_format == PIXEL_BIT_MASK) {
- find_bits(pixel_info.red_mask, &si->red_pos, &si->red_size);
- find_bits(pixel_info.green_mask, &si->green_pos,
- &si->green_size);
- find_bits(pixel_info.blue_mask, &si->blue_pos, &si->blue_size);
- find_bits(pixel_info.reserved_mask, &si->rsvd_pos,
- &si->rsvd_size);
+ if (pixel_format == PIXEL_BIT_MASK) {
+ find_bits(pixel_info.red_mask,
+ &si->red_pos, &si->red_size);
+ find_bits(pixel_info.green_mask,
+ &si->green_pos, &si->green_size);
+ find_bits(pixel_info.blue_mask,
+ &si->blue_pos, &si->blue_size);
+ find_bits(pixel_info.reserved_mask,
+ &si->rsvd_pos, &si->rsvd_size);
si->lfb_depth = si->red_size + si->green_size +
si->blue_size + si->rsvd_size;
si->lfb_linelength = (pixels_per_scan_line * si->lfb_depth) / 8;
} else {
- si->lfb_depth = 4;
- si->lfb_linelength = si->lfb_width / 2;
- si->red_size = 0;
- si->red_pos = 0;
- si->green_size = 0;
- si->green_pos = 0;
- si->blue_size = 0;
- si->blue_pos = 0;
- si->rsvd_size = 0;
- si->rsvd_pos = 0;
+ if (pixel_format == PIXEL_RGB_RESERVED_8BIT_PER_COLOR) {
+ si->red_pos = 0;
+ si->blue_pos = 16;
+ } else /* PIXEL_BGR_RESERVED_8BIT_PER_COLOR */ {
+ si->blue_pos = 0;
+ si->red_pos = 16;
+ }
+
+ si->green_pos = 8;
+ si->rsvd_pos = 24;
+ si->red_size = si->green_size =
+ si->blue_size = si->rsvd_size = 8;
+
+ si->lfb_depth = 32;
+ si->lfb_linelength = pixels_per_scan_line * 4;
}
}
-static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto,
- unsigned long size, void **handles)
+static efi_graphics_output_protocol_t *
+find_gop(efi_guid_t *proto, unsigned long size, void **handles)
{
- efi_graphics_output_protocol_t *gop, *first_gop;
- u16 width, height;
- u32 pixels_per_scan_line;
- u32 ext_lfb_base;
- efi_physical_addr_t fb_base;
- efi_pixel_bitmask_t pixel_info;
- int pixel_format;
- efi_status_t status;
+ efi_graphics_output_protocol_t *first_gop;
efi_handle_t h;
int i;
first_gop = NULL;
- gop = NULL;
for_each_efi_handle(h, handles, size, i) {
+ efi_status_t status;
+
+ efi_graphics_output_protocol_t *gop;
efi_graphics_output_protocol_mode_t *mode;
- efi_graphics_output_mode_info_t *info = NULL;
+ efi_graphics_output_mode_info_t *info;
+
efi_guid_t conout_proto = EFI_CONSOLE_OUT_DEVICE_GUID;
- bool conout_found = false;
void *dummy = NULL;
- efi_physical_addr_t current_fb_base;
status = efi_bs_call(handle_protocol, h, proto, (void **)&gop);
if (status != EFI_SUCCESS)
continue;
+ mode = efi_table_attr(gop, mode);
+ info = efi_table_attr(mode, info);
+ if (info->pixel_format == PIXEL_BLT_ONLY ||
+ info->pixel_format >= PIXEL_FORMAT_MAX)
+ continue;
+
+ /*
+ * Systems that use the UEFI Console Splitter may
+ * provide multiple GOP devices, not all of which are
+ * backed by real hardware. The workaround is to search
+ * for a GOP implementing the ConOut protocol, and if
+ * one isn't found, to just fall back to the first GOP.
+ *
+ * Once we've found a GOP supporting ConOut,
+ * don't bother looking any further.
+ */
status = efi_bs_call(handle_protocol, h, &conout_proto, &dummy);
if (status == EFI_SUCCESS)
- conout_found = true;
+ return gop;
- mode = efi_table_attr(gop, mode);
- info = efi_table_attr(mode, info);
- current_fb_base = efi_table_attr(mode, frame_buffer_base);
-
- if ((!first_gop || conout_found) &&
- info->pixel_format != PIXEL_BLT_ONLY) {
- /*
- * Systems that use the UEFI Console Splitter may
- * provide multiple GOP devices, not all of which are
- * backed by real hardware. The workaround is to search
- * for a GOP implementing the ConOut protocol, and if
- * one isn't found, to just fall back to the first GOP.
- */
- width = info->horizontal_resolution;
- height = info->vertical_resolution;
- pixel_format = info->pixel_format;
- pixel_info = info->pixel_information;
- pixels_per_scan_line = info->pixels_per_scan_line;
- fb_base = current_fb_base;
-
- /*
- * Once we've found a GOP supporting ConOut,
- * don't bother looking any further.
- */
+ if (!first_gop)
first_gop = gop;
- if (conout_found)
- break;
- }
}
+ return first_gop;
+}
+
+static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto,
+ unsigned long size, void **handles)
+{
+ efi_graphics_output_protocol_t *gop;
+ efi_graphics_output_protocol_mode_t *mode;
+ efi_graphics_output_mode_info_t *info;
+
+ gop = find_gop(proto, size, handles);
+
/* Did we find any GOPs? */
- if (!first_gop)
+ if (!gop)
return EFI_NOT_FOUND;
+ /* Change mode if requested */
+ set_mode(gop);
+
/* EFI framebuffer */
+ mode = efi_table_attr(gop, mode);
+ info = efi_table_attr(mode, info);
+
si->orig_video_isVGA = VIDEO_TYPE_EFI;
- si->lfb_width = width;
- si->lfb_height = height;
- si->lfb_base = fb_base;
+ si->lfb_width = info->horizontal_resolution;
+ si->lfb_height = info->vertical_resolution;
- ext_lfb_base = (u64)(unsigned long)fb_base >> 32;
- if (ext_lfb_base) {
+ efi_set_u64_split(efi_table_attr(mode, frame_buffer_base),
+ &si->lfb_base, &si->ext_lfb_base);
+ if (si->ext_lfb_base)
si->capabilities |= VIDEO_CAPABILITY_64BIT_BASE;
- si->ext_lfb_base = ext_lfb_base;
- }
si->pages = 1;
- setup_pixel_info(si, pixels_per_scan_line, pixel_info, pixel_format);
+ setup_pixel_info(si, info->pixels_per_scan_line,
+ info->pixel_information, info->pixel_format);
si->lfb_size = si->lfb_linelength * si->lfb_height;
diff --git a/drivers/firmware/efi/libstub/mem.c b/drivers/firmware/efi/libstub/mem.c
index 09f4fa01914e..feef8d4be113 100644
--- a/drivers/firmware/efi/libstub/mem.c
+++ b/drivers/firmware/efi/libstub/mem.c
@@ -91,120 +91,23 @@ fail:
efi_status_t efi_allocate_pages(unsigned long size, unsigned long *addr,
unsigned long max)
{
- efi_physical_addr_t alloc_addr = ALIGN_DOWN(max + 1, EFI_ALLOC_ALIGN) - 1;
- int slack = EFI_ALLOC_ALIGN / EFI_PAGE_SIZE - 1;
+ efi_physical_addr_t alloc_addr;
efi_status_t status;
- size = round_up(size, EFI_ALLOC_ALIGN);
+ if (EFI_ALLOC_ALIGN > EFI_PAGE_SIZE)
+ return efi_allocate_pages_aligned(size, addr, max,
+ EFI_ALLOC_ALIGN);
+
+ alloc_addr = ALIGN_DOWN(max + 1, EFI_ALLOC_ALIGN) - 1;
status = efi_bs_call(allocate_pages, EFI_ALLOCATE_MAX_ADDRESS,
- EFI_LOADER_DATA, size / EFI_PAGE_SIZE + slack,
+ EFI_LOADER_DATA, DIV_ROUND_UP(size, EFI_PAGE_SIZE),
&alloc_addr);
if (status != EFI_SUCCESS)
return status;
- *addr = ALIGN((unsigned long)alloc_addr, EFI_ALLOC_ALIGN);
-
- if (slack > 0) {
- int l = (alloc_addr % EFI_ALLOC_ALIGN) / EFI_PAGE_SIZE;
-
- if (l) {
- efi_bs_call(free_pages, alloc_addr, slack - l + 1);
- slack = l - 1;
- }
- if (slack)
- efi_bs_call(free_pages, *addr + size, slack);
- }
+ *addr = alloc_addr;
return EFI_SUCCESS;
}
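
With this change the common case, where EFI_ALLOC_ALIGN equals the page size, allocates exactly DIV_ROUND_UP(size, EFI_PAGE_SIZE) pages below max, while the aligned case is delegated to efi_allocate_pages_aligned() (not shown here; presumably it keeps a trimming scheme like the one being removed) for configurations whose EFI_ALLOC_ALIGN exceeds a page, arm64 with its 64 KiB alignment being the usual example. A standalone sketch of the head/tail trimming arithmetic the removed slack-based code performed (not part of the patch; the returned address is made up):

#include <stdio.h>

#define PAGE_SIZE   0x1000UL
#define ALLOC_ALIGN 0x10000UL   /* a 64 KiB allocation alignment */

int main(void)
{
        unsigned long size = 3 * ALLOC_ALIGN;                   /* requested size */
        unsigned long slack = ALLOC_ALIGN / PAGE_SIZE - 1;      /* 15 extra pages */
        unsigned long alloc_addr = 0x40003000;                  /* what firmware handed back */
        unsigned long addr = (alloc_addr + ALLOC_ALIGN - 1) & ~(ALLOC_ALIGN - 1);
        unsigned long l = (alloc_addr % ALLOC_ALIGN) / PAGE_SIZE;

        printf("aligned start    : 0x%lx\n", addr);                     /* 0x40010000 */
        printf("head pages freed : %lu\n", l ? slack - l + 1 : 0);      /* 13 */
        printf("tail pages freed : %lu\n", l ? l - 1 : slack);          /* 2 */
        printf("pages requested  : %lu (vs %lu without slack)\n",
               size / PAGE_SIZE + slack, size / PAGE_SIZE);
        return 0;
}
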
-/**
- * efi_low_alloc_above() - allocate pages at or above given address
- * @size: size of the memory area to allocate
- * @align: minimum alignment of the allocated memory area. It should
- * a power of two.
- * @addr: on exit the address of the allocated memory
- * @min: minimum address to used for the memory allocation
- *
- * Allocate at the lowest possible address that is not below @min as
- * EFI_LOADER_DATA. The allocated pages are aligned according to @align but at
- * least EFI_ALLOC_ALIGN. The first allocated page will not below the address
- * given by @min.
- *
- * Return: status code
- */
-efi_status_t efi_low_alloc_above(unsigned long size, unsigned long align,
- unsigned long *addr, unsigned long min)
-{
- unsigned long map_size, desc_size, buff_size;
- efi_memory_desc_t *map;
- efi_status_t status;
- unsigned long nr_pages;
- int i;
- struct efi_boot_memmap boot_map;
-
- boot_map.map = &map;
- boot_map.map_size = &map_size;
- boot_map.desc_size = &desc_size;
- boot_map.desc_ver = NULL;
- boot_map.key_ptr = NULL;
- boot_map.buff_size = &buff_size;
-
- status = efi_get_memory_map(&boot_map);
- if (status != EFI_SUCCESS)
- goto fail;
-
- /*
- * Enforce minimum alignment that EFI or Linux requires when
- * requesting a specific address. We are doing page-based (or
- * larger) allocations, and both the address and size must meet
- * alignment constraints.
- */
- if (align < EFI_ALLOC_ALIGN)
- align = EFI_ALLOC_ALIGN;
-
- size = round_up(size, EFI_ALLOC_ALIGN);
- nr_pages = size / EFI_PAGE_SIZE;
- for (i = 0; i < map_size / desc_size; i++) {
- efi_memory_desc_t *desc;
- unsigned long m = (unsigned long)map;
- u64 start, end;
-
- desc = efi_early_memdesc_ptr(m, desc_size, i);
-
- if (desc->type != EFI_CONVENTIONAL_MEMORY)
- continue;
-
- if (efi_soft_reserve_enabled() &&
- (desc->attribute & EFI_MEMORY_SP))
- continue;
-
- if (desc->num_pages < nr_pages)
- continue;
-
- start = desc->phys_addr;
- end = start + desc->num_pages * EFI_PAGE_SIZE;
-
- if (start < min)
- start = min;
-
- start = round_up(start, align);
- if ((start + size) > end)
- continue;
-
- status = efi_bs_call(allocate_pages, EFI_ALLOCATE_ADDRESS,
- EFI_LOADER_DATA, nr_pages, &start);
- if (status == EFI_SUCCESS) {
- *addr = start;
- break;
- }
- }
-
- if (i == map_size / desc_size)
- status = EFI_NOT_FOUND;
-
- efi_bs_call(free_pool, map);
-fail:
- return status;
-}
/**
* efi_free() - free memory pages
@@ -227,81 +130,3 @@ void efi_free(unsigned long size, unsigned long addr)
nr_pages = round_up(size, EFI_ALLOC_ALIGN) / EFI_PAGE_SIZE;
efi_bs_call(free_pages, addr, nr_pages);
}
-
-/**
- * efi_relocate_kernel() - copy memory area
- * @image_addr: pointer to address of memory area to copy
- * @image_size: size of memory area to copy
- * @alloc_size: minimum size of memory to allocate, must be greater or
- * equal to image_size
- * @preferred_addr: preferred target address
- * @alignment: minimum alignment of the allocated memory area. It
- * should be a power of two.
- * @min_addr: minimum target address
- *
- * Copy a memory area to a newly allocated memory area aligned according
- * to @alignment but at least EFI_ALLOC_ALIGN. If the preferred address
- * is not available, the allocated address will not be below @min_addr.
- * On exit, @image_addr is updated to the target copy address that was used.
- *
- * This function is used to copy the Linux kernel verbatim. It does not apply
- * any relocation changes.
- *
- * Return: status code
- */
-efi_status_t efi_relocate_kernel(unsigned long *image_addr,
- unsigned long image_size,
- unsigned long alloc_size,
- unsigned long preferred_addr,
- unsigned long alignment,
- unsigned long min_addr)
-{
- unsigned long cur_image_addr;
- unsigned long new_addr = 0;
- efi_status_t status;
- unsigned long nr_pages;
- efi_physical_addr_t efi_addr = preferred_addr;
-
- if (!image_addr || !image_size || !alloc_size)
- return EFI_INVALID_PARAMETER;
- if (alloc_size < image_size)
- return EFI_INVALID_PARAMETER;
-
- cur_image_addr = *image_addr;
-
- /*
- * The EFI firmware loader could have placed the kernel image
- * anywhere in memory, but the kernel has restrictions on the
- * max physical address it can run at. Some architectures
- * also have a prefered address, so first try to relocate
- * to the preferred address. If that fails, allocate as low
- * as possible while respecting the required alignment.
- */
- nr_pages = round_up(alloc_size, EFI_ALLOC_ALIGN) / EFI_PAGE_SIZE;
- status = efi_bs_call(allocate_pages, EFI_ALLOCATE_ADDRESS,
- EFI_LOADER_DATA, nr_pages, &efi_addr);
- new_addr = efi_addr;
- /*
- * If preferred address allocation failed allocate as low as
- * possible.
- */
- if (status != EFI_SUCCESS) {
- status = efi_low_alloc_above(alloc_size, alignment, &new_addr,
- min_addr);
- }
- if (status != EFI_SUCCESS) {
- pr_efi_err("Failed to allocate usable memory for kernel.\n");
- return status;
- }
-
- /*
- * We know source/dest won't overlap since both memory ranges
- * have been allocated by UEFI, so we can safely use memcpy.
- */
- memcpy((void *)new_addr, (void *)cur_image_addr, image_size);
-
- /* Return the new address of the relocated image. */
- *image_addr = new_addr;
-
- return status;
-}
diff --git a/drivers/firmware/efi/libstub/pci.c b/drivers/firmware/efi/libstub/pci.c
index b025e59b94df..99fb25d2bcf5 100644
--- a/drivers/firmware/efi/libstub/pci.c
+++ b/drivers/firmware/efi/libstub/pci.c
@@ -28,21 +28,21 @@ void efi_pci_disable_bridge_busmaster(void)
if (status != EFI_BUFFER_TOO_SMALL) {
if (status != EFI_SUCCESS && status != EFI_NOT_FOUND)
- pr_efi_err("Failed to locate PCI I/O handles'\n");
+ efi_err("Failed to locate PCI I/O handles'\n");
return;
}
status = efi_bs_call(allocate_pool, EFI_LOADER_DATA, pci_handle_size,
(void **)&pci_handle);
if (status != EFI_SUCCESS) {
- pr_efi_err("Failed to allocate memory for 'pci_handle'\n");
+ efi_err("Failed to allocate memory for 'pci_handle'\n");
return;
}
status = efi_bs_call(locate_handle, EFI_LOCATE_BY_PROTOCOL, &pci_proto,
NULL, &pci_handle_size, pci_handle);
if (status != EFI_SUCCESS) {
- pr_efi_err("Failed to locate PCI I/O handles'\n");
+ efi_err("Failed to locate PCI I/O handles'\n");
goto free_handle;
}
@@ -69,7 +69,7 @@ void efi_pci_disable_bridge_busmaster(void)
* access to the framebuffer. Drivers for true PCIe graphics
* controllers that are behind a PCIe root port do not use
* DMA to implement the GOP framebuffer anyway [although they
- * may use it in their implentation of Gop->Blt()], and so
+ * may use it in their implementation of Gop->Blt()], and so
* disabling DMA in the PCI bridge should not interfere with
* normal operation of the device.
*/
@@ -106,7 +106,7 @@ void efi_pci_disable_bridge_busmaster(void)
status = efi_call_proto(pci, pci.write, EfiPciIoWidthUint16,
PCI_COMMAND, 1, &command);
if (status != EFI_SUCCESS)
- pr_efi_err("Failed to disable PCI busmastering\n");
+ efi_err("Failed to disable PCI busmastering\n");
}
free_handle:
diff --git a/drivers/firmware/efi/libstub/randomalloc.c b/drivers/firmware/efi/libstub/randomalloc.c
index 4578f59e160c..a408df474d83 100644
--- a/drivers/firmware/efi/libstub/randomalloc.c
+++ b/drivers/firmware/efi/libstub/randomalloc.c
@@ -74,6 +74,8 @@ efi_status_t efi_random_alloc(unsigned long size,
if (align < EFI_ALLOC_ALIGN)
align = EFI_ALLOC_ALIGN;
+ size = round_up(size, EFI_ALLOC_ALIGN);
+
/* count the suitable slots in each memory map entry */
for (map_offset = 0; map_offset < map_size; map_offset += desc_size) {
efi_memory_desc_t *md = (void *)memory_map + map_offset;
@@ -85,7 +87,7 @@ efi_status_t efi_random_alloc(unsigned long size,
}
/* find a random number between 0 and total_slots */
- target_slot = (total_slots * (u16)random_seed) >> 16;
+ target_slot = (total_slots * (u64)(random_seed & U32_MAX)) >> 32;
/*
* target_slot is now a value in the range [0, total_slots), and so
@@ -109,7 +111,7 @@ efi_status_t efi_random_alloc(unsigned long size,
}
target = round_up(md->phys_addr, align) + target_slot * align;
- pages = round_up(size, EFI_PAGE_SIZE) / EFI_PAGE_SIZE;
+ pages = size / EFI_PAGE_SIZE;
status = efi_bs_call(allocate_pages, EFI_ALLOCATE_ADDRESS,
EFI_LOADER_DATA, pages, &target);
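The new slot computation consumes all 32 bits of the seed instead of only the low 16, so the choice stays uniform even when total_slots is large. A minimal host-side sketch of the arithmetic (illustrative only, not part of the patch; names are made up):

#include <assert.h>
#include <stdint.h>

/* Map a 32-bit random value onto [0, total_slots) without a division. */
static uint64_t pick_slot(uint64_t total_slots, uint32_t seed)
{
	return (total_slots * (uint64_t)seed) >> 32;
}

int main(void)
{
	assert(pick_slot(1000, 0) == 0);            /* smallest seed -> first slot */
	assert(pick_slot(1000, UINT32_MAX) == 999); /* largest seed  -> last slot  */
	return 0;
}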
diff --git a/drivers/firmware/efi/libstub/relocate.c b/drivers/firmware/efi/libstub/relocate.c
new file mode 100644
index 000000000000..9b1aaf8b123f
--- /dev/null
+++ b/drivers/firmware/efi/libstub/relocate.c
@@ -0,0 +1,174 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/efi.h>
+#include <asm/efi.h>
+
+#include "efistub.h"
+
+/**
+ * efi_low_alloc_above() - allocate pages at or above given address
+ * @size: size of the memory area to allocate
+ * @align: minimum alignment of the allocated memory area. It should
+ * be a power of two.
+ * @addr: on exit the address of the allocated memory
+ * @min: minimum address to be used for the memory allocation
+ *
+ * Allocate at the lowest possible address that is not below @min as
+ * EFI_LOADER_DATA. The allocated pages are aligned according to @align but at
+ * least EFI_ALLOC_ALIGN. The first allocated page will not be below the address
+ * given by @min.
+ *
+ * Return: status code
+ */
+static efi_status_t efi_low_alloc_above(unsigned long size, unsigned long align,
+ unsigned long *addr, unsigned long min)
+{
+ unsigned long map_size, desc_size, buff_size;
+ efi_memory_desc_t *map;
+ efi_status_t status;
+ unsigned long nr_pages;
+ int i;
+ struct efi_boot_memmap boot_map;
+
+ boot_map.map = &map;
+ boot_map.map_size = &map_size;
+ boot_map.desc_size = &desc_size;
+ boot_map.desc_ver = NULL;
+ boot_map.key_ptr = NULL;
+ boot_map.buff_size = &buff_size;
+
+ status = efi_get_memory_map(&boot_map);
+ if (status != EFI_SUCCESS)
+ goto fail;
+
+ /*
+ * Enforce minimum alignment that EFI or Linux requires when
+ * requesting a specific address. We are doing page-based (or
+ * larger) allocations, and both the address and size must meet
+ * alignment constraints.
+ */
+ if (align < EFI_ALLOC_ALIGN)
+ align = EFI_ALLOC_ALIGN;
+
+ size = round_up(size, EFI_ALLOC_ALIGN);
+ nr_pages = size / EFI_PAGE_SIZE;
+ for (i = 0; i < map_size / desc_size; i++) {
+ efi_memory_desc_t *desc;
+ unsigned long m = (unsigned long)map;
+ u64 start, end;
+
+ desc = efi_early_memdesc_ptr(m, desc_size, i);
+
+ if (desc->type != EFI_CONVENTIONAL_MEMORY)
+ continue;
+
+ if (efi_soft_reserve_enabled() &&
+ (desc->attribute & EFI_MEMORY_SP))
+ continue;
+
+ if (desc->num_pages < nr_pages)
+ continue;
+
+ start = desc->phys_addr;
+ end = start + desc->num_pages * EFI_PAGE_SIZE;
+
+ if (start < min)
+ start = min;
+
+ start = round_up(start, align);
+ if ((start + size) > end)
+ continue;
+
+ status = efi_bs_call(allocate_pages, EFI_ALLOCATE_ADDRESS,
+ EFI_LOADER_DATA, nr_pages, &start);
+ if (status == EFI_SUCCESS) {
+ *addr = start;
+ break;
+ }
+ }
+
+ if (i == map_size / desc_size)
+ status = EFI_NOT_FOUND;
+
+ efi_bs_call(free_pool, map);
+fail:
+ return status;
+}
+
+/**
+ * efi_relocate_kernel() - copy memory area
+ * @image_addr: pointer to address of memory area to copy
+ * @image_size: size of memory area to copy
+ * @alloc_size: minimum size of memory to allocate, must be greater than or
+ * equal to image_size
+ * @preferred_addr: preferred target address
+ * @alignment: minimum alignment of the allocated memory area. It
+ * should be a power of two.
+ * @min_addr: minimum target address
+ *
+ * Copy a memory area to a newly allocated memory area aligned according
+ * to @alignment but at least EFI_ALLOC_ALIGN. If the preferred address
+ * is not available, the allocated address will not be below @min_addr.
+ * On exit, @image_addr is updated to the target copy address that was used.
+ *
+ * This function is used to copy the Linux kernel verbatim. It does not apply
+ * any relocation changes.
+ *
+ * Return: status code
+ */
+efi_status_t efi_relocate_kernel(unsigned long *image_addr,
+ unsigned long image_size,
+ unsigned long alloc_size,
+ unsigned long preferred_addr,
+ unsigned long alignment,
+ unsigned long min_addr)
+{
+ unsigned long cur_image_addr;
+ unsigned long new_addr = 0;
+ efi_status_t status;
+ unsigned long nr_pages;
+ efi_physical_addr_t efi_addr = preferred_addr;
+
+ if (!image_addr || !image_size || !alloc_size)
+ return EFI_INVALID_PARAMETER;
+ if (alloc_size < image_size)
+ return EFI_INVALID_PARAMETER;
+
+ cur_image_addr = *image_addr;
+
+ /*
+ * The EFI firmware loader could have placed the kernel image
+ * anywhere in memory, but the kernel has restrictions on the
+ * max physical address it can run at. Some architectures
+ * also have a preferred address, so first try to relocate
+ * to the preferred address. If that fails, allocate as low
+ * as possible while respecting the required alignment.
+ */
+ nr_pages = round_up(alloc_size, EFI_ALLOC_ALIGN) / EFI_PAGE_SIZE;
+ status = efi_bs_call(allocate_pages, EFI_ALLOCATE_ADDRESS,
+ EFI_LOADER_DATA, nr_pages, &efi_addr);
+ new_addr = efi_addr;
+ /*
+ * If the preferred address allocation failed, allocate as low as
+ * possible.
+ */
+ if (status != EFI_SUCCESS) {
+ status = efi_low_alloc_above(alloc_size, alignment, &new_addr,
+ min_addr);
+ }
+ if (status != EFI_SUCCESS) {
+ efi_err("Failed to allocate usable memory for kernel.\n");
+ return status;
+ }
+
+ /*
+ * We know source/dest won't overlap since both memory ranges
+ * have been allocated by UEFI, so we can safely use memcpy.
+ */
+ memcpy((void *)new_addr, (void *)cur_image_addr, image_size);
+
+ /* Return the new address of the relocated image. */
+ *image_addr = new_addr;
+
+ return status;
+}
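Note that the helpers above round the request up to EFI_ALLOC_ALIGN before converting it to pages, so on architectures whose allocation granule is larger than the EFI page size even a tiny request consumes a whole granule. A rough illustration with the constants hard-coded as assumptions (EFI_PAGE_SIZE is 4 KiB per the UEFI spec; the 64 KiB granule is an arm64-style example):

#define EFI_PAGE_SIZE	4096UL
#define EFI_ALLOC_ALIGN	65536UL				/* assumed 64 KiB granule */
#define round_up(x, a)	(((x) + (a) - 1) & ~((a) - 1))	/* power-of-two round up */

/* A 1-byte allocation still reserves 16 EFI pages (one full granule). */
unsigned long nr_pages = round_up(1UL, EFI_ALLOC_ALIGN) / EFI_PAGE_SIZE;	/* == 16 */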
diff --git a/drivers/firmware/efi/libstub/secureboot.c b/drivers/firmware/efi/libstub/secureboot.c
index a765378ad18c..5efc524b14be 100644
--- a/drivers/firmware/efi/libstub/secureboot.c
+++ b/drivers/firmware/efi/libstub/secureboot.c
@@ -67,10 +67,10 @@ enum efi_secureboot_mode efi_get_secureboot(void)
return efi_secureboot_mode_disabled;
secure_boot_enabled:
- pr_efi("UEFI Secure Boot is enabled.\n");
+ efi_info("UEFI Secure Boot is enabled.\n");
return efi_secureboot_mode_enabled;
out_efi_err:
- pr_efi_err("Could not determine UEFI Secure Boot status.\n");
+ efi_err("Could not determine UEFI Secure Boot status.\n");
return efi_secureboot_mode_unknown;
}
diff --git a/drivers/firmware/efi/libstub/tpm.c b/drivers/firmware/efi/libstub/tpm.c
index e9a684637b70..7acbac16eae0 100644
--- a/drivers/firmware/efi/libstub/tpm.c
+++ b/drivers/firmware/efi/libstub/tpm.c
@@ -119,7 +119,7 @@ void efi_retrieve_tpm2_eventlog(void)
sizeof(*log_tbl) + log_size, (void **)&log_tbl);
if (status != EFI_SUCCESS) {
- efi_printk("Unable to allocate memory for event log\n");
+ efi_err("Unable to allocate memory for event log\n");
return;
}
diff --git a/drivers/firmware/efi/libstub/vsprintf.c b/drivers/firmware/efi/libstub/vsprintf.c
new file mode 100644
index 000000000000..e65ef49a54cd
--- /dev/null
+++ b/drivers/firmware/efi/libstub/vsprintf.c
@@ -0,0 +1,564 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright 2007 rPath, Inc. - All Rights Reserved
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * Oh, it's a waste of space, but oh-so-yummy for debugging.
+ */
+
+#include <stdarg.h>
+
+#include <linux/compiler.h>
+#include <linux/ctype.h>
+#include <linux/kernel.h>
+#include <linux/limits.h>
+#include <linux/string.h>
+#include <linux/types.h>
+
+static
+int skip_atoi(const char **s)
+{
+ int i = 0;
+
+ while (isdigit(**s))
+ i = i * 10 + *((*s)++) - '0';
+ return i;
+}
+
+/*
+ * put_dec_full4 handles numbers in the range 0 <= r < 10000.
+ * The multiplier 0xccd is round(2^15/10), and the approximation
+ * r/10 == (r * 0xccd) >> 15 is exact for all r < 16389.
+ */
+static
+void put_dec_full4(char *end, unsigned int r)
+{
+ int i;
+
+ for (i = 0; i < 3; i++) {
+ unsigned int q = (r * 0xccd) >> 15;
+ *--end = '0' + (r - q * 10);
+ r = q;
+ }
+ *--end = '0' + r;
+}
+
+/* put_dec is copied from lib/vsprintf.c with small modifications */
+
+/*
+ * Call put_dec_full4 on x % 10000, return x / 10000.
+ * The approximation x/10000 == (x * 0x346DC5D7) >> 43
+ * holds for all x < 1,128,869,999. The largest value this
+ * helper will ever be asked to convert is 1,125,520,955.
+ * (second call in the put_dec code, assuming n is all-ones).
+ */
+static
+unsigned int put_dec_helper4(char *end, unsigned int x)
+{
+ unsigned int q = (x * 0x346DC5D7ULL) >> 43;
+
+ put_dec_full4(end, x - q * 10000);
+ return q;
+}
+
+/* Based on code by Douglas W. Jones found at
+ * <http://www.cs.uiowa.edu/~jones/bcd/decimal.html#sixtyfour>
+ * (with permission from the author).
+ * Performs no 64-bit division and hence should be fast on 32-bit machines.
+ */
+static
+char *put_dec(char *end, unsigned long long n)
+{
+ unsigned int d3, d2, d1, q, h;
+ char *p = end;
+
+ d1 = ((unsigned int)n >> 16); /* implicit "& 0xffff" */
+ h = (n >> 32);
+ d2 = (h ) & 0xffff;
+ d3 = (h >> 16); /* implicit "& 0xffff" */
+
+ /* n = 2^48 d3 + 2^32 d2 + 2^16 d1 + d0
+ = 281_4749_7671_0656 d3 + 42_9496_7296 d2 + 6_5536 d1 + d0 */
+ q = 656 * d3 + 7296 * d2 + 5536 * d1 + ((unsigned int)n & 0xffff);
+ q = put_dec_helper4(p, q);
+ p -= 4;
+
+ q += 7671 * d3 + 9496 * d2 + 6 * d1;
+ q = put_dec_helper4(p, q);
+ p -= 4;
+
+ q += 4749 * d3 + 42 * d2;
+ q = put_dec_helper4(p, q);
+ p -= 4;
+
+ q += 281 * d3;
+ q = put_dec_helper4(p, q);
+ p -= 4;
+
+ put_dec_full4(p, q);
+ p -= 4;
+
+ /* strip off the extra 0's we printed */
+ while (p < end && *p == '0')
+ ++p;
+
+ return p;
+}
+
+static
+char *number(char *end, unsigned long long num, int base, char locase)
+{
+ /*
+ * locase = 0 or 0x20. ORing digits or letters with 'locase'
+ * produces the same digits or (maybe lowercased) letters
+ */
+
+ /* we are called with base 8, 10 or 16, only, thus don't need "G..." */
+ static const char digits[16] = "0123456789ABCDEF"; /* "GHIJKLMNOPQRSTUVWXYZ"; */
+
+ switch (base) {
+ case 10:
+ if (num != 0)
+ end = put_dec(end, num);
+ break;
+ case 8:
+ for (; num != 0; num >>= 3)
+ *--end = '0' + (num & 07);
+ break;
+ case 16:
+ for (; num != 0; num >>= 4)
+ *--end = digits[num & 0xf] | locase;
+ break;
+ default:
+ unreachable();
+ };
+
+ return end;
+}
+
+#define ZEROPAD 1 /* pad with zero */
+#define SIGN 2 /* unsigned/signed long */
+#define PLUS 4 /* show plus */
+#define SPACE 8 /* space if plus */
+#define LEFT 16 /* left justified */
+#define SMALL 32 /* Must be 32 == 0x20 */
+#define SPECIAL 64 /* 0x */
+#define WIDE 128 /* UTF-16 string */
+
+static
+int get_flags(const char **fmt)
+{
+ int flags = 0;
+
+ do {
+ switch (**fmt) {
+ case '-':
+ flags |= LEFT;
+ break;
+ case '+':
+ flags |= PLUS;
+ break;
+ case ' ':
+ flags |= SPACE;
+ break;
+ case '#':
+ flags |= SPECIAL;
+ break;
+ case '0':
+ flags |= ZEROPAD;
+ break;
+ default:
+ return flags;
+ }
+ ++(*fmt);
+ } while (1);
+}
+
+static
+int get_int(const char **fmt, va_list *ap)
+{
+ if (isdigit(**fmt))
+ return skip_atoi(fmt);
+ if (**fmt == '*') {
+ ++(*fmt);
+ /* it's the next argument */
+ return va_arg(*ap, int);
+ }
+ return 0;
+}
+
+static
+unsigned long long get_number(int sign, int qualifier, va_list *ap)
+{
+ if (sign) {
+ switch (qualifier) {
+ case 'L':
+ return va_arg(*ap, long long);
+ case 'l':
+ return va_arg(*ap, long);
+ case 'h':
+ return (short)va_arg(*ap, int);
+ case 'H':
+ return (signed char)va_arg(*ap, int);
+ default:
+ return va_arg(*ap, int);
+ };
+ } else {
+ switch (qualifier) {
+ case 'L':
+ return va_arg(*ap, unsigned long long);
+ case 'l':
+ return va_arg(*ap, unsigned long);
+ case 'h':
+ return (unsigned short)va_arg(*ap, int);
+ case 'H':
+ return (unsigned char)va_arg(*ap, int);
+ default:
+ return va_arg(*ap, unsigned int);
+ }
+ }
+}
+
+static
+char get_sign(long long *num, int flags)
+{
+ if (!(flags & SIGN))
+ return 0;
+ if (*num < 0) {
+ *num = -(*num);
+ return '-';
+ }
+ if (flags & PLUS)
+ return '+';
+ if (flags & SPACE)
+ return ' ';
+ return 0;
+}
+
+static
+size_t utf16s_utf8nlen(const u16 *s16, size_t maxlen)
+{
+ size_t len, clen;
+
+ for (len = 0; len < maxlen && *s16; len += clen) {
+ u16 c0 = *s16++;
+
+ /* First, get the length for a BMP character */
+ clen = 1 + (c0 >= 0x80) + (c0 >= 0x800);
+ if (len + clen > maxlen)
+ break;
+ /*
+ * If this is a high surrogate, and we're already at maxlen, we
+ * can't include the character if it's a valid surrogate pair.
+ * Avoid accessing one extra word just to check if it's valid
+ * or not.
+ */
+ if ((c0 & 0xfc00) == 0xd800) {
+ if (len + clen == maxlen)
+ break;
+ if ((*s16 & 0xfc00) == 0xdc00) {
+ ++s16;
+ ++clen;
+ }
+ }
+ }
+
+ return len;
+}
+
+static
+u32 utf16_to_utf32(const u16 **s16)
+{
+ u16 c0, c1;
+
+ c0 = *(*s16)++;
+ /* not a surrogate */
+ if ((c0 & 0xf800) != 0xd800)
+ return c0;
+ /* invalid: low surrogate instead of high */
+ if (c0 & 0x0400)
+ return 0xfffd;
+ c1 = **s16;
+ /* invalid: missing low surrogate */
+ if ((c1 & 0xfc00) != 0xdc00)
+ return 0xfffd;
+ /* valid surrogate pair */
+ ++(*s16);
+ return (0x10000 - (0xd800 << 10) - 0xdc00) + (c0 << 10) + c1;
+}
+
+#define PUTC(c) \
+do { \
+ if (pos < size) \
+ buf[pos] = (c); \
+ ++pos; \
+} while (0);
+
+int vsnprintf(char *buf, size_t size, const char *fmt, va_list ap)
+{
+ /* The maximum space required is to print a 64-bit number in octal */
+ char tmp[(sizeof(unsigned long long) * 8 + 2) / 3];
+ char *tmp_end = &tmp[ARRAY_SIZE(tmp)];
+ long long num;
+ int base;
+ const char *s;
+ size_t len, pos;
+ char sign;
+
+ int flags; /* flags to number() */
+
+ int field_width; /* width of output field */
+ int precision; /* min. # of digits for integers; max
+ number of chars from a string */
+ int qualifier; /* 'h', 'hh', 'l' or 'll' for integer fields */
+
+ va_list args;
+
+ /*
+ * We want to pass our input va_list to helper functions by reference,
+ * but there's an annoying edge case. If va_list was originally passed
+ * to us by value, we could just pass &ap down to the helpers. This is
+ * the case on, for example, X86_32.
+ * However, on X86_64 (and possibly others), va_list is actually a
+ * size-1 array containing a structure. Our function parameter ap has
+ * decayed from T[1] to T*, and &ap has type T** rather than T(*)[1],
+ * which is what will be expected by a function taking a va_list *
+ * parameter.
+ * One standard way to solve this mess is by creating a copy in a local
+ * variable of type va_list and then passing a pointer to that local
+ * copy instead, which is what we do here.
+ */
+ va_copy(args, ap);
+
+ for (pos = 0; *fmt; ++fmt) {
+ if (*fmt != '%' || *++fmt == '%') {
+ PUTC(*fmt);
+ continue;
+ }
+
+ /* process flags */
+ flags = get_flags(&fmt);
+
+ /* get field width */
+ field_width = get_int(&fmt, &args);
+ if (field_width < 0) {
+ field_width = -field_width;
+ flags |= LEFT;
+ }
+
+ if (flags & LEFT)
+ flags &= ~ZEROPAD;
+
+ /* get the precision */
+ precision = -1;
+ if (*fmt == '.') {
+ ++fmt;
+ precision = get_int(&fmt, &args);
+ if (precision >= 0)
+ flags &= ~ZEROPAD;
+ }
+
+ /* get the conversion qualifier */
+ qualifier = -1;
+ if (*fmt == 'h' || *fmt == 'l') {
+ qualifier = *fmt;
+ ++fmt;
+ if (qualifier == *fmt) {
+ qualifier -= 'a'-'A';
+ ++fmt;
+ }
+ }
+
+ sign = 0;
+
+ switch (*fmt) {
+ case 'c':
+ flags &= LEFT;
+ s = tmp;
+ if (qualifier == 'l') {
+ ((u16 *)tmp)[0] = (u16)va_arg(args, unsigned int);
+ ((u16 *)tmp)[1] = L'\0';
+ precision = INT_MAX;
+ goto wstring;
+ } else {
+ tmp[0] = (unsigned char)va_arg(args, int);
+ precision = len = 1;
+ }
+ goto output;
+
+ case 's':
+ flags &= LEFT;
+ if (precision < 0)
+ precision = INT_MAX;
+ s = va_arg(args, void *);
+ if (!s)
+ s = precision < 6 ? "" : "(null)";
+ else if (qualifier == 'l') {
+ wstring:
+ flags |= WIDE;
+ precision = len = utf16s_utf8nlen((const u16 *)s, precision);
+ goto output;
+ }
+ precision = len = strnlen(s, precision);
+ goto output;
+
+ /* integer number formats - set up the flags and "break" */
+ case 'o':
+ base = 8;
+ break;
+
+ case 'p':
+ if (precision < 0)
+ precision = 2 * sizeof(void *);
+ fallthrough;
+ case 'x':
+ flags |= SMALL;
+ fallthrough;
+ case 'X':
+ base = 16;
+ break;
+
+ case 'd':
+ case 'i':
+ flags |= SIGN;
+ fallthrough;
+ case 'u':
+ flags &= ~SPECIAL;
+ base = 10;
+ break;
+
+ default:
+ /*
+ * Bail out if the conversion specifier is invalid.
+ * There's probably a typo in the format string and the
+ * remaining specifiers are unlikely to match up with
+ * the arguments.
+ */
+ goto fail;
+ }
+ if (*fmt == 'p') {
+ num = (unsigned long)va_arg(args, void *);
+ } else {
+ num = get_number(flags & SIGN, qualifier, &args);
+ }
+
+ sign = get_sign(&num, flags);
+ if (sign)
+ --field_width;
+
+ s = number(tmp_end, num, base, flags & SMALL);
+ len = tmp_end - s;
+ /* default precision is 1 */
+ if (precision < 0)
+ precision = 1;
+ /* precision is minimum number of digits to print */
+ if (precision < len)
+ precision = len;
+ if (flags & SPECIAL) {
+ /*
+ * For octal, a leading 0 is printed only if necessary,
+ * i.e. if it's not already there because of the
+ * precision.
+ */
+ if (base == 8 && precision == len)
+ ++precision;
+ /*
+ * For hexadecimal, the leading 0x is skipped if the
+ * output is empty, i.e. both the number and the
+ * precision are 0.
+ */
+ if (base == 16 && precision > 0)
+ field_width -= 2;
+ else
+ flags &= ~SPECIAL;
+ }
+ /*
+ * For zero padding, increase the precision to fill the field
+ * width.
+ */
+ if ((flags & ZEROPAD) && field_width > precision)
+ precision = field_width;
+
+output:
+ /* Calculate the padding necessary */
+ field_width -= precision;
+ /* Leading padding with ' ' */
+ if (!(flags & LEFT))
+ while (field_width-- > 0)
+ PUTC(' ');
+ /* sign */
+ if (sign)
+ PUTC(sign);
+ /* 0x/0X for hexadecimal */
+ if (flags & SPECIAL) {
+ PUTC('0');
+ PUTC('X' | (flags & SMALL));
+ }
+ /* Zero padding and excess precision */
+ while (precision-- > len)
+ PUTC('0');
+ /* Actual output */
+ if (flags & WIDE) {
+ const u16 *ws = (const u16 *)s;
+
+ while (len-- > 0) {
+ u32 c32 = utf16_to_utf32(&ws);
+ u8 *s8;
+ size_t clen;
+
+ if (c32 < 0x80) {
+ PUTC(c32);
+ continue;
+ }
+
+ /* Number of trailing octets */
+ clen = 1 + (c32 >= 0x800) + (c32 >= 0x10000);
+
+ len -= clen;
+ s8 = (u8 *)&buf[pos];
+
+ /* Avoid writing partial character */
+ PUTC('\0');
+ pos += clen;
+ if (pos >= size)
+ continue;
+
+ /* Set high bits of leading octet */
+ *s8 = (0xf00 >> 1) >> clen;
+ /* Write trailing octets in reverse order */
+ for (s8 += clen; clen; --clen, c32 >>= 6)
+ *s8-- = 0x80 | (c32 & 0x3f);
+ /* Set low bits of leading octet */
+ *s8 |= c32;
+ }
+ } else {
+ while (len-- > 0)
+ PUTC(*s++);
+ }
+ /* Trailing padding with ' ' */
+ while (field_width-- > 0)
+ PUTC(' ');
+ }
+fail:
+ va_end(args);
+
+ if (size)
+ buf[min(pos, size-1)] = '\0';
+
+ return pos;
+}
+
+int snprintf(char *buf, size_t size, const char *fmt, ...)
+{
+ va_list args;
+ int i;
+
+ va_start(args, fmt);
+ i = vsnprintf(buf, size, fmt, args);
+ va_end(args);
+ return i;
+}
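The put_dec_full4()/put_dec_helper4() reciprocal multiplications above avoid division entirely; the validity ranges quoted in their comments can be confirmed with a short host-side check (standalone sketch, not part of the stub):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t r, x;

	/* r/10 == (r * 0xccd) >> 15 must be exact for every r put_dec_full4() sees. */
	for (r = 0; r < 16389; r++)
		if (((r * 0xccd) >> 15) != r / 10)
			return printf("0xccd approximation fails at %u\n", r), 1;

	/* x/10000 == (x * 0x346DC5D7) >> 43 over the documented input range. */
	for (x = 0; x < 1128869999u; x++)
		if (((x * 0x346DC5D7ULL) >> 43) != x / 10000)
			return printf("0x346DC5D7 approximation fails at %u\n", x), 1;

	puts("both approximations hold over their documented ranges");
	return 0;
}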
diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c
index f0339b5d3658..5a48d996ed71 100644
--- a/drivers/firmware/efi/libstub/x86-stub.c
+++ b/drivers/firmware/efi/libstub/x86-stub.c
@@ -20,21 +20,9 @@
/* Maximum physical address for 64-bit kernel with 4-level paging */
#define MAXMEM_X86_64_4LEVEL (1ull << 46)
-static efi_system_table_t *sys_table __efistub_global;
-extern const bool efi_is64;
+const efi_system_table_t *efi_system_table;
extern u32 image_offset;
-
-__pure efi_system_table_t *efi_system_table(void)
-{
- return sys_table;
-}
-
-__attribute_const__ bool efi_is_64bit(void)
-{
- if (IS_ENABLED(CONFIG_EFI_MIXED))
- return efi_is64;
- return IS_ENABLED(CONFIG_X86_64);
-}
+static efi_loaded_image_t *image = NULL;
static efi_status_t
preserve_pci_rom_image(efi_pci_io_protocol_t *pci, struct pci_setup_rom **__rom)
@@ -62,7 +50,7 @@ preserve_pci_rom_image(efi_pci_io_protocol_t *pci, struct pci_setup_rom **__rom)
status = efi_bs_call(allocate_pool, EFI_LOADER_DATA, size,
(void **)&rom);
if (status != EFI_SUCCESS) {
- efi_printk("Failed to allocate memory for 'rom'\n");
+ efi_err("Failed to allocate memory for 'rom'\n");
return status;
}
@@ -78,7 +66,7 @@ preserve_pci_rom_image(efi_pci_io_protocol_t *pci, struct pci_setup_rom **__rom)
PCI_VENDOR_ID, 1, &rom->vendor);
if (status != EFI_SUCCESS) {
- efi_printk("Failed to read rom->vendor\n");
+ efi_err("Failed to read rom->vendor\n");
goto free_struct;
}
@@ -86,7 +74,7 @@ preserve_pci_rom_image(efi_pci_io_protocol_t *pci, struct pci_setup_rom **__rom)
PCI_DEVICE_ID, 1, &rom->devid);
if (status != EFI_SUCCESS) {
- efi_printk("Failed to read rom->devid\n");
+ efi_err("Failed to read rom->devid\n");
goto free_struct;
}
@@ -131,7 +119,7 @@ static void setup_efi_pci(struct boot_params *params)
(void **)&pci_handle);
if (status != EFI_SUCCESS) {
- efi_printk("Failed to allocate memory for 'pci_handle'\n");
+ efi_err("Failed to allocate memory for 'pci_handle'\n");
return;
}
@@ -185,7 +173,7 @@ static void retrieve_apple_device_properties(struct boot_params *boot_params)
return;
if (efi_table_attr(p, version) != 0x10000) {
- efi_printk("Unsupported properties proto version\n");
+ efi_err("Unsupported properties proto version\n");
return;
}
@@ -198,7 +186,7 @@ static void retrieve_apple_device_properties(struct boot_params *boot_params)
size + sizeof(struct setup_data),
(void **)&new);
if (status != EFI_SUCCESS) {
- efi_printk("Failed to allocate memory for 'properties'\n");
+ efi_err("Failed to allocate memory for 'properties'\n");
return;
}
@@ -227,7 +215,7 @@ static const efi_char16_t apple[] = L"Apple";
static void setup_quirks(struct boot_params *boot_params)
{
efi_char16_t *fw_vendor = (efi_char16_t *)(unsigned long)
- efi_table_attr(efi_system_table(), fw_vendor);
+ efi_table_attr(efi_system_table, fw_vendor);
if (!memcmp(fw_vendor, apple, sizeof(apple))) {
if (IS_ENABLED(CONFIG_APPLE_PROPERTIES))
@@ -368,7 +356,6 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle,
{
struct boot_params *boot_params;
struct setup_header *hdr;
- efi_loaded_image_t *image;
void *image_base;
efi_guid_t proto = LOADED_IMAGE_PROTOCOL_GUID;
int options_size = 0;
@@ -377,28 +364,29 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle,
unsigned long ramdisk_addr;
unsigned long ramdisk_size;
- sys_table = sys_table_arg;
+ efi_system_table = sys_table_arg;
/* Check if we were booted by the EFI firmware */
- if (sys_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
+ if (efi_system_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
efi_exit(handle, EFI_INVALID_PARAMETER);
status = efi_bs_call(handle_protocol, handle, &proto, (void **)&image);
if (status != EFI_SUCCESS) {
- efi_printk("Failed to get handle for LOADED_IMAGE_PROTOCOL\n");
+ efi_err("Failed to get handle for LOADED_IMAGE_PROTOCOL\n");
efi_exit(handle, status);
}
image_base = efi_table_attr(image, image_base);
image_offset = (void *)startup_32 - image_base;
- status = efi_allocate_pages(0x4000, (unsigned long *)&boot_params, ULONG_MAX);
+ status = efi_allocate_pages(sizeof(struct boot_params),
+ (unsigned long *)&boot_params, ULONG_MAX);
if (status != EFI_SUCCESS) {
- efi_printk("Failed to allocate lowmem for boot params\n");
+ efi_err("Failed to allocate lowmem for boot params\n");
efi_exit(handle, status);
}
- memset(boot_params, 0x0, 0x4000);
+ memset(boot_params, 0x0, sizeof(struct boot_params));
hdr = &boot_params->hdr;
@@ -416,43 +404,21 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle,
hdr->type_of_loader = 0x21;
/* Convert unicode cmdline to ascii */
- cmdline_ptr = efi_convert_cmdline(image, &options_size, ULONG_MAX);
+ cmdline_ptr = efi_convert_cmdline(image, &options_size);
if (!cmdline_ptr)
goto fail;
- hdr->cmd_line_ptr = (unsigned long)cmdline_ptr;
- /* Fill in upper bits of command line address, NOP on 32 bit */
- boot_params->ext_cmd_line_ptr = (u64)(unsigned long)cmdline_ptr >> 32;
+ efi_set_u64_split((unsigned long)cmdline_ptr,
+ &hdr->cmd_line_ptr, &boot_params->ext_cmd_line_ptr);
hdr->ramdisk_image = 0;
hdr->ramdisk_size = 0;
- if (efi_is_native()) {
- status = efi_parse_options(cmdline_ptr);
- if (status != EFI_SUCCESS)
- goto fail2;
-
- if (!noinitrd()) {
- status = efi_load_initrd(image, &ramdisk_addr,
- &ramdisk_size,
- hdr->initrd_addr_max,
- ULONG_MAX);
- if (status != EFI_SUCCESS)
- goto fail2;
- hdr->ramdisk_image = ramdisk_addr & 0xffffffff;
- hdr->ramdisk_size = ramdisk_size & 0xffffffff;
- boot_params->ext_ramdisk_image = (u64)ramdisk_addr >> 32;
- boot_params->ext_ramdisk_size = (u64)ramdisk_size >> 32;
- }
- }
-
- efi_stub_entry(handle, sys_table, boot_params);
+ efi_stub_entry(handle, sys_table_arg, boot_params);
/* not reached */
-fail2:
- efi_free(options_size, (unsigned long)cmdline_ptr);
fail:
- efi_free(0x4000, (unsigned long)boot_params);
+ efi_free(sizeof(struct boot_params), (unsigned long)boot_params);
efi_exit(handle, status);
}
@@ -645,17 +611,14 @@ static efi_status_t exit_boot_func(struct efi_boot_memmap *map,
: EFI32_LOADER_SIGNATURE;
memcpy(&p->efi->efi_loader_signature, signature, sizeof(__u32));
- p->efi->efi_systab = (unsigned long)efi_system_table();
+ efi_set_u64_split((unsigned long)efi_system_table,
+ &p->efi->efi_systab, &p->efi->efi_systab_hi);
p->efi->efi_memdesc_size = *map->desc_size;
p->efi->efi_memdesc_version = *map->desc_ver;
- p->efi->efi_memmap = (unsigned long)*map->map;
+ efi_set_u64_split((unsigned long)*map->map,
+ &p->efi->efi_memmap, &p->efi->efi_memmap_hi);
p->efi->efi_memmap_size = *map->map_size;
-#ifdef CONFIG_X86_64
- p->efi->efi_systab_hi = (unsigned long)efi_system_table() >> 32;
- p->efi->efi_memmap_hi = (unsigned long)*map->map >> 32;
-#endif
-
return EFI_SUCCESS;
}
@@ -711,12 +674,11 @@ unsigned long efi_main(efi_handle_t handle,
unsigned long buffer_start, buffer_end;
struct setup_header *hdr = &boot_params->hdr;
efi_status_t status;
- unsigned long cmdline_paddr;
- sys_table = sys_table_arg;
+ efi_system_table = sys_table_arg;
/* Check if we were booted by the EFI firmware */
- if (sys_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
+ if (efi_system_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
efi_exit(handle, EFI_INVALID_PARAMETER);
/*
@@ -759,7 +721,7 @@ unsigned long efi_main(efi_handle_t handle,
hdr->kernel_alignment,
LOAD_PHYSICAL_ADDR);
if (status != EFI_SUCCESS) {
- efi_printk("efi_relocate_kernel() failed!\n");
+ efi_err("efi_relocate_kernel() failed!\n");
goto fail;
}
/*
@@ -770,35 +732,48 @@ unsigned long efi_main(efi_handle_t handle,
image_offset = 0;
}
- /*
- * efi_pe_entry() may have been called before efi_main(), in which
- * case this is the second time we parse the cmdline. This is ok,
- * parsing the cmdline multiple times does not have side-effects.
- */
- cmdline_paddr = ((u64)hdr->cmd_line_ptr |
- ((u64)boot_params->ext_cmd_line_ptr << 32));
- efi_parse_options((char *)cmdline_paddr);
+#ifdef CONFIG_CMDLINE_BOOL
+ status = efi_parse_options(CONFIG_CMDLINE);
+ if (status != EFI_SUCCESS) {
+ efi_err("Failed to parse options\n");
+ goto fail;
+ }
+#endif
+ if (!IS_ENABLED(CONFIG_CMDLINE_OVERRIDE)) {
+ unsigned long cmdline_paddr = ((u64)hdr->cmd_line_ptr |
+ ((u64)boot_params->ext_cmd_line_ptr << 32));
+ status = efi_parse_options((char *)cmdline_paddr);
+ if (status != EFI_SUCCESS) {
+ efi_err("Failed to parse options\n");
+ goto fail;
+ }
+ }
/*
- * At this point, an initrd may already have been loaded, either by
- * the bootloader and passed via bootparams, or loaded from a initrd=
- * command line option by efi_pe_entry() above. In either case, we
- * permit an initrd loaded from the LINUX_EFI_INITRD_MEDIA_GUID device
- * path to supersede it.
+ * At this point, an initrd may already have been loaded by the
+ * bootloader and passed via bootparams. We permit an initrd loaded
+ * from the LINUX_EFI_INITRD_MEDIA_GUID device path to supersede it.
+ *
+ * If the device path is not present, any command-line initrd=
+ * arguments will be processed only if image is not NULL, which will be
+ * the case only if we were loaded via the PE entry point.
*/
- if (!noinitrd()) {
+ if (!efi_noinitrd) {
unsigned long addr, size;
- status = efi_load_initrd_dev_path(&addr, &size, ULONG_MAX);
- if (status == EFI_SUCCESS) {
- hdr->ramdisk_image = (u32)addr;
- hdr->ramdisk_size = (u32)size;
- boot_params->ext_ramdisk_image = (u64)addr >> 32;
- boot_params->ext_ramdisk_size = (u64)size >> 32;
- } else if (status != EFI_NOT_FOUND) {
- efi_printk("efi_load_initrd_dev_path() failed!\n");
+ status = efi_load_initrd(image, &addr, &size,
+ hdr->initrd_addr_max, ULONG_MAX);
+
+ if (status != EFI_SUCCESS) {
+ efi_err("Failed to load initrd!\n");
goto fail;
}
+ if (size > 0) {
+ efi_set_u64_split(addr, &hdr->ramdisk_image,
+ &boot_params->ext_ramdisk_image);
+ efi_set_u64_split(size, &hdr->ramdisk_size,
+ &boot_params->ext_ramdisk_size);
+ }
}
/*
@@ -823,13 +798,13 @@ unsigned long efi_main(efi_handle_t handle,
status = exit_boot(boot_params, handle);
if (status != EFI_SUCCESS) {
- efi_printk("exit_boot() failed!\n");
+ efi_err("exit_boot() failed!\n");
goto fail;
}
return bzimage_addr;
fail:
- efi_printk("efi_main() failed!\n");
+ efi_err("efi_main() failed!\n");
efi_exit(handle, status);
}
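Several hunks above replace open-coded '>> 32' splitting with efi_set_u64_split(). Its definition lives in the efistub.h changes, which are not shown here; judging by the call sites, it behaves roughly like this hedged sketch:

static inline void efi_set_u64_split(u64 data, u32 *lo, u32 *hi)
{
	*lo = (u32)data;		/* low 32 bits, e.g. hdr->cmd_line_ptr */
	*hi = (u32)(data >> 32);	/* high 32 bits, zero for 32-bit addresses */
}

so efi_set_u64_split((unsigned long)cmdline_ptr, &hdr->cmd_line_ptr, &boot_params->ext_cmd_line_ptr) stores both halves in one call.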
diff --git a/drivers/firmware/efi/test/efi_test.c b/drivers/firmware/efi/test/efi_test.c
index 7baf48c01e72..ddf9eae396fe 100644
--- a/drivers/firmware/efi/test/efi_test.c
+++ b/drivers/firmware/efi/test/efi_test.c
@@ -70,9 +70,6 @@ copy_ucs2_from_user_len(efi_char16_t **dst, efi_char16_t __user *src,
return 0;
}
- if (!access_ok(src, 1))
- return -EFAULT;
-
buf = memdup_user(src, len);
if (IS_ERR(buf)) {
*dst = NULL;
@@ -91,9 +88,6 @@ copy_ucs2_from_user_len(efi_char16_t **dst, efi_char16_t __user *src,
static inline int
get_ucs2_strsize_from_user(efi_char16_t __user *src, size_t *len)
{
- if (!access_ok(src, 1))
- return -EFAULT;
-
*len = user_ucs2_strsize(src);
if (*len == 0)
return -EFAULT;
@@ -118,9 +112,6 @@ copy_ucs2_from_user(efi_char16_t **dst, efi_char16_t __user *src)
{
size_t len;
- if (!access_ok(src, 1))
- return -EFAULT;
-
len = user_ucs2_strsize(src);
if (len == 0)
return -EFAULT;
@@ -142,9 +133,6 @@ copy_ucs2_to_user_len(efi_char16_t __user *dst, efi_char16_t *src, size_t len)
if (!src)
return 0;
- if (!access_ok(dst, 1))
- return -EFAULT;
-
return copy_to_user(dst, src, len);
}
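The access_ok() calls removed here were redundant: memdup_user() and copy_to_user() already validate the user range themselves. In outline (a simplified sketch of the generic behaviour, not the exact mm/util.c code):

void *memdup_user(const void __user *src, size_t len)
{
	void *p = kmalloc(len, GFP_USER);

	if (!p)
		return ERR_PTR(-ENOMEM);
	if (copy_from_user(p, src, len)) {	/* performs the access_ok() check */
		kfree(p);
		return ERR_PTR(-EFAULT);
	}
	return p;
}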
diff --git a/drivers/firmware/psci/psci.c b/drivers/firmware/psci/psci.c
index 2937d44b5df4..92013ecc2d9e 100644
--- a/drivers/firmware/psci/psci.c
+++ b/drivers/firmware/psci/psci.c
@@ -46,25 +46,14 @@
* require cooperation with a Trusted OS driver.
*/
static int resident_cpu = -1;
+struct psci_operations psci_ops;
+static enum arm_smccc_conduit psci_conduit = SMCCC_CONDUIT_NONE;
bool psci_tos_resident_on(int cpu)
{
return cpu == resident_cpu;
}
-struct psci_operations psci_ops = {
- .conduit = SMCCC_CONDUIT_NONE,
- .smccc_version = SMCCC_VERSION_1_0,
-};
-
-enum arm_smccc_conduit arm_smccc_1_1_get_conduit(void)
-{
- if (psci_ops.smccc_version < SMCCC_VERSION_1_1)
- return SMCCC_CONDUIT_NONE;
-
- return psci_ops.conduit;
-}
-
typedef unsigned long (psci_fn)(unsigned long, unsigned long,
unsigned long, unsigned long);
static psci_fn *invoke_psci_fn;
@@ -242,7 +231,7 @@ static void set_conduit(enum arm_smccc_conduit conduit)
WARN(1, "Unexpected PSCI conduit %d\n", conduit);
}
- psci_ops.conduit = conduit;
+ psci_conduit = conduit;
}
static int get_set_conduit_method(struct device_node *np)
@@ -411,8 +400,8 @@ static void __init psci_init_smccc(void)
if (feature != PSCI_RET_NOT_SUPPORTED) {
u32 ret;
ret = invoke_psci_fn(ARM_SMCCC_VERSION_FUNC_ID, 0, 0, 0);
- if (ret == ARM_SMCCC_VERSION_1_1) {
- psci_ops.smccc_version = SMCCC_VERSION_1_1;
+ if (ret >= ARM_SMCCC_VERSION_1_1) {
+ arm_smccc_version_init(ret, psci_conduit);
ver = ret;
}
}
diff --git a/drivers/firmware/raspberrypi.c b/drivers/firmware/raspberrypi.c
index da26a584dca0..a3e85186f8e6 100644
--- a/drivers/firmware/raspberrypi.c
+++ b/drivers/firmware/raspberrypi.c
@@ -182,16 +182,10 @@ rpi_firmware_print_firmware_revision(struct rpi_firmware *fw)
RPI_FIRMWARE_GET_FIRMWARE_REVISION,
&packet, sizeof(packet));
- if (ret == 0) {
- struct tm tm;
-
- time64_to_tm(packet, 0, &tm);
+ if (ret)
+ return;
- dev_info(fw->cl.dev,
- "Attached to firmware from %04ld-%02d-%02d %02d:%02d\n",
- tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
- tm.tm_hour, tm.tm_min);
- }
+ dev_info(fw->cl.dev, "Attached to firmware from %ptT\n", &packet);
}
static void
diff --git a/drivers/firmware/smccc/Kconfig b/drivers/firmware/smccc/Kconfig
new file mode 100644
index 000000000000..27b675d76235
--- /dev/null
+++ b/drivers/firmware/smccc/Kconfig
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config HAVE_ARM_SMCCC
+ bool
+ help
+ Include support for the Secure Monitor Call (SMC) and Hypervisor
+ Call (HVC) instructions on Armv7 and above architectures.
+
+config HAVE_ARM_SMCCC_DISCOVERY
+ bool
+ depends on ARM_PSCI_FW
+ default y
+ help
+ SMCCC v1.0 lacked discoverability and hence PSCI v1.0 was updated
+ to add an SMCCC discovery mechanism through the PSCI firmware
+ implementation of PSCI_FEATURES(SMCCC_VERSION) which returns
+ success on firmware compliant with SMCCC v1.1 and above.
diff --git a/drivers/firmware/smccc/Makefile b/drivers/firmware/smccc/Makefile
new file mode 100644
index 000000000000..6f369fe3f0b9
--- /dev/null
+++ b/drivers/firmware/smccc/Makefile
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+obj-$(CONFIG_HAVE_ARM_SMCCC_DISCOVERY) += smccc.o
diff --git a/drivers/firmware/smccc/smccc.c b/drivers/firmware/smccc/smccc.c
new file mode 100644
index 000000000000..4e80921ee212
--- /dev/null
+++ b/drivers/firmware/smccc/smccc.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020 Arm Limited
+ */
+
+#define pr_fmt(fmt) "smccc: " fmt
+
+#include <linux/init.h>
+#include <linux/arm-smccc.h>
+
+static u32 smccc_version = ARM_SMCCC_VERSION_1_0;
+static enum arm_smccc_conduit smccc_conduit = SMCCC_CONDUIT_NONE;
+
+void __init arm_smccc_version_init(u32 version, enum arm_smccc_conduit conduit)
+{
+ smccc_version = version;
+ smccc_conduit = conduit;
+}
+
+enum arm_smccc_conduit arm_smccc_1_1_get_conduit(void)
+{
+ if (smccc_version < ARM_SMCCC_VERSION_1_1)
+ return SMCCC_CONDUIT_NONE;
+
+ return smccc_conduit;
+}
+
+u32 arm_smccc_get_version(void)
+{
+ return smccc_version;
+}
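A typical consumer of this interface first asks for the conduit and then issues the call with the matching instruction; a hedged example using the standard arm-smccc helpers:

struct arm_smccc_res res;

switch (arm_smccc_1_1_get_conduit()) {
case SMCCC_CONDUIT_HVC:
	arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
			  ARM_SMCCC_ARCH_WORKAROUND_1, &res);
	break;
case SMCCC_CONDUIT_SMC:
	arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
			  ARM_SMCCC_ARCH_WORKAROUND_1, &res);
	break;
default:
	/* No SMCCC v1.1 conduit available; fall back or bail out. */
	break;
}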
diff --git a/drivers/fpga/dfl-afu-dma-region.c b/drivers/fpga/dfl-afu-dma-region.c
index 62f924489db5..d902acb36d14 100644
--- a/drivers/fpga/dfl-afu-dma-region.c
+++ b/drivers/fpga/dfl-afu-dma-region.c
@@ -324,10 +324,6 @@ int afu_dma_map_region(struct dfl_feature_platform_data *pdata,
if (user_addr + length < user_addr)
return -EINVAL;
- if (!access_ok((void __user *)(unsigned long)user_addr,
- length))
- return -EINVAL;
-
region = kzalloc(sizeof(*region), GFP_KERNEL);
if (!region)
return -ENOMEM;
diff --git a/drivers/fpga/dfl-fme-pr.c b/drivers/fpga/dfl-fme-pr.c
index a233a53db708..1194c0e850e0 100644
--- a/drivers/fpga/dfl-fme-pr.c
+++ b/drivers/fpga/dfl-fme-pr.c
@@ -97,10 +97,6 @@ static int fme_pr(struct platform_device *pdev, unsigned long arg)
return -EINVAL;
}
- if (!access_ok((void __user *)(unsigned long)port_pr.buffer_address,
- port_pr.buffer_size))
- return -EFAULT;
-
/*
* align PR buffer per PR bandwidth, as HW ignores the extra padding
* data automatically.
diff --git a/drivers/gpio/gpio-mockup.c b/drivers/gpio/gpio-mockup.c
index 3eb94f3740d1..bc345185db26 100644
--- a/drivers/gpio/gpio-mockup.c
+++ b/drivers/gpio/gpio-mockup.c
@@ -14,6 +14,7 @@
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/irq_sim.h>
+#include <linux/irqdomain.h>
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/property.h>
@@ -48,7 +49,7 @@ struct gpio_mockup_line_status {
struct gpio_mockup_chip {
struct gpio_chip gc;
struct gpio_mockup_line_status *lines;
- struct irq_sim irqsim;
+ struct irq_domain *irq_sim_domain;
struct dentry *dbg_dir;
struct mutex lock;
};
@@ -144,14 +145,12 @@ static void gpio_mockup_set_multiple(struct gpio_chip *gc,
static int gpio_mockup_apply_pull(struct gpio_mockup_chip *chip,
unsigned int offset, int value)
{
+ int curr, irq, irq_type, ret = 0;
struct gpio_desc *desc;
struct gpio_chip *gc;
- struct irq_sim *sim;
- int curr, irq, irq_type;
gc = &chip->gc;
desc = &gc->gpiodev->descs[offset];
- sim = &chip->irqsim;
mutex_lock(&chip->lock);
@@ -161,14 +160,28 @@ static int gpio_mockup_apply_pull(struct gpio_mockup_chip *chip,
if (curr == value)
goto out;
- irq = irq_sim_irqnum(sim, offset);
+ irq = irq_find_mapping(chip->irq_sim_domain, offset);
+ if (!irq)
+ /*
+ * This is fine - it just means nobody is listening
+ * for interrupts on this line; otherwise
+ * irq_create_mapping() would have been called from
+ * the to_irq() callback.
+ */
+ goto set_value;
+
irq_type = irq_get_trigger_type(irq);
if ((value == 1 && (irq_type & IRQ_TYPE_EDGE_RISING)) ||
- (value == 0 && (irq_type & IRQ_TYPE_EDGE_FALLING)))
- irq_sim_fire(sim, offset);
+ (value == 0 && (irq_type & IRQ_TYPE_EDGE_FALLING))) {
+ ret = irq_set_irqchip_state(irq, IRQCHIP_STATE_PENDING,
+ true);
+ if (ret)
+ goto out;
+ }
}
+set_value:
/* Change the value unless we're actively driving the line. */
if (!test_bit(FLAG_REQUESTED, &desc->flags) ||
!test_bit(FLAG_IS_OUT, &desc->flags))
@@ -177,7 +190,7 @@ static int gpio_mockup_apply_pull(struct gpio_mockup_chip *chip,
out:
chip->lines[offset].pull = value;
mutex_unlock(&chip->lock);
- return 0;
+ return ret;
}
static int gpio_mockup_set_config(struct gpio_chip *gc,
@@ -236,7 +249,7 @@ static int gpio_mockup_to_irq(struct gpio_chip *gc, unsigned int offset)
{
struct gpio_mockup_chip *chip = gpiochip_get_data(gc);
- return irq_sim_irqnum(&chip->irqsim, offset);
+ return irq_create_mapping(chip->irq_sim_domain, offset);
}
static void gpio_mockup_free(struct gpio_chip *gc, unsigned int offset)
@@ -389,6 +402,19 @@ static int gpio_mockup_name_lines(struct device *dev,
return 0;
}
+static void gpio_mockup_dispose_mappings(void *data)
+{
+ struct gpio_mockup_chip *chip = data;
+ struct gpio_chip *gc = &chip->gc;
+ int i, irq;
+
+ for (i = 0; i < gc->ngpio; i++) {
+ irq = irq_find_mapping(chip->irq_sim_domain, i);
+ if (irq)
+ irq_dispose_mapping(irq);
+ }
+}
+
static int gpio_mockup_probe(struct platform_device *pdev)
{
struct gpio_mockup_chip *chip;
@@ -456,8 +482,13 @@ static int gpio_mockup_probe(struct platform_device *pdev)
return rv;
}
- rv = devm_irq_sim_init(dev, &chip->irqsim, gc->ngpio);
- if (rv < 0)
+ chip->irq_sim_domain = devm_irq_domain_create_sim(dev, NULL,
+ gc->ngpio);
+ if (IS_ERR(chip->irq_sim_domain))
+ return PTR_ERR(chip->irq_sim_domain);
+
+ rv = devm_add_action_or_reset(dev, gpio_mockup_dispose_mappings, chip);
+ if (rv)
return rv;
rv = devm_gpiochip_add_data(dev, &chip->gc, chip);
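For context, the simulated domain only gets a mapping for a line once somebody asks for its interrupt, which is what the to_irq() path above provides; a hedged sketch of a consumer (device, con_id and handler names are made up):

struct gpio_desc *desc = gpiod_get(dev, "mockup-line", GPIOD_IN);
int irq = gpiod_to_irq(desc);	/* reaches gpio_mockup_to_irq() -> irq_create_mapping() */
int ret = request_irq(irq, test_handler, IRQF_TRIGGER_RISING,
		      "gpio-mockup-test", NULL);

/* A subsequent pull-up written via debugfs then marks the interrupt pending
 * with irq_set_irqchip_state(irq, IRQCHIP_STATE_PENDING, true), as in
 * gpio_mockup_apply_pull() above. */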
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 43594978958e..c4fd57d8b717 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -161,7 +161,7 @@ config DRM_LOAD_EDID_FIRMWARE
monitor are unable to provide appropriate EDID data. Since this
feature is provided as a workaround for broken hardware, the
default case is N. Details and instructions how to build your own
- EDID data are given in Documentation/driver-api/edid.rst.
+ EDID data are given in Documentation/admin-guide/edid.rst.
config DRM_DP_CEC
bool "Enable DisplayPort CEC-Tunneling-over-AUX HDMI support"
@@ -310,8 +310,6 @@ source "drivers/gpu/drm/ast/Kconfig"
source "drivers/gpu/drm/mgag200/Kconfig"
-source "drivers/gpu/drm/cirrus/Kconfig"
-
source "drivers/gpu/drm/armada/Kconfig"
source "drivers/gpu/drm/atmel-hlcdc/Kconfig"
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index 7f72ef5e7811..2c0e5a7e5953 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -17,7 +17,8 @@ drm-y := drm_auth.o drm_cache.o \
drm_plane.o drm_color_mgmt.o drm_print.o \
drm_dumb_buffers.o drm_mode_config.o drm_vblank.o \
drm_syncobj.o drm_lease.o drm_writeback.o drm_client.o \
- drm_client_modeset.o drm_atomic_uapi.o drm_hdcp.o
+ drm_client_modeset.o drm_atomic_uapi.o drm_hdcp.o \
+ drm_managed.o
drm-$(CONFIG_DRM_LEGACY) += drm_legacy_misc.o drm_bufs.o drm_context.o drm_dma.o drm_scatter.o drm_lock.o
drm-$(CONFIG_DRM_LIB_RANDOM) += lib/drm_random.o
@@ -32,8 +33,7 @@ drm-$(CONFIG_PCI) += drm_pci.o
drm-$(CONFIG_DEBUG_FS) += drm_debugfs.o drm_debugfs_crc.o
drm-$(CONFIG_DRM_LOAD_EDID_FIRMWARE) += drm_edid_load.o
-drm_vram_helper-y := drm_gem_vram_helper.o \
- drm_vram_helper_common.o
+drm_vram_helper-y := drm_gem_vram_helper.o
obj-$(CONFIG_DRM_VRAM_HELPER) += drm_vram_helper.o
drm_ttm_helper-y := drm_gem_ttm_helper.o
@@ -74,7 +74,6 @@ obj-$(CONFIG_DRM_I915) += i915/
obj-$(CONFIG_DRM_MGAG200) += mgag200/
obj-$(CONFIG_DRM_V3D) += v3d/
obj-$(CONFIG_DRM_VC4) += vc4/
-obj-$(CONFIG_DRM_CIRRUS_QEMU) += cirrus/
obj-$(CONFIG_DRM_SIS) += sis/
obj-$(CONFIG_DRM_SAVAGE)+= savage/
obj-$(CONFIG_DRM_VMWGFX)+= vmwgfx/
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index c2bbcdd9c875..210d57a4afc8 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -55,7 +55,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o amdgpu_ids.o \
amdgpu_gmc.o amdgpu_mmhub.o amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o \
amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o amdgpu_nbio.o \
- amdgpu_umc.o smu_v11_0_i2c.o
+ amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o
amdgpu-$(CONFIG_PERF_EVENTS) += amdgpu_pmu.o
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 8ac1581a6b53..cd913986863e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -28,6 +28,18 @@
#ifndef __AMDGPU_H__
#define __AMDGPU_H__
+#ifdef pr_fmt
+#undef pr_fmt
+#endif
+
+#define pr_fmt(fmt) "amdgpu: " fmt
+
+#ifdef dev_fmt
+#undef dev_fmt
+#endif
+
+#define dev_fmt(fmt) "amdgpu: " fmt
+
#include "amdgpu_ctx.h"
#include <linux/atomic.h>
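With pr_fmt and dev_fmt defined this way, every pr_*()/dev_*() call in a translation unit that includes amdgpu.h picks up the prefix automatically, which is why later hunks in this series drop the literal "amdgpu: " from their message strings. For illustration (not part of the patch):

pr_err("failed to validate PT BOs\n");
/* expands to printk(KERN_ERR "amdgpu: " "failed to validate PT BOs\n") */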
@@ -161,6 +173,7 @@ extern int amdgpu_gpu_recovery;
extern int amdgpu_emu_mode;
extern uint amdgpu_smu_memory_pool_size;
extern uint amdgpu_dc_feature_mask;
+extern uint amdgpu_dc_debug_mask;
extern uint amdgpu_dm_abm_level;
extern struct amdgpu_mgpu_info mgpu_info;
extern int amdgpu_ras_enable;
@@ -177,6 +190,8 @@ extern int sched_policy;
static const int sched_policy = KFD_SCHED_POLICY_HWS;
#endif
+extern int amdgpu_tmz;
+
#ifdef CONFIG_DRM_AMDGPU_SI
extern int amdgpu_si_support;
#endif
@@ -190,8 +205,6 @@ extern int amdgpu_cik_support;
#define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000
#define AMDGPU_MAX_USEC_TIMEOUT 100000 /* 100 ms */
#define AMDGPU_FENCE_JIFFIES_TIMEOUT (HZ / 2)
-/* AMDGPU_IB_POOL_SIZE must be a power of 2 */
-#define AMDGPU_IB_POOL_SIZE 16
#define AMDGPU_DEBUGFS_MAX_COMPONENTS 32
#define AMDGPUFB_CONN_LIMIT 4
#define AMDGPU_BIOS_NUM_SCRATCH 16
@@ -439,7 +452,9 @@ struct amdgpu_fpriv {
int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv);
int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
- unsigned size, struct amdgpu_ib *ib);
+ unsigned size,
+ enum amdgpu_ib_pool_type pool,
+ struct amdgpu_ib *ib);
void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib,
struct dma_fence *f);
int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
@@ -512,7 +527,7 @@ static inline void amdgpu_set_ib_value(struct amdgpu_cs_parser *p,
/*
* Writeback
*/
-#define AMDGPU_MAX_WB 128 /* Reserve at most 128 WB slots for amdgpu-owned rings. */
+#define AMDGPU_MAX_WB 256 /* Reserve at most 256 WB slots for amdgpu-owned rings. */
struct amdgpu_wb {
struct amdgpu_bo *wb_obj;
@@ -724,6 +739,7 @@ struct amdgpu_device {
uint32_t rev_id;
uint32_t external_rev_id;
unsigned long flags;
+ unsigned long apu_flags;
int usec_timeout;
const struct amdgpu_asic_funcs *asic_funcs;
bool shutdown;
@@ -751,7 +767,6 @@ struct amdgpu_device {
uint8_t *bios;
uint32_t bios_size;
struct amdgpu_bo *stolen_vga_memory;
- struct amdgpu_bo *discovery_memory;
uint32_t bios_scratch_reg_offset;
uint32_t bios_scratch[AMDGPU_BIOS_NUM_SCRATCH];
@@ -843,7 +858,8 @@ struct amdgpu_device {
unsigned num_rings;
struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];
bool ib_pool_ready;
- struct amdgpu_sa_manager ring_tmp_bo;
+ struct amdgpu_sa_manager ib_pools[AMDGPU_IB_POOL_MAX];
+ struct amdgpu_sched gpu_sched[AMDGPU_HW_IP_NUM][AMDGPU_RING_PRIO_MAX];
/* interrupts */
struct amdgpu_irq irq;
@@ -903,7 +919,9 @@ struct amdgpu_device {
struct amdgpu_display_manager dm;
/* discovery */
- uint8_t *discovery;
+ uint8_t *discovery_bin;
+ uint32_t discovery_tmr_size;
+ struct amdgpu_bo *discovery_memory;
/* mes */
bool enable_mes;
@@ -923,7 +941,7 @@ struct amdgpu_device {
atomic64_t gart_pin_size;
/* soc15 register offset based on ip, instance and segment */
- uint32_t *reg_offset[MAX_HWIP][HWIP_MAX_INSTANCE];
+ uint32_t *reg_offset[MAX_HWIP][HWIP_MAX_INSTANCE];
/* delayed work_func for deferring clockgating during resume */
struct delayed_work delayed_init_work;
@@ -935,9 +953,6 @@ struct amdgpu_device {
/* link all shadow bo */
struct list_head shadow_list;
struct mutex shadow_list_lock;
- /* keep an lru list of rings by HW IP */
- struct list_head ring_lru_list;
- spinlock_t ring_lru_list_lock;
/* record hw reset is performed */
bool has_hw_reset;
@@ -947,8 +962,6 @@ struct amdgpu_device {
bool in_suspend;
bool in_hibernate;
- /* record last mm index being written through WREG32*/
- unsigned long last_mm_index;
bool in_gpu_reset;
enum pp_mp1_state mp1_state;
struct mutex lock_reset;
@@ -967,14 +980,19 @@ struct amdgpu_device {
uint64_t unique_id;
uint64_t df_perfmon_config_assign_mask[AMDGPU_MAX_DF_PERFMONS];
- /* device pstate */
- int pstate;
/* enable runtime pm on the device */
bool runpm;
bool in_runpm;
bool pm_sysfs_en;
bool ucode_sysfs_en;
+
+ /* Chip product information */
+ char product_number[16];
+ char product_name[32];
+ char serial[16];
+
+ struct amdgpu_autodump autodump;
};
static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev)
@@ -991,10 +1009,10 @@ int amdgpu_gpu_wait_for_idle(struct amdgpu_device *adev);
void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
uint32_t *buf, size_t size, bool write);
-uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
+uint32_t amdgpu_device_rreg(struct amdgpu_device *adev, uint32_t reg,
+ uint32_t acc_flags);
+void amdgpu_device_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
uint32_t acc_flags);
-void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
- uint32_t acc_flags);
void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
uint32_t acc_flags);
void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value);
@@ -1011,25 +1029,20 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
/*
* Registers read & write functions.
*/
-
-#define AMDGPU_REGS_IDX (1<<0)
#define AMDGPU_REGS_NO_KIQ (1<<1)
-#define AMDGPU_REGS_KIQ (1<<2)
-#define RREG32_NO_KIQ(reg) amdgpu_mm_rreg(adev, (reg), AMDGPU_REGS_NO_KIQ)
-#define WREG32_NO_KIQ(reg, v) amdgpu_mm_wreg(adev, (reg), (v), AMDGPU_REGS_NO_KIQ)
+#define RREG32_NO_KIQ(reg) amdgpu_device_rreg(adev, (reg), AMDGPU_REGS_NO_KIQ)
+#define WREG32_NO_KIQ(reg, v) amdgpu_device_wreg(adev, (reg), (v), AMDGPU_REGS_NO_KIQ)
-#define RREG32_KIQ(reg) amdgpu_mm_rreg(adev, (reg), AMDGPU_REGS_KIQ)
-#define WREG32_KIQ(reg, v) amdgpu_mm_wreg(adev, (reg), (v), AMDGPU_REGS_KIQ)
+#define RREG32_KIQ(reg) amdgpu_kiq_rreg(adev, (reg))
+#define WREG32_KIQ(reg, v) amdgpu_kiq_wreg(adev, (reg), (v))
#define RREG8(reg) amdgpu_mm_rreg8(adev, (reg))
#define WREG8(reg, v) amdgpu_mm_wreg8(adev, (reg), (v))
-#define RREG32(reg) amdgpu_mm_rreg(adev, (reg), 0)
-#define RREG32_IDX(reg) amdgpu_mm_rreg(adev, (reg), AMDGPU_REGS_IDX)
-#define DREG32(reg) printk(KERN_INFO "REGISTER: " #reg " : 0x%08X\n", amdgpu_mm_rreg(adev, (reg), 0))
-#define WREG32(reg, v) amdgpu_mm_wreg(adev, (reg), (v), 0)
-#define WREG32_IDX(reg, v) amdgpu_mm_wreg(adev, (reg), (v), AMDGPU_REGS_IDX)
+#define RREG32(reg) amdgpu_device_rreg(adev, (reg), 0)
+#define DREG32(reg) printk(KERN_INFO "REGISTER: " #reg " : 0x%08X\n", amdgpu_device_rreg(adev, (reg), 0))
+#define WREG32(reg, v) amdgpu_device_wreg(adev, (reg), (v), 0)
#define REG_SET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
#define REG_GET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
#define RREG32_PCIE(reg) adev->pcie_rreg(adev, (reg))
@@ -1066,7 +1079,7 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
tmp_ |= ((val) & ~(mask)); \
WREG32_PLL(reg, tmp_); \
} while (0)
-#define DREG32_SYS(sqf, adev, reg) seq_printf((sqf), #reg " : 0x%08X\n", amdgpu_mm_rreg((adev), (reg), false))
+#define DREG32_SYS(sqf, adev, reg) seq_printf((sqf), #reg " : 0x%08X\n", amdgpu_device_rreg((adev), (reg), false))
#define RREG32_IO(reg) amdgpu_io_rreg(adev, (reg))
#define WREG32_IO(reg, v) amdgpu_io_wreg(adev, (reg), (v))
@@ -1249,5 +1262,9 @@ _name##_show(struct device *dev, \
\
static struct device_attribute pmu_attr_##_name = __ATTR_RO(_name)
-#endif
+static inline bool amdgpu_is_tmz(struct amdgpu_device *adev)
+{
+ return adev->gmc.tmz_enabled;
+}
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
index 1e41367ef74e..956cbbda4793 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
@@ -444,7 +444,6 @@ static int amdgpu_atif_handler(struct amdgpu_device *adev,
DRM_DEBUG_DRIVER("ATIF: %d pending SBIOS requests\n", count);
- /* todo: add DC handling */
if ((req.pending & ATIF_PANEL_BRIGHTNESS_CHANGE_REQUEST) &&
!amdgpu_device_has_dc_support(adev)) {
struct amdgpu_encoder *enc = atif->encoder_for_bl;
@@ -463,6 +462,27 @@ static int amdgpu_atif_handler(struct amdgpu_device *adev,
#endif
}
}
+#if defined(CONFIG_DRM_AMD_DC)
+#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) || defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE)
+ if ((req.pending & ATIF_PANEL_BRIGHTNESS_CHANGE_REQUEST) &&
+ amdgpu_device_has_dc_support(adev)) {
+ struct amdgpu_display_manager *dm = &adev->dm;
+ struct backlight_device *bd = dm->backlight_dev;
+
+ if (bd) {
+ DRM_DEBUG_DRIVER("Changing brightness to %d\n",
+ req.backlight_level);
+
+ /*
+ * XXX backlight_device_set_brightness() is
+ * hardwired to post BACKLIGHT_UPDATE_SYSFS.
+ * It probably should accept 'reason' parameter.
+ */
+ backlight_device_set_brightness(bd, req.backlight_level);
+ }
+ }
+#endif
+#endif
if (req.pending & ATIF_DGPU_DISPLAY_EVENT) {
if (adev->flags & AMD_IS_PX) {
pm_runtime_get_sync(adev->ddev->dev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index abfbe89e805e..ad59ac4423b8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -564,6 +564,13 @@ uint32_t amdgpu_amdkfd_get_num_gws(struct kgd_dev *kgd)
return adev->gds.gws_size;
}
+uint32_t amdgpu_amdkfd_get_asic_rev_id(struct kgd_dev *kgd)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+
+ return adev->rev_id;
+}
+
int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
uint32_t vmid, uint64_t gpu_addr,
uint32_t *ib_cmd, uint32_t ib_len)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 13feb313e9b3..3f2b695cf19e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -65,6 +65,7 @@ struct kgd_mem {
struct amdgpu_sync sync;
bool aql_queue;
+ bool is_imported;
};
/* KFD Memory Eviction */
@@ -148,6 +149,9 @@ int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev);
void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd);
+int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev,
+ int queue_bit);
+
/* Shared API */
int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
void **mem_obj, uint64_t *gpu_addr,
@@ -175,6 +179,7 @@ uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd);
uint64_t amdgpu_amdkfd_get_unique_id(struct kgd_dev *kgd);
uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd);
uint32_t amdgpu_amdkfd_get_num_gws(struct kgd_dev *kgd);
+uint32_t amdgpu_amdkfd_get_asic_rev_id(struct kgd_dev *kgd);
uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src);
/* Read user wptr from a specified user address space with page fault
@@ -218,7 +223,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
void *vm, struct kgd_mem **mem,
uint64_t *offset, uint32_t flags);
int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
- struct kgd_dev *kgd, struct kgd_mem *mem);
+ struct kgd_dev *kgd, struct kgd_mem *mem, uint64_t *size);
int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
struct kgd_dev *kgd, struct kgd_mem *mem, void *vm);
int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
index 4ec6d0c03201..691c89705bcd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
@@ -543,6 +543,9 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
uint32_t temp;
struct v10_compute_mqd *m = get_mqd(mqd);
+ if (adev->in_gpu_reset)
+ return -EIO;
+
#if 0
unsigned long flags;
int retry;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 6a5b91d23fd9..68e6e1bc8f3a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -362,13 +362,13 @@ static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm)
ret = amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_amdkfd_validate,
&param);
if (ret) {
- pr_err("amdgpu: failed to validate PT BOs\n");
+ pr_err("failed to validate PT BOs\n");
return ret;
}
ret = amdgpu_amdkfd_validate(&param, pd);
if (ret) {
- pr_err("amdgpu: failed to validate PD\n");
+ pr_err("failed to validate PD\n");
return ret;
}
@@ -377,7 +377,7 @@ static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm)
if (vm->use_cpu_for_update) {
ret = amdgpu_bo_kmap(pd, NULL);
if (ret) {
- pr_err("amdgpu: failed to kmap PD, ret=%d\n", ret);
+ pr_err("failed to kmap PD, ret=%d\n", ret);
return ret;
}
}
@@ -660,15 +660,15 @@ static int reserve_bo_and_vm(struct kgd_mem *mem,
ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list,
false, &ctx->duplicates);
- if (!ret)
- ctx->reserved = true;
- else {
- pr_err("Failed to reserve buffers in ttm\n");
+ if (ret) {
+ pr_err("Failed to reserve buffers in ttm.\n");
kfree(ctx->vm_pd);
ctx->vm_pd = NULL;
+ return ret;
}
- return ret;
+ ctx->reserved = true;
+ return 0;
}
/**
@@ -733,17 +733,15 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list,
false, &ctx->duplicates);
- if (!ret)
- ctx->reserved = true;
- else
- pr_err("Failed to reserve buffers in ttm.\n");
-
if (ret) {
+ pr_err("Failed to reserve buffers in ttm.\n");
kfree(ctx->vm_pd);
ctx->vm_pd = NULL;
+ return ret;
}
- return ret;
+ ctx->reserved = true;
+ return 0;
}
/**
@@ -1279,31 +1277,30 @@ err:
}
int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
- struct kgd_dev *kgd, struct kgd_mem *mem)
+ struct kgd_dev *kgd, struct kgd_mem *mem, uint64_t *size)
{
struct amdkfd_process_info *process_info = mem->process_info;
unsigned long bo_size = mem->bo->tbo.mem.size;
struct kfd_bo_va_list *entry, *tmp;
struct bo_vm_reservation_context ctx;
struct ttm_validate_buffer *bo_list_entry;
+ unsigned int mapped_to_gpu_memory;
int ret;
+ bool is_imported = 0;
mutex_lock(&mem->lock);
-
- if (mem->mapped_to_gpu_memory > 0) {
- pr_debug("BO VA 0x%llx size 0x%lx is still mapped.\n",
- mem->va, bo_size);
- mutex_unlock(&mem->lock);
- return -EBUSY;
- }
-
+ mapped_to_gpu_memory = mem->mapped_to_gpu_memory;
+ is_imported = mem->is_imported;
mutex_unlock(&mem->lock);
/* lock is not needed after this, since mem is unused and will
* be freed anyway
*/
- /* No more MMU notifiers */
- amdgpu_mn_unregister(mem->bo);
+ if (mapped_to_gpu_memory > 0) {
+ pr_debug("BO VA 0x%llx size 0x%lx is still mapped.\n",
+ mem->va, bo_size);
+ return -EBUSY;
+ }
/* Make sure restore workers don't access the BO any more */
bo_list_entry = &mem->validate_list;
@@ -1311,6 +1308,9 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
list_del(&bo_list_entry->head);
mutex_unlock(&process_info->lock);
+ /* No more MMU notifiers */
+ amdgpu_mn_unregister(mem->bo);
+
ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx);
if (unlikely(ret))
return ret;
@@ -1342,6 +1342,17 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
kfree(mem->bo->tbo.sg);
}
+ /* Update the size of the BO being freed if it was allocated from
+ * VRAM and is not imported.
+ */
+ if (size) {
+ if ((mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_VRAM) &&
+ (!is_imported))
+ *size = bo_size;
+ else
+ *size = 0;
+ }
+
/* Free the BO*/
drm_gem_object_put_unlocked(&mem->bo->tbo.base);
mutex_destroy(&mem->lock);
@@ -1697,6 +1708,7 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
(*mem)->process_info = avm->process_info;
add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false);
amdgpu_sync_create(&(*mem)->sync);
+ (*mem)->is_imported = true;
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
index d1495e1c9289..d9b35df33806 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
@@ -40,7 +40,7 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size,
for (i = 0; i < n; i++) {
struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
r = amdgpu_copy_buffer(ring, saddr, daddr, size, NULL, &fence,
- false, false);
+ false, false, false);
if (r)
goto exit_do_move;
r = dma_fence_wait(fence, false);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
index 031b094607bd..78ac6dbe70d8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
@@ -60,8 +60,6 @@ static uint32_t amdgpu_cgs_read_ind_register(struct cgs_device *cgs_device,
{
CGS_FUNC_ADEV;
switch (space) {
- case CGS_IND_REG__MMIO:
- return RREG32_IDX(index);
case CGS_IND_REG__PCIE:
return RREG32_PCIE(index);
case CGS_IND_REG__SMC:
@@ -77,6 +75,8 @@ static uint32_t amdgpu_cgs_read_ind_register(struct cgs_device *cgs_device,
case CGS_IND_REG__AUDIO_ENDPT:
DRM_ERROR("audio endpt register access not implemented.\n");
return 0;
+ default:
+ BUG();
}
WARN(1, "Invalid indirect register space");
return 0;
@@ -88,8 +88,6 @@ static void amdgpu_cgs_write_ind_register(struct cgs_device *cgs_device,
{
CGS_FUNC_ADEV;
switch (space) {
- case CGS_IND_REG__MMIO:
- return WREG32_IDX(index, value);
case CGS_IND_REG__PCIE:
return WREG32_PCIE(index, value);
case CGS_IND_REG__SMC:
@@ -105,6 +103,8 @@ static void amdgpu_cgs_write_ind_register(struct cgs_device *cgs_device,
case CGS_IND_REG__AUDIO_ENDPT:
DRM_ERROR("audio endpt register access not implemented.\n");
return;
+ default:
+ BUG();
}
WARN(1, "Invalid indirect register space");
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index af91627b19b0..19070226a945 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -924,7 +924,8 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
ring = to_amdgpu_ring(entity->rq->sched);
r = amdgpu_ib_get(adev, vm, ring->funcs->parse_cs ?
- chunk_ib->ib_bytes : 0, ib);
+ chunk_ib->ib_bytes : 0,
+ AMDGPU_IB_POOL_DELAYED, ib);
if (r) {
DRM_ERROR("Failed to get ib !\n");
return r;
@@ -1207,7 +1208,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
{
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
struct drm_sched_entity *entity = p->entity;
- enum drm_sched_priority priority;
struct amdgpu_bo_list_entry *e;
struct amdgpu_job *job;
uint64_t seq;
@@ -1257,7 +1257,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
trace_amdgpu_cs_ioctl(job);
amdgpu_vm_bo_trace_cs(&fpriv->vm, &p->ticket);
- priority = job->base.s_priority;
drm_sched_entity_push_job(&job->base, entity);
amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 6ed36a2c5f73..8842c55d4490 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -26,6 +26,7 @@
#include "amdgpu.h"
#include "amdgpu_sched.h"
#include "amdgpu_ras.h"
+#include <linux/nospec.h>
#define to_amdgpu_ctx_entity(e) \
container_of((e), struct amdgpu_ctx_entity, entity)
@@ -72,13 +73,30 @@ static enum gfx_pipe_priority amdgpu_ctx_sched_prio_to_compute_prio(enum drm_sch
}
}
-static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, const u32 hw_ip, const u32 ring)
+static unsigned int amdgpu_ctx_prio_sched_to_hw(struct amdgpu_device *adev,
+ enum drm_sched_priority prio,
+ u32 hw_ip)
+{
+ unsigned int hw_prio;
+
+ hw_prio = (hw_ip == AMDGPU_HW_IP_COMPUTE) ?
+ amdgpu_ctx_sched_prio_to_compute_prio(prio) :
+ AMDGPU_RING_PRIO_DEFAULT;
+ hw_ip = array_index_nospec(hw_ip, AMDGPU_HW_IP_NUM);
+ if (adev->gpu_sched[hw_ip][hw_prio].num_scheds == 0)
+ hw_prio = AMDGPU_RING_PRIO_DEFAULT;
+
+ return hw_prio;
+}
+
+static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip,
+ const u32 ring)
{
struct amdgpu_device *adev = ctx->adev;
struct amdgpu_ctx_entity *entity;
struct drm_gpu_scheduler **scheds = NULL, *sched = NULL;
unsigned num_scheds = 0;
- enum gfx_pipe_priority hw_prio;
+ unsigned int hw_prio;
enum drm_sched_priority priority;
int r;
@@ -90,52 +108,16 @@ static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, const u32 hw_ip, const
entity->sequence = 1;
priority = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
ctx->init_priority : ctx->override_priority;
- switch (hw_ip) {
- case AMDGPU_HW_IP_GFX:
- sched = &adev->gfx.gfx_ring[0].sched;
- scheds = &sched;
- num_scheds = 1;
- break;
- case AMDGPU_HW_IP_COMPUTE:
- hw_prio = amdgpu_ctx_sched_prio_to_compute_prio(priority);
- scheds = adev->gfx.compute_prio_sched[hw_prio];
- num_scheds = adev->gfx.num_compute_sched[hw_prio];
- break;
- case AMDGPU_HW_IP_DMA:
- scheds = adev->sdma.sdma_sched;
- num_scheds = adev->sdma.num_sdma_sched;
- break;
- case AMDGPU_HW_IP_UVD:
- sched = &adev->uvd.inst[0].ring.sched;
- scheds = &sched;
- num_scheds = 1;
- break;
- case AMDGPU_HW_IP_VCE:
- sched = &adev->vce.ring[0].sched;
- scheds = &sched;
- num_scheds = 1;
- break;
- case AMDGPU_HW_IP_UVD_ENC:
- sched = &adev->uvd.inst[0].ring_enc[0].sched;
- scheds = &sched;
- num_scheds = 1;
- break;
- case AMDGPU_HW_IP_VCN_DEC:
- sched = drm_sched_pick_best(adev->vcn.vcn_dec_sched,
- adev->vcn.num_vcn_dec_sched);
- scheds = &sched;
- num_scheds = 1;
- break;
- case AMDGPU_HW_IP_VCN_ENC:
- sched = drm_sched_pick_best(adev->vcn.vcn_enc_sched,
- adev->vcn.num_vcn_enc_sched);
+ hw_prio = amdgpu_ctx_prio_sched_to_hw(adev, priority, hw_ip);
+
+ hw_ip = array_index_nospec(hw_ip, AMDGPU_HW_IP_NUM);
+ scheds = adev->gpu_sched[hw_ip][hw_prio].sched;
+ num_scheds = adev->gpu_sched[hw_ip][hw_prio].num_scheds;
+
+ if (hw_ip == AMDGPU_HW_IP_VCN_ENC || hw_ip == AMDGPU_HW_IP_VCN_DEC) {
+ sched = drm_sched_pick_best(scheds, num_scheds);
scheds = &sched;
num_scheds = 1;
- break;
- case AMDGPU_HW_IP_VCN_JPEG:
- scheds = adev->jpeg.jpeg_sched;
- num_scheds = adev->jpeg.num_jpeg_sched;
- break;
}
r = drm_sched_entity_init(&entity->entity, priority, scheds, num_scheds,
@@ -178,7 +160,6 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
return 0;
-
}
static void amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity)
@@ -525,7 +506,7 @@ static void amdgpu_ctx_set_entity_priority(struct amdgpu_ctx *ctx,
enum drm_sched_priority priority)
{
struct amdgpu_device *adev = ctx->adev;
- enum gfx_pipe_priority hw_prio;
+ unsigned int hw_prio;
struct drm_gpu_scheduler **scheds = NULL;
unsigned num_scheds;
@@ -534,9 +515,11 @@ static void amdgpu_ctx_set_entity_priority(struct amdgpu_ctx *ctx,
/* set hw priority */
if (hw_ip == AMDGPU_HW_IP_COMPUTE) {
- hw_prio = amdgpu_ctx_sched_prio_to_compute_prio(priority);
- scheds = adev->gfx.compute_prio_sched[hw_prio];
- num_scheds = adev->gfx.num_compute_sched[hw_prio];
+ hw_prio = amdgpu_ctx_prio_sched_to_hw(adev, priority,
+ AMDGPU_HW_IP_COMPUTE);
+ hw_prio = array_index_nospec(hw_prio, AMDGPU_RING_PRIO_MAX);
+ scheds = adev->gpu_sched[hw_ip][hw_prio].sched;
+ num_scheds = adev->gpu_sched[hw_ip][hw_prio].num_scheds;
drm_sched_entity_modify_sched(&aentity->entity, scheds,
num_scheds);
}
@@ -665,78 +648,3 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
idr_destroy(&mgr->ctx_handles);
mutex_destroy(&mgr->lock);
}
-
-
-static void amdgpu_ctx_init_compute_sched(struct amdgpu_device *adev)
-{
- int num_compute_sched_normal = 0;
- int num_compute_sched_high = AMDGPU_MAX_COMPUTE_RINGS - 1;
- int i;
-
- /* use one drm sched array, gfx.compute_sched to store both high and
- * normal priority drm compute schedulers */
- for (i = 0; i < adev->gfx.num_compute_rings; i++) {
- if (!adev->gfx.compute_ring[i].has_high_prio)
- adev->gfx.compute_sched[num_compute_sched_normal++] =
- &adev->gfx.compute_ring[i].sched;
- else
- adev->gfx.compute_sched[num_compute_sched_high--] =
- &adev->gfx.compute_ring[i].sched;
- }
-
- /* compute ring only has two priority for now */
- i = AMDGPU_GFX_PIPE_PRIO_NORMAL;
- adev->gfx.compute_prio_sched[i] = &adev->gfx.compute_sched[0];
- adev->gfx.num_compute_sched[i] = num_compute_sched_normal;
-
- i = AMDGPU_GFX_PIPE_PRIO_HIGH;
- if (num_compute_sched_high == (AMDGPU_MAX_COMPUTE_RINGS - 1)) {
- /* When compute has no high priority rings then use */
- /* normal priority sched array */
- adev->gfx.compute_prio_sched[i] = &adev->gfx.compute_sched[0];
- adev->gfx.num_compute_sched[i] = num_compute_sched_normal;
- } else {
- adev->gfx.compute_prio_sched[i] =
- &adev->gfx.compute_sched[num_compute_sched_high - 1];
- adev->gfx.num_compute_sched[i] =
- adev->gfx.num_compute_rings - num_compute_sched_normal;
- }
-}
-
-void amdgpu_ctx_init_sched(struct amdgpu_device *adev)
-{
- int i, j;
-
- amdgpu_ctx_init_compute_sched(adev);
- for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
- adev->gfx.gfx_sched[i] = &adev->gfx.gfx_ring[i].sched;
- adev->gfx.num_gfx_sched++;
- }
-
- for (i = 0; i < adev->sdma.num_instances; i++) {
- adev->sdma.sdma_sched[i] = &adev->sdma.instance[i].ring.sched;
- adev->sdma.num_sdma_sched++;
- }
-
- for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
- if (adev->vcn.harvest_config & (1 << i))
- continue;
- adev->vcn.vcn_dec_sched[adev->vcn.num_vcn_dec_sched++] =
- &adev->vcn.inst[i].ring_dec.sched;
- }
-
- for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
- if (adev->vcn.harvest_config & (1 << i))
- continue;
- for (j = 0; j < adev->vcn.num_enc_rings; ++j)
- adev->vcn.vcn_enc_sched[adev->vcn.num_vcn_enc_sched++] =
- &adev->vcn.inst[i].ring_enc[j].sched;
- }
-
- for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
- if (adev->jpeg.harvest_config & (1 << i))
- continue;
- adev->jpeg.jpeg_sched[adev->jpeg.num_jpeg_sched++] =
- &adev->jpeg.inst[i].ring_dec.sched;
- }
-}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
index de490f183af2..f54e10314661 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -88,7 +88,4 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr);
long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout);
void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
-void amdgpu_ctx_init_sched(struct amdgpu_device *adev);
-
-
#endif
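
A minimal sketch of the array_index_nospec() clamping used in amdgpu_ctx_init_entity() above, assuming a hypothetical helper named lookup_sched(): hw_ip reaches the driver from the context ioctl, so it is bounds-checked and then clamped under speculation (a Spectre-v1 style mitigation) before indexing the new gpu_sched table.

	#include <linux/nospec.h>

	/* hw_ip is user-controlled, so bound and clamp the index before using it */
	static struct drm_gpu_scheduler **lookup_sched(struct amdgpu_device *adev,
						       u32 hw_ip, unsigned int hw_prio)
	{
		if (hw_ip >= AMDGPU_HW_IP_NUM)	/* architectural bounds check */
			return NULL;
		/* prevent speculative out-of-bounds loads on the table index */
		hw_ip = array_index_nospec(hw_ip, AMDGPU_HW_IP_NUM);
		return adev->gpu_sched[hw_ip][hw_prio].sched;
	}
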
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index c0f9a651dc06..d33cb344be69 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -27,7 +27,7 @@
#include <linux/pci.h>
#include <linux/uaccess.h>
#include <linux/pm_runtime.h>
-
+#include <linux/poll.h>
#include <drm/drm_debugfs.h>
#include "amdgpu.h"
@@ -74,8 +74,82 @@ int amdgpu_debugfs_add_files(struct amdgpu_device *adev,
return 0;
}
+int amdgpu_debugfs_wait_dump(struct amdgpu_device *adev)
+{
+#if defined(CONFIG_DEBUG_FS)
+ unsigned long timeout = 600 * HZ;
+ int ret;
+
+ wake_up_interruptible(&adev->autodump.gpu_hang);
+
+ ret = wait_for_completion_interruptible_timeout(&adev->autodump.dumping, timeout);
+ if (ret == 0) {
+ pr_err("autodump: timeout, move on to gpu recovery\n");
+ return -ETIMEDOUT;
+ }
+#endif
+ return 0;
+}
+
#if defined(CONFIG_DEBUG_FS)
+static int amdgpu_debugfs_autodump_open(struct inode *inode, struct file *file)
+{
+ struct amdgpu_device *adev = inode->i_private;
+ int ret;
+
+ file->private_data = adev;
+
+ mutex_lock(&adev->lock_reset);
+ if (adev->autodump.dumping.done) {
+ reinit_completion(&adev->autodump.dumping);
+ ret = 0;
+ } else {
+ ret = -EBUSY;
+ }
+ mutex_unlock(&adev->lock_reset);
+
+ return ret;
+}
+
+static int amdgpu_debugfs_autodump_release(struct inode *inode, struct file *file)
+{
+ struct amdgpu_device *adev = file->private_data;
+
+ complete_all(&adev->autodump.dumping);
+ return 0;
+}
+
+static unsigned int amdgpu_debugfs_autodump_poll(struct file *file, struct poll_table_struct *poll_table)
+{
+ struct amdgpu_device *adev = file->private_data;
+
+ poll_wait(file, &adev->autodump.gpu_hang, poll_table);
+
+ if (adev->in_gpu_reset)
+ return POLLIN | POLLRDNORM | POLLWRNORM;
+
+ return 0;
+}
+
+static const struct file_operations autodump_debug_fops = {
+ .owner = THIS_MODULE,
+ .open = amdgpu_debugfs_autodump_open,
+ .poll = amdgpu_debugfs_autodump_poll,
+ .release = amdgpu_debugfs_autodump_release,
+};
+
+static void amdgpu_debugfs_autodump_init(struct amdgpu_device *adev)
+{
+ init_completion(&adev->autodump.dumping);
+ complete_all(&adev->autodump.dumping);
+ init_waitqueue_head(&adev->autodump.gpu_hang);
+
+ debugfs_create_file("amdgpu_autodump", 0600,
+ adev->ddev->primary->debugfs_root,
+ adev, &autodump_debug_fops);
+}
+
/**
* amdgpu_debugfs_process_reg_op - Handle MMIO register reads/writes
*
@@ -152,11 +226,16 @@ static int amdgpu_debugfs_process_reg_op(bool read, struct file *f,
if (r < 0)
return r;
+ r = amdgpu_virt_enable_access_debugfs(adev);
+ if (r < 0)
+ return r;
+
if (use_bank) {
if ((sh_bank != 0xFFFFFFFF && sh_bank >= adev->gfx.config.max_sh_per_se) ||
(se_bank != 0xFFFFFFFF && se_bank >= adev->gfx.config.max_shader_engines)) {
pm_runtime_mark_last_busy(adev->ddev->dev);
pm_runtime_put_autosuspend(adev->ddev->dev);
+ amdgpu_virt_disable_access_debugfs(adev);
return -EINVAL;
}
mutex_lock(&adev->grbm_idx_mutex);
@@ -207,6 +286,7 @@ end:
pm_runtime_mark_last_busy(adev->ddev->dev);
pm_runtime_put_autosuspend(adev->ddev->dev);
+ amdgpu_virt_disable_access_debugfs(adev);
return result;
}
@@ -255,6 +335,10 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
if (r < 0)
return r;
+ r = amdgpu_virt_enable_access_debugfs(adev);
+ if (r < 0)
+ return r;
+
while (size) {
uint32_t value;
@@ -263,6 +347,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
if (r) {
pm_runtime_mark_last_busy(adev->ddev->dev);
pm_runtime_put_autosuspend(adev->ddev->dev);
+ amdgpu_virt_disable_access_debugfs(adev);
return r;
}
@@ -275,6 +360,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
pm_runtime_mark_last_busy(adev->ddev->dev);
pm_runtime_put_autosuspend(adev->ddev->dev);
+ amdgpu_virt_disable_access_debugfs(adev);
return result;
}
@@ -304,6 +390,10 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
if (r < 0)
return r;
+ r = amdgpu_virt_enable_access_debugfs(adev);
+ if (r < 0)
+ return r;
+
while (size) {
uint32_t value;
@@ -311,6 +401,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
if (r) {
pm_runtime_mark_last_busy(adev->ddev->dev);
pm_runtime_put_autosuspend(adev->ddev->dev);
+ amdgpu_virt_disable_access_debugfs(adev);
return r;
}
@@ -325,6 +416,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
pm_runtime_mark_last_busy(adev->ddev->dev);
pm_runtime_put_autosuspend(adev->ddev->dev);
+ amdgpu_virt_disable_access_debugfs(adev);
return result;
}
@@ -354,6 +446,10 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
if (r < 0)
return r;
+ r = amdgpu_virt_enable_access_debugfs(adev);
+ if (r < 0)
+ return r;
+
while (size) {
uint32_t value;
@@ -362,6 +458,7 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
if (r) {
pm_runtime_mark_last_busy(adev->ddev->dev);
pm_runtime_put_autosuspend(adev->ddev->dev);
+ amdgpu_virt_disable_access_debugfs(adev);
return r;
}
@@ -374,6 +471,7 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
pm_runtime_mark_last_busy(adev->ddev->dev);
pm_runtime_put_autosuspend(adev->ddev->dev);
+ amdgpu_virt_disable_access_debugfs(adev);
return result;
}
@@ -403,6 +501,10 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user
if (r < 0)
return r;
+ r = amdgpu_virt_enable_access_debugfs(adev);
+ if (r < 0)
+ return r;
+
while (size) {
uint32_t value;
@@ -410,6 +512,7 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user
if (r) {
pm_runtime_mark_last_busy(adev->ddev->dev);
pm_runtime_put_autosuspend(adev->ddev->dev);
+ amdgpu_virt_disable_access_debugfs(adev);
return r;
}
@@ -424,6 +527,7 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user
pm_runtime_mark_last_busy(adev->ddev->dev);
pm_runtime_put_autosuspend(adev->ddev->dev);
+ amdgpu_virt_disable_access_debugfs(adev);
return result;
}
@@ -453,6 +557,10 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
if (r < 0)
return r;
+ r = amdgpu_virt_enable_access_debugfs(adev);
+ if (r < 0)
+ return r;
+
while (size) {
uint32_t value;
@@ -461,6 +569,7 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
if (r) {
pm_runtime_mark_last_busy(adev->ddev->dev);
pm_runtime_put_autosuspend(adev->ddev->dev);
+ amdgpu_virt_disable_access_debugfs(adev);
return r;
}
@@ -473,6 +582,7 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
pm_runtime_mark_last_busy(adev->ddev->dev);
pm_runtime_put_autosuspend(adev->ddev->dev);
+ amdgpu_virt_disable_access_debugfs(adev);
return result;
}
@@ -502,6 +612,10 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
if (r < 0)
return r;
+ r = amdgpu_virt_enable_access_debugfs(adev);
+ if (r < 0)
+ return r;
+
while (size) {
uint32_t value;
@@ -509,6 +623,7 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
if (r) {
pm_runtime_mark_last_busy(adev->ddev->dev);
pm_runtime_put_autosuspend(adev->ddev->dev);
+ amdgpu_virt_disable_access_debugfs(adev);
return r;
}
@@ -523,6 +638,7 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
pm_runtime_mark_last_busy(adev->ddev->dev);
pm_runtime_put_autosuspend(adev->ddev->dev);
+ amdgpu_virt_disable_access_debugfs(adev);
return result;
}
@@ -651,16 +767,24 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf,
if (r < 0)
return r;
+ r = amdgpu_virt_enable_access_debugfs(adev);
+ if (r < 0)
+ return r;
+
r = amdgpu_dpm_read_sensor(adev, idx, &values[0], &valuesize);
pm_runtime_mark_last_busy(adev->ddev->dev);
pm_runtime_put_autosuspend(adev->ddev->dev);
- if (r)
+ if (r) {
+ amdgpu_virt_disable_access_debugfs(adev);
return r;
+ }
- if (size > valuesize)
+ if (size > valuesize) {
+ amdgpu_virt_disable_access_debugfs(adev);
return -EINVAL;
+ }
outsize = 0;
x = 0;
@@ -673,6 +797,7 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf,
}
}
+ amdgpu_virt_disable_access_debugfs(adev);
return !r ? outsize : r;
}
@@ -720,6 +845,10 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
if (r < 0)
return r;
+ r = amdgpu_virt_enable_access_debugfs(adev);
+ if (r < 0)
+ return r;
+
/* switch to the specific se/sh/cu */
mutex_lock(&adev->grbm_idx_mutex);
amdgpu_gfx_select_se_sh(adev, se, sh, cu);
@@ -734,16 +863,20 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
pm_runtime_mark_last_busy(adev->ddev->dev);
pm_runtime_put_autosuspend(adev->ddev->dev);
- if (!x)
+ if (!x) {
+ amdgpu_virt_disable_access_debugfs(adev);
return -EINVAL;
+ }
while (size && (offset < x * 4)) {
uint32_t value;
value = data[offset >> 2];
r = put_user(value, (uint32_t *)buf);
- if (r)
+ if (r) {
+ amdgpu_virt_disable_access_debugfs(adev);
return r;
+ }
result += 4;
buf += 4;
@@ -751,6 +884,7 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
size -= 4;
}
+ amdgpu_virt_disable_access_debugfs(adev);
return result;
}
@@ -805,6 +939,10 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf,
if (r < 0)
return r;
+ r = amdgpu_virt_enable_access_debugfs(adev);
+ if (r < 0)
+ return r;
+
/* switch to the specific se/sh/cu */
mutex_lock(&adev->grbm_idx_mutex);
amdgpu_gfx_select_se_sh(adev, se, sh, cu);
@@ -840,6 +978,7 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf,
err:
kfree(data);
+ amdgpu_virt_disable_access_debugfs(adev);
return result;
}
@@ -1369,6 +1508,8 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
amdgpu_ras_debugfs_create_all(adev);
+ amdgpu_debugfs_autodump_init(adev);
+
return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_list,
ARRAY_SIZE(amdgpu_debugfs_list));
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
index de12d1101526..2803884d338d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
@@ -31,6 +31,11 @@ struct amdgpu_debugfs {
unsigned num_files;
};
+struct amdgpu_autodump {
+ struct completion dumping;
+ struct wait_queue_head gpu_hang;
+};
+
int amdgpu_debugfs_regs_init(struct amdgpu_device *adev);
int amdgpu_debugfs_init(struct amdgpu_device *adev);
void amdgpu_debugfs_fini(struct amdgpu_device *adev);
@@ -40,3 +45,4 @@ int amdgpu_debugfs_add_files(struct amdgpu_device *adev,
int amdgpu_debugfs_fence_init(struct amdgpu_device *adev);
int amdgpu_debugfs_firmware_init(struct amdgpu_device *adev);
int amdgpu_debugfs_gem_init(struct amdgpu_device *adev);
+int amdgpu_debugfs_wait_dump(struct amdgpu_device *adev);
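
The amdgpu_autodump file added above implements a small handshake with a userspace dump tool: opening it re-arms the dumping completion, poll() becomes readable once a GPU hang is being handled, and closing the file (or the 600 second timeout in amdgpu_debugfs_wait_dump()) lets the reset proceed. A minimal userspace sketch, assuming debugfs is mounted at /sys/kernel/debug and the card is DRI minor 0:

	#include <fcntl.h>
	#include <poll.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		/* path is an assumption; adjust the minor number to the card in use */
		int fd = open("/sys/kernel/debug/dri/0/amdgpu_autodump", O_RDONLY);
		struct pollfd pfd = { .fd = fd, .events = POLLIN };

		if (fd < 0)
			return 1;	/* e.g. busy while another dump client holds the file */

		/* block until the driver signals a GPU hang */
		if (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLIN))
			printf("GPU hang detected, collect state before the reset proceeds\n");

		close(fd);		/* completes the dumping handshake so recovery can continue */
		return 0;
	}
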
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index affde2de2a0d..a027a8f7b281 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -64,9 +64,11 @@
#include "amdgpu_xgmi.h"
#include "amdgpu_ras.h"
#include "amdgpu_pmu.h"
+#include "amdgpu_fru_eeprom.h"
#include <linux/suspend.h>
#include <drm/task_barrier.h>
+#include <linux/pm_runtime.h>
MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
@@ -138,6 +140,72 @@ static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
/**
+ * DOC: product_name
+ *
+ * The amdgpu driver provides a sysfs API for reporting the product name
+ * for the device.
+ * The file product_name is used for this and returns the product name
+ * as returned from the FRU.
+ * NOTE: This is only available for certain server cards.
+ */
+
+static ssize_t amdgpu_device_get_product_name(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = ddev->dev_private;
+
+ return snprintf(buf, PAGE_SIZE, "%s\n", adev->product_name);
+}
+
+static DEVICE_ATTR(product_name, S_IRUGO,
+ amdgpu_device_get_product_name, NULL);
+
+/**
+ * DOC: product_number
+ *
+ * The amdgpu driver provides a sysfs API for reporting the part number
+ * for the device.
+ * The file product_number is used for this and returns the part number
+ * as returned from the FRU.
+ * NOTE: This is only available for certain server cards.
+ */
+
+static ssize_t amdgpu_device_get_product_number(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = ddev->dev_private;
+
+ return snprintf(buf, PAGE_SIZE, "%s\n", adev->product_number);
+}
+
+static DEVICE_ATTR(product_number, S_IRUGO,
+ amdgpu_device_get_product_number, NULL);
+
+/**
+ * DOC: serial_number
+ *
+ * The amdgpu driver provides a sysfs API for reporting the serial number
+ * for the device.
+ * The file serial_number is used for this and returns the serial number
+ * as returned from the FRU.
+ * NOTE: This is only available for certain server cards.
+ */
+
+static ssize_t amdgpu_device_get_serial_number(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = ddev->dev_private;
+
+ return snprintf(buf, PAGE_SIZE, "%s\n", adev->serial);
+}
+
+static DEVICE_ATTR(serial_number, S_IRUGO,
+ amdgpu_device_get_serial_number, NULL);
+
+/**
* amdgpu_device_supports_boco - Is the device a dGPU with HG/PX power control
*
* @dev: drm_device pointer
@@ -231,10 +299,10 @@ void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
}
/*
- * MMIO register access helper functions.
+ * device register access helper functions.
*/
/**
- * amdgpu_mm_rreg - read a memory mapped IO register
+ * amdgpu_device_rreg - read a register
*
* @adev: amdgpu_device pointer
* @reg: dword aligned register offset
@@ -242,25 +310,19 @@ void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
*
* Returns the 32 bit value from the offset specified.
*/
-uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
- uint32_t acc_flags)
+uint32_t amdgpu_device_rreg(struct amdgpu_device *adev, uint32_t reg,
+ uint32_t acc_flags)
{
uint32_t ret;
- if ((acc_flags & AMDGPU_REGS_KIQ) || (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)))
+ if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
return amdgpu_kiq_rreg(adev, reg);
- if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
+ if ((reg * 4) < adev->rmmio_size)
ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
- else {
- unsigned long flags;
-
- spin_lock_irqsave(&adev->mmio_idx_lock, flags);
- writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
- ret = readl(((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
- spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
- }
- trace_amdgpu_mm_rreg(adev->pdev->device, reg, ret);
+ else
+ ret = adev->pcie_rreg(adev, (reg * 4));
+ trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
return ret;
}
@@ -306,28 +368,19 @@ void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
BUG();
}
-void static inline amdgpu_mm_wreg_mmio(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t acc_flags)
+void static inline amdgpu_device_wreg_no_kiq(struct amdgpu_device *adev, uint32_t reg,
+ uint32_t v, uint32_t acc_flags)
{
- trace_amdgpu_mm_wreg(adev->pdev->device, reg, v);
+ trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
- if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
+ if ((reg * 4) < adev->rmmio_size)
writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
- else {
- unsigned long flags;
-
- spin_lock_irqsave(&adev->mmio_idx_lock, flags);
- writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
- writel(v, ((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
- spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
- }
-
- if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
- udelay(500);
- }
+ else
+ adev->pcie_wreg(adev, (reg * 4), v);
}
/**
- * amdgpu_mm_wreg - write to a memory mapped IO register
+ * amdgpu_device_wreg - write to a register
*
* @adev: amdgpu_device pointer
* @reg: dword aligned register offset
@@ -336,17 +389,13 @@ void static inline amdgpu_mm_wreg_mmio(struct amdgpu_device *adev, uint32_t reg,
*
* Writes the value specified to the offset specified.
*/
-void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
- uint32_t acc_flags)
+void amdgpu_device_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
+ uint32_t acc_flags)
{
- if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
- adev->last_mm_index = v;
- }
-
- if ((acc_flags & AMDGPU_REGS_KIQ) || (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)))
+ if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
return amdgpu_kiq_wreg(adev, reg, v);
- amdgpu_mm_wreg_mmio(adev, reg, v, acc_flags);
+ amdgpu_device_wreg_no_kiq(adev, reg, v, acc_flags);
}
/*
@@ -365,7 +414,7 @@ void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, uint32_t reg, uint32_t
return adev->gfx.rlc.funcs->rlcg_wreg(adev, reg, v);
}
- amdgpu_mm_wreg_mmio(adev, reg, v, acc_flags);
+ amdgpu_device_wreg_no_kiq(adev, reg, v, acc_flags);
}
/**
@@ -397,20 +446,12 @@ u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
*/
void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
{
- if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
- adev->last_mm_index = v;
- }
-
if ((reg * 4) < adev->rio_mem_size)
iowrite32(v, adev->rio_mem + (reg * 4));
else {
iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
iowrite32(v, adev->rio_mem + (mmMM_DATA * 4));
}
-
- if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
- udelay(500);
- }
}
/**
@@ -1126,6 +1167,8 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
+ amdgpu_gmc_tmz_set(adev);
+
return 0;
}
@@ -1147,7 +1190,7 @@ static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switchero
return;
if (state == VGA_SWITCHEROO_ON) {
- pr_info("amdgpu: switched on\n");
+ pr_info("switched on\n");
/* don't suspend or resume card normally */
dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
@@ -1161,7 +1204,7 @@ static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switchero
dev->switch_power_state = DRM_SWITCH_POWER_ON;
drm_kms_helper_poll_enable(dev);
} else {
- pr_info("amdgpu: switched off\n");
+ pr_info("switched off\n");
drm_kms_helper_poll_disable(dev);
dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
amdgpu_device_suspend(dev, true);
@@ -1524,9 +1567,9 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
chip_name = "vega12";
break;
case CHIP_RAVEN:
- if (adev->rev_id >= 8)
+ if (adev->apu_flags & AMD_APU_IS_RAVEN2)
chip_name = "raven2";
- else if (adev->pdev->device == 0x15d8)
+ else if (adev->apu_flags & AMD_APU_IS_PICASSO)
chip_name = "picasso";
else
chip_name = "raven";
@@ -1574,8 +1617,10 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
(const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
- if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
+ if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10) {
+ amdgpu_discovery_get_gfx_info(adev);
goto parse_soc_bounding_box;
+ }
adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
@@ -1721,19 +1766,31 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
return -EINVAL;
}
- r = amdgpu_device_parse_gpu_info_fw(adev);
- if (r)
- return r;
-
- if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
- amdgpu_discovery_get_gfx_info(adev);
-
amdgpu_amdkfd_device_probe(adev);
if (amdgpu_sriov_vf(adev)) {
+ /* handle vbios stuff prior full access mode for new handshake */
+ if (adev->virt.req_init_data_ver == 1) {
+ if (!amdgpu_get_bios(adev)) {
+ DRM_ERROR("failed to get vbios\n");
+ return -EINVAL;
+ }
+
+ r = amdgpu_atombios_init(adev);
+ if (r) {
+ dev_err(adev->dev, "amdgpu_atombios_init failed\n");
+ amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
+ return r;
+ }
+ }
+ }
+
+ /* we need to send REQ_GPU here for the legacy handshake, otherwise the
+ * vbios will not be prepared by the host for this VF */
+ if (amdgpu_sriov_vf(adev) && adev->virt.req_init_data_ver < 1) {
r = amdgpu_virt_request_full_gpu(adev, true);
if (r)
- return -EAGAIN;
+ return r;
}
adev->pm.pp_feature = amdgpu_pp_feature_mask;
@@ -1763,6 +1820,14 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
}
/* get the vbios after the asic_funcs are set up */
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
+ r = amdgpu_device_parse_gpu_info_fw(adev);
+ if (r)
+ return r;
+
+ /* skip vbios handling for new handshake */
+ if (amdgpu_sriov_vf(adev) && adev->virt.req_init_data_ver == 1)
+ continue;
+
/* Read BIOS */
if (!amdgpu_get_bios(adev))
return -EINVAL;
@@ -1889,6 +1954,12 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
if (r)
return r;
+ if (amdgpu_sriov_vf(adev) && adev->virt.req_init_data_ver > 0) {
+ r = amdgpu_virt_request_full_gpu(adev, true);
+ if (r)
+ return -EAGAIN;
+ }
+
for (i = 0; i < adev->num_ip_blocks; i++) {
if (!adev->ip_blocks[i].status.valid)
continue;
@@ -1975,6 +2046,8 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
amdgpu_xgmi_add_device(adev);
amdgpu_amdkfd_device_init(adev);
+ amdgpu_fru_get_product_info(adev);
+
init_failed:
if (amdgpu_sriov_vf(adev))
amdgpu_virt_release_full_gpu(adev, true);
@@ -2171,6 +2244,8 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
adev->ip_blocks[i].status.late_initialized = true;
}
+ amdgpu_ras_set_error_query_ready(adev, true);
+
amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
@@ -2203,7 +2278,8 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
if (gpu_instance->adev->flags & AMD_IS_APU)
continue;
- r = amdgpu_xgmi_set_pstate(gpu_instance->adev, 0);
+ r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
+ AMDGPU_XGMI_PSTATE_MIN);
if (r) {
DRM_ERROR("pstate setting failed (%d).\n", r);
break;
@@ -2785,12 +2861,12 @@ static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
* By default timeout for non compute jobs is 10000.
* And there is no timeout enforced on compute jobs.
* In SR-IOV or passthrough mode, timeout for compute
- * jobs are 10000 by default.
+ * jobs are 60000 by default.
*/
adev->gfx_timeout = msecs_to_jiffies(10000);
adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
- adev->compute_timeout = adev->gfx_timeout;
+ adev->compute_timeout = msecs_to_jiffies(60000);
else
adev->compute_timeout = MAX_SCHEDULE_TIMEOUT;
@@ -2841,6 +2917,14 @@ static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
return ret;
}
+static const struct attribute *amdgpu_dev_attributes[] = {
+ &dev_attr_product_name.attr,
+ &dev_attr_product_number.attr,
+ &dev_attr_serial_number.attr,
+ &dev_attr_pcie_replay_count.attr,
+ NULL
+};
+
/**
* amdgpu_device_init - initialize the driver
*
@@ -2942,9 +3026,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
INIT_LIST_HEAD(&adev->shadow_list);
mutex_init(&adev->shadow_list_lock);
- INIT_LIST_HEAD(&adev->ring_lru_list);
- spin_lock_init(&adev->ring_lru_list_lock);
-
INIT_DELAYED_WORK(&adev->delayed_init_work,
amdgpu_device_delayed_init_work_handler);
INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
@@ -2953,7 +3034,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
adev->gfx.gfx_off_req_count = 1;
- adev->pm.ac_power = power_supply_is_system_supplied() > 0 ? true : false;
+ adev->pm.ac_power = power_supply_is_system_supplied() > 0;
/* Registers mapping */
/* TODO: block userspace mapping of io register */
@@ -3002,18 +3083,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
if (amdgpu_mes && adev->asic_type >= CHIP_NAVI10)
adev->enable_mes = true;
- if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10) {
- r = amdgpu_discovery_init(adev);
- if (r) {
- dev_err(adev->dev, "amdgpu_discovery_init failed\n");
- return r;
- }
- }
-
- /* early init functions */
- r = amdgpu_device_ip_early_init(adev);
- if (r)
- return r;
+ /* detect hw virtualization here */
+ amdgpu_detect_virtualization(adev);
r = amdgpu_device_get_job_timeout_settings(adev);
if (r) {
@@ -3021,6 +3092,11 @@ int amdgpu_device_init(struct amdgpu_device *adev,
return r;
}
+ /* early init functions */
+ r = amdgpu_device_ip_early_init(adev);
+ if (r)
+ return r;
+
/* doorbell bar mapping and doorbell index init*/
amdgpu_device_doorbell_init(adev);
@@ -3127,14 +3203,13 @@ fence_driver_init:
goto failed;
}
- DRM_DEBUG("SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
+ dev_info(adev->dev,
+ "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
adev->gfx.config.max_shader_engines,
adev->gfx.config.max_sh_per_se,
adev->gfx.config.max_cu_per_sh,
adev->gfx.cu_info.number);
- amdgpu_ctx_init_sched(adev);
-
adev->accel_working = true;
amdgpu_vm_check_compute_bug(adev);
@@ -3199,9 +3274,9 @@ fence_driver_init:
queue_delayed_work(system_wq, &adev->delayed_init_work,
msecs_to_jiffies(AMDGPU_RESUME_MS));
- r = device_create_file(adev->dev, &dev_attr_pcie_replay_count);
+ r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
if (r) {
- dev_err(adev->dev, "Could not create pcie_replay_count");
+ dev_err(adev->dev, "Could not create amdgpu device attr\n");
return r;
}
@@ -3284,9 +3359,10 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
adev->rmmio = NULL;
amdgpu_device_doorbell_fini(adev);
- device_remove_file(adev->dev, &dev_attr_pcie_replay_count);
if (adev->ucode_sysfs_en)
amdgpu_ucode_sysfs_fini(adev);
+
+ sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
if (IS_ENABLED(CONFIG_PERF_EVENTS))
amdgpu_pmu_fini(adev);
if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
@@ -3754,6 +3830,8 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
if (r)
return r;
+ amdgpu_amdkfd_pre_reset(adev);
+
/* Resume IP prior to SMC */
r = amdgpu_device_ip_reinit_early_sriov(adev);
if (r)
@@ -3848,6 +3926,8 @@ static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
int i, r = 0;
bool need_full_reset = *need_full_reset_arg;
+ amdgpu_debugfs_wait_dump(adev);
+
/* block all schedulers and reset given job's ring */
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = adev->rings[i];
@@ -4052,6 +4132,64 @@ static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
mutex_unlock(&adev->lock_reset);
}
+static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
+{
+ struct pci_dev *p = NULL;
+
+ p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
+ adev->pdev->bus->number, 1);
+ if (p) {
+ pm_runtime_enable(&(p->dev));
+ pm_runtime_resume(&(p->dev));
+ }
+}
+
+static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
+{
+ enum amd_reset_method reset_method;
+ struct pci_dev *p = NULL;
+ u64 expires;
+
+ /*
+ * For now, only BACO and mode1 reset are confirmed
+ * to suffer the audio issue when not properly suspended.
+ */
+ reset_method = amdgpu_asic_reset_method(adev);
+ if ((reset_method != AMD_RESET_METHOD_BACO) &&
+ (reset_method != AMD_RESET_METHOD_MODE1))
+ return -EINVAL;
+
+ p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
+ adev->pdev->bus->number, 1);
+ if (!p)
+ return -ENODEV;
+
+ expires = pm_runtime_autosuspend_expiration(&(p->dev));
+ if (!expires)
+ /*
+ * If we cannot get the audio device autosuspend delay,
+ * a fixed 4s interval is used. Since 3s is the audio
+ * controller's default autosuspend delay, the 4s used
+ * here is guaranteed to cover it.
+ */
+ expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
+
+ while (!pm_runtime_status_suspended(&(p->dev))) {
+ if (!pm_runtime_suspend(&(p->dev)))
+ break;
+
+ if (expires < ktime_get_mono_fast_ns()) {
+ dev_warn(adev->dev, "failed to suspend display audio\n");
+ /* TODO: abort the succeeding gpu reset? */
+ return -ETIMEDOUT;
+ }
+ }
+
+ pm_runtime_disable(&(p->dev));
+
+ return 0;
+}
+
/**
* amdgpu_device_gpu_recover - reset the asic and recover scheduler
*
@@ -4067,7 +4205,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
struct amdgpu_job *job)
{
struct list_head device_list, *device_list_handle = NULL;
- bool need_full_reset, job_signaled;
+ bool need_full_reset = false;
+ bool job_signaled = false;
struct amdgpu_hive_info *hive = NULL;
struct amdgpu_device *tmp_adev = NULL;
int i, r = 0;
@@ -4075,6 +4214,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
bool use_baco =
(amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) ?
true : false;
+ bool audio_suspended = false;
/*
* Flush RAM to disk so that after reboot
@@ -4088,16 +4228,9 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
emergency_restart();
}
- need_full_reset = job_signaled = false;
- INIT_LIST_HEAD(&device_list);
-
dev_info(adev->dev, "GPU %s begin!\n",
(in_ras_intr && !use_baco) ? "jobs stop":"reset");
- cancel_delayed_work_sync(&adev->delayed_init_work);
-
- hive = amdgpu_get_xgmi_hive(adev, false);
-
/*
* Here we trylock to avoid chain of resets executing from
* either trigger by jobs on different adevs in XGMI hive or jobs on
@@ -4105,39 +4238,25 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
* We always reset all schedulers for device and all devices for XGMI
* hive so that should take care of them too.
*/
-
+ hive = amdgpu_get_xgmi_hive(adev, true);
if (hive && !mutex_trylock(&hive->reset_lock)) {
DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
job ? job->base.id : -1, hive->hive_id);
+ mutex_unlock(&hive->hive_lock);
return 0;
}
- /* Start with adev pre asic reset first for soft reset check.*/
- if (!amdgpu_device_lock_adev(adev, !hive)) {
- DRM_INFO("Bailing on TDR for s_job:%llx, as another already in progress",
- job ? job->base.id : -1);
- return 0;
- }
-
- /* Block kfd: SRIOV would do it separately */
- if (!amdgpu_sriov_vf(adev))
- amdgpu_amdkfd_pre_reset(adev);
-
- /* Build list of devices to reset */
- if (adev->gmc.xgmi.num_physical_nodes > 1) {
- if (!hive) {
- /*unlock kfd: SRIOV would do it separately */
- if (!amdgpu_sriov_vf(adev))
- amdgpu_amdkfd_post_reset(adev);
- amdgpu_device_unlock_adev(adev);
+ /*
+ * Build list of devices to reset.
+ * In case we are in XGMI hive mode, resort the device list
+ * to put adev in the 1st position.
+ */
+ INIT_LIST_HEAD(&device_list);
+ if (adev->gmc.xgmi.num_physical_nodes > 1) {
+ if (!hive)
return -ENODEV;
- }
-
- /*
- * In case we are in XGMI hive mode device reset is done for all the
- * nodes in the hive to retrain all XGMI links and hence the reset
- * sequence is executed in loop on all nodes.
- */
+ if (!list_is_first(&adev->gmc.xgmi.head, &hive->device_list))
+ list_rotate_to_front(&adev->gmc.xgmi.head, &hive->device_list);
device_list_handle = &hive->device_list;
} else {
list_add_tail(&adev->gmc.xgmi.head, &device_list);
@@ -4146,19 +4265,40 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
/* block all schedulers and reset given job's ring */
list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
- if (tmp_adev != adev) {
- amdgpu_device_lock_adev(tmp_adev, false);
- if (!amdgpu_sriov_vf(tmp_adev))
- amdgpu_amdkfd_pre_reset(tmp_adev);
+ if (!amdgpu_device_lock_adev(tmp_adev, !hive)) {
+ DRM_INFO("Bailing on TDR for s_job:%llx, as another already in progress",
+ job ? job->base.id : -1);
+ mutex_unlock(&hive->hive_lock);
+ return 0;
}
/*
+ * Try to put the audio codec into suspend state
+ * before the gpu reset is started.
+ *
+ * Because the power domain of the graphics device
+ * is shared with the AZ power domain, skipping this
+ * would let us change the audio hardware behind the
+ * audio driver's back and trigger audio codec errors.
+ */
+ if (!amdgpu_device_suspend_display_audio(tmp_adev))
+ audio_suspended = true;
+
+ amdgpu_ras_set_error_query_ready(tmp_adev, false);
+
+ cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
+
+ if (!amdgpu_sriov_vf(tmp_adev))
+ amdgpu_amdkfd_pre_reset(tmp_adev);
+
+ /*
* Mark these ASICs to be reset as untracked first
* And add them back after reset completes
*/
amdgpu_unregister_gpu_instance(tmp_adev);
- amdgpu_fbdev_set_suspend(adev, 1);
+ amdgpu_fbdev_set_suspend(tmp_adev, 1);
/* disable ras on ALL IPs */
if (!(in_ras_intr && !use_baco) &&
@@ -4178,7 +4318,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
}
}
-
if (in_ras_intr && !use_baco)
goto skip_sched_resume;
@@ -4189,30 +4328,14 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
* job->base holds a reference to parent fence
*/
if (job && job->base.s_fence->parent &&
- dma_fence_is_signaled(job->base.s_fence->parent))
+ dma_fence_is_signaled(job->base.s_fence->parent)) {
job_signaled = true;
-
- if (job_signaled) {
dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
goto skip_hw_reset;
}
-
- /* Guilty job will be freed after this*/
- r = amdgpu_device_pre_asic_reset(adev, job, &need_full_reset);
- if (r) {
- /*TODO Should we stop ?*/
- DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
- r, adev->ddev->unique);
- adev->asic_reset_res = r;
- }
-
retry: /* Rest of adevs pre asic reset from XGMI hive. */
list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
-
- if (tmp_adev == adev)
- continue;
-
r = amdgpu_device_pre_asic_reset(tmp_adev,
NULL,
&need_full_reset);
@@ -4274,11 +4397,15 @@ skip_sched_resume:
/*unlock kfd: SRIOV would do it separately */
if (!(in_ras_intr && !use_baco) && !amdgpu_sriov_vf(tmp_adev))
amdgpu_amdkfd_post_reset(tmp_adev);
+ if (audio_suspended)
+ amdgpu_device_resume_display_audio(tmp_adev);
amdgpu_device_unlock_adev(tmp_adev);
}
- if (hive)
+ if (hive) {
mutex_unlock(&hive->reset_lock);
+ mutex_unlock(&hive->hive_lock);
+ }
if (r)
dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h
index 057f6ea645d7..61a26c15c8dd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h
@@ -52,9 +52,6 @@ struct amdgpu_df_funcs {
uint64_t (*get_fica)(struct amdgpu_device *adev, uint32_t ficaa_val);
void (*set_fica)(struct amdgpu_device *adev, uint32_t ficaa_val,
uint32_t ficadl_val, uint32_t ficadh_val);
- uint64_t (*get_dram_base_addr)(struct amdgpu_device *adev,
- uint32_t df_inst);
- uint32_t (*get_df_inst_id)(struct amdgpu_device *adev);
};
struct amdgpu_df {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index 27d8ae19a7a4..b5d6274952a5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -23,9 +23,7 @@
#include "amdgpu.h"
#include "amdgpu_discovery.h"
-#include "soc15_common.h"
#include "soc15_hw_ip.h"
-#include "nbio/nbio_2_3_offset.h"
#include "discovery.h"
#define mmRCC_CONFIG_MEMSIZE 0xde3
@@ -135,9 +133,10 @@ static int hw_id_map[MAX_HWIP] = {
static int amdgpu_discovery_read_binary(struct amdgpu_device *adev, uint8_t *binary)
{
uint64_t vram_size = (uint64_t)RREG32(mmRCC_CONFIG_MEMSIZE) << 20;
- uint64_t pos = vram_size - DISCOVERY_TMR_SIZE;
+ uint64_t pos = vram_size - adev->discovery_tmr_size;
- amdgpu_device_vram_access(adev, pos, (uint32_t *)binary, DISCOVERY_TMR_SIZE, false);
+ amdgpu_device_vram_access(adev, pos, (uint32_t *)binary,
+ adev->discovery_tmr_size, false);
return 0;
}
@@ -158,7 +157,7 @@ static inline bool amdgpu_discovery_verify_checksum(uint8_t *data, uint32_t size
return !!(amdgpu_discovery_calculate_checksum(data, size) == expected);
}
-int amdgpu_discovery_init(struct amdgpu_device *adev)
+static int amdgpu_discovery_init(struct amdgpu_device *adev)
{
struct table_info *info;
struct binary_header *bhdr;
@@ -169,17 +168,18 @@ int amdgpu_discovery_init(struct amdgpu_device *adev)
uint16_t checksum;
int r;
- adev->discovery = kzalloc(DISCOVERY_TMR_SIZE, GFP_KERNEL);
- if (!adev->discovery)
+ adev->discovery_tmr_size = DISCOVERY_TMR_SIZE;
+ adev->discovery_bin = kzalloc(adev->discovery_tmr_size, GFP_KERNEL);
+ if (!adev->discovery_bin)
return -ENOMEM;
- r = amdgpu_discovery_read_binary(adev, adev->discovery);
+ r = amdgpu_discovery_read_binary(adev, adev->discovery_bin);
if (r) {
DRM_ERROR("failed to read ip discovery binary\n");
goto out;
}
- bhdr = (struct binary_header *)adev->discovery;
+ bhdr = (struct binary_header *)adev->discovery_bin;
if (le32_to_cpu(bhdr->binary_signature) != BINARY_SIGNATURE) {
DRM_ERROR("invalid ip discovery binary signature\n");
@@ -192,7 +192,7 @@ int amdgpu_discovery_init(struct amdgpu_device *adev)
size = bhdr->binary_size - offset;
checksum = bhdr->binary_checksum;
- if (!amdgpu_discovery_verify_checksum(adev->discovery + offset,
+ if (!amdgpu_discovery_verify_checksum(adev->discovery_bin + offset,
size, checksum)) {
DRM_ERROR("invalid ip discovery binary checksum\n");
r = -EINVAL;
@@ -202,7 +202,7 @@ int amdgpu_discovery_init(struct amdgpu_device *adev)
info = &bhdr->table_list[IP_DISCOVERY];
offset = le16_to_cpu(info->offset);
checksum = le16_to_cpu(info->checksum);
- ihdr = (struct ip_discovery_header *)(adev->discovery + offset);
+ ihdr = (struct ip_discovery_header *)(adev->discovery_bin + offset);
if (le32_to_cpu(ihdr->signature) != DISCOVERY_TABLE_SIGNATURE) {
DRM_ERROR("invalid ip discovery data table signature\n");
@@ -210,7 +210,7 @@ int amdgpu_discovery_init(struct amdgpu_device *adev)
goto out;
}
- if (!amdgpu_discovery_verify_checksum(adev->discovery + offset,
+ if (!amdgpu_discovery_verify_checksum(adev->discovery_bin + offset,
ihdr->size, checksum)) {
DRM_ERROR("invalid ip discovery data table checksum\n");
r = -EINVAL;
@@ -220,9 +220,9 @@ int amdgpu_discovery_init(struct amdgpu_device *adev)
info = &bhdr->table_list[GC];
offset = le16_to_cpu(info->offset);
checksum = le16_to_cpu(info->checksum);
- ghdr = (struct gpu_info_header *)(adev->discovery + offset);
+ ghdr = (struct gpu_info_header *)(adev->discovery_bin + offset);
- if (!amdgpu_discovery_verify_checksum(adev->discovery + offset,
+ if (!amdgpu_discovery_verify_checksum(adev->discovery_bin + offset,
ghdr->size, checksum)) {
DRM_ERROR("invalid gc data table checksum\n");
r = -EINVAL;
@@ -232,16 +232,16 @@ int amdgpu_discovery_init(struct amdgpu_device *adev)
return 0;
out:
- kfree(adev->discovery);
- adev->discovery = NULL;
+ kfree(adev->discovery_bin);
+ adev->discovery_bin = NULL;
return r;
}
void amdgpu_discovery_fini(struct amdgpu_device *adev)
{
- kfree(adev->discovery);
- adev->discovery = NULL;
+ kfree(adev->discovery_bin);
+ adev->discovery_bin = NULL;
}
int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
@@ -257,14 +257,16 @@ int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
uint8_t num_base_address;
int hw_ip;
int i, j, k;
+ int r;
- if (!adev->discovery) {
- DRM_ERROR("ip discovery uninitialized\n");
- return -EINVAL;
+ r = amdgpu_discovery_init(adev);
+ if (r) {
+ DRM_ERROR("amdgpu_discovery_init failed\n");
+ return r;
}
- bhdr = (struct binary_header *)adev->discovery;
- ihdr = (struct ip_discovery_header *)(adev->discovery +
+ bhdr = (struct binary_header *)adev->discovery_bin;
+ ihdr = (struct ip_discovery_header *)(adev->discovery_bin +
le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset));
num_dies = le16_to_cpu(ihdr->num_dies);
@@ -272,7 +274,7 @@ int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
for (i = 0; i < num_dies; i++) {
die_offset = le16_to_cpu(ihdr->die_info[i].die_offset);
- dhdr = (struct die_header *)(adev->discovery + die_offset);
+ dhdr = (struct die_header *)(adev->discovery_bin + die_offset);
num_ips = le16_to_cpu(dhdr->num_ips);
ip_offset = die_offset + sizeof(*dhdr);
@@ -286,7 +288,7 @@ int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
le16_to_cpu(dhdr->die_id), num_ips);
for (j = 0; j < num_ips; j++) {
- ip = (struct ip *)(adev->discovery + ip_offset);
+ ip = (struct ip *)(adev->discovery_bin + ip_offset);
num_base_address = ip->num_base_address;
DRM_DEBUG("%s(%d) #%d v%d.%d.%d:\n",
@@ -335,24 +337,24 @@ int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id,
uint16_t num_ips;
int i, j;
- if (!adev->discovery) {
+ if (!adev->discovery_bin) {
DRM_ERROR("ip discovery uninitialized\n");
return -EINVAL;
}
- bhdr = (struct binary_header *)adev->discovery;
- ihdr = (struct ip_discovery_header *)(adev->discovery +
+ bhdr = (struct binary_header *)adev->discovery_bin;
+ ihdr = (struct ip_discovery_header *)(adev->discovery_bin +
le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset));
num_dies = le16_to_cpu(ihdr->num_dies);
for (i = 0; i < num_dies; i++) {
die_offset = le16_to_cpu(ihdr->die_info[i].die_offset);
- dhdr = (struct die_header *)(adev->discovery + die_offset);
+ dhdr = (struct die_header *)(adev->discovery_bin + die_offset);
num_ips = le16_to_cpu(dhdr->num_ips);
ip_offset = die_offset + sizeof(*dhdr);
for (j = 0; j < num_ips; j++) {
- ip = (struct ip *)(adev->discovery + ip_offset);
+ ip = (struct ip *)(adev->discovery_bin + ip_offset);
if (le16_to_cpu(ip->hw_id) == hw_id) {
if (major)
@@ -375,13 +377,13 @@ int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev)
struct binary_header *bhdr;
struct gc_info_v1_0 *gc_info;
- if (!adev->discovery) {
+ if (!adev->discovery_bin) {
DRM_ERROR("ip discovery uninitialized\n");
return -EINVAL;
}
- bhdr = (struct binary_header *)adev->discovery;
- gc_info = (struct gc_info_v1_0 *)(adev->discovery +
+ bhdr = (struct binary_header *)adev->discovery_bin;
+ gc_info = (struct gc_info_v1_0 *)(adev->discovery_bin +
le16_to_cpu(bhdr->table_list[GC].offset));
adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->gc_num_se);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
index ba78e15d9b05..d50d597c45ed 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
@@ -26,7 +26,6 @@
#define DISCOVERY_TMR_SIZE (64 << 10)
-int amdgpu_discovery_init(struct amdgpu_device *adev);
void amdgpu_discovery_fini(struct amdgpu_device *adev);
int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev);
int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index 84cee27cd7ef..f7143d927b6d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -523,7 +523,8 @@ uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev,
break;
case CHIP_RAVEN:
/* enable S/G on PCO and RV2 */
- if (adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)
+ if ((adev->apu_flags & AMD_APU_IS_RAVEN2) ||
+ (adev->apu_flags & AMD_APU_IS_PICASSO))
domain |= AMDGPU_GEM_DOMAIN_GTT;
break;
default:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index ffeb20f11c07..43d8ed7dbd00 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -38,6 +38,7 @@
#include <drm/amdgpu_drm.h>
#include <linux/dma-buf.h>
#include <linux/dma-fence-array.h>
+#include <linux/pci-p2pdma.h>
/**
* amdgpu_gem_prime_vmap - &dma_buf_ops.vmap implementation
@@ -179,6 +180,9 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
int r;
+ if (pci_p2pdma_distance_many(adev->pdev, &attach->dev, 1, true) < 0)
+ attach->peer2peer = false;
+
if (attach->dev->driver == adev->dev->driver)
return 0;
@@ -272,14 +276,21 @@ static struct sg_table *amdgpu_dma_buf_map(struct dma_buf_attachment *attach,
struct dma_buf *dma_buf = attach->dmabuf;
struct drm_gem_object *obj = dma_buf->priv;
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
struct sg_table *sgt;
long r;
if (!bo->pin_count) {
- /* move buffer into GTT */
+ /* move buffer into GTT or VRAM */
struct ttm_operation_ctx ctx = { false, false };
+ unsigned domains = AMDGPU_GEM_DOMAIN_GTT;
- amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
+ if (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM &&
+ attach->peer2peer) {
+ bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+ domains |= AMDGPU_GEM_DOMAIN_VRAM;
+ }
+ amdgpu_bo_placement_from_domain(bo, domains);
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
if (r)
return ERR_PTR(r);
@@ -289,20 +300,34 @@ static struct sg_table *amdgpu_dma_buf_map(struct dma_buf_attachment *attach,
return ERR_PTR(-EBUSY);
}
- sgt = drm_prime_pages_to_sg(bo->tbo.ttm->pages, bo->tbo.num_pages);
- if (IS_ERR(sgt))
- return sgt;
-
- if (!dma_map_sg_attrs(attach->dev, sgt->sgl, sgt->nents, dir,
- DMA_ATTR_SKIP_CPU_SYNC))
- goto error_free;
+ switch (bo->tbo.mem.mem_type) {
+ case TTM_PL_TT:
+ sgt = drm_prime_pages_to_sg(bo->tbo.ttm->pages,
+ bo->tbo.num_pages);
+ if (IS_ERR(sgt))
+ return sgt;
+
+ if (!dma_map_sg_attrs(attach->dev, sgt->sgl, sgt->nents, dir,
+ DMA_ATTR_SKIP_CPU_SYNC))
+ goto error_free;
+ break;
+
+ case TTM_PL_VRAM:
+ r = amdgpu_vram_mgr_alloc_sgt(adev, &bo->tbo.mem, attach->dev,
+ dir, &sgt);
+ if (r)
+ return ERR_PTR(r);
+ break;
+ default:
+ return ERR_PTR(-EINVAL);
+ }
return sgt;
error_free:
sg_free_table(sgt);
kfree(sgt);
- return ERR_PTR(-ENOMEM);
+ return ERR_PTR(-EBUSY);
}
/**
@@ -318,9 +343,18 @@ static void amdgpu_dma_buf_unmap(struct dma_buf_attachment *attach,
struct sg_table *sgt,
enum dma_data_direction dir)
{
- dma_unmap_sg(attach->dev, sgt->sgl, sgt->nents, dir);
- sg_free_table(sgt);
- kfree(sgt);
+ struct dma_buf *dma_buf = attach->dmabuf;
+ struct drm_gem_object *obj = dma_buf->priv;
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+
+ if (sgt->sgl->page_link) {
+ dma_unmap_sg(attach->dev, sgt->sgl, sgt->nents, dir);
+ sg_free_table(sgt);
+ kfree(sgt);
+ } else {
+ amdgpu_vram_mgr_free_sgt(adev, attach->dev, dir, sgt);
+ }
}
/**
@@ -514,6 +548,7 @@ amdgpu_dma_buf_move_notify(struct dma_buf_attachment *attach)
}
static const struct dma_buf_attach_ops amdgpu_dma_buf_attach_ops = {
+ .allow_peer2peer = true,
.move_notify = amdgpu_dma_buf_move_notify
};
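The new map/unmap paths above pick the teardown routine by inspecting the returned sg_table: a table built from system pages has page-backed entries (the sgl->page_link check in amdgpu_dma_buf_unmap()), while the VRAM table produced for peer-to-peer exports carries only bus addresses. A minimal user-space sketch of that discrimination follows; the struct and helper names are invented for illustration and are not the driver's API.

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

/* Hypothetical stand-in for one scatter-gather entry. */
struct sg_entry {
	void *page;          /* set for system-memory (GTT) mappings, like page_link */
	unsigned long dma;   /* bus address, present in both cases */
};

struct sg_list {
	struct sg_entry *ents;
	int nents;
};

static void free_system_sg(struct sg_list *sg)
{
	printf("dma_unmap + sg_free_table path (%d entries)\n", sg->nents);
	free(sg->ents);
	free(sg);
}

static void free_vram_sg(struct sg_list *sg)
{
	printf("vram_mgr_free_sgt path (%d entries)\n", sg->nents);
	free(sg->ents);
	free(sg);
}

/* Mirror of the unmap decision: page-backed first entry => system memory. */
static void put_sg(struct sg_list *sg)
{
	if (sg->ents[0].page)
		free_system_sg(sg);
	else
		free_vram_sg(sg);
}

int main(void)
{
	struct sg_list *gtt = malloc(sizeof(*gtt));
	struct sg_list *vram = malloc(sizeof(*vram));

	gtt->nents = 1;
	gtt->ents = calloc(1, sizeof(*gtt->ents));
	gtt->ents[0].page = (void *)gtt;   /* pretend page pointer */
	gtt->ents[0].dma = 0x1000;

	vram->nents = 1;
	vram->ents = calloc(1, sizeof(*vram->ents));
	vram->ents[0].page = NULL;         /* VRAM export: no struct page backing */
	vram->ents[0].dma = 0x80000000UL;

	put_sg(gtt);
	put_sg(vram);
	return 0;
}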
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c
index ba1bb95a3cf9..d2a105e3bf7c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c
@@ -856,7 +856,7 @@ void amdgpu_add_thermal_controller(struct amdgpu_device *adev)
const char *name = pp_lib_thermal_controller_names[controller->ucType];
info.addr = controller->ucI2cAddress >> 1;
strlcpy(info.type, name, sizeof(info.type));
- i2c_new_device(&adev->pm.i2c_bus->adapter, &info);
+ i2c_new_client_device(&adev->pm.i2c_bus->adapter, &info);
}
} else {
DRM_INFO("Unknown thermal controller type %d at 0x%02x %s fan control\n",
@@ -1188,3 +1188,13 @@ int amdgpu_dpm_set_df_cstate(struct amdgpu_device *adev,
return ret;
}
+
+int amdgpu_dpm_allow_xgmi_power_down(struct amdgpu_device *adev, bool en)
+{
+ struct smu_context *smu = &adev->smu;
+
+ if (is_support_sw_smu(adev))
+ return smu_allow_xgmi_power_down(smu, en);
+
+ return 0;
+}
\ No newline at end of file
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
index 936d85aa0fbc..6a8aae70a0e6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
@@ -450,6 +450,7 @@ struct amdgpu_pm {
/* Used for I2C access to various EEPROMs on relevant ASICs */
struct i2c_adapter smu_i2c;
+ struct list_head pm_attr_list;
};
#define R600_SSTU_DFLT 0
@@ -538,4 +539,6 @@ int amdgpu_dpm_baco_enter(struct amdgpu_device *adev);
int amdgpu_dpm_set_df_cstate(struct amdgpu_device *adev,
uint32_t cstate);
+int amdgpu_dpm_allow_xgmi_power_down(struct amdgpu_device *adev, bool en);
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index a735d79a717b..126e74758a34 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -86,9 +86,10 @@
* - 3.35.0 - Add drm_amdgpu_info_device::tcc_disabled_mask
* - 3.36.0 - Allow reading more status registers on si/cik
* - 3.37.0 - L2 is invalidated before SDMA IBs, needed for correctness
+ * - 3.38.0 - Add AMDGPU_IB_FLAG_EMIT_MEM_SYNC
*/
#define KMS_DRIVER_MAJOR 3
-#define KMS_DRIVER_MINOR 37
+#define KMS_DRIVER_MINOR 38
#define KMS_DRIVER_PATCHLEVEL 0
int amdgpu_vram_limit = 0;
@@ -139,12 +140,14 @@ int amdgpu_emu_mode = 0;
uint amdgpu_smu_memory_pool_size = 0;
/* FBC (bit 0) disabled by default*/
uint amdgpu_dc_feature_mask = 0;
+uint amdgpu_dc_debug_mask = 0;
int amdgpu_async_gfx_ring = 1;
int amdgpu_mcbp = 0;
int amdgpu_discovery = -1;
int amdgpu_mes = 0;
int amdgpu_noretry;
int amdgpu_force_asic_type = -1;
+int amdgpu_tmz = 0;
struct amdgpu_mgpu_info mgpu_info = {
.mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
@@ -688,13 +691,12 @@ MODULE_PARM_DESC(halt_if_hws_hang, "Halt if HWS hang is detected (0 = off (defau
/**
* DOC: hws_gws_support(bool)
- * Whether HWS support gws barriers. Default value: false (not supported)
- * This will be replaced with a MEC firmware version check once firmware
- * is ready
+ * Assume that HWS supports GWS barriers regardless of what the firmware version
+ * check says. Default value: false (rely on the MEC2 firmware version check).
*/
bool hws_gws_support;
module_param(hws_gws_support, bool, 0444);
-MODULE_PARM_DESC(hws_gws_support, "MEC FW support gws barriers (false = not supported (Default), true = supported)");
+MODULE_PARM_DESC(hws_gws_support, "Assume MEC2 FW supports GWS barriers (false = rely on FW version check (Default), true = force supported)");
/**
* DOC: queue_preemption_timeout_ms (int)
@@ -714,6 +716,13 @@ MODULE_PARM_DESC(dcfeaturemask, "all stable DC features enabled (default))");
module_param_named(dcfeaturemask, amdgpu_dc_feature_mask, uint, 0444);
/**
+ * DOC: dcdebugmask (uint)
+ * Override which display debug options are enabled. See enum DC_DEBUG_MASK in drivers/gpu/drm/amd/include/amd_shared.h.
+ */
+MODULE_PARM_DESC(dcdebugmask, "all debug options disabled (default))");
+module_param_named(dcdebugmask, amdgpu_dc_debug_mask, uint, 0444);
+
+/**
* DOC: abmlevel (uint)
* Override the default ABM (Adaptive Backlight Management) level used for DC
* enabled hardware. Requires DMCU to be supported and loaded.
@@ -729,6 +738,16 @@ uint amdgpu_dm_abm_level = 0;
MODULE_PARM_DESC(abmlevel, "ABM level (0 = off (default), 1-4 = backlight reduction level) ");
module_param_named(abmlevel, amdgpu_dm_abm_level, uint, 0444);
+/**
+ * DOC: tmz (int)
+ * Trusted Memory Zone (TMZ) is a method to protect data being written
+ * to or read from memory.
+ *
+ * The default value is 0 (off). TODO: switch the default to auto once the feature is complete.
+ */
+MODULE_PARM_DESC(tmz, "Enable TMZ feature (-1 = auto, 0 = off (default), 1 = on)");
+module_param_named(tmz, amdgpu_tmz, int, 0444);
+
static const struct pci_device_id pciidlist[] = {
#ifdef CONFIG_DRM_AMDGPU_SI
{0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
@@ -1164,14 +1183,6 @@ static int amdgpu_pmops_resume(struct device *dev)
{
struct drm_device *drm_dev = dev_get_drvdata(dev);
- /* GPU comes up enabled by the bios on resume */
- if (amdgpu_device_supports_boco(drm_dev) ||
- amdgpu_device_supports_baco(drm_dev)) {
- pm_runtime_disable(dev);
- pm_runtime_set_active(dev);
- pm_runtime_enable(dev);
- }
-
return amdgpu_device_resume(drm_dev, true);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 7531527067df..d878fe7fee51 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -192,14 +192,22 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f,
* Used for polling fences.
* Returns 0 on success, -ENOMEM on failure.
*/
-int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s)
+int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s,
+ uint32_t timeout)
{
uint32_t seq;
+ signed long r;
if (!s)
return -EINVAL;
seq = ++ring->fence_drv.sync_seq;
+ r = amdgpu_fence_wait_polling(ring,
+ seq - ring->fence_drv.num_fences_mask,
+ timeout);
+ if (r < 1)
+ return -ETIMEDOUT;
+
amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
seq, 0);
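With the hunk above, amdgpu_fence_emit_polling() first waits for seq - num_fences_mask before writing a new fence, which bounds the number of in-flight polling fences to the ring's fence-slot window. A stand-alone sketch of that windowing arithmetic; the slot count of 8 is a made-up value, not taken from the driver.

#include <stdio.h>

#define NUM_FENCES  8u                   /* hypothetical slot count (power of two) */
#define FENCES_MASK (NUM_FENCES - 1u)

int main(void)
{
	unsigned int seq;

	for (seq = 1; seq <= 12; seq++) {
		/* Sequence that must have signalled before 'seq' may reuse its
		 * write-back slot; zero or below means nothing to wait on yet. */
		int must_wait = (int)seq - (int)FENCES_MASK;

		printf("emit seq %2u -> slot %u, wait for seq %d first\n",
		       seq, seq & FENCES_MASK, must_wait > 0 ? must_wait : 0);
	}
	return 0;
}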
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
new file mode 100644
index 000000000000..815c072ac4da
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
@@ -0,0 +1,185 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/pci.h>
+
+#include "amdgpu.h"
+#include "amdgpu_i2c.h"
+#include "smu_v11_0_i2c.h"
+#include "atom.h"
+
+#define I2C_PRODUCT_INFO_ADDR 0xAC
+#define I2C_PRODUCT_INFO_ADDR_SIZE 0x2
+#define I2C_PRODUCT_INFO_OFFSET 0xC0
+
+bool is_fru_eeprom_supported(struct amdgpu_device *adev)
+{
+ /* TODO: Gaming SKUs don't have the FRU EEPROM.
+ * Use this hack to address hangs on modprobe on gaming SKUs
+ * until a proper solution can be implemented by only supporting
+ * the explicit chip IDs for VG20 Server cards
+ *
+ * TODO: Add list of supported Arcturus DIDs once confirmed
+ */
+ if ((adev->asic_type == CHIP_VEGA20 && adev->pdev->device == 0x66a0) ||
+ (adev->asic_type == CHIP_VEGA20 && adev->pdev->device == 0x66a1) ||
+ (adev->asic_type == CHIP_VEGA20 && adev->pdev->device == 0x66a4))
+ return true;
+ return false;
+}
+
+int amdgpu_fru_read_eeprom(struct amdgpu_device *adev, uint32_t addrptr,
+ unsigned char *buff)
+{
+ int ret, size;
+ struct i2c_msg msg = {
+ .addr = I2C_PRODUCT_INFO_ADDR,
+ .flags = I2C_M_RD,
+ .buf = buff,
+ };
+ buff[0] = 0;
+ buff[1] = addrptr;
+ msg.len = I2C_PRODUCT_INFO_ADDR_SIZE + 1;
+ ret = i2c_transfer(&adev->pm.smu_i2c, &msg, 1);
+
+ if (ret < 1) {
+ DRM_WARN("FRU: Failed to get size field");
+ return ret;
+ }
+
+ /* The size returned by the i2c transfer needs 0xC0 subtracted, since the
+ * size field apparently always reports as 0xC0 + the actual size.
+ */
+ size = buff[2] - I2C_PRODUCT_INFO_OFFSET;
+ /* Add 1 since address field was 1 byte */
+ buff[1] = addrptr + 1;
+
+ msg.len = I2C_PRODUCT_INFO_ADDR_SIZE + size;
+ ret = i2c_transfer(&adev->pm.smu_i2c, &msg, 1);
+
+ if (ret < 1) {
+ DRM_WARN("FRU: Failed to get data field");
+ return ret;
+ }
+
+ return size;
+}
+
+int amdgpu_fru_get_product_info(struct amdgpu_device *adev)
+{
+ unsigned char buff[34];
+ int addrptr = 0, size = 0;
+
+ if (!is_fru_eeprom_supported(adev))
+ return 0;
+
+ /* If algo exists, it means that the i2c_adapter's initialized */
+ if (!adev->pm.smu_i2c.algo) {
+ DRM_WARN("Cannot access FRU, EEPROM accessor not initialized");
+ return 0;
+ }
+
+ /* There's a lot of repetition here. This is due to the FRU having
+ * variable-length fields. To get the information, we have to find the
+ * size of each field and then keep reading field after field until we
+ * have all of the data that we want. We use addrptr to track the
+ * address as we go.
+ */
+
+ /* The first fields are all 1 byte in size. Bytes 0-7 are offsets that
+ * contain information that isn't useful to us.
+ * Bytes 8-0xa are also 1 byte and hold the size of the entire struct
+ * and the language field, so just start from 0xb, the manufacturer size.
+ */
+ addrptr = 0xb;
+ size = amdgpu_fru_read_eeprom(adev, addrptr, buff);
+ if (size < 1) {
+ DRM_ERROR("Failed to read FRU Manufacturer, ret:%d", size);
+ return size;
+ }
+
+ /* Increment the addrptr by the size of the field, and 1 due to the
+ * size field being 1 byte. This pattern continues below.
+ */
+ addrptr += size + 1;
+ size = amdgpu_fru_read_eeprom(adev, addrptr, buff);
+ if (size < 1) {
+ DRM_ERROR("Failed to read FRU product name, ret:%d", size);
+ return size;
+ }
+
+ /* Product name should only be 32 characters. Any more,
+ * and something could be wrong. Cap it at 32 to be safe
+ */
+ if (size > 32) {
+ DRM_WARN("FRU Product Number is larger than 32 characters. This is likely a mistake");
+ size = 32;
+ }
+ /* Start at 2 due to buff using fields 0 and 1 for the address */
+ memcpy(adev->product_name, &buff[2], size);
+ adev->product_name[size] = '\0';
+
+ addrptr += size + 1;
+ size = amdgpu_fru_read_eeprom(adev, addrptr, buff);
+ if (size < 1) {
+ DRM_ERROR("Failed to read FRU product number, ret:%d", size);
+ return size;
+ }
+
+ /* Product number should only be 16 characters. Any more,
+ * and something could be wrong. Cap it at 16 to be safe
+ */
+ if (size > 16) {
+ DRM_WARN("FRU Product Number is larger than 16 characters. This is likely a mistake");
+ size = 16;
+ }
+ memcpy(adev->product_number, &buff[2], size);
+ adev->product_number[size] = '\0';
+
+ addrptr += size + 1;
+ size = amdgpu_fru_read_eeprom(adev, addrptr, buff);
+
+ if (size < 1) {
+ DRM_ERROR("Failed to read FRU product version, ret:%d", size);
+ return size;
+ }
+
+ addrptr += size + 1;
+ size = amdgpu_fru_read_eeprom(adev, addrptr, buff);
+
+ if (size < 1) {
+ DRM_ERROR("Failed to read FRU serial number, ret:%d", size);
+ return size;
+ }
+
+ /* Serial number should only be 16 characters. Any more,
+ * and something could be wrong. Cap it at 16 to be safe
+ */
+ if (size > 16) {
+ DRM_WARN("FRU Serial Number is larger than 16 characters. This is likely a mistake");
+ size = 16;
+ }
+ memcpy(adev->serial, &buff[2], size);
+ adev->serial[size] = '\0';
+
+ return 0;
+}
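The new file above walks the FRU product-info area as a chain of variable-length records, each a 1-byte size followed by that many data bytes, advancing addrptr by size + 1. A small user-space sketch of the same walk over a made-up byte buffer; the field contents and sizes are invented, not real FRU data.

#include <stdio.h>
#include <string.h>

int main(void)
{
	/* Made-up record chain: [size][data...][size][data...]... */
	const unsigned char area[] = {
		3, 'A', 'M', 'D',                  /* manufacturer   */
		6, 'D', 'E', 'M', 'O', 'G', 'P',   /* product name   */
		4, '1', '2', '3', '4',             /* product number */
	};
	const char *label[] = { "manufacturer", "product name", "product number" };
	size_t addrptr = 0;

	for (int i = 0; i < 3; i++) {
		unsigned char size = area[addrptr];
		char buf[32] = { 0 };

		memcpy(buf, &area[addrptr + 1], size);
		printf("%-14s (%u bytes): %s\n", label[i], (unsigned)size, buf);

		/* Skip the data plus the 1-byte size field, like addrptr += size + 1. */
		addrptr += size + 1;
	}
	return 0;
}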
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.h
new file mode 100644
index 000000000000..968115c97e33
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_PRODINFO_H__
+#define __AMDGPU_PRODINFO_H__
+
+int amdgpu_fru_get_product_info(struct amdgpu_device *adev);
+
+#endif // __AMDGPU_PRODINFO_H__
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 4277125a79ee..4ed9958af94e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -29,6 +29,7 @@
#include <linux/module.h>
#include <linux/pagemap.h>
#include <linux/pci.h>
+#include <linux/dma-buf.h>
#include <drm/amdgpu_drm.h>
#include <drm/drm_debugfs.h>
@@ -161,16 +162,17 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
struct amdgpu_bo_list_entry vm_pd;
struct list_head list, duplicates;
+ struct dma_fence *fence = NULL;
struct ttm_validate_buffer tv;
struct ww_acquire_ctx ticket;
struct amdgpu_bo_va *bo_va;
- int r;
+ long r;
INIT_LIST_HEAD(&list);
INIT_LIST_HEAD(&duplicates);
tv.bo = &bo->tbo;
- tv.num_shared = 1;
+ tv.num_shared = 2;
list_add(&tv.head, &list);
amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);
@@ -178,28 +180,34 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
if (r) {
dev_err(adev->dev, "leaking bo va because "
- "we fail to reserve bo (%d)\n", r);
+ "we fail to reserve bo (%ld)\n", r);
return;
}
bo_va = amdgpu_vm_bo_find(vm, bo);
- if (bo_va && --bo_va->ref_count == 0) {
- amdgpu_vm_bo_rmv(adev, bo_va);
-
- if (amdgpu_vm_ready(vm)) {
- struct dma_fence *fence = NULL;
+ if (!bo_va || --bo_va->ref_count)
+ goto out_unlock;
- r = amdgpu_vm_clear_freed(adev, vm, &fence);
- if (unlikely(r)) {
- dev_err(adev->dev, "failed to clear page "
- "tables on GEM object close (%d)\n", r);
- }
+ amdgpu_vm_bo_rmv(adev, bo_va);
+ if (!amdgpu_vm_ready(vm))
+ goto out_unlock;
- if (fence) {
- amdgpu_bo_fence(bo, fence, true);
- dma_fence_put(fence);
- }
- }
+ fence = dma_resv_get_excl(bo->tbo.base.resv);
+ if (fence) {
+ amdgpu_bo_fence(bo, fence, true);
+ fence = NULL;
}
+
+ r = amdgpu_vm_clear_freed(adev, vm, &fence);
+ if (r || !fence)
+ goto out_unlock;
+
+ amdgpu_bo_fence(bo, fence, true);
+ dma_fence_put(fence);
+
+out_unlock:
+ if (unlikely(r < 0))
+ dev_err(adev->dev, "failed to clear page "
+ "tables on GEM object close (%ld)\n", r);
ttm_eu_backoff_reservation(&ticket, &list);
}
@@ -226,7 +234,8 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
AMDGPU_GEM_CREATE_CPU_GTT_USWC |
AMDGPU_GEM_CREATE_VRAM_CLEARED |
AMDGPU_GEM_CREATE_VM_ALWAYS_VALID |
- AMDGPU_GEM_CREATE_EXPLICIT_SYNC))
+ AMDGPU_GEM_CREATE_EXPLICIT_SYNC |
+ AMDGPU_GEM_CREATE_ENCRYPTED))
return -EINVAL;
@@ -234,6 +243,11 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
if (args->in.domains & ~AMDGPU_GEM_DOMAIN_MASK)
return -EINVAL;
+ if (!amdgpu_is_tmz(adev) && (flags & AMDGPU_GEM_CREATE_ENCRYPTED)) {
+ DRM_NOTE_ONCE("Cannot allocate secure buffer since TMZ is disabled\n");
+ return -EINVAL;
+ }
+
/* create a gem object to contain this object in */
if (args->in.domains & (AMDGPU_GEM_DOMAIN_GDS |
AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA)) {
@@ -854,7 +868,8 @@ static int amdgpu_debugfs_gem_bo_info(int id, void *ptr, void *data)
attachment = READ_ONCE(bo->tbo.base.import_attach);
if (attachment)
- seq_printf(m, " imported from %p", dma_buf);
+ seq_printf(m, " imported from %p%s", dma_buf,
+ attachment->peer2peer ? " P2P" : "");
else if (dma_buf)
seq_printf(m, " exported as %p", dma_buf);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 6b9c9193cdfa..d612033a23ac 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -48,7 +48,7 @@ int amdgpu_gfx_mec_queue_to_bit(struct amdgpu_device *adev, int mec,
return bit;
}
-void amdgpu_gfx_bit_to_mec_queue(struct amdgpu_device *adev, int bit,
+void amdgpu_queue_mask_bit_to_mec_queue(struct amdgpu_device *adev, int bit,
int *mec, int *pipe, int *queue)
{
*queue = bit % adev->gfx.mec.num_queue_per_pipe;
@@ -274,7 +274,7 @@ static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev,
if (test_bit(queue_bit, adev->gfx.mec.queue_bitmap))
continue;
- amdgpu_gfx_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue);
+ amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue);
/*
* 1. Using pipes 2/3 from MEC 2 seems cause problems.
@@ -304,10 +304,6 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
spin_lock_init(&kiq->ring_lock);
- r = amdgpu_device_wb_get(adev, &kiq->reg_val_offs);
- if (r)
- return r;
-
ring->adev = NULL;
ring->ring_obj = NULL;
ring->use_doorbell = true;
@@ -318,9 +314,11 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
return r;
ring->eop_gpu_addr = kiq->eop_gpu_addr;
+ ring->no_scheduler = true;
sprintf(ring->name, "kiq_%d.%d.%d", ring->me, ring->pipe, ring->queue);
r = amdgpu_ring_init(adev, ring, 1024,
- irq, AMDGPU_CP_KIQ_IRQ_DRIVER0);
+ irq, AMDGPU_CP_KIQ_IRQ_DRIVER0,
+ AMDGPU_RING_PRIO_DEFAULT);
if (r)
dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);
@@ -329,7 +327,6 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring)
{
- amdgpu_device_wb_free(ring->adev, ring->adev->gfx.kiq.reg_val_offs);
amdgpu_ring_fini(ring);
}
@@ -488,6 +485,19 @@ int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev)
return amdgpu_ring_test_helper(kiq_ring);
}
+int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev,
+ int queue_bit)
+{
+ int mec, pipe, queue;
+ int set_resource_bit = 0;
+
+ amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue);
+
+ set_resource_bit = mec * 4 * 8 + pipe * 8 + queue;
+
+ return set_resource_bit;
+}
+
int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev)
{
struct amdgpu_kiq *kiq = &adev->gfx.kiq;
@@ -510,7 +520,7 @@ int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev)
break;
}
- queue_mask |= (1ull << i);
+ queue_mask |= (1ull << amdgpu_queue_mask_bit_to_set_resource_bit(adev, i));
}
DRM_INFO("kiq ring mec %d pipe %d q %d\n", kiq_ring->me, kiq_ring->pipe,
@@ -670,16 +680,23 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
{
signed long r, cnt = 0;
unsigned long flags;
- uint32_t seq;
+ uint32_t seq, reg_val_offs = 0, value = 0;
struct amdgpu_kiq *kiq = &adev->gfx.kiq;
struct amdgpu_ring *ring = &kiq->ring;
BUG_ON(!ring->funcs->emit_rreg);
spin_lock_irqsave(&kiq->ring_lock, flags);
+ if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
+ pr_err("critical bug! too many kiq readers\n");
+ goto failed_unlock;
+ }
amdgpu_ring_alloc(ring, 32);
- amdgpu_ring_emit_rreg(ring, reg);
- amdgpu_fence_emit_polling(ring, &seq);
+ amdgpu_ring_emit_rreg(ring, reg, reg_val_offs);
+ r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
+ if (r)
+ goto failed_undo;
+
amdgpu_ring_commit(ring);
spin_unlock_irqrestore(&kiq->ring_lock, flags);
@@ -705,9 +722,18 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
if (cnt > MAX_KIQ_REG_TRY)
goto failed_kiq_read;
- return adev->wb.wb[kiq->reg_val_offs];
+ mb();
+ value = adev->wb.wb[reg_val_offs];
+ amdgpu_device_wb_free(adev, reg_val_offs);
+ return value;
+failed_undo:
+ amdgpu_ring_undo(ring);
+failed_unlock:
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
failed_kiq_read:
+ if (reg_val_offs)
+ amdgpu_device_wb_free(adev, reg_val_offs);
pr_err("failed to read reg:%x\n", reg);
return ~0;
}
@@ -725,7 +751,10 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
spin_lock_irqsave(&kiq->ring_lock, flags);
amdgpu_ring_alloc(ring, 32);
amdgpu_ring_emit_wreg(ring, reg, v);
- amdgpu_fence_emit_polling(ring, &seq);
+ r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
+ if (r)
+ goto failed_undo;
+
amdgpu_ring_commit(ring);
spin_unlock_irqrestore(&kiq->ring_lock, flags);
@@ -754,6 +783,9 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
return;
+failed_undo:
+ amdgpu_ring_undo(ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
failed_kiq_write:
pr_err("failed to write reg:%x\n", reg);
}
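The new amdgpu_queue_mask_bit_to_set_resource_bit() repacks a driver queue bit into the mec * 4 * 8 + pipe * 8 + queue layout used for the KIQ set_resources queue mask. A small sketch of that repacking; the 2-pipes-per-MEC, 8-queues-per-pipe decode topology below is an assumption chosen only to show that the two packings can differ.

#include <stdio.h>

#define QUEUES_PER_PIPE 8   /* assumed decode topology for the demo */
#define PIPES_PER_MEC   2

int main(void)
{
	for (int bit = 0; bit < 2 * PIPES_PER_MEC * QUEUES_PER_PIPE; bit += 5) {
		int queue = bit % QUEUES_PER_PIPE;
		int pipe  = (bit / QUEUES_PER_PIPE) % PIPES_PER_MEC;
		int mec   = bit / (QUEUES_PER_PIPE * PIPES_PER_MEC);

		/* Same packing as amdgpu_queue_mask_bit_to_set_resource_bit(). */
		int set_resource_bit = mec * 4 * 8 + pipe * 8 + queue;

		printf("queue_bit %2d -> mec %d pipe %d queue %d -> set_resources bit %2d\n",
		       bit, mec, pipe, queue, set_resource_bit);
	}
	return 0;
}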
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 5825692d07e4..d43c11671a38 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -103,7 +103,6 @@ struct amdgpu_kiq {
struct amdgpu_ring ring;
struct amdgpu_irq_src irq;
const struct kiq_pm4_funcs *pmf;
- uint32_t reg_val_offs;
};
/*
@@ -286,13 +285,8 @@ struct amdgpu_gfx {
bool me_fw_write_wait;
bool cp_fw_write_wait;
struct amdgpu_ring gfx_ring[AMDGPU_MAX_GFX_RINGS];
- struct drm_gpu_scheduler *gfx_sched[AMDGPU_MAX_GFX_RINGS];
- uint32_t num_gfx_sched;
unsigned num_gfx_rings;
struct amdgpu_ring compute_ring[AMDGPU_MAX_COMPUTE_RINGS];
- struct drm_gpu_scheduler **compute_prio_sched[AMDGPU_GFX_PIPE_PRIO_MAX];
- struct drm_gpu_scheduler *compute_sched[AMDGPU_MAX_COMPUTE_RINGS];
- uint32_t num_compute_sched[AMDGPU_GFX_PIPE_PRIO_MAX];
unsigned num_compute_rings;
struct amdgpu_irq_src eop_irq;
struct amdgpu_irq_src priv_reg_irq;
@@ -370,7 +364,7 @@ void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev);
int amdgpu_gfx_mec_queue_to_bit(struct amdgpu_device *adev, int mec,
int pipe, int queue);
-void amdgpu_gfx_bit_to_mec_queue(struct amdgpu_device *adev, int bit,
+void amdgpu_queue_mask_bit_to_mec_queue(struct amdgpu_device *adev, int bit,
int *mec, int *pipe, int *queue);
bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev, int mec,
int pipe, int queue);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 5884ab590486..acabb57aa8af 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -136,8 +136,8 @@ uint64_t amdgpu_gmc_agp_addr(struct ttm_buffer_object *bo)
/**
* amdgpu_gmc_vram_location - try to find VRAM location
*
- * @adev: amdgpu device structure holding all necessary informations
- * @mc: memory controller structure holding memory informations
+ * @adev: amdgpu device structure holding all necessary information
+ * @mc: memory controller structure holding memory information
* @base: base address at which to put VRAM
*
* Function will try to place VRAM at base address provided
@@ -165,8 +165,8 @@ void amdgpu_gmc_vram_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc,
/**
* amdgpu_gmc_gart_location - try to find GART location
*
- * @adev: amdgpu device structure holding all necessary informations
- * @mc: memory controller structure holding memory informations
+ * @adev: amdgpu device structure holding all necessary information
+ * @mc: memory controller structure holding memory information
*
* Function will try to place GART before or after VRAM.
*
@@ -207,8 +207,8 @@ void amdgpu_gmc_gart_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
/**
* amdgpu_gmc_agp_location - try to find AGP location
- * @adev: amdgpu device structure holding all necessary informations
- * @mc: memory controller structure holding memory informations
+ * @adev: amdgpu device structure holding all necessary information
+ * @mc: memory controller structure holding memory information
*
* Function will try to find a place for the AGP BAR in the MC address
* space.
@@ -373,3 +373,38 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev)
return 0;
}
+
+/**
+ * amdgpu_gmc_tmz_set -- check and set if a device supports TMZ
+ * @adev: amdgpu_device pointer
+ *
+ * Check and set if the device @adev supports Trusted Memory
+ * Zones (TMZ).
+ */
+void amdgpu_gmc_tmz_set(struct amdgpu_device *adev)
+{
+ switch (adev->asic_type) {
+ case CHIP_RAVEN:
+ case CHIP_RENOIR:
+ case CHIP_NAVI10:
+ case CHIP_NAVI14:
+ case CHIP_NAVI12:
+ /* Don't enable it by default yet.
+ */
+ if (amdgpu_tmz < 1) {
+ adev->gmc.tmz_enabled = false;
+ dev_info(adev->dev,
+ "Trusted Memory Zone (TMZ) feature disabled as experimental (default)\n");
+ } else {
+ adev->gmc.tmz_enabled = true;
+ dev_info(adev->dev,
+ "Trusted Memory Zone (TMZ) feature enabled as experimental (cmd line)\n");
+ }
+ break;
+ default:
+ adev->gmc.tmz_enabled = false;
+ dev_warn(adev->dev,
+ "Trusted Memory Zone (TMZ) feature not supported\n");
+ break;
+ }
+}
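The tmz parameter documented earlier is tri-state (-1 = auto, 0 = off, 1 = on), but the function above currently treats anything below 1 as off while the feature is experimental. A tiny user-space sketch of that interpretation, for illustration only:

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical mirror of the module parameter handling above. */
static bool tmz_enabled(int param)
{
	if (param > 0)
		return true;
	/* 0 = off; -1 = auto, which the code above still treats as off
	 * while TMZ is experimental. */
	return false;
}

int main(void)
{
	const int params[] = { -1, 0, 1 };

	for (unsigned i = 0; i < sizeof(params) / sizeof(params[0]); i++)
		printf("tmz=%2d on a supported ASIC -> %s\n", params[i],
		       tmz_enabled(params[i]) ? "enabled" : "disabled");
	return 0;
}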
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index 7546da0cc70c..2bd9423c1dab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -213,6 +213,8 @@ struct amdgpu_gmc {
} fault_hash[AMDGPU_GMC_FAULT_HASH_SIZE];
uint64_t last_fault:AMDGPU_GMC_FAULT_RING_ORDER;
+ bool tmz_enabled;
+
const struct amdgpu_gmc_funcs *gmc_funcs;
struct amdgpu_xgmi xgmi;
@@ -276,4 +278,6 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev);
void amdgpu_gmc_ras_fini(struct amdgpu_device *adev);
int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev);
+extern void amdgpu_gmc_tmz_set(struct amdgpu_device *adev);
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index ccbd7acfc4cb..b91853fd66d3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -61,12 +61,13 @@
* Returns 0 on success, error on failure.
*/
int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
- unsigned size, struct amdgpu_ib *ib)
+ unsigned size, enum amdgpu_ib_pool_type pool_type,
+ struct amdgpu_ib *ib)
{
int r;
if (size) {
- r = amdgpu_sa_bo_new(&adev->ring_tmp_bo,
+ r = amdgpu_sa_bo_new(&adev->ib_pools[pool_type],
&ib->sa_bo, size, 256);
if (r) {
dev_err(adev->dev, "failed to get a new IB (%d)\n", r);
@@ -131,6 +132,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
uint64_t fence_ctx;
uint32_t status = 0, alloc_size;
unsigned fence_flags = 0;
+ bool secure;
unsigned i;
int r = 0;
@@ -159,6 +161,12 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
return -EINVAL;
}
+ if ((ib->flags & AMDGPU_IB_FLAGS_SECURE) &&
+ (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)) {
+ dev_err(adev->dev, "secure submissions not supported on compute rings\n");
+ return -EINVAL;
+ }
+
alloc_size = ring->funcs->emit_frame_size + num_ibs *
ring->funcs->emit_ib_size;
@@ -181,6 +189,9 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
dma_fence_put(tmp);
}
+ if ((ib->flags & AMDGPU_IB_FLAG_EMIT_MEM_SYNC) && ring->funcs->emit_mem_sync)
+ ring->funcs->emit_mem_sync(ring);
+
if (ring->funcs->insert_start)
ring->funcs->insert_start(ring);
@@ -215,6 +226,14 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
amdgpu_ring_emit_cntxcntl(ring, status);
}
+ /* Setup initial TMZiness and send it off.
+ */
+ secure = false;
+ if (job && ring->funcs->emit_frame_cntl) {
+ secure = ib->flags & AMDGPU_IB_FLAGS_SECURE;
+ amdgpu_ring_emit_frame_cntl(ring, true, secure);
+ }
+
for (i = 0; i < num_ibs; ++i) {
ib = &ibs[i];
@@ -226,12 +245,20 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
!amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */
continue;
+ if (job && ring->funcs->emit_frame_cntl) {
+ if (secure != !!(ib->flags & AMDGPU_IB_FLAGS_SECURE)) {
+ amdgpu_ring_emit_frame_cntl(ring, false, secure);
+ secure = !secure;
+ amdgpu_ring_emit_frame_cntl(ring, true, secure);
+ }
+ }
+
amdgpu_ring_emit_ib(ring, job, ib, status);
status &= ~AMDGPU_HAVE_CTX_SWITCH;
}
- if (ring->funcs->emit_tmz)
- amdgpu_ring_emit_tmz(ring, false);
+ if (job && ring->funcs->emit_frame_cntl)
+ amdgpu_ring_emit_frame_cntl(ring, false, secure);
#ifdef CONFIG_X86_64
if (!(adev->flags & AMD_IS_APU))
@@ -280,22 +307,32 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
*/
int amdgpu_ib_pool_init(struct amdgpu_device *adev)
{
- int r;
+ unsigned size;
+ int r, i;
- if (adev->ib_pool_ready) {
+ if (adev->ib_pool_ready)
return 0;
- }
- r = amdgpu_sa_bo_manager_init(adev, &adev->ring_tmp_bo,
- AMDGPU_IB_POOL_SIZE*64*1024,
- AMDGPU_GPU_PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_GTT);
- if (r) {
- return r;
- }
+ for (i = 0; i < AMDGPU_IB_POOL_MAX; i++) {
+ if (i == AMDGPU_IB_POOL_DIRECT)
+ size = PAGE_SIZE * 2;
+ else
+ size = AMDGPU_IB_POOL_SIZE;
+
+ r = amdgpu_sa_bo_manager_init(adev, &adev->ib_pools[i],
+ size, AMDGPU_GPU_PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT);
+ if (r)
+ goto error;
+ }
adev->ib_pool_ready = true;
return 0;
+
+error:
+ while (i--)
+ amdgpu_sa_bo_manager_fini(adev, &adev->ib_pools[i]);
+ return r;
}
/**
@@ -308,10 +345,14 @@ int amdgpu_ib_pool_init(struct amdgpu_device *adev)
*/
void amdgpu_ib_pool_fini(struct amdgpu_device *adev)
{
- if (adev->ib_pool_ready) {
- amdgpu_sa_bo_manager_fini(adev, &adev->ring_tmp_bo);
- adev->ib_pool_ready = false;
- }
+ int i;
+
+ if (!adev->ib_pool_ready)
+ return;
+
+ for (i = 0; i < AMDGPU_IB_POOL_MAX; i++)
+ amdgpu_sa_bo_manager_fini(adev, &adev->ib_pools[i]);
+ adev->ib_pool_ready = false;
}
/**
@@ -326,9 +367,9 @@ void amdgpu_ib_pool_fini(struct amdgpu_device *adev)
*/
int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
{
- unsigned i;
- int r, ret = 0;
long tmo_gfx, tmo_mm;
+ int r, ret = 0;
+ unsigned i;
tmo_mm = tmo_gfx = AMDGPU_IB_TEST_TIMEOUT;
if (amdgpu_sriov_vf(adev)) {
@@ -406,10 +447,16 @@ static int amdgpu_debugfs_sa_info(struct seq_file *m, void *data)
struct drm_device *dev = node->minor->dev;
struct amdgpu_device *adev = dev->dev_private;
- amdgpu_sa_bo_dump_debug_info(&adev->ring_tmp_bo, m);
+ seq_printf(m, "--------------------- DELAYED --------------------- \n");
+ amdgpu_sa_bo_dump_debug_info(&adev->ib_pools[AMDGPU_IB_POOL_DELAYED],
+ m);
+ seq_printf(m, "-------------------- IMMEDIATE -------------------- \n");
+ amdgpu_sa_bo_dump_debug_info(&adev->ib_pools[AMDGPU_IB_POOL_IMMEDIATE],
+ m);
+ seq_printf(m, "--------------------- DIRECT ---------------------- \n");
+ amdgpu_sa_bo_dump_debug_info(&adev->ib_pools[AMDGPU_IB_POOL_DIRECT], m);
return 0;
-
}
static const struct drm_info_list amdgpu_debugfs_sa_list[] = {
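amdgpu_ib_pool_init() above tears down only the pools it managed to create, using the while (i--) reverse unwind after the failing iteration. A self-contained sketch of the same partial-initialization cleanup idiom; the pool type and helpers are invented for the demo.

#include <stdio.h>
#include <stdlib.h>

#define NPOOLS 3

/* Hypothetical pool standing in for the suballocator managers. */
struct pool { void *mem; };

static int pool_init(struct pool *p, int idx)
{
	/* Fail the last pool on purpose to exercise the unwind path. */
	if (idx == NPOOLS - 1)
		return -1;
	p->mem = malloc(64);
	return p->mem ? 0 : -1;
}

static void pool_fini(struct pool *p)
{
	free(p->mem);
	p->mem = NULL;
}

int main(void)
{
	struct pool pools[NPOOLS];
	int i, r;

	for (i = 0; i < NPOOLS; i++) {
		r = pool_init(&pools[i], i);
		if (r)
			goto error;
	}
	printf("all pools initialized\n");
	return 0;

error:
	/* Same pattern as the hunk above: tear down only the pools that
	 * were successfully initialized, in reverse order. */
	while (i--)
		pool_fini(&pools[i]);
	printf("init failed at pool %d, unwound cleanly\n", NPOOLS - 1);
	return 1;
}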
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
index 3a67f6c046d4..fe92dcd94d4a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
@@ -282,7 +282,7 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
!dma_fence_is_later(updates, (*id)->flushed_updates))
updates = NULL;
- if ((*id)->owner != vm->direct.fence_context ||
+ if ((*id)->owner != vm->immediate.fence_context ||
job->vm_pd_addr != (*id)->pd_gpu_addr ||
updates || !(*id)->last_flush ||
((*id)->last_flush->context != fence_context &&
@@ -349,7 +349,7 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm,
struct dma_fence *flushed;
/* Check all the prerequisites to using this VMID */
- if ((*id)->owner != vm->direct.fence_context)
+ if ((*id)->owner != vm->immediate.fence_context)
continue;
if ((*id)->pd_gpu_addr != job->vm_pd_addr)
@@ -448,7 +448,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
}
id->pd_gpu_addr = job->vm_pd_addr;
- id->owner = vm->direct.fence_context;
+ id->owner = vm->immediate.fence_context;
if (job->vm_needs_flush) {
dma_fence_put(id->last_flush);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index 5ed4227f304b..0cc4c67f95f7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -260,7 +260,7 @@ int amdgpu_irq_init(struct amdgpu_device *adev)
nvec = pci_alloc_irq_vectors(adev->pdev, 1, 1, flags);
if (nvec > 0) {
adev->irq.msi_enabled = true;
- dev_dbg(adev->dev, "amdgpu: using MSI/MSI-X.\n");
+ dev_dbg(adev->dev, "using MSI/MSI-X.\n");
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 4981e443a884..47207188c569 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -33,6 +33,7 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
struct amdgpu_job *job = to_amdgpu_job(s_job);
struct amdgpu_task_info ti;
+ struct amdgpu_device *adev = ring->adev;
memset(&ti, 0, sizeof(struct amdgpu_task_info));
@@ -49,10 +50,13 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
DRM_ERROR("Process information: process %s pid %d thread %s pid %d\n",
ti.process_name, ti.tgid, ti.task_name, ti.pid);
- if (amdgpu_device_should_recover_gpu(ring->adev))
+ if (amdgpu_device_should_recover_gpu(ring->adev)) {
amdgpu_device_gpu_recover(ring->adev, job);
- else
+ } else {
drm_sched_suspend_timeout(&ring->sched);
+ if (amdgpu_sriov_vf(adev))
+ adev->virt.tdr_debug = true;
+ }
}
int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
@@ -87,7 +91,8 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
}
int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size,
- struct amdgpu_job **job)
+ enum amdgpu_ib_pool_type pool_type,
+ struct amdgpu_job **job)
{
int r;
@@ -95,7 +100,7 @@ int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size,
if (r)
return r;
- r = amdgpu_ib_get(adev, NULL, size, &(*job)->ibs[0]);
+ r = amdgpu_ib_get(adev, NULL, size, pool_type, &(*job)->ibs[0]);
if (r)
kfree(*job);
@@ -140,7 +145,6 @@ void amdgpu_job_free(struct amdgpu_job *job)
int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity,
void *owner, struct dma_fence **f)
{
- enum drm_sched_priority priority;
int r;
if (!f)
@@ -152,7 +156,6 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity,
*f = dma_fence_get(&job->base.s_fence->finished);
amdgpu_job_free_resources(job);
- priority = job->base.s_priority;
drm_sched_entity_push_job(&job->base, entity);
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
index 3f7b8433d179..81caac9b958a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
@@ -38,6 +38,7 @@
#define AMDGPU_JOB_GET_VMID(job) ((job) ? (job)->vmid : 0)
struct amdgpu_fence;
+enum amdgpu_ib_pool_type;
struct amdgpu_job {
struct drm_sched_job base;
@@ -61,14 +62,12 @@ struct amdgpu_job {
/* user fence handling */
uint64_t uf_addr;
uint64_t uf_sequence;
-
};
int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
struct amdgpu_job **job, struct amdgpu_vm *vm);
int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size,
- struct amdgpu_job **job);
-
+ enum amdgpu_ib_pool_type pool, struct amdgpu_job **job);
void amdgpu_job_free_resources(struct amdgpu_job *job);
void amdgpu_job_free(struct amdgpu_job *job);
int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
index 5727f00afc8e..d31d65e6b039 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
@@ -144,7 +144,8 @@ static int amdgpu_jpeg_dec_set_reg(struct amdgpu_ring *ring, uint32_t handle,
const unsigned ib_size_dw = 16;
int i, r;
- r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
+ r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
+ AMDGPU_IB_POOL_DIRECT, &job);
if (r)
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h
index bd9ef9cc86de..5131a0a1bc8a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h
@@ -43,8 +43,6 @@ struct amdgpu_jpeg {
uint8_t num_jpeg_inst;
struct amdgpu_jpeg_inst inst[AMDGPU_MAX_JPEG_INSTANCES];
struct amdgpu_jpeg_reg internal;
- struct drm_gpu_scheduler *jpeg_sched[AMDGPU_MAX_JPEG_INSTANCES];
- uint32_t num_jpeg_sched;
unsigned harvest_config;
struct delayed_work idle_work;
enum amd_powergating_state cur_state;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index fd1dc3236eca..d7e17e34fee1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -183,18 +183,18 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags)
/* Call ACPI methods: require modeset init
* but failure is not fatal
*/
- if (!r) {
- acpi_status = amdgpu_acpi_init(adev);
- if (acpi_status)
- dev_dbg(&dev->pdev->dev,
- "Error during ACPI methods call\n");
- }
+
+ acpi_status = amdgpu_acpi_init(adev);
+ if (acpi_status)
+ dev_dbg(&dev->pdev->dev, "Error during ACPI methods call\n");
if (adev->runpm) {
- dev_pm_set_driver_flags(dev->dev, DPM_FLAG_NEVER_SKIP);
+ /* only need to skip on ATPX */
+ if (amdgpu_device_supports_boco(dev) &&
+ !amdgpu_is_atpx_hybrid())
+ dev_pm_set_driver_flags(dev->dev, DPM_FLAG_NO_DIRECT_COMPLETE);
pm_runtime_use_autosuspend(dev->dev);
pm_runtime_set_autosuspend_delay(dev->dev, 5000);
- pm_runtime_set_active(dev->dev);
pm_runtime_allow(dev->dev);
pm_runtime_mark_last_busy(dev->dev);
pm_runtime_put_autosuspend(dev->dev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
index 919bd566ba3c..edaac242ff85 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
@@ -77,7 +77,6 @@ struct amdgpu_nbio_funcs {
u32 *flags);
void (*ih_control)(struct amdgpu_device *adev);
void (*init_registers)(struct amdgpu_device *adev);
- void (*detect_hw_virt)(struct amdgpu_device *adev);
void (*remap_hdp_registers)(struct amdgpu_device *adev);
void (*handle_ras_controller_intr_no_bifring)(struct amdgpu_device *adev);
void (*handle_ras_err_event_athub_intr_no_bifring)(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index c687f5415b3f..3d822eba9a5d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -753,7 +753,7 @@ int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow, struct dma_fence **fence)
return amdgpu_copy_buffer(ring, shadow_addr, parent_addr,
amdgpu_bo_size(shadow), NULL, fence,
- true, false);
+ true, false, false);
}
/**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 5e39ecd8cc28..7d41f7b9a340 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -229,6 +229,17 @@ static inline bool amdgpu_bo_explicit_sync(struct amdgpu_bo *bo)
return bo->flags & AMDGPU_GEM_CREATE_EXPLICIT_SYNC;
}
+/**
+ * amdgpu_bo_encrypted - test if the BO is encrypted
+ * @bo: pointer to a buffer object
+ *
+ * Return true if the buffer object is encrypted, false otherwise.
+ */
+static inline bool amdgpu_bo_encrypted(struct amdgpu_bo *bo)
+{
+ return bo->flags & AMDGPU_GEM_CREATE_ENCRYPTED;
+}
+
bool amdgpu_bo_is_amdgpu_bo(struct ttm_buffer_object *bo);
void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index abe94a55ecad..d7646cbce346 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -154,18 +154,15 @@ int amdgpu_dpm_read_sensor(struct amdgpu_device *adev, enum amd_pp_sensors senso
*
*/
-static ssize_t amdgpu_get_dpm_state(struct device *dev,
- struct device_attribute *attr,
- char *buf)
+static ssize_t amdgpu_get_power_dpm_state(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
enum amd_pm_state_type pm;
int ret;
- if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
- return 0;
-
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0)
return ret;
@@ -189,19 +186,16 @@ static ssize_t amdgpu_get_dpm_state(struct device *dev,
(pm == POWER_STATE_TYPE_BALANCED) ? "balanced" : "performance");
}
-static ssize_t amdgpu_set_dpm_state(struct device *dev,
- struct device_attribute *attr,
- const char *buf,
- size_t count)
+static ssize_t amdgpu_set_power_dpm_state(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
enum amd_pm_state_type state;
int ret;
- if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
- return -EINVAL;
-
if (strncmp("battery", buf, strlen("battery")) == 0)
state = POWER_STATE_TYPE_BATTERY;
else if (strncmp("balanced", buf, strlen("balanced")) == 0)
@@ -294,18 +288,15 @@ static ssize_t amdgpu_set_dpm_state(struct device *dev,
*
*/
-static ssize_t amdgpu_get_dpm_forced_performance_level(struct device *dev,
- struct device_attribute *attr,
- char *buf)
+static ssize_t amdgpu_get_power_dpm_force_performance_level(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
enum amd_dpm_forced_level level = 0xff;
int ret;
- if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
- return 0;
-
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0)
return ret;
@@ -332,10 +323,10 @@ static ssize_t amdgpu_get_dpm_forced_performance_level(struct device *dev,
"unknown");
}
-static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev,
- struct device_attribute *attr,
- const char *buf,
- size_t count)
+static ssize_t amdgpu_set_power_dpm_force_performance_level(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
@@ -343,9 +334,6 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev,
enum amd_dpm_forced_level current_level = 0xff;
int ret = 0;
- if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
- return -EINVAL;
-
if (strncmp("low", buf, strlen("low")) == 0) {
level = AMD_DPM_FORCED_LEVEL_LOW;
} else if (strncmp("high", buf, strlen("high")) == 0) {
@@ -383,6 +371,15 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev,
return count;
}
+ if (adev->asic_type == CHIP_RAVEN) {
+ if (!(adev->apu_flags & AMD_APU_IS_RAVEN2)) {
+ if (current_level != AMD_DPM_FORCED_LEVEL_MANUAL && level == AMD_DPM_FORCED_LEVEL_MANUAL)
+ amdgpu_gfx_off_ctrl(adev, false);
+ else if (current_level == AMD_DPM_FORCED_LEVEL_MANUAL && level != AMD_DPM_FORCED_LEVEL_MANUAL)
+ amdgpu_gfx_off_ctrl(adev, true);
+ }
+ }
+
/* profile_exit setting is valid only when current mode is in profile mode */
if (!(current_level & (AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD |
AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK |
@@ -444,8 +441,11 @@ static ssize_t amdgpu_get_pp_num_states(struct device *dev,
ret = smu_get_power_num_states(&adev->smu, &data);
if (ret)
return ret;
- } else if (adev->powerplay.pp_funcs->get_pp_num_states)
+ } else if (adev->powerplay.pp_funcs->get_pp_num_states) {
amdgpu_dpm_get_pp_num_states(adev, &data);
+ } else {
+ memset(&data, 0, sizeof(data));
+ }
pm_runtime_mark_last_busy(ddev->dev);
pm_runtime_put_autosuspend(ddev->dev);
@@ -472,9 +472,6 @@ static ssize_t amdgpu_get_pp_cur_state(struct device *dev,
enum amd_pm_state_type pm = 0;
int i = 0, ret = 0;
- if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
- return 0;
-
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0)
return ret;
@@ -511,9 +508,6 @@ static ssize_t amdgpu_get_pp_force_state(struct device *dev,
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
- if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
- return 0;
-
if (adev->pp_force_state_enabled)
return amdgpu_get_pp_cur_state(dev, attr, buf);
else
@@ -531,9 +525,6 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev,
unsigned long idx;
int ret;
- if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
- return -EINVAL;
-
if (strlen(buf) == 1)
adev->pp_force_state_enabled = false;
else if (is_support_sw_smu(adev))
@@ -589,9 +580,6 @@ static ssize_t amdgpu_get_pp_table(struct device *dev,
char *table = NULL;
int size, ret;
- if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
- return 0;
-
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0)
return ret;
@@ -631,9 +619,6 @@ static ssize_t amdgpu_set_pp_table(struct device *dev,
struct amdgpu_device *adev = ddev->dev_private;
int ret = 0;
- if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
- return -EINVAL;
-
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0)
return ret;
@@ -736,9 +721,6 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
const char delimiter[3] = {' ', '\n', '\0'};
uint32_t type;
- if (amdgpu_sriov_vf(adev))
- return -EINVAL;
-
if (count > 127)
return -EINVAL;
@@ -828,9 +810,6 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev,
ssize_t size;
int ret;
- if (amdgpu_sriov_vf(adev))
- return 0;
-
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0)
return ret;
@@ -870,19 +849,16 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev,
* the corresponding bit from original ppfeature masks and input the
* new ppfeature masks.
*/
-static ssize_t amdgpu_set_pp_feature_status(struct device *dev,
- struct device_attribute *attr,
- const char *buf,
- size_t count)
+static ssize_t amdgpu_set_pp_features(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
uint64_t featuremask;
int ret;
- if (amdgpu_sriov_vf(adev))
- return -EINVAL;
-
ret = kstrtou64(buf, 0, &featuremask);
if (ret)
return -EINVAL;
@@ -914,18 +890,15 @@ static ssize_t amdgpu_set_pp_feature_status(struct device *dev,
return count;
}
-static ssize_t amdgpu_get_pp_feature_status(struct device *dev,
- struct device_attribute *attr,
- char *buf)
+static ssize_t amdgpu_get_pp_features(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
ssize_t size;
int ret;
- if (amdgpu_sriov_vf(adev))
- return 0;
-
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0)
return ret;
@@ -982,9 +955,6 @@ static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev,
ssize_t size;
int ret;
- if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
- return 0;
-
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0)
return ret;
@@ -1048,9 +1018,6 @@ static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev,
int ret;
uint32_t mask = 0;
- if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
- return -EINVAL;
-
ret = amdgpu_read_mask(buf, count, &mask);
if (ret)
return ret;
@@ -1082,9 +1049,6 @@ static ssize_t amdgpu_get_pp_dpm_mclk(struct device *dev,
ssize_t size;
int ret;
- if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
- return 0;
-
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0)
return ret;
@@ -1112,9 +1076,6 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev,
uint32_t mask = 0;
int ret;
- if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
- return -EINVAL;
-
ret = amdgpu_read_mask(buf, count, &mask);
if (ret)
return ret;
@@ -1146,9 +1107,6 @@ static ssize_t amdgpu_get_pp_dpm_socclk(struct device *dev,
ssize_t size;
int ret;
- if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
- return 0;
-
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0)
return ret;
@@ -1176,9 +1134,6 @@ static ssize_t amdgpu_set_pp_dpm_socclk(struct device *dev,
int ret;
uint32_t mask = 0;
- if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
- return -EINVAL;
-
ret = amdgpu_read_mask(buf, count, &mask);
if (ret)
return ret;
@@ -1212,9 +1167,6 @@ static ssize_t amdgpu_get_pp_dpm_fclk(struct device *dev,
ssize_t size;
int ret;
- if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
- return 0;
-
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0)
return ret;
@@ -1242,9 +1194,6 @@ static ssize_t amdgpu_set_pp_dpm_fclk(struct device *dev,
int ret;
uint32_t mask = 0;
- if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
- return -EINVAL;
-
ret = amdgpu_read_mask(buf, count, &mask);
if (ret)
return ret;
@@ -1278,9 +1227,6 @@ static ssize_t amdgpu_get_pp_dpm_dcefclk(struct device *dev,
ssize_t size;
int ret;
- if (amdgpu_sriov_vf(adev))
- return 0;
-
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0)
return ret;
@@ -1308,9 +1254,6 @@ static ssize_t amdgpu_set_pp_dpm_dcefclk(struct device *dev,
int ret;
uint32_t mask = 0;
- if (amdgpu_sriov_vf(adev))
- return -EINVAL;
-
ret = amdgpu_read_mask(buf, count, &mask);
if (ret)
return ret;
@@ -1344,9 +1287,6 @@ static ssize_t amdgpu_get_pp_dpm_pcie(struct device *dev,
ssize_t size;
int ret;
- if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
- return 0;
-
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0)
return ret;
@@ -1374,9 +1314,6 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev,
int ret;
uint32_t mask = 0;
- if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
- return -EINVAL;
-
ret = amdgpu_read_mask(buf, count, &mask);
if (ret)
return ret;
@@ -1410,9 +1347,6 @@ static ssize_t amdgpu_get_pp_sclk_od(struct device *dev,
uint32_t value = 0;
int ret;
- if (amdgpu_sriov_vf(adev))
- return 0;
-
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0)
return ret;
@@ -1438,9 +1372,6 @@ static ssize_t amdgpu_set_pp_sclk_od(struct device *dev,
int ret;
long int value;
- if (amdgpu_sriov_vf(adev))
- return -EINVAL;
-
ret = kstrtol(buf, 0, &value);
if (ret)
@@ -1479,9 +1410,6 @@ static ssize_t amdgpu_get_pp_mclk_od(struct device *dev,
uint32_t value = 0;
int ret;
- if (amdgpu_sriov_vf(adev))
- return 0;
-
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0)
return ret;
@@ -1507,9 +1435,6 @@ static ssize_t amdgpu_set_pp_mclk_od(struct device *dev,
int ret;
long int value;
- if (amdgpu_sriov_vf(adev))
- return 0;
-
ret = kstrtol(buf, 0, &value);
if (ret)
@@ -1568,9 +1493,6 @@ static ssize_t amdgpu_get_pp_power_profile_mode(struct device *dev,
ssize_t size;
int ret;
- if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
- return 0;
-
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0)
return ret;
@@ -1612,9 +1534,6 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev,
if (ret)
return -EINVAL;
- if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
- return -EINVAL;
-
if (profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) {
if (count < 2 || count > 127)
return -EINVAL;
@@ -1660,17 +1579,14 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev,
* The SMU firmware computes a percentage of load based on the
* aggregate activity level in the IP cores.
*/
-static ssize_t amdgpu_get_busy_percent(struct device *dev,
- struct device_attribute *attr,
- char *buf)
+static ssize_t amdgpu_get_gpu_busy_percent(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
int r, value, size = sizeof(value);
- if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
- return 0;
-
r = pm_runtime_get_sync(ddev->dev);
if (r < 0)
return r;
@@ -1696,17 +1612,14 @@ static ssize_t amdgpu_get_busy_percent(struct device *dev,
* The SMU firmware computes a percentage of load based on the
* aggregate activity level in the IP cores.
*/
-static ssize_t amdgpu_get_memory_busy_percent(struct device *dev,
- struct device_attribute *attr,
- char *buf)
+static ssize_t amdgpu_get_mem_busy_percent(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
int r, value, size = sizeof(value);
- if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
- return 0;
-
r = pm_runtime_get_sync(ddev->dev);
if (r < 0)
return r;
@@ -1742,11 +1655,14 @@ static ssize_t amdgpu_get_pcie_bw(struct device *dev,
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
- uint64_t count0, count1;
+ uint64_t count0 = 0, count1 = 0;
int ret;
- if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
- return 0;
+ if (adev->flags & AMD_IS_APU)
+ return -ENODATA;
+
+ if (!adev->asic_funcs->get_pcie_usage)
+ return -ENODATA;
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0)
@@ -1778,66 +1694,191 @@ static ssize_t amdgpu_get_unique_id(struct device *dev,
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
- if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
- return 0;
-
if (adev->unique_id)
return snprintf(buf, PAGE_SIZE, "%016llx\n", adev->unique_id);
return 0;
}
-static DEVICE_ATTR(power_dpm_state, S_IRUGO | S_IWUSR, amdgpu_get_dpm_state, amdgpu_set_dpm_state);
-static DEVICE_ATTR(power_dpm_force_performance_level, S_IRUGO | S_IWUSR,
- amdgpu_get_dpm_forced_performance_level,
- amdgpu_set_dpm_forced_performance_level);
-static DEVICE_ATTR(pp_num_states, S_IRUGO, amdgpu_get_pp_num_states, NULL);
-static DEVICE_ATTR(pp_cur_state, S_IRUGO, amdgpu_get_pp_cur_state, NULL);
-static DEVICE_ATTR(pp_force_state, S_IRUGO | S_IWUSR,
- amdgpu_get_pp_force_state,
- amdgpu_set_pp_force_state);
-static DEVICE_ATTR(pp_table, S_IRUGO | S_IWUSR,
- amdgpu_get_pp_table,
- amdgpu_set_pp_table);
-static DEVICE_ATTR(pp_dpm_sclk, S_IRUGO | S_IWUSR,
- amdgpu_get_pp_dpm_sclk,
- amdgpu_set_pp_dpm_sclk);
-static DEVICE_ATTR(pp_dpm_mclk, S_IRUGO | S_IWUSR,
- amdgpu_get_pp_dpm_mclk,
- amdgpu_set_pp_dpm_mclk);
-static DEVICE_ATTR(pp_dpm_socclk, S_IRUGO | S_IWUSR,
- amdgpu_get_pp_dpm_socclk,
- amdgpu_set_pp_dpm_socclk);
-static DEVICE_ATTR(pp_dpm_fclk, S_IRUGO | S_IWUSR,
- amdgpu_get_pp_dpm_fclk,
- amdgpu_set_pp_dpm_fclk);
-static DEVICE_ATTR(pp_dpm_dcefclk, S_IRUGO | S_IWUSR,
- amdgpu_get_pp_dpm_dcefclk,
- amdgpu_set_pp_dpm_dcefclk);
-static DEVICE_ATTR(pp_dpm_pcie, S_IRUGO | S_IWUSR,
- amdgpu_get_pp_dpm_pcie,
- amdgpu_set_pp_dpm_pcie);
-static DEVICE_ATTR(pp_sclk_od, S_IRUGO | S_IWUSR,
- amdgpu_get_pp_sclk_od,
- amdgpu_set_pp_sclk_od);
-static DEVICE_ATTR(pp_mclk_od, S_IRUGO | S_IWUSR,
- amdgpu_get_pp_mclk_od,
- amdgpu_set_pp_mclk_od);
-static DEVICE_ATTR(pp_power_profile_mode, S_IRUGO | S_IWUSR,
- amdgpu_get_pp_power_profile_mode,
- amdgpu_set_pp_power_profile_mode);
-static DEVICE_ATTR(pp_od_clk_voltage, S_IRUGO | S_IWUSR,
- amdgpu_get_pp_od_clk_voltage,
- amdgpu_set_pp_od_clk_voltage);
-static DEVICE_ATTR(gpu_busy_percent, S_IRUGO,
- amdgpu_get_busy_percent, NULL);
-static DEVICE_ATTR(mem_busy_percent, S_IRUGO,
- amdgpu_get_memory_busy_percent, NULL);
-static DEVICE_ATTR(pcie_bw, S_IRUGO, amdgpu_get_pcie_bw, NULL);
-static DEVICE_ATTR(pp_features, S_IRUGO | S_IWUSR,
- amdgpu_get_pp_feature_status,
- amdgpu_set_pp_feature_status);
-static DEVICE_ATTR(unique_id, S_IRUGO, amdgpu_get_unique_id, NULL);
+static struct amdgpu_device_attr amdgpu_device_attrs[] = {
+ AMDGPU_DEVICE_ATTR_RW(power_dpm_state, ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
+ AMDGPU_DEVICE_ATTR_RW(power_dpm_force_performance_level, ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
+ AMDGPU_DEVICE_ATTR_RO(pp_num_states, ATTR_FLAG_BASIC),
+ AMDGPU_DEVICE_ATTR_RO(pp_cur_state, ATTR_FLAG_BASIC),
+ AMDGPU_DEVICE_ATTR_RW(pp_force_state, ATTR_FLAG_BASIC),
+ AMDGPU_DEVICE_ATTR_RW(pp_table, ATTR_FLAG_BASIC),
+ AMDGPU_DEVICE_ATTR_RW(pp_dpm_sclk, ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
+ AMDGPU_DEVICE_ATTR_RW(pp_dpm_mclk, ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
+ AMDGPU_DEVICE_ATTR_RW(pp_dpm_socclk, ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
+ AMDGPU_DEVICE_ATTR_RW(pp_dpm_fclk, ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
+ AMDGPU_DEVICE_ATTR_RW(pp_dpm_dcefclk, ATTR_FLAG_BASIC),
+ AMDGPU_DEVICE_ATTR_RW(pp_dpm_pcie, ATTR_FLAG_BASIC),
+ AMDGPU_DEVICE_ATTR_RW(pp_sclk_od, ATTR_FLAG_BASIC),
+ AMDGPU_DEVICE_ATTR_RW(pp_mclk_od, ATTR_FLAG_BASIC),
+ AMDGPU_DEVICE_ATTR_RW(pp_power_profile_mode, ATTR_FLAG_BASIC),
+ AMDGPU_DEVICE_ATTR_RW(pp_od_clk_voltage, ATTR_FLAG_BASIC),
+ AMDGPU_DEVICE_ATTR_RO(gpu_busy_percent, ATTR_FLAG_BASIC),
+ AMDGPU_DEVICE_ATTR_RO(mem_busy_percent, ATTR_FLAG_BASIC),
+ AMDGPU_DEVICE_ATTR_RO(pcie_bw, ATTR_FLAG_BASIC),
+ AMDGPU_DEVICE_ATTR_RW(pp_features, ATTR_FLAG_BASIC),
+ AMDGPU_DEVICE_ATTR_RO(unique_id, ATTR_FLAG_BASIC),
+};
+
+static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_attr *attr,
+ uint32_t mask, enum amdgpu_device_attr_states *states)
+{
+ struct device_attribute *dev_attr = &attr->dev_attr;
+ const char *attr_name = dev_attr->attr.name;
+ struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle;
+ enum amd_asic_type asic_type = adev->asic_type;
+
+ if (!(attr->flags & mask)) {
+ *states = ATTR_STATE_UNSUPPORTED;
+ return 0;
+ }
+
+#define DEVICE_ATTR_IS(_name) (!strcmp(attr_name, #_name))
+
+ if (DEVICE_ATTR_IS(pp_dpm_socclk)) {
+ if (asic_type < CHIP_VEGA10)
+ *states = ATTR_STATE_UNSUPPORTED;
+ } else if (DEVICE_ATTR_IS(pp_dpm_dcefclk)) {
+ if (asic_type < CHIP_VEGA10 || asic_type == CHIP_ARCTURUS)
+ *states = ATTR_STATE_UNSUPPORTED;
+ } else if (DEVICE_ATTR_IS(pp_dpm_fclk)) {
+ if (asic_type < CHIP_VEGA20)
+ *states = ATTR_STATE_UNSUPPORTED;
+ } else if (DEVICE_ATTR_IS(pp_dpm_pcie)) {
+ if (asic_type == CHIP_ARCTURUS)
+ *states = ATTR_STATE_UNSUPPORTED;
+ } else if (DEVICE_ATTR_IS(pp_od_clk_voltage)) {
+ *states = ATTR_STATE_UNSUPPORTED;
+ if ((is_support_sw_smu(adev) && adev->smu.od_enabled) ||
+ (!is_support_sw_smu(adev) && hwmgr->od_enabled))
+ *states = ATTR_STATE_SUPPORTED;
+ } else if (DEVICE_ATTR_IS(mem_busy_percent)) {
+ if (adev->flags & AMD_IS_APU || asic_type == CHIP_VEGA10)
+ *states = ATTR_STATE_UNSUPPORTED;
+ } else if (DEVICE_ATTR_IS(pcie_bw)) {
+ /* PCIe Perf counters won't work on APU nodes */
+ if (adev->flags & AMD_IS_APU)
+ *states = ATTR_STATE_UNSUPPORTED;
+ } else if (DEVICE_ATTR_IS(unique_id)) {
+ if (!adev->unique_id)
+ *states = ATTR_STATE_UNSUPPORTED;
+ } else if (DEVICE_ATTR_IS(pp_features)) {
+ if (adev->flags & AMD_IS_APU || asic_type < CHIP_VEGA10)
+ *states = ATTR_STATE_UNSUPPORTED;
+ }
+
+ if (asic_type == CHIP_ARCTURUS) {
+ /* Arcturus does not support standalone mclk/socclk/fclk level setting */
+ if (DEVICE_ATTR_IS(pp_dpm_mclk) ||
+ DEVICE_ATTR_IS(pp_dpm_socclk) ||
+ DEVICE_ATTR_IS(pp_dpm_fclk)) {
+ dev_attr->attr.mode &= ~S_IWUGO;
+ dev_attr->store = NULL;
+ }
+ }
+
+#undef DEVICE_ATTR_IS
+
+ return 0;
+}
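
For illustration only, a per-attribute override hook wired through the table's .attr_update field could look like the sketch below; the vega20_unique_id_update() name and the CHIP_VEGA20 check are hypothetical, while the contract (return 0, report visibility through *states) is the one default_attr_update() follows above.

/* Sketch of a custom .attr_update hook (hypothetical): same contract as
 * default_attr_update(), i.e. return 0 and downgrade *states to
 * ATTR_STATE_UNSUPPORTED when the sysfs file should not be created.
 */
static int vega20_unique_id_update(struct amdgpu_device *adev,
				   struct amdgpu_device_attr *attr,
				   uint32_t mask,
				   enum amdgpu_device_attr_states *states)
{
	if (!(attr->flags & mask) || adev->asic_type != CHIP_VEGA20)
		*states = ATTR_STATE_UNSUPPORTED;

	return 0;
}

Such a hook would be attached in the table as AMDGPU_DEVICE_ATTR_RO(unique_id, ATTR_FLAG_BASIC, .attr_update = vega20_unique_id_update), via the __VA_ARGS__ pass-through in the macros added to amdgpu_pm.h below.
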
+
+
+static int amdgpu_device_attr_create(struct amdgpu_device *adev,
+ struct amdgpu_device_attr *attr,
+ uint32_t mask, struct list_head *attr_list)
+{
+ int ret = 0;
+ struct device_attribute *dev_attr = &attr->dev_attr;
+ const char *name = dev_attr->attr.name;
+ enum amdgpu_device_attr_states attr_states = ATTR_STATE_SUPPORTED;
+ struct amdgpu_device_attr_entry *attr_entry;
+
+ int (*attr_update)(struct amdgpu_device *adev, struct amdgpu_device_attr *attr,
+ uint32_t mask, enum amdgpu_device_attr_states *states) = default_attr_update;
+
+ BUG_ON(!attr);
+
+	attr_update = attr->attr_update ? attr->attr_update : default_attr_update;
+
+ ret = attr_update(adev, attr, mask, &attr_states);
+ if (ret) {
+ dev_err(adev->dev, "failed to update device file %s, ret = %d\n",
+ name, ret);
+ return ret;
+ }
+
+ if (attr_states == ATTR_STATE_UNSUPPORTED)
+ return 0;
+
+ ret = device_create_file(adev->dev, dev_attr);
+ if (ret) {
+ dev_err(adev->dev, "failed to create device file %s, ret = %d\n",
+ name, ret);
+ }
+
+ attr_entry = kmalloc(sizeof(*attr_entry), GFP_KERNEL);
+ if (!attr_entry)
+ return -ENOMEM;
+
+ attr_entry->attr = attr;
+ INIT_LIST_HEAD(&attr_entry->entry);
+
+ list_add_tail(&attr_entry->entry, attr_list);
+
+ return ret;
+}
+
+static void amdgpu_device_attr_remove(struct amdgpu_device *adev, struct amdgpu_device_attr *attr)
+{
+ struct device_attribute *dev_attr = &attr->dev_attr;
+
+ device_remove_file(adev->dev, dev_attr);
+}
+
+static void amdgpu_device_attr_remove_groups(struct amdgpu_device *adev,
+ struct list_head *attr_list);
+
+static int amdgpu_device_attr_create_groups(struct amdgpu_device *adev,
+ struct amdgpu_device_attr *attrs,
+ uint32_t counts,
+ uint32_t mask,
+ struct list_head *attr_list)
+{
+ int ret = 0;
+ uint32_t i = 0;
+
+ for (i = 0; i < counts; i++) {
+ ret = amdgpu_device_attr_create(adev, &attrs[i], mask, attr_list);
+ if (ret)
+ goto failed;
+ }
+
+ return 0;
+
+failed:
+ amdgpu_device_attr_remove_groups(adev, attr_list);
+
+ return ret;
+}
+
+static void amdgpu_device_attr_remove_groups(struct amdgpu_device *adev,
+ struct list_head *attr_list)
+{
+ struct amdgpu_device_attr_entry *entry, *entry_tmp;
+
+ if (list_empty(attr_list))
+		return;
+
+ list_for_each_entry_safe(entry, entry_tmp, attr_list, entry) {
+ amdgpu_device_attr_remove(adev, entry->attr);
+ list_del(&entry->entry);
+ kfree(entry);
+ }
+}
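
A minimal usage sketch of the new pair of helpers, mirroring the amdgpu_pm_sysfs_init()/_fini() hunks further down (pm_attr_list is the list head this series adds to adev->pm):

/* Sketch: publish the whole attribute table once, filtered by the SRIOV
 * mode mask, and let the tracked list drive teardown instead of a long
 * series of device_create_file()/device_remove_file() calls.
 */
static int example_pm_attrs_init(struct amdgpu_device *adev, uint32_t mask)
{
	INIT_LIST_HEAD(&adev->pm.pm_attr_list);

	return amdgpu_device_attr_create_groups(adev, amdgpu_device_attrs,
						ARRAY_SIZE(amdgpu_device_attrs),
						mask, &adev->pm.pm_attr_list);
}

static void example_pm_attrs_fini(struct amdgpu_device *adev)
{
	amdgpu_device_attr_remove_groups(adev, &adev->pm.pm_attr_list);
}
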
static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
struct device_attribute *attr,
@@ -3238,8 +3279,8 @@ int amdgpu_pm_load_smu_firmware(struct amdgpu_device *adev, uint32_t *smu_versio
int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
{
- struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle;
int ret;
+ uint32_t mask = 0;
if (adev->pm.sysfs_initialized)
return 0;
@@ -3247,6 +3288,8 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
if (adev->pm.dpm_enabled == 0)
return 0;
+ INIT_LIST_HEAD(&adev->pm.pm_attr_list);
+
adev->pm.int_hwmon_dev = hwmon_device_register_with_groups(adev->dev,
DRIVER_NAME, adev,
hwmon_groups);
@@ -3257,160 +3300,26 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
return ret;
}
- ret = device_create_file(adev->dev, &dev_attr_power_dpm_state);
- if (ret) {
- DRM_ERROR("failed to create device file for dpm state\n");
- return ret;
- }
- ret = device_create_file(adev->dev, &dev_attr_power_dpm_force_performance_level);
- if (ret) {
- DRM_ERROR("failed to create device file for dpm state\n");
- return ret;
- }
-
-
- ret = device_create_file(adev->dev, &dev_attr_pp_num_states);
- if (ret) {
- DRM_ERROR("failed to create device file pp_num_states\n");
- return ret;
- }
- ret = device_create_file(adev->dev, &dev_attr_pp_cur_state);
- if (ret) {
- DRM_ERROR("failed to create device file pp_cur_state\n");
- return ret;
- }
- ret = device_create_file(adev->dev, &dev_attr_pp_force_state);
- if (ret) {
- DRM_ERROR("failed to create device file pp_force_state\n");
- return ret;
- }
- ret = device_create_file(adev->dev, &dev_attr_pp_table);
- if (ret) {
- DRM_ERROR("failed to create device file pp_table\n");
- return ret;
- }
-
- ret = device_create_file(adev->dev, &dev_attr_pp_dpm_sclk);
- if (ret) {
- DRM_ERROR("failed to create device file pp_dpm_sclk\n");
- return ret;
- }
-
- /* Arcturus does not support standalone mclk/socclk/fclk level setting */
- if (adev->asic_type == CHIP_ARCTURUS) {
- dev_attr_pp_dpm_mclk.attr.mode &= ~S_IWUGO;
- dev_attr_pp_dpm_mclk.store = NULL;
-
- dev_attr_pp_dpm_socclk.attr.mode &= ~S_IWUGO;
- dev_attr_pp_dpm_socclk.store = NULL;
-
- dev_attr_pp_dpm_fclk.attr.mode &= ~S_IWUGO;
- dev_attr_pp_dpm_fclk.store = NULL;
+ switch (amdgpu_virt_get_sriov_vf_mode(adev)) {
+ case SRIOV_VF_MODE_ONE_VF:
+ mask = ATTR_FLAG_ONEVF;
+ break;
+ case SRIOV_VF_MODE_MULTI_VF:
+ mask = 0;
+ break;
+ case SRIOV_VF_MODE_BARE_METAL:
+ default:
+ mask = ATTR_FLAG_MASK_ALL;
+ break;
}
- ret = device_create_file(adev->dev, &dev_attr_pp_dpm_mclk);
- if (ret) {
- DRM_ERROR("failed to create device file pp_dpm_mclk\n");
- return ret;
- }
- if (adev->asic_type >= CHIP_VEGA10) {
- ret = device_create_file(adev->dev, &dev_attr_pp_dpm_socclk);
- if (ret) {
- DRM_ERROR("failed to create device file pp_dpm_socclk\n");
- return ret;
- }
- if (adev->asic_type != CHIP_ARCTURUS) {
- ret = device_create_file(adev->dev, &dev_attr_pp_dpm_dcefclk);
- if (ret) {
- DRM_ERROR("failed to create device file pp_dpm_dcefclk\n");
- return ret;
- }
- }
- }
- if (adev->asic_type >= CHIP_VEGA20) {
- ret = device_create_file(adev->dev, &dev_attr_pp_dpm_fclk);
- if (ret) {
- DRM_ERROR("failed to create device file pp_dpm_fclk\n");
- return ret;
- }
- }
- if (adev->asic_type != CHIP_ARCTURUS) {
- ret = device_create_file(adev->dev, &dev_attr_pp_dpm_pcie);
- if (ret) {
- DRM_ERROR("failed to create device file pp_dpm_pcie\n");
- return ret;
- }
- }
- ret = device_create_file(adev->dev, &dev_attr_pp_sclk_od);
- if (ret) {
- DRM_ERROR("failed to create device file pp_sclk_od\n");
- return ret;
- }
- ret = device_create_file(adev->dev, &dev_attr_pp_mclk_od);
- if (ret) {
- DRM_ERROR("failed to create device file pp_mclk_od\n");
- return ret;
- }
- ret = device_create_file(adev->dev,
- &dev_attr_pp_power_profile_mode);
- if (ret) {
- DRM_ERROR("failed to create device file "
- "pp_power_profile_mode\n");
- return ret;
- }
- if ((is_support_sw_smu(adev) && adev->smu.od_enabled) ||
- (!is_support_sw_smu(adev) && hwmgr->od_enabled)) {
- ret = device_create_file(adev->dev,
- &dev_attr_pp_od_clk_voltage);
- if (ret) {
- DRM_ERROR("failed to create device file "
- "pp_od_clk_voltage\n");
- return ret;
- }
- }
- ret = device_create_file(adev->dev,
- &dev_attr_gpu_busy_percent);
- if (ret) {
- DRM_ERROR("failed to create device file "
- "gpu_busy_level\n");
- return ret;
- }
- /* APU does not have its own dedicated memory */
- if (!(adev->flags & AMD_IS_APU) &&
- (adev->asic_type != CHIP_VEGA10)) {
- ret = device_create_file(adev->dev,
- &dev_attr_mem_busy_percent);
- if (ret) {
- DRM_ERROR("failed to create device file "
- "mem_busy_percent\n");
- return ret;
- }
- }
- /* PCIe Perf counters won't work on APU nodes */
- if (!(adev->flags & AMD_IS_APU)) {
- ret = device_create_file(adev->dev, &dev_attr_pcie_bw);
- if (ret) {
- DRM_ERROR("failed to create device file pcie_bw\n");
- return ret;
- }
- }
- if (adev->unique_id)
- ret = device_create_file(adev->dev, &dev_attr_unique_id);
- if (ret) {
- DRM_ERROR("failed to create device file unique_id\n");
+ ret = amdgpu_device_attr_create_groups(adev,
+ amdgpu_device_attrs,
+ ARRAY_SIZE(amdgpu_device_attrs),
+ mask,
+ &adev->pm.pm_attr_list);
+ if (ret)
return ret;
- }
-
- if ((adev->asic_type >= CHIP_VEGA10) &&
- !(adev->flags & AMD_IS_APU)) {
- ret = device_create_file(adev->dev,
- &dev_attr_pp_features);
- if (ret) {
- DRM_ERROR("failed to create device file "
- "pp_features\n");
- return ret;
- }
- }
adev->pm.sysfs_initialized = true;
@@ -3419,51 +3328,13 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev)
{
- struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle;
-
if (adev->pm.dpm_enabled == 0)
return;
if (adev->pm.int_hwmon_dev)
hwmon_device_unregister(adev->pm.int_hwmon_dev);
- device_remove_file(adev->dev, &dev_attr_power_dpm_state);
- device_remove_file(adev->dev, &dev_attr_power_dpm_force_performance_level);
-
- device_remove_file(adev->dev, &dev_attr_pp_num_states);
- device_remove_file(adev->dev, &dev_attr_pp_cur_state);
- device_remove_file(adev->dev, &dev_attr_pp_force_state);
- device_remove_file(adev->dev, &dev_attr_pp_table);
-
- device_remove_file(adev->dev, &dev_attr_pp_dpm_sclk);
- device_remove_file(adev->dev, &dev_attr_pp_dpm_mclk);
- if (adev->asic_type >= CHIP_VEGA10) {
- device_remove_file(adev->dev, &dev_attr_pp_dpm_socclk);
- if (adev->asic_type != CHIP_ARCTURUS)
- device_remove_file(adev->dev, &dev_attr_pp_dpm_dcefclk);
- }
- if (adev->asic_type != CHIP_ARCTURUS)
- device_remove_file(adev->dev, &dev_attr_pp_dpm_pcie);
- if (adev->asic_type >= CHIP_VEGA20)
- device_remove_file(adev->dev, &dev_attr_pp_dpm_fclk);
- device_remove_file(adev->dev, &dev_attr_pp_sclk_od);
- device_remove_file(adev->dev, &dev_attr_pp_mclk_od);
- device_remove_file(adev->dev,
- &dev_attr_pp_power_profile_mode);
- if ((is_support_sw_smu(adev) && adev->smu.od_enabled) ||
- (!is_support_sw_smu(adev) && hwmgr->od_enabled))
- device_remove_file(adev->dev,
- &dev_attr_pp_od_clk_voltage);
- device_remove_file(adev->dev, &dev_attr_gpu_busy_percent);
- if (!(adev->flags & AMD_IS_APU) &&
- (adev->asic_type != CHIP_VEGA10))
- device_remove_file(adev->dev, &dev_attr_mem_busy_percent);
- if (!(adev->flags & AMD_IS_APU))
- device_remove_file(adev->dev, &dev_attr_pcie_bw);
- if (adev->unique_id)
- device_remove_file(adev->dev, &dev_attr_unique_id);
- if ((adev->asic_type >= CHIP_VEGA10) &&
- !(adev->flags & AMD_IS_APU))
- device_remove_file(adev->dev, &dev_attr_pp_features);
+
+ amdgpu_device_attr_remove_groups(adev, &adev->pm.pm_attr_list);
}
void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h
index 5db0ef86e84c..d9ae2b49a402 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h
@@ -30,6 +30,55 @@ struct cg_flag_name
const char *name;
};
+enum amdgpu_device_attr_flags {
+ ATTR_FLAG_BASIC = (1 << 0),
+ ATTR_FLAG_ONEVF = (1 << 16),
+};
+
+#define ATTR_FLAG_TYPE_MASK (0x0000ffff)
+#define ATTR_FLAG_MODE_MASK (0xffff0000)
+#define ATTR_FLAG_MASK_ALL (0xffffffff)
+
+enum amdgpu_device_attr_states {
+ ATTR_STATE_UNSUPPORTED = 0,
+ ATTR_STATE_SUPPORTED,
+};
+
+struct amdgpu_device_attr {
+ struct device_attribute dev_attr;
+ enum amdgpu_device_attr_flags flags;
+ int (*attr_update)(struct amdgpu_device *adev, struct amdgpu_device_attr *attr,
+ uint32_t mask, enum amdgpu_device_attr_states *states);
+
+};
+
+struct amdgpu_device_attr_entry {
+ struct list_head entry;
+ struct amdgpu_device_attr *attr;
+};
+
+#define to_amdgpu_device_attr(_dev_attr) \
+ container_of(_dev_attr, struct amdgpu_device_attr, dev_attr)
+
+#define __AMDGPU_DEVICE_ATTR(_name, _mode, _show, _store, _flags, ...) \
+ { .dev_attr = __ATTR(_name, _mode, _show, _store), \
+ .flags = _flags, \
+ ##__VA_ARGS__, }
+
+#define AMDGPU_DEVICE_ATTR(_name, _mode, _flags, ...) \
+ __AMDGPU_DEVICE_ATTR(_name, _mode, \
+ amdgpu_get_##_name, amdgpu_set_##_name, \
+ _flags, ##__VA_ARGS__)
+
+#define AMDGPU_DEVICE_ATTR_RW(_name, _flags, ...) \
+ AMDGPU_DEVICE_ATTR(_name, S_IRUGO | S_IWUSR, \
+ _flags, ##__VA_ARGS__)
+
+#define AMDGPU_DEVICE_ATTR_RO(_name, _flags, ...) \
+ __AMDGPU_DEVICE_ATTR(_name, S_IRUGO, \
+ amdgpu_get_##_name, NULL, \
+ _flags, ##__VA_ARGS__)
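
For reference, a hand-expanded sketch of what one RW table entry reduces to after the macros above are applied (the token pasting picks up the existing amdgpu_get_/amdgpu_set_ callbacks):

/* Roughly what AMDGPU_DEVICE_ATTR_RW(pp_dpm_sclk, ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF)
 * expands to; trailing __VA_ARGS__, when given, can initialize .attr_update.
 */
struct amdgpu_device_attr example_entry = {
	.dev_attr = __ATTR(pp_dpm_sclk, S_IRUGO | S_IWUSR,
			   amdgpu_get_pp_dpm_sclk,
			   amdgpu_set_pp_dpm_sclk),
	.flags    = ATTR_FLAG_BASIC | ATTR_FLAG_ONEVF,
};
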
+
void amdgpu_pm_acpi_event_handler(struct amdgpu_device *adev);
int amdgpu_pm_sysfs_init(struct amdgpu_device *adev);
int amdgpu_pm_virt_sysfs_init(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index deaa26808841..7301fdcfb8bc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -37,11 +37,11 @@
#include "amdgpu_ras.h"
-static void psp_set_funcs(struct amdgpu_device *adev);
-
static int psp_sysfs_init(struct amdgpu_device *adev);
static void psp_sysfs_fini(struct amdgpu_device *adev);
+static int psp_load_smu_fw(struct psp_context *psp);
+
/*
* Due to DF Cstate management centralized to PMFW, the firmware
* loading sequence will be updated as below:
@@ -80,8 +80,6 @@ static int psp_early_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
struct psp_context *psp = &adev->psp;
- psp_set_funcs(adev);
-
switch (adev->asic_type) {
case CHIP_VEGA10:
case CHIP_VEGA12:
@@ -201,6 +199,7 @@ psp_cmd_submit_buf(struct psp_context *psp,
int index;
int timeout = 2000;
bool ras_intr = false;
+ bool skip_unsupport = false;
mutex_lock(&psp->mutex);
@@ -232,6 +231,9 @@ psp_cmd_submit_buf(struct psp_context *psp,
amdgpu_asic_invalidate_hdp(psp->adev, NULL);
}
+ /* We allow TEE_ERROR_NOT_SUPPORTED for VMR command in SRIOV */
+ skip_unsupport = (psp->cmd_buf_mem->resp.status == 0xffff000a) && amdgpu_sriov_vf(psp->adev);
+
 	/* In some cases, psp response status is not 0 even when there is no
* problem while the command is submitted. Some version of PSP FW
* doesn't write 0 to that field.
@@ -239,7 +241,7 @@ psp_cmd_submit_buf(struct psp_context *psp,
* during psp initialization to avoid breaking hw_init and it doesn't
* return -EINVAL.
*/
- if ((psp->cmd_buf_mem->resp.status || !timeout) && !ras_intr) {
+ if (!skip_unsupport && (psp->cmd_buf_mem->resp.status || !timeout) && !ras_intr) {
if (ucode)
DRM_WARN("failed to load ucode id (%d) ",
ucode->ucode_id);
@@ -268,7 +270,7 @@ static void psp_prep_tmr_cmd_buf(struct psp_context *psp,
struct psp_gfx_cmd_resp *cmd,
uint64_t tmr_mc, uint32_t size)
{
- if (psp_support_vmr_ring(psp))
+ if (amdgpu_sriov_vf(psp->adev))
cmd->cmd_id = GFX_CMD_ID_SETUP_VMR;
else
cmd->cmd_id = GFX_CMD_ID_SETUP_TMR;
@@ -662,6 +664,121 @@ int psp_xgmi_initialize(struct psp_context *psp)
return ret;
}
+int psp_xgmi_get_hive_id(struct psp_context *psp, uint64_t *hive_id)
+{
+ struct ta_xgmi_shared_memory *xgmi_cmd;
+ int ret;
+
+ xgmi_cmd = (struct ta_xgmi_shared_memory*)psp->xgmi_context.xgmi_shared_buf;
+ memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory));
+
+ xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_HIVE_ID;
+
+ /* Invoke xgmi ta to get hive id */
+ ret = psp_xgmi_invoke(psp, xgmi_cmd->cmd_id);
+ if (ret)
+ return ret;
+
+ *hive_id = xgmi_cmd->xgmi_out_message.get_hive_id.hive_id;
+
+ return 0;
+}
+
+int psp_xgmi_get_node_id(struct psp_context *psp, uint64_t *node_id)
+{
+ struct ta_xgmi_shared_memory *xgmi_cmd;
+ int ret;
+
+ xgmi_cmd = (struct ta_xgmi_shared_memory*)psp->xgmi_context.xgmi_shared_buf;
+ memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory));
+
+ xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_NODE_ID;
+
+ /* Invoke xgmi ta to get the node id */
+ ret = psp_xgmi_invoke(psp, xgmi_cmd->cmd_id);
+ if (ret)
+ return ret;
+
+ *node_id = xgmi_cmd->xgmi_out_message.get_node_id.node_id;
+
+ return 0;
+}
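
A sketch of the intended caller side; the adev->gmc.xgmi.hive_id/node_id destinations are assumptions drawn from how the XGMI code stores these values elsewhere in the driver.

/* Sketch: both helpers only write through the out-pointer on success. */
static int example_query_xgmi_ids(struct amdgpu_device *adev)
{
	struct psp_context *psp = &adev->psp;
	int ret;

	ret = psp_xgmi_get_hive_id(psp, &adev->gmc.xgmi.hive_id);
	if (ret)
		return ret;

	return psp_xgmi_get_node_id(psp, &adev->gmc.xgmi.node_id);
}
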
+
+int psp_xgmi_get_topology_info(struct psp_context *psp,
+ int number_devices,
+ struct psp_xgmi_topology_info *topology)
+{
+ struct ta_xgmi_shared_memory *xgmi_cmd;
+ struct ta_xgmi_cmd_get_topology_info_input *topology_info_input;
+ struct ta_xgmi_cmd_get_topology_info_output *topology_info_output;
+ int i;
+ int ret;
+
+ if (!topology || topology->num_nodes > TA_XGMI__MAX_CONNECTED_NODES)
+ return -EINVAL;
+
+ xgmi_cmd = (struct ta_xgmi_shared_memory*)psp->xgmi_context.xgmi_shared_buf;
+ memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory));
+
+ /* Fill in the shared memory with topology information as input */
+ topology_info_input = &xgmi_cmd->xgmi_in_message.get_topology_info;
+ xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_GET_TOPOLOGY_INFO;
+ topology_info_input->num_nodes = number_devices;
+
+ for (i = 0; i < topology_info_input->num_nodes; i++) {
+ topology_info_input->nodes[i].node_id = topology->nodes[i].node_id;
+ topology_info_input->nodes[i].num_hops = topology->nodes[i].num_hops;
+ topology_info_input->nodes[i].is_sharing_enabled = topology->nodes[i].is_sharing_enabled;
+ topology_info_input->nodes[i].sdma_engine = topology->nodes[i].sdma_engine;
+ }
+
+ /* Invoke xgmi ta to get the topology information */
+ ret = psp_xgmi_invoke(psp, TA_COMMAND_XGMI__GET_GET_TOPOLOGY_INFO);
+ if (ret)
+ return ret;
+
+ /* Read the output topology information from the shared memory */
+ topology_info_output = &xgmi_cmd->xgmi_out_message.get_topology_info;
+ topology->num_nodes = xgmi_cmd->xgmi_out_message.get_topology_info.num_nodes;
+ for (i = 0; i < topology->num_nodes; i++) {
+ topology->nodes[i].node_id = topology_info_output->nodes[i].node_id;
+ topology->nodes[i].num_hops = topology_info_output->nodes[i].num_hops;
+ topology->nodes[i].is_sharing_enabled = topology_info_output->nodes[i].is_sharing_enabled;
+ topology->nodes[i].sdma_engine = topology_info_output->nodes[i].sdma_engine;
+ }
+
+ return 0;
+}
+
+int psp_xgmi_set_topology_info(struct psp_context *psp,
+ int number_devices,
+ struct psp_xgmi_topology_info *topology)
+{
+ struct ta_xgmi_shared_memory *xgmi_cmd;
+ struct ta_xgmi_cmd_get_topology_info_input *topology_info_input;
+ int i;
+
+ if (!topology || topology->num_nodes > TA_XGMI__MAX_CONNECTED_NODES)
+ return -EINVAL;
+
+ xgmi_cmd = (struct ta_xgmi_shared_memory*)psp->xgmi_context.xgmi_shared_buf;
+ memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory));
+
+ topology_info_input = &xgmi_cmd->xgmi_in_message.get_topology_info;
+ xgmi_cmd->cmd_id = TA_COMMAND_XGMI__SET_TOPOLOGY_INFO;
+ topology_info_input->num_nodes = number_devices;
+
+ for (i = 0; i < topology_info_input->num_nodes; i++) {
+ topology_info_input->nodes[i].node_id = topology->nodes[i].node_id;
+ topology_info_input->nodes[i].num_hops = topology->nodes[i].num_hops;
+ topology_info_input->nodes[i].is_sharing_enabled = 1;
+ topology_info_input->nodes[i].sdma_engine = topology->nodes[i].sdma_engine;
+ }
+
+ /* Invoke xgmi ta to set topology information */
+ return psp_xgmi_invoke(psp, TA_COMMAND_XGMI__SET_TOPOLOGY_INFO);
+}
+
// ras begin
static int psp_ras_init_shared_buf(struct psp_context *psp)
{
@@ -744,13 +861,40 @@ static int psp_ras_unload(struct psp_context *psp)
int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
{
+ struct ta_ras_shared_memory *ras_cmd;
+ int ret;
+
+ ras_cmd = (struct ta_ras_shared_memory *)psp->ras.ras_shared_buf;
+
/*
* TODO: bypass the loading in sriov for now
*/
if (amdgpu_sriov_vf(psp->adev))
return 0;
- return psp_ta_invoke(psp, ta_cmd_id, psp->ras.session_id);
+ ret = psp_ta_invoke(psp, ta_cmd_id, psp->ras.session_id);
+
+ if (amdgpu_ras_intr_triggered())
+ return ret;
+
+	if (ras_cmd->if_version > RAS_TA_HOST_IF_VER) {
+		DRM_WARN("RAS: Unsupported Interface");
+		return -EINVAL;
+	}
+
+ if (!ret) {
+ if (ras_cmd->ras_out_message.flags.err_inject_switch_disable_flag) {
+ dev_warn(psp->adev->dev, "ECC switch disabled\n");
+
+ ras_cmd->ras_status = TA_RAS_STATUS__ERROR_RAS_NOT_AVAILABLE;
+		} else if (ras_cmd->ras_out_message.flags.reg_access_failure_flag)
+ dev_warn(psp->adev->dev,
+ "RAS internal register access blocked\n");
+ }
+
+ return ret;
}
int psp_ras_enable_features(struct psp_context *psp,
@@ -834,6 +978,33 @@ static int psp_ras_initialize(struct psp_context *psp)
return 0;
}
+
+int psp_ras_trigger_error(struct psp_context *psp,
+ struct ta_ras_trigger_error_input *info)
+{
+ struct ta_ras_shared_memory *ras_cmd;
+ int ret;
+
+ if (!psp->ras.ras_initialized)
+ return -EINVAL;
+
+ ras_cmd = (struct ta_ras_shared_memory *)psp->ras.ras_shared_buf;
+ memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory));
+
+ ras_cmd->cmd_id = TA_RAS_COMMAND__TRIGGER_ERROR;
+ ras_cmd->ras_in_message.trigger_error = *info;
+
+ ret = psp_ras_invoke(psp, ras_cmd->cmd_id);
+ if (ret)
+ return -EINVAL;
+
+	/* If err_event_athub occurs, error injection was successful; however,
+	 * the return status from the TA is no longer reliable */
+ if (amdgpu_ras_intr_triggered())
+ return 0;
+
+ return ras_cmd->ras_status;
+}
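
A sketch of how a caller can interpret the mixed return convention (negative errno from the invoke path, a ta_ras_status code from the TA otherwise); the -EIO mapping is only an example policy, and the real wrapper further down feeds the value to amdgpu_ras_parse_status_code() instead.

/* Sketch: psp_ras_trigger_error() returns -EINVAL if the TA invocation
 * failed, 0 on success (or once the RAS interrupt has fired), and a
 * positive ta_ras_status code otherwise.
 */
static int example_check_inject(struct psp_context *psp,
				struct ta_ras_trigger_error_input *info)
{
	int ret = psp_ras_trigger_error(psp, info);

	if (ret < 0)
		return ret;	/* invocation failure */
	if (ret)
		return -EIO;	/* TA reported a ta_ras_status error */

	return 0;
}
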
// ras end
// HDCP start
@@ -884,6 +1055,7 @@ static int psp_hdcp_load(struct psp_context *psp)
if (!ret) {
psp->hdcp_context.hdcp_initialized = true;
psp->hdcp_context.session_id = cmd->resp.session_id;
+ mutex_init(&psp->hdcp_context.mutex);
}
kfree(cmd);
@@ -1029,6 +1201,7 @@ static int psp_dtm_load(struct psp_context *psp)
if (!ret) {
psp->dtm_context.dtm_initialized = true;
psp->dtm_context.session_id = cmd->resp.session_id;
+ mutex_init(&psp->dtm_context.mutex);
}
kfree(cmd);
@@ -1169,16 +1342,20 @@ static int psp_hw_start(struct psp_context *psp)
}
/*
- * For those ASICs with DF Cstate management centralized
+ * For ASICs with DF Cstate management centralized
* to PMFW, TMR setup should be performed after PMFW
* loaded and before other non-psp firmware loaded.
*/
- if (!psp->pmfw_centralized_cstate_management) {
- ret = psp_tmr_load(psp);
- if (ret) {
- DRM_ERROR("PSP load tmr failed!\n");
+ if (psp->pmfw_centralized_cstate_management) {
+ ret = psp_load_smu_fw(psp);
+ if (ret)
return ret;
- }
+ }
+
+ ret = psp_tmr_load(psp);
+ if (ret) {
+ DRM_ERROR("PSP load tmr failed!\n");
+ return ret;
}
return 0;
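
In summary, a sketch of the bring-up order this hunk produces when DF Cstate management lives in the PMFW:

/* Sketch of the resulting order in psp_hw_start(): SMU/PMFW first (only
 * when Cstate management is centralized there), then TMR setup, with the
 * remaining non-PSP firmwares loaded later by psp_np_fw_load().
 */
static int example_hw_start_tail(struct psp_context *psp)
{
	int ret;

	if (psp->pmfw_centralized_cstate_management) {
		ret = psp_load_smu_fw(psp);
		if (ret)
			return ret;
	}

	return psp_tmr_load(psp);
}
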
@@ -1355,7 +1532,7 @@ static int psp_prep_load_ip_fw_cmd_buf(struct amdgpu_firmware_info *ucode,
}
static int psp_execute_np_fw_load(struct psp_context *psp,
- struct amdgpu_firmware_info *ucode)
+ struct amdgpu_firmware_info *ucode)
{
int ret = 0;
@@ -1369,64 +1546,96 @@ static int psp_execute_np_fw_load(struct psp_context *psp,
return ret;
}
+static int psp_load_smu_fw(struct psp_context *psp)
+{
+ int ret;
+ struct amdgpu_device* adev = psp->adev;
+ struct amdgpu_firmware_info *ucode =
+ &adev->firmware.ucode[AMDGPU_UCODE_ID_SMC];
+ struct amdgpu_ras *ras = psp->ras.ras;
+
+ if (!ucode->fw || amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ if (adev->in_gpu_reset && ras && ras->supported) {
+ ret = amdgpu_dpm_set_mp1_state(adev, PP_MP1_STATE_UNLOAD);
+ if (ret) {
+ DRM_WARN("Failed to set MP1 state prepare for reload\n");
+ }
+ }
+
+ ret = psp_execute_np_fw_load(psp, ucode);
+
+ if (ret)
+ DRM_ERROR("PSP load smu failed!\n");
+
+ return ret;
+}
+
+static bool fw_load_skip_check(struct psp_context *psp,
+ struct amdgpu_firmware_info *ucode)
+{
+ if (!ucode->fw)
+ return true;
+
+ if (ucode->ucode_id == AMDGPU_UCODE_ID_SMC &&
+ (psp_smu_reload_quirk(psp) ||
+ psp->autoload_supported ||
+ psp->pmfw_centralized_cstate_management))
+ return true;
+
+ if (amdgpu_sriov_vf(psp->adev) &&
+ (ucode->ucode_id == AMDGPU_UCODE_ID_SDMA0
+ || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA1
+ || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA2
+ || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA3
+ || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA4
+ || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA5
+ || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA6
+ || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA7
+ || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_G
+ || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL
+ || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM
+ || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM
+ || ucode->ucode_id == AMDGPU_UCODE_ID_SMC))
+		/* skip ucode loading in SRIOV VF */
+ return true;
+
+ if (psp->autoload_supported &&
+ (ucode->ucode_id == AMDGPU_UCODE_ID_CP_MEC1_JT ||
+ ucode->ucode_id == AMDGPU_UCODE_ID_CP_MEC2_JT))
+ /* skip mec JT when autoload is enabled */
+ return true;
+
+ return false;
+}
+
static int psp_np_fw_load(struct psp_context *psp)
{
int i, ret;
struct amdgpu_firmware_info *ucode;
struct amdgpu_device* adev = psp->adev;
- if (psp->autoload_supported ||
- psp->pmfw_centralized_cstate_management) {
- ucode = &adev->firmware.ucode[AMDGPU_UCODE_ID_SMC];
- if (!ucode->fw || amdgpu_sriov_vf(adev))
- goto out;
-
- ret = psp_execute_np_fw_load(psp, ucode);
+ if (psp->autoload_supported &&
+ !psp->pmfw_centralized_cstate_management) {
+ ret = psp_load_smu_fw(psp);
if (ret)
return ret;
}
- if (psp->pmfw_centralized_cstate_management) {
- ret = psp_tmr_load(psp);
- if (ret) {
- DRM_ERROR("PSP load tmr failed!\n");
- return ret;
- }
- }
-
-out:
for (i = 0; i < adev->firmware.max_ucodes; i++) {
ucode = &adev->firmware.ucode[i];
- if (!ucode->fw)
- continue;
if (ucode->ucode_id == AMDGPU_UCODE_ID_SMC &&
- (psp_smu_reload_quirk(psp) ||
- psp->autoload_supported ||
- psp->pmfw_centralized_cstate_management))
- continue;
-
- if (amdgpu_sriov_vf(adev) &&
- (ucode->ucode_id == AMDGPU_UCODE_ID_SDMA0
- || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA1
- || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA2
- || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA3
- || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA4
- || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA5
- || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA6
- || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA7
- || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_G
- || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL
- || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM
- || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM
- || ucode->ucode_id == AMDGPU_UCODE_ID_SMC))
- /*skip ucode loading in SRIOV VF */
+ !fw_load_skip_check(psp, ucode)) {
+ ret = psp_load_smu_fw(psp);
+ if (ret)
+ return ret;
continue;
+ }
- if (psp->autoload_supported &&
- (ucode->ucode_id == AMDGPU_UCODE_ID_CP_MEC1_JT ||
- ucode->ucode_id == AMDGPU_UCODE_ID_CP_MEC2_JT))
- /* skip mec JT when autoload is enabled */
+ if (fw_load_skip_check(psp, ucode))
continue;
psp_print_fw_hdr(psp, ucode);
@@ -1438,17 +1647,12 @@ out:
 	/* Start rlc autoload after psp received all the gfx firmware */
if (psp->autoload_supported && ucode->ucode_id == (amdgpu_sriov_vf(adev) ?
AMDGPU_UCODE_ID_CP_MEC2 : AMDGPU_UCODE_ID_RLC_G)) {
- ret = psp_rlc_autoload(psp);
+ ret = psp_rlc_autoload_start(psp);
if (ret) {
DRM_ERROR("Failed to start rlc autoload\n");
return ret;
}
}
-#if 0
- /* check if firmware loaded sucessfully */
- if (!amdgpu_psp_check_fw_loading_status(adev, i))
- return -EINVAL;
-#endif
}
return 0;
@@ -1806,19 +2010,110 @@ int psp_ring_cmd_submit(struct psp_context *psp,
return 0;
}
-static bool psp_check_fw_loading_status(struct amdgpu_device *adev,
- enum AMDGPU_UCODE_ID ucode_type)
+int psp_init_asd_microcode(struct psp_context *psp,
+ const char *chip_name)
{
- struct amdgpu_firmware_info *ucode = NULL;
+ struct amdgpu_device *adev = psp->adev;
+ char fw_name[30];
+ const struct psp_firmware_header_v1_0 *asd_hdr;
+ int err = 0;
- if (!adev->firmware.fw_size)
- return false;
+ if (!chip_name) {
+ dev_err(adev->dev, "invalid chip name for asd microcode\n");
+ return -EINVAL;
+ }
- ucode = &adev->firmware.ucode[ucode_type];
- if (!ucode->fw || !ucode->ucode_size)
- return false;
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_asd.bin", chip_name);
+ err = request_firmware(&adev->psp.asd_fw, fw_name, adev->dev);
+ if (err)
+ goto out;
+
+ err = amdgpu_ucode_validate(adev->psp.asd_fw);
+ if (err)
+ goto out;
+
+ asd_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.asd_fw->data;
+ adev->psp.asd_fw_version = le32_to_cpu(asd_hdr->header.ucode_version);
+ adev->psp.asd_feature_version = le32_to_cpu(asd_hdr->ucode_feature_version);
+ adev->psp.asd_ucode_size = le32_to_cpu(asd_hdr->header.ucode_size_bytes);
+ adev->psp.asd_start_addr = (uint8_t *)asd_hdr +
+ le32_to_cpu(asd_hdr->header.ucode_array_offset_bytes);
+ return 0;
+out:
+	dev_err(adev->dev, "failed to initialize asd microcode\n");
+ release_firmware(adev->psp.asd_fw);
+ adev->psp.asd_fw = NULL;
+ return err;
+}
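
A sketch of how an ASIC-specific init_microcode callback can now delegate to the shared helper; the "navi10" chip name is only an example.

/* Sketch: per-ASIC PSP code only has to pass its chip name. */
static int example_init_microcode(struct psp_context *psp)
{
	return psp_init_asd_microcode(psp, "navi10");
}
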
+
+int psp_init_sos_microcode(struct psp_context *psp,
+ const char *chip_name)
+{
+ struct amdgpu_device *adev = psp->adev;
+ char fw_name[30];
+ const struct psp_firmware_header_v1_0 *sos_hdr;
+ const struct psp_firmware_header_v1_1 *sos_hdr_v1_1;
+ const struct psp_firmware_header_v1_2 *sos_hdr_v1_2;
+ int err = 0;
+
+ if (!chip_name) {
+ dev_err(adev->dev, "invalid chip name for sos microcode\n");
+ return -EINVAL;
+ }
+
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sos.bin", chip_name);
+ err = request_firmware(&adev->psp.sos_fw, fw_name, adev->dev);
+ if (err)
+ goto out;
+
+ err = amdgpu_ucode_validate(adev->psp.sos_fw);
+ if (err)
+ goto out;
+
+ sos_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.sos_fw->data;
+ amdgpu_ucode_print_psp_hdr(&sos_hdr->header);
+
+ switch (sos_hdr->header.header_version_major) {
+ case 1:
+ adev->psp.sos_fw_version = le32_to_cpu(sos_hdr->header.ucode_version);
+ adev->psp.sos_feature_version = le32_to_cpu(sos_hdr->ucode_feature_version);
+ adev->psp.sos_bin_size = le32_to_cpu(sos_hdr->sos_size_bytes);
+ adev->psp.sys_bin_size = le32_to_cpu(sos_hdr->sos_offset_bytes);
+ adev->psp.sys_start_addr = (uint8_t *)sos_hdr +
+ le32_to_cpu(sos_hdr->header.ucode_array_offset_bytes);
+ adev->psp.sos_start_addr = (uint8_t *)adev->psp.sys_start_addr +
+ le32_to_cpu(sos_hdr->sos_offset_bytes);
+ if (sos_hdr->header.header_version_minor == 1) {
+ sos_hdr_v1_1 = (const struct psp_firmware_header_v1_1 *)adev->psp.sos_fw->data;
+ adev->psp.toc_bin_size = le32_to_cpu(sos_hdr_v1_1->toc_size_bytes);
+ adev->psp.toc_start_addr = (uint8_t *)adev->psp.sys_start_addr +
+ le32_to_cpu(sos_hdr_v1_1->toc_offset_bytes);
+ adev->psp.kdb_bin_size = le32_to_cpu(sos_hdr_v1_1->kdb_size_bytes);
+ adev->psp.kdb_start_addr = (uint8_t *)adev->psp.sys_start_addr +
+ le32_to_cpu(sos_hdr_v1_1->kdb_offset_bytes);
+ }
+ if (sos_hdr->header.header_version_minor == 2) {
+ sos_hdr_v1_2 = (const struct psp_firmware_header_v1_2 *)adev->psp.sos_fw->data;
+ adev->psp.kdb_bin_size = le32_to_cpu(sos_hdr_v1_2->kdb_size_bytes);
+ adev->psp.kdb_start_addr = (uint8_t *)adev->psp.sys_start_addr +
+ le32_to_cpu(sos_hdr_v1_2->kdb_offset_bytes);
+ }
+ break;
+ default:
+ dev_err(adev->dev,
+ "unsupported psp sos firmware\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+ return 0;
+out:
+ dev_err(adev->dev,
+ "failed to init sos firmware\n");
+ release_firmware(adev->psp.sos_fw);
+ adev->psp.sos_fw = NULL;
- return psp_compare_sram_data(&adev->psp, ucode, ucode_type);
+ return err;
}
static int psp_set_clockgating_state(void *handle,
@@ -1957,16 +2252,6 @@ static void psp_sysfs_fini(struct amdgpu_device *adev)
device_remove_file(adev->dev, &dev_attr_usbc_pd_fw);
}
-static const struct amdgpu_psp_funcs psp_funcs = {
- .check_fw_loading_status = psp_check_fw_loading_status,
-};
-
-static void psp_set_funcs(struct amdgpu_device *adev)
-{
- if (NULL == adev->firmware.funcs)
- adev->firmware.funcs = &psp_funcs;
-}
-
const struct amdgpu_ip_block_version psp_v3_1_ip_block =
{
.type = AMD_IP_BLOCK_TYPE_PSP,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
index 297435c0c7c1..2a56ad996d83 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
@@ -93,22 +93,8 @@ struct psp_funcs
enum psp_ring_type ring_type);
int (*ring_destroy)(struct psp_context *psp,
enum psp_ring_type ring_type);
- bool (*compare_sram_data)(struct psp_context *psp,
- struct amdgpu_firmware_info *ucode,
- enum AMDGPU_UCODE_ID ucode_type);
bool (*smu_reload_quirk)(struct psp_context *psp);
int (*mode1_reset)(struct psp_context *psp);
- int (*xgmi_get_node_id)(struct psp_context *psp, uint64_t *node_id);
- int (*xgmi_get_hive_id)(struct psp_context *psp, uint64_t *hive_id);
- int (*xgmi_get_topology_info)(struct psp_context *psp, int number_devices,
- struct psp_xgmi_topology_info *topology);
- int (*xgmi_set_topology_info)(struct psp_context *psp, int number_devices,
- struct psp_xgmi_topology_info *topology);
- bool (*support_vmr_ring)(struct psp_context *psp);
- int (*ras_trigger_error)(struct psp_context *psp,
- struct ta_ras_trigger_error_input *info);
- int (*ras_cure_posion)(struct psp_context *psp, uint64_t *mode_ptr);
- int (*rlc_autoload_start)(struct psp_context *psp);
int (*mem_training_init)(struct psp_context *psp);
void (*mem_training_fini)(struct psp_context *psp);
int (*mem_training)(struct psp_context *psp, uint32_t ops);
@@ -161,6 +147,7 @@ struct psp_hdcp_context {
struct amdgpu_bo *hdcp_shared_bo;
uint64_t hdcp_shared_mc_addr;
void *hdcp_shared_buf;
+ struct mutex mutex;
};
struct psp_dtm_context {
@@ -169,6 +156,7 @@ struct psp_dtm_context {
struct amdgpu_bo *dtm_shared_bo;
uint64_t dtm_shared_mc_addr;
void *dtm_shared_buf;
+ struct mutex mutex;
};
#define MEM_TRAIN_SYSTEM_SIGNATURE 0x54534942
@@ -306,8 +294,6 @@ struct amdgpu_psp_funcs {
#define psp_ring_create(psp, type) (psp)->funcs->ring_create((psp), (type))
#define psp_ring_stop(psp, type) (psp)->funcs->ring_stop((psp), (type))
#define psp_ring_destroy(psp, type) ((psp)->funcs->ring_destroy((psp), (type)))
-#define psp_compare_sram_data(psp, ucode, type) \
- (psp)->funcs->compare_sram_data((psp), (ucode), (type))
#define psp_init_microcode(psp) \
((psp)->funcs->init_microcode ? (psp)->funcs->init_microcode((psp)) : 0)
#define psp_bootloader_load_kdb(psp) \
@@ -318,22 +304,8 @@ struct amdgpu_psp_funcs {
((psp)->funcs->bootloader_load_sos ? (psp)->funcs->bootloader_load_sos((psp)) : 0)
#define psp_smu_reload_quirk(psp) \
((psp)->funcs->smu_reload_quirk ? (psp)->funcs->smu_reload_quirk((psp)) : false)
-#define psp_support_vmr_ring(psp) \
- ((psp)->funcs->support_vmr_ring ? (psp)->funcs->support_vmr_ring((psp)) : false)
#define psp_mode1_reset(psp) \
((psp)->funcs->mode1_reset ? (psp)->funcs->mode1_reset((psp)) : false)
-#define psp_xgmi_get_node_id(psp, node_id) \
- ((psp)->funcs->xgmi_get_node_id ? (psp)->funcs->xgmi_get_node_id((psp), (node_id)) : -EINVAL)
-#define psp_xgmi_get_hive_id(psp, hive_id) \
- ((psp)->funcs->xgmi_get_hive_id ? (psp)->funcs->xgmi_get_hive_id((psp), (hive_id)) : -EINVAL)
-#define psp_xgmi_get_topology_info(psp, num_device, topology) \
- ((psp)->funcs->xgmi_get_topology_info ? \
- (psp)->funcs->xgmi_get_topology_info((psp), (num_device), (topology)) : -EINVAL)
-#define psp_xgmi_set_topology_info(psp, num_device, topology) \
- ((psp)->funcs->xgmi_set_topology_info ? \
- (psp)->funcs->xgmi_set_topology_info((psp), (num_device), (topology)) : -EINVAL)
-#define psp_rlc_autoload(psp) \
- ((psp)->funcs->rlc_autoload_start ? (psp)->funcs->rlc_autoload_start((psp)) : 0)
#define psp_mem_training_init(psp) \
((psp)->funcs->mem_training_init ? (psp)->funcs->mem_training_init((psp)) : 0)
#define psp_mem_training_fini(psp) \
@@ -341,15 +313,6 @@ struct amdgpu_psp_funcs {
#define psp_mem_training(psp, ops) \
((psp)->funcs->mem_training ? (psp)->funcs->mem_training((psp), (ops)) : 0)
-#define amdgpu_psp_check_fw_loading_status(adev, i) (adev)->firmware.funcs->check_fw_loading_status((adev), (i))
-
-#define psp_ras_trigger_error(psp, info) \
- ((psp)->funcs->ras_trigger_error ? \
- (psp)->funcs->ras_trigger_error((psp), (info)) : -EINVAL)
-#define psp_ras_cure_posion(psp, addr) \
- ((psp)->funcs->ras_cure_posion ? \
- (psp)->funcs->ras_cure_posion(psp, (addr)) : -EINVAL)
-
#define psp_ring_get_wptr(psp) (psp)->funcs->ring_get_wptr((psp))
#define psp_ring_set_wptr(psp, value) (psp)->funcs->ring_set_wptr((psp), (value))
@@ -377,10 +340,21 @@ int psp_update_vcn_sram(struct amdgpu_device *adev, int inst_idx,
int psp_xgmi_initialize(struct psp_context *psp);
int psp_xgmi_terminate(struct psp_context *psp);
int psp_xgmi_invoke(struct psp_context *psp, uint32_t ta_cmd_id);
+int psp_xgmi_get_hive_id(struct psp_context *psp, uint64_t *hive_id);
+int psp_xgmi_get_node_id(struct psp_context *psp, uint64_t *node_id);
+int psp_xgmi_get_topology_info(struct psp_context *psp,
+ int number_devices,
+ struct psp_xgmi_topology_info *topology);
+int psp_xgmi_set_topology_info(struct psp_context *psp,
+ int number_devices,
+ struct psp_xgmi_topology_info *topology);
int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id);
int psp_ras_enable_features(struct psp_context *psp,
union ta_ras_cmd_input *info, bool enable);
+int psp_ras_trigger_error(struct psp_context *psp,
+ struct ta_ras_trigger_error_input *info);
+
int psp_hdcp_invoke(struct psp_context *psp, uint32_t ta_cmd_id);
int psp_dtm_invoke(struct psp_context *psp, uint32_t ta_cmd_id);
@@ -393,4 +367,8 @@ int psp_ring_cmd_submit(struct psp_context *psp,
uint64_t cmd_buf_mc_addr,
uint64_t fence_mc_addr,
int index);
+int psp_init_asd_microcode(struct psp_context *psp,
+ const char *chip_name);
+int psp_init_sos_microcode(struct psp_context *psp,
+ const char *chip_name);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index ab379b44679c..50fe08bf2f72 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -80,6 +80,20 @@ atomic_t amdgpu_ras_in_intr = ATOMIC_INIT(0);
static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
uint64_t addr);
+void amdgpu_ras_set_error_query_ready(struct amdgpu_device *adev, bool ready)
+{
+ if (adev && amdgpu_ras_get_context(adev))
+ amdgpu_ras_get_context(adev)->error_query_ready = ready;
+}
+
+bool amdgpu_ras_get_error_query_ready(struct amdgpu_device *adev)
+{
+ if (adev && amdgpu_ras_get_context(adev))
+ return amdgpu_ras_get_context(adev)->error_query_ready;
+
+ return false;
+}
+
static ssize_t amdgpu_ras_debugfs_read(struct file *f, char __user *buf,
size_t size, loff_t *pos)
{
@@ -281,8 +295,9 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user *
struct ras_debug_if data;
int ret = 0;
- if (amdgpu_ras_intr_triggered()) {
- DRM_WARN("RAS WARN: error injection currently inaccessible\n");
+ if (!amdgpu_ras_get_error_query_ready(adev)) {
+ dev_warn(adev->dev, "RAS WARN: error injection "
+ "currently inaccessible\n");
return size;
}
@@ -310,7 +325,8 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user *
/* umc ce/ue error injection for a bad page is not allowed */
if ((data.head.block == AMDGPU_RAS_BLOCK__UMC) &&
amdgpu_ras_check_bad_page(adev, data.inject.address)) {
- DRM_WARN("RAS WARN: 0x%llx has been marked as bad before error injection!\n",
+ dev_warn(adev->dev, "RAS WARN: 0x%llx has been marked "
+ "as bad before error injection!\n",
data.inject.address);
break;
}
@@ -399,7 +415,7 @@ static ssize_t amdgpu_ras_sysfs_read(struct device *dev,
.head = obj->head,
};
- if (amdgpu_ras_intr_triggered())
+ if (!amdgpu_ras_get_error_query_ready(obj->adev))
return snprintf(buf, PAGE_SIZE,
"Query currently inaccessible\n");
@@ -486,6 +502,29 @@ struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev,
}
/* obj end */
+void amdgpu_ras_parse_status_code(struct amdgpu_device *adev,
+				  const char *invoke_type,
+				  const char *block_name,
+				  enum ta_ras_status ret)
+{
+ switch (ret) {
+ case TA_RAS_STATUS__SUCCESS:
+ return;
+ case TA_RAS_STATUS__ERROR_RAS_NOT_AVAILABLE:
+ dev_warn(adev->dev,
+ "RAS WARN: %s %s currently unavailable\n",
+ invoke_type,
+ block_name);
+ break;
+ default:
+ dev_err(adev->dev,
+ "RAS ERROR: %s %s error failed ret 0x%X\n",
+ invoke_type,
+ block_name,
+ ret);
+ }
+}
+
/* feature ctl begin */
static int amdgpu_ras_is_feature_allowed(struct amdgpu_device *adev,
struct ras_common_if *head)
@@ -549,19 +588,23 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
struct ras_common_if *head, bool enable)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
- union ta_ras_cmd_input info;
+ union ta_ras_cmd_input *info;
int ret;
if (!con)
return -EINVAL;
+ info = kzalloc(sizeof(union ta_ras_cmd_input), GFP_KERNEL);
+ if (!info)
+ return -ENOMEM;
+
if (!enable) {
- info.disable_features = (struct ta_ras_disable_features_input) {
+ info->disable_features = (struct ta_ras_disable_features_input) {
.block_id = amdgpu_ras_block_to_ta(head->block),
.error_type = amdgpu_ras_error_to_ta(head->type),
};
} else {
- info.enable_features = (struct ta_ras_enable_features_input) {
+ info->enable_features = (struct ta_ras_enable_features_input) {
.block_id = amdgpu_ras_block_to_ta(head->block),
.error_type = amdgpu_ras_error_to_ta(head->type),
};
@@ -570,26 +613,33 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
/* Do not enable if it is not allowed. */
WARN_ON(enable && !amdgpu_ras_is_feature_allowed(adev, head));
 	/* Are we already in the state we are going to set? */
- if (!(!!enable ^ !!amdgpu_ras_is_feature_enabled(adev, head)))
- return 0;
+ if (!(!!enable ^ !!amdgpu_ras_is_feature_enabled(adev, head))) {
+ ret = 0;
+ goto out;
+ }
if (!amdgpu_ras_intr_triggered()) {
- ret = psp_ras_enable_features(&adev->psp, &info, enable);
+ ret = psp_ras_enable_features(&adev->psp, info, enable);
if (ret) {
- DRM_ERROR("RAS ERROR: %s %s feature failed ret %d\n",
- enable ? "enable":"disable",
- ras_block_str(head->block),
- ret);
+ amdgpu_ras_parse_status_code(adev,
+ enable ? "enable":"disable",
+ ras_block_str(head->block),
+ (enum ta_ras_status)ret);
if (ret == TA_RAS_STATUS__RESET_NEEDED)
- return -EAGAIN;
- return -EINVAL;
+ ret = -EAGAIN;
+ else
+ ret = -EINVAL;
+
+ goto out;
}
}
/* setup the obj */
__amdgpu_ras_feature_enable(adev, head, enable);
-
- return 0;
+ ret = 0;
+out:
+ kfree(info);
+ return ret;
}
/* Only used in device probe stage and called only once. */
@@ -618,7 +668,8 @@ int amdgpu_ras_feature_enable_on_boot(struct amdgpu_device *adev,
if (ret == -EINVAL) {
ret = __amdgpu_ras_feature_enable(adev, head, 1);
if (!ret)
- DRM_INFO("RAS INFO: %s setup object\n",
+ dev_info(adev->dev,
+ "RAS INFO: %s setup object\n",
ras_block_str(head->block));
}
} else {
@@ -744,17 +795,48 @@ int amdgpu_ras_error_query(struct amdgpu_device *adev,
info->ce_count = obj->err_data.ce_count;
if (err_data.ce_count) {
- dev_info(adev->dev, "%ld correctable errors detected in %s block\n",
- obj->err_data.ce_count, ras_block_str(info->head.block));
+ dev_info(adev->dev, "%ld correctable hardware errors "
+ "detected in %s block, no user "
+ "action is needed.\n",
+ obj->err_data.ce_count,
+ ras_block_str(info->head.block));
}
if (err_data.ue_count) {
- dev_info(adev->dev, "%ld uncorrectable errors detected in %s block\n",
- obj->err_data.ue_count, ras_block_str(info->head.block));
+ dev_info(adev->dev, "%ld uncorrectable hardware errors "
+ "detected in %s block\n",
+ obj->err_data.ue_count,
+ ras_block_str(info->head.block));
}
return 0;
}
+/* Trigger XGMI/WAFL error */
+int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev,
+ struct ta_ras_trigger_error_input *block_info)
+{
+ int ret;
+
+ if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
+ dev_warn(adev->dev, "Failed to disallow df cstate");
+
+ if (amdgpu_dpm_allow_xgmi_power_down(adev, false))
+ dev_warn(adev->dev, "Failed to disallow XGMI power down");
+
+ ret = psp_ras_trigger_error(&adev->psp, block_info);
+
+ if (amdgpu_ras_intr_triggered())
+ return ret;
+
+ if (amdgpu_dpm_allow_xgmi_power_down(adev, true))
+ dev_warn(adev->dev, "Failed to allow XGMI power down");
+
+	if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW))
+		dev_warn(adev->dev, "Failed to allow df cstate");
+
+ return ret;
+}
+
/* wrapper of psp_ras_trigger_error */
int amdgpu_ras_error_inject(struct amdgpu_device *adev,
struct ras_inject_if *info)
@@ -788,20 +870,22 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev,
break;
case AMDGPU_RAS_BLOCK__UMC:
case AMDGPU_RAS_BLOCK__MMHUB:
- case AMDGPU_RAS_BLOCK__XGMI_WAFL:
case AMDGPU_RAS_BLOCK__PCIE_BIF:
ret = psp_ras_trigger_error(&adev->psp, &block_info);
break;
+ case AMDGPU_RAS_BLOCK__XGMI_WAFL:
+ ret = amdgpu_ras_error_inject_xgmi(adev, &block_info);
+ break;
default:
- DRM_INFO("%s error injection is not supported yet\n",
+ dev_info(adev->dev, "%s error injection is not supported yet\n",
ras_block_str(info->head.block));
ret = -EINVAL;
}
- if (ret)
- DRM_ERROR("RAS ERROR: inject %s error failed ret %d\n",
- ras_block_str(info->head.block),
- ret);
+ amdgpu_ras_parse_status_code(adev,
+ "inject",
+ ras_block_str(info->head.block),
+ (enum ta_ras_status)ret);
return ret;
}
@@ -1430,9 +1514,10 @@ static void amdgpu_ras_do_recovery(struct work_struct *work)
struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, false);
/* Build list of devices to query RAS related errors */
- if (hive && adev->gmc.xgmi.num_physical_nodes > 1) {
+ if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
device_list_handle = &hive->device_list;
- } else {
+ else {
+ INIT_LIST_HEAD(&device_list);
list_add_tail(&adev->gmc.xgmi.head, &device_list);
device_list_handle = &device_list;
}
@@ -1535,7 +1620,7 @@ static int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev)
&data->bps[control->num_recs],
true,
save_count)) {
- DRM_ERROR("Failed to save EEPROM table data!");
+ dev_err(adev->dev, "Failed to save EEPROM table data!");
return -EIO;
}
@@ -1563,7 +1648,7 @@ static int amdgpu_ras_load_bad_pages(struct amdgpu_device *adev)
if (amdgpu_ras_eeprom_process_recods(control, bps, false,
control->num_recs)) {
- DRM_ERROR("Failed to load EEPROM table records!");
+ dev_err(adev->dev, "Failed to load EEPROM table records!");
ret = -EIO;
goto out;
}
@@ -1637,7 +1722,8 @@ int amdgpu_ras_reserve_bad_pages(struct amdgpu_device *adev)
AMDGPU_GPU_PAGE_SIZE,
AMDGPU_GEM_DOMAIN_VRAM,
&bo, NULL))
- DRM_WARN("RAS WARN: reserve vram for retired page %llx fail\n", bp);
+ dev_warn(adev->dev, "RAS WARN: reserve vram for "
+ "retired page %llx fail\n", bp);
data->bps_bo[i] = bo;
data->last_reserved = i + 1;
@@ -1725,7 +1811,7 @@ free:
kfree(*data);
con->eh_data = NULL;
out:
- DRM_WARN("Failed to initialize ras recovery!\n");
+ dev_warn(adev->dev, "Failed to initialize ras recovery!\n");
return ret;
}
@@ -1787,18 +1873,18 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev,
return;
if (amdgpu_atomfirmware_mem_ecc_supported(adev)) {
- DRM_INFO("HBM ECC is active.\n");
+ dev_info(adev->dev, "HBM ECC is active.\n");
*hw_supported |= (1 << AMDGPU_RAS_BLOCK__UMC |
1 << AMDGPU_RAS_BLOCK__DF);
} else
- DRM_INFO("HBM ECC is not presented.\n");
+		dev_info(adev->dev, "HBM ECC is not present.\n");
if (amdgpu_atomfirmware_sram_ecc_supported(adev)) {
- DRM_INFO("SRAM ECC is active.\n");
+ dev_info(adev->dev, "SRAM ECC is active.\n");
*hw_supported |= ~(1 << AMDGPU_RAS_BLOCK__UMC |
1 << AMDGPU_RAS_BLOCK__DF);
} else
- DRM_INFO("SRAM ECC is not presented.\n");
+		dev_info(adev->dev, "SRAM ECC is not present.\n");
/* hw_supported needs to be aligned with RAS block mask. */
*hw_supported &= AMDGPU_RAS_BLOCK_MASK;
@@ -1855,7 +1941,7 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
if (amdgpu_ras_fs_init(adev))
goto fs_out;
- DRM_INFO("RAS INFO: ras initialized successfully, "
+ dev_info(adev->dev, "RAS INFO: ras initialized successfully, "
"hardware ability[%x] ras_mask[%x]\n",
con->hw_supported, con->supported);
return 0;
@@ -2037,7 +2123,8 @@ void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev)
return;
if (atomic_cmpxchg(&amdgpu_ras_in_intr, 0, 1) == 0) {
- DRM_WARN("RAS event of type ERREVENT_ATHUB_INTERRUPT detected!\n");
+		dev_info(adev->dev, "uncorrectable hardware error "
+			 "(ERREVENT_ATHUB_INTERRUPT) detected!\n");
amdgpu_ras_reset_gpu(adev);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index 55c3eceb390d..e7df5d8429f8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -334,6 +334,8 @@ struct amdgpu_ras {
uint32_t flags;
bool reboot;
struct amdgpu_ras_eeprom_control eeprom_control;
+
+ bool error_query_ready;
};
struct ras_fs_data {
@@ -629,4 +631,6 @@ static inline void amdgpu_ras_intr_cleared(void)
void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev);
+void amdgpu_ras_set_error_query_ready(struct amdgpu_device *adev, bool ready);
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index a7e1d0425ed0..13ea8ebc421c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -162,11 +162,13 @@ void amdgpu_ring_undo(struct amdgpu_ring *ring)
* Returns 0 on success, error on failure.
*/
int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
- unsigned max_dw, struct amdgpu_irq_src *irq_src,
- unsigned irq_type)
+ unsigned int max_dw, struct amdgpu_irq_src *irq_src,
+ unsigned int irq_type, unsigned int hw_prio)
{
int r, i;
int sched_hw_submission = amdgpu_sched_hw_submission;
+ u32 *num_sched;
+ u32 hw_ip;
/* Set the hw submission limit higher for KIQ because
* it's used for a number of gfx/compute tasks by both
@@ -258,6 +260,13 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
ring->priority = DRM_SCHED_PRIORITY_NORMAL;
mutex_init(&ring->priority_mutex);
+ if (!ring->no_scheduler) {
+ hw_ip = ring->funcs->type;
+ num_sched = &adev->gpu_sched[hw_ip][hw_prio].num_scheds;
+ adev->gpu_sched[hw_ip][hw_prio].sched[(*num_sched)++] =
+ &ring->sched;
+ }
+
for (i = 0; i < DRM_SCHED_PRIORITY_MAX; ++i)
atomic_set(&ring->num_jobs[i], 0);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 9a443013d70d..be218754629a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -30,11 +30,15 @@
/* max number of rings */
#define AMDGPU_MAX_RINGS 28
+#define AMDGPU_MAX_HWIP_RINGS 8
#define AMDGPU_MAX_GFX_RINGS 2
#define AMDGPU_MAX_COMPUTE_RINGS 8
#define AMDGPU_MAX_VCE_RINGS 3
#define AMDGPU_MAX_UVD_ENC_RINGS 2
+#define AMDGPU_RING_PRIO_DEFAULT 1
+#define AMDGPU_RING_PRIO_MAX AMDGPU_GFX_PIPE_PRIO_MAX
+
/* some special values for the owner field */
#define AMDGPU_FENCE_OWNER_UNDEFINED ((void *)0ul)
#define AMDGPU_FENCE_OWNER_VM ((void *)1ul)
@@ -46,17 +50,30 @@
#define to_amdgpu_ring(s) container_of((s), struct amdgpu_ring, sched)
+#define AMDGPU_IB_POOL_SIZE (1024 * 1024)
+
enum amdgpu_ring_type {
- AMDGPU_RING_TYPE_GFX,
- AMDGPU_RING_TYPE_COMPUTE,
- AMDGPU_RING_TYPE_SDMA,
- AMDGPU_RING_TYPE_UVD,
- AMDGPU_RING_TYPE_VCE,
- AMDGPU_RING_TYPE_KIQ,
- AMDGPU_RING_TYPE_UVD_ENC,
- AMDGPU_RING_TYPE_VCN_DEC,
- AMDGPU_RING_TYPE_VCN_ENC,
- AMDGPU_RING_TYPE_VCN_JPEG
+ AMDGPU_RING_TYPE_GFX = AMDGPU_HW_IP_GFX,
+ AMDGPU_RING_TYPE_COMPUTE = AMDGPU_HW_IP_COMPUTE,
+ AMDGPU_RING_TYPE_SDMA = AMDGPU_HW_IP_DMA,
+ AMDGPU_RING_TYPE_UVD = AMDGPU_HW_IP_UVD,
+ AMDGPU_RING_TYPE_VCE = AMDGPU_HW_IP_VCE,
+ AMDGPU_RING_TYPE_UVD_ENC = AMDGPU_HW_IP_UVD_ENC,
+ AMDGPU_RING_TYPE_VCN_DEC = AMDGPU_HW_IP_VCN_DEC,
+ AMDGPU_RING_TYPE_VCN_ENC = AMDGPU_HW_IP_VCN_ENC,
+ AMDGPU_RING_TYPE_VCN_JPEG = AMDGPU_HW_IP_VCN_JPEG,
+ AMDGPU_RING_TYPE_KIQ
+};
+
+enum amdgpu_ib_pool_type {
+ /* Normal submissions to the top of the pipeline. */
+ AMDGPU_IB_POOL_DELAYED,
+ /* Immediate submissions to the bottom of the pipeline. */
+ AMDGPU_IB_POOL_IMMEDIATE,
+ /* Direct submission to the ring buffer during init and reset. */
+ AMDGPU_IB_POOL_DIRECT,
+
+ AMDGPU_IB_POOL_MAX
};
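
A sketch of how a submission path would pick a pool; note that the amdgpu_ib_get() signature used here (size and pool-type arguments) is an assumption about the companion changes in this series, not something shown in this hunk.

/* Sketch: delayed pool for normal top-of-pipe work, immediate for
 * bottom-of-pipe page-table updates, direct only during init/reset.
 */
static int example_get_ib(struct amdgpu_device *adev, struct amdgpu_vm *vm,
			  struct amdgpu_ib *ib, bool in_reset)
{
	enum amdgpu_ib_pool_type pool =
		in_reset ? AMDGPU_IB_POOL_DIRECT : AMDGPU_IB_POOL_DELAYED;

	return amdgpu_ib_get(adev, vm, 256, pool, ib);
}
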
struct amdgpu_device;
@@ -65,6 +82,11 @@ struct amdgpu_ib;
struct amdgpu_cs_parser;
struct amdgpu_job;
+struct amdgpu_sched {
+ u32 num_scheds;
+ struct drm_gpu_scheduler *sched[AMDGPU_MAX_HWIP_RINGS];
+};
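
A sketch of the consumer side: the per-IP scheduler lists that amdgpu_ring_init() now fills are meant to be handed to the DRM scheduler when a context entity is created (the drm_sched_entity_init() call shape is assumed from the scheduler API of this period).

/* Sketch: pick the scheduler list collected for a HW IP / priority pair. */
static int example_entity_init(struct amdgpu_device *adev,
			       struct drm_sched_entity *entity,
			       u32 hw_ip, u32 hw_prio)
{
	struct amdgpu_sched *gsched = &adev->gpu_sched[hw_ip][hw_prio];

	return drm_sched_entity_init(entity, DRM_SCHED_PRIORITY_NORMAL,
				     gsched->sched, gsched->num_scheds, NULL);
}
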
+
/*
* Fences.
*/
@@ -96,7 +118,8 @@ void amdgpu_fence_driver_suspend(struct amdgpu_device *adev);
void amdgpu_fence_driver_resume(struct amdgpu_device *adev);
int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence,
unsigned flags);
-int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s);
+int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s,
+ uint32_t timeout);
bool amdgpu_fence_process(struct amdgpu_ring *ring);
int amdgpu_fence_wait_empty(struct amdgpu_ring *ring);
signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring,
@@ -159,17 +182,20 @@ struct amdgpu_ring_funcs {
void (*end_use)(struct amdgpu_ring *ring);
void (*emit_switch_buffer) (struct amdgpu_ring *ring);
void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags);
- void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg);
+ void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t reg_val_offs);
void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val);
void (*emit_reg_wait)(struct amdgpu_ring *ring, uint32_t reg,
uint32_t val, uint32_t mask);
void (*emit_reg_write_reg_wait)(struct amdgpu_ring *ring,
uint32_t reg0, uint32_t reg1,
uint32_t ref, uint32_t mask);
- void (*emit_tmz)(struct amdgpu_ring *ring, bool start);
+ void (*emit_frame_cntl)(struct amdgpu_ring *ring, bool start,
+ bool secure);
/* Try to soft recover the ring to make the fence signal */
void (*soft_recovery)(struct amdgpu_ring *ring, unsigned vmid);
int (*preempt_ib)(struct amdgpu_ring *ring);
+ void (*emit_mem_sync)(struct amdgpu_ring *ring);
};
struct amdgpu_ring {
@@ -214,12 +240,12 @@ struct amdgpu_ring {
unsigned vm_inv_eng;
struct dma_fence *vmid_wait;
bool has_compute_vm_bug;
+ bool no_scheduler;
atomic_t num_jobs[DRM_SCHED_PRIORITY_MAX];
struct mutex priority_mutex;
/* protected by priority_mutex */
int priority;
- bool has_high_prio;
#if defined(CONFIG_DEBUG_FS)
struct dentry *ent;
@@ -241,11 +267,11 @@ struct amdgpu_ring {
#define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r))
#define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r))
#define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d))
-#define amdgpu_ring_emit_rreg(r, d) (r)->funcs->emit_rreg((r), (d))
+#define amdgpu_ring_emit_rreg(r, d, o) (r)->funcs->emit_rreg((r), (d), (o))
#define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v))
#define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m))
#define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m))
-#define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b))
+#define amdgpu_ring_emit_frame_cntl(r, b, s) (r)->funcs->emit_frame_cntl((r), (b), (s))
#define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
#define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
#define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
@@ -257,8 +283,8 @@ void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
void amdgpu_ring_commit(struct amdgpu_ring *ring);
void amdgpu_ring_undo(struct amdgpu_ring *ring);
int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
- unsigned ring_size, struct amdgpu_irq_src *irq_src,
- unsigned irq_type);
+ unsigned int ring_size, struct amdgpu_irq_src *irq_src,
+ unsigned int irq_type, unsigned int prio);
void amdgpu_ring_fini(struct amdgpu_ring *ring);
void amdgpu_ring_emit_reg_write_reg_wait_helper(struct amdgpu_ring *ring,
uint32_t reg0, uint32_t val0,
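/*
 * Illustrative sketch (not part of the patch): amdgpu_ring_init() above now
 * takes a ring priority at init time. An IP block would pass one of the new
 * priority defines roughly like this; the ring size and irq_type below are
 * hypothetical placeholders.
 */
static int example_ring_init(struct amdgpu_device *adev,
			     struct amdgpu_ring *ring,
			     struct amdgpu_irq_src *irq_src)
{
	/* AMDGPU_RING_PRIO_DEFAULT comes from the new defines in this header */
	return amdgpu_ring_init(adev, ring, 1024, irq_src,
				0 /* irq_type, placeholder */,
				AMDGPU_RING_PRIO_DEFAULT);
}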
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
index 4b352206354b..e5b8fb8e75c5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
@@ -61,8 +61,6 @@ struct amdgpu_sdma_ras_funcs {
struct amdgpu_sdma {
struct amdgpu_sdma_instance instance[AMDGPU_MAX_SDMA_INSTANCES];
- struct drm_gpu_scheduler *sdma_sched[AMDGPU_MAX_SDMA_INSTANCES];
- uint32_t num_sdma_sched;
struct amdgpu_irq_src trap_irq;
struct amdgpu_irq_src illegal_inst_irq;
struct amdgpu_irq_src ecc_irq;
@@ -91,7 +89,8 @@ struct amdgpu_buffer_funcs {
/* dst addr in bytes */
uint64_t dst_offset,
/* number of byte to transfer */
- uint32_t byte_count);
+ uint32_t byte_count,
+ bool tmz);
/* maximum bytes in a single operation */
uint32_t fill_max_bytes;
@@ -109,7 +108,7 @@ struct amdgpu_buffer_funcs {
uint32_t byte_count);
};
-#define amdgpu_emit_copy_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_copy_buffer((ib), (s), (d), (b))
+#define amdgpu_emit_copy_buffer(adev, ib, s, d, b, t) (adev)->mman.buffer_funcs->emit_copy_buffer((ib), (s), (d), (b), (t))
#define amdgpu_emit_fill_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((ib), (s), (d), (b))
struct amdgpu_sdma_instance *
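/*
 * Illustrative sketch (not part of the patch): the emit_copy_buffer hook and
 * its wrapper macro now carry a trailing TMZ flag, so callers select a normal
 * or a secure (encrypted) copy per submission. Addresses and size below are
 * placeholders.
 */
static void example_secure_copy(struct amdgpu_device *adev,
				struct amdgpu_ib *ib,
				uint64_t src, uint64_t dst, uint32_t bytes)
{
	/* last argument selects TMZ; false keeps the previous behaviour */
	amdgpu_emit_copy_buffer(adev, ib, src, dst, bytes, true);
}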
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index b86392253696..b87ca171986a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -249,6 +249,11 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
owner != AMDGPU_FENCE_OWNER_UNDEFINED)
continue;
+ /* Never sync to VM updates either. */
+ if (fence_owner == AMDGPU_FENCE_OWNER_VM &&
+ owner != AMDGPU_FENCE_OWNER_UNDEFINED)
+ continue;
+
/* Ignore fences depending on the sync mode */
switch (mode) {
case AMDGPU_SYNC_ALWAYS:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
index b158230af8db..2f4d5ca9894f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
@@ -44,7 +44,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
/* Number of tests =
* (Total GTT - IB pool - writeback page - ring buffers) / test size
*/
- n = adev->gmc.gart_size - AMDGPU_IB_POOL_SIZE*64*1024;
+ n = adev->gmc.gart_size - AMDGPU_IB_POOL_SIZE;
for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
if (adev->rings[i])
n -= adev->rings[i]->ring_size;
@@ -124,7 +124,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
amdgpu_bo_kunmap(gtt_obj[i]);
r = amdgpu_copy_buffer(ring, gart_addr, vram_addr,
- size, NULL, &fence, false, false);
+ size, NULL, &fence, false, false, false);
if (r) {
DRM_ERROR("Failed GTT->VRAM copy %d\n", i);
@@ -170,7 +170,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
amdgpu_bo_kunmap(vram_obj);
r = amdgpu_copy_buffer(ring, vram_addr, gart_addr,
- size, NULL, &fence, false, false);
+ size, NULL, &fence, false, false, false);
if (r) {
DRM_ERROR("Failed VRAM->GTT copy %d\n", i);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index 63e734a125fb..5da20fc166d9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -35,7 +35,7 @@
#define AMDGPU_JOB_GET_TIMELINE_NAME(job) \
job->base.s_fence->finished.ops->get_timeline_name(&job->base.s_fence->finished)
-TRACE_EVENT(amdgpu_mm_rreg,
+TRACE_EVENT(amdgpu_device_rreg,
TP_PROTO(unsigned did, uint32_t reg, uint32_t value),
TP_ARGS(did, reg, value),
TP_STRUCT__entry(
@@ -54,7 +54,7 @@ TRACE_EVENT(amdgpu_mm_rreg,
(unsigned long)__entry->value)
);
-TRACE_EVENT(amdgpu_mm_wreg,
+TRACE_EVENT(amdgpu_device_wreg,
TP_PROTO(unsigned did, uint32_t reg, uint32_t value),
TP_ARGS(did, reg, value),
TP_STRUCT__entry(
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 6309ff72bd78..9cbecd5ba814 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -62,11 +62,6 @@
#define AMDGPU_TTM_VRAM_MAX_DW_READ (size_t)128
-static int amdgpu_map_buffer(struct ttm_buffer_object *bo,
- struct ttm_mem_reg *mem, unsigned num_pages,
- uint64_t offset, unsigned window,
- struct amdgpu_ring *ring,
- uint64_t *addr);
/**
* amdgpu_init_mem_type - Initialize a memory manager for a specific type of
@@ -277,7 +272,7 @@ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
*
*/
static struct drm_mm_node *amdgpu_find_mm_node(struct ttm_mem_reg *mem,
- unsigned long *offset)
+ uint64_t *offset)
{
struct drm_mm_node *mm_node = mem->mm_node;
@@ -289,91 +284,191 @@ static struct drm_mm_node *amdgpu_find_mm_node(struct ttm_mem_reg *mem,
}
/**
+ * amdgpu_ttm_map_buffer - Map memory into the GART windows
+ * @bo: buffer object to map
+ * @mem: memory object to map
+ * @mm_node: drm_mm node object to map
+ * @num_pages: number of pages to map
+ * @offset: offset into @mm_node where to start
+ * @window: which GART window to use
+ * @ring: DMA ring to use for the copy
+ * @tmz: if we should set up a TMZ enabled mapping
+ * @addr: resulting address inside the MC address space
+ *
+ * Set up one of the GART windows to access a specific piece of memory or return
+ * the physical address for local memory.
+ */
+static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
+ struct ttm_mem_reg *mem,
+ struct drm_mm_node *mm_node,
+ unsigned num_pages, uint64_t offset,
+ unsigned window, struct amdgpu_ring *ring,
+ bool tmz, uint64_t *addr)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_job *job;
+ unsigned num_dw, num_bytes;
+ struct dma_fence *fence;
+ uint64_t src_addr, dst_addr;
+ void *cpu_addr;
+ uint64_t flags;
+ unsigned int i;
+ int r;
+
+ BUG_ON(adev->mman.buffer_funcs->copy_max_bytes <
+ AMDGPU_GTT_MAX_TRANSFER_SIZE * 8);
+
+ /* Map only what can't be accessed directly */
+ if (!tmz && mem->start != AMDGPU_BO_INVALID_OFFSET) {
+ *addr = amdgpu_mm_node_addr(bo, mm_node, mem) + offset;
+ return 0;
+ }
+
+ *addr = adev->gmc.gart_start;
+ *addr += (u64)window * AMDGPU_GTT_MAX_TRANSFER_SIZE *
+ AMDGPU_GPU_PAGE_SIZE;
+ *addr += offset & ~PAGE_MASK;
+
+ num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
+ num_bytes = num_pages * 8;
+
+ r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes,
+ AMDGPU_IB_POOL_DELAYED, &job);
+ if (r)
+ return r;
+
+ src_addr = num_dw * 4;
+ src_addr += job->ibs[0].gpu_addr;
+
+ dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
+ dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8;
+ amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
+ dst_addr, num_bytes, false);
+
+ amdgpu_ring_pad_ib(ring, &job->ibs[0]);
+ WARN_ON(job->ibs[0].length_dw > num_dw);
+
+ flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, mem);
+ if (tmz)
+ flags |= AMDGPU_PTE_TMZ;
+
+ cpu_addr = &job->ibs[0].ptr[num_dw];
+
+ if (mem->mem_type == TTM_PL_TT) {
+ struct ttm_dma_tt *dma;
+ dma_addr_t *dma_address;
+
+ dma = container_of(bo->ttm, struct ttm_dma_tt, ttm);
+ dma_address = &dma->dma_address[offset >> PAGE_SHIFT];
+ r = amdgpu_gart_map(adev, 0, num_pages, dma_address, flags,
+ cpu_addr);
+ if (r)
+ goto error_free;
+ } else {
+ dma_addr_t dma_address;
+
+ dma_address = (mm_node->start << PAGE_SHIFT) + offset;
+ dma_address += adev->vm_manager.vram_base_offset;
+
+ for (i = 0; i < num_pages; ++i) {
+ r = amdgpu_gart_map(adev, i << PAGE_SHIFT, 1,
+ &dma_address, flags, cpu_addr);
+ if (r)
+ goto error_free;
+
+ dma_address += PAGE_SIZE;
+ }
+ }
+
+ r = amdgpu_job_submit(job, &adev->mman.entity,
+ AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
+ if (r)
+ goto error_free;
+
+ dma_fence_put(fence);
+
+ return r;
+
+error_free:
+ amdgpu_job_free(job);
+ return r;
+}
+
+/**
* amdgpu_copy_ttm_mem_to_mem - Helper function for copy
+ * @adev: amdgpu device
+ * @src: buffer/address where to read from
+ * @dst: buffer/address where to write to
+ * @size: number of bytes to copy
+ * @tmz: if a secure copy should be used
+ * @resv: resv object to sync to
+ * @f: Returns the last fence if multiple jobs are submitted.
*
* The function copies @size bytes from {src->mem + src->offset} to
* {dst->mem + dst->offset}. src->bo and dst->bo could be same BO for a
* move and different for a BO to BO copy.
*
- * @f: Returns the last fence if multiple jobs are submitted.
*/
int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
- struct amdgpu_copy_mem *src,
- struct amdgpu_copy_mem *dst,
- uint64_t size,
+ const struct amdgpu_copy_mem *src,
+ const struct amdgpu_copy_mem *dst,
+ uint64_t size, bool tmz,
struct dma_resv *resv,
struct dma_fence **f)
{
+ const uint32_t GTT_MAX_BYTES = (AMDGPU_GTT_MAX_TRANSFER_SIZE *
+ AMDGPU_GPU_PAGE_SIZE);
+
+ uint64_t src_node_size, dst_node_size, src_offset, dst_offset;
struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
struct drm_mm_node *src_mm, *dst_mm;
- uint64_t src_node_start, dst_node_start, src_node_size,
- dst_node_size, src_page_offset, dst_page_offset;
struct dma_fence *fence = NULL;
int r = 0;
- const uint64_t GTT_MAX_BYTES = (AMDGPU_GTT_MAX_TRANSFER_SIZE *
- AMDGPU_GPU_PAGE_SIZE);
if (!adev->mman.buffer_funcs_enabled) {
DRM_ERROR("Trying to move memory with ring turned off.\n");
return -EINVAL;
}
- src_mm = amdgpu_find_mm_node(src->mem, &src->offset);
- src_node_start = amdgpu_mm_node_addr(src->bo, src_mm, src->mem) +
- src->offset;
- src_node_size = (src_mm->size << PAGE_SHIFT) - src->offset;
- src_page_offset = src_node_start & (PAGE_SIZE - 1);
+ src_offset = src->offset;
+ src_mm = amdgpu_find_mm_node(src->mem, &src_offset);
+ src_node_size = (src_mm->size << PAGE_SHIFT) - src_offset;
- dst_mm = amdgpu_find_mm_node(dst->mem, &dst->offset);
- dst_node_start = amdgpu_mm_node_addr(dst->bo, dst_mm, dst->mem) +
- dst->offset;
- dst_node_size = (dst_mm->size << PAGE_SHIFT) - dst->offset;
- dst_page_offset = dst_node_start & (PAGE_SIZE - 1);
+ dst_offset = dst->offset;
+ dst_mm = amdgpu_find_mm_node(dst->mem, &dst_offset);
+ dst_node_size = (dst_mm->size << PAGE_SHIFT) - dst_offset;
mutex_lock(&adev->mman.gtt_window_lock);
while (size) {
- unsigned long cur_size;
- uint64_t from = src_node_start, to = dst_node_start;
+ uint32_t src_page_offset = src_offset & ~PAGE_MASK;
+ uint32_t dst_page_offset = dst_offset & ~PAGE_MASK;
struct dma_fence *next;
+ uint32_t cur_size;
+ uint64_t from, to;
/* Copy size cannot exceed GTT_MAX_BYTES. So if src or dst
* begins at an offset, then adjust the size accordingly
*/
- cur_size = min3(min(src_node_size, dst_node_size), size,
- GTT_MAX_BYTES);
- if (cur_size + src_page_offset > GTT_MAX_BYTES ||
- cur_size + dst_page_offset > GTT_MAX_BYTES)
- cur_size -= max(src_page_offset, dst_page_offset);
-
- /* Map only what needs to be accessed. Map src to window 0 and
- * dst to window 1
- */
- if (src->mem->start == AMDGPU_BO_INVALID_OFFSET) {
- r = amdgpu_map_buffer(src->bo, src->mem,
- PFN_UP(cur_size + src_page_offset),
- src_node_start, 0, ring,
- &from);
- if (r)
- goto error;
- /* Adjust the offset because amdgpu_map_buffer returns
- * start of mapped page
- */
- from += src_page_offset;
- }
+ cur_size = max(src_page_offset, dst_page_offset);
+ cur_size = min(min3(src_node_size, dst_node_size, size),
+ (uint64_t)(GTT_MAX_BYTES - cur_size));
+
+ /* Map src to window 0 and dst to window 1. */
+ r = amdgpu_ttm_map_buffer(src->bo, src->mem, src_mm,
+ PFN_UP(cur_size + src_page_offset),
+ src_offset, 0, ring, tmz, &from);
+ if (r)
+ goto error;
- if (dst->mem->start == AMDGPU_BO_INVALID_OFFSET) {
- r = amdgpu_map_buffer(dst->bo, dst->mem,
- PFN_UP(cur_size + dst_page_offset),
- dst_node_start, 1, ring,
- &to);
- if (r)
- goto error;
- to += dst_page_offset;
- }
+ r = amdgpu_ttm_map_buffer(dst->bo, dst->mem, dst_mm,
+ PFN_UP(cur_size + dst_page_offset),
+ dst_offset, 1, ring, tmz, &to);
+ if (r)
+ goto error;
r = amdgpu_copy_buffer(ring, from, to, cur_size,
- resv, &next, false, true);
+ resv, &next, false, true, tmz);
if (r)
goto error;
@@ -386,23 +481,20 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
src_node_size -= cur_size;
if (!src_node_size) {
- src_node_start = amdgpu_mm_node_addr(src->bo, ++src_mm,
- src->mem);
- src_node_size = (src_mm->size << PAGE_SHIFT);
- src_page_offset = 0;
+ ++src_mm;
+ src_node_size = src_mm->size << PAGE_SHIFT;
+ src_offset = 0;
} else {
- src_node_start += cur_size;
- src_page_offset = src_node_start & (PAGE_SIZE - 1);
+ src_offset += cur_size;
}
+
dst_node_size -= cur_size;
if (!dst_node_size) {
- dst_node_start = amdgpu_mm_node_addr(dst->bo, ++dst_mm,
- dst->mem);
- dst_node_size = (dst_mm->size << PAGE_SHIFT);
- dst_page_offset = 0;
+ ++dst_mm;
+ dst_node_size = dst_mm->size << PAGE_SHIFT;
+ dst_offset = 0;
} else {
- dst_node_start += cur_size;
- dst_page_offset = dst_node_start & (PAGE_SIZE - 1);
+ dst_offset += cur_size;
}
}
error:
@@ -425,6 +517,7 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
struct ttm_mem_reg *old_mem)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
+ struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
struct amdgpu_copy_mem src, dst;
struct dma_fence *fence = NULL;
int r;
@@ -438,14 +531,14 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst,
new_mem->num_pages << PAGE_SHIFT,
+ amdgpu_bo_encrypted(abo),
bo->base.resv, &fence);
if (r)
goto error;
/* clear the space being freed */
if (old_mem->mem_type == TTM_PL_VRAM &&
- (ttm_to_amdgpu_bo(bo)->flags &
- AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) {
+ (abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) {
struct dma_fence *wipe_fence = NULL;
r = amdgpu_fill_buffer(ttm_to_amdgpu_bo(bo), AMDGPU_POISON,
@@ -742,8 +835,8 @@ static void amdgpu_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_re
static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
unsigned long page_offset)
{
+ uint64_t offset = (page_offset << PAGE_SHIFT);
struct drm_mm_node *mm;
- unsigned long offset = (page_offset << PAGE_SHIFT);
mm = amdgpu_find_mm_node(&bo->mem, &offset);
return (bo->mem.bus.base >> PAGE_SHIFT) + mm->start +
@@ -766,18 +859,6 @@ struct amdgpu_ttm_tt {
};
#ifdef CONFIG_DRM_AMDGPU_USERPTR
-/* flags used by HMM internal, not related to CPU/GPU PTE flags */
-static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = {
- (1 << 0), /* HMM_PFN_VALID */
- (1 << 1), /* HMM_PFN_WRITE */
-};
-
-static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = {
- 0xfffffffffffffffeUL, /* HMM_PFN_ERROR */
- 0, /* HMM_PFN_NONE */
- 0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */
-};
-
/**
* amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user
* memory and start HMM tracking CPU page table update
@@ -816,18 +897,15 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
goto out;
}
range->notifier = &bo->notifier;
- range->flags = hmm_range_flags;
- range->values = hmm_range_values;
- range->pfn_shift = PAGE_SHIFT;
range->start = bo->notifier.interval_tree.start;
range->end = bo->notifier.interval_tree.last + 1;
- range->default_flags = hmm_range_flags[HMM_PFN_VALID];
+ range->default_flags = HMM_PFN_REQ_FAULT;
if (!amdgpu_ttm_tt_is_readonly(ttm))
- range->default_flags |= range->flags[HMM_PFN_WRITE];
+ range->default_flags |= HMM_PFN_REQ_WRITE;
- range->pfns = kvmalloc_array(ttm->num_pages, sizeof(*range->pfns),
- GFP_KERNEL);
- if (unlikely(!range->pfns)) {
+ range->hmm_pfns = kvmalloc_array(ttm->num_pages,
+ sizeof(*range->hmm_pfns), GFP_KERNEL);
+ if (unlikely(!range->hmm_pfns)) {
r = -ENOMEM;
goto out_free_ranges;
}
@@ -852,27 +930,23 @@ retry:
down_read(&mm->mmap_sem);
r = hmm_range_fault(range);
up_read(&mm->mmap_sem);
- if (unlikely(r <= 0)) {
+ if (unlikely(r)) {
/*
* FIXME: This timeout should encompass the retry from
* mmu_interval_read_retry() as well.
*/
- if ((r == 0 || r == -EBUSY) && !time_after(jiffies, timeout))
+ if (r == -EBUSY && !time_after(jiffies, timeout))
goto retry;
goto out_free_pfns;
}
- for (i = 0; i < ttm->num_pages; i++) {
- /* FIXME: The pages cannot be touched outside the notifier_lock */
- pages[i] = hmm_device_entry_to_page(range, range->pfns[i]);
- if (unlikely(!pages[i])) {
- pr_err("Page fault failed for pfn[%lu] = 0x%llx\n",
- i, range->pfns[i]);
- r = -ENOMEM;
-
- goto out_free_pfns;
- }
- }
+ /*
+ * Due to default_flags, all pages are HMM_PFN_VALID or
+ * hmm_range_fault() fails. FIXME: The pages cannot be touched outside
+ * the notifier_lock, and mmu_interval_read_retry() must be done first.
+ */
+ for (i = 0; i < ttm->num_pages; i++)
+ pages[i] = hmm_pfn_to_page(range->hmm_pfns[i]);
gtt->range = range;
mmput(mm);
@@ -882,7 +956,7 @@ retry:
out_unlock:
up_read(&mm->mmap_sem);
out_free_pfns:
- kvfree(range->pfns);
+ kvfree(range->hmm_pfns);
out_free_ranges:
kfree(range);
out:
@@ -907,7 +981,7 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
DRM_DEBUG_DRIVER("user_pages_done 0x%llx pages 0x%lx\n",
gtt->userptr, ttm->num_pages);
- WARN_ONCE(!gtt->range || !gtt->range->pfns,
+ WARN_ONCE(!gtt->range || !gtt->range->hmm_pfns,
"No user pages to check\n");
if (gtt->range) {
@@ -917,7 +991,7 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
*/
r = mmu_interval_read_retry(gtt->range->notifier,
gtt->range->notifier_seq);
- kvfree(gtt->range->pfns);
+ kvfree(gtt->range->hmm_pfns);
kfree(gtt->range);
gtt->range = NULL;
}
@@ -1008,8 +1082,7 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
for (i = 0; i < ttm->num_pages; i++) {
if (ttm->pages[i] !=
- hmm_device_entry_to_page(gtt->range,
- gtt->range->pfns[i]))
+ hmm_pfn_to_page(gtt->range->hmm_pfns[i]))
break;
}
@@ -1027,6 +1100,9 @@ int amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
struct amdgpu_ttm_tt *gtt = (void *)ttm;
int r;
+ if (amdgpu_bo_encrypted(abo))
+ flags |= AMDGPU_PTE_TMZ;
+
if (abo->flags & AMDGPU_GEM_CREATE_CP_MQD_GFX9) {
uint64_t page_idx = 1;
@@ -1539,6 +1615,9 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
switch (bo->mem.mem_type) {
case TTM_PL_TT:
+ if (amdgpu_bo_is_amdgpu_bo(bo) &&
+ amdgpu_bo_encrypted(ttm_to_amdgpu_bo(bo)))
+ return false;
return true;
case TTM_PL_VRAM:
@@ -1587,8 +1666,9 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
if (bo->mem.mem_type != TTM_PL_VRAM)
return -EIO;
- nodes = amdgpu_find_mm_node(&abo->tbo.mem, &offset);
- pos = (nodes->start << PAGE_SHIFT) + offset;
+ pos = offset;
+ nodes = amdgpu_find_mm_node(&abo->tbo.mem, &pos);
+ pos += (nodes->start << PAGE_SHIFT);
while (len && pos < adev->gmc.mc_vram_size) {
uint64_t aligned_pos = pos & ~(uint64_t)3;
@@ -1857,17 +1937,19 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
return r;
/*
- * reserve one TMR (64K) memory at the top of VRAM which holds
+ * reserve TMR memory at the top of VRAM which holds
* IP Discovery data and is protected by PSP.
*/
- r = amdgpu_bo_create_kernel_at(adev,
- adev->gmc.real_vram_size - DISCOVERY_TMR_SIZE,
- DISCOVERY_TMR_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
- &adev->discovery_memory,
- NULL);
- if (r)
- return r;
+ if (adev->discovery_tmr_size > 0) {
+ r = amdgpu_bo_create_kernel_at(adev,
+ adev->gmc.real_vram_size - adev->discovery_tmr_size,
+ adev->discovery_tmr_size,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &adev->discovery_memory,
+ NULL);
+ if (r)
+ return r;
+ }
DRM_INFO("amdgpu: %uM of VRAM memory ready\n",
(unsigned) (adev->gmc.real_vram_size / (1024 * 1024)));
@@ -2015,75 +2097,14 @@ int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma)
return ttm_bo_mmap(filp, vma, &adev->mman.bdev);
}
-static int amdgpu_map_buffer(struct ttm_buffer_object *bo,
- struct ttm_mem_reg *mem, unsigned num_pages,
- uint64_t offset, unsigned window,
- struct amdgpu_ring *ring,
- uint64_t *addr)
-{
- struct amdgpu_ttm_tt *gtt = (void *)bo->ttm;
- struct amdgpu_device *adev = ring->adev;
- struct ttm_tt *ttm = bo->ttm;
- struct amdgpu_job *job;
- unsigned num_dw, num_bytes;
- dma_addr_t *dma_address;
- struct dma_fence *fence;
- uint64_t src_addr, dst_addr;
- uint64_t flags;
- int r;
-
- BUG_ON(adev->mman.buffer_funcs->copy_max_bytes <
- AMDGPU_GTT_MAX_TRANSFER_SIZE * 8);
-
- *addr = adev->gmc.gart_start;
- *addr += (u64)window * AMDGPU_GTT_MAX_TRANSFER_SIZE *
- AMDGPU_GPU_PAGE_SIZE;
-
- num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
- num_bytes = num_pages * 8;
-
- r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes, &job);
- if (r)
- return r;
-
- src_addr = num_dw * 4;
- src_addr += job->ibs[0].gpu_addr;
-
- dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
- dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8;
- amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
- dst_addr, num_bytes);
-
- amdgpu_ring_pad_ib(ring, &job->ibs[0]);
- WARN_ON(job->ibs[0].length_dw > num_dw);
-
- dma_address = &gtt->ttm.dma_address[offset >> PAGE_SHIFT];
- flags = amdgpu_ttm_tt_pte_flags(adev, ttm, mem);
- r = amdgpu_gart_map(adev, 0, num_pages, dma_address, flags,
- &job->ibs[0].ptr[num_dw]);
- if (r)
- goto error_free;
-
- r = amdgpu_job_submit(job, &adev->mman.entity,
- AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
- if (r)
- goto error_free;
-
- dma_fence_put(fence);
-
- return r;
-
-error_free:
- amdgpu_job_free(job);
- return r;
-}
-
int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
uint64_t dst_offset, uint32_t byte_count,
struct dma_resv *resv,
struct dma_fence **fence, bool direct_submit,
- bool vm_needs_flush)
+ bool vm_needs_flush, bool tmz)
{
+ enum amdgpu_ib_pool_type pool = direct_submit ? AMDGPU_IB_POOL_DIRECT :
+ AMDGPU_IB_POOL_DELAYED;
struct amdgpu_device *adev = ring->adev;
struct amdgpu_job *job;
@@ -2101,7 +2122,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
num_loops = DIV_ROUND_UP(byte_count, max_bytes);
num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->copy_num_dw, 8);
- r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, &job);
+ r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, pool, &job);
if (r)
return r;
@@ -2123,7 +2144,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
uint32_t cur_size_in_bytes = min(byte_count, max_bytes);
amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_offset,
- dst_offset, cur_size_in_bytes);
+ dst_offset, cur_size_in_bytes, tmz);
src_offset += cur_size_in_bytes;
dst_offset += cur_size_in_bytes;
@@ -2190,7 +2211,8 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
/* for IB padding */
num_dw += 64;
- r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, &job);
+ r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, AMDGPU_IB_POOL_DELAYED,
+ &job);
if (r)
return r;
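/*
 * Illustrative sketch (not part of the patch): amdgpu_copy_buffer() now takes
 * a trailing "tmz" flag in addition to direct_submit and vm_needs_flush, and
 * the buffer-test and TTM move paths above simply forward it. A plain caller
 * would look like this (offsets and size are placeholders).
 */
static int example_copy(struct amdgpu_ring *ring, uint64_t src, uint64_t dst,
			uint32_t size, struct dma_fence **f)
{
	return amdgpu_copy_buffer(ring, src, dst, size, NULL, f,
				  false /* direct_submit */,
				  false /* vm_needs_flush */,
				  false /* tmz */);
}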
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index bd05bbb4878d..4351d02644a7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -24,8 +24,9 @@
#ifndef __AMDGPU_TTM_H__
#define __AMDGPU_TTM_H__
-#include "amdgpu.h"
+#include <linux/dma-direction.h>
#include <drm/gpu_scheduler.h>
+#include "amdgpu.h"
#define AMDGPU_PL_GDS (TTM_PL_PRIV + 0)
#define AMDGPU_PL_GWS (TTM_PL_PRIV + 1)
@@ -74,6 +75,15 @@ uint64_t amdgpu_gtt_mgr_usage(struct ttm_mem_type_manager *man);
int amdgpu_gtt_mgr_recover(struct ttm_mem_type_manager *man);
u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo);
+int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev,
+ struct ttm_mem_reg *mem,
+ struct device *dev,
+ enum dma_data_direction dir,
+ struct sg_table **sgt);
+void amdgpu_vram_mgr_free_sgt(struct amdgpu_device *adev,
+ struct device *dev,
+ enum dma_data_direction dir,
+ struct sg_table *sgt);
uint64_t amdgpu_vram_mgr_usage(struct ttm_mem_type_manager *man);
uint64_t amdgpu_vram_mgr_vis_usage(struct ttm_mem_type_manager *man);
@@ -87,11 +97,11 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
uint64_t dst_offset, uint32_t byte_count,
struct dma_resv *resv,
struct dma_fence **fence, bool direct_submit,
- bool vm_needs_flush);
+ bool vm_needs_flush, bool tmz);
int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
- struct amdgpu_copy_mem *src,
- struct amdgpu_copy_mem *dst,
- uint64_t size,
+ const struct amdgpu_copy_mem *src,
+ const struct amdgpu_copy_mem *dst,
+ uint64_t size, bool tmz,
struct dma_resv *resv,
struct dma_fence **f);
int amdgpu_fill_buffer(struct amdgpu_bo *bo,
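/*
 * Illustrative sketch (not part of the patch): amdgpu_ttm_copy_mem_to_mem()
 * gains a "tmz" flag so a BO move can request an encrypted copy, matching the
 * amdgpu_bo_encrypted() check added in amdgpu_move_blit(). The wrapper name
 * and fence handling here are hypothetical.
 */
static int example_move_encrypted(struct amdgpu_device *adev,
				  const struct amdgpu_copy_mem *src,
				  const struct amdgpu_copy_mem *dst,
				  uint64_t size, struct dma_resv *resv)
{
	struct dma_fence *fence = NULL;
	int r;

	/* true = use a TMZ/secure copy for this transfer */
	r = amdgpu_ttm_copy_mem_to_mem(adev, src, dst, size, true,
				       resv, &fence);
	if (!r)
		dma_fence_put(fence);
	return r;
}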
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
index 9ef312428231..65bb25e31d45 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -403,8 +403,8 @@ FW_VERSION_ATTR(mec_fw_version, 0444, gfx.mec_fw_version);
FW_VERSION_ATTR(mec2_fw_version, 0444, gfx.mec2_fw_version);
FW_VERSION_ATTR(sos_fw_version, 0444, psp.sos_fw_version);
FW_VERSION_ATTR(asd_fw_version, 0444, psp.asd_fw_version);
-FW_VERSION_ATTR(ta_ras_fw_version, 0444, psp.ta_fw_version);
-FW_VERSION_ATTR(ta_xgmi_fw_version, 0444, psp.ta_fw_version);
+FW_VERSION_ATTR(ta_ras_fw_version, 0444, psp.ta_ras_ucode_version);
+FW_VERSION_ATTR(ta_xgmi_fw_version, 0444, psp.ta_xgmi_ucode_version);
FW_VERSION_ATTR(smc_fw_version, 0444, pm.fw_version);
FW_VERSION_ATTR(sdma_fw_version, 0444, sdma.instance[0].fw_version);
FW_VERSION_ATTR(sdma2_fw_version, 0444, sdma.instance[1].fw_version);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
index 9dd51f0d2c11..af1b1ccf613c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
@@ -110,7 +110,8 @@ int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev,
* even NOMEM error is encountered
*/
if(!err_data->err_addr)
- DRM_WARN("Failed to alloc memory for umc error address record!\n");
+ dev_warn(adev->dev, "Failed to alloc memory for "
+ "umc error address record!\n");
/* umc query_ras_error_address is also responsible for clearing
* error status
@@ -120,10 +121,14 @@ int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev,
/* only uncorrectable error needs gpu reset */
if (err_data->ue_count) {
+ dev_info(adev->dev, "%ld uncorrectable hardware errors "
+ "detected in UMC block\n",
+ err_data->ue_count);
+
if (err_data->err_addr_cnt &&
amdgpu_ras_add_bad_pages(adev, err_data->err_addr,
err_data->err_addr_cnt))
- DRM_WARN("Failed to add ras bad page!\n");
+ dev_warn(adev->dev, "Failed to add ras bad page!\n");
amdgpu_ras_reset_gpu(adev);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index 5fd32ad1c575..5100ebe8858d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -1056,7 +1056,8 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
goto err;
}
- r = amdgpu_job_alloc_with_ib(adev, 64, &job);
+ r = amdgpu_job_alloc_with_ib(adev, 64, direct ? AMDGPU_IB_POOL_DIRECT :
+ AMDGPU_IB_POOL_DELAYED, &job);
if (r)
goto err;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index 59ddba137946..ecaa2d7483b2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -446,7 +446,8 @@ static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
uint64_t addr;
int i, r;
- r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
+ r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
+ AMDGPU_IB_POOL_DIRECT, &job);
if (r)
return r;
@@ -524,7 +525,9 @@ static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
struct dma_fence *f = NULL;
int i, r;
- r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
+ r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
+ direct ? AMDGPU_IB_POOL_DIRECT :
+ AMDGPU_IB_POOL_DELAYED, &job);
if (r)
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index a41272fbcba2..2badbc0355f2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -56,19 +56,23 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work);
int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
{
- unsigned long bo_size;
+ unsigned long bo_size, fw_shared_bo_size;
const char *fw_name;
const struct common_firmware_header *hdr;
unsigned char fw_check;
int i, r;
INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler);
+ mutex_init(&adev->vcn.vcn_pg_lock);
+ atomic_set(&adev->vcn.total_submission_cnt, 0);
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++)
+ atomic_set(&adev->vcn.inst[i].dpg_enc_submission_cnt, 0);
switch (adev->asic_type) {
case CHIP_RAVEN:
- if (adev->rev_id >= 8)
+ if (adev->apu_flags & AMD_APU_IS_RAVEN2)
fw_name = FIRMWARE_RAVEN2;
- else if (adev->pdev->device == 0x15d8)
+ else if (adev->apu_flags & AMD_APU_IS_PICASSO)
fw_name = FIRMWARE_PICASSO;
else
fw_name = FIRMWARE_RAVEN;
@@ -178,6 +182,17 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
return r;
}
}
+
+ r = amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)),
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.inst[i].fw_shared_bo,
+ &adev->vcn.inst[i].fw_shared_gpu_addr, &adev->vcn.inst[i].fw_shared_cpu_addr);
+ if (r) {
+ dev_err(adev->dev, "VCN %d (%d) failed to allocate firmware shared bo\n", i, r);
+ return r;
+ }
+
+ fw_shared_bo_size = amdgpu_bo_size(adev->vcn.inst[i].fw_shared_bo);
+ adev->vcn.inst[i].saved_shm_bo = kvmalloc(fw_shared_bo_size, GFP_KERNEL);
}
return 0;
@@ -192,6 +207,12 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
if (adev->vcn.harvest_config & (1 << j))
continue;
+
+ kvfree(adev->vcn.inst[j].saved_shm_bo);
+ amdgpu_bo_free_kernel(&adev->vcn.inst[j].fw_shared_bo,
+ &adev->vcn.inst[j].fw_shared_gpu_addr,
+ (void **)&adev->vcn.inst[j].fw_shared_cpu_addr);
+
if (adev->vcn.indirect_sram) {
amdgpu_bo_free_kernel(&adev->vcn.inst[j].dpg_sram_bo,
&adev->vcn.inst[j].dpg_sram_gpu_addr,
@@ -210,6 +231,7 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
}
release_firmware(adev->vcn.fw);
+ mutex_destroy(&adev->vcn.vcn_pg_lock);
return 0;
}
@@ -236,6 +258,17 @@ int amdgpu_vcn_suspend(struct amdgpu_device *adev)
return -ENOMEM;
memcpy_fromio(adev->vcn.inst[i].saved_bo, ptr, size);
+
+ if (adev->vcn.inst[i].fw_shared_bo == NULL)
+ return 0;
+
+ if (!adev->vcn.inst[i].saved_shm_bo)
+ return -ENOMEM;
+
+ size = amdgpu_bo_size(adev->vcn.inst[i].fw_shared_bo);
+ ptr = adev->vcn.inst[i].fw_shared_cpu_addr;
+
+ memcpy_fromio(adev->vcn.inst[i].saved_shm_bo, ptr, size);
}
return 0;
}
@@ -273,6 +306,17 @@ int amdgpu_vcn_resume(struct amdgpu_device *adev)
}
memset_io(ptr, 0, size);
}
+
+ if (adev->vcn.inst[i].fw_shared_bo == NULL)
+ return -EINVAL;
+
+ size = amdgpu_bo_size(adev->vcn.inst[i].fw_shared_bo);
+ ptr = adev->vcn.inst[i].fw_shared_cpu_addr;
+
+ if (adev->vcn.inst[i].saved_shm_bo != NULL)
+ memcpy_toio(ptr, adev->vcn.inst[i].saved_shm_bo, size);
+ else
+ memset_io(ptr, 0, size);
}
return 0;
}
@@ -295,7 +339,8 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
struct dpg_pause_state new_state;
- if (fence[j])
+ if (fence[j] ||
+ unlikely(atomic_read(&adev->vcn.inst[j].dpg_enc_submission_cnt)))
new_state.fw_based = VCN_DPG_STATE__PAUSE;
else
new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
@@ -307,8 +352,7 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
fences += fence[j];
}
- if (fences == 0) {
- amdgpu_gfx_off_ctrl(adev, true);
+ if (!fences && !atomic_read(&adev->vcn.total_submission_cnt)) {
amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
AMD_PG_STATE_GATE);
} else {
@@ -319,36 +363,46 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work);
- if (set_clocks) {
- amdgpu_gfx_off_ctrl(adev, false);
- amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
- AMD_PG_STATE_UNGATE);
- }
+ atomic_inc(&adev->vcn.total_submission_cnt);
+ cancel_delayed_work_sync(&adev->vcn.idle_work);
+
+ mutex_lock(&adev->vcn.vcn_pg_lock);
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
+ AMD_PG_STATE_UNGATE);
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
struct dpg_pause_state new_state;
- unsigned int fences = 0;
- unsigned int i;
- for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
- fences += amdgpu_fence_count_emitted(&adev->vcn.inst[ring->me].ring_enc[i]);
- }
- if (fences)
+ if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) {
+ atomic_inc(&adev->vcn.inst[ring->me].dpg_enc_submission_cnt);
new_state.fw_based = VCN_DPG_STATE__PAUSE;
- else
- new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
+ } else {
+ unsigned int fences = 0;
+ unsigned int i;
- if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC)
- new_state.fw_based = VCN_DPG_STATE__PAUSE;
+ for (i = 0; i < adev->vcn.num_enc_rings; ++i)
+ fences += amdgpu_fence_count_emitted(&adev->vcn.inst[ring->me].ring_enc[i]);
+
+ if (fences || atomic_read(&adev->vcn.inst[ring->me].dpg_enc_submission_cnt))
+ new_state.fw_based = VCN_DPG_STATE__PAUSE;
+ else
+ new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
+ }
adev->vcn.pause_dpg_mode(adev, ring->me, &new_state);
}
+ mutex_unlock(&adev->vcn.vcn_pg_lock);
}
void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring)
{
+ if (ring->adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
+ ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC)
+ atomic_dec(&ring->adev->vcn.inst[ring->me].dpg_enc_submission_cnt);
+
+ atomic_dec(&ring->adev->vcn.total_submission_cnt);
+
schedule_delayed_work(&ring->adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
}
@@ -390,7 +444,8 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
uint64_t addr;
int i, r;
- r = amdgpu_job_alloc_with_ib(adev, 64, &job);
+ r = amdgpu_job_alloc_with_ib(adev, 64,
+ AMDGPU_IB_POOL_DIRECT, &job);
if (r)
goto err;
@@ -557,7 +612,8 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand
uint64_t addr;
int i, r;
- r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
+ r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
+ AMDGPU_IB_POOL_DIRECT, &job);
if (r)
return r;
@@ -610,7 +666,8 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han
uint64_t addr;
int i, r;
- r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
+ r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
+ AMDGPU_IB_POOL_DIRECT, &job);
if (r)
return r;
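/*
 * Illustrative sketch (not part of the patch): every amdgpu_job_alloc_with_ib()
 * call now names an IB pool explicitly. The recurring pattern in the hunks
 * above is AMDGPU_IB_POOL_DIRECT for direct submission and
 * AMDGPU_IB_POOL_DELAYED otherwise; this hypothetical wrapper just restates it.
 */
static int example_alloc_job(struct amdgpu_device *adev, unsigned int size,
			     bool direct, struct amdgpu_job **job)
{
	enum amdgpu_ib_pool_type pool = direct ? AMDGPU_IB_POOL_DIRECT :
						 AMDGPU_IB_POOL_DELAYED;

	return amdgpu_job_alloc_with_ib(adev, size, pool, job);
}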
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
index 6fe057329de2..90aa12b22725 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
@@ -132,6 +132,13 @@
} \
} while (0)
+#define AMDGPU_VCN_MULTI_QUEUE_FLAG (1 << 8)
+
+enum fw_queue_mode {
+ FW_QUEUE_RING_RESET = 1,
+ FW_QUEUE_DPG_HOLD_OFF = 2,
+};
+
enum engine_status_constants {
UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON = 0x2AAAA0,
UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON_2_0 = 0xAAAA0,
@@ -179,10 +186,15 @@ struct amdgpu_vcn_inst {
struct amdgpu_irq_src irq;
struct amdgpu_vcn_reg external;
struct amdgpu_bo *dpg_sram_bo;
+ struct amdgpu_bo *fw_shared_bo;
struct dpg_pause_state pause_state;
void *dpg_sram_cpu_addr;
uint64_t dpg_sram_gpu_addr;
uint32_t *dpg_sram_curr_addr;
+ atomic_t dpg_enc_submission_cnt;
+ void *fw_shared_cpu_addr;
+ uint64_t fw_shared_gpu_addr;
+ void *saved_shm_bo;
};
struct amdgpu_vcn {
@@ -196,16 +208,28 @@ struct amdgpu_vcn {
uint8_t num_vcn_inst;
struct amdgpu_vcn_inst inst[AMDGPU_MAX_VCN_INSTANCES];
struct amdgpu_vcn_reg internal;
- struct drm_gpu_scheduler *vcn_enc_sched[AMDGPU_MAX_VCN_ENC_RINGS];
- struct drm_gpu_scheduler *vcn_dec_sched[AMDGPU_MAX_VCN_INSTANCES];
- uint32_t num_vcn_enc_sched;
- uint32_t num_vcn_dec_sched;
+ struct mutex vcn_pg_lock;
+ atomic_t total_submission_cnt;
unsigned harvest_config;
int (*pause_dpg_mode)(struct amdgpu_device *adev,
int inst_idx, struct dpg_pause_state *new_state);
};
+struct amdgpu_fw_shared_multi_queue {
+ uint8_t decode_queue_mode;
+ uint8_t encode_generalpurpose_queue_mode;
+ uint8_t encode_lowlatency_queue_mode;
+ uint8_t encode_realtime_queue_mode;
+ uint8_t padding[4];
+};
+
+struct amdgpu_fw_shared {
+ uint32_t present_flag_0;
+ uint8_t pad[53];
+ struct amdgpu_fw_shared_multi_queue multi_queue;
+} __attribute__((__packed__));
+
int amdgpu_vcn_sw_init(struct amdgpu_device *adev);
int amdgpu_vcn_sw_fini(struct amdgpu_device *adev);
int amdgpu_vcn_suspend(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index adc813cde8e2..f3b38c9e04ca 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -38,7 +38,8 @@ bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev)
void amdgpu_virt_init_setting(struct amdgpu_device *adev)
{
/* enable virtual display */
- adev->mode_info.num_crtc = 1;
+ if (adev->mode_info.num_crtc == 0)
+ adev->mode_info.num_crtc = 1;
adev->enable_virtual_display = true;
adev->ddev->driver->driver_features &= ~DRIVER_ATOMIC;
adev->cg_flags = 0;
@@ -59,7 +60,10 @@ void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
amdgpu_ring_alloc(ring, 32);
amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1,
ref, mask);
- amdgpu_fence_emit_polling(ring, &seq);
+ r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
+ if (r)
+ goto failed_undo;
+
amdgpu_ring_commit(ring);
spin_unlock_irqrestore(&kiq->ring_lock, flags);
@@ -81,6 +85,9 @@ void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
return;
+failed_undo:
+ amdgpu_ring_undo(ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
failed_kiq:
pr_err("failed to write reg %x wait reg %x\n", reg0, reg1);
}
@@ -152,6 +159,19 @@ int amdgpu_virt_reset_gpu(struct amdgpu_device *adev)
return 0;
}
+void amdgpu_virt_request_init_data(struct amdgpu_device *adev)
+{
+ struct amdgpu_virt *virt = &adev->virt;
+
+ if (virt->ops && virt->ops->req_init_data)
+ virt->ops->req_init_data(adev);
+
+ if (adev->virt.req_init_data_ver > 0)
+ DRM_INFO("host supports REQ_INIT_DATA handshake\n");
+ else
+ DRM_WARN("host doesn't support REQ_INIT_DATA handshake\n");
+}
+
/**
* amdgpu_virt_wait_reset() - wait for reset gpu completed
* @amdgpu: amdgpu device.
@@ -287,3 +307,82 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev)
}
}
}
+
+void amdgpu_detect_virtualization(struct amdgpu_device *adev)
+{
+ uint32_t reg;
+
+ switch (adev->asic_type) {
+ case CHIP_TONGA:
+ case CHIP_FIJI:
+ reg = RREG32(mmBIF_IOV_FUNC_IDENTIFIER);
+ break;
+ case CHIP_VEGA10:
+ case CHIP_VEGA20:
+ case CHIP_NAVI10:
+ case CHIP_NAVI12:
+ case CHIP_ARCTURUS:
+ reg = RREG32(mmRCC_IOV_FUNC_IDENTIFIER);
+ break;
+ default: /* other chip doesn't support SRIOV */
+ reg = 0;
+ break;
+ }
+
+ if (reg & 1)
+ adev->virt.caps |= AMDGPU_SRIOV_CAPS_IS_VF;
+
+ if (reg & 0x80000000)
+ adev->virt.caps |= AMDGPU_SRIOV_CAPS_ENABLE_IOV;
+
+ if (!reg) {
+ if (is_virtual_machine()) /* passthrough mode excludes sriov mode */
+ adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE;
+ }
+}
+
+bool amdgpu_virt_access_debugfs_is_mmio(struct amdgpu_device *adev)
+{
+ return amdgpu_sriov_is_debug(adev) ? true : false;
+}
+
+bool amdgpu_virt_access_debugfs_is_kiq(struct amdgpu_device *adev)
+{
+ return amdgpu_sriov_is_normal(adev) ? true : false;
+}
+
+int amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev)
+{
+ if (!amdgpu_sriov_vf(adev) ||
+ amdgpu_virt_access_debugfs_is_kiq(adev))
+ return 0;
+
+ if (amdgpu_virt_access_debugfs_is_mmio(adev))
+ adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
+ else
+ return -EPERM;
+
+ return 0;
+}
+
+void amdgpu_virt_disable_access_debugfs(struct amdgpu_device *adev)
+{
+ if (amdgpu_sriov_vf(adev))
+ adev->virt.caps |= AMDGPU_SRIOV_CAPS_RUNTIME;
+}
+
+enum amdgpu_sriov_vf_mode amdgpu_virt_get_sriov_vf_mode(struct amdgpu_device *adev)
+{
+ enum amdgpu_sriov_vf_mode mode;
+
+ if (amdgpu_sriov_vf(adev)) {
+ if (amdgpu_sriov_is_pp_one_vf(adev))
+ mode = SRIOV_VF_MODE_ONE_VF;
+ else
+ mode = SRIOV_VF_MODE_MULTI_VF;
+ } else {
+ mode = SRIOV_VF_MODE_BARE_METAL;
+ }
+
+ return mode;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index f0128f745bd2..b90e822cebd7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -30,6 +30,17 @@
#define AMDGPU_PASSTHROUGH_MODE (1 << 3) /* the whole GPU is passed through to the VM */
#define AMDGPU_SRIOV_CAPS_RUNTIME (1 << 4) /* is out of full access mode */
+/* all asic after AI use this offset */
+#define mmRCC_IOV_FUNC_IDENTIFIER 0xDE5
+/* tonga/fiji use this offset */
+#define mmBIF_IOV_FUNC_IDENTIFIER 0x1503
+
+enum amdgpu_sriov_vf_mode {
+ SRIOV_VF_MODE_BARE_METAL = 0,
+ SRIOV_VF_MODE_ONE_VF,
+ SRIOV_VF_MODE_MULTI_VF,
+};
+
struct amdgpu_mm_table {
struct amdgpu_bo *bo;
uint32_t *cpu_addr;
@@ -54,6 +65,7 @@ struct amdgpu_vf_error_buffer {
struct amdgpu_virt_ops {
int (*req_full_gpu)(struct amdgpu_device *adev, bool init);
int (*rel_full_gpu)(struct amdgpu_device *adev, bool init);
+ int (*req_init_data)(struct amdgpu_device *adev);
int (*reset_gpu)(struct amdgpu_device *adev);
int (*wait_reset)(struct amdgpu_device *adev);
void (*trans_msg)(struct amdgpu_device *adev, u32 req, u32 data1, u32 data2, u32 data3);
@@ -83,6 +95,8 @@ enum AMDGIM_FEATURE_FLAG {
AMDGIM_FEATURE_GIM_LOAD_UCODES = 0x2,
/* VRAM LOST by GIM */
AMDGIM_FEATURE_GIM_FLR_VRAMLOST = 0x4,
+ /* MM bandwidth */
+ AMDGIM_FEATURE_GIM_MM_BW_MGR = 0x8,
/* PP ONE VF MODE in GIM */
AMDGIM_FEATURE_PP_ONE_VF = (1 << 4),
};
@@ -256,6 +270,8 @@ struct amdgpu_virt {
struct amdgpu_virt_fw_reserve fw_reserve;
uint32_t gim_feature;
uint32_t reg_access_mode;
+ int req_init_data_ver;
+ bool tdr_debug;
};
#define amdgpu_sriov_enabled(adev) \
@@ -287,6 +303,10 @@ static inline bool is_virtual_machine(void)
#define amdgpu_sriov_is_pp_one_vf(adev) \
((adev)->virt.gim_feature & AMDGIM_FEATURE_PP_ONE_VF)
+#define amdgpu_sriov_is_debug(adev) \
+ ((!adev->in_gpu_reset) && adev->virt.tdr_debug)
+#define amdgpu_sriov_is_normal(adev) \
+ ((!adev->in_gpu_reset) && (!adev->virt.tdr_debug))
bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev);
void amdgpu_virt_init_setting(struct amdgpu_device *adev);
@@ -296,6 +316,7 @@ void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init);
int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init);
int amdgpu_virt_reset_gpu(struct amdgpu_device *adev);
+void amdgpu_virt_request_init_data(struct amdgpu_device *adev);
int amdgpu_virt_wait_reset(struct amdgpu_device *adev);
int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev);
void amdgpu_virt_free_mm_table(struct amdgpu_device *adev);
@@ -303,4 +324,11 @@ int amdgpu_virt_fw_reserve_get_checksum(void *obj, unsigned long obj_size,
unsigned int key,
unsigned int chksum);
void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev);
+void amdgpu_detect_virtualization(struct amdgpu_device *adev);
+
+bool amdgpu_virt_can_access_debugfs(struct amdgpu_device *adev);
+int amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev);
+void amdgpu_virt_disable_access_debugfs(struct amdgpu_device *adev);
+
+enum amdgpu_sriov_vf_mode amdgpu_virt_get_sriov_vf_mode(struct amdgpu_device *adev);
#endif
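/*
 * Illustrative sketch (not part of the patch): a debugfs read handler would
 * bracket register access with the new enable/disable helpers so that SRIOV
 * runtime restrictions are honoured. The function name and the RREG32() call
 * are placeholders for the actual debugfs access path.
 */
static int example_debugfs_read(struct amdgpu_device *adev, uint32_t reg,
				uint32_t *val)
{
	int r = amdgpu_virt_enable_access_debugfs(adev);

	if (r < 0)
		return r;

	*val = RREG32(reg); /* placeholder for the real MMIO access */
	amdgpu_virt_disable_access_debugfs(adev);
	return 0;
}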
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 6d9252a27916..7417754e9141 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -82,7 +82,7 @@ struct amdgpu_prt_cb {
struct dma_fence_cb cb;
};
-/**
+/*
* vm eviction_lock can be taken in MMU notifiers. Make sure no reclaim-FS
* happens while holding this lock anywhere to prevent deadlocks when
* an MMU notifier runs in reclaim-FS context.
@@ -726,7 +726,7 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm)
* @adev: amdgpu_device pointer
* @vm: VM to clear BO from
* @bo: BO to clear
- * @direct: use a direct update
+ * @immediate: use an immediate update
*
* Root PD needs to be reserved when calling this.
*
@@ -736,7 +736,7 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm)
static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
struct amdgpu_bo *bo,
- bool direct)
+ bool immediate)
{
struct ttm_operation_ctx ctx = { true, false };
unsigned level = adev->vm_manager.root_level;
@@ -795,7 +795,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
memset(&params, 0, sizeof(params));
params.adev = adev;
params.vm = vm;
- params.direct = direct;
+ params.immediate = immediate;
r = vm->update_funcs->prepare(&params, NULL, AMDGPU_SYNC_EXPLICIT);
if (r)
@@ -850,11 +850,11 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
* @adev: amdgpu_device pointer
* @vm: requesting vm
* @level: the page table level
- * @direct: use a direct update
+ * @immediate: use an immediate update
* @bp: resulting BO allocation parameters
*/
static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm,
- int level, bool direct,
+ int level, bool immediate,
struct amdgpu_bo_param *bp)
{
memset(bp, 0, sizeof(*bp));
@@ -870,7 +870,7 @@ static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm,
else if (!vm->root.base.bo || vm->root.base.bo->shadow)
bp->flags |= AMDGPU_GEM_CREATE_SHADOW;
bp->type = ttm_bo_type_kernel;
- bp->no_wait_gpu = direct;
+ bp->no_wait_gpu = immediate;
if (vm->root.base.bo)
bp->resv = vm->root.base.bo->tbo.base.resv;
}
@@ -881,7 +881,7 @@ static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm,
* @adev: amdgpu_device pointer
* @vm: VM to allocate page tables for
* @cursor: Which page table to allocate
- * @direct: use a direct update
+ * @immediate: use an immediate update
*
* Make sure a specific page table or directory is allocated.
*
@@ -892,7 +892,7 @@ static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm,
static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
struct amdgpu_vm_pt_cursor *cursor,
- bool direct)
+ bool immediate)
{
struct amdgpu_vm_pt *entry = cursor->entry;
struct amdgpu_bo_param bp;
@@ -913,7 +913,7 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
if (entry->base.bo)
return 0;
- amdgpu_vm_bo_param(adev, vm, cursor->level, direct, &bp);
+ amdgpu_vm_bo_param(adev, vm, cursor->level, immediate, &bp);
r = amdgpu_bo_create(adev, &bp, &pt);
if (r)
@@ -925,7 +925,7 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
pt->parent = amdgpu_bo_ref(cursor->parent->base.bo);
amdgpu_vm_bo_base_init(&entry->base, vm, pt);
- r = amdgpu_vm_clear_bo(adev, vm, pt, direct);
+ r = amdgpu_vm_clear_bo(adev, vm, pt, immediate);
if (r)
goto error_free_pt;
@@ -1276,7 +1276,7 @@ static void amdgpu_vm_invalidate_pds(struct amdgpu_device *adev,
*
* @adev: amdgpu_device pointer
* @vm: requested vm
- * @direct: submit directly to the paging queue
+ * @immediate: submit immediately to the paging queue
*
* Makes sure all directories are up to date.
*
@@ -1284,7 +1284,7 @@ static void amdgpu_vm_invalidate_pds(struct amdgpu_device *adev,
* 0 for success, error for failure.
*/
int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
- struct amdgpu_vm *vm, bool direct)
+ struct amdgpu_vm *vm, bool immediate)
{
struct amdgpu_vm_update_params params;
int r;
@@ -1295,7 +1295,7 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
memset(&params, 0, sizeof(params));
params.adev = adev;
params.vm = vm;
- params.direct = direct;
+ params.immediate = immediate;
r = vm->update_funcs->prepare(&params, NULL, AMDGPU_SYNC_EXPLICIT);
if (r)
@@ -1446,20 +1446,24 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
uint64_t incr, entry_end, pe_start;
struct amdgpu_bo *pt;
- if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) {
+ if (!params->unlocked) {
/* make sure that the page tables covering the
* address range are actually allocated
*/
r = amdgpu_vm_alloc_pts(params->adev, params->vm,
- &cursor, params->direct);
+ &cursor, params->immediate);
if (r)
return r;
}
shift = amdgpu_vm_level_shift(adev, cursor.level);
parent_shift = amdgpu_vm_level_shift(adev, cursor.level - 1);
- if (adev->asic_type < CHIP_VEGA10 &&
- (flags & AMDGPU_PTE_VALID)) {
+ if (params->unlocked) {
+ /* Unlocked updates are only allowed on the leaves */
+ if (amdgpu_vm_pt_descendant(adev, &cursor))
+ continue;
+ } else if (adev->asic_type < CHIP_VEGA10 &&
+ (flags & AMDGPU_PTE_VALID)) {
/* No huge page support before GMC v9 */
if (cursor.level != AMDGPU_VM_PTB) {
if (!amdgpu_vm_pt_descendant(adev, &cursor))
@@ -1557,7 +1561,8 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
*
* @adev: amdgpu_device pointer
* @vm: requested vm
- * @direct: direct submission in a page fault
+ * @immediate: immediate submission in a page fault
+ * @unlocked: unlocked invalidation during MM callback
* @resv: fences we need to sync to
* @start: start of mapped range
* @last: last mapped entry
@@ -1572,8 +1577,8 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
* 0 for success, -EINVAL for failure.
*/
static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
- struct amdgpu_vm *vm, bool direct,
- struct dma_resv *resv,
+ struct amdgpu_vm *vm, bool immediate,
+ bool unlocked, struct dma_resv *resv,
uint64_t start, uint64_t last,
uint64_t flags, uint64_t addr,
dma_addr_t *pages_addr,
@@ -1586,8 +1591,9 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
memset(&params, 0, sizeof(params));
params.adev = adev;
params.vm = vm;
- params.direct = direct;
+ params.immediate = immediate;
params.pages_addr = pages_addr;
+ params.unlocked = unlocked;
/* Implicitly sync to command submissions in the same VM before
* unmapping. Sync to moving fences before mapping.
@@ -1603,11 +1609,12 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
goto error_unlock;
}
- if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) {
- struct amdgpu_bo *root = vm->root.base.bo;
+ if (!unlocked && !dma_fence_is_signaled(vm->last_unlocked)) {
+ struct dma_fence *tmp = dma_fence_get_stub();
- if (!dma_fence_is_signaled(vm->last_direct))
- amdgpu_bo_fence(root, vm->last_direct, true);
+ amdgpu_bo_fence(vm->root.base.bo, vm->last_unlocked, true);
+ swap(vm->last_unlocked, tmp);
+ dma_fence_put(tmp);
}
r = vm->update_funcs->prepare(&params, resv, sync_mode);
@@ -1721,7 +1728,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
}
last = min((uint64_t)mapping->last, start + max_entries - 1);
- r = amdgpu_vm_bo_update_mapping(adev, vm, false, resv,
+ r = amdgpu_vm_bo_update_mapping(adev, vm, false, false, resv,
start, last, flags, addr,
dma_addr, fence);
if (r)
@@ -1784,6 +1791,10 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
if (bo) {
flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem);
+
+ if (amdgpu_bo_encrypted(bo))
+ flags |= AMDGPU_PTE_TMZ;
+
bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
} else {
flags = 0x0;
@@ -2014,7 +2025,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
mapping->start < AMDGPU_GMC_HOLE_START)
init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
- r = amdgpu_vm_bo_update_mapping(adev, vm, false, resv,
+ r = amdgpu_vm_bo_update_mapping(adev, vm, false, false, resv,
mapping->start, mapping->last,
init_pte_value, 0, NULL, &f);
amdgpu_vm_free_mapping(adev, vm, mapping, f);
@@ -2124,11 +2135,8 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
if (bo && amdgpu_xgmi_same_hive(adev, amdgpu_ttm_adev(bo->tbo.bdev)) &&
(bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM)) {
bo_va->is_xgmi = true;
- mutex_lock(&adev->vm_manager.lock_pstate);
/* Power up XGMI if it can be potentially used */
- if (++adev->vm_manager.xgmi_map_counter == 1)
- amdgpu_xgmi_set_pstate(adev, 1);
- mutex_unlock(&adev->vm_manager.lock_pstate);
+ amdgpu_xgmi_set_pstate(adev, AMDGPU_XGMI_PSTATE_MAX_VEGA20);
}
return bo_va;
@@ -2551,12 +2559,8 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
dma_fence_put(bo_va->last_pt_update);
- if (bo && bo_va->is_xgmi) {
- mutex_lock(&adev->vm_manager.lock_pstate);
- if (--adev->vm_manager.xgmi_map_counter == 0)
- amdgpu_xgmi_set_pstate(adev, 0);
- mutex_unlock(&adev->vm_manager.lock_pstate);
- }
+ if (bo && bo_va->is_xgmi)
+ amdgpu_xgmi_set_pstate(adev, AMDGPU_XGMI_PSTATE_MIN);
kfree(bo_va);
}
@@ -2585,7 +2589,7 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo)
return false;
/* Don't evict VM page tables while they are updated */
- if (!dma_fence_is_signaled(bo_base->vm->last_direct)) {
+ if (!dma_fence_is_signaled(bo_base->vm->last_unlocked)) {
amdgpu_vm_eviction_unlock(bo_base->vm);
return false;
}
@@ -2762,7 +2766,7 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout)
if (timeout <= 0)
return timeout;
- return dma_fence_wait_timeout(vm->last_direct, true, timeout);
+ return dma_fence_wait_timeout(vm->last_unlocked, true, timeout);
}
/**
@@ -2798,7 +2802,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
/* create scheduler entities for page table updates */
- r = drm_sched_entity_init(&vm->direct, DRM_SCHED_PRIORITY_NORMAL,
+ r = drm_sched_entity_init(&vm->immediate, DRM_SCHED_PRIORITY_NORMAL,
adev->vm_manager.vm_pte_scheds,
adev->vm_manager.vm_pte_num_scheds, NULL);
if (r)
@@ -2808,7 +2812,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
adev->vm_manager.vm_pte_scheds,
adev->vm_manager.vm_pte_num_scheds, NULL);
if (r)
- goto error_free_direct;
+ goto error_free_immediate;
vm->pte_support_ats = false;
vm->is_compute_context = false;
@@ -2834,7 +2838,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
else
vm->update_funcs = &amdgpu_vm_sdma_funcs;
vm->last_update = NULL;
- vm->last_direct = dma_fence_get_stub();
+ vm->last_unlocked = dma_fence_get_stub();
mutex_init(&vm->eviction_lock);
vm->evicting = false;
@@ -2888,11 +2892,11 @@ error_free_root:
vm->root.base.bo = NULL;
error_free_delayed:
- dma_fence_put(vm->last_direct);
+ dma_fence_put(vm->last_unlocked);
drm_sched_entity_destroy(&vm->delayed);
-error_free_direct:
- drm_sched_entity_destroy(&vm->direct);
+error_free_immediate:
+ drm_sched_entity_destroy(&vm->immediate);
return r;
}
@@ -2996,10 +3000,17 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm,
!amdgpu_gmc_vram_full_visible(&adev->gmc)),
"CPU update of VM recommended only for large BAR system\n");
- if (vm->use_cpu_for_update)
+ if (vm->use_cpu_for_update) {
+ /* Sync with last SDMA update/clear before switching to CPU */
+ r = amdgpu_bo_sync_wait(vm->root.base.bo,
+ AMDGPU_FENCE_OWNER_UNDEFINED, true);
+ if (r)
+ goto free_idr;
+
vm->update_funcs = &amdgpu_vm_cpu_funcs;
- else
+ } else {
vm->update_funcs = &amdgpu_vm_sdma_funcs;
+ }
dma_fence_put(vm->last_update);
vm->last_update = NULL;
vm->is_compute_context = true;
@@ -3089,8 +3100,8 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
vm->pasid = 0;
}
- dma_fence_wait(vm->last_direct, false);
- dma_fence_put(vm->last_direct);
+ dma_fence_wait(vm->last_unlocked, false);
+ dma_fence_put(vm->last_unlocked);
list_for_each_entry_safe(mapping, tmp, &vm->freed, list) {
if (mapping->flags & AMDGPU_PTE_PRT && prt_fini_needed) {
@@ -3107,7 +3118,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
amdgpu_bo_unref(&root);
WARN_ON(vm->root.base.bo);
- drm_sched_entity_destroy(&vm->direct);
+ drm_sched_entity_destroy(&vm->immediate);
drm_sched_entity_destroy(&vm->delayed);
if (!RB_EMPTY_ROOT(&vm->va.rb_root)) {
@@ -3166,9 +3177,6 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
idr_init(&adev->vm_manager.pasid_idr);
spin_lock_init(&adev->vm_manager.pasid_lock);
-
- adev->vm_manager.xgmi_map_counter = 0;
- mutex_init(&adev->vm_manager.lock_pstate);
}
/**
@@ -3343,8 +3351,8 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, unsigned int pasid,
value = 0;
}
- r = amdgpu_vm_bo_update_mapping(adev, vm, true, NULL, addr, addr + 1,
- flags, value, NULL, NULL);
+ r = amdgpu_vm_bo_update_mapping(adev, vm, true, false, NULL, addr,
+ addr + 1, flags, value, NULL, NULL);
if (r)
goto error_unlock;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 06fe30e1492d..c8e68d7890bf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -54,6 +54,9 @@ struct amdgpu_bo_list_entry;
#define AMDGPU_PTE_SYSTEM (1ULL << 1)
#define AMDGPU_PTE_SNOOPED (1ULL << 2)
+/* RV+ */
+#define AMDGPU_PTE_TMZ (1ULL << 3)
+
/* VI only */
#define AMDGPU_PTE_EXECUTABLE (1ULL << 4)
@@ -203,9 +206,14 @@ struct amdgpu_vm_update_params {
struct amdgpu_vm *vm;
/**
- * @direct: if changes should be made directly
+ * @immediate: if changes should be made immediately
*/
- bool direct;
+ bool immediate;
+
+ /**
+ * @unlocked: true if the root BO is not locked
+ */
+ bool unlocked;
/**
* @pages_addr:
@@ -271,11 +279,11 @@ struct amdgpu_vm {
struct dma_fence *last_update;
/* Scheduler entities for page table updates */
- struct drm_sched_entity direct;
+ struct drm_sched_entity immediate;
struct drm_sched_entity delayed;
- /* Last submission to the scheduler entities */
- struct dma_fence *last_direct;
+ /* Last unlocked submission to the scheduler entities */
+ struct dma_fence *last_unlocked;
unsigned int pasid;
/* dedicated to vm */
@@ -349,10 +357,6 @@ struct amdgpu_vm_manager {
*/
struct idr pasid_idr;
spinlock_t pasid_lock;
-
- /* counter of mapped memory through xgmi */
- uint32_t xgmi_map_counter;
- struct mutex lock_pstate;
};
#define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count)))
@@ -380,7 +384,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
void *param);
int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync);
int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
- struct amdgpu_vm *vm, bool direct);
+ struct amdgpu_vm *vm, bool immediate);
int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
struct dma_fence **fence);
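The header now documents two independent flags on amdgpu_vm_update_params: @immediate selects the vm->immediate scheduler entity, while @unlocked records that the root BO reservation is not held, so the resulting fence goes into vm->last_unlocked rather than onto the root BO (see the amdgpu_vm_sdma_commit() hunk further down). A minimal, purely illustrative initializer, not taken from the patch:

/* Illustrative sketch only: the field values are assumptions, not patch code.
 * immediate picks the vm->immediate entity, unlocked routes the fence to
 * vm->last_unlocked instead of the root BO reservation. */
struct amdgpu_vm_update_params params = {
	.vm = vm,
	.immediate = true,	/* submit on the immediate entity */
	.unlocked = false,	/* root reservation is held */
};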
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
index e38516304070..39c704a1fb0e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
@@ -84,7 +84,7 @@ static int amdgpu_vm_cpu_update(struct amdgpu_vm_update_params *p,
pe += (unsigned long)amdgpu_bo_kptr(bo);
- trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags, p->direct);
+ trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags, p->immediate);
for (i = 0; i < count; i++) {
value = p->pages_addr ?
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
index cf96c335b258..8d9c6feba660 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
@@ -61,10 +61,12 @@ static int amdgpu_vm_sdma_prepare(struct amdgpu_vm_update_params *p,
struct dma_resv *resv,
enum amdgpu_sync_mode sync_mode)
{
+ enum amdgpu_ib_pool_type pool = p->immediate ? AMDGPU_IB_POOL_IMMEDIATE
+ : AMDGPU_IB_POOL_DELAYED;
unsigned int ndw = AMDGPU_VM_SDMA_MIN_NUM_DW;
int r;
- r = amdgpu_job_alloc_with_ib(p->adev, ndw * 4, &p->job);
+ r = amdgpu_job_alloc_with_ib(p->adev, ndw * 4, pool, &p->job);
if (r)
return r;
@@ -90,11 +92,11 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p,
{
struct amdgpu_ib *ib = p->job->ibs;
struct drm_sched_entity *entity;
- struct dma_fence *f, *tmp;
struct amdgpu_ring *ring;
+ struct dma_fence *f;
int r;
- entity = p->direct ? &p->vm->direct : &p->vm->delayed;
+ entity = p->immediate ? &p->vm->immediate : &p->vm->delayed;
ring = container_of(entity->rq->sched, struct amdgpu_ring, sched);
WARN_ON(ib->length_dw == 0);
@@ -104,15 +106,16 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p,
if (r)
goto error;
- if (p->direct) {
- tmp = dma_fence_get(f);
- swap(p->vm->last_direct, tmp);
+ if (p->unlocked) {
+ struct dma_fence *tmp = dma_fence_get(f);
+
+ swap(p->vm->last_unlocked, f);
dma_fence_put(tmp);
} else {
- dma_resv_add_shared_fence(p->vm->root.base.bo->tbo.base.resv, f);
+ amdgpu_bo_fence(p->vm->root.base.bo, f, true);
}
- if (fence && !p->direct)
+ if (fence && !p->immediate)
swap(*fence, f);
dma_fence_put(f);
return 0;
@@ -142,7 +145,7 @@ static void amdgpu_vm_sdma_copy_ptes(struct amdgpu_vm_update_params *p,
src += p->num_dw_left * 4;
pe += amdgpu_gmc_sign_extend(bo->tbo.offset);
- trace_amdgpu_vm_copy_ptes(pe, src, count, p->direct);
+ trace_amdgpu_vm_copy_ptes(pe, src, count, p->immediate);
amdgpu_vm_copy_pte(p->adev, ib, pe, src, count);
}
@@ -169,7 +172,7 @@ static void amdgpu_vm_sdma_set_ptes(struct amdgpu_vm_update_params *p,
struct amdgpu_ib *ib = p->job->ibs;
pe += amdgpu_gmc_sign_extend(bo->tbo.offset);
- trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags, p->direct);
+ trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags, p->immediate);
if (count < 3) {
amdgpu_vm_write_pte(p->adev, ib, pe, addr | flags,
count, incr);
@@ -198,6 +201,8 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p,
uint64_t addr, unsigned count, uint32_t incr,
uint64_t flags)
{
+ enum amdgpu_ib_pool_type pool = p->immediate ? AMDGPU_IB_POOL_IMMEDIATE
+ : AMDGPU_IB_POOL_DELAYED;
unsigned int i, ndw, nptes;
uint64_t *pte;
int r;
@@ -223,7 +228,8 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p,
ndw = max(ndw, AMDGPU_VM_SDMA_MIN_NUM_DW);
ndw = min(ndw, AMDGPU_VM_SDMA_MAX_NUM_DW);
- r = amdgpu_job_alloc_with_ib(p->adev, ndw * 4, &p->job);
+ r = amdgpu_job_alloc_with_ib(p->adev, ndw * 4, pool,
+ &p->job);
if (r)
return r;
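Both amdgpu_vm_sdma_prepare() and amdgpu_vm_sdma_update() now pick the IB pool with the same ternary on p->immediate. A hypothetical helper, not part of this patch, could centralize that choice; a minimal sketch using only types and constants that appear above:

/* Hypothetical helper, not in the patch: map the update mode to an IB pool
 * in one place instead of repeating the ternary in both call sites. */
static enum amdgpu_ib_pool_type
amdgpu_vm_sdma_ib_pool(const struct amdgpu_vm_update_params *p)
{
	return p->immediate ? AMDGPU_IB_POOL_IMMEDIATE : AMDGPU_IB_POOL_DELAYED;
}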
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 82a3299e53c0..d399e5893170 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -22,6 +22,7 @@
* Authors: Christian König
*/
+#include <linux/dma-mapping.h>
#include "amdgpu.h"
#include "amdgpu_vm.h"
#include "amdgpu_atomfirmware.h"
@@ -148,6 +149,15 @@ static DEVICE_ATTR(mem_info_vis_vram_used, S_IRUGO,
static DEVICE_ATTR(mem_info_vram_vendor, S_IRUGO,
amdgpu_mem_info_vram_vendor, NULL);
+static const struct attribute *amdgpu_vram_mgr_attributes[] = {
+ &dev_attr_mem_info_vram_total.attr,
+ &dev_attr_mem_info_vis_vram_total.attr,
+ &dev_attr_mem_info_vram_used.attr,
+ &dev_attr_mem_info_vis_vram_used.attr,
+ &dev_attr_mem_info_vram_vendor.attr,
+ NULL
+};
+
/**
* amdgpu_vram_mgr_init - init VRAM manager and DRM MM
*
@@ -172,31 +182,9 @@ static int amdgpu_vram_mgr_init(struct ttm_mem_type_manager *man,
man->priv = mgr;
/* Add the VRAM-related sysfs files */
- ret = device_create_file(adev->dev, &dev_attr_mem_info_vram_total);
- if (ret) {
- DRM_ERROR("Failed to create device file mem_info_vram_total\n");
- return ret;
- }
- ret = device_create_file(adev->dev, &dev_attr_mem_info_vis_vram_total);
- if (ret) {
- DRM_ERROR("Failed to create device file mem_info_vis_vram_total\n");
- return ret;
- }
- ret = device_create_file(adev->dev, &dev_attr_mem_info_vram_used);
- if (ret) {
- DRM_ERROR("Failed to create device file mem_info_vram_used\n");
- return ret;
- }
- ret = device_create_file(adev->dev, &dev_attr_mem_info_vis_vram_used);
- if (ret) {
- DRM_ERROR("Failed to create device file mem_info_vis_vram_used\n");
- return ret;
- }
- ret = device_create_file(adev->dev, &dev_attr_mem_info_vram_vendor);
- if (ret) {
- DRM_ERROR("Failed to create device file mem_info_vram_vendor\n");
- return ret;
- }
+ ret = sysfs_create_files(&adev->dev->kobj, amdgpu_vram_mgr_attributes);
+ if (ret)
+ DRM_ERROR("Failed to register sysfs\n");
return 0;
}
@@ -219,11 +207,7 @@ static int amdgpu_vram_mgr_fini(struct ttm_mem_type_manager *man)
spin_unlock(&mgr->lock);
kfree(mgr);
man->priv = NULL;
- device_remove_file(adev->dev, &dev_attr_mem_info_vram_total);
- device_remove_file(adev->dev, &dev_attr_mem_info_vis_vram_total);
- device_remove_file(adev->dev, &dev_attr_mem_info_vram_used);
- device_remove_file(adev->dev, &dev_attr_mem_info_vis_vram_used);
- device_remove_file(adev->dev, &dev_attr_mem_info_vram_vendor);
+ sysfs_remove_files(&adev->dev->kobj, amdgpu_vram_mgr_attributes);
return 0;
}
@@ -459,6 +443,104 @@ static void amdgpu_vram_mgr_del(struct ttm_mem_type_manager *man,
}
/**
+ * amdgpu_vram_mgr_alloc_sgt - allocate and fill a sg table
+ *
+ * @adev: amdgpu device pointer
+ * @mem: TTM memory object
+ * @dev: the other device
+ * @dir: dma direction
+ * @sgt: resulting sg table
+ *
+ * Allocate and fill a sg table from a VRAM allocation.
+ */
+int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev,
+ struct ttm_mem_reg *mem,
+ struct device *dev,
+ enum dma_data_direction dir,
+ struct sg_table **sgt)
+{
+ struct drm_mm_node *node;
+ struct scatterlist *sg;
+ int num_entries = 0;
+ unsigned int pages;
+ int i, r;
+
+ *sgt = kmalloc(sizeof(**sgt), GFP_KERNEL);
+ if (!*sgt)
+ return -ENOMEM;
+
+ for (pages = mem->num_pages, node = mem->mm_node;
+ pages; pages -= node->size, ++node)
+ ++num_entries;
+
+ r = sg_alloc_table(*sgt, num_entries, GFP_KERNEL);
+ if (r)
+ goto error_free;
+
+ for_each_sg((*sgt)->sgl, sg, num_entries, i)
+ sg->length = 0;
+
+ node = mem->mm_node;
+ for_each_sg((*sgt)->sgl, sg, num_entries, i) {
+ phys_addr_t phys = (node->start << PAGE_SHIFT) +
+ adev->gmc.aper_base;
+ size_t size = node->size << PAGE_SHIFT;
+ dma_addr_t addr;
+
+ ++node;
+ addr = dma_map_resource(dev, phys, size, dir,
+ DMA_ATTR_SKIP_CPU_SYNC);
+ r = dma_mapping_error(dev, addr);
+ if (r)
+ goto error_unmap;
+
+ sg_set_page(sg, NULL, size, 0);
+ sg_dma_address(sg) = addr;
+ sg_dma_len(sg) = size;
+ }
+ return 0;
+
+error_unmap:
+ for_each_sg((*sgt)->sgl, sg, num_entries, i) {
+ if (!sg->length)
+ continue;
+
+ dma_unmap_resource(dev, sg->dma_address,
+ sg->length, dir,
+ DMA_ATTR_SKIP_CPU_SYNC);
+ }
+ sg_free_table(*sgt);
+
+error_free:
+ kfree(*sgt);
+ return r;
+}
+
+/**
+ * amdgpu_vram_mgr_free_sgt - free an sg table created by amdgpu_vram_mgr_alloc_sgt
+ *
+ * @adev: amdgpu device pointer
+ * @dev: device the sg table was mapped for
+ * @dir: dma direction the mapping was created with
+ * @sgt: sg table to free
+ *
+ * Unmap and free a previously allocated sg table.
+ */
+void amdgpu_vram_mgr_free_sgt(struct amdgpu_device *adev,
+ struct device *dev,
+ enum dma_data_direction dir,
+ struct sg_table *sgt)
+{
+ struct scatterlist *sg;
+ int i;
+
+ for_each_sg(sgt->sgl, sg, sgt->nents, i)
+ dma_unmap_resource(dev, sg->dma_address,
+ sg->length, dir,
+ DMA_ATTR_SKIP_CPU_SYNC);
+ sg_free_table(sgt);
+ kfree(sgt);
+}
+
+/**
* amdgpu_vram_mgr_usage - how many bytes are used in this domain
*
* @man: TTM memory type manager
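The new amdgpu_vram_mgr_alloc_sgt()/amdgpu_vram_mgr_free_sgt() pair lets another device DMA to or from a VRAM allocation. A minimal usage sketch under the signatures added above; the buffer object bo, the peer struct device *peer_dev and the DMA_BIDIRECTIONAL direction are illustrative assumptions, not from the patch:

/* Illustrative fragment: map a VRAM-backed ttm_mem_reg for a peer device,
 * do the DMA, then release the mapping with the matching free call. */
struct sg_table *sgt;
int r;

r = amdgpu_vram_mgr_alloc_sgt(adev, &bo->tbo.mem, peer_dev,
			      DMA_BIDIRECTIONAL, &sgt);
if (r)
	return r;

/* ... program the peer using sg_dma_address()/sg_dma_len() of each entry ... */

amdgpu_vram_mgr_free_sgt(adev, peer_dev, DMA_BIDIRECTIONAL, sgt);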
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index 95b3327168ac..91837a991319 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -325,9 +325,18 @@ success:
static void amdgpu_xgmi_sysfs_rem_dev_info(struct amdgpu_device *adev,
struct amdgpu_hive_info *hive)
{
+ char node[10];
+ memset(node, 0, sizeof(node));
+
device_remove_file(adev->dev, &dev_attr_xgmi_device_id);
- sysfs_remove_link(&adev->dev->kobj, adev->ddev->unique);
- sysfs_remove_link(hive->kobj, adev->ddev->unique);
+ device_remove_file(adev->dev, &dev_attr_xgmi_error);
+
+ if (adev != hive->adev)
+ sysfs_remove_link(&adev->dev->kobj,"xgmi_hive_info");
+
+ sprintf(node, "node%d", hive->number_devices);
+ sysfs_remove_link(hive->kobj, node);
+
}
@@ -373,7 +382,13 @@ struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev, int lo
if (lock)
mutex_lock(&tmp->hive_lock);
- tmp->pstate = -1;
+ tmp->pstate = AMDGPU_XGMI_PSTATE_UNKNOWN;
+ tmp->hi_req_gpu = NULL;
+ /*
+ * The hive pstate is high on boot for Vega20, so we have to drop to the
+ * low pstate after boot.
+ */
+ tmp->hi_req_count = AMDGPU_MAX_XGMI_DEVICE_PER_HIVE;
mutex_unlock(&xgmi_mutex);
return tmp;
@@ -383,56 +398,59 @@ int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate)
{
int ret = 0;
struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0);
- struct amdgpu_device *tmp_adev;
- bool update_hive_pstate = true;
- bool is_high_pstate = pstate && adev->asic_type == CHIP_VEGA20;
+ struct amdgpu_device *request_adev = hive->hi_req_gpu ?
+ hive->hi_req_gpu : adev;
+ bool is_hi_req = pstate == AMDGPU_XGMI_PSTATE_MAX_VEGA20;
+ bool init_low = hive->pstate == AMDGPU_XGMI_PSTATE_UNKNOWN;
- if (!hive)
+ /* A firmware bug currently breaks pstate switching, so disable it for now */
+ return 0;
+
+ if (!hive || adev->asic_type != CHIP_VEGA20)
return 0;
mutex_lock(&hive->hive_lock);
- if (hive->pstate == pstate) {
- adev->pstate = is_high_pstate ? pstate : adev->pstate;
+ if (is_hi_req)
+ hive->hi_req_count++;
+ else
+ hive->hi_req_count--;
+
+ /*
+ * Vega20 only needs a single peer to request the high pstate for the hive
+ * to go high, but all peers must request the low pstate for the hive to go low.
+ */
+ if (hive->pstate == pstate ||
+ (!is_hi_req && hive->hi_req_count && !init_low))
goto out;
- }
- dev_dbg(adev->dev, "Set xgmi pstate %d.\n", pstate);
+ dev_dbg(request_adev->dev, "Set xgmi pstate %d.\n", pstate);
- ret = amdgpu_dpm_set_xgmi_pstate(adev, pstate);
+ ret = amdgpu_dpm_set_xgmi_pstate(request_adev, pstate);
if (ret) {
- dev_err(adev->dev,
+ dev_err(request_adev->dev,
"XGMI: Set pstate failure on device %llx, hive %llx, ret %d",
- adev->gmc.xgmi.node_id,
- adev->gmc.xgmi.hive_id, ret);
+ request_adev->gmc.xgmi.node_id,
+ request_adev->gmc.xgmi.hive_id, ret);
goto out;
}
- /* Update device pstate */
- adev->pstate = pstate;
-
- /*
- * Update the hive pstate only all devices of the hive
- * are in the same pstate
- */
- list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
- if (tmp_adev->pstate != adev->pstate) {
- update_hive_pstate = false;
- break;
- }
- }
- if (update_hive_pstate || is_high_pstate)
+ if (init_low)
+ hive->pstate = hive->hi_req_count ?
+ hive->pstate : AMDGPU_XGMI_PSTATE_MIN;
+ else {
hive->pstate = pstate;
-
+ hive->hi_req_gpu = pstate != AMDGPU_XGMI_PSTATE_MIN ?
+ adev : NULL;
+ }
out:
mutex_unlock(&hive->hive_lock);
-
return ret;
}
int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_device *adev)
{
- int ret = -EINVAL;
+ int ret;
/* Each psp need to set the latest topology */
ret = psp_xgmi_set_topology_info(&adev->psp,
@@ -507,9 +525,6 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
goto exit;
}
- /* Set default device pstate */
- adev->pstate = -1;
-
top_info = &adev->psp.xgmi_context.top_info;
list_add_tail(&adev->gmc.xgmi.head, &hive->device_list);
@@ -577,14 +592,14 @@ int amdgpu_xgmi_remove_device(struct amdgpu_device *adev)
if (!hive)
return -EINVAL;
- if (!(hive->number_devices--)) {
+ task_barrier_rem_task(&hive->tb);
+ amdgpu_xgmi_sysfs_rem_dev_info(adev, hive);
+ mutex_unlock(&hive->hive_lock);
+
+ if (!(--hive->number_devices)) {
amdgpu_xgmi_sysfs_destroy(adev, hive);
mutex_destroy(&hive->hive_lock);
mutex_destroy(&hive->reset_lock);
- } else {
- task_barrier_rem_task(&hive->tb);
- amdgpu_xgmi_sysfs_rem_dev_info(adev, hive);
- mutex_unlock(&hive->hive_lock);
}
return psp_xgmi_terminate(&adev->psp);
@@ -604,6 +619,8 @@ int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev)
adev->gmc.xgmi.num_physical_nodes == 0)
return 0;
+ amdgpu_xgmi_reset_ras_error_count(adev);
+
if (!adev->gmc.xgmi.ras_if) {
adev->gmc.xgmi.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL);
if (!adev->gmc.xgmi.ras_if)
@@ -641,31 +658,34 @@ void amdgpu_xgmi_ras_fini(struct amdgpu_device *adev)
uint64_t amdgpu_xgmi_get_relative_phy_addr(struct amdgpu_device *adev,
uint64_t addr)
{
- uint32_t df_inst_id;
- uint64_t dram_base_addr = 0;
- const struct amdgpu_df_funcs *df_funcs = adev->df.funcs;
-
- if ((!df_funcs) ||
- (!df_funcs->get_df_inst_id) ||
- (!df_funcs->get_dram_base_addr)) {
- dev_warn(adev->dev,
- "XGMI: relative phy_addr algorithm is not supported\n");
- return addr;
- }
-
- if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW)) {
- dev_warn(adev->dev,
- "failed to disable DF-Cstate, DF register may not be accessible\n");
- return addr;
- }
+ struct amdgpu_xgmi *xgmi = &adev->gmc.xgmi;
+ return (addr + xgmi->physical_node_id * xgmi->node_segment_size);
+}
- df_inst_id = df_funcs->get_df_inst_id(adev);
- dram_base_addr = df_funcs->get_dram_base_addr(adev, df_inst_id);
+static void pcs_clear_status(struct amdgpu_device *adev, uint32_t pcs_status_reg)
+{
+ WREG32_PCIE(pcs_status_reg, 0xFFFFFFFF);
+ WREG32_PCIE(pcs_status_reg, 0);
+}
- if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW))
- dev_warn(adev->dev, "failed to enable DF-Cstate\n");
+void amdgpu_xgmi_reset_ras_error_count(struct amdgpu_device *adev)
+{
+ uint32_t i;
- return addr + dram_base_addr;
+ switch (adev->asic_type) {
+ case CHIP_ARCTURUS:
+ for (i = 0; i < ARRAY_SIZE(xgmi_pcs_err_status_reg_arct); i++)
+ pcs_clear_status(adev,
+ xgmi_pcs_err_status_reg_arct[i]);
+ break;
+ case CHIP_VEGA20:
+ for (i = 0; i < ARRAY_SIZE(xgmi_pcs_err_status_reg_vg20); i++)
+ pcs_clear_status(adev,
+ xgmi_pcs_err_status_reg_vg20[i]);
+ break;
+ default:
+ break;
+ }
}
static int amdgpu_xgmi_query_pcs_error_status(struct amdgpu_device *adev,
@@ -758,6 +778,8 @@ int amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev,
break;
}
+ amdgpu_xgmi_reset_ras_error_count(adev);
+
err_data->ue_count += ue_cnt;
err_data->ce_count += ce_cnt;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
index 4a92067fe595..6999eab16a72 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
@@ -25,6 +25,7 @@
#include <drm/task_barrier.h>
#include "amdgpu_psp.h"
+
struct amdgpu_hive_info {
uint64_t hive_id;
struct list_head device_list;
@@ -33,8 +34,14 @@ struct amdgpu_hive_info {
struct kobject *kobj;
struct device_attribute dev_attr;
struct amdgpu_device *adev;
- int pstate; /*0 -- low , 1 -- high , -1 unknown*/
+ int hi_req_count;
+ struct amdgpu_device *hi_req_gpu;
struct task_barrier tb;
+ enum {
+ AMDGPU_XGMI_PSTATE_MIN,
+ AMDGPU_XGMI_PSTATE_MAX_VEGA20,
+ AMDGPU_XGMI_PSTATE_UNKNOWN
+ } pstate;
};
struct amdgpu_pcs_ras_field {
@@ -56,6 +63,7 @@ uint64_t amdgpu_xgmi_get_relative_phy_addr(struct amdgpu_device *adev,
uint64_t addr);
int amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev,
void *ras_error_status);
+void amdgpu_xgmi_reset_ras_error_count(struct amdgpu_device *adev);
static inline bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev,
struct amdgpu_device *bo_adev)
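The per-mapping xgmi_map_counter in amdgpu_vm_manager is gone; the hive now counts high-pstate requests itself: one outstanding request is enough to keep Vega20 at AMDGPU_XGMI_PSTATE_MAX_VEGA20, and it only falls back to AMDGPU_XGMI_PSTATE_MIN once every request has been withdrawn. A self-contained toy model of that counting rule (plain C, nothing here is driver code):

#include <assert.h>

/* Toy model of the hive hi_req_count rule: any high request raises the
 * hive pstate; it only returns to low when all requests are withdrawn. */
enum pstate { PSTATE_MIN, PSTATE_MAX };

struct hive { int hi_req_count; enum pstate pstate; };

static void request(struct hive *h, int want_high)
{
	h->hi_req_count += want_high ? 1 : -1;
	h->pstate = h->hi_req_count ? PSTATE_MAX : PSTATE_MIN;
}

int main(void)
{
	struct hive h = { 0, PSTATE_MIN };

	request(&h, 1);		/* one peer wants high -> hive goes high */
	request(&h, 1);		/* second peer wants high -> stays high */
	request(&h, 0);		/* one peer drops -> still high */
	assert(h.pstate == PSTATE_MAX);
	request(&h, 0);		/* last request withdrawn -> hive goes low */
	assert(h.pstate == PSTATE_MIN);
	return 0;
}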
diff --git a/drivers/gpu/drm/amd/amdgpu/atom.c b/drivers/gpu/drm/amd/amdgpu/atom.c
index cae426c7c086..4cfc786699c7 100644
--- a/drivers/gpu/drm/amd/amdgpu/atom.c
+++ b/drivers/gpu/drm/amd/amdgpu/atom.c
@@ -54,6 +54,8 @@
#define PLL_INDEX 2
#define PLL_DATA 3
+#define ATOM_CMD_TIMEOUT_SEC 20
+
typedef struct {
struct atom_context *ctx;
uint32_t *ps, *ws;
@@ -744,8 +746,9 @@ static void atom_op_jump(atom_exec_context *ctx, int *ptr, int arg)
cjiffies = jiffies;
if (time_after(cjiffies, ctx->last_jump_jiffies)) {
cjiffies -= ctx->last_jump_jiffies;
- if ((jiffies_to_msecs(cjiffies) > 10000)) {
- DRM_ERROR("atombios stuck in loop for more than 10secs aborting\n");
+ if ((jiffies_to_msecs(cjiffies) > ATOM_CMD_TIMEOUT_SEC * 1000)) {
+ DRM_ERROR("atombios stuck in loop for more than %dsecs aborting\n",
+ ATOM_CMD_TIMEOUT_SEC);
ctx->abort = true;
}
} else {
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c
index 62635e58e45e..fe306d0f73f7 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik.c
@@ -1809,12 +1809,6 @@ static uint32_t cik_get_rev_id(struct amdgpu_device *adev)
>> CC_DRM_ID_STRAPS__ATI_REV_ID__SHIFT;
}
-static void cik_detect_hw_virtualization(struct amdgpu_device *adev)
-{
- if (is_virtual_machine()) /* passthrough mode */
- adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE;
-}
-
static void cik_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring)
{
if (!ring || !ring->funcs->emit_wreg) {
@@ -2177,8 +2171,6 @@ static const struct amdgpu_ip_block_version cik_common_ip_block =
int cik_set_ip_blocks(struct amdgpu_device *adev)
{
- cik_detect_hw_virtualization(adev);
-
switch (adev->asic_type) {
case CHIP_BONAIRE:
amdgpu_device_ip_block_add(adev, &cik_common_ip_block);
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index 580d3f93d670..20f108818b2b 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -320,8 +320,6 @@ static void cik_sdma_gfx_stop(struct amdgpu_device *adev)
WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl);
WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], 0);
}
- sdma0->sched.ready = false;
- sdma1->sched.ready = false;
}
/**
@@ -679,7 +677,8 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring, long timeout)
tmp = 0xCAFEDEAD;
adev->wb.wb[index] = cpu_to_le32(tmp);
memset(&ib, 0, sizeof(ib));
- r = amdgpu_ib_get(adev, NULL, 256, &ib);
+ r = amdgpu_ib_get(adev, NULL, 256,
+ AMDGPU_IB_POOL_DIRECT, &ib);
if (r)
goto err0;
@@ -980,7 +979,8 @@ static int cik_sdma_sw_init(void *handle)
&adev->sdma.trap_irq,
(i == 0) ?
AMDGPU_SDMA_IRQ_INSTANCE0 :
- AMDGPU_SDMA_IRQ_INSTANCE1);
+ AMDGPU_SDMA_IRQ_INSTANCE1,
+ AMDGPU_RING_PRIO_DEFAULT);
if (r)
return r;
}
@@ -1313,7 +1313,8 @@ static void cik_sdma_set_irq_funcs(struct amdgpu_device *adev)
static void cik_sdma_emit_copy_buffer(struct amdgpu_ib *ib,
uint64_t src_offset,
uint64_t dst_offset,
- uint32_t byte_count)
+ uint32_t byte_count,
+ bool tmz)
{
ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0);
ib->ptr[ib->length_dw++] = byte_count;
diff --git a/drivers/gpu/drm/amd/amdgpu/cikd.h b/drivers/gpu/drm/amd/amdgpu/cikd.h
index cee6e8a3ad9c..5f3f6ebfb387 100644
--- a/drivers/gpu/drm/amd/amdgpu/cikd.h
+++ b/drivers/gpu/drm/amd/amdgpu/cikd.h
@@ -450,7 +450,7 @@
# define PACKET3_DMA_DATA_CMD_SAIC (1 << 28)
# define PACKET3_DMA_DATA_CMD_DAIC (1 << 29)
# define PACKET3_DMA_DATA_CMD_RAW_WAIT (1 << 30)
-#define PACKET3_AQUIRE_MEM 0x58
+#define PACKET3_ACQUIRE_MEM 0x58
#define PACKET3_REWIND 0x59
#define PACKET3_LOAD_UCONFIG_REG 0x5E
#define PACKET3_LOAD_SH_REG 0x5F
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
index 2512e7ebfedf..e38744d06f4e 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
@@ -2303,9 +2303,9 @@ static void dce_v10_0_hide_cursor(struct drm_crtc *crtc)
struct amdgpu_device *adev = crtc->dev->dev_private;
u32 tmp;
- tmp = RREG32_IDX(mmCUR_CONTROL + amdgpu_crtc->crtc_offset);
+ tmp = RREG32(mmCUR_CONTROL + amdgpu_crtc->crtc_offset);
tmp = REG_SET_FIELD(tmp, CUR_CONTROL, CURSOR_EN, 0);
- WREG32_IDX(mmCUR_CONTROL + amdgpu_crtc->crtc_offset, tmp);
+ WREG32(mmCUR_CONTROL + amdgpu_crtc->crtc_offset, tmp);
}
static void dce_v10_0_show_cursor(struct drm_crtc *crtc)
@@ -2319,10 +2319,10 @@ static void dce_v10_0_show_cursor(struct drm_crtc *crtc)
WREG32(mmCUR_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
lower_32_bits(amdgpu_crtc->cursor_addr));
- tmp = RREG32_IDX(mmCUR_CONTROL + amdgpu_crtc->crtc_offset);
+ tmp = RREG32(mmCUR_CONTROL + amdgpu_crtc->crtc_offset);
tmp = REG_SET_FIELD(tmp, CUR_CONTROL, CURSOR_EN, 1);
tmp = REG_SET_FIELD(tmp, CUR_CONTROL, CURSOR_MODE, 2);
- WREG32_IDX(mmCUR_CONTROL + amdgpu_crtc->crtc_offset, tmp);
+ WREG32(mmCUR_CONTROL + amdgpu_crtc->crtc_offset, tmp);
}
static int dce_v10_0_cursor_move_locked(struct drm_crtc *crtc,
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
index 0dde22db9848..2584ff74423b 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
@@ -2382,9 +2382,9 @@ static void dce_v11_0_hide_cursor(struct drm_crtc *crtc)
struct amdgpu_device *adev = crtc->dev->dev_private;
u32 tmp;
- tmp = RREG32_IDX(mmCUR_CONTROL + amdgpu_crtc->crtc_offset);
+ tmp = RREG32(mmCUR_CONTROL + amdgpu_crtc->crtc_offset);
tmp = REG_SET_FIELD(tmp, CUR_CONTROL, CURSOR_EN, 0);
- WREG32_IDX(mmCUR_CONTROL + amdgpu_crtc->crtc_offset, tmp);
+ WREG32(mmCUR_CONTROL + amdgpu_crtc->crtc_offset, tmp);
}
static void dce_v11_0_show_cursor(struct drm_crtc *crtc)
@@ -2398,10 +2398,10 @@ static void dce_v11_0_show_cursor(struct drm_crtc *crtc)
WREG32(mmCUR_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
lower_32_bits(amdgpu_crtc->cursor_addr));
- tmp = RREG32_IDX(mmCUR_CONTROL + amdgpu_crtc->crtc_offset);
+ tmp = RREG32(mmCUR_CONTROL + amdgpu_crtc->crtc_offset);
tmp = REG_SET_FIELD(tmp, CUR_CONTROL, CURSOR_EN, 1);
tmp = REG_SET_FIELD(tmp, CUR_CONTROL, CURSOR_MODE, 2);
- WREG32_IDX(mmCUR_CONTROL + amdgpu_crtc->crtc_offset, tmp);
+ WREG32(mmCUR_CONTROL + amdgpu_crtc->crtc_offset, tmp);
}
static int dce_v11_0_cursor_move_locked(struct drm_crtc *crtc,
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
index 84219534bd38..d05c39f9ae40 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
@@ -2194,9 +2194,9 @@ static void dce_v6_0_hide_cursor(struct drm_crtc *crtc)
struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
struct amdgpu_device *adev = crtc->dev->dev_private;
- WREG32_IDX(mmCUR_CONTROL + amdgpu_crtc->crtc_offset,
- (CURSOR_24_8_PRE_MULT << CUR_CONTROL__CURSOR_MODE__SHIFT) |
- (CURSOR_URGENT_1_2 << CUR_CONTROL__CURSOR_URGENT_CONTROL__SHIFT));
+ WREG32(mmCUR_CONTROL + amdgpu_crtc->crtc_offset,
+ (CURSOR_24_8_PRE_MULT << CUR_CONTROL__CURSOR_MODE__SHIFT) |
+ (CURSOR_URGENT_1_2 << CUR_CONTROL__CURSOR_URGENT_CONTROL__SHIFT));
}
@@ -2211,10 +2211,10 @@ static void dce_v6_0_show_cursor(struct drm_crtc *crtc)
WREG32(mmCUR_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
lower_32_bits(amdgpu_crtc->cursor_addr));
- WREG32_IDX(mmCUR_CONTROL + amdgpu_crtc->crtc_offset,
- CUR_CONTROL__CURSOR_EN_MASK |
- (CURSOR_24_8_PRE_MULT << CUR_CONTROL__CURSOR_MODE__SHIFT) |
- (CURSOR_URGENT_1_2 << CUR_CONTROL__CURSOR_URGENT_CONTROL__SHIFT));
+ WREG32(mmCUR_CONTROL + amdgpu_crtc->crtc_offset,
+ CUR_CONTROL__CURSOR_EN_MASK |
+ (CURSOR_24_8_PRE_MULT << CUR_CONTROL__CURSOR_MODE__SHIFT) |
+ (CURSOR_URGENT_1_2 << CUR_CONTROL__CURSOR_URGENT_CONTROL__SHIFT));
}
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
index 3a640702d7d1..ad0f8adb6a2b 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
@@ -2205,9 +2205,9 @@ static void dce_v8_0_hide_cursor(struct drm_crtc *crtc)
struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
struct amdgpu_device *adev = crtc->dev->dev_private;
- WREG32_IDX(mmCUR_CONTROL + amdgpu_crtc->crtc_offset,
- (CURSOR_24_8_PRE_MULT << CUR_CONTROL__CURSOR_MODE__SHIFT) |
- (CURSOR_URGENT_1_2 << CUR_CONTROL__CURSOR_URGENT_CONTROL__SHIFT));
+ WREG32(mmCUR_CONTROL + amdgpu_crtc->crtc_offset,
+ (CURSOR_24_8_PRE_MULT << CUR_CONTROL__CURSOR_MODE__SHIFT) |
+ (CURSOR_URGENT_1_2 << CUR_CONTROL__CURSOR_URGENT_CONTROL__SHIFT));
}
static void dce_v8_0_show_cursor(struct drm_crtc *crtc)
@@ -2220,10 +2220,10 @@ static void dce_v8_0_show_cursor(struct drm_crtc *crtc)
WREG32(mmCUR_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
lower_32_bits(amdgpu_crtc->cursor_addr));
- WREG32_IDX(mmCUR_CONTROL + amdgpu_crtc->crtc_offset,
- CUR_CONTROL__CURSOR_EN_MASK |
- (CURSOR_24_8_PRE_MULT << CUR_CONTROL__CURSOR_MODE__SHIFT) |
- (CURSOR_URGENT_1_2 << CUR_CONTROL__CURSOR_URGENT_CONTROL__SHIFT));
+ WREG32(mmCUR_CONTROL + amdgpu_crtc->crtc_offset,
+ CUR_CONTROL__CURSOR_EN_MASK |
+ (CURSOR_24_8_PRE_MULT << CUR_CONTROL__CURSOR_MODE__SHIFT) |
+ (CURSOR_URGENT_1_2 << CUR_CONTROL__CURSOR_URGENT_CONTROL__SHIFT));
}
static int dce_v8_0_cursor_move_locked(struct drm_crtc *crtc,
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
index 13e12be667fc..d5ff7b6331ff 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
@@ -172,8 +172,9 @@ static void dce_virtual_crtc_disable(struct drm_crtc *crtc)
{
struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- dce_virtual_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
+ drm_crtc_vblank_off(crtc);
+ amdgpu_crtc->enabled = false;
amdgpu_crtc->pll_id = ATOM_PPLL_INVALID;
amdgpu_crtc->encoder = NULL;
amdgpu_crtc->connector = NULL;
@@ -286,7 +287,7 @@ static int dce_virtual_get_modes(struct drm_connector *connector)
static const struct mode_size {
int w;
int h;
- } common_modes[17] = {
+ } common_modes[21] = {
{ 640, 480},
{ 720, 480},
{ 800, 600},
@@ -303,10 +304,14 @@ static int dce_virtual_get_modes(struct drm_connector *connector)
{1680, 1050},
{1600, 1200},
{1920, 1080},
- {1920, 1200}
+ {1920, 1200},
+ {4096, 3112},
+ {3656, 2664},
+ {3840, 2160},
+ {4096, 2160},
};
- for (i = 0; i < 17; i++) {
+ for (i = 0; i < 21; i++) {
mode = drm_cvt_mode(dev, common_modes[i].w, common_modes[i].h, 60, false, false, false);
drm_mode_probed_add(connector, mode);
}
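The virtual display now advertises 21 common modes, with the array size and the loop bound updated in lockstep. A hedged alternative, not part of this patch, would derive the bound from the table so the two cannot drift apart:

/* Sketch, not from the patch: derive the loop bound from the table so
 * adding a mode cannot leave the hardcoded count stale. */
for (i = 0; i < ARRAY_SIZE(common_modes); i++) {
	mode = drm_cvt_mode(dev, common_modes[i].w, common_modes[i].h,
			    60, false, false, false);
	drm_mode_probed_add(connector, mode);
}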
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
index 5a1bd8ed1a6c..a7b8292cefee 100644
--- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
+++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
@@ -686,58 +686,6 @@ static void df_v3_6_pmc_get_count(struct amdgpu_device *adev,
}
}
-static uint64_t df_v3_6_get_dram_base_addr(struct amdgpu_device *adev,
- uint32_t df_inst)
-{
- uint32_t base_addr_reg_val = 0;
- uint64_t base_addr = 0;
-
- base_addr_reg_val = RREG32_PCIE(smnDF_CS_UMC_AON0_DramBaseAddress0 +
- df_inst * DF_3_6_SMN_REG_INST_DIST);
-
- if (REG_GET_FIELD(base_addr_reg_val,
- DF_CS_UMC_AON0_DramBaseAddress0,
- AddrRngVal) == 0) {
- DRM_WARN("address range not valid");
- return 0;
- }
-
- base_addr = REG_GET_FIELD(base_addr_reg_val,
- DF_CS_UMC_AON0_DramBaseAddress0,
- DramBaseAddr);
-
- return base_addr << 28;
-}
-
-static uint32_t df_v3_6_get_df_inst_id(struct amdgpu_device *adev)
-{
- uint32_t xgmi_node_id = 0;
- uint32_t df_inst_id = 0;
-
- /* Walk through DF dst nodes to find current XGMI node */
- for (df_inst_id = 0; df_inst_id < DF_3_6_INST_CNT; df_inst_id++) {
-
- xgmi_node_id = RREG32_PCIE(smnDF_CS_UMC_AON0_DramLimitAddress0 +
- df_inst_id * DF_3_6_SMN_REG_INST_DIST);
- xgmi_node_id = REG_GET_FIELD(xgmi_node_id,
- DF_CS_UMC_AON0_DramLimitAddress0,
- DstFabricID);
-
- /* TODO: establish reason dest fabric id is offset by 7 */
- xgmi_node_id = xgmi_node_id >> 7;
-
- if (adev->gmc.xgmi.physical_node_id == xgmi_node_id)
- break;
- }
-
- if (df_inst_id == DF_3_6_INST_CNT) {
- DRM_WARN("cant match df dst id with gpu node");
- return 0;
- }
-
- return df_inst_id;
-}
-
const struct amdgpu_df_funcs df_v3_6_funcs = {
.sw_init = df_v3_6_sw_init,
.sw_fini = df_v3_6_sw_fini,
@@ -752,6 +700,4 @@ const struct amdgpu_df_funcs df_v3_6_funcs = {
.pmc_get_count = df_v3_6_pmc_get_count,
.get_fica = df_v3_6_get_fica,
.set_fica = df_v3_6_set_fica,
- .get_dram_base_addr = df_v3_6_get_dram_base_addr,
- .get_df_inst_id = df_v3_6_get_df_inst_id
};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 0e0daf0021b6..bd5dd4f64311 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -138,6 +138,1062 @@ static const struct soc15_reg_golden golden_settings_gc_10_0_nv10[] =
/* Pending on emulation bring up */
};
+static const struct soc15_reg_golden golden_settings_gc_rlc_spm_10_0_nv10[] =
+{
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x28),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x9),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1b),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1b),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x20),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xcc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xd0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xd4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x24),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x24),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x11),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x11),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x88),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x8c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xb0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xb4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xb8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xbc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x90),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x94),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x98),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x9c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xa0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xa4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xa8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xac),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x9),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xb),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x38),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x3c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x40),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x44),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x48),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x5),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x4c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x9),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x70),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x9),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x74),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x9),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x78),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xb),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x7c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xd),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x80),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x84),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xb),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x50),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x7),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x54),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x5),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x58),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x9),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x5c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x60),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xb),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x64),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x9),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x68),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x9),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x6c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xb),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x1c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x3),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x20),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x20),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x24),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x24),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x28),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x28),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x2c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1b),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x2c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1b),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x30),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x30),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x34),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x34),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x38),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x38),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x3c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1a),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x3c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1a),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x50),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x50),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x54),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x54),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x58),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x58),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x5c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x5c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1d),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1d),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x48),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1a),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x48),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1a),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x4c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1a),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x4c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1a),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x40),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x40),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x44),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1b),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x44),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1b),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x17),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x17),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x60),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x60),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x64),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x64),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x70),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x16),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x70),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x16),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x74),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x74),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x68),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x68),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x6c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x6c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x78),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xd),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x78),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xd),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x7c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x7c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x88),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x88),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x8c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x8c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x80),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x80),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x84),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x84),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x90),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x90),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x94),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x94),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xa0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xa0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xa4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x3),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xa4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x3),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x98),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x98),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x9c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x9c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xa8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xa8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xac),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xac),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xb8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x13),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xb8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x13),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xbc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xbc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xb0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xb0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xb4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xb4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xd0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xb),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xd0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xb),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xd4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xd4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xcc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xb),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xcc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xb),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xe8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xe8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xec),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xec),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xf0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xf0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xf4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xf4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xf8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xf8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xfc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xfc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x104),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x104),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xe0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x13),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xe0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x13),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x118),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x13),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x118),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x13),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x11c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x13),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x11c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x13),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x120),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x13),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x120),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x13),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x124),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x13),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x124),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x13),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xdc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1b),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xdc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1b),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x110),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x110),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x114),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x114),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x108),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x108),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x10c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x10c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xd8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x17),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xd8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x17),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x128),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x128),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x12c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x12c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x138),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x138),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x13c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x13c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x130),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x130),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x134),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x134),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x140),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x140),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x144),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x7),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x144),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x7),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x150),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x150),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x154),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x7),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x154),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x7),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x148),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x148),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x14c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x7),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x14c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x7),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x158),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x5),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x158),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x5),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x15c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x2),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x15c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x2),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x168),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x168),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x16c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x16c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x160),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x160),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x164),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x2),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x164),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x2),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x170),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x11),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x170),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x11),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x174),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xd),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x174),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xd),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x180),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x180),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x184),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x184),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x178),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x11),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x178),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x11),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x17c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xd),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x17c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xd),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x188),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x188),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x18c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x18c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x198),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x9),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x198),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x9),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x19c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x3),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x19c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x3),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x190),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x190),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x194),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x194),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x30),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x11),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x34),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1d),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1d),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1f),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1f),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x2c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xb),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLOBALS_SAMPLE_SKEW, 0x000000FF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLOBALS_MUXSEL_SKEW, 0x000000FF, 0x20),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLE_SKEW, 0x000000FF, 0x5),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLE_SKEW, 0x000000FF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_MUXSEL_SKEW, 0x000000FF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_MUXSEL_SKEW, 0x000000FF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_DESER_START_SKEW, 0x000000FF, 0x33),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xFFFFFFFF, 0xe0000000)
+};
+
static const struct soc15_reg_golden golden_settings_gc_10_1_1[] =
{
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x003c0014),
@@ -272,11 +1328,1691 @@ static const struct soc15_reg_golden golden_settings_gc_10_1_nv14[] =
/* Pending on emulation bring up */
};
+static const struct soc15_reg_golden golden_settings_gc_rlc_spm_10_1_nv14[] =
+{
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xE0000000L, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x28),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x20),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x80),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x84),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x88),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x8c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x24),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x24),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x20),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x60),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x64),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x2),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x68),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x2),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x6c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x2),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x70),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x2),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x74),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x2),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x78),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x7c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x2),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x38),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x3c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x40),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x3),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x44),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x9),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x48),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x7),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x4c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x3),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x50),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x3),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x54),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x3),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x58),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x7),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x5c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x9),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x1c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x2),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x20),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x24),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x16),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x28),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x2c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1a),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x30),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x16),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x34),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x38),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1a),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x3c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x50),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1e),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x54),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1e),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x58),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1e),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x5c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1e),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x20),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x48),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1a),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x4c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1a),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x40),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1e),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x44),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1e),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1a),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x60),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x16),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x64),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x70),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x16),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x74),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x16),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x68),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x16),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x6c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x78),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x7c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x88),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x8c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x80),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x84),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x90),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x94),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xa0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xa4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x98),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x9c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xa8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xac),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xb8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xbc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xb0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xb4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x16),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xd0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x16),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xd4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x16),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xcc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xd8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xdc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xe8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xec),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xe0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xe4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x104),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x108),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x10c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x110),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x114),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x118),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x11c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x16),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xf8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x17),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x130),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x134),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x138),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x13c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xf4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x20),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x128),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x12c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x120),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x124),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xf0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1a),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x140),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x144),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x150),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x154),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x148),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x14c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x158),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x15c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x168),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x16c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x160),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x164),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x170),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x174),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x180),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x184),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x178),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x17c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x188),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x16),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x18c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x198),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x19c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x190),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x16),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x194),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x1a0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x1a4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x1b0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x1b4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x1a8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x1ac),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x1b8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x1bc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x1c8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x1cc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x1c0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x1c4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x30),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x34),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x20),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x20),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x2c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLOBALS_SAMPLE_SKEW, 0x000000FF, 0x26),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLOBALS_MUXSEL_SKEW, 0x000000FF, 0x28),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLE_SKEW, 0x000000FF, 0xf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLE_SKEW, 0x000000FF, 0x15),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_MUXSEL_SKEW, 0x000000FF, 0x1f),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_MUXSEL_SKEW, 0x000000FF, 0x25),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_DESER_START_SKEW, 0x000000FF, 0x3b),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xFFFFFFFF, 0xe0000000)
+};
+
static const struct soc15_reg_golden golden_settings_gc_10_1_2_nv12[] =
{
/* Pending on emulation bring up */
};
 
+static const struct soc15_reg_golden golden_settings_gc_rlc_spm_10_1_2_nv12[] =
+{
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000L, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x28),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x5),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1b),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1b),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x20),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xd),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xcc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xd),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xd0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xd),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xd4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xd),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x24),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xd),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x24),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xd),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x11),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x11),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x88),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x8c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x2),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xb0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xb4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xb8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xbc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x7),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x90),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x2),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x94),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x98),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x5),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x9c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xa0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xa4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xa8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xac),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x5),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xd),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x9),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x38),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x7),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x3c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x7),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x40),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xd),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x44),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x48),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x4c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x7),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x70),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x74),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x7),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x78),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x7c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x80),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xb),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x84),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x50),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x54),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x3),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x58),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x5c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x7),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x60),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x64),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x68),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x7),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x6c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x1c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x3),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x11),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x20),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x20),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x24),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x24),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x28),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x28),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x2c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1a),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x2c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1a),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x30),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1d),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x30),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1d),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x34),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1a),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x34),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1a),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x38),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x16),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x38),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x16),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x3c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x3c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x50),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x50),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x54),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x54),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x58),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x58),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x5c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x5c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1a),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1a),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x48),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x48),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x4c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x4c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x40),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1a),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x40),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1a),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x44),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1a),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x44),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1a),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1b),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1b),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x60),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x15),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x60),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x15),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x64),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x13),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x64),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x13),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x70),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x16),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x70),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x16),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x74),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x74),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x68),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x15),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x68),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x15),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x6c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x13),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x6c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x13),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x78),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x78),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x7c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xb),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x7c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xb),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x88),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x88),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x8c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x8c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x80),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x80),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x84),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xb),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x84),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xb),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x90),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x9),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x90),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x9),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x94),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x94),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xa0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xa0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xa4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xd),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xa4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xd),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x98),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x9),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x98),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x9),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x9c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x9c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xa8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xa8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xac),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xac),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xb8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xb8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xbc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xbc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xb0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xb0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xb4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xb4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x11),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x11),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xb),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xb),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xd0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xd0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xd4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xd4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x11),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x11),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xcc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xb),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xcc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xb),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xe8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xe8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xec),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x16),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xec),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x16),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xf0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x15),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xf0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x15),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xf4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xf4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xf8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xf8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xfc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x17),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xfc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x17),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x13),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x13),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x104),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x11),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x104),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x11),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xe0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xe0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x118),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x15),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x118),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x15),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x11c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x15),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x11c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x15),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x120),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x15),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x120),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x15),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x124),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x15),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x124),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x15),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xdc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xdc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x110),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x15),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x110),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x15),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x114),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x114),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x108),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x108),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x10c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x10c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xd8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1b),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xd8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1b),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x128),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x128),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x12c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x12c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x138),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x138),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x13c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x13c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x130),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x130),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x134),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x134),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x140),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x140),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x144),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x7),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x144),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x7),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x150),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x150),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x154),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xd),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x154),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xd),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x148),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x148),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x14c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x7),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x14c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x7),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x158),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x5),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x158),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x5),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x15c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x15c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x168),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x168),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x16c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x9),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x16c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x9),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x160),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x5),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x160),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x5),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x164),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x164),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x170),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x170),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x174),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x174),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x180),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x180),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x184),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x184),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x178),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x178),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x17c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x17c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x188),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x188),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x18c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x5),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x18c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x5),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x198),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x198),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x19c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xb),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x19c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xb),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x190),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x190),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x194),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x194),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x30),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xd),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x34),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x11),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1d),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1d),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1f),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1f),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x2c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xb),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLOBALS_SAMPLE_SKEW, 0x000000FF, 0x1f),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLOBALS_MUXSEL_SKEW, 0x000000FF, 0x22),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLE_SKEW, 0x000000FF, 0x1),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLE_SKEW, 0x000000FF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_MUXSEL_SKEW, 0x000000FF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_MUXSEL_SKEW, 0x000000FF, 0x15),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_DESER_START_SKEW, 0x000000FF, 0x35),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xFFFFFFFF, 0xe0000000)
+};
+
#define DEFAULT_SH_MEM_CONFIG \
((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \
(SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \
@@ -301,7 +3037,7 @@ static int gfx_v10_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev);
static int gfx_v10_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
static void gfx_v10_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume);
static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
-static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start);
+static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure);
static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
{
@@ -431,6 +3167,9 @@ static void gfx_v10_0_init_golden_registers(struct amdgpu_device *adev)
soc15_program_register_sequence(adev,
golden_settings_gc_10_0_nv10,
(const u32)ARRAY_SIZE(golden_settings_gc_10_0_nv10));
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_rlc_spm_10_0_nv10,
+ (const u32)ARRAY_SIZE(golden_settings_gc_rlc_spm_10_0_nv10));
break;
case CHIP_NAVI14:
soc15_program_register_sequence(adev,
@@ -439,6 +3178,9 @@ static void gfx_v10_0_init_golden_registers(struct amdgpu_device *adev)
soc15_program_register_sequence(adev,
golden_settings_gc_10_1_nv14,
(const u32)ARRAY_SIZE(golden_settings_gc_10_1_nv14));
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_rlc_spm_10_1_nv14,
+ (const u32)ARRAY_SIZE(golden_settings_gc_rlc_spm_10_1_nv14));
break;
case CHIP_NAVI12:
soc15_program_register_sequence(adev,
@@ -447,6 +3189,9 @@ static void gfx_v10_0_init_golden_registers(struct amdgpu_device *adev)
soc15_program_register_sequence(adev,
golden_settings_gc_10_1_2_nv12,
(const u32)ARRAY_SIZE(golden_settings_gc_10_1_2_nv12));
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_rlc_spm_10_1_2_nv12,
+ (const u32)ARRAY_SIZE(golden_settings_gc_rlc_spm_10_1_2_nv12));
break;
default:
break;
@@ -557,7 +3302,8 @@ static int gfx_v10_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
gpu_addr = adev->wb.gpu_addr + (index * 4);
adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
memset(&ib, 0, sizeof(ib));
- r = amdgpu_ib_get(adev, NULL, 16, &ib);
+ r = amdgpu_ib_get(adev, NULL, 16,
+ AMDGPU_IB_POOL_DIRECT, &ib);
if (r)
goto err1;
@@ -1298,7 +4044,8 @@ static int gfx_v10_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe;
r = amdgpu_ring_init(adev, ring, 1024,
- &adev->gfx.eop_irq, irq_type);
+ &adev->gfx.eop_irq, irq_type,
+ AMDGPU_RING_PRIO_DEFAULT);
if (r)
return r;
return 0;
@@ -1309,7 +4056,8 @@ static int gfx_v10_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
{
int r;
unsigned irq_type;
- struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
+ struct amdgpu_ring *ring;
+ unsigned int hw_prio;
ring = &adev->gfx.compute_ring[ring_id];
@@ -1328,10 +4076,11 @@ static int gfx_v10_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
+ ring->pipe;
-
+ hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue) ?
+ AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
/* type-2 packets are deprecated on MEC, use type-3 instead */
r = amdgpu_ring_init(adev, ring, 1024,
- &adev->gfx.eop_irq, irq_type);
+ &adev->gfx.eop_irq, irq_type, hw_prio);
if (r)
return r;
@@ -1829,9 +4578,9 @@ static int gfx_v10_0_init_csb(struct amdgpu_device *adev)
/* csib */
WREG32_SOC15_RLC(GC, 0, mmRLC_CSIB_ADDR_HI,
- adev->gfx.rlc.clear_state_gpu_addr >> 32);
+ adev->gfx.rlc.clear_state_gpu_addr >> 32);
WREG32_SOC15_RLC(GC, 0, mmRLC_CSIB_ADDR_LO,
- adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
+ adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
WREG32_SOC15_RLC(GC, 0, mmRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size);
return 0;
@@ -2441,10 +5190,6 @@ static int gfx_v10_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
- if (!enable) {
- for (i = 0; i < adev->gfx.num_gfx_rings; i++)
- adev->gfx.gfx_ring[i].sched.ready = false;
- }
WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
for (i = 0; i < adev->usec_timeout; i++) {
@@ -2923,16 +5668,12 @@ static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev)
static void gfx_v10_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
{
- int i;
-
if (enable) {
WREG32_SOC15(GC, 0, mmCP_MEC_CNTL, 0);
} else {
WREG32_SOC15(GC, 0, mmCP_MEC_CNTL,
(CP_MEC_CNTL__MEC_ME1_HALT_MASK |
CP_MEC_CNTL__MEC_ME2_HALT_MASK));
- for (i = 0; i < adev->gfx.num_compute_rings; i++)
- adev->gfx.compute_ring[i].sched.ready = false;
adev->gfx.kiq.ring.sched.ready = false;
}
udelay(50);
@@ -3268,11 +6009,8 @@ static void gfx_v10_0_compute_mqd_set_priority(struct amdgpu_ring *ring, struct
if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue)) {
mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
- ring->has_high_prio = true;
mqd->cp_hqd_queue_priority =
AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
- } else {
- ring->has_high_prio = false;
}
}
}
@@ -3802,14 +6540,16 @@ static int gfx_v10_0_hw_init(void *handle)
* loaded firstly, so in direct type, it has to load smc ucode
* here before rlc.
*/
- r = smu_load_microcode(&adev->smu);
- if (r)
- return r;
+ if (adev->smu.ppt_funcs != NULL) {
+ r = smu_load_microcode(&adev->smu);
+ if (r)
+ return r;
- r = smu_check_fw_status(&adev->smu);
- if (r) {
- pr_err("SMC firmware status is not correct\n");
- return r;
+ r = smu_check_fw_status(&adev->smu);
+ if (r) {
+ pr_err("SMC firmware status is not correct\n");
+ return r;
+ }
}
}
@@ -4292,14 +7032,21 @@ static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev,
static void gfx_v10_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
{
- u32 data;
+ u32 reg, data;
- data = RREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL);
+ reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL);
+ if (amdgpu_sriov_is_pp_one_vf(adev))
+ data = RREG32_NO_KIQ(reg);
+ else
+ data = RREG32(reg);
data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
- WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
+ if (amdgpu_sriov_is_pp_one_vf(adev))
+ WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
+ else
+ WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
}
static bool gfx_v10_0_check_rlcg_range(struct amdgpu_device *adev,
@@ -4341,6 +7088,20 @@ static const struct amdgpu_rlc_funcs gfx_v10_0_rlc_funcs = {
.reset = gfx_v10_0_rlc_reset,
.start = gfx_v10_0_rlc_start,
.update_spm_vmid = gfx_v10_0_update_spm_vmid,
+};
+
+static const struct amdgpu_rlc_funcs gfx_v10_0_rlc_funcs_sriov = {
+ .is_rlc_enabled = gfx_v10_0_is_rlc_enabled,
+ .set_safe_mode = gfx_v10_0_set_safe_mode,
+ .unset_safe_mode = gfx_v10_0_unset_safe_mode,
+ .init = gfx_v10_0_rlc_init,
+ .get_csb_size = gfx_v10_0_get_csb_size,
+ .get_csb_buffer = gfx_v10_0_get_csb_buffer,
+ .resume = gfx_v10_0_rlc_resume,
+ .stop = gfx_v10_0_rlc_stop,
+ .reset = gfx_v10_0_rlc_reset,
+ .start = gfx_v10_0_rlc_start,
+ .update_spm_vmid = gfx_v10_0_update_spm_vmid,
.rlcg_wreg = gfx_v10_rlcg_wreg,
.is_rlcg_access_range = gfx_v10_0_is_rlcg_access_range,
};
@@ -4350,6 +7111,10 @@ static int gfx_v10_0_set_powergating_state(void *handle,
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
bool enable = (state == AMD_PG_STATE_GATE);
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
switch (adev->asic_type) {
case CHIP_NAVI10:
case CHIP_NAVI14:
@@ -4366,6 +7131,9 @@ static int gfx_v10_0_set_clockgating_state(void *handle,
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
switch (adev->asic_type) {
case CHIP_NAVI10:
case CHIP_NAVI14:
@@ -4678,7 +7446,8 @@ static void gfx_v10_0_ring_emit_sb(struct amdgpu_ring *ring)
amdgpu_ring_write(ring, 0);
}
-static void gfx_v10_0_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
+static void gfx_v10_0_ring_emit_cntxcntl(struct amdgpu_ring *ring,
+ uint32_t flags)
{
uint32_t dw2 = 0;
@@ -4686,8 +7455,6 @@ static void gfx_v10_0_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flag
gfx_v10_0_ring_emit_ce_meta(ring,
(!amdgpu_sriov_vf(ring->adev) && flags & AMDGPU_IB_PREEMPTED) ? true : false);
- gfx_v10_0_ring_emit_tmz(ring, true);
-
dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
if (flags & AMDGPU_HAVE_CTX_SWITCH) {
/* set load_global_config & load_global_uconfig */
@@ -4844,16 +7611,19 @@ static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
sizeof(de_payload) >> 2);
}
-static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
+static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
+ bool secure)
{
+ uint32_t v = secure ? FRAME_TMZ : 0;
+
amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
- amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */
+ amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
}
-static void gfx_v10_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
+static void gfx_v10_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t reg_val_offs)
{
struct amdgpu_device *adev = ring->adev;
- struct amdgpu_kiq *kiq = &adev->gfx.kiq;
amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
amdgpu_ring_write(ring, 0 | /* src: register*/
@@ -4862,9 +7632,9 @@ static void gfx_v10_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
amdgpu_ring_write(ring, reg);
amdgpu_ring_write(ring, 0);
amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
- kiq->reg_val_offs * 4));
+ reg_val_offs * 4));
amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
- kiq->reg_val_offs * 4));
+ reg_val_offs * 4));
}
static void gfx_v10_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
@@ -5250,6 +8020,29 @@ static int gfx_v10_0_kiq_irq(struct amdgpu_device *adev,
return 0;
}
+static void gfx_v10_0_emit_mem_sync(struct amdgpu_ring *ring)
+{
+ const unsigned int gcr_cntl =
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_INV(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_WB(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_INV(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(1);
+
+ /* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 6));
+ amdgpu_ring_write(ring, 0); /* CP_COHER_CNTL */
+ amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */
+ amdgpu_ring_write(ring, 0xffffff); /* CP_COHER_SIZE_HI */
+ amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
+ amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */
+ amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
+ amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */
+}
+
static const struct amd_ip_funcs gfx_v10_0_ip_funcs = {
.name = "gfx_v10_0",
.early_init = gfx_v10_0_early_init,
@@ -5297,7 +8090,8 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
3 + /* CNTX_CTRL */
5 + /* HDP_INVL */
8 + 8 + /* FENCE x2 */
- 2, /* SWITCH_BUFFER */
+ 2 + /* SWITCH_BUFFER */
+ 8, /* gfx_v10_0_emit_mem_sync */
.emit_ib_size = 4, /* gfx_v10_0_ring_emit_ib_gfx */
.emit_ib = gfx_v10_0_ring_emit_ib_gfx,
.emit_fence = gfx_v10_0_ring_emit_fence,
@@ -5314,11 +8108,12 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
.init_cond_exec = gfx_v10_0_ring_emit_init_cond_exec,
.patch_cond_exec = gfx_v10_0_ring_emit_patch_cond_exec,
.preempt_ib = gfx_v10_0_ring_preempt_ib,
- .emit_tmz = gfx_v10_0_ring_emit_tmz,
+ .emit_frame_cntl = gfx_v10_0_ring_emit_frame_cntl,
.emit_wreg = gfx_v10_0_ring_emit_wreg,
.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
.soft_recovery = gfx_v10_0_ring_soft_recovery,
+ .emit_mem_sync = gfx_v10_0_emit_mem_sync,
};
static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
@@ -5338,7 +8133,8 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
2 + /* gfx_v10_0_ring_emit_vm_flush */
- 8 + 8 + 8, /* gfx_v10_0_ring_emit_fence x3 for user fence, vm fence */
+ 8 + 8 + 8 + /* gfx_v10_0_ring_emit_fence x3 for user fence, vm fence */
+ 8, /* gfx_v10_0_emit_mem_sync */
.emit_ib_size = 7, /* gfx_v10_0_ring_emit_ib_compute */
.emit_ib = gfx_v10_0_ring_emit_ib_compute,
.emit_fence = gfx_v10_0_ring_emit_fence,
@@ -5353,6 +8149,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
.emit_wreg = gfx_v10_0_ring_emit_wreg,
.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
+ .emit_mem_sync = gfx_v10_0_emit_mem_sync,
};
static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
@@ -5439,9 +8236,11 @@ static void gfx_v10_0_set_rlc_funcs(struct amdgpu_device *adev)
switch (adev->asic_type) {
case CHIP_NAVI10:
case CHIP_NAVI14:
- case CHIP_NAVI12:
adev->gfx.rlc.funcs = &gfx_v10_0_rlc_funcs;
break;
+ case CHIP_NAVI12:
+ adev->gfx.rlc.funcs = &gfx_v10_0_rlc_funcs_sriov;
+ break;
default:
break;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
index 31f44d05e606..79c52c7a02e3 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
@@ -1914,7 +1914,8 @@ static int gfx_v6_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
WREG32(scratch, 0xCAFEDEAD);
memset(&ib, 0, sizeof(ib));
- r = amdgpu_ib_get(adev, NULL, 256, &ib);
+ r = amdgpu_ib_get(adev, NULL, 256,
+ AMDGPU_IB_POOL_DIRECT, &ib);
if (r)
goto err1;
@@ -1950,7 +1951,6 @@ err1:
static void gfx_v6_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
{
- int i;
if (enable) {
WREG32(mmCP_ME_CNTL, 0);
} else {
@@ -1958,10 +1958,6 @@ static void gfx_v6_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
CP_ME_CNTL__PFP_HALT_MASK |
CP_ME_CNTL__CE_HALT_MASK));
WREG32(mmSCRATCH_UMSK, 0);
- for (i = 0; i < adev->gfx.num_gfx_rings; i++)
- adev->gfx.gfx_ring[i].sched.ready = false;
- for (i = 0; i < adev->gfx.num_compute_rings; i++)
- adev->gfx.compute_ring[i].sched.ready = false;
}
udelay(50);
}
@@ -3114,7 +3110,9 @@ static int gfx_v6_0_sw_init(void *handle)
ring->ring_obj = NULL;
sprintf(ring->name, "gfx");
r = amdgpu_ring_init(adev, ring, 1024,
- &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
+ &adev->gfx.eop_irq,
+ AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
+ AMDGPU_RING_PRIO_DEFAULT);
if (r)
return r;
}
@@ -3136,7 +3134,8 @@ static int gfx_v6_0_sw_init(void *handle)
sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
r = amdgpu_ring_init(adev, ring, 1024,
- &adev->gfx.eop_irq, irq_type);
+ &adev->gfx.eop_irq, irq_type,
+ AMDGPU_RING_PRIO_DEFAULT);
if (r)
return r;
}
@@ -3466,6 +3465,18 @@ static int gfx_v6_0_set_powergating_state(void *handle,
return 0;
}
+static void gfx_v6_0_emit_mem_sync(struct amdgpu_ring *ring)
+{
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
+ amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
+ PACKET3_TC_ACTION_ENA |
+ PACKET3_SH_KCACHE_ACTION_ENA |
+ PACKET3_SH_ICACHE_ACTION_ENA); /* CP_COHER_CNTL */
+ amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */
+ amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
+ amdgpu_ring_write(ring, 0x0000000A); /* poll interval */
+}
+
static const struct amd_ip_funcs gfx_v6_0_ip_funcs = {
.name = "gfx_v6_0",
.early_init = gfx_v6_0_early_init,
@@ -3496,7 +3507,8 @@ static const struct amdgpu_ring_funcs gfx_v6_0_ring_funcs_gfx = {
14 + 14 + 14 + /* gfx_v6_0_ring_emit_fence x3 for user fence, vm fence */
7 + 4 + /* gfx_v6_0_ring_emit_pipeline_sync */
SI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + 6 + /* gfx_v6_0_ring_emit_vm_flush */
- 3 + 2, /* gfx_v6_ring_emit_cntxcntl including vgt flush */
+ 3 + 2 + /* gfx_v6_ring_emit_cntxcntl including vgt flush */
+ 5, /* SURFACE_SYNC */
.emit_ib_size = 6, /* gfx_v6_0_ring_emit_ib */
.emit_ib = gfx_v6_0_ring_emit_ib,
.emit_fence = gfx_v6_0_ring_emit_fence,
@@ -3507,6 +3519,7 @@ static const struct amdgpu_ring_funcs gfx_v6_0_ring_funcs_gfx = {
.insert_nop = amdgpu_ring_insert_nop,
.emit_cntxcntl = gfx_v6_ring_emit_cntxcntl,
.emit_wreg = gfx_v6_0_ring_emit_wreg,
+ .emit_mem_sync = gfx_v6_0_emit_mem_sync,
};
static const struct amdgpu_ring_funcs gfx_v6_0_ring_funcs_compute = {
@@ -3520,7 +3533,8 @@ static const struct amdgpu_ring_funcs gfx_v6_0_ring_funcs_compute = {
5 + 5 + /* hdp flush / invalidate */
7 + /* gfx_v6_0_ring_emit_pipeline_sync */
SI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v6_0_ring_emit_vm_flush */
- 14 + 14 + 14, /* gfx_v6_0_ring_emit_fence x3 for user fence, vm fence */
+ 14 + 14 + 14 + /* gfx_v6_0_ring_emit_fence x3 for user fence, vm fence */
+ 5, /* SURFACE_SYNC */
.emit_ib_size = 6, /* gfx_v6_0_ring_emit_ib */
.emit_ib = gfx_v6_0_ring_emit_ib,
.emit_fence = gfx_v6_0_ring_emit_fence,
@@ -3530,6 +3544,7 @@ static const struct amdgpu_ring_funcs gfx_v6_0_ring_funcs_compute = {
.test_ib = gfx_v6_0_ring_test_ib,
.insert_nop = amdgpu_ring_insert_nop,
.emit_wreg = gfx_v6_0_ring_emit_wreg,
+ .emit_mem_sync = gfx_v6_0_emit_mem_sync,
};
static void gfx_v6_0_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 733d398c61cc..0cc011f9190d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -2364,7 +2364,8 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
WREG32(scratch, 0xCAFEDEAD);
memset(&ib, 0, sizeof(ib));
- r = amdgpu_ib_get(adev, NULL, 256, &ib);
+ r = amdgpu_ib_get(adev, NULL, 256,
+ AMDGPU_IB_POOL_DIRECT, &ib);
if (r)
goto err1;
@@ -2431,15 +2432,12 @@ err1:
*/
static void gfx_v7_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
{
- int i;
-
- if (enable) {
+ if (enable)
WREG32(mmCP_ME_CNTL, 0);
- } else {
- WREG32(mmCP_ME_CNTL, (CP_ME_CNTL__ME_HALT_MASK | CP_ME_CNTL__PFP_HALT_MASK | CP_ME_CNTL__CE_HALT_MASK));
- for (i = 0; i < adev->gfx.num_gfx_rings; i++)
- adev->gfx.gfx_ring[i].sched.ready = false;
- }
+ else
+ WREG32(mmCP_ME_CNTL, (CP_ME_CNTL__ME_HALT_MASK |
+ CP_ME_CNTL__PFP_HALT_MASK |
+ CP_ME_CNTL__CE_HALT_MASK));
udelay(50);
}
@@ -2700,15 +2698,11 @@ static void gfx_v7_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
*/
static void gfx_v7_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
{
- int i;
-
- if (enable) {
+ if (enable)
WREG32(mmCP_MEC_CNTL, 0);
- } else {
- WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
- for (i = 0; i < adev->gfx.num_compute_rings; i++)
- adev->gfx.compute_ring[i].sched.ready = false;
- }
+ else
+ WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK |
+ CP_MEC_CNTL__MEC_ME2_HALT_MASK));
udelay(50);
}
@@ -4439,7 +4433,8 @@ static int gfx_v7_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
/* type-2 packets are deprecated on MEC, use type-3 instead */
r = amdgpu_ring_init(adev, ring, 1024,
- &adev->gfx.eop_irq, irq_type);
+ &adev->gfx.eop_irq, irq_type,
+ AMDGPU_RING_PRIO_DEFAULT);
if (r)
return r;
@@ -4511,7 +4506,9 @@ static int gfx_v7_0_sw_init(void *handle)
ring->ring_obj = NULL;
sprintf(ring->name, "gfx");
r = amdgpu_ring_init(adev, ring, 1024,
- &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
+ &adev->gfx.eop_irq,
+ AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
+ AMDGPU_RING_PRIO_DEFAULT);
if (r)
return r;
}
@@ -5001,6 +4998,32 @@ static int gfx_v7_0_set_powergating_state(void *handle,
return 0;
}
+static void gfx_v7_0_emit_mem_sync(struct amdgpu_ring *ring)
+{
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
+ amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
+ PACKET3_TC_ACTION_ENA |
+ PACKET3_SH_KCACHE_ACTION_ENA |
+ PACKET3_SH_ICACHE_ACTION_ENA); /* CP_COHER_CNTL */
+ amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */
+ amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
+ amdgpu_ring_write(ring, 0x0000000A); /* poll interval */
+}
+
+static void gfx_v7_0_emit_mem_sync_compute(struct amdgpu_ring *ring)
+{
+ amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
+ amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
+ PACKET3_TC_ACTION_ENA |
+ PACKET3_SH_KCACHE_ACTION_ENA |
+ PACKET3_SH_ICACHE_ACTION_ENA); /* CP_COHER_CNTL */
+ amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */
+ amdgpu_ring_write(ring, 0xff); /* CP_COHER_SIZE_HI */
+ amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
+ amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */
+ amdgpu_ring_write(ring, 0x0000000A); /* poll interval */
+}
+
static const struct amd_ip_funcs gfx_v7_0_ip_funcs = {
.name = "gfx_v7_0",
.early_init = gfx_v7_0_early_init,
@@ -5033,7 +5056,8 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
12 + 12 + 12 + /* gfx_v7_0_ring_emit_fence_gfx x3 for user fence, vm fence */
7 + 4 + /* gfx_v7_0_ring_emit_pipeline_sync */
CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + 6 + /* gfx_v7_0_ring_emit_vm_flush */
- 3 + 4, /* gfx_v7_ring_emit_cntxcntl including vgt flush*/
+ 3 + 4 + /* gfx_v7_ring_emit_cntxcntl including vgt flush*/
+ 5, /* SURFACE_SYNC */
.emit_ib_size = 4, /* gfx_v7_0_ring_emit_ib_gfx */
.emit_ib = gfx_v7_0_ring_emit_ib_gfx,
.emit_fence = gfx_v7_0_ring_emit_fence_gfx,
@@ -5048,6 +5072,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
.emit_cntxcntl = gfx_v7_ring_emit_cntxcntl,
.emit_wreg = gfx_v7_0_ring_emit_wreg,
.soft_recovery = gfx_v7_0_ring_soft_recovery,
+ .emit_mem_sync = gfx_v7_0_emit_mem_sync,
};
static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
@@ -5064,7 +5089,8 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
5 + /* hdp invalidate */
7 + /* gfx_v7_0_ring_emit_pipeline_sync */
CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v7_0_ring_emit_vm_flush */
- 7 + 7 + 7, /* gfx_v7_0_ring_emit_fence_compute x3 for user fence, vm fence */
+ 7 + 7 + 7 + /* gfx_v7_0_ring_emit_fence_compute x3 for user fence, vm fence */
+ 7, /* gfx_v7_0_emit_mem_sync_compute */
.emit_ib_size = 7, /* gfx_v7_0_ring_emit_ib_compute */
.emit_ib = gfx_v7_0_ring_emit_ib_compute,
.emit_fence = gfx_v7_0_ring_emit_fence_compute,
@@ -5077,6 +5103,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
.insert_nop = amdgpu_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.emit_wreg = gfx_v7_0_ring_emit_wreg,
+ .emit_mem_sync = gfx_v7_0_emit_mem_sync_compute,
};
static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index fc32586ef80b..1d4128227ffd 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -888,7 +888,8 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
gpu_addr = adev->wb.gpu_addr + (index * 4);
adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
memset(&ib, 0, sizeof(ib));
- r = amdgpu_ib_get(adev, NULL, 16, &ib);
+ r = amdgpu_ib_get(adev, NULL, 16,
+ AMDGPU_IB_POOL_DIRECT, &ib);
if (r)
goto err1;
@@ -1550,7 +1551,8 @@ static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
/* allocate an indirect buffer to put the commands in */
memset(&ib, 0, sizeof(ib));
- r = amdgpu_ib_get(adev, NULL, total_size, &ib);
+ r = amdgpu_ib_get(adev, NULL, total_size,
+ AMDGPU_IB_POOL_DIRECT, &ib);
if (r) {
DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
return r;
@@ -1892,6 +1894,7 @@ static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
int r;
unsigned irq_type;
struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
+ unsigned int hw_prio;
ring = &adev->gfx.compute_ring[ring_id];
@@ -1911,9 +1914,11 @@ static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
+ ring->pipe;
+ hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue) ?
+ AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_RING_PRIO_DEFAULT;
/* type-2 packets are deprecated on MEC, use type-3 instead */
r = amdgpu_ring_init(adev, ring, 1024,
- &adev->gfx.eop_irq, irq_type);
+ &adev->gfx.eop_irq, irq_type, hw_prio);
if (r)
return r;
@@ -2017,7 +2022,8 @@ static int gfx_v8_0_sw_init(void *handle)
}
r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
- AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
+ AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
+ AMDGPU_RING_PRIO_DEFAULT);
if (r)
return r;
}
@@ -4120,7 +4126,6 @@ static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
{
- int i;
u32 tmp = RREG32(mmCP_ME_CNTL);
if (enable) {
@@ -4131,8 +4136,6 @@ static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
- for (i = 0; i < adev->gfx.num_gfx_rings; i++)
- adev->gfx.gfx_ring[i].sched.ready = false;
}
WREG32(mmCP_ME_CNTL, tmp);
udelay(50);
@@ -4320,14 +4323,10 @@ static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
{
- int i;
-
if (enable) {
WREG32(mmCP_MEC_CNTL, 0);
} else {
WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
- for (i = 0; i < adev->gfx.num_compute_rings; i++)
- adev->gfx.compute_ring[i].sched.ready = false;
adev->gfx.kiq.ring.sched.ready = false;
}
udelay(50);
@@ -4437,11 +4436,8 @@ static void gfx_v8_0_mqd_set_priority(struct amdgpu_ring *ring, struct vi_mqd *m
if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue)) {
mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
- ring->has_high_prio = true;
mqd->cp_hqd_queue_priority =
AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
- } else {
- ring->has_high_prio = false;
}
}
}
@@ -5619,12 +5615,18 @@ static void gfx_v8_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
{
u32 data;
- data = RREG32(mmRLC_SPM_VMID);
+ if (amdgpu_sriov_is_pp_one_vf(adev))
+ data = RREG32_NO_KIQ(mmRLC_SPM_VMID);
+ else
+ data = RREG32(mmRLC_SPM_VMID);
data &= ~RLC_SPM_VMID__RLC_SPM_VMID_MASK;
data |= (vmid & RLC_SPM_VMID__RLC_SPM_VMID_MASK) << RLC_SPM_VMID__RLC_SPM_VMID__SHIFT;
- WREG32(mmRLC_SPM_VMID, data);
+ if (amdgpu_sriov_is_pp_one_vf(adev))
+ WREG32_NO_KIQ(mmRLC_SPM_VMID, data);
+ else
+ WREG32(mmRLC_SPM_VMID, data);
}
static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
@@ -6387,10 +6389,10 @@ static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigne
ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
}
-static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
+static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t reg_val_offs)
{
struct amdgpu_device *adev = ring->adev;
- struct amdgpu_kiq *kiq = &adev->gfx.kiq;
amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
amdgpu_ring_write(ring, 0 | /* src: register*/
@@ -6399,9 +6401,9 @@ static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
amdgpu_ring_write(ring, reg);
amdgpu_ring_write(ring, 0);
amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
- kiq->reg_val_offs * 4));
+ reg_val_offs * 4));
amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
- kiq->reg_val_offs * 4));
+ reg_val_offs * 4));
}
static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
@@ -6815,6 +6817,34 @@ static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
return 0;
}
+static void gfx_v8_0_emit_mem_sync(struct amdgpu_ring *ring)
+{
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
+ amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
+ PACKET3_TC_ACTION_ENA |
+ PACKET3_SH_KCACHE_ACTION_ENA |
+ PACKET3_SH_ICACHE_ACTION_ENA |
+ PACKET3_TC_WB_ACTION_ENA); /* CP_COHER_CNTL */
+ amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */
+ amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
+ amdgpu_ring_write(ring, 0x0000000A); /* poll interval */
+}
+
+static void gfx_v8_0_emit_mem_sync_compute(struct amdgpu_ring *ring)
+{
+ amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
+ amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
+ PACKET3_TC_ACTION_ENA |
+ PACKET3_SH_KCACHE_ACTION_ENA |
+ PACKET3_SH_ICACHE_ACTION_ENA |
+ PACKET3_TC_WB_ACTION_ENA); /* CP_COHER_CNTL */
+ amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */
+ amdgpu_ring_write(ring, 0xff); /* CP_COHER_SIZE_HI */
+ amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
+ amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */
+ amdgpu_ring_write(ring, 0x0000000A); /* poll interval */
+}
+
static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
.name = "gfx_v8_0",
.early_init = gfx_v8_0_early_init,
@@ -6861,7 +6891,8 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
3 + /* CNTX_CTRL */
5 + /* HDP_INVL */
12 + 12 + /* FENCE x2 */
- 2, /* SWITCH_BUFFER */
+ 2 + /* SWITCH_BUFFER */
+ 5, /* SURFACE_SYNC */
.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
@@ -6879,6 +6910,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
.emit_wreg = gfx_v8_0_ring_emit_wreg,
.soft_recovery = gfx_v8_0_ring_soft_recovery,
+ .emit_mem_sync = gfx_v8_0_emit_mem_sync,
};
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
@@ -6895,7 +6927,8 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
5 + /* hdp_invalidate */
7 + /* gfx_v8_0_ring_emit_pipeline_sync */
VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
- 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
+ 7 + 7 + 7 + /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
+ 7, /* gfx_v8_0_emit_mem_sync_compute */
.emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */
.emit_ib = gfx_v8_0_ring_emit_ib_compute,
.emit_fence = gfx_v8_0_ring_emit_fence_compute,
@@ -6908,6 +6941,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
.insert_nop = amdgpu_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.emit_wreg = gfx_v8_0_ring_emit_wreg,
+ .emit_mem_sync = gfx_v8_0_emit_mem_sync_compute,
};
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index d2d9dce68c2f..711e9dd19705 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -50,18 +50,14 @@
#include "gfx_v9_4.h"
+#include "asic_reg/pwr/pwr_10_0_offset.h"
+#include "asic_reg/pwr/pwr_10_0_sh_mask.h"
+
#define GFX9_NUM_GFX_RINGS 1
#define GFX9_MEC_HPD_SIZE 4096
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
-#define mmPWR_MISC_CNTL_STATUS 0x0183
-#define mmPWR_MISC_CNTL_STATUS_BASE_IDX 0
-#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT 0x0
-#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT 0x1
-#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK 0x00000001L
-#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK 0x00000006L
-
#define mmGCEA_PROBE_MAP 0x070c
#define mmGCEA_PROBE_MAP_BASE_IDX 0
@@ -511,8 +507,8 @@ static const struct soc15_reg_golden golden_settings_gc_9_0[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x00ffff87),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x00ffff8f),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
@@ -963,7 +959,7 @@ static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
case CHIP_RAVEN:
soc15_program_register_sequence(adev, golden_settings_gc_9_1,
ARRAY_SIZE(golden_settings_gc_9_1));
- if (adev->rev_id >= 8)
+ if (adev->apu_flags & AMD_APU_IS_RAVEN2)
soc15_program_register_sequence(adev,
golden_settings_gc_9_1_rv2,
ARRAY_SIZE(golden_settings_gc_9_1_rv2));
@@ -1082,7 +1078,8 @@ static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
gpu_addr = adev->wb.gpu_addr + (index * 4);
adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
memset(&ib, 0, sizeof(ib));
- r = amdgpu_ib_get(adev, NULL, 16, &ib);
+ r = amdgpu_ib_get(adev, NULL, 16,
+ AMDGPU_IB_POOL_DIRECT, &ib);
if (r)
goto err1;
@@ -1277,7 +1274,8 @@ static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
case CHIP_VEGA20:
break;
case CHIP_RAVEN:
- if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8) &&
+ if (!((adev->apu_flags & AMD_APU_IS_RAVEN2) ||
+ (adev->apu_flags & AMD_APU_IS_PICASSO)) &&
((!is_raven_kicker(adev) &&
adev->gfx.rlc_fw_version < 531) ||
(adev->gfx.rlc_feature_version < 1) ||
@@ -1620,9 +1618,9 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
chip_name = "vega20";
break;
case CHIP_RAVEN:
- if (adev->rev_id >= 8)
+ if (adev->apu_flags & AMD_APU_IS_RAVEN2)
chip_name = "raven2";
- else if (adev->pdev->device == 0x15d8)
+ else if (adev->apu_flags & AMD_APU_IS_PICASSO)
chip_name = "picasso";
else
chip_name = "raven";
@@ -2122,7 +2120,7 @@ static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
- if (adev->rev_id >= 8)
+ if (adev->apu_flags & AMD_APU_IS_RAVEN2)
gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
else
gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
@@ -2199,6 +2197,7 @@ static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
int r;
unsigned irq_type;
struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
+ unsigned int hw_prio;
ring = &adev->gfx.compute_ring[ring_id];
@@ -2217,10 +2216,11 @@ static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
+ ring->pipe;
-
+ hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue) ?
+ AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
/* type-2 packets are deprecated on MEC, use type-3 instead */
r = amdgpu_ring_init(adev, ring, 1024,
- &adev->gfx.eop_irq, irq_type);
+ &adev->gfx.eop_irq, irq_type, hw_prio);
if (r)
return r;
@@ -2314,7 +2314,9 @@ static int gfx_v9_0_sw_init(void *handle)
ring->use_doorbell = true;
ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
r = amdgpu_ring_init(adev, ring, 1024,
- &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
+ &adev->gfx.eop_irq,
+ AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
+ AMDGPU_RING_PRIO_DEFAULT);
if (r)
return r;
}
@@ -2532,7 +2534,7 @@ static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev)
break;
default:
break;
- };
+ }
}
static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
@@ -2967,8 +2969,7 @@ static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
*/
if (adev->gfx.rlc.is_rlc_v2_1) {
if (adev->asic_type == CHIP_VEGA12 ||
- (adev->asic_type == CHIP_RAVEN &&
- adev->rev_id >= 8))
+ (adev->apu_flags & AMD_APU_IS_RAVEN2))
gfx_v9_1_init_rlc_save_restore_list(adev);
gfx_v9_0_enable_save_restore_machine(adev);
}
@@ -3104,16 +3105,11 @@ static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
{
- int i;
u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
- if (!enable) {
- for (i = 0; i < adev->gfx.num_gfx_rings; i++)
- adev->gfx.gfx_ring[i].sched.ready = false;
- }
WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
udelay(50);
}
@@ -3309,15 +3305,11 @@ static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
{
- int i;
-
if (enable) {
WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
} else {
WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
- for (i = 0; i < adev->gfx.num_compute_rings; i++)
- adev->gfx.compute_ring[i].sched.ready = false;
adev->gfx.kiq.ring.sched.ready = false;
}
udelay(50);
@@ -3387,11 +3379,8 @@ static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *m
if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue)) {
mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
- ring->has_high_prio = true;
mqd->cp_hqd_queue_priority =
AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
- } else {
- ring->has_high_prio = false;
}
}
}
@@ -4058,13 +4047,18 @@ static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
{
signed long r, cnt = 0;
unsigned long flags;
- uint32_t seq;
+ uint32_t seq, reg_val_offs = 0;
+ uint64_t value = 0;
struct amdgpu_kiq *kiq = &adev->gfx.kiq;
struct amdgpu_ring *ring = &kiq->ring;
BUG_ON(!ring->funcs->emit_rreg);
spin_lock_irqsave(&kiq->ring_lock, flags);
+ if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
+ pr_err("critical bug! too many kiq readers\n");
+ goto failed_unlock;
+ }
amdgpu_ring_alloc(ring, 32);
amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
amdgpu_ring_write(ring, 9 | /* src: register*/
@@ -4074,10 +4068,13 @@ static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
amdgpu_ring_write(ring, 0);
amdgpu_ring_write(ring, 0);
amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
- kiq->reg_val_offs * 4));
+ reg_val_offs * 4));
amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
- kiq->reg_val_offs * 4));
- amdgpu_fence_emit_polling(ring, &seq);
+ reg_val_offs * 4));
+ r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
+ if (r)
+ goto failed_undo;
+
amdgpu_ring_commit(ring);
spin_unlock_irqrestore(&kiq->ring_lock, flags);
@@ -4103,10 +4100,19 @@ static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
if (cnt > MAX_KIQ_REG_TRY)
goto failed_kiq_read;
- return (uint64_t)adev->wb.wb[kiq->reg_val_offs] |
- (uint64_t)adev->wb.wb[kiq->reg_val_offs + 1 ] << 32ULL;
+ mb();
+ value = (uint64_t)adev->wb.wb[reg_val_offs] |
+ (uint64_t)adev->wb.wb[reg_val_offs + 1 ] << 32ULL;
+ amdgpu_device_wb_free(adev, reg_val_offs);
+ return value;
+failed_undo:
+ amdgpu_ring_undo(ring);
+failed_unlock:
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
failed_kiq_read:
+ if (reg_val_offs)
+ amdgpu_device_wb_free(adev, reg_val_offs);
pr_err("failed to read gpu clock\n");
return ~0;
}
@@ -4491,7 +4497,8 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
/* allocate an indirect buffer to put the commands in */
memset(&ib, 0, sizeof(ib));
- r = amdgpu_ib_get(adev, NULL, total_size, &ib);
+ r = amdgpu_ib_get(adev, NULL, total_size,
+ AMDGPU_IB_POOL_DIRECT, &ib);
if (r) {
DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
return r;
@@ -4962,14 +4969,21 @@ static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
{
- u32 data;
+ u32 reg, data;
- data = RREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL);
+ reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL);
+ if (amdgpu_sriov_is_pp_one_vf(adev))
+ data = RREG32_NO_KIQ(reg);
+ else
+ data = RREG32(reg);
data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
- WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
+ if (amdgpu_sriov_is_pp_one_vf(adev))
+ WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
+ else
+ WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
}
static bool gfx_v9_0_check_rlcg_range(struct amdgpu_device *adev,
@@ -5424,10 +5438,13 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
}
-static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
+static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
+ bool secure)
{
+ uint32_t v = secure ? FRAME_TMZ : 0;
+
amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
- amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */
+ amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
}
static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
@@ -5437,8 +5454,6 @@ static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
if (amdgpu_sriov_vf(ring->adev))
gfx_v9_0_ring_emit_ce_meta(ring);
- gfx_v9_0_ring_emit_tmz(ring, true);
-
dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
if (flags & AMDGPU_HAVE_CTX_SWITCH) {
/* set load_global_config & load_global_uconfig */
@@ -5489,10 +5504,10 @@ static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigne
ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
}
-static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
+static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t reg_val_offs)
{
struct amdgpu_device *adev = ring->adev;
- struct amdgpu_kiq *kiq = &adev->gfx.kiq;
amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
amdgpu_ring_write(ring, 0 | /* src: register*/
@@ -5501,9 +5516,9 @@ static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
amdgpu_ring_write(ring, reg);
amdgpu_ring_write(ring, 0);
amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
- kiq->reg_val_offs * 4));
+ reg_val_offs * 4));
amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
- kiq->reg_val_offs * 4));
+ reg_val_offs * 4));
}
static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
@@ -6404,15 +6419,15 @@ static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
if (sec_count) {
- DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
- vml2_mems[i], sec_count);
+ dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
+ "SEC %d\n", i, vml2_mems[i], sec_count);
err_data->ce_count += sec_count;
}
ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT);
if (ded_count) {
- DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
- vml2_mems[i], ded_count);
+ dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
+ "DED %d\n", i, vml2_mems[i], ded_count);
err_data->ue_count += ded_count;
}
}
@@ -6424,16 +6439,16 @@ static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
SEC_COUNT);
if (sec_count) {
- DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
- vml2_walker_mems[i], sec_count);
+ dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
+ "SEC %d\n", i, vml2_walker_mems[i], sec_count);
err_data->ce_count += sec_count;
}
ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
DED_COUNT);
if (ded_count) {
- DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
- vml2_walker_mems[i], ded_count);
+ dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
+ "DED %d\n", i, vml2_walker_mems[i], ded_count);
err_data->ue_count += ded_count;
}
}
@@ -6444,8 +6459,9 @@ static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
sec_count = (data & 0x00006000L) >> 0xd;
if (sec_count) {
- DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
- atc_l2_cache_2m_mems[i], sec_count);
+ dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
+ "SEC %d\n", i, atc_l2_cache_2m_mems[i],
+ sec_count);
err_data->ce_count += sec_count;
}
}
@@ -6456,15 +6472,17 @@ static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
sec_count = (data & 0x00006000L) >> 0xd;
if (sec_count) {
- DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
- atc_l2_cache_4k_mems[i], sec_count);
+ dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
+ "SEC %d\n", i, atc_l2_cache_4k_mems[i],
+ sec_count);
err_data->ce_count += sec_count;
}
ded_count = (data & 0x00018000L) >> 0xf;
if (ded_count) {
- DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
- atc_l2_cache_4k_mems[i], ded_count);
+ dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
+ "DED %d\n", i, atc_l2_cache_4k_mems[i],
+ ded_count);
err_data->ue_count += ded_count;
}
}
@@ -6477,7 +6495,8 @@ static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
return 0;
}
-static int gfx_v9_0_ras_error_count(const struct soc15_reg_entry *reg,
+static int gfx_v9_0_ras_error_count(struct amdgpu_device *adev,
+ const struct soc15_reg_entry *reg,
uint32_t se_id, uint32_t inst_id, uint32_t value,
uint32_t *sec_count, uint32_t *ded_count)
{
@@ -6494,7 +6513,8 @@ static int gfx_v9_0_ras_error_count(const struct soc15_reg_entry *reg,
gfx_v9_0_ras_fields[i].sec_count_mask) >>
gfx_v9_0_ras_fields[i].sec_count_shift;
if (sec_cnt) {
- DRM_INFO("GFX SubBlock %s, Instance[%d][%d], SEC %d\n",
+ dev_info(adev->dev, "GFX SubBlock %s, "
+ "Instance[%d][%d], SEC %d\n",
gfx_v9_0_ras_fields[i].name,
se_id, inst_id,
sec_cnt);
@@ -6505,7 +6525,8 @@ static int gfx_v9_0_ras_error_count(const struct soc15_reg_entry *reg,
gfx_v9_0_ras_fields[i].ded_count_mask) >>
gfx_v9_0_ras_fields[i].ded_count_shift;
if (ded_cnt) {
- DRM_INFO("GFX SubBlock %s, Instance[%d][%d], DED %d\n",
+ dev_info(adev->dev, "GFX SubBlock %s, "
+ "Instance[%d][%d], DED %d\n",
gfx_v9_0_ras_fields[i].name,
se_id, inst_id,
ded_cnt);
@@ -6594,9 +6615,10 @@ static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
reg_value =
RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
if (reg_value)
- gfx_v9_0_ras_error_count(&gfx_v9_0_edc_counter_regs[i],
- j, k, reg_value,
- &sec_count, &ded_count);
+ gfx_v9_0_ras_error_count(adev,
+ &gfx_v9_0_edc_counter_regs[i],
+ j, k, reg_value,
+ &sec_count, &ded_count);
}
}
}
@@ -6612,6 +6634,25 @@ static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
return 0;
}
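+/* Emit an ACQUIRE_MEM packet that writes back and invalidates the shader I$/K$, TCL1 and TC caches over the full address range before subsequent packets execute. */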
+static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring)
+{
+ const unsigned int cp_coher_cntl =
+ PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) |
+ PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) |
+ PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) |
+ PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) |
+ PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1);
+
+ /* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
+ amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */
+ amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */
+ amdgpu_ring_write(ring, 0xffffff); /* CP_COHER_SIZE_HI */
+ amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
+ amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */
+ amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
+}
+
static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
.name = "gfx_v9_0",
.early_init = gfx_v9_0_early_init,
@@ -6658,7 +6699,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
3 + /* CNTX_CTRL */
5 + /* HDP_INVL */
8 + 8 + /* FENCE x2 */
- 2, /* SWITCH_BUFFER */
+ 2 + /* SWITCH_BUFFER */
+ 7, /* gfx_v9_0_emit_mem_sync */
.emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
.emit_fence = gfx_v9_0_ring_emit_fence,
@@ -6674,11 +6716,12 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
- .emit_tmz = gfx_v9_0_ring_emit_tmz,
+ .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
.emit_wreg = gfx_v9_0_ring_emit_wreg,
.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
.soft_recovery = gfx_v9_0_ring_soft_recovery,
+ .emit_mem_sync = gfx_v9_0_emit_mem_sync,
};
static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
@@ -6698,7 +6741,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
2 + /* gfx_v9_0_ring_emit_vm_flush */
- 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
+ 8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
+ 7, /* gfx_v9_0_emit_mem_sync */
.emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
.emit_ib = gfx_v9_0_ring_emit_ib_compute,
.emit_fence = gfx_v9_0_ring_emit_fence,
@@ -6713,6 +6757,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
.emit_wreg = gfx_v9_0_ring_emit_wreg,
.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
+ .emit_mem_sync = gfx_v9_0_emit_mem_sync,
};
static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
@@ -6836,7 +6881,7 @@ static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
adev->gds.gds_compute_max_wave_id = 0x27f;
break;
case CHIP_RAVEN:
- if (adev->rev_id >= 0x8)
+ if (adev->apu_flags & AMD_APU_IS_RAVEN2)
adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
else
adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c
index dce945ef21a5..46351db36922 100644..100755
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c
@@ -732,7 +732,8 @@ static int gfx_v9_4_query_utc_edc_status(struct amdgpu_device *adev,
sec_count = REG_GET_FIELD(data, VML2_WALKER_MEM_ECC_CNTL,
SEC_COUNT);
if (sec_count) {
- DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
+ dev_info(adev->dev,
+ "Instance[%d]: SubBlock %s, SEC %d\n", i,
vml2_walker_mems[i], sec_count);
err_data->ce_count += sec_count;
}
@@ -740,7 +741,8 @@ static int gfx_v9_4_query_utc_edc_status(struct amdgpu_device *adev,
ded_count = REG_GET_FIELD(data, VML2_WALKER_MEM_ECC_CNTL,
DED_COUNT);
if (ded_count) {
- DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
+ dev_info(adev->dev,
+ "Instance[%d]: SubBlock %s, DED %d\n", i,
vml2_walker_mems[i], ded_count);
err_data->ue_count += ded_count;
}
@@ -752,14 +754,16 @@ static int gfx_v9_4_query_utc_edc_status(struct amdgpu_device *adev,
sec_count = REG_GET_FIELD(data, UTCL2_MEM_ECC_CNTL, SEC_COUNT);
if (sec_count) {
- DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
+ dev_info(adev->dev,
+ "Instance[%d]: SubBlock %s, SEC %d\n", i,
utcl2_router_mems[i], sec_count);
err_data->ce_count += sec_count;
}
ded_count = REG_GET_FIELD(data, UTCL2_MEM_ECC_CNTL, DED_COUNT);
if (ded_count) {
- DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
+ dev_info(adev->dev,
+ "Instance[%d]: SubBlock %s, DED %d\n", i,
utcl2_router_mems[i], ded_count);
err_data->ue_count += ded_count;
}
@@ -772,7 +776,8 @@ static int gfx_v9_4_query_utc_edc_status(struct amdgpu_device *adev,
sec_count = REG_GET_FIELD(data, ATC_L2_CACHE_2M_DSM_CNTL,
SEC_COUNT);
if (sec_count) {
- DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
+ dev_info(adev->dev,
+ "Instance[%d]: SubBlock %s, SEC %d\n", i,
atc_l2_cache_2m_mems[i], sec_count);
err_data->ce_count += sec_count;
}
@@ -780,7 +785,8 @@ static int gfx_v9_4_query_utc_edc_status(struct amdgpu_device *adev,
ded_count = REG_GET_FIELD(data, ATC_L2_CACHE_2M_DSM_CNTL,
DED_COUNT);
if (ded_count) {
- DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
+ dev_info(adev->dev,
+ "Instance[%d]: SubBlock %s, DED %d\n", i,
atc_l2_cache_2m_mems[i], ded_count);
err_data->ue_count += ded_count;
}
@@ -793,7 +799,8 @@ static int gfx_v9_4_query_utc_edc_status(struct amdgpu_device *adev,
sec_count = REG_GET_FIELD(data, ATC_L2_CACHE_4K_DSM_CNTL,
SEC_COUNT);
if (sec_count) {
- DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
+ dev_info(adev->dev,
+ "Instance[%d]: SubBlock %s, SEC %d\n", i,
atc_l2_cache_4k_mems[i], sec_count);
err_data->ce_count += sec_count;
}
@@ -801,7 +808,8 @@ static int gfx_v9_4_query_utc_edc_status(struct amdgpu_device *adev,
ded_count = REG_GET_FIELD(data, ATC_L2_CACHE_4K_DSM_CNTL,
DED_COUNT);
if (ded_count) {
- DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
+ dev_info(adev->dev,
+ "Instance[%d]: SubBlock %s, DED %d\n", i,
atc_l2_cache_4k_mems[i], ded_count);
err_data->ue_count += ded_count;
}
@@ -816,7 +824,8 @@ static int gfx_v9_4_query_utc_edc_status(struct amdgpu_device *adev,
return 0;
}
-static int gfx_v9_4_ras_error_count(const struct soc15_reg_entry *reg,
+static int gfx_v9_4_ras_error_count(struct amdgpu_device *adev,
+ const struct soc15_reg_entry *reg,
uint32_t se_id, uint32_t inst_id,
uint32_t value, uint32_t *sec_count,
uint32_t *ded_count)
@@ -833,7 +842,8 @@ static int gfx_v9_4_ras_error_count(const struct soc15_reg_entry *reg,
sec_cnt = (value & gfx_v9_4_ras_fields[i].sec_count_mask) >>
gfx_v9_4_ras_fields[i].sec_count_shift;
if (sec_cnt) {
- DRM_INFO("GFX SubBlock %s, Instance[%d][%d], SEC %d\n",
+ dev_info(adev->dev,
+ "GFX SubBlock %s, Instance[%d][%d], SEC %d\n",
gfx_v9_4_ras_fields[i].name, se_id, inst_id,
sec_cnt);
*sec_count += sec_cnt;
@@ -842,7 +852,8 @@ static int gfx_v9_4_ras_error_count(const struct soc15_reg_entry *reg,
ded_cnt = (value & gfx_v9_4_ras_fields[i].ded_count_mask) >>
gfx_v9_4_ras_fields[i].ded_count_shift;
if (ded_cnt) {
- DRM_INFO("GFX SubBlock %s, Instance[%d][%d], DED %d\n",
+ dev_info(adev->dev,
+ "GFX SubBlock %s, Instance[%d][%d], DED %d\n",
gfx_v9_4_ras_fields[i].name, se_id, inst_id,
ded_cnt);
*ded_count += ded_cnt;
@@ -876,7 +887,7 @@ int gfx_v9_4_query_ras_error_count(struct amdgpu_device *adev,
reg_value = RREG32(SOC15_REG_ENTRY_OFFSET(
gfx_v9_4_edc_counter_regs[i]));
if (reg_value)
- gfx_v9_4_ras_error_count(
+ gfx_v9_4_ras_error_count(adev,
&gfx_v9_4_edc_counter_regs[i],
j, k, reg_value, &sec_count,
&ded_count);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
index 1a2f18b908fe..6682b843bafe 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
@@ -80,7 +80,7 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
- if (adev->asic_type == CHIP_RAVEN && adev->rev_id >= 0x8)
+ if (adev->apu_flags & AMD_APU_IS_RAVEN2)
/*
* Raven2 has a HW issue that it is unable to use the
* vram which is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR.
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 9775eca6fe43..ba2b7ac0c02d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -170,6 +170,9 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev,
dev_err(adev->dev,
"GCVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
status);
+ dev_err(adev->dev, "\t Faulty UTCL2 client ID: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, CID));
dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
REG_GET_FIELD(status,
GCVM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS));
@@ -369,7 +372,8 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
* translation. Avoid this by doing the invalidation from the SDMA
* itself.
*/
- r = amdgpu_job_alloc_with_ib(adev, 16 * 4, &job);
+ r = amdgpu_job_alloc_with_ib(adev, 16 * 4, AMDGPU_IB_POOL_IMMEDIATE,
+ &job);
if (r)
goto error_alloc;
@@ -423,7 +427,13 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8);
kiq->pmf->kiq_invalidate_tlbs(ring,
pasid, flush_type, all_hub);
- amdgpu_fence_emit_polling(ring, &seq);
+ r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
+ if (r) {
+ amdgpu_ring_undo(ring);
+ spin_unlock(&adev->gfx.kiq.ring_lock);
+ return -ETIME;
+ }
+
amdgpu_ring_commit(ring);
spin_unlock(&adev->gfx.kiq.ring_lock);
r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
@@ -676,17 +686,23 @@ static void gmc_v10_0_vram_gtt_location(struct amdgpu_device *adev,
*/
static int gmc_v10_0_mc_init(struct amdgpu_device *adev)
{
- /* Could aper size report 0 ? */
- adev->gmc.aper_base = pci_resource_start(adev->pdev, 0);
- adev->gmc.aper_size = pci_resource_len(adev->pdev, 0);
+ int r;
/* size in MB on si */
adev->gmc.mc_vram_size =
adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL;
adev->gmc.real_vram_size = adev->gmc.mc_vram_size;
- adev->gmc.visible_vram_size = adev->gmc.aper_size;
+
+ if (!(adev->flags & AMD_IS_APU)) {
+ r = amdgpu_device_resize_fb_bar(adev);
+ if (r)
+ return r;
+ }
+ adev->gmc.aper_base = pci_resource_start(adev->pdev, 0);
+ adev->gmc.aper_size = pci_resource_len(adev->pdev, 0);
/* In case the PCI BAR is larger than the actual amount of vram */
+ adev->gmc.visible_vram_size = adev->gmc.aper_size;
if (adev->gmc.visible_vram_size > adev->gmc.real_vram_size)
adev->gmc.visible_vram_size = adev->gmc.real_vram_size;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
index b205039350b6..a75e472b4a81 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
@@ -61,17 +61,6 @@ MODULE_FIRMWARE("amdgpu/si58_mc.bin");
#define MC_SEQ_MISC0__MT__HBM 0x60000000
#define MC_SEQ_MISC0__MT__DDR3 0xB0000000
-
-static const u32 crtc_offsets[6] =
-{
- SI_CRTC0_REGISTER_OFFSET,
- SI_CRTC1_REGISTER_OFFSET,
- SI_CRTC2_REGISTER_OFFSET,
- SI_CRTC3_REGISTER_OFFSET,
- SI_CRTC4_REGISTER_OFFSET,
- SI_CRTC5_REGISTER_OFFSET
-};
-
static void gmc_v6_0_mc_stop(struct amdgpu_device *adev)
{
u32 blackout;
@@ -858,7 +847,7 @@ static int gmc_v6_0_sw_init(void *handle)
r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44));
if (r) {
- dev_warn(adev->dev, "amdgpu: No suitable DMA available.\n");
+ dev_warn(adev->dev, "No suitable DMA available.\n");
return r;
}
adev->need_swiotlb = drm_need_swiotlb(44);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 9da9596a3638..bcd4baecfe11 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -762,6 +762,7 @@ static void gmc_v7_0_gart_disable(struct amdgpu_device *adev)
* @adev: amdgpu_device pointer
* @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
* @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
+ * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value
*
* Print human readable fault information (CIK).
*/
@@ -1019,7 +1020,7 @@ static int gmc_v7_0_sw_init(void *handle)
r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(40));
if (r) {
- pr_warn("amdgpu: No suitable DMA available\n");
+ pr_warn("No suitable DMA available\n");
return r;
}
adev->need_swiotlb = drm_need_swiotlb(40);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 27d83204fa2b..26976e50e2a2 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -1005,6 +1005,7 @@ static void gmc_v8_0_gart_disable(struct amdgpu_device *adev)
* @adev: amdgpu_device pointer
* @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
* @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
+ * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value
*
* Print human readable fault information (VI).
*/
@@ -1144,7 +1145,7 @@ static int gmc_v8_0_sw_init(void *handle)
r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(40));
if (r) {
- pr_warn("amdgpu: No suitable DMA available\n");
+ pr_warn("No suitable DMA available\n");
return r;
}
adev->need_swiotlb = drm_need_swiotlb(40);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 8606f877478f..11e93a82131d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -362,6 +362,9 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
dev_err(adev->dev,
"VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
status);
+ dev_err(adev->dev, "\t Faulty UTCL2 client ID: 0x%lx\n",
+ REG_GET_FIELD(status,
+ VM_L2_PROTECTION_FAULT_STATUS, CID));
dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
REG_GET_FIELD(status,
VM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS));
@@ -438,9 +441,8 @@ static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev,
return ((vmhub == AMDGPU_MMHUB_0 ||
vmhub == AMDGPU_MMHUB_1) &&
(!amdgpu_sriov_vf(adev)) &&
- (!(adev->asic_type == CHIP_RAVEN &&
- adev->rev_id < 0x8 &&
- adev->pdev->device == 0x15d8)));
+ (!(!(adev->apu_flags & AMD_APU_IS_RAVEN2) &&
+ (adev->apu_flags & AMD_APU_IS_PICASSO))));
}
static bool gmc_v9_0_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
@@ -618,7 +620,13 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
pasid, 2, all_hub);
kiq->pmf->kiq_invalidate_tlbs(ring,
pasid, flush_type, all_hub);
- amdgpu_fence_emit_polling(ring, &seq);
+ r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
+ if (r) {
+ amdgpu_ring_undo(ring);
+ spin_unlock(&adev->gfx.kiq.ring_lock);
+ return -ETIME;
+ }
+
amdgpu_ring_commit(ring);
spin_unlock(&adev->gfx.kiq.ring_lock);
r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c
index 0debfd9f428c..b10c95cad9a2 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c
@@ -480,7 +480,8 @@ int jpeg_v1_0_sw_init(void *handle)
ring = &adev->jpeg.inst->ring_dec;
sprintf(ring->name, "jpeg_dec");
- r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0);
+ r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq,
+ 0, AMDGPU_RING_PRIO_DEFAULT);
if (r)
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
index 6173951db7b4..e67d09cb1b03 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
@@ -106,7 +106,8 @@ static int jpeg_v2_0_sw_init(void *handle)
ring->use_doorbell = true;
ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1;
sprintf(ring->name, "jpeg_dec");
- r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0);
+ r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq,
+ 0, AMDGPU_RING_PRIO_DEFAULT);
if (r)
return r;
@@ -169,14 +170,11 @@ static int jpeg_v2_0_hw_init(void *handle)
static int jpeg_v2_0_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- struct amdgpu_ring *ring = &adev->jpeg.inst->ring_dec;
if (adev->jpeg.cur_state != AMD_PG_STATE_GATE &&
RREG32_SOC15(JPEG, 0, mmUVD_JRBC_STATUS))
jpeg_v2_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
- ring->sched.ready = false;
-
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
index c04c2078a7c1..713c32560445 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
@@ -118,7 +118,8 @@ static int jpeg_v2_5_sw_init(void *handle)
ring->use_doorbell = true;
ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1 + 8 * i;
sprintf(ring->name, "jpeg_dec_%d", i);
- r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst[i].irq, 0);
+ r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst[i].irq,
+ 0, AMDGPU_RING_PRIO_DEFAULT);
if (r)
return r;
@@ -267,7 +268,6 @@ static void jpeg_v2_5_disable_clock_gating(struct amdgpu_device* adev, int inst)
data = RREG32_SOC15(JPEG, inst, mmJPEG_CGC_GATE);
data &= ~(JPEG_CGC_GATE__JPEG_DEC_MASK
| JPEG_CGC_GATE__JPEG2_DEC_MASK
- | JPEG_CGC_GATE__JPEG_ENC_MASK
| JPEG_CGC_GATE__JMCIF_MASK
| JPEG_CGC_GATE__JRBBM_MASK);
WREG32_SOC15(JPEG, inst, mmJPEG_CGC_GATE, data);
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
index 396c2a624de0..405767208a4d 100644..100755
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
@@ -96,7 +96,7 @@ static void mmhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
- if (adev->asic_type == CHIP_RAVEN && adev->rev_id >= 0x8)
+ if (adev->apu_flags & AMD_APU_IS_RAVEN2)
/*
* Raven2 has a HW issue that it is unable to use the vram which
* is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR. So here is the
@@ -690,7 +690,8 @@ static const struct soc15_reg_entry mmhub_v1_0_edc_cnt_regs[] = {
{ SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2_VG20), 0, 0, 0},
};
-static int mmhub_v1_0_get_ras_error_count(const struct soc15_reg_entry *reg,
+static int mmhub_v1_0_get_ras_error_count(struct amdgpu_device *adev,
+ const struct soc15_reg_entry *reg,
uint32_t value, uint32_t *sec_count, uint32_t *ded_count)
{
uint32_t i;
@@ -704,7 +705,8 @@ static int mmhub_v1_0_get_ras_error_count(const struct soc15_reg_entry *reg,
mmhub_v1_0_ras_fields[i].sec_count_mask) >>
mmhub_v1_0_ras_fields[i].sec_count_shift;
if (sec_cnt) {
- DRM_INFO("MMHUB SubBlock %s, SEC %d\n",
+ dev_info(adev->dev,
+ "MMHUB SubBlock %s, SEC %d\n",
mmhub_v1_0_ras_fields[i].name,
sec_cnt);
*sec_count += sec_cnt;
@@ -714,7 +716,8 @@ static int mmhub_v1_0_get_ras_error_count(const struct soc15_reg_entry *reg,
mmhub_v1_0_ras_fields[i].ded_count_mask) >>
mmhub_v1_0_ras_fields[i].ded_count_shift;
if (ded_cnt) {
- DRM_INFO("MMHUB SubBlock %s, DED %d\n",
+ dev_info(adev->dev,
+ "MMHUB SubBlock %s, DED %d\n",
mmhub_v1_0_ras_fields[i].name,
ded_cnt);
*ded_count += ded_cnt;
@@ -739,7 +742,8 @@ static void mmhub_v1_0_query_ras_error_count(struct amdgpu_device *adev,
reg_value =
RREG32(SOC15_REG_ENTRY_OFFSET(mmhub_v1_0_edc_cnt_regs[i]));
if (reg_value)
- mmhub_v1_0_get_ras_error_count(&mmhub_v1_0_edc_cnt_regs[i],
+ mmhub_v1_0_get_ras_error_count(adev,
+ &mmhub_v1_0_edc_cnt_regs[i],
reg_value, &sec_count, &ded_count);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
index 37dbe0f2142f..83b453f5d717 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
@@ -26,7 +26,7 @@
#define AI_MAILBOX_POLL_ACK_TIMEDOUT 500
#define AI_MAILBOX_POLL_MSG_TIMEDOUT 12000
-#define AI_MAILBOX_POLL_FLR_TIMEDOUT 500
+#define AI_MAILBOX_POLL_FLR_TIMEDOUT 5000
enum idh_request {
IDH_REQ_GPU_INIT_ACCESS = 1,
@@ -46,7 +46,8 @@ enum idh_event {
IDH_SUCCESS,
IDH_FAIL,
IDH_QUERY_ALIVE,
- IDH_EVENT_MAX
+
+ IDH_TEXT_MESSAGE = 255,
};
extern const struct amdgpu_virt_ops xgpu_ai_virt_ops;
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
index 237fa5e16b7c..ce2bf1fb79ed 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
@@ -30,7 +30,6 @@
#include "navi10_ih.h"
#include "soc15_common.h"
#include "mxgpu_nv.h"
-#include "mxgpu_ai.h"
static void xgpu_nv_mailbox_send_ack(struct amdgpu_device *adev)
{
@@ -53,8 +52,7 @@ static void xgpu_nv_mailbox_set_valid(struct amdgpu_device *adev, bool val)
*/
static enum idh_event xgpu_nv_mailbox_peek_msg(struct amdgpu_device *adev)
{
- return RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
- mmBIF_BX_PF_MAILBOX_MSGBUF_RCV_DW0));
+ return RREG32_NO_KIQ(mmMAILBOX_MSGBUF_RCV_DW0);
}
@@ -63,8 +61,7 @@ static int xgpu_nv_mailbox_rcv_msg(struct amdgpu_device *adev,
{
u32 reg;
- reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
- mmBIF_BX_PF_MAILBOX_MSGBUF_RCV_DW0));
+ reg = RREG32_NO_KIQ(mmMAILBOX_MSGBUF_RCV_DW0);
if (reg != event)
return -ENOENT;
@@ -110,7 +107,6 @@ static int xgpu_nv_poll_msg(struct amdgpu_device *adev, enum idh_event event)
timeout -= 10;
} while (timeout > 1);
- pr_err("Doesn't get msg:%d from pf, error=%d\n", event, r);
return -ETIME;
}
@@ -118,7 +114,6 @@ static int xgpu_nv_poll_msg(struct amdgpu_device *adev, enum idh_event event)
static void xgpu_nv_mailbox_trans_msg (struct amdgpu_device *adev,
enum idh_request req, u32 data1, u32 data2, u32 data3)
{
- u32 reg;
int r;
uint8_t trn;
@@ -137,19 +132,10 @@ static void xgpu_nv_mailbox_trans_msg (struct amdgpu_device *adev,
}
} while (trn);
- reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
- mmBIF_BX_PF_MAILBOX_MSGBUF_TRN_DW0));
- reg = REG_SET_FIELD(reg, BIF_BX_PF_MAILBOX_MSGBUF_TRN_DW0,
- MSGBUF_DATA, req);
- WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_MSGBUF_TRN_DW0),
- reg);
- WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_MSGBUF_TRN_DW1),
- data1);
- WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_MSGBUF_TRN_DW2),
- data2);
- WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_MSGBUF_TRN_DW3),
- data3);
-
+ WREG32_NO_KIQ(mmMAILBOX_MSGBUF_TRN_DW0, req);
+ WREG32_NO_KIQ(mmMAILBOX_MSGBUF_TRN_DW1, data1);
+ WREG32_NO_KIQ(mmMAILBOX_MSGBUF_TRN_DW2, data2);
+ WREG32_NO_KIQ(mmMAILBOX_MSGBUF_TRN_DW3, data3);
xgpu_nv_mailbox_set_valid(adev, true);
/* start to poll ack */
@@ -164,23 +150,48 @@ static int xgpu_nv_send_access_requests(struct amdgpu_device *adev,
enum idh_request req)
{
int r;
+ enum idh_event event = -1;
xgpu_nv_mailbox_trans_msg(adev, req, 0, 0, 0);
- /* start to check msg if request is idh_req_gpu_init_access */
- if (req == IDH_REQ_GPU_INIT_ACCESS ||
- req == IDH_REQ_GPU_FINI_ACCESS ||
- req == IDH_REQ_GPU_RESET_ACCESS) {
- r = xgpu_nv_poll_msg(adev, IDH_READY_TO_ACCESS_GPU);
+ switch (req) {
+ case IDH_REQ_GPU_INIT_ACCESS:
+ case IDH_REQ_GPU_FINI_ACCESS:
+ case IDH_REQ_GPU_RESET_ACCESS:
+ event = IDH_READY_TO_ACCESS_GPU;
+ break;
+ case IDH_REQ_GPU_INIT_DATA:
+ event = IDH_REQ_GPU_INIT_DATA_READY;
+ break;
+ default:
+ break;
+ }
+
+ if (event != -1) {
+ r = xgpu_nv_poll_msg(adev, event);
if (r) {
- pr_err("Doesn't get READY_TO_ACCESS_GPU from pf, give up\n");
- return r;
+ if (req != IDH_REQ_GPU_INIT_DATA) {
+ pr_err("Doesn't get msg:%d from pf, error=%d\n", event, r);
+ return r;
+ }
+ else /* host doesn't support REQ_GPU_INIT_DATA handshake */
+ adev->virt.req_init_data_ver = 0;
+ } else {
+ if (req == IDH_REQ_GPU_INIT_DATA) {
+ adev->virt.req_init_data_ver =
+ RREG32_NO_KIQ(mmMAILBOX_MSGBUF_RCV_DW1);
+
+ /* assume V1 in case host doesn't set version number */
+ if (adev->virt.req_init_data_ver < 1)
+ adev->virt.req_init_data_ver = 1;
+ }
}
+
/* Retrieve checksum from mailbox2 */
if (req == IDH_REQ_GPU_INIT_ACCESS || req == IDH_REQ_GPU_RESET_ACCESS) {
adev->virt.fw_reserve.checksum_key =
- RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
- mmBIF_BX_PF_MAILBOX_MSGBUF_RCV_DW2));
+ RREG32_NO_KIQ(mmMAILBOX_MSGBUF_RCV_DW2);
}
}
@@ -213,6 +224,11 @@ static int xgpu_nv_release_full_gpu_access(struct amdgpu_device *adev,
return r;
}
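+/* Request the optional GPU init data handshake from the host over the mailbox. */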
+static int xgpu_nv_request_init_data(struct amdgpu_device *adev)
+{
+ return xgpu_nv_send_access_requests(adev, IDH_REQ_GPU_INIT_DATA);
+}
+
static int xgpu_nv_mailbox_ack_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
@@ -226,11 +242,14 @@ static int xgpu_nv_set_mailbox_ack_irq(struct amdgpu_device *adev,
unsigned type,
enum amdgpu_interrupt_state state)
{
- u32 tmp = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_INT_CNTL));
+ u32 tmp = RREG32_NO_KIQ(mmMAILBOX_INT_CNTL);
+
+ if (state == AMDGPU_IRQ_STATE_ENABLE)
+ tmp |= 2;
+ else
+ tmp &= ~2;
- tmp = REG_SET_FIELD(tmp, BIF_BX_PF_MAILBOX_INT_CNTL, ACK_INT_EN,
- (state == AMDGPU_IRQ_STATE_ENABLE) ? 1 : 0);
- WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_INT_CNTL), tmp);
+ WREG32_NO_KIQ(mmMAILBOX_INT_CNTL, tmp);
return 0;
}
@@ -282,11 +301,14 @@ static int xgpu_nv_set_mailbox_rcv_irq(struct amdgpu_device *adev,
unsigned type,
enum amdgpu_interrupt_state state)
{
- u32 tmp = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_INT_CNTL));
+ u32 tmp = RREG32_NO_KIQ(mmMAILBOX_INT_CNTL);
+
+ if (state == AMDGPU_IRQ_STATE_ENABLE)
+ tmp |= 1;
+ else
+ tmp &= ~1;
- tmp = REG_SET_FIELD(tmp, BIF_BX_PF_MAILBOX_INT_CNTL, VALID_INT_EN,
- (state == AMDGPU_IRQ_STATE_ENABLE) ? 1 : 0);
- WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_INT_CNTL), tmp);
+ WREG32_NO_KIQ(mmMAILBOX_INT_CNTL, tmp);
return 0;
}
@@ -378,6 +400,7 @@ void xgpu_nv_mailbox_put_irq(struct amdgpu_device *adev)
const struct amdgpu_virt_ops xgpu_nv_virt_ops = {
.req_full_gpu = xgpu_nv_request_full_gpu_access,
.rel_full_gpu = xgpu_nv_release_full_gpu_access,
+ .req_init_data = xgpu_nv_request_init_data,
.reset_gpu = xgpu_nv_request_reset,
.wait_reset = NULL,
.trans_msg = xgpu_nv_mailbox_trans_msg,
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h
index 99b15f6865cb..52605e14a1a5 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h
@@ -25,8 +25,32 @@
#define __MXGPU_NV_H__
#define NV_MAILBOX_POLL_ACK_TIMEDOUT 500
-#define NV_MAILBOX_POLL_MSG_TIMEDOUT 12000
-#define NV_MAILBOX_POLL_FLR_TIMEDOUT 500
+#define NV_MAILBOX_POLL_MSG_TIMEDOUT 6000
+#define NV_MAILBOX_POLL_FLR_TIMEDOUT 5000
+
+enum idh_request {
+ IDH_REQ_GPU_INIT_ACCESS = 1,
+ IDH_REL_GPU_INIT_ACCESS,
+ IDH_REQ_GPU_FINI_ACCESS,
+ IDH_REL_GPU_FINI_ACCESS,
+ IDH_REQ_GPU_RESET_ACCESS,
+ IDH_REQ_GPU_INIT_DATA,
+
+ IDH_LOG_VF_ERROR = 200,
+};
+
+enum idh_event {
+ IDH_CLR_MSG_BUF = 0,
+ IDH_READY_TO_ACCESS_GPU,
+ IDH_FLR_NOTIFICATION,
+ IDH_FLR_NOTIFICATION_CMPL,
+ IDH_SUCCESS,
+ IDH_FAIL,
+ IDH_QUERY_ALIVE,
+ IDH_REQ_GPU_INIT_DATA_READY,
+
+ IDH_TEXT_MESSAGE = 255,
+};
extern const struct amdgpu_virt_ops xgpu_nv_virt_ops;
@@ -35,7 +59,21 @@ int xgpu_nv_mailbox_add_irq_id(struct amdgpu_device *adev);
int xgpu_nv_mailbox_get_irq(struct amdgpu_device *adev);
void xgpu_nv_mailbox_put_irq(struct amdgpu_device *adev);
-#define NV_MAIBOX_CONTROL_TRN_OFFSET_BYTE (SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_CONTROL) * 4)
-#define NV_MAIBOX_CONTROL_RCV_OFFSET_BYTE (SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_CONTROL) * 4 + 1)
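+/* Mailbox register offsets (in dwords), used directly instead of SOC15_REG_OFFSET() lookups on the BIF_BX_PF_MAILBOX_* registers. */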
+#define mmMAILBOX_CONTROL 0xE5E
+
+#define NV_MAIBOX_CONTROL_TRN_OFFSET_BYTE (mmMAILBOX_CONTROL * 4)
+#define NV_MAIBOX_CONTROL_RCV_OFFSET_BYTE (NV_MAIBOX_CONTROL_TRN_OFFSET_BYTE + 1)
+
+#define mmMAILBOX_MSGBUF_TRN_DW0 0xE56
+#define mmMAILBOX_MSGBUF_TRN_DW1 0xE57
+#define mmMAILBOX_MSGBUF_TRN_DW2 0xE58
+#define mmMAILBOX_MSGBUF_TRN_DW3 0xE59
+
+#define mmMAILBOX_MSGBUF_RCV_DW0 0xE5A
+#define mmMAILBOX_MSGBUF_RCV_DW1 0xE5B
+#define mmMAILBOX_MSGBUF_RCV_DW2 0xE5C
+#define mmMAILBOX_MSGBUF_RCV_DW3 0xE5D
+
+#define mmMAILBOX_INT_CNTL 0xE5F
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.h
index f13dc6cc158f..713ee66a4d3e 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.h
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.h
@@ -43,7 +43,8 @@ enum idh_event {
IDH_READY_TO_ACCESS_GPU,
IDH_FLR_NOTIFICATION,
IDH_FLR_NOTIFICATION_CMPL,
- IDH_EVENT_MAX
+
+ IDH_TEXT_MESSAGE = 255
};
extern const struct amdgpu_virt_ops xgpu_vi_virt_ops;
diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
index e08245a446fc..f97857ed3c7e 100644
--- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
@@ -49,8 +49,48 @@ static void navi10_ih_enable_interrupts(struct amdgpu_device *adev)
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 1);
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR, 1);
- WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl);
+ if (amdgpu_sriov_vf(adev) && adev->asic_type < CHIP_NAVI10) {
+ if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) {
+ DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
+ return;
+ }
+ } else {
+ WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl);
+ }
+
adev->irq.ih.enabled = true;
+
+ if (adev->irq.ih1.ring_size) {
+ ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,
+ RB_ENABLE, 1);
+ if (amdgpu_sriov_vf(adev) && adev->asic_type < CHIP_NAVI10) {
+ if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1,
+ ih_rb_cntl)) {
+ DRM_ERROR("program IH_RB_CNTL_RING1 failed!\n");
+ return;
+ }
+ } else {
+ WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl);
+ }
+ adev->irq.ih1.enabled = true;
+ }
+
+ if (adev->irq.ih2.ring_size) {
+ ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2,
+ RB_ENABLE, 1);
+ if (amdgpu_sriov_vf(adev) && adev->asic_type < CHIP_NAVI10) {
+ if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2,
+ ih_rb_cntl)) {
+ DRM_ERROR("program IH_RB_CNTL_RING2 failed!\n");
+ return;
+ }
+ } else {
+ WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl);
+ }
+ adev->irq.ih2.enabled = true;
+ }
}
/**
@@ -66,12 +106,61 @@ static void navi10_ih_disable_interrupts(struct amdgpu_device *adev)
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 0);
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR, 0);
- WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl);
+ if (amdgpu_sriov_vf(adev) && adev->asic_type < CHIP_NAVI10) {
+ if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) {
+ DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
+ return;
+ }
+ } else {
+ WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl);
+ }
+
/* set rptr, wptr to 0 */
WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR, 0);
WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR, 0);
adev->irq.ih.enabled = false;
adev->irq.ih.rptr = 0;
+
+ if (adev->irq.ih1.ring_size) {
+ ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,
+ RB_ENABLE, 0);
+ if (amdgpu_sriov_vf(adev) && adev->asic_type < CHIP_NAVI10) {
+ if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1,
+ ih_rb_cntl)) {
+ DRM_ERROR("program IH_RB_CNTL_RING1 failed!\n");
+ return;
+ }
+ } else {
+ WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl);
+ }
+ /* set rptr, wptr to 0 */
+ WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR_RING1, 0);
+ WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING1, 0);
+ adev->irq.ih1.enabled = false;
+ adev->irq.ih1.rptr = 0;
+ }
+
+ if (adev->irq.ih2.ring_size) {
+ ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2,
+ RB_ENABLE, 0);
+ if (amdgpu_sriov_vf(adev) && adev->asic_type < CHIP_NAVI10) {
+ if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2,
+ ih_rb_cntl)) {
+ DRM_ERROR("program IH_RB_CNTL_RING2 failed!\n");
+ return;
+ }
+ } else {
+ WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl);
+ }
+ /* set rptr, wptr to 0 */
+ WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR_RING2, 0);
+ WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING2, 0);
+ adev->irq.ih2.enabled = false;
+ adev->irq.ih2.rptr = 0;
+ }
+
}
static uint32_t navi10_ih_rb_cntl(struct amdgpu_ih_ring *ih, uint32_t ih_rb_cntl)
@@ -97,6 +186,43 @@ static uint32_t navi10_ih_rb_cntl(struct amdgpu_ih_ring *ih, uint32_t ih_rb_cntl
return ih_rb_cntl;
}
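+/* Build the IH_DOORBELL_RPTR value for a ring: program the doorbell offset and enable bit when the ring uses a doorbell, otherwise leave it disabled. */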
+static uint32_t navi10_ih_doorbell_rptr(struct amdgpu_ih_ring *ih)
+{
+ u32 ih_doorbell_rtpr = 0;
+
+ if (ih->use_doorbell) {
+ ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr,
+ IH_DOORBELL_RPTR, OFFSET,
+ ih->doorbell_index);
+ ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr,
+ IH_DOORBELL_RPTR,
+ ENABLE, 1);
+ } else {
+ ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr,
+ IH_DOORBELL_RPTR,
+ ENABLE, 0);
+ }
+ return ih_doorbell_rtpr;
+}
+
+static void navi10_ih_reroute_ih(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* Reroute to IH ring 1 for VMC */
+ WREG32_SOC15(OSSSYS, 0, mmIH_CLIENT_CFG_INDEX, 0x12);
+ tmp = RREG32_SOC15(OSSSYS, 0, mmIH_CLIENT_CFG_DATA);
+ tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, CLIENT_TYPE, 1);
+ tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, RING_ID, 1);
+ WREG32_SOC15(OSSSYS, 0, mmIH_CLIENT_CFG_DATA, tmp);
+
+ /* Reroute to IH ring 1 for UMC */
+ WREG32_SOC15(OSSSYS, 0, mmIH_CLIENT_CFG_INDEX, 0x1B);
+ tmp = RREG32_SOC15(OSSSYS, 0, mmIH_CLIENT_CFG_DATA);
+ tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, RING_ID, 1);
+ WREG32_SOC15(OSSSYS, 0, mmIH_CLIENT_CFG_DATA, tmp);
+}
+
/**
* navi10_ih_irq_init - init and enable the interrupt ring
*
@@ -111,7 +237,7 @@ static uint32_t navi10_ih_rb_cntl(struct amdgpu_ih_ring *ih, uint32_t ih_rb_cntl
static int navi10_ih_irq_init(struct amdgpu_device *adev)
{
struct amdgpu_ih_ring *ih = &adev->irq.ih;
- u32 ih_rb_cntl, ih_doorbell_rtpr, ih_chicken;
+ u32 ih_rb_cntl, ih_chicken;
u32 tmp;
/* disable irqs */
@@ -127,6 +253,15 @@ static int navi10_ih_irq_init(struct amdgpu_device *adev)
ih_rb_cntl = navi10_ih_rb_cntl(ih, ih_rb_cntl);
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RPTR_REARM,
!!adev->irq.msi_enabled);
+ if (amdgpu_sriov_vf(adev) && adev->asic_type < CHIP_NAVI10) {
+ if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) {
+ DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
+ return -ETIMEDOUT;
+ }
+ } else {
+ WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl);
+ }
+ navi10_ih_reroute_ih(adev);
if (unlikely(adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT)) {
if (ih->use_bus_addr) {
@@ -137,8 +272,6 @@ static int navi10_ih_irq_init(struct amdgpu_device *adev)
}
}
- WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl);
-
/* set the writeback address whether it's enabled or not */
WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_ADDR_LO,
lower_32_bits(ih->wptr_addr));
@@ -149,22 +282,68 @@ static int navi10_ih_irq_init(struct amdgpu_device *adev)
WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR, 0);
WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR, 0);
- ih_doorbell_rtpr = RREG32_SOC15(OSSSYS, 0, mmIH_DOORBELL_RPTR);
- if (ih->use_doorbell) {
- ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr,
- IH_DOORBELL_RPTR, OFFSET,
- ih->doorbell_index);
- ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr,
- IH_DOORBELL_RPTR, ENABLE, 1);
- } else {
- ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr,
- IH_DOORBELL_RPTR, ENABLE, 0);
- }
- WREG32_SOC15(OSSSYS, 0, mmIH_DOORBELL_RPTR, ih_doorbell_rtpr);
+ WREG32_SOC15(OSSSYS, 0, mmIH_DOORBELL_RPTR,
+ navi10_ih_doorbell_rptr(ih));
adev->nbio.funcs->ih_doorbell_range(adev, ih->use_doorbell,
ih->doorbell_index);
+ ih = &adev->irq.ih1;
+ if (ih->ring_size) {
+ WREG32_SOC15(OSSSYS, 0, mmIH_RB_BASE_RING1, ih->gpu_addr >> 8);
+ WREG32_SOC15(OSSSYS, 0, mmIH_RB_BASE_HI_RING1,
+ (ih->gpu_addr >> 40) & 0xff);
+
+ ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1);
+ ih_rb_cntl = navi10_ih_rb_cntl(ih, ih_rb_cntl);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
+ WPTR_OVERFLOW_ENABLE, 0);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
+ RB_FULL_DRAIN_ENABLE, 1);
+ if (amdgpu_sriov_vf(adev) && adev->asic_type < CHIP_NAVI10) {
+ if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1,
+ ih_rb_cntl)) {
+ DRM_ERROR("program IH_RB_CNTL_RING1 failed!\n");
+ return -ETIMEDOUT;
+ }
+ } else {
+ WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl);
+ }
+ /* set rptr, wptr to 0 */
+ WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING1, 0);
+ WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR_RING1, 0);
+
+ WREG32_SOC15(OSSSYS, 0, mmIH_DOORBELL_RPTR_RING1,
+ navi10_ih_doorbell_rptr(ih));
+ }
+
+ ih = &adev->irq.ih2;
+ if (ih->ring_size) {
+ WREG32_SOC15(OSSSYS, 0, mmIH_RB_BASE_RING2, ih->gpu_addr >> 8);
+ WREG32_SOC15(OSSSYS, 0, mmIH_RB_BASE_HI_RING2,
+ (ih->gpu_addr >> 40) & 0xff);
+
+ ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2);
+ ih_rb_cntl = navi10_ih_rb_cntl(ih, ih_rb_cntl);
+
+ if (amdgpu_sriov_vf(adev) && adev->asic_type < CHIP_NAVI10) {
+ if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2,
+ ih_rb_cntl)) {
+ DRM_ERROR("program IH_RB_CNTL_RING2 failed!\n");
+ return -ETIMEDOUT;
+ }
+ } else {
+ WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl);
+ }
+ /* set rptr, wptr to 0 */
+ WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING2, 0);
+ WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR_RING2, 0);
+
+ WREG32_SOC15(OSSSYS, 0, mmIH_DOORBELL_RPTR_RING2,
+ navi10_ih_doorbell_rptr(ih));
+ }
+
+
tmp = RREG32_SOC15(OSSSYS, 0, mmIH_STORM_CLIENT_LIST_CNTL);
tmp = REG_SET_FIELD(tmp, IH_STORM_CLIENT_LIST_CNTL,
CLIENT18_IS_STORM_CLIENT, 1);
@@ -217,7 +396,15 @@ static u32 navi10_ih_get_wptr(struct amdgpu_device *adev,
if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
goto out;
- reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_WPTR);
+ if (ih == &adev->irq.ih)
+ reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_WPTR);
+ else if (ih == &adev->irq.ih1)
+ reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_WPTR_RING1);
+ else if (ih == &adev->irq.ih2)
+ reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_WPTR_RING2);
+ else
+ BUG();
+
wptr = RREG32_NO_KIQ(reg);
if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
goto out;
@@ -233,7 +420,15 @@ static u32 navi10_ih_get_wptr(struct amdgpu_device *adev,
wptr, ih->rptr, tmp);
ih->rptr = tmp;
- reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL);
+ if (ih == &adev->irq.ih)
+ reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL);
+ else if (ih == &adev->irq.ih1)
+ reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL_RING1);
+ else if (ih == &adev->irq.ih2)
+ reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL_RING2);
+ else
+ BUG();
+
tmp = RREG32_NO_KIQ(reg);
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
WREG32_NO_KIQ(reg, tmp);
@@ -333,8 +528,52 @@ static void navi10_ih_set_rptr(struct amdgpu_device *adev,
if (amdgpu_sriov_vf(adev))
navi10_ih_irq_rearm(adev, ih);
- } else
+ } else if (ih == &adev->irq.ih) {
WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR, ih->rptr);
+ } else if (ih == &adev->irq.ih1) {
+ WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR_RING1, ih->rptr);
+ } else if (ih == &adev->irq.ih2) {
+ WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR_RING2, ih->rptr);
+ }
+}
+
+/**
+ * navi10_ih_self_irq - dispatch work for ring 1 and 2
+ *
+ * @adev: amdgpu_device pointer
+ * @source: irq source
+ * @entry: IV with WPTR update
+ *
+ * Update the WPTR from the IV and schedule work to handle the entries.
+ */
+static int navi10_ih_self_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ uint32_t wptr = cpu_to_le32(entry->src_data[0]);
+
+ switch (entry->ring_id) {
+ case 1:
+ *adev->irq.ih1.wptr_cpu = wptr;
+ schedule_work(&adev->irq.ih1_work);
+ break;
+ case 2:
+ *adev->irq.ih2.wptr_cpu = wptr;
+ schedule_work(&adev->irq.ih2_work);
+ break;
+ default: break;
+ }
+ return 0;
+}
+
+static const struct amdgpu_irq_src_funcs navi10_ih_self_irq_funcs = {
+ .process = navi10_ih_self_irq,
+};
+
+static void navi10_ih_set_self_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->irq.self_irq.num_types = 0;
+ adev->irq.self_irq.funcs = &navi10_ih_self_irq_funcs;
}
static int navi10_ih_early_init(void *handle)
@@ -342,6 +581,7 @@ static int navi10_ih_early_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
navi10_ih_set_interrupt_funcs(adev);
+ navi10_ih_set_self_irq_funcs(adev);
return 0;
}
@@ -351,6 +591,12 @@ static int navi10_ih_sw_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
bool use_bus_addr;
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_IH, 0,
+ &adev->irq.self_irq);
+
+ if (r)
+ return r;
+
/* use gpu virtual address for ih ring
* until ih_checken is programmed to allow
* use bus address for ih ring by psp bl */
@@ -363,6 +609,20 @@ static int navi10_ih_sw_init(void *handle)
adev->irq.ih.use_doorbell = true;
adev->irq.ih.doorbell_index = adev->doorbell_index.ih << 1;
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih1, PAGE_SIZE, true);
+ if (r)
+ return r;
+
+ adev->irq.ih1.use_doorbell = true;
+ adev->irq.ih1.doorbell_index = (adev->doorbell_index.ih + 1) << 1;
+
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih2, PAGE_SIZE, true);
+ if (r)
+ return r;
+
+ adev->irq.ih2.use_doorbell = true;
+ adev->irq.ih2.doorbell_index = (adev->doorbell_index.ih + 2) << 1;
+
r = amdgpu_irq_init(adev);
return r;
@@ -373,6 +633,8 @@ static int navi10_ih_sw_fini(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
amdgpu_irq_fini(adev);
+ amdgpu_ih_ring_fini(adev, &adev->irq.ih2);
+ amdgpu_ih_ring_fini(adev, &adev->irq.ih1);
amdgpu_ih_ring_fini(adev, &adev->irq.ih);
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
index f3a3fe746222..cbcf04578b99 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
@@ -290,23 +290,6 @@ const struct nbio_hdp_flush_reg nbio_v2_3_hdp_flush_reg = {
.ref_and_mask_sdma1 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__SDMA1_MASK,
};
-static void nbio_v2_3_detect_hw_virt(struct amdgpu_device *adev)
-{
- uint32_t reg;
-
- reg = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_RCC_IOV_FUNC_IDENTIFIER);
- if (reg & 1)
- adev->virt.caps |= AMDGPU_SRIOV_CAPS_IS_VF;
-
- if (reg & 0x80000000)
- adev->virt.caps |= AMDGPU_SRIOV_CAPS_ENABLE_IOV;
-
- if (!reg) {
- if (is_virtual_machine()) /* passthrough mode exclus sriov mod */
- adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE;
- }
-}
-
static void nbio_v2_3_init_registers(struct amdgpu_device *adev)
{
uint32_t def, data;
@@ -338,6 +321,5 @@ const struct amdgpu_nbio_funcs nbio_v2_3_funcs = {
.get_clockgating_state = nbio_v2_3_get_clockgating_state,
.ih_control = nbio_v2_3_ih_control,
.init_registers = nbio_v2_3_init_registers,
- .detect_hw_virt = nbio_v2_3_detect_hw_virt,
.remap_hdp_registers = nbio_v2_3_remap_hdp_registers,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
index 635d9e1fc0a3..7b2fb050407d 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
@@ -241,23 +241,6 @@ const struct nbio_hdp_flush_reg nbio_v6_1_hdp_flush_reg = {
.ref_and_mask_sdma1 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__SDMA1_MASK
};
-static void nbio_v6_1_detect_hw_virt(struct amdgpu_device *adev)
-{
- uint32_t reg;
-
- reg = RREG32_SOC15(NBIO, 0, mmRCC_PF_0_0_RCC_IOV_FUNC_IDENTIFIER);
- if (reg & 1)
- adev->virt.caps |= AMDGPU_SRIOV_CAPS_IS_VF;
-
- if (reg & 0x80000000)
- adev->virt.caps |= AMDGPU_SRIOV_CAPS_ENABLE_IOV;
-
- if (!reg) {
- if (is_virtual_machine()) /* passthrough mode exclus sriov mod */
- adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE;
- }
-}
-
static void nbio_v6_1_init_registers(struct amdgpu_device *adev)
{
uint32_t def, data;
@@ -294,5 +277,4 @@ const struct amdgpu_nbio_funcs nbio_v6_1_funcs = {
.get_clockgating_state = nbio_v6_1_get_clockgating_state,
.ih_control = nbio_v6_1_ih_control,
.init_registers = nbio_v6_1_init_registers,
- .detect_hw_virt = nbio_v6_1_detect_hw_virt,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
index d6cbf26074bc..d34628e113fc 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
@@ -280,12 +280,6 @@ const struct nbio_hdp_flush_reg nbio_v7_0_hdp_flush_reg = {
.ref_and_mask_sdma1 = GPU_HDP_FLUSH_DONE__SDMA1_MASK,
};
-static void nbio_v7_0_detect_hw_virt(struct amdgpu_device *adev)
-{
- if (is_virtual_machine()) /* passthrough mode exclus sriov mod */
- adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE;
-}
-
static void nbio_v7_0_init_registers(struct amdgpu_device *adev)
{
@@ -310,6 +304,5 @@ const struct amdgpu_nbio_funcs nbio_v7_0_funcs = {
.get_clockgating_state = nbio_v7_0_get_clockgating_state,
.ih_control = nbio_v7_0_ih_control,
.init_registers = nbio_v7_0_init_registers,
- .detect_hw_virt = nbio_v7_0_detect_hw_virt,
.remap_hdp_registers = nbio_v7_0_remap_hdp_registers,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
index 149d386590df..e629156173d3 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
@@ -185,7 +185,7 @@ static void nbio_v7_4_ih_doorbell_range(struct amdgpu_device *adev,
if (use_doorbell) {
ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, OFFSET, doorbell_index);
- ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, SIZE, 2);
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, SIZE, 4);
} else
ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, SIZE, 0);
@@ -292,23 +292,6 @@ const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg = {
.ref_and_mask_sdma7 = GPU_HDP_FLUSH_DONE__RSVD_ENG5_MASK,
};
-static void nbio_v7_4_detect_hw_virt(struct amdgpu_device *adev)
-{
- uint32_t reg;
-
- reg = RREG32_SOC15(NBIO, 0, mmRCC_IOV_FUNC_IDENTIFIER);
- if (reg & 1)
- adev->virt.caps |= AMDGPU_SRIOV_CAPS_IS_VF;
-
- if (reg & 0x80000000)
- adev->virt.caps |= AMDGPU_SRIOV_CAPS_ENABLE_IOV;
-
- if (!reg) {
- if (is_virtual_machine()) /* passthrough mode exclus sriov mod */
- adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE;
- }
-}
-
static void nbio_v7_4_init_registers(struct amdgpu_device *adev)
{
@@ -340,14 +323,20 @@ static void nbio_v7_4_handle_ras_controller_intr_no_bifring(struct amdgpu_device
obj->err_data.ce_count += err_data.ce_count;
if (err_data.ce_count)
- DRM_INFO("%ld correctable errors detected in %s block\n",
- obj->err_data.ce_count, adev->nbio.ras_if->name);
+ dev_info(adev->dev, "%ld correctable hardware "
+ "errors detected in %s block, "
+ "no user action is needed.\n",
+ obj->err_data.ce_count,
+ adev->nbio.ras_if->name);
if (err_data.ue_count)
- DRM_INFO("%ld uncorrectable errors detected in %s block\n",
- obj->err_data.ue_count, adev->nbio.ras_if->name);
+ dev_info(adev->dev, "%ld uncorrectable hardware "
+ "errors detected in %s block\n",
+ obj->err_data.ue_count,
+ adev->nbio.ras_if->name);
- DRM_WARN("RAS controller interrupt triggered by NBIF error\n");
+ dev_info(adev->dev, "RAS controller interrupt triggered "
+ "by NBIF error\n");
/* ras_controller_int is dedicated for nbif ras error,
* not the global interrupt for sync flood
@@ -561,7 +550,6 @@ const struct amdgpu_nbio_funcs nbio_v7_4_funcs = {
.get_clockgating_state = nbio_v7_4_get_clockgating_state,
.ih_control = nbio_v7_4_ih_control,
.init_registers = nbio_v7_4_init_registers,
- .detect_hw_virt = nbio_v7_4_detect_hw_virt,
.remap_hdp_registers = nbio_v7_4_remap_hdp_registers,
.handle_ras_controller_intr_no_bifring = nbio_v7_4_handle_ras_controller_intr_no_bifring,
.handle_ras_err_event_athub_intr_no_bifring = nbio_v7_4_handle_ras_err_event_athub_intr_no_bifring,
diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c
index 52318b03c424..6655dd2009b6 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/nv.c
@@ -453,18 +453,19 @@ int nv_set_ip_blocks(struct amdgpu_device *adev)
{
int r;
- /* Set IP register base before any HW register access */
- r = nv_reg_base_init(adev);
- if (r)
- return r;
-
adev->nbio.funcs = &nbio_v2_3_funcs;
adev->nbio.hdp_flush_reg = &nbio_v2_3_hdp_flush_reg;
- adev->nbio.funcs->detect_hw_virt(adev);
-
- if (amdgpu_sriov_vf(adev))
+ if (amdgpu_sriov_vf(adev)) {
adev->virt.ops = &xgpu_nv_virt_ops;
+ /* try send GPU_INIT_DATA request to host */
+ amdgpu_virt_request_init_data(adev);
+ }
+
+ /* Set IP register base before any HW register access */
+ r = nv_reg_base_init(adev);
+ if (r)
+ return r;
switch (adev->asic_type) {
case CHIP_NAVI10:
@@ -497,8 +498,7 @@ int nv_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block);
amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block);
amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP &&
- !amdgpu_sriov_vf(adev))
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)
amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
@@ -548,13 +548,6 @@ static bool nv_need_full_reset(struct amdgpu_device *adev)
return true;
}
-static void nv_get_pcie_usage(struct amdgpu_device *adev,
- uint64_t *count0,
- uint64_t *count1)
-{
- /*TODO*/
-}
-
static bool nv_need_reset_on_init(struct amdgpu_device *adev)
{
#if 0
@@ -629,7 +622,6 @@ static const struct amdgpu_asic_funcs nv_asic_funcs =
.invalidate_hdp = &nv_invalidate_hdp,
.init_doorbell_index = &nv_init_doorbell_index,
.need_full_reset = &nv_need_full_reset,
- .get_pcie_usage = &nv_get_pcie_usage,
.need_reset_on_init = &nv_need_reset_on_init,
.get_pcie_replay_count = &nv_get_pcie_replay_count,
.supports_baco = &nv_asic_supports_baco,
diff --git a/drivers/gpu/drm/amd/amdgpu/nvd.h b/drivers/gpu/drm/amd/amdgpu/nvd.h
index 1de984647dbb..fd6b58243b03 100644
--- a/drivers/gpu/drm/amd/amdgpu/nvd.h
+++ b/drivers/gpu/drm/amd/amdgpu/nvd.h
@@ -256,6 +256,54 @@
#define PACKET3_BLK_CNTX_UPDATE 0x53
#define PACKET3_INCR_UPDT_STATE 0x55
#define PACKET3_ACQUIRE_MEM 0x58
+/* 1. HEADER
+ * 2. COHER_CNTL [30:0]
+ * 2.1 ENGINE_SEL [31:31]
+ * 3. COHER_SIZE [31:0]
+ * 4. COHER_SIZE_HI [7:0]
+ * 5. COHER_BASE_LO [31:0]
+ * 6. COHER_BASE_HI [23:0]
+ * 7. POLL_INTERVAL [15:0]
+ * 8. GCR_CNTL [18:0]
+ */
+#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(x) ((x) << 0)
+ /*
+ * 0:NOP
+ * 1:ALL
+ * 2:RANGE
+ * 3:FIRST_LAST
+ */
+#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_RANGE(x) ((x) << 2)
+ /*
+ * 0:ALL
+ * 1:reserved
+ * 2:RANGE
+ * 3:FIRST_LAST
+ */
+#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_WB(x) ((x) << 4)
+#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_INV(x) ((x) << 5)
+#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_WB(x) ((x) << 6)
+#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(x) ((x) << 7)
+#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(x) ((x) << 8)
+#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_INV(x) ((x) << 9)
+#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_US(x) ((x) << 10)
+#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_RANGE(x) ((x) << 11)
+ /*
+ * 0:ALL
+ * 1:VOL
+ * 2:RANGE
+ * 3:FIRST_LAST
+ */
+#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_DISCARD(x) ((x) << 13)
+#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(x) ((x) << 14)
+#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(x) ((x) << 15)
+#define PACKET3_ACQUIRE_MEM_GCR_CNTL_SEQ(x) ((x) << 16)
+ /*
+ * 0: PARALLEL
+ * 1: FORWARD
+ * 2: REVERSE
+ */
+#define PACKET3_ACQUIRE_MEM_GCR_RANGE_IS_PA (1 << 18)
#define PACKET3_REWIND 0x59
#define PACKET3_INTERRUPT 0x5A
#define PACKET3_GEN_PDEPTE 0x5B
@@ -306,6 +354,7 @@
#define PACKET3_GET_LOD_STATS 0x8E
#define PACKET3_DRAW_MULTI_PREAMBLE 0x8F
#define PACKET3_FRAME_CONTROL 0x90
+# define FRAME_TMZ (1 << 0)
# define FRAME_CMD(x) ((x) << 28)
/*
* x=0: tmz_begin
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
index 7539104175e8..d7f92634eba2 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
@@ -50,15 +50,14 @@ static int psp_v10_0_init_microcode(struct psp_context *psp)
const char *chip_name;
char fw_name[30];
int err = 0;
- const struct psp_firmware_header_v1_0 *hdr;
const struct ta_firmware_header_v1_0 *ta_hdr;
DRM_DEBUG("\n");
switch (adev->asic_type) {
case CHIP_RAVEN:
- if (adev->rev_id >= 0x8)
+ if (adev->apu_flags & AMD_APU_IS_RAVEN2)
chip_name = "raven2";
- else if (adev->pdev->device == 0x15d8)
+ else if (adev->apu_flags & AMD_APU_IS_PICASSO)
chip_name = "picasso";
else
chip_name = "raven";
@@ -66,22 +65,10 @@ static int psp_v10_0_init_microcode(struct psp_context *psp)
default: BUG();
}
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_asd.bin", chip_name);
- err = request_firmware(&adev->psp.asd_fw, fw_name, adev->dev);
+ err = psp_init_asd_microcode(psp, chip_name);
if (err)
goto out;
- err = amdgpu_ucode_validate(adev->psp.asd_fw);
- if (err)
- goto out;
-
- hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.asd_fw->data;
- adev->psp.asd_fw_version = le32_to_cpu(hdr->header.ucode_version);
- adev->psp.asd_feature_version = le32_to_cpu(hdr->ucode_feature_version);
- adev->psp.asd_ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes);
- adev->psp.asd_start_addr = (uint8_t *)hdr +
- le32_to_cpu(hdr->header.ucode_array_offset_bytes);
-
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name);
err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev);
if (err) {
@@ -126,8 +113,6 @@ out:
dev_err(adev->dev,
"psp v10.0: Failed to load firmware \"%s\"\n",
fw_name);
- release_firmware(adev->psp.asd_fw);
- adev->psp.asd_fw = NULL;
}
return err;
@@ -230,129 +215,6 @@ static int psp_v10_0_ring_destroy(struct psp_context *psp,
return ret;
}
-static int
-psp_v10_0_sram_map(struct amdgpu_device *adev,
- unsigned int *sram_offset, unsigned int *sram_addr_reg_offset,
- unsigned int *sram_data_reg_offset,
- enum AMDGPU_UCODE_ID ucode_id)
-{
- int ret = 0;
-
- switch(ucode_id) {
-/* TODO: needs to confirm */
-#if 0
- case AMDGPU_UCODE_ID_SMC:
- *sram_offset = 0;
- *sram_addr_reg_offset = 0;
- *sram_data_reg_offset = 0;
- break;
-#endif
-
- case AMDGPU_UCODE_ID_CP_CE:
- *sram_offset = 0x0;
- *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_CE_UCODE_ADDR);
- *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_CE_UCODE_DATA);
- break;
-
- case AMDGPU_UCODE_ID_CP_PFP:
- *sram_offset = 0x0;
- *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_PFP_UCODE_ADDR);
- *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_PFP_UCODE_DATA);
- break;
-
- case AMDGPU_UCODE_ID_CP_ME:
- *sram_offset = 0x0;
- *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_ME_UCODE_ADDR);
- *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_ME_UCODE_DATA);
- break;
-
- case AMDGPU_UCODE_ID_CP_MEC1:
- *sram_offset = 0x10000;
- *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME1_UCODE_ADDR);
- *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME1_UCODE_DATA);
- break;
-
- case AMDGPU_UCODE_ID_CP_MEC2:
- *sram_offset = 0x10000;
- *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_MEC2_UCODE_ADDR);
- *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_MEC2_UCODE_DATA);
- break;
-
- case AMDGPU_UCODE_ID_RLC_G:
- *sram_offset = 0x2000;
- *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_ADDR);
- *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_DATA);
- break;
-
- case AMDGPU_UCODE_ID_SDMA0:
- *sram_offset = 0x0;
- *sram_addr_reg_offset = SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_UCODE_ADDR);
- *sram_data_reg_offset = SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_UCODE_DATA);
- break;
-
-/* TODO: needs to confirm */
-#if 0
- case AMDGPU_UCODE_ID_SDMA1:
- *sram_offset = ;
- *sram_addr_reg_offset = ;
- break;
-
- case AMDGPU_UCODE_ID_UVD:
- *sram_offset = ;
- *sram_addr_reg_offset = ;
- break;
-
- case AMDGPU_UCODE_ID_VCE:
- *sram_offset = ;
- *sram_addr_reg_offset = ;
- break;
-#endif
-
- case AMDGPU_UCODE_ID_MAXIMUM:
- default:
- ret = -EINVAL;
- break;
- }
-
- return ret;
-}
-
-static bool psp_v10_0_compare_sram_data(struct psp_context *psp,
- struct amdgpu_firmware_info *ucode,
- enum AMDGPU_UCODE_ID ucode_type)
-{
- int err = 0;
- unsigned int fw_sram_reg_val = 0;
- unsigned int fw_sram_addr_reg_offset = 0;
- unsigned int fw_sram_data_reg_offset = 0;
- unsigned int ucode_size;
- uint32_t *ucode_mem = NULL;
- struct amdgpu_device *adev = psp->adev;
-
- err = psp_v10_0_sram_map(adev, &fw_sram_reg_val, &fw_sram_addr_reg_offset,
- &fw_sram_data_reg_offset, ucode_type);
- if (err)
- return false;
-
- WREG32(fw_sram_addr_reg_offset, fw_sram_reg_val);
-
- ucode_size = ucode->ucode_size;
- ucode_mem = (uint32_t *)ucode->kaddr;
- while (ucode_size) {
- fw_sram_reg_val = RREG32(fw_sram_data_reg_offset);
-
- if (*ucode_mem != fw_sram_reg_val)
- return false;
-
- ucode_mem++;
- /* 4 bytes */
- ucode_size -= 4;
- }
-
- return true;
-}
-
-
static int psp_v10_0_mode1_reset(struct psp_context *psp)
{
DRM_INFO("psp mode 1 reset not supported now! \n");
@@ -379,7 +241,6 @@ static const struct psp_funcs psp_v10_0_funcs = {
.ring_create = psp_v10_0_ring_create,
.ring_stop = psp_v10_0_ring_stop,
.ring_destroy = psp_v10_0_ring_destroy,
- .compare_sram_data = psp_v10_0_compare_sram_data,
.mode1_reset = psp_v10_0_mode1_reset,
.ring_get_wptr = psp_v10_0_ring_get_wptr,
.ring_set_wptr = psp_v10_0_ring_set_wptr,
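/* Sketch (not part of the patch): the per-IP init_microcode functions above now
 * call a shared psp_init_asd_microcode() instead of open-coding the ASD
 * firmware load. A minimal reconstruction of what that helper consolidates,
 * pieced together from the lines removed above; the real helper is defined in
 * the common PSP code and may differ in detail.
 */
static int example_init_asd_microcode(struct psp_context *psp, const char *chip_name)
{
	struct amdgpu_device *adev = psp->adev;
	const struct psp_firmware_header_v1_0 *hdr;
	char fw_name[30];
	int err;

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_asd.bin", chip_name);
	err = request_firmware(&adev->psp.asd_fw, fw_name, adev->dev);
	if (err)
		goto out;

	err = amdgpu_ucode_validate(adev->psp.asd_fw);
	if (err)
		goto out;

	hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.asd_fw->data;
	adev->psp.asd_fw_version = le32_to_cpu(hdr->header.ucode_version);
	adev->psp.asd_feature_version = le32_to_cpu(hdr->ucode_feature_version);
	adev->psp.asd_ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes);
	adev->psp.asd_start_addr = (uint8_t *)hdr +
		le32_to_cpu(hdr->header.ucode_array_offset_bytes);
	return 0;

out:
	dev_err(adev->dev, "failed to load ASD firmware \"%s\"\n", fw_name);
	release_firmware(adev->psp.asd_fw);
	adev->psp.asd_fw = NULL;
	return err;
}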
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
index 0afd610a1263..1de89cc3c355 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
@@ -75,10 +75,6 @@ static int psp_v11_0_init_microcode(struct psp_context *psp)
const char *chip_name;
char fw_name[30];
int err = 0;
- const struct psp_firmware_header_v1_0 *sos_hdr;
- const struct psp_firmware_header_v1_1 *sos_hdr_v1_1;
- const struct psp_firmware_header_v1_2 *sos_hdr_v1_2;
- const struct psp_firmware_header_v1_0 *asd_hdr;
const struct ta_firmware_header_v1_0 *ta_hdr;
DRM_DEBUG("\n");
@@ -103,66 +99,13 @@ static int psp_v11_0_init_microcode(struct psp_context *psp)
BUG();
}
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sos.bin", chip_name);
- err = request_firmware(&adev->psp.sos_fw, fw_name, adev->dev);
+ err = psp_init_sos_microcode(psp, chip_name);
if (err)
- goto out;
+ return err;
- err = amdgpu_ucode_validate(adev->psp.sos_fw);
+ err = psp_init_asd_microcode(psp, chip_name);
if (err)
- goto out;
-
- sos_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.sos_fw->data;
- amdgpu_ucode_print_psp_hdr(&sos_hdr->header);
-
- switch (sos_hdr->header.header_version_major) {
- case 1:
- adev->psp.sos_fw_version = le32_to_cpu(sos_hdr->header.ucode_version);
- adev->psp.sos_feature_version = le32_to_cpu(sos_hdr->ucode_feature_version);
- adev->psp.sos_bin_size = le32_to_cpu(sos_hdr->sos_size_bytes);
- adev->psp.sys_bin_size = le32_to_cpu(sos_hdr->sos_offset_bytes);
- adev->psp.sys_start_addr = (uint8_t *)sos_hdr +
- le32_to_cpu(sos_hdr->header.ucode_array_offset_bytes);
- adev->psp.sos_start_addr = (uint8_t *)adev->psp.sys_start_addr +
- le32_to_cpu(sos_hdr->sos_offset_bytes);
- if (sos_hdr->header.header_version_minor == 1) {
- sos_hdr_v1_1 = (const struct psp_firmware_header_v1_1 *)adev->psp.sos_fw->data;
- adev->psp.toc_bin_size = le32_to_cpu(sos_hdr_v1_1->toc_size_bytes);
- adev->psp.toc_start_addr = (uint8_t *)adev->psp.sys_start_addr +
- le32_to_cpu(sos_hdr_v1_1->toc_offset_bytes);
- adev->psp.kdb_bin_size = le32_to_cpu(sos_hdr_v1_1->kdb_size_bytes);
- adev->psp.kdb_start_addr = (uint8_t *)adev->psp.sys_start_addr +
- le32_to_cpu(sos_hdr_v1_1->kdb_offset_bytes);
- }
- if (sos_hdr->header.header_version_minor == 2) {
- sos_hdr_v1_2 = (const struct psp_firmware_header_v1_2 *)adev->psp.sos_fw->data;
- adev->psp.kdb_bin_size = le32_to_cpu(sos_hdr_v1_2->kdb_size_bytes);
- adev->psp.kdb_start_addr = (uint8_t *)adev->psp.sys_start_addr +
- le32_to_cpu(sos_hdr_v1_2->kdb_offset_bytes);
- }
- break;
- default:
- dev_err(adev->dev,
- "Unsupported psp sos firmware\n");
- err = -EINVAL;
- goto out;
- }
-
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_asd.bin", chip_name);
- err = request_firmware(&adev->psp.asd_fw, fw_name, adev->dev);
- if (err)
- goto out1;
-
- err = amdgpu_ucode_validate(adev->psp.asd_fw);
- if (err)
- goto out1;
-
- asd_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.asd_fw->data;
- adev->psp.asd_fw_version = le32_to_cpu(asd_hdr->header.ucode_version);
- adev->psp.asd_feature_version = le32_to_cpu(asd_hdr->ucode_feature_version);
- adev->psp.asd_ucode_size = le32_to_cpu(asd_hdr->header.ucode_size_bytes);
- adev->psp.asd_start_addr = (uint8_t *)asd_hdr +
- le32_to_cpu(asd_hdr->header.ucode_array_offset_bytes);
+ return err;
switch (adev->asic_type) {
case CHIP_VEGA20:
@@ -194,6 +137,8 @@ static int psp_v11_0_init_microcode(struct psp_context *psp)
case CHIP_NAVI10:
case CHIP_NAVI14:
case CHIP_NAVI12:
+ if (amdgpu_sriov_vf(adev))
+ break;
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name);
err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev);
if (err) {
@@ -229,15 +174,6 @@ static int psp_v11_0_init_microcode(struct psp_context *psp)
out2:
release_firmware(adev->psp.ta_fw);
adev->psp.ta_fw = NULL;
-out1:
- release_firmware(adev->psp.asd_fw);
- adev->psp.asd_fw = NULL;
-out:
- dev_err(adev->dev,
- "psp v11.0: Failed to load firmware \"%s\"\n", fw_name);
- release_firmware(adev->psp.sos_fw);
- adev->psp.sos_fw = NULL;
-
return err;
}
@@ -283,11 +219,8 @@ static int psp_v11_0_bootloader_load_kdb(struct psp_context *psp)
/* Check tOS sign of life register to confirm sys driver and sOS
* have already been loaded.
*/
- if (psp_v11_0_is_sos_alive(psp)) {
- psp->sos_fw_version = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_58);
- dev_info(adev->dev, "sos fw version = 0x%x.\n", psp->sos_fw_version);
+ if (psp_v11_0_is_sos_alive(psp))
return 0;
- }
ret = psp_v11_0_wait_for_bootloader(psp);
if (ret)
@@ -319,11 +252,8 @@ static int psp_v11_0_bootloader_load_sysdrv(struct psp_context *psp)
/* Check sOS sign of life register to confirm sys driver and sOS
* are already been loaded.
*/
- if (psp_v11_0_is_sos_alive(psp)) {
- psp->sos_fw_version = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_58);
- dev_info(adev->dev, "sos fw version = 0x%x.\n", psp->sos_fw_version);
+ if (psp_v11_0_is_sos_alive(psp))
return 0;
- }
ret = psp_v11_0_wait_for_bootloader(psp);
if (ret)
@@ -446,13 +376,6 @@ static int psp_v11_0_ring_init(struct psp_context *psp,
return 0;
}
-static bool psp_v11_0_support_vmr_ring(struct psp_context *psp)
-{
- if (amdgpu_sriov_vf(psp->adev) && psp->sos_fw_version > 0x80045)
- return true;
- return false;
-}
-
static int psp_v11_0_ring_stop(struct psp_context *psp,
enum psp_ring_type ring_type)
{
@@ -460,7 +383,7 @@ static int psp_v11_0_ring_stop(struct psp_context *psp,
struct amdgpu_device *adev = psp->adev;
/* Write the ring destroy command*/
- if (psp_v11_0_support_vmr_ring(psp))
+ if (amdgpu_sriov_vf(adev))
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101,
GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING);
else
@@ -471,7 +394,7 @@ static int psp_v11_0_ring_stop(struct psp_context *psp,
mdelay(20);
/* Wait for response flag (bit 31) */
- if (psp_v11_0_support_vmr_ring(psp))
+ if (amdgpu_sriov_vf(adev))
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
0x80000000, 0x80000000, false);
else
@@ -489,7 +412,7 @@ static int psp_v11_0_ring_create(struct psp_context *psp,
struct psp_ring *ring = &psp->km_ring;
struct amdgpu_device *adev = psp->adev;
- if (psp_v11_0_support_vmr_ring(psp)) {
+ if (amdgpu_sriov_vf(adev)) {
ret = psp_v11_0_ring_stop(psp, ring_type);
if (ret) {
DRM_ERROR("psp_v11_0_ring_stop_sriov failed!\n");
@@ -567,138 +490,6 @@ static int psp_v11_0_ring_destroy(struct psp_context *psp,
return ret;
}
-static int
-psp_v11_0_sram_map(struct amdgpu_device *adev,
- unsigned int *sram_offset, unsigned int *sram_addr_reg_offset,
- unsigned int *sram_data_reg_offset,
- enum AMDGPU_UCODE_ID ucode_id)
-{
- int ret = 0;
-
- switch (ucode_id) {
-/* TODO: needs to confirm */
-#if 0
- case AMDGPU_UCODE_ID_SMC:
- *sram_offset = 0;
- *sram_addr_reg_offset = 0;
- *sram_data_reg_offset = 0;
- break;
-#endif
-
- case AMDGPU_UCODE_ID_CP_CE:
- *sram_offset = 0x0;
- *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_CE_UCODE_ADDR);
- *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_CE_UCODE_DATA);
- break;
-
- case AMDGPU_UCODE_ID_CP_PFP:
- *sram_offset = 0x0;
- *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_PFP_UCODE_ADDR);
- *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_PFP_UCODE_DATA);
- break;
-
- case AMDGPU_UCODE_ID_CP_ME:
- *sram_offset = 0x0;
- *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_ME_UCODE_ADDR);
- *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_ME_UCODE_DATA);
- break;
-
- case AMDGPU_UCODE_ID_CP_MEC1:
- *sram_offset = 0x10000;
- *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME1_UCODE_ADDR);
- *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME1_UCODE_DATA);
- break;
-
- case AMDGPU_UCODE_ID_CP_MEC2:
- *sram_offset = 0x10000;
- *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_MEC2_UCODE_ADDR);
- *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_MEC2_UCODE_DATA);
- break;
-
- case AMDGPU_UCODE_ID_RLC_G:
- *sram_offset = 0x2000;
- if (adev->asic_type < CHIP_NAVI10) {
- *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_ADDR);
- *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_DATA);
- } else {
- *sram_addr_reg_offset = adev->reg_offset[GC_HWIP][0][1] + mmRLC_GPM_UCODE_ADDR_NV10;
- *sram_data_reg_offset = adev->reg_offset[GC_HWIP][0][1] + mmRLC_GPM_UCODE_DATA_NV10;
- }
- break;
-
- case AMDGPU_UCODE_ID_SDMA0:
- *sram_offset = 0x0;
- if (adev->asic_type < CHIP_NAVI10) {
- *sram_addr_reg_offset = SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_UCODE_ADDR);
- *sram_data_reg_offset = SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_UCODE_DATA);
- } else {
- *sram_addr_reg_offset = adev->reg_offset[GC_HWIP][0][1] + mmSDMA0_UCODE_ADDR_NV10;
- *sram_data_reg_offset = adev->reg_offset[GC_HWIP][0][1] + mmSDMA0_UCODE_DATA_NV10;
- }
- break;
-
-/* TODO: needs to confirm */
-#if 0
- case AMDGPU_UCODE_ID_SDMA1:
- *sram_offset = ;
- *sram_addr_reg_offset = ;
- break;
-
- case AMDGPU_UCODE_ID_UVD:
- *sram_offset = ;
- *sram_addr_reg_offset = ;
- break;
-
- case AMDGPU_UCODE_ID_VCE:
- *sram_offset = ;
- *sram_addr_reg_offset = ;
- break;
-#endif
-
- case AMDGPU_UCODE_ID_MAXIMUM:
- default:
- ret = -EINVAL;
- break;
- }
-
- return ret;
-}
-
-static bool psp_v11_0_compare_sram_data(struct psp_context *psp,
- struct amdgpu_firmware_info *ucode,
- enum AMDGPU_UCODE_ID ucode_type)
-{
- int err = 0;
- unsigned int fw_sram_reg_val = 0;
- unsigned int fw_sram_addr_reg_offset = 0;
- unsigned int fw_sram_data_reg_offset = 0;
- unsigned int ucode_size;
- uint32_t *ucode_mem = NULL;
- struct amdgpu_device *adev = psp->adev;
-
- err = psp_v11_0_sram_map(adev, &fw_sram_reg_val, &fw_sram_addr_reg_offset,
- &fw_sram_data_reg_offset, ucode_type);
- if (err)
- return false;
-
- WREG32(fw_sram_addr_reg_offset, fw_sram_reg_val);
-
- ucode_size = ucode->ucode_size;
- ucode_mem = (uint32_t *)ucode->kaddr;
- while (ucode_size) {
- fw_sram_reg_val = RREG32(fw_sram_data_reg_offset);
-
- if (*ucode_mem != fw_sram_reg_val)
- return false;
-
- ucode_mem++;
- /* 4 bytes */
- ucode_size -= 4;
- }
-
- return true;
-}
-
static int psp_v11_0_mode1_reset(struct psp_context *psp)
{
int ret;
@@ -733,181 +524,6 @@ static int psp_v11_0_mode1_reset(struct psp_context *psp)
return 0;
}
-/* TODO: Fill in follow functions once PSP firmware interface for XGMI is ready.
- * For now, return success and hack the hive_id so high level code can
- * start testing
- */
-static int psp_v11_0_xgmi_get_topology_info(struct psp_context *psp,
- int number_devices, struct psp_xgmi_topology_info *topology)
-{
- struct ta_xgmi_shared_memory *xgmi_cmd;
- struct ta_xgmi_cmd_get_topology_info_input *topology_info_input;
- struct ta_xgmi_cmd_get_topology_info_output *topology_info_output;
- int i;
- int ret;
-
- if (!topology || topology->num_nodes > TA_XGMI__MAX_CONNECTED_NODES)
- return -EINVAL;
-
- xgmi_cmd = (struct ta_xgmi_shared_memory*)psp->xgmi_context.xgmi_shared_buf;
- memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory));
-
- /* Fill in the shared memory with topology information as input */
- topology_info_input = &xgmi_cmd->xgmi_in_message.get_topology_info;
- xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_GET_TOPOLOGY_INFO;
- topology_info_input->num_nodes = number_devices;
-
- for (i = 0; i < topology_info_input->num_nodes; i++) {
- topology_info_input->nodes[i].node_id = topology->nodes[i].node_id;
- topology_info_input->nodes[i].num_hops = topology->nodes[i].num_hops;
- topology_info_input->nodes[i].is_sharing_enabled = topology->nodes[i].is_sharing_enabled;
- topology_info_input->nodes[i].sdma_engine = topology->nodes[i].sdma_engine;
- }
-
- /* Invoke xgmi ta to get the topology information */
- ret = psp_xgmi_invoke(psp, TA_COMMAND_XGMI__GET_GET_TOPOLOGY_INFO);
- if (ret)
- return ret;
-
- /* Read the output topology information from the shared memory */
- topology_info_output = &xgmi_cmd->xgmi_out_message.get_topology_info;
- topology->num_nodes = xgmi_cmd->xgmi_out_message.get_topology_info.num_nodes;
- for (i = 0; i < topology->num_nodes; i++) {
- topology->nodes[i].node_id = topology_info_output->nodes[i].node_id;
- topology->nodes[i].num_hops = topology_info_output->nodes[i].num_hops;
- topology->nodes[i].is_sharing_enabled = topology_info_output->nodes[i].is_sharing_enabled;
- topology->nodes[i].sdma_engine = topology_info_output->nodes[i].sdma_engine;
- }
-
- return 0;
-}
-
-static int psp_v11_0_xgmi_set_topology_info(struct psp_context *psp,
- int number_devices, struct psp_xgmi_topology_info *topology)
-{
- struct ta_xgmi_shared_memory *xgmi_cmd;
- struct ta_xgmi_cmd_get_topology_info_input *topology_info_input;
- int i;
-
- if (!topology || topology->num_nodes > TA_XGMI__MAX_CONNECTED_NODES)
- return -EINVAL;
-
- xgmi_cmd = (struct ta_xgmi_shared_memory*)psp->xgmi_context.xgmi_shared_buf;
- memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory));
-
- topology_info_input = &xgmi_cmd->xgmi_in_message.get_topology_info;
- xgmi_cmd->cmd_id = TA_COMMAND_XGMI__SET_TOPOLOGY_INFO;
- topology_info_input->num_nodes = number_devices;
-
- for (i = 0; i < topology_info_input->num_nodes; i++) {
- topology_info_input->nodes[i].node_id = topology->nodes[i].node_id;
- topology_info_input->nodes[i].num_hops = topology->nodes[i].num_hops;
- topology_info_input->nodes[i].is_sharing_enabled = 1;
- topology_info_input->nodes[i].sdma_engine = topology->nodes[i].sdma_engine;
- }
-
- /* Invoke xgmi ta to set topology information */
- return psp_xgmi_invoke(psp, TA_COMMAND_XGMI__SET_TOPOLOGY_INFO);
-}
-
-static int psp_v11_0_xgmi_get_hive_id(struct psp_context *psp, uint64_t *hive_id)
-{
- struct ta_xgmi_shared_memory *xgmi_cmd;
- int ret;
-
- xgmi_cmd = (struct ta_xgmi_shared_memory*)psp->xgmi_context.xgmi_shared_buf;
- memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory));
-
- xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_HIVE_ID;
-
- /* Invoke xgmi ta to get hive id */
- ret = psp_xgmi_invoke(psp, xgmi_cmd->cmd_id);
- if (ret)
- return ret;
-
- *hive_id = xgmi_cmd->xgmi_out_message.get_hive_id.hive_id;
-
- return 0;
-}
-
-static int psp_v11_0_xgmi_get_node_id(struct psp_context *psp, uint64_t *node_id)
-{
- struct ta_xgmi_shared_memory *xgmi_cmd;
- int ret;
-
- xgmi_cmd = (struct ta_xgmi_shared_memory*)psp->xgmi_context.xgmi_shared_buf;
- memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory));
-
- xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_NODE_ID;
-
- /* Invoke xgmi ta to get the node id */
- ret = psp_xgmi_invoke(psp, xgmi_cmd->cmd_id);
- if (ret)
- return ret;
-
- *node_id = xgmi_cmd->xgmi_out_message.get_node_id.node_id;
-
- return 0;
-}
-
-static int psp_v11_0_ras_trigger_error(struct psp_context *psp,
- struct ta_ras_trigger_error_input *info)
-{
- struct ta_ras_shared_memory *ras_cmd;
- int ret;
-
- if (!psp->ras.ras_initialized)
- return -EINVAL;
-
- ras_cmd = (struct ta_ras_shared_memory *)psp->ras.ras_shared_buf;
- memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory));
-
- ras_cmd->cmd_id = TA_RAS_COMMAND__TRIGGER_ERROR;
- ras_cmd->ras_in_message.trigger_error = *info;
-
- ret = psp_ras_invoke(psp, ras_cmd->cmd_id);
- if (ret)
- return -EINVAL;
-
- /* If err_event_athub occurs, error inject was successful, however
- return status from TA is no longer reliable */
- if (amdgpu_ras_intr_triggered())
- return 0;
-
- return ras_cmd->ras_status;
-}
-
-static int psp_v11_0_ras_cure_posion(struct psp_context *psp, uint64_t *mode_ptr)
-{
-#if 0
- // not support yet.
- struct ta_ras_shared_memory *ras_cmd;
- int ret;
-
- if (!psp->ras.ras_initialized)
- return -EINVAL;
-
- ras_cmd = (struct ta_ras_shared_memory *)psp->ras.ras_shared_buf;
- memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory));
-
- ras_cmd->cmd_id = TA_RAS_COMMAND__CURE_POISON;
- ras_cmd->ras_in_message.cure_poison.mode_ptr = mode_ptr;
-
- ret = psp_ras_invoke(psp, ras_cmd->cmd_id);
- if (ret)
- return -EINVAL;
-
- return ras_cmd->ras_status;
-#else
- return -EINVAL;
-#endif
-}
-
-static int psp_v11_0_rlc_autoload_start(struct psp_context *psp)
-{
- return psp_rlc_autoload_start(psp);
-}
-
static int psp_v11_0_memory_training_send_msg(struct psp_context *psp, int msg)
{
int ret;
@@ -1099,7 +715,7 @@ static uint32_t psp_v11_0_ring_get_wptr(struct psp_context *psp)
uint32_t data;
struct amdgpu_device *adev = psp->adev;
- if (psp_v11_0_support_vmr_ring(psp))
+ if (amdgpu_sriov_vf(adev))
data = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102);
else
data = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67);
@@ -1111,7 +727,7 @@ static void psp_v11_0_ring_set_wptr(struct psp_context *psp, uint32_t value)
{
struct amdgpu_device *adev = psp->adev;
- if (psp_v11_0_support_vmr_ring(psp)) {
+ if (amdgpu_sriov_vf(adev)) {
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, value);
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, GFX_CTRL_CMD_ID_CONSUME_CMD);
} else
@@ -1203,16 +819,7 @@ static const struct psp_funcs psp_v11_0_funcs = {
.ring_create = psp_v11_0_ring_create,
.ring_stop = psp_v11_0_ring_stop,
.ring_destroy = psp_v11_0_ring_destroy,
- .compare_sram_data = psp_v11_0_compare_sram_data,
.mode1_reset = psp_v11_0_mode1_reset,
- .xgmi_get_topology_info = psp_v11_0_xgmi_get_topology_info,
- .xgmi_set_topology_info = psp_v11_0_xgmi_set_topology_info,
- .xgmi_get_hive_id = psp_v11_0_xgmi_get_hive_id,
- .xgmi_get_node_id = psp_v11_0_xgmi_get_node_id,
- .support_vmr_ring = psp_v11_0_support_vmr_ring,
- .ras_trigger_error = psp_v11_0_ras_trigger_error,
- .ras_cure_posion = psp_v11_0_ras_cure_posion,
- .rlc_autoload_start = psp_v11_0_rlc_autoload_start,
.mem_training_init = psp_v11_0_memory_training_init,
.mem_training_fini = psp_v11_0_memory_training_fini,
.mem_training = psp_v11_0_memory_training,
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c
index 58d8b6d732e8..6c9614f77d33 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c
@@ -45,11 +45,7 @@ static int psp_v12_0_init_microcode(struct psp_context *psp)
{
struct amdgpu_device *adev = psp->adev;
const char *chip_name;
- char fw_name[30];
int err = 0;
- const struct psp_firmware_header_v1_0 *asd_hdr;
-
- DRM_DEBUG("\n");
switch (adev->asic_type) {
case CHIP_RENOIR:
@@ -59,28 +55,7 @@ static int psp_v12_0_init_microcode(struct psp_context *psp)
BUG();
}
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_asd.bin", chip_name);
- err = request_firmware(&adev->psp.asd_fw, fw_name, adev->dev);
- if (err)
- goto out1;
-
- err = amdgpu_ucode_validate(adev->psp.asd_fw);
- if (err)
- goto out1;
-
- asd_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.asd_fw->data;
- adev->psp.asd_fw_version = le32_to_cpu(asd_hdr->header.ucode_version);
- adev->psp.asd_feature_version = le32_to_cpu(asd_hdr->ucode_feature_version);
- adev->psp.asd_ucode_size = le32_to_cpu(asd_hdr->header.ucode_size_bytes);
- adev->psp.asd_start_addr = (uint8_t *)asd_hdr +
- le32_to_cpu(asd_hdr->header.ucode_array_offset_bytes);
-
- return 0;
-
-out1:
- release_firmware(adev->psp.asd_fw);
- adev->psp.asd_fw = NULL;
-
+ err = psp_init_asd_microcode(psp, chip_name);
return err;
}
@@ -95,11 +70,8 @@ static int psp_v12_0_bootloader_load_sysdrv(struct psp_context *psp)
* have already been loaded.
*/
sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81);
- if (sol_reg) {
- psp->sos_fw_version = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_58);
- printk("sos fw version = 0x%x.\n", psp->sos_fw_version);
+ if (sol_reg)
return 0;
- }
/* Wait for bootloader to signify that it is ready having bit 31 of C2PMSG_35 set to 1 */
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
@@ -228,13 +200,6 @@ static int psp_v12_0_ring_init(struct psp_context *psp,
return 0;
}
-static bool psp_v12_0_support_vmr_ring(struct psp_context *psp)
-{
- if (amdgpu_sriov_vf(psp->adev) && psp->sos_fw_version > 0x80045)
- return true;
- return false;
-}
-
static int psp_v12_0_ring_create(struct psp_context *psp,
enum psp_ring_type ring_type)
{
@@ -243,7 +208,7 @@ static int psp_v12_0_ring_create(struct psp_context *psp,
struct psp_ring *ring = &psp->km_ring;
struct amdgpu_device *adev = psp->adev;
- if (psp_v12_0_support_vmr_ring(psp)) {
+ if (amdgpu_sriov_vf(psp->adev)) {
/* Write low address of the ring to C2PMSG_102 */
psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_ring_reg);
@@ -295,7 +260,7 @@ static int psp_v12_0_ring_stop(struct psp_context *psp,
struct amdgpu_device *adev = psp->adev;
/* Write the ring destroy command*/
- if (psp_v12_0_support_vmr_ring(psp))
+ if (amdgpu_sriov_vf(adev))
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101,
GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING);
else
@@ -306,7 +271,7 @@ static int psp_v12_0_ring_stop(struct psp_context *psp,
mdelay(20);
/* Wait for response flag (bit 31) */
- if (psp_v12_0_support_vmr_ring(psp))
+ if (amdgpu_sriov_vf(adev))
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
0x80000000, 0x80000000, false);
else
@@ -334,128 +299,6 @@ static int psp_v12_0_ring_destroy(struct psp_context *psp,
return ret;
}
-static int
-psp_v12_0_sram_map(struct amdgpu_device *adev,
- unsigned int *sram_offset, unsigned int *sram_addr_reg_offset,
- unsigned int *sram_data_reg_offset,
- enum AMDGPU_UCODE_ID ucode_id)
-{
- int ret = 0;
-
- switch (ucode_id) {
-/* TODO: needs to confirm */
-#if 0
- case AMDGPU_UCODE_ID_SMC:
- *sram_offset = 0;
- *sram_addr_reg_offset = 0;
- *sram_data_reg_offset = 0;
- break;
-#endif
-
- case AMDGPU_UCODE_ID_CP_CE:
- *sram_offset = 0x0;
- *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_CE_UCODE_ADDR);
- *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_CE_UCODE_DATA);
- break;
-
- case AMDGPU_UCODE_ID_CP_PFP:
- *sram_offset = 0x0;
- *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_PFP_UCODE_ADDR);
- *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_PFP_UCODE_DATA);
- break;
-
- case AMDGPU_UCODE_ID_CP_ME:
- *sram_offset = 0x0;
- *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_ME_UCODE_ADDR);
- *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_ME_UCODE_DATA);
- break;
-
- case AMDGPU_UCODE_ID_CP_MEC1:
- *sram_offset = 0x10000;
- *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME1_UCODE_ADDR);
- *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME1_UCODE_DATA);
- break;
-
- case AMDGPU_UCODE_ID_CP_MEC2:
- *sram_offset = 0x10000;
- *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_MEC2_UCODE_ADDR);
- *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_MEC2_UCODE_DATA);
- break;
-
- case AMDGPU_UCODE_ID_RLC_G:
- *sram_offset = 0x2000;
- *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_ADDR);
- *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_DATA);
- break;
-
- case AMDGPU_UCODE_ID_SDMA0:
- *sram_offset = 0x0;
- *sram_addr_reg_offset = SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_UCODE_ADDR);
- *sram_data_reg_offset = SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_UCODE_DATA);
- break;
-
-/* TODO: needs to confirm */
-#if 0
- case AMDGPU_UCODE_ID_SDMA1:
- *sram_offset = ;
- *sram_addr_reg_offset = ;
- break;
-
- case AMDGPU_UCODE_ID_UVD:
- *sram_offset = ;
- *sram_addr_reg_offset = ;
- break;
-
- case AMDGPU_UCODE_ID_VCE:
- *sram_offset = ;
- *sram_addr_reg_offset = ;
- break;
-#endif
-
- case AMDGPU_UCODE_ID_MAXIMUM:
- default:
- ret = -EINVAL;
- break;
- }
-
- return ret;
-}
-
-static bool psp_v12_0_compare_sram_data(struct psp_context *psp,
- struct amdgpu_firmware_info *ucode,
- enum AMDGPU_UCODE_ID ucode_type)
-{
- int err = 0;
- unsigned int fw_sram_reg_val = 0;
- unsigned int fw_sram_addr_reg_offset = 0;
- unsigned int fw_sram_data_reg_offset = 0;
- unsigned int ucode_size;
- uint32_t *ucode_mem = NULL;
- struct amdgpu_device *adev = psp->adev;
-
- err = psp_v12_0_sram_map(adev, &fw_sram_reg_val, &fw_sram_addr_reg_offset,
- &fw_sram_data_reg_offset, ucode_type);
- if (err)
- return false;
-
- WREG32(fw_sram_addr_reg_offset, fw_sram_reg_val);
-
- ucode_size = ucode->ucode_size;
- ucode_mem = (uint32_t *)ucode->kaddr;
- while (ucode_size) {
- fw_sram_reg_val = RREG32(fw_sram_data_reg_offset);
-
- if (*ucode_mem != fw_sram_reg_val)
- return false;
-
- ucode_mem++;
- /* 4 bytes */
- ucode_size -= 4;
- }
-
- return true;
-}
-
static int psp_v12_0_mode1_reset(struct psp_context *psp)
{
int ret;
@@ -495,7 +338,7 @@ static uint32_t psp_v12_0_ring_get_wptr(struct psp_context *psp)
uint32_t data;
struct amdgpu_device *adev = psp->adev;
- if (psp_v12_0_support_vmr_ring(psp))
+ if (amdgpu_sriov_vf(adev))
data = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102);
else
data = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67);
@@ -507,7 +350,7 @@ static void psp_v12_0_ring_set_wptr(struct psp_context *psp, uint32_t value)
{
struct amdgpu_device *adev = psp->adev;
- if (psp_v12_0_support_vmr_ring(psp)) {
+ if (amdgpu_sriov_vf(adev)) {
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, value);
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, GFX_CTRL_CMD_ID_CONSUME_CMD);
} else
@@ -522,7 +365,6 @@ static const struct psp_funcs psp_v12_0_funcs = {
.ring_create = psp_v12_0_ring_create,
.ring_stop = psp_v12_0_ring_stop,
.ring_destroy = psp_v12_0_ring_destroy,
- .compare_sram_data = psp_v12_0_compare_sram_data,
.mode1_reset = psp_v12_0_mode1_reset,
.ring_get_wptr = psp_v12_0_ring_get_wptr,
.ring_set_wptr = psp_v12_0_ring_set_wptr,
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
index 735c43c7daab..f2e725f72d2f 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
@@ -50,9 +50,6 @@ MODULE_FIRMWARE("amdgpu/vega12_asd.bin");
#define smnMP1_FIRMWARE_FLAGS 0x3010028
-static uint32_t sos_old_versions[] = {1517616, 1510592, 1448594, 1446554};
-
-static bool psp_v3_1_support_vmr_ring(struct psp_context *psp);
static int psp_v3_1_ring_stop(struct psp_context *psp,
enum psp_ring_type ring_type);
@@ -60,9 +57,7 @@ static int psp_v3_1_init_microcode(struct psp_context *psp)
{
struct amdgpu_device *adev = psp->adev;
const char *chip_name;
- char fw_name[30];
int err = 0;
- const struct psp_firmware_header_v1_0 *hdr;
DRM_DEBUG("\n");
@@ -76,55 +71,15 @@ static int psp_v3_1_init_microcode(struct psp_context *psp)
default: BUG();
}
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sos.bin", chip_name);
- err = request_firmware(&adev->psp.sos_fw, fw_name, adev->dev);
- if (err)
- goto out;
-
- err = amdgpu_ucode_validate(adev->psp.sos_fw);
+ err = psp_init_sos_microcode(psp, chip_name);
if (err)
- goto out;
-
- hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.sos_fw->data;
- adev->psp.sos_fw_version = le32_to_cpu(hdr->header.ucode_version);
- adev->psp.sos_feature_version = le32_to_cpu(hdr->ucode_feature_version);
- adev->psp.sos_bin_size = le32_to_cpu(hdr->sos_size_bytes);
- adev->psp.sys_bin_size = le32_to_cpu(hdr->header.ucode_size_bytes) -
- le32_to_cpu(hdr->sos_size_bytes);
- adev->psp.sys_start_addr = (uint8_t *)hdr +
- le32_to_cpu(hdr->header.ucode_array_offset_bytes);
- adev->psp.sos_start_addr = (uint8_t *)adev->psp.sys_start_addr +
- le32_to_cpu(hdr->sos_offset_bytes);
-
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_asd.bin", chip_name);
- err = request_firmware(&adev->psp.asd_fw, fw_name, adev->dev);
- if (err)
- goto out;
+ return err;
- err = amdgpu_ucode_validate(adev->psp.asd_fw);
+ err = psp_init_asd_microcode(psp, chip_name);
if (err)
- goto out;
-
- hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.asd_fw->data;
- adev->psp.asd_fw_version = le32_to_cpu(hdr->header.ucode_version);
- adev->psp.asd_feature_version = le32_to_cpu(hdr->ucode_feature_version);
- adev->psp.asd_ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes);
- adev->psp.asd_start_addr = (uint8_t *)hdr +
- le32_to_cpu(hdr->header.ucode_array_offset_bytes);
+ return err;
return 0;
-out:
- if (err) {
- dev_err(adev->dev,
- "psp v3.1: Failed to load firmware \"%s\"\n",
- fw_name);
- release_firmware(adev->psp.sos_fw);
- adev->psp.sos_fw = NULL;
- release_firmware(adev->psp.asd_fw);
- adev->psp.asd_fw = NULL;
- }
-
- return err;
}
static int psp_v3_1_bootloader_load_sysdrv(struct psp_context *psp)
@@ -168,41 +123,19 @@ static int psp_v3_1_bootloader_load_sysdrv(struct psp_context *psp)
return ret;
}
-static bool psp_v3_1_match_version(struct amdgpu_device *adev, uint32_t ver)
-{
- int i;
-
- if (ver == adev->psp.sos_fw_version)
- return true;
-
- /*
- * Double check if the latest four legacy versions.
- * If yes, it is still the right version.
- */
- for (i = 0; i < ARRAY_SIZE(sos_old_versions); i++) {
- if (sos_old_versions[i] == adev->psp.sos_fw_version)
- return true;
- }
-
- return false;
-}
-
static int psp_v3_1_bootloader_load_sos(struct psp_context *psp)
{
int ret;
unsigned int psp_gfxdrv_command_reg = 0;
struct amdgpu_device *adev = psp->adev;
- uint32_t sol_reg, ver;
+ uint32_t sol_reg;
/* Check sOS sign of life register to confirm sys driver and sOS
* have already been loaded.
*/
sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81);
- if (sol_reg) {
- psp->sos_fw_version = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_58);
- printk("sos fw version = 0x%x.\n", psp->sos_fw_version);
+ if (sol_reg)
return 0;
- }
/* Wait for bootloader to signify that it is ready having bit 31 of C2PMSG_35 set to 1 */
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
@@ -227,11 +160,6 @@ static int psp_v3_1_bootloader_load_sos(struct psp_context *psp)
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_81),
RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81),
0, true);
-
- ver = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_58);
- if (!psp_v3_1_match_version(adev, ver))
- DRM_WARN("SOS version doesn't match\n");
-
return ret;
}
@@ -302,7 +230,7 @@ static int psp_v3_1_ring_create(struct psp_context *psp,
psp_v3_1_reroute_ih(psp);
- if (psp_v3_1_support_vmr_ring(psp)) {
+ if (amdgpu_sriov_vf(adev)) {
ret = psp_v3_1_ring_stop(psp, ring_type);
if (ret) {
DRM_ERROR("psp_v3_1_ring_stop_sriov failed!\n");
@@ -360,34 +288,26 @@ static int psp_v3_1_ring_stop(struct psp_context *psp,
enum psp_ring_type ring_type)
{
int ret = 0;
- unsigned int psp_ring_reg = 0;
struct amdgpu_device *adev = psp->adev;
- if (psp_v3_1_support_vmr_ring(psp)) {
- /* Write the Destroy GPCOM ring command to C2PMSG_101 */
- psp_ring_reg = GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING;
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, psp_ring_reg);
-
- /* there might be handshake issue which needs delay */
- mdelay(20);
-
- /* Wait for response flag (bit 31) in C2PMSG_101 */
- ret = psp_wait_for(psp,
- SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
- 0x80000000, 0x80000000, false);
- } else {
- /* Write the ring destroy command to C2PMSG_64 */
- psp_ring_reg = 3 << 16;
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg);
+ /* Write the ring destroy command*/
+ if (amdgpu_sriov_vf(adev))
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101,
+ GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING);
+ else
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64,
+ GFX_CTRL_CMD_ID_DESTROY_RINGS);
- /* there might be handshake issue which needs delay */
- mdelay(20);
+ /* there might be handshake issue with hardware which needs delay */
+ mdelay(20);
- /* Wait for response flag (bit 31) in C2PMSG_64 */
- ret = psp_wait_for(psp,
- SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
- 0x80000000, 0x80000000, false);
- }
+ /* Wait for response flag (bit 31) */
+ if (amdgpu_sriov_vf(adev))
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
+ 0x80000000, 0x80000000, false);
+ else
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
+ 0x80000000, 0x80000000, false);
return ret;
}
@@ -410,128 +330,6 @@ static int psp_v3_1_ring_destroy(struct psp_context *psp,
return ret;
}
-static int
-psp_v3_1_sram_map(struct amdgpu_device *adev,
- unsigned int *sram_offset, unsigned int *sram_addr_reg_offset,
- unsigned int *sram_data_reg_offset,
- enum AMDGPU_UCODE_ID ucode_id)
-{
- int ret = 0;
-
- switch(ucode_id) {
-/* TODO: needs to confirm */
-#if 0
- case AMDGPU_UCODE_ID_SMC:
- *sram_offset = 0;
- *sram_addr_reg_offset = 0;
- *sram_data_reg_offset = 0;
- break;
-#endif
-
- case AMDGPU_UCODE_ID_CP_CE:
- *sram_offset = 0x0;
- *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_CE_UCODE_ADDR);
- *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_CE_UCODE_DATA);
- break;
-
- case AMDGPU_UCODE_ID_CP_PFP:
- *sram_offset = 0x0;
- *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_PFP_UCODE_ADDR);
- *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_PFP_UCODE_DATA);
- break;
-
- case AMDGPU_UCODE_ID_CP_ME:
- *sram_offset = 0x0;
- *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_ME_UCODE_ADDR);
- *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_ME_UCODE_DATA);
- break;
-
- case AMDGPU_UCODE_ID_CP_MEC1:
- *sram_offset = 0x10000;
- *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME1_UCODE_ADDR);
- *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME1_UCODE_DATA);
- break;
-
- case AMDGPU_UCODE_ID_CP_MEC2:
- *sram_offset = 0x10000;
- *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_MEC2_UCODE_ADDR);
- *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_MEC2_UCODE_DATA);
- break;
-
- case AMDGPU_UCODE_ID_RLC_G:
- *sram_offset = 0x2000;
- *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_ADDR);
- *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_DATA);
- break;
-
- case AMDGPU_UCODE_ID_SDMA0:
- *sram_offset = 0x0;
- *sram_addr_reg_offset = SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_UCODE_ADDR);
- *sram_data_reg_offset = SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_UCODE_DATA);
- break;
-
-/* TODO: needs to confirm */
-#if 0
- case AMDGPU_UCODE_ID_SDMA1:
- *sram_offset = ;
- *sram_addr_reg_offset = ;
- break;
-
- case AMDGPU_UCODE_ID_UVD:
- *sram_offset = ;
- *sram_addr_reg_offset = ;
- break;
-
- case AMDGPU_UCODE_ID_VCE:
- *sram_offset = ;
- *sram_addr_reg_offset = ;
- break;
-#endif
-
- case AMDGPU_UCODE_ID_MAXIMUM:
- default:
- ret = -EINVAL;
- break;
- }
-
- return ret;
-}
-
-static bool psp_v3_1_compare_sram_data(struct psp_context *psp,
- struct amdgpu_firmware_info *ucode,
- enum AMDGPU_UCODE_ID ucode_type)
-{
- int err = 0;
- unsigned int fw_sram_reg_val = 0;
- unsigned int fw_sram_addr_reg_offset = 0;
- unsigned int fw_sram_data_reg_offset = 0;
- unsigned int ucode_size;
- uint32_t *ucode_mem = NULL;
- struct amdgpu_device *adev = psp->adev;
-
- err = psp_v3_1_sram_map(adev, &fw_sram_reg_val, &fw_sram_addr_reg_offset,
- &fw_sram_data_reg_offset, ucode_type);
- if (err)
- return false;
-
- WREG32(fw_sram_addr_reg_offset, fw_sram_reg_val);
-
- ucode_size = ucode->ucode_size;
- ucode_mem = (uint32_t *)ucode->kaddr;
- while (ucode_size) {
- fw_sram_reg_val = RREG32(fw_sram_data_reg_offset);
-
- if (*ucode_mem != fw_sram_reg_val)
- return false;
-
- ucode_mem++;
- /* 4 bytes */
- ucode_size -= 4;
- }
-
- return true;
-}
-
static bool psp_v3_1_smu_reload_quirk(struct psp_context *psp)
{
struct amdgpu_device *adev = psp->adev;
@@ -575,20 +373,12 @@ static int psp_v3_1_mode1_reset(struct psp_context *psp)
return 0;
}
-static bool psp_v3_1_support_vmr_ring(struct psp_context *psp)
-{
- if (amdgpu_sriov_vf(psp->adev))
- return true;
-
- return false;
-}
-
static uint32_t psp_v3_1_ring_get_wptr(struct psp_context *psp)
{
uint32_t data;
struct amdgpu_device *adev = psp->adev;
- if (psp_v3_1_support_vmr_ring(psp))
+ if (amdgpu_sriov_vf(adev))
data = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102);
else
data = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67);
@@ -599,7 +389,7 @@ static void psp_v3_1_ring_set_wptr(struct psp_context *psp, uint32_t value)
{
struct amdgpu_device *adev = psp->adev;
- if (psp_v3_1_support_vmr_ring(psp)) {
+ if (amdgpu_sriov_vf(adev)) {
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, value);
/* send interrupt to PSP for SRIOV ring write pointer update */
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101,
@@ -616,10 +406,8 @@ static const struct psp_funcs psp_v3_1_funcs = {
.ring_create = psp_v3_1_ring_create,
.ring_stop = psp_v3_1_ring_stop,
.ring_destroy = psp_v3_1_ring_destroy,
- .compare_sram_data = psp_v3_1_compare_sram_data,
.smu_reload_quirk = psp_v3_1_smu_reload_quirk,
.mode1_reset = psp_v3_1_mode1_reset,
- .support_vmr_ring = psp_v3_1_support_vmr_ring,
.ring_get_wptr = psp_v3_1_ring_get_wptr,
.ring_set_wptr = psp_v3_1_ring_set_wptr,
};
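/* Sketch (not part of the patch): what the psp_wait_for() calls in the
 * bootloader and ring_stop paths above are doing. Argument meanings are
 * inferred from the call sites in this diff: poll the given C2PMSG register
 * until (value & mask) == expected, here until response bit 31 is set. The
 * wrapper name below is made up for illustration.
 */
static int example_wait_for_psp_response(struct psp_context *psp,
					 uint32_t c2pmsg_reg_offset)
{
	/* 0x80000000 / 0x80000000: expected value and mask both select bit 31 */
	return psp_wait_for(psp, c2pmsg_reg_offset, 0x80000000, 0x80000000, false);
}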
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
index 7d509a40076f..5f304d61999e 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -355,8 +355,6 @@ static void sdma_v2_4_gfx_stop(struct amdgpu_device *adev)
ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0);
WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
}
- sdma0->sched.ready = false;
- sdma1->sched.ready = false;
}
/**
@@ -614,7 +612,8 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring, long timeout)
tmp = 0xCAFEDEAD;
adev->wb.wb[index] = cpu_to_le32(tmp);
memset(&ib, 0, sizeof(ib));
- r = amdgpu_ib_get(adev, NULL, 256, &ib);
+ r = amdgpu_ib_get(adev, NULL, 256,
+ AMDGPU_IB_POOL_DIRECT, &ib);
if (r)
goto err0;
@@ -874,7 +873,8 @@ static int sdma_v2_4_sw_init(void *handle)
&adev->sdma.trap_irq,
(i == 0) ?
AMDGPU_SDMA_IRQ_INSTANCE0 :
- AMDGPU_SDMA_IRQ_INSTANCE1);
+ AMDGPU_SDMA_IRQ_INSTANCE1,
+ AMDGPU_RING_PRIO_DEFAULT);
if (r)
return r;
}
@@ -1200,7 +1200,8 @@ static void sdma_v2_4_set_irq_funcs(struct amdgpu_device *adev)
static void sdma_v2_4_emit_copy_buffer(struct amdgpu_ib *ib,
uint64_t src_offset,
uint64_t dst_offset,
- uint32_t byte_count)
+ uint32_t byte_count,
+ bool tmz)
{
ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index b6109a99fc43..c59f6f6f4c09 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -529,8 +529,6 @@ static void sdma_v3_0_gfx_stop(struct amdgpu_device *adev)
ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0);
WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
}
- sdma0->sched.ready = false;
- sdma1->sched.ready = false;
}
/**
@@ -886,7 +884,8 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
tmp = 0xCAFEDEAD;
adev->wb.wb[index] = cpu_to_le32(tmp);
memset(&ib, 0, sizeof(ib));
- r = amdgpu_ib_get(adev, NULL, 256, &ib);
+ r = amdgpu_ib_get(adev, NULL, 256,
+ AMDGPU_IB_POOL_DIRECT, &ib);
if (r)
goto err0;
@@ -1158,7 +1157,8 @@ static int sdma_v3_0_sw_init(void *handle)
&adev->sdma.trap_irq,
(i == 0) ?
AMDGPU_SDMA_IRQ_INSTANCE0 :
- AMDGPU_SDMA_IRQ_INSTANCE1);
+ AMDGPU_SDMA_IRQ_INSTANCE1,
+ AMDGPU_RING_PRIO_DEFAULT);
if (r)
return r;
}
@@ -1638,7 +1638,8 @@ static void sdma_v3_0_set_irq_funcs(struct amdgpu_device *adev)
static void sdma_v3_0_emit_copy_buffer(struct amdgpu_ib *ib,
uint64_t src_offset,
uint64_t dst_offset,
- uint32_t byte_count)
+ uint32_t byte_count,
+ bool tmz)
{
ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 5f3a5ee2a3f4..33501c6c7189 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -115,17 +115,21 @@ static const struct soc15_reg_golden golden_settings_sdma_4[] = {
static const struct soc15_reg_golden golden_settings_sdma_vg10[] = {
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0018773f, 0x00104002),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104002),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_TIMEOUT, 0xffffffff, 0x00010001),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CHICKEN_BITS, 0xfe931f07, 0x02831d07),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG, 0x0018773f, 0x00104002),
- SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104002)
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104002),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_TIMEOUT, 0xffffffff, 0x00010001),
};
static const struct soc15_reg_golden golden_settings_sdma_vg12[] = {
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0018773f, 0x00104001),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104001),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_TIMEOUT, 0xffffffff, 0x00010001),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CHICKEN_BITS, 0xfe931f07, 0x02831d07),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG, 0x0018773f, 0x00104001),
- SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104001)
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104001),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_TIMEOUT, 0xffffffff, 0x00010001),
};
static const struct soc15_reg_golden golden_settings_sdma_4_1[] = {
@@ -174,6 +178,7 @@ static const struct soc15_reg_golden golden_settings_sdma0_4_2[] =
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC7_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC7_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_TIMEOUT, 0xffffffff, 0x00010001),
};
static const struct soc15_reg_golden golden_settings_sdma1_4_2[] = {
@@ -203,6 +208,7 @@ static const struct soc15_reg_golden golden_settings_sdma1_4_2[] = {
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC7_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC7_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_PAGE, 0x000003ff, 0x000003c0),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_TIMEOUT, 0xffffffff, 0x00010001),
};
static const struct soc15_reg_golden golden_settings_sdma_rv1[] =
@@ -222,27 +228,35 @@ static const struct soc15_reg_golden golden_settings_sdma_arct[] =
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_TIMEOUT, 0xffffffff, 0x00010001),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_TIMEOUT, 0xffffffff, 0x00010001),
SOC15_REG_GOLDEN_VALUE(SDMA2, 0, mmSDMA2_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
SOC15_REG_GOLDEN_VALUE(SDMA2, 0, mmSDMA2_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
SOC15_REG_GOLDEN_VALUE(SDMA2, 0, mmSDMA2_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA2, 0, mmSDMA2_UTCL1_TIMEOUT, 0xffffffff, 0x00010001),
SOC15_REG_GOLDEN_VALUE(SDMA3, 0, mmSDMA3_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
SOC15_REG_GOLDEN_VALUE(SDMA3, 0, mmSDMA3_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
SOC15_REG_GOLDEN_VALUE(SDMA3, 0, mmSDMA3_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA3, 0, mmSDMA3_UTCL1_TIMEOUT, 0xffffffff, 0x00010001),
SOC15_REG_GOLDEN_VALUE(SDMA4, 0, mmSDMA4_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
SOC15_REG_GOLDEN_VALUE(SDMA4, 0, mmSDMA4_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
SOC15_REG_GOLDEN_VALUE(SDMA4, 0, mmSDMA4_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA4, 0, mmSDMA4_UTCL1_TIMEOUT, 0xffffffff, 0x00010001),
SOC15_REG_GOLDEN_VALUE(SDMA5, 0, mmSDMA5_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
SOC15_REG_GOLDEN_VALUE(SDMA5, 0, mmSDMA5_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
SOC15_REG_GOLDEN_VALUE(SDMA5, 0, mmSDMA5_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA5, 0, mmSDMA5_UTCL1_TIMEOUT, 0xffffffff, 0x00010001),
SOC15_REG_GOLDEN_VALUE(SDMA6, 0, mmSDMA6_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
SOC15_REG_GOLDEN_VALUE(SDMA6, 0, mmSDMA6_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
SOC15_REG_GOLDEN_VALUE(SDMA6, 0, mmSDMA6_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA6, 0, mmSDMA6_UTCL1_TIMEOUT, 0xffffffff, 0x00010001),
SOC15_REG_GOLDEN_VALUE(SDMA7, 0, mmSDMA7_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
SOC15_REG_GOLDEN_VALUE(SDMA7, 0, mmSDMA7_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
- SOC15_REG_GOLDEN_VALUE(SDMA7, 0, mmSDMA7_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002)
+ SOC15_REG_GOLDEN_VALUE(SDMA7, 0, mmSDMA7_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA7, 0, mmSDMA7_UTCL1_TIMEOUT, 0xffffffff, 0x00010001)
};
static const struct soc15_reg_golden golden_settings_sdma_4_3[] = {
@@ -472,7 +486,7 @@ static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev)
soc15_program_register_sequence(adev,
golden_settings_sdma_4_1,
ARRAY_SIZE(golden_settings_sdma_4_1));
- if (adev->rev_id >= 8)
+ if (adev->apu_flags & AMD_APU_IS_RAVEN2)
soc15_program_register_sequence(adev,
golden_settings_sdma_rv2,
ARRAY_SIZE(golden_settings_sdma_rv2));
@@ -561,9 +575,9 @@ static int sdma_v4_0_init_microcode(struct amdgpu_device *adev)
chip_name = "vega20";
break;
case CHIP_RAVEN:
- if (adev->rev_id >= 8)
+ if (adev->apu_flags & AMD_APU_IS_RAVEN2)
chip_name = "raven2";
- else if (adev->pdev->device == 0x15d8)
+ else if (adev->apu_flags & AMD_APU_IS_PICASSO)
chip_name = "picasso";
else
chip_name = "raven";
@@ -923,8 +937,6 @@ static void sdma_v4_0_gfx_stop(struct amdgpu_device *adev)
ib_cntl = RREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL);
ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0);
WREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL, ib_cntl);
-
- sdma[i]->sched.ready = false;
}
}
@@ -971,8 +983,6 @@ static void sdma_v4_0_page_stop(struct amdgpu_device *adev)
ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_PAGE_IB_CNTL,
IB_ENABLE, 0);
WREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL, ib_cntl);
-
- sdma[i]->sched.ready = false;
}
}
@@ -1539,7 +1549,8 @@ static int sdma_v4_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
tmp = 0xCAFEDEAD;
adev->wb.wb[index] = cpu_to_le32(tmp);
memset(&ib, 0, sizeof(ib));
- r = amdgpu_ib_get(adev, NULL, 256, &ib);
+ r = amdgpu_ib_get(adev, NULL, 256,
+ AMDGPU_IB_POOL_DIRECT, &ib);
if (r)
goto err0;
@@ -1840,7 +1851,7 @@ static int sdma_v4_0_sw_init(void *handle)
ring->ring_obj = NULL;
ring->use_doorbell = true;
- DRM_INFO("use_doorbell being set to: [%s]\n",
+ DRM_DEBUG("SDMA %d use_doorbell being set to: [%s]\n", i,
ring->use_doorbell?"true":"false");
/* doorbell size is 2 dwords, get DWORD offset */
@@ -1848,7 +1859,8 @@ static int sdma_v4_0_sw_init(void *handle)
sprintf(ring->name, "sdma%d", i);
r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq,
- AMDGPU_SDMA_IRQ_INSTANCE0 + i);
+ AMDGPU_SDMA_IRQ_INSTANCE0 + i,
+ AMDGPU_RING_PRIO_DEFAULT);
if (r)
return r;
@@ -1866,7 +1878,8 @@ static int sdma_v4_0_sw_init(void *handle)
sprintf(ring->name, "page%d", i);
r = amdgpu_ring_init(adev, ring, 1024,
&adev->sdma.trap_irq,
- AMDGPU_SDMA_IRQ_INSTANCE0 + i);
+ AMDGPU_SDMA_IRQ_INSTANCE0 + i,
+ AMDGPU_RING_PRIO_DEFAULT);
if (r)
return r;
}
@@ -2445,10 +2458,12 @@ static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev)
static void sdma_v4_0_emit_copy_buffer(struct amdgpu_ib *ib,
uint64_t src_offset,
uint64_t dst_offset,
- uint32_t byte_count)
+ uint32_t byte_count,
+ bool tmz)
{
ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
- SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
+ SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) |
+ SDMA_PKT_COPY_LINEAR_HEADER_TMZ(tmz ? 1 : 0);
ib->ptr[ib->length_dw++] = byte_count - 1;
ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
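/* Sketch (not part of the patch): effect of the new "tmz" flag threaded into
 * emit_copy_buffer above. On SDMA v4 it sets the TMZ bit in the COPY_LINEAR
 * packet header so the engine treats the transfer as protected; in the SDMA
 * v2.4/v3.0 hunks shown here the parameter is accepted but not used. The
 * wrapper name below is made up for illustration.
 */
static void example_emit_protected_copy(struct amdgpu_ib *ib,
					uint64_t src, uint64_t dst, uint32_t bytes)
{
	/* last argument: tmz = true -> SDMA_PKT_COPY_LINEAR_HEADER_TMZ(1) */
	sdma_v4_0_emit_copy_buffer(ib, src, dst, bytes, true);
}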
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
index d2840c2f6286..b544baf306f2 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
@@ -88,6 +88,29 @@ static const struct soc15_reg_golden golden_settings_sdma_5[] = {
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_UTCL1_PAGE, 0x00ffffff, 0x000c5c00)
};
+static const struct soc15_reg_golden golden_settings_sdma_5_sriov[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC2_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC4_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC5_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC6_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC7_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC2_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC4_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC5_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC6_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC7_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+};
+
static const struct soc15_reg_golden golden_settings_sdma_nv10[] = {
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000),
@@ -141,9 +164,14 @@ static void sdma_v5_0_init_golden_registers(struct amdgpu_device *adev)
(const u32)ARRAY_SIZE(golden_settings_sdma_nv14));
break;
case CHIP_NAVI12:
- soc15_program_register_sequence(adev,
- golden_settings_sdma_5,
- (const u32)ARRAY_SIZE(golden_settings_sdma_5));
+ if (amdgpu_sriov_vf(adev))
+ soc15_program_register_sequence(adev,
+ golden_settings_sdma_5_sriov,
+ (const u32)ARRAY_SIZE(golden_settings_sdma_5_sriov));
+ else
+ soc15_program_register_sequence(adev,
+ golden_settings_sdma_5,
+ (const u32)ARRAY_SIZE(golden_settings_sdma_5));
soc15_program_register_sequence(adev,
golden_settings_sdma_nv12,
(const u32)ARRAY_SIZE(golden_settings_sdma_nv12));
@@ -514,9 +542,6 @@ static void sdma_v5_0_gfx_stop(struct amdgpu_device *adev)
ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0);
WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
}
-
- sdma0->sched.ready = false;
- sdma1->sched.ready = false;
}
/**
@@ -541,7 +566,7 @@ static void sdma_v5_0_rlc_stop(struct amdgpu_device *adev)
*/
static void sdma_v5_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
{
- u32 f32_cntl, phase_quantum = 0;
+ u32 f32_cntl = 0, phase_quantum = 0;
int i;
if (amdgpu_sdma_phase_quantum) {
@@ -569,9 +594,12 @@ static void sdma_v5_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
}
for (i = 0; i < adev->sdma.num_instances; i++) {
- f32_cntl = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL));
- f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
- AUTO_CTXSW_ENABLE, enable ? 1 : 0);
+ if (!amdgpu_sriov_vf(adev)) {
+ f32_cntl = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL));
+ f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
+ AUTO_CTXSW_ENABLE, enable ? 1 : 0);
+ }
+
if (enable && amdgpu_sdma_phase_quantum) {
WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_PHASE0_QUANTUM),
phase_quantum);
@@ -580,7 +608,8 @@ static void sdma_v5_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_PHASE2_QUANTUM),
phase_quantum);
}
- WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL), f32_cntl);
+ if (!amdgpu_sriov_vf(adev))
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL), f32_cntl);
}
}
@@ -603,6 +632,9 @@ static void sdma_v5_0_enable(struct amdgpu_device *adev, bool enable)
sdma_v5_0_rlc_stop(adev);
}
+ if (amdgpu_sriov_vf(adev))
+ return;
+
for (i = 0; i < adev->sdma.num_instances; i++) {
f32_cntl = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, enable ? 0 : 1);
@@ -635,7 +667,8 @@ static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev)
ring = &adev->sdma.instance[i].ring;
wb_offset = (ring->rptr_offs * 4);
- WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);
+ if (!amdgpu_sriov_vf(adev))
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);
/* Set ring buffer size in dwords */
rb_bufsz = order_base_2(ring->ring_size / 4);
@@ -711,26 +744,28 @@ static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev)
 	/* set minor_ptr_update to 0 after wptr programmed */
WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0);
- /* set utc l1 enable flag always to 1 */
- temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL));
- temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1);
-
- /* enable MCBP */
- temp = REG_SET_FIELD(temp, SDMA0_CNTL, MIDCMD_PREEMPT_ENABLE, 1);
- WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL), temp);
-
- /* Set up RESP_MODE to non-copy addresses */
- temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL));
- temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, RESP_MODE, 3);
- temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, REDO_DELAY, 9);
- WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL), temp);
-
- /* program default cache read and write policy */
- temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE));
- /* clean read policy and write policy bits */
- temp &= 0xFF0FFF;
- temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) | (CACHE_WRITE_POLICY_L2__DEFAULT << 14));
- WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE), temp);
+ if (!amdgpu_sriov_vf(adev)) {
+ /* set utc l1 enable flag always to 1 */
+ temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL));
+ temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1);
+
+ /* enable MCBP */
+ temp = REG_SET_FIELD(temp, SDMA0_CNTL, MIDCMD_PREEMPT_ENABLE, 1);
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL), temp);
+
+ /* Set up RESP_MODE to non-copy addresses */
+ temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL));
+ temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, RESP_MODE, 3);
+ temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, REDO_DELAY, 9);
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL), temp);
+
+ /* program default cache read and write policy */
+ temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE));
+ /* clean read policy and write policy bits */
+ temp &= 0xFF0FFF;
+ temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) | (CACHE_WRITE_POLICY_L2__DEFAULT << 14));
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE), temp);
+ }
if (!amdgpu_sriov_vf(adev)) {
/* unhalt engine */
@@ -960,7 +995,8 @@ static int sdma_v5_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
tmp = 0xCAFEDEAD;
adev->wb.wb[index] = cpu_to_le32(tmp);
memset(&ib, 0, sizeof(ib));
- r = amdgpu_ib_get(adev, NULL, 256, &ib);
+ r = amdgpu_ib_get(adev, NULL, 256,
+ AMDGPU_IB_POOL_DIRECT, &ib);
if (r) {
DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
goto err0;
@@ -1236,7 +1272,7 @@ static int sdma_v5_0_sw_init(void *handle)
ring->ring_obj = NULL;
ring->use_doorbell = true;
- DRM_INFO("use_doorbell being set to: [%s]\n",
+ DRM_DEBUG("SDMA %d use_doorbell being set to: [%s]\n", i,
ring->use_doorbell?"true":"false");
ring->doorbell_index = (i == 0) ?
@@ -1248,7 +1284,8 @@ static int sdma_v5_0_sw_init(void *handle)
&adev->sdma.trap_irq,
(i == 0) ?
AMDGPU_SDMA_IRQ_INSTANCE0 :
- AMDGPU_SDMA_IRQ_INSTANCE1);
+ AMDGPU_SDMA_IRQ_INSTANCE1,
+ AMDGPU_RING_PRIO_DEFAULT);
if (r)
return r;
}
@@ -1399,14 +1436,16 @@ static int sdma_v5_0_set_trap_irq_state(struct amdgpu_device *adev,
{
u32 sdma_cntl;
- u32 reg_offset = (type == AMDGPU_SDMA_IRQ_INSTANCE0) ?
- sdma_v5_0_get_reg_offset(adev, 0, mmSDMA0_CNTL) :
- sdma_v5_0_get_reg_offset(adev, 1, mmSDMA0_CNTL);
+ if (!amdgpu_sriov_vf(adev)) {
+ u32 reg_offset = (type == AMDGPU_SDMA_IRQ_INSTANCE0) ?
+ sdma_v5_0_get_reg_offset(adev, 0, mmSDMA0_CNTL) :
+ sdma_v5_0_get_reg_offset(adev, 1, mmSDMA0_CNTL);
- sdma_cntl = RREG32(reg_offset);
- sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE,
- state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
- WREG32(reg_offset, sdma_cntl);
+ sdma_cntl = RREG32(reg_offset);
+ sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32(reg_offset, sdma_cntl);
+ }
return 0;
}
@@ -1667,10 +1706,12 @@ static void sdma_v5_0_set_irq_funcs(struct amdgpu_device *adev)
static void sdma_v5_0_emit_copy_buffer(struct amdgpu_ib *ib,
uint64_t src_offset,
uint64_t dst_offset,
- uint32_t byte_count)
+ uint32_t byte_count,
+ bool tmz)
{
ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
- SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
+ SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) |
+ SDMA_PKT_COPY_LINEAR_HEADER_TMZ(tmz ? 1 : 0);
ib->ptr[ib->length_dw++] = byte_count - 1;
ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c
index 4d415bfdb42f..153db3f763bc 100644
--- a/drivers/gpu/drm/amd/amdgpu/si.c
+++ b/drivers/gpu/drm/amd/amdgpu/si.c
@@ -1249,12 +1249,6 @@ static int si_set_uvd_clocks(struct amdgpu_device *adev, u32 vclk, u32 dclk)
return 0;
}
-static void si_detect_hw_virtualization(struct amdgpu_device *adev)
-{
- if (is_virtual_machine()) /* passthrough mode */
- adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE;
-}
-
static void si_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring)
{
if (!ring || !ring->funcs->emit_wreg) {
@@ -2165,8 +2159,6 @@ static const struct amdgpu_ip_block_version si_common_ip_block =
int si_set_ip_blocks(struct amdgpu_device *adev)
{
- si_detect_hw_virtualization(adev);
-
switch (adev->asic_type) {
case CHIP_VERDE:
case CHIP_TAHITI:
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c
index 42d5601b6bf3..7d2bbcbe547b 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_dma.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c
@@ -124,7 +124,6 @@ static void si_dma_stop(struct amdgpu_device *adev)
if (adev->mman.buffer_funcs_ring == ring)
amdgpu_ttm_set_buffer_funcs_status(adev, false);
- ring->sched.ready = false;
}
}
@@ -267,7 +266,8 @@ static int si_dma_ring_test_ib(struct amdgpu_ring *ring, long timeout)
tmp = 0xCAFEDEAD;
adev->wb.wb[index] = cpu_to_le32(tmp);
memset(&ib, 0, sizeof(ib));
- r = amdgpu_ib_get(adev, NULL, 256, &ib);
+ r = amdgpu_ib_get(adev, NULL, 256,
+ AMDGPU_IB_POOL_DIRECT, &ib);
if (r)
goto err0;
@@ -504,7 +504,8 @@ static int si_dma_sw_init(void *handle)
&adev->sdma.trap_irq,
(i == 0) ?
AMDGPU_SDMA_IRQ_INSTANCE0 :
- AMDGPU_SDMA_IRQ_INSTANCE1);
+ AMDGPU_SDMA_IRQ_INSTANCE1,
+ AMDGPU_RING_PRIO_DEFAULT);
if (r)
return r;
}
@@ -775,7 +776,8 @@ static void si_dma_set_irq_funcs(struct amdgpu_device *adev)
static void si_dma_emit_copy_buffer(struct amdgpu_ib *ib,
uint64_t src_offset,
uint64_t dst_offset,
- uint32_t byte_count)
+ uint32_t byte_count,
+ bool tmz)
{
ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY,
1, 0, 0, byte_count);
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.c b/drivers/gpu/drm/amd/amdgpu/si_dpm.c
index 0860e85a2d35..c00ba4b23c9a 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.c
@@ -345,26 +345,6 @@ static const struct si_dte_data dte_data_tahiti =
false
};
-#if 0
-static const struct si_dte_data dte_data_tahiti_le =
-{
- { 0x1E8480, 0x7A1200, 0x2160EC0, 0x3938700, 0 },
- { 0x7D, 0x7D, 0x4E4, 0xB00, 0 },
- 0x5,
- 0xAFC8,
- 0x64,
- 0x32,
- 1,
- 0,
- 0x10,
- { 0x78, 0x7C, 0x82, 0x88, 0x8E, 0x94, 0x9A, 0xA0, 0xA6, 0xAC, 0xB0, 0xB4, 0xB8, 0xBC, 0xC0, 0xC4 },
- { 0x3938700, 0x3938700, 0x3938700, 0x3938700, 0x3938700, 0x3938700, 0x3938700, 0x3938700, 0x3938700, 0x3938700, 0x3938700, 0x3938700, 0x3938700, 0x3938700, 0x3938700, 0x3938700 },
- { 0x2AF8, 0x2AF8, 0x29BB, 0x27F9, 0x2637, 0x2475, 0x22B3, 0x20F1, 0x1F2F, 0x1D6D, 0x1734, 0x1414, 0x10F4, 0xDD4, 0xAB4, 0x794 },
- 85,
- true
-};
-#endif
-
static const struct si_dte_data dte_data_tahiti_pro =
{
{ 0x1E8480, 0x3D0900, 0x989680, 0x2625A00, 0x0 },
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index d42a8d8a0dea..c7c9e07962b9 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -564,7 +564,8 @@ soc15_asic_reset_method(struct amdgpu_device *adev)
static int soc15_asic_reset(struct amdgpu_device *adev)
{
/* original raven doesn't have full asic reset */
- if (adev->pdev->device == 0x15dd && adev->rev_id < 0x8)
+ if ((adev->apu_flags & AMD_APU_IS_RAVEN) &&
+ !(adev->apu_flags & AMD_APU_IS_RAVEN2))
return 0;
switch (soc15_asic_reset_method(adev)) {
@@ -708,7 +709,6 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
adev->df.funcs = &df_v1_7_funcs;
adev->rev_id = soc15_get_rev_id(adev);
- adev->nbio.funcs->detect_hw_virt(adev);
if (amdgpu_sriov_vf(adev))
adev->virt.ops = &xgpu_ai_virt_ops;
@@ -1130,16 +1130,23 @@ static int soc15_common_early_init(void *handle)
break;
case CHIP_RAVEN:
adev->asic_funcs = &soc15_asic_funcs;
+ if (adev->pdev->device == 0x15dd)
+ adev->apu_flags |= AMD_APU_IS_RAVEN;
+ if (adev->pdev->device == 0x15d8)
+ adev->apu_flags |= AMD_APU_IS_PICASSO;
if (adev->rev_id >= 0x8)
+ adev->apu_flags |= AMD_APU_IS_RAVEN2;
+
+ if (adev->apu_flags & AMD_APU_IS_RAVEN2)
adev->external_rev_id = adev->rev_id + 0x79;
- else if (adev->pdev->device == 0x15d8)
+ else if (adev->apu_flags & AMD_APU_IS_PICASSO)
adev->external_rev_id = adev->rev_id + 0x41;
else if (adev->rev_id == 1)
adev->external_rev_id = adev->rev_id + 0x20;
else
adev->external_rev_id = adev->rev_id + 0x01;
- if (adev->rev_id >= 0x8) {
+ if (adev->apu_flags & AMD_APU_IS_RAVEN2) {
adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
AMD_CG_SUPPORT_GFX_MGLS |
AMD_CG_SUPPORT_GFX_CP_LS |
@@ -1157,7 +1164,7 @@ static int soc15_common_early_init(void *handle)
AMD_CG_SUPPORT_VCN_MGCG;
adev->pg_flags = AMD_PG_SUPPORT_SDMA | AMD_PG_SUPPORT_VCN;
- } else if (adev->pdev->device == 0x15d8) {
+ } else if (adev->apu_flags & AMD_APU_IS_PICASSO) {
adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
AMD_CG_SUPPORT_GFX_MGLS |
AMD_CG_SUPPORT_GFX_CP_LS |
@@ -1218,11 +1225,12 @@ static int soc15_common_early_init(void *handle)
AMD_CG_SUPPORT_IH_CG |
AMD_CG_SUPPORT_VCN_MGCG |
AMD_CG_SUPPORT_JPEG_MGCG;
- adev->pg_flags = 0;
+ adev->pg_flags = AMD_PG_SUPPORT_VCN | AMD_PG_SUPPORT_VCN_DPG;
adev->external_rev_id = adev->rev_id + 0x32;
break;
case CHIP_RENOIR:
adev->asic_funcs = &soc15_asic_funcs;
+ adev->apu_flags |= AMD_APU_IS_RENOIR;
adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
AMD_CG_SUPPORT_GFX_MGLS |
AMD_CG_SUPPORT_GFX_3D_CGCG |
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
index c893c645a4b2..56d02aa690a7 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
@@ -35,6 +35,9 @@
#define RREG32_SOC15(ip, inst, reg) \
RREG32(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg)
+#define RREG32_SOC15_NO_KIQ(ip, inst, reg) \
+ RREG32_NO_KIQ(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg)
+
#define RREG32_SOC15_OFFSET(ip, inst, reg, offset) \
RREG32((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset)
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h
index edfe50821cd9..799925d22fc8 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15d.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h
@@ -253,7 +253,30 @@
# define PACKET3_DMA_DATA_CMD_SAIC (1 << 28)
# define PACKET3_DMA_DATA_CMD_DAIC (1 << 29)
# define PACKET3_DMA_DATA_CMD_RAW_WAIT (1 << 30)
-#define PACKET3_AQUIRE_MEM 0x58
+#define PACKET3_ACQUIRE_MEM 0x58
+/* 1. HEADER
+ * 2. COHER_CNTL [30:0]
+ * 2.1 ENGINE_SEL [31:31]
+ * 3. COHER_SIZE [31:0]
+ * 4. COHER_SIZE_HI [7:0]
+ * 5. COHER_BASE_LO [31:0]
+ * 6. COHER_BASE_HI [23:0]
+ * 7. POLL_INTERVAL [15:0]
+ */
+/* COHER_CNTL fields for CP_COHER_CNTL */
+#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_NC_ACTION_ENA(x) ((x) << 3)
+#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WC_ACTION_ENA(x) ((x) << 4)
+#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_INV_METADATA_ACTION_ENA(x) ((x) << 5)
+#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_VOL_ACTION_ENA(x) ((x) << 15)
+#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(x) ((x) << 18)
+#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(x) ((x) << 22)
+#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(x) ((x) << 23)
+#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_CB_ACTION_ENA(x) ((x) << 25)
+#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_DB_ACTION_ENA(x) ((x) << 26)
+#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(x) ((x) << 27)
+#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_VOL_ACTION_ENA(x) ((x) << 28)
+#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(x) ((x) << 29)
+#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_WB_ACTION_ENA(x) ((x) << 30)
#define PACKET3_REWIND 0x59
#define PACKET3_LOAD_UCONFIG_REG 0x5E
#define PACKET3_LOAD_SH_REG 0x5F
@@ -286,6 +309,7 @@
#define PACKET3_WAIT_ON_DE_COUNTER_DIFF 0x88
#define PACKET3_SWITCH_BUFFER 0x8B
#define PACKET3_FRAME_CONTROL 0x90
+# define FRAME_TMZ (1 << 0)
# define FRAME_CMD(x) ((x) << 28)
/*
* x=0: tmz_begin
diff --git a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h
index ca7d05993ca2..745ed0fba1ed 100644
--- a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h
+++ b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h
@@ -24,6 +24,8 @@
#ifndef _TA_RAS_IF_H
#define _TA_RAS_IF_H
+#define RAS_TA_HOST_IF_VER 0
+
/* Responses have bit 31 set */
#define RSP_ID_MASK (1U << 31)
#define RSP_ID(cmdId) (((uint32_t)(cmdId)) | RSP_ID_MASK)
@@ -36,18 +38,24 @@ enum ras_command {
TA_RAS_COMMAND__TRIGGER_ERROR,
};
-enum ta_ras_status {
- TA_RAS_STATUS__SUCCESS = 0x00,
- TA_RAS_STATUS__RESET_NEEDED = 0x01,
- TA_RAS_STATUS__ERROR_INVALID_PARAMETER = 0x02,
- TA_RAS_STATUS__ERROR_RAS_NOT_AVAILABLE = 0x03,
- TA_RAS_STATUS__ERROR_RAS_DUPLICATE_CMD = 0x04,
- TA_RAS_STATUS__ERROR_INJECTION_FAILED = 0x05,
- TA_RAS_STATUS__ERROR_ASD_READ_WRITE = 0x06,
- TA_RAS_STATUS__ERROR_TOGGLE_DF_CSTATE = 0x07,
- TA_RAS_STATUS__ERROR_TIMEOUT = 0x08,
- TA_RAS_STATUS__ERROR_BLOCK_DISABLED = 0x09,
- TA_RAS_STATUS__ERROR_GENERIC = 0x10,
+enum ta_ras_status
+{
+ TA_RAS_STATUS__SUCCESS = 0x00,
+ TA_RAS_STATUS__RESET_NEEDED = 0xA001,
+ TA_RAS_STATUS__ERROR_INVALID_PARAMETER = 0xA002,
+ TA_RAS_STATUS__ERROR_RAS_NOT_AVAILABLE = 0xA003,
+ TA_RAS_STATUS__ERROR_RAS_DUPLICATE_CMD = 0xA004,
+ TA_RAS_STATUS__ERROR_INJECTION_FAILED = 0xA005,
+ TA_RAS_STATUS__ERROR_ASD_READ_WRITE = 0xA006,
+ TA_RAS_STATUS__ERROR_TOGGLE_DF_CSTATE = 0xA007,
+ TA_RAS_STATUS__ERROR_TIMEOUT = 0xA008,
+	TA_RAS_STATUS__ERROR_BLOCK_DISABLED		= 0xA009,
+ TA_RAS_STATUS__ERROR_GENERIC = 0xA00A,
+ TA_RAS_STATUS__ERROR_RAS_MMHUB_INIT = 0xA00B,
+ TA_RAS_STATUS__ERROR_GET_DEV_INFO = 0xA00C,
+ TA_RAS_STATUS__ERROR_UNSUPPORTED_DEV = 0xA00D,
+ TA_RAS_STATUS__ERROR_NOT_INITIALIZED = 0xA00E,
+ TA_RAS_STATUS__ERROR_TEE_INTERNAL = 0xA00F
};
enum ta_ras_block {
@@ -97,22 +105,39 @@ struct ta_ras_trigger_error_input {
 	uint64_t value;		// method of error injection, i.e. persistent, coherent, etc.
};
+struct ta_ras_output_flags
+{
+ uint8_t ras_init_success_flag;
+ uint8_t err_inject_switch_disable_flag;
+ uint8_t reg_access_failure_flag;
+};
+
/* Common input structure for RAS callbacks */
/**********************************************************/
union ta_ras_cmd_input {
struct ta_ras_enable_features_input enable_features;
struct ta_ras_disable_features_input disable_features;
struct ta_ras_trigger_error_input trigger_error;
+
+ uint32_t reserve_pad[256];
+};
+
+union ta_ras_cmd_output
+{
+ struct ta_ras_output_flags flags;
+
+ uint32_t reserve_pad[256];
};
/* Shared Memory structures */
/**********************************************************/
struct ta_ras_shared_memory {
- uint32_t cmd_id;
- uint32_t resp_id;
- enum ta_ras_status ras_status;
- uint32_t reserved;
- union ta_ras_cmd_input ras_in_message;
+ uint32_t cmd_id;
+ uint32_t resp_id;
+ uint32_t ras_status;
+ uint32_t if_version;
+ union ta_ras_cmd_input ras_in_message;
+ union ta_ras_cmd_output ras_out_message;
};
#endif // TL_RAS_IF_H_
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
index 14d346321a5f..418cf097c918 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
@@ -56,24 +56,43 @@ const uint32_t
static void umc_v6_1_enable_umc_index_mode(struct amdgpu_device *adev)
{
- WREG32_FIELD15(RSMU, 0, RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU,
+ uint32_t rsmu_umc_addr, rsmu_umc_val;
+
+ rsmu_umc_addr = SOC15_REG_OFFSET(RSMU, 0,
+ mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU);
+ rsmu_umc_val = RREG32_PCIE(rsmu_umc_addr * 4);
+
+ rsmu_umc_val = REG_SET_FIELD(rsmu_umc_val,
+ RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU,
RSMU_UMC_INDEX_MODE_EN, 1);
+
+ WREG32_PCIE(rsmu_umc_addr * 4, rsmu_umc_val);
}
static void umc_v6_1_disable_umc_index_mode(struct amdgpu_device *adev)
{
- WREG32_FIELD15(RSMU, 0, RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU,
+ uint32_t rsmu_umc_addr, rsmu_umc_val;
+
+ rsmu_umc_addr = SOC15_REG_OFFSET(RSMU, 0,
+ mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU);
+ rsmu_umc_val = RREG32_PCIE(rsmu_umc_addr * 4);
+
+ rsmu_umc_val = REG_SET_FIELD(rsmu_umc_val,
+ RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU,
RSMU_UMC_INDEX_MODE_EN, 0);
+
+ WREG32_PCIE(rsmu_umc_addr * 4, rsmu_umc_val);
}
static uint32_t umc_v6_1_get_umc_index_mode_state(struct amdgpu_device *adev)
{
- uint32_t rsmu_umc_index;
+ uint32_t rsmu_umc_addr, rsmu_umc_val;
- rsmu_umc_index = RREG32_SOC15(RSMU, 0,
+ rsmu_umc_addr = SOC15_REG_OFFSET(RSMU, 0,
mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU);
+ rsmu_umc_val = RREG32_PCIE(rsmu_umc_addr * 4);
- return REG_GET_FIELD(rsmu_umc_index,
+ return REG_GET_FIELD(rsmu_umc_val,
RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU,
RSMU_UMC_INDEX_MODE_EN);
}
@@ -85,6 +104,81 @@ static inline uint32_t get_umc_6_reg_offset(struct amdgpu_device *adev,
return adev->umc.channel_offs*ch_inst + UMC_6_INST_DIST*umc_inst;
}
+static void umc_v6_1_clear_error_count_per_channel(struct amdgpu_device *adev,
+ uint32_t umc_reg_offset)
+{
+ uint32_t ecc_err_cnt_addr;
+ uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;
+
+ if (adev->asic_type == CHIP_ARCTURUS) {
+ /* UMC 6_1_2 registers */
+ ecc_err_cnt_sel_addr =
+ SOC15_REG_OFFSET(UMC, 0,
+ mmUMCCH0_0_EccErrCntSel_ARCT);
+ ecc_err_cnt_addr =
+ SOC15_REG_OFFSET(UMC, 0,
+ mmUMCCH0_0_EccErrCnt_ARCT);
+ } else {
+ /* UMC 6_1_1 registers */
+ ecc_err_cnt_sel_addr =
+ SOC15_REG_OFFSET(UMC, 0,
+ mmUMCCH0_0_EccErrCntSel);
+ ecc_err_cnt_addr =
+ SOC15_REG_OFFSET(UMC, 0,
+ mmUMCCH0_0_EccErrCnt);
+ }
+
+ /* select the lower chip */
+ ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr +
+ umc_reg_offset) * 4);
+ ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel,
+ UMCCH0_0_EccErrCntSel,
+ EccErrCntCsSel, 0);
+ WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4,
+ ecc_err_cnt_sel);
+
+ /* clear lower chip error count */
+ WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4,
+ UMC_V6_1_CE_CNT_INIT);
+
+ /* select the higher chip */
+ ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr +
+ umc_reg_offset) * 4);
+ ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel,
+ UMCCH0_0_EccErrCntSel,
+ EccErrCntCsSel, 1);
+ WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4,
+ ecc_err_cnt_sel);
+
+ /* clear higher chip error count */
+ WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4,
+ UMC_V6_1_CE_CNT_INIT);
+}
+
+static void umc_v6_1_clear_error_count(struct amdgpu_device *adev)
+{
+ uint32_t umc_inst = 0;
+ uint32_t ch_inst = 0;
+ uint32_t umc_reg_offset = 0;
+ uint32_t rsmu_umc_index_state =
+ umc_v6_1_get_umc_index_mode_state(adev);
+
+ if (rsmu_umc_index_state)
+ umc_v6_1_disable_umc_index_mode(adev);
+
+ LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
+ umc_reg_offset = get_umc_6_reg_offset(adev,
+ umc_inst,
+ ch_inst);
+
+ umc_v6_1_clear_error_count_per_channel(adev,
+ umc_reg_offset);
+ }
+
+ if (rsmu_umc_index_state)
+ umc_v6_1_enable_umc_index_mode(adev);
+}
+
static void umc_v6_1_query_correctable_error_count(struct amdgpu_device *adev,
uint32_t umc_reg_offset,
unsigned long *error_count)
@@ -117,23 +211,21 @@ static void umc_v6_1_query_correctable_error_count(struct amdgpu_device *adev,
ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
EccErrCntCsSel, 0);
WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);
+
ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4);
*error_count +=
(REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt) -
UMC_V6_1_CE_CNT_INIT);
- /* clear the lower chip err count */
- WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V6_1_CE_CNT_INIT);
/* select the higher chip and check the err counter */
ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
EccErrCntCsSel, 1);
WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);
+
ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4);
*error_count +=
(REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt) -
UMC_V6_1_CE_CNT_INIT);
- /* clear the higher chip err count */
- WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V6_1_CE_CNT_INIT);
/* check for SRAM correctable error
MCUMC_STATUS is a 64 bit register */
@@ -209,6 +301,8 @@ static void umc_v6_1_query_ras_error_count(struct amdgpu_device *adev,
if (rsmu_umc_index_state)
umc_v6_1_enable_umc_index_mode(adev);
+
+ umc_v6_1_clear_error_count(adev);
}
static void umc_v6_1_query_error_address(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
index 82abd8e728ab..3cafba726587 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
@@ -118,7 +118,8 @@ static int uvd_v4_2_sw_init(void *handle)
ring = &adev->uvd.inst->ring;
sprintf(ring->name, "uvd");
- r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0);
+ r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0,
+ AMDGPU_RING_PRIO_DEFAULT);
if (r)
return r;
@@ -210,13 +211,10 @@ done:
static int uvd_v4_2_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- struct amdgpu_ring *ring = &adev->uvd.inst->ring;
if (RREG32(mmUVD_STATUS) != 0)
uvd_v4_2_stop(adev);
- ring->sched.ready = false;
-
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
index 0fa8aae2d78e..a566ff926e90 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
@@ -116,7 +116,8 @@ static int uvd_v5_0_sw_init(void *handle)
ring = &adev->uvd.inst->ring;
sprintf(ring->name, "uvd");
- r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0);
+ r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0,
+ AMDGPU_RING_PRIO_DEFAULT);
if (r)
return r;
@@ -208,13 +209,10 @@ done:
static int uvd_v5_0_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- struct amdgpu_ring *ring = &adev->uvd.inst->ring;
if (RREG32(mmUVD_STATUS) != 0)
uvd_v5_0_stop(adev);
- ring->sched.ready = false;
-
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
index e0aadcaf6c8b..0a880bc101b8 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
@@ -216,7 +216,8 @@ static int uvd_v6_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle
uint64_t addr;
int i, r;
- r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
+ r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
+ AMDGPU_IB_POOL_DIRECT, &job);
if (r)
return r;
@@ -279,7 +280,8 @@ static int uvd_v6_0_enc_get_destroy_msg(struct amdgpu_ring *ring,
uint64_t addr;
int i, r;
- r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
+ r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
+ AMDGPU_IB_POOL_DIRECT, &job);
if (r)
return r;
@@ -416,7 +418,8 @@ static int uvd_v6_0_sw_init(void *handle)
ring = &adev->uvd.inst->ring;
sprintf(ring->name, "uvd");
- r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0);
+ r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0,
+ AMDGPU_RING_PRIO_DEFAULT);
if (r)
return r;
@@ -428,7 +431,9 @@ static int uvd_v6_0_sw_init(void *handle)
for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
ring = &adev->uvd.inst->ring_enc[i];
sprintf(ring->name, "uvd_enc%d", i);
- r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0);
+ r = amdgpu_ring_init(adev, ring, 512,
+ &adev->uvd.inst->irq, 0,
+ AMDGPU_RING_PRIO_DEFAULT);
if (r)
return r;
}
@@ -535,13 +540,10 @@ done:
static int uvd_v6_0_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- struct amdgpu_ring *ring = &adev->uvd.inst->ring;
if (RREG32(mmUVD_STATUS) != 0)
uvd_v6_0_stop(adev);
- ring->sched.ready = false;
-
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
index 0995378d8263..7a55457e6f9e 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
@@ -224,7 +224,8 @@ static int uvd_v7_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle
uint64_t addr;
int i, r;
- r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
+ r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
+ AMDGPU_IB_POOL_DIRECT, &job);
if (r)
return r;
@@ -286,7 +287,8 @@ static int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handl
uint64_t addr;
int i, r;
- r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
+ r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
+ AMDGPU_IB_POOL_DIRECT, &job);
if (r)
return r;
@@ -450,7 +452,9 @@ static int uvd_v7_0_sw_init(void *handle)
if (!amdgpu_sriov_vf(adev)) {
ring = &adev->uvd.inst[j].ring;
sprintf(ring->name, "uvd_%d", ring->me);
- r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst[j].irq, 0);
+ r = amdgpu_ring_init(adev, ring, 512,
+ &adev->uvd.inst[j].irq, 0,
+ AMDGPU_RING_PRIO_DEFAULT);
if (r)
return r;
}
@@ -469,7 +473,9 @@ static int uvd_v7_0_sw_init(void *handle)
else
ring->doorbell_index = adev->doorbell_index.uvd_vce.uvd_ring2_3 * 2 + 1;
}
- r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst[j].irq, 0);
+ r = amdgpu_ring_init(adev, ring, 512,
+ &adev->uvd.inst[j].irq, 0,
+ AMDGPU_RING_PRIO_DEFAULT);
if (r)
return r;
}
@@ -598,7 +604,6 @@ done:
static int uvd_v7_0_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- int i;
if (!amdgpu_sriov_vf(adev))
uvd_v7_0_stop(adev);
@@ -607,12 +612,6 @@ static int uvd_v7_0_hw_fini(void *handle)
DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
}
- for (i = 0; i < adev->uvd.num_uvd_inst; ++i) {
- if (adev->uvd.harvest_config & (1 << i))
- continue;
- adev->uvd.inst[i].ring.sched.ready = false;
- }
-
return 0;
}
@@ -1694,7 +1693,7 @@ static int uvd_v7_0_set_clockgating_state(void *handle,
enum amd_clockgating_state state)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
+ bool enable = (state == AMD_CG_STATE_GATE);
uvd_v7_0_set_bypass_mode(adev, enable);
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
index b6837fcfdba7..0e2945baf0f1 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
@@ -434,7 +434,8 @@ static int vce_v2_0_sw_init(void *handle)
ring = &adev->vce.ring[i];
sprintf(ring->name, "vce%d", i);
r = amdgpu_ring_init(adev, ring, 512,
- &adev->vce.irq, 0);
+ &adev->vce.irq, 0,
+ AMDGPU_RING_PRIO_DEFAULT);
if (r)
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
index 217db187207c..6d9108fa22e0 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
@@ -442,7 +442,8 @@ static int vce_v3_0_sw_init(void *handle)
for (i = 0; i < adev->vce.num_rings; i++) {
ring = &adev->vce.ring[i];
sprintf(ring->name, "vce%d", i);
- r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0);
+ r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0,
+ AMDGPU_RING_PRIO_DEFAULT);
if (r)
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
index 3fd102efb7af..a0fb119240f4 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
@@ -476,7 +476,8 @@ static int vce_v4_0_sw_init(void *handle)
else
ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring2_3 * 2 + 1;
}
- r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0);
+ r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0,
+ AMDGPU_RING_PRIO_DEFAULT);
if (r)
return r;
}
@@ -539,7 +540,6 @@ static int vce_v4_0_hw_init(void *handle)
static int vce_v4_0_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- int i;
if (!amdgpu_sriov_vf(adev)) {
/* vce_v4_0_wait_for_idle(handle); */
@@ -549,9 +549,6 @@ static int vce_v4_0_hw_fini(void *handle)
DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
}
- for (i = 0; i < adev->vce.num_rings; i++)
- adev->vce.ring[i].sched.ready = false;
-
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
index 09b0572b838d..1ad79155ed00 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
@@ -127,7 +127,8 @@ static int vcn_v1_0_sw_init(void *handle)
ring = &adev->vcn.inst->ring_dec;
sprintf(ring->name, "vcn_dec");
- r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0);
+ r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0,
+ AMDGPU_RING_PRIO_DEFAULT);
if (r)
return r;
@@ -145,7 +146,8 @@ static int vcn_v1_0_sw_init(void *handle)
for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
ring = &adev->vcn.inst->ring_enc[i];
sprintf(ring->name, "vcn_enc%d", i);
- r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0);
+ r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0,
+ AMDGPU_RING_PRIO_DEFAULT);
if (r)
return r;
}
@@ -227,14 +229,11 @@ done:
static int vcn_v1_0_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;
if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
RREG32_SOC15(VCN, 0, mmUVD_STATUS))
vcn_v1_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
- ring->sched.ready = false;
-
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
index ec8091a661df..90ed773695ea 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
@@ -92,6 +92,7 @@ static int vcn_v2_0_sw_init(void *handle)
struct amdgpu_ring *ring;
int i, r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ volatile struct amdgpu_fw_shared *fw_shared;
/* VCN DEC TRAP */
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
@@ -133,7 +134,8 @@ static int vcn_v2_0_sw_init(void *handle)
ring->doorbell_index = adev->doorbell_index.vcn.vcn_ring0_1 << 1;
sprintf(ring->name, "vcn_dec");
- r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0);
+ r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0,
+ AMDGPU_RING_PRIO_DEFAULT);
if (r)
return r;
@@ -163,7 +165,8 @@ static int vcn_v2_0_sw_init(void *handle)
else
ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1 + i;
sprintf(ring->name, "vcn_enc%d", i);
- r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0);
+ r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0,
+ AMDGPU_RING_PRIO_DEFAULT);
if (r)
return r;
}
@@ -174,6 +177,8 @@ static int vcn_v2_0_sw_init(void *handle)
if (r)
return r;
+ fw_shared = adev->vcn.inst->fw_shared_cpu_addr;
+ fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_VCN_MULTI_QUEUE_FLAG);
return 0;
}
@@ -188,6 +193,9 @@ static int vcn_v2_0_sw_fini(void *handle)
{
int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared_cpu_addr;
+
+ fw_shared->present_flag_0 = 0;
amdgpu_virt_free_mm_table(adev);
@@ -223,6 +231,10 @@ static int vcn_v2_0_hw_init(void *handle)
if (r)
goto done;
+	/* Disable VCN decode ring for SRIOV */
+ if (amdgpu_sriov_vf(adev))
+ ring->sched.ready = false;
+
for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
ring = &adev->vcn.inst->ring_enc[i];
r = amdgpu_ring_test_helper(ring);
@@ -248,21 +260,12 @@ done:
static int vcn_v2_0_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;
- int i;
if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
(adev->vcn.cur_state != AMD_PG_STATE_GATE &&
RREG32_SOC15(VCN, 0, mmUVD_STATUS)))
vcn_v2_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
- ring->sched.ready = false;
-
- for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
- ring = &adev->vcn.inst->ring_enc[i];
- ring->sched.ready = false;
- }
-
return 0;
}
@@ -359,6 +362,15 @@ static void vcn_v2_0_mc_resume(struct amdgpu_device *adev)
WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, 0);
WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE);
+ /* non-cache window */
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst->fw_shared_gpu_addr));
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst->fw_shared_gpu_addr));
+ WREG32_SOC15(UVD, 0, mmUVD_VCPU_NONCACHE_OFFSET0, 0);
+ WREG32_SOC15(UVD, 0, mmUVD_VCPU_NONCACHE_SIZE0,
+ AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)));
+
WREG32_SOC15(UVD, 0, mmUVD_GFX10_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
}
@@ -442,13 +454,16 @@ static void vcn_v2_0_mc_resume_dpg_mode(struct amdgpu_device *adev, bool indirec
/* non-cache window */
WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
- UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), 0, 0, indirect);
+ UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst->fw_shared_gpu_addr), 0, indirect);
WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
- UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), 0, 0, indirect);
+ UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst->fw_shared_gpu_addr), 0, indirect);
WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect);
WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
- UVD, 0, mmUVD_VCPU_NONCACHE_SIZE0), 0, 0, indirect);
+ UVD, 0, mmUVD_VCPU_NONCACHE_SIZE0),
+ AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)), 0, indirect);
/* VCN global tiling registers */
WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
@@ -773,6 +788,7 @@ static void vcn_v2_0_enable_static_power_gating(struct amdgpu_device *adev)
static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect)
{
+ volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared_cpu_addr;
struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;
uint32_t rb_bufsz, tmp;
@@ -872,6 +888,12 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect)
tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_CNTL, tmp);
+ /* Stall DPG before WPTR/RPTR reset */
+ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS),
+ UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK,
+ ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
+ fw_shared->multi_queue.decode_queue_mode |= FW_QUEUE_RING_RESET;
+
/* set the write pointer delay */
WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR_CNTL, 0);
@@ -894,11 +916,16 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect)
WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
lower_32_bits(ring->wptr));
+ fw_shared->multi_queue.decode_queue_mode &= ~FW_QUEUE_RING_RESET;
+ /* Unstall DPG */
+ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS),
+ 0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
return 0;
}
static int vcn_v2_0_start(struct amdgpu_device *adev)
{
+ volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared_cpu_addr;
struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;
uint32_t rb_bufsz, tmp;
uint32_t lmi_swap_cntl;
@@ -1033,6 +1060,7 @@ static int vcn_v2_0_start(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_CNTL, tmp);
+ fw_shared->multi_queue.decode_queue_mode |= FW_QUEUE_RING_RESET;
 	/* program the RB_BASE for ring buffer */
WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
lower_32_bits(ring->gpu_addr));
@@ -1045,20 +1073,25 @@ static int vcn_v2_0_start(struct amdgpu_device *adev)
ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR);
WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
lower_32_bits(ring->wptr));
+ fw_shared->multi_queue.decode_queue_mode &= ~FW_QUEUE_RING_RESET;
+ fw_shared->multi_queue.encode_generalpurpose_queue_mode |= FW_QUEUE_RING_RESET;
ring = &adev->vcn.inst->ring_enc[0];
WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr);
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4);
+ fw_shared->multi_queue.encode_generalpurpose_queue_mode &= ~FW_QUEUE_RING_RESET;
+ fw_shared->multi_queue.encode_lowlatency_queue_mode |= FW_QUEUE_RING_RESET;
ring = &adev->vcn.inst->ring_enc[1];
WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr);
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4);
+ fw_shared->multi_queue.encode_lowlatency_queue_mode &= ~FW_QUEUE_RING_RESET;
return 0;
}
@@ -1180,6 +1213,7 @@ static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev,
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
if (!ret_code) {
+ volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared_cpu_addr;
/* pause DPG */
reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
@@ -1189,23 +1223,38 @@ static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev,
UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code);
+ /* Stall DPG before WPTR/RPTR reset */
+ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS),
+ UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK,
+ ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
/* Restore */
+ fw_shared->multi_queue.encode_generalpurpose_queue_mode |= FW_QUEUE_RING_RESET;
ring = &adev->vcn.inst->ring_enc[0];
+ ring->wptr = 0;
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr);
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4);
WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
+ fw_shared->multi_queue.encode_generalpurpose_queue_mode &= ~FW_QUEUE_RING_RESET;
+ fw_shared->multi_queue.encode_lowlatency_queue_mode |= FW_QUEUE_RING_RESET;
ring = &adev->vcn.inst->ring_enc[1];
+ ring->wptr = 0;
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr);
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4);
WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
+ fw_shared->multi_queue.encode_lowlatency_queue_mode &= ~FW_QUEUE_RING_RESET;
+ fw_shared->multi_queue.decode_queue_mode |= FW_QUEUE_RING_RESET;
WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF);
+ fw_shared->multi_queue.decode_queue_mode &= ~FW_QUEUE_RING_RESET;
+ /* Unstall DPG */
+ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS),
+ 0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON,
@@ -1796,7 +1845,6 @@ static int vcn_v2_0_start_sriov(struct amdgpu_device *adev)
uint32_t table_size = 0;
struct mmsch_v2_0_cmd_direct_write direct_wt = { {0} };
struct mmsch_v2_0_cmd_direct_read_modify_write direct_rd_mod_wt = { {0} };
- struct mmsch_v2_0_cmd_direct_polling direct_poll = { {0} };
struct mmsch_v2_0_cmd_end end = { {0} };
struct mmsch_v2_0_init_header *header;
uint32_t *init_table = adev->virt.mm_table.cpu_addr;
@@ -1806,8 +1854,6 @@ static int vcn_v2_0_start_sriov(struct amdgpu_device *adev)
direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
direct_rd_mod_wt.cmd_header.command_type =
MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
- direct_poll.cmd_header.command_type =
- MMSCH_COMMAND__DIRECT_REG_POLLING;
end.cmd_header.command_type = MMSCH_COMMAND__END;
if (header->vcn_table_offset == 0 && header->vcn_table_size == 0) {
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
index c6363f5ad564..3c6eafb62ee6 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
@@ -86,7 +86,7 @@ static int vcn_v2_5_early_init(void *handle)
adev->vcn.num_vcn_inst = VCN25_MAX_HW_INSTANCES_ARCTURUS;
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
- harvest = RREG32_SOC15(UVD, i, mmCC_UVD_HARVESTING);
+ harvest = RREG32_SOC15(VCN, i, mmCC_UVD_HARVESTING);
if (harvest & CC_UVD_HARVESTING__UVD_DISABLE_MASK)
adev->vcn.harvest_config |= 1 << i;
}
@@ -165,6 +165,8 @@ static int vcn_v2_5_sw_init(void *handle)
return r;
for (j = 0; j < adev->vcn.num_vcn_inst; j++) {
+ volatile struct amdgpu_fw_shared *fw_shared;
+
if (adev->vcn.harvest_config & (1 << j))
continue;
adev->vcn.internal.context_id = mmUVD_CONTEXT_ID_INTERNAL_OFFSET;
@@ -175,15 +177,15 @@ static int vcn_v2_5_sw_init(void *handle)
adev->vcn.internal.gp_scratch8 = mmUVD_GP_SCRATCH8_INTERNAL_OFFSET;
adev->vcn.internal.scratch9 = mmUVD_SCRATCH9_INTERNAL_OFFSET;
- adev->vcn.inst[j].external.scratch9 = SOC15_REG_OFFSET(UVD, j, mmUVD_SCRATCH9);
+ adev->vcn.inst[j].external.scratch9 = SOC15_REG_OFFSET(VCN, j, mmUVD_SCRATCH9);
adev->vcn.internal.data0 = mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET;
- adev->vcn.inst[j].external.data0 = SOC15_REG_OFFSET(UVD, j, mmUVD_GPCOM_VCPU_DATA0);
+ adev->vcn.inst[j].external.data0 = SOC15_REG_OFFSET(VCN, j, mmUVD_GPCOM_VCPU_DATA0);
adev->vcn.internal.data1 = mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET;
- adev->vcn.inst[j].external.data1 = SOC15_REG_OFFSET(UVD, j, mmUVD_GPCOM_VCPU_DATA1);
+ adev->vcn.inst[j].external.data1 = SOC15_REG_OFFSET(VCN, j, mmUVD_GPCOM_VCPU_DATA1);
adev->vcn.internal.cmd = mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET;
- adev->vcn.inst[j].external.cmd = SOC15_REG_OFFSET(UVD, j, mmUVD_GPCOM_VCPU_CMD);
+ adev->vcn.inst[j].external.cmd = SOC15_REG_OFFSET(VCN, j, mmUVD_GPCOM_VCPU_CMD);
adev->vcn.internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET;
- adev->vcn.inst[j].external.nop = SOC15_REG_OFFSET(UVD, j, mmUVD_NO_OP);
+ adev->vcn.inst[j].external.nop = SOC15_REG_OFFSET(VCN, j, mmUVD_NO_OP);
ring = &adev->vcn.inst[j].ring_dec;
ring->use_doorbell = true;
@@ -191,7 +193,8 @@ static int vcn_v2_5_sw_init(void *handle)
ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
(amdgpu_sriov_vf(adev) ? 2*j : 8*j);
sprintf(ring->name, "vcn_dec_%d", j);
- r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[j].irq, 0);
+ r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[j].irq,
+ 0, AMDGPU_RING_PRIO_DEFAULT);
if (r)
return r;
@@ -203,10 +206,15 @@ static int vcn_v2_5_sw_init(void *handle)
(amdgpu_sriov_vf(adev) ? (1 + i + 2*j) : (2 + i + 8*j));
sprintf(ring->name, "vcn_enc_%d.%d", j, i);
- r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[j].irq, 0);
+ r = amdgpu_ring_init(adev, ring, 512,
+ &adev->vcn.inst[j].irq, 0,
+ AMDGPU_RING_PRIO_DEFAULT);
if (r)
return r;
}
+
+ fw_shared = adev->vcn.inst[j].fw_shared_cpu_addr;
+ fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_VCN_MULTI_QUEUE_FLAG);
}
if (amdgpu_sriov_vf(adev)) {
@@ -230,8 +238,16 @@ static int vcn_v2_5_sw_init(void *handle)
*/
static int vcn_v2_5_sw_fini(void *handle)
{
- int r;
+ int i, r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ volatile struct amdgpu_fw_shared *fw_shared;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+ fw_shared = adev->vcn.inst[i].fw_shared_cpu_addr;
+ fw_shared->present_flag_0 = 0;
+ }
if (amdgpu_sriov_vf(adev))
amdgpu_virt_free_mm_table(adev);
@@ -308,25 +324,16 @@ done:
static int vcn_v2_5_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- struct amdgpu_ring *ring;
- int i, j;
+ int i;
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
if (adev->vcn.harvest_config & (1 << i))
continue;
- ring = &adev->vcn.inst[i].ring_dec;
if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
(adev->vcn.cur_state != AMD_PG_STATE_GATE &&
RREG32_SOC15(VCN, i, mmUVD_STATUS)))
vcn_v2_5_set_powergating_state(adev, AMD_PG_STATE_GATE);
-
- ring->sched.ready = false;
-
- for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
- ring = &adev->vcn.inst[i].ring_enc[j];
- ring->sched.ready = false;
- }
}
return 0;
@@ -392,38 +399,47 @@ static void vcn_v2_5_mc_resume(struct amdgpu_device *adev)
continue;
/* cache window 0: fw */
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
- WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+ WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo));
- WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+ WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi));
- WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET0, 0);
+ WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_OFFSET0, 0);
offset = 0;
} else {
- WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+ WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
lower_32_bits(adev->vcn.inst[i].gpu_addr));
- WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+ WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
upper_32_bits(adev->vcn.inst[i].gpu_addr));
offset = size;
- WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET0,
+ WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_OFFSET0,
AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
}
- WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE0, size);
+ WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_SIZE0, size);
/* cache window 1: stack */
- WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
+ WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
lower_32_bits(adev->vcn.inst[i].gpu_addr + offset));
- WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
+ WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
upper_32_bits(adev->vcn.inst[i].gpu_addr + offset));
- WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET1, 0);
- WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE);
+ WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_OFFSET1, 0);
+ WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE);
/* cache window 2: context */
- WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
+ WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
lower_32_bits(adev->vcn.inst[i].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
- WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
+ WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
upper_32_bits(adev->vcn.inst[i].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
- WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET2, 0);
- WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE);
+ WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_OFFSET2, 0);
+ WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE);
+
+ /* non-cache window */
+ WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[i].fw_shared_gpu_addr));
+ WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[i].fw_shared_gpu_addr));
+ WREG32_SOC15(VCN, i, mmUVD_VCPU_NONCACHE_OFFSET0, 0);
+ WREG32_SOC15(VCN, i, mmUVD_VCPU_NONCACHE_SIZE0,
+ AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)));
}
}
@@ -436,88 +452,91 @@ static void vcn_v2_5_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
if (!indirect) {
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
- UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ VCN, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_lo), 0, indirect);
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
- UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ VCN, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_hi), 0, indirect);
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
- UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
+ VCN, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
} else {
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
- UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect);
+ VCN, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect);
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
- UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect);
+ VCN, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect);
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
- UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
+ VCN, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
}
offset = 0;
} else {
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
- UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ VCN, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
- UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ VCN, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
offset = size;
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
- UVD, 0, mmUVD_VCPU_CACHE_OFFSET0),
+ VCN, 0, mmUVD_VCPU_CACHE_OFFSET0),
AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect);
}
if (!indirect)
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
- UVD, 0, mmUVD_VCPU_CACHE_SIZE0), size, 0, indirect);
+ VCN, 0, mmUVD_VCPU_CACHE_SIZE0), size, 0, indirect);
else
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
- UVD, 0, mmUVD_VCPU_CACHE_SIZE0), 0, 0, indirect);
+ VCN, 0, mmUVD_VCPU_CACHE_SIZE0), 0, 0, indirect);
/* cache window 1: stack */
if (!indirect) {
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
- UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
+ VCN, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
- UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
+ VCN, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
- UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
+ VCN, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
} else {
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
- UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect);
+ VCN, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect);
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
- UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect);
+ VCN, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect);
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
- UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
+ VCN, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
}
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
- UVD, 0, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect);
+ VCN, 0, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect);
/* cache window 2: context */
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
- UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
+ VCN, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
- UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
+ VCN, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
- UVD, 0, mmUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect);
+ VCN, 0, mmUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect);
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
- UVD, 0, mmUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect);
+ VCN, 0, mmUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect);
/* non-cache window */
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
- UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), 0, 0, indirect);
+ VCN, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].fw_shared_gpu_addr), 0, indirect);
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
- UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), 0, 0, indirect);
+ VCN, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].fw_shared_gpu_addr), 0, indirect);
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
- UVD, 0, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect);
+ VCN, 0, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect);
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
- UVD, 0, mmUVD_VCPU_NONCACHE_SIZE0), 0, 0, indirect);
+ VCN, 0, mmUVD_VCPU_NONCACHE_SIZE0),
+ AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)), 0, indirect);
/* VCN global tiling registers */
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
- UVD, 0, mmUVD_GFX8_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect);
+ VCN, 0, mmUVD_GFX8_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect);
}
/**
@@ -671,19 +690,19 @@ static void vcn_v2_5_clock_gating_dpg_mode(struct amdgpu_device *adev,
UVD_CGC_CTRL__VCPU_MODE_MASK |
UVD_CGC_CTRL__MMSCH_MODE_MASK);
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
- UVD, 0, mmUVD_CGC_CTRL), reg_data, sram_sel, indirect);
+ VCN, 0, mmUVD_CGC_CTRL), reg_data, sram_sel, indirect);
/* turn off clock gating */
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
- UVD, 0, mmUVD_CGC_GATE), 0, sram_sel, indirect);
+ VCN, 0, mmUVD_CGC_GATE), 0, sram_sel, indirect);
/* turn on SUVD clock gating */
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
- UVD, 0, mmUVD_SUVD_CGC_GATE), 1, sram_sel, indirect);
+ VCN, 0, mmUVD_SUVD_CGC_GATE), 1, sram_sel, indirect);
/* turn on sw mode in UVD_SUVD_CGC_CTRL */
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
- UVD, 0, mmUVD_SUVD_CGC_CTRL), 0, sram_sel, indirect);
+ VCN, 0, mmUVD_SUVD_CGC_CTRL), 0, sram_sel, indirect);
}
/**
@@ -750,17 +769,18 @@ static void vcn_v2_5_enable_clock_gating(struct amdgpu_device *adev)
static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
{
+ volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared_cpu_addr;
struct amdgpu_ring *ring;
uint32_t rb_bufsz, tmp;
/* disable register anti-hang mechanism */
- WREG32_P(SOC15_REG_OFFSET(UVD, inst_idx, mmUVD_POWER_STATUS), 1,
+ WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), 1,
~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
/* enable dynamic power gating mode */
- tmp = RREG32_SOC15(UVD, inst_idx, mmUVD_POWER_STATUS);
+ tmp = RREG32_SOC15(VCN, inst_idx, mmUVD_POWER_STATUS);
tmp |= UVD_POWER_STATUS__UVD_PG_MODE_MASK;
tmp |= UVD_POWER_STATUS__UVD_PG_EN_MASK;
- WREG32_SOC15(UVD, inst_idx, mmUVD_POWER_STATUS, tmp);
+ WREG32_SOC15(VCN, inst_idx, mmUVD_POWER_STATUS, tmp);
if (indirect)
adev->vcn.inst[inst_idx].dpg_sram_curr_addr = (uint32_t*)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr;
@@ -773,11 +793,11 @@ static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
tmp |= UVD_VCPU_CNTL__BLK_RST_MASK;
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
- UVD, 0, mmUVD_VCPU_CNTL), tmp, 0, indirect);
+ VCN, 0, mmUVD_VCPU_CNTL), tmp, 0, indirect);
 	/* disable master interrupt */
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
- UVD, 0, mmUVD_MASTINT_EN), 0, 0, indirect);
+ VCN, 0, mmUVD_MASTINT_EN), 0, 0, indirect);
/* setup mmUVD_LMI_CTRL */
tmp = (0x8 | UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
@@ -789,28 +809,28 @@ static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
(8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
0x00100000L);
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
- UVD, 0, mmUVD_LMI_CTRL), tmp, 0, indirect);
+ VCN, 0, mmUVD_LMI_CTRL), tmp, 0, indirect);
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
- UVD, 0, mmUVD_MPC_CNTL),
+ VCN, 0, mmUVD_MPC_CNTL),
0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT, 0, indirect);
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
- UVD, 0, mmUVD_MPC_SET_MUXA0),
+ VCN, 0, mmUVD_MPC_SET_MUXA0),
((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
(0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
(0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
(0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)), 0, indirect);
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
- UVD, 0, mmUVD_MPC_SET_MUXB0),
+ VCN, 0, mmUVD_MPC_SET_MUXB0),
((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
(0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
(0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
(0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)), 0, indirect);
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
- UVD, 0, mmUVD_MPC_SET_MUX),
+ VCN, 0, mmUVD_MPC_SET_MUX),
((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
(0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
(0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0, indirect);
@@ -818,26 +838,26 @@ static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
vcn_v2_5_mc_resume_dpg_mode(adev, inst_idx, indirect);
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
- UVD, 0, mmUVD_REG_XX_MASK), 0x10, 0, indirect);
+ VCN, 0, mmUVD_REG_XX_MASK), 0x10, 0, indirect);
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
- UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK), 0x3, 0, indirect);
+ VCN, 0, mmUVD_RBC_XX_IB_REG_CHECK), 0x3, 0, indirect);
/* enable LMI MC and UMC channels */
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
- UVD, 0, mmUVD_LMI_CTRL2), 0, 0, indirect);
+ VCN, 0, mmUVD_LMI_CTRL2), 0, 0, indirect);
/* unblock VCPU register access */
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
- UVD, 0, mmUVD_RB_ARB_CTRL), 0, 0, indirect);
+ VCN, 0, mmUVD_RB_ARB_CTRL), 0, 0, indirect);
tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
- UVD, 0, mmUVD_VCPU_CNTL), tmp, 0, indirect);
+ VCN, 0, mmUVD_VCPU_CNTL), tmp, 0, indirect);
/* enable master interrupt */
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
- UVD, 0, mmUVD_MASTINT_EN),
+ VCN, 0, mmUVD_MASTINT_EN),
UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect);
if (indirect)
@@ -853,30 +873,41 @@ static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
- WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_CNTL, tmp);
+ WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_CNTL, tmp);
+
+ /* Stall DPG before WPTR/RPTR reset */
+ WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
+ UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK,
+ ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
+ fw_shared->multi_queue.decode_queue_mode |= FW_QUEUE_RING_RESET;
/* set the write pointer delay */
- WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_WPTR_CNTL, 0);
+ WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR_CNTL, 0);
/* set the wb address */
- WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_RPTR_ADDR,
+ WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_RPTR_ADDR,
(upper_32_bits(ring->gpu_addr) >> 2));
	/* program the RB_BASE for ring buffer */
- WREG32_SOC15(UVD, inst_idx, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
+ WREG32_SOC15(VCN, inst_idx, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
lower_32_bits(ring->gpu_addr));
- WREG32_SOC15(UVD, inst_idx, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
+ WREG32_SOC15(VCN, inst_idx, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
upper_32_bits(ring->gpu_addr));
/* Initialize the ring buffer's read and write pointers */
- WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_RPTR, 0);
+ WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_RPTR, 0);
- WREG32_SOC15(UVD, inst_idx, mmUVD_SCRATCH2, 0);
+ WREG32_SOC15(VCN, inst_idx, mmUVD_SCRATCH2, 0);
- ring->wptr = RREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_RPTR);
- WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_WPTR,
+ ring->wptr = RREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_RPTR);
+ WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR,
lower_32_bits(ring->wptr));
+ fw_shared->multi_queue.decode_queue_mode &= ~FW_QUEUE_RING_RESET;
+ /* Unstall DPG */
+ WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
+ 0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
+
return 0;
}
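The ring-buffer reprogramming above is bracketed twice: the STALL_DPG_POWER_UP bit keeps the power-gating state machine from cycling while the pointers are inconsistent, and the FW_QUEUE_RING_RESET flag in the shared firmware structure tells the firmware that the queue's read/write pointers are being reset. A condensed sketch of that bracket, using the same helpers as this file (illustrative, not a replacement for the full sequence):

static void vcn_dpg_reprogram_dec_ring(struct amdgpu_device *adev, int inst,
				       struct amdgpu_ring *ring)
{
	volatile struct amdgpu_fw_shared *fw_shared =
		adev->vcn.inst[inst].fw_shared_cpu_addr;

	/* hold DPG up while the pointers are rewritten */
	WREG32_P(SOC15_REG_OFFSET(VCN, inst, mmUVD_POWER_STATUS),
		 UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK,
		 ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
	fw_shared->multi_queue.decode_queue_mode |= FW_QUEUE_RING_RESET;

	WREG32_SOC15(VCN, inst, mmUVD_RBC_RB_RPTR, 0);
	ring->wptr = RREG32_SOC15(VCN, inst, mmUVD_RBC_RB_RPTR);
	WREG32_SOC15(VCN, inst, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr));

	fw_shared->multi_queue.decode_queue_mode &= ~FW_QUEUE_RING_RESET;
	/* allow DPG to power down again */
	WREG32_P(SOC15_REG_OFFSET(VCN, inst, mmUVD_POWER_STATUS),
		 0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
}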
@@ -898,12 +929,12 @@ static int vcn_v2_5_start(struct amdgpu_device *adev)
}
/* disable register anti-hang mechanism */
- WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_POWER_STATUS), 0,
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_POWER_STATUS), 0,
~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
/* set uvd status busy */
- tmp = RREG32_SOC15(UVD, i, mmUVD_STATUS) | UVD_STATUS__UVD_BUSY;
- WREG32_SOC15(UVD, i, mmUVD_STATUS, tmp);
+ tmp = RREG32_SOC15(VCN, i, mmUVD_STATUS) | UVD_STATUS__UVD_BUSY;
+ WREG32_SOC15(VCN, i, mmUVD_STATUS, tmp);
}
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
@@ -916,44 +947,44 @@ static int vcn_v2_5_start(struct amdgpu_device *adev)
if (adev->vcn.harvest_config & (1 << i))
continue;
/* enable VCPU clock */
- WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL),
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK);
/* disable master interrupt */
- WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_MASTINT_EN), 0,
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_MASTINT_EN), 0,
~UVD_MASTINT_EN__VCPU_EN_MASK);
/* setup mmUVD_LMI_CTRL */
- tmp = RREG32_SOC15(UVD, i, mmUVD_LMI_CTRL);
+ tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL);
tmp &= ~0xff;
- WREG32_SOC15(UVD, i, mmUVD_LMI_CTRL, tmp | 0x8|
+ WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL, tmp | 0x8|
UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);
/* setup mmUVD_MPC_CNTL */
- tmp = RREG32_SOC15(UVD, i, mmUVD_MPC_CNTL);
+ tmp = RREG32_SOC15(VCN, i, mmUVD_MPC_CNTL);
tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK;
tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT;
WREG32_SOC15(VCN, i, mmUVD_MPC_CNTL, tmp);
/* setup UVD_MPC_SET_MUXA0 */
- WREG32_SOC15(UVD, i, mmUVD_MPC_SET_MUXA0,
+ WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUXA0,
((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
(0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
(0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
(0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)));
/* setup UVD_MPC_SET_MUXB0 */
- WREG32_SOC15(UVD, i, mmUVD_MPC_SET_MUXB0,
+ WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUXB0,
((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
(0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
(0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
(0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)));
/* setup mmUVD_MPC_SET_MUX */
- WREG32_SOC15(UVD, i, mmUVD_MPC_SET_MUX,
+ WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUX,
((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
(0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
(0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));
@@ -962,30 +993,31 @@ static int vcn_v2_5_start(struct amdgpu_device *adev)
vcn_v2_5_mc_resume(adev);
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[i].fw_shared_cpu_addr;
if (adev->vcn.harvest_config & (1 << i))
continue;
/* VCN global tiling registers */
- WREG32_SOC15(UVD, i, mmUVD_GFX8_ADDR_CONFIG,
+ WREG32_SOC15(VCN, i, mmUVD_GFX8_ADDR_CONFIG,
adev->gfx.config.gb_addr_config);
- WREG32_SOC15(UVD, i, mmUVD_GFX8_ADDR_CONFIG,
+ WREG32_SOC15(VCN, i, mmUVD_GFX8_ADDR_CONFIG,
adev->gfx.config.gb_addr_config);
/* enable LMI MC and UMC channels */
- WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL2), 0,
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_LMI_CTRL2), 0,
~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
/* unblock VCPU register access */
- WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_RB_ARB_CTRL), 0,
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_RB_ARB_CTRL), 0,
~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
- WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL), 0,
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
~UVD_VCPU_CNTL__BLK_RST_MASK);
for (k = 0; k < 10; ++k) {
uint32_t status;
for (j = 0; j < 100; ++j) {
- status = RREG32_SOC15(UVD, i, mmUVD_STATUS);
+ status = RREG32_SOC15(VCN, i, mmUVD_STATUS);
if (status & 2)
break;
if (amdgpu_emu_mode == 1)
@@ -998,11 +1030,11 @@ static int vcn_v2_5_start(struct amdgpu_device *adev)
break;
DRM_ERROR("VCN decode not responding, trying to reset the VCPU!!!\n");
- WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL),
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
UVD_VCPU_CNTL__BLK_RST_MASK,
~UVD_VCPU_CNTL__BLK_RST_MASK);
mdelay(10);
- WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL), 0,
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
~UVD_VCPU_CNTL__BLK_RST_MASK);
mdelay(10);
@@ -1015,15 +1047,15 @@ static int vcn_v2_5_start(struct amdgpu_device *adev)
}
/* enable master interrupt */
- WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_MASTINT_EN),
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_MASTINT_EN),
UVD_MASTINT_EN__VCPU_EN_MASK,
~UVD_MASTINT_EN__VCPU_EN_MASK);
/* clear the busy bit of VCN_STATUS */
- WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS), 0,
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_STATUS), 0,
~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
- WREG32_SOC15(UVD, i, mmUVD_LMI_RBC_RB_VMID, 0);
+ WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_VMID, 0);
ring = &adev->vcn.inst[i].ring_dec;
/* force RBC into idle state */
@@ -1033,33 +1065,40 @@ static int vcn_v2_5_start(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
- WREG32_SOC15(UVD, i, mmUVD_RBC_RB_CNTL, tmp);
+ WREG32_SOC15(VCN, i, mmUVD_RBC_RB_CNTL, tmp);
+ fw_shared->multi_queue.decode_queue_mode |= FW_QUEUE_RING_RESET;
		/* program the RB_BASE for ring buffer */
- WREG32_SOC15(UVD, i, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
+ WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
lower_32_bits(ring->gpu_addr));
- WREG32_SOC15(UVD, i, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
+ WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
upper_32_bits(ring->gpu_addr));
/* Initialize the ring buffer's read and write pointers */
- WREG32_SOC15(UVD, i, mmUVD_RBC_RB_RPTR, 0);
+ WREG32_SOC15(VCN, i, mmUVD_RBC_RB_RPTR, 0);
- ring->wptr = RREG32_SOC15(UVD, i, mmUVD_RBC_RB_RPTR);
- WREG32_SOC15(UVD, i, mmUVD_RBC_RB_WPTR,
+ ring->wptr = RREG32_SOC15(VCN, i, mmUVD_RBC_RB_RPTR);
+ WREG32_SOC15(VCN, i, mmUVD_RBC_RB_WPTR,
lower_32_bits(ring->wptr));
- ring = &adev->vcn.inst[i].ring_enc[0];
- WREG32_SOC15(UVD, i, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
- WREG32_SOC15(UVD, i, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
- WREG32_SOC15(UVD, i, mmUVD_RB_BASE_LO, ring->gpu_addr);
- WREG32_SOC15(UVD, i, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
- WREG32_SOC15(UVD, i, mmUVD_RB_SIZE, ring->ring_size / 4);
+ fw_shared->multi_queue.decode_queue_mode &= ~FW_QUEUE_RING_RESET;
+ fw_shared->multi_queue.encode_generalpurpose_queue_mode |= FW_QUEUE_RING_RESET;
+ ring = &adev->vcn.inst[i].ring_enc[0];
+ WREG32_SOC15(VCN, i, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(VCN, i, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO, ring->gpu_addr);
+ WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, i, mmUVD_RB_SIZE, ring->ring_size / 4);
+ fw_shared->multi_queue.encode_generalpurpose_queue_mode &= ~FW_QUEUE_RING_RESET;
+
+ fw_shared->multi_queue.encode_lowlatency_queue_mode |= FW_QUEUE_RING_RESET;
ring = &adev->vcn.inst[i].ring_enc[1];
- WREG32_SOC15(UVD, i, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
- WREG32_SOC15(UVD, i, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
- WREG32_SOC15(UVD, i, mmUVD_RB_BASE_LO2, ring->gpu_addr);
- WREG32_SOC15(UVD, i, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
- WREG32_SOC15(UVD, i, mmUVD_RB_SIZE2, ring->ring_size / 4);
+ WREG32_SOC15(VCN, i, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
+ WREG32_SOC15(VCN, i, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
+ WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO2, ring->gpu_addr);
+ WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, i, mmUVD_RB_SIZE2, ring->ring_size / 4);
+ fw_shared->multi_queue.encode_lowlatency_queue_mode &= ~FW_QUEUE_RING_RESET;
}
return 0;
@@ -1079,33 +1118,33 @@ static int vcn_v2_5_mmsch_start(struct amdgpu_device *adev,
* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of
* memory descriptor location
*/
- WREG32_SOC15(UVD, 0, mmMMSCH_VF_CTX_ADDR_LO, lower_32_bits(addr));
- WREG32_SOC15(UVD, 0, mmMMSCH_VF_CTX_ADDR_HI, upper_32_bits(addr));
+ WREG32_SOC15(VCN, 0, mmMMSCH_VF_CTX_ADDR_LO, lower_32_bits(addr));
+ WREG32_SOC15(VCN, 0, mmMMSCH_VF_CTX_ADDR_HI, upper_32_bits(addr));
/* 2, update vmid of descriptor */
- data = RREG32_SOC15(UVD, 0, mmMMSCH_VF_VMID);
+ data = RREG32_SOC15(VCN, 0, mmMMSCH_VF_VMID);
data &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
/* use domain0 for MM scheduler */
data |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
- WREG32_SOC15(UVD, 0, mmMMSCH_VF_VMID, data);
+ WREG32_SOC15(VCN, 0, mmMMSCH_VF_VMID, data);
/* 3, notify mmsch about the size of this descriptor */
- WREG32_SOC15(UVD, 0, mmMMSCH_VF_CTX_SIZE, size);
+ WREG32_SOC15(VCN, 0, mmMMSCH_VF_CTX_SIZE, size);
/* 4, set resp to zero */
- WREG32_SOC15(UVD, 0, mmMMSCH_VF_MAILBOX_RESP, 0);
+ WREG32_SOC15(VCN, 0, mmMMSCH_VF_MAILBOX_RESP, 0);
/*
* 5, kick off the initialization and wait until
* VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero
*/
- WREG32_SOC15(UVD, 0, mmMMSCH_VF_MAILBOX_HOST, 0x10000001);
+ WREG32_SOC15(VCN, 0, mmMMSCH_VF_MAILBOX_HOST, 0x10000001);
- data = RREG32_SOC15(UVD, 0, mmMMSCH_VF_MAILBOX_RESP);
+ data = RREG32_SOC15(VCN, 0, mmMMSCH_VF_MAILBOX_RESP);
loop = 10;
while ((data & 0x10000002) != 0x10000002) {
udelay(100);
- data = RREG32_SOC15(UVD, 0, mmMMSCH_VF_MAILBOX_RESP);
+ data = RREG32_SOC15(VCN, 0, mmMMSCH_VF_MAILBOX_RESP);
loop--;
if (!loop)
break;
@@ -1128,14 +1167,12 @@ static int vcn_v2_5_sriov_start(struct amdgpu_device *adev)
uint32_t table_size = 0;
struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
- struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
struct mmsch_v1_0_cmd_end end = { { 0 } };
uint32_t *init_table = adev->virt.mm_table.cpu_addr;
struct mmsch_v1_1_init_header *header = (struct mmsch_v1_1_init_header *)init_table;
direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
- direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
end.cmd_header.command_type = MMSCH_COMMAND__END;
header->version = MMSCH_VERSION;
@@ -1150,93 +1187,93 @@ static int vcn_v2_5_sriov_start(struct amdgpu_device *adev)
table_size = 0;
MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(
- SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS),
+ SOC15_REG_OFFSET(VCN, i, mmUVD_STATUS),
~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY);
size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
/* mc resume*/
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
MMSCH_V1_0_INSERT_DIRECT_WT(
- SOC15_REG_OFFSET(UVD, i,
+ SOC15_REG_OFFSET(VCN, i,
mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo);
MMSCH_V1_0_INSERT_DIRECT_WT(
- SOC15_REG_OFFSET(UVD, i,
+ SOC15_REG_OFFSET(VCN, i,
mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi);
offset = 0;
MMSCH_V1_0_INSERT_DIRECT_WT(
- SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET0), 0);
+ SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CACHE_OFFSET0), 0);
} else {
MMSCH_V1_0_INSERT_DIRECT_WT(
- SOC15_REG_OFFSET(UVD, i,
+ SOC15_REG_OFFSET(VCN, i,
mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
lower_32_bits(adev->vcn.inst[i].gpu_addr));
MMSCH_V1_0_INSERT_DIRECT_WT(
- SOC15_REG_OFFSET(UVD, i,
+ SOC15_REG_OFFSET(VCN, i,
mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
upper_32_bits(adev->vcn.inst[i].gpu_addr));
offset = size;
MMSCH_V1_0_INSERT_DIRECT_WT(
- SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET0),
+ SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CACHE_OFFSET0),
AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
}
MMSCH_V1_0_INSERT_DIRECT_WT(
- SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE0),
+ SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CACHE_SIZE0),
size);
MMSCH_V1_0_INSERT_DIRECT_WT(
- SOC15_REG_OFFSET(UVD, i,
+ SOC15_REG_OFFSET(VCN, i,
mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
lower_32_bits(adev->vcn.inst[i].gpu_addr + offset));
MMSCH_V1_0_INSERT_DIRECT_WT(
- SOC15_REG_OFFSET(UVD, i,
+ SOC15_REG_OFFSET(VCN, i,
mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
upper_32_bits(adev->vcn.inst[i].gpu_addr + offset));
MMSCH_V1_0_INSERT_DIRECT_WT(
- SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET1),
+ SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CACHE_OFFSET1),
0);
MMSCH_V1_0_INSERT_DIRECT_WT(
- SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE1),
+ SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CACHE_SIZE1),
AMDGPU_VCN_STACK_SIZE);
MMSCH_V1_0_INSERT_DIRECT_WT(
- SOC15_REG_OFFSET(UVD, i,
+ SOC15_REG_OFFSET(VCN, i,
mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
lower_32_bits(adev->vcn.inst[i].gpu_addr + offset +
AMDGPU_VCN_STACK_SIZE));
MMSCH_V1_0_INSERT_DIRECT_WT(
- SOC15_REG_OFFSET(UVD, i,
+ SOC15_REG_OFFSET(VCN, i,
mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
upper_32_bits(adev->vcn.inst[i].gpu_addr + offset +
AMDGPU_VCN_STACK_SIZE));
MMSCH_V1_0_INSERT_DIRECT_WT(
- SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET2),
+ SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CACHE_OFFSET2),
0);
MMSCH_V1_0_INSERT_DIRECT_WT(
- SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE2),
+ SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CACHE_SIZE2),
AMDGPU_VCN_CONTEXT_SIZE);
ring = &adev->vcn.inst[i].ring_enc[0];
ring->wptr = 0;
MMSCH_V1_0_INSERT_DIRECT_WT(
- SOC15_REG_OFFSET(UVD, i, mmUVD_RB_BASE_LO),
+ SOC15_REG_OFFSET(VCN, i, mmUVD_RB_BASE_LO),
lower_32_bits(ring->gpu_addr));
MMSCH_V1_0_INSERT_DIRECT_WT(
- SOC15_REG_OFFSET(UVD, i, mmUVD_RB_BASE_HI),
+ SOC15_REG_OFFSET(VCN, i, mmUVD_RB_BASE_HI),
upper_32_bits(ring->gpu_addr));
MMSCH_V1_0_INSERT_DIRECT_WT(
- SOC15_REG_OFFSET(UVD, i, mmUVD_RB_SIZE),
+ SOC15_REG_OFFSET(VCN, i, mmUVD_RB_SIZE),
ring->ring_size / 4);
ring = &adev->vcn.inst[i].ring_dec;
ring->wptr = 0;
MMSCH_V1_0_INSERT_DIRECT_WT(
- SOC15_REG_OFFSET(UVD, i,
+ SOC15_REG_OFFSET(VCN, i,
mmUVD_LMI_RBC_RB_64BIT_BAR_LOW),
lower_32_bits(ring->gpu_addr));
MMSCH_V1_0_INSERT_DIRECT_WT(
- SOC15_REG_OFFSET(UVD, i,
+ SOC15_REG_OFFSET(VCN, i,
mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH),
upper_32_bits(ring->gpu_addr));
@@ -1248,7 +1285,7 @@ static int vcn_v2_5_sriov_start(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
MMSCH_V1_0_INSERT_DIRECT_WT(
- SOC15_REG_OFFSET(UVD, i, mmUVD_RBC_RB_CNTL), tmp);
+ SOC15_REG_OFFSET(VCN, i, mmUVD_RBC_RB_CNTL), tmp);
/* add end packet */
memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
@@ -1269,24 +1306,24 @@ static int vcn_v2_5_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
uint32_t tmp;
/* Wait for power status to be 1 */
- SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_POWER_STATUS, 1,
+ SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 1,
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
/* wait for read ptr to be equal to write ptr */
- tmp = RREG32_SOC15(UVD, inst_idx, mmUVD_RB_WPTR);
- SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_RB_RPTR, tmp, 0xFFFFFFFF, ret_code);
+ tmp = RREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR);
+ SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_RB_RPTR, tmp, 0xFFFFFFFF, ret_code);
- tmp = RREG32_SOC15(UVD, inst_idx, mmUVD_RB_WPTR2);
- SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_RB_RPTR2, tmp, 0xFFFFFFFF, ret_code);
+ tmp = RREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR2);
+ SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_RB_RPTR2, tmp, 0xFFFFFFFF, ret_code);
- tmp = RREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_WPTR) & 0x7FFFFFFF;
- SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_RBC_RB_RPTR, tmp, 0xFFFFFFFF, ret_code);
+ tmp = RREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR) & 0x7FFFFFFF;
+ SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_RBC_RB_RPTR, tmp, 0xFFFFFFFF, ret_code);
- SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_POWER_STATUS, 1,
+ SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 1,
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
/* disable dynamic power gating mode */
- WREG32_P(SOC15_REG_OFFSET(UVD, inst_idx, mmUVD_POWER_STATUS), 0,
+ WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), 0,
~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
return 0;
@@ -1330,17 +1367,17 @@ static int vcn_v2_5_stop(struct amdgpu_device *adev)
return r;
/* block VCPU register access */
- WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_RB_ARB_CTRL),
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_RB_ARB_CTRL),
UVD_RB_ARB_CTRL__VCPU_DIS_MASK,
~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
/* reset VCPU */
- WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL),
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
UVD_VCPU_CNTL__BLK_RST_MASK,
~UVD_VCPU_CNTL__BLK_RST_MASK);
/* disable VCPU clock */
- WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL), 0,
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
~(UVD_VCPU_CNTL__CLK_EN_MASK));
/* clear status */
@@ -1349,7 +1386,7 @@ static int vcn_v2_5_stop(struct amdgpu_device *adev)
vcn_v2_5_enable_clock_gating(adev);
/* enable register anti-hang mechanism */
- WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_POWER_STATUS),
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_POWER_STATUS),
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK,
~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
}
@@ -1365,55 +1402,69 @@ static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev,
{
struct amdgpu_ring *ring;
uint32_t reg_data = 0;
- int ret_code;
+ int ret_code = 0;
/* pause/unpause if state is changed */
if (adev->vcn.inst[inst_idx].pause_state.fw_based != new_state->fw_based) {
DRM_DEBUG("dpg pause state changed %d -> %d",
adev->vcn.inst[inst_idx].pause_state.fw_based, new_state->fw_based);
- reg_data = RREG32_SOC15(UVD, inst_idx, mmUVD_DPG_PAUSE) &
+ reg_data = RREG32_SOC15(VCN, inst_idx, mmUVD_DPG_PAUSE) &
(~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
if (new_state->fw_based == VCN_DPG_STATE__PAUSE) {
- ret_code = 0;
- SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_POWER_STATUS, 0x1,
+ SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 0x1,
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
if (!ret_code) {
+ volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared_cpu_addr;
+
/* pause DPG */
reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
- WREG32_SOC15(UVD, inst_idx, mmUVD_DPG_PAUSE, reg_data);
+ WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_PAUSE, reg_data);
/* wait for ACK */
- SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_DPG_PAUSE,
+ SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_DPG_PAUSE,
UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code);
+ /* Stall DPG before WPTR/RPTR reset */
+ WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
+ UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK,
+ ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
+
/* Restore */
+ fw_shared->multi_queue.encode_generalpurpose_queue_mode |= FW_QUEUE_RING_RESET;
ring = &adev->vcn.inst[inst_idx].ring_enc[0];
- WREG32_SOC15(UVD, inst_idx, mmUVD_RB_BASE_LO, ring->gpu_addr);
- WREG32_SOC15(UVD, inst_idx, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
- WREG32_SOC15(UVD, inst_idx, mmUVD_RB_SIZE, ring->ring_size / 4);
- WREG32_SOC15(UVD, inst_idx, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
- WREG32_SOC15(UVD, inst_idx, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
-
+ ring->wptr = 0;
+ WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_LO, ring->gpu_addr);
+ WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, inst_idx, mmUVD_RB_SIZE, ring->ring_size / 4);
+ WREG32_SOC15(VCN, inst_idx, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
+ fw_shared->multi_queue.encode_generalpurpose_queue_mode &= ~FW_QUEUE_RING_RESET;
+
+ fw_shared->multi_queue.encode_lowlatency_queue_mode |= FW_QUEUE_RING_RESET;
ring = &adev->vcn.inst[inst_idx].ring_enc[1];
- WREG32_SOC15(UVD, inst_idx, mmUVD_RB_BASE_LO2, ring->gpu_addr);
- WREG32_SOC15(UVD, inst_idx, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
- WREG32_SOC15(UVD, inst_idx, mmUVD_RB_SIZE2, ring->ring_size / 4);
- WREG32_SOC15(UVD, inst_idx, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
- WREG32_SOC15(UVD, inst_idx, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
-
- WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_WPTR,
- RREG32_SOC15(UVD, inst_idx, mmUVD_SCRATCH2) & 0x7FFFFFFF);
-
- SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_POWER_STATUS,
+ ring->wptr = 0;
+ WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_LO2, ring->gpu_addr);
+ WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, inst_idx, mmUVD_RB_SIZE2, ring->ring_size / 4);
+ WREG32_SOC15(VCN, inst_idx, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
+ WREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
+ fw_shared->multi_queue.encode_lowlatency_queue_mode &= ~FW_QUEUE_RING_RESET;
+
+ /* Unstall DPG */
+ WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
+ 0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
+
+ SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS,
UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
}
} else {
- /* unpause dpg, no need to wait */
reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
- WREG32_SOC15(UVD, inst_idx, mmUVD_DPG_PAUSE, reg_data);
+ WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_PAUSE, reg_data);
+ SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 0x1,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
}
adev->vcn.inst[inst_idx].pause_state.fw_based = new_state->fw_based;
}
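The pause path above is a request/acknowledge handshake on a single register: the driver sets NJ_PAUSE_DPG_REQ and polls for NJ_PAUSE_DPG_ACK before touching the encode rings, and the unpause branch now also waits on UVD_POWER_STATUS instead of returning immediately. A minimal sketch of the request side, with the same helpers as this file (illustrative only):

static int vcn_dpg_pause_request(struct amdgpu_device *adev, int inst_idx)
{
	int ret = 0;
	uint32_t reg_data;

	reg_data = RREG32_SOC15(VCN, inst_idx, mmUVD_DPG_PAUSE) &
		   ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK;
	reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
	WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_PAUSE, reg_data);

	/* wait for the firmware to acknowledge the pause request */
	SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_DPG_PAUSE,
			   UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
			   UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret);
	return ret;
}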
@@ -1432,7 +1483,7 @@ static uint64_t vcn_v2_5_dec_ring_get_rptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- return RREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_RPTR);
+ return RREG32_SOC15(VCN, ring->me, mmUVD_RBC_RB_RPTR);
}
/**
@@ -1449,7 +1500,7 @@ static uint64_t vcn_v2_5_dec_ring_get_wptr(struct amdgpu_ring *ring)
if (ring->use_doorbell)
return adev->wb.wb[ring->wptr_offs];
else
- return RREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_WPTR);
+ return RREG32_SOC15(VCN, ring->me, mmUVD_RBC_RB_WPTR);
}
/**
@@ -1463,15 +1514,11 @@ static void vcn_v2_5_dec_ring_set_wptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
- WREG32_SOC15(UVD, ring->me, mmUVD_SCRATCH2,
- lower_32_bits(ring->wptr) | 0x80000000);
-
if (ring->use_doorbell) {
adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
} else {
- WREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(VCN, ring->me, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr));
}
}
@@ -1517,9 +1564,9 @@ static uint64_t vcn_v2_5_enc_ring_get_rptr(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
if (ring == &adev->vcn.inst[ring->me].ring_enc[0])
- return RREG32_SOC15(UVD, ring->me, mmUVD_RB_RPTR);
+ return RREG32_SOC15(VCN, ring->me, mmUVD_RB_RPTR);
else
- return RREG32_SOC15(UVD, ring->me, mmUVD_RB_RPTR2);
+ return RREG32_SOC15(VCN, ring->me, mmUVD_RB_RPTR2);
}
/**
@@ -1537,12 +1584,12 @@ static uint64_t vcn_v2_5_enc_ring_get_wptr(struct amdgpu_ring *ring)
if (ring->use_doorbell)
return adev->wb.wb[ring->wptr_offs];
else
- return RREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR);
+ return RREG32_SOC15(VCN, ring->me, mmUVD_RB_WPTR);
} else {
if (ring->use_doorbell)
return adev->wb.wb[ring->wptr_offs];
else
- return RREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR2);
+ return RREG32_SOC15(VCN, ring->me, mmUVD_RB_WPTR2);
}
}
@@ -1562,14 +1609,14 @@ static void vcn_v2_5_enc_ring_set_wptr(struct amdgpu_ring *ring)
adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
} else {
- WREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(VCN, ring->me, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
}
} else {
if (ring->use_doorbell) {
adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
} else {
- WREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
+ WREG32_SOC15(VCN, ring->me, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
}
}
}
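All of the wptr setters above share one decision: with a doorbell, the new pointer is published through the writeback slot and the doorbell write; otherwise it goes out as a plain register write. A condensed sketch of that shared pattern, where reg is assumed to be the already-resolved register offset (e.g. SOC15_REG_OFFSET(VCN, ring->me, mmUVD_RBC_RB_WPTR)):

static void vcn_ring_commit_wptr(struct amdgpu_ring *ring, uint32_t reg)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell) {
		/* publish via writeback memory, then ring the doorbell */
		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
	} else {
		WREG32(reg, lower_32_bits(ring->wptr));
	}
}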
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index 3ce10e05d0d6..af8986a55354 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -448,27 +448,6 @@ static bool vi_read_bios_from_rom(struct amdgpu_device *adev,
return true;
}
-static void vi_detect_hw_virtualization(struct amdgpu_device *adev)
-{
- uint32_t reg = 0;
-
- if (adev->asic_type == CHIP_TONGA ||
- adev->asic_type == CHIP_FIJI) {
- reg = RREG32(mmBIF_IOV_FUNC_IDENTIFIER);
- /* bit0: 0 means pf and 1 means vf */
- if (REG_GET_FIELD(reg, BIF_IOV_FUNC_IDENTIFIER, FUNC_IDENTIFIER))
- adev->virt.caps |= AMDGPU_SRIOV_CAPS_IS_VF;
- /* bit31: 0 means disable IOV and 1 means enable */
- if (REG_GET_FIELD(reg, BIF_IOV_FUNC_IDENTIFIER, IOV_ENABLE))
- adev->virt.caps |= AMDGPU_SRIOV_CAPS_ENABLE_IOV;
- }
-
- if (reg == 0) {
- if (is_virtual_machine()) /* passthrough mode exclus sr-iov mode */
- adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE;
- }
-}
-
static const struct amdgpu_allowed_register_entry vi_allowed_read_registers[] = {
{mmGRBM_STATUS},
{mmGRBM_STATUS2},
@@ -1728,9 +1707,6 @@ static const struct amdgpu_ip_block_version vi_common_ip_block =
int vi_set_ip_blocks(struct amdgpu_device *adev)
{
- /* in early init stage, vbios code won't work */
- vi_detect_hw_virtualization(adev);
-
if (amdgpu_sriov_vf(adev))
adev->virt.ops = &xgpu_vi_virt_ops;
diff --git a/drivers/gpu/drm/amd/amdgpu/vid.h b/drivers/gpu/drm/amd/amdgpu/vid.h
index 19ddd2312e00..7a01e6133798 100644
--- a/drivers/gpu/drm/amd/amdgpu/vid.h
+++ b/drivers/gpu/drm/amd/amdgpu/vid.h
@@ -332,7 +332,7 @@
# define PACKET3_DMA_DATA_CMD_SAIC (1 << 28)
# define PACKET3_DMA_DATA_CMD_DAIC (1 << 29)
# define PACKET3_DMA_DATA_CMD_RAW_WAIT (1 << 30)
-#define PACKET3_AQUIRE_MEM 0x58
+#define PACKET3_ACQUIRE_MEM 0x58
#define PACKET3_REWIND 0x59
#define PACKET3_LOAD_UCONFIG_REG 0x5E
#define PACKET3_LOAD_SH_REG 0x5F
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 0ec5f25adf56..cf0017f4d9d5 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -215,6 +215,7 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
}
q_properties->is_interop = false;
+ q_properties->is_gws = false;
q_properties->queue_percent = args->queue_percentage;
q_properties->priority = args->queue_priority;
q_properties->queue_address = args->ring_base_address;
@@ -1322,6 +1323,10 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
goto err_free;
}
+ /* Update the VRAM usage count */
+ if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
+ WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + args->size);
+
mutex_unlock(&p->mutex);
args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
@@ -1337,7 +1342,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
return 0;
err_free:
- amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem);
+ amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem, NULL);
err_unlock:
mutex_unlock(&p->mutex);
return err;
@@ -1351,6 +1356,7 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
void *mem;
struct kfd_dev *dev;
int ret;
+ uint64_t size = 0;
dev = kfd_device_by_id(GET_GPU_ID(args->handle));
if (!dev)
@@ -1373,7 +1379,7 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
}
ret = amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd,
- (struct kgd_mem *)mem);
+ (struct kgd_mem *)mem, &size);
/* If freeing the buffer failed, leave the handle in place for
* clean-up during process tear-down.
@@ -1382,6 +1388,8 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
kfd_process_device_remove_obj_handle(
pdd, GET_IDR_HANDLE(args->handle));
+ WRITE_ONCE(pdd->vram_usage, pdd->vram_usage - size);
+
err_unlock:
mutex_unlock(&p->mutex);
return ret;
@@ -1584,6 +1592,45 @@ copy_from_user_failed:
return err;
}
+static int kfd_ioctl_alloc_queue_gws(struct file *filep,
+ struct kfd_process *p, void *data)
+{
+ int retval;
+ struct kfd_ioctl_alloc_queue_gws_args *args = data;
+ struct queue *q;
+ struct kfd_dev *dev;
+
+ mutex_lock(&p->mutex);
+ q = pqm_get_user_queue(&p->pqm, args->queue_id);
+
+ if (q) {
+ dev = q->device;
+ } else {
+ retval = -EINVAL;
+ goto out_unlock;
+ }
+
+ if (!dev->gws) {
+ retval = -ENODEV;
+ goto out_unlock;
+ }
+
+ if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
+ retval = -ENODEV;
+ goto out_unlock;
+ }
+
+ retval = pqm_set_gws(&p->pqm, args->queue_id, args->num_gws ? dev->gws : NULL);
+ mutex_unlock(&p->mutex);
+
+ args->first_gws = 0;
+ return retval;
+
+out_unlock:
+ mutex_unlock(&p->mutex);
+ return retval;
+}
+
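From userspace the new handler is reached through AMDKFD_IOC_ALLOC_QUEUE_GWS on an already-created queue; passing num_gws as 0 releases the GWS binding again, and the handler currently always reports first_gws as 0. A hedged sketch of a caller; the exact struct layout comes from the installed uapi header (linux/kfd_ioctl.h), so verify it there before relying on it:

#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kfd_ioctl.h>

static int alloc_queue_gws(int kfd_fd, unsigned int queue_id,
			   unsigned int num_gws)
{
	struct kfd_ioctl_alloc_queue_gws_args args = {
		.queue_id = queue_id,	/* queue created earlier via CREATE_QUEUE */
		.num_gws = num_gws,	/* 0 drops the GWS binding */
	};

	if (ioctl(kfd_fd, AMDKFD_IOC_ALLOC_QUEUE_GWS, &args) < 0) {
		perror("AMDKFD_IOC_ALLOC_QUEUE_GWS");
		return -1;
	}
	printf("first_gws = %u\n", args.first_gws);
	return 0;
}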
static int kfd_ioctl_get_dmabuf_info(struct file *filep,
struct kfd_process *p, void *data)
{
@@ -1687,7 +1734,7 @@ static int kfd_ioctl_import_dmabuf(struct file *filep,
return 0;
err_free:
- amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem);
+ amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem, NULL);
err_unlock:
mutex_unlock(&p->mutex);
return r;
@@ -1786,6 +1833,8 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
AMDKFD_IOCTL_DEF(AMDKFD_IOC_IMPORT_DMABUF,
kfd_ioctl_import_dmabuf, 0),
+ AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_QUEUE_GWS,
+ kfd_ioctl_alloc_queue_gws, 0),
};
#define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index de9f68d5c312..1009a3b8dcc2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -502,7 +502,7 @@ int kfd_parse_crat_table(void *crat_image, struct list_head *device_list,
num_nodes = crat_table->num_domains;
image_len = crat_table->length;
- pr_info("Parsing CRAT table with %d nodes\n", num_nodes);
+ pr_debug("Parsing CRAT table with %d nodes\n", num_nodes);
for (node_id = 0; node_id < num_nodes; node_id++) {
top_dev = kfd_create_topology_device(device_list);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 05bc6d96ec52..0491ab2b4a9b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -569,6 +569,23 @@ static void kfd_cwsr_init(struct kfd_dev *kfd)
}
}
+static int kfd_gws_init(struct kfd_dev *kfd)
+{
+ int ret = 0;
+
+ if (kfd->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS)
+ return 0;
+
+ if (hws_gws_support
+ || (kfd->device_info->asic_family >= CHIP_VEGA10
+ && kfd->device_info->asic_family <= CHIP_RAVEN
+ && kfd->mec2_fw_version >= 0x1b3))
+ ret = amdgpu_amdkfd_alloc_gws(kfd->kgd,
+ amdgpu_amdkfd_get_num_gws(kfd->kgd), &kfd->gws);
+
+ return ret;
+}
+
bool kgd2kfd_device_init(struct kfd_dev *kfd,
struct drm_device *ddev,
const struct kgd2kfd_shared_resources *gpu_resources)
@@ -578,6 +595,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
kfd->ddev = ddev;
kfd->mec_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd,
KGD_ENGINE_MEC1);
+ kfd->mec2_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd,
+ KGD_ENGINE_MEC2);
kfd->sdma_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd,
KGD_ENGINE_SDMA1);
kfd->shared_resources = *gpu_resources;
@@ -598,13 +617,6 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
} else
kfd->max_proc_per_quantum = hws_max_conc_proc;
- /* Allocate global GWS that is shared by all KFD processes */
- if (hws_gws_support && amdgpu_amdkfd_alloc_gws(kfd->kgd,
- amdgpu_amdkfd_get_num_gws(kfd->kgd), &kfd->gws)) {
- dev_err(kfd_device, "Could not allocate %d gws\n",
- amdgpu_amdkfd_get_num_gws(kfd->kgd));
- goto out;
- }
/* calculate max size of mqds needed for queues */
size = max_num_of_queues_per_device *
kfd->device_info->mqd_size_aligned;
@@ -662,6 +674,15 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
goto device_queue_manager_error;
}
+ /* If supported on this device, allocate global GWS that is shared
+ * by all KFD processes
+ */
+ if (kfd_gws_init(kfd)) {
+ dev_err(kfd_device, "Could not allocate %d gws\n",
+ amdgpu_amdkfd_get_num_gws(kfd->kgd));
+ goto gws_error;
+ }
+
if (kfd_iommu_device_init(kfd)) {
dev_err(kfd_device, "Error initializing iommuv2\n");
goto device_iommu_error;
@@ -691,6 +712,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
kfd_topology_add_device_error:
kfd_resume_error:
device_iommu_error:
+gws_error:
device_queue_manager_uninit(kfd->dqm);
device_queue_manager_error:
kfd_interrupt_exit(kfd);
@@ -701,7 +723,7 @@ kfd_doorbell_error:
kfd_gtt_sa_init_error:
amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem);
alloc_gtt_mem_failure:
- if (hws_gws_support)
+ if (kfd->gws)
amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws);
dev_err(kfd_device,
"device %x:%x NOT added due to errors\n",
@@ -720,7 +742,7 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)
kfd_doorbell_fini(kfd);
kfd_gtt_sa_fini(kfd);
amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem);
- if (hws_gws_support)
+ if (kfd->gws)
amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws);
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 77ea0f0cb163..e9c4867abeff 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -505,8 +505,13 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
deallocate_vmid(dqm, qpd, q);
}
qpd->queue_count--;
- if (q->properties.is_active)
+ if (q->properties.is_active) {
decrement_queue_count(dqm, q->properties.type);
+ if (q->properties.is_gws) {
+ dqm->gws_queue_count--;
+ qpd->mapped_gws_queue = false;
+ }
+ }
return retval;
}
@@ -583,6 +588,20 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
else if (!q->properties.is_active && prev_active)
decrement_queue_count(dqm, q->properties.type);
+ if (q->gws && !q->properties.is_gws) {
+ if (q->properties.is_active) {
+ dqm->gws_queue_count++;
+ pdd->qpd.mapped_gws_queue = true;
+ }
+ q->properties.is_gws = true;
+ } else if (!q->gws && q->properties.is_gws) {
+ if (q->properties.is_active) {
+ dqm->gws_queue_count--;
+ pdd->qpd.mapped_gws_queue = false;
+ }
+ q->properties.is_gws = false;
+ }
+
if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS)
retval = map_queues_cpsch(dqm);
else if (q->properties.is_active &&
@@ -631,6 +650,10 @@ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
q->properties.type)];
q->properties.is_active = false;
decrement_queue_count(dqm, q->properties.type);
+ if (q->properties.is_gws) {
+ dqm->gws_queue_count--;
+ qpd->mapped_gws_queue = false;
+ }
if (WARN_ONCE(!dqm->sched_running, "Evict when stopped\n"))
continue;
@@ -744,6 +767,10 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
q->properties.type)];
q->properties.is_active = true;
increment_queue_count(dqm, q->properties.type);
+ if (q->properties.is_gws) {
+ dqm->gws_queue_count++;
+ qpd->mapped_gws_queue = true;
+ }
if (WARN_ONCE(!dqm->sched_running, "Restore when stopped\n"))
continue;
@@ -913,6 +940,7 @@ static int initialize_nocpsch(struct device_queue_manager *dqm)
INIT_LIST_HEAD(&dqm->queues);
dqm->active_queue_count = dqm->next_pipe_to_allocate = 0;
dqm->active_cp_queue_count = 0;
+ dqm->gws_queue_count = 0;
for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
int pipe_offset = pipe * get_queues_per_pipe(dqm);
@@ -1061,7 +1089,9 @@ static int set_sched_resources(struct device_queue_manager *dqm)
break;
}
- res.queue_mask |= (1ull << i);
+ res.queue_mask |= 1ull
+ << amdgpu_queue_mask_bit_to_set_resource_bit(
+ (struct amdgpu_device *)dqm->dev->kgd, i);
}
res.gws_mask = ~0ull;
res.oac_mask = res.gds_heap_base = res.gds_heap_size = 0;
@@ -1082,7 +1112,7 @@ static int initialize_cpsch(struct device_queue_manager *dqm)
INIT_LIST_HEAD(&dqm->queues);
dqm->active_queue_count = dqm->processes_count = 0;
dqm->active_cp_queue_count = 0;
-
+ dqm->gws_queue_count = 0;
dqm->active_runlist = false;
dqm->sdma_bitmap = ~0ULL >> (64 - get_num_sdma_queues(dqm));
dqm->xgmi_sdma_bitmap = ~0ULL >> (64 - get_num_xgmi_sdma_queues(dqm));
@@ -1432,6 +1462,10 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
if (retval == -ETIME)
qpd->reset_wavefronts = true;
+ if (q->properties.is_gws) {
+ dqm->gws_queue_count--;
+ qpd->mapped_gws_queue = false;
+ }
}
/*
@@ -1650,8 +1684,13 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
deallocate_sdma_queue(dqm, q);
- if (q->properties.is_active)
+ if (q->properties.is_active) {
decrement_queue_count(dqm, q->properties.type);
+ if (q->properties.is_gws) {
+ dqm->gws_queue_count--;
+ qpd->mapped_gws_queue = false;
+ }
+ }
dqm->total_queue_count--;
}
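The bookkeeping added across these hunks follows one rule: a queue contributes to dqm->gws_queue_count exactly while it is both GWS-capable and active, and qpd->mapped_gws_queue mirrors that for the owning process. A hypothetical helper (not part of the driver) that states the invariant the scattered updates implement:

static void update_gws_queue_count(struct device_queue_manager *dqm,
				   struct qcm_process_device *qpd,
				   struct queue *q, bool becomes_active)
{
	if (!q->properties.is_gws)
		return;

	if (becomes_active) {
		dqm->gws_queue_count++;
		qpd->mapped_gws_queue = true;
	} else {
		dqm->gws_queue_count--;
		qpd->mapped_gws_queue = false;
	}
}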
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index 50d919f814e9..4afa015c69b1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -182,6 +182,7 @@ struct device_queue_manager {
unsigned int processes_count;
unsigned int active_queue_count;
unsigned int active_cp_queue_count;
+ unsigned int gws_queue_count;
unsigned int total_queue_count;
unsigned int next_pipe_to_allocate;
unsigned int *allocated_queues;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
index e05d75ecda21..fce6ccabe38b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
@@ -37,7 +37,7 @@ static bool event_interrupt_isr_v9(struct kfd_dev *dev,
vmid = SOC15_VMID_FROM_IH_ENTRY(ih_ring_entry);
if (vmid < dev->vm_info.first_vmid_kfd ||
vmid > dev->vm_info.last_vmid_kfd)
- return 0;
+ return false;
source_id = SOC15_SOURCE_ID_FROM_IH_ENTRY(ih_ring_entry);
client_id = SOC15_CLIENT_ID_FROM_IH_ENTRY(ih_ring_entry);
@@ -69,7 +69,7 @@ static bool event_interrupt_isr_v9(struct kfd_dev *dev,
/* If there is no valid PASID, it's likely a bug */
if (WARN_ONCE(pasid == 0, "Bug: No PASID in KFD interrupt"))
- return 0;
+ return false;
/* Interrupt types we care about: various signals and faults.
* They will be forwarded to a work queue (see below).
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c
index 8d871514671e..7c8786b9eb0a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c
@@ -192,7 +192,7 @@ static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int pasid,
dev_warn_ratelimited(kfd_device,
"Invalid PPR device %x:%x.%x pasid 0x%x address 0x%lX flags 0x%X",
- PCI_BUS_NUM(pdev->devfn),
+ pdev->bus->number,
PCI_SLOT(pdev->devfn),
PCI_FUNC(pdev->devfn),
pasid,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
index bae706462f96..a2b77d1df854 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
@@ -126,6 +126,7 @@ static bool kq_initialize(struct kernel_queue *kq, struct kfd_dev *dev,
prop.queue_size = queue_size;
prop.is_interop = false;
+ prop.is_gws = false;
prop.priority = 1;
prop.queue_percent = 100;
prop.type = type;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index efdb75e7677b..685ca82d42fe 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -41,7 +41,7 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
unsigned int *rlib_size,
bool *over_subscription)
{
- unsigned int process_count, queue_count, compute_queue_count;
+ unsigned int process_count, queue_count, compute_queue_count, gws_queue_count;
unsigned int map_queue_size;
unsigned int max_proc_per_quantum = 1;
struct kfd_dev *dev = pm->dqm->dev;
@@ -49,6 +49,7 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
process_count = pm->dqm->processes_count;
queue_count = pm->dqm->active_queue_count;
compute_queue_count = pm->dqm->active_cp_queue_count;
+ gws_queue_count = pm->dqm->gws_queue_count;
/* check if there is over subscription
* Note: the arbitration between the number of VMIDs and
@@ -61,7 +62,8 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
max_proc_per_quantum = dev->max_proc_per_quantum;
if ((process_count > max_proc_per_quantum) ||
- compute_queue_count > get_cp_queues_num(pm->dqm)) {
+ compute_queue_count > get_cp_queues_num(pm->dqm) ||
+ gws_queue_count > 1) {
*over_subscription = true;
pr_debug("Over subscribed runlist\n");
}
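Boiled down, the runlist is now considered over-subscribed when any one of three limits is exceeded; the new gws_queue_count > 1 term reflects that only one queue can hold the global wave sync resource at a time, so additional GWS queues push the runlist into the over-subscribed (time-sliced) path. A condensed, illustrative form of the check:

static bool runlist_over_subscribed(unsigned int process_count,
				    unsigned int compute_queue_count,
				    unsigned int gws_queue_count,
				    unsigned int max_proc_per_quantum,
				    unsigned int cp_queues_num)
{
	return process_count > max_proc_per_quantum ||
	       compute_queue_count > cp_queues_num ||
	       gws_queue_count > 1;
}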
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
index 2de01009f1b6..bdca9dc5f118 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
@@ -43,7 +43,7 @@ static int pm_map_process_v9(struct packet_manager *pm,
packet->bitfields2.pasid = qpd->pqm->process->pasid;
packet->bitfields14.gds_size = qpd->gds_size & 0x3F;
packet->bitfields14.gds_size_hi = (qpd->gds_size >> 6) & 0xF;
- packet->bitfields14.num_gws = qpd->num_gws;
+ packet->bitfields14.num_gws = (qpd->mapped_gws_queue) ? qpd->num_gws : 0;
packet->bitfields14.num_oac = qpd->num_oac;
packet->bitfields14.sdma_enable = 1;
packet->bitfields14.num_queues = (qpd->is_debug) ? 0 : qpd->queue_count;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index c24cad3c64ed..f0587d94294d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -282,6 +282,7 @@ struct kfd_dev {
/* Firmware versions */
uint16_t mec_fw_version;
+ uint16_t mec2_fw_version;
uint16_t sdma_fw_version;
/* Maximum process number mapped to HW scheduler */
@@ -410,6 +411,10 @@ enum KFD_QUEUE_PRIORITY {
* @is_active: Defines if the queue is active or not. @is_active and
* @is_evicted are protected by the DQM lock.
*
+ * @is_gws: Defines if the queue has been updated to be GWS-capable or not.
+ * @is_gws should be protected by the DQM lock, since changing it may require
+ * updating the DQM's count of GWS queues.
+ *
* @vmid: If the scheduling mode is no cp scheduling the field defines the vmid
* of the queue.
*
@@ -432,6 +437,7 @@ struct queue_properties {
bool is_interop;
bool is_evicted;
bool is_active;
+ bool is_gws;
/* Not relevant for user mode queues in cp scheduling */
unsigned int vmid;
/* Relevant only for sdma queues*/
@@ -563,6 +569,14 @@ struct qcm_process_device {
*/
bool reset_wavefronts;
+ /* This flag tells us if this process has a GWS-capable
+ * queue that will be mapped into the runlist. It's
+ * possible to request a GWS BO, but not have the queue
+ * currently mapped, and this changes how the MAP_PROCESS
+ * PM4 packet is configured.
+ */
+ bool mapped_gws_queue;
+
/*
* All the memory management data should be here too
*/
@@ -615,6 +629,8 @@ enum kfd_pdd_bound {
PDD_BOUND_SUSPENDED,
};
+#define MAX_VRAM_FILENAME_LEN 11
+
/* Data that is per-process-per device. */
struct kfd_process_device {
/*
@@ -657,6 +673,11 @@ struct kfd_process_device {
/* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */
enum kfd_pdd_bound bound;
+
+ /* VRAM usage */
+ uint64_t vram_usage;
+ struct attribute attr_vram;
+ char vram_filename[MAX_VRAM_FILENAME_LEN];
};
#define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
@@ -923,6 +944,8 @@ int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
void *gws);
struct kernel_queue *pqm_get_kernel_queue(struct process_queue_manager *pqm,
unsigned int qid);
+struct queue *pqm_get_user_queue(struct process_queue_manager *pqm,
+ unsigned int qid);
int pqm_get_wave_state(struct process_queue_manager *pqm,
unsigned int qid,
void __user *ctl_stack,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index fe0cd49d4ea7..d27221ddcdeb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -79,18 +79,22 @@ static struct kfd_procfs_tree procfs;
static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr,
char *buffer)
{
- int val = 0;
-
if (strcmp(attr->name, "pasid") == 0) {
struct kfd_process *p = container_of(attr, struct kfd_process,
attr_pasid);
- val = p->pasid;
+
+ return snprintf(buffer, PAGE_SIZE, "%d\n", p->pasid);
+ } else if (strncmp(attr->name, "vram_", 5) == 0) {
+ struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device,
+ attr_vram);
+ if (pdd)
+ return snprintf(buffer, PAGE_SIZE, "%llu\n", READ_ONCE(pdd->vram_usage));
} else {
pr_err("Invalid attribute");
return -EINVAL;
}
- return snprintf(buffer, PAGE_SIZE, "%d\n", val);
+ return 0;
}
static void kfd_procfs_kobj_release(struct kobject *kobj)
@@ -206,6 +210,34 @@ int kfd_procfs_add_queue(struct queue *q)
return 0;
}
+int kfd_procfs_add_vram_usage(struct kfd_process *p)
+{
+ int ret = 0;
+ struct kfd_process_device *pdd;
+
+ if (!p)
+ return -EINVAL;
+
+ if (!p->kobj)
+ return -EFAULT;
+
+ /* Create proc/<pid>/vram_<gpuid> file for each GPU */
+ list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
+ snprintf(pdd->vram_filename, MAX_VRAM_FILENAME_LEN, "vram_%u",
+ pdd->dev->id);
+ pdd->attr_vram.name = pdd->vram_filename;
+ pdd->attr_vram.mode = KFD_SYSFS_FILE_MODE;
+ sysfs_attr_init(&pdd->attr_vram);
+ ret = sysfs_create_file(p->kobj, &pdd->attr_vram);
+ if (ret)
+ pr_warn("Creating vram usage for gpu id %d failed",
+ (int)pdd->dev->id);
+ }
+
+ return ret;
+}
+
+
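Each per-device counter shows up as a read-only file named vram_<gpuid> under the process's entry in the KFD proc tree, alongside the existing pasid file. A hedged userspace sketch for reading it back; the /sys/class/kfd/kfd/proc/<pid>/ path is an assumption about where that kobject is exposed, so adjust it for the target system:

#include <stdio.h>
#include <unistd.h>
#include <inttypes.h>

static int read_vram_usage(unsigned int gpu_id, uint64_t *bytes)
{
	char path[128];
	FILE *f;

	/* assumed location of the per-process KFD entries */
	snprintf(path, sizeof(path),
		 "/sys/class/kfd/kfd/proc/%d/vram_%u", getpid(), gpu_id);
	f = fopen(path, "r");
	if (!f)
		return -1;
	if (fscanf(f, "%" SCNu64, bytes) != 1) {
		fclose(f);
		return -1;
	}
	fclose(f);
	return 0;
}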
void kfd_procfs_del_queue(struct queue *q)
{
if (!q)
@@ -248,7 +280,7 @@ static void kfd_process_free_gpuvm(struct kgd_mem *mem,
struct kfd_dev *dev = pdd->dev;
amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->kgd, mem, pdd->vm);
- amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, mem);
+ amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, mem, NULL);
}
/* kfd_process_alloc_gpuvm - Allocate GPU VM for the KFD process
@@ -312,7 +344,7 @@ sync_memory_failed:
return err;
err_map_mem:
- amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, mem);
+ amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, mem, NULL);
err_alloc_mem:
*kptr = NULL;
return err;
@@ -411,6 +443,11 @@ struct kfd_process *kfd_create_process(struct file *filep)
process->kobj);
if (!process->kobj_queues)
pr_warn("Creating KFD proc/queues folder failed");
+
+ ret = kfd_procfs_add_vram_usage(process);
+ if (ret)
+ pr_warn("Creating vram usage file for pid %d failed",
+ (int)process->lead_thread->pid);
}
out:
if (!IS_ERR(process))
@@ -488,7 +525,7 @@ static void kfd_process_device_free_bos(struct kfd_process_device *pdd)
peer_pdd->dev->kgd, mem, peer_pdd->vm);
}
- amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->kgd, mem);
+ amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->kgd, mem, NULL);
kfd_process_device_remove_obj_handle(pdd, id);
}
}
@@ -551,6 +588,7 @@ static void kfd_process_wq_release(struct work_struct *work)
{
struct kfd_process *p = container_of(work, struct kfd_process,
release_work);
+ struct kfd_process_device *pdd;
/* Remove the procfs files */
if (p->kobj) {
@@ -558,6 +596,10 @@ static void kfd_process_wq_release(struct work_struct *work)
kobject_del(p->kobj_queues);
kobject_put(p->kobj_queues);
p->kobj_queues = NULL;
+
+ list_for_each_entry(pdd, &p->per_device_data, per_device_list)
+ sysfs_remove_file(p->kobj, &pdd->attr_vram);
+
kobject_del(p->kobj);
kobject_put(p->kobj);
p->kobj = NULL;
@@ -858,10 +900,12 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
pdd->qpd.dqm = dev->dqm;
pdd->qpd.pqm = &p->pqm;
pdd->qpd.evicted = 0;
+ pdd->qpd.mapped_gws_queue = false;
pdd->process = p;
pdd->bound = PDD_UNBOUND;
pdd->already_dequeued = false;
pdd->runtime_inuse = false;
+ pdd->vram_usage = 0;
list_add(&pdd->per_device_list, &p->per_device_data);
/* Init idr used for memory handle translation */
@@ -1078,7 +1122,7 @@ struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm)
return p;
}
-/* process_evict_queues - Evict all user queues of a process
+/* kfd_process_evict_queues - Evict all user queues of a process
*
* Eviction is reference-counted per process-device. This means multiple
* evictions from different sources can be nested safely.
@@ -1118,7 +1162,7 @@ fail:
return r;
}
-/* process_restore_queues - Restore all user queues of a process */
+/* kfd_process_restore_queues - Restore all user queues of a process */
int kfd_process_restore_queues(struct kfd_process *p)
{
struct kfd_process_device *pdd;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 084c35f55d59..eb1635ac8988 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -476,6 +476,15 @@ struct kernel_queue *pqm_get_kernel_queue(
return NULL;
}
+struct queue *pqm_get_user_queue(struct process_queue_manager *pqm,
+ unsigned int qid)
+{
+ struct process_queue_node *pqn;
+
+ pqn = get_queue_by_qid(pqm, qid);
+ return pqn ? pqn->q : NULL;
+}
+
int pqm_get_wave_state(struct process_queue_manager *pqm,
unsigned int qid,
void __user *ctl_stack,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index aa0bfa78a667..bb77f7af2b6d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -478,6 +478,8 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
dev->node_props.device_id);
sysfs_show_32bit_prop(buffer, "location_id",
dev->node_props.location_id);
+ sysfs_show_32bit_prop(buffer, "domain",
+ dev->node_props.domain);
sysfs_show_32bit_prop(buffer, "drm_render_minor",
dev->node_props.drm_render_minor);
sysfs_show_64bit_prop(buffer, "hive_id",
@@ -787,7 +789,6 @@ static int kfd_topology_update_sysfs(void)
{
int ret;
- pr_info("Creating topology SYSFS entries\n");
if (!sys_props.kobj_topology) {
sys_props.kobj_topology =
kfd_alloc_struct(sys_props.kobj_topology);
@@ -1048,7 +1049,6 @@ int kfd_topology_init(void)
sys_props.generation_count++;
kfd_update_system_properties();
kfd_debug_print_topology();
- pr_info("Finished initializing topology\n");
} else
pr_err("Failed to update topology in sysfs ret=%d\n", ret);
@@ -1303,7 +1303,12 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
dev->node_props.vendor_id = gpu->pdev->vendor;
dev->node_props.device_id = gpu->pdev->device;
+ dev->node_props.capability |=
+ ((amdgpu_amdkfd_get_asic_rev_id(dev->gpu->kgd) <<
+ HSA_CAP_ASIC_REVISION_SHIFT) &
+ HSA_CAP_ASIC_REVISION_MASK);
dev->node_props.location_id = pci_dev_id(gpu->pdev);
+ dev->node_props.domain = pci_domain_nr(gpu->pdev->bus);
dev->node_props.max_engine_clk_fcompute =
amdgpu_amdkfd_get_max_engine_clock_in_mhz(dev->gpu->kgd);
dev->node_props.max_engine_clk_ccompute =
@@ -1317,7 +1322,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
gpu->device_info->num_xgmi_sdma_engines;
dev->node_props.num_sdma_queues_per_engine =
gpu->device_info->num_sdma_queues_per_engine;
- dev->node_props.num_gws = (hws_gws_support &&
+ dev->node_props.num_gws = (dev->gpu->gws &&
dev->gpu->dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) ?
amdgpu_amdkfd_get_num_gws(dev->gpu->kgd) : 0;
dev->node_props.num_cp_queues = get_cp_queues_num(dev->gpu->dqm);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
index 46eeecaf1b68..326d9b26b7aa 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
@@ -41,7 +41,6 @@
#define HSA_CAP_WATCH_POINTS_TOTALBITS_SHIFT 8
#define HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK 0x00003000
#define HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT 12
-#define HSA_CAP_RESERVED 0xffffc000
#define HSA_CAP_DOORBELL_TYPE_PRE_1_0 0x0
#define HSA_CAP_DOORBELL_TYPE_1_0 0x1
@@ -51,6 +50,10 @@
#define HSA_CAP_SRAM_EDCSUPPORTED 0x00080000
#define HSA_CAP_MEM_EDCSUPPORTED 0x00100000
#define HSA_CAP_RASEVENTNOTIFY 0x00200000
+#define HSA_CAP_ASIC_REVISION_MASK 0x03c00000
+#define HSA_CAP_ASIC_REVISION_SHIFT 22
+
+#define HSA_CAP_RESERVED 0xfc078000
struct kfd_node_properties {
uint64_t hive_id;
@@ -77,6 +80,7 @@ struct kfd_node_properties {
uint32_t vendor_id;
uint32_t device_id;
uint32_t location_id;
+ uint32_t domain;
uint32_t max_engine_clk_fcompute;
uint32_t max_engine_clk_ccompute;
int32_t drm_render_minor;
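The revision field occupies four capability bits; a short encode/decode sketch (the encode mirrors kfd_topology_add_device() above, the decode is what a hypothetical consumer of the sysfs "capability" word would do):

	/* driver side: fold the ASIC revision into the capability word */
	cap |= (rev_id << HSA_CAP_ASIC_REVISION_SHIFT) & HSA_CAP_ASIC_REVISION_MASK;

	/* consumer side: recover the revision again */
	rev_id = (cap & HSA_CAP_ASIC_REVISION_MASK) >> HSA_CAP_ASIC_REVISION_SHIFT;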
diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig
index 87858bc57e64..1911a34cc060 100644
--- a/drivers/gpu/drm/amd/display/Kconfig
+++ b/drivers/gpu/drm/amd/display/Kconfig
@@ -21,16 +21,12 @@ config DRM_AMD_DC_HDCP
bool "Enable HDCP support in DC"
depends on DRM_AMD_DC
help
- Choose this option
- if you want to support
- HDCP authentication
+ Choose this option if you want to support HDCP authentication.
config DEBUG_KERNEL_DC
bool "Enable kgdb break in DC"
depends on DRM_AMD_DC
help
- Choose this option
- if you want to hit
- kdgb_break in assert.
+ Choose this option if you want to hit kgdb_break in assert.
endmenu
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 7fc15b82fe48..bdba0bfd6df1 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -30,7 +30,7 @@
#include "dc.h"
#include "dc/inc/core_types.h"
#include "dal_asic_id.h"
-#include "dmub/inc/dmub_srv.h"
+#include "dmub/dmub_srv.h"
#include "dc/inc/hw/dmcu.h"
#include "dc/inc/hw/abm.h"
#include "dc/dc_dmub_srv.h"
@@ -774,8 +774,9 @@ static int dm_dmub_hw_init(struct amdgpu_device *adev)
fw_inst_const_size);
}
- memcpy(fb_info->fb[DMUB_WINDOW_2_BSS_DATA].cpu_addr, fw_bss_data,
- fw_bss_data_size);
+ if (fw_bss_data_size)
+ memcpy(fb_info->fb[DMUB_WINDOW_2_BSS_DATA].cpu_addr,
+ fw_bss_data, fw_bss_data_size);
/* Copy firmware bios info into FB memory. */
memcpy(fb_info->fb[DMUB_WINDOW_3_VBIOS].cpu_addr, adev->bios,
@@ -917,6 +918,23 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
goto error;
}
+ if (amdgpu_dc_debug_mask & DC_DISABLE_PIPE_SPLIT) {
+ adev->dm.dc->debug.force_single_disp_pipe_split = false;
+ adev->dm.dc->debug.pipe_split_policy = MPC_SPLIT_AVOID;
+ }
+
+ if (adev->asic_type != CHIP_CARRIZO && adev->asic_type != CHIP_STONEY)
+ adev->dm.dc->debug.disable_stutter = amdgpu_pp_feature_mask & PP_STUTTER_MODE ? false : true;
+
+ if (amdgpu_dc_debug_mask & DC_DISABLE_STUTTER)
+ adev->dm.dc->debug.disable_stutter = true;
+
+ if (amdgpu_dc_debug_mask & DC_DISABLE_DSC)
+ adev->dm.dc->debug.disable_dsc = true;
+
+ if (amdgpu_dc_debug_mask & DC_DISABLE_CLOCK_GATING)
+ adev->dm.dc->debug.disable_clock_gate = true;
+
r = dm_dmub_hw_init(adev);
if (r) {
DRM_ERROR("DMUB interface failed to initialize: status=%d\n", r);
@@ -1214,6 +1232,10 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev)
adev->dm.dmub_fw->data +
le32_to_cpu(hdr->header.ucode_array_offset_bytes) +
le32_to_cpu(hdr->inst_const_bytes);
+ region_params.fw_inst_const =
+ adev->dm.dmub_fw->data +
+ le32_to_cpu(hdr->header.ucode_array_offset_bytes) +
+ PSP_HEADER_BYTES;
status = dmub_srv_calc_region_info(dmub_srv, &region_params,
&region_info);
@@ -1333,9 +1355,14 @@ static int dm_late_init(void *handle)
struct dmcu_iram_parameters params;
unsigned int linear_lut[16];
int i;
- struct dmcu *dmcu = adev->dm.dc->res_pool->dmcu;
+ struct dmcu *dmcu = NULL;
bool ret = false;
+ if (!adev->dm.fw_dmcu)
+ return detect_mst_link_for_all_connectors(adev->ddev);
+
+ dmcu = adev->dm.dc->res_pool->dmcu;
+
for (i = 0; i < 16; i++)
linear_lut[i] = 0xFFFF * i / 15;
@@ -1511,12 +1538,115 @@ static int dm_hw_fini(void *handle)
return 0;
}
+
+static int dm_enable_vblank(struct drm_crtc *crtc);
+static void dm_disable_vblank(struct drm_crtc *crtc);
+
+static void dm_gpureset_toggle_interrupts(struct amdgpu_device *adev,
+ struct dc_state *state, bool enable)
+{
+ enum dc_irq_source irq_source;
+ struct amdgpu_crtc *acrtc;
+ int rc = -EBUSY;
+ int i = 0;
+
+ for (i = 0; i < state->stream_count; i++) {
+ acrtc = get_crtc_by_otg_inst(
+ adev, state->stream_status[i].primary_otg_inst);
+
+ if (acrtc && state->stream_status[i].plane_count != 0) {
+ irq_source = IRQ_TYPE_PFLIP + acrtc->otg_inst;
+ rc = dc_interrupt_set(adev->dm.dc, irq_source, enable) ? 0 : -EBUSY;
+ DRM_DEBUG("crtc %d - vupdate irq %sabling: r=%d\n",
+ acrtc->crtc_id, enable ? "en" : "dis", rc);
+ if (rc)
+ DRM_WARN("Failed to %s pflip interrupts\n",
+ enable ? "enable" : "disable");
+
+ if (enable) {
+ rc = dm_enable_vblank(&acrtc->base);
+ if (rc)
+ DRM_WARN("Failed to enable vblank interrupts\n");
+ } else {
+ dm_disable_vblank(&acrtc->base);
+ }
+
+ }
+ }
+
+}
+
+enum dc_status amdgpu_dm_commit_zero_streams(struct dc *dc)
+{
+ struct dc_state *context = NULL;
+ enum dc_status res = DC_ERROR_UNEXPECTED;
+ int i;
+ struct dc_stream_state *del_streams[MAX_PIPES];
+ int del_streams_count = 0;
+
+ memset(del_streams, 0, sizeof(del_streams));
+
+ context = dc_create_state(dc);
+ if (context == NULL)
+ goto context_alloc_fail;
+
+ dc_resource_state_copy_construct_current(dc, context);
+
+ /* First remove from context all streams */
+ for (i = 0; i < context->stream_count; i++) {
+ struct dc_stream_state *stream = context->streams[i];
+
+ del_streams[del_streams_count++] = stream;
+ }
+
+ /* Remove all planes for removed streams and then remove the streams */
+ for (i = 0; i < del_streams_count; i++) {
+ if (!dc_rem_all_planes_for_stream(dc, del_streams[i], context)) {
+ res = DC_FAIL_DETACH_SURFACES;
+ goto fail;
+ }
+
+ res = dc_remove_stream_from_ctx(dc, context, del_streams[i]);
+ if (res != DC_OK)
+ goto fail;
+ }
+
+
+ res = dc_validate_global_state(dc, context, false);
+
+ if (res != DC_OK) {
+ DRM_ERROR("%s:resource validation failed, dc_status:%d\n", __func__, res);
+ goto fail;
+ }
+
+ res = dc_commit_state(dc, context);
+
+fail:
+ dc_release_state(context);
+
+context_alloc_fail:
+ return res;
+}
+
static int dm_suspend(void *handle)
{
struct amdgpu_device *adev = handle;
struct amdgpu_display_manager *dm = &adev->dm;
int ret = 0;
+ if (adev->in_gpu_reset) {
+ mutex_lock(&dm->dc_lock);
+ dm->cached_dc_state = dc_copy_state(dm->dc->current_state);
+
+ dm_gpureset_toggle_interrupts(adev, dm->cached_dc_state, false);
+
+ amdgpu_dm_commit_zero_streams(dm->dc);
+
+ amdgpu_dm_irq_suspend(adev);
+
+ return ret;
+ }
+
WARN_ON(adev->dm.cached_state);
adev->dm.cached_state = drm_atomic_helper_suspend(adev->ddev);
@@ -1527,7 +1657,7 @@ static int dm_suspend(void *handle)
dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D3);
- return ret;
+ return 0;
}
static struct amdgpu_dm_connector *
@@ -1631,6 +1761,46 @@ static void emulated_link_detect(struct dc_link *link)
}
+static void dm_gpureset_commit_state(struct dc_state *dc_state,
+ struct amdgpu_display_manager *dm)
+{
+ struct {
+ struct dc_surface_update surface_updates[MAX_SURFACES];
+ struct dc_plane_info plane_infos[MAX_SURFACES];
+ struct dc_scaling_info scaling_infos[MAX_SURFACES];
+ struct dc_flip_addrs flip_addrs[MAX_SURFACES];
+ struct dc_stream_update stream_update;
+ } * bundle;
+ int k, m;
+
+ bundle = kzalloc(sizeof(*bundle), GFP_KERNEL);
+
+ if (!bundle) {
+ dm_error("Failed to allocate update bundle\n");
+ goto cleanup;
+ }
+
+ for (k = 0; k < dc_state->stream_count; k++) {
+ bundle->stream_update.stream = dc_state->streams[k];
+
+ for (m = 0; m < dc_state->stream_status->plane_count; m++) {
+ bundle->surface_updates[m].surface =
+ dc_state->stream_status->plane_states[m];
+ bundle->surface_updates[m].surface->force_full_update =
+ true;
+ }
+ dc_commit_updates_for_stream(
+ dm->dc, bundle->surface_updates,
+ dc_state->stream_status->plane_count,
+ dc_state->streams[k], &bundle->stream_update, dc_state);
+ }
+
+cleanup:
+ kfree(bundle);
+
+ return;
+}
+
static int dm_resume(void *handle)
{
struct amdgpu_device *adev = handle;
@@ -1647,8 +1817,44 @@ static int dm_resume(void *handle)
struct dm_plane_state *dm_new_plane_state;
struct dm_atomic_state *dm_state = to_dm_atomic_state(dm->atomic_obj.state);
enum dc_connection_type new_connection_type = dc_connection_none;
- int i, r;
+ struct dc_state *dc_state;
+ int i, r, j;
+
+ if (adev->in_gpu_reset) {
+ dc_state = dm->cached_dc_state;
+
+ r = dm_dmub_hw_init(adev);
+ if (r)
+ DRM_ERROR("DMUB interface failed to initialize: status=%d\n", r);
+
+ dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D0);
+ dc_resume(dm->dc);
+
+ amdgpu_dm_irq_resume_early(adev);
+
+ for (i = 0; i < dc_state->stream_count; i++) {
+ dc_state->streams[i]->mode_changed = true;
+ for (j = 0; j < dc_state->stream_status->plane_count; j++) {
+ dc_state->stream_status->plane_states[j]->update_flags.raw
+ = 0xffffffff;
+ }
+ }
+
+ WARN_ON(!dc_commit_state(dm->dc, dc_state));
+ dm_gpureset_commit_state(dm->cached_dc_state, dm);
+
+ dm_gpureset_toggle_interrupts(adev, dm->cached_dc_state, true);
+
+ dc_release_state(dm->cached_dc_state);
+ dm->cached_dc_state = NULL;
+
+ amdgpu_dm_irq_resume_late(adev);
+
+ mutex_unlock(&dm->dc_lock);
+
+ return 0;
+ }
/* Recreate dc_state - DC invalidates it when setting power state to S3. */
dc_release_state(dm_state->context);
dm_state->context = dc_create_state(dm->dc);
@@ -3013,9 +3219,6 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
goto fail;
}
- if (adev->asic_type != CHIP_CARRIZO && adev->asic_type != CHIP_STONEY)
- dm->dc->debug.disable_stutter = amdgpu_pp_feature_mask & PP_STUTTER_MODE ? false : true;
-
/* No userspace support. */
dm->dc->debug.disable_tri_buf = true;
@@ -3286,7 +3489,7 @@ static int fill_dc_scaling_info(const struct drm_plane_state *state,
}
static int get_fb_info(const struct amdgpu_framebuffer *amdgpu_fb,
- uint64_t *tiling_flags)
+ uint64_t *tiling_flags, bool *tmz_surface)
{
struct amdgpu_bo *rbo = gem_to_amdgpu_bo(amdgpu_fb->base.obj[0]);
int r = amdgpu_bo_reserve(rbo, false);
@@ -3301,6 +3504,9 @@ static int get_fb_info(const struct amdgpu_framebuffer *amdgpu_fb,
if (tiling_flags)
amdgpu_bo_get_tiling_flags(rbo, tiling_flags);
+ if (tmz_surface)
+ *tmz_surface = amdgpu_bo_encrypted(rbo);
+
amdgpu_bo_unreserve(rbo);
return r;
@@ -3388,6 +3594,7 @@ fill_plane_buffer_attributes(struct amdgpu_device *adev,
struct plane_size *plane_size,
struct dc_plane_dcc_param *dcc,
struct dc_plane_address *address,
+ bool tmz_surface,
bool force_disable_dcc)
{
const struct drm_framebuffer *fb = &afb->base;
@@ -3398,6 +3605,8 @@ fill_plane_buffer_attributes(struct amdgpu_device *adev,
memset(dcc, 0, sizeof(*dcc));
memset(address, 0, sizeof(*address));
+ address->tmz_surface = tmz_surface;
+
if (format < SURFACE_PIXEL_FORMAT_VIDEO_BEGIN) {
plane_size->surface_size.x = 0;
plane_size->surface_size.y = 0;
@@ -3588,6 +3797,7 @@ fill_dc_plane_info_and_addr(struct amdgpu_device *adev,
const uint64_t tiling_flags,
struct dc_plane_info *plane_info,
struct dc_plane_address *address,
+ bool tmz_surface,
bool force_disable_dcc)
{
const struct drm_framebuffer *fb = plane_state->fb;
@@ -3631,6 +3841,14 @@ fill_dc_plane_info_and_addr(struct amdgpu_device *adev,
case DRM_FORMAT_P010:
plane_info->format = SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCrCb;
break;
+ case DRM_FORMAT_XRGB16161616F:
+ case DRM_FORMAT_ARGB16161616F:
+ plane_info->format = SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616F;
+ break;
+ case DRM_FORMAT_XBGR16161616F:
+ case DRM_FORMAT_ABGR16161616F:
+ plane_info->format = SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F;
+ break;
default:
DRM_ERROR(
"Unsupported screen format %s\n",
@@ -3670,7 +3888,7 @@ fill_dc_plane_info_and_addr(struct amdgpu_device *adev,
plane_info->rotation, tiling_flags,
&plane_info->tiling_info,
&plane_info->plane_size,
- &plane_info->dcc, address,
+ &plane_info->dcc, address, tmz_surface,
force_disable_dcc);
if (ret)
return ret;
@@ -3694,6 +3912,7 @@ static int fill_dc_plane_attributes(struct amdgpu_device *adev,
struct dc_plane_info plane_info;
uint64_t tiling_flags;
int ret;
+ bool tmz_surface = false;
bool force_disable_dcc = false;
ret = fill_dc_scaling_info(plane_state, &scaling_info);
@@ -3705,7 +3924,7 @@ static int fill_dc_plane_attributes(struct amdgpu_device *adev,
dc_plane_state->clip_rect = scaling_info.clip_rect;
dc_plane_state->scaling_quality = scaling_info.scaling_quality;
- ret = get_fb_info(amdgpu_fb, &tiling_flags);
+ ret = get_fb_info(amdgpu_fb, &tiling_flags, &tmz_surface);
if (ret)
return ret;
@@ -3713,6 +3932,7 @@ static int fill_dc_plane_attributes(struct amdgpu_device *adev,
ret = fill_dc_plane_info_and_addr(adev, plane_state, tiling_flags,
&plane_info,
&dc_plane_state->address,
+ tmz_surface,
force_disable_dcc);
if (ret)
return ret;
@@ -3800,8 +4020,7 @@ static void update_stream_scaling_settings(const struct drm_display_mode *mode,
static enum dc_color_depth
convert_color_depth_from_display_info(const struct drm_connector *connector,
- const struct drm_connector_state *state,
- bool is_y420)
+ bool is_y420, int requested_bpc)
{
uint8_t bpc;
@@ -3821,10 +4040,7 @@ convert_color_depth_from_display_info(const struct drm_connector *connector,
bpc = bpc ? bpc : 8;
}
- if (!state)
- state = connector->state;
-
- if (state) {
+ if (requested_bpc > 0) {
/*
* Cap display bpc based on the user requested value.
*
@@ -3833,7 +4049,7 @@ convert_color_depth_from_display_info(const struct drm_connector *connector,
* or if this was called outside of atomic check, so it
* can't be used directly.
*/
- bpc = min(bpc, state->max_requested_bpc);
+ bpc = min_t(u8, bpc, requested_bpc);
/* Round down to the nearest even number. */
bpc = bpc - (bpc & 1);
@@ -3955,7 +4171,8 @@ static void fill_stream_properties_from_drm_display_mode(
const struct drm_display_mode *mode_in,
const struct drm_connector *connector,
const struct drm_connector_state *connector_state,
- const struct dc_stream_state *old_stream)
+ const struct dc_stream_state *old_stream,
+ int requested_bpc)
{
struct dc_crtc_timing *timing_out = &stream->timing;
const struct drm_display_info *info = &connector->display_info;
@@ -3985,8 +4202,9 @@ static void fill_stream_properties_from_drm_display_mode(
timing_out->timing_3d_format = TIMING_3D_FORMAT_NONE;
timing_out->display_color_depth = convert_color_depth_from_display_info(
- connector, connector_state,
- (timing_out->pixel_encoding == PIXEL_ENCODING_YCBCR420));
+ connector,
+ (timing_out->pixel_encoding == PIXEL_ENCODING_YCBCR420),
+ requested_bpc);
timing_out->scan_type = SCANNING_TYPE_NODATA;
timing_out->hdmi_vic = 0;
@@ -4192,7 +4410,8 @@ static struct dc_stream_state *
create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
const struct drm_display_mode *drm_mode,
const struct dm_connector_state *dm_state,
- const struct dc_stream_state *old_stream)
+ const struct dc_stream_state *old_stream,
+ int requested_bpc)
{
struct drm_display_mode *preferred_mode = NULL;
struct drm_connector *drm_connector;
@@ -4277,10 +4496,10 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
*/
if (!scale || mode_refresh != preferred_refresh)
fill_stream_properties_from_drm_display_mode(stream,
- &mode, &aconnector->base, con_state, NULL);
+ &mode, &aconnector->base, con_state, NULL, requested_bpc);
else
fill_stream_properties_from_drm_display_mode(stream,
- &mode, &aconnector->base, con_state, old_stream);
+ &mode, &aconnector->base, con_state, old_stream, requested_bpc);
stream->timing.flags.DSC = 0;
@@ -4317,14 +4536,10 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
if (stream->signal == SIGNAL_TYPE_HDMI_TYPE_A)
mod_build_hf_vsif_infopacket(stream, &stream->vsp_infopacket, false, false);
- if (stream->link->psr_feature_enabled) {
+ if (stream->link->psr_settings.psr_feature_enabled) {
struct dc *core_dc = stream->link->ctx->dc;
if (dc_is_dmcu_initialized(core_dc)) {
- struct dmcu *dmcu = core_dc->res_pool->dmcu;
-
- stream->psr_version = dmcu->dmcu_version.psr_version;
-
//
// should decide stream support vsc sdp colorimetry capability
// before building vsc info packet
@@ -4803,16 +5018,54 @@ static void handle_edid_mgmt(struct amdgpu_dm_connector *aconnector)
create_eml_sink(aconnector);
}
+static struct dc_stream_state *
+create_validate_stream_for_sink(struct amdgpu_dm_connector *aconnector,
+ const struct drm_display_mode *drm_mode,
+ const struct dm_connector_state *dm_state,
+ const struct dc_stream_state *old_stream)
+{
+ struct drm_connector *connector = &aconnector->base;
+ struct amdgpu_device *adev = connector->dev->dev_private;
+ struct dc_stream_state *stream;
+ int requested_bpc = connector->state ? connector->state->max_requested_bpc : 8;
+ enum dc_status dc_result = DC_OK;
+
+ do {
+ stream = create_stream_for_sink(aconnector, drm_mode,
+ dm_state, old_stream,
+ requested_bpc);
+ if (stream == NULL) {
+ DRM_ERROR("Failed to create stream for sink!\n");
+ break;
+ }
+
+ dc_result = dc_validate_stream(adev->dm.dc, stream);
+
+ if (dc_result != DC_OK) {
+ DRM_DEBUG_KMS("Mode %dx%d (clk %d) failed DC validation with error %d\n",
+ drm_mode->hdisplay,
+ drm_mode->vdisplay,
+ drm_mode->clock,
+ dc_result);
+
+ dc_stream_release(stream);
+ stream = NULL;
+ requested_bpc -= 2; /* lower bpc to retry validation */
+ }
+
+ } while (stream == NULL && requested_bpc >= 6);
+
+ return stream;
+}
+
enum drm_mode_status amdgpu_dm_connector_mode_valid(struct drm_connector *connector,
struct drm_display_mode *mode)
{
int result = MODE_ERROR;
struct dc_sink *dc_sink;
- struct amdgpu_device *adev = connector->dev->dev_private;
/* TODO: Unhardcode stream count */
struct dc_stream_state *stream;
struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
- enum dc_status dc_result = DC_OK;
if ((mode->flags & DRM_MODE_FLAG_INTERLACE) ||
(mode->flags & DRM_MODE_FLAG_DBLSCAN))
@@ -4833,24 +5086,11 @@ enum drm_mode_status amdgpu_dm_connector_mode_valid(struct drm_connector *connec
goto fail;
}
- stream = create_stream_for_sink(aconnector, mode, NULL, NULL);
- if (stream == NULL) {
- DRM_ERROR("Failed to create stream for sink!\n");
- goto fail;
- }
-
- dc_result = dc_validate_stream(adev->dm.dc, stream);
-
- if (dc_result == DC_OK)
+ stream = create_validate_stream_for_sink(aconnector, mode, NULL, NULL);
+ if (stream) {
+ dc_stream_release(stream);
result = MODE_OK;
- else
- DRM_DEBUG_KMS("Mode %dx%d (clk %d) failed DC validation with error %d\n",
- mode->hdisplay,
- mode->vdisplay,
- mode->clock,
- dc_result);
-
- dc_stream_release(stream);
+ }
fail:
/* TODO: error handling*/
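To summarize the retry behaviour introduced above (illustration only): create_validate_stream_for_sink() starts from the connector's max_requested_bpc (or 8 when there is no connector state), and each time dc_validate_stream() rejects the mode it releases the stream and retries with the colour depth lowered by 2, stopping at 6 bpc. A caller such as mode_valid therefore only has to check for NULL:

	/* with max_requested_bpc == 10 the attempts are 10 -> 8 -> 6 */
	stream = create_validate_stream_for_sink(aconnector, mode, NULL, NULL);
	if (stream) {
		dc_stream_release(stream);
		result = MODE_OK;
	}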
@@ -5173,10 +5413,12 @@ static int dm_encoder_helper_atomic_check(struct drm_encoder *encoder,
return 0;
if (!state->duplicated) {
+ int max_bpc = conn_state->max_requested_bpc;
is_y420 = drm_mode_is_420_also(&connector->display_info, adjusted_mode) &&
aconnector->force_yuv420_output;
- color_depth = convert_color_depth_from_display_info(connector, conn_state,
- is_y420);
+ color_depth = convert_color_depth_from_display_info(connector,
+ is_y420,
+ max_bpc);
bpp = convert_dc_color_depth_into_bpc(color_depth) * 3;
clock = adjusted_mode->clock;
dm_new_connector_state->pbn = drm_dp_calc_pbn_mode(clock, bpp, false);
@@ -5331,6 +5573,7 @@ static int dm_plane_helper_prepare_fb(struct drm_plane *plane,
uint64_t tiling_flags;
uint32_t domain;
int r;
+ bool tmz_surface = false;
bool force_disable_dcc = false;
dm_plane_state_old = to_dm_plane_state(plane->state);
@@ -5380,6 +5623,8 @@ static int dm_plane_helper_prepare_fb(struct drm_plane *plane,
amdgpu_bo_get_tiling_flags(rbo, &tiling_flags);
+ tmz_surface = amdgpu_bo_encrypted(rbo);
+
ttm_eu_backoff_reservation(&ticket, &list);
afb->address = amdgpu_bo_gpu_offset(rbo);
@@ -5395,7 +5640,7 @@ static int dm_plane_helper_prepare_fb(struct drm_plane *plane,
adev, afb, plane_state->format, plane_state->rotation,
tiling_flags, &plane_state->tiling_info,
&plane_state->plane_size, &plane_state->dcc,
- &plane_state->address,
+ &plane_state->address, tmz_surface,
force_disable_dcc);
}
@@ -5542,6 +5787,12 @@ static int get_plane_formats(const struct drm_plane *plane,
formats[num_formats++] = DRM_FORMAT_NV12;
if (plane_cap && plane_cap->pixel_format_support.p010)
formats[num_formats++] = DRM_FORMAT_P010;
+ if (plane_cap && plane_cap->pixel_format_support.fp16) {
+ formats[num_formats++] = DRM_FORMAT_XRGB16161616F;
+ formats[num_formats++] = DRM_FORMAT_ARGB16161616F;
+ formats[num_formats++] = DRM_FORMAT_XBGR16161616F;
+ formats[num_formats++] = DRM_FORMAT_ABGR16161616F;
+ }
break;
case DRM_PLANE_TYPE_OVERLAY:
@@ -6569,6 +6820,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
unsigned long flags;
struct amdgpu_bo *abo;
uint64_t tiling_flags;
+ bool tmz_surface = false;
uint32_t target_vblank, last_flip_vblank;
bool vrr_active = amdgpu_dm_vrr_active(acrtc_state);
bool pflip_present = false;
@@ -6621,6 +6873,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
if (new_pcrtc_state->color_mgmt_changed) {
bundle->surface_updates[planes_count].gamma = dc_plane->gamma_correction;
bundle->surface_updates[planes_count].in_transfer_func = dc_plane->in_transfer_func;
+ bundle->surface_updates[planes_count].gamut_remap_matrix = &dc_plane->gamut_remap_matrix;
}
fill_dc_scaling_info(new_plane_state,
@@ -6663,12 +6916,15 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
amdgpu_bo_get_tiling_flags(abo, &tiling_flags);
+ tmz_surface = amdgpu_bo_encrypted(abo);
+
amdgpu_bo_unreserve(abo);
fill_dc_plane_info_and_addr(
dm->adev, new_plane_state, tiling_flags,
&bundle->plane_infos[planes_count],
&bundle->flip_addrs[planes_count].address,
+ tmz_surface,
false);
DRM_DEBUG_DRIVER("plane: id=%d dcc_en=%d\n",
@@ -6814,7 +7070,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
}
mutex_lock(&dm->dc_lock);
if ((acrtc_state->update_type > UPDATE_TYPE_FAST) &&
- acrtc_state->stream->link->psr_allow_active)
+ acrtc_state->stream->link->psr_settings.psr_allow_active)
amdgpu_dm_psr_disable(acrtc_state->stream);
dc_commit_updates_for_stream(dm->dc,
@@ -6825,12 +7081,12 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
dc_state);
if ((acrtc_state->update_type > UPDATE_TYPE_FAST) &&
- acrtc_state->stream->psr_version &&
- !acrtc_state->stream->link->psr_feature_enabled)
+ acrtc_state->stream->link->psr_settings.psr_version != DC_PSR_VERSION_UNSUPPORTED &&
+ !acrtc_state->stream->link->psr_settings.psr_feature_enabled)
amdgpu_dm_link_setup_psr(acrtc_state->stream);
else if ((acrtc_state->update_type == UPDATE_TYPE_FAST) &&
- acrtc_state->stream->link->psr_feature_enabled &&
- !acrtc_state->stream->link->psr_allow_active) {
+ acrtc_state->stream->link->psr_settings.psr_feature_enabled &&
+ !acrtc_state->stream->link->psr_settings.psr_allow_active) {
amdgpu_dm_psr_enable(acrtc_state->stream);
}
@@ -7144,7 +7400,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
DRM_DEBUG_DRIVER("Atomic commit: RESET. crtc id %d:[%p]\n", acrtc->crtc_id, acrtc);
/* i.e. reset mode */
if (dm_old_crtc_state->stream) {
- if (dm_old_crtc_state->stream->link->psr_allow_active)
+ if (dm_old_crtc_state->stream->link->psr_settings.psr_allow_active)
amdgpu_dm_psr_disable(dm_old_crtc_state->stream);
remove_stream(adev, acrtc, dm_old_crtc_state->stream);
@@ -7592,10 +7848,10 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm,
if (!drm_atomic_crtc_needs_modeset(new_crtc_state))
goto skip_modeset;
- new_stream = create_stream_for_sink(aconnector,
- &new_crtc_state->mode,
- dm_new_conn_state,
- dm_old_crtc_state->stream);
+ new_stream = create_validate_stream_for_sink(aconnector,
+ &new_crtc_state->mode,
+ dm_new_conn_state,
+ dm_old_crtc_state->stream);
/*
* we can have no stream on ACTION_SET if a display
@@ -8056,6 +8312,7 @@ dm_determine_update_type_for_commit(struct amdgpu_display_manager *dm,
struct dc_flip_addrs *flip_addr = &bundle->flip_addrs[num_plane];
struct dc_scaling_info *scaling_info = &bundle->scaling_infos[num_plane];
uint64_t tiling_flags;
+ bool tmz_surface = false;
new_plane_crtc = new_plane_state->crtc;
new_dm_plane_state = to_dm_plane_state(new_plane_state);
@@ -8085,6 +8342,8 @@ dm_determine_update_type_for_commit(struct amdgpu_display_manager *dm,
new_dm_plane_state->dc_state->gamma_correction;
bundle->surface_updates[num_plane].in_transfer_func =
new_dm_plane_state->dc_state->in_transfer_func;
+ bundle->surface_updates[num_plane].gamut_remap_matrix =
+ &new_dm_plane_state->dc_state->gamut_remap_matrix;
bundle->stream_update.gamut_remap =
&new_dm_crtc_state->stream->gamut_remap_matrix;
bundle->stream_update.output_csc_transform =
@@ -8101,14 +8360,14 @@ dm_determine_update_type_for_commit(struct amdgpu_display_manager *dm,
bundle->surface_updates[num_plane].scaling_info = scaling_info;
if (amdgpu_fb) {
- ret = get_fb_info(amdgpu_fb, &tiling_flags);
+ ret = get_fb_info(amdgpu_fb, &tiling_flags, &tmz_surface);
if (ret)
goto cleanup;
ret = fill_dc_plane_info_and_addr(
dm->adev, new_plane_state, tiling_flags,
plane_info,
- &flip_addr->address,
+ &flip_addr->address, tmz_surface,
false);
if (ret)
goto cleanup;
@@ -8609,8 +8868,17 @@ static void amdgpu_dm_set_psr_caps(struct dc_link *link)
return;
if (dm_helpers_dp_read_dpcd(NULL, link, DP_PSR_SUPPORT,
dpcd_data, sizeof(dpcd_data))) {
- link->psr_feature_enabled = dpcd_data[0] ? true:false;
- DRM_INFO("PSR support:%d\n", link->psr_feature_enabled);
+ link->dpcd_caps.psr_caps.psr_version = dpcd_data[0];
+
+ if (dpcd_data[0] == 0) {
+ link->psr_settings.psr_version = DC_PSR_VERSION_UNSUPPORTED;
+ link->psr_settings.psr_feature_enabled = false;
+ } else {
+ link->psr_settings.psr_version = DC_PSR_VERSION_1;
+ link->psr_settings.psr_feature_enabled = true;
+ }
+
+ DRM_INFO("PSR support:%d\n", link->psr_settings.psr_feature_enabled);
}
}
@@ -8625,16 +8893,14 @@ static bool amdgpu_dm_link_setup_psr(struct dc_stream_state *stream)
struct dc_link *link = NULL;
struct psr_config psr_config = {0};
struct psr_context psr_context = {0};
- struct dc *dc = NULL;
bool ret = false;
if (stream == NULL)
return false;
link = stream->link;
- dc = link->ctx->dc;
- psr_config.psr_version = dc->res_pool->dmcu->dmcu_version.psr_version;
+ psr_config.psr_version = link->dpcd_caps.psr_caps.psr_version;
if (psr_config.psr_version > 0) {
psr_config.psr_exit_link_training_required = 0x1;
@@ -8646,7 +8912,7 @@ static bool amdgpu_dm_link_setup_psr(struct dc_stream_state *stream)
ret = dc_link_setup_psr(link, stream, &psr_config, &psr_context);
}
- DRM_DEBUG_DRIVER("PSR link: %d\n", link->psr_feature_enabled);
+ DRM_DEBUG_DRIVER("PSR link: %d\n", link->psr_settings.psr_feature_enabled);
return ret;
}
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index 5cab3e65d992..d61186ff411d 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -315,6 +315,7 @@ struct amdgpu_display_manager {
#endif
struct drm_atomic_state *cached_state;
+ struct dc_state *cached_dc_state;
struct dm_comressor_info compressor;
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index 2233d293a707..4dfb6b55bb2e 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -239,7 +239,8 @@ static int __set_output_tf(struct dc_transfer_func *func,
* instead to simulate this.
*/
gamma->type = GAMMA_CUSTOM;
- res = mod_color_calculate_degamma_params(func, gamma, true);
+ res = mod_color_calculate_degamma_params(NULL, func,
+ gamma, true);
} else {
/*
* Assume sRGB. The actual mapping will depend on whether the
@@ -271,7 +272,7 @@ static int __set_input_tf(struct dc_transfer_func *func,
__drm_lut_to_dc_gamma(lut, gamma, false);
- res = mod_color_calculate_degamma_params(func, gamma, true);
+ res = mod_color_calculate_degamma_params(NULL, func, gamma, true);
dc_gamma_release(&gamma);
return res ? 0 : -ENOMEM;
@@ -419,9 +420,21 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
struct dc_plane_state *dc_plane_state)
{
const struct drm_color_lut *degamma_lut;
+ enum dc_transfer_func_predefined tf = TRANSFER_FUNCTION_SRGB;
uint32_t degamma_size;
int r;
+ /* Get the correct base transfer function for implicit degamma. */
+ switch (dc_plane_state->format) {
+ case SURFACE_PIXEL_FORMAT_VIDEO_420_YCbCr:
+ case SURFACE_PIXEL_FORMAT_VIDEO_420_YCrCb:
+ /* DC doesn't have a transfer function for BT601 specifically. */
+ tf = TRANSFER_FUNCTION_BT709;
+ break;
+ default:
+ break;
+ }
+
if (crtc->cm_has_degamma) {
degamma_lut = __extract_blob_lut(crtc->base.degamma_lut,
&degamma_size);
@@ -455,8 +468,7 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
* map these to the atomic one instead.
*/
if (crtc->cm_is_degamma_srgb)
- dc_plane_state->in_transfer_func->tf =
- TRANSFER_FUNCTION_SRGB;
+ dc_plane_state->in_transfer_func->tf = tf;
else
dc_plane_state->in_transfer_func->tf =
TRANSFER_FUNCTION_LINEAR;
@@ -471,7 +483,12 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
* in linear space. Assume that the input is sRGB.
*/
dc_plane_state->in_transfer_func->type = TF_TYPE_PREDEFINED;
- dc_plane_state->in_transfer_func->tf = TRANSFER_FUNCTION_SRGB;
+ dc_plane_state->in_transfer_func->tf = tf;
+
+ if (tf != TRANSFER_FUNCTION_SRGB &&
+ !mod_color_calculate_degamma_params(NULL,
+ dc_plane_state->in_transfer_func, NULL, false))
+ return -ENOMEM;
} else {
/* ...Otherwise we can just bypass the DGM block. */
dc_plane_state->in_transfer_func->type = TF_TYPE_BYPASS;
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
index 0461fecd68db..076af267b488 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
@@ -32,7 +32,7 @@
#include "amdgpu_dm.h"
#include "amdgpu_dm_debugfs.h"
#include "dm_helpers.h"
-#include "dmub/inc/dmub_srv.h"
+#include "dmub/dmub_srv.h"
struct dmub_debugfs_trace_header {
uint32_t entry_count;
@@ -838,6 +838,44 @@ static int vrr_range_show(struct seq_file *m, void *data)
return 0;
}
+#ifdef CONFIG_DRM_AMD_DC_HDCP
+/*
+ * Returns the HDCP capability of the Display (1.4 for now).
+ *
+ * NOTE: Not all HDMI displays report their HDCP caps even when they are capable.
+ * Since it's rare for a display to not be HDCP 1.4 capable, we set HDMI as always capable.
+ *
+ * Example usage: cat /sys/kernel/debug/dri/0/DP-1/hdcp_sink_capability
+ * or cat /sys/kernel/debug/dri/0/HDMI-A-1/hdcp_sink_capability
+ */
+static int hdcp_sink_capability_show(struct seq_file *m, void *data)
+{
+ struct drm_connector *connector = m->private;
+ struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
+ bool hdcp_cap, hdcp2_cap;
+
+ if (connector->status != connector_status_connected)
+ return -ENODEV;
+
+ seq_printf(m, "%s:%d HDCP version: ", connector->name, connector->base.id);
+
+ hdcp_cap = dc_link_is_hdcp14(aconnector->dc_link);
+ hdcp2_cap = dc_link_is_hdcp22(aconnector->dc_link);
+
+
+ if (hdcp_cap)
+ seq_printf(m, "%s ", "HDCP1.4");
+ if (hdcp2_cap)
+ seq_printf(m, "%s ", "HDCP2.2");
+
+ if (!hdcp_cap && !hdcp2_cap)
+ seq_printf(m, "%s ", "None");
+
+ seq_puts(m, "\n");
+
+ return 0;
+}
+#endif
/* function description
*
* generic SDP message access for testing
@@ -964,6 +1002,9 @@ DEFINE_SHOW_ATTRIBUTE(dmub_fw_state);
DEFINE_SHOW_ATTRIBUTE(dmub_tracebuffer);
DEFINE_SHOW_ATTRIBUTE(output_bpc);
DEFINE_SHOW_ATTRIBUTE(vrr_range);
+#ifdef CONFIG_DRM_AMD_DC_HDCP
+DEFINE_SHOW_ATTRIBUTE(hdcp_sink_capability);
+#endif
static const struct file_operations dp_link_settings_debugfs_fops = {
.owner = THIS_MODULE,
@@ -1019,12 +1060,23 @@ static const struct {
{"test_pattern", &dp_phy_test_pattern_fops},
{"output_bpc", &output_bpc_fops},
{"vrr_range", &vrr_range_fops},
+#ifdef CONFIG_DRM_AMD_DC_HDCP
+ {"hdcp_sink_capability", &hdcp_sink_capability_fops},
+#endif
{"sdp_message", &sdp_message_fops},
{"aux_dpcd_address", &dp_dpcd_address_debugfs_fops},
{"aux_dpcd_size", &dp_dpcd_size_debugfs_fops},
{"aux_dpcd_data", &dp_dpcd_data_debugfs_fops}
};
+#ifdef CONFIG_DRM_AMD_DC_HDCP
+static const struct {
+ char *name;
+ const struct file_operations *fops;
+} hdmi_debugfs_entries[] = {
+ {"hdcp_sink_capability", &hdcp_sink_capability_fops}
+};
+#endif
/*
* Force YUV420 output if available from the given mode
*/
@@ -1093,6 +1145,15 @@ void connector_debugfs_init(struct amdgpu_dm_connector *connector)
connector->debugfs_dpcd_address = 0;
connector->debugfs_dpcd_size = 0;
+#ifdef CONFIG_DRM_AMD_DC_HDCP
+ if (connector->base.connector_type == DRM_MODE_CONNECTOR_HDMIA) {
+ for (i = 0; i < ARRAY_SIZE(hdmi_debugfs_entries); i++) {
+ debugfs_create_file(hdmi_debugfs_entries[i].name,
+ 0644, dir, connector,
+ hdmi_debugfs_entries[i].fops);
+ }
+ }
+#endif
}
/*
@@ -1167,8 +1228,9 @@ static int current_backlight_read(struct seq_file *m, void *data)
struct drm_info_node *node = (struct drm_info_node *)m->private;
struct drm_device *dev = node->minor->dev;
struct amdgpu_device *adev = dev->dev_private;
- struct dc *dc = adev->dm.dc;
- unsigned int backlight = dc_get_current_backlight_pwm(dc);
+ struct amdgpu_display_manager *dm = &adev->dm;
+
+ unsigned int backlight = dc_link_get_backlight_level(dm->backlight_link);
seq_printf(m, "0x%x\n", backlight);
return 0;
@@ -1184,8 +1246,9 @@ static int target_backlight_read(struct seq_file *m, void *data)
struct drm_info_node *node = (struct drm_info_node *)m->private;
struct drm_device *dev = node->minor->dev;
struct amdgpu_device *adev = dev->dev_private;
- struct dc *dc = adev->dm.dc;
- unsigned int backlight = dc_get_target_backlight_pwm(dc);
+ struct amdgpu_display_manager *dm = &adev->dm;
+
+ unsigned int backlight = dc_link_get_target_backlight_pwm(dm->backlight_link);
seq_printf(m, "0x%x\n", backlight);
return 0;
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
index c20fb08c450b..b086d5c906e0 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
@@ -445,7 +445,7 @@ bool dm_helpers_dp_read_dpcd(
struct amdgpu_dm_connector *aconnector = link->priv;
if (!aconnector) {
- DRM_ERROR("Failed to find connector for link!");
+ DC_LOG_DC("Failed to find connector for link!\n");
return false;
}
@@ -554,6 +554,7 @@ enum dc_edid_status dm_helpers_read_local_edid(
struct dc_sink *sink)
{
struct amdgpu_dm_connector *aconnector = link->priv;
+ struct drm_connector *connector = &aconnector->base;
struct i2c_adapter *ddc;
int retry = 3;
enum dc_edid_status edid_status;
@@ -571,6 +572,15 @@ enum dc_edid_status dm_helpers_read_local_edid(
edid = drm_get_edid(&aconnector->base, ddc);
+ /* DP Compliance Test 4.2.2.6 */
+ if (link->aux_mode && connector->edid_corrupt)
+ drm_dp_send_real_edid_checksum(&aconnector->dm_dp_aux.aux, connector->real_edid_checksum);
+
+ if (!edid && connector->edid_corrupt) {
+ connector->edid_corrupt = false;
+ return EDID_BAD_CHECKSUM;
+ }
+
if (!edid)
return EDID_NO_RESPONSE;
@@ -605,34 +615,10 @@ enum dc_edid_status dm_helpers_read_local_edid(
DRM_ERROR("EDID err: %d, on connector: %s",
edid_status,
aconnector->base.name);
- if (link->aux_mode) {
- union test_request test_request = { {0} };
- union test_response test_response = { {0} };
-
- dm_helpers_dp_read_dpcd(ctx,
- link,
- DP_TEST_REQUEST,
- &test_request.raw,
- sizeof(union test_request));
-
- if (!test_request.bits.EDID_READ)
- return edid_status;
- test_response.bits.EDID_CHECKSUM_WRITE = 1;
-
- dm_helpers_dp_write_dpcd(ctx,
- link,
- DP_TEST_EDID_CHECKSUM,
- &sink->dc_edid.raw_edid[sink->dc_edid.length-1],
- 1);
-
- dm_helpers_dp_write_dpcd(ctx,
- link,
- DP_TEST_RESPONSE,
- &test_response.raw,
- sizeof(test_response));
-
- }
+ /* DP Compliance Test 4.2.2.3 */
+ if (link->aux_mode)
+ drm_dp_send_real_edid_checksum(&aconnector->dm_dp_aux.aux, sink->dc_edid.raw_edid[sink->dc_edid.length-1]);
return edid_status;
}
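For reference (hedged; signature quoted from the DRM DP helpers of this kernel vintage), the helper used above is:

	bool drm_dp_send_real_edid_checksum(struct drm_dp_aux *aux,
					    u8 real_edid_checksum);

It checks DP_DEVICE_SERVICE_IRQ_VECTOR and DP_TEST_REQUEST for an EDID-read test request and, if present, writes the checksum to DP_TEST_EDID_CHECKSUM and acks via DP_TEST_RESPONSE, i.e. the same DP compliance sequence that the removed open-coded block performed.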
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
index d2917759b7ab..ae0a7ef1d595 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
@@ -41,53 +41,10 @@
#include "amdgpu_dm_debugfs.h"
#endif
-
#if defined(CONFIG_DRM_AMD_DC_DCN)
#include "dc/dcn20/dcn20_resource.h"
#endif
-/* #define TRACE_DPCD */
-
-#ifdef TRACE_DPCD
-#define SIDE_BAND_MSG(address) (address >= DP_SIDEBAND_MSG_DOWN_REQ_BASE && address < DP_SINK_COUNT_ESI)
-
-static inline char *side_band_msg_type_to_str(uint32_t address)
-{
- static char str[10] = {0};
-
- if (address < DP_SIDEBAND_MSG_UP_REP_BASE)
- strcpy(str, "DOWN_REQ");
- else if (address < DP_SIDEBAND_MSG_DOWN_REP_BASE)
- strcpy(str, "UP_REP");
- else if (address < DP_SIDEBAND_MSG_UP_REQ_BASE)
- strcpy(str, "DOWN_REP");
- else
- strcpy(str, "UP_REQ");
-
- return str;
-}
-
-static void log_dpcd(uint8_t type,
- uint32_t address,
- uint8_t *data,
- uint32_t size,
- bool res)
-{
- DRM_DEBUG_KMS("Op: %s, addr: %04x, SideBand Msg: %s, Op res: %s\n",
- (type == DP_AUX_NATIVE_READ) ||
- (type == DP_AUX_I2C_READ) ?
- "Read" : "Write",
- address,
- SIDE_BAND_MSG(address) ?
- side_band_msg_type_to_str(address) : "Nop",
- res ? "OK" : "Fail");
-
- if (res) {
- print_hex_dump(KERN_INFO, "Body: ", DUMP_PREFIX_NONE, 16, 1, data, size, false);
- }
-}
-#endif
-
static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux,
struct drm_dp_aux_msg *msg)
{
@@ -136,17 +93,23 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux,
static void
dm_dp_mst_connector_destroy(struct drm_connector *connector)
{
- struct amdgpu_dm_connector *amdgpu_dm_connector = to_amdgpu_dm_connector(connector);
- struct amdgpu_encoder *amdgpu_encoder = amdgpu_dm_connector->mst_encoder;
+ struct amdgpu_dm_connector *aconnector =
+ to_amdgpu_dm_connector(connector);
+ struct amdgpu_encoder *amdgpu_encoder = aconnector->mst_encoder;
- kfree(amdgpu_dm_connector->edid);
- amdgpu_dm_connector->edid = NULL;
+ if (aconnector->dc_sink) {
+ dc_link_remove_remote_sink(aconnector->dc_link,
+ aconnector->dc_sink);
+ dc_sink_release(aconnector->dc_sink);
+ }
+
+ kfree(aconnector->edid);
drm_encoder_cleanup(&amdgpu_encoder->base);
kfree(amdgpu_encoder);
drm_connector_cleanup(connector);
- drm_dp_mst_put_port_malloc(amdgpu_dm_connector->port);
- kfree(amdgpu_dm_connector);
+ drm_dp_mst_put_port_malloc(aconnector->port);
+ kfree(aconnector);
}
static int
@@ -435,40 +398,13 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr,
*/
amdgpu_dm_connector_funcs_reset(connector);
- DRM_INFO("DM_MST: added connector: %p [id: %d] [master: %p]\n",
- aconnector, connector->base.id, aconnector->mst_port);
-
drm_dp_mst_get_port_malloc(port);
- DRM_DEBUG_KMS(":%d\n", connector->base.id);
-
return connector;
}
-static void dm_dp_destroy_mst_connector(struct drm_dp_mst_topology_mgr *mgr,
- struct drm_connector *connector)
-{
- struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
-
- DRM_INFO("DM_MST: Disabling connector: %p [id: %d] [master: %p]\n",
- aconnector, connector->base.id, aconnector->mst_port);
-
- if (aconnector->dc_sink) {
- amdgpu_dm_update_freesync_caps(connector, NULL);
- dc_link_remove_remote_sink(aconnector->dc_link,
- aconnector->dc_sink);
- dc_sink_release(aconnector->dc_sink);
- aconnector->dc_sink = NULL;
- aconnector->dc_link->cur_link_settings.lane_count = 0;
- }
-
- drm_connector_unregister(connector);
- drm_connector_put(connector);
-}
-
static const struct drm_dp_mst_topology_cbs dm_mst_cbs = {
.add_connector = dm_dp_add_mst_connector,
- .destroy_connector = dm_dp_destroy_mst_connector,
};
void amdgpu_dm_initialize_dp_connector(struct amdgpu_display_manager *dm,
diff --git a/drivers/gpu/drm/amd/display/dc/basics/Makefile b/drivers/gpu/drm/amd/display/dc/basics/Makefile
index 7ad0cad0f4ef..01b99e0d788e 100644
--- a/drivers/gpu/drm/amd/display/dc/basics/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/basics/Makefile
@@ -24,8 +24,7 @@
# It provides the general basic services required by other DAL
# subcomponents.
-BASICS = conversion.o fixpt31_32.o \
- log_helpers.o vector.o dc_common.o
+BASICS = conversion.o fixpt31_32.o vector.o dc_common.o
AMD_DAL_BASICS = $(addprefix $(AMDDALPATH)/dc/basics/,$(BASICS))
diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c
index 8edc2506d49e..bed91572f82a 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c
@@ -113,13 +113,19 @@ static void encoder_control_dmcub(
struct dc_dmub_srv *dmcub,
struct dig_encoder_stream_setup_parameters_v1_5 *dig)
{
- struct dmub_rb_cmd_digx_encoder_control encoder_control = { 0 };
+ union dmub_rb_cmd cmd;
- encoder_control.header.type = DMUB_CMD__VBIOS;
- encoder_control.header.sub_type = DMUB_CMD__VBIOS_DIGX_ENCODER_CONTROL;
- encoder_control.encoder_control.dig.stream_param = *dig;
+ memset(&cmd, 0, sizeof(cmd));
- dc_dmub_srv_cmd_queue(dmcub, &encoder_control.header);
+ cmd.digx_encoder_control.header.type = DMUB_CMD__VBIOS;
+ cmd.digx_encoder_control.header.sub_type =
+ DMUB_CMD__VBIOS_DIGX_ENCODER_CONTROL;
+ cmd.digx_encoder_control.header.payload_bytes =
+ sizeof(cmd.digx_encoder_control) -
+ sizeof(cmd.digx_encoder_control.header);
+ cmd.digx_encoder_control.encoder_control.dig.stream_param = *dig;
+
+ dc_dmub_srv_cmd_queue(dmcub, &cmd);
dc_dmub_srv_cmd_execute(dmcub);
dc_dmub_srv_wait_idle(dmcub);
}
@@ -238,14 +244,19 @@ static void transmitter_control_dmcub(
struct dc_dmub_srv *dmcub,
struct dig_transmitter_control_parameters_v1_6 *dig)
{
- struct dmub_rb_cmd_dig1_transmitter_control transmitter_control;
+ union dmub_rb_cmd cmd;
+
+ memset(&cmd, 0, sizeof(cmd));
- transmitter_control.header.type = DMUB_CMD__VBIOS;
- transmitter_control.header.sub_type =
+ cmd.dig1_transmitter_control.header.type = DMUB_CMD__VBIOS;
+ cmd.dig1_transmitter_control.header.sub_type =
DMUB_CMD__VBIOS_DIG1_TRANSMITTER_CONTROL;
- transmitter_control.transmitter_control.dig = *dig;
+ cmd.dig1_transmitter_control.header.payload_bytes =
+ sizeof(cmd.dig1_transmitter_control) -
+ sizeof(cmd.dig1_transmitter_control.header);
+ cmd.dig1_transmitter_control.transmitter_control.dig = *dig;
- dc_dmub_srv_cmd_queue(dmcub, &transmitter_control.header);
+ dc_dmub_srv_cmd_queue(dmcub, &cmd);
dc_dmub_srv_cmd_execute(dmcub);
dc_dmub_srv_wait_idle(dmcub);
}
@@ -339,13 +350,18 @@ static void set_pixel_clock_dmcub(
struct dc_dmub_srv *dmcub,
struct set_pixel_clock_parameter_v1_7 *clk)
{
- struct dmub_rb_cmd_set_pixel_clock pixel_clock = { 0 };
+ union dmub_rb_cmd cmd;
- pixel_clock.header.type = DMUB_CMD__VBIOS;
- pixel_clock.header.sub_type = DMUB_CMD__VBIOS_SET_PIXEL_CLOCK;
- pixel_clock.pixel_clock.clk = *clk;
+ memset(&cmd, 0, sizeof(cmd));
- dc_dmub_srv_cmd_queue(dmcub, &pixel_clock.header);
+ cmd.set_pixel_clock.header.type = DMUB_CMD__VBIOS;
+ cmd.set_pixel_clock.header.sub_type = DMUB_CMD__VBIOS_SET_PIXEL_CLOCK;
+ cmd.set_pixel_clock.header.payload_bytes =
+ sizeof(cmd.set_pixel_clock) -
+ sizeof(cmd.set_pixel_clock.header);
+ cmd.set_pixel_clock.pixel_clock.clk = *clk;
+
+ dc_dmub_srv_cmd_queue(dmcub, &cmd);
dc_dmub_srv_cmd_execute(dmcub);
dc_dmub_srv_wait_idle(dmcub);
}
@@ -705,13 +721,19 @@ static void enable_disp_power_gating_dmcub(
struct dc_dmub_srv *dmcub,
struct enable_disp_power_gating_parameters_v2_1 *pwr)
{
- struct dmub_rb_cmd_enable_disp_power_gating power_gating;
+ union dmub_rb_cmd cmd;
+
+ memset(&cmd, 0, sizeof(cmd));
- power_gating.header.type = DMUB_CMD__VBIOS;
- power_gating.header.sub_type = DMUB_CMD__VBIOS_ENABLE_DISP_POWER_GATING;
- power_gating.power_gating.pwr = *pwr;
+ cmd.enable_disp_power_gating.header.type = DMUB_CMD__VBIOS;
+ cmd.enable_disp_power_gating.header.sub_type =
+ DMUB_CMD__VBIOS_ENABLE_DISP_POWER_GATING;
+ cmd.enable_disp_power_gating.header.payload_bytes =
+ sizeof(cmd.enable_disp_power_gating) -
+ sizeof(cmd.enable_disp_power_gating.header);
+ cmd.enable_disp_power_gating.power_gating.pwr = *pwr;
- dc_dmub_srv_cmd_queue(dmcub, &power_gating.header);
+ dc_dmub_srv_cmd_queue(dmcub, &cmd);
dc_dmub_srv_cmd_execute(dmcub);
dc_dmub_srv_wait_idle(dmcub);
}
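All four VBIOS command helpers in this file now follow the same shape; a generic sketch (the <member> placeholder stands for the per-command union field, everything else matches the diff):

	union dmub_rb_cmd cmd;

	memset(&cmd, 0, sizeof(cmd));
	cmd.<member>.header.type = DMUB_CMD__VBIOS;
	cmd.<member>.header.sub_type = /* command-specific sub type */;
	cmd.<member>.header.payload_bytes =
		sizeof(cmd.<member>) - sizeof(cmd.<member>.header);
	/* fill the command-specific payload, then hand the whole union to DMUB */
	dc_dmub_srv_cmd_queue(dmcub, &cmd);
	dc_dmub_srv_cmd_execute(dmcub);
	dc_dmub_srv_wait_idle(dmcub);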
diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c
index 3960a8db94cb..1e5a92b192a1 100644
--- a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c
+++ b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c
@@ -690,6 +690,26 @@ static void hack_bounding_box(struct dcn_bw_internal_vars *v,
struct dc_debug_options *dbg,
struct dc_state *context)
{
+ int i;
+
+ for (i = 0; i < MAX_PIPES; i++) {
+ struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+
+ /**
+ * Workaround for avoiding pipe-split in cases where we'd split
+ * planes that are too small, resulting in splits that aren't
+ * valid for the scaler.
+ */
+ if (pipe->plane_state &&
+ (pipe->plane_state->dst_rect.width <= 16 ||
+ pipe->plane_state->dst_rect.height <= 16 ||
+ pipe->plane_state->src_rect.width <= 16 ||
+ pipe->plane_state->src_rect.height <= 16)) {
+ hack_disable_optional_pipe_split(v);
+ return;
+ }
+ }
+
if (dbg->pipe_split_policy == MPC_SPLIT_AVOID)
hack_disable_optional_pipe_split(v);
@@ -702,7 +722,6 @@ static void hack_bounding_box(struct dcn_bw_internal_vars *v,
hack_force_pipe_split(v, context->streams[0]->timing.pix_clk_100hz);
}
-
unsigned int get_highest_allowed_voltage_level(uint32_t hw_internal_rev, uint32_t pci_revision_id)
{
/* for low power RV2 variants, the highest voltage level we want is 0 */
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c
index 8ec2dfe45d40..a5c2114e4292 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c
@@ -90,7 +90,7 @@ void clk_mgr_exit_optimized_pwr_state(const struct dc *dc, struct clk_mgr *clk_m
dc->hwss.exit_optimized_pwr_state(dc, dc->current_state);
if (edp_link) {
- clk_mgr->psr_allow_active_cache = edp_link->psr_allow_active;
+ clk_mgr->psr_allow_active_cache = edp_link->psr_settings.psr_allow_active;
dc_link_set_psr_allow_active(edp_link, false, false);
}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c
index 26db1c5d4e4d..b210f8e9d592 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c
@@ -131,7 +131,7 @@ int dce_get_dp_ref_freq_khz(struct clk_mgr *clk_mgr_base)
struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
int dprefclk_wdivider;
int dprefclk_src_sel;
- int dp_ref_clk_khz = 600000;
+ int dp_ref_clk_khz;
int target_div;
/* ASSERT DP Reference Clock source is from DFS*/
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.c
index 97b7f32294fd..c320b7af7d34 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.c
@@ -97,9 +97,6 @@ int rv1_vbios_smu_set_dispclk(struct clk_mgr_internal *clk_mgr, int requested_di
VBIOSSMC_MSG_SetDispclkFreq,
requested_dispclk_khz / 1000);
- /* Actual dispclk set is returned in the parameter register */
- actual_dispclk_set_mhz = REG_READ(MP1_SMN_C2PMSG_83) * 1000;
-
if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) {
if (dmcu && dmcu->funcs->is_dmcu_initialized(dmcu)) {
if (clk_mgr->dfs_bypass_disp_clk != actual_dispclk_set_mhz)
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 47431ca6986d..45cfb7c45566 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -66,6 +66,8 @@
#include "dce/dce_i2c.h"
+#include "dmub/dmub_srv.h"
+
#define CTX \
dc->ctx
@@ -348,7 +350,7 @@ bool dc_stream_configure_crc(struct dc *dc, struct dc_stream_state *stream,
for (i = 0; i < MAX_PIPES; i++) {
pipe = &dc->current_state->res_ctx.pipe_ctx[i];
- if (pipe->stream == stream)
+ if (pipe->stream == stream && !pipe->top_pipe && !pipe->prev_odm_pipe)
break;
}
/* Stream not found */
@@ -365,6 +367,9 @@ bool dc_stream_configure_crc(struct dc *dc, struct dc_stream_state *stream,
param.windowb_x_end = pipe->stream->timing.h_addressable;
param.windowb_y_end = pipe->stream->timing.v_addressable;
+ param.dsc_mode = pipe->stream->timing.flags.DSC ? 1:0;
+ param.odm_mode = pipe->next_odm_pipe ? 1:0;
+
/* Default to the union of both windows */
param.selection = UNION_WINDOW_A_B;
param.continuous_mode = continuous;
@@ -2204,7 +2209,7 @@ static void commit_planes_do_stream_update(struct dc *dc,
if (should_program_abm) {
if (*stream_update->abm_level == ABM_LEVEL_IMMEDIATE_DISABLE) {
- pipe_ctx->stream_res.abm->funcs->set_abm_immediate_disable(pipe_ctx->stream_res.abm);
+ dc->hwss.set_abm_immediate_disable(pipe_ctx);
} else {
pipe_ctx->stream_res.abm->funcs->set_abm_level(
pipe_ctx->stream_res.abm, stream->abm_level);
@@ -2640,33 +2645,12 @@ void dc_set_power_state(
void dc_resume(struct dc *dc)
{
-
uint32_t i;
for (i = 0; i < dc->link_count; i++)
core_link_resume(dc->links[i]);
}
-unsigned int dc_get_current_backlight_pwm(struct dc *dc)
-{
- struct abm *abm = dc->res_pool->abm;
-
- if (abm)
- return abm->funcs->get_current_backlight(abm);
-
- return 0;
-}
-
-unsigned int dc_get_target_backlight_pwm(struct dc *dc)
-{
- struct abm *abm = dc->res_pool->abm;
-
- if (abm)
- return abm->funcs->get_target_backlight(abm);
-
- return 0;
-}
-
bool dc_is_dmcu_initialized(struct dc *dc)
{
struct dmcu *dmcu = dc->res_pool->dmcu;
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
index 67cfff1586e9..48ab51533d5d 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
@@ -26,7 +26,7 @@
#include <linux/slab.h>
#include "dm_services.h"
-#include "atom.h"
+#include "atomfirmware.h"
#include "dm_helpers.h"
#include "dc.h"
#include "grph_object_id.h"
@@ -46,10 +46,11 @@
#include "dmcu.h"
#include "hw/clk_mgr.h"
#include "dce/dmub_psr.h"
+#include "dmub/dmub_srv.h"
+#include "inc/hw/panel_cntl.h"
#define DC_LOGGER_INIT(logger)
-
#define LINK_INFO(...) \
DC_LOG_HW_HOTPLUG( \
__VA_ARGS__)
@@ -64,11 +65,11 @@
enum {
PEAK_FACTOR_X1000 = 1006,
/*
- * Some receivers fail to train on first try and are good
- * on subsequent tries. 2 retries should be plenty. If we
- * don't have a successful training then we don't expect to
- * ever get one.
- */
+ * Some receivers fail to train on first try and are good
+ * on subsequent tries. 2 retries should be plenty. If we
+ * don't have a successful training then we don't expect to
+ * ever get one.
+ */
LINK_TRAINING_MAX_VERIFY_RETRY = 2
};
@@ -79,7 +80,7 @@ static void dc_link_destruct(struct dc_link *link)
{
int i;
- if (link->hpd_gpio != NULL) {
+ if (link->hpd_gpio) {
dal_gpio_destroy_irq(&link->hpd_gpio);
link->hpd_gpio = NULL;
}
@@ -87,7 +88,10 @@ static void dc_link_destruct(struct dc_link *link)
if (link->ddc)
dal_ddc_service_destroy(&link->ddc);
- if(link->link_enc)
+ if (link->panel_cntl)
+ link->panel_cntl->funcs->destroy(&link->panel_cntl);
+
+ if (link->link_enc)
link->link_enc->funcs->destroy(&link->link_enc);
if (link->local_sink)
@@ -98,8 +102,8 @@ static void dc_link_destruct(struct dc_link *link)
}
struct gpio *get_hpd_gpio(struct dc_bios *dcb,
- struct graphics_object_id link_id,
- struct gpio_service *gpio_service)
+ struct graphics_object_id link_id,
+ struct gpio_service *gpio_service)
{
enum bp_result bp_result;
struct graphics_object_hpd_info hpd_info;
@@ -116,10 +120,9 @@ struct gpio *get_hpd_gpio(struct dc_bios *dcb,
return NULL;
}
- return dal_gpio_service_create_irq(
- gpio_service,
- pin_info.offset,
- pin_info.mask);
+ return dal_gpio_service_create_irq(gpio_service,
+ pin_info.offset,
+ pin_info.mask);
}
/*
@@ -134,13 +137,10 @@ struct gpio *get_hpd_gpio(struct dc_bios *dcb,
* @return
* true on success, false otherwise
*/
-static bool program_hpd_filter(
- const struct dc_link *link)
+static bool program_hpd_filter(const struct dc_link *link)
{
bool result = false;
-
struct gpio *hpd;
-
int delay_on_connect_in_ms = 0;
int delay_on_disconnect_in_ms = 0;
@@ -159,10 +159,10 @@ static bool program_hpd_filter(
case SIGNAL_TYPE_DISPLAY_PORT_MST:
/* Program hpd filter to allow DP signal to settle */
/* 500: not able to detect MST <-> SST switch as HPD is low for
- * only 100ms on DELL U2413
- * 0: some passive dongle still show aux mode instead of i2c
- * 20-50:not enough to hide bouncing HPD with passive dongle.
- * also see intermittent i2c read issues.
+ * only 100ms on DELL U2413
+ * 0: some passive dongle still show aux mode instead of i2c
+ * 20-50: not enough to hide bouncing HPD with passive dongle.
+ * also see intermittent i2c read issues.
*/
delay_on_connect_in_ms = 80;
delay_on_disconnect_in_ms = 0;
@@ -175,7 +175,8 @@ static bool program_hpd_filter(
}
/* Obtain HPD handle */
- hpd = get_hpd_gpio(link->ctx->dc_bios, link->link_id, link->ctx->gpio_service);
+ hpd = get_hpd_gpio(link->ctx->dc_bios, link->link_id,
+ link->ctx->gpio_service);
if (!hpd)
return result;
@@ -226,8 +227,9 @@ bool dc_link_detect_sink(struct dc_link *link, enum dc_connection_type *type)
}
/* todo: may need to lock gpio access */
- hpd_pin = get_hpd_gpio(link->ctx->dc_bios, link->link_id, link->ctx->gpio_service);
- if (hpd_pin == NULL)
+ hpd_pin = get_hpd_gpio(link->ctx->dc_bios, link->link_id,
+ link->ctx->gpio_service);
+ if (!hpd_pin)
goto hpd_gpio_failure;
dal_gpio_open(hpd_pin, GPIO_MODE_INTERRUPT);
@@ -248,8 +250,7 @@ hpd_gpio_failure:
return false;
}
-static enum ddc_transaction_type get_ddc_transaction_type(
- enum signal_type sink_signal)
+static enum ddc_transaction_type get_ddc_transaction_type(enum signal_type sink_signal)
{
enum ddc_transaction_type transaction_type = DDC_TRANSACTION_TYPE_NONE;
@@ -270,7 +271,8 @@ static enum ddc_transaction_type get_ddc_transaction_type(
case SIGNAL_TYPE_DISPLAY_PORT_MST:
/* MST does not use I2COverAux, but there is the
* SPECIAL use case for "immediate dwnstrm device
- * access" (EPR#370830). */
+ * access" (EPR#370830).
+ */
transaction_type = DDC_TRANSACTION_TYPE_I2C_OVER_AUX;
break;
@@ -281,9 +283,8 @@ static enum ddc_transaction_type get_ddc_transaction_type(
return transaction_type;
}
-static enum signal_type get_basic_signal_type(
- struct graphics_object_id encoder,
- struct graphics_object_id downstream)
+static enum signal_type get_basic_signal_type(struct graphics_object_id encoder,
+ struct graphics_object_id downstream)
{
if (downstream.type == OBJECT_TYPE_CONNECTOR) {
switch (downstream.id) {
@@ -369,10 +370,11 @@ bool dc_link_is_dp_sink_present(struct dc_link *link)
/* Open GPIO and set it to I2C mode */
/* Note: this GpioMode_Input will be converted
* to GpioConfigType_I2cAuxDualMode in GPIO component,
- * which indicates we need additional delay */
+ * which indicates we need additional delay
+ */
- if (GPIO_RESULT_OK != dal_ddc_open(
- ddc, GPIO_MODE_INPUT, GPIO_DDC_CONFIG_TYPE_MODE_I2C)) {
+ if (dal_ddc_open(ddc, GPIO_MODE_INPUT,
+ GPIO_DDC_CONFIG_TYPE_MODE_I2C) != GPIO_RESULT_OK) {
dal_ddc_close(ddc);
return present;
@@ -406,25 +408,25 @@ bool dc_link_is_dp_sink_present(struct dc_link *link)
* @brief
* Detect output sink type
*/
-static enum signal_type link_detect_sink(
- struct dc_link *link,
- enum dc_detect_reason reason)
+static enum signal_type link_detect_sink(struct dc_link *link,
+ enum dc_detect_reason reason)
{
- enum signal_type result = get_basic_signal_type(
- link->link_enc->id, link->link_id);
+ enum signal_type result = get_basic_signal_type(link->link_enc->id,
+ link->link_id);
/* Internal digital encoder will detect only dongles
- * that require digital signal */
+ * that require digital signal
+ */
/* Detection mechanism is different
* for different native connectors.
* LVDS connector supports only LVDS signal;
* PCIE is a bus slot, the actual connector needs to be detected first;
* eDP connector supports only eDP signal;
- * HDMI should check straps for audio */
+ * HDMI should check straps for audio
+ */
/* PCIE detects the actual connector on add-on board */
-
if (link->link_id.id == CONNECTOR_ID_PCIE) {
/* ZAZTODO implement PCIE add-on card detection */
}
@@ -432,8 +434,10 @@ static enum signal_type link_detect_sink(
switch (link->link_id.id) {
case CONNECTOR_ID_HDMI_TYPE_A: {
/* check audio support:
- * if native HDMI is not supported, switch to DVI */
- struct audio_support *aud_support = &link->dc->res_pool->audio_support;
+ * if native HDMI is not supported, switch to DVI
+ */
+ struct audio_support *aud_support =
+ &link->dc->res_pool->audio_support;
if (!aud_support->hdmi_audio_native)
if (link->link_id.id == CONNECTOR_ID_HDMI_TYPE_A)
@@ -461,16 +465,15 @@ static enum signal_type link_detect_sink(
return result;
}
-static enum signal_type decide_signal_from_strap_and_dongle_type(
- enum display_dongle_type dongle_type,
- struct audio_support *audio_support)
+static enum signal_type decide_signal_from_strap_and_dongle_type(enum display_dongle_type dongle_type,
+ struct audio_support *audio_support)
{
enum signal_type signal = SIGNAL_TYPE_NONE;
switch (dongle_type) {
case DISPLAY_DONGLE_DP_HDMI_DONGLE:
if (audio_support->hdmi_audio_on_dongle)
- signal = SIGNAL_TYPE_HDMI_TYPE_A;
+ signal = SIGNAL_TYPE_HDMI_TYPE_A;
else
signal = SIGNAL_TYPE_DVI_SINGLE_LINK;
break;
@@ -491,16 +494,14 @@ static enum signal_type decide_signal_from_strap_and_dongle_type(
return signal;
}
-static enum signal_type dp_passive_dongle_detection(
- struct ddc_service *ddc,
- struct display_sink_capability *sink_cap,
- struct audio_support *audio_support)
+static enum signal_type dp_passive_dongle_detection(struct ddc_service *ddc,
+ struct display_sink_capability *sink_cap,
+ struct audio_support *audio_support)
{
- dal_ddc_service_i2c_query_dp_dual_mode_adaptor(
- ddc, sink_cap);
- return decide_signal_from_strap_and_dongle_type(
- sink_cap->dongle_type,
- audio_support);
+ dal_ddc_service_i2c_query_dp_dual_mode_adaptor(ddc, sink_cap);
+
+ return decide_signal_from_strap_and_dongle_type(sink_cap->dongle_type,
+ audio_support);
}
static void link_disconnect_sink(struct dc_link *link)
@@ -519,6 +520,96 @@ static void link_disconnect_remap(struct dc_sink *prev_sink, struct dc_link *lin
link->local_sink = prev_sink;
}
+#if defined(CONFIG_DRM_AMD_DC_HDCP)
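+/* Return true if the sink on this link supports HDCP 1.4 (read from BCAPS for DP,
+ * assumed for HDMI/DVI). */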
+bool dc_link_is_hdcp14(struct dc_link *link)
+{
+ bool ret = false;
+
+ switch (link->connector_signal) {
+ case SIGNAL_TYPE_DISPLAY_PORT:
+ case SIGNAL_TYPE_DISPLAY_PORT_MST:
+ ret = link->hdcp_caps.bcaps.bits.HDCP_CAPABLE;
+ break;
+ case SIGNAL_TYPE_DVI_SINGLE_LINK:
+ case SIGNAL_TYPE_DVI_DUAL_LINK:
+ case SIGNAL_TYPE_HDMI_TYPE_A:
+ /* HDMI doesn't tell us its HDCP(1.4) capability, so assume it is always capable.
+ * We could poll for BKSV, but some displays have an issue with this. Since it is
+ * so rare for a display to not be 1.4 capable, this assumption is OK.
+ */
+ ret = true;
+ break;
+ default:
+ break;
+ }
+ return ret;
+}
+
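+/* Return true if the sink's HDCP RX caps / HDCP2Version report HDCP 2.2 capability. */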
+bool dc_link_is_hdcp22(struct dc_link *link)
+{
+ bool ret = false;
+
+ switch (link->connector_signal) {
+ case SIGNAL_TYPE_DISPLAY_PORT:
+ case SIGNAL_TYPE_DISPLAY_PORT_MST:
+ ret = (link->hdcp_caps.bcaps.bits.HDCP_CAPABLE &&
+ link->hdcp_caps.rx_caps.fields.byte0.hdcp_capable &&
+ (link->hdcp_caps.rx_caps.fields.version == 0x2)) ? 1 : 0;
+ break;
+ case SIGNAL_TYPE_DVI_SINGLE_LINK:
+ case SIGNAL_TYPE_DVI_DUAL_LINK:
+ case SIGNAL_TYPE_HDMI_TYPE_A:
+ ret = (link->hdcp_caps.rx_caps.fields.version == 0x4) ? 1:0;
+ break;
+ default:
+ break;
+ }
+
+ return ret;
+}
+
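+/* Read the sink's HDCP capability for this link: the HDCP 2.x RX caps (RxCaps over
+ * DP AUX, or the HDCP2Version byte otherwise) and, for DP, the HDCP 1.4 BCAPS byte.
+ */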
+static void query_hdcp_capability(enum signal_type signal, struct dc_link *link)
+{
+ struct hdcp_protection_message msg22;
+ struct hdcp_protection_message msg14;
+
+ memset(&msg22, 0, sizeof(struct hdcp_protection_message));
+ memset(&msg14, 0, sizeof(struct hdcp_protection_message));
+ memset(link->hdcp_caps.rx_caps.raw, 0,
+ sizeof(link->hdcp_caps.rx_caps.raw));
+
+ if ((link->connector_signal == SIGNAL_TYPE_DISPLAY_PORT &&
+ link->ddc->transaction_type ==
+ DDC_TRANSACTION_TYPE_I2C_OVER_AUX) ||
+ link->connector_signal == SIGNAL_TYPE_EDP) {
+ msg22.data = link->hdcp_caps.rx_caps.raw;
+ msg22.length = sizeof(link->hdcp_caps.rx_caps.raw);
+ msg22.msg_id = HDCP_MESSAGE_ID_RX_CAPS;
+ } else {
+ msg22.data = &link->hdcp_caps.rx_caps.fields.version;
+ msg22.length = sizeof(link->hdcp_caps.rx_caps.fields.version);
+ msg22.msg_id = HDCP_MESSAGE_ID_HDCP2VERSION;
+ }
+ msg22.version = HDCP_VERSION_22;
+ msg22.link = HDCP_LINK_PRIMARY;
+ msg22.max_retries = 5;
+ dc_process_hdcp_msg(signal, link, &msg22);
+
+ if (signal == SIGNAL_TYPE_DISPLAY_PORT || signal == SIGNAL_TYPE_DISPLAY_PORT_MST) {
+ enum hdcp_message_status status = HDCP_MESSAGE_UNSUPPORTED;
+
+ msg14.data = &link->hdcp_caps.bcaps.raw;
+ msg14.length = sizeof(link->hdcp_caps.bcaps.raw);
+ msg14.msg_id = HDCP_MESSAGE_ID_READ_BCAPS;
+ msg14.version = HDCP_VERSION_14;
+ msg14.link = HDCP_LINK_PRIMARY;
+ msg14.max_retries = 5;
+
+ status = dc_process_hdcp_msg(signal, link, &msg14);
+ }
+
+}
+#endif
static void read_current_link_settings_on_detect(struct dc_link *link)
{
@@ -532,18 +623,18 @@ static void read_current_link_settings_on_detect(struct dc_link *link)
// Read DPCD 00101h to find out the number of lanes currently set
for (i = 0; i < read_dpcd_retry_cnt; i++) {
- status = core_link_read_dpcd(
- link,
- DP_LANE_COUNT_SET,
- &lane_count_set.raw,
- sizeof(lane_count_set));
+ status = core_link_read_dpcd(link,
+ DP_LANE_COUNT_SET,
+ &lane_count_set.raw,
+ sizeof(lane_count_set));
/* First DPCD read after VDD ON can fail if the particular board
* does not have HPD pin wired correctly. So if DPCD read fails,
* which should never happen, retry a few times. Target worst
* case scenario of 80 ms.
*/
if (status == DC_OK) {
- link->cur_link_settings.lane_count = lane_count_set.bits.LANE_COUNT_SET;
+ link->cur_link_settings.lane_count =
+ lane_count_set.bits.LANE_COUNT_SET;
break;
}
@@ -552,7 +643,7 @@ static void read_current_link_settings_on_detect(struct dc_link *link)
// Read DPCD 00100h to find if standard link rates are set
core_link_read_dpcd(link, DP_LINK_BW_SET,
- &link_bw_set, sizeof(link_bw_set));
+ &link_bw_set, sizeof(link_bw_set));
if (link_bw_set == 0) {
if (link->connector_signal == SIGNAL_TYPE_EDP) {
@@ -560,12 +651,12 @@ static void read_current_link_settings_on_detect(struct dc_link *link)
* Read DPCD 00115h to find the edp link rate set used
*/
core_link_read_dpcd(link, DP_LINK_RATE_SET,
- &link_rate_set, sizeof(link_rate_set));
+ &link_rate_set, sizeof(link_rate_set));
// edp_supported_link_rates_count = 0 for DP
if (link_rate_set < link->dpcd_caps.edp_supported_link_rates_count) {
link->cur_link_settings.link_rate =
- link->dpcd_caps.edp_supported_link_rates[link_rate_set];
+ link->dpcd_caps.edp_supported_link_rates[link_rate_set];
link->cur_link_settings.link_rate_set = link_rate_set;
link->cur_link_settings.use_link_rate_set = true;
}
@@ -579,7 +670,7 @@ static void read_current_link_settings_on_detect(struct dc_link *link)
}
// Read DPCD 00003h to find the max down spread.
core_link_read_dpcd(link, DP_MAX_DOWNSPREAD,
- &max_down_spread.raw, sizeof(max_down_spread));
+ &max_down_spread.raw, sizeof(max_down_spread));
link->cur_link_settings.link_spread =
max_down_spread.bits.MAX_DOWN_SPREAD ?
LINK_SPREAD_05_DOWNSPREAD_30KHZ : LINK_SPREAD_DISABLED;
@@ -612,6 +703,12 @@ static bool detect_dp(struct dc_link *link,
dal_ddc_service_set_transaction_type(link->ddc,
sink_caps->transaction_type);
+#if defined(CONFIG_DRM_AMD_DC_HDCP)
+ /* In case of fallback to SST when topology discovery below fails,
+ * HDCP caps will be queried again later by the upper layer (caller
+ * of this function).
+ */
+ query_hdcp_capability(SIGNAL_TYPE_DISPLAY_PORT_MST, link);
+#endif
/*
* This call will initiate MST topology discovery, which
* will detect MST ports and add new DRM connector DRM
@@ -683,12 +780,12 @@ static bool is_same_edid(struct dc_edid *old_edid, struct dc_edid *new_edid)
if (new_edid->length == 0)
return false;
- return (memcmp(old_edid->raw_edid, new_edid->raw_edid, new_edid->length) == 0);
+ return (memcmp(old_edid->raw_edid,
+ new_edid->raw_edid, new_edid->length) == 0);
}
-static bool wait_for_alt_mode(struct dc_link *link)
+static bool wait_for_entering_dp_alt_mode(struct dc_link *link)
{
-
/**
* something is terribly wrong if timeout is > 200ms. (5Hz)
* 500 microseconds * 400 tries gives us 200 ms
@@ -703,7 +800,7 @@ static bool wait_for_alt_mode(struct dc_link *link)
DC_LOGGER_INIT(link->ctx->logger);
- if (link->link_enc->funcs->is_in_alt_mode == NULL)
+ if (!link->link_enc->funcs->is_in_alt_mode)
return true;
is_in_alt_mode = link->link_enc->funcs->is_in_alt_mode(link->link_enc);
@@ -718,21 +815,21 @@ static bool wait_for_alt_mode(struct dc_link *link)
udelay(sleep_time_in_microseconds);
/* ask the link if alt mode is enabled, if so return ok */
if (link->link_enc->funcs->is_in_alt_mode(link->link_enc)) {
-
finish_timestamp = dm_get_timestamp(link->ctx);
- time_taken_in_ns = dm_get_elapse_time_in_ns(
- link->ctx, finish_timestamp, enter_timestamp);
+ time_taken_in_ns =
+ dm_get_elapse_time_in_ns(link->ctx,
+ finish_timestamp,
+ enter_timestamp);
DC_LOG_WARNING("Alt mode entered finished after %llu ms\n",
div_u64(time_taken_in_ns, 1000000));
return true;
}
-
}
finish_timestamp = dm_get_timestamp(link->ctx);
time_taken_in_ns = dm_get_elapse_time_in_ns(link->ctx, finish_timestamp,
enter_timestamp);
DC_LOG_WARNING("Alt mode has timed out after %llu ms\n",
- div_u64(time_taken_in_ns, 1000000));
+ div_u64(time_taken_in_ns, 1000000));
return false;
}
@@ -768,30 +865,30 @@ static bool dc_link_detect_helper(struct dc_link *link,
return false;
if ((link->connector_signal == SIGNAL_TYPE_LVDS ||
- link->connector_signal == SIGNAL_TYPE_EDP) &&
- link->local_sink) {
-
+ link->connector_signal == SIGNAL_TYPE_EDP) &&
+ link->local_sink) {
// need to re-write OUI and brightness in resume case
if (link->connector_signal == SIGNAL_TYPE_EDP) {
dpcd_set_source_specific_data(link);
- dc_link_set_default_brightness_aux(link); //TODO: use cached
+ dc_link_set_default_brightness_aux(link);
+ // TODO: use cached
}
return true;
}
- if (false == dc_link_detect_sink(link, &new_connection_type)) {
+ if (!dc_link_detect_sink(link, &new_connection_type)) {
BREAK_TO_DEBUGGER();
return false;
}
prev_sink = link->local_sink;
- if (prev_sink != NULL) {
+ if (prev_sink) {
dc_sink_retain(prev_sink);
memcpy(&prev_dpcd_caps, &link->dpcd_caps, sizeof(struct dpcd_caps));
}
- link_disconnect_sink(link);
+ link_disconnect_sink(link);
if (new_connection_type != dc_connection_none) {
link->type = new_connection_type;
link->link_state_valid = false;
@@ -838,35 +935,31 @@ static bool dc_link_detect_helper(struct dc_link *link,
}
case SIGNAL_TYPE_DISPLAY_PORT: {
-
/* WA: HPD high coming too early */
if (link->link_enc->features.flags.bits.DP_IS_USB_C == 1) {
-
/* if alt mode times out, return false */
- if (wait_for_alt_mode(link) == false) {
+ if (!wait_for_entering_dp_alt_mode(link))
return false;
- }
}
- if (!detect_dp(
- link,
- &sink_caps,
- &converter_disable_audio,
- aud_support, reason)) {
- if (prev_sink != NULL)
+ if (!detect_dp(link, &sink_caps,
+ &converter_disable_audio,
+ aud_support, reason)) {
+ if (prev_sink)
dc_sink_release(prev_sink);
return false;
}
// Check if dpcd block is the same
- if (prev_sink != NULL) {
- if (memcmp(&link->dpcd_caps, &prev_dpcd_caps, sizeof(struct dpcd_caps)))
+ if (prev_sink) {
+ if (memcmp(&link->dpcd_caps, &prev_dpcd_caps,
+ sizeof(struct dpcd_caps)))
same_dpcd = false;
}
/* Active dongle downstream unplug*/
if (link->type == dc_connection_active_dongle &&
- link->dpcd_caps.sink_count.bits.SINK_COUNT == 0) {
- if (prev_sink != NULL)
+ link->dpcd_caps.sink_count.bits.SINK_COUNT == 0) {
+ if (prev_sink)
/* Downstream unplug */
dc_sink_release(prev_sink);
return true;
@@ -874,7 +967,7 @@ static bool dc_link_detect_helper(struct dc_link *link,
if (link->type == dc_connection_mst_branch) {
LINK_INFO("link=%d, mst branch is now Connected\n",
- link->link_index);
+ link->link_index);
/* Need to setup mst link_cap struct here
* otherwise dc_link_detect() will leave mst link_cap
* empty which leads to allocate_mst_payload() has "0"
@@ -882,15 +975,15 @@ static bool dc_link_detect_helper(struct dc_link *link,
*/
dp_verify_mst_link_cap(link);
- if (prev_sink != NULL)
+ if (prev_sink)
dc_sink_release(prev_sink);
return false;
}
// For seamless boot, to skip link cap verification, we read UEFI settings and set them as verified.
if (reason == DETECT_REASON_BOOT &&
- dc_ctx->dc->config.power_down_display_on_boot == false &&
- link->link_status.link_active == true)
+ !dc_ctx->dc->config.power_down_display_on_boot &&
+ link->link_status.link_active)
perform_dp_seamless_boot = true;
if (perform_dp_seamless_boot) {
@@ -903,24 +996,23 @@ static bool dc_link_detect_helper(struct dc_link *link,
default:
DC_ERROR("Invalid connector type! signal:%d\n",
- link->connector_signal);
- if (prev_sink != NULL)
+ link->connector_signal);
+ if (prev_sink)
dc_sink_release(prev_sink);
return false;
} /* switch() */
if (link->dpcd_caps.sink_count.bits.SINK_COUNT)
- link->dpcd_sink_count = link->dpcd_caps.sink_count.
- bits.SINK_COUNT;
+ link->dpcd_sink_count =
+ link->dpcd_caps.sink_count.bits.SINK_COUNT;
else
link->dpcd_sink_count = 1;
- dal_ddc_service_set_transaction_type(
- link->ddc,
- sink_caps.transaction_type);
+ dal_ddc_service_set_transaction_type(link->ddc,
+ sink_caps.transaction_type);
- link->aux_mode = dal_ddc_service_is_in_aux_transaction_mode(
- link->ddc);
+ link->aux_mode =
+ dal_ddc_service_is_in_aux_transaction_mode(link->ddc);
sink_init_data.link = link;
sink_init_data.sink_signal = sink_caps.signal;
@@ -928,7 +1020,7 @@ static bool dc_link_detect_helper(struct dc_link *link,
sink = dc_sink_create(&sink_init_data);
if (!sink) {
DC_ERROR("Failed to create sink!\n");
- if (prev_sink != NULL)
+ if (prev_sink)
dc_sink_release(prev_sink);
return false;
}
@@ -939,10 +1031,8 @@ static bool dc_link_detect_helper(struct dc_link *link,
/* dc_sink_create returns a new reference */
link->local_sink = sink;
- edid_status = dm_helpers_read_local_edid(
- link->ctx,
- link,
- sink);
+ edid_status = dm_helpers_read_local_edid(link->ctx,
+ link, sink);
switch (edid_status) {
case EDID_BAD_CHECKSUM:
@@ -950,7 +1040,6 @@ static bool dc_link_detect_helper(struct dc_link *link,
break;
case EDID_NO_RESPONSE:
DC_LOG_ERROR("No EDID read.\n");
-
/*
* Abort detection for non-DP connectors if we have
* no EDID
@@ -961,7 +1050,7 @@ static bool dc_link_detect_helper(struct dc_link *link,
*/
if (dc_is_hdmi_signal(link->connector_signal) ||
dc_is_dvi_signal(link->connector_signal)) {
- if (prev_sink != NULL)
+ if (prev_sink)
dc_sink_release(prev_sink);
return false;
@@ -974,45 +1063,53 @@ static bool dc_link_detect_helper(struct dc_link *link,
link->ctx->dc->debug.disable_fec = true;
// Check if edid is the same
- if ((prev_sink != NULL) && ((edid_status == EDID_THE_SAME) || (edid_status == EDID_OK)))
- same_edid = is_same_edid(&prev_sink->dc_edid, &sink->dc_edid);
+ if (prev_sink &&
+ (edid_status == EDID_THE_SAME || edid_status == EDID_OK))
+ same_edid = is_same_edid(&prev_sink->dc_edid,
+ &sink->dc_edid);
if (sink->edid_caps.panel_patch.skip_scdc_overwrite)
link->ctx->dc->debug.hdmi20_disable = true;
if (link->connector_signal == SIGNAL_TYPE_DISPLAY_PORT &&
- sink_caps.transaction_type == DDC_TRANSACTION_TYPE_I2C_OVER_AUX) {
+ sink_caps.transaction_type ==
+ DDC_TRANSACTION_TYPE_I2C_OVER_AUX) {
/*
* TODO debug why Dell 2413 doesn't like
* two link trainings
*/
+#if defined(CONFIG_DRM_AMD_DC_HDCP)
+ query_hdcp_capability(sink->sink_signal, link);
+#endif
// verify link cap for SST non-seamless boot
if (!perform_dp_seamless_boot)
dp_verify_link_cap_with_retries(link,
- &link->reported_link_cap,
- LINK_TRAINING_MAX_VERIFY_RETRY);
+ &link->reported_link_cap,
+ LINK_TRAINING_MAX_VERIFY_RETRY);
} else {
// If edid is the same, then discard new sink and revert back to original sink
if (same_edid) {
link_disconnect_remap(prev_sink, link);
sink = prev_sink;
prev_sink = NULL;
-
}
+#if defined(CONFIG_DRM_AMD_DC_HDCP)
+ query_hdcp_capability(sink->sink_signal, link);
+#endif
}
/* HDMI-DVI Dongle */
if (sink->sink_signal == SIGNAL_TYPE_HDMI_TYPE_A &&
- !sink->edid_caps.edid_hdmi)
+ !sink->edid_caps.edid_hdmi)
sink->sink_signal = SIGNAL_TYPE_DVI_SINGLE_LINK;
/* Connectivity log: detection */
for (i = 0; i < sink->dc_edid.length / DC_EDID_BLOCK_SIZE; i++) {
CONN_DATA_DETECT(link,
- &sink->dc_edid.raw_edid[i * DC_EDID_BLOCK_SIZE],
- DC_EDID_BLOCK_SIZE,
- "%s: [Block %d] ", sink->edid_caps.display_name, i);
+ &sink->dc_edid.raw_edid[i * DC_EDID_BLOCK_SIZE],
+ DC_EDID_BLOCK_SIZE,
+ "%s: [Block %d] ", sink->edid_caps.display_name, i);
}
DC_LOG_DETECTION_EDID_PARSER("%s: "
@@ -1047,17 +1144,18 @@ static bool dc_link_detect_helper(struct dc_link *link,
sink->edid_caps.audio_modes[i].sample_rate,
sink->edid_caps.audio_modes[i].sample_size);
}
-
} else {
/* From Connected-to-Disconnected. */
if (link->type == dc_connection_mst_branch) {
LINK_INFO("link=%d, mst branch is now Disconnected\n",
- link->link_index);
+ link->link_index);
dm_helpers_dp_mst_stop_top_mgr(link->ctx, link);
link->mst_stream_alloc_table.stream_count = 0;
- memset(link->mst_stream_alloc_table.stream_allocations, 0, sizeof(link->mst_stream_alloc_table.stream_allocations));
+ memset(link->mst_stream_alloc_table.stream_allocations,
+ 0,
+ sizeof(link->mst_stream_alloc_table.stream_allocations));
}
link->type = dc_connection_none;
@@ -1071,16 +1169,15 @@ static bool dc_link_detect_helper(struct dc_link *link,
}
LINK_INFO("link=%d, dc_sink_in=%p is now %s prev_sink=%p dpcd same=%d edid same=%d\n",
- link->link_index, sink,
- (sink_caps.signal == SIGNAL_TYPE_NONE ?
- "Disconnected":"Connected"), prev_sink,
- same_dpcd, same_edid);
+ link->link_index, sink,
+ (sink_caps.signal ==
+ SIGNAL_TYPE_NONE ? "Disconnected" : "Connected"),
+ prev_sink, same_dpcd, same_edid);
- if (prev_sink != NULL)
+ if (prev_sink)
dc_sink_release(prev_sink);
return true;
-
}
bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason)
@@ -1110,13 +1207,13 @@ bool dc_link_get_hpd_state(struct dc_link *dc_link)
return state;
}
-static enum hpd_source_id get_hpd_line(
- struct dc_link *link)
+static enum hpd_source_id get_hpd_line(struct dc_link *link)
{
struct gpio *hpd;
enum hpd_source_id hpd_id = HPD_SOURCEID_UNKNOWN;
- hpd = get_hpd_gpio(link->ctx->dc_bios, link->link_id, link->ctx->gpio_service);
+ hpd = get_hpd_gpio(link->ctx->dc_bios, link->link_id,
+ link->ctx->gpio_service);
if (hpd) {
switch (dal_irq_get_source(hpd)) {
@@ -1191,8 +1288,7 @@ static enum channel_id get_ddc_line(struct dc_link *link)
return channel;
}
-static enum transmitter translate_encoder_to_transmitter(
- struct graphics_object_id encoder)
+static enum transmitter translate_encoder_to_transmitter(struct graphics_object_id encoder)
{
switch (encoder.id) {
case ENCODER_ID_INTERNAL_UNIPHY:
@@ -1256,17 +1352,18 @@ static enum transmitter translate_encoder_to_transmitter(
}
}
-static bool dc_link_construct(
- struct dc_link *link,
- const struct link_init_data *init_params)
+static bool dc_link_construct(struct dc_link *link,
+ const struct link_init_data *init_params)
{
uint8_t i;
struct ddc_service_init_data ddc_service_init_data = { { 0 } };
struct dc_context *dc_ctx = init_params->ctx;
struct encoder_init_data enc_init_data = { 0 };
+ struct panel_cntl_init_data panel_cntl_init_data = { 0 };
struct integrated_info info = {{{ 0 }}};
struct dc_bios *bios = init_params->dc->ctx->dc_bios;
const struct dc_vbios_funcs *bp_funcs = bios->funcs;
+
DC_LOGGER_INIT(dc_ctx->logger);
link->irq_source_hpd = DC_IRQ_SOURCE_INVALID;
@@ -1278,23 +1375,27 @@ static bool dc_link_construct(
link->ctx = dc_ctx;
link->link_index = init_params->link_index;
- memset(&link->preferred_training_settings, 0, sizeof(struct dc_link_training_overrides));
- memset(&link->preferred_link_setting, 0, sizeof(struct dc_link_settings));
+ memset(&link->preferred_training_settings, 0,
+ sizeof(struct dc_link_training_overrides));
+ memset(&link->preferred_link_setting, 0,
+ sizeof(struct dc_link_settings));
- link->link_id = bios->funcs->get_connector_id(bios, init_params->connector_index);
+ link->link_id =
+ bios->funcs->get_connector_id(bios, init_params->connector_index);
if (link->link_id.type != OBJECT_TYPE_CONNECTOR) {
dm_output_to_console("%s: Invalid Connector ObjectID from Adapter Service for connector index:%d! type %d expected %d\n",
- __func__, init_params->connector_index,
- link->link_id.type, OBJECT_TYPE_CONNECTOR);
+ __func__, init_params->connector_index,
+ link->link_id.type, OBJECT_TYPE_CONNECTOR);
goto create_fail;
}
if (link->dc->res_pool->funcs->link_init)
link->dc->res_pool->funcs->link_init(link);
- link->hpd_gpio = get_hpd_gpio(link->ctx->dc_bios, link->link_id, link->ctx->gpio_service);
- if (link->hpd_gpio != NULL) {
+ link->hpd_gpio = get_hpd_gpio(link->ctx->dc_bios, link->link_id,
+ link->ctx->gpio_service);
+ if (link->hpd_gpio) {
dal_gpio_open(link->hpd_gpio, GPIO_MODE_INTERRUPT);
dal_gpio_unlock_pin(link->hpd_gpio);
link->irq_source_hpd = dal_irq_get_source(link->hpd_gpio);
@@ -1314,9 +1415,9 @@ static bool dc_link_construct(
link->connector_signal = SIGNAL_TYPE_DVI_DUAL_LINK;
break;
case CONNECTOR_ID_DISPLAY_PORT:
- link->connector_signal = SIGNAL_TYPE_DISPLAY_PORT;
+ link->connector_signal = SIGNAL_TYPE_DISPLAY_PORT;
- if (link->hpd_gpio != NULL)
+ if (link->hpd_gpio)
link->irq_source_hpd_rx =
dal_irq_get_rx_source(link->hpd_gpio);
@@ -1324,42 +1425,60 @@ static bool dc_link_construct(
case CONNECTOR_ID_EDP:
link->connector_signal = SIGNAL_TYPE_EDP;
- if (link->hpd_gpio != NULL) {
+ if (link->hpd_gpio) {
link->irq_source_hpd = DC_IRQ_SOURCE_INVALID;
link->irq_source_hpd_rx =
dal_irq_get_rx_source(link->hpd_gpio);
}
+
break;
case CONNECTOR_ID_LVDS:
link->connector_signal = SIGNAL_TYPE_LVDS;
break;
default:
- DC_LOG_WARNING("Unsupported Connector type:%d!\n", link->link_id.id);
+ DC_LOG_WARNING("Unsupported Connector type:%d!\n",
+ link->link_id.id);
goto create_fail;
}
/* TODO: #DAL3 Implement id to str function.*/
LINK_INFO("Connector[%d] description:"
- "signal %d\n",
- init_params->connector_index,
- link->connector_signal);
+ "signal %d\n",
+ init_params->connector_index,
+ link->connector_signal);
ddc_service_init_data.ctx = link->ctx;
ddc_service_init_data.id = link->link_id;
ddc_service_init_data.link = link;
link->ddc = dal_ddc_service_create(&ddc_service_init_data);
- if (link->ddc == NULL) {
+ if (!link->ddc) {
DC_ERROR("Failed to create ddc_service!\n");
goto ddc_create_fail;
}
link->ddc_hw_inst =
- dal_ddc_get_line(
- dal_ddc_service_get_ddc_pin(link->ddc));
+ dal_ddc_get_line(dal_ddc_service_get_ddc_pin(link->ddc));
+
+
+ if (link->dc->res_pool->funcs->panel_cntl_create &&
+ (link->link_id.id == CONNECTOR_ID_EDP ||
+ link->link_id.id == CONNECTOR_ID_LVDS)) {
+ panel_cntl_init_data.ctx = dc_ctx;
+ panel_cntl_init_data.inst = 0;
+ link->panel_cntl =
+ link->dc->res_pool->funcs->panel_cntl_create(
+ &panel_cntl_init_data);
+
+ if (link->panel_cntl == NULL) {
+ DC_ERROR("Failed to create link panel_cntl!\n");
+ goto panel_cntl_create_fail;
+ }
+ }
enc_init_data.ctx = dc_ctx;
- bp_funcs->get_src_obj(dc_ctx->dc_bios, link->link_id, 0, &enc_init_data.encoder);
+ bp_funcs->get_src_obj(dc_ctx->dc_bios, link->link_id, 0,
+ &enc_init_data.encoder);
enc_init_data.connector = link->link_id;
enc_init_data.channel = get_ddc_line(link);
enc_init_data.hpd_source = get_hpd_line(link);
@@ -1367,11 +1486,11 @@ static bool dc_link_construct(
link->hpd_src = enc_init_data.hpd_source;
enc_init_data.transmitter =
- translate_encoder_to_transmitter(enc_init_data.encoder);
- link->link_enc = link->dc->res_pool->funcs->link_enc_create(
- &enc_init_data);
+ translate_encoder_to_transmitter(enc_init_data.encoder);
+ link->link_enc =
+ link->dc->res_pool->funcs->link_enc_create(&enc_init_data);
- if (link->link_enc == NULL) {
+ if (!link->link_enc) {
DC_ERROR("Failed to create link encoder!\n");
goto link_enc_create_fail;
}
@@ -1379,8 +1498,9 @@ static bool dc_link_construct(
link->link_enc_hw_inst = link->link_enc->transmitter;
for (i = 0; i < 4; i++) {
- if (BP_RESULT_OK !=
- bp_funcs->get_device_tag(dc_ctx->dc_bios, link->link_id, i, &link->device_tag)) {
+ if (bp_funcs->get_device_tag(dc_ctx->dc_bios,
+ link->link_id, i,
+ &link->device_tag) != BP_RESULT_OK) {
DC_ERROR("Failed to find device tag!\n");
goto device_tag_fail;
}
@@ -1388,13 +1508,14 @@ static bool dc_link_construct(
/* Look for device tag that matches connector signal,
* CRT for RGB, LCD for other supported signal types
*/
- if (!bp_funcs->is_device_id_supported(dc_ctx->dc_bios, link->device_tag.dev_id))
+ if (!bp_funcs->is_device_id_supported(dc_ctx->dc_bios,
+ link->device_tag.dev_id))
continue;
- if (link->device_tag.dev_id.device_type == DEVICE_TYPE_CRT
- && link->connector_signal != SIGNAL_TYPE_RGB)
+ if (link->device_tag.dev_id.device_type == DEVICE_TYPE_CRT &&
+ link->connector_signal != SIGNAL_TYPE_RGB)
continue;
- if (link->device_tag.dev_id.device_type == DEVICE_TYPE_LCD
- && link->connector_signal == SIGNAL_TYPE_RGB)
+ if (link->device_tag.dev_id.device_type == DEVICE_TYPE_LCD &&
+ link->connector_signal == SIGNAL_TYPE_RGB)
continue;
break;
}
@@ -1406,16 +1527,16 @@ static bool dc_link_construct(
for (i = 0; i < MAX_NUMBER_OF_EXT_DISPLAY_PATH; i++) {
struct external_display_path *path =
&info.ext_disp_conn_info.path[i];
- if (path->device_connector_id.enum_id == link->link_id.enum_id
- && path->device_connector_id.id == link->link_id.id
- && path->device_connector_id.type == link->link_id.type) {
- if (link->device_tag.acpi_device != 0
- && path->device_acpi_enum == link->device_tag.acpi_device) {
+ if (path->device_connector_id.enum_id == link->link_id.enum_id &&
+ path->device_connector_id.id == link->link_id.id &&
+ path->device_connector_id.type == link->link_id.type) {
+ if (link->device_tag.acpi_device != 0 &&
+ path->device_acpi_enum == link->device_tag.acpi_device) {
link->ddi_channel_mapping = path->channel_mapping;
link->chip_caps = path->caps;
} else if (path->device_tag ==
- link->device_tag.dev_id.raw_device_tag) {
+ link->device_tag.dev_id.raw_device_tag) {
link->ddi_channel_mapping = path->channel_mapping;
link->chip_caps = path->caps;
}
@@ -1431,15 +1552,20 @@ static bool dc_link_construct(
*/
program_hpd_filter(link);
+ link->psr_settings.psr_version = DC_PSR_VERSION_UNSUPPORTED;
+
return true;
device_tag_fail:
link->link_enc->funcs->destroy(&link->link_enc);
link_enc_create_fail:
+ if (link->panel_cntl != NULL)
+ link->panel_cntl->funcs->destroy(&link->panel_cntl);
+panel_cntl_create_fail:
dal_ddc_service_destroy(&link->ddc);
ddc_create_fail:
create_fail:
- if (link->hpd_gpio != NULL) {
+ if (link->hpd_gpio) {
dal_gpio_destroy_irq(&link->hpd_gpio);
link->hpd_gpio = NULL;
}
@@ -2339,9 +2465,28 @@ enum dc_status dc_link_validate_mode_timing(
return DC_OK;
}
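+/* Find the ABM instance from the stream resources of the pipe currently driving
+ * this link, if any. */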
+static struct abm *get_abm_from_stream_res(const struct dc_link *link)
+{
+ int i;
+ struct dc *dc = link->ctx->dc;
+ struct abm *abm = NULL;
+
+ for (i = 0; i < MAX_PIPES; i++) {
+ struct pipe_ctx pipe_ctx = dc->current_state->res_ctx.pipe_ctx[i];
+ struct dc_stream_state *stream = pipe_ctx.stream;
+
+ if (stream && stream->link == link) {
+ abm = pipe_ctx.stream_res.abm;
+ break;
+ }
+ }
+ return abm;
+}
+
int dc_link_get_backlight_level(const struct dc_link *link)
{
- struct abm *abm = link->ctx->dc->res_pool->abm;
+
+ struct abm *abm = get_abm_from_stream_res(link);
if (abm == NULL || abm->funcs->get_current_backlight == NULL)
return DC_ERROR_UNEXPECTED;
@@ -2349,71 +2494,63 @@ int dc_link_get_backlight_level(const struct dc_link *link)
return (int) abm->funcs->get_current_backlight(abm);
}
-bool dc_link_set_backlight_level(const struct dc_link *link,
- uint32_t backlight_pwm_u16_16,
- uint32_t frame_ramp)
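+/* Return the target backlight PWM value reported by the ABM servicing this link. */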
+int dc_link_get_target_backlight_pwm(const struct dc_link *link)
{
- struct dc *dc = link->ctx->dc;
- struct abm *abm = dc->res_pool->abm;
- struct dmcu *dmcu = dc->res_pool->dmcu;
- unsigned int controller_id = 0;
- bool use_smooth_brightness = true;
- int i;
- DC_LOGGER_INIT(link->ctx->logger);
+ struct abm *abm = get_abm_from_stream_res(link);
- if ((dmcu == NULL) ||
- (abm == NULL) ||
- (abm->funcs->set_backlight_level_pwm == NULL))
- return false;
+ if (abm == NULL || abm->funcs->get_target_backlight == NULL)
+ return DC_ERROR_UNEXPECTED;
- use_smooth_brightness = dmcu->funcs->is_dmcu_initialized(dmcu);
+ return (int) abm->funcs->get_target_backlight(abm);
+}
- DC_LOG_BACKLIGHT("New Backlight level: %d (0x%X)\n",
- backlight_pwm_u16_16, backlight_pwm_u16_16);
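+/* Return the first pipe in the current state whose stream is driven by this link. */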
+static struct pipe_ctx *get_pipe_from_link(const struct dc_link *link)
+{
+ int i;
+ struct dc *dc = link->ctx->dc;
+ struct pipe_ctx *pipe_ctx = NULL;
- if (dc_is_embedded_signal(link->connector_signal)) {
- for (i = 0; i < MAX_PIPES; i++) {
- if (dc->current_state->res_ctx.pipe_ctx[i].stream) {
- if (dc->current_state->res_ctx.
- pipe_ctx[i].stream->link
- == link) {
- /* DMCU -1 for all controller id values,
- * therefore +1 here
- */
- controller_id =
- dc->current_state->
- res_ctx.pipe_ctx[i].stream_res.tg->inst +
- 1;
-
- /* Disable brightness ramping when the display is blanked
- * as it can hang the DMCU
- */
- if (dc->current_state->res_ctx.pipe_ctx[i].plane_state == NULL)
- frame_ramp = 0;
- }
+ for (i = 0; i < MAX_PIPES; i++) {
+ if (dc->current_state->res_ctx.pipe_ctx[i].stream) {
+ if (dc->current_state->res_ctx.pipe_ctx[i].stream->link == link) {
+ pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i];
+ break;
}
}
- abm->funcs->set_backlight_level_pwm(
- abm,
- backlight_pwm_u16_16,
- frame_ramp,
- controller_id,
- use_smooth_brightness);
}
- return true;
+ return pipe_ctx;
}
-bool dc_link_set_abm_disable(const struct dc_link *link)
+bool dc_link_set_backlight_level(const struct dc_link *link,
+ uint32_t backlight_pwm_u16_16,
+ uint32_t frame_ramp)
{
struct dc *dc = link->ctx->dc;
- struct abm *abm = dc->res_pool->abm;
- if ((abm == NULL) || (abm->funcs->set_backlight_level_pwm == NULL))
- return false;
+ DC_LOGGER_INIT(link->ctx->logger);
+ DC_LOG_BACKLIGHT("New Backlight level: %d (0x%X)\n",
+ backlight_pwm_u16_16, backlight_pwm_u16_16);
+
+ if (dc_is_embedded_signal(link->connector_signal)) {
+ struct pipe_ctx *pipe_ctx = get_pipe_from_link(link);
- abm->funcs->set_abm_immediate_disable(abm);
+ if (pipe_ctx) {
+ /* Disable brightness ramping when the display is blanked
+ * as it can hang the DMCU
+ */
+ if (pipe_ctx->plane_state == NULL)
+ frame_ramp = 0;
+ } else {
+ ASSERT(false);
+ return false;
+ }
+ dc->hwss.set_backlight_level(
+ pipe_ctx,
+ backlight_pwm_u16_16,
+ frame_ramp);
+ }
return true;
}
@@ -2423,12 +2560,12 @@ bool dc_link_set_psr_allow_active(struct dc_link *link, bool allow_active, bool
struct dmcu *dmcu = dc->res_pool->dmcu;
struct dmub_psr *psr = dc->res_pool->psr;
- if (psr != NULL && link->psr_feature_enabled)
+ if (psr != NULL && link->psr_settings.psr_feature_enabled)
psr->funcs->psr_enable(psr, allow_active);
- else if ((dmcu != NULL && dmcu->funcs->is_dmcu_initialized(dmcu)) && link->psr_feature_enabled)
+ else if ((dmcu != NULL && dmcu->funcs->is_dmcu_initialized(dmcu)) && link->psr_settings.psr_feature_enabled)
dmcu->funcs->set_psr_enable(dmcu, allow_active, wait);
- link->psr_allow_active = allow_active;
+ link->psr_settings.psr_allow_active = allow_active;
return true;
}
@@ -2439,9 +2576,9 @@ bool dc_link_get_psr_state(const struct dc_link *link, uint32_t *psr_state)
struct dmcu *dmcu = dc->res_pool->dmcu;
struct dmub_psr *psr = dc->res_pool->psr;
- if (psr != NULL && link->psr_feature_enabled)
+ if (psr != NULL && link->psr_settings.psr_feature_enabled)
psr->funcs->psr_get_state(psr, psr_state);
- else if (dmcu != NULL && link->psr_feature_enabled)
+ else if (dmcu != NULL && link->psr_settings.psr_feature_enabled)
dmcu->funcs->get_psr_state(dmcu, psr_state);
return true;
@@ -2612,14 +2749,14 @@ bool dc_link_setup_psr(struct dc_link *link,
psr_context->frame_delay = 0;
if (psr)
- link->psr_feature_enabled = psr->funcs->psr_copy_settings(psr, link, psr_context);
+ link->psr_settings.psr_feature_enabled = psr->funcs->psr_copy_settings(psr, link, psr_context);
else
- link->psr_feature_enabled = dmcu->funcs->setup_psr(dmcu, link, psr_context);
+ link->psr_settings.psr_feature_enabled = dmcu->funcs->setup_psr(dmcu, link, psr_context);
/* psr_enabled == 0 indicates setup_psr did not succeed, but this
* should not happen since firmware should be running at this point
*/
- if (link->psr_feature_enabled == 0)
+ if (link->psr_settings.psr_feature_enabled == 0)
ASSERT(0);
return true;
@@ -2966,7 +3103,7 @@ void core_link_enable_stream(
enum dc_status status;
DC_LOGGER_INIT(pipe_ctx->stream->ctx->logger);
- if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment) &&
+ if (!IS_DIAG_DC(dc->ctx->dce_environment) &&
dc_is_virtual_signal(pipe_ctx->stream->signal))
return;
@@ -3040,6 +3177,18 @@ void core_link_enable_stream(
if (pipe_ctx->stream->dpms_off)
return;
+ /* Have to set up DSC before DIG FE and BE are connected (which happens before the
+ * link training). This is to make sure the bandwidth sent to DIG BE won't be
+ * bigger than what the link and/or DIG BE can handle. VBID[6]/CompressedStream_flag
+ * will be automatically set at a later time when the video is enabled
+ * (DP_VID_STREAM_EN = 1).
+ */
+ if (pipe_ctx->stream->timing.flags.DSC) {
+ if (dc_is_dp_signal(pipe_ctx->stream->signal) ||
+ dc_is_virtual_signal(pipe_ctx->stream->signal))
+ dp_set_dsc_enable(pipe_ctx, true);
+ }
+
status = enable_link(state, pipe_ctx);
if (status != DC_OK) {
@@ -3067,11 +3216,6 @@ void core_link_enable_stream(
CONTROLLER_DP_TEST_PATTERN_VIDEOMODE,
COLOR_DEPTH_UNDEFINED);
- if (pipe_ctx->stream->timing.flags.DSC) {
- if (dc_is_dp_signal(pipe_ctx->stream->signal) ||
- dc_is_virtual_signal(pipe_ctx->stream->signal))
- dp_set_dsc_enable(pipe_ctx, true);
- }
dc->hwss.enable_stream(pipe_ctx);
/* Set DPS PPS SDP (AKA "info frames") */
@@ -3101,6 +3245,10 @@ void core_link_enable_stream(
dp_set_dsc_enable(pipe_ctx, true);
}
+
+ if (pipe_ctx->stream->signal == SIGNAL_TYPE_HDMI_TYPE_A)
+ core_link_set_avmute(pipe_ctx, false);
}
void core_link_disable_stream(struct pipe_ctx *pipe_ctx)
@@ -3109,10 +3257,14 @@ void core_link_disable_stream(struct pipe_ctx *pipe_ctx)
struct dc_stream_state *stream = pipe_ctx->stream;
struct dc_link *link = stream->sink->link;
- if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment) &&
+ if (!IS_DIAG_DC(dc->ctx->dce_environment) &&
dc_is_virtual_signal(pipe_ctx->stream->signal))
return;
+ if (pipe_ctx->stream->signal == SIGNAL_TYPE_HDMI_TYPE_A)
+ core_link_set_avmute(pipe_ctx, true);
+
#if defined(CONFIG_DRM_AMD_DC_HDCP)
update_psp_stream_config(pipe_ctx, true);
#endif
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c
index 256889eed93e..aefd29a440b5 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c
@@ -599,7 +599,7 @@ bool dal_ddc_submit_aux_command(struct ddc_service *ddc,
do {
struct aux_payload current_payload;
bool is_end_of_payload = (retrieved + DEFAULT_AUX_MAX_DATA_SIZE) >
- payload->length ? true : false;
+ payload->length;
current_payload.address = payload->address;
current_payload.data = &payload->data[retrieved];
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
index caa090d0b6ac..91cd884d6f25 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
@@ -13,7 +13,6 @@
#include "core_status.h"
#include "dpcd_defs.h"
-#include "resource.h"
#define DC_LOGGER \
link->ctx->logger
@@ -1737,19 +1736,10 @@ bool dc_link_dp_sync_lt_end(struct dc_link *link, bool link_down)
static struct dc_link_settings get_max_link_cap(struct dc_link *link)
{
- /* Set Default link settings */
- struct dc_link_settings max_link_cap = {LANE_COUNT_FOUR, LINK_RATE_HIGH,
- LINK_SPREAD_05_DOWNSPREAD_30KHZ, false, 0};
-
- /* Higher link settings based on feature supported */
- if (link->link_enc->features.flags.bits.IS_HBR2_CAPABLE)
- max_link_cap.link_rate = LINK_RATE_HIGH2;
-
- if (link->link_enc->features.flags.bits.IS_HBR3_CAPABLE)
- max_link_cap.link_rate = LINK_RATE_HIGH3;
+ struct dc_link_settings max_link_cap = {0};
- if (link->link_enc->funcs->get_max_link_cap)
- link->link_enc->funcs->get_max_link_cap(link->link_enc, &max_link_cap);
+ /* get max link encoder capability */
+ link->link_enc->funcs->get_max_link_cap(link->link_enc, &max_link_cap);
/* Lower link settings based on sink's link cap */
if (link->reported_link_cap.lane_count < max_link_cap.lane_count)
@@ -2453,7 +2443,7 @@ static bool handle_hpd_irq_psr_sink(struct dc_link *link)
{
union dpcd_psr_configuration psr_configuration;
- if (!link->psr_feature_enabled)
+ if (!link->psr_settings.psr_feature_enabled)
return false;
dm_helpers_dp_read_dpcd(
@@ -2557,7 +2547,7 @@ static void dp_test_send_phy_test_pattern(struct dc_link *link)
/* get phy test pattern and pattern parameters from DP receiver */
core_link_read_dpcd(
link,
- DP_TEST_PHY_PATTERN,
+ DP_PHY_TEST_PATTERN,
&dpcd_test_pattern.raw,
sizeof(dpcd_test_pattern));
core_link_read_dpcd(
@@ -4267,7 +4257,7 @@ void dpcd_set_source_specific_data(struct dc_link *link)
{
const uint32_t post_oui_delay = 30; // 30ms
uint8_t dspc = 0;
- enum dc_status ret = DC_ERROR_UNEXPECTED;
+ enum dc_status ret;
ret = core_link_read_dpcd(link, DP_DOWN_STREAM_PORT_COUNT, &dspc,
sizeof(dspc));
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c
index 51e0ee6e7695..6590f51caefa 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c
@@ -400,7 +400,7 @@ static bool dp_set_dsc_on_rx(struct pipe_ctx *pipe_ctx, bool enable)
struct dc_stream_state *stream = pipe_ctx->stream;
bool result = false;
- if (IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment))
+ if (dc_is_virtual_signal(stream->signal) || IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment))
result = true;
else
result = dm_helpers_dp_write_dsc_enable(dc->ctx, stream, enable);
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
index f4bcc71b2920..0c5619364e7d 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
@@ -532,6 +532,24 @@ static inline void get_vp_scan_direction(
*flip_horz_scan_dir = !*flip_horz_scan_dir;
}
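+/* Count how many other pipes blend the same plane as this pipe via MPC combine. */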
+int get_num_mpc_splits(struct pipe_ctx *pipe)
+{
+ int mpc_split_count = 0;
+ struct pipe_ctx *other_pipe = pipe->bottom_pipe;
+
+ while (other_pipe && other_pipe->plane_state == pipe->plane_state) {
+ mpc_split_count++;
+ other_pipe = other_pipe->bottom_pipe;
+ }
+ other_pipe = pipe->top_pipe;
+ while (other_pipe && other_pipe->plane_state == pipe->plane_state) {
+ mpc_split_count++;
+ other_pipe = other_pipe->top_pipe;
+ }
+
+ return mpc_split_count;
+}
+
int get_num_odm_splits(struct pipe_ctx *pipe)
{
int odm_split_count = 0;
@@ -556,16 +574,11 @@ static void calculate_split_count_and_index(struct pipe_ctx *pipe_ctx, int *spli
/*Check for mpc split*/
struct pipe_ctx *split_pipe = pipe_ctx->top_pipe;
+ *split_count = get_num_mpc_splits(pipe_ctx);
while (split_pipe && split_pipe->plane_state == pipe_ctx->plane_state) {
(*split_idx)++;
- (*split_count)++;
split_pipe = split_pipe->top_pipe;
}
- split_pipe = pipe_ctx->bottom_pipe;
- while (split_pipe && split_pipe->plane_state == pipe_ctx->plane_state) {
- (*split_count)++;
- split_pipe = split_pipe->bottom_pipe;
- }
} else {
/*Get odm split index*/
struct pipe_ctx *split_pipe = pipe_ctx->prev_odm_pipe;
@@ -692,6 +705,9 @@ static void calculate_viewport(struct pipe_ctx *pipe_ctx)
/* Round up, assume original video size always even dimensions */
data->viewport_c.width = (data->viewport.width + vpc_div - 1) / vpc_div;
data->viewport_c.height = (data->viewport.height + vpc_div - 1) / vpc_div;
+
+ data->viewport_unadjusted = data->viewport;
+ data->viewport_c_unadjusted = data->viewport_c;
}
static void calculate_recout(struct pipe_ctx *pipe_ctx)
@@ -1061,8 +1077,8 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx)
calculate_viewport(pipe_ctx);
- if (pipe_ctx->plane_res.scl_data.viewport.height < 16 ||
- pipe_ctx->plane_res.scl_data.viewport.width < 16) {
+ if (pipe_ctx->plane_res.scl_data.viewport.height < 12 ||
+ pipe_ctx->plane_res.scl_data.viewport.width < 12) {
if (store_h_border_left) {
restore_border_left_from_dst(pipe_ctx,
store_h_border_left);
@@ -1358,9 +1374,6 @@ bool dc_add_plane_to_context(
dc_plane_state_retain(plane_state);
while (head_pipe) {
- tail_pipe = resource_get_tail_pipe(&context->res_ctx, head_pipe);
- ASSERT(tail_pipe);
-
free_pipe = acquire_free_pipe_for_head(context, pool, head_pipe);
#if defined(CONFIG_DRM_AMD_DC_DCN)
@@ -1378,6 +1391,8 @@ bool dc_add_plane_to_context(
free_pipe->plane_state = plane_state;
if (head_pipe != free_pipe) {
+ tail_pipe = resource_get_tail_pipe(&context->res_ctx, head_pipe);
+ ASSERT(tail_pipe);
free_pipe->stream_res.tg = tail_pipe->stream_res.tg;
free_pipe->stream_res.abm = tail_pipe->stream_res.abm;
free_pipe->stream_res.opp = tail_pipe->stream_res.opp;
@@ -1545,35 +1560,6 @@ bool dc_add_all_planes_for_stream(
return add_all_planes_for_stream(dc, stream, &set, 1, context);
}
-
-static bool is_hdr_static_meta_changed(struct dc_stream_state *cur_stream,
- struct dc_stream_state *new_stream)
-{
- if (cur_stream == NULL)
- return true;
-
- if (memcmp(&cur_stream->hdr_static_metadata,
- &new_stream->hdr_static_metadata,
- sizeof(struct dc_info_packet)) != 0)
- return true;
-
- return false;
-}
-
-static bool is_vsc_info_packet_changed(struct dc_stream_state *cur_stream,
- struct dc_stream_state *new_stream)
-{
- if (cur_stream == NULL)
- return true;
-
- if (memcmp(&cur_stream->vsc_infopacket,
- &new_stream->vsc_infopacket,
- sizeof(struct dc_info_packet)) != 0)
- return true;
-
- return false;
-}
-
static bool is_timing_changed(struct dc_stream_state *cur_stream,
struct dc_stream_state *new_stream)
{
@@ -1608,15 +1594,9 @@ static bool are_stream_backends_same(
if (is_timing_changed(stream_a, stream_b))
return false;
- if (is_hdr_static_meta_changed(stream_a, stream_b))
- return false;
-
if (stream_a->dpms_off != stream_b->dpms_off)
return false;
- if (is_vsc_info_packet_changed(stream_a, stream_b))
- return false;
-
return true;
}
@@ -1756,21 +1736,6 @@ static struct audio *find_first_free_audio(
return 0;
}
-bool resource_is_stream_unchanged(
- struct dc_state *old_context, struct dc_stream_state *stream)
-{
- int i;
-
- for (i = 0; i < old_context->stream_count; i++) {
- struct dc_stream_state *old_stream = old_context->streams[i];
-
- if (are_stream_backends_same(old_stream, stream))
- return true;
- }
-
- return false;
-}
-
/**
* dc_add_stream_to_ctx() - Add a new dc_stream_state to a dc_state.
*/
@@ -2025,17 +1990,6 @@ enum dc_status resource_map_pool_resources(
int pipe_idx = -1;
struct dc_bios *dcb = dc->ctx->dc_bios;
- /* TODO Check if this is needed */
- /*if (!resource_is_stream_unchanged(old_context, stream)) {
- if (stream != NULL && old_context->streams[i] != NULL) {
- stream->bit_depth_params =
- old_context->streams[i]->bit_depth_params;
- stream->clamping = old_context->streams[i]->clamping;
- continue;
- }
- }
- */
-
calculate_phy_pix_clks(stream);
/* TODO: Check Linux */
@@ -2718,19 +2672,16 @@ bool pipe_need_reprogram(
if (is_timing_changed(pipe_ctx_old->stream, pipe_ctx->stream))
return true;
- if (is_hdr_static_meta_changed(pipe_ctx_old->stream, pipe_ctx->stream))
- return true;
-
if (pipe_ctx_old->stream->dpms_off != pipe_ctx->stream->dpms_off)
return true;
- if (is_vsc_info_packet_changed(pipe_ctx_old->stream, pipe_ctx->stream))
- return true;
-
if (false == pipe_ctx_old->stream->link->link_state_valid &&
false == pipe_ctx_old->stream->dpms_off)
return true;
+ if (pipe_ctx_old->stream_res.dsc != pipe_ctx->stream_res.dsc)
+ return true;
+
return false;
}
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_sink.c b/drivers/gpu/drm/amd/display/dc/core/dc_sink.c
index a249a0e5edd0..9e16af22e4aa 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_sink.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_sink.c
@@ -54,6 +54,7 @@ static bool dc_sink_construct(struct dc_sink *sink, const struct dc_sink_init_da
sink->ctx = link->ctx;
sink->dongle_max_pix_clk = init_params->dongle_max_pix_clk;
sink->converter_disable_audio = init_params->converter_disable_audio;
+ sink->is_mst_legacy = init_params->sink_is_legacy;
sink->dc_container_id = NULL;
sink->sink_id = init_params->link->ctx->dc_sink_id_count;
// increment dc_sink_id_count because we don't want two sinks with same ID
diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index 1935cf6601eb..85908561c741 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -29,6 +29,9 @@
#include "dc_types.h"
#include "grph_object_defs.h"
#include "logger_types.h"
+#if defined(CONFIG_DRM_AMD_DC_HDCP)
+#include "hdcp_types.h"
+#endif
#include "gpio_types.h"
#include "link_service_types.h"
#include "grph_object_ctrl_defs.h"
@@ -39,7 +42,7 @@
#include "inc/hw/dmcu.h"
#include "dml/display_mode_lib.h"
-#define DC_VER "3.2.76"
+#define DC_VER "3.2.84"
#define MAX_SURFACES 3
#define MAX_PLANES 6
@@ -95,6 +98,49 @@ struct dc_plane_cap {
} max_downscale_factor;
};
+// Color management caps (DPP and MPC)
+struct rom_curve_caps {
+ uint16_t srgb : 1;
+ uint16_t bt2020 : 1;
+ uint16_t gamma2_2 : 1;
+ uint16_t pq : 1;
+ uint16_t hlg : 1;
+};
+
+struct dpp_color_caps {
+ uint16_t dcn_arch : 1; // all DCE generations treated the same
+ // input LUT is different from most LUTs, just a plain 256-entry lookup
+ uint16_t input_lut_shared : 1; // shared with DGAM
+ uint16_t icsc : 1;
+ uint16_t dgam_ram : 1;
+ uint16_t post_csc : 1; // before gamut remap
+ uint16_t gamma_corr : 1;
+
+ // hdr_mult and gamut remap always available in DPP (in that order)
+ // 3d lut implies shaper LUT,
+ // it may be shared with MPC - check MPC:shared_3d_lut flag
+ uint16_t hw_3d_lut : 1;
+ uint16_t ogam_ram : 1; // blnd gam
+ uint16_t ocsc : 1;
+ struct rom_curve_caps dgam_rom_caps;
+ struct rom_curve_caps ogam_rom_caps;
+};
+
+struct mpc_color_caps {
+ uint16_t gamut_remap : 1;
+ uint16_t ogam_ram : 1;
+ uint16_t ocsc : 1;
+ uint16_t num_3dluts : 3; // 3D LUT always assumes a preceding shaper LUT
+ uint16_t shared_3d_lut : 1; // can be in either DPP or MPC, but single instance
+
+ struct rom_curve_caps ogam_rom_caps;
+};
+
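+/* Aggregate color pipeline caps: what the DPP and MPC blocks each support. */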
+struct dc_color_caps {
+ struct dpp_color_caps dpp;
+ struct mpc_color_caps mpc;
+};
+
struct dc_caps {
uint32_t max_streams;
uint32_t max_links;
@@ -117,9 +163,9 @@ struct dc_caps {
bool psp_setup_panel_mode;
bool extended_aux_timeout_support;
bool dmcub_support;
- bool hw_3d_lut;
enum dp_protocol_version max_dp_protocol_version;
struct dc_plane_cap planes[MAX_PLANES];
+ struct dc_color_caps color;
};
struct dc_bug_wa {
@@ -230,7 +276,8 @@ struct dc_config {
bool forced_clocks;
bool disable_extended_timeout_support; // Used to disable extended timeout and lttpr feature as well
bool multi_mon_pp_mclk_switch;
- bool psr_on_dmub;
+ bool disable_dmcu;
+ bool enable_4to1MPC;
};
enum visual_confirm {
@@ -238,6 +285,7 @@ enum visual_confirm {
VISUAL_CONFIRM_SURFACE = 1,
VISUAL_CONFIRM_HDR = 2,
VISUAL_CONFIRM_MPCTREE = 4,
+ VISUAL_CONFIRM_PSR = 5,
};
enum dcc_option {
@@ -429,6 +477,7 @@ struct dc_debug_options {
bool enable_dmcub_surface_flip;
bool usbc_combo_phy_reset_wa;
bool disable_dsc;
+ bool enable_dram_clock_change_one_display_vactive;
};
struct dc_debug_data {
@@ -474,6 +523,7 @@ struct dc_bounding_box_overrides {
int urgent_latency_ns;
int percent_of_ideal_drambw;
int dram_clock_change_latency_ns;
+ int dummy_clock_change_latency_ns;
/* This forces a hard min on the DCFCLK we use
* for DML. Unlike the debug option for forcing
* DCFCLK, this override affects watermark calculations
@@ -987,6 +1037,7 @@ struct dpcd_caps {
union dpcd_fec_capability fec_cap;
struct dpcd_dsc_capabilities dsc_caps;
struct dc_lttpr_caps lttpr_caps;
+ struct psr_caps psr_caps;
};
@@ -1004,6 +1055,35 @@ union dpcd_sink_ext_caps {
uint8_t raw;
};
+#if defined(CONFIG_DRM_AMD_DC_HDCP)
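+/* HDCP 2.x RxCaps / HDCP2Version as read from the sink by query_hdcp_capability() */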
+union hdcp_rx_caps {
+ struct {
+ uint8_t version;
+ uint8_t reserved;
+ struct {
+ uint8_t repeater : 1;
+ uint8_t hdcp_capable : 1;
+ uint8_t reserved : 6;
+ } byte0;
+ } fields;
+ uint8_t raw[3];
+};
+
+union hdcp_bcaps {
+ struct {
+ uint8_t HDCP_CAPABLE:1;
+ uint8_t REPEATER:1;
+ uint8_t RESERVED:6;
+ } bits;
+ uint8_t raw;
+};
+
+struct hdcp_caps {
+ union hdcp_rx_caps rx_caps;
+ union hdcp_bcaps bcaps;
+};
+#endif
+
#include "dc_link.h"
/*******************************************************************************
@@ -1046,7 +1126,7 @@ struct dc_sink {
void *priv;
struct stereo_3d_features features_3d[TIMING_3D_FORMAT_MAX];
bool converter_disable_audio;
-
+ bool is_mst_legacy;
struct dc_sink_dsc_caps dsc_caps;
struct dc_sink_fec_caps fec_caps;
@@ -1073,6 +1153,7 @@ struct dc_sink_init_data {
struct dc_link *link;
uint32_t dongle_max_pix_clk;
bool converter_disable_audio;
+ bool sink_is_legacy;
};
struct dc_sink *dc_sink_create(const struct dc_sink_init_data *init_params);
@@ -1104,9 +1185,16 @@ void dc_set_power_state(
struct dc *dc,
enum dc_acpi_cm_power_state power_state);
void dc_resume(struct dc *dc);
-unsigned int dc_get_current_backlight_pwm(struct dc *dc);
-unsigned int dc_get_target_backlight_pwm(struct dc *dc);
+#if defined(CONFIG_DRM_AMD_DC_HDCP)
+/*
+ * HDCP Interfaces
+ */
+enum hdcp_message_status dc_process_hdcp_msg(
+ enum signal_type signal,
+ struct dc_link *link,
+ struct hdcp_protection_message *message_info);
+#endif
bool dc_is_dmcu_initialized(struct dc *dc);
enum dc_status dc_set_clock(struct dc *dc, enum dc_clock_type clock_type, uint32_t clk_khz, uint32_t stepping);
diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
index 59c298a6484f..eea2429ac67d 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
+++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
@@ -25,7 +25,7 @@
#include "dc.h"
#include "dc_dmub_srv.h"
-#include "../dmub/inc/dmub_srv.h"
+#include "../dmub/dmub_srv.h"
static void dc_dmub_srv_construct(struct dc_dmub_srv *dc_srv, struct dc *dc,
struct dmub_srv *dmub)
@@ -58,7 +58,7 @@ void dc_dmub_srv_destroy(struct dc_dmub_srv **dmub_srv)
}
void dc_dmub_srv_cmd_queue(struct dc_dmub_srv *dc_dmub_srv,
- struct dmub_cmd_header *cmd)
+ union dmub_rb_cmd *cmd)
{
struct dmub_srv *dmub = dc_dmub_srv->dmub;
struct dc_context *dc_ctx = dc_dmub_srv->ctx;
diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h
index 754b6077539c..a3a09ccb6d26 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h
@@ -27,10 +27,9 @@
#define _DMUB_DC_SRV_H_
#include "os_types.h"
-#include "../dmub/inc/dmub_cmd.h"
+#include "dmub/dmub_srv.h"
struct dmub_srv;
-struct dmub_cmd_header;
struct dc_reg_helper_state {
bool gather_in_progress;
@@ -49,7 +48,7 @@ struct dc_dmub_srv {
};
void dc_dmub_srv_cmd_queue(struct dc_dmub_srv *dc_dmub_srv,
- struct dmub_cmd_header *cmd);
+ union dmub_rb_cmd *cmd);
void dc_dmub_srv_cmd_execute(struct dc_dmub_srv *dc_dmub_srv);
diff --git a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h
index bb2730e9521e..af177c087d3b 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h
@@ -740,5 +740,11 @@ struct dpcd_dsc_capabilities {
union dpcd_dsc_ext_capabilities dsc_ext_caps;
};
+/* These parameters are from PSR capabilities reported by Sink DPCD */
+struct psr_caps {
+ unsigned char psr_version;
+ unsigned int psr_rfb_setup_time;
+ bool psr_exit_link_training_required;
+};
#endif /* DC_DP_TYPES_H */
diff --git a/drivers/gpu/drm/amd/display/dc/dc_helper.c b/drivers/gpu/drm/amd/display/dc/dc_helper.c
index 737048d8a96c..85a0170be544 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_helper.c
+++ b/drivers/gpu/drm/amd/display/dc/dc_helper.c
@@ -50,7 +50,7 @@ static inline void submit_dmub_read_modify_write(
gather = ctx->dmub_srv->reg_helper_offload.gather_in_progress;
ctx->dmub_srv->reg_helper_offload.gather_in_progress = false;
- dc_dmub_srv_cmd_queue(ctx->dmub_srv, &cmd_buf->header);
+ dc_dmub_srv_cmd_queue(ctx->dmub_srv, &offload->cmd_data);
ctx->dmub_srv->reg_helper_offload.gather_in_progress = gather;
@@ -73,7 +73,7 @@ static inline void submit_dmub_burst_write(
gather = ctx->dmub_srv->reg_helper_offload.gather_in_progress;
ctx->dmub_srv->reg_helper_offload.gather_in_progress = false;
- dc_dmub_srv_cmd_queue(ctx->dmub_srv, &cmd_buf->header);
+ dc_dmub_srv_cmd_queue(ctx->dmub_srv, &offload->cmd_data);
ctx->dmub_srv->reg_helper_offload.gather_in_progress = gather;
@@ -92,7 +92,7 @@ static inline void submit_dmub_reg_wait(
gather = ctx->dmub_srv->reg_helper_offload.gather_in_progress;
ctx->dmub_srv->reg_helper_offload.gather_in_progress = false;
- dc_dmub_srv_cmd_queue(ctx->dmub_srv, &cmd_buf->header);
+ dc_dmub_srv_cmd_queue(ctx->dmub_srv, &offload->cmd_data);
memset(cmd_buf, 0, sizeof(*cmd_buf));
offload->reg_seq_count = 0;
diff --git a/drivers/gpu/drm/amd/display/dc/dc_link.h b/drivers/gpu/drm/amd/display/dc/dc_link.h
index 00ff5e98278c..f63fc25aa6c5 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_link.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_link.h
@@ -66,6 +66,22 @@ struct time_stamp {
struct link_trace {
struct time_stamp time_stamp;
};
+
+/* PSR feature flags */
+struct psr_settings {
+ bool psr_feature_enabled; // PSR is supported by sink
+ bool psr_allow_active; // PSR is currently active
+ enum dc_psr_version psr_version; // Internal PSR version, determined based on DPCD
+
+ /* These parameters are calculated in Driver,
+ * based on display timing and Sink capabilities.
+ * If VBLANK region is too small and Sink takes a long time
+ * to set up RFB, it may take an extra frame to enter PSR state.
+ */
+ bool psr_frame_capture_indication_req;
+ unsigned int psr_sdp_transmit_line_num_deadline;
+};
+
/*
* A link contains one or more sinks and their connected status.
* The currently active signal type (HDMI, DP-SST, DP-MST) is also reported.
@@ -118,6 +134,7 @@ struct dc_link {
struct dc_context *ctx;
+ struct panel_cntl *panel_cntl;
struct link_encoder *link_enc;
struct graphics_object_id link_id;
union ddi_channel_mapping ddi_channel_mapping;
@@ -126,11 +143,14 @@ struct dc_link {
uint32_t dongle_max_pix_clk;
unsigned short chip_caps;
unsigned int dpcd_sink_count;
+#if defined(CONFIG_DRM_AMD_DC_HDCP)
+ struct hdcp_caps hdcp_caps;
+#endif
enum edp_revision edp_revision;
- bool psr_feature_enabled;
- bool psr_allow_active;
union dpcd_sink_ext_caps dpcd_sink_ext_caps;
+ struct psr_settings psr_settings;
+
/* MST record stream using this link */
struct link_flags {
bool dp_keep_receiver_powered;
@@ -197,7 +217,7 @@ bool dc_link_set_default_brightness_aux(struct dc_link *link);
int dc_link_get_backlight_level(const struct dc_link *dc_link);
-bool dc_link_set_abm_disable(const struct dc_link *dc_link);
+int dc_link_get_target_backlight_pwm(const struct dc_link *link);
bool dc_link_set_psr_allow_active(struct dc_link *dc_link, bool enable, bool wait);
@@ -290,6 +310,10 @@ bool dc_link_detect_sink(struct dc_link *link, enum dc_connection_type *type);
* DPCD access interfaces
*/
+#ifdef CONFIG_DRM_AMD_DC_HDCP
+bool dc_link_is_hdcp14(struct dc_link *link);
+bool dc_link_is_hdcp22(struct dc_link *link);
+#endif
void dc_link_set_drive_settings(struct dc *dc,
struct link_training_settings *lt_settings,
const struct dc_link *link);
diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h
index a5c7ef47b8d3..49aad691e687 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_stream.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h
@@ -167,8 +167,6 @@ struct dc_stream_state {
/* TODO: custom INFO packets */
/* TODO: ABM info (DMCU) */
- /* PSR info */
- unsigned char psr_version;
/* TODO: CEA VIC */
/* DMCU info */
diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h
index 0d210104ba0a..f236da1c1859 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_types.h
@@ -862,4 +862,9 @@ struct dsc_dec_dpcd_caps {
uint32_t branch_max_line_width;
};
+enum dc_psr_version {
+ DC_PSR_VERSION_1 = 0,
+ DC_PSR_VERSION_UNSUPPORTED = 0xFFFFFFFF,
+};
+
#endif /* DC_TYPES_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/dce/Makefile b/drivers/gpu/drm/amd/display/dc/dce/Makefile
index fbfcff700971..f704a8fd52e8 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dce/Makefile
@@ -29,7 +29,7 @@
DCE = dce_audio.o dce_stream_encoder.o dce_link_encoder.o dce_hwseq.o \
dce_mem_input.o dce_clock_source.o dce_scl_filters.o dce_transform.o \
dce_opp.o dce_dmcu.o dce_abm.o dce_ipp.o dce_aux.o \
-dce_i2c.o dce_i2c_hw.o dce_i2c_sw.o dmub_psr.o
+dce_i2c.o dce_i2c_hw.o dce_i2c_sw.o dmub_psr.o dmub_abm.o dce_panel_cntl.o
AMD_DAL_DCE = $(addprefix $(AMDDALPATH)/dc/dce/,$(DCE))
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c
index b8a3fc505c9b..4e87e70237e3 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c
@@ -55,7 +55,7 @@
#define MCP_DISABLE_ABM_IMMEDIATELY 255
-static bool dce_abm_set_pipe(struct abm *abm, uint32_t controller_id)
+static bool dce_abm_set_pipe(struct abm *abm, uint32_t controller_id, uint32_t panel_inst)
{
struct dce_abm *abm_dce = TO_DCE_ABM(abm);
uint32_t rampingBoundary = 0xFFFF;
@@ -83,125 +83,12 @@ static bool dce_abm_set_pipe(struct abm *abm, uint32_t controller_id)
return true;
}
-static unsigned int calculate_16_bit_backlight_from_pwm(struct dce_abm *abm_dce)
-{
- uint64_t current_backlight;
- uint32_t round_result;
- uint32_t pwm_period_cntl, bl_period, bl_int_count;
- uint32_t bl_pwm_cntl, bl_pwm, fractional_duty_cycle_en;
- uint32_t bl_period_mask, bl_pwm_mask;
-
- pwm_period_cntl = REG_READ(BL_PWM_PERIOD_CNTL);
- REG_GET(BL_PWM_PERIOD_CNTL, BL_PWM_PERIOD, &bl_period);
- REG_GET(BL_PWM_PERIOD_CNTL, BL_PWM_PERIOD_BITCNT, &bl_int_count);
-
- bl_pwm_cntl = REG_READ(BL_PWM_CNTL);
- REG_GET(BL_PWM_CNTL, BL_ACTIVE_INT_FRAC_CNT, (uint32_t *)(&bl_pwm));
- REG_GET(BL_PWM_CNTL, BL_PWM_FRACTIONAL_EN, &fractional_duty_cycle_en);
-
- if (bl_int_count == 0)
- bl_int_count = 16;
-
- bl_period_mask = (1 << bl_int_count) - 1;
- bl_period &= bl_period_mask;
-
- bl_pwm_mask = bl_period_mask << (16 - bl_int_count);
-
- if (fractional_duty_cycle_en == 0)
- bl_pwm &= bl_pwm_mask;
- else
- bl_pwm &= 0xFFFF;
-
- current_backlight = bl_pwm << (1 + bl_int_count);
-
- if (bl_period == 0)
- bl_period = 0xFFFF;
-
- current_backlight = div_u64(current_backlight, bl_period);
- current_backlight = (current_backlight + 1) >> 1;
-
- current_backlight = (uint64_t)(current_backlight) * bl_period;
-
- round_result = (uint32_t)(current_backlight & 0xFFFFFFFF);
-
- round_result = (round_result >> (bl_int_count-1)) & 1;
-
- current_backlight >>= bl_int_count;
- current_backlight += round_result;
-
- return (uint32_t)(current_backlight);
-}
-
-static void driver_set_backlight_level(struct dce_abm *abm_dce,
- uint32_t backlight_pwm_u16_16)
-{
- uint32_t backlight_16bit;
- uint32_t masked_pwm_period;
- uint8_t bit_count;
- uint64_t active_duty_cycle;
- uint32_t pwm_period_bitcnt;
-
- /*
- * 1. Find 16 bit backlight active duty cycle, where 0 <= backlight
- * active duty cycle <= backlight period
- */
-
- /* 1.1 Apply bitmask for backlight period value based on value of BITCNT
- */
- REG_GET_2(BL_PWM_PERIOD_CNTL,
- BL_PWM_PERIOD_BITCNT, &pwm_period_bitcnt,
- BL_PWM_PERIOD, &masked_pwm_period);
-
- if (pwm_period_bitcnt == 0)
- bit_count = 16;
- else
- bit_count = pwm_period_bitcnt;
-
- /* e.g. maskedPwmPeriod = 0x24 when bitCount is 6 */
- masked_pwm_period = masked_pwm_period & ((1 << bit_count) - 1);
-
- /* 1.2 Calculate integer active duty cycle required upper 16 bits
- * contain integer component, lower 16 bits contain fractional component
- * of active duty cycle e.g. 0x21BDC0 = 0xEFF0 * 0x24
- */
- active_duty_cycle = backlight_pwm_u16_16 * masked_pwm_period;
-
- /* 1.3 Calculate 16 bit active duty cycle from integer and fractional
- * components shift by bitCount then mask 16 bits and add rounding bit
- * from MSB of fraction e.g. 0x86F7 = ((0x21BDC0 >> 6) & 0xFFF) + 0
- */
- backlight_16bit = active_duty_cycle >> bit_count;
- backlight_16bit &= 0xFFFF;
- backlight_16bit += (active_duty_cycle >> (bit_count - 1)) & 0x1;
-
- /*
- * 2. Program register with updated value
- */
-
- /* 2.1 Lock group 2 backlight registers */
-
- REG_UPDATE_2(BL_PWM_GRP1_REG_LOCK,
- BL_PWM_GRP1_IGNORE_MASTER_LOCK_EN, 1,
- BL_PWM_GRP1_REG_LOCK, 1);
-
- // 2.2 Write new active duty cycle
- REG_UPDATE(BL_PWM_CNTL, BL_ACTIVE_INT_FRAC_CNT, backlight_16bit);
-
- /* 2.3 Unlock group 2 backlight registers */
- REG_UPDATE(BL_PWM_GRP1_REG_LOCK,
- BL_PWM_GRP1_REG_LOCK, 0);
-
- /* 3 Wait for pending bit to be cleared */
- REG_WAIT(BL_PWM_GRP1_REG_LOCK,
- BL_PWM_GRP1_REG_UPDATE_PENDING, 0,
- 1, 10000);
-}
-
static void dmcu_set_backlight_level(
struct dce_abm *abm_dce,
uint32_t backlight_pwm_u16_16,
uint32_t frame_ramp,
- uint32_t controller_id)
+ uint32_t controller_id,
+ uint32_t panel_id)
{
unsigned int backlight_8_bit = 0;
uint32_t s2;
@@ -213,7 +100,7 @@ static void dmcu_set_backlight_level(
// Take MSB of fractional part since backlight is not max
backlight_8_bit = (backlight_pwm_u16_16 >> 8) & 0xFF;
- dce_abm_set_pipe(&abm_dce->base, controller_id);
+ dce_abm_set_pipe(&abm_dce->base, controller_id, panel_id);
/* waitDMCUReadyForCmd */
REG_WAIT(MASTER_COMM_CNTL_REG, MASTER_COMM_INTERRUPT,
@@ -248,10 +135,9 @@ static void dmcu_set_backlight_level(
0, 1, 80000);
}
-static void dce_abm_init(struct abm *abm)
+static void dce_abm_init(struct abm *abm, uint32_t backlight)
{
struct dce_abm *abm_dce = TO_DCE_ABM(abm);
- unsigned int backlight = calculate_16_bit_backlight_from_pwm(abm_dce);
REG_WRITE(DC_ABM1_HG_SAMPLE_RATE, 0x103);
REG_WRITE(DC_ABM1_HG_SAMPLE_RATE, 0x101);
@@ -331,86 +217,12 @@ static bool dce_abm_set_level(struct abm *abm, uint32_t level)
return true;
}
-static bool dce_abm_immediate_disable(struct abm *abm)
+static bool dce_abm_immediate_disable(struct abm *abm, uint32_t panel_inst)
{
- struct dce_abm *abm_dce = TO_DCE_ABM(abm);
-
if (abm->dmcu_is_running == false)
return true;
- dce_abm_set_pipe(abm, MCP_DISABLE_ABM_IMMEDIATELY);
-
- abm->stored_backlight_registers.BL_PWM_CNTL =
- REG_READ(BL_PWM_CNTL);
- abm->stored_backlight_registers.BL_PWM_CNTL2 =
- REG_READ(BL_PWM_CNTL2);
- abm->stored_backlight_registers.BL_PWM_PERIOD_CNTL =
- REG_READ(BL_PWM_PERIOD_CNTL);
-
- REG_GET(LVTMA_PWRSEQ_REF_DIV, BL_PWM_REF_DIV,
- &abm->stored_backlight_registers.LVTMA_PWRSEQ_REF_DIV_BL_PWM_REF_DIV);
- return true;
-}
-
-static bool dce_abm_init_backlight(struct abm *abm)
-{
- struct dce_abm *abm_dce = TO_DCE_ABM(abm);
- uint32_t value;
-
- /* It must not be 0, so we have to restore them
- * Bios bug w/a - period resets to zero,
- * restoring to cache values which is always correct
- */
- REG_GET(BL_PWM_CNTL, BL_ACTIVE_INT_FRAC_CNT, &value);
- if (value == 0 || value == 1) {
- if (abm->stored_backlight_registers.BL_PWM_CNTL != 0) {
- REG_WRITE(BL_PWM_CNTL,
- abm->stored_backlight_registers.BL_PWM_CNTL);
- REG_WRITE(BL_PWM_CNTL2,
- abm->stored_backlight_registers.BL_PWM_CNTL2);
- REG_WRITE(BL_PWM_PERIOD_CNTL,
- abm->stored_backlight_registers.BL_PWM_PERIOD_CNTL);
- REG_UPDATE(LVTMA_PWRSEQ_REF_DIV,
- BL_PWM_REF_DIV,
- abm->stored_backlight_registers.
- LVTMA_PWRSEQ_REF_DIV_BL_PWM_REF_DIV);
- } else {
- /* TODO: Note: This should not really happen since VBIOS
- * should have initialized PWM registers on boot.
- */
- REG_WRITE(BL_PWM_CNTL, 0xC000FA00);
- REG_WRITE(BL_PWM_PERIOD_CNTL, 0x000C0FA0);
- }
- } else {
- abm->stored_backlight_registers.BL_PWM_CNTL =
- REG_READ(BL_PWM_CNTL);
- abm->stored_backlight_registers.BL_PWM_CNTL2 =
- REG_READ(BL_PWM_CNTL2);
- abm->stored_backlight_registers.BL_PWM_PERIOD_CNTL =
- REG_READ(BL_PWM_PERIOD_CNTL);
-
- REG_GET(LVTMA_PWRSEQ_REF_DIV, BL_PWM_REF_DIV,
- &abm->stored_backlight_registers.
- LVTMA_PWRSEQ_REF_DIV_BL_PWM_REF_DIV);
- }
-
- /* Have driver take backlight control
- * TakeBacklightControl(true)
- */
- value = REG_READ(BIOS_SCRATCH_2);
- value |= ATOM_S2_VRI_BRIGHT_ENABLE;
- REG_WRITE(BIOS_SCRATCH_2, value);
-
- /* Enable the backlight output */
- REG_UPDATE(BL_PWM_CNTL, BL_PWM_EN, 1);
-
- /* Disable fractional pwm if configured */
- REG_UPDATE(BL_PWM_CNTL, BL_PWM_FRACTIONAL_EN,
- abm->ctx->dc->config.disable_fractional_pwm ? 0 : 1);
-
- /* Unlock group 2 backlight registers */
- REG_UPDATE(BL_PWM_GRP1_REG_LOCK,
- BL_PWM_GRP1_REG_LOCK, 0);
+ dce_abm_set_pipe(abm, MCP_DISABLE_ABM_IMMEDIATELY, panel_inst);
return true;
}
@@ -420,21 +232,18 @@ static bool dce_abm_set_backlight_level_pwm(
unsigned int backlight_pwm_u16_16,
unsigned int frame_ramp,
unsigned int controller_id,
- bool use_smooth_brightness)
+ unsigned int panel_inst)
{
struct dce_abm *abm_dce = TO_DCE_ABM(abm);
DC_LOG_BACKLIGHT("New Backlight level: %d (0x%X)\n",
backlight_pwm_u16_16, backlight_pwm_u16_16);
- /* If DMCU is in reset state, DMCU is uninitialized */
- if (use_smooth_brightness)
- dmcu_set_backlight_level(abm_dce,
- backlight_pwm_u16_16,
- frame_ramp,
- controller_id);
- else
- driver_set_backlight_level(abm_dce, backlight_pwm_u16_16);
+ dmcu_set_backlight_level(abm_dce,
+ backlight_pwm_u16_16,
+ frame_ramp,
+ controller_id,
+ panel_inst);
return true;
}
@@ -442,12 +251,12 @@ static bool dce_abm_set_backlight_level_pwm(
static const struct abm_funcs dce_funcs = {
.abm_init = dce_abm_init,
.set_abm_level = dce_abm_set_level,
- .init_backlight = dce_abm_init_backlight,
.set_pipe = dce_abm_set_pipe,
.set_backlight_level_pwm = dce_abm_set_backlight_level_pwm,
.get_current_backlight = dce_abm_get_current_backlight,
.get_target_backlight = dce_abm_get_target_backlight,
- .set_abm_immediate_disable = dce_abm_immediate_disable
+ .init_abm_config = NULL,
+ .set_abm_immediate_disable = dce_abm_immediate_disable,
};
static void dce_abm_construct(
@@ -461,10 +270,6 @@ static void dce_abm_construct(
base->ctx = ctx;
base->funcs = &dce_funcs;
- base->stored_backlight_registers.BL_PWM_CNTL = 0;
- base->stored_backlight_registers.BL_PWM_CNTL2 = 0;
- base->stored_backlight_registers.BL_PWM_PERIOD_CNTL = 0;
- base->stored_backlight_registers.LVTMA_PWRSEQ_REF_DIV_BL_PWM_REF_DIV = 0;
base->dmcu_is_running = false;
abm_dce->regs = regs;
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.h b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.h
index ba0caaffa24b..9718a4823372 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.h
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.h
@@ -30,11 +30,6 @@
#include "abm.h"
#define ABM_COMMON_REG_LIST_DCE_BASE() \
- SR(BL_PWM_PERIOD_CNTL), \
- SR(BL_PWM_CNTL), \
- SR(BL_PWM_CNTL2), \
- SR(BL_PWM_GRP1_REG_LOCK), \
- SR(LVTMA_PWRSEQ_REF_DIV), \
SR(MASTER_COMM_CNTL_REG), \
SR(MASTER_COMM_CMD_REG), \
SR(MASTER_COMM_DATA_REG1)
@@ -85,15 +80,6 @@
.field_name = reg_name ## __ ## field_name ## post_fix
#define ABM_COMMON_MASK_SH_LIST_DCE_COMMON_BASE(mask_sh) \
- ABM_SF(BL_PWM_PERIOD_CNTL, BL_PWM_PERIOD, mask_sh), \
- ABM_SF(BL_PWM_PERIOD_CNTL, BL_PWM_PERIOD_BITCNT, mask_sh), \
- ABM_SF(BL_PWM_CNTL, BL_ACTIVE_INT_FRAC_CNT, mask_sh), \
- ABM_SF(BL_PWM_CNTL, BL_PWM_FRACTIONAL_EN, mask_sh), \
- ABM_SF(BL_PWM_CNTL, BL_PWM_EN, mask_sh), \
- ABM_SF(BL_PWM_GRP1_REG_LOCK, BL_PWM_GRP1_IGNORE_MASTER_LOCK_EN, mask_sh), \
- ABM_SF(BL_PWM_GRP1_REG_LOCK, BL_PWM_GRP1_REG_LOCK, mask_sh), \
- ABM_SF(BL_PWM_GRP1_REG_LOCK, BL_PWM_GRP1_REG_UPDATE_PENDING, mask_sh), \
- ABM_SF(LVTMA_PWRSEQ_REF_DIV, BL_PWM_REF_DIV, mask_sh), \
ABM_SF(MASTER_COMM_CNTL_REG, MASTER_COMM_INTERRUPT, mask_sh), \
ABM_SF(MASTER_COMM_CMD_REG, MASTER_COMM_CMD_REG_BYTE0, mask_sh), \
ABM_SF(MASTER_COMM_CMD_REG, MASTER_COMM_CMD_REG_BYTE1, mask_sh), \
@@ -178,19 +164,10 @@
type ABM1_HG_REG_READ_MISSED_FRAME_CLEAR; \
type ABM1_LS_REG_READ_MISSED_FRAME_CLEAR; \
type ABM1_BL_REG_READ_MISSED_FRAME_CLEAR; \
- type BL_PWM_PERIOD; \
- type BL_PWM_PERIOD_BITCNT; \
- type BL_ACTIVE_INT_FRAC_CNT; \
- type BL_PWM_FRACTIONAL_EN; \
type MASTER_COMM_INTERRUPT; \
type MASTER_COMM_CMD_REG_BYTE0; \
type MASTER_COMM_CMD_REG_BYTE1; \
- type MASTER_COMM_CMD_REG_BYTE2; \
- type BL_PWM_REF_DIV; \
- type BL_PWM_EN; \
- type BL_PWM_GRP1_IGNORE_MASTER_LOCK_EN; \
- type BL_PWM_GRP1_REG_LOCK; \
- type BL_PWM_GRP1_REG_UPDATE_PENDING
+ type MASTER_COMM_CMD_REG_BYTE2
struct dce_abm_shift {
ABM_REG_FIELD_LIST(uint8_t);
@@ -201,10 +178,6 @@ struct dce_abm_mask {
};
struct dce_abm_registers {
- uint32_t BL_PWM_PERIOD_CNTL;
- uint32_t BL_PWM_CNTL;
- uint32_t BL_PWM_CNTL2;
- uint32_t LVTMA_PWRSEQ_REF_DIV;
uint32_t DC_ABM1_HG_SAMPLE_RATE;
uint32_t DC_ABM1_LS_SAMPLE_RATE;
uint32_t BL1_PWM_BL_UPDATE_SAMPLE_RATE;
@@ -219,7 +192,6 @@ struct dce_abm_registers {
uint32_t MASTER_COMM_CMD_REG;
uint32_t MASTER_COMM_DATA_REG1;
uint32_t BIOS_SCRATCH_2;
- uint32_t BL_PWM_GRP1_REG_LOCK;
};
struct dce_abm {
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
index 2e992fbc0d71..d2ad0504b0de 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
@@ -1014,39 +1014,6 @@ struct pixel_rate_range_table_entry {
unsigned short div_factor;
};
-static const struct pixel_rate_range_table_entry video_optimized_pixel_rates[] = {
- // /1.001 rates
- {25170, 25180, 25200, 1000, 1001}, //25.2MHz -> 25.17
- {59340, 59350, 59400, 1000, 1001}, //59.4Mhz -> 59.340
- {74170, 74180, 74250, 1000, 1001}, //74.25Mhz -> 74.1758
- {125870, 125880, 126000, 1000, 1001}, //126Mhz -> 125.87
- {148350, 148360, 148500, 1000, 1001}, //148.5Mhz -> 148.3516
- {167830, 167840, 168000, 1000, 1001}, //168Mhz -> 167.83
- {222520, 222530, 222750, 1000, 1001}, //222.75Mhz -> 222.527
- {257140, 257150, 257400, 1000, 1001}, //257.4Mhz -> 257.1429
- {296700, 296710, 297000, 1000, 1001}, //297Mhz -> 296.7033
- {342850, 342860, 343200, 1000, 1001}, //343.2Mhz -> 342.857
- {395600, 395610, 396000, 1000, 1001}, //396Mhz -> 395.6
- {409090, 409100, 409500, 1000, 1001}, //409.5Mhz -> 409.091
- {445050, 445060, 445500, 1000, 1001}, //445.5Mhz -> 445.055
- {467530, 467540, 468000, 1000, 1001}, //468Mhz -> 467.5325
- {519230, 519240, 519750, 1000, 1001}, //519.75Mhz -> 519.231
- {525970, 525980, 526500, 1000, 1001}, //526.5Mhz -> 525.974
- {545450, 545460, 546000, 1000, 1001}, //546Mhz -> 545.455
- {593400, 593410, 594000, 1000, 1001}, //594Mhz -> 593.4066
- {623370, 623380, 624000, 1000, 1001}, //624Mhz -> 623.377
- {692300, 692310, 693000, 1000, 1001}, //693Mhz -> 692.308
- {701290, 701300, 702000, 1000, 1001}, //702Mhz -> 701.2987
- {791200, 791210, 792000, 1000, 1001}, //792Mhz -> 791.209
- {890100, 890110, 891000, 1000, 1001}, //891Mhz -> 890.1099
- {1186810, 1186820, 1188000, 1000, 1001},//1188Mhz -> 1186.8131
-
- // *1.001 rates
- {27020, 27030, 27000, 1001, 1000}, //27Mhz
- {54050, 54060, 54000, 1001, 1000}, //54Mhz
- {108100, 108110, 108000, 1001, 1000},//108Mhz
-};
-
static bool dcn20_program_pix_clk(
struct clock_source *clock_source,
struct pixel_clk_params *pix_clk_params,
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h b/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h
index c5aa1f48593a..5479d959ec62 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h
@@ -27,10 +27,6 @@
#include "dc_types.h"
-#define BL_REG_LIST()\
- SR(LVTMA_PWRSEQ_CNTL), \
- SR(LVTMA_PWRSEQ_STATE)
-
#define HWSEQ_DCEF_REG_LIST_DCE8() \
.DCFE_CLOCK_CONTROL[0] = mmCRTC0_CRTC_DCFE_CLOCK_CONTROL, \
.DCFE_CLOCK_CONTROL[1] = mmCRTC1_CRTC_DCFE_CLOCK_CONTROL, \
@@ -94,20 +90,17 @@
SRII(BLND_CONTROL, BLND, 0),\
SRII(BLND_CONTROL, BLND, 1),\
SR(BLNDV_CONTROL),\
- HWSEQ_PIXEL_RATE_REG_LIST(CRTC),\
- BL_REG_LIST()
+ HWSEQ_PIXEL_RATE_REG_LIST(CRTC)
#define HWSEQ_DCE8_REG_LIST() \
HWSEQ_DCEF_REG_LIST_DCE8(), \
HWSEQ_BLND_REG_LIST(), \
- HWSEQ_PIXEL_RATE_REG_LIST(CRTC),\
- BL_REG_LIST()
+ HWSEQ_PIXEL_RATE_REG_LIST(CRTC)
#define HWSEQ_DCE10_REG_LIST() \
HWSEQ_DCEF_REG_LIST(), \
HWSEQ_BLND_REG_LIST(), \
- HWSEQ_PIXEL_RATE_REG_LIST(CRTC), \
- BL_REG_LIST()
+ HWSEQ_PIXEL_RATE_REG_LIST(CRTC)
#define HWSEQ_ST_REG_LIST() \
HWSEQ_DCE11_REG_LIST_BASE(), \
@@ -134,8 +127,7 @@
SR(DCHUB_FB_LOCATION),\
SR(DCHUB_AGP_BASE),\
SR(DCHUB_AGP_BOT),\
- SR(DCHUB_AGP_TOP), \
- BL_REG_LIST()
+ SR(DCHUB_AGP_TOP)
#define HWSEQ_VG20_REG_LIST() \
HWSEQ_DCE120_REG_LIST(),\
@@ -144,8 +136,7 @@
#define HWSEQ_DCE112_REG_LIST() \
HWSEQ_DCE10_REG_LIST(), \
HWSEQ_PIXEL_RATE_REG_LIST(CRTC), \
- HWSEQ_PHYPLL_REG_LIST(CRTC), \
- BL_REG_LIST()
+ HWSEQ_PHYPLL_REG_LIST(CRTC)
#define HWSEQ_DCN_REG_LIST()\
SR(REFCLK_CNTL), \
@@ -207,8 +198,7 @@
SR(D3VGA_CONTROL), \
SR(D4VGA_CONTROL), \
SR(VGA_TEST_CONTROL), \
- SR(DC_IP_REQUEST_CNTL), \
- BL_REG_LIST()
+ SR(DC_IP_REQUEST_CNTL)
#define HWSEQ_DCN2_REG_LIST()\
HWSEQ_DCN_REG_LIST(), \
@@ -273,8 +263,7 @@
SR(D4VGA_CONTROL), \
SR(D5VGA_CONTROL), \
SR(D6VGA_CONTROL), \
- SR(DC_IP_REQUEST_CNTL), \
- BL_REG_LIST()
+ SR(DC_IP_REQUEST_CNTL)
#define HWSEQ_DCN21_REG_LIST()\
HWSEQ_DCN_REG_LIST(), \
@@ -324,15 +313,9 @@
SR(D4VGA_CONTROL), \
SR(D5VGA_CONTROL), \
SR(D6VGA_CONTROL), \
- SR(DC_IP_REQUEST_CNTL), \
- BL_REG_LIST()
+ SR(DC_IP_REQUEST_CNTL)
struct dce_hwseq_registers {
-
- /* Backlight registers */
- uint32_t LVTMA_PWRSEQ_CNTL;
- uint32_t LVTMA_PWRSEQ_STATE;
-
uint32_t DCFE_CLOCK_CONTROL[6];
uint32_t DCFEV_CLOCK_CONTROL;
uint32_t DC_MEM_GLOBAL_PWR_REQ_CNTL;
@@ -465,26 +448,18 @@ struct dce_hwseq_registers {
HWS_SF1(blk, PHYPLL_PIXEL_RATE_CNTL, PHYPLL_PIXEL_RATE_SOURCE, mask_sh),\
HWS_SF1(blk, PHYPLL_PIXEL_RATE_CNTL, PIXEL_RATE_PLL_SOURCE, mask_sh)
-#define HWSEQ_LVTMA_MASK_SH_LIST(mask_sh)\
- HWS_SF(, LVTMA_PWRSEQ_CNTL, LVTMA_BLON, mask_sh),\
- HWS_SF(, LVTMA_PWRSEQ_CNTL, LVTMA_DIGON, mask_sh),\
- HWS_SF(, LVTMA_PWRSEQ_CNTL, LVTMA_DIGON_OVRD, mask_sh),\
- HWS_SF(, LVTMA_PWRSEQ_STATE, LVTMA_PWRSEQ_TARGET_STATE_R, mask_sh)
-
#define HWSEQ_DCE8_MASK_SH_LIST(mask_sh)\
.DCFE_CLOCK_ENABLE = CRTC_DCFE_CLOCK_CONTROL__CRTC_DCFE_CLOCK_ENABLE ## mask_sh, \
HWS_SF(BLND_, V_UPDATE_LOCK, BLND_DCP_GRPH_V_UPDATE_LOCK, mask_sh),\
HWS_SF(BLND_, V_UPDATE_LOCK, BLND_SCL_V_UPDATE_LOCK, mask_sh),\
HWS_SF(BLND_, V_UPDATE_LOCK, BLND_DCP_GRPH_SURF_V_UPDATE_LOCK, mask_sh),\
HWS_SF(BLND_, CONTROL, BLND_MODE, mask_sh),\
- HWSEQ_PIXEL_RATE_MASK_SH_LIST(mask_sh, CRTC0_),\
- HWSEQ_LVTMA_MASK_SH_LIST(mask_sh)
+ HWSEQ_PIXEL_RATE_MASK_SH_LIST(mask_sh, CRTC0_)
#define HWSEQ_DCE10_MASK_SH_LIST(mask_sh)\
HWSEQ_DCEF_MASK_SH_LIST(mask_sh, DCFE_),\
HWSEQ_BLND_MASK_SH_LIST(mask_sh, BLND_),\
- HWSEQ_PIXEL_RATE_MASK_SH_LIST(mask_sh, CRTC0_),\
- HWSEQ_LVTMA_MASK_SH_LIST(mask_sh)
+ HWSEQ_PIXEL_RATE_MASK_SH_LIST(mask_sh, CRTC0_)
#define HWSEQ_DCE11_MASK_SH_LIST(mask_sh)\
HWSEQ_DCE10_MASK_SH_LIST(mask_sh),\
@@ -507,8 +482,7 @@ struct dce_hwseq_registers {
HWSEQ_BLND_MASK_SH_LIST(mask_sh, BLND0_BLND_),\
HWSEQ_PIXEL_RATE_MASK_SH_LIST(mask_sh, CRTC0_),\
HWSEQ_PHYPLL_MASK_SH_LIST(mask_sh, CRTC0_),\
- HWSEQ_GFX9_DCHUB_MASK_SH_LIST(mask_sh),\
- HWSEQ_LVTMA_MASK_SH_LIST(mask_sh)
+ HWSEQ_GFX9_DCHUB_MASK_SH_LIST(mask_sh)
#define HWSEQ_VG20_MASK_SH_LIST(mask_sh)\
HWSEQ_DCE12_MASK_SH_LIST(mask_sh),\
@@ -570,8 +544,7 @@ struct dce_hwseq_registers {
HWS_SF(, D3VGA_CONTROL, D3VGA_MODE_ENABLE, mask_sh),\
HWS_SF(, D4VGA_CONTROL, D4VGA_MODE_ENABLE, mask_sh),\
HWS_SF(, VGA_TEST_CONTROL, VGA_TEST_ENABLE, mask_sh),\
- HWS_SF(, VGA_TEST_CONTROL, VGA_TEST_RENDER_START, mask_sh),\
- HWSEQ_LVTMA_MASK_SH_LIST(mask_sh)
+ HWS_SF(, VGA_TEST_CONTROL, VGA_TEST_RENDER_START, mask_sh)
#define HWSEQ_DCN2_MASK_SH_LIST(mask_sh)\
HWSEQ_DCN_MASK_SH_LIST(mask_sh), \
@@ -630,8 +603,7 @@ struct dce_hwseq_registers {
HWS_SF(, DOMAIN19_PG_STATUS, DOMAIN19_PGFSM_PWR_STATUS, mask_sh), \
HWS_SF(, DOMAIN20_PG_STATUS, DOMAIN20_PGFSM_PWR_STATUS, mask_sh), \
HWS_SF(, DOMAIN21_PG_STATUS, DOMAIN21_PGFSM_PWR_STATUS, mask_sh), \
- HWS_SF(, DC_IP_REQUEST_CNTL, IP_REQUEST_EN, mask_sh), \
- HWSEQ_LVTMA_MASK_SH_LIST(mask_sh)
+ HWS_SF(, DC_IP_REQUEST_CNTL, IP_REQUEST_EN, mask_sh)
#define HWSEQ_DCN21_MASK_SH_LIST(mask_sh)\
HWSEQ_DCN_MASK_SH_LIST(mask_sh), \
@@ -671,10 +643,7 @@ struct dce_hwseq_registers {
HWS_SF(, DOMAIN16_PG_STATUS, DOMAIN16_PGFSM_PWR_STATUS, mask_sh), \
HWS_SF(, DOMAIN17_PG_STATUS, DOMAIN17_PGFSM_PWR_STATUS, mask_sh), \
HWS_SF(, DOMAIN18_PG_STATUS, DOMAIN18_PGFSM_PWR_STATUS, mask_sh), \
- HWS_SF(, DC_IP_REQUEST_CNTL, IP_REQUEST_EN, mask_sh), \
- HWSEQ_LVTMA_MASK_SH_LIST(mask_sh), \
- HWS_SF(, LVTMA_PWRSEQ_CNTL, LVTMA_BLON, mask_sh), \
- HWS_SF(, LVTMA_PWRSEQ_STATE, LVTMA_PWRSEQ_TARGET_STATE_R, mask_sh)
+ HWS_SF(, DC_IP_REQUEST_CNTL, IP_REQUEST_EN, mask_sh)
#define HWSEQ_REG_FIELD_LIST(type) \
type DCFE_CLOCK_ENABLE; \
@@ -706,11 +675,7 @@ struct dce_hwseq_registers {
type PF_LFB_REGION;\
type PF_MAX_REGION;\
type ENABLE_L1_TLB;\
- type SYSTEM_ACCESS_MODE;\
- type LVTMA_BLON;\
- type LVTMA_DIGON;\
- type LVTMA_DIGON_OVRD;\
- type LVTMA_PWRSEQ_TARGET_STATE_R;
+ type SYSTEM_ACCESS_MODE;
#define HWSEQ_DCN_REG_FIELD_LIST(type) \
type HUBP_VTG_SEL; \
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c
index 8527cce81c6f..8d8c84c81b34 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c
@@ -118,7 +118,8 @@ static const struct link_encoder_funcs dce110_lnk_enc_funcs = {
.enable_hpd = dce110_link_encoder_enable_hpd,
.disable_hpd = dce110_link_encoder_disable_hpd,
.is_dig_enabled = dce110_is_dig_enabled,
- .destroy = dce110_link_encoder_destroy
+ .destroy = dce110_link_encoder_destroy,
+ .get_max_link_cap = dce110_link_encoder_get_max_link_cap
};
static enum bp_result link_transmitter_control(
@@ -1389,3 +1390,20 @@ void dce110_link_encoder_disable_hpd(struct link_encoder *enc)
set_reg_field_value(value, 0, DC_HPD_CONTROL, DC_HPD_EN);
}
+
+void dce110_link_encoder_get_max_link_cap(struct link_encoder *enc,
+ struct dc_link_settings *link_settings)
+{
+ /* Set Default link settings */
+ struct dc_link_settings max_link_cap = {LANE_COUNT_FOUR, LINK_RATE_HIGH,
+ LINK_SPREAD_05_DOWNSPREAD_30KHZ, false, 0};
+
+ /* Higher link settings based on feature supported */
+ if (enc->features.flags.bits.IS_HBR2_CAPABLE)
+ max_link_cap.link_rate = LINK_RATE_HIGH2;
+
+ if (enc->features.flags.bits.IS_HBR3_CAPABLE)
+ max_link_cap.link_rate = LINK_RATE_HIGH3;
+
+ *link_settings = max_link_cap;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.h b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.h
index 3c9368df4093..384389f0e2c3 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.h
@@ -271,4 +271,7 @@ void dce110_psr_program_secondary_packet(struct link_encoder *enc,
bool dce110_is_dig_enabled(struct link_encoder *enc);
+void dce110_link_encoder_get_max_link_cap(struct link_encoder *enc,
+ struct dc_link_settings *link_settings);
+
#endif /* __DC_LINK_ENCODER__DCE110_H__ */
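
dce110_link_encoder_get_max_link_cap() reports the encoder-side ceiling; a caller still has to clamp it against what the sink reports. A sketch of that clamp, assuming the lane_count/link_rate fields implied by the initializer in the .c hunk above (the helper itself is not part of this patch):

static void clamp_to_encoder_cap(struct link_encoder *enc,
		struct dc_link_settings *settings)
{
	struct dc_link_settings enc_cap;

	enc->funcs->get_max_link_cap(enc, &enc_cap);

	if (settings->link_rate > enc_cap.link_rate)
		settings->link_rate = enc_cap.link_rate;
	if (settings->lane_count > enc_cap.lane_count)
		settings->lane_count = enc_cap.lane_count;
}
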
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.c b/drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.c
new file mode 100644
index 000000000000..ebff9b1e312e
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.c
@@ -0,0 +1,299 @@
+/*
+ * Copyright 2012-15 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "reg_helper.h"
+#include "core_types.h"
+#include "dc_dmub_srv.h"
+#include "panel_cntl.h"
+#include "dce_panel_cntl.h"
+#include "atom.h"
+
+#define TO_DCE_PANEL_CNTL(panel_cntl)\
+ container_of(panel_cntl, struct dce_panel_cntl, base)
+
+#define CTX \
+ dce_panel_cntl->base.ctx
+
+#define DC_LOGGER \
+ dce_panel_cntl->base.ctx->logger
+
+#define REG(reg)\
+ dce_panel_cntl->regs->reg
+
+#undef FN
+#define FN(reg_name, field_name) \
+ dce_panel_cntl->shift->field_name, dce_panel_cntl->mask->field_name
+
+static unsigned int calculate_16_bit_backlight_from_pwm(struct dce_panel_cntl *dce_panel_cntl)
+{
+ uint64_t current_backlight;
+ uint32_t round_result;
+ uint32_t pwm_period_cntl, bl_period, bl_int_count;
+ uint32_t bl_pwm_cntl, bl_pwm, fractional_duty_cycle_en;
+ uint32_t bl_period_mask, bl_pwm_mask;
+
+ pwm_period_cntl = REG_READ(BL_PWM_PERIOD_CNTL);
+ REG_GET(BL_PWM_PERIOD_CNTL, BL_PWM_PERIOD, &bl_period);
+ REG_GET(BL_PWM_PERIOD_CNTL, BL_PWM_PERIOD_BITCNT, &bl_int_count);
+
+ bl_pwm_cntl = REG_READ(BL_PWM_CNTL);
+ REG_GET(BL_PWM_CNTL, BL_ACTIVE_INT_FRAC_CNT, (uint32_t *)(&bl_pwm));
+ REG_GET(BL_PWM_CNTL, BL_PWM_FRACTIONAL_EN, &fractional_duty_cycle_en);
+
+ if (bl_int_count == 0)
+ bl_int_count = 16;
+
+ bl_period_mask = (1 << bl_int_count) - 1;
+ bl_period &= bl_period_mask;
+
+ bl_pwm_mask = bl_period_mask << (16 - bl_int_count);
+
+ if (fractional_duty_cycle_en == 0)
+ bl_pwm &= bl_pwm_mask;
+ else
+ bl_pwm &= 0xFFFF;
+
+ current_backlight = bl_pwm << (1 + bl_int_count);
+
+ if (bl_period == 0)
+ bl_period = 0xFFFF;
+
+ current_backlight = div_u64(current_backlight, bl_period);
+ current_backlight = (current_backlight + 1) >> 1;
+
+ current_backlight = (uint64_t)(current_backlight) * bl_period;
+
+ round_result = (uint32_t)(current_backlight & 0xFFFFFFFF);
+
+ round_result = (round_result >> (bl_int_count-1)) & 1;
+
+ current_backlight >>= bl_int_count;
+ current_backlight += round_result;
+
+ return (uint32_t)(current_backlight);
+}
+
+uint32_t dce_panel_cntl_hw_init(struct panel_cntl *panel_cntl)
+{
+ struct dce_panel_cntl *dce_panel_cntl = TO_DCE_PANEL_CNTL(panel_cntl);
+ uint32_t value;
+ uint32_t current_backlight;
+
+ /* The value must not be 0, so we have to restore it.
+ * VBIOS bug workaround: the period resets to zero, so restore
+ * the cached values, which are always correct.
+ */
+ REG_GET(BL_PWM_CNTL, BL_ACTIVE_INT_FRAC_CNT, &value);
+
+ if (value == 0 || value == 1) {
+ if (panel_cntl->stored_backlight_registers.BL_PWM_CNTL != 0) {
+ REG_WRITE(BL_PWM_CNTL,
+ panel_cntl->stored_backlight_registers.BL_PWM_CNTL);
+ REG_WRITE(BL_PWM_CNTL2,
+ panel_cntl->stored_backlight_registers.BL_PWM_CNTL2);
+ REG_WRITE(BL_PWM_PERIOD_CNTL,
+ panel_cntl->stored_backlight_registers.BL_PWM_PERIOD_CNTL);
+ REG_UPDATE(PWRSEQ_REF_DIV,
+ BL_PWM_REF_DIV,
+ panel_cntl->stored_backlight_registers.LVTMA_PWRSEQ_REF_DIV_BL_PWM_REF_DIV);
+ } else {
+ /* TODO: Note: This should not really happen since VBIOS
+ * should have initialized PWM registers on boot.
+ */
+ REG_WRITE(BL_PWM_CNTL, 0xC000FA00);
+ REG_WRITE(BL_PWM_PERIOD_CNTL, 0x000C0FA0);
+ }
+ } else {
+ panel_cntl->stored_backlight_registers.BL_PWM_CNTL =
+ REG_READ(BL_PWM_CNTL);
+ panel_cntl->stored_backlight_registers.BL_PWM_CNTL2 =
+ REG_READ(BL_PWM_CNTL2);
+ panel_cntl->stored_backlight_registers.BL_PWM_PERIOD_CNTL =
+ REG_READ(BL_PWM_PERIOD_CNTL);
+
+ REG_GET(PWRSEQ_REF_DIV, BL_PWM_REF_DIV,
+ &panel_cntl->stored_backlight_registers.LVTMA_PWRSEQ_REF_DIV_BL_PWM_REF_DIV);
+ }
+
+ // Have driver take backlight control
+ // TakeBacklightControl(true)
+ value = REG_READ(BIOS_SCRATCH_2);
+ value |= ATOM_S2_VRI_BRIGHT_ENABLE;
+ REG_WRITE(BIOS_SCRATCH_2, value);
+
+ // Enable the backlight output
+ REG_UPDATE(BL_PWM_CNTL, BL_PWM_EN, 1);
+
+ // Unlock group 2 backlight registers
+ REG_UPDATE(BL_PWM_GRP1_REG_LOCK,
+ BL_PWM_GRP1_REG_LOCK, 0);
+
+ current_backlight = calculate_16_bit_backlight_from_pwm(dce_panel_cntl);
+
+ return current_backlight;
+}
+
+bool dce_is_panel_backlight_on(struct panel_cntl *panel_cntl)
+{
+ struct dce_panel_cntl *dce_panel_cntl = TO_DCE_PANEL_CNTL(panel_cntl);
+ uint32_t value;
+
+ REG_GET(PWRSEQ_CNTL, LVTMA_BLON, &value);
+
+ return value;
+}
+
+bool dce_is_panel_powered_on(struct panel_cntl *panel_cntl)
+{
+ struct dce_panel_cntl *dce_panel_cntl = TO_DCE_PANEL_CNTL(panel_cntl);
+ uint32_t pwr_seq_state, dig_on, dig_on_ovrd;
+
+ REG_GET(PWRSEQ_STATE, LVTMA_PWRSEQ_TARGET_STATE_R, &pwr_seq_state);
+
+ REG_GET_2(PWRSEQ_CNTL, LVTMA_DIGON, &dig_on, LVTMA_DIGON_OVRD, &dig_on_ovrd);
+
+ return (pwr_seq_state == 1) || (dig_on == 1 && dig_on_ovrd == 1);
+}
+
+void dce_store_backlight_level(struct panel_cntl *panel_cntl)
+{
+ struct dce_panel_cntl *dce_panel_cntl = TO_DCE_PANEL_CNTL(panel_cntl);
+
+ panel_cntl->stored_backlight_registers.BL_PWM_CNTL =
+ REG_READ(BL_PWM_CNTL);
+ panel_cntl->stored_backlight_registers.BL_PWM_CNTL2 =
+ REG_READ(BL_PWM_CNTL2);
+ panel_cntl->stored_backlight_registers.BL_PWM_PERIOD_CNTL =
+ REG_READ(BL_PWM_PERIOD_CNTL);
+
+ REG_GET(PWRSEQ_REF_DIV, BL_PWM_REF_DIV,
+ &panel_cntl->stored_backlight_registers.LVTMA_PWRSEQ_REF_DIV_BL_PWM_REF_DIV);
+}
+
+void dce_driver_set_backlight(struct panel_cntl *panel_cntl,
+ uint32_t backlight_pwm_u16_16)
+{
+ uint32_t backlight_16bit;
+ uint32_t masked_pwm_period;
+ uint8_t bit_count;
+ uint64_t active_duty_cycle;
+ uint32_t pwm_period_bitcnt;
+ struct dce_panel_cntl *dce_panel_cntl = TO_DCE_PANEL_CNTL(panel_cntl);
+
+ /*
+ * 1. Find 16 bit backlight active duty cycle, where 0 <= backlight
+ * active duty cycle <= backlight period
+ */
+
+ /* 1.1 Apply bitmask for backlight period value based on value of BITCNT
+ */
+ REG_GET_2(BL_PWM_PERIOD_CNTL,
+ BL_PWM_PERIOD_BITCNT, &pwm_period_bitcnt,
+ BL_PWM_PERIOD, &masked_pwm_period);
+
+ if (pwm_period_bitcnt == 0)
+ bit_count = 16;
+ else
+ bit_count = pwm_period_bitcnt;
+
+ /* e.g. maskedPwmPeriod = 0x24 when bitCount is 6 */
+ masked_pwm_period = masked_pwm_period & ((1 << bit_count) - 1);
+
+ /* 1.2 Calculate integer active duty cycle required upper 16 bits
+ * contain integer component, lower 16 bits contain fractional component
+ * of active duty cycle e.g. 0x21BDC0 = 0xEFF0 * 0x24
+ */
+ active_duty_cycle = backlight_pwm_u16_16 * masked_pwm_period;
+
+ /* 1.3 Calculate 16 bit active duty cycle from integer and fractional
+ * components shift by bitCount then mask 16 bits and add rounding bit
+ * from MSB of fraction e.g. 0x86F7 = ((0x21BDC0 >> 6) & 0xFFF) + 0
+ */
+ backlight_16bit = active_duty_cycle >> bit_count;
+ backlight_16bit &= 0xFFFF;
+ backlight_16bit += (active_duty_cycle >> (bit_count - 1)) & 0x1;
+
+ /*
+ * 2. Program register with updated value
+ */
+
+ /* 2.1 Lock group 2 backlight registers */
+
+ REG_UPDATE_2(BL_PWM_GRP1_REG_LOCK,
+ BL_PWM_GRP1_IGNORE_MASTER_LOCK_EN, 1,
+ BL_PWM_GRP1_REG_LOCK, 1);
+
+ // 2.2 Write new active duty cycle
+ REG_UPDATE(BL_PWM_CNTL, BL_ACTIVE_INT_FRAC_CNT, backlight_16bit);
+
+ /* 2.3 Unlock group 2 backlight registers */
+ REG_UPDATE(BL_PWM_GRP1_REG_LOCK,
+ BL_PWM_GRP1_REG_LOCK, 0);
+
+ /* 3 Wait for pending bit to be cleared */
+ REG_WAIT(BL_PWM_GRP1_REG_LOCK,
+ BL_PWM_GRP1_REG_UPDATE_PENDING, 0,
+ 1, 10000);
+}
+
+static void dce_panel_cntl_destroy(struct panel_cntl **panel_cntl)
+{
+ struct dce_panel_cntl *dce_panel_cntl = TO_DCE_PANEL_CNTL(*panel_cntl);
+
+ kfree(dce_panel_cntl);
+ *panel_cntl = NULL;
+}
+
+static const struct panel_cntl_funcs dce_link_panel_cntl_funcs = {
+ .destroy = dce_panel_cntl_destroy,
+ .hw_init = dce_panel_cntl_hw_init,
+ .is_panel_backlight_on = dce_is_panel_backlight_on,
+ .is_panel_powered_on = dce_is_panel_powered_on,
+ .store_backlight_level = dce_store_backlight_level,
+ .driver_set_backlight = dce_driver_set_backlight,
+};
+
+void dce_panel_cntl_construct(
+ struct dce_panel_cntl *dce_panel_cntl,
+ const struct panel_cntl_init_data *init_data,
+ const struct dce_panel_cntl_registers *regs,
+ const struct dce_panel_cntl_shift *shift,
+ const struct dce_panel_cntl_mask *mask)
+{
+ struct panel_cntl *base = &dce_panel_cntl->base;
+
+ base->stored_backlight_registers.BL_PWM_CNTL = 0;
+ base->stored_backlight_registers.BL_PWM_CNTL2 = 0;
+ base->stored_backlight_registers.BL_PWM_PERIOD_CNTL = 0;
+ base->stored_backlight_registers.LVTMA_PWRSEQ_REF_DIV_BL_PWM_REF_DIV = 0;
+
+ dce_panel_cntl->regs = regs;
+ dce_panel_cntl->shift = shift;
+ dce_panel_cntl->mask = mask;
+
+ dce_panel_cntl->base.funcs = &dce_link_panel_cntl_funcs;
+ dce_panel_cntl->base.ctx = init_data->ctx;
+ dce_panel_cntl->base.inst = init_data->inst;
+}
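
dce_panel_cntl_hw_init() returns the current backlight level it derives from the PWM registers (via calculate_16_bit_backlight_from_pwm() above); the init_hw() hunk later in this patch seeds ABM with that value. Condensed from that hunk, roughly:

uint32_t backlight = MAX_BACKLIGHT_LEVEL;	/* fallback when no panel_cntl */

for (i = 0; i < dc->link_count; i++) {
	struct dc_link *link = dc->links[i];

	if (link->panel_cntl)
		backlight = link->panel_cntl->funcs->hw_init(link->panel_cntl);
}

if (dc->res_pool->abm)
	dc->res_pool->abm->funcs->abm_init(dc->res_pool->abm, backlight);
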
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.h b/drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.h
new file mode 100644
index 000000000000..70ec691e14d2
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.h
@@ -0,0 +1,125 @@
+/*
+ * Copyright 2012-15 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DC_PANEL_CNTL__DCE_H__
+#define __DC_PANEL_CNTL__DCE_H__
+
+#include "panel_cntl.h"
+
+/* set register offset with instance */
+#define DCE_PANEL_CNTL_SR(reg_name, block)\
+ .reg_name = mm ## block ## _ ## reg_name
+
+#define DCE_PANEL_CNTL_REG_LIST()\
+ DCE_PANEL_CNTL_SR(PWRSEQ_CNTL, LVTMA), \
+ DCE_PANEL_CNTL_SR(PWRSEQ_STATE, LVTMA), \
+ DCE_PANEL_CNTL_SR(PWRSEQ_REF_DIV, LVTMA), \
+ SR(BL_PWM_CNTL), \
+ SR(BL_PWM_CNTL2), \
+ SR(BL_PWM_PERIOD_CNTL), \
+ SR(BL_PWM_GRP1_REG_LOCK), \
+ SR(BIOS_SCRATCH_2)
+
+#define DCN_PANEL_CNTL_SR(reg_name, block)\
+ .reg_name = BASE(mm ## block ## _ ## reg_name ## _BASE_IDX) + \
+ mm ## block ## _ ## reg_name
+
+#define DCN_PANEL_CNTL_REG_LIST()\
+ DCN_PANEL_CNTL_SR(PWRSEQ_CNTL, LVTMA), \
+ DCN_PANEL_CNTL_SR(PWRSEQ_STATE, LVTMA), \
+ DCE_PANEL_CNTL_SR(PWRSEQ_REF_DIV, LVTMA), \
+ SR(BL_PWM_CNTL), \
+ SR(BL_PWM_CNTL2), \
+ SR(BL_PWM_PERIOD_CNTL), \
+ SR(BL_PWM_GRP1_REG_LOCK), \
+ SR(BIOS_SCRATCH_2)
+
+#define DCE_PANEL_CNTL_SF(reg_name, field_name, post_fix)\
+ .field_name = reg_name ## __ ## field_name ## post_fix
+
+#define DCE_PANEL_CNTL_MASK_SH_LIST(mask_sh) \
+ DCE_PANEL_CNTL_SF(LVTMA_PWRSEQ_CNTL, LVTMA_BLON, mask_sh),\
+ DCE_PANEL_CNTL_SF(LVTMA_PWRSEQ_CNTL, LVTMA_DIGON, mask_sh),\
+ DCE_PANEL_CNTL_SF(LVTMA_PWRSEQ_CNTL, LVTMA_DIGON_OVRD, mask_sh),\
+ DCE_PANEL_CNTL_SF(LVTMA_PWRSEQ_STATE, LVTMA_PWRSEQ_TARGET_STATE_R, mask_sh), \
+ DCE_PANEL_CNTL_SF(LVTMA_PWRSEQ_REF_DIV, BL_PWM_REF_DIV, mask_sh), \
+ DCE_PANEL_CNTL_SF(BL_PWM_PERIOD_CNTL, BL_PWM_PERIOD, mask_sh), \
+ DCE_PANEL_CNTL_SF(BL_PWM_PERIOD_CNTL, BL_PWM_PERIOD_BITCNT, mask_sh), \
+ DCE_PANEL_CNTL_SF(BL_PWM_CNTL, BL_ACTIVE_INT_FRAC_CNT, mask_sh), \
+ DCE_PANEL_CNTL_SF(BL_PWM_CNTL, BL_PWM_FRACTIONAL_EN, mask_sh), \
+ DCE_PANEL_CNTL_SF(BL_PWM_CNTL, BL_PWM_EN, mask_sh), \
+ DCE_PANEL_CNTL_SF(BL_PWM_GRP1_REG_LOCK, BL_PWM_GRP1_IGNORE_MASTER_LOCK_EN, mask_sh), \
+ DCE_PANEL_CNTL_SF(BL_PWM_GRP1_REG_LOCK, BL_PWM_GRP1_REG_LOCK, mask_sh), \
+ DCE_PANEL_CNTL_SF(BL_PWM_GRP1_REG_LOCK, BL_PWM_GRP1_REG_UPDATE_PENDING, mask_sh)
+
+#define DCE_PANEL_CNTL_REG_FIELD_LIST(type) \
+ type LVTMA_BLON;\
+ type LVTMA_DIGON;\
+ type LVTMA_DIGON_OVRD;\
+ type LVTMA_PWRSEQ_TARGET_STATE_R; \
+ type BL_PWM_REF_DIV; \
+ type BL_PWM_EN; \
+ type BL_ACTIVE_INT_FRAC_CNT; \
+ type BL_PWM_FRACTIONAL_EN; \
+ type BL_PWM_PERIOD; \
+ type BL_PWM_PERIOD_BITCNT; \
+ type BL_PWM_GRP1_IGNORE_MASTER_LOCK_EN; \
+ type BL_PWM_GRP1_REG_LOCK; \
+ type BL_PWM_GRP1_REG_UPDATE_PENDING
+
+struct dce_panel_cntl_shift {
+ DCE_PANEL_CNTL_REG_FIELD_LIST(uint8_t);
+};
+
+struct dce_panel_cntl_mask {
+ DCE_PANEL_CNTL_REG_FIELD_LIST(uint32_t);
+};
+
+struct dce_panel_cntl_registers {
+ uint32_t PWRSEQ_CNTL;
+ uint32_t PWRSEQ_STATE;
+ uint32_t BL_PWM_CNTL;
+ uint32_t BL_PWM_CNTL2;
+ uint32_t BL_PWM_PERIOD_CNTL;
+ uint32_t BL_PWM_GRP1_REG_LOCK;
+ uint32_t PWRSEQ_REF_DIV;
+ uint32_t BIOS_SCRATCH_2;
+};
+
+struct dce_panel_cntl {
+ struct panel_cntl base;
+ const struct dce_panel_cntl_registers *regs;
+ const struct dce_panel_cntl_shift *shift;
+ const struct dce_panel_cntl_mask *mask;
+};
+
+void dce_panel_cntl_construct(
+ struct dce_panel_cntl *panel_cntl,
+ const struct panel_cntl_init_data *init_data,
+ const struct dce_panel_cntl_registers *regs,
+ const struct dce_panel_cntl_shift *shift,
+ const struct dce_panel_cntl_mask *mask);
+
+#endif /* __DC_PANEL_CNTL__DCE_H__ */
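
The SF/SH macro machinery here is plain token pasting: each DCE_PANEL_CNTL_SF(reg, field, post_fix) entry produces one designated initializer for the shift or mask struct. With post_fix set to __SHIFT, for example, the first list entry expands to the initializer sketched below (illustrative; the elision comment stands in for the remaining fields of DCE_PANEL_CNTL_REG_FIELD_LIST()):

static const struct dce_panel_cntl_shift panel_cntl_shift = {
	.LVTMA_BLON = LVTMA_PWRSEQ_CNTL__LVTMA_BLON__SHIFT,
	/* ...one initializer per remaining register field... */
};
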
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c
index 451574971b96..4cdaaf4d881c 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c
@@ -1336,7 +1336,6 @@ static void dce110_se_audio_setup(
{
struct dce110_stream_encoder *enc110 = DCE110STRENC_FROM_STRENC(enc);
- uint32_t speakers = 0;
uint32_t channels = 0;
ASSERT(audio_info);
@@ -1344,7 +1343,6 @@ static void dce110_se_audio_setup(
/* This should not happen. It does, so we don't get a BSOD. */
return;
- speakers = audio_info->flags.info.ALLSPEAKERS;
channels = speakers_to_channels(audio_info->flags.speaker_flags).all;
/* setup the audio stream source select (audio -> dig mapping) */
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c
new file mode 100644
index 000000000000..da0b29abfbda
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c
@@ -0,0 +1,319 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "dmub_abm.h"
+#include "dce_abm.h"
+#include "dc.h"
+#include "dc_dmub_srv.h"
+#include "dmub/dmub_srv.h"
+#include "core_types.h"
+#include "dm_services.h"
+#include "reg_helper.h"
+#include "fixed31_32.h"
+
+#include "atom.h"
+
+#define TO_DMUB_ABM(abm)\
+ container_of(abm, struct dce_abm, base)
+
+#define REG(reg) \
+ (dce_abm->regs->reg)
+
+#undef FN
+#define FN(reg_name, field_name) \
+ dce_abm->abm_shift->field_name, dce_abm->abm_mask->field_name
+
+#define CTX \
+ dce_abm->base.ctx
+
+#define DISABLE_ABM_IMMEDIATELY 255
+
+static bool dmub_abm_set_pipe(struct abm *abm, uint32_t otg_inst, uint32_t panel_inst)
+{
+ union dmub_rb_cmd cmd;
+ struct dc_context *dc = abm->ctx;
+ uint32_t ramping_boundary = 0xFFFF;
+
+ cmd.abm_set_pipe.header.type = DMUB_CMD__ABM;
+ cmd.abm_set_pipe.header.sub_type = DMUB_CMD__ABM_SET_PIPE;
+ cmd.abm_set_pipe.abm_set_pipe_data.otg_inst = otg_inst;
+ cmd.abm_set_pipe.abm_set_pipe_data.panel_inst = panel_inst;
+ cmd.abm_set_pipe.abm_set_pipe_data.ramping_boundary = ramping_boundary;
+ cmd.abm_set_pipe.header.payload_bytes = sizeof(struct dmub_cmd_abm_set_pipe_data);
+
+ dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd);
+ dc_dmub_srv_cmd_execute(dc->dmub_srv);
+ dc_dmub_srv_wait_idle(dc->dmub_srv);
+
+ return true;
+}
+
+static void dmcub_set_backlight_level(
+ struct dce_abm *dce_abm,
+ uint32_t backlight_pwm_u16_16,
+ uint32_t frame_ramp,
+ uint32_t otg_inst,
+ uint32_t panel_inst)
+{
+ union dmub_rb_cmd cmd;
+ struct dc_context *dc = dce_abm->base.ctx;
+ unsigned int backlight_8_bit = 0;
+ uint32_t s2;
+
+ if (backlight_pwm_u16_16 & 0x10000)
+ // Check for max backlight condition
+ backlight_8_bit = 0xFF;
+ else
+ // Take MSB of fractional part since backlight is not max
+ backlight_8_bit = (backlight_pwm_u16_16 >> 8) & 0xFF;
+
+ dmub_abm_set_pipe(&dce_abm->base, otg_inst, panel_inst);
+
+ REG_UPDATE(BL1_PWM_USER_LEVEL, BL1_PWM_USER_LEVEL, backlight_pwm_u16_16);
+
+ if (otg_inst == 0)
+ frame_ramp = 0;
+
+ cmd.abm_set_backlight.header.type = DMUB_CMD__ABM;
+ cmd.abm_set_backlight.header.sub_type = DMUB_CMD__ABM_SET_BACKLIGHT;
+ cmd.abm_set_backlight.abm_set_backlight_data.frame_ramp = frame_ramp;
+ cmd.abm_set_backlight.header.payload_bytes = sizeof(struct dmub_cmd_abm_set_backlight_data);
+
+ dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd);
+ dc_dmub_srv_cmd_execute(dc->dmub_srv);
+ dc_dmub_srv_wait_idle(dc->dmub_srv);
+
+ // Update requested backlight level
+ s2 = REG_READ(BIOS_SCRATCH_2);
+
+ s2 &= ~ATOM_S2_CURRENT_BL_LEVEL_MASK;
+ backlight_8_bit &= (ATOM_S2_CURRENT_BL_LEVEL_MASK >>
+ ATOM_S2_CURRENT_BL_LEVEL_SHIFT);
+ s2 |= (backlight_8_bit << ATOM_S2_CURRENT_BL_LEVEL_SHIFT);
+
+ REG_WRITE(BIOS_SCRATCH_2, s2);
+}
+
+static void dmub_abm_enable_fractional_pwm(struct dc_context *dc)
+{
+ union dmub_rb_cmd cmd;
+ uint32_t fractional_pwm = (dc->dc->config.disable_fractional_pwm == false) ? 1 : 0;
+
+ cmd.abm_set_pwm_frac.header.type = DMUB_CMD__ABM;
+ cmd.abm_set_pwm_frac.header.sub_type = DMUB_CMD__ABM_SET_PWM_FRAC;
+ cmd.abm_set_pwm_frac.abm_set_pwm_frac_data.fractional_pwm = fractional_pwm;
+ cmd.abm_set_pwm_frac.header.payload_bytes = sizeof(struct dmub_cmd_abm_set_pwm_frac_data);
+
+ dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd);
+ dc_dmub_srv_cmd_execute(dc->dmub_srv);
+ dc_dmub_srv_wait_idle(dc->dmub_srv);
+}
+
+static void dmub_abm_init(struct abm *abm, uint32_t backlight)
+{
+ struct dce_abm *dce_abm = TO_DMUB_ABM(abm);
+
+ REG_WRITE(DC_ABM1_HG_SAMPLE_RATE, 0x103);
+ REG_WRITE(DC_ABM1_HG_SAMPLE_RATE, 0x101);
+ REG_WRITE(DC_ABM1_LS_SAMPLE_RATE, 0x103);
+ REG_WRITE(DC_ABM1_LS_SAMPLE_RATE, 0x101);
+ REG_WRITE(BL1_PWM_BL_UPDATE_SAMPLE_RATE, 0x101);
+
+ REG_SET_3(DC_ABM1_HG_MISC_CTRL, 0,
+ ABM1_HG_NUM_OF_BINS_SEL, 0,
+ ABM1_HG_VMAX_SEL, 1,
+ ABM1_HG_BIN_BITWIDTH_SIZE_SEL, 0);
+
+ REG_SET_3(DC_ABM1_IPCSC_COEFF_SEL, 0,
+ ABM1_IPCSC_COEFF_SEL_R, 2,
+ ABM1_IPCSC_COEFF_SEL_G, 4,
+ ABM1_IPCSC_COEFF_SEL_B, 2);
+
+ REG_UPDATE(BL1_PWM_CURRENT_ABM_LEVEL,
+ BL1_PWM_CURRENT_ABM_LEVEL, backlight);
+
+ REG_UPDATE(BL1_PWM_TARGET_ABM_LEVEL,
+ BL1_PWM_TARGET_ABM_LEVEL, backlight);
+
+ REG_UPDATE(BL1_PWM_USER_LEVEL,
+ BL1_PWM_USER_LEVEL, backlight);
+
+ REG_UPDATE_2(DC_ABM1_LS_MIN_MAX_PIXEL_VALUE_THRES,
+ ABM1_LS_MIN_PIXEL_VALUE_THRES, 0,
+ ABM1_LS_MAX_PIXEL_VALUE_THRES, 1000);
+
+ REG_SET_3(DC_ABM1_HGLS_REG_READ_PROGRESS, 0,
+ ABM1_HG_REG_READ_MISSED_FRAME_CLEAR, 1,
+ ABM1_LS_REG_READ_MISSED_FRAME_CLEAR, 1,
+ ABM1_BL_REG_READ_MISSED_FRAME_CLEAR, 1);
+
+ dmub_abm_enable_fractional_pwm(abm->ctx);
+}
+
+static unsigned int dmub_abm_get_current_backlight(struct abm *abm)
+{
+ struct dce_abm *dce_abm = TO_DMUB_ABM(abm);
+ unsigned int backlight = REG_READ(BL1_PWM_CURRENT_ABM_LEVEL);
+
+ /* return backlight in hardware format which is unsigned 17 bits, with
+ * 1 bit integer and 16 bit fractional
+ */
+ return backlight;
+}
+
+static unsigned int dmub_abm_get_target_backlight(struct abm *abm)
+{
+ struct dce_abm *dce_abm = TO_DMUB_ABM(abm);
+ unsigned int backlight = REG_READ(BL1_PWM_TARGET_ABM_LEVEL);
+
+ /* return backlight in hardware format which is unsigned 17 bits, with
+ * 1 bit integer and 16 bit fractional
+ */
+ return backlight;
+}
+
+static bool dmub_abm_set_level(struct abm *abm, uint32_t level)
+{
+ union dmub_rb_cmd cmd;
+ struct dc_context *dc = abm->ctx;
+
+ cmd.abm_set_level.header.type = DMUB_CMD__ABM;
+ cmd.abm_set_level.header.sub_type = DMUB_CMD__ABM_SET_LEVEL;
+ cmd.abm_set_level.abm_set_level_data.level = level;
+ cmd.abm_set_level.header.payload_bytes = sizeof(struct dmub_cmd_abm_set_level_data);
+
+ dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd);
+ dc_dmub_srv_cmd_execute(dc->dmub_srv);
+ dc_dmub_srv_wait_idle(dc->dmub_srv);
+
+ return true;
+}
+
+static bool dmub_abm_immediate_disable(struct abm *abm, uint32_t panel_inst)
+{
+ dmub_abm_set_pipe(abm, DISABLE_ABM_IMMEDIATELY, panel_inst);
+
+ return true;
+}
+
+static bool dmub_abm_set_backlight_level_pwm(
+ struct abm *abm,
+ unsigned int backlight_pwm_u16_16,
+ unsigned int frame_ramp,
+ unsigned int otg_inst,
+ uint32_t panel_inst)
+{
+ struct dce_abm *dce_abm = TO_DMUB_ABM(abm);
+
+ dmcub_set_backlight_level(dce_abm,
+ backlight_pwm_u16_16,
+ frame_ramp,
+ otg_inst,
+ panel_inst);
+
+ return true;
+}
+
+static bool dmub_abm_init_config(struct abm *abm,
+ const char *src,
+ unsigned int bytes)
+{
+ union dmub_rb_cmd cmd;
+ struct dc_context *dc = abm->ctx;
+
+ // TODO: Optimize by only reading back final 4 bytes
+ dmub_flush_buffer_mem(&dc->dmub_srv->dmub->scratch_mem_fb);
+
+ // Copy iramtable into cw7
+ memcpy(dc->dmub_srv->dmub->scratch_mem_fb.cpu_addr, (void *)src, bytes);
+
+ // Fw will copy from cw7 to fw_state
+ cmd.abm_init_config.header.type = DMUB_CMD__ABM;
+ cmd.abm_init_config.header.sub_type = DMUB_CMD__ABM_INIT_CONFIG;
+ cmd.abm_init_config.abm_init_config_data.src.quad_part = dc->dmub_srv->dmub->scratch_mem_fb.gpu_addr;
+ cmd.abm_init_config.abm_init_config_data.bytes = bytes;
+ cmd.abm_init_config.header.payload_bytes = sizeof(struct dmub_cmd_abm_init_config_data);
+
+ dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd);
+ dc_dmub_srv_cmd_execute(dc->dmub_srv);
+ dc_dmub_srv_wait_idle(dc->dmub_srv);
+
+ return true;
+}
+
+static const struct abm_funcs abm_funcs = {
+ .abm_init = dmub_abm_init,
+ .set_abm_level = dmub_abm_set_level,
+ .set_pipe = dmub_abm_set_pipe,
+ .set_backlight_level_pwm = dmub_abm_set_backlight_level_pwm,
+ .get_current_backlight = dmub_abm_get_current_backlight,
+ .get_target_backlight = dmub_abm_get_target_backlight,
+ .set_abm_immediate_disable = dmub_abm_immediate_disable,
+ .init_abm_config = dmub_abm_init_config,
+};
+
+static void dmub_abm_construct(
+ struct dce_abm *abm_dce,
+ struct dc_context *ctx,
+ const struct dce_abm_registers *regs,
+ const struct dce_abm_shift *abm_shift,
+ const struct dce_abm_mask *abm_mask)
+{
+ struct abm *base = &abm_dce->base;
+
+ base->ctx = ctx;
+ base->funcs = &abm_funcs;
+ base->dmcu_is_running = false;
+
+ abm_dce->regs = regs;
+ abm_dce->abm_shift = abm_shift;
+ abm_dce->abm_mask = abm_mask;
+}
+
+struct abm *dmub_abm_create(
+ struct dc_context *ctx,
+ const struct dce_abm_registers *regs,
+ const struct dce_abm_shift *abm_shift,
+ const struct dce_abm_mask *abm_mask)
+{
+ struct dce_abm *abm_dce = kzalloc(sizeof(*abm_dce), GFP_KERNEL);
+
+ if (abm_dce == NULL) {
+ BREAK_TO_DEBUGGER();
+ return NULL;
+ }
+
+ dmub_abm_construct(abm_dce, ctx, regs, abm_shift, abm_mask);
+
+ return &abm_dce->base;
+}
+
+void dmub_abm_destroy(struct abm **abm)
+{
+ struct dce_abm *abm_dce = TO_DMUB_ABM(*abm);
+
+ kfree(abm_dce);
+ *abm = NULL;
+}
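
Every DMUB-backed entry point in this new file drives the firmware through the same three-step ring-buffer handshake: build a union dmub_rb_cmd, queue it, kick execution, and wait for the ring to drain. A condensed sketch of that shared shape (the helper name is invented; the three dc_dmub_srv_* calls are the ones used throughout the file):

static void dmub_send_and_wait(struct dc_context *dc, union dmub_rb_cmd *cmd)
{
	dc_dmub_srv_cmd_queue(dc->dmub_srv, cmd);	/* place command in the ring buffer */
	dc_dmub_srv_cmd_execute(dc->dmub_srv);		/* notify the DMUB firmware */
	dc_dmub_srv_wait_idle(dc->dmub_srv);		/* block until it is consumed */
}
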
diff --git a/drivers/gpu/drm/amd/display/dc/basics/log_helpers.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.h
index 26583f346c39..3a5d5ac7a86e 100644
--- a/drivers/gpu/drm/amd/display/dc/basics/log_helpers.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2012-16 Advanced Micro Devices, Inc.
+ * Copyright 2019 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -23,17 +23,18 @@
*
*/
-#include "core_types.h"
-#include "logger.h"
-#include "include/logger_interface.h"
-#include "dm_helpers.h"
+#ifndef __DMUB_ABM_H__
+#define __DMUB_ABM_H__
-void dc_conn_log_hex_linux(const uint8_t *hex_data, int hex_data_count)
-{
- int i;
+#include "abm.h"
+#include "dce_abm.h"
- if (hex_data)
- for (i = 0; i < hex_data_count; i++)
- DC_LOG_DEBUG("%2.2X ", hex_data[i]);
-}
+struct abm *dmub_abm_create(
+ struct dc_context *ctx,
+ const struct dce_abm_registers *regs,
+ const struct dce_abm_shift *abm_shift,
+ const struct dce_abm_mask *abm_mask);
+void dmub_abm_destroy(struct abm **abm);
+
+#endif
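
Note that the legacy dce_funcs table earlier in this patch sets .init_abm_config to NULL while the DMUB table fills it in, so code that loads the ABM configuration has to probe the hook first. A sketch of that guard (the wrapper is hypothetical; the callback signature matches dmub_abm_init_config() above):

static bool abm_load_config(struct abm *abm, const char *src, unsigned int bytes)
{
	if (abm->funcs->init_abm_config)
		return abm->funcs->init_abm_config(abm, src, bytes);

	return false;	/* DMCU path: nothing to load through DMUB */
}
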
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c
index bc109d4fc6e6..044a0133ebb1 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c
@@ -26,13 +26,51 @@
#include "dmub_psr.h"
#include "dc.h"
#include "dc_dmub_srv.h"
-#include "../../dmub/inc/dmub_srv.h"
-#include "../../dmub/inc/dmub_gpint_cmd.h"
+#include "dmub/dmub_srv.h"
#include "core_types.h"
#define MAX_PIPES 6
/**
+ * Convert dmcub psr state to dmcu psr state.
+ */
+static void convert_psr_state(uint32_t *psr_state)
+{
+ if (*psr_state == 0)
+ *psr_state = 0;
+ else if (*psr_state == 0x10)
+ *psr_state = 1;
+ else if (*psr_state == 0x11)
+ *psr_state = 2;
+ else if (*psr_state == 0x20)
+ *psr_state = 3;
+ else if (*psr_state == 0x21)
+ *psr_state = 4;
+ else if (*psr_state == 0x30)
+ *psr_state = 5;
+ else if (*psr_state == 0x31)
+ *psr_state = 6;
+ else if (*psr_state == 0x40)
+ *psr_state = 7;
+ else if (*psr_state == 0x41)
+ *psr_state = 8;
+ else if (*psr_state == 0x42)
+ *psr_state = 9;
+ else if (*psr_state == 0x43)
+ *psr_state = 10;
+ else if (*psr_state == 0x44)
+ *psr_state = 11;
+ else if (*psr_state == 0x50)
+ *psr_state = 12;
+ else if (*psr_state == 0x51)
+ *psr_state = 13;
+ else if (*psr_state == 0x52)
+ *psr_state = 14;
+ else if (*psr_state == 0x53)
+ *psr_state = 15;
+}
+
+/**
* Get PSR state from firmware.
*/
static void dmub_psr_get_state(struct dmub_psr *dmub, uint32_t *psr_state)
@@ -43,6 +81,8 @@ static void dmub_psr_get_state(struct dmub_psr *dmub, uint32_t *psr_state)
dmub_srv_send_gpint_command(srv, DMUB_GPINT__GET_PSR_STATE, 0, 30);
dmub_srv_get_gpint_response(srv, psr_state);
+
+ convert_psr_state(psr_state);
}
/**
@@ -53,19 +93,23 @@ static bool dmub_psr_set_version(struct dmub_psr *dmub, struct dc_stream_state *
union dmub_rb_cmd cmd;
struct dc_context *dc = dmub->ctx;
+ if (stream->link->psr_settings.psr_version == DC_PSR_VERSION_UNSUPPORTED)
+ return false;
+
cmd.psr_set_version.header.type = DMUB_CMD__PSR;
cmd.psr_set_version.header.sub_type = DMUB_CMD__PSR_SET_VERSION;
-
- if (stream->psr_version == 0x0) // Unsupported
- return false;
- else if (stream->psr_version == 0x1)
+ switch (stream->link->psr_settings.psr_version) {
+ case DC_PSR_VERSION_1:
cmd.psr_set_version.psr_set_version_data.version = PSR_VERSION_1;
- else if (stream->psr_version == 0x2)
- cmd.psr_set_version.psr_set_version_data.version = PSR_VERSION_2;
-
- cmd.psr_enable.header.payload_bytes = sizeof(struct dmub_cmd_psr_set_version_data);
+ break;
+ case DC_PSR_VERSION_UNSUPPORTED:
+ default:
+ cmd.psr_set_version.psr_set_version_data.version = PSR_VERSION_UNSUPPORTED;
+ break;
+ }
+ cmd.psr_set_version.header.payload_bytes = sizeof(struct dmub_cmd_psr_set_version_data);
- dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd.psr_enable.header);
+ dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd);
dc_dmub_srv_cmd_execute(dc->dmub_srv);
dc_dmub_srv_wait_idle(dc->dmub_srv);
@@ -89,7 +133,7 @@ static void dmub_psr_enable(struct dmub_psr *dmub, bool enable)
cmd.psr_enable.header.payload_bytes = 0; // Send header only
- dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd.psr_enable.header);
+ dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd);
dc_dmub_srv_cmd_execute(dc->dmub_srv);
dc_dmub_srv_wait_idle(dc->dmub_srv);
}
@@ -113,7 +157,7 @@ static void dmub_psr_set_level(struct dmub_psr *dmub, uint16_t psr_level)
cmd.psr_set_level.header.payload_bytes = sizeof(struct dmub_cmd_psr_set_level_data);
cmd.psr_set_level.psr_set_level_data.psr_level = psr_level;
- dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd.psr_set_level.header);
+ dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd);
dc_dmub_srv_cmd_execute(dc->dmub_srv);
dc_dmub_srv_wait_idle(dc->dmub_srv);
}
@@ -162,7 +206,7 @@ static bool dmub_psr_copy_settings(struct dmub_psr *dmub,
cmd.psr_copy_settings.header.payload_bytes = sizeof(struct dmub_cmd_psr_copy_settings_data);
// Hw insts
- copy_settings_data->dpphy_inst = psr_context->phyType;
+ copy_settings_data->dpphy_inst = psr_context->transmitterId;
copy_settings_data->aux_inst = psr_context->channel;
copy_settings_data->digfe_inst = psr_context->engineId;
copy_settings_data->digbe_inst = psr_context->transmitterId;
@@ -187,8 +231,10 @@ static bool dmub_psr_copy_settings(struct dmub_psr *dmub,
copy_settings_data->smu_optimizations_en = psr_context->allow_smu_optimizations;
copy_settings_data->frame_delay = psr_context->frame_delay;
copy_settings_data->frame_cap_ind = psr_context->psrFrameCaptureIndicationReq;
+ copy_settings_data->debug.visual_confirm = dc->dc->debug.visual_confirm == VISUAL_CONFIRM_PSR ?
+ true : false;
- dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd.psr_copy_settings.header);
+ dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd);
dc_dmub_srv_cmd_execute(dc->dmub_srv);
dc_dmub_srv_wait_idle(dc->dmub_srv);
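
convert_psr_state() above is a fixed mapping from DMCUB state codes to the 0..15 values the DMCU interface used, so the if/else ladder could equally be written as a table walk. An equivalent data-driven form, shown only for illustration (not part of the patch):

static const uint32_t dmcub_psr_state_codes[] = {
	0x00, 0x10, 0x11, 0x20, 0x21, 0x30, 0x31, 0x40,
	0x41, 0x42, 0x43, 0x44, 0x50, 0x51, 0x52, 0x53,
};

static void convert_psr_state_by_table(uint32_t *psr_state)
{
	uint32_t i;

	for (i = 0; i < ARRAY_SIZE(dmcub_psr_state_codes); i++) {
		if (*psr_state == dmcub_psr_state_codes[i]) {
			*psr_state = i;
			return;
		}
	}
	/* Unknown codes are passed through unchanged, as in the original. */
}
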
diff --git a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c
index 8f78bf9abbca..a28c4ae0f259 100644
--- a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c
@@ -46,6 +46,7 @@
#include "dce/dce_audio.h"
#include "dce/dce_hwseq.h"
#include "dce100/dce100_hw_sequencer.h"
+#include "dce/dce_panel_cntl.h"
#include "reg_helper.h"
@@ -249,6 +250,18 @@ static const struct dce_stream_encoder_mask se_mask = {
SE_COMMON_MASK_SH_LIST_DCE80_100(_MASK)
};
+static const struct dce_panel_cntl_registers panel_cntl_regs[] = {
+ { DCE_PANEL_CNTL_REG_LIST() }
+};
+
+static const struct dce_panel_cntl_shift panel_cntl_shift = {
+ DCE_PANEL_CNTL_MASK_SH_LIST(__SHIFT)
+};
+
+static const struct dce_panel_cntl_mask panel_cntl_mask = {
+ DCE_PANEL_CNTL_MASK_SH_LIST(_MASK)
+};
+
#define opp_regs(id)\
[id] = {\
OPP_DCE_100_REG_LIST(id),\
@@ -627,6 +640,23 @@ struct link_encoder *dce100_link_encoder_create(
return &enc110->base;
}
+static struct panel_cntl *dce100_panel_cntl_create(const struct panel_cntl_init_data *init_data)
+{
+ struct dce_panel_cntl *panel_cntl =
+ kzalloc(sizeof(struct dce_panel_cntl), GFP_KERNEL);
+
+ if (!panel_cntl)
+ return NULL;
+
+ dce_panel_cntl_construct(panel_cntl,
+ init_data,
+ &panel_cntl_regs[init_data->inst],
+ &panel_cntl_shift,
+ &panel_cntl_mask);
+
+ return &panel_cntl->base;
+}
+
struct output_pixel_processor *dce100_opp_create(
struct dc_context *ctx,
uint32_t inst)
@@ -943,6 +973,7 @@ struct stream_encoder *dce100_find_first_free_match_stream_enc_for_link(
static const struct resource_funcs dce100_res_pool_funcs = {
.destroy = dce100_destroy_resource_pool,
.link_enc_create = dce100_link_encoder_create,
+ .panel_cntl_create = dce100_panel_cntl_create,
.validate_bandwidth = dce100_validate_bandwidth,
.validate_plane = dce100_validate_plane,
.add_stream_to_ctx = dce100_add_stream_to_ctx,
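
dce100_panel_cntl_create() is exposed to the core through the new .panel_cntl_create hook in resource_funcs, so link construction can attach a per-panel control object to each link. A hypothetical caller shape (not part of this hunk), assuming the core passes its context and a panel instance number:

struct panel_cntl_init_data init_data = {
	.ctx = dc->ctx,
	.inst = 0,	/* panel instance; assumed here */
};

link->panel_cntl = dc->res_pool->funcs->panel_cntl_create(&init_data);
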
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
index 10527593868c..b77e9dc16086 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
@@ -53,6 +53,7 @@
#include "abm.h"
#include "audio.h"
#include "reg_helper.h"
+#include "panel_cntl.h"
/* include DCE11 register header files */
#include "dce/dce_11_0_d.h"
@@ -697,31 +698,6 @@ void dce110_enable_stream(struct pipe_ctx *pipe_ctx)
}
-/*todo: cloned in stream enc, fix*/
-bool dce110_is_panel_backlight_on(struct dc_link *link)
-{
- struct dc_context *ctx = link->ctx;
- struct dce_hwseq *hws = ctx->dc->hwseq;
- uint32_t value;
-
- REG_GET(LVTMA_PWRSEQ_CNTL, LVTMA_BLON, &value);
-
- return value;
-}
-
-bool dce110_is_panel_powered_on(struct dc_link *link)
-{
- struct dc_context *ctx = link->ctx;
- struct dce_hwseq *hws = ctx->dc->hwseq;
- uint32_t pwr_seq_state, dig_on, dig_on_ovrd;
-
- REG_GET(LVTMA_PWRSEQ_STATE, LVTMA_PWRSEQ_TARGET_STATE_R, &pwr_seq_state);
-
- REG_GET_2(LVTMA_PWRSEQ_CNTL, LVTMA_DIGON, &dig_on, LVTMA_DIGON_OVRD, &dig_on_ovrd);
-
- return (pwr_seq_state == 1) || (dig_on == 1 && dig_on_ovrd == 1);
-}
-
static enum bp_result link_transmitter_control(
struct dc_bios *bios,
struct bp_transmitter_control *cntl)
@@ -810,7 +786,6 @@ void dce110_edp_power_control(
bool power_up)
{
struct dc_context *ctx = link->ctx;
- struct dce_hwseq *hwseq = ctx->dc->hwseq;
struct bp_transmitter_control cntl = { 0 };
enum bp_result bp_result;
@@ -821,7 +796,11 @@ void dce110_edp_power_control(
return;
}
- if (power_up != hwseq->funcs.is_panel_powered_on(link)) {
+ if (!link->panel_cntl)
+ return;
+
+ if (power_up !=
+ link->panel_cntl->funcs->is_panel_powered_on(link->panel_cntl)) {
/* Send VBIOS command to prompt eDP panel power */
if (power_up) {
unsigned long long current_ts = dm_get_timestamp(ctx);
@@ -892,7 +871,6 @@ void dce110_edp_backlight_control(
bool enable)
{
struct dc_context *ctx = link->ctx;
- struct dce_hwseq *hws = ctx->dc->hwseq;
struct bp_transmitter_control cntl = { 0 };
if (dal_graphics_object_id_get_connector_id(link->link_enc->connector)
@@ -901,7 +879,8 @@ void dce110_edp_backlight_control(
return;
}
- if (enable && hws->funcs.is_panel_backlight_on(link)) {
+ if (enable && link->panel_cntl &&
+ link->panel_cntl->funcs->is_panel_backlight_on(link->panel_cntl)) {
DC_LOG_HW_RESUME_S3(
"%s: panel already powered up. Do nothing.\n",
__func__);
@@ -1087,7 +1066,7 @@ void dce110_blank_stream(struct pipe_ctx *pipe_ctx)
if (link->local_sink && link->local_sink->sink_signal == SIGNAL_TYPE_EDP) {
hws->funcs.edp_backlight_control(link, false);
- dc_link_set_abm_disable(link);
+ link->dc->hwss.set_abm_immediate_disable(pipe_ctx);
}
if (dc_is_dp_signal(pipe_ctx->stream->signal))
@@ -1432,7 +1411,7 @@ static enum dc_status apply_single_controller_ctx_to_hw(
pipe_ctx->plane_res.scl_data.lb_params.alpha_en = pipe_ctx->bottom_pipe != 0;
- pipe_ctx->stream->link->psr_feature_enabled = false;
+ pipe_ctx->stream->link->psr_settings.psr_feature_enabled = false;
return DC_OK;
}
@@ -1838,7 +1817,7 @@ static bool should_enable_fbc(struct dc *dc,
return false;
/* PSR should not be enabled */
- if (pipe_ctx->stream->link->psr_feature_enabled)
+ if (pipe_ctx->stream->link->psr_settings.psr_feature_enabled)
return false;
/* Nothing to compress */
@@ -2376,6 +2355,7 @@ static void init_hw(struct dc *dc)
struct abm *abm;
struct dmcu *dmcu;
struct dce_hwseq *hws = dc->hwseq;
+ uint32_t backlight = MAX_BACKLIGHT_LEVEL;
bp = dc->ctx->dc_bios;
for (i = 0; i < dc->res_pool->pipe_count; i++) {
@@ -2422,12 +2402,17 @@ static void init_hw(struct dc *dc)
audio->funcs->hw_init(audio);
}
- abm = dc->res_pool->abm;
- if (abm != NULL) {
- abm->funcs->init_backlight(abm);
- abm->funcs->abm_init(abm);
+ for (i = 0; i < dc->link_count; i++) {
+ struct dc_link *link = dc->links[i];
+
+ if (link->panel_cntl)
+ backlight = link->panel_cntl->funcs->hw_init(link->panel_cntl);
}
+ abm = dc->res_pool->abm;
+ if (abm != NULL)
+ abm->funcs->abm_init(abm, backlight);
+
dmcu = dc->res_pool->dmcu;
if (dmcu != NULL && abm != NULL)
abm->dmcu_is_running = dmcu->funcs->is_dmcu_initialized(dmcu);
@@ -2735,6 +2720,53 @@ void dce110_set_cursor_attribute(struct pipe_ctx *pipe_ctx)
pipe_ctx->plane_res.xfm, attributes);
}
+bool dce110_set_backlight_level(struct pipe_ctx *pipe_ctx,
+ uint32_t backlight_pwm_u16_16,
+ uint32_t frame_ramp)
+{
+ struct dc_link *link = pipe_ctx->stream->link;
+ struct dc *dc = link->ctx->dc;
+ struct abm *abm = pipe_ctx->stream_res.abm;
+ struct panel_cntl *panel_cntl = link->panel_cntl;
+ struct dmcu *dmcu = dc->res_pool->dmcu;
+ bool fw_set_brightness = true;
+ /* DMCU uses controller id values that are offset by -1,
+ * therefore +1 here
+ */
+ uint32_t controller_id = pipe_ctx->stream_res.tg->inst + 1;
+
+ if (abm == NULL || panel_cntl == NULL || (abm->funcs->set_backlight_level_pwm == NULL))
+ return false;
+
+ if (dmcu)
+ fw_set_brightness = dmcu->funcs->is_dmcu_initialized(dmcu);
+
+ if (!fw_set_brightness && panel_cntl->funcs->driver_set_backlight)
+ panel_cntl->funcs->driver_set_backlight(panel_cntl, backlight_pwm_u16_16);
+ else
+ abm->funcs->set_backlight_level_pwm(
+ abm,
+ backlight_pwm_u16_16,
+ frame_ramp,
+ controller_id,
+ link->panel_cntl->inst);
+
+ return true;
+}
+
+void dce110_set_abm_immediate_disable(struct pipe_ctx *pipe_ctx)
+{
+ struct abm *abm = pipe_ctx->stream_res.abm;
+ struct panel_cntl *panel_cntl = pipe_ctx->stream->link->panel_cntl;
+
+ if (abm)
+ abm->funcs->set_abm_immediate_disable(abm,
+ pipe_ctx->stream->link->panel_cntl->inst);
+
+ if (panel_cntl)
+ panel_cntl->funcs->store_backlight_level(panel_cntl);
+}
+
static const struct hw_sequencer_funcs dce110_funcs = {
.program_gamut_remap = program_gamut_remap,
.program_output_csc = program_output_csc,
@@ -2769,7 +2801,9 @@ static const struct hw_sequencer_funcs dce110_funcs = {
.edp_power_control = dce110_edp_power_control,
.edp_wait_for_hpd_ready = dce110_edp_wait_for_hpd_ready,
.set_cursor_position = dce110_set_cursor_position,
- .set_cursor_attribute = dce110_set_cursor_attribute
+ .set_cursor_attribute = dce110_set_cursor_attribute,
+ .set_backlight_level = dce110_set_backlight_level,
+ .set_abm_immediate_disable = dce110_set_abm_immediate_disable,
};
static const struct hwseq_private_funcs dce110_private_funcs = {
@@ -2785,8 +2819,6 @@ static const struct hwseq_private_funcs dce110_private_funcs = {
.disable_stream_gating = NULL,
.enable_stream_gating = NULL,
.edp_backlight_control = dce110_edp_backlight_control,
- .is_panel_backlight_on = dce110_is_panel_backlight_on,
- .is_panel_powered_on = dce110_is_panel_powered_on,
};
void dce110_hw_sequencer_construct(struct dc *dc)
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.h
index 34be166e8ff0..fe5326df00f7 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.h
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.h
@@ -85,9 +85,10 @@ void dce110_edp_wait_for_hpd_ready(
struct dc_link *link,
bool power_up);
-bool dce110_is_panel_backlight_on(struct dc_link *link);
-
-bool dce110_is_panel_powered_on(struct dc_link *link);
+bool dce110_set_backlight_level(struct pipe_ctx *pipe_ctx,
+ uint32_t backlight_pwm_u16_16,
+ uint32_t frame_ramp);
+void dce110_set_abm_immediate_disable(struct pipe_ctx *pipe_ctx);
#endif /* __DC_HWSS_DCE110_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_opp_csc_v.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_opp_csc_v.c
index 4245e1f818a3..e096d2b95ef9 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_opp_csc_v.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_opp_csc_v.c
@@ -679,8 +679,7 @@ void dce110_opp_v_set_csc_default(
if (default_adjust->force_hw_default == false) {
const struct out_csc_color_matrix *elm;
/* currently parameter not in use */
- enum grph_color_adjust_option option =
- GRPH_COLOR_MATRIX_HW_DEFAULT;
+ enum grph_color_adjust_option option;
uint32_t i;
/*
* HW default false we program locally defined matrix
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c
index bf14e9ab040c..9597fc79d7fa 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c
@@ -53,6 +53,7 @@
#include "dce/dce_abm.h"
#include "dce/dce_dmcu.h"
#include "dce/dce_i2c.h"
+#include "dce/dce_panel_cntl.h"
#define DC_LOGGER \
dc->ctx->logger
@@ -275,6 +276,18 @@ static const struct dce_stream_encoder_mask se_mask = {
SE_COMMON_MASK_SH_LIST_DCE110(_MASK)
};
+static const struct dce_panel_cntl_registers panel_cntl_regs[] = {
+ { DCE_PANEL_CNTL_REG_LIST() }
+};
+
+static const struct dce_panel_cntl_shift panel_cntl_shift = {
+ DCE_PANEL_CNTL_MASK_SH_LIST(__SHIFT)
+};
+
+static const struct dce_panel_cntl_mask panel_cntl_mask = {
+ DCE_PANEL_CNTL_MASK_SH_LIST(_MASK)
+};
+
static const struct dce110_aux_registers_shift aux_shift = {
DCE_AUX_MASK_SH_LIST(__SHIFT)
};
@@ -673,6 +686,23 @@ static struct link_encoder *dce110_link_encoder_create(
return &enc110->base;
}
+static struct panel_cntl *dce110_panel_cntl_create(const struct panel_cntl_init_data *init_data)
+{
+ struct dce_panel_cntl *panel_cntl =
+ kzalloc(sizeof(struct dce_panel_cntl), GFP_KERNEL);
+
+ if (!panel_cntl)
+ return NULL;
+
+ dce_panel_cntl_construct(panel_cntl,
+ init_data,
+ &panel_cntl_regs[init_data->inst],
+ &panel_cntl_shift,
+ &panel_cntl_mask);
+
+ return &panel_cntl->base;
+}
+
static struct output_pixel_processor *dce110_opp_create(
struct dc_context *ctx,
uint32_t inst)
@@ -1203,6 +1233,7 @@ struct stream_encoder *dce110_find_first_free_match_stream_enc_for_link(
static const struct resource_funcs dce110_res_pool_funcs = {
.destroy = dce110_destroy_resource_pool,
.link_enc_create = dce110_link_encoder_create,
+ .panel_cntl_create = dce110_panel_cntl_create,
.validate_bandwidth = dce110_validate_bandwidth,
.validate_plane = dce110_validate_plane,
.acquire_idle_pipe_for_layer = dce110_acquire_underlay,
diff --git a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c
index 700ad8b3e54b..51b3fe502670 100644
--- a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c
@@ -51,6 +51,7 @@
#include "dce/dce_dmcu.h"
#include "dce/dce_aux.h"
#include "dce/dce_i2c.h"
+#include "dce/dce_panel_cntl.h"
#include "reg_helper.h"
@@ -238,6 +239,18 @@ static const struct dce110_link_enc_aux_registers link_enc_aux_regs[] = {
aux_regs(5)
};
+static const struct dce_panel_cntl_registers panel_cntl_regs[] = {
+ { DCE_PANEL_CNTL_REG_LIST() }
+};
+
+static const struct dce_panel_cntl_shift panel_cntl_shift = {
+ DCE_PANEL_CNTL_MASK_SH_LIST(__SHIFT)
+};
+
+static const struct dce_panel_cntl_mask panel_cntl_mask = {
+ DCE_PANEL_CNTL_MASK_SH_LIST(_MASK)
+};
+
#define hpd_regs(id)\
[id] = {\
HPD_REG_LIST(id)\
@@ -398,7 +411,7 @@ static const struct dc_plane_cap plane_cap = {
.pixel_format_support = {
.argb8888 = true,
.nv12 = false,
- .fp16 = false
+ .fp16 = true
},
.max_upscale_factor = {
@@ -631,6 +644,23 @@ struct link_encoder *dce112_link_encoder_create(
return &enc110->base;
}
+static struct panel_cntl *dce112_panel_cntl_create(const struct panel_cntl_init_data *init_data)
+{
+ struct dce_panel_cntl *panel_cntl =
+ kzalloc(sizeof(struct dce_panel_cntl), GFP_KERNEL);
+
+ if (!panel_cntl)
+ return NULL;
+
+ dce_panel_cntl_construct(panel_cntl,
+ init_data,
+ &panel_cntl_regs[init_data->inst],
+ &panel_cntl_shift,
+ &panel_cntl_mask);
+
+ return &panel_cntl->base;
+}
+
static struct input_pixel_processor *dce112_ipp_create(
struct dc_context *ctx, uint32_t inst)
{
@@ -1021,6 +1051,7 @@ static void dce112_destroy_resource_pool(struct resource_pool **pool)
static const struct resource_funcs dce112_res_pool_funcs = {
.destroy = dce112_destroy_resource_pool,
.link_enc_create = dce112_link_encoder_create,
+ .panel_cntl_create = dce112_panel_cntl_create,
.validate_bandwidth = dce112_validate_bandwidth,
.validate_plane = dce100_validate_plane,
.add_stream_to_ctx = dce112_add_stream_to_ctx,
diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c
index 53ab88ef71f5..8f362e8c1787 100644
--- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c
@@ -44,6 +44,7 @@
#include "dce/dce_clock_source.h"
#include "dce/dce_ipp.h"
#include "dce/dce_mem_input.h"
+#include "dce/dce_panel_cntl.h"
#include "dce110/dce110_hw_sequencer.h"
#include "dce120/dce120_hw_sequencer.h"
@@ -293,6 +294,18 @@ static const struct dce_stream_encoder_mask se_mask = {
SE_COMMON_MASK_SH_LIST_DCE120(_MASK)
};
+static const struct dce_panel_cntl_registers panel_cntl_regs[] = {
+ { DCE_PANEL_CNTL_REG_LIST() }
+};
+
+static const struct dce_panel_cntl_shift panel_cntl_shift = {
+ DCE_PANEL_CNTL_MASK_SH_LIST(__SHIFT)
+};
+
+static const struct dce_panel_cntl_mask panel_cntl_mask = {
+ DCE_PANEL_CNTL_MASK_SH_LIST(_MASK)
+};
+
static const struct dce110_aux_registers_shift aux_shift = {
DCE12_AUX_MASK_SH_LIST(__SHIFT)
};
@@ -503,7 +516,7 @@ static const struct dc_plane_cap plane_cap = {
.pixel_format_support = {
.argb8888 = true,
.nv12 = false,
- .fp16 = false
+ .fp16 = true
},
.max_upscale_factor = {
@@ -715,6 +728,23 @@ static struct link_encoder *dce120_link_encoder_create(
return &enc110->base;
}
+static struct panel_cntl *dce120_panel_cntl_create(const struct panel_cntl_init_data *init_data)
+{
+ struct dce_panel_cntl *panel_cntl =
+ kzalloc(sizeof(struct dce_panel_cntl), GFP_KERNEL);
+
+ if (!panel_cntl)
+ return NULL;
+
+ dce_panel_cntl_construct(panel_cntl,
+ init_data,
+ &panel_cntl_regs[init_data->inst],
+ &panel_cntl_shift,
+ &panel_cntl_mask);
+
+ return &panel_cntl->base;
+}
+
static struct input_pixel_processor *dce120_ipp_create(
struct dc_context *ctx, uint32_t inst)
{
@@ -880,6 +910,7 @@ static void dce120_destroy_resource_pool(struct resource_pool **pool)
static const struct resource_funcs dce120_res_pool_funcs = {
.destroy = dce120_destroy_resource_pool,
.link_enc_create = dce120_link_encoder_create,
+ .panel_cntl_create = dce120_panel_cntl_create,
.validate_bandwidth = dce112_validate_bandwidth,
.validate_plane = dce100_validate_plane,
.add_stream_to_ctx = dce112_add_stream_to_ctx,
diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce80/dce80_hw_sequencer.c
index 893261c81854..d2ceebdbdf51 100644
--- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dce80/dce80_hw_sequencer.c
@@ -36,34 +36,6 @@
#include "dce/dce_8_0_d.h"
#include "dce/dce_8_0_sh_mask.h"
-struct dce80_hw_seq_reg_offsets {
- uint32_t crtc;
-};
-
-static const struct dce80_hw_seq_reg_offsets reg_offsets[] = {
-{
- .crtc = (mmCRTC0_CRTC_GSL_CONTROL - mmCRTC_GSL_CONTROL),
-},
-{
- .crtc = (mmCRTC1_CRTC_GSL_CONTROL - mmCRTC_GSL_CONTROL),
-},
-{
- .crtc = (mmCRTC2_CRTC_GSL_CONTROL - mmCRTC_GSL_CONTROL),
-},
-{
- .crtc = (mmCRTC3_CRTC_GSL_CONTROL - mmCRTC_GSL_CONTROL),
-},
-{
- .crtc = (mmCRTC4_CRTC_GSL_CONTROL - mmCRTC_GSL_CONTROL),
-},
-{
- .crtc = (mmCRTC5_CRTC_GSL_CONTROL - mmCRTC_GSL_CONTROL),
-}
-};
-
-#define HW_REG_CRTC(reg, id)\
- (reg + reg_offsets[id].crtc)
-
/*******************************************************************************
* Private definitions
******************************************************************************/
diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c
index 2ad5c28c6e66..a19be9de2df7 100644
--- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c
@@ -50,6 +50,7 @@
#include "dce/dce_hwseq.h"
#include "dce80/dce80_hw_sequencer.h"
#include "dce100/dce100_resource.h"
+#include "dce/dce_panel_cntl.h"
#include "reg_helper.h"
@@ -266,6 +267,18 @@ static const struct dce_stream_encoder_mask se_mask = {
SE_COMMON_MASK_SH_LIST_DCE80_100(_MASK)
};
+static const struct dce_panel_cntl_registers panel_cntl_regs[] = {
+ { DCE_PANEL_CNTL_REG_LIST() }
+};
+
+static const struct dce_panel_cntl_shift panel_cntl_shift = {
+ DCE_PANEL_CNTL_MASK_SH_LIST(__SHIFT)
+};
+
+static const struct dce_panel_cntl_mask panel_cntl_mask = {
+ DCE_PANEL_CNTL_MASK_SH_LIST(_MASK)
+};
+
#define opp_regs(id)\
[id] = {\
OPP_DCE_80_REG_LIST(id),\
@@ -728,6 +741,23 @@ struct link_encoder *dce80_link_encoder_create(
return &enc110->base;
}
+static struct panel_cntl *dce80_panel_cntl_create(const struct panel_cntl_init_data *init_data)
+{
+ struct dce_panel_cntl *panel_cntl =
+ kzalloc(sizeof(struct dce_panel_cntl), GFP_KERNEL);
+
+ if (!panel_cntl)
+ return NULL;
+
+ dce_panel_cntl_construct(panel_cntl,
+ init_data,
+ &panel_cntl_regs[init_data->inst],
+ &panel_cntl_shift,
+ &panel_cntl_mask);
+
+ return &panel_cntl->base;
+}
+
struct clock_source *dce80_clock_source_create(
struct dc_context *ctx,
struct dc_bios *bios,
@@ -909,6 +939,7 @@ static void dce80_destroy_resource_pool(struct resource_pool **pool)
static const struct resource_funcs dce80_res_pool_funcs = {
.destroy = dce80_destroy_resource_pool,
.link_enc_create = dce80_link_encoder_create,
+ .panel_cntl_create = dce80_panel_cntl_create,
.validate_bandwidth = dce80_validate_bandwidth,
.validate_plane = dce100_validate_plane,
.add_stream_to_ctx = dce100_add_stream_to_ctx,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c
index 0e682b5aa3eb..7f8456b9988b 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c
@@ -134,13 +134,6 @@ bool dpp1_get_optimal_number_of_taps(
struct scaler_data *scl_data,
const struct scaling_taps *in_taps)
{
- uint32_t pixel_width;
-
- if (scl_data->viewport.width > scl_data->recout.width)
- pixel_width = scl_data->recout.width;
- else
- pixel_width = scl_data->viewport.width;
-
/* Some ASICs does not support FP16 scaling, so we reject modes require this*/
if (scl_data->format == PIXEL_FORMAT_FP16 &&
dpp->caps->dscl_data_proc_format == DSCL_DATA_PRCESSING_FIXED_FORMAT &&
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c
index deccab0228d2..75637c291e75 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c
@@ -93,7 +93,6 @@ void hubbub1_wm_read_state(struct hubbub *hubbub,
void hubbub1_allow_self_refresh_control(struct hubbub *hubbub, bool allow)
{
struct dcn10_hubbub *hubbub1 = TO_DCN10_HUBBUB(hubbub);
-
/*
* DCHUBBUB_ARB_ALLOW_SELF_REFRESH_FORCE_ENABLE = 1 means do not allow stutter
* DCHUBBUB_ARB_ALLOW_SELF_REFRESH_FORCE_ENABLE = 0 means allow stutter
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c
index 31b64733d693..319366ebb44f 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c
@@ -1139,6 +1139,8 @@ void hubp1_cursor_set_position(
int src_y_offset = pos->y - pos->y_hotspot - param->viewport.y;
int x_hotspot = pos->x_hotspot;
int y_hotspot = pos->y_hotspot;
+ int cursor_height = (int)hubp->curs_attr.height;
+ int cursor_width = (int)hubp->curs_attr.width;
uint32_t dst_x_offset;
uint32_t cur_en = pos->enable ? 1 : 0;
@@ -1152,10 +1154,16 @@ void hubp1_cursor_set_position(
if (hubp->curs_attr.address.quad_part == 0)
return;
+ // Rotated cursor width/height and hotspots tweaks for offset calculation
if (param->rotation == ROTATION_ANGLE_90 || param->rotation == ROTATION_ANGLE_270) {
- src_x_offset = pos->y - pos->y_hotspot - param->viewport.x;
- y_hotspot = pos->x_hotspot;
- x_hotspot = pos->y_hotspot;
+ swap(cursor_height, cursor_width);
+ if (param->rotation == ROTATION_ANGLE_90) {
+ src_x_offset = pos->x - pos->y_hotspot - param->viewport.x;
+ src_y_offset = pos->y - pos->x_hotspot - param->viewport.y;
+ }
+ } else if (param->rotation == ROTATION_ANGLE_180) {
+ src_x_offset = pos->x - param->viewport.x;
+ src_y_offset = pos->y - param->viewport.y;
}
if (param->mirror) {
@@ -1177,13 +1185,13 @@ void hubp1_cursor_set_position(
if (src_x_offset >= (int)param->viewport.width)
cur_en = 0; /* not visible beyond right edge*/
- if (src_x_offset + (int)hubp->curs_attr.width <= 0)
+ if (src_x_offset + cursor_width <= 0)
cur_en = 0; /* not visible beyond left edge*/
if (src_y_offset >= (int)param->viewport.height)
cur_en = 0; /* not visible beyond bottom edge*/
- if (src_y_offset + (int)hubp->curs_attr.height <= 0)
+ if (src_y_offset + cursor_height <= 0)
cur_en = 0; /* not visible beyond top edge*/
if (cur_en && REG_READ(CURSOR_SURFACE_ADDRESS) == 0)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
index 416afb99529d..77f16921e7f0 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
@@ -737,7 +737,8 @@ void dcn10_bios_golden_init(struct dc *dc)
if (dc->res_pool->hubbub->funcs->allow_self_refresh_control)
if (allow_self_fresh_force_enable == false &&
dc->res_pool->hubbub->funcs->is_allow_self_refresh_enabled(dc->res_pool->hubbub))
- dc->res_pool->hubbub->funcs->allow_self_refresh_control(dc->res_pool->hubbub, true);
+ dc->res_pool->hubbub->funcs->allow_self_refresh_control(dc->res_pool->hubbub,
+ !dc->res_pool->hubbub->ctx->dc->debug.disable_stutter);
}
@@ -826,6 +827,14 @@ enum dc_status dcn10_enable_stream_timing(
color_space = stream->output_color_space;
color_space_to_black_color(dc, color_space, &black_color);
+ /*
+ * The way 420 is packed, 2 channels carry the Y component, 1 channel
+ * alternates between Cb and Cr, so both channels need the pixel
+ * value for Y
+ */
+ if (stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR420)
+ black_color.color_r_cr = black_color.color_g_y;
+
if (pipe_ctx->stream_res.tg->funcs->set_blank_color)
pipe_ctx->stream_res.tg->funcs->set_blank_color(
pipe_ctx->stream_res.tg,
@@ -903,7 +912,7 @@ static void dcn10_reset_back_end_for_pipe(
if (pipe_ctx->top_pipe == NULL) {
if (pipe_ctx->stream_res.abm)
- pipe_ctx->stream_res.abm->funcs->set_abm_immediate_disable(pipe_ctx->stream_res.abm);
+ dc->hwss.set_abm_immediate_disable(pipe_ctx);
pipe_ctx->stream_res.tg->funcs->disable_crtc(pipe_ctx->stream_res.tg);
@@ -1238,12 +1247,13 @@ void dcn10_init_pipes(struct dc *dc, struct dc_state *context)
void dcn10_init_hw(struct dc *dc)
{
- int i;
+ int i, j;
struct abm *abm = dc->res_pool->abm;
struct dmcu *dmcu = dc->res_pool->dmcu;
struct dce_hwseq *hws = dc->hwseq;
struct dc_bios *dcb = dc->ctx->dc_bios;
struct resource_pool *res_pool = dc->res_pool;
+ uint32_t backlight = MAX_BACKLIGHT_LEVEL;
if (dc->clk_mgr && dc->clk_mgr->funcs->init_clocks)
dc->clk_mgr->funcs->init_clocks(dc->clk_mgr);
@@ -1333,17 +1343,28 @@ void dcn10_init_hw(struct dc *dc)
continue;
/*
- * core_link_read_dpcd() will invoke dm_helpers_dp_read_dpcd(),
- * which needs to read dpcd info with the help of aconnector.
- * If aconnector (dc->links[i]->prev) is NULL, then dpcd status
- * cannot be read.
+ * If any of the displays are lit up, turn them off.
+ * The reason is that some MST hubs cannot be turned off
+ * completely until we tell them to do so.
+ * If not turned off, then displays connected to the MST hub
+ * won't light up.
*/
- if (dc->links[i]->priv) {
- /* if any of the displays are lit up turn them off */
- status = core_link_read_dpcd(dc->links[i], DP_SET_POWER,
- &dpcd_power_state, sizeof(dpcd_power_state));
- if (status == DC_OK && dpcd_power_state == DP_POWER_STATE_D0)
- dp_receiver_power_ctrl(dc->links[i], false);
+ status = core_link_read_dpcd(dc->links[i], DP_SET_POWER,
+ &dpcd_power_state, sizeof(dpcd_power_state));
+ if (status == DC_OK && dpcd_power_state == DP_POWER_STATE_D0) {
+ /* blank dp stream before power off receiver*/
+ if (dc->links[i]->link_enc->funcs->get_dig_frontend) {
+ unsigned int fe = dc->links[i]->link_enc->funcs->get_dig_frontend(dc->links[i]->link_enc);
+
+ for (j = 0; j < dc->res_pool->stream_enc_count; j++) {
+ if (fe == dc->res_pool->stream_enc[j]->id) {
+ dc->res_pool->stream_enc[j]->funcs->dp_blank(
+ dc->res_pool->stream_enc[j]);
+ break;
+ }
+ }
+ }
+ dp_receiver_power_ctrl(dc->links[i], false);
}
}
}
@@ -1361,17 +1382,54 @@ void dcn10_init_hw(struct dc *dc)
!dc->res_pool->hubbub->ctx->dc->debug.disable_stutter);
}
+ /* In headless boot cases, DIG may be turned
+ * on, which causes HW/SW discrepancies.
+ * To avoid this, power down hardware on boot
+ * if DIG is turned on and seamless boot is not enabled.
+ */
+ if (dc->config.power_down_display_on_boot) {
+ struct dc_link *edp_link = get_edp_link(dc);
+
+ if (edp_link &&
+ edp_link->link_enc->funcs->is_dig_enabled &&
+ edp_link->link_enc->funcs->is_dig_enabled(edp_link->link_enc) &&
+ dc->hwss.edp_backlight_control &&
+ dc->hwss.power_down &&
+ dc->hwss.edp_power_control) {
+ dc->hwss.edp_backlight_control(edp_link, false);
+ dc->hwss.power_down(dc);
+ dc->hwss.edp_power_control(edp_link, false);
+ } else {
+ for (i = 0; i < dc->link_count; i++) {
+ struct dc_link *link = dc->links[i];
+
+ if (link->link_enc->funcs->is_dig_enabled &&
+ link->link_enc->funcs->is_dig_enabled(link->link_enc) &&
+ dc->hwss.power_down) {
+ dc->hwss.power_down(dc);
+ break;
+ }
+
+ }
+ }
+ }
+
for (i = 0; i < res_pool->audio_count; i++) {
struct audio *audio = res_pool->audios[i];
audio->funcs->hw_init(audio);
}
- if (abm != NULL) {
- abm->funcs->init_backlight(abm);
- abm->funcs->abm_init(abm);
+ for (i = 0; i < dc->link_count; i++) {
+ struct dc_link *link = dc->links[i];
+
+ if (link->panel_cntl)
+ backlight = link->panel_cntl->funcs->hw_init(link->panel_cntl);
}
+ if (abm != NULL)
+ abm->funcs->abm_init(abm, backlight);
+
if (dmcu != NULL && !dmcu->auto_load_dmcu)
dmcu->funcs->dmcu_init(dmcu);
@@ -2164,25 +2222,25 @@ void dcn10_get_surface_visual_confirm_color(
switch (pipe_ctx->plane_res.scl_data.format) {
case PIXEL_FORMAT_ARGB8888:
- /* set boarder color to red */
+ /* set border color to red */
color->color_r_cr = color_value;
break;
case PIXEL_FORMAT_ARGB2101010:
- /* set boarder color to blue */
+ /* set border color to blue */
color->color_b_cb = color_value;
break;
case PIXEL_FORMAT_420BPP8:
- /* set boarder color to green */
+ /* set border color to green */
color->color_g_y = color_value;
break;
case PIXEL_FORMAT_420BPP10:
- /* set boarder color to yellow */
+ /* set border color to yellow */
color->color_g_y = color_value;
color->color_r_cr = color_value;
break;
case PIXEL_FORMAT_FP16:
- /* set boarder color to white */
+ /* set border color to white */
color->color_r_cr = color_value;
color->color_b_cb = color_value;
color->color_g_y = color_value;
@@ -2207,25 +2265,25 @@ void dcn10_get_hdr_visual_confirm_color(
switch (top_pipe_ctx->plane_res.scl_data.format) {
case PIXEL_FORMAT_ARGB2101010:
if (top_pipe_ctx->stream->out_transfer_func->tf == TRANSFER_FUNCTION_PQ) {
- /* HDR10, ARGB2101010 - set boarder color to red */
+ /* HDR10, ARGB2101010 - set border color to red */
color->color_r_cr = color_value;
} else if (top_pipe_ctx->stream->out_transfer_func->tf == TRANSFER_FUNCTION_GAMMA22) {
- /* FreeSync 2 ARGB2101010 - set boarder color to pink */
+ /* FreeSync 2 ARGB2101010 - set border color to pink */
color->color_r_cr = color_value;
color->color_b_cb = color_value;
}
break;
case PIXEL_FORMAT_FP16:
if (top_pipe_ctx->stream->out_transfer_func->tf == TRANSFER_FUNCTION_PQ) {
- /* HDR10, FP16 - set boarder color to blue */
+ /* HDR10, FP16 - set border color to blue */
color->color_b_cb = color_value;
} else if (top_pipe_ctx->stream->out_transfer_func->tf == TRANSFER_FUNCTION_GAMMA22) {
- /* FreeSync 2 HDR - set boarder color to green */
+ /* FreeSync 2 HDR - set border color to green */
color->color_g_y = color_value;
}
break;
default:
- /* SDR - set boarder color to Gray */
+ /* SDR - set border color to Gray */
color->color_r_cr = color_value/2;
color->color_b_cb = color_value/2;
color->color_g_y = color_value/2;
@@ -2274,6 +2332,14 @@ void dcn10_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx)
&blnd_cfg.black_color);
}
+ /*
+ * The way 420 is packed, 2 channels carry the Y component, 1 channel
+ * alternates between Cb and Cr, so both channels need the pixel
+ * value for Y
+ */
+ if (pipe_ctx->stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR420)
+ blnd_cfg.black_color.color_r_cr = blnd_cfg.black_color.color_g_y;
+
if (per_pixel_alpha)
blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA;
else
@@ -2510,12 +2576,12 @@ void dcn10_blank_pixel_data(
if (stream_res->tg->funcs->set_blank)
stream_res->tg->funcs->set_blank(stream_res->tg, blank);
if (stream_res->abm) {
- stream_res->abm->funcs->set_pipe(stream_res->abm, stream_res->tg->inst + 1);
+ stream_res->abm->funcs->set_pipe(stream_res->abm, stream_res->tg->inst + 1,
+ stream->link->panel_cntl->inst);
stream_res->abm->funcs->set_abm_level(stream_res->abm, stream->abm_level);
}
} else if (blank) {
- if (stream_res->abm)
- stream_res->abm->funcs->set_abm_immediate_disable(stream_res->abm);
+ dc->hwss.set_abm_immediate_disable(pipe_ctx);
if (stream_res->tg->funcs->set_blank)
stream_res->tg->funcs->set_blank(stream_res->tg, blank);
}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c
index 9e8e32629e47..7cb8c3fb2665 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c
@@ -73,6 +73,8 @@ static const struct hw_sequencer_funcs dcn10_funcs = {
.get_clock = dcn10_get_clock,
.get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync,
.calc_vupdate_position = dcn10_calc_vupdate_position,
+ .set_backlight_level = dce110_set_backlight_level,
+ .set_abm_immediate_disable = dce110_set_abm_immediate_disable,
};
static const struct hwseq_private_funcs dcn10_private_funcs = {
@@ -89,8 +91,6 @@ static const struct hwseq_private_funcs dcn10_private_funcs = {
.reset_hw_ctx_wrap = dcn10_reset_hw_ctx_wrap,
.enable_stream_timing = dcn10_enable_stream_timing,
.edp_backlight_control = dce110_edp_backlight_control,
- .is_panel_backlight_on = dce110_is_panel_backlight_on,
- .is_panel_powered_on = dce110_is_panel_powered_on,
.disable_stream_gating = NULL,
.enable_stream_gating = NULL,
.setup_vupdate_interrupt = dcn10_setup_vupdate_interrupt,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c
index d3617d6785a7..7fd385be3f3d 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c
@@ -90,7 +90,8 @@ static const struct link_encoder_funcs dcn10_lnk_enc_funcs = {
.is_dig_enabled = dcn10_is_dig_enabled,
.get_dig_frontend = dcn10_get_dig_frontend,
.get_dig_mode = dcn10_get_dig_mode,
- .destroy = dcn10_link_encoder_destroy
+ .destroy = dcn10_link_encoder_destroy,
+ .get_max_link_cap = dcn10_link_encoder_get_max_link_cap,
};
static enum bp_result link_transmitter_control(
@@ -1370,7 +1371,6 @@ void dcn10_link_encoder_disable_hpd(struct link_encoder *enc)
DC_HPD_EN, 0);
}
-
#define AUX_REG(reg)\
(enc10->aux_regs->reg)
@@ -1425,3 +1425,19 @@ enum signal_type dcn10_get_dig_mode(
return SIGNAL_TYPE_NONE;
}
+void dcn10_link_encoder_get_max_link_cap(struct link_encoder *enc,
+ struct dc_link_settings *link_settings)
+{
+ /* Set Default link settings */
+ struct dc_link_settings max_link_cap = {LANE_COUNT_FOUR, LINK_RATE_HIGH,
+ LINK_SPREAD_05_DOWNSPREAD_30KHZ, false, 0};
+
+ /* Higher link settings based on feature supported */
+ if (enc->features.flags.bits.IS_HBR2_CAPABLE)
+ max_link_cap.link_rate = LINK_RATE_HIGH2;
+
+ if (enc->features.flags.bits.IS_HBR3_CAPABLE)
+ max_link_cap.link_rate = LINK_RATE_HIGH3;
+
+ *link_settings = max_link_cap;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.h
index 762109174fb8..68395bcc24fd 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.h
@@ -575,4 +575,7 @@ void dcn10_aux_initialize(struct dcn10_link_encoder *enc10);
enum signal_type dcn10_get_dig_mode(
struct link_encoder *enc);
+
+void dcn10_link_encoder_get_max_link_cap(struct link_encoder *enc,
+ struct dc_link_settings *link_settings);
#endif /* __DC_LINK_ENCODER__DCN10_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c
index 17d96ec6acd8..ec0ab42becba 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c
@@ -299,6 +299,7 @@ void optc1_set_vtg_params(struct timing_generator *optc,
uint32_t asic_blank_end;
uint32_t v_init;
uint32_t v_fp2 = 0;
+ int32_t vertical_line_start;
struct optc *optc1 = DCN10TG_FROM_TG(optc);
@@ -315,8 +316,9 @@ void optc1_set_vtg_params(struct timing_generator *optc,
patched_crtc_timing.v_border_top;
/* if VSTARTUP is before VSYNC, FP2 is the offset, otherwise 0 */
- if (optc1->vstartup_start > asic_blank_end)
- v_fp2 = optc1->vstartup_start - asic_blank_end;
+ vertical_line_start = asic_blank_end - optc1->vstartup_start + 1;
+ if (vertical_line_start < 0)
+ v_fp2 = -vertical_line_start;
/* Interlace */
if (REG(OTG_INTERLACE_CONTROL)) {
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h
index 9a459a8fe8a0..8d1e52fb0393 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h
@@ -158,6 +158,7 @@ struct dcn_optc_registers {
uint32_t OTG_GSL_WINDOW_Y;
uint32_t OTG_VUPDATE_KEEPOUT;
uint32_t OTG_CRC_CNTL;
+ uint32_t OTG_CRC_CNTL2;
uint32_t OTG_CRC0_DATA_RG;
uint32_t OTG_CRC0_DATA_B;
uint32_t OTG_CRC0_WINDOWA_X_CONTROL;
@@ -475,7 +476,11 @@ struct dcn_optc_registers {
type OPTC_DSC_SLICE_WIDTH;\
type OPTC_SEGMENT_WIDTH;\
type OPTC_DWB0_SOURCE_SELECT;\
- type OPTC_DWB1_SOURCE_SELECT;
+ type OPTC_DWB1_SOURCE_SELECT;\
+ type OTG_CRC_DSC_MODE;\
+ type OTG_CRC_DATA_STREAM_COMBINE_MODE;\
+ type OTG_CRC_DATA_STREAM_SPLIT_MODE;\
+ type OTG_CRC_DATA_FORMAT;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
index ba849aa31e6e..17d5cb422025 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
@@ -51,6 +51,7 @@
#include "dce112/dce112_resource.h"
#include "dcn10_hubp.h"
#include "dcn10_hubbub.h"
+#include "dce/dce_panel_cntl.h"
#include "soc15_hw_ip.h"
#include "vega10_ip_offset.h"
@@ -329,6 +330,18 @@ static const struct dcn10_link_enc_mask le_mask = {
LINK_ENCODER_MASK_SH_LIST_DCN10(_MASK)
};
+static const struct dce_panel_cntl_registers panel_cntl_regs[] = {
+ { DCN_PANEL_CNTL_REG_LIST() }
+};
+
+static const struct dce_panel_cntl_shift panel_cntl_shift = {
+ DCE_PANEL_CNTL_MASK_SH_LIST(__SHIFT)
+};
+
+static const struct dce_panel_cntl_mask panel_cntl_mask = {
+ DCE_PANEL_CNTL_MASK_SH_LIST(_MASK)
+};
+
static const struct dce110_aux_registers_shift aux_shift = {
DCN10_AUX_MASK_SH_LIST(__SHIFT)
};
@@ -817,6 +830,23 @@ struct link_encoder *dcn10_link_encoder_create(
return &enc10->base;
}
+static struct panel_cntl *dcn10_panel_cntl_create(const struct panel_cntl_init_data *init_data)
+{
+ struct dce_panel_cntl *panel_cntl =
+ kzalloc(sizeof(struct dce_panel_cntl), GFP_KERNEL);
+
+ if (!panel_cntl)
+ return NULL;
+
+ dce_panel_cntl_construct(panel_cntl,
+ init_data,
+ &panel_cntl_regs[init_data->inst],
+ &panel_cntl_shift,
+ &panel_cntl_mask);
+
+ return &panel_cntl->base;
+}
+
struct clock_source *dcn10_clock_source_create(
struct dc_context *ctx,
struct dc_bios *bios,
@@ -1091,24 +1121,6 @@ static enum dc_status build_mapped_resource(
{
struct pipe_ctx *pipe_ctx = resource_get_head_pipe_for_stream(&context->res_ctx, stream);
- /*TODO Seems unneeded anymore */
- /* if (old_context && resource_is_stream_unchanged(old_context, stream)) {
- if (stream != NULL && old_context->streams[i] != NULL) {
- todo: shouldn't have to copy missing parameter here
- resource_build_bit_depth_reduction_params(stream,
- &stream->bit_depth_params);
- stream->clamping.pixel_encoding =
- stream->timing.pixel_encoding;
-
- resource_build_bit_depth_reduction_params(stream,
- &stream->bit_depth_params);
- build_clamping_params(stream);
-
- continue;
- }
- }
- */
-
if (!pipe_ctx)
return DC_ERROR_UNEXPECTED;
@@ -1301,6 +1313,7 @@ static const struct dc_cap_funcs cap_funcs = {
static const struct resource_funcs dcn10_res_pool_funcs = {
.destroy = dcn10_destroy_resource_pool,
.link_enc_create = dcn10_link_encoder_create,
+ .panel_cntl_create = dcn10_panel_cntl_create,
.validate_bandwidth = dcn_validate_bandwidth,
.acquire_idle_pipe_for_layer = dcn10_acquire_idle_pipe_for_layer,
.validate_plane = dcn10_validate_plane,
@@ -1363,6 +1376,40 @@ static bool dcn10_resource_construct(
/* Raven DP PHY HBR2 eye diagram pattern is not stable. Use TP4 */
dc->caps.force_dp_tps4_for_cp2520 = true;
+ /* Color pipeline capabilities */
+ dc->caps.color.dpp.dcn_arch = 1;
+ dc->caps.color.dpp.input_lut_shared = 1;
+ dc->caps.color.dpp.icsc = 1;
+ dc->caps.color.dpp.dgam_ram = 1;
+ dc->caps.color.dpp.dgam_rom_caps.srgb = 1;
+ dc->caps.color.dpp.dgam_rom_caps.bt2020 = 1;
+ dc->caps.color.dpp.dgam_rom_caps.gamma2_2 = 0;
+ dc->caps.color.dpp.dgam_rom_caps.pq = 0;
+ dc->caps.color.dpp.dgam_rom_caps.hlg = 0;
+ dc->caps.color.dpp.post_csc = 0;
+ dc->caps.color.dpp.gamma_corr = 0;
+
+ dc->caps.color.dpp.hw_3d_lut = 0;
+ dc->caps.color.dpp.ogam_ram = 1; // RGAM on DCN1
+ dc->caps.color.dpp.ogam_rom_caps.srgb = 1;
+ dc->caps.color.dpp.ogam_rom_caps.bt2020 = 1;
+ dc->caps.color.dpp.ogam_rom_caps.gamma2_2 = 0;
+ dc->caps.color.dpp.ogam_rom_caps.pq = 0;
+ dc->caps.color.dpp.ogam_rom_caps.hlg = 0;
+ dc->caps.color.dpp.ocsc = 1;
+
+ /* no post-blend color operations */
+ dc->caps.color.mpc.gamut_remap = 0;
+ dc->caps.color.mpc.num_3dluts = 0;
+ dc->caps.color.mpc.shared_3d_lut = 0;
+ dc->caps.color.mpc.ogam_ram = 0;
+ dc->caps.color.mpc.ogam_rom_caps.srgb = 0;
+ dc->caps.color.mpc.ogam_rom_caps.bt2020 = 0;
+ dc->caps.color.mpc.ogam_rom_caps.gamma2_2 = 0;
+ dc->caps.color.mpc.ogam_rom_caps.pq = 0;
+ dc->caps.color.mpc.ogam_rom_caps.hlg = 0;
+ dc->caps.color.mpc.ocsc = 0;
+
if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV)
dc->debug = debug_defaults_drv;
else
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c
index 7eba9333c328..07b2f9399671 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c
@@ -1274,7 +1274,6 @@ static void enc1_se_audio_setup(
{
struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
- uint32_t speakers = 0;
uint32_t channels = 0;
ASSERT(audio_info);
@@ -1282,7 +1281,6 @@ static void enc1_se_audio_setup(
/* This should not happen.it does so we don't get BSOD*/
return;
- speakers = audio_info->flags.info.ALLSPEAKERS;
channels = speakers_to_channels(audio_info->flags.speaker_flags).all;
/* setup the audio stream source select (audio -> dig mapping) */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.h
index 501532dd523a..c478213ba7ad 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.h
@@ -80,6 +80,7 @@ struct dcn20_hubbub {
const struct dcn_hubbub_mask *masks;
unsigned int debug_test_index_pstate;
struct dcn_watermark_set watermarks;
+ int num_vmid;
struct dcn20_vmid vmid[16];
unsigned int detile_buf_size;
};
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
index a023a4d59f41..da5333d165ac 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
@@ -961,8 +961,7 @@ void dcn20_blank_pixel_data(
width = width / odm_cnt;
if (blank) {
- if (stream_res->abm)
- stream_res->abm->funcs->set_abm_immediate_disable(stream_res->abm);
+ dc->hwss.set_abm_immediate_disable(pipe_ctx);
if (dc->debug.visual_confirm != VISUAL_CONFIRM_DISABLE) {
test_pattern = CONTROLLER_DP_TEST_PATTERN_COLORSQUARES;
@@ -997,7 +996,8 @@ void dcn20_blank_pixel_data(
if (!blank)
if (stream_res->abm) {
- stream_res->abm->funcs->set_pipe(stream_res->abm, stream_res->tg->inst + 1);
+ stream_res->abm->funcs->set_pipe(stream_res->abm, stream_res->tg->inst + 1,
+ stream->link->panel_cntl->inst);
stream_res->abm->funcs->set_abm_level(stream_res->abm, stream->abm_level);
}
}
@@ -1478,8 +1478,11 @@ static void dcn20_program_pipe(
if (pipe_ctx->update_flags.bits.odm)
hws->funcs.update_odm(dc, context, pipe_ctx);
- if (pipe_ctx->update_flags.bits.enable)
+ if (pipe_ctx->update_flags.bits.enable) {
dcn20_enable_plane(dc, pipe_ctx, context);
+ if (dc->res_pool->hubbub->funcs->force_wm_propagate_to_pipes)
+ dc->res_pool->hubbub->funcs->force_wm_propagate_to_pipes(dc->res_pool->hubbub);
+ }
if (pipe_ctx->update_flags.raw || pipe_ctx->plane_state->update_flags.raw || pipe_ctx->stream->update_flags.raw)
dcn20_update_dchubp_dpp(dc, pipe_ctx, context);
@@ -2037,8 +2040,7 @@ static void dcn20_reset_back_end_for_pipe(
*/
if (pipe_ctx->top_pipe == NULL) {
- if (pipe_ctx->stream_res.abm)
- pipe_ctx->stream_res.abm->funcs->set_abm_immediate_disable(pipe_ctx->stream_res.abm);
+ dc->hwss.set_abm_immediate_disable(pipe_ctx);
pipe_ctx->stream_res.tg->funcs->disable_crtc(pipe_ctx->stream_res.tg);
@@ -2171,6 +2173,13 @@ void dcn20_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx)
*/
mpcc_id = hubp->inst;
+ /* If there is no full update, don't need to touch MPC tree*/
+ if (!pipe_ctx->plane_state->update_flags.bits.full_update &&
+ !pipe_ctx->update_flags.bits.mpcc) {
+ mpc->funcs->update_blending(mpc, &blnd_cfg, mpcc_id);
+ return;
+ }
+
/* check if this MPCC is already being used */
new_mpcc = mpc->funcs->get_mpcc_for_dpp(mpc_tree_params, mpcc_id);
/* remove MPCC if being used */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c
index 8334bbd6eabb..2fbde4241559 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c
@@ -84,6 +84,8 @@ static const struct hw_sequencer_funcs dcn20_funcs = {
.set_flip_control_gsl = dcn20_set_flip_control_gsl,
.get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync,
.calc_vupdate_position = dcn10_calc_vupdate_position,
+ .set_backlight_level = dce110_set_backlight_level,
+ .set_abm_immediate_disable = dce110_set_abm_immediate_disable,
};
static const struct hwseq_private_funcs dcn20_private_funcs = {
@@ -99,8 +101,6 @@ static const struct hwseq_private_funcs dcn20_private_funcs = {
.reset_hw_ctx_wrap = dcn20_reset_hw_ctx_wrap,
.enable_stream_timing = dcn20_enable_stream_timing,
.edp_backlight_control = dce110_edp_backlight_control,
- .is_panel_backlight_on = dce110_is_panel_backlight_on,
- .is_panel_powered_on = dce110_is_panel_powered_on,
.disable_stream_gating = dcn20_disable_stream_gating,
.enable_stream_gating = dcn20_enable_stream_gating,
.setup_vupdate_interrupt = dcn20_setup_vupdate_interrupt,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_link_encoder.c
index e4ac73035c84..8d209dae66e6 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_link_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_link_encoder.c
@@ -49,6 +49,12 @@
#define IND_REG(index) \
(enc10->link_regs->index)
+#ifndef MAX
+#define MAX(X, Y) ((X) > (Y) ? (X) : (Y))
+#endif
+#ifndef MIN
+#define MIN(X, Y) ((X) < (Y) ? (X) : (Y))
+#endif
static struct mpll_cfg dcn2_mpll_cfg[] = {
// RBR
@@ -260,6 +266,38 @@ void dcn20_link_encoder_enable_dp_output(
}
+void dcn20_link_encoder_get_max_link_cap(struct link_encoder *enc,
+ struct dc_link_settings *link_settings)
+{
+ struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+ uint32_t is_in_usb_c_dp4_mode = 0;
+
+ dcn10_link_encoder_get_max_link_cap(enc, link_settings);
+
+ /* in usb c dp2 mode, max lane count is 2 */
+ if (enc->funcs->is_in_alt_mode && enc->funcs->is_in_alt_mode(enc)) {
+ REG_GET(RDPCSTX_PHY_CNTL6, RDPCS_PHY_DPALT_DP4, &is_in_usb_c_dp4_mode);
+ if (!is_in_usb_c_dp4_mode)
+ link_settings->lane_count = MIN(LANE_COUNT_TWO, link_settings->lane_count);
+ }
+
+}
+
+bool dcn20_link_encoder_is_in_alt_mode(struct link_encoder *enc)
+{
+ struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+ uint32_t dp_alt_mode_disable = 0;
+ bool is_usb_c_alt_mode = false;
+
+ if (enc->features.flags.bits.DP_IS_USB_C) {
+ /* if value == 1, alt mode is disabled; otherwise it is enabled */
+ REG_GET(RDPCSTX_PHY_CNTL6, RDPCS_PHY_DPALT_DISABLE, &dp_alt_mode_disable);
+ is_usb_c_alt_mode = (dp_alt_mode_disable == 0);
+ }
+
+ return is_usb_c_alt_mode;
+}
+
#define AUX_REG(reg)\
(enc10->aux_regs->reg)
@@ -338,6 +376,8 @@ static const struct link_encoder_funcs dcn20_link_enc_funcs = {
.fec_is_active = enc2_fec_is_active,
.get_dig_mode = dcn10_get_dig_mode,
.get_dig_frontend = dcn10_get_dig_frontend,
+ .is_in_alt_mode = dcn20_link_encoder_is_in_alt_mode,
+ .get_max_link_cap = dcn20_link_encoder_get_max_link_cap,
};
void dcn20_link_encoder_construct(
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_link_encoder.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_link_encoder.h
index 8cab8107fd94..284a1ee4d249 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_link_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_link_encoder.h
@@ -343,6 +343,10 @@ void dcn20_link_encoder_enable_dp_output(
const struct dc_link_settings *link_settings,
enum clock_source_id clock_source);
+bool dcn20_link_encoder_is_in_alt_mode(struct link_encoder *enc);
+void dcn20_link_encoder_get_max_link_cap(struct link_encoder *enc,
+ struct dc_link_settings *link_settings);
+
void dcn20_link_encoder_construct(
struct dcn20_link_encoder *enc20,
const struct encoder_init_data *init_data,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c
index 570dfd9a243f..99cc095dc33c 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c
@@ -452,7 +452,7 @@ void mpc2_set_output_gamma(
next_mode = LUT_RAM_A;
mpc20_power_on_ogam_lut(mpc, mpcc_id, true);
- mpc20_configure_ogam_lut(mpc, mpcc_id, next_mode == LUT_RAM_A ? true:false);
+ mpc20_configure_ogam_lut(mpc, mpcc_id, next_mode == LUT_RAM_A);
if (next_mode == LUT_RAM_A)
mpc2_program_luta(mpc, mpcc_id, params);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c
index d875b0c38fde..8c16967fe018 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c
@@ -409,6 +409,18 @@ void optc2_program_manual_trigger(struct timing_generator *optc)
OTG_TRIGA_MANUAL_TRIG, 1);
}
+bool optc2_configure_crc(struct timing_generator *optc,
+ const struct crc_params *params)
+{
+ struct optc *optc1 = DCN10TG_FROM_TG(optc);
+
+ REG_SET_2(OTG_CRC_CNTL2, 0,
+ OTG_CRC_DSC_MODE, params->dsc_mode,
+ OTG_CRC_DATA_STREAM_COMBINE_MODE, params->odm_mode);
+
+ return optc1_configure_crc(optc, params);
+}
+
static struct timing_generator_funcs dcn20_tg_funcs = {
.validate_timing = optc1_validate_timing,
.program_timing = optc1_program_timing,
@@ -452,7 +464,7 @@ static struct timing_generator_funcs dcn20_tg_funcs = {
.clear_optc_underflow = optc1_clear_optc_underflow,
.setup_global_swap_lock = NULL,
.get_crc = optc1_get_crc,
- .configure_crc = optc1_configure_crc,
+ .configure_crc = optc2_configure_crc,
.set_dsc_config = optc2_set_dsc_config,
.set_dwb_source = optc2_set_dwb_source,
.set_odm_bypass = optc2_set_odm_bypass,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.h
index 239cc40ae474..e0a0a8a8e2c6 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.h
@@ -36,6 +36,7 @@
SRI(OTG_GSL_WINDOW_Y, OTG, inst),\
SRI(OTG_VUPDATE_KEEPOUT, OTG, inst),\
SRI(OTG_DSC_START_POSITION, OTG, inst),\
+ SRI(OTG_CRC_CNTL2, OTG, inst),\
SRI(OPTC_DATA_FORMAT_CONTROL, ODM, inst),\
SRI(OPTC_BYTES_PER_PIXEL, ODM, inst),\
SRI(OPTC_WIDTH_CONTROL, ODM, inst),\
@@ -62,6 +63,10 @@
SF(OTG0_OTG_GSL_CONTROL, OTG_MASTER_UPDATE_LOCK_GSL_EN, mask_sh), \
SF(OTG0_OTG_DSC_START_POSITION, OTG_DSC_START_POSITION_X, mask_sh), \
SF(OTG0_OTG_DSC_START_POSITION, OTG_DSC_START_POSITION_LINE_NUM, mask_sh),\
+ SF(OTG0_OTG_CRC_CNTL2, OTG_CRC_DSC_MODE, mask_sh),\
+ SF(OTG0_OTG_CRC_CNTL2, OTG_CRC_DATA_STREAM_COMBINE_MODE, mask_sh),\
+ SF(OTG0_OTG_CRC_CNTL2, OTG_CRC_DATA_STREAM_SPLIT_MODE, mask_sh),\
+ SF(OTG0_OTG_CRC_CNTL2, OTG_CRC_DATA_FORMAT, mask_sh),\
SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_SEG0_SRC_SEL, mask_sh),\
SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_SEG1_SRC_SEL, mask_sh),\
SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_NUM_OF_INPUT_SEGMENT, mask_sh),\
@@ -109,4 +114,6 @@ void optc2_lock_doublebuffer_enable(struct timing_generator *optc);
void optc2_setup_manual_trigger(struct timing_generator *optc);
void optc2_program_manual_trigger(struct timing_generator *optc);
bool optc2_is_two_pixels_per_containter(const struct dc_crtc_timing *timing);
+bool optc2_configure_crc(struct timing_generator *optc,
+ const struct crc_params *params);
#endif /* __DC_OPTC_DCN20_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
index e4348e3b6389..cef1aa938ab5 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
@@ -61,6 +61,7 @@
#include "dcn20_dccg.h"
#include "dcn20_vmid.h"
#include "dc_link_ddc.h"
+#include "dce/dce_panel_cntl.h"
#include "navi10_ip_offset.h"
@@ -691,6 +692,18 @@ static const struct dcn10_link_enc_mask le_mask = {
DPCS_DCN2_MASK_SH_LIST(_MASK)
};
+static const struct dce_panel_cntl_registers panel_cntl_regs[] = {
+ { DCN_PANEL_CNTL_REG_LIST() }
+};
+
+static const struct dce_panel_cntl_shift panel_cntl_shift = {
+ DCE_PANEL_CNTL_MASK_SH_LIST(__SHIFT)
+};
+
+static const struct dce_panel_cntl_mask panel_cntl_mask = {
+ DCE_PANEL_CNTL_MASK_SH_LIST(_MASK)
+};
+
#define ipp_regs(id)\
[id] = {\
IPP_REG_LIST_DCN20(id),\
@@ -1293,6 +1306,23 @@ struct link_encoder *dcn20_link_encoder_create(
return &enc20->enc10.base;
}
+static struct panel_cntl *dcn20_panel_cntl_create(const struct panel_cntl_init_data *init_data)
+{
+ struct dce_panel_cntl *panel_cntl =
+ kzalloc(sizeof(struct dce_panel_cntl), GFP_KERNEL);
+
+ if (!panel_cntl)
+ return NULL;
+
+ dce_panel_cntl_construct(panel_cntl,
+ init_data,
+ &panel_cntl_regs[init_data->inst],
+ &panel_cntl_shift,
+ &panel_cntl_mask);
+
+ return &panel_cntl->base;
+}
+
struct clock_source *dcn20_clock_source_create(
struct dc_context *ctx,
struct dc_bios *bios,
@@ -1623,24 +1653,6 @@ enum dc_status dcn20_build_mapped_resource(const struct dc *dc, struct dc_state
enum dc_status status = DC_OK;
struct pipe_ctx *pipe_ctx = resource_get_head_pipe_for_stream(&context->res_ctx, stream);
- /*TODO Seems unneeded anymore */
- /* if (old_context && resource_is_stream_unchanged(old_context, stream)) {
- if (stream != NULL && old_context->streams[i] != NULL) {
- todo: shouldn't have to copy missing parameter here
- resource_build_bit_depth_reduction_params(stream,
- &stream->bit_depth_params);
- stream->clamping.pixel_encoding =
- stream->timing.pixel_encoding;
-
- resource_build_bit_depth_reduction_params(stream,
- &stream->bit_depth_params);
- build_clamping_params(stream);
-
- continue;
- }
- }
- */
-
if (!pipe_ctx)
return DC_ERROR_UNEXPECTED;
@@ -1651,22 +1663,32 @@ enum dc_status dcn20_build_mapped_resource(const struct dc *dc, struct dc_state
}
-static void acquire_dsc(struct resource_context *res_ctx,
- const struct resource_pool *pool,
+void dcn20_acquire_dsc(const struct dc *dc,
+ struct resource_context *res_ctx,
struct display_stream_compressor **dsc,
int pipe_idx)
{
int i;
+ const struct resource_pool *pool = dc->res_pool;
+ struct display_stream_compressor *dsc_old = dc->current_state->res_ctx.pipe_ctx[pipe_idx].stream_res.dsc;
- ASSERT(*dsc == NULL);
+ ASSERT(*dsc == NULL); /* If this ASSERT fails, dsc was not released properly */
*dsc = NULL;
+ /* Always do 1-to-1 mapping when number of DSCs is same as number of pipes */
if (pool->res_cap->num_dsc == pool->res_cap->num_opp) {
*dsc = pool->dscs[pipe_idx];
res_ctx->is_dsc_acquired[pipe_idx] = true;
return;
}
+ /* Return old DSC to avoid the need for re-programming */
+ if (dsc_old && !res_ctx->is_dsc_acquired[dsc_old->inst]) {
+ *dsc = dsc_old;
+ res_ctx->is_dsc_acquired[dsc_old->inst] = true;
+ return ;
+ }
+
/* Find first free DSC */
for (i = 0; i < pool->res_cap->num_dsc; i++)
if (!res_ctx->is_dsc_acquired[i]) {
@@ -1698,7 +1720,6 @@ enum dc_status dcn20_add_dsc_to_stream_resource(struct dc *dc,
{
enum dc_status result = DC_OK;
int i;
- const struct resource_pool *pool = dc->res_pool;
/* Get a DSC if required and available */
for (i = 0; i < dc->res_pool->pipe_count; i++) {
@@ -1710,7 +1731,7 @@ enum dc_status dcn20_add_dsc_to_stream_resource(struct dc *dc,
if (pipe_ctx->stream_res.dsc)
continue;
- acquire_dsc(&dc_ctx->res_ctx, pool, &pipe_ctx->stream_res.dsc, i);
+ dcn20_acquire_dsc(dc, &dc_ctx->res_ctx, &pipe_ctx->stream_res.dsc, i);
/* The number of DSCs can be less than the number of pipes */
if (!pipe_ctx->stream_res.dsc) {
@@ -1838,12 +1859,13 @@ static void swizzle_to_dml_params(
}
bool dcn20_split_stream_for_odm(
+ const struct dc *dc,
struct resource_context *res_ctx,
- const struct resource_pool *pool,
struct pipe_ctx *prev_odm_pipe,
struct pipe_ctx *next_odm_pipe)
{
int pipe_idx = next_odm_pipe->pipe_idx;
+ const struct resource_pool *pool = dc->res_pool;
*next_odm_pipe = *prev_odm_pipe;
@@ -1901,7 +1923,7 @@ bool dcn20_split_stream_for_odm(
}
next_odm_pipe->stream_res.opp = pool->opps[next_odm_pipe->pipe_idx];
if (next_odm_pipe->stream->timing.flags.DSC == 1) {
- acquire_dsc(res_ctx, pool, &next_odm_pipe->stream_res.dsc, next_odm_pipe->pipe_idx);
+ dcn20_acquire_dsc(dc, res_ctx, &next_odm_pipe->stream_res.dsc, next_odm_pipe->pipe_idx);
ASSERT(next_odm_pipe->stream_res.dsc);
if (next_odm_pipe->stream_res.dsc == NULL)
return false;
@@ -1939,8 +1961,6 @@ void dcn20_split_stream_for_mpc(
secondary_pipe->top_pipe = primary_pipe;
ASSERT(primary_pipe->plane_state);
- resource_build_scaling_params(primary_pipe);
- resource_build_scaling_params(secondary_pipe);
}
void dcn20_populate_dml_writeback_from_context(
@@ -2216,12 +2236,12 @@ int dcn20_populate_dml_pipes_from_context(
|| pipes[pipe_cnt].pipe.dest.odm_combine != dm_odm_combine_mode_disabled;
pipes[pipe_cnt].pipe.src.source_scan = pln->rotation == ROTATION_ANGLE_90
|| pln->rotation == ROTATION_ANGLE_270 ? dm_vert : dm_horz;
- pipes[pipe_cnt].pipe.src.viewport_y_y = scl->viewport.y;
- pipes[pipe_cnt].pipe.src.viewport_y_c = scl->viewport_c.y;
- pipes[pipe_cnt].pipe.src.viewport_width = scl->viewport.width;
- pipes[pipe_cnt].pipe.src.viewport_width_c = scl->viewport_c.width;
- pipes[pipe_cnt].pipe.src.viewport_height = scl->viewport.height;
- pipes[pipe_cnt].pipe.src.viewport_height_c = scl->viewport_c.height;
+ pipes[pipe_cnt].pipe.src.viewport_y_y = scl->viewport_unadjusted.y;
+ pipes[pipe_cnt].pipe.src.viewport_y_c = scl->viewport_c_unadjusted.y;
+ pipes[pipe_cnt].pipe.src.viewport_width = scl->viewport_unadjusted.width;
+ pipes[pipe_cnt].pipe.src.viewport_width_c = scl->viewport_c_unadjusted.width;
+ pipes[pipe_cnt].pipe.src.viewport_height = scl->viewport_unadjusted.height;
+ pipes[pipe_cnt].pipe.src.viewport_height_c = scl->viewport_c_unadjusted.height;
pipes[pipe_cnt].pipe.src.surface_width_y = pln->plane_size.surface_size.width;
pipes[pipe_cnt].pipe.src.surface_height_y = pln->plane_size.surface_size.height;
pipes[pipe_cnt].pipe.src.surface_width_c = pln->plane_size.chroma_size.width;
@@ -2570,13 +2590,15 @@ int dcn20_validate_apply_pipe_split_flags(
struct dc *dc,
struct dc_state *context,
int vlevel,
- bool *split,
+ int *split,
bool *merge)
{
int i, pipe_idx, vlevel_split;
int plane_count = 0;
bool force_split = false;
bool avoid_split = dc->debug.pipe_split_policy == MPC_SPLIT_AVOID;
+ struct vba_vars_st *v = &context->bw_ctx.dml.vba;
+ int max_mpc_comb = v->maxMpcComb;
if (context->stream_count > 1) {
if (dc->debug.pipe_split_policy == MPC_SPLIT_AVOID_MULT_DISP)
@@ -2584,10 +2606,22 @@ int dcn20_validate_apply_pipe_split_flags(
} else if (dc->debug.force_single_disp_pipe_split)
force_split = true;
- /* TODO: fix dc bugs and remove this split threshold thing */
for (i = 0; i < dc->res_pool->pipe_count; i++) {
struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+ /**
+ * Workaround for avoiding pipe-split in cases where we'd split
+ * planes that are too small, resulting in splits that aren't
+ * valid for the scaler.
+ */
+ if (pipe->plane_state &&
+ (pipe->plane_state->dst_rect.width <= 16 ||
+ pipe->plane_state->dst_rect.height <= 16 ||
+ pipe->plane_state->src_rect.width <= 16 ||
+ pipe->plane_state->src_rect.height <= 16))
+ avoid_split = true;
+
+ /* TODO: fix dc bugs and remove this split threshold thing */
if (pipe->stream && !pipe->prev_odm_pipe &&
(!pipe->top_pipe || pipe->top_pipe->plane_state != pipe->plane_state))
++plane_count;
@@ -2602,26 +2636,35 @@ int dcn20_validate_apply_pipe_split_flags(
continue;
for (vlevel_split = vlevel; vlevel <= context->bw_ctx.dml.soc.num_states; vlevel++)
- if (context->bw_ctx.dml.vba.NoOfDPP[vlevel][0][pipe_idx] == 1)
+ if (v->NoOfDPP[vlevel][0][pipe_idx] == 1 &&
+ v->ModeSupport[vlevel][0])
break;
/* Impossible to not split this pipe */
if (vlevel > context->bw_ctx.dml.soc.num_states)
vlevel = vlevel_split;
+ else
+ max_mpc_comb = 0;
pipe_idx++;
}
- context->bw_ctx.dml.vba.maxMpcComb = 0;
+ v->maxMpcComb = max_mpc_comb;
}
/* Split loop sets which pipe should be split based on dml outputs and dc flags */
for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
- int pipe_plane = context->bw_ctx.dml.vba.pipe_plane[pipe_idx];
+ int pipe_plane = v->pipe_plane[pipe_idx];
+ bool split4mpc = context->stream_count == 1 && plane_count == 1
+ && dc->config.enable_4to1MPC && dc->res_pool->pipe_count >= 4;
if (!context->res_ctx.pipe_ctx[i].stream)
continue;
- if (force_split || context->bw_ctx.dml.vba.NoOfDPP[vlevel][context->bw_ctx.dml.vba.maxMpcComb][pipe_plane] > 1)
- split[i] = true;
+ if (force_split || v->NoOfDPP[vlevel][max_mpc_comb][pipe_plane] > 1) {
+ if (split4mpc)
+ split[i] = 4;
+ else
+ split[i] = 2;
+ }
if ((pipe->stream->view_format ==
VIEW_3D_FORMAT_SIDE_BY_SIDE ||
pipe->stream->view_format ==
@@ -2630,50 +2673,75 @@ int dcn20_validate_apply_pipe_split_flags(
TIMING_3D_FORMAT_TOP_AND_BOTTOM ||
pipe->stream->timing.timing_3d_format ==
TIMING_3D_FORMAT_SIDE_BY_SIDE))
- split[i] = true;
+ split[i] = 2;
if (dc->debug.force_odm_combine & (1 << pipe->stream_res.tg->inst)) {
- split[i] = true;
- context->bw_ctx.dml.vba.ODMCombineEnablePerState[vlevel][pipe_plane] = dm_odm_combine_mode_2to1;
+ split[i] = 2;
+ v->ODMCombineEnablePerState[vlevel][pipe_plane] = dm_odm_combine_mode_2to1;
}
- context->bw_ctx.dml.vba.ODMCombineEnabled[pipe_plane] =
- context->bw_ctx.dml.vba.ODMCombineEnablePerState[vlevel][pipe_plane];
-
- if (pipe->prev_odm_pipe && context->bw_ctx.dml.vba.ODMCombineEnabled[pipe_plane] != dm_odm_combine_mode_disabled) {
- /*Already split odm pipe tree, don't try to split again*/
- split[i] = false;
- split[pipe->prev_odm_pipe->pipe_idx] = false;
- } else if (pipe->top_pipe && pipe->plane_state == pipe->top_pipe->plane_state
- && context->bw_ctx.dml.vba.ODMCombineEnabled[pipe_plane] == dm_odm_combine_mode_disabled) {
- /*Already split mpc tree, don't try to split again, assumes only 2x mpc combine*/
- split[i] = false;
- split[pipe->top_pipe->pipe_idx] = false;
- } else if (pipe->prev_odm_pipe || (pipe->top_pipe && pipe->plane_state == pipe->top_pipe->plane_state)) {
- if (split[i] == false) {
- /*Exiting mpc/odm combine*/
- merge[i] = true;
+ v->ODMCombineEnabled[pipe_plane] =
+ v->ODMCombineEnablePerState[vlevel][pipe_plane];
+
+ if (v->ODMCombineEnabled[pipe_plane] == dm_odm_combine_mode_disabled) {
+ if (get_num_mpc_splits(pipe) == 1) {
+ /*If need split for mpc but 2 way split already*/
+ if (split[i] == 4)
+ split[i] = 2; /* 2 -> 4 MPC */
+ else if (split[i] == 2)
+ split[i] = 0; /* 2 -> 2 MPC */
+ else if (pipe->top_pipe && pipe->top_pipe->plane_state == pipe->plane_state)
+ merge[i] = true; /* 2 -> 1 MPC */
+ } else if (get_num_mpc_splits(pipe) == 3) {
+ /*If need split for mpc but 4 way split already*/
+ if (split[i] == 2 && ((pipe->top_pipe && !pipe->top_pipe->top_pipe)
+ || !pipe->bottom_pipe)) {
+ merge[i] = true; /* 4 -> 2 MPC */
+ } else if (split[i] == 0 && pipe->top_pipe &&
+ pipe->top_pipe->plane_state == pipe->plane_state)
+ merge[i] = true; /* 4 -> 1 MPC */
+ split[i] = 0;
+ } else if (get_num_odm_splits(pipe)) {
+ /* ODM -> MPC transition */
+ ASSERT(0); /* NOT expected yet */
if (pipe->prev_odm_pipe) {
- ASSERT(0); /*should not actually happen yet*/
- merge[pipe->prev_odm_pipe->pipe_idx] = true;
- } else
- merge[pipe->top_pipe->pipe_idx] = true;
- } else {
- /*Transition from mpc combine to odm combine or vice versa*/
- ASSERT(0); /*should not actually happen yet*/
- split[i] = true;
- merge[i] = true;
- if (pipe->prev_odm_pipe) {
- split[pipe->prev_odm_pipe->pipe_idx] = true;
- merge[pipe->prev_odm_pipe->pipe_idx] = true;
- } else {
- split[pipe->top_pipe->pipe_idx] = true;
- merge[pipe->top_pipe->pipe_idx] = true;
+ split[i] = 0;
+ merge[i] = true;
+ }
+ }
+ } else {
+ if (get_num_odm_splits(pipe) == 1) {
+ /*If need split for odm but 2 way split already*/
+ if (split[i] == 4)
+ split[i] = 2; /* 2 -> 4 ODM */
+ else if (split[i] == 2)
+ split[i] = 0; /* 2 -> 2 ODM */
+ else if (pipe->prev_odm_pipe) {
+ ASSERT(0); /* NOT expected yet */
+ merge[i] = true; /* exit ODM */
+ }
+ } else if (get_num_odm_splits(pipe) == 3) {
+ /*If need split for odm but 4 way split already*/
+ if (split[i] == 2 && ((pipe->prev_odm_pipe && !pipe->prev_odm_pipe->prev_odm_pipe)
+ || !pipe->next_odm_pipe)) {
+ ASSERT(0); /* NOT expected yet */
+ merge[i] = true; /* 4 -> 2 ODM */
+ } else if (split[i] == 0 && pipe->prev_odm_pipe) {
+ ASSERT(0); /* NOT expected yet */
+ merge[i] = true; /* exit ODM */
+ }
+ split[i] = 0;
+ } else if (get_num_mpc_splits(pipe)) {
+ /* MPC -> ODM transition */
+ ASSERT(0); /* NOT expected yet */
+ if (pipe->top_pipe && pipe->top_pipe->plane_state == pipe->plane_state) {
+ split[i] = 0;
+ merge[i] = true;
}
}
}
/* Adjust dppclk when split is forced, do not bother with dispclk */
- if (split[i] && context->bw_ctx.dml.vba.NoOfDPP[vlevel][context->bw_ctx.dml.vba.maxMpcComb][pipe_idx] == 1)
- context->bw_ctx.dml.vba.RequiredDPPCLK[vlevel][context->bw_ctx.dml.vba.maxMpcComb][pipe_idx] /= 2;
+ if (split[i] != 0 && v->NoOfDPP[vlevel][max_mpc_comb][pipe_idx] == 1)
+ v->RequiredDPPCLK[vlevel][max_mpc_comb][pipe_idx] /= 2;
pipe_idx++;
}
@@ -2689,7 +2757,7 @@ bool dcn20_fast_validate_bw(
int *vlevel_out)
{
bool out = false;
- bool split[MAX_PIPES] = { false };
+ int split[MAX_PIPES] = { 0 };
int pipe_cnt, i, pipe_idx, vlevel;
ASSERT(pipes);
@@ -2731,7 +2799,7 @@ bool dcn20_fast_validate_bw(
hsplit_pipe = dcn20_find_secondary_pipe(dc, &context->res_ctx, dc->res_pool, pipe);
ASSERT(hsplit_pipe);
if (!dcn20_split_stream_for_odm(
- &context->res_ctx, dc->res_pool,
+ dc, &context->res_ctx,
pipe, hsplit_pipe))
goto validate_fail;
pipe_split_from[hsplit_pipe->pipe_idx] = pipe_idx;
@@ -2749,7 +2817,7 @@ bool dcn20_fast_validate_bw(
&& context->bw_ctx.dml.vba.ODMCombineEnabled[pipe_idx])
goto validate_fail;
- if (split[i]) {
+ if (split[i] == 2) {
if (!hsplit_pipe || hsplit_pipe->plane_state != pipe->plane_state) {
/* pipe not split previously needs split */
hsplit_pipe = dcn20_find_secondary_pipe(dc, &context->res_ctx, dc->res_pool, pipe);
@@ -2760,14 +2828,17 @@ bool dcn20_fast_validate_bw(
}
if (context->bw_ctx.dml.vba.ODMCombineEnabled[pipe_idx]) {
if (!dcn20_split_stream_for_odm(
- &context->res_ctx, dc->res_pool,
+ dc, &context->res_ctx,
pipe, hsplit_pipe))
goto validate_fail;
dcn20_build_mapped_resource(dc, context, pipe->stream);
- } else
+ } else {
dcn20_split_stream_for_mpc(
- &context->res_ctx, dc->res_pool,
- pipe, hsplit_pipe);
+ &context->res_ctx, dc->res_pool,
+ pipe, hsplit_pipe);
+ if (!resource_build_scaling_params(pipe) || !resource_build_scaling_params(hsplit_pipe))
+ goto validate_fail;
+ }
pipe_split_from[hsplit_pipe->pipe_idx] = pipe_idx;
}
} else if (hsplit_pipe && hsplit_pipe->plane_state == pipe->plane_state) {
@@ -3007,7 +3078,7 @@ void dcn20_calculate_dlg_params(
pipe_idx,
cstate_en,
context->bw_ctx.bw.dcn.clk.p_state_change_support,
- false, false, false);
+ false, false, true);
context->bw_ctx.dml.funcs.rq_dlg_get_rq_reg(&context->bw_ctx.dml,
&context->res_ctx.pipe_ctx[i].rq_regs,
@@ -3091,6 +3162,8 @@ static noinline bool dcn20_validate_bandwidth_fp(struct dc *dc,
p_state_latency_us = context->bw_ctx.dml.soc.dram_clock_change_latency_us;
context->bw_ctx.dml.soc.disable_dram_clock_change_vactive_support =
dc->debug.disable_dram_clock_change_vactive_support;
+ context->bw_ctx.dml.soc.allow_dram_clock_one_display_vactive =
+ dc->debug.enable_dram_clock_change_one_display_vactive;
if (fast_validate) {
return dcn20_validate_bandwidth_internal(dc, context, true);
@@ -3189,8 +3262,6 @@ static struct dc_cap_funcs cap_funcs = {
enum dc_status dcn20_patch_unknown_plane_state(struct dc_plane_state *plane_state)
{
- enum dc_status result = DC_OK;
-
enum surface_pixel_format surf_pix_format = plane_state->format;
unsigned int bpp = resource_pixel_format_to_bpp(surf_pix_format);
@@ -3202,12 +3273,13 @@ enum dc_status dcn20_patch_unknown_plane_state(struct dc_plane_state *plane_stat
swizzle = DC_SW_64KB_S;
plane_state->tiling_info.gfx9.swizzle = swizzle;
- return result;
+ return DC_OK;
}
static struct resource_funcs dcn20_res_pool_funcs = {
.destroy = dcn20_destroy_resource_pool,
.link_enc_create = dcn20_link_encoder_create,
+ .panel_cntl_create = dcn20_panel_cntl_create,
.validate_bandwidth = dcn20_validate_bandwidth,
.acquire_idle_pipe_for_layer = dcn20_acquire_idle_pipe_for_layer,
.add_stream_to_ctx = dcn20_add_stream_to_ctx,
@@ -3446,6 +3518,13 @@ void dcn20_patch_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_st
bb->dram_clock_change_latency_us =
dc->bb_overrides.dram_clock_change_latency_ns / 1000.0;
}
+
+ if ((int)(bb->dummy_pstate_latency_us * 1000)
+ != dc->bb_overrides.dummy_clock_change_latency_ns
+ && dc->bb_overrides.dummy_clock_change_latency_ns) {
+ bb->dummy_pstate_latency_us =
+ dc->bb_overrides.dummy_clock_change_latency_ns / 1000.0;
+ }
}
static struct _vcs_dpi_soc_bounding_box_st *get_asic_rev_soc_bb(
@@ -3681,9 +3760,42 @@ static bool dcn20_resource_construct(
dc->caps.max_slave_planes = 1;
dc->caps.post_blend_color_processing = true;
dc->caps.force_dp_tps4_for_cp2520 = true;
- dc->caps.hw_3d_lut = true;
dc->caps.extended_aux_timeout_support = true;
+ /* Color pipeline capabilities */
+ dc->caps.color.dpp.dcn_arch = 1;
+ dc->caps.color.dpp.input_lut_shared = 0;
+ dc->caps.color.dpp.icsc = 1;
+ dc->caps.color.dpp.dgam_ram = 1;
+ dc->caps.color.dpp.dgam_rom_caps.srgb = 1;
+ dc->caps.color.dpp.dgam_rom_caps.bt2020 = 1;
+ dc->caps.color.dpp.dgam_rom_caps.gamma2_2 = 0;
+ dc->caps.color.dpp.dgam_rom_caps.pq = 0;
+ dc->caps.color.dpp.dgam_rom_caps.hlg = 0;
+ dc->caps.color.dpp.post_csc = 0;
+ dc->caps.color.dpp.gamma_corr = 0;
+
+ dc->caps.color.dpp.hw_3d_lut = 1;
+ dc->caps.color.dpp.ogam_ram = 1;
+ // no OGAM ROM on DCN2, only MPC ROM
+ dc->caps.color.dpp.ogam_rom_caps.srgb = 0;
+ dc->caps.color.dpp.ogam_rom_caps.bt2020 = 0;
+ dc->caps.color.dpp.ogam_rom_caps.gamma2_2 = 0;
+ dc->caps.color.dpp.ogam_rom_caps.pq = 0;
+ dc->caps.color.dpp.ogam_rom_caps.hlg = 0;
+ dc->caps.color.dpp.ocsc = 0;
+
+ dc->caps.color.mpc.gamut_remap = 0;
+ dc->caps.color.mpc.num_3dluts = 0;
+ dc->caps.color.mpc.shared_3d_lut = 0;
+ dc->caps.color.mpc.ogam_ram = 1;
+ dc->caps.color.mpc.ogam_rom_caps.srgb = 0;
+ dc->caps.color.mpc.ogam_rom_caps.bt2020 = 0;
+ dc->caps.color.mpc.ogam_rom_caps.gamma2_2 = 0;
+ dc->caps.color.mpc.ogam_rom_caps.pq = 0;
+ dc->caps.color.mpc.ogam_rom_caps.hlg = 0;
+ dc->caps.color.mpc.ocsc = 1;
+
if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) {
dc->debug = debug_defaults_drv;
} else if (dc->ctx->dce_environment == DCE_ENV_FPGA_MAXIMUS) {
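A note on the pipe-split rework above: the per-pipe split[] flag changes from bool to int, where 0 means no split, 2 requests a 2-way MPC/ODM split, and 4 requests a 4-to-1 MPC combine (only taken for a single stream/plane when at least four pipes are available). A minimal standalone sketch of how such a flag maps to pipe counts -- illustrative only, not the driver's code:

/* Illustrative sketch: interpreting the int split flag (0, 2 or 4). */
#include <stdio.h>

static int pipes_for_split(int split_flag)
{
    switch (split_flag) {
    case 0:
        return 1; /* no split: one pipe per plane */
    case 2:
        return 2; /* 2-way MPC or ODM combine */
    case 4:
        return 4; /* 4-to-1 MPC combine, needs >= 4 free pipes */
    default:
        return -1; /* unexpected value */
    }
}

int main(void)
{
    int split[] = { 0, 2, 4 }; /* hypothetical per-pipe flags */
    int i;

    for (i = 0; i < 3; i++)
        printf("pipe %d uses %d pipe(s)\n", i, pipes_for_split(split[i]));
    return 0;
}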
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h
index 9d5bff9455fd..2c1959845c29 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h
@@ -123,7 +123,7 @@ int dcn20_validate_apply_pipe_split_flags(
struct dc *dc,
struct dc_state *context,
int vlevel,
- bool *split,
+ int *split,
bool *merge);
void dcn20_release_dsc(struct resource_context *res_ctx,
const struct resource_pool *pool,
@@ -135,10 +135,14 @@ void dcn20_split_stream_for_mpc(
struct pipe_ctx *primary_pipe,
struct pipe_ctx *secondary_pipe);
bool dcn20_split_stream_for_odm(
+ const struct dc *dc,
struct resource_context *res_ctx,
- const struct resource_pool *pool,
struct pipe_ctx *prev_odm_pipe,
struct pipe_ctx *next_odm_pipe);
+void dcn20_acquire_dsc(const struct dc *dc,
+ struct resource_context *res_ctx,
+ struct display_stream_compressor **dsc,
+ int pipe_idx);
struct pipe_ctx *dcn20_find_secondary_pipe(struct dc *dc,
struct resource_context *res_ctx,
const struct resource_pool *pool,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c
index 5e2d14b897af..129f0b62f751 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c
@@ -49,11 +49,6 @@
#define FN(reg_name, field_name) \
hubbub1->shifts->field_name, hubbub1->masks->field_name
-#ifdef NUM_VMID
-#undef NUM_VMID
-#endif
-#define NUM_VMID 16
-
static uint32_t convert_and_clamp(
uint32_t wm_ns,
uint32_t refclk_mhz,
@@ -138,7 +133,7 @@ int hubbub21_init_dchub(struct hubbub *hubbub,
dcn21_dchvm_init(hubbub);
- return NUM_VMID;
+ return hubbub1->num_vmid;
}
bool hubbub21_program_urgent_watermarks(
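The hunk above drops the hard-coded NUM_VMID macro; the VMID count now comes from the per-ASIC resource caps (see the num_vmid = 16 change in dcn21_resource.c further down). Roughly, the pattern looks like this simplified sketch, with stand-in types rather than the real hubbub structures:

/* Simplified: VMID count read from a caps table rather than a #define. */
struct resource_caps { int num_vmid; };
struct hubbub        { int num_vmid; };

static const struct resource_caps res_cap_example = { .num_vmid = 16 };

static int hubbub_init_dchub(struct hubbub *hubbub)
{
    /* ... program DCHVM / VMID registers ... */
    return hubbub->num_vmid; /* was: return NUM_VMID; */
}

static void hubbub_create(struct hubbub *hubbub)
{
    hubbub->num_vmid = res_cap_example.num_vmid; /* per-ASIC, not global */
}

int main(void)
{
    struct hubbub hb;

    hubbub_create(&hb);
    return hubbub_init_dchub(&hb) == 16 ? 0 : 1;
}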
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c
index d285ba622d61..960a0716dde5 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c
@@ -778,21 +778,28 @@ void dmcub_PLAT_54186_wa(struct hubp *hubp, struct surface_flip_registers *flip_
{
struct dc_dmub_srv *dmcub = hubp->ctx->dmub_srv;
struct dcn21_hubp *hubp21 = TO_DCN21_HUBP(hubp);
- struct dmub_rb_cmd_PLAT_54186_wa PLAT_54186_wa = { 0 };
-
- PLAT_54186_wa.header.type = DMUB_CMD__PLAT_54186_WA;
- PLAT_54186_wa.flip.DCSURF_PRIMARY_SURFACE_ADDRESS = flip_regs->DCSURF_PRIMARY_SURFACE_ADDRESS;
- PLAT_54186_wa.flip.DCSURF_PRIMARY_SURFACE_ADDRESS_C = flip_regs->DCSURF_PRIMARY_SURFACE_ADDRESS_C;
- PLAT_54186_wa.flip.DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH = flip_regs->DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH;
- PLAT_54186_wa.flip.DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C = flip_regs->DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C;
- PLAT_54186_wa.flip.flip_params.grph_stereo = flip_regs->grph_stereo;
- PLAT_54186_wa.flip.flip_params.hubp_inst = hubp->inst;
- PLAT_54186_wa.flip.flip_params.immediate = flip_regs->immediate;
- PLAT_54186_wa.flip.flip_params.tmz_surface = flip_regs->tmz_surface;
- PLAT_54186_wa.flip.flip_params.vmid = flip_regs->vmid;
+ union dmub_rb_cmd cmd;
+
+ memset(&cmd, 0, sizeof(cmd));
+
+ cmd.PLAT_54186_wa.header.type = DMUB_CMD__PLAT_54186_WA;
+ cmd.PLAT_54186_wa.header.payload_bytes = sizeof(cmd.PLAT_54186_wa.flip);
+ cmd.PLAT_54186_wa.flip.DCSURF_PRIMARY_SURFACE_ADDRESS =
+ flip_regs->DCSURF_PRIMARY_SURFACE_ADDRESS;
+ cmd.PLAT_54186_wa.flip.DCSURF_PRIMARY_SURFACE_ADDRESS_C =
+ flip_regs->DCSURF_PRIMARY_SURFACE_ADDRESS_C;
+ cmd.PLAT_54186_wa.flip.DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH =
+ flip_regs->DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH;
+ cmd.PLAT_54186_wa.flip.DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C =
+ flip_regs->DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C;
+ cmd.PLAT_54186_wa.flip.flip_params.grph_stereo = flip_regs->grph_stereo;
+ cmd.PLAT_54186_wa.flip.flip_params.hubp_inst = hubp->inst;
+ cmd.PLAT_54186_wa.flip.flip_params.immediate = flip_regs->immediate;
+ cmd.PLAT_54186_wa.flip.flip_params.tmz_surface = flip_regs->tmz_surface;
+ cmd.PLAT_54186_wa.flip.flip_params.vmid = flip_regs->vmid;
PERF_TRACE(); // TODO: remove after performance is stable.
- dc_dmub_srv_cmd_queue(dmcub, &PLAT_54186_wa.header);
+ dc_dmub_srv_cmd_queue(dmcub, &cmd);
PERF_TRACE(); // TODO: remove after performance is stable.
dc_dmub_srv_cmd_execute(dmcub);
PERF_TRACE(); // TODO: remove after performance is stable.
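The PLAT_54186 workaround above switches from passing a bare command header to queueing a full union dmub_rb_cmd, zero-initialized so every ring-buffer slot is well defined. A rough standalone sketch of that pattern (the types here are stand-ins, not the real DMUB ABI):

#include <stdint.h>
#include <string.h>

struct cmd_header { uint8_t type; uint8_t payload_bytes; };
struct flip_cmd   { struct cmd_header header; uint32_t surface_addr_lo; };
union  rb_cmd     { struct flip_cmd flip; uint64_t raw[8]; };

static void cmd_queue(const union rb_cmd *cmd)
{
    (void)cmd; /* push the whole union onto the ring buffer */
}

static void send_flip(uint32_t addr_lo)
{
    union rb_cmd cmd;

    memset(&cmd, 0, sizeof(cmd));      /* unused members and padding stay zero */
    cmd.flip.header.type = 1;          /* hypothetical command id */
    cmd.flip.header.payload_bytes = sizeof(cmd.flip.surface_addr_lo);
    cmd.flip.surface_addr_lo = addr_lo;
    cmd_queue(&cmd);                   /* queue the union, not just the header */
}

int main(void)
{
    send_flip(0x1000);
    return 0;
}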
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c
index 4dd634118df2..a5baef7e7a7d 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c
@@ -87,11 +87,9 @@ static const struct hw_sequencer_funcs dcn21_funcs = {
.exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state,
.get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync,
.calc_vupdate_position = dcn10_calc_vupdate_position,
- .set_cursor_position = dcn10_set_cursor_position,
- .set_cursor_attribute = dcn10_set_cursor_attribute,
- .set_cursor_sdr_white_level = dcn10_set_cursor_sdr_white_level,
- .optimize_pwr_state = dcn21_optimize_pwr_state,
- .exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state,
+ .power_down = dce110_power_down,
+ .set_backlight_level = dce110_set_backlight_level,
+ .set_abm_immediate_disable = dce110_set_abm_immediate_disable,
};
static const struct hwseq_private_funcs dcn21_private_funcs = {
@@ -107,8 +105,6 @@ static const struct hwseq_private_funcs dcn21_private_funcs = {
.reset_hw_ctx_wrap = dcn20_reset_hw_ctx_wrap,
.enable_stream_timing = dcn20_enable_stream_timing,
.edp_backlight_control = dce110_edp_backlight_control,
- .is_panel_backlight_on = dce110_is_panel_backlight_on,
- .is_panel_powered_on = dce110_is_panel_powered_on,
.disable_stream_gating = dcn20_disable_stream_gating,
.enable_stream_gating = dcn20_enable_stream_gating,
.setup_vupdate_interrupt = dcn20_setup_vupdate_interrupt,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_link_encoder.c
index e45683ac871a..aa46c35b05a2 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_link_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_link_encoder.c
@@ -203,29 +203,6 @@ static bool update_cfg_data(
return true;
}
-void dcn21_link_encoder_get_max_link_cap(struct link_encoder *enc,
- struct dc_link_settings *link_settings)
-{
- struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
- uint32_t value;
-
- REG_GET(RDPCSTX_PHY_CNTL6, RDPCS_PHY_DPALT_DP4, &value);
-
- if (!value && link_settings->lane_count > LANE_COUNT_TWO)
- link_settings->lane_count = LANE_COUNT_TWO;
-}
-
-bool dcn21_link_encoder_is_in_alt_mode(struct link_encoder *enc)
-{
- struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
- uint32_t value;
-
- REG_GET(RDPCSTX_PHY_CNTL6, RDPCS_PHY_DPALT_DISABLE, &value);
-
- // if value == 1 alt mode is disabled, otherwise it is enabled
- return !value;
-}
-
bool dcn21_link_encoder_acquire_phy(struct link_encoder *enc)
{
struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
@@ -348,8 +325,8 @@ static const struct link_encoder_funcs dcn21_link_enc_funcs = {
.fec_set_ready = enc2_fec_set_ready,
.fec_is_active = enc2_fec_is_active,
.get_dig_frontend = dcn10_get_dig_frontend,
- .is_in_alt_mode = dcn21_link_encoder_is_in_alt_mode,
- .get_max_link_cap = dcn21_link_encoder_get_max_link_cap,
+ .is_in_alt_mode = dcn20_link_encoder_is_in_alt_mode,
+ .get_max_link_cap = dcn20_link_encoder_get_max_link_cap,
};
void dcn21_link_encoder_construct(
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c
index a721bb401ef0..f00a56835084 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c
@@ -61,6 +61,7 @@
#include "dcn21_hubbub.h"
#include "dcn10/dcn10_resource.h"
#include "dce110/dce110_resource.h"
+#include "dce/dce_panel_cntl.h"
#include "dcn20/dcn20_dwb.h"
#include "dcn20/dcn20_mmhubbub.h"
@@ -85,6 +86,7 @@
#include "vm_helper.h"
#include "dcn20/dcn20_vmid.h"
#include "dce/dmub_psr.h"
+#include "dce/dmub_abm.h"
#define SOC_BOUNDING_BOX_VALID false
#define DC_LOGGER_INIT(logger)
@@ -803,7 +805,7 @@ static const struct resource_caps res_cap_rn = {
.num_pll = 5, // maybe 3 because the last two used for USB-c
.num_dwb = 1,
.num_ddc = 5,
- .num_vmid = 1,
+ .num_vmid = 16,
.num_dsc = 3,
};
@@ -995,9 +997,12 @@ static void dcn21_resource_destruct(struct dcn21_resource_pool *pool)
pool->base.dp_clock_source = NULL;
}
-
- if (pool->base.abm != NULL)
- dce_abm_destroy(&pool->base.abm);
+ if (pool->base.abm != NULL) {
+ if (pool->base.abm->ctx->dc->config.disable_dmcu)
+ dmub_abm_destroy(&pool->base.abm);
+ else
+ dce_abm_destroy(&pool->base.abm);
+ }
if (pool->base.dmcu != NULL)
dce_dmcu_destroy(&pool->base.dmcu);
@@ -1290,6 +1295,7 @@ static struct hubbub *dcn21_hubbub_create(struct dc_context *ctx)
vmid->shifts = &vmid_shifts;
vmid->masks = &vmid_masks;
}
+ hubbub->num_vmid = res_cap_rn.num_vmid;
return &hubbub->base;
}
@@ -1379,7 +1385,8 @@ static void update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param
struct dcn21_resource_pool *pool = TO_DCN21_RES_POOL(dc->res_pool);
struct clk_limit_table *clk_table = &bw_params->clk_table;
struct _vcs_dpi_voltage_scaling_st clock_limits[DC__VOLTAGE_STATES];
- unsigned int i, j, closest_clk_lvl;
+ unsigned int i, closest_clk_lvl;
+ int j;
// Default clock levels are used for diags, which may lead to overclocking.
if (!IS_DIAG_DC(dc->ctx->dce_environment)) {
@@ -1591,6 +1598,18 @@ static const struct dcn10_link_enc_registers link_enc_regs[] = {
link_regs(4, E),
};
+static const struct dce_panel_cntl_registers panel_cntl_regs[] = {
+ { DCN_PANEL_CNTL_REG_LIST() }
+};
+
+static const struct dce_panel_cntl_shift panel_cntl_shift = {
+ DCE_PANEL_CNTL_MASK_SH_LIST(__SHIFT)
+};
+
+static const struct dce_panel_cntl_mask panel_cntl_mask = {
+ DCE_PANEL_CNTL_MASK_SH_LIST(_MASK)
+};
+
#define aux_regs(id)\
[id] = {\
DCN2_AUX_REG_LIST(id)\
@@ -1676,6 +1695,24 @@ static struct link_encoder *dcn21_link_encoder_create(
return &enc21->enc10.base;
}
+
+static struct panel_cntl *dcn21_panel_cntl_create(const struct panel_cntl_init_data *init_data)
+{
+ struct dce_panel_cntl *panel_cntl =
+ kzalloc(sizeof(struct dce_panel_cntl), GFP_KERNEL);
+
+ if (!panel_cntl)
+ return NULL;
+
+ dce_panel_cntl_construct(panel_cntl,
+ init_data,
+ &panel_cntl_regs[init_data->inst],
+ &panel_cntl_shift,
+ &panel_cntl_mask);
+
+ return &panel_cntl->base;
+}
+
#define CTX ctx
#define REG(reg_name) \
@@ -1694,12 +1731,8 @@ static int dcn21_populate_dml_pipes_from_context(
{
uint32_t pipe_cnt = dcn20_populate_dml_pipes_from_context(dc, context, pipes);
int i;
- struct resource_context *res_ctx = &context->res_ctx;
-
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- if (!res_ctx->pipe_ctx[i].stream)
- continue;
+ for (i = 0; i < pipe_cnt; i++) {
pipes[i].pipe.src.hostvm = 1;
pipes[i].pipe.src.gpuvm = 1;
@@ -1724,6 +1757,7 @@ enum dc_status dcn21_patch_unknown_plane_state(struct dc_plane_state *plane_stat
static struct resource_funcs dcn21_res_pool_funcs = {
.destroy = dcn21_destroy_resource_pool,
.link_enc_create = dcn21_link_encoder_create,
+ .panel_cntl_create = dcn21_panel_cntl_create,
.validate_bandwidth = dcn21_validate_bandwidth,
.populate_dml_pipes = dcn21_populate_dml_pipes_from_context,
.add_stream_to_ctx = dcn20_add_stream_to_ctx,
@@ -1770,7 +1804,6 @@ static bool dcn21_resource_construct(
dc->caps.i2c_speed_in_khz = 100;
dc->caps.max_cursor_size = 256;
dc->caps.dmdata_alloc_size = 2048;
- dc->caps.hw_3d_lut = true;
dc->caps.max_slave_planes = 1;
dc->caps.post_blend_color_processing = true;
@@ -1779,6 +1812,40 @@ static bool dcn21_resource_construct(
dc->caps.dmcub_support = true;
dc->caps.is_apu = true;
+ /* Color pipeline capabilities */
+ dc->caps.color.dpp.dcn_arch = 1;
+ dc->caps.color.dpp.input_lut_shared = 0;
+ dc->caps.color.dpp.icsc = 1;
+ dc->caps.color.dpp.dgam_ram = 1;
+ dc->caps.color.dpp.dgam_rom_caps.srgb = 1;
+ dc->caps.color.dpp.dgam_rom_caps.bt2020 = 1;
+ dc->caps.color.dpp.dgam_rom_caps.gamma2_2 = 0;
+ dc->caps.color.dpp.dgam_rom_caps.pq = 0;
+ dc->caps.color.dpp.dgam_rom_caps.hlg = 0;
+ dc->caps.color.dpp.post_csc = 0;
+ dc->caps.color.dpp.gamma_corr = 0;
+
+ dc->caps.color.dpp.hw_3d_lut = 1;
+ dc->caps.color.dpp.ogam_ram = 1;
+ // no OGAM ROM on DCN2
+ dc->caps.color.dpp.ogam_rom_caps.srgb = 0;
+ dc->caps.color.dpp.ogam_rom_caps.bt2020 = 0;
+ dc->caps.color.dpp.ogam_rom_caps.gamma2_2 = 0;
+ dc->caps.color.dpp.ogam_rom_caps.pq = 0;
+ dc->caps.color.dpp.ogam_rom_caps.hlg = 0;
+ dc->caps.color.dpp.ocsc = 0;
+
+ dc->caps.color.mpc.gamut_remap = 0;
+ dc->caps.color.mpc.num_3dluts = 0;
+ dc->caps.color.mpc.shared_3d_lut = 0;
+ dc->caps.color.mpc.ogam_ram = 1;
+ dc->caps.color.mpc.ogam_rom_caps.srgb = 0;
+ dc->caps.color.mpc.ogam_rom_caps.bt2020 = 0;
+ dc->caps.color.mpc.ogam_rom_caps.gamma2_2 = 0;
+ dc->caps.color.mpc.ogam_rom_caps.pq = 0;
+ dc->caps.color.mpc.ogam_rom_caps.hlg = 0;
+ dc->caps.color.mpc.ocsc = 1;
+
if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV)
dc->debug = debug_defaults_drv;
else if (dc->ctx->dce_environment == DCE_ENV_FPGA_MAXIMUS) {
@@ -1831,17 +1898,19 @@ static bool dcn21_resource_construct(
goto create_fail;
}
- pool->base.dmcu = dcn21_dmcu_create(ctx,
- &dmcu_regs,
- &dmcu_shift,
- &dmcu_mask);
- if (pool->base.dmcu == NULL) {
- dm_error("DC: failed to create dmcu!\n");
- BREAK_TO_DEBUGGER();
- goto create_fail;
+ if (!dc->config.disable_dmcu) {
+ pool->base.dmcu = dcn21_dmcu_create(ctx,
+ &dmcu_regs,
+ &dmcu_shift,
+ &dmcu_mask);
+ if (pool->base.dmcu == NULL) {
+ dm_error("DC: failed to create dmcu!\n");
+ BREAK_TO_DEBUGGER();
+ goto create_fail;
+ }
}
- if (dc->debug.disable_dmcu) {
+ if (dc->config.disable_dmcu) {
pool->base.psr = dmub_psr_create(ctx);
if (pool->base.psr == NULL) {
@@ -1851,15 +1920,16 @@ static bool dcn21_resource_construct(
}
}
- pool->base.abm = dce_abm_create(ctx,
+ if (dc->config.disable_dmcu)
+ pool->base.abm = dmub_abm_create(ctx,
+ &abm_regs,
+ &abm_shift,
+ &abm_mask);
+ else
+ pool->base.abm = dce_abm_create(ctx,
&abm_regs,
&abm_shift,
&abm_mask);
- if (pool->base.abm == NULL) {
- dm_error("DC: failed to create abm!\n");
- BREAK_TO_DEBUGGER();
- goto create_fail;
- }
pool->base.pp_smu = dcn21_pp_smu_create(ctx);
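In the dcn21_resource.c changes above, the DMCU, PSR and ABM objects are now created conditionally: when dc->config.disable_dmcu is set, the DMUB firmware paths (dmub_psr_create / dmub_abm_create) are used, otherwise the legacy DMCU-driven objects are kept. A condensed sketch of that selection, using placeholder constructors rather than the real ones:

#include <stdio.h>

/* Placeholders standing in for dce_abm_create() / dmub_abm_create(). */
static const char *legacy_abm_create(void) { return "DMCU-driven ABM"; }
static const char *dmub_abm_create_stub(void) { return "DMUB firmware ABM"; }

static const char *create_abm(int disable_dmcu)
{
    /* Exactly one backend is constructed, mirroring the resource code above. */
    return disable_dmcu ? dmub_abm_create_stub() : legacy_abm_create();
}

int main(void)
{
    printf("disable_dmcu=0 -> %s\n", create_abm(0));
    printf("disable_dmcu=1 -> %s\n", create_abm(1));
    return 0;
}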
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c
index 5bbbafacc720..80170f9721ce 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c
@@ -2599,21 +2599,44 @@ static void dml20v2_DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndP
}
}
+ {
+ float SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999;
+ int PlaneWithMinActiveDRAMClockChangeMargin = -1;
+
mode_lib->vba.MinActiveDRAMClockChangeMargin = 999999;
for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
if (mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k]
< mode_lib->vba.MinActiveDRAMClockChangeMargin) {
mode_lib->vba.MinActiveDRAMClockChangeMargin =
mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k];
+ if (mode_lib->vba.BlendingAndTiming[k] == k) {
+ PlaneWithMinActiveDRAMClockChangeMargin = k;
+ } else {
+ for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) {
+ if (mode_lib->vba.BlendingAndTiming[k] == j) {
+ PlaneWithMinActiveDRAMClockChangeMargin = j;
+ }
+ }
+ }
}
}
mode_lib->vba.MinActiveDRAMClockChangeLatencySupported =
mode_lib->vba.MinActiveDRAMClockChangeMargin
+ mode_lib->vba.DRAMClockChangeLatency;
+ for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
+ if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (mode_lib->vba.BlendingAndTiming[k] == k))
+ && !(mode_lib->vba.BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin)
+ && mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k]
+ < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) {
+ SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank =
+ mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k];
+ }
+ }
if (mode_lib->vba.DRAMClockChangeSupportsVActive &&
mode_lib->vba.MinActiveDRAMClockChangeMargin > 60) {
+ mode_lib->vba.DRAMClockChangeWatermark += 25;
for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
if (mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb] == 0) {
@@ -2622,13 +2645,17 @@ static void dml20v2_DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndP
mode_lib->vba.MinTTUVBlank[k] += 25;
}
}
- mode_lib->vba.DRAMClockChangeWatermark += 25;
+
mode_lib->vba.DRAMClockChangeSupport[0][0] = dm_dram_clock_change_vactive;
} else if (mode_lib->vba.DummyPStateCheck &&
mode_lib->vba.MinActiveDRAMClockChangeMargin > 0) {
mode_lib->vba.DRAMClockChangeSupport[0][0] = dm_dram_clock_change_vactive;
} else {
- if (mode_lib->vba.SynchronizedVBlank || mode_lib->vba.NumberOfActivePlanes == 1) {
+ if ((mode_lib->vba.SynchronizedVBlank
+ || mode_lib->vba.NumberOfActivePlanes == 1
+ || (SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0 &&
+ mode_lib->vba.AllowDramClockChangeOneDisplayVactive))
+ && mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb] == 0) {
mode_lib->vba.DRAMClockChangeSupport[0][0] = dm_dram_clock_change_vblank;
for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
if (!mode_lib->vba.AllowDRAMClockChangeDuringVBlank[k]) {
@@ -2640,6 +2667,7 @@ static void dml20v2_DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndP
mode_lib->vba.DRAMClockChangeSupport[0][0] = dm_dram_clock_change_unsupported;
}
}
+ }
for (k = 0; k <= mode_lib->vba.soc.num_states; k++)
for (j = 0; j < 2; j++)
mode_lib->vba.DRAMClockChangeSupport[k][j] = mode_lib->vba.DRAMClockChangeSupport[0][0];
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c
index e6617c958bb8..a576eed94d9b 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c
@@ -3190,6 +3190,7 @@ static void CalculateFlipSchedule(
double TimeForFetchingRowInVBlankImmediateFlip;
double ImmediateFlipBW;
double HostVMInefficiencyFactor;
+ double VRatioClamped;
if (GPUVMEnable == true && HostVMEnable == true) {
HostVMInefficiencyFactor =
@@ -3222,31 +3223,32 @@ static void CalculateFlipSchedule(
*DestinationLinesToRequestRowInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0;
*final_flip_bw = dml_max(PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInImmediateFlip * LineTime), (MetaRowBytes + DPTEBytesPerRow) * HostVMInefficiencyFactor / (*DestinationLinesToRequestRowInImmediateFlip * LineTime));
+ VRatioClamped = (VRatio < 1.0) ? 1.0 : VRatio;
if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10) {
if (GPUVMEnable == true && DCCEnable != true) {
min_row_time = dml_min(
- dpte_row_height * LineTime / VRatio,
- dpte_row_height_chroma * LineTime / (VRatio / 2));
+ dpte_row_height * LineTime / VRatioClamped,
+ dpte_row_height_chroma * LineTime / (VRatioClamped / 2));
} else if (GPUVMEnable != true && DCCEnable == true) {
min_row_time = dml_min(
- meta_row_height * LineTime / VRatio,
- meta_row_height_chroma * LineTime / (VRatio / 2));
+ meta_row_height * LineTime / VRatioClamped,
+ meta_row_height_chroma * LineTime / (VRatioClamped / 2));
} else {
min_row_time = dml_min4(
- dpte_row_height * LineTime / VRatio,
- meta_row_height * LineTime / VRatio,
- dpte_row_height_chroma * LineTime / (VRatio / 2),
- meta_row_height_chroma * LineTime / (VRatio / 2));
+ dpte_row_height * LineTime / VRatioClamped,
+ meta_row_height * LineTime / VRatioClamped,
+ dpte_row_height_chroma * LineTime / (VRatioClamped / 2),
+ meta_row_height_chroma * LineTime / (VRatioClamped / 2));
}
} else {
if (GPUVMEnable == true && DCCEnable != true) {
- min_row_time = dpte_row_height * LineTime / VRatio;
+ min_row_time = dpte_row_height * LineTime / VRatioClamped;
} else if (GPUVMEnable != true && DCCEnable == true) {
- min_row_time = meta_row_height * LineTime / VRatio;
+ min_row_time = meta_row_height * LineTime / VRatioClamped;
} else {
min_row_time = dml_min(
- dpte_row_height * LineTime / VRatio,
- meta_row_height * LineTime / VRatio);
+ dpte_row_height * LineTime / VRatioClamped,
+ meta_row_height * LineTime / VRatioClamped);
}
}
@@ -5944,7 +5946,7 @@ static void CalculateMetaAndPTETimes(
* PixelPTEReqHeightY[k];
}
dpte_groups_per_row_luma_ub = dml_ceil(
- dpte_row_width_luma_ub[k] / dpte_group_width_luma,
+ (float) dpte_row_width_luma_ub[k] / dpte_group_width_luma,
1);
time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * HTotal[k]
/ PixelClock[k] / dpte_groups_per_row_luma_ub;
@@ -5968,7 +5970,7 @@ static void CalculateMetaAndPTETimes(
* PixelPTEReqHeightC[k];
}
dpte_groups_per_row_chroma_ub = dml_ceil(
- dpte_row_width_chroma_ub[k]
+ (float) dpte_row_width_chroma_ub[k]
/ dpte_group_width_chroma,
1);
time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k]
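Two small numerical fixes above are worth calling out: VRatio is clamped to at least 1.0 before it is used as a divisor in the flip-schedule row times, and the dpte group-count divisions are done in floating point before dml_ceil() so integer truncation no longer under-counts groups. A tiny self-contained illustration of both effects (names are local to this sketch):

#include <math.h>
#include <stdio.h>

int main(void)
{
    double v_ratio = 0.5, line_time = 8.0, row_height = 4.0;
    double v_clamped = (v_ratio < 1.0) ? 1.0 : v_ratio;
    int row_width_ub = 7, group_width = 2;

    /* Without the clamp, a VRatio below 1.0 inflates min_row_time. */
    printf("min_row_time unclamped: %f\n", row_height * line_time / v_ratio);
    printf("min_row_time clamped:   %f\n", row_height * line_time / v_clamped);

    /* Integer vs. float division before taking the ceiling. */
    printf("groups (int div):   %f\n", ceil((double)(row_width_ub / group_width)));
    printf("groups (float div): %f\n", ceil((float)row_width_ub / group_width));
    return 0;
}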
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c
index b8ec08e3b7a3..90a5fefef05b 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c
@@ -1490,19 +1490,30 @@ static void dml_rq_dlg_get_dlg_params(
disp_dlg_regs->refcyc_per_pte_group_vblank_l =
(unsigned int) (dst_y_per_row_vblank * (double) htotal
* ref_freq_to_pix_freq / (double) dpte_groups_per_row_ub_l);
- ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_l < (unsigned int)dml_pow(2, 13));
+ if ((refclk_freq_in_mhz / ref_freq_to_pix_freq < 28) &&
+ disp_dlg_regs->refcyc_per_pte_group_vblank_l >= (unsigned int)dml_pow(2, 13))
+ disp_dlg_regs->refcyc_per_pte_group_vblank_l = (1 << 13) - 1;
+ else
+ ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_l < (unsigned int)dml_pow(2, 13));
if (dual_plane) {
disp_dlg_regs->refcyc_per_pte_group_vblank_c = (unsigned int) (dst_y_per_row_vblank
* (double) htotal * ref_freq_to_pix_freq
/ (double) dpte_groups_per_row_ub_c);
- ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_c
+ if ((refclk_freq_in_mhz / ref_freq_to_pix_freq < 28) &&
+ disp_dlg_regs->refcyc_per_pte_group_vblank_c >= (unsigned int)dml_pow(2, 13))
+ disp_dlg_regs->refcyc_per_pte_group_vblank_c = (1 << 13) - 1;
+ else
+ ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_c
< (unsigned int)dml_pow(2, 13));
}
- disp_dlg_regs->refcyc_per_meta_chunk_vblank_l =
+ if (src->dcc)
+ disp_dlg_regs->refcyc_per_meta_chunk_vblank_l =
(unsigned int) (dst_y_per_row_vblank * (double) htotal
* ref_freq_to_pix_freq / (double) meta_chunks_per_row_ub_l);
+ else
+ disp_dlg_regs->refcyc_per_meta_chunk_vblank_l = 0;
ASSERT(disp_dlg_regs->refcyc_per_meta_chunk_vblank_l < (unsigned int)dml_pow(2, 13));
disp_dlg_regs->refcyc_per_meta_chunk_vblank_c =
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h
index 687010c17324..439ffd04be34 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h
@@ -118,9 +118,11 @@ struct _vcs_dpi_soc_bounding_box_st {
double urgent_latency_adjustment_fabric_clock_component_us;
double urgent_latency_adjustment_fabric_clock_reference_mhz;
bool disable_dram_clock_change_vactive_support;
+ bool allow_dram_clock_one_display_vactive;
};
struct _vcs_dpi_ip_params_st {
+ bool use_min_dcfclk;
bool gpuvm_enable;
bool hostvm_enable;
unsigned int gpuvm_max_page_table_levels;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c
index 6b525c52124c..b19988f54721 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c
@@ -224,6 +224,7 @@ static void fetch_socbb_params(struct display_mode_lib *mode_lib)
mode_lib->vba.DummyPStateCheck = soc->dram_clock_change_latency_us == soc->dummy_pstate_latency_us;
mode_lib->vba.DRAMClockChangeSupportsVActive = !soc->disable_dram_clock_change_vactive_support ||
mode_lib->vba.DummyPStateCheck;
+ mode_lib->vba.AllowDramClockChangeOneDisplayVactive = soc->allow_dram_clock_one_display_vactive;
mode_lib->vba.Downspreading = soc->downspread_percent;
mode_lib->vba.DRAMChannelWidth = soc->dram_channel_width_bytes; // new!
@@ -280,6 +281,7 @@ static void fetch_ip_params(struct display_mode_lib *mode_lib)
ip_params_st *ip = &mode_lib->vba.ip;
// IP Parameters
+ mode_lib->vba.UseMinimumRequiredDCFCLK = ip->use_min_dcfclk;
mode_lib->vba.MaxNumDPP = ip->max_num_dpp;
mode_lib->vba.MaxNumOTG = ip->max_num_otg;
mode_lib->vba.MaxNumHDMIFRLOutputs = ip->max_num_hdmi_frl_outputs;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h
index 3a734171f083..3f559e725ab1 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h
@@ -896,6 +896,8 @@ struct vba_vars_st {
bool dummystring[DC__NUM_DPP__MAX];
double BPP;
enum odm_combine_policy ODMCombinePolicy;
+ bool UseMinimumRequiredDCFCLK;
+ bool AllowDramClockChangeOneDisplayVactive;
};
bool CalculateMinAndMaxPrefetchMode(
diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c
index 87d682d25278..0ea6662a1563 100644
--- a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c
+++ b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c
@@ -129,7 +129,7 @@ static bool dsc_line_buff_depth_from_dpcd(int dpcd_line_buff_bit_depth, int *lin
static bool dsc_throughput_from_dpcd(int dpcd_throughput, int *throughput)
{
switch (dpcd_throughput) {
- case DP_DSC_THROUGHPUT_MODE_0_UPSUPPORTED:
+ case DP_DSC_THROUGHPUT_MODE_0_UNSUPPORTED:
*throughput = 0;
break;
case DP_DSC_THROUGHPUT_MODE_0_170:
diff --git a/drivers/gpu/drm/amd/display/dc/hdcp/hdcp_msg.c b/drivers/gpu/drm/amd/display/dc/hdcp/hdcp_msg.c
index 6f730b5bfe42..5e384a8a83dc 100644
--- a/drivers/gpu/drm/amd/display/dc/hdcp/hdcp_msg.c
+++ b/drivers/gpu/drm/amd/display/dc/hdcp/hdcp_msg.c
@@ -322,3 +322,92 @@ static const struct protection_properties dp_11_protection = {
.process_transaction = dp_11_process_transaction
};
+static const struct protection_properties *get_protection_properties_by_signal(
+ struct dc_link *link,
+ enum signal_type st,
+ enum hdcp_version version)
+{
+ switch (version) {
+ case HDCP_VERSION_14:
+ switch (st) {
+ case SIGNAL_TYPE_DVI_SINGLE_LINK:
+ case SIGNAL_TYPE_DVI_DUAL_LINK:
+ case SIGNAL_TYPE_HDMI_TYPE_A:
+ return &hdmi_14_protection;
+ case SIGNAL_TYPE_DISPLAY_PORT:
+ if (link &&
+ (link->dpcd_caps.dongle_type == DISPLAY_DONGLE_DP_VGA_CONVERTER ||
+ link->dpcd_caps.dongle_caps.dongle_type == DISPLAY_DONGLE_DP_VGA_CONVERTER)) {
+ return &non_supported_protection;
+ }
+ return &dp_11_protection;
+ case SIGNAL_TYPE_DISPLAY_PORT_MST:
+ case SIGNAL_TYPE_EDP:
+ return &dp_11_protection;
+ default:
+ return &non_supported_protection;
+ }
+ break;
+ case HDCP_VERSION_22:
+ switch (st) {
+ case SIGNAL_TYPE_DVI_SINGLE_LINK:
+ case SIGNAL_TYPE_DVI_DUAL_LINK:
+ case SIGNAL_TYPE_HDMI_TYPE_A:
+ return &hdmi_14_protection; //todo version2.2
+ case SIGNAL_TYPE_DISPLAY_PORT:
+ case SIGNAL_TYPE_DISPLAY_PORT_MST:
+ case SIGNAL_TYPE_EDP:
+ return &dp_11_protection; //todo version2.2
+ default:
+ return &non_supported_protection;
+ }
+ break;
+ default:
+ return &non_supported_protection;
+ }
+}
+
+enum hdcp_message_status dc_process_hdcp_msg(
+ enum signal_type signal,
+ struct dc_link *link,
+ struct hdcp_protection_message *message_info)
+{
+ enum hdcp_message_status status = HDCP_MESSAGE_FAILURE;
+ uint32_t i = 0;
+
+ const struct protection_properties *protection_props;
+
+ if (!message_info)
+ return HDCP_MESSAGE_UNSUPPORTED;
+
+ if (message_info->msg_id < HDCP_MESSAGE_ID_READ_BKSV ||
+ message_info->msg_id >= HDCP_MESSAGE_ID_MAX)
+ return HDCP_MESSAGE_UNSUPPORTED;
+
+ protection_props =
+ get_protection_properties_by_signal(
+ link,
+ signal,
+ message_info->version);
+
+ if (!protection_props->supported)
+ return HDCP_MESSAGE_UNSUPPORTED;
+
+ if (protection_props->process_transaction(
+ link,
+ message_info)) {
+ status = HDCP_MESSAGE_SUCCESS;
+ } else {
+ for (i = 0; i < message_info->max_retries; i++) {
+ if (protection_props->process_transaction(
+ link,
+ message_info)) {
+ status = HDCP_MESSAGE_SUCCESS;
+ break;
+ }
+ }
+ }
+
+ return status;
+}
+
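The new dc_process_hdcp_msg() above picks a protection backend from the signal type and HDCP version, then retries the transaction up to message_info->max_retries additional times on failure. The retry shape, reduced to a standalone sketch with a stubbed transaction:

#include <stdbool.h>
#include <stdio.h>

/* Stub transaction: fails twice, then succeeds (for illustration only). */
static bool process_transaction(void)
{
    static int calls;
    return ++calls > 2;
}

static bool process_msg_with_retries(unsigned int max_retries)
{
    unsigned int i;

    if (process_transaction())
        return true;
    for (i = 0; i < max_retries; i++)
        if (process_transaction())
            return true;
    return false;
}

int main(void)
{
    printf("result: %s\n", process_msg_with_retries(4) ? "success" : "failure");
    return 0;
}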
diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h
index d523fc9547e7..c7fd702a4a87 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h
@@ -38,6 +38,7 @@
#endif
#include "dwb.h"
#include "mcif_wb.h"
+#include "panel_cntl.h"
#define MAX_CLOCK_SOURCES 7
@@ -92,6 +93,8 @@ struct clk_bw_params;
struct resource_funcs {
void (*destroy)(struct resource_pool **pool);
void (*link_init)(struct dc_link *link);
+ struct panel_cntl*(*panel_cntl_create)(
+ const struct panel_cntl_init_data *panel_cntl_init_data);
struct link_encoder *(*link_enc_create)(
const struct encoder_init_data *init);
bool (*validate_bandwidth)(
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/abm.h b/drivers/gpu/drm/amd/display/dc/inc/hw/abm.h
index d607b3191954..e8ce8c85adf1 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/abm.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/abm.h
@@ -27,27 +27,17 @@
#include "dm_services_types.h"
-struct abm_backlight_registers {
- unsigned int BL_PWM_CNTL;
- unsigned int BL_PWM_CNTL2;
- unsigned int BL_PWM_PERIOD_CNTL;
- unsigned int LVTMA_PWRSEQ_REF_DIV_BL_PWM_REF_DIV;
-};
-
struct abm {
struct dc_context *ctx;
const struct abm_funcs *funcs;
bool dmcu_is_running;
- /* registers setting needs to be saved and restored at InitBacklight */
- struct abm_backlight_registers stored_backlight_registers;
};
struct abm_funcs {
- void (*abm_init)(struct abm *abm);
+ void (*abm_init)(struct abm *abm, uint32_t back_light);
bool (*set_abm_level)(struct abm *abm, unsigned int abm_level);
- bool (*set_abm_immediate_disable)(struct abm *abm);
- bool (*set_pipe)(struct abm *abm, unsigned int controller_id);
- bool (*init_backlight)(struct abm *abm);
+ bool (*set_abm_immediate_disable)(struct abm *abm, unsigned int panel_inst);
+ bool (*set_pipe)(struct abm *abm, unsigned int controller_id, unsigned int panel_inst);
/* backlight_pwm_u16_16 is unsigned 32 bit,
* 16 bit integer + 16 fractional, where 1.0 is max backlight value.
@@ -56,10 +46,13 @@ struct abm_funcs {
unsigned int backlight_pwm_u16_16,
unsigned int frame_ramp,
unsigned int controller_id,
- bool use_smooth_brightness);
+ unsigned int panel_inst);
unsigned int (*get_current_backlight)(struct abm *abm);
unsigned int (*get_target_backlight)(struct abm *abm);
+ bool (*init_abm_config)(struct abm *abm,
+ const char *src,
+ unsigned int bytes);
};
#endif
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h
index f5dd0cc73c63..47a566d82d6e 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h
@@ -144,6 +144,8 @@ struct hubbub_funcs {
void (*allow_self_refresh_control)(struct hubbub *hubbub, bool allow);
void (*apply_DEDCN21_147_wa)(struct hubbub *hubbub);
+
+ void (*force_wm_propagate_to_pipes)(struct hubbub *hubbub);
};
struct hubbub {
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/panel_cntl.h b/drivers/gpu/drm/amd/display/dc/inc/hw/panel_cntl.h
new file mode 100644
index 000000000000..f9ab5abb6462
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/panel_cntl.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+/*
+ * panel_cntl.h
+ *
+ * Created on: Oct 6, 2015
+ * Author: yonsun
+ */
+
+#ifndef DC_PANEL_CNTL_H_
+#define DC_PANEL_CNTL_H_
+
+#include "dc_types.h"
+
+#define MAX_BACKLIGHT_LEVEL 0xFFFF
+
+struct panel_cntl_backlight_registers {
+ unsigned int BL_PWM_CNTL;
+ unsigned int BL_PWM_CNTL2;
+ unsigned int BL_PWM_PERIOD_CNTL;
+ unsigned int LVTMA_PWRSEQ_REF_DIV_BL_PWM_REF_DIV;
+};
+
+struct panel_cntl_funcs {
+ void (*destroy)(struct panel_cntl **panel_cntl);
+ uint32_t (*hw_init)(struct panel_cntl *panel_cntl);
+ bool (*is_panel_backlight_on)(struct panel_cntl *panel_cntl);
+ bool (*is_panel_powered_on)(struct panel_cntl *panel_cntl);
+ void (*store_backlight_level)(struct panel_cntl *panel_cntl);
+ void (*driver_set_backlight)(struct panel_cntl *panel_cntl,
+ uint32_t backlight_pwm_u16_16);
+};
+
+struct panel_cntl_init_data {
+ struct dc_context *ctx;
+ uint32_t inst;
+};
+
+struct panel_cntl {
+ const struct panel_cntl_funcs *funcs;
+ struct dc_context *ctx;
+ uint32_t inst;
+ /* registers setting needs to be saved and restored at InitBacklight */
+ struct panel_cntl_backlight_registers stored_backlight_registers;
+};
+
+#endif /* DC_PANEL_CNTL_H_ */
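The new panel_cntl.h above moves eDP backlight and panel-power state out of the ABM object into a per-panel controller with its own funcs table, created through the resource_funcs hook added elsewhere in this patch. The construction pattern, reduced to stand-in types rather than the real DC objects:

#include <stdlib.h>

struct panel_cntl;

struct panel_cntl_funcs {
    void (*destroy)(struct panel_cntl **panel_cntl);
    unsigned int (*hw_init)(struct panel_cntl *panel_cntl);
};

struct panel_cntl_init_data { void *ctx; unsigned int inst; };

struct panel_cntl {
    const struct panel_cntl_funcs *funcs;
    void *ctx;
    unsigned int inst;
};

/* Stand-in for a resource_funcs->panel_cntl_create() implementation. */
static struct panel_cntl *example_panel_cntl_create(
        const struct panel_cntl_init_data *init_data,
        const struct panel_cntl_funcs *funcs)
{
    struct panel_cntl *pc = calloc(1, sizeof(*pc));

    if (!pc)
        return NULL;
    pc->funcs = funcs;
    pc->ctx = init_data->ctx;
    pc->inst = init_data->inst;
    return pc;
}

int main(void)
{
    struct panel_cntl_init_data init = { .ctx = NULL, .inst = 0 };
    struct panel_cntl *pc = example_panel_cntl_create(&init, NULL);

    free(pc);
    return 0;
}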
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h
index e5e7d94026fc..f803191e3134 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h
@@ -117,6 +117,9 @@ struct crc_params {
enum crc_selection selection;
+ uint8_t dsc_mode;
+ uint8_t odm_mode;
+
bool continuous_mode;
bool enable;
};
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h b/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h
index fecc80c47c26..2947d1b15512 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h
@@ -173,6 +173,8 @@ struct scaler_data {
struct scaling_taps taps;
struct rect viewport;
struct rect viewport_c;
+ struct rect viewport_unadjusted;
+ struct rect viewport_c_unadjusted;
struct rect recout;
struct scaling_ratios ratios;
struct scl_inits inits;
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h
index 08307f3796e3..8e72f077e552 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h
@@ -75,9 +75,13 @@ struct hw_sequencer_funcs {
void (*wait_for_mpcc_disconnect)(struct dc *dc,
struct resource_pool *res_pool,
struct pipe_ctx *pipe_ctx);
+ void (*edp_backlight_control)(
+ struct dc_link *link,
+ bool enable);
void (*program_triplebuffer)(const struct dc *dc,
struct pipe_ctx *pipe_ctx, bool enableTripleBuffer);
void (*update_pending_status)(struct pipe_ctx *pipe_ctx);
+ void (*power_down)(struct dc *dc);
/* Pipe Lock Related */
void (*pipe_control_lock)(struct dc *dc,
@@ -193,6 +197,12 @@ struct hw_sequencer_funcs {
unsigned int bufSize, unsigned int mask);
void (*clear_status_bits)(struct dc *dc, unsigned int mask);
+ bool (*set_backlight_level)(struct pipe_ctx *pipe_ctx,
+ uint32_t backlight_pwm_u16_16,
+ uint32_t frame_ramp);
+
+ void (*set_abm_immediate_disable)(struct pipe_ctx *pipe_ctx);
+
};
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer_private.h b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer_private.h
index 52a26e6be066..36e906bb6bfc 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer_private.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer_private.h
@@ -100,8 +100,6 @@ struct hwseq_private_funcs {
struct dc *dc);
void (*edp_backlight_control)(struct dc_link *link,
bool enable);
- bool (*is_panel_backlight_on)(struct dc_link *link);
- bool (*is_panel_powered_on)(struct dc_link *link);
void (*setup_vupdate_interrupt)(struct dc *dc,
struct pipe_ctx *pipe_ctx);
bool (*did_underflow_occur)(struct dc *dc, struct pipe_ctx *pipe_ctx);
diff --git a/drivers/gpu/drm/amd/display/dc/inc/resource.h b/drivers/gpu/drm/amd/display/dc/inc/resource.h
index ca4c36c0c9bc..a9be495af922 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/resource.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/resource.h
@@ -138,9 +138,6 @@ struct pipe_ctx *find_idle_secondary_pipe(
const struct resource_pool *pool,
const struct pipe_ctx *primary_pipe);
-bool resource_is_stream_unchanged(
- struct dc_state *old_context, struct dc_stream_state *stream);
-
bool resource_validate_attach_surfaces(
const struct dc_validation_set set[],
int set_count,
@@ -180,6 +177,8 @@ unsigned int resource_pixel_format_to_bpp(enum surface_pixel_format format);
void get_audio_check(struct audio_info *aud_modes,
struct audio_check *aud_chk);
+int get_num_mpc_splits(struct pipe_ctx *pipe);
+
int get_num_odm_splits(struct pipe_ctx *pipe);
#endif /* DRIVERS_GPU_DRM_AMD_DC_DEV_DC_INC_RESOURCE_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/virtual/virtual_link_encoder.c b/drivers/gpu/drm/amd/display/dc/virtual/virtual_link_encoder.c
index 3464b2d5b89a..348e9a600a72 100644
--- a/drivers/gpu/drm/amd/display/dc/virtual/virtual_link_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/virtual/virtual_link_encoder.c
@@ -84,6 +84,14 @@ static void virtual_link_encoder_destroy(struct link_encoder **enc)
*enc = NULL;
}
+static void virtual_link_encoder_get_max_link_cap(struct link_encoder *enc,
+ struct dc_link_settings *link_settings)
+{
+ /* Set Default link settings */
+ struct dc_link_settings max_link_cap = {LANE_COUNT_FOUR, LINK_RATE_HIGH,
+ LINK_SPREAD_05_DOWNSPREAD_30KHZ, false, 0};
+ *link_settings = max_link_cap;
+}
static const struct link_encoder_funcs virtual_lnk_enc_funcs = {
.validate_output_with_stream =
@@ -94,6 +102,7 @@ static const struct link_encoder_funcs virtual_lnk_enc_funcs = {
.enable_dp_output = virtual_link_encoder_enable_dp_output,
.enable_dp_mst_output = virtual_link_encoder_enable_dp_mst_output,
.disable_output = virtual_link_encoder_disable_output,
+ .get_max_link_cap = virtual_link_encoder_get_max_link_cap,
.dp_set_lane_settings = virtual_link_encoder_dp_set_lane_settings,
.dp_set_phy_pattern = virtual_link_encoder_dp_set_phy_pattern,
.update_mst_stream_allocation_table =
diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_srv.h b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h
index c2671f2616c8..26d94eb5ab58 100644
--- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_srv.h
+++ b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h
@@ -64,10 +64,11 @@
* other component within DAL.
*/
-#include "dmub_types.h"
-#include "dmub_cmd.h"
-#include "dmub_gpint_cmd.h"
-#include "dmub_rb.h"
+#include "inc/dmub_types.h"
+#include "inc/dmub_cmd.h"
+#include "inc/dmub_gpint_cmd.h"
+#include "inc/dmub_cmd_dal.h"
+#include "inc/dmub_rb.h"
#if defined(__cplusplus)
extern "C" {
@@ -75,7 +76,6 @@ extern "C" {
/* Forward declarations */
struct dmub_srv;
-struct dmub_cmd_header;
struct dmub_srv_common_regs;
/* enum dmub_status - return code for dmcub functions */
@@ -151,6 +151,7 @@ struct dmub_srv_region_params {
uint32_t inst_const_size;
uint32_t bss_data_size;
uint32_t vbios_size;
+ const uint8_t *fw_inst_const;
const uint8_t *fw_bss_data;
};
@@ -457,7 +458,7 @@ enum dmub_status dmub_srv_hw_reset(struct dmub_srv *dmub);
* DMUB_STATUS_INVALID - unspecified error
*/
enum dmub_status dmub_srv_cmd_queue(struct dmub_srv *dmub,
- const struct dmub_cmd_header *cmd);
+ const union dmub_rb_cmd *cmd);
/**
* dmub_srv_cmd_execute() - Executes a queued sequence to the dmub
@@ -565,6 +566,16 @@ dmub_srv_send_gpint_command(struct dmub_srv *dmub,
enum dmub_status dmub_srv_get_gpint_response(struct dmub_srv *dmub,
uint32_t *response);
+/**
+ * dmub_flush_buffer_mem() - Read back entire frame buffer region.
+ * This ensures that the write from x86 has been flushed and will not
+ * hang the DMCUB.
+ * @fb: frame buffer to flush
+ *
+ * Can be called after software initialization.
+ */
+void dmub_flush_buffer_mem(const struct dmub_fb *fb);
+
#if defined(__cplusplus)
}
#endif
diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
index 10b5fa9d2588..599bf2055bcb 100644
--- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
+++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
@@ -228,6 +228,7 @@ struct dmub_cmd_psr_copy_settings_data {
uint8_t smu_optimizations_en;
uint8_t frame_delay;
uint8_t frame_cap_ind;
+ struct dmub_psr_debug_flags debug;
};
struct dmub_rb_cmd_psr_copy_settings {
@@ -260,6 +261,8 @@ struct dmub_rb_cmd_psr_set_version {
struct dmub_cmd_abm_set_pipe_data {
uint32_t ramping_boundary;
uint32_t otg_inst;
+ uint32_t panel_inst;
+ uint32_t set_pipe_option;
};
struct dmub_rb_cmd_abm_set_pipe {
@@ -303,6 +306,16 @@ struct dmub_rb_cmd_abm_set_pwm_frac {
struct dmub_cmd_abm_set_pwm_frac_data abm_set_pwm_frac_data;
};
+struct dmub_cmd_abm_init_config_data {
+ union dmub_addr src;
+ uint16_t bytes;
+};
+
+struct dmub_rb_cmd_abm_init_config {
+ struct dmub_cmd_header header;
+ struct dmub_cmd_abm_init_config_data abm_init_config_data;
+};
+
union dmub_rb_cmd {
struct dmub_rb_cmd_read_modify_write read_modify_write;
struct dmub_rb_cmd_reg_field_update_sequence reg_field_update_seq;
@@ -324,6 +337,7 @@ union dmub_rb_cmd {
struct dmub_rb_cmd_abm_set_level abm_set_level;
struct dmub_rb_cmd_abm_set_ambient_level abm_set_ambient_level;
struct dmub_rb_cmd_abm_set_pwm_frac abm_set_pwm_frac;
+ struct dmub_rb_cmd_abm_init_config abm_init_config;
};
#pragma pack(pop)
diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd_dal.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd_dal.h
index d37535d21928..e42de9ded275 100644
--- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd_dal.h
+++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd_dal.h
@@ -32,17 +32,16 @@
*/
enum dmub_cmd_psr_type {
- DMUB_CMD__PSR_SET_VERSION = 0,
- DMUB_CMD__PSR_COPY_SETTINGS = 1,
- DMUB_CMD__PSR_ENABLE = 2,
- DMUB_CMD__PSR_DISABLE = 3,
- DMUB_CMD__PSR_SET_LEVEL = 4,
+ DMUB_CMD__PSR_SET_VERSION = 0,
+ DMUB_CMD__PSR_COPY_SETTINGS = 1,
+ DMUB_CMD__PSR_ENABLE = 2,
+ DMUB_CMD__PSR_DISABLE = 3,
+ DMUB_CMD__PSR_SET_LEVEL = 4,
};
enum psr_version {
- PSR_VERSION_1 = 0x10, // PSR Version 1
- PSR_VERSION_2 = 0x20, // PSR Version 2, includes selective update
- PSR_VERSION_2_1 = 0x21, // PSR Version 2, includes Y-coordinate support for SU
+ PSR_VERSION_1 = 0,
+ PSR_VERSION_UNSUPPORTED = 0xFFFFFFFF,
};
enum dmub_cmd_abm_type {
diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_rb.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_rb.h
index df875fdd2ab0..2ae48c18bb5b 100644
--- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_rb.h
+++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_rb.h
@@ -33,8 +33,6 @@
extern "C" {
#endif
-struct dmub_cmd_header;
-
struct dmub_rb_init_params {
void *ctx;
void *base_address;
@@ -71,7 +69,7 @@ static inline bool dmub_rb_full(struct dmub_rb *rb)
}
static inline bool dmub_rb_push_front(struct dmub_rb *rb,
- const struct dmub_cmd_header *cmd)
+ const union dmub_rb_cmd *cmd)
{
uint64_t volatile *dst = (uint64_t volatile *)(rb->base_address) + rb->wrpt / sizeof(uint64_t);
const uint64_t *src = (const uint64_t *)cmd;
@@ -93,7 +91,7 @@ static inline bool dmub_rb_push_front(struct dmub_rb *rb,
}
static inline bool dmub_rb_front(struct dmub_rb *rb,
- struct dmub_cmd_header *cmd)
+ union dmub_rb_cmd *cmd)
{
uint8_t *rd_ptr = (uint8_t *)rb->base_address + rb->rptr;
diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_types.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_types.h
index 41d524b0db2f..bed5b023a396 100644
--- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_types.h
+++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_types.h
@@ -49,6 +49,12 @@ extern "C" {
#define dmub_udelay(microseconds) udelay(microseconds)
#endif
+/* Maximum number of streams on any ASIC. */
+#define DMUB_MAX_STREAMS 6
+
+/* Maximum number of planes on any ASIC. */
+#define DMUB_MAX_PLANES 6
+
union dmub_addr {
struct {
uint32_t low_part;
@@ -57,6 +63,11 @@ union dmub_addr {
uint64_t quad_part;
};
+struct dmub_psr_debug_flags {
+ uint8_t visual_confirm : 1;
+ uint8_t reserved : 7;
+};
+
#if defined(__cplusplus)
}
#endif
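dmub_types.h above gains fixed upper bounds (DMUB_MAX_STREAMS / DMUB_MAX_PLANES) and a one-byte debug-flag bitfield shared with the PSR firmware. A quick illustration of how such a packed flag byte behaves, as a standalone sketch rather than the firmware ABI:

#include <stdio.h>
#include <stdint.h>

struct psr_debug_flags {
    uint8_t visual_confirm : 1;
    uint8_t reserved       : 7;
};

int main(void)
{
    struct psr_debug_flags flags = { .visual_confirm = 1 };

    printf("sizeof(flags) = %zu byte\n", sizeof(flags)); /* packs into 1 byte */
    printf("visual_confirm = %u\n", flags.visual_confirm);
    return 0;
}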
diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c
index 63bb9e2c81de..edc73d6d7ba2 100644
--- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c
+++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c
@@ -23,7 +23,7 @@
*
*/
-#include "../inc/dmub_srv.h"
+#include "../dmub_srv.h"
#include "dmub_reg.h"
#include "dmub_dcn20.h"
@@ -186,14 +186,22 @@ void dmub_dcn20_setup_windows(struct dmub_srv *dmub,
dmub_dcn20_get_fb_base_offset(dmub, &fb_base, &fb_offset);
- dmub_dcn20_translate_addr(&cw2->offset, fb_base, fb_offset, &offset);
-
- REG_WRITE(DMCUB_REGION3_CW2_OFFSET, offset.u.low_part);
- REG_WRITE(DMCUB_REGION3_CW2_OFFSET_HIGH, offset.u.high_part);
- REG_WRITE(DMCUB_REGION3_CW2_BASE_ADDRESS, cw2->region.base);
- REG_SET_2(DMCUB_REGION3_CW2_TOP_ADDRESS, 0,
- DMCUB_REGION3_CW2_TOP_ADDRESS, cw2->region.top,
- DMCUB_REGION3_CW2_ENABLE, 1);
+ if (cw2->region.base != cw2->region.top) {
+ dmub_dcn20_translate_addr(&cw2->offset, fb_base, fb_offset,
+ &offset);
+
+ REG_WRITE(DMCUB_REGION3_CW2_OFFSET, offset.u.low_part);
+ REG_WRITE(DMCUB_REGION3_CW2_OFFSET_HIGH, offset.u.high_part);
+ REG_WRITE(DMCUB_REGION3_CW2_BASE_ADDRESS, cw2->region.base);
+ REG_SET_2(DMCUB_REGION3_CW2_TOP_ADDRESS, 0,
+ DMCUB_REGION3_CW2_TOP_ADDRESS, cw2->region.top,
+ DMCUB_REGION3_CW2_ENABLE, 1);
+ } else {
+ REG_WRITE(DMCUB_REGION3_CW2_OFFSET, 0);
+ REG_WRITE(DMCUB_REGION3_CW2_OFFSET_HIGH, 0);
+ REG_WRITE(DMCUB_REGION3_CW2_BASE_ADDRESS, 0);
+ REG_WRITE(DMCUB_REGION3_CW2_TOP_ADDRESS, 0);
+ }
dmub_dcn20_translate_addr(&cw3->offset, fb_base, fb_offset, &offset);
diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn21.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn21.c
index 5bed9fcd6b5c..e8f488232e34 100644
--- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn21.c
+++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn21.c
@@ -23,7 +23,7 @@
*
*/
-#include "../inc/dmub_srv.h"
+#include "../dmub_srv.h"
#include "dmub_reg.h"
#include "dmub_dcn21.h"
diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_reg.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_reg.c
index 4094eca212f0..ca0c8a54b635 100644
--- a/drivers/gpu/drm/amd/display/dmub/src/dmub_reg.c
+++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_reg.c
@@ -24,7 +24,7 @@
*/
#include "dmub_reg.h"
-#include "../inc/dmub_srv.h"
+#include "../dmub_srv.h"
struct dmub_reg_value_masks {
uint32_t value;
diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
index ce32cc7933c4..0e3751d94cb0 100644
--- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
+++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
@@ -23,7 +23,7 @@
*
*/
-#include "../inc/dmub_srv.h"
+#include "../dmub_srv.h"
#include "dmub_dcn20.h"
#include "dmub_dcn21.h"
#include "dmub_fw_meta.h"
@@ -70,7 +70,7 @@ static inline uint32_t dmub_align(uint32_t val, uint32_t factor)
return (val + factor - 1) / factor * factor;
}
-static void dmub_flush_buffer_mem(const struct dmub_fb *fb)
+void dmub_flush_buffer_mem(const struct dmub_fb *fb)
{
const uint8_t *base = (const uint8_t *)fb->cpu_addr;
uint8_t buf[64];
@@ -91,18 +91,32 @@ static void dmub_flush_buffer_mem(const struct dmub_fb *fb)
}
static const struct dmub_fw_meta_info *
-dmub_get_fw_meta_info(const uint8_t *fw_bss_data, uint32_t fw_bss_data_size)
+dmub_get_fw_meta_info(const struct dmub_srv_region_params *params)
{
const union dmub_fw_meta *meta;
+ const uint8_t *blob = NULL;
+ uint32_t blob_size = 0;
+ uint32_t meta_offset = 0;
+
+ if (params->fw_bss_data) {
+ /* Legacy metadata region. */
+ blob = params->fw_bss_data;
+ blob_size = params->bss_data_size;
+ meta_offset = DMUB_FW_META_OFFSET;
+ } else if (params->fw_inst_const) {
+ /* Combined metadata region. */
+ blob = params->fw_inst_const;
+ blob_size = params->inst_const_size;
+ meta_offset = 0;
+ }
- if (fw_bss_data == NULL)
+ if (!blob || !blob_size)
return NULL;
- if (fw_bss_data_size < sizeof(union dmub_fw_meta) + DMUB_FW_META_OFFSET)
+ if (blob_size < sizeof(union dmub_fw_meta) + meta_offset)
return NULL;
- meta = (const union dmub_fw_meta *)(fw_bss_data + fw_bss_data_size -
- DMUB_FW_META_OFFSET -
+ meta = (const union dmub_fw_meta *)(blob + blob_size - meta_offset -
sizeof(union dmub_fw_meta));
if (meta->info.magic_value != DMUB_FW_META_MAGIC)
@@ -247,8 +261,7 @@ dmub_srv_calc_region_info(struct dmub_srv *dmub,
mail->base = dmub_align(bios->top, 256);
mail->top = mail->base + DMUB_MAILBOX_SIZE;
- fw_info = dmub_get_fw_meta_info(params->fw_bss_data,
- params->bss_data_size);
+ fw_info = dmub_get_fw_meta_info(params);
if (fw_info) {
fw_state_size = fw_info->fw_region_size;
@@ -449,7 +462,7 @@ enum dmub_status dmub_srv_hw_reset(struct dmub_srv *dmub)
}
enum dmub_status dmub_srv_cmd_queue(struct dmub_srv *dmub,
- const struct dmub_cmd_header *cmd)
+ const union dmub_rb_cmd *cmd)
{
if (!dmub->hw_init)
return DMUB_STATUS_INVALID;
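
dmub_srv_cmd_queue() now takes the whole command union rather than a struct dmub_cmd_header pointer. A minimal caller sketch under that change; the member being filled and the status handling are illustrative and not part of this patch:

	union dmub_rb_cmd cmd;

	memset(&cmd, 0, sizeof(cmd));
	/* Fill whichever member the command needs, e.g. the
	 * abm_init_config member added to the union above. */
	if (dmub_srv_cmd_queue(dmub, &cmd) != DMUB_STATUS_OK)
		return; /* queue full or service not initialized */
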
diff --git a/drivers/gpu/drm/amd/display/include/hdcp_types.h b/drivers/gpu/drm/amd/display/include/hdcp_types.h
index f31e6befc8d6..42229b4effdc 100644
--- a/drivers/gpu/drm/amd/display/include/hdcp_types.h
+++ b/drivers/gpu/drm/amd/display/include/hdcp_types.h
@@ -83,6 +83,12 @@ enum hdcp_link {
HDCP_LINK_SECONDARY
};
+enum hdcp_message_status {
+ HDCP_MESSAGE_SUCCESS,
+ HDCP_MESSAGE_FAILURE,
+ HDCP_MESSAGE_UNSUPPORTED
+};
+
struct hdcp_protection_message {
enum hdcp_version version;
/* relevant only for DVI */
@@ -91,6 +97,7 @@ struct hdcp_protection_message {
uint32_t length;
uint8_t max_retries;
uint8_t *data;
+ enum hdcp_message_status status;
};
#endif
diff --git a/drivers/gpu/drm/amd/display/include/logger_interface.h b/drivers/gpu/drm/amd/display/include/logger_interface.h
index 6e008de25629..02c23b04d34b 100644
--- a/drivers/gpu/drm/amd/display/include/logger_interface.h
+++ b/drivers/gpu/drm/amd/display/include/logger_interface.h
@@ -40,8 +40,6 @@ struct dc_state;
*
*/
-void dc_conn_log_hex_linux(const uint8_t *hex_data, int hex_data_count);
-
void pre_surface_trace(
struct dc *dc,
const struct dc_plane_state *const *plane_states,
@@ -102,14 +100,12 @@ void context_clock_trace(
#define CONN_DATA_DETECT(link, hex_data, hex_len, ...) \
do { \
(void)(link); \
- dc_conn_log_hex_linux(hex_data, hex_len); \
DC_LOG_EVENT_DETECTION(__VA_ARGS__); \
} while (0)
#define CONN_DATA_LINK_LOSS(link, hex_data, hex_len, ...) \
do { \
(void)(link); \
- dc_conn_log_hex_linux(hex_data, hex_len); \
DC_LOG_EVENT_LINK_LOSS(__VA_ARGS__); \
} while (0)
diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c
index cac09d500fda..9431b48aecb4 100644
--- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c
+++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c
@@ -1782,7 +1782,8 @@ rgb_user_alloc_fail:
return ret;
}
-bool mod_color_calculate_degamma_params(struct dc_transfer_func *input_tf,
+bool mod_color_calculate_degamma_params(struct dc_color_caps *dc_caps,
+ struct dc_transfer_func *input_tf,
const struct dc_gamma *ramp, bool mapUserRamp)
{
struct dc_transfer_func_distributed_points *tf_pts = &input_tf->tf_pts;
@@ -1801,11 +1802,29 @@ bool mod_color_calculate_degamma_params(struct dc_transfer_func *input_tf,
/* we can use hardcoded curve for plain SRGB TF
* If linear, it's bypass if on user ramp
*/
- if (input_tf->type == TF_TYPE_PREDEFINED &&
- (input_tf->tf == TRANSFER_FUNCTION_SRGB ||
- input_tf->tf == TRANSFER_FUNCTION_LINEAR) &&
- !mapUserRamp)
- return true;
+ if (input_tf->type == TF_TYPE_PREDEFINED) {
+ if ((input_tf->tf == TRANSFER_FUNCTION_SRGB ||
+ input_tf->tf == TRANSFER_FUNCTION_LINEAR) &&
+ !mapUserRamp)
+ return true;
+
+ if (dc_caps != NULL &&
+ dc_caps->dpp.dcn_arch == 1) {
+
+ if (input_tf->tf == TRANSFER_FUNCTION_PQ &&
+ dc_caps->dpp.dgam_rom_caps.pq == 1)
+ return true;
+
+ if (input_tf->tf == TRANSFER_FUNCTION_GAMMA22 &&
+ dc_caps->dpp.dgam_rom_caps.gamma2_2 == 1)
+ return true;
+
+ // HLG OOTF not accounted for
+ if (input_tf->tf == TRANSFER_FUNCTION_HLG &&
+ dc_caps->dpp.dgam_rom_caps.hlg == 1)
+ return true;
+ }
+ }
input_tf->type = TF_TYPE_DISTRIBUTED_POINTS;
@@ -1902,7 +1921,7 @@ bool mod_color_calculate_degamma_params(struct dc_transfer_func *input_tf,
- if (ramp->type == GAMMA_CUSTOM)
+ if (ramp && ramp->type == GAMMA_CUSTOM)
apply_lut_1d(ramp, MAX_HW_POINTS, tf_pts);
ret = true;
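
mod_color_calculate_degamma_params() gains a leading struct dc_color_caps pointer; the new code tolerates NULL and simply skips the degamma-ROM shortcuts, so callers without capability data still get a computed curve. A hypothetical call site (the caps, transfer-function, and ramp variables are assumptions, not taken from this patch):

	/* With caps, PQ / Gamma 2.2 / HLG may take the hardware ROM path and return early. */
	mod_color_calculate_degamma_params(caps, input_tf, ramp, false);

	/* With NULL caps, the ROM shortcuts are skipped and those curves are computed. */
	mod_color_calculate_degamma_params(NULL, input_tf, ramp, false);
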
diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.h b/drivers/gpu/drm/amd/display/modules/color/color_gamma.h
index 9994817a9a03..7f56226ba77a 100644
--- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.h
+++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.h
@@ -30,6 +30,7 @@ struct dc_transfer_func;
struct dc_gamma;
struct dc_transfer_func_distributed_points;
struct dc_rgb_fixed;
+struct dc_color_caps;
enum dc_transfer_func_predefined;
/* For SetRegamma ADL interface support
@@ -100,7 +101,8 @@ bool mod_color_calculate_regamma_params(struct dc_transfer_func *output_tf,
const struct dc_gamma *ramp, bool mapUserRamp, bool canRomBeUsed,
const struct freesync_hdr_tf_params *fs_params);
-bool mod_color_calculate_degamma_params(struct dc_transfer_func *output_tf,
+bool mod_color_calculate_degamma_params(struct dc_color_caps *dc_caps,
+ struct dc_transfer_func *output_tf,
const struct dc_gamma *ramp, bool mapUserRamp);
bool mod_color_calculate_degamma_curve(enum dc_transfer_func_predefined trans,
diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
index c33454a9e0b4..eb7421e83b86 100644
--- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
+++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
@@ -443,7 +443,7 @@ static bool vrr_settings_require_update(struct core_freesync *core_freesync,
return true;
} else if (in_vrr->state == VRR_STATE_ACTIVE_FIXED &&
in_vrr->fixed.target_refresh_in_uhz !=
- in_config->min_refresh_in_uhz) {
+ in_config->fixed_refresh_in_uhz) {
return true;
} else if (in_vrr->min_refresh_in_uhz != min_refresh_in_uhz) {
return true;
@@ -491,7 +491,7 @@ bool mod_freesync_get_v_position(struct mod_freesync *mod_freesync,
return false;
}
-static void build_vrr_infopacket_data(const struct mod_vrr_params *vrr,
+static void build_vrr_infopacket_data_v1(const struct mod_vrr_params *vrr,
struct dc_info_packet *infopacket)
{
/* PB1 = 0x1A (24bit AMD IEEE OUI (0x00001A) - Byte 0) */
@@ -523,14 +523,74 @@ static void build_vrr_infopacket_data(const struct mod_vrr_params *vrr,
vrr->state == VRR_STATE_ACTIVE_FIXED)
infopacket->sb[6] |= 0x04;
+ // For v1 & 2 infoframes program nominal if non-fs mode, otherwise full range
/* PB7 = FreeSync Minimum refresh rate (Hz) */
- infopacket->sb[7] = (unsigned char)((vrr->min_refresh_in_uhz + 500000) / 1000000);
+ if (vrr->state == VRR_STATE_ACTIVE_VARIABLE ||
+ vrr->state == VRR_STATE_ACTIVE_FIXED) {
+ infopacket->sb[7] = (unsigned char)((vrr->min_refresh_in_uhz + 500000) / 1000000);
+ } else {
+ infopacket->sb[7] = (unsigned char)((vrr->max_refresh_in_uhz + 500000) / 1000000);
+ }
/* PB8 = FreeSync Maximum refresh rate (Hz)
* Note: We should never go above the field rate of the mode timing set.
*/
infopacket->sb[8] = (unsigned char)((vrr->max_refresh_in_uhz + 500000) / 1000000);
+ //FreeSync HDR
+ infopacket->sb[9] = 0;
+ infopacket->sb[10] = 0;
+}
+
+static void build_vrr_infopacket_data_v3(const struct mod_vrr_params *vrr,
+ struct dc_info_packet *infopacket)
+{
+ /* PB1 = 0x1A (24bit AMD IEEE OUI (0x00001A) - Byte 0) */
+ infopacket->sb[1] = 0x1A;
+
+ /* PB2 = 0x00 (24bit AMD IEEE OUI (0x00001A) - Byte 1) */
+ infopacket->sb[2] = 0x00;
+
+ /* PB3 = 0x00 (24bit AMD IEEE OUI (0x00001A) - Byte 2) */
+ infopacket->sb[3] = 0x00;
+
+ /* PB4 = Reserved */
+
+ /* PB5 = Reserved */
+
+ /* PB6 = [Bits 7:3 = Reserved] */
+
+ /* PB6 = [Bit 0 = FreeSync Supported] */
+ if (vrr->state != VRR_STATE_UNSUPPORTED)
+ infopacket->sb[6] |= 0x01;
+
+ /* PB6 = [Bit 1 = FreeSync Enabled] */
+ if (vrr->state != VRR_STATE_DISABLED &&
+ vrr->state != VRR_STATE_UNSUPPORTED)
+ infopacket->sb[6] |= 0x02;
+
+ /* PB6 = [Bit 2 = FreeSync Active] */
+ if (vrr->state == VRR_STATE_ACTIVE_VARIABLE ||
+ vrr->state == VRR_STATE_ACTIVE_FIXED)
+ infopacket->sb[6] |= 0x04;
+
+ if (vrr->state == VRR_STATE_ACTIVE_FIXED) {
+ /* PB7 = FreeSync Minimum refresh rate (Hz) */
+ infopacket->sb[7] = (unsigned char)((vrr->fixed_refresh_in_uhz + 500000) / 1000000);
+ /* PB8 = FreeSync Maximum refresh rate (Hz) */
+ infopacket->sb[8] = (unsigned char)((vrr->fixed_refresh_in_uhz + 500000) / 1000000);
+ } else if (vrr->state == VRR_STATE_ACTIVE_VARIABLE) {
+ /* PB7 = FreeSync Minimum refresh rate (Hz) */
+ infopacket->sb[7] = (unsigned char)((vrr->min_refresh_in_uhz + 500000) / 1000000);
+ /* PB8 = FreeSync Maximum refresh rate (Hz) */
+ infopacket->sb[8] = (unsigned char)((vrr->max_refresh_in_uhz + 500000) / 1000000);
+ } else {
+ // Non-fs case, program nominal range
+ /* PB7 = FreeSync Minimum refresh rate (Hz) */
+ infopacket->sb[7] = (unsigned char)((vrr->max_refresh_in_uhz + 500000) / 1000000);
+ /* PB8 = FreeSync Maximum refresh rate (Hz) */
+ infopacket->sb[8] = (unsigned char)((vrr->max_refresh_in_uhz + 500000) / 1000000);
+ }
//FreeSync HDR
infopacket->sb[9] = 0;
@@ -678,7 +738,7 @@ static void build_vrr_infopacket_v1(enum signal_type signal,
unsigned int payload_size = 0;
build_vrr_infopacket_header_v1(signal, infopacket, &payload_size);
- build_vrr_infopacket_data(vrr, infopacket);
+ build_vrr_infopacket_data_v1(vrr, infopacket);
build_vrr_infopacket_checksum(&payload_size, infopacket);
infopacket->valid = true;
@@ -692,7 +752,24 @@ static void build_vrr_infopacket_v2(enum signal_type signal,
unsigned int payload_size = 0;
build_vrr_infopacket_header_v2(signal, infopacket, &payload_size);
- build_vrr_infopacket_data(vrr, infopacket);
+ build_vrr_infopacket_data_v1(vrr, infopacket);
+
+ build_vrr_infopacket_fs2_data(app_tf, infopacket);
+
+ build_vrr_infopacket_checksum(&payload_size, infopacket);
+
+ infopacket->valid = true;
+}
+
+static void build_vrr_infopacket_v3(enum signal_type signal,
+ const struct mod_vrr_params *vrr,
+ enum color_transfer_func app_tf,
+ struct dc_info_packet *infopacket)
+{
+ unsigned int payload_size = 0;
+
+ build_vrr_infopacket_header_v2(signal, infopacket, &payload_size);
+ build_vrr_infopacket_data_v3(vrr, infopacket);
build_vrr_infopacket_fs2_data(app_tf, infopacket);
@@ -717,11 +794,14 @@ void mod_freesync_build_vrr_infopacket(struct mod_freesync *mod_freesync,
return;
switch (packet_type) {
- case PACKET_TYPE_FS2:
+ case PACKET_TYPE_FS_V3:
+ build_vrr_infopacket_v3(stream->signal, vrr, app_tf, infopacket);
+ break;
+ case PACKET_TYPE_FS_V2:
build_vrr_infopacket_v2(stream->signal, vrr, app_tf, infopacket);
break;
case PACKET_TYPE_VRR:
- case PACKET_TYPE_FS1:
+ case PACKET_TYPE_FS_V1:
default:
build_vrr_infopacket_v1(stream->signal, vrr, infopacket);
}
@@ -793,6 +873,11 @@ void mod_freesync_build_vrr_params(struct mod_freesync *mod_freesync,
calc_duration_in_us_from_refresh_in_uhz(
(unsigned int)max_refresh_in_uhz);
+ if (in_config->state == VRR_STATE_ACTIVE_FIXED)
+ in_out_vrr->fixed_refresh_in_uhz = in_config->fixed_refresh_in_uhz;
+ else
+ in_out_vrr->fixed_refresh_in_uhz = 0;
+
refresh_range = in_out_vrr->max_refresh_in_uhz -
in_out_vrr->min_refresh_in_uhz;
@@ -843,7 +928,7 @@ void mod_freesync_build_vrr_params(struct mod_freesync *mod_freesync,
in_out_vrr->min_refresh_in_uhz);
} else if (in_out_vrr->state == VRR_STATE_ACTIVE_FIXED) {
in_out_vrr->fixed.target_refresh_in_uhz =
- in_out_vrr->min_refresh_in_uhz;
+ in_out_vrr->fixed_refresh_in_uhz;
if (in_out_vrr->fixed.ramping_active &&
in_out_vrr->fixed.fixed_active) {
/* Do not update vtotals if ramping is already active
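
The PB7/PB8 bytes carry whole-Hz values derived from the micro-hertz fields by round-to-nearest integer division, the same (uhz + 500000) / 1000000 expression used in both infopacket builders above. Restated as a standalone helper for clarity (the helper name is illustrative):

	/* Round a refresh rate in uHz to the nearest whole Hz. */
	static unsigned char refresh_hz_from_uhz(unsigned int refresh_in_uhz)
	{
		return (unsigned char)((refresh_in_uhz + 500000) / 1000000);
	}

	/* Examples: 59,940,000 uHz (59.94 Hz) -> 60; 48,000,000 uHz -> 48. */
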
diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.c
index cc1d3f470b99..e9fbd94f8635 100644
--- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.c
+++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.c
@@ -328,7 +328,8 @@ enum mod_hdcp_status mod_hdcp_add_display(struct mod_hdcp *hdcp,
/* add display to connection */
hdcp->connection.link = *link;
*display_container = *display;
- status = mod_hdcp_add_display_to_topology(hdcp, display->index);
+ status = mod_hdcp_add_display_to_topology(hdcp, display_container);
+
if (status != MOD_HDCP_STATUS_SUCCESS)
goto out;
@@ -374,7 +375,7 @@ enum mod_hdcp_status mod_hdcp_remove_display(struct mod_hdcp *hdcp,
status = mod_hdcp_remove_display_from_topology(hdcp, index);
if (status != MOD_HDCP_STATUS_SUCCESS)
goto out;
- display->state = MOD_HDCP_DISPLAY_INACTIVE;
+ memset(display, 0, sizeof(struct mod_hdcp_display));
/* request authentication when connection is not reset */
if (current_state(hdcp) != HDCP_UNINITIALIZED)
diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.h b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.h
index 5cb4546be0ef..b0cefed2eb02 100644
--- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.h
+++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.h
@@ -328,7 +328,7 @@ void mod_hdcp_dump_binary_message(uint8_t *msg, uint32_t msg_size,
/* psp functions */
enum mod_hdcp_status mod_hdcp_add_display_to_topology(
- struct mod_hdcp *hdcp, uint8_t index);
+ struct mod_hdcp *hdcp, struct mod_hdcp_display *display);
enum mod_hdcp_status mod_hdcp_remove_display_from_topology(
struct mod_hdcp *hdcp, uint8_t index);
enum mod_hdcp_status mod_hdcp_hdcp1_create_session(struct mod_hdcp *hdcp);
@@ -357,8 +357,6 @@ enum mod_hdcp_status mod_hdcp_hdcp2_prepare_stream_management(
struct mod_hdcp *hdcp);
enum mod_hdcp_status mod_hdcp_hdcp2_validate_stream_ready(
struct mod_hdcp *hdcp);
-enum mod_hdcp_status mod_hdcp_hdcp2_get_link_encryption_status(struct mod_hdcp *hdcp,
- enum mod_hdcp_encryption_status *encryption_status);
/* ddc functions */
enum mod_hdcp_status mod_hdcp_read_bksv(struct mod_hdcp *hdcp);
@@ -503,11 +501,6 @@ static inline uint8_t is_display_active(struct mod_hdcp_display *display)
return display->state >= MOD_HDCP_DISPLAY_ACTIVE;
}
-static inline uint8_t is_display_added(struct mod_hdcp_display *display)
-{
- return display->state >= MOD_HDCP_DISPLAY_ACTIVE_AND_ADDED;
-}
-
static inline uint8_t is_display_encryption_enabled(struct mod_hdcp_display *display)
{
return display->state >= MOD_HDCP_DISPLAY_ENCRYPTION_ENABLED;
@@ -515,34 +508,23 @@ static inline uint8_t is_display_encryption_enabled(struct mod_hdcp_display *dis
static inline uint8_t get_active_display_count(struct mod_hdcp *hdcp)
{
- uint8_t added_count = 0;
+ uint8_t active_count = 0;
uint8_t i;
for (i = 0; i < MAX_NUM_OF_DISPLAYS; i++)
if (is_display_active(&hdcp->displays[i]))
- added_count++;
- return added_count;
-}
-
-static inline uint8_t get_added_display_count(struct mod_hdcp *hdcp)
-{
- uint8_t added_count = 0;
- uint8_t i;
-
- for (i = 0; i < MAX_NUM_OF_DISPLAYS; i++)
- if (is_display_added(&hdcp->displays[i]))
- added_count++;
- return added_count;
+ active_count++;
+ return active_count;
}
-static inline struct mod_hdcp_display *get_first_added_display(
+static inline struct mod_hdcp_display *get_first_active_display(
struct mod_hdcp *hdcp)
{
uint8_t i;
struct mod_hdcp_display *display = NULL;
for (i = 0; i < MAX_NUM_OF_DISPLAYS; i++)
- if (is_display_added(&hdcp->displays[i])) {
+ if (is_display_active(&hdcp->displays[i])) {
display = &hdcp->displays[i];
break;
}
diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c
index 37c8c05497d6..f244b72e74e0 100644
--- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c
+++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c
@@ -129,7 +129,7 @@ static inline uint8_t get_device_count(struct mod_hdcp *hdcp)
static inline enum mod_hdcp_status check_device_count(struct mod_hdcp *hdcp)
{
/* device count must be greater than or equal to tracked hdcp displays */
- return (get_device_count(hdcp) < get_added_display_count(hdcp)) ?
+ return (get_device_count(hdcp) < get_active_display_count(hdcp)) ?
MOD_HDCP_STATUS_HDCP1_DEVICE_COUNT_MISMATCH_FAILURE :
MOD_HDCP_STATUS_SUCCESS;
}
diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c
index 491c00f48026..549c113abcf7 100644
--- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c
+++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c
@@ -208,7 +208,7 @@ static inline uint8_t get_device_count(struct mod_hdcp *hdcp)
static enum mod_hdcp_status check_device_count(struct mod_hdcp *hdcp)
{
/* device count must be greater than or equal to tracked hdcp displays */
- return (get_device_count(hdcp) < get_added_display_count(hdcp)) ?
+ return (get_device_count(hdcp) < get_active_display_count(hdcp)) ?
MOD_HDCP_STATUS_HDCP2_DEVICE_COUNT_MISMATCH_FAILURE :
MOD_HDCP_STATUS_SUCCESS;
}
diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.c
index 44956f9ba178..fb6a19d020f9 100644
--- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.c
+++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.c
@@ -98,8 +98,8 @@ char *mod_hdcp_status_to_str(int32_t status)
return "MOD_HDCP_STATUS_HDCP1_VALIDATE_KSV_LIST_FAILURE";
case MOD_HDCP_STATUS_HDCP1_KSV_LIST_REVOKED:
return "MOD_HDCP_STATUS_HDCP1_KSV_LIST_REVOKED";
- case MOD_HDCP_STATUS_HDCP1_ENABLE_ENCRYPTION:
- return "MOD_HDCP_STATUS_HDCP1_ENABLE_ENCRYPTION";
+ case MOD_HDCP_STATUS_HDCP1_ENABLE_ENCRYPTION_FAILURE:
+ return "MOD_HDCP_STATUS_HDCP1_ENABLE_ENCRYPTION_FAILURE";
case MOD_HDCP_STATUS_HDCP1_ENABLE_STREAM_ENCRYPTION_FAILURE:
return "MOD_HDCP_STATUS_HDCP1_ENABLE_STREAM_ENCRYPTION_FAILURE";
case MOD_HDCP_STATUS_HDCP1_MAX_CASCADE_EXCEEDED_FAILURE:
@@ -158,8 +158,8 @@ char *mod_hdcp_status_to_str(int32_t status)
return "MOD_HDCP_STATUS_HDCP2_RX_ID_LIST_REVOKED";
case MOD_HDCP_STATUS_HDCP2_RX_ID_LIST_NOT_READY:
return "MOD_HDCP_STATUS_HDCP2_RX_ID_LIST_NOT_READY";
- case MOD_HDCP_STATUS_HDCP2_ENABLE_STREAM_ENCRYPTION:
- return "MOD_HDCP_STATUS_HDCP2_ENABLE_STREAM_ENCRYPTION";
+ case MOD_HDCP_STATUS_HDCP2_ENABLE_STREAM_ENCRYPTION_FAILURE:
+ return "MOD_HDCP_STATUS_HDCP2_ENABLE_STREAM_ENCRYPTION_FAILURE";
case MOD_HDCP_STATUS_HDCP2_STREAM_READY_PENDING:
return "MOD_HDCP_STATUS_HDCP2_STREAM_READY_PENDING";
case MOD_HDCP_STATUS_HDCP2_VALIDATE_STREAM_READY_FAILURE:
diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c
index c2929815c3ee..fb1161dd7ea8 100644
--- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c
+++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c
@@ -51,12 +51,15 @@ enum mod_hdcp_status mod_hdcp_remove_display_from_topology(
struct ta_dtm_shared_memory *dtm_cmd;
struct mod_hdcp_display *display =
get_active_display_at_index(hdcp, index);
+ enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
dtm_cmd = (struct ta_dtm_shared_memory *)psp->dtm_context.dtm_shared_buf;
- if (!display || !is_display_added(display))
+ if (!display || !is_display_active(display))
return MOD_HDCP_STATUS_DISPLAY_NOT_FOUND;
+ mutex_lock(&psp->dtm_context.mutex);
+
memset(dtm_cmd, 0, sizeof(struct ta_dtm_shared_memory));
dtm_cmd->cmd_id = TA_DTM_COMMAND__TOPOLOGY_UPDATE_V2;
@@ -66,34 +69,33 @@ enum mod_hdcp_status mod_hdcp_remove_display_from_topology(
psp_dtm_invoke(psp, dtm_cmd->cmd_id);
- if (dtm_cmd->dtm_status != TA_DTM_STATUS__SUCCESS)
- return MOD_HDCP_STATUS_UPDATE_TOPOLOGY_FAILURE;
+ if (dtm_cmd->dtm_status != TA_DTM_STATUS__SUCCESS) {
+ status = MOD_HDCP_STATUS_UPDATE_TOPOLOGY_FAILURE;
+ } else {
+ display->state = MOD_HDCP_DISPLAY_ACTIVE;
+ HDCP_TOP_REMOVE_DISPLAY_TRACE(hdcp, display->index);
+ }
- display->state = MOD_HDCP_DISPLAY_ACTIVE;
- HDCP_TOP_REMOVE_DISPLAY_TRACE(hdcp, display->index);
-
- return MOD_HDCP_STATUS_SUCCESS;
-
+ mutex_unlock(&psp->dtm_context.mutex);
+ return status;
}
enum mod_hdcp_status mod_hdcp_add_display_to_topology(struct mod_hdcp *hdcp,
- uint8_t index)
+ struct mod_hdcp_display *display)
{
struct psp_context *psp = hdcp->config.psp.handle;
struct ta_dtm_shared_memory *dtm_cmd;
- struct mod_hdcp_display *display =
- get_active_display_at_index(hdcp, index);
struct mod_hdcp_link *link = &hdcp->connection.link;
+ enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
if (!psp->dtm_context.dtm_initialized) {
DRM_ERROR("Failed to add display topology, DTM TA is not initialized.");
+ display->state = MOD_HDCP_DISPLAY_INACTIVE;
return MOD_HDCP_STATUS_FAILURE;
}
- if (!display || is_display_added(display))
- return MOD_HDCP_STATUS_UPDATE_TOPOLOGY_FAILURE;
-
dtm_cmd = (struct ta_dtm_shared_memory *)psp->dtm_context.dtm_shared_buf;
+ mutex_lock(&psp->dtm_context.mutex);
memset(dtm_cmd, 0, sizeof(struct ta_dtm_shared_memory));
dtm_cmd->cmd_id = TA_DTM_COMMAND__TOPOLOGY_UPDATE_V2;
@@ -113,21 +115,24 @@ enum mod_hdcp_status mod_hdcp_add_display_to_topology(struct mod_hdcp *hdcp,
psp_dtm_invoke(psp, dtm_cmd->cmd_id);
- if (dtm_cmd->dtm_status != TA_DTM_STATUS__SUCCESS)
- return MOD_HDCP_STATUS_UPDATE_TOPOLOGY_FAILURE;
-
- display->state = MOD_HDCP_DISPLAY_ACTIVE_AND_ADDED;
- HDCP_TOP_ADD_DISPLAY_TRACE(hdcp, display->index);
+ if (dtm_cmd->dtm_status != TA_DTM_STATUS__SUCCESS) {
+ display->state = MOD_HDCP_DISPLAY_INACTIVE;
+ status = MOD_HDCP_STATUS_UPDATE_TOPOLOGY_FAILURE;
+ } else {
+ HDCP_TOP_ADD_DISPLAY_TRACE(hdcp, display->index);
+ }
- return MOD_HDCP_STATUS_SUCCESS;
+ mutex_unlock(&psp->dtm_context.mutex);
+ return status;
}
enum mod_hdcp_status mod_hdcp_hdcp1_create_session(struct mod_hdcp *hdcp)
{
struct psp_context *psp = hdcp->config.psp.handle;
- struct mod_hdcp_display *display = get_first_added_display(hdcp);
+ struct mod_hdcp_display *display = get_first_active_display(hdcp);
struct ta_hdcp_shared_memory *hdcp_cmd;
+ enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
if (!psp->hdcp_context.hdcp_initialized) {
DRM_ERROR("Failed to create hdcp session. HDCP TA is not initialized.");
@@ -135,6 +140,8 @@ enum mod_hdcp_status mod_hdcp_hdcp1_create_session(struct mod_hdcp *hdcp)
}
hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.hdcp_shared_buf;
+
+ mutex_lock(&psp->hdcp_context.mutex);
memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory));
hdcp_cmd->in_msg.hdcp1_create_session.display_handle = display->index;
@@ -144,16 +151,18 @@ enum mod_hdcp_status mod_hdcp_hdcp1_create_session(struct mod_hdcp *hdcp)
hdcp->auth.id = hdcp_cmd->out_msg.hdcp1_create_session.session_handle;
- if (hdcp_cmd->hdcp_status != TA_HDCP_STATUS__SUCCESS)
- return MOD_HDCP_STATUS_HDCP1_CREATE_SESSION_FAILURE;
-
- hdcp->auth.msg.hdcp1.ainfo = hdcp_cmd->out_msg.hdcp1_create_session.ainfo_primary;
- memcpy(hdcp->auth.msg.hdcp1.aksv, hdcp_cmd->out_msg.hdcp1_create_session.aksv_primary,
- sizeof(hdcp->auth.msg.hdcp1.aksv));
- memcpy(hdcp->auth.msg.hdcp1.an, hdcp_cmd->out_msg.hdcp1_create_session.an_primary,
- sizeof(hdcp->auth.msg.hdcp1.an));
+ if (hdcp_cmd->hdcp_status != TA_HDCP_STATUS__SUCCESS) {
+ status = MOD_HDCP_STATUS_HDCP1_CREATE_SESSION_FAILURE;
+ } else {
+ hdcp->auth.msg.hdcp1.ainfo = hdcp_cmd->out_msg.hdcp1_create_session.ainfo_primary;
+ memcpy(hdcp->auth.msg.hdcp1.aksv, hdcp_cmd->out_msg.hdcp1_create_session.aksv_primary,
+ sizeof(hdcp->auth.msg.hdcp1.aksv));
+ memcpy(hdcp->auth.msg.hdcp1.an, hdcp_cmd->out_msg.hdcp1_create_session.an_primary,
+ sizeof(hdcp->auth.msg.hdcp1.an));
+ }
- return MOD_HDCP_STATUS_SUCCESS;
+ mutex_unlock(&psp->hdcp_context.mutex);
+ return status;
}
enum mod_hdcp_status mod_hdcp_hdcp1_destroy_session(struct mod_hdcp *hdcp)
@@ -162,7 +171,9 @@ enum mod_hdcp_status mod_hdcp_hdcp1_destroy_session(struct mod_hdcp *hdcp)
struct psp_context *psp = hdcp->config.psp.handle;
struct ta_hdcp_shared_memory *hdcp_cmd;
uint8_t i = 0;
+ enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
+ mutex_lock(&psp->hdcp_context.mutex);
hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.hdcp_shared_buf;
memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory));
@@ -171,27 +182,30 @@ enum mod_hdcp_status mod_hdcp_hdcp1_destroy_session(struct mod_hdcp *hdcp)
psp_hdcp_invoke(psp, hdcp_cmd->cmd_id);
- if (hdcp_cmd->hdcp_status != TA_HDCP_STATUS__SUCCESS)
- return MOD_HDCP_STATUS_HDCP1_DESTROY_SESSION_FAILURE;
-
- HDCP_TOP_HDCP1_DESTROY_SESSION_TRACE(hdcp);
- for (i = 0; i < MAX_NUM_OF_DISPLAYS; i++)
- if (is_display_encryption_enabled(
- &hdcp->displays[i])) {
- hdcp->displays[i].state =
- MOD_HDCP_DISPLAY_ACTIVE_AND_ADDED;
- HDCP_HDCP1_DISABLED_TRACE(hdcp,
- hdcp->displays[i].index);
- }
+ if (hdcp_cmd->hdcp_status != TA_HDCP_STATUS__SUCCESS) {
+ status = MOD_HDCP_STATUS_HDCP1_DESTROY_SESSION_FAILURE;
+ } else {
+ HDCP_TOP_HDCP1_DESTROY_SESSION_TRACE(hdcp);
+ for (i = 0; i < MAX_NUM_OF_DISPLAYS; i++)
+ if (is_display_encryption_enabled(&hdcp->displays[i])) {
+ hdcp->displays[i].state =
+ MOD_HDCP_DISPLAY_ACTIVE;
+ HDCP_HDCP1_DISABLED_TRACE(
+ hdcp, hdcp->displays[i].index);
+ }
+ }
- return MOD_HDCP_STATUS_SUCCESS;
+ mutex_unlock(&psp->hdcp_context.mutex);
+ return status;
}
enum mod_hdcp_status mod_hdcp_hdcp1_validate_rx(struct mod_hdcp *hdcp)
{
struct psp_context *psp = hdcp->config.psp.handle;
struct ta_hdcp_shared_memory *hdcp_cmd;
+ enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
+ mutex_lock(&psp->hdcp_context.mutex);
hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.hdcp_shared_buf;
memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory));
@@ -206,10 +220,9 @@ enum mod_hdcp_status mod_hdcp_hdcp1_validate_rx(struct mod_hdcp *hdcp)
psp_hdcp_invoke(psp, hdcp_cmd->cmd_id);
- if (hdcp_cmd->hdcp_status != TA_HDCP_STATUS__SUCCESS)
- return MOD_HDCP_STATUS_HDCP1_VALIDATE_RX_FAILURE;
-
- if (hdcp_cmd->out_msg.hdcp1_first_part_authentication.authentication_status ==
+ if (hdcp_cmd->hdcp_status != TA_HDCP_STATUS__SUCCESS) {
+ status = MOD_HDCP_STATUS_HDCP1_VALIDATE_RX_FAILURE;
+ } else if (hdcp_cmd->out_msg.hdcp1_first_part_authentication.authentication_status ==
TA_HDCP_AUTHENTICATION_STATUS__HDCP1_FIRST_PART_COMPLETE) {
/* needs second part of authentication */
hdcp->connection.is_repeater = 1;
@@ -219,20 +232,22 @@ enum mod_hdcp_status mod_hdcp_hdcp1_validate_rx(struct mod_hdcp *hdcp)
} else if (hdcp_cmd->out_msg.hdcp1_first_part_authentication.authentication_status ==
TA_HDCP_AUTHENTICATION_STATUS__HDCP1_KSV_REVOKED) {
hdcp->connection.is_hdcp1_revoked = 1;
- return MOD_HDCP_STATUS_HDCP1_BKSV_REVOKED;
+ status = MOD_HDCP_STATUS_HDCP1_BKSV_REVOKED;
} else
- return MOD_HDCP_STATUS_HDCP1_VALIDATE_RX_FAILURE;
-
+ status = MOD_HDCP_STATUS_HDCP1_VALIDATE_RX_FAILURE;
- return MOD_HDCP_STATUS_SUCCESS;
+ mutex_unlock(&psp->hdcp_context.mutex);
+ return status;
}
enum mod_hdcp_status mod_hdcp_hdcp1_enable_encryption(struct mod_hdcp *hdcp)
{
struct psp_context *psp = hdcp->config.psp.handle;
struct ta_hdcp_shared_memory *hdcp_cmd;
- struct mod_hdcp_display *display = get_first_added_display(hdcp);
+ struct mod_hdcp_display *display = get_first_active_display(hdcp);
+ enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
+ mutex_lock(&psp->hdcp_context.mutex);
hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.hdcp_shared_buf;
memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory));
@@ -241,14 +256,15 @@ enum mod_hdcp_status mod_hdcp_hdcp1_enable_encryption(struct mod_hdcp *hdcp)
psp_hdcp_invoke(psp, hdcp_cmd->cmd_id);
- if (hdcp_cmd->hdcp_status != TA_HDCP_STATUS__SUCCESS)
- return MOD_HDCP_STATUS_HDCP1_ENABLE_ENCRYPTION;
-
- if (!is_dp_mst_hdcp(hdcp)) {
+ if (hdcp_cmd->hdcp_status != TA_HDCP_STATUS__SUCCESS) {
+ status = MOD_HDCP_STATUS_HDCP1_ENABLE_ENCRYPTION_FAILURE;
+ } else if (!is_dp_mst_hdcp(hdcp)) {
display->state = MOD_HDCP_DISPLAY_ENCRYPTION_ENABLED;
HDCP_HDCP1_ENABLED_TRACE(hdcp, display->index);
}
- return MOD_HDCP_STATUS_SUCCESS;
+
+ mutex_unlock(&psp->hdcp_context.mutex);
+ return status;
}
enum mod_hdcp_status mod_hdcp_hdcp1_validate_ksvlist_vp(struct mod_hdcp *hdcp)
@@ -257,6 +273,7 @@ enum mod_hdcp_status mod_hdcp_hdcp1_validate_ksvlist_vp(struct mod_hdcp *hdcp)
struct ta_hdcp_shared_memory *hdcp_cmd;
enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
+ mutex_lock(&psp->hdcp_context.mutex);
hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.hdcp_shared_buf;
memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory));
@@ -287,6 +304,7 @@ enum mod_hdcp_status mod_hdcp_hdcp1_validate_ksvlist_vp(struct mod_hdcp *hdcp)
status = MOD_HDCP_STATUS_HDCP1_VALIDATE_KSV_LIST_FAILURE;
}
+ mutex_unlock(&psp->hdcp_context.mutex);
return status;
}
@@ -296,14 +314,15 @@ enum mod_hdcp_status mod_hdcp_hdcp1_enable_dp_stream_encryption(struct mod_hdcp
struct psp_context *psp = hdcp->config.psp.handle;
struct ta_hdcp_shared_memory *hdcp_cmd;
int i = 0;
+ enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
+ mutex_lock(&psp->hdcp_context.mutex);
hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.hdcp_shared_buf;
for (i = 0; i < MAX_NUM_OF_DISPLAYS; i++) {
- if (hdcp->displays[i].state != MOD_HDCP_DISPLAY_ACTIVE_AND_ADDED ||
- hdcp->displays[i].adjust.disable)
- continue;
+ if (hdcp->displays[i].adjust.disable || hdcp->displays[i].state != MOD_HDCP_DISPLAY_ACTIVE)
+ continue;
memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory));
@@ -313,21 +332,26 @@ enum mod_hdcp_status mod_hdcp_hdcp1_enable_dp_stream_encryption(struct mod_hdcp
psp_hdcp_invoke(psp, hdcp_cmd->cmd_id);
- if (hdcp_cmd->hdcp_status != TA_HDCP_STATUS__SUCCESS)
- return MOD_HDCP_STATUS_HDCP1_ENABLE_STREAM_ENCRYPTION_FAILURE;
+ if (hdcp_cmd->hdcp_status != TA_HDCP_STATUS__SUCCESS) {
+ status = MOD_HDCP_STATUS_HDCP1_ENABLE_STREAM_ENCRYPTION_FAILURE;
+ break;
+ }
hdcp->displays[i].state = MOD_HDCP_DISPLAY_ENCRYPTION_ENABLED;
HDCP_HDCP1_ENABLED_TRACE(hdcp, hdcp->displays[i].index);
}
- return MOD_HDCP_STATUS_SUCCESS;
+ mutex_unlock(&psp->hdcp_context.mutex);
+ return status;
}
enum mod_hdcp_status mod_hdcp_hdcp1_link_maintenance(struct mod_hdcp *hdcp)
{
struct psp_context *psp = hdcp->config.psp.handle;
struct ta_hdcp_shared_memory *hdcp_cmd;
+ enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
+ mutex_lock(&psp->hdcp_context.mutex);
hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.hdcp_shared_buf;
memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory));
@@ -339,12 +363,12 @@ enum mod_hdcp_status mod_hdcp_hdcp1_link_maintenance(struct mod_hdcp *hdcp)
psp_hdcp_invoke(psp, hdcp_cmd->cmd_id);
- if (hdcp_cmd->hdcp_status != TA_HDCP_STATUS__SUCCESS)
- return MOD_HDCP_STATUS_HDCP1_LINK_MAINTENANCE_FAILURE;
+ if (hdcp_cmd->hdcp_status != TA_HDCP_STATUS__SUCCESS ||
+ hdcp_cmd->out_msg.hdcp1_get_encryption_status.protection_level != 1)
+ status = MOD_HDCP_STATUS_HDCP1_LINK_MAINTENANCE_FAILURE;
- return (hdcp_cmd->out_msg.hdcp1_get_encryption_status.protection_level == 1)
- ? MOD_HDCP_STATUS_SUCCESS
- : MOD_HDCP_STATUS_HDCP1_LINK_MAINTENANCE_FAILURE;
+ mutex_unlock(&psp->hdcp_context.mutex);
+ return status;
}
enum mod_hdcp_status mod_hdcp_hdcp1_get_link_encryption_status(struct mod_hdcp *hdcp,
@@ -364,19 +388,23 @@ enum mod_hdcp_status mod_hdcp_hdcp2_create_session(struct mod_hdcp *hdcp)
{
struct psp_context *psp = hdcp->config.psp.handle;
struct ta_hdcp_shared_memory *hdcp_cmd;
- struct mod_hdcp_display *display = get_first_added_display(hdcp);
+ struct mod_hdcp_display *display = get_first_active_display(hdcp);
+ enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
+
if (!psp->hdcp_context.hdcp_initialized) {
DRM_ERROR("Failed to create hdcp session, HDCP TA is not initialized");
return MOD_HDCP_STATUS_FAILURE;
}
- hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.hdcp_shared_buf;
- memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory));
-
if (!display)
return MOD_HDCP_STATUS_DISPLAY_NOT_FOUND;
+ mutex_lock(&psp->hdcp_context.mutex);
+
+ hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.hdcp_shared_buf;
+ memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory));
+
hdcp_cmd->in_msg.hdcp2_create_session_v2.display_handle = display->index;
if (hdcp->connection.link.adjust.hdcp2.force_type == MOD_HDCP_FORCE_TYPE_0)
@@ -393,12 +421,14 @@ enum mod_hdcp_status mod_hdcp_hdcp2_create_session(struct mod_hdcp *hdcp)
psp_hdcp_invoke(psp, hdcp_cmd->cmd_id);
- if (hdcp_cmd->hdcp_status != TA_HDCP_STATUS__SUCCESS)
- return MOD_HDCP_STATUS_HDCP2_CREATE_SESSION_FAILURE;
- hdcp->auth.id = hdcp_cmd->out_msg.hdcp2_create_session_v2.session_handle;
+ if (hdcp_cmd->hdcp_status != TA_HDCP_STATUS__SUCCESS)
+ status = MOD_HDCP_STATUS_HDCP2_CREATE_SESSION_FAILURE;
+ else
+ hdcp->auth.id = hdcp_cmd->out_msg.hdcp2_create_session_v2.session_handle;
- return MOD_HDCP_STATUS_SUCCESS;
+ mutex_unlock(&psp->hdcp_context.mutex);
+ return status;
}
enum mod_hdcp_status mod_hdcp_hdcp2_destroy_session(struct mod_hdcp *hdcp)
@@ -406,7 +436,9 @@ enum mod_hdcp_status mod_hdcp_hdcp2_destroy_session(struct mod_hdcp *hdcp)
struct psp_context *psp = hdcp->config.psp.handle;
struct ta_hdcp_shared_memory *hdcp_cmd;
uint8_t i = 0;
+ enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
+ mutex_lock(&psp->hdcp_context.mutex);
hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.hdcp_shared_buf;
memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory));
@@ -415,20 +447,21 @@ enum mod_hdcp_status mod_hdcp_hdcp2_destroy_session(struct mod_hdcp *hdcp)
psp_hdcp_invoke(psp, hdcp_cmd->cmd_id);
- if (hdcp_cmd->hdcp_status != TA_HDCP_STATUS__SUCCESS)
- return MOD_HDCP_STATUS_HDCP2_DESTROY_SESSION_FAILURE;
-
- HDCP_TOP_HDCP2_DESTROY_SESSION_TRACE(hdcp);
- for (i = 0; i < MAX_NUM_OF_DISPLAYS; i++)
- if (is_display_encryption_enabled(
- &hdcp->displays[i])) {
- hdcp->displays[i].state =
- MOD_HDCP_DISPLAY_ACTIVE_AND_ADDED;
- HDCP_HDCP2_DISABLED_TRACE(hdcp,
- hdcp->displays[i].index);
- }
+ if (hdcp_cmd->hdcp_status != TA_HDCP_STATUS__SUCCESS) {
+ status = MOD_HDCP_STATUS_HDCP2_DESTROY_SESSION_FAILURE;
+ } else {
+ HDCP_TOP_HDCP2_DESTROY_SESSION_TRACE(hdcp);
+ for (i = 0; i < MAX_NUM_OF_DISPLAYS; i++)
+ if (is_display_encryption_enabled(&hdcp->displays[i])) {
+ hdcp->displays[i].state =
+ MOD_HDCP_DISPLAY_ACTIVE;
+ HDCP_HDCP2_DISABLED_TRACE(
+ hdcp, hdcp->displays[i].index);
+ }
+ }
- return MOD_HDCP_STATUS_SUCCESS;
+ mutex_unlock(&psp->hdcp_context.mutex);
+ return status;
}
enum mod_hdcp_status mod_hdcp_hdcp2_prepare_ake_init(struct mod_hdcp *hdcp)
@@ -437,7 +470,9 @@ enum mod_hdcp_status mod_hdcp_hdcp2_prepare_ake_init(struct mod_hdcp *hdcp)
struct ta_hdcp_shared_memory *hdcp_cmd;
struct ta_hdcp_cmd_hdcp2_process_prepare_authentication_message_input_v2 *msg_in;
struct ta_hdcp_cmd_hdcp2_process_prepare_authentication_message_output_v2 *msg_out;
+ enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
+ mutex_lock(&psp->hdcp_context.mutex);
hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.hdcp_shared_buf;
memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory));
@@ -452,12 +487,13 @@ enum mod_hdcp_status mod_hdcp_hdcp2_prepare_ake_init(struct mod_hdcp *hdcp)
psp_hdcp_invoke(psp, hdcp_cmd->cmd_id);
if (hdcp_cmd->hdcp_status != TA_HDCP_STATUS__SUCCESS)
- return MOD_HDCP_STATUS_HDCP2_PREP_AKE_INIT_FAILURE;
-
- memcpy(&hdcp->auth.msg.hdcp2.ake_init[0], &msg_out->prepare.transmitter_message[0],
- sizeof(hdcp->auth.msg.hdcp2.ake_init));
+ status = MOD_HDCP_STATUS_HDCP2_PREP_AKE_INIT_FAILURE;
+ else
+ memcpy(&hdcp->auth.msg.hdcp2.ake_init[0], &msg_out->prepare.transmitter_message[0],
+ sizeof(hdcp->auth.msg.hdcp2.ake_init));
- return MOD_HDCP_STATUS_SUCCESS;
+ mutex_unlock(&psp->hdcp_context.mutex);
+ return status;
}
enum mod_hdcp_status mod_hdcp_hdcp2_validate_ake_cert(struct mod_hdcp *hdcp)
@@ -466,7 +502,9 @@ enum mod_hdcp_status mod_hdcp_hdcp2_validate_ake_cert(struct mod_hdcp *hdcp)
struct ta_hdcp_shared_memory *hdcp_cmd;
struct ta_hdcp_cmd_hdcp2_process_prepare_authentication_message_input_v2 *msg_in;
struct ta_hdcp_cmd_hdcp2_process_prepare_authentication_message_output_v2 *msg_out;
+ enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
+ mutex_lock(&psp->hdcp_context.mutex);
hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.hdcp_shared_buf;
memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory));
@@ -488,26 +526,32 @@ enum mod_hdcp_status mod_hdcp_hdcp2_validate_ake_cert(struct mod_hdcp *hdcp)
psp_hdcp_invoke(psp, hdcp_cmd->cmd_id);
- if (hdcp_cmd->hdcp_status != TA_HDCP_STATUS__SUCCESS)
- return MOD_HDCP_STATUS_HDCP2_VALIDATE_AKE_CERT_FAILURE;
-
- memcpy(hdcp->auth.msg.hdcp2.ake_no_stored_km, &msg_out->prepare.transmitter_message[0],
- sizeof(hdcp->auth.msg.hdcp2.ake_no_stored_km));
-
- memcpy(hdcp->auth.msg.hdcp2.ake_stored_km,
- &msg_out->prepare.transmitter_message[sizeof(hdcp->auth.msg.hdcp2.ake_no_stored_km)],
- sizeof(hdcp->auth.msg.hdcp2.ake_stored_km));
-
- if (msg_out->process.msg1_status == TA_HDCP2_MSG_AUTHENTICATION_STATUS__SUCCESS) {
- hdcp->connection.is_km_stored = msg_out->process.is_km_stored ? 1 : 0;
- hdcp->connection.is_repeater = msg_out->process.is_repeater ? 1 : 0;
- return MOD_HDCP_STATUS_SUCCESS;
- } else if (msg_out->process.msg1_status == TA_HDCP2_MSG_AUTHENTICATION_STATUS__RECEIVERID_REVOKED) {
- hdcp->connection.is_hdcp2_revoked = 1;
- return MOD_HDCP_STATUS_HDCP2_AKE_CERT_REVOKED;
+ if (hdcp_cmd->hdcp_status != TA_HDCP_STATUS__SUCCESS) {
+ status = MOD_HDCP_STATUS_HDCP2_VALIDATE_AKE_CERT_FAILURE;
+ } else {
+ memcpy(hdcp->auth.msg.hdcp2.ake_no_stored_km,
+ &msg_out->prepare.transmitter_message[0],
+ sizeof(hdcp->auth.msg.hdcp2.ake_no_stored_km));
+
+ memcpy(hdcp->auth.msg.hdcp2.ake_stored_km,
+ &msg_out->prepare.transmitter_message[sizeof(hdcp->auth.msg.hdcp2.ake_no_stored_km)],
+ sizeof(hdcp->auth.msg.hdcp2.ake_stored_km));
+
+ if (msg_out->process.msg1_status ==
+ TA_HDCP2_MSG_AUTHENTICATION_STATUS__SUCCESS) {
+ hdcp->connection.is_km_stored =
+ msg_out->process.is_km_stored ? 1 : 0;
+ hdcp->connection.is_repeater =
+ msg_out->process.is_repeater ? 1 : 0;
+ status = MOD_HDCP_STATUS_SUCCESS;
+ } else if (msg_out->process.msg1_status ==
+ TA_HDCP2_MSG_AUTHENTICATION_STATUS__RECEIVERID_REVOKED) {
+ hdcp->connection.is_hdcp2_revoked = 1;
+ status = MOD_HDCP_STATUS_HDCP2_AKE_CERT_REVOKED;
+ }
}
-
- return MOD_HDCP_STATUS_HDCP2_VALIDATE_AKE_CERT_FAILURE;
+ mutex_unlock(&psp->hdcp_context.mutex);
+ return status;
}
enum mod_hdcp_status mod_hdcp_hdcp2_validate_h_prime(struct mod_hdcp *hdcp)
@@ -516,7 +560,9 @@ enum mod_hdcp_status mod_hdcp_hdcp2_validate_h_prime(struct mod_hdcp *hdcp)
struct ta_hdcp_shared_memory *hdcp_cmd;
struct ta_hdcp_cmd_hdcp2_process_prepare_authentication_message_input_v2 *msg_in;
struct ta_hdcp_cmd_hdcp2_process_prepare_authentication_message_output_v2 *msg_out;
+ enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
+ mutex_lock(&psp->hdcp_context.mutex);
hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.hdcp_shared_buf;
memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory));
@@ -543,16 +589,15 @@ enum mod_hdcp_status mod_hdcp_hdcp2_validate_h_prime(struct mod_hdcp *hdcp)
psp_hdcp_invoke(psp, hdcp_cmd->cmd_id);
if (hdcp_cmd->hdcp_status != TA_HDCP_STATUS__SUCCESS)
- return MOD_HDCP_STATUS_HDCP2_VALIDATE_H_PRIME_FAILURE;
-
- if (msg_out->process.msg1_status != TA_HDCP2_MSG_AUTHENTICATION_STATUS__SUCCESS)
- return MOD_HDCP_STATUS_HDCP2_VALIDATE_H_PRIME_FAILURE;
+ status = MOD_HDCP_STATUS_HDCP2_VALIDATE_H_PRIME_FAILURE;
+ else if (msg_out->process.msg1_status != TA_HDCP2_MSG_AUTHENTICATION_STATUS__SUCCESS)
+ status = MOD_HDCP_STATUS_HDCP2_VALIDATE_H_PRIME_FAILURE;
else if (!hdcp->connection.is_km_stored &&
- msg_out->process.msg2_status != TA_HDCP2_MSG_AUTHENTICATION_STATUS__SUCCESS)
- return MOD_HDCP_STATUS_HDCP2_VALIDATE_PAIRING_INFO_FAILURE;
-
+ msg_out->process.msg2_status != TA_HDCP2_MSG_AUTHENTICATION_STATUS__SUCCESS)
+ status = MOD_HDCP_STATUS_HDCP2_VALIDATE_PAIRING_INFO_FAILURE;
- return MOD_HDCP_STATUS_SUCCESS;
+ mutex_unlock(&psp->hdcp_context.mutex);
+ return status;
}
enum mod_hdcp_status mod_hdcp_hdcp2_prepare_lc_init(struct mod_hdcp *hdcp)
@@ -561,7 +606,9 @@ enum mod_hdcp_status mod_hdcp_hdcp2_prepare_lc_init(struct mod_hdcp *hdcp)
struct ta_hdcp_shared_memory *hdcp_cmd;
struct ta_hdcp_cmd_hdcp2_process_prepare_authentication_message_input_v2 *msg_in;
struct ta_hdcp_cmd_hdcp2_process_prepare_authentication_message_output_v2 *msg_out;
+ enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
+ mutex_lock(&psp->hdcp_context.mutex);
hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.hdcp_shared_buf;
memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory));
@@ -577,12 +624,13 @@ enum mod_hdcp_status mod_hdcp_hdcp2_prepare_lc_init(struct mod_hdcp *hdcp)
psp_hdcp_invoke(psp, hdcp_cmd->cmd_id);
if (hdcp_cmd->hdcp_status != TA_HDCP_STATUS__SUCCESS)
- return MOD_HDCP_STATUS_HDCP2_PREP_LC_INIT_FAILURE;
-
- memcpy(hdcp->auth.msg.hdcp2.lc_init, &msg_out->prepare.transmitter_message[0],
- sizeof(hdcp->auth.msg.hdcp2.lc_init));
+ status = MOD_HDCP_STATUS_HDCP2_PREP_LC_INIT_FAILURE;
+ else
+ memcpy(hdcp->auth.msg.hdcp2.lc_init, &msg_out->prepare.transmitter_message[0],
+ sizeof(hdcp->auth.msg.hdcp2.lc_init));
- return MOD_HDCP_STATUS_SUCCESS;
+ mutex_unlock(&psp->hdcp_context.mutex);
+ return status;
}
enum mod_hdcp_status mod_hdcp_hdcp2_validate_l_prime(struct mod_hdcp *hdcp)
@@ -591,7 +639,9 @@ enum mod_hdcp_status mod_hdcp_hdcp2_validate_l_prime(struct mod_hdcp *hdcp)
struct ta_hdcp_shared_memory *hdcp_cmd;
struct ta_hdcp_cmd_hdcp2_process_prepare_authentication_message_input_v2 *msg_in;
struct ta_hdcp_cmd_hdcp2_process_prepare_authentication_message_output_v2 *msg_out;
+ enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
+ mutex_lock(&psp->hdcp_context.mutex);
hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.hdcp_shared_buf;
memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory));
@@ -610,13 +660,12 @@ enum mod_hdcp_status mod_hdcp_hdcp2_validate_l_prime(struct mod_hdcp *hdcp)
psp_hdcp_invoke(psp, hdcp_cmd->cmd_id);
- if (hdcp_cmd->hdcp_status != TA_HDCP_STATUS__SUCCESS)
- return MOD_HDCP_STATUS_HDCP2_VALIDATE_L_PRIME_FAILURE;
-
- if (msg_out->process.msg1_status != TA_HDCP2_MSG_AUTHENTICATION_STATUS__SUCCESS)
- return MOD_HDCP_STATUS_HDCP2_VALIDATE_L_PRIME_FAILURE;
+ if (hdcp_cmd->hdcp_status != TA_HDCP_STATUS__SUCCESS ||
+ msg_out->process.msg1_status != TA_HDCP2_MSG_AUTHENTICATION_STATUS__SUCCESS)
+ status = MOD_HDCP_STATUS_HDCP2_VALIDATE_L_PRIME_FAILURE;
- return MOD_HDCP_STATUS_SUCCESS;
+ mutex_unlock(&psp->hdcp_context.mutex);
+ return status;
}
enum mod_hdcp_status mod_hdcp_hdcp2_prepare_eks(struct mod_hdcp *hdcp)
@@ -625,7 +674,9 @@ enum mod_hdcp_status mod_hdcp_hdcp2_prepare_eks(struct mod_hdcp *hdcp)
struct ta_hdcp_shared_memory *hdcp_cmd;
struct ta_hdcp_cmd_hdcp2_process_prepare_authentication_message_input_v2 *msg_in;
struct ta_hdcp_cmd_hdcp2_process_prepare_authentication_message_output_v2 *msg_out;
+ enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
+ mutex_lock(&psp->hdcp_context.mutex);
hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.hdcp_shared_buf;
memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory));
@@ -642,48 +693,55 @@ enum mod_hdcp_status mod_hdcp_hdcp2_prepare_eks(struct mod_hdcp *hdcp)
hdcp_cmd->cmd_id = TA_HDCP_COMMAND__HDCP2_PREPARE_PROCESS_AUTHENTICATION_MSG_V2;
psp_hdcp_invoke(psp, hdcp_cmd->cmd_id);
- if (hdcp_cmd->hdcp_status != TA_HDCP_STATUS__SUCCESS)
- return MOD_HDCP_STATUS_HDCP2_PREP_EKS_FAILURE;
-
- memcpy(hdcp->auth.msg.hdcp2.ske_eks, &msg_out->prepare.transmitter_message[0],
- sizeof(hdcp->auth.msg.hdcp2.ske_eks));
- msg_out->prepare.msg1_desc.msg_size = sizeof(hdcp->auth.msg.hdcp2.ske_eks);
-
- if (is_dp_hdcp(hdcp)) {
- memcpy(hdcp->auth.msg.hdcp2.content_stream_type_dp,
- &msg_out->prepare.transmitter_message[sizeof(hdcp->auth.msg.hdcp2.ske_eks)],
- sizeof(hdcp->auth.msg.hdcp2.content_stream_type_dp));
+ if (hdcp_cmd->hdcp_status != TA_HDCP_STATUS__SUCCESS) {
+ status = MOD_HDCP_STATUS_HDCP2_PREP_EKS_FAILURE;
+ } else {
+ memcpy(hdcp->auth.msg.hdcp2.ske_eks,
+ &msg_out->prepare.transmitter_message[0],
+ sizeof(hdcp->auth.msg.hdcp2.ske_eks));
+ msg_out->prepare.msg1_desc.msg_size =
+ sizeof(hdcp->auth.msg.hdcp2.ske_eks);
+
+ if (is_dp_hdcp(hdcp)) {
+ memcpy(hdcp->auth.msg.hdcp2.content_stream_type_dp,
+ &msg_out->prepare.transmitter_message[sizeof(hdcp->auth.msg.hdcp2.ske_eks)],
+ sizeof(hdcp->auth.msg.hdcp2.content_stream_type_dp));
+ }
}
+ mutex_unlock(&psp->hdcp_context.mutex);
- return MOD_HDCP_STATUS_SUCCESS;
+ return status;
}
enum mod_hdcp_status mod_hdcp_hdcp2_enable_encryption(struct mod_hdcp *hdcp)
{
struct psp_context *psp = hdcp->config.psp.handle;
struct ta_hdcp_shared_memory *hdcp_cmd;
- struct mod_hdcp_display *display = get_first_added_display(hdcp);
-
- hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.hdcp_shared_buf;
- memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory));
+ struct mod_hdcp_display *display = get_first_active_display(hdcp);
+ enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
if (!display)
return MOD_HDCP_STATUS_DISPLAY_NOT_FOUND;
+ mutex_lock(&psp->hdcp_context.mutex);
+
+ hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.hdcp_shared_buf;
+ memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory));
+
hdcp_cmd->in_msg.hdcp2_set_encryption.session_handle = hdcp->auth.id;
hdcp_cmd->cmd_id = TA_HDCP_COMMAND__HDCP2_SET_ENCRYPTION;
psp_hdcp_invoke(psp, hdcp_cmd->cmd_id);
- if (hdcp_cmd->hdcp_status != TA_HDCP_STATUS__SUCCESS)
- return MOD_HDCP_STATUS_HDCP2_ENABLE_ENCRYPTION_FAILURE;
-
- if (!is_dp_mst_hdcp(hdcp)) {
+ if (hdcp_cmd->hdcp_status != TA_HDCP_STATUS__SUCCESS) {
+ status = MOD_HDCP_STATUS_HDCP2_ENABLE_ENCRYPTION_FAILURE;
+ } else if (!is_dp_mst_hdcp(hdcp)) {
display->state = MOD_HDCP_DISPLAY_ENCRYPTION_ENABLED;
HDCP_HDCP2_ENABLED_TRACE(hdcp, display->index);
}
- return MOD_HDCP_STATUS_SUCCESS;
+ mutex_unlock(&psp->hdcp_context.mutex);
+ return status;
}
enum mod_hdcp_status mod_hdcp_hdcp2_validate_rx_id_list(struct mod_hdcp *hdcp)
@@ -692,6 +750,9 @@ enum mod_hdcp_status mod_hdcp_hdcp2_validate_rx_id_list(struct mod_hdcp *hdcp)
struct ta_hdcp_shared_memory *hdcp_cmd;
struct ta_hdcp_cmd_hdcp2_process_prepare_authentication_message_input_v2 *msg_in;
struct ta_hdcp_cmd_hdcp2_process_prepare_authentication_message_output_v2 *msg_out;
+ enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
+
+ mutex_lock(&psp->hdcp_context.mutex);
hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.hdcp_shared_buf;
memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory));
@@ -712,23 +773,26 @@ enum mod_hdcp_status mod_hdcp_hdcp2_validate_rx_id_list(struct mod_hdcp *hdcp)
psp_hdcp_invoke(psp, hdcp_cmd->cmd_id);
- if (hdcp_cmd->hdcp_status != TA_HDCP_STATUS__SUCCESS)
- return MOD_HDCP_STATUS_HDCP2_VALIDATE_RX_ID_LIST_FAILURE;
-
- memcpy(hdcp->auth.msg.hdcp2.repeater_auth_ack, &msg_out->prepare.transmitter_message[0],
- sizeof(hdcp->auth.msg.hdcp2.repeater_auth_ack));
-
- if (msg_out->process.msg1_status == TA_HDCP2_MSG_AUTHENTICATION_STATUS__SUCCESS) {
- hdcp->connection.is_km_stored = msg_out->process.is_km_stored ? 1 : 0;
- hdcp->connection.is_repeater = msg_out->process.is_repeater ? 1 : 0;
- return MOD_HDCP_STATUS_SUCCESS;
- } else if (msg_out->process.msg1_status == TA_HDCP2_MSG_AUTHENTICATION_STATUS__RECEIVERID_REVOKED) {
- hdcp->connection.is_hdcp2_revoked = 1;
- return MOD_HDCP_STATUS_HDCP2_RX_ID_LIST_REVOKED;
+ if (hdcp_cmd->hdcp_status != TA_HDCP_STATUS__SUCCESS) {
+ status = MOD_HDCP_STATUS_HDCP2_VALIDATE_RX_ID_LIST_FAILURE;
+ } else {
+ memcpy(hdcp->auth.msg.hdcp2.repeater_auth_ack,
+ &msg_out->prepare.transmitter_message[0],
+ sizeof(hdcp->auth.msg.hdcp2.repeater_auth_ack));
+
+ if (msg_out->process.msg1_status ==
+ TA_HDCP2_MSG_AUTHENTICATION_STATUS__SUCCESS) {
+ hdcp->connection.is_km_stored = msg_out->process.is_km_stored ? 1 : 0;
+ hdcp->connection.is_repeater = msg_out->process.is_repeater ? 1 : 0;
+ status = MOD_HDCP_STATUS_SUCCESS;
+ } else if (msg_out->process.msg1_status ==
+ TA_HDCP2_MSG_AUTHENTICATION_STATUS__RECEIVERID_REVOKED) {
+ hdcp->connection.is_hdcp2_revoked = 1;
+ status = MOD_HDCP_STATUS_HDCP2_RX_ID_LIST_REVOKED;
+ }
}
-
-
- return MOD_HDCP_STATUS_HDCP2_VALIDATE_RX_ID_LIST_FAILURE;
+ mutex_unlock(&psp->hdcp_context.mutex);
+ return status;
}
enum mod_hdcp_status mod_hdcp_hdcp2_enable_dp_stream_encryption(struct mod_hdcp *hdcp)
@@ -737,7 +801,9 @@ enum mod_hdcp_status mod_hdcp_hdcp2_enable_dp_stream_encryption(struct mod_hdcp
struct ta_hdcp_shared_memory *hdcp_cmd;
struct ta_hdcp_cmd_hdcp2_process_prepare_authentication_message_input_v2 *msg_in;
uint8_t i;
+ enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
+ mutex_lock(&psp->hdcp_context.mutex);
hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.hdcp_shared_buf;
memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory));
@@ -747,9 +813,9 @@ enum mod_hdcp_status mod_hdcp_hdcp2_enable_dp_stream_encryption(struct mod_hdcp
for (i = 0; i < MAX_NUM_OF_DISPLAYS; i++) {
- if (hdcp->displays[i].state != MOD_HDCP_DISPLAY_ACTIVE_AND_ADDED ||
- hdcp->displays[i].adjust.disable)
- continue;
+ if (hdcp->displays[i].adjust.disable || hdcp->displays[i].state != MOD_HDCP_DISPLAY_ACTIVE)
+ continue;
+
hdcp_cmd->in_msg.hdcp2_enable_dp_stream_encryption.display_handle = hdcp->displays[i].index;
hdcp_cmd->in_msg.hdcp2_enable_dp_stream_encryption.session_handle = hdcp->auth.id;
@@ -763,8 +829,13 @@ enum mod_hdcp_status mod_hdcp_hdcp2_enable_dp_stream_encryption(struct mod_hdcp
HDCP_HDCP2_ENABLED_TRACE(hdcp, hdcp->displays[i].index);
}
- return (hdcp_cmd->hdcp_status == TA_HDCP_STATUS__SUCCESS) ? MOD_HDCP_STATUS_SUCCESS
- : MOD_HDCP_STATUS_HDCP2_ENABLE_STREAM_ENCRYPTION;
+ if (hdcp_cmd->hdcp_status == TA_HDCP_STATUS__SUCCESS)
+ status = MOD_HDCP_STATUS_SUCCESS;
+ else
+ status = MOD_HDCP_STATUS_HDCP2_ENABLE_STREAM_ENCRYPTION_FAILURE;
+
+ mutex_unlock(&psp->hdcp_context.mutex);
+ return status;
}
enum mod_hdcp_status mod_hdcp_hdcp2_prepare_stream_management(struct mod_hdcp *hdcp)
@@ -774,7 +845,9 @@ enum mod_hdcp_status mod_hdcp_hdcp2_prepare_stream_management(struct mod_hdcp *h
struct ta_hdcp_shared_memory *hdcp_cmd;
struct ta_hdcp_cmd_hdcp2_process_prepare_authentication_message_input_v2 *msg_in;
struct ta_hdcp_cmd_hdcp2_process_prepare_authentication_message_output_v2 *msg_out;
+ enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
+ mutex_lock(&psp->hdcp_context.mutex);
hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.hdcp_shared_buf;
memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory));
@@ -789,15 +862,17 @@ enum mod_hdcp_status mod_hdcp_hdcp2_prepare_stream_management(struct mod_hdcp *h
hdcp_cmd->cmd_id = TA_HDCP_COMMAND__HDCP2_PREPARE_PROCESS_AUTHENTICATION_MSG_V2;
psp_hdcp_invoke(psp, hdcp_cmd->cmd_id);
- if (hdcp_cmd->hdcp_status != TA_HDCP_STATUS__SUCCESS)
- return MOD_HDCP_STATUS_HDCP2_PREPARE_STREAM_MANAGEMENT_FAILURE;
-
- hdcp->auth.msg.hdcp2.stream_manage_size = msg_out->prepare.msg1_desc.msg_size;
-
- memcpy(hdcp->auth.msg.hdcp2.repeater_auth_stream_manage, &msg_out->prepare.transmitter_message[0],
- sizeof(hdcp->auth.msg.hdcp2.repeater_auth_stream_manage));
+ if (hdcp_cmd->hdcp_status != TA_HDCP_STATUS__SUCCESS) {
+ status = MOD_HDCP_STATUS_HDCP2_PREPARE_STREAM_MANAGEMENT_FAILURE;
+ } else {
+ hdcp->auth.msg.hdcp2.stream_manage_size = msg_out->prepare.msg1_desc.msg_size;
- return MOD_HDCP_STATUS_SUCCESS;
+ memcpy(hdcp->auth.msg.hdcp2.repeater_auth_stream_manage,
+ &msg_out->prepare.transmitter_message[0],
+ sizeof(hdcp->auth.msg.hdcp2.repeater_auth_stream_manage));
+ }
+ mutex_unlock(&psp->hdcp_context.mutex);
+ return status;
}
enum mod_hdcp_status mod_hdcp_hdcp2_validate_stream_ready(struct mod_hdcp *hdcp)
@@ -806,7 +881,9 @@ enum mod_hdcp_status mod_hdcp_hdcp2_validate_stream_ready(struct mod_hdcp *hdcp)
struct ta_hdcp_shared_memory *hdcp_cmd;
struct ta_hdcp_cmd_hdcp2_process_prepare_authentication_message_input_v2 *msg_in;
struct ta_hdcp_cmd_hdcp2_process_prepare_authentication_message_output_v2 *msg_out;
+ enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
+ mutex_lock(&psp->hdcp_context.mutex);
hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.hdcp_shared_buf;
memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory));
@@ -825,38 +902,13 @@ enum mod_hdcp_status mod_hdcp_hdcp2_validate_stream_ready(struct mod_hdcp *hdcp)
hdcp_cmd->cmd_id = TA_HDCP_COMMAND__HDCP2_PREPARE_PROCESS_AUTHENTICATION_MSG_V2;
psp_hdcp_invoke(psp, hdcp_cmd->cmd_id);
- return (hdcp_cmd->hdcp_status == TA_HDCP_STATUS__SUCCESS) &&
- (msg_out->process.msg1_status == TA_HDCP2_MSG_AUTHENTICATION_STATUS__SUCCESS)
- ? MOD_HDCP_STATUS_SUCCESS
- : MOD_HDCP_STATUS_HDCP2_VALIDATE_STREAM_READY_FAILURE;
-}
-
-enum mod_hdcp_status mod_hdcp_hdcp2_get_link_encryption_status(struct mod_hdcp *hdcp,
- enum mod_hdcp_encryption_status *encryption_status)
-{
- struct psp_context *psp = hdcp->config.psp.handle;
- struct ta_hdcp_shared_memory *hdcp_cmd;
-
- hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.hdcp_shared_buf;
-
- memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory));
-
- hdcp_cmd->in_msg.hdcp2_get_encryption_status.session_handle = hdcp->auth.id;
- hdcp_cmd->out_msg.hdcp2_get_encryption_status.protection_level = 0;
- hdcp_cmd->cmd_id = TA_HDCP_COMMAND__HDCP2_GET_ENCRYPTION_STATUS;
- *encryption_status = MOD_HDCP_ENCRYPTION_STATUS_HDCP_OFF;
-
- psp_hdcp_invoke(psp, hdcp_cmd->cmd_id);
-
- if (hdcp_cmd->hdcp_status != TA_HDCP_STATUS__SUCCESS)
- return MOD_HDCP_STATUS_FAILURE;
-
- if (hdcp_cmd->out_msg.hdcp2_get_encryption_status.protection_level == 1) {
- if (hdcp_cmd->out_msg.hdcp2_get_encryption_status.hdcp2_type == TA_HDCP2_CONTENT_TYPE__TYPE1)
- *encryption_status = MOD_HDCP_ENCRYPTION_STATUS_HDCP2_TYPE1_ON;
- else
- *encryption_status = MOD_HDCP_ENCRYPTION_STATUS_HDCP2_TYPE0_ON;
- }
+ if (hdcp_cmd->hdcp_status == TA_HDCP_STATUS__SUCCESS &&
+ msg_out->process.msg1_status == TA_HDCP2_MSG_AUTHENTICATION_STATUS__SUCCESS)
+ status = MOD_HDCP_STATUS_SUCCESS;
+ else
+ status = MOD_HDCP_STATUS_HDCP2_VALIDATE_STREAM_READY_FAILURE;
- return MOD_HDCP_STATUS_SUCCESS;
+ mutex_unlock(&psp->hdcp_context.mutex);
+ return status;
}
+
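Editorial note: the HDCP hunks above all apply one pattern — each PSP TA command wrapper now takes psp->hdcp_context.mutex before touching the shared command buffer, records a status instead of returning early, and releases the lock at a single exit point. A minimal sketch of that shape, using placeholder names (hdcp_ctx, submit_cmd) rather than the real driver API:

#include <linux/mutex.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/types.h>

struct hdcp_ctx {
	struct mutex mutex;
	u8 shared_buf[256];
};

static int hdcp_submit_locked(struct hdcp_ctx *ctx,
			      int (*submit_cmd)(struct hdcp_ctx *))
{
	int status = 0;

	mutex_lock(&ctx->mutex);
	memset(ctx->shared_buf, 0, sizeof(ctx->shared_buf));

	if (submit_cmd(ctx))
		status = -EINVAL;	/* remember the failure, keep going */

	mutex_unlock(&ctx->mutex);	/* one unlock covers every exit path */
	return status;
}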
diff --git a/drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h b/drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h
index dbe7835aabcf..0ba3cf7f336a 100644
--- a/drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h
+++ b/drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h
@@ -83,6 +83,8 @@ struct mod_freesync_config {
bool btr;
unsigned int min_refresh_in_uhz;
unsigned int max_refresh_in_uhz;
+ unsigned int fixed_refresh_in_uhz;
+
};
struct mod_vrr_params_btr {
@@ -112,6 +114,7 @@ struct mod_vrr_params {
uint32_t max_duration_in_us;
uint32_t max_refresh_in_uhz;
uint32_t min_duration_in_us;
+ uint32_t fixed_refresh_in_uhz;
struct dc_crtc_timing_adjust adjust;
diff --git a/drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h b/drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h
index c088602bc1a0..eed560eecbab 100644
--- a/drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h
+++ b/drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h
@@ -60,7 +60,7 @@ enum mod_hdcp_status {
MOD_HDCP_STATUS_HDCP1_KSV_LIST_NOT_READY,
MOD_HDCP_STATUS_HDCP1_VALIDATE_KSV_LIST_FAILURE,
MOD_HDCP_STATUS_HDCP1_KSV_LIST_REVOKED,
- MOD_HDCP_STATUS_HDCP1_ENABLE_ENCRYPTION,
+ MOD_HDCP_STATUS_HDCP1_ENABLE_ENCRYPTION_FAILURE,
MOD_HDCP_STATUS_HDCP1_ENABLE_STREAM_ENCRYPTION_FAILURE,
MOD_HDCP_STATUS_HDCP1_MAX_CASCADE_EXCEEDED_FAILURE,
MOD_HDCP_STATUS_HDCP1_MAX_DEVS_EXCEEDED_FAILURE,
@@ -90,7 +90,7 @@ enum mod_hdcp_status {
MOD_HDCP_STATUS_HDCP2_RX_ID_LIST_NOT_READY,
MOD_HDCP_STATUS_HDCP2_VALIDATE_RX_ID_LIST_FAILURE,
MOD_HDCP_STATUS_HDCP2_RX_ID_LIST_REVOKED,
- MOD_HDCP_STATUS_HDCP2_ENABLE_STREAM_ENCRYPTION,
+ MOD_HDCP_STATUS_HDCP2_ENABLE_STREAM_ENCRYPTION_FAILURE,
MOD_HDCP_STATUS_HDCP2_STREAM_READY_PENDING,
MOD_HDCP_STATUS_HDCP2_VALIDATE_STREAM_READY_FAILURE,
MOD_HDCP_STATUS_HDCP2_PREPARE_STREAM_MANAGEMENT_FAILURE,
@@ -117,7 +117,6 @@ enum mod_hdcp_operation_mode {
enum mod_hdcp_display_state {
MOD_HDCP_DISPLAY_INACTIVE = 0,
MOD_HDCP_DISPLAY_ACTIVE,
- MOD_HDCP_DISPLAY_ACTIVE_AND_ADDED,
MOD_HDCP_DISPLAY_ENCRYPTION_ENABLED
};
diff --git a/drivers/gpu/drm/amd/display/modules/inc/mod_shared.h b/drivers/gpu/drm/amd/display/modules/inc/mod_shared.h
index fe2117904329..198c0e64d13a 100644
--- a/drivers/gpu/drm/amd/display/modules/inc/mod_shared.h
+++ b/drivers/gpu/drm/amd/display/modules/inc/mod_shared.h
@@ -40,8 +40,9 @@ enum color_transfer_func {
enum vrr_packet_type {
PACKET_TYPE_VRR,
- PACKET_TYPE_FS1,
- PACKET_TYPE_FS2,
+ PACKET_TYPE_FS_V1,
+ PACKET_TYPE_FS_V2,
+ PACKET_TYPE_FS_V3,
PACKET_TYPE_VTEM
};
diff --git a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c
index cff3ab15fc0c..7cd8a43d1889 100644
--- a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c
+++ b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c
@@ -144,7 +144,7 @@ void mod_build_vsc_infopacket(const struct dc_stream_state *stream,
}
/*VSC packet set to 2 when DP revision >= 1.2*/
- if (stream->psr_version != 0)
+ if (stream->link->psr_settings.psr_version != DC_PSR_VERSION_UNSUPPORTED)
vsc_packet_revision = vsc_packet_rev2;
/* Update to revision 5 for extended colorimetry support */
diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c
index e75a4bb94488..8c37bcc27132 100644
--- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c
+++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c
@@ -24,6 +24,9 @@
#include "power_helpers.h"
#include "dc/inc/hw/dmcu.h"
+#include "dc/inc/hw/abm.h"
+#include "dc.h"
+#include "core_types.h"
#define DIV_ROUNDUP(a, b) (((a)+((b)/2))/(b))
@@ -237,7 +240,7 @@ static void fill_backlight_transform_table(struct dmcu_iram_parameters params,
}
static void fill_backlight_transform_table_v_2_2(struct dmcu_iram_parameters params,
- struct iram_table_v_2_2 *table)
+ struct iram_table_v_2_2 *table, bool big_endian)
{
unsigned int i;
unsigned int num_entries = NUM_BL_CURVE_SEGS;
@@ -261,10 +264,12 @@ static void fill_backlight_transform_table_v_2_2(struct dmcu_iram_parameters par
lut_index = (params.backlight_lut_array_size - 1) * i / (num_entries - 1);
ASSERT(lut_index < params.backlight_lut_array_size);
- table->backlight_thresholds[i] =
- cpu_to_be16(DIV_ROUNDUP((i * 65536), num_entries));
- table->backlight_offsets[i] =
- cpu_to_be16(params.backlight_lut_array[lut_index]);
+ table->backlight_thresholds[i] = (big_endian) ?
+ cpu_to_be16(DIV_ROUNDUP((i * 65536), num_entries)) :
+ cpu_to_le16(DIV_ROUNDUP((i * 65536), num_entries));
+ table->backlight_offsets[i] = (big_endian) ?
+ cpu_to_be16(params.backlight_lut_array[lut_index]) :
+ cpu_to_le16(params.backlight_lut_array[lut_index]);
}
}
@@ -584,18 +589,18 @@ void fill_iram_v_2_2(struct iram_table_v_2_2 *ram_table, struct dmcu_iram_parame
ram_table->crgb_slope[7] = cpu_to_be16(0x1910);
fill_backlight_transform_table_v_2_2(
- params, ram_table);
+ params, ram_table, true);
}
-void fill_iram_v_2_3(struct iram_table_v_2_2 *ram_table, struct dmcu_iram_parameters params)
+void fill_iram_v_2_3(struct iram_table_v_2_2 *ram_table, struct dmcu_iram_parameters params, bool big_endian)
{
unsigned int i, j;
unsigned int set = params.set;
ram_table->flags = 0x0;
-
- ram_table->min_abm_backlight =
- cpu_to_be16(params.min_abm_backlight);
+ ram_table->min_abm_backlight = (big_endian) ?
+ cpu_to_be16(params.min_abm_backlight) :
+ cpu_to_le16(params.min_abm_backlight);
for (i = 0; i < NUM_AGGR_LEVEL; i++) {
ram_table->hybrid_factor[i] = abm_settings[set][i].brightness_gain;
@@ -619,33 +624,51 @@ void fill_iram_v_2_3(struct iram_table_v_2_2 *ram_table, struct dmcu_iram_parame
ram_table->iir_curve[4] = 0x65;
//Gamma 2.2
- ram_table->crgb_thresh[0] = cpu_to_be16(0x127c);
- ram_table->crgb_thresh[1] = cpu_to_be16(0x151b);
- ram_table->crgb_thresh[2] = cpu_to_be16(0x17d5);
- ram_table->crgb_thresh[3] = cpu_to_be16(0x1a56);
- ram_table->crgb_thresh[4] = cpu_to_be16(0x1c83);
- ram_table->crgb_thresh[5] = cpu_to_be16(0x1e72);
- ram_table->crgb_thresh[6] = cpu_to_be16(0x20f0);
- ram_table->crgb_thresh[7] = cpu_to_be16(0x232b);
- ram_table->crgb_offset[0] = cpu_to_be16(0x2999);
- ram_table->crgb_offset[1] = cpu_to_be16(0x3999);
- ram_table->crgb_offset[2] = cpu_to_be16(0x4666);
- ram_table->crgb_offset[3] = cpu_to_be16(0x5999);
- ram_table->crgb_offset[4] = cpu_to_be16(0x6333);
- ram_table->crgb_offset[5] = cpu_to_be16(0x7800);
- ram_table->crgb_offset[6] = cpu_to_be16(0x8c00);
- ram_table->crgb_offset[7] = cpu_to_be16(0xa000);
- ram_table->crgb_slope[0] = cpu_to_be16(0x3609);
- ram_table->crgb_slope[1] = cpu_to_be16(0x2dfa);
- ram_table->crgb_slope[2] = cpu_to_be16(0x27ea);
- ram_table->crgb_slope[3] = cpu_to_be16(0x235d);
- ram_table->crgb_slope[4] = cpu_to_be16(0x2042);
- ram_table->crgb_slope[5] = cpu_to_be16(0x1dc3);
- ram_table->crgb_slope[6] = cpu_to_be16(0x1b1a);
- ram_table->crgb_slope[7] = cpu_to_be16(0x1910);
+ ram_table->crgb_thresh[0] = (big_endian) ? cpu_to_be16(0x127c) : cpu_to_le16(0x127c);
+ ram_table->crgb_thresh[1] = (big_endian) ? cpu_to_be16(0x151b) : cpu_to_le16(0x151b);
+ ram_table->crgb_thresh[2] = (big_endian) ? cpu_to_be16(0x17d5) : cpu_to_le16(0x17d5);
+ ram_table->crgb_thresh[3] = (big_endian) ? cpu_to_be16(0x1a56) : cpu_to_le16(0x1a56);
+ ram_table->crgb_thresh[4] = (big_endian) ? cpu_to_be16(0x1c83) : cpu_to_le16(0x1c83);
+ ram_table->crgb_thresh[5] = (big_endian) ? cpu_to_be16(0x1e72) : cpu_to_le16(0x1e72);
+ ram_table->crgb_thresh[6] = (big_endian) ? cpu_to_be16(0x20f0) : cpu_to_le16(0x20f0);
+ ram_table->crgb_thresh[7] = (big_endian) ? cpu_to_be16(0x232b) : cpu_to_le16(0x232b);
+ ram_table->crgb_offset[0] = (big_endian) ? cpu_to_be16(0x2999) : cpu_to_le16(0x2999);
+ ram_table->crgb_offset[1] = (big_endian) ? cpu_to_be16(0x3999) : cpu_to_le16(0x3999);
+ ram_table->crgb_offset[2] = (big_endian) ? cpu_to_be16(0x4666) : cpu_to_le16(0x4666);
+ ram_table->crgb_offset[3] = (big_endian) ? cpu_to_be16(0x5999) : cpu_to_le16(0x5999);
+ ram_table->crgb_offset[4] = (big_endian) ? cpu_to_be16(0x6333) : cpu_to_le16(0x6333);
+ ram_table->crgb_offset[5] = (big_endian) ? cpu_to_be16(0x7800) : cpu_to_le16(0x7800);
+ ram_table->crgb_offset[6] = (big_endian) ? cpu_to_be16(0x8c00) : cpu_to_le16(0x8c00);
+ ram_table->crgb_offset[7] = (big_endian) ? cpu_to_be16(0xa000) : cpu_to_le16(0xa000);
+ ram_table->crgb_slope[0] = (big_endian) ? cpu_to_be16(0x3609) : cpu_to_le16(0x3609);
+ ram_table->crgb_slope[1] = (big_endian) ? cpu_to_be16(0x2dfa) : cpu_to_le16(0x2dfa);
+ ram_table->crgb_slope[2] = (big_endian) ? cpu_to_be16(0x27ea) : cpu_to_le16(0x27ea);
+ ram_table->crgb_slope[3] = (big_endian) ? cpu_to_be16(0x235d) : cpu_to_le16(0x235d);
+ ram_table->crgb_slope[4] = (big_endian) ? cpu_to_be16(0x2042) : cpu_to_le16(0x2042);
+ ram_table->crgb_slope[5] = (big_endian) ? cpu_to_be16(0x1dc3) : cpu_to_le16(0x1dc3);
+ ram_table->crgb_slope[6] = (big_endian) ? cpu_to_be16(0x1b1a) : cpu_to_le16(0x1b1a);
+ ram_table->crgb_slope[7] = (big_endian) ? cpu_to_be16(0x1910) : cpu_to_le16(0x1910);
fill_backlight_transform_table_v_2_2(
- params, ram_table);
+ params, ram_table, big_endian);
+}
+
+bool dmub_init_abm_config(struct abm *abm,
+ struct dmcu_iram_parameters params)
+{
+ unsigned char ram_table[IRAM_SIZE];
+ bool result = false;
+
+ if (abm == NULL)
+ return false;
+
+ memset(&ram_table, 0, sizeof(ram_table));
+
+ fill_iram_v_2_3((struct iram_table_v_2_2 *)ram_table, params, false);
+ result = abm->funcs->init_abm_config(
+ abm, (char *)(&ram_table), IRAM_RESERVE_AREA_START_V2_2);
+
+ return result;
}
bool dmcu_load_iram(struct dmcu *dmcu,
@@ -657,17 +680,17 @@ bool dmcu_load_iram(struct dmcu *dmcu,
if (dmcu == NULL)
return false;
- if (!dmcu->funcs->is_dmcu_initialized(dmcu))
+ if (dmcu && !dmcu->funcs->is_dmcu_initialized(dmcu))
return true;
memset(&ram_table, 0, sizeof(ram_table));
if (dmcu->dmcu_version.abm_version == 0x24) {
- fill_iram_v_2_3((struct iram_table_v_2_2 *)ram_table, params);
- result = dmcu->funcs->load_iram(
- dmcu, 0, (char *)(&ram_table), IRAM_RESERVE_AREA_START_V2_2);
+ fill_iram_v_2_3((struct iram_table_v_2_2 *)ram_table, params, true);
+ result = dmcu->funcs->load_iram(
+ dmcu, 0, (char *)(&ram_table), IRAM_RESERVE_AREA_START_V2_2);
} else if (dmcu->dmcu_version.abm_version == 0x23) {
- fill_iram_v_2_3((struct iram_table_v_2_2 *)ram_table, params);
+ fill_iram_v_2_3((struct iram_table_v_2_2 *)ram_table, params, true);
result = dmcu->funcs->load_iram(
dmcu, 0, (char *)(&ram_table), IRAM_RESERVE_AREA_START_V2_2);
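Editorial note: fill_backlight_transform_table_v_2_2() and fill_iram_v_2_3() now take a big_endian flag so the same IRAM image can be packed big-endian for the DMCU firmware path (dmcu_load_iram passes true) and little-endian for the new DMUB/ABM path (dmub_init_abm_config passes false). The repeated ternary could be folded into one helper; this is only a sketch of that idea, not code from the patch:

#include <linux/types.h>
#include <asm/byteorder.h>

/* Pack a 16-bit value in whichever byte order the firmware expects. */
static inline u16 pack_fw16(u16 v, bool big_endian)
{
	return big_endian ? (__force u16)cpu_to_be16(v)
			  : (__force u16)cpu_to_le16(v);
}

/* e.g. ram_table->crgb_thresh[0] = pack_fw16(0x127c, big_endian); */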
diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.h b/drivers/gpu/drm/amd/display/modules/power/power_helpers.h
index e54157026330..46fbca2e2cd1 100644
--- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.h
+++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.h
@@ -26,6 +26,7 @@
#define MODULES_POWER_POWER_HELPERS_H_
#include "dc/inc/hw/dmcu.h"
+#include "dc/inc/hw/abm.h"
enum abm_defines {
@@ -44,5 +45,7 @@ struct dmcu_iram_parameters {
bool dmcu_load_iram(struct dmcu *dmcu,
struct dmcu_iram_parameters params);
+bool dmub_init_abm_config(struct abm *abm,
+ struct dmcu_iram_parameters params);
#endif /* MODULES_POWER_POWER_HELPERS_H_ */
diff --git a/drivers/gpu/drm/amd/display/modules/stats/stats.c b/drivers/gpu/drm/amd/display/modules/stats/stats.c
deleted file mode 100644
index 03121ca64fe4..000000000000
--- a/drivers/gpu/drm/amd/display/modules/stats/stats.c
+++ /dev/null
@@ -1,448 +0,0 @@
-/*
- * Copyright 2016 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: AMD
- *
- */
-
-#include "mod_stats.h"
-#include "dm_services.h"
-#include "dc.h"
-#include "core_types.h"
-
-#define DAL_STATS_ENABLE_REGKEY "DalStatsEnable"
-#define DAL_STATS_ENABLE_REGKEY_DEFAULT 0x00000000
-#define DAL_STATS_ENABLE_REGKEY_ENABLED 0x00000001
-
-#define DAL_STATS_ENTRIES_REGKEY "DalStatsEntries"
-#define DAL_STATS_ENTRIES_REGKEY_DEFAULT 0x00350000
-#define DAL_STATS_ENTRIES_REGKEY_MAX 0x01000000
-
-#define DAL_STATS_EVENT_ENTRIES_DEFAULT 0x00000100
-
-#define MOD_STATS_NUM_VSYNCS 5
-#define MOD_STATS_EVENT_STRING_MAX 512
-
-struct stats_time_cache {
- unsigned int entry_id;
-
- unsigned long flip_timestamp_in_ns;
- unsigned long vupdate_timestamp_in_ns;
-
- unsigned int render_time_in_us;
- unsigned int avg_render_time_in_us_last_ten;
- unsigned int v_sync_time_in_us[MOD_STATS_NUM_VSYNCS];
- unsigned int num_vsync_between_flips;
-
- unsigned int flip_to_vsync_time_in_us;
- unsigned int vsync_to_flip_time_in_us;
-
- unsigned int min_window;
- unsigned int max_window;
- unsigned int v_total_min;
- unsigned int v_total_max;
- unsigned int event_triggers;
-
- unsigned int lfc_mid_point_in_us;
- unsigned int num_frames_inserted;
- unsigned int inserted_duration_in_us;
-
- unsigned int flags;
-};
-
-struct stats_event_cache {
- unsigned int entry_id;
- char event_string[MOD_STATS_EVENT_STRING_MAX];
-};
-
-struct core_stats {
- struct mod_stats public;
- struct dc *dc;
-
- bool enabled;
- unsigned int entries;
- unsigned int event_entries;
- unsigned int entry_id;
-
- struct stats_time_cache *time;
- unsigned int index;
-
- struct stats_event_cache *events;
- unsigned int event_index;
-
-};
-
-#define MOD_STATS_TO_CORE(mod_stats)\
- container_of(mod_stats, struct core_stats, public)
-
-bool mod_stats_init(struct mod_stats *mod_stats)
-{
- bool result = false;
- struct core_stats *core_stats = NULL;
- struct dc *dc = NULL;
-
- if (mod_stats == NULL)
- return false;
-
- core_stats = MOD_STATS_TO_CORE(mod_stats);
- dc = core_stats->dc;
-
- return result;
-}
-
-struct mod_stats *mod_stats_create(struct dc *dc)
-{
- struct core_stats *core_stats = NULL;
- struct persistent_data_flag flag;
- unsigned int reg_data;
- int i = 0;
-
- if (dc == NULL)
- goto fail_construct;
-
- core_stats = kzalloc(sizeof(struct core_stats), GFP_KERNEL);
-
- if (core_stats == NULL)
- goto fail_construct;
-
- core_stats->dc = dc;
-
- core_stats->enabled = DAL_STATS_ENABLE_REGKEY_DEFAULT;
- if (dm_read_persistent_data(dc->ctx, NULL, NULL,
- DAL_STATS_ENABLE_REGKEY,
- &reg_data, sizeof(unsigned int), &flag))
- core_stats->enabled = reg_data;
-
- if (core_stats->enabled) {
- core_stats->entries = DAL_STATS_ENTRIES_REGKEY_DEFAULT;
- if (dm_read_persistent_data(dc->ctx, NULL, NULL,
- DAL_STATS_ENTRIES_REGKEY,
- &reg_data, sizeof(unsigned int), &flag)) {
- if (reg_data > DAL_STATS_ENTRIES_REGKEY_MAX)
- core_stats->entries = DAL_STATS_ENTRIES_REGKEY_MAX;
- else
- core_stats->entries = reg_data;
- }
- core_stats->time = kcalloc(core_stats->entries,
- sizeof(struct stats_time_cache),
- GFP_KERNEL);
-
- if (core_stats->time == NULL)
- goto fail_construct_time;
-
- core_stats->event_entries = DAL_STATS_EVENT_ENTRIES_DEFAULT;
- core_stats->events = kcalloc(core_stats->event_entries,
- sizeof(struct stats_event_cache),
- GFP_KERNEL);
-
- if (core_stats->events == NULL)
- goto fail_construct_events;
-
- } else {
- core_stats->entries = 0;
- }
-
- /* Purposely leave index 0 unused so we don't need special logic to
- * handle calculation cases that depend on previous flip data.
- */
- core_stats->index = 1;
- core_stats->event_index = 0;
-
- // Keeps track of ordering within the different stats structures
- core_stats->entry_id = 0;
-
- return &core_stats->public;
-
-fail_construct_events:
- kfree(core_stats->time);
-
-fail_construct_time:
- kfree(core_stats);
-
-fail_construct:
- return NULL;
-}
-
-void mod_stats_destroy(struct mod_stats *mod_stats)
-{
- if (mod_stats != NULL) {
- struct core_stats *core_stats = MOD_STATS_TO_CORE(mod_stats);
-
- kfree(core_stats->time);
- kfree(core_stats->events);
- kfree(core_stats);
- }
-}
-
-void mod_stats_dump(struct mod_stats *mod_stats)
-{
- struct dc *dc = NULL;
- struct dal_logger *logger = NULL;
- struct core_stats *core_stats = NULL;
- struct stats_time_cache *time = NULL;
- struct stats_event_cache *events = NULL;
- unsigned int time_index = 1;
- unsigned int event_index = 0;
- unsigned int index = 0;
- struct log_entry log_entry;
-
- if (mod_stats == NULL)
- return;
-
- core_stats = MOD_STATS_TO_CORE(mod_stats);
- dc = core_stats->dc;
- logger = dc->ctx->logger;
- time = core_stats->time;
- events = core_stats->events;
-
- DISPLAY_STATS_BEGIN(log_entry);
-
- DISPLAY_STATS("==Display Caps==\n");
-
- DISPLAY_STATS("==Display Stats==\n");
-
- DISPLAY_STATS("%10s %10s %10s %10s %10s"
- " %11s %11s %17s %10s %14s"
- " %10s %10s %10s %10s %10s"
- " %10s %10s %10s %10s\n",
- "render", "avgRender",
- "minWindow", "midPoint", "maxWindow",
- "vsyncToFlip", "flipToVsync", "vsyncsBetweenFlip",
- "numFrame", "insertDuration",
- "vTotalMin", "vTotalMax", "eventTrigs",
- "vSyncTime1", "vSyncTime2", "vSyncTime3",
- "vSyncTime4", "vSyncTime5", "flags");
-
- for (int i = 0; i < core_stats->entry_id; i++) {
- if (event_index < core_stats->event_index &&
- i == events[event_index].entry_id) {
- DISPLAY_STATS("==Event==%s\n", events[event_index].event_string);
- event_index++;
- } else if (time_index < core_stats->index &&
- i == time[time_index].entry_id) {
- DISPLAY_STATS("%10u %10u %10u %10u %10u"
- " %11u %11u %17u %10u %14u"
- " %10u %10u %10u %10u %10u"
- " %10u %10u %10u %10u\n",
- time[time_index].render_time_in_us,
- time[time_index].avg_render_time_in_us_last_ten,
- time[time_index].min_window,
- time[time_index].lfc_mid_point_in_us,
- time[time_index].max_window,
- time[time_index].vsync_to_flip_time_in_us,
- time[time_index].flip_to_vsync_time_in_us,
- time[time_index].num_vsync_between_flips,
- time[time_index].num_frames_inserted,
- time[time_index].inserted_duration_in_us,
- time[time_index].v_total_min,
- time[time_index].v_total_max,
- time[time_index].event_triggers,
- time[time_index].v_sync_time_in_us[0],
- time[time_index].v_sync_time_in_us[1],
- time[time_index].v_sync_time_in_us[2],
- time[time_index].v_sync_time_in_us[3],
- time[time_index].v_sync_time_in_us[4],
- time[time_index].flags);
-
- time_index++;
- }
- }
-
- DISPLAY_STATS_END(log_entry);
-}
-
-void mod_stats_reset_data(struct mod_stats *mod_stats)
-{
- struct core_stats *core_stats = NULL;
- struct stats_time_cache *time = NULL;
- unsigned int index = 0;
-
- if (mod_stats == NULL)
- return;
-
- core_stats = MOD_STATS_TO_CORE(mod_stats);
-
- memset(core_stats->time, 0,
- sizeof(struct stats_time_cache) * core_stats->entries);
-
- memset(core_stats->events, 0,
- sizeof(struct stats_event_cache) * core_stats->event_entries);
-
- core_stats->index = 1;
- core_stats->event_index = 0;
-
- // Keeps track of ordering within the different stats structures
- core_stats->entry_id = 0;
-}
-
-void mod_stats_update_event(struct mod_stats *mod_stats,
- char *event_string,
- unsigned int length)
-{
- struct core_stats *core_stats = NULL;
- struct stats_event_cache *events = NULL;
- unsigned int index = 0;
- unsigned int copy_length = 0;
-
- if (mod_stats == NULL)
- return;
-
- core_stats = MOD_STATS_TO_CORE(mod_stats);
-
- if (core_stats->event_index >= core_stats->event_entries)
- return;
-
- events = core_stats->events;
- index = core_stats->event_index;
-
- copy_length = length;
- if (length > MOD_STATS_EVENT_STRING_MAX)
- copy_length = MOD_STATS_EVENT_STRING_MAX;
-
- memcpy(&events[index].event_string, event_string, copy_length);
- events[index].event_string[copy_length - 1] = '\0';
-
- events[index].entry_id = core_stats->entry_id;
- core_stats->event_index++;
- core_stats->entry_id++;
-}
-
-void mod_stats_update_flip(struct mod_stats *mod_stats,
- unsigned long timestamp_in_ns)
-{
- struct core_stats *core_stats = NULL;
- struct stats_time_cache *time = NULL;
- unsigned int index = 0;
-
- if (mod_stats == NULL)
- return;
-
- core_stats = MOD_STATS_TO_CORE(mod_stats);
-
- if (core_stats->index >= core_stats->entries)
- return;
-
- time = core_stats->time;
- index = core_stats->index;
-
- time[index].flip_timestamp_in_ns = timestamp_in_ns;
- time[index].render_time_in_us =
- (timestamp_in_ns - time[index - 1].flip_timestamp_in_ns) / 1000;
-
- if (index >= 10) {
- for (unsigned int i = 0; i < 10; i++)
- time[index].avg_render_time_in_us_last_ten +=
- time[index - i].render_time_in_us;
- time[index].avg_render_time_in_us_last_ten /= 10;
- }
-
- if (time[index].num_vsync_between_flips > 0)
- time[index].vsync_to_flip_time_in_us =
- (timestamp_in_ns -
- time[index].vupdate_timestamp_in_ns) / 1000;
- else
- time[index].vsync_to_flip_time_in_us =
- (timestamp_in_ns -
- time[index - 1].vupdate_timestamp_in_ns) / 1000;
-
- time[index].entry_id = core_stats->entry_id;
- core_stats->index++;
- core_stats->entry_id++;
-}
-
-void mod_stats_update_vupdate(struct mod_stats *mod_stats,
- unsigned long timestamp_in_ns)
-{
- struct core_stats *core_stats = NULL;
- struct stats_time_cache *time = NULL;
- unsigned int index = 0;
- unsigned int num_vsyncs = 0;
- unsigned int prev_vsync_in_ns = 0;
-
- if (mod_stats == NULL)
- return;
-
- core_stats = MOD_STATS_TO_CORE(mod_stats);
-
- if (core_stats->index >= core_stats->entries)
- return;
-
- time = core_stats->time;
- index = core_stats->index;
- num_vsyncs = time[index].num_vsync_between_flips;
-
- if (num_vsyncs < MOD_STATS_NUM_VSYNCS) {
- if (num_vsyncs == 0) {
- prev_vsync_in_ns =
- time[index - 1].vupdate_timestamp_in_ns;
-
- time[index].flip_to_vsync_time_in_us =
- (timestamp_in_ns -
- time[index - 1].flip_timestamp_in_ns) /
- 1000;
- } else {
- prev_vsync_in_ns =
- time[index].vupdate_timestamp_in_ns;
- }
-
- time[index].v_sync_time_in_us[num_vsyncs] =
- (timestamp_in_ns - prev_vsync_in_ns) / 1000;
- }
-
- time[index].vupdate_timestamp_in_ns = timestamp_in_ns;
- time[index].num_vsync_between_flips++;
-}
-
-void mod_stats_update_freesync(struct mod_stats *mod_stats,
- unsigned int v_total_min,
- unsigned int v_total_max,
- unsigned int event_triggers,
- unsigned int window_min,
- unsigned int window_max,
- unsigned int lfc_mid_point_in_us,
- unsigned int inserted_frames,
- unsigned int inserted_duration_in_us)
-{
- struct core_stats *core_stats = NULL;
- struct stats_time_cache *time = NULL;
- unsigned int index = 0;
-
- if (mod_stats == NULL)
- return;
-
- core_stats = MOD_STATS_TO_CORE(mod_stats);
-
- if (core_stats->index >= core_stats->entries)
- return;
-
- time = core_stats->time;
- index = core_stats->index;
-
- time[index].v_total_min = v_total_min;
- time[index].v_total_max = v_total_max;
- time[index].event_triggers = event_triggers;
- time[index].min_window = window_min;
- time[index].max_window = window_max;
- time[index].lfc_mid_point_in_us = lfc_mid_point_in_us;
- time[index].num_frames_inserted = inserted_frames;
- time[index].inserted_duration_in_us = inserted_duration_in_us;
-}
-
diff --git a/drivers/gpu/drm/amd/display/modules/vmid/vmid.c b/drivers/gpu/drm/amd/display/modules/vmid/vmid.c
index 00f132f8ad55..61ee4be35d27 100644
--- a/drivers/gpu/drm/amd/display/modules/vmid/vmid.c
+++ b/drivers/gpu/drm/amd/display/modules/vmid/vmid.c
@@ -112,9 +112,12 @@ uint8_t mod_vmid_get_for_ptb(struct mod_vmid *mod_vmid, uint64_t ptb)
evict_vmids(core_vmid);
vmid = get_next_available_vmid(core_vmid);
- add_ptb_to_table(core_vmid, vmid, ptb);
+ if (vmid != -1) {
+ add_ptb_to_table(core_vmid, vmid, ptb);
- dc_setup_vm_context(core_vmid->dc, &va_config, vmid);
+ dc_setup_vm_context(core_vmid->dc, &va_config, vmid);
+ } else
+ ASSERT(0);
}
return vmid;
diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h
index d655a76bedc6..e98c84ef206f 100644
--- a/drivers/gpu/drm/amd/include/amd_shared.h
+++ b/drivers/gpu/drm/amd/include/amd_shared.h
@@ -40,6 +40,13 @@ enum amd_chip_flags {
AMD_EXP_HW_SUPPORT = 0x00080000UL,
};
+enum amd_apu_flags {
+ AMD_APU_IS_RAVEN = 0x00000001UL,
+ AMD_APU_IS_RAVEN2 = 0x00000002UL,
+ AMD_APU_IS_PICASSO = 0x00000004UL,
+ AMD_APU_IS_RENOIR = 0x00000008UL,
+};
+
enum amd_ip_block_type {
AMD_IP_BLOCK_TYPE_COMMON,
AMD_IP_BLOCK_TYPE_GMC,
@@ -150,6 +157,13 @@ enum DC_FEATURE_MASK {
DC_PSR_MASK = 0x8,
};
+enum DC_DEBUG_MASK {
+ DC_DISABLE_PIPE_SPLIT = 0x1,
+ DC_DISABLE_STUTTER = 0x2,
+ DC_DISABLE_DSC = 0x4,
+ DC_DISABLE_CLOCK_GATING = 0x8
+};
+
enum amd_dpm_forced_level;
/**
* struct amd_ip_funcs - general hooks for managing amdgpu IP Blocks
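Editorial note: both new enums added above are single-bit flags meant to be OR'd into a mask and tested with a bitwise AND. A small illustration of that usage, assuming amd_shared.h is included; the holder parameters and helper names below are hypothetical, not fields of amdgpu_device:

#include <linux/types.h>

static bool dsc_disabled_by_debug_mask(u32 dc_debug_mask)
{
	/* DC_DEBUG_MASK values are independent bits, so several can be
	 * combined, e.g. DC_DISABLE_DSC | DC_DISABLE_STUTTER. */
	return dc_debug_mask & DC_DISABLE_DSC;
}

static bool apu_is_picasso_or_raven2(u32 apu_flags)
{
	return apu_flags & (AMD_APU_IS_PICASSO | AMD_APU_IS_RAVEN2);
}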
diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_1_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_1_0_sh_mask.h
index e7db6f9f9c86..8b0b9a2a8fed 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_1_0_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_1_0_sh_mask.h
@@ -5599,6 +5599,7 @@
#define GRBM_PWR_CNTL__ALL_REQ_EN_MASK 0x00008000L
//GRBM_STATUS
#define GRBM_STATUS__ME0PIPE0_CMDFIFO_AVAIL__SHIFT 0x0
+#define GRBM_STATUS__RSMU_RQ_PENDING__SHIFT 0x5
#define GRBM_STATUS__ME0PIPE0_CF_RQ_PENDING__SHIFT 0x7
#define GRBM_STATUS__ME0PIPE0_PF_RQ_PENDING__SHIFT 0x8
#define GRBM_STATUS__GDS_DMA_RQ_PENDING__SHIFT 0x9
@@ -5619,6 +5620,7 @@
#define GRBM_STATUS__CB_BUSY__SHIFT 0x1e
#define GRBM_STATUS__GUI_ACTIVE__SHIFT 0x1f
#define GRBM_STATUS__ME0PIPE0_CMDFIFO_AVAIL_MASK 0x0000000FL
+#define GRBM_STATUS__RSMU_RQ_PENDING_MASK 0x00000020L
#define GRBM_STATUS__ME0PIPE0_CF_RQ_PENDING_MASK 0x00000080L
#define GRBM_STATUS__ME0PIPE0_PF_RQ_PENDING_MASK 0x00000100L
#define GRBM_STATUS__GDS_DMA_RQ_PENDING_MASK 0x00000200L
@@ -5832,6 +5834,7 @@
#define GRBM_READ_ERROR__READ_ERROR_MASK 0x80000000L
//GRBM_READ_ERROR2
#define GRBM_READ_ERROR2__READ_REQUESTER_CPF__SHIFT 0x10
+#define GRBM_READ_ERROR2__READ_REQUESTER_RSMU__SHIFT 0x11
#define GRBM_READ_ERROR2__READ_REQUESTER_RLC__SHIFT 0x12
#define GRBM_READ_ERROR2__READ_REQUESTER_GDS_DMA__SHIFT 0x13
#define GRBM_READ_ERROR2__READ_REQUESTER_ME0PIPE0_CF__SHIFT 0x14
@@ -5847,6 +5850,7 @@
#define GRBM_READ_ERROR2__READ_REQUESTER_ME2PIPE2__SHIFT 0x1e
#define GRBM_READ_ERROR2__READ_REQUESTER_ME2PIPE3__SHIFT 0x1f
#define GRBM_READ_ERROR2__READ_REQUESTER_CPF_MASK 0x00010000L
+#define GRBM_READ_ERROR2__READ_REQUESTER_RSMU_MASK 0x00020000L
#define GRBM_READ_ERROR2__READ_REQUESTER_RLC_MASK 0x00040000L
#define GRBM_READ_ERROR2__READ_REQUESTER_GDS_DMA_MASK 0x00080000L
#define GRBM_READ_ERROR2__READ_REQUESTER_ME0PIPE0_CF_MASK 0x00100000L
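Editorial note: the new RSMU definitions follow this header's usual _MASK/__SHIFT pairing, so the field is read by masking the register value and shifting it down. A sketch assuming the GRBM_STATUS value has already been read from MMIO:

#include <linux/types.h>

static bool rsmu_request_pending(u32 grbm_status)
{
	return (grbm_status & GRBM_STATUS__RSMU_RQ_PENDING_MASK) >>
	       GRBM_STATUS__RSMU_RQ_PENDING__SHIFT;
}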
diff --git a/drivers/gpu/drm/amd/include/asic_reg/nbif/nbif_6_1_offset.h b/drivers/gpu/drm/amd/include/asic_reg/nbif/nbif_6_1_offset.h
index 68d0ffad28c7..92fd27c26a77 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/nbif/nbif_6_1_offset.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/nbif/nbif_6_1_offset.h
@@ -1162,8 +1162,10 @@
#define mmRCC_CONFIG_MEMSIZE_BASE_IDX 0
#define mmRCC_CONFIG_RESERVED 0x0de4 // duplicate
#define mmRCC_CONFIG_RESERVED_BASE_IDX 0
+#ifndef mmRCC_IOV_FUNC_IDENTIFIER
#define mmRCC_IOV_FUNC_IDENTIFIER 0x0de5 // duplicate
#define mmRCC_IOV_FUNC_IDENTIFIER_BASE_IDX 0
+#endif
// addressBlock: syshub_mmreg_ind_syshubdec
diff --git a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_0_offset.h
index 435462294fbc..a7cd760ebf8f 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_0_offset.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_0_offset.h
@@ -4251,8 +4251,10 @@
#define mmRCC_CONFIG_MEMSIZE_BASE_IDX 2
#define mmRCC_CONFIG_RESERVED 0x00c4
#define mmRCC_CONFIG_RESERVED_BASE_IDX 2
+#ifndef mmRCC_IOV_FUNC_IDENTIFIER
#define mmRCC_IOV_FUNC_IDENTIFIER 0x00c5
#define mmRCC_IOV_FUNC_IDENTIFIER_BASE_IDX 2
+#endif
// addressBlock: nbio_nbif0_rcc_dev0_BIFDEC1
diff --git a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_offset.h b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_offset.h
index ce5830ebe095..0c5a08bc034a 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_offset.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_offset.h
@@ -2687,8 +2687,10 @@
#define mmRCC_CONFIG_MEMSIZE_BASE_IDX 2
#define mmRCC_CONFIG_RESERVED 0x00c4
#define mmRCC_CONFIG_RESERVED_BASE_IDX 2
+#ifndef mmRCC_IOV_FUNC_IDENTIFIER
#define mmRCC_IOV_FUNC_IDENTIFIER 0x00c5
#define mmRCC_IOV_FUNC_IDENTIFIER_BASE_IDX 2
+#endif
// addressBlock: nbio_nbif0_rcc_dev0_BIFDEC1
diff --git a/drivers/gpu/drm/amd/include/asic_reg/pwr/pwr_10_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/pwr/pwr_10_0_offset.h
new file mode 100644
index 000000000000..e87c359ea1fe
--- /dev/null
+++ b/drivers/gpu/drm/amd/include/asic_reg/pwr/pwr_10_0_offset.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (C) 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef _pwr_10_0_OFFSET_HEADER
+#define _pwr_10_0_OFFSET_HEADER
+
+#define mmPWR_MISC_CNTL_STATUS 0x0183
+#define mmPWR_MISC_CNTL_STATUS_BASE_IDX 0
+
+#endif
diff --git a/drivers/gpu/drm/amd/include/asic_reg/pwr/pwr_10_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/pwr/pwr_10_0_sh_mask.h
new file mode 100644
index 000000000000..8a000c21651c
--- /dev/null
+++ b/drivers/gpu/drm/amd/include/asic_reg/pwr/pwr_10_0_sh_mask.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef _pwr_10_0_SH_MASK_HEADER
+#define _pwr_10_0_SH_MASK_HEADER
+
+//PWR_MISC_CNTL_STATUS
+#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT 0x0
+#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT 0x1
+#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK 0x00000001L
+#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK 0x00000006L
+
+#endif
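Editorial note: PWR_GFXOFF_STATUS is a two-bit field in this new header (mask 0x6, shift 1), so unlike a single-bit flag it decodes to a value in the range 0..3 rather than a boolean. Sketch of the decode, with reg standing in for the MMIO read the driver would perform:

#include <linux/types.h>

static u32 gfxoff_status(u32 reg)
{
	/* 0..3; the meaning of each value is firmware-defined. */
	return (reg & PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK) >>
	       PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT;
}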
diff --git a/drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_12_0_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_12_0_0_offset.h
new file mode 100644
index 000000000000..9bf73284ad73
--- /dev/null
+++ b/drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_12_0_0_offset.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef _smuio_12_0_0_OFFSET_HEADER
+#define _smuio_12_0_0_OFFSET_HEADER
+
+#define mmSMUIO_GFX_MISC_CNTL 0x00c8
+#define mmSMUIO_GFX_MISC_CNTL_BASE_IDX 0
+
+#define mmPWR_MISC_CNTL_STATUS 0x0183
+#define mmPWR_MISC_CNTL_STATUS_BASE_IDX 1
+
+#endif
diff --git a/drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_12_0_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_12_0_0_sh_mask.h
new file mode 100644
index 000000000000..26556fa3d054
--- /dev/null
+++ b/drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_12_0_0_sh_mask.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef _smuio_12_0_0_SH_MASK_HEADER
+#define _smuio_12_0_0_SH_MASK_HEADER
+
+//SMUIO_GFX_MISC_CNTL
+#define SMUIO_GFX_MISC_CNTL__PWR_GFXOFF_STATUS_MASK 0x00000006L
+#define SMUIO_GFX_MISC_CNTL__PWR_GFXOFF_STATUS__SHIFT 0x1
+//PWR_MISC_CNTL_STATUS
+#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT 0x0
+#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT 0x1
+#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK 0x00000001L
+#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK 0x00000006L
+
+#endif
diff --git a/drivers/gpu/drm/amd/include/atomfirmware.h b/drivers/gpu/drm/amd/include/atomfirmware.h
index 70146518174c..b36ea8340afa 100644
--- a/drivers/gpu/drm/amd/include/atomfirmware.h
+++ b/drivers/gpu/drm/amd/include/atomfirmware.h
@@ -972,11 +972,13 @@ struct atom_ext_display_path
};
//usCaps
-enum ext_display_path_cap_def
-{
- EXT_DISPLAY_PATH_CAPS__HBR2_DISABLE =0x0001,
- EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN =0x0002,
- EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK =0x007C,
+enum ext_display_path_cap_def {
+ EXT_DISPLAY_PATH_CAPS__HBR2_DISABLE = 0x0001,
+ EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN = 0x0002,
+ EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK = 0x007C,
+ EXT_DISPLAY_PATH_CAPS__HDMI20_PI3EQX1204 = (0x01 << 2), //PI redriver chip
+ EXT_DISPLAY_PATH_CAPS__HDMI20_TISN65DP159RSBT = (0x02 << 2), //TI retimer chip
+ EXT_DISPLAY_PATH_CAPS__HDMI20_PARADE_PS175 = (0x03 << 2) //Parade DP->HDMI converter chip
};
struct atom_external_display_connection_info
@@ -1876,6 +1878,108 @@ struct atom_smc_dpm_info_v4_6
uint32_t boardreserved[10];
};
+struct atom_smc_dpm_info_v4_7
+{
+ struct atom_common_table_header table_header;
+ // SECTION: BOARD PARAMETERS
+ // I2C Control
+ struct smudpm_i2c_controller_config_v2 I2cControllers[8];
+
+ // SVI2 Board Parameters
+ uint16_t MaxVoltageStepGfx; // In mV(Q2) Max voltage step that SMU will request. Multiple steps are taken if voltage change exceeds this value.
+ uint16_t MaxVoltageStepSoc; // In mV(Q2) Max voltage step that SMU will request. Multiple steps are taken if voltage change exceeds this value.
+
+ uint8_t VddGfxVrMapping; // Use VR_MAPPING* bitfields
+ uint8_t VddSocVrMapping; // Use VR_MAPPING* bitfields
+ uint8_t VddMem0VrMapping; // Use VR_MAPPING* bitfields
+ uint8_t VddMem1VrMapping; // Use VR_MAPPING* bitfields
+
+ uint8_t GfxUlvPhaseSheddingMask; // set this to 1 to set PSI0/1 to 1 in ULV mode
+ uint8_t SocUlvPhaseSheddingMask; // set this to 1 to set PSI0/1 to 1 in ULV mode
+ uint8_t ExternalSensorPresent; // External RDI connected to TMON (aka TEMP IN)
+ uint8_t Padding8_V;
+
+ // Telemetry Settings
+ uint16_t GfxMaxCurrent; // in Amps
+ uint8_t GfxOffset; // in Amps
+ uint8_t Padding_TelemetryGfx;
+ uint16_t SocMaxCurrent; // in Amps
+ uint8_t SocOffset; // in Amps
+ uint8_t Padding_TelemetrySoc;
+
+ uint16_t Mem0MaxCurrent; // in Amps
+ uint8_t Mem0Offset; // in Amps
+ uint8_t Padding_TelemetryMem0;
+
+ uint16_t Mem1MaxCurrent; // in Amps
+ uint8_t Mem1Offset; // in Amps
+ uint8_t Padding_TelemetryMem1;
+
+ // GPIO Settings
+ uint8_t AcDcGpio; // GPIO pin configured for AC/DC switching
+ uint8_t AcDcPolarity; // GPIO polarity for AC/DC switching
+ uint8_t VR0HotGpio; // GPIO pin configured for VR0 HOT event
+ uint8_t VR0HotPolarity; // GPIO polarity for VR0 HOT event
+
+ uint8_t VR1HotGpio; // GPIO pin configured for VR1 HOT event
+ uint8_t VR1HotPolarity; // GPIO polarity for VR1 HOT event
+ uint8_t GthrGpio; // GPIO pin configured for GTHR Event
+ uint8_t GthrPolarity; // replace GPIO polarity for GTHR
+
+ // LED Display Settings
+ uint8_t LedPin0; // GPIO number for LedPin[0]
+ uint8_t LedPin1; // GPIO number for LedPin[1]
+ uint8_t LedPin2; // GPIO number for LedPin[2]
+ uint8_t padding8_4;
+
+ // GFXCLK PLL Spread Spectrum
+ uint8_t PllGfxclkSpreadEnabled; // on or off
+ uint8_t PllGfxclkSpreadPercent; // Q4.4
+ uint16_t PllGfxclkSpreadFreq; // kHz
+
+ // GFXCLK DFLL Spread Spectrum
+ uint8_t DfllGfxclkSpreadEnabled; // on or off
+ uint8_t DfllGfxclkSpreadPercent; // Q4.4
+ uint16_t DfllGfxclkSpreadFreq; // kHz
+
+ // UCLK Spread Spectrum
+ uint8_t UclkSpreadEnabled; // on or off
+ uint8_t UclkSpreadPercent; // Q4.4
+ uint16_t UclkSpreadFreq; // kHz
+
+ // SOCCLK Spread Spectrum
+ uint8_t SoclkSpreadEnabled; // on or off
+ uint8_t SocclkSpreadPercent; // Q4.4
+ uint16_t SocclkSpreadFreq; // kHz
+
+ // Total board power
+ uint16_t TotalBoardPower; //Only needed for TCP Estimated case, where TCP = TGP+Total Board Power
+ uint16_t BoardPadding;
+
+ // Mvdd Svi2 Div Ratio Setting
+ uint32_t MvddRatio; // This is used for MVDD Vid workaround. It has 16 fractional bits (Q16.16)
+
+ // GPIO pins for I2C communications with 2nd controller for Input Telemetry Sequence
+ uint8_t GpioI2cScl; // Serial Clock
+ uint8_t GpioI2cSda; // Serial Data
+ uint16_t GpioPadding;
+
+ // Additional LED Display Settings
+ uint8_t LedPin3; // GPIO number for LedPin[3] - PCIE GEN Speed
+ uint8_t LedPin4; // GPIO number for LedPin[4] - PMFW Error Status
+ uint16_t LedEnableMask;
+
+ // Power Limit Scalars
+ uint8_t PowerLimitScalar[4]; //[PPT_THROTTLER_COUNT]
+
+ uint8_t MvddUlvPhaseSheddingMask;
+ uint8_t VddciUlvPhaseSheddingMask;
+ uint8_t Padding8_Psi1;
+ uint8_t Padding8_Psi2;
+
+ uint32_t BoardReserved[5];
+};
+
/*
***************************************************************************
Data Table asic_profiling_info structure
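Editorial note: the three new HDMI20_* values live inside the EXT_CHIP_MASK bit field (bits 6:2, mask 0x007C) of usCaps, so identifying the external chip means masking first and then comparing against the specific value. A sketch of that test; has_dp159_retimer() is an illustrative helper, not part of the header:

#include <linux/types.h>

static bool has_dp159_retimer(u16 caps)
{
	return (caps & EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK) ==
	       EXT_DISPLAY_PATH_CAPS__HDMI20_TISN65DP159RSBT;
}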
diff --git a/drivers/gpu/drm/amd/include/cgs_common.h b/drivers/gpu/drm/amd/include/cgs_common.h
index a69deb3a2ac0..60a6536ff656 100644
--- a/drivers/gpu/drm/amd/include/cgs_common.h
+++ b/drivers/gpu/drm/amd/include/cgs_common.h
@@ -32,7 +32,6 @@ struct cgs_device;
* enum cgs_ind_reg - Indirect register spaces
*/
enum cgs_ind_reg {
- CGS_IND_REG__MMIO,
CGS_IND_REG__PCIE,
CGS_IND_REG__SMC,
CGS_IND_REG__UVD_CTX,
diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
index 8e2acb4df860..7e6dcdf7df73 100644
--- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
+++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
@@ -50,6 +50,7 @@ static int amd_powerplay_create(struct amdgpu_device *adev)
hwmgr->not_vf = !amdgpu_sriov_vf(adev);
hwmgr->device = amdgpu_cgs_create_device(adev);
mutex_init(&hwmgr->smu_lock);
+ mutex_init(&hwmgr->msg_lock);
hwmgr->chip_family = adev->family;
hwmgr->chip_id = adev->asic_type;
hwmgr->feature_mask = adev->pm.pp_feature;
@@ -64,6 +65,8 @@ static void amd_powerplay_destroy(struct amdgpu_device *adev)
{
struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle;
+ mutex_destroy(&hwmgr->msg_lock);
+
kfree(hwmgr->hardcode_pp_table);
hwmgr->hardcode_pp_table = NULL;
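Editorial note: the new msg_lock is initialized when the powerplay instance is created and destroyed together with it — the usual lifetime pairing for a kernel mutex. Reduced to its essentials, with obj standing in for hwmgr:

#include <linux/mutex.h>
#include <linux/slab.h>

struct obj {
	struct mutex msg_lock;
};

static struct obj *obj_create(void)
{
	struct obj *o = kzalloc(sizeof(*o), GFP_KERNEL);

	if (!o)
		return NULL;
	mutex_init(&o->msg_lock);	/* ready before anyone can call in */
	return o;
}

static void obj_destroy(struct obj *o)
{
	mutex_destroy(&o->msg_lock);	/* no holders may remain here */
	kfree(o);
}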
diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
index e77046931e4c..8c684a6e0156 100644
--- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
@@ -62,6 +62,7 @@ const char *smu_get_feature_name(struct smu_context *smu, enum smu_feature_mask
size_t smu_sys_get_pp_feature_mask(struct smu_context *smu, char *buf)
{
+ struct amdgpu_device *adev = smu->adev;
size_t size = 0;
int ret = 0, i = 0;
uint32_t feature_mask[2] = { 0 };
@@ -70,6 +71,9 @@ size_t smu_sys_get_pp_feature_mask(struct smu_context *smu, char *buf)
uint32_t sort_feature[SMU_FEATURE_COUNT];
uint64_t hw_feature_count = 0;
+ if (!adev->pm.dpm_enabled)
+ return -EINVAL;
+
mutex_lock(&smu->mutex);
ret = smu_feature_get_enabled_mask(smu, feature_mask, 2);
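Editorial note: most hunks in this file apply the same change — the scattered smu->pm_enabled checks are replaced by a single adev->pm.dpm_enabled gate at the top of each public entry point, so every SMU service is refused with -EINVAL while DPM is torn down (suspend, reset, hw_fini). The recurring shape, sketched with a placeholder callback name (do_work stands in for whichever per-ASIC ppt hook the real function dispatches to):

int smu_entry_point(struct smu_context *smu)
{
	struct amdgpu_device *adev = smu->adev;
	int ret = 0;

	if (!adev->pm.dpm_enabled)	/* DPM not up yet, or torn down */
		return -EINVAL;

	mutex_lock(&smu->mutex);
	if (smu->ppt_funcs->do_work)
		ret = smu->ppt_funcs->do_work(smu);
	mutex_unlock(&smu->mutex);

	return ret;
}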
@@ -110,9 +114,6 @@ static int smu_feature_update_enable_state(struct smu_context *smu,
uint32_t feature_low = 0, feature_high = 0;
int ret = 0;
- if (!smu->pm_enabled)
- return ret;
-
feature_low = (feature_mask >> 0 ) & 0xffffffff;
feature_high = (feature_mask >> 32) & 0xffffffff;
@@ -155,6 +156,10 @@ int smu_sys_set_pp_feature_mask(struct smu_context *smu, uint64_t new_mask)
uint64_t feature_2_enabled = 0;
uint64_t feature_2_disabled = 0;
uint64_t feature_enables = 0;
+ struct amdgpu_device *adev = smu->adev;
+
+ if (!adev->pm.dpm_enabled)
+ return -EINVAL;
mutex_lock(&smu->mutex);
@@ -191,16 +196,31 @@ int smu_get_smc_version(struct smu_context *smu, uint32_t *if_version, uint32_t
if (!if_version && !smu_version)
return -EINVAL;
+ if (smu->smc_fw_if_version && smu->smc_fw_version)
+ {
+ if (if_version)
+ *if_version = smu->smc_fw_if_version;
+
+ if (smu_version)
+ *smu_version = smu->smc_fw_version;
+
+ return 0;
+ }
+
if (if_version) {
ret = smu_send_smc_msg(smu, SMU_MSG_GetDriverIfVersion, if_version);
if (ret)
return ret;
+
+ smu->smc_fw_if_version = *if_version;
}
if (smu_version) {
ret = smu_send_smc_msg(smu, SMU_MSG_GetSmuVersion, smu_version);
if (ret)
return ret;
+
+ smu->smc_fw_version = *smu_version;
}
return ret;
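Editorial note: smu_get_smc_version() now caches both the interface version and the firmware version in the smu context after the first successful query, so later callers are answered from the cached copies without another round trip to the SMU. The memoization shape, with placeholder names:

#include <linux/types.h>

struct ver_cache {
	u32 cached_version;	/* 0 means "not queried yet" */
};

static int get_version_cached(struct ver_cache *c,
			      int (*query_fw)(u32 *out), u32 *out)
{
	int ret;

	if (c->cached_version) {
		*out = c->cached_version;
		return 0;
	}

	ret = query_fw(out);
	if (ret)
		return ret;

	c->cached_version = *out;	/* remember for the next caller */
	return 0;
}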
@@ -327,13 +347,13 @@ int smu_get_dpm_freq_by_index(struct smu_context *smu, enum smu_clk_type clk_typ
param = (uint32_t)(((clk_id & 0xffff) << 16) | (level & 0xffff));
ret = smu_send_smc_msg_with_param(smu, SMU_MSG_GetDpmFreqByIndex,
- param, &param);
+ param, value);
if (ret)
return ret;
/* BIT31: 0 - Fine grained DPM, 1 - Discrete DPM
* not supported for now */
- *value = param & 0x7fffffff;
+ *value = *value & 0x7fffffff;
return ret;
}
@@ -417,8 +437,12 @@ bool smu_clk_dpm_is_enabled(struct smu_context *smu, enum smu_clk_type clk_type)
int smu_dpm_set_power_gate(struct smu_context *smu, uint32_t block_type,
bool gate)
{
+ struct amdgpu_device *adev = smu->adev;
int ret = 0;
+ if (!adev->pm.dpm_enabled)
+ return -EINVAL;
+
switch (block_type) {
case AMD_IP_BLOCK_TYPE_UVD:
ret = smu_dpm_set_uvd_enable(smu, !gate);
@@ -511,7 +535,6 @@ int smu_update_table(struct smu_context *smu, enum smu_table_id table_index, int
int table_id = smu_table_get_index(smu, table_index);
uint32_t table_size;
int ret = 0;
-
if (!table_data || table_id >= SMU_TABLE_COUNT || table_id < 0)
return -EINVAL;
@@ -547,12 +570,10 @@ bool is_support_sw_smu(struct amdgpu_device *adev)
if (adev->asic_type == CHIP_VEGA20)
return (amdgpu_dpm == 2) ? true : false;
else if (adev->asic_type >= CHIP_ARCTURUS) {
- if (amdgpu_sriov_vf(adev)&& !amdgpu_sriov_is_pp_one_vf(adev))
- return false;
- else
+ if (amdgpu_sriov_is_pp_one_vf(adev) || !amdgpu_sriov_vf(adev))
return true;
- } else
- return false;
+ }
+ return false;
}
bool is_support_sw_smu_xgmi(struct amdgpu_device *adev)
@@ -569,8 +590,12 @@ bool is_support_sw_smu_xgmi(struct amdgpu_device *adev)
int smu_sys_get_pp_table(struct smu_context *smu, void **table)
{
struct smu_table_context *smu_table = &smu->smu_table;
+ struct amdgpu_device *adev = smu->adev;
uint32_t powerplay_table_size;
+ if (!adev->pm.dpm_enabled)
+ return -EINVAL;
+
if (!smu_table->power_play_table && !smu_table->hardcode_pptable)
return -EINVAL;
@@ -591,11 +616,13 @@ int smu_sys_get_pp_table(struct smu_context *smu, void **table)
int smu_sys_set_pp_table(struct smu_context *smu, void *buf, size_t size)
{
struct smu_table_context *smu_table = &smu->smu_table;
+ struct amdgpu_device *adev = smu->adev;
ATOM_COMMON_TABLE_HEADER *header = (ATOM_COMMON_TABLE_HEADER *)buf;
int ret = 0;
- if (!smu->pm_enabled)
+ if (!adev->pm.dpm_enabled)
return -EINVAL;
+
if (header->usStructureSize != size) {
pr_err("pp table size not matched !\n");
return -EIO;
@@ -636,8 +663,6 @@ int smu_feature_init_dpm(struct smu_context *smu)
int ret = 0;
uint32_t allowed_feature_mask[SMU_FEATURE_MAX/32];
- if (!smu->pm_enabled)
- return ret;
mutex_lock(&feature->mutex);
bitmap_zero(feature->allowed, SMU_FEATURE_MAX);
mutex_unlock(&feature->mutex);
@@ -665,7 +690,6 @@ int smu_feature_is_enabled(struct smu_context *smu, enum smu_feature_mask mask)
if (smu->is_apu)
return 1;
-
feature_id = smu_feature_get_index(smu, mask);
if (feature_id < 0)
return 0;
@@ -932,13 +956,6 @@ static int smu_sw_init(void *handle)
return ret;
}
- if (adev->smu.ppt_funcs->i2c_eeprom_init) {
- ret = smu_i2c_eeprom_init(smu, &adev->pm.smu_i2c);
-
- if (ret)
- return ret;
- }
-
return 0;
}
@@ -948,9 +965,6 @@ static int smu_sw_fini(void *handle)
struct smu_context *smu = &adev->smu;
int ret;
- if (adev->smu.ppt_funcs->i2c_eeprom_fini)
- smu_i2c_eeprom_fini(smu, &adev->pm.smu_i2c);
-
kfree(smu->irq_source);
smu->irq_source = NULL;
@@ -1323,6 +1337,9 @@ static int smu_hw_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
struct smu_context *smu = &adev->smu;
+ if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+ return 0;
+
ret = smu_start_smc_engine(smu);
if (ret) {
pr_err("SMU is not ready yet!\n");
@@ -1336,9 +1353,6 @@ static int smu_hw_init(void *handle)
smu_set_gfx_cgpg(&adev->smu, true);
}
- if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
- return 0;
-
if (!smu->pm_enabled)
return 0;
@@ -1366,10 +1380,11 @@ static int smu_hw_init(void *handle)
if (ret)
goto failed;
- if (!smu->pm_enabled)
- adev->pm.dpm_enabled = false;
- else
- adev->pm.dpm_enabled = true; /* TODO: will set dpm_enabled flag while VCN and DAL DPM is workable */
+ ret = smu_i2c_eeprom_init(smu, &adev->pm.smu_i2c);
+ if (ret)
+ goto failed;
+
+ adev->pm.dpm_enabled = true;
pr_info("SMU is initialized successfully!\n");
@@ -1381,6 +1396,9 @@ failed:
static int smu_stop_dpms(struct smu_context *smu)
{
+ if (amdgpu_sriov_vf(smu->adev))
+ return 0;
+
return smu_system_features_control(smu, false);
}
@@ -1403,6 +1421,10 @@ static int smu_hw_fini(void *handle)
if (!smu->pm_enabled)
return 0;
+ adev->pm.dpm_enabled = false;
+
+ smu_i2c_eeprom_fini(smu, &adev->pm.smu_i2c);
+
if (!amdgpu_sriov_vf(adev)){
ret = smu_stop_thermal_control(smu);
if (ret) {
@@ -1542,6 +1564,10 @@ static int smu_suspend(void *handle)
if (!smu->pm_enabled)
return 0;
+ adev->pm.dpm_enabled = false;
+
+ smu_i2c_eeprom_fini(smu, &adev->pm.smu_i2c);
+
if(!amdgpu_sriov_vf(adev)) {
ret = smu_disable_dpm(smu);
if (ret)
@@ -1587,11 +1613,17 @@ static int smu_resume(void *handle)
if (ret)
goto failed;
+ ret = smu_i2c_eeprom_init(smu, &adev->pm.smu_i2c);
+ if (ret)
+ goto failed;
+
if (smu->is_apu)
smu_set_gfx_cgpg(&adev->smu, true);
smu->disable_uclk_switch = 0;
+ adev->pm.dpm_enabled = true;
+
pr_info("SMU is resumed successfully!\n");
return 0;
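Editorial note: taken together, the hw_init/hw_fini/suspend/resume hunks move the EEPROM i2c bus setup inside the window bounded by dpm_enabled — it is created only after SMC features and the DPM tables are up, and torn down before DPM is disabled. An ordering sketch (comments only, not driver code):

/*
 *   hw_init / resume:  start SMC -> set up DPM -> smu_i2c_eeprom_init()
 *                      -> adev->pm.dpm_enabled = true
 *   suspend / hw_fini: adev->pm.dpm_enabled = false
 *                      -> smu_i2c_eeprom_fini() -> disable DPM
 */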
@@ -1603,10 +1635,14 @@ failed:
int smu_display_configuration_change(struct smu_context *smu,
const struct amd_pp_display_configuration *display_config)
{
+ struct amdgpu_device *adev = smu->adev;
int index = 0;
int num_of_active_display = 0;
- if (!smu->pm_enabled || !is_support_sw_smu(smu->adev))
+ if (!adev->pm.dpm_enabled)
+ return -EINVAL;
+
+ if (!is_support_sw_smu(smu->adev))
return -EINVAL;
if (!display_config)
@@ -1668,12 +1704,16 @@ int smu_get_current_clocks(struct smu_context *smu,
struct amd_pp_clock_info *clocks)
{
struct amd_pp_simple_clock_info simple_clocks = {0};
+ struct amdgpu_device *adev = smu->adev;
struct smu_clock_info hw_clocks;
int ret = 0;
if (!is_support_sw_smu(smu->adev))
return -EINVAL;
+ if (!adev->pm.dpm_enabled)
+ return -EINVAL;
+
mutex_lock(&smu->mutex);
smu_get_dal_power_level(smu, &simple_clocks);
@@ -1736,7 +1776,7 @@ static int smu_enable_umd_pstate(void *handle,
struct smu_context *smu = (struct smu_context*)(handle);
struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm);
- if (!smu->is_apu && (!smu->pm_enabled || !smu_dpm_ctx->dpm_context))
+ if (!smu->is_apu && !smu_dpm_ctx->dpm_context)
return -EINVAL;
if (!(smu_dpm_ctx->dpm_level & profile_mode_mask)) {
@@ -1778,9 +1818,6 @@ int smu_adjust_power_state_dynamic(struct smu_context *smu,
long workload;
struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm);
- if (!smu->pm_enabled)
- return -EINVAL;
-
if (!skip_display_settings) {
ret = smu_display_config_changed(smu);
if (ret) {
@@ -1831,8 +1868,12 @@ int smu_handle_task(struct smu_context *smu,
enum amd_pp_task task_id,
bool lock_needed)
{
+ struct amdgpu_device *adev = smu->adev;
int ret = 0;
+ if (!adev->pm.dpm_enabled)
+ return -EINVAL;
+
if (lock_needed)
mutex_lock(&smu->mutex);
@@ -1866,10 +1907,11 @@ int smu_switch_power_profile(struct smu_context *smu,
bool en)
{
struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm);
+ struct amdgpu_device *adev = smu->adev;
long workload;
uint32_t index;
- if (!smu->pm_enabled)
+ if (!adev->pm.dpm_enabled)
return -EINVAL;
if (!(type < PP_SMC_POWER_PROFILE_CUSTOM))
@@ -1900,8 +1942,12 @@ int smu_switch_power_profile(struct smu_context *smu,
enum amd_dpm_forced_level smu_get_performance_level(struct smu_context *smu)
{
struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm);
+ struct amdgpu_device *adev = smu->adev;
enum amd_dpm_forced_level level;
+ if (!adev->pm.dpm_enabled)
+ return -EINVAL;
+
if (!smu->is_apu && !smu_dpm_ctx->dpm_context)
return -EINVAL;
@@ -1915,8 +1961,12 @@ enum amd_dpm_forced_level smu_get_performance_level(struct smu_context *smu)
int smu_force_performance_level(struct smu_context *smu, enum amd_dpm_forced_level level)
{
struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm);
+ struct amdgpu_device *adev = smu->adev;
int ret = 0;
+ if (!adev->pm.dpm_enabled)
+ return -EINVAL;
+
if (!smu->is_apu && !smu_dpm_ctx->dpm_context)
return -EINVAL;
@@ -1939,8 +1989,12 @@ int smu_force_performance_level(struct smu_context *smu, enum amd_dpm_forced_lev
int smu_set_display_count(struct smu_context *smu, uint32_t count)
{
+ struct amdgpu_device *adev = smu->adev;
int ret = 0;
+ if (!adev->pm.dpm_enabled)
+ return -EINVAL;
+
mutex_lock(&smu->mutex);
ret = smu_init_display_count(smu, count);
mutex_unlock(&smu->mutex);
@@ -1954,8 +2008,12 @@ int smu_force_clk_levels(struct smu_context *smu,
bool lock_needed)
{
struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm);
+ struct amdgpu_device *adev = smu->adev;
int ret = 0;
+ if (!adev->pm.dpm_enabled)
+ return -EINVAL;
+
if (smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL) {
pr_debug("force clock level is for dpm manual mode only.\n");
return -EINVAL;
@@ -1973,20 +2031,19 @@ int smu_force_clk_levels(struct smu_context *smu,
return ret;
}
+/*
+ * On system suspending or resetting, the dpm_enabled
+ * flag will be cleared. So that those SMU services which
+ * are not supported will be gated.
+ * However, the mp1 state setting should still be granted
+ * even if the dpm_enabled cleared.
+ */
int smu_set_mp1_state(struct smu_context *smu,
enum pp_mp1_state mp1_state)
{
uint16_t msg;
int ret;
- /*
- * The SMC is not fully ready. That may be
- * expected as the IP may be masked.
- * So, just return without error.
- */
- if (!smu->pm_enabled)
- return 0;
-
mutex_lock(&smu->mutex);
switch (mp1_state) {
@@ -2023,15 +2080,11 @@ int smu_set_mp1_state(struct smu_context *smu,
int smu_set_df_cstate(struct smu_context *smu,
enum pp_df_cstate state)
{
+ struct amdgpu_device *adev = smu->adev;
int ret = 0;
- /*
- * The SMC is not fully ready. That may be
- * expected as the IP may be masked.
- * So, just return without error.
- */
- if (!smu->pm_enabled)
- return 0;
+ if (!adev->pm.dpm_enabled)
+ return -EINVAL;
if (!smu->ppt_funcs || !smu->ppt_funcs->set_df_cstate)
return 0;
@@ -2047,6 +2100,28 @@ int smu_set_df_cstate(struct smu_context *smu,
return ret;
}
+int smu_allow_xgmi_power_down(struct smu_context *smu, bool en)
+{
+ struct amdgpu_device *adev = smu->adev;
+ int ret = 0;
+
+ if (!adev->pm.dpm_enabled)
+ return -EINVAL;
+
+ if (!smu->ppt_funcs || !smu->ppt_funcs->allow_xgmi_power_down)
+ return 0;
+
+ mutex_lock(&smu->mutex);
+
+ ret = smu->ppt_funcs->allow_xgmi_power_down(smu, en);
+ if (ret)
+ pr_err("[AllowXgmiPowerDown] failed!\n");
+
+ mutex_unlock(&smu->mutex);
+
+ return ret;
+}
+
int smu_write_watermarks_table(struct smu_context *smu)
{
void *watermarks_table = smu->smu_table.watermarks_table;
@@ -2065,6 +2140,10 @@ int smu_set_watermarks_for_clock_ranges(struct smu_context *smu,
struct dm_pp_wm_sets_with_clock_ranges_soc15 *clock_ranges)
{
void *table = smu->smu_table.watermarks_table;
+ struct amdgpu_device *adev = smu->adev;
+
+ if (!adev->pm.dpm_enabled)
+ return -EINVAL;
if (!table)
return -EINVAL;
@@ -2089,8 +2168,12 @@ int smu_set_watermarks_for_clock_ranges(struct smu_context *smu,
int smu_set_ac_dc(struct smu_context *smu)
{
+ struct amdgpu_device *adev = smu->adev;
int ret = 0;
+ if (!adev->pm.dpm_enabled)
+ return -EINVAL;
+
/* controlled by firmware */
if (smu->dc_controlled_by_gpio)
return 0;
@@ -2149,8 +2232,12 @@ const struct amdgpu_ip_block_version smu_v12_0_ip_block =
int smu_load_microcode(struct smu_context *smu)
{
+ struct amdgpu_device *adev = smu->adev;
int ret = 0;
+ if (!adev->pm.dpm_enabled)
+ return -EINVAL;
+
mutex_lock(&smu->mutex);
if (smu->ppt_funcs->load_microcode)
@@ -2163,8 +2250,12 @@ int smu_load_microcode(struct smu_context *smu)
int smu_check_fw_status(struct smu_context *smu)
{
+ struct amdgpu_device *adev = smu->adev;
int ret = 0;
+ if (!adev->pm.dpm_enabled)
+ return -EINVAL;
+
mutex_lock(&smu->mutex);
if (smu->ppt_funcs->check_fw_status)
@@ -2191,8 +2282,12 @@ int smu_set_gfx_cgpg(struct smu_context *smu, bool enabled)
int smu_set_fan_speed_rpm(struct smu_context *smu, uint32_t speed)
{
+ struct amdgpu_device *adev = smu->adev;
int ret = 0;
+ if (!adev->pm.dpm_enabled)
+ return -EINVAL;
+
mutex_lock(&smu->mutex);
if (smu->ppt_funcs->set_fan_speed_rpm)
@@ -2208,10 +2303,15 @@ int smu_get_power_limit(struct smu_context *smu,
bool def,
bool lock_needed)
{
+ struct amdgpu_device *adev = smu->adev;
int ret = 0;
- if (lock_needed)
+ if (lock_needed) {
+ if (!adev->pm.dpm_enabled)
+ return -EINVAL;
+
mutex_lock(&smu->mutex);
+ }
if (smu->ppt_funcs->get_power_limit)
ret = smu->ppt_funcs->get_power_limit(smu, limit, def);
@@ -2224,8 +2324,12 @@ int smu_get_power_limit(struct smu_context *smu,
int smu_set_power_limit(struct smu_context *smu, uint32_t limit)
{
+ struct amdgpu_device *adev = smu->adev;
int ret = 0;
+ if (!adev->pm.dpm_enabled)
+ return -EINVAL;
+
mutex_lock(&smu->mutex);
if (smu->ppt_funcs->set_power_limit)
@@ -2238,8 +2342,12 @@ int smu_set_power_limit(struct smu_context *smu, uint32_t limit)
int smu_print_clk_levels(struct smu_context *smu, enum smu_clk_type clk_type, char *buf)
{
+ struct amdgpu_device *adev = smu->adev;
int ret = 0;
+ if (!adev->pm.dpm_enabled)
+ return -EINVAL;
+
mutex_lock(&smu->mutex);
if (smu->ppt_funcs->print_clk_levels)
@@ -2252,8 +2360,12 @@ int smu_print_clk_levels(struct smu_context *smu, enum smu_clk_type clk_type, ch
int smu_get_od_percentage(struct smu_context *smu, enum smu_clk_type type)
{
+ struct amdgpu_device *adev = smu->adev;
int ret = 0;
+ if (!adev->pm.dpm_enabled)
+ return -EINVAL;
+
mutex_lock(&smu->mutex);
if (smu->ppt_funcs->get_od_percentage)
@@ -2266,8 +2378,12 @@ int smu_get_od_percentage(struct smu_context *smu, enum smu_clk_type type)
int smu_set_od_percentage(struct smu_context *smu, enum smu_clk_type type, uint32_t value)
{
+ struct amdgpu_device *adev = smu->adev;
int ret = 0;
+ if (!adev->pm.dpm_enabled)
+ return -EINVAL;
+
mutex_lock(&smu->mutex);
if (smu->ppt_funcs->set_od_percentage)
@@ -2282,8 +2398,12 @@ int smu_od_edit_dpm_table(struct smu_context *smu,
enum PP_OD_DPM_TABLE_COMMAND type,
long *input, uint32_t size)
{
+ struct amdgpu_device *adev = smu->adev;
int ret = 0;
+ if (!adev->pm.dpm_enabled)
+ return -EINVAL;
+
mutex_lock(&smu->mutex);
if (smu->ppt_funcs->od_edit_dpm_table)
@@ -2298,8 +2418,12 @@ int smu_read_sensor(struct smu_context *smu,
enum amd_pp_sensors sensor,
void *data, uint32_t *size)
{
+ struct amdgpu_device *adev = smu->adev;
int ret = 0;
+ if (!adev->pm.dpm_enabled)
+ return -EINVAL;
+
mutex_lock(&smu->mutex);
if (smu->ppt_funcs->read_sensor)
@@ -2312,8 +2436,12 @@ int smu_read_sensor(struct smu_context *smu,
int smu_get_power_profile_mode(struct smu_context *smu, char *buf)
{
+ struct amdgpu_device *adev = smu->adev;
int ret = 0;
+ if (!adev->pm.dpm_enabled)
+ return -EINVAL;
+
mutex_lock(&smu->mutex);
if (smu->ppt_funcs->get_power_profile_mode)
@@ -2329,8 +2457,12 @@ int smu_set_power_profile_mode(struct smu_context *smu,
uint32_t param_size,
bool lock_needed)
{
+ struct amdgpu_device *adev = smu->adev;
int ret = 0;
+ if (!adev->pm.dpm_enabled)
+ return -EINVAL;
+
if (lock_needed)
mutex_lock(&smu->mutex);
@@ -2346,8 +2478,12 @@ int smu_set_power_profile_mode(struct smu_context *smu,
int smu_get_fan_control_mode(struct smu_context *smu)
{
+ struct amdgpu_device *adev = smu->adev;
int ret = 0;
+ if (!adev->pm.dpm_enabled)
+ return -EINVAL;
+
mutex_lock(&smu->mutex);
if (smu->ppt_funcs->get_fan_control_mode)
@@ -2360,8 +2496,12 @@ int smu_get_fan_control_mode(struct smu_context *smu)
int smu_set_fan_control_mode(struct smu_context *smu, int value)
{
+ struct amdgpu_device *adev = smu->adev;
int ret = 0;
+ if (!adev->pm.dpm_enabled)
+ return -EINVAL;
+
mutex_lock(&smu->mutex);
if (smu->ppt_funcs->set_fan_control_mode)
@@ -2374,8 +2514,12 @@ int smu_set_fan_control_mode(struct smu_context *smu, int value)
int smu_get_fan_speed_percent(struct smu_context *smu, uint32_t *speed)
{
+ struct amdgpu_device *adev = smu->adev;
int ret = 0;
+ if (!adev->pm.dpm_enabled)
+ return -EINVAL;
+
mutex_lock(&smu->mutex);
if (smu->ppt_funcs->get_fan_speed_percent)
@@ -2388,8 +2532,12 @@ int smu_get_fan_speed_percent(struct smu_context *smu, uint32_t *speed)
int smu_set_fan_speed_percent(struct smu_context *smu, uint32_t speed)
{
+ struct amdgpu_device *adev = smu->adev;
int ret = 0;
+ if (!adev->pm.dpm_enabled)
+ return -EINVAL;
+
mutex_lock(&smu->mutex);
if (smu->ppt_funcs->set_fan_speed_percent)
@@ -2402,8 +2550,12 @@ int smu_set_fan_speed_percent(struct smu_context *smu, uint32_t speed)
int smu_get_fan_speed_rpm(struct smu_context *smu, uint32_t *speed)
{
+ struct amdgpu_device *adev = smu->adev;
int ret = 0;
+ if (!adev->pm.dpm_enabled)
+ return -EINVAL;
+
mutex_lock(&smu->mutex);
if (smu->ppt_funcs->get_fan_speed_rpm)
@@ -2416,8 +2568,12 @@ int smu_get_fan_speed_rpm(struct smu_context *smu, uint32_t *speed)
int smu_set_deep_sleep_dcefclk(struct smu_context *smu, int clk)
{
+ struct amdgpu_device *adev = smu->adev;
int ret = 0;
+ if (!adev->pm.dpm_enabled)
+ return -EINVAL;
+
mutex_lock(&smu->mutex);
if (smu->ppt_funcs->set_deep_sleep_dcefclk)
@@ -2430,8 +2586,12 @@ int smu_set_deep_sleep_dcefclk(struct smu_context *smu, int clk)
int smu_set_active_display_count(struct smu_context *smu, uint32_t count)
{
+ struct amdgpu_device *adev = smu->adev;
int ret = 0;
+ if (!adev->pm.dpm_enabled)
+ return -EINVAL;
+
if (smu->ppt_funcs->set_active_display_count)
ret = smu->ppt_funcs->set_active_display_count(smu, count);
@@ -2442,8 +2602,12 @@ int smu_get_clock_by_type(struct smu_context *smu,
enum amd_pp_clock_type type,
struct amd_pp_clocks *clocks)
{
+ struct amdgpu_device *adev = smu->adev;
int ret = 0;
+ if (!adev->pm.dpm_enabled)
+ return -EINVAL;
+
mutex_lock(&smu->mutex);
if (smu->ppt_funcs->get_clock_by_type)
@@ -2457,8 +2621,12 @@ int smu_get_clock_by_type(struct smu_context *smu,
int smu_get_max_high_clocks(struct smu_context *smu,
struct amd_pp_simple_clock_info *clocks)
{
+ struct amdgpu_device *adev = smu->adev;
int ret = 0;
+ if (!adev->pm.dpm_enabled)
+ return -EINVAL;
+
mutex_lock(&smu->mutex);
if (smu->ppt_funcs->get_max_high_clocks)
@@ -2473,8 +2641,12 @@ int smu_get_clock_by_type_with_latency(struct smu_context *smu,
enum smu_clk_type clk_type,
struct pp_clock_levels_with_latency *clocks)
{
+ struct amdgpu_device *adev = smu->adev;
int ret = 0;
+ if (!adev->pm.dpm_enabled)
+ return -EINVAL;
+
mutex_lock(&smu->mutex);
if (smu->ppt_funcs->get_clock_by_type_with_latency)
@@ -2489,8 +2661,12 @@ int smu_get_clock_by_type_with_voltage(struct smu_context *smu,
enum amd_pp_clock_type type,
struct pp_clock_levels_with_voltage *clocks)
{
+ struct amdgpu_device *adev = smu->adev;
int ret = 0;
+ if (!adev->pm.dpm_enabled)
+ return -EINVAL;
+
mutex_lock(&smu->mutex);
if (smu->ppt_funcs->get_clock_by_type_with_voltage)
@@ -2505,8 +2681,12 @@ int smu_get_clock_by_type_with_voltage(struct smu_context *smu,
int smu_display_clock_voltage_request(struct smu_context *smu,
struct pp_display_clock_request *clock_req)
{
+ struct amdgpu_device *adev = smu->adev;
int ret = 0;
+ if (!adev->pm.dpm_enabled)
+ return -EINVAL;
+
mutex_lock(&smu->mutex);
if (smu->ppt_funcs->display_clock_voltage_request)
@@ -2520,8 +2700,12 @@ int smu_display_clock_voltage_request(struct smu_context *smu,
int smu_display_disable_memory_clock_switch(struct smu_context *smu, bool disable_memory_clock_switch)
{
+ struct amdgpu_device *adev = smu->adev;
int ret = -EINVAL;
+ if (!adev->pm.dpm_enabled)
+ return -EINVAL;
+
mutex_lock(&smu->mutex);
if (smu->ppt_funcs->display_disable_memory_clock_switch)
@@ -2534,8 +2718,12 @@ int smu_display_disable_memory_clock_switch(struct smu_context *smu, bool disabl
int smu_notify_smu_enable_pwe(struct smu_context *smu)
{
+ struct amdgpu_device *adev = smu->adev;
int ret = 0;
+ if (!adev->pm.dpm_enabled)
+ return -EINVAL;
+
mutex_lock(&smu->mutex);
if (smu->ppt_funcs->notify_smu_enable_pwe)
@@ -2549,8 +2737,12 @@ int smu_notify_smu_enable_pwe(struct smu_context *smu)
int smu_set_xgmi_pstate(struct smu_context *smu,
uint32_t pstate)
{
+ struct amdgpu_device *adev = smu->adev;
int ret = 0;
+ if (!adev->pm.dpm_enabled)
+ return -EINVAL;
+
mutex_lock(&smu->mutex);
if (smu->ppt_funcs->set_xgmi_pstate)
@@ -2563,8 +2755,12 @@ int smu_set_xgmi_pstate(struct smu_context *smu,
int smu_set_azalia_d3_pme(struct smu_context *smu)
{
+ struct amdgpu_device *adev = smu->adev;
int ret = 0;
+ if (!adev->pm.dpm_enabled)
+ return -EINVAL;
+
mutex_lock(&smu->mutex);
if (smu->ppt_funcs->set_azalia_d3_pme)
@@ -2575,6 +2771,14 @@ int smu_set_azalia_d3_pme(struct smu_context *smu)
return ret;
}
+/*
+ * On system suspend or reset, the dpm_enabled flag
+ * will be cleared, so that SMU services which are no
+ * longer supported can be gated.
+ *
+ * However, baco/mode1 reset requests should still be
+ * granted as they are still supported and necessary.
+ */
bool smu_baco_is_support(struct smu_context *smu)
{
bool ret = false;
@@ -2646,8 +2850,12 @@ int smu_mode2_reset(struct smu_context *smu)
int smu_get_max_sustainable_clocks_by_dc(struct smu_context *smu,
struct pp_smu_nv_clock_table *max_clocks)
{
+ struct amdgpu_device *adev = smu->adev;
int ret = 0;
+ if (!adev->pm.dpm_enabled)
+ return -EINVAL;
+
mutex_lock(&smu->mutex);
if (smu->ppt_funcs->get_max_sustainable_clocks_by_dc)
@@ -2662,8 +2870,12 @@ int smu_get_uclk_dpm_states(struct smu_context *smu,
unsigned int *clock_values_in_khz,
unsigned int *num_states)
{
+ struct amdgpu_device *adev = smu->adev;
int ret = 0;
+ if (!adev->pm.dpm_enabled)
+ return -EINVAL;
+
mutex_lock(&smu->mutex);
if (smu->ppt_funcs->get_uclk_dpm_states)
@@ -2677,6 +2889,10 @@ int smu_get_uclk_dpm_states(struct smu_context *smu,
enum amd_pm_state_type smu_get_current_power_state(struct smu_context *smu)
{
enum amd_pm_state_type pm_state = POWER_STATE_TYPE_DEFAULT;
+ struct amdgpu_device *adev = smu->adev;
+
+ if (!adev->pm.dpm_enabled)
+ return -EINVAL;
mutex_lock(&smu->mutex);
@@ -2691,8 +2907,12 @@ enum amd_pm_state_type smu_get_current_power_state(struct smu_context *smu)
int smu_get_dpm_clock_table(struct smu_context *smu,
struct dpm_clocks *clock_table)
{
+ struct amdgpu_device *adev = smu->adev;
int ret = 0;
+ if (!adev->pm.dpm_enabled)
+ return -EINVAL;
+
mutex_lock(&smu->mutex);
if (smu->ppt_funcs->get_dpm_clock_table)
diff --git a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c
index 1ef0923f7190..27c5fc9572b2 100644
--- a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c
+++ b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c
@@ -128,6 +128,7 @@ static struct smu_11_0_cmn2aisc_mapping arcturus_message_map[SMU_MSG_MAX_COUNT]
MSG_MAP(SetXgmiMode, PPSMC_MSG_SetXgmiMode),
MSG_MAP(SetMemoryChannelEnable, PPSMC_MSG_SetMemoryChannelEnable),
MSG_MAP(DFCstateControl, PPSMC_MSG_DFCstateControl),
+ MSG_MAP(GmiPwrDnControl, PPSMC_MSG_GmiPwrDnControl),
};
static struct smu_11_0_cmn2aisc_mapping arcturus_clk_map[SMU_CLK_COUNT] = {
@@ -622,6 +623,9 @@ static int arcturus_print_clk_levels(struct smu_context *smu,
struct smu_dpm_context *smu_dpm = &smu->smu_dpm;
struct arcturus_dpm_table *dpm_table = NULL;
+ if (amdgpu_ras_intr_triggered())
+ return snprintf(buf, PAGE_SIZE, "unavailable\n");
+
dpm_table = smu_dpm->dpm_context;
switch (type) {
@@ -997,6 +1001,9 @@ static int arcturus_read_sensor(struct smu_context *smu,
PPTable_t *pptable = table_context->driver_pptable;
int ret = 0;
+ if (amdgpu_ras_intr_triggered())
+ return 0;
+
if (!data || !size)
return -EINVAL;
@@ -2226,12 +2233,8 @@ static const struct i2c_algorithm arcturus_i2c_eeprom_i2c_algo = {
static int arcturus_i2c_eeprom_control_init(struct i2c_adapter *control)
{
struct amdgpu_device *adev = to_amdgpu_device(control);
- struct smu_context *smu = &adev->smu;
int res;
- if (!smu->pm_enabled)
- return -EOPNOTSUPP;
-
control->owner = THIS_MODULE;
control->class = I2C_CLASS_SPD;
control->dev.parent = &adev->pdev->dev;
@@ -2247,12 +2250,6 @@ static int arcturus_i2c_eeprom_control_init(struct i2c_adapter *control)
static void arcturus_i2c_eeprom_control_fini(struct i2c_adapter *control)
{
- struct amdgpu_device *adev = to_amdgpu_device(control);
- struct smu_context *smu = &adev->smu;
-
- if (!smu->pm_enabled)
- return;
-
i2c_del_adapter(control);
}
@@ -2261,7 +2258,7 @@ static bool arcturus_is_baco_supported(struct smu_context *smu)
struct amdgpu_device *adev = smu->adev;
uint32_t val;
- if (!smu_v11_0_baco_is_support(smu))
+ if (!smu_v11_0_baco_is_support(smu) || amdgpu_sriov_vf(adev))
return false;
val = RREG32_SOC15(NBIO, 0, mmRCC_BIF_STRAP0);
@@ -2296,6 +2293,35 @@ static int arcturus_set_df_cstate(struct smu_context *smu,
return smu_send_smc_msg_with_param(smu, SMU_MSG_DFCstateControl, state, NULL);
}
+static int arcturus_allow_xgmi_power_down(struct smu_context *smu, bool en)
+{
+ uint32_t smu_version;
+ int ret;
+
+ ret = smu_get_smc_version(smu, NULL, &smu_version);
+ if (ret) {
+ pr_err("Failed to get smu version!\n");
+ return ret;
+ }
+
+ /* PPSMC_MSG_GmiPwrDnControl is supported from PMFW 54.23.0 onwards */
+ if (smu_version < 0x00361700) {
+ pr_err("XGMI power down control is only supported from PMFW 54.23.0 onwards\n");
+ return -EINVAL;
+ }
+
+ if (en)
+ return smu_send_smc_msg_with_param(smu,
+ SMU_MSG_GmiPwrDnControl,
+ 1,
+ NULL);
+
+ return smu_send_smc_msg_with_param(smu,
+ SMU_MSG_GmiPwrDnControl,
+ 0,
+ NULL);
+}
+
static const struct pptable_funcs arcturus_ppt_funcs = {
/* translate smu index into arcturus specific index */
.get_smu_msg_index = arcturus_get_smu_msg_index,
@@ -2389,6 +2415,7 @@ static const struct pptable_funcs arcturus_ppt_funcs = {
.override_pcie_parameters = smu_v11_0_override_pcie_parameters,
.get_pptable_power_limit = arcturus_get_pptable_power_limit,
.set_df_cstate = arcturus_set_df_cstate,
+ .allow_xgmi_power_down = arcturus_allow_xgmi_power_down,
};
void arcturus_set_ppt_funcs(struct smu_context *smu)
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c
index 689072a312a7..c9cfe90a2947 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c
@@ -36,6 +36,8 @@
#include "power_state.h"
#include "soc15_common.h"
#include "smu10.h"
+#include "asic_reg/pwr/pwr_10_0_offset.h"
+#include "asic_reg/pwr/pwr_10_0_sh_mask.h"
#define SMU10_MAX_DEEPSLEEP_DIVIDER_ID 5
#define SMU10_MINIMUM_ENGINE_CLOCK 800 /* 8Mhz, the low boundary of engine clock allowed on this chip */
@@ -43,13 +45,6 @@
#define SMU10_DISPCLK_BYPASS_THRESHOLD 10000 /* 100Mhz */
#define SMC_RAM_END 0x40000
-#define mmPWR_MISC_CNTL_STATUS 0x0183
-#define mmPWR_MISC_CNTL_STATUS_BASE_IDX 0
-#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT 0x0
-#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT 0x1
-#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK 0x00000001L
-#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK 0x00000006L
-
static const unsigned long SMU10_Magic = (unsigned long) PHM_Rv_Magic;
@@ -81,7 +76,7 @@ static int smu10_display_clock_voltage_request(struct pp_hwmgr *hwmgr,
pr_info("[DisplayClockVoltageRequest]Invalid Clock Type!");
return -EINVAL;
}
- smum_send_msg_to_smc_with_parameter(hwmgr, msg, clk_freq);
+ smum_send_msg_to_smc_with_parameter(hwmgr, msg, clk_freq, NULL);
return 0;
}
@@ -214,7 +209,8 @@ static int smu10_set_min_deep_sleep_dcefclk(struct pp_hwmgr *hwmgr, uint32_t clo
smu10_data->deep_sleep_dcefclk = clock;
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetMinDeepSleepDcefclk,
- smu10_data->deep_sleep_dcefclk);
+ smu10_data->deep_sleep_dcefclk,
+ NULL);
}
return 0;
}
@@ -228,7 +224,8 @@ static int smu10_set_hard_min_dcefclk_by_freq(struct pp_hwmgr *hwmgr, uint32_t c
smu10_data->dcf_actual_hard_min_freq = clock;
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetHardMinDcefclkByFreq,
- smu10_data->dcf_actual_hard_min_freq);
+ smu10_data->dcf_actual_hard_min_freq,
+ NULL);
}
return 0;
}
@@ -242,7 +239,8 @@ static int smu10_set_hard_min_fclk_by_freq(struct pp_hwmgr *hwmgr, uint32_t cloc
smu10_data->f_actual_hard_min_freq = clock;
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetHardMinFclkByFreq,
- smu10_data->f_actual_hard_min_freq);
+ smu10_data->f_actual_hard_min_freq,
+ NULL);
}
return 0;
}
@@ -255,7 +253,8 @@ static int smu10_set_active_display_count(struct pp_hwmgr *hwmgr, uint32_t count
smu10_data->num_active_display = count;
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetDisplayCount,
- smu10_data->num_active_display);
+ smu10_data->num_active_display,
+ NULL);
}
return 0;
@@ -278,7 +277,8 @@ static int smu10_init_power_gate_state(struct pp_hwmgr *hwmgr)
if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)
return smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetGfxCGPG,
- true);
+ true,
+ NULL);
else
return 0;
}
@@ -324,7 +324,7 @@ static int smu10_disable_gfx_off(struct pp_hwmgr *hwmgr)
struct amdgpu_device *adev = hwmgr->adev;
if (adev->pm.pp_feature & PP_GFXOFF_MASK) {
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_DisableGfxOff);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_DisableGfxOff, NULL);
/* confirm gfx is back to "on" state */
while (!smu10_is_gfx_on(hwmgr))
@@ -344,7 +344,7 @@ static int smu10_enable_gfx_off(struct pp_hwmgr *hwmgr)
struct amdgpu_device *adev = hwmgr->adev;
if (adev->pm.pp_feature & PP_GFXOFF_MASK)
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_EnableGfxOff);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_EnableGfxOff, NULL);
return 0;
}
@@ -410,12 +410,10 @@ static int smu10_get_clock_voltage_dependency_table(struct pp_hwmgr *hwmgr,
struct smu10_voltage_dependency_table **pptable,
uint32_t num_entry, const DpmClock_t *pclk_dependency_table)
{
- uint32_t table_size, i;
+ uint32_t i;
struct smu10_voltage_dependency_table *ptable;
- table_size = sizeof(uint32_t) + sizeof(struct smu10_voltage_dependency_table) * num_entry;
- ptable = kzalloc(table_size, GFP_KERNEL);
-
+ ptable = kzalloc(struct_size(ptable, entries, num_entry), GFP_KERNEL);
if (NULL == ptable)
return -ENOMEM;
@@ -479,12 +477,10 @@ static int smu10_populate_clock_table(struct pp_hwmgr *hwmgr)
smu10_get_clock_voltage_dependency_table(hwmgr, &pinfo->vdd_dep_on_phyclk,
ARRAY_SIZE(VddPhyClk), &VddPhyClk[0]);
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMinGfxclkFrequency);
- result = smum_get_argument(hwmgr);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMinGfxclkFrequency, &result);
smu10_data->gfx_min_freq_limit = result / 10 * 1000;
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxGfxclkFrequency);
- result = smum_get_argument(hwmgr);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxGfxclkFrequency, &result);
smu10_data->gfx_max_freq_limit = result / 10 * 1000;
return 0;
@@ -588,116 +584,148 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr,
case AMD_DPM_FORCED_LEVEL_PROFILE_PEAK:
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetHardMinGfxClk,
- data->gfx_max_freq_limit/100);
+ data->gfx_max_freq_limit/100,
+ NULL);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetHardMinFclkByFreq,
- SMU10_UMD_PSTATE_PEAK_FCLK);
+ SMU10_UMD_PSTATE_PEAK_FCLK,
+ NULL);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetHardMinSocclkByFreq,
- SMU10_UMD_PSTATE_PEAK_SOCCLK);
+ SMU10_UMD_PSTATE_PEAK_SOCCLK,
+ NULL);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetHardMinVcn,
- SMU10_UMD_PSTATE_VCE);
+ SMU10_UMD_PSTATE_VCE,
+ NULL);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetSoftMaxGfxClk,
- data->gfx_max_freq_limit/100);
+ data->gfx_max_freq_limit/100,
+ NULL);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetSoftMaxFclkByFreq,
- SMU10_UMD_PSTATE_PEAK_FCLK);
+ SMU10_UMD_PSTATE_PEAK_FCLK,
+ NULL);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetSoftMaxSocclkByFreq,
- SMU10_UMD_PSTATE_PEAK_SOCCLK);
+ SMU10_UMD_PSTATE_PEAK_SOCCLK,
+ NULL);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetSoftMaxVcn,
- SMU10_UMD_PSTATE_VCE);
+ SMU10_UMD_PSTATE_VCE,
+ NULL);
break;
case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK:
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetHardMinGfxClk,
- min_sclk);
+ min_sclk,
+ NULL);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetSoftMaxGfxClk,
- min_sclk);
+ min_sclk,
+ NULL);
break;
case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK:
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetHardMinFclkByFreq,
- min_mclk);
+ min_mclk,
+ NULL);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetSoftMaxFclkByFreq,
- min_mclk);
+ min_mclk,
+ NULL);
break;
case AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD:
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetHardMinGfxClk,
- SMU10_UMD_PSTATE_GFXCLK);
+ SMU10_UMD_PSTATE_GFXCLK,
+ NULL);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetHardMinFclkByFreq,
- SMU10_UMD_PSTATE_FCLK);
+ SMU10_UMD_PSTATE_FCLK,
+ NULL);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetHardMinSocclkByFreq,
- SMU10_UMD_PSTATE_SOCCLK);
+ SMU10_UMD_PSTATE_SOCCLK,
+ NULL);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetHardMinVcn,
- SMU10_UMD_PSTATE_VCE);
+ SMU10_UMD_PSTATE_VCE,
+ NULL);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetSoftMaxGfxClk,
- SMU10_UMD_PSTATE_GFXCLK);
+ SMU10_UMD_PSTATE_GFXCLK,
+ NULL);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetSoftMaxFclkByFreq,
- SMU10_UMD_PSTATE_FCLK);
+ SMU10_UMD_PSTATE_FCLK,
+ NULL);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetSoftMaxSocclkByFreq,
- SMU10_UMD_PSTATE_SOCCLK);
+ SMU10_UMD_PSTATE_SOCCLK,
+ NULL);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetSoftMaxVcn,
- SMU10_UMD_PSTATE_VCE);
+ SMU10_UMD_PSTATE_VCE,
+ NULL);
break;
case AMD_DPM_FORCED_LEVEL_AUTO:
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetHardMinGfxClk,
- min_sclk);
+ min_sclk,
+ NULL);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetHardMinFclkByFreq,
hwmgr->display_config->num_display > 3 ?
SMU10_UMD_PSTATE_PEAK_FCLK :
- min_mclk);
+ min_mclk,
+ NULL);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetHardMinSocclkByFreq,
- SMU10_UMD_PSTATE_MIN_SOCCLK);
+ SMU10_UMD_PSTATE_MIN_SOCCLK,
+ NULL);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetHardMinVcn,
- SMU10_UMD_PSTATE_MIN_VCE);
+ SMU10_UMD_PSTATE_MIN_VCE,
+ NULL);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetSoftMaxGfxClk,
- data->gfx_max_freq_limit/100);
+ data->gfx_max_freq_limit/100,
+ NULL);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetSoftMaxFclkByFreq,
- SMU10_UMD_PSTATE_PEAK_FCLK);
+ SMU10_UMD_PSTATE_PEAK_FCLK,
+ NULL);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetSoftMaxSocclkByFreq,
- SMU10_UMD_PSTATE_PEAK_SOCCLK);
+ SMU10_UMD_PSTATE_PEAK_SOCCLK,
+ NULL);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetSoftMaxVcn,
- SMU10_UMD_PSTATE_VCE);
+ SMU10_UMD_PSTATE_VCE,
+ NULL);
break;
case AMD_DPM_FORCED_LEVEL_LOW:
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetHardMinGfxClk,
- data->gfx_min_freq_limit/100);
+ data->gfx_min_freq_limit/100,
+ NULL);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetSoftMaxGfxClk,
- data->gfx_min_freq_limit/100);
+ data->gfx_min_freq_limit/100,
+ NULL);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetHardMinFclkByFreq,
- min_mclk);
+ min_mclk,
+ NULL);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetSoftMaxFclkByFreq,
- min_mclk);
+ min_mclk,
+ NULL);
break;
case AMD_DPM_FORCED_LEVEL_MANUAL:
case AMD_DPM_FORCED_LEVEL_PROFILE_EXIT:
@@ -849,13 +877,15 @@ static int smu10_force_clock_level(struct pp_hwmgr *hwmgr,
PPSMC_MSG_SetHardMinGfxClk,
low == 2 ? data->gfx_max_freq_limit/100 :
low == 1 ? SMU10_UMD_PSTATE_GFXCLK :
- data->gfx_min_freq_limit/100);
+ data->gfx_min_freq_limit/100,
+ NULL);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetSoftMaxGfxClk,
high == 0 ? data->gfx_min_freq_limit/100 :
high == 1 ? SMU10_UMD_PSTATE_GFXCLK :
- data->gfx_max_freq_limit/100);
+ data->gfx_max_freq_limit/100,
+ NULL);
break;
case PP_MCLK:
@@ -864,11 +894,13 @@ static int smu10_force_clock_level(struct pp_hwmgr *hwmgr,
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetHardMinFclkByFreq,
- mclk_table->entries[low].clk/100);
+ mclk_table->entries[low].clk/100,
+ NULL);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetSoftMaxFclkByFreq,
- mclk_table->entries[high].clk/100);
+ mclk_table->entries[high].clk/100,
+ NULL);
break;
case PP_PCIE:
@@ -888,8 +920,7 @@ static int smu10_print_clock_levels(struct pp_hwmgr *hwmgr,
switch (type) {
case PP_SCLK:
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetGfxclkFrequency);
- now = smum_get_argument(hwmgr);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetGfxclkFrequency, &now);
/* driver only know min/max gfx_clk, Add level 1 for all other gfx clks */
if (now == data->gfx_max_freq_limit/100)
@@ -910,8 +941,7 @@ static int smu10_print_clock_levels(struct pp_hwmgr *hwmgr,
i == 2 ? "*" : "");
break;
case PP_MCLK:
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetFclkFrequency);
- now = smum_get_argument(hwmgr);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetFclkFrequency, &now);
for (i = 0; i < mclk_table->count; i++)
size += sprintf(buf + size, "%d: %uMhz %s\n",
@@ -1122,15 +1152,13 @@ static int smu10_read_sensor(struct pp_hwmgr *hwmgr, int idx,
switch (idx) {
case AMDGPU_PP_SENSOR_GFX_SCLK:
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetGfxclkFrequency);
- sclk = smum_get_argument(hwmgr);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetGfxclkFrequency, &sclk);
/* in units of 10KHZ */
*((uint32_t *)value) = sclk * 100;
*size = 4;
break;
case AMDGPU_PP_SENSOR_GFX_MCLK:
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetFclkFrequency);
- mclk = smum_get_argument(hwmgr);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetFclkFrequency, &mclk);
/* in units of 10KHZ */
*((uint32_t *)value) = mclk * 100;
*size = 4;
@@ -1166,20 +1194,20 @@ static int smu10_set_watermarks_for_clocks_ranges(struct pp_hwmgr *hwmgr,
static int smu10_smus_notify_pwe(struct pp_hwmgr *hwmgr)
{
- return smum_send_msg_to_smc(hwmgr, PPSMC_MSG_SetRccPfcPmeRestoreRegister);
+ return smum_send_msg_to_smc(hwmgr, PPSMC_MSG_SetRccPfcPmeRestoreRegister, NULL);
}
static int smu10_powergate_mmhub(struct pp_hwmgr *hwmgr)
{
- return smum_send_msg_to_smc(hwmgr, PPSMC_MSG_PowerGateMmHub);
+ return smum_send_msg_to_smc(hwmgr, PPSMC_MSG_PowerGateMmHub, NULL);
}
static int smu10_powergate_sdma(struct pp_hwmgr *hwmgr, bool gate)
{
if (gate)
- return smum_send_msg_to_smc(hwmgr, PPSMC_MSG_PowerDownSdma);
+ return smum_send_msg_to_smc(hwmgr, PPSMC_MSG_PowerDownSdma, NULL);
else
- return smum_send_msg_to_smc(hwmgr, PPSMC_MSG_PowerUpSdma);
+ return smum_send_msg_to_smc(hwmgr, PPSMC_MSG_PowerUpSdma, NULL);
}
static void smu10_powergate_vcn(struct pp_hwmgr *hwmgr, bool bgate)
@@ -1191,11 +1219,11 @@ static void smu10_powergate_vcn(struct pp_hwmgr *hwmgr, bool bgate)
AMD_IP_BLOCK_TYPE_VCN,
AMD_PG_STATE_GATE);
smum_send_msg_to_smc_with_parameter(hwmgr,
- PPSMC_MSG_PowerDownVcn, 0);
+ PPSMC_MSG_PowerDownVcn, 0, NULL);
smu10_data->vcn_power_gated = true;
} else {
smum_send_msg_to_smc_with_parameter(hwmgr,
- PPSMC_MSG_PowerUpVcn, 0);
+ PPSMC_MSG_PowerUpVcn, 0, NULL);
amdgpu_device_ip_set_powergating_state(hwmgr->adev,
AMD_IP_BLOCK_TYPE_VCN,
AMD_PG_STATE_UNGATE);
@@ -1274,8 +1302,7 @@ static int smu10_get_power_profile_mode(struct pp_hwmgr *hwmgr, char *buf)
static bool smu10_is_raven1_refresh(struct pp_hwmgr *hwmgr)
{
struct amdgpu_device *adev = hwmgr->adev;
- if ((adev->asic_type == CHIP_RAVEN) &&
- (adev->rev_id != 0x15d8) &&
+ if ((adev->apu_flags & AMD_APU_IS_RAVEN) &&
(hwmgr->smu_version >= 0x41e2b))
return true;
else
@@ -1304,7 +1331,8 @@ static int smu10_set_power_profile_mode(struct pp_hwmgr *hwmgr, long *input, uin
hwmgr->gfxoff_state_changed_by_workload = true;
}
result = smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_ActiveProcessNotify,
- 1 << workload_type);
+ 1 << workload_type,
+ NULL);
if (!result)
hwmgr->power_profile_mode = input[size];
if (workload_type && hwmgr->gfxoff_state_changed_by_workload) {
@@ -1319,13 +1347,13 @@ static int smu10_asic_reset(struct pp_hwmgr *hwmgr, enum SMU_ASIC_RESET_MODE mod
{
return smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_DeviceDriverReset,
- mode);
+ mode,
+ NULL);
}
static const struct pp_hwmgr_func smu10_hwmgr_funcs = {
.backend_init = smu10_hwmgr_backend_init,
.backend_fini = smu10_hwmgr_backend_fini,
- .asic_setup = NULL,
.apply_state_adjust_rules = smu10_apply_state_adjust_rules,
.force_dpm_level = smu10_dpm_force_dpm_level,
.get_power_state_size = smu10_get_power_state_size,
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.h
index 1fb296a996f3..0f969de10fab 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.h
@@ -192,7 +192,7 @@ struct smu10_clock_voltage_dependency_record {
struct smu10_voltage_dependency_table {
uint32_t count;
- struct smu10_clock_voltage_dependency_record entries[1];
+ struct smu10_clock_voltage_dependency_record entries[];
};
struct smu10_clock_voltage_information {
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c
index 683b29a99366..f2bda3bcbbde 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c
@@ -29,14 +29,16 @@ static int smu7_enable_disable_uvd_dpm(struct pp_hwmgr *hwmgr, bool enable)
{
return smum_send_msg_to_smc(hwmgr, enable ?
PPSMC_MSG_UVDDPM_Enable :
- PPSMC_MSG_UVDDPM_Disable);
+ PPSMC_MSG_UVDDPM_Disable,
+ NULL);
}
static int smu7_enable_disable_vce_dpm(struct pp_hwmgr *hwmgr, bool enable)
{
return smum_send_msg_to_smc(hwmgr, enable ?
PPSMC_MSG_VCEDPM_Enable :
- PPSMC_MSG_VCEDPM_Disable);
+ PPSMC_MSG_VCEDPM_Disable,
+ NULL);
}
static int smu7_update_uvd_dpm(struct pp_hwmgr *hwmgr, bool bgate)
@@ -57,7 +59,8 @@ int smu7_powerdown_uvd(struct pp_hwmgr *hwmgr)
{
if (phm_cf_want_uvd_power_gating(hwmgr))
return smum_send_msg_to_smc(hwmgr,
- PPSMC_MSG_UVDPowerOFF);
+ PPSMC_MSG_UVDPowerOFF,
+ NULL);
return 0;
}
@@ -67,10 +70,10 @@ static int smu7_powerup_uvd(struct pp_hwmgr *hwmgr)
if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
PHM_PlatformCaps_UVDDynamicPowerGating)) {
return smum_send_msg_to_smc_with_parameter(hwmgr,
- PPSMC_MSG_UVDPowerON, 1);
+ PPSMC_MSG_UVDPowerON, 1, NULL);
} else {
return smum_send_msg_to_smc_with_parameter(hwmgr,
- PPSMC_MSG_UVDPowerON, 0);
+ PPSMC_MSG_UVDPowerON, 0, NULL);
}
}
@@ -81,7 +84,8 @@ static int smu7_powerdown_vce(struct pp_hwmgr *hwmgr)
{
if (phm_cf_want_vce_power_gating(hwmgr))
return smum_send_msg_to_smc(hwmgr,
- PPSMC_MSG_VCEPowerOFF);
+ PPSMC_MSG_VCEPowerOFF,
+ NULL);
return 0;
}
@@ -89,7 +93,8 @@ static int smu7_powerup_vce(struct pp_hwmgr *hwmgr)
{
if (phm_cf_want_vce_power_gating(hwmgr))
return smum_send_msg_to_smc(hwmgr,
- PPSMC_MSG_VCEPowerON);
+ PPSMC_MSG_VCEPowerON,
+ NULL);
return 0;
}
@@ -181,7 +186,7 @@ int smu7_update_clock_gatings(struct pp_hwmgr *hwmgr,
value = CG_GFX_CGCG_MASK;
if (smum_send_msg_to_smc_with_parameter(
- hwmgr, msg, value))
+ hwmgr, msg, value, NULL))
return -EINVAL;
}
if (PP_STATE_SUPPORT_LS & *msg_id) {
@@ -191,7 +196,7 @@ int smu7_update_clock_gatings(struct pp_hwmgr *hwmgr,
value = CG_GFX_CGLS_MASK;
if (smum_send_msg_to_smc_with_parameter(
- hwmgr, msg, value))
+ hwmgr, msg, value, NULL))
return -EINVAL;
}
break;
@@ -204,7 +209,7 @@ int smu7_update_clock_gatings(struct pp_hwmgr *hwmgr,
value = CG_GFX_3DCG_MASK;
if (smum_send_msg_to_smc_with_parameter(
- hwmgr, msg, value))
+ hwmgr, msg, value, NULL))
return -EINVAL;
}
@@ -215,7 +220,7 @@ int smu7_update_clock_gatings(struct pp_hwmgr *hwmgr,
value = CG_GFX_3DLS_MASK;
if (smum_send_msg_to_smc_with_parameter(
- hwmgr, msg, value))
+ hwmgr, msg, value, NULL))
return -EINVAL;
}
break;
@@ -228,7 +233,7 @@ int smu7_update_clock_gatings(struct pp_hwmgr *hwmgr,
value = CG_GFX_RLC_LS_MASK;
if (smum_send_msg_to_smc_with_parameter(
- hwmgr, msg, value))
+ hwmgr, msg, value, NULL))
return -EINVAL;
}
break;
@@ -241,7 +246,7 @@ int smu7_update_clock_gatings(struct pp_hwmgr *hwmgr,
value = CG_GFX_CP_LS_MASK;
if (smum_send_msg_to_smc_with_parameter(
- hwmgr, msg, value))
+ hwmgr, msg, value, NULL))
return -EINVAL;
}
break;
@@ -255,7 +260,7 @@ int smu7_update_clock_gatings(struct pp_hwmgr *hwmgr,
CG_GFX_OTHERS_MGCG_MASK);
if (smum_send_msg_to_smc_with_parameter(
- hwmgr, msg, value))
+ hwmgr, msg, value, NULL))
return -EINVAL;
}
break;
@@ -275,7 +280,7 @@ int smu7_update_clock_gatings(struct pp_hwmgr *hwmgr,
value = CG_SYS_BIF_MGCG_MASK;
if (smum_send_msg_to_smc_with_parameter(
- hwmgr, msg, value))
+ hwmgr, msg, value, NULL))
return -EINVAL;
}
if (PP_STATE_SUPPORT_LS & *msg_id) {
@@ -285,7 +290,7 @@ int smu7_update_clock_gatings(struct pp_hwmgr *hwmgr,
value = CG_SYS_BIF_MGLS_MASK;
if (smum_send_msg_to_smc_with_parameter(
- hwmgr, msg, value))
+ hwmgr, msg, value, NULL))
return -EINVAL;
}
break;
@@ -298,7 +303,7 @@ int smu7_update_clock_gatings(struct pp_hwmgr *hwmgr,
value = CG_SYS_MC_MGCG_MASK;
if (smum_send_msg_to_smc_with_parameter(
- hwmgr, msg, value))
+ hwmgr, msg, value, NULL))
return -EINVAL;
}
@@ -309,7 +314,7 @@ int smu7_update_clock_gatings(struct pp_hwmgr *hwmgr,
value = CG_SYS_MC_MGLS_MASK;
if (smum_send_msg_to_smc_with_parameter(
- hwmgr, msg, value))
+ hwmgr, msg, value, NULL))
return -EINVAL;
}
break;
@@ -322,7 +327,7 @@ int smu7_update_clock_gatings(struct pp_hwmgr *hwmgr,
value = CG_SYS_DRM_MGCG_MASK;
if (smum_send_msg_to_smc_with_parameter(
- hwmgr, msg, value))
+ hwmgr, msg, value, NULL))
return -EINVAL;
}
if (PP_STATE_SUPPORT_LS & *msg_id) {
@@ -332,7 +337,7 @@ int smu7_update_clock_gatings(struct pp_hwmgr *hwmgr,
value = CG_SYS_DRM_MGLS_MASK;
if (smum_send_msg_to_smc_with_parameter(
- hwmgr, msg, value))
+ hwmgr, msg, value, NULL))
return -EINVAL;
}
break;
@@ -345,7 +350,7 @@ int smu7_update_clock_gatings(struct pp_hwmgr *hwmgr,
value = CG_SYS_HDP_MGCG_MASK;
if (smum_send_msg_to_smc_with_parameter(
- hwmgr, msg, value))
+ hwmgr, msg, value, NULL))
return -EINVAL;
}
@@ -356,7 +361,7 @@ int smu7_update_clock_gatings(struct pp_hwmgr *hwmgr,
value = CG_SYS_HDP_MGLS_MASK;
if (smum_send_msg_to_smc_with_parameter(
- hwmgr, msg, value))
+ hwmgr, msg, value, NULL))
return -EINVAL;
}
break;
@@ -369,7 +374,7 @@ int smu7_update_clock_gatings(struct pp_hwmgr *hwmgr,
value = CG_SYS_SDMA_MGCG_MASK;
if (smum_send_msg_to_smc_with_parameter(
- hwmgr, msg, value))
+ hwmgr, msg, value, NULL))
return -EINVAL;
}
@@ -380,7 +385,7 @@ int smu7_update_clock_gatings(struct pp_hwmgr *hwmgr,
value = CG_SYS_SDMA_MGLS_MASK;
if (smum_send_msg_to_smc_with_parameter(
- hwmgr, msg, value))
+ hwmgr, msg, value, NULL))
return -EINVAL;
}
break;
@@ -393,7 +398,7 @@ int smu7_update_clock_gatings(struct pp_hwmgr *hwmgr,
value = CG_SYS_ROM_MASK;
if (smum_send_msg_to_smc_with_parameter(
- hwmgr, msg, value))
+ hwmgr, msg, value, NULL))
return -EINVAL;
}
break;
@@ -423,8 +428,10 @@ int smu7_powergate_gfx(struct pp_hwmgr *hwmgr, bool enable)
if (enable)
return smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_GFX_CU_PG_ENABLE,
- adev->gfx.cu_info.number);
+ adev->gfx.cu_info.number,
+ NULL);
else
return smum_send_msg_to_smc(hwmgr,
- PPSMC_MSG_GFX_CU_PG_DISABLE);
+ PPSMC_MSG_GFX_CU_PG_DISABLE,
+ NULL);
}
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
index 4795eb66b2b2..753cb2cf6b77 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
@@ -186,7 +186,7 @@ static int smu7_enable_smc_voltage_controller(struct pp_hwmgr *hwmgr)
}
if (hwmgr->feature_mask & PP_SMC_VOLTAGE_CONTROL_MASK)
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_Voltage_Cntl_Enable);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_Voltage_Cntl_Enable, NULL);
return 0;
}
@@ -493,7 +493,7 @@ static int smu7_copy_and_switch_arb_sets(struct pp_hwmgr *hwmgr,
static int smu7_reset_to_default(struct pp_hwmgr *hwmgr)
{
- return smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ResetToDefaults);
+ return smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ResetToDefaults, NULL);
}
/**
@@ -979,7 +979,8 @@ static int smu7_enable_vrhot_gpio_interrupt(struct pp_hwmgr *hwmgr)
if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
PHM_PlatformCaps_RegulatorHot))
return smum_send_msg_to_smc(hwmgr,
- PPSMC_MSG_EnableVRHotGPIOInterrupt);
+ PPSMC_MSG_EnableVRHotGPIOInterrupt,
+ NULL);
return 0;
}
@@ -996,7 +997,7 @@ static int smu7_enable_ulv(struct pp_hwmgr *hwmgr)
struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
if (data->ulv_supported)
- return smum_send_msg_to_smc(hwmgr, PPSMC_MSG_EnableULV);
+ return smum_send_msg_to_smc(hwmgr, PPSMC_MSG_EnableULV, NULL);
return 0;
}
@@ -1006,7 +1007,7 @@ static int smu7_disable_ulv(struct pp_hwmgr *hwmgr)
struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
if (data->ulv_supported)
- return smum_send_msg_to_smc(hwmgr, PPSMC_MSG_DisableULV);
+ return smum_send_msg_to_smc(hwmgr, PPSMC_MSG_DisableULV, NULL);
return 0;
}
@@ -1015,13 +1016,14 @@ static int smu7_enable_deep_sleep_master_switch(struct pp_hwmgr *hwmgr)
{
if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
PHM_PlatformCaps_SclkDeepSleep)) {
- if (smum_send_msg_to_smc(hwmgr, PPSMC_MSG_MASTER_DeepSleep_ON))
+ if (smum_send_msg_to_smc(hwmgr, PPSMC_MSG_MASTER_DeepSleep_ON, NULL))
PP_ASSERT_WITH_CODE(false,
"Attempt to enable Master Deep Sleep switch failed!",
return -EINVAL);
} else {
if (smum_send_msg_to_smc(hwmgr,
- PPSMC_MSG_MASTER_DeepSleep_OFF)) {
+ PPSMC_MSG_MASTER_DeepSleep_OFF,
+ NULL)) {
PP_ASSERT_WITH_CODE(false,
"Attempt to disable Master Deep Sleep switch failed!",
return -EINVAL);
@@ -1036,7 +1038,8 @@ static int smu7_disable_deep_sleep_master_switch(struct pp_hwmgr *hwmgr)
if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
PHM_PlatformCaps_SclkDeepSleep)) {
if (smum_send_msg_to_smc(hwmgr,
- PPSMC_MSG_MASTER_DeepSleep_OFF)) {
+ PPSMC_MSG_MASTER_DeepSleep_OFF,
+ NULL)) {
PP_ASSERT_WITH_CODE(false,
"Attempt to disable Master Deep Sleep switch failed!",
return -EINVAL);
@@ -1089,7 +1092,7 @@ static int smu7_enable_sclk_mclk_dpm(struct pp_hwmgr *hwmgr)
smu7_disable_sclk_vce_handshake(hwmgr);
PP_ASSERT_WITH_CODE(
- (0 == smum_send_msg_to_smc(hwmgr, PPSMC_MSG_DPM_Enable)),
+ (0 == smum_send_msg_to_smc(hwmgr, PPSMC_MSG_DPM_Enable, NULL)),
"Failed to enable SCLK DPM during DPM Start Function!",
return -EINVAL);
}
@@ -1101,7 +1104,8 @@ static int smu7_enable_sclk_mclk_dpm(struct pp_hwmgr *hwmgr)
PP_ASSERT_WITH_CODE(
(0 == smum_send_msg_to_smc(hwmgr,
- PPSMC_MSG_MCLKDPM_Enable)),
+ PPSMC_MSG_MCLKDPM_Enable,
+ NULL)),
"Failed to enable MCLK DPM during DPM Start Function!",
return -EINVAL);
@@ -1172,7 +1176,8 @@ static int smu7_start_dpm(struct pp_hwmgr *hwmgr)
if (0 == data->pcie_dpm_key_disabled) {
PP_ASSERT_WITH_CODE(
(0 == smum_send_msg_to_smc(hwmgr,
- PPSMC_MSG_PCIeDPM_Enable)),
+ PPSMC_MSG_PCIeDPM_Enable,
+ NULL)),
"Failed to enable pcie DPM during DPM Start Function!",
return -EINVAL);
}
@@ -1180,7 +1185,8 @@ static int smu7_start_dpm(struct pp_hwmgr *hwmgr)
if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
PHM_PlatformCaps_Falcon_QuickTransition)) {
PP_ASSERT_WITH_CODE((0 == smum_send_msg_to_smc(hwmgr,
- PPSMC_MSG_EnableACDCGPIOInterrupt)),
+ PPSMC_MSG_EnableACDCGPIOInterrupt,
+ NULL)),
"Failed to enable AC DC GPIO Interrupt!",
);
}
@@ -1197,7 +1203,7 @@ static int smu7_disable_sclk_mclk_dpm(struct pp_hwmgr *hwmgr)
PP_ASSERT_WITH_CODE(true == smum_is_dpm_running(hwmgr),
"Trying to disable SCLK DPM when DPM is disabled",
return 0);
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_DPM_Disable);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_DPM_Disable, NULL);
}
/* disable MCLK dpm */
@@ -1205,7 +1211,7 @@ static int smu7_disable_sclk_mclk_dpm(struct pp_hwmgr *hwmgr)
PP_ASSERT_WITH_CODE(true == smum_is_dpm_running(hwmgr),
"Trying to disable MCLK DPM when DPM is disabled",
return 0);
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_MCLKDPM_Disable);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_MCLKDPM_Disable, NULL);
}
return 0;
@@ -1226,7 +1232,8 @@ static int smu7_stop_dpm(struct pp_hwmgr *hwmgr)
if (!data->pcie_dpm_key_disabled) {
PP_ASSERT_WITH_CODE(
(smum_send_msg_to_smc(hwmgr,
- PPSMC_MSG_PCIeDPM_Disable) == 0),
+ PPSMC_MSG_PCIeDPM_Disable,
+ NULL) == 0),
"Failed to disable pcie DPM during DPM Stop Function!",
return -EINVAL);
}
@@ -1237,7 +1244,7 @@ static int smu7_stop_dpm(struct pp_hwmgr *hwmgr)
"Trying to disable voltage DPM when DPM is disabled",
return 0);
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_Voltage_Cntl_Disable);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_Voltage_Cntl_Disable, NULL);
return 0;
}
@@ -1388,7 +1395,7 @@ static int smu7_enable_dpm_tasks(struct pp_hwmgr *hwmgr)
PP_ASSERT_WITH_CODE((0 == tmp_result),
"Failed to enable VR hot GPIO interrupt!", result = tmp_result);
- smum_send_msg_to_smc(hwmgr, (PPSMC_Msg)PPSMC_NoDisplay);
+ smum_send_msg_to_smc(hwmgr, (PPSMC_Msg)PPSMC_NoDisplay, NULL);
tmp_result = smu7_enable_sclk_control(hwmgr);
PP_ASSERT_WITH_CODE((0 == tmp_result),
@@ -1446,14 +1453,14 @@ static int smu7_avfs_control(struct pp_hwmgr *hwmgr, bool enable)
if (!PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device,
CGS_IND_REG__SMC, FEATURE_STATUS, AVS_ON)) {
PP_ASSERT_WITH_CODE(!smum_send_msg_to_smc(
- hwmgr, PPSMC_MSG_EnableAvfs),
+ hwmgr, PPSMC_MSG_EnableAvfs, NULL),
"Failed to enable AVFS!",
return -EINVAL);
}
} else if (PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device,
CGS_IND_REG__SMC, FEATURE_STATUS, AVS_ON)) {
PP_ASSERT_WITH_CODE(!smum_send_msg_to_smc(
- hwmgr, PPSMC_MSG_DisableAvfs),
+ hwmgr, PPSMC_MSG_DisableAvfs, NULL),
"Failed to disable AVFS!",
return -EINVAL);
}
@@ -2609,7 +2616,8 @@ static int smu7_force_dpm_highest(struct pp_hwmgr *hwmgr)
if (level)
smum_send_msg_to_smc_with_parameter(hwmgr,
- PPSMC_MSG_PCIeDPM_ForceLevel, level);
+ PPSMC_MSG_PCIeDPM_ForceLevel, level,
+ NULL);
}
}
@@ -2623,7 +2631,8 @@ static int smu7_force_dpm_highest(struct pp_hwmgr *hwmgr)
if (level)
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SCLKDPM_SetEnabledMask,
- (1 << level));
+ (1 << level),
+ NULL);
}
}
@@ -2637,7 +2646,8 @@ static int smu7_force_dpm_highest(struct pp_hwmgr *hwmgr)
if (level)
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_MCLKDPM_SetEnabledMask,
- (1 << level));
+ (1 << level),
+ NULL);
}
}
@@ -2656,14 +2666,16 @@ static int smu7_upload_dpm_level_enable_mask(struct pp_hwmgr *hwmgr)
if (data->dpm_level_enable_mask.sclk_dpm_enable_mask)
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SCLKDPM_SetEnabledMask,
- data->dpm_level_enable_mask.sclk_dpm_enable_mask);
+ data->dpm_level_enable_mask.sclk_dpm_enable_mask,
+ NULL);
}
if (!data->mclk_dpm_key_disabled) {
if (data->dpm_level_enable_mask.mclk_dpm_enable_mask)
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_MCLKDPM_SetEnabledMask,
- data->dpm_level_enable_mask.mclk_dpm_enable_mask);
+ data->dpm_level_enable_mask.mclk_dpm_enable_mask,
+ NULL);
}
return 0;
@@ -2678,7 +2690,8 @@ static int smu7_unforce_dpm_levels(struct pp_hwmgr *hwmgr)
if (!data->pcie_dpm_key_disabled) {
smum_send_msg_to_smc(hwmgr,
- PPSMC_MSG_PCIeDPM_UnForceLevel);
+ PPSMC_MSG_PCIeDPM_UnForceLevel,
+ NULL);
}
return smu7_upload_dpm_level_enable_mask(hwmgr);
@@ -2696,7 +2709,8 @@ static int smu7_force_dpm_lowest(struct pp_hwmgr *hwmgr)
data->dpm_level_enable_mask.sclk_dpm_enable_mask);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SCLKDPM_SetEnabledMask,
- (1 << level));
+ (1 << level),
+ NULL);
}
@@ -2706,7 +2720,8 @@ static int smu7_force_dpm_lowest(struct pp_hwmgr *hwmgr)
data->dpm_level_enable_mask.mclk_dpm_enable_mask);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_MCLKDPM_SetEnabledMask,
- (1 << level));
+ (1 << level),
+ NULL);
}
}
@@ -2716,7 +2731,8 @@ static int smu7_force_dpm_lowest(struct pp_hwmgr *hwmgr)
data->dpm_level_enable_mask.pcie_dpm_enable_mask);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_PCIeDPM_ForceLevel,
- (level));
+ (level),
+ NULL);
}
}
@@ -3495,21 +3511,20 @@ static int smu7_get_gpu_power(struct pp_hwmgr *hwmgr, u32 *query)
(adev->asic_type != CHIP_BONAIRE) &&
(adev->asic_type != CHIP_FIJI) &&
(adev->asic_type != CHIP_TONGA)) {
- smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_GetCurrPkgPwr, 0);
- tmp = cgs_read_register(hwmgr->device, mmSMC_MSG_ARG_0);
+ smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_GetCurrPkgPwr, 0, &tmp);
*query = tmp;
if (tmp != 0)
return 0;
}
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_PmStatusLogStart);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_PmStatusLogStart, NULL);
cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
ixSMU_PM_STATUS_95, 0);
for (i = 0; i < 10; i++) {
msleep(500);
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_PmStatusLogSample);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_PmStatusLogSample, NULL);
tmp = cgs_read_ind_register(hwmgr->device,
CGS_IND_REG__SMC,
ixSMU_PM_STATUS_95);
@@ -3534,14 +3549,12 @@ static int smu7_read_sensor(struct pp_hwmgr *hwmgr, int idx,
switch (idx) {
case AMDGPU_PP_SENSOR_GFX_SCLK:
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_API_GetSclkFrequency);
- sclk = cgs_read_register(hwmgr->device, mmSMC_MSG_ARG_0);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_API_GetSclkFrequency, &sclk);
*((uint32_t *)value) = sclk;
*size = 4;
return 0;
case AMDGPU_PP_SENSOR_GFX_MCLK:
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_API_GetMclkFrequency);
- mclk = cgs_read_register(hwmgr->device, mmSMC_MSG_ARG_0);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_API_GetMclkFrequency, &mclk);
*((uint32_t *)value) = mclk;
*size = 4;
return 0;
@@ -3730,7 +3743,8 @@ static int smu7_freeze_sclk_mclk_dpm(struct pp_hwmgr *hwmgr)
"Trying to freeze SCLK DPM when DPM is disabled",
);
PP_ASSERT_WITH_CODE(0 == smum_send_msg_to_smc(hwmgr,
- PPSMC_MSG_SCLKDPM_FreezeLevel),
+ PPSMC_MSG_SCLKDPM_FreezeLevel,
+ NULL),
"Failed to freeze SCLK DPM during FreezeSclkMclkDPM Function!",
return -EINVAL);
}
@@ -3742,7 +3756,8 @@ static int smu7_freeze_sclk_mclk_dpm(struct pp_hwmgr *hwmgr)
"Trying to freeze MCLK DPM when DPM is disabled",
);
PP_ASSERT_WITH_CODE(0 == smum_send_msg_to_smc(hwmgr,
- PPSMC_MSG_MCLKDPM_FreezeLevel),
+ PPSMC_MSG_MCLKDPM_FreezeLevel,
+ NULL),
"Failed to freeze MCLK DPM during FreezeSclkMclkDPM Function!",
return -EINVAL);
}
@@ -3884,7 +3899,8 @@ static int smu7_unfreeze_sclk_mclk_dpm(struct pp_hwmgr *hwmgr)
"Trying to Unfreeze SCLK DPM when DPM is disabled",
);
PP_ASSERT_WITH_CODE(0 == smum_send_msg_to_smc(hwmgr,
- PPSMC_MSG_SCLKDPM_UnfreezeLevel),
+ PPSMC_MSG_SCLKDPM_UnfreezeLevel,
+ NULL),
"Failed to unfreeze SCLK DPM during UnFreezeSclkMclkDPM Function!",
return -EINVAL);
}
@@ -3896,7 +3912,8 @@ static int smu7_unfreeze_sclk_mclk_dpm(struct pp_hwmgr *hwmgr)
"Trying to Unfreeze MCLK DPM when DPM is disabled",
);
PP_ASSERT_WITH_CODE(0 == smum_send_msg_to_smc(hwmgr,
- PPSMC_MSG_MCLKDPM_UnfreezeLevel),
+ PPSMC_MSG_MCLKDPM_UnfreezeLevel,
+ NULL),
"Failed to unfreeze MCLK DPM during UnFreezeSclkMclkDPM Function!",
return -EINVAL);
}
@@ -3949,12 +3966,14 @@ static int smu7_notify_smc_display(struct pp_hwmgr *hwmgr)
if (hwmgr->feature_mask & PP_VBI_TIME_SUPPORT_MASK) {
if (hwmgr->chip_id == CHIP_VEGAM)
smum_send_msg_to_smc_with_parameter(hwmgr,
- (PPSMC_Msg)PPSMC_MSG_SetVBITimeout_VEGAM, data->frame_time_x2);
+ (PPSMC_Msg)PPSMC_MSG_SetVBITimeout_VEGAM, data->frame_time_x2,
+ NULL);
else
smum_send_msg_to_smc_with_parameter(hwmgr,
- (PPSMC_Msg)PPSMC_MSG_SetVBITimeout, data->frame_time_x2);
+ (PPSMC_Msg)PPSMC_MSG_SetVBITimeout, data->frame_time_x2,
+ NULL);
}
- return (smum_send_msg_to_smc(hwmgr, (PPSMC_Msg)PPSMC_HasDisplay) == 0) ? 0 : -EINVAL;
+ return (smum_send_msg_to_smc(hwmgr, (PPSMC_Msg)PPSMC_HasDisplay, NULL) == 0) ? 0 : -EINVAL;
}
static int smu7_set_power_state_tasks(struct pp_hwmgr *hwmgr, const void *input)
@@ -4040,7 +4059,8 @@ static int smu7_set_max_fan_pwm_output(struct pp_hwmgr *hwmgr, uint16_t us_max_f
advanceFanControlParameters.usMaxFanPWM = us_max_fan_pwm;
return smum_send_msg_to_smc_with_parameter(hwmgr,
- PPSMC_MSG_SetFanPwmMax, us_max_fan_pwm);
+ PPSMC_MSG_SetFanPwmMax, us_max_fan_pwm,
+ NULL);
}
static int
@@ -4048,7 +4068,7 @@ smu7_notify_smc_display_change(struct pp_hwmgr *hwmgr, bool has_display)
{
PPSMC_Msg msg = has_display ? (PPSMC_Msg)PPSMC_HasDisplay : (PPSMC_Msg)PPSMC_NoDisplay;
- return (smum_send_msg_to_smc(hwmgr, msg) == 0) ? 0 : -1;
+ return (smum_send_msg_to_smc(hwmgr, msg, NULL) == 0) ? 0 : -1;
}
static int
@@ -4132,7 +4152,8 @@ static int smu7_set_max_fan_rpm_output(struct pp_hwmgr *hwmgr, uint16_t us_max_f
advanceFanControlParameters.usMaxFanRPM = us_max_fan_rpm;
return smum_send_msg_to_smc_with_parameter(hwmgr,
- PPSMC_MSG_SetFanRpmMax, us_max_fan_rpm);
+ PPSMC_MSG_SetFanRpmMax, us_max_fan_rpm,
+ NULL);
}
static const struct amdgpu_irq_src_funcs smu7_irq_funcs = {
@@ -4262,14 +4283,14 @@ static int smu7_check_mc_firmware(struct pp_hwmgr *hwmgr)
if ((hwmgr->chip_id == CHIP_POLARIS10) ||
(hwmgr->chip_id == CHIP_POLARIS11) ||
(hwmgr->chip_id == CHIP_POLARIS12))
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_EnableFFC);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_EnableFFC, NULL);
} else {
data->mem_latency_high = 330;
data->mem_latency_low = 330;
if ((hwmgr->chip_id == CHIP_POLARIS10) ||
(hwmgr->chip_id == CHIP_POLARIS11) ||
(hwmgr->chip_id == CHIP_POLARIS12))
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_DisableFFC);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_DisableFFC, NULL);
}
return 0;
@@ -4413,13 +4434,15 @@ static int smu7_force_clock_level(struct pp_hwmgr *hwmgr,
if (!data->sclk_dpm_key_disabled)
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SCLKDPM_SetEnabledMask,
- data->dpm_level_enable_mask.sclk_dpm_enable_mask & mask);
+ data->dpm_level_enable_mask.sclk_dpm_enable_mask & mask,
+ NULL);
break;
case PP_MCLK:
if (!data->mclk_dpm_key_disabled)
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_MCLKDPM_SetEnabledMask,
- data->dpm_level_enable_mask.mclk_dpm_enable_mask & mask);
+ data->dpm_level_enable_mask.mclk_dpm_enable_mask & mask,
+ NULL);
break;
case PP_PCIE:
{
@@ -4427,11 +4450,13 @@ static int smu7_force_clock_level(struct pp_hwmgr *hwmgr,
if (!data->pcie_dpm_key_disabled) {
if (fls(tmp) != ffs(tmp))
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_PCIeDPM_UnForceLevel);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_PCIeDPM_UnForceLevel,
+ NULL);
else
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_PCIeDPM_ForceLevel,
- fls(tmp) - 1);
+ fls(tmp) - 1,
+ NULL);
}
break;
}
@@ -4457,8 +4482,7 @@ static int smu7_print_clock_levels(struct pp_hwmgr *hwmgr,
switch (type) {
case PP_SCLK:
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_API_GetSclkFrequency);
- clock = cgs_read_register(hwmgr->device, mmSMC_MSG_ARG_0);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_API_GetSclkFrequency, &clock);
for (i = 0; i < sclk_table->count; i++) {
if (clock > sclk_table->dpm_levels[i].value)
@@ -4473,8 +4497,7 @@ static int smu7_print_clock_levels(struct pp_hwmgr *hwmgr,
(i == now) ? "*" : "");
break;
case PP_MCLK:
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_API_GetMclkFrequency);
- clock = cgs_read_register(hwmgr->device, mmSMC_MSG_ARG_0);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_API_GetMclkFrequency, &clock);
for (i = 0; i < mclk_table->count; i++) {
if (clock > mclk_table->dpm_levels[i].value)
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_powertune.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_powertune.c
index 58f5589aaf12..5d4971576111 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_powertune.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_powertune.c
@@ -887,7 +887,10 @@ static int smu7_enable_didt(struct pp_hwmgr *hwmgr, const bool enable)
didt_block |= block_en << TCP_Enable_SHIFT;
if (enable)
- result = smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_Didt_Block_Function, didt_block);
+ result = smum_send_msg_to_smc_with_parameter(hwmgr,
+ PPSMC_MSG_Didt_Block_Function,
+ didt_block,
+ NULL);
return result;
}
@@ -1009,7 +1012,8 @@ int smu7_enable_didt_config(struct pp_hwmgr *hwmgr)
if (hwmgr->chip_id == CHIP_POLARIS11) {
result = smum_send_msg_to_smc(hwmgr,
- (uint16_t)(PPSMC_MSG_EnableDpmDidt));
+ (uint16_t)(PPSMC_MSG_EnableDpmDidt),
+ NULL);
PP_ASSERT_WITH_CODE((0 == result),
"Failed to enable DPM DIDT.", goto error);
}
@@ -1042,7 +1046,8 @@ int smu7_disable_didt_config(struct pp_hwmgr *hwmgr)
goto error);
if (hwmgr->chip_id == CHIP_POLARIS11) {
result = smum_send_msg_to_smc(hwmgr,
- (uint16_t)(PPSMC_MSG_DisableDpmDidt));
+ (uint16_t)(PPSMC_MSG_DisableDpmDidt),
+ NULL);
PP_ASSERT_WITH_CODE((0 == result),
"Failed to disable DPM DIDT.", goto error);
}
@@ -1063,7 +1068,8 @@ int smu7_enable_smc_cac(struct pp_hwmgr *hwmgr)
if (PP_CAP(PHM_PlatformCaps_CAC)) {
int smc_result;
smc_result = smum_send_msg_to_smc(hwmgr,
- (uint16_t)(PPSMC_MSG_EnableCac));
+ (uint16_t)(PPSMC_MSG_EnableCac),
+ NULL);
PP_ASSERT_WITH_CODE((0 == smc_result),
"Failed to enable CAC in SMC.", result = -1);
@@ -1079,7 +1085,8 @@ int smu7_disable_smc_cac(struct pp_hwmgr *hwmgr)
if (PP_CAP(PHM_PlatformCaps_CAC) && data->cac_enabled) {
int smc_result = smum_send_msg_to_smc(hwmgr,
- (uint16_t)(PPSMC_MSG_DisableCac));
+ (uint16_t)(PPSMC_MSG_DisableCac),
+ NULL);
PP_ASSERT_WITH_CODE((smc_result == 0),
"Failed to disable CAC in SMC.", result = -1);
@@ -1095,7 +1102,9 @@ int smu7_set_power_limit(struct pp_hwmgr *hwmgr, uint32_t n)
if (data->power_containment_features &
POWERCONTAINMENT_FEATURE_PkgPwrLimit)
return smum_send_msg_to_smc_with_parameter(hwmgr,
- PPSMC_MSG_PkgPwrSetLimit, n<<8);
+ PPSMC_MSG_PkgPwrSetLimit,
+ n<<8,
+ NULL);
return 0;
}
@@ -1103,7 +1112,9 @@ static int smu7_set_overdriver_target_tdp(struct pp_hwmgr *hwmgr,
uint32_t target_tdp)
{
return smum_send_msg_to_smc_with_parameter(hwmgr,
- PPSMC_MSG_OverDriveSetTargetTdp, target_tdp);
+ PPSMC_MSG_OverDriveSetTargetTdp,
+ target_tdp,
+ NULL);
}
int smu7_enable_power_containment(struct pp_hwmgr *hwmgr)
@@ -1124,7 +1135,8 @@ int smu7_enable_power_containment(struct pp_hwmgr *hwmgr)
if (PP_CAP(PHM_PlatformCaps_PowerContainment)) {
if (data->enable_tdc_limit_feature) {
smc_result = smum_send_msg_to_smc(hwmgr,
- (uint16_t)(PPSMC_MSG_TDCLimitEnable));
+ (uint16_t)(PPSMC_MSG_TDCLimitEnable),
+ NULL);
PP_ASSERT_WITH_CODE((0 == smc_result),
"Failed to enable TDCLimit in SMC.", result = -1;);
if (0 == smc_result)
@@ -1134,7 +1146,8 @@ int smu7_enable_power_containment(struct pp_hwmgr *hwmgr)
if (data->enable_pkg_pwr_tracking_feature) {
smc_result = smum_send_msg_to_smc(hwmgr,
- (uint16_t)(PPSMC_MSG_PkgPwrLimitEnable));
+ (uint16_t)(PPSMC_MSG_PkgPwrLimitEnable),
+ NULL);
PP_ASSERT_WITH_CODE((0 == smc_result),
"Failed to enable PkgPwrTracking in SMC.", result = -1;);
if (0 == smc_result) {
@@ -1163,7 +1176,8 @@ int smu7_disable_power_containment(struct pp_hwmgr *hwmgr)
if (data->power_containment_features &
POWERCONTAINMENT_FEATURE_TDCLimit) {
smc_result = smum_send_msg_to_smc(hwmgr,
- (uint16_t)(PPSMC_MSG_TDCLimitDisable));
+ (uint16_t)(PPSMC_MSG_TDCLimitDisable),
+ NULL);
PP_ASSERT_WITH_CODE((smc_result == 0),
"Failed to disable TDCLimit in SMC.",
result = smc_result);
@@ -1172,7 +1186,8 @@ int smu7_disable_power_containment(struct pp_hwmgr *hwmgr)
if (data->power_containment_features &
POWERCONTAINMENT_FEATURE_DTE) {
smc_result = smum_send_msg_to_smc(hwmgr,
- (uint16_t)(PPSMC_MSG_DisableDTE));
+ (uint16_t)(PPSMC_MSG_DisableDTE),
+ NULL);
PP_ASSERT_WITH_CODE((smc_result == 0),
"Failed to disable DTE in SMC.",
result = smc_result);
@@ -1181,7 +1196,8 @@ int smu7_disable_power_containment(struct pp_hwmgr *hwmgr)
if (data->power_containment_features &
POWERCONTAINMENT_FEATURE_PkgPwrLimit) {
smc_result = smum_send_msg_to_smc(hwmgr,
- (uint16_t)(PPSMC_MSG_PkgPwrLimitDisable));
+ (uint16_t)(PPSMC_MSG_PkgPwrLimitDisable),
+ NULL);
PP_ASSERT_WITH_CODE((smc_result == 0),
"Failed to disable PkgPwrTracking in SMC.",
result = smc_result);
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_thermal.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_thermal.c
index 5bdc0df5a9f4..0b30f73649a8 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_thermal.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_thermal.c
@@ -151,8 +151,8 @@ int smu7_fan_ctrl_start_smc_fan_control(struct pp_hwmgr *hwmgr)
int result;
if (PP_CAP(PHM_PlatformCaps_ODFuzzyFanControlSupport)) {
- cgs_write_register(hwmgr->device, mmSMC_MSG_ARG_0, FAN_CONTROL_FUZZY);
- result = smum_send_msg_to_smc(hwmgr, PPSMC_StartFanControl);
+ result = smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_StartFanControl,
+ FAN_CONTROL_FUZZY, NULL);
if (PP_CAP(PHM_PlatformCaps_FanSpeedInTableIsRPM))
hwmgr->hwmgr_func->set_max_fan_rpm_output(hwmgr,
@@ -164,8 +164,8 @@ int smu7_fan_ctrl_start_smc_fan_control(struct pp_hwmgr *hwmgr)
advanceFanControlParameters.usMaxFanPWM);
} else {
- cgs_write_register(hwmgr->device, mmSMC_MSG_ARG_0, FAN_CONTROL_TABLE);
- result = smum_send_msg_to_smc(hwmgr, PPSMC_StartFanControl);
+ result = smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_StartFanControl,
+ FAN_CONTROL_TABLE, NULL);
}
if (!result && hwmgr->thermal_controller.
@@ -173,7 +173,8 @@ int smu7_fan_ctrl_start_smc_fan_control(struct pp_hwmgr *hwmgr)
result = smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetFanTemperatureTarget,
hwmgr->thermal_controller.
- advanceFanControlParameters.ucTargetTemperature);
+ advanceFanControlParameters.ucTargetTemperature,
+ NULL);
hwmgr->fan_ctrl_enabled = true;
return result;
@@ -183,7 +184,7 @@ int smu7_fan_ctrl_start_smc_fan_control(struct pp_hwmgr *hwmgr)
int smu7_fan_ctrl_stop_smc_fan_control(struct pp_hwmgr *hwmgr)
{
hwmgr->fan_ctrl_enabled = false;
- return smum_send_msg_to_smc(hwmgr, PPSMC_StopFanControl);
+ return smum_send_msg_to_smc(hwmgr, PPSMC_StopFanControl, NULL);
}
/**
@@ -372,7 +373,7 @@ static void smu7_thermal_enable_alert(struct pp_hwmgr *hwmgr)
CG_THERMAL_INT, THERM_INT_MASK, alert);
/* send message to SMU to enable internal thermal interrupts */
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_Thermal_Cntl_Enable);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_Thermal_Cntl_Enable, NULL);
}
/**
@@ -390,7 +391,7 @@ int smu7_thermal_disable_alert(struct pp_hwmgr *hwmgr)
CG_THERMAL_INT, THERM_INT_MASK, alert);
/* send message to SMU to disable internal thermal interrupts */
- return smum_send_msg_to_smc(hwmgr, PPSMC_MSG_Thermal_Cntl_Disable);
+ return smum_send_msg_to_smc(hwmgr, PPSMC_MSG_Thermal_Cntl_Disable, NULL);
}
/**
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu8_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu8_hwmgr.c
index 019d6a206492..a6c6a793e98e 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu8_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu8_hwmgr.c
@@ -162,8 +162,10 @@ static uint32_t smu8_get_max_sclk_level(struct pp_hwmgr *hwmgr)
struct smu8_hwmgr *data = hwmgr->backend;
if (data->max_sclk_level == 0) {
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxSclkLevel);
- data->max_sclk_level = smum_get_argument(hwmgr) + 1;
+ smum_send_msg_to_smc(hwmgr,
+ PPSMC_MSG_GetMaxSclkLevel,
+ &data->max_sclk_level);
+ data->max_sclk_level += 1;
}
return data->max_sclk_level;
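Both shapes of the converted interface are visible by this point: a trailing NULL when the caller only fires a command, and a response pointer that receives the SMU's reply in place of the old smum_get_argument() read-back. A sketch of the two call patterns follows; the prototype is inferred from the call sites in this patch rather than copied from smumgr.h, and the wrapper function names are invented for illustration:

	/* Assumed from the call sites above:
	 * int smum_send_msg_to_smc(struct pp_hwmgr *hwmgr,
	 *                          uint16_t msg, uint32_t *resp);
	 */
	static int example_fire_and_forget(struct pp_hwmgr *hwmgr)
	{
		/* No reply wanted: the new third argument is simply NULL. */
		return smum_send_msg_to_smc(hwmgr, PPSMC_StopFanControl, NULL);
	}

	static int example_read_back(struct pp_hwmgr *hwmgr, uint32_t *level)
	{
		/* Reply wanted: it lands in *level, replacing the separate
		 * smum_get_argument(hwmgr) call the old code needed. */
		return smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxUvdLevel, level);
	}
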
@@ -580,7 +582,8 @@ static int smu8_init_uvd_limit(struct pp_hwmgr *hwmgr)
struct smu8_hwmgr *data = hwmgr->backend;
struct phm_uvd_clock_voltage_dependency_table *table =
hwmgr->dyn_state.uvd_clock_voltage_dependency_table;
- unsigned long clock = 0, level;
+ unsigned long clock = 0;
+ uint32_t level;
if (NULL == table || table->count <= 0)
return -EINVAL;
@@ -588,8 +591,7 @@ static int smu8_init_uvd_limit(struct pp_hwmgr *hwmgr)
data->uvd_dpm.soft_min_clk = 0;
data->uvd_dpm.hard_min_clk = 0;
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxUvdLevel);
- level = smum_get_argument(hwmgr);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxUvdLevel, &level);
if (level < table->count)
clock = table->entries[level].vclk;
@@ -607,7 +609,8 @@ static int smu8_init_vce_limit(struct pp_hwmgr *hwmgr)
struct smu8_hwmgr *data = hwmgr->backend;
struct phm_vce_clock_voltage_dependency_table *table =
hwmgr->dyn_state.vce_clock_voltage_dependency_table;
- unsigned long clock = 0, level;
+ unsigned long clock = 0;
+ uint32_t level;
if (NULL == table || table->count <= 0)
return -EINVAL;
@@ -615,8 +618,7 @@ static int smu8_init_vce_limit(struct pp_hwmgr *hwmgr)
data->vce_dpm.soft_min_clk = 0;
data->vce_dpm.hard_min_clk = 0;
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxEclkLevel);
- level = smum_get_argument(hwmgr);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxEclkLevel, &level);
if (level < table->count)
clock = table->entries[level].ecclk;
@@ -634,7 +636,8 @@ static int smu8_init_acp_limit(struct pp_hwmgr *hwmgr)
struct smu8_hwmgr *data = hwmgr->backend;
struct phm_acp_clock_voltage_dependency_table *table =
hwmgr->dyn_state.acp_clock_voltage_dependency_table;
- unsigned long clock = 0, level;
+ unsigned long clock = 0;
+ uint32_t level;
if (NULL == table || table->count <= 0)
return -EINVAL;
@@ -642,8 +645,7 @@ static int smu8_init_acp_limit(struct pp_hwmgr *hwmgr)
data->acp_dpm.soft_min_clk = 0;
data->acp_dpm.hard_min_clk = 0;
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxAclkLevel);
- level = smum_get_argument(hwmgr);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxAclkLevel, &level);
if (level < table->count)
clock = table->entries[level].acpclk;
@@ -665,7 +667,7 @@ static void smu8_init_power_gate_state(struct pp_hwmgr *hwmgr)
#ifdef CONFIG_DRM_AMD_ACP
data->acp_power_gated = false;
#else
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ACPPowerOFF);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ACPPowerOFF, NULL);
data->acp_power_gated = true;
#endif
@@ -708,7 +710,8 @@ static int smu8_update_sclk_limit(struct pp_hwmgr *hwmgr)
PPSMC_MSG_SetSclkHardMin,
smu8_get_sclk_level(hwmgr,
data->sclk_dpm.hard_min_clk,
- PPSMC_MSG_SetSclkHardMin));
+ PPSMC_MSG_SetSclkHardMin),
+ NULL);
}
clock = data->sclk_dpm.soft_min_clk;
@@ -731,7 +734,8 @@ static int smu8_update_sclk_limit(struct pp_hwmgr *hwmgr)
PPSMC_MSG_SetSclkSoftMin,
smu8_get_sclk_level(hwmgr,
data->sclk_dpm.soft_min_clk,
- PPSMC_MSG_SetSclkSoftMin));
+ PPSMC_MSG_SetSclkSoftMin),
+ NULL);
}
if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
@@ -742,7 +746,8 @@ static int smu8_update_sclk_limit(struct pp_hwmgr *hwmgr)
PPSMC_MSG_SetSclkSoftMax,
smu8_get_sclk_level(hwmgr,
data->sclk_dpm.soft_max_clk,
- PPSMC_MSG_SetSclkSoftMax));
+ PPSMC_MSG_SetSclkSoftMax),
+ NULL);
}
return 0;
@@ -760,7 +765,8 @@ static int smu8_set_deep_sleep_sclk_threshold(struct pp_hwmgr *hwmgr)
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetMinDeepSleepSclk,
- clks);
+ clks,
+ NULL);
}
return 0;
@@ -773,7 +779,8 @@ static int smu8_set_watermark_threshold(struct pp_hwmgr *hwmgr)
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetWatermarkFrequency,
- data->sclk_dpm.soft_max_clk);
+ data->sclk_dpm.soft_max_clk,
+ NULL);
return 0;
}
@@ -788,13 +795,15 @@ static int smu8_nbdpm_pstate_enable_disable(struct pp_hwmgr *hwmgr, bool enable,
return smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_EnableLowMemoryPstate,
- (lock ? 1 : 0));
+ (lock ? 1 : 0),
+ NULL);
} else {
PP_DBG_LOG("disable Low Memory PState.\n");
return smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_DisableLowMemoryPstate,
- (lock ? 1 : 0));
+ (lock ? 1 : 0),
+ NULL);
}
}
@@ -814,7 +823,8 @@ static int smu8_disable_nb_dpm(struct pp_hwmgr *hwmgr)
ret = smum_send_msg_to_smc_with_parameter(
hwmgr,
PPSMC_MSG_DisableAllSmuFeatures,
- dpm_features);
+ dpm_features,
+ NULL);
if (ret == 0)
data->is_nb_dpm_enabled = false;
}
@@ -835,7 +845,8 @@ static int smu8_enable_nb_dpm(struct pp_hwmgr *hwmgr)
ret = smum_send_msg_to_smc_with_parameter(
hwmgr,
PPSMC_MSG_EnableAllSmuFeatures,
- dpm_features);
+ dpm_features,
+ NULL);
if (ret == 0)
data->is_nb_dpm_enabled = true;
}
@@ -953,7 +964,8 @@ static int smu8_start_dpm(struct pp_hwmgr *hwmgr)
return smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_EnableAllSmuFeatures,
- SCLK_DPM_MASK);
+ SCLK_DPM_MASK,
+ NULL);
}
static int smu8_stop_dpm(struct pp_hwmgr *hwmgr)
@@ -967,7 +979,8 @@ static int smu8_stop_dpm(struct pp_hwmgr *hwmgr)
data->dpm_flags &= ~DPMFlags_SCLK_Enabled;
ret = smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_DisableAllSmuFeatures,
- dpm_features);
+ dpm_features,
+ NULL);
}
return ret;
}
@@ -983,13 +996,15 @@ static int smu8_program_bootup_state(struct pp_hwmgr *hwmgr)
PPSMC_MSG_SetSclkSoftMin,
smu8_get_sclk_level(hwmgr,
data->sclk_dpm.soft_min_clk,
- PPSMC_MSG_SetSclkSoftMin));
+ PPSMC_MSG_SetSclkSoftMin),
+ NULL);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetSclkSoftMax,
smu8_get_sclk_level(hwmgr,
data->sclk_dpm.soft_max_clk,
- PPSMC_MSG_SetSclkSoftMax));
+ PPSMC_MSG_SetSclkSoftMax),
+ NULL);
return 0;
}
@@ -1127,13 +1142,15 @@ static int smu8_phm_force_dpm_highest(struct pp_hwmgr *hwmgr)
PPSMC_MSG_SetSclkSoftMin,
smu8_get_sclk_level(hwmgr,
data->sclk_dpm.soft_max_clk,
- PPSMC_MSG_SetSclkSoftMin));
+ PPSMC_MSG_SetSclkSoftMin),
+ NULL);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetSclkSoftMax,
smu8_get_sclk_level(hwmgr,
data->sclk_dpm.soft_max_clk,
- PPSMC_MSG_SetSclkSoftMax));
+ PPSMC_MSG_SetSclkSoftMax),
+ NULL);
return 0;
}
@@ -1167,13 +1184,15 @@ static int smu8_phm_unforce_dpm_levels(struct pp_hwmgr *hwmgr)
PPSMC_MSG_SetSclkSoftMin,
smu8_get_sclk_level(hwmgr,
data->sclk_dpm.soft_min_clk,
- PPSMC_MSG_SetSclkSoftMin));
+ PPSMC_MSG_SetSclkSoftMin),
+ NULL);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetSclkSoftMax,
smu8_get_sclk_level(hwmgr,
data->sclk_dpm.soft_max_clk,
- PPSMC_MSG_SetSclkSoftMax));
+ PPSMC_MSG_SetSclkSoftMax),
+ NULL);
return 0;
}
@@ -1186,13 +1205,15 @@ static int smu8_phm_force_dpm_lowest(struct pp_hwmgr *hwmgr)
PPSMC_MSG_SetSclkSoftMax,
smu8_get_sclk_level(hwmgr,
data->sclk_dpm.soft_min_clk,
- PPSMC_MSG_SetSclkSoftMax));
+ PPSMC_MSG_SetSclkSoftMax),
+ NULL);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetSclkSoftMin,
smu8_get_sclk_level(hwmgr,
data->sclk_dpm.soft_min_clk,
- PPSMC_MSG_SetSclkSoftMin));
+ PPSMC_MSG_SetSclkSoftMin),
+ NULL);
return 0;
}
@@ -1227,7 +1248,7 @@ static int smu8_dpm_force_dpm_level(struct pp_hwmgr *hwmgr,
static int smu8_dpm_powerdown_uvd(struct pp_hwmgr *hwmgr)
{
if (PP_CAP(PHM_PlatformCaps_UVDPowerGating))
- return smum_send_msg_to_smc(hwmgr, PPSMC_MSG_UVDPowerOFF);
+ return smum_send_msg_to_smc(hwmgr, PPSMC_MSG_UVDPowerOFF, NULL);
return 0;
}
@@ -1237,7 +1258,8 @@ static int smu8_dpm_powerup_uvd(struct pp_hwmgr *hwmgr)
return smum_send_msg_to_smc_with_parameter(
hwmgr,
PPSMC_MSG_UVDPowerON,
- PP_CAP(PHM_PlatformCaps_UVDDynamicPowerGating) ? 1 : 0);
+ PP_CAP(PHM_PlatformCaps_UVDDynamicPowerGating) ? 1 : 0,
+ NULL);
}
return 0;
@@ -1259,15 +1281,20 @@ static int smu8_dpm_update_vce_dpm(struct pp_hwmgr *hwmgr)
PPSMC_MSG_SetEclkHardMin,
smu8_get_eclk_level(hwmgr,
data->vce_dpm.hard_min_clk,
- PPSMC_MSG_SetEclkHardMin));
+ PPSMC_MSG_SetEclkHardMin),
+ NULL);
} else {
smum_send_msg_to_smc_with_parameter(hwmgr,
- PPSMC_MSG_SetEclkHardMin, 0);
+ PPSMC_MSG_SetEclkHardMin,
+ 0,
+ NULL);
/* disable ECLK DPM 0. Otherwise VCE could hang if
* switching SCLK from DPM 0 to 6/7 */
smum_send_msg_to_smc_with_parameter(hwmgr,
- PPSMC_MSG_SetEclkSoftMin, 1);
+ PPSMC_MSG_SetEclkSoftMin,
+ 1,
+ NULL);
}
return 0;
}
@@ -1276,7 +1303,8 @@ static int smu8_dpm_powerdown_vce(struct pp_hwmgr *hwmgr)
{
if (PP_CAP(PHM_PlatformCaps_VCEPowerGating))
return smum_send_msg_to_smc(hwmgr,
- PPSMC_MSG_VCEPowerOFF);
+ PPSMC_MSG_VCEPowerOFF,
+ NULL);
return 0;
}
@@ -1284,7 +1312,8 @@ static int smu8_dpm_powerup_vce(struct pp_hwmgr *hwmgr)
{
if (PP_CAP(PHM_PlatformCaps_VCEPowerGating))
return smum_send_msg_to_smc(hwmgr,
- PPSMC_MSG_VCEPowerON);
+ PPSMC_MSG_VCEPowerON,
+ NULL);
return 0;
}
@@ -1435,7 +1464,8 @@ static void smu8_hw_print_display_cfg(
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetDisplaySizePowerParams,
- data);
+ data,
+ NULL);
}
return 0;
@@ -1497,10 +1527,12 @@ static int smu8_force_clock_level(struct pp_hwmgr *hwmgr,
case PP_SCLK:
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetSclkSoftMin,
- mask);
+ mask,
+ NULL);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetSclkSoftMax,
- mask);
+ mask,
+ NULL);
break;
default:
break;
@@ -1753,9 +1785,10 @@ static int smu8_read_sensor(struct pp_hwmgr *hwmgr, int idx,
*((uint32_t *)value) = 0;
return 0;
case AMDGPU_PP_SENSOR_GPU_LOAD:
- result = smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetAverageGraphicsActivity);
+ result = smum_send_msg_to_smc(hwmgr,
+ PPSMC_MSG_GetAverageGraphicsActivity,
+ &activity_percent);
if (0 == result) {
- activity_percent = cgs_read_register(hwmgr->device, mmSMU_MP1_SRBM2P_ARG_0);
activity_percent = activity_percent > 100 ? 100 : activity_percent;
} else {
activity_percent = 50;
@@ -1785,20 +1818,25 @@ static int smu8_notify_cac_buffer_info(struct pp_hwmgr *hwmgr,
{
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_DramAddrHiVirtual,
- mc_addr_hi);
+ mc_addr_hi,
+ NULL);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_DramAddrLoVirtual,
- mc_addr_low);
+ mc_addr_low,
+ NULL);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_DramAddrHiPhysical,
- virtual_addr_hi);
+ virtual_addr_hi,
+ NULL);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_DramAddrLoPhysical,
- virtual_addr_low);
+ virtual_addr_low,
+ NULL);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_DramBufferSize,
- size);
+ size,
+ NULL);
return 0;
}
@@ -1827,12 +1865,16 @@ static int smu8_enable_disable_uvd_dpm(struct pp_hwmgr *hwmgr, bool enable)
data->dpm_flags |= DPMFlags_UVD_Enabled;
dpm_features |= UVD_DPM_MASK;
smum_send_msg_to_smc_with_parameter(hwmgr,
- PPSMC_MSG_EnableAllSmuFeatures, dpm_features);
+ PPSMC_MSG_EnableAllSmuFeatures,
+ dpm_features,
+ NULL);
} else {
dpm_features |= UVD_DPM_MASK;
data->dpm_flags &= ~DPMFlags_UVD_Enabled;
smum_send_msg_to_smc_with_parameter(hwmgr,
- PPSMC_MSG_DisableAllSmuFeatures, dpm_features);
+ PPSMC_MSG_DisableAllSmuFeatures,
+ dpm_features,
+ NULL);
}
return 0;
}
@@ -1854,7 +1896,8 @@ int smu8_dpm_update_uvd_dpm(struct pp_hwmgr *hwmgr, bool bgate)
PPSMC_MSG_SetUvdHardMin,
smu8_get_uvd_level(hwmgr,
data->uvd_dpm.hard_min_clk,
- PPSMC_MSG_SetUvdHardMin));
+ PPSMC_MSG_SetUvdHardMin),
+ NULL);
smu8_enable_disable_uvd_dpm(hwmgr, true);
} else {
@@ -1878,12 +1921,16 @@ static int smu8_enable_disable_vce_dpm(struct pp_hwmgr *hwmgr, bool enable)
data->dpm_flags |= DPMFlags_VCE_Enabled;
dpm_features |= VCE_DPM_MASK;
smum_send_msg_to_smc_with_parameter(hwmgr,
- PPSMC_MSG_EnableAllSmuFeatures, dpm_features);
+ PPSMC_MSG_EnableAllSmuFeatures,
+ dpm_features,
+ NULL);
} else {
dpm_features |= VCE_DPM_MASK;
data->dpm_flags &= ~DPMFlags_VCE_Enabled;
smum_send_msg_to_smc_with_parameter(hwmgr,
- PPSMC_MSG_DisableAllSmuFeatures, dpm_features);
+ PPSMC_MSG_DisableAllSmuFeatures,
+ dpm_features,
+ NULL);
}
return 0;
@@ -1898,9 +1945,9 @@ static void smu8_dpm_powergate_acp(struct pp_hwmgr *hwmgr, bool bgate)
return;
if (bgate)
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ACPPowerOFF);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ACPPowerOFF, NULL);
else
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ACPPowerON);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ACPPowerON, NULL);
}
static void smu8_dpm_powergate_uvd(struct pp_hwmgr *hwmgr, bool bgate)
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.c
index d09690fca452..60b5ca974356 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.c
@@ -22,6 +22,7 @@
*/
#include <linux/pci.h>
+#include <linux/reboot.h>
#include "hwmgr.h"
#include "pp_debug.h"
@@ -557,7 +558,9 @@ void phm_apply_dal_min_voltage_request(struct pp_hwmgr *hwmgr)
if (req_vddc <= vddc_table->entries[i].vddc) {
req_volt = (((uint32_t)vddc_table->entries[i].vddc) * VOLTAGE_SCALE);
smum_send_msg_to_smc_with_parameter(hwmgr,
- PPSMC_MSG_VddC_Request, req_volt);
+ PPSMC_MSG_VddC_Request,
+ req_volt,
+ NULL);
return;
}
}
@@ -593,37 +596,43 @@ int phm_irq_process(struct amdgpu_device *adev,
uint32_t src_id = entry->src_id;
if (client_id == AMDGPU_IRQ_CLIENTID_LEGACY) {
- if (src_id == VISLANDS30_IV_SRCID_CG_TSS_THERMAL_LOW_TO_HIGH)
- pr_warn("GPU over temperature range detected on PCIe %d:%d.%d!\n",
- PCI_BUS_NUM(adev->pdev->devfn),
- PCI_SLOT(adev->pdev->devfn),
- PCI_FUNC(adev->pdev->devfn));
- else if (src_id == VISLANDS30_IV_SRCID_CG_TSS_THERMAL_HIGH_TO_LOW)
- pr_warn("GPU under temperature range detected on PCIe %d:%d.%d!\n",
- PCI_BUS_NUM(adev->pdev->devfn),
- PCI_SLOT(adev->pdev->devfn),
- PCI_FUNC(adev->pdev->devfn));
- else if (src_id == VISLANDS30_IV_SRCID_GPIO_19)
- pr_warn("GPU Critical Temperature Fault detected on PCIe %d:%d.%d!\n",
- PCI_BUS_NUM(adev->pdev->devfn),
- PCI_SLOT(adev->pdev->devfn),
- PCI_FUNC(adev->pdev->devfn));
+ if (src_id == VISLANDS30_IV_SRCID_CG_TSS_THERMAL_LOW_TO_HIGH) {
+ dev_emerg(adev->dev, "ERROR: GPU over temperature range(SW CTF) detected!\n");
+ /*
+ * SW CTF just occurred.
+ * Try to do a graceful shutdown to prevent further damage.
+ */
+ dev_emerg(adev->dev, "ERROR: System is going to shutdown due to GPU SW CTF!\n");
+ orderly_poweroff(true);
+ } else if (src_id == VISLANDS30_IV_SRCID_CG_TSS_THERMAL_HIGH_TO_LOW)
+ dev_emerg(adev->dev, "ERROR: GPU under temperature range detected!\n");
+ else if (src_id == VISLANDS30_IV_SRCID_GPIO_19) {
+ dev_emerg(adev->dev, "ERROR: GPU HW Critical Temperature Fault(aka CTF) detected!\n");
+ /*
+ * HW CTF just occurred. Shutdown to prevent further damage.
+ */
+ dev_emerg(adev->dev, "ERROR: System is going to shutdown due to GPU HW CTF!\n");
+ orderly_poweroff(true);
+ }
} else if (client_id == SOC15_IH_CLIENTID_THM) {
- if (src_id == 0)
- pr_warn("GPU over temperature range detected on PCIe %d:%d.%d!\n",
- PCI_BUS_NUM(adev->pdev->devfn),
- PCI_SLOT(adev->pdev->devfn),
- PCI_FUNC(adev->pdev->devfn));
- else
- pr_warn("GPU under temperature range detected on PCIe %d:%d.%d!\n",
- PCI_BUS_NUM(adev->pdev->devfn),
- PCI_SLOT(adev->pdev->devfn),
- PCI_FUNC(adev->pdev->devfn));
- } else if (client_id == SOC15_IH_CLIENTID_ROM_SMUIO)
- pr_warn("GPU Critical Temperature Fault detected on PCIe %d:%d.%d!\n",
- PCI_BUS_NUM(adev->pdev->devfn),
- PCI_SLOT(adev->pdev->devfn),
- PCI_FUNC(adev->pdev->devfn));
+ if (src_id == 0) {
+ dev_emerg(adev->dev, "ERROR: GPU over temperature range(SW CTF) detected!\n");
+ /*
+ * SW CTF just occurred.
+ * Try to do a graceful shutdown to prevent further damage.
+ */
+ dev_emerg(adev->dev, "ERROR: System is going to shutdown due to GPU SW CTF!\n");
+ orderly_poweroff(true);
+ } else
+ dev_emerg(adev->dev, "ERROR: GPU under temperature range detected!\n");
+ } else if (client_id == SOC15_IH_CLIENTID_ROM_SMUIO) {
+ dev_emerg(adev->dev, "ERROR: GPU HW Critical Temperature Fault(aka CTF) detected!\n");
+ /*
+ * HW CTF just occurred. Shutdown to prevent further damage.
+ */
+ dev_emerg(adev->dev, "ERROR: System is going to shutdown due to GPU HW CTF!\n");
+ orderly_poweroff(true);
+ }
return 0;
}
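The reworked phm_irq_process() above drops the per-PCIe-slot pr_warn() strings in favour of dev_emerg() plus an immediate orderly_poweroff() on either a software or a hardware CTF. A minimal sketch of that pattern under the same assumptions (the helper name is illustrative; the message mirrors the one in the hunk):

	#include <linux/reboot.h>

	static void example_handle_hw_ctf(struct amdgpu_device *adev)
	{
		dev_emerg(adev->dev, "ERROR: GPU HW Critical Temperature Fault(aka CTF) detected!\n");
		/* Ask for a graceful, userspace-assisted shutdown first; passing
		 * true falls back to a forced power-off if that attempt fails. */
		orderly_poweroff(true);
	}
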
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_baco.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_baco.c
index d168af4a4d78..46bb16c29cf6 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_baco.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_baco.c
@@ -98,7 +98,7 @@ int vega10_baco_set_state(struct pp_hwmgr *hwmgr, enum BACO_STATE state)
if (state == BACO_STATE_IN) {
if (soc15_baco_program_registers(hwmgr, pre_baco_tbl,
ARRAY_SIZE(pre_baco_tbl))) {
- if (smum_send_msg_to_smc(hwmgr, PPSMC_MSG_EnterBaco))
+ if (smum_send_msg_to_smc(hwmgr, PPSMC_MSG_EnterBaco, NULL))
return -EINVAL;
if (soc15_baco_program_registers(hwmgr, enter_baco_tbl,
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
index f29f95be1e56..675c7cab7cfc 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
@@ -484,8 +484,9 @@ static void vega10_init_dpm_defaults(struct pp_hwmgr *hwmgr)
if (data->registry_data.vr0hot_enabled)
data->smu_features[GNLD_VR0HOT].supported = true;
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetSmuVersion);
- hwmgr->smu_version = smum_get_argument(hwmgr);
+ smum_send_msg_to_smc(hwmgr,
+ PPSMC_MSG_GetSmuVersion,
+ &hwmgr->smu_version);
/* ACG firmware has major version 5 */
if ((hwmgr->smu_version & 0xff000000) == 0x5000000)
data->smu_features[GNLD_ACG].supported = true;
@@ -503,10 +504,8 @@ static void vega10_init_dpm_defaults(struct pp_hwmgr *hwmgr)
data->smu_features[GNLD_PCC_LIMIT].supported = true;
/* Get the SN to turn into a Unique ID */
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ReadSerialNumTop32);
- top32 = smum_get_argument(hwmgr);
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ReadSerialNumBottom32);
- bottom32 = smum_get_argument(hwmgr);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ReadSerialNumTop32, &top32);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ReadSerialNumBottom32, &bottom32);
adev->unique_id = ((uint64_t)bottom32 << 32) | top32;
}
@@ -993,7 +992,10 @@ static int vega10_setup_asic_task(struct pp_hwmgr *hwmgr)
"Failed to set up led dpm config!",
return -EINVAL);
- smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_NumOfDisplays, 0);
+ smum_send_msg_to_smc_with_parameter(hwmgr,
+ PPSMC_MSG_NumOfDisplays,
+ 0,
+ NULL);
return 0;
}
@@ -2303,16 +2305,15 @@ static int vega10_acg_enable(struct pp_hwmgr *hwmgr)
data->smu_features[GNLD_DPM_PREFETCHER].smu_feature_bitmap))
data->smu_features[GNLD_DPM_PREFETCHER].enabled = true;
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_InitializeAcg);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_InitializeAcg, NULL);
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_RunAcgBtc);
- agc_btc_response = smum_get_argument(hwmgr);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_RunAcgBtc, &agc_btc_response);
if (1 == agc_btc_response) {
if (1 == data->acg_loop_state)
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_RunAcgInClosedLoop);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_RunAcgInClosedLoop, NULL);
else if (2 == data->acg_loop_state)
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_RunAcgInOpenLoop);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_RunAcgInOpenLoop, NULL);
if (0 == vega10_enable_smc_features(hwmgr, true,
data->smu_features[GNLD_ACG].smu_feature_bitmap))
data->smu_features[GNLD_ACG].enabled = true;
@@ -2429,11 +2430,9 @@ static int vega10_populate_and_upload_avfs_fuse_override(struct pp_hwmgr *hwmgr)
struct vega10_hwmgr *data = hwmgr->backend;
AvfsFuseOverride_t *avfs_fuse_table = &(data->smc_state_table.avfs_fuse_override_table);
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ReadSerialNumTop32);
- top32 = smum_get_argument(hwmgr);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ReadSerialNumTop32, &top32);
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ReadSerialNumBottom32);
- bottom32 = smum_get_argument(hwmgr);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ReadSerialNumBottom32, &bottom32);
serial_number = ((uint64_t)bottom32 << 32) | top32;
@@ -2610,14 +2609,16 @@ static int vega10_init_smc_table(struct pp_hwmgr *hwmgr)
if (0 != boot_up_values.usVddc) {
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetFloorSocVoltage,
- (boot_up_values.usVddc * 4));
+ (boot_up_values.usVddc * 4),
+ NULL);
data->vbios_boot_state.bsoc_vddc_lock = true;
} else {
data->vbios_boot_state.bsoc_vddc_lock = false;
}
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetMinDeepSleepDcefclk,
- (uint32_t)(data->vbios_boot_state.dcef_clock / 100));
+ (uint32_t)(data->vbios_boot_state.dcef_clock / 100),
+ NULL);
}
result = vega10_populate_avfs_parameters(hwmgr);
@@ -2904,7 +2905,8 @@ static int vega10_start_dpm(struct pp_hwmgr *hwmgr, uint32_t bitmap)
if (data->vbios_boot_state.bsoc_vddc_lock) {
smum_send_msg_to_smc_with_parameter(hwmgr,
- PPSMC_MSG_SetFloorSocVoltage, 0);
+ PPSMC_MSG_SetFloorSocVoltage, 0,
+ NULL);
data->vbios_boot_state.bsoc_vddc_lock = false;
}
@@ -2947,7 +2949,8 @@ static int vega10_enable_dpm_tasks(struct pp_hwmgr *hwmgr)
vega10_enable_disable_PCC_limit_feature(hwmgr, true);
smum_send_msg_to_smc_with_parameter(hwmgr,
- PPSMC_MSG_ConfigureTelemetry, data->config_telemetry);
+ PPSMC_MSG_ConfigureTelemetry, data->config_telemetry,
+ NULL);
tmp_result = vega10_construct_voltage_tables(hwmgr);
PP_ASSERT_WITH_CODE(!tmp_result,
@@ -3528,7 +3531,8 @@ static int vega10_upload_dpm_bootup_level(struct pp_hwmgr *hwmgr)
data->dpm_table.gfx_table.dpm_state.soft_min_level) {
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetSoftMinGfxclkByIndex,
- data->smc_state_table.gfx_boot_level);
+ data->smc_state_table.gfx_boot_level,
+ NULL);
data->dpm_table.gfx_table.dpm_state.soft_min_level =
data->smc_state_table.gfx_boot_level;
@@ -3543,11 +3547,13 @@ static int vega10_upload_dpm_bootup_level(struct pp_hwmgr *hwmgr)
socclk_idx = vega10_get_soc_index_for_max_uclk(hwmgr);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetSoftMinSocclkByIndex,
- socclk_idx);
+ socclk_idx,
+ NULL);
} else {
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetSoftMinUclkByIndex,
- data->smc_state_table.mem_boot_level);
+ data->smc_state_table.mem_boot_level,
+ NULL);
}
data->dpm_table.mem_table.dpm_state.soft_min_level =
data->smc_state_table.mem_boot_level;
@@ -3562,7 +3568,8 @@ static int vega10_upload_dpm_bootup_level(struct pp_hwmgr *hwmgr)
data->dpm_table.soc_table.dpm_state.soft_min_level) {
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetSoftMinSocclkByIndex,
- data->smc_state_table.soc_boot_level);
+ data->smc_state_table.soc_boot_level,
+ NULL);
data->dpm_table.soc_table.dpm_state.soft_min_level =
data->smc_state_table.soc_boot_level;
}
@@ -3582,7 +3589,8 @@ static int vega10_upload_dpm_max_level(struct pp_hwmgr *hwmgr)
data->dpm_table.gfx_table.dpm_state.soft_max_level) {
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetSoftMaxGfxclkByIndex,
- data->smc_state_table.gfx_max_level);
+ data->smc_state_table.gfx_max_level,
+ NULL);
data->dpm_table.gfx_table.dpm_state.soft_max_level =
data->smc_state_table.gfx_max_level;
}
@@ -3593,7 +3601,8 @@ static int vega10_upload_dpm_max_level(struct pp_hwmgr *hwmgr)
data->dpm_table.mem_table.dpm_state.soft_max_level) {
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetSoftMaxUclkByIndex,
- data->smc_state_table.mem_max_level);
+ data->smc_state_table.mem_max_level,
+ NULL);
data->dpm_table.mem_table.dpm_state.soft_max_level =
data->smc_state_table.mem_max_level;
}
@@ -3607,7 +3616,8 @@ static int vega10_upload_dpm_max_level(struct pp_hwmgr *hwmgr)
data->dpm_table.soc_table.dpm_state.soft_max_level) {
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetSoftMaxSocclkByIndex,
- data->smc_state_table.soc_max_level);
+ data->smc_state_table.soc_max_level,
+ NULL);
data->dpm_table.soc_table.dpm_state.soft_max_level =
data->smc_state_table.soc_max_level;
}
@@ -3694,7 +3704,8 @@ static int vega10_update_sclk_threshold(struct pp_hwmgr *hwmgr)
/* This message will also enable SmcToHost Interrupt */
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetLowGfxclkInterruptThreshold,
- (uint32_t)low_sclk_interrupt_threshold);
+ (uint32_t)low_sclk_interrupt_threshold,
+ NULL);
}
return 0;
@@ -3801,8 +3812,7 @@ static int vega10_get_gpu_power(struct pp_hwmgr *hwmgr,
if (!query)
return -EINVAL;
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetCurrPkgPwr);
- value = smum_get_argument(hwmgr);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetCurrPkgPwr, &value);
/* SMC returning actual watts, keep consistent with legacy asics, low 8 bit as 8 fractional bits */
*query = value << 8;
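The shift in vega10_get_gpu_power() packs the SMC's watt reading into 24.8 fixed point so it stays consistent with the legacy ASIC convention noted in the comment; a worked example with an illustrative value:

	uint32_t value = 42;          /* watts, as returned for PPSMC_MSG_GetCurrPkgPwr */
	uint32_t query = value << 8;  /* 10752, i.e. 42.0 in 24.8 fixed point */
	uint32_t watts = query >> 8;  /* consumers recover the integer part with >> 8 */
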
@@ -3822,13 +3832,11 @@ static int vega10_read_sensor(struct pp_hwmgr *hwmgr, int idx,
switch (idx) {
case AMDGPU_PP_SENSOR_GFX_SCLK:
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetAverageGfxclkActualFrequency);
- sclk_mhz = smum_get_argument(hwmgr);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetAverageGfxclkActualFrequency, &sclk_mhz);
*((uint32_t *)value) = sclk_mhz * 100;
break;
case AMDGPU_PP_SENSOR_GFX_MCLK:
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetCurrentUclkIndex);
- mclk_idx = smum_get_argument(hwmgr);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetCurrentUclkIndex, &mclk_idx);
if (mclk_idx < dpm_table->mem_table.count) {
*((uint32_t *)value) = dpm_table->mem_table.dpm_levels[mclk_idx].value;
*size = 4;
@@ -3837,8 +3845,8 @@ static int vega10_read_sensor(struct pp_hwmgr *hwmgr, int idx,
}
break;
case AMDGPU_PP_SENSOR_GPU_LOAD:
- smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_GetAverageGfxActivity, 0);
- activity_percent = smum_get_argument(hwmgr);
+ smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_GetAverageGfxActivity, 0,
+ &activity_percent);
*((uint32_t *)value) = activity_percent > 100 ? 100 : activity_percent;
*size = 4;
break;
@@ -3847,14 +3855,14 @@ static int vega10_read_sensor(struct pp_hwmgr *hwmgr, int idx,
*size = 4;
break;
case AMDGPU_PP_SENSOR_HOTSPOT_TEMP:
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetTemperatureHotspot);
- *((uint32_t *)value) = smum_get_argument(hwmgr) *
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetTemperatureHotspot, (uint32_t *)value);
+ *((uint32_t *)value) = *((uint32_t *)value) *
PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
*size = 4;
break;
case AMDGPU_PP_SENSOR_MEM_TEMP:
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetTemperatureHBM);
- *((uint32_t *)value) = smum_get_argument(hwmgr) *
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetTemperatureHBM, (uint32_t *)value);
+ *((uint32_t *)value) = *((uint32_t *)value) *
PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
*size = 4;
break;
@@ -3893,7 +3901,8 @@ static void vega10_notify_smc_display_change(struct pp_hwmgr *hwmgr,
{
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetUclkFastSwitch,
- has_disp ? 1 : 0);
+ has_disp ? 1 : 0,
+ NULL);
}
int vega10_display_clock_voltage_request(struct pp_hwmgr *hwmgr,
@@ -3928,7 +3937,8 @@ int vega10_display_clock_voltage_request(struct pp_hwmgr *hwmgr,
clk_request = (clk_freq << 16) | clk_select;
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_RequestDisplayClockByFreq,
- clk_request);
+ clk_request,
+ NULL);
}
return result;
@@ -3990,7 +4000,8 @@ static int vega10_notify_smc_display_config_after_ps_adjustment(
if (!vega10_display_clock_voltage_request(hwmgr, &clock_req)) {
smum_send_msg_to_smc_with_parameter(
hwmgr, PPSMC_MSG_SetMinDeepSleepDcefclk,
- min_clocks.dcefClockInSR / 100);
+ min_clocks.dcefClockInSR / 100,
+ NULL);
} else {
pr_info("Attempt to set Hard Min for DCEFCLK Failed!");
}
@@ -4000,7 +4011,8 @@ static int vega10_notify_smc_display_config_after_ps_adjustment(
if (min_clocks.memoryClock != 0) {
idx = vega10_get_uclk_index(hwmgr, mclk_table, min_clocks.memoryClock);
- smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetSoftMinUclkByIndex, idx);
+ smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetSoftMinUclkByIndex, idx,
+ NULL);
data->dpm_table.mem_table.dpm_state.soft_min_level= idx;
}
@@ -4541,8 +4553,7 @@ static int vega10_print_clock_levels(struct pp_hwmgr *hwmgr,
if (data->registry_data.sclk_dpm_key_disabled)
break;
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetCurrentGfxclkIndex);
- now = smum_get_argument(hwmgr);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetCurrentGfxclkIndex, &now);
if (hwmgr->pp_one_vf &&
(hwmgr->dpm_level == AMD_DPM_FORCED_LEVEL_PROFILE_PEAK))
@@ -4558,8 +4569,7 @@ static int vega10_print_clock_levels(struct pp_hwmgr *hwmgr,
if (data->registry_data.mclk_dpm_key_disabled)
break;
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetCurrentUclkIndex);
- now = smum_get_argument(hwmgr);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetCurrentUclkIndex, &now);
for (i = 0; i < mclk_table->count; i++)
size += sprintf(buf + size, "%d: %uMhz %s\n",
@@ -4570,8 +4580,7 @@ static int vega10_print_clock_levels(struct pp_hwmgr *hwmgr,
if (data->registry_data.socclk_dpm_key_disabled)
break;
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetCurrentSocclkIndex);
- now = smum_get_argument(hwmgr);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetCurrentSocclkIndex, &now);
for (i = 0; i < soc_table->count; i++)
size += sprintf(buf + size, "%d: %uMhz %s\n",
@@ -4583,8 +4592,7 @@ static int vega10_print_clock_levels(struct pp_hwmgr *hwmgr,
break;
smum_send_msg_to_smc_with_parameter(hwmgr,
- PPSMC_MSG_GetClockFreqMHz, CLK_DCEFCLK);
- now = smum_get_argument(hwmgr);
+ PPSMC_MSG_GetClockFreqMHz, CLK_DCEFCLK, &now);
for (i = 0; i < dcef_table->count; i++)
size += sprintf(buf + size, "%d: %uMhz %s\n",
@@ -4593,8 +4601,7 @@ static int vega10_print_clock_levels(struct pp_hwmgr *hwmgr,
"*" : "");
break;
case PP_PCIE:
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetCurrentLinkIndex);
- now = smum_get_argument(hwmgr);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetCurrentLinkIndex, &now);
for (i = 0; i < pcie_table->count; i++)
size += sprintf(buf + size, "%d: %s %s\n", i,
@@ -4658,7 +4665,8 @@ static int vega10_display_configuration_changed_task(struct pp_hwmgr *hwmgr)
if (data->water_marks_bitmap & WaterMarksLoaded) {
smum_send_msg_to_smc_with_parameter(hwmgr,
- PPSMC_MSG_NumOfDisplays, hwmgr->display_config->num_display);
+ PPSMC_MSG_NumOfDisplays, hwmgr->display_config->num_display,
+ NULL);
}
return result;
@@ -4924,21 +4932,26 @@ static int vega10_notify_cac_buffer_info(struct pp_hwmgr *hwmgr,
{
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetSystemVirtualDramAddrHigh,
- virtual_addr_hi);
+ virtual_addr_hi,
+ NULL);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetSystemVirtualDramAddrLow,
- virtual_addr_low);
+ virtual_addr_low,
+ NULL);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_DramLogSetDramAddrHigh,
- mc_addr_hi);
+ mc_addr_hi,
+ NULL);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_DramLogSetDramAddrLow,
- mc_addr_low);
+ mc_addr_low,
+ NULL);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_DramLogSetDramSize,
- size);
+ size,
+ NULL);
return 0;
}
@@ -5040,12 +5053,14 @@ static int vega10_set_power_profile_mode(struct pp_hwmgr *hwmgr, long *input, ui
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetCustomGfxDpmParameters,
busy_set_point | FPS<<8 |
- use_rlc_busy << 16 | min_active_level<<24);
+ use_rlc_busy << 16 | min_active_level<<24,
+ NULL);
}
out:
smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetWorkloadMask,
- 1 << power_profile_mode);
+ 1 << power_profile_mode,
+ NULL);
hwmgr->power_profile_mode = power_profile_mode;
return 0;
@@ -5302,7 +5317,7 @@ static int vega10_set_mp1_state(struct pp_hwmgr *hwmgr,
return 0;
}
- PP_ASSERT_WITH_CODE((ret = smum_send_msg_to_smc(hwmgr, msg)) == 0,
+ PP_ASSERT_WITH_CODE((ret = smum_send_msg_to_smc(hwmgr, msg, NULL)) == 0,
"[PrepareMp1] Failed!",
return ret);
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c
index 0a677d4bc87b..9757d47dd6b8 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c
@@ -651,18 +651,6 @@ static const struct vega10_didt_config_reg PSMSEEDCStallDelayConfig_Vega10[] =
{ 0xFFFFFFFF } /* End of list */
};
-static const struct vega10_didt_config_reg PSMSEEDCThresholdConfig_Vega10[] =
-{
-/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- * Offset Mask Shift Value
- * ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- */
- /* SQ EDC THRESHOLD */
- { ixDIDT_SQ_EDC_THRESHOLD, DIDT_SQ_EDC_THRESHOLD__EDC_THRESHOLD_MASK, DIDT_SQ_EDC_THRESHOLD__EDC_THRESHOLD__SHIFT, 0x0000 },
-
- { 0xFFFFFFFF } /* End of list */
-};
-
static const struct vega10_didt_config_reg PSMSEEDCCtrlResetConfig_Vega10[] =
{
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
@@ -707,17 +695,6 @@ static const struct vega10_didt_config_reg PSMSEEDCCtrlConfig_Vega10[] =
{ 0xFFFFFFFF } /* End of list */
};
-static const struct vega10_didt_config_reg PSMGCEDCThresholdConfig_vega10[] =
-{
-/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- * Offset Mask Shift Value
- * ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- */
- { mmGC_EDC_THRESHOLD, GC_EDC_THRESHOLD__EDC_THRESHOLD_MASK, GC_EDC_THRESHOLD__EDC_THRESHOLD__SHIFT, 0x0000000 },
-
- { 0xFFFFFFFF } /* End of list */
-};
-
static const struct vega10_didt_config_reg PSMGCEDCDroopCtrlConfig_vega10[] =
{
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
@@ -925,7 +902,8 @@ static void vega10_didt_set_mask(struct pp_hwmgr *hwmgr, const bool enable)
/* For Vega10, SMC does not support any mask yet. */
if (enable)
- smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_ConfigureGfxDidt, didt_block_info);
+ smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_ConfigureGfxDidt, didt_block_info,
+ NULL);
}
@@ -1327,7 +1305,8 @@ int vega10_set_power_limit(struct pp_hwmgr *hwmgr, uint32_t n)
if (data->registry_data.enable_pkg_pwr_tracking_feature)
smum_send_msg_to_smc_with_parameter(hwmgr,
- PPSMC_MSG_SetPptLimit, n);
+ PPSMC_MSG_SetPptLimit, n,
+ NULL);
return 0;
}
@@ -1393,7 +1372,8 @@ static void vega10_set_overdrive_target_percentage(struct pp_hwmgr *hwmgr,
uint32_t adjust_percent)
{
smum_send_msg_to_smc_with_parameter(hwmgr,
- PPSMC_MSG_OverDriveSetPercentage, adjust_percent);
+ PPSMC_MSG_OverDriveSetPercentage, adjust_percent,
+ NULL);
}
int vega10_power_control_set_level(struct pp_hwmgr *hwmgr)
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c
index ba8763daa380..7783c7fd7ccb 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c
@@ -31,8 +31,7 @@
static int vega10_get_current_rpm(struct pp_hwmgr *hwmgr, uint32_t *current_rpm)
{
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetCurrentRpm);
- *current_rpm = smum_get_argument(hwmgr);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetCurrentRpm, current_rpm);
return 0;
}
@@ -520,7 +519,8 @@ int vega10_thermal_setup_fan_table(struct pp_hwmgr *hwmgr)
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetFanTemperatureTarget,
- (uint32_t)table->FanTargetTemperature);
+ (uint32_t)table->FanTargetTemperature,
+ NULL);
table->FanPwmMin = hwmgr->thermal_controller.
advanceFanControlParameters.usPWMMin * 255 / 100;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_baco.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_baco.c
index 9d8ca94a8f0c..bc53cce4f32d 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_baco.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_baco.c
@@ -96,7 +96,7 @@ int vega12_baco_set_state(struct pp_hwmgr *hwmgr, enum BACO_STATE state)
if (state == BACO_STATE_IN) {
if (soc15_baco_program_registers(hwmgr, pre_baco_tbl,
ARRAY_SIZE(pre_baco_tbl))) {
- if (smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_EnterBaco, 0))
+ if (smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_EnterBaco, 0, NULL))
return -EINVAL;
if (soc15_baco_program_registers(hwmgr, enter_baco_tbl,
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
index aca61d1ff3c2..f4d1692cccf3 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
@@ -357,10 +357,8 @@ static void vega12_init_dpm_defaults(struct pp_hwmgr *hwmgr)
}
/* Get the SN to turn into a Unique ID */
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ReadSerialNumTop32);
- top32 = smum_get_argument(hwmgr);
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ReadSerialNumBottom32);
- bottom32 = smum_get_argument(hwmgr);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ReadSerialNumTop32, &top32);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ReadSerialNumBottom32, &bottom32);
adev->unique_id = ((uint64_t)bottom32 << 32) | top32;
}
@@ -483,16 +481,12 @@ static int vega12_get_number_of_dpm_level(struct pp_hwmgr *hwmgr,
ret = smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_GetDpmFreqByIndex,
- (clk_id << 16 | 0xFF));
+ (clk_id << 16 | 0xFF),
+ num_of_levels);
PP_ASSERT_WITH_CODE(!ret,
"[GetNumOfDpmLevel] failed to get dpm levels!",
return ret);
- *num_of_levels = smum_get_argument(hwmgr);
- PP_ASSERT_WITH_CODE(*num_of_levels > 0,
- "[GetNumOfDpmLevel] number of clk levels is invalid!",
- return -EINVAL);
-
return ret;
}
@@ -504,12 +498,11 @@ static int vega12_get_dpm_frequency_by_index(struct pp_hwmgr *hwmgr,
*Lower 16 bits specify the level
*/
PP_ASSERT_WITH_CODE(smum_send_msg_to_smc_with_parameter(hwmgr,
- PPSMC_MSG_GetDpmFreqByIndex, (clkID << 16 | index)) == 0,
+ PPSMC_MSG_GetDpmFreqByIndex, (clkID << 16 | index),
+ clock) == 0,
"[GetDpmFrequencyByIndex] Failed to get dpm frequency from SMU!",
return -EINVAL);
- *clock = smum_get_argument(hwmgr);
-
return 0;
}
@@ -749,7 +742,8 @@ static int vega12_init_smc_table(struct pp_hwmgr *hwmgr)
data->vbios_boot_state.vclock = boot_up_values.ulVClk;
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetMinDeepSleepDcefclk,
- (uint32_t)(data->vbios_boot_state.dcef_clock / 100));
+ (uint32_t)(data->vbios_boot_state.dcef_clock / 100),
+ NULL);
}
memcpy(pp_table, pptable_information->smc_pptable, sizeof(PPTable_t));
@@ -767,11 +761,10 @@ static int vega12_run_acg_btc(struct pp_hwmgr *hwmgr)
uint32_t result;
PP_ASSERT_WITH_CODE(
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_RunAcgBtc) == 0,
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_RunAcgBtc, &result) == 0,
"[Run_ACG_BTC] Attempt to run ACG BTC failed!",
return -EINVAL);
- result = smum_get_argument(hwmgr);
PP_ASSERT_WITH_CODE(result == 1,
"Failed to run ACG BTC!", return -EINVAL);
@@ -792,12 +785,14 @@ static int vega12_set_allowed_featuresmask(struct pp_hwmgr *hwmgr)
(allowed_features_low |= ((data->smu_features[i].smu_feature_bitmap >> SMU_FEATURES_LOW_SHIFT) & 0xFFFFFFFF));
PP_ASSERT_WITH_CODE(
- smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetAllowedFeaturesMaskHigh, allowed_features_high) == 0,
+ smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetAllowedFeaturesMaskHigh, allowed_features_high,
+ NULL) == 0,
"[SetAllowedFeaturesMask] Attempt to set allowed features mask (high) failed!",
return -1);
PP_ASSERT_WITH_CODE(
- smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetAllowedFeaturesMaskLow, allowed_features_low) == 0,
+ smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetAllowedFeaturesMaskLow, allowed_features_low,
+ NULL) == 0,
"[SetAllowedFeaturesMask] Attempt to set allowed features mask (low) failed!",
return -1);
@@ -828,7 +823,7 @@ static int vega12_enable_all_smu_features(struct pp_hwmgr *hwmgr)
bool enabled;
PP_ASSERT_WITH_CODE(
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_EnableAllSmuFeatures) == 0,
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_EnableAllSmuFeatures, NULL) == 0,
"[EnableAllSMUFeatures] Failed to enable all smu features!",
return -1);
@@ -854,7 +849,7 @@ static int vega12_disable_all_smu_features(struct pp_hwmgr *hwmgr)
bool enabled;
PP_ASSERT_WITH_CODE(
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_DisableAllSmuFeatures) == 0,
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_DisableAllSmuFeatures, NULL) == 0,
"[DisableAllSMUFeatures] Failed to disable all smu features!",
return -1);
@@ -879,7 +874,8 @@ static int vega12_set_overdrive_target_percentage(struct pp_hwmgr *hwmgr,
uint32_t adjust_percent)
{
return smum_send_msg_to_smc_with_parameter(hwmgr,
- PPSMC_MSG_OverDriveSetPercentage, adjust_percent);
+ PPSMC_MSG_OverDriveSetPercentage, adjust_percent,
+ NULL);
}
static int vega12_power_control_set_level(struct pp_hwmgr *hwmgr)
@@ -902,24 +898,24 @@ static int vega12_get_all_clock_ranges_helper(struct pp_hwmgr *hwmgr,
{
/* AC Max */
PP_ASSERT_WITH_CODE(
- smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_GetMaxDpmFreq, (clkid << 16)) == 0,
+ smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_GetMaxDpmFreq, (clkid << 16),
+ &(clock->ACMax)) == 0,
"[GetClockRanges] Failed to get max ac clock from SMC!",
return -EINVAL);
- clock->ACMax = smum_get_argument(hwmgr);
/* AC Min */
PP_ASSERT_WITH_CODE(
- smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_GetMinDpmFreq, (clkid << 16)) == 0,
+ smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_GetMinDpmFreq, (clkid << 16),
+ &(clock->ACMin)) == 0,
"[GetClockRanges] Failed to get min ac clock from SMC!",
return -EINVAL);
- clock->ACMin = smum_get_argument(hwmgr);
/* DC Max */
PP_ASSERT_WITH_CODE(
- smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_GetDcModeMaxDpmFreq, (clkid << 16)) == 0,
+ smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_GetDcModeMaxDpmFreq, (clkid << 16),
+ &(clock->DCMax)) == 0,
"[GetClockRanges] Failed to get max dc clock from SMC!",
return -EINVAL);
- clock->DCMax = smum_get_argument(hwmgr);
return 0;
}
@@ -944,7 +940,7 @@ static int vega12_enable_dpm_tasks(struct pp_hwmgr *hwmgr)
int tmp_result, result = 0;
smum_send_msg_to_smc_with_parameter(hwmgr,
- PPSMC_MSG_NumOfDisplays, 0);
+ PPSMC_MSG_NumOfDisplays, 0, NULL);
result = vega12_set_allowed_featuresmask(hwmgr);
PP_ASSERT_WITH_CODE(result == 0,
@@ -1043,7 +1039,8 @@ static int vega12_upload_dpm_min_level(struct pp_hwmgr *hwmgr)
min_freq = data->dpm_table.gfx_table.dpm_state.soft_min_level;
PP_ASSERT_WITH_CODE(!(ret = smum_send_msg_to_smc_with_parameter(
hwmgr, PPSMC_MSG_SetSoftMinByFreq,
- (PPCLK_GFXCLK << 16) | (min_freq & 0xffff))),
+ (PPCLK_GFXCLK << 16) | (min_freq & 0xffff),
+ NULL)),
"Failed to set soft min gfxclk !",
return ret);
}
@@ -1052,14 +1049,16 @@ static int vega12_upload_dpm_min_level(struct pp_hwmgr *hwmgr)
min_freq = data->dpm_table.mem_table.dpm_state.soft_min_level;
PP_ASSERT_WITH_CODE(!(ret = smum_send_msg_to_smc_with_parameter(
hwmgr, PPSMC_MSG_SetSoftMinByFreq,
- (PPCLK_UCLK << 16) | (min_freq & 0xffff))),
+ (PPCLK_UCLK << 16) | (min_freq & 0xffff),
+ NULL)),
"Failed to set soft min memclk !",
return ret);
min_freq = data->dpm_table.mem_table.dpm_state.hard_min_level;
PP_ASSERT_WITH_CODE(!(ret = smum_send_msg_to_smc_with_parameter(
hwmgr, PPSMC_MSG_SetHardMinByFreq,
- (PPCLK_UCLK << 16) | (min_freq & 0xffff))),
+ (PPCLK_UCLK << 16) | (min_freq & 0xffff),
+ NULL)),
"Failed to set hard min memclk !",
return ret);
}
@@ -1069,7 +1068,8 @@ static int vega12_upload_dpm_min_level(struct pp_hwmgr *hwmgr)
PP_ASSERT_WITH_CODE(!(ret = smum_send_msg_to_smc_with_parameter(
hwmgr, PPSMC_MSG_SetSoftMinByFreq,
- (PPCLK_VCLK << 16) | (min_freq & 0xffff))),
+ (PPCLK_VCLK << 16) | (min_freq & 0xffff),
+ NULL)),
"Failed to set soft min vclk!",
return ret);
@@ -1077,7 +1077,8 @@ static int vega12_upload_dpm_min_level(struct pp_hwmgr *hwmgr)
PP_ASSERT_WITH_CODE(!(ret = smum_send_msg_to_smc_with_parameter(
hwmgr, PPSMC_MSG_SetSoftMinByFreq,
- (PPCLK_DCLK << 16) | (min_freq & 0xffff))),
+ (PPCLK_DCLK << 16) | (min_freq & 0xffff),
+ NULL)),
"Failed to set soft min dclk!",
return ret);
}
@@ -1087,7 +1088,8 @@ static int vega12_upload_dpm_min_level(struct pp_hwmgr *hwmgr)
PP_ASSERT_WITH_CODE(!(ret = smum_send_msg_to_smc_with_parameter(
hwmgr, PPSMC_MSG_SetSoftMinByFreq,
- (PPCLK_ECLK << 16) | (min_freq & 0xffff))),
+ (PPCLK_ECLK << 16) | (min_freq & 0xffff),
+ NULL)),
"Failed to set soft min eclk!",
return ret);
}
@@ -1097,7 +1099,8 @@ static int vega12_upload_dpm_min_level(struct pp_hwmgr *hwmgr)
PP_ASSERT_WITH_CODE(!(ret = smum_send_msg_to_smc_with_parameter(
hwmgr, PPSMC_MSG_SetSoftMinByFreq,
- (PPCLK_SOCCLK << 16) | (min_freq & 0xffff))),
+ (PPCLK_SOCCLK << 16) | (min_freq & 0xffff),
+ NULL)),
"Failed to set soft min socclk!",
return ret);
}
@@ -1107,7 +1110,8 @@ static int vega12_upload_dpm_min_level(struct pp_hwmgr *hwmgr)
PP_ASSERT_WITH_CODE(!(ret = smum_send_msg_to_smc_with_parameter(
hwmgr, PPSMC_MSG_SetHardMinByFreq,
- (PPCLK_DCEFCLK << 16) | (min_freq & 0xffff))),
+ (PPCLK_DCEFCLK << 16) | (min_freq & 0xffff),
+ NULL)),
"Failed to set hard min dcefclk!",
return ret);
}
@@ -1127,7 +1131,8 @@ static int vega12_upload_dpm_max_level(struct pp_hwmgr *hwmgr)
PP_ASSERT_WITH_CODE(!(ret = smum_send_msg_to_smc_with_parameter(
hwmgr, PPSMC_MSG_SetSoftMaxByFreq,
- (PPCLK_GFXCLK << 16) | (max_freq & 0xffff))),
+ (PPCLK_GFXCLK << 16) | (max_freq & 0xffff),
+ NULL)),
"Failed to set soft max gfxclk!",
return ret);
}
@@ -1137,7 +1142,8 @@ static int vega12_upload_dpm_max_level(struct pp_hwmgr *hwmgr)
PP_ASSERT_WITH_CODE(!(ret = smum_send_msg_to_smc_with_parameter(
hwmgr, PPSMC_MSG_SetSoftMaxByFreq,
- (PPCLK_UCLK << 16) | (max_freq & 0xffff))),
+ (PPCLK_UCLK << 16) | (max_freq & 0xffff),
+ NULL)),
"Failed to set soft max memclk!",
return ret);
}
@@ -1147,14 +1153,16 @@ static int vega12_upload_dpm_max_level(struct pp_hwmgr *hwmgr)
PP_ASSERT_WITH_CODE(!(ret = smum_send_msg_to_smc_with_parameter(
hwmgr, PPSMC_MSG_SetSoftMaxByFreq,
- (PPCLK_VCLK << 16) | (max_freq & 0xffff))),
+ (PPCLK_VCLK << 16) | (max_freq & 0xffff),
+ NULL)),
"Failed to set soft max vclk!",
return ret);
max_freq = data->dpm_table.dclk_table.dpm_state.soft_max_level;
PP_ASSERT_WITH_CODE(!(ret = smum_send_msg_to_smc_with_parameter(
hwmgr, PPSMC_MSG_SetSoftMaxByFreq,
- (PPCLK_DCLK << 16) | (max_freq & 0xffff))),
+ (PPCLK_DCLK << 16) | (max_freq & 0xffff),
+ NULL)),
"Failed to set soft max dclk!",
return ret);
}
@@ -1164,7 +1172,8 @@ static int vega12_upload_dpm_max_level(struct pp_hwmgr *hwmgr)
PP_ASSERT_WITH_CODE(!(ret = smum_send_msg_to_smc_with_parameter(
hwmgr, PPSMC_MSG_SetSoftMaxByFreq,
- (PPCLK_ECLK << 16) | (max_freq & 0xffff))),
+ (PPCLK_ECLK << 16) | (max_freq & 0xffff),
+ NULL)),
"Failed to set soft max eclk!",
return ret);
}
@@ -1174,7 +1183,8 @@ static int vega12_upload_dpm_max_level(struct pp_hwmgr *hwmgr)
PP_ASSERT_WITH_CODE(!(ret = smum_send_msg_to_smc_with_parameter(
hwmgr, PPSMC_MSG_SetSoftMaxByFreq,
- (PPCLK_SOCCLK << 16) | (max_freq & 0xffff))),
+ (PPCLK_SOCCLK << 16) | (max_freq & 0xffff),
+ NULL)),
"Failed to set soft max socclk!",
return ret);
}
@@ -1287,10 +1297,10 @@ static int vega12_get_current_gfx_clk_freq(struct pp_hwmgr *hwmgr, uint32_t *gfx
*gfx_freq = 0;
PP_ASSERT_WITH_CODE(smum_send_msg_to_smc_with_parameter(hwmgr,
- PPSMC_MSG_GetDpmClockFreq, (PPCLK_GFXCLK << 16)) == 0,
+ PPSMC_MSG_GetDpmClockFreq, (PPCLK_GFXCLK << 16),
+ &gfx_clk) == 0,
"[GetCurrentGfxClkFreq] Attempt to get Current GFXCLK Frequency Failed!",
return -EINVAL);
- gfx_clk = smum_get_argument(hwmgr);
*gfx_freq = gfx_clk * 100;
@@ -1304,10 +1314,10 @@ static int vega12_get_current_mclk_freq(struct pp_hwmgr *hwmgr, uint32_t *mclk_f
*mclk_freq = 0;
PP_ASSERT_WITH_CODE(
- smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_GetDpmClockFreq, (PPCLK_UCLK << 16)) == 0,
+ smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_GetDpmClockFreq, (PPCLK_UCLK << 16),
+ &mem_clk) == 0,
"[GetCurrentMClkFreq] Attempt to get Current MCLK Frequency Failed!",
return -EINVAL);
- mem_clk = smum_get_argument(hwmgr);
*mclk_freq = mem_clk * 100;
@@ -1420,7 +1430,8 @@ static int vega12_notify_smc_display_change(struct pp_hwmgr *hwmgr,
if (data->smu_features[GNLD_DPM_UCLK].enabled)
return smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetUclkFastSwitch,
- has_disp ? 1 : 0);
+ has_disp ? 1 : 0,
+ NULL);
return 0;
}
@@ -1459,7 +1470,8 @@ int vega12_display_clock_voltage_request(struct pp_hwmgr *hwmgr,
clk_request = (clk_select << 16) | clk_freq;
result = smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetHardMinByFreq,
- clk_request);
+ clk_request,
+ NULL);
}
}
@@ -1493,7 +1505,8 @@ static int vega12_notify_smc_display_config_after_ps_adjustment(
PP_ASSERT_WITH_CODE(
!smum_send_msg_to_smc_with_parameter(
hwmgr, PPSMC_MSG_SetMinDeepSleepDcefclk,
- min_clocks.dcefClockInSR /100),
+ min_clocks.dcefClockInSR /100,
+ NULL),
"Attempt to set divider for DCEFCLK Failed!",
return -1);
} else {
@@ -2124,10 +2137,10 @@ static int vega12_print_clock_levels(struct pp_hwmgr *hwmgr,
case PP_SOCCLK:
PP_ASSERT_WITH_CODE(
smum_send_msg_to_smc_with_parameter(hwmgr,
- PPSMC_MSG_GetDpmClockFreq, (PPCLK_SOCCLK << 16)) == 0,
+ PPSMC_MSG_GetDpmClockFreq, (PPCLK_SOCCLK << 16),
+ &now) == 0,
"Attempt to get Current SOCCLK Frequency Failed!",
return -EINVAL);
- now = smum_get_argument(hwmgr);
PP_ASSERT_WITH_CODE(
vega12_get_socclocks(hwmgr, &clocks) == 0,
@@ -2142,10 +2155,10 @@ static int vega12_print_clock_levels(struct pp_hwmgr *hwmgr,
case PP_DCEFCLK:
PP_ASSERT_WITH_CODE(
smum_send_msg_to_smc_with_parameter(hwmgr,
- PPSMC_MSG_GetDpmClockFreq, (PPCLK_DCEFCLK << 16)) == 0,
+ PPSMC_MSG_GetDpmClockFreq, (PPCLK_DCEFCLK << 16),
+ &now) == 0,
"Attempt to get Current DCEFCLK Frequency Failed!",
return -EINVAL);
- now = smum_get_argument(hwmgr);
PP_ASSERT_WITH_CODE(
vega12_get_dcefclocks(hwmgr, &clocks) == 0,
@@ -2343,7 +2356,8 @@ static int vega12_set_uclk_to_highest_dpm_level(struct pp_hwmgr *hwmgr,
dpm_table->dpm_state.hard_min_level = dpm_table->dpm_levels[dpm_table->count - 1].value;
PP_ASSERT_WITH_CODE(!(ret = smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetHardMinByFreq,
- (PPCLK_UCLK << 16 ) | dpm_table->dpm_state.hard_min_level)),
+ (PPCLK_UCLK << 16 ) | dpm_table->dpm_state.hard_min_level,
+ NULL)),
"[SetUclkToHightestDpmLevel] Set hard min uclk failed!",
return ret);
}
@@ -2357,7 +2371,8 @@ static int vega12_pre_display_configuration_changed_task(struct pp_hwmgr *hwmgr)
int ret = 0;
smum_send_msg_to_smc_with_parameter(hwmgr,
- PPSMC_MSG_NumOfDisplays, 0);
+ PPSMC_MSG_NumOfDisplays, 0,
+ NULL);
ret = vega12_set_uclk_to_highest_dpm_level(hwmgr,
&data->dpm_table.mem_table);
@@ -2383,7 +2398,8 @@ static int vega12_display_configuration_changed_task(struct pp_hwmgr *hwmgr)
data->smu_features[GNLD_DPM_DCEFCLK].supported &&
data->smu_features[GNLD_DPM_SOCCLK].supported)
smum_send_msg_to_smc_with_parameter(hwmgr,
- PPSMC_MSG_NumOfDisplays, hwmgr->display_config->num_display);
+ PPSMC_MSG_NumOfDisplays, hwmgr->display_config->num_display,
+ NULL);
return result;
}
@@ -2555,21 +2571,26 @@ static int vega12_notify_cac_buffer_info(struct pp_hwmgr *hwmgr,
{
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetSystemVirtualDramAddrHigh,
- virtual_addr_hi);
+ virtual_addr_hi,
+ NULL);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetSystemVirtualDramAddrLow,
- virtual_addr_low);
+ virtual_addr_low,
+ NULL);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_DramLogSetDramAddrHigh,
- mc_addr_hi);
+ mc_addr_hi,
+ NULL);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_DramLogSetDramAddrLow,
- mc_addr_low);
+ mc_addr_low,
+ NULL);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_DramLogSetDramSize,
- size);
+ size,
+ NULL);
return 0;
}
@@ -2605,7 +2626,7 @@ static int vega12_enable_gfx_off(struct pp_hwmgr *hwmgr)
int ret = 0;
if (data->gfxoff_controlled_by_driver)
- ret = smum_send_msg_to_smc(hwmgr, PPSMC_MSG_AllowGfxOff);
+ ret = smum_send_msg_to_smc(hwmgr, PPSMC_MSG_AllowGfxOff, NULL);
return ret;
}
@@ -2617,7 +2638,7 @@ static int vega12_disable_gfx_off(struct pp_hwmgr *hwmgr)
int ret = 0;
if (data->gfxoff_controlled_by_driver)
- ret = smum_send_msg_to_smc(hwmgr, PPSMC_MSG_DisallowGfxOff);
+ ret = smum_send_msg_to_smc(hwmgr, PPSMC_MSG_DisallowGfxOff, NULL);
return ret;
}
@@ -2654,7 +2675,7 @@ static int vega12_set_mp1_state(struct pp_hwmgr *hwmgr,
return 0;
}
- PP_ASSERT_WITH_CODE((ret = smum_send_msg_to_smc(hwmgr, msg)) == 0,
+ PP_ASSERT_WITH_CODE((ret = smum_send_msg_to_smc(hwmgr, msg, NULL)) == 0,
"[PrepareMp1] Failed!",
return ret);
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_thermal.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_thermal.c
index 904eb2c9155b..c85806a6f62e 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_thermal.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_thermal.c
@@ -32,10 +32,10 @@
static int vega12_get_current_rpm(struct pp_hwmgr *hwmgr, uint32_t *current_rpm)
{
PP_ASSERT_WITH_CODE(!smum_send_msg_to_smc(hwmgr,
- PPSMC_MSG_GetCurrentRpm),
+ PPSMC_MSG_GetCurrentRpm,
+ current_rpm),
"Attempt to get current RPM from SMC Failed!",
return -EINVAL);
- *current_rpm = smum_get_argument(hwmgr);
return 0;
}
@@ -259,7 +259,8 @@ int vega12_thermal_setup_fan_table(struct pp_hwmgr *hwmgr)
ret = smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetFanTemperatureTarget,
- (uint32_t)table->FanTargetTemperature);
+ (uint32_t)table->FanTargetTemperature,
+ NULL);
return ret;
}
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_baco.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_baco.c
index 9b5e72bdceca..2a28c9df15a0 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_baco.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_baco.c
@@ -91,16 +91,16 @@ int vega20_baco_set_state(struct pp_hwmgr *hwmgr, enum BACO_STATE state)
WREG32_SOC15(THM, 0, mmTHM_BACO_CNTL, data);
if(smum_send_msg_to_smc_with_parameter(hwmgr,
- PPSMC_MSG_EnterBaco, 0))
+ PPSMC_MSG_EnterBaco, 0, NULL))
return -EINVAL;
} else {
if(smum_send_msg_to_smc_with_parameter(hwmgr,
- PPSMC_MSG_EnterBaco, 1))
+ PPSMC_MSG_EnterBaco, 1, NULL))
return -EINVAL;
}
} else if (state == BACO_STATE_OUT) {
- if (smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ExitBaco))
+ if (smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ExitBaco, NULL))
return -EINVAL;
if (!soc15_baco_program_registers(hwmgr, clean_baco_tbl,
ARRAY_SIZE(clean_baco_tbl)))
@@ -118,5 +118,5 @@ int vega20_baco_apply_vdci_flush_workaround(struct pp_hwmgr *hwmgr)
if (ret)
return ret;
- return smum_send_msg_to_smc(hwmgr, PPSMC_MSG_BacoWorkAroundFlushVDCI);
+ return smum_send_msg_to_smc(hwmgr, PPSMC_MSG_BacoWorkAroundFlushVDCI, NULL);
}
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
index 08b6ba39a6d7..9ff470f1b826 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
@@ -92,8 +92,7 @@ static void vega20_set_default_registry_data(struct pp_hwmgr *hwmgr)
*/
data->registry_data.disallowed_features = 0xE0041C00;
/* ECC feature should be disabled on old SMUs */
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetSmuVersion);
- hwmgr->smu_version = smum_get_argument(hwmgr);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetSmuVersion, &hwmgr->smu_version);
if (hwmgr->smu_version < 0x282100)
data->registry_data.disallowed_features |= FEATURE_ECC_MASK;
@@ -400,10 +399,8 @@ static void vega20_init_dpm_defaults(struct pp_hwmgr *hwmgr)
}
/* Get the SN to turn into a Unique ID */
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ReadSerialNumTop32);
- top32 = smum_get_argument(hwmgr);
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ReadSerialNumBottom32);
- bottom32 = smum_get_argument(hwmgr);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ReadSerialNumTop32, &top32);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ReadSerialNumBottom32, &bottom32);
adev->unique_id = ((uint64_t)bottom32 << 32) | top32;
}
@@ -527,16 +524,12 @@ static int vega20_get_number_of_dpm_level(struct pp_hwmgr *hwmgr,
ret = smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_GetDpmFreqByIndex,
- (clk_id << 16 | 0xFF));
+ (clk_id << 16 | 0xFF),
+ num_of_levels);
PP_ASSERT_WITH_CODE(!ret,
"[GetNumOfDpmLevel] failed to get dpm levels!",
return ret);
- *num_of_levels = smum_get_argument(hwmgr);
- PP_ASSERT_WITH_CODE(*num_of_levels > 0,
- "[GetNumOfDpmLevel] number of clk levels is invalid!",
- return -EINVAL);
-
return ret;
}
@@ -547,16 +540,12 @@ static int vega20_get_dpm_frequency_by_index(struct pp_hwmgr *hwmgr,
ret = smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_GetDpmFreqByIndex,
- (clk_id << 16 | index));
+ (clk_id << 16 | index),
+ clk);
PP_ASSERT_WITH_CODE(!ret,
"[GetDpmFreqByIndex] failed to get dpm freq by index!",
return ret);
- *clk = smum_get_argument(hwmgr);
- PP_ASSERT_WITH_CODE(*clk,
- "[GetDpmFreqByIndex] clk value is invalid!",
- return -EINVAL);
-
return ret;
}
@@ -813,7 +802,8 @@ static int vega20_init_smc_table(struct pp_hwmgr *hwmgr)
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetMinDeepSleepDcefclk,
- (uint32_t)(data->vbios_boot_state.dcef_clock / 100));
+ (uint32_t)(data->vbios_boot_state.dcef_clock / 100),
+ NULL);
memcpy(pp_table, pptable_information->smc_pptable, sizeof(PPTable_t));
@@ -868,7 +858,8 @@ static int vega20_override_pcie_parameters(struct pp_hwmgr *hwmgr)
*/
smu_pcie_arg = (1 << 16) | (pcie_gen << 8) | pcie_width;
ret = smum_send_msg_to_smc_with_parameter(hwmgr,
- PPSMC_MSG_OverridePcieParameters, smu_pcie_arg);
+ PPSMC_MSG_OverridePcieParameters, smu_pcie_arg,
+ NULL);
PP_ASSERT_WITH_CODE(!ret,
"[OverridePcieParameters] Attempt to override pcie params failed!",
return ret);
@@ -899,13 +890,13 @@ static int vega20_set_allowed_featuresmask(struct pp_hwmgr *hwmgr)
& 0xFFFFFFFF));
ret = smum_send_msg_to_smc_with_parameter(hwmgr,
- PPSMC_MSG_SetAllowedFeaturesMaskHigh, allowed_features_high);
+ PPSMC_MSG_SetAllowedFeaturesMaskHigh, allowed_features_high, NULL);
PP_ASSERT_WITH_CODE(!ret,
"[SetAllowedFeaturesMask] Attempt to set allowed features mask(high) failed!",
return ret);
ret = smum_send_msg_to_smc_with_parameter(hwmgr,
- PPSMC_MSG_SetAllowedFeaturesMaskLow, allowed_features_low);
+ PPSMC_MSG_SetAllowedFeaturesMaskLow, allowed_features_low, NULL);
PP_ASSERT_WITH_CODE(!ret,
"[SetAllowedFeaturesMask] Attempt to set allowed features mask (low) failed!",
return ret);
@@ -915,12 +906,12 @@ static int vega20_set_allowed_featuresmask(struct pp_hwmgr *hwmgr)
static int vega20_run_btc(struct pp_hwmgr *hwmgr)
{
- return smum_send_msg_to_smc(hwmgr, PPSMC_MSG_RunBtc);
+ return smum_send_msg_to_smc(hwmgr, PPSMC_MSG_RunBtc, NULL);
}
static int vega20_run_btc_afll(struct pp_hwmgr *hwmgr)
{
- return smum_send_msg_to_smc(hwmgr, PPSMC_MSG_RunAfllBtc);
+ return smum_send_msg_to_smc(hwmgr, PPSMC_MSG_RunAfllBtc, NULL);
}
static int vega20_enable_all_smu_features(struct pp_hwmgr *hwmgr)
@@ -933,7 +924,8 @@ static int vega20_enable_all_smu_features(struct pp_hwmgr *hwmgr)
int ret = 0;
PP_ASSERT_WITH_CODE((ret = smum_send_msg_to_smc(hwmgr,
- PPSMC_MSG_EnableAllSmuFeatures)) == 0,
+ PPSMC_MSG_EnableAllSmuFeatures,
+ NULL)) == 0,
"[EnableAllSMUFeatures] Failed to enable all smu features!",
return ret);
@@ -966,7 +958,8 @@ static int vega20_notify_smc_display_change(struct pp_hwmgr *hwmgr)
if (data->smu_features[GNLD_DPM_UCLK].enabled)
return smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetUclkFastSwitch,
- 1);
+ 1,
+ NULL);
return 0;
}
@@ -978,7 +971,8 @@ static int vega20_send_clock_ratio(struct pp_hwmgr *hwmgr)
return smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetFclkGfxClkRatio,
- data->registry_data.fclk_gfxclk_ratio);
+ data->registry_data.fclk_gfxclk_ratio,
+ NULL);
}
static int vega20_disable_all_smu_features(struct pp_hwmgr *hwmgr)
@@ -991,7 +985,8 @@ static int vega20_disable_all_smu_features(struct pp_hwmgr *hwmgr)
int ret = 0;
PP_ASSERT_WITH_CODE((ret = smum_send_msg_to_smc(hwmgr,
- PPSMC_MSG_DisableAllSmuFeatures)) == 0,
+ PPSMC_MSG_DisableAllSmuFeatures,
+ NULL)) == 0,
"[DisableAllSMUFeatures] Failed to disable all smu features!",
return ret);
@@ -1199,12 +1194,12 @@ static int vega20_od8_get_gfx_clock_base_voltage(
ret = smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_GetAVFSVoltageByDpm,
- ((AVFS_CURVE << 24) | (OD8_HOTCURVE_TEMPERATURE << 16) | freq));
+ ((AVFS_CURVE << 24) | (OD8_HOTCURVE_TEMPERATURE << 16) | freq),
+ voltage);
PP_ASSERT_WITH_CODE(!ret,
"[GetBaseVoltage] failed to get GFXCLK AVFS voltage from SMU!",
return ret);
- *voltage = smum_get_argument(hwmgr);
*voltage = *voltage / VOLTAGE_SCALE;
return 0;
@@ -1560,19 +1555,19 @@ static int vega20_get_max_sustainable_clock(struct pp_hwmgr *hwmgr,
PP_ASSERT_WITH_CODE((ret = smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_GetDcModeMaxDpmFreq,
- (clock_select << 16))) == 0,
+ (clock_select << 16),
+ clock)) == 0,
"[GetMaxSustainableClock] Failed to get max DC clock from SMC!",
return ret);
- *clock = smum_get_argument(hwmgr);
/* if DC limit is zero, return AC limit */
if (*clock == 0) {
PP_ASSERT_WITH_CODE((ret = smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_GetMaxDpmFreq,
- (clock_select << 16))) == 0,
+ (clock_select << 16),
+ clock)) == 0,
"[GetMaxSustainableClock] failed to get max AC clock from SMC!",
return ret);
- *clock = smum_get_argument(hwmgr);
}
return 0;
@@ -1641,7 +1636,8 @@ static int vega20_enable_mgpu_fan_boost(struct pp_hwmgr *hwmgr)
int result;
result = smum_send_msg_to_smc(hwmgr,
- PPSMC_MSG_SetMGpuFanBoostLimitRpm);
+ PPSMC_MSG_SetMGpuFanBoostLimitRpm,
+ NULL);
PP_ASSERT_WITH_CODE(!result,
"[EnableMgpuFan] Failed to enable mgpu fan boost!",
return result);
@@ -1669,7 +1665,7 @@ static int vega20_enable_dpm_tasks(struct pp_hwmgr *hwmgr)
int result = 0;
smum_send_msg_to_smc_with_parameter(hwmgr,
- PPSMC_MSG_NumOfDisplays, 0);
+ PPSMC_MSG_NumOfDisplays, 0, NULL);
result = vega20_set_allowed_featuresmask(hwmgr);
PP_ASSERT_WITH_CODE(!result,
@@ -1740,12 +1736,12 @@ static int vega20_enable_dpm_tasks(struct pp_hwmgr *hwmgr)
return result);
result = smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_GetPptLimit,
- POWER_SOURCE_AC << 16);
+ POWER_SOURCE_AC << 16, &hwmgr->default_power_limit);
PP_ASSERT_WITH_CODE(!result,
"[GetPptLimit] get default PPT limit failed!",
return result);
hwmgr->power_limit =
- hwmgr->default_power_limit = smum_get_argument(hwmgr);
+ hwmgr->default_power_limit;
return 0;
}
@@ -1806,7 +1802,8 @@ static int vega20_upload_dpm_min_level(struct pp_hwmgr *hwmgr, uint32_t feature_
min_freq = data->dpm_table.gfx_table.dpm_state.soft_min_level;
PP_ASSERT_WITH_CODE(!(ret = smum_send_msg_to_smc_with_parameter(
hwmgr, PPSMC_MSG_SetSoftMinByFreq,
- (PPCLK_GFXCLK << 16) | (min_freq & 0xffff))),
+ (PPCLK_GFXCLK << 16) | (min_freq & 0xffff),
+ NULL)),
"Failed to set soft min gfxclk !",
return ret);
}
@@ -1816,7 +1813,8 @@ static int vega20_upload_dpm_min_level(struct pp_hwmgr *hwmgr, uint32_t feature_
min_freq = data->dpm_table.mem_table.dpm_state.soft_min_level;
PP_ASSERT_WITH_CODE(!(ret = smum_send_msg_to_smc_with_parameter(
hwmgr, PPSMC_MSG_SetSoftMinByFreq,
- (PPCLK_UCLK << 16) | (min_freq & 0xffff))),
+ (PPCLK_UCLK << 16) | (min_freq & 0xffff),
+ NULL)),
"Failed to set soft min memclk !",
return ret);
}
@@ -1827,7 +1825,8 @@ static int vega20_upload_dpm_min_level(struct pp_hwmgr *hwmgr, uint32_t feature_
PP_ASSERT_WITH_CODE(!(ret = smum_send_msg_to_smc_with_parameter(
hwmgr, PPSMC_MSG_SetSoftMinByFreq,
- (PPCLK_VCLK << 16) | (min_freq & 0xffff))),
+ (PPCLK_VCLK << 16) | (min_freq & 0xffff),
+ NULL)),
"Failed to set soft min vclk!",
return ret);
@@ -1835,7 +1834,8 @@ static int vega20_upload_dpm_min_level(struct pp_hwmgr *hwmgr, uint32_t feature_
PP_ASSERT_WITH_CODE(!(ret = smum_send_msg_to_smc_with_parameter(
hwmgr, PPSMC_MSG_SetSoftMinByFreq,
- (PPCLK_DCLK << 16) | (min_freq & 0xffff))),
+ (PPCLK_DCLK << 16) | (min_freq & 0xffff),
+ NULL)),
"Failed to set soft min dclk!",
return ret);
}
@@ -1846,7 +1846,8 @@ static int vega20_upload_dpm_min_level(struct pp_hwmgr *hwmgr, uint32_t feature_
PP_ASSERT_WITH_CODE(!(ret = smum_send_msg_to_smc_with_parameter(
hwmgr, PPSMC_MSG_SetSoftMinByFreq,
- (PPCLK_ECLK << 16) | (min_freq & 0xffff))),
+ (PPCLK_ECLK << 16) | (min_freq & 0xffff),
+ NULL)),
"Failed to set soft min eclk!",
return ret);
}
@@ -1857,7 +1858,8 @@ static int vega20_upload_dpm_min_level(struct pp_hwmgr *hwmgr, uint32_t feature_
PP_ASSERT_WITH_CODE(!(ret = smum_send_msg_to_smc_with_parameter(
hwmgr, PPSMC_MSG_SetSoftMinByFreq,
- (PPCLK_SOCCLK << 16) | (min_freq & 0xffff))),
+ (PPCLK_SOCCLK << 16) | (min_freq & 0xffff),
+ NULL)),
"Failed to set soft min socclk!",
return ret);
}
@@ -1868,7 +1870,8 @@ static int vega20_upload_dpm_min_level(struct pp_hwmgr *hwmgr, uint32_t feature_
PP_ASSERT_WITH_CODE(!(ret = smum_send_msg_to_smc_with_parameter(
hwmgr, PPSMC_MSG_SetSoftMinByFreq,
- (PPCLK_FCLK << 16) | (min_freq & 0xffff))),
+ (PPCLK_FCLK << 16) | (min_freq & 0xffff),
+ NULL)),
"Failed to set soft min fclk!",
return ret);
}
@@ -1879,7 +1882,8 @@ static int vega20_upload_dpm_min_level(struct pp_hwmgr *hwmgr, uint32_t feature_
PP_ASSERT_WITH_CODE(!(ret = smum_send_msg_to_smc_with_parameter(
hwmgr, PPSMC_MSG_SetHardMinByFreq,
- (PPCLK_DCEFCLK << 16) | (min_freq & 0xffff))),
+ (PPCLK_DCEFCLK << 16) | (min_freq & 0xffff),
+ NULL)),
"Failed to set hard min dcefclk!",
return ret);
}
@@ -1900,7 +1904,8 @@ static int vega20_upload_dpm_max_level(struct pp_hwmgr *hwmgr, uint32_t feature_
PP_ASSERT_WITH_CODE(!(ret = smum_send_msg_to_smc_with_parameter(
hwmgr, PPSMC_MSG_SetSoftMaxByFreq,
- (PPCLK_GFXCLK << 16) | (max_freq & 0xffff))),
+ (PPCLK_GFXCLK << 16) | (max_freq & 0xffff),
+ NULL)),
"Failed to set soft max gfxclk!",
return ret);
}
@@ -1911,7 +1916,8 @@ static int vega20_upload_dpm_max_level(struct pp_hwmgr *hwmgr, uint32_t feature_
PP_ASSERT_WITH_CODE(!(ret = smum_send_msg_to_smc_with_parameter(
hwmgr, PPSMC_MSG_SetSoftMaxByFreq,
- (PPCLK_UCLK << 16) | (max_freq & 0xffff))),
+ (PPCLK_UCLK << 16) | (max_freq & 0xffff),
+ NULL)),
"Failed to set soft max memclk!",
return ret);
}
@@ -1922,14 +1928,16 @@ static int vega20_upload_dpm_max_level(struct pp_hwmgr *hwmgr, uint32_t feature_
PP_ASSERT_WITH_CODE(!(ret = smum_send_msg_to_smc_with_parameter(
hwmgr, PPSMC_MSG_SetSoftMaxByFreq,
- (PPCLK_VCLK << 16) | (max_freq & 0xffff))),
+ (PPCLK_VCLK << 16) | (max_freq & 0xffff),
+ NULL)),
"Failed to set soft max vclk!",
return ret);
max_freq = data->dpm_table.dclk_table.dpm_state.soft_max_level;
PP_ASSERT_WITH_CODE(!(ret = smum_send_msg_to_smc_with_parameter(
hwmgr, PPSMC_MSG_SetSoftMaxByFreq,
- (PPCLK_DCLK << 16) | (max_freq & 0xffff))),
+ (PPCLK_DCLK << 16) | (max_freq & 0xffff),
+ NULL)),
"Failed to set soft max dclk!",
return ret);
}
@@ -1940,7 +1948,8 @@ static int vega20_upload_dpm_max_level(struct pp_hwmgr *hwmgr, uint32_t feature_
PP_ASSERT_WITH_CODE(!(ret = smum_send_msg_to_smc_with_parameter(
hwmgr, PPSMC_MSG_SetSoftMaxByFreq,
- (PPCLK_ECLK << 16) | (max_freq & 0xffff))),
+ (PPCLK_ECLK << 16) | (max_freq & 0xffff),
+ NULL)),
"Failed to set soft max eclk!",
return ret);
}
@@ -1951,7 +1960,8 @@ static int vega20_upload_dpm_max_level(struct pp_hwmgr *hwmgr, uint32_t feature_
PP_ASSERT_WITH_CODE(!(ret = smum_send_msg_to_smc_with_parameter(
hwmgr, PPSMC_MSG_SetSoftMaxByFreq,
- (PPCLK_SOCCLK << 16) | (max_freq & 0xffff))),
+ (PPCLK_SOCCLK << 16) | (max_freq & 0xffff),
+ NULL)),
"Failed to set soft max socclk!",
return ret);
}
@@ -1962,7 +1972,8 @@ static int vega20_upload_dpm_max_level(struct pp_hwmgr *hwmgr, uint32_t feature_
PP_ASSERT_WITH_CODE(!(ret = smum_send_msg_to_smc_with_parameter(
hwmgr, PPSMC_MSG_SetSoftMaxByFreq,
- (PPCLK_FCLK << 16) | (max_freq & 0xffff))),
+ (PPCLK_FCLK << 16) | (max_freq & 0xffff),
+ NULL)),
"Failed to set soft max fclk!",
return ret);
}
@@ -2006,17 +2017,17 @@ static int vega20_get_clock_ranges(struct pp_hwmgr *hwmgr,
if (max) {
PP_ASSERT_WITH_CODE((ret = smum_send_msg_to_smc_with_parameter(hwmgr,
- PPSMC_MSG_GetMaxDpmFreq, (clock_select << 16))) == 0,
+ PPSMC_MSG_GetMaxDpmFreq, (clock_select << 16),
+ clock)) == 0,
"[GetClockRanges] Failed to get max clock from SMC!",
return ret);
- *clock = smum_get_argument(hwmgr);
} else {
PP_ASSERT_WITH_CODE((ret = smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_GetMinDpmFreq,
- (clock_select << 16))) == 0,
+ (clock_select << 16),
+ clock)) == 0,
"[GetClockRanges] Failed to get min clock from SMC!",
return ret);
- *clock = smum_get_argument(hwmgr);
}
return 0;
@@ -2122,10 +2133,10 @@ static int vega20_get_current_clk_freq(struct pp_hwmgr *hwmgr,
*clk_freq = 0;
PP_ASSERT_WITH_CODE((ret = smum_send_msg_to_smc_with_parameter(hwmgr,
- PPSMC_MSG_GetDpmClockFreq, (clk_id << 16))) == 0,
+ PPSMC_MSG_GetDpmClockFreq, (clk_id << 16),
+ clk_freq)) == 0,
"[GetCurrentClkFreq] Attempt to get Current Frequency Failed!",
return ret);
- *clk_freq = smum_get_argument(hwmgr);
*clk_freq = *clk_freq * 100;
@@ -2276,7 +2287,8 @@ int vega20_display_clock_voltage_request(struct pp_hwmgr *hwmgr,
clk_request = (clk_select << 16) | clk_freq;
result = smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetHardMinByFreq,
- clk_request);
+ clk_request,
+ NULL);
}
}
@@ -2312,7 +2324,8 @@ static int vega20_notify_smc_display_config_after_ps_adjustment(
if (data->smu_features[GNLD_DS_DCEFCLK].supported)
PP_ASSERT_WITH_CODE((ret = smum_send_msg_to_smc_with_parameter(
hwmgr, PPSMC_MSG_SetMinDeepSleepDcefclk,
- min_clocks.dcefClockInSR / 100)) == 0,
+ min_clocks.dcefClockInSR / 100,
+ NULL)) == 0,
"Attempt to set divider for DCEFCLK Failed!",
return ret);
} else {
@@ -2324,7 +2337,8 @@ static int vega20_notify_smc_display_config_after_ps_adjustment(
dpm_table->dpm_state.hard_min_level = min_clocks.memoryClock / 100;
PP_ASSERT_WITH_CODE(!(ret = smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetHardMinByFreq,
- (PPCLK_UCLK << 16 ) | dpm_table->dpm_state.hard_min_level)),
+ (PPCLK_UCLK << 16 ) | dpm_table->dpm_state.hard_min_level,
+ NULL)),
"[SetHardMinFreq] Set hard min uclk failed!",
return ret);
}
@@ -2656,7 +2670,8 @@ static int vega20_force_clock_level(struct pp_hwmgr *hwmgr,
return -EINVAL;
ret = smum_send_msg_to_smc_with_parameter(hwmgr,
- PPSMC_MSG_SetMinLinkDpmByIndex, soft_min_level);
+ PPSMC_MSG_SetMinLinkDpmByIndex, soft_min_level,
+ NULL);
PP_ASSERT_WITH_CODE(!ret,
"Failed to set min link dpm level!",
return ret);
@@ -3140,7 +3155,7 @@ static int vega20_set_mp1_state(struct pp_hwmgr *hwmgr,
return 0;
}
- PP_ASSERT_WITH_CODE((ret = smum_send_msg_to_smc(hwmgr, msg)) == 0,
+ PP_ASSERT_WITH_CODE((ret = smum_send_msg_to_smc(hwmgr, msg, NULL)) == 0,
"[PrepareMp1] Failed!",
return ret);
@@ -3495,7 +3510,8 @@ static int vega20_set_uclk_to_highest_dpm_level(struct pp_hwmgr *hwmgr,
dpm_table->dpm_state.hard_min_level = dpm_table->dpm_levels[dpm_table->count - 1].value;
PP_ASSERT_WITH_CODE(!(ret = smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetHardMinByFreq,
- (PPCLK_UCLK << 16 ) | dpm_table->dpm_state.hard_min_level)),
+ (PPCLK_UCLK << 16 ) | dpm_table->dpm_state.hard_min_level,
+ NULL)),
"[SetUclkToHightestDpmLevel] Set hard min uclk failed!",
return ret);
}
@@ -3520,7 +3536,8 @@ static int vega20_set_fclk_to_highest_dpm_level(struct pp_hwmgr *hwmgr)
dpm_table->dpm_state.soft_min_level = dpm_table->dpm_levels[dpm_table->count - 1].value;
PP_ASSERT_WITH_CODE(!(ret = smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetSoftMinByFreq,
- (PPCLK_FCLK << 16 ) | dpm_table->dpm_state.soft_min_level)),
+ (PPCLK_FCLK << 16 ) | dpm_table->dpm_state.soft_min_level,
+ NULL)),
"[SetFclkToHightestDpmLevel] Set soft min fclk failed!",
return ret);
}
@@ -3534,7 +3551,7 @@ static int vega20_pre_display_configuration_changed_task(struct pp_hwmgr *hwmgr)
int ret = 0;
smum_send_msg_to_smc_with_parameter(hwmgr,
- PPSMC_MSG_NumOfDisplays, 0);
+ PPSMC_MSG_NumOfDisplays, 0, NULL);
ret = vega20_set_uclk_to_highest_dpm_level(hwmgr,
&data->dpm_table.mem_table);
@@ -3565,7 +3582,8 @@ static int vega20_display_configuration_changed_task(struct pp_hwmgr *hwmgr)
data->smu_features[GNLD_DPM_SOCCLK].supported) {
result = smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_NumOfDisplays,
- hwmgr->display_config->num_display);
+ hwmgr->display_config->num_display,
+ NULL);
}
return result;
@@ -4082,7 +4100,8 @@ out:
workload_type =
conv_power_profile_to_pplib_workload(power_profile_mode);
smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetWorkloadMask,
- 1 << workload_type);
+ 1 << workload_type,
+ NULL);
hwmgr->power_profile_mode = power_profile_mode;
@@ -4098,21 +4117,26 @@ static int vega20_notify_cac_buffer_info(struct pp_hwmgr *hwmgr,
{
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetSystemVirtualDramAddrHigh,
- virtual_addr_hi);
+ virtual_addr_hi,
+ NULL);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetSystemVirtualDramAddrLow,
- virtual_addr_low);
+ virtual_addr_low,
+ NULL);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_DramLogSetDramAddrHigh,
- mc_addr_hi);
+ mc_addr_hi,
+ NULL);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_DramLogSetDramAddrLow,
- mc_addr_low);
+ mc_addr_low,
+ NULL);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_DramLogSetDramSize,
- size);
+ size,
+ NULL);
return 0;
}
@@ -4153,7 +4177,8 @@ static int vega20_smu_i2c_bus_access(struct pp_hwmgr *hwmgr, bool acquire)
(acquire ?
PPSMC_MSG_RequestI2CBus :
PPSMC_MSG_ReleaseI2CBus),
- 0);
+ 0,
+ NULL);
PP_ASSERT_WITH_CODE(!res, "[SmuI2CAccessBus] Failed to access bus!", return res);
return res;
@@ -4170,7 +4195,8 @@ static int vega20_set_df_cstate(struct pp_hwmgr *hwmgr,
return -EINVAL;
}
- ret = smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_DFCstateControl, state);
+ ret = smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_DFCstateControl, state,
+ NULL);
if (ret)
pr_err("SetDfCstate failed!\n");
@@ -4184,7 +4210,8 @@ static int vega20_set_xgmi_pstate(struct pp_hwmgr *hwmgr,
ret = smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetXgmiMode,
- pstate ? XGMI_MODE_PSTATE_D0 : XGMI_MODE_PSTATE_D3);
+ pstate ? XGMI_MODE_PSTATE_D0 : XGMI_MODE_PSTATE_D3,
+ NULL);
if (ret)
pr_err("SetXgmiPstate failed!\n");
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_powertune.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_powertune.c
index a0bfb65cc5d6..d7cc3d2d9e17 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_powertune.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_powertune.c
@@ -36,7 +36,8 @@ int vega20_set_power_limit(struct pp_hwmgr *hwmgr, uint32_t n)
if (data->smu_features[GNLD_PPT].enabled)
return smum_send_msg_to_smc_with_parameter(hwmgr,
- PPSMC_MSG_SetPptLimit, n);
+ PPSMC_MSG_SetPptLimit, n,
+ NULL);
return 0;
}
@@ -51,7 +52,8 @@ static int vega20_set_overdrive_target_percentage(struct pp_hwmgr *hwmgr,
uint32_t adjust_percent)
{
return smum_send_msg_to_smc_with_parameter(hwmgr,
- PPSMC_MSG_OverDriveSetPercentage, adjust_percent);
+ PPSMC_MSG_OverDriveSetPercentage, adjust_percent,
+ NULL);
}
int vega20_power_control_set_level(struct pp_hwmgr *hwmgr)
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_thermal.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_thermal.c
index ede54e87e287..7add2f60f49c 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_thermal.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_thermal.c
@@ -106,10 +106,10 @@ static int vega20_get_current_rpm(struct pp_hwmgr *hwmgr, uint32_t *current_rpm)
int ret = 0;
PP_ASSERT_WITH_CODE((ret = smum_send_msg_to_smc(hwmgr,
- PPSMC_MSG_GetCurrentRpm)) == 0,
+ PPSMC_MSG_GetCurrentRpm,
+ current_rpm)) == 0,
"Attempt to get current RPM from SMC Failed!",
return ret);
- *current_rpm = smum_get_argument(hwmgr);
return 0;
}
@@ -329,7 +329,8 @@ static int vega20_thermal_setup_fan_table(struct pp_hwmgr *hwmgr)
ret = smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetFanTemperatureTarget,
- (uint32_t)table->FanTargetTemperature);
+ (uint32_t)table->FanTargetTemperature,
+ NULL);
return ret;
}
diff --git a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
index ae2c318dd6fa..4d1c2a44a8b6 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
@@ -405,7 +405,9 @@ struct smu_context
bool pm_enabled;
bool is_apu;
- uint32_t smc_if_version;
+ uint32_t smc_driver_if_version;
+ uint32_t smc_fw_if_version;
+ uint32_t smc_fw_version;
bool uploading_custom_pp_table;
bool dc_controlled_by_gpio;
@@ -489,6 +491,7 @@ struct pptable_funcs {
int (*get_dpm_clk_limited)(struct smu_context *smu, enum smu_clk_type clk_type,
uint32_t dpm_level, uint32_t *freq);
int (*set_df_cstate)(struct smu_context *smu, enum pp_df_cstate state);
+ int (*allow_xgmi_power_down)(struct smu_context *smu, bool en);
int (*update_pcie_parameters)(struct smu_context *smu, uint32_t pcie_gen_cap, uint32_t pcie_width_cap);
int (*i2c_eeprom_init)(struct i2c_adapter *control);
void (*i2c_eeprom_fini)(struct i2c_adapter *control);
@@ -580,11 +583,6 @@ int smu_check_fw_status(struct smu_context *smu);
int smu_set_gfx_cgpg(struct smu_context *smu, bool enabled);
-#define smu_i2c_eeprom_init(smu, control) \
- ((smu)->ppt_funcs->i2c_eeprom_init ? (smu)->ppt_funcs->i2c_eeprom_init((control)) : -EINVAL)
-#define smu_i2c_eeprom_fini(smu, control) \
- ((smu)->ppt_funcs->i2c_eeprom_fini ? (smu)->ppt_funcs->i2c_eeprom_fini((control)) : -EINVAL)
-
int smu_set_fan_speed_rpm(struct smu_context *smu, uint32_t speed);
int smu_get_power_limit(struct smu_context *smu,
@@ -734,6 +732,7 @@ int smu_set_mp1_state(struct smu_context *smu,
enum pp_mp1_state mp1_state);
int smu_set_df_cstate(struct smu_context *smu,
enum pp_df_cstate state);
+int smu_allow_xgmi_power_down(struct smu_context *smu, bool en);
int smu_get_max_sustainable_clocks_by_dc(struct smu_context *smu,
struct pp_smu_nv_clock_table *max_clocks);
diff --git a/drivers/gpu/drm/amd/powerplay/inc/arcturus_ppsmc.h b/drivers/gpu/drm/amd/powerplay/inc/arcturus_ppsmc.h
index f736d773f9d6..e07478b6ac04 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/arcturus_ppsmc.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/arcturus_ppsmc.h
@@ -114,7 +114,8 @@
#define PPSMC_MSG_SetNumBadHbmPagesRetired 0x3A
#define PPSMC_MSG_DFCstateControl 0x3B
-#define PPSMC_Message_Count 0x3C
+#define PPSMC_MSG_GmiPwrDnControl 0x3D
+#define PPSMC_Message_Count 0x3E
typedef uint32_t PPSMC_Result;
typedef uint32_t PPSMC_Msg;
diff --git a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
index 2ffb666b97e6..15ed6cbdf366 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
@@ -743,6 +743,7 @@ struct pp_hwmgr {
bool pm_en;
bool pp_one_vf;
struct mutex smu_lock;
+ struct mutex msg_lock;
uint32_t pp_table_version;
void *device;
diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if_arcturus.h b/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if_arcturus.h
index ce5b5011c122..8b82059d97e7 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if_arcturus.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if_arcturus.h
@@ -82,8 +82,8 @@
// Other
#define FEATURE_OUT_OF_BAND_MONITOR_BIT 24
#define FEATURE_TEMP_DEPENDENT_VMIN_BIT 25
+#define FEATURE_PER_PART_VMIN_BIT 26
-#define FEATURE_SPARE_26_BIT 26
#define FEATURE_SPARE_27_BIT 27
#define FEATURE_SPARE_28_BIT 28
#define FEATURE_SPARE_29_BIT 29
@@ -154,6 +154,7 @@
#define FEATURE_OUT_OF_BAND_MONITOR_MASK (1 << FEATURE_OUT_OF_BAND_MONITOR_BIT )
#define FEATURE_TEMP_DEPENDENT_VMIN_MASK (1 << FEATURE_TEMP_DEPENDENT_VMIN_BIT )
+#define FEATURE_PER_PART_VMIN_MASK (1 << FEATURE_PER_PART_VMIN_BIT )
//FIXME need updating
@@ -628,8 +629,14 @@ typedef struct {
uint16_t BasePerformanceFrequencyCap; //In Mhz
uint16_t MaxPerformanceFrequencyCap; //In Mhz
+ // Per-Part Vmin
+ uint16_t VDDGFX_VminLow; // mv Q2
+ uint16_t VDDGFX_TVminLow; //Celcius
+ uint16_t VDDGFX_VminLow_HiTemp; // mv Q2
+ uint16_t VDDGFX_VminLow_LoTemp; // mv Q2
+
// SECTION: Reserved
- uint32_t Reserved[9];
+ uint32_t Reserved[7];
// SECTION: BOARD PARAMETERS
@@ -869,6 +876,10 @@ typedef struct {
uint8_t Mem_DownHystLimit;
uint16_t Mem_Fps;
+ uint32_t BusyThreshold; // Q16
+ uint32_t BusyHyst;
+ uint32_t IdleHyst;
+
uint32_t MmHubPadding[8]; // SMU internal use
} DpmActivityMonitorCoeffInt_t;
diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu12_driver_if.h b/drivers/gpu/drm/amd/powerplay/inc/smu12_driver_if.h
index 2f85a34c0591..e9315eb5b48e 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/smu12_driver_if.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/smu12_driver_if.h
@@ -27,7 +27,7 @@
// *** IMPORTANT ***
// SMU TEAM: Always increment the interface version if
// any structure is changed in this file
-#define SMU12_DRIVER_IF_VERSION 11
+#define SMU12_DRIVER_IF_VERSION 14
typedef struct {
int32_t value;
@@ -154,15 +154,19 @@ typedef enum {
} CLOCK_IDs_e;
// Throttler Status Bitmask
-#define THROTTLER_STATUS_BIT_SPL 0
-#define THROTTLER_STATUS_BIT_FPPT 1
-#define THROTTLER_STATUS_BIT_SPPT 2
-#define THROTTLER_STATUS_BIT_SPPT_APU 3
-#define THROTTLER_STATUS_BIT_THM_CORE 4
-#define THROTTLER_STATUS_BIT_THM_GFX 5
-#define THROTTLER_STATUS_BIT_THM_SOC 6
-#define THROTTLER_STATUS_BIT_TDC_VDD 7
-#define THROTTLER_STATUS_BIT_TDC_SOC 8
+#define THROTTLER_STATUS_BIT_SPL 0
+#define THROTTLER_STATUS_BIT_FPPT 1
+#define THROTTLER_STATUS_BIT_SPPT 2
+#define THROTTLER_STATUS_BIT_SPPT_APU 3
+#define THROTTLER_STATUS_BIT_THM_CORE 4
+#define THROTTLER_STATUS_BIT_THM_GFX 5
+#define THROTTLER_STATUS_BIT_THM_SOC 6
+#define THROTTLER_STATUS_BIT_TDC_VDD 7
+#define THROTTLER_STATUS_BIT_TDC_SOC 8
+#define THROTTLER_STATUS_BIT_PROCHOT_CPU 9
+#define THROTTLER_STATUS_BIT_PROCHOT_GFX 10
+#define THROTTLER_STATUS_BIT_EDC_CPU 11
+#define THROTTLER_STATUS_BIT_EDC_GFX 12
typedef struct {
uint16_t ClockFrequency[CLOCK_COUNT]; //[MHz]
@@ -180,7 +184,7 @@ typedef struct {
uint16_t Power[2]; //[mW] indices: VDDCR_VDD, VDDCR_SOC
uint16_t FanPwm; //[milli]
- uint16_t CurrentSocketPower; //[mW]
+ uint16_t CurrentSocketPower; //[W]
uint16_t CoreFrequency[8]; //[MHz]
uint16_t CorePower[8]; //[mW]
@@ -193,10 +197,16 @@ typedef struct {
uint16_t ThrottlerStatus;
uint16_t spare;
- uint16_t StapmOriginalLimit; //[mW]
- uint16_t StapmCurrentLimit; //[mW]
- uint16_t ApuPower; //[mW]
- uint16_t dGpuPower; //[mW]
+ uint16_t StapmOriginalLimit; //[W]
+ uint16_t StapmCurrentLimit; //[W]
+ uint16_t ApuPower; //[W]
+ uint16_t dGpuPower; //[W]
+
+ uint16_t VddTdcValue; //[mA]
+ uint16_t SocTdcValue; //[mA]
+ uint16_t VddEdcValue; //[mA]
+ uint16_t SocEdcValue; //[mA]
+ uint16_t reserve[2];
} SmuMetrics_t;
diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu_types.h b/drivers/gpu/drm/amd/powerplay/inc/smu_types.h
index a5b4df146713..ee7dac4693d4 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/smu_types.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/smu_types.h
@@ -170,6 +170,7 @@
__SMU_DUMMY_MAP(SetSoftMinJpeg), \
__SMU_DUMMY_MAP(SetHardMinFclkByFreq), \
__SMU_DUMMY_MAP(DFCstateControl), \
+ __SMU_DUMMY_MAP(GmiPwrDnControl), \
__SMU_DUMMY_MAP(DAL_DISABLE_DUMMY_PSTATE_CHANGE), \
__SMU_DUMMY_MAP(DAL_ENABLE_DUMMY_PSTATE_CHANGE), \
diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h
index 674e426ed59b..6b3b451a8018 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h
@@ -27,8 +27,8 @@
#define SMU11_DRIVER_IF_VERSION_INV 0xFFFFFFFF
#define SMU11_DRIVER_IF_VERSION_VG20 0x13
-#define SMU11_DRIVER_IF_VERSION_ARCT 0x12
-#define SMU11_DRIVER_IF_VERSION_NV10 0x35
+#define SMU11_DRIVER_IF_VERSION_ARCT 0x14
+#define SMU11_DRIVER_IF_VERSION_NV10 0x36
#define SMU11_DRIVER_IF_VERSION_NV12 0x33
#define SMU11_DRIVER_IF_VERSION_NV14 0x36
@@ -37,7 +37,6 @@
#define MP0_SRAM 0x03900000
#define MP1_Public 0x03b00000
#define MP1_SRAM 0x03c00004
-#define MP1_SMC_SIZE 0x40000
/* address block */
#define smnMP1_FIRMWARE_FLAGS 0x3010024
diff --git a/drivers/gpu/drm/amd/powerplay/inc/smumgr.h b/drivers/gpu/drm/amd/powerplay/inc/smumgr.h
index c5288831aa15..ad100b533d04 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/smumgr.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/smumgr.h
@@ -81,16 +81,15 @@ enum SMU10_TABLE_ID {
SMU10_CLOCKTABLE,
};
-extern uint32_t smum_get_argument(struct pp_hwmgr *hwmgr);
-
extern int smum_download_powerplay_table(struct pp_hwmgr *hwmgr, void **table);
extern int smum_upload_powerplay_table(struct pp_hwmgr *hwmgr);
-extern int smum_send_msg_to_smc(struct pp_hwmgr *hwmgr, uint16_t msg);
+extern int smum_send_msg_to_smc(struct pp_hwmgr *hwmgr, uint16_t msg, uint32_t *resp);
extern int smum_send_msg_to_smc_with_parameter(struct pp_hwmgr *hwmgr,
- uint16_t msg, uint32_t parameter);
+ uint16_t msg, uint32_t parameter,
+ uint32_t *resp);
extern int smum_update_sclk_threshold(struct pp_hwmgr *hwmgr);
diff --git a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c
index 15030284b444..0c9be864d072 100644
--- a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c
+++ b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c
@@ -423,6 +423,7 @@ static int navi10_append_powerplay_table(struct smu_context *smu)
struct smu_table_context *table_context = &smu->smu_table;
PPTable_t *smc_pptable = table_context->driver_pptable;
struct atom_smc_dpm_info_v4_5 *smc_dpm_table;
+ struct atom_smc_dpm_info_v4_7 *smc_dpm_table_v4_7;
int index, ret;
index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
@@ -433,77 +434,33 @@ static int navi10_append_powerplay_table(struct smu_context *smu)
if (ret)
return ret;
- memcpy(smc_pptable->I2cControllers, smc_dpm_table->I2cControllers,
- sizeof(I2cControllerConfig_t) * NUM_I2C_CONTROLLERS);
-
- /* SVI2 Board Parameters */
- smc_pptable->MaxVoltageStepGfx = smc_dpm_table->MaxVoltageStepGfx;
- smc_pptable->MaxVoltageStepSoc = smc_dpm_table->MaxVoltageStepSoc;
- smc_pptable->VddGfxVrMapping = smc_dpm_table->VddGfxVrMapping;
- smc_pptable->VddSocVrMapping = smc_dpm_table->VddSocVrMapping;
- smc_pptable->VddMem0VrMapping = smc_dpm_table->VddMem0VrMapping;
- smc_pptable->VddMem1VrMapping = smc_dpm_table->VddMem1VrMapping;
- smc_pptable->GfxUlvPhaseSheddingMask = smc_dpm_table->GfxUlvPhaseSheddingMask;
- smc_pptable->SocUlvPhaseSheddingMask = smc_dpm_table->SocUlvPhaseSheddingMask;
- smc_pptable->ExternalSensorPresent = smc_dpm_table->ExternalSensorPresent;
- smc_pptable->Padding8_V = smc_dpm_table->Padding8_V;
-
- /* Telemetry Settings */
- smc_pptable->GfxMaxCurrent = smc_dpm_table->GfxMaxCurrent;
- smc_pptable->GfxOffset = smc_dpm_table->GfxOffset;
- smc_pptable->Padding_TelemetryGfx = smc_dpm_table->Padding_TelemetryGfx;
- smc_pptable->SocMaxCurrent = smc_dpm_table->SocMaxCurrent;
- smc_pptable->SocOffset = smc_dpm_table->SocOffset;
- smc_pptable->Padding_TelemetrySoc = smc_dpm_table->Padding_TelemetrySoc;
- smc_pptable->Mem0MaxCurrent = smc_dpm_table->Mem0MaxCurrent;
- smc_pptable->Mem0Offset = smc_dpm_table->Mem0Offset;
- smc_pptable->Padding_TelemetryMem0 = smc_dpm_table->Padding_TelemetryMem0;
- smc_pptable->Mem1MaxCurrent = smc_dpm_table->Mem1MaxCurrent;
- smc_pptable->Mem1Offset = smc_dpm_table->Mem1Offset;
- smc_pptable->Padding_TelemetryMem1 = smc_dpm_table->Padding_TelemetryMem1;
-
- /* GPIO Settings */
- smc_pptable->AcDcGpio = smc_dpm_table->AcDcGpio;
- smc_pptable->AcDcPolarity = smc_dpm_table->AcDcPolarity;
- smc_pptable->VR0HotGpio = smc_dpm_table->VR0HotGpio;
- smc_pptable->VR0HotPolarity = smc_dpm_table->VR0HotPolarity;
- smc_pptable->VR1HotGpio = smc_dpm_table->VR1HotGpio;
- smc_pptable->VR1HotPolarity = smc_dpm_table->VR1HotPolarity;
- smc_pptable->GthrGpio = smc_dpm_table->GthrGpio;
- smc_pptable->GthrPolarity = smc_dpm_table->GthrPolarity;
-
- /* LED Display Settings */
- smc_pptable->LedPin0 = smc_dpm_table->LedPin0;
- smc_pptable->LedPin1 = smc_dpm_table->LedPin1;
- smc_pptable->LedPin2 = smc_dpm_table->LedPin2;
- smc_pptable->padding8_4 = smc_dpm_table->padding8_4;
-
- /* GFXCLK PLL Spread Spectrum */
- smc_pptable->PllGfxclkSpreadEnabled = smc_dpm_table->PllGfxclkSpreadEnabled;
- smc_pptable->PllGfxclkSpreadPercent = smc_dpm_table->PllGfxclkSpreadPercent;
- smc_pptable->PllGfxclkSpreadFreq = smc_dpm_table->PllGfxclkSpreadFreq;
-
- /* GFXCLK DFLL Spread Spectrum */
- smc_pptable->DfllGfxclkSpreadEnabled = smc_dpm_table->DfllGfxclkSpreadEnabled;
- smc_pptable->DfllGfxclkSpreadPercent = smc_dpm_table->DfllGfxclkSpreadPercent;
- smc_pptable->DfllGfxclkSpreadFreq = smc_dpm_table->DfllGfxclkSpreadFreq;
-
- /* UCLK Spread Spectrum */
- smc_pptable->UclkSpreadEnabled = smc_dpm_table->UclkSpreadEnabled;
- smc_pptable->UclkSpreadPercent = smc_dpm_table->UclkSpreadPercent;
- smc_pptable->UclkSpreadFreq = smc_dpm_table->UclkSpreadFreq;
-
- /* SOCCLK Spread Spectrum */
- smc_pptable->SoclkSpreadEnabled = smc_dpm_table->SoclkSpreadEnabled;
- smc_pptable->SocclkSpreadPercent = smc_dpm_table->SocclkSpreadPercent;
- smc_pptable->SocclkSpreadFreq = smc_dpm_table->SocclkSpreadFreq;
-
- /* Total board power */
- smc_pptable->TotalBoardPower = smc_dpm_table->TotalBoardPower;
- smc_pptable->BoardPadding = smc_dpm_table->BoardPadding;
-
- /* Mvdd Svi2 Div Ratio Setting */
- smc_pptable->MvddRatio = smc_dpm_table->MvddRatio;
+ pr_info("smc_dpm_info table revision(format.content): %d.%d\n",
+ smc_dpm_table->table_header.format_revision,
+ smc_dpm_table->table_header.content_revision);
+
+ if (smc_dpm_table->table_header.format_revision != 4) {
+ pr_err("smc_dpm_info table format revision is not 4!\n");
+ return -EINVAL;
+ }
+
+ switch (smc_dpm_table->table_header.content_revision) {
+ case 5: /* nv10 and nv14 */
+ memcpy(smc_pptable->I2cControllers, smc_dpm_table->I2cControllers,
+ sizeof(*smc_dpm_table) - sizeof(smc_dpm_table->table_header));
+ break;
+ case 7: /* nv12 */
+ ret = smu_get_atom_data_table(smu, index, NULL, NULL, NULL,
+ (uint8_t **)&smc_dpm_table_v4_7);
+ if (ret)
+ return ret;
+ memcpy(smc_pptable->I2cControllers, smc_dpm_table_v4_7->I2cControllers,
+ sizeof(*smc_dpm_table_v4_7) - sizeof(smc_dpm_table_v4_7->table_header));
+ break;
+ default:
+ pr_err("smc_dpm_info with unsupported content revision %d!\n",
+ smc_dpm_table->table_header.content_revision);
+ return -EINVAL;
+ }
if (adev->pm.pp_feature & PP_GFXOFF_MASK) {
/* TODO: remove it once SMU fw fix it */
@@ -1336,8 +1293,6 @@ static int navi10_set_power_profile_mode(struct smu_context *smu, long *input, u
}
if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) {
- if (size < 0)
- return -EINVAL;
ret = smu_update_table(smu,
SMU_TABLE_ACTIVITY_MONITOR_COEFF, WORKLOAD_PPLIB_CUSTOM_BIT,
@@ -1860,7 +1815,8 @@ static int navi10_get_power_limit(struct smu_context *smu,
int power_src;
if (!smu->power_limit) {
- if (smu_feature_is_enabled(smu, SMU_FEATURE_PPT_BIT)) {
+ if (smu_feature_is_enabled(smu, SMU_FEATURE_PPT_BIT) &&
+ !amdgpu_sriov_vf(smu->adev)) {
power_src = smu_power_get_index(smu, SMU_POWER_SOURCE_AC);
if (power_src < 0)
return -EINVAL;
@@ -2003,6 +1959,9 @@ static int navi10_set_default_od_settings(struct smu_context *smu, bool initiali
OverDriveTable_t *od_table, *boot_od_table;
int ret = 0;
+ if (amdgpu_sriov_vf(smu->adev))
+ return 0;
+
ret = smu_v11_0_set_default_od_settings(smu, initialize, sizeof(OverDriveTable_t));
if (ret)
return ret;
diff --git a/drivers/gpu/drm/amd/powerplay/renoir_ppt.c b/drivers/gpu/drm/amd/powerplay/renoir_ppt.c
index b0ed1b3fe79a..67476047c067 100644
--- a/drivers/gpu/drm/amd/powerplay/renoir_ppt.c
+++ b/drivers/gpu/drm/amd/powerplay/renoir_ppt.c
@@ -296,6 +296,8 @@ static int renoir_print_clk_levels(struct smu_context *smu,
for (i = 0; i < count; i++) {
GET_DPM_CUR_FREQ(clk_table, clk_type, i, value);
+ if (!value)
+ continue;
size += sprintf(buf + size, "%d: %uMhz %s\n", i, value,
cur_value == value ? "*" : "");
if (cur_value == value)
@@ -847,7 +849,7 @@ static int renoir_get_power_profile_mode(struct smu_context *smu,
uint32_t i, size = 0;
int16_t workload_type = 0;
- if (!smu->pm_enabled || !buf)
+ if (!buf)
return -EINVAL;
for (i = 0; i <= PP_SMC_POWER_PROFILE_CUSTOM; i++) {
@@ -898,7 +900,7 @@ static bool renoir_is_dpm_running(struct smu_context *smu)
struct amdgpu_device *adev = smu->adev;
/*
- * Util now, the pmfw hasn't exported the interface of SMU
+ * Until now, the pmfw hasn't exported the interface of SMU
* feature mask to APU SKU so just force on all the feature
* at early initial stage.
*/
@@ -955,6 +957,6 @@ static const struct pptable_funcs renoir_ppt_funcs = {
void renoir_set_ppt_funcs(struct smu_context *smu)
{
smu->ppt_funcs = &renoir_ppt_funcs;
- smu->smc_if_version = SMU12_DRIVER_IF_VERSION;
+ smu->smc_driver_if_version = SMU12_DRIVER_IF_VERSION;
smu->is_apu = true;
}
diff --git a/drivers/gpu/drm/amd/powerplay/smu_internal.h b/drivers/gpu/drm/amd/powerplay/smu_internal.h
index 40c35bcc5a0a..c97444841abc 100644
--- a/drivers/gpu/drm/amd/powerplay/smu_internal.h
+++ b/drivers/gpu/drm/amd/powerplay/smu_internal.h
@@ -214,4 +214,9 @@ static inline int smu_send_smc_msg(struct smu_context *smu, enum smu_message_typ
#define smu_set_power_source(smu, power_src) \
((smu)->ppt_funcs->set_power_source ? (smu)->ppt_funcs->set_power_source((smu), (power_src)) : 0)
+#define smu_i2c_eeprom_init(smu, control) \
+ ((smu)->ppt_funcs->i2c_eeprom_init ? (smu)->ppt_funcs->i2c_eeprom_init((control)) : 0)
+#define smu_i2c_eeprom_fini(smu, control) \
+ ((smu)->ppt_funcs->i2c_eeprom_fini ? (smu)->ppt_funcs->i2c_eeprom_fini((control)) : 0)
+
#endif
diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
index 655ba4fb05dc..ae0361e225bb 100644
--- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
+++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
@@ -23,6 +23,7 @@
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>
+#include <linux/reboot.h>
#define SMU_11_0_PARTIAL_PPTABLE
@@ -57,7 +58,7 @@ static int smu_v11_0_send_msg_without_waiting(struct smu_context *smu,
uint16_t msg)
{
struct amdgpu_device *adev = smu->adev;
- WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_66, msg);
+ WREG32_SOC15_NO_KIQ(MP1, 0, mmMP1_SMN_C2PMSG_66, msg);
return 0;
}
@@ -65,7 +66,7 @@ static int smu_v11_0_read_arg(struct smu_context *smu, uint32_t *arg)
{
struct amdgpu_device *adev = smu->adev;
- *arg = RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_82);
+ *arg = RREG32_SOC15_NO_KIQ(MP1, 0, mmMP1_SMN_C2PMSG_82);
return 0;
}
@@ -75,7 +76,7 @@ static int smu_v11_0_wait_for_response(struct smu_context *smu)
uint32_t cur_value, i, timeout = adev->usec_timeout * 10;
for (i = 0; i < timeout; i++) {
- cur_value = RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90);
+ cur_value = RREG32_SOC15_NO_KIQ(MP1, 0, mmMP1_SMN_C2PMSG_90);
if ((cur_value & MP1_C2PMSG_90__CONTENT_MASK) != 0)
return cur_value == 0x1 ? 0 : -EIO;
@@ -83,7 +84,10 @@ static int smu_v11_0_wait_for_response(struct smu_context *smu)
}
/* timeout means wrong logic */
- return -ETIME;
+ if (i == timeout)
+ return -ETIME;
+
+ return RREG32_SOC15_NO_KIQ(MP1, 0, mmMP1_SMN_C2PMSG_90) == 0x1 ? 0 : -EIO;
}
int
@@ -107,9 +111,9 @@ smu_v11_0_send_msg_with_param(struct smu_context *smu,
goto out;
}
- WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90, 0);
+ WREG32_SOC15_NO_KIQ(MP1, 0, mmMP1_SMN_C2PMSG_90, 0);
- WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_82, param);
+ WREG32_SOC15_NO_KIQ(MP1, 0, mmMP1_SMN_C2PMSG_82, param);
smu_v11_0_send_msg_without_waiting(smu, (uint16_t)index);
@@ -119,6 +123,7 @@ smu_v11_0_send_msg_with_param(struct smu_context *smu,
smu_get_message_name(smu, msg), index, param, ret);
goto out;
}
+
if (read_arg) {
ret = smu_v11_0_read_arg(smu, read_arg);
if (ret) {
@@ -201,13 +206,15 @@ int smu_v11_0_load_microcode(struct smu_context *smu)
const struct smc_firmware_header_v1_0 *hdr;
uint32_t addr_start = MP1_SRAM;
uint32_t i;
+ uint32_t smc_fw_size;
uint32_t mp1_fw_flags;
hdr = (const struct smc_firmware_header_v1_0 *) adev->pm.fw->data;
src = (const uint32_t *)(adev->pm.fw->data +
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+ smc_fw_size = hdr->header.ucode_size_bytes;
- for (i = 1; i < MP1_SMC_SIZE/4 - 1; i++) {
+ for (i = 1; i < smc_fw_size/4 - 1; i++) {
WREG32_PCIE(addr_start, src[i]);
addr_start += 4;
}
@@ -264,23 +271,23 @@ int smu_v11_0_check_fw_version(struct smu_context *smu)
switch (smu->adev->asic_type) {
case CHIP_VEGA20:
- smu->smc_if_version = SMU11_DRIVER_IF_VERSION_VG20;
+ smu->smc_driver_if_version = SMU11_DRIVER_IF_VERSION_VG20;
break;
case CHIP_ARCTURUS:
- smu->smc_if_version = SMU11_DRIVER_IF_VERSION_ARCT;
+ smu->smc_driver_if_version = SMU11_DRIVER_IF_VERSION_ARCT;
break;
case CHIP_NAVI10:
- smu->smc_if_version = SMU11_DRIVER_IF_VERSION_NV10;
+ smu->smc_driver_if_version = SMU11_DRIVER_IF_VERSION_NV10;
break;
case CHIP_NAVI12:
- smu->smc_if_version = SMU11_DRIVER_IF_VERSION_NV12;
+ smu->smc_driver_if_version = SMU11_DRIVER_IF_VERSION_NV12;
break;
case CHIP_NAVI14:
- smu->smc_if_version = SMU11_DRIVER_IF_VERSION_NV14;
+ smu->smc_driver_if_version = SMU11_DRIVER_IF_VERSION_NV14;
break;
default:
pr_err("smu unsupported asic type:%d.\n", smu->adev->asic_type);
- smu->smc_if_version = SMU11_DRIVER_IF_VERSION_INV;
+ smu->smc_driver_if_version = SMU11_DRIVER_IF_VERSION_INV;
break;
}
@@ -292,10 +299,10 @@ int smu_v11_0_check_fw_version(struct smu_context *smu)
* Considering above, we just leave user a warning message instead
* of halt driver loading.
*/
- if (if_version != smu->smc_if_version) {
+ if (if_version != smu->smc_driver_if_version) {
pr_info("smu driver if version = 0x%08x, smu fw if version = 0x%08x, "
"smu fw version = 0x%08x (%d.%d.%d)\n",
- smu->smc_if_version, if_version,
+ smu->smc_driver_if_version, if_version,
smu_version, smu_major, smu_minor, smu_debug);
pr_warn("SMU driver if version not matched\n");
}
@@ -479,8 +486,6 @@ int smu_v11_0_init_power(struct smu_context *smu)
{
struct smu_power_context *smu_power = &smu->smu_power;
- if (!smu->pm_enabled)
- return 0;
if (smu_power->power_context || smu_power->power_context_size != 0)
return -EINVAL;
@@ -497,8 +502,6 @@ int smu_v11_0_fini_power(struct smu_context *smu)
{
struct smu_power_context *smu_power = &smu->smu_power;
- if (!smu->pm_enabled)
- return 0;
if (!smu_power->power_context || smu_power->power_context_size == 0)
return -EINVAL;
@@ -730,8 +733,9 @@ int smu_v11_0_parse_pptable(struct smu_context *smu)
struct smu_table_context *table_context = &smu->smu_table;
struct smu_table *table = &table_context->tables[SMU_TABLE_PPTABLE];
+ /* during TDR we need to free and alloc the pptable */
if (table_context->driver_pptable)
- return -EINVAL;
+ kfree(table_context->driver_pptable);
table_context->driver_pptable = kzalloc(table->size, GFP_KERNEL);
@@ -771,6 +775,9 @@ int smu_v11_0_set_deep_sleep_dcefclk(struct smu_context *smu, uint32_t clk)
{
int ret;
+ if (amdgpu_sriov_vf(smu->adev))
+ return 0;
+
ret = smu_send_smc_msg_with_param(smu,
SMU_MSG_SetMinDeepSleepDcefclk, clk, NULL);
if (ret)
@@ -783,8 +790,6 @@ int smu_v11_0_set_min_dcef_deep_sleep(struct smu_context *smu)
{
struct smu_table_context *table_context = &smu->smu_table;
- if (!smu->pm_enabled)
- return 0;
if (!table_context)
return -EINVAL;
@@ -816,6 +821,9 @@ int smu_v11_0_set_tool_table_location(struct smu_context *smu)
int ret = 0;
struct smu_table *tool_table = &smu->smu_table.tables[SMU_TABLE_PMSTATUSLOG];
+ if (amdgpu_sriov_vf(smu->adev))
+ return 0;
+
if (tool_table->mc_address) {
ret = smu_send_smc_msg_with_param(smu,
SMU_MSG_SetToolsDramAddrHigh,
@@ -835,6 +843,9 @@ int smu_v11_0_init_display_count(struct smu_context *smu, uint32_t count)
{
int ret = 0;
+ if (amdgpu_sriov_vf(smu->adev))
+ return 0;
+
if (!smu->pm_enabled)
return ret;
@@ -849,6 +860,9 @@ int smu_v11_0_set_allowed_mask(struct smu_context *smu)
int ret = 0;
uint32_t feature_mask[2];
+ if (amdgpu_sriov_vf(smu->adev))
+ return 0;
+
mutex_lock(&feature->mutex);
if (bitmap_empty(feature->allowed, SMU_FEATURE_MAX) || feature->feature_num < 64)
goto failed;
@@ -877,6 +891,9 @@ int smu_v11_0_get_enabled_mask(struct smu_context *smu,
struct smu_feature *feature = &smu->smu_feature;
int ret = 0;
+ if (amdgpu_sriov_vf(smu->adev) && !amdgpu_sriov_is_pp_one_vf(smu->adev))
+ return 0;
+
if (!feature_mask || num < 2)
return -EINVAL;
@@ -932,8 +949,12 @@ int smu_v11_0_notify_display_change(struct smu_context *smu)
{
int ret = 0;
+ if (amdgpu_sriov_vf(smu->adev))
+ return 0;
+
if (!smu->pm_enabled)
return ret;
+
if (smu_feature_is_enabled(smu, SMU_FEATURE_DPM_UCLK_BIT) &&
smu->adev->gmc.vram_type == AMDGPU_VRAM_TYPE_HBM)
ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetUclkFastSwitch, 1, NULL);
@@ -948,9 +969,6 @@ smu_v11_0_get_max_sustainable_clock(struct smu_context *smu, uint32_t *clock,
int ret = 0;
int clk_id;
- if (!smu->pm_enabled)
- return ret;
-
if ((smu_msg_get_index(smu, SMU_MSG_GetDcModeMaxDpmFreq) < 0) ||
(smu_msg_get_index(smu, SMU_MSG_GetMaxDpmFreq) < 0))
return 0;
@@ -1096,6 +1114,9 @@ int smu_v11_0_set_power_limit(struct smu_context *smu, uint32_t n)
int ret = 0;
uint32_t max_power_limit;
+ if (amdgpu_sriov_vf(smu->adev))
+ return 0;
+
max_power_limit = smu_v11_0_get_max_power_limit(smu);
if (n > max_power_limit) {
@@ -1205,9 +1226,6 @@ int smu_v11_0_start_thermal_control(struct smu_context *smu)
struct smu_temperature_range range;
struct amdgpu_device *adev = smu->adev;
- if (!smu->pm_enabled)
- return ret;
-
memcpy(&range, &smu11_thermal_policy[0], sizeof(struct smu_temperature_range));
ret = smu_get_thermal_temperature_range(smu, &range);
@@ -1321,9 +1339,6 @@ smu_v11_0_display_clock_voltage_request(struct smu_context *smu,
enum smu_clk_type clk_select = 0;
uint32_t clk_freq = clock_req->clock_freq_in_khz / 1000;
- if (!smu->pm_enabled)
- return -EINVAL;
-
if (smu_feature_is_enabled(smu, SMU_FEATURE_DPM_DCEFCLK_BIT) ||
smu_feature_is_enabled(smu, SMU_FEATURE_DPM_UCLK_BIT)) {
switch (clk_type) {
@@ -1533,39 +1548,59 @@ static int smu_v11_0_ack_ac_dc_interrupt(struct smu_context *smu)
#define THM_11_0__SRCID__THM_DIG_THERM_L2H 0 /* ASIC_TEMP > CG_THERMAL_INT.DIG_THERM_INTH */
#define THM_11_0__SRCID__THM_DIG_THERM_H2L 1 /* ASIC_TEMP < CG_THERMAL_INT.DIG_THERM_INTL */
+#define SMUIO_11_0__SRCID__SMUIO_GPIO19 83
+
static int smu_v11_0_irq_process(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
uint32_t client_id = entry->client_id;
uint32_t src_id = entry->src_id;
+ /*
+ * ctxid is used to distinguish different
+ * events for SMCToHost interrupt.
+ */
+ uint32_t ctxid = entry->src_data[0];
if (client_id == SOC15_IH_CLIENTID_THM) {
switch (src_id) {
case THM_11_0__SRCID__THM_DIG_THERM_L2H:
- pr_warn("GPU over temperature range detected on PCIe %d:%d.%d!\n",
- PCI_BUS_NUM(adev->pdev->devfn),
- PCI_SLOT(adev->pdev->devfn),
- PCI_FUNC(adev->pdev->devfn));
+ dev_emerg(adev->dev, "ERROR: GPU over temperature range(SW CTF) detected!\n");
+ /*
+ * SW CTF just occurred.
+ * Try to do a graceful shutdown to prevent further damage.
+ */
+ dev_emerg(adev->dev, "ERROR: System is going to shutdown due to GPU SW CTF!\n");
+ orderly_poweroff(true);
break;
case THM_11_0__SRCID__THM_DIG_THERM_H2L:
- pr_warn("GPU under temperature range detected on PCIe %d:%d.%d!\n",
- PCI_BUS_NUM(adev->pdev->devfn),
- PCI_SLOT(adev->pdev->devfn),
- PCI_FUNC(adev->pdev->devfn));
+ dev_emerg(adev->dev, "ERROR: GPU under temperature range detected\n");
break;
default:
- pr_warn("GPU under temperature range unknown src id (%d), detected on PCIe %d:%d.%d!\n",
- src_id,
- PCI_BUS_NUM(adev->pdev->devfn),
- PCI_SLOT(adev->pdev->devfn),
- PCI_FUNC(adev->pdev->devfn));
+ dev_emerg(adev->dev, "ERROR: GPU under temperature range unknown src id (%d)\n",
+ src_id);
break;
-
}
+ } else if (client_id == SOC15_IH_CLIENTID_ROM_SMUIO) {
+ dev_emerg(adev->dev, "ERROR: GPU HW Critical Temperature Fault(aka CTF) detected!\n");
+ /*
+ * HW CTF just occurred. Shutdown to prevent further damage.
+ */
+ dev_emerg(adev->dev, "ERROR: System is going to shutdown due to GPU HW CTF!\n");
+ orderly_poweroff(true);
} else if (client_id == SOC15_IH_CLIENTID_MP1) {
- if (src_id == 0xfe)
- smu_v11_0_ack_ac_dc_interrupt(&adev->smu);
+ if (src_id == 0xfe) {
+ switch (ctxid) {
+ case 0x3:
+ dev_dbg(adev->dev, "Switched to AC mode!\n");
+ smu_v11_0_ack_ac_dc_interrupt(&adev->smu);
+ break;
+ case 0x4:
+ dev_dbg(adev->dev, "Switched to DC mode!\n");
+ smu_v11_0_ack_ac_dc_interrupt(&adev->smu);
+ break;
+ }
+ }
}
return 0;
@@ -1605,6 +1640,13 @@ int smu_v11_0_register_irq_handler(struct smu_context *smu)
if (ret)
return ret;
+ /* Register CTF(GPIO_19) interrupt */
+ ret = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_ROM_SMUIO,
+ SMUIO_11_0__SRCID__SMUIO_GPIO19,
+ irq_src);
+ if (ret)
+ return ret;
+
ret = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_MP1,
0xfe,
irq_src);
@@ -1833,6 +1875,9 @@ int smu_v11_0_override_pcie_parameters(struct smu_context *smu)
uint32_t pcie_gen = 0, pcie_width = 0;
int ret;
+ if (amdgpu_sriov_vf(smu->adev))
+ return 0;
+
if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4)
pcie_gen = 3;
else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)
diff --git a/drivers/gpu/drm/amd/powerplay/smu_v12_0.c b/drivers/gpu/drm/amd/powerplay/smu_v12_0.c
index 169ebdad87b8..4023d10fb49b 100644
--- a/drivers/gpu/drm/amd/powerplay/smu_v12_0.c
+++ b/drivers/gpu/drm/amd/powerplay/smu_v12_0.c
@@ -32,13 +32,15 @@
#include "asic_reg/mp/mp_12_0_0_offset.h"
#include "asic_reg/mp/mp_12_0_0_sh_mask.h"
+#include "asic_reg/smuio/smuio_12_0_0_offset.h"
+#include "asic_reg/smuio/smuio_12_0_0_sh_mask.h"
-#define smnMP1_FIRMWARE_FLAGS 0x3010024
+// because some SMU12 based ASICs use older ip offset tables
+// we should undefine this register from the smuio12 header
+// to prevent confusion down the road
+#undef mmPWR_MISC_CNTL_STATUS
-#define mmSMUIO_GFX_MISC_CNTL 0x00c8
-#define mmSMUIO_GFX_MISC_CNTL_BASE_IDX 0
-#define SMUIO_GFX_MISC_CNTL__PWR_GFXOFF_STATUS_MASK 0x00000006L
-#define SMUIO_GFX_MISC_CNTL__PWR_GFXOFF_STATUS__SHIFT 0x1
+#define smnMP1_FIRMWARE_FLAGS 0x3010024
int smu_v12_0_send_msg_without_waiting(struct smu_context *smu,
uint16_t msg)
@@ -158,10 +160,10 @@ int smu_v12_0_check_fw_version(struct smu_context *smu)
* Considering above, we just leave user a warning message instead
* of halt driver loading.
*/
- if (if_version != smu->smc_if_version) {
+ if (if_version != smu->smc_driver_if_version) {
pr_info("smu driver if version = 0x%08x, smu fw if version = 0x%08x, "
"smu fw version = 0x%08x (%d.%d.%d)\n",
- smu->smc_if_version, if_version,
+ smu->smc_driver_if_version, if_version,
smu_version, smu_major, smu_minor, smu_debug);
pr_warn("SMU driver if version not matched\n");
}
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c
index 868e2d5f6e62..85e5b1ed22c2 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c
@@ -2780,7 +2780,7 @@ static int ci_update_dpm_settings(struct pp_hwmgr *hwmgr,
if (setting->bupdate_sclk) {
if (!data->sclk_dpm_key_disabled)
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_SCLKDPM_FreezeLevel);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_SCLKDPM_FreezeLevel, NULL);
for (i = 0; i < smu_data->smc_state_table.GraphicsDpmLevelCount; i++) {
if (levels[i].ActivityLevel !=
cpu_to_be16(setting->sclk_activity)) {
@@ -2810,12 +2810,12 @@ static int ci_update_dpm_settings(struct pp_hwmgr *hwmgr,
}
}
if (!data->sclk_dpm_key_disabled)
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_SCLKDPM_UnfreezeLevel);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_SCLKDPM_UnfreezeLevel, NULL);
}
if (setting->bupdate_mclk) {
if (!data->mclk_dpm_key_disabled)
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_MCLKDPM_FreezeLevel);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_MCLKDPM_FreezeLevel, NULL);
for (i = 0; i < smu_data->smc_state_table.MemoryDpmLevelCount; i++) {
if (mclk_levels[i].ActivityLevel !=
cpu_to_be16(setting->mclk_activity)) {
@@ -2845,7 +2845,7 @@ static int ci_update_dpm_settings(struct pp_hwmgr *hwmgr,
}
}
if (!data->mclk_dpm_key_disabled)
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_MCLKDPM_UnfreezeLevel);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_MCLKDPM_UnfreezeLevel, NULL);
}
return 0;
}
@@ -2881,8 +2881,9 @@ static int ci_update_uvd_smc_table(struct pp_hwmgr *hwmgr)
if (hwmgr->dpm_level & profile_mode_mask || !PP_CAP(PHM_PlatformCaps_UVDDPM))
break;
}
- ci_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_UVDDPM_SetEnabledMask,
- data->dpm_level_enable_mask.uvd_dpm_enable_mask);
+ smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_UVDDPM_SetEnabledMask,
+ data->dpm_level_enable_mask.uvd_dpm_enable_mask,
+ NULL);
return 0;
}
@@ -2912,8 +2913,9 @@ static int ci_update_vce_smc_table(struct pp_hwmgr *hwmgr)
if (hwmgr->dpm_level & profile_mode_mask || !PP_CAP(PHM_PlatformCaps_VCEDPM))
break;
}
- ci_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_VCEDPM_SetEnabledMask,
- data->dpm_level_enable_mask.vce_dpm_enable_mask);
+ smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_VCEDPM_SetEnabledMask,
+ data->dpm_level_enable_mask.vce_dpm_enable_mask,
+ NULL);
return 0;
}
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c
index 32ebb383c456..ecb9ee46d6b3 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c
@@ -137,9 +137,7 @@ static int fiji_start_smu_in_protection_mode(struct pp_hwmgr *hwmgr)
PHM_WAIT_VFPF_INDIRECT_FIELD(hwmgr, SMC_IND, RCU_UC_EVENTS,
INTERRUPTS_ENABLED, 1);
- cgs_write_register(hwmgr->device, mmSMC_MSG_ARG_0, 0x20000);
- cgs_write_register(hwmgr->device, mmSMC_MESSAGE_0, PPSMC_MSG_Test);
- PHM_WAIT_FIELD_UNEQUAL(hwmgr, SMC_RESP_0, SMC_RESP, 0);
+ smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_Test, 0x20000, NULL);
/* Wait for done bit to be set */
PHM_WAIT_VFPF_INDIRECT_FIELD_UNEQUAL(hwmgr, SMC_IND,
@@ -203,8 +201,9 @@ static int fiji_start_avfs_btc(struct pp_hwmgr *hwmgr)
struct smu7_smumgr *smu_data = (struct smu7_smumgr *)(hwmgr->smu_backend);
if (0 != smu_data->avfs_btc_param) {
- if (0 != smu7_send_msg_to_smc_with_parameter(hwmgr,
- PPSMC_MSG_PerformBtc, smu_data->avfs_btc_param)) {
+ if (0 != smum_send_msg_to_smc_with_parameter(hwmgr,
+ PPSMC_MSG_PerformBtc, smu_data->avfs_btc_param,
+ NULL)) {
pr_info("[AVFS][Fiji_PerformBtc] PerformBTC SMU msg failed");
result = -EINVAL;
}
@@ -1913,7 +1912,8 @@ static int fiji_setup_dpm_led_config(struct pp_hwmgr *hwmgr)
if (mask)
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_LedConfig,
- mask);
+ mask,
+ NULL);
return 0;
}
@@ -2220,14 +2220,16 @@ static int fiji_thermal_setup_fan_table(struct pp_hwmgr *hwmgr)
res = smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetFanMinPwm,
hwmgr->thermal_controller.
- advanceFanControlParameters.ucMinimumPWMLimit);
+ advanceFanControlParameters.ucMinimumPWMLimit,
+ NULL);
if (!res && hwmgr->thermal_controller.
advanceFanControlParameters.ulMinFanSCLKAcousticLimit)
res = smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetFanSclkTarget,
hwmgr->thermal_controller.
- advanceFanControlParameters.ulMinFanSCLKAcousticLimit);
+ advanceFanControlParameters.ulMinFanSCLKAcousticLimit,
+ NULL);
if (res)
phm_cap_unset(hwmgr->platform_descriptor.platformCaps,
@@ -2242,7 +2244,7 @@ static int fiji_thermal_avfs_enable(struct pp_hwmgr *hwmgr)
if (!hwmgr->avfs_supported)
return 0;
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_EnableAvfs);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_EnableAvfs, NULL);
return 0;
}
@@ -2390,7 +2392,8 @@ static int fiji_update_uvd_smc_table(struct pp_hwmgr *hwmgr)
PHM_PlatformCaps_StablePState))
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_UVDDPM_SetEnabledMask,
- (uint32_t)(1 << smu_data->smc_state_table.UvdBootLevel));
+ (uint32_t)(1 << smu_data->smc_state_table.UvdBootLevel),
+ NULL);
return 0;
}
@@ -2422,7 +2425,8 @@ static int fiji_update_vce_smc_table(struct pp_hwmgr *hwmgr)
if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_StablePState))
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_VCEDPM_SetEnabledMask,
- (uint32_t)1 << smu_data->smc_state_table.VceBootLevel);
+ (uint32_t)1 << smu_data->smc_state_table.VceBootLevel,
+ NULL);
return 0;
}
@@ -2569,7 +2573,7 @@ static int fiji_update_dpm_settings(struct pp_hwmgr *hwmgr,
if (setting->bupdate_sclk) {
if (!data->sclk_dpm_key_disabled)
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_SCLKDPM_FreezeLevel);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_SCLKDPM_FreezeLevel, NULL);
for (i = 0; i < smu_data->smc_state_table.GraphicsDpmLevelCount; i++) {
if (levels[i].ActivityLevel !=
cpu_to_be16(setting->sclk_activity)) {
@@ -2599,12 +2603,12 @@ static int fiji_update_dpm_settings(struct pp_hwmgr *hwmgr,
}
}
if (!data->sclk_dpm_key_disabled)
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_SCLKDPM_UnfreezeLevel);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_SCLKDPM_UnfreezeLevel, NULL);
}
if (setting->bupdate_mclk) {
if (!data->mclk_dpm_key_disabled)
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_MCLKDPM_FreezeLevel);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_MCLKDPM_FreezeLevel, NULL);
for (i = 0; i < smu_data->smc_state_table.MemoryDpmLevelCount; i++) {
if (mclk_levels[i].ActivityLevel !=
cpu_to_be16(setting->mclk_activity)) {
@@ -2634,7 +2638,7 @@ static int fiji_update_dpm_settings(struct pp_hwmgr *hwmgr,
}
}
if (!data->mclk_dpm_key_disabled)
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_MCLKDPM_UnfreezeLevel);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_MCLKDPM_UnfreezeLevel, NULL);
}
return 0;
}
@@ -2649,6 +2653,7 @@ const struct pp_smumgr_func fiji_smu_funcs = {
.request_smu_load_specific_fw = NULL,
.send_msg_to_smc = &smu7_send_msg_to_smc,
.send_msg_to_smc_with_parameter = &smu7_send_msg_to_smc_with_parameter,
+ .get_argument = smu7_get_argument,
.download_pptable_settings = NULL,
.upload_pptable_settings = NULL,
.update_smc_table = fiji_update_smc_table,
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c
index 732005c03a82..431ad2fd38df 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c
@@ -2669,6 +2669,7 @@ const struct pp_smumgr_func iceland_smu_funcs = {
.request_smu_load_specific_fw = &iceland_request_smu_load_specific_fw,
.send_msg_to_smc = &smu7_send_msg_to_smc,
.send_msg_to_smc_with_parameter = &smu7_send_msg_to_smc_with_parameter,
+ .get_argument = smu7_get_argument,
.download_pptable_settings = NULL,
.upload_pptable_settings = NULL,
.get_offsetof = iceland_get_offsetof,
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c
index 23c12018dbc1..c3d2e6dcf62a 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c
@@ -99,7 +99,8 @@ static int polaris10_perform_btc(struct pp_hwmgr *hwmgr)
struct smu7_smumgr *smu_data = (struct smu7_smumgr *)(hwmgr->smu_backend);
if (0 != smu_data->avfs_btc_param) {
- if (0 != smu7_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_PerformBtc, smu_data->avfs_btc_param)) {
+ if (0 != smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_PerformBtc, smu_data->avfs_btc_param,
+ NULL)) {
pr_info("[AVFS][SmuPolaris10_PerformBtc] PerformBTC SMU msg failed");
result = -1;
}
@@ -2049,15 +2050,16 @@ int polaris10_thermal_avfs_enable(struct pp_hwmgr *hwmgr)
return 0;
smum_send_msg_to_smc_with_parameter(hwmgr,
- PPSMC_MSG_SetGBDroopSettings, data->avfs_vdroop_override_setting);
+ PPSMC_MSG_SetGBDroopSettings, data->avfs_vdroop_override_setting,
+ NULL);
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_EnableAvfs);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_EnableAvfs, NULL);
/* Apply avfs cks-off voltages to avoid the overshoot
* when switching to the highest sclk frequency
*/
if (data->apply_avfs_cks_off_voltage)
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ApplyAvfsCksOffVoltage);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ApplyAvfsCksOffVoltage, NULL);
return 0;
}
@@ -2158,14 +2160,16 @@ static int polaris10_thermal_setup_fan_table(struct pp_hwmgr *hwmgr)
res = smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetFanMinPwm,
hwmgr->thermal_controller.
- advanceFanControlParameters.ucMinimumPWMLimit);
+ advanceFanControlParameters.ucMinimumPWMLimit,
+ NULL);
if (!res && hwmgr->thermal_controller.
advanceFanControlParameters.ulMinFanSCLKAcousticLimit)
res = smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetFanSclkTarget,
hwmgr->thermal_controller.
- advanceFanControlParameters.ulMinFanSCLKAcousticLimit);
+ advanceFanControlParameters.ulMinFanSCLKAcousticLimit,
+ NULL);
if (res)
phm_cap_unset(hwmgr->platform_descriptor.platformCaps,
@@ -2202,7 +2206,8 @@ static int polaris10_update_uvd_smc_table(struct pp_hwmgr *hwmgr)
PHM_PlatformCaps_StablePState))
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_UVDDPM_SetEnabledMask,
- (uint32_t)(1 << smu_data->smc_state_table.UvdBootLevel));
+ (uint32_t)(1 << smu_data->smc_state_table.UvdBootLevel),
+ NULL);
return 0;
}
@@ -2234,7 +2239,8 @@ static int polaris10_update_vce_smc_table(struct pp_hwmgr *hwmgr)
if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_StablePState))
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_VCEDPM_SetEnabledMask,
- (uint32_t)1 << smu_data->smc_state_table.VceBootLevel);
+ (uint32_t)1 << smu_data->smc_state_table.VceBootLevel,
+ NULL);
return 0;
}
@@ -2485,7 +2491,7 @@ static int polaris10_update_dpm_settings(struct pp_hwmgr *hwmgr,
if (setting->bupdate_sclk) {
if (!data->sclk_dpm_key_disabled)
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_SCLKDPM_FreezeLevel);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_SCLKDPM_FreezeLevel, NULL);
for (i = 0; i < smu_data->smc_state_table.GraphicsDpmLevelCount; i++) {
if (levels[i].ActivityLevel !=
cpu_to_be16(setting->sclk_activity)) {
@@ -2515,12 +2521,12 @@ static int polaris10_update_dpm_settings(struct pp_hwmgr *hwmgr,
}
}
if (!data->sclk_dpm_key_disabled)
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_SCLKDPM_UnfreezeLevel);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_SCLKDPM_UnfreezeLevel, NULL);
}
if (setting->bupdate_mclk) {
if (!data->mclk_dpm_key_disabled)
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_MCLKDPM_FreezeLevel);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_MCLKDPM_FreezeLevel, NULL);
for (i = 0; i < smu_data->smc_state_table.MemoryDpmLevelCount; i++) {
if (mclk_levels[i].ActivityLevel !=
cpu_to_be16(setting->mclk_activity)) {
@@ -2550,7 +2556,7 @@ static int polaris10_update_dpm_settings(struct pp_hwmgr *hwmgr,
}
}
if (!data->mclk_dpm_key_disabled)
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_MCLKDPM_UnfreezeLevel);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_MCLKDPM_UnfreezeLevel, NULL);
}
return 0;
}
@@ -2565,6 +2571,7 @@ const struct pp_smumgr_func polaris10_smu_funcs = {
.request_smu_load_specific_fw = NULL,
.send_msg_to_smc = smu7_send_msg_to_smc,
.send_msg_to_smc_with_parameter = smu7_send_msg_to_smc_with_parameter,
+ .get_argument = smu7_get_argument,
.download_pptable_settings = NULL,
.upload_pptable_settings = NULL,
.update_smc_table = polaris10_update_smc_table,
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/smu10_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/smu10_smumgr.c
index 2319400a3fcb..ea2279bb8cbf 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/smu10_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/smu10_smumgr.c
@@ -126,15 +126,18 @@ static int smu10_copy_table_from_smc(struct pp_hwmgr *hwmgr,
"Invalid SMU Table version!", return -EINVAL;);
PP_ASSERT_WITH_CODE(priv->smu_tables.entry[table_id].size != 0,
"Invalid SMU Table Length!", return -EINVAL;);
- smu10_send_msg_to_smc_with_parameter(hwmgr,
+ smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetDriverDramAddrHigh,
- upper_32_bits(priv->smu_tables.entry[table_id].mc_addr));
- smu10_send_msg_to_smc_with_parameter(hwmgr,
+ upper_32_bits(priv->smu_tables.entry[table_id].mc_addr),
+ NULL);
+ smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetDriverDramAddrLow,
- lower_32_bits(priv->smu_tables.entry[table_id].mc_addr));
- smu10_send_msg_to_smc_with_parameter(hwmgr,
+ lower_32_bits(priv->smu_tables.entry[table_id].mc_addr),
+ NULL);
+ smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_TransferTableSmu2Dram,
- priv->smu_tables.entry[table_id].table_id);
+ priv->smu_tables.entry[table_id].table_id,
+ NULL);
/* flush hdp cache */
amdgpu_asic_flush_hdp(adev, NULL);
@@ -164,15 +167,18 @@ static int smu10_copy_table_to_smc(struct pp_hwmgr *hwmgr,
amdgpu_asic_flush_hdp(adev, NULL);
- smu10_send_msg_to_smc_with_parameter(hwmgr,
+ smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetDriverDramAddrHigh,
- upper_32_bits(priv->smu_tables.entry[table_id].mc_addr));
- smu10_send_msg_to_smc_with_parameter(hwmgr,
+ upper_32_bits(priv->smu_tables.entry[table_id].mc_addr),
+ NULL);
+ smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetDriverDramAddrLow,
- lower_32_bits(priv->smu_tables.entry[table_id].mc_addr));
- smu10_send_msg_to_smc_with_parameter(hwmgr,
+ lower_32_bits(priv->smu_tables.entry[table_id].mc_addr),
+ NULL);
+ smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_TransferTableDram2Smu,
- priv->smu_tables.entry[table_id].table_id);
+ priv->smu_tables.entry[table_id].table_id,
+ NULL);
return 0;
}
@@ -181,9 +187,9 @@ static int smu10_verify_smc_interface(struct pp_hwmgr *hwmgr)
{
uint32_t smc_driver_if_version;
- smu10_send_msg_to_smc(hwmgr,
- PPSMC_MSG_GetDriverIfVersion);
- smc_driver_if_version = smu10_read_arg_from_smc(hwmgr);
+ smum_send_msg_to_smc(hwmgr,
+ PPSMC_MSG_GetDriverIfVersion,
+ &smc_driver_if_version);
if ((smc_driver_if_version != SMU10_DRIVER_IF_VERSION) &&
(smc_driver_if_version != SMU10_DRIVER_IF_VERSION + 1)) {
@@ -217,11 +223,11 @@ static int smu10_start_smu(struct pp_hwmgr *hwmgr)
{
struct amdgpu_device *adev = hwmgr->adev;
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetSmuVersion);
- hwmgr->smu_version = smu10_read_arg_from_smc(hwmgr);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetSmuVersion, &hwmgr->smu_version);
adev->pm.fw_version = hwmgr->smu_version >> 8;
- if (adev->rev_id < 0x8 && adev->pdev->device != 0x15d8 &&
+ if (!(adev->apu_flags & AMD_APU_IS_RAVEN2) &&
+ (adev->apu_flags & AMD_APU_IS_RAVEN) &&
adev->pm.fw_version < 0x1e45)
adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.c
index 3f51d545e8ff..aae25243eb10 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.c
@@ -191,13 +191,6 @@ int smu7_send_msg_to_smc(struct pp_hwmgr *hwmgr, uint16_t msg)
return 0;
}
-int smu7_send_msg_to_smc_without_waiting(struct pp_hwmgr *hwmgr, uint16_t msg)
-{
- cgs_write_register(hwmgr->device, mmSMC_MESSAGE_0, msg);
-
- return 0;
-}
-
int smu7_send_msg_to_smc_with_parameter(struct pp_hwmgr *hwmgr, uint16_t msg, uint32_t parameter)
{
PHM_WAIT_FIELD_UNEQUAL(hwmgr, SMC_RESP_0, SMC_RESP, 0);
@@ -207,25 +200,14 @@ int smu7_send_msg_to_smc_with_parameter(struct pp_hwmgr *hwmgr, uint16_t msg, ui
return smu7_send_msg_to_smc(hwmgr, msg);
}
-int smu7_send_msg_to_smc_with_parameter_without_waiting(struct pp_hwmgr *hwmgr, uint16_t msg, uint32_t parameter)
+uint32_t smu7_get_argument(struct pp_hwmgr *hwmgr)
{
- cgs_write_register(hwmgr->device, mmSMC_MSG_ARG_0, parameter);
-
- return smu7_send_msg_to_smc_without_waiting(hwmgr, msg);
+ return cgs_read_register(hwmgr->device, mmSMC_MSG_ARG_0);
}
int smu7_send_msg_to_smc_offset(struct pp_hwmgr *hwmgr)
{
- cgs_write_register(hwmgr->device, mmSMC_MSG_ARG_0, 0x20000);
-
- cgs_write_register(hwmgr->device, mmSMC_MESSAGE_0, PPSMC_MSG_Test);
-
- PHM_WAIT_FIELD_UNEQUAL(hwmgr, SMC_RESP_0, SMC_RESP, 0);
-
- if (1 != PHM_READ_FIELD(hwmgr->device, SMC_RESP_0, SMC_RESP))
- pr_info("Failed to send Message.\n");
-
- return 0;
+ return smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_Test, 0x20000, NULL);
}
enum cgs_ucode_id smu7_convert_fw_type_to_cgs(uint32_t fw_type)
@@ -353,12 +335,14 @@ int smu7_request_smu_load_fw(struct pp_hwmgr *hwmgr)
if (hwmgr->chip_id > CHIP_TOPAZ) { /* add support for Topaz */
if (hwmgr->not_vf) {
- smu7_send_msg_to_smc_with_parameter(hwmgr,
+ smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SMU_DRAM_ADDR_HI,
- upper_32_bits(smu_data->smu_buffer.mc_addr));
- smu7_send_msg_to_smc_with_parameter(hwmgr,
+ upper_32_bits(smu_data->smu_buffer.mc_addr),
+ NULL);
+ smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SMU_DRAM_ADDR_LO,
- lower_32_bits(smu_data->smu_buffer.mc_addr));
+ lower_32_bits(smu_data->smu_buffer.mc_addr),
+ NULL);
}
fw_to_load = UCODE_ID_RLC_G_MASK
+ UCODE_ID_SDMA0_MASK
@@ -423,10 +407,16 @@ int smu7_request_smu_load_fw(struct pp_hwmgr *hwmgr)
}
memcpy_toio(smu_data->header_buffer.kaddr, smu_data->toc,
sizeof(struct SMU_DRAMData_TOC));
- smu7_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_DRV_DRAM_ADDR_HI, upper_32_bits(smu_data->header_buffer.mc_addr));
- smu7_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_DRV_DRAM_ADDR_LO, lower_32_bits(smu_data->header_buffer.mc_addr));
-
- smu7_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_LoadUcodes, fw_to_load);
+ smum_send_msg_to_smc_with_parameter(hwmgr,
+ PPSMC_MSG_DRV_DRAM_ADDR_HI,
+ upper_32_bits(smu_data->header_buffer.mc_addr),
+ NULL);
+ smum_send_msg_to_smc_with_parameter(hwmgr,
+ PPSMC_MSG_DRV_DRAM_ADDR_LO,
+ lower_32_bits(smu_data->header_buffer.mc_addr),
+ NULL);
+
+ smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_LoadUcodes, fw_to_load, NULL);
r = smu7_check_fw_load_finish(hwmgr, fw_to_load);
if (!r)
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.h b/drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.h
index 01f0538fba6b..e7303dc8c260 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.h
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.h
@@ -60,11 +60,9 @@ int smu7_copy_bytes_to_smc(struct pp_hwmgr *hwmgr, uint32_t smc_start_address,
int smu7_program_jump_on_start(struct pp_hwmgr *hwmgr);
bool smu7_is_smc_ram_running(struct pp_hwmgr *hwmgr);
int smu7_send_msg_to_smc(struct pp_hwmgr *hwmgr, uint16_t msg);
-int smu7_send_msg_to_smc_without_waiting(struct pp_hwmgr *hwmgr, uint16_t msg);
int smu7_send_msg_to_smc_with_parameter(struct pp_hwmgr *hwmgr, uint16_t msg,
uint32_t parameter);
-int smu7_send_msg_to_smc_with_parameter_without_waiting(struct pp_hwmgr *hwmgr,
- uint16_t msg, uint32_t parameter);
+uint32_t smu7_get_argument(struct pp_hwmgr *hwmgr);
int smu7_send_msg_to_smc_offset(struct pp_hwmgr *hwmgr);
enum cgs_ucode_id smu7_convert_fw_type_to_cgs(uint32_t fw_type);
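The header change above pairs with the new .get_argument entries added to the smu7-based function tables elsewhere in this patch; the smum_* wrappers reject a non-NULL resp unless that callback is wired up. Condensed wiring as seen in the fiji/iceland/polaris/tonga/vegam tables (the table name here is hypothetical, for illustration only):

	static const struct pp_smumgr_func example_smu_funcs = {
		.send_msg_to_smc = smu7_send_msg_to_smc,
		.send_msg_to_smc_with_parameter = smu7_send_msg_to_smc_with_parameter,
		.get_argument = smu7_get_argument, /* required when callers pass a non-NULL resp */
		/* ... remaining callbacks unchanged ... */
	};
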
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/smu8_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/smu8_smumgr.c
index 7dca04a89217..76d4f12ceedf 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/smu8_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/smu8_smumgr.c
@@ -610,18 +610,21 @@ static int smu8_download_pptable_settings(struct pp_hwmgr *hwmgr, void **table)
*table = (struct SMU8_Fusion_ClkTable *)smu8_smu->scratch_buffer[i].kaddr;
- smu8_send_msg_to_smc_with_parameter(hwmgr,
+ smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetClkTableAddrHi,
- upper_32_bits(smu8_smu->scratch_buffer[i].mc_addr));
+ upper_32_bits(smu8_smu->scratch_buffer[i].mc_addr),
+ NULL);
- smu8_send_msg_to_smc_with_parameter(hwmgr,
+ smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetClkTableAddrLo,
- lower_32_bits(smu8_smu->scratch_buffer[i].mc_addr));
+ lower_32_bits(smu8_smu->scratch_buffer[i].mc_addr),
+ NULL);
- smu8_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_ExecuteJob,
- smu8_smu->toc_entry_clock_table);
+ smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_ExecuteJob,
+ smu8_smu->toc_entry_clock_table,
+ NULL);
- smu8_send_msg_to_smc(hwmgr, PPSMC_MSG_ClkTableXferToDram);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ClkTableXferToDram, NULL);
return 0;
}
@@ -637,18 +640,21 @@ static int smu8_upload_pptable_settings(struct pp_hwmgr *hwmgr)
break;
}
- smu8_send_msg_to_smc_with_parameter(hwmgr,
+ smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetClkTableAddrHi,
- upper_32_bits(smu8_smu->scratch_buffer[i].mc_addr));
+ upper_32_bits(smu8_smu->scratch_buffer[i].mc_addr),
+ NULL);
- smu8_send_msg_to_smc_with_parameter(hwmgr,
+ smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetClkTableAddrLo,
- lower_32_bits(smu8_smu->scratch_buffer[i].mc_addr));
+ lower_32_bits(smu8_smu->scratch_buffer[i].mc_addr),
+ NULL);
- smu8_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_ExecuteJob,
- smu8_smu->toc_entry_clock_table);
+ smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_ExecuteJob,
+ smu8_smu->toc_entry_clock_table,
+ NULL);
- smu8_send_msg_to_smc(hwmgr, PPSMC_MSG_ClkTableXferToSmu);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ClkTableXferToSmu, NULL);
return 0;
}
@@ -671,25 +677,30 @@ static int smu8_request_smu_load_fw(struct pp_hwmgr *hwmgr)
smu8_write_smc_sram_dword(hwmgr, smc_address, 0, smc_address+4);
- smu8_send_msg_to_smc_with_parameter(hwmgr,
+ smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_DriverDramAddrHi,
- upper_32_bits(smu8_smu->toc_buffer.mc_addr));
+ upper_32_bits(smu8_smu->toc_buffer.mc_addr),
+ NULL);
- smu8_send_msg_to_smc_with_parameter(hwmgr,
+ smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_DriverDramAddrLo,
- lower_32_bits(smu8_smu->toc_buffer.mc_addr));
+ lower_32_bits(smu8_smu->toc_buffer.mc_addr),
+ NULL);
- smu8_send_msg_to_smc(hwmgr, PPSMC_MSG_InitJobs);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_InitJobs, NULL);
- smu8_send_msg_to_smc_with_parameter(hwmgr,
+ smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_ExecuteJob,
- smu8_smu->toc_entry_aram);
- smu8_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_ExecuteJob,
- smu8_smu->toc_entry_power_profiling_index);
+ smu8_smu->toc_entry_aram,
+ NULL);
+ smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_ExecuteJob,
+ smu8_smu->toc_entry_power_profiling_index,
+ NULL);
- smu8_send_msg_to_smc_with_parameter(hwmgr,
+ smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_ExecuteJob,
- smu8_smu->toc_entry_initialize_index);
+ smu8_smu->toc_entry_initialize_index,
+ NULL);
fw_to_check = UCODE_ID_RLC_G_MASK |
UCODE_ID_SDMA0_MASK |
@@ -860,11 +871,13 @@ static bool smu8_dpm_check_smu_features(struct pp_hwmgr *hwmgr,
unsigned long check_feature)
{
int result;
- unsigned long features;
+ uint32_t features;
- result = smu8_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_GetFeatureStatus, 0);
+ result = smum_send_msg_to_smc_with_parameter(hwmgr,
+ PPSMC_MSG_GetFeatureStatus,
+ 0,
+ &features);
if (result == 0) {
- features = smum_get_argument(hwmgr);
if (features & check_feature)
return true;
}
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c
index 4240aeec9000..b6fb48066841 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c
@@ -103,14 +103,6 @@ int smum_process_firmware_header(struct pp_hwmgr *hwmgr)
return 0;
}
-uint32_t smum_get_argument(struct pp_hwmgr *hwmgr)
-{
- if (NULL != hwmgr->smumgr_funcs->get_argument)
- return hwmgr->smumgr_funcs->get_argument(hwmgr);
-
- return 0;
-}
-
uint32_t smum_get_mac_definition(struct pp_hwmgr *hwmgr, uint32_t value)
{
if (NULL != hwmgr->smumgr_funcs->get_mac_definition)
@@ -135,22 +127,58 @@ int smum_upload_powerplay_table(struct pp_hwmgr *hwmgr)
return 0;
}
-int smum_send_msg_to_smc(struct pp_hwmgr *hwmgr, uint16_t msg)
+int smum_send_msg_to_smc(struct pp_hwmgr *hwmgr, uint16_t msg, uint32_t *resp)
{
- if (hwmgr == NULL || hwmgr->smumgr_funcs->send_msg_to_smc == NULL)
+ int ret = 0;
+
+ if (hwmgr == NULL ||
+ hwmgr->smumgr_funcs->send_msg_to_smc == NULL ||
+ (resp && !hwmgr->smumgr_funcs->get_argument))
return -EINVAL;
- return hwmgr->smumgr_funcs->send_msg_to_smc(hwmgr, msg);
+ mutex_lock(&hwmgr->msg_lock);
+
+ ret = hwmgr->smumgr_funcs->send_msg_to_smc(hwmgr, msg);
+ if (ret) {
+ mutex_unlock(&hwmgr->msg_lock);
+ return ret;
+ }
+
+ if (resp)
+ *resp = hwmgr->smumgr_funcs->get_argument(hwmgr);
+
+ mutex_unlock(&hwmgr->msg_lock);
+
+ return ret;
}
int smum_send_msg_to_smc_with_parameter(struct pp_hwmgr *hwmgr,
- uint16_t msg, uint32_t parameter)
+ uint16_t msg,
+ uint32_t parameter,
+ uint32_t *resp)
{
+ int ret = 0;
+
if (hwmgr == NULL ||
- hwmgr->smumgr_funcs->send_msg_to_smc_with_parameter == NULL)
+ hwmgr->smumgr_funcs->send_msg_to_smc_with_parameter == NULL ||
+ (resp && !hwmgr->smumgr_funcs->get_argument))
return -EINVAL;
- return hwmgr->smumgr_funcs->send_msg_to_smc_with_parameter(
+
+ mutex_lock(&hwmgr->msg_lock);
+
+ ret = hwmgr->smumgr_funcs->send_msg_to_smc_with_parameter(
hwmgr, msg, parameter);
+ if (ret) {
+ mutex_unlock(&hwmgr->msg_lock);
+ return ret;
+ }
+
+ if (resp)
+ *resp = hwmgr->smumgr_funcs->get_argument(hwmgr);
+
+ mutex_unlock(&hwmgr->msg_lock);
+
+ return ret;
}
int smum_init_smc_table(struct pp_hwmgr *hwmgr)
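A minimal usage sketch of the reworked wrappers (illustrative only, not part of the patch; names taken from the hunks above): a caller that needs the SMU's reply passes a pointer in place of the removed smum_get_argument() read-back, and a caller that does not care passes NULL. Serialization on hwmgr->msg_lock happens inside the wrapper, so the read-back cannot race with another message.

	/* illustrative sketch only */
	uint32_t if_version;
	int ret;

	/* message with a reply: the argument register is read back under msg_lock */
	ret = smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetDriverIfVersion, &if_version);
	if (ret)
		return ret;

	/* message whose reply is not needed: pass NULL for resp */
	/* ('mask' as computed by the caller, e.g. fiji_setup_dpm_led_config()) */
	smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_LedConfig, mask, NULL);
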
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c
index f19bac7ef7ba..398e7e3587de 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c
@@ -2702,7 +2702,8 @@ static int tonga_update_uvd_smc_table(struct pp_hwmgr *hwmgr)
PHM_PlatformCaps_StablePState))
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_UVDDPM_SetEnabledMask,
- (uint32_t)(1 << smu_data->smc_state_table.UvdBootLevel));
+ (uint32_t)(1 << smu_data->smc_state_table.UvdBootLevel),
+ NULL);
return 0;
}
@@ -2733,7 +2734,8 @@ static int tonga_update_vce_smc_table(struct pp_hwmgr *hwmgr)
PHM_PlatformCaps_StablePState))
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_VCEDPM_SetEnabledMask,
- (uint32_t)1 << smu_data->smc_state_table.VceBootLevel);
+ (uint32_t)1 << smu_data->smc_state_table.VceBootLevel,
+ NULL);
return 0;
}
@@ -3168,7 +3170,7 @@ static int tonga_update_dpm_settings(struct pp_hwmgr *hwmgr,
if (setting->bupdate_sclk) {
if (!data->sclk_dpm_key_disabled)
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_SCLKDPM_FreezeLevel);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_SCLKDPM_FreezeLevel, NULL);
for (i = 0; i < smu_data->smc_state_table.GraphicsDpmLevelCount; i++) {
if (levels[i].ActivityLevel !=
cpu_to_be16(setting->sclk_activity)) {
@@ -3198,12 +3200,12 @@ static int tonga_update_dpm_settings(struct pp_hwmgr *hwmgr,
}
}
if (!data->sclk_dpm_key_disabled)
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_SCLKDPM_UnfreezeLevel);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_SCLKDPM_UnfreezeLevel, NULL);
}
if (setting->bupdate_mclk) {
if (!data->mclk_dpm_key_disabled)
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_MCLKDPM_FreezeLevel);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_MCLKDPM_FreezeLevel, NULL);
for (i = 0; i < smu_data->smc_state_table.MemoryDpmLevelCount; i++) {
if (mclk_levels[i].ActivityLevel !=
cpu_to_be16(setting->mclk_activity)) {
@@ -3233,7 +3235,7 @@ static int tonga_update_dpm_settings(struct pp_hwmgr *hwmgr,
}
}
if (!data->mclk_dpm_key_disabled)
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_MCLKDPM_UnfreezeLevel);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_MCLKDPM_UnfreezeLevel, NULL);
}
return 0;
}
@@ -3248,6 +3250,7 @@ const struct pp_smumgr_func tonga_smu_funcs = {
.request_smu_load_specific_fw = NULL,
.send_msg_to_smc = &smu7_send_msg_to_smc,
.send_msg_to_smc_with_parameter = &smu7_send_msg_to_smc_with_parameter,
+ .get_argument = smu7_get_argument,
.download_pptable_settings = NULL,
.upload_pptable_settings = NULL,
.update_smc_table = tonga_update_smc_table,
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c
index 715564009089..1e222c5d91a4 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c
@@ -47,15 +47,18 @@ static int vega10_copy_table_from_smc(struct pp_hwmgr *hwmgr,
"Invalid SMU Table version!", return -EINVAL);
PP_ASSERT_WITH_CODE(priv->smu_tables.entry[table_id].size != 0,
"Invalid SMU Table Length!", return -EINVAL);
- smu9_send_msg_to_smc_with_parameter(hwmgr,
+ smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetDriverDramAddrHigh,
- upper_32_bits(priv->smu_tables.entry[table_id].mc_addr));
- smu9_send_msg_to_smc_with_parameter(hwmgr,
+ upper_32_bits(priv->smu_tables.entry[table_id].mc_addr),
+ NULL);
+ smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetDriverDramAddrLow,
- lower_32_bits(priv->smu_tables.entry[table_id].mc_addr));
- smu9_send_msg_to_smc_with_parameter(hwmgr,
+ lower_32_bits(priv->smu_tables.entry[table_id].mc_addr),
+ NULL);
+ smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_TransferTableSmu2Dram,
- priv->smu_tables.entry[table_id].table_id);
+ priv->smu_tables.entry[table_id].table_id,
+ NULL);
/* flush hdp cache */
amdgpu_asic_flush_hdp(adev, NULL);
@@ -90,15 +93,18 @@ static int vega10_copy_table_to_smc(struct pp_hwmgr *hwmgr,
amdgpu_asic_flush_hdp(adev, NULL);
- smu9_send_msg_to_smc_with_parameter(hwmgr,
+ smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetDriverDramAddrHigh,
- upper_32_bits(priv->smu_tables.entry[table_id].mc_addr));
- smu9_send_msg_to_smc_with_parameter(hwmgr,
+ upper_32_bits(priv->smu_tables.entry[table_id].mc_addr),
+ NULL);
+ smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetDriverDramAddrLow,
- lower_32_bits(priv->smu_tables.entry[table_id].mc_addr));
- smu9_send_msg_to_smc_with_parameter(hwmgr,
+ lower_32_bits(priv->smu_tables.entry[table_id].mc_addr),
+ NULL);
+ smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_TransferTableDram2Smu,
- priv->smu_tables.entry[table_id].table_id);
+ priv->smu_tables.entry[table_id].table_id,
+ NULL);
return 0;
}
@@ -118,17 +124,21 @@ int vega10_enable_smc_features(struct pp_hwmgr *hwmgr,
return 0;
return smum_send_msg_to_smc_with_parameter(hwmgr,
- msg, feature_mask);
+ msg, feature_mask, NULL);
}
int vega10_get_enabled_smc_features(struct pp_hwmgr *hwmgr,
uint64_t *features_enabled)
{
+ uint32_t enabled_features;
+
if (features_enabled == NULL)
return -EINVAL;
- smu9_send_msg_to_smc(hwmgr, PPSMC_MSG_GetEnabledSmuFeatures);
- *features_enabled = smu9_get_argument(hwmgr);
+ smum_send_msg_to_smc(hwmgr,
+ PPSMC_MSG_GetEnabledSmuFeatures,
+ &enabled_features);
+ *features_enabled = enabled_features;
return 0;
}
@@ -150,12 +160,14 @@ static int vega10_set_tools_address(struct pp_hwmgr *hwmgr)
struct vega10_smumgr *priv = hwmgr->smu_backend;
if (priv->smu_tables.entry[TOOLSTABLE].mc_addr) {
- smu9_send_msg_to_smc_with_parameter(hwmgr,
+ smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetToolsDramAddrHigh,
- upper_32_bits(priv->smu_tables.entry[TOOLSTABLE].mc_addr));
- smu9_send_msg_to_smc_with_parameter(hwmgr,
+ upper_32_bits(priv->smu_tables.entry[TOOLSTABLE].mc_addr),
+ NULL);
+ smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetToolsDramAddrLow,
- lower_32_bits(priv->smu_tables.entry[TOOLSTABLE].mc_addr));
+ lower_32_bits(priv->smu_tables.entry[TOOLSTABLE].mc_addr),
+ NULL);
}
return 0;
}
@@ -167,11 +179,11 @@ static int vega10_verify_smc_interface(struct pp_hwmgr *hwmgr)
uint32_t dev_id;
uint32_t rev_id;
- PP_ASSERT_WITH_CODE(!smu9_send_msg_to_smc(hwmgr,
- PPSMC_MSG_GetDriverIfVersion),
+ PP_ASSERT_WITH_CODE(!smum_send_msg_to_smc(hwmgr,
+ PPSMC_MSG_GetDriverIfVersion,
+ &smc_driver_if_version),
"Attempt to get SMC IF Version Number Failed!",
return -EINVAL);
- smc_driver_if_version = smu9_get_argument(hwmgr);
dev_id = adev->pdev->device;
rev_id = adev->pdev->revision;
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c
index 275dbf65f1a0..f54df76537e4 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c
@@ -50,18 +50,21 @@ static int vega12_copy_table_from_smc(struct pp_hwmgr *hwmgr,
"Invalid SMU Table version!", return -EINVAL);
PP_ASSERT_WITH_CODE(priv->smu_tables.entry[table_id].size != 0,
"Invalid SMU Table Length!", return -EINVAL);
- PP_ASSERT_WITH_CODE(smu9_send_msg_to_smc_with_parameter(hwmgr,
+ PP_ASSERT_WITH_CODE(smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetDriverDramAddrHigh,
- upper_32_bits(priv->smu_tables.entry[table_id].mc_addr)) == 0,
+ upper_32_bits(priv->smu_tables.entry[table_id].mc_addr),
+ NULL) == 0,
"[CopyTableFromSMC] Attempt to Set Dram Addr High Failed!", return -EINVAL);
- PP_ASSERT_WITH_CODE(smu9_send_msg_to_smc_with_parameter(hwmgr,
+ PP_ASSERT_WITH_CODE(smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetDriverDramAddrLow,
- lower_32_bits(priv->smu_tables.entry[table_id].mc_addr)) == 0,
+ lower_32_bits(priv->smu_tables.entry[table_id].mc_addr),
+ NULL) == 0,
"[CopyTableFromSMC] Attempt to Set Dram Addr Low Failed!",
return -EINVAL);
- PP_ASSERT_WITH_CODE(smu9_send_msg_to_smc_with_parameter(hwmgr,
+ PP_ASSERT_WITH_CODE(smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_TransferTableSmu2Dram,
- table_id) == 0,
+ table_id,
+ NULL) == 0,
"[CopyTableFromSMC] Attempt to Transfer Table From SMU Failed!",
return -EINVAL);
@@ -98,19 +101,22 @@ static int vega12_copy_table_to_smc(struct pp_hwmgr *hwmgr,
amdgpu_asic_flush_hdp(adev, NULL);
- PP_ASSERT_WITH_CODE(smu9_send_msg_to_smc_with_parameter(hwmgr,
+ PP_ASSERT_WITH_CODE(smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetDriverDramAddrHigh,
- upper_32_bits(priv->smu_tables.entry[table_id].mc_addr)) == 0,
+ upper_32_bits(priv->smu_tables.entry[table_id].mc_addr),
+ NULL) == 0,
"[CopyTableToSMC] Attempt to Set Dram Addr High Failed!",
return -EINVAL;);
- PP_ASSERT_WITH_CODE(smu9_send_msg_to_smc_with_parameter(hwmgr,
+ PP_ASSERT_WITH_CODE(smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetDriverDramAddrLow,
- lower_32_bits(priv->smu_tables.entry[table_id].mc_addr)) == 0,
+ lower_32_bits(priv->smu_tables.entry[table_id].mc_addr),
+ NULL) == 0,
"[CopyTableToSMC] Attempt to Set Dram Addr Low Failed!",
return -EINVAL);
- PP_ASSERT_WITH_CODE(smu9_send_msg_to_smc_with_parameter(hwmgr,
+ PP_ASSERT_WITH_CODE(smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_TransferTableDram2Smu,
- table_id) == 0,
+ table_id,
+ NULL) == 0,
"[CopyTableToSMC] Attempt to Transfer Table To SMU Failed!",
return -EINVAL);
@@ -126,21 +132,21 @@ int vega12_enable_smc_features(struct pp_hwmgr *hwmgr,
smu_features_high = (uint32_t)((feature_mask & SMU_FEATURES_HIGH_MASK) >> SMU_FEATURES_HIGH_SHIFT);
if (enable) {
- PP_ASSERT_WITH_CODE(smu9_send_msg_to_smc_with_parameter(hwmgr,
- PPSMC_MSG_EnableSmuFeaturesLow, smu_features_low) == 0,
+ PP_ASSERT_WITH_CODE(smum_send_msg_to_smc_with_parameter(hwmgr,
+ PPSMC_MSG_EnableSmuFeaturesLow, smu_features_low, NULL) == 0,
"[EnableDisableSMCFeatures] Attempt to enable SMU features Low failed!",
return -EINVAL);
- PP_ASSERT_WITH_CODE(smu9_send_msg_to_smc_with_parameter(hwmgr,
- PPSMC_MSG_EnableSmuFeaturesHigh, smu_features_high) == 0,
+ PP_ASSERT_WITH_CODE(smum_send_msg_to_smc_with_parameter(hwmgr,
+ PPSMC_MSG_EnableSmuFeaturesHigh, smu_features_high, NULL) == 0,
"[EnableDisableSMCFeatures] Attempt to enable SMU features High failed!",
return -EINVAL);
} else {
- PP_ASSERT_WITH_CODE(smu9_send_msg_to_smc_with_parameter(hwmgr,
- PPSMC_MSG_DisableSmuFeaturesLow, smu_features_low) == 0,
+ PP_ASSERT_WITH_CODE(smum_send_msg_to_smc_with_parameter(hwmgr,
+ PPSMC_MSG_DisableSmuFeaturesLow, smu_features_low, NULL) == 0,
"[EnableDisableSMCFeatures] Attempt to disable SMU features Low failed!",
return -EINVAL);
- PP_ASSERT_WITH_CODE(smu9_send_msg_to_smc_with_parameter(hwmgr,
- PPSMC_MSG_DisableSmuFeaturesHigh, smu_features_high) == 0,
+ PP_ASSERT_WITH_CODE(smum_send_msg_to_smc_with_parameter(hwmgr,
+ PPSMC_MSG_DisableSmuFeaturesHigh, smu_features_high, NULL) == 0,
"[EnableDisableSMCFeatures] Attempt to disable SMU features High failed!",
return -EINVAL);
}
@@ -156,17 +162,17 @@ int vega12_get_enabled_smc_features(struct pp_hwmgr *hwmgr,
if (features_enabled == NULL)
return -EINVAL;
- PP_ASSERT_WITH_CODE(smu9_send_msg_to_smc(hwmgr,
- PPSMC_MSG_GetEnabledSmuFeaturesLow) == 0,
+ PP_ASSERT_WITH_CODE(smum_send_msg_to_smc(hwmgr,
+ PPSMC_MSG_GetEnabledSmuFeaturesLow,
+ &smc_features_low) == 0,
"[GetEnabledSMCFeatures] Attempt to get SMU features Low failed!",
return -EINVAL);
- smc_features_low = smu9_get_argument(hwmgr);
- PP_ASSERT_WITH_CODE(smu9_send_msg_to_smc(hwmgr,
- PPSMC_MSG_GetEnabledSmuFeaturesHigh) == 0,
+ PP_ASSERT_WITH_CODE(smum_send_msg_to_smc(hwmgr,
+ PPSMC_MSG_GetEnabledSmuFeaturesHigh,
+ &smc_features_high) == 0,
"[GetEnabledSMCFeatures] Attempt to get SMU features High failed!",
return -EINVAL);
- smc_features_high = smu9_get_argument(hwmgr);
*features_enabled = ((((uint64_t)smc_features_low << SMU_FEATURES_LOW_SHIFT) & SMU_FEATURES_LOW_MASK) |
(((uint64_t)smc_features_high << SMU_FEATURES_HIGH_SHIFT) & SMU_FEATURES_HIGH_MASK));
@@ -192,12 +198,14 @@ static int vega12_set_tools_address(struct pp_hwmgr *hwmgr)
(struct vega12_smumgr *)(hwmgr->smu_backend);
if (priv->smu_tables.entry[TABLE_PMSTATUSLOG].mc_addr) {
- if (!smu9_send_msg_to_smc_with_parameter(hwmgr,
+ if (!smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetToolsDramAddrHigh,
- upper_32_bits(priv->smu_tables.entry[TABLE_PMSTATUSLOG].mc_addr)))
- smu9_send_msg_to_smc_with_parameter(hwmgr,
+ upper_32_bits(priv->smu_tables.entry[TABLE_PMSTATUSLOG].mc_addr),
+ NULL))
+ smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetToolsDramAddrLow,
- lower_32_bits(priv->smu_tables.entry[TABLE_PMSTATUSLOG].mc_addr));
+ lower_32_bits(priv->smu_tables.entry[TABLE_PMSTATUSLOG].mc_addr),
+ NULL);
}
return 0;
}
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vega20_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/vega20_smumgr.c
index 16aa171971d3..2fb97554134f 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/vega20_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/vega20_smumgr.c
@@ -175,18 +175,20 @@ static int vega20_copy_table_from_smc(struct pp_hwmgr *hwmgr,
PP_ASSERT_WITH_CODE(priv->smu_tables.entry[table_id].size != 0,
"Invalid SMU Table Length!", return -EINVAL);
- PP_ASSERT_WITH_CODE((ret = vega20_send_msg_to_smc_with_parameter(hwmgr,
+ PP_ASSERT_WITH_CODE((ret = smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetDriverDramAddrHigh,
- upper_32_bits(priv->smu_tables.entry[table_id].mc_addr))) == 0,
+ upper_32_bits(priv->smu_tables.entry[table_id].mc_addr),
+ NULL)) == 0,
"[CopyTableFromSMC] Attempt to Set Dram Addr High Failed!",
return ret);
- PP_ASSERT_WITH_CODE((ret = vega20_send_msg_to_smc_with_parameter(hwmgr,
+ PP_ASSERT_WITH_CODE((ret = smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetDriverDramAddrLow,
- lower_32_bits(priv->smu_tables.entry[table_id].mc_addr))) == 0,
+ lower_32_bits(priv->smu_tables.entry[table_id].mc_addr),
+ NULL)) == 0,
"[CopyTableFromSMC] Attempt to Set Dram Addr Low Failed!",
return ret);
- PP_ASSERT_WITH_CODE((ret = vega20_send_msg_to_smc_with_parameter(hwmgr,
- PPSMC_MSG_TransferTableSmu2Dram, table_id)) == 0,
+ PP_ASSERT_WITH_CODE((ret = smum_send_msg_to_smc_with_parameter(hwmgr,
+ PPSMC_MSG_TransferTableSmu2Dram, table_id, NULL)) == 0,
"[CopyTableFromSMC] Attempt to Transfer Table From SMU Failed!",
return ret);
@@ -224,18 +226,20 @@ static int vega20_copy_table_to_smc(struct pp_hwmgr *hwmgr,
amdgpu_asic_flush_hdp(adev, NULL);
- PP_ASSERT_WITH_CODE((ret = vega20_send_msg_to_smc_with_parameter(hwmgr,
+ PP_ASSERT_WITH_CODE((ret = smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetDriverDramAddrHigh,
- upper_32_bits(priv->smu_tables.entry[table_id].mc_addr))) == 0,
+ upper_32_bits(priv->smu_tables.entry[table_id].mc_addr),
+ NULL)) == 0,
"[CopyTableToSMC] Attempt to Set Dram Addr High Failed!",
return ret);
- PP_ASSERT_WITH_CODE((ret = vega20_send_msg_to_smc_with_parameter(hwmgr,
+ PP_ASSERT_WITH_CODE((ret = smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetDriverDramAddrLow,
- lower_32_bits(priv->smu_tables.entry[table_id].mc_addr))) == 0,
+ lower_32_bits(priv->smu_tables.entry[table_id].mc_addr),
+ NULL)) == 0,
"[CopyTableToSMC] Attempt to Set Dram Addr Low Failed!",
return ret);
- PP_ASSERT_WITH_CODE((ret = vega20_send_msg_to_smc_with_parameter(hwmgr,
- PPSMC_MSG_TransferTableDram2Smu, table_id)) == 0,
+ PP_ASSERT_WITH_CODE((ret = smum_send_msg_to_smc_with_parameter(hwmgr,
+ PPSMC_MSG_TransferTableDram2Smu, table_id, NULL)) == 0,
"[CopyTableToSMC] Attempt to Transfer Table To SMU Failed!",
return ret);
@@ -255,18 +259,22 @@ int vega20_set_activity_monitor_coeff(struct pp_hwmgr *hwmgr,
amdgpu_asic_flush_hdp(adev, NULL);
- PP_ASSERT_WITH_CODE((ret = vega20_send_msg_to_smc_with_parameter(hwmgr,
+ PP_ASSERT_WITH_CODE((ret = smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetDriverDramAddrHigh,
- upper_32_bits(priv->smu_tables.entry[TABLE_ACTIVITY_MONITOR_COEFF].mc_addr))) == 0,
+ upper_32_bits(priv->smu_tables.entry[TABLE_ACTIVITY_MONITOR_COEFF].mc_addr),
+ NULL)) == 0,
"[SetActivityMonitor] Attempt to Set Dram Addr High Failed!",
return ret);
- PP_ASSERT_WITH_CODE((ret = vega20_send_msg_to_smc_with_parameter(hwmgr,
+ PP_ASSERT_WITH_CODE((ret = smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetDriverDramAddrLow,
- lower_32_bits(priv->smu_tables.entry[TABLE_ACTIVITY_MONITOR_COEFF].mc_addr))) == 0,
+ lower_32_bits(priv->smu_tables.entry[TABLE_ACTIVITY_MONITOR_COEFF].mc_addr),
+ NULL)) == 0,
"[SetActivityMonitor] Attempt to Set Dram Addr Low Failed!",
return ret);
- PP_ASSERT_WITH_CODE((ret = vega20_send_msg_to_smc_with_parameter(hwmgr,
- PPSMC_MSG_TransferTableDram2Smu, TABLE_ACTIVITY_MONITOR_COEFF | (workload_type << 16))) == 0,
+ PP_ASSERT_WITH_CODE((ret = smum_send_msg_to_smc_with_parameter(hwmgr,
+ PPSMC_MSG_TransferTableDram2Smu,
+ TABLE_ACTIVITY_MONITOR_COEFF | (workload_type << 16),
+ NULL)) == 0,
"[SetActivityMonitor] Attempt to Transfer Table To SMU Failed!",
return ret);
@@ -281,19 +289,21 @@ int vega20_get_activity_monitor_coeff(struct pp_hwmgr *hwmgr,
struct amdgpu_device *adev = hwmgr->adev;
int ret = 0;
- PP_ASSERT_WITH_CODE((ret = vega20_send_msg_to_smc_with_parameter(hwmgr,
+ PP_ASSERT_WITH_CODE((ret = smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetDriverDramAddrHigh,
- upper_32_bits(priv->smu_tables.entry[TABLE_ACTIVITY_MONITOR_COEFF].mc_addr))) == 0,
+ upper_32_bits(priv->smu_tables.entry[TABLE_ACTIVITY_MONITOR_COEFF].mc_addr),
+ NULL)) == 0,
"[GetActivityMonitor] Attempt to Set Dram Addr High Failed!",
return ret);
- PP_ASSERT_WITH_CODE((ret = vega20_send_msg_to_smc_with_parameter(hwmgr,
+ PP_ASSERT_WITH_CODE((ret = smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetDriverDramAddrLow,
- lower_32_bits(priv->smu_tables.entry[TABLE_ACTIVITY_MONITOR_COEFF].mc_addr))) == 0,
+ lower_32_bits(priv->smu_tables.entry[TABLE_ACTIVITY_MONITOR_COEFF].mc_addr),
+ NULL)) == 0,
"[GetActivityMonitor] Attempt to Set Dram Addr Low Failed!",
return ret);
- PP_ASSERT_WITH_CODE((ret = vega20_send_msg_to_smc_with_parameter(hwmgr,
+ PP_ASSERT_WITH_CODE((ret = smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_TransferTableSmu2Dram,
- TABLE_ACTIVITY_MONITOR_COEFF | (workload_type << 16))) == 0,
+ TABLE_ACTIVITY_MONITOR_COEFF | (workload_type << 16), NULL)) == 0,
"[GetActivityMonitor] Attempt to Transfer Table From SMU Failed!",
return ret);
@@ -316,21 +326,21 @@ int vega20_enable_smc_features(struct pp_hwmgr *hwmgr,
smu_features_high = (uint32_t)((feature_mask & SMU_FEATURES_HIGH_MASK) >> SMU_FEATURES_HIGH_SHIFT);
if (enable) {
- PP_ASSERT_WITH_CODE((ret = vega20_send_msg_to_smc_with_parameter(hwmgr,
- PPSMC_MSG_EnableSmuFeaturesLow, smu_features_low)) == 0,
+ PP_ASSERT_WITH_CODE((ret = smum_send_msg_to_smc_with_parameter(hwmgr,
+ PPSMC_MSG_EnableSmuFeaturesLow, smu_features_low, NULL)) == 0,
"[EnableDisableSMCFeatures] Attempt to enable SMU features Low failed!",
return ret);
- PP_ASSERT_WITH_CODE((ret = vega20_send_msg_to_smc_with_parameter(hwmgr,
- PPSMC_MSG_EnableSmuFeaturesHigh, smu_features_high)) == 0,
+ PP_ASSERT_WITH_CODE((ret = smum_send_msg_to_smc_with_parameter(hwmgr,
+ PPSMC_MSG_EnableSmuFeaturesHigh, smu_features_high, NULL)) == 0,
"[EnableDisableSMCFeatures] Attempt to enable SMU features High failed!",
return ret);
} else {
- PP_ASSERT_WITH_CODE((ret = vega20_send_msg_to_smc_with_parameter(hwmgr,
- PPSMC_MSG_DisableSmuFeaturesLow, smu_features_low)) == 0,
+ PP_ASSERT_WITH_CODE((ret = smum_send_msg_to_smc_with_parameter(hwmgr,
+ PPSMC_MSG_DisableSmuFeaturesLow, smu_features_low, NULL)) == 0,
"[EnableDisableSMCFeatures] Attempt to disable SMU features Low failed!",
return ret);
- PP_ASSERT_WITH_CODE((ret = vega20_send_msg_to_smc_with_parameter(hwmgr,
- PPSMC_MSG_DisableSmuFeaturesHigh, smu_features_high)) == 0,
+ PP_ASSERT_WITH_CODE((ret = smum_send_msg_to_smc_with_parameter(hwmgr,
+ PPSMC_MSG_DisableSmuFeaturesHigh, smu_features_high, NULL)) == 0,
"[EnableDisableSMCFeatures] Attempt to disable SMU features High failed!",
return ret);
}
@@ -347,16 +357,16 @@ int vega20_get_enabled_smc_features(struct pp_hwmgr *hwmgr,
if (features_enabled == NULL)
return -EINVAL;
- PP_ASSERT_WITH_CODE((ret = vega20_send_msg_to_smc(hwmgr,
- PPSMC_MSG_GetEnabledSmuFeaturesLow)) == 0,
+ PP_ASSERT_WITH_CODE((ret = smum_send_msg_to_smc(hwmgr,
+ PPSMC_MSG_GetEnabledSmuFeaturesLow,
+ &smc_features_low)) == 0,
"[GetEnabledSMCFeatures] Attempt to get SMU features Low failed!",
return ret);
- smc_features_low = vega20_get_argument(hwmgr);
- PP_ASSERT_WITH_CODE((ret = vega20_send_msg_to_smc(hwmgr,
- PPSMC_MSG_GetEnabledSmuFeaturesHigh)) == 0,
+ PP_ASSERT_WITH_CODE((ret = smum_send_msg_to_smc(hwmgr,
+ PPSMC_MSG_GetEnabledSmuFeaturesHigh,
+ &smc_features_high)) == 0,
"[GetEnabledSMCFeatures] Attempt to get SMU features High failed!",
return ret);
- smc_features_high = vega20_get_argument(hwmgr);
*features_enabled = ((((uint64_t)smc_features_low << SMU_FEATURES_LOW_SHIFT) & SMU_FEATURES_LOW_MASK) |
(((uint64_t)smc_features_high << SMU_FEATURES_HIGH_SHIFT) & SMU_FEATURES_HIGH_MASK));
@@ -371,13 +381,15 @@ static int vega20_set_tools_address(struct pp_hwmgr *hwmgr)
int ret = 0;
if (priv->smu_tables.entry[TABLE_PMSTATUSLOG].mc_addr) {
- ret = vega20_send_msg_to_smc_with_parameter(hwmgr,
+ ret = smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetToolsDramAddrHigh,
- upper_32_bits(priv->smu_tables.entry[TABLE_PMSTATUSLOG].mc_addr));
+ upper_32_bits(priv->smu_tables.entry[TABLE_PMSTATUSLOG].mc_addr),
+ NULL);
if (!ret)
- ret = vega20_send_msg_to_smc_with_parameter(hwmgr,
+ ret = smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetToolsDramAddrLow,
- lower_32_bits(priv->smu_tables.entry[TABLE_PMSTATUSLOG].mc_addr));
+ lower_32_bits(priv->smu_tables.entry[TABLE_PMSTATUSLOG].mc_addr),
+ NULL);
}
return ret;
@@ -389,14 +401,16 @@ int vega20_set_pptable_driver_address(struct pp_hwmgr *hwmgr)
(struct vega20_smumgr *)(hwmgr->smu_backend);
int ret = 0;
- PP_ASSERT_WITH_CODE((ret = vega20_send_msg_to_smc_with_parameter(hwmgr,
+ PP_ASSERT_WITH_CODE((ret = smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetDriverDramAddrHigh,
- upper_32_bits(priv->smu_tables.entry[TABLE_PPTABLE].mc_addr))) == 0,
+ upper_32_bits(priv->smu_tables.entry[TABLE_PPTABLE].mc_addr),
+ NULL)) == 0,
"[SetPPtabeDriverAddress] Attempt to Set Dram Addr High Failed!",
return ret);
- PP_ASSERT_WITH_CODE((ret = vega20_send_msg_to_smc_with_parameter(hwmgr,
+ PP_ASSERT_WITH_CODE((ret = smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetDriverDramAddrLow,
- lower_32_bits(priv->smu_tables.entry[TABLE_PPTABLE].mc_addr))) == 0,
+ lower_32_bits(priv->smu_tables.entry[TABLE_PPTABLE].mc_addr),
+ NULL)) == 0,
"[SetPPtabeDriverAddress] Attempt to Set Dram Addr Low Failed!",
return ret);
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c
index b0e0d67cd54b..3da71a088b92 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c
@@ -356,7 +356,8 @@ static int vegam_update_uvd_smc_table(struct pp_hwmgr *hwmgr)
PHM_PlatformCaps_StablePState))
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_UVDDPM_SetEnabledMask,
- (uint32_t)(1 << smu_data->smc_state_table.UvdBootLevel));
+ (uint32_t)(1 << smu_data->smc_state_table.UvdBootLevel),
+ NULL);
return 0;
}
@@ -388,7 +389,8 @@ static int vegam_update_vce_smc_table(struct pp_hwmgr *hwmgr)
if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_StablePState))
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_VCEDPM_SetEnabledMask,
- (uint32_t)1 << smu_data->smc_state_table.VceBootLevel);
+ (uint32_t)1 << smu_data->smc_state_table.VceBootLevel,
+ NULL);
return 0;
}
@@ -1906,7 +1908,8 @@ static int vegam_enable_reconfig_cus(struct pp_hwmgr *hwmgr)
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_EnableModeSwitchRLCNotification,
- adev->gfx.cu_info.number);
+ adev->gfx.cu_info.number,
+ NULL);
return 0;
}
@@ -2060,7 +2063,7 @@ static int vegam_init_smc_table(struct pp_hwmgr *hwmgr)
table->AcDcGpio = gpio_pin.uc_gpio_pin_bit_shift;
if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
PHM_PlatformCaps_AutomaticDCTransition) &&
- !smum_send_msg_to_smc(hwmgr, PPSMC_MSG_UseNewGPIOScheme))
+ !smum_send_msg_to_smc(hwmgr, PPSMC_MSG_UseNewGPIOScheme, NULL))
phm_cap_set(hwmgr->platform_descriptor.platformCaps,
PHM_PlatformCaps_SMCtoPPLIBAcdcGpioScheme);
} else {
@@ -2250,10 +2253,12 @@ int vegam_thermal_avfs_enable(struct pp_hwmgr *hwmgr)
if (!hwmgr->avfs_supported)
return 0;
- ret = smum_send_msg_to_smc(hwmgr, PPSMC_MSG_EnableAvfs);
+ ret = smum_send_msg_to_smc(hwmgr, PPSMC_MSG_EnableAvfs, NULL);
if (!ret) {
if (data->apply_avfs_cks_off_voltage)
- ret = smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ApplyAvfsCksOffVoltage);
+ ret = smum_send_msg_to_smc(hwmgr,
+ PPSMC_MSG_ApplyAvfsCksOffVoltage,
+ NULL);
}
return ret;
@@ -2279,6 +2284,7 @@ const struct pp_smumgr_func vegam_smu_funcs = {
.request_smu_load_specific_fw = NULL,
.send_msg_to_smc = smu7_send_msg_to_smc,
.send_msg_to_smc_with_parameter = smu7_send_msg_to_smc_with_parameter,
+ .get_argument = smu7_get_argument,
.process_firmware_header = vegam_process_firmware_header,
.is_dpm_running = vegam_is_dpm_running,
.get_mac_definition = vegam_get_mac_definition,
diff --git a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c
index 3f1044326dcb..61923530b2e4 100644
--- a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c
+++ b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c
@@ -1796,7 +1796,7 @@ static int vega20_get_power_profile_mode(struct smu_context *smu, char *buf)
"PD_Data_error_rate_coeff"};
int result = 0;
- if (!smu->pm_enabled || !buf)
+ if (!buf)
return -EINVAL;
size += sprintf(buf + size, "%16s %s %s %s %s %s %s %s %s %s %s\n",
@@ -1887,8 +1887,6 @@ static int vega20_set_power_profile_mode(struct smu_context *smu, long *input, u
smu->power_profile_mode = input[size];
- if (!smu->pm_enabled)
- return ret;
if (smu->power_profile_mode > PP_SMC_POWER_PROFILE_CUSTOM) {
pr_err("Invalid power profile mode %d\n", smu->power_profile_mode);
return -EINVAL;
diff --git a/drivers/gpu/drm/arc/arcpgu_drv.c b/drivers/gpu/drm/arc/arcpgu_drv.c
index d6a6692db0ac..c05d001163e0 100644
--- a/drivers/gpu/drm/arc/arcpgu_drv.c
+++ b/drivers/gpu/drm/arc/arcpgu_drv.c
@@ -137,10 +137,11 @@ static struct drm_info_list arcpgu_debugfs_list[] = {
{ "clocks", arcpgu_show_pxlclock, 0 },
};
-static int arcpgu_debugfs_init(struct drm_minor *minor)
+static void arcpgu_debugfs_init(struct drm_minor *minor)
{
- return drm_debugfs_create_files(arcpgu_debugfs_list,
- ARRAY_SIZE(arcpgu_debugfs_list), minor->debugfs_root, minor);
+ drm_debugfs_create_files(arcpgu_debugfs_list,
+ ARRAY_SIZE(arcpgu_debugfs_list),
+ minor->debugfs_root, minor);
}
#endif
diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_kms.c b/drivers/gpu/drm/arm/display/komeda/komeda_kms.c
index 442d4656150a..6b85d5f4caa8 100644
--- a/drivers/gpu/drm/arm/display/komeda/komeda_kms.c
+++ b/drivers/gpu/drm/arm/display/komeda/komeda_kms.c
@@ -14,6 +14,7 @@
#include <drm/drm_gem_cma_helper.h>
#include <drm/drm_gem_framebuffer_helper.h>
#include <drm/drm_irq.h>
+#include <drm/drm_managed.h>
#include <drm/drm_probe_helper.h>
#include <drm/drm_vblank.h>
@@ -260,17 +261,16 @@ static void komeda_kms_mode_config_init(struct komeda_kms_dev *kms,
struct komeda_kms_dev *komeda_kms_attach(struct komeda_dev *mdev)
{
- struct komeda_kms_dev *kms = kzalloc(sizeof(*kms), GFP_KERNEL);
+ struct komeda_kms_dev *kms;
struct drm_device *drm;
int err;
- if (!kms)
- return ERR_PTR(-ENOMEM);
+ kms = devm_drm_dev_alloc(mdev->dev, &komeda_kms_driver,
+ struct komeda_kms_dev, base);
+ if (IS_ERR(kms))
+ return kms;
drm = &kms->base;
- err = drm_dev_init(drm, &komeda_kms_driver, mdev->dev);
- if (err)
- goto free_kms;
drm->dev_private = mdev;
@@ -327,9 +327,6 @@ cleanup_mode_config:
drm_mode_config_cleanup(drm);
komeda_kms_cleanup_private_objs(kms);
drm->dev_private = NULL;
- drm_dev_put(drm);
-free_kms:
- kfree(kms);
return ERR_PTR(err);
}
@@ -346,5 +343,4 @@ void komeda_kms_detach(struct komeda_kms_dev *kms)
drm_mode_config_cleanup(drm);
komeda_kms_cleanup_private_objs(kms);
drm->dev_private = NULL;
- drm_dev_put(drm);
}
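Both the komeda conversion above and the aspeed conversion further below move to the managed devm_drm_dev_alloc() allocation: the drm_device is embedded in the driver structure and released through devres, which is why the explicit drm_dev_put()/kfree() unwind paths disappear. A condensed sketch of the pattern, with hypothetical names:

	struct example_kms {
		struct drm_device base;	/* embedded; freed via devres */
		/* driver-private state ... */
	};

	static struct example_kms *example_attach(struct device *parent)
	{
		struct example_kms *kms;

		kms = devm_drm_dev_alloc(parent, &example_drm_driver,
					 struct example_kms, base);
		if (IS_ERR(kms))
			return kms;

		/* mode config setup, drm_dev_register(&kms->base, 0), etc. */
		return kms;
	}
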
diff --git a/drivers/gpu/drm/arm/hdlcd_drv.c b/drivers/gpu/drm/arm/hdlcd_drv.c
index 2e053815b54a..194419f47c5e 100644
--- a/drivers/gpu/drm/arm/hdlcd_drv.c
+++ b/drivers/gpu/drm/arm/hdlcd_drv.c
@@ -224,10 +224,11 @@ static struct drm_info_list hdlcd_debugfs_list[] = {
{ "clocks", hdlcd_show_pxlclock, 0 },
};
-static int hdlcd_debugfs_init(struct drm_minor *minor)
+static void hdlcd_debugfs_init(struct drm_minor *minor)
{
- return drm_debugfs_create_files(hdlcd_debugfs_list,
- ARRAY_SIZE(hdlcd_debugfs_list), minor->debugfs_root, minor);
+ drm_debugfs_create_files(hdlcd_debugfs_list,
+ ARRAY_SIZE(hdlcd_debugfs_list),
+ minor->debugfs_root, minor);
}
#endif
diff --git a/drivers/gpu/drm/arm/malidp_drv.c b/drivers/gpu/drm/arm/malidp_drv.c
index 37d92a06318e..def8c9ffafca 100644
--- a/drivers/gpu/drm/arm/malidp_drv.c
+++ b/drivers/gpu/drm/arm/malidp_drv.c
@@ -548,7 +548,7 @@ static const struct file_operations malidp_debugfs_fops = {
.release = single_release,
};
-static int malidp_debugfs_init(struct drm_minor *minor)
+static void malidp_debugfs_init(struct drm_minor *minor)
{
struct malidp_drm *malidp = minor->dev->dev_private;
@@ -557,7 +557,6 @@ static int malidp_debugfs_init(struct drm_minor *minor)
spin_lock_init(&malidp->errors_lock);
debugfs_create_file("debug", S_IRUGO | S_IWUSR, minor->debugfs_root,
minor->dev, &malidp_debugfs_fops);
- return 0;
}
#endif //CONFIG_DEBUG_FS
diff --git a/drivers/gpu/drm/armada/armada_drv.c b/drivers/gpu/drm/armada/armada_drv.c
index 197dca3fc84c..5fc25c3f445c 100644
--- a/drivers/gpu/drm/armada/armada_drv.c
+++ b/drivers/gpu/drm/armada/armada_drv.c
@@ -12,6 +12,7 @@
#include <drm/drm_atomic_helper.h>
#include <drm/drm_drv.h>
#include <drm/drm_ioctl.h>
+#include <drm/drm_managed.h>
#include <drm/drm_prime.h>
#include <drm/drm_probe_helper.h>
#include <drm/drm_fb_helper.h>
@@ -103,6 +104,7 @@ static int armada_drm_bind(struct device *dev)
kfree(priv);
return ret;
}
+ drmm_add_final_kfree(&priv->drm, priv);
/* Remove early framebuffers */
ret = drm_fb_helper_remove_conflicting_framebuffers(NULL,
@@ -311,7 +313,7 @@ static void __exit armada_drm_exit(void)
}
module_exit(armada_drm_exit);
-MODULE_AUTHOR("Russell King <rmk+kernel@arm.linux.org.uk>");
+MODULE_AUTHOR("Russell King <rmk+kernel@armlinux.org.uk>");
MODULE_DESCRIPTION("Armada DRM Driver");
MODULE_LICENSE("GPL");
MODULE_ALIAS("platform:armada-drm");
diff --git a/drivers/gpu/drm/aspeed/aspeed_gfx.h b/drivers/gpu/drm/aspeed/aspeed_gfx.h
index a10358bb61ec..e7ca95827ae8 100644
--- a/drivers/gpu/drm/aspeed/aspeed_gfx.h
+++ b/drivers/gpu/drm/aspeed/aspeed_gfx.h
@@ -5,6 +5,7 @@
#include <drm/drm_simple_kms_helper.h>
struct aspeed_gfx {
+ struct drm_device drm;
void __iomem *base;
struct clk *clk;
struct reset_control *rst;
@@ -12,8 +13,8 @@ struct aspeed_gfx {
struct drm_simple_display_pipe pipe;
struct drm_connector connector;
- struct drm_fbdev_cma *fbdev;
};
+#define to_aspeed_gfx(x) container_of(x, struct aspeed_gfx, drm)
int aspeed_gfx_create_pipe(struct drm_device *drm);
int aspeed_gfx_create_output(struct drm_device *drm);
diff --git a/drivers/gpu/drm/aspeed/aspeed_gfx_crtc.c b/drivers/gpu/drm/aspeed/aspeed_gfx_crtc.c
index 2184b8be6fd4..e54686c31a90 100644
--- a/drivers/gpu/drm/aspeed/aspeed_gfx_crtc.c
+++ b/drivers/gpu/drm/aspeed/aspeed_gfx_crtc.c
@@ -231,7 +231,7 @@ static const uint32_t aspeed_gfx_formats[] = {
int aspeed_gfx_create_pipe(struct drm_device *drm)
{
- struct aspeed_gfx *priv = drm->dev_private;
+ struct aspeed_gfx *priv = to_aspeed_gfx(drm);
return drm_simple_display_pipe_init(drm, &priv->pipe, &aspeed_gfx_funcs,
aspeed_gfx_formats,
diff --git a/drivers/gpu/drm/aspeed/aspeed_gfx_drv.c b/drivers/gpu/drm/aspeed/aspeed_gfx_drv.c
index ada2f6aca906..6b27242b9ee3 100644
--- a/drivers/gpu/drm/aspeed/aspeed_gfx_drv.c
+++ b/drivers/gpu/drm/aspeed/aspeed_gfx_drv.c
@@ -77,7 +77,7 @@ static void aspeed_gfx_setup_mode_config(struct drm_device *drm)
static irqreturn_t aspeed_gfx_irq_handler(int irq, void *data)
{
struct drm_device *drm = data;
- struct aspeed_gfx *priv = drm->dev_private;
+ struct aspeed_gfx *priv = to_aspeed_gfx(drm);
u32 reg;
reg = readl(priv->base + CRT_CTRL1);
@@ -96,15 +96,10 @@ static irqreturn_t aspeed_gfx_irq_handler(int irq, void *data)
static int aspeed_gfx_load(struct drm_device *drm)
{
struct platform_device *pdev = to_platform_device(drm->dev);
- struct aspeed_gfx *priv;
+ struct aspeed_gfx *priv = to_aspeed_gfx(drm);
struct resource *res;
int ret;
- priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
- if (!priv)
- return -ENOMEM;
- drm->dev_private = priv;
-
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
priv->base = devm_ioremap_resource(drm->dev, res);
if (IS_ERR(priv->base))
@@ -187,8 +182,6 @@ static void aspeed_gfx_unload(struct drm_device *drm)
{
drm_kms_helper_poll_fini(drm);
drm_mode_config_cleanup(drm);
-
- drm->dev_private = NULL;
}
DEFINE_DRM_GEM_CMA_FOPS(fops);
@@ -216,27 +209,26 @@ static const struct of_device_id aspeed_gfx_match[] = {
static int aspeed_gfx_probe(struct platform_device *pdev)
{
- struct drm_device *drm;
+ struct aspeed_gfx *priv;
int ret;
- drm = drm_dev_alloc(&aspeed_gfx_driver, &pdev->dev);
- if (IS_ERR(drm))
- return PTR_ERR(drm);
+ priv = devm_drm_dev_alloc(&pdev->dev, &aspeed_gfx_driver,
+ struct aspeed_gfx, drm);
+ if (IS_ERR(priv))
+ return PTR_ERR(priv);
- ret = aspeed_gfx_load(drm);
+ ret = aspeed_gfx_load(&priv->drm);
if (ret)
- goto err_free;
+ return ret;
- ret = drm_dev_register(drm, 0);
+ ret = drm_dev_register(&priv->drm, 0);
if (ret)
goto err_unload;
return 0;
err_unload:
- aspeed_gfx_unload(drm);
-err_free:
- drm_dev_put(drm);
+ aspeed_gfx_unload(&priv->drm);
return ret;
}
@@ -247,7 +239,6 @@ static int aspeed_gfx_remove(struct platform_device *pdev)
drm_dev_unregister(drm);
aspeed_gfx_unload(drm);
- drm_dev_put(drm);
return 0;
}
diff --git a/drivers/gpu/drm/aspeed/aspeed_gfx_out.c b/drivers/gpu/drm/aspeed/aspeed_gfx_out.c
index 67ee5fa10055..6759cb88415a 100644
--- a/drivers/gpu/drm/aspeed/aspeed_gfx_out.c
+++ b/drivers/gpu/drm/aspeed/aspeed_gfx_out.c
@@ -28,7 +28,7 @@ static const struct drm_connector_funcs aspeed_gfx_connector_funcs = {
int aspeed_gfx_create_output(struct drm_device *drm)
{
- struct aspeed_gfx *priv = drm->dev_private;
+ struct aspeed_gfx *priv = to_aspeed_gfx(drm);
int ret;
priv->connector.dpms = DRM_MODE_DPMS_OFF;
diff --git a/drivers/gpu/drm/ast/ast_drv.c b/drivers/gpu/drm/ast/ast_drv.c
index 30aa73a5d9b7..b7ba22dddcad 100644
--- a/drivers/gpu/drm/ast/ast_drv.c
+++ b/drivers/gpu/drm/ast/ast_drv.c
@@ -32,6 +32,7 @@
#include <drm/drm_crtc_helper.h>
#include <drm/drm_drv.h>
+#include <drm/drm_fb_helper.h>
#include <drm/drm_gem_vram_helper.h>
#include <drm/drm_probe_helper.h>
@@ -111,6 +112,8 @@ static int ast_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
if (ret)
goto err_ast_driver_unload;
+ drm_fbdev_generic_setup(dev, 32);
+
return 0;
err_ast_driver_unload:
diff --git a/drivers/gpu/drm/ast/ast_main.c b/drivers/gpu/drm/ast/ast_main.c
index 18a0a4ce00f6..e5398e3dabe7 100644
--- a/drivers/gpu/drm/ast/ast_main.c
+++ b/drivers/gpu/drm/ast/ast_main.c
@@ -30,7 +30,6 @@
#include <drm/drm_atomic_helper.h>
#include <drm/drm_crtc_helper.h>
-#include <drm/drm_fb_helper.h>
#include <drm/drm_gem.h>
#include <drm/drm_gem_framebuffer_helper.h>
#include <drm/drm_gem_vram_helper.h>
@@ -512,10 +511,6 @@ int ast_driver_load(struct drm_device *dev, unsigned long flags)
drm_mode_config_reset(dev);
- ret = drm_fbdev_generic_setup(dev, 32);
- if (ret)
- goto out_free;
-
return 0;
out_free:
kfree(ast);
diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c
index cdd6c46d6557..7d39b858c9f1 100644
--- a/drivers/gpu/drm/ast/ast_mode.c
+++ b/drivers/gpu/drm/ast/ast_mode.c
@@ -561,8 +561,9 @@ static int ast_primary_plane_helper_atomic_check(struct drm_plane *plane,
return 0;
}
-void ast_primary_plane_helper_atomic_update(struct drm_plane *plane,
- struct drm_plane_state *old_state)
+static void
+ast_primary_plane_helper_atomic_update(struct drm_plane *plane,
+ struct drm_plane_state *old_state)
{
struct ast_private *ast = plane->dev->dev_private;
struct drm_plane_state *state = plane->state;
@@ -801,6 +802,9 @@ static int ast_crtc_helper_atomic_check(struct drm_crtc *crtc,
return -EINVAL;
}
+ if (!state->enable)
+ return 0; /* no mode checks if CRTC is being disabled */
+
ast_state = to_ast_crtc_state(state);
format = ast_state->format;
@@ -881,6 +885,17 @@ static const struct drm_crtc_helper_funcs ast_crtc_helper_funcs = {
.atomic_disable = ast_crtc_helper_atomic_disable,
};
+static void ast_crtc_reset(struct drm_crtc *crtc)
+{
+ struct ast_crtc_state *ast_state =
+ kzalloc(sizeof(*ast_state), GFP_KERNEL);
+
+ if (crtc->state)
+ crtc->funcs->atomic_destroy_state(crtc, crtc->state);
+
+ __drm_atomic_helper_crtc_reset(crtc, &ast_state->base);
+}
+
static void ast_crtc_destroy(struct drm_crtc *crtc)
{
drm_crtc_cleanup(crtc);
@@ -919,8 +934,7 @@ static void ast_crtc_atomic_destroy_state(struct drm_crtc *crtc,
}
static const struct drm_crtc_funcs ast_crtc_funcs = {
- .reset = drm_atomic_helper_crtc_reset,
- .set_config = drm_crtc_helper_set_config,
+ .reset = ast_crtc_reset,
.gamma_set = drm_atomic_helper_legacy_gamma_set,
.destroy = ast_crtc_destroy,
.set_config = drm_atomic_helper_set_config,
@@ -1069,7 +1083,6 @@ static void ast_connector_destroy(struct drm_connector *connector)
{
struct ast_connector *ast_connector = to_ast_connector(connector);
ast_i2c_destroy(ast_connector->i2c);
- drm_connector_unregister(connector);
drm_connector_cleanup(connector);
kfree(connector);
}
@@ -1112,8 +1125,6 @@ static int ast_connector_init(struct drm_device *dev)
connector->interlace_allowed = 0;
connector->doublescan_allowed = 0;
- drm_connector_register(connector);
-
connector->polled = DRM_CONNECTOR_POLL_CONNECT;
encoder = list_first_entry(&dev->mode_config.encoder_list, struct drm_encoder, head);
diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_output.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_output.c
index e2019fe97fff..43bc709e3523 100644
--- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_output.c
+++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_output.c
@@ -11,9 +11,10 @@
#include <linux/media-bus-format.h>
#include <linux/of_graph.h>
+#include <drm/drm_bridge.h>
#include <drm/drm_encoder.h>
#include <drm/drm_of.h>
-#include <drm/drm_bridge.h>
+#include <drm/drm_simple_kms_helper.h>
#include "atmel_hlcdc_dc.h"
@@ -22,10 +23,6 @@ struct atmel_hlcdc_rgb_output {
int bus_fmt;
};
-static const struct drm_encoder_funcs atmel_hlcdc_panel_encoder_funcs = {
- .destroy = drm_encoder_cleanup,
-};
-
static struct atmel_hlcdc_rgb_output *
atmel_hlcdc_encoder_to_rgb_output(struct drm_encoder *encoder)
{
@@ -98,9 +95,8 @@ static int atmel_hlcdc_attach_endpoint(struct drm_device *dev, int endpoint)
return -EINVAL;
}
- ret = drm_encoder_init(dev, &output->encoder,
- &atmel_hlcdc_panel_encoder_funcs,
- DRM_MODE_ENCODER_NONE, NULL);
+ ret = drm_simple_encoder_init(dev, &output->encoder,
+ DRM_MODE_ENCODER_NONE);
if (ret)
return ret;
diff --git a/drivers/gpu/drm/bochs/bochs.h b/drivers/gpu/drm/bochs/bochs.h
index 917767173ee6..e5bd1d517a18 100644
--- a/drivers/gpu/drm/bochs/bochs.h
+++ b/drivers/gpu/drm/bochs/bochs.h
@@ -92,7 +92,6 @@ void bochs_mm_fini(struct bochs_device *bochs);
/* bochs_kms.c */
int bochs_kms_init(struct bochs_device *bochs);
-void bochs_kms_fini(struct bochs_device *bochs);
/* bochs_fbdev.c */
extern const struct drm_mode_config_funcs bochs_mode_funcs;
diff --git a/drivers/gpu/drm/bochs/bochs_drv.c b/drivers/gpu/drm/bochs/bochs_drv.c
index addb0568c1af..e18c51de1196 100644
--- a/drivers/gpu/drm/bochs/bochs_drv.c
+++ b/drivers/gpu/drm/bochs/bochs_drv.c
@@ -7,6 +7,7 @@
#include <drm/drm_drv.h>
#include <drm/drm_atomic_helper.h>
+#include <drm/drm_managed.h>
#include "bochs.h"
@@ -21,10 +22,7 @@ static void bochs_unload(struct drm_device *dev)
{
struct bochs_device *bochs = dev->dev_private;
- bochs_kms_fini(bochs);
bochs_mm_fini(bochs);
- kfree(bochs);
- dev->dev_private = NULL;
}
static int bochs_load(struct drm_device *dev)
@@ -32,7 +30,7 @@ static int bochs_load(struct drm_device *dev)
struct bochs_device *bochs;
int ret;
- bochs = kzalloc(sizeof(*bochs), GFP_KERNEL);
+ bochs = drmm_kzalloc(dev, sizeof(*bochs), GFP_KERNEL);
if (bochs == NULL)
return -ENOMEM;
dev->dev_private = bochs;
diff --git a/drivers/gpu/drm/bochs/bochs_kms.c b/drivers/gpu/drm/bochs/bochs_kms.c
index 8066d7d370d5..05d8373888e8 100644
--- a/drivers/gpu/drm/bochs/bochs_kms.c
+++ b/drivers/gpu/drm/bochs/bochs_kms.c
@@ -104,7 +104,6 @@ static void bochs_connector_init(struct drm_device *dev)
DRM_MODE_CONNECTOR_VIRTUAL);
drm_connector_helper_add(connector,
&bochs_connector_connector_helper_funcs);
- drm_connector_register(connector);
bochs_hw_load_edid(bochs);
if (bochs->edid) {
@@ -134,7 +133,11 @@ const struct drm_mode_config_funcs bochs_mode_funcs = {
int bochs_kms_init(struct bochs_device *bochs)
{
- drm_mode_config_init(bochs->dev);
+ int ret;
+
+ ret = drmm_mode_config_init(bochs->dev);
+ if (ret)
+ return ret;
bochs->dev->mode_config.max_width = 8192;
bochs->dev->mode_config.max_height = 8192;
@@ -160,12 +163,3 @@ int bochs_kms_init(struct bochs_device *bochs)
return 0;
}
-
-void bochs_kms_fini(struct bochs_device *bochs)
-{
- if (!bochs->dev->mode_config.num_connector)
- return;
-
- drm_atomic_helper_shutdown(bochs->dev);
- drm_mode_config_cleanup(bochs->dev);
-}
diff --git a/drivers/gpu/drm/bridge/Kconfig b/drivers/gpu/drm/bridge/Kconfig
index aaed2347ace9..04f876e985de 100644
--- a/drivers/gpu/drm/bridge/Kconfig
+++ b/drivers/gpu/drm/bridge/Kconfig
@@ -27,6 +27,16 @@ config DRM_CDNS_DSI
Support Cadence DPI to DSI bridge. This is an internal
bridge and is meant to be directly embedded in a SoC.
+config DRM_CHRONTEL_CH7033
+ tristate "Chrontel CH7033 Video Encoder"
+ depends on OF
+ select DRM_KMS_HELPER
+ help
+ Enable support for the Chrontel CH7033 VGA/DVI/HDMI Encoder, as
+ found in the Dell Wyse 3020 thin client.
+
+ If in doubt, say "N".
+
config DRM_DISPLAY_CONNECTOR
tristate "Display connector support"
depends on OF
@@ -58,6 +68,22 @@ config DRM_MEGACHIPS_STDPXXXX_GE_B850V3_FW
to DP++. This is used with the i.MX6 imx-ldb
driver. You are likely to say N here.
+config DRM_NWL_MIPI_DSI
+ tristate "Northwest Logic MIPI DSI Host controller"
+ depends on DRM
+ depends on COMMON_CLK
+ depends on OF && HAS_IOMEM
+ select DRM_KMS_HELPER
+ select DRM_MIPI_DSI
+ select DRM_PANEL_BRIDGE
+ select GENERIC_PHY_MIPI_DPHY
+ select MFD_SYSCON
+ select MULTIPLEXER
+ select REGMAP_MMIO
+ help
+ This enables the Northwest Logic MIPI DSI Host controller, as
+ found for example on NXP's i.MX8 processors.
+
config DRM_NXP_PTN3460
tristate "NXP PTN3460 DP/LVDS bridge"
depends on OF
diff --git a/drivers/gpu/drm/bridge/Makefile b/drivers/gpu/drm/bridge/Makefile
index 6fb062b5b0f0..d63d4b7e4347 100644
--- a/drivers/gpu/drm/bridge/Makefile
+++ b/drivers/gpu/drm/bridge/Makefile
@@ -1,5 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_DRM_CDNS_DSI) += cdns-dsi.o
+obj-$(CONFIG_DRM_CHRONTEL_CH7033) += chrontel-ch7033.o
obj-$(CONFIG_DRM_DISPLAY_CONNECTOR) += display-connector.o
obj-$(CONFIG_DRM_LVDS_CODEC) += lvds-codec.o
obj-$(CONFIG_DRM_MEGACHIPS_STDPXXXX_GE_B850V3_FW) += megachips-stdpxxxx-ge-b850v3-fw.o
@@ -18,6 +19,7 @@ obj-$(CONFIG_DRM_I2C_ADV7511) += adv7511/
obj-$(CONFIG_DRM_TI_SN65DSI86) += ti-sn65dsi86.o
obj-$(CONFIG_DRM_TI_TFP410) += ti-tfp410.o
obj-$(CONFIG_DRM_TI_TPD12S015) += ti-tpd12s015.o
+obj-$(CONFIG_DRM_NWL_MIPI_DSI) += nwl-dsi.o
obj-y += analogix/
obj-y += synopsys/
diff --git a/drivers/gpu/drm/bridge/adv7511/Kconfig b/drivers/gpu/drm/bridge/adv7511/Kconfig
index 47d4eb9e845d..f46a5e26b5dd 100644
--- a/drivers/gpu/drm/bridge/adv7511/Kconfig
+++ b/drivers/gpu/drm/bridge/adv7511/Kconfig
@@ -6,7 +6,7 @@ config DRM_I2C_ADV7511
select REGMAP_I2C
select DRM_MIPI_DSI
help
- Support for the Analog Device ADV7511(W)/13/33/35 HDMI encoders.
+ Support for the Analog Devices ADV7511(W)/13/33/35 HDMI encoders.
config DRM_I2C_ADV7511_AUDIO
bool "ADV7511 HDMI Audio driver"
diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511_audio.c b/drivers/gpu/drm/bridge/adv7511/adv7511_audio.c
index a428185be2c1..f101dd2819b5 100644
--- a/drivers/gpu/drm/bridge/adv7511/adv7511_audio.c
+++ b/drivers/gpu/drm/bridge/adv7511/adv7511_audio.c
@@ -19,13 +19,15 @@ static void adv7511_calc_cts_n(unsigned int f_tmds, unsigned int fs,
{
switch (fs) {
case 32000:
- *n = 4096;
+ case 48000:
+ case 96000:
+ case 192000:
+ *n = fs * 128 / 1000;
break;
case 44100:
- *n = 6272;
- break;
- case 48000:
- *n = 6144;
+ case 88200:
+ case 176400:
+ *n = fs * 128 / 900;
break;
}
@@ -119,6 +121,9 @@ int adv7511_hdmi_hw_params(struct device *dev, void *data,
audio_source = ADV7511_AUDIO_SOURCE_I2S;
i2s_format = ADV7511_I2S_FORMAT_LEFT_J;
break;
+ case HDMI_SPDIF:
+ audio_source = ADV7511_AUDIO_SOURCE_SPDIF;
+ break;
default:
return -EINVAL;
}
@@ -175,11 +180,21 @@ static int audio_startup(struct device *dev, void *data)
/* use Audio infoframe updated info */
regmap_update_bits(adv7511->regmap, ADV7511_REG_GC(1),
BIT(5), 0);
+ /* enable SPDIF receiver */
+ if (adv7511->audio_source == ADV7511_AUDIO_SOURCE_SPDIF)
+ regmap_update_bits(adv7511->regmap, ADV7511_REG_AUDIO_CONFIG,
+ BIT(7), BIT(7));
+
return 0;
}
static void audio_shutdown(struct device *dev, void *data)
{
+ struct adv7511 *adv7511 = dev_get_drvdata(dev);
+
+ if (adv7511->audio_source == ADV7511_AUDIO_SOURCE_SPDIF)
+ regmap_update_bits(adv7511->regmap, ADV7511_REG_AUDIO_CONFIG,
+ BIT(7), 0);
}
static int adv7511_hdmi_i2s_get_dai_id(struct snd_soc_component *component,
@@ -213,6 +228,7 @@ static const struct hdmi_codec_pdata codec_data = {
.ops = &adv7511_codec_ops,
.max_i2s_channels = 2,
.i2s = 1,
+ .spdif = 1,
};
int adv7511_audio_init(struct device *dev, struct adv7511 *adv7511)
diff --git a/drivers/gpu/drm/bridge/chrontel-ch7033.c b/drivers/gpu/drm/bridge/chrontel-ch7033.c
new file mode 100644
index 000000000000..f8675d82974b
--- /dev/null
+++ b/drivers/gpu/drm/bridge/chrontel-ch7033.c
@@ -0,0 +1,620 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Chrontel CH7033 Video Encoder Driver
+ *
+ * Copyright (C) 2019,2020 Lubomir Rintel
+ */
+
+#include <linux/gpio/consumer.h>
+#include <linux/module.h>
+#include <linux/regmap.h>
+
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_bridge.h>
+#include <drm/drm_edid.h>
+#include <drm/drm_of.h>
+#include <drm/drm_print.h>
+#include <drm/drm_probe_helper.h>
+
+/* Page 0, Register 0x07 */
+enum {
+ DRI_PD = BIT(3),
+ IO_PD = BIT(5),
+};
+
+/* Page 0, Register 0x08 */
+enum {
+ DRI_PDDRI = GENMASK(7, 4),
+ PDDAC = GENMASK(3, 1),
+ PANEN = BIT(0),
+};
+
+/* Page 0, Register 0x09 */
+enum {
+ DPD = BIT(7),
+ GCKOFF = BIT(6),
+ TV_BP = BIT(5),
+ SCLPD = BIT(4),
+ SDPD = BIT(3),
+ VGA_PD = BIT(2),
+ HDBKPD = BIT(1),
+ HDMI_PD = BIT(0),
+};
+
+/* Page 0, Register 0x0a */
+enum {
+ MEMINIT = BIT(7),
+ MEMIDLE = BIT(6),
+ MEMPD = BIT(5),
+ STOP = BIT(4),
+ LVDS_PD = BIT(3),
+ HD_DVIB = BIT(2),
+ HDCP_PD = BIT(1),
+ MCU_PD = BIT(0),
+};
+
+/* Page 0, Register 0x18 */
+enum {
+ IDF = GENMASK(7, 4),
+ INTEN = BIT(3),
+ SWAP = GENMASK(2, 0),
+};
+
+enum {
+ BYTE_SWAP_RGB = 0,
+ BYTE_SWAP_RBG = 1,
+ BYTE_SWAP_GRB = 2,
+ BYTE_SWAP_GBR = 3,
+ BYTE_SWAP_BRG = 4,
+ BYTE_SWAP_BGR = 5,
+};
+
+/* Page 0, Register 0x19 */
+enum {
+ HPO_I = BIT(5),
+ VPO_I = BIT(4),
+ DEPO_I = BIT(3),
+ CRYS_EN = BIT(2),
+ GCLKFREQ = GENMASK(2, 0),
+};
+
+/* Page 0, Register 0x2e */
+enum {
+ HFLIP = BIT(7),
+ VFLIP = BIT(6),
+ DEPO_O = BIT(5),
+ HPO_O = BIT(4),
+ VPO_O = BIT(3),
+ TE = GENMASK(2, 0),
+};
+
+/* Page 0, Register 0x2b */
+enum {
+ SWAPS = GENMASK(7, 4),
+ VFMT = GENMASK(3, 0),
+};
+
+/* Page 0, Register 0x54 */
+enum {
+ COMP_BP = BIT(7),
+ DAC_EN_T = BIT(6),
+ HWO_HDMI_HI = GENMASK(5, 3),
+ HOO_HDMI_HI = GENMASK(2, 0),
+};
+
+/* Page 0, Register 0x57 */
+enum {
+ FLDSEN = BIT(7),
+ VWO_HDMI_HI = GENMASK(5, 3),
+ VOO_HDMI_HI = GENMASK(2, 0),
+};
+
+/* Page 0, Register 0x7e */
+enum {
+ HDMI_LVDS_SEL = BIT(7),
+ DE_GEN = BIT(6),
+ PWM_INDEX_HI = BIT(5),
+ USE_DE = BIT(4),
+ R_INT = GENMASK(3, 0),
+};
+
+/* Page 1, Register 0x07 */
+enum {
+ BPCKSEL = BIT(7),
+ DRI_CMFB_EN = BIT(6),
+ CEC_PUEN = BIT(5),
+ CEC_T = BIT(3),
+ CKINV = BIT(2),
+ CK_TVINV = BIT(1),
+ DRI_CKS2 = BIT(0),
+};
+
+/* Page 1, Register 0x08 */
+enum {
+ DACG = BIT(6),
+ DACKTST = BIT(5),
+ DEDGEB = BIT(4),
+ SYO = BIT(3),
+ DRI_IT_LVDS = GENMASK(2, 1),
+ DISPON = BIT(0),
+};
+
+/* Page 1, Register 0x0c */
+enum {
+ DRI_PLL_CP = GENMASK(7, 6),
+ DRI_PLL_DIVSEL = BIT(5),
+ DRI_PLL_N1_1 = BIT(4),
+ DRI_PLL_N1_0 = BIT(3),
+ DRI_PLL_N3_1 = BIT(2),
+ DRI_PLL_N3_0 = BIT(1),
+ DRI_PLL_CKTSTEN = BIT(0),
+};
+
+/* Page 1, Register 0x6b */
+enum {
+ VCO3CS = GENMASK(7, 6),
+ ICPGBK2_0 = GENMASK(5, 3),
+ DRI_VCO357SC = BIT(2),
+ PDPLL2 = BIT(1),
+ DRI_PD_SER = BIT(0),
+};
+
+/* Page 1, Register 0x6c */
+enum {
+ PLL2N11 = GENMASK(7, 4),
+ PLL2N5_4 = BIT(3),
+ PLL2N5_TOP = BIT(2),
+ DRI_PLL_PD = BIT(1),
+ PD_I2CM = BIT(0),
+};
+
+/* Page 3, Register 0x28 */
+enum {
+ DIFF_EN = GENMASK(7, 6),
+ CORREC_EN = GENMASK(5, 4),
+ VGACLK_BP = BIT(3),
+ HM_LV_SEL = BIT(2),
+ HD_VGA_SEL = BIT(1),
+};
+
+/* Page 3, Register 0x2a */
+enum {
+ LVDSCLK_BP = BIT(7),
+ HDTVCLK_BP = BIT(6),
+ HDMICLK_BP = BIT(5),
+ HDTV_BP = BIT(4),
+ HDMI_BP = BIT(3),
+ THRWL = GENMASK(2, 0),
+};
+
+/* Page 4, Register 0x52 */
+enum {
+ PGM_ARSTB = BIT(7),
+ MCU_ARSTB = BIT(6),
+ MCU_RETB = BIT(2),
+ RESETIB = BIT(1),
+ RESETDB = BIT(0),
+};
+
+struct ch7033_priv {
+ struct regmap *regmap;
+ struct drm_bridge *next_bridge;
+ struct drm_bridge bridge;
+ struct drm_connector connector;
+};
+
+#define conn_to_ch7033_priv(x) \
+ container_of(x, struct ch7033_priv, connector)
+#define bridge_to_ch7033_priv(x) \
+ container_of(x, struct ch7033_priv, bridge)
+
+
+static enum drm_connector_status ch7033_connector_detect(
+ struct drm_connector *connector, bool force)
+{
+ struct ch7033_priv *priv = conn_to_ch7033_priv(connector);
+
+ return drm_bridge_detect(priv->next_bridge);
+}
+
+static const struct drm_connector_funcs ch7033_connector_funcs = {
+ .reset = drm_atomic_helper_connector_reset,
+ .fill_modes = drm_helper_probe_single_connector_modes,
+ .detect = ch7033_connector_detect,
+ .destroy = drm_connector_cleanup,
+ .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
+ .atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
+};
+
+static int ch7033_connector_get_modes(struct drm_connector *connector)
+{
+ struct ch7033_priv *priv = conn_to_ch7033_priv(connector);
+ struct edid *edid;
+ int ret;
+
+ edid = drm_bridge_get_edid(priv->next_bridge, connector);
+ drm_connector_update_edid_property(connector, edid);
+ if (edid) {
+ ret = drm_add_edid_modes(connector, edid);
+ kfree(edid);
+ } else {
+ ret = drm_add_modes_noedid(connector, 1920, 1080);
+ drm_set_preferred_mode(connector, 1024, 768);
+ }
+
+ return ret;
+}
+
+static struct drm_encoder *ch7033_connector_best_encoder(
+ struct drm_connector *connector)
+{
+ struct ch7033_priv *priv = conn_to_ch7033_priv(connector);
+
+ return priv->bridge.encoder;
+}
+
+static const struct drm_connector_helper_funcs ch7033_connector_helper_funcs = {
+ .get_modes = ch7033_connector_get_modes,
+ .best_encoder = ch7033_connector_best_encoder,
+};
+
+static void ch7033_hpd_event(void *arg, enum drm_connector_status status)
+{
+ struct ch7033_priv *priv = arg;
+
+ if (priv->bridge.dev)
+ drm_helper_hpd_irq_event(priv->connector.dev);
+}
+
+static int ch7033_bridge_attach(struct drm_bridge *bridge,
+ enum drm_bridge_attach_flags flags)
+{
+ struct ch7033_priv *priv = bridge_to_ch7033_priv(bridge);
+ struct drm_connector *connector = &priv->connector;
+ int ret;
+
+ ret = drm_bridge_attach(bridge->encoder, priv->next_bridge, bridge,
+ DRM_BRIDGE_ATTACH_NO_CONNECTOR);
+ if (ret)
+ return ret;
+
+ if (flags & DRM_BRIDGE_ATTACH_NO_CONNECTOR)
+ return 0;
+
+ if (priv->next_bridge->ops & DRM_BRIDGE_OP_DETECT) {
+ connector->polled = DRM_CONNECTOR_POLL_HPD;
+ } else {
+ connector->polled = DRM_CONNECTOR_POLL_CONNECT |
+ DRM_CONNECTOR_POLL_DISCONNECT;
+ }
+
+ if (priv->next_bridge->ops & DRM_BRIDGE_OP_HPD) {
+ drm_bridge_hpd_enable(priv->next_bridge, ch7033_hpd_event,
+ priv);
+ }
+
+ drm_connector_helper_add(connector,
+ &ch7033_connector_helper_funcs);
+ ret = drm_connector_init_with_ddc(bridge->dev, &priv->connector,
+ &ch7033_connector_funcs,
+ priv->next_bridge->type,
+ priv->next_bridge->ddc);
+ if (ret) {
+ DRM_ERROR("Failed to initialize connector\n");
+ return ret;
+ }
+
+ return drm_connector_attach_encoder(&priv->connector, bridge->encoder);
+}
+
+static void ch7033_bridge_detach(struct drm_bridge *bridge)
+{
+ struct ch7033_priv *priv = bridge_to_ch7033_priv(bridge);
+
+ if (priv->next_bridge->ops & DRM_BRIDGE_OP_HPD)
+ drm_bridge_hpd_disable(priv->next_bridge);
+ drm_connector_cleanup(&priv->connector);
+}
+
+static enum drm_mode_status ch7033_bridge_mode_valid(struct drm_bridge *bridge,
+ const struct drm_display_mode *mode)
+{
+ if (mode->clock > 165000)
+ return MODE_CLOCK_HIGH;
+ if (mode->hdisplay >= 1920)
+ return MODE_BAD_HVALUE;
+ if (mode->vdisplay >= 1080)
+ return MODE_BAD_VVALUE;
+ return MODE_OK;
+}
+
+static void ch7033_bridge_disable(struct drm_bridge *bridge)
+{
+ struct ch7033_priv *priv = bridge_to_ch7033_priv(bridge);
+
+ regmap_write(priv->regmap, 0x03, 0x04);
+ regmap_update_bits(priv->regmap, 0x52, RESETDB, 0x00);
+}
+
+static void ch7033_bridge_enable(struct drm_bridge *bridge)
+{
+ struct ch7033_priv *priv = bridge_to_ch7033_priv(bridge);
+
+ regmap_write(priv->regmap, 0x03, 0x04);
+ regmap_update_bits(priv->regmap, 0x52, RESETDB, RESETDB);
+}
+
+static void ch7033_bridge_mode_set(struct drm_bridge *bridge,
+ const struct drm_display_mode *mode,
+ const struct drm_display_mode *adjusted_mode)
+{
+ struct ch7033_priv *priv = bridge_to_ch7033_priv(bridge);
+ int hbporch = mode->hsync_start - mode->hdisplay;
+ int hsynclen = mode->hsync_end - mode->hsync_start;
+ int vbporch = mode->vsync_start - mode->vdisplay;
+ int vsynclen = mode->vsync_end - mode->vsync_start;
+
+ /*
+ * Page 4
+ */
+ regmap_write(priv->regmap, 0x03, 0x04);
+
+ /* Turn everything off to set all the registers to their defaults. */
+ regmap_write(priv->regmap, 0x52, 0x00);
+ /* Bring I/O block up. */
+ regmap_write(priv->regmap, 0x52, RESETIB);
+
+ /*
+ * Page 0
+ */
+ regmap_write(priv->regmap, 0x03, 0x00);
+
+ /* Bring up parts we need from the power down. */
+ regmap_update_bits(priv->regmap, 0x07, DRI_PD | IO_PD, 0);
+ regmap_update_bits(priv->regmap, 0x08, DRI_PDDRI | PDDAC | PANEN, 0);
+ regmap_update_bits(priv->regmap, 0x09, DPD | GCKOFF |
+ HDMI_PD | VGA_PD, 0);
+ regmap_update_bits(priv->regmap, 0x0a, HD_DVIB, 0);
+
+ /* Horizontal input timing. */
+ regmap_write(priv->regmap, 0x0b, (mode->htotal >> 8) << 3 |
+ (mode->hdisplay >> 8));
+ regmap_write(priv->regmap, 0x0c, mode->hdisplay);
+ regmap_write(priv->regmap, 0x0d, mode->htotal);
+ regmap_write(priv->regmap, 0x0e, (hsynclen >> 8) << 3 |
+ (hbporch >> 8));
+ regmap_write(priv->regmap, 0x0f, hbporch);
+ regmap_write(priv->regmap, 0x10, hsynclen);
+
+ /* Vertical input timing. */
+ regmap_write(priv->regmap, 0x11, (mode->vtotal >> 8) << 3 |
+ (mode->vdisplay >> 8));
+ regmap_write(priv->regmap, 0x12, mode->vdisplay);
+ regmap_write(priv->regmap, 0x13, mode->vtotal);
+ regmap_write(priv->regmap, 0x14, ((vsynclen >> 8) << 3) |
+ (vbporch >> 8));
+ regmap_write(priv->regmap, 0x15, vbporch);
+ regmap_write(priv->regmap, 0x16, vsynclen);
+
+ /* Input color swap. */
+ regmap_update_bits(priv->regmap, 0x18, SWAP, BYTE_SWAP_BGR);
+
+ /* Input clock and sync polarity. */
+ regmap_update_bits(priv->regmap, 0x19, 0x1, mode->clock >> 16);
+ regmap_update_bits(priv->regmap, 0x19, HPO_I | VPO_I | GCLKFREQ,
+ ((mode->flags & DRM_MODE_FLAG_PHSYNC) ? HPO_I : 0) |
+ ((mode->flags & DRM_MODE_FLAG_PVSYNC) ? VPO_I : 0) |
+ mode->clock >> 16);
+ regmap_write(priv->regmap, 0x1a, mode->clock >> 8);
+ regmap_write(priv->regmap, 0x1b, mode->clock);
+
+ /* Horizontal output timing. */
+ regmap_write(priv->regmap, 0x1f, (mode->htotal >> 8) << 3 |
+ (mode->hdisplay >> 8));
+ regmap_write(priv->regmap, 0x20, mode->hdisplay);
+ regmap_write(priv->regmap, 0x21, mode->htotal);
+
+ /* Vertical output timing. */
+ regmap_write(priv->regmap, 0x25, (mode->vtotal >> 8) << 3 |
+ (mode->vdisplay >> 8));
+ regmap_write(priv->regmap, 0x26, mode->vdisplay);
+ regmap_write(priv->regmap, 0x27, mode->vtotal);
+
+ /* VGA channel bypass */
+ regmap_update_bits(priv->regmap, 0x2b, VFMT, 9);
+
+ /* Output sync polarity. */
+ regmap_update_bits(priv->regmap, 0x2e, HPO_O | VPO_O,
+ ((mode->flags & DRM_MODE_FLAG_PHSYNC) ? HPO_O : 0) |
+ ((mode->flags & DRM_MODE_FLAG_PVSYNC) ? VPO_O : 0));
+
+ /* HDMI horizontal output timing. */
+ regmap_update_bits(priv->regmap, 0x54, HWO_HDMI_HI | HOO_HDMI_HI,
+ (hsynclen >> 8) << 3 |
+ (hbporch >> 8));
+ regmap_write(priv->regmap, 0x55, hbporch);
+ regmap_write(priv->regmap, 0x56, hsynclen);
+
+ /* HDMI vertical output timing. */
+ regmap_update_bits(priv->regmap, 0x57, VWO_HDMI_HI | VOO_HDMI_HI,
+ (vsynclen >> 8) << 3 |
+ (vbporch >> 8));
+ regmap_write(priv->regmap, 0x58, vbporch);
+ regmap_write(priv->regmap, 0x59, vsynclen);
+
+ /* Pick HDMI, not LVDS. */
+ regmap_update_bits(priv->regmap, 0x7e, HDMI_LVDS_SEL, HDMI_LVDS_SEL);
+
+ /*
+ * Page 1
+ */
+ regmap_write(priv->regmap, 0x03, 0x01);
+
+ /* No idea what these do, but VGA is wobbly and blinky without them. */
+ regmap_update_bits(priv->regmap, 0x07, CKINV, CKINV);
+ regmap_update_bits(priv->regmap, 0x08, DISPON, DISPON);
+
+ /* DRI PLL */
+ regmap_update_bits(priv->regmap, 0x0c, DRI_PLL_DIVSEL, DRI_PLL_DIVSEL);
+ if (mode->clock <= 40000) {
+ regmap_update_bits(priv->regmap, 0x0c, DRI_PLL_N1_1 |
+ DRI_PLL_N1_0 |
+ DRI_PLL_N3_1 |
+ DRI_PLL_N3_0,
+ 0);
+ } else if (mode->clock < 80000) {
+ regmap_update_bits(priv->regmap, 0x0c, DRI_PLL_N1_1 |
+ DRI_PLL_N1_0 |
+ DRI_PLL_N3_1 |
+ DRI_PLL_N3_0,
+ DRI_PLL_N3_0 |
+ DRI_PLL_N1_0);
+ } else {
+ regmap_update_bits(priv->regmap, 0x0c, DRI_PLL_N1_1 |
+ DRI_PLL_N1_0 |
+ DRI_PLL_N3_1 |
+ DRI_PLL_N3_0,
+ DRI_PLL_N3_1 |
+ DRI_PLL_N1_1);
+ }
+
+ /* This seems to be color calibration for VGA. */
+ regmap_write(priv->regmap, 0x64, 0x29); /* LSB Blue */
+ regmap_write(priv->regmap, 0x65, 0x29); /* LSB Green */
+ regmap_write(priv->regmap, 0x66, 0x29); /* LSB Red */
+ regmap_write(priv->regmap, 0x67, 0x00); /* MSB Blue */
+ regmap_write(priv->regmap, 0x68, 0x00); /* MSB Green */
+ regmap_write(priv->regmap, 0x69, 0x00); /* MSB Red */
+
+ regmap_update_bits(priv->regmap, 0x6b, DRI_PD_SER, 0x00);
+ regmap_update_bits(priv->regmap, 0x6c, DRI_PLL_PD, 0x00);
+
+ /*
+ * Page 3
+ */
+ regmap_write(priv->regmap, 0x03, 0x03);
+
+ /* More bypasses and apparently another HDMI/LVDS selector. */
+ regmap_update_bits(priv->regmap, 0x28, VGACLK_BP | HM_LV_SEL,
+ VGACLK_BP | HM_LV_SEL);
+ regmap_update_bits(priv->regmap, 0x2a, HDMICLK_BP | HDMI_BP,
+ HDMICLK_BP | HDMI_BP);
+
+ /*
+ * Page 4
+ */
+ regmap_write(priv->regmap, 0x03, 0x04);
+
+ /* Output clock. */
+ regmap_write(priv->regmap, 0x10, mode->clock >> 16);
+ regmap_write(priv->regmap, 0x11, mode->clock >> 8);
+ regmap_write(priv->regmap, 0x12, mode->clock);
+}
+
+static const struct drm_bridge_funcs ch7033_bridge_funcs = {
+ .attach = ch7033_bridge_attach,
+ .detach = ch7033_bridge_detach,
+ .mode_valid = ch7033_bridge_mode_valid,
+ .disable = ch7033_bridge_disable,
+ .enable = ch7033_bridge_enable,
+ .mode_set = ch7033_bridge_mode_set,
+};
+
+static const struct regmap_config ch7033_regmap_config = {
+ .reg_bits = 8,
+ .val_bits = 8,
+ .max_register = 0x7f,
+};
+
+static int ch7033_probe(struct i2c_client *client,
+ const struct i2c_device_id *id)
+{
+ struct device *dev = &client->dev;
+ struct ch7033_priv *priv;
+ unsigned int val;
+ int ret;
+
+ priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ dev_set_drvdata(dev, priv);
+
+ ret = drm_of_find_panel_or_bridge(dev->of_node, 1, -1, NULL,
+ &priv->next_bridge);
+ if (ret)
+ return ret;
+
+ priv->regmap = devm_regmap_init_i2c(client, &ch7033_regmap_config);
+ if (IS_ERR(priv->regmap)) {
+ dev_err(&client->dev, "regmap init failed\n");
+ return PTR_ERR(priv->regmap);
+ }
+
+ ret = regmap_read(priv->regmap, 0x00, &val);
+ if (ret < 0) {
+ dev_err(&client->dev, "error reading the model id: %d\n", ret);
+ return ret;
+ }
+ if ((val & 0xf7) != 0x56) {
+ dev_err(&client->dev, "the device is not a ch7033\n");
+ return -ENODEV;
+ }
+
+ regmap_write(priv->regmap, 0x03, 0x04);
+ ret = regmap_read(priv->regmap, 0x51, &val);
+ if (ret < 0) {
+ dev_err(&client->dev, "error reading the model id: %d\n", ret);
+ return ret;
+ }
+ if ((val & 0x0f) != 3) {
+ dev_err(&client->dev, "unknown revision %u\n", val);
+ return -ENODEV;
+ }
+
+ INIT_LIST_HEAD(&priv->bridge.list);
+ priv->bridge.funcs = &ch7033_bridge_funcs;
+ priv->bridge.of_node = dev->of_node;
+ drm_bridge_add(&priv->bridge);
+
+ dev_info(dev, "Chrontel CH7033 Video Encoder\n");
+ return 0;
+}
+
+static int ch7033_remove(struct i2c_client *client)
+{
+ struct device *dev = &client->dev;
+ struct ch7033_priv *priv = dev_get_drvdata(dev);
+
+ drm_bridge_remove(&priv->bridge);
+
+ return 0;
+}
+
+static const struct of_device_id ch7033_dt_ids[] = {
+ { .compatible = "chrontel,ch7033", },
+ { }
+};
+MODULE_DEVICE_TABLE(of, ch7033_dt_ids);
+
+static const struct i2c_device_id ch7033_ids[] = {
+ { "ch7033", 0 },
+ { }
+};
+MODULE_DEVICE_TABLE(i2c, ch7033_ids);
+
+static struct i2c_driver ch7033_driver = {
+ .probe = ch7033_probe,
+ .remove = ch7033_remove,
+ .driver = {
+ .name = "ch7033",
+ .of_match_table = of_match_ptr(ch7033_dt_ids),
+ },
+ .id_table = ch7033_ids,
+};
+
+module_i2c_driver(ch7033_driver);
+
+MODULE_AUTHOR("Lubomir Rintel <lkundrak@v3.sk>");
+MODULE_DESCRIPTION("Chrontel CH7033 Video Encoder Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/gpu/drm/bridge/nwl-dsi.c b/drivers/gpu/drm/bridge/nwl-dsi.c
new file mode 100644
index 000000000000..b14d725bf609
--- /dev/null
+++ b/drivers/gpu/drm/bridge/nwl-dsi.c
@@ -0,0 +1,1213 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * i.MX8 NWL MIPI DSI host driver
+ *
+ * Copyright (C) 2017 NXP
+ * Copyright (C) 2020 Purism SPC
+ */
+
+#include <linux/bitfield.h>
+#include <linux/clk.h>
+#include <linux/irq.h>
+#include <linux/math64.h>
+#include <linux/mfd/syscon.h>
+#include <linux/module.h>
+#include <linux/mux/consumer.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/phy/phy.h>
+#include <linux/regmap.h>
+#include <linux/reset.h>
+#include <linux/sys_soc.h>
+#include <linux/time64.h>
+
+#include <drm/drm_bridge.h>
+#include <drm/drm_mipi_dsi.h>
+#include <drm/drm_of.h>
+#include <drm/drm_panel.h>
+#include <drm/drm_print.h>
+
+#include <video/mipi_display.h>
+
+#include "nwl-dsi.h"
+
+#define DRV_NAME "nwl-dsi"
+
+/* i.MX8 NWL quirks */
+/* i.MX8MQ errata E11418 */
+#define E11418_HS_MODE_QUIRK BIT(0)
+
+#define NWL_DSI_MIPI_FIFO_TIMEOUT msecs_to_jiffies(500)
+
+enum transfer_direction {
+ DSI_PACKET_SEND,
+ DSI_PACKET_RECEIVE,
+};
+
+#define NWL_DSI_ENDPOINT_LCDIF 0
+#define NWL_DSI_ENDPOINT_DCSS 1
+
+struct nwl_dsi_plat_clk_config {
+ const char *id;
+ struct clk *clk;
+ bool present;
+};
+
+struct nwl_dsi_transfer {
+ const struct mipi_dsi_msg *msg;
+ struct mipi_dsi_packet packet;
+ struct completion completed;
+
+ int status; /* status of transmission */
+ enum transfer_direction direction;
+ bool need_bta;
+ u8 cmd;
+ u16 rx_word_count;
+ size_t tx_len; /* in bytes */
+ size_t rx_len; /* in bytes */
+};
+
+struct nwl_dsi {
+ struct drm_bridge bridge;
+ struct mipi_dsi_host dsi_host;
+ struct drm_bridge *panel_bridge;
+ struct device *dev;
+ struct phy *phy;
+ union phy_configure_opts phy_cfg;
+ unsigned int quirks;
+
+ struct regmap *regmap;
+ int irq;
+ /*
+ * The DSI host controller needs this reset sequence according to NWL:
+ * 1. Deassert pclk reset to get access to DSI regs
+ * 2. Configure DSI Host and DPHY and enable DPHY
+ * 3. Deassert ESC and BYTE resets to allow host TX operations
+ * 4. Send DSI cmds to configure peripheral (handled by panel drv)
+ * 5. Deassert DPI reset so DPI receives pixels and starts sending
+ * DSI data
+ *
+ * TODO: Since panel_bridges do their DSI setup in enable we
+ * currently have 4. and 5. swapped.
+ */
+ struct reset_control *rst_byte;
+ struct reset_control *rst_esc;
+ struct reset_control *rst_dpi;
+ struct reset_control *rst_pclk;
+ struct mux_control *mux;
+
+ /* DSI clocks */
+ struct clk *phy_ref_clk;
+ struct clk *rx_esc_clk;
+ struct clk *tx_esc_clk;
+ struct clk *core_clk;
+ /*
+ * hardware bug: the i.MX8MQ needs this clock on during reset
+ * even when not using LCDIF.
+ */
+ struct clk *lcdif_clk;
+
+ /* dsi lanes */
+ u32 lanes;
+ enum mipi_dsi_pixel_format format;
+ struct drm_display_mode mode;
+ unsigned long dsi_mode_flags;
+ int error;
+
+ struct nwl_dsi_transfer *xfer;
+};
+
+static const struct regmap_config nwl_dsi_regmap_config = {
+ .reg_bits = 16,
+ .val_bits = 32,
+ .reg_stride = 4,
+ .max_register = NWL_DSI_IRQ_MASK2,
+ .name = DRV_NAME,
+};
+
+static inline struct nwl_dsi *bridge_to_dsi(struct drm_bridge *bridge)
+{
+ return container_of(bridge, struct nwl_dsi, bridge);
+}
+
+static int nwl_dsi_clear_error(struct nwl_dsi *dsi)
+{
+ int ret = dsi->error;
+
+ dsi->error = 0;
+ return ret;
+}
+
+static void nwl_dsi_write(struct nwl_dsi *dsi, unsigned int reg, u32 val)
+{
+ int ret;
+
+ if (dsi->error)
+ return;
+
+ ret = regmap_write(dsi->regmap, reg, val);
+ if (ret < 0) {
+ DRM_DEV_ERROR(dsi->dev,
+ "Failed to write NWL DSI reg 0x%x: %d\n", reg,
+ ret);
+ dsi->error = ret;
+ }
+}
+
+static u32 nwl_dsi_read(struct nwl_dsi *dsi, u32 reg)
+{
+ unsigned int val;
+ int ret;
+
+ if (dsi->error)
+ return 0;
+
+ ret = regmap_read(dsi->regmap, reg, &val);
+ if (ret < 0) {
+ DRM_DEV_ERROR(dsi->dev, "Failed to read NWL DSI reg 0x%x: %d\n",
+ reg, ret);
+ dsi->error = ret;
+ }
+ return val;
+}
+
+static int nwl_dsi_get_dpi_pixel_format(enum mipi_dsi_pixel_format format)
+{
+ switch (format) {
+ case MIPI_DSI_FMT_RGB565:
+ return NWL_DSI_PIXEL_FORMAT_16;
+ case MIPI_DSI_FMT_RGB666:
+ return NWL_DSI_PIXEL_FORMAT_18L;
+ case MIPI_DSI_FMT_RGB666_PACKED:
+ return NWL_DSI_PIXEL_FORMAT_18;
+ case MIPI_DSI_FMT_RGB888:
+ return NWL_DSI_PIXEL_FORMAT_24;
+ default:
+ return -EINVAL;
+ }
+}
+
+/*
+ * ps2bc - Picoseconds to byte clock cycles
+ */
+static u32 ps2bc(struct nwl_dsi *dsi, unsigned long long ps)
+{
+ u32 bpp = mipi_dsi_pixel_format_to_bpp(dsi->format);
+
+ return DIV64_U64_ROUND_UP(ps * dsi->mode.clock * bpp,
+ dsi->lanes * 8 * NSEC_PER_SEC);
+}
+
+/*
+ * ui2bc - UI time periods to byte clock cycles
+ */
+static u32 ui2bc(struct nwl_dsi *dsi, unsigned long long ui)
+{
+ u32 bpp = mipi_dsi_pixel_format_to_bpp(dsi->format);
+
+ return DIV64_U64_ROUND_UP(ui * dsi->lanes,
+ dsi->mode.clock * 1000 * bpp);
+}
+
+/*
+ * us2lp - microseconds to LP clock cycles
+ */
+static u32 us2lp(u32 lp_clk_rate, unsigned long us)
+{
+ return DIV_ROUND_UP(us * lp_clk_rate, USEC_PER_SEC);
+}
+
+static int nwl_dsi_config_host(struct nwl_dsi *dsi)
+{
+ u32 cycles;
+ struct phy_configure_opts_mipi_dphy *cfg = &dsi->phy_cfg.mipi_dphy;
+
+ if (dsi->lanes < 1 || dsi->lanes > 4)
+ return -EINVAL;
+
+ DRM_DEV_DEBUG_DRIVER(dsi->dev, "DSI Lanes %d\n", dsi->lanes);
+ nwl_dsi_write(dsi, NWL_DSI_CFG_NUM_LANES, dsi->lanes - 1);
+
+ if (dsi->dsi_mode_flags & MIPI_DSI_CLOCK_NON_CONTINUOUS) {
+ nwl_dsi_write(dsi, NWL_DSI_CFG_NONCONTINUOUS_CLK, 0x01);
+ nwl_dsi_write(dsi, NWL_DSI_CFG_AUTOINSERT_EOTP, 0x01);
+ } else {
+ nwl_dsi_write(dsi, NWL_DSI_CFG_NONCONTINUOUS_CLK, 0x00);
+ nwl_dsi_write(dsi, NWL_DSI_CFG_AUTOINSERT_EOTP, 0x00);
+ }
+
+ /* values in byte clock cycles */
+ cycles = ui2bc(dsi, cfg->clk_pre);
+ DRM_DEV_DEBUG_DRIVER(dsi->dev, "cfg_t_pre: 0x%x\n", cycles);
+ nwl_dsi_write(dsi, NWL_DSI_CFG_T_PRE, cycles);
+ cycles = ps2bc(dsi, cfg->lpx + cfg->clk_prepare + cfg->clk_zero);
+ DRM_DEV_DEBUG_DRIVER(dsi->dev, "cfg_tx_gap (pre): 0x%x\n", cycles);
+ cycles += ui2bc(dsi, cfg->clk_pre);
+ DRM_DEV_DEBUG_DRIVER(dsi->dev, "cfg_t_post: 0x%x\n", cycles);
+ nwl_dsi_write(dsi, NWL_DSI_CFG_T_POST, cycles);
+ cycles = ps2bc(dsi, cfg->hs_exit);
+ DRM_DEV_DEBUG_DRIVER(dsi->dev, "cfg_tx_gap: 0x%x\n", cycles);
+ nwl_dsi_write(dsi, NWL_DSI_CFG_TX_GAP, cycles);
+
+ nwl_dsi_write(dsi, NWL_DSI_CFG_EXTRA_CMDS_AFTER_EOTP, 0x01);
+ nwl_dsi_write(dsi, NWL_DSI_CFG_HTX_TO_COUNT, 0x00);
+ nwl_dsi_write(dsi, NWL_DSI_CFG_LRX_H_TO_COUNT, 0x00);
+ nwl_dsi_write(dsi, NWL_DSI_CFG_BTA_H_TO_COUNT, 0x00);
+ /* In LP clock cycles */
+ cycles = us2lp(cfg->lp_clk_rate, cfg->wakeup);
+ DRM_DEV_DEBUG_DRIVER(dsi->dev, "cfg_twakeup: 0x%x\n", cycles);
+ nwl_dsi_write(dsi, NWL_DSI_CFG_TWAKEUP, cycles);
+
+ return nwl_dsi_clear_error(dsi);
+}
+
+static int nwl_dsi_config_dpi(struct nwl_dsi *dsi)
+{
+ u32 mode;
+ int color_format;
+ bool burst_mode;
+ int hfront_porch, hback_porch, vfront_porch, vback_porch;
+ int hsync_len, vsync_len;
+
+ hfront_porch = dsi->mode.hsync_start - dsi->mode.hdisplay;
+ hsync_len = dsi->mode.hsync_end - dsi->mode.hsync_start;
+ hback_porch = dsi->mode.htotal - dsi->mode.hsync_end;
+
+ vfront_porch = dsi->mode.vsync_start - dsi->mode.vdisplay;
+ vsync_len = dsi->mode.vsync_end - dsi->mode.vsync_start;
+ vback_porch = dsi->mode.vtotal - dsi->mode.vsync_end;
+
+ DRM_DEV_DEBUG_DRIVER(dsi->dev, "hfront_porch = %d\n", hfront_porch);
+ DRM_DEV_DEBUG_DRIVER(dsi->dev, "hback_porch = %d\n", hback_porch);
+ DRM_DEV_DEBUG_DRIVER(dsi->dev, "hsync_len = %d\n", hsync_len);
+ DRM_DEV_DEBUG_DRIVER(dsi->dev, "hdisplay = %d\n", dsi->mode.hdisplay);
+ DRM_DEV_DEBUG_DRIVER(dsi->dev, "vfront_porch = %d\n", vfront_porch);
+ DRM_DEV_DEBUG_DRIVER(dsi->dev, "vback_porch = %d\n", vback_porch);
+ DRM_DEV_DEBUG_DRIVER(dsi->dev, "vsync_len = %d\n", vsync_len);
+ DRM_DEV_DEBUG_DRIVER(dsi->dev, "vactive = %d\n", dsi->mode.vdisplay);
+ DRM_DEV_DEBUG_DRIVER(dsi->dev, "clock = %d kHz\n", dsi->mode.clock);
+
+ color_format = nwl_dsi_get_dpi_pixel_format(dsi->format);
+ if (color_format < 0) {
+ DRM_DEV_ERROR(dsi->dev, "Invalid color format 0x%x\n",
+ dsi->format);
+ return color_format;
+ }
+ DRM_DEV_DEBUG_DRIVER(dsi->dev, "pixel fmt = %d\n", dsi->format);
+
+ nwl_dsi_write(dsi, NWL_DSI_INTERFACE_COLOR_CODING, NWL_DSI_DPI_24_BIT);
+ nwl_dsi_write(dsi, NWL_DSI_PIXEL_FORMAT, color_format);
+ /*
+ * Adjusting input polarity based on the video mode results in
+ * a black screen so always pick active low:
+ */
+ nwl_dsi_write(dsi, NWL_DSI_VSYNC_POLARITY,
+ NWL_DSI_VSYNC_POLARITY_ACTIVE_LOW);
+ nwl_dsi_write(dsi, NWL_DSI_HSYNC_POLARITY,
+ NWL_DSI_HSYNC_POLARITY_ACTIVE_LOW);
+
+ burst_mode = (dsi->dsi_mode_flags & MIPI_DSI_MODE_VIDEO_BURST) &&
+ !(dsi->dsi_mode_flags & MIPI_DSI_MODE_VIDEO_SYNC_PULSE);
+
+ if (burst_mode) {
+ nwl_dsi_write(dsi, NWL_DSI_VIDEO_MODE, NWL_DSI_VM_BURST_MODE);
+ nwl_dsi_write(dsi, NWL_DSI_PIXEL_FIFO_SEND_LEVEL, 256);
+ } else {
+ mode = ((dsi->dsi_mode_flags & MIPI_DSI_MODE_VIDEO_SYNC_PULSE) ?
+ NWL_DSI_VM_BURST_MODE_WITH_SYNC_PULSES :
+ NWL_DSI_VM_NON_BURST_MODE_WITH_SYNC_EVENTS);
+ nwl_dsi_write(dsi, NWL_DSI_VIDEO_MODE, mode);
+ nwl_dsi_write(dsi, NWL_DSI_PIXEL_FIFO_SEND_LEVEL,
+ dsi->mode.hdisplay);
+ }
+
+ nwl_dsi_write(dsi, NWL_DSI_HFP, hfront_porch);
+ nwl_dsi_write(dsi, NWL_DSI_HBP, hback_porch);
+ nwl_dsi_write(dsi, NWL_DSI_HSA, hsync_len);
+
+ nwl_dsi_write(dsi, NWL_DSI_ENABLE_MULT_PKTS, 0x0);
+ nwl_dsi_write(dsi, NWL_DSI_BLLP_MODE, 0x1);
+ nwl_dsi_write(dsi, NWL_DSI_USE_NULL_PKT_BLLP, 0x0);
+ nwl_dsi_write(dsi, NWL_DSI_VC, 0x0);
+
+ nwl_dsi_write(dsi, NWL_DSI_PIXEL_PAYLOAD_SIZE, dsi->mode.hdisplay);
+ nwl_dsi_write(dsi, NWL_DSI_VACTIVE, dsi->mode.vdisplay - 1);
+ nwl_dsi_write(dsi, NWL_DSI_VBP, vback_porch);
+ nwl_dsi_write(dsi, NWL_DSI_VFP, vfront_porch);
+
+ return nwl_dsi_clear_error(dsi);
+}
+
+static int nwl_dsi_init_interrupts(struct nwl_dsi *dsi)
+{
+ u32 irq_enable;
+
+ nwl_dsi_write(dsi, NWL_DSI_IRQ_MASK, 0xffffffff);
+ nwl_dsi_write(dsi, NWL_DSI_IRQ_MASK2, 0x7);
+
+ irq_enable = ~(u32)(NWL_DSI_TX_PKT_DONE_MASK |
+ NWL_DSI_RX_PKT_HDR_RCVD_MASK |
+ NWL_DSI_TX_FIFO_OVFLW_MASK |
+ NWL_DSI_HS_TX_TIMEOUT_MASK);
+
+ nwl_dsi_write(dsi, NWL_DSI_IRQ_MASK, irq_enable);
+
+ return nwl_dsi_clear_error(dsi);
+}
+
+static int nwl_dsi_host_attach(struct mipi_dsi_host *dsi_host,
+ struct mipi_dsi_device *device)
+{
+ struct nwl_dsi *dsi = container_of(dsi_host, struct nwl_dsi, dsi_host);
+ struct device *dev = dsi->dev;
+
+ DRM_DEV_INFO(dev, "lanes=%u, format=0x%x flags=0x%lx\n", device->lanes,
+ device->format, device->mode_flags);
+
+ if (device->lanes < 1 || device->lanes > 4)
+ return -EINVAL;
+
+ dsi->lanes = device->lanes;
+ dsi->format = device->format;
+ dsi->dsi_mode_flags = device->mode_flags;
+
+ return 0;
+}
+
+static bool nwl_dsi_read_packet(struct nwl_dsi *dsi, u32 status)
+{
+ struct device *dev = dsi->dev;
+ struct nwl_dsi_transfer *xfer = dsi->xfer;
+ int err;
+ u8 *payload = xfer->msg->rx_buf;
+ u32 val;
+ u16 word_count;
+ u8 channel;
+ u8 data_type;
+
+ xfer->status = 0;
+
+ if (xfer->rx_word_count == 0) {
+ if (!(status & NWL_DSI_RX_PKT_HDR_RCVD))
+ return false;
+ /* Get the RX header and parse it */
+ val = nwl_dsi_read(dsi, NWL_DSI_RX_PKT_HEADER);
+ err = nwl_dsi_clear_error(dsi);
+ if (err)
+ xfer->status = err;
+ word_count = NWL_DSI_WC(val);
+ channel = NWL_DSI_RX_VC(val);
+ data_type = NWL_DSI_RX_DT(val);
+
+ if (channel != xfer->msg->channel) {
+ DRM_DEV_ERROR(dev,
+ "[%02X] Channel mismatch (%u != %u)\n",
+ xfer->cmd, channel, xfer->msg->channel);
+ xfer->status = -EINVAL;
+ return true;
+ }
+
+ switch (data_type) {
+ case MIPI_DSI_RX_GENERIC_SHORT_READ_RESPONSE_2BYTE:
+ fallthrough;
+ case MIPI_DSI_RX_DCS_SHORT_READ_RESPONSE_2BYTE:
+ if (xfer->msg->rx_len > 1) {
+ /* read second byte */
+ payload[1] = word_count >> 8;
+ ++xfer->rx_len;
+ }
+ fallthrough;
+ case MIPI_DSI_RX_GENERIC_SHORT_READ_RESPONSE_1BYTE:
+ fallthrough;
+ case MIPI_DSI_RX_DCS_SHORT_READ_RESPONSE_1BYTE:
+ if (xfer->msg->rx_len > 0) {
+ /* read first byte */
+ payload[0] = word_count & 0xff;
+ ++xfer->rx_len;
+ }
+ xfer->status = xfer->rx_len;
+ return true;
+ case MIPI_DSI_RX_ACKNOWLEDGE_AND_ERROR_REPORT:
+ word_count &= 0xff;
+ DRM_DEV_ERROR(dev, "[%02X] DSI error report: 0x%02x\n",
+ xfer->cmd, word_count);
+ xfer->status = -EPROTO;
+ return true;
+ }
+
+ if (word_count > xfer->msg->rx_len) {
+ DRM_DEV_ERROR(dev,
+ "[%02X] Receive buffer too small: %zu (< %u)\n",
+ xfer->cmd, xfer->msg->rx_len, word_count);
+ xfer->status = -EINVAL;
+ return true;
+ }
+
+ xfer->rx_word_count = word_count;
+ } else {
+ /* Set word_count from previous header read */
+ word_count = xfer->rx_word_count;
+ }
+
+ /* If RX payload is not yet received, wait for it */
+ if (!(status & NWL_DSI_RX_PKT_PAYLOAD_DATA_RCVD))
+ return false;
+
+ /* Read the RX payload */
+ while (word_count >= 4) {
+ val = nwl_dsi_read(dsi, NWL_DSI_RX_PAYLOAD);
+ payload[0] = (val >> 0) & 0xff;
+ payload[1] = (val >> 8) & 0xff;
+ payload[2] = (val >> 16) & 0xff;
+ payload[3] = (val >> 24) & 0xff;
+ payload += 4;
+ xfer->rx_len += 4;
+ word_count -= 4;
+ }
+
+ if (word_count > 0) {
+ val = nwl_dsi_read(dsi, NWL_DSI_RX_PAYLOAD);
+ switch (word_count) {
+ case 3:
+ payload[2] = (val >> 16) & 0xff;
+ ++xfer->rx_len;
+ fallthrough;
+ case 2:
+ payload[1] = (val >> 8) & 0xff;
+ ++xfer->rx_len;
+ fallthrough;
+ case 1:
+ payload[0] = (val >> 0) & 0xff;
+ ++xfer->rx_len;
+ break;
+ }
+ }
+
+ xfer->status = xfer->rx_len;
+ err = nwl_dsi_clear_error(dsi);
+ if (err)
+ xfer->status = err;
+
+ return true;
+}
+
+static void nwl_dsi_finish_transmission(struct nwl_dsi *dsi, u32 status)
+{
+ struct nwl_dsi_transfer *xfer = dsi->xfer;
+ bool end_packet = false;
+
+ if (!xfer)
+ return;
+
+ if (xfer->direction == DSI_PACKET_SEND &&
+ status & NWL_DSI_TX_PKT_DONE) {
+ xfer->status = xfer->tx_len;
+ end_packet = true;
+ } else if (status & NWL_DSI_DPHY_DIRECTION &&
+ ((status & (NWL_DSI_RX_PKT_HDR_RCVD |
+ NWL_DSI_RX_PKT_PAYLOAD_DATA_RCVD)))) {
+ end_packet = nwl_dsi_read_packet(dsi, status);
+ }
+
+ if (end_packet)
+ complete(&xfer->completed);
+}
+
+static void nwl_dsi_begin_transmission(struct nwl_dsi *dsi)
+{
+ struct nwl_dsi_transfer *xfer = dsi->xfer;
+ struct mipi_dsi_packet *pkt = &xfer->packet;
+ const u8 *payload;
+ size_t length;
+ u16 word_count;
+ u8 hs_mode;
+ u32 val;
+ u32 hs_workaround = 0;
+
+ /* Send the payload, if any */
+ length = pkt->payload_length;
+ payload = pkt->payload;
+
+ while (length >= 4) {
+ val = *(u32 *)payload;
+ hs_workaround |= !(val & 0xFFFF00);
+ nwl_dsi_write(dsi, NWL_DSI_TX_PAYLOAD, val);
+ payload += 4;
+ length -= 4;
+ }
+ /* Send the rest of the payload */
+ val = 0;
+ switch (length) {
+ case 3:
+ val |= payload[2] << 16;
+ fallthrough;
+ case 2:
+ val |= payload[1] << 8;
+ hs_workaround |= !(val & 0xFFFF00);
+ fallthrough;
+ case 1:
+ val |= payload[0];
+ nwl_dsi_write(dsi, NWL_DSI_TX_PAYLOAD, val);
+ break;
+ }
+ xfer->tx_len = pkt->payload_length;
+
+ /*
+ * Send the header
+ * header[0] = Virtual Channel + Data Type
+ * header[1] = Word Count LSB (LP) or first param (SP)
+ * header[2] = Word Count MSB (LP) or second param (SP)
+ */
+ word_count = pkt->header[1] | (pkt->header[2] << 8);
+ if (hs_workaround && (dsi->quirks & E11418_HS_MODE_QUIRK)) {
+ DRM_DEV_DEBUG_DRIVER(dsi->dev,
+ "Using hs mode workaround for cmd 0x%x\n",
+ xfer->cmd);
+ hs_mode = 1;
+ } else {
+ hs_mode = (xfer->msg->flags & MIPI_DSI_MSG_USE_LPM) ? 0 : 1;
+ }
+ val = NWL_DSI_WC(word_count) | NWL_DSI_TX_VC(xfer->msg->channel) |
+ NWL_DSI_TX_DT(xfer->msg->type) | NWL_DSI_HS_SEL(hs_mode) |
+ NWL_DSI_BTA_TX(xfer->need_bta);
+ nwl_dsi_write(dsi, NWL_DSI_PKT_CONTROL, val);
+
+ /* Send packet command */
+ nwl_dsi_write(dsi, NWL_DSI_SEND_PACKET, 0x1);
+}
+
+static ssize_t nwl_dsi_host_transfer(struct mipi_dsi_host *dsi_host,
+ const struct mipi_dsi_msg *msg)
+{
+ struct nwl_dsi *dsi = container_of(dsi_host, struct nwl_dsi, dsi_host);
+ struct nwl_dsi_transfer xfer;
+ ssize_t ret = 0;
+
+ /* Create packet to be sent */
+ dsi->xfer = &xfer;
+ ret = mipi_dsi_create_packet(&xfer.packet, msg);
+ if (ret < 0) {
+ dsi->xfer = NULL;
+ return ret;
+ }
+
+ if ((msg->type & MIPI_DSI_GENERIC_READ_REQUEST_0_PARAM ||
+ msg->type & MIPI_DSI_GENERIC_READ_REQUEST_1_PARAM ||
+ msg->type & MIPI_DSI_GENERIC_READ_REQUEST_2_PARAM ||
+ msg->type & MIPI_DSI_DCS_READ) &&
+ msg->rx_len > 0 && msg->rx_buf)
+ xfer.direction = DSI_PACKET_RECEIVE;
+ else
+ xfer.direction = DSI_PACKET_SEND;
+
+ xfer.need_bta = (xfer.direction == DSI_PACKET_RECEIVE);
+ xfer.need_bta |= (msg->flags & MIPI_DSI_MSG_REQ_ACK) ? 1 : 0;
+ xfer.msg = msg;
+ xfer.status = -ETIMEDOUT;
+ xfer.rx_word_count = 0;
+ xfer.rx_len = 0;
+ xfer.cmd = 0x00;
+ if (msg->tx_len > 0)
+ xfer.cmd = ((u8 *)(msg->tx_buf))[0];
+ init_completion(&xfer.completed);
+
+ ret = clk_prepare_enable(dsi->rx_esc_clk);
+ if (ret < 0) {
+ DRM_DEV_ERROR(dsi->dev, "Failed to enable rx_esc clk: %zd\n",
+ ret);
+ return ret;
+ }
+ DRM_DEV_DEBUG_DRIVER(dsi->dev, "Enabled rx_esc clk @%lu Hz\n",
+ clk_get_rate(dsi->rx_esc_clk));
+
+ /* Initiate the DSI packet transmission */
+ nwl_dsi_begin_transmission(dsi);
+
+ if (!wait_for_completion_timeout(&xfer.completed,
+ NWL_DSI_MIPI_FIFO_TIMEOUT)) {
+ DRM_DEV_ERROR(dsi_host->dev, "[%02X] DSI transfer timed out\n",
+ xfer.cmd);
+ ret = -ETIMEDOUT;
+ } else {
+ ret = xfer.status;
+ }
+
+ clk_disable_unprepare(dsi->rx_esc_clk);
+
+ return ret;
+}
+
+static const struct mipi_dsi_host_ops nwl_dsi_host_ops = {
+ .attach = nwl_dsi_host_attach,
+ .transfer = nwl_dsi_host_transfer,
+};
+
+static irqreturn_t nwl_dsi_irq_handler(int irq, void *data)
+{
+ u32 irq_status;
+ struct nwl_dsi *dsi = data;
+
+ irq_status = nwl_dsi_read(dsi, NWL_DSI_IRQ_STATUS);
+
+ if (irq_status & NWL_DSI_TX_FIFO_OVFLW)
+ DRM_DEV_ERROR_RATELIMITED(dsi->dev, "tx fifo overflow\n");
+
+ if (irq_status & NWL_DSI_HS_TX_TIMEOUT)
+ DRM_DEV_ERROR_RATELIMITED(dsi->dev, "HS tx timeout\n");
+
+ if (irq_status & NWL_DSI_TX_PKT_DONE ||
+ irq_status & NWL_DSI_RX_PKT_HDR_RCVD ||
+ irq_status & NWL_DSI_RX_PKT_PAYLOAD_DATA_RCVD)
+ nwl_dsi_finish_transmission(dsi, irq_status);
+
+ return IRQ_HANDLED;
+}
+
+static int nwl_dsi_enable(struct nwl_dsi *dsi)
+{
+ struct device *dev = dsi->dev;
+ union phy_configure_opts *phy_cfg = &dsi->phy_cfg;
+ int ret;
+
+ if (!dsi->lanes) {
+ DRM_DEV_ERROR(dev, "Need DSI lanes: %d\n", dsi->lanes);
+ return -EINVAL;
+ }
+
+ ret = phy_init(dsi->phy);
+ if (ret < 0) {
+ DRM_DEV_ERROR(dev, "Failed to init DSI phy: %d\n", ret);
+ return ret;
+ }
+
+ ret = phy_configure(dsi->phy, phy_cfg);
+ if (ret < 0) {
+ DRM_DEV_ERROR(dev, "Failed to configure DSI phy: %d\n", ret);
+ goto uninit_phy;
+ }
+
+ ret = clk_prepare_enable(dsi->tx_esc_clk);
+ if (ret < 0) {
+ DRM_DEV_ERROR(dsi->dev, "Failed to enable tx_esc clk: %d\n",
+ ret);
+ goto uninit_phy;
+ }
+ DRM_DEV_DEBUG_DRIVER(dsi->dev, "Enabled tx_esc clk @%lu Hz\n",
+ clk_get_rate(dsi->tx_esc_clk));
+
+ ret = nwl_dsi_config_host(dsi);
+ if (ret < 0) {
+ DRM_DEV_ERROR(dev, "Failed to set up DSI: %d", ret);
+ goto disable_clock;
+ }
+
+ ret = nwl_dsi_config_dpi(dsi);
+ if (ret < 0) {
+ DRM_DEV_ERROR(dev, "Failed to set up DPI: %d", ret);
+ goto disable_clock;
+ }
+
+ ret = phy_power_on(dsi->phy);
+ if (ret < 0) {
+ DRM_DEV_ERROR(dev, "Failed to power on DPHY (%d)\n", ret);
+ goto disable_clock;
+ }
+
+ ret = nwl_dsi_init_interrupts(dsi);
+ if (ret < 0)
+ goto power_off_phy;
+
+ return ret;
+
+power_off_phy:
+ phy_power_off(dsi->phy);
+disable_clock:
+ clk_disable_unprepare(dsi->tx_esc_clk);
+uninit_phy:
+ phy_exit(dsi->phy);
+
+ return ret;
+}
+
+static int nwl_dsi_disable(struct nwl_dsi *dsi)
+{
+ struct device *dev = dsi->dev;
+
+ DRM_DEV_DEBUG_DRIVER(dev, "Disabling clocks and phy\n");
+
+ phy_power_off(dsi->phy);
+ phy_exit(dsi->phy);
+
+ /* Disabling the clock before the phy breaks enabling dsi again */
+ clk_disable_unprepare(dsi->tx_esc_clk);
+
+ return 0;
+}
+
+static void nwl_dsi_bridge_disable(struct drm_bridge *bridge)
+{
+ struct nwl_dsi *dsi = bridge_to_dsi(bridge);
+ int ret;
+
+ nwl_dsi_disable(dsi);
+
+ ret = reset_control_assert(dsi->rst_dpi);
+ if (ret < 0) {
+ DRM_DEV_ERROR(dsi->dev, "Failed to assert DPI: %d\n", ret);
+ return;
+ }
+ ret = reset_control_assert(dsi->rst_byte);
+ if (ret < 0) {
+ DRM_DEV_ERROR(dsi->dev, "Failed to assert ESC: %d\n", ret);
+ return;
+ }
+ ret = reset_control_assert(dsi->rst_esc);
+ if (ret < 0) {
+ DRM_DEV_ERROR(dsi->dev, "Failed to assert BYTE: %d\n", ret);
+ return;
+ }
+ ret = reset_control_assert(dsi->rst_pclk);
+ if (ret < 0) {
+ DRM_DEV_ERROR(dsi->dev, "Failed to assert PCLK: %d\n", ret);
+ return;
+ }
+
+ clk_disable_unprepare(dsi->core_clk);
+ clk_disable_unprepare(dsi->lcdif_clk);
+
+ pm_runtime_put(dsi->dev);
+}
+
+static int nwl_dsi_get_dphy_params(struct nwl_dsi *dsi,
+ const struct drm_display_mode *mode,
+ union phy_configure_opts *phy_opts)
+{
+ unsigned long rate;
+ int ret;
+
+ if (dsi->lanes < 1 || dsi->lanes > 4)
+ return -EINVAL;
+
+ /*
+ * So far the DPHY spec minimal timings work for both mixel
+ * dphy and nwl dsi host
+ */
+ ret = phy_mipi_dphy_get_default_config(mode->clock * 1000,
+ mipi_dsi_pixel_format_to_bpp(dsi->format), dsi->lanes,
+ &phy_opts->mipi_dphy);
+ if (ret < 0)
+ return ret;
+
+ rate = clk_get_rate(dsi->tx_esc_clk);
+ DRM_DEV_DEBUG_DRIVER(dsi->dev, "LP clk is @%lu Hz\n", rate);
+ phy_opts->mipi_dphy.lp_clk_rate = rate;
+
+ return 0;
+}
+
+static bool nwl_dsi_bridge_mode_fixup(struct drm_bridge *bridge,
+ const struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode)
+{
+ /* At least LCDIF + NWL needs active high sync */
+ adjusted_mode->flags |= (DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC);
+ adjusted_mode->flags &= ~(DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC);
+
+ return true;
+}
+
+static enum drm_mode_status
+nwl_dsi_bridge_mode_valid(struct drm_bridge *bridge,
+ const struct drm_display_mode *mode)
+{
+ struct nwl_dsi *dsi = bridge_to_dsi(bridge);
+ int bpp = mipi_dsi_pixel_format_to_bpp(dsi->format);
+
+ if (mode->clock * bpp > 15000000 * dsi->lanes)
+ return MODE_CLOCK_HIGH;
+
+ if (mode->clock * bpp < 80000 * dsi->lanes)
+ return MODE_CLOCK_LOW;
+
+ return MODE_OK;
+}
+
+static void
+nwl_dsi_bridge_mode_set(struct drm_bridge *bridge,
+ const struct drm_display_mode *mode,
+ const struct drm_display_mode *adjusted_mode)
+{
+ struct nwl_dsi *dsi = bridge_to_dsi(bridge);
+ struct device *dev = dsi->dev;
+ union phy_configure_opts new_cfg;
+ unsigned long phy_ref_rate;
+ int ret;
+
+ ret = nwl_dsi_get_dphy_params(dsi, adjusted_mode, &new_cfg);
+ if (ret < 0)
+ return;
+
+ /*
+ * If hs clock is unchanged, we're all good - all parameters are
+ * derived from it at the moment.
+ */
+ if (new_cfg.mipi_dphy.hs_clk_rate == dsi->phy_cfg.mipi_dphy.hs_clk_rate)
+ return;
+
+ phy_ref_rate = clk_get_rate(dsi->phy_ref_clk);
+ DRM_DEV_DEBUG_DRIVER(dev, "PHY at ref rate: %lu\n", phy_ref_rate);
+ /* Save the new desired phy config */
+ memcpy(&dsi->phy_cfg, &new_cfg, sizeof(new_cfg));
+
+ memcpy(&dsi->mode, adjusted_mode, sizeof(dsi->mode));
+ drm_mode_debug_printmodeline(adjusted_mode);
+}
+
+static void nwl_dsi_bridge_pre_enable(struct drm_bridge *bridge)
+{
+ struct nwl_dsi *dsi = bridge_to_dsi(bridge);
+ int ret;
+
+ pm_runtime_get_sync(dsi->dev);
+
+ if (clk_prepare_enable(dsi->lcdif_clk) < 0)
+ return;
+ if (clk_prepare_enable(dsi->core_clk) < 0)
+ return;
+
+ /* Step 1 from DSI reset-out instructions */
+ ret = reset_control_deassert(dsi->rst_pclk);
+ if (ret < 0) {
+ DRM_DEV_ERROR(dsi->dev, "Failed to deassert PCLK: %d\n", ret);
+ return;
+ }
+
+ /* Step 2 from DSI reset-out instructions */
+ nwl_dsi_enable(dsi);
+
+ /* Step 3 from DSI reset-out instructions */
+ ret = reset_control_deassert(dsi->rst_esc);
+ if (ret < 0) {
+ DRM_DEV_ERROR(dsi->dev, "Failed to deassert ESC: %d\n", ret);
+ return;
+ }
+ ret = reset_control_deassert(dsi->rst_byte);
+ if (ret < 0) {
+ DRM_DEV_ERROR(dsi->dev, "Failed to deassert BYTE: %d\n", ret);
+ return;
+ }
+}
+
+static void nwl_dsi_bridge_enable(struct drm_bridge *bridge)
+{
+ struct nwl_dsi *dsi = bridge_to_dsi(bridge);
+ int ret;
+
+ /* Step 5 from DSI reset-out instructions */
+ ret = reset_control_deassert(dsi->rst_dpi);
+ if (ret < 0)
+ DRM_DEV_ERROR(dsi->dev, "Failed to deassert DPI: %d\n", ret);
+}
+
+static int nwl_dsi_bridge_attach(struct drm_bridge *bridge,
+ enum drm_bridge_attach_flags flags)
+{
+ struct nwl_dsi *dsi = bridge_to_dsi(bridge);
+ struct drm_bridge *panel_bridge;
+ struct drm_panel *panel;
+ int ret;
+
+ if (flags & DRM_BRIDGE_ATTACH_NO_CONNECTOR) {
+ DRM_ERROR("Fix bridge driver to make connector optional!");
+ return -EINVAL;
+ }
+
+ ret = drm_of_find_panel_or_bridge(dsi->dev->of_node, 1, 0, &panel,
+ &panel_bridge);
+ if (ret)
+ return ret;
+
+ if (panel) {
+ panel_bridge = drm_panel_bridge_add(panel);
+ if (IS_ERR(panel_bridge))
+ return PTR_ERR(panel_bridge);
+ }
+ dsi->panel_bridge = panel_bridge;
+
+ if (!dsi->panel_bridge)
+ return -EPROBE_DEFER;
+
+ return drm_bridge_attach(bridge->encoder, dsi->panel_bridge, bridge,
+ flags);
+}
+
+static void nwl_dsi_bridge_detach(struct drm_bridge *bridge)
+{
+ struct nwl_dsi *dsi = bridge_to_dsi(bridge);
+
+ drm_of_panel_bridge_remove(dsi->dev->of_node, 1, 0);
+}
+
+static const struct drm_bridge_funcs nwl_dsi_bridge_funcs = {
+ .pre_enable = nwl_dsi_bridge_pre_enable,
+ .enable = nwl_dsi_bridge_enable,
+ .disable = nwl_dsi_bridge_disable,
+ .mode_fixup = nwl_dsi_bridge_mode_fixup,
+ .mode_set = nwl_dsi_bridge_mode_set,
+ .mode_valid = nwl_dsi_bridge_mode_valid,
+ .attach = nwl_dsi_bridge_attach,
+ .detach = nwl_dsi_bridge_detach,
+};
+
+static int nwl_dsi_parse_dt(struct nwl_dsi *dsi)
+{
+ struct platform_device *pdev = to_platform_device(dsi->dev);
+ struct clk *clk;
+ void __iomem *base;
+ int ret;
+
+ dsi->phy = devm_phy_get(dsi->dev, "dphy");
+ if (IS_ERR(dsi->phy)) {
+ ret = PTR_ERR(dsi->phy);
+ if (ret != -EPROBE_DEFER)
+ DRM_DEV_ERROR(dsi->dev, "Could not get PHY: %d\n", ret);
+ return ret;
+ }
+
+ clk = devm_clk_get(dsi->dev, "lcdif");
+ if (IS_ERR(clk)) {
+ ret = PTR_ERR(clk);
+ DRM_DEV_ERROR(dsi->dev, "Failed to get lcdif clock: %d\n",
+ ret);
+ return ret;
+ }
+ dsi->lcdif_clk = clk;
+
+ clk = devm_clk_get(dsi->dev, "core");
+ if (IS_ERR(clk)) {
+ ret = PTR_ERR(clk);
+ DRM_DEV_ERROR(dsi->dev, "Failed to get core clock: %d\n",
+ ret);
+ return ret;
+ }
+ dsi->core_clk = clk;
+
+ clk = devm_clk_get(dsi->dev, "phy_ref");
+ if (IS_ERR(clk)) {
+ ret = PTR_ERR(clk);
+ DRM_DEV_ERROR(dsi->dev, "Failed to get phy_ref clock: %d\n",
+ ret);
+ return ret;
+ }
+ dsi->phy_ref_clk = clk;
+
+ clk = devm_clk_get(dsi->dev, "rx_esc");
+ if (IS_ERR(clk)) {
+ ret = PTR_ERR(clk);
+ DRM_DEV_ERROR(dsi->dev, "Failed to get rx_esc clock: %d\n",
+ ret);
+ return ret;
+ }
+ dsi->rx_esc_clk = clk;
+
+ clk = devm_clk_get(dsi->dev, "tx_esc");
+ if (IS_ERR(clk)) {
+ ret = PTR_ERR(clk);
+ DRM_DEV_ERROR(dsi->dev, "Failed to get tx_esc clock: %d\n",
+ ret);
+ return ret;
+ }
+ dsi->tx_esc_clk = clk;
+
+ dsi->mux = devm_mux_control_get(dsi->dev, NULL);
+ if (IS_ERR(dsi->mux)) {
+ ret = PTR_ERR(dsi->mux);
+ if (ret != -EPROBE_DEFER)
+ DRM_DEV_ERROR(dsi->dev, "Failed to get mux: %d\n", ret);
+ return ret;
+ }
+
+ base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(base))
+ return PTR_ERR(base);
+
+ dsi->regmap =
+ devm_regmap_init_mmio(dsi->dev, base, &nwl_dsi_regmap_config);
+ if (IS_ERR(dsi->regmap)) {
+ ret = PTR_ERR(dsi->regmap);
+ DRM_DEV_ERROR(dsi->dev, "Failed to create NWL DSI regmap: %d\n",
+ ret);
+ return ret;
+ }
+
+ dsi->irq = platform_get_irq(pdev, 0);
+ if (dsi->irq < 0) {
+ DRM_DEV_ERROR(dsi->dev, "Failed to get device IRQ: %d\n",
+ dsi->irq);
+ return dsi->irq;
+ }
+
+ dsi->rst_pclk = devm_reset_control_get_exclusive(dsi->dev, "pclk");
+ if (IS_ERR(dsi->rst_pclk)) {
+ DRM_DEV_ERROR(dsi->dev, "Failed to get pclk reset: %ld\n",
+ PTR_ERR(dsi->rst_pclk));
+ return PTR_ERR(dsi->rst_pclk);
+ }
+ dsi->rst_byte = devm_reset_control_get_exclusive(dsi->dev, "byte");
+ if (IS_ERR(dsi->rst_byte)) {
+ DRM_DEV_ERROR(dsi->dev, "Failed to get byte reset: %ld\n",
+ PTR_ERR(dsi->rst_byte));
+ return PTR_ERR(dsi->rst_byte);
+ }
+ dsi->rst_esc = devm_reset_control_get_exclusive(dsi->dev, "esc");
+ if (IS_ERR(dsi->rst_esc)) {
+ DRM_DEV_ERROR(dsi->dev, "Failed to get esc reset: %ld\n",
+ PTR_ERR(dsi->rst_esc));
+ return PTR_ERR(dsi->rst_esc);
+ }
+ dsi->rst_dpi = devm_reset_control_get_exclusive(dsi->dev, "dpi");
+ if (IS_ERR(dsi->rst_dpi)) {
+ DRM_DEV_ERROR(dsi->dev, "Failed to get dpi reset: %ld\n",
+ PTR_ERR(dsi->rst_dpi));
+ return PTR_ERR(dsi->rst_dpi);
+ }
+ return 0;
+}
+
+static int nwl_dsi_select_input(struct nwl_dsi *dsi)
+{
+ struct device_node *remote;
+ u32 use_dcss = 1;
+ int ret;
+
+ remote = of_graph_get_remote_node(dsi->dev->of_node, 0,
+ NWL_DSI_ENDPOINT_LCDIF);
+ if (remote) {
+ use_dcss = 0;
+ } else {
+ remote = of_graph_get_remote_node(dsi->dev->of_node, 0,
+ NWL_DSI_ENDPOINT_DCSS);
+ if (!remote) {
+ DRM_DEV_ERROR(dsi->dev,
+ "No valid input endpoint found\n");
+ return -EINVAL;
+ }
+ }
+
+ DRM_DEV_INFO(dsi->dev, "Using %s as input source\n",
+ (use_dcss) ? "DCSS" : "LCDIF");
+ ret = mux_control_try_select(dsi->mux, use_dcss);
+ if (ret < 0)
+ DRM_DEV_ERROR(dsi->dev, "Failed to select input: %d\n", ret);
+
+ of_node_put(remote);
+ return ret;
+}
+
+static int nwl_dsi_deselect_input(struct nwl_dsi *dsi)
+{
+ int ret;
+
+ ret = mux_control_deselect(dsi->mux);
+ if (ret < 0)
+ DRM_DEV_ERROR(dsi->dev, "Failed to deselect input: %d\n", ret);
+
+ return ret;
+}
+
+static const struct drm_bridge_timings nwl_dsi_timings = {
+ .input_bus_flags = DRM_BUS_FLAG_DE_LOW,
+};
+
+static const struct of_device_id nwl_dsi_dt_ids[] = {
+ { .compatible = "fsl,imx8mq-nwl-dsi", },
+ { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, nwl_dsi_dt_ids);
+
+static const struct soc_device_attribute nwl_dsi_quirks_match[] = {
+ { .soc_id = "i.MX8MQ", .revision = "2.0",
+ .data = (void *)E11418_HS_MODE_QUIRK },
+ { /* sentinel. */ },
+};
+
+static int nwl_dsi_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ const struct soc_device_attribute *attr;
+ struct nwl_dsi *dsi;
+ int ret;
+
+ dsi = devm_kzalloc(dev, sizeof(*dsi), GFP_KERNEL);
+ if (!dsi)
+ return -ENOMEM;
+
+ dsi->dev = dev;
+
+ ret = nwl_dsi_parse_dt(dsi);
+ if (ret)
+ return ret;
+
+ ret = devm_request_irq(dev, dsi->irq, nwl_dsi_irq_handler, 0,
+ dev_name(dev), dsi);
+ if (ret < 0) {
+ DRM_DEV_ERROR(dev, "Failed to request IRQ %d: %d\n", dsi->irq,
+ ret);
+ return ret;
+ }
+
+ dsi->dsi_host.ops = &nwl_dsi_host_ops;
+ dsi->dsi_host.dev = dev;
+ ret = mipi_dsi_host_register(&dsi->dsi_host);
+ if (ret) {
+ DRM_DEV_ERROR(dev, "Failed to register MIPI host: %d\n", ret);
+ return ret;
+ }
+
+ attr = soc_device_match(nwl_dsi_quirks_match);
+ if (attr)
+ dsi->quirks = (uintptr_t)attr->data;
+
+ dsi->bridge.driver_private = dsi;
+ dsi->bridge.funcs = &nwl_dsi_bridge_funcs;
+ dsi->bridge.of_node = dev->of_node;
+ dsi->bridge.timings = &nwl_dsi_timings;
+
+ dev_set_drvdata(dev, dsi);
+ pm_runtime_enable(dev);
+
+ ret = nwl_dsi_select_input(dsi);
+ if (ret < 0) {
+ mipi_dsi_host_unregister(&dsi->dsi_host);
+ return ret;
+ }
+
+ drm_bridge_add(&dsi->bridge);
+ return 0;
+}
+
+static int nwl_dsi_remove(struct platform_device *pdev)
+{
+ struct nwl_dsi *dsi = platform_get_drvdata(pdev);
+
+ nwl_dsi_deselect_input(dsi);
+ mipi_dsi_host_unregister(&dsi->dsi_host);
+ drm_bridge_remove(&dsi->bridge);
+ pm_runtime_disable(&pdev->dev);
+ return 0;
+}
+
+static struct platform_driver nwl_dsi_driver = {
+ .probe = nwl_dsi_probe,
+ .remove = nwl_dsi_remove,
+ .driver = {
+ .of_match_table = nwl_dsi_dt_ids,
+ .name = DRV_NAME,
+ },
+};
+
+module_platform_driver(nwl_dsi_driver);
+
+MODULE_AUTHOR("NXP Semiconductor");
+MODULE_AUTHOR("Purism SPC");
+MODULE_DESCRIPTION("Northwest Logic MIPI-DSI driver");
+MODULE_LICENSE("GPL"); /* GPLv2 or later */
diff --git a/drivers/gpu/drm/bridge/nwl-dsi.h b/drivers/gpu/drm/bridge/nwl-dsi.h
new file mode 100644
index 000000000000..a247a8a11c7c
--- /dev/null
+++ b/drivers/gpu/drm/bridge/nwl-dsi.h
@@ -0,0 +1,144 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * NWL MIPI DSI host driver
+ *
+ * Copyright (C) 2017 NXP
+ * Copyright (C) 2019 Purism SPC
+ */
+#ifndef __NWL_DSI_H__
+#define __NWL_DSI_H__
+
+/* DSI HOST registers */
+#define NWL_DSI_CFG_NUM_LANES 0x0
+#define NWL_DSI_CFG_NONCONTINUOUS_CLK 0x4
+#define NWL_DSI_CFG_T_PRE 0x8
+#define NWL_DSI_CFG_T_POST 0xc
+#define NWL_DSI_CFG_TX_GAP 0x10
+#define NWL_DSI_CFG_AUTOINSERT_EOTP 0x14
+#define NWL_DSI_CFG_EXTRA_CMDS_AFTER_EOTP 0x18
+#define NWL_DSI_CFG_HTX_TO_COUNT 0x1c
+#define NWL_DSI_CFG_LRX_H_TO_COUNT 0x20
+#define NWL_DSI_CFG_BTA_H_TO_COUNT 0x24
+#define NWL_DSI_CFG_TWAKEUP 0x28
+#define NWL_DSI_CFG_STATUS_OUT 0x2c
+#define NWL_DSI_RX_ERROR_STATUS 0x30
+
+/* DSI DPI registers */
+#define NWL_DSI_PIXEL_PAYLOAD_SIZE 0x200
+#define NWL_DSI_PIXEL_FIFO_SEND_LEVEL 0x204
+#define NWL_DSI_INTERFACE_COLOR_CODING 0x208
+#define NWL_DSI_PIXEL_FORMAT 0x20c
+#define NWL_DSI_VSYNC_POLARITY 0x210
+#define NWL_DSI_VSYNC_POLARITY_ACTIVE_LOW 0
+#define NWL_DSI_VSYNC_POLARITY_ACTIVE_HIGH BIT(1)
+
+#define NWL_DSI_HSYNC_POLARITY 0x214
+#define NWL_DSI_HSYNC_POLARITY_ACTIVE_LOW 0
+#define NWL_DSI_HSYNC_POLARITY_ACTIVE_HIGH BIT(1)
+
+#define NWL_DSI_VIDEO_MODE 0x218
+#define NWL_DSI_HFP 0x21c
+#define NWL_DSI_HBP 0x220
+#define NWL_DSI_HSA 0x224
+#define NWL_DSI_ENABLE_MULT_PKTS 0x228
+#define NWL_DSI_VBP 0x22c
+#define NWL_DSI_VFP 0x230
+#define NWL_DSI_BLLP_MODE 0x234
+#define NWL_DSI_USE_NULL_PKT_BLLP 0x238
+#define NWL_DSI_VACTIVE 0x23c
+#define NWL_DSI_VC 0x240
+
+/* DSI APB PKT control */
+#define NWL_DSI_TX_PAYLOAD 0x280
+#define NWL_DSI_PKT_CONTROL 0x284
+#define NWL_DSI_SEND_PACKET 0x288
+#define NWL_DSI_PKT_STATUS 0x28c
+#define NWL_DSI_PKT_FIFO_WR_LEVEL 0x290
+#define NWL_DSI_PKT_FIFO_RD_LEVEL 0x294
+#define NWL_DSI_RX_PAYLOAD 0x298
+#define NWL_DSI_RX_PKT_HEADER 0x29c
+
+/* DSI IRQ handling */
+#define NWL_DSI_IRQ_STATUS 0x2a0
+#define NWL_DSI_SM_NOT_IDLE BIT(0)
+#define NWL_DSI_TX_PKT_DONE BIT(1)
+#define NWL_DSI_DPHY_DIRECTION BIT(2)
+#define NWL_DSI_TX_FIFO_OVFLW BIT(3)
+#define NWL_DSI_TX_FIFO_UDFLW BIT(4)
+#define NWL_DSI_RX_FIFO_OVFLW BIT(5)
+#define NWL_DSI_RX_FIFO_UDFLW BIT(6)
+#define NWL_DSI_RX_PKT_HDR_RCVD BIT(7)
+#define NWL_DSI_RX_PKT_PAYLOAD_DATA_RCVD BIT(8)
+#define NWL_DSI_BTA_TIMEOUT BIT(29)
+#define NWL_DSI_LP_RX_TIMEOUT BIT(30)
+#define NWL_DSI_HS_TX_TIMEOUT BIT(31)
+
+#define NWL_DSI_IRQ_STATUS2 0x2a4
+#define NWL_DSI_SINGLE_BIT_ECC_ERR BIT(0)
+#define NWL_DSI_MULTI_BIT_ECC_ERR BIT(1)
+#define NWL_DSI_CRC_ERR BIT(2)
+
+#define NWL_DSI_IRQ_MASK 0x2a8
+#define NWL_DSI_SM_NOT_IDLE_MASK BIT(0)
+#define NWL_DSI_TX_PKT_DONE_MASK BIT(1)
+#define NWL_DSI_DPHY_DIRECTION_MASK BIT(2)
+#define NWL_DSI_TX_FIFO_OVFLW_MASK BIT(3)
+#define NWL_DSI_TX_FIFO_UDFLW_MASK BIT(4)
+#define NWL_DSI_RX_FIFO_OVFLW_MASK BIT(5)
+#define NWL_DSI_RX_FIFO_UDFLW_MASK BIT(6)
+#define NWL_DSI_RX_PKT_HDR_RCVD_MASK BIT(7)
+#define NWL_DSI_RX_PKT_PAYLOAD_DATA_RCVD_MASK BIT(8)
+#define NWL_DSI_BTA_TIMEOUT_MASK BIT(29)
+#define NWL_DSI_LP_RX_TIMEOUT_MASK BIT(30)
+#define NWL_DSI_HS_TX_TIMEOUT_MASK BIT(31)
+
+#define NWL_DSI_IRQ_MASK2 0x2ac
+#define NWL_DSI_SINGLE_BIT_ECC_ERR_MASK BIT(0)
+#define NWL_DSI_MULTI_BIT_ECC_ERR_MASK BIT(1)
+#define NWL_DSI_CRC_ERR_MASK BIT(2)
+
+/*
+ * PKT_CONTROL format:
+ * [15: 0] - word count
+ * [17:16] - virtual channel
+ * [23:18] - data type
+ * [24] - LP or HS select (0 - LP, 1 - HS)
+ * [25] - perform BTA after packet is sent
+ * [26] - perform BTA only, no packet tx
+ */
+#define NWL_DSI_WC(x) FIELD_PREP(GENMASK(15, 0), (x))
+#define NWL_DSI_TX_VC(x) FIELD_PREP(GENMASK(17, 16), (x))
+#define NWL_DSI_TX_DT(x) FIELD_PREP(GENMASK(23, 18), (x))
+#define NWL_DSI_HS_SEL(x) FIELD_PREP(GENMASK(24, 24), (x))
+#define NWL_DSI_BTA_TX(x) FIELD_PREP(GENMASK(25, 25), (x))
+#define NWL_DSI_BTA_NO_TX(x) FIELD_PREP(GENMASK(26, 26), (x))
+
+/*
+ * RX_PKT_HEADER format:
+ * [15: 0] - word count
+ * [21:16] - data type
+ * [23:22] - virtual channel
+ */
+#define NWL_DSI_RX_DT(x) FIELD_GET(GENMASK(21, 16), (x))
+#define NWL_DSI_RX_VC(x) FIELD_GET(GENMASK(23, 22), (x))
+
+/* DSI Video mode */
+#define NWL_DSI_VM_BURST_MODE_WITH_SYNC_PULSES 0
+#define NWL_DSI_VM_NON_BURST_MODE_WITH_SYNC_EVENTS BIT(0)
+#define NWL_DSI_VM_BURST_MODE BIT(1)
+
+/* DPI color coding */
+#define NWL_DSI_DPI_16_BIT_565_PACKED 0
+#define NWL_DSI_DPI_16_BIT_565_ALIGNED 1
+#define NWL_DSI_DPI_16_BIT_565_SHIFTED 2
+#define NWL_DSI_DPI_18_BIT_PACKED 3
+#define NWL_DSI_DPI_18_BIT_ALIGNED 4
+#define NWL_DSI_DPI_24_BIT 5
+
+/* DPI Pixel format */
+#define NWL_DSI_PIXEL_FORMAT_16 0
+#define NWL_DSI_PIXEL_FORMAT_18 BIT(0)
+#define NWL_DSI_PIXEL_FORMAT_18L BIT(1)
+#define NWL_DSI_PIXEL_FORMAT_24 (BIT(0) | BIT(1))
+
+#endif /* __NWL_DSI_H__ */
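As an editorial illustration of the register layout documented above (not part of the patch itself): the FIELD_PREP/FIELD_GET based macros can be combined to build a PKT_CONTROL word and to pick apart an RX_PKT_HEADER. The function names below are made up; MIPI_DSI_DCS_LONG_WRITE comes from <video/mipi_display.h> and FIELD_PREP()/FIELD_GET() from <linux/bitfield.h>.

#include <linux/bitfield.h>		/* FIELD_PREP()/FIELD_GET() */
#include <video/mipi_display.h>		/* MIPI_DSI_DCS_LONG_WRITE */

/* Compose PKT_CONTROL for a 6-byte DCS long write on virtual channel 0,
 * transmitted in high-speed mode with no bus turnaround requested.
 */
static u32 example_pkt_control(void)
{
	return NWL_DSI_WC(6) |
	       NWL_DSI_TX_VC(0) |
	       NWL_DSI_TX_DT(MIPI_DSI_DCS_LONG_WRITE) |
	       NWL_DSI_HS_SEL(1) |
	       NWL_DSI_BTA_TX(0);
}

/* Decode the data type and virtual channel of a received packet header. */
static void example_parse_rx_header(u32 rx_hdr, u8 *dt, u8 *vc)
{
	*dt = NWL_DSI_RX_DT(rx_hdr);
	*vc = NWL_DSI_RX_VC(rx_hdr);
}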
diff --git a/drivers/gpu/drm/bridge/panel.c b/drivers/gpu/drm/bridge/panel.c
index 8461ee8304ba..1e63ed6b18aa 100644
--- a/drivers/gpu/drm/bridge/panel.c
+++ b/drivers/gpu/drm/bridge/panel.c
@@ -166,7 +166,7 @@ static const struct drm_bridge_funcs panel_bridge_bridge_funcs = {
*
* The connector type is set to @panel->connector_type, which must be set to a
* known type. Calling this function with a panel whose connector type is
- * DRM_MODE_CONNECTOR_Unknown will return NULL.
+ * DRM_MODE_CONNECTOR_Unknown will return ERR_PTR(-EINVAL).
*
* See devm_drm_panel_bridge_add() for an automatically managed version of this
* function.
@@ -174,7 +174,7 @@ static const struct drm_bridge_funcs panel_bridge_bridge_funcs = {
struct drm_bridge *drm_panel_bridge_add(struct drm_panel *panel)
{
if (WARN_ON(panel->connector_type == DRM_MODE_CONNECTOR_Unknown))
- return NULL;
+ return ERR_PTR(-EINVAL);
return drm_panel_bridge_add_typed(panel, panel->connector_type);
}
@@ -265,7 +265,7 @@ struct drm_bridge *devm_drm_panel_bridge_add(struct device *dev,
struct drm_panel *panel)
{
if (WARN_ON(panel->connector_type == DRM_MODE_CONNECTOR_Unknown))
- return NULL;
+ return ERR_PTR(-EINVAL);
return devm_drm_panel_bridge_add_typed(dev, panel,
panel->connector_type);
@@ -311,6 +311,7 @@ EXPORT_SYMBOL(devm_drm_panel_bridge_add_typed);
/**
* drm_panel_bridge_connector - return the connector for the panel bridge
+ * @bridge: The drm_bridge.
*
* drm_panel_bridge creates the connector.
* This function gives external access to the connector.
diff --git a/drivers/gpu/drm/bridge/parade-ps8640.c b/drivers/gpu/drm/bridge/parade-ps8640.c
index d3a53442d449..4b099196afeb 100644
--- a/drivers/gpu/drm/bridge/parade-ps8640.c
+++ b/drivers/gpu/drm/bridge/parade-ps8640.c
@@ -268,8 +268,6 @@ static int ps8640_probe(struct i2c_client *client)
if (!panel)
return -ENODEV;
- panel->connector_type = DRM_MODE_CONNECTOR_eDP;
-
ps_bridge->panel_bridge = devm_drm_panel_bridge_add(dev, panel);
if (IS_ERR(ps_bridge->panel_bridge))
return PTR_ERR(ps_bridge->panel_bridge);
diff --git a/drivers/gpu/drm/bridge/sii9234.c b/drivers/gpu/drm/bridge/sii9234.c
index f81f81b7051f..b1258f0ed205 100644
--- a/drivers/gpu/drm/bridge/sii9234.c
+++ b/drivers/gpu/drm/bridge/sii9234.c
@@ -836,7 +836,8 @@ static int sii9234_init_resources(struct sii9234 *ctx,
ctx->supplies[3].supply = "cvcc12";
ret = devm_regulator_bulk_get(ctx->dev, 4, ctx->supplies);
if (ret) {
- dev_err(ctx->dev, "regulator_bulk failed\n");
+ if (ret != -EPROBE_DEFER)
+ dev_err(ctx->dev, "regulator_bulk failed\n");
return ret;
}
diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-ahb-audio.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-ahb-audio.c
index dd56996fe9c7..d0db1acf11d7 100644
--- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-ahb-audio.c
+++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-ahb-audio.c
@@ -630,7 +630,7 @@ static struct platform_driver snd_dw_hdmi_driver = {
module_platform_driver(snd_dw_hdmi_driver);
-MODULE_AUTHOR("Russell King <rmk+kernel@arm.linux.org.uk>");
+MODULE_AUTHOR("Russell King <rmk+kernel@armlinux.org.uk>");
MODULE_DESCRIPTION("Synopsis Designware HDMI AHB ALSA interface");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS("platform:" DRIVER_NAME);
diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c
index 383b1073d7de..30681398cfb0 100644
--- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c
+++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c
@@ -92,6 +92,12 @@ static const u16 csc_coeff_rgb_in_eitu709[3][4] = {
{ 0x6756, 0x78ab, 0x2000, 0x0200 }
};
+static const u16 csc_coeff_rgb_full_to_rgb_limited[3][4] = {
+ { 0x1b7c, 0x0000, 0x0000, 0x0020 },
+ { 0x0000, 0x1b7c, 0x0000, 0x0020 },
+ { 0x0000, 0x0000, 0x1b7c, 0x0020 }
+};
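Editorial note on the new table (not part of the patch): taking 0x2000 as 1.0, the same fixed-point scale used by the other coefficient tables in this file,

	0x1b7c / 0x2000 = 7036 / 8192 ~= 0.859 ~= (235 - 16) / 255

so each RGB channel is scaled by the full-to-limited range factor, while the 0x0020 offset supplies the corresponding limited-range black level (16 in the 8-bit domain, expressed in the block's offset format).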
+
struct hdmi_vmode {
bool mdataenablepolarity;
@@ -109,6 +115,7 @@ struct hdmi_data_info {
unsigned int pix_repet_factor;
unsigned int hdcp_enable;
struct hdmi_vmode video_mode;
+ bool rgb_limited_range;
};
struct dw_hdmi_i2c {
@@ -956,7 +963,14 @@ static void hdmi_video_sample(struct dw_hdmi *hdmi)
static int is_color_space_conversion(struct dw_hdmi *hdmi)
{
- return hdmi->hdmi_data.enc_in_bus_format != hdmi->hdmi_data.enc_out_bus_format;
+ struct hdmi_data_info *hdmi_data = &hdmi->hdmi_data;
+ bool is_input_rgb, is_output_rgb;
+
+ is_input_rgb = hdmi_bus_fmt_is_rgb(hdmi_data->enc_in_bus_format);
+ is_output_rgb = hdmi_bus_fmt_is_rgb(hdmi_data->enc_out_bus_format);
+
+ return (is_input_rgb != is_output_rgb) ||
+ (is_input_rgb && is_output_rgb && hdmi_data->rgb_limited_range);
}
static int is_color_space_decimation(struct dw_hdmi *hdmi)
@@ -983,28 +997,37 @@ static int is_color_space_interpolation(struct dw_hdmi *hdmi)
return 0;
}
+static bool is_csc_needed(struct dw_hdmi *hdmi)
+{
+ return is_color_space_conversion(hdmi) ||
+ is_color_space_decimation(hdmi) ||
+ is_color_space_interpolation(hdmi);
+}
+
static void dw_hdmi_update_csc_coeffs(struct dw_hdmi *hdmi)
{
const u16 (*csc_coeff)[3][4] = &csc_coeff_default;
+ bool is_input_rgb, is_output_rgb;
unsigned i;
u32 csc_scale = 1;
- if (is_color_space_conversion(hdmi)) {
- if (hdmi_bus_fmt_is_rgb(hdmi->hdmi_data.enc_out_bus_format)) {
- if (hdmi->hdmi_data.enc_out_encoding ==
- V4L2_YCBCR_ENC_601)
- csc_coeff = &csc_coeff_rgb_out_eitu601;
- else
- csc_coeff = &csc_coeff_rgb_out_eitu709;
- } else if (hdmi_bus_fmt_is_rgb(
- hdmi->hdmi_data.enc_in_bus_format)) {
- if (hdmi->hdmi_data.enc_out_encoding ==
- V4L2_YCBCR_ENC_601)
- csc_coeff = &csc_coeff_rgb_in_eitu601;
- else
- csc_coeff = &csc_coeff_rgb_in_eitu709;
- csc_scale = 0;
- }
+ is_input_rgb = hdmi_bus_fmt_is_rgb(hdmi->hdmi_data.enc_in_bus_format);
+ is_output_rgb = hdmi_bus_fmt_is_rgb(hdmi->hdmi_data.enc_out_bus_format);
+
+ if (!is_input_rgb && is_output_rgb) {
+ if (hdmi->hdmi_data.enc_out_encoding == V4L2_YCBCR_ENC_601)
+ csc_coeff = &csc_coeff_rgb_out_eitu601;
+ else
+ csc_coeff = &csc_coeff_rgb_out_eitu709;
+ } else if (is_input_rgb && !is_output_rgb) {
+ if (hdmi->hdmi_data.enc_out_encoding == V4L2_YCBCR_ENC_601)
+ csc_coeff = &csc_coeff_rgb_in_eitu601;
+ else
+ csc_coeff = &csc_coeff_rgb_in_eitu709;
+ csc_scale = 0;
+ } else if (is_input_rgb && is_output_rgb &&
+ hdmi->hdmi_data.rgb_limited_range) {
+ csc_coeff = &csc_coeff_rgb_full_to_rgb_limited;
}
/* The CSC registers are sequential, alternating MSB then LSB */
@@ -1614,6 +1637,18 @@ static void hdmi_config_AVI(struct dw_hdmi *hdmi, struct drm_display_mode *mode)
drm_hdmi_avi_infoframe_from_display_mode(&frame,
&hdmi->connector, mode);
+ if (hdmi_bus_fmt_is_rgb(hdmi->hdmi_data.enc_out_bus_format)) {
+ drm_hdmi_avi_infoframe_quant_range(&frame, &hdmi->connector,
+ mode,
+ hdmi->hdmi_data.rgb_limited_range ?
+ HDMI_QUANTIZATION_RANGE_LIMITED :
+ HDMI_QUANTIZATION_RANGE_FULL);
+ } else {
+ frame.quantization_range = HDMI_QUANTIZATION_RANGE_DEFAULT;
+ frame.ycc_quantization_range =
+ HDMI_YCC_QUANTIZATION_RANGE_LIMITED;
+ }
+
if (hdmi_bus_fmt_is_yuv444(hdmi->hdmi_data.enc_out_bus_format))
frame.colorspace = HDMI_COLORSPACE_YUV444;
else if (hdmi_bus_fmt_is_yuv422(hdmi->hdmi_data.enc_out_bus_format))
@@ -1654,8 +1689,6 @@ static void hdmi_config_AVI(struct dw_hdmi *hdmi, struct drm_display_mode *mode)
HDMI_EXTENDED_COLORIMETRY_XV_YCC_601;
}
- frame.scan_mode = HDMI_SCAN_MODE_NONE;
-
/*
* The Designware IP uses a different byte format from standard
* AVI info frames, though generally the bits are in the correct
@@ -2010,18 +2043,19 @@ static void dw_hdmi_enable_video_path(struct dw_hdmi *hdmi)
hdmi_writeb(hdmi, hdmi->mc_clkdis, HDMI_MC_CLKDIS);
/* Enable csc path */
- if (is_color_space_conversion(hdmi)) {
+ if (is_csc_needed(hdmi)) {
hdmi->mc_clkdis &= ~HDMI_MC_CLKDIS_CSCCLK_DISABLE;
hdmi_writeb(hdmi, hdmi->mc_clkdis, HDMI_MC_CLKDIS);
- }
- /* Enable color space conversion if needed */
- if (is_color_space_conversion(hdmi))
hdmi_writeb(hdmi, HDMI_MC_FLOWCTRL_FEED_THROUGH_OFF_CSC_IN_PATH,
HDMI_MC_FLOWCTRL);
- else
+ } else {
+ hdmi->mc_clkdis |= HDMI_MC_CLKDIS_CSCCLK_DISABLE;
+ hdmi_writeb(hdmi, hdmi->mc_clkdis, HDMI_MC_CLKDIS);
+
hdmi_writeb(hdmi, HDMI_MC_FLOWCTRL_FEED_THROUGH_OFF_CSC_BYPASS,
HDMI_MC_FLOWCTRL);
+ }
}
/* Workaround to clear the overflow condition */
@@ -2119,6 +2153,10 @@ static int dw_hdmi_setup(struct dw_hdmi *hdmi, struct drm_display_mode *mode)
if (hdmi->hdmi_data.enc_out_bus_format == MEDIA_BUS_FMT_FIXED)
hdmi->hdmi_data.enc_out_bus_format = MEDIA_BUS_FMT_RGB888_1X24;
+ hdmi->hdmi_data.rgb_limited_range = hdmi->sink_is_hdmi &&
+ drm_default_rgb_quant_range(mode) ==
+ HDMI_QUANTIZATION_RANGE_LIMITED;
+
hdmi->hdmi_data.pix_repet_factor = 0;
hdmi->hdmi_data.hdcp_enable = 0;
hdmi->hdmi_data.video_mode.mdataenablepolarity = true;
diff --git a/drivers/gpu/drm/bridge/tc358768.c b/drivers/gpu/drm/bridge/tc358768.c
index 1b39e8d37834..6650fe4cfc20 100644
--- a/drivers/gpu/drm/bridge/tc358768.c
+++ b/drivers/gpu/drm/bridge/tc358768.c
@@ -178,6 +178,8 @@ static int tc358768_clear_error(struct tc358768_priv *priv)
static void tc358768_write(struct tc358768_priv *priv, u32 reg, u32 val)
{
+ /* work around https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */
+ int tmpval = val;
size_t count = 2;
if (priv->error)
@@ -187,7 +189,7 @@ static void tc358768_write(struct tc358768_priv *priv, u32 reg, u32 val)
if (reg < 0x100 || reg >= 0x600)
count = 1;
- priv->error = regmap_bulk_write(priv->regmap, reg, &val, count);
+ priv->error = regmap_bulk_write(priv->regmap, reg, &tmpval, count);
}
static void tc358768_read(struct tc358768_priv *priv, u32 reg, u32 *val)
diff --git a/drivers/gpu/drm/cirrus/Kconfig b/drivers/gpu/drm/cirrus/Kconfig
deleted file mode 100644
index c6bbd988b0e5..000000000000
--- a/drivers/gpu/drm/cirrus/Kconfig
+++ /dev/null
@@ -1,19 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-config DRM_CIRRUS_QEMU
- tristate "Cirrus driver for QEMU emulated device"
- depends on DRM && PCI && MMU
- select DRM_KMS_HELPER
- select DRM_GEM_SHMEM_HELPER
- help
- This is a KMS driver for emulated cirrus device in qemu.
- It is *NOT* intended for real cirrus devices. This requires
- the modesetting userspace X.org driver.
-
- Cirrus is obsolete, the hardware was designed in the 90ies
- and can't keep up with todays needs. More background:
- https://www.kraxel.org/blog/2014/10/qemu-using-cirrus-considered-harmful/
-
- Better alternatives are:
- - stdvga (DRM_BOCHS, qemu -vga std, default in qemu 2.2+)
- - qxl (DRM_QXL, qemu -vga qxl, works best with spice)
- - virtio (DRM_VIRTIO_GPU), qemu -vga virtio)
diff --git a/drivers/gpu/drm/cirrus/Makefile b/drivers/gpu/drm/cirrus/Makefile
deleted file mode 100644
index 0c1ed3f99725..000000000000
--- a/drivers/gpu/drm/cirrus/Makefile
+++ /dev/null
@@ -1,2 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-obj-$(CONFIG_DRM_CIRRUS_QEMU) += cirrus.o
diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
index 9ccfbf213d72..965173fd0ac2 100644
--- a/drivers/gpu/drm/drm_atomic.c
+++ b/drivers/gpu/drm/drm_atomic.c
@@ -1641,10 +1641,10 @@ static const struct drm_info_list drm_atomic_debugfs_list[] = {
{"state", drm_state_info, 0},
};
-int drm_atomic_debugfs_init(struct drm_minor *minor)
+void drm_atomic_debugfs_init(struct drm_minor *minor)
{
- return drm_debugfs_create_files(drm_atomic_debugfs_list,
- ARRAY_SIZE(drm_atomic_debugfs_list),
- minor->debugfs_root, minor);
+ drm_debugfs_create_files(drm_atomic_debugfs_list,
+ ARRAY_SIZE(drm_atomic_debugfs_list),
+ minor->debugfs_root, minor);
}
#endif
diff --git a/drivers/gpu/drm/drm_auth.c b/drivers/gpu/drm/drm_auth.c
index 531b876d0ed8..800ac39f3213 100644
--- a/drivers/gpu/drm/drm_auth.c
+++ b/drivers/gpu/drm/drm_auth.c
@@ -135,6 +135,7 @@ static int drm_set_master(struct drm_device *dev, struct drm_file *fpriv,
}
}
+ fpriv->was_master = (ret == 0);
return ret;
}
@@ -174,17 +175,77 @@ out_err:
return ret;
}
+/*
+ * In the olden days the SET/DROP_MASTER ioctls used to return EACCES when
+ * CAP_SYS_ADMIN was not set. This was used to prevent rogue applications
+ * from becoming master and/or failing to release it.
+ *
+ * At the same time, the first client (for a given VT) is _always_ master.
+ * Thus in order for the ioctls to succeed, one had to _explicitly_ run the
+ * application as root or flip the setuid bit.
+ *
+ * If the CAP_SYS_ADMIN was missing, no other client could become master...
+ * EVER :-( Leading to a) the graphics session dying badly or b) a completely
+ * locked session.
+ *
+ *
+ * At some point systemd-logind was introduced to orchestrate and delegate
+ * master as applicable. It does so by opening the fd and passing it to users
+ * while logind itself a) does the set/drop master per the users' request and
+ * b) implicitly drops master on VT switch.
+ *
+ * Even though logind looks like the future, there are a few issues:
+ * - some platforms don't have an equivalent (Android, CrOS, some BSDs), so
+ * root is required _solely_ for SET/DROP MASTER.
+ * - applications may not be updated to use it,
+ * - any client which fails to drop master* can DoS the application using
+ * logind, to a varying degree.
+ *
+ * * Either due to missing CAP_SYS_ADMIN or simply not calling DROP_MASTER.
+ *
+ *
+ * Here we implement the next best thing:
+ * - ensure the logind style of fd passing works unchanged, and
+ * - allow a client to drop/set master, iff it is/was master at a given point
+ * in time.
+ *
+ * Note: DROP_MASTER cannot be a free-for-all, as an arbitrary user could:
+ * - DoS/crash the arbitrator - details would be implementation specific
+ * - open the node, become master implicitly and cause issues
+ *
+ * As a result this fixes the following when using a root-less build w/o logind:
+ * - startx
+ * - weston
+ * - various compositors based on wlroots
+ */
+static int
+drm_master_check_perm(struct drm_device *dev, struct drm_file *file_priv)
+{
+ if (file_priv->pid == task_pid(current) && file_priv->was_master)
+ return 0;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EACCES;
+
+ return 0;
+}
+
int drm_setmaster_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv)
{
int ret = 0;
mutex_lock(&dev->master_mutex);
+
+ ret = drm_master_check_perm(dev, file_priv);
+ if (ret)
+ goto out_unlock;
+
if (drm_is_current_master(file_priv))
goto out_unlock;
if (dev->master) {
- ret = -EINVAL;
+ ret = -EBUSY;
goto out_unlock;
}
@@ -224,6 +285,12 @@ int drm_dropmaster_ioctl(struct drm_device *dev, void *data,
int ret = -EINVAL;
mutex_lock(&dev->master_mutex);
+
+ ret = drm_master_check_perm(dev, file_priv);
+ if (ret)
+ goto out_unlock;
+
+ ret = -EINVAL;
if (!drm_is_current_master(file_priv))
goto out_unlock;
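To make the behavioural change concrete, here is a hypothetical userspace sketch (editorial, not part of the patch). It assumes the standard libdrm wrappers drmSetMaster()/drmDropMaster(); with this change the final SET_MASTER succeeds for an unprivileged process because it was master at some point, where previously it required CAP_SYS_ADMIN.

#include <fcntl.h>
#include <unistd.h>
#include <xf86drm.h>

int example_master_cycle(void)
{
	/* First client on an otherwise idle device becomes master implicitly. */
	int fd = open("/dev/dri/card0", O_RDWR | O_CLOEXEC);

	if (fd < 0)
		return -1;

	drmDropMaster(fd);		/* e.g. around a VT switch    */
	return drmSetMaster(fd);	/* now allowed without root   */
}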
diff --git a/drivers/gpu/drm/drm_blend.c b/drivers/gpu/drm/drm_blend.c
index 121481f6aa71..f1dcad96f341 100644
--- a/drivers/gpu/drm/drm_blend.c
+++ b/drivers/gpu/drm/drm_blend.c
@@ -135,7 +135,9 @@
* are underneath planes with higher Z position values. Two planes with the
* same Z position value have undefined ordering. Note that the Z position
* value can also be immutable, to inform userspace about the hard-coded
- * stacking of planes, see drm_plane_create_zpos_immutable_property().
+ * stacking of planes, see drm_plane_create_zpos_immutable_property(). If
+ * any plane has a zpos property (either mutable or immutable), then all
+ * planes shall have a zpos property.
*
* pixel blend mode:
* Pixel blend mode is set up with drm_plane_create_blend_mode_property().
@@ -183,6 +185,12 @@
* plane does not expose the "alpha" property, then this is
* assumed to be 1.0
*
+ * IN_FORMATS:
+ * Blob property which contains the set of buffer format and modifier
+ * pairs supported by this plane. The blob is a drm_format_modifier_blob
+ * struct. Without this property the plane doesn't support buffers with
+ * modifiers. Userspace cannot change this property.
+ *
* Note that all the property extensions described here apply either to the
* plane or the CRTC (e.g. for the background color, which currently is not
* exposed and assumed to be black).
@@ -338,10 +346,10 @@ EXPORT_SYMBOL(drm_rotation_simplify);
* should be set to 0 and max to maximal number of planes for given crtc - 1.
*
* If zpos of some planes cannot be changed (like fixed background or
- * cursor/topmost planes), driver should adjust min/max values and assign those
- * planes immutable zpos property with lower or higher values (for more
+ * cursor/topmost planes), drivers shall adjust the min/max values and assign
+ * those planes immutable zpos properties with lower or higher values (for more
* information, see drm_plane_create_zpos_immutable_property() function). In such
- * case driver should also assign proper initial zpos values for all planes in
+ * case drivers shall also assign proper initial zpos values for all planes in
* its plane_reset() callback, so the planes will be always sorted properly.
*
* See also drm_atomic_normalize_zpos().
diff --git a/drivers/gpu/drm/drm_bufs.c b/drivers/gpu/drm/drm_bufs.c
index dcabf5698333..ef26ac57f039 100644
--- a/drivers/gpu/drm/drm_bufs.c
+++ b/drivers/gpu/drm/drm_bufs.c
@@ -33,6 +33,7 @@
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/nospec.h>
+#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/vmalloc.h>
@@ -43,7 +44,6 @@
#include <drm/drm_device.h>
#include <drm/drm_drv.h>
#include <drm/drm_file.h>
-#include <drm/drm_pci.h>
#include <drm/drm_print.h>
#include "drm_legacy.h"
diff --git a/drivers/gpu/drm/drm_client.c b/drivers/gpu/drm/drm_client.c
index 6b0c6ef8b9b3..8cb93f5209a4 100644
--- a/drivers/gpu/drm/drm_client.c
+++ b/drivers/gpu/drm/drm_client.c
@@ -457,10 +457,10 @@ static const struct drm_info_list drm_client_debugfs_list[] = {
{ "internal_clients", drm_client_debugfs_internal_clients, 0 },
};
-int drm_client_debugfs_init(struct drm_minor *minor)
+void drm_client_debugfs_init(struct drm_minor *minor)
{
- return drm_debugfs_create_files(drm_client_debugfs_list,
- ARRAY_SIZE(drm_client_debugfs_list),
- minor->debugfs_root, minor);
+ drm_debugfs_create_files(drm_client_debugfs_list,
+ ARRAY_SIZE(drm_client_debugfs_list),
+ minor->debugfs_root, minor);
}
#endif
diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c
index 644f0ad10671..b1099e1251a2 100644
--- a/drivers/gpu/drm/drm_connector.c
+++ b/drivers/gpu/drm/drm_connector.c
@@ -1970,6 +1970,8 @@ int drm_connector_update_edid_property(struct drm_connector *connector,
else
drm_reset_display_info(connector);
+ drm_update_tile_info(connector, edid);
+
drm_object_property_set_value(&connector->base,
dev->mode_config.non_desktop_property,
connector->display_info.non_desktop);
@@ -2392,7 +2394,7 @@ EXPORT_SYMBOL(drm_mode_put_tile_group);
* tile group or NULL if not found.
*/
struct drm_tile_group *drm_mode_get_tile_group(struct drm_device *dev,
- char topology[8])
+ const char topology[8])
{
struct drm_tile_group *tg;
int id;
@@ -2422,7 +2424,7 @@ EXPORT_SYMBOL(drm_mode_get_tile_group);
* new tile group or NULL.
*/
struct drm_tile_group *drm_mode_create_tile_group(struct drm_device *dev,
- char topology[8])
+ const char topology[8])
{
struct drm_tile_group *tg;
int ret;
diff --git a/drivers/gpu/drm/drm_crtc_internal.h b/drivers/gpu/drm/drm_crtc_internal.h
index 16f2413403aa..da96b2f64d7e 100644
--- a/drivers/gpu/drm/drm_crtc_internal.h
+++ b/drivers/gpu/drm/drm_crtc_internal.h
@@ -82,6 +82,7 @@ int drm_mode_setcrtc(struct drm_device *dev,
/* drm_mode_config.c */
int drm_modeset_register_all(struct drm_device *dev);
void drm_modeset_unregister_all(struct drm_device *dev);
+void drm_mode_config_validate(struct drm_device *dev);
/* drm_modes.c */
const char *drm_get_mode_status_name(enum drm_mode_status status);
@@ -224,7 +225,7 @@ int drm_mode_dirtyfb_ioctl(struct drm_device *dev,
/* drm_atomic.c */
#ifdef CONFIG_DEBUG_FS
struct drm_minor;
-int drm_atomic_debugfs_init(struct drm_minor *minor);
+void drm_atomic_debugfs_init(struct drm_minor *minor);
#endif
int __drm_atomic_helper_disable_plane(struct drm_plane *plane,
@@ -278,3 +279,4 @@ int drm_mode_page_flip_ioctl(struct drm_device *dev,
void drm_mode_fixup_1366x768(struct drm_display_mode *mode);
void drm_reset_display_info(struct drm_connector *connector);
u32 drm_add_display_info(struct drm_connector *connector, const struct edid *edid);
+void drm_update_tile_info(struct drm_connector *connector, const struct edid *edid);
diff --git a/drivers/gpu/drm/drm_debugfs.c b/drivers/gpu/drm/drm_debugfs.c
index 4e673d318503..2bea22130703 100644
--- a/drivers/gpu/drm/drm_debugfs.c
+++ b/drivers/gpu/drm/drm_debugfs.c
@@ -172,8 +172,8 @@ static const struct file_operations drm_debugfs_fops = {
* &struct drm_info_list in the given root directory. These files will be removed
* automatically on drm_debugfs_cleanup().
*/
-int drm_debugfs_create_files(const struct drm_info_list *files, int count,
- struct dentry *root, struct drm_minor *minor)
+void drm_debugfs_create_files(const struct drm_info_list *files, int count,
+ struct dentry *root, struct drm_minor *minor)
{
struct drm_device *dev = minor->dev;
struct drm_info_node *tmp;
@@ -199,7 +199,6 @@ int drm_debugfs_create_files(const struct drm_info_list *files, int count,
list_add(&tmp->list, &minor->debugfs_list);
mutex_unlock(&minor->debugfs_lock);
}
- return 0;
}
EXPORT_SYMBOL(drm_debugfs_create_files);
@@ -208,52 +207,28 @@ int drm_debugfs_init(struct drm_minor *minor, int minor_id,
{
struct drm_device *dev = minor->dev;
char name[64];
- int ret;
INIT_LIST_HEAD(&minor->debugfs_list);
mutex_init(&minor->debugfs_lock);
sprintf(name, "%d", minor_id);
minor->debugfs_root = debugfs_create_dir(name, root);
- ret = drm_debugfs_create_files(drm_debugfs_list, DRM_DEBUGFS_ENTRIES,
- minor->debugfs_root, minor);
- if (ret) {
- debugfs_remove(minor->debugfs_root);
- minor->debugfs_root = NULL;
- DRM_ERROR("Failed to create core drm debugfs files\n");
- return ret;
- }
+ drm_debugfs_create_files(drm_debugfs_list, DRM_DEBUGFS_ENTRIES,
+ minor->debugfs_root, minor);
if (drm_drv_uses_atomic_modeset(dev)) {
- ret = drm_atomic_debugfs_init(minor);
- if (ret) {
- DRM_ERROR("Failed to create atomic debugfs files\n");
- return ret;
- }
+ drm_atomic_debugfs_init(minor);
}
if (drm_core_check_feature(dev, DRIVER_MODESET)) {
- ret = drm_framebuffer_debugfs_init(minor);
- if (ret) {
- DRM_ERROR("Failed to create framebuffer debugfs file\n");
- return ret;
- }
+ drm_framebuffer_debugfs_init(minor);
- ret = drm_client_debugfs_init(minor);
- if (ret) {
- DRM_ERROR("Failed to create client debugfs file\n");
- return ret;
- }
+ drm_client_debugfs_init(minor);
}
- if (dev->driver->debugfs_init) {
- ret = dev->driver->debugfs_init(minor);
- if (ret) {
- DRM_ERROR("DRM: Driver failed to initialize "
- "/sys/kernel/debug/dri.\n");
- return ret;
- }
- }
+ if (dev->driver->debugfs_init)
+ dev->driver->debugfs_init(minor);
+
return 0;
}
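With drm_debugfs_create_files() and the core debugfs init helpers no longer returning errors, a driver's debugfs_init callback reduces to straight-line code. A minimal sketch, using a hypothetical "foo" driver and assuming the drm_driver->debugfs_init hook has been converted to return void as elsewhere in this series:

static int foo_debugfs_state(struct seq_file *m, void *unused)
{
	seq_puts(m, "example state\n");		/* placeholder output */
	return 0;
}

static const struct drm_info_list foo_debugfs_list[] = {
	{ "foo_state", foo_debugfs_state, 0 },
};

static void foo_debugfs_init(struct drm_minor *minor)
{
	/* No return value left to check or propagate. */
	drm_debugfs_create_files(foo_debugfs_list,
				 ARRAY_SIZE(foo_debugfs_list),
				 minor->debugfs_root, minor);
}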
diff --git a/drivers/gpu/drm/drm_dma.c b/drivers/gpu/drm/drm_dma.c
index a7add55a85b4..d07ba54ec945 100644
--- a/drivers/gpu/drm/drm_dma.c
+++ b/drivers/gpu/drm/drm_dma.c
@@ -34,9 +34,9 @@
*/
#include <linux/export.h>
+#include <linux/pci.h>
#include <drm/drm_drv.h>
-#include <drm/drm_pci.h>
#include <drm/drm_print.h>
#include "drm_legacy.h"
diff --git a/drivers/gpu/drm/drm_dp_helper.c b/drivers/gpu/drm/drm_dp_helper.c
index c6fbe6e6bc9d..19c99dddcb99 100644
--- a/drivers/gpu/drm/drm_dp_helper.c
+++ b/drivers/gpu/drm/drm_dp_helper.c
@@ -1238,6 +1238,8 @@ static const struct dpcd_quirk dpcd_quirk_list[] = {
{ OUI(0x00, 0x00, 0x00), DEVICE_ID('C', 'H', '7', '5', '1', '1'), false, BIT(DP_DPCD_QUIRK_NO_SINK_COUNT) },
/* Synaptics DP1.4 MST hubs can support DSC without virtual DPCD */
{ OUI(0x90, 0xCC, 0x24), DEVICE_ID_ANY, true, BIT(DP_DPCD_QUIRK_DSC_WITHOUT_VIRTUAL_DPCD) },
+ /* Apple MacBookPro 2017 15 inch eDP Retina panel reports too low DP_MAX_LINK_RATE */
+ { OUI(0x00, 0x10, 0xfa), DEVICE_ID(101, 68, 21, 101, 98, 97), false, BIT(DP_DPCD_QUIRK_CAN_DO_MAX_LINK_RATE_3_24_GBPS) },
};
#undef OUI
@@ -1313,6 +1315,7 @@ static const struct edid_quirk edid_quirk_list[] = {
{ MFG(0x06, 0xaf), PROD_ID(0xeb, 0x41), BIT(DP_QUIRK_FORCE_DPCD_BACKLIGHT) },
{ MFG(0x4d, 0x10), PROD_ID(0xc7, 0x14), BIT(DP_QUIRK_FORCE_DPCD_BACKLIGHT) },
{ MFG(0x4d, 0x10), PROD_ID(0xe6, 0x14), BIT(DP_QUIRK_FORCE_DPCD_BACKLIGHT) },
+ { MFG(0x4c, 0x83), PROD_ID(0x47, 0x41), BIT(DP_QUIRK_FORCE_DPCD_BACKLIGHT) },
};
#undef MFG
@@ -1533,3 +1536,271 @@ int drm_dp_dsc_sink_supported_input_bpcs(const u8 dsc_dpcd[DP_DSC_RECEIVER_CAP_S
return num_bpc;
}
EXPORT_SYMBOL(drm_dp_dsc_sink_supported_input_bpcs);
+
+/**
+ * drm_dp_get_phy_test_pattern() - get the requested pattern from the sink.
+ * @aux: DisplayPort AUX channel
+ * @data: DP phy compliance test parameters.
+ *
+ * Returns 0 on success or a negative error code on failure.
+ */
+int drm_dp_get_phy_test_pattern(struct drm_dp_aux *aux,
+ struct drm_dp_phy_test_params *data)
+{
+ int err;
+ u8 rate, lanes;
+
+ err = drm_dp_dpcd_readb(aux, DP_TEST_LINK_RATE, &rate);
+ if (err < 0)
+ return err;
+ data->link_rate = drm_dp_bw_code_to_link_rate(rate);
+
+ err = drm_dp_dpcd_readb(aux, DP_TEST_LANE_COUNT, &lanes);
+ if (err < 0)
+ return err;
+ data->num_lanes = lanes & DP_MAX_LANE_COUNT_MASK;
+
+ if (lanes & DP_ENHANCED_FRAME_CAP)
+ data->enhanced_frame_cap = true;
+
+ err = drm_dp_dpcd_readb(aux, DP_PHY_TEST_PATTERN, &data->phy_pattern);
+ if (err < 0)
+ return err;
+
+ switch (data->phy_pattern) {
+ case DP_PHY_TEST_PATTERN_80BIT_CUSTOM:
+ err = drm_dp_dpcd_read(aux, DP_TEST_80BIT_CUSTOM_PATTERN_7_0,
+ &data->custom80, sizeof(data->custom80));
+ if (err < 0)
+ return err;
+
+ break;
+ case DP_PHY_TEST_PATTERN_CP2520:
+ err = drm_dp_dpcd_read(aux, DP_TEST_HBR2_SCRAMBLER_RESET,
+ &data->hbr2_reset,
+ sizeof(data->hbr2_reset));
+ if (err < 0)
+ return err;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(drm_dp_get_phy_test_pattern);
+
+/**
+ * drm_dp_set_phy_test_pattern() - set the pattern to the sink.
+ * @aux: DisplayPort AUX channel
+ * @data: DP phy compliance test parameters.
+ *
+ * Returns 0 on success or a negative error code on failure.
+ */
+int drm_dp_set_phy_test_pattern(struct drm_dp_aux *aux,
+ struct drm_dp_phy_test_params *data, u8 dp_rev)
+{
+ int err, i;
+ u8 link_config[2];
+ u8 test_pattern;
+
+ link_config[0] = drm_dp_link_rate_to_bw_code(data->link_rate);
+ link_config[1] = data->num_lanes;
+ if (data->enhanced_frame_cap)
+ link_config[1] |= DP_LANE_COUNT_ENHANCED_FRAME_EN;
+ err = drm_dp_dpcd_write(aux, DP_LINK_BW_SET, link_config, 2);
+ if (err < 0)
+ return err;
+
+ test_pattern = data->phy_pattern;
+ if (dp_rev < 0x12) {
+ test_pattern = (test_pattern << 2) &
+ DP_LINK_QUAL_PATTERN_11_MASK;
+ err = drm_dp_dpcd_writeb(aux, DP_TRAINING_PATTERN_SET,
+ test_pattern);
+ if (err < 0)
+ return err;
+ } else {
+ for (i = 0; i < data->num_lanes; i++) {
+ err = drm_dp_dpcd_writeb(aux,
+ DP_LINK_QUAL_LANE0_SET + i,
+ test_pattern);
+ if (err < 0)
+ return err;
+ }
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(drm_dp_set_phy_test_pattern);
+
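A hypothetical source-side caller might use the two helpers above roughly as follows (editorial sketch, not part of the patch; only the drm_dp_* helpers and the struct drm_dp_phy_test_params fields they touch are taken from the code above, everything else is made up):

static int foo_service_phy_test_request(struct drm_dp_aux *aux, u8 dpcd_rev)
{
	struct drm_dp_phy_test_params data = {};
	int ret;

	ret = drm_dp_get_phy_test_pattern(aux, &data);
	if (ret)
		return ret;

	/*
	 * Retrain the link at data.link_rate / data.num_lanes and program
	 * data.phy_pattern into the source PHY here (hardware specific).
	 */

	return drm_dp_set_phy_test_pattern(aux, &data, dpcd_rev);
}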
+static const char *dp_pixelformat_get_name(enum dp_pixelformat pixelformat)
+{
+ if (pixelformat < 0 || pixelformat > DP_PIXELFORMAT_RESERVED)
+ return "Invalid";
+
+ switch (pixelformat) {
+ case DP_PIXELFORMAT_RGB:
+ return "RGB";
+ case DP_PIXELFORMAT_YUV444:
+ return "YUV444";
+ case DP_PIXELFORMAT_YUV422:
+ return "YUV422";
+ case DP_PIXELFORMAT_YUV420:
+ return "YUV420";
+ case DP_PIXELFORMAT_Y_ONLY:
+ return "Y_ONLY";
+ case DP_PIXELFORMAT_RAW:
+ return "RAW";
+ default:
+ return "Reserved";
+ }
+}
+
+static const char *dp_colorimetry_get_name(enum dp_pixelformat pixelformat,
+ enum dp_colorimetry colorimetry)
+{
+ if (pixelformat < 0 || pixelformat > DP_PIXELFORMAT_RESERVED)
+ return "Invalid";
+
+ switch (colorimetry) {
+ case DP_COLORIMETRY_DEFAULT:
+ switch (pixelformat) {
+ case DP_PIXELFORMAT_RGB:
+ return "sRGB";
+ case DP_PIXELFORMAT_YUV444:
+ case DP_PIXELFORMAT_YUV422:
+ case DP_PIXELFORMAT_YUV420:
+ return "BT.601";
+ case DP_PIXELFORMAT_Y_ONLY:
+ return "DICOM PS3.14";
+ case DP_PIXELFORMAT_RAW:
+ return "Custom Color Profile";
+ default:
+ return "Reserved";
+ }
+ case DP_COLORIMETRY_RGB_WIDE_FIXED: /* and DP_COLORIMETRY_BT709_YCC */
+ switch (pixelformat) {
+ case DP_PIXELFORMAT_RGB:
+ return "Wide Fixed";
+ case DP_PIXELFORMAT_YUV444:
+ case DP_PIXELFORMAT_YUV422:
+ case DP_PIXELFORMAT_YUV420:
+ return "BT.709";
+ default:
+ return "Reserved";
+ }
+ case DP_COLORIMETRY_RGB_WIDE_FLOAT: /* and DP_COLORIMETRY_XVYCC_601 */
+ switch (pixelformat) {
+ case DP_PIXELFORMAT_RGB:
+ return "Wide Float";
+ case DP_PIXELFORMAT_YUV444:
+ case DP_PIXELFORMAT_YUV422:
+ case DP_PIXELFORMAT_YUV420:
+ return "xvYCC 601";
+ default:
+ return "Reserved";
+ }
+ case DP_COLORIMETRY_OPRGB: /* and DP_COLORIMETRY_XVYCC_709 */
+ switch (pixelformat) {
+ case DP_PIXELFORMAT_RGB:
+ return "OpRGB";
+ case DP_PIXELFORMAT_YUV444:
+ case DP_PIXELFORMAT_YUV422:
+ case DP_PIXELFORMAT_YUV420:
+ return "xvYCC 709";
+ default:
+ return "Reserved";
+ }
+ case DP_COLORIMETRY_DCI_P3_RGB: /* and DP_COLORIMETRY_SYCC_601 */
+ switch (pixelformat) {
+ case DP_PIXELFORMAT_RGB:
+ return "DCI-P3";
+ case DP_PIXELFORMAT_YUV444:
+ case DP_PIXELFORMAT_YUV422:
+ case DP_PIXELFORMAT_YUV420:
+ return "sYCC 601";
+ default:
+ return "Reserved";
+ }
+ case DP_COLORIMETRY_RGB_CUSTOM: /* and DP_COLORIMETRY_OPYCC_601 */
+ switch (pixelformat) {
+ case DP_PIXELFORMAT_RGB:
+ return "Custom Profile";
+ case DP_PIXELFORMAT_YUV444:
+ case DP_PIXELFORMAT_YUV422:
+ case DP_PIXELFORMAT_YUV420:
+ return "OpYCC 601";
+ default:
+ return "Reserved";
+ }
+ case DP_COLORIMETRY_BT2020_RGB: /* and DP_COLORIMETRY_BT2020_CYCC */
+ switch (pixelformat) {
+ case DP_PIXELFORMAT_RGB:
+ return "BT.2020 RGB";
+ case DP_PIXELFORMAT_YUV444:
+ case DP_PIXELFORMAT_YUV422:
+ case DP_PIXELFORMAT_YUV420:
+ return "BT.2020 CYCC";
+ default:
+ return "Reserved";
+ }
+ case DP_COLORIMETRY_BT2020_YCC:
+ switch (pixelformat) {
+ case DP_PIXELFORMAT_YUV444:
+ case DP_PIXELFORMAT_YUV422:
+ case DP_PIXELFORMAT_YUV420:
+ return "BT.2020 YCC";
+ default:
+ return "Reserved";
+ }
+ default:
+ return "Invalid";
+ }
+}
+
+static const char *dp_dynamic_range_get_name(enum dp_dynamic_range dynamic_range)
+{
+ switch (dynamic_range) {
+ case DP_DYNAMIC_RANGE_VESA:
+ return "VESA range";
+ case DP_DYNAMIC_RANGE_CTA:
+ return "CTA range";
+ default:
+ return "Invalid";
+ }
+}
+
+static const char *dp_content_type_get_name(enum dp_content_type content_type)
+{
+ switch (content_type) {
+ case DP_CONTENT_TYPE_NOT_DEFINED:
+ return "Not defined";
+ case DP_CONTENT_TYPE_GRAPHICS:
+ return "Graphics";
+ case DP_CONTENT_TYPE_PHOTO:
+ return "Photo";
+ case DP_CONTENT_TYPE_VIDEO:
+ return "Video";
+ case DP_CONTENT_TYPE_GAME:
+ return "Game";
+ default:
+ return "Reserved";
+ }
+}
+
+void drm_dp_vsc_sdp_log(const char *level, struct device *dev,
+ const struct drm_dp_vsc_sdp *vsc)
+{
+#define DP_SDP_LOG(fmt, ...) dev_printk(level, dev, fmt, ##__VA_ARGS__)
+ DP_SDP_LOG("DP SDP: %s, revision %u, length %u\n", "VSC",
+ vsc->revision, vsc->length);
+ DP_SDP_LOG(" pixelformat: %s\n",
+ dp_pixelformat_get_name(vsc->pixelformat));
+ DP_SDP_LOG(" colorimetry: %s\n",
+ dp_colorimetry_get_name(vsc->pixelformat, vsc->colorimetry));
+ DP_SDP_LOG(" bpc: %u\n", vsc->bpc);
+ DP_SDP_LOG(" dynamic range: %s\n",
+ dp_dynamic_range_get_name(vsc->dynamic_range));
+ DP_SDP_LOG(" content type: %s\n",
+ dp_content_type_get_name(vsc->content_type));
+#undef DP_SDP_LOG
+}
+EXPORT_SYMBOL(drm_dp_vsc_sdp_log);
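For illustration only (not part of the patch): a caller could fill a struct drm_dp_vsc_sdp with the enum values handled above and dump it through the new helper; all field and enum names are taken from the code above, "dev" stands for some struct device pointer.

struct drm_dp_vsc_sdp vsc = {
	.revision	= 0x5,
	.length		= 0x13,
	.pixelformat	= DP_PIXELFORMAT_RGB,
	.colorimetry	= DP_COLORIMETRY_DEFAULT,
	.bpc		= 8,
	.dynamic_range	= DP_DYNAMIC_RANGE_CTA,
	.content_type	= DP_CONTENT_TYPE_GRAPHICS,
};

drm_dp_vsc_sdp_log(KERN_DEBUG, dev, &vsc);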
diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c
index 9d89ebf3a749..1e26b89628f9 100644
--- a/drivers/gpu/drm/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/drm_dp_mst_topology.c
@@ -27,6 +27,7 @@
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
+#include <linux/iopoll.h>
#if IS_ENABLED(CONFIG_DRM_DEBUG_DP_MST_TOPOLOGY_REFS)
#include <linux/stacktrace.h>
@@ -687,51 +688,45 @@ static void drm_dp_encode_sideband_reply(struct drm_dp_sideband_msg_reply_body *
raw->cur_len = idx;
}
-/* this adds a chunk of msg to the builder to get the final msg */
-static bool drm_dp_sideband_msg_build(struct drm_dp_sideband_msg_rx *msg,
- u8 *replybuf, u8 replybuflen, bool hdr)
+static bool drm_dp_sideband_msg_set_header(struct drm_dp_sideband_msg_rx *msg,
+ struct drm_dp_sideband_msg_hdr *hdr,
+ u8 hdrlen)
{
- int ret;
- u8 crc4;
+ /*
+ * ignore out-of-order messages or messages that are part of a
+ * failed transaction
+ */
+ if (!hdr->somt && !msg->have_somt)
+ return false;
- if (hdr) {
- u8 hdrlen;
- struct drm_dp_sideband_msg_hdr recv_hdr;
- ret = drm_dp_decode_sideband_msg_hdr(&recv_hdr, replybuf, replybuflen, &hdrlen);
- if (ret == false) {
- print_hex_dump(KERN_DEBUG, "failed hdr", DUMP_PREFIX_NONE, 16, 1, replybuf, replybuflen, false);
- return false;
- }
+ /* get length contained in this portion */
+ msg->curchunk_idx = 0;
+ msg->curchunk_len = hdr->msg_len;
+ msg->curchunk_hdrlen = hdrlen;
- /*
- * ignore out-of-order messages or messages that are part of a
- * failed transaction
- */
- if (!recv_hdr.somt && !msg->have_somt)
- return false;
+ /* we have already gotten an somt - don't bother parsing */
+ if (hdr->somt && msg->have_somt)
+ return false;
- /* get length contained in this portion */
- msg->curchunk_len = recv_hdr.msg_len;
- msg->curchunk_hdrlen = hdrlen;
+ if (hdr->somt) {
+ memcpy(&msg->initial_hdr, hdr,
+ sizeof(struct drm_dp_sideband_msg_hdr));
+ msg->have_somt = true;
+ }
+ if (hdr->eomt)
+ msg->have_eomt = true;
- /* we have already gotten an somt - don't bother parsing */
- if (recv_hdr.somt && msg->have_somt)
- return false;
+ return true;
+}
- if (recv_hdr.somt) {
- memcpy(&msg->initial_hdr, &recv_hdr, sizeof(struct drm_dp_sideband_msg_hdr));
- msg->have_somt = true;
- }
- if (recv_hdr.eomt)
- msg->have_eomt = true;
+/* this adds a chunk of msg to the builder to get the final msg */
+static bool drm_dp_sideband_append_payload(struct drm_dp_sideband_msg_rx *msg,
+ u8 *replybuf, u8 replybuflen)
+{
+ u8 crc4;
- /* copy the bytes for the remainder of this header chunk */
- msg->curchunk_idx = min(msg->curchunk_len, (u8)(replybuflen - hdrlen));
- memcpy(&msg->chunk[0], replybuf + hdrlen, msg->curchunk_idx);
- } else {
- memcpy(&msg->chunk[msg->curchunk_idx], replybuf, replybuflen);
- msg->curchunk_idx += replybuflen;
- }
+ memcpy(&msg->chunk[msg->curchunk_idx], replybuf, replybuflen);
+ msg->curchunk_idx += replybuflen;
if (msg->curchunk_idx >= msg->curchunk_len) {
/* do CRC */
@@ -1060,13 +1055,12 @@ static void build_link_address(struct drm_dp_sideband_msg_tx *msg)
drm_dp_encode_sideband_req(&req, msg);
}
-static int build_clear_payload_id_table(struct drm_dp_sideband_msg_tx *msg)
+static void build_clear_payload_id_table(struct drm_dp_sideband_msg_tx *msg)
{
struct drm_dp_sideband_msg_req_body req;
req.req_type = DP_CLEAR_PAYLOAD_ID_TABLE;
drm_dp_encode_sideband_req(&req, msg);
- return 0;
}
static int build_enum_path_resources(struct drm_dp_sideband_msg_tx *msg,
@@ -1203,16 +1197,8 @@ static int drm_dp_mst_wait_tx_reply(struct drm_dp_mst_branch *mstb,
/* remove from q */
if (txmsg->state == DRM_DP_SIDEBAND_TX_QUEUED ||
- txmsg->state == DRM_DP_SIDEBAND_TX_START_SEND) {
+ txmsg->state == DRM_DP_SIDEBAND_TX_START_SEND)
list_del(&txmsg->next);
- }
-
- if (txmsg->state == DRM_DP_SIDEBAND_TX_START_SEND ||
- txmsg->state == DRM_DP_SIDEBAND_TX_SENT) {
- mstb->tx_slots[txmsg->seqno] = NULL;
- }
- mgr->is_waiting_for_dwn_reply = false;
-
}
out:
if (unlikely(ret == -EIO) && drm_debug_enabled(DRM_UT_DP)) {
@@ -2691,22 +2677,6 @@ static int set_hdr_from_dst_qlock(struct drm_dp_sideband_msg_hdr *hdr,
struct drm_dp_mst_branch *mstb = txmsg->dst;
u8 req_type;
- /* both msg slots are full */
- if (txmsg->seqno == -1) {
- if (mstb->tx_slots[0] && mstb->tx_slots[1]) {
- DRM_DEBUG_KMS("%s: failed to find slot\n", __func__);
- return -EAGAIN;
- }
- if (mstb->tx_slots[0] == NULL && mstb->tx_slots[1] == NULL) {
- txmsg->seqno = mstb->last_seqno;
- mstb->last_seqno ^= 1;
- } else if (mstb->tx_slots[0] == NULL)
- txmsg->seqno = 0;
- else
- txmsg->seqno = 1;
- mstb->tx_slots[txmsg->seqno] = txmsg;
- }
-
req_type = txmsg->msg[0] & 0x7f;
if (req_type == DP_CONNECTION_STATUS_NOTIFY ||
req_type == DP_RESOURCE_STATUS_NOTIFY)
@@ -2718,7 +2688,7 @@ static int set_hdr_from_dst_qlock(struct drm_dp_sideband_msg_hdr *hdr,
hdr->lcr = mstb->lct - 1;
if (mstb->lct > 1)
memcpy(hdr->rad, mstb->rad, mstb->lct / 2);
- hdr->seqno = txmsg->seqno;
+
return 0;
}
/*
@@ -2733,15 +2703,15 @@ static int process_single_tx_qlock(struct drm_dp_mst_topology_mgr *mgr,
int len, space, idx, tosend;
int ret;
+ if (txmsg->state == DRM_DP_SIDEBAND_TX_SENT)
+ return 0;
+
memset(&hdr, 0, sizeof(struct drm_dp_sideband_msg_hdr));
- if (txmsg->state == DRM_DP_SIDEBAND_TX_QUEUED) {
- txmsg->seqno = -1;
+ if (txmsg->state == DRM_DP_SIDEBAND_TX_QUEUED)
txmsg->state = DRM_DP_SIDEBAND_TX_START_SEND;
- }
- /* make hdr from dst mst - for replies use seqno
- otherwise assign one */
+ /* make hdr from dst mst */
ret = set_hdr_from_dst_qlock(&hdr, txmsg);
if (ret < 0)
return ret;
@@ -2794,42 +2764,17 @@ static void process_single_down_tx_qlock(struct drm_dp_mst_topology_mgr *mgr)
if (list_empty(&mgr->tx_msg_downq))
return;
- txmsg = list_first_entry(&mgr->tx_msg_downq, struct drm_dp_sideband_msg_tx, next);
+ txmsg = list_first_entry(&mgr->tx_msg_downq,
+ struct drm_dp_sideband_msg_tx, next);
ret = process_single_tx_qlock(mgr, txmsg, false);
- if (ret == 1) {
- /* txmsg is sent it should be in the slots now */
- mgr->is_waiting_for_dwn_reply = true;
- list_del(&txmsg->next);
- } else if (ret) {
+ if (ret < 0) {
DRM_DEBUG_KMS("failed to send msg in q %d\n", ret);
- mgr->is_waiting_for_dwn_reply = false;
list_del(&txmsg->next);
- if (txmsg->seqno != -1)
- txmsg->dst->tx_slots[txmsg->seqno] = NULL;
txmsg->state = DRM_DP_SIDEBAND_TX_TIMEOUT;
wake_up_all(&mgr->tx_waitq);
}
}
-/* called holding qlock */
-static void process_single_up_tx_qlock(struct drm_dp_mst_topology_mgr *mgr,
- struct drm_dp_sideband_msg_tx *txmsg)
-{
- int ret;
-
- /* construct a chunk from the first msg in the tx_msg queue */
- ret = process_single_tx_qlock(mgr, txmsg, true);
-
- if (ret != 1)
- DRM_DEBUG_KMS("failed to send msg in q %d\n", ret);
-
- if (txmsg->seqno != -1) {
- WARN_ON((unsigned int)txmsg->seqno >
- ARRAY_SIZE(txmsg->dst->tx_slots));
- txmsg->dst->tx_slots[txmsg->seqno] = NULL;
- }
-}
-
static void drm_dp_queue_down_tx(struct drm_dp_mst_topology_mgr *mgr,
struct drm_dp_sideband_msg_tx *txmsg)
{
@@ -2842,8 +2787,7 @@ static void drm_dp_queue_down_tx(struct drm_dp_mst_topology_mgr *mgr,
drm_dp_mst_dump_sideband_msg_tx(&p, txmsg);
}
- if (list_is_singular(&mgr->tx_msg_downq) &&
- !mgr->is_waiting_for_dwn_reply)
+ if (list_is_singular(&mgr->tx_msg_downq))
process_single_down_tx_qlock(mgr);
mutex_unlock(&mgr->qlock);
}
@@ -3467,7 +3411,7 @@ static int drm_dp_encode_up_ack_reply(struct drm_dp_sideband_msg_tx *msg, u8 req
static int drm_dp_send_up_ack_reply(struct drm_dp_mst_topology_mgr *mgr,
struct drm_dp_mst_branch *mstb,
- int req_type, int seqno, bool broadcast)
+ int req_type, bool broadcast)
{
struct drm_dp_sideband_msg_tx *txmsg;
@@ -3476,13 +3420,11 @@ static int drm_dp_send_up_ack_reply(struct drm_dp_mst_topology_mgr *mgr,
return -ENOMEM;
txmsg->dst = mstb;
- txmsg->seqno = seqno;
drm_dp_encode_up_ack_reply(txmsg, req_type);
mutex_lock(&mgr->qlock);
-
- process_single_up_tx_qlock(mgr, txmsg);
-
+ /* construct a chunk from the first msg in the tx_msg queue */
+ process_single_tx_qlock(mgr, txmsg, true);
mutex_unlock(&mgr->qlock);
kfree(txmsg);
@@ -3707,31 +3649,63 @@ out_fail:
}
EXPORT_SYMBOL(drm_dp_mst_topology_mgr_resume);
-static bool drm_dp_get_one_sb_msg(struct drm_dp_mst_topology_mgr *mgr, bool up)
+static bool
+drm_dp_get_one_sb_msg(struct drm_dp_mst_topology_mgr *mgr, bool up,
+ struct drm_dp_mst_branch **mstb)
{
int len;
u8 replyblock[32];
int replylen, curreply;
int ret;
- struct drm_dp_sideband_msg_rx *msg;
- int basereg = up ? DP_SIDEBAND_MSG_UP_REQ_BASE : DP_SIDEBAND_MSG_DOWN_REP_BASE;
- msg = up ? &mgr->up_req_recv : &mgr->down_rep_recv;
+ u8 hdrlen;
+ struct drm_dp_sideband_msg_hdr hdr;
+ struct drm_dp_sideband_msg_rx *msg =
+ up ? &mgr->up_req_recv : &mgr->down_rep_recv;
+ int basereg = up ? DP_SIDEBAND_MSG_UP_REQ_BASE :
+ DP_SIDEBAND_MSG_DOWN_REP_BASE;
+
+ if (!up)
+ *mstb = NULL;
len = min(mgr->max_dpcd_transaction_bytes, 16);
- ret = drm_dp_dpcd_read(mgr->aux, basereg,
- replyblock, len);
+ ret = drm_dp_dpcd_read(mgr->aux, basereg, replyblock, len);
if (ret != len) {
DRM_DEBUG_KMS("failed to read DPCD down rep %d %d\n", len, ret);
return false;
}
- ret = drm_dp_sideband_msg_build(msg, replyblock, len, true);
+
+ ret = drm_dp_decode_sideband_msg_hdr(&hdr, replyblock, len, &hdrlen);
+ if (ret == false) {
+ print_hex_dump(KERN_DEBUG, "failed hdr", DUMP_PREFIX_NONE, 16,
+ 1, replyblock, len, false);
+ DRM_DEBUG_KMS("ERROR: failed header\n");
+ return false;
+ }
+
+ if (!up) {
+ /* Caller is responsible for giving back this reference */
+ *mstb = drm_dp_get_mst_branch_device(mgr, hdr.lct, hdr.rad);
+ if (!*mstb) {
+ DRM_DEBUG_KMS("Got MST reply from unknown device %d\n",
+ hdr.lct);
+ return false;
+ }
+ }
+
+ if (!drm_dp_sideband_msg_set_header(msg, &hdr, hdrlen)) {
+ DRM_DEBUG_KMS("sideband msg set header failed %d\n",
+ replyblock[0]);
+ return false;
+ }
+
+ replylen = min(msg->curchunk_len, (u8)(len - hdrlen));
+ ret = drm_dp_sideband_append_payload(msg, replyblock + hdrlen, replylen);
if (!ret) {
DRM_DEBUG_KMS("sideband msg build failed %d\n", replyblock[0]);
return false;
}
- replylen = msg->curchunk_len + msg->curchunk_hdrlen;
- replylen -= len;
+ replylen = msg->curchunk_len + msg->curchunk_hdrlen - len;
curreply = len;
while (replylen > 0) {
len = min3(replylen, mgr->max_dpcd_transaction_bytes, 16);
@@ -3743,7 +3717,7 @@ static bool drm_dp_get_one_sb_msg(struct drm_dp_mst_topology_mgr *mgr, bool up)
return false;
}
- ret = drm_dp_sideband_msg_build(msg, replyblock, len, false);
+ ret = drm_dp_sideband_append_payload(msg, replyblock, len);
if (!ret) {
DRM_DEBUG_KMS("failed to build sideband msg\n");
return false;
@@ -3758,67 +3732,60 @@ static bool drm_dp_get_one_sb_msg(struct drm_dp_mst_topology_mgr *mgr, bool up)
static int drm_dp_mst_handle_down_rep(struct drm_dp_mst_topology_mgr *mgr)
{
struct drm_dp_sideband_msg_tx *txmsg;
- struct drm_dp_mst_branch *mstb;
- struct drm_dp_sideband_msg_hdr *hdr = &mgr->down_rep_recv.initial_hdr;
- int slot = -1;
-
- if (!drm_dp_get_one_sb_msg(mgr, false))
- goto clear_down_rep_recv;
+ struct drm_dp_mst_branch *mstb = NULL;
+ struct drm_dp_sideband_msg_rx *msg = &mgr->down_rep_recv;
- if (!mgr->down_rep_recv.have_eomt)
- return 0;
+ if (!drm_dp_get_one_sb_msg(mgr, false, &mstb))
+ goto out;
- mstb = drm_dp_get_mst_branch_device(mgr, hdr->lct, hdr->rad);
- if (!mstb) {
- DRM_DEBUG_KMS("Got MST reply from unknown device %d\n",
- hdr->lct);
- goto clear_down_rep_recv;
- }
+ /* Multi-packet message transmission, don't clear the reply */
+ if (!msg->have_eomt)
+ goto out;
/* find the message */
- slot = hdr->seqno;
mutex_lock(&mgr->qlock);
- txmsg = mstb->tx_slots[slot];
- /* remove from slots */
+ txmsg = list_first_entry_or_null(&mgr->tx_msg_downq,
+ struct drm_dp_sideband_msg_tx, next);
mutex_unlock(&mgr->qlock);
- if (!txmsg) {
+ /* Were we actually expecting a response, and from this mstb? */
+ if (!txmsg || txmsg->dst != mstb) {
+ struct drm_dp_sideband_msg_hdr *hdr;
+ hdr = &msg->initial_hdr;
DRM_DEBUG_KMS("Got MST reply with no msg %p %d %d %02x %02x\n",
mstb, hdr->seqno, hdr->lct, hdr->rad[0],
- mgr->down_rep_recv.msg[0]);
- goto no_msg;
+ msg->msg[0]);
+ goto out_clear_reply;
}
- drm_dp_sideband_parse_reply(&mgr->down_rep_recv, &txmsg->reply);
+ drm_dp_sideband_parse_reply(msg, &txmsg->reply);
- if (txmsg->reply.reply_type == DP_SIDEBAND_REPLY_NAK)
+ if (txmsg->reply.reply_type == DP_SIDEBAND_REPLY_NAK) {
DRM_DEBUG_KMS("Got NAK reply: req 0x%02x (%s), reason 0x%02x (%s), nak data 0x%02x\n",
txmsg->reply.req_type,
drm_dp_mst_req_type_str(txmsg->reply.req_type),
txmsg->reply.u.nak.reason,
drm_dp_mst_nak_reason_str(txmsg->reply.u.nak.reason),
txmsg->reply.u.nak.nak_data);
+ }
- memset(&mgr->down_rep_recv, 0, sizeof(struct drm_dp_sideband_msg_rx));
+ memset(msg, 0, sizeof(struct drm_dp_sideband_msg_rx));
drm_dp_mst_topology_put_mstb(mstb);
mutex_lock(&mgr->qlock);
txmsg->state = DRM_DP_SIDEBAND_TX_RX;
- mstb->tx_slots[slot] = NULL;
- mgr->is_waiting_for_dwn_reply = false;
+ list_del(&txmsg->next);
mutex_unlock(&mgr->qlock);
wake_up_all(&mgr->tx_waitq);
return 0;
-no_msg:
- drm_dp_mst_topology_put_mstb(mstb);
-clear_down_rep_recv:
- mutex_lock(&mgr->qlock);
- mgr->is_waiting_for_dwn_reply = false;
- mutex_unlock(&mgr->qlock);
- memset(&mgr->down_rep_recv, 0, sizeof(struct drm_dp_sideband_msg_rx));
+out_clear_reply:
+ memset(msg, 0, sizeof(struct drm_dp_sideband_msg_rx));
+out:
+ if (mstb)
+ drm_dp_mst_topology_put_mstb(mstb);
return 0;
}
@@ -3894,11 +3861,9 @@ static void drm_dp_mst_up_req_work(struct work_struct *work)
static int drm_dp_mst_handle_up_req(struct drm_dp_mst_topology_mgr *mgr)
{
- struct drm_dp_sideband_msg_hdr *hdr = &mgr->up_req_recv.initial_hdr;
struct drm_dp_pending_up_req *up_req;
- bool seqno;
- if (!drm_dp_get_one_sb_msg(mgr, true))
+ if (!drm_dp_get_one_sb_msg(mgr, true, NULL))
goto out;
if (!mgr->up_req_recv.have_eomt)
@@ -3911,7 +3876,6 @@ static int drm_dp_mst_handle_up_req(struct drm_dp_mst_topology_mgr *mgr)
}
INIT_LIST_HEAD(&up_req->next);
- seqno = hdr->seqno;
drm_dp_sideband_parse_req(&mgr->up_req_recv, &up_req->msg);
if (up_req->msg.req_type != DP_CONNECTION_STATUS_NOTIFY &&
@@ -3923,7 +3887,7 @@ static int drm_dp_mst_handle_up_req(struct drm_dp_mst_topology_mgr *mgr)
}
drm_dp_send_up_ack_reply(mgr, mgr->mst_primary, up_req->msg.req_type,
- seqno, false);
+ false);
if (up_req->msg.req_type == DP_CONNECTION_STATUS_NOTIFY) {
const struct drm_dp_connection_status_notify *conn_stat =
@@ -3945,7 +3909,7 @@ static int drm_dp_mst_handle_up_req(struct drm_dp_mst_topology_mgr *mgr)
res_stat->available_pbn);
}
- up_req->hdr = *hdr;
+ up_req->hdr = mgr->up_req_recv.initial_hdr;
mutex_lock(&mgr->up_req_lock);
list_add_tail(&up_req->next, &mgr->up_req_list);
mutex_unlock(&mgr->up_req_lock);
@@ -4051,27 +4015,6 @@ out:
EXPORT_SYMBOL(drm_dp_mst_detect_port);
/**
- * drm_dp_mst_port_has_audio() - Check whether port has audio capability or not
- * @mgr: manager for this port
- * @port: unverified pointer to a port.
- *
- * This returns whether the port supports audio or not.
- */
-bool drm_dp_mst_port_has_audio(struct drm_dp_mst_topology_mgr *mgr,
- struct drm_dp_mst_port *port)
-{
- bool ret = false;
-
- port = drm_dp_mst_topology_get_port_validated(mgr, port);
- if (!port)
- return ret;
- ret = port->has_audio;
- drm_dp_mst_topology_put_port(port);
- return ret;
-}
-EXPORT_SYMBOL(drm_dp_mst_port_has_audio);
-
-/**
* drm_dp_mst_get_edid() - get EDID for an MST port
* @connector: toplevel connector to get EDID for
* @mgr: manager for this port
@@ -4448,42 +4391,58 @@ fail:
return ret;
}
+static int do_get_act_status(struct drm_dp_aux *aux)
+{
+ int ret;
+ u8 status;
+
+ ret = drm_dp_dpcd_readb(aux, DP_PAYLOAD_TABLE_UPDATE_STATUS, &status);
+ if (ret < 0)
+ return ret;
+
+ return status;
+}
/**
- * drm_dp_check_act_status() - Check ACT handled status.
+ * drm_dp_check_act_status() - Polls for ACT handled status.
* @mgr: manager to use
*
- * Check the payload status bits in the DPCD for ACT handled completion.
+ * Tries waiting for the MST hub to finish updating its payload table by
+ * polling for the ACT handled bit for up to 3 seconds (yes, some hubs really
+ * take that long).
+ *
+ * Returns:
+ * 0 if the ACT was handled in time, negative error code on failure.
*/
int drm_dp_check_act_status(struct drm_dp_mst_topology_mgr *mgr)
{
- u8 status;
- int ret;
- int count = 0;
-
- do {
- ret = drm_dp_dpcd_readb(mgr->aux, DP_PAYLOAD_TABLE_UPDATE_STATUS, &status);
-
- if (ret < 0) {
- DRM_DEBUG_KMS("failed to read payload table status %d\n", ret);
- goto fail;
- }
-
- if (status & DP_PAYLOAD_ACT_HANDLED)
- break;
- count++;
- udelay(100);
-
- } while (count < 30);
-
- if (!(status & DP_PAYLOAD_ACT_HANDLED)) {
- DRM_DEBUG_KMS("failed to get ACT bit %d after %d retries\n", status, count);
- ret = -EINVAL;
- goto fail;
+ /*
+ * There doesn't seem to be any recommended retry count or timeout in
+ * the MST specification. Since some hubs have been observed to take
+ * over 1 second to update their payload allocations under certain
+ * conditions, we use a rather large timeout value.
+ */
+ const int timeout_ms = 3000;
+ int ret, status;
+
+ ret = readx_poll_timeout(do_get_act_status, mgr->aux, status,
+ status & DP_PAYLOAD_ACT_HANDLED || status < 0,
+ 200, timeout_ms * USEC_PER_MSEC);
+ if (ret < 0 && status >= 0) {
+ DRM_ERROR("Failed to get ACT after %dms, last status: %02x\n",
+ timeout_ms, status);
+ return -EINVAL;
+ } else if (status < 0) {
+ /*
+ * Failure here isn't unexpected - the hub may have
+ * just been unplugged
+ */
+ DRM_DEBUG_KMS("Failed to read payload table status: %d\n",
+ status);
+ return status;
}
+
return 0;
-fail:
- return ret;
}
EXPORT_SYMBOL(drm_dp_check_act_status);
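Callers are unchanged; a typical MST driver sequence would look roughly like the sketch below (assumed usage, with drm_dp_update_payload_part2() being the existing payload-table helper):

ret = drm_dp_update_payload_part2(mgr);
if (ret)
	return ret;

/* Now polls for up to 3 s instead of ~3 ms worth of retries. */
ret = drm_dp_check_act_status(mgr);
if (ret)
	DRM_DEBUG_KMS("ACT not handled: %d\n", ret);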
@@ -4674,28 +4633,18 @@ static void drm_dp_tx_work(struct work_struct *work)
struct drm_dp_mst_topology_mgr *mgr = container_of(work, struct drm_dp_mst_topology_mgr, tx_work);
mutex_lock(&mgr->qlock);
- if (!list_empty(&mgr->tx_msg_downq) && !mgr->is_waiting_for_dwn_reply)
+ if (!list_empty(&mgr->tx_msg_downq))
process_single_down_tx_qlock(mgr);
mutex_unlock(&mgr->qlock);
}
-static inline void drm_dp_destroy_connector(struct drm_dp_mst_port *port)
+static inline void
+drm_dp_delayed_destroy_port(struct drm_dp_mst_port *port)
{
- if (!port->connector)
- return;
-
- if (port->mgr->cbs->destroy_connector) {
- port->mgr->cbs->destroy_connector(port->mgr, port->connector);
- } else {
+ if (port->connector) {
drm_connector_unregister(port->connector);
drm_connector_put(port->connector);
}
-}
-
-static inline void
-drm_dp_delayed_destroy_port(struct drm_dp_mst_port *port)
-{
- drm_dp_destroy_connector(port);
drm_dp_port_set_pdt(port, DP_PEER_DEVICE_NONE, port->mcs);
drm_dp_mst_put_port_malloc(port);
@@ -4705,26 +4654,25 @@ static inline void
drm_dp_delayed_destroy_mstb(struct drm_dp_mst_branch *mstb)
{
struct drm_dp_mst_topology_mgr *mgr = mstb->mgr;
- struct drm_dp_mst_port *port, *tmp;
+ struct drm_dp_mst_port *port, *port_tmp;
+ struct drm_dp_sideband_msg_tx *txmsg, *txmsg_tmp;
bool wake_tx = false;
mutex_lock(&mgr->lock);
- list_for_each_entry_safe(port, tmp, &mstb->ports, next) {
+ list_for_each_entry_safe(port, port_tmp, &mstb->ports, next) {
list_del(&port->next);
drm_dp_mst_topology_put_port(port);
}
mutex_unlock(&mgr->lock);
- /* drop any tx slots msg */
+ /* drop any tx slot msg */
mutex_lock(&mstb->mgr->qlock);
- if (mstb->tx_slots[0]) {
- mstb->tx_slots[0]->state = DRM_DP_SIDEBAND_TX_TIMEOUT;
- mstb->tx_slots[0] = NULL;
- wake_tx = true;
- }
- if (mstb->tx_slots[1]) {
- mstb->tx_slots[1]->state = DRM_DP_SIDEBAND_TX_TIMEOUT;
- mstb->tx_slots[1] = NULL;
+ list_for_each_entry_safe(txmsg, txmsg_tmp, &mgr->tx_msg_downq, next) {
+ if (txmsg->dst != mstb)
+ continue;
+
+ txmsg->state = DRM_DP_SIDEBAND_TX_TIMEOUT;
+ list_del(&txmsg->next);
wake_tx = true;
}
mutex_unlock(&mstb->mgr->qlock);
@@ -5499,7 +5447,7 @@ struct drm_dp_aux *drm_dp_mst_dsc_aux_for_port(struct drm_dp_mst_port *port)
{
struct drm_dp_mst_port *immediate_upstream_port;
struct drm_dp_mst_port *fec_port;
- struct drm_dp_desc desc = { 0 };
+ struct drm_dp_desc desc = { };
u8 endpoint_fec;
u8 endpoint_dsc;
diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index 7b1a628d1f6e..bc38322f306e 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -39,6 +39,7 @@
#include <drm/drm_color_mgmt.h>
#include <drm/drm_drv.h>
#include <drm/drm_file.h>
+#include <drm/drm_managed.h>
#include <drm/drm_mode_object.h>
#include <drm/drm_print.h>
@@ -92,13 +93,27 @@ static struct drm_minor **drm_minor_get_slot(struct drm_device *dev,
}
}
+static void drm_minor_alloc_release(struct drm_device *dev, void *data)
+{
+ struct drm_minor *minor = data;
+ unsigned long flags;
+
+ WARN_ON(dev != minor->dev);
+
+ put_device(minor->kdev);
+
+ spin_lock_irqsave(&drm_minor_lock, flags);
+ idr_remove(&drm_minors_idr, minor->index);
+ spin_unlock_irqrestore(&drm_minor_lock, flags);
+}
+
static int drm_minor_alloc(struct drm_device *dev, unsigned int type)
{
struct drm_minor *minor;
unsigned long flags;
int r;
- minor = kzalloc(sizeof(*minor), GFP_KERNEL);
+ minor = drmm_kzalloc(dev, sizeof(*minor), GFP_KERNEL);
if (!minor)
return -ENOMEM;
@@ -116,46 +131,20 @@ static int drm_minor_alloc(struct drm_device *dev, unsigned int type)
idr_preload_end();
if (r < 0)
- goto err_free;
+ return r;
minor->index = r;
+ r = drmm_add_action_or_reset(dev, drm_minor_alloc_release, minor);
+ if (r)
+ return r;
+
minor->kdev = drm_sysfs_minor_alloc(minor);
- if (IS_ERR(minor->kdev)) {
- r = PTR_ERR(minor->kdev);
- goto err_index;
- }
+ if (IS_ERR(minor->kdev))
+ return PTR_ERR(minor->kdev);
*drm_minor_get_slot(dev, type) = minor;
return 0;
-
-err_index:
- spin_lock_irqsave(&drm_minor_lock, flags);
- idr_remove(&drm_minors_idr, minor->index);
- spin_unlock_irqrestore(&drm_minor_lock, flags);
-err_free:
- kfree(minor);
- return r;
-}
-
-static void drm_minor_free(struct drm_device *dev, unsigned int type)
-{
- struct drm_minor **slot, *minor;
- unsigned long flags;
-
- slot = drm_minor_get_slot(dev, type);
- minor = *slot;
- if (!minor)
- return;
-
- put_device(minor->kdev);
-
- spin_lock_irqsave(&drm_minor_lock, flags);
- idr_remove(&drm_minors_idr, minor->index);
- spin_unlock_irqrestore(&drm_minor_lock, flags);
-
- kfree(minor);
- *slot = NULL;
}
static int drm_minor_register(struct drm_device *dev, unsigned int type)
@@ -270,17 +259,22 @@ void drm_minor_release(struct drm_minor *minor)
* any other resources allocated at device initialization and drop the driver's
* reference to &drm_device using drm_dev_put().
*
- * Note that the lifetime rules for &drm_device instance has still a lot of
- * historical baggage. Hence use the reference counting provided by
- * drm_dev_get() and drm_dev_put() only carefully.
+ * Note that any allocation or resource which is visible to userspace must be
+ * released only when the final drm_dev_put() is called, and not when the
+ * driver is unbound from the underlying physical struct &device. Best to use
+ * &drm_device managed resources with drmm_add_action(), drmm_kmalloc() and
+ * related functions.
+ *
+ * devres managed resources like devm_kmalloc() can only be used for resources
+ * directly related to the underlying hardware device, and only used in code
+ * paths fully protected by drm_dev_enter() and drm_dev_exit().
*
* Display driver example
* ~~~~~~~~~~~~~~~~~~~~~~
*
* The following example shows a typical structure of a DRM display driver.
* The example focuses on the probe() function and the other functions that are
- * almost always present and serves as a demonstration of devm_drm_dev_init()
- * usage with its accompanying drm_driver->release callback.
+ * almost always present and serve as a demonstration of devm_drm_dev_init().
*
* .. code-block:: c
*
@@ -290,19 +284,8 @@ void drm_minor_release(struct drm_minor *minor)
* struct clk *pclk;
* };
*
- * static void driver_drm_release(struct drm_device *drm)
- * {
- * struct driver_device *priv = container_of(...);
- *
- * drm_mode_config_cleanup(drm);
- * drm_dev_fini(drm);
- * kfree(priv->userspace_facing);
- * kfree(priv);
- * }
- *
* static struct drm_driver driver_drm_driver = {
* [...]
- * .release = driver_drm_release,
* };
*
* static int driver_probe(struct platform_device *pdev)
@@ -322,13 +305,16 @@ void drm_minor_release(struct drm_minor *minor)
*
* ret = devm_drm_dev_init(&pdev->dev, drm, &driver_drm_driver);
* if (ret) {
- * kfree(drm);
+ * kfree(priv);
* return ret;
* }
+ * drmm_add_final_kfree(drm, priv);
*
- * drm_mode_config_init(drm);
+ * ret = drmm_mode_config_init(drm);
+ * if (ret)
+ * return ret;
*
- * priv->userspace_facing = kzalloc(..., GFP_KERNEL);
+ * priv->userspace_facing = drmm_kzalloc(..., GFP_KERNEL);
* if (!priv->userspace_facing)
* return -ENOMEM;
*
@@ -580,6 +566,23 @@ static void drm_fs_inode_free(struct inode *inode)
* used.
*/
+static void drm_dev_init_release(struct drm_device *dev, void *res)
+{
+ drm_legacy_ctxbitmap_cleanup(dev);
+ drm_legacy_remove_map_hash(dev);
+ drm_fs_inode_free(dev->anon_inode);
+
+ put_device(dev->dev);
+ /* Prevent use-after-free in drm_managed_release when debugging is
+ * enabled. Slightly awkward, but can't really be helped. */
+ dev->dev = NULL;
+ mutex_destroy(&dev->master_mutex);
+ mutex_destroy(&dev->clientlist_mutex);
+ mutex_destroy(&dev->filelist_mutex);
+ mutex_destroy(&dev->struct_mutex);
+ drm_legacy_destroy_members(dev);
+}
+
/**
* drm_dev_init - Initialise new DRM device
* @dev: DRM device
@@ -608,6 +611,9 @@ static void drm_fs_inode_free(struct inode *inode)
* arbitrary offset, you must supply a &drm_driver.release callback and control
* the finalization explicitly.
*
+ * Note that drivers must call drmm_add_final_kfree() after this function has
+ * completed successfully.
+ *
* RETURNS:
* 0 on success, or error code on failure.
*/
@@ -629,6 +635,9 @@ int drm_dev_init(struct drm_device *dev,
dev->dev = get_device(parent);
dev->driver = driver;
+ INIT_LIST_HEAD(&dev->managed.resources);
+ spin_lock_init(&dev->managed.lock);
+
/* no per-device feature limits by default */
dev->driver_features = ~0u;
@@ -644,26 +653,30 @@ int drm_dev_init(struct drm_device *dev,
mutex_init(&dev->clientlist_mutex);
mutex_init(&dev->master_mutex);
+ ret = drmm_add_action(dev, drm_dev_init_release, NULL);
+ if (ret)
+ return ret;
+
dev->anon_inode = drm_fs_inode_new();
if (IS_ERR(dev->anon_inode)) {
ret = PTR_ERR(dev->anon_inode);
DRM_ERROR("Cannot allocate anonymous inode: %d\n", ret);
- goto err_free;
+ goto err;
}
if (drm_core_check_feature(dev, DRIVER_RENDER)) {
ret = drm_minor_alloc(dev, DRM_MINOR_RENDER);
if (ret)
- goto err_minors;
+ goto err;
}
ret = drm_minor_alloc(dev, DRM_MINOR_PRIMARY);
if (ret)
- goto err_minors;
+ goto err;
ret = drm_legacy_create_map_hash(dev);
if (ret)
- goto err_minors;
+ goto err;
drm_legacy_ctxbitmap_init(dev);
@@ -671,33 +684,19 @@ int drm_dev_init(struct drm_device *dev,
ret = drm_gem_init(dev);
if (ret) {
DRM_ERROR("Cannot initialize graphics execution manager (GEM)\n");
- goto err_ctxbitmap;
+ goto err;
}
}
ret = drm_dev_set_unique(dev, dev_name(parent));
if (ret)
- goto err_setunique;
+ goto err;
return 0;
-err_setunique:
- if (drm_core_check_feature(dev, DRIVER_GEM))
- drm_gem_destroy(dev);
-err_ctxbitmap:
- drm_legacy_ctxbitmap_cleanup(dev);
- drm_legacy_remove_map_hash(dev);
-err_minors:
- drm_minor_free(dev, DRM_MINOR_PRIMARY);
- drm_minor_free(dev, DRM_MINOR_RENDER);
- drm_fs_inode_free(dev->anon_inode);
-err_free:
- put_device(dev->dev);
- mutex_destroy(&dev->master_mutex);
- mutex_destroy(&dev->clientlist_mutex);
- mutex_destroy(&dev->filelist_mutex);
- mutex_destroy(&dev->struct_mutex);
- drm_legacy_destroy_members(dev);
+err:
+ drm_managed_release(dev);
+
return ret;
}
EXPORT_SYMBOL(drm_dev_init);
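With the error unwinding handled by drm_managed_release(), a driver that embeds struct drm_device only has to pair drm_dev_init() with drmm_add_final_kfree(). A minimal sketch of that pairing, using a hypothetical driver_device container (includes: <linux/slab.h>, <drm/drm_drv.h>, <drm/drm_managed.h>):

	struct driver_device {
		struct drm_device drm;
		/* further driver-private state */
	};

	static int driver_device_create(struct device *parent,
					struct drm_driver *drv,
					struct driver_device **out)
	{
		struct driver_device *priv;
		int ret;

		priv = kzalloc(sizeof(*priv), GFP_KERNEL);
		if (!priv)
			return -ENOMEM;

		ret = drm_dev_init(&priv->drm, drv, parent);
		if (ret) {
			kfree(priv);
			return ret;
		}
		/* the containing allocation is freed on the final drm_dev_put() */
		drmm_add_final_kfree(&priv->drm, priv);

		*out = priv;
		return 0;
	}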
@@ -714,8 +713,10 @@ static void devm_drm_dev_init_release(void *data)
* @driver: DRM driver
*
* Managed drm_dev_init(). The DRM device initialized with this function is
- * automatically put on driver detach using drm_dev_put(). You must supply a
- * &drm_driver.release callback to control the finalization explicitly.
+ * automatically put on driver detach using drm_dev_put().
+ *
+ * Note that drivers must call drmm_add_final_kfree() after this function has
+ * completed successfully.
*
* RETURNS:
* 0 on success, or error code on failure.
@@ -726,9 +727,6 @@ int devm_drm_dev_init(struct device *parent,
{
int ret;
- if (WARN_ON(!driver->release))
- return -EINVAL;
-
ret = drm_dev_init(dev, driver, parent);
if (ret)
return ret;
@@ -741,42 +739,28 @@ int devm_drm_dev_init(struct device *parent,
}
EXPORT_SYMBOL(devm_drm_dev_init);
-/**
- * drm_dev_fini - Finalize a dead DRM device
- * @dev: DRM device
- *
- * Finalize a dead DRM device. This is the converse to drm_dev_init() and
- * frees up all data allocated by it. All driver private data should be
- * finalized first. Note that this function does not free the @dev, that is
- * left to the caller.
- *
- * The ref-count of @dev must be zero, and drm_dev_fini() should only be called
- * from a &drm_driver.release callback.
- */
-void drm_dev_fini(struct drm_device *dev)
+void *__devm_drm_dev_alloc(struct device *parent, struct drm_driver *driver,
+ size_t size, size_t offset)
{
- drm_vblank_cleanup(dev);
-
- if (drm_core_check_feature(dev, DRIVER_GEM))
- drm_gem_destroy(dev);
-
- drm_legacy_ctxbitmap_cleanup(dev);
- drm_legacy_remove_map_hash(dev);
- drm_fs_inode_free(dev->anon_inode);
+ void *container;
+ struct drm_device *drm;
+ int ret;
- drm_minor_free(dev, DRM_MINOR_PRIMARY);
- drm_minor_free(dev, DRM_MINOR_RENDER);
+ container = kzalloc(size, GFP_KERNEL);
+ if (!container)
+ return ERR_PTR(-ENOMEM);
- put_device(dev->dev);
+ drm = container + offset;
+ ret = devm_drm_dev_init(parent, drm, driver);
+ if (ret) {
+ kfree(container);
+ return ERR_PTR(ret);
+ }
+ drmm_add_final_kfree(drm, container);
- mutex_destroy(&dev->master_mutex);
- mutex_destroy(&dev->clientlist_mutex);
- mutex_destroy(&dev->filelist_mutex);
- mutex_destroy(&dev->struct_mutex);
- drm_legacy_destroy_members(dev);
- kfree(dev->unique);
+ return container;
}
-EXPORT_SYMBOL(drm_dev_fini);
+EXPORT_SYMBOL(__devm_drm_dev_alloc);
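The new __devm_drm_dev_alloc() bundles the allocation, devm_drm_dev_init() and drmm_add_final_kfree() steps for drivers that embed struct drm_device in a larger structure. A sketch of how a probe function might call it directly, reusing driver_drm_driver from the example above; the offsetof() argument tells the helper where the embedded drm_device lives:

	struct driver_device {
		struct clk *pclk;
		struct drm_device drm;
	};

	static int driver_probe(struct platform_device *pdev)
	{
		struct driver_device *priv;

		priv = __devm_drm_dev_alloc(&pdev->dev, &driver_drm_driver,
					    sizeof(*priv),
					    offsetof(struct driver_device, drm));
		if (IS_ERR(priv))
			return PTR_ERR(priv);

		/* priv and priv->drm are now freed on the final drm_dev_put() */
		return 0;
	}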
/**
* drm_dev_alloc - Allocate new DRM device
@@ -816,6 +800,8 @@ struct drm_device *drm_dev_alloc(struct drm_driver *driver,
return ERR_PTR(ret);
}
+ drmm_add_final_kfree(dev, dev);
+
return dev;
}
EXPORT_SYMBOL(drm_dev_alloc);
@@ -824,12 +810,13 @@ static void drm_dev_release(struct kref *ref)
{
struct drm_device *dev = container_of(ref, struct drm_device, ref);
- if (dev->driver->release) {
+ if (dev->driver->release)
dev->driver->release(dev);
- } else {
- drm_dev_fini(dev);
- kfree(dev);
- }
+
+ drm_managed_release(dev);
+
+ if (dev->managed.final_kfree)
+ kfree(dev->managed.final_kfree);
}
/**
@@ -946,6 +933,11 @@ int drm_dev_register(struct drm_device *dev, unsigned long flags)
struct drm_driver *driver = dev->driver;
int ret;
+ if (!driver->load)
+ drm_mode_config_validate(dev);
+
+ WARN_ON(!dev->managed.final_kfree);
+
if (drm_dev_needs_global_mutex(dev))
mutex_lock(&drm_global_mutex);
@@ -1046,8 +1038,8 @@ EXPORT_SYMBOL(drm_dev_unregister);
*/
int drm_dev_set_unique(struct drm_device *dev, const char *name)
{
- kfree(dev->unique);
- dev->unique = kstrdup(name, GFP_KERNEL);
+ drmm_kfree(dev, dev->unique);
+ dev->unique = drmm_kstrdup(dev, name, GFP_KERNEL);
return dev->unique ? 0 : -ENOMEM;
}
diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index d96e3ce3e535..fed653f13c26 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -1584,8 +1584,6 @@ module_param_named(edid_fixup, edid_fixup, int, 0400);
MODULE_PARM_DESC(edid_fixup,
"Minimum number of valid EDID header bytes (0-8, default 6)");
-static void drm_get_displayid(struct drm_connector *connector,
- struct edid *edid);
static int validate_displayid(u8 *displayid, int length, int idx);
static int drm_edid_block_checksum(const u8 *raw_edid)
@@ -2019,18 +2017,13 @@ EXPORT_SYMBOL(drm_probe_ddc);
struct edid *drm_get_edid(struct drm_connector *connector,
struct i2c_adapter *adapter)
{
- struct edid *edid;
-
if (connector->force == DRM_FORCE_OFF)
return NULL;
if (connector->force == DRM_FORCE_UNSPECIFIED && !drm_probe_ddc(adapter))
return NULL;
- edid = drm_do_get_edid(connector, drm_do_probe_ddc_edid, adapter);
- if (edid)
- drm_get_displayid(connector, edid);
- return edid;
+ return drm_do_get_edid(connector, drm_do_probe_ddc_edid, adapter);
}
EXPORT_SYMBOL(drm_get_edid);
@@ -2388,6 +2381,14 @@ bad_std_timing(u8 a, u8 b)
(a == 0x20 && b == 0x20);
}
+static int drm_mode_hsync(const struct drm_display_mode *mode)
+{
+ if (mode->htotal <= 0)
+ return 0;
+
+ return DIV_ROUND_CLOSEST(mode->clock, mode->htotal);
+}
+
/**
* drm_mode_std - convert standard mode info (width, height, refresh) into mode
* @connector: connector of for the EDID block
@@ -3213,16 +3214,33 @@ static u8 *drm_find_edid_extension(const struct edid *edid, int ext_id)
}
-static u8 *drm_find_displayid_extension(const struct edid *edid)
+static u8 *drm_find_displayid_extension(const struct edid *edid,
+ int *length, int *idx)
{
- return drm_find_edid_extension(edid, DISPLAYID_EXT);
+ u8 *displayid = drm_find_edid_extension(edid, DISPLAYID_EXT);
+ struct displayid_hdr *base;
+ int ret;
+
+ if (!displayid)
+ return NULL;
+
+ /* EDID extensions block checksum isn't for us */
+ *length = EDID_LENGTH - 1;
+ *idx = 1;
+
+ ret = validate_displayid(displayid, *length, *idx);
+ if (ret)
+ return NULL;
+
+ base = (struct displayid_hdr *)&displayid[*idx];
+ *length = *idx + sizeof(*base) + base->bytes;
+
+ return displayid;
}
static u8 *drm_find_cea_extension(const struct edid *edid)
{
- int ret;
- int idx = 1;
- int length = EDID_LENGTH;
+ int length, idx;
struct displayid_block *block;
u8 *cea;
u8 *displayid;
@@ -3233,14 +3251,10 @@ static u8 *drm_find_cea_extension(const struct edid *edid)
return cea;
/* CEA blocks can also be found embedded in a DisplayID block */
- displayid = drm_find_displayid_extension(edid);
+ displayid = drm_find_displayid_extension(edid, &length, &idx);
if (!displayid)
return NULL;
- ret = validate_displayid(displayid, length, idx);
- if (ret)
- return NULL;
-
idx += sizeof(struct displayid_hdr);
for_each_displayid_db(displayid, block, idx, length) {
if (block->tag == DATA_BLOCK_CTA) {
@@ -5085,7 +5099,7 @@ u32 drm_add_display_info(struct drm_connector *connector, const struct edid *edi
static int validate_displayid(u8 *displayid, int length, int idx)
{
- int i;
+ int i, dispid_length;
u8 csum = 0;
struct displayid_hdr *base;
@@ -5094,15 +5108,18 @@ static int validate_displayid(u8 *displayid, int length, int idx)
DRM_DEBUG_KMS("base revision 0x%x, length %d, %d %d\n",
base->rev, base->bytes, base->prod_id, base->ext_count);
- if (base->bytes + 5 > length - idx)
+ /* +1 for DispID checksum */
+ dispid_length = sizeof(*base) + base->bytes + 1;
+ if (dispid_length > length - idx)
return -EINVAL;
- for (i = idx; i <= base->bytes + 5; i++) {
- csum += displayid[i];
- }
+
+ for (i = 0; i < dispid_length; i++)
+ csum += displayid[idx + i];
if (csum) {
DRM_NOTE("DisplayID checksum invalid, remainder is %d\n", csum);
return -EINVAL;
}
+
return 0;
}
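With this change validate_displayid() sums exactly sizeof(*base) + base->bytes + 1 bytes starting at the section header, i.e. the DisplayID payload plus its trailing checksum byte, and the section is valid when that 8-bit sum is zero. A standalone sketch of the check (plain C, outside the kernel):

	#include <stddef.h>
	#include <stdint.h>

	/* returns nonzero when the DisplayID section checksum is valid */
	static int displayid_csum_ok(const uint8_t *section, size_t dispid_length)
	{
		uint8_t csum = 0;
		size_t i;

		for (i = 0; i < dispid_length; i++)
			csum += section[i];

		return csum == 0;
	}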
@@ -5181,20 +5198,14 @@ static int add_displayid_detailed_modes(struct drm_connector *connector,
struct edid *edid)
{
u8 *displayid;
- int ret;
- int idx = 1;
- int length = EDID_LENGTH;
+ int length, idx;
struct displayid_block *block;
int num_modes = 0;
- displayid = drm_find_displayid_extension(edid);
+ displayid = drm_find_displayid_extension(edid, &length, &idx);
if (!displayid)
return 0;
- ret = validate_displayid(displayid, length, idx);
- if (ret)
- return 0;
-
idx += sizeof(struct displayid_hdr);
for_each_displayid_db(displayid, block, idx, length) {
switch (block->tag) {
@@ -5783,9 +5794,9 @@ drm_hdmi_vendor_infoframe_from_display_mode(struct hdmi_vendor_infoframe *frame,
EXPORT_SYMBOL(drm_hdmi_vendor_infoframe_from_display_mode);
static int drm_parse_tiled_block(struct drm_connector *connector,
- struct displayid_block *block)
+ const struct displayid_block *block)
{
- struct displayid_tiled_block *tile = (struct displayid_tiled_block *)block;
+ const struct displayid_tiled_block *tile = (struct displayid_tiled_block *)block;
u16 w, h;
u8 tile_v_loc, tile_h_loc;
u8 num_v_tile, num_h_tile;
@@ -5836,22 +5847,12 @@ static int drm_parse_tiled_block(struct drm_connector *connector,
return 0;
}
-static int drm_parse_display_id(struct drm_connector *connector,
- u8 *displayid, int length,
- bool is_edid_extension)
+static int drm_displayid_parse_tiled(struct drm_connector *connector,
+ const u8 *displayid, int length, int idx)
{
- /* if this is an EDID extension the first byte will be 0x70 */
- int idx = 0;
- struct displayid_block *block;
+ const struct displayid_block *block;
int ret;
- if (is_edid_extension)
- idx = 1;
-
- ret = validate_displayid(displayid, length, idx);
- if (ret)
- return ret;
-
idx += sizeof(struct displayid_hdr);
for_each_displayid_db(displayid, block, idx, length) {
DRM_DEBUG_KMS("block id 0x%x, rev %d, len %d\n",
@@ -5863,12 +5864,6 @@ static int drm_parse_display_id(struct drm_connector *connector,
if (ret)
return ret;
break;
- case DATA_BLOCK_TYPE_1_DETAILED_TIMING:
- /* handled in mode gathering code. */
- break;
- case DATA_BLOCK_CTA:
- /* handled in the cea parser code. */
- break;
default:
DRM_DEBUG_KMS("found DisplayID tag 0x%x, unhandled\n", block->tag);
break;
@@ -5877,19 +5872,21 @@ static int drm_parse_display_id(struct drm_connector *connector,
return 0;
}
-static void drm_get_displayid(struct drm_connector *connector,
- struct edid *edid)
+void drm_update_tile_info(struct drm_connector *connector,
+ const struct edid *edid)
{
- void *displayid = NULL;
+ const void *displayid = NULL;
+ int length, idx;
int ret;
+
connector->has_tile = false;
- displayid = drm_find_displayid_extension(edid);
+ displayid = drm_find_displayid_extension(edid, &length, &idx);
if (!displayid) {
/* drop reference to any tile group we had */
goto out_drop_ref;
}
- ret = drm_parse_display_id(connector, displayid, EDID_LENGTH, true);
+ ret = drm_displayid_parse_tiled(connector, displayid, length, idx);
if (ret < 0)
goto out_drop_ref;
if (!connector->has_tile)
diff --git a/drivers/gpu/drm/drm_fb_cma_helper.c b/drivers/gpu/drm/drm_fb_cma_helper.c
index 9801c0333eca..cb2349ad338d 100644
--- a/drivers/gpu/drm/drm_fb_cma_helper.c
+++ b/drivers/gpu/drm/drm_fb_cma_helper.c
@@ -2,7 +2,7 @@
/*
* drm kms/fb cma (contiguous memory allocator) helper functions
*
- * Copyright (C) 2012 Analog Device Inc.
+ * Copyright (C) 2012 Analog Devices Inc.
* Author: Lars-Peter Clausen <lars@metafoo.de>
*
* Based on udl_fbdev.c
diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index a9771de4d17e..02fc24026872 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -514,6 +514,14 @@ struct fb_info *drm_fb_helper_alloc_fbi(struct drm_fb_helper *fb_helper)
if (ret)
goto err_release;
+ /*
+ * TODO: We really should be smarter here and alloc an aperture
+ * for each IORESOURCE_MEM resource helper->dev->dev has and also
+ * init the ranges of the apertures based on the resources.
+ * Note some drivers currently count on there being only 1 empty
+ * aperture and fill this themselves; these will need to be dealt
+ * with somehow when fixing this.
+ */
info->apertures = alloc_apertures(1);
if (!info->apertures) {
ret = -ENOMEM;
@@ -2162,6 +2170,8 @@ static const struct drm_client_funcs drm_fbdev_client_funcs = {
*
* This function sets up generic fbdev emulation for drivers that support
* dumb buffers with a virtual address and that can be mmap'ed.
+ * drm_fbdev_generic_setup() shall be called after the DRM driver has registered
+ * the new DRM device with drm_dev_register().
*
* Restore, hotplug events and teardown are all taken care of. Drivers that do
* suspend/resume need to call drm_fb_helper_set_suspend_unlocked() themselves.
@@ -2178,29 +2188,30 @@ static const struct drm_client_funcs drm_fbdev_client_funcs = {
* Setup will be retried on the next hotplug event.
*
* The fbdev is destroyed by drm_dev_unregister().
- *
- * Returns:
- * Zero on success or negative error code on failure.
*/
-int drm_fbdev_generic_setup(struct drm_device *dev, unsigned int preferred_bpp)
+void drm_fbdev_generic_setup(struct drm_device *dev,
+ unsigned int preferred_bpp)
{
struct drm_fb_helper *fb_helper;
int ret;
- WARN(dev->fb_helper, "fb_helper is already set!\n");
+ drm_WARN(dev, !dev->registered, "Device has not been registered.\n");
+ drm_WARN(dev, dev->fb_helper, "fb_helper is already set!\n");
if (!drm_fbdev_emulation)
- return 0;
+ return;
fb_helper = kzalloc(sizeof(*fb_helper), GFP_KERNEL);
- if (!fb_helper)
- return -ENOMEM;
+ if (!fb_helper) {
+ drm_err(dev, "Failed to allocate fb_helper\n");
+ return;
+ }
ret = drm_client_init(dev, &fb_helper->client, "fbdev", &drm_fbdev_client_funcs);
if (ret) {
kfree(fb_helper);
drm_err(dev, "Failed to register client: %d\n", ret);
- return ret;
+ return;
}
if (!preferred_bpp)
@@ -2214,8 +2225,6 @@ int drm_fbdev_generic_setup(struct drm_device *dev, unsigned int preferred_bpp)
drm_dbg_kms(dev, "client hotplug ret=%d\n", ret);
drm_client_register(&fb_helper->client);
-
- return 0;
}
EXPORT_SYMBOL(drm_fbdev_generic_setup);
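Since drm_fbdev_generic_setup() now returns void and warns when the device has not been registered yet, the expected probe-time ordering is: register the device first, then set up fbdev emulation. A minimal sketch:

	static int driver_probe(struct platform_device *pdev)
	{
		struct drm_device *drm;
		int ret;

		/* ... allocate and initialize drm ... */

		ret = drm_dev_register(drm, 0);
		if (ret)
			return ret;

		/* failures are only logged; probe does not fail on fbdev problems */
		drm_fbdev_generic_setup(drm, 32);

		return 0;
	}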
diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c
index eb009d3ab48f..2f12b8c1d01c 100644
--- a/drivers/gpu/drm/drm_file.c
+++ b/drivers/gpu/drm/drm_file.c
@@ -569,9 +569,6 @@ ssize_t drm_read(struct file *filp, char __user *buffer,
struct drm_device *dev = file_priv->minor->dev;
ssize_t ret;
- if (!access_ok(buffer, count))
- return -EFAULT;
-
ret = mutex_lock_interruptible(&file_priv->event_read_lock);
if (ret)
return ret;
@@ -613,7 +610,8 @@ put_back_event:
file_priv->event_space -= length;
list_add(&e->link, &file_priv->event_list);
spin_unlock_irq(&dev->event_lock);
- wake_up_interruptible(&file_priv->event_wait);
+ wake_up_interruptible_poll(&file_priv->event_wait,
+ EPOLLIN | EPOLLRDNORM);
break;
}
@@ -809,7 +807,8 @@ void drm_send_event_locked(struct drm_device *dev, struct drm_pending_event *e)
list_del(&e->pending_link);
list_add_tail(&e->link,
&e->file_priv->event_list);
- wake_up_interruptible(&e->file_priv->event_wait);
+ wake_up_interruptible_poll(&e->file_priv->event_wait,
+ EPOLLIN | EPOLLRDNORM);
}
EXPORT_SYMBOL(drm_send_event_locked);
diff --git a/drivers/gpu/drm/drm_framebuffer.c b/drivers/gpu/drm/drm_framebuffer.c
index 57ac94ce9b9e..0375b3d7f8d0 100644
--- a/drivers/gpu/drm/drm_framebuffer.c
+++ b/drivers/gpu/drm/drm_framebuffer.c
@@ -1207,10 +1207,10 @@ static const struct drm_info_list drm_framebuffer_debugfs_list[] = {
{ "framebuffer", drm_framebuffer_info, 0 },
};
-int drm_framebuffer_debugfs_init(struct drm_minor *minor)
+void drm_framebuffer_debugfs_init(struct drm_minor *minor)
{
- return drm_debugfs_create_files(drm_framebuffer_debugfs_list,
- ARRAY_SIZE(drm_framebuffer_debugfs_list),
- minor->debugfs_root, minor);
+ drm_debugfs_create_files(drm_framebuffer_debugfs_list,
+ ARRAY_SIZE(drm_framebuffer_debugfs_list),
+ minor->debugfs_root, minor);
}
#endif
diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index 37627d06fb06..7bf628e13023 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -44,6 +44,7 @@
#include <drm/drm_drv.h>
#include <drm/drm_file.h>
#include <drm/drm_gem.h>
+#include <drm/drm_managed.h>
#include <drm/drm_print.h>
#include <drm/drm_vma_manager.h>
@@ -77,6 +78,12 @@
* up at a later date, and as our interface with shmfs for memory allocation.
*/
+static void
+drm_gem_init_release(struct drm_device *dev, void *ptr)
+{
+ drm_vma_offset_manager_destroy(dev->vma_offset_manager);
+}
+
/**
* drm_gem_init - Initialize the GEM device fields
* @dev: drm_device structure to initialize
@@ -89,7 +96,8 @@ drm_gem_init(struct drm_device *dev)
mutex_init(&dev->object_name_lock);
idr_init_base(&dev->object_name_idr, 1);
- vma_offset_manager = kzalloc(sizeof(*vma_offset_manager), GFP_KERNEL);
+ vma_offset_manager = drmm_kzalloc(dev, sizeof(*vma_offset_manager),
+ GFP_KERNEL);
if (!vma_offset_manager) {
DRM_ERROR("out of memory\n");
return -ENOMEM;
@@ -100,16 +108,7 @@ drm_gem_init(struct drm_device *dev)
DRM_FILE_PAGE_OFFSET_START,
DRM_FILE_PAGE_OFFSET_SIZE);
- return 0;
-}
-
-void
-drm_gem_destroy(struct drm_device *dev)
-{
-
- drm_vma_offset_manager_destroy(dev->vma_offset_manager);
- kfree(dev->vma_offset_manager);
- dev->vma_offset_manager = NULL;
+ return drmm_add_action(dev, drm_gem_init_release, NULL);
}
/**
@@ -432,7 +431,7 @@ err_unref:
* drm_gem_handle_create - create a gem handle for an object
* @file_priv: drm file-private structure to register the handle for
* @obj: object to register
- * @handlep: pionter to return the created handle to the caller
+ * @handlep: pointer to return the created handle to the caller
*
* Create a handle for this object. This adds a handle reference to the object,
* which includes a regular reference count. Callers will likely want to
diff --git a/drivers/gpu/drm/drm_gem_framebuffer_helper.c b/drivers/gpu/drm/drm_gem_framebuffer_helper.c
index 3a7ace19a902..ccc2c71fa491 100644
--- a/drivers/gpu/drm/drm_gem_framebuffer_helper.c
+++ b/drivers/gpu/drm/drm_gem_framebuffer_helper.c
@@ -21,6 +21,13 @@
#include <drm/drm_modeset_helper.h>
#include <drm/drm_simple_kms_helper.h>
+#define AFBC_HEADER_SIZE 16
+#define AFBC_TH_LAYOUT_ALIGNMENT 8
+#define AFBC_HDR_ALIGN 64
+#define AFBC_SUPERBLOCK_PIXELS 256
+#define AFBC_SUPERBLOCK_ALIGNMENT 128
+#define AFBC_TH_BODY_START_ALIGNMENT 4096
+
/**
* DOC: overview
*
@@ -54,32 +61,25 @@ struct drm_gem_object *drm_gem_fb_get_obj(struct drm_framebuffer *fb,
}
EXPORT_SYMBOL_GPL(drm_gem_fb_get_obj);
-static struct drm_framebuffer *
-drm_gem_fb_alloc(struct drm_device *dev,
+static int
+drm_gem_fb_init(struct drm_device *dev,
+ struct drm_framebuffer *fb,
const struct drm_mode_fb_cmd2 *mode_cmd,
struct drm_gem_object **obj, unsigned int num_planes,
const struct drm_framebuffer_funcs *funcs)
{
- struct drm_framebuffer *fb;
int ret, i;
- fb = kzalloc(sizeof(*fb), GFP_KERNEL);
- if (!fb)
- return ERR_PTR(-ENOMEM);
-
drm_helper_mode_fill_fb_struct(dev, fb, mode_cmd);
for (i = 0; i < num_planes; i++)
fb->obj[i] = obj[i];
ret = drm_framebuffer_init(dev, fb, funcs);
- if (ret) {
+ if (ret)
drm_err(dev, "Failed to init framebuffer: %d\n", ret);
- kfree(fb);
- return ERR_PTR(ret);
- }
- return fb;
+ return ret;
}
/**
@@ -123,10 +123,13 @@ int drm_gem_fb_create_handle(struct drm_framebuffer *fb, struct drm_file *file,
EXPORT_SYMBOL(drm_gem_fb_create_handle);
/**
- * drm_gem_fb_create_with_funcs() - Helper function for the
- * &drm_mode_config_funcs.fb_create
- * callback
+ * drm_gem_fb_init_with_funcs() - Helper function for implementing
+ * &drm_mode_config_funcs.fb_create
+ * callback in cases when the driver
+ * allocates a subclass of
+ * struct drm_framebuffer
* @dev: DRM device
+ * @fb: framebuffer object
* @file: DRM file that holds the GEM handle(s) backing the framebuffer
* @mode_cmd: Metadata from the userspace framebuffer creation request
* @funcs: vtable to be used for the new framebuffer object
@@ -134,23 +137,26 @@ EXPORT_SYMBOL(drm_gem_fb_create_handle);
* This function can be used to set &drm_framebuffer_funcs for drivers that need
* custom framebuffer callbacks. Use drm_gem_fb_create() if you don't need to
* change &drm_framebuffer_funcs. The function does buffer size validation.
+ * The buffer size validation only covers the general case, so callers should
+ * make sure the checks are appropriate for their use case, or at least do not
+ * conflict with it.
*
* Returns:
- * Pointer to a &drm_framebuffer on success or an error pointer on failure.
+ * Zero or a negative error code.
*/
-struct drm_framebuffer *
-drm_gem_fb_create_with_funcs(struct drm_device *dev, struct drm_file *file,
- const struct drm_mode_fb_cmd2 *mode_cmd,
- const struct drm_framebuffer_funcs *funcs)
+int drm_gem_fb_init_with_funcs(struct drm_device *dev,
+ struct drm_framebuffer *fb,
+ struct drm_file *file,
+ const struct drm_mode_fb_cmd2 *mode_cmd,
+ const struct drm_framebuffer_funcs *funcs)
{
const struct drm_format_info *info;
struct drm_gem_object *objs[4];
- struct drm_framebuffer *fb;
int ret, i;
info = drm_get_format_info(dev, mode_cmd);
if (!info)
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
for (i = 0; i < info->num_planes; i++) {
unsigned int width = mode_cmd->width / (i ? info->hsub : 1);
@@ -175,19 +181,55 @@ drm_gem_fb_create_with_funcs(struct drm_device *dev, struct drm_file *file,
}
}
- fb = drm_gem_fb_alloc(dev, mode_cmd, objs, i, funcs);
- if (IS_ERR(fb)) {
- ret = PTR_ERR(fb);
+ ret = drm_gem_fb_init(dev, fb, mode_cmd, objs, i, funcs);
+ if (ret)
goto err_gem_object_put;
- }
- return fb;
+ return 0;
err_gem_object_put:
for (i--; i >= 0; i--)
drm_gem_object_put_unlocked(objs[i]);
- return ERR_PTR(ret);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(drm_gem_fb_init_with_funcs);
+
+/**
+ * drm_gem_fb_create_with_funcs() - Helper function for the
+ * &drm_mode_config_funcs.fb_create
+ * callback
+ * @dev: DRM device
+ * @file: DRM file that holds the GEM handle(s) backing the framebuffer
+ * @mode_cmd: Metadata from the userspace framebuffer creation request
+ * @funcs: vtable to be used for the new framebuffer object
+ *
+ * This function can be used to set &drm_framebuffer_funcs for drivers that need
+ * custom framebuffer callbacks. Use drm_gem_fb_create() if you don't need to
+ * change &drm_framebuffer_funcs. The function does buffer size validation.
+ *
+ * Returns:
+ * Pointer to a &drm_framebuffer on success or an error pointer on failure.
+ */
+struct drm_framebuffer *
+drm_gem_fb_create_with_funcs(struct drm_device *dev, struct drm_file *file,
+ const struct drm_mode_fb_cmd2 *mode_cmd,
+ const struct drm_framebuffer_funcs *funcs)
+{
+ struct drm_framebuffer *fb;
+ int ret;
+
+ fb = kzalloc(sizeof(*fb), GFP_KERNEL);
+ if (!fb)
+ return ERR_PTR(-ENOMEM);
+
+ ret = drm_gem_fb_init_with_funcs(dev, fb, file, mode_cmd, funcs);
+ if (ret) {
+ kfree(fb);
+ return ERR_PTR(ret);
+ }
+
+ return fb;
}
EXPORT_SYMBOL_GPL(drm_gem_fb_create_with_funcs);
@@ -265,6 +307,132 @@ drm_gem_fb_create_with_dirty(struct drm_device *dev, struct drm_file *file,
}
EXPORT_SYMBOL_GPL(drm_gem_fb_create_with_dirty);
+static __u32 drm_gem_afbc_get_bpp(struct drm_device *dev,
+ const struct drm_mode_fb_cmd2 *mode_cmd)
+{
+ const struct drm_format_info *info;
+
+ info = drm_get_format_info(dev, mode_cmd);
+
+ /* use whatever a driver has set */
+ if (info->cpp[0])
+ return info->cpp[0] * 8;
+
+ /* guess otherwise */
+ switch (info->format) {
+ case DRM_FORMAT_YUV420_8BIT:
+ return 12;
+ case DRM_FORMAT_YUV420_10BIT:
+ return 15;
+ case DRM_FORMAT_VUY101010:
+ return 30;
+ default:
+ break;
+ }
+
+ /* all attempts failed */
+ return 0;
+}
+
+static int drm_gem_afbc_min_size(struct drm_device *dev,
+ const struct drm_mode_fb_cmd2 *mode_cmd,
+ struct drm_afbc_framebuffer *afbc_fb)
+{
+ __u32 n_blocks, w_alignment, h_alignment, hdr_alignment;
+ /* remove bpp when all users properly encode cpp in drm_format_info */
+ __u32 bpp;
+
+ switch (mode_cmd->modifier[0] & AFBC_FORMAT_MOD_BLOCK_SIZE_MASK) {
+ case AFBC_FORMAT_MOD_BLOCK_SIZE_16x16:
+ afbc_fb->block_width = 16;
+ afbc_fb->block_height = 16;
+ break;
+ case AFBC_FORMAT_MOD_BLOCK_SIZE_32x8:
+ afbc_fb->block_width = 32;
+ afbc_fb->block_height = 8;
+ break;
+ /* no user exists yet - fall through */
+ case AFBC_FORMAT_MOD_BLOCK_SIZE_64x4:
+ case AFBC_FORMAT_MOD_BLOCK_SIZE_32x8_64x4:
+ default:
+ drm_dbg_kms(dev, "Invalid AFBC_FORMAT_MOD_BLOCK_SIZE: %lld.\n",
+ mode_cmd->modifier[0]
+ & AFBC_FORMAT_MOD_BLOCK_SIZE_MASK);
+ return -EINVAL;
+ }
+
+ /* tiled header afbc */
+ w_alignment = afbc_fb->block_width;
+ h_alignment = afbc_fb->block_height;
+ hdr_alignment = AFBC_HDR_ALIGN;
+ if (mode_cmd->modifier[0] & AFBC_FORMAT_MOD_TILED) {
+ w_alignment *= AFBC_TH_LAYOUT_ALIGNMENT;
+ h_alignment *= AFBC_TH_LAYOUT_ALIGNMENT;
+ hdr_alignment = AFBC_TH_BODY_START_ALIGNMENT;
+ }
+
+ afbc_fb->aligned_width = ALIGN(mode_cmd->width, w_alignment);
+ afbc_fb->aligned_height = ALIGN(mode_cmd->height, h_alignment);
+ afbc_fb->offset = mode_cmd->offsets[0];
+
+ bpp = drm_gem_afbc_get_bpp(dev, mode_cmd);
+ if (!bpp) {
+ drm_dbg_kms(dev, "Invalid AFBC bpp value: %d\n", bpp);
+ return -EINVAL;
+ }
+
+ n_blocks = (afbc_fb->aligned_width * afbc_fb->aligned_height)
+ / AFBC_SUPERBLOCK_PIXELS;
+ afbc_fb->afbc_size = ALIGN(n_blocks * AFBC_HEADER_SIZE, hdr_alignment);
+ afbc_fb->afbc_size += n_blocks * ALIGN(bpp * AFBC_SUPERBLOCK_PIXELS / 8,
+ AFBC_SUPERBLOCK_ALIGNMENT);
+
+ return 0;
+}
+
+/**
+ * drm_gem_fb_afbc_init() - Helper function for drivers using afbc to
+ * fill and validate all the afbc-specific
+ * struct drm_afbc_framebuffer members
+ *
+ * @dev: DRM device
+ * @mode_cmd: Metadata from the userspace framebuffer creation request
+ * @afbc_fb: afbc-specific framebuffer
+ *
+ * This function can be used by drivers which support afbc to complete
+ * the preparation of struct drm_afbc_framebuffer. It must be called after
+ * allocating that struct and calling drm_gem_fb_init_with_funcs().
+ * It is the caller's responsibility to put the afbc_fb->base.obj objects in case
+ * the call is unsuccessful.
+ *
+ * Returns:
+ * Zero on success or a negative error value on failure.
+ */
+int drm_gem_fb_afbc_init(struct drm_device *dev,
+ const struct drm_mode_fb_cmd2 *mode_cmd,
+ struct drm_afbc_framebuffer *afbc_fb)
+{
+ const struct drm_format_info *info;
+ struct drm_gem_object **objs;
+ int ret;
+
+ objs = afbc_fb->base.obj;
+ info = drm_get_format_info(dev, mode_cmd);
+ if (!info)
+ return -EINVAL;
+
+ ret = drm_gem_afbc_min_size(dev, mode_cmd, afbc_fb);
+ if (ret < 0)
+ return ret;
+
+ if (objs[0]->size < afbc_fb->afbc_size)
+ return -EINVAL;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(drm_gem_fb_afbc_init);
+
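As a sanity check of the arithmetic above, consider a hypothetical 1920x1080 buffer at 32 bpp with 16x16 superblocks and a linear (non-tiled) header: the aligned size is 1920x1088, giving 8160 superblocks, a 130560-byte header region (already 64-byte aligned) and 1024 payload bytes per superblock, so the minimum buffer size is 8486400 bytes. The same computation as a standalone sketch using the constants defined above:

	#include <stdint.h>
	#include <stdio.h>

	#define ALIGN_UP(x, a) (((x) + (a) - 1) / (a) * (a))

	int main(void)
	{
		uint32_t width = 1920, height = 1080, bpp = 32;
		uint32_t aligned_w = ALIGN_UP(width, 16);          /* 1920 */
		uint32_t aligned_h = ALIGN_UP(height, 16);         /* 1088 */
		uint32_t n_blocks = aligned_w * aligned_h / 256;   /* 8160 superblocks */
		uint64_t size;

		size = ALIGN_UP(n_blocks * 16, 64);                /* headers: 130560 */
		size += (uint64_t)n_blocks * ALIGN_UP(bpp * 256 / 8, 128); /* payload */

		printf("min AFBC size: %llu\n", (unsigned long long)size); /* 8486400 */
		return 0;
	}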
/**
* drm_gem_fb_prepare_fb() - Prepare a GEM backed framebuffer
* @plane: Plane
diff --git a/drivers/gpu/drm/drm_gem_vram_helper.c b/drivers/gpu/drm/drm_gem_vram_helper.c
index 92a11bb42365..8b2d5c945c95 100644
--- a/drivers/gpu/drm/drm_gem_vram_helper.c
+++ b/drivers/gpu/drm/drm_gem_vram_helper.c
@@ -1,10 +1,13 @@
// SPDX-License-Identifier: GPL-2.0-or-later
+#include <linux/module.h>
+
#include <drm/drm_debugfs.h>
#include <drm/drm_device.h>
#include <drm/drm_drv.h>
#include <drm/drm_file.h>
#include <drm/drm_framebuffer.h>
+#include <drm/drm_gem_framebuffer_helper.h>
#include <drm/drm_gem_ttm_helper.h>
#include <drm/drm_gem_vram_helper.h>
#include <drm/drm_mode.h>
@@ -18,13 +21,93 @@ static const struct drm_gem_object_funcs drm_gem_vram_object_funcs;
/**
* DOC: overview
*
- * This library provides a GEM buffer object that is backed by video RAM
- * (VRAM). It can be used for framebuffer devices with dedicated memory.
+ * This library provides &struct drm_gem_vram_object (GEM VRAM), a GEM
+ * buffer object that is backed by video RAM (VRAM). It can be used for
+ * framebuffer devices with dedicated memory.
*
* The data structure &struct drm_vram_mm and its helpers implement a memory
- * manager for simple framebuffer devices with dedicated video memory. Buffer
- * objects are either placed in video RAM or evicted to system memory. The rsp.
- * buffer object is provided by &struct drm_gem_vram_object.
+ * manager for simple framebuffer devices with dedicated video memory. GEM
+ * VRAM buffer objects are either placed in the video memory or remain evicted
+ * to system memory.
+ *
+ * With the GEM interface userspace applications create, manage and destroy
+ * graphics buffers, such as an on-screen framebuffer. GEM does not provide
+ * an implementation of these interfaces. It's up to the DRM driver to
+ * provide an implementation that suits the hardware. If the hardware device
+ * contains dedicated video memory, the DRM driver can use the VRAM helper
+ * library. Each active buffer object is stored in video RAM. Active
+ * buffers are used for drawing the current frame, typically something like
+ * the frame's scanout buffer or the cursor image. If there's no more space
+ * left in VRAM, inactive GEM objects can be moved to system memory.
+ *
+ * The easiest way to use the VRAM helper library is to call
+ * drm_vram_helper_alloc_mm(). The function allocates and initializes an
+ * instance of &struct drm_vram_mm in &struct drm_device.vram_mm . Use
+ * &DRM_GEM_VRAM_DRIVER to initialize &struct drm_driver and
+ * &DRM_VRAM_MM_FILE_OPERATIONS to initialize &struct file_operations;
+ * as illustrated below.
+ *
+ * .. code-block:: c
+ *
+ * struct file_operations fops = {
+ * .owner = THIS_MODULE,
+ * DRM_VRAM_MM_FILE_OPERATIONS
+ * };
+ * struct drm_driver drv = {
+ * .driver_feature = DRM_ ... ,
+ * .fops = &fops,
+ * DRM_GEM_VRAM_DRIVER
+ * };
+ *
+ * int init_drm_driver()
+ * {
+ * struct drm_device *dev;
+ * uint64_t vram_base;
+ * unsigned long vram_size;
+ * int ret;
+ *
+ * // setup device, vram base and size
+ * // ...
+ *
+ * ret = drm_vram_helper_alloc_mm(dev, vram_base, vram_size);
+ * if (ret)
+ * return ret;
+ * return 0;
+ * }
+ *
+ * This creates an instance of &struct drm_vram_mm, exports DRM userspace
+ * interfaces for GEM buffer management and initializes file operations to
+ * allow for accessing created GEM buffers. With this setup, the DRM driver
+ * manages an area of video RAM with VRAM MM and provides GEM VRAM objects
+ * to userspace.
+ *
+ * To clean up the VRAM memory management, call drm_vram_helper_release_mm()
+ * in the driver's clean-up code.
+ *
+ * .. code-block:: c
+ *
+ * void fini_drm_driver()
+ * {
+ * struct drm_device *dev = ...;
+ *
+ * drm_vram_helper_release_mm(dev);
+ * }
+ *
+ * For drawing or scanout operations, buffer objects have to be pinned in video
+ * RAM. Call drm_gem_vram_pin() with &DRM_GEM_VRAM_PL_FLAG_VRAM or
+ * &DRM_GEM_VRAM_PL_FLAG_SYSTEM to pin a buffer object in video RAM or system
+ * memory. Call drm_gem_vram_unpin() to release the pinned object afterwards.
+ *
+ * A buffer object that is pinned in video RAM has a fixed address within that
+ * memory region. Call drm_gem_vram_offset() to retrieve this value. Typically
+ * it's used to program the hardware's scanout engine for framebuffers, set
+ * the cursor overlay's image for a mouse cursor, or use it as input to the
+ * hardware's drawing engine.
+ *
+ * To access a buffer object's memory from the DRM driver, call
+ * drm_gem_vram_kmap(). It (optionally) maps the buffer into kernel address
+ * space and returns the memory address. Use drm_gem_vram_kunmap() to
+ * release the mapping.
*/
/*
@@ -670,9 +753,9 @@ EXPORT_SYMBOL(drm_gem_vram_driver_dumb_mmap_offset);
* @plane: a DRM plane
* @new_state: the plane's new state
*
- * During plane updates, this function pins the GEM VRAM
- * objects of the plane's new framebuffer to VRAM. Call
- * drm_gem_vram_plane_helper_cleanup_fb() to unpin them.
+ * During plane updates, this function sets the plane's fence and
+ * pins the GEM VRAM objects of the plane's new framebuffer to VRAM.
+ * Call drm_gem_vram_plane_helper_cleanup_fb() to unpin them.
*
* Returns:
* 0 on success, or
@@ -698,6 +781,10 @@ drm_gem_vram_plane_helper_prepare_fb(struct drm_plane *plane,
goto err_drm_gem_vram_unpin;
}
+ ret = drm_gem_fb_prepare_fb(plane, new_state);
+ if (ret)
+ goto err_drm_gem_vram_unpin;
+
return 0;
err_drm_gem_vram_unpin:
@@ -1018,7 +1105,6 @@ static struct ttm_bo_driver bo_driver = {
* struct drm_vram_mm
*/
-#if defined(CONFIG_DEBUG_FS)
static int drm_vram_mm_debugfs(struct seq_file *m, void *data)
{
struct drm_info_node *node = (struct drm_info_node *) m->private;
@@ -1035,27 +1121,18 @@ static int drm_vram_mm_debugfs(struct seq_file *m, void *data)
static const struct drm_info_list drm_vram_mm_debugfs_list[] = {
{ "vram-mm", drm_vram_mm_debugfs, 0, NULL },
};
-#endif
/**
* drm_vram_mm_debugfs_init() - Register VRAM MM debugfs file.
*
* @minor: drm minor device.
*
- * Returns:
- * 0 on success, or
- * a negative error code otherwise.
*/
-int drm_vram_mm_debugfs_init(struct drm_minor *minor)
+void drm_vram_mm_debugfs_init(struct drm_minor *minor)
{
- int ret = 0;
-
-#if defined(CONFIG_DEBUG_FS)
- ret = drm_debugfs_create_files(drm_vram_mm_debugfs_list,
- ARRAY_SIZE(drm_vram_mm_debugfs_list),
- minor->debugfs_root, minor);
-#endif
- return ret;
+ drm_debugfs_create_files(drm_vram_mm_debugfs_list,
+ ARRAY_SIZE(drm_vram_mm_debugfs_list),
+ minor->debugfs_root, minor);
}
EXPORT_SYMBOL(drm_vram_mm_debugfs_init);
@@ -1202,3 +1279,6 @@ drm_vram_helper_mode_valid(struct drm_device *dev,
return drm_vram_helper_mode_valid_internal(dev, mode, max_bpp);
}
EXPORT_SYMBOL(drm_vram_helper_mode_valid);
+
+MODULE_DESCRIPTION("DRM VRAM memory-management helpers");
+MODULE_LICENSE("GPL");
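To make the pin/offset/unpin flow from the overview above concrete, here is a hedged sketch of how a driver might pin a buffer object while it is being scanned out and release it again afterwards; error handling of the surrounding modeset code is omitted:

	#include <drm/drm_gem_vram_helper.h>

	/* pin the object into VRAM and return its fixed offset for scanout */
	static s64 driver_pin_for_scanout(struct drm_gem_vram_object *gbo)
	{
		int ret;

		ret = drm_gem_vram_pin(gbo, DRM_GEM_VRAM_PL_FLAG_VRAM);
		if (ret)
			return ret;

		/* valid as long as the object stays pinned */
		return drm_gem_vram_offset(gbo);
	}

	static void driver_unpin_after_scanout(struct drm_gem_vram_object *gbo)
	{
		drm_gem_vram_unpin(gbo);
	}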
diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
index 5714a78365ac..2470a352730b 100644
--- a/drivers/gpu/drm/drm_internal.h
+++ b/drivers/gpu/drm/drm_internal.h
@@ -89,9 +89,11 @@ void drm_prime_remove_buf_handle_locked(struct drm_prime_file_private *prime_fpr
struct drm_minor *drm_minor_acquire(unsigned int minor_id);
void drm_minor_release(struct drm_minor *minor);
+/* drm_managed.c */
+void drm_managed_release(struct drm_device *dev);
+
/* drm_vblank.c */
void drm_vblank_disable_and_save(struct drm_device *dev, unsigned int pipe);
-void drm_vblank_cleanup(struct drm_device *dev);
/* IOCTLS */
int drm_wait_vblank_ioctl(struct drm_device *dev, void *data,
@@ -141,7 +143,6 @@ void drm_sysfs_lease_event(struct drm_device *dev);
/* drm_gem.c */
struct drm_gem_object;
int drm_gem_init(struct drm_device *dev);
-void drm_gem_destroy(struct drm_device *dev);
int drm_gem_handle_create_tail(struct drm_file *file_priv,
struct drm_gem_object *obj,
u32 *handlep);
@@ -235,4 +236,4 @@ int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
/* drm_framebuffer.c */
void drm_framebuffer_print_info(struct drm_printer *p, unsigned int indent,
const struct drm_framebuffer *fb);
-int drm_framebuffer_debugfs_init(struct drm_minor *minor);
+void drm_framebuffer_debugfs_init(struct drm_minor *minor);
diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index 9e41972c4bbc..328502aafaf7 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -599,8 +599,8 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
DRM_LEGACY_IOCTL_DEF(DRM_IOCTL_SET_SAREA_CTX, drm_legacy_setsareactx, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
DRM_LEGACY_IOCTL_DEF(DRM_IOCTL_GET_SAREA_CTX, drm_legacy_getsareactx, DRM_AUTH),
- DRM_IOCTL_DEF(DRM_IOCTL_SET_MASTER, drm_setmaster_ioctl, DRM_ROOT_ONLY),
- DRM_IOCTL_DEF(DRM_IOCTL_DROP_MASTER, drm_dropmaster_ioctl, DRM_ROOT_ONLY),
+ DRM_IOCTL_DEF(DRM_IOCTL_SET_MASTER, drm_setmaster_ioctl, 0),
+ DRM_IOCTL_DEF(DRM_IOCTL_DROP_MASTER, drm_dropmaster_ioctl, 0),
DRM_LEGACY_IOCTL_DEF(DRM_IOCTL_ADD_CTX, drm_legacy_addctx, DRM_AUTH|DRM_ROOT_ONLY),
DRM_LEGACY_IOCTL_DEF(DRM_IOCTL_RM_CTX, drm_legacy_rmctx, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
@@ -741,7 +741,7 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
* };
*
* Please make sure that you follow all the best practices from
- * ``Documentation/ioctl/botching-up-ioctls.rst``. Note that drm_ioctl()
+ * ``Documentation/process/botching-up-ioctls.rst``. Note that drm_ioctl()
* automatically zero-extends structures, hence make sure you can add more stuff
* at the end, i.e. don't put a variable sized array there.
*
diff --git a/drivers/gpu/drm/drm_managed.c b/drivers/gpu/drm/drm_managed.c
new file mode 100644
index 000000000000..9cebfe370a65
--- /dev/null
+++ b/drivers/gpu/drm/drm_managed.c
@@ -0,0 +1,275 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020 Intel
+ *
+ * Based on drivers/base/devres.c
+ */
+
+#include <drm/drm_managed.h>
+
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include <drm/drm_device.h>
+#include <drm/drm_print.h>
+
+/**
+ * DOC: managed resources
+ *
+ * Inspired by struct &device managed resources, but tied to the lifetime of
+ * struct &drm_device, which can outlive the underlying physical device, usually
+ * when userspace has some open files and other handles to resources still open.
+ *
+ * Release actions can be added with drmm_add_action(), memory allocations can
+ * be done directly with drmm_kmalloc() and the related functions. Everything
+ * will be released on the final drm_dev_put() in reverse order of how the
+ * release actions have been added and memory has been allocated since driver
+ * loading started with drm_dev_init().
+ *
+ * Note that release actions and managed memory can also be added and removed
+ * during the lifetime of the driver; all the functions are fully concurrency
+ * safe. But it is recommended to use managed resources only for resources that
+ * change rarely, if ever, during the lifetime of the &drm_device instance.
+ */
+
+struct drmres_node {
+ struct list_head entry;
+ drmres_release_t release;
+ const char *name;
+ size_t size;
+};
+
+struct drmres {
+ struct drmres_node node;
+ /*
+ * Some archs want to perform DMA into kmalloc caches
+ * and need a guaranteed alignment larger than
+ * the alignment of a 64-bit integer.
+ * Thus we use ARCH_KMALLOC_MINALIGN here and get exactly the same
+ * buffer alignment as if it was allocated by plain kmalloc().
+ */
+ u8 __aligned(ARCH_KMALLOC_MINALIGN) data[];
+};
+
+static void free_dr(struct drmres *dr)
+{
+ kfree_const(dr->node.name);
+ kfree(dr);
+}
+
+void drm_managed_release(struct drm_device *dev)
+{
+ struct drmres *dr, *tmp;
+
+ drm_dbg_drmres(dev, "drmres release begin\n");
+ list_for_each_entry_safe(dr, tmp, &dev->managed.resources, node.entry) {
+ drm_dbg_drmres(dev, "REL %p %s (%zu bytes)\n",
+ dr, dr->node.name, dr->node.size);
+
+ if (dr->node.release)
+ dr->node.release(dev, dr->node.size ? *(void **)&dr->data : NULL);
+
+ list_del(&dr->node.entry);
+ free_dr(dr);
+ }
+ drm_dbg_drmres(dev, "drmres release end\n");
+}
+
+/*
+ * Always inline so that kmalloc_track_caller tracks the actual interesting
+ * caller outside of drm_managed.c.
+ */
+static __always_inline struct drmres * alloc_dr(drmres_release_t release,
+ size_t size, gfp_t gfp, int nid)
+{
+ size_t tot_size;
+ struct drmres *dr;
+
+ /* We must catch any near-SIZE_MAX cases that could overflow. */
+ if (unlikely(check_add_overflow(sizeof(*dr), size, &tot_size)))
+ return NULL;
+
+ dr = kmalloc_node_track_caller(tot_size, gfp, nid);
+ if (unlikely(!dr))
+ return NULL;
+
+ memset(dr, 0, offsetof(struct drmres, data));
+
+ INIT_LIST_HEAD(&dr->node.entry);
+ dr->node.release = release;
+ dr->node.size = size;
+
+ return dr;
+}
+
+static void del_dr(struct drm_device *dev, struct drmres *dr)
+{
+ list_del_init(&dr->node.entry);
+
+ drm_dbg_drmres(dev, "DEL %p %s (%lu bytes)\n",
+ dr, dr->node.name, (unsigned long) dr->node.size);
+}
+
+static void add_dr(struct drm_device *dev, struct drmres *dr)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&dev->managed.lock, flags);
+ list_add(&dr->node.entry, &dev->managed.resources);
+ spin_unlock_irqrestore(&dev->managed.lock, flags);
+
+ drm_dbg_drmres(dev, "ADD %p %s (%lu bytes)\n",
+ dr, dr->node.name, (unsigned long) dr->node.size);
+}
+
+/**
+ * drmm_add_final_kfree - add release action for the final kfree()
+ * @dev: DRM device
+ * @container: pointer to the kmalloc allocation containing @dev
+ *
+ * Since the allocation containing the struct &drm_device must be allocated
+ * before it can be initialized with drm_dev_init() there's no way to allocate
+ * that memory with drmm_kmalloc(). To side-step this chicken-egg problem the
+ * pointer for this final kfree() must be specified by calling this function. It
+ * will be released in the final drm_dev_put() for @dev, after all other release
+ * actions installed through drmm_add_action() have been processed.
+ */
+void drmm_add_final_kfree(struct drm_device *dev, void *container)
+{
+ WARN_ON(dev->managed.final_kfree);
+ WARN_ON(dev < (struct drm_device *) container);
+ WARN_ON(dev + 1 > (struct drm_device *) (container + ksize(container)));
+ dev->managed.final_kfree = container;
+}
+EXPORT_SYMBOL(drmm_add_final_kfree);
+
+int __drmm_add_action(struct drm_device *dev,
+ drmres_release_t action,
+ void *data, const char *name)
+{
+ struct drmres *dr;
+ void **void_ptr;
+
+ dr = alloc_dr(action, data ? sizeof(void*) : 0,
+ GFP_KERNEL | __GFP_ZERO,
+ dev_to_node(dev->dev));
+ if (!dr) {
+ drm_dbg_drmres(dev, "failed to add action %s for %p\n",
+ name, data);
+ return -ENOMEM;
+ }
+
+ dr->node.name = kstrdup_const(name, GFP_KERNEL);
+ if (data) {
+ void_ptr = (void **)&dr->data;
+ *void_ptr = data;
+ }
+
+ add_dr(dev, dr);
+
+ return 0;
+}
+EXPORT_SYMBOL(__drmm_add_action);
+
+int __drmm_add_action_or_reset(struct drm_device *dev,
+ drmres_release_t action,
+ void *data, const char *name)
+{
+ int ret;
+
+ ret = __drmm_add_action(dev, action, data, name);
+ if (ret)
+ action(dev, data);
+
+ return ret;
+}
+EXPORT_SYMBOL(__drmm_add_action_or_reset);
+
+/**
+ * drmm_kmalloc - &drm_device managed kmalloc()
+ * @dev: DRM device
+ * @size: size of the memory allocation
+ * @gfp: GFP allocation flags
+ *
+ * This is a &drm_device managed version of kmalloc(). The allocated memory is
+ * automatically freed on the final drm_dev_put(). Memory can also be freed
+ * before the final drm_dev_put() by calling drmm_kfree().
+ */
+void *drmm_kmalloc(struct drm_device *dev, size_t size, gfp_t gfp)
+{
+ struct drmres *dr;
+
+ dr = alloc_dr(NULL, size, gfp, dev_to_node(dev->dev));
+ if (!dr) {
+ drm_dbg_drmres(dev, "failed to allocate %zu bytes, %u flags\n",
+ size, gfp);
+ return NULL;
+ }
+ dr->node.name = kstrdup_const("kmalloc", GFP_KERNEL);
+
+ add_dr(dev, dr);
+
+ return dr->data;
+}
+EXPORT_SYMBOL(drmm_kmalloc);
+
+/**
+ * drmm_kstrdup - &drm_device managed kstrdup()
+ * @dev: DRM device
+ * @s: 0-terminated string to be duplicated
+ * @gfp: GFP allocation flags
+ *
+ * This is a &drm_device managed version of kstrdup(). The allocated memory is
+ * automatically freed on the final drm_dev_put() and works exactly like a
+ * memory allocation obtained by drmm_kmalloc().
+ */
+char *drmm_kstrdup(struct drm_device *dev, const char *s, gfp_t gfp)
+{
+ size_t size;
+ char *buf;
+
+ if (!s)
+ return NULL;
+
+ size = strlen(s) + 1;
+ buf = drmm_kmalloc(dev, size, gfp);
+ if (buf)
+ memcpy(buf, s, size);
+ return buf;
+}
+EXPORT_SYMBOL_GPL(drmm_kstrdup);
+
+/**
+ * drmm_kfree - &drm_device managed kfree()
+ * @dev: DRM device
+ * @data: memory allocation to be freed
+ *
+ * This is a &drm_device managed version of kfree() which can be used to
+ * release memory allocated through drmm_kmalloc() or any of its related
+ * functions before the final drm_dev_put() of @dev.
+ */
+void drmm_kfree(struct drm_device *dev, void *data)
+{
+ struct drmres *dr_match = NULL, *dr;
+ unsigned long flags;
+
+ if (!data)
+ return;
+
+ spin_lock_irqsave(&dev->managed.lock, flags);
+ list_for_each_entry(dr, &dev->managed.resources, node.entry) {
+ if (dr->data == data) {
+ dr_match = dr;
+ del_dr(dev, dr_match);
+ break;
+ }
+ }
+ spin_unlock_irqrestore(&dev->managed.lock, flags);
+
+ if (WARN_ON(!dr_match))
+ return;
+
+ free_dr(dr_match);
+}
+EXPORT_SYMBOL(drmm_kfree);
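Taken together, these helpers let a driver hang allocations and cleanup actions off the drm_device lifetime instead of open-coding a &drm_driver.release callback. A hedged sketch of typical usage from a driver's init path, with hypothetical names:

	#include <linux/vmalloc.h>
	#include <drm/drm_managed.h>

	struct driver_state {
		void *table;
	};

	static void driver_state_release(struct drm_device *dev, void *data)
	{
		struct driver_state *state = data;

		vfree(state->table);
	}

	static int driver_state_init(struct drm_device *dev)
	{
		struct driver_state *state;

		/* freed automatically on the final drm_dev_put() */
		state = drmm_kzalloc(dev, sizeof(*state), GFP_KERNEL);
		if (!state)
			return -ENOMEM;

		state->table = vmalloc(PAGE_SIZE);
		if (!state->table)
			return -ENOMEM;

		/* runs driver_state_release(dev, state) on the final drm_dev_put() */
		return drmm_add_action_or_reset(dev, driver_state_release, state);
	}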
diff --git a/drivers/gpu/drm/drm_mipi_dbi.c b/drivers/gpu/drm/drm_mipi_dbi.c
index 558baf989f5a..bb27c82757f1 100644
--- a/drivers/gpu/drm/drm_mipi_dbi.c
+++ b/drivers/gpu/drm/drm_mipi_dbi.c
@@ -169,7 +169,8 @@ int mipi_dbi_command_buf(struct mipi_dbi *dbi, u8 cmd, u8 *data, size_t len)
EXPORT_SYMBOL(mipi_dbi_command_buf);
/* This should only be used by mipi_dbi_command() */
-int mipi_dbi_command_stackbuf(struct mipi_dbi *dbi, u8 cmd, u8 *data, size_t len)
+int mipi_dbi_command_stackbuf(struct mipi_dbi *dbi, u8 cmd, const u8 *data,
+ size_t len)
{
u8 *buf;
int ret;
@@ -510,6 +511,10 @@ int mipi_dbi_dev_init_with_formats(struct mipi_dbi_dev *dbidev,
if (!dbidev->dbi.command)
return -EINVAL;
+ ret = drmm_mode_config_init(drm);
+ if (ret)
+ return ret;
+
dbidev->tx_buf = devm_kmalloc(drm->dev, tx_buf_size, GFP_KERNEL);
if (!dbidev->tx_buf)
return -ENOMEM;
@@ -579,26 +584,6 @@ int mipi_dbi_dev_init(struct mipi_dbi_dev *dbidev,
EXPORT_SYMBOL(mipi_dbi_dev_init);
/**
- * mipi_dbi_release - DRM driver release helper
- * @drm: DRM device
- *
- * This function finalizes and frees &mipi_dbi.
- *
- * Drivers can use this as their &drm_driver->release callback.
- */
-void mipi_dbi_release(struct drm_device *drm)
-{
- struct mipi_dbi_dev *dbidev = drm_to_mipi_dbi_dev(drm);
-
- DRM_DEBUG_DRIVER("\n");
-
- drm_mode_config_cleanup(drm);
- drm_dev_fini(drm);
- kfree(dbidev);
-}
-EXPORT_SYMBOL(mipi_dbi_release);
-
-/**
* mipi_dbi_hw_reset - Hardware reset of controller
* @dbi: MIPI DBI structure
*
@@ -1308,10 +1293,8 @@ static const struct file_operations mipi_dbi_debugfs_command_fops = {
* controller or getting the read command values.
* Drivers can use this as their &drm_driver->debugfs_init callback.
*
- * Returns:
- * Zero on success, negative error code on failure.
*/
-int mipi_dbi_debugfs_init(struct drm_minor *minor)
+void mipi_dbi_debugfs_init(struct drm_minor *minor)
{
struct mipi_dbi_dev *dbidev = drm_to_mipi_dbi_dev(minor->dev);
umode_t mode = S_IFREG | S_IWUSR;
@@ -1320,8 +1303,6 @@ int mipi_dbi_debugfs_init(struct drm_minor *minor)
mode |= S_IRUGO;
debugfs_create_file("command", mode, minor->debugfs_root, dbidev,
&mipi_dbi_debugfs_command_fops);
-
- return 0;
}
EXPORT_SYMBOL(mipi_dbi_debugfs_init);
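mipi_dbi_dev_init_with_formats() now calls drmm_mode_config_init() itself, so the mode config teardown no longer needs a dedicated release callback. Drivers doing their own modeset initialization follow the same pattern; a minimal sketch with a hypothetical driver_mode_config_funcs:

	static int driver_modeset_init(struct drm_device *dev)
	{
		int ret;

		/* drm_mode_config_cleanup() now runs on the final drm_dev_put() */
		ret = drmm_mode_config_init(dev);
		if (ret)
			return ret;

		dev->mode_config.min_width = 0;
		dev->mode_config.min_height = 0;
		dev->mode_config.max_width = 4096;
		dev->mode_config.max_height = 4096;
		dev->mode_config.funcs = &driver_mode_config_funcs;

		/* ... create planes, CRTCs, encoders and connectors ... */

		return 0;
	}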
diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
index 8981abe8b7c9..f4ca1ff80af9 100644
--- a/drivers/gpu/drm/drm_mm.c
+++ b/drivers/gpu/drm/drm_mm.c
@@ -212,20 +212,6 @@ static void drm_mm_interval_tree_add_node(struct drm_mm_node *hole_node,
&drm_mm_interval_tree_augment);
}
-#define RB_INSERT(root, member, expr) do { \
- struct rb_node **link = &root.rb_node, *rb = NULL; \
- u64 x = expr(node); \
- while (*link) { \
- rb = *link; \
- if (x < expr(rb_entry(rb, struct drm_mm_node, member))) \
- link = &rb->rb_left; \
- else \
- link = &rb->rb_right; \
- } \
- rb_link_node(&node->member, rb, link); \
- rb_insert_color(&node->member, &root); \
-} while (0)
-
#define HOLE_SIZE(NODE) ((NODE)->hole_size)
#define HOLE_ADDR(NODE) (__drm_mm_hole_node_start(NODE))
@@ -255,16 +241,42 @@ static void insert_hole_size(struct rb_root_cached *root,
rb_insert_color_cached(&node->rb_hole_size, root, first);
}
+RB_DECLARE_CALLBACKS_MAX(static, augment_callbacks,
+ struct drm_mm_node, rb_hole_addr,
+ u64, subtree_max_hole, HOLE_SIZE)
+
+static void insert_hole_addr(struct rb_root *root, struct drm_mm_node *node)
+{
+ struct rb_node **link = &root->rb_node, *rb_parent = NULL;
+ u64 start = HOLE_ADDR(node), subtree_max_hole = node->subtree_max_hole;
+ struct drm_mm_node *parent;
+
+ while (*link) {
+ rb_parent = *link;
+ parent = rb_entry(rb_parent, struct drm_mm_node, rb_hole_addr);
+ if (parent->subtree_max_hole < subtree_max_hole)
+ parent->subtree_max_hole = subtree_max_hole;
+ if (start < HOLE_ADDR(parent))
+ link = &parent->rb_hole_addr.rb_left;
+ else
+ link = &parent->rb_hole_addr.rb_right;
+ }
+
+ rb_link_node(&node->rb_hole_addr, rb_parent, link);
+ rb_insert_augmented(&node->rb_hole_addr, root, &augment_callbacks);
+}
+
static void add_hole(struct drm_mm_node *node)
{
struct drm_mm *mm = node->mm;
node->hole_size =
__drm_mm_hole_node_end(node) - __drm_mm_hole_node_start(node);
+ node->subtree_max_hole = node->hole_size;
DRM_MM_BUG_ON(!drm_mm_hole_follows(node));
insert_hole_size(&mm->holes_size, node);
- RB_INSERT(mm->holes_addr, rb_hole_addr, HOLE_ADDR);
+ insert_hole_addr(&mm->holes_addr, node);
list_add(&node->hole_stack, &mm->hole_stack);
}
@@ -275,8 +287,10 @@ static void rm_hole(struct drm_mm_node *node)
list_del(&node->hole_stack);
rb_erase_cached(&node->rb_hole_size, &node->mm->holes_size);
- rb_erase(&node->rb_hole_addr, &node->mm->holes_addr);
+ rb_erase_augmented(&node->rb_hole_addr, &node->mm->holes_addr,
+ &augment_callbacks);
node->hole_size = 0;
+ node->subtree_max_hole = 0;
DRM_MM_BUG_ON(drm_mm_hole_follows(node));
}
@@ -361,9 +375,90 @@ first_hole(struct drm_mm *mm,
}
}
+/**
+ * next_hole_high_addr - returns next hole for a DRM_MM_INSERT_HIGH mode request
+ * @entry: previously selected drm_mm_node
+ * @size: size of the hole needed for the request
+ *
+ * This function checks whether the left subtree of @entry has a hole big enough
+ * to fit the requested size. If so, it returns the previous node of @entry;
+ * otherwise it returns the parent node of @entry.
+ *
+ * It also skips the complete left subtree if the subtree_max_hole of that
+ * subtree is the same as the subtree_max_hole of @entry.
+ *
+ * Returns:
+ * Previous node of @entry if the left subtree of @entry can serve the request,
+ * otherwise the parent node of @entry.
+ */
+static struct drm_mm_node *
+next_hole_high_addr(struct drm_mm_node *entry, u64 size)
+{
+ struct rb_node *rb_node, *left_rb_node, *parent_rb_node;
+ struct drm_mm_node *left_node;
+
+ if (!entry)
+ return NULL;
+
+ rb_node = &entry->rb_hole_addr;
+ if (rb_node->rb_left) {
+ left_rb_node = rb_node->rb_left;
+ parent_rb_node = rb_parent(rb_node);
+ left_node = rb_entry(left_rb_node,
+ struct drm_mm_node, rb_hole_addr);
+ if ((left_node->subtree_max_hole < size ||
+ entry->size == entry->subtree_max_hole) &&
+ parent_rb_node && parent_rb_node->rb_left != rb_node)
+ return rb_hole_addr_to_node(parent_rb_node);
+ }
+
+ return rb_hole_addr_to_node(rb_prev(rb_node));
+}
+
+/**
+ * next_hole_low_addr - returns next hole for a DRM_MM_INSERT_LOW mode request
+ * @entry: previously selected drm_mm_node
+ * @size: size of the hole needed for the request
+ *
+ * This function will verify whether right subtree of @entry has hole big enough
+ * to fit the requtested size. If so, it will return next node of @entry or
+ * else it will return parent node of @entry
+ *
+ * It will also skip the complete right subtree if subtree_max_hole of that
+ * subtree is same as the subtree_max_hole of the @entry.
+ *
+ * Returns:
+ * next node of @entry if right subtree of @entry can serve the request or
+ * else return parent of @entry
+ */
+static struct drm_mm_node *
+next_hole_low_addr(struct drm_mm_node *entry, u64 size)
+{
+ struct rb_node *rb_node, *right_rb_node, *parent_rb_node;
+ struct drm_mm_node *right_node;
+
+ if (!entry)
+ return NULL;
+
+ rb_node = &entry->rb_hole_addr;
+ if (rb_node->rb_right) {
+ right_rb_node = rb_node->rb_right;
+ parent_rb_node = rb_parent(rb_node);
+ right_node = rb_entry(right_rb_node,
+ struct drm_mm_node, rb_hole_addr);
+ if ((right_node->subtree_max_hole < size ||
+ entry->size == entry->subtree_max_hole) &&
+ parent_rb_node && parent_rb_node->rb_right != rb_node)
+ return rb_hole_addr_to_node(parent_rb_node);
+ }
+
+ return rb_hole_addr_to_node(rb_next(rb_node));
+}
+
static struct drm_mm_node *
next_hole(struct drm_mm *mm,
struct drm_mm_node *node,
+ u64 size,
enum drm_mm_insert_mode mode)
{
switch (mode) {
@@ -372,10 +467,10 @@ next_hole(struct drm_mm *mm,
return rb_hole_size_to_node(rb_prev(&node->rb_hole_size));
case DRM_MM_INSERT_LOW:
- return rb_hole_addr_to_node(rb_next(&node->rb_hole_addr));
+ return next_hole_low_addr(node, size);
case DRM_MM_INSERT_HIGH:
- return rb_hole_addr_to_node(rb_prev(&node->rb_hole_addr));
+ return next_hole_high_addr(node, size);
case DRM_MM_INSERT_EVICT:
node = list_next_entry(node, hole_stack);
@@ -489,7 +584,7 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm,
remainder_mask = is_power_of_2(alignment) ? alignment - 1 : 0;
for (hole = first_hole(mm, range_start, range_end, size, mode);
hole;
- hole = once ? NULL : next_hole(mm, hole, mode)) {
+ hole = once ? NULL : next_hole(mm, hole, size, mode)) {
u64 hole_start = __drm_mm_hole_node_start(hole);
u64 hole_end = hole_start + hole->hole_size;
u64 adj_start, adj_end;
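A minimal sketch of how a driver would exercise the new size-aware traversal: with DRM_MM_INSERT_HIGH (or _LOW), next_hole_high_addr()/next_hole_low_addr() prune whole subtrees whose subtree_max_hole is smaller than the requested size. The helper name and the search range below are hypothetical, not taken from this patch.

#include <drm/drm_mm.h>

static int example_alloc_topdown(struct drm_mm *mm, struct drm_mm_node *node,
                                 u64 size)
{
        /*
         * Search holes from the highest address downwards; subtrees that
         * cannot hold @size are skipped via subtree_max_hole.
         */
        return drm_mm_insert_node_in_range(mm, node, size,
                                           0,          /* alignment */
                                           0,          /* color */
                                           0, U64_MAX, /* full range */
                                           DRM_MM_INSERT_HIGH);
}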
diff --git a/drivers/gpu/drm/drm_mode_config.c b/drivers/gpu/drm/drm_mode_config.c
index 08e6eff6a179..5761f838a057 100644
--- a/drivers/gpu/drm/drm_mode_config.c
+++ b/drivers/gpu/drm/drm_mode_config.c
@@ -25,6 +25,7 @@
#include <drm/drm_drv.h>
#include <drm/drm_encoder.h>
#include <drm/drm_file.h>
+#include <drm/drm_managed.h>
#include <drm/drm_mode_config.h>
#include <drm/drm_print.h>
#include <linux/dma-resv.h>
@@ -373,8 +374,14 @@ static int drm_mode_create_standard_properties(struct drm_device *dev)
return 0;
}
+static void drm_mode_config_init_release(struct drm_device *dev, void *ptr)
+{
+ drm_mode_config_cleanup(dev);
+}
+
/**
- * drm_mode_config_init - initialize DRM mode_configuration structure
+ * drmm_mode_config_init - managed DRM mode_configuration structure
+ * initialization
* @dev: DRM device
*
* Initialize @dev's mode_config structure, used for tracking the graphics
@@ -384,8 +391,12 @@ static int drm_mode_create_standard_properties(struct drm_device *dev)
* problem, since this should happen single threaded at init time. It is the
* driver's problem to ensure this guarantee.
*
+ * Cleanup is automatically handled through registering drm_mode_config_cleanup
+ * with drmm_add_action().
+ *
+ * Returns: 0 on success, negative error value on failure.
*/
-void drm_mode_config_init(struct drm_device *dev)
+int drmm_mode_config_init(struct drm_device *dev)
{
mutex_init(&dev->mode_config.mutex);
drm_modeset_lock_init(&dev->mode_config.connection_mutex);
@@ -443,8 +454,11 @@ void drm_mode_config_init(struct drm_device *dev)
drm_modeset_acquire_fini(&modeset_ctx);
dma_resv_fini(&resv);
}
+
+ return drmm_add_action_or_reset(dev, drm_mode_config_init_release,
+ NULL);
}
-EXPORT_SYMBOL(drm_mode_config_init);
+EXPORT_SYMBOL(drmm_mode_config_init);
/**
* drm_mode_config_cleanup - free up DRM mode_config info
@@ -456,6 +470,9 @@ EXPORT_SYMBOL(drm_mode_config_init);
* Note that since this /should/ happen single-threaded at driver/device
* teardown time, no locking is required. It's the driver's job to ensure that
* this guarantee actually holds true.
+ *
+ * FIXME: With the managed drmm_mode_config_init() it is no longer necessary for
+ * drivers to explicitly call this function.
*/
void drm_mode_config_cleanup(struct drm_device *dev)
{
@@ -532,3 +549,90 @@ void drm_mode_config_cleanup(struct drm_device *dev)
drm_modeset_lock_fini(&dev->mode_config.connection_mutex);
}
EXPORT_SYMBOL(drm_mode_config_cleanup);
+
+static u32 full_encoder_mask(struct drm_device *dev)
+{
+ struct drm_encoder *encoder;
+ u32 encoder_mask = 0;
+
+ drm_for_each_encoder(encoder, dev)
+ encoder_mask |= drm_encoder_mask(encoder);
+
+ return encoder_mask;
+}
+
+/*
+ * For some reason we want the encoder itself included in
+ * possible_clones. Make life easy for drivers by allowing them
+ * to leave possible_clones unset if no cloning is possible.
+ */
+static void fixup_encoder_possible_clones(struct drm_encoder *encoder)
+{
+ if (encoder->possible_clones == 0)
+ encoder->possible_clones = drm_encoder_mask(encoder);
+}
+
+static void validate_encoder_possible_clones(struct drm_encoder *encoder)
+{
+ struct drm_device *dev = encoder->dev;
+ u32 encoder_mask = full_encoder_mask(dev);
+ struct drm_encoder *other;
+
+ drm_for_each_encoder(other, dev) {
+ WARN(!!(encoder->possible_clones & drm_encoder_mask(other)) !=
+ !!(other->possible_clones & drm_encoder_mask(encoder)),
+ "possible_clones mismatch: "
+ "[ENCODER:%d:%s] mask=0x%x possible_clones=0x%x vs. "
+ "[ENCODER:%d:%s] mask=0x%x possible_clones=0x%x\n",
+ encoder->base.id, encoder->name,
+ drm_encoder_mask(encoder), encoder->possible_clones,
+ other->base.id, other->name,
+ drm_encoder_mask(other), other->possible_clones);
+ }
+
+ WARN((encoder->possible_clones & drm_encoder_mask(encoder)) == 0 ||
+ (encoder->possible_clones & ~encoder_mask) != 0,
+ "Bogus possible_clones: "
+ "[ENCODER:%d:%s] possible_clones=0x%x (full encoder mask=0x%x)\n",
+ encoder->base.id, encoder->name,
+ encoder->possible_clones, encoder_mask);
+}
+
+static u32 full_crtc_mask(struct drm_device *dev)
+{
+ struct drm_crtc *crtc;
+ u32 crtc_mask = 0;
+
+ drm_for_each_crtc(crtc, dev)
+ crtc_mask |= drm_crtc_mask(crtc);
+
+ return crtc_mask;
+}
+
+static void validate_encoder_possible_crtcs(struct drm_encoder *encoder)
+{
+ u32 crtc_mask = full_crtc_mask(encoder->dev);
+
+ WARN((encoder->possible_crtcs & crtc_mask) == 0 ||
+ (encoder->possible_crtcs & ~crtc_mask) != 0,
+ "Bogus possible_crtcs: "
+ "[ENCODER:%d:%s] possible_crtcs=0x%x (full crtc mask=0x%x)\n",
+ encoder->base.id, encoder->name,
+ encoder->possible_crtcs, crtc_mask);
+}
+
+void drm_mode_config_validate(struct drm_device *dev)
+{
+ struct drm_encoder *encoder;
+
+ if (!drm_core_check_feature(dev, DRIVER_MODESET))
+ return;
+
+ drm_for_each_encoder(encoder, dev)
+ fixup_encoder_possible_clones(encoder);
+
+ drm_for_each_encoder(encoder, dev) {
+ validate_encoder_possible_clones(encoder);
+ validate_encoder_possible_crtcs(encoder);
+ }
+}
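A minimal sketch of the managed path introduced above, assuming a driver that previously paired drm_mode_config_init() with an explicit drm_mode_config_cleanup(); the function name and mode-config limits are hypothetical.

#include <drm/drm_mode_config.h>

static int example_modeset_init(struct drm_device *dev)
{
        int ret;

        /* Registers drm_mode_config_cleanup() as a drmm release action. */
        ret = drmm_mode_config_init(dev);
        if (ret)
                return ret;

        dev->mode_config.min_width = 0;
        dev->mode_config.min_height = 0;
        dev->mode_config.max_width = 4096;
        dev->mode_config.max_height = 4096;

        return 0;
}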
diff --git a/drivers/gpu/drm/drm_mode_object.c b/drivers/gpu/drm/drm_mode_object.c
index 35c2719407a8..901b078abf40 100644
--- a/drivers/gpu/drm/drm_mode_object.c
+++ b/drivers/gpu/drm/drm_mode_object.c
@@ -402,12 +402,13 @@ int drm_mode_obj_get_properties_ioctl(struct drm_device *dev, void *data,
{
struct drm_mode_obj_get_properties *arg = data;
struct drm_mode_object *obj;
+ struct drm_modeset_acquire_ctx ctx;
int ret = 0;
if (!drm_core_check_feature(dev, DRIVER_MODESET))
return -EOPNOTSUPP;
- drm_modeset_lock_all(dev);
+ DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, 0, ret);
obj = drm_mode_object_find(dev, file_priv, arg->obj_id, arg->obj_type);
if (!obj) {
@@ -427,7 +428,7 @@ int drm_mode_obj_get_properties_ioctl(struct drm_device *dev, void *data,
out_unref:
drm_mode_object_put(obj);
out:
- drm_modeset_unlock_all(dev);
+ DRM_MODESET_LOCK_ALL_END(ctx, ret);
return ret;
}
@@ -449,12 +450,13 @@ static int set_property_legacy(struct drm_mode_object *obj,
{
struct drm_device *dev = prop->dev;
struct drm_mode_object *ref;
+ struct drm_modeset_acquire_ctx ctx;
int ret = -EINVAL;
if (!drm_property_change_valid_get(prop, prop_value, &ref))
return -EINVAL;
- drm_modeset_lock_all(dev);
+ DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, 0, ret);
switch (obj->type) {
case DRM_MODE_OBJECT_CONNECTOR:
ret = drm_connector_set_obj_prop(obj, prop, prop_value);
@@ -468,7 +470,7 @@ static int set_property_legacy(struct drm_mode_object *obj,
break;
}
drm_property_change_valid_put(prop, ref);
- drm_modeset_unlock_all(dev);
+ DRM_MODESET_LOCK_ALL_END(ctx, ret);
return ret;
}
diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c
index d4d64518e11b..fec1c33b3045 100644
--- a/drivers/gpu/drm/drm_modes.c
+++ b/drivers/gpu/drm/drm_modes.c
@@ -748,32 +748,6 @@ void drm_mode_set_name(struct drm_display_mode *mode)
EXPORT_SYMBOL(drm_mode_set_name);
/**
- * drm_mode_hsync - get the hsync of a mode
- * @mode: mode
- *
- * Returns:
- * @modes's hsync rate in kHz, rounded to the nearest integer. Calculates the
- * value first if it is not yet set.
- */
-int drm_mode_hsync(const struct drm_display_mode *mode)
-{
- unsigned int calc_val;
-
- if (mode->hsync)
- return mode->hsync;
-
- if (mode->htotal <= 0)
- return 0;
-
- calc_val = (mode->clock * 1000) / mode->htotal; /* hsync in Hz */
- calc_val += 500; /* round to 1000Hz */
- calc_val /= 1000; /* truncate to kHz */
-
- return calc_val;
-}
-EXPORT_SYMBOL(drm_mode_hsync);
-
-/**
* drm_mode_vrefresh - get the vrefresh of a mode
* @mode: mode
*
diff --git a/drivers/gpu/drm/drm_pci.c b/drivers/gpu/drm/drm_pci.c
index 81aa21561982..75e2b7053f35 100644
--- a/drivers/gpu/drm/drm_pci.c
+++ b/drivers/gpu/drm/drm_pci.c
@@ -30,12 +30,13 @@
#include <drm/drm.h>
#include <drm/drm_agpsupport.h>
#include <drm/drm_drv.h>
-#include <drm/drm_pci.h>
#include <drm/drm_print.h>
#include "drm_internal.h"
#include "drm_legacy.h"
+#ifdef CONFIG_DRM_LEGACY
+
/**
* drm_pci_alloc - Allocate a PCI consistent memory block, for DMA.
* @dev: DRM device
@@ -93,6 +94,7 @@ void drm_pci_free(struct drm_device * dev, drm_dma_handle_t * dmah)
}
EXPORT_SYMBOL(drm_pci_free);
+#endif
static int drm_get_pci_domain(struct drm_device *dev)
{
diff --git a/drivers/gpu/drm/drm_plane.c b/drivers/gpu/drm/drm_plane.c
index d6ad60ab0d38..4af173ced327 100644
--- a/drivers/gpu/drm/drm_plane.c
+++ b/drivers/gpu/drm/drm_plane.c
@@ -289,6 +289,8 @@ EXPORT_SYMBOL(drm_universal_plane_init);
int drm_plane_register_all(struct drm_device *dev)
{
+ unsigned int num_planes = 0;
+ unsigned int num_zpos = 0;
struct drm_plane *plane;
int ret = 0;
@@ -297,8 +299,15 @@ int drm_plane_register_all(struct drm_device *dev)
ret = plane->funcs->late_register(plane);
if (ret)
return ret;
+
+ if (plane->zpos_property)
+ num_zpos++;
+ num_planes++;
}
+ drm_WARN(dev, num_zpos && num_planes != num_zpos,
+ "Mixing planes with and without zpos property is invalid\n");
+
return 0;
}
diff --git a/drivers/gpu/drm/drm_scatter.c b/drivers/gpu/drm/drm_scatter.c
index ca520028b2cb..f4e6184d1877 100644
--- a/drivers/gpu/drm/drm_scatter.c
+++ b/drivers/gpu/drm/drm_scatter.c
@@ -43,15 +43,6 @@
#define DEBUG_SCATTER 0
-static inline void *drm_vmalloc_dma(unsigned long size)
-{
-#if defined(__powerpc__) && defined(CONFIG_NOT_COHERENT_CACHE)
- return __vmalloc(size, GFP_KERNEL, pgprot_noncached_wc(PAGE_KERNEL));
-#else
- return vmalloc_32(size);
-#endif
-}
-
static void drm_sg_cleanup(struct drm_sg_mem * entry)
{
struct page *page;
@@ -126,7 +117,7 @@ int drm_legacy_sg_alloc(struct drm_device *dev, void *data,
return -ENOMEM;
}
- entry->virtual = drm_vmalloc_dma(pages << PAGE_SHIFT);
+ entry->virtual = vmalloc_32(pages << PAGE_SHIFT);
if (!entry->virtual) {
kfree(entry->busaddr);
kfree(entry->pagelist);
diff --git a/drivers/gpu/drm/drm_vblank.c b/drivers/gpu/drm/drm_vblank.c
index da7b0b0c1090..2d5ce690d214 100644
--- a/drivers/gpu/drm/drm_vblank.c
+++ b/drivers/gpu/drm/drm_vblank.c
@@ -30,6 +30,7 @@
#include <drm/drm_crtc.h>
#include <drm/drm_drv.h>
#include <drm/drm_framebuffer.h>
+#include <drm/drm_managed.h>
#include <drm/drm_modeset_helper_vtables.h>
#include <drm/drm_print.h>
#include <drm/drm_vblank.h>
@@ -40,6 +41,69 @@
/**
* DOC: vblank handling
*
+ * From the computer's perspective, every time the monitor displays
+ * a new frame the scanout engine has "scanned out" the display image
+ * from top to bottom, one row of pixels at a time. The current row
+ * of pixels is referred to as the current scanline.
+ *
+ * In addition to the display's visible area, there are usually a couple of
+ * extra scanlines which aren't actually displayed on the screen.
+ * These extra scanlines don't contain image data and are occasionally used
+ * for features like audio and infoframes. The region made up of these
+ * scanlines is referred to as the vertical blanking region, or vblank for
+ * short.
+ *
+ * For historical reference, the vertical blanking period was designed to
+ * give the electron gun (on CRTs) enough time to move back to the top of
+ * the screen to start scanning out the next frame. Similar for horizontal
+ * blanking periods. They were designed to give the electron gun enough
+ * time to move back to the other side of the screen to start scanning the
+ * next scanline.
+ *
+ * ::
+ *
+ *
+ * physical → ⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽
+ * top of | |
+ * display | |
+ * | New frame |
+ * | |
+ * |↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓|
+ * |~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~| ← Scanline,
+ * |↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓| updates the
+ * | | frame as it
+ * | | travels down
+ * | | ("sacn out")
+ * | Old frame |
+ * | |
+ * | |
+ * | |
+ * | | physical
+ * | | bottom of
+ * vertical |⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽| ← display
+ * blanking ┆xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx┆
+ * region → ┆xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx┆
+ * ┆xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx┆
+ * start of → ⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽⎽
+ * new frame
+ *
+ * "Physical top of display" is the reference point for the high-precision/
+ * corrected timestamp.
+ *
+ * On a lot of display hardware, programming needs to take effect during the
+ * vertical blanking period so that settings like gamma, the image buffer
+ * to be scanned out, etc. can safely be changed without showing
+ * any visual artifacts on the screen. In some unforgiving hardware, some of
+ * this programming has to both start and end in the same vblank. To help
+ * with the timing of the hardware programming, an interrupt is usually
+ * available to notify the driver when it can start updating the registers.
+ * In this context, that interrupt is called the vblank interrupt.
+ *
+ * The vblank interrupt may be fired at different points depending on the
+ * hardware. Some hardware implementations will fire the interrupt when the
+ * new frame starts, while other implementations will fire the interrupt at
+ * different points in time.
+ *
* Vertical blanking plays a major role in graphics rendering. To achieve
* tear-free display, users must synchronize page flips and/or rendering to
* vertical blanking. The DRM API offers ioctls to perform page flips
@@ -278,8 +342,8 @@ static void drm_update_vblank_count(struct drm_device *dev, unsigned int pipe,
DRM_DEBUG_VBL("updating vblank count on crtc %u:"
" current=%llu, diff=%u, hw=%u hw_last=%u\n",
- pipe, atomic64_read(&vblank->count), diff,
- cur_vblank, vblank->last);
+ pipe, (unsigned long long)atomic64_read(&vblank->count),
+ diff, cur_vblank, vblank->last);
if (diff == 0) {
WARN_ON_ONCE(cur_vblank != vblank->last);
@@ -425,14 +489,10 @@ static void vblank_disable_fn(struct timer_list *t)
spin_unlock_irqrestore(&dev->vbl_lock, irqflags);
}
-void drm_vblank_cleanup(struct drm_device *dev)
+static void drm_vblank_init_release(struct drm_device *dev, void *ptr)
{
unsigned int pipe;
- /* Bail if the driver didn't call drm_vblank_init() */
- if (dev->num_crtcs == 0)
- return;
-
for (pipe = 0; pipe < dev->num_crtcs; pipe++) {
struct drm_vblank_crtc *vblank = &dev->vblank[pipe];
@@ -441,10 +501,6 @@ void drm_vblank_cleanup(struct drm_device *dev)
del_timer_sync(&vblank->disable_timer);
}
-
- kfree(dev->vblank);
-
- dev->num_crtcs = 0;
}
/**
@@ -453,25 +509,29 @@ void drm_vblank_cleanup(struct drm_device *dev)
* @num_crtcs: number of CRTCs supported by @dev
*
* This function initializes vblank support for @num_crtcs display pipelines.
- * Cleanup is handled by the DRM core, or through calling drm_dev_fini() for
- * drivers with a &drm_driver.release callback.
+ * Cleanup is handled automatically through a cleanup function added with
+ * drmm_add_action().
*
* Returns:
* Zero on success or a negative error code on failure.
*/
int drm_vblank_init(struct drm_device *dev, unsigned int num_crtcs)
{
- int ret = -ENOMEM;
+ int ret;
unsigned int i;
spin_lock_init(&dev->vbl_lock);
spin_lock_init(&dev->vblank_time_lock);
+ dev->vblank = drmm_kcalloc(dev, num_crtcs, sizeof(*dev->vblank), GFP_KERNEL);
+ if (!dev->vblank)
+ return -ENOMEM;
+
dev->num_crtcs = num_crtcs;
- dev->vblank = kcalloc(num_crtcs, sizeof(*dev->vblank), GFP_KERNEL);
- if (!dev->vblank)
- goto err;
+ ret = drmm_add_action(dev, drm_vblank_init_release, NULL);
+ if (ret)
+ return ret;
for (i = 0; i < num_crtcs; i++) {
struct drm_vblank_crtc *vblank = &dev->vblank[i];
@@ -486,10 +546,6 @@ int drm_vblank_init(struct drm_device *dev, unsigned int num_crtcs)
DRM_INFO("Supports vblank timestamp caching Rev 2 (21.10.2013).\n");
return 0;
-
-err:
- dev->num_crtcs = 0;
- return ret;
}
EXPORT_SYMBOL(drm_vblank_init);
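A minimal sketch of the resulting driver-side usage, assuming a KMS driver probe path; the wrapper name is hypothetical. Since dev->vblank now comes from drmm_kcalloc() and the disable timers are torn down by the registered release action, no cleanup counterpart is needed.

#include <drm/drm_vblank.h>

static int example_enable_vblank_support(struct drm_device *dev,
                                         unsigned int num_crtcs)
{
        /* Allocation and cleanup are both managed by the DRM core. */
        return drm_vblank_init(dev, num_crtcs);
}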
diff --git a/drivers/gpu/drm/drm_vm.c b/drivers/gpu/drm/drm_vm.c
index aa88911bbc06..56197ae0b2f9 100644
--- a/drivers/gpu/drm/drm_vm.c
+++ b/drivers/gpu/drm/drm_vm.c
@@ -595,8 +595,8 @@ static int drm_mmap_locked(struct file *filp, struct vm_area_struct *vma)
vma->vm_ops = &drm_vm_ops;
break;
}
+ fallthrough; /* to _DRM_FRAME_BUFFER... */
#endif
- /* fall through - to _DRM_FRAME_BUFFER... */
case _DRM_FRAME_BUFFER:
case _DRM_REGISTERS:
offset = drm_core_get_reg_ofs(dev);
@@ -621,7 +621,7 @@ static int drm_mmap_locked(struct file *filp, struct vm_area_struct *vma)
vma->vm_end - vma->vm_start, vma->vm_page_prot))
return -EAGAIN;
vma->vm_page_prot = drm_dma_prot(map->type, vma);
- /* fall through - to _DRM_SHM */
+ fallthrough; /* to _DRM_SHM */
case _DRM_SHM:
vma->vm_ops = &drm_vm_shm_ops;
vma->vm_private_data = (void *)map;
diff --git a/drivers/gpu/drm/drm_vram_helper_common.c b/drivers/gpu/drm/drm_vram_helper_common.c
deleted file mode 100644
index 2000d9b33fd5..000000000000
--- a/drivers/gpu/drm/drm_vram_helper_common.c
+++ /dev/null
@@ -1,94 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-
-#include <linux/module.h>
-
-/**
- * DOC: overview
- *
- * This library provides &struct drm_gem_vram_object (GEM VRAM), a GEM
- * buffer object that is backed by video RAM. It can be used for
- * framebuffer devices with dedicated memory. The video RAM is managed
- * by &struct drm_vram_mm (VRAM MM).
- *
- * With the GEM interface userspace applications create, manage and destroy
- * graphics buffers, such as an on-screen framebuffer. GEM does not provide
- * an implementation of these interfaces. It's up to the DRM driver to
- * provide an implementation that suits the hardware. If the hardware device
- * contains dedicated video memory, the DRM driver can use the VRAM helper
- * library. Each active buffer object is stored in video RAM. Active
- * buffer are used for drawing the current frame, typically something like
- * the frame's scanout buffer or the cursor image. If there's no more space
- * left in VRAM, inactive GEM objects can be moved to system memory.
- *
- * The easiest way to use the VRAM helper library is to call
- * drm_vram_helper_alloc_mm(). The function allocates and initializes an
- * instance of &struct drm_vram_mm in &struct drm_device.vram_mm . Use
- * &DRM_GEM_VRAM_DRIVER to initialize &struct drm_driver and
- * &DRM_VRAM_MM_FILE_OPERATIONS to initialize &struct file_operations;
- * as illustrated below.
- *
- * .. code-block:: c
- *
- * struct file_operations fops ={
- * .owner = THIS_MODULE,
- * DRM_VRAM_MM_FILE_OPERATION
- * };
- * struct drm_driver drv = {
- * .driver_feature = DRM_ ... ,
- * .fops = &fops,
- * DRM_GEM_VRAM_DRIVER
- * };
- *
- * int init_drm_driver()
- * {
- * struct drm_device *dev;
- * uint64_t vram_base;
- * unsigned long vram_size;
- * int ret;
- *
- * // setup device, vram base and size
- * // ...
- *
- * ret = drm_vram_helper_alloc_mm(dev, vram_base, vram_size);
- * if (ret)
- * return ret;
- * return 0;
- * }
- *
- * This creates an instance of &struct drm_vram_mm, exports DRM userspace
- * interfaces for GEM buffer management and initializes file operations to
- * allow for accessing created GEM buffers. With this setup, the DRM driver
- * manages an area of video RAM with VRAM MM and provides GEM VRAM objects
- * to userspace.
- *
- * To clean up the VRAM memory management, call drm_vram_helper_release_mm()
- * in the driver's clean-up code.
- *
- * .. code-block:: c
- *
- * void fini_drm_driver()
- * {
- * struct drm_device *dev = ...;
- *
- * drm_vram_helper_release_mm(dev);
- * }
- *
- * For drawing or scanout operations, buffer object have to be pinned in video
- * RAM. Call drm_gem_vram_pin() with &DRM_GEM_VRAM_PL_FLAG_VRAM or
- * &DRM_GEM_VRAM_PL_FLAG_SYSTEM to pin a buffer object in video RAM or system
- * memory. Call drm_gem_vram_unpin() to release the pinned object afterwards.
- *
- * A buffer object that is pinned in video RAM has a fixed address within that
- * memory region. Call drm_gem_vram_offset() to retrieve this value. Typically
- * it's used to program the hardware's scanout engine for framebuffers, set
- * the cursor overlay's image for a mouse cursor, or use it as input to the
- * hardware's draing engine.
- *
- * To access a buffer object's memory from the DRM driver, call
- * drm_gem_vram_kmap(). It (optionally) maps the buffer into kernel address
- * space and returns the memory address. Use drm_gem_vram_kunmap() to
- * release the mapping.
- */
-
-MODULE_DESCRIPTION("DRM VRAM memory-management helpers");
-MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
index a8685b2e1803..f9afe11c50f0 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
@@ -231,21 +231,11 @@ static struct drm_info_list etnaviv_debugfs_list[] = {
{"ring", show_each_gpu, 0, etnaviv_ring_show},
};
-static int etnaviv_debugfs_init(struct drm_minor *minor)
+static void etnaviv_debugfs_init(struct drm_minor *minor)
{
- struct drm_device *dev = minor->dev;
- int ret;
-
- ret = drm_debugfs_create_files(etnaviv_debugfs_list,
- ARRAY_SIZE(etnaviv_debugfs_list),
- minor->debugfs_root, minor);
-
- if (ret) {
- dev_err(dev->dev, "could not install etnaviv_debugfs_list\n");
- return ret;
- }
-
- return ret;
+ drm_debugfs_create_files(etnaviv_debugfs_list,
+ ARRAY_SIZE(etnaviv_debugfs_list),
+ minor->debugfs_root, minor);
}
#endif
@@ -736,7 +726,7 @@ static void __exit etnaviv_exit(void)
module_exit(etnaviv_exit);
MODULE_AUTHOR("Christian Gmeiner <christian.gmeiner@gmail.com>");
-MODULE_AUTHOR("Russell King <rmk+kernel@arm.linux.org.uk>");
+MODULE_AUTHOR("Russell King <rmk+kernel@armlinux.org.uk>");
MODULE_AUTHOR("Lucas Stach <l.stach@pengutronix.de>");
MODULE_DESCRIPTION("etnaviv DRM Driver");
MODULE_LICENSE("GPL v2");
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_dump.c b/drivers/gpu/drm/etnaviv/etnaviv_dump.c
index 648cf0207309..706af0304ca4 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_dump.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_dump.c
@@ -154,8 +154,8 @@ void etnaviv_core_dump(struct etnaviv_gem_submit *submit)
file_size += sizeof(*iter.hdr) * n_obj;
/* Allocate the file in vmalloc memory, it's likely to be big */
- iter.start = __vmalloc(file_size, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY,
- PAGE_KERNEL);
+ iter.start = __vmalloc(file_size, GFP_KERNEL | __GFP_NOWARN |
+ __GFP_NORETRY);
if (!iter.start) {
mutex_unlock(&gpu->mmu_context->lock);
dev_warn(gpu->dev, "failed to allocate devcoredump file\n");
diff --git a/drivers/gpu/drm/exynos/exynos_dp.c b/drivers/gpu/drm/exynos/exynos_dp.c
index 5ee090691390..9ac51b6ab34b 100644
--- a/drivers/gpu/drm/exynos/exynos_dp.c
+++ b/drivers/gpu/drm/exynos/exynos_dp.c
@@ -25,6 +25,7 @@
#include <drm/drm_panel.h>
#include <drm/drm_print.h>
#include <drm/drm_probe_helper.h>
+#include <drm/drm_simple_kms_helper.h>
#include <drm/exynos_drm.h>
#include "exynos_drm_crtc.h"
@@ -135,10 +136,6 @@ static const struct drm_encoder_helper_funcs exynos_dp_encoder_helper_funcs = {
.disable = exynos_dp_nop,
};
-static const struct drm_encoder_funcs exynos_dp_encoder_funcs = {
- .destroy = drm_encoder_cleanup,
-};
-
static int exynos_dp_dt_parse_panel(struct exynos_dp_device *dp)
{
int ret;
@@ -167,8 +164,7 @@ static int exynos_dp_bind(struct device *dev, struct device *master, void *data)
return ret;
}
- drm_encoder_init(drm_dev, encoder, &exynos_dp_encoder_funcs,
- DRM_MODE_ENCODER_TMDS, NULL);
+ drm_simple_encoder_init(drm_dev, encoder, DRM_MODE_ENCODER_TMDS);
drm_encoder_helper_add(encoder, &exynos_dp_encoder_helper_funcs);
diff --git a/drivers/gpu/drm/exynos/exynos_drm_dpi.c b/drivers/gpu/drm/exynos/exynos_drm_dpi.c
index 43fa0f26c052..7ba5354e7d94 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_dpi.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_dpi.c
@@ -14,6 +14,7 @@
#include <drm/drm_panel.h>
#include <drm/drm_print.h>
#include <drm/drm_probe_helper.h>
+#include <drm/drm_simple_kms_helper.h>
#include <video/of_videomode.h>
#include <video/videomode.h>
@@ -149,10 +150,6 @@ static const struct drm_encoder_helper_funcs exynos_dpi_encoder_helper_funcs = {
.disable = exynos_dpi_disable,
};
-static const struct drm_encoder_funcs exynos_dpi_encoder_funcs = {
- .destroy = drm_encoder_cleanup,
-};
-
enum {
FIMD_PORT_IN0,
FIMD_PORT_IN1,
@@ -201,8 +198,7 @@ int exynos_dpi_bind(struct drm_device *dev, struct drm_encoder *encoder)
{
int ret;
- drm_encoder_init(dev, encoder, &exynos_dpi_encoder_funcs,
- DRM_MODE_ENCODER_TMDS, NULL);
+ drm_simple_encoder_init(dev, encoder, DRM_MODE_ENCODER_TMDS);
drm_encoder_helper_add(encoder, &exynos_dpi_encoder_helper_funcs);
diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c
index 57defeb44522..dbd80f1e4c78 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_drv.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c
@@ -76,7 +76,6 @@ static void exynos_drm_postclose(struct drm_device *dev, struct drm_file *file)
}
static const struct vm_operations_struct exynos_drm_gem_vm_ops = {
- .fault = exynos_drm_gem_fault,
.open = drm_gem_vm_open,
.close = drm_gem_vm_close,
};
diff --git a/drivers/gpu/drm/exynos/exynos_drm_dsi.c b/drivers/gpu/drm/exynos/exynos_drm_dsi.c
index e080aa92338c..ee96a95fb6be 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_dsi.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_dsi.c
@@ -30,6 +30,7 @@
#include <drm/drm_panel.h>
#include <drm/drm_print.h>
#include <drm/drm_probe_helper.h>
+#include <drm/drm_simple_kms_helper.h>
#include "exynos_drm_crtc.h"
#include "exynos_drm_drv.h"
@@ -211,7 +212,7 @@
#define OLD_SCLK_MIPI_CLK_NAME "pll_clk"
-static char *clk_names[5] = { "bus_clk", "sclk_mipi",
+static const char *const clk_names[5] = { "bus_clk", "sclk_mipi",
"phyclk_mipidphy0_bitclkdiv8", "phyclk_mipidphy0_rxclkesc0",
"sclk_rgb_vclk_to_dsim0" };
@@ -1523,10 +1524,6 @@ static const struct drm_encoder_helper_funcs exynos_dsi_encoder_helper_funcs = {
.disable = exynos_dsi_disable,
};
-static const struct drm_encoder_funcs exynos_dsi_encoder_funcs = {
- .destroy = drm_encoder_cleanup,
-};
-
MODULE_DEVICE_TABLE(of, exynos_dsi_of_match);
static int exynos_dsi_host_attach(struct mipi_dsi_host *host,
@@ -1704,8 +1701,7 @@ static int exynos_dsi_bind(struct device *dev, struct device *master,
struct drm_bridge *in_bridge;
int ret;
- drm_encoder_init(drm_dev, encoder, &exynos_dsi_encoder_funcs,
- DRM_MODE_ENCODER_TMDS, NULL);
+ drm_simple_encoder_init(drm_dev, encoder, DRM_MODE_ENCODER_TMDS);
drm_encoder_helper_add(encoder, &exynos_dsi_encoder_helper_funcs);
@@ -1763,10 +1759,6 @@ static int exynos_dsi_probe(struct platform_device *pdev)
dsi->dev = dev;
dsi->driver_data = of_device_get_match_data(dev);
- ret = exynos_dsi_parse_dt(dsi);
- if (ret)
- return ret;
-
dsi->supplies[0].supply = "vddcore";
dsi->supplies[1].supply = "vddio";
ret = devm_regulator_bulk_get(dev, ARRAY_SIZE(dsi->supplies),
@@ -1813,10 +1805,8 @@ static int exynos_dsi_probe(struct platform_device *pdev)
}
dsi->irq = platform_get_irq(pdev, 0);
- if (dsi->irq < 0) {
- dev_err(dev, "failed to request dsi irq resource\n");
+ if (dsi->irq < 0)
return dsi->irq;
- }
irq_set_status_flags(dsi->irq, IRQ_NOAUTOEN);
ret = devm_request_threaded_irq(dev, dsi->irq, NULL,
@@ -1827,11 +1817,25 @@ static int exynos_dsi_probe(struct platform_device *pdev)
return ret;
}
+ ret = exynos_dsi_parse_dt(dsi);
+ if (ret)
+ return ret;
+
platform_set_drvdata(pdev, &dsi->encoder);
pm_runtime_enable(dev);
- return component_add(dev, &exynos_dsi_component_ops);
+ ret = component_add(dev, &exynos_dsi_component_ops);
+ if (ret)
+ goto err_disable_runtime;
+
+ return 0;
+
+err_disable_runtime:
+ pm_runtime_disable(dev);
+ of_node_put(dsi->in_bridge_node);
+
+ return ret;
}
static int exynos_dsi_remove(struct platform_device *pdev)
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fbdev.c b/drivers/gpu/drm/exynos/exynos_drm_fbdev.c
index e6ceaf36fb04..56a2b47e1af7 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fbdev.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fbdev.c
@@ -76,7 +76,6 @@ static int exynos_drm_fbdev_update(struct drm_fb_helper *helper,
struct fb_info *fbi;
struct drm_framebuffer *fb = helper->fb;
unsigned int size = fb->width * fb->height * fb->format->cpp[0];
- unsigned int nr_pages;
unsigned long offset;
fbi = drm_fb_helper_alloc_fbi(helper);
@@ -90,16 +89,6 @@ static int exynos_drm_fbdev_update(struct drm_fb_helper *helper,
drm_fb_helper_fill_info(fbi, helper, sizes);
- nr_pages = exynos_gem->size >> PAGE_SHIFT;
-
- exynos_gem->kvaddr = (void __iomem *) vmap(exynos_gem->pages, nr_pages,
- VM_MAP, pgprot_writecombine(PAGE_KERNEL));
- if (!exynos_gem->kvaddr) {
- DRM_DEV_ERROR(to_dma_dev(helper->dev),
- "failed to map pages to kernel space.\n");
- return -EIO;
- }
-
offset = fbi->var.xoffset * fb->format->cpp[0];
offset += fbi->var.yoffset * fb->pitches[0];
@@ -133,18 +122,7 @@ static int exynos_drm_fbdev_create(struct drm_fb_helper *helper,
size = mode_cmd.pitches[0] * mode_cmd.height;
- exynos_gem = exynos_drm_gem_create(dev, EXYNOS_BO_CONTIG, size);
- /*
- * If physically contiguous memory allocation fails and if IOMMU is
- * supported then try to get buffer from non physically contiguous
- * memory area.
- */
- if (IS_ERR(exynos_gem) && is_drm_iommu_supported(dev)) {
- dev_warn(dev->dev, "contiguous FB allocation failed, falling back to non-contiguous\n");
- exynos_gem = exynos_drm_gem_create(dev, EXYNOS_BO_NONCONTIG,
- size);
- }
-
+ exynos_gem = exynos_drm_gem_create(dev, EXYNOS_BO_WC, size, true);
if (IS_ERR(exynos_gem))
return PTR_ERR(exynos_gem);
@@ -229,12 +207,8 @@ err_init:
static void exynos_drm_fbdev_destroy(struct drm_device *dev,
struct drm_fb_helper *fb_helper)
{
- struct exynos_drm_fbdev *exynos_fbd = to_exynos_fbdev(fb_helper);
- struct exynos_drm_gem *exynos_gem = exynos_fbd->exynos_gem;
struct drm_framebuffer *fb;
- vunmap(exynos_gem->kvaddr);
-
/* release drm framebuffer and real buffer */
if (fb_helper->fb && fb_helper->fb->funcs) {
fb = fb_helper->fb;
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c
index d734d9d51762..0df57ee34144 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_gem.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c
@@ -17,28 +17,23 @@
#include "exynos_drm_drv.h"
#include "exynos_drm_gem.h"
-static int exynos_drm_alloc_buf(struct exynos_drm_gem *exynos_gem)
+static int exynos_drm_alloc_buf(struct exynos_drm_gem *exynos_gem, bool kvmap)
{
struct drm_device *dev = exynos_gem->base.dev;
- unsigned long attr;
- unsigned int nr_pages;
- struct sg_table sgt;
- int ret = -ENOMEM;
+ unsigned long attr = 0;
if (exynos_gem->dma_addr) {
DRM_DEV_DEBUG_KMS(to_dma_dev(dev), "already allocated.\n");
return 0;
}
- exynos_gem->dma_attrs = 0;
-
/*
* if EXYNOS_BO_CONTIG, fully physically contiguous memory
* region will be allocated else physically contiguous
* as possible.
*/
if (!(exynos_gem->flags & EXYNOS_BO_NONCONTIG))
- exynos_gem->dma_attrs |= DMA_ATTR_FORCE_CONTIGUOUS;
+ attr |= DMA_ATTR_FORCE_CONTIGUOUS;
/*
* if EXYNOS_BO_WC or EXYNOS_BO_NONCACHABLE, writecombine mapping
@@ -46,61 +41,29 @@ static int exynos_drm_alloc_buf(struct exynos_drm_gem *exynos_gem)
*/
if (exynos_gem->flags & EXYNOS_BO_WC ||
!(exynos_gem->flags & EXYNOS_BO_CACHABLE))
- attr = DMA_ATTR_WRITE_COMBINE;
+ attr |= DMA_ATTR_WRITE_COMBINE;
else
- attr = DMA_ATTR_NON_CONSISTENT;
-
- exynos_gem->dma_attrs |= attr;
- exynos_gem->dma_attrs |= DMA_ATTR_NO_KERNEL_MAPPING;
+ attr |= DMA_ATTR_NON_CONSISTENT;
- nr_pages = exynos_gem->size >> PAGE_SHIFT;
-
- exynos_gem->pages = kvmalloc_array(nr_pages, sizeof(struct page *),
- GFP_KERNEL | __GFP_ZERO);
- if (!exynos_gem->pages) {
- DRM_DEV_ERROR(to_dma_dev(dev), "failed to allocate pages.\n");
- return -ENOMEM;
- }
+ /* FBDev emulation requires kernel mapping */
+ if (!kvmap)
+ attr |= DMA_ATTR_NO_KERNEL_MAPPING;
+ exynos_gem->dma_attrs = attr;
exynos_gem->cookie = dma_alloc_attrs(to_dma_dev(dev), exynos_gem->size,
&exynos_gem->dma_addr, GFP_KERNEL,
exynos_gem->dma_attrs);
if (!exynos_gem->cookie) {
DRM_DEV_ERROR(to_dma_dev(dev), "failed to allocate buffer.\n");
- goto err_free;
- }
-
- ret = dma_get_sgtable_attrs(to_dma_dev(dev), &sgt, exynos_gem->cookie,
- exynos_gem->dma_addr, exynos_gem->size,
- exynos_gem->dma_attrs);
- if (ret < 0) {
- DRM_DEV_ERROR(to_dma_dev(dev), "failed to get sgtable.\n");
- goto err_dma_free;
- }
-
- if (drm_prime_sg_to_page_addr_arrays(&sgt, exynos_gem->pages, NULL,
- nr_pages)) {
- DRM_DEV_ERROR(to_dma_dev(dev), "invalid sgtable.\n");
- ret = -EINVAL;
- goto err_sgt_free;
+ return -ENOMEM;
}
- sg_free_table(&sgt);
+ if (kvmap)
+ exynos_gem->kvaddr = exynos_gem->cookie;
DRM_DEV_DEBUG_KMS(to_dma_dev(dev), "dma_addr(0x%lx), size(0x%lx)\n",
(unsigned long)exynos_gem->dma_addr, exynos_gem->size);
-
return 0;
-
-err_sgt_free:
- sg_free_table(&sgt);
-err_dma_free:
- dma_free_attrs(to_dma_dev(dev), exynos_gem->size, exynos_gem->cookie,
- exynos_gem->dma_addr, exynos_gem->dma_attrs);
-err_free:
- kvfree(exynos_gem->pages);
-
- return ret;
}
static void exynos_drm_free_buf(struct exynos_drm_gem *exynos_gem)
@@ -118,8 +81,6 @@ static void exynos_drm_free_buf(struct exynos_drm_gem *exynos_gem)
dma_free_attrs(to_dma_dev(dev), exynos_gem->size, exynos_gem->cookie,
(dma_addr_t)exynos_gem->dma_addr,
exynos_gem->dma_attrs);
-
- kvfree(exynos_gem->pages);
}
static int exynos_drm_gem_handle_create(struct drm_gem_object *obj,
@@ -203,7 +164,8 @@ static struct exynos_drm_gem *exynos_drm_gem_init(struct drm_device *dev,
struct exynos_drm_gem *exynos_drm_gem_create(struct drm_device *dev,
unsigned int flags,
- unsigned long size)
+ unsigned long size,
+ bool kvmap)
{
struct exynos_drm_gem *exynos_gem;
int ret;
@@ -237,7 +199,7 @@ struct exynos_drm_gem *exynos_drm_gem_create(struct drm_device *dev,
/* set memory type and cache attribute from user side. */
exynos_gem->flags = flags;
- ret = exynos_drm_alloc_buf(exynos_gem);
+ ret = exynos_drm_alloc_buf(exynos_gem, kvmap);
if (ret < 0) {
drm_gem_object_release(&exynos_gem->base);
kfree(exynos_gem);
@@ -254,7 +216,7 @@ int exynos_drm_gem_create_ioctl(struct drm_device *dev, void *data,
struct exynos_drm_gem *exynos_gem;
int ret;
- exynos_gem = exynos_drm_gem_create(dev, args->flags, args->size);
+ exynos_gem = exynos_drm_gem_create(dev, args->flags, args->size, false);
if (IS_ERR(exynos_gem))
return PTR_ERR(exynos_gem);
@@ -365,7 +327,7 @@ int exynos_drm_gem_dumb_create(struct drm_file *file_priv,
else
flags = EXYNOS_BO_CONTIG | EXYNOS_BO_WC;
- exynos_gem = exynos_drm_gem_create(dev, flags, args->size);
+ exynos_gem = exynos_drm_gem_create(dev, flags, args->size, false);
if (IS_ERR(exynos_gem)) {
dev_warn(dev->dev, "FB allocation failed.\n");
return PTR_ERR(exynos_gem);
@@ -381,26 +343,6 @@ int exynos_drm_gem_dumb_create(struct drm_file *file_priv,
return 0;
}
-vm_fault_t exynos_drm_gem_fault(struct vm_fault *vmf)
-{
- struct vm_area_struct *vma = vmf->vma;
- struct drm_gem_object *obj = vma->vm_private_data;
- struct exynos_drm_gem *exynos_gem = to_exynos_gem(obj);
- unsigned long pfn;
- pgoff_t page_offset;
-
- page_offset = (vmf->address - vma->vm_start) >> PAGE_SHIFT;
-
- if (page_offset >= (exynos_gem->size >> PAGE_SHIFT)) {
- DRM_ERROR("invalid page offset\n");
- return VM_FAULT_SIGBUS;
- }
-
- pfn = page_to_pfn(exynos_gem->pages[page_offset]);
- return vmf_insert_mixed(vma, vmf->address,
- __pfn_to_pfn_t(pfn, PFN_DEV));
-}
-
static int exynos_drm_gem_mmap_obj(struct drm_gem_object *obj,
struct vm_area_struct *vma)
{
@@ -462,11 +404,24 @@ struct drm_gem_object *exynos_drm_gem_prime_import(struct drm_device *dev,
struct sg_table *exynos_drm_gem_prime_get_sg_table(struct drm_gem_object *obj)
{
struct exynos_drm_gem *exynos_gem = to_exynos_gem(obj);
- int npages;
+ struct drm_device *drm_dev = obj->dev;
+ struct sg_table *sgt;
+ int ret;
- npages = exynos_gem->size >> PAGE_SHIFT;
+ sgt = kzalloc(sizeof(*sgt), GFP_KERNEL);
+ if (!sgt)
+ return ERR_PTR(-ENOMEM);
- return drm_prime_pages_to_sg(exynos_gem->pages, npages);
+ ret = dma_get_sgtable_attrs(to_dma_dev(drm_dev), sgt, exynos_gem->cookie,
+ exynos_gem->dma_addr, exynos_gem->size,
+ exynos_gem->dma_attrs);
+ if (ret) {
+ DRM_ERROR("failed to get sgtable, %d\n", ret);
+ kfree(sgt);
+ return ERR_PTR(ret);
+ }
+
+ return sgt;
}
struct drm_gem_object *
@@ -475,52 +430,47 @@ exynos_drm_gem_prime_import_sg_table(struct drm_device *dev,
struct sg_table *sgt)
{
struct exynos_drm_gem *exynos_gem;
- int npages;
- int ret;
-
- exynos_gem = exynos_drm_gem_init(dev, attach->dmabuf->size);
- if (IS_ERR(exynos_gem)) {
- ret = PTR_ERR(exynos_gem);
- return ERR_PTR(ret);
- }
- exynos_gem->dma_addr = sg_dma_address(sgt->sgl);
+ if (sgt->nents < 1)
+ return ERR_PTR(-EINVAL);
- npages = exynos_gem->size >> PAGE_SHIFT;
- exynos_gem->pages = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL);
- if (!exynos_gem->pages) {
- ret = -ENOMEM;
- goto err;
+ /*
+ * Check if the provided buffer has been mapped as contiguous
+ * into DMA address space.
+ */
+ if (sgt->nents > 1) {
+ dma_addr_t next_addr = sg_dma_address(sgt->sgl);
+ struct scatterlist *s;
+ unsigned int i;
+
+ for_each_sg(sgt->sgl, s, sgt->nents, i) {
+ if (!sg_dma_len(s))
+ break;
+ if (sg_dma_address(s) != next_addr) {
+ DRM_ERROR("buffer chunks must be mapped contiguously");
+ return ERR_PTR(-EINVAL);
+ }
+ next_addr = sg_dma_address(s) + sg_dma_len(s);
+ }
}
- ret = drm_prime_sg_to_page_addr_arrays(sgt, exynos_gem->pages, NULL,
- npages);
- if (ret < 0)
- goto err_free_large;
-
- exynos_gem->sgt = sgt;
+ exynos_gem = exynos_drm_gem_init(dev, attach->dmabuf->size);
+ if (IS_ERR(exynos_gem))
+ return ERR_CAST(exynos_gem);
- if (sgt->nents == 1) {
- /* always physically continuous memory if sgt->nents is 1. */
- exynos_gem->flags |= EXYNOS_BO_CONTIG;
- } else {
- /*
- * this case could be CONTIG or NONCONTIG type but for now
- * sets NONCONTIG.
- * TODO. we have to find a way that exporter can notify
- * the type of its own buffer to importer.
- */
+ /*
+ * Buffer has been mapped as contiguous into DMA address space,
+ * but if there is IOMMU, it can be either CONTIG or NONCONTIG.
+ * We assume a simplified logic below:
+ */
+ if (is_drm_iommu_supported(dev))
exynos_gem->flags |= EXYNOS_BO_NONCONTIG;
- }
+ else
+ exynos_gem->flags |= EXYNOS_BO_CONTIG;
+ exynos_gem->dma_addr = sg_dma_address(sgt->sgl);
+ exynos_gem->sgt = sgt;
return &exynos_gem->base;
-
-err_free_large:
- kvfree(exynos_gem->pages);
-err:
- drm_gem_object_release(&exynos_gem->base);
- kfree(exynos_gem);
- return ERR_PTR(ret);
}
void *exynos_drm_gem_prime_vmap(struct drm_gem_object *obj)
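A minimal sketch of the flag-to-attribute mapping used by exynos_drm_alloc_buf() after this change, written as a standalone helper with hypothetical names; it only illustrates how the DMA_ATTR_* bits are combined before dma_alloc_attrs().

#include <linux/dma-mapping.h>

static void *example_alloc(struct device *dev, size_t size, bool contig,
                           bool wc, bool kvmap, dma_addr_t *dma_addr)
{
        unsigned long attrs = 0;

        if (contig)
                attrs |= DMA_ATTR_FORCE_CONTIGUOUS;
        if (wc)
                attrs |= DMA_ATTR_WRITE_COMBINE;
        else
                attrs |= DMA_ATTR_NON_CONSISTENT;
        if (!kvmap)
                attrs |= DMA_ATTR_NO_KERNEL_MAPPING; /* no kernel vaddr needed */

        /* When kvmap is set, the returned cookie doubles as the kernel mapping. */
        return dma_alloc_attrs(dev, size, dma_addr, GFP_KERNEL, attrs);
}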
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.h b/drivers/gpu/drm/exynos/exynos_drm_gem.h
index 42ec67bc262d..6ef001f890aa 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_gem.h
+++ b/drivers/gpu/drm/exynos/exynos_drm_gem.h
@@ -21,20 +21,15 @@
* @base: a gem object.
* - a new handle to this gem object would be created
* by drm_gem_handle_create().
- * @buffer: a pointer to exynos_drm_gem_buffer object.
- * - contain the information to memory region allocated
- * by user request or at framebuffer creation.
- * continuous memory region allocated by user request
- * or at framebuffer creation.
* @flags: indicate memory type to allocated buffer and cache attruibute.
* @size: size requested from user, in bytes and this size is aligned
* in page unit.
* @cookie: cookie returned by dma_alloc_attrs
- * @kvaddr: kernel virtual address to allocated memory region.
+ * @kvaddr: kernel virtual address to allocated memory region (for fbdev)
* @dma_addr: bus address(accessed by dma) to allocated memory region.
* - this address could be physical address without IOMMU and
* device address with IOMMU.
- * @pages: Array of backing pages.
+ * @dma_attrs: attrs passed to the dma mapping framework
* @sgt: Imported sg_table.
*
* P.S. this object would be transferred to user as kms_bo.handle so
@@ -48,7 +43,6 @@ struct exynos_drm_gem {
void __iomem *kvaddr;
dma_addr_t dma_addr;
unsigned long dma_attrs;
- struct page **pages;
struct sg_table *sgt;
};
@@ -58,7 +52,8 @@ void exynos_drm_gem_destroy(struct exynos_drm_gem *exynos_gem);
/* create a new buffer with gem object */
struct exynos_drm_gem *exynos_drm_gem_create(struct drm_device *dev,
unsigned int flags,
- unsigned long size);
+ unsigned long size,
+ bool kvmap);
/*
* request gem object creation and buffer allocation as the size
@@ -101,9 +96,6 @@ int exynos_drm_gem_dumb_create(struct drm_file *file_priv,
struct drm_device *dev,
struct drm_mode_create_dumb *args);
-/* page fault handler and mmap fault address(virtual) to physical memory. */
-vm_fault_t exynos_drm_gem_fault(struct vm_fault *vmf);
-
/* set vm_flags and we can change the vm attribute to other one at here. */
int exynos_drm_gem_mmap(struct file *filp, struct vm_area_struct *vma);
diff --git a/drivers/gpu/drm/exynos/exynos_drm_mic.c b/drivers/gpu/drm/exynos/exynos_drm_mic.c
index f41d75923557..a86abc173605 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_mic.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_mic.c
@@ -88,7 +88,7 @@
#define MIC_BS_SIZE_2D(x) ((x) & 0x3fff)
-static char *clk_names[] = { "pclk_mic0", "sclk_rgb_vclk_to_mic0" };
+static const char *const clk_names[] = { "pclk_mic0", "sclk_rgb_vclk_to_mic0" };
#define NUM_CLKS ARRAY_SIZE(clk_names)
static DEFINE_MUTEX(mic_mutex);
diff --git a/drivers/gpu/drm/exynos/exynos_drm_rotator.c b/drivers/gpu/drm/exynos/exynos_drm_rotator.c
index dafa87b82052..2d94afba031e 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_rotator.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_rotator.c
@@ -293,10 +293,8 @@ static int rotator_probe(struct platform_device *pdev)
return PTR_ERR(rot->regs);
irq = platform_get_irq(pdev, 0);
- if (irq < 0) {
- dev_err(dev, "failed to get irq\n");
+ if (irq < 0)
return irq;
- }
ret = devm_request_irq(dev, irq, rotator_irq_handler, 0, dev_name(dev),
rot);
diff --git a/drivers/gpu/drm/exynos/exynos_drm_scaler.c b/drivers/gpu/drm/exynos/exynos_drm_scaler.c
index 93c43c8d914e..ce1857138f89 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_scaler.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_scaler.c
@@ -502,10 +502,8 @@ static int scaler_probe(struct platform_device *pdev)
return PTR_ERR(scaler->regs);
irq = platform_get_irq(pdev, 0);
- if (irq < 0) {
- dev_err(dev, "failed to get irq\n");
+ if (irq < 0)
return irq;
- }
ret = devm_request_threaded_irq(dev, irq, NULL, scaler_irq_handler,
IRQF_ONESHOT, "drm_scaler", scaler);
diff --git a/drivers/gpu/drm/exynos/exynos_drm_vidi.c b/drivers/gpu/drm/exynos/exynos_drm_vidi.c
index b320b3a21ad4..e5662bdcbbde 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_vidi.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_vidi.c
@@ -14,6 +14,7 @@
#include <drm/drm_atomic_helper.h>
#include <drm/drm_edid.h>
#include <drm/drm_probe_helper.h>
+#include <drm/drm_simple_kms_helper.h>
#include <drm/drm_vblank.h>
#include <drm/exynos_drm.h>
@@ -213,6 +214,12 @@ static ssize_t vidi_store_connection(struct device *dev,
static DEVICE_ATTR(connection, 0644, vidi_show_connection,
vidi_store_connection);
+static struct attribute *vidi_attrs[] = {
+ &dev_attr_connection.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(vidi);
+
int vidi_connection_ioctl(struct drm_device *drm_dev, void *data,
struct drm_file *file_priv)
{
@@ -369,10 +376,6 @@ static const struct drm_encoder_helper_funcs exynos_vidi_encoder_helper_funcs =
.disable = exynos_vidi_disable,
};
-static const struct drm_encoder_funcs exynos_vidi_encoder_funcs = {
- .destroy = drm_encoder_cleanup,
-};
-
static int vidi_bind(struct device *dev, struct device *master, void *data)
{
struct vidi_context *ctx = dev_get_drvdata(dev);
@@ -406,8 +409,7 @@ static int vidi_bind(struct device *dev, struct device *master, void *data)
return PTR_ERR(ctx->crtc);
}
- drm_encoder_init(drm_dev, encoder, &exynos_vidi_encoder_funcs,
- DRM_MODE_ENCODER_TMDS, NULL);
+ drm_simple_encoder_init(drm_dev, encoder, DRM_MODE_ENCODER_TMDS);
drm_encoder_helper_add(encoder, &exynos_vidi_encoder_helper_funcs);
@@ -443,7 +445,6 @@ static int vidi_probe(struct platform_device *pdev)
{
struct vidi_context *ctx;
struct device *dev = &pdev->dev;
- int ret;
ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL);
if (!ctx)
@@ -457,23 +458,7 @@ static int vidi_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, ctx);
- ret = device_create_file(dev, &dev_attr_connection);
- if (ret < 0) {
- DRM_DEV_ERROR(dev,
- "failed to create connection sysfs.\n");
- return ret;
- }
-
- ret = component_add(dev, &vidi_component_ops);
- if (ret)
- goto err_remove_file;
-
- return ret;
-
-err_remove_file:
- device_remove_file(dev, &dev_attr_connection);
-
- return ret;
+ return component_add(dev, &vidi_component_ops);
}
static int vidi_remove(struct platform_device *pdev)
@@ -498,5 +483,6 @@ struct platform_driver vidi_driver = {
.driver = {
.name = "exynos-drm-vidi",
.owner = THIS_MODULE,
+ .dev_groups = vidi_groups,
},
};
diff --git a/drivers/gpu/drm/exynos/exynos_hdmi.c b/drivers/gpu/drm/exynos/exynos_hdmi.c
index 1a7c828fc41d..95dd399aa9cc 100644
--- a/drivers/gpu/drm/exynos/exynos_hdmi.c
+++ b/drivers/gpu/drm/exynos/exynos_hdmi.c
@@ -38,6 +38,7 @@
#include <drm/drm_edid.h>
#include <drm/drm_print.h>
#include <drm/drm_probe_helper.h>
+#include <drm/drm_simple_kms_helper.h>
#include "exynos_drm_crtc.h"
#include "regs-hdmi.h"
@@ -1559,10 +1560,6 @@ static const struct drm_encoder_helper_funcs exynos_hdmi_encoder_helper_funcs =
.disable = hdmi_disable,
};
-static const struct drm_encoder_funcs exynos_hdmi_encoder_funcs = {
- .destroy = drm_encoder_cleanup,
-};
-
static void hdmi_audio_shutdown(struct device *dev, void *data)
{
struct hdmi_context *hdata = dev_get_drvdata(dev);
@@ -1843,8 +1840,7 @@ static int hdmi_bind(struct device *dev, struct device *master, void *data)
hdata->phy_clk.enable = hdmiphy_clk_enable;
- drm_encoder_init(drm_dev, encoder, &exynos_hdmi_encoder_funcs,
- DRM_MODE_ENCODER_TMDS, NULL);
+ drm_simple_encoder_init(drm_dev, encoder, DRM_MODE_ENCODER_TMDS);
drm_encoder_helper_add(encoder, &exynos_hdmi_encoder_helper_funcs);
diff --git a/drivers/gpu/drm/exynos/exynos_mixer.c b/drivers/gpu/drm/exynos/exynos_mixer.c
index 21b726baedea..c7e2e2ebc327 100644
--- a/drivers/gpu/drm/exynos/exynos_mixer.c
+++ b/drivers/gpu/drm/exynos/exynos_mixer.c
@@ -1244,9 +1244,11 @@ static int mixer_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, ctx);
+ pm_runtime_enable(dev);
+
ret = component_add(&pdev->dev, &mixer_component_ops);
- if (!ret)
- pm_runtime_enable(dev);
+ if (ret)
+ pm_runtime_disable(dev);
return ret;
}
diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c
index cff344367f81..9b0c4736c21a 100644
--- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c
+++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c
@@ -13,19 +13,11 @@
#include <drm/drm_of.h>
#include <drm/drm_panel.h>
#include <drm/drm_probe_helper.h>
+#include <drm/drm_simple_kms_helper.h>
#include "fsl_dcu_drm_drv.h"
#include "fsl_tcon.h"
-static void fsl_dcu_drm_encoder_destroy(struct drm_encoder *encoder)
-{
- drm_encoder_cleanup(encoder);
-}
-
-static const struct drm_encoder_funcs encoder_funcs = {
- .destroy = fsl_dcu_drm_encoder_destroy,
-};
-
int fsl_dcu_drm_encoder_create(struct fsl_dcu_drm_device *fsl_dev,
struct drm_crtc *crtc)
{
@@ -38,8 +30,8 @@ int fsl_dcu_drm_encoder_create(struct fsl_dcu_drm_device *fsl_dev,
if (fsl_dev->tcon)
fsl_tcon_bypass_enable(fsl_dev->tcon);
- ret = drm_encoder_init(fsl_dev->drm, encoder, &encoder_funcs,
- DRM_MODE_ENCODER_LVDS, NULL);
+ ret = drm_simple_encoder_init(fsl_dev->drm, encoder,
+ DRM_MODE_ENCODER_LVDS);
if (ret < 0)
return ret;
diff --git a/drivers/gpu/drm/gma500/cdv_intel_crt.c b/drivers/gpu/drm/gma500/cdv_intel_crt.c
index 29c36d63b20e..88535f5aacc5 100644
--- a/drivers/gpu/drm/gma500/cdv_intel_crt.c
+++ b/drivers/gpu/drm/gma500/cdv_intel_crt.c
@@ -28,6 +28,8 @@
#include <linux/i2c.h>
#include <linux/pm_runtime.h>
+#include <drm/drm_simple_kms_helper.h>
+
#include "cdv_device.h"
#include "intel_bios.h"
#include "power.h"
@@ -237,15 +239,6 @@ static const struct drm_connector_helper_funcs
.best_encoder = gma_best_encoder,
};
-static void cdv_intel_crt_enc_destroy(struct drm_encoder *encoder)
-{
- drm_encoder_cleanup(encoder);
-}
-
-static const struct drm_encoder_funcs cdv_intel_crt_enc_funcs = {
- .destroy = cdv_intel_crt_enc_destroy,
-};
-
void cdv_intel_crt_init(struct drm_device *dev,
struct psb_intel_mode_device *mode_dev)
{
@@ -271,8 +264,7 @@ void cdv_intel_crt_init(struct drm_device *dev,
&cdv_intel_crt_connector_funcs, DRM_MODE_CONNECTOR_VGA);
encoder = &gma_encoder->base;
- drm_encoder_init(dev, encoder,
- &cdv_intel_crt_enc_funcs, DRM_MODE_ENCODER_DAC, NULL);
+ drm_simple_encoder_init(dev, encoder, DRM_MODE_ENCODER_DAC);
gma_connector_attach_encoder(gma_connector, gma_encoder);
diff --git a/drivers/gpu/drm/gma500/cdv_intel_dp.c b/drivers/gpu/drm/gma500/cdv_intel_dp.c
index 5772b2dce0d6..f41cbb753bb4 100644
--- a/drivers/gpu/drm/gma500/cdv_intel_dp.c
+++ b/drivers/gpu/drm/gma500/cdv_intel_dp.c
@@ -32,6 +32,7 @@
#include <drm/drm_crtc.h>
#include <drm/drm_crtc_helper.h>
#include <drm/drm_dp_helper.h>
+#include <drm/drm_simple_kms_helper.h>
#include "gma_display.h"
#include "psb_drv.h"
@@ -1271,37 +1272,8 @@ cdv_intel_get_adjust_request_pre_emphasis(uint8_t link_status[DP_LINK_STATUS_SIZ
return ((l >> s) & 3) << DP_TRAIN_PRE_EMPHASIS_SHIFT;
}
-
-#if 0
-static char *voltage_names[] = {
- "0.4V", "0.6V", "0.8V", "1.2V"
-};
-static char *pre_emph_names[] = {
- "0dB", "3.5dB", "6dB", "9.5dB"
-};
-static char *link_train_names[] = {
- "pattern 1", "pattern 2", "idle", "off"
-};
-#endif
-
#define CDV_DP_VOLTAGE_MAX DP_TRAIN_VOLTAGE_SWING_LEVEL_3
-/*
-static uint8_t
-cdv_intel_dp_pre_emphasis_max(uint8_t voltage_swing)
-{
- switch (voltage_swing & DP_TRAIN_VOLTAGE_SWING_MASK) {
- case DP_TRAIN_VOLTAGE_SWING_400:
- return DP_TRAIN_PRE_EMPHASIS_6;
- case DP_TRAIN_VOLTAGE_SWING_600:
- return DP_TRAIN_PRE_EMPHASIS_6;
- case DP_TRAIN_VOLTAGE_SWING_800:
- return DP_TRAIN_PRE_EMPHASIS_3_5;
- case DP_TRAIN_VOLTAGE_SWING_1200:
- default:
- return DP_TRAIN_PRE_EMPHASIS_0;
- }
-}
-*/
+
static void
cdv_intel_get_adjust_train(struct gma_encoder *encoder)
{
@@ -1908,11 +1880,6 @@ cdv_intel_dp_destroy(struct drm_connector *connector)
kfree(connector);
}
-static void cdv_intel_dp_encoder_destroy(struct drm_encoder *encoder)
-{
- drm_encoder_cleanup(encoder);
-}
-
static const struct drm_encoder_helper_funcs cdv_intel_dp_helper_funcs = {
.dpms = cdv_intel_dp_dpms,
.mode_fixup = cdv_intel_dp_mode_fixup,
@@ -1935,11 +1902,6 @@ static const struct drm_connector_helper_funcs cdv_intel_dp_connector_helper_fun
.best_encoder = gma_best_encoder,
};
-static const struct drm_encoder_funcs cdv_intel_dp_enc_funcs = {
- .destroy = cdv_intel_dp_encoder_destroy,
-};
-
-
static void cdv_intel_dp_add_properties(struct drm_connector *connector)
{
cdv_intel_attach_force_audio_property(connector);
@@ -2016,8 +1978,7 @@ cdv_intel_dp_init(struct drm_device *dev, struct psb_intel_mode_device *mode_dev
encoder = &gma_encoder->base;
drm_connector_init(dev, connector, &cdv_intel_dp_connector_funcs, type);
- drm_encoder_init(dev, encoder, &cdv_intel_dp_enc_funcs,
- DRM_MODE_ENCODER_TMDS, NULL);
+ drm_simple_encoder_init(dev, encoder, DRM_MODE_ENCODER_TMDS);
gma_connector_attach_encoder(gma_connector, gma_encoder);
@@ -2120,7 +2081,7 @@ cdv_intel_dp_init(struct drm_device *dev, struct psb_intel_mode_device *mode_dev
if (ret == 0) {
/* if this fails, presume the device is a ghost */
DRM_INFO("failed to retrieve link info, disabling eDP\n");
- cdv_intel_dp_encoder_destroy(encoder);
+ drm_encoder_cleanup(encoder);
cdv_intel_dp_destroy(connector);
goto err_priv;
} else {
diff --git a/drivers/gpu/drm/gma500/cdv_intel_hdmi.c b/drivers/gpu/drm/gma500/cdv_intel_hdmi.c
index 1711a41acc16..0d12c6ffbc40 100644
--- a/drivers/gpu/drm/gma500/cdv_intel_hdmi.c
+++ b/drivers/gpu/drm/gma500/cdv_intel_hdmi.c
@@ -32,6 +32,7 @@
#include <drm/drm.h>
#include <drm/drm_crtc.h>
#include <drm/drm_edid.h>
+#include <drm/drm_simple_kms_helper.h>
#include "cdv_device.h"
#include "psb_drv.h"
@@ -311,8 +312,7 @@ void cdv_hdmi_init(struct drm_device *dev,
&cdv_hdmi_connector_funcs,
DRM_MODE_CONNECTOR_DVID);
- drm_encoder_init(dev, encoder, &psb_intel_lvds_enc_funcs,
- DRM_MODE_ENCODER_TMDS, NULL);
+ drm_simple_encoder_init(dev, encoder, DRM_MODE_ENCODER_TMDS);
gma_connector_attach_encoder(gma_connector, gma_encoder);
gma_encoder->type = INTEL_OUTPUT_HDMI;
diff --git a/drivers/gpu/drm/gma500/cdv_intel_lvds.c b/drivers/gpu/drm/gma500/cdv_intel_lvds.c
index ea0a5d9a0acc..eaaf4efec217 100644
--- a/drivers/gpu/drm/gma500/cdv_intel_lvds.c
+++ b/drivers/gpu/drm/gma500/cdv_intel_lvds.c
@@ -12,6 +12,8 @@
#include <linux/i2c.h>
#include <linux/pm_runtime.h>
+#include <drm/drm_simple_kms_helper.h>
+
#include "cdv_device.h"
#include "intel_bios.h"
#include "power.h"
@@ -72,89 +74,6 @@ static u32 cdv_intel_lvds_get_max_backlight(struct drm_device *dev)
return retval;
}
-#if 0
-/*
- * Set LVDS backlight level by I2C command
- */
-static int cdv_lvds_i2c_set_brightness(struct drm_device *dev,
- unsigned int level)
-{
- struct drm_psb_private *dev_priv = dev->dev_private;
- struct psb_intel_i2c_chan *lvds_i2c_bus = dev_priv->lvds_i2c_bus;
- u8 out_buf[2];
- unsigned int blc_i2c_brightness;
-
- struct i2c_msg msgs[] = {
- {
- .addr = lvds_i2c_bus->slave_addr,
- .flags = 0,
- .len = 2,
- .buf = out_buf,
- }
- };
-
- blc_i2c_brightness = BRIGHTNESS_MASK & ((unsigned int)level *
- BRIGHTNESS_MASK /
- BRIGHTNESS_MAX_LEVEL);
-
- if (dev_priv->lvds_bl->pol == BLC_POLARITY_INVERSE)
- blc_i2c_brightness = BRIGHTNESS_MASK - blc_i2c_brightness;
-
- out_buf[0] = dev_priv->lvds_bl->brightnesscmd;
- out_buf[1] = (u8)blc_i2c_brightness;
-
- if (i2c_transfer(&lvds_i2c_bus->adapter, msgs, 1) == 1)
- return 0;
-
- DRM_ERROR("I2C transfer error\n");
- return -1;
-}
-
-
-static int cdv_lvds_pwm_set_brightness(struct drm_device *dev, int level)
-{
- struct drm_psb_private *dev_priv = dev->dev_private;
-
- u32 max_pwm_blc;
- u32 blc_pwm_duty_cycle;
-
- max_pwm_blc = cdv_intel_lvds_get_max_backlight(dev);
-
- /*BLC_PWM_CTL Should be initiated while backlight device init*/
- BUG_ON((max_pwm_blc & PSB_BLC_MAX_PWM_REG_FREQ) == 0);
-
- blc_pwm_duty_cycle = level * max_pwm_blc / BRIGHTNESS_MAX_LEVEL;
-
- if (dev_priv->lvds_bl->pol == BLC_POLARITY_INVERSE)
- blc_pwm_duty_cycle = max_pwm_blc - blc_pwm_duty_cycle;
-
- blc_pwm_duty_cycle &= PSB_BACKLIGHT_PWM_POLARITY_BIT_CLEAR;
- REG_WRITE(BLC_PWM_CTL,
- (max_pwm_blc << PSB_BACKLIGHT_PWM_CTL_SHIFT) |
- (blc_pwm_duty_cycle));
-
- return 0;
-}
-
-/*
- * Set LVDS backlight level either by I2C or PWM
- */
-void cdv_intel_lvds_set_brightness(struct drm_device *dev, int level)
-{
- struct drm_psb_private *dev_priv = dev->dev_private;
-
- if (!dev_priv->lvds_bl) {
- DRM_ERROR("NO LVDS Backlight Info\n");
- return;
- }
-
- if (dev_priv->lvds_bl->type == BLC_I2C_TYPE)
- cdv_lvds_i2c_set_brightness(dev, level);
- else
- cdv_lvds_pwm_set_brightness(dev, level);
-}
-#endif
-
/**
* Sets the backlight level.
*
@@ -499,16 +418,6 @@ static const struct drm_connector_funcs cdv_intel_lvds_connector_funcs = {
.destroy = cdv_intel_lvds_destroy,
};
-
-static void cdv_intel_lvds_enc_destroy(struct drm_encoder *encoder)
-{
- drm_encoder_cleanup(encoder);
-}
-
-static const struct drm_encoder_funcs cdv_intel_lvds_enc_funcs = {
- .destroy = cdv_intel_lvds_enc_destroy,
-};
-
/*
* Enumerate the child dev array parsed from VBT to check whether
* the LVDS is present.
@@ -616,10 +525,7 @@ void cdv_intel_lvds_init(struct drm_device *dev,
&cdv_intel_lvds_connector_funcs,
DRM_MODE_CONNECTOR_LVDS);
- drm_encoder_init(dev, encoder,
- &cdv_intel_lvds_enc_funcs,
- DRM_MODE_ENCODER_LVDS, NULL);
-
+ drm_simple_encoder_init(dev, encoder, DRM_MODE_ENCODER_LVDS);
gma_connector_attach_encoder(gma_connector, gma_encoder);
gma_encoder->type = INTEL_OUTPUT_LVDS;
diff --git a/drivers/gpu/drm/gma500/framebuffer.c b/drivers/gpu/drm/gma500/framebuffer.c
index 1d8f67e4795a..23a78d755382 100644
--- a/drivers/gpu/drm/gma500/framebuffer.c
+++ b/drivers/gpu/drm/gma500/framebuffer.c
@@ -577,31 +577,31 @@ static void psb_setup_outputs(struct drm_device *dev)
break;
case INTEL_OUTPUT_SDVO:
crtc_mask = dev_priv->ops->sdvo_mask;
- clone_mask = (1 << INTEL_OUTPUT_SDVO);
+ clone_mask = 0;
break;
case INTEL_OUTPUT_LVDS:
- crtc_mask = dev_priv->ops->lvds_mask;
- clone_mask = (1 << INTEL_OUTPUT_LVDS);
+ crtc_mask = dev_priv->ops->lvds_mask;
+ clone_mask = 0;
break;
case INTEL_OUTPUT_MIPI:
crtc_mask = (1 << 0);
- clone_mask = (1 << INTEL_OUTPUT_MIPI);
+ clone_mask = 0;
break;
case INTEL_OUTPUT_MIPI2:
crtc_mask = (1 << 2);
- clone_mask = (1 << INTEL_OUTPUT_MIPI2);
+ clone_mask = 0;
break;
case INTEL_OUTPUT_HDMI:
- crtc_mask = dev_priv->ops->hdmi_mask;
+ crtc_mask = dev_priv->ops->hdmi_mask;
clone_mask = (1 << INTEL_OUTPUT_HDMI);
break;
case INTEL_OUTPUT_DISPLAYPORT:
crtc_mask = (1 << 0) | (1 << 1);
- clone_mask = (1 << INTEL_OUTPUT_DISPLAYPORT);
+ clone_mask = 0;
break;
case INTEL_OUTPUT_EDP:
crtc_mask = (1 << 1);
- clone_mask = (1 << INTEL_OUTPUT_EDP);
+ clone_mask = 0;
}
encoder->possible_crtcs = crtc_mask;
encoder->possible_clones =
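The clone_mask changes above rely on encoder->possible_clones being a bitmask of encoder indices that may be active together with this encoder, not a mask of output types; setting clone_mask to 0 for the outputs that cannot be cloned makes that explicit. A hedged sketch of how a type-keyed clone mask is commonly translated into index bits (the helper name is illustrative, not taken from this patch):

#include <drm/drm_device.h>
#include <drm/drm_encoder.h>
#include "psb_intel_drv.h"	/* struct gma_encoder */

/* Illustrative helper: turn a clone mask keyed by gma500 output type into
 * the encoder-index bitmask that encoder->possible_clones expects. */
static u32 sketch_output_type_clones(struct drm_device *dev, u32 type_mask)
{
	struct drm_encoder *encoder;
	u32 index_mask = 0;

	drm_for_each_encoder(encoder, dev) {
		struct gma_encoder *gma_encoder =
			container_of(encoder, struct gma_encoder, base);

		if (type_mask & (1 << gma_encoder->type))
			index_mask |= drm_encoder_mask(encoder);
	}

	return index_mask;
}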
diff --git a/drivers/gpu/drm/gma500/mdfld_dsi_dpi.c b/drivers/gpu/drm/gma500/mdfld_dsi_dpi.c
index d4c65f268922..c976a9dd9240 100644
--- a/drivers/gpu/drm/gma500/mdfld_dsi_dpi.c
+++ b/drivers/gpu/drm/gma500/mdfld_dsi_dpi.c
@@ -27,6 +27,8 @@
#include <linux/delay.h>
+#include <drm/drm_simple_kms_helper.h>
+
#include "mdfld_dsi_dpi.h"
#include "mdfld_dsi_pkg_sender.h"
#include "mdfld_output.h"
@@ -993,10 +995,7 @@ struct mdfld_dsi_encoder *mdfld_dsi_dpi_init(struct drm_device *dev,
/*create drm encoder object*/
connector = &dsi_connector->base.base;
encoder = &dpi_output->base.base.base;
- drm_encoder_init(dev,
- encoder,
- p_funcs->encoder_funcs,
- DRM_MODE_ENCODER_LVDS, NULL);
+ drm_simple_encoder_init(dev, encoder, DRM_MODE_ENCODER_LVDS);
drm_encoder_helper_add(encoder,
p_funcs->encoder_helper_funcs);
@@ -1006,10 +1005,10 @@ struct mdfld_dsi_encoder *mdfld_dsi_dpi_init(struct drm_device *dev,
/*set possible crtcs and clones*/
if (dsi_connector->pipe) {
encoder->possible_crtcs = (1 << 2);
- encoder->possible_clones = (1 << 1);
+ encoder->possible_clones = 0;
} else {
encoder->possible_crtcs = (1 << 0);
- encoder->possible_clones = (1 << 0);
+ encoder->possible_clones = 0;
}
dsi_connector->base.encoder = &dpi_output->base.base;
diff --git a/drivers/gpu/drm/gma500/mdfld_intel_display.c b/drivers/gpu/drm/gma500/mdfld_intel_display.c
index 4fff110c4921..aae2d358364c 100644
--- a/drivers/gpu/drm/gma500/mdfld_intel_display.c
+++ b/drivers/gpu/drm/gma500/mdfld_intel_display.c
@@ -658,16 +658,6 @@ static int mdfld_crtc_mode_set(struct drm_crtc *crtc,
dev_dbg(dev->dev, "pipe = 0x%x\n", pipe);
-#if 0
- if (pipe == 1) {
- if (!gma_power_begin(dev, true))
- return 0;
- android_hdmi_crtc_mode_set(crtc, mode, adjusted_mode,
- x, y, old_fb);
- goto mrst_crtc_mode_set_exit;
- }
-#endif
-
ret = check_fb(crtc->primary->fb);
if (ret)
return ret;
@@ -918,14 +908,6 @@ static int mdfld_crtc_mode_set(struct drm_crtc *crtc,
}
dpll = 0;
-#if 0 /* FIXME revisit later */
- if (ksel == KSEL_CRYSTAL_19 || ksel == KSEL_BYPASS_19 ||
- ksel == KSEL_BYPASS_25)
- dpll &= ~MDFLD_INPUT_REF_SEL;
- else if (ksel == KSEL_BYPASS_83_100)
- dpll |= MDFLD_INPUT_REF_SEL;
-#endif /* FIXME revisit later */
-
if (is_hdmi)
dpll |= MDFLD_VCO_SEL;
@@ -935,20 +917,7 @@ static int mdfld_crtc_mode_set(struct drm_crtc *crtc,
/* compute bitmask from p1 value */
dpll |= (1 << (clock.p1 - 2)) << 17;
-#if 0 /* 1080p30 & 720p */
- dpll = 0x00050000;
- fp = 0x000001be;
-#endif
-#if 0 /* 480p */
- dpll = 0x02010000;
- fp = 0x000000d2;
-#endif
} else {
-#if 0 /*DBI_TPO_480x864*/
- dpll = 0x00020000;
- fp = 0x00000156;
-#endif /* DBI_TPO_480x864 */ /* get from spec. */
-
dpll = 0x00800000;
fp = 0x000000c1;
}
diff --git a/drivers/gpu/drm/gma500/mdfld_output.h b/drivers/gpu/drm/gma500/mdfld_output.h
index ab2b27c0f037..17a944d70add 100644
--- a/drivers/gpu/drm/gma500/mdfld_output.h
+++ b/drivers/gpu/drm/gma500/mdfld_output.h
@@ -51,7 +51,6 @@ struct panel_info {
};
struct panel_funcs {
- const struct drm_encoder_funcs *encoder_funcs;
const struct drm_encoder_helper_funcs *encoder_helper_funcs;
struct drm_display_mode * (*get_config_mode)(struct drm_device *);
int (*get_panel_info)(struct drm_device *, int, struct panel_info *);
diff --git a/drivers/gpu/drm/gma500/mdfld_tmd_vid.c b/drivers/gpu/drm/gma500/mdfld_tmd_vid.c
index 49c92debb7b2..25e897b98f86 100644
--- a/drivers/gpu/drm/gma500/mdfld_tmd_vid.c
+++ b/drivers/gpu/drm/gma500/mdfld_tmd_vid.c
@@ -188,13 +188,7 @@ static const struct drm_encoder_helper_funcs
.commit = mdfld_dsi_dpi_commit,
};
-/*TPO DPI encoder funcs*/
-static const struct drm_encoder_funcs mdfld_tpo_dpi_encoder_funcs = {
- .destroy = drm_encoder_cleanup,
-};
-
const struct panel_funcs mdfld_tmd_vid_funcs = {
- .encoder_funcs = &mdfld_tpo_dpi_encoder_funcs,
.encoder_helper_funcs = &mdfld_tpo_dpi_encoder_helper_funcs,
.get_config_mode = &tmd_vid_get_config_mode,
.get_panel_info = tmd_vid_get_panel_info,
diff --git a/drivers/gpu/drm/gma500/mdfld_tpo_vid.c b/drivers/gpu/drm/gma500/mdfld_tpo_vid.c
index a9420bf9a419..11845978fb0a 100644
--- a/drivers/gpu/drm/gma500/mdfld_tpo_vid.c
+++ b/drivers/gpu/drm/gma500/mdfld_tpo_vid.c
@@ -76,13 +76,7 @@ static const struct drm_encoder_helper_funcs
.commit = mdfld_dsi_dpi_commit,
};
-/*TPO DPI encoder funcs*/
-static const struct drm_encoder_funcs mdfld_tpo_dpi_encoder_funcs = {
- .destroy = drm_encoder_cleanup,
-};
-
const struct panel_funcs mdfld_tpo_vid_funcs = {
- .encoder_funcs = &mdfld_tpo_dpi_encoder_funcs,
.encoder_helper_funcs = &mdfld_tpo_dpi_encoder_helper_funcs,
.get_config_mode = &tpo_vid_get_config_mode,
.get_panel_info = tpo_vid_get_panel_info,
diff --git a/drivers/gpu/drm/gma500/oaktrail_hdmi.c b/drivers/gpu/drm/gma500/oaktrail_hdmi.c
index f4370232767d..a097a59a9eae 100644
--- a/drivers/gpu/drm/gma500/oaktrail_hdmi.c
+++ b/drivers/gpu/drm/gma500/oaktrail_hdmi.c
@@ -27,6 +27,7 @@
#include <linux/delay.h>
#include <drm/drm.h>
+#include <drm/drm_simple_kms_helper.h>
#include "psb_drv.h"
#include "psb_intel_drv.h"
@@ -620,15 +621,6 @@ static const struct drm_connector_funcs oaktrail_hdmi_connector_funcs = {
.destroy = oaktrail_hdmi_destroy,
};
-static void oaktrail_hdmi_enc_destroy(struct drm_encoder *encoder)
-{
- drm_encoder_cleanup(encoder);
-}
-
-static const struct drm_encoder_funcs oaktrail_hdmi_enc_funcs = {
- .destroy = oaktrail_hdmi_enc_destroy,
-};
-
void oaktrail_hdmi_init(struct drm_device *dev,
struct psb_intel_mode_device *mode_dev)
{
@@ -651,9 +643,7 @@ void oaktrail_hdmi_init(struct drm_device *dev,
&oaktrail_hdmi_connector_funcs,
DRM_MODE_CONNECTOR_DVID);
- drm_encoder_init(dev, encoder,
- &oaktrail_hdmi_enc_funcs,
- DRM_MODE_ENCODER_TMDS, NULL);
+ drm_simple_encoder_init(dev, encoder, DRM_MODE_ENCODER_TMDS);
gma_connector_attach_encoder(gma_connector, gma_encoder);
@@ -673,11 +663,6 @@ failed_connector:
kfree(gma_encoder);
}
-static const struct pci_device_id hdmi_ids[] = {
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x080d) },
- { 0 }
-};
-
void oaktrail_hdmi_setup(struct drm_device *dev)
{
struct drm_psb_private *dev_priv = dev->dev_private;
diff --git a/drivers/gpu/drm/gma500/oaktrail_lvds.c b/drivers/gpu/drm/gma500/oaktrail_lvds.c
index 582e09597500..2828360153d1 100644
--- a/drivers/gpu/drm/gma500/oaktrail_lvds.c
+++ b/drivers/gpu/drm/gma500/oaktrail_lvds.c
@@ -13,6 +13,8 @@
#include <asm/intel-mid.h>
+#include <drm/drm_simple_kms_helper.h>
+
#include "intel_bios.h"
#include "power.h"
#include "psb_drv.h"
@@ -311,8 +313,7 @@ void oaktrail_lvds_init(struct drm_device *dev,
&psb_intel_lvds_connector_funcs,
DRM_MODE_CONNECTOR_LVDS);
- drm_encoder_init(dev, encoder, &psb_intel_lvds_enc_funcs,
- DRM_MODE_ENCODER_LVDS, NULL);
+ drm_simple_encoder_init(dev, encoder, DRM_MODE_ENCODER_LVDS);
gma_connector_attach_encoder(gma_connector, gma_encoder);
gma_encoder->type = INTEL_OUTPUT_LVDS;
diff --git a/drivers/gpu/drm/gma500/psb_intel_drv.h b/drivers/gpu/drm/gma500/psb_intel_drv.h
index 16c6136f778b..fb601983cef0 100644
--- a/drivers/gpu/drm/gma500/psb_intel_drv.h
+++ b/drivers/gpu/drm/gma500/psb_intel_drv.h
@@ -252,7 +252,6 @@ extern int psb_intel_lvds_set_property(struct drm_connector *connector,
struct drm_property *property,
uint64_t value);
extern void psb_intel_lvds_destroy(struct drm_connector *connector);
-extern const struct drm_encoder_funcs psb_intel_lvds_enc_funcs;
/* intel_gmbus.c */
extern void gma_intel_i2c_reset(struct drm_device *dev);
diff --git a/drivers/gpu/drm/gma500/psb_intel_lvds.c b/drivers/gpu/drm/gma500/psb_intel_lvds.c
index afaebab7bc17..063c66bb946d 100644
--- a/drivers/gpu/drm/gma500/psb_intel_lvds.c
+++ b/drivers/gpu/drm/gma500/psb_intel_lvds.c
@@ -11,6 +11,8 @@
#include <linux/i2c.h>
#include <linux/pm_runtime.h>
+#include <drm/drm_simple_kms_helper.h>
+
#include "intel_bios.h"
#include "power.h"
#include "psb_drv.h"
@@ -621,18 +623,6 @@ const struct drm_connector_funcs psb_intel_lvds_connector_funcs = {
.destroy = psb_intel_lvds_destroy,
};
-
-static void psb_intel_lvds_enc_destroy(struct drm_encoder *encoder)
-{
- drm_encoder_cleanup(encoder);
-}
-
-const struct drm_encoder_funcs psb_intel_lvds_enc_funcs = {
- .destroy = psb_intel_lvds_enc_destroy,
-};
-
-
-
/**
* psb_intel_lvds_init - setup LVDS connectors on this device
* @dev: drm device
@@ -683,9 +673,7 @@ void psb_intel_lvds_init(struct drm_device *dev,
&psb_intel_lvds_connector_funcs,
DRM_MODE_CONNECTOR_LVDS);
- drm_encoder_init(dev, encoder,
- &psb_intel_lvds_enc_funcs,
- DRM_MODE_ENCODER_LVDS, NULL);
+ drm_simple_encoder_init(dev, encoder, DRM_MODE_ENCODER_LVDS);
gma_connector_attach_encoder(gma_connector, gma_encoder);
gma_encoder->type = INTEL_OUTPUT_LVDS;
diff --git a/drivers/gpu/drm/gma500/psb_intel_sdvo.c b/drivers/gpu/drm/gma500/psb_intel_sdvo.c
index 264d7ad004b4..68fb3d7c172b 100644
--- a/drivers/gpu/drm/gma500/psb_intel_sdvo.c
+++ b/drivers/gpu/drm/gma500/psb_intel_sdvo.c
@@ -864,36 +864,6 @@ static bool psb_intel_sdvo_set_avi_infoframe(struct psb_intel_sdvo *psb_intel_sd
DRM_INFO("HDMI is not supported yet");
return false;
-#if 0
- struct dip_infoframe avi_if = {
- .type = DIP_TYPE_AVI,
- .ver = DIP_VERSION_AVI,
- .len = DIP_LEN_AVI,
- };
- uint8_t tx_rate = SDVO_HBUF_TX_VSYNC;
- uint8_t set_buf_index[2] = { 1, 0 };
- uint64_t *data = (uint64_t *)&avi_if;
- unsigned i;
-
- intel_dip_infoframe_csum(&avi_if);
-
- if (!psb_intel_sdvo_set_value(psb_intel_sdvo,
- SDVO_CMD_SET_HBUF_INDEX,
- set_buf_index, 2))
- return false;
-
- for (i = 0; i < sizeof(avi_if); i += 8) {
- if (!psb_intel_sdvo_set_value(psb_intel_sdvo,
- SDVO_CMD_SET_HBUF_DATA,
- data, 8))
- return false;
- data++;
- }
-
- return psb_intel_sdvo_set_value(psb_intel_sdvo,
- SDVO_CMD_SET_HBUF_TXRATE,
- &tx_rate, 1);
-#endif
}
static bool psb_intel_sdvo_set_tv_format(struct psb_intel_sdvo *psb_intel_sdvo)
@@ -1227,75 +1197,6 @@ static bool psb_intel_sdvo_get_capabilities(struct psb_intel_sdvo *psb_intel_sdv
return true;
}
-/* No use! */
-#if 0
-struct drm_connector* psb_intel_sdvo_find(struct drm_device *dev, int sdvoB)
-{
- struct drm_connector *connector = NULL;
- struct psb_intel_sdvo *iout = NULL;
- struct psb_intel_sdvo *sdvo;
-
- /* find the sdvo connector */
- list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
- iout = to_psb_intel_sdvo(connector);
-
- if (iout->type != INTEL_OUTPUT_SDVO)
- continue;
-
- sdvo = iout->dev_priv;
-
- if (sdvo->sdvo_reg == SDVOB && sdvoB)
- return connector;
-
- if (sdvo->sdvo_reg == SDVOC && !sdvoB)
- return connector;
-
- }
-
- return NULL;
-}
-
-int psb_intel_sdvo_supports_hotplug(struct drm_connector *connector)
-{
- u8 response[2];
- u8 status;
- struct psb_intel_sdvo *psb_intel_sdvo;
- DRM_DEBUG_KMS("\n");
-
- if (!connector)
- return 0;
-
- psb_intel_sdvo = to_psb_intel_sdvo(connector);
-
- return psb_intel_sdvo_get_value(psb_intel_sdvo, SDVO_CMD_GET_HOT_PLUG_SUPPORT,
- &response, 2) && response[0];
-}
-
-void psb_intel_sdvo_set_hotplug(struct drm_connector *connector, int on)
-{
- u8 response[2];
- u8 status;
- struct psb_intel_sdvo *psb_intel_sdvo = to_psb_intel_sdvo(connector);
-
- psb_intel_sdvo_write_cmd(psb_intel_sdvo, SDVO_CMD_GET_ACTIVE_HOT_PLUG, NULL, 0);
- psb_intel_sdvo_read_response(psb_intel_sdvo, &response, 2);
-
- if (on) {
- psb_intel_sdvo_write_cmd(psb_intel_sdvo, SDVO_CMD_GET_HOT_PLUG_SUPPORT, NULL, 0);
- status = psb_intel_sdvo_read_response(psb_intel_sdvo, &response, 2);
-
- psb_intel_sdvo_write_cmd(psb_intel_sdvo, SDVO_CMD_SET_ACTIVE_HOT_PLUG, &response, 2);
- } else {
- response[0] = 0;
- response[1] = 0;
- psb_intel_sdvo_write_cmd(psb_intel_sdvo, SDVO_CMD_SET_ACTIVE_HOT_PLUG, &response, 2);
- }
-
- psb_intel_sdvo_write_cmd(psb_intel_sdvo, SDVO_CMD_GET_ACTIVE_HOT_PLUG, NULL, 0);
- psb_intel_sdvo_read_response(psb_intel_sdvo, &response, 2);
-}
-#endif
-
static bool
psb_intel_sdvo_multifunc_encoder(struct psb_intel_sdvo *psb_intel_sdvo)
{
diff --git a/drivers/gpu/drm/gma500/tc35876x-dsi-lvds.c b/drivers/gpu/drm/gma500/tc35876x-dsi-lvds.c
index 9e8224456ea2..e5bdd99ad453 100644
--- a/drivers/gpu/drm/gma500/tc35876x-dsi-lvds.c
+++ b/drivers/gpu/drm/gma500/tc35876x-dsi-lvds.c
@@ -747,11 +747,11 @@ static int cmi_lcd_hack_create_device(void)
return -EINVAL;
}
- client = i2c_new_device(adapter, &info);
- if (!client) {
- pr_err("%s: i2c_new_device() failed\n", __func__);
+ client = i2c_new_client_device(adapter, &info);
+ if (IS_ERR(client)) {
+ pr_err("%s: creating I2C device failed\n", __func__);
i2c_put_adapter(adapter);
- return -EINVAL;
+ return PTR_ERR(client);
}
return 0;
@@ -765,12 +765,7 @@ static const struct drm_encoder_helper_funcs tc35876x_encoder_helper_funcs = {
.commit = mdfld_dsi_dpi_commit,
};
-static const struct drm_encoder_funcs tc35876x_encoder_funcs = {
- .destroy = drm_encoder_cleanup,
-};
-
const struct panel_funcs mdfld_tc35876x_funcs = {
- .encoder_funcs = &tc35876x_encoder_funcs,
.encoder_helper_funcs = &tc35876x_encoder_helper_funcs,
.get_config_mode = tc35876x_get_config_mode,
.get_panel_info = tc35876x_get_panel_info,
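The i2c_new_device() to i2c_new_client_device() conversions in this series (here, and in sil164_drv.c and tda998x_drv.c further down) also change the error convention: the old call returned NULL on failure, while the new one returns an ERR_PTR-encoded error and never NULL. A minimal sketch of the calling pattern the new API expects:

#include <linux/err.h>
#include <linux/i2c.h>

/* Sketch: ERR_PTR-based error handling for i2c_new_client_device(). */
static int sketch_attach_client(struct i2c_adapter *adapter,
				const struct i2c_board_info *info)
{
	struct i2c_client *client;

	client = i2c_new_client_device(adapter, info);
	if (IS_ERR(client))	/* failures are encoded errors, never NULL */
		return PTR_ERR(client);

	/* ... use client, i2c_unregister_device(client) when done ... */
	return 0;
}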
diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_de.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_de.c
index 55b46a7150a5..cc70e836522f 100644
--- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_de.c
+++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_de.c
@@ -94,6 +94,10 @@ static int hibmc_plane_atomic_check(struct drm_plane *plane,
return -EINVAL;
}
+ if (state->fb->pitches[0] % 128 != 0) {
+ DRM_DEBUG_ATOMIC("wrong stride with 128-byte aligned\n");
+ return -EINVAL;
+ }
return 0;
}
@@ -119,11 +123,8 @@ static void hibmc_plane_atomic_update(struct drm_plane *plane,
writel(gpu_addr, priv->mmio + HIBMC_CRT_FB_ADDRESS);
reg = state->fb->width * (state->fb->format->cpp[0]);
- /* now line_pad is 16 */
- reg = PADDING(16, reg);
- line_l = state->fb->width * state->fb->format->cpp[0];
- line_l = PADDING(16, line_l);
+ line_l = state->fb->pitches[0];
writel(HIBMC_FIELD(HIBMC_CRT_FB_WIDTH_WIDTH, reg) |
HIBMC_FIELD(HIBMC_CRT_FB_WIDTH_OFFS, line_l),
priv->mmio + HIBMC_CRT_FB_WIDTH);
diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c
index 222356a4f9a8..a6fd0c29e5b8 100644
--- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c
+++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c
@@ -94,7 +94,7 @@ static int hibmc_kms_init(struct hibmc_drm_private *priv)
priv->dev->mode_config.max_height = 1200;
priv->dev->mode_config.fb_base = priv->fb_base;
- priv->dev->mode_config.preferred_depth = 24;
+ priv->dev->mode_config.preferred_depth = 32;
priv->dev->mode_config.prefer_shadow = 1;
priv->dev->mode_config.funcs = (void *)&hibmc_mode_funcs;
@@ -307,11 +307,7 @@ static int hibmc_load(struct drm_device *dev)
/* reset all the states of crtc/plane/encoder/connector */
drm_mode_config_reset(dev);
- ret = drm_fbdev_generic_setup(dev, 16);
- if (ret) {
- DRM_ERROR("failed to initialize fbdev: %d\n", ret);
- goto err;
- }
+ drm_fbdev_generic_setup(dev, dev->mode_config.preferred_depth);
return 0;
diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_ttm.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_ttm.c
index 99397ac3b363..322bd542e89d 100644
--- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_ttm.c
+++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_ttm.c
@@ -50,7 +50,7 @@ void hibmc_mm_fini(struct hibmc_drm_private *hibmc)
int hibmc_dumb_create(struct drm_file *file, struct drm_device *dev,
struct drm_mode_create_dumb *args)
{
- return drm_gem_vram_fill_create_dumb(file, dev, 0, 16, args);
+ return drm_gem_vram_fill_create_dumb(file, dev, 0, 128, args);
}
const struct drm_mode_config_funcs hibmc_mode_funcs = {
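The two hibmc changes above fit together: dumb buffers are now created with a 128-byte pitch alignment, and hibmc_plane_atomic_check() rejects framebuffers whose pitches[0] is not a multiple of 128. A rough sketch of where that pitch comes from, assuming the VRAM helper computes it in the usual way (the exact internals of drm_gem_vram_fill_create_dumb() may differ):

#include <linux/kernel.h>

/* Sketch: dumb-buffer pitch honouring the pitch_align argument
 * (128 bytes for hibmc). */
static u32 sketch_dumb_pitch(u32 width, u32 bpp, unsigned long pitch_align)
{
	u32 pitch = width * DIV_ROUND_UP(bpp, 8);	/* bytes per scanline */

	return ALIGN(pitch, pitch_align);
}

/* e.g. width = 1366, bpp = 32: 1366 * 4 = 5464 bytes, aligned up to 5504. */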
diff --git a/drivers/gpu/drm/hisilicon/kirin/dw_drm_dsi.c b/drivers/gpu/drm/hisilicon/kirin/dw_drm_dsi.c
index f31068d74b18..00e87c290796 100644
--- a/drivers/gpu/drm/hisilicon/kirin/dw_drm_dsi.c
+++ b/drivers/gpu/drm/hisilicon/kirin/dw_drm_dsi.c
@@ -20,11 +20,11 @@
#include <drm/drm_atomic_helper.h>
#include <drm/drm_bridge.h>
#include <drm/drm_device.h>
-#include <drm/drm_encoder_slave.h>
#include <drm/drm_mipi_dsi.h>
#include <drm/drm_of.h>
#include <drm/drm_print.h>
#include <drm/drm_probe_helper.h>
+#include <drm/drm_simple_kms_helper.h>
#include "dw_dsi_reg.h"
@@ -696,10 +696,6 @@ static const struct drm_encoder_helper_funcs dw_encoder_helper_funcs = {
.disable = dsi_encoder_disable
};
-static const struct drm_encoder_funcs dw_encoder_funcs = {
- .destroy = drm_encoder_cleanup,
-};
-
static int dw_drm_encoder_init(struct device *dev,
struct drm_device *drm_dev,
struct drm_encoder *encoder)
@@ -713,8 +709,7 @@ static int dw_drm_encoder_init(struct device *dev,
}
encoder->possible_crtcs = crtc_mask;
- ret = drm_encoder_init(drm_dev, encoder, &dw_encoder_funcs,
- DRM_MODE_ENCODER_DSI, NULL);
+ ret = drm_simple_encoder_init(drm_dev, encoder, DRM_MODE_ENCODER_DSI);
if (ret) {
DRM_ERROR("failed to init dsi encoder\n");
return ret;
diff --git a/drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c b/drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c
index 86000127d4ee..c339e632522a 100644
--- a/drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c
+++ b/drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c
@@ -940,7 +940,6 @@ static struct drm_driver ade_driver = {
};
struct kirin_drm_data ade_driver_data = {
- .register_connects = false,
.num_planes = ADE_CH_NUM,
.prim_plane = ADE_CH1,
.channel_formats = channel_formats,
diff --git a/drivers/gpu/drm/hisilicon/kirin/kirin_drm_drv.c b/drivers/gpu/drm/hisilicon/kirin/kirin_drm_drv.c
index d3145ae877d7..4349da3e2379 100644
--- a/drivers/gpu/drm/hisilicon/kirin/kirin_drm_drv.c
+++ b/drivers/gpu/drm/hisilicon/kirin/kirin_drm_drv.c
@@ -219,40 +219,6 @@ static int kirin_drm_kms_cleanup(struct drm_device *dev)
return 0;
}
-static int kirin_drm_connectors_register(struct drm_device *dev)
-{
- struct drm_connector *connector;
- struct drm_connector *failed_connector;
- struct drm_connector_list_iter conn_iter;
- int ret;
-
- mutex_lock(&dev->mode_config.mutex);
- drm_connector_list_iter_begin(dev, &conn_iter);
- drm_for_each_connector_iter(connector, &conn_iter) {
- ret = drm_connector_register(connector);
- if (ret) {
- failed_connector = connector;
- goto err;
- }
- }
- drm_connector_list_iter_end(&conn_iter);
- mutex_unlock(&dev->mode_config.mutex);
-
- return 0;
-
-err:
- drm_connector_list_iter_begin(dev, &conn_iter);
- drm_for_each_connector_iter(connector, &conn_iter) {
- if (failed_connector == connector)
- break;
- drm_connector_unregister(connector);
- }
- drm_connector_list_iter_end(&conn_iter);
- mutex_unlock(&dev->mode_config.mutex);
-
- return ret;
-}
-
static int kirin_drm_bind(struct device *dev)
{
struct kirin_drm_data *driver_data;
@@ -279,17 +245,8 @@ static int kirin_drm_bind(struct device *dev)
drm_fbdev_generic_setup(drm_dev, 32);
- /* connectors should be registered after drm device register */
- if (driver_data->register_connects) {
- ret = kirin_drm_connectors_register(drm_dev);
- if (ret)
- goto err_drm_dev_unregister;
- }
-
return 0;
-err_drm_dev_unregister:
- drm_dev_unregister(drm_dev);
err_kms_cleanup:
kirin_drm_kms_cleanup(drm_dev);
err_drm_dev_put:
diff --git a/drivers/gpu/drm/hisilicon/kirin/kirin_drm_drv.h b/drivers/gpu/drm/hisilicon/kirin/kirin_drm_drv.h
index 4d5c05a24065..dee8ec2f7f2e 100644
--- a/drivers/gpu/drm/hisilicon/kirin/kirin_drm_drv.h
+++ b/drivers/gpu/drm/hisilicon/kirin/kirin_drm_drv.h
@@ -37,7 +37,6 @@ struct kirin_drm_data {
u32 channel_formats_cnt;
int config_max_width;
int config_max_height;
- bool register_connects;
u32 num_planes;
u32 prim_plane;
diff --git a/drivers/gpu/drm/i2c/sil164_drv.c b/drivers/gpu/drm/i2c/sil164_drv.c
index a839f78a4c8a..741886b54419 100644
--- a/drivers/gpu/drm/i2c/sil164_drv.c
+++ b/drivers/gpu/drm/i2c/sil164_drv.c
@@ -393,7 +393,7 @@ sil164_detect_slave(struct i2c_client *client)
return NULL;
}
- return i2c_new_device(adap, &info);
+ return i2c_new_client_device(adap, &info);
}
static int
@@ -402,6 +402,7 @@ sil164_encoder_init(struct i2c_client *client,
struct drm_encoder_slave *encoder)
{
struct sil164_priv *priv;
+ struct i2c_client *slave_client;
priv = kzalloc(sizeof(*priv), GFP_KERNEL);
if (!priv)
@@ -410,7 +411,9 @@ sil164_encoder_init(struct i2c_client *client,
encoder->slave_priv = priv;
encoder->slave_funcs = &sil164_encoder_funcs;
- priv->duallink_slave = sil164_detect_slave(client);
+ slave_client = sil164_detect_slave(client);
+ if (!IS_ERR(slave_client))
+ priv->duallink_slave = slave_client;
return 0;
}
diff --git a/drivers/gpu/drm/i2c/tda998x_drv.c b/drivers/gpu/drm/i2c/tda998x_drv.c
index c3332209f27a..9517f522dcb9 100644
--- a/drivers/gpu/drm/i2c/tda998x_drv.c
+++ b/drivers/gpu/drm/i2c/tda998x_drv.c
@@ -19,6 +19,7 @@
#include <drm/drm_of.h>
#include <drm/drm_print.h>
#include <drm/drm_probe_helper.h>
+#include <drm/drm_simple_kms_helper.h>
#include <drm/i2c/tda998x.h>
#include <media/cec-notifier.h>
@@ -1132,7 +1133,8 @@ static void tda998x_audio_shutdown(struct device *dev, void *data)
mutex_unlock(&priv->audio_mutex);
}
-int tda998x_audio_digital_mute(struct device *dev, void *data, bool enable)
+static int tda998x_audio_digital_mute(struct device *dev, void *data,
+ bool enable)
{
struct tda998x_priv *priv = dev_get_drvdata(dev);
@@ -1949,9 +1951,9 @@ static int tda998x_create(struct device *dev)
cec_info.platform_data = &priv->cec_glue;
cec_info.irq = client->irq;
- priv->cec = i2c_new_device(client->adapter, &cec_info);
- if (!priv->cec) {
- ret = -ENODEV;
+ priv->cec = i2c_new_client_device(client->adapter, &cec_info);
+ if (IS_ERR(priv->cec)) {
+ ret = PTR_ERR(priv->cec);
goto fail;
}
@@ -1997,15 +1999,6 @@ err_irq:
/* DRM encoder functions */
-static void tda998x_encoder_destroy(struct drm_encoder *encoder)
-{
- drm_encoder_cleanup(encoder);
-}
-
-static const struct drm_encoder_funcs tda998x_encoder_funcs = {
- .destroy = tda998x_encoder_destroy,
-};
-
static int tda998x_encoder_init(struct device *dev, struct drm_device *drm)
{
struct tda998x_priv *priv = dev_get_drvdata(dev);
@@ -2023,8 +2016,8 @@ static int tda998x_encoder_init(struct device *dev, struct drm_device *drm)
priv->encoder.possible_crtcs = crtcs;
- ret = drm_encoder_init(drm, &priv->encoder, &tda998x_encoder_funcs,
- DRM_MODE_ENCODER_TMDS, NULL);
+ ret = drm_simple_encoder_init(drm, &priv->encoder,
+ DRM_MODE_ENCODER_TMDS);
if (ret)
goto err_encoder;
diff --git a/drivers/gpu/drm/i915/Kconfig.profile b/drivers/gpu/drm/i915/Kconfig.profile
index 0bfd276c19fe..35bbe2b80596 100644
--- a/drivers/gpu/drm/i915/Kconfig.profile
+++ b/drivers/gpu/drm/i915/Kconfig.profile
@@ -1,3 +1,15 @@
+config DRM_I915_FENCE_TIMEOUT
+ int "Timeout for unsignaled foreign fences (ms, jiffy granularity)"
+ default 10000 # milliseconds
+ help
+ When listening to a foreign fence, we install a supplementary timer
+ to ensure that we are always signaled and our userspace is able to
+ make forward progress. This value specifies the timeout used for an
+ unsignaled foreign fence.
+
+ May be 0 to disable the timeout, and rely on the foreign fence being
+ eventually signaled.
+
config DRM_I915_USERFAULT_AUTOSUSPEND
int "Runtime autosuspend delay for userspace GGTT mmaps (ms)"
default 250 # milliseconds
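The new DRM_I915_FENCE_TIMEOUT option is consumed by driver code added elsewhere in this series (the Makefile below gains i915_config.o). A hedged sketch of how such a millisecond Kconfig value is typically turned into a jiffy timeout; the helper name and exact semantics here are assumptions, not taken from this diff:

#include <linux/jiffies.h>

/* Hypothetical helper: 0 means "no timeout, rely on the foreign fence
 * being signaled eventually", anything else is converted to jiffies. */
static unsigned long sketch_fence_timeout(void)
{
	if (!CONFIG_DRM_I915_FENCE_TIMEOUT)
		return 0;

	return msecs_to_jiffies(CONFIG_DRM_I915_FENCE_TIMEOUT);
}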
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 6cd1f6253814..b0da6ea6e3f1 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -22,6 +22,7 @@ subdir-ccflags-y += $(call cc-disable-warning, sign-compare)
subdir-ccflags-y += $(call cc-disable-warning, sometimes-uninitialized)
subdir-ccflags-y += $(call cc-disable-warning, initializer-overrides)
subdir-ccflags-y += $(call cc-disable-warning, uninitialized)
+subdir-ccflags-y += $(call cc-disable-warning, frame-address)
subdir-ccflags-$(CONFIG_DRM_I915_WERROR) += -Werror
# Fine grained warnings disable
@@ -34,6 +35,7 @@ subdir-ccflags-y += -I$(srctree)/$(src)
# core driver code
i915-y += i915_drv.o \
+ i915_config.o \
i915_irq.o \
i915_getparam.o \
i915_params.o \
@@ -86,10 +88,12 @@ gt-y += \
gt/intel_engine_cs.o \
gt/intel_engine_heartbeat.o \
gt/intel_engine_pm.o \
- gt/intel_engine_pool.o \
gt/intel_engine_user.o \
gt/intel_ggtt.o \
+ gt/intel_ggtt_fencing.o \
gt/intel_gt.o \
+ gt/intel_gt_buffer_pool.o \
+ gt/intel_gt_clock_utils.o \
gt/intel_gt_irq.o \
gt/intel_gt_pm.o \
gt/intel_gt_pm_irq.o \
@@ -108,6 +112,7 @@ gt-y += \
gt/intel_sseu.o \
gt/intel_timeline.o \
gt/intel_workarounds.o \
+ gt/shmem_utils.o \
gt/sysfs_engines.o
# autogenerated null render state
gt-y += \
@@ -150,7 +155,6 @@ i915-y += \
i915_buddy.o \
i915_cmd_parser.o \
i915_gem_evict.o \
- i915_gem_fence_reg.o \
i915_gem_gtt.o \
i915_gem.o \
i915_globals.o \
@@ -164,14 +168,18 @@ i915-y += \
# general-purpose microcontroller (GuC) support
i915-y += gt/uc/intel_uc.o \
+ gt/uc/intel_uc_debugfs.o \
gt/uc/intel_uc_fw.o \
gt/uc/intel_guc.o \
gt/uc/intel_guc_ads.o \
gt/uc/intel_guc_ct.o \
+ gt/uc/intel_guc_debugfs.o \
gt/uc/intel_guc_fw.o \
gt/uc/intel_guc_log.o \
+ gt/uc/intel_guc_log_debugfs.o \
gt/uc/intel_guc_submission.o \
gt/uc/intel_huc.o \
+ gt/uc/intel_huc_debugfs.o \
gt/uc/intel_huc_fw.o
# modesetting core code
@@ -240,23 +248,6 @@ i915-y += \
display/vlv_dsi.o \
display/vlv_dsi_pll.o
-# perf code
-i915-y += \
- oa/i915_oa_hsw.o \
- oa/i915_oa_bdw.o \
- oa/i915_oa_chv.o \
- oa/i915_oa_sklgt2.o \
- oa/i915_oa_sklgt3.o \
- oa/i915_oa_sklgt4.o \
- oa/i915_oa_bxt.o \
- oa/i915_oa_kblgt2.o \
- oa/i915_oa_kblgt3.o \
- oa/i915_oa_glk.o \
- oa/i915_oa_cflgt2.o \
- oa/i915_oa_cflgt3.o \
- oa/i915_oa_cnl.o \
- oa/i915_oa_icl.o \
- oa/i915_oa_tgl.o
i915-y += i915_perf.o
# Post-mortem debug and GPU hang state capture
@@ -270,7 +261,8 @@ i915-$(CONFIG_DRM_I915_SELFTEST) += \
selftests/igt_live_test.o \
selftests/igt_mmap.o \
selftests/igt_reset.o \
- selftests/igt_spinner.o
+ selftests/igt_spinner.o \
+ selftests/librapl.o
# virtual gpu code
i915-y += i915_vgpu.o
diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c
index 17cee6f80d8b..4fec5bd64920 100644
--- a/drivers/gpu/drm/i915/display/icl_dsi.c
+++ b/drivers/gpu/drm/i915/display/icl_dsi.c
@@ -36,15 +36,15 @@
#include "intel_panel.h"
#include "intel_vdsc.h"
-static inline int header_credits_available(struct drm_i915_private *dev_priv,
- enum transcoder dsi_trans)
+static int header_credits_available(struct drm_i915_private *dev_priv,
+ enum transcoder dsi_trans)
{
return (intel_de_read(dev_priv, DSI_CMD_TXCTL(dsi_trans)) & FREE_HEADER_CREDIT_MASK)
>> FREE_HEADER_CREDIT_SHIFT;
}
-static inline int payload_credits_available(struct drm_i915_private *dev_priv,
- enum transcoder dsi_trans)
+static int payload_credits_available(struct drm_i915_private *dev_priv,
+ enum transcoder dsi_trans)
{
return (intel_de_read(dev_priv, DSI_CMD_TXCTL(dsi_trans)) & FREE_PLOAD_CREDIT_MASK)
>> FREE_PLOAD_CREDIT_SHIFT;
@@ -186,16 +186,19 @@ static int dsi_send_pkt_hdr(struct intel_dsi_host *host,
static int dsi_send_pkt_payld(struct intel_dsi_host *host,
struct mipi_dsi_packet pkt)
{
+ struct intel_dsi *intel_dsi = host->intel_dsi;
+ struct drm_i915_private *i915 = to_i915(intel_dsi->base.base.dev);
+
/* payload queue can accept *256 bytes*, check limit */
if (pkt.payload_length > MAX_PLOAD_CREDIT * 4) {
- DRM_ERROR("payload size exceeds max queue limit\n");
+ drm_err(&i915->drm, "payload size exceeds max queue limit\n");
return -1;
}
/* load data into command payload queue */
if (!add_payld_to_queue(host, pkt.payload,
pkt.payload_length)) {
- DRM_ERROR("adding payload to queue failed\n");
+ drm_err(&i915->drm, "adding payload to queue failed\n");
return -1;
}
@@ -744,6 +747,18 @@ gen11_dsi_configure_transcoder(struct intel_encoder *encoder,
tmp |= VIDEO_MODE_SYNC_PULSE;
break;
}
+ } else {
+ /*
+ * FIXME: Retrieve this info from VBT.
+ * As per the spec when dsi transcoder is operating
+ * in TE GATE mode, TE comes from GPIO
+ * which is UTIL PIN for DSI 0.
+ * It is assumed that this GPIO is not used for
+ * any other purpose.
+ */
+ tmp &= ~OP_MODE_MASK;
+ tmp |= CMD_MODE_TE_GATE;
+ tmp |= TE_SOURCE_GPIO;
}
intel_de_write(dev_priv, DSI_TRANS_FUNC_CONF(dsi_trans), tmp);
@@ -837,14 +852,33 @@ gen11_dsi_set_transcoder_timings(struct intel_encoder *encoder,
}
hactive = adjusted_mode->crtc_hdisplay;
- htotal = DIV_ROUND_UP(adjusted_mode->crtc_htotal * mul, div);
+
+ if (is_vid_mode(intel_dsi))
+ htotal = DIV_ROUND_UP(adjusted_mode->crtc_htotal * mul, div);
+ else
+ htotal = DIV_ROUND_UP((hactive + 160) * mul, div);
+
hsync_start = DIV_ROUND_UP(adjusted_mode->crtc_hsync_start * mul, div);
hsync_end = DIV_ROUND_UP(adjusted_mode->crtc_hsync_end * mul, div);
hsync_size = hsync_end - hsync_start;
hback_porch = (adjusted_mode->crtc_htotal -
adjusted_mode->crtc_hsync_end);
vactive = adjusted_mode->crtc_vdisplay;
- vtotal = adjusted_mode->crtc_vtotal;
+
+ if (is_vid_mode(intel_dsi)) {
+ vtotal = adjusted_mode->crtc_vtotal;
+ } else {
+ int bpp, line_time_us, byte_clk_period_ns;
+
+ if (crtc_state->dsc.compression_enable)
+ bpp = crtc_state->dsc.compressed_bpp;
+ else
+ bpp = mipi_dsi_pixel_format_to_bpp(intel_dsi->pixel_format);
+
+ byte_clk_period_ns = 1000000 / afe_clk(encoder, crtc_state);
+ line_time_us = (htotal * (bpp / 8) * byte_clk_period_ns) / (1000 * intel_dsi->lane_count);
+ vtotal = vactive + DIV_ROUND_UP(400, line_time_us);
+ }
vsync_start = adjusted_mode->crtc_vsync_start;
vsync_end = adjusted_mode->crtc_vsync_end;
vsync_shift = hsync_start - htotal / 2;
@@ -873,7 +907,7 @@ gen11_dsi_set_transcoder_timings(struct intel_encoder *encoder,
}
/* TRANS_HSYNC register to be programmed only for video mode */
- if (intel_dsi->operation_mode == INTEL_DSI_VIDEO_MODE) {
+ if (is_vid_mode(intel_dsi)) {
if (intel_dsi->video_mode_format ==
VIDEO_MODE_NON_BURST_WITH_SYNC_PULSE) {
/* BSPEC: hsync size should be atleast 16 pixels */
@@ -916,22 +950,27 @@ gen11_dsi_set_transcoder_timings(struct intel_encoder *encoder,
if (vsync_start < vactive)
drm_err(&dev_priv->drm, "vsync_start less than vactive\n");
- /* program TRANS_VSYNC register */
- for_each_dsi_port(port, intel_dsi->ports) {
- dsi_trans = dsi_port_to_transcoder(port);
- intel_de_write(dev_priv, VSYNC(dsi_trans),
- (vsync_start - 1) | ((vsync_end - 1) << 16));
+ /* program TRANS_VSYNC register for video mode only */
+ if (is_vid_mode(intel_dsi)) {
+ for_each_dsi_port(port, intel_dsi->ports) {
+ dsi_trans = dsi_port_to_transcoder(port);
+ intel_de_write(dev_priv, VSYNC(dsi_trans),
+ (vsync_start - 1) | ((vsync_end - 1) << 16));
+ }
}
/*
- * FIXME: It has to be programmed only for interlaced
+ * FIXME: It has to be programmed only for video modes and interlaced
* modes. Put the check condition here once interlaced
* info available as described above.
* program TRANS_VSYNCSHIFT register
*/
- for_each_dsi_port(port, intel_dsi->ports) {
- dsi_trans = dsi_port_to_transcoder(port);
- intel_de_write(dev_priv, VSYNCSHIFT(dsi_trans), vsync_shift);
+ if (is_vid_mode(intel_dsi)) {
+ for_each_dsi_port(port, intel_dsi->ports) {
+ dsi_trans = dsi_port_to_transcoder(port);
+ intel_de_write(dev_priv, VSYNCSHIFT(dsi_trans),
+ vsync_shift);
+ }
}
/* program TRANS_VBLANK register, should be same as vtotal programmed */
@@ -1016,6 +1055,32 @@ static void gen11_dsi_setup_timeouts(struct intel_encoder *encoder,
}
}
+static void gen11_dsi_config_util_pin(struct intel_encoder *encoder,
+ bool enable)
+{
+ struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
+ struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
+ u32 tmp;
+
+ /*
+ * The UTIL pin is used as the TE input for DSI0;
+ * for dual link/DSI1, TE comes from the slave DSI1
+ * through GPIO.
+ */
+ if (is_vid_mode(intel_dsi) || (intel_dsi->ports & BIT(PORT_B)))
+ return;
+
+ tmp = intel_de_read(dev_priv, UTIL_PIN_CTL);
+
+ if (enable) {
+ tmp |= UTIL_PIN_DIRECTION_INPUT;
+ tmp |= UTIL_PIN_ENABLE;
+ } else {
+ tmp &= ~UTIL_PIN_ENABLE;
+ }
+ intel_de_write(dev_priv, UTIL_PIN_CTL, tmp);
+}
+
static void
gen11_dsi_enable_port_and_phy(struct intel_encoder *encoder,
const struct intel_crtc_state *crtc_state)
@@ -1037,6 +1102,9 @@ gen11_dsi_enable_port_and_phy(struct intel_encoder *encoder,
/* setup D-PHY timings */
gen11_dsi_setup_dphy_timings(encoder, crtc_state);
+ /* Since transcoder is configured to take events from GPIO */
+ gen11_dsi_config_util_pin(encoder, true);
+
/* step 4h: setup DSI protocol timeouts */
gen11_dsi_setup_timeouts(encoder, crtc_state);
@@ -1088,7 +1156,8 @@ static void gen11_dsi_powerup_panel(struct intel_encoder *encoder)
wait_for_cmds_dispatched_to_panel(encoder);
}
-static void gen11_dsi_pre_pll_enable(struct intel_encoder *encoder,
+static void gen11_dsi_pre_pll_enable(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *crtc_state,
const struct drm_connector_state *conn_state)
{
@@ -1099,7 +1168,8 @@ static void gen11_dsi_pre_pll_enable(struct intel_encoder *encoder,
gen11_dsi_program_esc_clk_div(encoder, crtc_state);
}
-static void gen11_dsi_pre_enable(struct intel_encoder *encoder,
+static void gen11_dsi_pre_enable(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *pipe_config,
const struct drm_connector_state *conn_state)
{
@@ -1118,13 +1188,14 @@ static void gen11_dsi_pre_enable(struct intel_encoder *encoder,
gen11_dsi_set_transcoder_timings(encoder, pipe_config);
}
-static void gen11_dsi_enable(struct intel_encoder *encoder,
+static void gen11_dsi_enable(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *crtc_state,
const struct drm_connector_state *conn_state)
{
struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
- WARN_ON(crtc_state->has_pch_encoder);
+ drm_WARN_ON(state->base.dev, crtc_state->has_pch_encoder);
/* step6d: enable dsi transcoder */
gen11_dsi_enable_transcoder(encoder);
@@ -1180,6 +1251,15 @@ static void gen11_dsi_deconfigure_trancoder(struct intel_encoder *encoder)
enum transcoder dsi_trans;
u32 tmp;
+ /* disable periodic update mode */
+ if (is_cmd_mode(intel_dsi)) {
+ for_each_dsi_port(port, intel_dsi->ports) {
+ tmp = intel_de_read(dev_priv, DSI_CMD_FRMCTL(port));
+ tmp &= ~DSI_PERIODIC_FRAME_UPDATE_ENABLE;
+ intel_de_write(dev_priv, DSI_CMD_FRMCTL(port), tmp);
+ }
+ }
+
/* put dsi link in ULPS */
for_each_dsi_port(port, intel_dsi->ports) {
dsi_trans = dsi_port_to_transcoder(port);
@@ -1264,7 +1344,8 @@ static void gen11_dsi_disable_io_power(struct intel_encoder *encoder)
}
}
-static void gen11_dsi_disable(struct intel_encoder *encoder,
+static void gen11_dsi_disable(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *old_crtc_state,
const struct drm_connector_state *old_conn_state)
{
@@ -1286,11 +1367,14 @@ static void gen11_dsi_disable(struct intel_encoder *encoder,
/* step3: disable port */
gen11_dsi_disable_port(encoder);
+ gen11_dsi_config_util_pin(encoder, false);
+
/* step4: disable IO power */
gen11_dsi_disable_io_power(encoder);
}
-static void gen11_dsi_post_disable(struct intel_encoder *encoder,
+static void gen11_dsi_post_disable(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *old_crtc_state,
const struct drm_connector_state *old_conn_state)
{
@@ -1347,6 +1431,22 @@ static void gen11_dsi_get_timings(struct intel_encoder *encoder,
adjusted_mode->crtc_vblank_end = adjusted_mode->crtc_vtotal;
}
+static bool gen11_dsi_is_periodic_cmd_mode(struct intel_dsi *intel_dsi)
+{
+ struct drm_device *dev = intel_dsi->base.base.dev;
+ struct drm_i915_private *dev_priv = to_i915(dev);
+ enum transcoder dsi_trans;
+ u32 val;
+
+ if (intel_dsi->ports == BIT(PORT_B))
+ dsi_trans = TRANSCODER_DSI_1;
+ else
+ dsi_trans = TRANSCODER_DSI_0;
+
+ val = intel_de_read(dev_priv, DSI_TRANS_FUNC_CONF(dsi_trans));
+ return (val & DSI_PERIODIC_FRAME_UPDATE_ENABLE);
+}
+
static void gen11_dsi_get_config(struct intel_encoder *encoder,
struct intel_crtc_state *pipe_config)
{
@@ -1367,6 +1467,10 @@ static void gen11_dsi_get_config(struct intel_encoder *encoder,
gen11_dsi_get_timings(encoder, pipe_config);
pipe_config->output_types |= BIT(INTEL_OUTPUT_DSI);
pipe_config->pipe_bpp = bdw_get_pipemisc_bpp(crtc);
+
+ if (gen11_dsi_is_periodic_cmd_mode(intel_dsi))
+ pipe_config->hw.adjusted_mode.private_flags |=
+ I915_MODE_FLAG_DSI_PERIODIC_CMD_MODE;
}
static int gen11_dsi_dsc_compute_config(struct intel_encoder *encoder,
@@ -1417,18 +1521,22 @@ static int gen11_dsi_compute_config(struct intel_encoder *encoder,
struct intel_crtc_state *pipe_config,
struct drm_connector_state *conn_state)
{
+ struct drm_i915_private *i915 = to_i915(encoder->base.dev);
struct intel_dsi *intel_dsi = container_of(encoder, struct intel_dsi,
base);
struct intel_connector *intel_connector = intel_dsi->attached_connector;
- struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc);
const struct drm_display_mode *fixed_mode =
- intel_connector->panel.fixed_mode;
+ intel_connector->panel.fixed_mode;
struct drm_display_mode *adjusted_mode =
- &pipe_config->hw.adjusted_mode;
+ &pipe_config->hw.adjusted_mode;
+ int ret;
pipe_config->output_format = INTEL_OUTPUT_FORMAT_RGB;
intel_fixed_panel_mode(fixed_mode, adjusted_mode);
- intel_pch_panel_fitting(crtc, pipe_config, conn_state->scaling_mode);
+
+ ret = intel_pch_panel_fitting(pipe_config, conn_state);
+ if (ret)
+ return ret;
adjusted_mode->flags = 0;
@@ -1446,10 +1554,32 @@ static int gen11_dsi_compute_config(struct intel_encoder *encoder,
pipe_config->clock_set = true;
if (gen11_dsi_dsc_compute_config(encoder, pipe_config))
- DRM_DEBUG_KMS("Attempting to use DSC failed\n");
+ drm_dbg_kms(&i915->drm, "Attempting to use DSC failed\n");
pipe_config->port_clock = afe_clk(encoder, pipe_config) / 5;
+ /* We would not operate in periodic command mode */
+ pipe_config->hw.adjusted_mode.private_flags &=
+ ~I915_MODE_FLAG_DSI_PERIODIC_CMD_MODE;
+
+ /*
+ * In case of TE GATE cmd mode, we
+ * receive TE from the slave if
+ * dual link is enabled
+ */
+ if (is_cmd_mode(intel_dsi)) {
+ if (intel_dsi->ports == (BIT(PORT_B) | BIT(PORT_A)))
+ pipe_config->hw.adjusted_mode.private_flags |=
+ I915_MODE_FLAG_DSI_USE_TE1 |
+ I915_MODE_FLAG_DSI_USE_TE0;
+ else if (intel_dsi->ports == BIT(PORT_B))
+ pipe_config->hw.adjusted_mode.private_flags |=
+ I915_MODE_FLAG_DSI_USE_TE1;
+ else
+ pipe_config->hw.adjusted_mode.private_flags |=
+ I915_MODE_FLAG_DSI_USE_TE0;
+ }
+
return 0;
}
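Besides the command-mode/TE work, the icl_dsi.c hunks above also track an encoder-hook signature change: the pre_pll_enable/pre_enable/enable/disable/post_disable callbacks now take the intel_atomic_state as their first argument, which is what allows device-aware macros such as drm_WARN_ON(state->base.dev, ...) to replace the bare WARN_ON(). A sketch of the updated hook shape, matching the prototypes shown in the hunks (types come from the i915 display headers):

/* Sketch: the new encoder hook signature used throughout this series. */
static void sketch_encoder_enable(struct intel_atomic_state *state,
				  struct intel_encoder *encoder,
				  const struct intel_crtc_state *crtc_state,
				  const struct drm_connector_state *conn_state)
{
	/* platform-specific enable sequence goes here */
}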
diff --git a/drivers/gpu/drm/i915/display/intel_atomic_plane.c b/drivers/gpu/drm/i915/display/intel_atomic_plane.c
index 457b258683d3..79032701873a 100644
--- a/drivers/gpu/drm/i915/display/intel_atomic_plane.c
+++ b/drivers/gpu/drm/i915/display/intel_atomic_plane.c
@@ -125,7 +125,7 @@ intel_plane_destroy_state(struct drm_plane *plane,
struct drm_plane_state *state)
{
struct intel_plane_state *plane_state = to_intel_plane_state(state);
- WARN_ON(plane_state->vma);
+ drm_WARN_ON(plane->dev, plane_state->vma);
__drm_atomic_helper_plane_destroy_state(&plane_state->uapi);
if (plane_state->hw.fb)
@@ -264,6 +264,20 @@ void intel_plane_copy_uapi_to_hw_state(struct intel_plane_state *plane_state,
plane_state->hw.color_range = from_plane_state->uapi.color_range;
}
+void intel_plane_set_invisible(struct intel_crtc_state *crtc_state,
+ struct intel_plane_state *plane_state)
+{
+ struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane);
+
+ crtc_state->active_planes &= ~BIT(plane->id);
+ crtc_state->nv12_planes &= ~BIT(plane->id);
+ crtc_state->c8_planes &= ~BIT(plane->id);
+ crtc_state->data_rate[plane->id] = 0;
+ crtc_state->min_cdclk[plane->id] = 0;
+
+ plane_state->uapi.visible = false;
+}
+
int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_state,
struct intel_crtc_state *new_crtc_state,
const struct intel_plane_state *old_plane_state,
@@ -273,12 +287,7 @@ int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_
const struct drm_framebuffer *fb = new_plane_state->hw.fb;
int ret;
- new_crtc_state->active_planes &= ~BIT(plane->id);
- new_crtc_state->nv12_planes &= ~BIT(plane->id);
- new_crtc_state->c8_planes &= ~BIT(plane->id);
- new_crtc_state->data_rate[plane->id] = 0;
- new_crtc_state->min_cdclk[plane->id] = 0;
- new_plane_state->uapi.visible = false;
+ intel_plane_set_invisible(new_crtc_state, new_plane_state);
if (!new_plane_state->hw.crtc && !old_plane_state->hw.crtc)
return 0;
@@ -387,7 +396,7 @@ skl_next_plane_to_commit(struct intel_atomic_state *state,
}
/* should never happen */
- WARN_ON(1);
+ drm_WARN_ON(state->base.dev, 1);
return NULL;
}
diff --git a/drivers/gpu/drm/i915/display/intel_atomic_plane.h b/drivers/gpu/drm/i915/display/intel_atomic_plane.h
index a6bbf42bae1f..59dd1fbb02ea 100644
--- a/drivers/gpu/drm/i915/display/intel_atomic_plane.h
+++ b/drivers/gpu/drm/i915/display/intel_atomic_plane.h
@@ -52,5 +52,7 @@ int intel_plane_atomic_calc_changes(const struct intel_crtc_state *old_crtc_stat
int intel_plane_calc_min_cdclk(struct intel_atomic_state *state,
struct intel_plane *plane,
bool *need_cdclk_calc);
+void intel_plane_set_invisible(struct intel_crtc_state *crtc_state,
+ struct intel_plane_state *plane_state);
#endif /* __INTEL_ATOMIC_PLANE_H__ */
diff --git a/drivers/gpu/drm/i915/display/intel_audio.c b/drivers/gpu/drm/i915/display/intel_audio.c
index 62f234f641de..ad4aa66fd676 100644
--- a/drivers/gpu/drm/i915/display/intel_audio.c
+++ b/drivers/gpu/drm/i915/display/intel_audio.c
@@ -252,14 +252,16 @@ static u32 audio_config_hdmi_pixel_clock(const struct intel_crtc_state *crtc_sta
i = ARRAY_SIZE(hdmi_audio_clock);
if (i == ARRAY_SIZE(hdmi_audio_clock)) {
- DRM_DEBUG_KMS("HDMI audio pixel clock setting for %d not found, falling back to defaults\n",
- adjusted_mode->crtc_clock);
+ drm_dbg_kms(&dev_priv->drm,
+ "HDMI audio pixel clock setting for %d not found, falling back to defaults\n",
+ adjusted_mode->crtc_clock);
i = 1;
}
- DRM_DEBUG_KMS("Configuring HDMI audio for pixel clock %d (0x%08x)\n",
- hdmi_audio_clock[i].clock,
- hdmi_audio_clock[i].config);
+ drm_dbg_kms(&dev_priv->drm,
+ "Configuring HDMI audio for pixel clock %d (0x%08x)\n",
+ hdmi_audio_clock[i].clock,
+ hdmi_audio_clock[i].config);
return hdmi_audio_clock[i].config;
}
@@ -512,6 +514,124 @@ static void hsw_audio_codec_disable(struct intel_encoder *encoder,
mutex_unlock(&dev_priv->av_mutex);
}
+static unsigned int calc_hblank_early_prog(struct intel_encoder *encoder,
+ const struct intel_crtc_state *crtc_state)
+{
+ struct drm_i915_private *i915 = to_i915(encoder->base.dev);
+ unsigned int link_clks_available, link_clks_required;
+ unsigned int tu_data, tu_line, link_clks_active;
+ unsigned int h_active, h_total, hblank_delta, pixel_clk;
+ unsigned int fec_coeff, cdclk, vdsc_bpp;
+ unsigned int link_clk, lanes;
+ unsigned int hblank_rise;
+
+ h_active = crtc_state->hw.adjusted_mode.crtc_hdisplay;
+ h_total = crtc_state->hw.adjusted_mode.crtc_htotal;
+ pixel_clk = crtc_state->hw.adjusted_mode.crtc_clock;
+ vdsc_bpp = crtc_state->dsc.compressed_bpp;
+ cdclk = i915->cdclk.hw.cdclk;
+ /* fec= 0.972261, using rounding multiplier of 1000000 */
+ fec_coeff = 972261;
+ link_clk = crtc_state->port_clock;
+ lanes = crtc_state->lane_count;
+
+ drm_dbg_kms(&i915->drm, "h_active = %u link_clk = %u :"
+ "lanes = %u vdsc_bpp = %u cdclk = %u\n",
+ h_active, link_clk, lanes, vdsc_bpp, cdclk);
+
+ if (WARN_ON(!link_clk || !pixel_clk || !lanes || !vdsc_bpp || !cdclk))
+ return 0;
+
+ link_clks_available = (h_total - h_active) * link_clk / pixel_clk - 28;
+ link_clks_required = DIV_ROUND_UP(192000 * h_total, 1000 * pixel_clk) * (48 / lanes + 2);
+
+ if (link_clks_available > link_clks_required)
+ hblank_delta = 32;
+ else
+ hblank_delta = DIV64_U64_ROUND_UP(mul_u32_u32(5 * (link_clk + cdclk), pixel_clk),
+ mul_u32_u32(link_clk, cdclk));
+
+ tu_data = div64_u64(mul_u32_u32(pixel_clk * vdsc_bpp * 8, 1000000),
+ mul_u32_u32(link_clk * lanes, fec_coeff));
+ tu_line = div64_u64(h_active * mul_u32_u32(link_clk, fec_coeff),
+ mul_u32_u32(64 * pixel_clk, 1000000));
+ link_clks_active = (tu_line - 1) * 64 + tu_data;
+
+ hblank_rise = (link_clks_active + 6 * DIV_ROUND_UP(link_clks_active, 250) + 4) * pixel_clk / link_clk;
+
+ return h_active - hblank_rise + hblank_delta;
+}
+
+static unsigned int calc_samples_room(const struct intel_crtc_state *crtc_state)
+{
+ unsigned int h_active, h_total, pixel_clk;
+ unsigned int link_clk, lanes;
+
+ h_active = crtc_state->hw.adjusted_mode.hdisplay;
+ h_total = crtc_state->hw.adjusted_mode.htotal;
+ pixel_clk = crtc_state->hw.adjusted_mode.clock;
+ link_clk = crtc_state->port_clock;
+ lanes = crtc_state->lane_count;
+
+ return ((h_total - h_active) * link_clk - 12 * pixel_clk) /
+ (pixel_clk * (48 / lanes + 2));
+}
+
+static void enable_audio_dsc_wa(struct intel_encoder *encoder,
+ const struct intel_crtc_state *crtc_state)
+{
+ struct drm_i915_private *i915 = to_i915(encoder->base.dev);
+ struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
+ enum pipe pipe = crtc->pipe;
+ unsigned int hblank_early_prog, samples_room;
+ unsigned int val;
+
+ if (INTEL_GEN(i915) < 11)
+ return;
+
+ val = intel_de_read(i915, AUD_CONFIG_BE);
+
+ if (INTEL_GEN(i915) == 11)
+ val |= HBLANK_EARLY_ENABLE_ICL(pipe);
+ else if (INTEL_GEN(i915) >= 12)
+ val |= HBLANK_EARLY_ENABLE_TGL(pipe);
+
+ if (crtc_state->dsc.compression_enable &&
+ (crtc_state->hw.adjusted_mode.hdisplay >= 3840 &&
+ crtc_state->hw.adjusted_mode.vdisplay >= 2160)) {
+ /* Get hblank early enable value required */
+ hblank_early_prog = calc_hblank_early_prog(encoder, crtc_state);
+ if (hblank_early_prog < 32) {
+ val &= ~HBLANK_START_COUNT_MASK(pipe);
+ val |= HBLANK_START_COUNT(pipe, HBLANK_START_COUNT_32);
+ } else if (hblank_early_prog < 64) {
+ val &= ~HBLANK_START_COUNT_MASK(pipe);
+ val |= HBLANK_START_COUNT(pipe, HBLANK_START_COUNT_64);
+ } else if (hblank_early_prog < 96) {
+ val &= ~HBLANK_START_COUNT_MASK(pipe);
+ val |= HBLANK_START_COUNT(pipe, HBLANK_START_COUNT_96);
+ } else {
+ val &= ~HBLANK_START_COUNT_MASK(pipe);
+ val |= HBLANK_START_COUNT(pipe, HBLANK_START_COUNT_128);
+ }
+
+ /* Get samples room value required */
+ samples_room = calc_samples_room(crtc_state);
+ if (samples_room < 3) {
+ val &= ~NUMBER_SAMPLES_PER_LINE_MASK(pipe);
+ val |= NUMBER_SAMPLES_PER_LINE(pipe, samples_room);
+ } else {
+ /* Program 0 i.e "All Samples available in buffer" */
+ val &= ~NUMBER_SAMPLES_PER_LINE_MASK(pipe);
+ val |= NUMBER_SAMPLES_PER_LINE(pipe, 0x0);
+ }
+ }
+
+ intel_de_write(i915, AUD_CONFIG_BE, val);
+}
+
+#undef ROUNDING_FACTOR
+
static void hsw_audio_codec_enable(struct intel_encoder *encoder,
const struct intel_crtc_state *crtc_state,
const struct drm_connector_state *conn_state)
@@ -529,6 +649,10 @@ static void hsw_audio_codec_enable(struct intel_encoder *encoder,
mutex_lock(&dev_priv->av_mutex);
+ /* Enable Audio WA for 4k DSC usecases */
+ if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP))
+ enable_audio_dsc_wa(encoder, crtc_state);
+
/* Enable audio presence detect, invalidate ELD */
tmp = intel_de_read(dev_priv, HSW_AUD_PIN_ELD_CP_VLD);
tmp |= AUDIO_OUTPUT_ENABLE(cpu_transcoder);
@@ -891,7 +1015,7 @@ static unsigned long i915_audio_component_get_power(struct device *kdev)
ret = intel_display_power_get(dev_priv, POWER_DOMAIN_AUDIO);
if (dev_priv->audio_power_refcount++ == 0) {
- if (IS_TIGERLAKE(dev_priv) || IS_ICELAKE(dev_priv)) {
+ if (INTEL_GEN(dev_priv) >= 9) {
intel_de_write(dev_priv, AUD_FREQ_CNTRL,
dev_priv->audio_freq_cntrl);
drm_dbg_kms(&dev_priv->drm,
@@ -931,7 +1055,7 @@ static void i915_audio_component_codec_wake_override(struct device *kdev,
unsigned long cookie;
u32 tmp;
- if (!IS_GEN(dev_priv, 9))
+ if (INTEL_GEN(dev_priv) < 9)
return;
cookie = i915_audio_component_get_power(kdev);
@@ -1136,6 +1260,10 @@ static void i915_audio_component_unbind(struct device *i915_kdev,
drm_modeset_unlock_all(&dev_priv->drm);
device_link_remove(hda_kdev, i915_kdev);
+
+ if (dev_priv->audio_power_refcount)
+ drm_err(&dev_priv->drm, "audio power refcount %d after unbind\n",
+ dev_priv->audio_power_refcount);
}
static const struct component_ops i915_audio_component_bind_ops = {
@@ -1173,7 +1301,7 @@ static void i915_audio_component_init(struct drm_i915_private *dev_priv)
return;
}
- if (IS_TIGERLAKE(dev_priv) || IS_ICELAKE(dev_priv)) {
+ if (INTEL_GEN(dev_priv) >= 9) {
dev_priv->audio_freq_cntrl = intel_de_read(dev_priv,
AUD_FREQ_CNTRL);
drm_dbg_kms(&dev_priv->drm,
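calc_samples_room() above reduces to integer arithmetic on the adjusted mode; a worked example with illustrative numbers (not taken from any particular panel or from this patch):

/*
 * Assume h_active = 3840, h_total = 4400, pixel_clk = 594000 kHz,
 * link_clk = 810000 kHz and lanes = 4 (the kHz units cancel):
 *
 *   ((4400 - 3840) * 810000 - 12 * 594000) / (594000 * (48 / 4 + 2))
 * = (453600000 - 7128000) / 8316000
 * = 53					(integer division)
 *
 * 53 >= 3, so the "all samples available in buffer" value (0) is
 * programmed into AUD_CONFIG_BE for this pipe.
 */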
diff --git a/drivers/gpu/drm/i915/display/intel_bw.c b/drivers/gpu/drm/i915/display/intel_bw.c
index 58b264bc318d..fef04e2d954e 100644
--- a/drivers/gpu/drm/i915/display/intel_bw.c
+++ b/drivers/gpu/drm/i915/display/intel_bw.c
@@ -8,6 +8,9 @@
#include "intel_bw.h"
#include "intel_display_types.h"
#include "intel_sideband.h"
+#include "intel_atomic.h"
+#include "intel_pm.h"
+
/* Parameters for Qclk Geyserville (QGV) */
struct intel_qgv_point {
@@ -113,6 +116,26 @@ static int icl_pcode_read_qgv_point_info(struct drm_i915_private *dev_priv,
return 0;
}
+int icl_pcode_restrict_qgv_points(struct drm_i915_private *dev_priv,
+ u32 points_mask)
+{
+ int ret;
+
+ /* bspec says to keep retrying for at least 1 ms */
+ ret = skl_pcode_request(dev_priv, ICL_PCODE_SAGV_DE_MEM_SS_CONFIG,
+ points_mask,
+ ICL_PCODE_POINTS_RESTRICTED_MASK,
+ ICL_PCODE_POINTS_RESTRICTED,
+ 1);
+
+ if (ret < 0) {
+ drm_err(&dev_priv->drm, "Failed to disable qgv points (%d)\n", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
static int icl_get_qgv_points(struct drm_i915_private *dev_priv,
struct intel_qgv_info *qi)
{
@@ -240,6 +263,16 @@ static int icl_get_bw_info(struct drm_i915_private *dev_priv, const struct intel
break;
}
+ /*
+ * If SAGV is disabled in the BIOS, we always get only one
+ * SAGV point; we can't send PCode commands to restrict it,
+ * as they would fail and be pointless anyway.
+ */
+ if (qi.num_points == 1)
+ dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED;
+ else
+ dev_priv->sagv_status = I915_SAGV_ENABLED;
+
return 0;
}
@@ -248,6 +281,11 @@ static unsigned int icl_max_bw(struct drm_i915_private *dev_priv,
{
int i;
+ /*
+ * Let's return max bw for 0 planes
+ */
+ num_planes = max(1, num_planes);
+
for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) {
const struct intel_bw_info *bi =
&dev_priv->max_bw[i];
@@ -277,34 +315,6 @@ void intel_bw_init_hw(struct drm_i915_private *dev_priv)
icl_get_bw_info(dev_priv, &icl_sa_info);
}
-static unsigned int intel_max_data_rate(struct drm_i915_private *dev_priv,
- int num_planes)
-{
- if (INTEL_GEN(dev_priv) >= 11) {
- /*
- * Any bw group has same amount of QGV points
- */
- const struct intel_bw_info *bi =
- &dev_priv->max_bw[0];
- unsigned int min_bw = UINT_MAX;
- int i;
-
- /*
- * FIXME with SAGV disabled maybe we can assume
- * point 1 will always be used? Seems to match
- * the behaviour observed in the wild.
- */
- for (i = 0; i < bi->num_qgv_points; i++) {
- unsigned int bw = icl_max_bw(dev_priv, num_planes, i);
-
- min_bw = min(bw, min_bw);
- }
- return min_bw;
- } else {
- return UINT_MAX;
- }
-}
-
static unsigned int intel_bw_crtc_num_active_planes(const struct intel_crtc_state *crtc_state)
{
/*
@@ -338,16 +348,17 @@ void intel_bw_crtc_update(struct intel_bw_state *bw_state,
const struct intel_crtc_state *crtc_state)
{
struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
+ struct drm_i915_private *i915 = to_i915(crtc->base.dev);
bw_state->data_rate[crtc->pipe] =
intel_bw_crtc_data_rate(crtc_state);
bw_state->num_active_planes[crtc->pipe] =
intel_bw_crtc_num_active_planes(crtc_state);
- DRM_DEBUG_KMS("pipe %c data rate %u num active planes %u\n",
- pipe_name(crtc->pipe),
- bw_state->data_rate[crtc->pipe],
- bw_state->num_active_planes[crtc->pipe]);
+ drm_dbg_kms(&i915->drm, "pipe %c data rate %u num active planes %u\n",
+ pipe_name(crtc->pipe),
+ bw_state->data_rate[crtc->pipe],
+ bw_state->num_active_planes[crtc->pipe]);
}
static unsigned int intel_bw_num_active_planes(struct drm_i915_private *dev_priv,
@@ -374,7 +385,29 @@ static unsigned int intel_bw_data_rate(struct drm_i915_private *dev_priv,
return data_rate;
}
-static struct intel_bw_state *
+struct intel_bw_state *
+intel_atomic_get_old_bw_state(struct intel_atomic_state *state)
+{
+ struct drm_i915_private *dev_priv = to_i915(state->base.dev);
+ struct intel_global_state *bw_state;
+
+ bw_state = intel_atomic_get_old_global_obj_state(state, &dev_priv->bw_obj);
+
+ return to_intel_bw_state(bw_state);
+}
+
+struct intel_bw_state *
+intel_atomic_get_new_bw_state(struct intel_atomic_state *state)
+{
+ struct drm_i915_private *dev_priv = to_i915(state->base.dev);
+ struct intel_global_state *bw_state;
+
+ bw_state = intel_atomic_get_new_global_obj_state(state, &dev_priv->bw_obj);
+
+ return to_intel_bw_state(bw_state);
+}
+
+struct intel_bw_state *
intel_atomic_get_bw_state(struct intel_atomic_state *state)
{
struct drm_i915_private *dev_priv = to_i915(state->base.dev);
@@ -391,11 +424,16 @@ int intel_bw_atomic_check(struct intel_atomic_state *state)
{
struct drm_i915_private *dev_priv = to_i915(state->base.dev);
struct intel_crtc_state *new_crtc_state, *old_crtc_state;
- struct intel_bw_state *bw_state = NULL;
- unsigned int data_rate, max_data_rate;
+ struct intel_bw_state *new_bw_state = NULL;
+ const struct intel_bw_state *old_bw_state = NULL;
+ unsigned int data_rate;
unsigned int num_active_planes;
struct intel_crtc *crtc;
int i, ret;
+ u32 allowed_points = 0;
+ unsigned int max_bw_point = 0, max_bw = 0;
+ unsigned int num_qgv_points = dev_priv->max_bw[0].num_qgv_points;
+ u32 mask = (1 << num_qgv_points) - 1;
/* FIXME earlier gens need some checks too */
if (INTEL_GEN(dev_priv) < 11)
@@ -420,41 +458,93 @@ int intel_bw_atomic_check(struct intel_atomic_state *state)
old_active_planes == new_active_planes)
continue;
- bw_state = intel_atomic_get_bw_state(state);
- if (IS_ERR(bw_state))
- return PTR_ERR(bw_state);
+ new_bw_state = intel_atomic_get_bw_state(state);
+ if (IS_ERR(new_bw_state))
+ return PTR_ERR(new_bw_state);
- bw_state->data_rate[crtc->pipe] = new_data_rate;
- bw_state->num_active_planes[crtc->pipe] = new_active_planes;
+ new_bw_state->data_rate[crtc->pipe] = new_data_rate;
+ new_bw_state->num_active_planes[crtc->pipe] = new_active_planes;
drm_dbg_kms(&dev_priv->drm,
"pipe %c data rate %u num active planes %u\n",
pipe_name(crtc->pipe),
- bw_state->data_rate[crtc->pipe],
- bw_state->num_active_planes[crtc->pipe]);
+ new_bw_state->data_rate[crtc->pipe],
+ new_bw_state->num_active_planes[crtc->pipe]);
}
- if (!bw_state)
+ if (!new_bw_state)
return 0;
- ret = intel_atomic_lock_global_state(&bw_state->base);
+ ret = intel_atomic_lock_global_state(&new_bw_state->base);
if (ret)
return ret;
- data_rate = intel_bw_data_rate(dev_priv, bw_state);
- num_active_planes = intel_bw_num_active_planes(dev_priv, bw_state);
+ data_rate = intel_bw_data_rate(dev_priv, new_bw_state);
+ data_rate = DIV_ROUND_UP(data_rate, 1000);
- max_data_rate = intel_max_data_rate(dev_priv, num_active_planes);
+ num_active_planes = intel_bw_num_active_planes(dev_priv, new_bw_state);
- data_rate = DIV_ROUND_UP(data_rate, 1000);
+ for (i = 0; i < num_qgv_points; i++) {
+ unsigned int max_data_rate;
- if (data_rate > max_data_rate) {
- drm_dbg_kms(&dev_priv->drm,
- "Bandwidth %u MB/s exceeds max available %d MB/s (%d active planes)\n",
- data_rate, max_data_rate, num_active_planes);
+ max_data_rate = icl_max_bw(dev_priv, num_active_planes, i);
+ /*
+ * We need to know which qgv point gives us
+ * maximum bandwidth in order to disable SAGV
+ * if we find that we exceed SAGV block time
+ * with watermarks. By that moment we already
+ * have those, as they are calculated earlier in
+ * intel_atomic_check.
+ */
+ if (max_data_rate > max_bw) {
+ max_bw_point = i;
+ max_bw = max_data_rate;
+ }
+ if (max_data_rate >= data_rate)
+ allowed_points |= BIT(i);
+ drm_dbg_kms(&dev_priv->drm, "QGV point %d: max bw %d required %d\n",
+ i, max_data_rate, data_rate);
+ }
+
+ /*
+ * BSpec states that we should always have at least one allowed point
+ * left, so if we don't, simply reject the configuration for obvious
+ * reasons.
+ */
+ if (allowed_points == 0) {
+ drm_dbg_kms(&dev_priv->drm, "No QGV points provide sufficient memory"
+ " bandwidth %d for display configuration(%d active planes).\n",
+ data_rate, num_active_planes);
return -EINVAL;
}
+ /*
+ * Leave only a single point with the highest bandwidth if
+ * we can't enable SAGV due to the increased memory latency it may
+ * cause.
+ */
+ if (!intel_can_enable_sagv(dev_priv, new_bw_state)) {
+ allowed_points = BIT(max_bw_point);
+ drm_dbg_kms(&dev_priv->drm, "No SAGV, using single QGV point %d\n",
+ max_bw_point);
+ }
+ /*
+ * We store the ones which need to be masked, as that is what PCode
+ * actually accepts as a parameter.
+ */
+ new_bw_state->qgv_points_mask = ~allowed_points & mask;
+
+ old_bw_state = intel_atomic_get_old_bw_state(state);
+ /*
+ * If the actual mask has changed we need to make sure that
+ * the commits are serialized (in case this is a nomodeset,
+ * nonblocking commit).
+ */
+ if (new_bw_state->qgv_points_mask != old_bw_state->qgv_points_mask) {
+ ret = intel_atomic_serialize_global_state(&new_bw_state->base);
+ if (ret)
+ return ret;
+ }
+
return 0;
}
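
For reference, the QGV masking above reduces to a few bit operations: every point whose max bandwidth covers the required data rate gets a bit in allowed_points, and the complement within the valid-point mask is what ends up in qgv_points_mask. The standalone sketch below, with made-up bandwidth numbers and hypothetical names (it is not i915 code and not part of this patch), mimics that arithmetic:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Stand-in for icl_max_bw(): max data rate (MB/s) per QGV point
	 * for the current number of active planes. Values are made up. */
	const unsigned int max_bw_per_point[] = { 2000, 4000, 8000 };
	const unsigned int num_qgv_points = 3;
	const unsigned int data_rate = 3500; /* required bandwidth, MB/s */
	const uint32_t mask = (1u << num_qgv_points) - 1;
	uint32_t allowed_points = 0;
	unsigned int max_bw = 0, max_bw_point = 0, i;

	for (i = 0; i < num_qgv_points; i++) {
		/* Remember the point with the highest bandwidth ... */
		if (max_bw_per_point[i] > max_bw) {
			max_bw = max_bw_per_point[i];
			max_bw_point = i;
		}
		/* ... and collect every point that can serve the demand. */
		if (max_bw_per_point[i] >= data_rate)
			allowed_points |= 1u << i;
	}

	/* PCode takes the points to mask *off*, i.e. the complement. */
	printf("allowed 0x%x, masked 0x%x, max bw point %u\n",
	       allowed_points, ~allowed_points & mask, max_bw_point);
	/* prints: allowed 0x6, masked 0x1, max bw point 2 */
	return 0;
}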
diff --git a/drivers/gpu/drm/i915/display/intel_bw.h b/drivers/gpu/drm/i915/display/intel_bw.h
index a8aa7624c5aa..bbcaaa73ec1b 100644
--- a/drivers/gpu/drm/i915/display/intel_bw.h
+++ b/drivers/gpu/drm/i915/display/intel_bw.h
@@ -18,16 +18,43 @@ struct intel_crtc_state;
struct intel_bw_state {
struct intel_global_state base;
+ /*
+ * Contains a bit mask used to determine whether the
+ * corresponding pipe allows SAGV or not.
+ */
+ u8 pipe_sagv_reject;
+
+ /*
+ * Current QGV points mask, which restricts
+ * some particular SAGV states; not to be
+ * confused with pipe_sagv_mask.
+ */
+ u8 qgv_points_mask;
+
unsigned int data_rate[I915_MAX_PIPES];
u8 num_active_planes[I915_MAX_PIPES];
+
+ /* bitmask of active pipes */
+ u8 active_pipes;
};
#define to_intel_bw_state(x) container_of((x), struct intel_bw_state, base)
+struct intel_bw_state *
+intel_atomic_get_old_bw_state(struct intel_atomic_state *state);
+
+struct intel_bw_state *
+intel_atomic_get_new_bw_state(struct intel_atomic_state *state);
+
+struct intel_bw_state *
+intel_atomic_get_bw_state(struct intel_atomic_state *state);
+
void intel_bw_init_hw(struct drm_i915_private *dev_priv);
int intel_bw_init(struct drm_i915_private *dev_priv);
int intel_bw_atomic_check(struct intel_atomic_state *state);
void intel_bw_crtc_update(struct intel_bw_state *bw_state,
const struct intel_crtc_state *crtc_state);
+int icl_pcode_restrict_qgv_points(struct drm_i915_private *dev_priv,
+ u32 points_mask);
#endif /* __INTEL_BW_H__ */
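
The three accessors declared above differ in how they obtain the state: intel_atomic_get_bw_state() is the one intel_bw_atomic_check() calls and can fail with an ERR_PTR, while the old/new variants appear to be plain lookups of state already tracked in the commit. A hypothetical consumer is sketched below; the function name is made up and the NULL check assumes the old/new helpers return NULL when the bw object is not part of this atomic state:

/* Hypothetical example, not part of the patch: check whether this commit
 * changes the QGV points mask, assuming intel_bw_atomic_check() has
 * already run. */
static bool example_qgv_points_mask_changed(struct intel_atomic_state *state)
{
	const struct intel_bw_state *old_bw_state =
		intel_atomic_get_old_bw_state(state);
	const struct intel_bw_state *new_bw_state =
		intel_atomic_get_new_bw_state(state);

	if (!old_bw_state || !new_bw_state)
		return false;

	return old_bw_state->qgv_points_mask != new_bw_state->qgv_points_mask;
}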
diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c
index c1cce93a1c25..98ece9cd7cdd 100644
--- a/drivers/gpu/drm/i915/display/intel_color.c
+++ b/drivers/gpu/drm/i915/display/intel_color.c
@@ -460,6 +460,16 @@ static void ilk_lut_10_pack(struct drm_color_lut *entry, u32 val)
entry->blue = intel_color_lut_pack(REG_FIELD_GET(PREC_PALETTE_BLUE_MASK, val), 10);
}
+static void icl_lut_multi_seg_pack(struct drm_color_lut *entry, u32 ldw, u32 udw)
+{
+ entry->red = REG_FIELD_GET(PAL_PREC_MULTI_SEG_RED_UDW_MASK, udw) << 6 |
+ REG_FIELD_GET(PAL_PREC_MULTI_SEG_RED_LDW_MASK, ldw);
+ entry->green = REG_FIELD_GET(PAL_PREC_MULTI_SEG_GREEN_UDW_MASK, udw) << 6 |
+ REG_FIELD_GET(PAL_PREC_MULTI_SEG_GREEN_LDW_MASK, ldw);
+ entry->blue = REG_FIELD_GET(PAL_PREC_MULTI_SEG_BLUE_UDW_MASK, udw) << 6 |
+ REG_FIELD_GET(PAL_PREC_MULTI_SEG_BLUE_LDW_MASK, ldw);
+}
+
static void i9xx_color_commit(const struct intel_crtc_state *crtc_state)
{
struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
@@ -893,7 +903,7 @@ icl_load_gcmax(const struct intel_crtc_state *crtc_state,
struct intel_dsb *dsb = intel_dsb_get(crtc);
enum pipe pipe = crtc->pipe;
- /* Fixme: LUT entries are 16 bit only, so we can prog 0xFFFF max */
+ /* FIXME LUT entries are 16 bit only, so we can prog 0xFFFF max */
intel_dsb_reg_write(dsb, PREC_PAL_GC_MAX(pipe, 0), color->red);
intel_dsb_reg_write(dsb, PREC_PAL_GC_MAX(pipe, 1), color->green);
intel_dsb_reg_write(dsb, PREC_PAL_GC_MAX(pipe, 2), color->blue);
@@ -1630,6 +1640,24 @@ static int glk_gamma_precision(const struct intel_crtc_state *crtc_state)
}
}
+static int icl_gamma_precision(const struct intel_crtc_state *crtc_state)
+{
+ if ((crtc_state->gamma_mode & POST_CSC_GAMMA_ENABLE) == 0)
+ return 0;
+
+ switch (crtc_state->gamma_mode & GAMMA_MODE_MODE_MASK) {
+ case GAMMA_MODE_MODE_8BIT:
+ return 8;
+ case GAMMA_MODE_MODE_10BIT:
+ return 10;
+ case GAMMA_MODE_MODE_12BIT_MULTI_SEGMENTED:
+ return 16;
+ default:
+ MISSING_CASE(crtc_state->gamma_mode);
+ return 0;
+ }
+}
+
int intel_color_get_gamma_bit_precision(const struct intel_crtc_state *crtc_state)
{
struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
@@ -1641,7 +1669,9 @@ int intel_color_get_gamma_bit_precision(const struct intel_crtc_state *crtc_stat
else
return i9xx_gamma_precision(crtc_state);
} else {
- if (IS_CANNONLAKE(dev_priv) || IS_GEMINILAKE(dev_priv))
+ if (INTEL_GEN(dev_priv) >= 11)
+ return icl_gamma_precision(crtc_state);
+ else if (IS_CANNONLAKE(dev_priv) || IS_GEMINILAKE(dev_priv))
return glk_gamma_precision(crtc_state);
else if (IS_IRONLAKE(dev_priv))
return ilk_gamma_precision(crtc_state);
@@ -1658,9 +1688,9 @@ static bool err_check(struct drm_color_lut *lut1,
((abs((long)lut2->green - lut1->green)) <= err);
}
-static bool intel_color_lut_entry_equal(struct drm_color_lut *lut1,
- struct drm_color_lut *lut2,
- int lut_size, u32 err)
+static bool intel_color_lut_entries_equal(struct drm_color_lut *lut1,
+ struct drm_color_lut *lut2,
+ int lut_size, u32 err)
{
int i;
@@ -1690,16 +1720,8 @@ bool intel_color_lut_equal(struct drm_property_blob *blob1,
lut_size2 = drm_color_lut_size(blob2);
/* check sw and hw lut size */
- switch (gamma_mode) {
- case GAMMA_MODE_MODE_8BIT:
- case GAMMA_MODE_MODE_10BIT:
- if (lut_size1 != lut_size2)
- return false;
- break;
- default:
- MISSING_CASE(gamma_mode);
- return false;
- }
+ if (lut_size1 != lut_size2)
+ return false;
lut1 = blob1->data;
lut2 = blob2->data;
@@ -1707,11 +1729,16 @@ bool intel_color_lut_equal(struct drm_property_blob *blob1,
err = 0xffff >> bit_precision;
/* check sw and hw lut entry to be equal */
- switch (gamma_mode) {
+ switch (gamma_mode & GAMMA_MODE_MODE_MASK) {
case GAMMA_MODE_MODE_8BIT:
case GAMMA_MODE_MODE_10BIT:
- if (!intel_color_lut_entry_equal(lut1, lut2,
- lut_size2, err))
+ if (!intel_color_lut_entries_equal(lut1, lut2,
+ lut_size2, err))
+ return false;
+ break;
+ case GAMMA_MODE_MODE_12BIT_MULTI_SEGMENTED:
+ if (!intel_color_lut_entries_equal(lut1, lut2,
+ 9, err))
return false;
break;
default:
@@ -1946,6 +1973,63 @@ static void glk_read_luts(struct intel_crtc_state *crtc_state)
crtc_state->hw.gamma_lut = glk_read_lut_10(crtc, PAL_PREC_INDEX_VALUE(0));
}
+static struct drm_property_blob *
+icl_read_lut_multi_segment(struct intel_crtc *crtc)
+{
+ struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+ int i, lut_size = INTEL_INFO(dev_priv)->color.gamma_lut_size;
+ enum pipe pipe = crtc->pipe;
+ struct drm_property_blob *blob;
+ struct drm_color_lut *lut;
+
+ blob = drm_property_create_blob(&dev_priv->drm,
+ sizeof(struct drm_color_lut) * lut_size,
+ NULL);
+ if (IS_ERR(blob))
+ return NULL;
+
+ lut = blob->data;
+
+ intel_de_write(dev_priv, PREC_PAL_MULTI_SEG_INDEX(pipe),
+ PAL_PREC_AUTO_INCREMENT);
+
+ for (i = 0; i < 9; i++) {
+ u32 ldw = intel_de_read(dev_priv, PREC_PAL_MULTI_SEG_DATA(pipe));
+ u32 udw = intel_de_read(dev_priv, PREC_PAL_MULTI_SEG_DATA(pipe));
+
+ icl_lut_multi_seg_pack(&lut[i], ldw, udw);
+ }
+
+ intel_de_write(dev_priv, PREC_PAL_MULTI_SEG_INDEX(pipe), 0);
+
+ /*
+ * FIXME readouts from PAL_PREC_DATA register aren't giving
+ * correct values in the case of fine and coarse segments.
+ * Restricting readouts to the super fine segment only for now.
+ */
+
+ return blob;
+}
+
+static void icl_read_luts(struct intel_crtc_state *crtc_state)
+{
+ struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
+
+ if ((crtc_state->gamma_mode & POST_CSC_GAMMA_ENABLE) == 0)
+ return;
+
+ switch (crtc_state->gamma_mode & GAMMA_MODE_MODE_MASK) {
+ case GAMMA_MODE_MODE_8BIT:
+ crtc_state->hw.gamma_lut = ilk_read_lut_8(crtc);
+ break;
+ case GAMMA_MODE_MODE_12BIT_MULTI_SEGMENTED:
+ crtc_state->hw.gamma_lut = icl_read_lut_multi_segment(crtc);
+ break;
+ default:
+ crtc_state->hw.gamma_lut = glk_read_lut_10(crtc, PAL_PREC_INDEX_VALUE(0));
+ }
+}
+
void intel_color_init(struct intel_crtc *crtc)
{
struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
@@ -1989,6 +2073,7 @@ void intel_color_init(struct intel_crtc *crtc)
if (INTEL_GEN(dev_priv) >= 11) {
dev_priv->display.load_luts = icl_load_luts;
+ dev_priv->display.read_luts = icl_read_luts;
} else if (IS_CANNONLAKE(dev_priv) || IS_GEMINILAKE(dev_priv)) {
dev_priv->display.load_luts = glk_load_luts;
dev_priv->display.read_luts = glk_read_luts;
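
As an aside on icl_lut_multi_seg_pack()/icl_read_lut_multi_segment() above: each 16-bit LUT channel is split across two consecutive data-register reads, with the low 6 bits carried in the LDW word and the remaining high 10 bits in the UDW word (that is what the left shift by 6 in the pack helper reassembles). The toy program below, with simplified field positions and hypothetical names (not i915 code and not part of this patch), demonstrates the round trip:

#include <stdint.h>
#include <stdio.h>

/* Simplified model: field positions within the registers are ignored,
 * only the 6/10-bit split of a 16-bit channel value is shown. */
static uint16_t repack_channel(uint32_t ldw_field, uint32_t udw_field)
{
	return (uint16_t)((udw_field << 6) | (ldw_field & 0x3f));
}

int main(void)
{
	const uint16_t value = 0xabcd;
	const uint32_t ldw_field = value & 0x3f;         /* low 6 bits */
	const uint32_t udw_field = (value >> 6) & 0x3ff; /* high 10 bits */

	printf("repacked 0x%04x\n", repack_channel(ldw_field, udw_field));
	/* prints: repacked 0xabcd */
	return 0;
}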
diff --git a/drivers/gpu/drm/i915/display/intel_connector.c b/drivers/gpu/drm/i915/display/intel_connector.c
index 903e49659f56..406e96785c76 100644
--- a/drivers/gpu/drm/i915/display/intel_connector.c
+++ b/drivers/gpu/drm/i915/display/intel_connector.c
@@ -33,6 +33,7 @@
#include "i915_drv.h"
#include "intel_connector.h"
+#include "intel_display_debugfs.h"
#include "intel_display_types.h"
#include "intel_hdcp.h"
@@ -123,6 +124,8 @@ int intel_connector_register(struct drm_connector *connector)
goto err_backlight;
}
+ intel_connector_debugfs_add(connector);
+
return 0;
err_backlight:
@@ -290,7 +293,7 @@ intel_attach_colorspace_property(struct drm_connector *connector)
return;
break;
default:
- DRM_DEBUG_KMS("Colorspace property not supported\n");
+ MISSING_CASE(connector->connector_type);
return;
}
diff --git a/drivers/gpu/drm/i915/display/intel_crt.c b/drivers/gpu/drm/i915/display/intel_crt.c
index 78f9b6cde810..2f5b9a4baafd 100644
--- a/drivers/gpu/drm/i915/display/intel_crt.c
+++ b/drivers/gpu/drm/i915/display/intel_crt.c
@@ -203,27 +203,31 @@ static void intel_crt_set_dpms(struct intel_encoder *encoder,
intel_de_write(dev_priv, crt->adpa_reg, adpa);
}
-static void intel_disable_crt(struct intel_encoder *encoder,
+static void intel_disable_crt(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *old_crtc_state,
const struct drm_connector_state *old_conn_state)
{
intel_crt_set_dpms(encoder, old_crtc_state, DRM_MODE_DPMS_OFF);
}
-static void pch_disable_crt(struct intel_encoder *encoder,
+static void pch_disable_crt(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *old_crtc_state,
const struct drm_connector_state *old_conn_state)
{
}
-static void pch_post_disable_crt(struct intel_encoder *encoder,
+static void pch_post_disable_crt(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *old_crtc_state,
const struct drm_connector_state *old_conn_state)
{
- intel_disable_crt(encoder, old_crtc_state, old_conn_state);
+ intel_disable_crt(state, encoder, old_crtc_state, old_conn_state);
}
-static void hsw_disable_crt(struct intel_encoder *encoder,
+static void hsw_disable_crt(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *old_crtc_state,
const struct drm_connector_state *old_conn_state)
{
@@ -234,7 +238,8 @@ static void hsw_disable_crt(struct intel_encoder *encoder,
intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, false);
}
-static void hsw_post_disable_crt(struct intel_encoder *encoder,
+static void hsw_post_disable_crt(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *old_crtc_state,
const struct drm_connector_state *old_conn_state)
{
@@ -250,19 +255,20 @@ static void hsw_post_disable_crt(struct intel_encoder *encoder,
intel_ddi_disable_pipe_clock(old_crtc_state);
- pch_post_disable_crt(encoder, old_crtc_state, old_conn_state);
+ pch_post_disable_crt(state, encoder, old_crtc_state, old_conn_state);
lpt_disable_pch_transcoder(dev_priv);
lpt_disable_iclkip(dev_priv);
- intel_ddi_fdi_post_disable(encoder, old_crtc_state, old_conn_state);
+ intel_ddi_fdi_post_disable(state, encoder, old_crtc_state, old_conn_state);
drm_WARN_ON(&dev_priv->drm, !old_crtc_state->has_pch_encoder);
intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, true);
}
-static void hsw_pre_pll_enable_crt(struct intel_encoder *encoder,
+static void hsw_pre_pll_enable_crt(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *crtc_state,
const struct drm_connector_state *conn_state)
{
@@ -273,7 +279,8 @@ static void hsw_pre_pll_enable_crt(struct intel_encoder *encoder,
intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, false);
}
-static void hsw_pre_enable_crt(struct intel_encoder *encoder,
+static void hsw_pre_enable_crt(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *crtc_state,
const struct drm_connector_state *conn_state)
{
@@ -287,10 +294,11 @@ static void hsw_pre_enable_crt(struct intel_encoder *encoder,
hsw_fdi_link_train(encoder, crtc_state);
- intel_ddi_enable_pipe_clock(crtc_state);
+ intel_ddi_enable_pipe_clock(encoder, crtc_state);
}
-static void hsw_enable_crt(struct intel_encoder *encoder,
+static void hsw_enable_crt(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *crtc_state,
const struct drm_connector_state *conn_state)
{
@@ -300,6 +308,8 @@ static void hsw_enable_crt(struct intel_encoder *encoder,
drm_WARN_ON(&dev_priv->drm, !crtc_state->has_pch_encoder);
+ intel_ddi_enable_transcoder_func(encoder, crtc_state);
+
intel_enable_pipe(crtc_state);
lpt_pch_enable(crtc_state);
@@ -314,7 +324,8 @@ static void hsw_enable_crt(struct intel_encoder *encoder,
intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, true);
}
-static void intel_enable_crt(struct intel_encoder *encoder,
+static void intel_enable_crt(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *crtc_state,
const struct drm_connector_state *conn_state)
{
@@ -594,7 +605,8 @@ static struct edid *intel_crt_get_edid(struct drm_connector *connector,
edid = drm_get_edid(connector, i2c);
if (!edid && !intel_gmbus_is_forced_bit(i2c)) {
- DRM_DEBUG_KMS("CRT GMBUS EDID read failed, retry using GPIO bit-banging\n");
+ drm_dbg_kms(connector->dev,
+ "CRT GMBUS EDID read failed, retry using GPIO bit-banging\n");
intel_gmbus_force_bit(i2c, true);
edid = drm_get_edid(connector, i2c);
intel_gmbus_force_bit(i2c, false);
diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c
index 52db7852827b..aa22465bb56e 100644
--- a/drivers/gpu/drm/i915/display/intel_ddi.c
+++ b/drivers/gpu/drm/i915/display/intel_ddi.c
@@ -568,7 +568,7 @@ static const struct cnl_ddi_buf_trans icl_combo_phy_ddi_translations_hdmi[] = {
{ 0x6, 0x7F, 0x35, 0x00, 0x0A }, /* 600 850 3.0 */
};
-static const struct cnl_ddi_buf_trans ehl_combo_phy_ddi_translations_hbr2_hbr3[] = {
+static const struct cnl_ddi_buf_trans ehl_combo_phy_ddi_translations_dp[] = {
/* NT mV Trans mV db */
{ 0xA, 0x33, 0x3F, 0x00, 0x00 }, /* 350 350 0.0 */
{ 0xA, 0x47, 0x36, 0x00, 0x09 }, /* 350 500 3.1 */
@@ -583,23 +583,51 @@ static const struct cnl_ddi_buf_trans ehl_combo_phy_ddi_translations_hbr2_hbr3[]
};
struct icl_mg_phy_ddi_buf_trans {
- u32 cri_txdeemph_override_5_0;
u32 cri_txdeemph_override_11_6;
+ u32 cri_txdeemph_override_5_0;
u32 cri_txdeemph_override_17_12;
};
-static const struct icl_mg_phy_ddi_buf_trans icl_mg_phy_ddi_translations[] = {
+static const struct icl_mg_phy_ddi_buf_trans icl_mg_phy_ddi_translations_rbr_hbr[] = {
+ /* Voltage swing pre-emphasis */
+ { 0x18, 0x00, 0x00 }, /* 0 0 */
+ { 0x1D, 0x00, 0x05 }, /* 0 1 */
+ { 0x24, 0x00, 0x0C }, /* 0 2 */
+ { 0x2B, 0x00, 0x14 }, /* 0 3 */
+ { 0x21, 0x00, 0x00 }, /* 1 0 */
+ { 0x2B, 0x00, 0x08 }, /* 1 1 */
+ { 0x30, 0x00, 0x0F }, /* 1 2 */
+ { 0x31, 0x00, 0x03 }, /* 2 0 */
+ { 0x34, 0x00, 0x0B }, /* 2 1 */
+ { 0x3F, 0x00, 0x00 }, /* 3 0 */
+};
+
+static const struct icl_mg_phy_ddi_buf_trans icl_mg_phy_ddi_translations_hbr2_hbr3[] = {
/* Voltage swing pre-emphasis */
- { 0x0, 0x1B, 0x00 }, /* 0 0 */
- { 0x0, 0x23, 0x08 }, /* 0 1 */
- { 0x0, 0x2D, 0x12 }, /* 0 2 */
- { 0x0, 0x00, 0x00 }, /* 0 3 */
- { 0x0, 0x23, 0x00 }, /* 1 0 */
- { 0x0, 0x2B, 0x09 }, /* 1 1 */
- { 0x0, 0x2E, 0x11 }, /* 1 2 */
- { 0x0, 0x2F, 0x00 }, /* 2 0 */
- { 0x0, 0x33, 0x0C }, /* 2 1 */
- { 0x0, 0x00, 0x00 }, /* 3 0 */
+ { 0x18, 0x00, 0x00 }, /* 0 0 */
+ { 0x1D, 0x00, 0x05 }, /* 0 1 */
+ { 0x24, 0x00, 0x0C }, /* 0 2 */
+ { 0x2B, 0x00, 0x14 }, /* 0 3 */
+ { 0x26, 0x00, 0x00 }, /* 1 0 */
+ { 0x2C, 0x00, 0x07 }, /* 1 1 */
+ { 0x33, 0x00, 0x0C }, /* 1 2 */
+ { 0x2E, 0x00, 0x00 }, /* 2 0 */
+ { 0x36, 0x00, 0x09 }, /* 2 1 */
+ { 0x3F, 0x00, 0x00 }, /* 3 0 */
+};
+
+static const struct icl_mg_phy_ddi_buf_trans icl_mg_phy_ddi_translations_hdmi[] = {
+ /* HDMI Preset VS Pre-emph */
+ { 0x1A, 0x0, 0x0 }, /* 1 400mV 0dB */
+ { 0x20, 0x0, 0x0 }, /* 2 500mV 0dB */
+ { 0x29, 0x0, 0x0 }, /* 3 650mV 0dB */
+ { 0x32, 0x0, 0x0 }, /* 4 800mV 0dB */
+ { 0x3F, 0x0, 0x0 }, /* 5 1000mV 0dB */
+ { 0x3A, 0x0, 0x5 }, /* 6 Full -1.5 dB */
+ { 0x39, 0x0, 0x6 }, /* 7 Full -1.8 dB */
+ { 0x38, 0x0, 0x7 }, /* 8 Full -2 dB */
+ { 0x37, 0x0, 0x8 }, /* 9 Full -2.5 dB */
+ { 0x36, 0x0, 0x9 }, /* 10 Full -3 dB */
};
struct tgl_dkl_phy_ddi_buf_trans {
@@ -943,14 +971,29 @@ icl_get_combo_buf_trans(struct drm_i915_private *dev_priv, int type, int rate,
return icl_combo_phy_ddi_translations_dp_hbr2;
}
+static const struct icl_mg_phy_ddi_buf_trans *
+icl_get_mg_buf_trans(struct drm_i915_private *dev_priv, int type, int rate,
+ int *n_entries)
+{
+ if (type == INTEL_OUTPUT_HDMI) {
+ *n_entries = ARRAY_SIZE(icl_mg_phy_ddi_translations_hdmi);
+ return icl_mg_phy_ddi_translations_hdmi;
+ } else if (rate > 270000) {
+ *n_entries = ARRAY_SIZE(icl_mg_phy_ddi_translations_hbr2_hbr3);
+ return icl_mg_phy_ddi_translations_hbr2_hbr3;
+ }
+
+ *n_entries = ARRAY_SIZE(icl_mg_phy_ddi_translations_rbr_hbr);
+ return icl_mg_phy_ddi_translations_rbr_hbr;
+}
+
static const struct cnl_ddi_buf_trans *
ehl_get_combo_buf_trans(struct drm_i915_private *dev_priv, int type, int rate,
int *n_entries)
{
- if (type != INTEL_OUTPUT_HDMI && type != INTEL_OUTPUT_EDP &&
- rate > 270000) {
- *n_entries = ARRAY_SIZE(ehl_combo_phy_ddi_translations_hbr2_hbr3);
- return ehl_combo_phy_ddi_translations_hbr2_hbr3;
+ if (type != INTEL_OUTPUT_HDMI && type != INTEL_OUTPUT_EDP) {
+ *n_entries = ARRAY_SIZE(ehl_combo_phy_ddi_translations_dp);
+ return ehl_combo_phy_ddi_translations_dp;
}
return icl_get_combo_buf_trans(dev_priv, type, rate, n_entries);
@@ -989,7 +1032,8 @@ static int intel_ddi_hdmi_level(struct intel_encoder *encoder)
icl_get_combo_buf_trans(dev_priv, INTEL_OUTPUT_HDMI,
0, &n_entries);
else
- n_entries = ARRAY_SIZE(icl_mg_phy_ddi_translations);
+ icl_get_mg_buf_trans(dev_priv, INTEL_OUTPUT_HDMI, 0,
+ &n_entries);
default_entry = n_entries - 1;
} else if (IS_CANNONLAKE(dev_priv)) {
cnl_get_buf_trans_hdmi(dev_priv, &n_entries);
@@ -1103,7 +1147,8 @@ static void intel_wait_ddi_buf_idle(struct drm_i915_private *dev_priv,
if (intel_de_read(dev_priv, reg) & DDI_BUF_IS_IDLE)
return;
}
- DRM_ERROR("Timeout waiting for DDI BUF %c idle bit\n", port_name(port));
+ drm_err(&dev_priv->drm, "Timeout waiting for DDI BUF %c idle bit\n",
+ port_name(port));
}
static u32 hsw_pll_to_ddi_pll_sel(const struct intel_shared_dpll *pll)
@@ -1216,7 +1261,10 @@ void hsw_fdi_link_train(struct intel_encoder *encoder,
for (i = 0; i < ARRAY_SIZE(hsw_ddi_translations_fdi) * 2; i++) {
/* Configure DP_TP_CTL with auto-training */
intel_de_write(dev_priv, DP_TP_CTL(PORT_E),
- DP_TP_CTL_FDI_AUTOTRAIN | DP_TP_CTL_ENHANCED_FRAME_ENABLE | DP_TP_CTL_LINK_TRAIN_PAT1 | DP_TP_CTL_ENABLE);
+ DP_TP_CTL_FDI_AUTOTRAIN |
+ DP_TP_CTL_ENHANCED_FRAME_ENABLE |
+ DP_TP_CTL_LINK_TRAIN_PAT1 |
+ DP_TP_CTL_ENABLE);
/* Configure and enable DDI_BUF_CTL for DDI E with next voltage.
* DDI E does not support port reversal, the functionality is
@@ -1250,7 +1298,8 @@ void hsw_fdi_link_train(struct intel_encoder *encoder,
temp = intel_de_read(dev_priv, DP_TP_STATUS(PORT_E));
if (temp & DP_TP_STATUS_AUTOTRAIN_DONE) {
- DRM_DEBUG_KMS("FDI link training done on step %d\n", i);
+ drm_dbg_kms(&dev_priv->drm,
+ "FDI link training done on step %d\n", i);
break;
}
@@ -1259,7 +1308,7 @@ void hsw_fdi_link_train(struct intel_encoder *encoder,
* Results in less fireworks from the state checker.
*/
if (i == ARRAY_SIZE(hsw_ddi_translations_fdi) * 2 - 1) {
- DRM_ERROR("FDI link training failed!\n");
+ drm_err(&dev_priv->drm, "FDI link training failed!\n");
break;
}
@@ -1291,7 +1340,10 @@ void hsw_fdi_link_train(struct intel_encoder *encoder,
/* Enable normal pixel sending for FDI */
intel_de_write(dev_priv, DP_TP_CTL(PORT_E),
- DP_TP_CTL_FDI_AUTOTRAIN | DP_TP_CTL_LINK_TRAIN_NORMAL | DP_TP_CTL_ENHANCED_FRAME_ENABLE | DP_TP_CTL_ENABLE);
+ DP_TP_CTL_FDI_AUTOTRAIN |
+ DP_TP_CTL_LINK_TRAIN_NORMAL |
+ DP_TP_CTL_ENHANCED_FRAME_ENABLE |
+ DP_TP_CTL_ENABLE);
}
static void intel_ddi_init_dp_buf_reg(struct intel_encoder *encoder)
@@ -1305,27 +1357,6 @@ static void intel_ddi_init_dp_buf_reg(struct intel_encoder *encoder)
intel_dp->DP |= DDI_PORT_WIDTH(intel_dp->lane_count);
}
-static struct intel_encoder *
-intel_ddi_get_crtc_encoder(struct intel_crtc *crtc)
-{
- struct drm_device *dev = crtc->base.dev;
- struct intel_encoder *encoder, *ret = NULL;
- int num_encoders = 0;
-
- for_each_encoder_on_crtc(dev, &crtc->base, encoder) {
- ret = encoder;
- num_encoders++;
- }
-
- if (num_encoders != 1)
- drm_WARN(dev, 1, "%d encoders on crtc for pipe %c\n",
- num_encoders,
- pipe_name(crtc->pipe));
-
- BUG_ON(ret == NULL);
- return ret;
-}
-
static int icl_calc_tbt_pll_link(struct drm_i915_private *dev_priv,
enum port port)
{
@@ -1451,6 +1482,14 @@ void intel_ddi_set_dp_msa(const struct intel_crtc_state *crtc_state,
intel_de_write(dev_priv, TRANS_MSA_MISC(cpu_transcoder), temp);
}
+static u32 bdw_trans_port_sync_master_select(enum transcoder master_transcoder)
+{
+ if (master_transcoder == TRANSCODER_EDP)
+ return 0;
+ else
+ return master_transcoder + 1;
+}
+
/*
* Returns the TRANS_DDI_FUNC_CTL value based on CRTC state.
*
@@ -1458,10 +1497,10 @@ void intel_ddi_set_dp_msa(const struct intel_crtc_state *crtc_state,
* intel_ddi_config_transcoder_func().
*/
static u32
-intel_ddi_transcoder_func_reg_val_get(const struct intel_crtc_state *crtc_state)
+intel_ddi_transcoder_func_reg_val_get(struct intel_encoder *encoder,
+ const struct intel_crtc_state *crtc_state)
{
struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
- struct intel_encoder *encoder = intel_ddi_get_crtc_encoder(crtc);
struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
enum pipe pipe = crtc->pipe;
enum transcoder cpu_transcoder = crtc_state->cpu_transcoder;
@@ -1551,20 +1590,46 @@ intel_ddi_transcoder_func_reg_val_get(const struct intel_crtc_state *crtc_state)
temp |= DDI_PORT_WIDTH(crtc_state->lane_count);
}
+ if (IS_GEN_RANGE(dev_priv, 8, 10) &&
+ crtc_state->master_transcoder != INVALID_TRANSCODER) {
+ u8 master_select =
+ bdw_trans_port_sync_master_select(crtc_state->master_transcoder);
+
+ temp |= TRANS_DDI_PORT_SYNC_ENABLE |
+ TRANS_DDI_PORT_SYNC_MASTER_SELECT(master_select);
+ }
+
return temp;
}
-void intel_ddi_enable_transcoder_func(const struct intel_crtc_state *crtc_state)
+void intel_ddi_enable_transcoder_func(struct intel_encoder *encoder,
+ const struct intel_crtc_state *crtc_state)
{
struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
enum transcoder cpu_transcoder = crtc_state->cpu_transcoder;
- u32 temp;
+ u32 ctl;
+
+ if (INTEL_GEN(dev_priv) >= 11) {
+ enum transcoder master_transcoder = crtc_state->master_transcoder;
+ u32 ctl2 = 0;
+
+ if (master_transcoder != INVALID_TRANSCODER) {
+ u8 master_select =
+ bdw_trans_port_sync_master_select(master_transcoder);
+
+ ctl2 |= PORT_SYNC_MODE_ENABLE |
+ PORT_SYNC_MODE_MASTER_SELECT(master_select);
+ }
- temp = intel_ddi_transcoder_func_reg_val_get(crtc_state);
+ intel_de_write(dev_priv,
+ TRANS_DDI_FUNC_CTL2(cpu_transcoder), ctl2);
+ }
+
+ ctl = intel_ddi_transcoder_func_reg_val_get(encoder, crtc_state);
if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST))
- temp |= TRANS_DDI_DP_VC_PAYLOAD_ALLOC;
- intel_de_write(dev_priv, TRANS_DDI_FUNC_CTL(cpu_transcoder), temp);
+ ctl |= TRANS_DDI_DP_VC_PAYLOAD_ALLOC;
+ intel_de_write(dev_priv, TRANS_DDI_FUNC_CTL(cpu_transcoder), ctl);
}
/*
@@ -1572,16 +1637,17 @@ void intel_ddi_enable_transcoder_func(const struct intel_crtc_state *crtc_state)
* bit.
*/
static void
-intel_ddi_config_transcoder_func(const struct intel_crtc_state *crtc_state)
+intel_ddi_config_transcoder_func(struct intel_encoder *encoder,
+ const struct intel_crtc_state *crtc_state)
{
struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
enum transcoder cpu_transcoder = crtc_state->cpu_transcoder;
- u32 temp;
+ u32 ctl;
- temp = intel_ddi_transcoder_func_reg_val_get(crtc_state);
- temp &= ~TRANS_DDI_FUNC_ENABLE;
- intel_de_write(dev_priv, TRANS_DDI_FUNC_CTL(cpu_transcoder), temp);
+ ctl = intel_ddi_transcoder_func_reg_val_get(encoder, crtc_state);
+ ctl &= ~TRANS_DDI_FUNC_ENABLE;
+ intel_de_write(dev_priv, TRANS_DDI_FUNC_CTL(cpu_transcoder), ctl);
}
void intel_ddi_disable_transcoder_func(const struct intel_crtc_state *crtc_state)
@@ -1589,24 +1655,35 @@ void intel_ddi_disable_transcoder_func(const struct intel_crtc_state *crtc_state
struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
enum transcoder cpu_transcoder = crtc_state->cpu_transcoder;
- u32 val;
+ u32 ctl;
+
+ if (INTEL_GEN(dev_priv) >= 11)
+ intel_de_write(dev_priv,
+ TRANS_DDI_FUNC_CTL2(cpu_transcoder), 0);
- val = intel_de_read(dev_priv, TRANS_DDI_FUNC_CTL(cpu_transcoder));
- val &= ~TRANS_DDI_FUNC_ENABLE;
+ ctl = intel_de_read(dev_priv, TRANS_DDI_FUNC_CTL(cpu_transcoder));
+
+ ctl &= ~TRANS_DDI_FUNC_ENABLE;
+
+ if (IS_GEN_RANGE(dev_priv, 8, 10))
+ ctl &= ~(TRANS_DDI_PORT_SYNC_ENABLE |
+ TRANS_DDI_PORT_SYNC_MASTER_SELECT_MASK);
if (INTEL_GEN(dev_priv) >= 12) {
if (!intel_dp_mst_is_master_trans(crtc_state)) {
- val &= ~(TGL_TRANS_DDI_PORT_MASK |
+ ctl &= ~(TGL_TRANS_DDI_PORT_MASK |
TRANS_DDI_MODE_SELECT_MASK);
}
} else {
- val &= ~(TRANS_DDI_PORT_MASK | TRANS_DDI_MODE_SELECT_MASK);
+ ctl &= ~(TRANS_DDI_PORT_MASK | TRANS_DDI_MODE_SELECT_MASK);
}
- intel_de_write(dev_priv, TRANS_DDI_FUNC_CTL(cpu_transcoder), val);
+
+ intel_de_write(dev_priv, TRANS_DDI_FUNC_CTL(cpu_transcoder), ctl);
if (dev_priv->quirks & QUIRK_INCREASE_DDI_DISABLED_TIME &&
intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) {
- DRM_DEBUG_KMS("Quirk Increase DDI disabled time\n");
+ drm_dbg_kms(&dev_priv->drm,
+ "Quirk Increase DDI disabled time\n");
/* Quirk time at 100ms for reliable operation */
msleep(100);
}
@@ -1667,7 +1744,7 @@ bool intel_ddi_connector_get_hw_state(struct intel_connector *intel_connector)
goto out;
}
- if (HAS_TRANSCODER_EDP(dev_priv) && port == PORT_A)
+ if (HAS_TRANSCODER(dev_priv, TRANSCODER_EDP) && port == PORT_A)
cpu_transcoder = TRANSCODER_EDP;
else
cpu_transcoder = (enum transcoder) pipe;
@@ -1729,7 +1806,7 @@ static void intel_ddi_get_encoder_pipes(struct intel_encoder *encoder,
if (!(tmp & DDI_BUF_CTL_ENABLE))
goto out;
- if (HAS_TRANSCODER_EDP(dev_priv) && port == PORT_A) {
+ if (HAS_TRANSCODER(dev_priv, TRANSCODER_EDP) && port == PORT_A) {
tmp = intel_de_read(dev_priv,
TRANS_DDI_FUNC_CTL(TRANSCODER_EDP));
@@ -1787,20 +1864,23 @@ static void intel_ddi_get_encoder_pipes(struct intel_encoder *encoder,
}
if (!*pipe_mask)
- DRM_DEBUG_KMS("No pipe for [ENCODER:%d:%s] found\n",
- encoder->base.base.id, encoder->base.name);
+ drm_dbg_kms(&dev_priv->drm,
+ "No pipe for [ENCODER:%d:%s] found\n",
+ encoder->base.base.id, encoder->base.name);
if (!mst_pipe_mask && hweight8(*pipe_mask) > 1) {
- DRM_DEBUG_KMS("Multiple pipes for [ENCODER:%d:%s] (pipe_mask %02x)\n",
- encoder->base.base.id, encoder->base.name,
- *pipe_mask);
+ drm_dbg_kms(&dev_priv->drm,
+ "Multiple pipes for [ENCODER:%d:%s] (pipe_mask %02x)\n",
+ encoder->base.base.id, encoder->base.name,
+ *pipe_mask);
*pipe_mask = BIT(ffs(*pipe_mask) - 1);
}
if (mst_pipe_mask && mst_pipe_mask != *pipe_mask)
- DRM_DEBUG_KMS("Conflicting MST and non-MST state for [ENCODER:%d:%s] (pipe_mask %02x mst_pipe_mask %02x)\n",
- encoder->base.base.id, encoder->base.name,
- *pipe_mask, mst_pipe_mask);
+ drm_dbg_kms(&dev_priv->drm,
+ "Conflicting MST and non-MST state for [ENCODER:%d:%s] (pipe_mask %02x mst_pipe_mask %02x)\n",
+ encoder->base.base.id, encoder->base.name,
+ *pipe_mask, mst_pipe_mask);
else
*is_dp_mst = mst_pipe_mask;
@@ -1810,9 +1890,9 @@ out:
if ((tmp & (BXT_PHY_CMNLANE_POWERDOWN_ACK |
BXT_PHY_LANE_POWERDOWN_ACK |
BXT_PHY_LANE_ENABLED)) != BXT_PHY_LANE_ENABLED)
- DRM_ERROR("[ENCODER:%d:%s] enabled but PHY powered down? "
- "(PHY_CTL %08x)\n", encoder->base.base.id,
- encoder->base.name, tmp);
+ drm_err(&dev_priv->drm,
+ "[ENCODER:%d:%s] enabled but PHY powered down? (PHY_CTL %08x)\n",
+ encoder->base.base.id, encoder->base.name, tmp);
}
intel_display_power_put(dev_priv, encoder->power_domain, wakeref);
@@ -1834,7 +1914,7 @@ bool intel_ddi_get_hw_state(struct intel_encoder *encoder,
return true;
}
-static inline enum intel_display_power_domain
+static enum intel_display_power_domain
intel_ddi_main_link_aux_domain(struct intel_digital_port *dig_port)
{
/* CNL+ HW requires corresponding AUX IOs to be powered up for PSR with
@@ -1893,11 +1973,11 @@ static void intel_ddi_get_power_domains(struct intel_encoder *encoder,
intel_dsc_power_domain(crtc_state));
}
-void intel_ddi_enable_pipe_clock(const struct intel_crtc_state *crtc_state)
+void intel_ddi_enable_pipe_clock(struct intel_encoder *encoder,
+ const struct intel_crtc_state *crtc_state)
{
struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
- struct intel_encoder *encoder = intel_ddi_get_crtc_encoder(crtc);
enum port port = encoder->port;
enum transcoder cpu_transcoder = crtc_state->cpu_transcoder;
@@ -1978,7 +2058,7 @@ static void skl_ddi_set_iboost(struct intel_encoder *encoder,
/* Make sure that the requested I_boost is valid */
if (iboost && iboost != 0x1 && iboost != 0x3 && iboost != 0x7) {
- DRM_ERROR("Invalid I_boost value %u\n", iboost);
+ drm_err(&dev_priv->drm, "Invalid I_boost value %u\n", iboost);
return;
}
@@ -2037,7 +2117,8 @@ u8 intel_ddi_dp_voltage_max(struct intel_encoder *encoder)
icl_get_combo_buf_trans(dev_priv, encoder->type,
intel_dp->link_rate, &n_entries);
else
- n_entries = ARRAY_SIZE(icl_mg_phy_ddi_translations);
+ icl_get_mg_buf_trans(dev_priv, encoder->type,
+ intel_dp->link_rate, &n_entries);
} else if (IS_CANNONLAKE(dev_priv)) {
if (encoder->type == INTEL_OUTPUT_EDP)
cnl_get_buf_trans_edp(dev_priv, &n_entries);
@@ -2237,7 +2318,9 @@ static void icl_ddi_combo_vswing_program(struct drm_i915_private *dev_priv,
return;
if (level >= n_entries) {
- DRM_DEBUG_KMS("DDI translation not found for level %d. Using %d instead.", level, n_entries - 1);
+ drm_dbg_kms(&dev_priv->drm,
+ "DDI translation not found for level %d. Using %d instead.",
+ level, n_entries - 1);
level = n_entries - 1;
}
@@ -2350,21 +2433,28 @@ static void icl_combo_phy_ddi_vswing_sequence(struct intel_encoder *encoder,
}
static void icl_mg_phy_ddi_vswing_sequence(struct intel_encoder *encoder,
- int link_clock,
- u32 level)
+ int link_clock, u32 level,
+ enum intel_output_type type)
{
struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
enum tc_port tc_port = intel_port_to_tc(dev_priv, encoder->port);
const struct icl_mg_phy_ddi_buf_trans *ddi_translations;
u32 n_entries, val;
- int ln;
+ int ln, rate = 0;
+
+ if (type != INTEL_OUTPUT_HDMI) {
+ struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
+
+ rate = intel_dp->link_rate;
+ }
- n_entries = ARRAY_SIZE(icl_mg_phy_ddi_translations);
- ddi_translations = icl_mg_phy_ddi_translations;
+ ddi_translations = icl_get_mg_buf_trans(dev_priv, type, rate,
+ &n_entries);
/* The table does not have values for level 3 and level 9. */
if (level >= n_entries || level == 3 || level == 9) {
- DRM_DEBUG_KMS("DDI translation not found for level %d. Using %d instead.",
- level, n_entries - 2);
+ drm_dbg_kms(&dev_priv->drm,
+ "DDI translation not found for level %d. Using %d instead.",
+ level, n_entries - 2);
level = n_entries - 2;
}
@@ -2483,7 +2573,8 @@ static void icl_ddi_vswing_sequence(struct intel_encoder *encoder,
if (intel_phy_is_combo(dev_priv, phy))
icl_combo_phy_ddi_vswing_sequence(encoder, level, type);
else
- icl_mg_phy_ddi_vswing_sequence(encoder, link_clock, level);
+ icl_mg_phy_ddi_vswing_sequence(encoder, link_clock, level,
+ type);
}
static void
@@ -2550,8 +2641,9 @@ static void tgl_ddi_vswing_sequence(struct intel_encoder *encoder,
tgl_dkl_phy_ddi_vswing_sequence(encoder, link_clock, level);
}
-static u32 translate_signal_level(int signal_levels)
+static u32 translate_signal_level(struct intel_dp *intel_dp, int signal_levels)
{
+ struct drm_i915_private *i915 = dp_to_i915(intel_dp);
int i;
for (i = 0; i < ARRAY_SIZE(index_to_dp_signal_levels); i++) {
@@ -2559,8 +2651,9 @@ static u32 translate_signal_level(int signal_levels)
return i;
}
- WARN(1, "Unsupported voltage swing/pre-emphasis level: 0x%x\n",
- signal_levels);
+ drm_WARN(&i915->drm, 1,
+ "Unsupported voltage swing/pre-emphasis level: 0x%x\n",
+ signal_levels);
return 0;
}
@@ -2571,46 +2664,73 @@ static u32 intel_ddi_dp_level(struct intel_dp *intel_dp)
int signal_levels = train_set & (DP_TRAIN_VOLTAGE_SWING_MASK |
DP_TRAIN_PRE_EMPHASIS_MASK);
- return translate_signal_level(signal_levels);
+ return translate_signal_level(intel_dp, signal_levels);
}
-u32 bxt_signal_levels(struct intel_dp *intel_dp)
+static void
+tgl_set_signal_levels(struct intel_dp *intel_dp)
{
- struct intel_digital_port *dport = dp_to_dig_port(intel_dp);
- struct drm_i915_private *dev_priv = to_i915(dport->base.base.dev);
- struct intel_encoder *encoder = &dport->base;
+ struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base;
int level = intel_ddi_dp_level(intel_dp);
- if (INTEL_GEN(dev_priv) >= 12)
- tgl_ddi_vswing_sequence(encoder, intel_dp->link_rate,
- level, encoder->type);
- else if (INTEL_GEN(dev_priv) >= 11)
- icl_ddi_vswing_sequence(encoder, intel_dp->link_rate,
- level, encoder->type);
- else if (IS_CANNONLAKE(dev_priv))
- cnl_ddi_vswing_sequence(encoder, level, encoder->type);
- else
- bxt_ddi_vswing_sequence(encoder, level, encoder->type);
+ tgl_ddi_vswing_sequence(encoder, intel_dp->link_rate,
+ level, encoder->type);
+}
- return 0;
+static void
+icl_set_signal_levels(struct intel_dp *intel_dp)
+{
+ struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base;
+ int level = intel_ddi_dp_level(intel_dp);
+
+ icl_ddi_vswing_sequence(encoder, intel_dp->link_rate,
+ level, encoder->type);
}
-u32 ddi_signal_levels(struct intel_dp *intel_dp)
+static void
+cnl_set_signal_levels(struct intel_dp *intel_dp)
{
- struct intel_digital_port *dport = dp_to_dig_port(intel_dp);
- struct drm_i915_private *dev_priv = to_i915(dport->base.base.dev);
- struct intel_encoder *encoder = &dport->base;
+ struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base;
int level = intel_ddi_dp_level(intel_dp);
+ cnl_ddi_vswing_sequence(encoder, level, encoder->type);
+}
+
+static void
+bxt_set_signal_levels(struct intel_dp *intel_dp)
+{
+ struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base;
+ int level = intel_ddi_dp_level(intel_dp);
+
+ bxt_ddi_vswing_sequence(encoder, level, encoder->type);
+}
+
+static void
+hsw_set_signal_levels(struct intel_dp *intel_dp)
+{
+ struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base;
+ struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
+ int level = intel_ddi_dp_level(intel_dp);
+ enum port port = encoder->port;
+ u32 signal_levels;
+
+ signal_levels = DDI_BUF_TRANS_SELECT(level);
+
+ drm_dbg_kms(&dev_priv->drm, "Using signal levels %08x\n",
+ signal_levels);
+
+ intel_dp->DP &= ~DDI_BUF_EMP_MASK;
+ intel_dp->DP |= signal_levels;
+
if (IS_GEN9_BC(dev_priv))
skl_ddi_set_iboost(encoder, level, encoder->type);
- return DDI_BUF_TRANS_SELECT(level);
+ intel_de_write(dev_priv, DDI_BUF_CTL(port), intel_dp->DP);
+ intel_de_posting_read(dev_priv, DDI_BUF_CTL(port));
}
-static inline
-u32 icl_dpclka_cfgcr0_clk_off(struct drm_i915_private *dev_priv,
- enum phy phy)
+static u32 icl_dpclka_cfgcr0_clk_off(struct drm_i915_private *dev_priv,
+ enum phy phy)
{
if (intel_phy_is_combo(dev_priv, phy)) {
return ICL_DPCLKA_CFGCR0_DDI_CLK_OFF(phy);
@@ -2698,8 +2818,9 @@ static void icl_sanitize_port_clk_off(struct drm_i915_private *dev_priv,
if (drm_WARN_ON(&dev_priv->drm, ddi_clk_needed))
continue;
- DRM_NOTE("PHY %c is disabled/in DSI mode with an ungated DDI clock, gate it\n",
- phy_name(phy));
+ drm_notice(&dev_priv->drm,
+ "PHY %c is disabled/in DSI mode with an ungated DDI clock, gate it\n",
+ phy_name(phy));
val |= icl_dpclka_cfgcr0_clk_off(dev_priv, phy);
intel_de_write(dev_priv, ICL_DPCLKA_CFGCR0, val);
}
@@ -2936,11 +3057,14 @@ icl_program_mg_dp_mode(struct intel_digital_port *intel_dig_port,
static void intel_dp_sink_set_fec_ready(struct intel_dp *intel_dp,
const struct intel_crtc_state *crtc_state)
{
+ struct drm_i915_private *i915 = dp_to_i915(intel_dp);
+
if (!crtc_state->fec_enable)
return;
if (drm_dp_dpcd_writeb(&intel_dp->aux, DP_FEC_CONFIGURATION, DP_FEC_READY) <= 0)
- DRM_DEBUG_KMS("Failed to set FEC_READY in the sink\n");
+ drm_dbg_kms(&i915->drm,
+ "Failed to set FEC_READY in the sink\n");
}
static void intel_ddi_enable_fec(struct intel_encoder *encoder,
@@ -2960,7 +3084,8 @@ static void intel_ddi_enable_fec(struct intel_encoder *encoder,
if (intel_de_wait_for_set(dev_priv, intel_dp->regs.dp_tp_status,
DP_TP_STATUS_FEC_ENABLE_LIVE, 1))
- DRM_ERROR("Timed out waiting for FEC Enable Status\n");
+ drm_err(&dev_priv->drm,
+ "Timed out waiting for FEC Enable Status\n");
}
static void intel_ddi_disable_fec_state(struct intel_encoder *encoder,
@@ -2980,7 +3105,8 @@ static void intel_ddi_disable_fec_state(struct intel_encoder *encoder,
intel_de_posting_read(dev_priv, intel_dp->regs.dp_tp_ctl);
}
-static void tgl_ddi_pre_enable_dp(struct intel_encoder *encoder,
+static void tgl_ddi_pre_enable_dp(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *crtc_state,
const struct drm_connector_state *conn_state)
{
@@ -3048,13 +3174,13 @@ static void tgl_ddi_pre_enable_dp(struct intel_encoder *encoder,
* 7.a Configure Transcoder Clock Select to direct the Port clock to the
* Transcoder.
*/
- intel_ddi_enable_pipe_clock(crtc_state);
+ intel_ddi_enable_pipe_clock(encoder, crtc_state);
/*
* 7.b Configure TRANS_DDI_FUNC_CTL DDI Select, DDI Mode Select & MST
* Transport Select
*/
- intel_ddi_config_transcoder_func(crtc_state);
+ intel_ddi_config_transcoder_func(encoder, crtc_state);
/*
* 7.c Configure & enable DP_TP_CTL with link training pattern 1
@@ -3120,7 +3246,8 @@ static void tgl_ddi_pre_enable_dp(struct intel_encoder *encoder,
intel_dsc_enable(encoder, crtc_state);
}
-static void hsw_ddi_pre_enable_dp(struct intel_encoder *encoder,
+static void hsw_ddi_pre_enable_dp(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *crtc_state,
const struct drm_connector_state *conn_state)
{
@@ -3185,21 +3312,22 @@ static void hsw_ddi_pre_enable_dp(struct intel_encoder *encoder,
intel_ddi_enable_fec(encoder, crtc_state);
if (!is_mst)
- intel_ddi_enable_pipe_clock(crtc_state);
+ intel_ddi_enable_pipe_clock(encoder, crtc_state);
intel_dsc_enable(encoder, crtc_state);
}
-static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder,
+static void intel_ddi_pre_enable_dp(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *crtc_state,
const struct drm_connector_state *conn_state)
{
struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
if (INTEL_GEN(dev_priv) >= 12)
- tgl_ddi_pre_enable_dp(encoder, crtc_state, conn_state);
+ tgl_ddi_pre_enable_dp(state, encoder, crtc_state, conn_state);
else
- hsw_ddi_pre_enable_dp(encoder, crtc_state, conn_state);
+ hsw_ddi_pre_enable_dp(state, encoder, crtc_state, conn_state);
/* MST will call a setting of MSA after an allocating of Virtual Channel
* from MST encoder pre_enable callback.
@@ -3211,7 +3339,8 @@ static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder,
}
}
-static void intel_ddi_pre_enable_hdmi(struct intel_encoder *encoder,
+static void intel_ddi_pre_enable_hdmi(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *crtc_state,
const struct drm_connector_state *conn_state)
{
@@ -3244,14 +3373,15 @@ static void intel_ddi_pre_enable_hdmi(struct intel_encoder *encoder,
if (IS_GEN9_BC(dev_priv))
skl_ddi_set_iboost(encoder, level, INTEL_OUTPUT_HDMI);
- intel_ddi_enable_pipe_clock(crtc_state);
+ intel_ddi_enable_pipe_clock(encoder, crtc_state);
intel_dig_port->set_infoframes(encoder,
crtc_state->has_infoframe,
crtc_state, conn_state);
}
-static void intel_ddi_pre_enable(struct intel_encoder *encoder,
+static void intel_ddi_pre_enable(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *crtc_state,
const struct drm_connector_state *conn_state)
{
@@ -3280,12 +3410,14 @@ static void intel_ddi_pre_enable(struct intel_encoder *encoder,
intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true);
if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) {
- intel_ddi_pre_enable_hdmi(encoder, crtc_state, conn_state);
+ intel_ddi_pre_enable_hdmi(state, encoder, crtc_state,
+ conn_state);
} else {
struct intel_lspcon *lspcon =
enc_to_intel_lspcon(encoder);
- intel_ddi_pre_enable_dp(encoder, crtc_state, conn_state);
+ intel_ddi_pre_enable_dp(state, encoder, crtc_state,
+ conn_state);
if (lspcon->active) {
struct intel_digital_port *dig_port =
enc_to_dig_port(encoder);
@@ -3328,7 +3460,8 @@ static void intel_disable_ddi_buf(struct intel_encoder *encoder,
intel_wait_ddi_buf_idle(dev_priv, port);
}
-static void intel_ddi_post_disable_dp(struct intel_encoder *encoder,
+static void intel_ddi_post_disable_dp(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *old_crtc_state,
const struct drm_connector_state *old_conn_state)
{
@@ -3339,6 +3472,8 @@ static void intel_ddi_post_disable_dp(struct intel_encoder *encoder,
INTEL_OUTPUT_DP_MST);
enum phy phy = intel_port_to_phy(dev_priv, encoder->port);
+ intel_dp_set_infoframes(encoder, false, old_crtc_state, old_conn_state);
+
/*
* Power down sink before disabling the port, otherwise we end
* up getting interrupts from the sink on detecting link loss.
@@ -3384,7 +3519,8 @@ static void intel_ddi_post_disable_dp(struct intel_encoder *encoder,
intel_ddi_clk_disable(encoder);
}
-static void intel_ddi_post_disable_hdmi(struct intel_encoder *encoder,
+static void intel_ddi_post_disable_hdmi(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *old_crtc_state,
const struct drm_connector_state *old_conn_state)
{
@@ -3407,22 +3543,8 @@ static void intel_ddi_post_disable_hdmi(struct intel_encoder *encoder,
intel_dp_dual_mode_set_tmds_output(intel_hdmi, false);
}
-static void icl_disable_transcoder_port_sync(const struct intel_crtc_state *old_crtc_state)
-{
- struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->uapi.crtc);
- struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
-
- if (old_crtc_state->master_transcoder == INVALID_TRANSCODER)
- return;
-
- DRM_DEBUG_KMS("Disabling Transcoder Port Sync on Slave Transcoder %s\n",
- transcoder_name(old_crtc_state->cpu_transcoder));
-
- intel_de_write(dev_priv,
- TRANS_DDI_FUNC_CTL2(old_crtc_state->cpu_transcoder), 0);
-}
-
-static void intel_ddi_post_disable(struct intel_encoder *encoder,
+static void intel_ddi_post_disable(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *old_crtc_state,
const struct drm_connector_state *old_conn_state)
{
@@ -3436,9 +3558,6 @@ static void intel_ddi_post_disable(struct intel_encoder *encoder,
intel_disable_pipe(old_crtc_state);
- if (INTEL_GEN(dev_priv) >= 11)
- icl_disable_transcoder_port_sync(old_crtc_state);
-
intel_ddi_disable_transcoder_func(old_crtc_state);
intel_dsc_disable(old_crtc_state);
@@ -3463,11 +3582,11 @@ static void intel_ddi_post_disable(struct intel_encoder *encoder,
*/
if (intel_crtc_has_type(old_crtc_state, INTEL_OUTPUT_HDMI))
- intel_ddi_post_disable_hdmi(encoder,
- old_crtc_state, old_conn_state);
+ intel_ddi_post_disable_hdmi(state, encoder, old_crtc_state,
+ old_conn_state);
else
- intel_ddi_post_disable_dp(encoder,
- old_crtc_state, old_conn_state);
+ intel_ddi_post_disable_dp(state, encoder, old_crtc_state,
+ old_conn_state);
if (INTEL_GEN(dev_priv) >= 11)
icl_unmap_plls_to_ports(encoder);
@@ -3480,7 +3599,8 @@ static void intel_ddi_post_disable(struct intel_encoder *encoder,
intel_tc_port_put_link(dig_port);
}
-void intel_ddi_fdi_post_disable(struct intel_encoder *encoder,
+void intel_ddi_fdi_post_disable(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *old_crtc_state,
const struct drm_connector_state *old_conn_state)
{
@@ -3514,7 +3634,43 @@ void intel_ddi_fdi_post_disable(struct intel_encoder *encoder,
intel_de_write(dev_priv, FDI_RX_CTL(PIPE_A), val);
}
-static void intel_enable_ddi_dp(struct intel_encoder *encoder,
+static void trans_port_sync_stop_link_train(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
+ const struct intel_crtc_state *crtc_state)
+{
+ const struct drm_connector_state *conn_state;
+ struct drm_connector *conn;
+ int i;
+
+ if (!crtc_state->sync_mode_slaves_mask)
+ return;
+
+ for_each_new_connector_in_state(&state->base, conn, conn_state, i) {
+ struct intel_encoder *slave_encoder =
+ to_intel_encoder(conn_state->best_encoder);
+ struct intel_crtc *slave_crtc = to_intel_crtc(conn_state->crtc);
+ const struct intel_crtc_state *slave_crtc_state;
+
+ if (!slave_crtc)
+ continue;
+
+ slave_crtc_state =
+ intel_atomic_get_new_crtc_state(state, slave_crtc);
+
+ if (slave_crtc_state->master_transcoder !=
+ crtc_state->cpu_transcoder)
+ continue;
+
+ intel_dp_stop_link_train(enc_to_intel_dp(slave_encoder));
+ }
+
+ usleep_range(200, 400);
+
+ intel_dp_stop_link_train(enc_to_intel_dp(encoder));
+}
+
+static void intel_enable_ddi_dp(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *crtc_state,
const struct drm_connector_state *conn_state)
{
@@ -3526,13 +3682,14 @@ static void intel_enable_ddi_dp(struct intel_encoder *encoder,
intel_dp_stop_link_train(intel_dp);
intel_edp_backlight_on(crtc_state, conn_state);
- intel_psr_enable(intel_dp, crtc_state);
- intel_dp_vsc_enable(intel_dp, crtc_state, conn_state);
- intel_dp_hdr_metadata_enable(intel_dp, crtc_state, conn_state);
+ intel_psr_enable(intel_dp, crtc_state, conn_state);
+ intel_dp_set_infoframes(encoder, true, crtc_state, conn_state);
intel_edp_drrs_enable(intel_dp, crtc_state);
if (crtc_state->has_audio)
intel_audio_codec_enable(encoder, crtc_state, conn_state);
+
+ trans_port_sync_stop_link_train(state, encoder, crtc_state);
}
static i915_reg_t
@@ -3555,7 +3712,8 @@ gen9_chicken_trans_reg_by_port(struct drm_i915_private *dev_priv,
return CHICKEN_TRANS(trans[port]);
}
-static void intel_enable_ddi_hdmi(struct intel_encoder *encoder,
+static void intel_enable_ddi_hdmi(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *crtc_state,
const struct drm_connector_state *conn_state)
{
@@ -3567,9 +3725,9 @@ static void intel_enable_ddi_hdmi(struct intel_encoder *encoder,
if (!intel_hdmi_handle_sink_scrambling(encoder, connector,
crtc_state->hdmi_high_tmds_clock_ratio,
crtc_state->hdmi_scrambling))
- DRM_DEBUG_KMS("[CONNECTOR:%d:%s] Failed to configure sink "
- "scrambling/TMDS bit clock ratio\n",
- connector->base.id, connector->name);
+ drm_dbg_kms(&dev_priv->drm,
+ "[CONNECTOR:%d:%s] Failed to configure sink scrambling/TMDS bit clock ratio\n",
+ connector->base.id, connector->name);
/* Display WA #1143: skl,kbl,cfl */
if (IS_GEN9_BC(dev_priv)) {
@@ -3617,20 +3775,23 @@ static void intel_enable_ddi_hdmi(struct intel_encoder *encoder,
intel_audio_codec_enable(encoder, crtc_state, conn_state);
}
-static void intel_enable_ddi(struct intel_encoder *encoder,
+static void intel_enable_ddi(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *crtc_state,
const struct drm_connector_state *conn_state)
{
- WARN_ON(crtc_state->has_pch_encoder);
+ drm_WARN_ON(state->base.dev, crtc_state->has_pch_encoder);
+
+ intel_ddi_enable_transcoder_func(encoder, crtc_state);
intel_enable_pipe(crtc_state);
intel_crtc_vblank_on(crtc_state);
if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI))
- intel_enable_ddi_hdmi(encoder, crtc_state, conn_state);
+ intel_enable_ddi_hdmi(state, encoder, crtc_state, conn_state);
else
- intel_enable_ddi_dp(encoder, crtc_state, conn_state);
+ intel_enable_ddi_dp(state, encoder, crtc_state, conn_state);
/* Enable hdcp if it's desired */
if (conn_state->content_protection ==
@@ -3640,7 +3801,8 @@ static void intel_enable_ddi(struct intel_encoder *encoder,
(u8)conn_state->hdcp_content_type);
}
-static void intel_disable_ddi_dp(struct intel_encoder *encoder,
+static void intel_disable_ddi_dp(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *old_crtc_state,
const struct drm_connector_state *old_conn_state)
{
@@ -3660,10 +3822,12 @@ static void intel_disable_ddi_dp(struct intel_encoder *encoder,
false);
}
-static void intel_disable_ddi_hdmi(struct intel_encoder *encoder,
+static void intel_disable_ddi_hdmi(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *old_crtc_state,
const struct drm_connector_state *old_conn_state)
{
+ struct drm_i915_private *i915 = to_i915(encoder->base.dev);
struct drm_connector *connector = old_conn_state->connector;
if (old_crtc_state->has_audio)
@@ -3672,23 +3836,28 @@ static void intel_disable_ddi_hdmi(struct intel_encoder *encoder,
if (!intel_hdmi_handle_sink_scrambling(encoder, connector,
false, false))
- DRM_DEBUG_KMS("[CONNECTOR:%d:%s] Failed to reset sink scrambling/TMDS bit clock ratio\n",
- connector->base.id, connector->name);
+ drm_dbg_kms(&i915->drm,
+ "[CONNECTOR:%d:%s] Failed to reset sink scrambling/TMDS bit clock ratio\n",
+ connector->base.id, connector->name);
}
-static void intel_disable_ddi(struct intel_encoder *encoder,
+static void intel_disable_ddi(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *old_crtc_state,
const struct drm_connector_state *old_conn_state)
{
intel_hdcp_disable(to_intel_connector(old_conn_state->connector));
if (intel_crtc_has_type(old_crtc_state, INTEL_OUTPUT_HDMI))
- intel_disable_ddi_hdmi(encoder, old_crtc_state, old_conn_state);
+ intel_disable_ddi_hdmi(state, encoder, old_crtc_state,
+ old_conn_state);
else
- intel_disable_ddi_dp(encoder, old_crtc_state, old_conn_state);
+ intel_disable_ddi_dp(state, encoder, old_crtc_state,
+ old_conn_state);
}
-static void intel_ddi_update_pipe_dp(struct intel_encoder *encoder,
+static void intel_ddi_update_pipe_dp(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *crtc_state,
const struct drm_connector_state *conn_state)
{
@@ -3696,21 +3865,24 @@ static void intel_ddi_update_pipe_dp(struct intel_encoder *encoder,
intel_ddi_set_dp_msa(crtc_state, conn_state);
- intel_psr_update(intel_dp, crtc_state);
+ intel_psr_update(intel_dp, crtc_state, conn_state);
+ intel_dp_set_infoframes(encoder, true, crtc_state, conn_state);
intel_edp_drrs_enable(intel_dp, crtc_state);
- intel_panel_update_backlight(encoder, crtc_state, conn_state);
+ intel_panel_update_backlight(state, encoder, crtc_state, conn_state);
}
-static void intel_ddi_update_pipe(struct intel_encoder *encoder,
+static void intel_ddi_update_pipe(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *crtc_state,
const struct drm_connector_state *conn_state)
{
if (!intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI))
- intel_ddi_update_pipe_dp(encoder, crtc_state, conn_state);
+ intel_ddi_update_pipe_dp(state, encoder, crtc_state,
+ conn_state);
- intel_hdcp_update_pipe(encoder, crtc_state, conn_state);
+ intel_hdcp_update_pipe(state, encoder, crtc_state, conn_state);
}
static void
@@ -3722,7 +3894,7 @@ intel_ddi_update_prepare(struct intel_atomic_state *state,
crtc ? intel_atomic_get_new_crtc_state(state, crtc) : NULL;
int required_lanes = crtc_state ? crtc_state->lane_count : 1;
- WARN_ON(crtc && crtc->active);
+ drm_WARN_ON(state->base.dev, crtc && crtc->active);
intel_tc_port_get_link(enc_to_dig_port(encoder),
required_lanes);
@@ -3739,7 +3911,8 @@ intel_ddi_update_complete(struct intel_atomic_state *state,
}
static void
-intel_ddi_pre_pll_enable(struct intel_encoder *encoder,
+intel_ddi_pre_pll_enable(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *crtc_state,
const struct drm_connector_state *conn_state)
{
@@ -3813,6 +3986,74 @@ static void intel_ddi_prepare_link_retrain(struct intel_dp *intel_dp)
udelay(600);
}
+static void intel_ddi_set_link_train(struct intel_dp *intel_dp,
+ u8 dp_train_pat)
+{
+ struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
+ u8 train_pat_mask = drm_dp_training_pattern_mask(intel_dp->dpcd);
+ enum port port = dp_to_dig_port(intel_dp)->base.port;
+ u32 temp;
+
+ temp = intel_de_read(dev_priv, intel_dp->regs.dp_tp_ctl);
+
+ if (dp_train_pat & DP_LINK_SCRAMBLING_DISABLE)
+ temp |= DP_TP_CTL_SCRAMBLE_DISABLE;
+ else
+ temp &= ~DP_TP_CTL_SCRAMBLE_DISABLE;
+
+ temp &= ~DP_TP_CTL_LINK_TRAIN_MASK;
+ switch (dp_train_pat & train_pat_mask) {
+ case DP_TRAINING_PATTERN_DISABLE:
+ temp |= DP_TP_CTL_LINK_TRAIN_NORMAL;
+ break;
+ case DP_TRAINING_PATTERN_1:
+ temp |= DP_TP_CTL_LINK_TRAIN_PAT1;
+ break;
+ case DP_TRAINING_PATTERN_2:
+ temp |= DP_TP_CTL_LINK_TRAIN_PAT2;
+ break;
+ case DP_TRAINING_PATTERN_3:
+ temp |= DP_TP_CTL_LINK_TRAIN_PAT3;
+ break;
+ case DP_TRAINING_PATTERN_4:
+ temp |= DP_TP_CTL_LINK_TRAIN_PAT4;
+ break;
+ }
+
+ intel_de_write(dev_priv, intel_dp->regs.dp_tp_ctl, temp);
+
+ intel_de_write(dev_priv, DDI_BUF_CTL(port), intel_dp->DP);
+ intel_de_posting_read(dev_priv, DDI_BUF_CTL(port));
+}
+
+static void intel_ddi_set_idle_link_train(struct intel_dp *intel_dp)
+{
+ struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base;
+ struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
+ enum port port = encoder->port;
+ u32 val;
+
+ val = intel_de_read(dev_priv, intel_dp->regs.dp_tp_ctl);
+ val &= ~DP_TP_CTL_LINK_TRAIN_MASK;
+ val |= DP_TP_CTL_LINK_TRAIN_IDLE;
+ intel_de_write(dev_priv, intel_dp->regs.dp_tp_ctl, val);
+
+ /*
+ * Until TGL, on PORT_A we can have only eDP in SST mode. There the only
+ * reason we need to set idle transmission mode is to work around a HW
+ * issue where we enable the pipe while not in idle link-training mode.
+ * In this case there is a requirement to wait for a minimum number of
+ * idle patterns to be sent.
+ */
+ if (port == PORT_A && INTEL_GEN(dev_priv) < 12)
+ return;
+
+ if (intel_de_wait_for_set(dev_priv, intel_dp->regs.dp_tp_status,
+ DP_TP_STATUS_IDLE_DONE, 1))
+ drm_err(&dev_priv->drm,
+ "Timed out waiting for DP idle patterns\n");
+}
+
static bool intel_ddi_is_audio_enabled(struct drm_i915_private *dev_priv,
enum transcoder cpu_transcoder)
{
@@ -3839,6 +4080,66 @@ void intel_ddi_compute_min_voltage_level(struct drm_i915_private *dev_priv,
crtc_state->min_voltage_level = 2;
}
+static enum transcoder bdw_transcoder_master_readout(struct drm_i915_private *dev_priv,
+ enum transcoder cpu_transcoder)
+{
+ u32 master_select;
+
+ if (INTEL_GEN(dev_priv) >= 11) {
+ u32 ctl2 = intel_de_read(dev_priv, TRANS_DDI_FUNC_CTL2(cpu_transcoder));
+
+ if ((ctl2 & PORT_SYNC_MODE_ENABLE) == 0)
+ return INVALID_TRANSCODER;
+
+ master_select = REG_FIELD_GET(PORT_SYNC_MODE_MASTER_SELECT_MASK, ctl2);
+ } else {
+ u32 ctl = intel_de_read(dev_priv, TRANS_DDI_FUNC_CTL(cpu_transcoder));
+
+ if ((ctl & TRANS_DDI_PORT_SYNC_ENABLE) == 0)
+ return INVALID_TRANSCODER;
+
+ master_select = REG_FIELD_GET(TRANS_DDI_PORT_SYNC_MASTER_SELECT_MASK, ctl);
+ }
+
+ if (master_select == 0)
+ return TRANSCODER_EDP;
+ else
+ return master_select - 1;
+}
+
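+/*
+ * Fill out master_transcoder and sync_mode_slaves_mask from the port sync
+ * state of all powered-up transcoders.
+ */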
+static void bdw_get_trans_port_sync_config(struct intel_crtc_state *crtc_state)
+{
+ struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
+ u32 transcoders = BIT(TRANSCODER_A) | BIT(TRANSCODER_B) |
+ BIT(TRANSCODER_C) | BIT(TRANSCODER_D);
+ enum transcoder cpu_transcoder;
+
+ crtc_state->master_transcoder =
+ bdw_transcoder_master_readout(dev_priv, crtc_state->cpu_transcoder);
+
+ for_each_cpu_transcoder_masked(dev_priv, cpu_transcoder, transcoders) {
+ enum intel_display_power_domain power_domain;
+ intel_wakeref_t trans_wakeref;
+
+ power_domain = POWER_DOMAIN_TRANSCODER(cpu_transcoder);
+ trans_wakeref = intel_display_power_get_if_enabled(dev_priv,
+ power_domain);
+
+ if (!trans_wakeref)
+ continue;
+
+ if (bdw_transcoder_master_readout(dev_priv, cpu_transcoder) ==
+ crtc_state->cpu_transcoder)
+ crtc_state->sync_mode_slaves_mask |= BIT(cpu_transcoder);
+
+ intel_display_power_put(dev_priv, power_domain, trans_wakeref);
+ }
+
+ drm_WARN_ON(&dev_priv->drm,
+ crtc_state->master_transcoder != INVALID_TRANSCODER &&
+ crtc_state->sync_mode_slaves_mask);
+}
+
void intel_ddi_get_config(struct intel_encoder *encoder,
struct intel_crtc_state *pipe_config)
{
@@ -3930,11 +4231,15 @@ void intel_ddi_get_config(struct intel_encoder *encoder,
pipe_config->fec_enable =
intel_de_read(dev_priv, dp_tp_ctl) & DP_TP_CTL_FEC_ENABLE;
- DRM_DEBUG_KMS("[ENCODER:%d:%s] Fec status: %u\n",
- encoder->base.base.id, encoder->base.name,
- pipe_config->fec_enable);
+ drm_dbg_kms(&dev_priv->drm,
+ "[ENCODER:%d:%s] Fec status: %u\n",
+ encoder->base.base.id, encoder->base.name,
+ pipe_config->fec_enable);
}
+ pipe_config->infoframes.enable |=
+ intel_hdmi_infoframes_enabled(encoder, pipe_config);
+
break;
case TRANS_DDI_MODE_SELECT_DP_MST:
pipe_config->output_types |= BIT(INTEL_OUTPUT_DP_MST);
@@ -3946,6 +4251,9 @@ void intel_ddi_get_config(struct intel_encoder *encoder,
REG_FIELD_GET(TRANS_DDI_MST_TRANSPORT_SELECT_MASK, temp);
intel_dp_get_m_n(intel_crtc, pipe_config);
+
+ pipe_config->infoframes.enable |=
+ intel_hdmi_infoframes_enabled(encoder, pipe_config);
break;
default:
break;
@@ -3969,8 +4277,9 @@ void intel_ddi_get_config(struct intel_encoder *encoder,
* up by the BIOS, and thus we can't get the mode at module
* load.
*/
- DRM_DEBUG_KMS("pipe has %d bpp for eDP panel, overriding BIOS-provided max %d bpp\n",
- pipe_config->pipe_bpp, dev_priv->vbt.edp.bpp);
+ drm_dbg_kms(&dev_priv->drm,
+ "pipe has %d bpp for eDP panel, overriding BIOS-provided max %d bpp\n",
+ pipe_config->pipe_bpp, dev_priv->vbt.edp.bpp);
dev_priv->vbt.edp.bpp = pipe_config->pipe_bpp;
}
@@ -3996,6 +4305,12 @@ void intel_ddi_get_config(struct intel_encoder *encoder,
intel_read_infoframe(encoder, pipe_config,
HDMI_INFOFRAME_TYPE_DRM,
&pipe_config->infoframes.drm);
+
+ if (INTEL_GEN(dev_priv) >= 8)
+ bdw_get_trans_port_sync_config(pipe_config);
+
+ intel_read_dp_sdp(encoder, pipe_config, HDMI_PACKET_TYPE_GAMUT_METADATA);
+ intel_read_dp_sdp(encoder, pipe_config, DP_SDP_VSC);
}
static enum intel_output_type
@@ -4025,7 +4340,7 @@ static int intel_ddi_compute_config(struct intel_encoder *encoder,
enum port port = encoder->port;
int ret;
- if (HAS_TRANSCODER_EDP(dev_priv) && port == PORT_A)
+ if (HAS_TRANSCODER(dev_priv, TRANSCODER_EDP) && port == PORT_A)
pipe_config->cpu_transcoder = TRANSCODER_EDP;
if (intel_crtc_has_type(pipe_config, INTEL_OUTPUT_HDMI)) {
@@ -4097,7 +4412,11 @@ intel_ddi_port_sync_transcoders(const struct intel_crtc_state *ref_crtc_state,
u8 transcoders = 0;
int i;
- if (INTEL_GEN(dev_priv) < 11)
+ /*
+ * We don't enable port sync on BDW due to missing workarounds and
+ * due to not having adjusted the modeset sequence appropriately.
+ */
+ if (INTEL_GEN(dev_priv) < 9)
return 0;
if (!intel_crtc_has_type(ref_crtc_state, INTEL_OUTPUT_DP))
@@ -4129,12 +4448,13 @@ static int intel_ddi_compute_config_late(struct intel_encoder *encoder,
struct intel_crtc_state *crtc_state,
struct drm_connector_state *conn_state)
{
+ struct drm_i915_private *i915 = to_i915(encoder->base.dev);
struct drm_connector *connector = conn_state->connector;
u8 port_sync_transcoders = 0;
- DRM_DEBUG_KMS("[ENCODER:%d:%s] [CRTC:%d:%s]",
- encoder->base.base.id, encoder->base.name,
- crtc_state->uapi.crtc->base.id, crtc_state->uapi.crtc->name);
+ drm_dbg_kms(&i915->drm, "[ENCODER:%d:%s] [CRTC:%d:%s]",
+ encoder->base.base.id, encoder->base.name,
+ crtc_state->uapi.crtc->base.id, crtc_state->uapi.crtc->name);
if (connector->has_tile)
port_sync_transcoders = intel_ddi_port_sync_transcoders(crtc_state,
@@ -4187,6 +4507,20 @@ intel_ddi_init_dp_connector(struct intel_digital_port *intel_dig_port)
intel_dig_port->dp.output_reg = DDI_BUF_CTL(port);
intel_dig_port->dp.prepare_link_retrain =
intel_ddi_prepare_link_retrain;
+ intel_dig_port->dp.set_link_train = intel_ddi_set_link_train;
+ intel_dig_port->dp.set_idle_link_train = intel_ddi_set_idle_link_train;
+
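+ /* Select the platform specific vswing/pre-emphasis programming hook. */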
+ if (INTEL_GEN(dev_priv) >= 12)
+ intel_dig_port->dp.set_signal_levels = tgl_set_signal_levels;
+ else if (INTEL_GEN(dev_priv) >= 11)
+ intel_dig_port->dp.set_signal_levels = icl_set_signal_levels;
+ else if (IS_CANNONLAKE(dev_priv))
+ intel_dig_port->dp.set_signal_levels = cnl_set_signal_levels;
+ else if (IS_GEN9_LP(dev_priv))
+ intel_dig_port->dp.set_signal_levels = bxt_set_signal_levels;
+ else
+ intel_dig_port->dp.set_signal_levels = hsw_set_signal_levels;
+
if (INTEL_GEN(dev_priv) < 12) {
intel_dig_port->dp.regs.dp_tp_ctl = DP_TP_CTL(port);
intel_dig_port->dp.regs.dp_tp_status = DP_TP_STATUS(port);
@@ -4278,7 +4612,8 @@ static int intel_hdmi_reset_link(struct intel_encoder *encoder,
ret = drm_scdc_readb(adapter, SCDC_TMDS_CONFIG, &config);
if (ret < 0) {
- DRM_ERROR("Failed to read TMDS config: %d\n", ret);
+ drm_err(&dev_priv->drm, "Failed to read TMDS config: %d\n",
+ ret);
return 0;
}
@@ -4302,15 +4637,17 @@ static int intel_hdmi_reset_link(struct intel_encoder *encoder,
static enum intel_hotplug_state
intel_ddi_hotplug(struct intel_encoder *encoder,
- struct intel_connector *connector,
- bool irq_received)
+ struct intel_connector *connector)
{
+ struct drm_i915_private *i915 = to_i915(encoder->base.dev);
struct intel_digital_port *dig_port = enc_to_dig_port(encoder);
+ enum phy phy = intel_port_to_phy(i915, encoder->port);
+ bool is_tc = intel_phy_is_tc(i915, phy);
struct drm_modeset_acquire_ctx ctx;
enum intel_hotplug_state state;
int ret;
- state = intel_encoder_hotplug(encoder, connector, irq_received);
+ state = intel_encoder_hotplug(encoder, connector);
drm_modeset_acquire_init(&ctx, 0);
@@ -4348,14 +4685,45 @@ intel_ddi_hotplug(struct intel_encoder *encoder,
* valid EDID. To solve this, schedule another detection cycle if this
* time around we didn't detect any change in the sink's connection
* status.
+ *
+ * Type-c connectors which get their HPD signal deasserted then
+ * reasserted, without unplugging/replugging the sink from the
+ * connector, introduce a delay until the AUX channel communication
+ * becomes functional. Retry the detection for 5 seconds on type-c
+ * connectors to account for this delay.
*/
- if (state == INTEL_HOTPLUG_UNCHANGED && irq_received &&
+ if (state == INTEL_HOTPLUG_UNCHANGED &&
+ connector->hotplug_retries < (is_tc ? 5 : 1) &&
!dig_port->dp.is_mst)
state = INTEL_HOTPLUG_RETRY;
return state;
}
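+/*
+ * Live connection state readout: each helper checks the HPD pin's live
+ * status bit in the platform's hotplug interrupt status register.
+ */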
+static bool lpt_digital_port_connected(struct intel_encoder *encoder)
+{
+ struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
+ u32 bit = dev_priv->hotplug.pch_hpd[encoder->hpd_pin];
+
+ return intel_de_read(dev_priv, SDEISR) & bit;
+}
+
+static bool hsw_digital_port_connected(struct intel_encoder *encoder)
+{
+ struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
+ u32 bit = dev_priv->hotplug.hpd[encoder->hpd_pin];
+
+ return intel_de_read(dev_priv, DEISR) & bit;
+}
+
+static bool bdw_digital_port_connected(struct intel_encoder *encoder)
+{
+ struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
+ u32 bit = dev_priv->hotplug.hpd[encoder->hpd_pin];
+
+ return intel_de_read(dev_priv, GEN8_DE_PORT_ISR) & bit;
+}
+
static struct intel_connector *
intel_ddi_init_hdmi_connector(struct intel_digital_port *intel_dig_port)
{
@@ -4424,7 +4792,8 @@ intel_ddi_max_lanes(struct intel_digital_port *intel_dport)
* so we use the proper lane count for our calculations.
*/
if (intel_ddi_a_force_4_lanes(intel_dport)) {
- DRM_DEBUG_KMS("Forcing DDI_A_4_LANES for port A\n");
+ drm_dbg_kms(&dev_priv->drm,
+ "Forcing DDI_A_4_LANES for port A\n");
intel_dport->saved_port_bits |= DDI_A_4_LANES;
max_lanes = 4;
}
@@ -4452,12 +4821,14 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port)
init_dp = true;
init_lspcon = true;
init_hdmi = false;
- DRM_DEBUG_KMS("VBT says port %c has lspcon\n", port_name(port));
+ drm_dbg_kms(&dev_priv->drm, "VBT says port %c has lspcon\n",
+ port_name(port));
}
if (!init_dp && !init_hdmi) {
- DRM_DEBUG_KMS("VBT says port %c is not DVI/HDMI/DP compatible, respect it\n",
- port_name(port));
+ drm_dbg_kms(&dev_priv->drm,
+ "VBT says port %c is not DVI/HDMI/DP compatible, respect it\n",
+ port_name(port));
return;
}
@@ -4536,17 +4907,36 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port)
if (init_lspcon) {
if (lspcon_init(intel_dig_port))
/* TODO: handle hdmi info frame part */
- DRM_DEBUG_KMS("LSPCON init success on port %c\n",
- port_name(port));
+ drm_dbg_kms(&dev_priv->drm,
+ "LSPCON init success on port %c\n",
+ port_name(port));
else
/*
* LSPCON init failed, but DP init was successful, so
* let's try to drive it as a DP++ port.
*/
- DRM_ERROR("LSPCON init failed on port %c\n",
+ drm_err(&dev_priv->drm,
+ "LSPCON init failed on port %c\n",
port_name(port));
}
+ if (INTEL_GEN(dev_priv) >= 11) {
+ if (intel_phy_is_tc(dev_priv, phy))
+ intel_dig_port->connected = intel_tc_port_connected;
+ else
+ intel_dig_port->connected = lpt_digital_port_connected;
+ } else if (INTEL_GEN(dev_priv) >= 8) {
+ if (port == PORT_A || IS_GEN9_LP(dev_priv))
+ intel_dig_port->connected = bdw_digital_port_connected;
+ else
+ intel_dig_port->connected = lpt_digital_port_connected;
+ } else {
+ if (port == PORT_A)
+ intel_dig_port->connected = hsw_digital_port_connected;
+ else
+ intel_dig_port->connected = lpt_digital_port_connected;
+ }
+
intel_infoframe_init(intel_dig_port);
return;
diff --git a/drivers/gpu/drm/i915/display/intel_ddi.h b/drivers/gpu/drm/i915/display/intel_ddi.h
index 55fd72b901fe..fbdf8ddde486 100644
--- a/drivers/gpu/drm/i915/display/intel_ddi.h
+++ b/drivers/gpu/drm/i915/display/intel_ddi.h
@@ -17,16 +17,19 @@ struct intel_dp;
struct intel_dpll_hw_state;
struct intel_encoder;
-void intel_ddi_fdi_post_disable(struct intel_encoder *intel_encoder,
+void intel_ddi_fdi_post_disable(struct intel_atomic_state *state,
+ struct intel_encoder *intel_encoder,
const struct intel_crtc_state *old_crtc_state,
const struct drm_connector_state *old_conn_state);
void hsw_fdi_link_train(struct intel_encoder *encoder,
const struct intel_crtc_state *crtc_state);
void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port);
bool intel_ddi_get_hw_state(struct intel_encoder *encoder, enum pipe *pipe);
-void intel_ddi_enable_transcoder_func(const struct intel_crtc_state *crtc_state);
+void intel_ddi_enable_transcoder_func(struct intel_encoder *encoder,
+ const struct intel_crtc_state *crtc_state);
void intel_ddi_disable_transcoder_func(const struct intel_crtc_state *crtc_state);
-void intel_ddi_enable_pipe_clock(const struct intel_crtc_state *crtc_state);
+void intel_ddi_enable_pipe_clock(struct intel_encoder *encoder,
+ const struct intel_crtc_state *crtc_state);
void intel_ddi_disable_pipe_clock(const struct intel_crtc_state *crtc_state);
void intel_ddi_set_dp_msa(const struct intel_crtc_state *crtc_state,
const struct drm_connector_state *conn_state);
diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index 346846609f45..9ea1a397d1b5 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -238,9 +238,9 @@ static void intel_update_czclk(struct drm_i915_private *dev_priv)
dev_priv->czclk_freq);
}
-static inline u32 /* units of 100MHz */
-intel_fdi_link_freq(struct drm_i915_private *dev_priv,
- const struct intel_crtc_state *pipe_config)
+/* units of 100MHz */
+static u32 intel_fdi_link_freq(struct drm_i915_private *dev_priv,
+ const struct intel_crtc_state *pipe_config)
{
if (HAS_DDI(dev_priv))
return pipe_config->port_clock; /* SPLL */
@@ -525,7 +525,7 @@ skl_wa_827(struct drm_i915_private *dev_priv, enum pipe pipe, bool enable)
intel_de_read(dev_priv, CLKGATE_DIS_PSL(pipe)) & ~(DUPS1_GATING_DIS | DUPS2_GATING_DIS));
}
-/* Wa_2006604312:icl */
+/* Wa_2006604312:icl,ehl */
static void
icl_wa_scalerclkgating(struct drm_i915_private *dev_priv, enum pipe pipe,
bool enable)
@@ -544,17 +544,23 @@ needs_modeset(const struct intel_crtc_state *state)
return drm_atomic_crtc_needs_modeset(&state->uapi);
}
-bool
-is_trans_port_sync_mode(const struct intel_crtc_state *crtc_state)
+static bool
+is_trans_port_sync_slave(const struct intel_crtc_state *crtc_state)
{
- return (crtc_state->master_transcoder != INVALID_TRANSCODER ||
- crtc_state->sync_mode_slaves_mask);
+ return crtc_state->master_transcoder != INVALID_TRANSCODER;
}
static bool
-is_trans_port_sync_slave(const struct intel_crtc_state *crtc_state)
+is_trans_port_sync_master(const struct intel_crtc_state *crtc_state)
{
- return crtc_state->master_transcoder != INVALID_TRANSCODER;
+ return crtc_state->sync_mode_slaves_mask != 0;
+}
+
+bool
+is_trans_port_sync_mode(const struct intel_crtc_state *crtc_state)
+{
+ return is_trans_port_sync_master(crtc_state) ||
+ is_trans_port_sync_slave(crtc_state);
}
/*
@@ -620,45 +626,43 @@ int chv_calc_dpll_params(int refclk, struct dpll *clock)
return clock->dot / 5;
}
-#define INTELPllInvalid(s) do { /* DRM_DEBUG(s); */ return false; } while (0)
-
/*
* Returns whether the given set of divisors are valid for a given refclk with
* the given connectors.
*/
-static bool intel_PLL_is_valid(struct drm_i915_private *dev_priv,
+static bool intel_pll_is_valid(struct drm_i915_private *dev_priv,
const struct intel_limit *limit,
const struct dpll *clock)
{
- if (clock->n < limit->n.min || limit->n.max < clock->n)
- INTELPllInvalid("n out of range\n");
- if (clock->p1 < limit->p1.min || limit->p1.max < clock->p1)
- INTELPllInvalid("p1 out of range\n");
- if (clock->m2 < limit->m2.min || limit->m2.max < clock->m2)
- INTELPllInvalid("m2 out of range\n");
- if (clock->m1 < limit->m1.min || limit->m1.max < clock->m1)
- INTELPllInvalid("m1 out of range\n");
+ if (clock->n < limit->n.min || limit->n.max < clock->n)
+ return false;
+ if (clock->p1 < limit->p1.min || limit->p1.max < clock->p1)
+ return false;
+ if (clock->m2 < limit->m2.min || limit->m2.max < clock->m2)
+ return false;
+ if (clock->m1 < limit->m1.min || limit->m1.max < clock->m1)
+ return false;
if (!IS_PINEVIEW(dev_priv) && !IS_VALLEYVIEW(dev_priv) &&
!IS_CHERRYVIEW(dev_priv) && !IS_GEN9_LP(dev_priv))
if (clock->m1 <= clock->m2)
- INTELPllInvalid("m1 <= m2\n");
+ return false;
if (!IS_VALLEYVIEW(dev_priv) && !IS_CHERRYVIEW(dev_priv) &&
!IS_GEN9_LP(dev_priv)) {
if (clock->p < limit->p.min || limit->p.max < clock->p)
- INTELPllInvalid("p out of range\n");
+ return false;
if (clock->m < limit->m.min || limit->m.max < clock->m)
- INTELPllInvalid("m out of range\n");
+ return false;
}
if (clock->vco < limit->vco.min || limit->vco.max < clock->vco)
- INTELPllInvalid("vco out of range\n");
+ return false;
/* XXX: We may need to be checking "Dot clock" depending on the multiplier,
* connector, etc., rather than just a single range.
*/
if (clock->dot < limit->dot.min || limit->dot.max < clock->dot)
- INTELPllInvalid("dot out of range\n");
+ return false;
return true;
}
@@ -725,7 +729,7 @@ i9xx_find_best_dpll(const struct intel_limit *limit,
int this_err;
i9xx_calc_dpll_params(refclk, &clock);
- if (!intel_PLL_is_valid(to_i915(dev),
+ if (!intel_pll_is_valid(to_i915(dev),
limit,
&clock))
continue;
@@ -781,7 +785,7 @@ pnv_find_best_dpll(const struct intel_limit *limit,
int this_err;
pnv_calc_dpll_params(refclk, &clock);
- if (!intel_PLL_is_valid(to_i915(dev),
+ if (!intel_pll_is_valid(to_i915(dev),
limit,
&clock))
continue;
@@ -842,7 +846,7 @@ g4x_find_best_dpll(const struct intel_limit *limit,
int this_err;
i9xx_calc_dpll_params(refclk, &clock);
- if (!intel_PLL_is_valid(to_i915(dev),
+ if (!intel_pll_is_valid(to_i915(dev),
limit,
&clock))
continue;
@@ -939,7 +943,7 @@ vlv_find_best_dpll(const struct intel_limit *limit,
vlv_calc_dpll_params(refclk, &clock);
- if (!intel_PLL_is_valid(to_i915(dev),
+ if (!intel_pll_is_valid(to_i915(dev),
limit,
&clock))
continue;
@@ -1008,7 +1012,7 @@ chv_find_best_dpll(const struct intel_limit *limit,
chv_calc_dpll_params(refclk, &clock);
- if (!intel_PLL_is_valid(to_i915(dev), limit, &clock))
+ if (!intel_pll_is_valid(to_i915(dev), limit, &clock))
continue;
if (!vlv_PLL_is_optimal(dev, target, &clock, best_clock,
@@ -1969,16 +1973,16 @@ static bool is_aux_plane(const struct drm_framebuffer *fb, int plane)
static int main_to_ccs_plane(const struct drm_framebuffer *fb, int main_plane)
{
- WARN_ON(!is_ccs_modifier(fb->modifier) ||
- (main_plane && main_plane >= fb->format->num_planes / 2));
+ drm_WARN_ON(fb->dev, !is_ccs_modifier(fb->modifier) ||
+ (main_plane && main_plane >= fb->format->num_planes / 2));
return fb->format->num_planes / 2 + main_plane;
}
static int ccs_to_main_plane(const struct drm_framebuffer *fb, int ccs_plane)
{
- WARN_ON(!is_ccs_modifier(fb->modifier) ||
- ccs_plane < fb->format->num_planes / 2);
+ drm_WARN_ON(fb->dev, !is_ccs_modifier(fb->modifier) ||
+ ccs_plane < fb->format->num_planes / 2);
return ccs_plane - fb->format->num_planes / 2;
}
@@ -2910,6 +2914,7 @@ intel_fb_plane_get_subsampling(int *hsub, int *vsub,
static int
intel_fb_check_ccs_xy(struct drm_framebuffer *fb, int ccs_plane, int x, int y)
{
+ struct drm_i915_private *i915 = to_i915(fb->dev);
struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
int main_plane;
int hsub, vsub;
@@ -2938,7 +2943,8 @@ intel_fb_check_ccs_xy(struct drm_framebuffer *fb, int ccs_plane, int x, int y)
* x/y offsets must match between CCS and the main surface.
*/
if (main_x != ccs_x || main_y != ccs_y) {
- DRM_DEBUG_KMS("Bad CCS x/y (main %d,%d ccs %d,%d) full (main %d,%d ccs %d,%d)\n",
+ drm_dbg_kms(&i915->drm,
+ "Bad CCS x/y (main %d,%d ccs %d,%d) full (main %d,%d ccs %d,%d)\n",
main_x, main_y,
ccs_x, ccs_y,
intel_fb->normal[main_plane].x,
@@ -2986,7 +2992,7 @@ setup_fb_rotation(int plane, const struct intel_remapped_plane_info *plane_info,
fb->modifier != I915_FORMAT_MOD_Yf_TILED)
return 0;
- if (WARN_ON(plane >= ARRAY_SIZE(rot_info->plane)))
+ if (drm_WARN_ON(fb->dev, plane >= ARRAY_SIZE(rot_info->plane)))
return 0;
rot_info->plane[plane] = *plane_info;
@@ -3336,6 +3342,8 @@ int skl_format_to_fourcc(int format, bool rgb_order, bool alpha)
return DRM_FORMAT_RGB565;
case PLANE_CTL_FORMAT_NV12:
return DRM_FORMAT_NV12;
+ case PLANE_CTL_FORMAT_XYUV:
+ return DRM_FORMAT_XYUV8888;
case PLANE_CTL_FORMAT_P010:
return DRM_FORMAT_P010;
case PLANE_CTL_FORMAT_P012:
@@ -4580,6 +4588,8 @@ static u32 skl_plane_ctl_format(u32 pixel_format)
case DRM_FORMAT_XRGB16161616F:
case DRM_FORMAT_ARGB16161616F:
return PLANE_CTL_FORMAT_XRGB_16161616F;
+ case DRM_FORMAT_XYUV8888:
+ return PLANE_CTL_FORMAT_XYUV;
case DRM_FORMAT_YUYV:
return PLANE_CTL_FORMAT_YUV422 | PLANE_CTL_YUV422_YUYV;
case DRM_FORMAT_YVYU:
@@ -4998,37 +5008,6 @@ static void icl_set_pipe_chicken(struct intel_crtc *crtc)
intel_de_write(dev_priv, PIPE_CHICKEN(pipe), tmp);
}
-static void icl_enable_trans_port_sync(const struct intel_crtc_state *crtc_state)
-{
- struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
- struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
- u32 trans_ddi_func_ctl2_val;
- u8 master_select;
-
- /*
- * Configure the master select and enable Transcoder Port Sync for
- * Slave CRTCs transcoder.
- */
- if (crtc_state->master_transcoder == INVALID_TRANSCODER)
- return;
-
- if (crtc_state->master_transcoder == TRANSCODER_EDP)
- master_select = 0;
- else
- master_select = crtc_state->master_transcoder + 1;
-
- /* Set the master select bits for Tranascoder Port Sync */
- trans_ddi_func_ctl2_val = (PORT_SYNC_MODE_MASTER_SELECT(master_select) &
- PORT_SYNC_MODE_MASTER_SELECT_MASK) <<
- PORT_SYNC_MODE_MASTER_SELECT_SHIFT;
- /* Enable Transcoder Port Sync */
- trans_ddi_func_ctl2_val |= PORT_SYNC_MODE_ENABLE;
-
- intel_de_write(dev_priv,
- TRANS_DDI_FUNC_CTL2(crtc_state->cpu_transcoder),
- trans_ddi_func_ctl2_val);
-}
-
static void intel_fdi_normal_train(struct intel_crtc *crtc)
{
struct drm_device *dev = crtc->base.dev;
@@ -6110,30 +6089,26 @@ skl_update_scaler(struct intel_crtc_state *crtc_state, bool force_detach,
return 0;
}
-/**
- * skl_update_scaler_crtc - Stages update to scaler state for a given crtc.
- *
- * @state: crtc's scaler state
- *
- * Return
- * 0 - scaler_usage updated successfully
- * error - requested scaling cannot be supported or other error condition
- */
-int skl_update_scaler_crtc(struct intel_crtc_state *state)
+static int skl_update_scaler_crtc(struct intel_crtc_state *crtc_state)
{
- const struct drm_display_mode *adjusted_mode = &state->hw.adjusted_mode;
- bool need_scaler = false;
+ const struct drm_display_mode *adjusted_mode =
+ &crtc_state->hw.adjusted_mode;
+ int width, height;
- if (state->output_format == INTEL_OUTPUT_FORMAT_YCBCR420 ||
- state->pch_pfit.enabled)
- need_scaler = true;
+ if (crtc_state->pch_pfit.enabled) {
+ width = drm_rect_width(&crtc_state->pch_pfit.dst);
+ height = drm_rect_height(&crtc_state->pch_pfit.dst);
+ } else {
+ width = adjusted_mode->crtc_hdisplay;
+ height = adjusted_mode->crtc_vdisplay;
+ }
- return skl_update_scaler(state, !state->hw.active, SKL_CRTC_INDEX,
- &state->scaler_state.scaler_id,
- state->pipe_src_w, state->pipe_src_h,
- adjusted_mode->crtc_hdisplay,
- adjusted_mode->crtc_vdisplay, NULL, 0,
- need_scaler);
+ return skl_update_scaler(crtc_state, !crtc_state->hw.active,
+ SKL_CRTC_INDEX,
+ &crtc_state->scaler_state.scaler_id,
+ crtc_state->pipe_src_w, crtc_state->pipe_src_h,
+ width, height, NULL, 0,
+ crtc_state->pch_pfit.enabled);
}
/**
@@ -6200,6 +6175,7 @@ static int skl_update_scaler_plane(struct intel_crtc_state *crtc_state,
case DRM_FORMAT_UYVY:
case DRM_FORMAT_VYUY:
case DRM_FORMAT_NV12:
+ case DRM_FORMAT_XYUV8888:
case DRM_FORMAT_P010:
case DRM_FORMAT_P012:
case DRM_FORMAT_P016:
@@ -6241,70 +6217,80 @@ static void skl_pfit_enable(const struct intel_crtc_state *crtc_state)
{
struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
- enum pipe pipe = crtc->pipe;
const struct intel_crtc_scaler_state *scaler_state =
&crtc_state->scaler_state;
+ struct drm_rect src = {
+ .x2 = crtc_state->pipe_src_w << 16,
+ .y2 = crtc_state->pipe_src_h << 16,
+ };
+ const struct drm_rect *dst = &crtc_state->pch_pfit.dst;
+ u16 uv_rgb_hphase, uv_rgb_vphase;
+ enum pipe pipe = crtc->pipe;
+ int width = drm_rect_width(dst);
+ int height = drm_rect_height(dst);
+ int x = dst->x1;
+ int y = dst->y1;
+ int hscale, vscale;
+ unsigned long irqflags;
+ int id;
- if (crtc_state->pch_pfit.enabled) {
- u16 uv_rgb_hphase, uv_rgb_vphase;
- int pfit_w, pfit_h, hscale, vscale;
- unsigned long irqflags;
- int id;
-
- if (drm_WARN_ON(&dev_priv->drm,
- crtc_state->scaler_state.scaler_id < 0))
- return;
+ if (!crtc_state->pch_pfit.enabled)
+ return;
- pfit_w = (crtc_state->pch_pfit.size >> 16) & 0xFFFF;
- pfit_h = crtc_state->pch_pfit.size & 0xFFFF;
+ if (drm_WARN_ON(&dev_priv->drm,
+ crtc_state->scaler_state.scaler_id < 0))
+ return;
- hscale = (crtc_state->pipe_src_w << 16) / pfit_w;
- vscale = (crtc_state->pipe_src_h << 16) / pfit_h;
+ hscale = drm_rect_calc_hscale(&src, dst, 0, INT_MAX);
+ vscale = drm_rect_calc_vscale(&src, dst, 0, INT_MAX);
- uv_rgb_hphase = skl_scaler_calc_phase(1, hscale, false);
- uv_rgb_vphase = skl_scaler_calc_phase(1, vscale, false);
+ uv_rgb_hphase = skl_scaler_calc_phase(1, hscale, false);
+ uv_rgb_vphase = skl_scaler_calc_phase(1, vscale, false);
- id = scaler_state->scaler_id;
+ id = scaler_state->scaler_id;
- spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
+ spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
- intel_de_write_fw(dev_priv, SKL_PS_CTRL(pipe, id), PS_SCALER_EN |
- PS_FILTER_MEDIUM | scaler_state->scalers[id].mode);
- intel_de_write_fw(dev_priv, SKL_PS_VPHASE(pipe, id),
- PS_Y_PHASE(0) | PS_UV_RGB_PHASE(uv_rgb_vphase));
- intel_de_write_fw(dev_priv, SKL_PS_HPHASE(pipe, id),
- PS_Y_PHASE(0) | PS_UV_RGB_PHASE(uv_rgb_hphase));
- intel_de_write_fw(dev_priv, SKL_PS_WIN_POS(pipe, id),
- crtc_state->pch_pfit.pos);
- intel_de_write_fw(dev_priv, SKL_PS_WIN_SZ(pipe, id),
- crtc_state->pch_pfit.size);
+ intel_de_write_fw(dev_priv, SKL_PS_CTRL(pipe, id), PS_SCALER_EN |
+ PS_FILTER_MEDIUM | scaler_state->scalers[id].mode);
+ intel_de_write_fw(dev_priv, SKL_PS_VPHASE(pipe, id),
+ PS_Y_PHASE(0) | PS_UV_RGB_PHASE(uv_rgb_vphase));
+ intel_de_write_fw(dev_priv, SKL_PS_HPHASE(pipe, id),
+ PS_Y_PHASE(0) | PS_UV_RGB_PHASE(uv_rgb_hphase));
+ intel_de_write_fw(dev_priv, SKL_PS_WIN_POS(pipe, id),
+ x << 16 | y);
+ intel_de_write_fw(dev_priv, SKL_PS_WIN_SZ(pipe, id),
+ width << 16 | height);
- spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
- }
+ spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
}
static void ilk_pfit_enable(const struct intel_crtc_state *crtc_state)
{
struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+ const struct drm_rect *dst = &crtc_state->pch_pfit.dst;
enum pipe pipe = crtc->pipe;
+ int width = drm_rect_width(dst);
+ int height = drm_rect_height(dst);
+ int x = dst->x1;
+ int y = dst->y1;
- if (crtc_state->pch_pfit.enabled) {
- /* Force use of hard-coded filter coefficients
- * as some pre-programmed values are broken,
- * e.g. x201.
- */
- if (IS_IVYBRIDGE(dev_priv) || IS_HASWELL(dev_priv))
- intel_de_write(dev_priv, PF_CTL(pipe),
- PF_ENABLE | PF_FILTER_MED_3x3 | PF_PIPE_SEL_IVB(pipe));
- else
- intel_de_write(dev_priv, PF_CTL(pipe),
- PF_ENABLE | PF_FILTER_MED_3x3);
- intel_de_write(dev_priv, PF_WIN_POS(pipe),
- crtc_state->pch_pfit.pos);
- intel_de_write(dev_priv, PF_WIN_SZ(pipe),
- crtc_state->pch_pfit.size);
- }
+ if (!crtc_state->pch_pfit.enabled)
+ return;
+
+ /* Force use of hard-coded filter coefficients
+ * as some pre-programmed values are broken,
+ * e.g. x201.
+ */
+ if (IS_IVYBRIDGE(dev_priv) || IS_HASWELL(dev_priv))
+ intel_de_write(dev_priv, PF_CTL(pipe), PF_ENABLE |
+ PF_FILTER_MED_3x3 | PF_PIPE_SEL_IVB(pipe));
+ else
+ intel_de_write(dev_priv, PF_CTL(pipe), PF_ENABLE |
+ PF_FILTER_MED_3x3);
+ intel_de_write(dev_priv, PF_WIN_POS(pipe), x << 16 | y);
+ intel_de_write(dev_priv, PF_WIN_SZ(pipe), width << 16 | height);
}
void hsw_enable_ips(const struct intel_crtc_state *crtc_state)
@@ -6463,8 +6449,8 @@ static bool needs_scalerclk_wa(const struct intel_crtc_state *crtc_state)
{
struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
- /* Wa_2006604312:icl */
- if (crtc_state->scaler_state.scaler_users > 0 && IS_ICELAKE(dev_priv))
+ /* Wa_2006604312:icl,ehl */
+ if (crtc_state->scaler_state.scaler_users > 0 && IS_GEN(dev_priv, 11))
return true;
return false;
@@ -6534,7 +6520,7 @@ static void intel_pre_plane_update(struct intel_atomic_state *state,
needs_nv12_wa(new_crtc_state))
skl_wa_827(dev_priv, pipe, true);
- /* Wa_2006604312:icl */
+ /* Wa_2006604312:icl,ehl */
if (!needs_scalerclk_wa(old_crtc_state) &&
needs_scalerclk_wa(new_crtc_state))
icl_wa_scalerclkgating(dev_priv, pipe, true);
@@ -6646,7 +6632,7 @@ intel_connector_primary_encoder(struct intel_connector *connector)
return &dp_to_dig_port(connector->mst_port)->base;
encoder = intel_attached_encoder(connector);
- WARN_ON(!encoder);
+ drm_WARN_ON(connector->base.dev, !encoder);
return encoder;
}
@@ -6720,7 +6706,8 @@ static void intel_encoders_pre_pll_enable(struct intel_atomic_state *state,
continue;
if (encoder->pre_pll_enable)
- encoder->pre_pll_enable(encoder, crtc_state, conn_state);
+ encoder->pre_pll_enable(state, encoder,
+ crtc_state, conn_state);
}
}
@@ -6741,7 +6728,8 @@ static void intel_encoders_pre_enable(struct intel_atomic_state *state,
continue;
if (encoder->pre_enable)
- encoder->pre_enable(encoder, crtc_state, conn_state);
+ encoder->pre_enable(state, encoder,
+ crtc_state, conn_state);
}
}
@@ -6762,7 +6750,8 @@ static void intel_encoders_enable(struct intel_atomic_state *state,
continue;
if (encoder->enable)
- encoder->enable(encoder, crtc_state, conn_state);
+ encoder->enable(state, encoder,
+ crtc_state, conn_state);
intel_opregion_notify_encoder(encoder, true);
}
}
@@ -6785,7 +6774,8 @@ static void intel_encoders_disable(struct intel_atomic_state *state,
intel_opregion_notify_encoder(encoder, false);
if (encoder->disable)
- encoder->disable(encoder, old_crtc_state, old_conn_state);
+ encoder->disable(state, encoder,
+ old_crtc_state, old_conn_state);
}
}
@@ -6806,7 +6796,8 @@ static void intel_encoders_post_disable(struct intel_atomic_state *state,
continue;
if (encoder->post_disable)
- encoder->post_disable(encoder, old_crtc_state, old_conn_state);
+ encoder->post_disable(state, encoder,
+ old_crtc_state, old_conn_state);
}
}
@@ -6827,7 +6818,8 @@ static void intel_encoders_post_pll_disable(struct intel_atomic_state *state,
continue;
if (encoder->post_pll_disable)
- encoder->post_pll_disable(encoder, old_crtc_state, old_conn_state);
+ encoder->post_pll_disable(state, encoder,
+ old_crtc_state, old_conn_state);
}
}
@@ -6848,7 +6840,8 @@ static void intel_encoders_update_pipe(struct intel_atomic_state *state,
continue;
if (encoder->update_pipe)
- encoder->update_pipe(encoder, crtc_state, conn_state);
+ encoder->update_pipe(state, encoder,
+ crtc_state, conn_state);
}
}
@@ -7037,9 +7030,6 @@ static void hsw_crtc_enable(struct intel_atomic_state *state,
if (!transcoder_is_dsi(cpu_transcoder))
intel_set_pipe_timings(new_crtc_state);
- if (INTEL_GEN(dev_priv) >= 11)
- icl_enable_trans_port_sync(new_crtc_state);
-
intel_set_pipe_src_size(new_crtc_state);
if (cpu_transcoder != TRANSCODER_EDP &&
@@ -7087,9 +7077,6 @@ static void hsw_crtc_enable(struct intel_atomic_state *state,
if (INTEL_GEN(dev_priv) >= 11)
icl_set_pipe_chicken(crtc);
- if (!transcoder_is_dsi(cpu_transcoder))
- intel_ddi_enable_transcoder_func(new_crtc_state);
-
if (dev_priv->display.initial_watermarks)
dev_priv->display.initial_watermarks(state, crtc);
@@ -7120,11 +7107,12 @@ void ilk_pfit_disable(const struct intel_crtc_state *old_crtc_state)
/* To avoid upsetting the power well on haswell only disable the pfit if
* it's in use. The hw state code will make sure we get this right. */
- if (old_crtc_state->pch_pfit.enabled) {
- intel_de_write(dev_priv, PF_CTL(pipe), 0);
- intel_de_write(dev_priv, PF_WIN_POS(pipe), 0);
- intel_de_write(dev_priv, PF_WIN_SZ(pipe), 0);
- }
+ if (!old_crtc_state->pch_pfit.enabled)
+ return;
+
+ intel_de_write(dev_priv, PF_CTL(pipe), 0);
+ intel_de_write(dev_priv, PF_WIN_POS(pipe), 0);
+ intel_de_write(dev_priv, PF_WIN_SZ(pipe), 0);
}
static void ilk_crtc_disable(struct intel_atomic_state *state,
@@ -7312,7 +7300,17 @@ intel_aux_power_domain(struct intel_digital_port *dig_port)
}
}
- switch (dig_port->aux_ch) {
+ return intel_legacy_aux_to_power_domain(dig_port->aux_ch);
+}
+
+/*
+ * Converts aux_ch to power_domain without caring about TBT ports for that use
+ * intel_aux_power_domain()
+ */
+enum intel_display_power_domain
+intel_legacy_aux_to_power_domain(enum aux_ch aux_ch)
+{
+ switch (aux_ch) {
case AUX_CH_A:
return POWER_DOMAIN_AUX_A;
case AUX_CH_B:
@@ -7328,7 +7326,7 @@ intel_aux_power_domain(struct intel_digital_port *dig_port)
case AUX_CH_G:
return POWER_DOMAIN_AUX_G;
default:
- MISSING_CASE(dig_port->aux_ch);
+ MISSING_CASE(aux_ch);
return POWER_DOMAIN_AUX_A;
}
}
@@ -7942,39 +7940,36 @@ static bool intel_crtc_supports_double_wide(const struct intel_crtc *crtc)
(crtc->pipe == PIPE_A || IS_I915G(dev_priv));
}
-static u32 ilk_pipe_pixel_rate(const struct intel_crtc_state *pipe_config)
+static u32 ilk_pipe_pixel_rate(const struct intel_crtc_state *crtc_state)
{
- u32 pixel_rate;
-
- pixel_rate = pipe_config->hw.adjusted_mode.crtc_clock;
+ u32 pixel_rate = crtc_state->hw.adjusted_mode.crtc_clock;
+ unsigned int pipe_w, pipe_h, pfit_w, pfit_h;
/*
* We only use IF-ID interlacing. If we ever use
* PF-ID we'll need to adjust the pixel_rate here.
*/
- if (pipe_config->pch_pfit.enabled) {
- u64 pipe_w, pipe_h, pfit_w, pfit_h;
- u32 pfit_size = pipe_config->pch_pfit.size;
+ if (!crtc_state->pch_pfit.enabled)
+ return pixel_rate;
- pipe_w = pipe_config->pipe_src_w;
- pipe_h = pipe_config->pipe_src_h;
+ pipe_w = crtc_state->pipe_src_w;
+ pipe_h = crtc_state->pipe_src_h;
- pfit_w = (pfit_size >> 16) & 0xFFFF;
- pfit_h = pfit_size & 0xFFFF;
- if (pipe_w < pfit_w)
- pipe_w = pfit_w;
- if (pipe_h < pfit_h)
- pipe_h = pfit_h;
+ pfit_w = drm_rect_width(&crtc_state->pch_pfit.dst);
+ pfit_h = drm_rect_height(&crtc_state->pch_pfit.dst);
- if (WARN_ON(!pfit_w || !pfit_h))
- return pixel_rate;
+ if (pipe_w < pfit_w)
+ pipe_w = pfit_w;
+ if (pipe_h < pfit_h)
+ pipe_h = pfit_h;
- pixel_rate = div_u64(mul_u32_u32(pixel_rate, pipe_w * pipe_h),
- pfit_w * pfit_h);
- }
+ if (drm_WARN_ON(crtc_state->uapi.crtc->dev,
+ !pfit_w || !pfit_h))
+ return pixel_rate;
- return pixel_rate;
+ return div_u64(mul_u32_u32(pixel_rate, pipe_w * pipe_h),
+ pfit_w * pfit_h);
}
static void intel_crtc_compute_pixel_rate(struct intel_crtc_state *crtc_state)
@@ -8143,7 +8138,7 @@ static void intel_panel_sanitize_ssc(struct drm_i915_private *dev_priv)
}
}
-static inline bool intel_panel_use_ssc(struct drm_i915_private *dev_priv)
+static bool intel_panel_use_ssc(struct drm_i915_private *dev_priv)
{
if (i915_modparams.panel_use_ssc >= 0)
return i915_modparams.panel_use_ssc != 0;
@@ -8891,7 +8886,6 @@ void intel_mode_from_pipe_config(struct drm_display_mode *mode,
mode->clock = pipe_config->hw.adjusted_mode.crtc_clock;
- mode->hsync = drm_mode_hsync(mode);
mode->vrefresh = drm_mode_vrefresh(mode);
drm_mode_set_name(mode);
}
@@ -9168,9 +9162,9 @@ static bool i9xx_has_pfit(struct drm_i915_private *dev_priv)
IS_PINEVIEW(dev_priv) || IS_MOBILE(dev_priv);
}
-static void i9xx_get_pfit_config(struct intel_crtc *crtc,
- struct intel_crtc_state *pipe_config)
+static void i9xx_get_pfit_config(struct intel_crtc_state *crtc_state)
{
+ struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
u32 tmp;
@@ -9190,9 +9184,9 @@ static void i9xx_get_pfit_config(struct intel_crtc *crtc,
return;
}
- pipe_config->gmch_pfit.control = tmp;
- pipe_config->gmch_pfit.pgm_ratios = intel_de_read(dev_priv,
- PFIT_PGM_RATIOS);
+ crtc_state->gmch_pfit.control = tmp;
+ crtc_state->gmch_pfit.pgm_ratios =
+ intel_de_read(dev_priv, PFIT_PGM_RATIOS);
}
static void vlv_crtc_clock_get(struct intel_crtc *crtc,
@@ -9398,7 +9392,6 @@ static bool i9xx_get_pipe_config(struct intel_crtc *crtc,
pipe_config->output_format = INTEL_OUTPUT_FORMAT_RGB;
pipe_config->cpu_transcoder = (enum transcoder) crtc->pipe;
pipe_config->shared_dpll = NULL;
- pipe_config->master_transcoder = INVALID_TRANSCODER;
ret = false;
@@ -9443,7 +9436,7 @@ static bool i9xx_get_pipe_config(struct intel_crtc *crtc,
intel_get_pipe_timings(crtc, pipe_config);
intel_get_pipe_src_size(crtc, pipe_config);
- i9xx_get_pfit_config(crtc, pipe_config);
+ i9xx_get_pfit_config(pipe_config);
if (INTEL_GEN(dev_priv) >= 4) {
/* No way to read it out on pipes B and C */
@@ -10413,37 +10406,47 @@ static void ilk_get_fdi_m_n_config(struct intel_crtc *crtc,
&pipe_config->fdi_m_n, NULL);
}
-static void skl_get_pfit_config(struct intel_crtc *crtc,
- struct intel_crtc_state *pipe_config)
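+/*
+ * Decode the pfit window position/size registers (x/width in the high
+ * 16 bits, y/height in the low 16 bits) into pch_pfit.dst.
+ */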
+static void ilk_get_pfit_pos_size(struct intel_crtc_state *crtc_state,
+ u32 pos, u32 size)
{
- struct drm_device *dev = crtc->base.dev;
- struct drm_i915_private *dev_priv = to_i915(dev);
- struct intel_crtc_scaler_state *scaler_state = &pipe_config->scaler_state;
- u32 ps_ctrl = 0;
+ drm_rect_init(&crtc_state->pch_pfit.dst,
+ pos >> 16, pos & 0xffff,
+ size >> 16, size & 0xffff);
+}
+
+static void skl_get_pfit_config(struct intel_crtc_state *crtc_state)
+{
+ struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
+ struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+ struct intel_crtc_scaler_state *scaler_state = &crtc_state->scaler_state;
int id = -1;
int i;
/* find scaler attached to this pipe */
for (i = 0; i < crtc->num_scalers; i++) {
- ps_ctrl = intel_de_read(dev_priv, SKL_PS_CTRL(crtc->pipe, i));
- if (ps_ctrl & PS_SCALER_EN && !(ps_ctrl & PS_PLANE_SEL_MASK)) {
- id = i;
- pipe_config->pch_pfit.enabled = true;
- pipe_config->pch_pfit.pos = intel_de_read(dev_priv,
- SKL_PS_WIN_POS(crtc->pipe, i));
- pipe_config->pch_pfit.size = intel_de_read(dev_priv,
- SKL_PS_WIN_SZ(crtc->pipe, i));
- scaler_state->scalers[i].in_use = true;
- break;
- }
+ u32 ctl, pos, size;
+
+ ctl = intel_de_read(dev_priv, SKL_PS_CTRL(crtc->pipe, i));
+ if ((ctl & (PS_SCALER_EN | PS_PLANE_SEL_MASK)) != PS_SCALER_EN)
+ continue;
+
+ id = i;
+ crtc_state->pch_pfit.enabled = true;
+
+ pos = intel_de_read(dev_priv, SKL_PS_WIN_POS(crtc->pipe, i));
+ size = intel_de_read(dev_priv, SKL_PS_WIN_SZ(crtc->pipe, i));
+
+ ilk_get_pfit_pos_size(crtc_state, pos, size);
+
+ scaler_state->scalers[i].in_use = true;
+ break;
}
scaler_state->scaler_id = id;
- if (id >= 0) {
+ if (id >= 0)
scaler_state->scaler_users |= (1 << SKL_CRTC_INDEX);
- } else {
+ else
scaler_state->scaler_users &= ~(1 << SKL_CRTC_INDEX);
- }
}
static void
@@ -10579,30 +10582,30 @@ error:
kfree(intel_fb);
}
-static void ilk_get_pfit_config(struct intel_crtc *crtc,
- struct intel_crtc_state *pipe_config)
+static void ilk_get_pfit_config(struct intel_crtc_state *crtc_state)
{
- struct drm_device *dev = crtc->base.dev;
- struct drm_i915_private *dev_priv = to_i915(dev);
- u32 tmp;
+ struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
+ struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+ u32 ctl, pos, size;
- tmp = intel_de_read(dev_priv, PF_CTL(crtc->pipe));
-
- if (tmp & PF_ENABLE) {
- pipe_config->pch_pfit.enabled = true;
- pipe_config->pch_pfit.pos = intel_de_read(dev_priv,
- PF_WIN_POS(crtc->pipe));
- pipe_config->pch_pfit.size = intel_de_read(dev_priv,
- PF_WIN_SZ(crtc->pipe));
-
- /* We currently do not free assignements of panel fitters on
- * ivb/hsw (since we don't use the higher upscaling modes which
- * differentiates them) so just WARN about this case for now. */
- if (IS_GEN(dev_priv, 7)) {
- drm_WARN_ON(dev, (tmp & PF_PIPE_SEL_MASK_IVB) !=
- PF_PIPE_SEL_IVB(crtc->pipe));
- }
- }
+ ctl = intel_de_read(dev_priv, PF_CTL(crtc->pipe));
+ if ((ctl & PF_ENABLE) == 0)
+ return;
+
+ crtc_state->pch_pfit.enabled = true;
+
+ pos = intel_de_read(dev_priv, PF_WIN_POS(crtc->pipe));
+ size = intel_de_read(dev_priv, PF_WIN_SZ(crtc->pipe));
+
+ ilk_get_pfit_pos_size(crtc_state, pos, size);
+
+ /*
+ * We currently do not free assignments of panel fitters on
+ * ivb/hsw (since we don't use the higher upscaling modes which
+ * differentiate them) so just WARN about this case for now.
+ */
+ drm_WARN_ON(&dev_priv->drm, IS_GEN(dev_priv, 7) &&
+ (ctl & PF_PIPE_SEL_MASK_IVB) != PF_PIPE_SEL_IVB(crtc->pipe));
}
static bool ilk_get_pipe_config(struct intel_crtc *crtc,
@@ -10622,7 +10625,6 @@ static bool ilk_get_pipe_config(struct intel_crtc *crtc,
pipe_config->cpu_transcoder = (enum transcoder) crtc->pipe;
pipe_config->shared_dpll = NULL;
- pipe_config->master_transcoder = INVALID_TRANSCODER;
ret = false;
tmp = intel_de_read(dev_priv, PIPECONF(crtc->pipe));
@@ -10714,7 +10716,7 @@ static bool ilk_get_pipe_config(struct intel_crtc *crtc,
intel_get_pipe_timings(crtc, pipe_config);
intel_get_pipe_src_size(crtc, pipe_config);
- ilk_get_pfit_config(crtc, pipe_config);
+ ilk_get_pfit_config(pipe_config);
ret = true;
@@ -10891,7 +10893,7 @@ static bool hsw_get_transcoder_state(struct intel_crtc *crtc,
panel_transcoder_mask |=
BIT(TRANSCODER_DSI_0) | BIT(TRANSCODER_DSI_1);
- if (HAS_TRANSCODER_EDP(dev_priv))
+ if (HAS_TRANSCODER(dev_priv, TRANSCODER_EDP))
panel_transcoder_mask |= BIT(TRANSCODER_EDP);
/*
@@ -11085,61 +11087,6 @@ static void hsw_get_ddi_port_state(struct intel_crtc *crtc,
}
}
-static enum transcoder transcoder_master_readout(struct drm_i915_private *dev_priv,
- enum transcoder cpu_transcoder)
-{
- u32 trans_port_sync, master_select;
-
- trans_port_sync = intel_de_read(dev_priv,
- TRANS_DDI_FUNC_CTL2(cpu_transcoder));
-
- if ((trans_port_sync & PORT_SYNC_MODE_ENABLE) == 0)
- return INVALID_TRANSCODER;
-
- master_select = trans_port_sync &
- PORT_SYNC_MODE_MASTER_SELECT_MASK;
- if (master_select == 0)
- return TRANSCODER_EDP;
- else
- return master_select - 1;
-}
-
-static void icl_get_trans_port_sync_config(struct intel_crtc_state *crtc_state)
-{
- struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
- u32 transcoders;
- enum transcoder cpu_transcoder;
-
- crtc_state->master_transcoder = transcoder_master_readout(dev_priv,
- crtc_state->cpu_transcoder);
-
- transcoders = BIT(TRANSCODER_A) |
- BIT(TRANSCODER_B) |
- BIT(TRANSCODER_C) |
- BIT(TRANSCODER_D);
- for_each_cpu_transcoder_masked(dev_priv, cpu_transcoder, transcoders) {
- enum intel_display_power_domain power_domain;
- intel_wakeref_t trans_wakeref;
-
- power_domain = POWER_DOMAIN_TRANSCODER(cpu_transcoder);
- trans_wakeref = intel_display_power_get_if_enabled(dev_priv,
- power_domain);
-
- if (!trans_wakeref)
- continue;
-
- if (transcoder_master_readout(dev_priv, cpu_transcoder) ==
- crtc_state->cpu_transcoder)
- crtc_state->sync_mode_slaves_mask |= BIT(cpu_transcoder);
-
- intel_display_power_put(dev_priv, power_domain, trans_wakeref);
- }
-
- drm_WARN_ON(&dev_priv->drm,
- crtc_state->master_transcoder != INVALID_TRANSCODER &&
- crtc_state->sync_mode_slaves_mask);
-}
-
static bool hsw_get_pipe_config(struct intel_crtc *crtc,
struct intel_crtc_state *pipe_config)
{
@@ -11243,9 +11190,9 @@ static bool hsw_get_pipe_config(struct intel_crtc *crtc,
power_domain_mask |= BIT_ULL(power_domain);
if (INTEL_GEN(dev_priv) >= 9)
- skl_get_pfit_config(crtc, pipe_config);
+ skl_get_pfit_config(pipe_config);
else
- ilk_get_pfit_config(crtc, pipe_config);
+ ilk_get_pfit_config(pipe_config);
}
if (hsw_crtc_supports_ips(crtc)) {
@@ -11271,10 +11218,6 @@ static bool hsw_get_pipe_config(struct intel_crtc *crtc,
pipe_config->pixel_multiplier = 1;
}
- if (INTEL_GEN(dev_priv) >= 11 &&
- !transcoder_is_dsi(pipe_config->cpu_transcoder))
- icl_get_trans_port_sync_config(pipe_config);
-
out:
for_each_power_domain(power_domain, power_domain_mask)
intel_display_power_put(dev_priv,
@@ -12377,10 +12320,8 @@ int intel_plane_atomic_calc_changes(const struct intel_crtc_state *old_crtc_stat
* only combine the results from all planes in the current place?
*/
if (!is_crtc_enabled) {
- plane_state->uapi.visible = visible = false;
- crtc_state->active_planes &= ~BIT(plane->id);
- crtc_state->data_rate[plane->id] = 0;
- crtc_state->min_cdclk[plane->id] = 0;
+ intel_plane_set_invisible(crtc_state, plane_state);
+ visible = false;
}
if (!was_visible && !visible)
@@ -12510,8 +12451,10 @@ static int icl_add_linked_planes(struct intel_atomic_state *state)
if (IS_ERR(linked_plane_state))
return PTR_ERR(linked_plane_state);
- WARN_ON(linked_plane_state->planar_linked_plane != plane);
- WARN_ON(linked_plane_state->planar_slave == plane_state->planar_slave);
+ drm_WARN_ON(state->base.dev,
+ linked_plane_state->planar_linked_plane != plane);
+ drm_WARN_ON(state->base.dev,
+ linked_plane_state->planar_slave == plane_state->planar_slave);
}
return 0;
@@ -12886,19 +12829,20 @@ compute_baseline_pipe_bpp(struct intel_crtc *crtc,
return 0;
}
-static void intel_dump_crtc_timings(const struct drm_display_mode *mode)
+static void intel_dump_crtc_timings(struct drm_i915_private *i915,
+ const struct drm_display_mode *mode)
{
- DRM_DEBUG_KMS("crtc timings: %d %d %d %d %d %d %d %d %d, "
- "type: 0x%x flags: 0x%x\n",
- mode->crtc_clock,
- mode->crtc_hdisplay, mode->crtc_hsync_start,
- mode->crtc_hsync_end, mode->crtc_htotal,
- mode->crtc_vdisplay, mode->crtc_vsync_start,
- mode->crtc_vsync_end, mode->crtc_vtotal,
- mode->type, mode->flags);
+ drm_dbg_kms(&i915->drm, "crtc timings: %d %d %d %d %d %d %d %d %d, "
+ "type: 0x%x flags: 0x%x\n",
+ mode->crtc_clock,
+ mode->crtc_hdisplay, mode->crtc_hsync_start,
+ mode->crtc_hsync_end, mode->crtc_htotal,
+ mode->crtc_vdisplay, mode->crtc_vsync_start,
+ mode->crtc_vsync_end, mode->crtc_vtotal,
+ mode->type, mode->flags);
}
-static inline void
+static void
intel_dump_m_n_config(const struct intel_crtc_state *pipe_config,
const char *id, unsigned int lane_count,
const struct intel_link_m_n *m_n)
@@ -12922,6 +12866,16 @@ intel_dump_infoframe(struct drm_i915_private *dev_priv,
hdmi_infoframe_log(KERN_DEBUG, dev_priv->drm.dev, frame);
}
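+/* Dump a DP VSC SDP to the kernel log, only when KMS debugging is enabled. */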
+static void
+intel_dump_dp_vsc_sdp(struct drm_i915_private *dev_priv,
+ const struct drm_dp_vsc_sdp *vsc)
+{
+ if (!drm_debug_enabled(DRM_UT_KMS))
+ return;
+
+ drm_dp_vsc_sdp_log(KERN_DEBUG, dev_priv->drm.dev, vsc);
+}
+
#define OUTPUT_TYPE(x) [INTEL_OUTPUT_ ## x] = #x
static const char * const output_type_str[] = {
@@ -13042,6 +12996,11 @@ static void intel_dump_pipe_config(const struct intel_crtc_state *pipe_config,
transcoder_name(pipe_config->cpu_transcoder),
pipe_config->pipe_bpp, pipe_config->dither);
+ drm_dbg_kms(&dev_priv->drm,
+ "port sync: master transcoder: %s, slave transcoder bitmask = 0x%x\n",
+ transcoder_name(pipe_config->master_transcoder),
+ pipe_config->sync_mode_slaves_mask);
+
if (pipe_config->has_pch_encoder)
intel_dump_m_n_config(pipe_config, "fdi",
pipe_config->fdi_lanes,
@@ -13074,12 +13033,21 @@ static void intel_dump_pipe_config(const struct intel_crtc_state *pipe_config,
if (pipe_config->infoframes.enable &
intel_hdmi_infoframe_enable(HDMI_INFOFRAME_TYPE_VENDOR))
intel_dump_infoframe(dev_priv, &pipe_config->infoframes.hdmi);
+ if (pipe_config->infoframes.enable &
+ intel_hdmi_infoframe_enable(HDMI_INFOFRAME_TYPE_DRM))
+ intel_dump_infoframe(dev_priv, &pipe_config->infoframes.drm);
+ if (pipe_config->infoframes.enable &
+ intel_hdmi_infoframe_enable(HDMI_PACKET_TYPE_GAMUT_METADATA))
+ intel_dump_infoframe(dev_priv, &pipe_config->infoframes.drm);
+ if (pipe_config->infoframes.enable &
+ intel_hdmi_infoframe_enable(DP_SDP_VSC))
+ intel_dump_dp_vsc_sdp(dev_priv, &pipe_config->infoframes.vsc);
drm_dbg_kms(&dev_priv->drm, "requested mode:\n");
drm_mode_debug_printmodeline(&pipe_config->hw.mode);
drm_dbg_kms(&dev_priv->drm, "adjusted mode:\n");
drm_mode_debug_printmodeline(&pipe_config->hw.adjusted_mode);
- intel_dump_crtc_timings(&pipe_config->hw.adjusted_mode);
+ intel_dump_crtc_timings(dev_priv, &pipe_config->hw.adjusted_mode);
drm_dbg_kms(&dev_priv->drm,
"port clock: %d, pipe src size: %dx%d, pixel rate %d\n",
pipe_config->port_clock,
@@ -13104,9 +13072,8 @@ static void intel_dump_pipe_config(const struct intel_crtc_state *pipe_config,
pipe_config->gmch_pfit.lvds_border_bits);
else
drm_dbg_kms(&dev_priv->drm,
- "pch pfit: pos: 0x%08x, size: 0x%08x, %s, force thru: %s\n",
- pipe_config->pch_pfit.pos,
- pipe_config->pch_pfit.size,
+ "pch pfit: " DRM_RECT_FMT ", %s, force thru: %s\n",
+ DRM_RECT_ARG(&pipe_config->pch_pfit.dst),
enableddisabled(pipe_config->pch_pfit.enabled),
yesno(pipe_config->pch_pfit.force_thru));
@@ -13228,7 +13195,8 @@ static void intel_crtc_copy_hw_to_uapi_state(struct intel_crtc_state *crtc_state
{
crtc_state->uapi.enable = crtc_state->hw.enable;
crtc_state->uapi.active = crtc_state->hw.active;
- WARN_ON(drm_atomic_set_mode_for_crtc(&crtc_state->uapi, &crtc_state->hw.mode) < 0);
+ drm_WARN_ON(crtc_state->uapi.crtc->dev,
+ drm_atomic_set_mode_for_crtc(&crtc_state->uapi, &crtc_state->hw.mode) < 0);
crtc_state->uapi.adjusted_mode = crtc_state->hw.adjusted_mode;
@@ -13521,6 +13489,13 @@ intel_compare_infoframe(const union hdmi_infoframe *a,
return memcmp(a, b, sizeof(*a)) == 0;
}
+static bool
+intel_compare_dp_vsc_sdp(const struct drm_dp_vsc_sdp *a,
+ const struct drm_dp_vsc_sdp *b)
+{
+ return memcmp(a, b, sizeof(*a)) == 0;
+}
+
static void
pipe_config_infoframe_mismatch(struct drm_i915_private *dev_priv,
bool fastset, const char *name,
@@ -13546,6 +13521,31 @@ pipe_config_infoframe_mismatch(struct drm_i915_private *dev_priv,
}
}
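+/*
+ * Log an expected vs. found DP VSC SDP mismatch: debug level for fastset
+ * checks, error level otherwise.
+ */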
+static void
+pipe_config_dp_vsc_sdp_mismatch(struct drm_i915_private *dev_priv,
+ bool fastset, const char *name,
+ const struct drm_dp_vsc_sdp *a,
+ const struct drm_dp_vsc_sdp *b)
+{
+ if (fastset) {
+ if (!drm_debug_enabled(DRM_UT_KMS))
+ return;
+
+ drm_dbg_kms(&dev_priv->drm,
+ "fastset mismatch in %s dp sdp\n", name);
+ drm_dbg_kms(&dev_priv->drm, "expected:\n");
+ drm_dp_vsc_sdp_log(KERN_DEBUG, dev_priv->drm.dev, a);
+ drm_dbg_kms(&dev_priv->drm, "found:\n");
+ drm_dp_vsc_sdp_log(KERN_DEBUG, dev_priv->drm.dev, b);
+ } else {
+ drm_err(&dev_priv->drm, "mismatch in %s dp sdp\n", name);
+ drm_err(&dev_priv->drm, "expected:\n");
+ drm_dp_vsc_sdp_log(KERN_ERR, dev_priv->drm.dev, a);
+ drm_err(&dev_priv->drm, "found:\n");
+ drm_dp_vsc_sdp_log(KERN_ERR, dev_priv->drm.dev, b);
+ }
+}
+
static void __printf(4, 5)
pipe_config_mismatch(bool fastset, const struct intel_crtc *crtc,
const char *name, const char *format, ...)
@@ -13747,6 +13747,17 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config,
} \
} while (0)
+#define PIPE_CONF_CHECK_DP_VSC_SDP(name) do { \
+ if (!current_config->has_psr && !pipe_config->has_psr && \
+ !intel_compare_dp_vsc_sdp(&current_config->infoframes.name, \
+ &pipe_config->infoframes.name)) { \
+ pipe_config_dp_vsc_sdp_mismatch(dev_priv, fastset, __stringify(name), \
+ &current_config->infoframes.name, \
+ &pipe_config->infoframes.name); \
+ ret = false; \
+ } \
+} while (0)
+
#define PIPE_CONF_CHECK_COLOR_LUT(name1, name2, bit_precision) do { \
if (current_config->name1 != pipe_config->name1) { \
pipe_config_mismatch(fastset, crtc, __stringify(name1), \
@@ -13847,8 +13858,10 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config,
PIPE_CONF_CHECK_BOOL(pch_pfit.enabled);
if (current_config->pch_pfit.enabled) {
- PIPE_CONF_CHECK_X(pch_pfit.pos);
- PIPE_CONF_CHECK_X(pch_pfit.size);
+ PIPE_CONF_CHECK_I(pch_pfit.dst.x1);
+ PIPE_CONF_CHECK_I(pch_pfit.dst.y1);
+ PIPE_CONF_CHECK_I(pch_pfit.dst.x2);
+ PIPE_CONF_CHECK_I(pch_pfit.dst.y2);
}
PIPE_CONF_CHECK_I(scaler_state.scaler_id);
@@ -13922,6 +13935,7 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config,
PIPE_CONF_CHECK_INFOFRAME(spd);
PIPE_CONF_CHECK_INFOFRAME(hdmi);
PIPE_CONF_CHECK_INFOFRAME(drm);
+ PIPE_CONF_CHECK_DP_VSC_SDP(vsc);
PIPE_CONF_CHECK_X(sync_mode_slaves_mask);
PIPE_CONF_CHECK_I(master_transcoder);
@@ -14010,7 +14024,9 @@ static void verify_wm_state(struct intel_crtc *crtc,
/* Watermarks */
for (level = 0; level <= max_level; level++) {
if (skl_wm_level_equals(&hw_plane_wm->wm[level],
- &sw_plane_wm->wm[level]))
+ &sw_plane_wm->wm[level]) ||
+ (level == 0 && skl_wm_level_equals(&hw_plane_wm->wm[level],
+ &sw_plane_wm->sagv_wm0)))
continue;
drm_err(&dev_priv->drm,
@@ -14065,7 +14081,9 @@ static void verify_wm_state(struct intel_crtc *crtc,
/* Watermarks */
for (level = 0; level <= max_level; level++) {
if (skl_wm_level_equals(&hw_plane_wm->wm[level],
- &sw_plane_wm->wm[level]))
+ &sw_plane_wm->wm[level]) ||
+ (level == 0 && skl_wm_level_equals(&hw_plane_wm->wm[level],
+ &sw_plane_wm->sagv_wm0)))
continue;
drm_err(&dev_priv->drm,
@@ -14999,11 +15017,13 @@ static void intel_pipe_fastset(const struct intel_crtc_state *old_crtc_state,
}
static void commit_pipe_config(struct intel_atomic_state *state,
- struct intel_crtc_state *old_crtc_state,
- struct intel_crtc_state *new_crtc_state)
+ struct intel_crtc *crtc)
{
- struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->uapi.crtc);
struct drm_i915_private *dev_priv = to_i915(state->base.dev);
+ const struct intel_crtc_state *old_crtc_state =
+ intel_atomic_get_old_crtc_state(state, crtc);
+ const struct intel_crtc_state *new_crtc_state =
+ intel_atomic_get_new_crtc_state(state, crtc);
bool modeset = needs_modeset(new_crtc_state);
/*
@@ -15029,22 +15049,35 @@ static void commit_pipe_config(struct intel_atomic_state *state,
dev_priv->display.atomic_update_watermarks(state, crtc);
}
-static void intel_update_crtc(struct intel_crtc *crtc,
- struct intel_atomic_state *state,
- struct intel_crtc_state *old_crtc_state,
- struct intel_crtc_state *new_crtc_state)
+static void intel_enable_crtc(struct intel_atomic_state *state,
+ struct intel_crtc *crtc)
{
struct drm_i915_private *dev_priv = to_i915(state->base.dev);
- bool modeset = needs_modeset(new_crtc_state);
+ const struct intel_crtc_state *new_crtc_state =
+ intel_atomic_get_new_crtc_state(state, crtc);
- if (modeset) {
- intel_crtc_update_active_timings(new_crtc_state);
+ if (!needs_modeset(new_crtc_state))
+ return;
- dev_priv->display.crtc_enable(state, crtc);
+ intel_crtc_update_active_timings(new_crtc_state);
- /* vblanks work again, re-enable pipe CRC. */
- intel_crtc_enable_pipe_crc(crtc);
- } else {
+ dev_priv->display.crtc_enable(state, crtc);
+
+ /* vblanks work again, re-enable pipe CRC. */
+ intel_crtc_enable_pipe_crc(crtc);
+}
+
+static void intel_update_crtc(struct intel_atomic_state *state,
+ struct intel_crtc *crtc)
+{
+ struct drm_i915_private *dev_priv = to_i915(state->base.dev);
+ const struct intel_crtc_state *old_crtc_state =
+ intel_atomic_get_old_crtc_state(state, crtc);
+ struct intel_crtc_state *new_crtc_state =
+ intel_atomic_get_new_crtc_state(state, crtc);
+ bool modeset = needs_modeset(new_crtc_state);
+
+ if (!modeset) {
if (new_crtc_state->preload_luts &&
(new_crtc_state->uapi.color_mgmt_changed ||
new_crtc_state->update_pipe))
@@ -15064,7 +15097,7 @@ static void intel_update_crtc(struct intel_crtc *crtc,
/* Perform vblank evasion around commit operation */
intel_pipe_update_start(new_crtc_state);
- commit_pipe_config(state, old_crtc_state, new_crtc_state);
+ commit_pipe_config(state, crtc);
if (INTEL_GEN(dev_priv) >= 9)
skl_update_planes_on_crtc(state, crtc);
@@ -15084,18 +15117,6 @@ static void intel_update_crtc(struct intel_crtc *crtc,
intel_crtc_arm_fifo_underrun(crtc, new_crtc_state);
}
-static struct intel_crtc *intel_get_slave_crtc(const struct intel_crtc_state *new_crtc_state)
-{
- struct drm_i915_private *dev_priv = to_i915(new_crtc_state->uapi.crtc->dev);
- enum transcoder slave_transcoder;
-
- drm_WARN_ON(&dev_priv->drm,
- !is_power_of_2(new_crtc_state->sync_mode_slaves_mask));
-
- slave_transcoder = ffs(new_crtc_state->sync_mode_slaves_mask) - 1;
- return intel_get_crtc_for_pipe(dev_priv,
- (enum pipe)slave_transcoder);
-}
static void intel_old_crtc_state_disables(struct intel_atomic_state *state,
struct intel_crtc_state *old_crtc_state,
@@ -15171,129 +15192,19 @@ static void intel_commit_modeset_disables(struct intel_atomic_state *state)
static void intel_commit_modeset_enables(struct intel_atomic_state *state)
{
+ struct intel_crtc_state *new_crtc_state;
struct intel_crtc *crtc;
- struct intel_crtc_state *old_crtc_state, *new_crtc_state;
int i;
- for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) {
+ for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) {
if (!new_crtc_state->hw.active)
continue;
- intel_update_crtc(crtc, state, old_crtc_state,
- new_crtc_state);
+ intel_enable_crtc(state, crtc);
+ intel_update_crtc(state, crtc);
}
}
-static void intel_crtc_enable_trans_port_sync(struct intel_crtc *crtc,
- struct intel_atomic_state *state,
- struct intel_crtc_state *new_crtc_state)
-{
- struct drm_i915_private *dev_priv = to_i915(state->base.dev);
-
- intel_crtc_update_active_timings(new_crtc_state);
- dev_priv->display.crtc_enable(state, crtc);
- intel_crtc_enable_pipe_crc(crtc);
-}
-
-static void intel_set_dp_tp_ctl_normal(struct intel_crtc *crtc,
- struct intel_atomic_state *state)
-{
- struct drm_connector *uninitialized_var(conn);
- struct drm_connector_state *conn_state;
- struct intel_dp *intel_dp;
- int i;
-
- for_each_new_connector_in_state(&state->base, conn, conn_state, i) {
- if (conn_state->crtc == &crtc->base)
- break;
- }
- intel_dp = intel_attached_dp(to_intel_connector(conn));
- intel_dp_stop_link_train(intel_dp);
-}
-
-/*
- * TODO: This is only called from port sync and it is identical to what will be
- * executed again in intel_update_crtc() over port sync pipes
- */
-static void intel_post_crtc_enable_updates(struct intel_crtc *crtc,
- struct intel_atomic_state *state)
-{
- struct intel_crtc_state *new_crtc_state =
- intel_atomic_get_new_crtc_state(state, crtc);
- struct intel_crtc_state *old_crtc_state =
- intel_atomic_get_old_crtc_state(state, crtc);
- bool modeset = needs_modeset(new_crtc_state);
-
- if (new_crtc_state->update_pipe && !new_crtc_state->enable_fbc)
- intel_fbc_disable(crtc);
- else
- intel_fbc_enable(state, crtc);
-
- /* Perform vblank evasion around commit operation */
- intel_pipe_update_start(new_crtc_state);
- commit_pipe_config(state, old_crtc_state, new_crtc_state);
- skl_update_planes_on_crtc(state, crtc);
- intel_pipe_update_end(new_crtc_state);
-
- /*
- * We usually enable FIFO underrun interrupts as part of the
- * CRTC enable sequence during modesets. But when we inherit a
- * valid pipe configuration from the BIOS we need to take care
- * of enabling them on the CRTC's first fastset.
- */
- if (new_crtc_state->update_pipe && !modeset &&
- old_crtc_state->hw.mode.private_flags & I915_MODE_FLAG_INHERITED)
- intel_crtc_arm_fifo_underrun(crtc, new_crtc_state);
-}
-
-static void intel_update_trans_port_sync_crtcs(struct intel_crtc *crtc,
- struct intel_atomic_state *state,
- struct intel_crtc_state *old_crtc_state,
- struct intel_crtc_state *new_crtc_state)
-{
- struct drm_i915_private *i915 = to_i915(crtc->base.dev);
- struct intel_crtc *slave_crtc = intel_get_slave_crtc(new_crtc_state);
- struct intel_crtc_state *new_slave_crtc_state =
- intel_atomic_get_new_crtc_state(state, slave_crtc);
- struct intel_crtc_state *old_slave_crtc_state =
- intel_atomic_get_old_crtc_state(state, slave_crtc);
-
- drm_WARN_ON(&i915->drm, !slave_crtc || !new_slave_crtc_state ||
- !old_slave_crtc_state);
-
- drm_dbg_kms(&i915->drm,
- "Updating Transcoder Port Sync Master CRTC = %d %s and Slave CRTC %d %s\n",
- crtc->base.base.id, crtc->base.name,
- slave_crtc->base.base.id, slave_crtc->base.name);
-
- /* Enable seq for slave with with DP_TP_CTL left Idle until the
- * master is ready
- */
- intel_crtc_enable_trans_port_sync(slave_crtc,
- state,
- new_slave_crtc_state);
-
- /* Enable seq for master with with DP_TP_CTL left Idle */
- intel_crtc_enable_trans_port_sync(crtc,
- state,
- new_crtc_state);
-
- /* Set Slave's DP_TP_CTL to Normal */
- intel_set_dp_tp_ctl_normal(slave_crtc,
- state);
-
- /* Set Master's DP_TP_CTL To Normal */
- usleep_range(200, 400);
- intel_set_dp_tp_ctl_normal(crtc,
- state);
-
- /* Now do the post crtc enable for all master and slaves */
- intel_post_crtc_enable_updates(slave_crtc,
- state);
- intel_post_crtc_enable_updates(crtc,
- state);
-}
-
static void icl_dbuf_slice_pre_update(struct intel_atomic_state *state)
{
struct drm_i915_private *dev_priv = to_i915(state->base.dev);
@@ -15365,8 +15276,7 @@ static void skl_commit_modeset_enables(struct intel_atomic_state *state)
entries[pipe] = new_crtc_state->wm.skl.ddb;
update_pipes &= ~BIT(pipe);
- intel_update_crtc(crtc, state, old_crtc_state,
- new_crtc_state);
+ intel_update_crtc(state, crtc);
/*
* If this is an already active pipe, it's DDB changed,
@@ -15381,67 +15291,62 @@ static void skl_commit_modeset_enables(struct intel_atomic_state *state)
}
}
+ update_pipes = modeset_pipes;
+
/*
* Enable all pipes that need a modeset and do not depend on other
* pipes
*/
- for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
- new_crtc_state, i) {
+ for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) {
enum pipe pipe = crtc->pipe;
if ((modeset_pipes & BIT(pipe)) == 0)
continue;
if (intel_dp_mst_is_slave_trans(new_crtc_state) ||
- is_trans_port_sync_slave(new_crtc_state))
+ is_trans_port_sync_master(new_crtc_state))
continue;
- drm_WARN_ON(&dev_priv->drm, skl_ddb_allocation_overlaps(&new_crtc_state->wm.skl.ddb,
- entries, I915_MAX_PIPES, pipe));
-
- entries[pipe] = new_crtc_state->wm.skl.ddb;
modeset_pipes &= ~BIT(pipe);
- if (is_trans_port_sync_mode(new_crtc_state)) {
- struct intel_crtc *slave_crtc;
+ intel_enable_crtc(state, crtc);
+ }
- intel_update_trans_port_sync_crtcs(crtc, state,
- old_crtc_state,
- new_crtc_state);
+ /*
+ * Then we enable all remaining pipes that depend on other
+ * pipes: MST slaves and port sync masters.
+ */
+ for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) {
+ enum pipe pipe = crtc->pipe;
- slave_crtc = intel_get_slave_crtc(new_crtc_state);
- /* TODO: update entries[] of slave */
- modeset_pipes &= ~BIT(slave_crtc->pipe);
+ if ((modeset_pipes & BIT(pipe)) == 0)
+ continue;
- } else {
- intel_update_crtc(crtc, state, old_crtc_state,
- new_crtc_state);
- }
+ modeset_pipes &= ~BIT(pipe);
+
+ intel_enable_crtc(state, crtc);
}
/*
- * Finally enable all pipes that needs a modeset and depends on
- * other pipes, right now it is only MST slaves as both port sync slave
- * and master are enabled together
+ * Finally we do the plane updates/etc. for all pipes that got enabled.
*/
- for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
- new_crtc_state, i) {
+ for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) {
enum pipe pipe = crtc->pipe;
- if ((modeset_pipes & BIT(pipe)) == 0)
+ if ((update_pipes & BIT(pipe)) == 0)
continue;
drm_WARN_ON(&dev_priv->drm, skl_ddb_allocation_overlaps(&new_crtc_state->wm.skl.ddb,
entries, I915_MAX_PIPES, pipe));
entries[pipe] = new_crtc_state->wm.skl.ddb;
- modeset_pipes &= ~BIT(pipe);
+ update_pipes &= ~BIT(pipe);
- intel_update_crtc(crtc, state, old_crtc_state, new_crtc_state);
+ intel_update_crtc(state, crtc);
}
drm_WARN_ON(&dev_priv->drm, modeset_pipes);
-
+ drm_WARN_ON(&dev_priv->drm, update_pipes);
}
static void intel_atomic_helper_free_state(struct drm_i915_private *dev_priv)
@@ -15540,16 +15445,11 @@ static void intel_atomic_commit_tail(struct intel_atomic_state *state)
intel_set_cdclk_pre_plane_update(state);
- /*
- * SKL workaround: bspec recommends we disable the SAGV when we
- * have more then one pipe enabled
- */
- if (!intel_can_enable_sagv(state))
- intel_disable_sagv(dev_priv);
-
intel_modeset_verify_disabled(dev_priv, state);
}
+ intel_sagv_pre_plane_update(state);
+
/* Complete the events for pipes that have now been disabled */
for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) {
bool modeset = needs_modeset(new_crtc_state);
@@ -15645,8 +15545,7 @@ static void intel_atomic_commit_tail(struct intel_atomic_state *state)
if (state->modeset)
intel_verify_planes(state);
- if (state->modeset && intel_can_enable_sagv(state))
- intel_enable_sagv(dev_priv);
+ intel_sagv_post_plane_update(state);
drm_atomic_helper_commit_hw_done(&state->base);
@@ -15982,7 +15881,7 @@ intel_prepare_plane_fb(struct drm_plane *_plane,
if (new_plane_state->uapi.fence) { /* explicit fencing */
ret = i915_sw_fence_await_dma_fence(&state->commit_ready,
new_plane_state->uapi.fence,
- I915_FENCE_TIMEOUT,
+ i915_fence_timeout(dev_priv),
GFP_KERNEL);
if (ret < 0)
return ret;
@@ -16009,7 +15908,8 @@ intel_prepare_plane_fb(struct drm_plane *_plane,
ret = i915_sw_fence_await_reservation(&state->commit_ready,
obj->base.resv, NULL,
- false, I915_FENCE_TIMEOUT,
+ false,
+ i915_fence_timeout(dev_priv),
GFP_KERNEL);
if (ret < 0)
goto unpin_fb;
@@ -18261,11 +18161,12 @@ static void intel_sanitize_encoder(struct intel_encoder *encoder)
best_encoder = connector->base.state->best_encoder;
connector->base.state->best_encoder = &encoder->base;
+ /* FIXME NULL atomic state passed! */
if (encoder->disable)
- encoder->disable(encoder, crtc_state,
+ encoder->disable(NULL, encoder, crtc_state,
connector->base.state);
if (encoder->post_disable)
- encoder->post_disable(encoder, crtc_state,
+ encoder->post_disable(NULL, encoder, crtc_state,
connector->base.state);
connector->base.state->best_encoder = best_encoder;
@@ -18802,15 +18703,6 @@ void intel_modeset_driver_remove_noirq(struct drm_i915_private *i915)
#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
-static bool
-has_transcoder(struct drm_i915_private *dev_priv, enum transcoder cpu_transcoder)
-{
- if (cpu_transcoder == TRANSCODER_EDP)
- return HAS_TRANSCODER_EDP(dev_priv);
- else
- return INTEL_INFO(dev_priv)->pipe_mask & BIT(cpu_transcoder);
-}
-
struct intel_display_error_state {
u32 power_well_driver;
@@ -18919,7 +18811,7 @@ intel_display_capture_error_state(struct drm_i915_private *dev_priv)
for (i = 0; i < ARRAY_SIZE(error->transcoder); i++) {
enum transcoder cpu_transcoder = transcoders[i];
- if (!has_transcoder(dev_priv, cpu_transcoder))
+ if (!HAS_TRANSCODER(dev_priv, cpu_transcoder))
continue;
error->transcoder[i].available = true;
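
For orientation, the rework in this file splits the old all-in-one intel_update_crtc() into an enable step and an update step; the per-CRTC flow in intel_commit_modeset_enables() now reads, in outline (an illustrative restatement of the code above, not additional patch content):

	for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) {
		if (!new_crtc_state->hw.active)
			continue;

		/* full modeset only: active timings, crtc_enable, pipe CRC */
		intel_enable_crtc(state, crtc);
		/* planes, pipe config, FIFO underrun arming (also for fastsets) */
		intel_update_crtc(state, crtc);
	}
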
diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h
index adb1225a3480..efb4da205ea2 100644
--- a/drivers/gpu/drm/i915/display/intel_display.h
+++ b/drivers/gpu/drm/i915/display/intel_display.h
@@ -320,9 +320,13 @@ enum phy_fia {
for_each_pipe(__dev_priv, __p) \
for_each_if((__mask) & BIT(__p))
-#define for_each_cpu_transcoder_masked(__dev_priv, __t, __mask) \
+#define for_each_cpu_transcoder(__dev_priv, __t) \
for ((__t) = 0; (__t) < I915_MAX_TRANSCODERS; (__t)++) \
- for_each_if ((__mask) & (1 << (__t)))
+ for_each_if (INTEL_INFO(__dev_priv)->cpu_transcoder_mask & BIT(__t))
+
+#define for_each_cpu_transcoder_masked(__dev_priv, __t, __mask) \
+ for_each_cpu_transcoder(__dev_priv, __t) \
+ for_each_if ((__mask) & BIT(__t))
#define for_each_universal_plane(__dev_priv, __pipe, __p) \
for ((__p) = 0; \
@@ -579,13 +583,14 @@ void hsw_disable_ips(const struct intel_crtc_state *crtc_state);
enum intel_display_power_domain intel_port_to_power_domain(enum port port);
enum intel_display_power_domain
intel_aux_power_domain(struct intel_digital_port *dig_port);
+enum intel_display_power_domain
+intel_legacy_aux_to_power_domain(enum aux_ch aux_ch);
void intel_mode_from_pipe_config(struct drm_display_mode *mode,
struct intel_crtc_state *pipe_config);
void intel_crtc_arm_fifo_underrun(struct intel_crtc *crtc,
struct intel_crtc_state *crtc_state);
u16 skl_scaler_calc_phase(int sub, int scale, bool chroma_center);
-int skl_update_scaler_crtc(struct intel_crtc_state *crtc_state);
void skl_scaler_disable(const struct intel_crtc_state *old_crtc_state);
void ilk_pfit_disable(const struct intel_crtc_state *old_crtc_state);
u32 glk_plane_color_ctl(const struct intel_crtc_state *crtc_state,
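
The new transcoder iterators above can be exercised with a minimal sketch like this (illustrative only; the debug message is an assumption, not part of the patch):

	enum transcoder cpu_transcoder;

	/* walk every transcoder the platform actually exposes */
	for_each_cpu_transcoder(dev_priv, cpu_transcoder)
		drm_dbg_kms(&dev_priv->drm, "transcoder %d supported\n",
			    cpu_transcoder);
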
diff --git a/drivers/gpu/drm/i915/display/intel_display_debugfs.c b/drivers/gpu/drm/i915/display/intel_display_debugfs.c
index 1e6eb7f2f72d..70525623bcdf 100644
--- a/drivers/gpu/drm/i915/display/intel_display_debugfs.c
+++ b/drivers/gpu/drm/i915/display/intel_display_debugfs.c
@@ -9,6 +9,7 @@
#include "i915_debugfs.h"
#include "intel_csr.h"
#include "intel_display_debugfs.h"
+#include "intel_display_power.h"
#include "intel_display_types.h"
#include "intel_dp.h"
#include "intel_fbc.h"
@@ -631,15 +632,9 @@ static void intel_dp_info(struct seq_file *m,
}
static void intel_dp_mst_info(struct seq_file *m,
- struct intel_connector *intel_connector)
+ struct intel_connector *intel_connector)
{
- struct intel_encoder *intel_encoder = intel_attached_encoder(intel_connector);
- struct intel_dp_mst_encoder *intel_mst =
- enc_to_mst(intel_encoder);
- struct intel_digital_port *intel_dig_port = intel_mst->primary;
- struct intel_dp *intel_dp = &intel_dig_port->dp;
- bool has_audio = drm_dp_mst_port_has_audio(&intel_dp->mst_mgr,
- intel_connector->port);
+ bool has_audio = intel_connector->port->has_audio;
seq_printf(m, "\taudio support: %s\n", yesno(has_audio));
}
@@ -1149,6 +1144,51 @@ static int i915_drrs_status(struct seq_file *m, void *unused)
return 0;
}
+#define LPSP_STATUS(COND) (COND ? seq_puts(m, "LPSP: enabled\n") : \
+ seq_puts(m, "LPSP: disabled\n"))
+
+static bool
+intel_lpsp_power_well_enabled(struct drm_i915_private *i915,
+ enum i915_power_well_id power_well_id)
+{
+ intel_wakeref_t wakeref;
+ bool is_enabled;
+
+ wakeref = intel_runtime_pm_get(&i915->runtime_pm);
+ is_enabled = intel_display_power_well_is_enabled(i915,
+ power_well_id);
+ intel_runtime_pm_put(&i915->runtime_pm, wakeref);
+
+ return is_enabled;
+}
+
+static int i915_lpsp_status(struct seq_file *m, void *unused)
+{
+ struct drm_i915_private *i915 = node_to_i915(m->private);
+
+ switch (INTEL_GEN(i915)) {
+ case 12:
+ case 11:
+ LPSP_STATUS(!intel_lpsp_power_well_enabled(i915, ICL_DISP_PW_3));
+ break;
+ case 10:
+ case 9:
+ LPSP_STATUS(!intel_lpsp_power_well_enabled(i915, SKL_DISP_PW_2));
+ break;
+ default:
+ /*
+ * Apart from HASWELL/BROADWELL, no other legacy platform
+ * supports LPSP.
+ */
+ if (IS_HASWELL(i915) || IS_BROADWELL(i915))
+ LPSP_STATUS(!intel_lpsp_power_well_enabled(i915, HSW_DISP_PW_GLOBAL));
+ else
+ seq_puts(m, "LPSP: not supported\n");
+ }
+
+ return 0;
+}
+
static int i915_dp_mst_info(struct seq_file *m, void *unused)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
@@ -1326,6 +1366,16 @@ static int i915_displayport_test_data_show(struct seq_file *m, void *data)
intel_dp->compliance.test_data.vdisplay);
seq_printf(m, "bpc: %u\n",
intel_dp->compliance.test_data.bpc);
+ } else if (intel_dp->compliance.test_type ==
+ DP_TEST_LINK_PHY_TEST_PATTERN) {
+ seq_printf(m, "pattern: %d\n",
+ intel_dp->compliance.test_data.phytest.phy_pattern);
+ seq_printf(m, "Number of lanes: %d\n",
+ intel_dp->compliance.test_data.phytest.num_lanes);
+ seq_printf(m, "Link Rate: %d\n",
+ intel_dp->compliance.test_data.phytest.link_rate);
+ seq_printf(m, "level: %02x\n",
+ intel_dp->train_set[0]);
}
} else
seq_puts(m, "0");
@@ -1358,7 +1408,7 @@ static int i915_displayport_test_type_show(struct seq_file *m, void *data)
if (encoder && connector->status == connector_status_connected) {
intel_dp = enc_to_intel_dp(encoder);
- seq_printf(m, "%02lx", intel_dp->compliance.test_type);
+ seq_printf(m, "%02lx\n", intel_dp->compliance.test_type);
} else
seq_puts(m, "0");
}
@@ -1906,6 +1956,7 @@ static const struct drm_info_list intel_display_debugfs_list[] = {
{"i915_dp_mst_info", i915_dp_mst_info, 0},
{"i915_ddb_info", i915_ddb_info, 0},
{"i915_drrs_status", i915_drrs_status, 0},
+ {"i915_lpsp_status", i915_lpsp_status, 0},
};
static const struct {
@@ -1927,7 +1978,7 @@ static const struct {
{"i915_edp_psr_debug", &i915_edp_psr_debug_fops},
};
-int intel_display_debugfs_register(struct drm_i915_private *i915)
+void intel_display_debugfs_register(struct drm_i915_private *i915)
{
struct drm_minor *minor = i915->drm.primary;
int i;
@@ -1940,9 +1991,9 @@ int intel_display_debugfs_register(struct drm_i915_private *i915)
intel_display_debugfs_files[i].fops);
}
- return drm_debugfs_create_files(intel_display_debugfs_list,
- ARRAY_SIZE(intel_display_debugfs_list),
- minor->debugfs_root, minor);
+ drm_debugfs_create_files(intel_display_debugfs_list,
+ ARRAY_SIZE(intel_display_debugfs_list),
+ minor->debugfs_root, minor);
}
static int i915_panel_show(struct seq_file *m, void *data)
@@ -1987,6 +2038,48 @@ static int i915_hdcp_sink_capability_show(struct seq_file *m, void *data)
}
DEFINE_SHOW_ATTRIBUTE(i915_hdcp_sink_capability);
+#define LPSP_CAPABLE(COND) (COND ? seq_puts(m, "LPSP: capable\n") : \
+ seq_puts(m, "LPSP: incapable\n"))
+
+static int i915_lpsp_capability_show(struct seq_file *m, void *data)
+{
+ struct drm_connector *connector = m->private;
+ struct intel_encoder *encoder =
+ intel_attached_encoder(to_intel_connector(connector));
+ struct drm_i915_private *i915 = to_i915(connector->dev);
+
+ if (connector->status != connector_status_connected)
+ return -ENODEV;
+
+ switch (INTEL_GEN(i915)) {
+ case 12:
+ /*
+ * TGL can actually drive LPSP on ports up to DDI_C, but no TGL
+ * SKU has DDI_C physically connected, and the driver does not
+ * even initialize the DDI_C port on gen12.
+ */
+ LPSP_CAPABLE(encoder->port <= PORT_B);
+ break;
+ case 11:
+ LPSP_CAPABLE(connector->connector_type == DRM_MODE_CONNECTOR_DSI ||
+ connector->connector_type == DRM_MODE_CONNECTOR_eDP);
+ break;
+ case 10:
+ case 9:
+ LPSP_CAPABLE(encoder->port == PORT_A &&
+ (connector->connector_type == DRM_MODE_CONNECTOR_DSI ||
+ connector->connector_type == DRM_MODE_CONNECTOR_eDP ||
+ connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort));
+ break;
+ default:
+ if (IS_HASWELL(i915) || IS_BROADWELL(i915))
+ LPSP_CAPABLE(connector->connector_type == DRM_MODE_CONNECTOR_eDP);
+ }
+
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(i915_lpsp_capability);
+
static int i915_dsc_fec_support_show(struct seq_file *m, void *data)
{
struct drm_connector *connector = m->private;
@@ -2130,5 +2223,16 @@ int intel_connector_debugfs_add(struct drm_connector *connector)
debugfs_create_file("i915_dsc_fec_support", S_IRUGO, root,
connector, &i915_dsc_fec_support_fops);
+ /* Legacy panels don't support LPSP on any platform */
+ if ((INTEL_GEN(dev_priv) >= 9 || IS_HASWELL(dev_priv) ||
+ IS_BROADWELL(dev_priv)) &&
+ (connector->connector_type == DRM_MODE_CONNECTOR_DSI ||
+ connector->connector_type == DRM_MODE_CONNECTOR_eDP ||
+ connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort ||
+ connector->connector_type == DRM_MODE_CONNECTOR_HDMIA ||
+ connector->connector_type == DRM_MODE_CONNECTOR_HDMIB))
+ debugfs_create_file("i915_lpsp_capability", 0444, root,
+ connector, &i915_lpsp_capability_fops);
+
return 0;
}
diff --git a/drivers/gpu/drm/i915/display/intel_display_debugfs.h b/drivers/gpu/drm/i915/display/intel_display_debugfs.h
index a3bea1ce04c2..c922c1745bfe 100644
--- a/drivers/gpu/drm/i915/display/intel_display_debugfs.h
+++ b/drivers/gpu/drm/i915/display/intel_display_debugfs.h
@@ -10,10 +10,10 @@ struct drm_connector;
struct drm_i915_private;
#ifdef CONFIG_DEBUG_FS
-int intel_display_debugfs_register(struct drm_i915_private *i915);
+void intel_display_debugfs_register(struct drm_i915_private *i915);
int intel_connector_debugfs_add(struct drm_connector *connector);
#else
-static inline int intel_display_debugfs_register(struct drm_i915_private *i915) { return 0; }
+static inline void intel_display_debugfs_register(struct drm_i915_private *i915) {}
static inline int intel_connector_debugfs_add(struct drm_connector *connector) { return 0; }
#endif
diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c
index 84ecf8e58523..49998906cc61 100644
--- a/drivers/gpu/drm/i915/display/intel_display_power.c
+++ b/drivers/gpu/drm/i915/display/intel_display_power.c
@@ -151,6 +151,8 @@ intel_display_power_domain_str(enum intel_display_power_domain domain)
return "GT_IRQ";
case POWER_DOMAIN_DPLL_DC_OFF:
return "DPLL_DC_OFF";
+ case POWER_DOMAIN_TC_COLD_OFF:
+ return "TC_COLD_OFF";
default:
MISSING_CASE(domain);
return "?";
@@ -282,8 +284,51 @@ static void hsw_power_well_pre_disable(struct drm_i915_private *dev_priv,
gen8_irq_power_well_pre_disable(dev_priv, irq_pipe_mask);
}
+#define ICL_AUX_PW_TO_CH(pw_idx) \
+ ((pw_idx) - ICL_PW_CTL_IDX_AUX_A + AUX_CH_A)
+
+#define ICL_TBT_AUX_PW_TO_CH(pw_idx) \
+ ((pw_idx) - ICL_PW_CTL_IDX_AUX_TBT1 + AUX_CH_C)
+
+static enum aux_ch icl_tc_phy_aux_ch(struct drm_i915_private *dev_priv,
+ struct i915_power_well *power_well)
+{
+ int pw_idx = power_well->desc->hsw.idx;
+
+ return power_well->desc->hsw.is_tc_tbt ? ICL_TBT_AUX_PW_TO_CH(pw_idx) :
+ ICL_AUX_PW_TO_CH(pw_idx);
+}
+
+static struct intel_digital_port *
+aux_ch_to_digital_port(struct drm_i915_private *dev_priv,
+ enum aux_ch aux_ch)
+{
+ struct intel_digital_port *dig_port = NULL;
+ struct intel_encoder *encoder;
+
+ for_each_intel_encoder(&dev_priv->drm, encoder) {
+ /* We'll check the MST primary port */
+ if (encoder->type == INTEL_OUTPUT_DP_MST)
+ continue;
+
+ dig_port = enc_to_dig_port(encoder);
+ if (!dig_port)
+ continue;
+
+ if (dig_port->aux_ch != aux_ch) {
+ dig_port = NULL;
+ continue;
+ }
+
+ break;
+ }
+
+ return dig_port;
+}
+
static void hsw_wait_for_power_well_enable(struct drm_i915_private *dev_priv,
- struct i915_power_well *power_well)
+ struct i915_power_well *power_well,
+ bool timeout_expected)
{
const struct i915_power_well_regs *regs = power_well->desc->hsw.regs;
int pw_idx = power_well->desc->hsw.idx;
@@ -294,8 +339,8 @@ static void hsw_wait_for_power_well_enable(struct drm_i915_private *dev_priv,
drm_dbg_kms(&dev_priv->drm, "%s power well enable timeout\n",
power_well->desc->name);
- /* An AUX timeout is expected if the TBT DP tunnel is down. */
- drm_WARN_ON(&dev_priv->drm, !power_well->desc->hsw.is_tc_tbt);
+ drm_WARN_ON(&dev_priv->drm, !timeout_expected);
+
}
}
@@ -358,11 +403,11 @@ static void hsw_power_well_enable(struct drm_i915_private *dev_priv,
{
const struct i915_power_well_regs *regs = power_well->desc->hsw.regs;
int pw_idx = power_well->desc->hsw.idx;
- bool wait_fuses = power_well->desc->hsw.has_fuses;
- enum skl_power_gate uninitialized_var(pg);
u32 val;
- if (wait_fuses) {
+ if (power_well->desc->hsw.has_fuses) {
+ enum skl_power_gate pg;
+
pg = INTEL_GEN(dev_priv) >= 11 ? ICL_PW_CTL_IDX_TO_PG(pw_idx) :
SKL_PW_CTL_IDX_TO_PG(pw_idx);
/*
@@ -379,19 +424,27 @@ static void hsw_power_well_enable(struct drm_i915_private *dev_priv,
val = intel_de_read(dev_priv, regs->driver);
intel_de_write(dev_priv, regs->driver,
val | HSW_PWR_WELL_CTL_REQ(pw_idx));
- hsw_wait_for_power_well_enable(dev_priv, power_well);
+
+ hsw_wait_for_power_well_enable(dev_priv, power_well, false);
/* Display WA #1178: cnl */
if (IS_CANNONLAKE(dev_priv) &&
pw_idx >= GLK_PW_CTL_IDX_AUX_B &&
pw_idx <= CNL_PW_CTL_IDX_AUX_F) {
+ u32 val;
+
val = intel_de_read(dev_priv, CNL_AUX_ANAOVRD1(pw_idx));
val |= CNL_AUX_ANAOVRD1_ENABLE | CNL_AUX_ANAOVRD1_LDO_BYPASS;
intel_de_write(dev_priv, CNL_AUX_ANAOVRD1(pw_idx), val);
}
- if (wait_fuses)
+ if (power_well->desc->hsw.has_fuses) {
+ enum skl_power_gate pg;
+
+ pg = INTEL_GEN(dev_priv) >= 11 ? ICL_PW_CTL_IDX_TO_PG(pw_idx) :
+ SKL_PW_CTL_IDX_TO_PG(pw_idx);
gen9_wait_for_power_well_fuses(dev_priv, pg);
+ }
hsw_power_well_post_enable(dev_priv,
power_well->desc->hsw.irq_pipe_mask,
@@ -437,7 +490,7 @@ icl_combo_phy_aux_power_well_enable(struct drm_i915_private *dev_priv,
val | ICL_LANE_ENABLE_AUX);
}
- hsw_wait_for_power_well_enable(dev_priv, power_well);
+ hsw_wait_for_power_well_enable(dev_priv, power_well, false);
/* Display WA #1178: icl */
if (pw_idx >= ICL_PW_CTL_IDX_AUX_A && pw_idx <= ICL_PW_CTL_IDX_AUX_B &&
@@ -470,21 +523,6 @@ icl_combo_phy_aux_power_well_disable(struct drm_i915_private *dev_priv,
hsw_wait_for_power_well_disable(dev_priv, power_well);
}
-#define ICL_AUX_PW_TO_CH(pw_idx) \
- ((pw_idx) - ICL_PW_CTL_IDX_AUX_A + AUX_CH_A)
-
-#define ICL_TBT_AUX_PW_TO_CH(pw_idx) \
- ((pw_idx) - ICL_PW_CTL_IDX_AUX_TBT1 + AUX_CH_C)
-
-static enum aux_ch icl_tc_phy_aux_ch(struct drm_i915_private *dev_priv,
- struct i915_power_well *power_well)
-{
- int pw_idx = power_well->desc->hsw.idx;
-
- return power_well->desc->hsw.is_tc_tbt ? ICL_TBT_AUX_PW_TO_CH(pw_idx) :
- ICL_AUX_PW_TO_CH(pw_idx);
-}
-
#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
static u64 async_put_domains_mask(struct i915_power_domains *power_domains);
@@ -501,51 +539,28 @@ static int power_well_async_ref_count(struct drm_i915_private *dev_priv,
}
static void icl_tc_port_assert_ref_held(struct drm_i915_private *dev_priv,
- struct i915_power_well *power_well)
+ struct i915_power_well *power_well,
+ struct intel_digital_port *dig_port)
{
- enum aux_ch aux_ch = icl_tc_phy_aux_ch(dev_priv, power_well);
- struct intel_digital_port *dig_port = NULL;
- struct intel_encoder *encoder;
-
/* Bypass the check if all references are released asynchronously */
if (power_well_async_ref_count(dev_priv, power_well) ==
power_well->count)
return;
- aux_ch = icl_tc_phy_aux_ch(dev_priv, power_well);
-
- for_each_intel_encoder(&dev_priv->drm, encoder) {
- enum phy phy = intel_port_to_phy(dev_priv, encoder->port);
-
- if (!intel_phy_is_tc(dev_priv, phy))
- continue;
-
- /* We'll check the MST primary port */
- if (encoder->type == INTEL_OUTPUT_DP_MST)
- continue;
-
- dig_port = enc_to_dig_port(encoder);
- if (drm_WARN_ON(&dev_priv->drm, !dig_port))
- continue;
-
- if (dig_port->aux_ch != aux_ch) {
- dig_port = NULL;
- continue;
- }
-
- break;
- }
-
if (drm_WARN_ON(&dev_priv->drm, !dig_port))
return;
+ if (INTEL_GEN(dev_priv) == 11 && dig_port->tc_legacy_port)
+ return;
+
drm_WARN_ON(&dev_priv->drm, !intel_tc_port_ref_held(dig_port));
}
#else
static void icl_tc_port_assert_ref_held(struct drm_i915_private *dev_priv,
- struct i915_power_well *power_well)
+ struct i915_power_well *power_well,
+ struct intel_digital_port *dig_port)
{
}
@@ -553,24 +568,65 @@ static void icl_tc_port_assert_ref_held(struct drm_i915_private *dev_priv,
#define TGL_AUX_PW_TO_TC_PORT(pw_idx) ((pw_idx) - TGL_PW_CTL_IDX_AUX_TC1)
+static void icl_tc_cold_exit(struct drm_i915_private *i915)
+{
+ int ret, tries = 0;
+
+ while (1) {
+ ret = sandybridge_pcode_write_timeout(i915,
+ ICL_PCODE_EXIT_TCCOLD,
+ 0, 250, 1);
+ if (ret != -EAGAIN || ++tries == 3)
+ break;
+ msleep(1);
+ }
+
+ /* Spec states that TC cold exit can take up to 1ms to complete */
+ if (!ret)
+ msleep(1);
+
+ /* TODO: turn failure into an error as soon as i915 CI updates the ICL IFWI */
+ drm_dbg_kms(&i915->drm, "TC cold block %s\n", ret ? "failed" :
+ "succeeded");
+}
+
static void
icl_tc_phy_aux_power_well_enable(struct drm_i915_private *dev_priv,
struct i915_power_well *power_well)
{
enum aux_ch aux_ch = icl_tc_phy_aux_ch(dev_priv, power_well);
+ struct intel_digital_port *dig_port = aux_ch_to_digital_port(dev_priv, aux_ch);
+ const struct i915_power_well_regs *regs = power_well->desc->hsw.regs;
+ bool is_tbt = power_well->desc->hsw.is_tc_tbt;
+ bool timeout_expected;
u32 val;
- icl_tc_port_assert_ref_held(dev_priv, power_well);
+ icl_tc_port_assert_ref_held(dev_priv, power_well, dig_port);
val = intel_de_read(dev_priv, DP_AUX_CH_CTL(aux_ch));
val &= ~DP_AUX_CH_CTL_TBT_IO;
- if (power_well->desc->hsw.is_tc_tbt)
+ if (is_tbt)
val |= DP_AUX_CH_CTL_TBT_IO;
intel_de_write(dev_priv, DP_AUX_CH_CTL(aux_ch), val);
- hsw_power_well_enable(dev_priv, power_well);
+ val = intel_de_read(dev_priv, regs->driver);
+ intel_de_write(dev_priv, regs->driver,
+ val | HSW_PWR_WELL_CTL_REQ(power_well->desc->hsw.idx));
+
+ /*
+ * An AUX timeout is expected if the TBT DP tunnel is down,
+ * or when we need to enable AUX on a legacy TypeC port as part of
+ * the TC-cold exit sequence.
+ */
+ timeout_expected = is_tbt;
+ if (INTEL_GEN(dev_priv) == 11 && dig_port->tc_legacy_port) {
+ icl_tc_cold_exit(dev_priv);
+ timeout_expected = true;
+ }
+
+ hsw_wait_for_power_well_enable(dev_priv, power_well, timeout_expected);
- if (INTEL_GEN(dev_priv) >= 12 && !power_well->desc->hsw.is_tc_tbt) {
+ if (INTEL_GEN(dev_priv) >= 12 && !is_tbt) {
enum tc_port tc_port;
tc_port = TGL_AUX_PW_TO_TC_PORT(power_well->desc->hsw.idx);
@@ -588,11 +644,48 @@ static void
icl_tc_phy_aux_power_well_disable(struct drm_i915_private *dev_priv,
struct i915_power_well *power_well)
{
- icl_tc_port_assert_ref_held(dev_priv, power_well);
+ enum aux_ch aux_ch = icl_tc_phy_aux_ch(dev_priv, power_well);
+ struct intel_digital_port *dig_port = aux_ch_to_digital_port(dev_priv, aux_ch);
+
+ icl_tc_port_assert_ref_held(dev_priv, power_well, dig_port);
hsw_power_well_disable(dev_priv, power_well);
}
+static void
+icl_aux_power_well_enable(struct drm_i915_private *dev_priv,
+ struct i915_power_well *power_well)
+{
+ int pw_idx = power_well->desc->hsw.idx;
+ enum phy phy = ICL_AUX_PW_TO_PHY(pw_idx); /* non-TBT only */
+ bool is_tbt = power_well->desc->hsw.is_tc_tbt;
+
+ if (is_tbt || intel_phy_is_tc(dev_priv, phy))
+ return icl_tc_phy_aux_power_well_enable(dev_priv, power_well);
+ else if (IS_ICELAKE(dev_priv))
+ return icl_combo_phy_aux_power_well_enable(dev_priv,
+ power_well);
+ else
+ return hsw_power_well_enable(dev_priv, power_well);
+}
+
+static void
+icl_aux_power_well_disable(struct drm_i915_private *dev_priv,
+ struct i915_power_well *power_well)
+{
+ int pw_idx = power_well->desc->hsw.idx;
+ enum phy phy = ICL_AUX_PW_TO_PHY(pw_idx); /* non-TBT only */
+ bool is_tbt = power_well->desc->hsw.is_tc_tbt;
+
+ if (is_tbt || intel_phy_is_tc(dev_priv, phy))
+ return icl_tc_phy_aux_power_well_disable(dev_priv, power_well);
+ else if (IS_ICELAKE(dev_priv))
+ return icl_combo_phy_aux_power_well_disable(dev_priv,
+ power_well);
+ else
+ return hsw_power_well_disable(dev_priv, power_well);
+}
+
/*
* We should only use the power well if we explicitly asked the hardware to
* enable it, so check if it's enabled and also check if we've requested it to
@@ -943,7 +1036,7 @@ static void assert_can_enable_dc5(struct drm_i915_private *dev_priv)
/* Power wells at this level and above must be disabled for DC5 entry */
if (INTEL_GEN(dev_priv) >= 12)
- high_pg = TGL_DISP_PW_3;
+ high_pg = ICL_DISP_PW_3;
else
high_pg = SKL_DISP_PW_2;
@@ -1873,20 +1966,27 @@ __async_put_domains_state_ok(struct i915_power_domains *power_domains)
static void print_power_domains(struct i915_power_domains *power_domains,
const char *prefix, u64 mask)
{
+ struct drm_i915_private *i915 = container_of(power_domains,
+ struct drm_i915_private,
+ power_domains);
enum intel_display_power_domain domain;
- DRM_DEBUG_DRIVER("%s (%lu):\n", prefix, hweight64(mask));
+ drm_dbg(&i915->drm, "%s (%lu):\n", prefix, hweight64(mask));
for_each_power_domain(domain, mask)
- DRM_DEBUG_DRIVER("%s use_count %d\n",
- intel_display_power_domain_str(domain),
- power_domains->domain_use_count[domain]);
+ drm_dbg(&i915->drm, "%s use_count %d\n",
+ intel_display_power_domain_str(domain),
+ power_domains->domain_use_count[domain]);
}
static void
print_async_put_domains_state(struct i915_power_domains *power_domains)
{
- DRM_DEBUG_DRIVER("async_put_wakeref %u\n",
- power_domains->async_put_wakeref);
+ struct drm_i915_private *i915 = container_of(power_domains,
+ struct drm_i915_private,
+ power_domains);
+
+ drm_dbg(&i915->drm, "async_put_wakeref %u\n",
+ power_domains->async_put_wakeref);
print_power_domains(power_domains, "async_put_domains[0]",
power_domains->async_put_domains[0]);
@@ -2798,6 +2898,21 @@ void intel_display_power_put(struct drm_i915_private *dev_priv,
#define TGL_AUX_I_TBT6_IO_POWER_DOMAINS ( \
BIT_ULL(POWER_DOMAIN_AUX_I_TBT))
+#define TGL_TC_COLD_OFF_POWER_DOMAINS ( \
+ BIT_ULL(POWER_DOMAIN_AUX_D) | \
+ BIT_ULL(POWER_DOMAIN_AUX_E) | \
+ BIT_ULL(POWER_DOMAIN_AUX_F) | \
+ BIT_ULL(POWER_DOMAIN_AUX_G) | \
+ BIT_ULL(POWER_DOMAIN_AUX_H) | \
+ BIT_ULL(POWER_DOMAIN_AUX_I) | \
+ BIT_ULL(POWER_DOMAIN_AUX_D_TBT) | \
+ BIT_ULL(POWER_DOMAIN_AUX_E_TBT) | \
+ BIT_ULL(POWER_DOMAIN_AUX_F_TBT) | \
+ BIT_ULL(POWER_DOMAIN_AUX_G_TBT) | \
+ BIT_ULL(POWER_DOMAIN_AUX_H_TBT) | \
+ BIT_ULL(POWER_DOMAIN_AUX_I_TBT) | \
+ BIT_ULL(POWER_DOMAIN_TC_COLD_OFF))
+
static const struct i915_power_well_ops i9xx_always_on_power_well_ops = {
.sync_hw = i9xx_power_well_sync_hw_noop,
.enable = i9xx_always_on_power_well_noop,
@@ -3496,17 +3611,10 @@ static const struct i915_power_well_desc cnl_power_wells[] = {
},
};
-static const struct i915_power_well_ops icl_combo_phy_aux_power_well_ops = {
- .sync_hw = hsw_power_well_sync_hw,
- .enable = icl_combo_phy_aux_power_well_enable,
- .disable = icl_combo_phy_aux_power_well_disable,
- .is_enabled = hsw_power_well_enabled,
-};
-
-static const struct i915_power_well_ops icl_tc_phy_aux_power_well_ops = {
+static const struct i915_power_well_ops icl_aux_power_well_ops = {
.sync_hw = hsw_power_well_sync_hw,
- .enable = icl_tc_phy_aux_power_well_enable,
- .disable = icl_tc_phy_aux_power_well_disable,
+ .enable = icl_aux_power_well_enable,
+ .disable = icl_aux_power_well_disable,
.is_enabled = hsw_power_well_enabled,
};
@@ -3564,7 +3672,7 @@ static const struct i915_power_well_desc icl_power_wells[] = {
.name = "power well 3",
.domains = ICL_PW_3_POWER_DOMAINS,
.ops = &hsw_power_well_ops,
- .id = DISP_PW_ID_NONE,
+ .id = ICL_DISP_PW_3,
{
.hsw.regs = &hsw_power_well_regs,
.hsw.idx = ICL_PW_CTL_IDX_PW_3,
@@ -3636,7 +3744,7 @@ static const struct i915_power_well_desc icl_power_wells[] = {
{
.name = "AUX A",
.domains = ICL_AUX_A_IO_POWER_DOMAINS,
- .ops = &icl_combo_phy_aux_power_well_ops,
+ .ops = &icl_aux_power_well_ops,
.id = DISP_PW_ID_NONE,
{
.hsw.regs = &icl_aux_power_well_regs,
@@ -3646,7 +3754,7 @@ static const struct i915_power_well_desc icl_power_wells[] = {
{
.name = "AUX B",
.domains = ICL_AUX_B_IO_POWER_DOMAINS,
- .ops = &icl_combo_phy_aux_power_well_ops,
+ .ops = &icl_aux_power_well_ops,
.id = DISP_PW_ID_NONE,
{
.hsw.regs = &icl_aux_power_well_regs,
@@ -3656,7 +3764,7 @@ static const struct i915_power_well_desc icl_power_wells[] = {
{
.name = "AUX C TC1",
.domains = ICL_AUX_C_TC1_IO_POWER_DOMAINS,
- .ops = &icl_tc_phy_aux_power_well_ops,
+ .ops = &icl_aux_power_well_ops,
.id = DISP_PW_ID_NONE,
{
.hsw.regs = &icl_aux_power_well_regs,
@@ -3667,7 +3775,7 @@ static const struct i915_power_well_desc icl_power_wells[] = {
{
.name = "AUX D TC2",
.domains = ICL_AUX_D_TC2_IO_POWER_DOMAINS,
- .ops = &icl_tc_phy_aux_power_well_ops,
+ .ops = &icl_aux_power_well_ops,
.id = DISP_PW_ID_NONE,
{
.hsw.regs = &icl_aux_power_well_regs,
@@ -3678,7 +3786,7 @@ static const struct i915_power_well_desc icl_power_wells[] = {
{
.name = "AUX E TC3",
.domains = ICL_AUX_E_TC3_IO_POWER_DOMAINS,
- .ops = &icl_tc_phy_aux_power_well_ops,
+ .ops = &icl_aux_power_well_ops,
.id = DISP_PW_ID_NONE,
{
.hsw.regs = &icl_aux_power_well_regs,
@@ -3689,7 +3797,7 @@ static const struct i915_power_well_desc icl_power_wells[] = {
{
.name = "AUX F TC4",
.domains = ICL_AUX_F_TC4_IO_POWER_DOMAINS,
- .ops = &icl_tc_phy_aux_power_well_ops,
+ .ops = &icl_aux_power_well_ops,
.id = DISP_PW_ID_NONE,
{
.hsw.regs = &icl_aux_power_well_regs,
@@ -3700,7 +3808,7 @@ static const struct i915_power_well_desc icl_power_wells[] = {
{
.name = "AUX C TBT1",
.domains = ICL_AUX_C_TBT1_IO_POWER_DOMAINS,
- .ops = &icl_tc_phy_aux_power_well_ops,
+ .ops = &icl_aux_power_well_ops,
.id = DISP_PW_ID_NONE,
{
.hsw.regs = &icl_aux_power_well_regs,
@@ -3711,7 +3819,7 @@ static const struct i915_power_well_desc icl_power_wells[] = {
{
.name = "AUX D TBT2",
.domains = ICL_AUX_D_TBT2_IO_POWER_DOMAINS,
- .ops = &icl_tc_phy_aux_power_well_ops,
+ .ops = &icl_aux_power_well_ops,
.id = DISP_PW_ID_NONE,
{
.hsw.regs = &icl_aux_power_well_regs,
@@ -3722,7 +3830,7 @@ static const struct i915_power_well_desc icl_power_wells[] = {
{
.name = "AUX E TBT3",
.domains = ICL_AUX_E_TBT3_IO_POWER_DOMAINS,
- .ops = &icl_tc_phy_aux_power_well_ops,
+ .ops = &icl_aux_power_well_ops,
.id = DISP_PW_ID_NONE,
{
.hsw.regs = &icl_aux_power_well_regs,
@@ -3733,7 +3841,7 @@ static const struct i915_power_well_desc icl_power_wells[] = {
{
.name = "AUX F TBT4",
.domains = ICL_AUX_F_TBT4_IO_POWER_DOMAINS,
- .ops = &icl_tc_phy_aux_power_well_ops,
+ .ops = &icl_aux_power_well_ops,
.id = DISP_PW_ID_NONE,
{
.hsw.regs = &icl_aux_power_well_regs,
@@ -3755,149 +3863,89 @@ static const struct i915_power_well_desc icl_power_wells[] = {
},
};
-static const struct i915_power_well_desc ehl_power_wells[] = {
- {
- .name = "always-on",
- .always_on = true,
- .domains = POWER_DOMAIN_MASK,
- .ops = &i9xx_always_on_power_well_ops,
- .id = DISP_PW_ID_NONE,
- },
- {
- .name = "power well 1",
- /* Handled by the DMC firmware */
- .always_on = true,
- .domains = 0,
- .ops = &hsw_power_well_ops,
- .id = SKL_DISP_PW_1,
- {
- .hsw.regs = &hsw_power_well_regs,
- .hsw.idx = ICL_PW_CTL_IDX_PW_1,
- .hsw.has_fuses = true,
- },
- },
- {
- .name = "DC off",
- .domains = ICL_DISPLAY_DC_OFF_POWER_DOMAINS,
- .ops = &gen9_dc_off_power_well_ops,
- .id = SKL_DISP_DC_OFF,
- },
- {
- .name = "power well 2",
- .domains = ICL_PW_2_POWER_DOMAINS,
- .ops = &hsw_power_well_ops,
- .id = SKL_DISP_PW_2,
- {
- .hsw.regs = &hsw_power_well_regs,
- .hsw.idx = ICL_PW_CTL_IDX_PW_2,
- .hsw.has_fuses = true,
- },
- },
- {
- .name = "power well 3",
- .domains = ICL_PW_3_POWER_DOMAINS,
- .ops = &hsw_power_well_ops,
- .id = DISP_PW_ID_NONE,
- {
- .hsw.regs = &hsw_power_well_regs,
- .hsw.idx = ICL_PW_CTL_IDX_PW_3,
- .hsw.irq_pipe_mask = BIT(PIPE_B),
- .hsw.has_vga = true,
- .hsw.has_fuses = true,
- },
- },
- {
- .name = "DDI A IO",
- .domains = ICL_DDI_IO_A_POWER_DOMAINS,
- .ops = &hsw_power_well_ops,
- .id = DISP_PW_ID_NONE,
- {
- .hsw.regs = &icl_ddi_power_well_regs,
- .hsw.idx = ICL_PW_CTL_IDX_DDI_A,
- },
- },
- {
- .name = "DDI B IO",
- .domains = ICL_DDI_IO_B_POWER_DOMAINS,
- .ops = &hsw_power_well_ops,
- .id = DISP_PW_ID_NONE,
- {
- .hsw.regs = &icl_ddi_power_well_regs,
- .hsw.idx = ICL_PW_CTL_IDX_DDI_B,
- },
- },
- {
- .name = "DDI C IO",
- .domains = ICL_DDI_IO_C_POWER_DOMAINS,
- .ops = &hsw_power_well_ops,
- .id = DISP_PW_ID_NONE,
- {
- .hsw.regs = &icl_ddi_power_well_regs,
- .hsw.idx = ICL_PW_CTL_IDX_DDI_C,
- },
- },
- {
- .name = "DDI D IO",
- .domains = ICL_DDI_IO_D_POWER_DOMAINS,
- .ops = &hsw_power_well_ops,
- .id = DISP_PW_ID_NONE,
- {
- .hsw.regs = &icl_ddi_power_well_regs,
- .hsw.idx = ICL_PW_CTL_IDX_DDI_D,
- },
- },
- {
- .name = "AUX A",
- .domains = ICL_AUX_A_IO_POWER_DOMAINS,
- .ops = &hsw_power_well_ops,
- .id = DISP_PW_ID_NONE,
- {
- .hsw.regs = &icl_aux_power_well_regs,
- .hsw.idx = ICL_PW_CTL_IDX_AUX_A,
- },
- },
- {
- .name = "AUX B",
- .domains = ICL_AUX_B_IO_POWER_DOMAINS,
- .ops = &hsw_power_well_ops,
- .id = DISP_PW_ID_NONE,
- {
- .hsw.regs = &icl_aux_power_well_regs,
- .hsw.idx = ICL_PW_CTL_IDX_AUX_B,
- },
- },
- {
- .name = "AUX C",
- .domains = ICL_AUX_C_TC1_IO_POWER_DOMAINS,
- .ops = &hsw_power_well_ops,
- .id = DISP_PW_ID_NONE,
- {
- .hsw.regs = &icl_aux_power_well_regs,
- .hsw.idx = ICL_PW_CTL_IDX_AUX_C,
- },
- },
- {
- .name = "AUX D",
- .domains = ICL_AUX_D_TC2_IO_POWER_DOMAINS,
- .ops = &hsw_power_well_ops,
- .id = DISP_PW_ID_NONE,
- {
- .hsw.regs = &icl_aux_power_well_regs,
- .hsw.idx = ICL_PW_CTL_IDX_AUX_D,
- },
- },
- {
- .name = "power well 4",
- .domains = ICL_PW_4_POWER_DOMAINS,
- .ops = &hsw_power_well_ops,
- .id = DISP_PW_ID_NONE,
- {
- .hsw.regs = &hsw_power_well_regs,
- .hsw.idx = ICL_PW_CTL_IDX_PW_4,
- .hsw.has_fuses = true,
- .hsw.irq_pipe_mask = BIT(PIPE_C),
- },
- },
+static void
+tgl_tc_cold_request(struct drm_i915_private *i915, bool block)
+{
+ u8 tries = 0;
+ int ret;
+
+ while (1) {
+ u32 low_val = 0, high_val;
+
+ if (block)
+ high_val = TGL_PCODE_EXIT_TCCOLD_DATA_H_BLOCK_REQ;
+ else
+ high_val = TGL_PCODE_EXIT_TCCOLD_DATA_H_UNBLOCK_REQ;
+
+ /*
+ * The spec states that we should time out the request after 200us,
+ * but the function below will time out after 500us.
+ */
+ ret = sandybridge_pcode_read(i915, TGL_PCODE_TCCOLD, &low_val,
+ &high_val);
+ if (ret == 0) {
+ if (block &&
+ (low_val & TGL_PCODE_EXIT_TCCOLD_DATA_L_EXIT_FAILED))
+ ret = -EIO;
+ else
+ break;
+ }
+
+ if (++tries == 3)
+ break;
+
+ if (ret == -EAGAIN)
+ msleep(1);
+ }
+
+ if (ret)
+ drm_err(&i915->drm, "TC cold %sblock failed\n",
+ block ? "" : "un");
+ else
+ drm_dbg_kms(&i915->drm, "TC cold %sblock succeeded\n",
+ block ? "" : "un");
+}
+
+static void
+tgl_tc_cold_off_power_well_enable(struct drm_i915_private *i915,
+ struct i915_power_well *power_well)
+{
+ tgl_tc_cold_request(i915, true);
+}
+
+static void
+tgl_tc_cold_off_power_well_disable(struct drm_i915_private *i915,
+ struct i915_power_well *power_well)
+{
+ tgl_tc_cold_request(i915, false);
+}
+
+static void
+tgl_tc_cold_off_power_well_sync_hw(struct drm_i915_private *i915,
+ struct i915_power_well *power_well)
+{
+ if (power_well->count > 0)
+ tgl_tc_cold_off_power_well_enable(i915, power_well);
+ else
+ tgl_tc_cold_off_power_well_disable(i915, power_well);
+}
+
+static bool
+tgl_tc_cold_off_power_well_is_enabled(struct drm_i915_private *dev_priv,
+ struct i915_power_well *power_well)
+{
+ /*
+ * Not the correct implementation, but there is no way to just read
+ * this back from PCODE, so return the count to avoid state mismatch errors.
+ */
+ return power_well->count;
+}
+
+static const struct i915_power_well_ops tgl_tc_cold_off_ops = {
+ .sync_hw = tgl_tc_cold_off_power_well_sync_hw,
+ .enable = tgl_tc_cold_off_power_well_enable,
+ .disable = tgl_tc_cold_off_power_well_disable,
+ .is_enabled = tgl_tc_cold_off_power_well_is_enabled,
};
static const struct i915_power_well_desc tgl_power_wells[] = {
@@ -3942,7 +3990,7 @@ static const struct i915_power_well_desc tgl_power_wells[] = {
.name = "power well 3",
.domains = TGL_PW_3_POWER_DOMAINS,
.ops = &hsw_power_well_ops,
- .id = TGL_DISP_PW_3,
+ .id = ICL_DISP_PW_3,
{
.hsw.regs = &hsw_power_well_regs,
.hsw.idx = ICL_PW_CTL_IDX_PW_3,
@@ -4044,7 +4092,7 @@ static const struct i915_power_well_desc tgl_power_wells[] = {
{
.name = "AUX A",
.domains = TGL_AUX_A_IO_POWER_DOMAINS,
- .ops = &hsw_power_well_ops,
+ .ops = &icl_aux_power_well_ops,
.id = DISP_PW_ID_NONE,
{
.hsw.regs = &icl_aux_power_well_regs,
@@ -4054,7 +4102,7 @@ static const struct i915_power_well_desc tgl_power_wells[] = {
{
.name = "AUX B",
.domains = TGL_AUX_B_IO_POWER_DOMAINS,
- .ops = &hsw_power_well_ops,
+ .ops = &icl_aux_power_well_ops,
.id = DISP_PW_ID_NONE,
{
.hsw.regs = &icl_aux_power_well_regs,
@@ -4064,7 +4112,7 @@ static const struct i915_power_well_desc tgl_power_wells[] = {
{
.name = "AUX C",
.domains = TGL_AUX_C_IO_POWER_DOMAINS,
- .ops = &hsw_power_well_ops,
+ .ops = &icl_aux_power_well_ops,
.id = DISP_PW_ID_NONE,
{
.hsw.regs = &icl_aux_power_well_regs,
@@ -4074,7 +4122,7 @@ static const struct i915_power_well_desc tgl_power_wells[] = {
{
.name = "AUX D TC1",
.domains = TGL_AUX_D_TC1_IO_POWER_DOMAINS,
- .ops = &icl_tc_phy_aux_power_well_ops,
+ .ops = &icl_aux_power_well_ops,
.id = DISP_PW_ID_NONE,
{
.hsw.regs = &icl_aux_power_well_regs,
@@ -4085,7 +4133,7 @@ static const struct i915_power_well_desc tgl_power_wells[] = {
{
.name = "AUX E TC2",
.domains = TGL_AUX_E_TC2_IO_POWER_DOMAINS,
- .ops = &icl_tc_phy_aux_power_well_ops,
+ .ops = &icl_aux_power_well_ops,
.id = DISP_PW_ID_NONE,
{
.hsw.regs = &icl_aux_power_well_regs,
@@ -4096,7 +4144,7 @@ static const struct i915_power_well_desc tgl_power_wells[] = {
{
.name = "AUX F TC3",
.domains = TGL_AUX_F_TC3_IO_POWER_DOMAINS,
- .ops = &icl_tc_phy_aux_power_well_ops,
+ .ops = &icl_aux_power_well_ops,
.id = DISP_PW_ID_NONE,
{
.hsw.regs = &icl_aux_power_well_regs,
@@ -4107,7 +4155,7 @@ static const struct i915_power_well_desc tgl_power_wells[] = {
{
.name = "AUX G TC4",
.domains = TGL_AUX_G_TC4_IO_POWER_DOMAINS,
- .ops = &icl_tc_phy_aux_power_well_ops,
+ .ops = &icl_aux_power_well_ops,
.id = DISP_PW_ID_NONE,
{
.hsw.regs = &icl_aux_power_well_regs,
@@ -4118,7 +4166,7 @@ static const struct i915_power_well_desc tgl_power_wells[] = {
{
.name = "AUX H TC5",
.domains = TGL_AUX_H_TC5_IO_POWER_DOMAINS,
- .ops = &icl_tc_phy_aux_power_well_ops,
+ .ops = &icl_aux_power_well_ops,
.id = DISP_PW_ID_NONE,
{
.hsw.regs = &icl_aux_power_well_regs,
@@ -4129,7 +4177,7 @@ static const struct i915_power_well_desc tgl_power_wells[] = {
{
.name = "AUX I TC6",
.domains = TGL_AUX_I_TC6_IO_POWER_DOMAINS,
- .ops = &icl_tc_phy_aux_power_well_ops,
+ .ops = &icl_aux_power_well_ops,
.id = DISP_PW_ID_NONE,
{
.hsw.regs = &icl_aux_power_well_regs,
@@ -4140,7 +4188,7 @@ static const struct i915_power_well_desc tgl_power_wells[] = {
{
.name = "AUX D TBT1",
.domains = TGL_AUX_D_TBT1_IO_POWER_DOMAINS,
- .ops = &icl_tc_phy_aux_power_well_ops,
+ .ops = &icl_aux_power_well_ops,
.id = DISP_PW_ID_NONE,
{
.hsw.regs = &icl_aux_power_well_regs,
@@ -4151,7 +4199,7 @@ static const struct i915_power_well_desc tgl_power_wells[] = {
{
.name = "AUX E TBT2",
.domains = TGL_AUX_E_TBT2_IO_POWER_DOMAINS,
- .ops = &icl_tc_phy_aux_power_well_ops,
+ .ops = &icl_aux_power_well_ops,
.id = DISP_PW_ID_NONE,
{
.hsw.regs = &icl_aux_power_well_regs,
@@ -4162,7 +4210,7 @@ static const struct i915_power_well_desc tgl_power_wells[] = {
{
.name = "AUX F TBT3",
.domains = TGL_AUX_F_TBT3_IO_POWER_DOMAINS,
- .ops = &icl_tc_phy_aux_power_well_ops,
+ .ops = &icl_aux_power_well_ops,
.id = DISP_PW_ID_NONE,
{
.hsw.regs = &icl_aux_power_well_regs,
@@ -4173,7 +4221,7 @@ static const struct i915_power_well_desc tgl_power_wells[] = {
{
.name = "AUX G TBT4",
.domains = TGL_AUX_G_TBT4_IO_POWER_DOMAINS,
- .ops = &icl_tc_phy_aux_power_well_ops,
+ .ops = &icl_aux_power_well_ops,
.id = DISP_PW_ID_NONE,
{
.hsw.regs = &icl_aux_power_well_regs,
@@ -4184,7 +4232,7 @@ static const struct i915_power_well_desc tgl_power_wells[] = {
{
.name = "AUX H TBT5",
.domains = TGL_AUX_H_TBT5_IO_POWER_DOMAINS,
- .ops = &icl_tc_phy_aux_power_well_ops,
+ .ops = &icl_aux_power_well_ops,
.id = DISP_PW_ID_NONE,
{
.hsw.regs = &icl_aux_power_well_regs,
@@ -4195,7 +4243,7 @@ static const struct i915_power_well_desc tgl_power_wells[] = {
{
.name = "AUX I TBT6",
.domains = TGL_AUX_I_TBT6_IO_POWER_DOMAINS,
- .ops = &icl_tc_phy_aux_power_well_ops,
+ .ops = &icl_aux_power_well_ops,
.id = DISP_PW_ID_NONE,
{
.hsw.regs = &icl_aux_power_well_regs,
@@ -4227,6 +4275,12 @@ static const struct i915_power_well_desc tgl_power_wells[] = {
.hsw.irq_pipe_mask = BIT(PIPE_D),
},
},
+ {
+ .name = "TC cold off",
+ .domains = TGL_TC_COLD_OFF_POWER_DOMAINS,
+ .ops = &tgl_tc_cold_off_ops,
+ .id = DISP_PW_ID_NONE,
+ },
};
static int
@@ -4376,8 +4430,6 @@ int intel_power_domains_init(struct drm_i915_private *dev_priv)
*/
if (IS_GEN(dev_priv, 12)) {
err = set_power_wells(power_domains, tgl_power_wells);
- } else if (IS_ELKHARTLAKE(dev_priv)) {
- err = set_power_wells(power_domains, ehl_power_wells);
} else if (IS_GEN(dev_priv, 11)) {
err = set_power_wells(power_domains, icl_power_wells);
} else if (IS_CANNONLAKE(dev_priv)) {
@@ -4439,9 +4491,8 @@ static void intel_power_domains_sync_hw(struct drm_i915_private *dev_priv)
mutex_unlock(&power_domains->lock);
}
-static inline
-bool intel_dbuf_slice_set(struct drm_i915_private *dev_priv,
- i915_reg_t reg, bool enable)
+static bool intel_dbuf_slice_set(struct drm_i915_private *dev_priv,
+ i915_reg_t reg, bool enable)
{
u32 val, status;
@@ -4480,7 +4531,8 @@ void icl_dbuf_slices_update(struct drm_i915_private *dev_priv,
drm_WARN(&dev_priv->drm, hweight8(req_slices) > max_slices,
"Invalid number of dbuf slices requested\n");
- DRM_DEBUG_KMS("Updating dbuf slices to 0x%x\n", req_slices);
+ drm_dbg_kms(&dev_priv->drm, "Updating dbuf slices to 0x%x\n",
+ req_slices);
/*
* Might be running this in parallel to gen9_dc_off_power_well_enable
@@ -5016,7 +5068,7 @@ static void tgl_bw_buddy_init(struct drm_i915_private *dev_priv)
const struct buddy_page_mask *table;
int i;
- if (IS_TGL_REVID(dev_priv, TGL_REVID_A0, TGL_REVID_A0))
+ if (IS_TGL_REVID(dev_priv, TGL_REVID_A0, TGL_REVID_B0))
/* Wa_1409767108: tgl */
table = wa_1409767108_buddy_page_masks;
else
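
As a side note on the AUX power-well helpers moved earlier in this file, the index-to-channel arithmetic can be sanity-checked at compile time with a sketch along these lines (illustrative only; no such assertions exist in the patch):

	/* pw_idx -> aux_ch mapping from ICL_AUX_PW_TO_CH()/ICL_TBT_AUX_PW_TO_CH() */
	BUILD_BUG_ON(ICL_AUX_PW_TO_CH(ICL_PW_CTL_IDX_AUX_A) != AUX_CH_A);
	BUILD_BUG_ON(ICL_TBT_AUX_PW_TO_CH(ICL_PW_CTL_IDX_AUX_TBT1) != AUX_CH_C);
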
diff --git a/drivers/gpu/drm/i915/display/intel_display_power.h b/drivers/gpu/drm/i915/display/intel_display_power.h
index da64a5edae7a..6c917699293b 100644
--- a/drivers/gpu/drm/i915/display/intel_display_power.h
+++ b/drivers/gpu/drm/i915/display/intel_display_power.h
@@ -76,6 +76,7 @@ enum intel_display_power_domain {
POWER_DOMAIN_MODESET,
POWER_DOMAIN_GT_IRQ,
POWER_DOMAIN_DPLL_DC_OFF,
+ POWER_DOMAIN_TC_COLD_OFF,
POWER_DOMAIN_INIT,
POWER_DOMAIN_NUM,
@@ -100,7 +101,7 @@ enum i915_power_well_id {
SKL_DISP_PW_MISC_IO,
SKL_DISP_PW_1,
SKL_DISP_PW_2,
- TGL_DISP_PW_3,
+ ICL_DISP_PW_3,
SKL_DISP_DC_OFF,
};
@@ -266,6 +267,8 @@ intel_display_power_domain_str(enum intel_display_power_domain domain);
bool intel_display_power_is_enabled(struct drm_i915_private *dev_priv,
enum intel_display_power_domain domain);
+bool intel_display_power_well_is_enabled(struct drm_i915_private *dev_priv,
+ enum i915_power_well_id power_well_id);
bool __intel_display_power_is_enabled(struct drm_i915_private *dev_priv,
enum intel_display_power_domain domain);
intel_wakeref_t intel_display_power_get(struct drm_i915_private *dev_priv,
diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h
index 5e00e611f077..2bf3d4cb4ea9 100644
--- a/drivers/gpu/drm/i915/display/intel_display_types.h
+++ b/drivers/gpu/drm/i915/display/intel_display_types.h
@@ -132,8 +132,7 @@ struct intel_encoder {
u16 cloneable;
u8 pipe_mask;
enum intel_hotplug_state (*hotplug)(struct intel_encoder *encoder,
- struct intel_connector *connector,
- bool irq_received);
+ struct intel_connector *connector);
enum intel_output_type (*compute_output_type)(struct intel_encoder *,
struct intel_crtc_state *,
struct drm_connector_state *);
@@ -146,28 +145,35 @@ struct intel_encoder {
void (*update_prepare)(struct intel_atomic_state *,
struct intel_encoder *,
struct intel_crtc *);
- void (*pre_pll_enable)(struct intel_encoder *,
+ void (*pre_pll_enable)(struct intel_atomic_state *,
+ struct intel_encoder *,
const struct intel_crtc_state *,
const struct drm_connector_state *);
- void (*pre_enable)(struct intel_encoder *,
+ void (*pre_enable)(struct intel_atomic_state *,
+ struct intel_encoder *,
const struct intel_crtc_state *,
const struct drm_connector_state *);
- void (*enable)(struct intel_encoder *,
+ void (*enable)(struct intel_atomic_state *,
+ struct intel_encoder *,
const struct intel_crtc_state *,
const struct drm_connector_state *);
void (*update_complete)(struct intel_atomic_state *,
struct intel_encoder *,
struct intel_crtc *);
- void (*disable)(struct intel_encoder *,
+ void (*disable)(struct intel_atomic_state *,
+ struct intel_encoder *,
const struct intel_crtc_state *,
const struct drm_connector_state *);
- void (*post_disable)(struct intel_encoder *,
+ void (*post_disable)(struct intel_atomic_state *,
+ struct intel_encoder *,
const struct intel_crtc_state *,
const struct drm_connector_state *);
- void (*post_pll_disable)(struct intel_encoder *,
+ void (*post_pll_disable)(struct intel_atomic_state *,
+ struct intel_encoder *,
const struct intel_crtc_state *,
const struct drm_connector_state *);
- void (*update_pipe)(struct intel_encoder *,
+ void (*update_pipe)(struct intel_atomic_state *,
+ struct intel_encoder *,
const struct intel_crtc_state *,
const struct drm_connector_state *);
/* Read out the current hw state of this connector, returning true if
@@ -425,11 +431,14 @@ struct intel_connector {
struct edid *edid;
struct edid *detect_edid;
+ /* Number of times hotplug detection was tried after an HPD interrupt */
+ int hotplug_retries;
+
/* since POLL and HPD connectors may use the same HPD line keep the native
state of connector->polled in case hotplug storm detection changes it */
u8 polled;
- void *port; /* store this opaque as its illegal to dereference it */
+ struct drm_dp_mst_port *port;
struct intel_dp *mst_port;
@@ -640,6 +649,16 @@ struct intel_crtc_scaler_state {
#define I915_MODE_FLAG_GET_SCANLINE_FROM_TIMESTAMP (1<<1)
/* Flag to use the scanline counter instead of the pixel counter */
#define I915_MODE_FLAG_USE_SCANLINE_COUNTER (1<<2)
+/*
+ * TE0 or TE1 flag is set if the crtc has a DSI encoder which
+ * is operating in command mode.
+ * Flag to use TE from DSI0 instead of VBI in command mode
+ */
+#define I915_MODE_FLAG_DSI_USE_TE0 (1<<3)
+/* Flag to use TE from DSI1 instead of VBI in command mode */
+#define I915_MODE_FLAG_DSI_USE_TE1 (1<<4)
+/* Flag to indicate mipi dsi periodic command mode where we do not get TE */
+#define I915_MODE_FLAG_DSI_PERIODIC_CMD_MODE (1<<5)
struct intel_wm_level {
bool enable;
@@ -669,11 +688,13 @@ struct skl_plane_wm {
struct skl_wm_level wm[8];
struct skl_wm_level uv_wm[8];
struct skl_wm_level trans_wm;
+ struct skl_wm_level sagv_wm0;
bool is_planar;
};
struct skl_pipe_wm {
struct skl_plane_wm planes[I915_MAX_PLANES];
+ bool use_sagv_wm;
};
enum vlv_wm_level {
@@ -955,8 +976,7 @@ struct intel_crtc_state {
/* Panel fitter placement and size for Ironlake+ */
struct {
- u32 pos;
- u32 size;
+ struct drm_rect dst;
bool enabled;
bool force_thru;
} pch_pfit;
@@ -1015,6 +1035,7 @@ struct intel_crtc_state {
union hdmi_infoframe spd;
union hdmi_infoframe hdmi;
union hdmi_infoframe drm;
+ struct drm_dp_vsc_sdp vsc;
} infoframes;
/* HDMI scrambling status */
@@ -1238,6 +1259,7 @@ struct intel_dp_compliance_data {
u8 video_pattern;
u16 hdisplay, vdisplay;
u8 bpc;
+ struct drm_dp_phy_test_params phytest;
};
struct intel_dp_compliance {
@@ -1347,6 +1369,9 @@ struct intel_dp {
/* This is called before link training is started */
void (*prepare_link_retrain)(struct intel_dp *intel_dp);
+ void (*set_link_train)(struct intel_dp *intel_dp, u8 dp_train_pat);
+ void (*set_idle_link_train)(struct intel_dp *intel_dp);
+ void (*set_signal_levels)(struct intel_dp *intel_dp);
/* Displayport compliance testing */
struct intel_dp_compliance compliance;
@@ -1401,6 +1426,7 @@ struct intel_digital_port {
const struct drm_connector_state *conn_state);
u32 (*infoframes_enabled)(struct intel_encoder *encoder,
const struct intel_crtc_state *pipe_config);
+ bool (*connected)(struct intel_encoder *encoder);
};
struct intel_dp_mst_encoder {
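
With the hook signatures above now taking the atomic state as their first argument, an encoder implementation is expected to look roughly like this (hedged sketch; the function name and body are assumptions, not taken from the patch):

	static void foo_encoder_enable(struct intel_atomic_state *state,
				       struct intel_encoder *encoder,
				       const struct intel_crtc_state *crtc_state,
				       const struct drm_connector_state *conn_state)
	{
		/* the full atomic state is now available directly in every hook */
	}
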
diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
index a2fafd4499f2..40d42dcff0b7 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -48,7 +48,6 @@
#include "intel_audio.h"
#include "intel_connector.h"
#include "intel_ddi.h"
-#include "intel_display_debugfs.h"
#include "intel_display_types.h"
#include "intel_dp.h"
#include "intel_dp_link_training.h"
@@ -164,6 +163,17 @@ static void intel_dp_set_sink_rates(struct intel_dp *intel_dp)
};
int i, max_rate;
+ if (drm_dp_has_quirk(&intel_dp->desc, 0,
+ DP_DPCD_QUIRK_CAN_DO_MAX_LINK_RATE_3_24_GBPS)) {
+ /* Needed, e.g., for Apple MBP 2017, 15 inch eDP Retina panel */
+ static const int quirk_rates[] = { 162000, 270000, 324000 };
+
+ memcpy(intel_dp->sink_rates, quirk_rates, sizeof(quirk_rates));
+ intel_dp->num_sink_rates = ARRAY_SIZE(quirk_rates);
+
+ return;
+ }
+
max_rate = drm_dp_bw_code_to_link_rate(intel_dp->dpcd[DP_MAX_LINK_RATE]);
for (i = 0; i < ARRAY_SIZE(dp_rates); i++) {
@@ -452,6 +462,7 @@ static bool intel_dp_can_link_train_fallback_for_edp(struct intel_dp *intel_dp,
int intel_dp_get_link_train_fallback_values(struct intel_dp *intel_dp,
int link_rate, u8 lane_count)
{
+ struct drm_i915_private *i915 = dp_to_i915(intel_dp);
int index;
index = intel_dp_rate_index(intel_dp->common_rates,
@@ -462,7 +473,8 @@ int intel_dp_get_link_train_fallback_values(struct intel_dp *intel_dp,
!intel_dp_can_link_train_fallback_for_edp(intel_dp,
intel_dp->common_rates[index - 1],
lane_count)) {
- DRM_DEBUG_KMS("Retrying Link training for eDP with same parameters\n");
+ drm_dbg_kms(&i915->drm,
+ "Retrying Link training for eDP with same parameters\n");
return 0;
}
intel_dp->max_link_rate = intel_dp->common_rates[index - 1];
@@ -472,13 +484,14 @@ int intel_dp_get_link_train_fallback_values(struct intel_dp *intel_dp,
!intel_dp_can_link_train_fallback_for_edp(intel_dp,
intel_dp_max_common_rate(intel_dp),
lane_count >> 1)) {
- DRM_DEBUG_KMS("Retrying Link training for eDP with same parameters\n");
+ drm_dbg_kms(&i915->drm,
+ "Retrying Link training for eDP with same parameters\n");
return 0;
}
intel_dp->max_link_rate = intel_dp_max_common_rate(intel_dp);
intel_dp->max_link_lane_count = lane_count >> 1;
} else {
- DRM_ERROR("Link Training Unsuccessful\n");
+ drm_err(&i915->drm, "Link Training Unsuccessful\n");
return -1;
}
@@ -553,6 +566,7 @@ static u16 intel_dp_dsc_get_output_bpp(struct drm_i915_private *i915,
static u8 intel_dp_dsc_get_slice_count(struct intel_dp *intel_dp,
int mode_clock, int mode_hdisplay)
{
+ struct drm_i915_private *i915 = dp_to_i915(intel_dp);
u8 min_slice_count, i;
int max_slice_width;
@@ -565,8 +579,9 @@ static u8 intel_dp_dsc_get_slice_count(struct intel_dp *intel_dp,
max_slice_width = drm_dp_dsc_sink_max_slice_width(intel_dp->dsc_dpcd);
if (max_slice_width < DP_DSC_MIN_SLICE_WIDTH_VALUE) {
- DRM_DEBUG_KMS("Unsupported slice width %d by DP DSC Sink device\n",
- max_slice_width);
+ drm_dbg_kms(&i915->drm,
+ "Unsupported slice width %d by DP DSC Sink device\n",
+ max_slice_width);
return 0;
}
/* Also take into account max slice width */
@@ -584,7 +599,8 @@ static u8 intel_dp_dsc_get_slice_count(struct intel_dp *intel_dp,
return valid_dsc_slicecount[i];
}
- DRM_DEBUG_KMS("Unsupported Slice Count %d\n", min_slice_count);
+ drm_dbg_kms(&i915->drm, "Unsupported Slice Count %d\n",
+ min_slice_count);
return 0;
}
@@ -1343,8 +1359,7 @@ intel_dp_aux_xfer(struct intel_dp *intel_dp,
bool is_tc_port = intel_phy_is_tc(i915, phy);
i915_reg_t ch_ctl, ch_data[5];
u32 aux_clock_divider;
- enum intel_display_power_domain aux_domain =
- intel_aux_power_domain(intel_dig_port);
+ enum intel_display_power_domain aux_domain;
intel_wakeref_t aux_wakeref;
intel_wakeref_t pps_wakeref;
int i, ret, recv_bytes;
@@ -1359,6 +1374,8 @@ intel_dp_aux_xfer(struct intel_dp *intel_dp,
if (is_tc_port)
intel_tc_port_lock(intel_dig_port);
+ aux_domain = intel_aux_power_domain(intel_dig_port);
+
aux_wakeref = intel_display_power_get(i915, aux_domain);
pps_wakeref = pps_lock(intel_dp);
@@ -1832,6 +1849,7 @@ static void snprintf_int_array(char *str, size_t len,
static void intel_dp_print_rates(struct intel_dp *intel_dp)
{
+ struct drm_i915_private *i915 = dp_to_i915(intel_dp);
char str[128]; /* FIXME: too big for stack? */
if (!drm_debug_enabled(DRM_UT_KMS))
@@ -1839,15 +1857,15 @@ static void intel_dp_print_rates(struct intel_dp *intel_dp)
snprintf_int_array(str, sizeof(str),
intel_dp->source_rates, intel_dp->num_source_rates);
- DRM_DEBUG_KMS("source rates: %s\n", str);
+ drm_dbg_kms(&i915->drm, "source rates: %s\n", str);
snprintf_int_array(str, sizeof(str),
intel_dp->sink_rates, intel_dp->num_sink_rates);
- DRM_DEBUG_KMS("sink rates: %s\n", str);
+ drm_dbg_kms(&i915->drm, "sink rates: %s\n", str);
snprintf_int_array(str, sizeof(str),
intel_dp->common_rates, intel_dp->num_common_rates);
- DRM_DEBUG_KMS("common rates: %s\n", str);
+ drm_dbg_kms(&i915->drm, "common rates: %s\n", str);
}
int
@@ -1954,6 +1972,8 @@ intel_dp_adjust_compliance_config(struct intel_dp *intel_dp,
struct intel_crtc_state *pipe_config,
struct link_config_limits *limits)
{
+ struct drm_i915_private *i915 = dp_to_i915(intel_dp);
+
/* For DP Compliance we override the computed bpp for the pipe */
if (intel_dp->compliance.test_data.bpc != 0) {
int bpp = 3 * intel_dp->compliance.test_data.bpc;
@@ -1961,7 +1981,7 @@ intel_dp_adjust_compliance_config(struct intel_dp *intel_dp,
limits->min_bpp = limits->max_bpp = bpp;
pipe_config->dither_force_disable = bpp == 6 * 3;
- DRM_DEBUG_KMS("Setting pipe_bpp to %d\n", bpp);
+ drm_dbg_kms(&i915->drm, "Setting pipe_bpp to %d\n", bpp);
}
/* Use values requested by Compliance Test Request */
@@ -2055,6 +2075,7 @@ static int intel_dp_dsc_compute_bpp(struct intel_dp *intel_dp, u8 dsc_max_bpc)
static int intel_dp_dsc_compute_params(struct intel_encoder *encoder,
struct intel_crtc_state *crtc_state)
{
+ struct drm_i915_private *i915 = to_i915(encoder->base.dev);
struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
struct drm_dsc_config *vdsc_cfg = &crtc_state->dsc.config;
u8 line_buf_depth;
@@ -2089,7 +2110,8 @@ static int intel_dp_dsc_compute_params(struct intel_encoder *encoder,
line_buf_depth = drm_dp_dsc_sink_line_buf_depth(intel_dp->dsc_dpcd);
if (!line_buf_depth) {
- DRM_DEBUG_KMS("DSC Sink Line Buffer Depth invalid\n");
+ drm_dbg_kms(&i915->drm,
+ "DSC Sink Line Buffer Depth invalid\n");
return -EINVAL;
}
@@ -2114,7 +2136,8 @@ static int intel_dp_dsc_compute_config(struct intel_dp *intel_dp,
{
struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev);
- struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode;
+ const struct drm_display_mode *adjusted_mode =
+ &pipe_config->hw.adjusted_mode;
u8 dsc_max_bpc;
int pipe_bpp;
int ret;
@@ -2229,7 +2252,9 @@ intel_dp_compute_link_config(struct intel_encoder *encoder,
struct intel_crtc_state *pipe_config,
struct drm_connector_state *conn_state)
{
- struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode;
+ struct drm_i915_private *i915 = to_i915(encoder->base.dev);
+ const struct drm_display_mode *adjusted_mode =
+ &pipe_config->hw.adjusted_mode;
struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
struct link_config_limits limits;
int common_len;
@@ -2264,11 +2289,11 @@ intel_dp_compute_link_config(struct intel_encoder *encoder,
intel_dp_adjust_compliance_config(intel_dp, pipe_config, &limits);
- DRM_DEBUG_KMS("DP link computation with max lane count %i "
- "max rate %d max bpp %d pixel clock %iKHz\n",
- limits.max_lane_count,
- intel_dp->common_rates[limits.max_clock],
- limits.max_bpp, adjusted_mode->crtc_clock);
+ drm_dbg_kms(&i915->drm, "DP link computation with max lane count %i "
+ "max rate %d max bpp %d pixel clock %iKHz\n",
+ limits.max_lane_count,
+ intel_dp->common_rates[limits.max_clock],
+ limits.max_bpp, adjusted_mode->crtc_clock);
/*
* Optimize for slow and wide. This is the place to add alternative
@@ -2277,7 +2302,7 @@ intel_dp_compute_link_config(struct intel_encoder *encoder,
ret = intel_dp_compute_link_config_wide(intel_dp, pipe_config, &limits);
/* enable compression if the mode doesn't fit available BW */
- DRM_DEBUG_KMS("Force DSC en = %d\n", intel_dp->force_dsc_en);
+ drm_dbg_kms(&i915->drm, "Force DSC en = %d\n", intel_dp->force_dsc_en);
if (ret || intel_dp->force_dsc_en) {
ret = intel_dp_dsc_compute_config(intel_dp, pipe_config,
conn_state, &limits);
@@ -2286,40 +2311,42 @@ intel_dp_compute_link_config(struct intel_encoder *encoder,
}
if (pipe_config->dsc.compression_enable) {
- DRM_DEBUG_KMS("DP lane count %d clock %d Input bpp %d Compressed bpp %d\n",
- pipe_config->lane_count, pipe_config->port_clock,
- pipe_config->pipe_bpp,
- pipe_config->dsc.compressed_bpp);
-
- DRM_DEBUG_KMS("DP link rate required %i available %i\n",
- intel_dp_link_required(adjusted_mode->crtc_clock,
- pipe_config->dsc.compressed_bpp),
- intel_dp_max_data_rate(pipe_config->port_clock,
- pipe_config->lane_count));
+ drm_dbg_kms(&i915->drm,
+ "DP lane count %d clock %d Input bpp %d Compressed bpp %d\n",
+ pipe_config->lane_count, pipe_config->port_clock,
+ pipe_config->pipe_bpp,
+ pipe_config->dsc.compressed_bpp);
+
+ drm_dbg_kms(&i915->drm,
+ "DP link rate required %i available %i\n",
+ intel_dp_link_required(adjusted_mode->crtc_clock,
+ pipe_config->dsc.compressed_bpp),
+ intel_dp_max_data_rate(pipe_config->port_clock,
+ pipe_config->lane_count));
} else {
- DRM_DEBUG_KMS("DP lane count %d clock %d bpp %d\n",
- pipe_config->lane_count, pipe_config->port_clock,
- pipe_config->pipe_bpp);
+ drm_dbg_kms(&i915->drm, "DP lane count %d clock %d bpp %d\n",
+ pipe_config->lane_count, pipe_config->port_clock,
+ pipe_config->pipe_bpp);
- DRM_DEBUG_KMS("DP link rate required %i available %i\n",
- intel_dp_link_required(adjusted_mode->crtc_clock,
- pipe_config->pipe_bpp),
- intel_dp_max_data_rate(pipe_config->port_clock,
- pipe_config->lane_count));
+ drm_dbg_kms(&i915->drm,
+ "DP link rate required %i available %i\n",
+ intel_dp_link_required(adjusted_mode->crtc_clock,
+ pipe_config->pipe_bpp),
+ intel_dp_max_data_rate(pipe_config->port_clock,
+ pipe_config->lane_count));
}
return 0;
}
static int
intel_dp_ycbcr420_config(struct intel_dp *intel_dp,
- struct drm_connector *connector,
- struct intel_crtc_state *crtc_state)
+ struct intel_crtc_state *crtc_state,
+ const struct drm_connector_state *conn_state)
{
+ struct drm_connector *connector = conn_state->connector;
const struct drm_display_info *info = &connector->display_info;
const struct drm_display_mode *adjusted_mode =
&crtc_state->hw.adjusted_mode;
- struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
- int ret;
if (!drm_mode_is_420_only(info, adjusted_mode) ||
!intel_dp_get_colorimetry_status(intel_dp) ||
@@ -2328,16 +2355,7 @@ intel_dp_ycbcr420_config(struct intel_dp *intel_dp,
crtc_state->output_format = INTEL_OUTPUT_FORMAT_YCBCR420;
- /* YCBCR 420 output conversion needs a scaler */
- ret = skl_update_scaler_crtc(crtc_state);
- if (ret) {
- DRM_DEBUG_KMS("Scaler allocation for output failed\n");
- return ret;
- }
-
- intel_pch_panel_fitting(crtc, crtc_state, DRM_MODE_SCALE_FULLSCREEN);
-
- return 0;
+ return intel_pch_panel_fitting(crtc_state, conn_state);
}
bool intel_dp_limited_color_range(const struct intel_crtc_state *crtc_state,
@@ -2384,6 +2402,164 @@ static bool intel_dp_port_has_audio(struct drm_i915_private *dev_priv,
return true;
}
+static void intel_dp_compute_vsc_colorimetry(const struct intel_crtc_state *crtc_state,
+ const struct drm_connector_state *conn_state,
+ struct drm_dp_vsc_sdp *vsc)
+{
+ struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
+ struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+
+ /*
+ * Prepare VSC Header for SU as per DP 1.4 spec, Table 2-118
+ * VSC SDP supporting 3D stereo, PSR2, and Pixel Encoding/
+ * Colorimetry Format indication.
+ */
+ vsc->revision = 0x5;
+ vsc->length = 0x13;
+
+ /* DP 1.4a spec, Table 2-120 */
+ switch (crtc_state->output_format) {
+ case INTEL_OUTPUT_FORMAT_YCBCR444:
+ vsc->pixelformat = DP_PIXELFORMAT_YUV444;
+ break;
+ case INTEL_OUTPUT_FORMAT_YCBCR420:
+ vsc->pixelformat = DP_PIXELFORMAT_YUV420;
+ break;
+ case INTEL_OUTPUT_FORMAT_RGB:
+ default:
+ vsc->pixelformat = DP_PIXELFORMAT_RGB;
+ }
+
+ switch (conn_state->colorspace) {
+ case DRM_MODE_COLORIMETRY_BT709_YCC:
+ vsc->colorimetry = DP_COLORIMETRY_BT709_YCC;
+ break;
+ case DRM_MODE_COLORIMETRY_XVYCC_601:
+ vsc->colorimetry = DP_COLORIMETRY_XVYCC_601;
+ break;
+ case DRM_MODE_COLORIMETRY_XVYCC_709:
+ vsc->colorimetry = DP_COLORIMETRY_XVYCC_709;
+ break;
+ case DRM_MODE_COLORIMETRY_SYCC_601:
+ vsc->colorimetry = DP_COLORIMETRY_SYCC_601;
+ break;
+ case DRM_MODE_COLORIMETRY_OPYCC_601:
+ vsc->colorimetry = DP_COLORIMETRY_OPYCC_601;
+ break;
+ case DRM_MODE_COLORIMETRY_BT2020_CYCC:
+ vsc->colorimetry = DP_COLORIMETRY_BT2020_CYCC;
+ break;
+ case DRM_MODE_COLORIMETRY_BT2020_RGB:
+ vsc->colorimetry = DP_COLORIMETRY_BT2020_RGB;
+ break;
+ case DRM_MODE_COLORIMETRY_BT2020_YCC:
+ vsc->colorimetry = DP_COLORIMETRY_BT2020_YCC;
+ break;
+ case DRM_MODE_COLORIMETRY_DCI_P3_RGB_D65:
+ case DRM_MODE_COLORIMETRY_DCI_P3_RGB_THEATER:
+ vsc->colorimetry = DP_COLORIMETRY_DCI_P3_RGB;
+ break;
+ default:
+ /*
+ * RGB->YCBCR color conversion uses the BT.709
+ * color space.
+ */
+ if (crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR420)
+ vsc->colorimetry = DP_COLORIMETRY_BT709_YCC;
+ else
+ vsc->colorimetry = DP_COLORIMETRY_DEFAULT;
+ break;
+ }
+
+ vsc->bpc = crtc_state->pipe_bpp / 3;
+
+ /* only RGB pixelformat supports 6 bpc */
+ drm_WARN_ON(&dev_priv->drm,
+ vsc->bpc == 6 && vsc->pixelformat != DP_PIXELFORMAT_RGB);
+
+ /* all YCbCr are always limited range */
+ vsc->dynamic_range = DP_DYNAMIC_RANGE_CTA;
+ vsc->content_type = DP_CONTENT_TYPE_NOT_DEFINED;
+}
+
+static void intel_dp_compute_vsc_sdp(struct intel_dp *intel_dp,
+ struct intel_crtc_state *crtc_state,
+ const struct drm_connector_state *conn_state)
+{
+ struct drm_dp_vsc_sdp *vsc = &crtc_state->infoframes.vsc;
+
+ /* When a crtc state has PSR, VSC SDP will be handled by PSR routine */
+ if (crtc_state->has_psr)
+ return;
+
+ if (!intel_dp_needs_vsc_sdp(crtc_state, conn_state))
+ return;
+
+ crtc_state->infoframes.enable |= intel_hdmi_infoframe_enable(DP_SDP_VSC);
+ vsc->sdp_type = DP_SDP_VSC;
+ intel_dp_compute_vsc_colorimetry(crtc_state, conn_state,
+ &crtc_state->infoframes.vsc);
+}
+
+void intel_dp_compute_psr_vsc_sdp(struct intel_dp *intel_dp,
+ const struct intel_crtc_state *crtc_state,
+ const struct drm_connector_state *conn_state,
+ struct drm_dp_vsc_sdp *vsc)
+{
+ struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
+
+ vsc->sdp_type = DP_SDP_VSC;
+
+ if (dev_priv->psr.psr2_enabled) {
+ if (dev_priv->psr.colorimetry_support &&
+ intel_dp_needs_vsc_sdp(crtc_state, conn_state)) {
+ /* [PSR2, +Colorimetry] */
+ intel_dp_compute_vsc_colorimetry(crtc_state, conn_state,
+ vsc);
+ } else {
+ /*
+ * [PSR2, -Colorimetry]
+ * Prepare VSC Header for SU as per eDP 1.4 spec, Table 6-11
+ * 3D stereo + PSR/PSR2 + Y-coordinate.
+ */
+ vsc->revision = 0x4;
+ vsc->length = 0xe;
+ }
+ } else {
+ /*
+ * [PSR1]
+ * Prepare VSC Header for SU as per DP 1.4 spec, Table 2-118
+ * VSC SDP supporting 3D stereo + PSR (applies to eDP v1.3 or
+ * higher).
+ */
+ vsc->revision = 0x2;
+ vsc->length = 0x8;
+ }
+}
+
+static void
+intel_dp_compute_hdr_metadata_infoframe_sdp(struct intel_dp *intel_dp,
+ struct intel_crtc_state *crtc_state,
+ const struct drm_connector_state *conn_state)
+{
+ int ret;
+ struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
+ struct hdmi_drm_infoframe *drm_infoframe = &crtc_state->infoframes.drm.drm;
+
+ if (!conn_state->hdr_output_metadata)
+ return;
+
+ ret = drm_hdmi_infoframe_set_hdr_metadata(drm_infoframe, conn_state);
+
+ if (ret) {
+ drm_dbg_kms(&dev_priv->drm, "couldn't set HDR metadata in infoframe\n");
+ return;
+ }
+
+ crtc_state->infoframes.enable |=
+ intel_hdmi_infoframe_enable(HDMI_PACKET_TYPE_GAMUT_METADATA);
+}
+
int
intel_dp_compute_config(struct intel_encoder *encoder,
struct intel_crtc_state *pipe_config,
@@ -2394,7 +2570,6 @@ intel_dp_compute_config(struct intel_encoder *encoder,
struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
struct intel_lspcon *lspcon = enc_to_intel_lspcon(encoder);
enum port port = encoder->port;
- struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->uapi.crtc);
struct intel_connector *intel_connector = intel_dp->attached_connector;
struct intel_digital_connector_state *intel_conn_state =
to_intel_digital_connector_state(conn_state);
@@ -2410,9 +2585,8 @@ intel_dp_compute_config(struct intel_encoder *encoder,
if (lspcon->active)
lspcon_ycbcr420_config(&intel_connector->base, pipe_config);
else
- ret = intel_dp_ycbcr420_config(intel_dp, &intel_connector->base,
- pipe_config);
-
+ ret = intel_dp_ycbcr420_config(intel_dp, pipe_config,
+ conn_state);
if (ret)
return ret;
@@ -2428,18 +2602,12 @@ intel_dp_compute_config(struct intel_encoder *encoder,
intel_fixed_panel_mode(intel_connector->panel.fixed_mode,
adjusted_mode);
- if (INTEL_GEN(dev_priv) >= 9) {
- ret = skl_update_scaler_crtc(pipe_config);
- if (ret)
- return ret;
- }
-
if (HAS_GMCH(dev_priv))
- intel_gmch_panel_fitting(intel_crtc, pipe_config,
- conn_state->scaling_mode);
+ ret = intel_gmch_panel_fitting(pipe_config, conn_state);
else
- intel_pch_panel_fitting(intel_crtc, pipe_config,
- conn_state->scaling_mode);
+ ret = intel_pch_panel_fitting(pipe_config, conn_state);
+ if (ret)
+ return ret;
}
if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN)
@@ -2489,6 +2657,8 @@ intel_dp_compute_config(struct intel_encoder *encoder,
intel_dp_set_clock(encoder, pipe_config);
intel_psr_compute_config(intel_dp, pipe_config);
+ intel_dp_compute_vsc_sdp(intel_dp, pipe_config, conn_state);
+ intel_dp_compute_hdr_metadata_infoframe_sdp(intel_dp, pipe_config, conn_state);
return 0;
}
@@ -2630,22 +2800,27 @@ static void wait_panel_status(struct intel_dp *intel_dp,
static void wait_panel_on(struct intel_dp *intel_dp)
{
- DRM_DEBUG_KMS("Wait for panel power on\n");
+ struct drm_i915_private *i915 = dp_to_i915(intel_dp);
+
+ drm_dbg_kms(&i915->drm, "Wait for panel power on\n");
wait_panel_status(intel_dp, IDLE_ON_MASK, IDLE_ON_VALUE);
}
static void wait_panel_off(struct intel_dp *intel_dp)
{
- DRM_DEBUG_KMS("Wait for panel power off time\n");
+ struct drm_i915_private *i915 = dp_to_i915(intel_dp);
+
+ drm_dbg_kms(&i915->drm, "Wait for panel power off time\n");
wait_panel_status(intel_dp, IDLE_OFF_MASK, IDLE_OFF_VALUE);
}
static void wait_panel_power_cycle(struct intel_dp *intel_dp)
{
+ struct drm_i915_private *i915 = dp_to_i915(intel_dp);
ktime_t panel_power_on_time;
s64 panel_power_off_duration;
- DRM_DEBUG_KMS("Wait for panel power cycle\n");
+ drm_dbg_kms(&i915->drm, "Wait for panel power cycle\n");
/* take the difference of current time and panel power off time
* and then make panel wait for t11_t12 if needed. */
@@ -3009,11 +3184,12 @@ void intel_edp_backlight_on(const struct intel_crtc_state *crtc_state,
const struct drm_connector_state *conn_state)
{
struct intel_dp *intel_dp = enc_to_intel_dp(to_intel_encoder(conn_state->best_encoder));
+ struct drm_i915_private *i915 = dp_to_i915(intel_dp);
if (!intel_dp_is_edp(intel_dp))
return;
- DRM_DEBUG_KMS("\n");
+ drm_dbg_kms(&i915->drm, "\n");
intel_panel_enable_backlight(crtc_state, conn_state);
_intel_edp_backlight_on(intel_dp);
@@ -3047,11 +3223,12 @@ static void _intel_edp_backlight_off(struct intel_dp *intel_dp)
void intel_edp_backlight_off(const struct drm_connector_state *old_conn_state)
{
struct intel_dp *intel_dp = enc_to_intel_dp(to_intel_encoder(old_conn_state->best_encoder));
+ struct drm_i915_private *i915 = dp_to_i915(intel_dp);
if (!intel_dp_is_edp(intel_dp))
return;
- DRM_DEBUG_KMS("\n");
+ drm_dbg_kms(&i915->drm, "\n");
_intel_edp_backlight_off(intel_dp);
intel_panel_disable_backlight(old_conn_state);
@@ -3064,6 +3241,7 @@ void intel_edp_backlight_off(const struct drm_connector_state *old_conn_state)
static void intel_edp_backlight_power(struct intel_connector *connector,
bool enable)
{
+ struct drm_i915_private *i915 = to_i915(connector->base.dev);
struct intel_dp *intel_dp = intel_attached_dp(connector);
intel_wakeref_t wakeref;
bool is_enabled;
@@ -3074,8 +3252,8 @@ static void intel_edp_backlight_power(struct intel_connector *connector,
if (is_enabled == enable)
return;
- DRM_DEBUG_KMS("panel power control backlight %s\n",
- enable ? "enable" : "disable");
+ drm_dbg_kms(&i915->drm, "panel power control backlight %s\n",
+ enable ? "enable" : "disable");
if (enable)
_intel_edp_backlight_on(intel_dp);
@@ -3185,6 +3363,7 @@ void intel_dp_sink_set_decompression_state(struct intel_dp *intel_dp,
const struct intel_crtc_state *crtc_state,
bool enable)
{
+ struct drm_i915_private *i915 = dp_to_i915(intel_dp);
int ret;
if (!crtc_state->dsc.compression_enable)
@@ -3193,13 +3372,15 @@ void intel_dp_sink_set_decompression_state(struct intel_dp *intel_dp,
ret = drm_dp_dpcd_writeb(&intel_dp->aux, DP_DSC_ENABLE,
enable ? DP_DECOMPRESSION_EN : 0);
if (ret < 0)
- DRM_DEBUG_KMS("Failed to %s sink decompression state\n",
- enable ? "enable" : "disable");
+ drm_dbg_kms(&i915->drm,
+ "Failed to %s sink decompression state\n",
+ enable ? "enable" : "disable");
}
/* If the sink supports it, try to set the power state appropriately */
void intel_dp_sink_dpms(struct intel_dp *intel_dp, int mode)
{
+ struct drm_i915_private *i915 = dp_to_i915(intel_dp);
int ret, i;
/* Should have a valid DPCD by this point */
@@ -3232,8 +3413,8 @@ void intel_dp_sink_dpms(struct intel_dp *intel_dp, int mode)
}
if (ret != 1)
- DRM_DEBUG_KMS("failed to %s sink power state\n",
- mode == DRM_MODE_DPMS_ON ? "enable" : "disable");
+ drm_dbg_kms(&i915->drm, "failed to %s sink power state\n",
+ mode == DRM_MODE_DPMS_ON ? "enable" : "disable");
}
static bool cpt_dp_port_selected(struct drm_i915_private *dev_priv,
@@ -3390,7 +3571,8 @@ static void intel_dp_get_config(struct intel_encoder *encoder,
}
}
-static void intel_disable_dp(struct intel_encoder *encoder,
+static void intel_disable_dp(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *old_crtc_state,
const struct drm_connector_state *old_conn_state)
{
@@ -3410,21 +3592,24 @@ static void intel_disable_dp(struct intel_encoder *encoder,
intel_edp_panel_off(intel_dp);
}
-static void g4x_disable_dp(struct intel_encoder *encoder,
+static void g4x_disable_dp(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *old_crtc_state,
const struct drm_connector_state *old_conn_state)
{
- intel_disable_dp(encoder, old_crtc_state, old_conn_state);
+ intel_disable_dp(state, encoder, old_crtc_state, old_conn_state);
}
-static void vlv_disable_dp(struct intel_encoder *encoder,
+static void vlv_disable_dp(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *old_crtc_state,
const struct drm_connector_state *old_conn_state)
{
- intel_disable_dp(encoder, old_crtc_state, old_conn_state);
+ intel_disable_dp(state, encoder, old_crtc_state, old_conn_state);
}
-static void g4x_post_disable_dp(struct intel_encoder *encoder,
+static void g4x_post_disable_dp(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *old_crtc_state,
const struct drm_connector_state *old_conn_state)
{
@@ -3444,14 +3629,16 @@ static void g4x_post_disable_dp(struct intel_encoder *encoder,
ilk_edp_pll_off(intel_dp, old_crtc_state);
}
-static void vlv_post_disable_dp(struct intel_encoder *encoder,
+static void vlv_post_disable_dp(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *old_crtc_state,
const struct drm_connector_state *old_conn_state)
{
intel_dp_link_down(encoder, old_crtc_state);
}
-static void chv_post_disable_dp(struct intel_encoder *encoder,
+static void chv_post_disable_dp(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *old_crtc_state,
const struct drm_connector_state *old_conn_state)
{
@@ -3468,90 +3655,63 @@ static void chv_post_disable_dp(struct intel_encoder *encoder,
}
static void
-_intel_dp_set_link_train(struct intel_dp *intel_dp,
- u32 *DP,
- u8 dp_train_pat)
+cpt_set_link_train(struct intel_dp *intel_dp,
+ u8 dp_train_pat)
{
struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
- struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
- enum port port = intel_dig_port->base.port;
- u8 train_pat_mask = drm_dp_training_pattern_mask(intel_dp->dpcd);
-
- if (dp_train_pat & train_pat_mask)
- drm_dbg_kms(&dev_priv->drm,
- "Using DP training pattern TPS%d\n",
- dp_train_pat & train_pat_mask);
-
- if (HAS_DDI(dev_priv)) {
- u32 temp = intel_de_read(dev_priv, intel_dp->regs.dp_tp_ctl);
+ u32 *DP = &intel_dp->DP;
- if (dp_train_pat & DP_LINK_SCRAMBLING_DISABLE)
- temp |= DP_TP_CTL_SCRAMBLE_DISABLE;
- else
- temp &= ~DP_TP_CTL_SCRAMBLE_DISABLE;
-
- temp &= ~DP_TP_CTL_LINK_TRAIN_MASK;
- switch (dp_train_pat & train_pat_mask) {
- case DP_TRAINING_PATTERN_DISABLE:
- temp |= DP_TP_CTL_LINK_TRAIN_NORMAL;
+ *DP &= ~DP_LINK_TRAIN_MASK_CPT;
- break;
- case DP_TRAINING_PATTERN_1:
- temp |= DP_TP_CTL_LINK_TRAIN_PAT1;
- break;
- case DP_TRAINING_PATTERN_2:
- temp |= DP_TP_CTL_LINK_TRAIN_PAT2;
- break;
- case DP_TRAINING_PATTERN_3:
- temp |= DP_TP_CTL_LINK_TRAIN_PAT3;
- break;
- case DP_TRAINING_PATTERN_4:
- temp |= DP_TP_CTL_LINK_TRAIN_PAT4;
- break;
- }
- intel_de_write(dev_priv, intel_dp->regs.dp_tp_ctl, temp);
+ switch (dp_train_pat & DP_TRAINING_PATTERN_MASK) {
+ case DP_TRAINING_PATTERN_DISABLE:
+ *DP |= DP_LINK_TRAIN_OFF_CPT;
+ break;
+ case DP_TRAINING_PATTERN_1:
+ *DP |= DP_LINK_TRAIN_PAT_1_CPT;
+ break;
+ case DP_TRAINING_PATTERN_2:
+ *DP |= DP_LINK_TRAIN_PAT_2_CPT;
+ break;
+ case DP_TRAINING_PATTERN_3:
+ drm_dbg_kms(&dev_priv->drm,
+ "TPS3 not supported, using TPS2 instead\n");
+ *DP |= DP_LINK_TRAIN_PAT_2_CPT;
+ break;
+ }
- } else if ((IS_IVYBRIDGE(dev_priv) && port == PORT_A) ||
- (HAS_PCH_CPT(dev_priv) && port != PORT_A)) {
- *DP &= ~DP_LINK_TRAIN_MASK_CPT;
+ intel_de_write(dev_priv, intel_dp->output_reg, intel_dp->DP);
+ intel_de_posting_read(dev_priv, intel_dp->output_reg);
+}
- switch (dp_train_pat & DP_TRAINING_PATTERN_MASK) {
- case DP_TRAINING_PATTERN_DISABLE:
- *DP |= DP_LINK_TRAIN_OFF_CPT;
- break;
- case DP_TRAINING_PATTERN_1:
- *DP |= DP_LINK_TRAIN_PAT_1_CPT;
- break;
- case DP_TRAINING_PATTERN_2:
- *DP |= DP_LINK_TRAIN_PAT_2_CPT;
- break;
- case DP_TRAINING_PATTERN_3:
- drm_dbg_kms(&dev_priv->drm,
- "TPS3 not supported, using TPS2 instead\n");
- *DP |= DP_LINK_TRAIN_PAT_2_CPT;
- break;
- }
+static void
+g4x_set_link_train(struct intel_dp *intel_dp,
+ u8 dp_train_pat)
+{
+ struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
+ u32 *DP = &intel_dp->DP;
- } else {
- *DP &= ~DP_LINK_TRAIN_MASK;
+ *DP &= ~DP_LINK_TRAIN_MASK;
- switch (dp_train_pat & DP_TRAINING_PATTERN_MASK) {
- case DP_TRAINING_PATTERN_DISABLE:
- *DP |= DP_LINK_TRAIN_OFF;
- break;
- case DP_TRAINING_PATTERN_1:
- *DP |= DP_LINK_TRAIN_PAT_1;
- break;
- case DP_TRAINING_PATTERN_2:
- *DP |= DP_LINK_TRAIN_PAT_2;
- break;
- case DP_TRAINING_PATTERN_3:
- drm_dbg_kms(&dev_priv->drm,
- "TPS3 not supported, using TPS2 instead\n");
- *DP |= DP_LINK_TRAIN_PAT_2;
- break;
- }
+ switch (dp_train_pat & DP_TRAINING_PATTERN_MASK) {
+ case DP_TRAINING_PATTERN_DISABLE:
+ *DP |= DP_LINK_TRAIN_OFF;
+ break;
+ case DP_TRAINING_PATTERN_1:
+ *DP |= DP_LINK_TRAIN_PAT_1;
+ break;
+ case DP_TRAINING_PATTERN_2:
+ *DP |= DP_LINK_TRAIN_PAT_2;
+ break;
+ case DP_TRAINING_PATTERN_3:
+ drm_dbg_kms(&dev_priv->drm,
+ "TPS3 not supported, using TPS2 instead\n");
+ *DP |= DP_LINK_TRAIN_PAT_2;
+ break;
}
+
+ intel_de_write(dev_priv, intel_dp->output_reg, intel_dp->DP);
+ intel_de_posting_read(dev_priv, intel_dp->output_reg);
}
static void intel_dp_enable_port(struct intel_dp *intel_dp,
@@ -3577,7 +3737,8 @@ static void intel_dp_enable_port(struct intel_dp *intel_dp,
intel_de_posting_read(dev_priv, intel_dp->output_reg);
}
-static void intel_enable_dp(struct intel_encoder *encoder,
+static void intel_enable_dp(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *pipe_config,
const struct drm_connector_state *conn_state)
{
@@ -3623,22 +3784,25 @@ static void intel_enable_dp(struct intel_encoder *encoder,
}
}
-static void g4x_enable_dp(struct intel_encoder *encoder,
+static void g4x_enable_dp(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *pipe_config,
const struct drm_connector_state *conn_state)
{
- intel_enable_dp(encoder, pipe_config, conn_state);
+ intel_enable_dp(state, encoder, pipe_config, conn_state);
intel_edp_backlight_on(pipe_config, conn_state);
}
-static void vlv_enable_dp(struct intel_encoder *encoder,
+static void vlv_enable_dp(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *pipe_config,
const struct drm_connector_state *conn_state)
{
intel_edp_backlight_on(pipe_config, conn_state);
}
-static void g4x_pre_enable_dp(struct intel_encoder *encoder,
+static void g4x_pre_enable_dp(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *pipe_config,
const struct drm_connector_state *conn_state)
{
@@ -3758,16 +3922,18 @@ static void vlv_init_panel_power_sequencer(struct intel_encoder *encoder,
intel_dp_init_panel_power_sequencer_registers(intel_dp, true);
}
-static void vlv_pre_enable_dp(struct intel_encoder *encoder,
+static void vlv_pre_enable_dp(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *pipe_config,
const struct drm_connector_state *conn_state)
{
vlv_phy_pre_encoder_enable(encoder, pipe_config);
- intel_enable_dp(encoder, pipe_config, conn_state);
+ intel_enable_dp(state, encoder, pipe_config, conn_state);
}
-static void vlv_dp_pre_pll_enable(struct intel_encoder *encoder,
+static void vlv_dp_pre_pll_enable(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *pipe_config,
const struct drm_connector_state *conn_state)
{
@@ -3776,19 +3942,21 @@ static void vlv_dp_pre_pll_enable(struct intel_encoder *encoder,
vlv_phy_pre_pll_enable(encoder, pipe_config);
}
-static void chv_pre_enable_dp(struct intel_encoder *encoder,
+static void chv_pre_enable_dp(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *pipe_config,
const struct drm_connector_state *conn_state)
{
chv_phy_pre_encoder_enable(encoder, pipe_config);
- intel_enable_dp(encoder, pipe_config, conn_state);
+ intel_enable_dp(state, encoder, pipe_config, conn_state);
/* Second common lane will stay alive on its own now */
chv_phy_release_cl2_override(encoder);
}
-static void chv_dp_pre_pll_enable(struct intel_encoder *encoder,
+static void chv_dp_pre_pll_enable(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *pipe_config,
const struct drm_connector_state *conn_state)
{
@@ -3797,7 +3965,8 @@ static void chv_dp_pre_pll_enable(struct intel_encoder *encoder,
chv_phy_pre_pll_enable(encoder, pipe_config);
}
-static void chv_dp_post_pll_disable(struct intel_encoder *encoder,
+static void chv_dp_post_pll_disable(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *old_crtc_state,
const struct drm_connector_state *old_conn_state)
{
@@ -3881,7 +4050,7 @@ intel_dp_pre_emphasis_max(struct intel_dp *intel_dp, u8 voltage_swing)
}
}
-static u32 vlv_signal_levels(struct intel_dp *intel_dp)
+static void vlv_set_signal_levels(struct intel_dp *intel_dp)
{
struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base;
unsigned long demph_reg_value, preemph_reg_value,
@@ -3909,7 +4078,7 @@ static u32 vlv_signal_levels(struct intel_dp *intel_dp)
uniqtranscale_reg_value = 0x5598DA3A;
break;
default:
- return 0;
+ return;
}
break;
case DP_TRAIN_PRE_EMPH_LEVEL_1:
@@ -3928,7 +4097,7 @@ static u32 vlv_signal_levels(struct intel_dp *intel_dp)
uniqtranscale_reg_value = 0x55ADDA3A;
break;
default:
- return 0;
+ return;
}
break;
case DP_TRAIN_PRE_EMPH_LEVEL_2:
@@ -3943,7 +4112,7 @@ static u32 vlv_signal_levels(struct intel_dp *intel_dp)
uniqtranscale_reg_value = 0x55ADDA3A;
break;
default:
- return 0;
+ return;
}
break;
case DP_TRAIN_PRE_EMPH_LEVEL_3:
@@ -3954,20 +4123,18 @@ static u32 vlv_signal_levels(struct intel_dp *intel_dp)
uniqtranscale_reg_value = 0x55ADDA3A;
break;
default:
- return 0;
+ return;
}
break;
default:
- return 0;
+ return;
}
vlv_set_phy_signal_level(encoder, demph_reg_value, preemph_reg_value,
uniqtranscale_reg_value, 0);
-
- return 0;
}
-static u32 chv_signal_levels(struct intel_dp *intel_dp)
+static void chv_set_signal_levels(struct intel_dp *intel_dp)
{
struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base;
u32 deemph_reg_value, margin_reg_value;
@@ -3995,7 +4162,7 @@ static u32 chv_signal_levels(struct intel_dp *intel_dp)
uniq_trans_scale = true;
break;
default:
- return 0;
+ return;
}
break;
case DP_TRAIN_PRE_EMPH_LEVEL_1:
@@ -4013,7 +4180,7 @@ static u32 chv_signal_levels(struct intel_dp *intel_dp)
margin_reg_value = 154;
break;
default:
- return 0;
+ return;
}
break;
case DP_TRAIN_PRE_EMPH_LEVEL_2:
@@ -4027,7 +4194,7 @@ static u32 chv_signal_levels(struct intel_dp *intel_dp)
margin_reg_value = 154;
break;
default:
- return 0;
+ return;
}
break;
case DP_TRAIN_PRE_EMPH_LEVEL_3:
@@ -4037,21 +4204,18 @@ static u32 chv_signal_levels(struct intel_dp *intel_dp)
margin_reg_value = 154;
break;
default:
- return 0;
+ return;
}
break;
default:
- return 0;
+ return;
}
chv_set_phy_signal_level(encoder, deemph_reg_value,
margin_reg_value, uniq_trans_scale);
-
- return 0;
}
-static u32
-g4x_signal_levels(u8 train_set)
+static u32 g4x_signal_levels(u8 train_set)
{
u32 signal_levels = 0;
@@ -4088,12 +4252,31 @@ g4x_signal_levels(u8 train_set)
return signal_levels;
}
+static void
+g4x_set_signal_levels(struct intel_dp *intel_dp)
+{
+ struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
+ u8 train_set = intel_dp->train_set[0];
+ u32 signal_levels;
+
+ signal_levels = g4x_signal_levels(train_set);
+
+ drm_dbg_kms(&dev_priv->drm, "Using signal levels %08x\n",
+ signal_levels);
+
+ intel_dp->DP &= ~(DP_VOLTAGE_MASK | DP_PRE_EMPHASIS_MASK);
+ intel_dp->DP |= signal_levels;
+
+ intel_de_write(dev_priv, intel_dp->output_reg, intel_dp->DP);
+ intel_de_posting_read(dev_priv, intel_dp->output_reg);
+}
+
/* SNB CPU eDP voltage swing and pre-emphasis control */
-static u32
-snb_cpu_edp_signal_levels(u8 train_set)
+static u32 snb_cpu_edp_signal_levels(u8 train_set)
{
- int signal_levels = train_set & (DP_TRAIN_VOLTAGE_SWING_MASK |
- DP_TRAIN_PRE_EMPHASIS_MASK);
+ u8 signal_levels = train_set & (DP_TRAIN_VOLTAGE_SWING_MASK |
+ DP_TRAIN_PRE_EMPHASIS_MASK);
+
switch (signal_levels) {
case DP_TRAIN_VOLTAGE_SWING_LEVEL_0 | DP_TRAIN_PRE_EMPH_LEVEL_0:
case DP_TRAIN_VOLTAGE_SWING_LEVEL_1 | DP_TRAIN_PRE_EMPH_LEVEL_0:
@@ -4116,12 +4299,31 @@ snb_cpu_edp_signal_levels(u8 train_set)
}
}
+static void
+snb_cpu_edp_set_signal_levels(struct intel_dp *intel_dp)
+{
+ struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
+ u8 train_set = intel_dp->train_set[0];
+ u32 signal_levels;
+
+ signal_levels = snb_cpu_edp_signal_levels(train_set);
+
+ drm_dbg_kms(&dev_priv->drm, "Using signal levels %08x\n",
+ signal_levels);
+
+ intel_dp->DP &= ~EDP_LINK_TRAIN_VOL_EMP_MASK_SNB;
+ intel_dp->DP |= signal_levels;
+
+ intel_de_write(dev_priv, intel_dp->output_reg, intel_dp->DP);
+ intel_de_posting_read(dev_priv, intel_dp->output_reg);
+}
+
/* IVB CPU eDP voltage swing and pre-emphasis control */
-static u32
-ivb_cpu_edp_signal_levels(u8 train_set)
+static u32 ivb_cpu_edp_signal_levels(u8 train_set)
{
- int signal_levels = train_set & (DP_TRAIN_VOLTAGE_SWING_MASK |
- DP_TRAIN_PRE_EMPHASIS_MASK);
+ u8 signal_levels = train_set & (DP_TRAIN_VOLTAGE_SWING_MASK |
+ DP_TRAIN_PRE_EMPHASIS_MASK);
+
switch (signal_levels) {
case DP_TRAIN_VOLTAGE_SWING_LEVEL_0 | DP_TRAIN_PRE_EMPH_LEVEL_0:
return EDP_LINK_TRAIN_400MV_0DB_IVB;
@@ -4147,38 +4349,29 @@ ivb_cpu_edp_signal_levels(u8 train_set)
}
}
-void
-intel_dp_set_signal_levels(struct intel_dp *intel_dp)
+static void
+ivb_cpu_edp_set_signal_levels(struct intel_dp *intel_dp)
{
struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
- struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
- enum port port = intel_dig_port->base.port;
- u32 signal_levels, mask = 0;
u8 train_set = intel_dp->train_set[0];
+ u32 signal_levels;
- if (IS_GEN9_LP(dev_priv) || INTEL_GEN(dev_priv) >= 10) {
- signal_levels = bxt_signal_levels(intel_dp);
- } else if (HAS_DDI(dev_priv)) {
- signal_levels = ddi_signal_levels(intel_dp);
- mask = DDI_BUF_EMP_MASK;
- } else if (IS_CHERRYVIEW(dev_priv)) {
- signal_levels = chv_signal_levels(intel_dp);
- } else if (IS_VALLEYVIEW(dev_priv)) {
- signal_levels = vlv_signal_levels(intel_dp);
- } else if (IS_IVYBRIDGE(dev_priv) && port == PORT_A) {
- signal_levels = ivb_cpu_edp_signal_levels(train_set);
- mask = EDP_LINK_TRAIN_VOL_EMP_MASK_IVB;
- } else if (IS_GEN(dev_priv, 6) && port == PORT_A) {
- signal_levels = snb_cpu_edp_signal_levels(train_set);
- mask = EDP_LINK_TRAIN_VOL_EMP_MASK_SNB;
- } else {
- signal_levels = g4x_signal_levels(train_set);
- mask = DP_VOLTAGE_MASK | DP_PRE_EMPHASIS_MASK;
- }
+ signal_levels = ivb_cpu_edp_signal_levels(train_set);
+
+ drm_dbg_kms(&dev_priv->drm, "Using signal levels %08x\n",
+ signal_levels);
+
+ intel_dp->DP &= ~EDP_LINK_TRAIN_VOL_EMP_MASK_IVB;
+ intel_dp->DP |= signal_levels;
- if (mask)
- drm_dbg_kms(&dev_priv->drm, "Using signal levels %08x\n",
- signal_levels);
+ intel_de_write(dev_priv, intel_dp->output_reg, intel_dp->DP);
+ intel_de_posting_read(dev_priv, intel_dp->output_reg);
+}
+
+void intel_dp_set_signal_levels(struct intel_dp *intel_dp)
+{
+ struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
+ u8 train_set = intel_dp->train_set[0];
drm_dbg_kms(&dev_priv->drm, "Using vswing level %d%s\n",
train_set & DP_TRAIN_VOLTAGE_SWING_MASK,
@@ -4189,55 +4382,28 @@ intel_dp_set_signal_levels(struct intel_dp *intel_dp)
train_set & DP_TRAIN_MAX_PRE_EMPHASIS_REACHED ?
" (max)" : "");
- intel_dp->DP = (intel_dp->DP & ~mask) | signal_levels;
-
- intel_de_write(dev_priv, intel_dp->output_reg, intel_dp->DP);
- intel_de_posting_read(dev_priv, intel_dp->output_reg);
+ intel_dp->set_signal_levels(intel_dp);
}
void
intel_dp_program_link_training_pattern(struct intel_dp *intel_dp,
u8 dp_train_pat)
{
- struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
- struct drm_i915_private *dev_priv =
- to_i915(intel_dig_port->base.base.dev);
+ struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
+ u8 train_pat_mask = drm_dp_training_pattern_mask(intel_dp->dpcd);
- _intel_dp_set_link_train(intel_dp, &intel_dp->DP, dp_train_pat);
+ if (dp_train_pat & train_pat_mask)
+ drm_dbg_kms(&dev_priv->drm,
+ "Using DP training pattern TPS%d\n",
+ dp_train_pat & train_pat_mask);
- intel_de_write(dev_priv, intel_dp->output_reg, intel_dp->DP);
- intel_de_posting_read(dev_priv, intel_dp->output_reg);
+ intel_dp->set_link_train(intel_dp, dp_train_pat);
}
void intel_dp_set_idle_link_train(struct intel_dp *intel_dp)
{
- struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
- struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
- enum port port = intel_dig_port->base.port;
- u32 val;
-
- if (!HAS_DDI(dev_priv))
- return;
-
- val = intel_de_read(dev_priv, intel_dp->regs.dp_tp_ctl);
- val &= ~DP_TP_CTL_LINK_TRAIN_MASK;
- val |= DP_TP_CTL_LINK_TRAIN_IDLE;
- intel_de_write(dev_priv, intel_dp->regs.dp_tp_ctl, val);
-
- /*
- * Until TGL on PORT_A we can have only eDP in SST mode. There the only
- * reason we need to set idle transmission mode is to work around a HW
- * issue where we enable the pipe while not in idle link-training mode.
- * In this case there is requirement to wait for a minimum number of
- * idle patterns to be sent.
- */
- if (port == PORT_A && INTEL_GEN(dev_priv) < 12)
- return;
-
- if (intel_de_wait_for_set(dev_priv, intel_dp->regs.dp_tp_status,
- DP_TP_STATUS_IDLE_DONE, 1))
- drm_err(&dev_priv->drm,
- "Timed out waiting for DP idle patterns\n");
+ if (intel_dp->set_idle_link_train)
+ intel_dp->set_idle_link_train(intel_dp);
}
static void
@@ -4316,6 +4482,7 @@ intel_dp_link_down(struct intel_encoder *encoder,
static void
intel_dp_extended_receiver_capabilities(struct intel_dp *intel_dp)
{
+ struct drm_i915_private *i915 = dp_to_i915(intel_dp);
u8 dpcd_ext[6];
/*
@@ -4331,20 +4498,22 @@ intel_dp_extended_receiver_capabilities(struct intel_dp *intel_dp)
if (drm_dp_dpcd_read(&intel_dp->aux, DP_DP13_DPCD_REV,
&dpcd_ext, sizeof(dpcd_ext)) != sizeof(dpcd_ext)) {
- DRM_ERROR("DPCD failed read at extended capabilities\n");
+ drm_err(&i915->drm,
+ "DPCD failed read at extended capabilities\n");
return;
}
if (intel_dp->dpcd[DP_DPCD_REV] > dpcd_ext[DP_DPCD_REV]) {
- DRM_DEBUG_KMS("DPCD extended DPCD rev less than base DPCD rev\n");
+ drm_dbg_kms(&i915->drm,
+ "DPCD extended DPCD rev less than base DPCD rev\n");
return;
}
if (!memcmp(intel_dp->dpcd, dpcd_ext, sizeof(dpcd_ext)))
return;
- DRM_DEBUG_KMS("Base DPCD: %*ph\n",
- (int)sizeof(intel_dp->dpcd), intel_dp->dpcd);
+ drm_dbg_kms(&i915->drm, "Base DPCD: %*ph\n",
+ (int)sizeof(intel_dp->dpcd), intel_dp->dpcd);
memcpy(intel_dp->dpcd, dpcd_ext, sizeof(dpcd_ext));
}
@@ -4352,13 +4521,16 @@ intel_dp_extended_receiver_capabilities(struct intel_dp *intel_dp)
bool
intel_dp_read_dpcd(struct intel_dp *intel_dp)
{
+ struct drm_i915_private *i915 = dp_to_i915(intel_dp);
+
if (drm_dp_dpcd_read(&intel_dp->aux, 0x000, intel_dp->dpcd,
sizeof(intel_dp->dpcd)) < 0)
return false; /* aux transfer failed */
intel_dp_extended_receiver_capabilities(intel_dp);
- DRM_DEBUG_KMS("DPCD: %*ph\n", (int) sizeof(intel_dp->dpcd), intel_dp->dpcd);
+ drm_dbg_kms(&i915->drm, "DPCD: %*ph\n", (int)sizeof(intel_dp->dpcd),
+ intel_dp->dpcd);
return intel_dp->dpcd[DP_DPCD_REV] != 0;
}
@@ -4375,6 +4547,8 @@ bool intel_dp_get_colorimetry_status(struct intel_dp *intel_dp)
static void intel_dp_get_dsc_sink_cap(struct intel_dp *intel_dp)
{
+ struct drm_i915_private *i915 = dp_to_i915(intel_dp);
+
/*
* Clear the cached register set to avoid using stale values
* for the sinks that do not support DSC.
@@ -4390,20 +4564,23 @@ static void intel_dp_get_dsc_sink_cap(struct intel_dp *intel_dp)
if (drm_dp_dpcd_read(&intel_dp->aux, DP_DSC_SUPPORT,
intel_dp->dsc_dpcd,
sizeof(intel_dp->dsc_dpcd)) < 0)
- DRM_ERROR("Failed to read DPCD register 0x%x\n",
- DP_DSC_SUPPORT);
+ drm_err(&i915->drm,
+ "Failed to read DPCD register 0x%x\n",
+ DP_DSC_SUPPORT);
- DRM_DEBUG_KMS("DSC DPCD: %*ph\n",
- (int)sizeof(intel_dp->dsc_dpcd),
- intel_dp->dsc_dpcd);
+ drm_dbg_kms(&i915->drm, "DSC DPCD: %*ph\n",
+ (int)sizeof(intel_dp->dsc_dpcd),
+ intel_dp->dsc_dpcd);
/* FEC is supported only on DP 1.4 */
if (!intel_dp_is_edp(intel_dp) &&
drm_dp_dpcd_readb(&intel_dp->aux, DP_FEC_CAPABILITY,
&intel_dp->fec_capable) < 0)
- DRM_ERROR("Failed to read FEC DPCD register\n");
+ drm_err(&i915->drm,
+ "Failed to read FEC DPCD register\n");
- DRM_DEBUG_KMS("FEC CAPABILITY: %x\n", intel_dp->fec_capable);
+ drm_dbg_kms(&i915->drm, "FEC CAPABILITY: %x\n",
+ intel_dp->fec_capable);
}
}
@@ -4577,14 +4754,16 @@ intel_dp_can_mst(struct intel_dp *intel_dp)
static void
intel_dp_configure_mst(struct intel_dp *intel_dp)
{
+ struct drm_i915_private *i915 = dp_to_i915(intel_dp);
struct intel_encoder *encoder =
&dp_to_dig_port(intel_dp)->base;
bool sink_can_mst = intel_dp_sink_can_mst(intel_dp);
- DRM_DEBUG_KMS("[ENCODER:%d:%s] MST support: port: %s, sink: %s, modparam: %s\n",
- encoder->base.base.id, encoder->base.name,
- yesno(intel_dp->can_mst), yesno(sink_can_mst),
- yesno(i915_modparams.enable_dp_mst));
+ drm_dbg_kms(&i915->drm,
+ "[ENCODER:%d:%s] MST support: port: %s, sink: %s, modparam: %s\n",
+ encoder->base.base.id, encoder->base.name,
+ yesno(intel_dp->can_mst), yesno(sink_can_mst),
+ yesno(i915_modparams.enable_dp_mst));
if (!intel_dp->can_mst)
return;
@@ -4630,158 +4809,92 @@ intel_dp_needs_vsc_sdp(const struct intel_crtc_state *crtc_state,
return false;
}
-static void
-intel_dp_setup_vsc_sdp(struct intel_dp *intel_dp,
- const struct intel_crtc_state *crtc_state,
- const struct drm_connector_state *conn_state)
+static ssize_t intel_dp_vsc_sdp_pack(const struct drm_dp_vsc_sdp *vsc,
+ struct dp_sdp *sdp, size_t size)
{
- struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
- struct dp_sdp vsc_sdp = {};
+ size_t length = sizeof(struct dp_sdp);
- /* Prepare VSC Header for SU as per DP 1.4a spec, Table 2-119 */
- vsc_sdp.sdp_header.HB0 = 0;
- vsc_sdp.sdp_header.HB1 = 0x7;
+ if (size < length)
+ return -ENOSPC;
+
+ memset(sdp, 0, size);
/*
- * VSC SDP supporting 3D stereo, PSR2, and Pixel Encoding/
- * Colorimetry Format indication.
+ * Prepare VSC Header for SU as per DP 1.4a spec, Table 2-119
+ * VSC SDP Header Bytes
*/
- vsc_sdp.sdp_header.HB2 = 0x5;
+ sdp->sdp_header.HB0 = 0; /* Secondary-Data Packet ID = 0 */
+ sdp->sdp_header.HB1 = vsc->sdp_type; /* Secondary-data Packet Type */
+ sdp->sdp_header.HB2 = vsc->revision; /* Revision Number */
+ sdp->sdp_header.HB3 = vsc->length; /* Number of Valid Data Bytes */
/*
- * VSC SDP supporting 3D stereo, + PSR2, + Pixel Encoding/
- * Colorimetry Format indication (HB2 = 05h).
+ * Only revision 0x5 supports Pixel Encoding/Colorimetry Format as
+ * per DP 1.4a spec.
*/
- vsc_sdp.sdp_header.HB3 = 0x13;
-
- /* DP 1.4a spec, Table 2-120 */
- switch (crtc_state->output_format) {
- case INTEL_OUTPUT_FORMAT_YCBCR444:
- vsc_sdp.db[16] = 0x1 << 4; /* YCbCr 444 : DB16[7:4] = 1h */
- break;
- case INTEL_OUTPUT_FORMAT_YCBCR420:
- vsc_sdp.db[16] = 0x3 << 4; /* YCbCr 420 : DB16[7:4] = 3h */
- break;
- case INTEL_OUTPUT_FORMAT_RGB:
- default:
- /* RGB: DB16[7:4] = 0h */
- break;
- }
+ if (vsc->revision != 0x5)
+ goto out;
- switch (conn_state->colorspace) {
- case DRM_MODE_COLORIMETRY_BT709_YCC:
- vsc_sdp.db[16] |= 0x1;
- break;
- case DRM_MODE_COLORIMETRY_XVYCC_601:
- vsc_sdp.db[16] |= 0x2;
- break;
- case DRM_MODE_COLORIMETRY_XVYCC_709:
- vsc_sdp.db[16] |= 0x3;
- break;
- case DRM_MODE_COLORIMETRY_SYCC_601:
- vsc_sdp.db[16] |= 0x4;
- break;
- case DRM_MODE_COLORIMETRY_OPYCC_601:
- vsc_sdp.db[16] |= 0x5;
- break;
- case DRM_MODE_COLORIMETRY_BT2020_CYCC:
- case DRM_MODE_COLORIMETRY_BT2020_RGB:
- vsc_sdp.db[16] |= 0x6;
- break;
- case DRM_MODE_COLORIMETRY_BT2020_YCC:
- vsc_sdp.db[16] |= 0x7;
- break;
- case DRM_MODE_COLORIMETRY_DCI_P3_RGB_D65:
- case DRM_MODE_COLORIMETRY_DCI_P3_RGB_THEATER:
- vsc_sdp.db[16] |= 0x4; /* DCI-P3 (SMPTE RP 431-2) */
- break;
- default:
- /* sRGB (IEC 61966-2-1) / ITU-R BT.601: DB16[0:3] = 0h */
+ /* VSC SDP Payload for DB16 through DB18 */
+ /* Pixel Encoding and Colorimetry Formats */
+ sdp->db[16] = (vsc->pixelformat & 0xf) << 4; /* DB16[7:4] */
+ sdp->db[16] |= vsc->colorimetry & 0xf; /* DB16[3:0] */
- /* RGB->YCBCR color conversion uses the BT.709 color space. */
- if (crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR420)
- vsc_sdp.db[16] |= 0x1; /* 0x1, ITU-R BT.709 */
+ switch (vsc->bpc) {
+ case 6:
+ /* 6bpc: 0x0 */
break;
- }
-
- /*
- * For pixel encoding formats YCbCr444, YCbCr422, YCbCr420, and Y Only,
- * the following Component Bit Depth values are defined:
- * 001b = 8bpc.
- * 010b = 10bpc.
- * 011b = 12bpc.
- * 100b = 16bpc.
- */
- switch (crtc_state->pipe_bpp) {
- case 24: /* 8bpc */
- vsc_sdp.db[17] = 0x1;
+ case 8:
+ sdp->db[17] = 0x1; /* DB17[3:0] */
break;
- case 30: /* 10bpc */
- vsc_sdp.db[17] = 0x2;
+ case 10:
+ sdp->db[17] = 0x2;
break;
- case 36: /* 12bpc */
- vsc_sdp.db[17] = 0x3;
+ case 12:
+ sdp->db[17] = 0x3;
break;
- case 48: /* 16bpc */
- vsc_sdp.db[17] = 0x4;
+ case 16:
+ sdp->db[17] = 0x4;
break;
default:
- MISSING_CASE(crtc_state->pipe_bpp);
+ MISSING_CASE(vsc->bpc);
break;
}
+ /* Dynamic Range and Component Bit Depth */
+ if (vsc->dynamic_range == DP_DYNAMIC_RANGE_CTA)
+ sdp->db[17] |= 0x80; /* DB17[7] */
- /*
- * Dynamic Range (Bit 7)
- * 0 = VESA range, 1 = CTA range.
- * all YCbCr are always limited range
- */
- vsc_sdp.db[17] |= 0x80;
-
- /*
- * Content Type (Bits 2:0)
- * 000b = Not defined.
- * 001b = Graphics.
- * 010b = Photo.
- * 011b = Video.
- * 100b = Game
- * All other values are RESERVED.
- * Note: See CTA-861-G for the definition and expected
- * processing by a stream sink for the above content types.
- */
- vsc_sdp.db[18] = 0;
+ /* Content Type */
+ sdp->db[18] = vsc->content_type & 0x7;
- intel_dig_port->write_infoframe(&intel_dig_port->base,
- crtc_state, DP_SDP_VSC, &vsc_sdp, sizeof(vsc_sdp));
+out:
+ return length;
}
-static void
-intel_dp_setup_hdr_metadata_infoframe_sdp(struct intel_dp *intel_dp,
- const struct intel_crtc_state *crtc_state,
- const struct drm_connector_state *conn_state)
+static ssize_t
+intel_dp_hdr_metadata_infoframe_sdp_pack(const struct hdmi_drm_infoframe *drm_infoframe,
+ struct dp_sdp *sdp,
+ size_t size)
{
- struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
- struct dp_sdp infoframe_sdp = {};
- struct hdmi_drm_infoframe drm_infoframe = {};
+ size_t length = sizeof(struct dp_sdp);
const int infoframe_size = HDMI_INFOFRAME_HEADER_SIZE + HDMI_DRM_INFOFRAME_SIZE;
unsigned char buf[HDMI_INFOFRAME_HEADER_SIZE + HDMI_DRM_INFOFRAME_SIZE];
ssize_t len;
- int ret;
- ret = drm_hdmi_infoframe_set_hdr_metadata(&drm_infoframe, conn_state);
- if (ret) {
- DRM_DEBUG_KMS("couldn't set HDR metadata in infoframe\n");
- return;
- }
+ if (size < length)
+ return -ENOSPC;
+
+ memset(sdp, 0, size);
- len = hdmi_drm_infoframe_pack_only(&drm_infoframe, buf, sizeof(buf));
+ len = hdmi_drm_infoframe_pack_only(drm_infoframe, buf, sizeof(buf));
if (len < 0) {
DRM_DEBUG_KMS("buffer size is smaller than hdr metadata infoframe\n");
- return;
+ return -ENOSPC;
}
if (len != infoframe_size) {
DRM_DEBUG_KMS("wrong static hdr metadata size\n");
- return;
+ return -ENOSPC;
}
/*
@@ -4790,34 +4903,37 @@ intel_dp_setup_hdr_metadata_infoframe_sdp(struct intel_dp *intel_dp,
* Table 2-100 and Table 2-101
*/
- /* Packet ID, 00h for non-Audio INFOFRAME */
- infoframe_sdp.sdp_header.HB0 = 0;
+ /* Secondary-Data Packet ID, 00h for non-Audio INFOFRAME */
+ sdp->sdp_header.HB0 = 0;
/*
* Packet Type 80h + Non-audio INFOFRAME Type value
- * HDMI_INFOFRAME_TYPE_DRM: 0x87,
+ * HDMI_INFOFRAME_TYPE_DRM: 0x87
+ * - 80h + Non-audio INFOFRAME Type value
+ * - InfoFrame Type: 0x07
+ * [CTA-861-G Table-42 Dynamic Range and Mastering InfoFrame]
*/
- infoframe_sdp.sdp_header.HB1 = drm_infoframe.type;
+ sdp->sdp_header.HB1 = drm_infoframe->type;
/*
* Least Significant Eight Bits of (Data Byte Count – 1)
- * infoframe_size - 1,
+ * infoframe_size - 1
*/
- infoframe_sdp.sdp_header.HB2 = 0x1D;
+ sdp->sdp_header.HB2 = 0x1D;
/* INFOFRAME SDP Version Number */
- infoframe_sdp.sdp_header.HB3 = (0x13 << 2);
+ sdp->sdp_header.HB3 = (0x13 << 2);
/* CTA Header Byte 2 (INFOFRAME Version Number) */
- infoframe_sdp.db[0] = drm_infoframe.version;
+ sdp->db[0] = drm_infoframe->version;
/* CTA Header Byte 3 (Length of INFOFRAME): HDMI_DRM_INFOFRAME_SIZE */
- infoframe_sdp.db[1] = drm_infoframe.length;
+ sdp->db[1] = drm_infoframe->length;
/*
* Copy HDMI_DRM_INFOFRAME_SIZE size from a buffer after
* HDMI_INFOFRAME_HEADER_SIZE
*/
- BUILD_BUG_ON(sizeof(infoframe_sdp.db) < HDMI_DRM_INFOFRAME_SIZE + 2);
- memcpy(&infoframe_sdp.db[2], &buf[HDMI_INFOFRAME_HEADER_SIZE],
+ BUILD_BUG_ON(sizeof(sdp->db) < HDMI_DRM_INFOFRAME_SIZE + 2);
+ memcpy(&sdp->db[2], &buf[HDMI_INFOFRAME_HEADER_SIZE],
HDMI_DRM_INFOFRAME_SIZE);
/*
- * Size of DP infoframe sdp packet for HDR static metadata is consist of
+ * Size of DP infoframe sdp packet for HDR static metadata consists of
* - DP SDP Header(struct dp_sdp_header): 4 bytes
* - Two Data Blocks: 2 bytes
* CTA Header Byte2 (INFOFRAME Version Number)
@@ -4828,36 +4944,286 @@ intel_dp_setup_hdr_metadata_infoframe_sdp(struct intel_dp *intel_dp,
* infoframe size. But GEN11+ has larger than that size, write_infoframe
* will pad rest of the size.
*/
- intel_dig_port->write_infoframe(&intel_dig_port->base, crtc_state,
- HDMI_PACKET_TYPE_GAMUT_METADATA,
- &infoframe_sdp,
- sizeof(struct dp_sdp_header) + 2 + HDMI_DRM_INFOFRAME_SIZE);
+ return sizeof(struct dp_sdp_header) + 2 + HDMI_DRM_INFOFRAME_SIZE;
}
-void intel_dp_vsc_enable(struct intel_dp *intel_dp,
- const struct intel_crtc_state *crtc_state,
- const struct drm_connector_state *conn_state)
+static void intel_write_dp_sdp(struct intel_encoder *encoder,
+ const struct intel_crtc_state *crtc_state,
+ unsigned int type)
{
- if (!intel_dp_needs_vsc_sdp(crtc_state, conn_state))
+ struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder);
+ struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
+ struct dp_sdp sdp = {};
+ ssize_t len;
+
+ if ((crtc_state->infoframes.enable &
+ intel_hdmi_infoframe_enable(type)) == 0)
return;
- intel_dp_setup_vsc_sdp(intel_dp, crtc_state, conn_state);
+ switch (type) {
+ case DP_SDP_VSC:
+ len = intel_dp_vsc_sdp_pack(&crtc_state->infoframes.vsc, &sdp,
+ sizeof(sdp));
+ break;
+ case HDMI_PACKET_TYPE_GAMUT_METADATA:
+ len = intel_dp_hdr_metadata_infoframe_sdp_pack(&crtc_state->infoframes.drm.drm,
+ &sdp, sizeof(sdp));
+ break;
+ default:
+ MISSING_CASE(type);
+ return;
+ }
+
+ if (drm_WARN_ON(&dev_priv->drm, len < 0))
+ return;
+
+ intel_dig_port->write_infoframe(encoder, crtc_state, type, &sdp, len);
}
-void intel_dp_hdr_metadata_enable(struct intel_dp *intel_dp,
- const struct intel_crtc_state *crtc_state,
- const struct drm_connector_state *conn_state)
+void intel_write_dp_vsc_sdp(struct intel_encoder *encoder,
+ const struct intel_crtc_state *crtc_state,
+ struct drm_dp_vsc_sdp *vsc)
{
- if (!conn_state->hdr_output_metadata)
+ struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder);
+ struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
+ struct dp_sdp sdp = {};
+ ssize_t len;
+
+ len = intel_dp_vsc_sdp_pack(vsc, &sdp, sizeof(sdp));
+
+ if (drm_WARN_ON(&dev_priv->drm, len < 0))
+ return;
+
+ intel_dig_port->write_infoframe(encoder, crtc_state, DP_SDP_VSC,
+ &sdp, len);
+}
+
+void intel_dp_set_infoframes(struct intel_encoder *encoder,
+ bool enable,
+ const struct intel_crtc_state *crtc_state,
+ const struct drm_connector_state *conn_state)
+{
+ struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
+ struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
+ i915_reg_t reg = HSW_TVIDEO_DIP_CTL(crtc_state->cpu_transcoder);
+ u32 dip_enable = VIDEO_DIP_ENABLE_AVI_HSW | VIDEO_DIP_ENABLE_GCP_HSW |
+ VIDEO_DIP_ENABLE_VS_HSW | VIDEO_DIP_ENABLE_GMP_HSW |
+ VIDEO_DIP_ENABLE_SPD_HSW | VIDEO_DIP_ENABLE_DRM_GLK;
+ u32 val = intel_de_read(dev_priv, reg);
+
+ /* TODO: Add DSC case (DIP_ENABLE_PPS) */
+ /* When PSR is enabled, this routine doesn't disable VSC DIP */
+ if (intel_psr_enabled(intel_dp))
+ val &= ~dip_enable;
+ else
+ val &= ~(dip_enable | VIDEO_DIP_ENABLE_VSC_HSW);
+
+ if (!enable) {
+ intel_de_write(dev_priv, reg, val);
+ intel_de_posting_read(dev_priv, reg);
+ return;
+ }
+
+ intel_de_write(dev_priv, reg, val);
+ intel_de_posting_read(dev_priv, reg);
+
+ /* When PSR is enabled, VSC SDP is handled by PSR routine */
+ if (!intel_psr_enabled(intel_dp))
+ intel_write_dp_sdp(encoder, crtc_state, DP_SDP_VSC);
+
+ intel_write_dp_sdp(encoder, crtc_state, HDMI_PACKET_TYPE_GAMUT_METADATA);
+}
+
+static int intel_dp_vsc_sdp_unpack(struct drm_dp_vsc_sdp *vsc,
+ const void *buffer, size_t size)
+{
+ const struct dp_sdp *sdp = buffer;
+
+ if (size < sizeof(struct dp_sdp))
+ return -EINVAL;
+
+ memset(vsc, 0, size);
+
+ if (sdp->sdp_header.HB0 != 0)
+ return -EINVAL;
+
+ if (sdp->sdp_header.HB1 != DP_SDP_VSC)
+ return -EINVAL;
+
+ vsc->sdp_type = sdp->sdp_header.HB1;
+ vsc->revision = sdp->sdp_header.HB2;
+ vsc->length = sdp->sdp_header.HB3;
+
+ if ((sdp->sdp_header.HB2 == 0x2 && sdp->sdp_header.HB3 == 0x8) ||
+ (sdp->sdp_header.HB2 == 0x4 && sdp->sdp_header.HB3 == 0xe)) {
+ /*
+ * - HB2 = 0x2, HB3 = 0x8
+ * VSC SDP supporting 3D stereo + PSR
+ * - HB2 = 0x4, HB3 = 0xe
+ * VSC SDP supporting 3D stereo + PSR2 with Y-coordinate of
+ * first scan line of the SU region (applies to eDP v1.4b
+ * and higher).
+ */
+ return 0;
+ } else if (sdp->sdp_header.HB2 == 0x5 && sdp->sdp_header.HB3 == 0x13) {
+ /*
+ * - HB2 = 0x5, HB3 = 0x13
+ * VSC SDP supporting 3D stereo + PSR2 + Pixel Encoding/Colorimetry
+ * Format.
+ */
+ vsc->pixelformat = (sdp->db[16] >> 4) & 0xf;
+ vsc->colorimetry = sdp->db[16] & 0xf;
+ vsc->dynamic_range = (sdp->db[17] >> 7) & 0x1;
+
+ switch (sdp->db[17] & 0x7) {
+ case 0x0:
+ vsc->bpc = 6;
+ break;
+ case 0x1:
+ vsc->bpc = 8;
+ break;
+ case 0x2:
+ vsc->bpc = 10;
+ break;
+ case 0x3:
+ vsc->bpc = 12;
+ break;
+ case 0x4:
+ vsc->bpc = 16;
+ break;
+ default:
+ MISSING_CASE(sdp->db[17] & 0x7);
+ return -EINVAL;
+ }
+
+ vsc->content_type = sdp->db[18] & 0x7;
+ } else {
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int
+intel_dp_hdr_metadata_infoframe_sdp_unpack(struct hdmi_drm_infoframe *drm_infoframe,
+ const void *buffer, size_t size)
+{
+ int ret;
+
+ const struct dp_sdp *sdp = buffer;
+
+ if (size < sizeof(struct dp_sdp))
+ return -EINVAL;
+
+ if (sdp->sdp_header.HB0 != 0)
+ return -EINVAL;
+
+ if (sdp->sdp_header.HB1 != HDMI_INFOFRAME_TYPE_DRM)
+ return -EINVAL;
+
+ /*
+ * Least Significant Eight Bits of (Data Byte Count – 1)
+ * 1Dh (i.e., Data Byte Count = 30 bytes).
+ */
+ if (sdp->sdp_header.HB2 != 0x1D)
+ return -EINVAL;
+
+ /* Most Significant Two Bits of (Data Byte Count – 1), Clear to 00b. */
+ if ((sdp->sdp_header.HB3 & 0x3) != 0)
+ return -EINVAL;
+
+ /* INFOFRAME SDP Version Number */
+ if (((sdp->sdp_header.HB3 >> 2) & 0x3f) != 0x13)
+ return -EINVAL;
+
+ /* CTA Header Byte 2 (INFOFRAME Version Number) */
+ if (sdp->db[0] != 1)
+ return -EINVAL;
+
+ /* CTA Header Byte 3 (Length of INFOFRAME): HDMI_DRM_INFOFRAME_SIZE */
+ if (sdp->db[1] != HDMI_DRM_INFOFRAME_SIZE)
+ return -EINVAL;
+
+ ret = hdmi_drm_infoframe_unpack_only(drm_infoframe, &sdp->db[2],
+ HDMI_DRM_INFOFRAME_SIZE);
+
+ return ret;
+}
+
+static void intel_read_dp_vsc_sdp(struct intel_encoder *encoder,
+ struct intel_crtc_state *crtc_state,
+ struct drm_dp_vsc_sdp *vsc)
+{
+ struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder);
+ struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
+ struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
+ unsigned int type = DP_SDP_VSC;
+ struct dp_sdp sdp = {};
+ int ret;
+
+ /* When PSR is enabled, VSC SDP is handled by PSR routine */
+ if (intel_psr_enabled(intel_dp))
+ return;
+
+ if ((crtc_state->infoframes.enable &
+ intel_hdmi_infoframe_enable(type)) == 0)
+ return;
+
+ intel_dig_port->read_infoframe(encoder, crtc_state, type, &sdp, sizeof(sdp));
+
+ ret = intel_dp_vsc_sdp_unpack(vsc, &sdp, sizeof(sdp));
+
+ if (ret)
+ drm_dbg_kms(&dev_priv->drm, "Failed to unpack DP VSC SDP\n");
+}
+
+static void intel_read_dp_hdr_metadata_infoframe_sdp(struct intel_encoder *encoder,
+ struct intel_crtc_state *crtc_state,
+ struct hdmi_drm_infoframe *drm_infoframe)
+{
+ struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder);
+ struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
+ unsigned int type = HDMI_PACKET_TYPE_GAMUT_METADATA;
+ struct dp_sdp sdp = {};
+ int ret;
+
+ if ((crtc_state->infoframes.enable &
+ intel_hdmi_infoframe_enable(type)) == 0)
return;
- intel_dp_setup_hdr_metadata_infoframe_sdp(intel_dp,
- crtc_state,
- conn_state);
+ intel_dig_port->read_infoframe(encoder, crtc_state, type, &sdp,
+ sizeof(sdp));
+
+ ret = intel_dp_hdr_metadata_infoframe_sdp_unpack(drm_infoframe, &sdp,
+ sizeof(sdp));
+
+ if (ret)
+ drm_dbg_kms(&dev_priv->drm,
+ "Failed to unpack DP HDR Metadata Infoframe SDP\n");
+}
+
+void intel_read_dp_sdp(struct intel_encoder *encoder,
+ struct intel_crtc_state *crtc_state,
+ unsigned int type)
+{
+ switch (type) {
+ case DP_SDP_VSC:
+ intel_read_dp_vsc_sdp(encoder, crtc_state,
+ &crtc_state->infoframes.vsc);
+ break;
+ case HDMI_PACKET_TYPE_GAMUT_METADATA:
+ intel_read_dp_hdr_metadata_infoframe_sdp(encoder, crtc_state,
+ &crtc_state->infoframes.drm.drm);
+ break;
+ default:
+ MISSING_CASE(type);
+ break;
+ }
}
static u8 intel_dp_autotest_link_training(struct intel_dp *intel_dp)
{
+ struct drm_i915_private *i915 = dp_to_i915(intel_dp);
int status = 0;
int test_link_rate;
u8 test_lane_count, test_link_bw;
@@ -4869,7 +5235,7 @@ static u8 intel_dp_autotest_link_training(struct intel_dp *intel_dp)
&test_lane_count);
if (status <= 0) {
- DRM_DEBUG_KMS("Lane count read failed\n");
+ drm_dbg_kms(&i915->drm, "Lane count read failed\n");
return DP_TEST_NAK;
}
test_lane_count &= DP_MAX_LANE_COUNT_MASK;
@@ -4877,7 +5243,7 @@ static u8 intel_dp_autotest_link_training(struct intel_dp *intel_dp)
status = drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_LINK_RATE,
&test_link_bw);
if (status <= 0) {
- DRM_DEBUG_KMS("Link Rate read failed\n");
+ drm_dbg_kms(&i915->drm, "Link Rate read failed\n");
return DP_TEST_NAK;
}
test_link_rate = drm_dp_bw_code_to_link_rate(test_link_bw);
@@ -4895,6 +5261,7 @@ static u8 intel_dp_autotest_link_training(struct intel_dp *intel_dp)
static u8 intel_dp_autotest_video_pattern(struct intel_dp *intel_dp)
{
+ struct drm_i915_private *i915 = dp_to_i915(intel_dp);
u8 test_pattern;
u8 test_misc;
__be16 h_width, v_height;
@@ -4904,7 +5271,7 @@ static u8 intel_dp_autotest_video_pattern(struct intel_dp *intel_dp)
status = drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_PATTERN,
&test_pattern);
if (status <= 0) {
- DRM_DEBUG_KMS("Test pattern read failed\n");
+ drm_dbg_kms(&i915->drm, "Test pattern read failed\n");
return DP_TEST_NAK;
}
if (test_pattern != DP_COLOR_RAMP)
@@ -4913,21 +5280,21 @@ static u8 intel_dp_autotest_video_pattern(struct intel_dp *intel_dp)
status = drm_dp_dpcd_read(&intel_dp->aux, DP_TEST_H_WIDTH_HI,
&h_width, 2);
if (status <= 0) {
- DRM_DEBUG_KMS("H Width read failed\n");
+ drm_dbg_kms(&i915->drm, "H Width read failed\n");
return DP_TEST_NAK;
}
status = drm_dp_dpcd_read(&intel_dp->aux, DP_TEST_V_HEIGHT_HI,
&v_height, 2);
if (status <= 0) {
- DRM_DEBUG_KMS("V Height read failed\n");
+ drm_dbg_kms(&i915->drm, "V Height read failed\n");
return DP_TEST_NAK;
}
status = drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_MISC0,
&test_misc);
if (status <= 0) {
- DRM_DEBUG_KMS("TEST MISC read failed\n");
+ drm_dbg_kms(&i915->drm, "TEST MISC read failed\n");
return DP_TEST_NAK;
}
if ((test_misc & DP_TEST_COLOR_FORMAT_MASK) != DP_COLOR_FORMAT_RGB)
@@ -4956,6 +5323,7 @@ static u8 intel_dp_autotest_video_pattern(struct intel_dp *intel_dp)
static u8 intel_dp_autotest_edid(struct intel_dp *intel_dp)
{
+ struct drm_i915_private *i915 = dp_to_i915(intel_dp);
u8 test_result = DP_TEST_ACK;
struct intel_connector *intel_connector = intel_dp->attached_connector;
struct drm_connector *connector = &intel_connector->base;
@@ -4972,9 +5340,10 @@ static u8 intel_dp_autotest_edid(struct intel_dp *intel_dp)
*/
if (intel_dp->aux.i2c_nack_count > 0 ||
intel_dp->aux.i2c_defer_count > 0)
- DRM_DEBUG_KMS("EDID read had %d NACKs, %d DEFERs\n",
- intel_dp->aux.i2c_nack_count,
- intel_dp->aux.i2c_defer_count);
+ drm_dbg_kms(&i915->drm,
+ "EDID read had %d NACKs, %d DEFERs\n",
+ intel_dp->aux.i2c_nack_count,
+ intel_dp->aux.i2c_defer_count);
intel_dp->compliance.test_data.edid = INTEL_DP_RESOLUTION_FAILSAFE;
} else {
struct edid *block = intel_connector->detect_edid;
@@ -4986,7 +5355,8 @@ static u8 intel_dp_autotest_edid(struct intel_dp *intel_dp)
if (drm_dp_dpcd_writeb(&intel_dp->aux, DP_TEST_EDID_CHECKSUM,
block->checksum) <= 0)
- DRM_DEBUG_KMS("Failed to write EDID checksum\n");
+ drm_dbg_kms(&i915->drm,
+ "Failed to write EDID checksum\n");
test_result = DP_TEST_ACK | DP_TEST_EDID_CHECKSUM_WRITE;
intel_dp->compliance.test_data.edid = INTEL_DP_RESOLUTION_PREFERRED;
@@ -4998,43 +5368,217 @@ static u8 intel_dp_autotest_edid(struct intel_dp *intel_dp)
return test_result;
}
+static u8 intel_dp_prepare_phytest(struct intel_dp *intel_dp)
+{
+ struct drm_dp_phy_test_params *data =
+ &intel_dp->compliance.test_data.phytest;
+
+ if (drm_dp_get_phy_test_pattern(&intel_dp->aux, data)) {
+ DRM_DEBUG_KMS("DP Phy Test pattern AUX read failure\n");
+ return DP_TEST_NAK;
+ }
+
+ /*
+ * link_mst is set to false to avoid executing MST-related code
+ * during compliance testing.
+ */
+ intel_dp->link_mst = false;
+
+ return DP_TEST_ACK;
+}
+
+static void intel_dp_phy_pattern_update(struct intel_dp *intel_dp)
+{
+ struct drm_i915_private *dev_priv =
+ to_i915(dp_to_dig_port(intel_dp)->base.base.dev);
+ struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
+ struct drm_dp_phy_test_params *data =
+ &intel_dp->compliance.test_data.phytest;
+ struct intel_crtc *crtc = to_intel_crtc(intel_dig_port->base.base.crtc);
+ enum pipe pipe = crtc->pipe;
+ u32 pattern_val;
+
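+ /*
+ * Program the DDI compliance pattern generator according to the
+ * requested PHY test pattern.
+ */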
+ switch (data->phy_pattern) {
+ case DP_PHY_TEST_PATTERN_NONE:
+ DRM_DEBUG_KMS("Disable Phy Test Pattern\n");
+ intel_de_write(dev_priv, DDI_DP_COMP_CTL(pipe), 0x0);
+ break;
+ case DP_PHY_TEST_PATTERN_D10_2:
+ DRM_DEBUG_KMS("Set D10.2 Phy Test Pattern\n");
+ intel_de_write(dev_priv, DDI_DP_COMP_CTL(pipe),
+ DDI_DP_COMP_CTL_ENABLE | DDI_DP_COMP_CTL_D10_2);
+ break;
+ case DP_PHY_TEST_PATTERN_ERROR_COUNT:
+ DRM_DEBUG_KMS("Set Error Count Phy Test Pattern\n");
+ intel_de_write(dev_priv, DDI_DP_COMP_CTL(pipe),
+ DDI_DP_COMP_CTL_ENABLE |
+ DDI_DP_COMP_CTL_SCRAMBLED_0);
+ break;
+ case DP_PHY_TEST_PATTERN_PRBS7:
+ DRM_DEBUG_KMS("Set PRBS7 Phy Test Pattern\n");
+ intel_de_write(dev_priv, DDI_DP_COMP_CTL(pipe),
+ DDI_DP_COMP_CTL_ENABLE | DDI_DP_COMP_CTL_PRBS7);
+ break;
+ case DP_PHY_TEST_PATTERN_80BIT_CUSTOM:
+ /*
+ * FIXME: Ideally the pattern should come from DPCD 0x250. As the
+ * current DPR-100 firmware cannot set it, hardcode the pattern
+ * for now for the compliance test.
+ */
+ DRM_DEBUG_KMS("Set 80Bit Custom Phy Test Pattern 0x3e0f83e0 0x0f83e0f8 0x0000f83e\n");
+ pattern_val = 0x3e0f83e0;
+ intel_de_write(dev_priv, DDI_DP_COMP_PAT(pipe, 0), pattern_val);
+ pattern_val = 0x0f83e0f8;
+ intel_de_write(dev_priv, DDI_DP_COMP_PAT(pipe, 1), pattern_val);
+ pattern_val = 0x0000f83e;
+ intel_de_write(dev_priv, DDI_DP_COMP_PAT(pipe, 2), pattern_val);
+ intel_de_write(dev_priv, DDI_DP_COMP_CTL(pipe),
+ DDI_DP_COMP_CTL_ENABLE |
+ DDI_DP_COMP_CTL_CUSTOM80);
+ break;
+ case DP_PHY_TEST_PATTERN_CP2520:
+ /*
+ * FIXME: Ideally the pattern should come from DPCD 0x24A. As the
+ * current DPR-100 firmware cannot set it, hardcode the pattern
+ * for now for the compliance test.
+ */
+ DRM_DEBUG_KMS("Set HBR2 compliance Phy Test Pattern\n");
+ pattern_val = 0xFB;
+ intel_de_write(dev_priv, DDI_DP_COMP_CTL(pipe),
+ DDI_DP_COMP_CTL_ENABLE | DDI_DP_COMP_CTL_HBR2 |
+ pattern_val);
+ break;
+ default:
+ WARN(1, "Invalid Phy Test Pattern\n");
+ }
+}
+
+static void
+intel_dp_autotest_phy_ddi_disable(struct intel_dp *intel_dp)
+{
+ struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
+ struct drm_device *dev = intel_dig_port->base.base.dev;
+ struct drm_i915_private *dev_priv = to_i915(dev);
+ struct intel_crtc *crtc = to_intel_crtc(intel_dig_port->base.base.crtc);
+ enum pipe pipe = crtc->pipe;
+ u32 trans_ddi_func_ctl_value, trans_conf_value, dp_tp_ctl_value;
+
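+ /*
+ * Temporarily disable the pipe, transcoder and DP transport so that
+ * the PHY test pattern can be programmed.
+ */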
+ trans_ddi_func_ctl_value = intel_de_read(dev_priv,
+ TRANS_DDI_FUNC_CTL(pipe));
+ trans_conf_value = intel_de_read(dev_priv, PIPECONF(pipe));
+ dp_tp_ctl_value = intel_de_read(dev_priv, TGL_DP_TP_CTL(pipe));
+
+ trans_ddi_func_ctl_value &= ~(TRANS_DDI_FUNC_ENABLE |
+ TGL_TRANS_DDI_PORT_MASK);
+ trans_conf_value &= ~PIPECONF_ENABLE;
+ dp_tp_ctl_value &= ~DP_TP_CTL_ENABLE;
+
+ intel_de_write(dev_priv, PIPECONF(pipe), trans_conf_value);
+ intel_de_write(dev_priv, TRANS_DDI_FUNC_CTL(pipe),
+ trans_ddi_func_ctl_value);
+ intel_de_write(dev_priv, TGL_DP_TP_CTL(pipe), dp_tp_ctl_value);
+}
+
+static void
+intel_dp_autotest_phy_ddi_enable(struct intel_dp *intel_dp, uint8_t lane_cnt)
+{
+ struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
+ struct drm_device *dev = intel_dig_port->base.base.dev;
+ struct drm_i915_private *dev_priv = to_i915(dev);
+ enum port port = intel_dig_port->base.port;
+ struct intel_crtc *crtc = to_intel_crtc(intel_dig_port->base.base.crtc);
+ enum pipe pipe = crtc->pipe;
+ u32 trans_ddi_func_ctl_value, trans_conf_value, dp_tp_ctl_value;
+
+ trans_ddi_func_ctl_value = intel_de_read(dev_priv,
+ TRANS_DDI_FUNC_CTL(pipe));
+ trans_conf_value = intel_de_read(dev_priv, PIPECONF(pipe));
+ dp_tp_ctl_value = intel_de_read(dev_priv, TGL_DP_TP_CTL(pipe));
+
+ trans_ddi_func_ctl_value |= TRANS_DDI_FUNC_ENABLE |
+ TGL_TRANS_DDI_SELECT_PORT(port);
+ trans_conf_value |= PIPECONF_ENABLE;
+ dp_tp_ctl_value |= DP_TP_CTL_ENABLE;
+
+ intel_de_write(dev_priv, PIPECONF(pipe), trans_conf_value);
+ intel_de_write(dev_priv, TGL_DP_TP_CTL(pipe), dp_tp_ctl_value);
+ intel_de_write(dev_priv, TRANS_DDI_FUNC_CTL(pipe),
+ trans_ddi_func_ctl_value);
+}
+
+void intel_dp_process_phy_request(struct intel_dp *intel_dp)
+{
+ struct drm_dp_phy_test_params *data =
+ &intel_dp->compliance.test_data.phytest;
+ u8 link_status[DP_LINK_STATUS_SIZE];
+
+ if (!intel_dp_get_link_status(intel_dp, link_status)) {
+ DRM_DEBUG_KMS("failed to get link status\n");
+ return;
+ }
+
+ /* retrieve vswing & pre-emphasis setting */
+ intel_dp_get_adjust_train(intel_dp, link_status);
+
+ intel_dp_autotest_phy_ddi_disable(intel_dp);
+
+ intel_dp_set_signal_levels(intel_dp);
+
+ intel_dp_phy_pattern_update(intel_dp);
+
+ intel_dp_autotest_phy_ddi_enable(intel_dp, data->num_lanes);
+
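+ /* Update the sink's PHY test pattern configuration over AUX. */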
+ drm_dp_set_phy_test_pattern(&intel_dp->aux, data,
+ link_status[DP_DPCD_REV]);
+}
+
static u8 intel_dp_autotest_phy_pattern(struct intel_dp *intel_dp)
{
- u8 test_result = DP_TEST_NAK;
+ u8 test_result;
+
+ test_result = intel_dp_prepare_phytest(intel_dp);
+ if (test_result != DP_TEST_ACK)
+ DRM_ERROR("Phy test preparation failed\n");
+
+ intel_dp_process_phy_request(intel_dp);
+
return test_result;
}
static void intel_dp_handle_test_request(struct intel_dp *intel_dp)
{
+ struct drm_i915_private *i915 = dp_to_i915(intel_dp);
u8 response = DP_TEST_NAK;
u8 request = 0;
int status;
status = drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_REQUEST, &request);
if (status <= 0) {
- DRM_DEBUG_KMS("Could not read test request from sink\n");
+ drm_dbg_kms(&i915->drm,
+ "Could not read test request from sink\n");
goto update_status;
}
switch (request) {
case DP_TEST_LINK_TRAINING:
- DRM_DEBUG_KMS("LINK_TRAINING test requested\n");
+ drm_dbg_kms(&i915->drm, "LINK_TRAINING test requested\n");
response = intel_dp_autotest_link_training(intel_dp);
break;
case DP_TEST_LINK_VIDEO_PATTERN:
- DRM_DEBUG_KMS("TEST_PATTERN test requested\n");
+ drm_dbg_kms(&i915->drm, "TEST_PATTERN test requested\n");
response = intel_dp_autotest_video_pattern(intel_dp);
break;
case DP_TEST_LINK_EDID_READ:
- DRM_DEBUG_KMS("EDID test requested\n");
+ drm_dbg_kms(&i915->drm, "EDID test requested\n");
response = intel_dp_autotest_edid(intel_dp);
break;
case DP_TEST_LINK_PHY_TEST_PATTERN:
- DRM_DEBUG_KMS("PHY_PATTERN test requested\n");
+ drm_dbg_kms(&i915->drm, "PHY_PATTERN test requested\n");
response = intel_dp_autotest_phy_pattern(intel_dp);
break;
default:
- DRM_DEBUG_KMS("Invalid test request '%02x'\n", request);
+ drm_dbg_kms(&i915->drm, "Invalid test request '%02x'\n",
+ request);
break;
}
@@ -5044,64 +5588,59 @@ static void intel_dp_handle_test_request(struct intel_dp *intel_dp)
update_status:
status = drm_dp_dpcd_writeb(&intel_dp->aux, DP_TEST_RESPONSE, response);
if (status <= 0)
- DRM_DEBUG_KMS("Could not write test response to sink\n");
+ drm_dbg_kms(&i915->drm,
+ "Could not write test response to sink\n");
}
static int
intel_dp_check_mst_status(struct intel_dp *intel_dp)
{
- bool bret;
+ struct drm_i915_private *i915 = dp_to_i915(intel_dp);
+ bool need_retrain = false;
- if (intel_dp->is_mst) {
- u8 esi[DP_DPRX_ESI_LEN] = { 0 };
- int ret = 0;
+ if (!intel_dp->is_mst)
+ return -EINVAL;
+
+ WARN_ON_ONCE(intel_dp->active_mst_links < 0);
+
+ for (;;) {
+ u8 esi[DP_DPRX_ESI_LEN] = {};
+ bool bret, handled;
int retry;
- bool handled;
- WARN_ON_ONCE(intel_dp->active_mst_links < 0);
bret = intel_dp_get_sink_irq_esi(intel_dp, esi);
-go_again:
- if (bret == true) {
-
- /* check link status - esi[10] = 0x200c */
- if (intel_dp->active_mst_links > 0 &&
- !drm_dp_channel_eq_ok(&esi[10], intel_dp->lane_count)) {
- DRM_DEBUG_KMS("channel EQ not ok, retraining\n");
- intel_dp_start_link_train(intel_dp);
- intel_dp_stop_link_train(intel_dp);
- }
+ if (!bret) {
+ drm_dbg_kms(&i915->drm,
+ "failed to get ESI - device may have failed\n");
+ return -EINVAL;
+ }
- DRM_DEBUG_KMS("got esi %3ph\n", esi);
- ret = drm_dp_mst_hpd_irq(&intel_dp->mst_mgr, esi, &handled);
-
- if (handled) {
- for (retry = 0; retry < 3; retry++) {
- int wret;
- wret = drm_dp_dpcd_write(&intel_dp->aux,
- DP_SINK_COUNT_ESI+1,
- &esi[1], 3);
- if (wret == 3) {
- break;
- }
- }
+ /* check link status - esi[10] = 0x200c */
+ if (intel_dp->active_mst_links > 0 && !need_retrain &&
+ !drm_dp_channel_eq_ok(&esi[10], intel_dp->lane_count)) {
+ drm_dbg_kms(&i915->drm,
+ "channel EQ not ok, retraining\n");
+ need_retrain = true;
+ }
- bret = intel_dp_get_sink_irq_esi(intel_dp, esi);
- if (bret == true) {
- DRM_DEBUG_KMS("got esi2 %3ph\n", esi);
- goto go_again;
- }
- } else
- ret = 0;
+ drm_dbg_kms(&i915->drm, "got esi %3ph\n", esi);
- return ret;
- } else {
- DRM_DEBUG_KMS("failed to get ESI - device may have failed\n");
- intel_dp->is_mst = false;
- drm_dp_mst_topology_mgr_set_mst(&intel_dp->mst_mgr,
- intel_dp->is_mst);
+ drm_dp_mst_hpd_irq(&intel_dp->mst_mgr, esi, &handled);
+ if (!handled)
+ break;
+
+ for (retry = 0; retry < 3; retry++) {
+ int wret;
+
+ wret = drm_dp_dpcd_write(&intel_dp->aux,
+ DP_SINK_COUNT_ESI+1,
+ &esi[1], 3);
+ if (wret == 3)
+ break;
}
}
- return -EINVAL;
+
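+ /* 1 if the link needs retraining, 0 otherwise */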
+ return need_retrain;
}
static bool
@@ -5138,20 +5677,102 @@ intel_dp_needs_link_retrain(struct intel_dp *intel_dp)
return !drm_dp_channel_eq_ok(link_status, intel_dp->lane_count);
}
+static bool intel_dp_has_connector(struct intel_dp *intel_dp,
+ const struct drm_connector_state *conn_state)
+{
+ struct drm_i915_private *i915 = dp_to_i915(intel_dp);
+ struct intel_encoder *encoder;
+ enum pipe pipe;
+
+ if (!conn_state->best_encoder)
+ return false;
+
+ /* SST */
+ encoder = &dp_to_dig_port(intel_dp)->base;
+ if (conn_state->best_encoder == &encoder->base)
+ return true;
+
+ /* MST */
+ for_each_pipe(i915, pipe) {
+ encoder = &intel_dp->mst_encoders[pipe]->base;
+ if (conn_state->best_encoder == &encoder->base)
+ return true;
+ }
+
+ return false;
+}
+
+static int intel_dp_prep_link_retrain(struct intel_dp *intel_dp,
+ struct drm_modeset_acquire_ctx *ctx,
+ u32 *crtc_mask)
+{
+ struct drm_i915_private *i915 = dp_to_i915(intel_dp);
+ struct drm_connector_list_iter conn_iter;
+ struct intel_connector *connector;
+ int ret = 0;
+
+ *crtc_mask = 0;
+
+ if (!intel_dp_needs_link_retrain(intel_dp))
+ return 0;
+
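+ /*
+ * Collect the CRTCs of all active SST/MST streams driven by this
+ * DP port into *crtc_mask.
+ */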
+ drm_connector_list_iter_begin(&i915->drm, &conn_iter);
+ for_each_intel_connector_iter(connector, &conn_iter) {
+ struct drm_connector_state *conn_state =
+ connector->base.state;
+ struct intel_crtc_state *crtc_state;
+ struct intel_crtc *crtc;
+
+ if (!intel_dp_has_connector(intel_dp, conn_state))
+ continue;
+
+ crtc = to_intel_crtc(conn_state->crtc);
+ if (!crtc)
+ continue;
+
+ ret = drm_modeset_lock(&crtc->base.mutex, ctx);
+ if (ret)
+ break;
+
+ crtc_state = to_intel_crtc_state(crtc->base.state);
+
+ drm_WARN_ON(&i915->drm, !intel_crtc_has_dp_encoder(crtc_state));
+
+ if (!crtc_state->hw.active)
+ continue;
+
+ if (conn_state->commit &&
+ !try_wait_for_completion(&conn_state->commit->hw_done))
+ continue;
+
+ *crtc_mask |= drm_crtc_mask(&crtc->base);
+ }
+ drm_connector_list_iter_end(&conn_iter);
+
+ if (!intel_dp_needs_link_retrain(intel_dp))
+ *crtc_mask = 0;
+
+ return ret;
+}
+
+static bool intel_dp_is_connected(struct intel_dp *intel_dp)
+{
+ struct intel_connector *connector = intel_dp->attached_connector;
+
+ return connector->base.status == connector_status_connected ||
+ intel_dp->is_mst;
+}
+
int intel_dp_retrain_link(struct intel_encoder *encoder,
struct drm_modeset_acquire_ctx *ctx)
{
struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
- struct intel_connector *connector = intel_dp->attached_connector;
- struct drm_connector_state *conn_state;
- struct intel_crtc_state *crtc_state;
struct intel_crtc *crtc;
+ u32 crtc_mask;
int ret;
- /* FIXME handle the MST connectors as well */
-
- if (!connector || connector->base.status != connector_status_connected)
+ if (!intel_dp_is_connected(intel_dp))
return 0;
ret = drm_modeset_lock(&dev_priv->drm.mode_config.connection_mutex,
@@ -5159,46 +5780,42 @@ int intel_dp_retrain_link(struct intel_encoder *encoder,
if (ret)
return ret;
- conn_state = connector->base.state;
-
- crtc = to_intel_crtc(conn_state->crtc);
- if (!crtc)
- return 0;
-
- ret = drm_modeset_lock(&crtc->base.mutex, ctx);
+ ret = intel_dp_prep_link_retrain(intel_dp, ctx, &crtc_mask);
if (ret)
return ret;
- crtc_state = to_intel_crtc_state(crtc->base.state);
-
- drm_WARN_ON(&dev_priv->drm, !intel_crtc_has_dp_encoder(crtc_state));
-
- if (!crtc_state->hw.active)
+ if (crtc_mask == 0)
return 0;
- if (conn_state->commit &&
- !try_wait_for_completion(&conn_state->commit->hw_done))
- return 0;
+ drm_dbg_kms(&dev_priv->drm, "[ENCODER:%d:%s] retraining link\n",
+ encoder->base.base.id, encoder->base.name);
- if (!intel_dp_needs_link_retrain(intel_dp))
- return 0;
+ for_each_intel_crtc_mask(&dev_priv->drm, crtc, crtc_mask) {
+ const struct intel_crtc_state *crtc_state =
+ to_intel_crtc_state(crtc->base.state);
- /* Suppress underruns caused by re-training */
- intel_set_cpu_fifo_underrun_reporting(dev_priv, crtc->pipe, false);
- if (crtc_state->has_pch_encoder)
- intel_set_pch_fifo_underrun_reporting(dev_priv,
- intel_crtc_pch_transcoder(crtc), false);
+ /* Suppress underruns caused by re-training */
+ intel_set_cpu_fifo_underrun_reporting(dev_priv, crtc->pipe, false);
+ if (crtc_state->has_pch_encoder)
+ intel_set_pch_fifo_underrun_reporting(dev_priv,
+ intel_crtc_pch_transcoder(crtc), false);
+ }
intel_dp_start_link_train(intel_dp);
intel_dp_stop_link_train(intel_dp);
- /* Keep underrun reporting disabled until things are stable */
- intel_wait_for_vblank(dev_priv, crtc->pipe);
+ for_each_intel_crtc_mask(&dev_priv->drm, crtc, crtc_mask) {
+ const struct intel_crtc_state *crtc_state =
+ to_intel_crtc_state(crtc->base.state);
- intel_set_cpu_fifo_underrun_reporting(dev_priv, crtc->pipe, true);
- if (crtc_state->has_pch_encoder)
- intel_set_pch_fifo_underrun_reporting(dev_priv,
- intel_crtc_pch_transcoder(crtc), true);
+ /* Keep underrun reporting disabled until things are stable */
+ intel_wait_for_vblank(dev_priv, crtc->pipe);
+
+ intel_set_cpu_fifo_underrun_reporting(dev_priv, crtc->pipe, true);
+ if (crtc_state->has_pch_encoder)
+ intel_set_pch_fifo_underrun_reporting(dev_priv,
+ intel_crtc_pch_transcoder(crtc), true);
+ }
return 0;
}
@@ -5217,14 +5834,13 @@ int intel_dp_retrain_link(struct intel_encoder *encoder,
*/
static enum intel_hotplug_state
intel_dp_hotplug(struct intel_encoder *encoder,
- struct intel_connector *connector,
- bool irq_received)
+ struct intel_connector *connector)
{
struct drm_modeset_acquire_ctx ctx;
enum intel_hotplug_state state;
int ret;
- state = intel_encoder_hotplug(encoder, connector, irq_received);
+ state = intel_encoder_hotplug(encoder, connector);
drm_modeset_acquire_init(&ctx, 0);
@@ -5248,7 +5864,7 @@ intel_dp_hotplug(struct intel_encoder *encoder,
* Keeping it consistent with intel_ddi_hotplug() and
* intel_hdmi_hotplug().
*/
- if (state == INTEL_HOTPLUG_UNCHANGED && irq_received)
+ if (state == INTEL_HOTPLUG_UNCHANGED && !connector->hotplug_retries)
state = INTEL_HOTPLUG_RETRY;
return state;
@@ -5256,6 +5872,7 @@ intel_dp_hotplug(struct intel_encoder *encoder,
static void intel_dp_check_service_irq(struct intel_dp *intel_dp)
{
+ struct drm_i915_private *i915 = dp_to_i915(intel_dp);
u8 val;
if (intel_dp->dpcd[DP_DPCD_REV] < 0x11)
@@ -5274,7 +5891,7 @@ static void intel_dp_check_service_irq(struct intel_dp *intel_dp)
intel_hdcp_handle_cp_irq(intel_dp->attached_connector);
if (val & DP_SINK_SPECIFIC_IRQ)
- DRM_DEBUG_DRIVER("Sink specific irq unhandled\n");
+ drm_dbg_kms(&i915->drm, "Sink specific irq unhandled\n");
}
/*
@@ -5341,6 +5958,7 @@ intel_dp_short_pulse(struct intel_dp *intel_dp)
static enum drm_connector_status
intel_dp_detect_dpcd(struct intel_dp *intel_dp)
{
+ struct drm_i915_private *i915 = dp_to_i915(intel_dp);
struct intel_lspcon *lspcon = dp_to_lspcon(intel_dp);
u8 *dpcd = intel_dp->dpcd;
u8 type;
@@ -5388,7 +6006,7 @@ intel_dp_detect_dpcd(struct intel_dp *intel_dp)
}
/* Anything else is out of spec, warn and ignore */
- DRM_DEBUG_KMS("Broken DP branch device, ignoring\n");
+ drm_dbg_kms(&i915->drm, "Broken DP branch device, ignoring\n");
return connector_status_disconnected;
}
@@ -5401,64 +6019,7 @@ edp_detect(struct intel_dp *intel_dp)
static bool ibx_digital_port_connected(struct intel_encoder *encoder)
{
struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
- u32 bit;
-
- switch (encoder->hpd_pin) {
- case HPD_PORT_B:
- bit = SDE_PORTB_HOTPLUG;
- break;
- case HPD_PORT_C:
- bit = SDE_PORTC_HOTPLUG;
- break;
- case HPD_PORT_D:
- bit = SDE_PORTD_HOTPLUG;
- break;
- default:
- MISSING_CASE(encoder->hpd_pin);
- return false;
- }
-
- return intel_de_read(dev_priv, SDEISR) & bit;
-}
-
-static bool cpt_digital_port_connected(struct intel_encoder *encoder)
-{
- struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
- u32 bit;
-
- switch (encoder->hpd_pin) {
- case HPD_PORT_B:
- bit = SDE_PORTB_HOTPLUG_CPT;
- break;
- case HPD_PORT_C:
- bit = SDE_PORTC_HOTPLUG_CPT;
- break;
- case HPD_PORT_D:
- bit = SDE_PORTD_HOTPLUG_CPT;
- break;
- default:
- MISSING_CASE(encoder->hpd_pin);
- return false;
- }
-
- return intel_de_read(dev_priv, SDEISR) & bit;
-}
-
-static bool spt_digital_port_connected(struct intel_encoder *encoder)
-{
- struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
- u32 bit;
-
- switch (encoder->hpd_pin) {
- case HPD_PORT_A:
- bit = SDE_PORTA_HOTPLUG_SPT;
- break;
- case HPD_PORT_E:
- bit = SDE_PORTE_HOTPLUG_SPT;
- break;
- default:
- return cpt_digital_port_connected(encoder);
- }
+ u32 bit = dev_priv->hotplug.pch_hpd[encoder->hpd_pin];
return intel_de_read(dev_priv, SDEISR) & bit;
}
@@ -5512,89 +6073,9 @@ static bool gm45_digital_port_connected(struct intel_encoder *encoder)
static bool ilk_digital_port_connected(struct intel_encoder *encoder)
{
struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
+ u32 bit = dev_priv->hotplug.hpd[encoder->hpd_pin];
- if (encoder->hpd_pin == HPD_PORT_A)
- return intel_de_read(dev_priv, DEISR) & DE_DP_A_HOTPLUG;
- else
- return ibx_digital_port_connected(encoder);
-}
-
-static bool snb_digital_port_connected(struct intel_encoder *encoder)
-{
- struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-
- if (encoder->hpd_pin == HPD_PORT_A)
- return intel_de_read(dev_priv, DEISR) & DE_DP_A_HOTPLUG;
- else
- return cpt_digital_port_connected(encoder);
-}
-
-static bool ivb_digital_port_connected(struct intel_encoder *encoder)
-{
- struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-
- if (encoder->hpd_pin == HPD_PORT_A)
- return intel_de_read(dev_priv, DEISR) & DE_DP_A_HOTPLUG_IVB;
- else
- return cpt_digital_port_connected(encoder);
-}
-
-static bool bdw_digital_port_connected(struct intel_encoder *encoder)
-{
- struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-
- if (encoder->hpd_pin == HPD_PORT_A)
- return intel_de_read(dev_priv, GEN8_DE_PORT_ISR) & GEN8_PORT_DP_A_HOTPLUG;
- else
- return cpt_digital_port_connected(encoder);
-}
-
-static bool bxt_digital_port_connected(struct intel_encoder *encoder)
-{
- struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
- u32 bit;
-
- switch (encoder->hpd_pin) {
- case HPD_PORT_A:
- bit = BXT_DE_PORT_HP_DDIA;
- break;
- case HPD_PORT_B:
- bit = BXT_DE_PORT_HP_DDIB;
- break;
- case HPD_PORT_C:
- bit = BXT_DE_PORT_HP_DDIC;
- break;
- default:
- MISSING_CASE(encoder->hpd_pin);
- return false;
- }
-
- return intel_de_read(dev_priv, GEN8_DE_PORT_ISR) & bit;
-}
-
-static bool intel_combo_phy_connected(struct drm_i915_private *dev_priv,
- enum phy phy)
-{
- if (HAS_PCH_MCC(dev_priv) && phy == PHY_C)
- return intel_de_read(dev_priv, SDEISR) & SDE_TC_HOTPLUG_ICP(PORT_TC1);
-
- return intel_de_read(dev_priv, SDEISR) & SDE_DDI_HOTPLUG_ICP(phy);
-}
-
-static bool icp_digital_port_connected(struct intel_encoder *encoder)
-{
- struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
- struct intel_digital_port *dig_port = enc_to_dig_port(encoder);
- enum phy phy = intel_port_to_phy(dev_priv, encoder->port);
-
- if (intel_phy_is_combo(dev_priv, phy))
- return intel_combo_phy_connected(dev_priv, phy);
- else if (intel_phy_is_tc(dev_priv, phy))
- return intel_tc_port_connected(dig_port);
- else
- MISSING_CASE(encoder->hpd_pin);
-
- return false;
+ return intel_de_read(dev_priv, DEISR) & bit;
}
/*
@@ -5608,44 +6089,15 @@ static bool icp_digital_port_connected(struct intel_encoder *encoder)
*
* Return %true if port is connected, %false otherwise.
*/
-static bool __intel_digital_port_connected(struct intel_encoder *encoder)
-{
- struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-
- if (HAS_GMCH(dev_priv)) {
- if (IS_GM45(dev_priv))
- return gm45_digital_port_connected(encoder);
- else
- return g4x_digital_port_connected(encoder);
- }
-
- if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP)
- return icp_digital_port_connected(encoder);
- else if (INTEL_PCH_TYPE(dev_priv) >= PCH_SPT)
- return spt_digital_port_connected(encoder);
- else if (IS_GEN9_LP(dev_priv))
- return bxt_digital_port_connected(encoder);
- else if (IS_GEN(dev_priv, 8))
- return bdw_digital_port_connected(encoder);
- else if (IS_GEN(dev_priv, 7))
- return ivb_digital_port_connected(encoder);
- else if (IS_GEN(dev_priv, 6))
- return snb_digital_port_connected(encoder);
- else if (IS_GEN(dev_priv, 5))
- return ilk_digital_port_connected(encoder);
-
- MISSING_CASE(INTEL_GEN(dev_priv));
- return false;
-}
-
bool intel_digital_port_connected(struct intel_encoder *encoder)
{
struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
+ struct intel_digital_port *dig_port = enc_to_dig_port(encoder);
bool is_connected = false;
intel_wakeref_t wakeref;
with_intel_display_power(dev_priv, POWER_DOMAIN_DISPLAY_CORE, wakeref)
- is_connected = __intel_digital_port_connected(encoder);
+ is_connected = dig_port->connected(encoder);
return is_connected;
}
@@ -5860,6 +6312,7 @@ static int intel_dp_get_modes(struct drm_connector *connector)
static int
intel_dp_connector_register(struct drm_connector *connector)
{
+ struct drm_i915_private *i915 = to_i915(connector->dev);
struct intel_dp *intel_dp = intel_attached_dp(to_intel_connector(connector));
int ret;
@@ -5867,10 +6320,8 @@ intel_dp_connector_register(struct drm_connector *connector)
if (ret)
return ret;
- intel_connector_debugfs_add(connector);
-
- DRM_DEBUG_KMS("registering %s bus for %s\n",
- intel_dp->aux.name, connector->kdev->kobj.name);
+ drm_dbg_kms(&i915->drm, "registering %s bus for %s\n",
+ intel_dp->aux.name, connector->kdev->kobj.name);
intel_dp->aux.dev = connector->kdev;
ret = drm_dp_aux_register(&intel_dp->aux);
@@ -5956,6 +6407,7 @@ static
int intel_dp_hdcp_write_an_aksv(struct intel_digital_port *intel_dig_port,
u8 *an)
{
+ struct drm_i915_private *i915 = to_i915(intel_dig_port->base.base.dev);
struct intel_dp *intel_dp = enc_to_intel_dp(to_intel_encoder(&intel_dig_port->base.base));
static const struct drm_dp_aux_msg msg = {
.request = DP_AUX_NATIVE_WRITE,
@@ -5970,8 +6422,9 @@ int intel_dp_hdcp_write_an_aksv(struct intel_digital_port *intel_dig_port,
dpcd_ret = drm_dp_dpcd_write(&intel_dig_port->dp.aux, DP_AUX_HDCP_AN,
an, DRM_HDCP_AN_LEN);
if (dpcd_ret != DRM_HDCP_AN_LEN) {
- DRM_DEBUG_KMS("Failed to write An over DP/AUX (%zd)\n",
- dpcd_ret);
+ drm_dbg_kms(&i915->drm,
+ "Failed to write An over DP/AUX (%zd)\n",
+ dpcd_ret);
return dpcd_ret >= 0 ? -EIO : dpcd_ret;
}
@@ -5987,17 +6440,19 @@ int intel_dp_hdcp_write_an_aksv(struct intel_digital_port *intel_dig_port,
rxbuf, sizeof(rxbuf),
DP_AUX_CH_CTL_AUX_AKSV_SELECT);
if (ret < 0) {
- DRM_DEBUG_KMS("Write Aksv over DP/AUX failed (%d)\n", ret);
+ drm_dbg_kms(&i915->drm,
+ "Write Aksv over DP/AUX failed (%d)\n", ret);
return ret;
} else if (ret == 0) {
- DRM_DEBUG_KMS("Aksv write over DP/AUX was empty\n");
+ drm_dbg_kms(&i915->drm, "Aksv write over DP/AUX was empty\n");
return -EIO;
}
reply = (rxbuf[0] >> 4) & DP_AUX_NATIVE_REPLY_MASK;
if (reply != DP_AUX_NATIVE_REPLY_ACK) {
- DRM_DEBUG_KMS("Aksv write: no DP_AUX_NATIVE_REPLY_ACK %x\n",
- reply);
+ drm_dbg_kms(&i915->drm,
+ "Aksv write: no DP_AUX_NATIVE_REPLY_ACK %x\n",
+ reply);
return -EIO;
}
return 0;
@@ -6006,11 +6461,14 @@ int intel_dp_hdcp_write_an_aksv(struct intel_digital_port *intel_dig_port,
static int intel_dp_hdcp_read_bksv(struct intel_digital_port *intel_dig_port,
u8 *bksv)
{
+ struct drm_i915_private *i915 = to_i915(intel_dig_port->base.base.dev);
ssize_t ret;
+
ret = drm_dp_dpcd_read(&intel_dig_port->dp.aux, DP_AUX_HDCP_BKSV, bksv,
DRM_HDCP_KSV_LEN);
if (ret != DRM_HDCP_KSV_LEN) {
- DRM_DEBUG_KMS("Read Bksv from DP/AUX failed (%zd)\n", ret);
+ drm_dbg_kms(&i915->drm,
+ "Read Bksv from DP/AUX failed (%zd)\n", ret);
return ret >= 0 ? -EIO : ret;
}
return 0;
@@ -6019,7 +6477,9 @@ static int intel_dp_hdcp_read_bksv(struct intel_digital_port *intel_dig_port,
static int intel_dp_hdcp_read_bstatus(struct intel_digital_port *intel_dig_port,
u8 *bstatus)
{
+ struct drm_i915_private *i915 = to_i915(intel_dig_port->base.base.dev);
ssize_t ret;
+
/*
* For some reason the HDMI and DP HDCP specs call this register
* definition by different names. In the HDMI spec, it's called BSTATUS,
@@ -6028,7 +6488,8 @@ static int intel_dp_hdcp_read_bstatus(struct intel_digital_port *intel_dig_port,
ret = drm_dp_dpcd_read(&intel_dig_port->dp.aux, DP_AUX_HDCP_BINFO,
bstatus, DRM_HDCP_BSTATUS_LEN);
if (ret != DRM_HDCP_BSTATUS_LEN) {
- DRM_DEBUG_KMS("Read bstatus from DP/AUX failed (%zd)\n", ret);
+ drm_dbg_kms(&i915->drm,
+ "Read bstatus from DP/AUX failed (%zd)\n", ret);
return ret >= 0 ? -EIO : ret;
}
return 0;
@@ -6038,12 +6499,14 @@ static
int intel_dp_hdcp_read_bcaps(struct intel_digital_port *intel_dig_port,
u8 *bcaps)
{
+ struct drm_i915_private *i915 = to_i915(intel_dig_port->base.base.dev);
ssize_t ret;
ret = drm_dp_dpcd_read(&intel_dig_port->dp.aux, DP_AUX_HDCP_BCAPS,
bcaps, 1);
if (ret != 1) {
- DRM_DEBUG_KMS("Read bcaps from DP/AUX failed (%zd)\n", ret);
+ drm_dbg_kms(&i915->drm,
+ "Read bcaps from DP/AUX failed (%zd)\n", ret);
return ret >= 0 ? -EIO : ret;
}
@@ -6069,11 +6532,14 @@ static
int intel_dp_hdcp_read_ri_prime(struct intel_digital_port *intel_dig_port,
u8 *ri_prime)
{
+ struct drm_i915_private *i915 = to_i915(intel_dig_port->base.base.dev);
ssize_t ret;
+
ret = drm_dp_dpcd_read(&intel_dig_port->dp.aux, DP_AUX_HDCP_RI_PRIME,
ri_prime, DRM_HDCP_RI_LEN);
if (ret != DRM_HDCP_RI_LEN) {
- DRM_DEBUG_KMS("Read Ri' from DP/AUX failed (%zd)\n", ret);
+ drm_dbg_kms(&i915->drm, "Read Ri' from DP/AUX failed (%zd)\n",
+ ret);
return ret >= 0 ? -EIO : ret;
}
return 0;
@@ -6083,12 +6549,15 @@ static
int intel_dp_hdcp_read_ksv_ready(struct intel_digital_port *intel_dig_port,
bool *ksv_ready)
{
+ struct drm_i915_private *i915 = to_i915(intel_dig_port->base.base.dev);
ssize_t ret;
u8 bstatus;
+
ret = drm_dp_dpcd_read(&intel_dig_port->dp.aux, DP_AUX_HDCP_BSTATUS,
&bstatus, 1);
if (ret != 1) {
- DRM_DEBUG_KMS("Read bstatus from DP/AUX failed (%zd)\n", ret);
+ drm_dbg_kms(&i915->drm,
+ "Read bstatus from DP/AUX failed (%zd)\n", ret);
return ret >= 0 ? -EIO : ret;
}
*ksv_ready = bstatus & DP_BSTATUS_READY;
@@ -6099,6 +6568,7 @@ static
int intel_dp_hdcp_read_ksv_fifo(struct intel_digital_port *intel_dig_port,
int num_downstream, u8 *ksv_fifo)
{
+ struct drm_i915_private *i915 = to_i915(intel_dig_port->base.base.dev);
ssize_t ret;
int i;
@@ -6110,8 +6580,9 @@ int intel_dp_hdcp_read_ksv_fifo(struct intel_digital_port *intel_dig_port,
ksv_fifo + i * DRM_HDCP_KSV_LEN,
len);
if (ret != len) {
- DRM_DEBUG_KMS("Read ksv[%d] from DP/AUX failed (%zd)\n",
- i, ret);
+ drm_dbg_kms(&i915->drm,
+ "Read ksv[%d] from DP/AUX failed (%zd)\n",
+ i, ret);
return ret >= 0 ? -EIO : ret;
}
}
@@ -6122,6 +6593,7 @@ static
int intel_dp_hdcp_read_v_prime_part(struct intel_digital_port *intel_dig_port,
int i, u32 *part)
{
+ struct drm_i915_private *i915 = to_i915(intel_dig_port->base.base.dev);
ssize_t ret;
if (i >= DRM_HDCP_V_PRIME_NUM_PARTS)
@@ -6131,7 +6603,8 @@ int intel_dp_hdcp_read_v_prime_part(struct intel_digital_port *intel_dig_port,
DP_AUX_HDCP_V_PRIME(i), part,
DRM_HDCP_V_PRIME_PART_LEN);
if (ret != DRM_HDCP_V_PRIME_PART_LEN) {
- DRM_DEBUG_KMS("Read v'[%d] from DP/AUX failed (%zd)\n", i, ret);
+ drm_dbg_kms(&i915->drm,
+ "Read v'[%d] from DP/AUX failed (%zd)\n", i, ret);
return ret >= 0 ? -EIO : ret;
}
return 0;
@@ -6148,13 +6621,15 @@ int intel_dp_hdcp_toggle_signalling(struct intel_digital_port *intel_dig_port,
static
bool intel_dp_hdcp_check_link(struct intel_digital_port *intel_dig_port)
{
+ struct drm_i915_private *i915 = to_i915(intel_dig_port->base.base.dev);
ssize_t ret;
u8 bstatus;
ret = drm_dp_dpcd_read(&intel_dig_port->dp.aux, DP_AUX_HDCP_BSTATUS,
&bstatus, 1);
if (ret != 1) {
- DRM_DEBUG_KMS("Read bstatus from DP/AUX failed (%zd)\n", ret);
+ drm_dbg_kms(&i915->drm,
+ "Read bstatus from DP/AUX failed (%zd)\n", ret);
return false;
}
@@ -6225,17 +6700,19 @@ static const struct hdcp2_dp_msg_data hdcp2_dp_msg_data[] = {
0, 0 },
};
-static inline
-int intel_dp_hdcp2_read_rx_status(struct intel_digital_port *intel_dig_port,
- u8 *rx_status)
+static int
+intel_dp_hdcp2_read_rx_status(struct intel_digital_port *intel_dig_port,
+ u8 *rx_status)
{
+ struct drm_i915_private *i915 = to_i915(intel_dig_port->base.base.dev);
ssize_t ret;
ret = drm_dp_dpcd_read(&intel_dig_port->dp.aux,
DP_HDCP_2_2_REG_RXSTATUS_OFFSET, rx_status,
HDCP_2_2_DP_RXSTATUS_LEN);
if (ret != HDCP_2_2_DP_RXSTATUS_LEN) {
- DRM_DEBUG_KMS("Read bstatus from DP/AUX failed (%zd)\n", ret);
+ drm_dbg_kms(&i915->drm,
+ "Read bstatus from DP/AUX failed (%zd)\n", ret);
return ret >= 0 ? -EIO : ret;
}
@@ -6279,6 +6756,7 @@ static ssize_t
intel_dp_hdcp2_wait_for_msg(struct intel_digital_port *intel_dig_port,
const struct hdcp2_dp_msg_data *hdcp2_msg_data)
{
+ struct drm_i915_private *i915 = to_i915(intel_dig_port->base.base.dev);
struct intel_dp *dp = &intel_dig_port->dp;
struct intel_hdcp *hdcp = &dp->attached_connector->hdcp;
u8 msg_id = hdcp2_msg_data->msg_id;
@@ -6310,8 +6788,9 @@ intel_dp_hdcp2_wait_for_msg(struct intel_digital_port *intel_dig_port,
}
if (ret)
- DRM_DEBUG_KMS("msg_id %d, ret %d, timeout(mSec): %d\n",
- hdcp2_msg_data->msg_id, ret, timeout);
+ drm_dbg_kms(&i915->drm,
+ "msg_id %d, ret %d, timeout(mSec): %d\n",
+ hdcp2_msg_data->msg_id, ret, timeout);
return ret;
}
@@ -6397,6 +6876,7 @@ static
int intel_dp_hdcp2_read_msg(struct intel_digital_port *intel_dig_port,
u8 msg_id, void *buf, size_t size)
{
+ struct drm_i915_private *i915 = to_i915(intel_dig_port->base.base.dev);
unsigned int offset;
u8 *byte = buf;
ssize_t ret, bytes_to_recv, len;
@@ -6430,7 +6910,8 @@ int intel_dp_hdcp2_read_msg(struct intel_digital_port *intel_dig_port,
ret = drm_dp_dpcd_read(&intel_dig_port->dp.aux, offset,
(void *)byte, len);
if (ret < 0) {
- DRM_DEBUG_KMS("msg_id %d, ret %zd\n", msg_id, ret);
+ drm_dbg_kms(&i915->drm, "msg_id %d, ret %zd\n",
+ msg_id, ret);
return ret;
}
@@ -6721,7 +7202,11 @@ static int intel_dp_connector_atomic_check(struct drm_connector *conn,
if (ret)
return ret;
- if (INTEL_GEN(dev_priv) < 11)
+ /*
+ * We don't enable port sync on BDW due to missing workarounds and
+ * because the modeset sequence has not been adjusted appropriately.
+ */
+ if (INTEL_GEN(dev_priv) < 9)
return 0;
if (!intel_connector_needs_modeset(state, conn))
@@ -6760,28 +7245,45 @@ static const struct drm_encoder_funcs intel_dp_enc_funcs = {
.destroy = intel_dp_encoder_destroy,
};
+static bool intel_edp_have_power(struct intel_dp *intel_dp)
+{
+ intel_wakeref_t wakeref;
+ bool have_power = false;
+
+ with_pps_lock(intel_dp, wakeref) {
+ have_power = edp_have_panel_power(intel_dp) &&
+ edp_have_panel_vdd(intel_dp);
+ }
+
+ return have_power;
+}
+
enum irqreturn
intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd)
{
+ struct drm_i915_private *i915 = to_i915(intel_dig_port->base.base.dev);
struct intel_dp *intel_dp = &intel_dig_port->dp;
- if (long_hpd && intel_dig_port->base.type == INTEL_OUTPUT_EDP) {
+ if (intel_dig_port->base.type == INTEL_OUTPUT_EDP &&
+ (long_hpd || !intel_edp_have_power(intel_dp))) {
/*
- * vdd off can generate a long pulse on eDP which
+ * vdd off can generate a long/short pulse on eDP which
* would require vdd on to handle it, and thus we
* would end up in an endless cycle of
- * "vdd off -> long hpd -> vdd on -> detect -> vdd off -> ..."
+ * "vdd off -> long/short hpd -> vdd on -> detect -> vdd off -> ..."
*/
- DRM_DEBUG_KMS("ignoring long hpd on eDP [ENCODER:%d:%s]\n",
- intel_dig_port->base.base.base.id,
- intel_dig_port->base.base.name);
+ drm_dbg_kms(&i915->drm,
+ "ignoring %s hpd on eDP [ENCODER:%d:%s]\n",
+ long_hpd ? "long" : "short",
+ intel_dig_port->base.base.base.id,
+ intel_dig_port->base.base.name);
return IRQ_HANDLED;
}
- DRM_DEBUG_KMS("got hpd irq on [ENCODER:%d:%s] - %s\n",
- intel_dig_port->base.base.base.id,
- intel_dig_port->base.base.name,
- long_hpd ? "long" : "short");
+ drm_dbg_kms(&i915->drm, "got hpd irq on [ENCODER:%d:%s] - %s\n",
+ intel_dig_port->base.base.base.id,
+ intel_dig_port->base.base.name,
+ long_hpd ? "long" : "short");
if (long_hpd) {
intel_dp->reset_link_params = true;
@@ -6789,18 +7291,25 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd)
}
if (intel_dp->is_mst) {
- if (intel_dp_check_mst_status(intel_dp) == -EINVAL) {
+ switch (intel_dp_check_mst_status(intel_dp)) {
+ case -EINVAL:
/*
* If we were in MST mode, and device is not
* there, get out of MST mode
*/
- DRM_DEBUG_KMS("MST device may have disappeared %d vs %d\n",
- intel_dp->is_mst, intel_dp->mst_mgr.mst_state);
+ drm_dbg_kms(&i915->drm,
+ "MST device may have disappeared %d vs %d\n",
+ intel_dp->is_mst,
+ intel_dp->mst_mgr.mst_state);
intel_dp->is_mst = false;
drm_dp_mst_topology_mgr_set_mst(&intel_dp->mst_mgr,
intel_dp->is_mst);
return IRQ_NONE;
+ case 1:
+ return IRQ_NONE;
+ default:
+ break;
}
}
@@ -7831,6 +8340,23 @@ bool intel_dp_init(struct drm_i915_private *dev_priv,
intel_encoder->post_disable = g4x_post_disable_dp;
}
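+ /* Select the platform specific link training and signal level hooks. */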
+ if ((IS_IVYBRIDGE(dev_priv) && port == PORT_A) ||
+ (HAS_PCH_CPT(dev_priv) && port != PORT_A))
+ intel_dig_port->dp.set_link_train = cpt_set_link_train;
+ else
+ intel_dig_port->dp.set_link_train = g4x_set_link_train;
+
+ if (IS_CHERRYVIEW(dev_priv))
+ intel_dig_port->dp.set_signal_levels = chv_set_signal_levels;
+ else if (IS_VALLEYVIEW(dev_priv))
+ intel_dig_port->dp.set_signal_levels = vlv_set_signal_levels;
+ else if (IS_IVYBRIDGE(dev_priv) && port == PORT_A)
+ intel_dig_port->dp.set_signal_levels = ivb_cpu_edp_set_signal_levels;
+ else if (IS_GEN(dev_priv, 6) && port == PORT_A)
+ intel_dig_port->dp.set_signal_levels = snb_cpu_edp_set_signal_levels;
+ else
+ intel_dig_port->dp.set_signal_levels = g4x_set_signal_levels;
+
intel_dig_port->dp.output_reg = output_reg;
intel_dig_port->max_lanes = 4;
intel_dig_port->dp.regs.dp_tp_ctl = DP_TP_CTL(port);
@@ -7851,6 +8377,18 @@ bool intel_dp_init(struct drm_i915_private *dev_priv,
intel_dig_port->hpd_pulse = intel_dp_hpd_pulse;
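+ /* Pick the platform specific hotplug live state (->connected) hook. */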
+ if (HAS_GMCH(dev_priv)) {
+ if (IS_GM45(dev_priv))
+ intel_dig_port->connected = gm45_digital_port_connected;
+ else
+ intel_dig_port->connected = g4x_digital_port_connected;
+ } else {
+ if (port == PORT_A)
+ intel_dig_port->connected = ilk_digital_port_connected;
+ else
+ intel_dig_port->connected = ibx_digital_port_connected;
+ }
+
if (port != PORT_A)
intel_infoframe_init(intel_dig_port);
diff --git a/drivers/gpu/drm/i915/display/intel_dp.h b/drivers/gpu/drm/i915/display/intel_dp.h
index 0c7be8ed1423..1702959ca079 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.h
+++ b/drivers/gpu/drm/i915/display/intel_dp.h
@@ -16,6 +16,7 @@ struct drm_connector_state;
struct drm_encoder;
struct drm_i915_private;
struct drm_modeset_acquire_ctx;
+struct drm_dp_vsc_sdp;
struct intel_connector;
struct intel_crtc_state;
struct intel_digital_port;
@@ -108,13 +109,21 @@ int intel_dp_link_required(int pixel_clock, int bpp);
int intel_dp_max_data_rate(int max_link_clock, int max_lanes);
bool intel_dp_needs_vsc_sdp(const struct intel_crtc_state *crtc_state,
const struct drm_connector_state *conn_state);
-void intel_dp_vsc_enable(struct intel_dp *intel_dp,
- const struct intel_crtc_state *crtc_state,
- const struct drm_connector_state *conn_state);
-void intel_dp_hdr_metadata_enable(struct intel_dp *intel_dp,
+void intel_dp_compute_psr_vsc_sdp(struct intel_dp *intel_dp,
const struct intel_crtc_state *crtc_state,
- const struct drm_connector_state *conn_state);
+ const struct drm_connector_state *conn_state,
+ struct drm_dp_vsc_sdp *vsc);
+void intel_write_dp_vsc_sdp(struct intel_encoder *encoder,
+ const struct intel_crtc_state *crtc_state,
+ struct drm_dp_vsc_sdp *vsc);
+void intel_dp_set_infoframes(struct intel_encoder *encoder, bool enable,
+ const struct intel_crtc_state *crtc_state,
+ const struct drm_connector_state *conn_state);
+void intel_read_dp_sdp(struct intel_encoder *encoder,
+ struct intel_crtc_state *crtc_state,
+ unsigned int type);
bool intel_digital_port_connected(struct intel_encoder *encoder);
+void intel_dp_process_phy_request(struct intel_dp *intel_dp);
static inline unsigned int intel_dp_unused_lane_mask(int lane_count)
{
diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c
index dbfa6895795b..0722540d64ad 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c
@@ -27,6 +27,7 @@
static void set_aux_backlight_enable(struct intel_dp *intel_dp, bool enable)
{
+ struct drm_i915_private *i915 = dp_to_i915(intel_dp);
u8 reg_val = 0;
/* Early return when display use other mechanism to enable backlight. */
@@ -35,8 +36,8 @@ static void set_aux_backlight_enable(struct intel_dp *intel_dp, bool enable)
if (drm_dp_dpcd_readb(&intel_dp->aux, DP_EDP_DISPLAY_CONTROL_REGISTER,
&reg_val) < 0) {
- DRM_DEBUG_KMS("Failed to read DPCD register 0x%x\n",
- DP_EDP_DISPLAY_CONTROL_REGISTER);
+ drm_dbg_kms(&i915->drm, "Failed to read DPCD register 0x%x\n",
+ DP_EDP_DISPLAY_CONTROL_REGISTER);
return;
}
if (enable)
@@ -46,8 +47,8 @@ static void set_aux_backlight_enable(struct intel_dp *intel_dp, bool enable)
if (drm_dp_dpcd_writeb(&intel_dp->aux, DP_EDP_DISPLAY_CONTROL_REGISTER,
reg_val) != 1) {
- DRM_DEBUG_KMS("Failed to %s aux backlight\n",
- enable ? "enable" : "disable");
+ drm_dbg_kms(&i915->drm, "Failed to %s aux backlight\n",
+ enable ? "enable" : "disable");
}
}
@@ -58,6 +59,7 @@ static void set_aux_backlight_enable(struct intel_dp *intel_dp, bool enable)
static u32 intel_dp_aux_get_backlight(struct intel_connector *connector)
{
struct intel_dp *intel_dp = intel_attached_dp(connector);
+ struct drm_i915_private *i915 = dp_to_i915(intel_dp);
u8 read_val[2] = { 0x0 };
u8 mode_reg;
u16 level = 0;
@@ -65,8 +67,9 @@ static u32 intel_dp_aux_get_backlight(struct intel_connector *connector)
if (drm_dp_dpcd_readb(&intel_dp->aux,
DP_EDP_BACKLIGHT_MODE_SET_REGISTER,
&mode_reg) != 1) {
- DRM_DEBUG_KMS("Failed to read the DPCD register 0x%x\n",
- DP_EDP_BACKLIGHT_MODE_SET_REGISTER);
+ drm_dbg_kms(&i915->drm,
+ "Failed to read the DPCD register 0x%x\n",
+ DP_EDP_BACKLIGHT_MODE_SET_REGISTER);
return 0;
}
@@ -80,8 +83,8 @@ static u32 intel_dp_aux_get_backlight(struct intel_connector *connector)
if (drm_dp_dpcd_read(&intel_dp->aux, DP_EDP_BACKLIGHT_BRIGHTNESS_MSB,
&read_val, sizeof(read_val)) < 0) {
- DRM_DEBUG_KMS("Failed to read DPCD register 0x%x\n",
- DP_EDP_BACKLIGHT_BRIGHTNESS_MSB);
+ drm_dbg_kms(&i915->drm, "Failed to read DPCD register 0x%x\n",
+ DP_EDP_BACKLIGHT_BRIGHTNESS_MSB);
return 0;
}
level = read_val[0];
@@ -100,6 +103,7 @@ intel_dp_aux_set_backlight(const struct drm_connector_state *conn_state, u32 lev
{
struct intel_connector *connector = to_intel_connector(conn_state->connector);
struct intel_dp *intel_dp = intel_attached_dp(connector);
+ struct drm_i915_private *i915 = dp_to_i915(intel_dp);
u8 vals[2] = { 0x0 };
vals[0] = level;
@@ -111,7 +115,8 @@ intel_dp_aux_set_backlight(const struct drm_connector_state *conn_state, u32 lev
}
if (drm_dp_dpcd_write(&intel_dp->aux, DP_EDP_BACKLIGHT_BRIGHTNESS_MSB,
vals, sizeof(vals)) < 0) {
- DRM_DEBUG_KMS("Failed to write aux backlight level\n");
+ drm_dbg_kms(&i915->drm,
+ "Failed to write aux backlight level\n");
return;
}
}
@@ -133,7 +138,8 @@ static bool intel_dp_aux_set_pwm_freq(struct intel_connector *connector)
freq = dev_priv->vbt.backlight.pwm_freq_hz;
if (!freq) {
- DRM_DEBUG_KMS("Use panel default backlight frequency\n");
+ drm_dbg_kms(&dev_priv->drm,
+ "Use panel default backlight frequency\n");
return false;
}
@@ -146,13 +152,14 @@ static bool intel_dp_aux_set_pwm_freq(struct intel_connector *connector)
fxp_max = DIV_ROUND_CLOSEST(fxp * 5, 4);
if (fxp_min > fxp_actual || fxp_actual > fxp_max) {
- DRM_DEBUG_KMS("Actual frequency out of range\n");
+ drm_dbg_kms(&dev_priv->drm, "Actual frequency out of range\n");
return false;
}
if (drm_dp_dpcd_writeb(&intel_dp->aux,
DP_EDP_BACKLIGHT_FREQ_SET, (u8) f) < 0) {
- DRM_DEBUG_KMS("Failed to write aux backlight freq\n");
+ drm_dbg_kms(&dev_priv->drm,
+ "Failed to write aux backlight freq\n");
return false;
}
return true;
@@ -163,13 +170,14 @@ static void intel_dp_aux_enable_backlight(const struct intel_crtc_state *crtc_st
{
struct intel_connector *connector = to_intel_connector(conn_state->connector);
struct intel_dp *intel_dp = intel_attached_dp(connector);
+ struct drm_i915_private *i915 = dp_to_i915(intel_dp);
struct intel_panel *panel = &connector->panel;
u8 dpcd_buf, new_dpcd_buf, edp_backlight_mode;
if (drm_dp_dpcd_readb(&intel_dp->aux,
DP_EDP_BACKLIGHT_MODE_SET_REGISTER, &dpcd_buf) != 1) {
- DRM_DEBUG_KMS("Failed to read DPCD register 0x%x\n",
- DP_EDP_BACKLIGHT_MODE_SET_REGISTER);
+ drm_dbg_kms(&i915->drm, "Failed to read DPCD register 0x%x\n",
+ DP_EDP_BACKLIGHT_MODE_SET_REGISTER);
return;
}
@@ -186,7 +194,8 @@ static void intel_dp_aux_enable_backlight(const struct intel_crtc_state *crtc_st
if (drm_dp_dpcd_writeb(&intel_dp->aux,
DP_EDP_PWMGEN_BIT_COUNT,
panel->backlight.pwmgen_bit_count) < 0)
- DRM_DEBUG_KMS("Failed to write aux pwmgen bit count\n");
+ drm_dbg_kms(&i915->drm,
+ "Failed to write aux pwmgen bit count\n");
break;
@@ -203,7 +212,8 @@ static void intel_dp_aux_enable_backlight(const struct intel_crtc_state *crtc_st
if (new_dpcd_buf != dpcd_buf) {
if (drm_dp_dpcd_writeb(&intel_dp->aux,
DP_EDP_BACKLIGHT_MODE_SET_REGISTER, new_dpcd_buf) < 0) {
- DRM_DEBUG_KMS("Failed to write aux backlight mode\n");
+ drm_dbg_kms(&i915->drm,
+ "Failed to write aux backlight mode\n");
}
}
@@ -237,9 +247,11 @@ static u32 intel_dp_aux_calc_max_backlight(struct intel_connector *connector)
* minimum value will be applied automatically. So no need to check that.
*/
freq = i915->vbt.backlight.pwm_freq_hz;
- DRM_DEBUG_KMS("VBT defined backlight frequency %u Hz\n", freq);
+ drm_dbg_kms(&i915->drm, "VBT defined backlight frequency %u Hz\n",
+ freq);
if (!freq) {
- DRM_DEBUG_KMS("Use panel default backlight frequency\n");
+ drm_dbg_kms(&i915->drm,
+ "Use panel default backlight frequency\n");
return max_backlight;
}
@@ -254,12 +266,14 @@ static u32 intel_dp_aux_calc_max_backlight(struct intel_connector *connector)
*/
if (drm_dp_dpcd_readb(&intel_dp->aux,
DP_EDP_PWMGEN_BIT_COUNT_CAP_MIN, &pn_min) != 1) {
- DRM_DEBUG_KMS("Failed to read pwmgen bit count cap min\n");
+ drm_dbg_kms(&i915->drm,
+ "Failed to read pwmgen bit count cap min\n");
return max_backlight;
}
if (drm_dp_dpcd_readb(&intel_dp->aux,
DP_EDP_PWMGEN_BIT_COUNT_CAP_MAX, &pn_max) != 1) {
- DRM_DEBUG_KMS("Failed to read pwmgen bit count cap max\n");
+ drm_dbg_kms(&i915->drm,
+ "Failed to read pwmgen bit count cap max\n");
return max_backlight;
}
pn_min &= DP_EDP_PWMGEN_BIT_COUNT_MASK;
@@ -268,7 +282,8 @@ static u32 intel_dp_aux_calc_max_backlight(struct intel_connector *connector)
fxp_min = DIV_ROUND_CLOSEST(fxp * 3, 4);
fxp_max = DIV_ROUND_CLOSEST(fxp * 5, 4);
if (fxp_min < (1 << pn_min) || (255 << pn_max) < fxp_max) {
- DRM_DEBUG_KMS("VBT defined backlight frequency out of range\n");
+ drm_dbg_kms(&i915->drm,
+ "VBT defined backlight frequency out of range\n");
return max_backlight;
}
@@ -279,10 +294,11 @@ static u32 intel_dp_aux_calc_max_backlight(struct intel_connector *connector)
break;
}
- DRM_DEBUG_KMS("Using eDP pwmgen bit count of %d\n", pn);
+ drm_dbg_kms(&i915->drm, "Using eDP pwmgen bit count of %d\n", pn);
if (drm_dp_dpcd_writeb(&intel_dp->aux,
DP_EDP_PWMGEN_BIT_COUNT, pn) < 0) {
- DRM_DEBUG_KMS("Failed to write aux pwmgen bit count\n");
+ drm_dbg_kms(&i915->drm,
+ "Failed to write aux pwmgen bit count\n");
return max_backlight;
}
panel->backlight.pwmgen_bit_count = pn;
@@ -312,6 +328,7 @@ static bool
intel_dp_aux_display_control_capable(struct intel_connector *connector)
{
struct intel_dp *intel_dp = intel_attached_dp(connector);
+ struct drm_i915_private *i915 = dp_to_i915(intel_dp);
/* Check the eDP Display control capabilities registers to determine if
* the panel can support backlight control over the aux channel
@@ -319,7 +336,7 @@ intel_dp_aux_display_control_capable(struct intel_connector *connector)
if (intel_dp->edp_dpcd[1] & DP_EDP_TCON_BACKLIGHT_ADJUSTMENT_CAP &&
(intel_dp->edp_dpcd[2] & DP_EDP_BACKLIGHT_BRIGHTNESS_AUX_SET_CAP) &&
!(intel_dp->edp_dpcd[2] & DP_EDP_BACKLIGHT_BRIGHTNESS_PWM_PIN_CAP)) {
- DRM_DEBUG_KMS("AUX Backlight Control Supported!\n");
+ drm_dbg_kms(&i915->drm, "AUX Backlight Control Supported!\n");
return true;
}
return false;
@@ -329,8 +346,7 @@ int intel_dp_aux_init_backlight_funcs(struct intel_connector *intel_connector)
{
struct intel_panel *panel = &intel_connector->panel;
struct intel_dp *intel_dp = enc_to_intel_dp(intel_connector->encoder);
- struct drm_device *dev = intel_connector->base.dev;
- struct drm_i915_private *dev_priv = to_i915(dev);
+ struct drm_i915_private *i915 = dp_to_i915(intel_dp);
if (i915_modparams.enable_dpcd_backlight == 0 ||
!intel_dp_aux_display_control_capable(intel_connector))
@@ -340,18 +356,18 @@ int intel_dp_aux_init_backlight_funcs(struct intel_connector *intel_connector)
* There are a lot of machines that don't advertise the backlight
* control interface to use properly in their VBIOS, :\
*/
- if (dev_priv->vbt.backlight.type !=
+ if (i915->vbt.backlight.type !=
INTEL_BACKLIGHT_VESA_EDP_AUX_INTERFACE &&
i915_modparams.enable_dpcd_backlight != 1 &&
!drm_dp_has_quirk(&intel_dp->desc, intel_dp->edid_quirks,
DP_QUIRK_FORCE_DPCD_BACKLIGHT)) {
- DRM_DEV_INFO(dev->dev,
- "Panel advertises DPCD backlight support, but "
- "VBT disagrees. If your backlight controls "
- "don't work try booting with "
- "i915.enable_dpcd_backlight=1. If your machine "
- "needs this, please file a _new_ bug report on "
- "drm/i915, see " FDO_BUG_URL " for details.\n");
+ drm_info(&i915->drm,
+ "Panel advertises DPCD backlight support, but "
+ "VBT disagrees. If your backlight controls "
+ "don't work try booting with "
+ "i915.enable_dpcd_backlight=1. If your machine "
+ "needs this, please file a _new_ bug report on "
+ "drm/i915, see " FDO_BUG_URL " for details.\n");
return -ENODEV;
}
diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.c b/drivers/gpu/drm/i915/display/intel_dp_link_training.c
index a7defb37ab00..e4f1843170b7 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c
@@ -34,9 +34,8 @@ intel_dp_dump_link_status(const u8 link_status[DP_LINK_STATUS_SIZE])
link_status[3], link_status[4], link_status[5]);
}
-static void
-intel_get_adjust_train(struct intel_dp *intel_dp,
- const u8 link_status[DP_LINK_STATUS_SIZE])
+void intel_dp_get_adjust_train(struct intel_dp *intel_dp,
+ const u8 link_status[DP_LINK_STATUS_SIZE])
{
u8 v = 0;
u8 p = 0;
@@ -219,7 +218,7 @@ intel_dp_link_training_clock_recovery(struct intel_dp *intel_dp)
voltage = intel_dp->train_set[0] & DP_TRAIN_VOLTAGE_SWING_MASK;
/* Update training set as requested by target */
- intel_get_adjust_train(intel_dp, link_status);
+ intel_dp_get_adjust_train(intel_dp, link_status);
if (!intel_dp_update_link_train(intel_dp)) {
drm_err(&i915->drm,
"failed to update link training\n");
@@ -338,7 +337,7 @@ intel_dp_link_training_channel_equalization(struct intel_dp *intel_dp)
}
/* Update training set as requested by target */
- intel_get_adjust_train(intel_dp, link_status);
+ intel_dp_get_adjust_train(intel_dp, link_status);
if (!intel_dp_update_link_train(intel_dp)) {
drm_err(&i915->drm,
"failed to update link training\n");
diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.h b/drivers/gpu/drm/i915/display/intel_dp_link_training.h
index 174566adcc92..01f1dabbb060 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_link_training.h
+++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.h
@@ -6,8 +6,12 @@
#ifndef __INTEL_DP_LINK_TRAINING_H__
#define __INTEL_DP_LINK_TRAINING_H__
+#include <drm/drm_dp_helper.h>
+
struct intel_dp;
+void intel_dp_get_adjust_train(struct intel_dp *intel_dp,
+ const u8 link_status[DP_LINK_STATUS_SIZE]);
void intel_dp_start_link_train(struct intel_dp *intel_dp);
void intel_dp_stop_link_train(struct intel_dp *intel_dp);
diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c
index 44f3fd251ca1..d18b406f2a7d 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
@@ -47,9 +47,9 @@ static int intel_dp_mst_compute_link_config(struct intel_encoder *encoder,
struct intel_dp *intel_dp = &intel_mst->primary->dp;
struct intel_connector *connector =
to_intel_connector(conn_state->connector);
+ struct drm_i915_private *i915 = to_i915(connector->base.dev);
const struct drm_display_mode *adjusted_mode =
&crtc_state->hw.adjusted_mode;
- void *port = connector->port;
bool constant_n = drm_dp_has_quirk(&intel_dp->desc, 0,
DP_DPCD_QUIRK_CONSTANT_N);
int bpp, slots = -EINVAL;
@@ -65,7 +65,8 @@ static int intel_dp_mst_compute_link_config(struct intel_encoder *encoder,
false);
slots = drm_dp_atomic_find_vcpi_slots(state, &intel_dp->mst_mgr,
- port, crtc_state->pbn, 0);
+ connector->port,
+ crtc_state->pbn, 0);
if (slots == -EDEADLK)
return slots;
if (slots >= 0)
@@ -73,7 +74,8 @@ static int intel_dp_mst_compute_link_config(struct intel_encoder *encoder,
}
if (slots < 0) {
- DRM_DEBUG_KMS("failed finding vcpi slots:%d\n", slots);
+ drm_dbg_kms(&i915->drm, "failed finding vcpi slots:%d\n",
+ slots);
return slots;
}
@@ -88,56 +90,10 @@ static int intel_dp_mst_compute_link_config(struct intel_encoder *encoder,
return 0;
}
-/*
- * Iterate over all connectors and return the smallest transcoder in the MST
- * stream
- */
-static enum transcoder
-intel_dp_mst_master_trans_compute(struct intel_atomic_state *state,
- struct intel_dp *mst_port)
-{
- struct drm_i915_private *dev_priv = to_i915(state->base.dev);
- struct intel_digital_connector_state *conn_state;
- struct intel_connector *connector;
- enum pipe ret = I915_MAX_PIPES;
- int i;
-
- if (INTEL_GEN(dev_priv) < 12)
- return INVALID_TRANSCODER;
-
- for_each_new_intel_connector_in_state(state, connector, conn_state, i) {
- struct intel_crtc_state *crtc_state;
- struct intel_crtc *crtc;
-
- if (connector->mst_port != mst_port || !conn_state->base.crtc)
- continue;
-
- crtc = to_intel_crtc(conn_state->base.crtc);
- crtc_state = intel_atomic_get_new_crtc_state(state, crtc);
- if (!crtc_state->uapi.active)
- continue;
-
- /*
- * Using crtc->pipe because crtc_state->cpu_transcoder is
- * computed, so others CRTCs could have non-computed
- * cpu_transcoder
- */
- if (crtc->pipe < ret)
- ret = crtc->pipe;
- }
-
- if (ret == I915_MAX_PIPES)
- return INVALID_TRANSCODER;
-
- /* Simple cast works because TGL don't have a eDP transcoder */
- return (enum transcoder)ret;
-}
-
static int intel_dp_mst_compute_config(struct intel_encoder *encoder,
struct intel_crtc_state *pipe_config,
struct drm_connector_state *conn_state)
{
- struct intel_atomic_state *state = to_intel_atomic_state(conn_state->state);
struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
struct intel_dp_mst_encoder *intel_mst = enc_to_mst(encoder);
struct intel_dp *intel_dp = &intel_mst->primary->dp;
@@ -147,7 +103,6 @@ static int intel_dp_mst_compute_config(struct intel_encoder *encoder,
to_intel_digital_connector_state(conn_state);
const struct drm_display_mode *adjusted_mode =
&pipe_config->hw.adjusted_mode;
- void *port = connector->port;
struct link_config_limits limits;
int ret;
@@ -158,8 +113,7 @@ static int intel_dp_mst_compute_config(struct intel_encoder *encoder,
pipe_config->has_pch_encoder = false;
if (intel_conn_state->force_audio == HDMI_AUDIO_AUTO)
- pipe_config->has_audio =
- drm_dp_mst_port_has_audio(&intel_dp->mst_mgr, port);
+ pipe_config->has_audio = connector->port->has_audio;
else
pipe_config->has_audio =
intel_conn_state->force_audio == HDMI_AUDIO_ON;
@@ -201,7 +155,56 @@ static int intel_dp_mst_compute_config(struct intel_encoder *encoder,
intel_ddi_compute_min_voltage_level(dev_priv, pipe_config);
- pipe_config->mst_master_transcoder = intel_dp_mst_master_trans_compute(state, intel_dp);
+ return 0;
+}
+
+/*
+ * Iterate over all connectors and return a mask of
+ * all CPU transcoders streaming over the same DP link.
+ */
+static unsigned int
+intel_dp_mst_transcoder_mask(struct intel_atomic_state *state,
+ struct intel_dp *mst_port)
+{
+ struct drm_i915_private *dev_priv = to_i915(state->base.dev);
+ const struct intel_digital_connector_state *conn_state;
+ struct intel_connector *connector;
+ u8 transcoders = 0;
+ int i;
+
+ if (INTEL_GEN(dev_priv) < 12)
+ return 0;
+
+ for_each_new_intel_connector_in_state(state, connector, conn_state, i) {
+ const struct intel_crtc_state *crtc_state;
+ struct intel_crtc *crtc;
+
+ if (connector->mst_port != mst_port || !conn_state->base.crtc)
+ continue;
+
+ crtc = to_intel_crtc(conn_state->base.crtc);
+ crtc_state = intel_atomic_get_new_crtc_state(state, crtc);
+
+ if (!crtc_state->hw.active)
+ continue;
+
+ transcoders |= BIT(crtc_state->cpu_transcoder);
+ }
+
+ return transcoders;
+}
+
+static int intel_dp_mst_compute_config_late(struct intel_encoder *encoder,
+ struct intel_crtc_state *crtc_state,
+ struct drm_connector_state *conn_state)
+{
+ struct intel_atomic_state *state = to_intel_atomic_state(conn_state->state);
+ struct intel_dp_mst_encoder *intel_mst = enc_to_mst(encoder);
+ struct intel_dp *intel_dp = &intel_mst->primary->dp;
+
+ /* lowest numbered transcoder will be designated master */
+ crtc_state->mst_master_transcoder =
+ ffs(intel_dp_mst_transcoder_mask(state, intel_dp)) - 1;
return 0;
}
@@ -313,7 +316,8 @@ intel_dp_mst_atomic_check(struct drm_connector *connector,
return ret;
}
-static void intel_mst_disable_dp(struct intel_encoder *encoder,
+static void intel_mst_disable_dp(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *old_crtc_state,
const struct drm_connector_state *old_conn_state)
{
@@ -322,22 +326,25 @@ static void intel_mst_disable_dp(struct intel_encoder *encoder,
struct intel_dp *intel_dp = &intel_dig_port->dp;
struct intel_connector *connector =
to_intel_connector(old_conn_state->connector);
+ struct drm_i915_private *i915 = to_i915(connector->base.dev);
int ret;
- DRM_DEBUG_KMS("active links %d\n", intel_dp->active_mst_links);
+ drm_dbg_kms(&i915->drm, "active links %d\n",
+ intel_dp->active_mst_links);
drm_dp_mst_reset_vcpi_slots(&intel_dp->mst_mgr, connector->port);
ret = drm_dp_update_payload_part1(&intel_dp->mst_mgr);
if (ret) {
- DRM_DEBUG_KMS("failed to update payload %d\n", ret);
+ drm_dbg_kms(&i915->drm, "failed to update payload %d\n", ret);
}
if (old_crtc_state->has_audio)
intel_audio_codec_disable(encoder,
old_crtc_state, old_conn_state);
}
-static void intel_mst_post_disable_dp(struct intel_encoder *encoder,
+static void intel_mst_post_disable_dp(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *old_crtc_state,
const struct drm_connector_state *old_conn_state)
{
@@ -371,7 +378,8 @@ static void intel_mst_post_disable_dp(struct intel_encoder *encoder,
if (intel_de_wait_for_set(dev_priv, intel_dp->regs.dp_tp_status,
DP_TP_STATUS_ACT_SENT, 1))
- DRM_ERROR("Timed out waiting for ACT sent when disabling\n");
+ drm_err(&dev_priv->drm,
+ "Timed out waiting for ACT sent when disabling\n");
drm_dp_check_act_status(&intel_dp->mst_mgr);
drm_dp_mst_deallocate_vcpi(&intel_dp->mst_mgr, connector->port);
@@ -402,13 +410,15 @@ static void intel_mst_post_disable_dp(struct intel_encoder *encoder,
intel_mst->connector = NULL;
if (last_mst_stream)
- intel_dig_port->base.post_disable(&intel_dig_port->base,
+ intel_dig_port->base.post_disable(state, &intel_dig_port->base,
old_crtc_state, NULL);
- DRM_DEBUG_KMS("active links %d\n", intel_dp->active_mst_links);
+ drm_dbg_kms(&dev_priv->drm, "active links %d\n",
+ intel_dp->active_mst_links);
}
-static void intel_mst_pre_pll_enable_dp(struct intel_encoder *encoder,
+static void intel_mst_pre_pll_enable_dp(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *pipe_config,
const struct drm_connector_state *conn_state)
{
@@ -417,11 +427,12 @@ static void intel_mst_pre_pll_enable_dp(struct intel_encoder *encoder,
struct intel_dp *intel_dp = &intel_dig_port->dp;
if (intel_dp->active_mst_links == 0)
- intel_dig_port->base.pre_pll_enable(&intel_dig_port->base,
+ intel_dig_port->base.pre_pll_enable(state, &intel_dig_port->base,
pipe_config, NULL);
}
-static void intel_mst_pre_enable_dp(struct intel_encoder *encoder,
+static void intel_mst_pre_enable_dp(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *pipe_config,
const struct drm_connector_state *conn_state)
{
@@ -445,7 +456,8 @@ static void intel_mst_pre_enable_dp(struct intel_encoder *encoder,
INTEL_GEN(dev_priv) >= 12 && first_mst_stream &&
!intel_dp_mst_is_master_trans(pipe_config));
- DRM_DEBUG_KMS("active links %d\n", intel_dp->active_mst_links);
+ drm_dbg_kms(&dev_priv->drm, "active links %d\n",
+ intel_dp->active_mst_links);
if (first_mst_stream)
intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON);
@@ -453,7 +465,7 @@ static void intel_mst_pre_enable_dp(struct intel_encoder *encoder,
drm_dp_send_power_updown_phy(&intel_dp->mst_mgr, connector->port, true);
if (first_mst_stream)
- intel_dig_port->base.pre_enable(&intel_dig_port->base,
+ intel_dig_port->base.pre_enable(state, &intel_dig_port->base,
pipe_config, NULL);
ret = drm_dp_mst_allocate_vcpi(&intel_dp->mst_mgr,
@@ -461,7 +473,7 @@ static void intel_mst_pre_enable_dp(struct intel_encoder *encoder,
pipe_config->pbn,
pipe_config->dp_m_n.tu);
if (!ret)
- DRM_ERROR("failed to allocate vcpi\n");
+ drm_err(&dev_priv->drm, "failed to allocate vcpi\n");
intel_dp->active_mst_links++;
temp = intel_de_read(dev_priv, intel_dp->regs.dp_tp_status);
@@ -477,14 +489,15 @@ static void intel_mst_pre_enable_dp(struct intel_encoder *encoder,
* here for the following ones.
*/
if (INTEL_GEN(dev_priv) < 12 || !first_mst_stream)
- intel_ddi_enable_pipe_clock(pipe_config);
+ intel_ddi_enable_pipe_clock(encoder, pipe_config);
intel_ddi_set_dp_msa(pipe_config, conn_state);
intel_dp_set_m_n(pipe_config, M1_N1);
}
-static void intel_mst_enable_dp(struct intel_encoder *encoder,
+static void intel_mst_enable_dp(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *pipe_config,
const struct drm_connector_state *conn_state)
{
@@ -495,19 +508,23 @@ static void intel_mst_enable_dp(struct intel_encoder *encoder,
drm_WARN_ON(&dev_priv->drm, pipe_config->has_pch_encoder);
- intel_enable_pipe(pipe_config);
-
- intel_crtc_vblank_on(pipe_config);
+ intel_ddi_enable_transcoder_func(encoder, pipe_config);
- DRM_DEBUG_KMS("active links %d\n", intel_dp->active_mst_links);
+ drm_dbg_kms(&dev_priv->drm, "active links %d\n",
+ intel_dp->active_mst_links);
if (intel_de_wait_for_set(dev_priv, intel_dp->regs.dp_tp_status,
DP_TP_STATUS_ACT_SENT, 1))
- DRM_ERROR("Timed out waiting for ACT sent\n");
+ drm_err(&dev_priv->drm, "Timed out waiting for ACT sent\n");
drm_dp_check_act_status(&intel_dp->mst_mgr);
drm_dp_update_payload_part2(&intel_dp->mst_mgr);
+
+ intel_enable_pipe(pipe_config);
+
+ intel_crtc_vblank_on(pipe_config);
+
if (pipe_config->has_audio)
intel_audio_codec_enable(encoder, pipe_config, conn_state);
}
@@ -786,6 +803,7 @@ intel_dp_create_fake_mst_encoder(struct intel_digital_port *intel_dig_port, enum
intel_encoder->pipe_mask = ~0;
intel_encoder->compute_config = intel_dp_mst_compute_config;
+ intel_encoder->compute_config_late = intel_dp_mst_compute_config_late;
intel_encoder->disable = intel_mst_disable_dp;
intel_encoder->post_disable = intel_mst_post_disable_dp;
intel_encoder->pre_pll_enable = intel_mst_pre_pll_enable_dp;
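
Editor's note: the intel_dp_mst_compute_config_late() hunk above derives the MST master transcoder purely from a bitmask — every active transcoder streaming over the same DP link sets a bit, and the lowest set bit is designated master via ffs(mask) - 1. The following is a minimal standalone userspace sketch of just that selection step (not kernel code; the transcoder numbering and BIT() macro are illustrative assumptions):

#include <stdio.h>
#include <strings.h>

#define BIT(n) (1u << (n))

/* hypothetical transcoder numbering, for illustration only */
enum transcoder { TRANSCODER_A, TRANSCODER_B, TRANSCODER_C, TRANSCODER_D };

/* mirrors the ffs(mask) - 1 computation in intel_dp_mst_compute_config_late() */
static int master_transcoder(unsigned int active_mask)
{
	/* ffs() is 1-based and returns 0 for an empty mask,
	 * so no active stream yields -1 (no master / invalid) */
	return ffs(active_mask) - 1;
}

int main(void)
{
	unsigned int mask = BIT(TRANSCODER_C) | BIT(TRANSCODER_B);

	/* lowest numbered active transcoder (B) becomes the master */
	printf("master: %d\n", master_transcoder(mask));
	printf("empty mask: %d\n", master_transcoder(0));
	return 0;
}
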
diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
index 2d47f1f756a2..b45185b80bec 100644
--- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
+++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
@@ -80,7 +80,7 @@ intel_atomic_get_shared_dpll_state(struct drm_atomic_state *s)
{
struct intel_atomic_state *state = to_intel_atomic_state(s);
- WARN_ON(!drm_modeset_is_locked(&s->dev->mode_config.connection_mutex));
+ drm_WARN_ON(s->dev, !drm_modeset_is_locked(&s->dev->mode_config.connection_mutex));
if (!state->dpll_set) {
state->dpll_set = true;
@@ -979,7 +979,7 @@ hsw_ddi_spll_get_dpll(struct intel_atomic_state *state,
struct intel_crtc_state *crtc_state =
intel_atomic_get_new_crtc_state(state, crtc);
- if (WARN_ON(crtc_state->port_clock / 2 != 135000))
+ if (drm_WARN_ON(crtc->base.dev, crtc_state->port_clock / 2 != 135000))
return NULL;
crtc_state->dpll_hw_state.spll = SPLL_PLL_ENABLE | SPLL_FREQ_1350MHz |
@@ -1616,7 +1616,7 @@ static int skl_ddi_wrpll_get_freq(struct drm_i915_private *i915,
dco_freq += ((pll_state->cfgcr1 & DPLL_CFGCR1_DCO_FRACTION_MASK) >> 9) *
ref_clock / 0x8000;
- if (WARN_ON(p0 == 0 || p1 == 0 || p2 == 0))
+ if (drm_WARN_ON(&i915->drm, p0 == 0 || p1 == 0 || p2 == 0))
return 0;
return dco_freq / (p0 * p1 * p2 * 5);
@@ -2074,7 +2074,7 @@ bxt_ddi_hdmi_pll_dividers(struct intel_crtc_state *crtc_state,
clk_div->p1 = best_clock.p1;
clk_div->p2 = best_clock.p2;
- WARN_ON(best_clock.m1 != 2);
+ drm_WARN_ON(&i915->drm, best_clock.m1 != 2);
clk_div->n = best_clock.n;
clk_div->m2_int = best_clock.m2 >> 22;
clk_div->m2_frac = best_clock.m2 & ((1 << 22) - 1);
diff --git a/drivers/gpu/drm/i915/display/intel_dsb.c b/drivers/gpu/drm/i915/display/intel_dsb.c
index d7a6bf2277df..29fec6a92d17 100644
--- a/drivers/gpu/drm/i915/display/intel_dsb.c
+++ b/drivers/gpu/drm/i915/display/intel_dsb.c
@@ -34,7 +34,7 @@
#define DSB_BYTE_EN_SHIFT 20
#define DSB_REG_VALUE_MASK 0xfffff
-static inline bool is_dsb_busy(struct intel_dsb *dsb)
+static bool is_dsb_busy(struct intel_dsb *dsb)
{
struct intel_crtc *crtc = container_of(dsb, typeof(*crtc), dsb);
struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
@@ -43,7 +43,7 @@ static inline bool is_dsb_busy(struct intel_dsb *dsb)
return DSB_STATUS & intel_de_read(dev_priv, DSB_CTRL(pipe, dsb->id));
}
-static inline bool intel_dsb_enable_engine(struct intel_dsb *dsb)
+static bool intel_dsb_enable_engine(struct intel_dsb *dsb)
{
struct intel_crtc *crtc = container_of(dsb, typeof(*crtc), dsb);
struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
@@ -63,7 +63,7 @@ static inline bool intel_dsb_enable_engine(struct intel_dsb *dsb)
return true;
}
-static inline bool intel_dsb_disable_engine(struct intel_dsb *dsb)
+static bool intel_dsb_disable_engine(struct intel_dsb *dsb)
{
struct intel_crtc *crtc = container_of(dsb, typeof(*crtc), dsb);
struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
diff --git a/drivers/gpu/drm/i915/display/intel_dsi.c b/drivers/gpu/drm/i915/display/intel_dsi.c
index a2a937109a5a..afa4e6817e8c 100644
--- a/drivers/gpu/drm/i915/display/intel_dsi.c
+++ b/drivers/gpu/drm/i915/display/intel_dsi.c
@@ -31,20 +31,21 @@ int intel_dsi_tlpx_ns(const struct intel_dsi *intel_dsi)
int intel_dsi_get_modes(struct drm_connector *connector)
{
+ struct drm_i915_private *i915 = to_i915(connector->dev);
struct intel_connector *intel_connector = to_intel_connector(connector);
struct drm_display_mode *mode;
- DRM_DEBUG_KMS("\n");
+ drm_dbg_kms(&i915->drm, "\n");
if (!intel_connector->panel.fixed_mode) {
- DRM_DEBUG_KMS("no fixed mode\n");
+ drm_dbg_kms(&i915->drm, "no fixed mode\n");
return 0;
}
mode = drm_mode_duplicate(connector->dev,
intel_connector->panel.fixed_mode);
if (!mode) {
- DRM_DEBUG_KMS("drm_mode_duplicate failed\n");
+ drm_dbg_kms(&i915->drm, "drm_mode_duplicate failed\n");
return 0;
}
@@ -60,7 +61,7 @@ enum drm_mode_status intel_dsi_mode_valid(struct drm_connector *connector,
const struct drm_display_mode *fixed_mode = intel_connector->panel.fixed_mode;
int max_dotclk = to_i915(connector->dev)->max_dotclk_freq;
- DRM_DEBUG_KMS("\n");
+ drm_dbg_kms(&dev_priv->drm, "\n");
if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
return MODE_NO_DBLESCAN;
diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
index 574dcfec9577..eed037ec0b29 100644
--- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
+++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
@@ -121,7 +121,7 @@ struct i2c_adapter_lookup {
#define ICL_GPIO_DDPA_CTRLCLK_2 8
#define ICL_GPIO_DDPA_CTRLDATA_2 9
-static inline enum port intel_dsi_seq_port_to_port(u8 port)
+static enum port intel_dsi_seq_port_to_port(u8 port)
{
return port ? PORT_C : PORT_A;
}
@@ -453,8 +453,7 @@ static inline void i2c_acpi_find_adapter(struct intel_dsi *intel_dsi,
static const u8 *mipi_exec_i2c(struct intel_dsi *intel_dsi, const u8 *data)
{
- struct drm_device *drm_dev = intel_dsi->base.base.dev;
- struct device *dev = &drm_dev->pdev->dev;
+ struct drm_i915_private *i915 = to_i915(intel_dsi->base.base.dev);
struct i2c_adapter *adapter;
struct i2c_msg msg;
int ret;
@@ -471,7 +470,7 @@ static const u8 *mipi_exec_i2c(struct intel_dsi *intel_dsi, const u8 *data)
adapter = i2c_get_adapter(intel_dsi->i2c_bus_num);
if (!adapter) {
- DRM_DEV_ERROR(dev, "Cannot find a valid i2c bus for xfer\n");
+ drm_err(&i915->drm, "Cannot find a valid i2c bus for xfer\n");
goto err_bus;
}
@@ -489,9 +488,9 @@ static const u8 *mipi_exec_i2c(struct intel_dsi *intel_dsi, const u8 *data)
ret = i2c_transfer(adapter, &msg, 1);
if (ret < 0)
- DRM_DEV_ERROR(dev,
- "Failed to xfer payload of size (%u) to reg (%u)\n",
- payload_size, reg_offset);
+ drm_err(&i915->drm,
+ "Failed to xfer payload of size (%u) to reg (%u)\n",
+ payload_size, reg_offset);
kfree(payload_data);
err_alloc:
diff --git a/drivers/gpu/drm/i915/display/intel_dvo.c b/drivers/gpu/drm/i915/display/intel_dvo.c
index 341d5ce8b062..5cd09034519b 100644
--- a/drivers/gpu/drm/i915/display/intel_dvo.c
+++ b/drivers/gpu/drm/i915/display/intel_dvo.c
@@ -183,7 +183,8 @@ static void intel_dvo_get_config(struct intel_encoder *encoder,
pipe_config->hw.adjusted_mode.crtc_clock = pipe_config->port_clock;
}
-static void intel_disable_dvo(struct intel_encoder *encoder,
+static void intel_disable_dvo(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *old_crtc_state,
const struct drm_connector_state *old_conn_state)
{
@@ -197,7 +198,8 @@ static void intel_disable_dvo(struct intel_encoder *encoder,
intel_de_read(dev_priv, dvo_reg);
}
-static void intel_enable_dvo(struct intel_encoder *encoder,
+static void intel_enable_dvo(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *pipe_config,
const struct drm_connector_state *conn_state)
{
@@ -272,7 +274,8 @@ static int intel_dvo_compute_config(struct intel_encoder *encoder,
return 0;
}
-static void intel_dvo_pre_enable(struct intel_encoder *encoder,
+static void intel_dvo_pre_enable(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *pipe_config,
const struct drm_connector_state *conn_state)
{
diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c
index c125ca9ab9b3..1c26673acb2d 100644
--- a/drivers/gpu/drm/i915/display/intel_fbc.c
+++ b/drivers/gpu/drm/i915/display/intel_fbc.c
@@ -104,7 +104,7 @@ static void i8xx_fbc_deactivate(struct drm_i915_private *dev_priv)
/* Wait for compressing bit to clear */
if (intel_de_wait_for_clear(dev_priv, FBC_STATUS,
FBC_STAT_COMPRESSING, 10)) {
- DRM_DEBUG_KMS("FBC idle timed out\n");
+ drm_dbg_kms(&dev_priv->drm, "FBC idle timed out\n");
return;
}
}
@@ -485,7 +485,8 @@ static int intel_fbc_alloc_cfb(struct drm_i915_private *dev_priv,
if (!ret)
goto err_llb;
else if (ret > 1) {
- DRM_INFO_ONCE("Reducing the compressed framebuffer size. This may lead to less power savings than a non-reduced-size. Try to increase stolen memory size if available in BIOS.\n");
+ drm_info_once(&dev_priv->drm,
+ "Reducing the compressed framebuffer size. This may lead to less power savings than a non-reduced-size. Try to increase stolen memory size if available in BIOS.\n");
}
fbc->threshold = ret;
@@ -520,8 +521,9 @@ static int intel_fbc_alloc_cfb(struct drm_i915_private *dev_priv,
dev_priv->dsm.start + compressed_llb->start);
}
- DRM_DEBUG_KMS("reserved %llu bytes of contiguous stolen space for FBC, threshold: %d\n",
- fbc->compressed_fb.size, fbc->threshold);
+ drm_dbg_kms(&dev_priv->drm,
+ "reserved %llu bytes of contiguous stolen space for FBC, threshold: %d\n",
+ fbc->compressed_fb.size, fbc->threshold);
return 0;
@@ -530,7 +532,7 @@ err_fb:
i915_gem_stolen_remove_node(dev_priv, &fbc->compressed_fb);
err_llb:
if (drm_mm_initialized(&dev_priv->mm.stolen))
- pr_info_once("drm: not enough stolen space for compressed buffer (need %d more bytes), disabling. Hint: you may be able to increase stolen memory size in the BIOS to avoid this.\n", size);
+ drm_info_once(&dev_priv->drm, "not enough stolen space for compressed buffer (need %d more bytes), disabling. Hint: you may be able to increase stolen memory size in the BIOS to avoid this.\n", size);
return -ENOSPC;
}
@@ -538,6 +540,9 @@ static void __intel_fbc_cleanup_cfb(struct drm_i915_private *dev_priv)
{
struct intel_fbc *fbc = &dev_priv->fbc;
+ if (WARN_ON(intel_fbc_hw_is_active(dev_priv)))
+ return;
+
if (!drm_mm_node_allocated(&fbc->compressed_fb))
return;
@@ -562,7 +567,7 @@ void intel_fbc_cleanup_cfb(struct drm_i915_private *dev_priv)
}
static bool stride_is_valid(struct drm_i915_private *dev_priv,
- unsigned int stride)
+ u64 modifier, unsigned int stride)
{
/* This should have been caught earlier. */
if (drm_WARN_ON_ONCE(&dev_priv->drm, (stride & (64 - 1)) != 0))
@@ -578,6 +583,11 @@ static bool stride_is_valid(struct drm_i915_private *dev_priv,
if (IS_GEN(dev_priv, 4) && !IS_G4X(dev_priv) && stride < 2048)
return false;
+ /* Display WA #1105: skl,bxt,kbl,cfl,glk */
+ if (IS_GEN(dev_priv, 9) &&
+ modifier == DRM_FORMAT_MOD_LINEAR && stride & 511)
+ return false;
+
if (stride > 16384)
return false;
@@ -605,6 +615,19 @@ static bool pixel_format_is_valid(struct drm_i915_private *dev_priv,
}
}
+static bool rotation_is_valid(struct drm_i915_private *dev_priv,
+ u32 pixel_format, unsigned int rotation)
+{
+ if (INTEL_GEN(dev_priv) >= 9 && pixel_format == DRM_FORMAT_RGB565 &&
+ drm_rotation_90_or_270(rotation))
+ return false;
+ else if (INTEL_GEN(dev_priv) <= 4 && !IS_G4X(dev_priv) &&
+ rotation != DRM_MODE_ROTATE_0)
+ return false;
+
+ return true;
+}
+
/*
* For some reason, the hardware tracking starts looking at whatever we
* programmed as the display plane base address register. It does not look at
@@ -639,6 +662,22 @@ static bool intel_fbc_hw_tracking_covers_screen(struct intel_crtc *crtc)
return effective_w <= max_w && effective_h <= max_h;
}
+static bool tiling_is_valid(struct drm_i915_private *dev_priv,
+ uint64_t modifier)
+{
+ switch (modifier) {
+ case DRM_FORMAT_MOD_LINEAR:
+ if (INTEL_GEN(dev_priv) >= 9)
+ return true;
+ return false;
+ case I915_FORMAT_MOD_X_TILED:
+ case I915_FORMAT_MOD_Y_TILED:
+ return true;
+ default:
+ return false;
+ }
+}
+
static void intel_fbc_update_state_cache(struct intel_crtc *crtc,
const struct intel_crtc_state *crtc_state,
const struct intel_plane_state *plane_state)
@@ -672,6 +711,7 @@ static void intel_fbc_update_state_cache(struct intel_crtc *crtc,
cache->fb.format = fb->format;
cache->fb.stride = fb->pitches[0];
+ cache->fb.modifier = fb->modifier;
drm_WARN_ON(&dev_priv->drm, plane_state->flags & PLANE_HAS_FENCE &&
!plane_state->vma->fence);
@@ -745,30 +785,40 @@ static bool intel_fbc_can_activate(struct intel_crtc *crtc)
return false;
}
- /* The use of a CPU fence is mandatory in order to detect writes
- * by the CPU to the scanout and trigger updates to the FBC.
+ /* The use of a CPU fence is one of two ways to detect writes by the
+ * CPU to the scanout and trigger updates to the FBC.
+ *
+ * The other method is by software tracking (see
+ * intel_fbc_invalidate/flush()), it will manually notify FBC and nuke
+ * the current compressed buffer and recompress it.
*
* Note that is possible for a tiled surface to be unmappable (and
- * so have no fence associated with it) due to aperture constaints
+ * so have no fence associated with it) due to aperture constraints
* at the time of pinning.
*
* FIXME with 90/270 degree rotation we should use the fence on
* the normal GTT view (the rotated view doesn't even have a
* fence). Would need changes to the FBC fence Y offset as well.
- * For now this will effecively disable FBC with 90/270 degree
+ * For now this will effectively disable FBC with 90/270 degree
* rotation.
*/
- if (cache->fence_id < 0) {
+ if (INTEL_GEN(dev_priv) < 9 && cache->fence_id < 0) {
fbc->no_fbc_reason = "framebuffer not tiled or fenced";
return false;
}
- if (INTEL_GEN(dev_priv) <= 4 && !IS_G4X(dev_priv) &&
- cache->plane.rotation != DRM_MODE_ROTATE_0) {
+
+ if (!rotation_is_valid(dev_priv, cache->fb.format->format,
+ cache->plane.rotation)) {
fbc->no_fbc_reason = "rotation unsupported";
return false;
}
- if (!stride_is_valid(dev_priv, cache->fb.stride)) {
+ if (!tiling_is_valid(dev_priv, cache->fb.modifier)) {
+ fbc->no_fbc_reason = "tiling unsupported";
+ return false;
+ }
+
+ if (!stride_is_valid(dev_priv, cache->fb.modifier, cache->fb.stride)) {
fbc->no_fbc_reason = "framebuffer stride not supported";
return false;
}
@@ -947,7 +997,8 @@ static void __intel_fbc_disable(struct drm_i915_private *dev_priv)
drm_WARN_ON(&dev_priv->drm, !fbc->crtc);
drm_WARN_ON(&dev_priv->drm, fbc->active);
- DRM_DEBUG_KMS("Disabling FBC on pipe %c\n", pipe_name(crtc->pipe));
+ drm_dbg_kms(&dev_priv->drm, "Disabling FBC on pipe %c\n",
+ pipe_name(crtc->pipe));
__intel_fbc_cleanup_cfb(dev_priv);
@@ -1175,7 +1226,8 @@ void intel_fbc_enable(struct intel_atomic_state *state,
else
cache->gen9_wa_cfb_stride = 0;
- DRM_DEBUG_KMS("Enabling FBC on pipe %c\n", pipe_name(crtc->pipe));
+ drm_dbg_kms(&dev_priv->drm, "Enabling FBC on pipe %c\n",
+ pipe_name(crtc->pipe));
fbc->no_fbc_reason = "FBC enabled but not active yet\n";
fbc->crtc = crtc;
@@ -1237,7 +1289,7 @@ static void intel_fbc_underrun_work_fn(struct work_struct *work)
if (fbc->underrun_detected || !fbc->crtc)
goto out;
- DRM_DEBUG_KMS("Disabling FBC due to FIFO underrun.\n");
+ drm_dbg_kms(&dev_priv->drm, "Disabling FBC due to FIFO underrun.\n");
fbc->underrun_detected = true;
intel_fbc_deactivate(dev_priv, "FIFO underrun");
@@ -1263,7 +1315,8 @@ int intel_fbc_reset_underrun(struct drm_i915_private *dev_priv)
return ret;
if (dev_priv->fbc.underrun_detected) {
- DRM_DEBUG_KMS("Re-allowing FBC after fifo underrun\n");
+ drm_dbg_kms(&dev_priv->drm,
+ "Re-allowing FBC after fifo underrun\n");
dev_priv->fbc.no_fbc_reason = "FIFO underrun cleared";
}
@@ -1334,7 +1387,8 @@ static bool need_fbc_vtd_wa(struct drm_i915_private *dev_priv)
/* WaFbcTurnOffFbcWhenHyperVisorIsUsed:skl,bxt */
if (intel_vtd_active() &&
(IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv))) {
- DRM_INFO("Disabling framebuffer compression (FBC) to prevent screen flicker with VT-d enabled\n");
+ drm_info(&dev_priv->drm,
+ "Disabling framebuffer compression (FBC) to prevent screen flicker with VT-d enabled\n");
return true;
}
@@ -1362,8 +1416,8 @@ void intel_fbc_init(struct drm_i915_private *dev_priv)
mkwrite_device_info(dev_priv)->display.has_fbc = false;
i915_modparams.enable_fbc = intel_sanitize_fbc_option(dev_priv);
- DRM_DEBUG_KMS("Sanitized enable_fbc value: %d\n",
- i915_modparams.enable_fbc);
+ drm_dbg_kms(&dev_priv->drm, "Sanitized enable_fbc value: %d\n",
+ i915_modparams.enable_fbc);
if (!HAS_FBC(dev_priv)) {
fbc->no_fbc_reason = "unsupported by this chipset";
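
Editor's note: the intel_fbc.c hunks above add two layout checks before FBC can activate — tiling_is_valid() only accepts linear framebuffers on gen9+ (X/Y tiling is always fine), and stride_is_valid() gains a modifier argument so gen9 linear strides must be 512-byte aligned (Display WA #1105). A self-contained userspace condensation of those two checks (made-up modifier/gen encodings, not the driver code):

#include <stdbool.h>
#include <stdio.h>

/* hypothetical stand-ins for the kernel's framebuffer modifiers */
enum fb_modifier { MOD_LINEAR, MOD_X_TILED, MOD_Y_TILED, MOD_OTHER };

/* linear scanout is only FBC-eligible on gen9+, X/Y tiling always is */
static bool tiling_ok(int gen, enum fb_modifier mod)
{
	switch (mod) {
	case MOD_LINEAR:
		return gen >= 9;
	case MOD_X_TILED:
	case MOD_Y_TILED:
		return true;
	default:
		return false;
	}
}

/* Display WA #1105 analogue: gen9 linear strides must be 512-byte aligned */
static bool stride_ok(int gen, enum fb_modifier mod, unsigned int stride)
{
	if (gen == 9 && mod == MOD_LINEAR && (stride & 511))
		return false;
	return true;
}

int main(void)
{
	printf("gen9 linear, stride 4096: %d\n",
	       tiling_ok(9, MOD_LINEAR) && stride_ok(9, MOD_LINEAR, 4096));
	printf("gen9 linear, stride 4100: %d\n",
	       tiling_ok(9, MOD_LINEAR) && stride_ok(9, MOD_LINEAR, 4100));
	printf("gen8 linear:              %d\n", tiling_ok(8, MOD_LINEAR));
	return 0;
}
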
diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c
index 3bc804212a99..bd39eb6a21b8 100644
--- a/drivers/gpu/drm/i915/display/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/display/intel_fbdev.c
@@ -146,7 +146,7 @@ static int intelfb_alloc(struct drm_fb_helper *helper,
if (IS_ERR(obj))
obj = i915_gem_object_create_shmem(dev_priv, size);
if (IS_ERR(obj)) {
- DRM_ERROR("failed to allocate framebuffer\n");
+ drm_err(&dev_priv->drm, "failed to allocate framebuffer\n");
return PTR_ERR(obj);
}
@@ -183,21 +183,23 @@ static int intelfb_create(struct drm_fb_helper *helper,
if (intel_fb &&
(sizes->fb_width > intel_fb->base.width ||
sizes->fb_height > intel_fb->base.height)) {
- DRM_DEBUG_KMS("BIOS fb too small (%dx%d), we require (%dx%d),"
- " releasing it\n",
- intel_fb->base.width, intel_fb->base.height,
- sizes->fb_width, sizes->fb_height);
+ drm_dbg_kms(&dev_priv->drm,
+ "BIOS fb too small (%dx%d), we require (%dx%d),"
+ " releasing it\n",
+ intel_fb->base.width, intel_fb->base.height,
+ sizes->fb_width, sizes->fb_height);
drm_framebuffer_put(&intel_fb->base);
intel_fb = ifbdev->fb = NULL;
}
if (!intel_fb || drm_WARN_ON(dev, !intel_fb_obj(&intel_fb->base))) {
- DRM_DEBUG_KMS("no BIOS fb, allocating a new one\n");
+ drm_dbg_kms(&dev_priv->drm,
+ "no BIOS fb, allocating a new one\n");
ret = intelfb_alloc(helper, sizes);
if (ret)
return ret;
intel_fb = ifbdev->fb;
} else {
- DRM_DEBUG_KMS("re-using BIOS fb\n");
+ drm_dbg_kms(&dev_priv->drm, "re-using BIOS fb\n");
prealloc = true;
sizes->fb_width = intel_fb->base.width;
sizes->fb_height = intel_fb->base.height;
@@ -220,7 +222,7 @@ static int intelfb_create(struct drm_fb_helper *helper,
info = drm_fb_helper_alloc_fbi(helper);
if (IS_ERR(info)) {
- DRM_ERROR("Failed to allocate fb_info\n");
+ drm_err(&dev_priv->drm, "Failed to allocate fb_info\n");
ret = PTR_ERR(info);
goto out_unpin;
}
@@ -240,7 +242,8 @@ static int intelfb_create(struct drm_fb_helper *helper,
vaddr = i915_vma_pin_iomap(vma);
if (IS_ERR(vaddr)) {
- DRM_ERROR("Failed to remap framebuffer into virtual memory\n");
+ drm_err(&dev_priv->drm,
+ "Failed to remap framebuffer into virtual memory\n");
ret = PTR_ERR(vaddr);
goto out_unpin;
}
@@ -258,9 +261,9 @@ static int intelfb_create(struct drm_fb_helper *helper,
/* Use default scratch pixmap (info->pixmap.flags = FB_PIXMAP_SYSTEM) */
- DRM_DEBUG_KMS("allocated %dx%d fb: 0x%08x\n",
- ifbdev->fb->base.width, ifbdev->fb->base.height,
- i915_ggtt_offset(vma));
+ drm_dbg_kms(&dev_priv->drm, "allocated %dx%d fb: 0x%08x\n",
+ ifbdev->fb->base.width, ifbdev->fb->base.height,
+ i915_ggtt_offset(vma));
ifbdev->vma = vma;
ifbdev->vma_flags = flags;
@@ -309,6 +312,7 @@ static void intel_fbdev_destroy(struct intel_fbdev *ifbdev)
static bool intel_fbdev_init_bios(struct drm_device *dev,
struct intel_fbdev *ifbdev)
{
+ struct drm_i915_private *i915 = to_i915(dev);
struct intel_framebuffer *fb = NULL;
struct drm_crtc *crtc;
struct intel_crtc *intel_crtc;
@@ -321,21 +325,24 @@ static bool intel_fbdev_init_bios(struct drm_device *dev,
intel_crtc = to_intel_crtc(crtc);
if (!crtc->state->active || !obj) {
- DRM_DEBUG_KMS("pipe %c not active or no fb, skipping\n",
- pipe_name(intel_crtc->pipe));
+ drm_dbg_kms(&i915->drm,
+ "pipe %c not active or no fb, skipping\n",
+ pipe_name(intel_crtc->pipe));
continue;
}
if (obj->base.size > max_size) {
- DRM_DEBUG_KMS("found possible fb from plane %c\n",
- pipe_name(intel_crtc->pipe));
+ drm_dbg_kms(&i915->drm,
+ "found possible fb from plane %c\n",
+ pipe_name(intel_crtc->pipe));
fb = to_intel_framebuffer(crtc->primary->state->fb);
max_size = obj->base.size;
}
}
if (!fb) {
- DRM_DEBUG_KMS("no active fbs found, not using BIOS config\n");
+ drm_dbg_kms(&i915->drm,
+ "no active fbs found, not using BIOS config\n");
goto out;
}
@@ -346,13 +353,14 @@ static bool intel_fbdev_init_bios(struct drm_device *dev,
intel_crtc = to_intel_crtc(crtc);
if (!crtc->state->active) {
- DRM_DEBUG_KMS("pipe %c not active, skipping\n",
- pipe_name(intel_crtc->pipe));
+ drm_dbg_kms(&i915->drm,
+ "pipe %c not active, skipping\n",
+ pipe_name(intel_crtc->pipe));
continue;
}
- DRM_DEBUG_KMS("checking plane %c for BIOS fb\n",
- pipe_name(intel_crtc->pipe));
+ drm_dbg_kms(&i915->drm, "checking plane %c for BIOS fb\n",
+ pipe_name(intel_crtc->pipe));
/*
* See if the plane fb we found above will fit on this
@@ -362,9 +370,10 @@ static bool intel_fbdev_init_bios(struct drm_device *dev,
cur_size = crtc->state->adjusted_mode.crtc_hdisplay;
cur_size = cur_size * fb->base.format->cpp[0];
if (fb->base.pitches[0] < cur_size) {
- DRM_DEBUG_KMS("fb not wide enough for plane %c (%d vs %d)\n",
- pipe_name(intel_crtc->pipe),
- cur_size, fb->base.pitches[0]);
+ drm_dbg_kms(&i915->drm,
+ "fb not wide enough for plane %c (%d vs %d)\n",
+ pipe_name(intel_crtc->pipe),
+ cur_size, fb->base.pitches[0]);
fb = NULL;
break;
}
@@ -372,28 +381,32 @@ static bool intel_fbdev_init_bios(struct drm_device *dev,
cur_size = crtc->state->adjusted_mode.crtc_vdisplay;
cur_size = intel_fb_align_height(&fb->base, 0, cur_size);
cur_size *= fb->base.pitches[0];
- DRM_DEBUG_KMS("pipe %c area: %dx%d, bpp: %d, size: %d\n",
- pipe_name(intel_crtc->pipe),
- crtc->state->adjusted_mode.crtc_hdisplay,
- crtc->state->adjusted_mode.crtc_vdisplay,
- fb->base.format->cpp[0] * 8,
- cur_size);
+ drm_dbg_kms(&i915->drm,
+ "pipe %c area: %dx%d, bpp: %d, size: %d\n",
+ pipe_name(intel_crtc->pipe),
+ crtc->state->adjusted_mode.crtc_hdisplay,
+ crtc->state->adjusted_mode.crtc_vdisplay,
+ fb->base.format->cpp[0] * 8,
+ cur_size);
if (cur_size > max_size) {
- DRM_DEBUG_KMS("fb not big enough for plane %c (%d vs %d)\n",
- pipe_name(intel_crtc->pipe),
- cur_size, max_size);
+ drm_dbg_kms(&i915->drm,
+ "fb not big enough for plane %c (%d vs %d)\n",
+ pipe_name(intel_crtc->pipe),
+ cur_size, max_size);
fb = NULL;
break;
}
- DRM_DEBUG_KMS("fb big enough for plane %c (%d >= %d)\n",
- pipe_name(intel_crtc->pipe),
- max_size, cur_size);
+ drm_dbg_kms(&i915->drm,
+ "fb big enough for plane %c (%d >= %d)\n",
+ pipe_name(intel_crtc->pipe),
+ max_size, cur_size);
}
if (!fb) {
- DRM_DEBUG_KMS("BIOS fb not suitable for all pipes, not using\n");
+ drm_dbg_kms(&i915->drm,
+ "BIOS fb not suitable for all pipes, not using\n");
goto out;
}
@@ -415,7 +428,7 @@ static bool intel_fbdev_init_bios(struct drm_device *dev,
}
- DRM_DEBUG_KMS("using BIOS fb for initial console\n");
+ drm_dbg_kms(&i915->drm, "using BIOS fb for initial console\n");
return true;
out:
@@ -522,8 +535,9 @@ void intel_fbdev_fini(struct drm_i915_private *dev_priv)
* processing, fbdev will perform a full connector reprobe if a hotplug event
* was received while HPD was suspended.
*/
-static void intel_fbdev_hpd_set_suspend(struct intel_fbdev *ifbdev, int state)
+static void intel_fbdev_hpd_set_suspend(struct drm_i915_private *i915, int state)
{
+ struct intel_fbdev *ifbdev = i915->fbdev;
bool send_hpd = false;
mutex_lock(&ifbdev->hpd_lock);
@@ -533,7 +547,7 @@ static void intel_fbdev_hpd_set_suspend(struct intel_fbdev *ifbdev, int state)
mutex_unlock(&ifbdev->hpd_lock);
if (send_hpd) {
- DRM_DEBUG_KMS("Handling delayed fbcon HPD event\n");
+ drm_dbg_kms(&i915->drm, "Handling delayed fbcon HPD event\n");
drm_fb_helper_hotplug_event(&ifbdev->helper);
}
}
@@ -588,7 +602,7 @@ void intel_fbdev_set_suspend(struct drm_device *dev, int state, bool synchronous
drm_fb_helper_set_suspend(&ifbdev->helper, state);
console_unlock();
- intel_fbdev_hpd_set_suspend(ifbdev, state);
+ intel_fbdev_hpd_set_suspend(dev_priv, state);
}
void intel_fbdev_output_poll_changed(struct drm_device *dev)
diff --git a/drivers/gpu/drm/i915/display/intel_frontbuffer.c b/drivers/gpu/drm/i915/display/intel_frontbuffer.c
index 6cb02c912acc..2979ed2588eb 100644
--- a/drivers/gpu/drm/i915/display/intel_frontbuffer.c
+++ b/drivers/gpu/drm/i915/display/intel_frontbuffer.c
@@ -302,12 +302,14 @@ void intel_frontbuffer_track(struct intel_frontbuffer *old,
BITS_PER_TYPE(atomic_t));
if (old) {
- WARN_ON(!(atomic_read(&old->bits) & frontbuffer_bits));
+ drm_WARN_ON(old->obj->base.dev,
+ !(atomic_read(&old->bits) & frontbuffer_bits));
atomic_andnot(frontbuffer_bits, &old->bits);
}
if (new) {
- WARN_ON(atomic_read(&new->bits) & frontbuffer_bits);
+ drm_WARN_ON(new->obj->base.dev,
+ atomic_read(&new->bits) & frontbuffer_bits);
atomic_or(frontbuffer_bits, &new->bits);
}
}
diff --git a/drivers/gpu/drm/i915/display/intel_global_state.c b/drivers/gpu/drm/i915/display/intel_global_state.c
index a0cc894c3868..212d4ee68205 100644
--- a/drivers/gpu/drm/i915/display/intel_global_state.c
+++ b/drivers/gpu/drm/i915/display/intel_global_state.c
@@ -64,13 +64,14 @@ static void assert_global_state_read_locked(struct intel_atomic_state *state)
return;
}
- WARN(1, "Global state not read locked\n");
+ drm_WARN(&dev_priv->drm, 1, "Global state not read locked\n");
}
struct intel_global_state *
intel_atomic_get_global_obj_state(struct intel_atomic_state *state,
struct intel_global_obj *obj)
{
+ struct drm_i915_private *i915 = to_i915(state->base.dev);
int index, num_objs, i;
size_t size;
struct __intel_global_objs_state *arr;
@@ -106,8 +107,8 @@ intel_atomic_get_global_obj_state(struct intel_atomic_state *state,
state->num_global_objs = num_objs;
- DRM_DEBUG_ATOMIC("Added new global object %p state %p to %p\n",
- obj, obj_state, state);
+ drm_dbg_atomic(&i915->drm, "Added new global object %p state %p to %p\n",
+ obj, obj_state, state);
return obj_state;
}
@@ -147,7 +148,7 @@ void intel_atomic_swap_global_state(struct intel_atomic_state *state)
for_each_oldnew_global_obj_in_state(state, obj, old_obj_state,
new_obj_state, i) {
- WARN_ON(obj->state != old_obj_state);
+ drm_WARN_ON(&dev_priv->drm, obj->state != old_obj_state);
/*
* If the new state wasn't modified (and properly
diff --git a/drivers/gpu/drm/i915/display/intel_gmbus.c b/drivers/gpu/drm/i915/display/intel_gmbus.c
index 1fd3a5a6296b..a8d119b6b45c 100644
--- a/drivers/gpu/drm/i915/display/intel_gmbus.c
+++ b/drivers/gpu/drm/i915/display/intel_gmbus.c
@@ -379,8 +379,7 @@ gmbus_wait_idle(struct drm_i915_private *dev_priv)
return ret;
}
-static inline
-unsigned int gmbus_max_xfer_size(struct drm_i915_private *dev_priv)
+static unsigned int gmbus_max_xfer_size(struct drm_i915_private *dev_priv)
{
return INTEL_GEN(dev_priv) >= 9 ? GEN9_GMBUS_BYTE_COUNT_MAX :
GMBUS_BYTE_COUNT_MAX;
diff --git a/drivers/gpu/drm/i915/display/intel_hdcp.c b/drivers/gpu/drm/i915/display/intel_hdcp.c
index ee0f27ea2810..2cbc4619b4ce 100644
--- a/drivers/gpu/drm/i915/display/intel_hdcp.c
+++ b/drivers/gpu/drm/i915/display/intel_hdcp.c
@@ -109,18 +109,16 @@ bool intel_hdcp2_capable(struct intel_connector *connector)
return capable;
}
-static inline
-bool intel_hdcp_in_use(struct drm_i915_private *dev_priv,
- enum transcoder cpu_transcoder, enum port port)
+static bool intel_hdcp_in_use(struct drm_i915_private *dev_priv,
+ enum transcoder cpu_transcoder, enum port port)
{
return intel_de_read(dev_priv,
HDCP_STATUS(dev_priv, cpu_transcoder, port)) &
HDCP_STATUS_ENC;
}
-static inline
-bool intel_hdcp2_in_use(struct drm_i915_private *dev_priv,
- enum transcoder cpu_transcoder, enum port port)
+static bool intel_hdcp2_in_use(struct drm_i915_private *dev_priv,
+ enum transcoder cpu_transcoder, enum port port)
{
return intel_de_read(dev_priv,
HDCP2_STATUS(dev_priv, cpu_transcoder, port)) &
@@ -853,8 +851,7 @@ static int _intel_hdcp_enable(struct intel_connector *connector)
return ret;
}
-static inline
-struct intel_connector *intel_hdcp_to_connector(struct intel_hdcp *hdcp)
+static struct intel_connector *intel_hdcp_to_connector(struct intel_hdcp *hdcp)
{
return container_of(hdcp, struct intel_connector, hdcp);
}
@@ -1391,6 +1388,7 @@ static
int hdcp2_propagate_stream_management_info(struct intel_connector *connector)
{
struct intel_digital_port *intel_dig_port = intel_attached_dig_port(connector);
+ struct drm_i915_private *i915 = to_i915(connector->base.dev);
struct intel_hdcp *hdcp = &connector->hdcp;
union {
struct hdcp2_rep_stream_manage stream_manage;
@@ -1431,7 +1429,7 @@ int hdcp2_propagate_stream_management_info(struct intel_connector *connector)
hdcp->seq_num_m++;
if (hdcp->seq_num_m > HDCP_2_2_SEQ_NUM_MAX) {
- DRM_DEBUG_KMS("seq_num_m roll over.\n");
+ drm_dbg_kms(&i915->drm, "seq_num_m roll over.\n");
return -1;
}
@@ -1855,8 +1853,7 @@ static const struct component_ops i915_hdcp_component_ops = {
.unbind = i915_hdcp_component_unbind,
};
-static inline
-enum mei_fw_ddi intel_get_mei_fw_ddi_index(enum port port)
+static enum mei_fw_ddi intel_get_mei_fw_ddi_index(enum port port)
{
switch (port) {
case PORT_A:
@@ -1868,8 +1865,7 @@ enum mei_fw_ddi intel_get_mei_fw_ddi_index(enum port port)
}
}
-static inline
-enum mei_fw_tc intel_get_mei_fw_tc(enum transcoder cpu_transcoder)
+static enum mei_fw_tc intel_get_mei_fw_tc(enum transcoder cpu_transcoder)
{
switch (cpu_transcoder) {
case TRANSCODER_A ... TRANSCODER_D:
@@ -1879,8 +1875,8 @@ enum mei_fw_tc intel_get_mei_fw_tc(enum transcoder cpu_transcoder)
}
}
-static inline int initialize_hdcp_port_data(struct intel_connector *connector,
- const struct intel_hdcp_shim *shim)
+static int initialize_hdcp_port_data(struct intel_connector *connector,
+ const struct intel_hdcp_shim *shim)
{
struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
struct intel_hdcp *hdcp = &connector->hdcp;
@@ -2075,7 +2071,8 @@ int intel_hdcp_disable(struct intel_connector *connector)
return ret;
}
-void intel_hdcp_update_pipe(struct intel_encoder *encoder,
+void intel_hdcp_update_pipe(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *crtc_state,
const struct drm_connector_state *conn_state)
{
diff --git a/drivers/gpu/drm/i915/display/intel_hdcp.h b/drivers/gpu/drm/i915/display/intel_hdcp.h
index 7c12ad609b1f..86bbaec120cc 100644
--- a/drivers/gpu/drm/i915/display/intel_hdcp.h
+++ b/drivers/gpu/drm/i915/display/intel_hdcp.h
@@ -11,6 +11,7 @@
struct drm_connector;
struct drm_connector_state;
struct drm_i915_private;
+struct intel_atomic_state;
struct intel_connector;
struct intel_crtc_state;
struct intel_encoder;
@@ -26,7 +27,8 @@ int intel_hdcp_init(struct intel_connector *connector,
int intel_hdcp_enable(struct intel_connector *connector,
enum transcoder cpu_transcoder, u8 content_type);
int intel_hdcp_disable(struct intel_connector *connector);
-void intel_hdcp_update_pipe(struct intel_encoder *encoder,
+void intel_hdcp_update_pipe(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *crtc_state,
const struct drm_connector_state *conn_state);
bool is_hdcp_supported(struct drm_i915_private *dev_priv, enum port port);
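
Editor's note: the intel_hdcp.h change above is one instance of the mechanical API change running through this diff — every encoder hook (enable/disable/pre_enable/post_disable/update_pipe, etc.) now takes the intel_atomic_state as its first argument, so callbacks can look up related object states from the same commit instead of reaching for globals. A toy userspace sketch of that calling convention (all names here are stand-ins, not the i915 structures):

#include <stdio.h>

/* toy stand-ins for the driver's atomic state and encoder objects */
struct atomic_state { int commit_id; };
struct encoder;

/* every hook now receives the atomic state as its first argument */
struct encoder_funcs {
	void (*pre_enable)(struct atomic_state *state, struct encoder *enc);
	void (*enable)(struct atomic_state *state, struct encoder *enc);
};

struct encoder {
	const char *name;
	const struct encoder_funcs *funcs;
};

static void toy_pre_enable(struct atomic_state *state, struct encoder *enc)
{
	/* with the state in hand, a hook can consult any other object
	 * state belonging to the same commit */
	printf("%s: pre_enable (commit %d)\n", enc->name, state->commit_id);
}

static void toy_enable(struct atomic_state *state, struct encoder *enc)
{
	printf("%s: enable (commit %d)\n", enc->name, state->commit_id);
}

static const struct encoder_funcs toy_funcs = {
	.pre_enable = toy_pre_enable,
	.enable = toy_enable,
};

int main(void)
{
	struct atomic_state state = { .commit_id = 42 };
	struct encoder enc = { .name = "toy-encoder", .funcs = &toy_funcs };

	/* the caller owns the state and threads it down, mirroring
	 * calls like base.pre_enable(state, &intel_dig_port->base, ...) */
	enc.funcs->pre_enable(&state, &enc);
	enc.funcs->enable(&state, &enc);
	return 0;
}
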
diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c
index 821411b93dac..010f37240710 100644
--- a/drivers/gpu/drm/i915/display/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/display/intel_hdmi.c
@@ -44,7 +44,6 @@
#include "intel_audio.h"
#include "intel_connector.h"
#include "intel_ddi.h"
-#include "intel_display_debugfs.h"
#include "intel_display_types.h"
#include "intel_dp.h"
#include "intel_dpio_phy.h"
@@ -707,13 +706,15 @@ void intel_read_infoframe(struct intel_encoder *encoder,
/* see comment above for the reason for this offset */
ret = hdmi_infoframe_unpack(frame, buffer + 1, sizeof(buffer) - 1);
if (ret) {
- DRM_DEBUG_KMS("Failed to unpack infoframe type 0x%02x\n", type);
+ drm_dbg_kms(encoder->base.dev,
+ "Failed to unpack infoframe type 0x%02x\n", type);
return;
}
if (frame->any.type != type)
- DRM_DEBUG_KMS("Found the wrong infoframe type 0x%x (expected 0x%02x)\n",
- frame->any.type, type);
+ drm_dbg_kms(encoder->base.dev,
+ "Found the wrong infoframe type 0x%x (expected 0x%02x)\n",
+ frame->any.type, type);
}
static bool
@@ -853,7 +854,8 @@ intel_hdmi_compute_drm_infoframe(struct intel_encoder *encoder,
ret = drm_hdmi_infoframe_set_hdr_metadata(frame, conn_state);
if (ret < 0) {
- DRM_DEBUG_KMS("couldn't set HDR metadata in infoframe\n");
+ drm_dbg_kms(&dev_priv->drm,
+ "couldn't set HDR metadata in infoframe\n");
return false;
}
@@ -893,8 +895,9 @@ static void g4x_set_infoframes(struct intel_encoder *encoder,
if (!(val & VIDEO_DIP_ENABLE))
return;
if (port != (val & VIDEO_DIP_PORT_MASK)) {
- DRM_DEBUG_KMS("video DIP still enabled on port %c\n",
- (val & VIDEO_DIP_PORT_MASK) >> 29);
+ drm_dbg_kms(&dev_priv->drm,
+ "video DIP still enabled on port %c\n",
+ (val & VIDEO_DIP_PORT_MASK) >> 29);
return;
}
val &= ~(VIDEO_DIP_ENABLE | VIDEO_DIP_ENABLE_AVI |
@@ -906,8 +909,9 @@ static void g4x_set_infoframes(struct intel_encoder *encoder,
if (port != (val & VIDEO_DIP_PORT_MASK)) {
if (val & VIDEO_DIP_ENABLE) {
- DRM_DEBUG_KMS("video DIP already enabled on port %c\n",
- (val & VIDEO_DIP_PORT_MASK) >> 29);
+ drm_dbg_kms(&dev_priv->drm,
+ "video DIP already enabled on port %c\n",
+ (val & VIDEO_DIP_PORT_MASK) >> 29);
return;
}
val &= ~VIDEO_DIP_PORT_MASK;
@@ -1264,8 +1268,8 @@ void intel_dp_dual_mode_set_tmds_output(struct intel_hdmi *hdmi, bool enable)
if (hdmi->dp_dual_mode.type < DRM_DP_DUAL_MODE_TYPE2_DVI)
return;
- DRM_DEBUG_KMS("%s DP dual mode adaptor TMDS output\n",
- enable ? "Enabling" : "Disabling");
+ drm_dbg_kms(&dev_priv->drm, "%s DP dual mode adaptor TMDS output\n",
+ enable ? "Enabling" : "Disabling");
drm_dp_dual_mode_set_tmds_output(hdmi->dp_dual_mode.type,
adapter, enable);
@@ -1346,13 +1350,14 @@ int intel_hdmi_hdcp_write_an_aksv(struct intel_digital_port *intel_dig_port,
ret = intel_hdmi_hdcp_write(intel_dig_port, DRM_HDCP_DDC_AN, an,
DRM_HDCP_AN_LEN);
if (ret) {
- DRM_DEBUG_KMS("Write An over DDC failed (%d)\n", ret);
+ drm_dbg_kms(&i915->drm, "Write An over DDC failed (%d)\n",
+ ret);
return ret;
}
ret = intel_gmbus_output_aksv(adapter);
if (ret < 0) {
- DRM_DEBUG_KMS("Failed to output aksv (%d)\n", ret);
+ drm_dbg_kms(&i915->drm, "Failed to output aksv (%d)\n", ret);
return ret;
}
return 0;
@@ -1361,11 +1366,14 @@ int intel_hdmi_hdcp_write_an_aksv(struct intel_digital_port *intel_dig_port,
static int intel_hdmi_hdcp_read_bksv(struct intel_digital_port *intel_dig_port,
u8 *bksv)
{
+ struct drm_i915_private *i915 = to_i915(intel_dig_port->base.base.dev);
+
int ret;
ret = intel_hdmi_hdcp_read(intel_dig_port, DRM_HDCP_DDC_BKSV, bksv,
DRM_HDCP_KSV_LEN);
if (ret)
- DRM_DEBUG_KMS("Read Bksv over DDC failed (%d)\n", ret);
+ drm_dbg_kms(&i915->drm, "Read Bksv over DDC failed (%d)\n",
+ ret);
return ret;
}
@@ -1373,11 +1381,14 @@ static
int intel_hdmi_hdcp_read_bstatus(struct intel_digital_port *intel_dig_port,
u8 *bstatus)
{
+ struct drm_i915_private *i915 = to_i915(intel_dig_port->base.base.dev);
+
int ret;
ret = intel_hdmi_hdcp_read(intel_dig_port, DRM_HDCP_DDC_BSTATUS,
bstatus, DRM_HDCP_BSTATUS_LEN);
if (ret)
- DRM_DEBUG_KMS("Read bstatus over DDC failed (%d)\n", ret);
+ drm_dbg_kms(&i915->drm, "Read bstatus over DDC failed (%d)\n",
+ ret);
return ret;
}
@@ -1385,12 +1396,14 @@ static
int intel_hdmi_hdcp_repeater_present(struct intel_digital_port *intel_dig_port,
bool *repeater_present)
{
+ struct drm_i915_private *i915 = to_i915(intel_dig_port->base.base.dev);
int ret;
u8 val;
ret = intel_hdmi_hdcp_read(intel_dig_port, DRM_HDCP_DDC_BCAPS, &val, 1);
if (ret) {
- DRM_DEBUG_KMS("Read bcaps over DDC failed (%d)\n", ret);
+ drm_dbg_kms(&i915->drm, "Read bcaps over DDC failed (%d)\n",
+ ret);
return ret;
}
*repeater_present = val & DRM_HDCP_DDC_BCAPS_REPEATER_PRESENT;
@@ -1401,11 +1414,14 @@ static
int intel_hdmi_hdcp_read_ri_prime(struct intel_digital_port *intel_dig_port,
u8 *ri_prime)
{
+ struct drm_i915_private *i915 = to_i915(intel_dig_port->base.base.dev);
+
int ret;
ret = intel_hdmi_hdcp_read(intel_dig_port, DRM_HDCP_DDC_RI_PRIME,
ri_prime, DRM_HDCP_RI_LEN);
if (ret)
- DRM_DEBUG_KMS("Read Ri' over DDC failed (%d)\n", ret);
+ drm_dbg_kms(&i915->drm, "Read Ri' over DDC failed (%d)\n",
+ ret);
return ret;
}
@@ -1413,12 +1429,14 @@ static
int intel_hdmi_hdcp_read_ksv_ready(struct intel_digital_port *intel_dig_port,
bool *ksv_ready)
{
+ struct drm_i915_private *i915 = to_i915(intel_dig_port->base.base.dev);
int ret;
u8 val;
ret = intel_hdmi_hdcp_read(intel_dig_port, DRM_HDCP_DDC_BCAPS, &val, 1);
if (ret) {
- DRM_DEBUG_KMS("Read bcaps over DDC failed (%d)\n", ret);
+ drm_dbg_kms(&i915->drm, "Read bcaps over DDC failed (%d)\n",
+ ret);
return ret;
}
*ksv_ready = val & DRM_HDCP_DDC_BCAPS_KSV_FIFO_READY;
@@ -1429,11 +1447,13 @@ static
int intel_hdmi_hdcp_read_ksv_fifo(struct intel_digital_port *intel_dig_port,
int num_downstream, u8 *ksv_fifo)
{
+ struct drm_i915_private *i915 = to_i915(intel_dig_port->base.base.dev);
int ret;
ret = intel_hdmi_hdcp_read(intel_dig_port, DRM_HDCP_DDC_KSV_FIFO,
ksv_fifo, num_downstream * DRM_HDCP_KSV_LEN);
if (ret) {
- DRM_DEBUG_KMS("Read ksv fifo over DDC failed (%d)\n", ret);
+ drm_dbg_kms(&i915->drm,
+ "Read ksv fifo over DDC failed (%d)\n", ret);
return ret;
}
return 0;
@@ -1443,6 +1463,7 @@ static
int intel_hdmi_hdcp_read_v_prime_part(struct intel_digital_port *intel_dig_port,
int i, u32 *part)
{
+ struct drm_i915_private *i915 = to_i915(intel_dig_port->base.base.dev);
int ret;
if (i >= DRM_HDCP_V_PRIME_NUM_PARTS)
@@ -1451,7 +1472,8 @@ int intel_hdmi_hdcp_read_v_prime_part(struct intel_digital_port *intel_dig_port,
ret = intel_hdmi_hdcp_read(intel_dig_port, DRM_HDCP_DDC_V_PRIME(i),
part, DRM_HDCP_V_PRIME_PART_LEN);
if (ret)
- DRM_DEBUG_KMS("Read V'[%d] over DDC failed (%d)\n", i, ret);
+ drm_dbg_kms(&i915->drm, "Read V'[%d] over DDC failed (%d)\n",
+ i, ret);
return ret;
}
@@ -1474,12 +1496,14 @@ static int kbl_repositioning_enc_en_signal(struct intel_connector *connector)
ret = intel_ddi_toggle_hdcp_signalling(&intel_dig_port->base, false);
if (ret) {
- DRM_ERROR("Disable HDCP signalling failed (%d)\n", ret);
+ drm_err(&dev_priv->drm,
+ "Disable HDCP signalling failed (%d)\n", ret);
return ret;
}
ret = intel_ddi_toggle_hdcp_signalling(&intel_dig_port->base, true);
if (ret) {
- DRM_ERROR("Enable HDCP signalling failed (%d)\n", ret);
+ drm_err(&dev_priv->drm,
+ "Enable HDCP signalling failed (%d)\n", ret);
return ret;
}
@@ -1500,8 +1524,8 @@ int intel_hdmi_hdcp_toggle_signalling(struct intel_digital_port *intel_dig_port,
ret = intel_ddi_toggle_hdcp_signalling(&intel_dig_port->base, enable);
if (ret) {
- DRM_ERROR("%s HDCP signalling failed (%d)\n",
- enable ? "Enable" : "Disable", ret);
+ drm_err(&dev_priv->drm, "%s HDCP signalling failed (%d)\n",
+ enable ? "Enable" : "Disable", ret);
return ret;
}
@@ -1539,8 +1563,10 @@ bool intel_hdmi_hdcp_check_link(struct intel_digital_port *intel_dig_port)
if (wait_for((intel_de_read(i915, HDCP_STATUS(i915, cpu_transcoder, port)) &
(HDCP_STATUS_RI_MATCH | HDCP_STATUS_ENC)) ==
(HDCP_STATUS_RI_MATCH | HDCP_STATUS_ENC), 1)) {
- DRM_ERROR("Ri' mismatch detected, link check failed (%x)\n",
- intel_de_read(i915, HDCP_STATUS(i915, cpu_transcoder, port)));
+ drm_err(&i915->drm,
+ "Ri' mismatch detected, link check failed (%x)\n",
+ intel_de_read(i915, HDCP_STATUS(i915, cpu_transcoder,
+ port)));
return false;
}
return true;
@@ -1588,17 +1614,19 @@ static int get_hdcp2_msg_timeout(u8 msg_id, bool is_paired)
return -EINVAL;
}
-static inline
-int hdcp2_detect_msg_availability(struct intel_digital_port *intel_digital_port,
- u8 msg_id, bool *msg_ready,
- ssize_t *msg_sz)
+static int
+hdcp2_detect_msg_availability(struct intel_digital_port *intel_dig_port,
+ u8 msg_id, bool *msg_ready,
+ ssize_t *msg_sz)
{
+ struct drm_i915_private *i915 = to_i915(intel_dig_port->base.base.dev);
u8 rx_status[HDCP_2_2_HDMI_RXSTATUS_LEN];
int ret;
- ret = intel_hdmi_hdcp2_read_rx_status(intel_digital_port, rx_status);
+ ret = intel_hdmi_hdcp2_read_rx_status(intel_dig_port, rx_status);
if (ret < 0) {
- DRM_DEBUG_KMS("rx_status read failed. Err %d\n", ret);
+ drm_dbg_kms(&i915->drm, "rx_status read failed. Err %d\n",
+ ret);
return ret;
}
@@ -1618,6 +1646,7 @@ static ssize_t
intel_hdmi_hdcp2_wait_for_msg(struct intel_digital_port *intel_dig_port,
u8 msg_id, bool paired)
{
+ struct drm_i915_private *i915 = to_i915(intel_dig_port->base.base.dev);
bool msg_ready = false;
int timeout, ret;
ssize_t msg_sz = 0;
@@ -1632,8 +1661,8 @@ intel_hdmi_hdcp2_wait_for_msg(struct intel_digital_port *intel_dig_port,
!ret && msg_ready && msg_sz, timeout * 1000,
1000, 5 * 1000);
if (ret)
- DRM_DEBUG_KMS("msg_id: %d, ret: %d, timeout: %d\n",
- msg_id, ret, timeout);
+ drm_dbg_kms(&i915->drm, "msg_id: %d, ret: %d, timeout: %d\n",
+ msg_id, ret, timeout);
return ret ? ret : msg_sz;
}
@@ -1652,6 +1681,7 @@ static
int intel_hdmi_hdcp2_read_msg(struct intel_digital_port *intel_dig_port,
u8 msg_id, void *buf, size_t size)
{
+ struct drm_i915_private *i915 = to_i915(intel_dig_port->base.base.dev);
struct intel_hdmi *hdmi = &intel_dig_port->hdmi;
struct intel_hdcp *hdcp = &hdmi->attached_connector->hdcp;
unsigned int offset;
@@ -1667,15 +1697,17 @@ int intel_hdmi_hdcp2_read_msg(struct intel_digital_port *intel_dig_port,
* available buffer.
*/
if (ret > size) {
- DRM_DEBUG_KMS("msg_sz(%zd) is more than exp size(%zu)\n",
- ret, size);
+ drm_dbg_kms(&i915->drm,
+ "msg_sz(%zd) is more than exp size(%zu)\n",
+ ret, size);
return -1;
}
offset = HDCP_2_2_HDMI_REG_RD_MSG_OFFSET;
ret = intel_hdmi_hdcp_read(intel_dig_port, offset, buf, ret);
if (ret)
- DRM_DEBUG_KMS("Failed to read msg_id: %d(%zd)\n", msg_id, ret);
+ drm_dbg_kms(&i915->drm, "Failed to read msg_id: %d(%zd)\n",
+ msg_id, ret);
return ret;
}
@@ -1718,12 +1750,6 @@ int intel_hdmi_hdcp2_capable(struct intel_digital_port *intel_dig_port,
return ret;
}
-static inline
-enum hdcp_wired_protocol intel_hdmi_hdcp2_protocol(void)
-{
- return HDCP_PROTOCOL_HDMI;
-}
-
static const struct intel_hdcp_shim intel_hdmi_hdcp_shim = {
.write_an_aksv = intel_hdmi_hdcp_write_an_aksv,
.read_bksv = intel_hdmi_hdcp_read_bksv,
@@ -1871,15 +1897,17 @@ static void intel_enable_hdmi_audio(struct intel_encoder *encoder,
const struct intel_crtc_state *pipe_config,
const struct drm_connector_state *conn_state)
{
+ struct drm_i915_private *i915 = to_i915(encoder->base.dev);
struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc);
- drm_WARN_ON(encoder->base.dev, !pipe_config->has_hdmi_sink);
- DRM_DEBUG_DRIVER("Enabling HDMI audio on pipe %c\n",
- pipe_name(crtc->pipe));
+ drm_WARN_ON(&i915->drm, !pipe_config->has_hdmi_sink);
+ drm_dbg_kms(&i915->drm, "Enabling HDMI audio on pipe %c\n",
+ pipe_name(crtc->pipe));
intel_audio_codec_enable(encoder, pipe_config, conn_state);
}
-static void g4x_enable_hdmi(struct intel_encoder *encoder,
+static void g4x_enable_hdmi(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *pipe_config,
const struct drm_connector_state *conn_state)
{
@@ -1901,7 +1929,8 @@ static void g4x_enable_hdmi(struct intel_encoder *encoder,
intel_enable_hdmi_audio(encoder, pipe_config, conn_state);
}
-static void ibx_enable_hdmi(struct intel_encoder *encoder,
+static void ibx_enable_hdmi(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *pipe_config,
const struct drm_connector_state *conn_state)
{
@@ -1952,7 +1981,8 @@ static void ibx_enable_hdmi(struct intel_encoder *encoder,
intel_enable_hdmi_audio(encoder, pipe_config, conn_state);
}
-static void cpt_enable_hdmi(struct intel_encoder *encoder,
+static void cpt_enable_hdmi(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *pipe_config,
const struct drm_connector_state *conn_state)
{
@@ -2005,13 +2035,15 @@ static void cpt_enable_hdmi(struct intel_encoder *encoder,
intel_enable_hdmi_audio(encoder, pipe_config, conn_state);
}
-static void vlv_enable_hdmi(struct intel_encoder *encoder,
+static void vlv_enable_hdmi(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *pipe_config,
const struct drm_connector_state *conn_state)
{
}
-static void intel_disable_hdmi(struct intel_encoder *encoder,
+static void intel_disable_hdmi(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *old_crtc_state,
const struct drm_connector_state *old_conn_state)
{
@@ -2069,7 +2101,8 @@ static void intel_disable_hdmi(struct intel_encoder *encoder,
intel_dp_dual_mode_set_tmds_output(intel_hdmi, false);
}
-static void g4x_disable_hdmi(struct intel_encoder *encoder,
+static void g4x_disable_hdmi(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *old_crtc_state,
const struct drm_connector_state *old_conn_state)
{
@@ -2077,10 +2110,11 @@ static void g4x_disable_hdmi(struct intel_encoder *encoder,
intel_audio_codec_disable(encoder,
old_crtc_state, old_conn_state);
- intel_disable_hdmi(encoder, old_crtc_state, old_conn_state);
+ intel_disable_hdmi(state, encoder, old_crtc_state, old_conn_state);
}
-static void pch_disable_hdmi(struct intel_encoder *encoder,
+static void pch_disable_hdmi(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *old_crtc_state,
const struct drm_connector_state *old_conn_state)
{
@@ -2089,11 +2123,12 @@ static void pch_disable_hdmi(struct intel_encoder *encoder,
old_crtc_state, old_conn_state);
}
-static void pch_post_disable_hdmi(struct intel_encoder *encoder,
+static void pch_post_disable_hdmi(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *old_crtc_state,
const struct drm_connector_state *old_conn_state)
{
- intel_disable_hdmi(encoder, old_crtc_state, old_conn_state);
+ intel_disable_hdmi(state, encoder, old_crtc_state, old_conn_state);
}
static int intel_hdmi_source_max_tmds_clock(struct intel_encoder *encoder)
@@ -2286,29 +2321,27 @@ static bool hdmi_deep_color_possible(const struct intel_crtc_state *crtc_state,
return true;
}
-static bool
-intel_hdmi_ycbcr420_config(struct drm_connector *connector,
- struct intel_crtc_state *config)
+static int
+intel_hdmi_ycbcr420_config(struct intel_crtc_state *crtc_state,
+ const struct drm_connector_state *conn_state)
{
- struct intel_crtc *intel_crtc = to_intel_crtc(config->uapi.crtc);
-
- if (!connector->ycbcr_420_allowed) {
- DRM_ERROR("Platform doesn't support YCBCR420 output\n");
- return false;
- }
+ struct drm_connector *connector = conn_state->connector;
+ struct drm_i915_private *i915 = to_i915(connector->dev);
+ const struct drm_display_mode *adjusted_mode =
+ &crtc_state->hw.adjusted_mode;
- config->output_format = INTEL_OUTPUT_FORMAT_YCBCR420;
+ if (!drm_mode_is_420_only(&connector->display_info, adjusted_mode))
+ return 0;
- /* YCBCR 420 output conversion needs a scaler */
- if (skl_update_scaler_crtc(config)) {
- DRM_DEBUG_KMS("Scaler allocation for output failed\n");
- return false;
+ if (!connector->ycbcr_420_allowed) {
+ drm_err(&i915->drm,
+ "Platform doesn't support YCBCR420 output\n");
+ return -EINVAL;
}
- intel_pch_panel_fitting(intel_crtc, config,
- DRM_MODE_SCALE_FULLSCREEN);
+ crtc_state->output_format = INTEL_OUTPUT_FORMAT_YCBCR420;
- return true;
+ return intel_pch_panel_fitting(crtc_state, conn_state);
}
static int intel_hdmi_port_clock(int clock, int bpc)
@@ -2342,6 +2375,7 @@ static int intel_hdmi_compute_bpc(struct intel_encoder *encoder,
static int intel_hdmi_compute_clock(struct intel_encoder *encoder,
struct intel_crtc_state *crtc_state)
{
+ struct drm_i915_private *i915 = to_i915(encoder->base.dev);
struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder);
const struct drm_display_mode *adjusted_mode =
&crtc_state->hw.adjusted_mode;
@@ -2366,13 +2400,15 @@ static int intel_hdmi_compute_clock(struct intel_encoder *encoder,
if (crtc_state->pipe_bpp > bpc * 3)
crtc_state->pipe_bpp = bpc * 3;
- DRM_DEBUG_KMS("picking %d bpc for HDMI output (pipe bpp: %d)\n",
- bpc, crtc_state->pipe_bpp);
+ drm_dbg_kms(&i915->drm,
+ "picking %d bpc for HDMI output (pipe bpp: %d)\n",
+ bpc, crtc_state->pipe_bpp);
if (hdmi_port_clock_valid(intel_hdmi, crtc_state->port_clock,
false, crtc_state->has_hdmi_sink) != MODE_OK) {
- DRM_DEBUG_KMS("unsupported HDMI clock (%d kHz), rejecting mode\n",
- crtc_state->port_clock);
+ drm_dbg_kms(&i915->drm,
+ "unsupported HDMI clock (%d kHz), rejecting mode\n",
+ crtc_state->port_clock);
return -EINVAL;
}
@@ -2433,12 +2469,9 @@ int intel_hdmi_compute_config(struct intel_encoder *encoder,
if (adjusted_mode->flags & DRM_MODE_FLAG_DBLCLK)
pipe_config->pixel_multiplier = 2;
- if (drm_mode_is_420_only(&connector->display_info, adjusted_mode)) {
- if (!intel_hdmi_ycbcr420_config(connector, pipe_config)) {
- DRM_ERROR("Can't support YCBCR420 output\n");
- return -EINVAL;
- }
- }
+ ret = intel_hdmi_ycbcr420_config(pipe_config, conn_state);
+ if (ret)
+ return ret;
pipe_config->limited_color_range =
intel_hdmi_limited_color_range(pipe_config, conn_state);
@@ -2475,25 +2508,26 @@ int intel_hdmi_compute_config(struct intel_encoder *encoder,
}
}
- intel_hdmi_compute_gcp_infoframe(encoder, pipe_config, conn_state);
+ intel_hdmi_compute_gcp_infoframe(encoder, pipe_config,
+ conn_state);
if (!intel_hdmi_compute_avi_infoframe(encoder, pipe_config, conn_state)) {
- DRM_DEBUG_KMS("bad AVI infoframe\n");
+ drm_dbg_kms(&dev_priv->drm, "bad AVI infoframe\n");
return -EINVAL;
}
if (!intel_hdmi_compute_spd_infoframe(encoder, pipe_config, conn_state)) {
- DRM_DEBUG_KMS("bad SPD infoframe\n");
+ drm_dbg_kms(&dev_priv->drm, "bad SPD infoframe\n");
return -EINVAL;
}
if (!intel_hdmi_compute_hdmi_infoframe(encoder, pipe_config, conn_state)) {
- DRM_DEBUG_KMS("bad HDMI infoframe\n");
+ drm_dbg_kms(&dev_priv->drm, "bad HDMI infoframe\n");
return -EINVAL;
}
if (!intel_hdmi_compute_drm_infoframe(encoder, pipe_config, conn_state)) {
- DRM_DEBUG_KMS("bad DRM infoframe\n");
+ drm_dbg_kms(&dev_priv->drm, "bad DRM infoframe\n");
return -EINVAL;
}
@@ -2543,7 +2577,8 @@ intel_hdmi_dp_dual_mode_detect(struct drm_connector *connector, bool has_edid)
*/
if (has_edid && !connector->override_edid &&
intel_bios_is_port_dp_dual_mode(dev_priv, port)) {
- DRM_DEBUG_KMS("Assuming DP dual mode adaptor presence based on VBT\n");
+ drm_dbg_kms(&dev_priv->drm,
+ "Assuming DP dual mode adaptor presence based on VBT\n");
type = DRM_DP_DUAL_MODE_TYPE1_DVI;
} else {
type = DRM_DP_DUAL_MODE_NONE;
@@ -2557,9 +2592,10 @@ intel_hdmi_dp_dual_mode_detect(struct drm_connector *connector, bool has_edid)
hdmi->dp_dual_mode.max_tmds_clock =
drm_dp_dual_mode_max_tmds_clock(type, adapter);
- DRM_DEBUG_KMS("DP dual mode adaptor (%s) detected (max TMDS clock: %d kHz)\n",
- drm_dp_get_dual_mode_type_name(type),
- hdmi->dp_dual_mode.max_tmds_clock);
+ drm_dbg_kms(&dev_priv->drm,
+ "DP dual mode adaptor (%s) detected (max TMDS clock: %d kHz)\n",
+ drm_dp_get_dual_mode_type_name(type),
+ hdmi->dp_dual_mode.max_tmds_clock);
}
static bool
@@ -2579,7 +2615,8 @@ intel_hdmi_set_edid(struct drm_connector *connector)
edid = drm_get_edid(connector, i2c);
if (!edid && !intel_gmbus_is_forced_bit(i2c)) {
- DRM_DEBUG_KMS("HDMI GMBUS EDID read failed, retry using GPIO bit-banging\n");
+ drm_dbg_kms(&dev_priv->drm,
+ "HDMI GMBUS EDID read failed, retry using GPIO bit-banging\n");
intel_gmbus_force_bit(i2c, true);
edid = drm_get_edid(connector, i2c);
intel_gmbus_force_bit(i2c, false);
@@ -2611,8 +2648,8 @@ intel_hdmi_detect(struct drm_connector *connector, bool force)
struct intel_encoder *encoder = &hdmi_to_dig_port(intel_hdmi)->base;
intel_wakeref_t wakeref;
- DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n",
- connector->base.id, connector->name);
+ drm_dbg_kms(&dev_priv->drm, "[CONNECTOR:%d:%s]\n",
+ connector->base.id, connector->name);
wakeref = intel_display_power_get(dev_priv, POWER_DOMAIN_GMBUS);
@@ -2643,8 +2680,10 @@ out:
static void
intel_hdmi_force(struct drm_connector *connector)
{
- DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n",
- connector->base.id, connector->name);
+ struct drm_i915_private *i915 = to_i915(connector->dev);
+
+ drm_dbg_kms(&i915->drm, "[CONNECTOR:%d:%s]\n",
+ connector->base.id, connector->name);
intel_hdmi_unset_edid(connector);
@@ -2665,7 +2704,8 @@ static int intel_hdmi_get_modes(struct drm_connector *connector)
return intel_connector_update_modes(connector, edid);
}
-static void intel_hdmi_pre_enable(struct intel_encoder *encoder,
+static void intel_hdmi_pre_enable(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *pipe_config,
const struct drm_connector_state *conn_state)
{
@@ -2679,7 +2719,8 @@ static void intel_hdmi_pre_enable(struct intel_encoder *encoder,
pipe_config, conn_state);
}
-static void vlv_hdmi_pre_enable(struct intel_encoder *encoder,
+static void vlv_hdmi_pre_enable(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *pipe_config,
const struct drm_connector_state *conn_state)
{
@@ -2696,12 +2737,13 @@ static void vlv_hdmi_pre_enable(struct intel_encoder *encoder,
pipe_config->has_infoframe,
pipe_config, conn_state);
- g4x_enable_hdmi(encoder, pipe_config, conn_state);
+ g4x_enable_hdmi(state, encoder, pipe_config, conn_state);
vlv_wait_port_ready(dev_priv, dport, 0x0);
}
-static void vlv_hdmi_pre_pll_enable(struct intel_encoder *encoder,
+static void vlv_hdmi_pre_pll_enable(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *pipe_config,
const struct drm_connector_state *conn_state)
{
@@ -2710,7 +2752,8 @@ static void vlv_hdmi_pre_pll_enable(struct intel_encoder *encoder,
vlv_phy_pre_pll_enable(encoder, pipe_config);
}
-static void chv_hdmi_pre_pll_enable(struct intel_encoder *encoder,
+static void chv_hdmi_pre_pll_enable(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *pipe_config,
const struct drm_connector_state *conn_state)
{
@@ -2719,14 +2762,16 @@ static void chv_hdmi_pre_pll_enable(struct intel_encoder *encoder,
chv_phy_pre_pll_enable(encoder, pipe_config);
}
-static void chv_hdmi_post_pll_disable(struct intel_encoder *encoder,
+static void chv_hdmi_post_pll_disable(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *old_crtc_state,
const struct drm_connector_state *old_conn_state)
{
chv_phy_post_pll_disable(encoder, old_crtc_state);
}
-static void vlv_hdmi_post_disable(struct intel_encoder *encoder,
+static void vlv_hdmi_post_disable(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *old_crtc_state,
const struct drm_connector_state *old_conn_state)
{
@@ -2734,7 +2779,8 @@ static void vlv_hdmi_post_disable(struct intel_encoder *encoder,
vlv_phy_reset_lanes(encoder, old_crtc_state);
}
-static void chv_hdmi_post_disable(struct intel_encoder *encoder,
+static void chv_hdmi_post_disable(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *old_crtc_state,
const struct drm_connector_state *old_conn_state)
{
@@ -2749,7 +2795,8 @@ static void chv_hdmi_post_disable(struct intel_encoder *encoder,
vlv_dpio_put(dev_priv);
}
-static void chv_hdmi_pre_enable(struct intel_encoder *encoder,
+static void chv_hdmi_pre_enable(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *pipe_config,
const struct drm_connector_state *conn_state)
{
@@ -2767,7 +2814,7 @@ static void chv_hdmi_pre_enable(struct intel_encoder *encoder,
pipe_config->has_infoframe,
pipe_config, conn_state);
- g4x_enable_hdmi(encoder, pipe_config, conn_state);
+ g4x_enable_hdmi(state, encoder, pipe_config, conn_state);
vlv_wait_port_ready(dev_priv, dport, 0x0);
@@ -2786,6 +2833,7 @@ intel_hdmi_get_i2c_adapter(struct drm_connector *connector)
static void intel_hdmi_create_i2c_symlink(struct drm_connector *connector)
{
+ struct drm_i915_private *i915 = to_i915(connector->dev);
struct i2c_adapter *adapter = intel_hdmi_get_i2c_adapter(connector);
struct kobject *i2c_kobj = &adapter->dev.kobj;
struct kobject *connector_kobj = &connector->kdev->kobj;
@@ -2793,7 +2841,7 @@ static void intel_hdmi_create_i2c_symlink(struct drm_connector *connector)
ret = sysfs_create_link(connector_kobj, i2c_kobj, i2c_kobj->name);
if (ret)
- DRM_ERROR("Failed to create i2c symlink (%d)\n", ret);
+ drm_err(&i915->drm, "Failed to create i2c symlink (%d)\n", ret);
}
static void intel_hdmi_remove_i2c_symlink(struct drm_connector *connector)
@@ -2814,8 +2862,6 @@ intel_hdmi_connector_register(struct drm_connector *connector)
if (ret)
return ret;
- intel_connector_debugfs_add(connector);
-
intel_hdmi_create_i2c_symlink(connector);
return ret;
@@ -2922,9 +2968,10 @@ bool intel_hdmi_handle_sink_scrambling(struct intel_encoder *encoder,
if (!sink_scrambling->supported)
return true;
- DRM_DEBUG_KMS("[CONNECTOR:%d:%s] scrambling=%s, TMDS bit clock ratio=1/%d\n",
- connector->base.id, connector->name,
- yesno(scrambling), high_tmds_clock_ratio ? 40 : 10);
+ drm_dbg_kms(&dev_priv->drm,
+ "[CONNECTOR:%d:%s] scrambling=%s, TMDS bit clock ratio=1/%d\n",
+ connector->base.id, connector->name,
+ yesno(scrambling), high_tmds_clock_ratio ? 40 : 10);
/* Set TMDS bit clock ratio to 1/40 or 1/10, and enable/disable scrambling */
return drm_scdc_set_high_tmds_clock_ratio(adapter,
@@ -3066,8 +3113,9 @@ static u8 intel_hdmi_ddc_pin(struct intel_encoder *encoder)
ddc_pin = intel_bios_alternate_ddc_pin(encoder);
if (ddc_pin) {
- DRM_DEBUG_KMS("Using DDC pin 0x%x for port %c (VBT)\n",
- ddc_pin, port_name(port));
+ drm_dbg_kms(&dev_priv->drm,
+ "Using DDC pin 0x%x for port %c (VBT)\n",
+ ddc_pin, port_name(port));
return ddc_pin;
}
@@ -3084,8 +3132,9 @@ static u8 intel_hdmi_ddc_pin(struct intel_encoder *encoder)
else
ddc_pin = g4x_port_to_ddc_pin(dev_priv, port);
- DRM_DEBUG_KMS("Using DDC pin 0x%x for port %c (platform default)\n",
- ddc_pin, port_name(port));
+ drm_dbg_kms(&dev_priv->drm,
+ "Using DDC pin 0x%x for port %c (platform default)\n",
+ ddc_pin, port_name(port));
return ddc_pin;
}
@@ -3142,8 +3191,9 @@ void intel_hdmi_init_connector(struct intel_digital_port *intel_dig_port,
enum port port = intel_encoder->port;
struct cec_connector_info conn_info;
- DRM_DEBUG_KMS("Adding HDMI connector on [ENCODER:%d:%s]\n",
- intel_encoder->base.base.id, intel_encoder->base.name);
+ drm_dbg_kms(&dev_priv->drm,
+ "Adding HDMI connector on [ENCODER:%d:%s]\n",
+ intel_encoder->base.base.id, intel_encoder->base.name);
if (INTEL_GEN(dev_priv) < 12 && drm_WARN_ON(dev, port == PORT_A))
return;
@@ -3187,7 +3237,8 @@ void intel_hdmi_init_connector(struct intel_digital_port *intel_dig_port,
int ret = intel_hdcp_init(intel_connector,
&intel_hdmi_hdcp_shim);
if (ret)
- DRM_DEBUG_KMS("HDCP init failed, skipping.\n");
+ drm_dbg_kms(&dev_priv->drm,
+ "HDCP init failed, skipping.\n");
}
/* For G4X desktop chip, PEG_BAND_GAP_DATA 3:0 must first be written
@@ -3206,16 +3257,16 @@ void intel_hdmi_init_connector(struct intel_digital_port *intel_dig_port,
cec_notifier_conn_register(dev->dev, port_identifier(port),
&conn_info);
if (!intel_hdmi->cec_notifier)
- DRM_DEBUG_KMS("CEC notifier get failed\n");
+ drm_dbg_kms(&dev_priv->drm, "CEC notifier get failed\n");
}
static enum intel_hotplug_state
intel_hdmi_hotplug(struct intel_encoder *encoder,
- struct intel_connector *connector, bool irq_received)
+ struct intel_connector *connector)
{
enum intel_hotplug_state state;
- state = intel_encoder_hotplug(encoder, connector, irq_received);
+ state = intel_encoder_hotplug(encoder, connector);
/*
* On many platforms the HDMI live state signal is known to be
@@ -3229,7 +3280,7 @@ intel_hdmi_hotplug(struct intel_encoder *encoder,
* time around we didn't detect any change in the sink's connection
* status.
*/
- if (state == INTEL_HOTPLUG_UNCHANGED && irq_received)
+ if (state == INTEL_HOTPLUG_UNCHANGED && !connector->hotplug_retries)
state = INTEL_HOTPLUG_RETRY;
return state;
diff --git a/drivers/gpu/drm/i915/display/intel_hotplug.c b/drivers/gpu/drm/i915/display/intel_hotplug.c
index a091442efba4..4f6f560e093e 100644
--- a/drivers/gpu/drm/i915/display/intel_hotplug.c
+++ b/drivers/gpu/drm/i915/display/intel_hotplug.c
@@ -270,8 +270,7 @@ static void intel_hpd_irq_storm_reenable_work(struct work_struct *work)
enum intel_hotplug_state
intel_encoder_hotplug(struct intel_encoder *encoder,
- struct intel_connector *connector,
- bool irq_received)
+ struct intel_connector *connector)
{
struct drm_device *dev = connector->base.dev;
enum drm_connector_status old_status;
@@ -392,12 +391,17 @@ static void i915_hotplug_work_func(struct work_struct *work)
struct intel_encoder *encoder =
intel_attached_encoder(connector);
+ if (hpd_event_bits & hpd_bit)
+ connector->hotplug_retries = 0;
+ else
+ connector->hotplug_retries++;
+
drm_dbg_kms(&dev_priv->drm,
- "Connector %s (pin %i) received hotplug event.\n",
- connector->base.name, pin);
+ "Connector %s (pin %i) received hotplug event. (retry %d)\n",
+ connector->base.name, pin,
+ connector->hotplug_retries);
- switch (encoder->hotplug(encoder, connector,
- hpd_event_bits & hpd_bit)) {
+ switch (encoder->hotplug(encoder, connector)) {
case INTEL_HOTPLUG_UNCHANGED:
break;
case INTEL_HOTPLUG_CHANGED:
diff --git a/drivers/gpu/drm/i915/display/intel_hotplug.h b/drivers/gpu/drm/i915/display/intel_hotplug.h
index 1e6b4fda2900..777b0743257e 100644
--- a/drivers/gpu/drm/i915/display/intel_hotplug.h
+++ b/drivers/gpu/drm/i915/display/intel_hotplug.h
@@ -15,8 +15,7 @@ enum port;
void intel_hpd_poll_init(struct drm_i915_private *dev_priv);
enum intel_hotplug_state intel_encoder_hotplug(struct intel_encoder *encoder,
- struct intel_connector *connector,
- bool irq_received);
+ struct intel_connector *connector);
void intel_hpd_irq_handler(struct drm_i915_private *dev_priv,
u32 pin_mask, u32 long_mask);
void intel_hpd_init(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/display/intel_lspcon.c b/drivers/gpu/drm/i915/display/intel_lspcon.c
index d807c5648c87..6ff7b226f0a1 100644
--- a/drivers/gpu/drm/i915/display/intel_lspcon.c
+++ b/drivers/gpu/drm/i915/display/intel_lspcon.c
@@ -522,7 +522,7 @@ u32 lspcon_infoframes_enabled(struct intel_encoder *encoder,
const struct intel_crtc_state *pipe_config)
{
/* FIXME actually read this from the hw */
- return enc_to_intel_lspcon(encoder)->active;
+ return 0;
}
void lspcon_resume(struct intel_lspcon *lspcon)
diff --git a/drivers/gpu/drm/i915/display/intel_lvds.c b/drivers/gpu/drm/i915/display/intel_lvds.c
index 9a067effcfa0..872f2a489339 100644
--- a/drivers/gpu/drm/i915/display/intel_lvds.c
+++ b/drivers/gpu/drm/i915/display/intel_lvds.c
@@ -220,7 +220,8 @@ static void intel_lvds_pps_init_hw(struct drm_i915_private *dev_priv,
REG_FIELD_PREP(PP_REFERENCE_DIVIDER_MASK, pps->divider) | REG_FIELD_PREP(PANEL_POWER_CYCLE_DELAY_MASK, DIV_ROUND_UP(pps->t4, 1000) + 1));
}
-static void intel_pre_enable_lvds(struct intel_encoder *encoder,
+static void intel_pre_enable_lvds(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *pipe_config,
const struct drm_connector_state *conn_state)
{
@@ -301,7 +302,8 @@ static void intel_pre_enable_lvds(struct intel_encoder *encoder,
/*
* Sets the power state for the panel.
*/
-static void intel_enable_lvds(struct intel_encoder *encoder,
+static void intel_enable_lvds(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *pipe_config,
const struct drm_connector_state *conn_state)
{
@@ -323,7 +325,8 @@ static void intel_enable_lvds(struct intel_encoder *encoder,
intel_panel_enable_backlight(pipe_config, conn_state);
}
-static void intel_disable_lvds(struct intel_encoder *encoder,
+static void intel_disable_lvds(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *old_crtc_state,
const struct drm_connector_state *old_conn_state)
{
@@ -341,28 +344,31 @@ static void intel_disable_lvds(struct intel_encoder *encoder,
intel_de_posting_read(dev_priv, lvds_encoder->reg);
}
-static void gmch_disable_lvds(struct intel_encoder *encoder,
+static void gmch_disable_lvds(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *old_crtc_state,
const struct drm_connector_state *old_conn_state)
{
intel_panel_disable_backlight(old_conn_state);
- intel_disable_lvds(encoder, old_crtc_state, old_conn_state);
+ intel_disable_lvds(state, encoder, old_crtc_state, old_conn_state);
}
-static void pch_disable_lvds(struct intel_encoder *encoder,
+static void pch_disable_lvds(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *old_crtc_state,
const struct drm_connector_state *old_conn_state)
{
intel_panel_disable_backlight(old_conn_state);
}
-static void pch_post_disable_lvds(struct intel_encoder *encoder,
+static void pch_post_disable_lvds(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *old_crtc_state,
const struct drm_connector_state *old_conn_state)
{
- intel_disable_lvds(encoder, old_crtc_state, old_conn_state);
+ intel_disable_lvds(state, encoder, old_crtc_state, old_conn_state);
}
static enum drm_mode_status
@@ -397,6 +403,7 @@ static int intel_lvds_compute_config(struct intel_encoder *intel_encoder,
struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode;
struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->uapi.crtc);
unsigned int lvds_bpp;
+ int ret;
/* Should never happen!! */
if (INTEL_GEN(dev_priv) < 4 && intel_crtc->pipe == 0) {
@@ -430,16 +437,15 @@ static int intel_lvds_compute_config(struct intel_encoder *intel_encoder,
if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN)
return -EINVAL;
- if (HAS_PCH_SPLIT(dev_priv)) {
+ if (HAS_PCH_SPLIT(dev_priv))
pipe_config->has_pch_encoder = true;
- intel_pch_panel_fitting(intel_crtc, pipe_config,
- conn_state->scaling_mode);
- } else {
- intel_gmch_panel_fitting(intel_crtc, pipe_config,
- conn_state->scaling_mode);
-
- }
+ if (HAS_GMCH(dev_priv))
+ ret = intel_gmch_panel_fitting(pipe_config, conn_state);
+ else
+ ret = intel_pch_panel_fitting(pipe_config, conn_state);
+ if (ret)
+ return ret;
/*
* XXX: It would be nice to support lower refresh rates on the
diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c
index 481187223101..66711e62fa71 100644
--- a/drivers/gpu/drm/i915/display/intel_overlay.c
+++ b/drivers/gpu/drm/i915/display/intel_overlay.c
@@ -281,7 +281,7 @@ static void intel_overlay_flip_prepare(struct intel_overlay *overlay,
enum pipe pipe = overlay->crtc->pipe;
struct intel_frontbuffer *from = NULL, *to = NULL;
- WARN_ON(overlay->old_vma);
+ drm_WARN_ON(&overlay->i915->drm, overlay->old_vma);
if (overlay->vma)
from = intel_frontbuffer_get(overlay->vma->obj);
@@ -350,7 +350,7 @@ static void intel_overlay_release_old_vma(struct intel_overlay *overlay)
struct i915_vma *vma;
vma = fetch_and_zero(&overlay->old_vma);
- if (WARN_ON(!vma))
+ if (drm_WARN_ON(&overlay->i915->drm, !vma))
return;
intel_frontbuffer_flip_complete(overlay->i915,
@@ -396,7 +396,7 @@ static int intel_overlay_off(struct intel_overlay *overlay)
struct i915_request *rq;
u32 *cs, flip_addr = overlay->flip_addr;
- WARN_ON(!overlay->active);
+ drm_WARN_ON(&overlay->i915->drm, !overlay->active);
/* According to intel docs the overlay hw may hang (when switching
* off) without loading the filter coeffs. It is however unclear whether
@@ -1342,7 +1342,7 @@ void intel_overlay_setup(struct drm_i915_private *dev_priv)
if (!HAS_OVERLAY(dev_priv))
return;
- engine = dev_priv->engine[RCS0];
+ engine = dev_priv->gt.engine[RCS0];
if (!engine || !engine->kernel_context)
return;
diff --git a/drivers/gpu/drm/i915/display/intel_panel.c b/drivers/gpu/drm/i915/display/intel_panel.c
index 276f43870802..3c5056dbf607 100644
--- a/drivers/gpu/drm/i915/display/intel_panel.c
+++ b/drivers/gpu/drm/i915/display/intel_panel.c
@@ -176,24 +176,23 @@ intel_panel_vbt_fixed_mode(struct intel_connector *connector)
}
/* adjusted_mode has been preset to be the panel's fixed mode */
-void
-intel_pch_panel_fitting(struct intel_crtc *intel_crtc,
- struct intel_crtc_state *pipe_config,
- int fitting_mode)
+int intel_pch_panel_fitting(struct intel_crtc_state *crtc_state,
+ const struct drm_connector_state *conn_state)
{
- const struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode;
- int x = 0, y = 0, width = 0, height = 0;
+ const struct drm_display_mode *adjusted_mode =
+ &crtc_state->hw.adjusted_mode;
+ int x, y, width, height;
/* Native modes don't need fitting */
- if (adjusted_mode->crtc_hdisplay == pipe_config->pipe_src_w &&
- adjusted_mode->crtc_vdisplay == pipe_config->pipe_src_h &&
- pipe_config->output_format != INTEL_OUTPUT_FORMAT_YCBCR420)
- goto done;
+ if (adjusted_mode->crtc_hdisplay == crtc_state->pipe_src_w &&
+ adjusted_mode->crtc_vdisplay == crtc_state->pipe_src_h &&
+ crtc_state->output_format != INTEL_OUTPUT_FORMAT_YCBCR420)
+ return 0;
- switch (fitting_mode) {
+ switch (conn_state->scaling_mode) {
case DRM_MODE_SCALE_CENTER:
- width = pipe_config->pipe_src_w;
- height = pipe_config->pipe_src_h;
+ width = crtc_state->pipe_src_w;
+ height = crtc_state->pipe_src_h;
x = (adjusted_mode->crtc_hdisplay - width + 1)/2;
y = (adjusted_mode->crtc_vdisplay - height + 1)/2;
break;
@@ -202,18 +201,18 @@ intel_pch_panel_fitting(struct intel_crtc *intel_crtc,
/* Scale but preserve the aspect ratio */
{
u32 scaled_width = adjusted_mode->crtc_hdisplay
- * pipe_config->pipe_src_h;
- u32 scaled_height = pipe_config->pipe_src_w
+ * crtc_state->pipe_src_h;
+ u32 scaled_height = crtc_state->pipe_src_w
* adjusted_mode->crtc_vdisplay;
if (scaled_width > scaled_height) { /* pillar */
- width = scaled_height / pipe_config->pipe_src_h;
+ width = scaled_height / crtc_state->pipe_src_h;
if (width & 1)
width++;
x = (adjusted_mode->crtc_hdisplay - width + 1) / 2;
y = 0;
height = adjusted_mode->crtc_vdisplay;
} else if (scaled_width < scaled_height) { /* letter */
- height = scaled_width / pipe_config->pipe_src_w;
+ height = scaled_width / crtc_state->pipe_src_w;
if (height & 1)
height++;
y = (adjusted_mode->crtc_vdisplay - height + 1) / 2;
@@ -227,6 +226,10 @@ intel_pch_panel_fitting(struct intel_crtc *intel_crtc,
}
break;
+ case DRM_MODE_SCALE_NONE:
+ WARN_ON(adjusted_mode->crtc_hdisplay != crtc_state->pipe_src_w);
+ WARN_ON(adjusted_mode->crtc_vdisplay != crtc_state->pipe_src_h);
+ /* fall through */
case DRM_MODE_SCALE_FULLSCREEN:
x = y = 0;
width = adjusted_mode->crtc_hdisplay;
@@ -234,14 +237,15 @@ intel_pch_panel_fitting(struct intel_crtc *intel_crtc,
break;
default:
- WARN(1, "bad panel fit mode: %d\n", fitting_mode);
- return;
+ MISSING_CASE(conn_state->scaling_mode);
+ return -EINVAL;
}
-done:
- pipe_config->pch_pfit.pos = (x << 16) | y;
- pipe_config->pch_pfit.size = (width << 16) | height;
- pipe_config->pch_pfit.enabled = pipe_config->pch_pfit.size != 0;
+ drm_rect_init(&crtc_state->pch_pfit.dst,
+ x, y, width, height);
+ crtc_state->pch_pfit.enabled = true;
+
+ return 0;
}
static void
@@ -287,7 +291,7 @@ centre_vertically(struct drm_display_mode *adjusted_mode,
adjusted_mode->crtc_vsync_end = adjusted_mode->crtc_vsync_start + sync_width;
}
-static inline u32 panel_fitter_scaling(u32 source, u32 target)
+static u32 panel_fitter_scaling(u32 source, u32 target)
{
/*
* Floating point operation is not supported. So the FACTOR
@@ -300,13 +304,14 @@ static inline u32 panel_fitter_scaling(u32 source, u32 target)
return (FACTOR * ratio + FACTOR/2) / FACTOR;
}
-static void i965_scale_aspect(struct intel_crtc_state *pipe_config,
+static void i965_scale_aspect(struct intel_crtc_state *crtc_state,
u32 *pfit_control)
{
- const struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode;
+ const struct drm_display_mode *adjusted_mode =
+ &crtc_state->hw.adjusted_mode;
u32 scaled_width = adjusted_mode->crtc_hdisplay *
- pipe_config->pipe_src_h;
- u32 scaled_height = pipe_config->pipe_src_w *
+ crtc_state->pipe_src_h;
+ u32 scaled_height = crtc_state->pipe_src_w *
adjusted_mode->crtc_vdisplay;
/* 965+ is easy, it does everything in hw */
@@ -316,18 +321,18 @@ static void i965_scale_aspect(struct intel_crtc_state *pipe_config,
else if (scaled_width < scaled_height)
*pfit_control |= PFIT_ENABLE |
PFIT_SCALING_LETTER;
- else if (adjusted_mode->crtc_hdisplay != pipe_config->pipe_src_w)
+ else if (adjusted_mode->crtc_hdisplay != crtc_state->pipe_src_w)
*pfit_control |= PFIT_ENABLE | PFIT_SCALING_AUTO;
}
-static void i9xx_scale_aspect(struct intel_crtc_state *pipe_config,
+static void i9xx_scale_aspect(struct intel_crtc_state *crtc_state,
u32 *pfit_control, u32 *pfit_pgm_ratios,
u32 *border)
{
- struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode;
+ struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode;
u32 scaled_width = adjusted_mode->crtc_hdisplay *
- pipe_config->pipe_src_h;
- u32 scaled_height = pipe_config->pipe_src_w *
+ crtc_state->pipe_src_h;
+ u32 scaled_height = crtc_state->pipe_src_w *
adjusted_mode->crtc_vdisplay;
u32 bits;
@@ -339,11 +344,11 @@ static void i9xx_scale_aspect(struct intel_crtc_state *pipe_config,
if (scaled_width > scaled_height) { /* pillar */
centre_horizontally(adjusted_mode,
scaled_height /
- pipe_config->pipe_src_h);
+ crtc_state->pipe_src_h);
*border = LVDS_BORDER_ENABLE;
- if (pipe_config->pipe_src_h != adjusted_mode->crtc_vdisplay) {
- bits = panel_fitter_scaling(pipe_config->pipe_src_h,
+ if (crtc_state->pipe_src_h != adjusted_mode->crtc_vdisplay) {
+ bits = panel_fitter_scaling(crtc_state->pipe_src_h,
adjusted_mode->crtc_vdisplay);
*pfit_pgm_ratios |= (bits << PFIT_HORIZ_SCALE_SHIFT |
@@ -355,11 +360,11 @@ static void i9xx_scale_aspect(struct intel_crtc_state *pipe_config,
} else if (scaled_width < scaled_height) { /* letter */
centre_vertically(adjusted_mode,
scaled_width /
- pipe_config->pipe_src_w);
+ crtc_state->pipe_src_w);
*border = LVDS_BORDER_ENABLE;
- if (pipe_config->pipe_src_w != adjusted_mode->crtc_hdisplay) {
- bits = panel_fitter_scaling(pipe_config->pipe_src_w,
+ if (crtc_state->pipe_src_w != adjusted_mode->crtc_hdisplay) {
+ bits = panel_fitter_scaling(crtc_state->pipe_src_w,
adjusted_mode->crtc_hdisplay);
*pfit_pgm_ratios |= (bits << PFIT_HORIZ_SCALE_SHIFT |
@@ -377,35 +382,35 @@ static void i9xx_scale_aspect(struct intel_crtc_state *pipe_config,
}
}
-void intel_gmch_panel_fitting(struct intel_crtc *intel_crtc,
- struct intel_crtc_state *pipe_config,
- int fitting_mode)
+int intel_gmch_panel_fitting(struct intel_crtc_state *crtc_state,
+ const struct drm_connector_state *conn_state)
{
- struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev);
+ struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
+ struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
u32 pfit_control = 0, pfit_pgm_ratios = 0, border = 0;
- struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode;
+ struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode;
/* Native modes don't need fitting */
- if (adjusted_mode->crtc_hdisplay == pipe_config->pipe_src_w &&
- adjusted_mode->crtc_vdisplay == pipe_config->pipe_src_h)
+ if (adjusted_mode->crtc_hdisplay == crtc_state->pipe_src_w &&
+ adjusted_mode->crtc_vdisplay == crtc_state->pipe_src_h)
goto out;
- switch (fitting_mode) {
+ switch (conn_state->scaling_mode) {
case DRM_MODE_SCALE_CENTER:
/*
* For centered modes, we have to calculate border widths &
* heights and modify the values programmed into the CRTC.
*/
- centre_horizontally(adjusted_mode, pipe_config->pipe_src_w);
- centre_vertically(adjusted_mode, pipe_config->pipe_src_h);
+ centre_horizontally(adjusted_mode, crtc_state->pipe_src_w);
+ centre_vertically(adjusted_mode, crtc_state->pipe_src_h);
border = LVDS_BORDER_ENABLE;
break;
case DRM_MODE_SCALE_ASPECT:
/* Scale but preserve the aspect ratio */
if (INTEL_GEN(dev_priv) >= 4)
- i965_scale_aspect(pipe_config, &pfit_control);
+ i965_scale_aspect(crtc_state, &pfit_control);
else
- i9xx_scale_aspect(pipe_config, &pfit_control,
+ i9xx_scale_aspect(crtc_state, &pfit_control,
&pfit_pgm_ratios, &border);
break;
case DRM_MODE_SCALE_FULLSCREEN:
@@ -413,8 +418,8 @@ void intel_gmch_panel_fitting(struct intel_crtc *intel_crtc,
* Full scaling, even if it changes the aspect ratio.
* Fortunately this is all done for us in hw.
*/
- if (pipe_config->pipe_src_h != adjusted_mode->crtc_vdisplay ||
- pipe_config->pipe_src_w != adjusted_mode->crtc_hdisplay) {
+ if (crtc_state->pipe_src_h != adjusted_mode->crtc_vdisplay ||
+ crtc_state->pipe_src_w != adjusted_mode->crtc_hdisplay) {
pfit_control |= PFIT_ENABLE;
if (INTEL_GEN(dev_priv) >= 4)
pfit_control |= PFIT_SCALING_AUTO;
@@ -426,15 +431,14 @@ void intel_gmch_panel_fitting(struct intel_crtc *intel_crtc,
}
break;
default:
- drm_WARN(&dev_priv->drm, 1, "bad panel fit mode: %d\n",
- fitting_mode);
- return;
+ MISSING_CASE(conn_state->scaling_mode);
+ return -EINVAL;
}
/* 965+ wants fuzzy fitting */
/* FIXME: handle multiple panels by failing gracefully */
if (INTEL_GEN(dev_priv) >= 4)
- pfit_control |= PFIT_PIPE(intel_crtc->pipe) | PFIT_FILTER_FUZZY;
+ pfit_control |= PFIT_PIPE(crtc->pipe) | PFIT_FILTER_FUZZY;
out:
if ((pfit_control & PFIT_ENABLE) == 0) {
@@ -443,12 +447,14 @@ out:
}
/* Make sure pre-965 set dither correctly for 18bpp panels. */
- if (INTEL_GEN(dev_priv) < 4 && pipe_config->pipe_bpp == 18)
+ if (INTEL_GEN(dev_priv) < 4 && crtc_state->pipe_bpp == 18)
pfit_control |= PANEL_8TO6_DITHER_ENABLE;
- pipe_config->gmch_pfit.control = pfit_control;
- pipe_config->gmch_pfit.pgm_ratios = pfit_pgm_ratios;
- pipe_config->gmch_pfit.lvds_border_bits = border;
+ crtc_state->gmch_pfit.control = pfit_control;
+ crtc_state->gmch_pfit.pgm_ratios = pfit_pgm_ratios;
+ crtc_state->gmch_pfit.lvds_border_bits = border;
+
+ return 0;
}
/**
@@ -483,20 +489,10 @@ static u32 scale(u32 source_val,
return target_val;
}
-/* Scale user_level in range [0..user_max] to [hw_min..hw_max]. */
-static inline u32 scale_user_to_hw(struct intel_connector *connector,
- u32 user_level, u32 user_max)
-{
- struct intel_panel *panel = &connector->panel;
-
- return scale(user_level, 0, user_max,
- panel->backlight.min, panel->backlight.max);
-}
-
/* Scale user_level in range [0..user_max] to [0..hw_max], clamping the result
* to [hw_min..hw_max]. */
-static inline u32 clamp_user_to_hw(struct intel_connector *connector,
- u32 user_level, u32 user_max)
+static u32 clamp_user_to_hw(struct intel_connector *connector,
+ u32 user_level, u32 user_max)
{
struct intel_panel *panel = &connector->panel;
u32 hw_level;
@@ -508,8 +504,8 @@ static inline u32 clamp_user_to_hw(struct intel_connector *connector,
}
/* Scale hw_level in range [hw_min..hw_max] to [0..user_max]. */
-static inline u32 scale_hw_to_user(struct intel_connector *connector,
- u32 hw_level, u32 user_max)
+static u32 scale_hw_to_user(struct intel_connector *connector,
+ u32 hw_level, u32 user_max)
{
struct intel_panel *panel = &connector->panel;
@@ -684,9 +680,10 @@ static void
intel_panel_actually_set_backlight(const struct drm_connector_state *conn_state, u32 level)
{
struct intel_connector *connector = to_intel_connector(conn_state->connector);
+ struct drm_i915_private *i915 = to_i915(connector->base.dev);
struct intel_panel *panel = &connector->panel;
- DRM_DEBUG_DRIVER("set backlight PWM = %d\n", level);
+ drm_dbg_kms(&i915->drm, "set backlight PWM = %d\n", level);
level = intel_panel_compute_brightness(connector, level);
panel->backlight.set(conn_state, level);
@@ -867,8 +864,8 @@ void intel_panel_disable_backlight(const struct drm_connector_state *old_conn_st
* another client is not activated.
*/
if (dev_priv->drm.switch_power_state == DRM_SWITCH_POWER_CHANGING) {
- drm_dbg(&dev_priv->drm,
- "Skipping backlight disable on vga switch\n");
+ drm_dbg_kms(&dev_priv->drm,
+ "Skipping backlight disable on vga switch\n");
return;
}
@@ -1244,10 +1241,20 @@ static u32 intel_panel_get_backlight(struct intel_connector *connector)
mutex_unlock(&dev_priv->backlight_lock);
- drm_dbg(&dev_priv->drm, "get backlight PWM = %d\n", val);
+ drm_dbg_kms(&dev_priv->drm, "get backlight PWM = %d\n", val);
return val;
}
+/* Scale user_level in range [0..user_max] to [hw_min..hw_max]. */
+static u32 scale_user_to_hw(struct intel_connector *connector,
+ u32 user_level, u32 user_max)
+{
+ struct intel_panel *panel = &connector->panel;
+
+ return scale(user_level, 0, user_max,
+ panel->backlight.min, panel->backlight.max);
+}
+
/* set backlight brightness to level in range [0..max], scaling wrt hw min */
static void intel_panel_set_backlight(const struct drm_connector_state *conn_state,
u32 user_level, u32 user_max)
@@ -1335,6 +1342,7 @@ static const struct backlight_ops intel_backlight_device_ops = {
int intel_backlight_device_register(struct intel_connector *connector)
{
+ struct drm_i915_private *i915 = to_i915(connector->base.dev);
struct intel_panel *panel = &connector->panel;
struct backlight_properties props;
@@ -1374,14 +1382,15 @@ int intel_backlight_device_register(struct intel_connector *connector)
&intel_backlight_device_ops, &props);
if (IS_ERR(panel->backlight.device)) {
- DRM_ERROR("Failed to register backlight: %ld\n",
- PTR_ERR(panel->backlight.device));
+ drm_err(&i915->drm, "Failed to register backlight: %ld\n",
+ PTR_ERR(panel->backlight.device));
panel->backlight.device = NULL;
return -ENODEV;
}
- DRM_DEBUG_KMS("Connector %s backlight sysfs interface registered\n",
- connector->base.name);
+ drm_dbg_kms(&i915->drm,
+ "Connector %s backlight sysfs interface registered\n",
+ connector->base.name);
return 0;
}
@@ -1931,7 +1940,8 @@ static int pwm_setup_backlight(struct intel_connector *connector,
return 0;
}
-void intel_panel_update_backlight(struct intel_encoder *encoder,
+void intel_panel_update_backlight(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *crtc_state,
const struct drm_connector_state *conn_state)
{
diff --git a/drivers/gpu/drm/i915/display/intel_panel.h b/drivers/gpu/drm/i915/display/intel_panel.h
index cedeea443336..968b95281cb4 100644
--- a/drivers/gpu/drm/i915/display/intel_panel.h
+++ b/drivers/gpu/drm/i915/display/intel_panel.h
@@ -25,19 +25,18 @@ int intel_panel_init(struct intel_panel *panel,
void intel_panel_fini(struct intel_panel *panel);
void intel_fixed_panel_mode(const struct drm_display_mode *fixed_mode,
struct drm_display_mode *adjusted_mode);
-void intel_pch_panel_fitting(struct intel_crtc *crtc,
- struct intel_crtc_state *pipe_config,
- int fitting_mode);
-void intel_gmch_panel_fitting(struct intel_crtc *crtc,
- struct intel_crtc_state *pipe_config,
- int fitting_mode);
+int intel_pch_panel_fitting(struct intel_crtc_state *crtc_state,
+ const struct drm_connector_state *conn_state);
+int intel_gmch_panel_fitting(struct intel_crtc_state *crtc_state,
+ const struct drm_connector_state *conn_state);
void intel_panel_set_backlight_acpi(const struct drm_connector_state *conn_state,
u32 level, u32 max);
int intel_panel_setup_backlight(struct drm_connector *connector,
enum pipe pipe);
void intel_panel_enable_backlight(const struct intel_crtc_state *crtc_state,
const struct drm_connector_state *conn_state);
-void intel_panel_update_backlight(struct intel_encoder *encoder,
+void intel_panel_update_backlight(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *crtc_state,
const struct drm_connector_state *conn_state);
void intel_panel_disable_backlight(const struct drm_connector_state *old_conn_state);
diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c
index fd9b146e3aba..b7a2c102648a 100644
--- a/drivers/gpu/drm/i915/display/intel_psr.c
+++ b/drivers/gpu/drm/i915/display/intel_psr.c
@@ -30,6 +30,7 @@
#include "intel_display_types.h"
#include "intel_psr.h"
#include "intel_sprite.h"
+#include "intel_hdmi.h"
/**
* DOC: Panel Self Refresh (PSR/SRD)
@@ -137,41 +138,42 @@ static void psr_irq_control(struct drm_i915_private *dev_priv)
intel_de_write(dev_priv, imr_reg, val);
}
-static void psr_event_print(u32 val, bool psr2_enabled)
+static void psr_event_print(struct drm_i915_private *i915,
+ u32 val, bool psr2_enabled)
{
- DRM_DEBUG_KMS("PSR exit events: 0x%x\n", val);
+ drm_dbg_kms(&i915->drm, "PSR exit events: 0x%x\n", val);
if (val & PSR_EVENT_PSR2_WD_TIMER_EXPIRE)
- DRM_DEBUG_KMS("\tPSR2 watchdog timer expired\n");
+ drm_dbg_kms(&i915->drm, "\tPSR2 watchdog timer expired\n");
if ((val & PSR_EVENT_PSR2_DISABLED) && psr2_enabled)
- DRM_DEBUG_KMS("\tPSR2 disabled\n");
+ drm_dbg_kms(&i915->drm, "\tPSR2 disabled\n");
if (val & PSR_EVENT_SU_DIRTY_FIFO_UNDERRUN)
- DRM_DEBUG_KMS("\tSU dirty FIFO underrun\n");
+ drm_dbg_kms(&i915->drm, "\tSU dirty FIFO underrun\n");
if (val & PSR_EVENT_SU_CRC_FIFO_UNDERRUN)
- DRM_DEBUG_KMS("\tSU CRC FIFO underrun\n");
+ drm_dbg_kms(&i915->drm, "\tSU CRC FIFO underrun\n");
if (val & PSR_EVENT_GRAPHICS_RESET)
- DRM_DEBUG_KMS("\tGraphics reset\n");
+ drm_dbg_kms(&i915->drm, "\tGraphics reset\n");
if (val & PSR_EVENT_PCH_INTERRUPT)
- DRM_DEBUG_KMS("\tPCH interrupt\n");
+ drm_dbg_kms(&i915->drm, "\tPCH interrupt\n");
if (val & PSR_EVENT_MEMORY_UP)
- DRM_DEBUG_KMS("\tMemory up\n");
+ drm_dbg_kms(&i915->drm, "\tMemory up\n");
if (val & PSR_EVENT_FRONT_BUFFER_MODIFY)
- DRM_DEBUG_KMS("\tFront buffer modification\n");
+ drm_dbg_kms(&i915->drm, "\tFront buffer modification\n");
if (val & PSR_EVENT_WD_TIMER_EXPIRE)
- DRM_DEBUG_KMS("\tPSR watchdog timer expired\n");
+ drm_dbg_kms(&i915->drm, "\tPSR watchdog timer expired\n");
if (val & PSR_EVENT_PIPE_REGISTERS_UPDATE)
- DRM_DEBUG_KMS("\tPIPE registers updated\n");
+ drm_dbg_kms(&i915->drm, "\tPIPE registers updated\n");
if (val & PSR_EVENT_REGISTER_UPDATE)
- DRM_DEBUG_KMS("\tRegister updated\n");
+ drm_dbg_kms(&i915->drm, "\tRegister updated\n");
if (val & PSR_EVENT_HDCP_ENABLE)
- DRM_DEBUG_KMS("\tHDCP enabled\n");
+ drm_dbg_kms(&i915->drm, "\tHDCP enabled\n");
if (val & PSR_EVENT_KVMR_SESSION_ENABLE)
- DRM_DEBUG_KMS("\tKVMR session enabled\n");
+ drm_dbg_kms(&i915->drm, "\tKVMR session enabled\n");
if (val & PSR_EVENT_VBI_ENABLE)
- DRM_DEBUG_KMS("\tVBI enabled\n");
+ drm_dbg_kms(&i915->drm, "\tVBI enabled\n");
if (val & PSR_EVENT_LPSP_MODE_EXIT)
- DRM_DEBUG_KMS("\tLPSP mode exited\n");
+ drm_dbg_kms(&i915->drm, "\tLPSP mode exited\n");
if ((val & PSR_EVENT_PSR_DISABLE) && !psr2_enabled)
- DRM_DEBUG_KMS("\tPSR disabled\n");
+ drm_dbg_kms(&i915->drm, "\tPSR disabled\n");
}
void intel_psr_irq_handler(struct drm_i915_private *dev_priv, u32 psr_iir)
@@ -209,7 +211,7 @@ void intel_psr_irq_handler(struct drm_i915_private *dev_priv, u32 psr_iir)
intel_de_write(dev_priv, PSR_EVENT(cpu_transcoder),
val);
- psr_event_print(val, psr2_enabled);
+ psr_event_print(dev_priv, val, psr2_enabled);
}
}
@@ -249,18 +251,21 @@ static bool intel_dp_get_alpm_status(struct intel_dp *intel_dp)
static u8 intel_dp_get_sink_sync_latency(struct intel_dp *intel_dp)
{
+ struct drm_i915_private *i915 = dp_to_i915(intel_dp);
u8 val = 8; /* assume the worst if we can't read the value */
if (drm_dp_dpcd_readb(&intel_dp->aux,
DP_SYNCHRONIZATION_LATENCY_IN_SINK, &val) == 1)
val &= DP_MAX_RESYNC_FRAME_COUNT_MASK;
else
- DRM_DEBUG_KMS("Unable to get sink synchronization latency, assuming 8 frames\n");
+ drm_dbg_kms(&i915->drm,
+ "Unable to get sink synchronization latency, assuming 8 frames\n");
return val;
}
static u16 intel_dp_get_su_x_granulartiy(struct intel_dp *intel_dp)
{
+ struct drm_i915_private *i915 = dp_to_i915(intel_dp);
u16 val;
ssize_t r;
@@ -273,7 +278,8 @@ static u16 intel_dp_get_su_x_granulartiy(struct intel_dp *intel_dp)
r = drm_dp_dpcd_read(&intel_dp->aux, DP_PSR2_SU_X_GRANULARITY, &val, 2);
if (r != 2)
- DRM_DEBUG_KMS("Unable to read DP_PSR2_SU_X_GRANULARITY\n");
+ drm_dbg_kms(&i915->drm,
+ "Unable to read DP_PSR2_SU_X_GRANULARITY\n");
/*
* Spec says that if the value read is 0 the default granularity should
@@ -352,39 +358,6 @@ void intel_psr_init_dpcd(struct intel_dp *intel_dp)
}
}
-static void intel_psr_setup_vsc(struct intel_dp *intel_dp,
- const struct intel_crtc_state *crtc_state)
-{
- struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
- struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
- struct dp_sdp psr_vsc;
-
- if (dev_priv->psr.psr2_enabled) {
- /* Prepare VSC Header for SU as per EDP 1.4 spec, Table 6.11 */
- memset(&psr_vsc, 0, sizeof(psr_vsc));
- psr_vsc.sdp_header.HB0 = 0;
- psr_vsc.sdp_header.HB1 = 0x7;
- if (dev_priv->psr.colorimetry_support) {
- psr_vsc.sdp_header.HB2 = 0x5;
- psr_vsc.sdp_header.HB3 = 0x13;
- } else {
- psr_vsc.sdp_header.HB2 = 0x4;
- psr_vsc.sdp_header.HB3 = 0xe;
- }
- } else {
- /* Prepare VSC packet as per EDP 1.3 spec, Table 3.10 */
- memset(&psr_vsc, 0, sizeof(psr_vsc));
- psr_vsc.sdp_header.HB0 = 0;
- psr_vsc.sdp_header.HB1 = 0x7;
- psr_vsc.sdp_header.HB2 = 0x2;
- psr_vsc.sdp_header.HB3 = 0x8;
- }
-
- intel_dig_port->write_infoframe(&intel_dig_port->base,
- crtc_state,
- DP_SDP_VSC, &psr_vsc, sizeof(psr_vsc));
-}
-
static void hsw_psr_setup_aux(struct intel_dp *intel_dp)
{
struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
@@ -751,6 +724,8 @@ void intel_psr_compute_config(struct intel_dp *intel_dp,
if (intel_dp != dev_priv->psr.dp)
return;
+ if (!psr_global_enabled(dev_priv))
+ return;
/*
* HSW spec explicitly says PSR is tied to port A.
* BDW+ platforms have an instance of PSR registers per transcoder but
@@ -793,6 +768,7 @@ void intel_psr_compute_config(struct intel_dp *intel_dp,
crtc_state->has_psr = true;
crtc_state->has_psr2 = intel_psr2_config_valid(intel_dp, crtc_state);
+ crtc_state->infoframes.enable |= intel_hdmi_infoframe_enable(DP_SDP_VSC);
}
static void intel_psr_activate(struct intel_dp *intel_dp)
@@ -875,9 +851,12 @@ static void intel_psr_enable_source(struct intel_dp *intel_dp,
}
static void intel_psr_enable_locked(struct drm_i915_private *dev_priv,
- const struct intel_crtc_state *crtc_state)
+ const struct intel_crtc_state *crtc_state,
+ const struct drm_connector_state *conn_state)
{
struct intel_dp *intel_dp = dev_priv->psr.dp;
+ struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
+ struct intel_encoder *encoder = &intel_dig_port->base;
u32 val;
drm_WARN_ON(&dev_priv->drm, dev_priv->psr.enabled);
@@ -916,7 +895,9 @@ static void intel_psr_enable_locked(struct drm_i915_private *dev_priv,
drm_dbg_kms(&dev_priv->drm, "Enabling PSR%s\n",
dev_priv->psr.psr2_enabled ? "2" : "1");
- intel_psr_setup_vsc(intel_dp, crtc_state);
+ intel_dp_compute_psr_vsc_sdp(intel_dp, crtc_state, conn_state,
+ &dev_priv->psr.vsc);
+ intel_write_dp_vsc_sdp(encoder, crtc_state, &dev_priv->psr.vsc);
intel_psr_enable_sink(intel_dp);
intel_psr_enable_source(intel_dp, crtc_state);
dev_priv->psr.enabled = true;
@@ -928,11 +909,13 @@ static void intel_psr_enable_locked(struct drm_i915_private *dev_priv,
* intel_psr_enable - Enable PSR
* @intel_dp: Intel DP
* @crtc_state: new CRTC state
+ * @conn_state: new CONNECTOR state
*
* This function can only be called after the pipe is fully trained and enabled.
*/
void intel_psr_enable(struct intel_dp *intel_dp,
- const struct intel_crtc_state *crtc_state)
+ const struct intel_crtc_state *crtc_state,
+ const struct drm_connector_state *conn_state)
{
struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
@@ -953,7 +936,7 @@ void intel_psr_enable(struct intel_dp *intel_dp,
goto unlock;
}
- intel_psr_enable_locked(dev_priv, crtc_state);
+ intel_psr_enable_locked(dev_priv, crtc_state, conn_state);
unlock:
mutex_unlock(&dev_priv->psr.lock);
@@ -1086,13 +1069,15 @@ static void psr_force_hw_tracking_exit(struct drm_i915_private *dev_priv)
* intel_psr_update - Update PSR state
* @intel_dp: Intel DP
* @crtc_state: new CRTC state
+ * @conn_state: new CONNECTOR state
*
* This functions will update PSR states, disabling, enabling or switching PSR
* version when executing fastsets. For full modeset, intel_psr_disable() and
* intel_psr_enable() should be called instead.
*/
void intel_psr_update(struct intel_dp *intel_dp,
- const struct intel_crtc_state *crtc_state)
+ const struct intel_crtc_state *crtc_state,
+ const struct drm_connector_state *conn_state)
{
struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
struct i915_psr *psr = &dev_priv->psr;
@@ -1129,7 +1114,7 @@ void intel_psr_update(struct intel_dp *intel_dp,
intel_psr_disable_locked(intel_dp);
if (enable)
- intel_psr_enable_locked(dev_priv, crtc_state);
+ intel_psr_enable_locked(dev_priv, crtc_state, conn_state);
unlock:
mutex_unlock(&dev_priv->psr.lock);
diff --git a/drivers/gpu/drm/i915/display/intel_psr.h b/drivers/gpu/drm/i915/display/intel_psr.h
index 274fc6bb6221..b4515186d5f4 100644
--- a/drivers/gpu/drm/i915/display/intel_psr.h
+++ b/drivers/gpu/drm/i915/display/intel_psr.h
@@ -17,11 +17,13 @@ struct intel_dp;
#define CAN_PSR(dev_priv) (HAS_PSR(dev_priv) && dev_priv->psr.sink_support)
void intel_psr_init_dpcd(struct intel_dp *intel_dp);
void intel_psr_enable(struct intel_dp *intel_dp,
- const struct intel_crtc_state *crtc_state);
+ const struct intel_crtc_state *crtc_state,
+ const struct drm_connector_state *conn_state);
void intel_psr_disable(struct intel_dp *intel_dp,
const struct intel_crtc_state *old_crtc_state);
void intel_psr_update(struct intel_dp *intel_dp,
- const struct intel_crtc_state *crtc_state);
+ const struct intel_crtc_state *crtc_state,
+ const struct drm_connector_state *conn_state);
int intel_psr_debug_set(struct drm_i915_private *dev_priv, u64 value);
void intel_psr_invalidate(struct drm_i915_private *dev_priv,
unsigned frontbuffer_bits,
diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.c b/drivers/gpu/drm/i915/display/intel_sdvo.c
index 637d8fe2f8c2..bc6c26818e15 100644
--- a/drivers/gpu/drm/i915/display/intel_sdvo.c
+++ b/drivers/gpu/drm/i915/display/intel_sdvo.c
@@ -1430,7 +1430,8 @@ static void intel_sdvo_update_props(struct intel_sdvo *intel_sdvo,
#undef UPDATE_PROPERTY
}
-static void intel_sdvo_pre_enable(struct intel_encoder *intel_encoder,
+static void intel_sdvo_pre_enable(struct intel_atomic_state *state,
+ struct intel_encoder *intel_encoder,
const struct intel_crtc_state *crtc_state,
const struct drm_connector_state *conn_state)
{
@@ -1727,7 +1728,8 @@ static void intel_sdvo_enable_audio(struct intel_sdvo *intel_sdvo,
SDVO_AUDIO_PRESENCE_DETECT);
}
-static void intel_disable_sdvo(struct intel_encoder *encoder,
+static void intel_disable_sdvo(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *old_crtc_state,
const struct drm_connector_state *conn_state)
{
@@ -1775,20 +1777,23 @@ static void intel_disable_sdvo(struct intel_encoder *encoder,
}
}
-static void pch_disable_sdvo(struct intel_encoder *encoder,
+static void pch_disable_sdvo(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *old_crtc_state,
const struct drm_connector_state *old_conn_state)
{
}
-static void pch_post_disable_sdvo(struct intel_encoder *encoder,
+static void pch_post_disable_sdvo(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *old_crtc_state,
const struct drm_connector_state *old_conn_state)
{
- intel_disable_sdvo(encoder, old_crtc_state, old_conn_state);
+ intel_disable_sdvo(state, encoder, old_crtc_state, old_conn_state);
}
-static void intel_enable_sdvo(struct intel_encoder *encoder,
+static void intel_enable_sdvo(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *pipe_config,
const struct drm_connector_state *conn_state)
{
@@ -1934,12 +1939,11 @@ static void intel_sdvo_enable_hotplug(struct intel_encoder *encoder)
static enum intel_hotplug_state
intel_sdvo_hotplug(struct intel_encoder *encoder,
- struct intel_connector *connector,
- bool irq_received)
+ struct intel_connector *connector)
{
intel_sdvo_enable_hotplug(encoder);
- return intel_encoder_hotplug(encoder, connector, irq_received);
+ return intel_encoder_hotplug(encoder, connector);
}
static bool
diff --git a/drivers/gpu/drm/i915/display/intel_sprite.c b/drivers/gpu/drm/i915/display/intel_sprite.c
index 33d886141138..0000ec7055f7 100644
--- a/drivers/gpu/drm/i915/display/intel_sprite.c
+++ b/drivers/gpu/drm/i915/display/intel_sprite.c
@@ -2503,6 +2503,7 @@ static const u32 skl_plane_formats[] = {
DRM_FORMAT_YVYU,
DRM_FORMAT_UYVY,
DRM_FORMAT_VYUY,
+ DRM_FORMAT_XYUV8888,
};
static const u32 skl_planar_formats[] = {
@@ -2521,6 +2522,7 @@ static const u32 skl_planar_formats[] = {
DRM_FORMAT_UYVY,
DRM_FORMAT_VYUY,
DRM_FORMAT_NV12,
+ DRM_FORMAT_XYUV8888,
};
static const u32 glk_planar_formats[] = {
@@ -2539,6 +2541,7 @@ static const u32 glk_planar_formats[] = {
DRM_FORMAT_UYVY,
DRM_FORMAT_VYUY,
DRM_FORMAT_NV12,
+ DRM_FORMAT_XYUV8888,
DRM_FORMAT_P010,
DRM_FORMAT_P012,
DRM_FORMAT_P016,
@@ -2562,6 +2565,7 @@ static const u32 icl_sdr_y_plane_formats[] = {
DRM_FORMAT_Y210,
DRM_FORMAT_Y212,
DRM_FORMAT_Y216,
+ DRM_FORMAT_XYUV8888,
DRM_FORMAT_XVYU2101010,
DRM_FORMAT_XVYU12_16161616,
DRM_FORMAT_XVYU16161616,
@@ -2589,6 +2593,7 @@ static const u32 icl_sdr_uv_plane_formats[] = {
DRM_FORMAT_Y210,
DRM_FORMAT_Y212,
DRM_FORMAT_Y216,
+ DRM_FORMAT_XYUV8888,
DRM_FORMAT_XVYU2101010,
DRM_FORMAT_XVYU12_16161616,
DRM_FORMAT_XVYU16161616,
@@ -2620,6 +2625,7 @@ static const u32 icl_hdr_plane_formats[] = {
DRM_FORMAT_Y210,
DRM_FORMAT_Y212,
DRM_FORMAT_Y216,
+ DRM_FORMAT_XYUV8888,
DRM_FORMAT_XVYU2101010,
DRM_FORMAT_XVYU12_16161616,
DRM_FORMAT_XVYU16161616,
@@ -2790,6 +2796,7 @@ static bool skl_plane_format_mod_supported(struct drm_plane *_plane,
case DRM_FORMAT_UYVY:
case DRM_FORMAT_VYUY:
case DRM_FORMAT_NV12:
+ case DRM_FORMAT_XYUV8888:
case DRM_FORMAT_P010:
case DRM_FORMAT_P012:
case DRM_FORMAT_P016:
@@ -2860,6 +2867,7 @@ static bool gen12_plane_format_mod_supported(struct drm_plane *_plane,
case DRM_FORMAT_UYVY:
case DRM_FORMAT_VYUY:
case DRM_FORMAT_NV12:
+ case DRM_FORMAT_XYUV8888:
case DRM_FORMAT_P010:
case DRM_FORMAT_P012:
case DRM_FORMAT_P016:
diff --git a/drivers/gpu/drm/i915/display/intel_tc.c b/drivers/gpu/drm/i915/display/intel_tc.c
index 9b850c11aa78..b161c15baf86 100644
--- a/drivers/gpu/drm/i915/display/intel_tc.c
+++ b/drivers/gpu/drm/i915/display/intel_tc.c
@@ -34,6 +34,7 @@ tc_port_load_fia_params(struct drm_i915_private *i915,
if (INTEL_INFO(i915)->display.has_modular_fia) {
modular_fia = intel_uncore_read(&i915->uncore,
PORT_TX_DFLEXDPSP(FIA1));
+ drm_WARN_ON(&i915->drm, modular_fia == 0xffffffff);
modular_fia &= MODULAR_FIA_MASK;
} else {
modular_fia = 0;
@@ -52,6 +53,62 @@ tc_port_load_fia_params(struct drm_i915_private *i915,
}
}
+static enum intel_display_power_domain
+tc_cold_get_power_domain(struct intel_digital_port *dig_port)
+{
+ struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev);
+
+ if (INTEL_GEN(i915) == 11)
+ return intel_legacy_aux_to_power_domain(dig_port->aux_ch);
+ else
+ return POWER_DOMAIN_TC_COLD_OFF;
+}
+
+static intel_wakeref_t
+tc_cold_block(struct intel_digital_port *dig_port)
+{
+ struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev);
+ enum intel_display_power_domain domain;
+
+ if (INTEL_GEN(i915) == 11 && !dig_port->tc_legacy_port)
+ return 0;
+
+ domain = tc_cold_get_power_domain(dig_port);
+ return intel_display_power_get(i915, domain);
+}
+
+static void
+tc_cold_unblock(struct intel_digital_port *dig_port, intel_wakeref_t wakeref)
+{
+ struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev);
+ enum intel_display_power_domain domain;
+
+ /*
+ * wakeref == -1 means some error happened while saving save_depot_stack,
+ * but power should still be put down; 0 is an invalid save_depot_stack
+ * id, so it can be used to skip this for non-TC legacy ports.
+ */
+ if (wakeref == 0)
+ return;
+
+ domain = tc_cold_get_power_domain(dig_port);
+ intel_display_power_put_async(i915, domain, wakeref);
+}
+
+static void
+assert_tc_cold_blocked(struct intel_digital_port *dig_port)
+{
+ struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev);
+ bool enabled;
+
+ if (INTEL_GEN(i915) == 11 && !dig_port->tc_legacy_port)
+ return;
+
+ enabled = intel_display_power_is_enabled(i915,
+ tc_cold_get_power_domain(dig_port));
+ drm_WARN_ON(&i915->drm, !enabled);
+}
+
u32 intel_tc_port_get_lane_mask(struct intel_digital_port *dig_port)
{
struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev);
@@ -62,6 +119,7 @@ u32 intel_tc_port_get_lane_mask(struct intel_digital_port *dig_port)
PORT_TX_DFLEXDPSP(dig_port->tc_phy_fia));
drm_WARN_ON(&i915->drm, lane_mask == 0xffffffff);
+ assert_tc_cold_blocked(dig_port);
lane_mask &= DP_LANE_ASSIGNMENT_MASK(dig_port->tc_phy_fia_idx);
return lane_mask >> DP_LANE_ASSIGNMENT_SHIFT(dig_port->tc_phy_fia_idx);
@@ -77,6 +135,7 @@ u32 intel_tc_port_get_pin_assignment_mask(struct intel_digital_port *dig_port)
PORT_TX_DFLEXPA1(dig_port->tc_phy_fia));
drm_WARN_ON(&i915->drm, pin_mask == 0xffffffff);
+ assert_tc_cold_blocked(dig_port);
return (pin_mask & DP_PIN_ASSIGNMENT_MASK(dig_port->tc_phy_fia_idx)) >>
DP_PIN_ASSIGNMENT_SHIFT(dig_port->tc_phy_fia_idx);
@@ -91,6 +150,8 @@ int intel_tc_port_fia_max_lane_count(struct intel_digital_port *dig_port)
if (dig_port->tc_mode != TC_PORT_DP_ALT)
return 4;
+ assert_tc_cold_blocked(dig_port);
+
lane_mask = 0;
with_intel_display_power(i915, POWER_DOMAIN_DISPLAY_CORE, wakeref)
lane_mask = intel_tc_port_get_lane_mask(dig_port);
@@ -123,6 +184,8 @@ void intel_tc_port_set_fia_lane_count(struct intel_digital_port *dig_port,
drm_WARN_ON(&i915->drm,
lane_reversal && dig_port->tc_mode != TC_PORT_LEGACY);
+ assert_tc_cold_blocked(dig_port);
+
val = intel_uncore_read(uncore,
PORT_TX_DFLEXDPMLE1(dig_port->tc_phy_fia));
val &= ~DFLEXDPMLE1_DPMLETC_MASK(dig_port->tc_phy_fia_idx);
@@ -152,6 +215,7 @@ void intel_tc_port_set_fia_lane_count(struct intel_digital_port *dig_port,
static void tc_port_fixup_legacy_flag(struct intel_digital_port *dig_port,
u32 live_status_mask)
{
+ struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev);
u32 valid_hpd_mask;
if (dig_port->tc_legacy_port)
@@ -164,8 +228,9 @@ static void tc_port_fixup_legacy_flag(struct intel_digital_port *dig_port,
return;
/* If live status mismatches the VBT flag, trust the live status. */
- DRM_ERROR("Port %s: live status %08x mismatches the legacy port flag, fix flag\n",
- dig_port->tc_port_name, live_status_mask);
+ drm_err(&i915->drm,
+ "Port %s: live status %08x mismatches the legacy port flag, fix flag\n",
+ dig_port->tc_port_name, live_status_mask);
dig_port->tc_legacy_port = !dig_port->tc_legacy_port;
}
@@ -173,8 +238,8 @@ static void tc_port_fixup_legacy_flag(struct intel_digital_port *dig_port,
static u32 tc_port_live_status_mask(struct intel_digital_port *dig_port)
{
struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev);
- enum tc_port tc_port = intel_port_to_tc(i915, dig_port->base.port);
struct intel_uncore *uncore = &i915->uncore;
+ u32 isr_bit = i915->hotplug.pch_hpd[dig_port->base.hpd_pin];
u32 mask = 0;
u32 val;
@@ -193,7 +258,7 @@ static u32 tc_port_live_status_mask(struct intel_digital_port *dig_port)
if (val & TC_LIVE_STATE_TC(dig_port->tc_phy_fia_idx))
mask |= BIT(TC_PORT_DP_ALT);
- if (intel_uncore_read(uncore, SDEISR) & SDE_TC_HOTPLUG_ICP(tc_port))
+ if (intel_uncore_read(uncore, SDEISR) & isr_bit)
mask |= BIT(TC_PORT_LEGACY);
/* The sink can be connected only in a single mode. */
@@ -233,8 +298,7 @@ static bool icl_tc_phy_set_safe_mode(struct intel_digital_port *dig_port,
if (val == 0xffffffff) {
drm_dbg_kms(&i915->drm,
"Port %s: PHY in TCCOLD, can't set safe-mode to %s\n",
- dig_port->tc_port_name,
- enableddisabled(enable));
+ dig_port->tc_port_name, enableddisabled(enable));
return false;
}
@@ -286,11 +350,12 @@ static bool icl_tc_phy_is_in_safe_mode(struct intel_digital_port *dig_port)
static void icl_tc_phy_connect(struct intel_digital_port *dig_port,
int required_lanes)
{
+ struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev);
int max_lanes;
if (!icl_tc_phy_status_complete(dig_port)) {
- DRM_DEBUG_KMS("Port %s: PHY not ready\n",
- dig_port->tc_port_name);
+ drm_dbg_kms(&i915->drm, "Port %s: PHY not ready\n",
+ dig_port->tc_port_name);
goto out_set_tbt_alt_mode;
}
@@ -311,15 +376,16 @@ static void icl_tc_phy_connect(struct intel_digital_port *dig_port,
* became disconnected. Not necessary for legacy mode.
*/
if (!(tc_port_live_status_mask(dig_port) & BIT(TC_PORT_DP_ALT))) {
- DRM_DEBUG_KMS("Port %s: PHY sudden disconnect\n",
- dig_port->tc_port_name);
+ drm_dbg_kms(&i915->drm, "Port %s: PHY sudden disconnect\n",
+ dig_port->tc_port_name);
goto out_set_safe_mode;
}
if (max_lanes < required_lanes) {
- DRM_DEBUG_KMS("Port %s: PHY max lanes %d < required lanes %d\n",
- dig_port->tc_port_name,
- max_lanes, required_lanes);
+ drm_dbg_kms(&i915->drm,
+ "Port %s: PHY max lanes %d < required lanes %d\n",
+ dig_port->tc_port_name,
+ max_lanes, required_lanes);
goto out_set_safe_mode;
}
@@ -357,15 +423,17 @@ static void icl_tc_phy_disconnect(struct intel_digital_port *dig_port)
static bool icl_tc_phy_is_connected(struct intel_digital_port *dig_port)
{
+ struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev);
+
if (!icl_tc_phy_status_complete(dig_port)) {
- DRM_DEBUG_KMS("Port %s: PHY status not complete\n",
- dig_port->tc_port_name);
+ drm_dbg_kms(&i915->drm, "Port %s: PHY status not complete\n",
+ dig_port->tc_port_name);
return dig_port->tc_mode == TC_PORT_TBT_ALT;
}
if (icl_tc_phy_is_in_safe_mode(dig_port)) {
- DRM_DEBUG_KMS("Port %s: PHY still in safe mode\n",
- dig_port->tc_port_name);
+ drm_dbg_kms(&i915->drm, "Port %s: PHY still in safe mode\n",
+ dig_port->tc_port_name);
return false;
}
@@ -415,9 +483,14 @@ static void intel_tc_port_reset_mode(struct intel_digital_port *dig_port,
enum tc_port_mode old_tc_mode = dig_port->tc_mode;
intel_display_power_flush_work(i915);
- drm_WARN_ON(&i915->drm,
- intel_display_power_is_enabled(i915,
- intel_aux_power_domain(dig_port)));
+ if (INTEL_GEN(i915) != 11 || !dig_port->tc_legacy_port) {
+ enum intel_display_power_domain aux_domain;
+ bool aux_powered;
+
+ aux_domain = intel_aux_power_domain(dig_port);
+ aux_powered = intel_display_power_is_enabled(i915, aux_domain);
+ drm_WARN_ON(&i915->drm, aux_powered);
+ }
icl_tc_phy_disconnect(dig_port);
icl_tc_phy_connect(dig_port, required_lanes);
@@ -438,10 +511,13 @@ intel_tc_port_link_init_refcount(struct intel_digital_port *dig_port,
void intel_tc_port_sanitize(struct intel_digital_port *dig_port)
{
+ struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev);
struct intel_encoder *encoder = &dig_port->base;
+ intel_wakeref_t tc_cold_wref;
int active_links = 0;
mutex_lock(&dig_port->tc_lock);
+ tc_cold_wref = tc_cold_block(dig_port);
dig_port->tc_mode = intel_tc_port_get_current_mode(dig_port);
if (dig_port->dp.is_mst)
@@ -451,8 +527,9 @@ void intel_tc_port_sanitize(struct intel_digital_port *dig_port)
if (active_links) {
if (!icl_tc_phy_is_connected(dig_port))
- DRM_DEBUG_KMS("Port %s: PHY disconnected with %d active link(s)\n",
- dig_port->tc_port_name, active_links);
+ drm_dbg_kms(&i915->drm,
+ "Port %s: PHY disconnected with %d active link(s)\n",
+ dig_port->tc_port_name, active_links);
intel_tc_port_link_init_refcount(dig_port, active_links);
goto out;
@@ -462,10 +539,11 @@ void intel_tc_port_sanitize(struct intel_digital_port *dig_port)
icl_tc_phy_connect(dig_port, 1);
out:
- DRM_DEBUG_KMS("Port %s: sanitize mode (%s)\n",
- dig_port->tc_port_name,
- tc_port_mode_name(dig_port->tc_mode));
+ drm_dbg_kms(&i915->drm, "Port %s: sanitize mode (%s)\n",
+ dig_port->tc_port_name,
+ tc_port_mode_name(dig_port->tc_mode));
+ tc_cold_unblock(dig_port, tc_cold_wref);
mutex_unlock(&dig_port->tc_lock);
}
@@ -484,13 +562,19 @@ static bool intel_tc_port_needs_reset(struct intel_digital_port *dig_port)
* connected ports are usable, and avoids exposing to the users objects they
* can't really use.
*/
-bool intel_tc_port_connected(struct intel_digital_port *dig_port)
+bool intel_tc_port_connected(struct intel_encoder *encoder)
{
+ struct intel_digital_port *dig_port = enc_to_dig_port(encoder);
bool is_connected;
+ intel_wakeref_t tc_cold_wref;
intel_tc_port_lock(dig_port);
+ tc_cold_wref = tc_cold_block(dig_port);
+
is_connected = tc_port_live_status_mask(dig_port) &
BIT(dig_port->tc_mode);
+
+ tc_cold_unblock(dig_port, tc_cold_wref);
intel_tc_port_unlock(dig_port);
return is_connected;
@@ -506,9 +590,16 @@ static void __intel_tc_port_lock(struct intel_digital_port *dig_port,
mutex_lock(&dig_port->tc_lock);
- if (!dig_port->tc_link_refcount &&
- intel_tc_port_needs_reset(dig_port))
- intel_tc_port_reset_mode(dig_port, required_lanes);
+ if (!dig_port->tc_link_refcount) {
+ intel_wakeref_t tc_cold_wref;
+
+ tc_cold_wref = tc_cold_block(dig_port);
+
+ if (intel_tc_port_needs_reset(dig_port))
+ intel_tc_port_reset_mode(dig_port, required_lanes);
+
+ tc_cold_unblock(dig_port, tc_cold_wref);
+ }
drm_WARN_ON(&i915->drm, dig_port->tc_lock_wakeref);
dig_port->tc_lock_wakeref = wakeref;
diff --git a/drivers/gpu/drm/i915/display/intel_tc.h b/drivers/gpu/drm/i915/display/intel_tc.h
index 463f1b3c836f..b619e4736f85 100644
--- a/drivers/gpu/drm/i915/display/intel_tc.h
+++ b/drivers/gpu/drm/i915/display/intel_tc.h
@@ -10,8 +10,9 @@
#include <linux/types.h>
struct intel_digital_port;
+struct intel_encoder;
-bool intel_tc_port_connected(struct intel_digital_port *dig_port);
+bool intel_tc_port_connected(struct intel_encoder *encoder);
u32 intel_tc_port_get_lane_mask(struct intel_digital_port *dig_port);
u32 intel_tc_port_get_pin_assignment_mask(struct intel_digital_port *dig_port);
int intel_tc_port_fia_max_lane_count(struct intel_digital_port *dig_port);
diff --git a/drivers/gpu/drm/i915/display/intel_tv.c b/drivers/gpu/drm/i915/display/intel_tv.c
index d2e3a3a323e9..fbe12aad7d58 100644
--- a/drivers/gpu/drm/i915/display/intel_tv.c
+++ b/drivers/gpu/drm/i915/display/intel_tv.c
@@ -914,7 +914,8 @@ intel_tv_get_hw_state(struct intel_encoder *encoder, enum pipe *pipe)
}
static void
-intel_enable_tv(struct intel_encoder *encoder,
+intel_enable_tv(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *pipe_config,
const struct drm_connector_state *conn_state)
{
@@ -930,7 +931,8 @@ intel_enable_tv(struct intel_encoder *encoder,
}
static void
-intel_disable_tv(struct intel_encoder *encoder,
+intel_disable_tv(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *old_crtc_state,
const struct drm_connector_state *old_conn_state)
{
@@ -1414,7 +1416,8 @@ static void set_color_conversion(struct drm_i915_private *dev_priv,
(color_conversion->bv << 16) | color_conversion->av);
}
-static void intel_tv_pre_enable(struct intel_encoder *encoder,
+static void intel_tv_pre_enable(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *pipe_config,
const struct drm_connector_state *conn_state)
{
@@ -1698,13 +1701,13 @@ intel_tv_detect(struct drm_connector *connector,
struct drm_modeset_acquire_ctx *ctx,
bool force)
{
+ struct drm_i915_private *i915 = to_i915(connector->dev);
struct intel_tv *intel_tv = intel_attached_tv(to_intel_connector(connector));
enum drm_connector_status status;
int type;
- DRM_DEBUG_KMS("[CONNECTOR:%d:%s] force=%d\n",
- connector->base.id, connector->name,
- force);
+ drm_dbg_kms(&i915->drm, "[CONNECTOR:%d:%s] force=%d\n",
+ connector->base.id, connector->name, force);
if (force) {
struct intel_load_detect_pipe tmp;
diff --git a/drivers/gpu/drm/i915/display/intel_vbt_defs.h b/drivers/gpu/drm/i915/display/intel_vbt_defs.h
index 05c7cbe32eb4..aef7fe932d1a 100644
--- a/drivers/gpu/drm/i915/display/intel_vbt_defs.h
+++ b/drivers/gpu/drm/i915/display/intel_vbt_defs.h
@@ -462,7 +462,7 @@ struct bdb_general_definitions {
* number = (block_size - sizeof(bdb_general_definitions))/
* defs->child_dev_size;
*/
- u8 devices[0];
+ u8 devices[];
} __packed;
/*
@@ -839,7 +839,7 @@ struct bdb_mipi_config {
struct bdb_mipi_sequence {
u8 version;
- u8 data[0]; /* up to 6 variable length blocks */
+ u8 data[]; /* up to 6 variable length blocks */
} __packed;
/*
diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c b/drivers/gpu/drm/i915/display/vlv_dsi.c
index f4c362dc6e15..f582ab52f0b0 100644
--- a/drivers/gpu/drm/i915/display/vlv_dsi.c
+++ b/drivers/gpu/drm/i915/display/vlv_dsi.c
@@ -267,7 +267,6 @@ static int intel_dsi_compute_config(struct intel_encoder *encoder,
struct intel_dsi *intel_dsi = container_of(encoder, struct intel_dsi,
base);
struct intel_connector *intel_connector = intel_dsi->attached_connector;
- struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc);
const struct drm_display_mode *fixed_mode = intel_connector->panel.fixed_mode;
struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode;
int ret;
@@ -279,11 +278,11 @@ static int intel_dsi_compute_config(struct intel_encoder *encoder,
intel_fixed_panel_mode(fixed_mode, adjusted_mode);
if (HAS_GMCH(dev_priv))
- intel_gmch_panel_fitting(crtc, pipe_config,
- conn_state->scaling_mode);
+ ret = intel_gmch_panel_fitting(pipe_config, conn_state);
else
- intel_pch_panel_fitting(crtc, pipe_config,
- conn_state->scaling_mode);
+ ret = intel_pch_panel_fitting(pipe_config, conn_state);
+ if (ret)
+ return ret;
}
if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN)
@@ -759,7 +758,8 @@ static void intel_dsi_unprepare(struct intel_encoder *encoder);
* DSI port enable has to be done before pipe and plane enable, so we do it in
* the pre_enable hook instead of the enable hook.
*/
-static void intel_dsi_pre_enable(struct intel_encoder *encoder,
+static void intel_dsi_pre_enable(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *pipe_config,
const struct drm_connector_state *conn_state)
{
@@ -858,11 +858,12 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder,
intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_BACKLIGHT_ON);
}
-static void bxt_dsi_enable(struct intel_encoder *encoder,
+static void bxt_dsi_enable(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *crtc_state,
const struct drm_connector_state *conn_state)
{
- WARN_ON(crtc_state->has_pch_encoder);
+ drm_WARN_ON(state->base.dev, crtc_state->has_pch_encoder);
intel_crtc_vblank_on(crtc_state);
}
@@ -871,14 +872,16 @@ static void bxt_dsi_enable(struct intel_encoder *encoder,
* DSI port disable has to be done after pipe and plane disable, so we do it in
* the post_disable hook.
*/
-static void intel_dsi_disable(struct intel_encoder *encoder,
+static void intel_dsi_disable(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *old_crtc_state,
const struct drm_connector_state *old_conn_state)
{
+ struct drm_i915_private *i915 = to_i915(encoder->base.dev);
struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
enum port port;
- DRM_DEBUG_KMS("\n");
+ drm_dbg_kms(&i915->drm, "\n");
intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_BACKLIGHT_OFF);
intel_panel_disable_backlight(old_conn_state);
@@ -906,7 +909,8 @@ static void intel_dsi_clear_device_ready(struct intel_encoder *encoder)
vlv_dsi_clear_device_ready(encoder);
}
-static void intel_dsi_post_disable(struct intel_encoder *encoder,
+static void intel_dsi_post_disable(struct intel_atomic_state *state,
+ struct intel_encoder *encoder,
const struct intel_crtc_state *old_crtc_state,
const struct drm_connector_state *old_conn_state)
{
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
index 34be4c0ee7c5..bc0223716906 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
@@ -108,7 +108,7 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj,
if (clflush) {
i915_sw_fence_await_reservation(&clflush->base.chain,
obj->base.resv, NULL, true,
- I915_FENCE_TIMEOUT,
+ i915_fence_timeout(to_i915(obj->base.dev)),
I915_FENCE_GFP);
dma_resv_add_excl_fence(obj->base.resv, &clflush->base.dma);
dma_fence_work_commit(&clflush->base);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
index 0598e5382a1d..d3a86a4d5c04 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
@@ -6,7 +6,6 @@
#include "i915_drv.h"
#include "gt/intel_context.h"
#include "gt/intel_engine_pm.h"
-#include "gt/intel_engine_pool.h"
#include "i915_gem_client_blt.h"
#include "i915_gem_object_blt.h"
@@ -289,8 +288,7 @@ int i915_gem_schedule_fill_pages_blt(struct drm_i915_gem_object *obj,
i915_gem_object_lock(obj);
err = i915_sw_fence_await_reservation(&work->wait,
- obj->base.resv, NULL,
- true, I915_FENCE_TIMEOUT,
+ obj->base.resv, NULL, true, 0,
I915_FENCE_GFP);
if (err < 0) {
dma_fence_set_error(&work->dma, err);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 68326ad3b2e0..900ea8b7fc8f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -130,9 +130,7 @@ static void lut_close(struct i915_gem_context *ctx)
if (&lut->obj_link != &obj->lut_list) {
i915_lut_handle_free(lut);
radix_tree_iter_delete(&ctx->handles_vma, &iter, slot);
- if (atomic_dec_and_test(&vma->open_count) &&
- !i915_vma_is_ggtt(vma))
- i915_vma_close(vma);
+ i915_vma_close(vma);
i915_gem_object_put(obj);
}
@@ -570,23 +568,19 @@ static void engines_idle_release(struct i915_gem_context *ctx,
engines->ctx = i915_gem_context_get(ctx);
for_each_gem_engine(ce, engines, it) {
- struct dma_fence *fence;
- int err = 0;
+ int err;
/* serialises with execbuf */
set_bit(CONTEXT_CLOSED_BIT, &ce->flags);
if (!intel_context_pin_if_active(ce))
continue;
- fence = i915_active_fence_get(&ce->timeline->last_request);
- if (fence) {
- err = i915_sw_fence_await_dma_fence(&engines->fence,
- fence, 0,
- GFP_KERNEL);
- dma_fence_put(fence);
- }
+ /* Wait until context is finally scheduled out and retired */
+ err = i915_sw_fence_await_active(&engines->fence,
+ &ce->active,
+ I915_ACTIVE_AWAIT_BARRIER);
intel_context_unpin(ce);
- if (err < 0)
+ if (err)
goto kill;
}
@@ -757,21 +751,46 @@ err_free:
return ERR_PTR(err);
}
+static inline struct i915_gem_engines *
+__context_engines_await(const struct i915_gem_context *ctx)
+{
+ struct i915_gem_engines *engines;
+
+ rcu_read_lock();
+ do {
+ engines = rcu_dereference(ctx->engines);
+ GEM_BUG_ON(!engines);
+
+ if (unlikely(!i915_sw_fence_await(&engines->fence)))
+ continue;
+
+ if (likely(engines == rcu_access_pointer(ctx->engines)))
+ break;
+
+ i915_sw_fence_complete(&engines->fence);
+ } while (1);
+ rcu_read_unlock();
+
+ return engines;
+}
+
static int
context_apply_all(struct i915_gem_context *ctx,
int (*fn)(struct intel_context *ce, void *data),
void *data)
{
struct i915_gem_engines_iter it;
+ struct i915_gem_engines *e;
struct intel_context *ce;
int err = 0;
- for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
+ e = __context_engines_await(ctx);
+ for_each_gem_engine(ce, e, it) {
err = fn(ce, data);
if (err)
break;
}
- i915_gem_context_unlock_engines(ctx);
+ i915_sw_fence_complete(&e->fence);
return err;
}
@@ -786,11 +805,13 @@ static int __apply_ppgtt(struct intel_context *ce, void *vm)
static struct i915_address_space *
__set_ppgtt(struct i915_gem_context *ctx, struct i915_address_space *vm)
{
- struct i915_address_space *old = i915_gem_context_vm(ctx);
+ struct i915_address_space *old;
+ old = rcu_replace_pointer(ctx->vm,
+ i915_vm_open(vm),
+ lockdep_is_held(&ctx->mutex));
GEM_BUG_ON(old && i915_vm_is_4lvl(vm) != i915_vm_is_4lvl(old));
- rcu_assign_pointer(ctx->vm, i915_vm_open(vm));
context_apply_all(ctx, __apply_ppgtt, vm);
return old;
@@ -1069,30 +1090,6 @@ static void cb_retire(struct i915_active *base)
kfree(cb);
}
-static inline struct i915_gem_engines *
-__context_engines_await(const struct i915_gem_context *ctx)
-{
- struct i915_gem_engines *engines;
-
- rcu_read_lock();
- do {
- engines = rcu_dereference(ctx->engines);
- if (unlikely(!engines))
- break;
-
- if (unlikely(!i915_sw_fence_await(&engines->fence)))
- continue;
-
- if (likely(engines == rcu_access_pointer(ctx->engines)))
- break;
-
- i915_sw_fence_complete(&engines->fence);
- } while (1);
- rcu_read_unlock();
-
- return engines;
-}
-
I915_SELFTEST_DECLARE(static intel_engine_mask_t context_barrier_inject_fault);
static int context_barrier_task(struct i915_gem_context *ctx,
intel_engine_mask_t engines,
@@ -1401,10 +1398,10 @@ static int get_ringsize(struct i915_gem_context *ctx,
return 0;
}
-static int
-user_to_context_sseu(struct drm_i915_private *i915,
- const struct drm_i915_gem_context_param_sseu *user,
- struct intel_sseu *context)
+int
+i915_gem_user_to_context_sseu(struct drm_i915_private *i915,
+ const struct drm_i915_gem_context_param_sseu *user,
+ struct intel_sseu *context)
{
const struct sseu_dev_info *device = &RUNTIME_INFO(i915)->sseu;
@@ -1539,7 +1536,7 @@ static int set_sseu(struct i915_gem_context *ctx,
goto out_ce;
}
- ret = user_to_context_sseu(i915, &user_sseu, &sseu);
+ ret = i915_gem_user_to_context_sseu(i915, &user_sseu, &sseu);
if (ret)
goto out_ce;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h
index f1d884d304bd..3702b2fb27ab 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h
@@ -225,4 +225,8 @@ i915_gem_engines_iter_next(struct i915_gem_engines_iter *it);
struct i915_lut_handle *i915_lut_handle_alloc(void);
void i915_lut_handle_free(struct i915_lut_handle *lut);
+int i915_gem_user_to_context_sseu(struct drm_i915_private *i915,
+ const struct drm_i915_gem_context_param_sseu *user,
+ struct intel_sseu *context);
+
#endif /* !__I915_GEM_CONTEXT_H__ */
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index 4f96c8788a2e..7f76fc68f498 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -368,7 +368,7 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
struct drm_i915_private *i915 = to_i915(obj->base.dev);
struct i915_vma *vma;
- if (!atomic_read(&obj->bind_count))
+ if (list_empty(&obj->vma.list))
return;
mutex_lock(&i915->ggtt.vm.mutex);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index b7440f06c5e2..3ce185670ca4 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -15,8 +15,8 @@
#include "gem/i915_gem_ioctls.h"
#include "gt/intel_context.h"
-#include "gt/intel_engine_pool.h"
#include "gt/intel_gt.h"
+#include "gt/intel_gt_buffer_pool.h"
#include "gt/intel_gt_pm.h"
#include "gt/intel_ring.h"
@@ -40,6 +40,11 @@ struct eb_vma {
u32 handle;
};
+struct eb_vma_array {
+ struct kref kref;
+ struct eb_vma vma[];
+};
+
enum {
FORCE_CPU_RELOC = 1,
FORCE_GTT_RELOC,
@@ -52,7 +57,6 @@ enum {
#define __EXEC_OBJECT_NEEDS_MAP BIT(29)
#define __EXEC_OBJECT_NEEDS_BIAS BIT(28)
#define __EXEC_OBJECT_INTERNAL_FLAGS (~0u << 28) /* all of the above */
-#define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE)
#define __EXEC_HAS_RELOC BIT(31)
#define __EXEC_INTERNAL_FLAGS (~0u << 31)
@@ -264,7 +268,9 @@ struct i915_execbuffer {
bool has_fence : 1;
bool needs_unfenced : 1;
+ struct i915_vma *target;
struct i915_request *rq;
+ struct i915_vma *rq_vma;
u32 *rq_cmd;
unsigned int rq_size;
} reloc_cache;
@@ -283,6 +289,7 @@ struct i915_execbuffer {
*/
int lut_size;
struct hlist_head *buckets; /** ht for relocation handles */
+ struct eb_vma_array *array;
};
static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
@@ -292,8 +299,62 @@ static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
eb->args->batch_len);
}
+static struct eb_vma_array *eb_vma_array_create(unsigned int count)
+{
+ struct eb_vma_array *arr;
+
+ arr = kvmalloc(struct_size(arr, vma, count), GFP_KERNEL | __GFP_NOWARN);
+ if (!arr)
+ return NULL;
+
+ kref_init(&arr->kref);
+ arr->vma[0].vma = NULL;
+
+ return arr;
+}
+
+static inline void eb_unreserve_vma(struct eb_vma *ev)
+{
+ struct i915_vma *vma = ev->vma;
+
+ if (unlikely(ev->flags & __EXEC_OBJECT_HAS_FENCE))
+ __i915_vma_unpin_fence(vma);
+
+ if (ev->flags & __EXEC_OBJECT_HAS_PIN)
+ __i915_vma_unpin(vma);
+
+ ev->flags &= ~(__EXEC_OBJECT_HAS_PIN |
+ __EXEC_OBJECT_HAS_FENCE);
+}
+
+static void eb_vma_array_destroy(struct kref *kref)
+{
+ struct eb_vma_array *arr = container_of(kref, typeof(*arr), kref);
+ struct eb_vma *ev = arr->vma;
+
+ while (ev->vma) {
+ eb_unreserve_vma(ev);
+ i915_vma_put(ev->vma);
+ ev++;
+ }
+
+ kvfree(arr);
+}
+
+static void eb_vma_array_put(struct eb_vma_array *arr)
+{
+ kref_put(&arr->kref, eb_vma_array_destroy);
+}
+
static int eb_create(struct i915_execbuffer *eb)
{
+ /* Allocate an extra slot for use by the command parser + sentinel */
+ eb->array = eb_vma_array_create(eb->buffer_count + 2);
+ if (!eb->array)
+ return -ENOMEM;
+
+ eb->vma = eb->array->vma;
+
if (!(eb->args->flags & I915_EXEC_HANDLE_LUT)) {
unsigned int size = 1 + ilog2(eb->buffer_count);
@@ -327,8 +388,10 @@ static int eb_create(struct i915_execbuffer *eb)
break;
} while (--size);
- if (unlikely(!size))
+ if (unlikely(!size)) {
+ eb_vma_array_put(eb->array);
return -ENOMEM;
+ }
eb->lut_size = size;
} else {
@@ -368,6 +431,32 @@ eb_vma_misplaced(const struct drm_i915_gem_exec_object2 *entry,
return false;
}
+static u64 eb_pin_flags(const struct drm_i915_gem_exec_object2 *entry,
+ unsigned int exec_flags)
+{
+ u64 pin_flags = 0;
+
+ if (exec_flags & EXEC_OBJECT_NEEDS_GTT)
+ pin_flags |= PIN_GLOBAL;
+
+ /*
+ * Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset,
+ * limit address to the first 4GBs for unflagged objects.
+ */
+ if (!(exec_flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS))
+ pin_flags |= PIN_ZONE_4G;
+
+ if (exec_flags & __EXEC_OBJECT_NEEDS_MAP)
+ pin_flags |= PIN_MAPPABLE;
+
+ if (exec_flags & EXEC_OBJECT_PINNED)
+ pin_flags |= entry->offset | PIN_OFFSET_FIXED;
+ else if (exec_flags & __EXEC_OBJECT_NEEDS_BIAS)
+ pin_flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
+
+ return pin_flags;
+}
+
static inline bool
eb_pin_vma(struct i915_execbuffer *eb,
const struct drm_i915_gem_exec_object2 *entry,
@@ -385,8 +474,19 @@ eb_pin_vma(struct i915_execbuffer *eb,
if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_GTT))
pin_flags |= PIN_GLOBAL;
- if (unlikely(i915_vma_pin(vma, 0, 0, pin_flags)))
- return false;
+ /* Attempt to reuse the current location if available */
+ if (unlikely(i915_vma_pin(vma, 0, 0, pin_flags))) {
+ if (entry->flags & EXEC_OBJECT_PINNED)
+ return false;
+
+ /* Failing that pick any _free_ space if suitable */
+ if (unlikely(i915_vma_pin(vma,
+ entry->pad_to_size,
+ entry->alignment,
+ eb_pin_flags(entry, ev->flags) |
+ PIN_USER | PIN_NOEVICT)))
+ return false;
+ }
if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_FENCE)) {
if (unlikely(i915_vma_pin_fence(vma))) {
@@ -402,26 +502,6 @@ eb_pin_vma(struct i915_execbuffer *eb,
return !eb_vma_misplaced(entry, vma, ev->flags);
}
-static inline void __eb_unreserve_vma(struct i915_vma *vma, unsigned int flags)
-{
- GEM_BUG_ON(!(flags & __EXEC_OBJECT_HAS_PIN));
-
- if (unlikely(flags & __EXEC_OBJECT_HAS_FENCE))
- __i915_vma_unpin_fence(vma);
-
- __i915_vma_unpin(vma);
-}
-
-static inline void
-eb_unreserve_vma(struct eb_vma *ev)
-{
- if (!(ev->flags & __EXEC_OBJECT_HAS_PIN))
- return;
-
- __eb_unreserve_vma(ev->vma, ev->flags);
- ev->flags &= ~__EXEC_OBJECT_RESERVED;
-}
-
static int
eb_validate_vma(struct i915_execbuffer *eb,
struct drm_i915_gem_exec_object2 *entry,
@@ -481,7 +561,7 @@ eb_add_vma(struct i915_execbuffer *eb,
GEM_BUG_ON(i915_vma_is_closed(vma));
- ev->vma = i915_vma_get(vma);
+ ev->vma = vma;
ev->exec = entry;
ev->flags = entry->flags;
@@ -547,28 +627,9 @@ static int eb_reserve_vma(const struct i915_execbuffer *eb,
u64 pin_flags)
{
struct drm_i915_gem_exec_object2 *entry = ev->exec;
- unsigned int exec_flags = ev->flags;
struct i915_vma *vma = ev->vma;
int err;
- if (exec_flags & EXEC_OBJECT_NEEDS_GTT)
- pin_flags |= PIN_GLOBAL;
-
- /*
- * Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset,
- * limit address to the first 4GBs for unflagged objects.
- */
- if (!(exec_flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS))
- pin_flags |= PIN_ZONE_4G;
-
- if (exec_flags & __EXEC_OBJECT_NEEDS_MAP)
- pin_flags |= PIN_MAPPABLE;
-
- if (exec_flags & EXEC_OBJECT_PINNED)
- pin_flags |= entry->offset | PIN_OFFSET_FIXED;
- else if (exec_flags & __EXEC_OBJECT_NEEDS_BIAS)
- pin_flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
-
if (drm_mm_node_allocated(&vma->node) &&
eb_vma_misplaced(entry, vma, ev->flags)) {
err = i915_vma_unbind(vma);
@@ -578,7 +639,7 @@ static int eb_reserve_vma(const struct i915_execbuffer *eb,
err = i915_vma_pin(vma,
entry->pad_to_size, entry->alignment,
- pin_flags);
+ eb_pin_flags(entry, ev->flags) | pin_flags);
if (err)
return err;
@@ -587,7 +648,7 @@ static int eb_reserve_vma(const struct i915_execbuffer *eb,
eb->args->flags |= __EXEC_HAS_RELOC;
}
- if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_FENCE)) {
+ if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_FENCE)) {
err = i915_vma_pin_fence(vma);
if (unlikely(err)) {
i915_vma_unpin(vma);
@@ -595,10 +656,10 @@ static int eb_reserve_vma(const struct i915_execbuffer *eb,
}
if (vma->fence)
- exec_flags |= __EXEC_OBJECT_HAS_FENCE;
+ ev->flags |= __EXEC_OBJECT_HAS_FENCE;
}
- ev->flags = exec_flags | __EXEC_OBJECT_HAS_PIN;
+ ev->flags |= __EXEC_OBJECT_HAS_PIN;
GEM_BUG_ON(eb_vma_misplaced(entry, vma, ev->flags));
return 0;
@@ -728,77 +789,117 @@ static int eb_select_context(struct i915_execbuffer *eb)
return 0;
}
-static int eb_lookup_vmas(struct i915_execbuffer *eb)
+static int __eb_add_lut(struct i915_execbuffer *eb,
+ u32 handle, struct i915_vma *vma)
{
- struct radix_tree_root *handles_vma = &eb->gem_context->handles_vma;
- struct drm_i915_gem_object *obj;
- unsigned int i, batch;
+ struct i915_gem_context *ctx = eb->gem_context;
+ struct i915_lut_handle *lut;
int err;
- if (unlikely(i915_gem_context_is_closed(eb->gem_context)))
- return -ENOENT;
+ lut = i915_lut_handle_alloc();
+ if (unlikely(!lut))
+ return -ENOMEM;
- INIT_LIST_HEAD(&eb->relocs);
- INIT_LIST_HEAD(&eb->unbound);
+ i915_vma_get(vma);
+ if (!atomic_fetch_inc(&vma->open_count))
+ i915_vma_reopen(vma);
+ lut->handle = handle;
+ lut->ctx = ctx;
+
+ /* Check that the context hasn't been closed in the meantime */
+ err = -EINTR;
+ if (!mutex_lock_interruptible(&ctx->mutex)) {
+ err = -ENOENT;
+ if (likely(!i915_gem_context_is_closed(ctx)))
+ err = radix_tree_insert(&ctx->handles_vma, handle, vma);
+ if (err == 0) { /* And nor has this handle */
+ struct drm_i915_gem_object *obj = vma->obj;
+
+ i915_gem_object_lock(obj);
+ if (idr_find(&eb->file->object_idr, handle) == obj) {
+ list_add(&lut->obj_link, &obj->lut_list);
+ } else {
+ radix_tree_delete(&ctx->handles_vma, handle);
+ err = -ENOENT;
+ }
+ i915_gem_object_unlock(obj);
+ }
+ mutex_unlock(&ctx->mutex);
+ }
+ if (unlikely(err))
+ goto err;
- batch = eb_batch_index(eb);
+ return 0;
- for (i = 0; i < eb->buffer_count; i++) {
- u32 handle = eb->exec[i].handle;
- struct i915_lut_handle *lut;
+err:
+ i915_vma_close(vma);
+ i915_vma_put(vma);
+ i915_lut_handle_free(lut);
+ return err;
+}
+
+static struct i915_vma *eb_lookup_vma(struct i915_execbuffer *eb, u32 handle)
+{
+ do {
+ struct drm_i915_gem_object *obj;
struct i915_vma *vma;
+ int err;
- vma = radix_tree_lookup(handles_vma, handle);
+ rcu_read_lock();
+ vma = radix_tree_lookup(&eb->gem_context->handles_vma, handle);
if (likely(vma))
- goto add_vma;
+ vma = i915_vma_tryget(vma);
+ rcu_read_unlock();
+ if (likely(vma))
+ return vma;
obj = i915_gem_object_lookup(eb->file, handle);
- if (unlikely(!obj)) {
- err = -ENOENT;
- goto err_vma;
- }
+ if (unlikely(!obj))
+ return ERR_PTR(-ENOENT);
vma = i915_vma_instance(obj, eb->context->vm, NULL);
if (IS_ERR(vma)) {
- err = PTR_ERR(vma);
- goto err_obj;
+ i915_gem_object_put(obj);
+ return vma;
}
- lut = i915_lut_handle_alloc();
- if (unlikely(!lut)) {
- err = -ENOMEM;
- goto err_obj;
- }
+ err = __eb_add_lut(eb, handle, vma);
+ if (likely(!err))
+ return vma;
- err = radix_tree_insert(handles_vma, handle, vma);
- if (unlikely(err)) {
- i915_lut_handle_free(lut);
- goto err_obj;
- }
+ i915_gem_object_put(obj);
+ if (err != -EEXIST)
+ return ERR_PTR(err);
+ } while (1);
+}
- /* transfer ref to lut */
- if (!atomic_fetch_inc(&vma->open_count))
- i915_vma_reopen(vma);
- lut->handle = handle;
- lut->ctx = eb->gem_context;
+static int eb_lookup_vmas(struct i915_execbuffer *eb)
+{
+ unsigned int batch = eb_batch_index(eb);
+ unsigned int i;
+ int err = 0;
- i915_gem_object_lock(obj);
- list_add(&lut->obj_link, &obj->lut_list);
- i915_gem_object_unlock(obj);
+ INIT_LIST_HEAD(&eb->relocs);
+ INIT_LIST_HEAD(&eb->unbound);
+
+ for (i = 0; i < eb->buffer_count; i++) {
+ struct i915_vma *vma;
+
+ vma = eb_lookup_vma(eb, eb->exec[i].handle);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ break;
+ }
-add_vma:
err = eb_validate_vma(eb, &eb->exec[i], vma);
- if (unlikely(err))
- goto err_vma;
+ if (unlikely(err)) {
+ i915_vma_put(vma);
+ break;
+ }
eb_add_vma(eb, i, batch, vma);
}
- return 0;
-
-err_obj:
- i915_gem_object_put(obj);
-err_vma:
eb->vma[i].vma = NULL;
return err;
}
@@ -823,31 +924,13 @@ eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle)
}
}
-static void eb_release_vmas(const struct i915_execbuffer *eb)
-{
- const unsigned int count = eb->buffer_count;
- unsigned int i;
-
- for (i = 0; i < count; i++) {
- struct eb_vma *ev = &eb->vma[i];
- struct i915_vma *vma = ev->vma;
-
- if (!vma)
- break;
-
- eb->vma[i].vma = NULL;
-
- if (ev->flags & __EXEC_OBJECT_HAS_PIN)
- __eb_unreserve_vma(vma, ev->flags);
-
- i915_vma_put(vma);
- }
-}
-
static void eb_destroy(const struct i915_execbuffer *eb)
{
GEM_BUG_ON(eb->reloc_cache.rq);
+ if (eb->array)
+ eb_vma_array_put(eb->array);
+
if (eb->lut_size > 0)
kfree(eb->buckets);
}
@@ -872,7 +955,7 @@ static void reloc_cache_init(struct reloc_cache *cache,
cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment;
cache->node.flags = 0;
cache->rq = NULL;
- cache->rq_size = 0;
+ cache->target = NULL;
}
static inline void *unmask_page(unsigned long p)
@@ -894,29 +977,122 @@ static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache)
return &i915->ggtt;
}
-static void reloc_gpu_flush(struct reloc_cache *cache)
+#define RELOC_TAIL 4
+
+static int reloc_gpu_chain(struct reloc_cache *cache)
+{
+ struct intel_gt_buffer_pool_node *pool;
+ struct i915_request *rq = cache->rq;
+ struct i915_vma *batch;
+ u32 *cmd;
+ int err;
+
+ pool = intel_gt_get_buffer_pool(rq->engine->gt, PAGE_SIZE);
+ if (IS_ERR(pool))
+ return PTR_ERR(pool);
+
+ batch = i915_vma_instance(pool->obj, rq->context->vm, NULL);
+ if (IS_ERR(batch)) {
+ err = PTR_ERR(batch);
+ goto out_pool;
+ }
+
+ err = i915_vma_pin(batch, 0, 0, PIN_USER | PIN_NONBLOCK);
+ if (err)
+ goto out_pool;
+
+ GEM_BUG_ON(cache->rq_size + RELOC_TAIL > PAGE_SIZE / sizeof(u32));
+ cmd = cache->rq_cmd + cache->rq_size;
+ *cmd++ = MI_ARB_CHECK;
+ if (cache->gen >= 8)
+ *cmd++ = MI_BATCH_BUFFER_START_GEN8;
+ else if (cache->gen >= 6)
+ *cmd++ = MI_BATCH_BUFFER_START;
+ else
+ *cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
+ *cmd++ = lower_32_bits(batch->node.start);
+ *cmd++ = upper_32_bits(batch->node.start); /* Always 0 for gen<8 */
+ i915_gem_object_flush_map(cache->rq_vma->obj);
+ i915_gem_object_unpin_map(cache->rq_vma->obj);
+ cache->rq_vma = NULL;
+
+ err = intel_gt_buffer_pool_mark_active(pool, rq);
+ if (err == 0) {
+ i915_vma_lock(batch);
+ err = i915_request_await_object(rq, batch->obj, false);
+ if (err == 0)
+ err = i915_vma_move_to_active(batch, rq, 0);
+ i915_vma_unlock(batch);
+ }
+ i915_vma_unpin(batch);
+ if (err)
+ goto out_pool;
+
+ cmd = i915_gem_object_pin_map(batch->obj,
+ cache->has_llc ?
+ I915_MAP_FORCE_WB :
+ I915_MAP_FORCE_WC);
+ if (IS_ERR(cmd)) {
+ err = PTR_ERR(cmd);
+ goto out_pool;
+ }
+
+ /* Return with batch mapping (cmd) still pinned */
+ cache->rq_cmd = cmd;
+ cache->rq_size = 0;
+ cache->rq_vma = batch;
+
+out_pool:
+ intel_gt_buffer_pool_put(pool);
+ return err;
+}
+
+static unsigned int reloc_bb_flags(const struct reloc_cache *cache)
{
- struct drm_i915_gem_object *obj = cache->rq->batch->obj;
+ return cache->gen > 5 ? 0 : I915_DISPATCH_SECURE;
+}
+
+static int reloc_gpu_flush(struct reloc_cache *cache)
+{
+ struct i915_request *rq;
+ int err;
- GEM_BUG_ON(cache->rq_size >= obj->base.size / sizeof(u32));
- cache->rq_cmd[cache->rq_size] = MI_BATCH_BUFFER_END;
+ rq = fetch_and_zero(&cache->rq);
+ if (!rq)
+ return 0;
- __i915_gem_object_flush_map(obj, 0, sizeof(u32) * (cache->rq_size + 1));
- i915_gem_object_unpin_map(obj);
+ if (cache->rq_vma) {
+ struct drm_i915_gem_object *obj = cache->rq_vma->obj;
- intel_gt_chipset_flush(cache->rq->engine->gt);
+ GEM_BUG_ON(cache->rq_size >= obj->base.size / sizeof(u32));
+ cache->rq_cmd[cache->rq_size++] = MI_BATCH_BUFFER_END;
- i915_request_add(cache->rq);
- cache->rq = NULL;
+ __i915_gem_object_flush_map(obj,
+ 0, sizeof(u32) * cache->rq_size);
+ i915_gem_object_unpin_map(obj);
+ }
+
+ err = 0;
+ if (rq->engine->emit_init_breadcrumb)
+ err = rq->engine->emit_init_breadcrumb(rq);
+ if (!err)
+ err = rq->engine->emit_bb_start(rq,
+ rq->batch->node.start,
+ PAGE_SIZE,
+ reloc_bb_flags(cache));
+ if (err)
+ i915_request_set_error_once(rq, err);
+
+ intel_gt_chipset_flush(rq->engine->gt);
+ i915_request_add(rq);
+
+ return err;
}
static void reloc_cache_reset(struct reloc_cache *cache)
{
void *vaddr;
- if (cache->rq)
- reloc_gpu_flush(cache);
-
if (!cache->vaddr)
return;
@@ -1109,17 +1285,17 @@ static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma)
}
static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
- struct i915_vma *vma,
+ struct intel_engine_cs *engine,
unsigned int len)
{
struct reloc_cache *cache = &eb->reloc_cache;
- struct intel_engine_pool_node *pool;
+ struct intel_gt_buffer_pool_node *pool;
struct i915_request *rq;
struct i915_vma *batch;
u32 *cmd;
int err;
- pool = intel_engine_get_pool(eb->engine, PAGE_SIZE);
+ pool = intel_gt_get_buffer_pool(engine->gt, PAGE_SIZE);
if (IS_ERR(pool))
return PTR_ERR(pool);
@@ -1132,7 +1308,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
goto out_pool;
}
- batch = i915_vma_instance(pool->obj, vma->vm, NULL);
+ batch = i915_vma_instance(pool->obj, eb->context->vm, NULL);
if (IS_ERR(batch)) {
err = PTR_ERR(batch);
goto err_unmap;
@@ -1142,26 +1318,32 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
if (err)
goto err_unmap;
- rq = i915_request_create(eb->context);
+ if (engine == eb->context->engine) {
+ rq = i915_request_create(eb->context);
+ } else {
+ struct intel_context *ce;
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce)) {
+ err = PTR_ERR(ce);
+ goto err_unpin;
+ }
+
+ i915_vm_put(ce->vm);
+ ce->vm = i915_vm_get(eb->context->vm);
+
+ rq = intel_context_create_request(ce);
+ intel_context_put(ce);
+ }
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
goto err_unpin;
}
- err = intel_engine_pool_mark_active(pool, rq);
+ err = intel_gt_buffer_pool_mark_active(pool, rq);
if (err)
goto err_request;
- err = reloc_move_to_gpu(rq, vma);
- if (err)
- goto err_request;
-
- err = eb->engine->emit_bb_start(rq,
- batch->node.start, PAGE_SIZE,
- cache->gen > 5 ? 0 : I915_DISPATCH_SECURE);
- if (err)
- goto skip_request;
-
i915_vma_lock(batch);
err = i915_request_await_object(rq, batch->obj, false);
if (err == 0)
@@ -1176,6 +1358,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
cache->rq = rq;
cache->rq_cmd = cmd;
cache->rq_size = 0;
+ cache->rq_vma = batch;
/* Return with batch mapping (cmd) still pinned */
goto out_pool;
@@ -1189,124 +1372,206 @@ err_unpin:
err_unmap:
i915_gem_object_unpin_map(pool->obj);
out_pool:
- intel_engine_pool_put(pool);
+ intel_gt_buffer_pool_put(pool);
return err;
}
+static bool reloc_can_use_engine(const struct intel_engine_cs *engine)
+{
+ return engine->class != VIDEO_DECODE_CLASS || !IS_GEN(engine->i915, 6);
+}
+
static u32 *reloc_gpu(struct i915_execbuffer *eb,
struct i915_vma *vma,
unsigned int len)
{
struct reloc_cache *cache = &eb->reloc_cache;
u32 *cmd;
-
- if (cache->rq_size > PAGE_SIZE/sizeof(u32) - (len + 1))
- reloc_gpu_flush(cache);
+ int err;
if (unlikely(!cache->rq)) {
- int err;
+ struct intel_engine_cs *engine = eb->engine;
- if (!intel_engine_can_store_dword(eb->engine))
- return ERR_PTR(-ENODEV);
+ if (!reloc_can_use_engine(engine)) {
+ engine = engine->gt->engine_class[COPY_ENGINE_CLASS][0];
+ if (!engine)
+ return ERR_PTR(-ENODEV);
+ }
- err = __reloc_gpu_alloc(eb, vma, len);
+ err = __reloc_gpu_alloc(eb, engine, len);
if (unlikely(err))
return ERR_PTR(err);
}
+ if (vma != cache->target) {
+ err = reloc_move_to_gpu(cache->rq, vma);
+ if (unlikely(err)) {
+ i915_request_set_error_once(cache->rq, err);
+ return ERR_PTR(err);
+ }
+
+ cache->target = vma;
+ }
+
+ if (unlikely(cache->rq_size + len >
+ PAGE_SIZE / sizeof(u32) - RELOC_TAIL)) {
+ err = reloc_gpu_chain(cache);
+ if (unlikely(err)) {
+ i915_request_set_error_once(cache->rq, err);
+ return ERR_PTR(err);
+ }
+ }
+
+ GEM_BUG_ON(cache->rq_size + len >= PAGE_SIZE / sizeof(u32));
cmd = cache->rq_cmd + cache->rq_size;
cache->rq_size += len;
return cmd;
}
-static u64
-relocate_entry(struct i915_vma *vma,
- const struct drm_i915_gem_relocation_entry *reloc,
- struct i915_execbuffer *eb,
- const struct i915_vma *target)
+static inline bool use_reloc_gpu(struct i915_vma *vma)
{
- u64 offset = reloc->offset;
- u64 target_offset = relocation_target(reloc, target);
- bool wide = eb->reloc_cache.use_64bit_reloc;
- void *vaddr;
+ if (DBG_FORCE_RELOC == FORCE_GPU_RELOC)
+ return true;
- if (!eb->reloc_cache.vaddr &&
- (DBG_FORCE_RELOC == FORCE_GPU_RELOC ||
- !dma_resv_test_signaled_rcu(vma->resv, true))) {
- const unsigned int gen = eb->reloc_cache.gen;
- unsigned int len;
- u32 *batch;
- u64 addr;
-
- if (wide)
- len = offset & 7 ? 8 : 5;
- else if (gen >= 4)
- len = 4;
- else
- len = 3;
+ if (DBG_FORCE_RELOC)
+ return false;
- batch = reloc_gpu(eb, vma, len);
- if (IS_ERR(batch))
- goto repeat;
+ return !dma_resv_test_signaled_rcu(vma->resv, true);
+}
- addr = gen8_canonical_addr(vma->node.start + offset);
- if (wide) {
- if (offset & 7) {
- *batch++ = MI_STORE_DWORD_IMM_GEN4;
- *batch++ = lower_32_bits(addr);
- *batch++ = upper_32_bits(addr);
- *batch++ = lower_32_bits(target_offset);
-
- addr = gen8_canonical_addr(addr + 4);
-
- *batch++ = MI_STORE_DWORD_IMM_GEN4;
- *batch++ = lower_32_bits(addr);
- *batch++ = upper_32_bits(addr);
- *batch++ = upper_32_bits(target_offset);
- } else {
- *batch++ = (MI_STORE_DWORD_IMM_GEN4 | (1 << 21)) + 1;
- *batch++ = lower_32_bits(addr);
- *batch++ = upper_32_bits(addr);
- *batch++ = lower_32_bits(target_offset);
- *batch++ = upper_32_bits(target_offset);
- }
- } else if (gen >= 6) {
+static unsigned long vma_phys_addr(struct i915_vma *vma, u32 offset)
+{
+ struct page *page;
+ unsigned long addr;
+
+ GEM_BUG_ON(vma->pages != vma->obj->mm.pages);
+
+ page = i915_gem_object_get_page(vma->obj, offset >> PAGE_SHIFT);
+ addr = PFN_PHYS(page_to_pfn(page));
+ GEM_BUG_ON(overflows_type(addr, u32)); /* expected dma32 */
+
+ return addr + offset_in_page(offset);
+}
+
+static bool __reloc_entry_gpu(struct i915_execbuffer *eb,
+ struct i915_vma *vma,
+ u64 offset,
+ u64 target_addr)
+{
+ const unsigned int gen = eb->reloc_cache.gen;
+ unsigned int len;
+ u32 *batch;
+ u64 addr;
+
+ if (gen >= 8)
+ len = offset & 7 ? 8 : 5;
+ else if (gen >= 4)
+ len = 4;
+ else
+ len = 3;
+
+ batch = reloc_gpu(eb, vma, len);
+ if (IS_ERR(batch))
+ return false;
+
+ addr = gen8_canonical_addr(vma->node.start + offset);
+ if (gen >= 8) {
+ if (offset & 7) {
*batch++ = MI_STORE_DWORD_IMM_GEN4;
- *batch++ = 0;
- *batch++ = addr;
- *batch++ = target_offset;
- } else if (gen >= 4) {
- *batch++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
- *batch++ = 0;
- *batch++ = addr;
- *batch++ = target_offset;
+ *batch++ = lower_32_bits(addr);
+ *batch++ = upper_32_bits(addr);
+ *batch++ = lower_32_bits(target_addr);
+
+ addr = gen8_canonical_addr(addr + 4);
+
+ *batch++ = MI_STORE_DWORD_IMM_GEN4;
+ *batch++ = lower_32_bits(addr);
+ *batch++ = upper_32_bits(addr);
+ *batch++ = upper_32_bits(target_addr);
} else {
- *batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
- *batch++ = addr;
- *batch++ = target_offset;
+ *batch++ = (MI_STORE_DWORD_IMM_GEN4 | (1 << 21)) + 1;
+ *batch++ = lower_32_bits(addr);
+ *batch++ = upper_32_bits(addr);
+ *batch++ = lower_32_bits(target_addr);
+ *batch++ = upper_32_bits(target_addr);
}
-
- goto out;
+ } else if (gen >= 6) {
+ *batch++ = MI_STORE_DWORD_IMM_GEN4;
+ *batch++ = 0;
+ *batch++ = addr;
+ *batch++ = target_addr;
+ } else if (IS_I965G(eb->i915)) {
+ *batch++ = MI_STORE_DWORD_IMM_GEN4;
+ *batch++ = 0;
+ *batch++ = vma_phys_addr(vma, offset);
+ *batch++ = target_addr;
+ } else if (gen >= 4) {
+ *batch++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+ *batch++ = 0;
+ *batch++ = addr;
+ *batch++ = target_addr;
+ } else if (gen >= 3 &&
+ !(IS_I915G(eb->i915) || IS_I915GM(eb->i915))) {
+ *batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
+ *batch++ = addr;
+ *batch++ = target_addr;
+ } else {
+ *batch++ = MI_STORE_DWORD_IMM;
+ *batch++ = vma_phys_addr(vma, offset);
+ *batch++ = target_addr;
}
+ return true;
+}
+
+static bool reloc_entry_gpu(struct i915_execbuffer *eb,
+ struct i915_vma *vma,
+ u64 offset,
+ u64 target_addr)
+{
+ if (eb->reloc_cache.vaddr)
+ return false;
+
+ if (!use_reloc_gpu(vma))
+ return false;
+
+ return __reloc_entry_gpu(eb, vma, offset, target_addr);
+}
+
+static u64
+relocate_entry(struct i915_vma *vma,
+ const struct drm_i915_gem_relocation_entry *reloc,
+ struct i915_execbuffer *eb,
+ const struct i915_vma *target)
+{
+ u64 target_addr = relocation_target(reloc, target);
+ u64 offset = reloc->offset;
+
+ if (!reloc_entry_gpu(eb, vma, offset, target_addr)) {
+ bool wide = eb->reloc_cache.use_64bit_reloc;
+ void *vaddr;
+
repeat:
- vaddr = reloc_vaddr(vma->obj, &eb->reloc_cache, offset >> PAGE_SHIFT);
- if (IS_ERR(vaddr))
- return PTR_ERR(vaddr);
+ vaddr = reloc_vaddr(vma->obj,
+ &eb->reloc_cache,
+ offset >> PAGE_SHIFT);
+ if (IS_ERR(vaddr))
+ return PTR_ERR(vaddr);
- clflush_write32(vaddr + offset_in_page(offset),
- lower_32_bits(target_offset),
- eb->reloc_cache.vaddr);
+ GEM_BUG_ON(!IS_ALIGNED(offset, sizeof(u32)));
+ clflush_write32(vaddr + offset_in_page(offset),
+ lower_32_bits(target_addr),
+ eb->reloc_cache.vaddr);
- if (wide) {
- offset += sizeof(u32);
- target_offset >>= 32;
- wide = false;
- goto repeat;
+ if (wide) {
+ offset += sizeof(u32);
+ target_addr >>= 32;
+ wide = false;
+ goto repeat;
+ }
}
-out:
return target->node.start | UPDATE;
}
@@ -1411,12 +1676,11 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev)
{
#define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
struct drm_i915_gem_relocation_entry stack[N_RELOC(512)];
- struct drm_i915_gem_relocation_entry __user *urelocs;
const struct drm_i915_gem_exec_object2 *entry = ev->exec;
- unsigned int remain;
+ struct drm_i915_gem_relocation_entry __user *urelocs =
+ u64_to_user_ptr(entry->relocs_ptr);
+ unsigned long remain = entry->relocation_count;
- urelocs = u64_to_user_ptr(entry->relocs_ptr);
- remain = entry->relocation_count;
if (unlikely(remain > N_RELOC(ULONG_MAX)))
return -EINVAL;
@@ -1425,13 +1689,13 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev)
* to read. However, if the array is not writable the user loses
* the updated relocation values.
*/
- if (unlikely(!access_ok(urelocs, remain*sizeof(*urelocs))))
+ if (unlikely(!access_ok(urelocs, remain * sizeof(*urelocs))))
return -EFAULT;
do {
struct drm_i915_gem_relocation_entry *r = stack;
unsigned int count =
- min_t(unsigned int, remain, ARRAY_SIZE(stack));
+ min_t(unsigned long, remain, ARRAY_SIZE(stack));
unsigned int copied;
/*
@@ -1494,9 +1758,7 @@ static int eb_relocate(struct i915_execbuffer *eb)
{
int err;
- mutex_lock(&eb->gem_context->mutex);
err = eb_lookup_vmas(eb);
- mutex_unlock(&eb->gem_context->mutex);
if (err)
return err;
@@ -1509,15 +1771,20 @@ static int eb_relocate(struct i915_execbuffer *eb)
/* The objects are in their final locations, apply the relocations. */
if (eb->args->flags & __EXEC_HAS_RELOC) {
struct eb_vma *ev;
+ int flush;
list_for_each_entry(ev, &eb->relocs, reloc_link) {
err = eb_relocate_vma(eb, ev);
if (err)
- return err;
+ break;
}
+
+ flush = reloc_gpu_flush(&eb->reloc_cache);
+ if (!err)
+ err = flush;
}
- return 0;
+ return err;
}
static int eb_move_to_gpu(struct i915_execbuffer *eb)
@@ -1597,19 +1864,15 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
err = i915_vma_move_to_active(vma, eb->request, flags);
i915_vma_unlock(vma);
-
- __eb_unreserve_vma(vma, flags);
- i915_vma_put(vma);
-
- ev->vma = NULL;
+ eb_unreserve_vma(ev);
}
ww_acquire_fini(&acquire);
+ eb_vma_array_put(fetch_and_zero(&eb->array));
+
if (unlikely(err))
goto err_skip;
- eb->exec = NULL;
-
/* Unconditionally flush any chipset caches (for streaming writes). */
intel_gt_chipset_flush(eb->engine->gt);
return 0;
@@ -1784,7 +2047,7 @@ static int eb_parse_pipeline(struct i915_execbuffer *eb,
dma_resv_add_excl_fence(shadow->resv, &pw->base.dma);
dma_resv_unlock(shadow->resv);
- dma_fence_work_commit(&pw->base);
+ dma_fence_work_commit_imm(&pw->base);
return 0;
err_batch_unlock:
@@ -1804,7 +2067,7 @@ err_free:
static int eb_parse(struct i915_execbuffer *eb)
{
struct drm_i915_private *i915 = eb->i915;
- struct intel_engine_pool_node *pool;
+ struct intel_gt_buffer_pool_node *pool;
struct i915_vma *shadow, *trampoline;
unsigned int len;
int err;
@@ -1827,7 +2090,7 @@ static int eb_parse(struct i915_execbuffer *eb)
len += I915_CMD_PARSER_TRAMPOLINE_SIZE;
}
- pool = intel_engine_get_pool(eb->engine, len);
+ pool = intel_gt_get_buffer_pool(eb->engine->gt, len);
if (IS_ERR(pool))
return PTR_ERR(pool);
@@ -1861,6 +2124,7 @@ static int eb_parse(struct i915_execbuffer *eb)
eb->vma[eb->buffer_count].vma = i915_vma_get(shadow);
eb->vma[eb->buffer_count].flags = __EXEC_OBJECT_HAS_PIN;
eb->batch = &eb->vma[eb->buffer_count++];
+ eb->vma[eb->buffer_count].vma = NULL;
eb->trampoline = trampoline;
eb->batch_start_offset = 0;
@@ -1874,7 +2138,7 @@ err_trampoline:
err_shadow:
i915_vma_unpin(shadow);
err:
- intel_engine_pool_put(pool);
+ intel_gt_buffer_pool_put(pool);
return err;
}
@@ -2318,39 +2582,13 @@ static void eb_request_add(struct i915_execbuffer *eb)
/* Check that the context wasn't destroyed before submission */
if (likely(!intel_context_is_closed(eb->context))) {
attr = eb->gem_context->sched;
-
- /*
- * Boost actual workloads past semaphores!
- *
- * With semaphores we spin on one engine waiting for another,
- * simply to reduce the latency of starting our work when
- * the signaler completes. However, if there is any other
- * work that we could be doing on this engine instead, that
- * is better utilisation and will reduce the overall duration
- * of the current work. To avoid PI boosting a semaphore
- * far in the distance past over useful work, we keep a history
- * of any semaphore use along our dependency chain.
- */
- if (!(rq->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN))
- attr.priority |= I915_PRIORITY_NOSEMAPHORE;
-
- /*
- * Boost priorities to new clients (new request flows).
- *
- * Allow interactive/synchronous clients to jump ahead of
- * the bulk clients. (FQ_CODEL)
- */
- if (list_empty(&rq->sched.signalers_list))
- attr.priority |= I915_PRIORITY_WAIT;
} else {
/* Serialise with context_close via the add_to_timeline */
i915_request_set_error_once(rq, -ENOENT);
__i915_request_skip(rq);
}
- local_bh_disable();
__i915_request_queue(rq, &attr);
- local_bh_enable(); /* Kick the execlists tasklet if just scheduled */
/* Try to clean up the client's timeline after submitting the request */
if (prev)
@@ -2369,7 +2607,6 @@ i915_gem_do_execbuffer(struct drm_device *dev,
struct drm_i915_private *i915 = to_i915(dev);
struct i915_execbuffer eb;
struct dma_fence *in_fence = NULL;
- struct dma_fence *exec_fence = NULL;
struct sync_file *out_fence = NULL;
struct i915_vma *batch;
int out_fence_fd = -1;
@@ -2386,8 +2623,6 @@ i915_gem_do_execbuffer(struct drm_device *dev,
args->flags |= __EXEC_HAS_RELOC;
eb.exec = exec;
- eb.vma = (struct eb_vma *)(exec + args->buffer_count + 1);
- eb.vma[0].vma = NULL;
eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
reloc_cache_init(&eb.reloc_cache, eb.i915);
@@ -2414,30 +2649,22 @@ i915_gem_do_execbuffer(struct drm_device *dev,
if (args->flags & I915_EXEC_IS_PINNED)
eb.batch_flags |= I915_DISPATCH_PINNED;
- if (args->flags & I915_EXEC_FENCE_IN) {
+#define IN_FENCES (I915_EXEC_FENCE_IN | I915_EXEC_FENCE_SUBMIT)
+ if (args->flags & IN_FENCES) {
+ if ((args->flags & IN_FENCES) == IN_FENCES)
+ return -EINVAL;
+
in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
if (!in_fence)
return -EINVAL;
}
-
- if (args->flags & I915_EXEC_FENCE_SUBMIT) {
- if (in_fence) {
- err = -EINVAL;
- goto err_in_fence;
- }
-
- exec_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
- if (!exec_fence) {
- err = -EINVAL;
- goto err_in_fence;
- }
- }
+#undef IN_FENCES
if (args->flags & I915_EXEC_FENCE_OUT) {
out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
if (out_fence_fd < 0) {
err = out_fence_fd;
- goto err_exec_fence;
+ goto err_in_fence;
}
}
@@ -2528,14 +2755,13 @@ i915_gem_do_execbuffer(struct drm_device *dev,
}
if (in_fence) {
- err = i915_request_await_dma_fence(eb.request, in_fence);
- if (err < 0)
- goto err_request;
- }
-
- if (exec_fence) {
- err = i915_request_await_execution(eb.request, exec_fence,
- eb.engine->bond_execute);
+ if (args->flags & I915_EXEC_FENCE_SUBMIT)
+ err = i915_request_await_execution(eb.request,
+ in_fence,
+ eb.engine->bond_execute);
+ else
+ err = i915_request_await_dma_fence(eb.request,
+ in_fence);
if (err < 0)
goto err_request;
}
@@ -2563,7 +2789,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
*/
eb.request->batch = batch;
if (batch->private)
- intel_engine_pool_mark_active(batch->private, eb.request);
+ intel_gt_buffer_pool_mark_active(batch->private, eb.request);
trace_i915_request_queue(eb.request, eb.batch_flags);
err = eb_submit(&eb, batch);
@@ -2592,10 +2818,8 @@ err_batch_unpin:
i915_vma_unpin(batch);
err_parse:
if (batch->private)
- intel_engine_pool_put(batch->private);
+ intel_gt_buffer_pool_put(batch->private);
err_vma:
- if (eb.exec)
- eb_release_vmas(&eb);
if (eb.trampoline)
i915_vma_unpin(eb.trampoline);
eb_unpin_engine(&eb);
@@ -2606,8 +2830,6 @@ err_destroy:
err_out_fence:
if (out_fence_fd != -1)
put_unused_fd(out_fence_fd);
-err_exec_fence:
- dma_fence_put(exec_fence);
err_in_fence:
dma_fence_put(in_fence);
return err;
@@ -2615,7 +2837,7 @@ err_in_fence:
static size_t eb_element_size(void)
{
- return sizeof(struct drm_i915_gem_exec_object2) + sizeof(struct eb_vma);
+ return sizeof(struct drm_i915_gem_exec_object2);
}
static bool check_buffer_count(size_t count)
@@ -2671,7 +2893,7 @@ i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data,
/* Copy in the exec list from userland */
exec_list = kvmalloc_array(count, sizeof(*exec_list),
__GFP_NOWARN | GFP_KERNEL);
- exec2_list = kvmalloc_array(count + 1, eb_element_size(),
+ exec2_list = kvmalloc_array(count, eb_element_size(),
__GFP_NOWARN | GFP_KERNEL);
if (exec_list == NULL || exec2_list == NULL) {
drm_dbg(&i915->drm,
@@ -2749,8 +2971,7 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
if (err)
return err;
- /* Allocate an extra slot for use by the command parser */
- exec2_list = kvmalloc_array(count + 1, eb_element_size(),
+ exec2_list = kvmalloc_array(count, eb_element_size(),
__GFP_NOWARN | GFP_KERNEL);
if (exec2_list == NULL) {
drm_dbg(&i915->drm, "Failed to allocate exec list for %zd buffers\n",
@@ -2794,7 +3015,8 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
* And this range already got effectively checked earlier
* when we did the "copy_from_user()" above.
*/
- if (!user_access_begin(user_exec_list, count * sizeof(*user_exec_list)))
+ if (!user_write_access_begin(user_exec_list,
+ count * sizeof(*user_exec_list)))
goto end;
for (i = 0; i < args->buffer_count; i++) {
@@ -2808,7 +3030,7 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
end_user);
}
end_user:
- user_access_end();
+ user_write_access_end();
end:;
}
@@ -2817,3 +3039,7 @@ end:;
kvfree(exec2_list);
return err;
}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/i915_gem_execbuffer.c"
+#endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_fence.c b/drivers/gpu/drm/i915/gem/i915_gem_fence.c
index 2f6100ec2608..8ab842c80f99 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_fence.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_fence.c
@@ -72,8 +72,8 @@ i915_gem_object_lock_fence(struct drm_i915_gem_object *obj)
0, 0);
if (i915_sw_fence_await_reservation(&stub->chain,
- obj->base.resv, NULL,
- true, I915_FENCE_TIMEOUT,
+ obj->base.resv, NULL, true,
+ i915_fence_timeout(to_i915(obj->base.dev)),
I915_FENCE_GFP) < 0)
goto err;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
index b39c24dae64e..70f5f82da288 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -396,6 +396,38 @@ err:
return i915_error_to_vmf_fault(ret);
}
+static int
+vm_access(struct vm_area_struct *area, unsigned long addr,
+ void *buf, int len, int write)
+{
+ struct i915_mmap_offset *mmo = area->vm_private_data;
+ struct drm_i915_gem_object *obj = mmo->obj;
+ void *vaddr;
+
+ if (i915_gem_object_is_readonly(obj) && write)
+ return -EACCES;
+
+ addr -= area->vm_start;
+ if (addr >= obj->base.size)
+ return -EINVAL;
+
+ /* As this is primarily for debugging, let's focus on simplicity */
+ vaddr = i915_gem_object_pin_map(obj, I915_MAP_FORCE_WC);
+ if (IS_ERR(vaddr))
+ return PTR_ERR(vaddr);
+
+ if (write) {
+ memcpy(vaddr + addr, buf, len);
+ __i915_gem_object_flush_map(obj, addr, len);
+ } else {
+ memcpy(buf, vaddr + addr, len);
+ }
+
+ i915_gem_object_unpin_map(obj);
+
+ return len;
+}
+
void __i915_gem_object_release_mmap_gtt(struct drm_i915_gem_object *obj)
{
struct i915_vma *vma;
@@ -745,12 +777,14 @@ static void vm_close(struct vm_area_struct *vma)
static const struct vm_operations_struct vm_ops_gtt = {
.fault = vm_fault_gtt,
+ .access = vm_access,
.open = vm_open,
.close = vm_close,
};
static const struct vm_operations_struct vm_ops_cpu = {
.fault = vm_fault_cpu,
+ .access = vm_access,
.open = vm_open,
.close = vm_close,
};
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 5da9f9e534b9..99356c00c19e 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -135,9 +135,7 @@ void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
if (vma) {
GEM_BUG_ON(vma->obj != obj);
GEM_BUG_ON(!atomic_read(&vma->open_count));
- if (atomic_dec_and_test(&vma->open_count) &&
- !i915_vma_is_ggtt(vma))
- i915_vma_close(vma);
+ i915_vma_close(vma);
}
mutex_unlock(&ctx->mutex);
@@ -164,9 +162,7 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
struct llist_node *freed)
{
struct drm_i915_gem_object *obj, *on;
- intel_wakeref_t wakeref;
- wakeref = intel_runtime_pm_get(&i915->runtime_pm);
llist_for_each_entry_safe(obj, on, freed, freed) {
struct i915_mmap_offset *mmo, *mn;
@@ -206,7 +202,6 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
}
obj->mmo.offsets = RB_ROOT;
- GEM_BUG_ON(atomic_read(&obj->bind_count));
GEM_BUG_ON(obj->userfault_count);
GEM_BUG_ON(!list_empty(&obj->lut_list));
@@ -227,7 +222,6 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
call_rcu(&obj->rcu, __i915_gem_free_object_rcu);
cond_resched();
}
- intel_runtime_pm_put(&i915->runtime_pm, wakeref);
}
void i915_gem_flush_free_objects(struct drm_i915_private *i915)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
index e00792158f13..f457d7130491 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
@@ -6,8 +6,8 @@
#include "i915_drv.h"
#include "gt/intel_context.h"
#include "gt/intel_engine_pm.h"
-#include "gt/intel_engine_pool.h"
#include "gt/intel_gt.h"
+#include "gt/intel_gt_buffer_pool.h"
#include "gt/intel_ring.h"
#include "i915_gem_clflush.h"
#include "i915_gem_object_blt.h"
@@ -18,7 +18,7 @@ struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce,
{
struct drm_i915_private *i915 = ce->vm->i915;
const u32 block_size = SZ_8M; /* ~1ms at 8GiB/s preemption delay */
- struct intel_engine_pool_node *pool;
+ struct intel_gt_buffer_pool_node *pool;
struct i915_vma *batch;
u64 offset;
u64 count;
@@ -33,7 +33,7 @@ struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce,
count = div_u64(round_up(vma->size, block_size), block_size);
size = (1 + 8 * count) * sizeof(u32);
size = round_up(size, PAGE_SIZE);
- pool = intel_engine_get_pool(ce->engine, size);
+ pool = intel_gt_get_buffer_pool(ce->engine->gt, size);
if (IS_ERR(pool)) {
err = PTR_ERR(pool);
goto out_pm;
@@ -78,10 +78,12 @@ struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce,
} while (rem);
*cmd = MI_BATCH_BUFFER_END;
- intel_gt_chipset_flush(ce->vm->gt);
+ i915_gem_object_flush_map(pool->obj);
i915_gem_object_unpin_map(pool->obj);
+ intel_gt_chipset_flush(ce->vm->gt);
+
batch = i915_vma_instance(pool->obj, ce->vm, NULL);
if (IS_ERR(batch)) {
err = PTR_ERR(batch);
@@ -96,7 +98,7 @@ struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce,
return batch;
out_put:
- intel_engine_pool_put(pool);
+ intel_gt_buffer_pool_put(pool);
out_pm:
intel_engine_pm_put(ce->engine);
return ERR_PTR(err);
@@ -114,13 +116,13 @@ int intel_emit_vma_mark_active(struct i915_vma *vma, struct i915_request *rq)
if (unlikely(err))
return err;
- return intel_engine_pool_mark_active(vma->private, rq);
+ return intel_gt_buffer_pool_mark_active(vma->private, rq);
}
void intel_emit_vma_release(struct intel_context *ce, struct i915_vma *vma)
{
i915_vma_unpin(vma);
- intel_engine_pool_put(vma->private);
+ intel_gt_buffer_pool_put(vma->private);
intel_engine_pm_put(ce->engine);
}
@@ -213,7 +215,7 @@ struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce,
{
struct drm_i915_private *i915 = ce->vm->i915;
const u32 block_size = SZ_8M; /* ~1ms at 8GiB/s preemption delay */
- struct intel_engine_pool_node *pool;
+ struct intel_gt_buffer_pool_node *pool;
struct i915_vma *batch;
u64 src_offset, dst_offset;
u64 count, rem;
@@ -228,7 +230,7 @@ struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce,
count = div_u64(round_up(dst->size, block_size), block_size);
size = (1 + 11 * count) * sizeof(u32);
size = round_up(size, PAGE_SIZE);
- pool = intel_engine_get_pool(ce->engine, size);
+ pool = intel_gt_get_buffer_pool(ce->engine->gt, size);
if (IS_ERR(pool)) {
err = PTR_ERR(pool);
goto out_pm;
@@ -289,10 +291,12 @@ struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce,
} while (rem);
*cmd = MI_BATCH_BUFFER_END;
- intel_gt_chipset_flush(ce->vm->gt);
+ i915_gem_object_flush_map(pool->obj);
i915_gem_object_unpin_map(pool->obj);
+ intel_gt_chipset_flush(ce->vm->gt);
+
batch = i915_vma_instance(pool->obj, ce->vm, NULL);
if (IS_ERR(batch)) {
err = PTR_ERR(batch);
@@ -307,7 +311,7 @@ struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce,
return batch;
out_put:
- intel_engine_pool_put(pool);
+ intel_gt_buffer_pool_put(pool);
out_pm:
intel_engine_pm_put(ce->engine);
return ERR_PTR(err);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h
index 243a43a87824..8bcd336a90dc 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h
@@ -10,7 +10,6 @@
#include "gt/intel_context.h"
#include "gt/intel_engine_pm.h"
-#include "gt/intel_engine_pool.h"
#include "i915_vma.h"
struct drm_i915_gem_object;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index a0b10bcd8d8a..54ee658bb168 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -179,9 +179,6 @@ struct drm_i915_gem_object {
#define TILING_MASK (FENCE_MINIMUM_STRIDE - 1)
#define STRIDE_MASK (~TILING_MASK)
- /** Count of VMA actually bound by this object */
- atomic_t bind_count;
-
struct {
/*
* Protects the pages and their use. Do not use directly, but
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
index 24f4cadea114..af9e48ee4a33 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -199,8 +199,6 @@ int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
if (i915_gem_object_has_pinned_pages(obj))
return -EBUSY;
- GEM_BUG_ON(atomic_read(&obj->bind_count));
-
/* May be called by shrinker from within get_pages() (on another bo) */
mutex_lock(&obj->mm.lock);
if (unlikely(atomic_read(&obj->mm.pages_pin_count))) {
@@ -393,6 +391,7 @@ void __i915_gem_object_flush_map(struct drm_i915_gem_object *obj,
GEM_BUG_ON(range_overflows_t(typeof(obj->base.size),
offset, size, obj->base.size));
+ wmb(); /* let all previous writes be visible to coherent partners */
obj->mm.dirty = true;
if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/i915_gem_phys.c
index 698e22420dc5..7fe9831aa9ba 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_phys.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_phys.c
@@ -10,8 +10,6 @@
#include <drm/drm.h> /* for drm_legacy.h! */
#include <drm/drm_cache.h>
-#include <drm/drm_legacy.h> /* for drm_pci.h! */
-#include <drm/drm_pci.h>
#include "gt/intel_gt.h"
#include "i915_drv.h"
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
index 03e5eb4c99d1..5b65ce738b16 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
@@ -27,18 +27,6 @@ static bool can_release_pages(struct drm_i915_gem_object *obj)
return false;
/*
- * Only report true if by unbinding the object and putting its pages
- * we can actually make forward progress towards freeing physical
- * pages.
- *
- * If the pages are pinned for any other reason than being bound
- * to the GPU, simply unbinding from the GPU is not going to succeed
- * in releasing our pin count on the pages themselves.
- */
- if (atomic_read(&obj->mm.pages_pin_count) > atomic_read(&obj->bind_count))
- return false;
-
- /*
* We can only return physical pages to the system if we can either
* discard the contents (because the user has marked them as being
* purgeable) or if we can move their contents out to swap.
@@ -54,6 +42,8 @@ static bool unsafe_drop_pages(struct drm_i915_gem_object *obj,
flags = 0;
if (shrink & I915_SHRINK_ACTIVE)
flags = I915_GEM_OBJECT_UNBIND_ACTIVE;
+ if (!(shrink & I915_SHRINK_BOUND))
+ flags = I915_GEM_OBJECT_UNBIND_TEST;
if (i915_gem_object_unbind(obj, flags) == 0)
__i915_gem_object_put_pages(obj);
@@ -194,10 +184,6 @@ i915_gem_shrink(struct drm_i915_private *i915,
i915_gem_object_is_framebuffer(obj))
continue;
- if (!(shrink & I915_SHRINK_BOUND) &&
- atomic_read(&obj->bind_count))
- continue;
-
if (!can_release_pages(obj))
continue;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
index 5557dfa83a7b..dc250278bd2c 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
@@ -381,14 +381,14 @@ static int i915_gem_init_stolen(struct drm_i915_private *i915)
mutex_init(&i915->mm.stolen_lock);
if (intel_vgpu_active(i915)) {
- dev_notice(i915->drm.dev,
+ drm_notice(&i915->drm,
"%s, disabling use of stolen memory\n",
"iGVT-g active");
return 0;
}
if (intel_vtd_active() && INTEL_GEN(i915) < 8) {
- dev_notice(i915->drm.dev,
+ drm_notice(&i915->drm,
"%s, disabling use of stolen memory\n",
"DMAR active");
return 0;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
index 7ffd7afeb7a5..f80cf6ac20c5 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
@@ -598,6 +598,14 @@ static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
GFP_KERNEL |
__GFP_NORETRY |
__GFP_NOWARN);
+ /*
+ * Using __get_user_pages_fast() with a read-only
+ * access is questionable. A read-only page may be
+ * COW-broken, and then this might end up giving
+ * the wrong side of the COW..
+ *
+ * We may or may not care.
+ */
if (pvec) /* defer to worker if malloc fails */
pinned = __get_user_pages_fast(obj->userptr.ptr,
num_pages,
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c
index fa16f2c3f3ac..2b46c6530da9 100644
--- a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c
@@ -88,8 +88,7 @@ static void huge_put_pages(struct drm_i915_gem_object *obj,
}
static const struct drm_i915_gem_object_ops huge_ops = {
- .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
- I915_GEM_OBJECT_IS_SHRINKABLE,
+ .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE,
.get_pages = huge_get_pages,
.put_pages = huge_put_pages,
};
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
index d4f94ca9ae0d..c9988b6d5c88 100644
--- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
@@ -421,7 +421,7 @@ static int igt_mock_exhaust_device_supported_pages(void *arg)
err = i915_vma_pin(vma, 0, 0, PIN_USER);
if (err)
- goto out_close;
+ goto out_put;
err = igt_check_page_sizes(vma);
@@ -432,8 +432,6 @@ static int igt_mock_exhaust_device_supported_pages(void *arg)
}
i915_vma_unpin(vma);
- i915_vma_close(vma);
-
i915_gem_object_put(obj);
if (err)
@@ -443,8 +441,6 @@ static int igt_mock_exhaust_device_supported_pages(void *arg)
goto out_device;
-out_close:
- i915_vma_close(vma);
out_put:
i915_gem_object_put(obj);
out_device:
@@ -492,7 +488,7 @@ static int igt_mock_memory_region_huge_pages(void *arg)
err = i915_vma_pin(vma, 0, 0, PIN_USER);
if (err)
- goto out_close;
+ goto out_put;
err = igt_check_page_sizes(vma);
if (err)
@@ -515,8 +511,6 @@ static int igt_mock_memory_region_huge_pages(void *arg)
}
i915_vma_unpin(vma);
- i915_vma_close(vma);
-
__i915_gem_object_put_pages(obj);
i915_gem_object_put(obj);
}
@@ -526,8 +520,6 @@ static int igt_mock_memory_region_huge_pages(void *arg)
out_unpin:
i915_vma_unpin(vma);
-out_close:
- i915_vma_close(vma);
out_put:
i915_gem_object_put(obj);
out_region:
@@ -587,10 +579,8 @@ static int igt_mock_ppgtt_misaligned_dma(void *arg)
}
err = i915_vma_pin(vma, 0, 0, flags);
- if (err) {
- i915_vma_close(vma);
+ if (err)
goto out_unpin;
- }
err = igt_check_page_sizes(vma);
@@ -603,10 +593,8 @@ static int igt_mock_ppgtt_misaligned_dma(void *arg)
i915_vma_unpin(vma);
- if (err) {
- i915_vma_close(vma);
+ if (err)
goto out_unpin;
- }
/*
* Try all the other valid offsets until the next
@@ -615,16 +603,12 @@ static int igt_mock_ppgtt_misaligned_dma(void *arg)
*/
for (offset = 4096; offset < page_size; offset += 4096) {
err = i915_vma_unbind(vma);
- if (err) {
- i915_vma_close(vma);
+ if (err)
goto out_unpin;
- }
err = i915_vma_pin(vma, 0, 0, flags | offset);
- if (err) {
- i915_vma_close(vma);
+ if (err)
goto out_unpin;
- }
err = igt_check_page_sizes(vma);
@@ -636,10 +620,8 @@ static int igt_mock_ppgtt_misaligned_dma(void *arg)
i915_vma_unpin(vma);
- if (err) {
- i915_vma_close(vma);
+ if (err)
goto out_unpin;
- }
if (igt_timeout(end_time,
"%s timed out at offset %x with page-size %x\n",
@@ -647,8 +629,6 @@ static int igt_mock_ppgtt_misaligned_dma(void *arg)
break;
}
- i915_vma_close(vma);
-
i915_gem_object_unpin_pages(obj);
__i915_gem_object_put_pages(obj);
i915_gem_object_put(obj);
@@ -670,12 +650,6 @@ static void close_object_list(struct list_head *objects,
struct drm_i915_gem_object *obj, *on;
list_for_each_entry_safe(obj, on, objects, st_link) {
- struct i915_vma *vma;
-
- vma = i915_vma_instance(obj, &ppgtt->vm, NULL);
- if (!IS_ERR(vma))
- i915_vma_close(vma);
-
list_del(&obj->st_link);
i915_gem_object_unpin_pages(obj);
__i915_gem_object_put_pages(obj);
@@ -912,7 +886,7 @@ static int igt_mock_ppgtt_64K(void *arg)
err = i915_vma_pin(vma, 0, 0, flags);
if (err)
- goto out_vma_close;
+ goto out_object_unpin;
err = igt_check_page_sizes(vma);
if (err)
@@ -945,8 +919,6 @@ static int igt_mock_ppgtt_64K(void *arg)
}
i915_vma_unpin(vma);
- i915_vma_close(vma);
-
i915_gem_object_unpin_pages(obj);
__i915_gem_object_put_pages(obj);
i915_gem_object_put(obj);
@@ -957,8 +929,6 @@ static int igt_mock_ppgtt_64K(void *arg)
out_vma_unpin:
i915_vma_unpin(vma);
-out_vma_close:
- i915_vma_close(vma);
out_object_unpin:
i915_gem_object_unpin_pages(obj);
out_object_put:
@@ -1070,7 +1040,7 @@ static int __igt_write_huge(struct intel_context *ce,
err = i915_vma_unbind(vma);
if (err)
- goto out_vma_close;
+ return err;
err = i915_vma_pin(vma, size, 0, flags | offset);
if (err) {
@@ -1081,7 +1051,7 @@ static int __igt_write_huge(struct intel_context *ce,
if (err == -ENOSPC && i915_is_ggtt(ce->vm))
err = 0;
- goto out_vma_close;
+ return err;
}
err = igt_check_page_sizes(vma);
@@ -1102,8 +1072,6 @@ static int __igt_write_huge(struct intel_context *ce,
out_vma_unpin:
i915_vma_unpin(vma);
-out_vma_close:
- __i915_vma_put(vma);
return err;
}
@@ -1490,7 +1458,7 @@ static int igt_ppgtt_pin_update(void *arg)
err = i915_vma_pin(vma, SZ_2M, 0, flags);
if (err)
- goto out_close;
+ goto out_put;
if (vma->page_sizes.sg < page_size) {
pr_info("Unable to allocate page-size %x, finishing test early\n",
@@ -1527,8 +1495,6 @@ static int igt_ppgtt_pin_update(void *arg)
goto out_unpin;
i915_vma_unpin(vma);
- i915_vma_close(vma);
-
i915_gem_object_put(obj);
}
@@ -1546,7 +1512,7 @@ static int igt_ppgtt_pin_update(void *arg)
err = i915_vma_pin(vma, 0, 0, flags);
if (err)
- goto out_close;
+ goto out_put;
/*
* Make sure we don't end up with something like where the pde is still
@@ -1576,8 +1542,6 @@ static int igt_ppgtt_pin_update(void *arg)
out_unpin:
i915_vma_unpin(vma);
-out_close:
- i915_vma_close(vma);
out_put:
i915_gem_object_put(obj);
out_vm:
@@ -1629,13 +1593,11 @@ static int igt_tmpfs_fallback(void *arg)
err = i915_vma_pin(vma, 0, 0, PIN_USER);
if (err)
- goto out_close;
+ goto out_put;
err = igt_check_page_sizes(vma);
i915_vma_unpin(vma);
-out_close:
- i915_vma_close(vma);
out_put:
i915_gem_object_put(obj);
out_restore:
@@ -1682,7 +1644,7 @@ static int igt_shrink_thp(void *arg)
err = i915_vma_pin(vma, 0, 0, flags);
if (err)
- goto out_close;
+ goto out_put;
if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_2M) {
pr_info("failed to allocate THP, finishing test early\n");
@@ -1706,7 +1668,7 @@ static int igt_shrink_thp(void *arg)
i915_gem_context_unlock_engines(ctx);
i915_vma_unpin(vma);
if (err)
- goto out_close;
+ goto out_put;
/*
* Now that the pages are *unpinned* shrink-all should invoke
@@ -1716,18 +1678,18 @@ static int igt_shrink_thp(void *arg)
if (i915_gem_object_has_pages(obj)) {
pr_err("shrink-all didn't truncate the pages\n");
err = -EINVAL;
- goto out_close;
+ goto out_put;
}
if (obj->mm.page_sizes.sg || obj->mm.page_sizes.phys) {
pr_err("residual page-size bits left\n");
err = -EINVAL;
- goto out_close;
+ goto out_put;
}
err = i915_vma_pin(vma, 0, 0, flags);
if (err)
- goto out_close;
+ goto out_put;
while (n--) {
err = cpu_check(obj, n, 0xdeadbeaf);
@@ -1737,8 +1699,6 @@ static int igt_shrink_thp(void *arg)
out_unpin:
i915_vma_unpin(vma);
-out_close:
- i915_vma_close(vma);
out_put:
i915_gem_object_put(obj);
out_vm:
@@ -1777,21 +1737,20 @@ int i915_gem_huge_page_mock_selftests(void)
if (!i915_vm_is_4lvl(&ppgtt->vm)) {
pr_err("failed to create 48b PPGTT\n");
err = -EINVAL;
- goto out_close;
+ goto out_put;
}
/* If we ever hit this then it's time to mock the 64K scratch */
if (!i915_vm_has_scratch_64K(&ppgtt->vm)) {
pr_err("PPGTT missing 64K scratch page\n");
err = -EINVAL;
- goto out_close;
+ goto out_put;
}
err = i915_subtests(tests, ppgtt);
-out_close:
+out_put:
i915_vm_put(&ppgtt->vm);
-
out_unlock:
drm_dev_put(&dev_priv->drm);
return err;
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
index b972be165e85..8fe3ad2ee34e 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
@@ -7,9 +7,12 @@
#include "gt/intel_engine_user.h"
#include "gt/intel_gt.h"
+#include "gt/intel_gpu_commands.h"
+#include "gem/i915_gem_lmem.h"
#include "selftests/igt_flush_test.h"
#include "selftests/mock_drm.h"
+#include "selftests/i915_random.h"
#include "huge_gem_object.h"
#include "mock_context.h"
@@ -127,10 +130,573 @@ static int igt_client_fill(void *arg)
} while (1);
}
+#define WIDTH 512
+#define HEIGHT 32
+
+struct blit_buffer {
+ struct i915_vma *vma;
+ u32 start_val;
+ u32 tiling;
+};
+
+struct tiled_blits {
+ struct intel_context *ce;
+ struct blit_buffer buffers[3];
+ struct blit_buffer scratch;
+ struct i915_vma *batch;
+ u64 hole;
+ u32 width;
+ u32 height;
+};
+
+static int prepare_blit(const struct tiled_blits *t,
+ struct blit_buffer *dst,
+ struct blit_buffer *src,
+ struct drm_i915_gem_object *batch)
+{
+ const int gen = INTEL_GEN(to_i915(batch->base.dev));
+ bool use_64b_reloc = gen >= 8;
+ u32 src_pitch, dst_pitch;
+ u32 cmd, *cs;
+
+ cs = i915_gem_object_pin_map(batch, I915_MAP_WC);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ *cs++ = MI_LOAD_REGISTER_IMM(1);
+ *cs++ = i915_mmio_reg_offset(BCS_SWCTRL);
+ cmd = (BCS_SRC_Y | BCS_DST_Y) << 16;
+ if (src->tiling == I915_TILING_Y)
+ cmd |= BCS_SRC_Y;
+ if (dst->tiling == I915_TILING_Y)
+ cmd |= BCS_DST_Y;
+ *cs++ = cmd;
+
+ cmd = MI_FLUSH_DW;
+ if (gen >= 8)
+ cmd++;
+ *cs++ = cmd;
+ *cs++ = 0;
+ *cs++ = 0;
+ *cs++ = 0;
+
+ cmd = XY_SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (8 - 2);
+ if (gen >= 8)
+ cmd += 2;
+
+ src_pitch = t->width * 4;
+ if (src->tiling) {
+ cmd |= XY_SRC_COPY_BLT_SRC_TILED;
+ src_pitch /= 4;
+ }
+
+ dst_pitch = t->width * 4;
+ if (dst->tiling) {
+ cmd |= XY_SRC_COPY_BLT_DST_TILED;
+ dst_pitch /= 4;
+ }
+
+ *cs++ = cmd;
+ *cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | dst_pitch;
+ *cs++ = 0;
+ *cs++ = t->height << 16 | t->width;
+ *cs++ = lower_32_bits(dst->vma->node.start);
+ if (use_64b_reloc)
+ *cs++ = upper_32_bits(dst->vma->node.start);
+ *cs++ = 0;
+ *cs++ = src_pitch;
+ *cs++ = lower_32_bits(src->vma->node.start);
+ if (use_64b_reloc)
+ *cs++ = upper_32_bits(src->vma->node.start);
+
+ *cs++ = MI_BATCH_BUFFER_END;
+
+ i915_gem_object_flush_map(batch);
+ i915_gem_object_unpin_map(batch);
+
+ return 0;
+}
+
+static void tiled_blits_destroy_buffers(struct tiled_blits *t)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(t->buffers); i++)
+ i915_vma_put(t->buffers[i].vma);
+
+ i915_vma_put(t->scratch.vma);
+ i915_vma_put(t->batch);
+}
+
+static struct i915_vma *
+__create_vma(struct tiled_blits *t, size_t size, bool lmem)
+{
+ struct drm_i915_private *i915 = t->ce->vm->i915;
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+
+ if (lmem)
+ obj = i915_gem_object_create_lmem(i915, size, 0);
+ else
+ obj = i915_gem_object_create_shmem(i915, size);
+ if (IS_ERR(obj))
+ return ERR_CAST(obj);
+
+ vma = i915_vma_instance(obj, t->ce->vm, NULL);
+ if (IS_ERR(vma))
+ i915_gem_object_put(obj);
+
+ return vma;
+}
+
+static struct i915_vma *create_vma(struct tiled_blits *t, bool lmem)
+{
+ return __create_vma(t, PAGE_ALIGN(t->width * t->height * 4), lmem);
+}
+
+static int tiled_blits_create_buffers(struct tiled_blits *t,
+ int width, int height,
+ struct rnd_state *prng)
+{
+ struct drm_i915_private *i915 = t->ce->engine->i915;
+ int i;
+
+ t->width = width;
+ t->height = height;
+
+ t->batch = __create_vma(t, PAGE_SIZE, false);
+ if (IS_ERR(t->batch))
+ return PTR_ERR(t->batch);
+
+ t->scratch.vma = create_vma(t, false);
+ if (IS_ERR(t->scratch.vma)) {
+ i915_vma_put(t->batch);
+ return PTR_ERR(t->scratch.vma);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(t->buffers); i++) {
+ struct i915_vma *vma;
+
+ vma = create_vma(t, HAS_LMEM(i915) && i % 2);
+ if (IS_ERR(vma)) {
+ tiled_blits_destroy_buffers(t);
+ return PTR_ERR(vma);
+ }
+
+ t->buffers[i].vma = vma;
+ t->buffers[i].tiling =
+ i915_prandom_u32_max_state(I915_TILING_Y + 1, prng);
+ }
+
+ return 0;
+}
+
+static void fill_scratch(struct tiled_blits *t, u32 *vaddr, u32 val)
+{
+ int i;
+
+ t->scratch.start_val = val;
+ for (i = 0; i < t->width * t->height; i++)
+ vaddr[i] = val++;
+
+ i915_gem_object_flush_map(t->scratch.vma->obj);
+}
+
+static u64 swizzle_bit(unsigned int bit, u64 offset)
+{
+ return (offset & BIT_ULL(bit)) >> (bit - 6);
+}
+
+static u64 tiled_offset(const struct intel_gt *gt,
+ u64 v,
+ unsigned int stride,
+ unsigned int tiling)
+{
+ unsigned int swizzle;
+ u64 x, y;
+
+ if (tiling == I915_TILING_NONE)
+ return v;
+
+ y = div64_u64_rem(v, stride, &x);
+
+ if (tiling == I915_TILING_X) {
+ v = div64_u64_rem(y, 8, &y) * stride * 8;
+ v += y * 512;
+ v += div64_u64_rem(x, 512, &x) << 12;
+ v += x;
+
+ swizzle = gt->ggtt->bit_6_swizzle_x;
+ } else {
+ const unsigned int ytile_span = 16;
+ const unsigned int ytile_height = 512;
+
+ v = div64_u64_rem(y, 32, &y) * stride * 32;
+ v += y * ytile_span;
+ v += div64_u64_rem(x, ytile_span, &x) * ytile_height;
+ v += x;
+
+ swizzle = gt->ggtt->bit_6_swizzle_y;
+ }
+
+ switch (swizzle) {
+ case I915_BIT_6_SWIZZLE_9:
+ v ^= swizzle_bit(9, v);
+ break;
+ case I915_BIT_6_SWIZZLE_9_10:
+ v ^= swizzle_bit(9, v) ^ swizzle_bit(10, v);
+ break;
+ case I915_BIT_6_SWIZZLE_9_11:
+ v ^= swizzle_bit(9, v) ^ swizzle_bit(11, v);
+ break;
+ case I915_BIT_6_SWIZZLE_9_10_11:
+ v ^= swizzle_bit(9, v) ^ swizzle_bit(10, v) ^ swizzle_bit(11, v);
+ break;
+ }
+
+ return v;
+}
+
+static const char *repr_tiling(int tiling)
+{
+ switch (tiling) {
+ case I915_TILING_NONE: return "linear";
+ case I915_TILING_X: return "X";
+ case I915_TILING_Y: return "Y";
+ default: return "unknown";
+ }
+}
+
+static int verify_buffer(const struct tiled_blits *t,
+ struct blit_buffer *buf,
+ struct rnd_state *prng)
+{
+ const u32 *vaddr;
+ int ret = 0;
+ int x, y, p;
+
+ x = i915_prandom_u32_max_state(t->width, prng);
+ y = i915_prandom_u32_max_state(t->height, prng);
+ p = y * t->width + x;
+
+ vaddr = i915_gem_object_pin_map(buf->vma->obj, I915_MAP_WC);
+ if (IS_ERR(vaddr))
+ return PTR_ERR(vaddr);
+
+ if (vaddr[0] != buf->start_val) {
+ ret = -EINVAL;
+ } else {
+ u64 v = tiled_offset(buf->vma->vm->gt,
+ p * 4, t->width * 4,
+ buf->tiling);
+
+ if (vaddr[v / sizeof(*vaddr)] != buf->start_val + p)
+ ret = -EINVAL;
+ }
+ if (ret) {
+ pr_err("Invalid %s tiling detected at (%d, %d), start_val %x\n",
+ repr_tiling(buf->tiling),
+ x, y, buf->start_val);
+ igt_hexdump(vaddr, 4096);
+ }
+
+ i915_gem_object_unpin_map(buf->vma->obj);
+ return ret;
+}
+
+static int move_to_active(struct i915_vma *vma,
+ struct i915_request *rq,
+ unsigned int flags)
+{
+ int err;
+
+ i915_vma_lock(vma);
+ err = i915_request_await_object(rq, vma->obj, false);
+ if (err == 0)
+ err = i915_vma_move_to_active(vma, rq, flags);
+ i915_vma_unlock(vma);
+
+ return err;
+}
+
+static int pin_buffer(struct i915_vma *vma, u64 addr)
+{
+ int err;
+
+ if (drm_mm_node_allocated(&vma->node) && vma->node.start != addr) {
+ err = i915_vma_unbind(vma);
+ if (err)
+ return err;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED | addr);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static int
+tiled_blit(struct tiled_blits *t,
+ struct blit_buffer *dst, u64 dst_addr,
+ struct blit_buffer *src, u64 src_addr)
+{
+ struct i915_request *rq;
+ int err;
+
+ err = pin_buffer(src->vma, src_addr);
+ if (err) {
+ pr_err("Cannot pin src @ %llx\n", src_addr);
+ return err;
+ }
+
+ err = pin_buffer(dst->vma, dst_addr);
+ if (err) {
+ pr_err("Cannot pin dst @ %llx\n", dst_addr);
+ goto err_src;
+ }
+
+ err = i915_vma_pin(t->batch, 0, 0, PIN_USER | PIN_HIGH);
+ if (err) {
+ pr_err("cannot pin batch\n");
+ goto err_dst;
+ }
+
+ err = prepare_blit(t, dst, src, t->batch->obj);
+ if (err)
+ goto err_bb;
+
+ rq = intel_context_create_request(t->ce);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto err_bb;
+ }
+
+ err = move_to_active(t->batch, rq, 0);
+ if (!err)
+ err = move_to_active(src->vma, rq, 0);
+ if (!err)
+ err = move_to_active(dst->vma, rq, 0);
+ if (!err)
+ err = rq->engine->emit_bb_start(rq,
+ t->batch->node.start,
+ t->batch->node.size,
+ 0);
+ i915_request_get(rq);
+ i915_request_add(rq);
+ if (i915_request_wait(rq, 0, HZ / 2) < 0)
+ err = -ETIME;
+ i915_request_put(rq);
+
+ dst->start_val = src->start_val;
+err_bb:
+ i915_vma_unpin(t->batch);
+err_dst:
+ i915_vma_unpin(dst->vma);
+err_src:
+ i915_vma_unpin(src->vma);
+ return err;
+}
+
+static struct tiled_blits *
+tiled_blits_create(struct intel_engine_cs *engine, struct rnd_state *prng)
+{
+ struct drm_mm_node hole;
+ struct tiled_blits *t;
+ u64 hole_size;
+ int err;
+
+ t = kzalloc(sizeof(*t), GFP_KERNEL);
+ if (!t)
+ return ERR_PTR(-ENOMEM);
+
+ t->ce = intel_context_create(engine);
+ if (IS_ERR(t->ce)) {
+ err = PTR_ERR(t->ce);
+ goto err_free;
+ }
+
+ hole_size = 2 * PAGE_ALIGN(WIDTH * HEIGHT * 4);
+ hole_size *= 2; /* room to maneuver */
+ hole_size += 2 * I915_GTT_MIN_ALIGNMENT;
+
+ mutex_lock(&t->ce->vm->mutex);
+ memset(&hole, 0, sizeof(hole));
+ err = drm_mm_insert_node_in_range(&t->ce->vm->mm, &hole,
+ hole_size, 0, I915_COLOR_UNEVICTABLE,
+ 0, U64_MAX,
+ DRM_MM_INSERT_BEST);
+ if (!err)
+ drm_mm_remove_node(&hole);
+ mutex_unlock(&t->ce->vm->mutex);
+ if (err) {
+ err = -ENODEV;
+ goto err_put;
+ }
+
+ t->hole = hole.start + I915_GTT_MIN_ALIGNMENT;
+ pr_info("Using hole at %llx\n", t->hole);
+
+ err = tiled_blits_create_buffers(t, WIDTH, HEIGHT, prng);
+ if (err)
+ goto err_put;
+
+ return t;
+
+err_put:
+ intel_context_put(t->ce);
+err_free:
+ kfree(t);
+ return ERR_PTR(err);
+}
+
+static void tiled_blits_destroy(struct tiled_blits *t)
+{
+ tiled_blits_destroy_buffers(t);
+
+ intel_context_put(t->ce);
+ kfree(t);
+}
+
+static int tiled_blits_prepare(struct tiled_blits *t,
+ struct rnd_state *prng)
+{
+ u64 offset = PAGE_ALIGN(t->width * t->height * 4);
+ u32 *map;
+ int err;
+ int i;
+
+ map = i915_gem_object_pin_map(t->scratch.vma->obj, I915_MAP_WC);
+ if (IS_ERR(map))
+ return PTR_ERR(map);
+
+ /* Use scratch to fill objects */
+ for (i = 0; i < ARRAY_SIZE(t->buffers); i++) {
+ fill_scratch(t, map, prandom_u32_state(prng));
+ GEM_BUG_ON(verify_buffer(t, &t->scratch, prng));
+
+ err = tiled_blit(t,
+ &t->buffers[i], t->hole + offset,
+ &t->scratch, t->hole);
+ if (err == 0)
+ err = verify_buffer(t, &t->buffers[i], prng);
+ if (err) {
+ pr_err("Failed to create buffer %d\n", i);
+ break;
+ }
+ }
+
+ i915_gem_object_unpin_map(t->scratch.vma->obj);
+ return err;
+}
+
+static int tiled_blits_bounce(struct tiled_blits *t, struct rnd_state *prng)
+{
+ u64 offset =
+ round_up(t->width * t->height * 4, 2 * I915_GTT_MIN_ALIGNMENT);
+ int err;
+
+ /* We want to check position invariant tiling across GTT eviction */
+
+ err = tiled_blit(t,
+ &t->buffers[1], t->hole + offset / 2,
+ &t->buffers[0], t->hole + 2 * offset);
+ if (err)
+ return err;
+
+	/* Reposition so that we overlap the old addresses, but slightly offset */
+ err = tiled_blit(t,
+ &t->buffers[2], t->hole + I915_GTT_MIN_ALIGNMENT,
+ &t->buffers[1], t->hole + 3 * offset / 2);
+ if (err)
+ return err;
+
+ err = verify_buffer(t, &t->buffers[2], prng);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static int __igt_client_tiled_blits(struct intel_engine_cs *engine,
+ struct rnd_state *prng)
+{
+ struct tiled_blits *t;
+ int err;
+
+ t = tiled_blits_create(engine, prng);
+ if (IS_ERR(t))
+ return PTR_ERR(t);
+
+ err = tiled_blits_prepare(t, prng);
+ if (err)
+ goto out;
+
+ err = tiled_blits_bounce(t, prng);
+ if (err)
+ goto out;
+
+out:
+ tiled_blits_destroy(t);
+ return err;
+}
+
+static bool has_bit17_swizzle(int sw)
+{
+ return (sw == I915_BIT_6_SWIZZLE_9_10_17 ||
+ sw == I915_BIT_6_SWIZZLE_9_17);
+}
+
+static bool bad_swizzling(struct drm_i915_private *i915)
+{
+ struct i915_ggtt *ggtt = &i915->ggtt;
+
+ if (i915->quirks & QUIRK_PIN_SWIZZLED_PAGES)
+ return true;
+
+ if (has_bit17_swizzle(ggtt->bit_6_swizzle_x) ||
+ has_bit17_swizzle(ggtt->bit_6_swizzle_y))
+ return true;
+
+ return false;
+}
+
+static int igt_client_tiled_blits(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ I915_RND_STATE(prng);
+ int inst = 0;
+
+ /* Test requires explicit BLT tiling controls */
+ if (INTEL_GEN(i915) < 4)
+ return 0;
+
+ if (bad_swizzling(i915)) /* Requires sane (sub-page) swizzling */
+ return 0;
+
+ do {
+ struct intel_engine_cs *engine;
+ int err;
+
+ engine = intel_engine_lookup_user(i915,
+ I915_ENGINE_CLASS_COPY,
+ inst++);
+ if (!engine)
+ return 0;
+
+ err = __igt_client_tiled_blits(engine, &prng);
+ if (err == -ENODEV)
+ err = 0;
+ if (err)
+ return err;
+ } while (1);
+}
+
int i915_gem_client_blt_live_selftests(struct drm_i915_private *i915)
{
static const struct i915_subtest tests[] = {
SUBTEST(igt_client_fill),
+ SUBTEST(igt_client_tiled_blits),
};
if (intel_gt_is_wedged(&i915->gt))
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
index 3f6079e1dfb6..87d7d8aa080f 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
@@ -158,6 +158,8 @@ static int wc_set(struct context *ctx, unsigned long offset, u32 v)
return PTR_ERR(map);
map[offset / sizeof(*map)] = v;
+
+ __i915_gem_object_flush_map(ctx->obj, offset, sizeof(*map));
i915_gem_object_unpin_map(ctx->obj);
return 0;
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index 54b86cf7f5d2..b81978890641 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -972,12 +972,6 @@ emit_rpcs_query(struct drm_i915_gem_object *obj,
goto err_batch;
}
- err = rq->engine->emit_bb_start(rq,
- batch->node.start, batch->node.size,
- 0);
- if (err)
- goto err_request;
-
i915_vma_lock(batch);
err = i915_request_await_object(rq, batch->obj, false);
if (err == 0)
@@ -994,6 +988,18 @@ emit_rpcs_query(struct drm_i915_gem_object *obj,
if (err)
goto skip_request;
+ if (rq->engine->emit_init_breadcrumb) {
+ err = rq->engine->emit_init_breadcrumb(rq);
+ if (err)
+ goto skip_request;
+ }
+
+ err = rq->engine->emit_bb_start(rq,
+ batch->node.start, batch->node.size,
+ 0);
+ if (err)
+ goto skip_request;
+
i915_vma_unpin_and_release(&batch, 0);
i915_vma_unpin(vma);
@@ -1005,7 +1011,6 @@ emit_rpcs_query(struct drm_i915_gem_object *obj,
skip_request:
i915_request_set_error_once(rq, err);
-err_request:
i915_request_add(rq);
err_batch:
i915_vma_unpin_and_release(&batch, 0);
@@ -1541,10 +1546,6 @@ static int write_to_scratch(struct i915_gem_context *ctx,
goto err_unpin;
}
- err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, 0);
- if (err)
- goto err_request;
-
i915_vma_lock(vma);
err = i915_request_await_object(rq, vma->obj, false);
if (err == 0)
@@ -1553,6 +1554,16 @@ static int write_to_scratch(struct i915_gem_context *ctx,
if (err)
goto skip_request;
+ if (rq->engine->emit_init_breadcrumb) {
+ err = rq->engine->emit_init_breadcrumb(rq);
+ if (err)
+ goto skip_request;
+ }
+
+ err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, 0);
+ if (err)
+ goto skip_request;
+
i915_vma_unpin(vma);
i915_request_add(rq);
@@ -1560,7 +1571,6 @@ static int write_to_scratch(struct i915_gem_context *ctx,
goto out_vm;
skip_request:
i915_request_set_error_once(rq, err);
-err_request:
i915_request_add(rq);
err_unpin:
i915_vma_unpin(vma);
@@ -1674,10 +1684,6 @@ static int read_from_scratch(struct i915_gem_context *ctx,
goto err_unpin;
}
- err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, flags);
- if (err)
- goto err_request;
-
i915_vma_lock(vma);
err = i915_request_await_object(rq, vma->obj, true);
if (err == 0)
@@ -1686,8 +1692,17 @@ static int read_from_scratch(struct i915_gem_context *ctx,
if (err)
goto skip_request;
+ if (rq->engine->emit_init_breadcrumb) {
+ err = rq->engine->emit_init_breadcrumb(rq);
+ if (err)
+ goto skip_request;
+ }
+
+ err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, flags);
+ if (err)
+ goto skip_request;
+
i915_vma_unpin(vma);
- i915_vma_close(vma);
i915_request_add(rq);
@@ -1709,7 +1724,6 @@ static int read_from_scratch(struct i915_gem_context *ctx,
goto out_vm;
skip_request:
i915_request_set_error_once(rq, err);
-err_request:
i915_request_add(rq);
err_unpin:
i915_vma_unpin(vma);
@@ -1925,7 +1939,7 @@ static int mock_context_barrier(void *arg)
goto out;
}
- rq = igt_request_alloc(ctx, i915->engine[RCS0]);
+ rq = igt_request_alloc(ctx, i915->gt.engine[RCS0]);
if (IS_ERR(rq)) {
pr_err("Request allocation failed!\n");
goto out;
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c
new file mode 100644
index 000000000000..a49016f8ee0d
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c
@@ -0,0 +1,171 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#include "i915_selftest.h"
+
+#include "gt/intel_engine_pm.h"
+#include "selftests/igt_flush_test.h"
+
+static u64 read_reloc(const u32 *map, int x, const u64 mask)
+{
+ u64 reloc;
+
+ memcpy(&reloc, &map[x], sizeof(reloc));
+ return reloc & mask;
+}
+
+static int __igt_gpu_reloc(struct i915_execbuffer *eb,
+ struct drm_i915_gem_object *obj)
+{
+ const unsigned int offsets[] = { 8, 3, 0 };
+ const u64 mask =
+ GENMASK_ULL(eb->reloc_cache.use_64bit_reloc ? 63 : 31, 0);
+ const u32 *map = page_mask_bits(obj->mm.mapping);
+ struct i915_request *rq;
+ struct i915_vma *vma;
+ int err;
+ int i;
+
+ vma = i915_vma_instance(obj, eb->context->vm, NULL);
+ if (IS_ERR(vma))
+ return PTR_ERR(vma);
+
+ err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_HIGH);
+ if (err)
+ return err;
+
+ /* 8-Byte aligned */
+ if (!__reloc_entry_gpu(eb, vma,
+ offsets[0] * sizeof(u32),
+ 0)) {
+ err = -EIO;
+ goto unpin_vma;
+ }
+
+ /* !8-Byte aligned */
+ if (!__reloc_entry_gpu(eb, vma,
+ offsets[1] * sizeof(u32),
+ 1)) {
+ err = -EIO;
+ goto unpin_vma;
+ }
+
+ /* Skip to the end of the cmd page */
+ i = PAGE_SIZE / sizeof(u32) - RELOC_TAIL - 1;
+ i -= eb->reloc_cache.rq_size;
+ memset32(eb->reloc_cache.rq_cmd + eb->reloc_cache.rq_size,
+ MI_NOOP, i);
+ eb->reloc_cache.rq_size += i;
+
+ /* Force batch chaining */
+ if (!__reloc_entry_gpu(eb, vma,
+ offsets[2] * sizeof(u32),
+ 2)) {
+ err = -EIO;
+ goto unpin_vma;
+ }
+
+ GEM_BUG_ON(!eb->reloc_cache.rq);
+ rq = i915_request_get(eb->reloc_cache.rq);
+ err = reloc_gpu_flush(&eb->reloc_cache);
+ if (err)
+ goto put_rq;
+ GEM_BUG_ON(eb->reloc_cache.rq);
+
+ err = i915_gem_object_wait(obj, I915_WAIT_INTERRUPTIBLE, HZ / 2);
+ if (err) {
+ intel_gt_set_wedged(eb->engine->gt);
+ goto put_rq;
+ }
+
+ if (!i915_request_completed(rq)) {
+ pr_err("%s: did not wait for relocations!\n", eb->engine->name);
+ err = -EINVAL;
+ goto put_rq;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(offsets); i++) {
+ u64 reloc = read_reloc(map, offsets[i], mask);
+
+ if (reloc != i) {
+ pr_err("%s[%d]: map[%d] %llx != %x\n",
+ eb->engine->name, i, offsets[i], reloc, i);
+ err = -EINVAL;
+ }
+ }
+ if (err)
+ igt_hexdump(map, 4096);
+
+put_rq:
+ i915_request_put(rq);
+unpin_vma:
+ i915_vma_unpin(vma);
+ return err;
+}
+
+static int igt_gpu_reloc(void *arg)
+{
+ struct i915_execbuffer eb;
+ struct drm_i915_gem_object *scratch;
+ int err = 0;
+ u32 *map;
+
+ eb.i915 = arg;
+
+ scratch = i915_gem_object_create_internal(eb.i915, 4096);
+ if (IS_ERR(scratch))
+ return PTR_ERR(scratch);
+
+ map = i915_gem_object_pin_map(scratch, I915_MAP_WC);
+ if (IS_ERR(map)) {
+ err = PTR_ERR(map);
+ goto err_scratch;
+ }
+
+ for_each_uabi_engine(eb.engine, eb.i915) {
+ reloc_cache_init(&eb.reloc_cache, eb.i915);
+ memset(map, POISON_INUSE, 4096);
+
+ intel_engine_pm_get(eb.engine);
+ eb.context = intel_context_create(eb.engine);
+ if (IS_ERR(eb.context)) {
+ err = PTR_ERR(eb.context);
+ goto err_pm;
+ }
+
+ err = intel_context_pin(eb.context);
+ if (err)
+ goto err_put;
+
+ err = __igt_gpu_reloc(&eb, scratch);
+
+ intel_context_unpin(eb.context);
+err_put:
+ intel_context_put(eb.context);
+err_pm:
+ intel_engine_pm_put(eb.engine);
+ if (err)
+ break;
+ }
+
+ if (igt_flush_test(eb.i915))
+ err = -EIO;
+
+err_scratch:
+ i915_gem_object_put(scratch);
+ return err;
+}
+
+int i915_gem_execbuffer_live_selftests(struct drm_i915_private *i915)
+{
+ static const struct i915_subtest tests[] = {
+ SUBTEST(igt_gpu_reloc),
+ };
+
+ if (intel_gt_is_wedged(&i915->gt))
+ return 0;
+
+ return i915_live_subtests(tests, i915);
+}
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index 43912e9b683d..9c7402ce5bf9 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -952,6 +952,129 @@ static int igt_mmap(void *arg)
return 0;
}
+static const char *repr_mmap_type(enum i915_mmap_type type)
+{
+ switch (type) {
+ case I915_MMAP_TYPE_GTT: return "gtt";
+ case I915_MMAP_TYPE_WB: return "wb";
+ case I915_MMAP_TYPE_WC: return "wc";
+ case I915_MMAP_TYPE_UC: return "uc";
+ default: return "unknown";
+ }
+}
+
+static bool can_access(const struct drm_i915_gem_object *obj)
+{
+ unsigned int flags =
+ I915_GEM_OBJECT_HAS_STRUCT_PAGE | I915_GEM_OBJECT_HAS_IOMEM;
+
+ return i915_gem_object_type_has(obj, flags);
+}
+
+static int __igt_mmap_access(struct drm_i915_private *i915,
+ struct drm_i915_gem_object *obj,
+ enum i915_mmap_type type)
+{
+ struct i915_mmap_offset *mmo;
+ unsigned long __user *ptr;
+ unsigned long A, B;
+ unsigned long x, y;
+ unsigned long addr;
+ int err;
+
+ memset(&A, 0xAA, sizeof(A));
+ memset(&B, 0xBB, sizeof(B));
+
+ if (!can_mmap(obj, type) || !can_access(obj))
+ return 0;
+
+ mmo = mmap_offset_attach(obj, type, NULL);
+ if (IS_ERR(mmo))
+ return PTR_ERR(mmo);
+
+ addr = igt_mmap_node(i915, &mmo->vma_node, 0, PROT_WRITE, MAP_SHARED);
+ if (IS_ERR_VALUE(addr))
+ return addr;
+ ptr = (unsigned long __user *)addr;
+
+ err = __put_user(A, ptr);
+ if (err) {
+ pr_err("%s(%s): failed to write into user mmap\n",
+ obj->mm.region->name, repr_mmap_type(type));
+ goto out_unmap;
+ }
+
+ intel_gt_flush_ggtt_writes(&i915->gt);
+
+ err = access_process_vm(current, addr, &x, sizeof(x), 0);
+ if (err != sizeof(x)) {
+ pr_err("%s(%s): access_process_vm() read failed\n",
+ obj->mm.region->name, repr_mmap_type(type));
+ goto out_unmap;
+ }
+
+ err = access_process_vm(current, addr, &B, sizeof(B), FOLL_WRITE);
+ if (err != sizeof(B)) {
+ pr_err("%s(%s): access_process_vm() write failed\n",
+ obj->mm.region->name, repr_mmap_type(type));
+ goto out_unmap;
+ }
+
+ intel_gt_flush_ggtt_writes(&i915->gt);
+
+ err = __get_user(y, ptr);
+ if (err) {
+ pr_err("%s(%s): failed to read from user mmap\n",
+ obj->mm.region->name, repr_mmap_type(type));
+ goto out_unmap;
+ }
+
+ if (x != A || y != B) {
+ pr_err("%s(%s): failed to read/write values, found (%lx, %lx)\n",
+ obj->mm.region->name, repr_mmap_type(type),
+ x, y);
+ err = -EINVAL;
+ goto out_unmap;
+ }
+
+out_unmap:
+ vm_munmap(addr, obj->base.size);
+ return err;
+}
+
+static int igt_mmap_access(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ struct intel_memory_region *mr;
+ enum intel_region_id id;
+
+ for_each_memory_region(mr, i915, id) {
+ struct drm_i915_gem_object *obj;
+ int err;
+
+ obj = i915_gem_object_create_region(mr, PAGE_SIZE, 0);
+ if (obj == ERR_PTR(-ENODEV))
+ continue;
+
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+
+ err = __igt_mmap_access(i915, obj, I915_MMAP_TYPE_GTT);
+ if (err == 0)
+ err = __igt_mmap_access(i915, obj, I915_MMAP_TYPE_WB);
+ if (err == 0)
+ err = __igt_mmap_access(i915, obj, I915_MMAP_TYPE_WC);
+ if (err == 0)
+ err = __igt_mmap_access(i915, obj, I915_MMAP_TYPE_UC);
+
+ i915_gem_object_put(obj);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
static int __igt_mmap_gpu(struct drm_i915_private *i915,
struct drm_i915_gem_object *obj,
enum i915_mmap_type type)
@@ -1156,9 +1279,6 @@ static int __igt_mmap_revoke(struct drm_i915_private *i915,
if (err)
goto out_unmap;
- GEM_BUG_ON(mmo->mmap_type == I915_MMAP_TYPE_GTT &&
- !atomic_read(&obj->bind_count));
-
err = check_present(addr, obj->base.size);
if (err) {
pr_err("%s: was not present\n", obj->mm.region->name);
@@ -1175,7 +1295,6 @@ static int __igt_mmap_revoke(struct drm_i915_private *i915,
pr_err("Failed to unbind object!\n");
goto out_unmap;
}
- GEM_BUG_ON(atomic_read(&obj->bind_count));
if (type != I915_MMAP_TYPE_GTT) {
__i915_gem_object_put_pages(obj);
@@ -1233,6 +1352,7 @@ int i915_gem_mman_live_selftests(struct drm_i915_private *i915)
SUBTEST(igt_smoke_tiling),
SUBTEST(igt_mmap_offset_exhaustion),
SUBTEST(igt_mmap),
+ SUBTEST(igt_mmap_access),
SUBTEST(igt_mmap_revoke),
SUBTEST(igt_mmap_gpu),
};
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c
index 2b6db6f799de..faa5b6d91795 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c
@@ -14,7 +14,7 @@ static int igt_gem_object(void *arg)
{
struct drm_i915_private *i915 = arg;
struct drm_i915_gem_object *obj;
- int err = -ENOMEM;
+ int err;
/* Basic test to ensure we can create an object */
diff --git a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c
index 772d8cba7da9..e21b5023ca7d 100644
--- a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c
+++ b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c
@@ -83,6 +83,8 @@ igt_emit_store_dw(struct i915_vma *vma,
offset += PAGE_SIZE;
}
*cmd = MI_BATCH_BUFFER_END;
+
+ i915_gem_object_flush_map(obj);
i915_gem_object_unpin_map(obj);
intel_gt_chipset_flush(vma->vm->gt);
@@ -126,16 +128,6 @@ int igt_gpu_fill_dw(struct intel_context *ce,
goto err_batch;
}
- flags = 0;
- if (INTEL_GEN(ce->vm->i915) <= 5)
- flags |= I915_DISPATCH_SECURE;
-
- err = rq->engine->emit_bb_start(rq,
- batch->node.start, batch->node.size,
- flags);
- if (err)
- goto err_request;
-
i915_vma_lock(batch);
err = i915_request_await_object(rq, batch->obj, false);
if (err == 0)
@@ -152,15 +144,17 @@ int igt_gpu_fill_dw(struct intel_context *ce,
if (err)
goto skip_request;
- i915_request_add(rq);
-
- i915_vma_unpin_and_release(&batch, 0);
+ flags = 0;
+ if (INTEL_GEN(ce->vm->i915) <= 5)
+ flags |= I915_DISPATCH_SECURE;
- return 0;
+ err = rq->engine->emit_bb_start(rq,
+ batch->node.start, batch->node.size,
+ flags);
skip_request:
- i915_request_set_error_once(rq, err);
-err_request:
+ if (err)
+ i915_request_set_error_once(rq, err);
i915_request_add(rq);
err_batch:
i915_vma_unpin_and_release(&batch, 0);
diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c
index 9272bef57092..debaf7b18ab5 100644
--- a/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c
@@ -66,7 +66,7 @@ static void *mock_dmabuf_vmap(struct dma_buf *dma_buf)
{
struct mock_dmabuf *mock = to_mock(dma_buf);
- return vm_map_ram(mock->pages, mock->npages, 0, PAGE_KERNEL);
+ return vm_map_ram(mock->pages, mock->npages, 0);
}
static void mock_dmabuf_vunmap(struct dma_buf *dma_buf, void *vaddr)
diff --git a/drivers/gpu/drm/i915/gt/debugfs_engines.c b/drivers/gpu/drm/i915/gt/debugfs_engines.c
index 6a5e9ab20b94..5e3725e62241 100644
--- a/drivers/gpu/drm/i915/gt/debugfs_engines.c
+++ b/drivers/gpu/drm/i915/gt/debugfs_engines.c
@@ -32,5 +32,5 @@ void debugfs_engines_register(struct intel_gt *gt, struct dentry *root)
{ "engines", &engines_fops },
};
- debugfs_gt_register_files(gt, root, files, ARRAY_SIZE(files));
+ intel_gt_debugfs_register_files(root, files, ARRAY_SIZE(files), gt);
}
diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt.c b/drivers/gpu/drm/i915/gt/debugfs_gt.c
index 75255aaacaed..1de5fbaa1cf9 100644
--- a/drivers/gpu/drm/i915/gt/debugfs_gt.c
+++ b/drivers/gpu/drm/i915/gt/debugfs_gt.c
@@ -9,6 +9,7 @@
#include "debugfs_engines.h"
#include "debugfs_gt.h"
#include "debugfs_gt_pm.h"
+#include "uc/intel_uc_debugfs.h"
#include "i915_drv.h"
void debugfs_gt_register(struct intel_gt *gt)
@@ -24,17 +25,19 @@ void debugfs_gt_register(struct intel_gt *gt)
debugfs_engines_register(gt, root);
debugfs_gt_pm_register(gt, root);
+
+ intel_uc_debugfs_register(&gt->uc, root);
}
-void debugfs_gt_register_files(struct intel_gt *gt,
- struct dentry *root,
- const struct debugfs_gt_file *files,
- unsigned long count)
+void intel_gt_debugfs_register_files(struct dentry *root,
+ const struct debugfs_gt_file *files,
+ unsigned long count, void *data)
{
while (count--) {
- if (!files->eval || files->eval(gt))
+ umode_t mode = files->fops->write ? 0644 : 0444;
+ if (!files->eval || files->eval(data))
debugfs_create_file(files->name,
- 0444, root, gt,
+ mode, root, data,
files->fops);
files++;
diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt.h b/drivers/gpu/drm/i915/gt/debugfs_gt.h
index 4ea0f06cda8f..f77540f727e9 100644
--- a/drivers/gpu/drm/i915/gt/debugfs_gt.h
+++ b/drivers/gpu/drm/i915/gt/debugfs_gt.h
@@ -28,12 +28,11 @@ void debugfs_gt_register(struct intel_gt *gt);
struct debugfs_gt_file {
const char *name;
const struct file_operations *fops;
- bool (*eval)(const struct intel_gt *gt);
+ bool (*eval)(void *data);
};
-void debugfs_gt_register_files(struct intel_gt *gt,
- struct dentry *root,
- const struct debugfs_gt_file *files,
- unsigned long count);
+void intel_gt_debugfs_register_files(struct dentry *root,
+ const struct debugfs_gt_file *files,
+ unsigned long count, void *data);
#endif /* DEBUGFS_GT_H */
diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c
index 059c9e5c002e..174a24553322 100644
--- a/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c
@@ -10,6 +10,7 @@
#include "debugfs_gt_pm.h"
#include "i915_drv.h"
#include "intel_gt.h"
+#include "intel_gt_clock_utils.h"
#include "intel_llc.h"
#include "intel_rc6.h"
#include "intel_rps.h"
@@ -268,7 +269,7 @@ static int frequency_show(struct seq_file *m, void *unused)
yesno(rpmodectl & GEN6_RP_ENABLE));
seq_printf(m, "SW control enabled: %s\n",
yesno((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) ==
- GEN6_RP_MEDIA_SW_MODE));
+ GEN6_RP_MEDIA_SW_MODE));
vlv_punit_get(i915);
freq_sts = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
@@ -300,8 +301,9 @@ static int frequency_show(struct seq_file *m, void *unused)
u32 rp_state_cap;
u32 rpmodectl, rpinclimit, rpdeclimit;
u32 rpstat, cagf, reqf;
- u32 rpupei, rpcurup, rpprevup;
- u32 rpdownei, rpcurdown, rpprevdown;
+ u32 rpcurupei, rpcurup, rpprevup;
+ u32 rpcurdownei, rpcurdown, rpprevdown;
+ u32 rpupei, rpupt, rpdownei, rpdownt;
u32 pm_ier, pm_imr, pm_isr, pm_iir, pm_mask;
int max_freq;
@@ -334,12 +336,19 @@ static int frequency_show(struct seq_file *m, void *unused)
rpdeclimit = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD);
rpstat = intel_uncore_read(uncore, GEN6_RPSTAT1);
- rpupei = intel_uncore_read(uncore, GEN6_RP_CUR_UP_EI) & GEN6_CURICONT_MASK;
+ rpcurupei = intel_uncore_read(uncore, GEN6_RP_CUR_UP_EI) & GEN6_CURICONT_MASK;
rpcurup = intel_uncore_read(uncore, GEN6_RP_CUR_UP) & GEN6_CURBSYTAVG_MASK;
rpprevup = intel_uncore_read(uncore, GEN6_RP_PREV_UP) & GEN6_CURBSYTAVG_MASK;
- rpdownei = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN_EI) & GEN6_CURIAVG_MASK;
+ rpcurdownei = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN_EI) & GEN6_CURIAVG_MASK;
rpcurdown = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN) & GEN6_CURBSYTAVG_MASK;
rpprevdown = intel_uncore_read(uncore, GEN6_RP_PREV_DOWN) & GEN6_CURBSYTAVG_MASK;
+
+ rpupei = intel_uncore_read(uncore, GEN6_RP_UP_EI);
+ rpupt = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD);
+
+ rpdownei = intel_uncore_read(uncore, GEN6_RP_DOWN_EI);
+ rpdownt = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD);
+
cagf = intel_rps_read_actual_frequency(rps);
intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
@@ -372,7 +381,7 @@ static int frequency_show(struct seq_file *m, void *unused)
yesno(rpmodectl & GEN6_RP_ENABLE));
seq_printf(m, "SW control enabled: %s\n",
yesno((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) ==
- GEN6_RP_MEDIA_SW_MODE));
+ GEN6_RP_MEDIA_SW_MODE));
seq_printf(m, "PM IER=0x%08x IMR=0x%08x, MASK=0x%08x\n",
pm_ier, pm_imr, pm_mask);
@@ -394,23 +403,35 @@ static int frequency_show(struct seq_file *m, void *unused)
seq_printf(m, "RPDECLIMIT: 0x%08x\n", rpdeclimit);
seq_printf(m, "RPNSWREQ: %dMHz\n", reqf);
seq_printf(m, "CAGF: %dMHz\n", cagf);
- seq_printf(m, "RP CUR UP EI: %d (%dus)\n",
- rpupei, GT_PM_INTERVAL_TO_US(i915, rpupei));
- seq_printf(m, "RP CUR UP: %d (%dus)\n",
- rpcurup, GT_PM_INTERVAL_TO_US(i915, rpcurup));
- seq_printf(m, "RP PREV UP: %d (%dus)\n",
- rpprevup, GT_PM_INTERVAL_TO_US(i915, rpprevup));
+ seq_printf(m, "RP CUR UP EI: %d (%dns)\n",
+ rpcurupei,
+ intel_gt_pm_interval_to_ns(gt, rpcurupei));
+ seq_printf(m, "RP CUR UP: %d (%dns)\n",
+ rpcurup, intel_gt_pm_interval_to_ns(gt, rpcurup));
+ seq_printf(m, "RP PREV UP: %d (%dns)\n",
+ rpprevup, intel_gt_pm_interval_to_ns(gt, rpprevup));
seq_printf(m, "Up threshold: %d%%\n",
rps->power.up_threshold);
-
- seq_printf(m, "RP CUR DOWN EI: %d (%dus)\n",
- rpdownei, GT_PM_INTERVAL_TO_US(i915, rpdownei));
- seq_printf(m, "RP CUR DOWN: %d (%dus)\n",
- rpcurdown, GT_PM_INTERVAL_TO_US(i915, rpcurdown));
- seq_printf(m, "RP PREV DOWN: %d (%dus)\n",
- rpprevdown, GT_PM_INTERVAL_TO_US(i915, rpprevdown));
+ seq_printf(m, "RP UP EI: %d (%dns)\n",
+ rpupei, intel_gt_pm_interval_to_ns(gt, rpupei));
+ seq_printf(m, "RP UP THRESHOLD: %d (%dns)\n",
+ rpupt, intel_gt_pm_interval_to_ns(gt, rpupt));
+
+ seq_printf(m, "RP CUR DOWN EI: %d (%dns)\n",
+ rpcurdownei,
+ intel_gt_pm_interval_to_ns(gt, rpcurdownei));
+ seq_printf(m, "RP CUR DOWN: %d (%dns)\n",
+ rpcurdown,
+ intel_gt_pm_interval_to_ns(gt, rpcurdown));
+ seq_printf(m, "RP PREV DOWN: %d (%dns)\n",
+ rpprevdown,
+ intel_gt_pm_interval_to_ns(gt, rpprevdown));
seq_printf(m, "Down threshold: %d%%\n",
rps->power.down_threshold);
+ seq_printf(m, "RP DOWN EI: %d (%dns)\n",
+ rpdownei, intel_gt_pm_interval_to_ns(gt, rpdownei));
+ seq_printf(m, "RP DOWN THRESHOLD: %d (%dns)\n",
+ rpdownt, intel_gt_pm_interval_to_ns(gt, rpdownt));
max_freq = (IS_GEN9_LP(i915) ? rp_state_cap >> 0 :
rp_state_cap >> 16) & 0xff;
@@ -506,8 +527,10 @@ static int llc_show(struct seq_file *m, void *data)
return 0;
}
-static bool llc_eval(const struct intel_gt *gt)
+static bool llc_eval(void *data)
{
+ struct intel_gt *gt = data;
+
return HAS_LLC(gt->i915);
}
@@ -533,7 +556,8 @@ static int rps_boost_show(struct seq_file *m, void *data)
struct drm_i915_private *i915 = gt->i915;
struct intel_rps *rps = &gt->rps;
- seq_printf(m, "RPS enabled? %d\n", rps->enabled);
+ seq_printf(m, "RPS enabled? %s\n", yesno(intel_rps_is_enabled(rps)));
+ seq_printf(m, "RPS active? %s\n", yesno(intel_rps_is_active(rps)));
seq_printf(m, "GPU busy? %s\n", yesno(gt->awake));
seq_printf(m, "Boosts outstanding? %d\n",
atomic_read(&rps->num_waiters));
@@ -553,7 +577,7 @@ static int rps_boost_show(struct seq_file *m, void *data)
seq_printf(m, "Wait boosts: %d\n", atomic_read(&rps->boosts));
- if (INTEL_GEN(i915) >= 6 && rps->enabled && gt->awake) {
+ if (INTEL_GEN(i915) >= 6 && intel_rps_is_active(rps)) {
struct intel_uncore *uncore = gt->uncore;
u32 rpup, rpupei;
u32 rpdown, rpdownei;
@@ -580,8 +604,10 @@ static int rps_boost_show(struct seq_file *m, void *data)
return 0;
}
-static bool rps_eval(const struct intel_gt *gt)
+static bool rps_eval(void *data)
{
+ struct intel_gt *gt = data;
+
return HAS_RPS(gt->i915);
}
@@ -597,5 +623,5 @@ void debugfs_gt_pm_register(struct intel_gt *gt, struct dentry *root)
{ "rps_boost", &rps_boost_fops, rps_eval },
};
- debugfs_gt_register_files(gt, root, files, ARRAY_SIZE(files));
+ intel_gt_debugfs_register_files(root, files, ARRAY_SIZE(files), gt);
}
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index 94e746af8926..699125928272 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -389,6 +389,16 @@ static int gen8_ppgtt_alloc(struct i915_address_space *vm,
return err;
}
+static __always_inline void
+write_pte(gen8_pte_t *pte, const gen8_pte_t val)
+{
+ /* Magic delays? Or can we refine these to flush all in one pass? */
+ *pte = val;
+ wmb(); /* cpu to cache */
+ clflush(pte); /* cache to memory */
+ wmb(); /* visible to all */
+}
+
static __always_inline u64
gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
struct i915_page_directory *pdp,
@@ -405,7 +415,8 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
do {
GEM_BUG_ON(iter->sg->length < I915_GTT_PAGE_SIZE);
- vaddr[gen8_pd_index(idx, 0)] = pte_encode | iter->dma;
+ write_pte(&vaddr[gen8_pd_index(idx, 0)],
+ pte_encode | iter->dma);
iter->dma += I915_GTT_PAGE_SIZE;
if (iter->dma >= iter->max) {
@@ -487,7 +498,7 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
do {
GEM_BUG_ON(iter->sg->length < page_size);
- vaddr[index++] = encode | iter->dma;
+ write_pte(&vaddr[index++], encode | iter->dma);
start += page_size;
iter->dma += page_size;
diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
index cbad7fe722ce..d907d538176e 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
@@ -64,7 +64,7 @@ static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
if (!--b->irq_enabled)
irq_disable(engine);
- b->irq_armed = false;
+ WRITE_ONCE(b->irq_armed, false);
intel_gt_pm_put_async(engine->gt);
}
@@ -73,7 +73,7 @@ void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
struct intel_breadcrumbs *b = &engine->breadcrumbs;
unsigned long flags;
- if (!b->irq_armed)
+ if (!READ_ONCE(b->irq_armed))
return;
spin_lock_irqsave(&b->irq_lock, flags);
@@ -142,6 +142,18 @@ static void add_retire(struct intel_breadcrumbs *b, struct intel_timeline *tl)
intel_engine_add_retire(engine, tl);
}
+static void __signal_request(struct i915_request *rq, struct list_head *signals)
+{
+ GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags));
+ clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
+
+ if (!__dma_fence_signal(&rq->fence))
+ return;
+
+ i915_request_get(rq);
+ list_add_tail(&rq->signal_link, signals);
+}
+
static void signal_irq_work(struct irq_work *work)
{
struct intel_breadcrumbs *b = container_of(work, typeof(*b), irq_work);
@@ -155,6 +167,8 @@ static void signal_irq_work(struct irq_work *work)
if (b->irq_armed && list_empty(&b->signalers))
__intel_breadcrumbs_disarm_irq(b);
+ list_splice_init(&b->signaled_requests, &signal);
+
list_for_each_entry_safe(ce, cn, &b->signalers, signal_link) {
GEM_BUG_ON(list_empty(&ce->signals));
@@ -163,24 +177,15 @@ static void signal_irq_work(struct irq_work *work)
list_entry(pos, typeof(*rq), signal_link);
GEM_BUG_ON(!check_signal_order(ce, rq));
-
if (!__request_completed(rq))
break;
- GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_SIGNAL,
- &rq->fence.flags));
- clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
-
- if (!__dma_fence_signal(&rq->fence))
- continue;
-
/*
* Queue for execution after dropping the signaling
* spinlock as the callback chain may end up adding
* more signalers to the same context or engine.
*/
- i915_request_get(rq);
- list_add_tail(&rq->signal_link, &signal);
+ __signal_request(rq, &signal);
}
/*
@@ -233,7 +238,7 @@ static bool __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
* which we can add a new waiter and avoid the cost of re-enabling
* the irq.
*/
- b->irq_armed = true;
+ WRITE_ONCE(b->irq_armed, true);
/*
* Since we are waiting on a request, the GPU should be busy
@@ -255,6 +260,7 @@ void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
spin_lock_init(&b->irq_lock);
INIT_LIST_HEAD(&b->signalers);
+ INIT_LIST_HEAD(&b->signaled_requests);
init_irq_work(&b->irq_work, signal_irq_work);
}
@@ -274,6 +280,32 @@ void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine)
spin_unlock_irqrestore(&b->irq_lock, flags);
}
+void intel_engine_transfer_stale_breadcrumbs(struct intel_engine_cs *engine,
+ struct intel_context *ce)
+{
+ struct intel_breadcrumbs *b = &engine->breadcrumbs;
+ unsigned long flags;
+
+ spin_lock_irqsave(&b->irq_lock, flags);
+ if (!list_empty(&ce->signals)) {
+ struct i915_request *rq, *next;
+
+ /* Queue for executing the signal callbacks in the irq_work */
+ list_for_each_entry_safe(rq, next, &ce->signals, signal_link) {
+ GEM_BUG_ON(rq->engine != engine);
+ GEM_BUG_ON(!__request_completed(rq));
+
+ __signal_request(rq, &b->signaled_requests);
+ }
+
+ INIT_LIST_HEAD(&ce->signals);
+ list_del_init(&ce->signal_link);
+
+ irq_work_queue(&b->irq_work);
+ }
+ spin_unlock_irqrestore(&b->irq_lock, flags);
+}
+
void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
{
}
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index aea992e46c42..74ddb49b2941 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -114,6 +114,11 @@ int __intel_context_do_pin(struct intel_context *ce)
goto out_release;
}
+ if (unlikely(intel_context_is_closed(ce))) {
+ err = -ENOENT;
+ goto out_unlock;
+ }
+
if (likely(!atomic_add_unless(&ce->pin_count, 1, 0))) {
err = intel_context_active_acquire(ce);
if (unlikely(err))
diff --git a/drivers/gpu/drm/i915/gt/intel_context_sseu.c b/drivers/gpu/drm/i915/gt/intel_context_sseu.c
index 57a30956c922..487299cb91f2 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_sseu.c
+++ b/drivers/gpu/drm/i915/gt/intel_context_sseu.c
@@ -25,8 +25,7 @@ static int gen8_emit_rpcs_config(struct i915_request *rq,
return PTR_ERR(cs);
offset = i915_ggtt_offset(ce->state) +
- LRC_STATE_PN * PAGE_SIZE +
- CTX_R_PWR_CLK_STATE * 4;
+ LRC_STATE_OFFSET + CTX_R_PWR_CLK_STATE * 4;
*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
*cs++ = lower_32_bits(offset);
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index ca0d4f4f3615..4954b0df4864 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -102,6 +102,8 @@ struct intel_context {
/** sseu: Control eu/slice partitioning */
struct intel_sseu sseu;
+
+ u8 wa_bb_page; /* if set, page num reserved for context workarounds */
};
#endif /* __INTEL_CONTEXT_TYPES__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index a1aa0d3e8be1..9bf6d4989968 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -199,6 +199,8 @@ void intel_engine_cleanup(struct intel_engine_cs *engine);
int intel_engines_init_mmio(struct intel_gt *gt);
int intel_engines_init(struct intel_gt *gt);
+void intel_engine_free_request_pool(struct intel_engine_cs *engine);
+
void intel_engines_release(struct intel_gt *gt);
void intel_engines_free(struct intel_gt *gt);
@@ -236,22 +238,35 @@ intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine)
void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine);
void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
+void intel_engine_transfer_stale_breadcrumbs(struct intel_engine_cs *engine,
+ struct intel_context *ce);
+
void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
struct drm_printer *p);
-static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
+static inline u32 *__gen8_emit_pipe_control(u32 *batch, u32 flags0, u32 flags1, u32 offset)
{
memset(batch, 0, 6 * sizeof(u32));
- batch[0] = GFX_OP_PIPE_CONTROL(6);
- batch[1] = flags;
+ batch[0] = GFX_OP_PIPE_CONTROL(6) | flags0;
+ batch[1] = flags1;
batch[2] = offset;
return batch + 6;
}
+static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
+{
+ return __gen8_emit_pipe_control(batch, 0, flags, offset);
+}
+
+static inline u32 *gen12_emit_pipe_control(u32 *batch, u32 flags0, u32 flags1, u32 offset)
+{
+ return __gen8_emit_pipe_control(batch, flags0, flags1, offset);
+}
+
static inline u32 *
-gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags)
+__gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags0, u32 flags1)
{
/* We're using qword write, offset should be aligned to 8 bytes. */
GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));
@@ -260,8 +275,8 @@ gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags)
* need a prior CS_STALL, which is emitted by the flush
* following the batch.
*/
- *cs++ = GFX_OP_PIPE_CONTROL(6);
- *cs++ = flags | PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_GLOBAL_GTT_IVB;
+ *cs++ = GFX_OP_PIPE_CONTROL(6) | flags0;
+ *cs++ = flags1 | PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_GLOBAL_GTT_IVB;
*cs++ = gtt_offset;
*cs++ = 0;
*cs++ = value;
@@ -271,6 +286,18 @@ gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags)
return cs;
}
+static inline u32*
+gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags)
+{
+ return __gen8_emit_ggtt_write_rcs(cs, value, gtt_offset, 0, flags);
+}
+
+static inline u32*
+gen12_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags0, u32 flags1)
+{
+ return __gen8_emit_ggtt_write_rcs(cs, value, gtt_offset, flags0, flags1);
+}
+
static inline u32 *
gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset, u32 flags)
{
@@ -308,9 +335,6 @@ void intel_engine_dump(struct intel_engine_cs *engine,
struct drm_printer *m,
const char *header, ...);
-int intel_enable_engine_stats(struct intel_engine_cs *engine);
-void intel_disable_engine_stats(struct intel_engine_cs *engine);
-
ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine);
struct i915_request *
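The flags0/flags1 split above lets gen12 callers set PIPE_CONTROL bits that live in dword 0 of the packet (such as PIPE_CONTROL0_HDC_PIPELINE_FLUSH) while the gen8 wrappers keep passing 0 for dword 0. A minimal standalone sketch of that packet layout follows; the opcode value is a placeholder standing in for GFX_OP_PIPE_CONTROL(6) from intel_gpu_commands.h, and the flag values are illustrative only.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Placeholder for GFX_OP_PIPE_CONTROL(6); the real encoding lives in
 * intel_gpu_commands.h. */
#define OP_PIPE_CONTROL_6	0x7a000004u

static uint32_t *emit_pipe_control(uint32_t *batch, uint32_t flags0,
				   uint32_t flags1, uint32_t offset)
{
	memset(batch, 0, 6 * sizeof(uint32_t));
	batch[0] = OP_PIPE_CONTROL_6 | flags0;	/* gen12-only bits (dword 0) */
	batch[1] = flags1;			/* legacy flush flags (dword 1) */
	batch[2] = offset;
	return batch + 6;
}

int main(void)
{
	uint32_t cs[6];

	/* gen8 path: no dword-0 flags, everything goes into flags1 */
	emit_pipe_control(cs, 0, 0x00100000u, 0x100);
	printf("gen8:  dw0=%08x dw1=%08x\n", cs[0], cs[1]);

	/* gen12 path: e.g. bit 9 of dword 0 (PIPE_CONTROL0_HDC_PIPELINE_FLUSH) */
	emit_pipe_control(cs, 1u << 9, 0x00100000u, 0x100);
	printf("gen12: dw0=%08x dw1=%08x\n", cs[0], cs[1]);
	return 0;
}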
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 883a9b7fe88d..da5b61085257 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -31,7 +31,6 @@
#include "intel_context.h"
#include "intel_engine.h"
#include "intel_engine_pm.h"
-#include "intel_engine_pool.h"
#include "intel_engine_user.h"
#include "intel_gt.h"
#include "intel_gt_requests.h"
@@ -327,6 +326,8 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
if (INTEL_GEN(i915) == 12 && engine->class == RENDER_CLASS)
engine->props.preempt_timeout_ms = 0;
+ engine->defaults = engine->props; /* never to change again */
+
engine->context_size = intel_engine_context_size(gt, engine->class);
if (WARN_ON(engine->context_size > BIT(20)))
engine->context_size = 0;
@@ -347,8 +348,6 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
gt->engine_class[info->class][info->instance] = engine;
gt->engine[id] = engine;
- i915->engine[id] = engine;
-
return 0;
}
@@ -425,17 +424,27 @@ void intel_engines_release(struct intel_gt *gt)
engine->release = NULL;
memset(&engine->reset, 0, sizeof(engine->reset));
-
- gt->i915->engine[id] = NULL;
}
}
+void intel_engine_free_request_pool(struct intel_engine_cs *engine)
+{
+ if (!engine->request_pool)
+ return;
+
+ kmem_cache_free(i915_request_slab_cache(), engine->request_pool);
+}
+
void intel_engines_free(struct intel_gt *gt)
{
struct intel_engine_cs *engine;
enum intel_engine_id id;
+ /* Free the requests! dma-resv keeps fences around for an eternity */
+ rcu_barrier();
+
for_each_engine(engine, gt, id) {
+ intel_engine_free_request_pool(engine);
kfree(engine);
gt->engine[id] = NULL;
}
@@ -623,8 +632,6 @@ static int engine_setup_common(struct intel_engine_cs *engine)
intel_engine_init__pm(engine);
intel_engine_init_retire(engine);
- intel_engine_pool_init(&engine->pool);
-
/* Use the whole device by default */
engine->sseu =
intel_sseu_from_device_info(&RUNTIME_INFO(engine->i915)->sseu);
@@ -821,12 +828,11 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
cleanup_status_page(engine);
intel_engine_fini_retire(engine);
- intel_engine_pool_fini(&engine->pool);
intel_engine_fini_breadcrumbs(engine);
intel_engine_cleanup_cmd_parser(engine);
if (engine->default_state)
- i915_gem_object_put(engine->default_state);
+ fput(engine->default_state);
if (engine->kernel_context) {
intel_context_unpin(engine->kernel_context);
@@ -1225,6 +1231,49 @@ static void print_request(struct drm_printer *m,
name);
}
+static struct intel_timeline *get_timeline(struct i915_request *rq)
+{
+ struct intel_timeline *tl;
+
+ /*
+ * Even though we are holding the engine->active.lock here, there
+ * is no control over the submission queue per-se and we are
+ * inspecting the active state at a random point in time, with an
+ * unknown queue. Play safe and make sure the timeline remains valid.
+ * (Only being used for pretty printing, one extra kref shouldn't
+ * cause a camel stampede!)
+ */
+ rcu_read_lock();
+ tl = rcu_dereference(rq->timeline);
+ if (!kref_get_unless_zero(&tl->kref))
+ tl = NULL;
+ rcu_read_unlock();
+
+ return tl;
+}
+
+static int print_ring(char *buf, int sz, struct i915_request *rq)
+{
+ int len = 0;
+
+ if (!i915_request_signaled(rq)) {
+ struct intel_timeline *tl = get_timeline(rq);
+
+ len = scnprintf(buf, sz,
+ "ring:{start:%08x, hwsp:%08x, seqno:%08x, runtime:%llums}, ",
+ i915_ggtt_offset(rq->ring->vma),
+ tl ? tl->hwsp_offset : 0,
+ hwsp_seqno(rq),
+ DIV_ROUND_CLOSEST_ULL(intel_context_get_total_runtime_ns(rq->context),
+ 1000 * 1000));
+
+ if (tl)
+ intel_timeline_put(tl);
+ }
+
+ return len;
+}
+
static void hexdump(struct drm_printer *m, const void *buf, size_t len)
{
const size_t rowsize = 8 * sizeof(u32);
@@ -1254,27 +1303,6 @@ static void hexdump(struct drm_printer *m, const void *buf, size_t len)
}
}
-static struct intel_timeline *get_timeline(struct i915_request *rq)
-{
- struct intel_timeline *tl;
-
- /*
- * Even though we are holding the engine->active.lock here, there
- * is no control over the submission queue per-se and we are
- * inspecting the active state at a random point in time, with an
- * unknown queue. Play safe and make sure the timeline remains valid.
- * (Only being used for pretty printing, one extra kref shouldn't
- * cause a camel stampede!)
- */
- rcu_read_lock();
- tl = rcu_dereference(rq->timeline);
- if (!kref_get_unless_zero(&tl->kref))
- tl = NULL;
- rcu_read_unlock();
-
- return tl;
-}
-
static const char *repr_timer(const struct timer_list *t)
{
if (!READ_ONCE(t->expires))
@@ -1393,39 +1421,24 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
int len;
len = scnprintf(hdr, sizeof(hdr),
- "\t\tActive[%d]: ",
- (int)(port - execlists->active));
- if (!i915_request_signaled(rq)) {
- struct intel_timeline *tl = get_timeline(rq);
-
- len += scnprintf(hdr + len, sizeof(hdr) - len,
- "ring:{start:%08x, hwsp:%08x, seqno:%08x, runtime:%llums}, ",
- i915_ggtt_offset(rq->ring->vma),
- tl ? tl->hwsp_offset : 0,
- hwsp_seqno(rq),
- DIV_ROUND_CLOSEST_ULL(intel_context_get_total_runtime_ns(rq->context),
- 1000 * 1000));
-
- if (tl)
- intel_timeline_put(tl);
- }
+ "\t\tActive[%d]: ccid:%08x, ",
+ (int)(port - execlists->active),
+ rq->context->lrc.ccid);
+ len += print_ring(hdr + len, sizeof(hdr) - len, rq);
scnprintf(hdr + len, sizeof(hdr) - len, "rq: ");
print_request(m, rq, hdr);
}
for (port = execlists->pending; (rq = *port); port++) {
- struct intel_timeline *tl = get_timeline(rq);
- char hdr[80];
-
- snprintf(hdr, sizeof(hdr),
- "\t\tPending[%d] ring:{start:%08x, hwsp:%08x, seqno:%08x}, rq: ",
- (int)(port - execlists->pending),
- i915_ggtt_offset(rq->ring->vma),
- tl ? tl->hwsp_offset : 0,
- hwsp_seqno(rq));
- print_request(m, rq, hdr);
+ char hdr[160];
+ int len;
- if (tl)
- intel_timeline_put(tl);
+ len = scnprintf(hdr, sizeof(hdr),
+ "\t\tPending[%d]: ccid:%08x, ",
+ (int)(port - execlists->pending),
+ rq->context->lrc.ccid);
+ len += print_ring(hdr + len, sizeof(hdr) - len, rq);
+ scnprintf(hdr + len, sizeof(hdr) - len, "rq: ");
+ print_request(m, rq, hdr);
}
rcu_read_unlock();
execlists_active_unlock_bh(execlists);
@@ -1574,58 +1587,6 @@ void intel_engine_dump(struct intel_engine_cs *engine,
intel_engine_print_breadcrumbs(engine, m);
}
-/**
- * intel_enable_engine_stats() - Enable engine busy tracking on engine
- * @engine: engine to enable stats collection
- *
- * Start collecting the engine busyness data for @engine.
- *
- * Returns 0 on success or a negative error code.
- */
-int intel_enable_engine_stats(struct intel_engine_cs *engine)
-{
- struct intel_engine_execlists *execlists = &engine->execlists;
- unsigned long flags;
- int err = 0;
-
- if (!intel_engine_supports_stats(engine))
- return -ENODEV;
-
- execlists_active_lock_bh(execlists);
- write_seqlock_irqsave(&engine->stats.lock, flags);
-
- if (unlikely(engine->stats.enabled == ~0)) {
- err = -EBUSY;
- goto unlock;
- }
-
- if (engine->stats.enabled++ == 0) {
- struct i915_request * const *port;
- struct i915_request *rq;
-
- engine->stats.enabled_at = ktime_get();
-
- /* XXX submission method oblivious? */
- for (port = execlists->active; (rq = *port); port++)
- engine->stats.active++;
-
- for (port = execlists->pending; (rq = *port); port++) {
- /* Exclude any contexts already counted in active */
- if (!intel_context_inflight_count(rq->context))
- engine->stats.active++;
- }
-
- if (engine->stats.active)
- engine->stats.start = engine->stats.enabled_at;
- }
-
-unlock:
- write_sequnlock_irqrestore(&engine->stats.lock, flags);
- execlists_active_unlock_bh(execlists);
-
- return err;
-}
-
static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine)
{
ktime_t total = engine->stats.total;
@@ -1634,7 +1595,7 @@ static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine)
* If the engine is executing something at the moment
* add it to the total.
*/
- if (engine->stats.active)
+ if (atomic_read(&engine->stats.active))
total = ktime_add(total,
ktime_sub(ktime_get(), engine->stats.start));
@@ -1660,28 +1621,6 @@ ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine)
return total;
}
-/**
- * intel_disable_engine_stats() - Disable engine busy tracking on engine
- * @engine: engine to disable stats collection
- *
- * Stops collecting the engine busyness data for @engine.
- */
-void intel_disable_engine_stats(struct intel_engine_cs *engine)
-{
- unsigned long flags;
-
- if (!intel_engine_supports_stats(engine))
- return;
-
- write_seqlock_irqsave(&engine->stats.lock, flags);
- WARN_ON_ONCE(engine->stats.enabled == 0);
- if (--engine->stats.enabled == 0) {
- engine->stats.total = __intel_engine_get_busy_time(engine);
- engine->stats.active = 0;
- }
- write_sequnlock_irqrestore(&engine->stats.lock, flags);
-}
-
static bool match_ring(struct i915_request *rq)
{
u32 ring = ENGINE_READ(rq->engine, RING_START);
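With the stats rework above, busy time is accumulated under engine->stats.lock (a seqlock) and the in-flight context count becomes an atomic_t, so the explicit enable/disable reference counting can go away. A hedged, illustrative fragment of the reader-side pattern a consumer such as intel_engine_get_busy_time() would use to snapshot the value without blocking writers:

static ktime_t read_busy_time(struct intel_engine_cs *engine)
{
	unsigned int seq;
	ktime_t total;

	/* Retry the snapshot until no writer raced with us. */
	do {
		seq = read_seqbegin(&engine->stats.lock);
		total = __intel_engine_get_busy_time(engine);
	} while (read_seqretry(&engine->stats.lock, seq));

	return total;
}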
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
index dd825718e4e5..5136c8bf112d 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -31,7 +31,7 @@ static bool next_heartbeat(struct intel_engine_cs *engine)
delay = msecs_to_jiffies_timeout(delay);
if (delay >= HZ)
delay = round_jiffies_up_relative(delay);
- schedule_delayed_work(&engine->heartbeat.work, delay);
+ mod_delayed_work(system_wq, &engine->heartbeat.work, delay);
return true;
}
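schedule_delayed_work() is a no-op when the work item is already queued, so a shortened heartbeat interval would only take effect after the currently pending beat fired; mod_delayed_work() re-arms the pending timer to the new delay. A short, hedged illustration of the difference (the names here are placeholders, not from the patch):

#include <linux/workqueue.h>

static void beat_fn(struct work_struct *work) { /* ... */ }
static DECLARE_DELAYED_WORK(beat, beat_fn);

static void rearm_beat(unsigned long delay)
{
	/*
	 * schedule_delayed_work(&beat, delay) would return false and keep
	 * the old expiry if the work were already pending;
	 * mod_delayed_work() updates the timer to 'delay' either way.
	 */
	mod_delayed_work(system_wq, &beat, delay);
}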
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index b6cf284e3a2d..d0a1078ef632 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -10,31 +10,22 @@
#include "intel_engine.h"
#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
-#include "intel_engine_pool.h"
#include "intel_gt.h"
#include "intel_gt_pm.h"
#include "intel_rc6.h"
#include "intel_ring.h"
+#include "shmem_utils.h"
static int __engine_unpark(struct intel_wakeref *wf)
{
struct intel_engine_cs *engine =
container_of(wf, typeof(*engine), wakeref);
struct intel_context *ce;
- void *map;
ENGINE_TRACE(engine, "\n");
intel_gt_pm_get(engine->gt);
- /* Pin the default state for fast resets from atomic context. */
- map = NULL;
- if (engine->default_state)
- map = i915_gem_object_pin_map(engine->default_state,
- I915_MAP_WB);
- if (!IS_ERR_OR_NULL(map))
- engine->pinned_default_state = map;
-
/* Discard stale context state from across idling */
ce = engine->kernel_context;
if (ce) {
@@ -44,6 +35,7 @@ static int __engine_unpark(struct intel_wakeref *wf)
if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && ce->state) {
struct drm_i915_gem_object *obj = ce->state->obj;
int type = i915_coherent_map_type(engine->i915);
+ void *map;
map = i915_gem_object_pin_map(obj, type);
if (!IS_ERR(map)) {
@@ -181,7 +173,7 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
* Ergo, if we put ourselves on the timelines.active_list
* (see intel_timeline_enter()) before we increment the
* engine->wakeref.count, we may see the request completion and retire
- * it causing an undeflow of the engine->wakeref.
+ * it causing an underflow of the engine->wakeref.
*/
flags = __timeline_mark_lock(ce);
GEM_BUG_ON(atomic_read(&ce->timeline->active_count) < 0);
@@ -255,7 +247,6 @@ static int __engine_park(struct intel_wakeref *wf)
intel_engine_park_heartbeat(engine);
intel_engine_disarm_breadcrumbs(engine);
- intel_engine_pool_park(&engine->pool);
/* Must be reset upon idling, or we may miss the busy wakeup. */
GEM_BUG_ON(engine->execlists.queue_priority_hint != INT_MIN);
@@ -263,11 +254,6 @@ static int __engine_park(struct intel_wakeref *wf)
if (engine->park)
engine->park(engine);
- if (engine->pinned_default_state) {
- i915_gem_object_unpin_map(engine->default_state);
- engine->pinned_default_state = NULL;
- }
-
engine->execlists.no_priolist = false;
/* While gt calls i915_vma_parked(), we have to break the lock cycle */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.h b/drivers/gpu/drm/i915/gt/intel_engine_pm.h
index e52c2b0cb245..418df0a13145 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.h
@@ -37,6 +37,12 @@ static inline void intel_engine_pm_put_async(struct intel_engine_cs *engine)
intel_wakeref_put_async(&engine->wakeref);
}
+static inline void intel_engine_pm_put_delay(struct intel_engine_cs *engine,
+ unsigned long delay)
+{
+ intel_wakeref_put_delay(&engine->wakeref, delay);
+}
+
static inline void intel_engine_pm_flush(struct intel_engine_cs *engine)
{
intel_wakeref_unlock_wait(&engine->wakeref);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool.h b/drivers/gpu/drm/i915/gt/intel_engine_pool.h
deleted file mode 100644
index 1bd89cadc3b7..000000000000
--- a/drivers/gpu/drm/i915/gt/intel_engine_pool.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * SPDX-License-Identifier: MIT
- *
- * Copyright © 2014-2018 Intel Corporation
- */
-
-#ifndef INTEL_ENGINE_POOL_H
-#define INTEL_ENGINE_POOL_H
-
-#include "intel_engine_pool_types.h"
-#include "i915_active.h"
-#include "i915_request.h"
-
-struct intel_engine_pool_node *
-intel_engine_get_pool(struct intel_engine_cs *engine, size_t size);
-
-static inline int
-intel_engine_pool_mark_active(struct intel_engine_pool_node *node,
- struct i915_request *rq)
-{
- return i915_active_add_request(&node->active, rq);
-}
-
-static inline void
-intel_engine_pool_put(struct intel_engine_pool_node *node)
-{
- i915_active_release(&node->active);
-}
-
-void intel_engine_pool_init(struct intel_engine_pool *pool);
-void intel_engine_pool_park(struct intel_engine_pool *pool);
-void intel_engine_pool_fini(struct intel_engine_pool *pool);
-
-#endif /* INTEL_ENGINE_POOL_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 0be674ae1cf6..2b6cdf47d428 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -22,7 +22,6 @@
#include "i915_pmu.h"
#include "i915_priolist_types.h"
#include "i915_selftest.h"
-#include "intel_engine_pool_types.h"
#include "intel_sseu.h"
#include "intel_timeline_types.h"
#include "intel_wakeref.h"
@@ -181,6 +180,11 @@ struct intel_engine_execlists {
u32 error_interrupt;
/**
+ * @reset_ccid: Active CCID [EXECLISTS_STATUS_HI] at the time of reset
+ */
+ u32 reset_ccid;
+
+ /**
* @no_priolist: priority lists disabled
*/
bool no_priolist;
@@ -321,6 +325,9 @@ struct intel_engine_cs {
struct list_head hold; /* ready requests, but on hold */
} active;
+ /* keep a request in reserve for a [pm] barrier under oom */
+ struct i915_request *request_pool;
+
struct llist_head barrier_tasks;
struct intel_context *kernel_context; /* pinned */
@@ -336,8 +343,7 @@ struct intel_engine_cs {
unsigned long wakeref_serial;
struct intel_wakeref wakeref;
- struct drm_i915_gem_object *default_state;
- void *pinned_default_state;
+ struct file *default_state;
struct {
struct intel_ring *ring;
@@ -371,6 +377,8 @@ struct intel_engine_cs {
spinlock_t irq_lock;
struct list_head signalers;
+ struct list_head signaled_requests;
+
struct irq_work irq_work; /* for use from inside irq_lock */
unsigned int irq_enabled;
@@ -402,13 +410,6 @@ struct intel_engine_cs {
struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_COUNT];
} pmu;
- /*
- * A pool of objects to use as shadow copies of client batch buffers
- * when the command parser is enabled. Prevents the client from
- * modifying the batch contents after software parsing.
- */
- struct intel_engine_pool pool;
-
struct intel_hw_status_page status_page;
struct i915_ctx_workarounds wa_ctx;
struct i915_wa_list ctx_wa_list;
@@ -420,6 +421,7 @@ struct intel_engine_cs {
void (*irq_enable)(struct intel_engine_cs *engine);
void (*irq_disable)(struct intel_engine_cs *engine);
+ void (*sanitize)(struct intel_engine_cs *engine);
int (*resume)(struct intel_engine_cs *engine);
struct {
@@ -529,34 +531,34 @@ struct intel_engine_cs {
struct {
/**
- * @lock: Lock protecting the below fields.
- */
- seqlock_t lock;
- /**
- * @enabled: Reference count indicating number of listeners.
+ * @active: Number of contexts currently scheduled in.
*/
- unsigned int enabled;
+ atomic_t active;
+
/**
- * @active: Number of contexts currently scheduled in.
+ * @lock: Lock protecting the below fields.
*/
- unsigned int active;
+ seqlock_t lock;
+
/**
- * @enabled_at: Timestamp when busy stats were enabled.
+ * @total: Total time this engine was busy.
+ *
+ * Accumulated time not counting the most recent block in cases
+ * where engine is currently busy (active > 0).
*/
- ktime_t enabled_at;
+ ktime_t total;
+
/**
* @start: Timestamp of the last idle to active transition.
*
* Idle is defined as active == 0, active is active > 0.
*/
ktime_t start;
+
/**
- * @total: Total time this engine was busy.
- *
- * Accumulated time not counting the most recent block in cases
- * where engine is currently busy (active > 0).
+ * @rps: Utilisation at last RPS sampling.
*/
- ktime_t total;
+ ktime_t rps;
} stats;
struct {
@@ -565,7 +567,7 @@ struct intel_engine_cs {
unsigned long preempt_timeout_ms;
unsigned long stop_timeout_ms;
unsigned long timeslice_duration_ms;
- } props;
+ } props, defaults;
};
static inline bool
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 4c5a209cb669..66165b10256e 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -65,7 +65,7 @@ static int ggtt_init_hw(struct i915_ggtt *ggtt)
ggtt->mappable_end);
}
- i915_ggtt_init_fences(ggtt);
+ intel_ggtt_init_fences(ggtt);
return 0;
}
@@ -715,11 +715,13 @@ static void ggtt_cleanup_hw(struct i915_ggtt *ggtt)
*/
void i915_ggtt_driver_release(struct drm_i915_private *i915)
{
+ struct i915_ggtt *ggtt = &i915->ggtt;
struct pagevec *pvec;
- fini_aliasing_ppgtt(&i915->ggtt);
+ fini_aliasing_ppgtt(ggtt);
- ggtt_cleanup_hw(&i915->ggtt);
+ intel_ggtt_fini_fences(ggtt);
+ ggtt_cleanup_hw(ggtt);
pvec = &i915->mm.wc_stash.pvec;
if (pvec->nr) {
@@ -784,13 +786,13 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
else
ggtt->gsm = ioremap_wc(phys_addr, size);
if (!ggtt->gsm) {
- DRM_ERROR("Failed to map the ggtt page table\n");
+ drm_err(&i915->drm, "Failed to map the ggtt page table\n");
return -ENOMEM;
}
ret = setup_scratch_page(&ggtt->vm, GFP_DMA32);
if (ret) {
- DRM_ERROR("Scratch setup failed\n");
+ drm_err(&i915->drm, "Scratch setup failed\n");
/* iounmap will also get called at remove, but meh */
iounmap(ggtt->gsm);
return ret;
@@ -838,7 +840,6 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
struct pci_dev *pdev = i915->drm.pdev;
unsigned int size;
u16 snb_gmch_ctl;
- int err;
/* TODO: We're not aware of mappable constraints on gen8 yet */
if (!IS_DGFX(i915)) {
@@ -846,12 +847,6 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
ggtt->mappable_end = resource_size(&ggtt->gmadr);
}
- err = pci_set_dma_mask(pdev, DMA_BIT_MASK(39));
- if (!err)
- err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39));
- if (err)
- DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err);
-
pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
if (IS_CHERRYVIEW(i915))
size = chv_get_total_gtt_size(snb_gmch_ctl);
@@ -987,7 +982,6 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt)
struct pci_dev *pdev = i915->drm.pdev;
unsigned int size;
u16 snb_gmch_ctl;
- int err;
ggtt->gmadr = pci_resource(pdev, 2);
ggtt->mappable_end = resource_size(&ggtt->gmadr);
@@ -997,15 +991,11 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt)
* just a coarse sanity check.
*/
if (ggtt->mappable_end < (64<<20) || ggtt->mappable_end > (512<<20)) {
- DRM_ERROR("Unknown GMADR size (%pa)\n", &ggtt->mappable_end);
+ drm_err(&i915->drm, "Unknown GMADR size (%pa)\n",
+ &ggtt->mappable_end);
return -ENXIO;
}
- err = pci_set_dma_mask(pdev, DMA_BIT_MASK(40));
- if (!err)
- err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40));
- if (err)
- DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err);
pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
size = gen6_get_total_gtt_size(snb_gmch_ctl);
@@ -1052,7 +1042,7 @@ static int i915_gmch_probe(struct i915_ggtt *ggtt)
ret = intel_gmch_probe(i915->bridge_dev, i915->drm.pdev, NULL);
if (!ret) {
- DRM_ERROR("failed to set up gmch\n");
+ drm_err(&i915->drm, "failed to set up gmch\n");
return -EIO;
}
@@ -1075,7 +1065,7 @@ static int i915_gmch_probe(struct i915_ggtt *ggtt)
ggtt->vm.vma_ops.clear_pages = clear_pages;
if (unlikely(ggtt->do_idle_maps))
- dev_notice(i915->drm.dev,
+ drm_notice(&i915->drm,
"Applying Ironlake quirks for intel_iommu\n");
return 0;
@@ -1100,26 +1090,29 @@ static int ggtt_probe_hw(struct i915_ggtt *ggtt, struct intel_gt *gt)
return ret;
if ((ggtt->vm.total - 1) >> 32) {
- DRM_ERROR("We never expected a Global GTT with more than 32bits"
- " of address space! Found %lldM!\n",
- ggtt->vm.total >> 20);
+ drm_err(&i915->drm,
+ "We never expected a Global GTT with more than 32bits"
+ " of address space! Found %lldM!\n",
+ ggtt->vm.total >> 20);
ggtt->vm.total = 1ULL << 32;
ggtt->mappable_end =
min_t(u64, ggtt->mappable_end, ggtt->vm.total);
}
if (ggtt->mappable_end > ggtt->vm.total) {
- DRM_ERROR("mappable aperture extends past end of GGTT,"
- " aperture=%pa, total=%llx\n",
- &ggtt->mappable_end, ggtt->vm.total);
+ drm_err(&i915->drm,
+ "mappable aperture extends past end of GGTT,"
+ " aperture=%pa, total=%llx\n",
+ &ggtt->mappable_end, ggtt->vm.total);
ggtt->mappable_end = ggtt->vm.total;
}
/* GMADR is the PCI mmio aperture into the global GTT. */
- DRM_DEBUG_DRIVER("GGTT size = %lluM\n", ggtt->vm.total >> 20);
- DRM_DEBUG_DRIVER("GMADR size = %lluM\n", (u64)ggtt->mappable_end >> 20);
- DRM_DEBUG_DRIVER("DSM size = %lluM\n",
- (u64)resource_size(&intel_graphics_stolen_res) >> 20);
+ drm_dbg(&i915->drm, "GGTT size = %lluM\n", ggtt->vm.total >> 20);
+ drm_dbg(&i915->drm, "GMADR size = %lluM\n",
+ (u64)ggtt->mappable_end >> 20);
+ drm_dbg(&i915->drm, "DSM size = %lluM\n",
+ (u64)resource_size(&intel_graphics_stolen_res) >> 20);
return 0;
}
@@ -1137,7 +1130,7 @@ int i915_ggtt_probe_hw(struct drm_i915_private *i915)
return ret;
if (intel_vtd_active())
- dev_info(i915->drm.dev, "VT-d active for gfx access\n");
+ drm_info(&i915->drm, "VT-d active for gfx access\n");
return 0;
}
@@ -1212,6 +1205,8 @@ void i915_ggtt_resume(struct i915_ggtt *ggtt)
if (INTEL_GEN(ggtt->vm.i915) >= 8)
setup_private_pat(ggtt->vm.gt->uncore);
+
+ intel_ggtt_restore_fences(ggtt);
}
static struct scatterlist *
diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
index d152b648c73c..7fb36b12fe7a 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
@@ -68,8 +68,7 @@ static struct intel_uncore *fence_to_uncore(struct i915_fence_reg *fence)
return fence->ggtt->vm.gt->uncore;
}
-static void i965_write_fence_reg(struct i915_fence_reg *fence,
- struct i915_vma *vma)
+static void i965_write_fence_reg(struct i915_fence_reg *fence)
{
i915_reg_t fence_reg_lo, fence_reg_hi;
int fence_pitch_shift;
@@ -87,18 +86,16 @@ static void i965_write_fence_reg(struct i915_fence_reg *fence,
}
val = 0;
- if (vma) {
- unsigned int stride = i915_gem_object_get_stride(vma->obj);
+ if (fence->tiling) {
+ unsigned int stride = fence->stride;
- GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma));
- GEM_BUG_ON(!IS_ALIGNED(vma->node.start, I965_FENCE_PAGE));
- GEM_BUG_ON(!IS_ALIGNED(vma->fence_size, I965_FENCE_PAGE));
GEM_BUG_ON(!IS_ALIGNED(stride, 128));
- val = (vma->node.start + vma->fence_size - I965_FENCE_PAGE) << 32;
- val |= vma->node.start;
+ val = fence->start + fence->size - I965_FENCE_PAGE;
+ val <<= 32;
+ val |= fence->start;
val |= (u64)((stride / 128) - 1) << fence_pitch_shift;
- if (i915_gem_object_get_tiling(vma->obj) == I915_TILING_Y)
+ if (fence->tiling == I915_TILING_Y)
val |= BIT(I965_FENCE_TILING_Y_SHIFT);
val |= I965_FENCE_REG_VALID;
}
@@ -125,21 +122,15 @@ static void i965_write_fence_reg(struct i915_fence_reg *fence,
}
}
-static void i915_write_fence_reg(struct i915_fence_reg *fence,
- struct i915_vma *vma)
+static void i915_write_fence_reg(struct i915_fence_reg *fence)
{
u32 val;
val = 0;
- if (vma) {
- unsigned int tiling = i915_gem_object_get_tiling(vma->obj);
+ if (fence->tiling) {
+ unsigned int stride = fence->stride;
+ unsigned int tiling = fence->tiling;
bool is_y_tiled = tiling == I915_TILING_Y;
- unsigned int stride = i915_gem_object_get_stride(vma->obj);
-
- GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma));
- GEM_BUG_ON(vma->node.start & ~I915_FENCE_START_MASK);
- GEM_BUG_ON(!is_power_of_2(vma->fence_size));
- GEM_BUG_ON(!IS_ALIGNED(vma->node.start, vma->fence_size));
if (is_y_tiled && HAS_128_BYTE_Y_TILING(fence_to_i915(fence)))
stride /= 128;
@@ -147,10 +138,10 @@ static void i915_write_fence_reg(struct i915_fence_reg *fence,
stride /= 512;
GEM_BUG_ON(!is_power_of_2(stride));
- val = vma->node.start;
+ val = fence->start;
if (is_y_tiled)
val |= BIT(I830_FENCE_TILING_Y_SHIFT);
- val |= I915_FENCE_SIZE_BITS(vma->fence_size);
+ val |= I915_FENCE_SIZE_BITS(fence->size);
val |= ilog2(stride) << I830_FENCE_PITCH_SHIFT;
val |= I830_FENCE_REG_VALID;
@@ -165,25 +156,18 @@ static void i915_write_fence_reg(struct i915_fence_reg *fence,
}
}
-static void i830_write_fence_reg(struct i915_fence_reg *fence,
- struct i915_vma *vma)
+static void i830_write_fence_reg(struct i915_fence_reg *fence)
{
u32 val;
val = 0;
- if (vma) {
- unsigned int stride = i915_gem_object_get_stride(vma->obj);
-
- GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma));
- GEM_BUG_ON(vma->node.start & ~I830_FENCE_START_MASK);
- GEM_BUG_ON(!is_power_of_2(vma->fence_size));
- GEM_BUG_ON(!is_power_of_2(stride / 128));
- GEM_BUG_ON(!IS_ALIGNED(vma->node.start, vma->fence_size));
+ if (fence->tiling) {
+ unsigned int stride = fence->stride;
- val = vma->node.start;
- if (i915_gem_object_get_tiling(vma->obj) == I915_TILING_Y)
+ val = fence->start;
+ if (fence->tiling == I915_TILING_Y)
val |= BIT(I830_FENCE_TILING_Y_SHIFT);
- val |= I830_FENCE_SIZE_BITS(vma->fence_size);
+ val |= I830_FENCE_SIZE_BITS(fence->size);
val |= ilog2(stride / 128) << I830_FENCE_PITCH_SHIFT;
val |= I830_FENCE_REG_VALID;
}
@@ -197,8 +181,7 @@ static void i830_write_fence_reg(struct i915_fence_reg *fence,
}
}
-static void fence_write(struct i915_fence_reg *fence,
- struct i915_vma *vma)
+static void fence_write(struct i915_fence_reg *fence)
{
struct drm_i915_private *i915 = fence_to_i915(fence);
@@ -209,18 +192,21 @@ static void fence_write(struct i915_fence_reg *fence,
*/
if (IS_GEN(i915, 2))
- i830_write_fence_reg(fence, vma);
+ i830_write_fence_reg(fence);
else if (IS_GEN(i915, 3))
- i915_write_fence_reg(fence, vma);
+ i915_write_fence_reg(fence);
else
- i965_write_fence_reg(fence, vma);
+ i965_write_fence_reg(fence);
/*
* Access through the fenced region afterwards is
* ordered by the posting reads whilst writing the registers.
*/
+}
- fence->dirty = false;
+static bool gpu_uses_fence_registers(struct i915_fence_reg *fence)
+{
+ return INTEL_GEN(fence_to_i915(fence)) < 4;
}
static int fence_update(struct i915_fence_reg *fence,
@@ -232,27 +218,32 @@ static int fence_update(struct i915_fence_reg *fence,
struct i915_vma *old;
int ret;
+ fence->tiling = 0;
if (vma) {
+ GEM_BUG_ON(!i915_gem_object_get_stride(vma->obj) ||
+ !i915_gem_object_get_tiling(vma->obj));
+
if (!i915_vma_is_map_and_fenceable(vma))
return -EINVAL;
- if (drm_WARN(&uncore->i915->drm,
- !i915_gem_object_get_stride(vma->obj) ||
- !i915_gem_object_get_tiling(vma->obj),
- "bogus fence setup with stride: 0x%x, tiling mode: %i\n",
- i915_gem_object_get_stride(vma->obj),
- i915_gem_object_get_tiling(vma->obj)))
- return -EINVAL;
+ if (gpu_uses_fence_registers(fence)) {
+ /* implicit 'unfenced' GPU blits */
+ ret = i915_vma_sync(vma);
+ if (ret)
+ return ret;
+ }
- ret = i915_vma_sync(vma);
- if (ret)
- return ret;
+ fence->start = vma->node.start;
+ fence->size = vma->fence_size;
+ fence->stride = i915_gem_object_get_stride(vma->obj);
+ fence->tiling = i915_gem_object_get_tiling(vma->obj);
}
+ WRITE_ONCE(fence->dirty, false);
old = xchg(&fence->vma, NULL);
if (old) {
/* XXX Ideally we would move the waiting to outside the mutex */
- ret = i915_vma_sync(old);
+ ret = i915_active_wait(&fence->active);
if (ret) {
fence->vma = old;
return ret;
@@ -276,7 +267,7 @@ static int fence_update(struct i915_fence_reg *fence,
/*
* We only need to update the register itself if the device is awake.
* If the device is currently powered down, we will defer the write
- * to the runtime resume, see i915_gem_restore_fences().
+ * to the runtime resume, see intel_ggtt_restore_fences().
*
* This only works for removing the fence register, on acquisition
* the caller must hold the rpm wakeref. The fence register must
@@ -290,7 +281,7 @@ static int fence_update(struct i915_fence_reg *fence,
}
WRITE_ONCE(fence->vma, vma);
- fence_write(fence, vma);
+ fence_write(fence);
if (vma) {
vma->fence = fence;
@@ -307,23 +298,26 @@ static int fence_update(struct i915_fence_reg *fence,
*
* This function force-removes any fence from the given object, which is useful
* if the kernel wants to do untiled GTT access.
- *
- * Returns:
- *
- * 0 on success, negative error code on failure.
*/
-int i915_vma_revoke_fence(struct i915_vma *vma)
+void i915_vma_revoke_fence(struct i915_vma *vma)
{
struct i915_fence_reg *fence = vma->fence;
+ intel_wakeref_t wakeref;
lockdep_assert_held(&vma->vm->mutex);
if (!fence)
- return 0;
+ return;
- if (atomic_read(&fence->pin_count))
- return -EBUSY;
+ GEM_BUG_ON(fence->vma != vma);
+ GEM_BUG_ON(!i915_active_is_idle(&fence->active));
+ GEM_BUG_ON(atomic_read(&fence->pin_count));
- return fence_update(fence, NULL);
+ fence->tiling = 0;
+ WRITE_ONCE(fence->vma, NULL);
+ vma->fence = NULL;
+
+ with_intel_runtime_pm_if_in_use(fence_to_uncore(fence)->rpm, wakeref)
+ fence_write(fence);
}
static struct i915_fence_reg *fence_find(struct i915_ggtt *ggtt)
@@ -487,34 +481,19 @@ void i915_unreserve_fence(struct i915_fence_reg *fence)
}
/**
- * i915_gem_restore_fences - restore fence state
+ * intel_ggtt_restore_fences - restore fence state
* @ggtt: Global GTT
*
* Restore the hw fence state to match the software tracking again, to be called
* after a gpu reset and on resume. Note that on runtime suspend we only cancel
* the fences, to be reacquired by the user later.
*/
-void i915_gem_restore_fences(struct i915_ggtt *ggtt)
+void intel_ggtt_restore_fences(struct i915_ggtt *ggtt)
{
int i;
- rcu_read_lock(); /* keep obj alive as we dereference */
- for (i = 0; i < ggtt->num_fences; i++) {
- struct i915_fence_reg *reg = &ggtt->fence_regs[i];
- struct i915_vma *vma = READ_ONCE(reg->vma);
-
- GEM_BUG_ON(vma && vma->fence != reg);
-
- /*
- * Commit delayed tiling changes if we have an object still
- * attached to the fence, otherwise just clear the fence.
- */
- if (vma && !i915_gem_object_is_tiled(vma->obj))
- vma = NULL;
-
- fence_write(reg, vma);
- }
- rcu_read_unlock();
+ for (i = 0; i < ggtt->num_fences; i++)
+ fence_write(&ggtt->fence_regs[i]);
}
/**
@@ -746,7 +725,7 @@ static void detect_bit_6_swizzle(struct i915_ggtt *ggtt)
* bit 17 of its physical address and therefore being interpreted differently
* by the GPU.
*/
-static void i915_gem_swizzle_page(struct page *page)
+static void swizzle_page(struct page *page)
{
char temp[64];
char *vaddr;
@@ -791,7 +770,7 @@ i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj,
for_each_sgt_page(page, sgt_iter, pages) {
char new_bit_17 = page_to_phys(page) >> 17;
if ((new_bit_17 & 0x1) != (test_bit(i, obj->bit_17) != 0)) {
- i915_gem_swizzle_page(page);
+ swizzle_page(page);
set_page_dirty(page);
}
i++;
@@ -836,7 +815,7 @@ i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj,
}
}
-void i915_ggtt_init_fences(struct i915_ggtt *ggtt)
+void intel_ggtt_init_fences(struct i915_ggtt *ggtt)
{
struct drm_i915_private *i915 = ggtt->vm.i915;
struct intel_uncore *uncore = ggtt->vm.gt->uncore;
@@ -864,18 +843,37 @@ void i915_ggtt_init_fences(struct i915_ggtt *ggtt)
if (intel_vgpu_active(i915))
num_fences = intel_uncore_read(uncore,
vgtif_reg(avail_rs.fence_num));
+ ggtt->fence_regs = kcalloc(num_fences,
+ sizeof(*ggtt->fence_regs),
+ GFP_KERNEL);
+ if (!ggtt->fence_regs)
+ num_fences = 0;
/* Initialize fence registers to zero */
for (i = 0; i < num_fences; i++) {
struct i915_fence_reg *fence = &ggtt->fence_regs[i];
+ i915_active_init(&fence->active, NULL, NULL);
fence->ggtt = ggtt;
fence->id = i;
list_add_tail(&fence->link, &ggtt->fence_list);
}
ggtt->num_fences = num_fences;
- i915_gem_restore_fences(ggtt);
+ intel_ggtt_restore_fences(ggtt);
+}
+
+void intel_ggtt_fini_fences(struct i915_ggtt *ggtt)
+{
+ int i;
+
+ for (i = 0; i < ggtt->num_fences; i++) {
+ struct i915_fence_reg *fence = &ggtt->fence_regs[i];
+
+ i915_active_fini(&fence->active);
+ }
+
+ kfree(ggtt->fence_regs);
}
void intel_gt_init_swizzling(struct intel_gt *gt)
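Because the register contents are now rebuilt purely from the cached start/size/stride/tiling fields, intel_ggtt_restore_fences() can simply rewrite every fence without dereferencing any GEM object. A standalone recomputation of the i965 fence value assembled in i965_write_fence_reg() above; this is a userspace sketch, and the shift and page constants are illustrative assumptions rather than values taken from this patch.

#include <stdint.h>
#include <stdio.h>

#define I965_FENCE_PAGE            4096u	/* assumed */
#define I965_FENCE_PITCH_SHIFT     2		/* assumed */
#define I965_FENCE_TILING_Y_SHIFT  1		/* assumed */
#define I965_FENCE_REG_VALID       (1u << 0)	/* assumed */
#define I915_TILING_Y              2

int main(void)
{
	uint32_t start = 0x00100000, size = 0x00200000, stride = 512;
	uint32_t tiling = I915_TILING_Y;
	uint64_t val = 0;

	if (tiling) {
		/* Upper 32 bits: last fenced page; lower 32 bits: start + pitch */
		val = (uint64_t)(start + size - I965_FENCE_PAGE) << 32;
		val |= start;
		val |= (uint64_t)((stride / 128) - 1) << I965_FENCE_PITCH_SHIFT;
		if (tiling == I915_TILING_Y)
			val |= 1u << I965_FENCE_TILING_Y_SHIFT;
		val |= I965_FENCE_REG_VALID;
	}
	printf("fence = %016llx\n", (unsigned long long)val);
	return 0;
}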
diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.h b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.h
index 7bd521cd7cd7..9eef679e1311 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence_reg.h
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.h
@@ -22,12 +22,14 @@
*
*/
-#ifndef __I915_FENCE_REG_H__
-#define __I915_FENCE_REG_H__
+#ifndef __INTEL_GGTT_FENCING_H__
+#define __INTEL_GGTT_FENCING_H__
#include <linux/list.h>
#include <linux/types.h>
+#include "i915_active.h"
+
struct drm_i915_gem_object;
struct i915_ggtt;
struct i915_vma;
@@ -41,6 +43,7 @@ struct i915_fence_reg {
struct i915_ggtt *ggtt;
struct i915_vma *vma;
atomic_t pin_count;
+ struct i915_active active;
int id;
/**
* Whether the tiling parameters for the currently
@@ -51,20 +54,24 @@ struct i915_fence_reg {
* command (such as BLT on gen2/3), as a "fence".
*/
bool dirty;
+ u32 start;
+ u32 size;
+ u32 tiling;
+ u32 stride;
};
-/* i915_gem_fence_reg.c */
struct i915_fence_reg *i915_reserve_fence(struct i915_ggtt *ggtt);
void i915_unreserve_fence(struct i915_fence_reg *fence);
-void i915_gem_restore_fences(struct i915_ggtt *ggtt);
+void intel_ggtt_restore_fences(struct i915_ggtt *ggtt);
void i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj,
struct sg_table *pages);
void i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj,
struct sg_table *pages);
-void i915_ggtt_init_fences(struct i915_ggtt *ggtt);
+void intel_ggtt_init_fences(struct i915_ggtt *ggtt);
+void intel_ggtt_fini_fences(struct i915_ggtt *ggtt);
void intel_gt_init_swizzling(struct intel_gt *gt);
diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index f04214a54f75..534e435f20bc 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -138,7 +138,7 @@
*/
#define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1)
/* Gen11+. addr = base + (ctx_restore ? offset & GENMASK(12,2) : offset) */
-#define MI_LRI_CS_MMIO (1<<19)
+#define MI_LRI_LRM_CS_MMIO REG_BIT(19)
#define MI_LRI_FORCE_POSTED (1<<12)
#define MI_LOAD_REGISTER_IMM_MAX_REGS (126)
#define MI_STORE_REGISTER_MEM MI_INSTR(0x24, 1)
@@ -156,6 +156,7 @@
#define MI_LOAD_REGISTER_MEM MI_INSTR(0x29, 1)
#define MI_LOAD_REGISTER_MEM_GEN8 MI_INSTR(0x29, 2)
#define MI_LOAD_REGISTER_REG MI_INSTR(0x2A, 1)
+#define MI_LRR_SOURCE_CS_MMIO REG_BIT(18)
#define MI_BATCH_BUFFER MI_INSTR(0x30, 1)
#define MI_BATCH_NON_SECURE (1)
/* for snb/ivb/vlv this also means "batch in ppgtt" when ppgtt is enabled. */
@@ -235,9 +236,8 @@
#define PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH (1<<12) /* gen6+ */
#define PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE (1<<11) /* MBZ on ILK */
#define PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE (1<<10) /* GM45+ only */
-#define PIPE_CONTROL_L3_RO_CACHE_INVALIDATE REG_BIT(10) /* gen12 */
#define PIPE_CONTROL_INDIRECT_STATE_DISABLE (1<<9)
-#define PIPE_CONTROL_HDC_PIPELINE_FLUSH REG_BIT(9) /* gen12 */
+#define PIPE_CONTROL0_HDC_PIPELINE_FLUSH REG_BIT(9) /* gen12 */
#define PIPE_CONTROL_NOTIFY (1<<8)
#define PIPE_CONTROL_FLUSH_ENABLE (1<<7) /* gen7+ */
#define PIPE_CONTROL_DC_FLUSH_ENABLE (1<<5)
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index d09f7596cb98..f069551e412f 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -7,6 +7,8 @@
#include "i915_drv.h"
#include "intel_context.h"
#include "intel_gt.h"
+#include "intel_gt_buffer_pool.h"
+#include "intel_gt_clock_utils.h"
#include "intel_gt_pm.h"
#include "intel_gt_requests.h"
#include "intel_mocs.h"
@@ -15,6 +17,7 @@
#include "intel_rps.h"
#include "intel_uncore.h"
#include "intel_pm.h"
+#include "shmem_utils.h"
void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
{
@@ -26,6 +29,7 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
INIT_LIST_HEAD(&gt->closed_vma);
spin_lock_init(&gt->closed_lock);
+ intel_gt_init_buffer_pool(gt);
intel_gt_init_reset(gt);
intel_gt_init_requests(gt);
intel_gt_init_timelines(gt);
@@ -370,18 +374,6 @@ static struct i915_address_space *kernel_vm(struct intel_gt *gt)
return i915_vm_get(&gt->ggtt->vm);
}
-static int __intel_context_flush_retire(struct intel_context *ce)
-{
- struct intel_timeline *tl;
-
- tl = intel_context_timeline_lock(ce);
- if (IS_ERR(tl))
- return PTR_ERR(tl);
-
- intel_context_timeline_unlock(tl);
- return 0;
-}
-
static int __engines_record_defaults(struct intel_gt *gt)
{
struct i915_request *requests[I915_NUM_ENGINES] = {};
@@ -447,8 +439,7 @@ err_rq:
for (id = 0; id < ARRAY_SIZE(requests); id++) {
struct i915_request *rq;
- struct i915_vma *state;
- void *vaddr;
+ struct file *state;
rq = requests[id];
if (!rq)
@@ -460,48 +451,16 @@ err_rq:
}
GEM_BUG_ON(!test_bit(CONTEXT_ALLOC_BIT, &rq->context->flags));
- state = rq->context->state;
- if (!state)
+ if (!rq->context->state)
continue;
- /* Serialise with retirement on another CPU */
- GEM_BUG_ON(!i915_request_completed(rq));
- err = __intel_context_flush_retire(rq->context);
- if (err)
- goto out;
-
- /* We want to be able to unbind the state from the GGTT */
- GEM_BUG_ON(intel_context_is_pinned(rq->context));
-
- /*
- * As we will hold a reference to the logical state, it will
- * not be torn down with the context, and importantly the
- * object will hold onto its vma (making it possible for a
- * stray GTT write to corrupt our defaults). Unmap the vma
- * from the GTT to prevent such accidents and reclaim the
- * space.
- */
- err = i915_vma_unbind(state);
- if (err)
- goto out;
-
- i915_gem_object_lock(state->obj);
- err = i915_gem_object_set_to_cpu_domain(state->obj, false);
- i915_gem_object_unlock(state->obj);
- if (err)
- goto out;
-
- i915_gem_object_set_cache_coherency(state->obj, I915_CACHE_LLC);
-
- /* Check we can acquire the image of the context state */
- vaddr = i915_gem_object_pin_map(state->obj, I915_MAP_FORCE_WB);
- if (IS_ERR(vaddr)) {
- err = PTR_ERR(vaddr);
+ /* Keep a copy of the state's backing pages; free the obj */
+ state = shmem_create_from_object(rq->context->state->obj);
+ if (IS_ERR(state)) {
+ err = PTR_ERR(state);
goto out;
}
-
- rq->engine->default_state = i915_gem_object_get(state->obj);
- i915_gem_object_unpin_map(state->obj);
+ rq->engine->default_state = state;
}
out:
@@ -576,6 +535,8 @@ int intel_gt_init(struct intel_gt *gt)
*/
intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
+ intel_gt_init_clock_frequency(gt);
+
err = intel_gt_init_scratch(gt, IS_GEN(gt->i915, 2) ? SZ_256K : SZ_4K);
if (err)
goto out_fw;
@@ -635,8 +596,7 @@ void intel_gt_driver_remove(struct intel_gt *gt)
{
__intel_gt_disable(gt);
- intel_uc_fini_hw(&gt->uc);
- intel_uc_fini(&gt->uc);
+ intel_uc_driver_remove(&gt->uc);
intel_engines_release(gt);
}
@@ -663,6 +623,7 @@ void intel_gt_driver_release(struct intel_gt *gt)
intel_gt_pm_fini(gt);
intel_gt_fini_scratch(gt);
+ intel_gt_fini_buffer_pool(gt);
}
void intel_gt_driver_late_release(struct intel_gt *gt)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool.c b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c
index 397186818305..1495054a4305 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pool.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c
@@ -1,6 +1,5 @@
+// SPDX-License-Identifier: MIT
/*
- * SPDX-License-Identifier: MIT
- *
* Copyright © 2014-2018 Intel Corporation
*/
@@ -8,15 +7,15 @@
#include "i915_drv.h"
#include "intel_engine_pm.h"
-#include "intel_engine_pool.h"
+#include "intel_gt_buffer_pool.h"
-static struct intel_engine_cs *to_engine(struct intel_engine_pool *pool)
+static struct intel_gt *to_gt(struct intel_gt_buffer_pool *pool)
{
- return container_of(pool, struct intel_engine_cs, pool);
+ return container_of(pool, struct intel_gt, buffer_pool);
}
static struct list_head *
-bucket_for_size(struct intel_engine_pool *pool, size_t sz)
+bucket_for_size(struct intel_gt_buffer_pool *pool, size_t sz)
{
int n;
@@ -32,16 +31,50 @@ bucket_for_size(struct intel_engine_pool *pool, size_t sz)
return &pool->cache_list[n];
}
-static void node_free(struct intel_engine_pool_node *node)
+static void node_free(struct intel_gt_buffer_pool_node *node)
{
i915_gem_object_put(node->obj);
i915_active_fini(&node->active);
kfree(node);
}
+static void pool_free_work(struct work_struct *wrk)
+{
+ struct intel_gt_buffer_pool *pool =
+ container_of(wrk, typeof(*pool), work.work);
+ struct intel_gt_buffer_pool_node *node, *next;
+ unsigned long old = jiffies - HZ;
+ bool active = false;
+ LIST_HEAD(stale);
+ int n;
+
+ /* Free buffers that have not been used in the past second */
+ spin_lock_irq(&pool->lock);
+ for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) {
+ struct list_head *list = &pool->cache_list[n];
+
+ /* Most recent at head; oldest at tail */
+ list_for_each_entry_safe_reverse(node, next, list, link) {
+ if (time_before(node->age, old))
+ break;
+
+ list_move(&node->link, &stale);
+ }
+ active |= !list_empty(list);
+ }
+ spin_unlock_irq(&pool->lock);
+
+ list_for_each_entry_safe(node, next, &stale, link)
+ node_free(node);
+
+ if (active)
+ schedule_delayed_work(&pool->work,
+ round_jiffies_up_relative(HZ));
+}
+
static int pool_active(struct i915_active *ref)
{
- struct intel_engine_pool_node *node =
+ struct intel_gt_buffer_pool_node *node =
container_of(ref, typeof(*node), active);
struct dma_resv *resv = node->obj->base.resv;
int err;
@@ -64,29 +97,31 @@ static int pool_active(struct i915_active *ref)
__i915_active_call
static void pool_retire(struct i915_active *ref)
{
- struct intel_engine_pool_node *node =
+ struct intel_gt_buffer_pool_node *node =
container_of(ref, typeof(*node), active);
- struct intel_engine_pool *pool = node->pool;
+ struct intel_gt_buffer_pool *pool = node->pool;
struct list_head *list = bucket_for_size(pool, node->obj->base.size);
unsigned long flags;
- GEM_BUG_ON(!intel_engine_pm_is_awake(to_engine(pool)));
-
i915_gem_object_unpin_pages(node->obj);
/* Return this object to the shrinker pool */
i915_gem_object_make_purgeable(node->obj);
spin_lock_irqsave(&pool->lock, flags);
+ node->age = jiffies;
list_add(&node->link, list);
spin_unlock_irqrestore(&pool->lock, flags);
+
+ schedule_delayed_work(&pool->work,
+ round_jiffies_up_relative(HZ));
}
-static struct intel_engine_pool_node *
-node_create(struct intel_engine_pool *pool, size_t sz)
+static struct intel_gt_buffer_pool_node *
+node_create(struct intel_gt_buffer_pool *pool, size_t sz)
{
- struct intel_engine_cs *engine = to_engine(pool);
- struct intel_engine_pool_node *node;
+ struct intel_gt *gt = to_gt(pool);
+ struct intel_gt_buffer_pool_node *node;
struct drm_i915_gem_object *obj;
node = kmalloc(sizeof(*node),
@@ -97,7 +132,7 @@ node_create(struct intel_engine_pool *pool, size_t sz)
node->pool = pool;
i915_active_init(&node->active, pool_active, pool_retire);
- obj = i915_gem_object_create_internal(engine->i915, sz);
+ obj = i915_gem_object_create_internal(gt->i915, sz);
if (IS_ERR(obj)) {
i915_active_fini(&node->active);
kfree(node);
@@ -110,26 +145,15 @@ node_create(struct intel_engine_pool *pool, size_t sz)
return node;
}
-static struct intel_engine_pool *lookup_pool(struct intel_engine_cs *engine)
+struct intel_gt_buffer_pool_node *
+intel_gt_get_buffer_pool(struct intel_gt *gt, size_t size)
{
- if (intel_engine_is_virtual(engine))
- engine = intel_virtual_engine_get_sibling(engine, 0);
-
- GEM_BUG_ON(!engine);
- return &engine->pool;
-}
-
-struct intel_engine_pool_node *
-intel_engine_get_pool(struct intel_engine_cs *engine, size_t size)
-{
- struct intel_engine_pool *pool = lookup_pool(engine);
- struct intel_engine_pool_node *node;
+ struct intel_gt_buffer_pool *pool = &gt->buffer_pool;
+ struct intel_gt_buffer_pool_node *node;
struct list_head *list;
unsigned long flags;
int ret;
- GEM_BUG_ON(!intel_engine_pm_is_awake(to_engine(pool)));
-
size = PAGE_ALIGN(size);
list = bucket_for_size(pool, size);
@@ -157,34 +181,48 @@ intel_engine_get_pool(struct intel_engine_cs *engine, size_t size)
return node;
}
-void intel_engine_pool_init(struct intel_engine_pool *pool)
+void intel_gt_init_buffer_pool(struct intel_gt *gt)
{
+ struct intel_gt_buffer_pool *pool = &gt->buffer_pool;
int n;
spin_lock_init(&pool->lock);
for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++)
INIT_LIST_HEAD(&pool->cache_list[n]);
+ INIT_DELAYED_WORK(&pool->work, pool_free_work);
}
-void intel_engine_pool_park(struct intel_engine_pool *pool)
+static void pool_free_imm(struct intel_gt_buffer_pool *pool)
{
int n;
+ spin_lock_irq(&pool->lock);
for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) {
+ struct intel_gt_buffer_pool_node *node, *next;
struct list_head *list = &pool->cache_list[n];
- struct intel_engine_pool_node *node, *nn;
- list_for_each_entry_safe(node, nn, list, link)
+ list_for_each_entry_safe(node, next, list, link)
node_free(node);
-
INIT_LIST_HEAD(list);
}
+ spin_unlock_irq(&pool->lock);
+}
+
+void intel_gt_flush_buffer_pool(struct intel_gt *gt)
+{
+ struct intel_gt_buffer_pool *pool = &gt->buffer_pool;
+
+ if (cancel_delayed_work_sync(&pool->work))
+ pool_free_imm(pool);
}
-void intel_engine_pool_fini(struct intel_engine_pool *pool)
+void intel_gt_fini_buffer_pool(struct intel_gt *gt)
{
+ struct intel_gt_buffer_pool *pool = &gt->buffer_pool;
int n;
+ intel_gt_flush_buffer_pool(gt);
+
for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++)
GEM_BUG_ON(!list_empty(&pool->cache_list[n]));
}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.h b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.h
new file mode 100644
index 000000000000..42cbac003e8a
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2014-2018 Intel Corporation
+ */
+
+#ifndef INTEL_GT_BUFFER_POOL_H
+#define INTEL_GT_BUFFER_POOL_H
+
+#include <linux/types.h>
+
+#include "i915_active.h"
+#include "intel_gt_buffer_pool_types.h"
+
+struct intel_gt;
+struct i915_request;
+
+struct intel_gt_buffer_pool_node *
+intel_gt_get_buffer_pool(struct intel_gt *gt, size_t size);
+
+static inline int
+intel_gt_buffer_pool_mark_active(struct intel_gt_buffer_pool_node *node,
+ struct i915_request *rq)
+{
+ return i915_active_add_request(&node->active, rq);
+}
+
+static inline void
+intel_gt_buffer_pool_put(struct intel_gt_buffer_pool_node *node)
+{
+ i915_active_release(&node->active);
+}
+
+void intel_gt_init_buffer_pool(struct intel_gt *gt);
+void intel_gt_flush_buffer_pool(struct intel_gt *gt);
+void intel_gt_fini_buffer_pool(struct intel_gt *gt);
+
+#endif /* INTEL_GT_BUFFER_POOL_H */
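A hedged usage sketch of the per-GT pool API declared above, roughly as a shadow-batch or blit helper might consume it; gt, rq and size are assumed to come from the caller, do_work_with() is a hypothetical placeholder, and error handling around the request is elided.

static int use_pool_buffer(struct intel_gt *gt, struct i915_request *rq,
			   size_t size)
{
	struct intel_gt_buffer_pool_node *node;
	int err;

	node = intel_gt_get_buffer_pool(gt, PAGE_ALIGN(size));
	if (IS_ERR(node))
		return PTR_ERR(node);

	/* Keep node->obj alive (and unpurgeable) until rq retires. */
	err = intel_gt_buffer_pool_mark_active(node, rq);
	if (!err)
		err = do_work_with(rq, node->obj); /* placeholder helper */

	/*
	 * Drop our reference: once the tracked request retires, pool_retire()
	 * re-lists the node and pool_free_work() reaps it after ~1s of disuse.
	 */
	intel_gt_buffer_pool_put(node);
	return err;
}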
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool_types.h b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h
index e31ee361b76f..e28bdda771ed 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pool_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h
@@ -4,26 +4,29 @@
* Copyright © 2014-2018 Intel Corporation
*/
-#ifndef INTEL_ENGINE_POOL_TYPES_H
-#define INTEL_ENGINE_POOL_TYPES_H
+#ifndef INTEL_GT_BUFFER_POOL_TYPES_H
+#define INTEL_GT_BUFFER_POOL_TYPES_H
#include <linux/list.h>
#include <linux/spinlock.h>
+#include <linux/workqueue.h>
#include "i915_active_types.h"
struct drm_i915_gem_object;
-struct intel_engine_pool {
+struct intel_gt_buffer_pool {
spinlock_t lock;
struct list_head cache_list[4];
+ struct delayed_work work;
};
-struct intel_engine_pool_node {
+struct intel_gt_buffer_pool_node {
struct i915_active active;
struct drm_i915_gem_object *obj;
struct list_head link;
- struct intel_engine_pool *pool;
+ struct intel_gt_buffer_pool *pool;
+ unsigned long age;
};
-#endif /* INTEL_ENGINE_POOL_TYPES_H */
+#endif /* INTEL_GT_BUFFER_POOL_TYPES_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c
new file mode 100644
index 000000000000..999079686846
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c
@@ -0,0 +1,102 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "intel_gt.h"
+#include "intel_gt_clock_utils.h"
+
+#define MHZ_12 12000000 /* 12MHz (24MHz/2), 83.333ns */
+#define MHZ_12_5 12500000 /* 12.5MHz (25MHz/2), 80ns */
+#define MHZ_19_2 19200000 /* 19.2MHz, 52.083ns */
+
+static u32 read_clock_frequency(const struct intel_gt *gt)
+{
+ if (INTEL_GEN(gt->i915) >= 11) {
+ u32 config;
+
+ config = intel_uncore_read(gt->uncore, RPM_CONFIG0);
+ config &= GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK;
+ config >>= GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT;
+
+ switch (config) {
+ case 0: return MHZ_12;
+ case 1:
+ case 2: return MHZ_19_2;
+ default:
+ case 3: return MHZ_12_5;
+ }
+ } else if (INTEL_GEN(gt->i915) >= 9) {
+ if (IS_GEN9_LP(gt->i915))
+ return MHZ_19_2;
+ else
+ return MHZ_12;
+ } else {
+ return MHZ_12_5;
+ }
+}
+
+void intel_gt_init_clock_frequency(struct intel_gt *gt)
+{
+ /*
+ * Note that on gen11+, the clock frequency may be reconfigured.
+ * We do not, and we assume nobody else does.
+ */
+ gt->clock_frequency = read_clock_frequency(gt);
+ GT_TRACE(gt,
+ "Using clock frequency: %dkHz\n",
+ gt->clock_frequency / 1000);
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
+void intel_gt_check_clock_frequency(const struct intel_gt *gt)
+{
+ if (gt->clock_frequency != read_clock_frequency(gt)) {
+ dev_err(gt->i915->drm.dev,
+ "GT clock frequency changed, was %uHz, now %uHz!\n",
+ gt->clock_frequency,
+ read_clock_frequency(gt));
+ }
+}
+#endif
+
+static u64 div_u64_roundup(u64 nom, u32 den)
+{
+ return div_u64(nom + den - 1, den);
+}
+
+u32 intel_gt_clock_interval_to_ns(const struct intel_gt *gt, u32 count)
+{
+ return div_u64_roundup(mul_u32_u32(count, 1000 * 1000 * 1000),
+ gt->clock_frequency);
+}
+
+u32 intel_gt_pm_interval_to_ns(const struct intel_gt *gt, u32 count)
+{
+ return intel_gt_clock_interval_to_ns(gt, 16 * count);
+}
+
+u32 intel_gt_ns_to_clock_interval(const struct intel_gt *gt, u32 ns)
+{
+ return div_u64_roundup(mul_u32_u32(gt->clock_frequency, ns),
+ 1000 * 1000 * 1000);
+}
+
+u32 intel_gt_ns_to_pm_interval(const struct intel_gt *gt, u32 ns)
+{
+ u32 val;
+
+ /*
+ * Make these a multiple of magic 25 to avoid SNB (eg. Dell XPS
+ * 8300) freezing up around GPU hangs. Looks as if even
+ * scheduling/timer interrupts start misbehaving if the RPS
+ * EI/thresholds are "bad", leading to a very sluggish or even
+ * frozen machine.
+ */
+ val = DIV_ROUND_UP(intel_gt_ns_to_clock_interval(gt, ns), 16);
+ if (IS_GEN(gt->i915, 6))
+ val = roundup(val, 25);
+
+ return val;
+}
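A quick standalone check of the conversion math above (plain userspace C, not kernel code): with a 19.2 MHz crystal one GT clock tick is ~52ns, and a PM interval tick (16 clocks) is ~833ns, both rounded up by div_u64_roundup().

#include <stdint.h>
#include <stdio.h>

static uint64_t div_u64_roundup(uint64_t nom, uint32_t den)
{
	return (nom + den - 1) / den;
}

static uint32_t clock_interval_to_ns(uint32_t clock_frequency, uint32_t count)
{
	return (uint32_t)div_u64_roundup((uint64_t)count * 1000000000ull,
					 clock_frequency);
}

int main(void)
{
	uint32_t hz = 19200000; /* MHZ_19_2 */

	printf("1 clock   = %uns\n", clock_interval_to_ns(hz, 1));  /* 53 */
	printf("16 clocks = %uns\n", clock_interval_to_ns(hz, 16)); /* 834 */
	return 0;
}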
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.h b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.h
new file mode 100644
index 000000000000..f793c89f2cbd
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#ifndef __INTEL_GT_CLOCK_UTILS_H__
+#define __INTEL_GT_CLOCK_UTILS_H__
+
+#include <linux/types.h>
+
+struct intel_gt;
+
+void intel_gt_init_clock_frequency(struct intel_gt *gt);
+
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
+void intel_gt_check_clock_frequency(const struct intel_gt *gt);
+#else
+static inline void intel_gt_check_clock_frequency(const struct intel_gt *gt) {}
+#endif
+
+u32 intel_gt_clock_interval_to_ns(const struct intel_gt *gt, u32 count);
+u32 intel_gt_pm_interval_to_ns(const struct intel_gt *gt, u32 count);
+
+u32 intel_gt_ns_to_clock_interval(const struct intel_gt *gt, u32 ns);
+u32 intel_gt_ns_to_pm_interval(const struct intel_gt *gt, u32 ns);
+
+#endif /* __INTEL_GT_CLOCK_UTILS_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
index 8b653c0f5e5f..6bdb434a442d 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -12,6 +12,7 @@
#include "intel_context.h"
#include "intel_engine_pm.h"
#include "intel_gt.h"
+#include "intel_gt_clock_utils.h"
#include "intel_gt_pm.h"
#include "intel_gt_requests.h"
#include "intel_llc.h"
@@ -138,6 +139,8 @@ static void gt_sanitize(struct intel_gt *gt, bool force)
wakeref = intel_runtime_pm_get(gt->uncore->rpm);
intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
+ intel_gt_check_clock_frequency(gt);
+
/*
* As we have just resumed the machine and woken the device up from
* deep PCI sleep (presumably D3_cold), assume the HW has been reset
@@ -155,6 +158,10 @@ static void gt_sanitize(struct intel_gt *gt, bool force)
intel_uc_reset_prepare(&gt->uc);
+ for_each_engine(engine, gt, id)
+ if (engine->sanitize)
+ engine->sanitize(engine);
+
if (reset_engines(gt) || force) {
for_each_engine(engine, gt, id)
__intel_engine_reset(engine, false);
@@ -164,6 +171,8 @@ static void gt_sanitize(struct intel_gt *gt, bool force)
if (engine->reset.finish)
engine->reset.finish(engine);
+ intel_rps_sanitize(&gt->rps);
+
intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
intel_runtime_pm_put(gt->uncore->rpm, wakeref);
}
@@ -191,11 +200,12 @@ int intel_gt_resume(struct intel_gt *gt)
* Only the kernel contexts should remain pinned over suspend,
* allowing us to fixup the user contexts on their first pin.
*/
+ gt_sanitize(gt, true);
+
intel_gt_pm_get(gt);
intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
intel_rc6_sanitize(&gt->rc6);
- gt_sanitize(gt, true);
if (intel_gt_is_wedged(gt)) {
err = -EIO;
goto out_fw;
@@ -204,7 +214,7 @@ int intel_gt_resume(struct intel_gt *gt)
/* Only when the HW is re-initialised, can we replay the requests */
err = intel_gt_init_hw(gt);
if (err) {
- dev_err(gt->i915->drm.dev,
+ drm_err(&gt->i915->drm,
"Failed to initialize GPU, declaring it wedged!\n");
goto err_wedged;
}
@@ -220,7 +230,7 @@ int intel_gt_resume(struct intel_gt *gt)
intel_engine_pm_put(engine);
if (err) {
- dev_err(gt->i915->drm.dev,
+ drm_err(&gt->i915->drm,
"Failed to restart %s (%d)\n",
engine->name, err);
goto err_wedged;
@@ -324,6 +334,7 @@ int intel_gt_runtime_resume(struct intel_gt *gt)
{
GT_TRACE(gt, "\n");
intel_gt_init_swizzling(gt);
+ intel_ggtt_restore_fences(gt->ggtt);
return intel_uc_runtime_resume(&gt->uc);
}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
index 24c99d0838af..16ff47c83bd5 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
@@ -26,6 +26,11 @@ static bool retire_requests(struct intel_timeline *tl)
return !i915_active_fence_isset(&tl->last_request);
}
+static bool engine_active(const struct intel_engine_cs *engine)
+{
+ return !list_empty(&engine->kernel_context->timeline->requests);
+}
+
static bool flush_submission(struct intel_gt *gt)
{
struct intel_engine_cs *engine;
@@ -37,8 +42,13 @@ static bool flush_submission(struct intel_gt *gt)
for_each_engine(engine, gt, id) {
intel_engine_flush_submission(engine);
- active |= flush_work(&engine->retire_work);
- active |= flush_work(&engine->wakeref.work);
+
+ /* Flush the background retirement and idle barriers */
+ flush_work(&engine->retire_work);
+ flush_delayed_work(&engine->wakeref.work);
+
+ /* Is the idle barrier still outstanding? */
+ active |= engine_active(engine);
}
return active;
@@ -162,7 +172,7 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout)
}
}
- if (!retire_requests(tl) || flush_submission(gt))
+ if (!retire_requests(tl))
active_count++;
mutex_unlock(&tl->mutex);
@@ -173,7 +183,6 @@ out_active: spin_lock(&timelines->lock);
if (atomic_dec_and_test(&tl->active_count))
list_del(&tl->link);
-
/* Defer the final release to after the spinlock */
if (refcount_dec_and_test(&tl->kref.refcount)) {
GEM_BUG_ON(atomic_read(&tl->active_count));
@@ -185,6 +194,9 @@ out_active: spin_lock(&timelines->lock);
list_for_each_entry_safe(tl, tn, &free, link)
__intel_timeline_free(&tl->kref);
+ if (flush_submission(gt)) /* Wait, there's more! */
+ active_count++;
+
return active_count ? timeout : 0;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index 96890dd12b5f..0cc1d6b185dc 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -17,6 +17,7 @@
#include "i915_vma.h"
#include "intel_engine_types.h"
+#include "intel_gt_buffer_pool_types.h"
#include "intel_llc_types.h"
#include "intel_reset_types.h"
#include "intel_rc6_types.h"
@@ -61,6 +62,7 @@ struct intel_gt {
struct list_head closed_vma;
spinlock_t closed_lock; /* guards the list of closed_vma */
+ ktime_t last_init_time;
struct intel_reset reset;
/**
@@ -72,14 +74,12 @@ struct intel_gt {
*/
intel_wakeref_t awake;
+ u32 clock_frequency;
+
struct intel_llc llc;
struct intel_rc6 rc6;
struct intel_rps rps;
- ktime_t last_init_time;
-
- struct i915_vma *scratch;
-
spinlock_t irq_lock;
u32 gt_imr;
u32 pm_ier;
@@ -97,6 +97,18 @@ struct intel_gt {
* Reserved for exclusive use by the kernel.
*/
struct i915_address_space *vm;
+
+ /*
+ * A pool of objects to use as shadow copies of client batch buffers
+ * when the command parser is enabled. Prevents the client from
+ * modifying the batch contents after software parsing.
+ *
+ * Buffers older than 1s are periodically reaped from the pool,
+ * or may be reclaimed by the shrinker before then.
+ */
+ struct intel_gt_buffer_pool buffer_pool;
+
+ struct i915_vma *scratch;
};
enum intel_gt_scratch_field {
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
index b3116fe8d180..d93ebdf3fa0e 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -26,7 +26,6 @@
#include <drm/drm_mm.h>
#include "gt/intel_reset.h"
-#include "i915_gem_fence_reg.h"
#include "i915_selftest.h"
#include "i915_vma_types.h"
@@ -135,6 +134,8 @@ typedef u64 gen8_pte_t;
#define GEN8_PDE_IPS_64K BIT(11)
#define GEN8_PDE_PS_2M BIT(7)
+struct i915_fence_reg;
+
#define for_each_sgt_daddr(__dp, __iter, __sgt) \
__for_each_sgt_daddr(__dp, __iter, __sgt, I915_GTT_PAGE_SIZE)
@@ -333,7 +334,7 @@ struct i915_ggtt {
u32 pin_bias;
unsigned int num_fences;
- struct i915_fence_reg fence_regs[I915_MAX_NUM_FENCES];
+ struct i915_fence_reg *fence_regs;
struct list_head fence_list;
/**
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 2dfaddb8811e..87e6c5bdd2dc 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -147,6 +147,7 @@
#include "intel_reset.h"
#include "intel_ring.h"
#include "intel_workarounds.h"
+#include "shmem_utils.h"
#define RING_EXECLIST_QFULL (1 << 0x2)
#define RING_EXECLIST1_VALID (1 << 0x3)
@@ -216,7 +217,7 @@ struct virtual_engine {
/* And finally, which physical engines this virtual engine maps onto. */
unsigned int num_siblings;
- struct intel_engine_cs *siblings[0];
+ struct intel_engine_cs *siblings[];
};
static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine)
@@ -238,6 +239,123 @@ __execlists_update_reg_state(const struct intel_context *ce,
const struct intel_engine_cs *engine,
u32 head);
+static int lrc_ring_mi_mode(const struct intel_engine_cs *engine)
+{
+ if (INTEL_GEN(engine->i915) >= 12)
+ return 0x60;
+ else if (INTEL_GEN(engine->i915) >= 9)
+ return 0x54;
+ else if (engine->class == RENDER_CLASS)
+ return 0x58;
+ else
+ return -1;
+}
+
+static int lrc_ring_gpr0(const struct intel_engine_cs *engine)
+{
+ if (INTEL_GEN(engine->i915) >= 12)
+ return 0x74;
+ else if (INTEL_GEN(engine->i915) >= 9)
+ return 0x68;
+ else if (engine->class == RENDER_CLASS)
+ return 0xd8;
+ else
+ return -1;
+}
+
+static int lrc_ring_wa_bb_per_ctx(const struct intel_engine_cs *engine)
+{
+ if (INTEL_GEN(engine->i915) >= 12)
+ return 0x12;
+ else if (INTEL_GEN(engine->i915) >= 9 || engine->class == RENDER_CLASS)
+ return 0x18;
+ else
+ return -1;
+}
+
+static int lrc_ring_indirect_ptr(const struct intel_engine_cs *engine)
+{
+ int x;
+
+ x = lrc_ring_wa_bb_per_ctx(engine);
+ if (x < 0)
+ return x;
+
+ return x + 2;
+}
+
+static int lrc_ring_indirect_offset(const struct intel_engine_cs *engine)
+{
+ int x;
+
+ x = lrc_ring_indirect_ptr(engine);
+ if (x < 0)
+ return x;
+
+ return x + 2;
+}
+
+static int lrc_ring_cmd_buf_cctl(const struct intel_engine_cs *engine)
+{
+ if (engine->class != RENDER_CLASS)
+ return -1;
+
+ if (INTEL_GEN(engine->i915) >= 12)
+ return 0xb6;
+ else if (INTEL_GEN(engine->i915) >= 11)
+ return 0xaa;
+ else
+ return -1;
+}
+
+static u32
+lrc_ring_indirect_offset_default(const struct intel_engine_cs *engine)
+{
+ switch (INTEL_GEN(engine->i915)) {
+ default:
+ MISSING_CASE(INTEL_GEN(engine->i915));
+ fallthrough;
+ case 12:
+ return GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
+ case 11:
+ return GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
+ case 10:
+ return GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
+ case 9:
+ return GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
+ case 8:
+ return GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
+ }
+}
+
+static void
+lrc_ring_setup_indirect_ctx(u32 *regs,
+ const struct intel_engine_cs *engine,
+ u32 ctx_bb_ggtt_addr,
+ u32 size)
+{
+ GEM_BUG_ON(!size);
+ GEM_BUG_ON(!IS_ALIGNED(size, CACHELINE_BYTES));
+ GEM_BUG_ON(lrc_ring_indirect_ptr(engine) == -1);
+ regs[lrc_ring_indirect_ptr(engine) + 1] =
+ ctx_bb_ggtt_addr | (size / CACHELINE_BYTES);
+
+ GEM_BUG_ON(lrc_ring_indirect_offset(engine) == -1);
+ regs[lrc_ring_indirect_offset(engine) + 1] =
+ lrc_ring_indirect_offset_default(engine) << 6;
+}
+
+static u32 intel_context_get_runtime(const struct intel_context *ce)
+{
+ /*
+ * We can use either ppHWSP[16] which is recorded before the context
+ * switch (and so excludes the cost of context switches) or use the
+ * value from the context image itself, which is saved/restored earlier
+ * and so includes the cost of the save.
+ */
+ return READ_ONCE(ce->lrc_reg_state[CTX_TIMESTAMP]);
+}
+
static void mark_eio(struct i915_request *rq)
{
if (i915_request_completed(rq))
@@ -311,18 +429,7 @@ static int effective_prio(const struct i915_request *rq)
if (i915_request_has_nopreempt(rq))
prio = I915_PRIORITY_UNPREEMPTABLE;
- /*
- * On unwinding the active request, we give it a priority bump
- * if it has completed waiting on any semaphore. If we know that
- * the request has already started, we can prevent an unwanted
- * preempt-to-idle cycle by taking that into account now.
- */
- if (__i915_request_has_started(rq))
- prio |= I915_PRIORITY_NOSEMAPHORE;
-
- /* Restrict mere WAIT boosts from triggering preemption */
- BUILD_BUG_ON(__NO_PREEMPTION & ~I915_PRIORITY_MASK); /* only internal */
- return prio | __NO_PREEMPTION;
+ return prio;
}
static int queue_prio(const struct intel_engine_execlists *execlists)
@@ -489,7 +596,7 @@ static void set_offsets(u32 *regs,
#define REG16(x) \
(((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
(((x) >> 2) & 0x7f)
-#define END(x) 0, (x)
+#define END(total_state_size) 0, (total_state_size)
{
const u32 base = engine->mmio_base;
@@ -512,7 +619,7 @@ static void set_offsets(u32 *regs,
if (flags & POSTED)
*regs |= MI_LRI_FORCE_POSTED;
if (INTEL_GEN(engine->i915) >= 11)
- *regs |= MI_LRI_CS_MMIO;
+ *regs |= MI_LRI_LRM_CS_MMIO;
regs++;
GEM_BUG_ON(!count);
@@ -897,8 +1004,63 @@ static const u8 gen12_rcs_offsets[] = {
NOP(6),
LRI(1, 0),
REG(0x0c8),
+ NOP(3 + 9 + 1),
+
+ LRI(51, POSTED),
+ REG16(0x588),
+ REG16(0x588),
+ REG16(0x588),
+ REG16(0x588),
+ REG16(0x588),
+ REG16(0x588),
+ REG(0x028),
+ REG(0x09c),
+ REG(0x0c0),
+ REG(0x178),
+ REG(0x17c),
+ REG16(0x358),
+ REG(0x170),
+ REG(0x150),
+ REG(0x154),
+ REG(0x158),
+ REG16(0x41c),
+ REG16(0x600),
+ REG16(0x604),
+ REG16(0x608),
+ REG16(0x60c),
+ REG16(0x610),
+ REG16(0x614),
+ REG16(0x618),
+ REG16(0x61c),
+ REG16(0x620),
+ REG16(0x624),
+ REG16(0x628),
+ REG16(0x62c),
+ REG16(0x630),
+ REG16(0x634),
+ REG16(0x638),
+ REG16(0x63c),
+ REG16(0x640),
+ REG16(0x644),
+ REG16(0x648),
+ REG16(0x64c),
+ REG16(0x650),
+ REG16(0x654),
+ REG16(0x658),
+ REG16(0x65c),
+ REG16(0x660),
+ REG16(0x664),
+ REG16(0x668),
+ REG16(0x66c),
+ REG16(0x670),
+ REG16(0x674),
+ REG16(0x678),
+ REG16(0x67c),
+ REG(0x068),
+ REG(0x084),
+ NOP(1),
- END(80)
+ END(192)
};
#undef END
@@ -1026,17 +1188,14 @@ static void intel_engine_context_in(struct intel_engine_cs *engine)
{
unsigned long flags;
- if (READ_ONCE(engine->stats.enabled) == 0)
+ if (atomic_add_unless(&engine->stats.active, 1, 0))
return;
write_seqlock_irqsave(&engine->stats.lock, flags);
-
- if (engine->stats.enabled > 0) {
- if (engine->stats.active++ == 0)
- engine->stats.start = ktime_get();
- GEM_BUG_ON(engine->stats.active == 0);
+ if (!atomic_add_unless(&engine->stats.active, 1, 0)) {
+ engine->stats.start = ktime_get();
+ atomic_inc(&engine->stats.active);
}
-
write_sequnlock_irqrestore(&engine->stats.lock, flags);
}
@@ -1044,51 +1203,20 @@ static void intel_engine_context_out(struct intel_engine_cs *engine)
{
unsigned long flags;
- if (READ_ONCE(engine->stats.enabled) == 0)
+ GEM_BUG_ON(!atomic_read(&engine->stats.active));
+
+ if (atomic_add_unless(&engine->stats.active, -1, 1))
return;
write_seqlock_irqsave(&engine->stats.lock, flags);
-
- if (engine->stats.enabled > 0) {
- ktime_t last;
-
- if (engine->stats.active && --engine->stats.active == 0) {
- /*
- * Decrement the active context count and in case GPU
- * is now idle add up to the running total.
- */
- last = ktime_sub(ktime_get(), engine->stats.start);
-
- engine->stats.total = ktime_add(engine->stats.total,
- last);
- } else if (engine->stats.active == 0) {
- /*
- * After turning on engine stats, context out might be
- * the first event in which case we account from the
- * time stats gathering was turned on.
- */
- last = ktime_sub(ktime_get(), engine->stats.enabled_at);
-
- engine->stats.total = ktime_add(engine->stats.total,
- last);
- }
+ if (atomic_dec_and_test(&engine->stats.active)) {
+ engine->stats.total =
+ ktime_add(engine->stats.total,
+ ktime_sub(ktime_get(), engine->stats.start));
}
-
write_sequnlock_irqrestore(&engine->stats.lock, flags);
}
-static int lrc_ring_mi_mode(const struct intel_engine_cs *engine)
-{
- if (INTEL_GEN(engine->i915) >= 12)
- return 0x60;
- else if (INTEL_GEN(engine->i915) >= 9)
- return 0x54;
- else if (engine->class == RENDER_CLASS)
- return 0x58;
- else
- return -1;
-}
-
static void
execlists_check_context(const struct intel_context *ce,
const struct intel_engine_cs *engine)
@@ -1132,14 +1260,12 @@ execlists_check_context(const struct intel_context *ce,
static void restore_default_state(struct intel_context *ce,
struct intel_engine_cs *engine)
{
- u32 *regs = ce->lrc_reg_state;
+ u32 *regs;
- if (engine->pinned_default_state)
- memcpy(regs, /* skip restoring the vanilla PPHWSP */
- engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
- engine->context_size - PAGE_SIZE);
+ regs = memset(ce->lrc_reg_state, 0, engine->context_size - PAGE_SIZE);
+ execlists_init_reg_state(regs, ce, engine, ce->ring, true);
- execlists_init_reg_state(regs, ce, engine, ce->ring, false);
+ ce->runtime.last = intel_context_get_runtime(ce);
}
static void reset_active(struct i915_request *rq,
@@ -1181,17 +1307,6 @@ static void reset_active(struct i915_request *rq,
ce->lrc.desc |= CTX_DESC_FORCE_RESTORE;
}
-static u32 intel_context_get_runtime(const struct intel_context *ce)
-{
- /*
- * We can use either ppHWSP[16] which is recorded before the context
- * switch (and so excludes the cost of context switches) or use the
- * value from the context image itself, which is saved/restored earlier
- * and so includes the cost of the save.
- */
- return READ_ONCE(ce->lrc_reg_state[CTX_TIMESTAMP]);
-}
-
static void st_update_runtime_underflow(struct intel_context *ce, s32 dt)
{
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
@@ -1243,7 +1358,7 @@ __execlists_schedule_in(struct i915_request *rq)
ce->lrc.ccid = ce->tag;
} else {
/* We don't need a strict matching tag, just different values */
- unsigned int tag = ffs(engine->context_tag);
+ unsigned int tag = ffs(READ_ONCE(engine->context_tag));
GEM_BUG_ON(tag == 0 || tag >= BITS_PER_LONG);
clear_bit(tag - 1, &engine->context_tag);
@@ -1417,6 +1532,24 @@ static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc
}
}
+static __maybe_unused char *
+dump_port(char *buf, int buflen, const char *prefix, struct i915_request *rq)
+{
+ if (!rq)
+ return "";
+
+ snprintf(buf, buflen, "%sccid:%x %llx:%lld%s prio %d",
+ prefix,
+ rq->context->lrc.ccid,
+ rq->fence.context, rq->fence.seqno,
+ i915_request_completed(rq) ? "!" :
+ i915_request_started(rq) ? "*" :
+ "",
+ rq_prio(rq));
+
+ return buf;
+}
+
static __maybe_unused void
trace_ports(const struct intel_engine_execlists *execlists,
const char *msg,
@@ -1424,18 +1557,14 @@ trace_ports(const struct intel_engine_execlists *execlists,
{
const struct intel_engine_cs *engine =
container_of(execlists, typeof(*engine), execlists);
+ char __maybe_unused p0[40], p1[40];
if (!ports[0])
return;
- ENGINE_TRACE(engine, "%s { %llx:%lld%s, %llx:%lld }\n", msg,
- ports[0]->fence.context,
- ports[0]->fence.seqno,
- i915_request_completed(ports[0]) ? "!" :
- i915_request_started(ports[0]) ? "*" :
- "",
- ports[1] ? ports[1]->fence.context : 0,
- ports[1] ? ports[1]->fence.seqno : 0);
+ ENGINE_TRACE(engine, "%s { %s%s }\n", msg,
+ dump_port(p0, sizeof(p0), "", ports[0]),
+ dump_port(p1, sizeof(p1), ", ", ports[1]));
}
static inline bool
@@ -1448,9 +1577,12 @@ static __maybe_unused bool
assert_pending_valid(const struct intel_engine_execlists *execlists,
const char *msg)
{
+ struct intel_engine_cs *engine =
+ container_of(execlists, typeof(*engine), execlists);
struct i915_request * const *port, *rq;
struct intel_context *ce = NULL;
bool sentinel = false;
+ u32 ccid = -1;
trace_ports(execlists, msg, execlists->pending);
@@ -1459,13 +1591,14 @@ assert_pending_valid(const struct intel_engine_execlists *execlists,
return true;
if (!execlists->pending[0]) {
- GEM_TRACE_ERR("Nothing pending for promotion!\n");
+ GEM_TRACE_ERR("%s: Nothing pending for promotion!\n",
+ engine->name);
return false;
}
if (execlists->pending[execlists_num_ports(execlists)]) {
- GEM_TRACE_ERR("Excess pending[%d] for promotion!\n",
- execlists_num_ports(execlists));
+ GEM_TRACE_ERR("%s: Excess pending[%d] for promotion!\n",
+ engine->name, execlists_num_ports(execlists));
return false;
}
@@ -1477,20 +1610,31 @@ assert_pending_valid(const struct intel_engine_execlists *execlists,
GEM_BUG_ON(!i915_request_is_active(rq));
if (ce == rq->context) {
- GEM_TRACE_ERR("Dup context:%llx in pending[%zd]\n",
+ GEM_TRACE_ERR("%s: Dup context:%llx in pending[%zd]\n",
+ engine->name,
ce->timeline->fence_context,
port - execlists->pending);
return false;
}
ce = rq->context;
+ if (ccid == ce->lrc.ccid) {
+ GEM_TRACE_ERR("%s: Dup ccid:%x context:%llx in pending[%zd]\n",
+ engine->name,
+ ccid, ce->timeline->fence_context,
+ port - execlists->pending);
+ return false;
+ }
+ ccid = ce->lrc.ccid;
+
/*
* Sentinels are supposed to be lonely so they flush the
* current exection off the HW. Check that they are the
* only request in the pending submission.
*/
if (sentinel) {
- GEM_TRACE_ERR("context:%llx after sentinel in pending[%zd]\n",
+ GEM_TRACE_ERR("%s: context:%llx after sentinel in pending[%zd]\n",
+ engine->name,
ce->timeline->fence_context,
port - execlists->pending);
return false;
@@ -1498,7 +1642,8 @@ assert_pending_valid(const struct intel_engine_execlists *execlists,
sentinel = i915_request_has_sentinel(rq);
if (sentinel && port != execlists->pending) {
- GEM_TRACE_ERR("sentinel context:%llx not in prime position[%zd]\n",
+ GEM_TRACE_ERR("%s: sentinel context:%llx not in prime position[%zd]\n",
+ engine->name,
ce->timeline->fence_context,
port - execlists->pending);
return false;
@@ -1513,7 +1658,8 @@ assert_pending_valid(const struct intel_engine_execlists *execlists,
if (i915_active_is_idle(&ce->active) &&
!intel_context_is_barrier(ce)) {
- GEM_TRACE_ERR("Inactive context:%llx in pending[%zd]\n",
+ GEM_TRACE_ERR("%s: Inactive context:%llx in pending[%zd]\n",
+ engine->name,
ce->timeline->fence_context,
port - execlists->pending);
ok = false;
@@ -1521,7 +1667,8 @@ assert_pending_valid(const struct intel_engine_execlists *execlists,
}
if (!i915_vma_is_pinned(ce->state)) {
- GEM_TRACE_ERR("Unpinned context:%llx in pending[%zd]\n",
+ GEM_TRACE_ERR("%s: Unpinned context:%llx in pending[%zd]\n",
+ engine->name,
ce->timeline->fence_context,
port - execlists->pending);
ok = false;
@@ -1529,7 +1676,8 @@ assert_pending_valid(const struct intel_engine_execlists *execlists,
}
if (!i915_vma_is_pinned(ce->ring->vma)) {
- GEM_TRACE_ERR("Unpinned ring:%llx in pending[%zd]\n",
+ GEM_TRACE_ERR("%s: Unpinned ring:%llx in pending[%zd]\n",
+ engine->name,
ce->timeline->fence_context,
port - execlists->pending);
ok = false;
@@ -1664,30 +1812,16 @@ static bool virtual_matches(const struct virtual_engine *ve,
return true;
}
-static void virtual_xfer_breadcrumbs(struct virtual_engine *ve,
- struct i915_request *rq)
+static void virtual_xfer_breadcrumbs(struct virtual_engine *ve)
{
- struct intel_engine_cs *old = ve->siblings[0];
-
- /* All unattached (rq->engine == old) must already be completed */
-
- spin_lock(&old->breadcrumbs.irq_lock);
- if (!list_empty(&ve->context.signal_link)) {
- list_del_init(&ve->context.signal_link);
-
- /*
- * We cannot acquire the new engine->breadcrumbs.irq_lock
- * (as we are holding a breadcrumbs.irq_lock already),
- * so attach this request to the signaler on submission.
- * The queued irq_work will occur when we finally drop
- * the engine->active.lock after dequeue.
- */
- set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags);
-
- /* Also transfer the pending irq_work for the old breadcrumb. */
- intel_engine_signal_breadcrumbs(rq->engine);
- }
- spin_unlock(&old->breadcrumbs.irq_lock);
+ /*
+ * All the outstanding signals on ve->siblings[0] must have
+ * been completed, just pending the interrupt handler. As those
+ * signals still refer to the old sibling (via rq->engine), we must
+ * transfer those to the old irq_worker to keep our locking
+ * consistent.
+ */
+ intel_engine_transfer_stale_breadcrumbs(ve->siblings[0], &ve->context);
}
#define for_each_waiter(p__, rq__) \
@@ -1729,7 +1863,8 @@ static void defer_request(struct i915_request *rq, struct list_head * const pl)
continue;
/* No waiter should start before its signaler */
- GEM_BUG_ON(i915_request_started(w) &&
+ GEM_BUG_ON(i915_request_has_initial_breadcrumb(w) &&
+ i915_request_started(w) &&
!i915_request_completed(rq));
GEM_BUG_ON(i915_request_is_active(w));
@@ -1831,16 +1966,25 @@ static unsigned long active_timeslice(const struct intel_engine_cs *engine)
static void set_timeslice(struct intel_engine_cs *engine)
{
+ unsigned long duration;
+
if (!intel_engine_has_timeslices(engine))
return;
- set_timer_ms(&engine->execlists.timer, active_timeslice(engine));
+ duration = active_timeslice(engine);
+ ENGINE_TRACE(engine, "bump timeslicing, interval:%lu", duration);
+
+ set_timer_ms(&engine->execlists.timer, duration);
}
static void start_timeslice(struct intel_engine_cs *engine)
{
struct intel_engine_execlists *execlists = &engine->execlists;
- int prio = queue_prio(execlists);
+ const int prio = queue_prio(execlists);
+ unsigned long duration;
+
+ if (!intel_engine_has_timeslices(engine))
+ return;
WRITE_ONCE(execlists->switch_priority_hint, prio);
if (prio == INT_MIN)
@@ -1849,7 +1993,12 @@ static void start_timeslice(struct intel_engine_cs *engine)
if (timer_pending(&execlists->timer))
return;
- set_timer_ms(&execlists->timer, timeslice(engine));
+ duration = timeslice(engine);
+ ENGINE_TRACE(engine,
+ "start timeslicing, prio:%d, interval:%lu",
+ prio, duration);
+
+ set_timer_ms(&execlists->timer, duration);
}
static void record_preemption(struct intel_engine_execlists *execlists)
@@ -1946,11 +2095,26 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
* of trouble.
*/
active = READ_ONCE(execlists->active);
- while ((last = *active) && i915_request_completed(last))
- active++;
- if (last) {
+ /*
+ * In theory we can skip over completed contexts that have not
+ * yet been processed by events (as those events are in flight):
+ *
+ * while ((last = *active) && i915_request_completed(last))
+ * active++;
+ *
+ * However, the GPU cannot handle this as it will ultimately
+ * find itself trying to jump back into a context it has just
+ * completed and barf.
+ */
+
+ if ((last = *active)) {
if (need_preempt(engine, last, rb)) {
+ if (i915_request_completed(last)) {
+ tasklet_hi_schedule(&execlists->tasklet);
+ return;
+ }
+
ENGINE_TRACE(engine,
"preempting last=%llx:%lld, prio=%d, hint=%d\n",
last->fence.context,
@@ -1978,6 +2142,11 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
last = NULL;
} else if (need_timeslice(engine, last) &&
timeslice_expired(execlists, last)) {
+ if (i915_request_completed(last)) {
+ tasklet_hi_schedule(&execlists->tasklet);
+ return;
+ }
+
ENGINE_TRACE(engine,
"expired last=%llx:%lld, prio=%d, hint=%d, yield?=%s\n",
last->fence.context,
@@ -2087,7 +2256,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
engine);
if (!list_empty(&ve->context.signals))
- virtual_xfer_breadcrumbs(ve, rq);
+ virtual_xfer_breadcrumbs(ve);
/*
* Move the bound engine to the top of the list
@@ -2246,8 +2415,8 @@ done:
clear_ports(port + 1, last_port - port);
WRITE_ONCE(execlists->yield, -1);
- execlists_submit_ports(engine);
set_preempt_timeout(engine, *active);
+ execlists_submit_ports(engine);
} else {
skip_submit:
ring_set_paused(engine, 0);
@@ -2417,8 +2586,6 @@ static void process_csb(struct intel_engine_cs *engine)
if (promote) {
struct i915_request * const *old = execlists->active;
- GEM_BUG_ON(!assert_pending_valid(execlists, "promote"));
-
ring_set_paused(engine, 0);
/* Point active to the new ELSP; prevent overwriting */
@@ -2431,6 +2598,7 @@ static void process_csb(struct intel_engine_cs *engine)
execlists_schedule_out(*old++);
/* switch pending to inflight */
+ GEM_BUG_ON(!assert_pending_valid(execlists, "promote"));
memcpy(execlists->inflight,
execlists->pending,
execlists_num_ports(execlists) *
@@ -2449,17 +2617,21 @@ static void process_csb(struct intel_engine_cs *engine)
* We rely on the hardware being strongly
* ordered, that the breadcrumb write is
* coherent (visible from the CPU) before the
- * user interrupt and CSB is processed.
+ * user interrupt is processed. One might assume
+ * that the breadcrumb write being before the
+ * user interrupt and the CS event for the context
+ * switch would therefore be before the CS event
+ * itself...
*/
if (GEM_SHOW_DEBUG() &&
- !i915_request_completed(*execlists->active) &&
- !reset_in_progress(execlists)) {
- struct i915_request *rq __maybe_unused =
- *execlists->active;
+ !i915_request_completed(*execlists->active)) {
+ struct i915_request *rq = *execlists->active;
const u32 *regs __maybe_unused =
rq->context->lrc_reg_state;
ENGINE_TRACE(engine,
+ "context completed before request!\n");
+ ENGINE_TRACE(engine,
"ring:{start:0x%08x, head:%04x, tail:%04x, ctl:%08x, mode:%08x}\n",
ENGINE_READ(engine, RING_START),
ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR,
@@ -2478,8 +2650,6 @@ static void process_csb(struct intel_engine_cs *engine)
regs[CTX_RING_START],
regs[CTX_RING_HEAD],
regs[CTX_RING_TAIL]);
-
- GEM_BUG_ON("context completed before request");
}
execlists_schedule_out(*execlists->active++);
@@ -2769,6 +2939,45 @@ err_cap:
return NULL;
}
+static struct i915_request *
+active_context(struct intel_engine_cs *engine, u32 ccid)
+{
+ const struct intel_engine_execlists * const el = &engine->execlists;
+ struct i915_request * const *port, *rq;
+
+ /*
+ * Use the most recent result from process_csb(), but just in case
+ * we trigger an error (via interrupt) before the first CS event has
+ * been written, peek at the next submission.
+ */
+
+ for (port = el->active; (rq = *port); port++) {
+ if (rq->context->lrc.ccid == ccid) {
+ ENGINE_TRACE(engine,
+ "ccid found at active:%zd\n",
+ port - el->active);
+ return rq;
+ }
+ }
+
+ for (port = el->pending; (rq = *port); port++) {
+ if (rq->context->lrc.ccid == ccid) {
+ ENGINE_TRACE(engine,
+ "ccid found at pending:%zd\n",
+ port - el->pending);
+ return rq;
+ }
+ }
+
+ ENGINE_TRACE(engine, "ccid:%x not found\n", ccid);
+ return NULL;
+}
+
+static u32 active_ccid(struct intel_engine_cs *engine)
+{
+ return ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI);
+}
+
static bool execlists_capture(struct intel_engine_cs *engine)
{
struct execlists_capture *cap;
@@ -2786,7 +2995,7 @@ static bool execlists_capture(struct intel_engine_cs *engine)
return true;
spin_lock_irq(&engine->active.lock);
- cap->rq = execlists_active(&engine->execlists);
+ cap->rq = active_context(engine, active_ccid(engine));
if (cap->rq) {
cap->rq = active_request(cap->rq->context->timeline, cap->rq);
cap->rq = i915_request_get_rcu(cap->rq);
@@ -2934,10 +3143,14 @@ static void __submit_queue_imm(struct intel_engine_cs *engine)
if (reset_in_progress(execlists))
return; /* defer until we restart the engine following reset */
- if (execlists->tasklet.func == execlists_submission_tasklet)
- __execlists_submission_tasklet(engine);
- else
- tasklet_hi_schedule(&execlists->tasklet);
+ /* Hopefully we clear execlists->pending[] to let us through */
+ if (READ_ONCE(execlists->pending[0]) &&
+ tasklet_trylock(&execlists->tasklet)) {
+ process_csb(engine);
+ tasklet_unlock(&execlists->tasklet);
+ }
+
+ __execlists_submission_tasklet(engine);
}
static void submit_queue(struct intel_engine_cs *engine,
@@ -3023,19 +3236,139 @@ check_redzone(const void *vaddr, const struct intel_engine_cs *engine)
vaddr += engine->context_size;
if (memchr_inv(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE))
- dev_err_once(engine->i915->drm.dev,
+ drm_err_once(&engine->i915->drm,
"%s context redzone overwritten!\n",
engine->name);
}
static void execlists_context_unpin(struct intel_context *ce)
{
- check_redzone((void *)ce->lrc_reg_state - LRC_STATE_PN * PAGE_SIZE,
+ check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET,
ce->engine);
i915_gem_object_unpin_map(ce->state->obj);
}
+static u32 *
+gen12_emit_timestamp_wa(const struct intel_context *ce, u32 *cs)
+{
+ *cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
+ MI_SRM_LRM_GLOBAL_GTT |
+ MI_LRI_LRM_CS_MMIO;
+ *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
+ *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
+ CTX_TIMESTAMP * sizeof(u32);
+ *cs++ = 0;
+
+ *cs++ = MI_LOAD_REGISTER_REG |
+ MI_LRR_SOURCE_CS_MMIO |
+ MI_LRI_LRM_CS_MMIO;
+ *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
+ *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));
+
+ *cs++ = MI_LOAD_REGISTER_REG |
+ MI_LRR_SOURCE_CS_MMIO |
+ MI_LRI_LRM_CS_MMIO;
+ *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
+ *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));
+
+ return cs;
+}
+
+static u32 *
+gen12_emit_restore_scratch(const struct intel_context *ce, u32 *cs)
+{
+ GEM_BUG_ON(lrc_ring_gpr0(ce->engine) == -1);
+
+ *cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
+ MI_SRM_LRM_GLOBAL_GTT |
+ MI_LRI_LRM_CS_MMIO;
+ *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
+ *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
+ (lrc_ring_gpr0(ce->engine) + 1) * sizeof(u32);
+ *cs++ = 0;
+
+ return cs;
+}
+
+static u32 *
+gen12_emit_cmd_buf_wa(const struct intel_context *ce, u32 *cs)
+{
+ GEM_BUG_ON(lrc_ring_cmd_buf_cctl(ce->engine) == -1);
+
+ *cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
+ MI_SRM_LRM_GLOBAL_GTT |
+ MI_LRI_LRM_CS_MMIO;
+ *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
+ *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
+ (lrc_ring_cmd_buf_cctl(ce->engine) + 1) * sizeof(u32);
+ *cs++ = 0;
+
+ *cs++ = MI_LOAD_REGISTER_REG |
+ MI_LRR_SOURCE_CS_MMIO |
+ MI_LRI_LRM_CS_MMIO;
+ *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
+ *cs++ = i915_mmio_reg_offset(RING_CMD_BUF_CCTL(0));
+
+ return cs;
+}
+
+static u32 *
+gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
+{
+ cs = gen12_emit_timestamp_wa(ce, cs);
+ cs = gen12_emit_cmd_buf_wa(ce, cs);
+ cs = gen12_emit_restore_scratch(ce, cs);
+
+ return cs;
+}
+
+static u32 *
+gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs)
+{
+ cs = gen12_emit_timestamp_wa(ce, cs);
+ cs = gen12_emit_restore_scratch(ce, cs);
+
+ return cs;
+}
+
+static inline u32 context_wa_bb_offset(const struct intel_context *ce)
+{
+ return PAGE_SIZE * ce->wa_bb_page;
+}
+
+static u32 *context_indirect_bb(const struct intel_context *ce)
+{
+ void *ptr;
+
+ GEM_BUG_ON(!ce->wa_bb_page);
+
+ ptr = ce->lrc_reg_state;
+ ptr -= LRC_STATE_OFFSET; /* back to start of context image */
+ ptr += context_wa_bb_offset(ce);
+
+ return ptr;
+}
+
+static void
+setup_indirect_ctx_bb(const struct intel_context *ce,
+ const struct intel_engine_cs *engine,
+ u32 *(*emit)(const struct intel_context *, u32 *))
+{
+ u32 * const start = context_indirect_bb(ce);
+ u32 *cs;
+
+ cs = emit(ce, start);
+ GEM_BUG_ON(cs - start > I915_GTT_PAGE_SIZE / sizeof(*cs));
+ while ((unsigned long)cs % CACHELINE_BYTES)
+ *cs++ = MI_NOOP;
+
+ lrc_ring_setup_indirect_ctx(ce->lrc_reg_state, engine,
+ i915_ggtt_offset(ce->state) +
+ context_wa_bb_offset(ce),
+ (cs - start) * sizeof(*cs));
+}
+
static void
__execlists_update_reg_state(const struct intel_context *ce,
const struct intel_engine_cs *engine,
@@ -3059,6 +3392,18 @@ __execlists_update_reg_state(const struct intel_context *ce,
i915_oa_init_reg_state(ce, engine);
}
+
+ if (ce->wa_bb_page) {
+ u32 *(*fn)(const struct intel_context *ce, u32 *cs);
+
+ fn = gen12_emit_indirect_ctx_xcs;
+ if (ce->engine->class == RENDER_CLASS)
+ fn = gen12_emit_indirect_ctx_rcs;
+
+ /* Mutually exclusive wrt to global indirect bb */
+ GEM_BUG_ON(engine->wa_ctx.indirect_ctx.size);
+ setup_indirect_ctx_bb(ce, engine, fn);
+ }
}
static int
@@ -3077,7 +3422,7 @@ __execlists_context_pin(struct intel_context *ce,
return PTR_ERR(vaddr);
ce->lrc.lrca = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE;
- ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
+ ce->lrc_reg_state = vaddr + LRC_STATE_OFFSET;
__execlists_update_reg_state(ce, engine, ce->ring->tail);
return 0;
@@ -3125,6 +3470,7 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq)
{
u32 *cs;
+ GEM_BUG_ON(i915_request_has_initial_breadcrumb(rq));
if (!i915_request_timeline(rq)->has_initial_breadcrumb)
return 0;
@@ -3151,6 +3497,56 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq)
/* Record the updated position of the request's payload */
rq->infix = intel_ring_offset(rq, cs);
+ __set_bit(I915_FENCE_FLAG_INITIAL_BREADCRUMB, &rq->fence.flags);
+
+ return 0;
+}
+
+static int emit_pdps(struct i915_request *rq)
+{
+ const struct intel_engine_cs * const engine = rq->engine;
+ struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(rq->context->vm);
+ int err, i;
+ u32 *cs;
+
+ GEM_BUG_ON(intel_vgpu_active(rq->i915));
+
+ /*
+ * Beware ye of the dragons, this sequence is magic!
+ *
+ * Small changes to this sequence can cause anything from
+ * GPU hangs to forcewake errors and machine lockups!
+ */
+
+ /* Flush any residual operations from the context load */
+ err = engine->emit_flush(rq, EMIT_FLUSH);
+ if (err)
+ return err;
+
+ /* Magic required to prevent forcewake errors! */
+ err = engine->emit_flush(rq, EMIT_INVALIDATE);
+ if (err)
+ return err;
+
+ cs = intel_ring_begin(rq, 4 * GEN8_3LVL_PDPES + 2);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ /* Ensure the LRI have landed before we invalidate & continue */
+ *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) | MI_LRI_FORCE_POSTED;
+ for (i = GEN8_3LVL_PDPES; i--; ) {
+ const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
+ u32 base = engine->mmio_base;
+
+ *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i));
+ *cs++ = upper_32_bits(pd_daddr);
+ *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i));
+ *cs++ = lower_32_bits(pd_daddr);
+ }
+ *cs++ = MI_NOOP;
+
+ intel_ring_advance(rq, cs);
+
return 0;
}
@@ -3175,6 +3571,12 @@ static int execlists_request_alloc(struct i915_request *request)
* to cancel/unwind this request now.
*/
+ if (!i915_vm_is_4lvl(request->context->vm)) {
+ ret = emit_pdps(request);
+ if (ret)
+ return ret;
+ }
+
/* Unconditionally invalidate GPU caches and TLBs. */
ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
if (ret)
@@ -3475,7 +3877,8 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine)
ret = lrc_setup_wa_ctx(engine);
if (ret) {
- DRM_DEBUG_DRIVER("Failed to setup context WA page: %d\n", ret);
+ drm_dbg(&engine->i915->drm,
+ "Failed to setup context WA page: %d\n", ret);
return ret;
}
@@ -3508,6 +3911,72 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine)
return ret;
}
+static void reset_csb_pointers(struct intel_engine_cs *engine)
+{
+ struct intel_engine_execlists * const execlists = &engine->execlists;
+ const unsigned int reset_value = execlists->csb_size - 1;
+
+ ring_set_paused(engine, 0);
+
+ /*
+ * Sometimes Icelake forgets to reset its pointers on a GPU reset.
+ * Bludgeon them with a mmio update to be sure.
+ */
+ ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR,
+ 0xffff << 16 | reset_value << 8 | reset_value);
+ ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR);
+
+ /*
+ * After a reset, the HW starts writing into CSB entry [0]. We
+ * therefore have to set our HEAD pointer back one entry so that
+ * the *first* entry we check is entry 0. To complicate this further,
+ * as we don't wait for the first interrupt after reset, we have to
+ * fake the HW write to point back to the last entry so that our
+ * inline comparison of our cached head position against the last HW
+ * write works even before the first interrupt.
+ */
+ execlists->csb_head = reset_value;
+ WRITE_ONCE(*execlists->csb_write, reset_value);
+ wmb(); /* Make sure this is visible to HW (paranoia?) */
+
+ invalidate_csb_entries(&execlists->csb_status[0],
+ &execlists->csb_status[reset_value]);
+
+ /* Once more for luck and our trusty paranoia */
+ ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR,
+ 0xffff << 16 | reset_value << 8 | reset_value);
+ ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR);
+
+ GEM_BUG_ON(READ_ONCE(*execlists->csb_write) != reset_value);
+}
+
+static void execlists_sanitize(struct intel_engine_cs *engine)
+{
+ /*
+ * Poison residual state on resume, in case the suspend didn't!
+ *
+ * We have to assume that across suspend/resume (or other loss
+ * of control) the contents of our pinned buffers have been
+ * lost, replaced by garbage. Since this doesn't always happen,
+ * let's poison such state so that we more quickly spot when
+ * we falsely assume it has been preserved.
+ */
+ if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
+ memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE);
+
+ reset_csb_pointers(engine);
+
+ /*
+ * The kernel_context HWSP is stored in the status_page. As above,
+ * that may be lost on resume/initialisation, and so we need to
+ * reset the value in the HWSP.
+ */
+ intel_timeline_reset_seqno(engine->kernel_context->timeline);
+
+ /* And scrub the dirty cachelines for the HWSP */
+ clflush_cache_range(engine->status_page.addr, PAGE_SIZE);
+}
+
static void enable_error_interrupt(struct intel_engine_cs *engine)
{
u32 status;
@@ -3518,7 +3987,7 @@ static void enable_error_interrupt(struct intel_engine_cs *engine)
status = ENGINE_READ(engine, RING_ESR);
if (unlikely(status)) {
- dev_err(engine->i915->drm.dev,
+ drm_err(&engine->i915->drm,
"engine '%s' resumed still in error: %08x\n",
engine->name, status);
__intel_gt_reset(engine->gt, engine->mask);
@@ -3582,7 +4051,8 @@ static bool unexpected_starting_state(struct intel_engine_cs *engine)
bool unexpected = false;
if (ENGINE_READ_FW(engine, RING_MI_MODE) & STOP_RING) {
- DRM_DEBUG_DRIVER("STOP_RING still set in RING_MI_MODE\n");
+ drm_dbg(&engine->i915->drm,
+ "STOP_RING still set in RING_MI_MODE\n");
unexpected = true;
}
@@ -3642,39 +4112,10 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine)
*
* FIXME: Wa for more modern gens needs to be validated
*/
+ ring_set_paused(engine, 1);
intel_engine_stop_cs(engine);
-}
-
-static void reset_csb_pointers(struct intel_engine_cs *engine)
-{
- struct intel_engine_execlists * const execlists = &engine->execlists;
- const unsigned int reset_value = execlists->csb_size - 1;
-
- ring_set_paused(engine, 0);
-
- /*
- * After a reset, the HW starts writing into CSB entry [0]. We
- * therefore have to set our HEAD pointer back one entry so that
- * the *first* entry we check is entry 0. To complicate this further,
- * as we don't wait for the first interrupt after reset, we have to
- * fake the HW write to point back to the last entry so that our
- * inline comparison of our cached head position against the last HW
- * write works even before the first interrupt.
- */
- execlists->csb_head = reset_value;
- WRITE_ONCE(*execlists->csb_write, reset_value);
- wmb(); /* Make sure this is visible to HW (paranoia?) */
- /*
- * Sometimes Icelake forgets to reset its pointers on a GPU reset.
- * Bludgeon them with a mmio update to be sure.
- */
- ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR,
- reset_value << 8 | reset_value);
- ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR);
-
- invalidate_csb_entries(&execlists->csb_status[0],
- &execlists->csb_status[reset_value]);
+ engine->execlists.reset_ccid = active_ccid(engine);
}
static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine)
@@ -3717,7 +4158,7 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
* its request, it was still running at the time of the
* reset and will have been clobbered.
*/
- rq = execlists_active(execlists);
+ rq = active_context(engine, engine->execlists.reset_ccid);
if (!rq)
goto unwind;
@@ -3767,8 +4208,6 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
* image back to the expected values to skip over the guilty request.
*/
__i915_request_reset(rq, stalled);
- if (!stalled)
- goto out_replay;
/*
* We want a simple context + ring to execute the breadcrumb update.
@@ -3778,9 +4217,6 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
* future request will be after userspace has had the opportunity
* to recreate its own state.
*/
- GEM_BUG_ON(!intel_context_is_pinned(ce));
- restore_default_state(ce, engine);
-
out_replay:
ENGINE_TRACE(engine, "replay {head:%04x, tail:%04x}\n",
head, ce->ring->tail);
@@ -4146,6 +4582,42 @@ static u32 preparser_disable(bool state)
return MI_ARB_CHECK | 1 << 8 | state;
}
+static i915_reg_t aux_inv_reg(const struct intel_engine_cs *engine)
+{
+ static const i915_reg_t vd[] = {
+ GEN12_VD0_AUX_NV,
+ GEN12_VD1_AUX_NV,
+ GEN12_VD2_AUX_NV,
+ GEN12_VD3_AUX_NV,
+ };
+
+ static const i915_reg_t ve[] = {
+ GEN12_VE0_AUX_NV,
+ GEN12_VE1_AUX_NV,
+ };
+
+ if (engine->class == VIDEO_DECODE_CLASS)
+ return vd[engine->instance];
+
+ if (engine->class == VIDEO_ENHANCEMENT_CLASS)
+ return ve[engine->instance];
+
+ GEM_BUG_ON("unknown aux_inv_reg\n");
+
+ return INVALID_MMIO_REG;
+}
+
+static u32 *
+gen12_emit_aux_table_inv(const i915_reg_t inv_reg, u32 *cs)
+{
+ *cs++ = MI_LOAD_REGISTER_IMM(1);
+ *cs++ = i915_mmio_reg_offset(inv_reg);
+ *cs++ = AUX_INV;
+ *cs++ = MI_NOOP;
+
+ return cs;
+}
+
static int gen12_emit_flush_render(struct i915_request *request,
u32 mode)
{
@@ -4154,13 +4626,13 @@ static int gen12_emit_flush_render(struct i915_request *request,
u32 *cs;
flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
+ flags |= PIPE_CONTROL_FLUSH_L3;
flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
/* Wa_1409600907:tgl */
flags |= PIPE_CONTROL_DEPTH_STALL;
flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
flags |= PIPE_CONTROL_FLUSH_ENABLE;
- flags |= PIPE_CONTROL_HDC_PIPELINE_FLUSH;
flags |= PIPE_CONTROL_STORE_DATA_INDEX;
flags |= PIPE_CONTROL_QW_WRITE;
@@ -4171,7 +4643,9 @@ static int gen12_emit_flush_render(struct i915_request *request,
if (IS_ERR(cs))
return PTR_ERR(cs);
- cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
+ cs = gen12_emit_pipe_control(cs,
+ PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
+ flags, LRC_PPHWSP_SCRATCH_ADDR);
intel_ring_advance(request, cs);
}
@@ -4186,14 +4660,13 @@ static int gen12_emit_flush_render(struct i915_request *request,
flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
- flags |= PIPE_CONTROL_L3_RO_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_STORE_DATA_INDEX;
flags |= PIPE_CONTROL_QW_WRITE;
flags |= PIPE_CONTROL_CS_STALL;
- cs = intel_ring_begin(request, 8);
+ cs = intel_ring_begin(request, 8 + 4);
if (IS_ERR(cs))
return PTR_ERR(cs);
@@ -4206,6 +4679,9 @@ static int gen12_emit_flush_render(struct i915_request *request,
cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
+ /* hsdes: 1809175790 */
+ cs = gen12_emit_aux_table_inv(GEN12_GFX_CCS_AUX_NV, cs);
+
*cs++ = preparser_disable(false);
intel_ring_advance(request, cs);
}
@@ -4213,6 +4689,56 @@ static int gen12_emit_flush_render(struct i915_request *request,
return 0;
}
+static int gen12_emit_flush(struct i915_request *request, u32 mode)
+{
+ intel_engine_mask_t aux_inv = 0;
+ u32 cmd, *cs;
+
+ if (mode & EMIT_INVALIDATE)
+ aux_inv = request->engine->mask & ~BIT(BCS0);
+
+ cs = intel_ring_begin(request,
+ 4 + (aux_inv ? 2 * hweight8(aux_inv) + 2 : 0));
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ cmd = MI_FLUSH_DW + 1;
+
+ /* We always require a command barrier so that subsequent
+ * commands, such as breadcrumb interrupts, are strictly ordered
+ * wrt the contents of the write cache being flushed to memory
+ * (and thus being coherent from the CPU).
+ */
+ cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
+
+ if (mode & EMIT_INVALIDATE) {
+ cmd |= MI_INVALIDATE_TLB;
+ if (request->engine->class == VIDEO_DECODE_CLASS)
+ cmd |= MI_INVALIDATE_BSD;
+ }
+
+ *cs++ = cmd;
+ *cs++ = LRC_PPHWSP_SCRATCH_ADDR;
+ *cs++ = 0; /* upper addr */
+ *cs++ = 0; /* value */
+
+ if (aux_inv) { /* hsdes: 1809175790 */
+ struct intel_engine_cs *engine;
+ unsigned int tmp;
+
+ *cs++ = MI_LOAD_REGISTER_IMM(hweight8(aux_inv));
+ for_each_engine_masked(engine, request->engine->gt,
+ aux_inv, tmp) {
+ *cs++ = i915_mmio_reg_offset(aux_inv_reg(engine));
+ *cs++ = AUX_INV;
+ }
+ *cs++ = MI_NOOP;
+ }
+ intel_ring_advance(request, cs);
+
+ return 0;
+}
+
/*
* Reserve space for 2 NOOPs at the end of each request to be
* used as a workaround for not being allowed to do lite
@@ -4242,8 +4768,7 @@ static u32 *emit_preempt_busywait(struct i915_request *request, u32 *cs)
}
static __always_inline u32*
-gen8_emit_fini_breadcrumb_footer(struct i915_request *request,
- u32 *cs)
+gen8_emit_fini_breadcrumb_tail(struct i915_request *request, u32 *cs)
{
*cs++ = MI_USER_INTERRUPT;
@@ -4257,14 +4782,16 @@ gen8_emit_fini_breadcrumb_footer(struct i915_request *request,
return gen8_emit_wa_tail(request, cs);
}
-static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
+static u32 *emit_xcs_breadcrumb(struct i915_request *request, u32 *cs)
{
- cs = gen8_emit_ggtt_write(cs,
- request->fence.seqno,
- i915_request_active_timeline(request)->hwsp_offset,
- 0);
+ u32 addr = i915_request_active_timeline(request)->hwsp_offset;
+
+ return gen8_emit_ggtt_write(cs, request->fence.seqno, addr, 0);
+}
- return gen8_emit_fini_breadcrumb_footer(request, cs);
+static u32 *gen8_emit_fini_breadcrumb(struct i915_request *rq, u32 *cs)
+{
+ return gen8_emit_fini_breadcrumb_tail(rq, emit_xcs_breadcrumb(rq, cs));
}
static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
@@ -4282,7 +4809,7 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
PIPE_CONTROL_FLUSH_ENABLE |
PIPE_CONTROL_CS_STALL);
- return gen8_emit_fini_breadcrumb_footer(request, cs);
+ return gen8_emit_fini_breadcrumb_tail(request, cs);
}
static u32 *
@@ -4298,7 +4825,7 @@ gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
PIPE_CONTROL_DC_FLUSH_ENABLE |
PIPE_CONTROL_FLUSH_ENABLE);
- return gen8_emit_fini_breadcrumb_footer(request, cs);
+ return gen8_emit_fini_breadcrumb_tail(request, cs);
}
/*
@@ -4336,7 +4863,7 @@ static u32 *gen12_emit_preempt_busywait(struct i915_request *request, u32 *cs)
}
static __always_inline u32*
-gen12_emit_fini_breadcrumb_footer(struct i915_request *request, u32 *cs)
+gen12_emit_fini_breadcrumb_tail(struct i915_request *request, u32 *cs)
{
*cs++ = MI_USER_INTERRUPT;
@@ -4350,33 +4877,29 @@ gen12_emit_fini_breadcrumb_footer(struct i915_request *request, u32 *cs)
return gen8_emit_wa_tail(request, cs);
}
-static u32 *gen12_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
+static u32 *gen12_emit_fini_breadcrumb(struct i915_request *rq, u32 *cs)
{
- cs = gen8_emit_ggtt_write(cs,
- request->fence.seqno,
- i915_request_active_timeline(request)->hwsp_offset,
- 0);
-
- return gen12_emit_fini_breadcrumb_footer(request, cs);
+ return gen12_emit_fini_breadcrumb_tail(rq, emit_xcs_breadcrumb(rq, cs));
}
static u32 *
gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
{
- cs = gen8_emit_ggtt_write_rcs(cs,
- request->fence.seqno,
- i915_request_active_timeline(request)->hwsp_offset,
- PIPE_CONTROL_CS_STALL |
- PIPE_CONTROL_TILE_CACHE_FLUSH |
- PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
- PIPE_CONTROL_DEPTH_CACHE_FLUSH |
- /* Wa_1409600907:tgl */
- PIPE_CONTROL_DEPTH_STALL |
- PIPE_CONTROL_DC_FLUSH_ENABLE |
- PIPE_CONTROL_FLUSH_ENABLE |
- PIPE_CONTROL_HDC_PIPELINE_FLUSH);
+ cs = gen12_emit_ggtt_write_rcs(cs,
+ request->fence.seqno,
+ i915_request_active_timeline(request)->hwsp_offset,
+ PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
+ PIPE_CONTROL_CS_STALL |
+ PIPE_CONTROL_TILE_CACHE_FLUSH |
+ PIPE_CONTROL_FLUSH_L3 |
+ PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
+ PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+ /* Wa_1409600907:tgl */
+ PIPE_CONTROL_DEPTH_STALL |
+ PIPE_CONTROL_DC_FLUSH_ENABLE |
+ PIPE_CONTROL_FLUSH_ENABLE);
- return gen12_emit_fini_breadcrumb_footer(request, cs);
+ return gen12_emit_fini_breadcrumb_tail(request, cs);
}
static void execlists_park(struct intel_engine_cs *engine)
@@ -4428,6 +4951,8 @@ static void execlists_shutdown(struct intel_engine_cs *engine)
static void execlists_release(struct intel_engine_cs *engine)
{
+ engine->sanitize = NULL; /* no longer in control, nothing to sanitize */
+
execlists_shutdown(engine);
intel_engine_cleanup_common(engine);
@@ -4447,9 +4972,10 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
engine->emit_flush = gen8_emit_flush;
engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb;
- if (INTEL_GEN(engine->i915) >= 12)
+ if (INTEL_GEN(engine->i915) >= 12) {
engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb;
-
+ engine->emit_flush = gen12_emit_flush;
+ }
engine->set_default_submission = intel_execlists_set_default_submission;
if (INTEL_GEN(engine->i915) < 11) {
@@ -4530,7 +5056,7 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine)
* because we only expect rare glitches but nothing
* critical to prevent us from using GPU
*/
- DRM_ERROR("WA batch buffer initialization failed\n");
+ drm_err(&i915->drm, "WA batch buffer initialization failed\n");
if (HAS_LOGICAL_RING_ELSQ(i915)) {
execlists->submit_reg = uncore->regs +
@@ -4558,48 +5084,13 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine)
execlists->ccid |= engine->class << (GEN11_ENGINE_CLASS_SHIFT - 32);
}
- reset_csb_pointers(engine);
-
/* Finally, take ownership and responsibility for cleanup! */
+ engine->sanitize = execlists_sanitize;
engine->release = execlists_release;
return 0;
}
-static u32 intel_lr_indirect_ctx_offset(const struct intel_engine_cs *engine)
-{
- u32 indirect_ctx_offset;
-
- switch (INTEL_GEN(engine->i915)) {
- default:
- MISSING_CASE(INTEL_GEN(engine->i915));
- /* fall through */
- case 12:
- indirect_ctx_offset =
- GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
- break;
- case 11:
- indirect_ctx_offset =
- GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
- break;
- case 10:
- indirect_ctx_offset =
- GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
- break;
- case 9:
- indirect_ctx_offset =
- GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
- break;
- case 8:
- indirect_ctx_offset =
- GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
- break;
- }
-
- return indirect_ctx_offset;
-}
-
-
static void init_common_reg_state(u32 * const regs,
const struct intel_engine_cs *engine,
const struct intel_ring *ring,
@@ -4617,30 +5108,27 @@ static void init_common_reg_state(u32 * const regs,
regs[CTX_CONTEXT_CONTROL] = ctl;
regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
+ regs[CTX_TIMESTAMP] = 0;
}
static void init_wa_bb_reg_state(u32 * const regs,
- const struct intel_engine_cs *engine,
- u32 pos_bb_per_ctx)
+ const struct intel_engine_cs *engine)
{
const struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx;
if (wa_ctx->per_ctx.size) {
const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
- regs[pos_bb_per_ctx] =
+ GEM_BUG_ON(lrc_ring_wa_bb_per_ctx(engine) == -1);
+ regs[lrc_ring_wa_bb_per_ctx(engine) + 1] =
(ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
}
if (wa_ctx->indirect_ctx.size) {
- const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
-
- regs[pos_bb_per_ctx + 2] =
- (ggtt_offset + wa_ctx->indirect_ctx.offset) |
- (wa_ctx->indirect_ctx.size / CACHELINE_BYTES);
-
- regs[pos_bb_per_ctx + 4] =
- intel_lr_indirect_ctx_offset(engine) << 6;
+ lrc_ring_setup_indirect_ctx(regs, engine,
+ i915_ggtt_offset(wa_ctx->vma) +
+ wa_ctx->indirect_ctx.offset,
+ wa_ctx->indirect_ctx.size);
}
}
@@ -4689,10 +5177,7 @@ static void execlists_init_reg_state(u32 *regs,
init_common_reg_state(regs, engine, ring, inhibit);
init_ppgtt_reg_state(regs, vm_alias(ce->vm));
- init_wa_bb_reg_state(regs, engine,
- INTEL_GEN(engine->i915) >= 12 ?
- GEN12_CTX_BB_PER_CTX_PTR :
- CTX_BB_PER_CTX_PTR);
+ init_wa_bb_reg_state(regs, engine);
__reset_stop_ring(regs, engine);
}
@@ -4705,29 +5190,18 @@ populate_lr_context(struct intel_context *ce,
{
bool inhibit = true;
void *vaddr;
- int ret;
vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB);
if (IS_ERR(vaddr)) {
- ret = PTR_ERR(vaddr);
- DRM_DEBUG_DRIVER("Could not map object pages! (%d)\n", ret);
- return ret;
+ drm_dbg(&engine->i915->drm, "Could not map object pages!\n");
+ return PTR_ERR(vaddr);
}
set_redzone(vaddr, engine);
if (engine->default_state) {
- void *defaults;
-
- defaults = i915_gem_object_pin_map(engine->default_state,
- I915_MAP_WB);
- if (IS_ERR(defaults)) {
- ret = PTR_ERR(defaults);
- goto err_unpin_ctx;
- }
-
- memcpy(vaddr, defaults, engine->context_size);
- i915_gem_object_unpin_map(engine->default_state);
+ shmem_read(engine->default_state, 0,
+ vaddr, engine->context_size);
__set_bit(CONTEXT_VALID_BIT, &ce->flags);
inhibit = false;
}
@@ -4739,14 +5213,12 @@ populate_lr_context(struct intel_context *ce,
* The second page of the context object contains some registers which
* must be set up prior to the first execution.
*/
- execlists_init_reg_state(vaddr + LRC_STATE_PN * PAGE_SIZE,
+ execlists_init_reg_state(vaddr + LRC_STATE_OFFSET,
ce, engine, ring, inhibit);
- ret = 0;
-err_unpin_ctx:
__i915_gem_object_flush_map(ctx_obj, 0, engine->context_size);
i915_gem_object_unpin_map(ctx_obj);
- return ret;
+ return 0;
}
static int __execlists_context_alloc(struct intel_context *ce,
@@ -4764,6 +5236,11 @@ static int __execlists_context_alloc(struct intel_context *ce,
if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
context_size += I915_GTT_PAGE_SIZE; /* for redzone */
+ if (INTEL_GEN(engine->i915) == 12) {
+ ce->wa_bb_page = context_size / PAGE_SIZE;
+ context_size += PAGE_SIZE;
+ }
+
ctx_obj = i915_gem_object_create_shmem(engine->i915, context_size);
if (IS_ERR(ctx_obj))
return PTR_ERR(ctx_obj);
@@ -4803,7 +5280,8 @@ static int __execlists_context_alloc(struct intel_context *ce,
ret = populate_lr_context(ce, ctx_obj, engine, ring);
if (ret) {
- DRM_DEBUG_DRIVER("Failed to populate LRC: %d\n", ret);
+ drm_dbg(&engine->i915->drm,
+ "Failed to populate LRC: %d\n", ret);
goto error_ring_free;
}
@@ -4856,6 +5334,8 @@ static void virtual_context_destroy(struct kref *kref)
__execlists_context_fini(&ve->context);
intel_context_fini(&ve->context);
+ intel_engine_free_request_pool(&ve->base);
+
kfree(ve->bonds);
kfree(ve);
}
@@ -4980,12 +5460,15 @@ static void virtual_submission_tasklet(unsigned long data)
return;
local_irq_disable();
- for (n = 0; READ_ONCE(ve->request) && n < ve->num_siblings; n++) {
- struct intel_engine_cs *sibling = ve->siblings[n];
+ for (n = 0; n < ve->num_siblings; n++) {
+ struct intel_engine_cs *sibling = READ_ONCE(ve->siblings[n]);
struct ve_node * const node = &ve->nodes[sibling->id];
struct rb_node **parent, *rb;
bool first;
+ if (!READ_ONCE(ve->request))
+ break; /* already handled by a sibling's tasklet */
+
if (unlikely(!(mask & sibling->mask))) {
if (!RB_EMPTY_NODE(&node->rb)) {
spin_lock(&sibling->active.lock);
@@ -5036,10 +5519,8 @@ static void virtual_submission_tasklet(unsigned long data)
submit_engine:
GEM_BUG_ON(RB_EMPTY_NODE(&node->rb));
node->prio = prio;
- if (first && prio > sibling->execlists.queue_priority_hint) {
- sibling->execlists.queue_priority_hint = prio;
+ if (first && prio > sibling->execlists.queue_priority_hint)
tasklet_hi_schedule(&sibling->execlists.tasklet);
- }
spin_unlock(&sibling->active.lock);
}
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h
index dfbc214e14f5..91fd8e452d9b 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
@@ -90,6 +90,7 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine);
#define LRC_PPHWSP_SZ (1)
/* After the PPHWSP we have the logical state for the context */
#define LRC_STATE_PN (LRC_PPHWSP_PN + LRC_PPHWSP_SZ)
+#define LRC_STATE_OFFSET (LRC_STATE_PN * PAGE_SIZE)
/* Space within PPHWSP reserved to be used as scratch */
#define LRC_PPHWSP_SCRATCH 0x34
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
index d39b72590e40..93cb6c460508 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
@@ -9,14 +9,13 @@
#include <linux/types.h>
-/* GEN8 to GEN11 Reg State Context */
+/* GEN8 to GEN12 Reg State Context */
#define CTX_CONTEXT_CONTROL (0x02 + 1)
#define CTX_RING_HEAD (0x04 + 1)
#define CTX_RING_TAIL (0x06 + 1)
#define CTX_RING_START (0x08 + 1)
#define CTX_RING_CTL (0x0a + 1)
#define CTX_BB_STATE (0x10 + 1)
-#define CTX_BB_PER_CTX_PTR (0x18 + 1)
#define CTX_TIMESTAMP (0x22 + 1)
#define CTX_PDP3_UDW (0x24 + 1)
#define CTX_PDP3_LDW (0x26 + 1)
@@ -30,9 +29,6 @@
#define GEN9_CTX_RING_MI_MODE 0x54
-/* GEN12+ Reg State Context */
-#define GEN12_CTX_BB_PER_CTX_PTR (0x12 + 1)
-
#define ASSIGN_CTX_PDP(ppgtt, reg_state, n) do { \
u32 *reg_state__ = (reg_state); \
const u64 addr__ = i915_page_dir_dma_addr((ppgtt), (n)); \
diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c
index 3847ee44b181..ab675d35030d 100644
--- a/drivers/gpu/drm/i915/gt/intel_rc6.c
+++ b/drivers/gpu/drm/i915/gt/intel_rc6.c
@@ -113,7 +113,6 @@ static void gen9_rc6_enable(struct intel_rc6 *rc6)
struct intel_uncore *uncore = rc6_to_uncore(rc6);
struct intel_engine_cs *engine;
enum intel_engine_id id;
- u32 rc6_mode;
/* 2b: Program RC6 thresholds.*/
if (INTEL_GEN(rc6_to_i915(rc6)) >= 10) {
@@ -165,16 +164,11 @@ static void gen9_rc6_enable(struct intel_rc6 *rc6)
/* 3a: Enable RC6 */
set(uncore, GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */
- /* WaRsUseTimeoutMode:cnl (pre-prod) */
- if (IS_CNL_REVID(rc6_to_i915(rc6), CNL_REVID_A0, CNL_REVID_C0))
- rc6_mode = GEN7_RC_CTL_TO_MODE;
- else
- rc6_mode = GEN6_RC_CTL_EI_MODE(1);
rc6->ctl_enable =
GEN6_RC_CTL_HW_ENABLE |
GEN6_RC_CTL_RC6_ENABLE |
- rc6_mode;
+ GEN6_RC_CTL_EI_MODE(1);
/*
* WaRsDisableCoarsePowerGating:skl,cnl
@@ -246,16 +240,18 @@ static void gen6_rc6_enable(struct intel_rc6 *rc6)
ret = sandybridge_pcode_read(i915, GEN6_PCODE_READ_RC6VIDS,
&rc6vids, NULL);
if (IS_GEN(i915, 6) && ret) {
- DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n");
+ drm_dbg(&i915->drm, "Couldn't check for BIOS workaround\n");
} else if (IS_GEN(i915, 6) &&
(GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) {
- DRM_DEBUG_DRIVER("You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n",
- GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450);
+ drm_dbg(&i915->drm,
+ "You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n",
+ GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450);
rc6vids &= 0xffff00;
rc6vids |= GEN6_ENCODE_RC6_VID(450);
ret = sandybridge_pcode_write(i915, GEN6_PCODE_WRITE_RC6VIDS, rc6vids);
if (ret)
- DRM_ERROR("Couldn't fix incorrect rc6 voltage\n");
+ drm_err(&i915->drm,
+ "Couldn't fix incorrect rc6 voltage\n");
}
}
@@ -263,14 +259,15 @@ static void gen6_rc6_enable(struct intel_rc6 *rc6)
static int chv_rc6_init(struct intel_rc6 *rc6)
{
struct intel_uncore *uncore = rc6_to_uncore(rc6);
+ struct drm_i915_private *i915 = rc6_to_i915(rc6);
resource_size_t pctx_paddr, paddr;
resource_size_t pctx_size = 32 * SZ_1K;
u32 pcbr;
pcbr = intel_uncore_read(uncore, VLV_PCBR);
if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) {
- DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
- paddr = rc6_to_i915(rc6)->dsm.end + 1 - pctx_size;
+ drm_dbg(&i915->drm, "BIOS didn't set up PCBR, fixing up\n");
+ paddr = i915->dsm.end + 1 - pctx_size;
GEM_BUG_ON(paddr > U32_MAX);
pctx_paddr = (paddr & ~4095);
@@ -304,7 +301,7 @@ static int vlv_rc6_init(struct intel_rc6 *rc6)
goto out;
}
- DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
+ drm_dbg(&i915->drm, "BIOS didn't set up PCBR, fixing up\n");
/*
* From the Gunit register HAS:
@@ -316,7 +313,8 @@ static int vlv_rc6_init(struct intel_rc6 *rc6)
*/
pctx = i915_gem_object_create_stolen(i915, pctx_size);
if (IS_ERR(pctx)) {
- DRM_DEBUG("not enough stolen space for PCTX, disabling\n");
+ drm_dbg(&i915->drm,
+ "not enough stolen space for PCTX, disabling\n");
return PTR_ERR(pctx);
}
@@ -398,14 +396,14 @@ static bool bxt_check_bios_rc6_setup(struct intel_rc6 *rc6)
rc_sw_target = intel_uncore_read(uncore, GEN6_RC_STATE);
rc_sw_target &= RC_SW_TARGET_STATE_MASK;
rc_sw_target >>= RC_SW_TARGET_STATE_SHIFT;
- DRM_DEBUG_DRIVER("BIOS enabled RC states: "
+ drm_dbg(&i915->drm, "BIOS enabled RC states: "
"HW_CTRL %s HW_RC6 %s SW_TARGET_STATE %x\n",
onoff(rc_ctl & GEN6_RC_CTL_HW_ENABLE),
onoff(rc_ctl & GEN6_RC_CTL_RC6_ENABLE),
rc_sw_target);
if (!(intel_uncore_read(uncore, RC6_LOCATION) & RC6_CTX_IN_DRAM)) {
- DRM_DEBUG_DRIVER("RC6 Base location not set properly.\n");
+ drm_dbg(&i915->drm, "RC6 Base location not set properly.\n");
enable_rc6 = false;
}
@@ -417,7 +415,7 @@ static bool bxt_check_bios_rc6_setup(struct intel_rc6 *rc6)
intel_uncore_read(uncore, RC6_CTX_BASE) & RC6_CTX_BASE_MASK;
if (!(rc6_ctx_base >= i915->dsm_reserved.start &&
rc6_ctx_base + PAGE_SIZE < i915->dsm_reserved.end)) {
- DRM_DEBUG_DRIVER("RC6 Base address not as expected.\n");
+ drm_dbg(&i915->drm, "RC6 Base address not as expected.\n");
enable_rc6 = false;
}
@@ -425,24 +423,25 @@ static bool bxt_check_bios_rc6_setup(struct intel_rc6 *rc6)
(intel_uncore_read(uncore, PWRCTX_MAXCNT_VCSUNIT0) & IDLE_TIME_MASK) > 1 &&
(intel_uncore_read(uncore, PWRCTX_MAXCNT_BCSUNIT) & IDLE_TIME_MASK) > 1 &&
(intel_uncore_read(uncore, PWRCTX_MAXCNT_VECSUNIT) & IDLE_TIME_MASK) > 1)) {
- DRM_DEBUG_DRIVER("Engine Idle wait time not set properly.\n");
+ drm_dbg(&i915->drm,
+ "Engine Idle wait time not set properly.\n");
enable_rc6 = false;
}
if (!intel_uncore_read(uncore, GEN8_PUSHBUS_CONTROL) ||
!intel_uncore_read(uncore, GEN8_PUSHBUS_ENABLE) ||
!intel_uncore_read(uncore, GEN8_PUSHBUS_SHIFT)) {
- DRM_DEBUG_DRIVER("Pushbus not setup properly.\n");
+ drm_dbg(&i915->drm, "Pushbus not setup properly.\n");
enable_rc6 = false;
}
if (!intel_uncore_read(uncore, GEN6_GFXPAUSE)) {
- DRM_DEBUG_DRIVER("GFX pause not setup properly.\n");
+ drm_dbg(&i915->drm, "GFX pause not setup properly.\n");
enable_rc6 = false;
}
if (!intel_uncore_read(uncore, GEN8_MISC_CTRL0)) {
- DRM_DEBUG_DRIVER("GPM control not setup properly.\n");
+ drm_dbg(&i915->drm, "GPM control not setup properly.\n");
enable_rc6 = false;
}
@@ -463,7 +462,7 @@ static bool rc6_supported(struct intel_rc6 *rc6)
return false;
if (IS_GEN9_LP(i915) && !bxt_check_bios_rc6_setup(rc6)) {
- dev_notice(i915->drm.dev,
+ drm_notice(&i915->drm,
"RC6 and powersaving disabled by BIOS\n");
return false;
}
@@ -495,7 +494,7 @@ static bool pctx_corrupted(struct intel_rc6 *rc6)
if (intel_uncore_read(rc6_to_uncore(rc6), GEN8_RC6_CTX_INFO))
return false;
- dev_notice(i915->drm.dev,
+ drm_notice(&i915->drm,
"RC6 context corruption, disabling runtime power management\n");
return true;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_renderstate.c b/drivers/gpu/drm/i915/gt/intel_renderstate.c
index 5954ecc3207f..f59e7875cc5e 100644
--- a/drivers/gpu/drm/i915/gt/intel_renderstate.c
+++ b/drivers/gpu/drm/i915/gt/intel_renderstate.c
@@ -102,7 +102,7 @@ static int render_state_setup(struct intel_renderstate *so,
}
if (rodata->reloc[reloc_index] != -1) {
- DRM_ERROR("only %d relocs resolved\n", reloc_index);
+ drm_err(&i915->drm, "only %d relocs resolved\n", reloc_index);
goto err;
}
@@ -194,7 +194,7 @@ int intel_renderstate_init(struct intel_renderstate *so,
err = i915_vma_pin(so->vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
if (err)
- goto err_vma;
+ goto err_obj;
err = render_state_setup(so, engine->i915);
if (err)
@@ -204,8 +204,6 @@ int intel_renderstate_init(struct intel_renderstate *so,
err_unpin:
i915_vma_unpin(so->vma);
-err_vma:
- i915_vma_close(so->vma);
err_obj:
i915_gem_object_put(obj);
so->vma = NULL;
@@ -221,6 +219,14 @@ int intel_renderstate_emit(struct intel_renderstate *so,
if (!so->vma)
return 0;
+ i915_vma_lock(so->vma);
+ err = i915_request_await_object(rq, so->vma->obj, false);
+ if (err == 0)
+ err = i915_vma_move_to_active(so->vma, rq, 0);
+ i915_vma_unlock(so->vma);
+ if (err)
+ return err;
+
err = engine->emit_bb_start(rq,
so->batch_offset, so->batch_size,
I915_DISPATCH_SECURE);
@@ -235,13 +241,7 @@ int intel_renderstate_emit(struct intel_renderstate *so,
return err;
}
- i915_vma_lock(so->vma);
- err = i915_request_await_object(rq, so->vma->obj, false);
- if (err == 0)
- err = i915_vma_move_to_active(so->vma, rq, 0);
- i915_vma_unlock(so->vma);
-
- return err;
+ return 0;
}
void intel_renderstate_fini(struct intel_renderstate *so)
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index 80db3c9d785e..39070b514e65 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -109,7 +109,7 @@ static bool mark_guilty(struct i915_request *rq)
goto out;
}
- dev_notice(ctx->i915->drm.dev,
+ drm_notice(&ctx->i915->drm,
"%s context reset due to GPU hang\n",
ctx->name);
@@ -755,7 +755,7 @@ static int gt_reset(struct intel_gt *gt, intel_engine_mask_t stalled_mask)
for_each_engine(engine, gt, id)
__intel_engine_reset(engine, stalled_mask & engine->mask);
- i915_gem_restore_fences(gt->ggtt);
+ intel_ggtt_restore_fences(gt->ggtt);
return err;
}
@@ -1031,7 +1031,7 @@ void intel_gt_reset(struct intel_gt *gt,
goto unlock;
if (reason)
- dev_notice(gt->i915->drm.dev,
+ drm_notice(&gt->i915->drm,
"Resetting chip for %s\n", reason);
atomic_inc(&gt->i915->gpu_error.reset_count);
@@ -1039,7 +1039,7 @@ void intel_gt_reset(struct intel_gt *gt,
if (!intel_has_gpu_reset(gt)) {
if (i915_modparams.reset)
- dev_err(gt->i915->drm.dev, "GPU reset not supported\n");
+ drm_err(&gt->i915->drm, "GPU reset not supported\n");
else
drm_dbg(&gt->i915->drm, "GPU reset disabled\n");
goto error;
@@ -1049,7 +1049,7 @@ void intel_gt_reset(struct intel_gt *gt,
intel_runtime_pm_disable_interrupts(gt->i915);
if (do_reset(gt, stalled_mask)) {
- dev_err(gt->i915->drm.dev, "Failed to reset chip\n");
+ drm_err(&gt->i915->drm, "Failed to reset chip\n");
goto taint;
}
@@ -1111,7 +1111,7 @@ static inline int intel_gt_reset_engine(struct intel_engine_cs *engine)
/**
* intel_engine_reset - reset GPU engine to recover from a hang
* @engine: engine to reset
- * @msg: reason for GPU reset; or NULL for no dev_notice()
+ * @msg: reason for GPU reset; or NULL for no drm_notice()
*
* Reset a specific GPU engine. Useful if a hang is detected.
* Returns zero on successful reset or otherwise an error code.
@@ -1136,7 +1136,7 @@ int intel_engine_reset(struct intel_engine_cs *engine, const char *msg)
reset_prepare_engine(engine);
if (msg)
- dev_notice(engine->i915->drm.dev,
+ drm_notice(&engine->i915->drm,
"Resetting %s for %s\n", engine->name, msg);
atomic_inc(&engine->i915->gpu_error.reset_engine_count[engine->uabi_class]);
@@ -1381,7 +1381,7 @@ static void intel_wedge_me(struct work_struct *work)
{
struct intel_wedge_me *w = container_of(work, typeof(*w), work.work);
- dev_err(w->gt->i915->drm.dev,
+ drm_err(&w->gt->i915->drm,
"%s timed out, cancelling all in-flight rendering.\n",
w->name);
intel_gt_set_wedged(w->gt);
diff --git a/drivers/gpu/drm/i915/gt/intel_ring.h b/drivers/gpu/drm/i915/gt/intel_ring.h
index 5bdce24994aa..cc0ebca65167 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring.h
+++ b/drivers/gpu/drm/i915/gt/intel_ring.h
@@ -88,6 +88,8 @@ static inline u32 intel_ring_offset(const struct i915_request *rq, void *addr)
static inline void
assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail)
{
+ unsigned int head = READ_ONCE(ring->head);
+
GEM_BUG_ON(!intel_ring_offset_valid(ring, tail));
/*
@@ -105,8 +107,7 @@ assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail)
* into the same cacheline as ring->head.
*/
#define cacheline(a) round_down(a, CACHELINE_BYTES)
- GEM_BUG_ON(cacheline(tail) == cacheline(ring->head) &&
- tail < ring->head);
+ GEM_BUG_ON(cacheline(tail) == cacheline(head) && tail < head);
#undef cacheline
}
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index fdc3f10e12aa..ca7286e58409 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -42,6 +42,7 @@
#include "intel_reset.h"
#include "intel_ring.h"
#include "intel_workarounds.h"
+#include "shmem_utils.h"
/* Rough estimate of the typical request size, performing a flush,
* set-context and then emitting the batch.
@@ -577,8 +578,9 @@ static void flush_cs_tlb(struct intel_engine_cs *engine)
RING_INSTPM(engine->mmio_base),
INSTPM_SYNC_FLUSH, 0,
1000))
- DRM_ERROR("%s: wait for SyncFlush to complete for TLB invalidation timed out\n",
- engine->name);
+ drm_err(&dev_priv->drm,
+ "%s: wait for SyncFlush to complete for TLB invalidation timed out\n",
+ engine->name);
}
static void ring_setup_status_page(struct intel_engine_cs *engine)
@@ -601,8 +603,9 @@ static bool stop_ring(struct intel_engine_cs *engine)
MODE_IDLE,
MODE_IDLE,
1000)) {
- DRM_ERROR("%s : timed out trying to stop ring\n",
- engine->name);
+ drm_err(&dev_priv->drm,
+ "%s : timed out trying to stop ring\n",
+ engine->name);
/*
* Sometimes we observe that the idle flag is not
@@ -661,22 +664,23 @@ static int xcs_resume(struct intel_engine_cs *engine)
/* WaClearRingBufHeadRegAtInit:ctg,elk */
if (!stop_ring(engine)) {
/* G45 ring initialization often fails to reset head to zero */
- DRM_DEBUG_DRIVER("%s head not reset to zero "
+ drm_dbg(&dev_priv->drm, "%s head not reset to zero "
+ "ctl %08x head %08x tail %08x start %08x\n",
+ engine->name,
+ ENGINE_READ(engine, RING_CTL),
+ ENGINE_READ(engine, RING_HEAD),
+ ENGINE_READ(engine, RING_TAIL),
+ ENGINE_READ(engine, RING_START));
+
+ if (!stop_ring(engine)) {
+ drm_err(&dev_priv->drm,
+ "failed to set %s head to zero "
"ctl %08x head %08x tail %08x start %08x\n",
engine->name,
ENGINE_READ(engine, RING_CTL),
ENGINE_READ(engine, RING_HEAD),
ENGINE_READ(engine, RING_TAIL),
ENGINE_READ(engine, RING_START));
-
- if (!stop_ring(engine)) {
- DRM_ERROR("failed to set %s head to zero "
- "ctl %08x head %08x tail %08x start %08x\n",
- engine->name,
- ENGINE_READ(engine, RING_CTL),
- ENGINE_READ(engine, RING_HEAD),
- ENGINE_READ(engine, RING_TAIL),
- ENGINE_READ(engine, RING_START));
ret = -EIO;
goto out;
}
@@ -719,7 +723,7 @@ static int xcs_resume(struct intel_engine_cs *engine)
RING_CTL(engine->mmio_base),
RING_VALID, RING_VALID,
50)) {
- DRM_ERROR("%s initialization failed "
+ drm_err(&dev_priv->drm, "%s initialization failed "
"ctl %08x (valid? %d) head %08x [%08x] tail %08x [%08x] start %08x [expected %08x]\n",
engine->name,
ENGINE_READ(engine, RING_CTL),
@@ -1238,7 +1242,7 @@ alloc_context_vma(struct intel_engine_cs *engine)
i915_gem_object_set_cache_coherency(obj, I915_CACHE_L3_LLC);
if (engine->default_state) {
- void *defaults, *vaddr;
+ void *vaddr;
vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
if (IS_ERR(vaddr)) {
@@ -1246,15 +1250,8 @@ alloc_context_vma(struct intel_engine_cs *engine)
goto err_obj;
}
- defaults = i915_gem_object_pin_map(engine->default_state,
- I915_MAP_WB);
- if (IS_ERR(defaults)) {
- err = PTR_ERR(defaults);
- goto err_map;
- }
-
- memcpy(vaddr, defaults, engine->context_size);
- i915_gem_object_unpin_map(engine->default_state);
+ shmem_read(engine->default_state, 0,
+ vaddr, engine->context_size);
i915_gem_object_flush_map(obj);
i915_gem_object_unpin_map(obj);
@@ -1268,8 +1265,6 @@ alloc_context_vma(struct intel_engine_cs *engine)
return vma;
-err_map:
- i915_gem_object_unpin_map(obj);
err_obj:
i915_gem_object_put(obj);
return ERR_PTR(err);
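
xcs_resume() above now calls stop_ring() a second time after a failure, logging the first miss at debug level and escalating to an error only when the retry also fails. A small standalone sketch of that retry-then-escalate shape (the helper names are hypothetical, not the driver's):

```c
#include <stdbool.h>
#include <stdio.h>

/* Stand-in for the hardware poke; pretend it succeeds on the second try. */
static bool try_stop(int *attempts)
{
	return ++(*attempts) >= 2;
}

static int stop_with_retry(const char *name)
{
	int attempts = 0;

	if (!try_stop(&attempts)) {
		fprintf(stderr, "debug: %s did not stop, retrying\n", name);

		if (!try_stop(&attempts)) {
			fprintf(stderr, "error: failed to stop %s\n", name);
			return -1;
		}
	}

	return 0;
}

int main(void)
{
	return stop_with_retry("rcs0") ? 1 : 0;
}
```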
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c
index 19542fd9e207..2f59fc6df3c2 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -8,12 +8,15 @@
#include "i915_drv.h"
#include "intel_gt.h"
+#include "intel_gt_clock_utils.h"
#include "intel_gt_irq.h"
#include "intel_gt_pm_irq.h"
#include "intel_rps.h"
#include "intel_sideband.h"
#include "../../../platform/x86/intel_ips.h"
+#define BUSY_MAX_EI 20u /* ms */
+
/*
* Lock protecting IPS related data structures
*/
@@ -44,6 +47,100 @@ static inline void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val)
intel_uncore_write_fw(uncore, reg, val);
}
+static void rps_timer(struct timer_list *t)
+{
+ struct intel_rps *rps = from_timer(rps, t, timer);
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ s64 max_busy[3] = {};
+ ktime_t dt, last;
+
+ for_each_engine(engine, rps_to_gt(rps), id) {
+ s64 busy;
+ int i;
+
+ dt = intel_engine_get_busy_time(engine);
+ last = engine->stats.rps;
+ engine->stats.rps = dt;
+
+ busy = ktime_to_ns(ktime_sub(dt, last));
+ for (i = 0; i < ARRAY_SIZE(max_busy); i++) {
+ if (busy > max_busy[i])
+ swap(busy, max_busy[i]);
+ }
+ }
+
+ dt = ktime_get();
+ last = rps->pm_timestamp;
+ rps->pm_timestamp = dt;
+
+ if (intel_rps_is_active(rps)) {
+ s64 busy;
+ int i;
+
+ dt = ktime_sub(dt, last);
+
+ /*
+ * Our goal is to evaluate each engine independently, so we run
+ * at the lowest clocks required to sustain the heaviest
+ * workload. However, a task may be split into sequential
+ * dependent operations across a set of engines, such that
+ * the independent contributions do not account for high load,
+ * but overall the task is GPU bound. For example, consider
+ * video decode on vcs followed by colour post-processing
+ * on vecs, followed by general post-processing on rcs.
+ * Since multiple engines being active does not necessarily imply
+ * a single continuous workload across all engines, we hedge our
+ * bets by only contributing a factor of the distributed
+ * load into our busyness calculation.
+ */
+ busy = max_busy[0];
+ for (i = 1; i < ARRAY_SIZE(max_busy); i++) {
+ if (!max_busy[i])
+ break;
+
+ busy += div_u64(max_busy[i], 1 << i);
+ }
+ GT_TRACE(rps_to_gt(rps),
+ "busy:%lld [%d%%], max:[%lld, %lld, %lld], interval:%d\n",
+ busy, (int)div64_u64(100 * busy, dt),
+ max_busy[0], max_busy[1], max_busy[2],
+ rps->pm_interval);
+
+ if (100 * busy > rps->power.up_threshold * dt &&
+ rps->cur_freq < rps->max_freq_softlimit) {
+ rps->pm_iir |= GEN6_PM_RP_UP_THRESHOLD;
+ rps->pm_interval = 1;
+ schedule_work(&rps->work);
+ } else if (100 * busy < rps->power.down_threshold * dt &&
+ rps->cur_freq > rps->min_freq_softlimit) {
+ rps->pm_iir |= GEN6_PM_RP_DOWN_THRESHOLD;
+ rps->pm_interval = 1;
+ schedule_work(&rps->work);
+ } else {
+ rps->last_adj = 0;
+ }
+
+ mod_timer(&rps->timer,
+ jiffies + msecs_to_jiffies(rps->pm_interval));
+ rps->pm_interval = min(rps->pm_interval * 2, BUSY_MAX_EI);
+ }
+}
+
+static void rps_start_timer(struct intel_rps *rps)
+{
+ rps->pm_timestamp = ktime_sub(ktime_get(), rps->pm_timestamp);
+ rps->pm_interval = 1;
+ mod_timer(&rps->timer, jiffies + 1);
+}
+
+static void rps_stop_timer(struct intel_rps *rps)
+{
+ del_timer_sync(&rps->timer);
+ rps->pm_timestamp = ktime_sub(ktime_get(), rps->pm_timestamp);
+ cancel_work_sync(&rps->work);
+}
+
static u32 rps_pm_mask(struct intel_rps *rps, u8 val)
{
u32 mask = 0;
@@ -57,7 +154,7 @@ static u32 rps_pm_mask(struct intel_rps *rps, u8 val)
if (val < rps->max_freq_softlimit)
mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD;
- mask &= READ_ONCE(rps->pm_events);
+ mask &= rps->pm_events;
return rps_pm_sanitize_mask(rps, ~mask);
}
@@ -70,18 +167,11 @@ static void rps_reset_ei(struct intel_rps *rps)
static void rps_enable_interrupts(struct intel_rps *rps)
{
struct intel_gt *gt = rps_to_gt(rps);
- u32 events;
- rps_reset_ei(rps);
+ GT_TRACE(gt, "interrupts:on rps->pm_events: %x, rps_pm_mask:%x\n",
+ rps->pm_events, rps_pm_mask(rps, rps->last_freq));
- if (IS_VALLEYVIEW(gt->i915))
- /* WaGsvRC0ResidencyMethod:vlv */
- events = GEN6_PM_RP_UP_EI_EXPIRED;
- else
- events = (GEN6_PM_RP_UP_THRESHOLD |
- GEN6_PM_RP_DOWN_THRESHOLD |
- GEN6_PM_RP_DOWN_TIMEOUT);
- WRITE_ONCE(rps->pm_events, events);
+ rps_reset_ei(rps);
spin_lock_irq(&gt->irq_lock);
gen6_gt_pm_enable_irq(gt, rps->pm_events);
@@ -120,8 +210,6 @@ static void rps_disable_interrupts(struct intel_rps *rps)
{
struct intel_gt *gt = rps_to_gt(rps);
- WRITE_ONCE(rps->pm_events, 0);
-
intel_uncore_write(gt->uncore,
GEN6_PMINTRMSK, rps_pm_sanitize_mask(rps, ~0u));
@@ -140,6 +228,7 @@ static void rps_disable_interrupts(struct intel_rps *rps)
cancel_work_sync(&rps->work);
rps_reset_interrupts(rps);
+ GT_TRACE(gt, "interrupts:off\n");
}
static const struct cparams {
@@ -186,14 +275,12 @@ static void gen5_rps_init(struct intel_rps *rps)
fmin = (rgvmodectl & MEMMODE_FMIN_MASK);
fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
MEMMODE_FSTART_SHIFT;
- DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n",
- fmax, fmin, fstart);
+ drm_dbg(&i915->drm, "fmax: %d, fmin: %d, fstart: %d\n",
+ fmax, fmin, fstart);
rps->min_freq = fmax;
+ rps->efficient_freq = fstart;
rps->max_freq = fmin;
-
- rps->idle_freq = rps->min_freq;
- rps->cur_freq = rps->idle_freq;
}
static unsigned long
@@ -456,7 +543,8 @@ static bool gen5_rps_enable(struct intel_rps *rps)
if (wait_for_atomic((intel_uncore_read(uncore, MEMSWCTL) &
MEMCTL_CMD_STS) == 0, 10))
- DRM_ERROR("stuck trying to change perf mode\n");
+ drm_err(&uncore->i915->drm,
+ "stuck trying to change perf mode\n");
mdelay(1);
gen5_rps_set(rps, rps->cur_freq);
@@ -533,8 +621,8 @@ static u32 rps_limits(struct intel_rps *rps, u8 val)
static void rps_set_power(struct intel_rps *rps, int new_power)
{
- struct intel_uncore *uncore = rps_to_uncore(rps);
- struct drm_i915_private *i915 = rps_to_i915(rps);
+ struct intel_gt *gt = rps_to_gt(rps);
+ struct intel_uncore *uncore = gt->uncore;
u32 threshold_up = 0, threshold_down = 0; /* in % */
u32 ei_up = 0, ei_down = 0;
@@ -543,55 +631,49 @@ static void rps_set_power(struct intel_rps *rps, int new_power)
if (new_power == rps->power.mode)
return;
+ threshold_up = 95;
+ threshold_down = 85;
+
/* Note the units here are not exactly 1us, but 1280ns. */
switch (new_power) {
case LOW_POWER:
- /* Upclock if more than 95% busy over 16ms */
ei_up = 16000;
- threshold_up = 95;
-
- /* Downclock if less than 85% busy over 32ms */
ei_down = 32000;
- threshold_down = 85;
break;
case BETWEEN:
- /* Upclock if more than 90% busy over 13ms */
ei_up = 13000;
- threshold_up = 90;
-
- /* Downclock if less than 75% busy over 32ms */
ei_down = 32000;
- threshold_down = 75;
break;
case HIGH_POWER:
- /* Upclock if more than 85% busy over 10ms */
ei_up = 10000;
- threshold_up = 85;
-
- /* Downclock if less than 60% busy over 32ms */
ei_down = 32000;
- threshold_down = 60;
break;
}
/* When byt can survive without system hang with dynamic
* sw freq adjustments, this restriction can be lifted.
*/
- if (IS_VALLEYVIEW(i915))
+ if (IS_VALLEYVIEW(gt->i915))
goto skip_hw_write;
- set(uncore, GEN6_RP_UP_EI, GT_INTERVAL_FROM_US(i915, ei_up));
+ GT_TRACE(gt,
+ "changing power mode [%d], up %d%% @ %dus, down %d%% @ %dus\n",
+ new_power, threshold_up, ei_up, threshold_down, ei_down);
+
+ set(uncore, GEN6_RP_UP_EI,
+ intel_gt_ns_to_pm_interval(gt, ei_up * 1000));
set(uncore, GEN6_RP_UP_THRESHOLD,
- GT_INTERVAL_FROM_US(i915, ei_up * threshold_up / 100));
+ intel_gt_ns_to_pm_interval(gt, ei_up * threshold_up * 10));
- set(uncore, GEN6_RP_DOWN_EI, GT_INTERVAL_FROM_US(i915, ei_down));
+ set(uncore, GEN6_RP_DOWN_EI,
+ intel_gt_ns_to_pm_interval(gt, ei_down * 1000));
set(uncore, GEN6_RP_DOWN_THRESHOLD,
- GT_INTERVAL_FROM_US(i915, ei_down * threshold_down / 100));
+ intel_gt_ns_to_pm_interval(gt, ei_down * threshold_down * 10));
set(uncore, GEN6_RP_CONTROL,
- (INTEL_GEN(i915) > 9 ? 0 : GEN6_RP_MEDIA_TURBO) |
+ (INTEL_GEN(gt->i915) > 9 ? 0 : GEN6_RP_MEDIA_TURBO) |
GEN6_RP_MEDIA_HW_NORMAL_MODE |
GEN6_RP_MEDIA_IS_GFX |
GEN6_RP_ENABLE |
@@ -646,9 +728,11 @@ static void gen6_rps_set_thresholds(struct intel_rps *rps, u8 val)
void intel_rps_mark_interactive(struct intel_rps *rps, bool interactive)
{
+ GT_TRACE(rps_to_gt(rps), "mark interactive: %s\n", yesno(interactive));
+
mutex_lock(&rps->power.mutex);
if (interactive) {
- if (!rps->power.interactive++ && READ_ONCE(rps->active))
+ if (!rps->power.interactive++ && intel_rps_is_active(rps))
rps_set_power(rps, HIGH_POWER);
} else {
GEM_BUG_ON(!rps->power.interactive);
@@ -673,6 +757,9 @@ static int gen6_rps_set(struct intel_rps *rps, u8 val)
GEN6_AGGRESSIVE_TURBO);
set(uncore, GEN6_RPNSWREQ, swreq);
+ GT_TRACE(rps_to_gt(rps), "set val:%x, freq:%d, swreq:%x\n",
+ val, intel_gpu_freq(rps, val), swreq);
+
return 0;
}
@@ -685,6 +772,9 @@ static int vlv_rps_set(struct intel_rps *rps, u8 val)
err = vlv_punit_write(i915, PUNIT_REG_GPU_FREQ_REQ, val);
vlv_punit_put(i915);
+ GT_TRACE(rps_to_gt(rps), "set val:%x, freq:%d\n",
+ val, intel_gpu_freq(rps, val));
+
return err;
}
@@ -715,29 +805,30 @@ static int rps_set(struct intel_rps *rps, u8 val, bool update)
void intel_rps_unpark(struct intel_rps *rps)
{
- u8 freq;
-
- if (!rps->enabled)
+ if (!intel_rps_is_enabled(rps))
return;
+ GT_TRACE(rps_to_gt(rps), "unpark:%x\n", rps->cur_freq);
+
/*
* Use the user's desired frequency as a guide, but for better
* performance, jump directly to RPe as our starting frequency.
*/
mutex_lock(&rps->lock);
- WRITE_ONCE(rps->active, true);
-
- freq = max(rps->cur_freq, rps->efficient_freq),
- freq = clamp(freq, rps->min_freq_softlimit, rps->max_freq_softlimit);
- intel_rps_set(rps, freq);
-
- rps->last_adj = 0;
+ intel_rps_set_active(rps);
+ intel_rps_set(rps,
+ clamp(rps->cur_freq,
+ rps->min_freq_softlimit,
+ rps->max_freq_softlimit));
mutex_unlock(&rps->lock);
- if (INTEL_GEN(rps_to_i915(rps)) >= 6)
+ rps->pm_iir = 0;
+ if (intel_rps_has_interrupts(rps))
rps_enable_interrupts(rps);
+ if (intel_rps_uses_timer(rps))
+ rps_start_timer(rps);
if (IS_GEN(rps_to_i915(rps), 5))
gen5_rps_update(rps);
@@ -745,15 +836,16 @@ void intel_rps_unpark(struct intel_rps *rps)
void intel_rps_park(struct intel_rps *rps)
{
- struct drm_i915_private *i915 = rps_to_i915(rps);
+ int adj;
- if (!rps->enabled)
+ if (!intel_rps_clear_active(rps))
return;
- if (INTEL_GEN(i915) >= 6)
+ if (intel_rps_uses_timer(rps))
+ rps_stop_timer(rps);
+ if (intel_rps_has_interrupts(rps))
rps_disable_interrupts(rps);
- WRITE_ONCE(rps->active, false);
if (rps->last_freq <= rps->idle_freq)
return;
@@ -784,8 +876,15 @@ void intel_rps_park(struct intel_rps *rps)
* (Note we accommodate Cherryview's limitation of only using an
* even bin by applying it to all.)
*/
- rps->cur_freq =
- max_t(int, round_down(rps->cur_freq - 1, 2), rps->min_freq);
+ adj = rps->last_adj;
+ if (adj < 0)
+ adj *= 2;
+ else /* CHV needs even encode values */
+ adj = -2;
+ rps->last_adj = adj;
+ rps->cur_freq = max_t(int, rps->cur_freq + adj, rps->min_freq);
+
+ GT_TRACE(rps_to_gt(rps), "park:%x\n", rps->cur_freq);
}
void intel_rps_boost(struct i915_request *rq)
@@ -793,7 +892,7 @@ void intel_rps_boost(struct i915_request *rq)
struct intel_rps *rps = &READ_ONCE(rq->engine)->gt->rps;
unsigned long flags;
- if (i915_request_signaled(rq) || !READ_ONCE(rps->active))
+ if (i915_request_signaled(rq) || !intel_rps_is_active(rps))
return;
/* Serializes with i915_request_retire() */
@@ -802,6 +901,9 @@ void intel_rps_boost(struct i915_request *rq)
!dma_fence_is_signaled_locked(&rq->fence)) {
set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags);
+ GT_TRACE(rps_to_gt(rps), "boost fence:%llx:%llx\n",
+ rq->fence.context, rq->fence.seqno);
+
if (!atomic_fetch_inc(&rps->num_waiters) &&
READ_ONCE(rps->cur_freq) < rps->boost_freq)
schedule_work(&rps->work);
@@ -819,7 +921,7 @@ int intel_rps_set(struct intel_rps *rps, u8 val)
GEM_BUG_ON(val > rps->max_freq);
GEM_BUG_ON(val < rps->min_freq);
- if (rps->active) {
+ if (intel_rps_is_active(rps)) {
err = rps_set(rps, val, true);
if (err)
return err;
@@ -828,7 +930,7 @@ int intel_rps_set(struct intel_rps *rps, u8 val)
* Make sure we continue to get interrupts
* until we hit the minimum or maximum frequencies.
*/
- if (INTEL_GEN(rps_to_i915(rps)) >= 6) {
+ if (intel_rps_has_interrupts(rps)) {
struct intel_uncore *uncore = rps_to_uncore(rps);
set(uncore,
@@ -896,12 +998,14 @@ static void gen6_rps_init(struct intel_rps *rps)
static bool rps_reset(struct intel_rps *rps)
{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+
/* force a reset */
rps->power.mode = -1;
rps->last_freq = -1;
if (rps_set(rps, rps->min_freq, true)) {
- DRM_ERROR("Failed to reset RPS to initial values\n");
+ drm_err(&i915->drm, "Failed to reset RPS to initial values\n");
return false;
}
@@ -912,20 +1016,18 @@ static bool rps_reset(struct intel_rps *rps)
/* See the Gen9_GT_PM_Programming_Guide doc for the below */
static bool gen9_rps_enable(struct intel_rps *rps)
{
- struct drm_i915_private *i915 = rps_to_i915(rps);
- struct intel_uncore *uncore = rps_to_uncore(rps);
+ struct intel_gt *gt = rps_to_gt(rps);
+ struct intel_uncore *uncore = gt->uncore;
/* Program defaults and thresholds for RPS */
- if (IS_GEN(i915, 9))
+ if (IS_GEN(gt->i915, 9))
intel_uncore_write_fw(uncore, GEN6_RC_VIDEO_FREQ,
GEN9_FREQUENCY(rps->rp1_freq));
- /* 1 second timeout */
- intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT,
- GT_INTERVAL_FROM_US(i915, 1000000));
-
intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 0xa);
+ rps->pm_events = GEN6_PM_RP_UP_THRESHOLD | GEN6_PM_RP_DOWN_THRESHOLD;
+
return rps_reset(rps);
}
@@ -936,12 +1038,10 @@ static bool gen8_rps_enable(struct intel_rps *rps)
intel_uncore_write_fw(uncore, GEN6_RC_VIDEO_FREQ,
HSW_FREQUENCY(rps->rp1_freq));
- /* NB: Docs say 1s, and 1000000 - which aren't equivalent */
- intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT,
- 100000000 / 128); /* 1 second timeout */
-
intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);
+ rps->pm_events = GEN6_PM_RP_UP_THRESHOLD | GEN6_PM_RP_DOWN_THRESHOLD;
+
return rps_reset(rps);
}
@@ -953,6 +1053,10 @@ static bool gen6_rps_enable(struct intel_rps *rps)
intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 50000);
intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);
+ rps->pm_events = (GEN6_PM_RP_UP_THRESHOLD |
+ GEN6_PM_RP_DOWN_THRESHOLD |
+ GEN6_PM_RP_DOWN_TIMEOUT);
+
return rps_reset(rps);
}
@@ -1038,6 +1142,10 @@ static bool chv_rps_enable(struct intel_rps *rps)
GEN6_RP_UP_BUSY_AVG |
GEN6_RP_DOWN_IDLE_AVG);
+ rps->pm_events = (GEN6_PM_RP_UP_THRESHOLD |
+ GEN6_PM_RP_DOWN_THRESHOLD |
+ GEN6_PM_RP_DOWN_TIMEOUT);
+
/* Setting Fixed Bias */
vlv_punit_get(i915);
@@ -1052,8 +1160,8 @@ static bool chv_rps_enable(struct intel_rps *rps)
drm_WARN_ONCE(&i915->drm, (val & GPLLENABLE) == 0,
"GPLL not enabled\n");
- DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
- DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
+ drm_dbg(&i915->drm, "GPLL enabled? %s\n", yesno(val & GPLLENABLE));
+ drm_dbg(&i915->drm, "GPU status: 0x%08x\n", val);
return rps_reset(rps);
}
@@ -1136,6 +1244,9 @@ static bool vlv_rps_enable(struct intel_rps *rps)
GEN6_RP_UP_BUSY_AVG |
GEN6_RP_DOWN_IDLE_CONT);
+ /* WaGsvRC0ResidencyMethod:vlv */
+ rps->pm_events = GEN6_PM_RP_UP_EI_EXPIRED;
+
vlv_punit_get(i915);
/* Setting Fixed Bias */
@@ -1150,8 +1261,8 @@ static bool vlv_rps_enable(struct intel_rps *rps)
drm_WARN_ONCE(&i915->drm, (val & GPLLENABLE) == 0,
"GPLL not enabled\n");
- DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
- DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
+ drm_dbg(&i915->drm, "GPLL enabled? %s\n", yesno(val & GPLLENABLE));
+ drm_dbg(&i915->drm, "GPU status: 0x%08x\n", val);
return rps_reset(rps);
}
@@ -1194,33 +1305,71 @@ static unsigned long __ips_gfx_val(struct intel_ips *ips)
return ips->gfx_power + state2;
}
+static bool has_busy_stats(struct intel_rps *rps)
+{
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+
+ for_each_engine(engine, rps_to_gt(rps), id) {
+ if (!intel_engine_supports_stats(engine))
+ return false;
+ }
+
+ return true;
+}
+
void intel_rps_enable(struct intel_rps *rps)
{
struct drm_i915_private *i915 = rps_to_i915(rps);
struct intel_uncore *uncore = rps_to_uncore(rps);
+ bool enabled = false;
+
+ if (!HAS_RPS(i915))
+ return;
+
+ intel_gt_check_clock_frequency(rps_to_gt(rps));
intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
- if (IS_CHERRYVIEW(i915))
- rps->enabled = chv_rps_enable(rps);
+ if (rps->max_freq <= rps->min_freq)
+ /* leave disabled, no room for dynamic reclocking */;
+ else if (IS_CHERRYVIEW(i915))
+ enabled = chv_rps_enable(rps);
else if (IS_VALLEYVIEW(i915))
- rps->enabled = vlv_rps_enable(rps);
+ enabled = vlv_rps_enable(rps);
else if (INTEL_GEN(i915) >= 9)
- rps->enabled = gen9_rps_enable(rps);
+ enabled = gen9_rps_enable(rps);
else if (INTEL_GEN(i915) >= 8)
- rps->enabled = gen8_rps_enable(rps);
+ enabled = gen8_rps_enable(rps);
else if (INTEL_GEN(i915) >= 6)
- rps->enabled = gen6_rps_enable(rps);
+ enabled = gen6_rps_enable(rps);
else if (IS_IRONLAKE_M(i915))
- rps->enabled = gen5_rps_enable(rps);
+ enabled = gen5_rps_enable(rps);
+ else
+ MISSING_CASE(INTEL_GEN(i915));
intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
- if (!rps->enabled)
+ if (!enabled)
return;
- drm_WARN_ON(&i915->drm, rps->max_freq < rps->min_freq);
- drm_WARN_ON(&i915->drm, rps->idle_freq > rps->max_freq);
+ GT_TRACE(rps_to_gt(rps),
+ "min:%x, max:%x, freq:[%d, %d]\n",
+ rps->min_freq, rps->max_freq,
+ intel_gpu_freq(rps, rps->min_freq),
+ intel_gpu_freq(rps, rps->max_freq));
- drm_WARN_ON(&i915->drm, rps->efficient_freq < rps->min_freq);
- drm_WARN_ON(&i915->drm, rps->efficient_freq > rps->max_freq);
+ GEM_BUG_ON(rps->max_freq < rps->min_freq);
+ GEM_BUG_ON(rps->idle_freq > rps->max_freq);
+
+ GEM_BUG_ON(rps->efficient_freq < rps->min_freq);
+ GEM_BUG_ON(rps->efficient_freq > rps->max_freq);
+
+ if (has_busy_stats(rps))
+ intel_rps_set_timer(rps);
+ else if (INTEL_GEN(i915) >= 6)
+ intel_rps_set_interrupts(rps);
+ else
+ /* Ironlake currently uses intel_ips.ko */ {}
+
+ intel_rps_set_enabled(rps);
}
static void gen6_rps_disable(struct intel_rps *rps)
@@ -1232,7 +1381,9 @@ void intel_rps_disable(struct intel_rps *rps)
{
struct drm_i915_private *i915 = rps_to_i915(rps);
- rps->enabled = false;
+ intel_rps_clear_enabled(rps);
+ intel_rps_clear_interrupts(rps);
+ intel_rps_clear_timer(rps);
if (INTEL_GEN(i915) >= 6)
gen6_rps_disable(rps);
@@ -1308,7 +1459,8 @@ static void vlv_init_gpll_ref_freq(struct intel_rps *rps)
CCK_GPLL_CLOCK_CONTROL,
i915->czclk_freq);
- DRM_DEBUG_DRIVER("GPLL reference freq: %d kHz\n", rps->gpll_ref_freq);
+ drm_dbg(&i915->drm, "GPLL reference freq: %d kHz\n",
+ rps->gpll_ref_freq);
}
static void vlv_rps_init(struct intel_rps *rps)
@@ -1336,28 +1488,24 @@ static void vlv_rps_init(struct intel_rps *rps)
i915->mem_freq = 1333;
break;
}
- DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", i915->mem_freq);
+ drm_dbg(&i915->drm, "DDR speed: %d MHz\n", i915->mem_freq);
rps->max_freq = vlv_rps_max_freq(rps);
rps->rp0_freq = rps->max_freq;
- DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
- intel_gpu_freq(rps, rps->max_freq),
- rps->max_freq);
+ drm_dbg(&i915->drm, "max GPU freq: %d MHz (%u)\n",
+ intel_gpu_freq(rps, rps->max_freq), rps->max_freq);
rps->efficient_freq = vlv_rps_rpe_freq(rps);
- DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
- intel_gpu_freq(rps, rps->efficient_freq),
- rps->efficient_freq);
+ drm_dbg(&i915->drm, "RPe GPU freq: %d MHz (%u)\n",
+ intel_gpu_freq(rps, rps->efficient_freq), rps->efficient_freq);
rps->rp1_freq = vlv_rps_guar_freq(rps);
- DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n",
- intel_gpu_freq(rps, rps->rp1_freq),
- rps->rp1_freq);
+ drm_dbg(&i915->drm, "RP1(Guar Freq) GPU freq: %d MHz (%u)\n",
+ intel_gpu_freq(rps, rps->rp1_freq), rps->rp1_freq);
rps->min_freq = vlv_rps_min_freq(rps);
- DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
- intel_gpu_freq(rps, rps->min_freq),
- rps->min_freq);
+ drm_dbg(&i915->drm, "min GPU freq: %d MHz (%u)\n",
+ intel_gpu_freq(rps, rps->min_freq), rps->min_freq);
vlv_iosf_sb_put(i915,
BIT(VLV_IOSF_SB_PUNIT) |
@@ -1387,28 +1535,24 @@ static void chv_rps_init(struct intel_rps *rps)
i915->mem_freq = 1600;
break;
}
- DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", i915->mem_freq);
+ drm_dbg(&i915->drm, "DDR speed: %d MHz\n", i915->mem_freq);
rps->max_freq = chv_rps_max_freq(rps);
rps->rp0_freq = rps->max_freq;
- DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
- intel_gpu_freq(rps, rps->max_freq),
- rps->max_freq);
+ drm_dbg(&i915->drm, "max GPU freq: %d MHz (%u)\n",
+ intel_gpu_freq(rps, rps->max_freq), rps->max_freq);
rps->efficient_freq = chv_rps_rpe_freq(rps);
- DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
- intel_gpu_freq(rps, rps->efficient_freq),
- rps->efficient_freq);
+ drm_dbg(&i915->drm, "RPe GPU freq: %d MHz (%u)\n",
+ intel_gpu_freq(rps, rps->efficient_freq), rps->efficient_freq);
rps->rp1_freq = chv_rps_guar_freq(rps);
- DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n",
- intel_gpu_freq(rps, rps->rp1_freq),
- rps->rp1_freq);
+ drm_dbg(&i915->drm, "RP1(Guar) GPU freq: %d MHz (%u)\n",
+ intel_gpu_freq(rps, rps->rp1_freq), rps->rp1_freq);
rps->min_freq = chv_rps_min_freq(rps);
- DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
- intel_gpu_freq(rps, rps->min_freq),
- rps->min_freq);
+ drm_dbg(&i915->drm, "min GPU freq: %d MHz (%u)\n",
+ intel_gpu_freq(rps, rps->min_freq), rps->min_freq);
vlv_iosf_sb_put(i915,
BIT(VLV_IOSF_SB_PUNIT) |
@@ -1471,12 +1615,13 @@ static void rps_work(struct work_struct *work)
{
struct intel_rps *rps = container_of(work, typeof(*rps), work);
struct intel_gt *gt = rps_to_gt(rps);
+ struct drm_i915_private *i915 = rps_to_i915(rps);
bool client_boost = false;
int new_freq, adj, min, max;
u32 pm_iir = 0;
spin_lock_irq(&gt->irq_lock);
- pm_iir = fetch_and_zero(&rps->pm_iir) & READ_ONCE(rps->pm_events);
+ pm_iir = fetch_and_zero(&rps->pm_iir) & rps->pm_events;
client_boost = atomic_read(&rps->num_waiters);
spin_unlock_irq(&gt->irq_lock);
@@ -1485,6 +1630,10 @@ static void rps_work(struct work_struct *work)
goto out;
mutex_lock(&rps->lock);
+ if (!intel_rps_is_active(rps)) {
+ mutex_unlock(&rps->lock);
+ return;
+ }
pm_iir |= vlv_wa_c0_ei(rps, pm_iir);
@@ -1494,6 +1643,12 @@ static void rps_work(struct work_struct *work)
max = rps->max_freq_softlimit;
if (client_boost)
max = rps->max_freq;
+
+ GT_TRACE(gt,
+ "pm_iir:%x, client_boost:%s, last:%d, cur:%x, min:%x, max:%x\n",
+ pm_iir, yesno(client_boost),
+ adj, new_freq, min, max);
+
if (client_boost && new_freq < rps->boost_freq) {
new_freq = rps->boost_freq;
adj = 0;
@@ -1525,30 +1680,18 @@ static void rps_work(struct work_struct *work)
adj = 0;
}
- rps->last_adj = adj;
-
/*
- * Limit deboosting and boosting to keep ourselves at the extremes
- * when in the respective power modes (i.e. slowly decrease frequencies
- * while in the HIGH_POWER zone and slowly increase frequencies while
- * in the LOW_POWER zone). On idle, we will hit the timeout and drop
- * to the next level quickly, and conversely if busy we expect to
- * hit a waitboost and rapidly switch into max power.
- */
- if ((adj < 0 && rps->power.mode == HIGH_POWER) ||
- (adj > 0 && rps->power.mode == LOW_POWER))
- rps->last_adj = 0;
-
- /* sysfs frequency interfaces may have snuck in while servicing the
- * interrupt
+ * sysfs frequency limits may have snuck in while
+ * servicing the interrupt
*/
new_freq += adj;
new_freq = clamp_t(int, new_freq, min, max);
if (intel_rps_set(rps, new_freq)) {
- DRM_DEBUG_DRIVER("Failed to set new GPU frequency\n");
- rps->last_adj = 0;
+ drm_dbg(&i915->drm, "Failed to set new GPU frequency\n");
+ adj = 0;
}
+ rps->last_adj = adj;
mutex_unlock(&rps->lock);
@@ -1568,6 +1711,8 @@ void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
if (unlikely(!events))
return;
+ GT_TRACE(gt, "irq events:%x\n", events);
+
gen6_gt_pm_mask_irq(gt, events);
rps->pm_iir |= events;
@@ -1579,10 +1724,12 @@ void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
struct intel_gt *gt = rps_to_gt(rps);
u32 events;
- events = pm_iir & READ_ONCE(rps->pm_events);
+ events = pm_iir & rps->pm_events;
if (events) {
spin_lock(&gt->irq_lock);
+ GT_TRACE(gt, "irq events:%x\n", events);
+
gen6_gt_pm_mask_irq(gt, events);
rps->pm_iir |= events;
@@ -1640,6 +1787,7 @@ void intel_rps_init_early(struct intel_rps *rps)
mutex_init(&rps->power.mutex);
INIT_WORK(&rps->work, rps_work);
+ timer_setup(&rps->timer, rps_timer, 0);
atomic_set(&rps->num_waiters, 0);
}
@@ -1668,9 +1816,10 @@ void intel_rps_init(struct intel_rps *rps)
sandybridge_pcode_read(i915, GEN6_READ_OC_PARAMS,
&params, NULL);
if (params & BIT(31)) { /* OC supported */
- DRM_DEBUG_DRIVER("Overclocking supported, max: %dMHz, overclock: %dMHz\n",
- (rps->max_freq & 0xff) * 50,
- (params & 0xff) * 50);
+ drm_dbg(&i915->drm,
+ "Overclocking supported, max: %dMHz, overclock: %dMHz\n",
+ (rps->max_freq & 0xff) * 50,
+ (params & 0xff) * 50);
rps->max_freq = params & 0xff;
}
}
@@ -1678,7 +1827,9 @@ void intel_rps_init(struct intel_rps *rps)
/* Finally allow us to boost to max by default */
rps->boost_freq = rps->max_freq;
rps->idle_freq = rps->min_freq;
- rps->cur_freq = rps->idle_freq;
+
+ /* Start in the middle, from here we will autotune based on workload */
+ rps->cur_freq = rps->efficient_freq;
rps->pm_intrmsk_mbz = 0;
@@ -1695,6 +1846,12 @@ void intel_rps_init(struct intel_rps *rps)
rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC;
}
+void intel_rps_sanitize(struct intel_rps *rps)
+{
+ if (INTEL_GEN(rps_to_i915(rps)) >= 6)
+ rps_disable_interrupts(rps);
+}
+
u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat)
{
struct drm_i915_private *i915 = rps_to_i915(rps);
@@ -1722,7 +1879,7 @@ static u32 read_cagf(struct intel_rps *rps)
freq = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
vlv_punit_put(i915);
} else {
- freq = intel_uncore_read(rps_to_gt(rps)->uncore, GEN6_RPSTAT1);
+ freq = intel_uncore_read(rps_to_uncore(rps), GEN6_RPSTAT1);
}
return intel_rps_get_cagf(rps, freq);
@@ -1730,7 +1887,7 @@ static u32 read_cagf(struct intel_rps *rps)
u32 intel_rps_read_actual_frequency(struct intel_rps *rps)
{
- struct intel_runtime_pm *rpm = rps_to_gt(rps)->uncore->rpm;
+ struct intel_runtime_pm *rpm = rps_to_uncore(rps)->rpm;
intel_wakeref_t wakeref;
u32 freq = 0;
@@ -1930,3 +2087,7 @@ bool i915_gpu_turbo_disable(void)
return ret;
}
EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable);
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftest_rps.c"
+#endif
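
The rps_timer() added above folds the per-engine busy times into one figure by taking the busiest engine at full weight and each runner-up at half the previous weight, then compares 100 * busy against threshold * dt to decide whether to reclock. A standalone sketch of that arithmetic, with made-up numbers:

```c
#include <stdint.h>
#include <stdio.h>

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

int main(void)
{
	/* Busy nanoseconds of the three busiest engines over the interval. */
	int64_t max_busy[3] = { 9000000, 6000000, 1000000 };
	int64_t dt = 10000000; /* evaluation interval, ns */
	unsigned int up_threshold = 95, down_threshold = 85; /* percent */
	int64_t busy;
	size_t i;

	/* Full weight for the heaviest engine, halving for each runner-up. */
	busy = max_busy[0];
	for (i = 1; i < ARRAY_SIZE(max_busy); i++) {
		if (!max_busy[i])
			break;
		busy += max_busy[i] >> i;
	}

	printf("busy %lld ns over %lld ns (%lld%%)\n",
	       (long long)busy, (long long)dt,
	       (long long)(100 * busy / dt));

	if (100 * busy > up_threshold * dt)
		printf("-> request an upclock\n");
	else if (100 * busy < down_threshold * dt)
		printf("-> request a downclock\n");
	else
		printf("-> hold the current frequency\n");

	return 0;
}
```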
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h b/drivers/gpu/drm/i915/gt/intel_rps.h
index dfa98194f3b2..8d3c9d663662 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.h
+++ b/drivers/gpu/drm/i915/gt/intel_rps.h
@@ -13,6 +13,7 @@ struct i915_request;
void intel_rps_init_early(struct intel_rps *rps);
void intel_rps_init(struct intel_rps *rps);
+void intel_rps_sanitize(struct intel_rps *rps);
void intel_rps_driver_register(struct intel_rps *rps);
void intel_rps_driver_unregister(struct intel_rps *rps);
@@ -36,4 +37,64 @@ void gen5_rps_irq_handler(struct intel_rps *rps);
void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir);
void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir);
+static inline bool intel_rps_is_enabled(const struct intel_rps *rps)
+{
+ return test_bit(INTEL_RPS_ENABLED, &rps->flags);
+}
+
+static inline void intel_rps_set_enabled(struct intel_rps *rps)
+{
+ set_bit(INTEL_RPS_ENABLED, &rps->flags);
+}
+
+static inline void intel_rps_clear_enabled(struct intel_rps *rps)
+{
+ clear_bit(INTEL_RPS_ENABLED, &rps->flags);
+}
+
+static inline bool intel_rps_is_active(const struct intel_rps *rps)
+{
+ return test_bit(INTEL_RPS_ACTIVE, &rps->flags);
+}
+
+static inline void intel_rps_set_active(struct intel_rps *rps)
+{
+ set_bit(INTEL_RPS_ACTIVE, &rps->flags);
+}
+
+static inline bool intel_rps_clear_active(struct intel_rps *rps)
+{
+ return test_and_clear_bit(INTEL_RPS_ACTIVE, &rps->flags);
+}
+
+static inline bool intel_rps_has_interrupts(const struct intel_rps *rps)
+{
+ return test_bit(INTEL_RPS_INTERRUPTS, &rps->flags);
+}
+
+static inline void intel_rps_set_interrupts(struct intel_rps *rps)
+{
+ set_bit(INTEL_RPS_INTERRUPTS, &rps->flags);
+}
+
+static inline void intel_rps_clear_interrupts(struct intel_rps *rps)
+{
+ clear_bit(INTEL_RPS_INTERRUPTS, &rps->flags);
+}
+
+static inline bool intel_rps_uses_timer(const struct intel_rps *rps)
+{
+ return test_bit(INTEL_RPS_TIMER, &rps->flags);
+}
+
+static inline void intel_rps_set_timer(struct intel_rps *rps)
+{
+ set_bit(INTEL_RPS_TIMER, &rps->flags);
+}
+
+static inline void intel_rps_clear_timer(struct intel_rps *rps)
+{
+ clear_bit(INTEL_RPS_TIMER, &rps->flags);
+}
+
#endif /* INTEL_RPS_H */
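
The header above replaces the old enabled/active booleans with a single flags word plus test/set/clear helpers; in the kernel these are atomic bitops (test_bit/set_bit/test_and_clear_bit). A rough, non-atomic userspace equivalent showing how the helpers are used (the INTEL_RPS_* names come from the diff, the rest is illustrative):

```c
#include <stdbool.h>
#include <stdio.h>

enum {
	INTEL_RPS_ENABLED = 0,
	INTEL_RPS_ACTIVE,
	INTEL_RPS_INTERRUPTS,
	INTEL_RPS_TIMER,
};

struct rps {
	unsigned long flags;
};

static bool rps_test(const struct rps *rps, int bit)
{
	return rps->flags & (1UL << bit);
}

static void rps_set(struct rps *rps, int bit)
{
	rps->flags |= 1UL << bit;
}

static bool rps_test_and_clear(struct rps *rps, int bit)
{
	bool was_set = rps_test(rps, bit);

	rps->flags &= ~(1UL << bit);
	return was_set;
}

int main(void)
{
	struct rps rps = { 0 };

	rps_set(&rps, INTEL_RPS_ENABLED);
	rps_set(&rps, INTEL_RPS_ACTIVE);

	/* Parking only proceeds if we were actually active. */
	if (rps_test_and_clear(&rps, INTEL_RPS_ACTIVE))
		printf("parked (still enabled: %d)\n",
		       rps_test(&rps, INTEL_RPS_ENABLED));
	return 0;
}
```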
diff --git a/drivers/gpu/drm/i915/gt/intel_rps_types.h b/drivers/gpu/drm/i915/gt/intel_rps_types.h
index c2e279154bd5..38083f0402d9 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_rps_types.h
@@ -31,6 +31,13 @@ struct intel_rps_ei {
u32 media_c0;
};
+enum {
+ INTEL_RPS_ENABLED = 0,
+ INTEL_RPS_ACTIVE,
+ INTEL_RPS_INTERRUPTS,
+ INTEL_RPS_TIMER,
+};
+
struct intel_rps {
struct mutex lock; /* protects enabling and the worker */
@@ -38,9 +45,12 @@ struct intel_rps {
* work, interrupts_enabled and pm_iir are protected by
* dev_priv->irq_lock
*/
+ struct timer_list timer;
struct work_struct work;
- bool enabled;
- bool active;
+ unsigned long flags;
+
+ ktime_t pm_timestamp;
+ u32 pm_interval;
u32 pm_iir;
/* PM interrupt bits that should never be masked */
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c
index 74f793423231..d173271c7397 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.c
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.c
@@ -65,7 +65,6 @@ u32 intel_sseu_make_rpcs(struct drm_i915_private *i915,
{
const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu;
bool subslice_pg = sseu->has_subslice_pg;
- struct intel_sseu ctx_sseu;
u8 slices, subslices;
u32 rpcs = 0;
@@ -78,31 +77,13 @@ u32 intel_sseu_make_rpcs(struct drm_i915_private *i915,
/*
* If i915/perf is active, we want a stable powergating configuration
- * on the system.
- *
- * We could choose full enablement, but on ICL we know there are use
- * cases which disable slices for functional, apart for performance
- * reasons. So in this case we select a known stable subset.
+ * on the system. Use the configuration pinned by i915/perf.
*/
- if (!i915->perf.exclusive_stream) {
- ctx_sseu = *req_sseu;
- } else {
- ctx_sseu = intel_sseu_from_device_info(sseu);
-
- if (IS_GEN(i915, 11)) {
- /*
- * We only need subslice count so it doesn't matter
- * which ones we select - just turn off low bits in the
- * amount of half of all available subslices per slice.
- */
- ctx_sseu.subslice_mask =
- ~(~0 << (hweight8(ctx_sseu.subslice_mask) / 2));
- ctx_sseu.slice_mask = 0x1;
- }
- }
+ if (i915->perf.exclusive_stream)
+ req_sseu = &i915->perf.sseu;
- slices = hweight8(ctx_sseu.slice_mask);
- subslices = hweight8(ctx_sseu.subslice_mask);
+ slices = hweight8(req_sseu->slice_mask);
+ subslices = hweight8(req_sseu->subslice_mask);
/*
* Since the SScount bitfield in GEN8_R_PWR_CLK_STATE is only three bits
@@ -175,13 +156,13 @@ u32 intel_sseu_make_rpcs(struct drm_i915_private *i915,
if (sseu->has_eu_pg) {
u32 val;
- val = ctx_sseu.min_eus_per_subslice << GEN8_RPCS_EU_MIN_SHIFT;
+ val = req_sseu->min_eus_per_subslice << GEN8_RPCS_EU_MIN_SHIFT;
GEM_BUG_ON(val & ~GEN8_RPCS_EU_MIN_MASK);
val &= GEN8_RPCS_EU_MIN_MASK;
rpcs |= val;
- val = ctx_sseu.max_eus_per_subslice << GEN8_RPCS_EU_MAX_SHIFT;
+ val = req_sseu->max_eus_per_subslice << GEN8_RPCS_EU_MAX_SHIFT;
GEM_BUG_ON(val & ~GEN8_RPCS_EU_MAX_MASK);
val &= GEN8_RPCS_EU_MAX_MASK;
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
index 08b56d7ab4f4..4546284fede1 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -119,6 +119,15 @@ static void __idle_hwsp_free(struct intel_timeline_hwsp *hwsp, int cacheline)
spin_unlock_irqrestore(&gt->hwsp_lock, flags);
}
+static void __rcu_cacheline_free(struct rcu_head *rcu)
+{
+ struct intel_timeline_cacheline *cl =
+ container_of(rcu, typeof(*cl), rcu);
+
+ i915_active_fini(&cl->active);
+ kfree(cl);
+}
+
static void __idle_cacheline_free(struct intel_timeline_cacheline *cl)
{
GEM_BUG_ON(!i915_active_is_idle(&cl->active));
@@ -127,8 +136,7 @@ static void __idle_cacheline_free(struct intel_timeline_cacheline *cl)
i915_vma_put(cl->hwsp->vma);
__idle_hwsp_free(cl->hwsp, ptr_unmask_bits(cl->vaddr, CACHELINE_BITS));
- i915_active_fini(&cl->active);
- kfree_rcu(cl, rcu);
+ call_rcu(&cl->rcu, __rcu_cacheline_free);
}
__i915_active_call
@@ -203,9 +211,9 @@ static void cacheline_free(struct intel_timeline_cacheline *cl)
i915_active_release(&cl->active);
}
-int intel_timeline_init(struct intel_timeline *timeline,
- struct intel_gt *gt,
- struct i915_vma *hwsp)
+static int intel_timeline_init(struct intel_timeline *timeline,
+ struct intel_gt *gt,
+ struct i915_vma *hwsp)
{
void *vaddr;
@@ -272,7 +280,7 @@ void intel_gt_init_timelines(struct intel_gt *gt)
INIT_LIST_HEAD(&timelines->hwsp_free_list);
}
-void intel_timeline_fini(struct intel_timeline *timeline)
+static void intel_timeline_fini(struct intel_timeline *timeline)
{
GEM_BUG_ON(atomic_read(&timeline->pin_count));
GEM_BUG_ON(!list_empty(&timeline->requests));
@@ -329,6 +337,13 @@ int intel_timeline_pin(struct intel_timeline *tl)
return 0;
}
+void intel_timeline_reset_seqno(const struct intel_timeline *tl)
+{
+ /* Must be pinned to be writable, and no requests in flight. */
+ GEM_BUG_ON(!atomic_read(&tl->pin_count));
+ WRITE_ONCE(*(u32 *)tl->hwsp_seqno, tl->seqno);
+}
+
void intel_timeline_enter(struct intel_timeline *tl)
{
struct intel_gt_timelines *timelines = &tl->gt->timelines;
@@ -357,8 +372,16 @@ void intel_timeline_enter(struct intel_timeline *tl)
return;
spin_lock(&timelines->lock);
- if (!atomic_fetch_inc(&tl->active_count))
+ if (!atomic_fetch_inc(&tl->active_count)) {
+ /*
+ * The HWSP is volatile, and may have been lost while inactive,
+ * e.g. across suspend/resume. Be paranoid, and ensure that
+ * the HWSP value matches our seqno so we don't proclaim
+ * the next request as already complete.
+ */
+ intel_timeline_reset_seqno(tl);
list_add_tail(&tl->link, &timelines->active_list);
+ }
spin_unlock(&timelines->lock);
}
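
Switching from kfree_rcu() to call_rcu() above lets the driver run i915_active_fini() after the grace period instead of only freeing the memory. Outside the kernel there is no RCU, so the sketch below stands in for it with an immediate callback; the point is the container_of-based finalise-then-free callback, and the helper names are illustrative:

```c
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct cb_head {
	void (*func)(struct cb_head *head);
};

/*
 * Stand-in for call_rcu(): here the callback runs immediately; in the
 * kernel it would run only after an RCU grace period.
 */
static void defer(struct cb_head *head, void (*func)(struct cb_head *))
{
	func(head);
}

struct cacheline {
	int tracking; /* state that needs explicit finalisation */
	struct cb_head rcu;
};

static void cacheline_free_cb(struct cb_head *head)
{
	struct cacheline *cl = container_of(head, struct cacheline, rcu);

	/* Finalise before freeing: this is what a plain kfree_rcu() cannot do. */
	printf("fini tracking state %d\n", cl->tracking);
	free(cl);
}

int main(void)
{
	struct cacheline *cl = calloc(1, sizeof(*cl));

	if (!cl)
		return 1;
	cl->tracking = 42;
	defer(&cl->rcu, cacheline_free_cb);
	return 0;
}
```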
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.h b/drivers/gpu/drm/i915/gt/intel_timeline.h
index f5b7eade3809..4298b9ac7327 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.h
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.h
@@ -31,11 +31,6 @@
#include "i915_syncmap.h"
#include "gt/intel_timeline_types.h"
-int intel_timeline_init(struct intel_timeline *tl,
- struct intel_gt *gt,
- struct i915_vma *hwsp);
-void intel_timeline_fini(struct intel_timeline *tl);
-
struct intel_timeline *
intel_timeline_create(struct intel_gt *gt, struct i915_vma *global_hwsp);
@@ -84,6 +79,8 @@ int intel_timeline_get_seqno(struct intel_timeline *tl,
void intel_timeline_exit(struct intel_timeline *tl);
void intel_timeline_unpin(struct intel_timeline *tl);
+void intel_timeline_reset_seqno(const struct intel_timeline *tl);
+
int intel_timeline_read_hwsp(struct i915_request *from,
struct i915_request *until,
u32 *hwsp_offset);
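
With the timeline change above, intel_timeline_enter() rewrites the HWSP seqno on the first activation (the 0 to 1 transition of active_count), since the backing page is volatile and may have been lost across suspend/resume. A compact sketch of that activate-once-then-resync pattern, with made-up field names:

```c
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

struct timeline {
	atomic_int active_count;
	uint32_t seqno;      /* software copy, always valid */
	uint32_t hwsp_seqno; /* memory the GPU reads/writes; may be stale */
};

static void timeline_enter(struct timeline *tl)
{
	/*
	 * Only the 0 -> 1 transition publishes the timeline, so that is
	 * the point to resync the possibly lost hardware status page.
	 */
	if (atomic_fetch_add(&tl->active_count, 1) == 0) {
		tl->hwsp_seqno = tl->seqno;
		printf("activated, hwsp resynced to %u\n", tl->seqno);
	}
}

int main(void)
{
	struct timeline tl = { .seqno = 100, .hwsp_seqno = 0xdead };

	timeline_enter(&tl); /* resyncs the HWSP */
	timeline_enter(&tl); /* already active, no resync */
	return 0;
}
```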
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 5176ad1a3976..90a2b9e399b0 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -485,25 +485,14 @@ static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine,
static void cnl_ctx_workarounds_init(struct intel_engine_cs *engine,
struct i915_wa_list *wal)
{
- struct drm_i915_private *i915 = engine->i915;
-
/* WaForceContextSaveRestoreNonCoherent:cnl */
WA_SET_BIT_MASKED(CNL_HDC_CHICKEN0,
HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT);
- /* WaThrottleEUPerfToAvoidTDBackPressure:cnl(pre-prod) */
- if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0))
- WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, THROTTLE_12_5);
-
/* WaDisableReplayBufferBankArbitrationOptimization:cnl */
WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
- /* WaDisableEnhancedSBEVertexCaching:cnl (pre-prod) */
- if (IS_CNL_REVID(i915, 0, CNL_REVID_B0))
- WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
- GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE);
-
/* WaPushConstantDereferenceHoldDisable:cnl */
WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE);
@@ -837,7 +826,7 @@ wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
intel_uncore_read(&i915->uncore, GEN10_MIRROR_FUSE3) &
GEN10_L3BANK_MASK;
- DRM_DEBUG_DRIVER("L3 fuse = %x\n", l3_fuse);
+ drm_dbg(&i915->drm, "L3 fuse = %x\n", l3_fuse);
l3_en = ~(l3_fuse << GEN10_L3BANK_PAIR_COUNT | l3_fuse);
} else {
l3_en = ~0;
@@ -846,7 +835,8 @@ wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
slice = fls(sseu->slice_mask) - 1;
subslice = fls(l3_en & intel_sseu_get_subslices(sseu, slice));
if (!subslice) {
- DRM_WARN("No common index found between subslice mask %x and L3 bank mask %x!\n",
+ drm_warn(&i915->drm,
+ "No common index found between subslice mask %x and L3 bank mask %x!\n",
intel_sseu_get_subslices(sseu, slice), l3_en);
subslice = fls(l3_en);
drm_WARN_ON(&i915->drm, !subslice);
@@ -861,7 +851,7 @@ wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK;
}
- DRM_DEBUG_DRIVER("MCR slice/subslice = %x\n", mcr);
+ drm_dbg(&i915->drm, "MCR slice/subslice = %x\n", mcr);
wa_write_masked_or(wal, GEN8_MCR_SELECTOR, mcr_mask, mcr);
}
@@ -871,12 +861,6 @@ cnl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
wa_init_mcr(i915, wal);
- /* WaDisableI2mCycleOnWRPort:cnl (pre-prod) */
- if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0))
- wa_write_or(wal,
- GAMT_CHKN_BIT_REG,
- GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT);
-
/* WaInPlaceDecompressionHang:cnl */
wa_write_or(wal,
GEN9_GAMT_ECO_REG_RW_IA,
@@ -933,15 +917,20 @@ icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
GAMT_CHKN_BIT_REG,
GAMT_CHKN_DISABLE_L3_COH_PIPE);
- /* Wa_1607087056:icl */
- wa_write_or(wal,
- SLICE_UNIT_LEVEL_CLKGATE,
- L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
+ /* Wa_1607087056:icl,ehl,jsl */
+ if (IS_ICELAKE(i915) ||
+ IS_EHL_REVID(i915, EHL_REVID_A0, EHL_REVID_A0)) {
+ wa_write_or(wal,
+ SLICE_UNIT_LEVEL_CLKGATE,
+ L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
+ }
}
static void
tgl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
+ wa_init_mcr(i915, wal);
+
/* Wa_1409420604:tgl */
if (IS_TGL_REVID(i915, TGL_REVID_A0, TGL_REVID_A0))
wa_write_or(wal,
@@ -1379,12 +1368,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
GEN7_FF_THREAD_MODE,
GEN12_FF_TESSELATION_DOP_GATE_DISABLE);
- /*
- * Wa_1409085225:tgl
- * Wa_14010229206:tgl
- */
- wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH);
-
/* Wa_1408615072:tgl */
wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2,
VSUNIT_CLKGATE_DIS_TGL);
@@ -1402,6 +1385,12 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
wa_masked_en(wal,
GEN9_CS_DEBUG_MODE1,
FF_DOP_CLOCK_GATE_DISABLE);
+
+ /*
+ * Wa_1409085225:tgl
+ * Wa_14010229206:tgl
+ */
+ wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH);
}
if (IS_GEN(i915, 11)) {
diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
index 4a53ded7c2dd..b8dd3cbc8696 100644
--- a/drivers/gpu/drm/i915/gt/mock_engine.c
+++ b/drivers/gpu/drm/i915/gt/mock_engine.c
@@ -28,7 +28,6 @@
#include "i915_drv.h"
#include "intel_context.h"
#include "intel_engine_pm.h"
-#include "intel_engine_pool.h"
#include "mock_engine.h"
#include "selftests/mock_request.h"
@@ -328,7 +327,6 @@ int mock_engine_init(struct intel_engine_cs *engine)
intel_engine_init_execlists(engine);
intel_engine_init__pm(engine);
intel_engine_init_retire(engine);
- intel_engine_pool_init(&engine->pool);
ce = create_kernel_context(engine);
if (IS_ERR(ce))
diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c
index e874dfaa5316..52af1cee9a94 100644
--- a/drivers/gpu/drm/i915/gt/selftest_context.c
+++ b/drivers/gpu/drm/i915/gt/selftest_context.c
@@ -24,6 +24,7 @@ static int request_sync(struct i915_request *rq)
/* Opencode i915_request_add() so we can keep the timeline locked. */
__i915_request_commit(rq);
+ rq->sched.attr.priority = I915_PRIORITY_BARRIER;
__i915_request_queue(rq, NULL);
timeout = i915_request_wait(rq, 0, HZ / 10);
@@ -154,10 +155,7 @@ static int live_context_size(void *arg)
*/
for_each_engine(engine, gt, id) {
- struct {
- struct drm_i915_gem_object *state;
- void *pinned;
- } saved;
+ struct file *saved;
if (!engine->context_size)
continue;
@@ -171,8 +169,7 @@ static int live_context_size(void *arg)
* active state is sufficient, we are only checking that we
* don't use more than we planned.
*/
- saved.state = fetch_and_zero(&engine->default_state);
- saved.pinned = fetch_and_zero(&engine->pinned_default_state);
+ saved = fetch_and_zero(&engine->default_state);
/* Overlaps with the execlists redzone */
engine->context_size += I915_GTT_PAGE_SIZE;
@@ -181,8 +178,7 @@ static int live_context_size(void *arg)
engine->context_size -= I915_GTT_PAGE_SIZE;
- engine->pinned_default_state = saved.pinned;
- engine->default_state = saved.state;
+ engine->default_state = saved;
intel_engine_pm_put(engine);
diff --git a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
index 09ff8e4f88af..242181a5214c 100644
--- a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
@@ -7,6 +7,7 @@
#include "selftest_llc.h"
#include "selftest_rc6.h"
+#include "selftest_rps.h"
static int live_gt_resume(void *arg)
{
@@ -52,6 +53,13 @@ int intel_gt_pm_live_selftests(struct drm_i915_private *i915)
{
static const struct i915_subtest tests[] = {
SUBTEST(live_rc6_manual),
+ SUBTEST(live_rps_clock_interval),
+ SUBTEST(live_rps_control),
+ SUBTEST(live_rps_frequency_cs),
+ SUBTEST(live_rps_frequency_srm),
+ SUBTEST(live_rps_power),
+ SUBTEST(live_rps_interrupt),
+ SUBTEST(live_rps_dynamic),
SUBTEST(live_gt_resume),
};
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index f95ae15ce865..824f99c4cc7c 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -21,7 +21,8 @@
#include "gem/selftests/mock_context.h"
#define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
-#define NUM_GPR_DW (16 * 2) /* each GPR is 2 dwords */
+#define NUM_GPR 16
+#define NUM_GPR_DW (NUM_GPR * 2) /* each GPR is 2 dwords */
static struct i915_vma *create_scratch(struct intel_gt *gt)
{
@@ -68,26 +69,41 @@ static void engine_heartbeat_enable(struct intel_engine_cs *engine,
engine->props.heartbeat_interval_ms = saved;
}
+static bool is_active(struct i915_request *rq)
+{
+ if (i915_request_is_active(rq))
+ return true;
+
+ if (i915_request_on_hold(rq))
+ return true;
+
+ if (i915_request_started(rq))
+ return true;
+
+ return false;
+}
+
static int wait_for_submit(struct intel_engine_cs *engine,
struct i915_request *rq,
unsigned long timeout)
{
timeout += jiffies;
do {
- cond_resched();
- intel_engine_flush_submission(engine);
+ bool done = time_after(jiffies, timeout);
- if (READ_ONCE(engine->execlists.pending[0]))
- continue;
-
- if (i915_request_is_active(rq))
+ if (i915_request_completed(rq)) /* that was quick! */
return 0;
- if (i915_request_started(rq)) /* that was quick! */
+ /* Wait until the HW has acknowledged the submission (or err) */
+ intel_engine_flush_submission(engine);
+ if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq))
return 0;
- } while (time_before(jiffies, timeout));
- return -ETIME;
+ if (done)
+ return -ETIME;
+
+ cond_resched();
+ } while (1);
}
static int wait_for_reset(struct intel_engine_cs *engine,
@@ -634,9 +650,9 @@ static int live_error_interrupt(void *arg)
error_repr(p->error[i]));
if (!i915_request_started(client[i])) {
- pr_debug("%s: %s request not stated!\n",
- engine->name,
- error_repr(p->error[i]));
+ pr_err("%s: %s request not started!\n",
+ engine->name,
+ error_repr(p->error[i]));
err = -ETIME;
goto out;
}
@@ -644,9 +660,10 @@ static int live_error_interrupt(void *arg)
/* Kick the tasklet to process the error */
intel_engine_flush_submission(engine);
if (client[i]->fence.error != p->error[i]) {
- pr_err("%s: %s request completed with wrong error code: %d\n",
+ pr_err("%s: %s request (%s) with wrong error code: %d\n",
engine->name,
error_repr(p->error[i]),
+ i915_request_completed(client[i]) ? "completed" : "running",
client[i]->fence.error);
err = -EINVAL;
goto out;
@@ -1057,7 +1074,6 @@ static int live_timeslice_rewind(void *arg)
engine->name);
goto err;
}
- GEM_BUG_ON(!timer_pending(&engine->execlists.timer));
/* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */
if (i915_request_is_active(rq[A2])) { /* semaphore yielded! */
@@ -1230,8 +1246,14 @@ static int live_timeslice_queue(void *arg)
if (err)
goto err_rq;
- intel_engine_flush_submission(engine);
+ /* Wait until we ack the release_queue and start timeslicing */
+ do {
+ cond_resched();
+ intel_engine_flush_submission(engine);
+ } while (READ_ONCE(engine->execlists.pending[0]));
+
if (!READ_ONCE(engine->execlists.timer.expires) &&
+ execlists_active(&engine->execlists) == rq &&
!i915_request_completed(rq)) {
struct drm_printer p =
drm_info_printer(gt->i915->drm.dev);
@@ -2032,6 +2054,9 @@ static int __cancel_hostile(struct live_preempt_cancel *arg)
if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
return 0;
+ if (!intel_has_reset_engine(arg->engine->gt))
+ return 0;
+
GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
rq = spinner_create_request(&arg->a.spin,
arg->a.ctx, arg->engine,
@@ -2632,7 +2657,7 @@ static int create_gang(struct intel_engine_cs *engine,
if (IS_ERR(rq))
goto err_obj;
- rq->batch = vma;
+ rq->batch = i915_vma_get(vma);
i915_request_get(rq);
i915_vma_lock(vma);
@@ -2656,6 +2681,7 @@ static int create_gang(struct intel_engine_cs *engine,
return 0;
err_rq:
+ i915_vma_put(rq->batch);
i915_request_put(rq);
err_obj:
i915_gem_object_put(obj);
@@ -2752,6 +2778,7 @@ static int live_preempt_gang(void *arg)
err = -ETIME;
}
+ i915_vma_put(rq->batch);
i915_request_put(rq);
rq = n;
}
@@ -2765,6 +2792,331 @@ static int live_preempt_gang(void *arg)
return 0;
}
+static struct i915_vma *
+create_gpr_user(struct intel_engine_cs *engine,
+ struct i915_vma *result,
+ unsigned int offset)
+{
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+ u32 *cs;
+ int err;
+ int i;
+
+ obj = i915_gem_object_create_internal(engine->i915, 4096);
+ if (IS_ERR(obj))
+ return ERR_CAST(obj);
+
+ vma = i915_vma_instance(obj, result->vm, NULL);
+ if (IS_ERR(vma)) {
+ i915_gem_object_put(obj);
+ return vma;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, PIN_USER);
+ if (err) {
+ i915_vma_put(vma);
+ return ERR_PTR(err);
+ }
+
+ cs = i915_gem_object_pin_map(obj, I915_MAP_WC);
+ if (IS_ERR(cs)) {
+ i915_vma_put(vma);
+ return ERR_CAST(cs);
+ }
+
+ /* All GPRs are clear for new contexts. We use GPR(0) as a constant */
+ *cs++ = MI_LOAD_REGISTER_IMM(1);
+ *cs++ = CS_GPR(engine, 0);
+ *cs++ = 1;
+
+ for (i = 1; i < NUM_GPR; i++) {
+ u64 addr;
+
+ /*
+ * Perform: GPR[i]++
+ *
+ * As we read and write into the context saved GPR[i], if
+ * we restart this batch buffer from an earlier point, we
+ * will repeat the increment and store a value > 1.
+ */
+ *cs++ = MI_MATH(4);
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(i));
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(0));
+ *cs++ = MI_MATH_ADD;
+ *cs++ = MI_MATH_STORE(MI_MATH_REG(i), MI_MATH_REG_ACCU);
+
+ addr = result->node.start + offset + i * sizeof(*cs);
+ *cs++ = MI_STORE_REGISTER_MEM_GEN8;
+ *cs++ = CS_GPR(engine, 2 * i);
+ *cs++ = lower_32_bits(addr);
+ *cs++ = upper_32_bits(addr);
+
+ *cs++ = MI_SEMAPHORE_WAIT |
+ MI_SEMAPHORE_POLL |
+ MI_SEMAPHORE_SAD_GTE_SDD;
+ *cs++ = i;
+ *cs++ = lower_32_bits(result->node.start);
+ *cs++ = upper_32_bits(result->node.start);
+ }
+
+ *cs++ = MI_BATCH_BUFFER_END;
+ i915_gem_object_flush_map(obj);
+ i915_gem_object_unpin_map(obj);
+
+ return vma;
+}
+
+static struct i915_vma *create_global(struct intel_gt *gt, size_t sz)
+{
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+ int err;
+
+ obj = i915_gem_object_create_internal(gt->i915, sz);
+ if (IS_ERR(obj))
+ return ERR_CAST(obj);
+
+ vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
+ if (IS_ERR(vma)) {
+ i915_gem_object_put(obj);
+ return vma;
+ }
+
+ err = i915_ggtt_pin(vma, 0, 0);
+ if (err) {
+ i915_vma_put(vma);
+ return ERR_PTR(err);
+ }
+
+ return vma;
+}
+
+static struct i915_request *
+create_gpr_client(struct intel_engine_cs *engine,
+ struct i915_vma *global,
+ unsigned int offset)
+{
+ struct i915_vma *batch, *vma;
+ struct intel_context *ce;
+ struct i915_request *rq;
+ int err;
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce))
+ return ERR_CAST(ce);
+
+ vma = i915_vma_instance(global->obj, ce->vm, NULL);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ goto out_ce;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, PIN_USER);
+ if (err)
+ goto out_ce;
+
+ batch = create_gpr_user(engine, vma, offset);
+ if (IS_ERR(batch)) {
+ err = PTR_ERR(batch);
+ goto out_vma;
+ }
+
+ rq = intel_context_create_request(ce);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto out_batch;
+ }
+
+ i915_vma_lock(vma);
+ err = i915_request_await_object(rq, vma->obj, false);
+ if (!err)
+ err = i915_vma_move_to_active(vma, rq, 0);
+ i915_vma_unlock(vma);
+
+ i915_vma_lock(batch);
+ if (!err)
+ err = i915_request_await_object(rq, batch->obj, false);
+ if (!err)
+ err = i915_vma_move_to_active(batch, rq, 0);
+ if (!err)
+ err = rq->engine->emit_bb_start(rq,
+ batch->node.start,
+ PAGE_SIZE, 0);
+ i915_vma_unlock(batch);
+ i915_vma_unpin(batch);
+
+ if (!err)
+ i915_request_get(rq);
+ i915_request_add(rq);
+
+out_batch:
+ i915_vma_put(batch);
+out_vma:
+ i915_vma_unpin(vma);
+out_ce:
+ intel_context_put(ce);
+ return err ? ERR_PTR(err) : rq;
+}
+
+static int preempt_user(struct intel_engine_cs *engine,
+ struct i915_vma *global,
+ int id)
+{
+ struct i915_sched_attr attr = {
+ .priority = I915_PRIORITY_MAX
+ };
+ struct i915_request *rq;
+ int err = 0;
+ u32 *cs;
+
+ rq = intel_engine_create_kernel_request(engine);
+ if (IS_ERR(rq))
+ return PTR_ERR(rq);
+
+ cs = intel_ring_begin(rq, 4);
+ if (IS_ERR(cs)) {
+ i915_request_add(rq);
+ return PTR_ERR(cs);
+ }
+
+ *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+ *cs++ = i915_ggtt_offset(global);
+ *cs++ = 0;
+ *cs++ = id;
+
+ intel_ring_advance(rq, cs);
+
+ i915_request_get(rq);
+ i915_request_add(rq);
+
+ engine->schedule(rq, &attr);
+
+ if (i915_request_wait(rq, 0, HZ / 2) < 0)
+ err = -ETIME;
+ i915_request_put(rq);
+
+ return err;
+}
+
+static int live_preempt_user(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ struct i915_vma *global;
+ enum intel_engine_id id;
+ u32 *result;
+ int err = 0;
+
+ if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
+ return 0;
+
+ /*
+ * In our other tests, we look at preemption in carefully
+ * controlled conditions in the ringbuffer. Since most of the
+ * time is spent in user batches, most of our preemptions naturally
+ * occur there. We want to verify that when we preempt inside a batch
+ * we continue on from the current instruction and do not roll back
+ * to the start, or another earlier arbitration point.
+ *
+ * To verify this, we create a batch which is a mixture of
+ * MI_MATH (gpr++) MI_SRM (gpr) and preemption points. Then with
+ * a few preempting contexts thrown into the mix, we look for any
+ * repeated instructions (which show up as incorrect values).
+ */
+
+ global = create_global(gt, 4096);
+ if (IS_ERR(global))
+ return PTR_ERR(global);
+
+ result = i915_gem_object_pin_map(global->obj, I915_MAP_WC);
+ if (IS_ERR(result)) {
+ i915_vma_unpin_and_release(&global, 0);
+ return PTR_ERR(result);
+ }
+
+ for_each_engine(engine, gt, id) {
+ struct i915_request *client[3] = {};
+ struct igt_live_test t;
+ int i;
+
+ if (!intel_engine_has_preemption(engine))
+ continue;
+
+ if (IS_GEN(gt->i915, 8) && engine->class != RENDER_CLASS)
+ continue; /* we need per-context GPR */
+
+ if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
+ err = -EIO;
+ break;
+ }
+
+ memset(result, 0, 4096);
+
+ for (i = 0; i < ARRAY_SIZE(client); i++) {
+ struct i915_request *rq;
+
+ rq = create_gpr_client(engine, global,
+ NUM_GPR * i * sizeof(u32));
+ if (IS_ERR(rq))
+ goto end_test;
+
+ client[i] = rq;
+ }
+
+ /* Continuously preempt the set of 3 running contexts */
+ for (i = 1; i <= NUM_GPR; i++) {
+ err = preempt_user(engine, global, i);
+ if (err)
+ goto end_test;
+ }
+
+ if (READ_ONCE(result[0]) != NUM_GPR) {
+ pr_err("%s: Failed to release semaphore\n",
+ engine->name);
+ err = -EIO;
+ goto end_test;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(client); i++) {
+ int gpr;
+
+ if (i915_request_wait(client[i], 0, HZ / 2) < 0) {
+ err = -ETIME;
+ goto end_test;
+ }
+
+ for (gpr = 1; gpr < NUM_GPR; gpr++) {
+ if (result[NUM_GPR * i + gpr] != 1) {
+ pr_err("%s: Invalid result, client %d, gpr %d, result: %d\n",
+ engine->name,
+ i, gpr, result[NUM_GPR * i + gpr]);
+ err = -EINVAL;
+ goto end_test;
+ }
+ }
+ }
+
+end_test:
+ for (i = 0; i < ARRAY_SIZE(client); i++) {
+ if (!client[i])
+ break;
+
+ i915_request_put(client[i]);
+ }
+
+ /* Flush the semaphores on error */
+ smp_store_mb(result[0], -1);
+ if (igt_live_test_end(&t))
+ err = -EIO;
+ if (err)
+ break;
+ }
+
+ i915_vma_unpin_and_release(&global, I915_VMA_RELEASE_MAP);
+ return err;
+}
+
static int live_preempt_timeout(void *arg)
{
struct intel_gt *gt = arg;
@@ -3972,6 +4324,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
SUBTEST(live_chain_preempt),
SUBTEST(live_preempt_gang),
SUBTEST(live_preempt_timeout),
+ SUBTEST(live_preempt_user),
SUBTEST(live_preempt_smoke),
SUBTEST(live_virtual_engine),
SUBTEST(live_virtual_mask),
@@ -3989,35 +4342,6 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
return intel_gt_live_subtests(tests, &i915->gt);
}
-static void hexdump(const void *buf, size_t len)
-{
- const size_t rowsize = 8 * sizeof(u32);
- const void *prev = NULL;
- bool skip = false;
- size_t pos;
-
- for (pos = 0; pos < len; pos += rowsize) {
- char line[128];
-
- if (prev && !memcmp(prev, buf + pos, rowsize)) {
- if (!skip) {
- pr_info("*\n");
- skip = true;
- }
- continue;
- }
-
- WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos,
- rowsize, sizeof(u32),
- line, sizeof(line),
- false) >= sizeof(line));
- pr_info("[%04zx] %s\n", pos, line);
-
- prev = buf + pos;
- skip = false;
- }
-}
-
static int emit_semaphore_signal(struct intel_context *ce, void *slot)
{
const u32 offset =
@@ -4099,13 +4423,12 @@ static int live_lrc_layout(void *arg)
if (!engine->default_state)
continue;
- hw = i915_gem_object_pin_map(engine->default_state,
- I915_MAP_WB);
+ hw = shmem_pin_map(engine->default_state);
if (IS_ERR(hw)) {
err = PTR_ERR(hw);
break;
}
- hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
+ hw += LRC_STATE_OFFSET / sizeof(*hw);
execlists_init_reg_state(memset(lrc, POISON_INUSE, PAGE_SIZE),
engine->kernel_context,
@@ -4166,13 +4489,13 @@ static int live_lrc_layout(void *arg)
if (err) {
pr_info("%s: HW register image:\n", engine->name);
- hexdump(hw, PAGE_SIZE);
+ igt_hexdump(hw, PAGE_SIZE);
pr_info("%s: SW register image:\n", engine->name);
- hexdump(lrc, PAGE_SIZE);
+ igt_hexdump(lrc, PAGE_SIZE);
}
- i915_gem_object_unpin_map(engine->default_state);
+ shmem_unpin_map(engine->default_state, hw);
if (err)
break;
}
@@ -4241,10 +4564,35 @@ static int live_lrc_fixed(void *arg)
"BB_STATE"
},
{
+ i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(engine->mmio_base)),
+ lrc_ring_wa_bb_per_ctx(engine),
+ "RING_BB_PER_CTX_PTR"
+ },
+ {
+ i915_mmio_reg_offset(RING_INDIRECT_CTX(engine->mmio_base)),
+ lrc_ring_indirect_ptr(engine),
+ "RING_INDIRECT_CTX_PTR"
+ },
+ {
+ i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(engine->mmio_base)),
+ lrc_ring_indirect_offset(engine),
+ "RING_INDIRECT_CTX_OFFSET"
+ },
+ {
i915_mmio_reg_offset(RING_CTX_TIMESTAMP(engine->mmio_base)),
CTX_TIMESTAMP - 1,
"RING_CTX_TIMESTAMP"
},
+ {
+ i915_mmio_reg_offset(GEN8_RING_CS_GPR(engine->mmio_base, 0)),
+ lrc_ring_gpr0(engine),
+ "RING_CS_GPR0"
+ },
+ {
+ i915_mmio_reg_offset(RING_CMD_BUF_CCTL(engine->mmio_base)),
+ lrc_ring_cmd_buf_cctl(engine),
+ "RING_CMD_BUF_CCTL"
+ },
{ },
}, *t;
u32 *hw;
@@ -4252,13 +4600,12 @@ static int live_lrc_fixed(void *arg)
if (!engine->default_state)
continue;
- hw = i915_gem_object_pin_map(engine->default_state,
- I915_MAP_WB);
+ hw = shmem_pin_map(engine->default_state);
if (IS_ERR(hw)) {
err = PTR_ERR(hw);
break;
}
- hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
+ hw += LRC_STATE_OFFSET / sizeof(*hw);
for (t = tbl; t->name; t++) {
int dw = find_offset(hw, t->reg);
@@ -4274,7 +4621,7 @@ static int live_lrc_fixed(void *arg)
}
}
- i915_gem_object_unpin_map(engine->default_state);
+ shmem_unpin_map(engine->default_state, hw);
}
return err;
@@ -4830,6 +5177,7 @@ store_context(struct intel_context *ce, struct i915_vma *scratch)
{
struct i915_vma *batch;
u32 dw, x, *cs, *hw;
+ u32 *defaults;
batch = create_user_vma(ce->vm, SZ_64K);
if (IS_ERR(batch))
@@ -4841,10 +5189,17 @@ store_context(struct intel_context *ce, struct i915_vma *scratch)
return ERR_CAST(cs);
}
+ defaults = shmem_pin_map(ce->engine->default_state);
+ if (!defaults) {
+ i915_gem_object_unpin_map(batch->obj);
+ i915_vma_put(batch);
+ return ERR_PTR(-ENOMEM);
+ }
+
x = 0;
dw = 0;
- hw = ce->engine->pinned_default_state;
- hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
+ hw = defaults;
+ hw += LRC_STATE_OFFSET / sizeof(*hw);
do {
u32 len = hw[dw] & 0x7f;
@@ -4874,6 +5229,8 @@ store_context(struct intel_context *ce, struct i915_vma *scratch)
*cs++ = MI_BATCH_BUFFER_END;
+ shmem_unpin_map(ce->engine->default_state, defaults);
+
i915_gem_object_flush_map(batch->obj);
i915_gem_object_unpin_map(batch->obj);
@@ -4984,6 +5341,7 @@ static struct i915_vma *load_context(struct intel_context *ce, u32 poison)
{
struct i915_vma *batch;
u32 dw, *cs, *hw;
+ u32 *defaults;
batch = create_user_vma(ce->vm, SZ_64K);
if (IS_ERR(batch))
@@ -4995,9 +5353,16 @@ static struct i915_vma *load_context(struct intel_context *ce, u32 poison)
return ERR_CAST(cs);
}
+ defaults = shmem_pin_map(ce->engine->default_state);
+ if (!defaults) {
+ i915_gem_object_unpin_map(batch->obj);
+ i915_vma_put(batch);
+ return ERR_PTR(-ENOMEM);
+ }
+
dw = 0;
- hw = ce->engine->pinned_default_state;
- hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
+ hw = defaults;
+ hw += LRC_STATE_OFFSET / sizeof(*hw);
do {
u32 len = hw[dw] & 0x7f;
@@ -5024,6 +5389,8 @@ static struct i915_vma *load_context(struct intel_context *ce, u32 poison)
*cs++ = MI_BATCH_BUFFER_END;
+ shmem_unpin_map(ce->engine->default_state, defaults);
+
i915_gem_object_flush_map(batch->obj);
i915_gem_object_unpin_map(batch->obj);
@@ -5091,6 +5458,7 @@ static int compare_isolation(struct intel_engine_cs *engine,
{
u32 x, dw, *hw, *lrc;
u32 *A[2], *B[2];
+ u32 *defaults;
int err = 0;
A[0] = i915_gem_object_pin_map(ref[0]->obj, I915_MAP_WC);
@@ -5121,12 +5489,18 @@ static int compare_isolation(struct intel_engine_cs *engine,
err = PTR_ERR(lrc);
goto err_B1;
}
- lrc += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
+ lrc += LRC_STATE_OFFSET / sizeof(*hw);
+
+ defaults = shmem_pin_map(ce->engine->default_state);
+ if (!defaults) {
+ err = -ENOMEM;
+ goto err_lrc;
+ }
x = 0;
dw = 0;
- hw = engine->pinned_default_state;
- hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
+ hw = defaults;
+ hw += LRC_STATE_OFFSET / sizeof(*hw);
do {
u32 len = hw[dw] & 0x7f;
@@ -5157,7 +5531,6 @@ static int compare_isolation(struct intel_engine_cs *engine,
A[0][x], B[0][x], B[1][x],
poison, lrc[dw + 1]);
err = -EINVAL;
- break;
}
}
dw += 2;
@@ -5166,6 +5539,8 @@ static int compare_isolation(struct intel_engine_cs *engine,
} while (dw < PAGE_SIZE / sizeof(u32) &&
(hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
+ shmem_unpin_map(ce->engine->default_state, defaults);
+err_lrc:
i915_gem_object_unpin_map(ce->state->obj);
err_B1:
i915_gem_object_unpin_map(result[1]->obj);
@@ -5296,6 +5671,7 @@ static int live_lrc_isolation(void *arg)
0xffffffff,
0xffff0000,
};
+ int err = 0;
/*
* Our goal is try and verify that per-context state cannot be
@@ -5306,7 +5682,6 @@ static int live_lrc_isolation(void *arg)
*/
for_each_engine(engine, gt, id) {
- int err = 0;
int i;
/* Just don't even ask */
@@ -5315,25 +5690,180 @@ static int live_lrc_isolation(void *arg)
continue;
intel_engine_pm_get(engine);
- if (engine->pinned_default_state) {
- for (i = 0; i < ARRAY_SIZE(poison); i++) {
- err = __lrc_isolation(engine, poison[i]);
- if (err)
- break;
+ for (i = 0; i < ARRAY_SIZE(poison); i++) {
+ int result;
- err = __lrc_isolation(engine, ~poison[i]);
- if (err)
- break;
- }
+ result = __lrc_isolation(engine, poison[i]);
+ if (result && !err)
+ err = result;
+
+ result = __lrc_isolation(engine, ~poison[i]);
+ if (result && !err)
+ err = result;
}
intel_engine_pm_put(engine);
+ if (igt_flush_test(gt->i915)) {
+ err = -EIO;
+ break;
+ }
+ }
+
+ return err;
+}
+
+static int indirect_ctx_submit_req(struct intel_context *ce)
+{
+ struct i915_request *rq;
+ int err = 0;
+
+ rq = intel_context_create_request(ce);
+ if (IS_ERR(rq))
+ return PTR_ERR(rq);
+
+ i915_request_get(rq);
+ i915_request_add(rq);
+
+ if (i915_request_wait(rq, 0, HZ / 5) < 0)
+ err = -ETIME;
+
+ i915_request_put(rq);
+
+ return err;
+}
+
+#define CTX_BB_CANARY_OFFSET (3 * 1024)
+#define CTX_BB_CANARY_INDEX (CTX_BB_CANARY_OFFSET / sizeof(u32))
+
+static u32 *
+emit_indirect_ctx_bb_canary(const struct intel_context *ce, u32 *cs)
+{
+ *cs++ = MI_STORE_REGISTER_MEM_GEN8 |
+ MI_SRM_LRM_GLOBAL_GTT |
+ MI_LRI_LRM_CS_MMIO;
+ *cs++ = i915_mmio_reg_offset(RING_START(0));
+ *cs++ = i915_ggtt_offset(ce->state) +
+ context_wa_bb_offset(ce) +
+ CTX_BB_CANARY_OFFSET;
+ *cs++ = 0;
+
+ return cs;
+}
+
+static void
+indirect_ctx_bb_setup(struct intel_context *ce)
+{
+ u32 *cs = context_indirect_bb(ce);
+
+ cs[CTX_BB_CANARY_INDEX] = 0xdeadf00d;
+
+ setup_indirect_ctx_bb(ce, ce->engine, emit_indirect_ctx_bb_canary);
+}
+
+static bool check_ring_start(struct intel_context *ce)
+{
+ const u32 * const ctx_bb = (void *)(ce->lrc_reg_state) -
+ LRC_STATE_OFFSET + context_wa_bb_offset(ce);
+
+ if (ctx_bb[CTX_BB_CANARY_INDEX] == ce->lrc_reg_state[CTX_RING_START])
+ return true;
+
+ pr_err("ring start mismatch: canary 0x%08x vs state 0x%08x\n",
+ ctx_bb[CTX_BB_CANARY_INDEX],
+ ce->lrc_reg_state[CTX_RING_START]);
+
+ return false;
+}
+
+static int indirect_ctx_bb_check(struct intel_context *ce)
+{
+ int err;
+
+ err = indirect_ctx_submit_req(ce);
+ if (err)
+ return err;
+
+ if (!check_ring_start(ce))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int __live_lrc_indirect_ctx_bb(struct intel_engine_cs *engine)
+{
+ struct intel_context *a, *b;
+ int err;
+
+ a = intel_context_create(engine);
+ if (IS_ERR(a))
+ return PTR_ERR(a);
+ err = intel_context_pin(a);
+ if (err)
+ goto put_a;
+
+ b = intel_context_create(engine);
+ if (IS_ERR(b)) {
+ err = PTR_ERR(b);
+ goto unpin_a;
+ }
+ err = intel_context_pin(b);
+ if (err)
+ goto put_b;
+
+ /* We use the already reserved extra page in context state */
+ if (!a->wa_bb_page) {
+ GEM_BUG_ON(b->wa_bb_page);
+ GEM_BUG_ON(INTEL_GEN(engine->i915) == 12);
+ goto unpin_b;
+ }
+
+ /*
+ * In order to test that our per context bb is truly per context,
+ * and executes at the intended point in the context restore process,
+ * make the batch store the ring start value to memory.
+ * As ring start is restored prior to running the indirect ctx bb and
+ * will differ for each context, it suits this purpose.
+ */
+ indirect_ctx_bb_setup(a);
+ indirect_ctx_bb_setup(b);
+
+ err = indirect_ctx_bb_check(a);
+ if (err)
+ goto unpin_b;
+
+ err = indirect_ctx_bb_check(b);
+
+unpin_b:
+ intel_context_unpin(b);
+put_b:
+ intel_context_put(b);
+unpin_a:
+ intel_context_unpin(a);
+put_a:
+ intel_context_put(a);
+
+ return err;
+}
+
+static int live_lrc_indirect_ctx_bb(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ int err = 0;
+
+ for_each_engine(engine, gt, id) {
+ intel_engine_pm_get(engine);
+ err = __live_lrc_indirect_ctx_bb(engine);
+ intel_engine_pm_put(engine);
+
if (igt_flush_test(gt->i915))
err = -EIO;
+
if (err)
- return err;
+ break;
}
- return 0;
+ return err;
}
static void garbage_reset(struct intel_engine_cs *engine,
@@ -5367,7 +5897,7 @@ static struct i915_request *garbage(struct intel_context *ce,
prandom_bytes_state(prng,
ce->lrc_reg_state,
ce->engine->context_size -
- LRC_STATE_PN * PAGE_SIZE);
+ LRC_STATE_OFFSET);
rq = intel_context_create_request(ce);
if (IS_ERR(rq)) {
@@ -5571,6 +6101,7 @@ int intel_lrc_live_selftests(struct drm_i915_private *i915)
SUBTEST(live_lrc_timestamp),
SUBTEST(live_lrc_garbage),
SUBTEST(live_pphwsp_runtime),
+ SUBTEST(live_lrc_indirect_ctx_bb),
};
if (!HAS_LOGICAL_RING_CONTEXTS(i915))
diff --git a/drivers/gpu/drm/i915/gt/selftest_rc6.c b/drivers/gpu/drm/i915/gt/selftest_rc6.c
index 95b165faeba7..2dc460624bbc 100644
--- a/drivers/gpu/drm/i915/gt/selftest_rc6.c
+++ b/drivers/gpu/drm/i915/gt/selftest_rc6.c
@@ -11,6 +11,7 @@
#include "selftest_rc6.h"
#include "selftests/i915_random.h"
+#include "selftests/librapl.h"
static u64 rc6_residency(struct intel_rc6 *rc6)
{
@@ -31,7 +32,9 @@ int live_rc6_manual(void *arg)
{
struct intel_gt *gt = arg;
struct intel_rc6 *rc6 = &gt->rc6;
+ u64 rc0_power, rc6_power;
intel_wakeref_t wakeref;
+ ktime_t dt;
u64 res[2];
int err = 0;
@@ -54,7 +57,12 @@ int live_rc6_manual(void *arg)
msleep(1); /* wakeup is not immediate, takes about 100us on icl */
res[0] = rc6_residency(rc6);
+
+ dt = ktime_get();
+ rc0_power = librapl_energy_uJ();
msleep(250);
+ rc0_power = librapl_energy_uJ() - rc0_power;
+ dt = ktime_sub(ktime_get(), dt);
res[1] = rc6_residency(rc6);
if ((res[1] - res[0]) >> 10) {
pr_err("RC6 residency increased by %lldus while disabled for 250ms!\n",
@@ -63,13 +71,24 @@ int live_rc6_manual(void *arg)
goto out_unlock;
}
+ rc0_power = div64_u64(NSEC_PER_SEC * rc0_power, ktime_to_ns(dt));
+ if (!rc0_power) {
+ pr_err("No power measured while in RC0\n");
+ err = -EINVAL;
+ goto out_unlock;
+ }
+
/* Manually enter RC6 */
intel_rc6_park(rc6);
res[0] = rc6_residency(rc6);
+ intel_uncore_forcewake_flush(rc6_to_uncore(rc6), FORCEWAKE_ALL);
+ dt = ktime_get();
+ rc6_power = librapl_energy_uJ();
msleep(100);
+ rc6_power = librapl_energy_uJ() - rc6_power;
+ dt = ktime_sub(ktime_get(), dt);
res[1] = rc6_residency(rc6);
-
if (res[1] == res[0]) {
pr_err("Did not enter RC6! RC6_STATE=%08x, RC6_CONTROL=%08x, residency=%lld\n",
intel_uncore_read_fw(gt->uncore, GEN6_RC_STATE),
@@ -78,6 +97,15 @@ int live_rc6_manual(void *arg)
err = -EINVAL;
}
+ rc6_power = div64_u64(NSEC_PER_SEC * rc6_power, ktime_to_ns(dt));
+ pr_info("GPU consumed %llduW in RC0 and %llduW in RC6\n",
+ rc0_power, rc6_power);
+ if (2 * rc6_power > rc0_power) {
+ pr_err("GPU leaked energy while in RC6!\n");
+ err = -EINVAL;
+ goto out_unlock;
+ }
+
/* Restore what should have been the original state! */
intel_rc6_unpark(rc6);
diff --git a/drivers/gpu/drm/i915/gt/selftest_ring_submission.c b/drivers/gpu/drm/i915/gt/selftest_ring_submission.c
index 9995faadd7e8..3350e7c995bc 100644
--- a/drivers/gpu/drm/i915/gt/selftest_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/selftest_ring_submission.c
@@ -54,6 +54,8 @@ static struct i915_vma *create_wally(struct intel_engine_cs *engine)
*cs++ = STACK_MAGIC;
*cs++ = MI_BATCH_BUFFER_END;
+
+ i915_gem_object_flush_map(obj);
i915_gem_object_unpin_map(obj);
vma->private = intel_context_create(engine); /* dummy residuals */
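
The selftest_ring_submission.c hunk above adds an i915_gem_object_flush_map() before unmapping: batches written through a write-combining CPU map must be flushed so the commands are visible to the GPU before submission. A minimal sketch of that recurring pattern, assuming an already-created internal object obj (illustrative only, error handling trimmed):

	u32 *cs;

	cs = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_NOOP;
	*cs++ = MI_BATCH_BUFFER_END;

	i915_gem_object_flush_map(obj); /* flush WC writes before the GPU reads them */
	i915_gem_object_unpin_map(obj);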
diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c
new file mode 100644
index 000000000000..6275d69aa9cc
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/selftest_rps.c
@@ -0,0 +1,1331 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#include <linux/pm_qos.h>
+#include <linux/sort.h>
+
+#include "intel_engine_heartbeat.h"
+#include "intel_engine_pm.h"
+#include "intel_gpu_commands.h"
+#include "intel_gt_clock_utils.h"
+#include "intel_gt_pm.h"
+#include "intel_rc6.h"
+#include "selftest_rps.h"
+#include "selftests/igt_flush_test.h"
+#include "selftests/igt_spinner.h"
+#include "selftests/librapl.h"
+
+/* Try to isolate the impact of cstates from determining frequency response */
+#define CPU_LATENCY 0 /* -1 to disable pm_qos, 0 to disable cstates */
+
+static unsigned long engine_heartbeat_disable(struct intel_engine_cs *engine)
+{
+ unsigned long old;
+
+ old = fetch_and_zero(&engine->props.heartbeat_interval_ms);
+
+ intel_engine_pm_get(engine);
+ intel_engine_park_heartbeat(engine);
+
+ return old;
+}
+
+static void engine_heartbeat_enable(struct intel_engine_cs *engine,
+ unsigned long saved)
+{
+ intel_engine_pm_put(engine);
+
+ engine->props.heartbeat_interval_ms = saved;
+}
+
+static void dummy_rps_work(struct work_struct *wrk)
+{
+}
+
+static int cmp_u64(const void *A, const void *B)
+{
+ const u64 *a = A, *b = B;
+
+ if (*a < *b)
+ return -1;
+ else if (*a > *b)
+ return 1;
+ else
+ return 0;
+}
+
+static int cmp_u32(const void *A, const void *B)
+{
+ const u32 *a = A, *b = B;
+
+ if (*a < *b)
+ return -1;
+ else if (*a > *b)
+ return 1;
+ else
+ return 0;
+}
+
+static struct i915_vma *
+create_spin_counter(struct intel_engine_cs *engine,
+ struct i915_address_space *vm,
+ bool srm,
+ u32 **cancel,
+ u32 **counter)
+{
+ enum {
+ COUNT,
+ INC,
+ __NGPR__,
+ };
+#define CS_GPR(x) GEN8_RING_CS_GPR(engine->mmio_base, x)
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+ unsigned long end;
+ u32 *base, *cs;
+ int loop, i;
+ int err;
+
+ obj = i915_gem_object_create_internal(vm->i915, 64 << 10);
+ if (IS_ERR(obj))
+ return ERR_CAST(obj);
+
+ end = obj->base.size / sizeof(u32) - 1;
+
+ vma = i915_vma_instance(obj, vm, NULL);
+ if (IS_ERR(vma)) {
+ i915_gem_object_put(obj);
+ return vma;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, PIN_USER);
+ if (err) {
+ i915_vma_put(vma);
+ return ERR_PTR(err);
+ }
+
+ base = i915_gem_object_pin_map(obj, I915_MAP_WC);
+ if (IS_ERR(base)) {
+ i915_gem_object_put(obj);
+ return ERR_CAST(base);
+ }
+ cs = base;
+
+ *cs++ = MI_LOAD_REGISTER_IMM(__NGPR__ * 2);
+ for (i = 0; i < __NGPR__; i++) {
+ *cs++ = i915_mmio_reg_offset(CS_GPR(i));
+ *cs++ = 0;
+ *cs++ = i915_mmio_reg_offset(CS_GPR(i)) + 4;
+ *cs++ = 0;
+ }
+
+ *cs++ = MI_LOAD_REGISTER_IMM(1);
+ *cs++ = i915_mmio_reg_offset(CS_GPR(INC));
+ *cs++ = 1;
+
+ loop = cs - base;
+
+ /* Unroll the loop to avoid MI_BB_START stalls impacting measurements */
+ for (i = 0; i < 1024; i++) {
+ *cs++ = MI_MATH(4);
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(COUNT));
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(INC));
+ *cs++ = MI_MATH_ADD;
+ *cs++ = MI_MATH_STORE(MI_MATH_REG(COUNT), MI_MATH_REG_ACCU);
+
+ if (srm) {
+ *cs++ = MI_STORE_REGISTER_MEM_GEN8;
+ *cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
+ *cs++ = lower_32_bits(vma->node.start + end * sizeof(*cs));
+ *cs++ = upper_32_bits(vma->node.start + end * sizeof(*cs));
+ }
+ }
+
+ *cs++ = MI_BATCH_BUFFER_START_GEN8;
+ *cs++ = lower_32_bits(vma->node.start + loop * sizeof(*cs));
+ *cs++ = upper_32_bits(vma->node.start + loop * sizeof(*cs));
+ GEM_BUG_ON(cs - base > end);
+
+ i915_gem_object_flush_map(obj);
+
+ *cancel = base + loop;
+ *counter = srm ? memset32(base + end, 0, 1) : NULL;
+ return vma;
+}
+
+static u8 wait_for_freq(struct intel_rps *rps, u8 freq, int timeout_ms)
+{
+ u8 history[64], i;
+ unsigned long end;
+ int sleep;
+
+ i = 0;
+ memset(history, freq, sizeof(history));
+ sleep = 20;
+
+ /* The PCU does not change instantly, but drifts towards the goal? */
+ end = jiffies + msecs_to_jiffies(timeout_ms);
+ do {
+ u8 act;
+
+ act = read_cagf(rps);
+ if (time_after(jiffies, end))
+ return act;
+
+ /* Target acquired */
+ if (act == freq)
+ return act;
+
+ /* Any change within the last N samples? */
+ if (!memchr_inv(history, act, sizeof(history)))
+ return act;
+
+ history[i] = act;
+ i = (i + 1) % ARRAY_SIZE(history);
+
+ usleep_range(sleep, 2 * sleep);
+ sleep *= 2;
+ if (sleep > timeout_ms * 20)
+ sleep = timeout_ms * 20;
+ } while (1);
+}
+
+static u8 rps_set_check(struct intel_rps *rps, u8 freq)
+{
+ mutex_lock(&rps->lock);
+ GEM_BUG_ON(!intel_rps_is_active(rps));
+ intel_rps_set(rps, freq);
+ GEM_BUG_ON(rps->last_freq != freq);
+ mutex_unlock(&rps->lock);
+
+ return wait_for_freq(rps, freq, 50);
+}
+
+static void show_pstate_limits(struct intel_rps *rps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+
+ if (IS_BROXTON(i915)) {
+ pr_info("P_STATE_CAP[%x]: 0x%08x\n",
+ i915_mmio_reg_offset(BXT_RP_STATE_CAP),
+ intel_uncore_read(rps_to_uncore(rps),
+ BXT_RP_STATE_CAP));
+ } else if (IS_GEN(i915, 9)) {
+ pr_info("P_STATE_LIMITS[%x]: 0x%08x\n",
+ i915_mmio_reg_offset(GEN9_RP_STATE_LIMITS),
+ intel_uncore_read(rps_to_uncore(rps),
+ GEN9_RP_STATE_LIMITS));
+ }
+}
+
+int live_rps_clock_interval(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_rps *rps = &gt->rps;
+ void (*saved_work)(struct work_struct *wrk);
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ struct igt_spinner spin;
+ int err = 0;
+
+ if (!intel_rps_is_enabled(rps))
+ return 0;
+
+ if (igt_spinner_init(&spin, gt))
+ return -ENOMEM;
+
+ intel_gt_pm_wait_for_idle(gt);
+ saved_work = rps->work.func;
+ rps->work.func = dummy_rps_work;
+
+ intel_gt_pm_get(gt);
+ intel_rps_disable(&gt->rps);
+
+ intel_gt_check_clock_frequency(gt);
+
+ for_each_engine(engine, gt, id) {
+ unsigned long saved_heartbeat;
+ struct i915_request *rq;
+ u32 cycles;
+ u64 dt;
+
+ if (!intel_engine_can_store_dword(engine))
+ continue;
+
+ saved_heartbeat = engine_heartbeat_disable(engine);
+
+ rq = igt_spinner_create_request(&spin,
+ engine->kernel_context,
+ MI_NOOP);
+ if (IS_ERR(rq)) {
+ engine_heartbeat_enable(engine, saved_heartbeat);
+ err = PTR_ERR(rq);
+ break;
+ }
+
+ i915_request_add(rq);
+
+ if (!igt_wait_for_spinner(&spin, rq)) {
+ pr_err("%s: RPS spinner did not start\n",
+ engine->name);
+ igt_spinner_end(&spin);
+ engine_heartbeat_enable(engine, saved_heartbeat);
+ intel_gt_set_wedged(engine->gt);
+ err = -EIO;
+ break;
+ }
+
+ intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
+
+ intel_uncore_write_fw(gt->uncore, GEN6_RP_CUR_UP_EI, 0);
+
+ /* Set the evaluation interval to infinity! */
+ intel_uncore_write_fw(gt->uncore,
+ GEN6_RP_UP_EI, 0xffffffff);
+ intel_uncore_write_fw(gt->uncore,
+ GEN6_RP_UP_THRESHOLD, 0xffffffff);
+
+ intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL,
+ GEN6_RP_ENABLE | GEN6_RP_UP_BUSY_AVG);
+
+ if (wait_for(intel_uncore_read_fw(gt->uncore,
+ GEN6_RP_CUR_UP_EI),
+ 10)) {
+ /* Just skip the test; assume lack of HW support */
+ pr_notice("%s: rps evaluation interval not ticking\n",
+ engine->name);
+ err = -ENODEV;
+ } else {
+ ktime_t dt_[5];
+ u32 cycles_[5];
+ int i;
+
+ for (i = 0; i < 5; i++) {
+ preempt_disable();
+
+ dt_[i] = ktime_get();
+ cycles_[i] = -intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);
+
+ udelay(1000);
+
+ dt_[i] = ktime_sub(ktime_get(), dt_[i]);
+ cycles_[i] += intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);
+
+ preempt_enable();
+ }
+
+ /* Use the median of both cycle/dt; close enough */
+ sort(cycles_, 5, sizeof(*cycles_), cmp_u32, NULL);
+ cycles = (cycles_[1] + 2 * cycles_[2] + cycles_[3]) / 4;
+ sort(dt_, 5, sizeof(*dt_), cmp_u64, NULL);
+ dt = div_u64(dt_[1] + 2 * dt_[2] + dt_[3], 4);
+ }
+
+ intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL, 0);
+ intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
+
+ igt_spinner_end(&spin);
+ engine_heartbeat_enable(engine, saved_heartbeat);
+
+ if (err == 0) {
+ u64 time = intel_gt_pm_interval_to_ns(gt, cycles);
+ u32 expected =
+ intel_gt_ns_to_pm_interval(gt, dt);
+
+ pr_info("%s: rps counted %d C0 cycles [%lldns] in %lldns [%d cycles], using GT clock frequency of %uKHz\n",
+ engine->name, cycles, time, dt, expected,
+ gt->clock_frequency / 1000);
+
+ if (10 * time < 8 * dt ||
+ 8 * time > 10 * dt) {
+ pr_err("%s: rps clock time does not match walltime!\n",
+ engine->name);
+ err = -EINVAL;
+ }
+
+ if (10 * expected < 8 * cycles ||
+ 8 * expected > 10 * cycles) {
+ pr_err("%s: walltime does not match rps clock ticks!\n",
+ engine->name);
+ err = -EINVAL;
+ }
+ }
+
+ if (igt_flush_test(gt->i915))
+ err = -EIO;
+
+ break; /* once is enough */
+ }
+
+ intel_rps_enable(&gt->rps);
+ intel_gt_pm_put(gt);
+
+ igt_spinner_fini(&spin);
+
+ intel_gt_pm_wait_for_idle(gt);
+ rps->work.func = saved_work;
+
+ if (err == -ENODEV) /* skipped, don't report a fail */
+ err = 0;
+
+ return err;
+}
+
+int live_rps_control(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_rps *rps = &gt->rps;
+ void (*saved_work)(struct work_struct *wrk);
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ struct igt_spinner spin;
+ int err = 0;
+
+ /*
+ * Check that the actual frequency matches our requested frequency,
+ * to verify our control mechanism. We have to be careful that the
+ * PCU may throttle the GPU in which case the actual frequency used
+ * will be lower than requested.
+ */
+
+ if (!intel_rps_is_enabled(rps))
+ return 0;
+
+ if (IS_CHERRYVIEW(gt->i915)) /* XXX fragile PCU */
+ return 0;
+
+ if (igt_spinner_init(&spin, gt))
+ return -ENOMEM;
+
+ intel_gt_pm_wait_for_idle(gt);
+ saved_work = rps->work.func;
+ rps->work.func = dummy_rps_work;
+
+ intel_gt_pm_get(gt);
+ for_each_engine(engine, gt, id) {
+ unsigned long saved_heartbeat;
+ struct i915_request *rq;
+ ktime_t min_dt, max_dt;
+ int f, limit;
+ int min, max;
+
+ if (!intel_engine_can_store_dword(engine))
+ continue;
+
+ saved_heartbeat = engine_heartbeat_disable(engine);
+
+ rq = igt_spinner_create_request(&spin,
+ engine->kernel_context,
+ MI_NOOP);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ break;
+ }
+
+ i915_request_add(rq);
+
+ if (!igt_wait_for_spinner(&spin, rq)) {
+ pr_err("%s: RPS spinner did not start\n",
+ engine->name);
+ igt_spinner_end(&spin);
+ engine_heartbeat_enable(engine, saved_heartbeat);
+ intel_gt_set_wedged(engine->gt);
+ err = -EIO;
+ break;
+ }
+
+ if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
+ pr_err("%s: could not set minimum frequency [%x], only %x!\n",
+ engine->name, rps->min_freq, read_cagf(rps));
+ igt_spinner_end(&spin);
+ engine_heartbeat_enable(engine, saved_heartbeat);
+ show_pstate_limits(rps);
+ err = -EINVAL;
+ break;
+ }
+
+ for (f = rps->min_freq + 1; f < rps->max_freq; f++) {
+ if (rps_set_check(rps, f) < f)
+ break;
+ }
+
+ limit = rps_set_check(rps, f);
+
+ if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
+ pr_err("%s: could not restore minimum frequency [%x], only %x!\n",
+ engine->name, rps->min_freq, read_cagf(rps));
+ igt_spinner_end(&spin);
+ engine_heartbeat_enable(engine, saved_heartbeat);
+ show_pstate_limits(rps);
+ err = -EINVAL;
+ break;
+ }
+
+ max_dt = ktime_get();
+ max = rps_set_check(rps, limit);
+ max_dt = ktime_sub(ktime_get(), max_dt);
+
+ min_dt = ktime_get();
+ min = rps_set_check(rps, rps->min_freq);
+ min_dt = ktime_sub(ktime_get(), min_dt);
+
+ igt_spinner_end(&spin);
+ engine_heartbeat_enable(engine, saved_heartbeat);
+
+ pr_info("%s: range:[%x:%uMHz, %x:%uMHz] limit:[%x:%uMHz], %x:%x response %lluns:%lluns\n",
+ engine->name,
+ rps->min_freq, intel_gpu_freq(rps, rps->min_freq),
+ rps->max_freq, intel_gpu_freq(rps, rps->max_freq),
+ limit, intel_gpu_freq(rps, limit),
+ min, max, ktime_to_ns(min_dt), ktime_to_ns(max_dt));
+
+ if (limit == rps->min_freq) {
+ pr_err("%s: GPU throttled to minimum!\n",
+ engine->name);
+ show_pstate_limits(rps);
+ err = -ENODEV;
+ break;
+ }
+
+ if (igt_flush_test(gt->i915)) {
+ err = -EIO;
+ break;
+ }
+ }
+ intel_gt_pm_put(gt);
+
+ igt_spinner_fini(&spin);
+
+ intel_gt_pm_wait_for_idle(gt);
+ rps->work.func = saved_work;
+
+ return err;
+}
+
+static void show_pcu_config(struct intel_rps *rps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ unsigned int max_gpu_freq, min_gpu_freq;
+ intel_wakeref_t wakeref;
+ int gpu_freq;
+
+ if (!HAS_LLC(i915))
+ return;
+
+ min_gpu_freq = rps->min_freq;
+ max_gpu_freq = rps->max_freq;
+ if (INTEL_GEN(i915) >= 9) {
+ /* Convert GT frequency to 50 HZ units */
+ min_gpu_freq /= GEN9_FREQ_SCALER;
+ max_gpu_freq /= GEN9_FREQ_SCALER;
+ }
+
+ wakeref = intel_runtime_pm_get(rps_to_uncore(rps)->rpm);
+
+ pr_info("%5s %5s %5s\n", "GPU", "eCPU", "eRing");
+ for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) {
+ int ia_freq = gpu_freq;
+
+ sandybridge_pcode_read(i915,
+ GEN6_PCODE_READ_MIN_FREQ_TABLE,
+ &ia_freq, NULL);
+
+ pr_info("%5d %5d %5d\n",
+ gpu_freq * 50,
+ ((ia_freq >> 0) & 0xff) * 100,
+ ((ia_freq >> 8) & 0xff) * 100);
+ }
+
+ intel_runtime_pm_put(rps_to_uncore(rps)->rpm, wakeref);
+}
+
+static u64 __measure_frequency(u32 *cntr, int duration_ms)
+{
+ u64 dc, dt;
+
+ dt = ktime_get();
+ dc = READ_ONCE(*cntr);
+ usleep_range(1000 * duration_ms, 2000 * duration_ms);
+ dc = READ_ONCE(*cntr) - dc;
+ dt = ktime_get() - dt;
+
+ return div64_u64(1000 * 1000 * dc, dt);
+}
+
+static u64 measure_frequency_at(struct intel_rps *rps, u32 *cntr, int *freq)
+{
+ u64 x[5];
+ int i;
+
+ *freq = rps_set_check(rps, *freq);
+ for (i = 0; i < 5; i++)
+ x[i] = __measure_frequency(cntr, 2);
+ *freq = (*freq + read_cagf(rps)) / 2;
+
+ /* A simple triangle filter for better result stability */
+ sort(x, 5, sizeof(*x), cmp_u64, NULL);
+ return div_u64(x[1] + 2 * x[2] + x[3], 4);
+}
+
+static u64 __measure_cs_frequency(struct intel_engine_cs *engine,
+ int duration_ms)
+{
+ u64 dc, dt;
+
+ dt = ktime_get();
+ dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0));
+ usleep_range(1000 * duration_ms, 2000 * duration_ms);
+ dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0)) - dc;
+ dt = ktime_get() - dt;
+
+ return div64_u64(1000 * 1000 * dc, dt);
+}
+
+static u64 measure_cs_frequency_at(struct intel_rps *rps,
+ struct intel_engine_cs *engine,
+ int *freq)
+{
+ u64 x[5];
+ int i;
+
+ *freq = rps_set_check(rps, *freq);
+ for (i = 0; i < 5; i++)
+ x[i] = __measure_cs_frequency(engine, 2);
+ *freq = (*freq + read_cagf(rps)) / 2;
+
+ /* A simple triangle filter for better result stability */
+ sort(x, 5, sizeof(*x), cmp_u64, NULL);
+ return div_u64(x[1] + 2 * x[2] + x[3], 4);
+}
+
+static bool scaled_within(u64 x, u64 y, u32 f_n, u32 f_d)
+{
+ return f_d * x > f_n * y && f_n * x < f_d * y;
+}
+
+int live_rps_frequency_cs(void *arg)
+{
+ void (*saved_work)(struct work_struct *wrk);
+ struct intel_gt *gt = arg;
+ struct intel_rps *rps = &gt->rps;
+ struct intel_engine_cs *engine;
+ struct pm_qos_request qos;
+ enum intel_engine_id id;
+ int err = 0;
+
+ /*
+ * The premise is that the GPU does change frequency at our behest.
+ * Let's check there is a correspondence between the requested
+ * frequency, the actual frequency, and the observed clock rate.
+ */
+
+ if (!intel_rps_is_enabled(rps))
+ return 0;
+
+ if (INTEL_GEN(gt->i915) < 8) /* for CS simplicity */
+ return 0;
+
+ if (CPU_LATENCY >= 0)
+ cpu_latency_qos_add_request(&qos, CPU_LATENCY);
+
+ intel_gt_pm_wait_for_idle(gt);
+ saved_work = rps->work.func;
+ rps->work.func = dummy_rps_work;
+
+ for_each_engine(engine, gt, id) {
+ unsigned long saved_heartbeat;
+ struct i915_request *rq;
+ struct i915_vma *vma;
+ u32 *cancel, *cntr;
+ struct {
+ u64 count;
+ int freq;
+ } min, max;
+
+ saved_heartbeat = engine_heartbeat_disable(engine);
+
+ vma = create_spin_counter(engine,
+ engine->kernel_context->vm, false,
+ &cancel, &cntr);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ engine_heartbeat_enable(engine, saved_heartbeat);
+ break;
+ }
+
+ rq = intel_engine_create_kernel_request(engine);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto err_vma;
+ }
+
+ i915_vma_lock(vma);
+ err = i915_request_await_object(rq, vma->obj, false);
+ if (!err)
+ err = i915_vma_move_to_active(vma, rq, 0);
+ if (!err)
+ err = rq->engine->emit_bb_start(rq,
+ vma->node.start,
+ PAGE_SIZE, 0);
+ i915_vma_unlock(vma);
+ i915_request_add(rq);
+ if (err)
+ goto err_vma;
+
+ if (wait_for(intel_uncore_read(engine->uncore, CS_GPR(0)),
+ 10)) {
+ pr_err("%s: timed loop did not start\n",
+ engine->name);
+ goto err_vma;
+ }
+
+ min.freq = rps->min_freq;
+ min.count = measure_cs_frequency_at(rps, engine, &min.freq);
+
+ max.freq = rps->max_freq;
+ max.count = measure_cs_frequency_at(rps, engine, &max.freq);
+
+ pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
+ engine->name,
+ min.count, intel_gpu_freq(rps, min.freq),
+ max.count, intel_gpu_freq(rps, max.freq),
+ (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
+ max.freq * min.count));
+
+ if (!scaled_within(max.freq * min.count,
+ min.freq * max.count,
+ 2, 3)) {
+ int f;
+
+ pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
+ engine->name,
+ max.freq * min.count,
+ min.freq * max.count);
+ show_pcu_config(rps);
+
+ for (f = min.freq + 1; f <= rps->max_freq; f++) {
+ int act = f;
+ u64 count;
+
+ count = measure_cs_frequency_at(rps, engine, &act);
+ if (act < f)
+ break;
+
+ pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
+ engine->name,
+ act, intel_gpu_freq(rps, act), count,
+ (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
+ act * min.count));
+
+ f = act; /* may skip ahead [pcu granularity] */
+ }
+
+ err = -EINVAL;
+ }
+
+err_vma:
+ *cancel = MI_BATCH_BUFFER_END;
+ i915_gem_object_flush_map(vma->obj);
+ i915_gem_object_unpin_map(vma->obj);
+ i915_vma_unpin(vma);
+ i915_vma_put(vma);
+
+ engine_heartbeat_enable(engine, saved_heartbeat);
+ if (igt_flush_test(gt->i915))
+ err = -EIO;
+ if (err)
+ break;
+ }
+
+ intel_gt_pm_wait_for_idle(gt);
+ rps->work.func = saved_work;
+
+ if (CPU_LATENCY >= 0)
+ cpu_latency_qos_remove_request(&qos);
+
+ return err;
+}
+
+int live_rps_frequency_srm(void *arg)
+{
+ void (*saved_work)(struct work_struct *wrk);
+ struct intel_gt *gt = arg;
+ struct intel_rps *rps = &gt->rps;
+ struct intel_engine_cs *engine;
+ struct pm_qos_request qos;
+ enum intel_engine_id id;
+ int err = 0;
+
+ /*
+ * The premise is that the GPU does change frequency at our behest.
+ * Let's check there is a correspondence between the requested
+ * frequency, the actual frequency, and the observed clock rate.
+ */
+
+ if (!intel_rps_is_enabled(rps))
+ return 0;
+
+ if (INTEL_GEN(gt->i915) < 8) /* for CS simplicity */
+ return 0;
+
+ if (CPU_LATENCY >= 0)
+ cpu_latency_qos_add_request(&qos, CPU_LATENCY);
+
+ intel_gt_pm_wait_for_idle(gt);
+ saved_work = rps->work.func;
+ rps->work.func = dummy_rps_work;
+
+ for_each_engine(engine, gt, id) {
+ unsigned long saved_heartbeat;
+ struct i915_request *rq;
+ struct i915_vma *vma;
+ u32 *cancel, *cntr;
+ struct {
+ u64 count;
+ int freq;
+ } min, max;
+
+ saved_heartbeat = engine_heartbeat_disable(engine);
+
+ vma = create_spin_counter(engine,
+ engine->kernel_context->vm, true,
+ &cancel, &cntr);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ engine_heartbeat_enable(engine, saved_heartbeat);
+ break;
+ }
+
+ rq = intel_engine_create_kernel_request(engine);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto err_vma;
+ }
+
+ i915_vma_lock(vma);
+ err = i915_request_await_object(rq, vma->obj, false);
+ if (!err)
+ err = i915_vma_move_to_active(vma, rq, 0);
+ if (!err)
+ err = rq->engine->emit_bb_start(rq,
+ vma->node.start,
+ PAGE_SIZE, 0);
+ i915_vma_unlock(vma);
+ i915_request_add(rq);
+ if (err)
+ goto err_vma;
+
+ if (wait_for(READ_ONCE(*cntr), 10)) {
+ pr_err("%s: timed loop did not start\n",
+ engine->name);
+ goto err_vma;
+ }
+
+ min.freq = rps->min_freq;
+ min.count = measure_frequency_at(rps, cntr, &min.freq);
+
+ max.freq = rps->max_freq;
+ max.count = measure_frequency_at(rps, cntr, &max.freq);
+
+ pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
+ engine->name,
+ min.count, intel_gpu_freq(rps, min.freq),
+ max.count, intel_gpu_freq(rps, max.freq),
+ (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
+ max.freq * min.count));
+
+ if (!scaled_within(max.freq * min.count,
+ min.freq * max.count,
+ 1, 2)) {
+ int f;
+
+ pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
+ engine->name,
+ max.freq * min.count,
+ min.freq * max.count);
+ show_pcu_config(rps);
+
+ for (f = min.freq + 1; f <= rps->max_freq; f++) {
+ int act = f;
+ u64 count;
+
+ count = measure_frequency_at(rps, cntr, &act);
+ if (act < f)
+ break;
+
+ pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
+ engine->name,
+ act, intel_gpu_freq(rps, act), count,
+ (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
+ act * min.count));
+
+ f = act; /* may skip ahead [pcu granularity] */
+ }
+
+ err = -EINVAL;
+ }
+
+err_vma:
+ *cancel = MI_BATCH_BUFFER_END;
+ i915_gem_object_flush_map(vma->obj);
+ i915_gem_object_unpin_map(vma->obj);
+ i915_vma_unpin(vma);
+ i915_vma_put(vma);
+
+ engine_heartbeat_enable(engine, saved_heartbeat);
+ if (igt_flush_test(gt->i915))
+ err = -EIO;
+ if (err)
+ break;
+ }
+
+ intel_gt_pm_wait_for_idle(gt);
+ rps->work.func = saved_work;
+
+ if (CPU_LATENCY >= 0)
+ cpu_latency_qos_remove_request(&qos);
+
+ return err;
+}
+
+static void sleep_for_ei(struct intel_rps *rps, int timeout_us)
+{
+ /* Flush any previous EI */
+ usleep_range(timeout_us, 2 * timeout_us);
+
+ /* Reset the interrupt status */
+ rps_disable_interrupts(rps);
+ GEM_BUG_ON(rps->pm_iir);
+ rps_enable_interrupts(rps);
+
+ /* And then wait for the timeout, for real this time */
+ usleep_range(2 * timeout_us, 3 * timeout_us);
+}
+
+static int __rps_up_interrupt(struct intel_rps *rps,
+ struct intel_engine_cs *engine,
+ struct igt_spinner *spin)
+{
+ struct intel_uncore *uncore = engine->uncore;
+ struct i915_request *rq;
+ u32 timeout;
+
+ if (!intel_engine_can_store_dword(engine))
+ return 0;
+
+ rps_set_check(rps, rps->min_freq);
+
+ rq = igt_spinner_create_request(spin, engine->kernel_context, MI_NOOP);
+ if (IS_ERR(rq))
+ return PTR_ERR(rq);
+
+ i915_request_get(rq);
+ i915_request_add(rq);
+
+ if (!igt_wait_for_spinner(spin, rq)) {
+ pr_err("%s: RPS spinner did not start\n",
+ engine->name);
+ i915_request_put(rq);
+ intel_gt_set_wedged(engine->gt);
+ return -EIO;
+ }
+
+ if (!intel_rps_is_active(rps)) {
+ pr_err("%s: RPS not enabled on starting spinner\n",
+ engine->name);
+ igt_spinner_end(spin);
+ i915_request_put(rq);
+ return -EINVAL;
+ }
+
+ if (!(rps->pm_events & GEN6_PM_RP_UP_THRESHOLD)) {
+ pr_err("%s: RPS did not register UP interrupt\n",
+ engine->name);
+ i915_request_put(rq);
+ return -EINVAL;
+ }
+
+ if (rps->last_freq != rps->min_freq) {
+ pr_err("%s: RPS did not program min frequency\n",
+ engine->name);
+ i915_request_put(rq);
+ return -EINVAL;
+ }
+
+ timeout = intel_uncore_read(uncore, GEN6_RP_UP_EI);
+ timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
+ timeout = DIV_ROUND_UP(timeout, 1000);
+
+ sleep_for_ei(rps, timeout);
+ GEM_BUG_ON(i915_request_completed(rq));
+
+ igt_spinner_end(spin);
+ i915_request_put(rq);
+
+ if (rps->cur_freq != rps->min_freq) {
+ pr_err("%s: Frequency unexpectedly changed [up], now %d!\n",
+ engine->name, intel_rps_read_actual_frequency(rps));
+ return -EINVAL;
+ }
+
+ if (!(rps->pm_iir & GEN6_PM_RP_UP_THRESHOLD)) {
+ pr_err("%s: UP interrupt not recorded for spinner, pm_iir:%x, prev_up:%x, up_threshold:%x, up_ei:%x\n",
+ engine->name, rps->pm_iir,
+ intel_uncore_read(uncore, GEN6_RP_PREV_UP),
+ intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
+ intel_uncore_read(uncore, GEN6_RP_UP_EI));
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int __rps_down_interrupt(struct intel_rps *rps,
+ struct intel_engine_cs *engine)
+{
+ struct intel_uncore *uncore = engine->uncore;
+ u32 timeout;
+
+ rps_set_check(rps, rps->max_freq);
+
+ if (!(rps->pm_events & GEN6_PM_RP_DOWN_THRESHOLD)) {
+ pr_err("%s: RPS did not register DOWN interrupt\n",
+ engine->name);
+ return -EINVAL;
+ }
+
+ if (rps->last_freq != rps->max_freq) {
+ pr_err("%s: RPS did not program max frequency\n",
+ engine->name);
+ return -EINVAL;
+ }
+
+ timeout = intel_uncore_read(uncore, GEN6_RP_DOWN_EI);
+ timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
+ timeout = DIV_ROUND_UP(timeout, 1000);
+
+ sleep_for_ei(rps, timeout);
+
+ if (rps->cur_freq != rps->max_freq) {
+ pr_err("%s: Frequency unexpectedly changed [down], now %d!\n",
+ engine->name,
+ intel_rps_read_actual_frequency(rps));
+ return -EINVAL;
+ }
+
+ if (!(rps->pm_iir & (GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT))) {
+ pr_err("%s: DOWN interrupt not recorded for idle, pm_iir:%x, prev_down:%x, down_threshold:%x, down_ei:%x [prev_up:%x, up_threshold:%x, up_ei:%x]\n",
+ engine->name, rps->pm_iir,
+ intel_uncore_read(uncore, GEN6_RP_PREV_DOWN),
+ intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD),
+ intel_uncore_read(uncore, GEN6_RP_DOWN_EI),
+ intel_uncore_read(uncore, GEN6_RP_PREV_UP),
+ intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
+ intel_uncore_read(uncore, GEN6_RP_UP_EI));
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int live_rps_interrupt(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_rps *rps = &gt->rps;
+ void (*saved_work)(struct work_struct *wrk);
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ struct igt_spinner spin;
+ u32 pm_events;
+ int err = 0;
+
+ /*
+ * First, let's check whether or not we are receiving interrupts.
+ */
+
+ if (!intel_rps_has_interrupts(rps))
+ return 0;
+
+ intel_gt_pm_get(gt);
+ pm_events = rps->pm_events;
+ intel_gt_pm_put(gt);
+ if (!pm_events) {
+ pr_err("No RPS PM events registered, but RPS is enabled?\n");
+ return -ENODEV;
+ }
+
+ if (igt_spinner_init(&spin, gt))
+ return -ENOMEM;
+
+ intel_gt_pm_wait_for_idle(gt);
+ saved_work = rps->work.func;
+ rps->work.func = dummy_rps_work;
+
+ for_each_engine(engine, gt, id) {
+ /* Keep the engine busy with a spinner; expect an UP! */
+ if (pm_events & GEN6_PM_RP_UP_THRESHOLD) {
+ unsigned long saved_heartbeat;
+
+ intel_gt_pm_wait_for_idle(engine->gt);
+ GEM_BUG_ON(intel_rps_is_active(rps));
+
+ saved_heartbeat = engine_heartbeat_disable(engine);
+
+ err = __rps_up_interrupt(rps, engine, &spin);
+
+ engine_heartbeat_enable(engine, saved_heartbeat);
+ if (err)
+ goto out;
+
+ intel_gt_pm_wait_for_idle(engine->gt);
+ }
+
+ /* Keep the engine awake but idle and check for DOWN */
+ if (pm_events & GEN6_PM_RP_DOWN_THRESHOLD) {
+ unsigned long saved_heartbeat;
+
+ saved_heartbeat = engine_heartbeat_disable(engine);
+ intel_rc6_disable(&gt->rc6);
+
+ err = __rps_down_interrupt(rps, engine);
+
+ intel_rc6_enable(&gt->rc6);
+ engine_heartbeat_enable(engine, saved_heartbeat);
+ if (err)
+ goto out;
+ }
+ }
+
+out:
+ if (igt_flush_test(gt->i915))
+ err = -EIO;
+
+ igt_spinner_fini(&spin);
+
+ intel_gt_pm_wait_for_idle(gt);
+ rps->work.func = saved_work;
+
+ return err;
+}
+
+static u64 __measure_power(int duration_ms)
+{
+ u64 dE, dt;
+
+ dt = ktime_get();
+ dE = librapl_energy_uJ();
+ usleep_range(1000 * duration_ms, 2000 * duration_ms);
+ dE = librapl_energy_uJ() - dE;
+ dt = ktime_get() - dt;
+
+ return div64_u64(1000 * 1000 * dE, dt);
+}
+
+static u64 measure_power_at(struct intel_rps *rps, int *freq)
+{
+ u64 x[5];
+ int i;
+
+ *freq = rps_set_check(rps, *freq);
+ for (i = 0; i < 5; i++)
+ x[i] = __measure_power(5);
+ *freq = (*freq + read_cagf(rps)) / 2;
+
+ /* A simple triangle filter for better result stability */
+ sort(x, 5, sizeof(*x), cmp_u64, NULL);
+ return div_u64(x[1] + 2 * x[2] + x[3], 4);
+}
+
+int live_rps_power(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_rps *rps = &gt->rps;
+ void (*saved_work)(struct work_struct *wrk);
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ struct igt_spinner spin;
+ int err = 0;
+
+ /*
+ * Our fundamental assumption is that running at lower frequency
+ * actually saves power. Let's see if our RAPL measurement supports
+ * that theory.
+ */
+
+ if (!intel_rps_is_enabled(rps))
+ return 0;
+
+ if (!librapl_energy_uJ())
+ return 0;
+
+ if (igt_spinner_init(&spin, gt))
+ return -ENOMEM;
+
+ intel_gt_pm_wait_for_idle(gt);
+ saved_work = rps->work.func;
+ rps->work.func = dummy_rps_work;
+
+ for_each_engine(engine, gt, id) {
+ unsigned long saved_heartbeat;
+ struct i915_request *rq;
+ struct {
+ u64 power;
+ int freq;
+ } min, max;
+
+ if (!intel_engine_can_store_dword(engine))
+ continue;
+
+ saved_heartbeat = engine_heartbeat_disable(engine);
+
+ rq = igt_spinner_create_request(&spin,
+ engine->kernel_context,
+ MI_NOOP);
+ if (IS_ERR(rq)) {
+ engine_heartbeat_enable(engine, saved_heartbeat);
+ err = PTR_ERR(rq);
+ break;
+ }
+
+ i915_request_add(rq);
+
+ if (!igt_wait_for_spinner(&spin, rq)) {
+ pr_err("%s: RPS spinner did not start\n",
+ engine->name);
+ igt_spinner_end(&spin);
+ engine_heartbeat_enable(engine, saved_heartbeat);
+ intel_gt_set_wedged(engine->gt);
+ err = -EIO;
+ break;
+ }
+
+ max.freq = rps->max_freq;
+ max.power = measure_power_at(rps, &max.freq);
+
+ min.freq = rps->min_freq;
+ min.power = measure_power_at(rps, &min.freq);
+
+ igt_spinner_end(&spin);
+ engine_heartbeat_enable(engine, saved_heartbeat);
+
+ pr_info("%s: min:%llumW @ %uMHz, max:%llumW @ %uMHz\n",
+ engine->name,
+ min.power, intel_gpu_freq(rps, min.freq),
+ max.power, intel_gpu_freq(rps, max.freq));
+
+ if (10 * min.freq >= 9 * max.freq) {
+ pr_notice("Could not control frequency, ran at [%d:%uMHz, %d:%uMhz]\n",
+ min.freq, intel_gpu_freq(rps, min.freq),
+ max.freq, intel_gpu_freq(rps, max.freq));
+ continue;
+ }
+
+ if (11 * min.power > 10 * max.power) {
+ pr_err("%s: did not conserve power when setting lower frequency!\n",
+ engine->name);
+ err = -EINVAL;
+ break;
+ }
+
+ if (igt_flush_test(gt->i915)) {
+ err = -EIO;
+ break;
+ }
+ }
+
+ igt_spinner_fini(&spin);
+
+ intel_gt_pm_wait_for_idle(gt);
+ rps->work.func = saved_work;
+
+ return err;
+}
+
+int live_rps_dynamic(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_rps *rps = &gt->rps;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ struct igt_spinner spin;
+ int err = 0;
+
+ /*
+ * We've looked at the basics, and have established that we
+ * can change the clock frequency and that the HW will generate
+ * interrupts based on load. Now we check how we integrate those
+ * moving parts into dynamic reclocking based on load.
+ */
+
+ if (!intel_rps_is_enabled(rps))
+ return 0;
+
+ if (igt_spinner_init(&spin, gt))
+ return -ENOMEM;
+
+ for_each_engine(engine, gt, id) {
+ struct i915_request *rq;
+ struct {
+ ktime_t dt;
+ u8 freq;
+ } min, max;
+
+ if (!intel_engine_can_store_dword(engine))
+ continue;
+
+ intel_gt_pm_wait_for_idle(gt);
+ GEM_BUG_ON(intel_rps_is_active(rps));
+ rps->cur_freq = rps->min_freq;
+
+ intel_engine_pm_get(engine);
+ intel_rc6_disable(&gt->rc6);
+ GEM_BUG_ON(rps->last_freq != rps->min_freq);
+
+ rq = igt_spinner_create_request(&spin,
+ engine->kernel_context,
+ MI_NOOP);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto err;
+ }
+
+ i915_request_add(rq);
+
+ max.dt = ktime_get();
+ max.freq = wait_for_freq(rps, rps->max_freq, 500);
+ max.dt = ktime_sub(ktime_get(), max.dt);
+
+ igt_spinner_end(&spin);
+
+ min.dt = ktime_get();
+ min.freq = wait_for_freq(rps, rps->min_freq, 2000);
+ min.dt = ktime_sub(ktime_get(), min.dt);
+
+ pr_info("%s: dynamically reclocked to %u:%uMHz while busy in %lluns, and %u:%uMHz while idle in %lluns\n",
+ engine->name,
+ max.freq, intel_gpu_freq(rps, max.freq),
+ ktime_to_ns(max.dt),
+ min.freq, intel_gpu_freq(rps, min.freq),
+ ktime_to_ns(min.dt));
+ if (min.freq >= max.freq) {
+ pr_err("%s: dynamic reclocking of spinner failed\n!",
+ engine->name);
+ err = -EINVAL;
+ }
+
+err:
+ intel_rc6_enable(&gt->rc6);
+ intel_engine_pm_put(engine);
+
+ if (igt_flush_test(gt->i915))
+ err = -EIO;
+ if (err)
+ break;
+ }
+
+ igt_spinner_fini(&spin);
+
+ return err;
+}
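
Each of the measure_*_at() helpers above reduces five samples with what the comments call a simple triangle filter: sort, drop the two extremes, and take a 1:2:1 weighted average of the middle three. For example, samples {9, 3, 5, 4, 100} sort to {3, 4, 5, 9, 100} and reduce to (4 + 2*5 + 9) / 4 = 5 with integer division, so a single outlier barely moves the result. A standalone sketch of the same reduction (illustrative, not part of the patch):

	static u64 triangle_filter(u64 x[5])
	{
		sort(x, 5, sizeof(*x), cmp_u64, NULL); /* cmp_u64 as defined above */
		return div_u64(x[1] + 2 * x[2] + x[3], 4);
	}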
diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.h b/drivers/gpu/drm/i915/gt/selftest_rps.h
new file mode 100644
index 000000000000..6e82a631cfa1
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/selftest_rps.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#ifndef SELFTEST_RPS_H
+#define SELFTEST_RPS_H
+
+int live_rps_control(void *arg);
+int live_rps_clock_interval(void *arg);
+int live_rps_frequency_cs(void *arg);
+int live_rps_frequency_srm(void *arg);
+int live_rps_power(void *arg);
+int live_rps_interrupt(void *arg);
+int live_rps_dynamic(void *arg);
+
+#endif /* SELFTEST_RPS_H */
diff --git a/drivers/gpu/drm/i915/gt/shmem_utils.c b/drivers/gpu/drm/i915/gt/shmem_utils.c
new file mode 100644
index 000000000000..43c7acbdc79d
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/shmem_utils.c
@@ -0,0 +1,173 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/shmem_fs.h>
+
+#include "gem/i915_gem_object.h"
+#include "shmem_utils.h"
+
+struct file *shmem_create_from_data(const char *name, void *data, size_t len)
+{
+ struct file *file;
+ int err;
+
+ file = shmem_file_setup(name, PAGE_ALIGN(len), VM_NORESERVE);
+ if (IS_ERR(file))
+ return file;
+
+ err = shmem_write(file, 0, data, len);
+ if (err) {
+ fput(file);
+ return ERR_PTR(err);
+ }
+
+ return file;
+}
+
+struct file *shmem_create_from_object(struct drm_i915_gem_object *obj)
+{
+ struct file *file;
+ void *ptr;
+
+ if (obj->ops == &i915_gem_shmem_ops) {
+ file = obj->base.filp;
+ atomic_long_inc(&file->f_count);
+ return file;
+ }
+
+ ptr = i915_gem_object_pin_map(obj, I915_MAP_WB);
+ if (IS_ERR(ptr))
+ return ERR_CAST(ptr);
+
+ file = shmem_create_from_data("", ptr, obj->base.size);
+ i915_gem_object_unpin_map(obj);
+
+ return file;
+}
+
+static size_t shmem_npte(struct file *file)
+{
+ return file->f_mapping->host->i_size >> PAGE_SHIFT;
+}
+
+static void __shmem_unpin_map(struct file *file, void *ptr, size_t n_pte)
+{
+ unsigned long pfn;
+
+ vunmap(ptr);
+
+ for (pfn = 0; pfn < n_pte; pfn++) {
+ struct page *page;
+
+ page = shmem_read_mapping_page_gfp(file->f_mapping, pfn,
+ GFP_KERNEL);
+ if (!WARN_ON(IS_ERR(page))) {
+ put_page(page);
+ put_page(page);
+ }
+ }
+}
+
+void *shmem_pin_map(struct file *file)
+{
+ const size_t n_pte = shmem_npte(file);
+ pte_t *stack[32], **ptes, **mem;
+ struct vm_struct *area;
+ unsigned long pfn;
+
+ mem = stack;
+ if (n_pte > ARRAY_SIZE(stack)) {
+ mem = kvmalloc_array(n_pte, sizeof(*mem), GFP_KERNEL);
+ if (!mem)
+ return NULL;
+ }
+
+ area = alloc_vm_area(n_pte << PAGE_SHIFT, mem);
+ if (!area) {
+ if (mem != stack)
+ kvfree(mem);
+ return NULL;
+ }
+
+ ptes = mem;
+ for (pfn = 0; pfn < n_pte; pfn++) {
+ struct page *page;
+
+ page = shmem_read_mapping_page_gfp(file->f_mapping, pfn,
+ GFP_KERNEL);
+ if (IS_ERR(page))
+ goto err_page;
+
+ **ptes++ = mk_pte(page, PAGE_KERNEL);
+ }
+
+ if (mem != stack)
+ kvfree(mem);
+
+ mapping_set_unevictable(file->f_mapping);
+ return area->addr;
+
+err_page:
+ if (mem != stack)
+ kvfree(mem);
+
+ __shmem_unpin_map(file, area->addr, pfn);
+ return NULL;
+}
+
+void shmem_unpin_map(struct file *file, void *ptr)
+{
+ mapping_clear_unevictable(file->f_mapping);
+ __shmem_unpin_map(file, ptr, shmem_npte(file));
+}
+
+static int __shmem_rw(struct file *file, loff_t off,
+ void *ptr, size_t len,
+ bool write)
+{
+ unsigned long pfn;
+
+ for (pfn = off >> PAGE_SHIFT; len; pfn++) {
+ unsigned int this =
+ min_t(size_t, PAGE_SIZE - offset_in_page(off), len);
+ struct page *page;
+ void *vaddr;
+
+ page = shmem_read_mapping_page_gfp(file->f_mapping, pfn,
+ GFP_KERNEL);
+ if (IS_ERR(page))
+ return PTR_ERR(page);
+
+ vaddr = kmap(page);
+ if (write)
+ memcpy(vaddr + offset_in_page(off), ptr, this);
+ else
+ memcpy(ptr, vaddr + offset_in_page(off), this);
+ kunmap(page);
+ put_page(page);
+
+ len -= this;
+ ptr += this;
+ off = 0;
+ }
+
+ return 0;
+}
+
+int shmem_read(struct file *file, loff_t off, void *dst, size_t len)
+{
+ return __shmem_rw(file, off, dst, len, false);
+}
+
+int shmem_write(struct file *file, loff_t off, void *src, size_t len)
+{
+ return __shmem_rw(file, off, src, len, true);
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "st_shmem_utils.c"
+#endif
diff --git a/drivers/gpu/drm/i915/gt/shmem_utils.h b/drivers/gpu/drm/i915/gt/shmem_utils.h
new file mode 100644
index 000000000000..c1669170c351
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/shmem_utils.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#ifndef SHMEM_UTILS_H
+#define SHMEM_UTILS_H
+
+#include <linux/types.h>
+
+struct drm_i915_gem_object;
+struct file;
+
+struct file *shmem_create_from_data(const char *name, void *data, size_t len);
+struct file *shmem_create_from_object(struct drm_i915_gem_object *obj);
+
+void *shmem_pin_map(struct file *file);
+void shmem_unpin_map(struct file *file, void *ptr);
+
+int shmem_read(struct file *file, loff_t off, void *dst, size_t len);
+int shmem_write(struct file *file, loff_t off, void *src, size_t len);
+
+#endif /* SHMEM_UTILS_H */
diff --git a/drivers/gpu/drm/i915/gt/st_shmem_utils.c b/drivers/gpu/drm/i915/gt/st_shmem_utils.c
new file mode 100644
index 000000000000..b279fe88b70e
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/st_shmem_utils.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+/* Just a quick and casual check of the shmem_utils API */
+
+static int igt_shmem_basic(void *ignored)
+{
+ u32 datum = 0xdeadbeef, result;
+ struct file *file;
+ u32 *map;
+ int err;
+
+ file = shmem_create_from_data("mock", &datum, sizeof(datum));
+ if (IS_ERR(file))
+ return PTR_ERR(file);
+
+ result = 0;
+ err = shmem_read(file, 0, &result, sizeof(result));
+ if (err)
+ goto out_file;
+
+ if (result != datum) {
+ pr_err("Incorrect read back from shmemfs: %x != %x\n",
+ result, datum);
+ err = -EINVAL;
+ goto out_file;
+ }
+
+ result = 0xc0ffee;
+ err = shmem_write(file, 0, &result, sizeof(result));
+ if (err)
+ goto out_file;
+
+ map = shmem_pin_map(file);
+ if (!map) {
+ err = -ENOMEM;
+ goto out_file;
+ }
+
+ if (*map != result) {
+ pr_err("Incorrect read back via mmap of last write: %x != %x\n",
+ *map, result);
+ err = -EINVAL;
+ goto out_map;
+ }
+
+out_map:
+ shmem_unpin_map(file, map);
+out_file:
+ fput(file);
+ return err;
+}
+
+int shmem_utils_mock_selftests(void)
+{
+ static const struct i915_subtest tests[] = {
+ SUBTEST(igt_shmem_basic),
+ };
+
+ return i915_subtests(tests, NULL);
+}
diff --git a/drivers/gpu/drm/i915/gt/sysfs_engines.c b/drivers/gpu/drm/i915/gt/sysfs_engines.c
index 8f9b2f33dbaf..535cc1169e54 100644
--- a/drivers/gpu/drm/i915/gt/sysfs_engines.c
+++ b/drivers/gpu/drm/i915/gt/sysfs_engines.c
@@ -192,6 +192,17 @@ static struct kobj_attribute max_spin_attr =
__ATTR(max_busywait_duration_ns, 0644, max_spin_show, max_spin_store);
static ssize_t
+max_spin_default(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+ struct intel_engine_cs *engine = kobj_to_engine(kobj);
+
+ return sprintf(buf, "%lu\n", engine->defaults.max_busywait_duration_ns);
+}
+
+static struct kobj_attribute max_spin_def =
+__ATTR(max_busywait_duration_ns, 0444, max_spin_default, NULL);
+
+static ssize_t
timeslice_store(struct kobject *kobj, struct kobj_attribute *attr,
const char *buf, size_t count)
{
@@ -234,6 +245,17 @@ static struct kobj_attribute timeslice_duration_attr =
__ATTR(timeslice_duration_ms, 0644, timeslice_show, timeslice_store);
static ssize_t
+timeslice_default(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+ struct intel_engine_cs *engine = kobj_to_engine(kobj);
+
+ return sprintf(buf, "%lu\n", engine->defaults.timeslice_duration_ms);
+}
+
+static struct kobj_attribute timeslice_duration_def =
+__ATTR(timeslice_duration_ms, 0444, timeslice_default, NULL);
+
+static ssize_t
stop_store(struct kobject *kobj, struct kobj_attribute *attr,
const char *buf, size_t count)
{
@@ -273,6 +295,17 @@ static struct kobj_attribute stop_timeout_attr =
__ATTR(stop_timeout_ms, 0644, stop_show, stop_store);
static ssize_t
+stop_default(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+ struct intel_engine_cs *engine = kobj_to_engine(kobj);
+
+ return sprintf(buf, "%lu\n", engine->defaults.stop_timeout_ms);
+}
+
+static struct kobj_attribute stop_timeout_def =
+__ATTR(stop_timeout_ms, 0444, stop_default, NULL);
+
+static ssize_t
preempt_timeout_store(struct kobject *kobj, struct kobj_attribute *attr,
const char *buf, size_t count)
{
@@ -317,6 +350,18 @@ static struct kobj_attribute preempt_timeout_attr =
__ATTR(preempt_timeout_ms, 0644, preempt_timeout_show, preempt_timeout_store);
static ssize_t
+preempt_timeout_default(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ struct intel_engine_cs *engine = kobj_to_engine(kobj);
+
+ return sprintf(buf, "%lu\n", engine->defaults.preempt_timeout_ms);
+}
+
+static struct kobj_attribute preempt_timeout_def =
+__ATTR(preempt_timeout_ms, 0444, preempt_timeout_default, NULL);
+
+static ssize_t
heartbeat_store(struct kobject *kobj, struct kobj_attribute *attr,
const char *buf, size_t count)
{
@@ -359,6 +404,17 @@ heartbeat_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
static struct kobj_attribute heartbeat_interval_attr =
__ATTR(heartbeat_interval_ms, 0644, heartbeat_show, heartbeat_store);
+static ssize_t
+heartbeat_default(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+ struct intel_engine_cs *engine = kobj_to_engine(kobj);
+
+ return sprintf(buf, "%lu\n", engine->defaults.heartbeat_interval_ms);
+}
+
+static struct kobj_attribute heartbeat_interval_def =
+__ATTR(heartbeat_interval_ms, 0444, heartbeat_default, NULL);
+
static void kobj_engine_release(struct kobject *kobj)
{
kfree(kobj);
@@ -390,6 +446,42 @@ kobj_engine(struct kobject *dir, struct intel_engine_cs *engine)
return &ke->base;
}
+static void add_defaults(struct kobj_engine *parent)
+{
+ static const struct attribute *files[] = {
+ &max_spin_def.attr,
+ &stop_timeout_def.attr,
+#if CONFIG_DRM_I915_HEARTBEAT_INTERVAL
+ &heartbeat_interval_def.attr,
+#endif
+ NULL
+ };
+ struct kobj_engine *ke;
+
+ ke = kzalloc(sizeof(*ke), GFP_KERNEL);
+ if (!ke)
+ return;
+
+ kobject_init(&ke->base, &kobj_engine_type);
+ ke->engine = parent->engine;
+
+ if (kobject_add(&ke->base, &parent->base, "%s", ".defaults")) {
+ kobject_put(&ke->base);
+ return;
+ }
+
+ if (sysfs_create_files(&ke->base, files))
+ return;
+
+ if (intel_engine_has_timeslices(ke->engine) &&
+ sysfs_create_file(&ke->base, &timeslice_duration_def.attr))
+ return;
+
+ if (intel_engine_has_preempt_reset(ke->engine) &&
+ sysfs_create_file(&ke->base, &preempt_timeout_def.attr))
+ return;
+}
+
void intel_engines_add_sysfs(struct drm_i915_private *i915)
{
static const struct attribute *files[] = {
@@ -433,6 +525,8 @@ void intel_engines_add_sysfs(struct drm_i915_private *i915)
sysfs_create_file(kobj, &preempt_timeout_attr.attr))
goto err_engine;
+ add_defaults(container_of(kobj, struct kobj_engine, base));
+
if (0) {
err_object:
kobject_put(kobj);
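Not part of the patch: a minimal userspace sketch of how the new read-only ".defaults" attributes added above could be used to restore an engine tunable to its shipped value. The sysfs path is an assumption based on where the per-engine kobjects are registered; the card and engine names vary per system.

    #include <stdio.h>
    #include <stdlib.h>

    int main(void)
    {
    	/* Assumed location of the per-engine sysfs directory. */
    	const char *base = "/sys/class/drm/card0/engine/rcs0";
    	char path[256], buf[64];
    	unsigned long def;
    	FILE *f;

    	/* Read the immutable default exposed by the ".defaults" kobject. */
    	snprintf(path, sizeof(path), "%s/.defaults/preempt_timeout_ms", base);
    	f = fopen(path, "r");
    	if (!f)
    		return 1;
    	if (!fgets(buf, sizeof(buf), f)) {
    		fclose(f);
    		return 1;
    	}
    	fclose(f);
    	def = strtoul(buf, NULL, 10);
    	printf("default preempt_timeout_ms = %lu\n", def);

    	/* Write it back to the writable attribute to restore the default. */
    	snprintf(path, sizeof(path), "%s/preempt_timeout_ms", base);
    	f = fopen(path, "w");
    	if (!f)
    		return 1;
    	fprintf(f, "%lu\n", def);
    	fclose(f);
    	return 0;
    }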
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
index 819f09ef51fc..861657897c0f 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
@@ -169,7 +169,7 @@ void intel_guc_init_early(struct intel_guc *guc)
{
struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
- intel_guc_fw_init_early(guc);
+ intel_uc_fw_init_early(&guc->fw, INTEL_UC_FW_TYPE_GUC);
intel_guc_ct_init_early(&guc->ct);
intel_guc_log_init_early(&guc->log);
intel_guc_submission_init_early(guc);
@@ -723,3 +723,47 @@ int intel_guc_allocate_and_map_vma(struct intel_guc *guc, u32 size,
return 0;
}
+
+/**
+ * intel_guc_load_status - dump information about GuC load status
+ * @guc: the GuC
+ * @p: the &drm_printer
+ *
+ * Pretty printer for GuC load status.
+ */
+void intel_guc_load_status(struct intel_guc *guc, struct drm_printer *p)
+{
+ struct intel_gt *gt = guc_to_gt(guc);
+ struct intel_uncore *uncore = gt->uncore;
+ intel_wakeref_t wakeref;
+
+ if (!intel_guc_is_supported(guc)) {
+ drm_printf(p, "GuC not supported\n");
+ return;
+ }
+
+ if (!intel_guc_is_wanted(guc)) {
+ drm_printf(p, "GuC disabled\n");
+ return;
+ }
+
+ intel_uc_fw_dump(&guc->fw, p);
+
+ with_intel_runtime_pm(uncore->rpm, wakeref) {
+ u32 status = intel_uncore_read(uncore, GUC_STATUS);
+ u32 i;
+
+ drm_printf(p, "\nGuC status 0x%08x:\n", status);
+ drm_printf(p, "\tBootrom status = 0x%x\n",
+ (status & GS_BOOTROM_MASK) >> GS_BOOTROM_SHIFT);
+ drm_printf(p, "\tuKernel status = 0x%x\n",
+ (status & GS_UKERNEL_MASK) >> GS_UKERNEL_SHIFT);
+ drm_printf(p, "\tMIA Core status = 0x%x\n",
+ (status & GS_MIA_MASK) >> GS_MIA_SHIFT);
+ drm_puts(p, "\nScratch registers:\n");
+ for (i = 0; i < 16; i++) {
+ drm_printf(p, "\t%2d: \t0x%x\n",
+ i, intel_uncore_read(uncore, SOFT_SCRATCH(i)));
+ }
+ }
+}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index 4594ccbeaa34..e84ab67b317d 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -74,6 +74,11 @@ struct intel_guc {
struct mutex send_mutex;
};
+static inline struct intel_guc *log_to_guc(struct intel_guc_log *log)
+{
+ return container_of(log, struct intel_guc, log);
+}
+
static
inline int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 len)
{
@@ -190,4 +195,6 @@ static inline void intel_guc_disable_msg(struct intel_guc *guc, u32 mask)
int intel_guc_reset_engine(struct intel_guc *guc,
struct intel_engine_cs *engine);
+void intel_guc_load_status(struct intel_guc *guc, struct drm_printer *p);
+
#endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c
new file mode 100644
index 000000000000..fe7cb7b29a1e
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#include <drm/drm_print.h>
+
+#include "gt/debugfs_gt.h"
+#include "intel_guc.h"
+#include "intel_guc_debugfs.h"
+#include "intel_guc_log_debugfs.h"
+
+static int guc_info_show(struct seq_file *m, void *data)
+{
+ struct intel_guc *guc = m->private;
+ struct drm_printer p = drm_seq_file_printer(m);
+
+ if (!intel_guc_is_supported(guc))
+ return -ENODEV;
+
+ intel_guc_load_status(guc, &p);
+ drm_puts(&p, "\n");
+ intel_guc_log_info(&guc->log, &p);
+
+ /* Add more as required ... */
+
+ return 0;
+}
+DEFINE_GT_DEBUGFS_ATTRIBUTE(guc_info);
+
+void intel_guc_debugfs_register(struct intel_guc *guc, struct dentry *root)
+{
+ static const struct debugfs_gt_file files[] = {
+ { "guc_info", &guc_info_fops, NULL },
+ };
+
+ if (!intel_guc_is_supported(guc))
+ return;
+
+ intel_gt_debugfs_register_files(root, files, ARRAY_SIZE(files), guc);
+ intel_guc_log_debugfs_register(&guc->log, root);
+}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.h
new file mode 100644
index 000000000000..424c26665cf1
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#ifndef DEBUGFS_GUC_H
+#define DEBUGFS_GUC_H
+
+struct intel_guc;
+struct dentry;
+
+void intel_guc_debugfs_register(struct intel_guc *guc, struct dentry *root);
+
+#endif /* DEBUGFS_GUC_H */
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
index 3a1c47d600ea..d4a87f4c9421 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
@@ -13,20 +13,6 @@
#include "intel_guc_fw.h"
#include "i915_drv.h"
-/**
- * intel_guc_fw_init_early() - initializes GuC firmware struct
- * @guc: intel_guc struct
- *
- * On platforms with GuC selects firmware for uploading
- */
-void intel_guc_fw_init_early(struct intel_guc *guc)
-{
- struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
-
- intel_uc_fw_init_early(&guc->fw, INTEL_UC_FW_TYPE_GUC, HAS_GT_UC(i915),
- INTEL_INFO(i915)->platform, INTEL_REVID(i915));
-}
-
static void guc_prepare_xfer(struct intel_uncore *uncore)
{
u32 shim_flags = GUC_DISABLE_SRAM_INIT_TO_ZEROES |
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.h
index b5ab639d7259..0b4d2a9c9435 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.h
@@ -8,7 +8,6 @@
struct intel_guc;
-void intel_guc_fw_init_early(struct intel_guc *guc);
int intel_guc_fw_upload(struct intel_guc *guc);
#endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
index caed0d57e704..fb10f3597ea5 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
@@ -55,11 +55,6 @@ static int guc_action_control_log(struct intel_guc *guc, bool enable,
return intel_guc_send(guc, action, ARRAY_SIZE(action));
}
-static inline struct intel_guc *log_to_guc(struct intel_guc_log *log)
-{
- return container_of(log, struct intel_guc, log);
-}
-
static void guc_log_enable_flush_events(struct intel_guc_log *log)
{
intel_guc_enable_msg(log_to_guc(log),
@@ -672,3 +667,95 @@ void intel_guc_log_handle_flush_event(struct intel_guc_log *log)
{
queue_work(system_highpri_wq, &log->relay.flush_work);
}
+
+static const char *
+stringify_guc_log_type(enum guc_log_buffer_type type)
+{
+ switch (type) {
+ case GUC_ISR_LOG_BUFFER:
+ return "ISR";
+ case GUC_DPC_LOG_BUFFER:
+ return "DPC";
+ case GUC_CRASH_DUMP_LOG_BUFFER:
+ return "CRASH";
+ default:
+ MISSING_CASE(type);
+ }
+
+ return "";
+}
+
+/**
+ * intel_guc_log_info - dump information about GuC log relay
+ * @log: the GuC log
+ * @p: the &drm_printer
+ *
+ * Pretty printer for GuC log info
+ */
+void intel_guc_log_info(struct intel_guc_log *log, struct drm_printer *p)
+{
+ enum guc_log_buffer_type type;
+
+ if (!intel_guc_log_relay_created(log)) {
+ drm_puts(p, "GuC log relay not created\n");
+ return;
+ }
+
+ drm_puts(p, "GuC logging stats:\n");
+
+ drm_printf(p, "\tRelay full count: %u\n", log->relay.full_count);
+
+ for (type = GUC_ISR_LOG_BUFFER; type < GUC_MAX_LOG_BUFFER; type++) {
+ drm_printf(p, "\t%s:\tflush count %10u, overflow count %10u\n",
+ stringify_guc_log_type(type),
+ log->stats[type].flush,
+ log->stats[type].sampled_overflow);
+ }
+}
+
+/**
+ * intel_guc_log_dump - dump the contents of the GuC log
+ * @log: the GuC log
+ * @p: the &drm_printer
+ * @dump_load_err: dump the log saved on GuC load error
+ *
+ * Pretty printer for the GuC log
+ */
+int intel_guc_log_dump(struct intel_guc_log *log, struct drm_printer *p,
+ bool dump_load_err)
+{
+ struct intel_guc *guc = log_to_guc(log);
+ struct intel_uc *uc = container_of(guc, struct intel_uc, guc);
+ struct drm_i915_gem_object *obj = NULL;
+ u32 *map;
+ int i = 0;
+
+ if (!intel_guc_is_supported(guc))
+ return -ENODEV;
+
+ if (dump_load_err)
+ obj = uc->load_err_log;
+ else if (guc->log.vma)
+ obj = guc->log.vma->obj;
+
+ if (!obj)
+ return 0;
+
+ map = i915_gem_object_pin_map(obj, I915_MAP_WC);
+ if (IS_ERR(map)) {
+ DRM_DEBUG("Failed to pin object\n");
+ drm_puts(p, "(log data unaccessible)\n");
+ return PTR_ERR(map);
+ }
+
+ for (i = 0; i < obj->base.size / sizeof(u32); i += 4)
+ drm_printf(p, "0x%08x 0x%08x 0x%08x 0x%08x\n",
+ *(map + i), *(map + i + 1),
+ *(map + i + 2), *(map + i + 3));
+
+ drm_puts(p, "\n");
+
+ i915_gem_object_unpin_map(obj);
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h
index c252c022c5fc..11fccd0b2294 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h
@@ -79,4 +79,8 @@ static inline u32 intel_guc_log_get_level(struct intel_guc_log *log)
return log->level;
}
+void intel_guc_log_info(struct intel_guc_log *log, struct drm_printer *p);
+int intel_guc_log_dump(struct intel_guc_log *log, struct drm_printer *p,
+ bool dump_load_err);
+
#endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.c
new file mode 100644
index 000000000000..129e0cf7dfe2
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.c
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#include <linux/fs.h>
+#include <drm/drm_print.h>
+
+#include "gt/debugfs_gt.h"
+#include "intel_guc.h"
+#include "intel_guc_log.h"
+#include "intel_guc_log_debugfs.h"
+
+static int guc_log_dump_show(struct seq_file *m, void *data)
+{
+ struct drm_printer p = drm_seq_file_printer(m);
+
+ return intel_guc_log_dump(m->private, &p, false);
+}
+DEFINE_GT_DEBUGFS_ATTRIBUTE(guc_log_dump);
+
+static int guc_load_err_log_dump_show(struct seq_file *m, void *data)
+{
+ struct drm_printer p = drm_seq_file_printer(m);
+
+ return intel_guc_log_dump(m->private, &p, true);
+}
+DEFINE_GT_DEBUGFS_ATTRIBUTE(guc_load_err_log_dump);
+
+static int guc_log_level_get(void *data, u64 *val)
+{
+ struct intel_guc_log *log = data;
+
+ if (!intel_guc_is_used(log_to_guc(log)))
+ return -ENODEV;
+
+ *val = intel_guc_log_get_level(log);
+
+ return 0;
+}
+
+static int guc_log_level_set(void *data, u64 val)
+{
+ struct intel_guc_log *log = data;
+
+ if (!intel_guc_is_used(log_to_guc(log)))
+ return -ENODEV;
+
+ return intel_guc_log_set_level(log, val);
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(guc_log_level_fops,
+ guc_log_level_get, guc_log_level_set,
+ "%lld\n");
+
+static int guc_log_relay_open(struct inode *inode, struct file *file)
+{
+ struct intel_guc_log *log = inode->i_private;
+
+ if (!intel_guc_is_ready(log_to_guc(log)))
+ return -ENODEV;
+
+ file->private_data = log;
+
+ return intel_guc_log_relay_open(log);
+}
+
+static ssize_t
+guc_log_relay_write(struct file *filp,
+ const char __user *ubuf,
+ size_t cnt,
+ loff_t *ppos)
+{
+ struct intel_guc_log *log = filp->private_data;
+ int val;
+ int ret;
+
+ ret = kstrtoint_from_user(ubuf, cnt, 0, &val);
+ if (ret < 0)
+ return ret;
+
+ /*
+ * Enable and start the GuC log relay when the value written is 1.
+ * Flush the log relay for any other value.
+ */
+ if (val == 1)
+ ret = intel_guc_log_relay_start(log);
+ else
+ intel_guc_log_relay_flush(log);
+
+ return ret ?: cnt;
+}
+
+static int guc_log_relay_release(struct inode *inode, struct file *file)
+{
+ struct intel_guc_log *log = inode->i_private;
+
+ intel_guc_log_relay_close(log);
+ return 0;
+}
+
+static const struct file_operations guc_log_relay_fops = {
+ .owner = THIS_MODULE,
+ .open = guc_log_relay_open,
+ .write = guc_log_relay_write,
+ .release = guc_log_relay_release,
+};
+
+void intel_guc_log_debugfs_register(struct intel_guc_log *log,
+ struct dentry *root)
+{
+ static const struct debugfs_gt_file files[] = {
+ { "guc_log_dump", &guc_log_dump_fops, NULL },
+ { "guc_load_err_log_dump", &guc_load_err_log_dump_fops, NULL },
+ { "guc_log_level", &guc_log_level_fops, NULL },
+ { "guc_log_relay", &guc_log_relay_fops, NULL },
+ };
+
+ if (!intel_guc_is_supported(log_to_guc(log)))
+ return;
+
+ intel_gt_debugfs_register_files(root, files, ARRAY_SIZE(files), log);
+}
+
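Not part of the patch: a minimal sketch of driving the new guc_log_relay debugfs file from userspace. Per the write handler above, writing "1" enables and starts the relay, and any other value flushes it. The debugfs path is an assumption (the "uc" directory created under the GT debugfs root of the DRM minor).

    #include <fcntl.h>
    #include <unistd.h>

    int main(void)
    {
    	/* Assumed debugfs location; adjust the DRM minor as needed. */
    	const char *path = "/sys/kernel/debug/dri/0/gt/uc/guc_log_relay";
    	int fd = open(path, O_WRONLY);

    	if (fd < 0)
    		return 1;

    	write(fd, "1", 1);	/* enable and start the relay */
    	/* ... run the workload, consume relay data via relayfs ... */
    	write(fd, "0", 1);	/* any other value flushes the relay */
    	close(fd);
    	return 0;
    }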
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.h
new file mode 100644
index 000000000000..e8900e3d74ea
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#ifndef DEBUGFS_GUC_LOG_H
+#define DEBUGFS_GUC_LOG_H
+
+struct intel_guc_log;
+struct dentry;
+
+void intel_guc_log_debugfs_register(struct intel_guc_log *log,
+ struct dentry *root);
+
+#endif /* DEBUGFS_GUC_LOG_H */
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index aa6d56e25a10..94eb63f309ce 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -258,7 +258,7 @@ static void guc_submit(struct intel_engine_cs *engine,
static inline int rq_prio(const struct i915_request *rq)
{
- return rq->sched.attr.priority | __NO_PREEMPTION;
+ return rq->sched.attr.priority;
}
static struct i915_request *schedule_in(struct i915_request *rq, int idx)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
index a74b65694512..65eeb44b397d 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
@@ -41,7 +41,7 @@ void intel_huc_init_early(struct intel_huc *huc)
{
struct drm_i915_private *i915 = huc_to_gt(huc)->i915;
- intel_huc_fw_init_early(huc);
+ intel_uc_fw_init_early(&huc->fw, INTEL_UC_FW_TYPE_HUC);
if (INTEL_GEN(i915) >= 11) {
huc->status.reg = GEN11_HUC_KERNEL_LOAD_INFO;
@@ -200,9 +200,13 @@ fail:
* This function reads status register to verify if HuC
* firmware was successfully loaded.
*
- * Returns: 1 if HuC firmware is loaded and verified,
- * 0 if HuC firmware is not loaded and -ENODEV if HuC
- * is not present on this platform.
+ * Returns:
+ * * -ENODEV if HuC is not present on this platform,
+ * * -EOPNOTSUPP if HuC firmware is disabled,
+ * * -ENOPKG if HuC firmware was not installed,
+ * * -ENOEXEC if HuC firmware is invalid or mismatched,
+ * * 0 if HuC firmware is not running,
+ * * 1 if HuC firmware is authenticated and running.
*/
int intel_huc_check_status(struct intel_huc *huc)
{
@@ -210,11 +214,50 @@ int intel_huc_check_status(struct intel_huc *huc)
intel_wakeref_t wakeref;
u32 status = 0;
- if (!intel_huc_is_supported(huc))
+ switch (__intel_uc_fw_status(&huc->fw)) {
+ case INTEL_UC_FIRMWARE_NOT_SUPPORTED:
return -ENODEV;
+ case INTEL_UC_FIRMWARE_DISABLED:
+ return -EOPNOTSUPP;
+ case INTEL_UC_FIRMWARE_MISSING:
+ return -ENOPKG;
+ case INTEL_UC_FIRMWARE_ERROR:
+ return -ENOEXEC;
+ default:
+ break;
+ }
with_intel_runtime_pm(gt->uncore->rpm, wakeref)
status = intel_uncore_read(gt->uncore, huc->status.reg);
return (status & huc->status.mask) == huc->status.value;
}
+
+/**
+ * intel_huc_load_status - dump information about HuC load status
+ * @huc: the HuC
+ * @p: the &drm_printer
+ *
+ * Pretty printer for HuC load status.
+ */
+void intel_huc_load_status(struct intel_huc *huc, struct drm_printer *p)
+{
+ struct intel_gt *gt = huc_to_gt(huc);
+ intel_wakeref_t wakeref;
+
+ if (!intel_huc_is_supported(huc)) {
+ drm_printf(p, "HuC not supported\n");
+ return;
+ }
+
+ if (!intel_huc_is_wanted(huc)) {
+ drm_printf(p, "HuC disabled\n");
+ return;
+ }
+
+ intel_uc_fw_dump(&huc->fw, p);
+
+ with_intel_runtime_pm(gt->uncore->rpm, wakeref)
+ drm_printf(p, "HuC status: 0x%08x\n",
+ intel_uncore_read(gt->uncore, huc->status.reg));
+}
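Not part of the patch: a hedged userspace sketch of observing the richer HuC status reporting. intel_huc_check_status() is reachable from userspace via the I915_PARAM_HUC_STATUS getparam, so with this change a failed ioctl can carry the more specific errno values documented in the kernel-doc above. The include path and ioctl plumbing below assume the libdrm uAPI headers.

    #include <errno.h>
    #include <sys/ioctl.h>
    #include <drm/i915_drm.h>	/* assumed uAPI header location */

    static int huc_status(int fd)
    {
    	int value = 0;
    	struct drm_i915_getparam gp = {
    		.param = I915_PARAM_HUC_STATUS,
    		.value = &value,
    	};

    	if (ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
    		return -errno;	/* e.g. -ENODEV, -EOPNOTSUPP, -ENOPKG, -ENOEXEC */

    	return value;		/* 0: not running, 1: authenticated and running */
    }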
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.h b/drivers/gpu/drm/i915/gt/uc/intel_huc.h
index a40b9cfc6c22..daee43b661d4 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.h
@@ -57,4 +57,6 @@ static inline bool intel_huc_is_authenticated(struct intel_huc *huc)
return intel_uc_fw_is_running(&huc->fw);
}
+void intel_huc_load_status(struct intel_huc *huc, struct drm_printer *p);
+
#endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc_debugfs.c b/drivers/gpu/drm/i915/gt/uc/intel_huc_debugfs.c
new file mode 100644
index 000000000000..5733c15fd123
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc_debugfs.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#include <drm/drm_print.h>
+
+#include "gt/debugfs_gt.h"
+#include "intel_huc.h"
+#include "intel_huc_debugfs.h"
+
+static int huc_info_show(struct seq_file *m, void *data)
+{
+ struct intel_huc *huc = m->private;
+ struct drm_printer p = drm_seq_file_printer(m);
+
+ if (!intel_huc_is_supported(huc))
+ return -ENODEV;
+
+ intel_huc_load_status(huc, &p);
+
+ return 0;
+}
+DEFINE_GT_DEBUGFS_ATTRIBUTE(huc_info);
+
+void intel_huc_debugfs_register(struct intel_huc *huc, struct dentry *root)
+{
+ static const struct debugfs_gt_file files[] = {
+ { "huc_info", &huc_info_fops, NULL },
+ };
+
+ if (!intel_huc_is_supported(huc))
+ return;
+
+ intel_gt_debugfs_register_files(root, files, ARRAY_SIZE(files), huc);
+}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc_debugfs.h b/drivers/gpu/drm/i915/gt/uc/intel_huc_debugfs.h
new file mode 100644
index 000000000000..be79e992f976
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc_debugfs.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#ifndef DEBUGFS_HUC_H
+#define DEBUGFS_HUC_H
+
+struct intel_huc;
+struct dentry;
+
+void intel_huc_debugfs_register(struct intel_huc *huc, struct dentry *root);
+
+#endif /* DEBUGFS_HUC_H */
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c
index 9cdf4cbe691c..e5ef509c70e8 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c
@@ -8,23 +8,6 @@
#include "i915_drv.h"
/**
- * intel_huc_fw_init_early() - initializes HuC firmware struct
- * @huc: intel_huc struct
- *
- * On platforms with HuC selects firmware for uploading
- */
-void intel_huc_fw_init_early(struct intel_huc *huc)
-{
- struct intel_gt *gt = huc_to_gt(huc);
- struct intel_uc *uc = &gt->uc;
- struct drm_i915_private *i915 = gt->i915;
-
- intel_uc_fw_init_early(&huc->fw, INTEL_UC_FW_TYPE_HUC,
- intel_uc_wants_guc(uc),
- INTEL_INFO(i915)->platform, INTEL_REVID(i915));
-}
-
-/**
* intel_huc_fw_upload() - load HuC uCode to device
* @huc: intel_huc structure
*
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.h
index b791269ce923..12f264ee3e0b 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.h
@@ -8,7 +8,6 @@
struct intel_huc;
-void intel_huc_fw_init_early(struct intel_huc *huc);
int intel_huc_fw_upload(struct intel_huc *huc);
#endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
index a4cbe06e06bd..f518fe05c6f9 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
@@ -45,12 +45,12 @@ static void __confirm_options(struct intel_uc *uc)
{
struct drm_i915_private *i915 = uc_to_gt(uc)->i915;
- DRM_DEV_DEBUG_DRIVER(i915->drm.dev,
- "enable_guc=%d (guc:%s submission:%s huc:%s)\n",
- i915_modparams.enable_guc,
- yesno(intel_uc_wants_guc(uc)),
- yesno(intel_uc_wants_guc_submission(uc)),
- yesno(intel_uc_wants_huc(uc)));
+ drm_dbg(&i915->drm,
+ "enable_guc=%d (guc:%s submission:%s huc:%s)\n",
+ i915_modparams.enable_guc,
+ yesno(intel_uc_wants_guc(uc)),
+ yesno(intel_uc_wants_guc_submission(uc)),
+ yesno(intel_uc_wants_huc(uc)));
if (i915_modparams.enable_guc == -1)
return;
@@ -63,25 +63,25 @@ static void __confirm_options(struct intel_uc *uc)
}
if (!intel_uc_supports_guc(uc))
- dev_info(i915->drm.dev,
+ drm_info(&i915->drm,
"Incompatible option enable_guc=%d - %s\n",
i915_modparams.enable_guc, "GuC is not supported!");
if (i915_modparams.enable_guc & ENABLE_GUC_LOAD_HUC &&
!intel_uc_supports_huc(uc))
- dev_info(i915->drm.dev,
+ drm_info(&i915->drm,
"Incompatible option enable_guc=%d - %s\n",
i915_modparams.enable_guc, "HuC is not supported!");
if (i915_modparams.enable_guc & ENABLE_GUC_SUBMISSION &&
!intel_uc_supports_guc_submission(uc))
- dev_info(i915->drm.dev,
+ drm_info(&i915->drm,
"Incompatible option enable_guc=%d - %s\n",
i915_modparams.enable_guc, "GuC submission is N/A");
if (i915_modparams.enable_guc & ~(ENABLE_GUC_SUBMISSION |
ENABLE_GUC_LOAD_HUC))
- dev_info(i915->drm.dev,
+ drm_info(&i915->drm,
"Incompatible option enable_guc=%d - %s\n",
i915_modparams.enable_guc, "undocumented flag");
}
@@ -131,6 +131,13 @@ static void __uc_free_load_err_log(struct intel_uc *uc)
i915_gem_object_put(log);
}
+void intel_uc_driver_remove(struct intel_uc *uc)
+{
+ intel_uc_fini_hw(uc);
+ intel_uc_fini(uc);
+ __uc_free_load_err_log(uc);
+}
+
static inline bool guc_communication_enabled(struct intel_guc *guc)
{
return intel_guc_ct_enabled(&guc->ct);
@@ -311,8 +318,6 @@ static void __uc_fini(struct intel_uc *uc)
{
intel_huc_fini(&uc->huc);
intel_guc_fini(&uc->guc);
-
- __uc_free_load_err_log(uc);
}
static int __uc_sanitize(struct intel_uc *uc)
@@ -475,14 +480,14 @@ static int __uc_init_hw(struct intel_uc *uc)
if (intel_uc_uses_guc_submission(uc))
intel_guc_submission_enable(guc);
- dev_info(i915->drm.dev, "%s firmware %s version %u.%u %s:%s\n",
+ drm_info(&i915->drm, "%s firmware %s version %u.%u %s:%s\n",
intel_uc_fw_type_repr(INTEL_UC_FW_TYPE_GUC), guc->fw.path,
guc->fw.major_ver_found, guc->fw.minor_ver_found,
"submission",
enableddisabled(intel_uc_uses_guc_submission(uc)));
if (intel_uc_uses_huc(uc)) {
- dev_info(i915->drm.dev, "%s firmware %s version %u.%u %s:%s\n",
+ drm_info(&i915->drm, "%s firmware %s version %u.%u %s:%s\n",
intel_uc_fw_type_repr(INTEL_UC_FW_TYPE_HUC),
huc->fw.path,
huc->fw.major_ver_found, huc->fw.minor_ver_found,
@@ -503,7 +508,7 @@ err_out:
__uc_sanitize(uc);
if (!ret) {
- dev_notice(i915->drm.dev, "GuC is uninitialized\n");
+ drm_notice(&i915->drm, "GuC is uninitialized\n");
/* We want to run without GuC submission */
return 0;
}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.h b/drivers/gpu/drm/i915/gt/uc/intel_uc.h
index 5ae7b50b7dc1..9c954c589edf 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.h
@@ -34,6 +34,7 @@ struct intel_uc {
void intel_uc_init_early(struct intel_uc *uc);
void intel_uc_driver_late_release(struct intel_uc *uc);
+void intel_uc_driver_remove(struct intel_uc *uc);
void intel_uc_init_mmio(struct intel_uc *uc);
void intel_uc_reset_prepare(struct intel_uc *uc);
void intel_uc_suspend(struct intel_uc *uc);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c
new file mode 100644
index 000000000000..9d16b784aa0d
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#include <linux/debugfs.h>
+
+#include "intel_guc_debugfs.h"
+#include "intel_huc_debugfs.h"
+#include "intel_uc.h"
+#include "intel_uc_debugfs.h"
+
+void intel_uc_debugfs_register(struct intel_uc *uc, struct dentry *gt_root)
+{
+ struct dentry *root;
+
+ if (!gt_root)
+ return;
+
+ /* GuC and HuC always come as a pair, no need to check both */
+ if (!intel_uc_supports_guc(uc))
+ return;
+
+ root = debugfs_create_dir("uc", gt_root);
+ if (IS_ERR(root))
+ return;
+
+ intel_guc_debugfs_register(&uc->guc, root);
+ intel_huc_debugfs_register(&uc->huc, root);
+}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.h
new file mode 100644
index 000000000000..010ce250d223
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#ifndef DEBUGFS_UC_H
+#define DEBUGFS_UC_H
+
+struct intel_uc;
+struct dentry;
+
+void intel_uc_debugfs_register(struct intel_uc *uc, struct dentry *gt_root);
+
+#endif /* DEBUGFS_UC_H */
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
index 18c755203688..e1caae93996d 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
@@ -11,26 +11,32 @@
#include "intel_uc_fw_abi.h"
#include "i915_drv.h"
-static inline struct intel_gt *__uc_fw_to_gt(struct intel_uc_fw *uc_fw)
+static inline struct intel_gt *
+____uc_fw_to_gt(struct intel_uc_fw *uc_fw, enum intel_uc_fw_type type)
{
- GEM_BUG_ON(uc_fw->status == INTEL_UC_FIRMWARE_UNINITIALIZED);
- if (uc_fw->type == INTEL_UC_FW_TYPE_GUC)
+ if (type == INTEL_UC_FW_TYPE_GUC)
return container_of(uc_fw, struct intel_gt, uc.guc.fw);
- GEM_BUG_ON(uc_fw->type != INTEL_UC_FW_TYPE_HUC);
+ GEM_BUG_ON(type != INTEL_UC_FW_TYPE_HUC);
return container_of(uc_fw, struct intel_gt, uc.huc.fw);
}
+static inline struct intel_gt *__uc_fw_to_gt(struct intel_uc_fw *uc_fw)
+{
+ GEM_BUG_ON(uc_fw->status == INTEL_UC_FIRMWARE_UNINITIALIZED);
+ return ____uc_fw_to_gt(uc_fw, uc_fw->type);
+}
+
#ifdef CONFIG_DRM_I915_DEBUG_GUC
void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
enum intel_uc_fw_status status)
{
uc_fw->__status = status;
- DRM_DEV_DEBUG_DRIVER(__uc_fw_to_gt(uc_fw)->i915->drm.dev,
- "%s firmware -> %s\n",
- intel_uc_fw_type_repr(uc_fw->type),
- status == INTEL_UC_FIRMWARE_SELECTED ?
- uc_fw->path : intel_uc_fw_status_repr(status));
+ drm_dbg(&__uc_fw_to_gt(uc_fw)->i915->drm,
+ "%s firmware -> %s\n",
+ intel_uc_fw_type_repr(uc_fw->type),
+ status == INTEL_UC_FIRMWARE_SELECTED ?
+ uc_fw->path : intel_uc_fw_status_repr(status));
}
#endif
@@ -187,17 +193,15 @@ static void __uc_fw_user_override(struct intel_uc_fw *uc_fw)
* intel_uc_fw_init_early - initialize the uC object and select the firmware
* @uc_fw: uC firmware
* @type: type of uC
- * @supported: is uC support possible
- * @platform: platform identifier
- * @rev: hardware revision
*
* Initialize the state of our uC object and relevant tracking and select the
* firmware to fetch and load.
*/
void intel_uc_fw_init_early(struct intel_uc_fw *uc_fw,
- enum intel_uc_fw_type type, bool supported,
- enum intel_platform platform, u8 rev)
+ enum intel_uc_fw_type type)
{
+ struct drm_i915_private *i915 = ____uc_fw_to_gt(uc_fw, type)->i915;
+
/*
* we use FIRMWARE_UNINITIALIZED to detect checks against uc_fw->status
* before we're looked at the HW caps to see if we have uc support
@@ -208,8 +212,10 @@ void intel_uc_fw_init_early(struct intel_uc_fw *uc_fw,
uc_fw->type = type;
- if (supported) {
- __uc_fw_auto_select(uc_fw, platform, rev);
+ if (HAS_GT_UC(i915)) {
+ __uc_fw_auto_select(uc_fw,
+ INTEL_INFO(i915)->platform,
+ INTEL_REVID(i915));
__uc_fw_user_override(uc_fw);
}
@@ -290,7 +296,7 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw)
/* Check the size of the blob before examining buffer contents */
if (unlikely(fw->size < sizeof(struct uc_css_header))) {
- dev_warn(dev, "%s firmware %s: invalid size: %zu < %zu\n",
+ drm_warn(&i915->drm, "%s firmware %s: invalid size: %zu < %zu\n",
intel_uc_fw_type_repr(uc_fw->type), uc_fw->path,
fw->size, sizeof(struct uc_css_header));
err = -ENODATA;
@@ -303,7 +309,7 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw)
size = (css->header_size_dw - css->key_size_dw - css->modulus_size_dw -
css->exponent_size_dw) * sizeof(u32);
if (unlikely(size != sizeof(struct uc_css_header))) {
- dev_warn(dev,
+ drm_warn(&i915->drm,
"%s firmware %s: unexpected header size: %zu != %zu\n",
intel_uc_fw_type_repr(uc_fw->type), uc_fw->path,
fw->size, sizeof(struct uc_css_header));
@@ -316,7 +322,7 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw)
/* now RSA */
if (unlikely(css->key_size_dw != UOS_RSA_SCRATCH_COUNT)) {
- dev_warn(dev, "%s firmware %s: unexpected key size: %u != %u\n",
+ drm_warn(&i915->drm, "%s firmware %s: unexpected key size: %u != %u\n",
intel_uc_fw_type_repr(uc_fw->type), uc_fw->path,
css->key_size_dw, UOS_RSA_SCRATCH_COUNT);
err = -EPROTO;
@@ -327,7 +333,7 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw)
/* At least, it should have header, uCode and RSA. Size of all three. */
size = sizeof(struct uc_css_header) + uc_fw->ucode_size + uc_fw->rsa_size;
if (unlikely(fw->size < size)) {
- dev_warn(dev, "%s firmware %s: invalid size: %zu < %zu\n",
+ drm_warn(&i915->drm, "%s firmware %s: invalid size: %zu < %zu\n",
intel_uc_fw_type_repr(uc_fw->type), uc_fw->path,
fw->size, size);
err = -ENOEXEC;
@@ -337,7 +343,7 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw)
/* Sanity check whether this fw is not larger than whole WOPCM memory */
size = __intel_uc_fw_get_upload_size(uc_fw);
if (unlikely(size >= i915->wopcm.size)) {
- dev_warn(dev, "%s firmware %s: invalid size: %zu > %zu\n",
+ drm_warn(&i915->drm, "%s firmware %s: invalid size: %zu > %zu\n",
intel_uc_fw_type_repr(uc_fw->type), uc_fw->path,
size, (size_t)i915->wopcm.size);
err = -E2BIG;
@@ -352,7 +358,7 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw)
if (uc_fw->major_ver_found != uc_fw->major_ver_wanted ||
uc_fw->minor_ver_found < uc_fw->minor_ver_wanted) {
- dev_notice(dev, "%s firmware %s: unexpected version: %u.%u != %u.%u\n",
+ drm_notice(&i915->drm, "%s firmware %s: unexpected version: %u.%u != %u.%u\n",
intel_uc_fw_type_repr(uc_fw->type), uc_fw->path,
uc_fw->major_ver_found, uc_fw->minor_ver_found,
uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted);
@@ -380,9 +386,9 @@ fail:
INTEL_UC_FIRMWARE_MISSING :
INTEL_UC_FIRMWARE_ERROR);
- dev_notice(dev, "%s firmware %s: fetch failed with error %d\n",
+ drm_notice(&i915->drm, "%s firmware %s: fetch failed with error %d\n",
intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, err);
- dev_info(dev, "%s firmware(s) can be downloaded from %s\n",
+ drm_info(&i915->drm, "%s firmware(s) can be downloaded from %s\n",
intel_uc_fw_type_repr(uc_fw->type), INTEL_UC_FIRMWARE_URL);
release_firmware(fw); /* OK even if fw is NULL */
@@ -467,7 +473,7 @@ static int uc_fw_xfer(struct intel_uc_fw *uc_fw, u32 dst_offset, u32 dma_flags)
/* Wait for DMA to finish */
ret = intel_wait_for_register_fw(uncore, DMA_CTRL, START_DMA, 0, 100);
if (ret)
- dev_err(gt->i915->drm.dev, "DMA for %s fw failed, DMA_CTRL=%u\n",
+ drm_err(&gt->i915->drm, "DMA for %s fw failed, DMA_CTRL=%u\n",
intel_uc_fw_type_repr(uc_fw->type),
intel_uncore_read_fw(uncore, DMA_CTRL));
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
index 888ff0de0244..23d3a423ac0f 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
@@ -239,8 +239,7 @@ static inline u32 intel_uc_fw_get_upload_size(struct intel_uc_fw *uc_fw)
}
void intel_uc_fw_init_early(struct intel_uc_fw *uc_fw,
- enum intel_uc_fw_type type, bool supported,
- enum intel_platform platform, u8 rev);
+ enum intel_uc_fw_type type);
int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw);
void intel_uc_fw_cleanup_fetch(struct intel_uc_fw *uc_fw);
int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, u32 offset, u32 dma_flags);
diff --git a/drivers/gpu/drm/i915/gvt/aperture_gm.c b/drivers/gpu/drm/i915/gvt/aperture_gm.c
index 8b13f091cee2..0d6d59871308 100644
--- a/drivers/gpu/drm/i915/gvt/aperture_gm.c
+++ b/drivers/gpu/drm/i915/gvt/aperture_gm.c
@@ -35,7 +35,7 @@
*/
#include "i915_drv.h"
-#include "i915_gem_fence_reg.h"
+#include "gt/intel_ggtt_fencing.h"
#include "gvt.h"
static int alloc_gm(struct intel_vgpu *vgpu, bool high_gm)
diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
index a3cc080a46c6..8b87f130f7f1 100644
--- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
+++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
@@ -882,6 +882,47 @@ static int mocs_cmd_reg_handler(struct parser_exec_state *s,
return 0;
}
+static int is_cmd_update_pdps(unsigned int offset,
+ struct parser_exec_state *s)
+{
+ u32 base = s->workload->engine->mmio_base;
+ return i915_mmio_reg_equal(_MMIO(offset), GEN8_RING_PDP_UDW(base, 0));
+}
+
+static int cmd_pdp_mmio_update_handler(struct parser_exec_state *s,
+ unsigned int offset, unsigned int index)
+{
+ struct intel_vgpu *vgpu = s->vgpu;
+ struct intel_vgpu_mm *shadow_mm = s->workload->shadow_mm;
+ struct intel_vgpu_mm *mm;
+ u64 pdps[GEN8_3LVL_PDPES];
+
+ if (shadow_mm->ppgtt_mm.root_entry_type ==
+ GTT_TYPE_PPGTT_ROOT_L4_ENTRY) {
+ pdps[0] = (u64)cmd_val(s, 2) << 32;
+ pdps[0] |= cmd_val(s, 4);
+
+ mm = intel_vgpu_find_ppgtt_mm(vgpu, pdps);
+ if (!mm) {
+ gvt_vgpu_err("failed to get the 4-level shadow vm\n");
+ return -EINVAL;
+ }
+ intel_vgpu_mm_get(mm);
+ list_add_tail(&mm->ppgtt_mm.link,
+ &s->workload->lri_shadow_mm);
+ *cmd_ptr(s, 2) = upper_32_bits(mm->ppgtt_mm.shadow_pdps[0]);
+ *cmd_ptr(s, 4) = lower_32_bits(mm->ppgtt_mm.shadow_pdps[0]);
+ } else {
+ /* Currently all guests use a PML4 table; a guest with a
+ * 3-level table that also uses LRI for PPGTT updates cannot
+ * exist right now, so this path is simply untestable. */
+ GEM_BUG_ON(1);
+ gvt_vgpu_err("invalid shared shadow vm type\n");
+ return -EINVAL;
+ }
+ return 0;
+}
+
static int cmd_reg_handler(struct parser_exec_state *s,
unsigned int offset, unsigned int index, char *cmd)
{
@@ -920,6 +961,10 @@ static int cmd_reg_handler(struct parser_exec_state *s,
patch_value(s, cmd_ptr(s, index), VGT_PVINFO_PAGE);
}
+ if (is_cmd_update_pdps(offset, s) &&
+ cmd_pdp_mmio_update_handler(s, offset, index))
+ return -EINVAL;
+
/* TODO
* In order to let workload with inhibit context to generate
* correct image data into memory, vregs values will be loaded to
diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c
index dd25c3024370..158873f269b1 100644
--- a/drivers/gpu/drm/i915/gvt/execlist.c
+++ b/drivers/gpu/drm/i915/gvt/execlist.c
@@ -424,8 +424,6 @@ static int complete_execlist_workload(struct intel_vgpu_workload *workload)
ret = emulate_execlist_ctx_schedule_out(execlist, &workload->ctx_desc);
out:
- intel_vgpu_unpin_mm(workload->shadow_mm);
- intel_vgpu_destroy_workload(workload);
return ret;
}
diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c
index 2a4b23f8aa74..210016192ce7 100644
--- a/drivers/gpu/drm/i915/gvt/gtt.c
+++ b/drivers/gpu/drm/i915/gvt/gtt.c
@@ -1900,6 +1900,7 @@ struct intel_vgpu_mm *intel_vgpu_create_ppgtt_mm(struct intel_vgpu *vgpu,
INIT_LIST_HEAD(&mm->ppgtt_mm.list);
INIT_LIST_HEAD(&mm->ppgtt_mm.lru_list);
+ INIT_LIST_HEAD(&mm->ppgtt_mm.link);
if (root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY)
mm->ppgtt_mm.guest_pdps[0] = pdps[0];
@@ -2341,12 +2342,27 @@ int intel_vgpu_emulate_ggtt_mmio_write(struct intel_vgpu *vgpu,
{
const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
int ret;
+ struct intel_vgpu_submission *s = &vgpu->submission;
+ struct intel_engine_cs *engine;
+ int i;
if (bytes != 4 && bytes != 8)
return -EINVAL;
off -= info->gtt_start_offset;
ret = emulate_ggtt_mmio_write(vgpu, off, p_data, bytes);
+
+ /* If the GGTT entry of the last submitted context is written,
+ * that context has probably been unpinned.
+ * Mark the last shadowed ctx as invalid.
+ */
+ for_each_engine(engine, vgpu->gvt->gt, i) {
+ if (!s->last_ctx[i].valid)
+ continue;
+
+ if (s->last_ctx[i].lrca == (off >> info->gtt_entry_size_shift))
+ s->last_ctx[i].valid = false;
+ }
return ret;
}
diff --git a/drivers/gpu/drm/i915/gvt/gtt.h b/drivers/gpu/drm/i915/gvt/gtt.h
index 88789316807d..320b8d6ad92f 100644
--- a/drivers/gpu/drm/i915/gvt/gtt.h
+++ b/drivers/gpu/drm/i915/gvt/gtt.h
@@ -160,6 +160,7 @@ struct intel_vgpu_mm {
struct list_head list;
struct list_head lru_list;
+ struct list_head link; /* possible LRI shadow mm list */
} ppgtt_mm;
struct {
void *virtual_ggtt;
diff --git a/drivers/gpu/drm/i915/gvt/gvt.c b/drivers/gpu/drm/i915/gvt/gvt.c
index 9e1787867894..c7c561237883 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.c
+++ b/drivers/gpu/drm/i915/gvt/gvt.c
@@ -31,7 +31,6 @@
*/
#include <linux/types.h>
-#include <xen/xen.h>
#include <linux/kthread.h>
#include "i915_drv.h"
diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
index 58c2c7932e3f..a4a6db6b7f90 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.h
+++ b/drivers/gpu/drm/i915/gvt/gvt.h
@@ -163,6 +163,11 @@ struct intel_vgpu_submission {
const struct intel_vgpu_submission_ops *ops;
int virtual_submission_interface;
bool active;
+ struct {
+ u32 lrca;
+ bool valid;
+ u64 ring_context_gpa;
+ } last_ctx[I915_NUM_ENGINES];
};
struct intel_vgpu {
diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index 2faf50e1b051..3e88e3b5c43a 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -2812,7 +2812,7 @@ static int init_bdw_mmio_info(struct intel_gvt *gvt)
MMIO_D(GAMTARBMODE, D_BDW_PLUS);
#define RING_REG(base) _MMIO((base) + 0x270)
- MMIO_RING_F(RING_REG, 32, 0, 0, 0, D_BDW_PLUS, NULL, NULL);
+ MMIO_RING_F(RING_REG, 32, F_CMD_ACCESS, 0, 0, D_BDW_PLUS, NULL, NULL);
#undef RING_REG
MMIO_RING_GM_RDR(RING_HWS_PGA, D_BDW_PLUS, NULL, hws_pga_write);
diff --git a/drivers/gpu/drm/i915/gvt/hypercall.h b/drivers/gpu/drm/i915/gvt/hypercall.h
index b17c4a1599cd..b79da5124f83 100644
--- a/drivers/gpu/drm/i915/gvt/hypercall.h
+++ b/drivers/gpu/drm/i915/gvt/hypercall.h
@@ -79,6 +79,4 @@ struct intel_gvt_mpt {
bool (*is_valid_gfn)(unsigned long handle, unsigned long gfn);
};
-extern struct intel_gvt_mpt xengt_mpt;
-
#endif /* _GVT_HYPERCALL_H_ */
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index e92ed96c9b23..0fb1df71c637 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -58,10 +58,8 @@ static void set_context_pdp_root_pointer(
static void update_shadow_pdps(struct intel_vgpu_workload *workload)
{
- struct drm_i915_gem_object *ctx_obj =
- workload->req->context->state->obj;
struct execlist_ring_context *shadow_ring_context;
- struct page *page;
+ struct intel_context *ctx = workload->req->context;
if (WARN_ON(!workload->shadow_mm))
return;
@@ -69,11 +67,9 @@ static void update_shadow_pdps(struct intel_vgpu_workload *workload)
if (WARN_ON(!atomic_read(&workload->shadow_mm->pincount)))
return;
- page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
- shadow_ring_context = kmap(page);
+ shadow_ring_context = (struct execlist_ring_context *)ctx->lrc_reg_state;
set_context_pdp_root_pointer(shadow_ring_context,
(void *)workload->shadow_mm->ppgtt_mm.shadow_pdps);
- kunmap(page);
}
/*
@@ -128,16 +124,24 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
{
struct intel_vgpu *vgpu = workload->vgpu;
struct intel_gvt *gvt = vgpu->gvt;
- struct drm_i915_gem_object *ctx_obj =
- workload->req->context->state->obj;
+ struct intel_context *ctx = workload->req->context;
struct execlist_ring_context *shadow_ring_context;
- struct page *page;
void *dst;
+ void *context_base;
unsigned long context_gpa, context_page_num;
+ unsigned long gpa_base; /* first gpa of consecutive GPAs */
+ unsigned long gpa_size; /* size of consecutive GPAs */
+ struct intel_vgpu_submission *s = &vgpu->submission;
int i;
+ bool skip = false;
+ int ring_id = workload->engine->id;
+
+ GEM_BUG_ON(!intel_context_is_pinned(ctx));
+
+ context_base = (void *) ctx->lrc_reg_state -
+ (LRC_STATE_PN << I915_GTT_PAGE_SHIFT);
- page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
- shadow_ring_context = kmap(page);
+ shadow_ring_context = (void *) ctx->lrc_reg_state;
sr_oa_regs(workload, (u32 *)shadow_ring_context, true);
#define COPY_REG(name) \
@@ -169,23 +173,43 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
I915_GTT_PAGE_SIZE - sizeof(*shadow_ring_context));
sr_oa_regs(workload, (u32 *)shadow_ring_context, false);
- kunmap(page);
- if (IS_RESTORE_INHIBIT(shadow_ring_context->ctx_ctrl.val))
- return 0;
+ gvt_dbg_sched("ring %s workload lrca %x, ctx_id %x, ctx gpa %llx",
+ workload->engine->name, workload->ctx_desc.lrca,
+ workload->ctx_desc.context_id,
+ workload->ring_context_gpa);
- gvt_dbg_sched("ring %s workload lrca %x",
- workload->engine->name,
- workload->ctx_desc.lrca);
+ /* We only need to ensure this context is not pinned/unpinned during
+ * the period from the last submission to this submission.
+ * Upon reaching this function, the currently submitted context is not
+ * supposed to get unpinned. If a misbehaving guest driver ever does
+ * this, it would corrupt itself.
+ */
+ if (s->last_ctx[ring_id].valid &&
+ (s->last_ctx[ring_id].lrca ==
+ workload->ctx_desc.lrca) &&
+ (s->last_ctx[ring_id].ring_context_gpa ==
+ workload->ring_context_gpa))
+ skip = true;
+
+ s->last_ctx[ring_id].lrca = workload->ctx_desc.lrca;
+ s->last_ctx[ring_id].ring_context_gpa = workload->ring_context_gpa;
+ if (IS_RESTORE_INHIBIT(shadow_ring_context->ctx_ctrl.val) || skip)
+ return 0;
+
+ s->last_ctx[ring_id].valid = false;
context_page_num = workload->engine->context_size;
context_page_num = context_page_num >> PAGE_SHIFT;
if (IS_BROADWELL(gvt->gt->i915) && workload->engine->id == RCS0)
context_page_num = 19;
- i = 2;
- while (i < context_page_num) {
+ /* Find runs of consecutive GPAs starting from gma; at the first
+ * non-consecutive GPA, read the whole run into the dst virtual address.
+ */
+ gpa_size = 0;
+ for (i = 2; i < context_page_num; i++) {
context_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm,
(u32)((workload->ctx_desc.lrca + i) <<
I915_GTT_PAGE_SHIFT));
@@ -194,13 +218,26 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
return -EFAULT;
}
- page = i915_gem_object_get_page(ctx_obj, i);
- dst = kmap(page);
- intel_gvt_hypervisor_read_gpa(vgpu, context_gpa, dst,
- I915_GTT_PAGE_SIZE);
- kunmap(page);
- i++;
+ if (gpa_size == 0) {
+ gpa_base = context_gpa;
+ dst = context_base + (i << I915_GTT_PAGE_SHIFT);
+ } else if (context_gpa != gpa_base + gpa_size)
+ goto read;
+
+ gpa_size += I915_GTT_PAGE_SIZE;
+
+ if (i == context_page_num - 1)
+ goto read;
+
+ continue;
+
+read:
+ intel_gvt_hypervisor_read_gpa(vgpu, gpa_base, dst, gpa_size);
+ gpa_base = context_gpa;
+ gpa_size = I915_GTT_PAGE_SIZE;
+ dst = context_base + (i << I915_GTT_PAGE_SHIFT);
}
+ s->last_ctx[ring_id].valid = true;
return 0;
}
@@ -599,10 +636,9 @@ static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload)
if (bb->va && !IS_ERR(bb->va))
i915_gem_object_unpin_map(bb->obj);
- if (bb->vma && !IS_ERR(bb->vma)) {
+ if (bb->vma && !IS_ERR(bb->vma))
i915_vma_unpin(bb->vma);
- i915_vma_close(bb->vma);
- }
+
i915_gem_object_put(bb->obj);
}
list_del(&bb->list);
@@ -610,10 +646,11 @@ static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload)
}
}
-static int prepare_workload(struct intel_vgpu_workload *workload)
+static int
+intel_vgpu_shadow_mm_pin(struct intel_vgpu_workload *workload)
{
struct intel_vgpu *vgpu = workload->vgpu;
- struct intel_vgpu_submission *s = &vgpu->submission;
+ struct intel_vgpu_mm *m;
int ret = 0;
ret = intel_vgpu_pin_mm(workload->shadow_mm);
@@ -628,6 +665,52 @@ static int prepare_workload(struct intel_vgpu_workload *workload)
return -EINVAL;
}
+ if (!list_empty(&workload->lri_shadow_mm)) {
+ list_for_each_entry(m, &workload->lri_shadow_mm,
+ ppgtt_mm.link) {
+ ret = intel_vgpu_pin_mm(m);
+ if (ret) {
+ list_for_each_entry_from_reverse(m,
+ &workload->lri_shadow_mm,
+ ppgtt_mm.link)
+ intel_vgpu_unpin_mm(m);
+ gvt_vgpu_err("LRI shadow ppgtt fail to pin\n");
+ break;
+ }
+ }
+ }
+
+ if (ret)
+ intel_vgpu_unpin_mm(workload->shadow_mm);
+
+ return ret;
+}
+
+static void
+intel_vgpu_shadow_mm_unpin(struct intel_vgpu_workload *workload)
+{
+ struct intel_vgpu_mm *m;
+
+ if (!list_empty(&workload->lri_shadow_mm)) {
+ list_for_each_entry(m, &workload->lri_shadow_mm,
+ ppgtt_mm.link)
+ intel_vgpu_unpin_mm(m);
+ }
+ intel_vgpu_unpin_mm(workload->shadow_mm);
+}
+
+static int prepare_workload(struct intel_vgpu_workload *workload)
+{
+ struct intel_vgpu *vgpu = workload->vgpu;
+ struct intel_vgpu_submission *s = &vgpu->submission;
+ int ret = 0;
+
+ ret = intel_vgpu_shadow_mm_pin(workload);
+ if (ret) {
+ gvt_vgpu_err("fail to pin shadow mm\n");
+ return ret;
+ }
+
update_shadow_pdps(workload);
set_context_ppgtt_from_shadow(workload, s->shadow[workload->engine->id]);
@@ -674,7 +757,7 @@ err_shadow_wa_ctx:
err_shadow_batch:
release_shadow_batch_buffer(workload);
err_unpin_mm:
- intel_vgpu_unpin_mm(workload->shadow_mm);
+ intel_vgpu_shadow_mm_unpin(workload);
return ret;
}
@@ -784,15 +867,48 @@ out:
return workload;
}
+static void update_guest_pdps(struct intel_vgpu *vgpu,
+ u64 ring_context_gpa, u32 pdp[8])
+{
+ u64 gpa;
+ int i;
+
+ gpa = ring_context_gpa + RING_CTX_OFF(pdps[0].val);
+
+ for (i = 0; i < 8; i++)
+ intel_gvt_hypervisor_write_gpa(vgpu,
+ gpa + i * 8, &pdp[7 - i], 4);
+}
+
+static __maybe_unused bool
+check_shadow_context_ppgtt(struct execlist_ring_context *c, struct intel_vgpu_mm *m)
+{
+ if (m->ppgtt_mm.root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY) {
+ u64 shadow_pdp = c->pdps[7].val | (u64) c->pdps[6].val << 32;
+
+ if (shadow_pdp != m->ppgtt_mm.shadow_pdps[0]) {
+ gvt_dbg_mm("4-level context ppgtt not match LRI command\n");
+ return false;
+ }
+ return true;
+ } else {
+ /* see comment in LRI handler in cmd_parser.c */
+ gvt_dbg_mm("invalid shadow mm type\n");
+ return false;
+ }
+}
+
static void update_guest_context(struct intel_vgpu_workload *workload)
{
struct i915_request *rq = workload->req;
struct intel_vgpu *vgpu = workload->vgpu;
- struct drm_i915_gem_object *ctx_obj = rq->context->state->obj;
struct execlist_ring_context *shadow_ring_context;
- struct page *page;
+ struct intel_context *ctx = workload->req->context;
+ void *context_base;
void *src;
unsigned long context_gpa, context_page_num;
+ unsigned long gpa_base; /* first gpa of consecutive GPAs */
+ unsigned long gpa_size; /* size of consecutive GPAs */
int i;
u32 ring_base;
u32 head, tail;
@@ -801,6 +917,8 @@ static void update_guest_context(struct intel_vgpu_workload *workload)
gvt_dbg_sched("ring id %d workload lrca %x\n", rq->engine->id,
workload->ctx_desc.lrca);
+ GEM_BUG_ON(!intel_context_is_pinned(ctx));
+
head = workload->rb_head;
tail = workload->rb_tail;
wrap_count = workload->guest_rb_head >> RB_HEAD_WRAP_CNT_OFF;
@@ -824,9 +942,14 @@ static void update_guest_context(struct intel_vgpu_workload *workload)
if (IS_BROADWELL(rq->i915) && rq->engine->id == RCS0)
context_page_num = 19;
- i = 2;
+ context_base = (void *) ctx->lrc_reg_state -
+ (LRC_STATE_PN << I915_GTT_PAGE_SHIFT);
- while (i < context_page_num) {
+ /* Find consecutive GPAs from gma until the first non-consecutive GPA,
+ * then write to the consecutive GPAs from the src virtual address.
+ */
+ gpa_size = 0;
+ for (i = 2; i < context_page_num; i++) {
context_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm,
(u32)((workload->ctx_desc.lrca + i) <<
I915_GTT_PAGE_SHIFT));
@@ -835,19 +958,39 @@ static void update_guest_context(struct intel_vgpu_workload *workload)
return;
}
- page = i915_gem_object_get_page(ctx_obj, i);
- src = kmap(page);
- intel_gvt_hypervisor_write_gpa(vgpu, context_gpa, src,
- I915_GTT_PAGE_SIZE);
- kunmap(page);
- i++;
+ if (gpa_size == 0) {
+ gpa_base = context_gpa;
+ src = context_base + (i << I915_GTT_PAGE_SHIFT);
+ } else if (context_gpa != gpa_base + gpa_size)
+ goto write;
+
+ gpa_size += I915_GTT_PAGE_SIZE;
+
+ if (i == context_page_num - 1)
+ goto write;
+
+ continue;
+
+write:
+ intel_gvt_hypervisor_write_gpa(vgpu, gpa_base, src, gpa_size);
+ gpa_base = context_gpa;
+ gpa_size = I915_GTT_PAGE_SIZE;
+ src = context_base + (i << I915_GTT_PAGE_SHIFT);
}
intel_gvt_hypervisor_write_gpa(vgpu, workload->ring_context_gpa +
RING_CTX_OFF(ring_header.val), &workload->rb_tail, 4);
- page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
- shadow_ring_context = kmap(page);
+ shadow_ring_context = (void *) ctx->lrc_reg_state;
+
+ if (!list_empty(&workload->lri_shadow_mm)) {
+ struct intel_vgpu_mm *m = list_last_entry(&workload->lri_shadow_mm,
+ struct intel_vgpu_mm,
+ ppgtt_mm.link);
+ GEM_BUG_ON(!check_shadow_context_ppgtt(shadow_ring_context, m));
+ update_guest_pdps(vgpu, workload->ring_context_gpa,
+ (void *)m->ppgtt_mm.guest_pdps);
+ }
#define COPY_REG(name) \
intel_gvt_hypervisor_write_gpa(vgpu, workload->ring_context_gpa + \
@@ -864,8 +1007,6 @@ static void update_guest_context(struct intel_vgpu_workload *workload)
(void *)shadow_ring_context +
sizeof(*shadow_ring_context),
I915_GTT_PAGE_SIZE - sizeof(*shadow_ring_context));
-
- kunmap(page);
}
void intel_vgpu_clean_workloads(struct intel_vgpu *vgpu,
@@ -959,6 +1100,9 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id)
workload->complete(workload);
+ intel_vgpu_shadow_mm_unpin(workload);
+ intel_vgpu_destroy_workload(workload);
+
atomic_dec(&s->running_workload_num);
wake_up(&scheduler->workload_complete_wq);
@@ -1264,6 +1408,8 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu)
atomic_set(&s->running_workload_num, 0);
bitmap_zero(s->tlb_handle_pending, I915_NUM_ENGINES);
+ memset(s->last_ctx, 0, sizeof(s->last_ctx));
+
i915_vm_put(&ppgtt->vm);
return 0;
@@ -1350,6 +1496,16 @@ void intel_vgpu_destroy_workload(struct intel_vgpu_workload *workload)
release_shadow_batch_buffer(workload);
release_shadow_wa_ctx(&workload->wa_ctx);
+ if (!list_empty(&workload->lri_shadow_mm)) {
+ struct intel_vgpu_mm *m, *mm;
+ list_for_each_entry_safe(m, mm, &workload->lri_shadow_mm,
+ ppgtt_mm.link) {
+ list_del(&m->ppgtt_mm.link);
+ intel_vgpu_mm_put(m);
+ }
+ }
+
+ GEM_BUG_ON(!list_empty(&workload->lri_shadow_mm));
if (workload->shadow_mm)
intel_vgpu_mm_put(workload->shadow_mm);
@@ -1368,6 +1524,7 @@ alloc_workload(struct intel_vgpu *vgpu)
INIT_LIST_HEAD(&workload->list);
INIT_LIST_HEAD(&workload->shadow_bb);
+ INIT_LIST_HEAD(&workload->lri_shadow_mm);
init_waitqueue_head(&workload->shadow_ctx_status_wq);
atomic_set(&workload->shadow_ctx_active, 0);
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.h b/drivers/gpu/drm/i915/gvt/scheduler.h
index bf7fc0ca4cb1..15d317f2a4a4 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.h
+++ b/drivers/gpu/drm/i915/gvt/scheduler.h
@@ -87,6 +87,7 @@ struct intel_vgpu_workload {
int status;
struct intel_vgpu_mm *shadow_mm;
+ struct list_head lri_shadow_mm; /* For PPGTT load cmd */
/* different submission model may need different handler */
int (*prepare)(struct intel_vgpu_workload *);
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index c4048628188a..d960d0be5bd2 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -496,7 +496,7 @@ static int flush_lazy_signals(struct i915_active *ref)
return err;
}
-int i915_active_wait(struct i915_active *ref)
+int __i915_active_wait(struct i915_active *ref, int state)
{
int err;
@@ -511,7 +511,9 @@ int i915_active_wait(struct i915_active *ref)
if (err)
return err;
- if (wait_var_event_interruptible(ref, i915_active_is_idle(ref)))
+ if (!i915_active_is_idle(ref) &&
+ ___wait_var_event(ref, i915_active_is_idle(ref),
+ state, 0, 0, schedule()))
return -EINTR;
flush_work(&ref->work);
@@ -540,34 +542,88 @@ static int __await_active(struct i915_active_fence *active,
return 0;
}
+struct wait_barrier {
+ struct wait_queue_entry base;
+ struct i915_active *ref;
+};
+
+static int
+barrier_wake(wait_queue_entry_t *wq, unsigned int mode, int flags, void *key)
+{
+ struct wait_barrier *wb = container_of(wq, typeof(*wb), base);
+
+ if (i915_active_is_idle(wb->ref)) {
+ list_del(&wq->entry);
+ i915_sw_fence_complete(wq->private);
+ kfree(wq);
+ }
+
+ return 0;
+}
+
+static int __await_barrier(struct i915_active *ref, struct i915_sw_fence *fence)
+{
+ struct wait_barrier *wb;
+
+ wb = kmalloc(sizeof(*wb), GFP_KERNEL);
+ if (unlikely(!wb))
+ return -ENOMEM;
+
+ GEM_BUG_ON(i915_active_is_idle(ref));
+ if (!i915_sw_fence_await(fence)) {
+ kfree(wb);
+ return -EINVAL;
+ }
+
+ wb->base.flags = 0;
+ wb->base.func = barrier_wake;
+ wb->base.private = fence;
+ wb->ref = ref;
+
+ add_wait_queue(__var_waitqueue(ref), &wb->base);
+ return 0;
+}
+
static int await_active(struct i915_active *ref,
unsigned int flags,
int (*fn)(void *arg, struct dma_fence *fence),
- void *arg)
+ void *arg, struct i915_sw_fence *barrier)
{
int err = 0;
- /* We must always wait for the exclusive fence! */
- if (rcu_access_pointer(ref->excl.fence)) {
+ if (!i915_active_acquire_if_busy(ref))
+ return 0;
+
+ if (flags & I915_ACTIVE_AWAIT_EXCL &&
+ rcu_access_pointer(ref->excl.fence)) {
err = __await_active(&ref->excl, fn, arg);
if (err)
- return err;
+ goto out;
}
- if (flags & I915_ACTIVE_AWAIT_ALL && i915_active_acquire_if_busy(ref)) {
+ if (flags & I915_ACTIVE_AWAIT_ACTIVE) {
struct active_node *it, *n;
rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
err = __await_active(&it->base, fn, arg);
if (err)
- break;
+ goto out;
}
- i915_active_release(ref);
+ }
+
+ if (flags & I915_ACTIVE_AWAIT_BARRIER) {
+ err = flush_lazy_signals(ref);
if (err)
- return err;
+ goto out;
+
+ err = __await_barrier(ref, barrier);
+ if (err)
+ goto out;
}
- return 0;
+out:
+ i915_active_release(ref);
+ return err;
}
static int rq_await_fence(void *arg, struct dma_fence *fence)
@@ -579,7 +635,7 @@ int i915_request_await_active(struct i915_request *rq,
struct i915_active *ref,
unsigned int flags)
{
- return await_active(ref, flags, rq_await_fence, rq);
+ return await_active(ref, flags, rq_await_fence, rq, &rq->submit);
}
static int sw_await_fence(void *arg, struct dma_fence *fence)
@@ -592,7 +648,7 @@ int i915_sw_fence_await_active(struct i915_sw_fence *fence,
struct i915_active *ref,
unsigned int flags)
{
- return await_active(ref, flags, sw_await_fence, fence);
+ return await_active(ref, flags, sw_await_fence, fence, fence);
}
#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
@@ -818,7 +874,7 @@ void i915_active_acquire_barrier(struct i915_active *ref)
GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
llist_add(barrier_to_ll(node), &engine->barrier_tasks);
- intel_engine_pm_put(engine);
+ intel_engine_pm_put_delay(engine, 1);
}
}
@@ -937,6 +993,59 @@ void i915_active_noop(struct dma_fence *fence, struct dma_fence_cb *cb)
active_fence_cb(fence, cb);
}
+struct auto_active {
+ struct i915_active base;
+ struct kref ref;
+};
+
+struct i915_active *i915_active_get(struct i915_active *ref)
+{
+ struct auto_active *aa = container_of(ref, typeof(*aa), base);
+
+ kref_get(&aa->ref);
+ return &aa->base;
+}
+
+static void auto_release(struct kref *ref)
+{
+ struct auto_active *aa = container_of(ref, typeof(*aa), ref);
+
+ i915_active_fini(&aa->base);
+ kfree(aa);
+}
+
+void i915_active_put(struct i915_active *ref)
+{
+ struct auto_active *aa = container_of(ref, typeof(*aa), base);
+
+ kref_put(&aa->ref, auto_release);
+}
+
+static int auto_active(struct i915_active *ref)
+{
+ i915_active_get(ref);
+ return 0;
+}
+
+static void auto_retire(struct i915_active *ref)
+{
+ i915_active_put(ref);
+}
+
+struct i915_active *i915_active_create(void)
+{
+ struct auto_active *aa;
+
+ aa = kmalloc(sizeof(*aa), GFP_KERNEL);
+ if (!aa)
+ return NULL;
+
+ kref_init(&aa->ref);
+ i915_active_init(&aa->base, auto_active, auto_retire);
+
+ return &aa->base;
+}
+
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/i915_active.c"
#endif
diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
index b3282ae7913c..cf4058150966 100644
--- a/drivers/gpu/drm/i915/i915_active.h
+++ b/drivers/gpu/drm/i915/i915_active.h
@@ -181,7 +181,11 @@ static inline bool i915_active_has_exclusive(struct i915_active *ref)
return rcu_access_pointer(ref->excl.fence);
}
-int i915_active_wait(struct i915_active *ref);
+int __i915_active_wait(struct i915_active *ref, int state);
+static inline int i915_active_wait(struct i915_active *ref)
+{
+ return __i915_active_wait(ref, TASK_INTERRUPTIBLE);
+}
int i915_sw_fence_await_active(struct i915_sw_fence *fence,
struct i915_active *ref,
@@ -189,7 +193,9 @@ int i915_sw_fence_await_active(struct i915_sw_fence *fence,
int i915_request_await_active(struct i915_request *rq,
struct i915_active *ref,
unsigned int flags);
-#define I915_ACTIVE_AWAIT_ALL BIT(0)
+#define I915_ACTIVE_AWAIT_EXCL BIT(0)
+#define I915_ACTIVE_AWAIT_ACTIVE BIT(1)
+#define I915_ACTIVE_AWAIT_BARRIER BIT(2)
int i915_active_acquire(struct i915_active *ref);
bool i915_active_acquire_if_busy(struct i915_active *ref);
@@ -221,4 +227,8 @@ void i915_request_add_active_barriers(struct i915_request *rq);
void i915_active_print(struct i915_active *ref, struct drm_printer *m);
void i915_active_unlock_wait(struct i915_active *ref);
+struct i915_active *i915_active_create(void);
+struct i915_active *i915_active_get(struct i915_active *ref);
+void i915_active_put(struct i915_active *ref);
+
#endif /* _I915_ACTIVE_H_ */
diff --git a/drivers/gpu/drm/i915/i915_config.c b/drivers/gpu/drm/i915/i915_config.c
new file mode 100644
index 000000000000..b79b5f6d2cfa
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_config.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#include "i915_drv.h"
+
+unsigned long
+i915_fence_context_timeout(const struct drm_i915_private *i915, u64 context)
+{
+ if (context && IS_ACTIVE(CONFIG_DRM_I915_FENCE_TIMEOUT))
+ return msecs_to_jiffies_timeout(CONFIG_DRM_I915_FENCE_TIMEOUT);
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 6ca797128aa1..bca036ac6621 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -32,12 +32,13 @@
#include <drm/drm_debugfs.h>
#include "gem/i915_gem_context.h"
+#include "gt/intel_gt_buffer_pool.h"
+#include "gt/intel_gt_clock_utils.h"
#include "gt/intel_gt_pm.h"
#include "gt/intel_gt_requests.h"
#include "gt/intel_reset.h"
#include "gt/intel_rc6.h"
#include "gt/intel_rps.h"
-#include "gt/uc/intel_guc_submission.h"
#include "i915_debugfs.h"
#include "i915_debugfs_params.h"
@@ -218,7 +219,7 @@ i915_debugfs_describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
struct file_stats {
struct i915_address_space *vm;
unsigned long count;
- u64 total, unbound;
+ u64 total;
u64 active, inactive;
u64 closed;
};
@@ -234,8 +235,6 @@ static int per_file_stats(int id, void *ptr, void *data)
stats->count++;
stats->total += obj->base.size;
- if (!atomic_read(&obj->bind_count))
- stats->unbound += obj->base.size;
spin_lock(&obj->vma.lock);
if (!stats->vm) {
@@ -285,13 +284,12 @@ static int per_file_stats(int id, void *ptr, void *data)
#define print_file_stats(m, name, stats) do { \
if (stats.count) \
- seq_printf(m, "%s: %lu objects, %llu bytes (%llu active, %llu inactive, %llu unbound, %llu closed)\n", \
+ seq_printf(m, "%s: %lu objects, %llu bytes (%llu active, %llu inactive, %llu closed)\n", \
name, \
stats.count, \
stats.total, \
stats.active, \
stats.inactive, \
- stats.unbound, \
stats.closed); \
} while (0)
@@ -745,7 +743,7 @@ i915_error_state_write(struct file *filp,
if (!error)
return 0;
- DRM_DEBUG_DRIVER("Resetting error state\n");
+ drm_dbg(&error->i915->drm, "Resetting error state\n");
i915_reset_error_state(error->i915);
return cnt;
@@ -930,21 +928,30 @@ static int i915_frequency_info(struct seq_file *m, void *unused)
seq_printf(m, "RPDECLIMIT: 0x%08x\n", rpdeclimit);
seq_printf(m, "RPNSWREQ: %dMHz\n", reqf);
seq_printf(m, "CAGF: %dMHz\n", cagf);
- seq_printf(m, "RP CUR UP EI: %d (%dus)\n",
- rpupei, GT_PM_INTERVAL_TO_US(dev_priv, rpupei));
- seq_printf(m, "RP CUR UP: %d (%dus)\n",
- rpcurup, GT_PM_INTERVAL_TO_US(dev_priv, rpcurup));
- seq_printf(m, "RP PREV UP: %d (%dus)\n",
- rpprevup, GT_PM_INTERVAL_TO_US(dev_priv, rpprevup));
+ seq_printf(m, "RP CUR UP EI: %d (%dns)\n",
+ rpupei,
+ intel_gt_pm_interval_to_ns(&dev_priv->gt, rpupei));
+ seq_printf(m, "RP CUR UP: %d (%dun)\n",
+ rpcurup,
+ intel_gt_pm_interval_to_ns(&dev_priv->gt, rpcurup));
+ seq_printf(m, "RP PREV UP: %d (%dns)\n",
+ rpprevup,
+ intel_gt_pm_interval_to_ns(&dev_priv->gt, rpprevup));
seq_printf(m, "Up threshold: %d%%\n",
rps->power.up_threshold);
- seq_printf(m, "RP CUR DOWN EI: %d (%dus)\n",
- rpdownei, GT_PM_INTERVAL_TO_US(dev_priv, rpdownei));
- seq_printf(m, "RP CUR DOWN: %d (%dus)\n",
- rpcurdown, GT_PM_INTERVAL_TO_US(dev_priv, rpcurdown));
- seq_printf(m, "RP PREV DOWN: %d (%dus)\n",
- rpprevdown, GT_PM_INTERVAL_TO_US(dev_priv, rpprevdown));
+ seq_printf(m, "RP CUR DOWN EI: %d (%dns)\n",
+ rpdownei,
+ intel_gt_pm_interval_to_ns(&dev_priv->gt,
+ rpdownei));
+ seq_printf(m, "RP CUR DOWN: %d (%dns)\n",
+ rpcurdown,
+ intel_gt_pm_interval_to_ns(&dev_priv->gt,
+ rpcurdown));
+ seq_printf(m, "RP PREV DOWN: %d (%dns)\n",
+ rpprevdown,
+ intel_gt_pm_interval_to_ns(&dev_priv->gt,
+ rpprevdown));
seq_printf(m, "Down threshold: %d%%\n",
rps->power.down_threshold);
@@ -1193,7 +1200,8 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
struct drm_i915_private *dev_priv = node_to_i915(m->private);
struct intel_rps *rps = &dev_priv->gt.rps;
- seq_printf(m, "RPS enabled? %d\n", rps->enabled);
+ seq_printf(m, "RPS enabled? %s\n", yesno(intel_rps_is_enabled(rps)));
+ seq_printf(m, "RPS active? %s\n", yesno(intel_rps_is_active(rps)));
seq_printf(m, "GPU busy? %s\n", yesno(dev_priv->gt.awake));
seq_printf(m, "Boosts outstanding? %d\n",
atomic_read(&rps->num_waiters));
@@ -1213,7 +1221,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
seq_printf(m, "Wait boosts: %d\n", atomic_read(&rps->boosts));
- if (INTEL_GEN(dev_priv) >= 6 && rps->enabled && dev_priv->gt.awake) {
+ if (INTEL_GEN(dev_priv) >= 6 && intel_rps_is_active(rps)) {
u32 rpup, rpupei;
u32 rpdown, rpdownei;
@@ -1251,286 +1259,6 @@ static int i915_llc(struct seq_file *m, void *data)
return 0;
}
-static int i915_huc_load_status_info(struct seq_file *m, void *data)
-{
- struct drm_i915_private *dev_priv = node_to_i915(m->private);
- intel_wakeref_t wakeref;
- struct drm_printer p;
-
- if (!HAS_GT_UC(dev_priv))
- return -ENODEV;
-
- p = drm_seq_file_printer(m);
- intel_uc_fw_dump(&dev_priv->gt.uc.huc.fw, &p);
-
- with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref)
- seq_printf(m, "\nHuC status 0x%08x:\n", I915_READ(HUC_STATUS2));
-
- return 0;
-}
-
-static int i915_guc_load_status_info(struct seq_file *m, void *data)
-{
- struct drm_i915_private *dev_priv = node_to_i915(m->private);
- intel_wakeref_t wakeref;
- struct drm_printer p;
-
- if (!HAS_GT_UC(dev_priv))
- return -ENODEV;
-
- p = drm_seq_file_printer(m);
- intel_uc_fw_dump(&dev_priv->gt.uc.guc.fw, &p);
-
- with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref) {
- u32 tmp = I915_READ(GUC_STATUS);
- u32 i;
-
- seq_printf(m, "\nGuC status 0x%08x:\n", tmp);
- seq_printf(m, "\tBootrom status = 0x%x\n",
- (tmp & GS_BOOTROM_MASK) >> GS_BOOTROM_SHIFT);
- seq_printf(m, "\tuKernel status = 0x%x\n",
- (tmp & GS_UKERNEL_MASK) >> GS_UKERNEL_SHIFT);
- seq_printf(m, "\tMIA Core status = 0x%x\n",
- (tmp & GS_MIA_MASK) >> GS_MIA_SHIFT);
- seq_puts(m, "\nScratch registers:\n");
- for (i = 0; i < 16; i++) {
- seq_printf(m, "\t%2d: \t0x%x\n",
- i, I915_READ(SOFT_SCRATCH(i)));
- }
- }
-
- return 0;
-}
-
-static const char *
-stringify_guc_log_type(enum guc_log_buffer_type type)
-{
- switch (type) {
- case GUC_ISR_LOG_BUFFER:
- return "ISR";
- case GUC_DPC_LOG_BUFFER:
- return "DPC";
- case GUC_CRASH_DUMP_LOG_BUFFER:
- return "CRASH";
- default:
- MISSING_CASE(type);
- }
-
- return "";
-}
-
-static void i915_guc_log_info(struct seq_file *m, struct intel_guc_log *log)
-{
- enum guc_log_buffer_type type;
-
- if (!intel_guc_log_relay_created(log)) {
- seq_puts(m, "GuC log relay not created\n");
- return;
- }
-
- seq_puts(m, "GuC logging stats:\n");
-
- seq_printf(m, "\tRelay full count: %u\n",
- log->relay.full_count);
-
- for (type = GUC_ISR_LOG_BUFFER; type < GUC_MAX_LOG_BUFFER; type++) {
- seq_printf(m, "\t%s:\tflush count %10u, overflow count %10u\n",
- stringify_guc_log_type(type),
- log->stats[type].flush,
- log->stats[type].sampled_overflow);
- }
-}
-
-static int i915_guc_info(struct seq_file *m, void *data)
-{
- struct drm_i915_private *dev_priv = node_to_i915(m->private);
- struct intel_uc *uc = &dev_priv->gt.uc;
-
- if (!intel_uc_uses_guc(uc))
- return -ENODEV;
-
- i915_guc_log_info(m, &uc->guc.log);
-
- /* Add more as required ... */
-
- return 0;
-}
-
-static int i915_guc_stage_pool(struct seq_file *m, void *data)
-{
- struct drm_i915_private *dev_priv = node_to_i915(m->private);
- struct intel_uc *uc = &dev_priv->gt.uc;
- struct guc_stage_desc *desc = uc->guc.stage_desc_pool_vaddr;
- int index;
-
- if (!intel_uc_uses_guc_submission(uc))
- return -ENODEV;
-
- for (index = 0; index < GUC_MAX_STAGE_DESCRIPTORS; index++, desc++) {
- struct intel_engine_cs *engine;
-
- if (!(desc->attribute & GUC_STAGE_DESC_ATTR_ACTIVE))
- continue;
-
- seq_printf(m, "GuC stage descriptor %u:\n", index);
- seq_printf(m, "\tIndex: %u\n", desc->stage_id);
- seq_printf(m, "\tAttribute: 0x%x\n", desc->attribute);
- seq_printf(m, "\tPriority: %d\n", desc->priority);
- seq_printf(m, "\tDoorbell id: %d\n", desc->db_id);
- seq_printf(m, "\tEngines used: 0x%x\n",
- desc->engines_used);
- seq_printf(m, "\tDoorbell trigger phy: 0x%llx, cpu: 0x%llx, uK: 0x%x\n",
- desc->db_trigger_phy,
- desc->db_trigger_cpu,
- desc->db_trigger_uk);
- seq_printf(m, "\tProcess descriptor: 0x%x\n",
- desc->process_desc);
- seq_printf(m, "\tWorkqueue address: 0x%x, size: 0x%x\n",
- desc->wq_addr, desc->wq_size);
- seq_putc(m, '\n');
-
- for_each_uabi_engine(engine, dev_priv) {
- u32 guc_engine_id = engine->guc_id;
- struct guc_execlist_context *lrc =
- &desc->lrc[guc_engine_id];
-
- seq_printf(m, "\t%s LRC:\n", engine->name);
- seq_printf(m, "\t\tContext desc: 0x%x\n",
- lrc->context_desc);
- seq_printf(m, "\t\tContext id: 0x%x\n", lrc->context_id);
- seq_printf(m, "\t\tLRCA: 0x%x\n", lrc->ring_lrca);
- seq_printf(m, "\t\tRing begin: 0x%x\n", lrc->ring_begin);
- seq_printf(m, "\t\tRing end: 0x%x\n", lrc->ring_end);
- seq_putc(m, '\n');
- }
- }
-
- return 0;
-}
-
-static int i915_guc_log_dump(struct seq_file *m, void *data)
-{
- struct drm_info_node *node = m->private;
- struct drm_i915_private *dev_priv = node_to_i915(node);
- bool dump_load_err = !!node->info_ent->data;
- struct drm_i915_gem_object *obj = NULL;
- u32 *log;
- int i = 0;
-
- if (!HAS_GT_UC(dev_priv))
- return -ENODEV;
-
- if (dump_load_err)
- obj = dev_priv->gt.uc.load_err_log;
- else if (dev_priv->gt.uc.guc.log.vma)
- obj = dev_priv->gt.uc.guc.log.vma->obj;
-
- if (!obj)
- return 0;
-
- log = i915_gem_object_pin_map(obj, I915_MAP_WC);
- if (IS_ERR(log)) {
- DRM_DEBUG("Failed to pin object\n");
- seq_puts(m, "(log data unaccessible)\n");
- return PTR_ERR(log);
- }
-
- for (i = 0; i < obj->base.size / sizeof(u32); i += 4)
- seq_printf(m, "0x%08x 0x%08x 0x%08x 0x%08x\n",
- *(log + i), *(log + i + 1),
- *(log + i + 2), *(log + i + 3));
-
- seq_putc(m, '\n');
-
- i915_gem_object_unpin_map(obj);
-
- return 0;
-}
-
-static int i915_guc_log_level_get(void *data, u64 *val)
-{
- struct drm_i915_private *dev_priv = data;
- struct intel_uc *uc = &dev_priv->gt.uc;
-
- if (!intel_uc_uses_guc(uc))
- return -ENODEV;
-
- *val = intel_guc_log_get_level(&uc->guc.log);
-
- return 0;
-}
-
-static int i915_guc_log_level_set(void *data, u64 val)
-{
- struct drm_i915_private *dev_priv = data;
- struct intel_uc *uc = &dev_priv->gt.uc;
-
- if (!intel_uc_uses_guc(uc))
- return -ENODEV;
-
- return intel_guc_log_set_level(&uc->guc.log, val);
-}
-
-DEFINE_SIMPLE_ATTRIBUTE(i915_guc_log_level_fops,
- i915_guc_log_level_get, i915_guc_log_level_set,
- "%lld\n");
-
-static int i915_guc_log_relay_open(struct inode *inode, struct file *file)
-{
- struct drm_i915_private *i915 = inode->i_private;
- struct intel_guc *guc = &i915->gt.uc.guc;
- struct intel_guc_log *log = &guc->log;
-
- if (!intel_guc_is_ready(guc))
- return -ENODEV;
-
- file->private_data = log;
-
- return intel_guc_log_relay_open(log);
-}
-
-static ssize_t
-i915_guc_log_relay_write(struct file *filp,
- const char __user *ubuf,
- size_t cnt,
- loff_t *ppos)
-{
- struct intel_guc_log *log = filp->private_data;
- int val;
- int ret;
-
- ret = kstrtoint_from_user(ubuf, cnt, 0, &val);
- if (ret < 0)
- return ret;
-
- /*
- * Enable and start the guc log relay on value of 1.
- * Flush log relay for any other value.
- */
- if (val == 1)
- ret = intel_guc_log_relay_start(log);
- else
- intel_guc_log_relay_flush(log);
-
- return ret ?: cnt;
-}
-
-static int i915_guc_log_relay_release(struct inode *inode, struct file *file)
-{
- struct drm_i915_private *i915 = inode->i_private;
- struct intel_guc *guc = &i915->gt.uc.guc;
-
- intel_guc_log_relay_close(&guc->log);
- return 0;
-}
-
-static const struct file_operations i915_guc_log_relay_fops = {
- .owner = THIS_MODULE,
- .open = i915_guc_log_relay_open,
- .write = i915_guc_log_relay_write,
- .release = i915_guc_log_relay_release,
-};
-
static int i915_runtime_pm_status(struct seq_file *m, void *unused)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
@@ -1576,8 +1304,8 @@ static int i915_engine_info(struct seq_file *m, void *unused)
seq_printf(m, "GT awake? %s [%d]\n",
yesno(dev_priv->gt.awake),
atomic_read(&dev_priv->gt.wakeref.count));
- seq_printf(m, "CS timestamp frequency: %u kHz\n",
- RUNTIME_INFO(dev_priv)->cs_timestamp_frequency_khz);
+ seq_printf(m, "CS timestamp frequency: %u Hz\n",
+ RUNTIME_INFO(dev_priv)->cs_timestamp_frequency_hz);
p = drm_seq_file_printer(m);
for_each_uabi_engine(engine, dev_priv)
@@ -1676,13 +1404,12 @@ static int
i915_perf_noa_delay_set(void *data, u64 val)
{
struct drm_i915_private *i915 = data;
- const u32 clk = RUNTIME_INFO(i915)->cs_timestamp_frequency_khz;
/*
* This would lead to infinite waits as we're doing timestamp
* difference on the CS with only 32bits.
*/
- if (val > mul_u32_u32(U32_MAX, clk))
+ if (i915_cs_timestamp_ns_to_ticks(i915, val) > U32_MAX)
return -EINVAL;
atomic64_set(&i915->perf.noa_programming_delay, val);
@@ -1757,6 +1484,9 @@ gt_drop_caches(struct intel_gt *gt, u64 val)
if (val & DROP_RESET_ACTIVE && intel_gt_terminally_wedged(gt))
intel_gt_handle_error(gt, ALL_ENGINES, 0, NULL);
+ if (val & DROP_FREED)
+ intel_gt_flush_buffer_pool(gt);
+
return 0;
}
@@ -2139,12 +1869,6 @@ static const struct drm_info_list i915_debugfs_list[] = {
{"i915_gem_objects", i915_gem_object_info, 0},
{"i915_gem_fence_regs", i915_gem_fence_regs_info, 0},
{"i915_gem_interrupt", i915_interrupt_info, 0},
- {"i915_guc_info", i915_guc_info, 0},
- {"i915_guc_load_status", i915_guc_load_status_info, 0},
- {"i915_guc_log_dump", i915_guc_log_dump, 0},
- {"i915_guc_load_err_log_dump", i915_guc_log_dump, 0, (void *)1},
- {"i915_guc_stage_pool", i915_guc_stage_pool, 0},
- {"i915_huc_load_status", i915_huc_load_status_info, 0},
{"i915_frequency_info", i915_frequency_info, 0},
{"i915_ring_freq_table", i915_ring_freq_table, 0},
{"i915_context_status", i915_context_status, 0},
@@ -2172,11 +1896,9 @@ static const struct i915_debugfs_files {
{"i915_error_state", &i915_error_state_fops},
{"i915_gpu_info", &i915_gpu_info_fops},
#endif
- {"i915_guc_log_level", &i915_guc_log_level_fops},
- {"i915_guc_log_relay", &i915_guc_log_relay_fops},
};
-int i915_debugfs_register(struct drm_i915_private *dev_priv)
+void i915_debugfs_register(struct drm_i915_private *dev_priv)
{
struct drm_minor *minor = dev_priv->drm.primary;
int i;
@@ -2193,7 +1915,7 @@ int i915_debugfs_register(struct drm_i915_private *dev_priv)
i915_debugfs_files[i].fops);
}
- return drm_debugfs_create_files(i915_debugfs_list,
- I915_DEBUGFS_ENTRIES,
- minor->debugfs_root, minor);
+ drm_debugfs_create_files(i915_debugfs_list,
+ I915_DEBUGFS_ENTRIES,
+ minor->debugfs_root, minor);
}
diff --git a/drivers/gpu/drm/i915/i915_debugfs.h b/drivers/gpu/drm/i915/i915_debugfs.h
index 6da39c76ab5e..1de2736f1248 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.h
+++ b/drivers/gpu/drm/i915/i915_debugfs.h
@@ -12,10 +12,10 @@ struct drm_i915_private;
struct seq_file;
#ifdef CONFIG_DEBUG_FS
-int i915_debugfs_register(struct drm_i915_private *dev_priv);
+void i915_debugfs_register(struct drm_i915_private *dev_priv);
void i915_debugfs_describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj);
#else
-static inline int i915_debugfs_register(struct drm_i915_private *dev_priv) { return 0; }
+static inline void i915_debugfs_register(struct drm_i915_private *dev_priv) {}
static inline void i915_debugfs_describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) {}
#endif
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 81a4621853db..34ee12f3f02d 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -43,6 +43,7 @@
#include <drm/drm_atomic_helper.h>
#include <drm/drm_ioctl.h>
#include <drm/drm_irq.h>
+#include <drm/drm_managed.h>
#include <drm/drm_probe_helper.h>
#include "display/intel_acpi.h"
@@ -227,14 +228,14 @@ static int i915_driver_modeset_probe_noirq(struct drm_i915_private *i915)
ret = drm_vblank_init(&i915->drm,
INTEL_NUM_PIPES(i915));
if (ret)
- goto out;
+ return ret;
}
intel_bios_init(i915);
ret = intel_vga_register(i915);
if (ret)
- goto out;
+ goto cleanup_bios;
intel_power_domains_init_hw(i915, false);
@@ -242,13 +243,16 @@ static int i915_driver_modeset_probe_noirq(struct drm_i915_private *i915)
ret = intel_modeset_init_noirq(i915);
if (ret)
- goto cleanup_vga_client;
+ goto cleanup_vga_client_pw_domain_csr;
return 0;
-cleanup_vga_client:
+cleanup_vga_client_pw_domain_csr:
+ intel_csr_ucode_fini(i915);
+ intel_power_domains_driver_remove(i915);
intel_vga_unregister(i915);
-out:
+cleanup_bios:
+ intel_bios_driver_remove(i915);
return ret;
}
@@ -307,13 +311,13 @@ static void i915_driver_modeset_remove(struct drm_i915_private *i915)
/* part #2: call after irq uninstall */
static void i915_driver_modeset_remove_noirq(struct drm_i915_private *i915)
{
- intel_modeset_driver_remove_noirq(i915);
+ intel_csr_ucode_fini(i915);
- intel_bios_driver_remove(i915);
+ intel_power_domains_driver_remove(i915);
intel_vga_unregister(i915);
- intel_csr_ucode_fini(i915);
+ intel_bios_driver_remove(i915);
}
static void intel_init_dpio(struct drm_i915_private *dev_priv)
@@ -566,6 +570,62 @@ static void intel_sanitize_options(struct drm_i915_private *dev_priv)
}
/**
+ * i915_set_dma_info - set all relevant PCI dma info as configured for the
+ * platform
+ * @i915: valid i915 instance
+ *
+ * Set the dma max segment size, device and coherent masks. The dma mask set
+ * needs to occur before i915_ggtt_probe_hw.
+ *
+ * A couple of platforms have special needs. Address them as well.
+ *
+ */
+static int i915_set_dma_info(struct drm_i915_private *i915)
+{
+ struct pci_dev *pdev = i915->drm.pdev;
+ unsigned int mask_size = INTEL_INFO(i915)->dma_mask_size;
+ int ret;
+
+ GEM_BUG_ON(!mask_size);
+
+ /*
+ * We don't have a max segment size, so set it to the max so sg's
+ * debugging layer doesn't complain
+ */
+ dma_set_max_seg_size(&pdev->dev, UINT_MAX);
+
+ ret = dma_set_mask(&pdev->dev, DMA_BIT_MASK(mask_size));
+ if (ret)
+ goto mask_err;
+
+ /* overlay on gen2 is broken and can't address above 1G */
+ if (IS_GEN(i915, 2))
+ mask_size = 30;
+
+ /*
+ * 965GM sometimes incorrectly writes to hardware status page (HWS)
+ * using 32bit addressing, overwriting memory if HWS is located
+ * above 4GB.
+ *
+ * The documentation also mentions an issue with undefined
+ * behaviour if any general state is accessed within a page above 4GB,
+ * which also needs to be handled carefully.
+ */
+ if (IS_I965G(i915) || IS_I965GM(i915))
+ mask_size = 32;
+
+ ret = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(mask_size));
+ if (ret)
+ goto mask_err;
+
+ return 0;
+
+mask_err:
+ drm_err(&i915->drm, "Can't set DMA mask/consistent mask (%d)\n", ret);
+ return ret;
+}
+
+/**
* i915_driver_hw_probe - setup state requiring device access
* @dev_priv: device private
*
@@ -610,6 +670,10 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv)
/* needs to be done before ggtt probe */
intel_dram_edram_detect(dev_priv);
+ ret = i915_set_dma_info(dev_priv);
+ if (ret)
+ return ret;
+
i915_perf_init(dev_priv);
ret = i915_ggtt_probe_hw(dev_priv);
@@ -638,40 +702,6 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv)
pci_set_master(pdev);
- /*
- * We don't have a max segment size, so set it to the max so sg's
- * debugging layer doesn't complain
- */
- dma_set_max_seg_size(&pdev->dev, UINT_MAX);
-
- /* overlay on gen2 is broken and can't address above 1G */
- if (IS_GEN(dev_priv, 2)) {
- ret = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(30));
- if (ret) {
- drm_err(&dev_priv->drm, "failed to set DMA mask\n");
-
- goto err_mem_regions;
- }
- }
-
- /* 965GM sometimes incorrectly writes to hardware status page (HWS)
- * using 32bit addressing, overwriting memory if HWS is located
- * above 4GB.
- *
- * The documentation also mentions an issue with undefined
- * behaviour if any general state is accessed within a page above 4GB,
- * which also needs to be handled carefully.
- */
- if (IS_I965G(dev_priv) || IS_I965GM(dev_priv)) {
- ret = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
-
- if (ret) {
- drm_err(&dev_priv->drm, "failed to set DMA mask\n");
-
- goto err_mem_regions;
- }
- }
-
cpu_latency_qos_add_request(&dev_priv->pm_qos, PM_QOS_DEFAULT_VALUE);
intel_gt_init_workarounds(dev_priv);
@@ -876,17 +906,11 @@ i915_driver_create(struct pci_dev *pdev, const struct pci_device_id *ent)
(struct intel_device_info *)ent->driver_data;
struct intel_device_info *device_info;
struct drm_i915_private *i915;
- int err;
-
- i915 = kzalloc(sizeof(*i915), GFP_KERNEL);
- if (!i915)
- return ERR_PTR(-ENOMEM);
- err = drm_dev_init(&i915->drm, &driver, &pdev->dev);
- if (err) {
- kfree(i915);
- return ERR_PTR(err);
- }
+ i915 = devm_drm_dev_alloc(&pdev->dev, &driver,
+ struct drm_i915_private, drm);
+ if (IS_ERR(i915))
+ return i915;
i915->drm.pdev = pdev;
pci_set_drvdata(pdev, i915);
@@ -901,17 +925,6 @@ i915_driver_create(struct pci_dev *pdev, const struct pci_device_id *ent)
return i915;
}
-static void i915_driver_destroy(struct drm_i915_private *i915)
-{
- struct pci_dev *pdev = i915->drm.pdev;
-
- drm_dev_fini(&i915->drm);
- kfree(i915);
-
- /* And make sure we never chase our dangling pointer from pci_dev */
- pci_set_drvdata(pdev, NULL);
-}
-
/**
* i915_driver_probe - setup chip and create an initial config
* @pdev: PCI device
@@ -993,12 +1006,14 @@ int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
i915_welcome_messages(i915);
+ i915->do_release = true;
+
return 0;
out_cleanup_irq:
intel_irq_uninstall(i915);
out_cleanup_modeset:
- /* FIXME */
+ i915_driver_modeset_remove_noirq(i915);
out_cleanup_hw:
i915_driver_hw_remove(i915);
intel_memory_regions_driver_release(i915);
@@ -1012,7 +1027,6 @@ out_pci_disable:
pci_disable_device(pdev);
out_fini:
i915_probe_error(i915, "Device initialization failed (%d)\n", ret);
- i915_driver_destroy(i915);
return ret;
}
@@ -1035,12 +1049,12 @@ void i915_driver_remove(struct drm_i915_private *i915)
intel_irq_uninstall(i915);
- i915_driver_modeset_remove_noirq(i915);
+ intel_modeset_driver_remove_noirq(i915);
i915_reset_error_state(i915);
i915_gem_driver_remove(i915);
- intel_power_domains_driver_remove(i915);
+ i915_driver_modeset_remove_noirq(i915);
i915_driver_hw_remove(i915);
@@ -1052,6 +1066,9 @@ static void i915_driver_release(struct drm_device *dev)
struct drm_i915_private *dev_priv = to_i915(dev);
struct intel_runtime_pm *rpm = &dev_priv->runtime_pm;
+ if (!dev_priv->do_release)
+ return;
+
disable_rpm_wakeref_asserts(rpm);
i915_gem_driver_release(dev_priv);
@@ -1065,7 +1082,6 @@ static void i915_driver_release(struct drm_device *dev)
intel_runtime_pm_driver_release(rpm);
i915_driver_late_release(dev_priv);
- i915_driver_destroy(dev_priv);
}
static int i915_driver_open(struct drm_device *dev, struct drm_file *file)
@@ -1286,7 +1302,6 @@ static int i915_drm_resume(struct drm_device *dev)
drm_err(&dev_priv->drm, "failed to re-enable GGTT\n");
i915_ggtt_resume(&dev_priv->ggtt);
- i915_gem_restore_fences(&dev_priv->ggtt);
intel_csr_ucode_resume(dev_priv);
@@ -1604,8 +1619,6 @@ static int intel_runtime_suspend(struct device *kdev)
intel_gt_runtime_resume(&dev_priv->gt);
- i915_gem_restore_fences(&dev_priv->ggtt);
-
enable_rpm_wakeref_asserts(rpm);
return ret;
@@ -1685,7 +1698,6 @@ static int intel_runtime_resume(struct device *kdev)
* we can do is to hope that things will still work (and disable RPM).
*/
intel_gt_runtime_resume(&dev_priv->gt);
- i915_gem_restore_fences(&dev_priv->ggtt);
/*
* On VLV/CHV display interrupts are part of the display
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 62b901ffabf9..adb9bf34cf97 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -92,7 +92,6 @@
#include "intel_wopcm.h"
#include "i915_gem.h"
-#include "i915_gem_fence_reg.h"
#include "i915_gem_gtt.h"
#include "i915_gpu_error.h"
#include "i915_perf_types.h"
@@ -109,8 +108,8 @@
#define DRIVER_NAME "i915"
#define DRIVER_DESC "Intel Graphics"
-#define DRIVER_DATE "20200313"
-#define DRIVER_TIMESTAMP 1584144591
+#define DRIVER_DATE "20200515"
+#define DRIVER_TIMESTAMP 1589543364
struct drm_i915_gem_object;
@@ -149,6 +148,8 @@ enum hpd_pin {
struct i915_hotplug {
struct delayed_work hotplug_work;
+ const u32 *hpd, *pch_hpd;
+
struct {
unsigned long last_jiffies;
int count;
@@ -417,6 +418,7 @@ struct intel_fbc {
struct {
const struct drm_format_info *format;
unsigned int stride;
+ u64 modifier;
} fb;
u16 gen9_wa_cfb_stride;
s8 fence_id;
@@ -510,6 +512,7 @@ struct i915_psr {
u32 dc3co_exit_delay;
struct delayed_work dc3co_work;
bool force_mode_changed;
+ struct drm_dp_vsc_sdp vsc;
};
#define QUIRK_LVDS_SSC_DISABLE (1<<1)
@@ -540,7 +543,6 @@ struct i915_suspend_saved_registers {
u32 saveSWF0[16];
u32 saveSWF1[16];
u32 saveSWF3[3];
- u64 saveFENCE[I915_MAX_NUM_FENCES];
u32 savePCH_PORT_HOTPLUG;
u16 saveGCDGMBUS;
};
@@ -615,13 +617,14 @@ struct i915_gem_mm {
#define I915_IDLE_ENGINES_TIMEOUT (200) /* in ms */
-#define I915_RESET_TIMEOUT (10 * HZ) /* 10s */
-#define I915_FENCE_TIMEOUT (10 * HZ) /* 10s */
-
-#define I915_ENGINE_DEAD_TIMEOUT (4 * HZ) /* Seqno, head and subunits dead */
-#define I915_SEQNO_DEAD_TIMEOUT (12 * HZ) /* Seqno dead with active head */
+unsigned long i915_fence_context_timeout(const struct drm_i915_private *i915,
+ u64 context);
-#define I915_ENGINE_WEDGED_TIMEOUT (60 * HZ) /* Reset but no recovery? */
+static inline unsigned long
+i915_fence_timeout(const struct drm_i915_private *i915)
+{
+ return i915_fence_context_timeout(i915, U64_MAX);
+}
/* Amount of SAGV/QGV points, BSpec precisely defines this */
#define I915_NUM_QGV_POINTS 8
@@ -823,6 +826,9 @@ struct i915_selftest_stash {
struct drm_i915_private {
struct drm_device drm;
+ /* FIXME: Device release actions should all be moved to drmm_ */
+ bool do_release;
+
const struct intel_device_info __info; /* Use INTEL_INFO() to access. */
struct intel_runtime_info __runtime; /* Use RUNTIME_INFO() to access. */
struct intel_driver_caps caps;
@@ -885,7 +891,6 @@ struct drm_i915_private {
struct pci_dev *bridge_dev;
- struct intel_engine_cs *engine[I915_NUM_ENGINES];
struct rb_root uabi_engines;
struct resource mch_res;
@@ -1506,6 +1511,11 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
#define IS_ICL_REVID(p, since, until) \
(IS_ICELAKE(p) && IS_REVID(p, since, until))
+#define EHL_REVID_A0 0x0
+
+#define IS_EHL_REVID(p, since, until) \
+ (IS_ELKHARTLAKE(p) && IS_REVID(p, since, until))
+
#define TGL_REVID_A0 0x0
#define TGL_REVID_B0 0x1
#define TGL_REVID_C0 0x2
@@ -1606,7 +1616,7 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
#define HAS_DDI(dev_priv) (INTEL_INFO(dev_priv)->display.has_ddi)
#define HAS_FPGA_DBG_UNCLAIMED(dev_priv) (INTEL_INFO(dev_priv)->has_fpga_dbg)
#define HAS_PSR(dev_priv) (INTEL_INFO(dev_priv)->display.has_psr)
-#define HAS_TRANSCODER_EDP(dev_priv) (INTEL_INFO(dev_priv)->trans_offsets[TRANSCODER_EDP] != 0)
+#define HAS_TRANSCODER(dev_priv, trans) ((INTEL_INFO(dev_priv)->cpu_transcoder_mask & BIT(trans)) != 0)
#define HAS_RC6(dev_priv) (INTEL_INFO(dev_priv)->has_rc6)
#define HAS_RC6p(dev_priv) (INTEL_INFO(dev_priv)->has_rc6p)
@@ -1740,6 +1750,7 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj,
unsigned long flags);
#define I915_GEM_OBJECT_UNBIND_ACTIVE BIT(0)
#define I915_GEM_OBJECT_UNBIND_BARRIER BIT(1)
+#define I915_GEM_OBJECT_UNBIND_TEST BIT(2)
void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv);
@@ -1913,4 +1924,16 @@ i915_coherent_map_type(struct drm_i915_private *i915)
return HAS_LLC(i915) ? I915_MAP_WB : I915_MAP_WC;
}
+static inline u64 i915_cs_timestamp_ns_to_ticks(struct drm_i915_private *i915, u64 val)
+{
+ return DIV_ROUND_UP_ULL(val * RUNTIME_INFO(i915)->cs_timestamp_frequency_hz,
+ 1000000000);
+}
+
+static inline u64 i915_cs_timestamp_ticks_to_ns(struct drm_i915_private *i915, u64 val)
+{
+ return div_u64(val * 1000000000,
+ RUNTIME_INFO(i915)->cs_timestamp_frequency_hz);
+}
+
#endif
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index ca5420012a22..0cbcb9f54e7d 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -118,7 +118,7 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj,
struct i915_vma *vma;
int ret;
- if (!atomic_read(&obj->bind_count))
+ if (list_empty(&obj->vma.list))
return 0;
/*
@@ -141,6 +141,11 @@ try_again:
if (!i915_vma_is_bound(vma, I915_VMA_BIND_MASK))
continue;
+ if (flags & I915_GEM_OBJECT_UNBIND_TEST) {
+ ret = -EBUSY;
+ break;
+ }
+
ret = -EAGAIN;
if (!i915_vm_tryopen(vm))
break;
@@ -993,18 +998,16 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
return ERR_PTR(ret);
}
+ ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
+ if (ret)
+ return ERR_PTR(ret);
+
if (vma->fence && !i915_gem_object_is_tiled(obj)) {
mutex_lock(&ggtt->vm.mutex);
- ret = i915_vma_revoke_fence(vma);
+ i915_vma_revoke_fence(vma);
mutex_unlock(&ggtt->vm.mutex);
- if (ret)
- return ERR_PTR(ret);
}
- ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
- if (ret)
- return ERR_PTR(ret);
-
ret = i915_vma_wait_for_bind(vma);
if (ret) {
i915_vma_unpin(vma);
@@ -1156,7 +1159,6 @@ err_unlock:
/* Minimal basic recovery for KMS */
ret = i915_ggtt_enable_hw(dev_priv);
i915_ggtt_resume(&dev_priv->ggtt);
- i915_gem_restore_fences(&dev_priv->ggtt);
intel_init_clock_gating(dev_priv);
}
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 02ad1acd117c..6501939929d5 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -226,7 +226,12 @@ found:
while (ret == 0 && (node = drm_mm_scan_color_evict(&scan))) {
vma = container_of(node, struct i915_vma, node);
- ret = __i915_vma_unbind(vma);
+
+ /* If we find any non-objects (!vma), we cannot evict them */
+ if (vma->node.color != I915_COLOR_UNEVICTABLE)
+ ret = __i915_vma_unbind(vma);
+ else
+ ret = -ENOSPC; /* XXX search failed, try again? */
}
return ret;
diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c
index 54fce81d5724..d042644b9cd2 100644
--- a/drivers/gpu/drm/i915/i915_getparam.c
+++ b/drivers/gpu/drm/i915/i915_getparam.c
@@ -153,7 +153,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
return -ENODEV;
break;
case I915_PARAM_CS_TIMESTAMP_FREQUENCY:
- value = 1000 * RUNTIME_INFO(i915)->cs_timestamp_frequency_khz;
+ value = RUNTIME_INFO(i915)->cs_timestamp_frequency_hz;
break;
case I915_PARAM_MMAP_GTT_COHERENT:
value = INTEL_INFO(i915)->has_coherent_ggtt;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 5c8e51d2ba5b..eec292d06f11 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -467,14 +467,14 @@ static void error_print_request(struct drm_i915_error_state_buf *m,
if (!erq->seqno)
return;
- err_printf(m, "%s pid %d, seqno %8x:%08x%s%s, prio %d, start %08x, head %08x, tail %08x\n",
+ err_printf(m, "%s pid %d, seqno %8x:%08x%s%s, prio %d, head %08x, tail %08x\n",
prefix, erq->pid, erq->context, erq->seqno,
test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
&erq->flags) ? "!" : "",
test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
&erq->flags) ? "+" : "",
erq->sched_attr.priority,
- erq->start, erq->head, erq->tail);
+ erq->head, erq->tail);
}
static void error_print_context(struct drm_i915_error_state_buf *m,
@@ -1211,7 +1211,6 @@ static void record_request(const struct i915_request *request,
erq->context = request->fence.context;
erq->seqno = request->fence.seqno;
erq->sched_attr = request->sched.attr;
- erq->start = i915_ggtt_offset(request->ring->vma);
erq->head = request->head;
erq->tail = request->tail;
@@ -1321,26 +1320,6 @@ capture_user(struct intel_engine_capture_vma *capture,
return capture;
}
-static struct i915_vma_coredump *
-capture_object(const struct intel_gt *gt,
- struct drm_i915_gem_object *obj,
- const char *name,
- struct i915_vma_compress *compress)
-{
- if (obj && i915_gem_object_has_pages(obj)) {
- struct i915_vma fake = {
- .node = { .start = U64_MAX, .size = obj->base.size },
- .size = obj->base.size,
- .pages = obj->mm.pages,
- .obj = obj,
- };
-
- return i915_vma_coredump_create(gt, &fake, name, compress);
- } else {
- return NULL;
- }
-}
-
static void add_vma(struct intel_engine_coredump *ee,
struct i915_vma_coredump *vma)
{
@@ -1429,12 +1408,6 @@ intel_engine_coredump_add_vma(struct intel_engine_coredump *ee,
engine->wa_ctx.vma,
"WA context",
compress));
-
- add_vma(ee,
- capture_object(engine->gt,
- engine->default_state,
- "NULL context",
- compress));
}
static struct intel_engine_coredump *
@@ -1860,7 +1833,7 @@ void i915_error_state_store(struct i915_gpu_coredump *error)
return;
i915 = error->i915;
- dev_info(i915->drm.dev, "%s\n", error_msg(error));
+ drm_info(&i915->drm, "%s\n", error_msg(error));
if (error->simulated ||
cmpxchg(&i915->gpu_error.first_error, NULL, error))
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index 0d1f6c8ff355..76b80fbfb7e9 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -42,7 +42,7 @@ struct i915_vma_coredump {
int num_pages;
int page_count;
int unused;
- u32 *pages[0];
+ u32 *pages[];
};
struct i915_request_coredump {
@@ -50,7 +50,6 @@ struct i915_request_coredump {
pid_t pid;
u32 context;
u32 seqno;
- u32 start;
u32 head;
u32 tail;
struct i915_sched_attr sched_attr;
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 8a2b83807ffc..4dc601dffc08 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -124,7 +124,6 @@ static const u32 hpd_status_i915[HPD_NUM_PINS] = {
[HPD_PORT_D] = PORTD_HOTPLUG_INT_STATUS,
};
-/* BXT hpd list */
static const u32 hpd_bxt[HPD_NUM_PINS] = {
[HPD_PORT_A] = BXT_DE_PORT_HP_DDIA,
[HPD_PORT_B] = BXT_DE_PORT_HP_DDIB,
@@ -168,6 +167,49 @@ static const u32 hpd_tgp[HPD_NUM_PINS] = {
[HPD_PORT_I] = SDE_TC_HOTPLUG_ICP(PORT_TC6),
};
+static void intel_hpd_init_pins(struct drm_i915_private *dev_priv)
+{
+ struct i915_hotplug *hpd = &dev_priv->hotplug;
+
+ if (HAS_GMCH(dev_priv)) {
+ if (IS_G4X(dev_priv) || IS_VALLEYVIEW(dev_priv) ||
+ IS_CHERRYVIEW(dev_priv))
+ hpd->hpd = hpd_status_g4x;
+ else
+ hpd->hpd = hpd_status_i915;
+ return;
+ }
+
+ if (INTEL_GEN(dev_priv) >= 12)
+ hpd->hpd = hpd_gen12;
+ else if (INTEL_GEN(dev_priv) >= 11)
+ hpd->hpd = hpd_gen11;
+ else if (IS_GEN9_LP(dev_priv))
+ hpd->hpd = hpd_bxt;
+ else if (INTEL_GEN(dev_priv) >= 8)
+ hpd->hpd = hpd_bdw;
+ else if (INTEL_GEN(dev_priv) >= 7)
+ hpd->hpd = hpd_ivb;
+ else
+ hpd->hpd = hpd_ilk;
+
+ if (!HAS_PCH_SPLIT(dev_priv) || HAS_PCH_NOP(dev_priv))
+ return;
+
+ if (HAS_PCH_TGP(dev_priv) || HAS_PCH_JSP(dev_priv))
+ hpd->pch_hpd = hpd_tgp;
+ else if (HAS_PCH_ICP(dev_priv) || HAS_PCH_MCC(dev_priv))
+ hpd->pch_hpd = hpd_icp;
+ else if (HAS_PCH_CNP(dev_priv) || HAS_PCH_SPT(dev_priv))
+ hpd->pch_hpd = hpd_spt;
+ else if (HAS_PCH_LPT(dev_priv) || HAS_PCH_CPT(dev_priv))
+ hpd->pch_hpd = hpd_cpt;
+ else if (HAS_PCH_IBX(dev_priv))
+ hpd->pch_hpd = hpd_ibx;
+ else
+ MISSING_CASE(INTEL_PCH_TYPE(dev_priv));
+}
+
static void
intel_handle_vblank(struct drm_i915_private *dev_priv, enum pipe pipe)
{
@@ -1504,33 +1546,27 @@ static void i9xx_hpd_irq_handler(struct drm_i915_private *dev_priv,
u32 hotplug_status)
{
u32 pin_mask = 0, long_mask = 0;
+ u32 hotplug_trigger;
- if (IS_G4X(dev_priv) || IS_VALLEYVIEW(dev_priv) ||
- IS_CHERRYVIEW(dev_priv)) {
- u32 hotplug_trigger = hotplug_status & HOTPLUG_INT_STATUS_G4X;
-
- if (hotplug_trigger) {
- intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask,
- hotplug_trigger, hotplug_trigger,
- hpd_status_g4x,
- i9xx_port_hotplug_long_detect);
+ if (IS_G4X(dev_priv) ||
+ IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
+ hotplug_trigger = hotplug_status & HOTPLUG_INT_STATUS_G4X;
+ else
+ hotplug_trigger = hotplug_status & HOTPLUG_INT_STATUS_I915;
- intel_hpd_irq_handler(dev_priv, pin_mask, long_mask);
- }
+ if (hotplug_trigger) {
+ intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask,
+ hotplug_trigger, hotplug_trigger,
+ dev_priv->hotplug.hpd,
+ i9xx_port_hotplug_long_detect);
- if (hotplug_status & DP_AUX_CHANNEL_MASK_INT_STATUS_G4X)
- dp_aux_irq_handler(dev_priv);
- } else {
- u32 hotplug_trigger = hotplug_status & HOTPLUG_INT_STATUS_I915;
-
- if (hotplug_trigger) {
- intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask,
- hotplug_trigger, hotplug_trigger,
- hpd_status_i915,
- i9xx_port_hotplug_long_detect);
- intel_hpd_irq_handler(dev_priv, pin_mask, long_mask);
- }
+ intel_hpd_irq_handler(dev_priv, pin_mask, long_mask);
}
+
+ if ((IS_G4X(dev_priv) ||
+ IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) &&
+ hotplug_status & DP_AUX_CHANNEL_MASK_INT_STATUS_G4X)
+ dp_aux_irq_handler(dev_priv);
}
static irqreturn_t valleyview_irq_handler(int irq, void *arg)
@@ -1696,8 +1732,7 @@ static irqreturn_t cherryview_irq_handler(int irq, void *arg)
}
static void ibx_hpd_irq_handler(struct drm_i915_private *dev_priv,
- u32 hotplug_trigger,
- const u32 hpd[HPD_NUM_PINS])
+ u32 hotplug_trigger)
{
u32 dig_hotplug_reg, pin_mask = 0, long_mask = 0;
@@ -1720,8 +1755,9 @@ static void ibx_hpd_irq_handler(struct drm_i915_private *dev_priv,
if (!hotplug_trigger)
return;
- intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask, hotplug_trigger,
- dig_hotplug_reg, hpd,
+ intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask,
+ hotplug_trigger, dig_hotplug_reg,
+ dev_priv->hotplug.pch_hpd,
pch_port_hotplug_long_detect);
intel_hpd_irq_handler(dev_priv, pin_mask, long_mask);
@@ -1732,7 +1768,7 @@ static void ibx_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir)
enum pipe pipe;
u32 hotplug_trigger = pch_iir & SDE_HOTPLUG_MASK;
- ibx_hpd_irq_handler(dev_priv, hotplug_trigger, hpd_ibx);
+ ibx_hpd_irq_handler(dev_priv, hotplug_trigger);
if (pch_iir & SDE_AUDIO_POWER_MASK) {
int port = ffs((pch_iir & SDE_AUDIO_POWER_MASK) >>
@@ -1820,7 +1856,7 @@ static void cpt_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir)
enum pipe pipe;
u32 hotplug_trigger = pch_iir & SDE_HOTPLUG_MASK_CPT;
- ibx_hpd_irq_handler(dev_priv, hotplug_trigger, hpd_cpt);
+ ibx_hpd_irq_handler(dev_priv, hotplug_trigger);
if (pch_iir & SDE_AUDIO_POWER_MASK_CPT) {
int port = ffs((pch_iir & SDE_AUDIO_POWER_MASK_CPT) >>
@@ -1857,22 +1893,18 @@ static void icp_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir)
u32 ddi_hotplug_trigger, tc_hotplug_trigger;
u32 pin_mask = 0, long_mask = 0;
bool (*tc_port_hotplug_long_detect)(enum hpd_pin pin, u32 val);
- const u32 *pins;
if (HAS_PCH_TGP(dev_priv)) {
ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_TGP;
tc_hotplug_trigger = pch_iir & SDE_TC_MASK_TGP;
tc_port_hotplug_long_detect = tgp_tc_port_hotplug_long_detect;
- pins = hpd_tgp;
} else if (HAS_PCH_JSP(dev_priv)) {
ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_TGP;
tc_hotplug_trigger = 0;
- pins = hpd_tgp;
} else if (HAS_PCH_MCC(dev_priv)) {
ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_ICP;
tc_hotplug_trigger = pch_iir & SDE_TC_HOTPLUG_ICP(PORT_TC1);
tc_port_hotplug_long_detect = icp_tc_port_hotplug_long_detect;
- pins = hpd_icp;
} else {
drm_WARN(&dev_priv->drm, !HAS_PCH_ICP(dev_priv),
"Unrecognized PCH type 0x%x\n",
@@ -1881,7 +1913,6 @@ static void icp_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir)
ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_ICP;
tc_hotplug_trigger = pch_iir & SDE_TC_MASK_ICP;
tc_port_hotplug_long_detect = icp_tc_port_hotplug_long_detect;
- pins = hpd_icp;
}
if (ddi_hotplug_trigger) {
@@ -1891,8 +1922,8 @@ static void icp_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir)
I915_WRITE(SHOTPLUG_CTL_DDI, dig_hotplug_reg);
intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask,
- ddi_hotplug_trigger,
- dig_hotplug_reg, pins,
+ ddi_hotplug_trigger, dig_hotplug_reg,
+ dev_priv->hotplug.pch_hpd,
icp_ddi_port_hotplug_long_detect);
}
@@ -1903,8 +1934,8 @@ static void icp_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir)
I915_WRITE(SHOTPLUG_CTL_TC, dig_hotplug_reg);
intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask,
- tc_hotplug_trigger,
- dig_hotplug_reg, pins,
+ tc_hotplug_trigger, dig_hotplug_reg,
+ dev_priv->hotplug.pch_hpd,
tc_port_hotplug_long_detect);
}
@@ -1929,7 +1960,8 @@ static void spt_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir)
I915_WRITE(PCH_PORT_HOTPLUG, dig_hotplug_reg);
intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask,
- hotplug_trigger, dig_hotplug_reg, hpd_spt,
+ hotplug_trigger, dig_hotplug_reg,
+ dev_priv->hotplug.pch_hpd,
spt_port_hotplug_long_detect);
}
@@ -1940,7 +1972,8 @@ static void spt_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir)
I915_WRITE(PCH_PORT_HOTPLUG2, dig_hotplug_reg);
intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask,
- hotplug2_trigger, dig_hotplug_reg, hpd_spt,
+ hotplug2_trigger, dig_hotplug_reg,
+ dev_priv->hotplug.pch_hpd,
spt_port_hotplug2_long_detect);
}
@@ -1952,16 +1985,16 @@ static void spt_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir)
}
static void ilk_hpd_irq_handler(struct drm_i915_private *dev_priv,
- u32 hotplug_trigger,
- const u32 hpd[HPD_NUM_PINS])
+ u32 hotplug_trigger)
{
u32 dig_hotplug_reg, pin_mask = 0, long_mask = 0;
dig_hotplug_reg = I915_READ(DIGITAL_PORT_HOTPLUG_CNTRL);
I915_WRITE(DIGITAL_PORT_HOTPLUG_CNTRL, dig_hotplug_reg);
- intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask, hotplug_trigger,
- dig_hotplug_reg, hpd,
+ intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask,
+ hotplug_trigger, dig_hotplug_reg,
+ dev_priv->hotplug.hpd,
ilk_port_hotplug_long_detect);
intel_hpd_irq_handler(dev_priv, pin_mask, long_mask);
@@ -1974,7 +2007,7 @@ static void ilk_display_irq_handler(struct drm_i915_private *dev_priv,
u32 hotplug_trigger = de_iir & DE_DP_A_HOTPLUG;
if (hotplug_trigger)
- ilk_hpd_irq_handler(dev_priv, hotplug_trigger, hpd_ilk);
+ ilk_hpd_irq_handler(dev_priv, hotplug_trigger);
if (de_iir & DE_AUX_CHANNEL_A)
dp_aux_irq_handler(dev_priv);
@@ -2020,7 +2053,7 @@ static void ivb_display_irq_handler(struct drm_i915_private *dev_priv,
u32 hotplug_trigger = de_iir & DE_DP_A_HOTPLUG_IVB;
if (hotplug_trigger)
- ilk_hpd_irq_handler(dev_priv, hotplug_trigger, hpd_ivb);
+ ilk_hpd_irq_handler(dev_priv, hotplug_trigger);
if (de_iir & DE_ERR_INT_IVB)
ivb_err_int_handler(dev_priv);
@@ -2130,16 +2163,16 @@ static irqreturn_t ilk_irq_handler(int irq, void *arg)
}
static void bxt_hpd_irq_handler(struct drm_i915_private *dev_priv,
- u32 hotplug_trigger,
- const u32 hpd[HPD_NUM_PINS])
+ u32 hotplug_trigger)
{
u32 dig_hotplug_reg, pin_mask = 0, long_mask = 0;
dig_hotplug_reg = I915_READ(PCH_PORT_HOTPLUG);
I915_WRITE(PCH_PORT_HOTPLUG, dig_hotplug_reg);
- intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask, hotplug_trigger,
- dig_hotplug_reg, hpd,
+ intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask,
+ hotplug_trigger, dig_hotplug_reg,
+ dev_priv->hotplug.hpd,
bxt_port_hotplug_long_detect);
intel_hpd_irq_handler(dev_priv, pin_mask, long_mask);
@@ -2151,15 +2184,11 @@ static void gen11_hpd_irq_handler(struct drm_i915_private *dev_priv, u32 iir)
u32 trigger_tc = iir & GEN11_DE_TC_HOTPLUG_MASK;
u32 trigger_tbt = iir & GEN11_DE_TBT_HOTPLUG_MASK;
long_pulse_detect_func long_pulse_detect;
- const u32 *hpd;
- if (INTEL_GEN(dev_priv) >= 12) {
+ if (INTEL_GEN(dev_priv) >= 12)
long_pulse_detect = gen12_port_hotplug_long_detect;
- hpd = hpd_gen12;
- } else {
+ else
long_pulse_detect = gen11_port_hotplug_long_detect;
- hpd = hpd_gen11;
- }
if (trigger_tc) {
u32 dig_hotplug_reg;
@@ -2167,8 +2196,10 @@ static void gen11_hpd_irq_handler(struct drm_i915_private *dev_priv, u32 iir)
dig_hotplug_reg = I915_READ(GEN11_TC_HOTPLUG_CTL);
I915_WRITE(GEN11_TC_HOTPLUG_CTL, dig_hotplug_reg);
- intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask, trigger_tc,
- dig_hotplug_reg, hpd, long_pulse_detect);
+ intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask,
+ trigger_tc, dig_hotplug_reg,
+ dev_priv->hotplug.hpd,
+ long_pulse_detect);
}
if (trigger_tbt) {
@@ -2177,8 +2208,10 @@ static void gen11_hpd_irq_handler(struct drm_i915_private *dev_priv, u32 iir)
dig_hotplug_reg = I915_READ(GEN11_TBT_HOTPLUG_CTL);
I915_WRITE(GEN11_TBT_HOTPLUG_CTL, dig_hotplug_reg);
- intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask, trigger_tbt,
- dig_hotplug_reg, hpd, long_pulse_detect);
+ intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask,
+ trigger_tbt, dig_hotplug_reg,
+ dev_priv->hotplug.hpd,
+ long_pulse_detect);
}
if (pin_mask)
@@ -2309,15 +2342,13 @@ gen8_de_irq_handler(struct drm_i915_private *dev_priv, u32 master_ctl)
if (IS_GEN9_LP(dev_priv)) {
tmp_mask = iir & BXT_DE_PORT_HOTPLUG_MASK;
if (tmp_mask) {
- bxt_hpd_irq_handler(dev_priv, tmp_mask,
- hpd_bxt);
+ bxt_hpd_irq_handler(dev_priv, tmp_mask);
found = true;
}
} else if (IS_BROADWELL(dev_priv)) {
tmp_mask = iir & GEN8_PORT_DP_A_HOTPLUG;
if (tmp_mask) {
- ilk_hpd_irq_handler(dev_priv,
- tmp_mask, hpd_bdw);
+ ilk_hpd_irq_handler(dev_priv, tmp_mask);
found = true;
}
}
@@ -2870,6 +2901,14 @@ static void gen11_display_irq_reset(struct drm_i915_private *dev_priv)
if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP)
GEN3_IRQ_RESET(uncore, SDE);
+
+ /* Wa_14010685332:icl */
+ if (INTEL_PCH_TYPE(dev_priv) == PCH_ICP) {
+ intel_uncore_rmw(uncore, SOUTH_CHICKEN1,
+ SBCLK_RUN_REFCLK_DIS, SBCLK_RUN_REFCLK_DIS);
+ intel_uncore_rmw(uncore, SOUTH_CHICKEN1,
+ SBCLK_RUN_REFCLK_DIS, 0);
+ }
}
static void gen11_irq_reset(struct drm_i915_private *dev_priv)
@@ -2989,13 +3028,12 @@ static void ibx_hpd_irq_setup(struct drm_i915_private *dev_priv)
{
u32 hotplug_irqs, enabled_irqs;
- if (HAS_PCH_IBX(dev_priv)) {
+ if (HAS_PCH_IBX(dev_priv))
hotplug_irqs = SDE_HOTPLUG_MASK;
- enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd_ibx);
- } else {
+ else
hotplug_irqs = SDE_HOTPLUG_MASK_CPT;
- enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd_cpt);
- }
+
+ enabled_irqs = intel_hpd_enabled_irqs(dev_priv, dev_priv->hotplug.pch_hpd);
ibx_display_interrupt_update(dev_priv, hotplug_irqs, enabled_irqs);
@@ -3021,13 +3059,12 @@ static void icp_hpd_detection_setup(struct drm_i915_private *dev_priv,
static void icp_hpd_irq_setup(struct drm_i915_private *dev_priv,
u32 sde_ddi_mask, u32 sde_tc_mask,
- u32 ddi_enable_mask, u32 tc_enable_mask,
- const u32 *pins)
+ u32 ddi_enable_mask, u32 tc_enable_mask)
{
u32 hotplug_irqs, enabled_irqs;
hotplug_irqs = sde_ddi_mask | sde_tc_mask;
- enabled_irqs = intel_hpd_enabled_irqs(dev_priv, pins);
+ enabled_irqs = intel_hpd_enabled_irqs(dev_priv, dev_priv->hotplug.pch_hpd);
I915_WRITE(SHPD_FILTER_CNT, SHPD_FILTER_CNT_500_ADJ);
@@ -3044,8 +3081,7 @@ static void mcc_hpd_irq_setup(struct drm_i915_private *dev_priv)
{
icp_hpd_irq_setup(dev_priv,
SDE_DDI_MASK_ICP, SDE_TC_HOTPLUG_ICP(PORT_TC1),
- ICP_DDI_HPD_ENABLE_MASK, ICP_TC_HPD_ENABLE(PORT_TC1),
- hpd_icp);
+ ICP_DDI_HPD_ENABLE_MASK, ICP_TC_HPD_ENABLE(PORT_TC1));
}
/*
@@ -3057,8 +3093,7 @@ static void jsp_hpd_irq_setup(struct drm_i915_private *dev_priv)
{
icp_hpd_irq_setup(dev_priv,
SDE_DDI_MASK_TGP, 0,
- TGP_DDI_HPD_ENABLE_MASK, 0,
- hpd_tgp);
+ TGP_DDI_HPD_ENABLE_MASK, 0);
}
static void gen11_hpd_detection_setup(struct drm_i915_private *dev_priv)
@@ -3083,11 +3118,9 @@ static void gen11_hpd_detection_setup(struct drm_i915_private *dev_priv)
static void gen11_hpd_irq_setup(struct drm_i915_private *dev_priv)
{
u32 hotplug_irqs, enabled_irqs;
- const u32 *hpd;
u32 val;
- hpd = INTEL_GEN(dev_priv) >= 12 ? hpd_gen12 : hpd_gen11;
- enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd);
+ enabled_irqs = intel_hpd_enabled_irqs(dev_priv, dev_priv->hotplug.hpd);
hotplug_irqs = GEN11_DE_TC_HOTPLUG_MASK | GEN11_DE_TBT_HOTPLUG_MASK;
val = I915_READ(GEN11_DE_HPD_IMR);
@@ -3099,12 +3132,10 @@ static void gen11_hpd_irq_setup(struct drm_i915_private *dev_priv)
if (INTEL_PCH_TYPE(dev_priv) >= PCH_TGP)
icp_hpd_irq_setup(dev_priv, SDE_DDI_MASK_TGP, SDE_TC_MASK_TGP,
- TGP_DDI_HPD_ENABLE_MASK,
- TGP_TC_HPD_ENABLE_MASK, hpd_tgp);
+ TGP_DDI_HPD_ENABLE_MASK, TGP_TC_HPD_ENABLE_MASK);
else if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP)
icp_hpd_irq_setup(dev_priv, SDE_DDI_MASK_ICP, SDE_TC_MASK_ICP,
- ICP_DDI_HPD_ENABLE_MASK,
- ICP_TC_HPD_ENABLE_MASK, hpd_icp);
+ ICP_DDI_HPD_ENABLE_MASK, ICP_TC_HPD_ENABLE_MASK);
}
static void spt_hpd_detection_setup(struct drm_i915_private *dev_priv)
@@ -3140,7 +3171,7 @@ static void spt_hpd_irq_setup(struct drm_i915_private *dev_priv)
I915_WRITE(SHPD_FILTER_CNT, SHPD_FILTER_CNT_500_ADJ);
hotplug_irqs = SDE_HOTPLUG_MASK_SPT;
- enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd_spt);
+ enabled_irqs = intel_hpd_enabled_irqs(dev_priv, dev_priv->hotplug.pch_hpd);
ibx_display_interrupt_update(dev_priv, hotplug_irqs, enabled_irqs);
@@ -3169,17 +3200,17 @@ static void ilk_hpd_irq_setup(struct drm_i915_private *dev_priv)
if (INTEL_GEN(dev_priv) >= 8) {
hotplug_irqs = GEN8_PORT_DP_A_HOTPLUG;
- enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd_bdw);
+ enabled_irqs = intel_hpd_enabled_irqs(dev_priv, dev_priv->hotplug.hpd);
bdw_update_port_irq(dev_priv, hotplug_irqs, enabled_irqs);
} else if (INTEL_GEN(dev_priv) >= 7) {
hotplug_irqs = DE_DP_A_HOTPLUG_IVB;
- enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd_ivb);
+ enabled_irqs = intel_hpd_enabled_irqs(dev_priv, dev_priv->hotplug.hpd);
ilk_update_display_irq(dev_priv, hotplug_irqs, enabled_irqs);
} else {
hotplug_irqs = DE_DP_A_HOTPLUG;
- enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd_ilk);
+ enabled_irqs = intel_hpd_enabled_irqs(dev_priv, dev_priv->hotplug.hpd);
ilk_update_display_irq(dev_priv, hotplug_irqs, enabled_irqs);
}
@@ -3230,7 +3261,7 @@ static void bxt_hpd_irq_setup(struct drm_i915_private *dev_priv)
{
u32 hotplug_irqs, enabled_irqs;
- enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd_bxt);
+ enabled_irqs = intel_hpd_enabled_irqs(dev_priv, dev_priv->hotplug.hpd);
hotplug_irqs = BXT_DE_PORT_HOTPLUG_MASK;
bdw_update_port_irq(dev_priv, hotplug_irqs, enabled_irqs);
@@ -3646,7 +3677,7 @@ static irqreturn_t i8xx_irq_handler(int irq, void *arg)
intel_uncore_write16(&dev_priv->uncore, GEN2_IIR, iir);
if (iir & I915_USER_INTERRUPT)
- intel_engine_signal_breadcrumbs(dev_priv->engine[RCS0]);
+ intel_engine_signal_breadcrumbs(dev_priv->gt.engine[RCS0]);
if (iir & I915_MASTER_ERROR_INTERRUPT)
i8xx_error_irq_handler(dev_priv, eir, eir_stuck);
@@ -3751,7 +3782,7 @@ static irqreturn_t i915_irq_handler(int irq, void *arg)
I915_WRITE(GEN2_IIR, iir);
if (iir & I915_USER_INTERRUPT)
- intel_engine_signal_breadcrumbs(dev_priv->engine[RCS0]);
+ intel_engine_signal_breadcrumbs(dev_priv->gt.engine[RCS0]);
if (iir & I915_MASTER_ERROR_INTERRUPT)
i9xx_error_irq_handler(dev_priv, eir, eir_stuck);
@@ -3893,10 +3924,10 @@ static irqreturn_t i965_irq_handler(int irq, void *arg)
I915_WRITE(GEN2_IIR, iir);
if (iir & I915_USER_INTERRUPT)
- intel_engine_signal_breadcrumbs(dev_priv->engine[RCS0]);
+ intel_engine_signal_breadcrumbs(dev_priv->gt.engine[RCS0]);
if (iir & I915_BSD_USER_INTERRUPT)
- intel_engine_signal_breadcrumbs(dev_priv->engine[VCS0]);
+ intel_engine_signal_breadcrumbs(dev_priv->gt.engine[VCS0]);
if (iir & I915_MASTER_ERROR_INTERRUPT)
i9xx_error_irq_handler(dev_priv, eir, eir_stuck);
@@ -3924,6 +3955,8 @@ void intel_irq_init(struct drm_i915_private *dev_priv)
struct drm_device *dev = &dev_priv->drm;
int i;
+ intel_hpd_init_pins(dev_priv);
+
intel_hpd_init_work(dev_priv);
INIT_WORK(&dev_priv->l3_parity.error_work, ivb_parity_work);
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 2c80a0194c80..eb0b5be7c35d 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -160,6 +160,7 @@
GEN(2), \
.is_mobile = 1, \
.pipe_mask = BIT(PIPE_A) | BIT(PIPE_B), \
+ .cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B), \
.display.has_overlay = 1, \
.display.cursor_needs_physical = 1, \
.display.overlay_needs_physical = 1, \
@@ -170,6 +171,7 @@
.engine_mask = BIT(RCS0), \
.has_snoop = true, \
.has_coherent_ggtt = false, \
+ .dma_mask_size = 32, \
I9XX_PIPE_OFFSETS, \
I9XX_CURSOR_OFFSETS, \
I9XX_COLORS, \
@@ -179,6 +181,7 @@
#define I845_FEATURES \
GEN(2), \
.pipe_mask = BIT(PIPE_A), \
+ .cpu_transcoder_mask = BIT(TRANSCODER_A), \
.display.has_overlay = 1, \
.display.overlay_needs_physical = 1, \
.display.has_gmch = 1, \
@@ -188,6 +191,7 @@
.engine_mask = BIT(RCS0), \
.has_snoop = true, \
.has_coherent_ggtt = false, \
+ .dma_mask_size = 32, \
I845_PIPE_OFFSETS, \
I845_CURSOR_OFFSETS, \
I9XX_COLORS, \
@@ -218,11 +222,13 @@ static const struct intel_device_info i865g_info = {
#define GEN3_FEATURES \
GEN(3), \
.pipe_mask = BIT(PIPE_A) | BIT(PIPE_B), \
+ .cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B), \
.display.has_gmch = 1, \
.gpu_reset_clobbers_display = true, \
.engine_mask = BIT(RCS0), \
.has_snoop = true, \
.has_coherent_ggtt = true, \
+ .dma_mask_size = 32, \
I9XX_PIPE_OFFSETS, \
I9XX_CURSOR_OFFSETS, \
I9XX_COLORS, \
@@ -283,6 +289,7 @@ static const struct intel_device_info g33_info = {
PLATFORM(INTEL_G33),
.display.has_hotplug = 1,
.display.has_overlay = 1,
+ .dma_mask_size = 36,
};
static const struct intel_device_info pnv_g_info = {
@@ -290,6 +297,7 @@ static const struct intel_device_info pnv_g_info = {
PLATFORM(INTEL_PINEVIEW),
.display.has_hotplug = 1,
.display.has_overlay = 1,
+ .dma_mask_size = 36,
};
static const struct intel_device_info pnv_m_info = {
@@ -298,17 +306,20 @@ static const struct intel_device_info pnv_m_info = {
.is_mobile = 1,
.display.has_hotplug = 1,
.display.has_overlay = 1,
+ .dma_mask_size = 36,
};
#define GEN4_FEATURES \
GEN(4), \
.pipe_mask = BIT(PIPE_A) | BIT(PIPE_B), \
+ .cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B), \
.display.has_hotplug = 1, \
.display.has_gmch = 1, \
.gpu_reset_clobbers_display = true, \
.engine_mask = BIT(RCS0), \
.has_snoop = true, \
.has_coherent_ggtt = true, \
+ .dma_mask_size = 36, \
I9XX_PIPE_OFFSETS, \
I9XX_CURSOR_OFFSETS, \
I965_COLORS, \
@@ -354,12 +365,14 @@ static const struct intel_device_info gm45_info = {
#define GEN5_FEATURES \
GEN(5), \
.pipe_mask = BIT(PIPE_A) | BIT(PIPE_B), \
+ .cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B), \
.display.has_hotplug = 1, \
.engine_mask = BIT(RCS0) | BIT(VCS0), \
.has_snoop = true, \
.has_coherent_ggtt = true, \
/* ilk does support rc6, but we do not implement [power] contexts */ \
.has_rc6 = 0, \
+ .dma_mask_size = 36, \
I9XX_PIPE_OFFSETS, \
I9XX_CURSOR_OFFSETS, \
ILK_COLORS, \
@@ -381,6 +394,7 @@ static const struct intel_device_info ilk_m_info = {
#define GEN6_FEATURES \
GEN(6), \
.pipe_mask = BIT(PIPE_A) | BIT(PIPE_B), \
+ .cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B), \
.display.has_hotplug = 1, \
.display.has_fbc = 1, \
.engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0), \
@@ -389,6 +403,7 @@ static const struct intel_device_info ilk_m_info = {
.has_rc6 = 1, \
.has_rc6p = 1, \
.has_rps = true, \
+ .dma_mask_size = 40, \
.ppgtt_type = INTEL_PPGTT_ALIASING, \
.ppgtt_size = 31, \
I9XX_PIPE_OFFSETS, \
@@ -430,6 +445,7 @@ static const struct intel_device_info snb_m_gt2_info = {
#define GEN7_FEATURES \
GEN(7), \
.pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C), \
+ .cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B) | BIT(TRANSCODER_C), \
.display.has_hotplug = 1, \
.display.has_fbc = 1, \
.engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0), \
@@ -438,6 +454,7 @@ static const struct intel_device_info snb_m_gt2_info = {
.has_rc6 = 1, \
.has_rc6p = 1, \
.has_rps = true, \
+ .dma_mask_size = 40, \
.ppgtt_type = INTEL_PPGTT_ALIASING, \
.ppgtt_size = 31, \
IVB_PIPE_OFFSETS, \
@@ -482,6 +499,7 @@ static const struct intel_device_info ivb_q_info = {
PLATFORM(INTEL_IVYBRIDGE),
.gt = 2,
.pipe_mask = 0, /* legal, last one wins */
+ .cpu_transcoder_mask = 0,
.has_l3_dpf = 1,
};
@@ -490,11 +508,13 @@ static const struct intel_device_info vlv_info = {
GEN(7),
.is_lp = 1,
.pipe_mask = BIT(PIPE_A) | BIT(PIPE_B),
+ .cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B),
.has_runtime_pm = 1,
.has_rc6 = 1,
.has_rps = true,
.display.has_gmch = 1,
.display.has_hotplug = 1,
+ .dma_mask_size = 40,
.ppgtt_type = INTEL_PPGTT_ALIASING,
.ppgtt_size = 31,
.has_snoop = true,
@@ -511,6 +531,8 @@ static const struct intel_device_info vlv_info = {
#define G75_FEATURES \
GEN7_FEATURES, \
.engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0), \
+ .cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B) | \
+ BIT(TRANSCODER_C) | BIT(TRANSCODER_EDP), \
.display.has_ddi = 1, \
.has_fpga_dbg = 1, \
.display.has_psr = 1, \
@@ -543,6 +565,7 @@ static const struct intel_device_info hsw_gt3_info = {
G75_FEATURES, \
GEN(8), \
.has_logical_ring_contexts = 1, \
+ .dma_mask_size = 39, \
.ppgtt_type = INTEL_PPGTT_FULL, \
.ppgtt_size = 48, \
.has_64bit_reloc = 1, \
@@ -581,6 +604,7 @@ static const struct intel_device_info chv_info = {
PLATFORM(INTEL_CHERRYVIEW),
GEN(8),
.pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C),
+ .cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B) | BIT(TRANSCODER_C),
.display.has_hotplug = 1,
.is_lp = 1,
.engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0),
@@ -590,7 +614,8 @@ static const struct intel_device_info chv_info = {
.has_rps = true,
.has_logical_ring_contexts = 1,
.display.has_gmch = 1,
- .ppgtt_type = INTEL_PPGTT_ALIASING,
+ .dma_mask_size = 39,
+ .ppgtt_type = INTEL_PPGTT_FULL,
.ppgtt_size = 32,
.has_reset_engine = 1,
.has_snoop = true,
@@ -656,6 +681,9 @@ static const struct intel_device_info skl_gt4_info = {
.display.has_hotplug = 1, \
.engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0), \
.pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C), \
+ .cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B) | \
+ BIT(TRANSCODER_C) | BIT(TRANSCODER_EDP) | \
+ BIT(TRANSCODER_DSI_A) | BIT(TRANSCODER_DSI_C), \
.has_64bit_reloc = 1, \
.display.has_ddi = 1, \
.has_fpga_dbg = 1, \
@@ -670,6 +698,7 @@ static const struct intel_device_info skl_gt4_info = {
.has_logical_ring_contexts = 1, \
.has_logical_ring_preemption = 1, \
.has_gt_uc = 1, \
+ .dma_mask_size = 39, \
.ppgtt_type = INTEL_PPGTT_FULL, \
.ppgtt_size = 48, \
.has_reset_engine = 1, \
@@ -759,6 +788,9 @@ static const struct intel_device_info cnl_info = {
#define GEN11_FEATURES \
GEN10_FEATURES, \
GEN11_DEFAULT_PAGE_SIZES, \
+ .cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B) | \
+ BIT(TRANSCODER_C) | BIT(TRANSCODER_EDP) | \
+ BIT(TRANSCODER_DSI_0) | BIT(TRANSCODER_DSI_1), \
.pipe_offsets = { \
[TRANSCODER_A] = PIPE_A_OFFSET, \
[TRANSCODER_B] = PIPE_B_OFFSET, \
@@ -799,6 +831,10 @@ static const struct intel_device_info ehl_info = {
#define GEN12_FEATURES \
GEN11_FEATURES, \
GEN(12), \
+ .pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C) | BIT(PIPE_D), \
+ .cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B) | \
+ BIT(TRANSCODER_C) | BIT(TRANSCODER_D) | \
+ BIT(TRANSCODER_DSI_0) | BIT(TRANSCODER_DSI_1), \
.pipe_offsets = { \
[TRANSCODER_A] = PIPE_A_OFFSET, \
[TRANSCODER_B] = PIPE_B_OFFSET, \
@@ -822,7 +858,6 @@ static const struct intel_device_info ehl_info = {
static const struct intel_device_info tgl_info = {
GEN12_FEATURES,
PLATFORM(INTEL_TIGERLAKE),
- .pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C) | BIT(PIPE_D),
.display.has_modular_fia = 1,
.engine_mask =
BIT(RCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS0) | BIT(VCS2),
@@ -920,8 +955,6 @@ static void i915_pci_remove(struct pci_dev *pdev)
i915_driver_remove(i915);
pci_set_drvdata(pdev, NULL);
-
- drm_dev_put(&i915->drm);
}
/* is device_id present in comma separated list of ids */
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index cf2c01f17da8..75c60c2afb7e 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -204,21 +204,6 @@
#include "i915_drv.h"
#include "i915_perf.h"
-#include "oa/i915_oa_hsw.h"
-#include "oa/i915_oa_bdw.h"
-#include "oa/i915_oa_chv.h"
-#include "oa/i915_oa_sklgt2.h"
-#include "oa/i915_oa_sklgt3.h"
-#include "oa/i915_oa_sklgt4.h"
-#include "oa/i915_oa_bxt.h"
-#include "oa/i915_oa_kblgt2.h"
-#include "oa/i915_oa_kblgt3.h"
-#include "oa/i915_oa_glk.h"
-#include "oa/i915_oa_cflgt2.h"
-#include "oa/i915_oa_cflgt3.h"
-#include "oa/i915_oa_cnl.h"
-#include "oa/i915_oa_icl.h"
-#include "oa/i915_oa_tgl.h"
/* HW requires this to be a power of two, between 128k and 16M, though driver
* is currently generally designed assuming the largest 16M size is used such
@@ -238,26 +223,17 @@
*
* Although this can be observed explicitly while copying reports to userspace
* by checking for a zeroed report-id field in tail reports, we want to account
- * for this earlier, as part of the oa_buffer_check to avoid lots of redundant
- * read() attempts.
- *
- * In effect we define a tail pointer for reading that lags the real tail
- * pointer by at least %OA_TAIL_MARGIN_NSEC nanoseconds, which gives enough
- * time for the corresponding reports to become visible to the CPU.
- *
- * To manage this we actually track two tail pointers:
- * 1) An 'aging' tail with an associated timestamp that is tracked until we
- * can trust the corresponding data is visible to the CPU; at which point
- * it is considered 'aged'.
- * 2) An 'aged' tail that can be used for read()ing.
- *
- * The two separate pointers let us decouple read()s from tail pointer aging.
- *
- * The tail pointers are checked and updated at a limited rate within a hrtimer
- * callback (the same callback that is used for delivering EPOLLIN events)
- *
- * Initially the tails are marked invalid with %INVALID_TAIL_PTR which
- * indicates that an updated tail pointer is needed.
+ * for this earlier, as part of the oa_buffer_check_unlocked to avoid lots of
+ * redundant read() attempts.
+ *
+ * We work around this issue in oa_buffer_check_unlocked() by reading the reports
+ * in the OA buffer, starting from the tail reported by the HW until we find a
+ * report with its first 2 dwords not 0, meaning its previous report is
+ * completely in memory and ready to be read. Those dwords are also set to 0
+ * once read and the whole buffer is cleared upon OA buffer initialization. The
+ * first dword is the reason for this report while the second is the timestamp,
+ * making the chances of having those 2 fields at 0 fairly unlikely. A more
+ * detailed explanation is available in oa_buffer_check_unlocked().
*
* Most of the implementation details for this workaround are in
* oa_buffer_check_unlocked() and _append_oa_reports()
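The scan described above can be illustrated with a small standalone sketch (plain C, illustrative sizes, not the i915 types): walk back from the HW tail until a report whose first two dwords are non-zero is found; that offset becomes the verified tail, and only data older than it is handed to read(), since in-order landing guarantees everything written before that header is in memory.
#include <stdint.h>
#include <string.h>
#define BUF_SIZE	4096u				/* power of two, illustrative */
#define REPORT_SIZE	256u				/* illustrative report size */
#define TAKEN(tail, head)	(((tail) - (head)) & (BUF_SIZE - 1))
static uint32_t find_verified_tail(const uint8_t *buf, uint32_t prev_tail,
				   uint32_t hw_tail)
{
	uint32_t tail = hw_tail;
	while (TAKEN(tail, prev_tail) >= REPORT_SIZE) {
		uint32_t hdr[2];
		memcpy(hdr, buf + tail, sizeof(hdr));
		if (hdr[0] != 0 || hdr[1] != 0)
			break;			/* header visible: safe boundary */
		tail = (tail - REPORT_SIZE) & (BUF_SIZE - 1);
	}
	return tail;
}
int main(void)
{
	static uint8_t buf[BUF_SIZE];		/* zeroed, like a fresh OA buffer */
	uint32_t hw_tail = 3 * REPORT_SIZE;	/* HW claims three reports written */
	memset(buf + 0 * REPORT_SIZE, 0xa5, 8);	/* report 0 header landed */
	memset(buf + 1 * REPORT_SIZE, 0xa5, 8);	/* report 1 header landed */
	/* Only report 0 is safely readable: the verified tail stops at the
	 * start of report 1, whose body is not yet guaranteed to be there. */
	return find_verified_tail(buf, 0, hw_tail) != 1 * REPORT_SIZE;
}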
@@ -272,11 +248,11 @@
#define OA_TAIL_MARGIN_NSEC 100000ULL
#define INVALID_TAIL_PTR 0xffffffff
-/* frequency for checking whether the OA unit has written new reports to the
- * circular OA buffer...
+/* The default frequency for checking whether the OA unit has written new
+ * reports to the circular OA buffer...
*/
-#define POLL_FREQUENCY 200
-#define POLL_PERIOD (NSEC_PER_SEC / POLL_FREQUENCY)
+#define DEFAULT_POLL_FREQUENCY_HZ 200
+#define DEFAULT_POLL_PERIOD_NS (NSEC_PER_SEC / DEFAULT_POLL_FREQUENCY_HZ)
/* for sysctl proc_dointvec_minmax of dev.i915.perf_stream_paranoid */
static u32 i915_perf_stream_paranoid = true;
@@ -359,6 +335,12 @@ static const struct i915_oa_format gen12_oa_formats[I915_OA_FORMAT_MAX] = {
* @oa_periodic: Whether to enable periodic OA unit sampling
* @oa_period_exponent: The OA unit sampling period is derived from this
* @engine: The engine (typically rcs0) being monitored by the OA unit
+ * @has_sseu: Whether @sseu was specified by userspace
+ * @sseu: internal SSEU configuration computed either from the userspace
+ * specified configuration in the opening parameters or a default value
+ * (see get_default_sseu_config())
+ * @poll_oa_period: The period in nanoseconds at which the CPU will check for OA
+ * data availability
*
* As read_properties_unlocked() enumerates and validates the properties given
* to open a stream of metrics the configuration is built up in the structure
@@ -378,6 +360,11 @@ struct perf_open_properties {
int oa_period_exponent;
struct intel_engine_cs *engine;
+
+ bool has_sseu;
+ struct intel_sseu sseu;
+
+ u64 poll_oa_period;
};
struct i915_oa_config_bo {
@@ -409,10 +396,7 @@ i915_perf_get_oa_config(struct i915_perf *perf, int metrics_set)
struct i915_oa_config *oa_config;
rcu_read_lock();
- if (metrics_set == 1)
- oa_config = &perf->test_config;
- else
- oa_config = idr_find(&perf->metrics_idr, metrics_set);
+ oa_config = idr_find(&perf->metrics_idr, metrics_set);
if (oa_config)
oa_config = i915_oa_config_get(oa_config);
rcu_read_unlock();
@@ -465,8 +449,8 @@ static u32 gen7_oa_hw_tail_read(struct i915_perf_stream *stream)
* (See description of OA_TAIL_MARGIN_NSEC above for further details.)
*
* Besides returning true when there is data available to read() this function
- * also has the side effect of updating the oa_buffer.tails[], .aging_timestamp
- * and .aged_tail_idx state used for reading.
+ * also updates the tail, aging_tail and aging_timestamp in the oa_buffer
+ * object.
*
* Note: It's safe to read OA config state here unlocked, assuming that this is
* only called while the stream is enabled, while the global OA configuration
@@ -476,28 +460,19 @@ static u32 gen7_oa_hw_tail_read(struct i915_perf_stream *stream)
*/
static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
{
+ u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
int report_size = stream->oa_buffer.format_size;
unsigned long flags;
- unsigned int aged_idx;
- u32 head, hw_tail, aged_tail, aging_tail;
+ bool pollin;
+ u32 hw_tail;
u64 now;
/* We have to consider the (unlikely) possibility that read() errors
- * could result in an OA buffer reset which might reset the head,
- * tails[] and aged_tail state.
+ * could result in an OA buffer reset which might reset the head and
+ * tail state.
*/
spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
- /* NB: The head we observe here might effectively be a little out of
- * date (between head and tails[aged_idx].offset if there is currently
- * a read() in progress.
- */
- head = stream->oa_buffer.head;
-
- aged_idx = stream->oa_buffer.aged_tail_idx;
- aged_tail = stream->oa_buffer.tails[aged_idx].offset;
- aging_tail = stream->oa_buffer.tails[!aged_idx].offset;
-
hw_tail = stream->perf->ops.oa_hw_tail_read(stream);
/* The tail pointer increases in 64 byte increments,
@@ -507,64 +482,63 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
now = ktime_get_mono_fast_ns();
- /* Update the aged tail
- *
- * Flip the tail pointer available for read()s once the aging tail is
- * old enough to trust that the corresponding data will be visible to
- * the CPU...
- *
- * Do this before updating the aging pointer in case we may be able to
- * immediately start aging a new pointer too (if new data has become
- * available) without needing to wait for a later hrtimer callback.
- */
- if (aging_tail != INVALID_TAIL_PTR &&
- ((now - stream->oa_buffer.aging_timestamp) >
- OA_TAIL_MARGIN_NSEC)) {
-
- aged_idx ^= 1;
- stream->oa_buffer.aged_tail_idx = aged_idx;
+ if (hw_tail == stream->oa_buffer.aging_tail &&
+ (now - stream->oa_buffer.aging_timestamp) > OA_TAIL_MARGIN_NSEC) {
+ /* If the HW tail hasn't moved since the last check and the HW
+ * tail has been aging for long enough, declare it the new
+ * tail.
+ */
+ stream->oa_buffer.tail = stream->oa_buffer.aging_tail;
+ } else {
+ u32 head, tail, aged_tail;
- aged_tail = aging_tail;
+ /* NB: The head we observe here might effectively be a little
+ * out of date. If a read() is in progress, the head could be
+ * anywhere between this head and stream->oa_buffer.tail.
+ */
+ head = stream->oa_buffer.head - gtt_offset;
+ aged_tail = stream->oa_buffer.tail - gtt_offset;
+
+ hw_tail -= gtt_offset;
+ tail = hw_tail;
+
+ /* Walk the stream backward until we find a report with dword 0
+ * & 1 not at 0. Since the circular buffer pointers progress by
+ * increments of 64 bytes and reports can be up to 256
+ * bytes long, we can't tell whether a report has fully landed
+ * in memory before the first 2 dwords of the following report
+ * have effectively landed.
+ *
+ * This is assuming that the writes of the OA unit land in
+ * memory in the order they were written to.
+ * If not : (╯°□°)╯︵ ┻━┻
+ */
+ while (OA_TAKEN(tail, aged_tail) >= report_size) {
+ u32 *report32 = (void *)(stream->oa_buffer.vaddr + tail);
- /* Mark that we need a new pointer to start aging... */
- stream->oa_buffer.tails[!aged_idx].offset = INVALID_TAIL_PTR;
- aging_tail = INVALID_TAIL_PTR;
- }
+ if (report32[0] != 0 || report32[1] != 0)
+ break;
- /* Update the aging tail
- *
- * We throttle aging tail updates until we have a new tail that
- * represents >= one report more data than is already available for
- * reading. This ensures there will be enough data for a successful
- * read once this new pointer has aged and ensures we will give the new
- * pointer time to age.
- */
- if (aging_tail == INVALID_TAIL_PTR &&
- (aged_tail == INVALID_TAIL_PTR ||
- OA_TAKEN(hw_tail, aged_tail) >= report_size)) {
- struct i915_vma *vma = stream->oa_buffer.vma;
- u32 gtt_offset = i915_ggtt_offset(vma);
-
- /* Be paranoid and do a bounds check on the pointer read back
- * from hardware, just in case some spurious hardware condition
- * could put the tail out of bounds...
- */
- if (hw_tail >= gtt_offset &&
- hw_tail < (gtt_offset + OA_BUFFER_SIZE)) {
- stream->oa_buffer.tails[!aged_idx].offset =
- aging_tail = hw_tail;
- stream->oa_buffer.aging_timestamp = now;
- } else {
- drm_err(&stream->perf->i915->drm,
- "Ignoring spurious out of range OA buffer tail pointer = %x\n",
- hw_tail);
+ tail = (tail - report_size) & (OA_BUFFER_SIZE - 1);
}
+
+ if (OA_TAKEN(hw_tail, tail) > report_size &&
+ __ratelimit(&stream->perf->tail_pointer_race))
+ DRM_NOTE("unlanded report(s) head=0x%x "
+ "tail=0x%x hw_tail=0x%x\n",
+ head, tail, hw_tail);
+
+ stream->oa_buffer.tail = gtt_offset + tail;
+ stream->oa_buffer.aging_tail = gtt_offset + hw_tail;
+ stream->oa_buffer.aging_timestamp = now;
}
+ pollin = OA_TAKEN(stream->oa_buffer.tail - gtt_offset,
+ stream->oa_buffer.head - gtt_offset) >= report_size;
+
spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
- return aged_tail == INVALID_TAIL_PTR ?
- false : OA_TAKEN(aged_tail, head) >= report_size;
+ return pollin;
}
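For reference, the OA_TAKEN() distance used in the pollin check above is plain modular subtraction on a power-of-two ring; a tiny worked example with an assumed 4 KiB buffer:
#include <assert.h>
#include <stdint.h>
#define BUF_SIZE	4096u
#define TAKEN(tail, head)	(((tail) - (head)) & (BUF_SIZE - 1))
int main(void)
{
	assert(TAKEN(512u, 256u) == 256u);	/* no wrap */
	assert(TAKEN(128u, 3968u) == 256u);	/* tail wrapped past the end */
	return 0;
}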
/**
@@ -682,7 +656,6 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
u32 mask = (OA_BUFFER_SIZE - 1);
size_t start_offset = *offset;
unsigned long flags;
- unsigned int aged_tail_idx;
u32 head, tail;
u32 taken;
int ret = 0;
@@ -693,19 +666,11 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
head = stream->oa_buffer.head;
- aged_tail_idx = stream->oa_buffer.aged_tail_idx;
- tail = stream->oa_buffer.tails[aged_tail_idx].offset;
+ tail = stream->oa_buffer.tail;
spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
/*
- * An invalid tail pointer here means we're still waiting for the poll
- * hrtimer callback to give us a pointer
- */
- if (tail == INVALID_TAIL_PTR)
- return -EAGAIN;
-
- /*
* NB: oa_buffer.head/tail include the gtt_offset which we don't want
* while indexing relative to oa_buf_base.
*/
@@ -838,13 +803,11 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
}
/*
- * The above reason field sanity check is based on
- * the assumption that the OA buffer is initially
- * zeroed and we reset the field after copying so the
- * check is still meaningful once old reports start
- * being overwritten.
+ * Clear out the first 2 dwords as a means to detect unlanded
+ * reports.
*/
report32[0] = 0;
+ report32[1] = 0;
}
if (start_offset != *offset) {
@@ -985,7 +948,6 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
u32 mask = (OA_BUFFER_SIZE - 1);
size_t start_offset = *offset;
unsigned long flags;
- unsigned int aged_tail_idx;
u32 head, tail;
u32 taken;
int ret = 0;
@@ -996,17 +958,10 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
head = stream->oa_buffer.head;
- aged_tail_idx = stream->oa_buffer.aged_tail_idx;
- tail = stream->oa_buffer.tails[aged_tail_idx].offset;
+ tail = stream->oa_buffer.tail;
spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
- /* An invalid tail pointer here means we're still waiting for the poll
- * hrtimer callback to give us a pointer
- */
- if (tail == INVALID_TAIL_PTR)
- return -EAGAIN;
-
/* NB: oa_buffer.head/tail include the gtt_offset which we don't want
* while indexing relative to oa_buf_base.
*/
@@ -1064,13 +1019,11 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
if (ret)
break;
- /* The above report-id field sanity check is based on
- * the assumption that the OA buffer is initially
- * zeroed and we reset the field after copying so the
- * check is still meaningful once old reports start
- * being overwritten.
+ /* Clear out the first 2 dwords as a means to detect unlanded
+ * reports.
*/
report32[0] = 0;
+ report32[1] = 0;
}
if (start_offset != *offset) {
@@ -1447,8 +1400,8 @@ static void gen7_init_oa_buffer(struct i915_perf_stream *stream)
gtt_offset | OABUFFER_SIZE_16M);
/* Mark that we need updated tail pointers to read from... */
- stream->oa_buffer.tails[0].offset = INVALID_TAIL_PTR;
- stream->oa_buffer.tails[1].offset = INVALID_TAIL_PTR;
+ stream->oa_buffer.aging_tail = INVALID_TAIL_PTR;
+ stream->oa_buffer.tail = gtt_offset;
spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
@@ -1470,8 +1423,6 @@ static void gen7_init_oa_buffer(struct i915_perf_stream *stream)
* memory...
*/
memset(stream->oa_buffer.vaddr, 0, OA_BUFFER_SIZE);
-
- stream->pollin = false;
}
static void gen8_init_oa_buffer(struct i915_perf_stream *stream)
@@ -1501,8 +1452,8 @@ static void gen8_init_oa_buffer(struct i915_perf_stream *stream)
intel_uncore_write(uncore, GEN8_OATAILPTR, gtt_offset & GEN8_OATAILPTR_MASK);
/* Mark that we need updated tail pointers to read from... */
- stream->oa_buffer.tails[0].offset = INVALID_TAIL_PTR;
- stream->oa_buffer.tails[1].offset = INVALID_TAIL_PTR;
+ stream->oa_buffer.aging_tail = INVALID_TAIL_PTR;
+ stream->oa_buffer.tail = gtt_offset;
/*
* Reset state used to recognise context switches, affecting which
@@ -1526,8 +1477,6 @@ static void gen8_init_oa_buffer(struct i915_perf_stream *stream)
* memory...
*/
memset(stream->oa_buffer.vaddr, 0, OA_BUFFER_SIZE);
-
- stream->pollin = false;
}
static void gen12_init_oa_buffer(struct i915_perf_stream *stream)
@@ -1557,8 +1506,8 @@ static void gen12_init_oa_buffer(struct i915_perf_stream *stream)
gtt_offset & GEN12_OAG_OATAILPTR_MASK);
/* Mark that we need updated tail pointers to read from... */
- stream->oa_buffer.tails[0].offset = INVALID_TAIL_PTR;
- stream->oa_buffer.tails[1].offset = INVALID_TAIL_PTR;
+ stream->oa_buffer.aging_tail = INVALID_TAIL_PTR;
+ stream->oa_buffer.tail = gtt_offset;
/*
* Reset state used to recognise context switches, affecting which
@@ -1583,8 +1532,6 @@ static void gen12_init_oa_buffer(struct i915_perf_stream *stream)
*/
memset(stream->oa_buffer.vaddr, 0,
stream->oa_buffer.vma->size);
-
- stream->pollin = false;
}
static int alloc_oa_buffer(struct i915_perf_stream *stream)
@@ -1665,10 +1612,7 @@ static int alloc_noa_wait(struct i915_perf_stream *stream)
struct drm_i915_gem_object *bo;
struct i915_vma *vma;
const u64 delay_ticks = 0xffffffffffffffff -
- DIV64_U64_ROUND_UP(
- atomic64_read(&stream->perf->noa_programming_delay) *
- RUNTIME_INFO(i915)->cs_timestamp_frequency_khz,
- 1000000ull);
+ i915_cs_timestamp_ns_to_ticks(i915, atomic64_read(&stream->perf->noa_programming_delay));
const u32 base = stream->engine->mmio_base;
#define CS_GPR(x) GEN8_RING_CS_GPR(base, x)
u32 *batch, *ts0, *cs, *jump;
@@ -1970,10 +1914,11 @@ out:
return i915_vma_get(oa_bo->vma);
}
-static struct i915_request *
+static int
emit_oa_config(struct i915_perf_stream *stream,
struct i915_oa_config *oa_config,
- struct intel_context *ce)
+ struct intel_context *ce,
+ struct i915_active *active)
{
struct i915_request *rq;
struct i915_vma *vma;
@@ -1981,7 +1926,7 @@ emit_oa_config(struct i915_perf_stream *stream,
vma = get_oa_vma(stream, oa_config);
if (IS_ERR(vma))
- return ERR_CAST(vma);
+ return PTR_ERR(vma);
err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
if (err)
@@ -1995,6 +1940,18 @@ emit_oa_config(struct i915_perf_stream *stream,
goto err_vma_unpin;
}
+ if (!IS_ERR_OR_NULL(active)) {
+ /* After all individual context modifications */
+ err = i915_request_await_active(rq, active,
+ I915_ACTIVE_AWAIT_ACTIVE);
+ if (err)
+ goto err_add_request;
+
+ err = i915_active_add_request(active, rq);
+ if (err)
+ goto err_add_request;
+ }
+
i915_vma_lock(vma);
err = i915_request_await_object(rq, vma->obj, 0);
if (!err)
@@ -2009,14 +1966,13 @@ emit_oa_config(struct i915_perf_stream *stream,
if (err)
goto err_add_request;
- i915_request_get(rq);
err_add_request:
i915_request_add(rq);
err_vma_unpin:
i915_vma_unpin(vma);
err_vma_put:
i915_vma_put(vma);
- return err ? ERR_PTR(err) : rq;
+ return err;
}
static struct intel_context *oa_context(struct i915_perf_stream *stream)
@@ -2024,8 +1980,9 @@ static struct intel_context *oa_context(struct i915_perf_stream *stream)
return stream->pinned_ctx ?: stream->engine->kernel_context;
}
-static struct i915_request *
-hsw_enable_metric_set(struct i915_perf_stream *stream)
+static int
+hsw_enable_metric_set(struct i915_perf_stream *stream,
+ struct i915_active *active)
{
struct intel_uncore *uncore = stream->uncore;
@@ -2044,7 +2001,9 @@ hsw_enable_metric_set(struct i915_perf_stream *stream)
intel_uncore_rmw(uncore, GEN6_UCGCTL1,
0, GEN6_CSUNIT_CLOCK_GATE_DISABLE);
- return emit_oa_config(stream, stream->oa_config, oa_context(stream));
+ return emit_oa_config(stream,
+ stream->oa_config, oa_context(stream),
+ active);
}
static void hsw_disable_metric_set(struct i915_perf_stream *stream)
@@ -2114,9 +2073,6 @@ gen8_update_reg_state_unlocked(const struct intel_context *ce,
for (i = 0; i < ARRAY_SIZE(flex_regs); i++)
reg_state[ctx_flexeu0 + i * 2 + 1] =
oa_config_flex_reg(stream->oa_config, flex_regs[i]);
-
- reg_state[CTX_R_PWR_CLK_STATE] =
- intel_sseu_make_rpcs(ce->engine->i915, &ce->sseu);
}
struct flex {
@@ -2137,7 +2093,7 @@ gen8_store_flex(struct i915_request *rq,
if (IS_ERR(cs))
return PTR_ERR(cs);
- offset = i915_ggtt_offset(ce->state) + LRC_STATE_PN * PAGE_SIZE;
+ offset = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET;
do {
*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
*cs++ = offset + flex->offset * sizeof(u32);
@@ -2194,8 +2150,10 @@ static int gen8_modify_context(struct intel_context *ce,
return err;
}
-static int gen8_modify_self(struct intel_context *ce,
- const struct flex *flex, unsigned int count)
+static int
+gen8_modify_self(struct intel_context *ce,
+ const struct flex *flex, unsigned int count,
+ struct i915_active *active)
{
struct i915_request *rq;
int err;
@@ -2206,8 +2164,17 @@ static int gen8_modify_self(struct intel_context *ce,
if (IS_ERR(rq))
return PTR_ERR(rq);
+ if (!IS_ERR_OR_NULL(active)) {
+ err = i915_active_add_request(active, rq);
+ if (err)
+ goto err_add_request;
+ }
+
err = gen8_load_flex(rq, ce, flex, count);
+ if (err)
+ goto err_add_request;
+err_add_request:
i915_request_add(rq);
return err;
}
@@ -2241,7 +2208,8 @@ static int gen8_configure_context(struct i915_gem_context *ctx,
return err;
}
-static int gen12_configure_oar_context(struct i915_perf_stream *stream, bool enable)
+static int gen12_configure_oar_context(struct i915_perf_stream *stream,
+ struct i915_active *active)
{
int err;
struct intel_context *ce = stream->pinned_ctx;
@@ -2250,7 +2218,7 @@ static int gen12_configure_oar_context(struct i915_perf_stream *stream, bool ena
{
GEN8_OACTXCONTROL,
stream->perf->ctx_oactxctrl_offset + 1,
- enable ? GEN8_OA_COUNTER_RESUME : 0,
+ active ? GEN8_OA_COUNTER_RESUME : 0,
},
};
/* Offsets in regs_lri are not used since this configuration is only
@@ -2262,13 +2230,13 @@ static int gen12_configure_oar_context(struct i915_perf_stream *stream, bool ena
GEN12_OAR_OACONTROL,
GEN12_OAR_OACONTROL_OFFSET + 1,
(format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) |
- (enable ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0)
+ (active ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0)
},
{
RING_CONTEXT_CONTROL(ce->engine->mmio_base),
CTX_CONTEXT_CONTROL,
_MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE,
- enable ?
+ active ?
GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE :
0)
},
@@ -2285,7 +2253,7 @@ static int gen12_configure_oar_context(struct i915_perf_stream *stream, bool ena
return err;
/* Apply regs_lri using LRI with pinned context */
- return gen8_modify_self(ce, regs_lri, ARRAY_SIZE(regs_lri));
+ return gen8_modify_self(ce, regs_lri, ARRAY_SIZE(regs_lri), active);
}
/*
@@ -2313,9 +2281,11 @@ static int gen12_configure_oar_context(struct i915_perf_stream *stream, bool ena
* Note: it's only the RCS/Render context that has any OA state.
* Note: the first flex register passed must always be R_PWR_CLK_STATE
*/
-static int oa_configure_all_contexts(struct i915_perf_stream *stream,
- struct flex *regs,
- size_t num_regs)
+static int
+oa_configure_all_contexts(struct i915_perf_stream *stream,
+ struct flex *regs,
+ size_t num_regs,
+ struct i915_active *active)
{
struct drm_i915_private *i915 = stream->perf->i915;
struct intel_engine_cs *engine;
@@ -2372,7 +2342,7 @@ static int oa_configure_all_contexts(struct i915_perf_stream *stream,
regs[0].value = intel_sseu_make_rpcs(i915, &ce->sseu);
- err = gen8_modify_self(ce, regs, num_regs);
+ err = gen8_modify_self(ce, regs, num_regs, active);
if (err)
return err;
}
@@ -2380,8 +2350,10 @@ static int oa_configure_all_contexts(struct i915_perf_stream *stream,
return 0;
}
-static int gen12_configure_all_contexts(struct i915_perf_stream *stream,
- const struct i915_oa_config *oa_config)
+static int
+gen12_configure_all_contexts(struct i915_perf_stream *stream,
+ const struct i915_oa_config *oa_config,
+ struct i915_active *active)
{
struct flex regs[] = {
{
@@ -2390,11 +2362,15 @@ static int gen12_configure_all_contexts(struct i915_perf_stream *stream,
},
};
- return oa_configure_all_contexts(stream, regs, ARRAY_SIZE(regs));
+ return oa_configure_all_contexts(stream,
+ regs, ARRAY_SIZE(regs),
+ active);
}
-static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
- const struct i915_oa_config *oa_config)
+static int
+lrc_configure_all_contexts(struct i915_perf_stream *stream,
+ const struct i915_oa_config *oa_config,
+ struct i915_active *active)
{
/* The MMIO offsets for Flex EU registers aren't contiguous */
const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset;
@@ -2427,11 +2403,14 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
for (i = 2; i < ARRAY_SIZE(regs); i++)
regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg);
- return oa_configure_all_contexts(stream, regs, ARRAY_SIZE(regs));
+ return oa_configure_all_contexts(stream,
+ regs, ARRAY_SIZE(regs),
+ active);
}
-static struct i915_request *
-gen8_enable_metric_set(struct i915_perf_stream *stream)
+static int
+gen8_enable_metric_set(struct i915_perf_stream *stream,
+ struct i915_active *active)
{
struct intel_uncore *uncore = stream->uncore;
struct i915_oa_config *oa_config = stream->oa_config;
@@ -2471,11 +2450,13 @@ gen8_enable_metric_set(struct i915_perf_stream *stream)
* to make sure all slices/subslices are ON before writing to NOA
* registers.
*/
- ret = lrc_configure_all_contexts(stream, oa_config);
+ ret = lrc_configure_all_contexts(stream, oa_config, active);
if (ret)
- return ERR_PTR(ret);
+ return ret;
- return emit_oa_config(stream, oa_config, oa_context(stream));
+ return emit_oa_config(stream,
+ stream->oa_config, oa_context(stream),
+ active);
}
static u32 oag_report_ctx_switches(const struct i915_perf_stream *stream)
@@ -2485,8 +2466,9 @@ static u32 oag_report_ctx_switches(const struct i915_perf_stream *stream)
0 : GEN12_OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS);
}
-static struct i915_request *
-gen12_enable_metric_set(struct i915_perf_stream *stream)
+static int
+gen12_enable_metric_set(struct i915_perf_stream *stream,
+ struct i915_active *active)
{
struct intel_uncore *uncore = stream->uncore;
struct i915_oa_config *oa_config = stream->oa_config;
@@ -2515,9 +2497,9 @@ gen12_enable_metric_set(struct i915_perf_stream *stream)
* to make sure all slices/subslices are ON before writing to NOA
* registers.
*/
- ret = gen12_configure_all_contexts(stream, oa_config);
+ ret = gen12_configure_all_contexts(stream, oa_config, active);
if (ret)
- return ERR_PTR(ret);
+ return ret;
/*
* For Gen12, performance counters are context
@@ -2525,12 +2507,14 @@ gen12_enable_metric_set(struct i915_perf_stream *stream)
* requested this.
*/
if (stream->ctx) {
- ret = gen12_configure_oar_context(stream, true);
+ ret = gen12_configure_oar_context(stream, active);
if (ret)
- return ERR_PTR(ret);
+ return ret;
}
- return emit_oa_config(stream, oa_config, oa_context(stream));
+ return emit_oa_config(stream,
+ stream->oa_config, oa_context(stream),
+ active);
}
static void gen8_disable_metric_set(struct i915_perf_stream *stream)
@@ -2538,7 +2522,7 @@ static void gen8_disable_metric_set(struct i915_perf_stream *stream)
struct intel_uncore *uncore = stream->uncore;
/* Reset all contexts' slices/subslices configurations. */
- lrc_configure_all_contexts(stream, NULL);
+ lrc_configure_all_contexts(stream, NULL, NULL);
intel_uncore_rmw(uncore, GDT_CHICKEN_BITS, GT_NOA_ENABLE, 0);
}
@@ -2548,7 +2532,7 @@ static void gen10_disable_metric_set(struct i915_perf_stream *stream)
struct intel_uncore *uncore = stream->uncore;
/* Reset all contexts' slices/subslices configurations. */
- lrc_configure_all_contexts(stream, NULL);
+ lrc_configure_all_contexts(stream, NULL, NULL);
/* Make sure we disable noa to save power. */
intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0);
@@ -2559,11 +2543,11 @@ static void gen12_disable_metric_set(struct i915_perf_stream *stream)
struct intel_uncore *uncore = stream->uncore;
/* Reset all contexts' slices/subslices configurations. */
- gen12_configure_all_contexts(stream, NULL);
+ gen12_configure_all_contexts(stream, NULL, NULL);
/* disable the context save/restore or OAR counters */
if (stream->ctx)
- gen12_configure_oar_context(stream, false);
+ gen12_configure_oar_context(stream, NULL);
/* Make sure we disable noa to save power. */
intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0);
@@ -2655,11 +2639,13 @@ static void gen12_oa_enable(struct i915_perf_stream *stream)
*/
static void i915_oa_stream_enable(struct i915_perf_stream *stream)
{
+ stream->pollin = false;
+
stream->perf->ops.oa_enable(stream);
if (stream->periodic)
hrtimer_start(&stream->poll_check_timer,
- ns_to_ktime(POLL_PERIOD),
+ ns_to_ktime(stream->poll_oa_period),
HRTIMER_MODE_REL_PINNED);
}
@@ -2735,16 +2721,52 @@ static const struct i915_perf_stream_ops i915_oa_stream_ops = {
static int i915_perf_stream_enable_sync(struct i915_perf_stream *stream)
{
- struct i915_request *rq;
+ struct i915_active *active;
+ int err;
- rq = stream->perf->ops.enable_metric_set(stream);
- if (IS_ERR(rq))
- return PTR_ERR(rq);
+ active = i915_active_create();
+ if (!active)
+ return -ENOMEM;
- i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
- i915_request_put(rq);
+ err = stream->perf->ops.enable_metric_set(stream, active);
+ if (err == 0)
+ __i915_active_wait(active, TASK_UNINTERRUPTIBLE);
- return 0;
+ i915_active_put(active);
+ return err;
+}
+
+static void
+get_default_sseu_config(struct intel_sseu *out_sseu,
+ struct intel_engine_cs *engine)
+{
+ const struct sseu_dev_info *devinfo_sseu =
+ &RUNTIME_INFO(engine->i915)->sseu;
+
+ *out_sseu = intel_sseu_from_device_info(devinfo_sseu);
+
+ if (IS_GEN(engine->i915, 11)) {
+ /*
+ * We only need the subslice count so it doesn't matter which ones
+ * we select - just keep the low half of the available subslices
+ * per slice enabled.
+ */
+ out_sseu->subslice_mask =
+ ~(~0 << (hweight8(out_sseu->subslice_mask) / 2));
+ out_sseu->slice_mask = 0x1;
+ }
+}
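A quick standalone check of the subslice-halving expression above; __builtin_popcount() stands in for hweight8() and the masks are only illustrative:
#include <assert.h>
#include <stdint.h>
/* Keep only the low half of the enabled subslices, mirroring
 * ~(~0 << (hweight8(mask) / 2)) in get_default_sseu_config(). */
static uint8_t half_subslice_mask(uint8_t mask)
{
	unsigned int half = (unsigned int)__builtin_popcount(mask) / 2;
	return (uint8_t)~(~0u << half);
}
int main(void)
{
	assert(half_subslice_mask(0xff) == 0x0f);	/* 8 subslices -> 4 */
	assert(half_subslice_mask(0x3f) == 0x07);	/* 6 subslices -> 3 */
	return 0;
}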
+
+static int
+get_sseu_config(struct intel_sseu *out_sseu,
+ struct intel_engine_cs *engine,
+ const struct drm_i915_gem_context_param_sseu *drm_sseu)
+{
+ if (drm_sseu->engine.engine_class != engine->uabi_class ||
+ drm_sseu->engine.engine_instance != engine->uabi_instance)
+ return -EINVAL;
+
+ return i915_gem_user_to_context_sseu(engine->i915, drm_sseu, out_sseu);
}
/**
@@ -2879,6 +2901,8 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
goto err_oa_buf_alloc;
stream->ops = &i915_oa_stream_ops;
+
+ perf->sseu = props->sseu;
WRITE_ONCE(perf->exclusive_stream, stream);
ret = i915_perf_stream_enable_sync(stream);
@@ -2930,10 +2954,6 @@ void i915_oa_init_reg_state(const struct intel_context *ce,
/* perf.exclusive_stream serialised by lrc_configure_all_contexts() */
stream = READ_ONCE(engine->i915->perf.exclusive_stream);
- /*
- * For gen12, only CTX_R_PWR_CLK_STATE needs update, but the caller
- * is already doing that, so nothing to be done for gen12 here.
- */
if (stream && INTEL_GEN(stream->perf->i915) < 12)
gen8_update_reg_state_unlocked(ce, stream);
}
@@ -3024,7 +3044,8 @@ static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer)
wake_up(&stream->poll_wq);
}
- hrtimer_forward_now(hrtimer, ns_to_ktime(POLL_PERIOD));
+ hrtimer_forward_now(hrtimer,
+ ns_to_ktime(stream->poll_oa_period));
return HRTIMER_RESTART;
}
@@ -3155,7 +3176,7 @@ static long i915_perf_config_locked(struct i915_perf_stream *stream,
return -EINVAL;
if (config != stream->oa_config) {
- struct i915_request *rq;
+ int err;
/*
* If OA is bound to a specific context, emit the
@@ -3166,13 +3187,11 @@ static long i915_perf_config_locked(struct i915_perf_stream *stream,
* When set globally, we use a low priority kernel context,
* so it will effectively take effect when idle.
*/
- rq = emit_oa_config(stream, config, oa_context(stream));
- if (!IS_ERR(rq)) {
+ err = emit_oa_config(stream, config, oa_context(stream), NULL);
+ if (!err)
config = xchg(&stream->oa_config, config);
- i915_request_put(rq);
- } else {
- ret = PTR_ERR(rq);
- }
+ else
+ ret = err;
}
i915_oa_config_put(config);
@@ -3385,13 +3404,21 @@ i915_perf_open_ioctl_locked(struct i915_perf *perf,
privileged_op = true;
}
+ /*
+ * Asking for SSEU configuration is a privileged operation.
+ */
+ if (props->has_sseu)
+ privileged_op = true;
+ else
+ get_default_sseu_config(&props->sseu, props->engine);
+
/* Similar to perf's kernel.perf_paranoid_cpu sysctl option
* we check a dev.i915.perf_stream_paranoid sysctl option
* to determine if it's ok to access system wide OA counters
- * without CAP_SYS_ADMIN privileges.
+ * without CAP_PERFMON or CAP_SYS_ADMIN privileges.
*/
if (privileged_op &&
- i915_perf_stream_paranoid && !capable(CAP_SYS_ADMIN)) {
+ i915_perf_stream_paranoid && !perfmon_capable()) {
DRM_DEBUG("Insufficient privileges to open i915 perf stream\n");
ret = -EACCES;
goto err_ctx;
@@ -3405,6 +3432,7 @@ i915_perf_open_ioctl_locked(struct i915_perf *perf,
stream->perf = perf;
stream->ctx = specific_ctx;
+ stream->poll_oa_period = props->poll_oa_period;
ret = i915_oa_stream_init(stream, param, props);
if (ret)
@@ -3454,8 +3482,7 @@ err:
static u64 oa_exponent_to_ns(struct i915_perf *perf, int exponent)
{
- return div64_u64(1000000000ULL * (2ULL << exponent),
- 1000ULL * RUNTIME_INFO(perf->i915)->cs_timestamp_frequency_khz);
+ return i915_cs_timestamp_ticks_to_ns(perf->i915, 2ULL << exponent);
}
/**
@@ -3480,8 +3507,10 @@ static int read_properties_unlocked(struct i915_perf *perf,
{
u64 __user *uprop = uprops;
u32 i;
+ int ret;
memset(props, 0, sizeof(struct perf_open_properties));
+ props->poll_oa_period = DEFAULT_POLL_PERIOD_NS;
if (!n_props) {
DRM_DEBUG("No i915 perf properties given\n");
@@ -3511,7 +3540,6 @@ static int read_properties_unlocked(struct i915_perf *perf,
for (i = 0; i < n_props; i++) {
u64 oa_period, oa_freq_hz;
u64 id, value;
- int ret;
ret = get_user(id, uprop);
if (ret)
@@ -3584,9 +3612,8 @@ static int read_properties_unlocked(struct i915_perf *perf,
} else
oa_freq_hz = 0;
- if (oa_freq_hz > i915_oa_max_sample_rate &&
- !capable(CAP_SYS_ADMIN)) {
- DRM_DEBUG("OA exponent would exceed the max sampling frequency (sysctl dev.i915.oa_max_sample_rate) %uHz without root privileges\n",
+ if (oa_freq_hz > i915_oa_max_sample_rate && !perfmon_capable()) {
+ DRM_DEBUG("OA exponent would exceed the max sampling frequency (sysctl dev.i915.oa_max_sample_rate) %uHz without CAP_PERFMON or CAP_SYS_ADMIN privileges\n",
i915_oa_max_sample_rate);
return -EACCES;
}
@@ -3597,6 +3624,32 @@ static int read_properties_unlocked(struct i915_perf *perf,
case DRM_I915_PERF_PROP_HOLD_PREEMPTION:
props->hold_preemption = !!value;
break;
+ case DRM_I915_PERF_PROP_GLOBAL_SSEU: {
+ struct drm_i915_gem_context_param_sseu user_sseu;
+
+ if (copy_from_user(&user_sseu,
+ u64_to_user_ptr(value),
+ sizeof(user_sseu))) {
+ DRM_DEBUG("Unable to copy global sseu parameter\n");
+ return -EFAULT;
+ }
+
+ ret = get_sseu_config(&props->sseu, props->engine, &user_sseu);
+ if (ret) {
+ DRM_DEBUG("Invalid SSEU configuration\n");
+ return ret;
+ }
+ props->has_sseu = true;
+ break;
+ }
+ case DRM_I915_PERF_PROP_POLL_OA_PERIOD:
+ if (value < 100000 /* 100us */) {
+ DRM_DEBUG("OA availability timer too small (%lluns < 100us)\n",
+ value);
+ return -EINVAL;
+ }
+ props->poll_oa_period = value;
+ break;
case DRM_I915_PERF_PROP_MAX:
MISSING_CASE(id);
return -EINVAL;
@@ -3679,7 +3732,6 @@ int i915_perf_open_ioctl(struct drm_device *dev, void *data,
void i915_perf_register(struct drm_i915_private *i915)
{
struct i915_perf *perf = &i915->perf;
- int ret;
if (!perf->i915)
return;
@@ -3693,64 +3745,7 @@ void i915_perf_register(struct drm_i915_private *i915)
perf->metrics_kobj =
kobject_create_and_add("metrics",
&i915->drm.primary->kdev->kobj);
- if (!perf->metrics_kobj)
- goto exit;
-
- sysfs_attr_init(&perf->test_config.sysfs_metric_id.attr);
-
- if (IS_TIGERLAKE(i915)) {
- i915_perf_load_test_config_tgl(i915);
- } else if (INTEL_GEN(i915) >= 11) {
- i915_perf_load_test_config_icl(i915);
- } else if (IS_CANNONLAKE(i915)) {
- i915_perf_load_test_config_cnl(i915);
- } else if (IS_COFFEELAKE(i915)) {
- if (IS_CFL_GT2(i915))
- i915_perf_load_test_config_cflgt2(i915);
- if (IS_CFL_GT3(i915))
- i915_perf_load_test_config_cflgt3(i915);
- } else if (IS_GEMINILAKE(i915)) {
- i915_perf_load_test_config_glk(i915);
- } else if (IS_KABYLAKE(i915)) {
- if (IS_KBL_GT2(i915))
- i915_perf_load_test_config_kblgt2(i915);
- else if (IS_KBL_GT3(i915))
- i915_perf_load_test_config_kblgt3(i915);
- } else if (IS_BROXTON(i915)) {
- i915_perf_load_test_config_bxt(i915);
- } else if (IS_SKYLAKE(i915)) {
- if (IS_SKL_GT2(i915))
- i915_perf_load_test_config_sklgt2(i915);
- else if (IS_SKL_GT3(i915))
- i915_perf_load_test_config_sklgt3(i915);
- else if (IS_SKL_GT4(i915))
- i915_perf_load_test_config_sklgt4(i915);
- } else if (IS_CHERRYVIEW(i915)) {
- i915_perf_load_test_config_chv(i915);
- } else if (IS_BROADWELL(i915)) {
- i915_perf_load_test_config_bdw(i915);
- } else if (IS_HASWELL(i915)) {
- i915_perf_load_test_config_hsw(i915);
- }
-
- if (perf->test_config.id == 0)
- goto sysfs_error;
-
- ret = sysfs_create_group(perf->metrics_kobj,
- &perf->test_config.sysfs_metric);
- if (ret)
- goto sysfs_error;
-
- perf->test_config.perf = perf;
- kref_init(&perf->test_config.ref);
-
- goto exit;
-sysfs_error:
- kobject_put(perf->metrics_kobj);
- perf->metrics_kobj = NULL;
-
-exit:
mutex_unlock(&perf->lock);
}
@@ -3770,9 +3765,6 @@ void i915_perf_unregister(struct drm_i915_private *i915)
if (!perf->metrics_kobj)
return;
- sysfs_remove_group(perf->metrics_kobj,
- &perf->test_config.sysfs_metric);
-
kobject_put(perf->metrics_kobj);
perf->metrics_kobj = NULL;
}
@@ -4007,7 +3999,7 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
return -EINVAL;
}
- if (i915_perf_stream_paranoid && !capable(CAP_SYS_ADMIN)) {
+ if (i915_perf_stream_paranoid && !perfmon_capable()) {
DRM_DEBUG("Insufficient privileges to add i915 OA config\n");
return -EACCES;
}
@@ -4154,7 +4146,7 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data,
return -ENOTSUPP;
}
- if (i915_perf_stream_paranoid && !capable(CAP_SYS_ADMIN)) {
+ if (i915_perf_stream_paranoid && !perfmon_capable()) {
DRM_DEBUG("Insufficient privileges to remove i915 OA config\n");
return -EACCES;
}
@@ -4347,8 +4339,8 @@ void i915_perf_init(struct drm_i915_private *i915)
if (perf->ops.enable_metric_set) {
mutex_init(&perf->lock);
- oa_sample_rate_hard_limit = 1000 *
- (RUNTIME_INFO(i915)->cs_timestamp_frequency_khz / 2);
+ oa_sample_rate_hard_limit =
+ RUNTIME_INFO(i915)->cs_timestamp_frequency_hz / 2;
mutex_init(&perf->metrics_lock);
idr_init(&perf->metrics_idr);
@@ -4371,6 +4363,11 @@ void i915_perf_init(struct drm_i915_private *i915)
ratelimit_set_flags(&perf->spurious_report_rs,
RATELIMIT_MSG_ON_RELEASE);
+ ratelimit_state_init(&perf->tail_pointer_race,
+ 5 * HZ, 10);
+ ratelimit_set_flags(&perf->tail_pointer_race,
+ RATELIMIT_MSG_ON_RELEASE);
+
atomic64_set(&perf->noa_programming_delay,
500 * 1000 /* 500us */);
@@ -4431,8 +4428,15 @@ int i915_perf_ioctl_version(void)
* preemption on a particular context so that performance data is
* accessible from a delta of MI_RPC reports without looking at the
* OA buffer.
+ *
+ * 4: Add DRM_I915_PERF_PROP_GLOBAL_SSEU to pin all contexts to a given
+ * SSEU configuration for the duration of the performance recording.
+ *
+ * 5: Add DRM_I915_PERF_PROP_POLL_OA_PERIOD parameter that controls the
+ * interval for the hrtimer used to check for OA data.
*/
- return 3;
+ return 5;
}
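A hedged userspace sketch of how the revision 5 addition might be used: open an OA stream and request a 2 ms OA-buffer check interval through DRM_I915_PERF_PROP_POLL_OA_PERIOD (the minimum is 100 us, as enforced in read_properties_unlocked() above). The uapi header path, metrics set id, OA format and exponent below are placeholders that depend on the platform and the installed configs, and the caller needs CAP_PERFMON/CAP_SYS_ADMIN unless dev.i915.perf_stream_paranoid is relaxed.
#include <fcntl.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <drm/i915_drm.h>	/* or <i915_drm.h> from libdrm's include dir */
static int open_oa_stream(int drm_fd, uint64_t metrics_set)
{
	uint64_t properties[] = {
		DRM_I915_PERF_PROP_SAMPLE_OA,		1,
		DRM_I915_PERF_PROP_OA_METRICS_SET,	metrics_set,
		DRM_I915_PERF_PROP_OA_FORMAT,		I915_OA_FORMAT_A32u40_A4u32_B8_C8,
		DRM_I915_PERF_PROP_OA_EXPONENT,		16,		/* placeholder period */
		DRM_I915_PERF_PROP_POLL_OA_PERIOD,	2000000,	/* 2 ms >= 100 us */
	};
	struct drm_i915_perf_open_param param = {
		.flags = I915_PERF_FLAG_FD_CLOEXEC,
		.num_properties = sizeof(properties) / (2 * sizeof(uint64_t)),
		.properties_ptr = (uint64_t)(uintptr_t)properties,
	};
	return ioctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param);	/* new stream fd */
}
int main(void)
{
	int drm_fd = open("/dev/dri/card0", O_RDWR | O_CLOEXEC);	/* placeholder node */
	int stream_fd = drm_fd < 0 ? -1 : open_oa_stream(drm_fd, 1 /* placeholder id */);
	return stream_fd < 0;
}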
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h
index a0e22f00f6cf..a36a455ae336 100644
--- a/drivers/gpu/drm/i915/i915_perf_types.h
+++ b/drivers/gpu/drm/i915/i915_perf_types.h
@@ -16,11 +16,13 @@
#include <linux/uuid.h>
#include <linux/wait.h>
+#include "gt/intel_sseu.h"
#include "i915_reg.h"
#include "intel_wakeref.h"
struct drm_i915_private;
struct file;
+struct i915_active;
struct i915_gem_context;
struct i915_perf;
struct i915_vma;
@@ -272,21 +274,10 @@ struct i915_perf_stream {
spinlock_t ptr_lock;
/**
- * @tails: One 'aging' tail pointer and one 'aged' tail pointer ready to
- * used for reading.
- *
- * Initial values of 0xffffffff are invalid and imply that an
- * update is required (and should be ignored by an attempted
- * read)
- */
- struct {
- u32 offset;
- } tails[2];
-
- /**
- * @aged_tail_idx: Index for the aged tail ready to read() data up to.
+ * @aging_tail: The last tail pointer reported by the HW. The data
+ * might not have made it to memory yet though.
*/
- unsigned int aged_tail_idx;
+ u32 aging_tail;
/**
* @aging_timestamp: A monotonic timestamp for when the current aging tail pointer
@@ -302,6 +293,11 @@ struct i915_perf_stream {
* OA buffer data to userspace.
*/
u32 head;
+
+ /**
+ * @tail: The last verified tail that can be read by userspace.
+ */
+ u32 tail;
} oa_buffer;
/**
@@ -309,6 +305,12 @@ struct i915_perf_stream {
* reprogrammed.
*/
struct i915_vma *noa_wait;
+
+ /**
+ * @poll_oa_period: The period in nanoseconds at which the OA
+ * buffer should be checked for available data.
+ */
+ u64 poll_oa_period;
};
/**
@@ -339,8 +341,8 @@ struct i915_oa_ops {
* counter reports being sampled. May apply system constraints such as
* disabling EU clock gating as required.
*/
- struct i915_request *
- (*enable_metric_set)(struct i915_perf_stream *stream);
+ int (*enable_metric_set)(struct i915_perf_stream *stream,
+ struct i915_active *active);
/**
* @disable_metric_set: Remove system constraints associated with using
@@ -408,12 +410,22 @@ struct i915_perf {
struct i915_perf_stream *exclusive_stream;
/**
+ * @sseu: sseu configuration selected to run while perf is active,
+ * applies to all contexts.
+ */
+ struct intel_sseu sseu;
+
+ /**
* For rate limiting any notifications of spurious
* invalid OA reports
*/
struct ratelimit_state spurious_report_rs;
- struct i915_oa_config test_config;
+ /**
+ * For rate limiting any notifications of tail pointer
+ * race.
+ */
+ struct ratelimit_state tail_pointer_race;
u32 gen7_latched_oastatus1;
u32 ctx_oactxctrl_offset;
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 2c062534eac1..e991a707bdb7 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -439,29 +439,10 @@ static u64 count_interrupts(struct drm_i915_private *i915)
return sum;
}
-static void engine_event_destroy(struct perf_event *event)
-{
- struct drm_i915_private *i915 =
- container_of(event->pmu, typeof(*i915), pmu.base);
- struct intel_engine_cs *engine;
-
- engine = intel_engine_lookup_user(i915,
- engine_event_class(event),
- engine_event_instance(event));
- if (drm_WARN_ON_ONCE(&i915->drm, !engine))
- return;
-
- if (engine_event_sample(event) == I915_SAMPLE_BUSY &&
- intel_engine_supports_stats(engine))
- intel_disable_engine_stats(engine);
-}
-
static void i915_pmu_event_destroy(struct perf_event *event)
{
WARN_ON(event->parent);
-
- if (is_engine_event(event))
- engine_event_destroy(event);
+ module_put(THIS_MODULE);
}
static int
@@ -514,23 +495,13 @@ static int engine_event_init(struct perf_event *event)
struct drm_i915_private *i915 =
container_of(event->pmu, typeof(*i915), pmu.base);
struct intel_engine_cs *engine;
- u8 sample;
- int ret;
engine = intel_engine_lookup_user(i915, engine_event_class(event),
engine_event_instance(event));
if (!engine)
return -ENODEV;
- sample = engine_event_sample(event);
- ret = engine_event_status(engine, sample);
- if (ret)
- return ret;
-
- if (sample == I915_SAMPLE_BUSY && intel_engine_supports_stats(engine))
- ret = intel_enable_engine_stats(engine);
-
- return ret;
+ return engine_event_status(engine, engine_event_sample(event));
}
static int i915_pmu_event_init(struct perf_event *event)
@@ -563,8 +534,10 @@ static int i915_pmu_event_init(struct perf_event *event)
if (ret)
return ret;
- if (!event->parent)
+ if (!event->parent) {
+ __module_get(THIS_MODULE);
event->destroy = i915_pmu_event_destroy;
+ }
return 0;
}
@@ -1115,7 +1088,7 @@ void i915_pmu_register(struct drm_i915_private *i915)
int ret = -ENOMEM;
if (INTEL_GEN(i915) <= 2) {
- dev_info(i915->drm.dev, "PMU not supported for this GPU.");
+ drm_info(&i915->drm, "PMU not supported for this GPU.");
return;
}
@@ -1178,7 +1151,7 @@ err_name:
if (!is_igp(i915))
kfree(pmu->name);
err:
- dev_notice(i915->drm.dev, "Failed to register PMU!\n");
+ drm_notice(&i915->drm, "Failed to register PMU!\n");
}
void i915_pmu_unregister(struct drm_i915_private *i915)
diff --git a/drivers/gpu/drm/i915/i915_priolist_types.h b/drivers/gpu/drm/i915/i915_priolist_types.h
index 732aad148881..5003a71113cb 100644
--- a/drivers/gpu/drm/i915/i915_priolist_types.h
+++ b/drivers/gpu/drm/i915/i915_priolist_types.h
@@ -24,15 +24,12 @@ enum {
I915_PRIORITY_DISPLAY,
};
-#define I915_USER_PRIORITY_SHIFT 2
+#define I915_USER_PRIORITY_SHIFT 0
#define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT)
#define I915_PRIORITY_COUNT BIT(I915_USER_PRIORITY_SHIFT)
#define I915_PRIORITY_MASK (I915_PRIORITY_COUNT - 1)
-#define I915_PRIORITY_WAIT ((u8)BIT(0))
-#define I915_PRIORITY_NOSEMAPHORE ((u8)BIT(1))
-
/* Smallest priority value that cannot be bumped. */
#define I915_PRIORITY_INVALID (INT_MIN | (u8)I915_PRIORITY_MASK)
@@ -47,8 +44,6 @@ enum {
#define I915_PRIORITY_UNPREEMPTABLE INT_MAX
#define I915_PRIORITY_BARRIER INT_MAX
-#define __NO_PREEMPTION (I915_PRIORITY_WAIT)
-
struct i915_priolist {
struct list_head requests[I915_PRIORITY_COUNT];
struct rb_node node;
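
With I915_USER_PRIORITY_SHIFT dropped from 2 to 0, the two internal priority bits (WAIT, NOSEMAPHORE) disappear: I915_PRIORITY_COUNT becomes BIT(0) == 1, the mask becomes 0, and each i915_priolist carries a single request list again. A quick check of how the macros collapse (BIT() spelled out locally; values shown only for illustration):

#include <stdio.h>

#define BIT(n)                  (1u << (n))

#define USER_PRIORITY_SHIFT     0       /* was 2 */
#define USER_PRIORITY(x)        ((x) << USER_PRIORITY_SHIFT)
#define PRIORITY_COUNT          BIT(USER_PRIORITY_SHIFT)
#define PRIORITY_MASK           (PRIORITY_COUNT - 1)

int main(void)
{
        printf("count=%u mask=0x%x user(3)=%d\n",
               PRIORITY_COUNT, PRIORITY_MASK, USER_PRIORITY(3));
        /* shift 0: count=1 mask=0x0 user(3)=3
         * shift 2: count=4 mask=0x3 user(3)=12 */
        return 0;
}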
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 6e12000c4b6b..6c076a24eb82 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -561,6 +561,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
* Registers used only by the command parser
*/
#define BCS_SWCTRL _MMIO(0x22200)
+#define BCS_SRC_Y REG_BIT(0)
+#define BCS_DST_Y REG_BIT(1)
/* There are 16 GPR registers */
#define BCS_GPR(n) _MMIO(0x22600 + (n) * 8)
@@ -2555,6 +2557,14 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
#define GEN10_PAT_INDEX(index) _MMIO(0x40e0 + (index) * 4)
#define GEN12_PAT_INDEX(index) _MMIO(0x4800 + (index) * 4)
#define BSD_HWS_PGA_GEN7 _MMIO(0x04180)
+#define GEN12_GFX_CCS_AUX_NV _MMIO(0x4208)
+#define GEN12_VD0_AUX_NV _MMIO(0x4218)
+#define GEN12_VD1_AUX_NV _MMIO(0x4228)
+#define GEN12_VD2_AUX_NV _MMIO(0x4298)
+#define GEN12_VD3_AUX_NV _MMIO(0x42A8)
+#define GEN12_VE0_AUX_NV _MMIO(0x4238)
+#define GEN12_VE1_AUX_NV _MMIO(0x42B8)
+#define AUX_INV REG_BIT(0)
#define BLT_HWS_PGA_GEN7 _MMIO(0x04280)
#define VEBOX_HWS_PGA_GEN7 _MMIO(0x04380)
#define RING_ACTHD(base) _MMIO((base) + 0x74)
@@ -2657,6 +2667,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
#define RING_DMA_FADD_UDW(base) _MMIO((base) + 0x60) /* gen8+ */
#define RING_INSTPM(base) _MMIO((base) + 0xc0)
#define RING_MI_MODE(base) _MMIO((base) + 0x9c)
+#define RING_CMD_BUF_CCTL(base) _MMIO((base) + 0x84)
#define INSTPS _MMIO(0x2070) /* 965+ only */
#define GEN4_INSTDONE1 _MMIO(0x207c) /* 965+ only, aka INSTDONE_2 on SNB */
#define ACTHD_I965 _MMIO(0x2074)
@@ -4013,31 +4024,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
#define GEN6_RP_STATE_LIMITS _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5994)
#define GEN6_RP_STATE_CAP _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5998)
#define BXT_RP_STATE_CAP _MMIO(0x138170)
-
-/*
- * Make these a multiple of magic 25 to avoid SNB (eg. Dell XPS
- * 8300) freezing up around GPU hangs. Looks as if even
- * scheduling/timer interrupts start misbehaving if the RPS
- * EI/thresholds are "bad", leading to a very sluggish or even
- * frozen machine.
- */
-#define INTERVAL_1_28_US(us) roundup(((us) * 100) >> 7, 25)
-#define INTERVAL_1_33_US(us) (((us) * 3) >> 2)
-#define INTERVAL_0_833_US(us) (((us) * 6) / 5)
-#define GT_INTERVAL_FROM_US(dev_priv, us) (INTEL_GEN(dev_priv) >= 9 ? \
- (IS_GEN9_LP(dev_priv) ? \
- INTERVAL_0_833_US(us) : \
- INTERVAL_1_33_US(us)) : \
- INTERVAL_1_28_US(us))
-
-#define INTERVAL_1_28_TO_US(interval) (((interval) << 7) / 100)
-#define INTERVAL_1_33_TO_US(interval) (((interval) << 2) / 3)
-#define INTERVAL_0_833_TO_US(interval) (((interval) * 5) / 6)
-#define GT_PM_INTERVAL_TO_US(dev_priv, interval) (INTEL_GEN(dev_priv) >= 9 ? \
- (IS_GEN9_LP(dev_priv) ? \
- INTERVAL_0_833_TO_US(interval) : \
- INTERVAL_1_33_TO_US(interval)) : \
- INTERVAL_1_28_TO_US(interval))
+#define GEN9_RP_STATE_LIMITS _MMIO(0x138148)
/*
* Logical Context regs
@@ -4325,6 +4312,96 @@ enum {
#define EXITLINE_MASK REG_GENMASK(12, 0)
#define EXITLINE_SHIFT 0
+/* VRR registers */
+#define _TRANS_VRR_CTL_A 0x60420
+#define _TRANS_VRR_CTL_B 0x61420
+#define _TRANS_VRR_CTL_C 0x62420
+#define _TRANS_VRR_CTL_D 0x63420
+#define TRANS_VRR_CTL(trans) _MMIO_TRANS2(trans, _TRANS_VRR_CTL_A)
+#define VRR_CTL_VRR_ENABLE REG_BIT(31)
+#define VRR_CTL_IGN_MAX_SHIFT REG_BIT(30)
+#define VRR_CTL_FLIP_LINE_EN REG_BIT(29)
+#define VRR_CTL_LINE_COUNT_MASK REG_GENMASK(10, 3)
+#define VRR_CTL_SW_FULLLINE_COUNT REG_BIT(0)
+
+#define _TRANS_VRR_VMAX_A 0x60424
+#define _TRANS_VRR_VMAX_B 0x61424
+#define _TRANS_VRR_VMAX_C 0x62424
+#define _TRANS_VRR_VMAX_D 0x63424
+#define TRANS_VRR_VMAX(trans) _MMIO_TRANS2(trans, _TRANS_VRR_VMAX_A)
+#define VRR_VMAX_MASK REG_GENMASK(19, 0)
+
+#define _TRANS_VRR_VMIN_A 0x60434
+#define _TRANS_VRR_VMIN_B 0x61434
+#define _TRANS_VRR_VMIN_C 0x62434
+#define _TRANS_VRR_VMIN_D 0x63434
+#define TRANS_VRR_VMIN(trans) _MMIO_TRANS2(trans, _TRANS_VRR_VMIN_A)
+#define VRR_VMIN_MASK REG_GENMASK(15, 0)
+
+#define _TRANS_VRR_VMAXSHIFT_A 0x60428
+#define _TRANS_VRR_VMAXSHIFT_B 0x61428
+#define _TRANS_VRR_VMAXSHIFT_C 0x62428
+#define _TRANS_VRR_VMAXSHIFT_D 0x63428
+#define TRANS_VRR_VMAXSHIFT(trans) _MMIO_TRANS2(trans, \
+ _TRANS_VRR_VMAXSHIFT_A)
+#define VRR_VMAXSHIFT_DEC_MASK REG_GENMASK(29, 16)
+#define VRR_VMAXSHIFT_DEC REG_BIT(16)
+#define VRR_VMAXSHIFT_INC_MASK REG_GENMASK(12, 0)
+
+#define _TRANS_VRR_STATUS_A 0x6042C
+#define _TRANS_VRR_STATUS_B 0x6142C
+#define _TRANS_VRR_STATUS_C 0x6242C
+#define _TRANS_VRR_STATUS_D 0x6342C
+#define TRANS_VRR_STATUS(trans) _MMIO_TRANS2(trans, _TRANS_VRR_STATUS_A)
+#define VRR_STATUS_VMAX_REACHED REG_BIT(31)
+#define VRR_STATUS_NOFLIP_TILL_BNDR REG_BIT(30)
+#define VRR_STATUS_FLIP_BEF_BNDR REG_BIT(29)
+#define VRR_STATUS_NO_FLIP_FRAME REG_BIT(28)
+#define VRR_STATUS_VRR_EN_LIVE REG_BIT(27)
+#define VRR_STATUS_FLIPS_SERVICED REG_BIT(26)
+#define VRR_STATUS_VBLANK_MASK REG_GENMASK(22, 20)
+#define STATUS_FSM_IDLE REG_FIELD_PREP(VRR_STATUS_VBLANK_MASK, 0)
+#define STATUS_FSM_WAIT_TILL_FDB REG_FIELD_PREP(VRR_STATUS_VBLANK_MASK, 1)
+#define STATUS_FSM_WAIT_TILL_FS REG_FIELD_PREP(VRR_STATUS_VBLANK_MASK, 2)
+#define STATUS_FSM_WAIT_TILL_FLIP REG_FIELD_PREP(VRR_STATUS_VBLANK_MASK, 3)
+#define STATUS_FSM_PIPELINE_FILL REG_FIELD_PREP(VRR_STATUS_VBLANK_MASK, 4)
+#define STATUS_FSM_ACTIVE REG_FIELD_PREP(VRR_STATUS_VBLANK_MASK, 5)
+#define STATUS_FSM_LEGACY_VBLANK REG_FIELD_PREP(VRR_STATUS_VBLANK_MASK, 6)
+
+#define _TRANS_VRR_VTOTAL_PREV_A 0x60480
+#define _TRANS_VRR_VTOTAL_PREV_B 0x61480
+#define _TRANS_VRR_VTOTAL_PREV_C 0x62480
+#define _TRANS_VRR_VTOTAL_PREV_D 0x63480
+#define TRANS_VRR_VTOTAL_PREV(trans) _MMIO_TRANS2(trans, \
+ _TRANS_VRR_VTOTAL_PREV_A)
+#define VRR_VTOTAL_FLIP_BEFR_BNDR REG_BIT(31)
+#define VRR_VTOTAL_FLIP_AFTER_BNDR REG_BIT(30)
+#define VRR_VTOTAL_FLIP_AFTER_DBLBUF REG_BIT(29)
+#define VRR_VTOTAL_PREV_FRAME_MASK REG_GENMASK(19, 0)
+
+#define _TRANS_VRR_FLIPLINE_A 0x60438
+#define _TRANS_VRR_FLIPLINE_B 0x61438
+#define _TRANS_VRR_FLIPLINE_C 0x62438
+#define _TRANS_VRR_FLIPLINE_D 0x63438
+#define TRANS_VRR_FLIPLINE(trans) _MMIO_TRANS2(trans, \
+ _TRANS_VRR_FLIPLINE_A)
+#define VRR_FLIPLINE_MASK REG_GENMASK(19, 0)
+
+#define _TRANS_VRR_STATUS2_A 0x6043C
+#define _TRANS_VRR_STATUS2_B 0x6143C
+#define _TRANS_VRR_STATUS2_C 0x6243C
+#define _TRANS_VRR_STATUS2_D 0x6343C
+#define TRANS_VRR_STATUS2(trans) _MMIO_TRANS2(trans, _TRANS_VRR_STATUS2_A)
+#define VRR_STATUS2_VERT_LN_CNT_MASK REG_GENMASK(19, 0)
+
+#define _TRANS_PUSH_A 0x60A70
+#define _TRANS_PUSH_B 0x61A70
+#define _TRANS_PUSH_C 0x62A70
+#define _TRANS_PUSH_D 0x63A70
+#define TRANS_PUSH(trans) _MMIO_TRANS2(trans, _TRANS_PUSH_A)
+#define TRANS_PUSH_EN REG_BIT(31)
+#define TRANS_PUSH_SEND REG_BIT(30)
+
/*
* HSW+ eDP PSR registers
*
@@ -6765,7 +6842,7 @@ enum {
#define PLANE_CTL_FORMAT_P012 (5 << 24)
#define PLANE_CTL_FORMAT_XRGB_16161616F (6 << 24)
#define PLANE_CTL_FORMAT_P016 (7 << 24)
-#define PLANE_CTL_FORMAT_AYUV (8 << 24)
+#define PLANE_CTL_FORMAT_XYUV (8 << 24)
#define PLANE_CTL_FORMAT_INDEXED (12 << 24)
#define PLANE_CTL_FORMAT_RGB_565 (14 << 24)
#define ICL_PLANE_CTL_FORMAT_MASK (0x1f << 23)
@@ -8504,6 +8581,7 @@ enum {
#define FDI_BC_BIFURCATION_SELECT (1 << 12)
#define CHASSIS_CLK_REQ_DURATION_MASK (0xf << 8)
#define CHASSIS_CLK_REQ_DURATION(x) ((x) << 8)
+#define SBCLK_RUN_REFCLK_DIS (1 << 7)
#define SPT_PWM_GRANULARITY (1 << 0)
#define SOUTH_CHICKEN2 _MMIO(0xc2004)
#define FDI_MPHY_IOSFSB_RESET_STATUS (1 << 13)
@@ -8995,6 +9073,7 @@ enum {
#define GEN7_PCODE_ILLEGAL_DATA 0x3
#define GEN11_PCODE_ILLEGAL_SUBCOMMAND 0x4
#define GEN11_PCODE_LOCKED 0x6
+#define GEN11_PCODE_REJECTED 0x11
#define GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE 0x10
#define GEN6_PCODE_WRITE_RC6VIDS 0x4
#define GEN6_PCODE_READ_RC6VIDS 0x5
@@ -9016,10 +9095,18 @@ enum {
#define ICL_PCODE_MEM_SUBSYSYSTEM_INFO 0xd
#define ICL_PCODE_MEM_SS_READ_GLOBAL_INFO (0x0 << 8)
#define ICL_PCODE_MEM_SS_READ_QGV_POINT_INFO(point) (((point) << 16) | (0x1 << 8))
+#define ICL_PCODE_SAGV_DE_MEM_SS_CONFIG 0xe
+#define ICL_PCODE_POINTS_RESTRICTED 0x0
+#define ICL_PCODE_POINTS_RESTRICTED_MASK 0x1
#define GEN6_PCODE_READ_D_COMP 0x10
#define GEN6_PCODE_WRITE_D_COMP 0x11
+#define ICL_PCODE_EXIT_TCCOLD 0x12
#define HSW_PCODE_DE_WRITE_FREQ_REQ 0x17
#define DISPLAY_IPS_CONTROL 0x19
+#define TGL_PCODE_TCCOLD 0x26
+#define TGL_PCODE_EXIT_TCCOLD_DATA_L_EXIT_FAILED REG_BIT(0)
+#define TGL_PCODE_EXIT_TCCOLD_DATA_H_BLOCK_REQ 0
+#define TGL_PCODE_EXIT_TCCOLD_DATA_H_UNBLOCK_REQ REG_BIT(0)
/* See also IPS_CTL */
#define IPS_PCODE_CONTROL (1 << 30)
#define HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL 0x1A
@@ -9306,6 +9393,22 @@ enum {
#define AUD_PIN_BUF_CTL _MMIO(0x48414)
#define AUD_PIN_BUF_ENABLE REG_BIT(31)
+/* Display Audio Config Reg */
+#define AUD_CONFIG_BE _MMIO(0x65ef0)
+#define HBLANK_EARLY_ENABLE_ICL(pipe) (0x1 << (20 - (pipe)))
+#define HBLANK_EARLY_ENABLE_TGL(pipe) (0x1 << (24 + (pipe)))
+#define HBLANK_START_COUNT_MASK(pipe) (0x7 << (3 + ((pipe) * 6)))
+#define HBLANK_START_COUNT(pipe, val) (((val) & 0x7) << (3 + ((pipe)) * 6))
+#define NUMBER_SAMPLES_PER_LINE_MASK(pipe) (0x3 << ((pipe) * 6))
+#define NUMBER_SAMPLES_PER_LINE(pipe, val) (((val) & 0x3) << ((pipe) * 6))
+
+#define HBLANK_START_COUNT_8 0
+#define HBLANK_START_COUNT_16 1
+#define HBLANK_START_COUNT_32 2
+#define HBLANK_START_COUNT_64 3
+#define HBLANK_START_COUNT_96 4
+#define HBLANK_START_COUNT_128 5
+
/*
* HSW - ICL power wells
*
@@ -9701,8 +9804,11 @@ enum skl_power_gate {
#define TRANS_DDI_BPC_10 (1 << 20)
#define TRANS_DDI_BPC_6 (2 << 20)
#define TRANS_DDI_BPC_12 (3 << 20)
+#define TRANS_DDI_PORT_SYNC_MASTER_SELECT_MASK REG_GENMASK(19, 18) /* bdw-cnl */
+#define TRANS_DDI_PORT_SYNC_MASTER_SELECT(x) REG_FIELD_PREP(TRANS_DDI_PORT_SYNC_MASTER_SELECT_MASK, (x))
#define TRANS_DDI_PVSYNC (1 << 17)
#define TRANS_DDI_PHSYNC (1 << 16)
+#define TRANS_DDI_PORT_SYNC_ENABLE REG_BIT(15) /* bdw-cnl */
#define TRANS_DDI_EDP_INPUT_MASK (7 << 12)
#define TRANS_DDI_EDP_INPUT_A_ON (0 << 12)
#define TRANS_DDI_EDP_INPUT_A_ONOFF (4 << 12)
@@ -9729,12 +9835,10 @@ enum skl_power_gate {
#define _TRANS_DDI_FUNC_CTL2_EDP 0x6f404
#define _TRANS_DDI_FUNC_CTL2_DSI0 0x6b404
#define _TRANS_DDI_FUNC_CTL2_DSI1 0x6bc04
-#define TRANS_DDI_FUNC_CTL2(tran) _MMIO_TRANS2(tran, \
- _TRANS_DDI_FUNC_CTL2_A)
-#define PORT_SYNC_MODE_ENABLE (1 << 4)
-#define PORT_SYNC_MODE_MASTER_SELECT(x) ((x) << 0)
-#define PORT_SYNC_MODE_MASTER_SELECT_MASK (0x7 << 0)
-#define PORT_SYNC_MODE_MASTER_SELECT_SHIFT 0
+#define TRANS_DDI_FUNC_CTL2(tran) _MMIO_TRANS2(tran, _TRANS_DDI_FUNC_CTL2_A)
+#define PORT_SYNC_MODE_ENABLE REG_BIT(4)
+#define PORT_SYNC_MODE_MASTER_SELECT_MASK REG_GENMASK(2, 0)
+#define PORT_SYNC_MODE_MASTER_SELECT(x) REG_FIELD_PREP(PORT_SYNC_MODE_MASTER_SELECT_MASK, (x))
/* DisplayPort Transport Control */
#define _DP_TP_CTL_A 0x64040
@@ -9795,6 +9899,24 @@ enum skl_power_gate {
#define DDI_BUF_BALANCE_LEG_ENABLE (1 << 31)
#define DDI_BUF_TRANS_HI(port, i) _MMIO(_PORT(port, _DDI_BUF_TRANS_A, _DDI_BUF_TRANS_B) + (i) * 8 + 4)
+/* DDI DP Compliance Control */
+#define _DDI_DP_COMP_CTL_A 0x605F0
+#define _DDI_DP_COMP_CTL_B 0x615F0
+#define DDI_DP_COMP_CTL(pipe) _MMIO_PIPE(pipe, _DDI_DP_COMP_CTL_A, _DDI_DP_COMP_CTL_B)
+#define DDI_DP_COMP_CTL_ENABLE (1 << 31)
+#define DDI_DP_COMP_CTL_D10_2 (0 << 28)
+#define DDI_DP_COMP_CTL_SCRAMBLED_0 (1 << 28)
+#define DDI_DP_COMP_CTL_PRBS7 (2 << 28)
+#define DDI_DP_COMP_CTL_CUSTOM80 (3 << 28)
+#define DDI_DP_COMP_CTL_HBR2 (4 << 28)
+#define DDI_DP_COMP_CTL_SCRAMBLED_1 (5 << 28)
+#define DDI_DP_COMP_CTL_HBR2_RESET (0xFC << 0)
+
+/* DDI DP Compliance Pattern */
+#define _DDI_DP_COMP_PAT_A 0x605F4
+#define _DDI_DP_COMP_PAT_B 0x615F4
+#define DDI_DP_COMP_PAT(pipe, i) _MMIO(_PIPE(pipe, _DDI_DP_COMP_PAT_A, _DDI_DP_COMP_PAT_B) + (i) * 4)
+
/* Sideband Interface (SBI) is programmed indirectly, via
* SBI_ADDR, which contains the register offset; and SBI_DATA,
* which contains the payload */
@@ -10742,6 +10864,12 @@ enum skl_power_gate {
#define _PAL_PREC_MULTI_SEG_DATA_A 0x4A40C
#define _PAL_PREC_MULTI_SEG_DATA_B 0x4AC0C
+#define PAL_PREC_MULTI_SEG_RED_LDW_MASK REG_GENMASK(29, 24)
+#define PAL_PREC_MULTI_SEG_RED_UDW_MASK REG_GENMASK(29, 20)
+#define PAL_PREC_MULTI_SEG_GREEN_LDW_MASK REG_GENMASK(19, 14)
+#define PAL_PREC_MULTI_SEG_GREEN_UDW_MASK REG_GENMASK(19, 10)
+#define PAL_PREC_MULTI_SEG_BLUE_LDW_MASK REG_GENMASK(9, 4)
+#define PAL_PREC_MULTI_SEG_BLUE_UDW_MASK REG_GENMASK(9, 0)
#define PREC_PAL_MULTI_SEG_INDEX(pipe) _MMIO_PIPE(pipe, \
_PAL_PREC_MULTI_SEG_INDEX_A, \
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index e2b78db685ea..526c1e9acbd5 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -23,6 +23,7 @@
*/
#include <linux/dma-fence-array.h>
+#include <linux/dma-fence-chain.h>
#include <linux/irq_work.h>
#include <linux/prefetch.h>
#include <linux/sched.h>
@@ -101,6 +102,11 @@ static signed long i915_fence_wait(struct dma_fence *fence,
timeout);
}
+struct kmem_cache *i915_request_slab_cache(void)
+{
+ return global.slab_requests;
+}
+
static void i915_fence_release(struct dma_fence *fence)
{
struct i915_request *rq = to_request(fence);
@@ -115,6 +121,10 @@ static void i915_fence_release(struct dma_fence *fence)
i915_sw_fence_fini(&rq->submit);
i915_sw_fence_fini(&rq->semaphore);
+ /* Keep one request on each engine for reserved use under mempressure */
+ if (!cmpxchg(&rq->engine->request_pool, NULL, rq))
+ return;
+
kmem_cache_free(global.slab_requests, rq);
}
@@ -358,8 +368,6 @@ __await_execution(struct i915_request *rq,
}
spin_unlock_irq(&signal->lock);
- /* Copy across semaphore status as we need the same behaviour */
- rq->sched.flags |= signal->sched.flags;
return 0;
}
@@ -527,10 +535,8 @@ void __i915_request_unsubmit(struct i915_request *request)
spin_unlock(&request->lock);
/* We've already spun, don't charge on resubmitting. */
- if (request->sched.semaphores && i915_request_started(request)) {
- request->sched.attr.priority |= I915_PRIORITY_NOSEMAPHORE;
+ if (request->sched.semaphores && i915_request_started(request))
request->sched.semaphores = 0;
- }
/*
* We don't need to wake_up any waiters on request->execute, they
@@ -588,15 +594,6 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
return NOTIFY_DONE;
}
-static void irq_semaphore_cb(struct irq_work *wrk)
-{
- struct i915_request *rq =
- container_of(wrk, typeof(*rq), semaphore_work);
-
- i915_schedule_bump_priority(rq, I915_PRIORITY_NOSEMAPHORE);
- i915_request_put(rq);
-}
-
static int __i915_sw_fence_call
semaphore_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
{
@@ -604,11 +601,6 @@ semaphore_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
switch (state) {
case FENCE_COMPLETE:
- if (!(READ_ONCE(rq->sched.attr.priority) & I915_PRIORITY_NOSEMAPHORE)) {
- i915_request_get(rq);
- init_irq_work(&rq->semaphore_work, irq_semaphore_cb);
- irq_work_queue(&rq->semaphore_work);
- }
break;
case FENCE_FREE:
@@ -629,14 +621,22 @@ static void retire_requests(struct intel_timeline *tl)
}
static noinline struct i915_request *
-request_alloc_slow(struct intel_timeline *tl, gfp_t gfp)
+request_alloc_slow(struct intel_timeline *tl,
+ struct i915_request **rsvd,
+ gfp_t gfp)
{
struct i915_request *rq;
- if (list_empty(&tl->requests))
- goto out;
+ /* If we cannot wait, dip into our reserves */
+ if (!gfpflags_allow_blocking(gfp)) {
+ rq = xchg(rsvd, NULL);
+ if (!rq) /* Use the normal failure path for one final WARN */
+ goto out;
- if (!gfpflags_allow_blocking(gfp))
+ return rq;
+ }
+
+ if (list_empty(&tl->requests))
goto out;
/* Move our oldest request to the slab-cache (if not in use!) */
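
Taken together, the i915_fence_release() and request_alloc_slow() hunks implement a one-slot emergency pool: the free path parks a retired request with cmpxchg(&rq->engine->request_pool, NULL, rq), and the non-blocking allocation path reclaims it with xchg(rsvd, NULL) when the slab allocation fails. A hedged userspace sketch of the same single-slot reserve pattern using C11 atomics (the names below are illustrative, not the driver's):

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct request { int id; };

/* One-slot reserve, analogous to engine->request_pool. */
static _Atomic(struct request *) reserve = NULL;

/* Free path: park the object in the empty slot instead of freeing it. */
static void release_request(struct request *rq)
{
        struct request *expected = NULL;

        if (atomic_compare_exchange_strong(&reserve, &expected, rq))
                return;         /* kept for emergency reuse */
        free(rq);
}

/* Non-blocking allocation path: dip into the reserve if the heap fails. */
static struct request *alloc_request_atomic(void)
{
        struct request *rq = malloc(sizeof(*rq));

        if (!rq)
                rq = atomic_exchange(&reserve, NULL);
        return rq;              /* may still be NULL: heap and reserve empty */
}

int main(void)
{
        struct request *rq = alloc_request_atomic();

        if (rq) {
                rq->id = 1;
                printf("got request %d\n", rq->id);
                release_request(rq);            /* first release parks it */
        }
        free(atomic_exchange(&reserve, NULL));  /* drain on shutdown */
        return 0;
}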
@@ -721,7 +721,7 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
rq = kmem_cache_alloc(global.slab_requests,
gfp | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
if (unlikely(!rq)) {
- rq = request_alloc_slow(tl, gfp);
+ rq = request_alloc_slow(tl, &ce->engine->request_pool, gfp);
if (!rq) {
ret = -ENOMEM;
goto err_unreserve;
@@ -933,6 +933,7 @@ __emit_semaphore_wait(struct i915_request *to,
u32 *cs;
GEM_BUG_ON(INTEL_GEN(to->i915) < 8);
+ GEM_BUG_ON(i915_request_has_initial_breadcrumb(to));
/* We need to pin the signaler's HWSP until we are finished reading. */
err = intel_timeline_read_hwsp(from, to, &hwsp_offset);
@@ -978,13 +979,26 @@ emit_semaphore_wait(struct i915_request *to,
gfp_t gfp)
{
const intel_engine_mask_t mask = READ_ONCE(from->engine)->mask;
+ struct i915_sw_fence *wait = &to->submit;
if (!intel_context_use_semaphores(to->context))
goto await_fence;
+ if (i915_request_has_initial_breadcrumb(to))
+ goto await_fence;
+
if (!rcu_access_pointer(from->hwsp_cacheline))
goto await_fence;
+ /*
+ * If this or its dependents are waiting on an external fence
+ * that may fail catastrophically, then we want to avoid using
+ * semaphores as they bypass the fence signaling metadata, and we
+ * lose the fence->error propagation.
+ */
+ if (from->sched.flags & I915_SCHED_HAS_EXTERNAL_CHAIN)
+ goto await_fence;
+
/* Just emit the first semaphore we see as request space is limited. */
if (already_busywaiting(to) & mask)
goto await_fence;
@@ -1000,11 +1014,10 @@ emit_semaphore_wait(struct i915_request *to,
goto await_fence;
to->sched.semaphores |= mask;
- to->sched.flags |= I915_SCHED_HAS_SEMAPHORE_CHAIN;
- return 0;
+ wait = &to->semaphore;
await_fence:
- return i915_sw_fence_await_dma_fence(&to->submit,
+ return i915_sw_fence_await_dma_fence(wait,
&from->fence, 0,
I915_FENCE_GFP);
}
@@ -1039,15 +1052,56 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from)
if (ret < 0)
return ret;
- if (to->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN) {
- ret = i915_sw_fence_await_dma_fence(&to->semaphore,
- &from->fence, 0,
- I915_FENCE_GFP);
- if (ret < 0)
- return ret;
+ return 0;
+}
+
+static void mark_external(struct i915_request *rq)
+{
+ /*
+ * The downside of using semaphores is that we lose metadata passing
+ * along the signaling chain. This is particularly nasty when we
+ * need to pass along a fatal error such as EFAULT or EDEADLK. For
+ * fatal errors we want to scrub the request before it is executed,
+ * which means that we cannot preload the request onto HW and have
+ * it wait upon a semaphore.
+ */
+ rq->sched.flags |= I915_SCHED_HAS_EXTERNAL_CHAIN;
+}
+
+static int
+__i915_request_await_external(struct i915_request *rq, struct dma_fence *fence)
+{
+ mark_external(rq);
+ return i915_sw_fence_await_dma_fence(&rq->submit, fence,
+ i915_fence_context_timeout(rq->i915,
+ fence->context),
+ I915_FENCE_GFP);
+}
+
+static int
+i915_request_await_external(struct i915_request *rq, struct dma_fence *fence)
+{
+ struct dma_fence *iter;
+ int err = 0;
+
+ if (!to_dma_fence_chain(fence))
+ return __i915_request_await_external(rq, fence);
+
+ dma_fence_chain_for_each(iter, fence) {
+ struct dma_fence_chain *chain = to_dma_fence_chain(iter);
+
+ if (!dma_fence_is_i915(chain->fence)) {
+ err = __i915_request_await_external(rq, iter);
+ break;
+ }
+
+ err = i915_request_await_dma_fence(rq, chain->fence);
+ if (err < 0)
+ break;
}
- return 0;
+ dma_fence_put(iter);
+ return err;
}
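
i915_request_await_external() walks a dma_fence_chain link by link, awaiting i915 fences natively and falling back to a timeout-protected external wait on the first foreign link. A rough userspace analogue of that dispatch loop, with made-up fence types standing in for the dma-fence structures:

#include <stdbool.h>
#include <stdio.h>

struct fence {
        bool native;            /* "is_i915" analogue */
        const char *name;
        struct fence *next;     /* chain link, like dma_fence_chain */
};

static int await_native(struct fence *f)
{
        printf("native wait on %s\n", f->name);
        return 0;
}

static int await_external(struct fence *f)
{
        printf("external (timeout-protected) wait on %s\n", f->name);
        return 0;
}

/* Walk the chain; stop at the first foreign fence and wait on it externally. */
static int await_chain(struct fence *chain)
{
        struct fence *it;
        int err = 0;

        for (it = chain; it; it = it->next) {
                if (!it->native) {
                        err = await_external(it);
                        break;
                }
                err = await_native(it);
                if (err < 0)
                        break;
        }
        return err;
}

int main(void)
{
        struct fence c = { false, "foreign", NULL };
        struct fence b = { true,  "i915-b",  &c };
        struct fence a = { true,  "i915-a",  &b };

        return await_chain(&a);
}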
int
@@ -1097,9 +1151,7 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
if (dma_fence_is_i915(fence))
ret = i915_request_await_request(rq, to_request(fence));
else
- ret = i915_sw_fence_await_dma_fence(&rq->submit, fence,
- fence->context ? I915_FENCE_TIMEOUT : 0,
- I915_FENCE_GFP);
+ ret = i915_request_await_external(rq, fence);
if (ret < 0)
return ret;
@@ -1179,7 +1231,8 @@ __i915_request_await_execution(struct i915_request *to,
* immediate execution, and so we must wait until it reaches the
* active slot.
*/
- if (intel_engine_has_semaphores(to->engine)) {
+ if (intel_engine_has_semaphores(to->engine) &&
+ !i915_request_has_initial_breadcrumb(to)) {
err = __emit_semaphore_wait(to, from, from->fence.seqno - 1);
if (err < 0)
return err;
@@ -1225,6 +1278,9 @@ i915_request_await_execution(struct i915_request *rq,
continue;
}
+ if (fence->context == rq->fence.context)
+ continue;
+
/*
* We don't squash repeated fence dependencies here as we
* want to run our callback in all cases.
@@ -1235,9 +1291,7 @@ i915_request_await_execution(struct i915_request *rq,
to_request(fence),
hook);
else
- ret = i915_sw_fence_await_dma_fence(&rq->submit, fence,
- I915_FENCE_TIMEOUT,
- GFP_KERNEL);
+ ret = i915_request_await_external(rq, fence);
if (ret < 0)
return ret;
} while (--nchild);
@@ -1445,14 +1499,7 @@ void i915_request_add(struct i915_request *rq)
attr = ctx->sched;
rcu_read_unlock();
- if (!(rq->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN))
- attr.priority |= I915_PRIORITY_NOSEMAPHORE;
- if (list_empty(&rq->sched.signalers_list))
- attr.priority |= I915_PRIORITY_WAIT;
-
- local_bh_disable();
__i915_request_queue(rq, &attr);
- local_bh_enable(); /* Kick the execlists tasklet if just scheduled */
mutex_unlock(&tl->mutex);
}
@@ -1636,7 +1683,6 @@ long i915_request_wait(struct i915_request *rq,
if (flags & I915_WAIT_PRIORITY) {
if (!i915_request_started(rq) && INTEL_GEN(rq->i915) >= 6)
intel_rps_boost(rq);
- i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT);
}
wait.tsk = current;
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 3c552bfea67a..8ec7ee4dbadc 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -84,19 +84,26 @@ enum {
I915_FENCE_FLAG_PQUEUE,
/*
- * I915_FENCE_FLAG_SIGNAL - this request is currently on signal_list
+ * I915_FENCE_FLAG_HOLD - this request is currently on hold
*
- * Internal bookkeeping used by the breadcrumb code to track when
- * a request is on the various signal_list.
+ * This request has been suspended, pending an ongoing investigation.
*/
- I915_FENCE_FLAG_SIGNAL,
+ I915_FENCE_FLAG_HOLD,
/*
- * I915_FENCE_FLAG_HOLD - this request is currently on hold
+ * I915_FENCE_FLAG_INITIAL_BREADCRUMB - this request has the initial
+ * breadcrumb that marks the end of semaphore waits and start of the
+ * user payload.
+ */
+ I915_FENCE_FLAG_INITIAL_BREADCRUMB,
+
+ /*
+ * I915_FENCE_FLAG_SIGNAL - this request is currently on signal_list
*
- * This request has been suspended, pending an ongoing investigation.
+ * Internal bookkeeping used by the breadcrumb code to track when
+ * a request is on the various signal_list.
*/
- I915_FENCE_FLAG_HOLD,
+ I915_FENCE_FLAG_SIGNAL,
/*
* I915_FENCE_FLAG_NOPREEMPT - this request should not be preempted
@@ -209,7 +216,6 @@ struct i915_request {
};
struct list_head execute_cb;
struct i915_sw_fence semaphore;
- struct irq_work semaphore_work;
/*
* A list of everyone we wait upon, and everyone who waits upon us.
@@ -300,6 +306,8 @@ static inline bool dma_fence_is_i915(const struct dma_fence *fence)
return fence->ops == &i915_fence_ops;
}
+struct kmem_cache *i915_request_slab_cache(void);
+
struct i915_request * __must_check
__i915_request_create(struct intel_context *ce, gfp_t gfp);
struct i915_request * __must_check
@@ -388,6 +396,12 @@ static inline bool i915_request_in_priority_queue(const struct i915_request *rq)
return test_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
}
+static inline bool
+i915_request_has_initial_breadcrumb(const struct i915_request *rq)
+{
+ return test_bit(I915_FENCE_FLAG_INITIAL_BREADCRUMB, &rq->fence.flags);
+}
+
/**
* Returns true if seq1 is later than seq2.
*/
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index f0a9e8958ca0..f4ea318781f0 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -51,11 +51,11 @@ static void assert_priolists(struct intel_engine_execlists * const execlists)
GEM_BUG_ON(rb_first_cached(&execlists->queue) !=
rb_first(&execlists->queue.rb_root));
- last_prio = (INT_MAX >> I915_USER_PRIORITY_SHIFT) + 1;
+ last_prio = INT_MAX;
for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
const struct i915_priolist *p = to_priolist(rb);
- GEM_BUG_ON(p->priority >= last_prio);
+ GEM_BUG_ON(p->priority > last_prio);
last_prio = p->priority;
GEM_BUG_ON(!p->used);
@@ -174,7 +174,7 @@ sched_lock_engine(const struct i915_sched_node *node,
static inline int rq_prio(const struct i915_request *rq)
{
- return rq->sched.attr.priority | __NO_PREEMPTION;
+ return rq->sched.attr.priority;
}
static inline bool need_preempt(int prio, int active)
@@ -209,6 +209,12 @@ static void kick_submission(struct intel_engine_cs *engine,
if (!inflight)
goto unlock;
+ ENGINE_TRACE(engine,
+ "bumping queue-priority-hint:%d for rq:%llx:%lld, inflight:%llx:%lld prio %d\n",
+ prio,
+ rq->fence.context, rq->fence.seqno,
+ inflight->fence.context, inflight->fence.seqno,
+ inflight->sched.attr.priority);
engine->execlists.queue_priority_hint = prio;
/*
@@ -428,25 +434,12 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
dep->waiter = node;
dep->flags = flags;
- /* Keep track of whether anyone on this chain has a semaphore */
- if (signal->flags & I915_SCHED_HAS_SEMAPHORE_CHAIN &&
- !node_started(signal))
- node->flags |= I915_SCHED_HAS_SEMAPHORE_CHAIN;
-
/* All set, now publish. Beware the lockless walkers. */
list_add_rcu(&dep->signal_link, &node->signalers_list);
list_add_rcu(&dep->wait_link, &signal->waiters_list);
- /*
- * As we do not allow WAIT to preempt inflight requests,
- * once we have executed a request, along with triggering
- * any execution callbacks, we must preserve its ordering
- * within the non-preemptible FIFO.
- */
- BUILD_BUG_ON(__NO_PREEMPTION & ~I915_PRIORITY_MASK);
- if (flags & I915_DEPENDENCY_EXTERNAL)
- __bump_priority(signal, __NO_PREEMPTION);
-
+ /* Propagate the chains */
+ node->flags |= signal->flags;
ret = true;
}
@@ -465,10 +458,14 @@ int i915_sched_node_add_dependency(struct i915_sched_node *node,
if (!dep)
return -ENOMEM;
+ local_bh_disable();
+
if (!__i915_sched_node_add_dependency(node, signal, dep,
flags | I915_DEPENDENCY_ALLOC))
i915_dependency_free(dep);
+ local_bh_enable(); /* kick submission tasklet */
+
return 0;
}
diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h
index 7186875088a0..f72e6c397b08 100644
--- a/drivers/gpu/drm/i915/i915_scheduler_types.h
+++ b/drivers/gpu/drm/i915/i915_scheduler_types.h
@@ -65,7 +65,7 @@ struct i915_sched_node {
struct list_head link;
struct i915_sched_attr attr;
unsigned int flags;
-#define I915_SCHED_HAS_SEMAPHORE_CHAIN BIT(0)
+#define I915_SCHED_HAS_EXTERNAL_CHAIN BIT(0)
intel_engine_mask_t semaphores;
};
diff --git a/drivers/gpu/drm/i915/i915_selftest.h b/drivers/gpu/drm/i915/i915_selftest.h
index 98bcb6fa0ab4..d53d207ab6eb 100644
--- a/drivers/gpu/drm/i915/i915_selftest.h
+++ b/drivers/gpu/drm/i915/i915_selftest.h
@@ -133,4 +133,6 @@ bool __igt_timeout(unsigned long timeout, const char *fmt, ...);
#define igt_timeout(t, fmt, ...) \
__igt_timeout((t), KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
+void igt_hexdump(const void *buf, size_t len);
+
#endif /* !__I915_SELFTEST_H__ */
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c
index a3d38e089b6e..295b9829e2da 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence.c
+++ b/drivers/gpu/drm/i915/i915_sw_fence.c
@@ -421,7 +421,7 @@ static void timer_i915_sw_fence_wake(struct timer_list *t)
if (!fence)
return;
- pr_notice("Asynchronous wait on fence %s:%s:%llx timed out (hint:%pS)\n",
+ pr_notice("Asynchronous wait on fence %s:%s:%llx timed out (hint:%ps)\n",
cb->dma->ops->get_driver_name(cb->dma),
cb->dma->ops->get_timeline_name(cb->dma),
cb->dma->seqno,
@@ -546,13 +546,11 @@ int __i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
cb->fence = fence;
i915_sw_fence_await(fence);
- ret = dma_fence_add_callback(dma, &cb->base, __dma_i915_sw_fence_wake);
- if (ret == 0) {
- ret = 1;
- } else {
+ ret = 1;
+ if (dma_fence_add_callback(dma, &cb->base, __dma_i915_sw_fence_wake)) {
+ /* fence already signaled */
__dma_i915_sw_fence_wake(dma, &cb->base);
- if (ret == -ENOENT) /* fence already signaled */
- ret = 0;
+ ret = 0;
}
return ret;
diff --git a/drivers/gpu/drm/i915/i915_sw_fence_work.c b/drivers/gpu/drm/i915/i915_sw_fence_work.c
index 997b2998f1f2..a3a81bb8f2c3 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence_work.c
+++ b/drivers/gpu/drm/i915/i915_sw_fence_work.c
@@ -38,7 +38,10 @@ fence_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
if (!f->dma.error) {
dma_fence_get(&f->dma);
- queue_work(system_unbound_wq, &f->work);
+ if (test_bit(DMA_FENCE_WORK_IMM, &f->dma.flags))
+ fence_work(&f->work);
+ else
+ queue_work(system_unbound_wq, &f->work);
} else {
fence_complete(f);
}
diff --git a/drivers/gpu/drm/i915/i915_sw_fence_work.h b/drivers/gpu/drm/i915/i915_sw_fence_work.h
index 3a22b287e201..2c409f11c5c5 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence_work.h
+++ b/drivers/gpu/drm/i915/i915_sw_fence_work.h
@@ -32,6 +32,10 @@ struct dma_fence_work {
const struct dma_fence_work_ops *ops;
};
+enum {
+ DMA_FENCE_WORK_IMM = DMA_FENCE_FLAG_USER_BITS,
+};
+
void dma_fence_work_init(struct dma_fence_work *f,
const struct dma_fence_work_ops *ops);
int dma_fence_work_chain(struct dma_fence_work *f, struct dma_fence *signal);
@@ -41,4 +45,23 @@ static inline void dma_fence_work_commit(struct dma_fence_work *f)
i915_sw_fence_commit(&f->chain);
}
+/**
+ * dma_fence_work_commit_imm: Commit the fence, and if possible execute locally.
+ * @f: the fenced worker
+ *
+ * Instead of always scheduling a worker to execute the callback (see
+ * dma_fence_work_commit()), we try to execute the callback immediately in
+ * the local context. It is required that the fence be committed before it
+ * is published, and that no other threads try to tamper with the number
+ * of asynchronous waits on the fence (or else the callback will be
+ * executed in the wrong context, i.e. not the caller's).
+ */
+static inline void dma_fence_work_commit_imm(struct dma_fence_work *f)
+{
+ if (atomic_read(&f->chain.pending) <= 1)
+ __set_bit(DMA_FENCE_WORK_IMM, &f->dma.flags);
+
+ dma_fence_work_commit(f);
+}
+
#endif /* I915_SW_FENCE_WORK_H */
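
dma_fence_work_commit_imm() only sets DMA_FENCE_WORK_IMM when the committing thread is the sole outstanding waiter (pending <= 1), which lets fence_notify() run the work synchronously instead of bouncing through system_unbound_wq. A loose userspace sketch of that "run inline only if nothing else is pending" decision, with invented names:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct work_fence {
        atomic_int pending;     /* outstanding async waits */
        atomic_bool run_inline; /* DMA_FENCE_WORK_IMM analogue */
        void (*fn)(struct work_fence *);
};

static void queue_on_worker(struct work_fence *f)
{
        /* stand-in for queue_work(): here we just run it ourselves */
        printf("deferred to worker\n");
        f->fn(f);
}

static void notify(struct work_fence *f)
{
        if (atomic_load(&f->run_inline))
                f->fn(f);               /* execute in the caller's context */
        else
                queue_on_worker(f);
}

static void commit_imm(struct work_fence *f)
{
        /* Only safe when no other waiter can observe the fence yet. */
        if (atomic_load(&f->pending) <= 1)
                atomic_store(&f->run_inline, true);
        notify(f);
}

static void do_work(struct work_fence *f)
{
        (void)f;
        printf("work executed\n");
}

int main(void)
{
        struct work_fence f = { .fn = do_work };

        atomic_store(&f.pending, 1);
        commit_imm(&f);                 /* runs inline: one pending waiter */
        return 0;
}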
diff --git a/drivers/gpu/drm/i915/i915_switcheroo.c b/drivers/gpu/drm/i915/i915_switcheroo.c
index ed69b5d4a375..b3a24eac21f1 100644
--- a/drivers/gpu/drm/i915/i915_switcheroo.c
+++ b/drivers/gpu/drm/i915/i915_switcheroo.c
@@ -20,14 +20,14 @@ static void i915_switcheroo_set_state(struct pci_dev *pdev,
}
if (state == VGA_SWITCHEROO_ON) {
- pr_info("switched on\n");
+ drm_info(&i915->drm, "switched on\n");
i915->drm.switch_power_state = DRM_SWITCH_POWER_CHANGING;
/* i915 resume handler doesn't set to D0 */
pci_set_power_state(pdev, PCI_D0);
i915_resume_switcheroo(i915);
i915->drm.switch_power_state = DRM_SWITCH_POWER_ON;
} else {
- pr_info("switched off\n");
+ drm_info(&i915->drm, "switched off\n");
i915->drm.switch_power_state = DRM_SWITCH_POWER_CHANGING;
i915_suspend_switcheroo(i915, pmm);
i915->drm.switch_power_state = DRM_SWITCH_POWER_OFF;
diff --git a/drivers/gpu/drm/i915/i915_utils.c b/drivers/gpu/drm/i915/i915_utils.c
index 029854ae65fc..e28eae4a8f70 100644
--- a/drivers/gpu/drm/i915/i915_utils.c
+++ b/drivers/gpu/drm/i915/i915_utils.c
@@ -101,5 +101,6 @@ void set_timer_ms(struct timer_list *t, unsigned long timeout)
*/
barrier();
- mod_timer(t, jiffies + timeout);
+ /* Keep t->expires = 0 reserved to indicate a canceled timer. */
+ mod_timer(t, jiffies + timeout ?: 1);
}
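
The set_timer_ms() change leans on the GNU a ?: b shorthand: if jiffies + timeout happens to evaluate to 0, the timer is armed at 1 instead, keeping expires == 0 free as the "cancelled" sentinel mentioned in the comment. A tiny standalone illustration of reserving a zero sentinel this way (hypothetical helper, built with GNU C extensions):

#include <stdio.h>

/* expires == 0 is reserved to mean "not armed / cancelled". */
static unsigned long arm_at(unsigned long now, unsigned long delta)
{
        return (now + delta) ?: 1;      /* GNU ?: keeps the sentinel free */
}

int main(void)
{
        printf("%lu\n", arm_at(100, 20));       /* 120 */
        printf("%lu\n", arm_at(-20UL, 20));     /* wraps to 0 -> 1 */
        return 0;
}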
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 2cd7a7e87c0a..fc14ebf9a0b7 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -522,7 +522,6 @@ void i915_vma_unpin_and_release(struct i915_vma **p_vma, unsigned int flags)
GEM_BUG_ON(!obj);
i915_vma_unpin(vma);
- i915_vma_close(vma);
if (flags & I915_VMA_RELEASE_MAP)
i915_gem_object_unpin_map(obj);
@@ -610,18 +609,6 @@ bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long color)
return true;
}
-static void assert_bind_count(const struct drm_i915_gem_object *obj)
-{
- /*
- * Combine the assertion that the object is bound and that we have
- * pinned its pages. But we should never have bound the object
- * more than we have pinned its pages. (For complete accuracy, we
- * assume that no else is pinning the pages, but as a rough assertion
- * that we will not run into problems later, this will do!)
- */
- GEM_BUG_ON(atomic_read(&obj->mm.pages_pin_count) < atomic_read(&obj->bind_count));
-}
-
/**
* i915_vma_insert - finds a slot for the vma in its address space
* @vma: the vma
@@ -740,12 +727,6 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, color));
- if (vma->obj) {
- struct drm_i915_gem_object *obj = vma->obj;
-
- atomic_inc(&obj->bind_count);
- assert_bind_count(obj);
- }
list_add_tail(&vma->vm_link, &vma->vm->bound_list);
return 0;
@@ -763,12 +744,6 @@ i915_vma_detach(struct i915_vma *vma)
* it to be reaped by the shrinker.
*/
list_del(&vma->vm_link);
- if (vma->obj) {
- struct drm_i915_gem_object *obj = vma->obj;
-
- assert_bind_count(obj);
- atomic_dec(&obj->bind_count);
- }
}
static bool try_qad_pin(struct i915_vma *vma, unsigned int flags)
@@ -915,11 +890,30 @@ int i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
if (flags & PIN_GLOBAL)
wakeref = intel_runtime_pm_get(&vma->vm->i915->runtime_pm);
- /* No more allocations allowed once we hold vm->mutex */
- err = mutex_lock_interruptible(&vma->vm->mutex);
+ /*
+ * Differentiate between user/kernel vma inside the aliasing-ppgtt.
+ *
+ * We conflate the Global GTT with the user's vma when using the
+ * aliasing-ppgtt, but it is still vitally important to try and
+ * keep the use cases distinct. For example, userptr objects are
+ * not allowed inside the Global GTT as that will cause lock
+ * inversions when we have to evict them from the mmu_notifier callbacks -
+ * but they are allowed to be part of the user ppGTT which can never
+ * be mapped. As such we try to give the distinct users of the same
+ * mutex distinct lockclasses [equivalent to how we keep i915_ggtt
+ * and i915_ppgtt separate].
+ *
+ * NB this may cause us to mask real lock inversions -- while the
+ * code is safe today, lockdep may not be able to spot future
+ * transgressions.
+ */
+ err = mutex_lock_interruptible_nested(&vma->vm->mutex,
+ !(flags & PIN_GLOBAL));
if (err)
goto err_fence;
+ /* No more allocations allowed now we hold vm->mutex */
+
if (unlikely(i915_vma_is_closed(vma))) {
err = -ENOENT;
goto err_unlock;
@@ -982,7 +976,7 @@ err_unlock:
mutex_unlock(&vma->vm->mutex);
err_fence:
if (work)
- dma_fence_work_commit(&work->base);
+ dma_fence_work_commit_imm(&work->base);
if (wakeref)
intel_runtime_pm_put(&vma->vm->i915->runtime_pm, wakeref);
err_pages:
@@ -1028,13 +1022,8 @@ int i915_ggtt_pin(struct i915_vma *vma, u32 align, unsigned int flags)
} while (1);
}
-void i915_vma_close(struct i915_vma *vma)
+static void __vma_close(struct i915_vma *vma, struct intel_gt *gt)
{
- struct intel_gt *gt = vma->vm->gt;
- unsigned long flags;
-
- GEM_BUG_ON(i915_vma_is_closed(vma));
-
/*
* We defer actually closing, unbinding and destroying the VMA until
* the next idle point, or if the object is freed in the meantime. By
@@ -1047,9 +1036,25 @@ void i915_vma_close(struct i915_vma *vma)
* causing us to rebind the VMA once more. This ends up being a lot
* of wasted work for the steady state.
*/
- spin_lock_irqsave(&gt->closed_lock, flags);
+ GEM_BUG_ON(i915_vma_is_closed(vma));
list_add(&vma->closed_link, &gt->closed_vma);
- spin_unlock_irqrestore(&gt->closed_lock, flags);
+}
+
+void i915_vma_close(struct i915_vma *vma)
+{
+ struct intel_gt *gt = vma->vm->gt;
+ unsigned long flags;
+
+ if (i915_vma_is_ggtt(vma))
+ return;
+
+ GEM_BUG_ON(!atomic_read(&vma->open_count));
+ if (atomic_dec_and_lock_irqsave(&vma->open_count,
+ &gt->closed_lock,
+ flags)) {
+ __vma_close(vma, gt);
+ spin_unlock_irqrestore(&gt->closed_lock, flags);
+ }
}
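
i915_vma_close() now drops the open count locklessly and only takes closed_lock for the final 1 -> 0 transition, via atomic_dec_and_lock_irqsave(), before queuing the vma on gt->closed_vma. A hedged userspace approximation of the dec-and-lock idiom, using a pthread mutex in place of the spinlock and invented names:

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int open_count = 1;
static pthread_mutex_t closed_lock = PTHREAD_MUTEX_INITIALIZER;
static int closed_list_len;

/* Returns true, with the lock held, only for the 1 -> 0 transition. */
static bool dec_and_lock(atomic_int *cnt, pthread_mutex_t *lock)
{
        int old = atomic_load(cnt);

        /* Fast path: not the last reference, no lock needed. */
        while (old > 1) {
                if (atomic_compare_exchange_weak(cnt, &old, old - 1))
                        return false;
        }

        pthread_mutex_lock(lock);
        if (atomic_fetch_sub(cnt, 1) == 1)
                return true;            /* caller unlocks after closing */
        pthread_mutex_unlock(lock);
        return false;
}

static void vma_close(void)
{
        if (dec_and_lock(&open_count, &closed_lock)) {
                closed_list_len++;      /* "list_add(&vma->closed_link, ...)" */
                pthread_mutex_unlock(&closed_lock);
        }
}

int main(void)
{
        vma_close();
        printf("closed entries: %d\n", closed_list_len);
        return 0;
}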
static void __i915_vma_remove_closed(struct i915_vma *vma)
@@ -1174,7 +1179,8 @@ int __i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq)
GEM_BUG_ON(!i915_vma_is_pinned(vma));
/* Wait for the vma to be bound before we start! */
- err = i915_request_await_active(rq, &vma->active, 0);
+ err = i915_request_await_active(rq, &vma->active,
+ I915_ACTIVE_AWAIT_EXCL);
if (err)
return err;
@@ -1215,6 +1221,10 @@ int i915_vma_move_to_active(struct i915_vma *vma,
dma_resv_add_shared_fence(vma->resv, &rq->fence);
obj->write_domain = 0;
}
+
+ if (flags & EXEC_OBJECT_NEEDS_FENCE && vma->fence)
+ i915_active_add_request(&vma->fence->active, rq);
+
obj->read_domains |= I915_GEM_GPU_DOMAINS;
obj->mm.dirty = true;
@@ -1249,6 +1259,9 @@ int __i915_vma_unbind(struct i915_vma *vma)
GEM_BUG_ON(i915_vma_is_active(vma));
if (i915_vma_is_map_and_fenceable(vma)) {
+ /* Force a pagefault for domain tracking on next user access */
+ i915_vma_revoke_mmap(vma);
+
/*
* Check that we have flushed all writes through the GGTT
* before the unbind, otherwise due to the non-strict nature of those
@@ -1265,12 +1278,7 @@ int __i915_vma_unbind(struct i915_vma *vma)
i915_vma_flush_writes(vma);
/* release the fence reg _after_ flushing */
- ret = i915_vma_revoke_fence(vma);
- if (ret)
- return ret;
-
- /* Force a pagefault for domain tracking on next user access */
- i915_vma_revoke_mmap(vma);
+ i915_vma_revoke_fence(vma);
__i915_vma_iounmap(vma);
clear_bit(I915_VMA_CAN_FENCE_BIT, __i915_vma_flags(vma));
@@ -1315,7 +1323,7 @@ int i915_vma_unbind(struct i915_vma *vma)
/* XXX not always required: nop_clear_range */
wakeref = intel_runtime_pm_get(&vm->i915->runtime_pm);
- err = mutex_lock_interruptible(&vm->mutex);
+ err = mutex_lock_interruptible_nested(&vma->vm->mutex, !wakeref);
if (err)
goto out_rpm;
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index e1ced1df13e1..8ad1daabcd58 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -30,10 +30,10 @@
#include <drm/drm_mm.h>
+#include "gt/intel_ggtt_fencing.h"
#include "gem/i915_gem_object.h"
#include "i915_gem_gtt.h"
-#include "i915_gem_fence_reg.h"
#include "i915_active.h"
#include "i915_request.h"
@@ -326,7 +326,7 @@ static inline struct page *i915_vma_first_page(struct i915_vma *vma)
* True if the vma has a fence, false otherwise.
*/
int __must_check i915_vma_pin_fence(struct i915_vma *vma);
-int __must_check i915_vma_revoke_fence(struct i915_vma *vma);
+void i915_vma_revoke_fence(struct i915_vma *vma);
int __i915_vma_pin_fence(struct i915_vma *vma);
diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c
index d7fe12734db8..8a635bd4d5d8 100644
--- a/drivers/gpu/drm/i915/intel_device_info.c
+++ b/drivers/gpu/drm/i915/intel_device_info.c
@@ -98,6 +98,7 @@ void intel_device_info_print_static(const struct intel_device_info *info,
drm_printf(p, "platform: %s\n", intel_platform_name(info->platform));
drm_printf(p, "ppgtt-size: %d\n", info->ppgtt_size);
drm_printf(p, "ppgtt-type: %d\n", info->ppgtt_type);
+ drm_printf(p, "dma_mask_size: %u\n", info->dma_mask_size);
#define PRINT_FLAG(name) drm_printf(p, "%s: %s\n", #name, yesno(info->name));
DEV_INFO_FOR_EACH_FLAG(PRINT_FLAG);
@@ -135,8 +136,8 @@ void intel_device_info_print_runtime(const struct intel_runtime_info *info,
sseu_dump(&info->sseu, p);
drm_printf(p, "rawclk rate: %u kHz\n", info->rawclk_freq);
- drm_printf(p, "CS timestamp frequency: %u kHz\n",
- info->cs_timestamp_frequency_khz);
+ drm_printf(p, "CS timestamp frequency: %u Hz\n",
+ info->cs_timestamp_frequency_hz);
}
static int sseu_eu_idx(const struct sseu_dev_info *sseu, int slice,
@@ -677,12 +678,12 @@ static u32 read_reference_ts_freq(struct drm_i915_private *dev_priv)
base_freq = ((ts_override & GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK) >>
GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_SHIFT) + 1;
- base_freq *= 1000;
+ base_freq *= 1000000;
frac_freq = ((ts_override &
GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK) >>
GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_SHIFT);
- frac_freq = 1000 / (frac_freq + 1);
+ frac_freq = 1000000 / (frac_freq + 1);
return base_freq + frac_freq;
}
@@ -690,8 +691,8 @@ static u32 read_reference_ts_freq(struct drm_i915_private *dev_priv)
static u32 gen10_get_crystal_clock_freq(struct drm_i915_private *dev_priv,
u32 rpm_config_reg)
{
- u32 f19_2_mhz = 19200;
- u32 f24_mhz = 24000;
+ u32 f19_2_mhz = 19200000;
+ u32 f24_mhz = 24000000;
u32 crystal_clock = (rpm_config_reg &
GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK) >>
GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT;
@@ -710,10 +711,10 @@ static u32 gen10_get_crystal_clock_freq(struct drm_i915_private *dev_priv,
static u32 gen11_get_crystal_clock_freq(struct drm_i915_private *dev_priv,
u32 rpm_config_reg)
{
- u32 f19_2_mhz = 19200;
- u32 f24_mhz = 24000;
- u32 f25_mhz = 25000;
- u32 f38_4_mhz = 38400;
+ u32 f19_2_mhz = 19200000;
+ u32 f24_mhz = 24000000;
+ u32 f25_mhz = 25000000;
+ u32 f38_4_mhz = 38400000;
u32 crystal_clock = (rpm_config_reg &
GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK) >>
GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT;
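
These helpers now report the command streamer timestamp frequency in Hz rather than kHz, so the derived tick period can be computed without losing precision. As a quick sanity check of the arithmetic: a 19.2 MHz crystal gives cs_timestamp_frequency_hz = 19200000, i.e. roughly 52 ns per tick (plain integer math below, not the driver's div_u64 helpers):

#include <stdio.h>

int main(void)
{
        const unsigned long long ns_per_s = 1000000000ULL;
        unsigned int freqs_hz[] = { 19200000, 24000000, 25000000, 38400000 };

        for (unsigned int i = 0; i < sizeof(freqs_hz) / sizeof(freqs_hz[0]); i++) {
                unsigned int hz = freqs_hz[i];

                /* cs_timestamp_period_ns analogue: one tick in nanoseconds */
                printf("%u Hz -> %llu ns per tick\n", hz, ns_per_s / hz);
        }
        return 0;
}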
@@ -735,9 +736,9 @@ static u32 gen11_get_crystal_clock_freq(struct drm_i915_private *dev_priv,
static u32 read_timestamp_frequency(struct drm_i915_private *dev_priv)
{
- u32 f12_5_mhz = 12500;
- u32 f19_2_mhz = 19200;
- u32 f24_mhz = 24000;
+ u32 f12_5_mhz = 12500000;
+ u32 f19_2_mhz = 19200000;
+ u32 f24_mhz = 24000000;
if (INTEL_GEN(dev_priv) <= 4) {
/* PRMs say:
@@ -746,7 +747,7 @@ static u32 read_timestamp_frequency(struct drm_i915_private *dev_priv)
* hclks." (through the "Clocking Configuration"
* ("CLKCFG") MCHBAR register)
*/
- return RUNTIME_INFO(dev_priv)->rawclk_freq / 16;
+ return RUNTIME_INFO(dev_priv)->rawclk_freq * 1000 / 16;
} else if (INTEL_GEN(dev_priv) <= 8) {
/* PRMs say:
*
@@ -980,35 +981,32 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
drm_info(&dev_priv->drm,
"Display fused off, disabling\n");
info->pipe_mask = 0;
+ info->cpu_transcoder_mask = 0;
} else if (fuse_strap & IVB_PIPE_C_DISABLE) {
drm_info(&dev_priv->drm, "PipeC fused off\n");
info->pipe_mask &= ~BIT(PIPE_C);
+ info->cpu_transcoder_mask &= ~BIT(TRANSCODER_C);
}
} else if (HAS_DISPLAY(dev_priv) && INTEL_GEN(dev_priv) >= 9) {
u32 dfsm = I915_READ(SKL_DFSM);
- u8 enabled_mask = info->pipe_mask;
-
- if (dfsm & SKL_DFSM_PIPE_A_DISABLE)
- enabled_mask &= ~BIT(PIPE_A);
- if (dfsm & SKL_DFSM_PIPE_B_DISABLE)
- enabled_mask &= ~BIT(PIPE_B);
- if (dfsm & SKL_DFSM_PIPE_C_DISABLE)
- enabled_mask &= ~BIT(PIPE_C);
- if (INTEL_GEN(dev_priv) >= 12 &&
- (dfsm & TGL_DFSM_PIPE_D_DISABLE))
- enabled_mask &= ~BIT(PIPE_D);
- /*
- * At least one pipe should be enabled and if there are
- * disabled pipes, they should be the last ones, with no holes
- * in the mask.
- */
- if (enabled_mask == 0 || !is_power_of_2(enabled_mask + 1))
- drm_err(&dev_priv->drm,
- "invalid pipe fuse configuration: enabled_mask=0x%x\n",
- enabled_mask);
- else
- info->pipe_mask = enabled_mask;
+ if (dfsm & SKL_DFSM_PIPE_A_DISABLE) {
+ info->pipe_mask &= ~BIT(PIPE_A);
+ info->cpu_transcoder_mask &= ~BIT(TRANSCODER_A);
+ }
+ if (dfsm & SKL_DFSM_PIPE_B_DISABLE) {
+ info->pipe_mask &= ~BIT(PIPE_B);
+ info->cpu_transcoder_mask &= ~BIT(TRANSCODER_B);
+ }
+ if (dfsm & SKL_DFSM_PIPE_C_DISABLE) {
+ info->pipe_mask &= ~BIT(PIPE_C);
+ info->cpu_transcoder_mask &= ~BIT(TRANSCODER_C);
+ }
+ if (INTEL_GEN(dev_priv) >= 12 &&
+ (dfsm & TGL_DFSM_PIPE_D_DISABLE)) {
+ info->pipe_mask &= ~BIT(PIPE_D);
+ info->cpu_transcoder_mask &= ~BIT(TRANSCODER_D);
+ }
if (dfsm & SKL_DFSM_DISPLAY_HDCP_DISABLE)
info->display.has_hdcp = 0;
@@ -1050,11 +1048,11 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
drm_dbg(&dev_priv->drm, "rawclk rate: %d kHz\n", runtime->rawclk_freq);
/* Initialize command stream timestamp frequency */
- runtime->cs_timestamp_frequency_khz =
+ runtime->cs_timestamp_frequency_hz =
read_timestamp_frequency(dev_priv);
- if (runtime->cs_timestamp_frequency_khz) {
+ if (runtime->cs_timestamp_frequency_hz) {
runtime->cs_timestamp_period_ns =
- div_u64(1e6, runtime->cs_timestamp_frequency_khz);
+ i915_cs_timestamp_ticks_to_ns(dev_priv, 1);
drm_dbg(&dev_priv->drm,
"CS timestamp wraparound in %lldms\n",
div_u64(mul_u32_u32(runtime->cs_timestamp_period_ns,
diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
index 1ecb9df2de91..62e03ffa377e 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -158,6 +158,8 @@ struct intel_device_info {
enum intel_platform platform;
+ unsigned int dma_mask_size; /* available DMA address bits */
+
enum intel_ppgtt_type ppgtt_type;
unsigned int ppgtt_size; /* log2, e.g. 31/32/48 bits */
@@ -168,6 +170,7 @@ struct intel_device_info {
u32 display_mmio_offset;
u8 pipe_mask;
+ u8 cpu_transcoder_mask;
#define DEFINE_FLAG(name) u8 name:1
DEV_INFO_FOR_EACH_FLAG(DEFINE_FLAG);
@@ -218,7 +221,7 @@ struct intel_runtime_info {
u32 rawclk_freq;
- u32 cs_timestamp_frequency_khz;
+ u32 cs_timestamp_frequency_hz;
u32 cs_timestamp_period_ns;
/* Media engine access to SFC per instance */
diff --git a/drivers/gpu/drm/i915/intel_dram.c b/drivers/gpu/drm/i915/intel_dram.c
index 6b922efb1d7c..8aa12cad93ce 100644
--- a/drivers/gpu/drm/i915/intel_dram.c
+++ b/drivers/gpu/drm/i915/intel_dram.c
@@ -495,6 +495,5 @@ void intel_dram_edram_detect(struct drm_i915_private *i915)
else
i915->edram_size_mb = gen9_edram_size_mb(i915, edram_cap);
- dev_info(i915->drm.dev,
- "Found %uMB of eDRAM\n", i915->edram_size_mb);
+ drm_info(&i915->drm, "Found %uMB of eDRAM\n", i915->edram_size_mb);
}
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index a52986a9e7a6..696491d71a1d 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -43,6 +43,7 @@
#include "i915_fixed.h"
#include "i915_irq.h"
#include "i915_trace.h"
+#include "display/intel_bw.h"
#include "intel_pm.h"
#include "intel_sideband.h"
#include "../../../platform/x86/intel_ips.h"
@@ -3637,10 +3638,6 @@ static bool skl_needs_memory_bw_wa(struct drm_i915_private *dev_priv)
static bool
intel_has_sagv(struct drm_i915_private *dev_priv)
{
- /* HACK! */
- if (IS_GEN(dev_priv, 12))
- return false;
-
return (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) &&
dev_priv->sagv_status != I915_SAGV_NOT_CONTROLLED;
}
@@ -3757,42 +3754,120 @@ intel_disable_sagv(struct drm_i915_private *dev_priv)
return 0;
}
-bool intel_can_enable_sagv(struct intel_atomic_state *state)
+void intel_sagv_pre_plane_update(struct intel_atomic_state *state)
{
- struct drm_device *dev = state->base.dev;
- struct drm_i915_private *dev_priv = to_i915(dev);
- struct intel_crtc *crtc;
- struct intel_plane *plane;
- struct intel_crtc_state *crtc_state;
- enum pipe pipe;
- int level, latency;
+ struct drm_i915_private *dev_priv = to_i915(state->base.dev);
+ const struct intel_bw_state *new_bw_state;
+ const struct intel_bw_state *old_bw_state;
+ u32 new_mask = 0;
+ /*
+ * Just return if we can't control SAGV or don't have it.
+ * This is different from situation when we have SAGV but just can't
+ * This is different from the situation where we have SAGV but just
+ * can't afford it due to DBuf limitations: if SAGV is completely
+ * disabled in the BIOS, we are not even allowed to send a PCode
+ * request, as it will throw an error. So we have to check it here.
if (!intel_has_sagv(dev_priv))
- return false;
+ return;
+
+ new_bw_state = intel_atomic_get_new_bw_state(state);
+ if (!new_bw_state)
+ return;
+ if (INTEL_GEN(dev_priv) < 11 && !intel_can_enable_sagv(dev_priv, new_bw_state)) {
+ intel_disable_sagv(dev_priv);
+ return;
+ }
+
+ old_bw_state = intel_atomic_get_old_bw_state(state);
/*
- * If there are no active CRTCs, no additional checks need be performed
+ * Nothing to mask
*/
- if (hweight8(state->active_pipes) == 0)
- return true;
+ if (new_bw_state->qgv_points_mask == old_bw_state->qgv_points_mask)
+ return;
+
+ new_mask = old_bw_state->qgv_points_mask | new_bw_state->qgv_points_mask;
+
+ /*
+ * If the new mask is zero, there is nothing to mask here;
+ * we can only unmask, which is done in the post-plane-update hook.
+ */
+ if (!new_mask)
+ return;
+
+ /*
+ * Restrict required qgv points before updating the configuration.
+ * According to BSpec we can't mask and unmask qgv points at the same
+ * time. Also masking should be done before updating the configuration
+ * and unmasking afterwards.
+ */
+ icl_pcode_restrict_qgv_points(dev_priv, new_mask);
+}
+
+void intel_sagv_post_plane_update(struct intel_atomic_state *state)
+{
+ struct drm_i915_private *dev_priv = to_i915(state->base.dev);
+ const struct intel_bw_state *new_bw_state;
+ const struct intel_bw_state *old_bw_state;
+ u32 new_mask = 0;
+
+ /*
+ * Just return if we can't control SAGV or don't have it.
+ * This is different from the situation where we have SAGV but just
+ * can't afford it due to DBuf limitations: if SAGV is completely
+ * disabled in the BIOS, we are not even allowed to send a PCode
+ * request, as it will throw an error. So we have to check it here.
+ */
+ if (!intel_has_sagv(dev_priv))
+ return;
+
+ new_bw_state = intel_atomic_get_new_bw_state(state);
+ if (!new_bw_state)
+ return;
+
+ if (INTEL_GEN(dev_priv) < 11 && intel_can_enable_sagv(dev_priv, new_bw_state)) {
+ intel_enable_sagv(dev_priv);
+ return;
+ }
+
+ old_bw_state = intel_atomic_get_old_bw_state(state);
+ /*
+ * Nothing to unmask
+ */
+ if (new_bw_state->qgv_points_mask == old_bw_state->qgv_points_mask)
+ return;
+
+ new_mask = new_bw_state->qgv_points_mask;
/*
- * SKL+ workaround: bspec recommends we disable SAGV when we have
- * more then one pipe enabled
+ * Allow required qgv points after updating the configuration.
+ * According to BSpec we can't mask and unmask qgv points at the same
+ * time. Also masking should be done before updating the configuration
+ * and unmasking afterwards.
*/
- if (hweight8(state->active_pipes) > 1)
+ icl_pcode_restrict_qgv_points(dev_priv, new_mask);
+}
+
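
The pre/post pair above encodes the BSpec ordering rule that QGV points may only be masked before the configuration change and unmasked after it: intel_sagv_pre_plane_update() restricts the union of the old and new masks, and intel_sagv_post_plane_update() then relaxes to just the new mask. A small worked example of that two-step mask update, with arbitrary example bit values:

#include <stdio.h>

int main(void)
{
        unsigned int old_mask = 0x3;    /* points restricted by the old state */
        unsigned int new_mask = 0x6;    /* points the new state wants restricted */

        /* Before the update: never unmask yet, so restrict the union. */
        unsigned int pre  = old_mask | new_mask;        /* 0x7 */

        /* After the update: relax to only what the new state needs. */
        unsigned int post = new_mask;                   /* 0x6 */

        printf("pre=0x%x post=0x%x\n", pre, post);
        return 0;
}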
+static bool skl_crtc_can_enable_sagv(const struct intel_crtc_state *crtc_state)
+{
+ struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
+ struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+ struct intel_plane *plane;
+ const struct intel_plane_state *plane_state;
+ int level, latency;
+
+ if (!intel_has_sagv(dev_priv))
return false;
- /* Since we're now guaranteed to only have one active CRTC... */
- pipe = ffs(state->active_pipes) - 1;
- crtc = intel_get_crtc_for_pipe(dev_priv, pipe);
- crtc_state = to_intel_crtc_state(crtc->base.state);
+ if (!crtc_state->hw.active)
+ return true;
if (crtc_state->hw.adjusted_mode.flags & DRM_MODE_FLAG_INTERLACE)
return false;
- for_each_intel_plane_on_crtc(dev, crtc, plane) {
- struct skl_plane_wm *wm =
+ intel_atomic_crtc_state_for_each_plane_state(plane, plane_state, crtc_state) {
+ const struct skl_plane_wm *wm =
&crtc_state->wm.skl.optimal.planes[plane->id];
/* Skip this plane if it's not enabled */
@@ -3807,7 +3882,7 @@ bool intel_can_enable_sagv(struct intel_atomic_state *state)
latency = dev_priv->wm.skl_latency[level];
if (skl_needs_memory_bw_wa(dev_priv) &&
- plane->base.state->fb->modifier ==
+ plane_state->uapi.fb->modifier ==
I915_FORMAT_MOD_X_TILED)
latency += 15;
@@ -3823,6 +3898,112 @@ bool intel_can_enable_sagv(struct intel_atomic_state *state)
return true;
}
+static bool tgl_crtc_can_enable_sagv(const struct intel_crtc_state *crtc_state)
+{
+ struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
+ enum plane_id plane_id;
+
+ if (!crtc_state->hw.active)
+ return true;
+
+ for_each_plane_id_on_crtc(crtc, plane_id) {
+ const struct skl_ddb_entry *plane_alloc =
+ &crtc_state->wm.skl.plane_ddb_y[plane_id];
+ const struct skl_plane_wm *wm =
+ &crtc_state->wm.skl.optimal.planes[plane_id];
+
+ if (skl_ddb_entry_size(plane_alloc) < wm->sagv_wm0.min_ddb_alloc)
+ return false;
+ }
+
+ return true;
+}
+
+static bool intel_crtc_can_enable_sagv(const struct intel_crtc_state *crtc_state)
+{
+ struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
+ struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+
+ if (INTEL_GEN(dev_priv) >= 12)
+ return tgl_crtc_can_enable_sagv(crtc_state);
+ else
+ return skl_crtc_can_enable_sagv(crtc_state);
+}
+
+bool intel_can_enable_sagv(struct drm_i915_private *dev_priv,
+ const struct intel_bw_state *bw_state)
+{
+ if (INTEL_GEN(dev_priv) < 11 &&
+ bw_state->active_pipes && !is_power_of_2(bw_state->active_pipes))
+ return false;
+
+ return bw_state->pipe_sagv_reject == 0;
+}
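
intel_can_enable_sagv() now boils down to two bitmask tests: on pre-gen11 hardware SAGV is only permitted with at most one active pipe (active_pipes must be zero or a power of two), and no pipe may have vetoed it (pipe_sagv_reject must be empty). A short worked example of those checks, with assumed pipe bit values:

#include <stdbool.h>
#include <stdio.h>

#define PIPE_A (1u << 0)
#define PIPE_B (1u << 1)

static bool is_power_of_2(unsigned int v)
{
        return v && !(v & (v - 1));
}

static bool can_enable_sagv(int gen, unsigned int active_pipes,
                            unsigned int sagv_reject)
{
        /* Pre-gen11: more than one active pipe disqualifies SAGV. */
        if (gen < 11 && active_pipes && !is_power_of_2(active_pipes))
                return false;

        return sagv_reject == 0;
}

int main(void)
{
        printf("%d\n", can_enable_sagv(9,  PIPE_A,          0));       /* 1 */
        printf("%d\n", can_enable_sagv(9,  PIPE_A | PIPE_B, 0));       /* 0 */
        printf("%d\n", can_enable_sagv(12, PIPE_A | PIPE_B, 0));       /* 1 */
        printf("%d\n", can_enable_sagv(12, PIPE_A,          PIPE_A));  /* 0 */
        return 0;
}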
+
+static int intel_compute_sagv_mask(struct intel_atomic_state *state)
+{
+ struct drm_i915_private *dev_priv = to_i915(state->base.dev);
+ int ret;
+ struct intel_crtc *crtc;
+ struct intel_crtc_state *new_crtc_state;
+ struct intel_bw_state *new_bw_state = NULL;
+ const struct intel_bw_state *old_bw_state = NULL;
+ int i;
+
+ for_each_new_intel_crtc_in_state(state, crtc,
+ new_crtc_state, i) {
+ new_bw_state = intel_atomic_get_bw_state(state);
+ if (IS_ERR(new_bw_state))
+ return PTR_ERR(new_bw_state);
+
+ old_bw_state = intel_atomic_get_old_bw_state(state);
+
+ if (intel_crtc_can_enable_sagv(new_crtc_state))
+ new_bw_state->pipe_sagv_reject &= ~BIT(crtc->pipe);
+ else
+ new_bw_state->pipe_sagv_reject |= BIT(crtc->pipe);
+ }
+
+ if (!new_bw_state)
+ return 0;
+
+ new_bw_state->active_pipes =
+ intel_calc_active_pipes(state, old_bw_state->active_pipes);
+
+ if (new_bw_state->active_pipes != old_bw_state->active_pipes) {
+ ret = intel_atomic_lock_global_state(&new_bw_state->base);
+ if (ret)
+ return ret;
+ }
+
+ for_each_new_intel_crtc_in_state(state, crtc,
+ new_crtc_state, i) {
+ struct skl_pipe_wm *pipe_wm = &new_crtc_state->wm.skl.optimal;
+
+ /*
+ * We store use_sagv_wm in the crtc state rather than relying on
+ * that bw state since we have no convenient way to get at the
+ * latter from the plane commit hooks (especially in the legacy
+ * cursor case)
+ */
+ pipe_wm->use_sagv_wm = INTEL_GEN(dev_priv) >= 12 &&
+ intel_can_enable_sagv(dev_priv, new_bw_state);
+ }
+
+ if (intel_can_enable_sagv(dev_priv, new_bw_state) !=
+ intel_can_enable_sagv(dev_priv, old_bw_state)) {
+ ret = intel_atomic_serialize_global_state(&new_bw_state->base);
+ if (ret)
+ return ret;
+ } else if (new_bw_state->pipe_sagv_reject != old_bw_state->pipe_sagv_reject) {
+ ret = intel_atomic_lock_global_state(&new_bw_state->base);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
/*
* Calculate initial DBuf slice offset, based on slice size
* and mask(i.e if slice size is 1024 and second slice is enabled
@@ -4016,6 +4197,7 @@ static int skl_compute_wm_params(const struct intel_crtc_state *crtc_state,
int color_plane);
static void skl_compute_plane_wm(const struct intel_crtc_state *crtc_state,
int level,
+ unsigned int latency,
const struct skl_wm_params *wp,
const struct skl_wm_level *result_prev,
struct skl_wm_level *result /* out */);
@@ -4038,7 +4220,9 @@ skl_cursor_allocation(const struct intel_crtc_state *crtc_state,
drm_WARN_ON(&dev_priv->drm, ret);
for (level = 0; level <= max_level; level++) {
- skl_compute_plane_wm(crtc_state, level, &wp, &wm, &wm);
+ unsigned int latency = dev_priv->wm.skl_latency[level];
+
+ skl_compute_plane_wm(crtc_state, level, latency, &wp, &wm, &wm);
if (wm.min_ddb_alloc == U16_MAX)
break;
@@ -4544,6 +4728,20 @@ icl_get_total_relative_data_rate(struct intel_crtc_state *crtc_state,
return total_data_rate;
}
+static const struct skl_wm_level *
+skl_plane_wm_level(const struct intel_crtc_state *crtc_state,
+ enum plane_id plane_id,
+ int level)
+{
+ const struct skl_pipe_wm *pipe_wm = &crtc_state->wm.skl.optimal;
+ const struct skl_plane_wm *wm = &pipe_wm->planes[plane_id];
+
+ if (level == 0 && pipe_wm->use_sagv_wm)
+ return &wm->sagv_wm0;
+
+ return &wm->wm[level];
+}
+
static int
skl_allocate_pipe_ddb(struct intel_crtc_state *crtc_state)
{
@@ -4580,7 +4778,6 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *crtc_state)
plane_data_rate,
uv_plane_data_rate);
-
skl_ddb_get_pipe_allocation_limits(dev_priv, crtc_state, total_data_rate,
alloc, &num_active);
alloc_size = skl_ddb_entry_size(alloc);
@@ -4780,7 +4977,7 @@ skl_wm_method1(const struct drm_i915_private *dev_priv, u32 pixel_rate,
wm_intermediate_val = latency * pixel_rate * cpp;
ret = div_fixed16(wm_intermediate_val, 1000 * dbuf_block_size);
- if (INTEL_GEN(dev_priv) >= 10)
+ if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv))
ret = add_fixed16_u32(ret, 1);
return ret;
@@ -4915,18 +5112,19 @@ skl_compute_wm_params(const struct intel_crtc_state *crtc_state,
wp->y_min_scanlines,
wp->dbuf_block_size);
- if (INTEL_GEN(dev_priv) >= 10)
+ if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv))
interm_pbpl++;
wp->plane_blocks_per_line = div_fixed16(interm_pbpl,
wp->y_min_scanlines);
- } else if (wp->x_tiled && IS_GEN(dev_priv, 9)) {
- interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line,
- wp->dbuf_block_size);
- wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl);
} else {
interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line,
- wp->dbuf_block_size) + 1;
+ wp->dbuf_block_size);
+
+ if (!wp->x_tiled ||
+ INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv))
+ interm_pbpl++;
+
wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl);
}
@@ -4972,12 +5170,12 @@ static bool skl_wm_has_lines(struct drm_i915_private *dev_priv, int level)
static void skl_compute_plane_wm(const struct intel_crtc_state *crtc_state,
int level,
+ unsigned int latency,
const struct skl_wm_params *wp,
const struct skl_wm_level *result_prev,
struct skl_wm_level *result /* out */)
{
struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
- u32 latency = dev_priv->wm.skl_latency[level];
uint_fixed_16_16_t method1, method2;
uint_fixed_16_16_t selected_result;
u32 res_blocks, res_lines, min_ddb_alloc = 0;
@@ -5106,14 +5304,29 @@ skl_compute_wm_levels(const struct intel_crtc_state *crtc_state,
for (level = 0; level <= max_level; level++) {
struct skl_wm_level *result = &levels[level];
+ unsigned int latency = dev_priv->wm.skl_latency[level];
- skl_compute_plane_wm(crtc_state, level, wm_params,
- result_prev, result);
+ skl_compute_plane_wm(crtc_state, level, latency,
+ wm_params, result_prev, result);
result_prev = result;
}
}
+static void tgl_compute_sagv_wm(const struct intel_crtc_state *crtc_state,
+ const struct skl_wm_params *wm_params,
+ struct skl_plane_wm *plane_wm)
+{
+ struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
+ struct skl_wm_level *sagv_wm = &plane_wm->sagv_wm0;
+ struct skl_wm_level *levels = plane_wm->wm;
+ unsigned int latency = dev_priv->wm.skl_latency[0] + dev_priv->sagv_block_time_us;
+
+ skl_compute_plane_wm(crtc_state, 0, latency,
+ wm_params, &levels[0],
+ sagv_wm);
+}
+
static void skl_compute_transition_wm(const struct intel_crtc_state *crtc_state,
const struct skl_wm_params *wp,
struct skl_plane_wm *wm)
@@ -5166,10 +5379,6 @@ static void skl_compute_transition_wm(const struct intel_crtc_state *crtc_state,
trans_offset_b;
} else {
res_blocks = wm0_sel_res_b + trans_offset_b;
-
- /* WA BUG:1938466 add one block for non y-tile planes */
- if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_A0))
- res_blocks += 1;
}
/*
@@ -5185,6 +5394,8 @@ static int skl_build_plane_wm_single(struct intel_crtc_state *crtc_state,
const struct intel_plane_state *plane_state,
enum plane_id plane_id, int color_plane)
{
+ struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
+ struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
struct skl_plane_wm *wm = &crtc_state->wm.skl.optimal.planes[plane_id];
struct skl_wm_params wm_params;
int ret;
@@ -5195,6 +5406,10 @@ static int skl_build_plane_wm_single(struct intel_crtc_state *crtc_state,
return ret;
skl_compute_wm_levels(crtc_state, &wm_params, wm->wm);
+
+ if (INTEL_GEN(dev_priv) >= 12)
+ tgl_compute_sagv_wm(crtc_state, &wm_params, wm);
+
skl_compute_transition_wm(crtc_state, &wm_params, wm);
return 0;
@@ -5354,8 +5569,12 @@ void skl_write_plane_wm(struct intel_plane *plane,
&crtc_state->wm.skl.plane_ddb_uv[plane_id];
for (level = 0; level <= max_level; level++) {
+ const struct skl_wm_level *wm_level;
+
+ wm_level = skl_plane_wm_level(crtc_state, plane_id, level);
+
skl_write_wm_level(dev_priv, PLANE_WM(pipe, plane_id, level),
- &wm->wm[level]);
+ wm_level);
}
skl_write_wm_level(dev_priv, PLANE_WM_TRANS(pipe, plane_id),
&wm->trans_wm);
@@ -5388,8 +5607,12 @@ void skl_write_cursor_wm(struct intel_plane *plane,
&crtc_state->wm.skl.plane_ddb_y[plane_id];
for (level = 0; level <= max_level; level++) {
+ const struct skl_wm_level *wm_level;
+
+ wm_level = skl_plane_wm_level(crtc_state, plane_id, level);
+
skl_write_wm_level(dev_priv, CUR_WM(pipe, level),
- &wm->wm[level]);
+ wm_level);
}
skl_write_wm_level(dev_priv, CUR_WM_TRANS(pipe), &wm->trans_wm);
@@ -5424,8 +5647,8 @@ static bool skl_plane_wm_equals(struct drm_i915_private *dev_priv,
return skl_wm_level_equals(&wm1->trans_wm, &wm2->trans_wm);
}
-static inline bool skl_ddb_entries_overlap(const struct skl_ddb_entry *a,
- const struct skl_ddb_entry *b)
+static bool skl_ddb_entries_overlap(const struct skl_ddb_entry *a,
+ const struct skl_ddb_entry *b)
{
return a->start < b->end && b->start < a->end;
}
@@ -5553,23 +5776,25 @@ skl_print_wm_changes(struct intel_atomic_state *state)
continue;
drm_dbg_kms(&dev_priv->drm,
- "[PLANE:%d:%s] level %cwm0,%cwm1,%cwm2,%cwm3,%cwm4,%cwm5,%cwm6,%cwm7,%ctwm"
- " -> %cwm0,%cwm1,%cwm2,%cwm3,%cwm4,%cwm5,%cwm6,%cwm7,%ctwm\n",
+ "[PLANE:%d:%s] level %cwm0,%cwm1,%cwm2,%cwm3,%cwm4,%cwm5,%cwm6,%cwm7,%ctwm,%cswm"
+ " -> %cwm0,%cwm1,%cwm2,%cwm3,%cwm4,%cwm5,%cwm6,%cwm7,%ctwm,%cswm\n",
plane->base.base.id, plane->base.name,
enast(old_wm->wm[0].plane_en), enast(old_wm->wm[1].plane_en),
enast(old_wm->wm[2].plane_en), enast(old_wm->wm[3].plane_en),
enast(old_wm->wm[4].plane_en), enast(old_wm->wm[5].plane_en),
enast(old_wm->wm[6].plane_en), enast(old_wm->wm[7].plane_en),
enast(old_wm->trans_wm.plane_en),
+ enast(old_wm->sagv_wm0.plane_en),
enast(new_wm->wm[0].plane_en), enast(new_wm->wm[1].plane_en),
enast(new_wm->wm[2].plane_en), enast(new_wm->wm[3].plane_en),
enast(new_wm->wm[4].plane_en), enast(new_wm->wm[5].plane_en),
enast(new_wm->wm[6].plane_en), enast(new_wm->wm[7].plane_en),
- enast(new_wm->trans_wm.plane_en));
+ enast(new_wm->trans_wm.plane_en),
+ enast(new_wm->sagv_wm0.plane_en));
drm_dbg_kms(&dev_priv->drm,
- "[PLANE:%d:%s] lines %c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d"
- " -> %c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d\n",
+ "[PLANE:%d:%s] lines %c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d"
+ " -> %c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d\n",
plane->base.base.id, plane->base.name,
enast(old_wm->wm[0].ignore_lines), old_wm->wm[0].plane_res_l,
enast(old_wm->wm[1].ignore_lines), old_wm->wm[1].plane_res_l,
@@ -5580,6 +5805,7 @@ skl_print_wm_changes(struct intel_atomic_state *state)
enast(old_wm->wm[6].ignore_lines), old_wm->wm[6].plane_res_l,
enast(old_wm->wm[7].ignore_lines), old_wm->wm[7].plane_res_l,
enast(old_wm->trans_wm.ignore_lines), old_wm->trans_wm.plane_res_l,
+ enast(old_wm->sagv_wm0.ignore_lines), old_wm->sagv_wm0.plane_res_l,
enast(new_wm->wm[0].ignore_lines), new_wm->wm[0].plane_res_l,
enast(new_wm->wm[1].ignore_lines), new_wm->wm[1].plane_res_l,
@@ -5589,37 +5815,42 @@ skl_print_wm_changes(struct intel_atomic_state *state)
enast(new_wm->wm[5].ignore_lines), new_wm->wm[5].plane_res_l,
enast(new_wm->wm[6].ignore_lines), new_wm->wm[6].plane_res_l,
enast(new_wm->wm[7].ignore_lines), new_wm->wm[7].plane_res_l,
- enast(new_wm->trans_wm.ignore_lines), new_wm->trans_wm.plane_res_l);
+ enast(new_wm->trans_wm.ignore_lines), new_wm->trans_wm.plane_res_l,
+ enast(new_wm->sagv_wm0.ignore_lines), new_wm->sagv_wm0.plane_res_l);
drm_dbg_kms(&dev_priv->drm,
- "[PLANE:%d:%s] blocks %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d"
- " -> %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d\n",
+ "[PLANE:%d:%s] blocks %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d"
+ " -> %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d\n",
plane->base.base.id, plane->base.name,
old_wm->wm[0].plane_res_b, old_wm->wm[1].plane_res_b,
old_wm->wm[2].plane_res_b, old_wm->wm[3].plane_res_b,
old_wm->wm[4].plane_res_b, old_wm->wm[5].plane_res_b,
old_wm->wm[6].plane_res_b, old_wm->wm[7].plane_res_b,
old_wm->trans_wm.plane_res_b,
+ old_wm->sagv_wm0.plane_res_b,
new_wm->wm[0].plane_res_b, new_wm->wm[1].plane_res_b,
new_wm->wm[2].plane_res_b, new_wm->wm[3].plane_res_b,
new_wm->wm[4].plane_res_b, new_wm->wm[5].plane_res_b,
new_wm->wm[6].plane_res_b, new_wm->wm[7].plane_res_b,
- new_wm->trans_wm.plane_res_b);
+ new_wm->trans_wm.plane_res_b,
+ new_wm->sagv_wm0.plane_res_b);
drm_dbg_kms(&dev_priv->drm,
- "[PLANE:%d:%s] min_ddb %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d"
- " -> %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d\n",
+ "[PLANE:%d:%s] min_ddb %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d"
+ " -> %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d\n",
plane->base.base.id, plane->base.name,
old_wm->wm[0].min_ddb_alloc, old_wm->wm[1].min_ddb_alloc,
old_wm->wm[2].min_ddb_alloc, old_wm->wm[3].min_ddb_alloc,
old_wm->wm[4].min_ddb_alloc, old_wm->wm[5].min_ddb_alloc,
old_wm->wm[6].min_ddb_alloc, old_wm->wm[7].min_ddb_alloc,
old_wm->trans_wm.min_ddb_alloc,
+ old_wm->sagv_wm0.min_ddb_alloc,
new_wm->wm[0].min_ddb_alloc, new_wm->wm[1].min_ddb_alloc,
new_wm->wm[2].min_ddb_alloc, new_wm->wm[3].min_ddb_alloc,
new_wm->wm[4].min_ddb_alloc, new_wm->wm[5].min_ddb_alloc,
new_wm->wm[6].min_ddb_alloc, new_wm->wm[7].min_ddb_alloc,
- new_wm->trans_wm.min_ddb_alloc);
+ new_wm->trans_wm.min_ddb_alloc,
+ new_wm->sagv_wm0.min_ddb_alloc);
}
}
}
@@ -5780,6 +6011,10 @@ skl_compute_wm(struct intel_atomic_state *state)
if (ret)
return ret;
+ ret = intel_compute_sagv_mask(state);
+ if (ret)
+ return ret;
+
/*
* skl_compute_ddb() will have adjusted the final watermarks
* based on how much ddb is available. Now we can actually
@@ -5876,8 +6111,7 @@ static void ilk_optimize_watermarks(struct intel_atomic_state *state,
mutex_unlock(&dev_priv->wm.wm_mutex);
}
-static inline void skl_wm_level_from_reg_val(u32 val,
- struct skl_wm_level *level)
+static void skl_wm_level_from_reg_val(u32 val, struct skl_wm_level *level)
{
level->plane_en = val & PLANE_WM_EN;
level->ignore_lines = val & PLANE_WM_IGNORE_LINES;
@@ -5909,6 +6143,9 @@ void skl_pipe_wm_get_hw_state(struct intel_crtc *crtc,
skl_wm_level_from_reg_val(val, &wm->wm[level]);
}
+ if (INTEL_GEN(dev_priv) >= 12)
+ wm->sagv_wm0 = wm->wm[0];
+
if (plane_id != PLANE_CURSOR)
val = I915_READ(PLANE_WM_TRANS(pipe, plane_id));
else
@@ -6850,6 +7087,10 @@ static void tgl_init_clock_gating(struct drm_i915_private *dev_priv)
if (IS_TGL_REVID(dev_priv, TGL_REVID_A0, TGL_REVID_A0))
I915_WRITE(GEN9_CLKGATE_DIS_3, I915_READ(GEN9_CLKGATE_DIS_3) |
TGL_VRH_GATING_DIS);
+
+ /* Wa_14011059788:tgl */
+ intel_uncore_rmw(&dev_priv->uncore, GEN10_DFR_RATIO_EN_AND_CHICKEN,
+ 0, DFR_DISABLE);
}
static void cnp_init_clock_gating(struct drm_i915_private *dev_priv)
@@ -6882,9 +7123,6 @@ static void cnl_init_clock_gating(struct drm_i915_private *dev_priv)
val = I915_READ(SLICE_UNIT_LEVEL_CLKGATE);
/* ReadHitWriteOnlyDisable:cnl */
val |= RCCUNIT_CLKGATE_DIS;
- /* WaSarbUnitClockGatingDisable:cnl (pre-prod) */
- if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_B0))
- val |= SARBUNIT_CLKGATE_DIS;
I915_WRITE(SLICE_UNIT_LEVEL_CLKGATE, val);
/* Wa_2201832410:cnl */
diff --git a/drivers/gpu/drm/i915/intel_pm.h b/drivers/gpu/drm/i915/intel_pm.h
index d60a85421c5a..614ac7f8d4cc 100644
--- a/drivers/gpu/drm/i915/intel_pm.h
+++ b/drivers/gpu/drm/i915/intel_pm.h
@@ -9,6 +9,7 @@
#include <linux/types.h>
#include "i915_reg.h"
+#include "display/intel_bw.h"
struct drm_device;
struct drm_i915_private;
@@ -41,9 +42,12 @@ void skl_pipe_wm_get_hw_state(struct intel_crtc *crtc,
struct skl_pipe_wm *out);
void g4x_wm_sanitize(struct drm_i915_private *dev_priv);
void vlv_wm_sanitize(struct drm_i915_private *dev_priv);
-bool intel_can_enable_sagv(struct intel_atomic_state *state);
+bool intel_can_enable_sagv(struct drm_i915_private *dev_priv,
+ const struct intel_bw_state *bw_state);
int intel_enable_sagv(struct drm_i915_private *dev_priv);
int intel_disable_sagv(struct drm_i915_private *dev_priv);
+void intel_sagv_pre_plane_update(struct intel_atomic_state *state);
+void intel_sagv_post_plane_update(struct intel_atomic_state *state);
bool skl_wm_level_equals(const struct skl_wm_level *l1,
const struct skl_wm_level *l2);
bool skl_ddb_allocation_overlaps(const struct skl_ddb_entry *ddb,
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
index ad719c9602af..9cb2d7548daa 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
@@ -549,7 +549,7 @@ void intel_runtime_pm_enable(struct intel_runtime_pm *rpm)
* because the HDA driver may require us to enable the audio power
* domain during system suspend.
*/
- dev_pm_set_driver_flags(kdev, DPM_FLAG_NEVER_SKIP);
+ dev_pm_set_driver_flags(kdev, DPM_FLAG_NO_DIRECT_COMPLETE);
pm_runtime_set_autosuspend_delay(kdev, 10000); /* 10s */
pm_runtime_mark_last_busy(kdev);
diff --git a/drivers/gpu/drm/i915/intel_sideband.c b/drivers/gpu/drm/i915/intel_sideband.c
index 3f13baaef058..916ccd1c0e96 100644
--- a/drivers/gpu/drm/i915/intel_sideband.c
+++ b/drivers/gpu/drm/i915/intel_sideband.c
@@ -336,7 +336,7 @@ void intel_sbi_write(struct drm_i915_private *i915, u16 reg, u32 value,
intel_sbi_rw(i915, reg, destination, &value, false);
}
-static inline int gen6_check_mailbox_status(u32 mbox)
+static int gen6_check_mailbox_status(u32 mbox)
{
switch (mbox & GEN6_PCODE_ERROR_MASK) {
case GEN6_PCODE_SUCCESS:
@@ -356,7 +356,7 @@ static inline int gen6_check_mailbox_status(u32 mbox)
}
}
-static inline int gen7_check_mailbox_status(u32 mbox)
+static int gen7_check_mailbox_status(u32 mbox)
{
switch (mbox & GEN6_PCODE_ERROR_MASK) {
case GEN6_PCODE_SUCCESS:
@@ -371,6 +371,8 @@ static inline int gen7_check_mailbox_status(u32 mbox)
return -ENXIO;
case GEN11_PCODE_LOCKED:
return -EBUSY;
+ case GEN11_PCODE_REJECTED:
+ return -EACCES;
case GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE:
return -EOVERFLOW;
default:
@@ -429,7 +431,7 @@ int sandybridge_pcode_read(struct drm_i915_private *i915, u32 mbox,
mutex_lock(&i915->sb_lock);
err = __sandybridge_pcode_rw(i915, mbox, val, val1,
- 500, 0,
+ 500, 20,
true);
mutex_unlock(&i915->sb_lock);
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index abb18b90d7c3..a61cb8ca4d50 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -665,7 +665,7 @@ void intel_uncore_forcewake_user_put(struct intel_uncore *uncore)
mmio_debug_resume(uncore->debug);
if (check_for_unclaimed_mmio(uncore))
- dev_info(uncore->i915->drm.dev,
+ drm_info(&uncore->i915->drm,
"Invalid mmio detected during user access\n");
spin_unlock(&uncore->debug->lock);
@@ -735,6 +735,28 @@ void intel_uncore_forcewake_put(struct intel_uncore *uncore,
}
/**
+ * intel_uncore_forcewake_flush - flush the delayed release
+ * @uncore: the intel_uncore structure
+ * @fw_domains: forcewake domains to flush
+ */
+void intel_uncore_forcewake_flush(struct intel_uncore *uncore,
+ enum forcewake_domains fw_domains)
+{
+ struct intel_uncore_forcewake_domain *domain;
+ unsigned int tmp;
+
+ if (!uncore->funcs.force_wake_put)
+ return;
+
+ fw_domains &= uncore->fw_domains;
+ for_each_fw_domain_masked(domain, fw_domains, uncore, tmp) {
+ WRITE_ONCE(domain->active, false);
+ if (hrtimer_cancel(&domain->timer))
+ intel_uncore_fw_release_timer(&domain->timer);
+ }
+}
+
+/**
* intel_uncore_forcewake_put__locked - release forcewake domain references
* @uncore: the intel_uncore structure
* @fw_domains: forcewake domains to put references on
@@ -877,11 +899,6 @@ find_fw_domain(struct intel_uncore *uncore, u32 offset)
#define GEN_FW_RANGE(s, e, d) \
{ .start = (s), .end = (e), .domains = (d) }
-#define HAS_FWTABLE(dev_priv) \
- (INTEL_GEN(dev_priv) >= 9 || \
- IS_CHERRYVIEW(dev_priv) || \
- IS_VALLEYVIEW(dev_priv))
-
/* *Must* be sorted by offset ranges! See intel_fw_table_check(). */
static const struct intel_forcewake_range __vlv_fw_ranges[] = {
GEN_FW_RANGE(0x2000, 0x3fff, FORCEWAKE_RENDER),
@@ -1070,8 +1087,7 @@ static const struct intel_forcewake_range __gen9_fw_ranges[] = {
/* *Must* be sorted by offset ranges! See intel_fw_table_check(). */
static const struct intel_forcewake_range __gen11_fw_ranges[] = {
- GEN_FW_RANGE(0x0, 0xaff, FORCEWAKE_BLITTER),
- GEN_FW_RANGE(0xb00, 0x1fff, 0), /* uncore range */
+ GEN_FW_RANGE(0x0, 0x1fff, 0), /* uncore range */
GEN_FW_RANGE(0x2000, 0x26ff, FORCEWAKE_RENDER),
GEN_FW_RANGE(0x2700, 0x2fff, FORCEWAKE_BLITTER),
GEN_FW_RANGE(0x3000, 0x3fff, FORCEWAKE_RENDER),
@@ -1081,27 +1097,31 @@ static const struct intel_forcewake_range __gen11_fw_ranges[] = {
GEN_FW_RANGE(0x8140, 0x815f, FORCEWAKE_RENDER),
GEN_FW_RANGE(0x8160, 0x82ff, FORCEWAKE_BLITTER),
GEN_FW_RANGE(0x8300, 0x84ff, FORCEWAKE_RENDER),
- GEN_FW_RANGE(0x8500, 0x8bff, FORCEWAKE_BLITTER),
+ GEN_FW_RANGE(0x8500, 0x87ff, FORCEWAKE_BLITTER),
+ GEN_FW_RANGE(0x8800, 0x8bff, 0),
GEN_FW_RANGE(0x8c00, 0x8cff, FORCEWAKE_RENDER),
- GEN_FW_RANGE(0x8d00, 0x93ff, FORCEWAKE_BLITTER),
- GEN_FW_RANGE(0x9400, 0x97ff, FORCEWAKE_ALL),
- GEN_FW_RANGE(0x9800, 0xafff, FORCEWAKE_BLITTER),
+ GEN_FW_RANGE(0x8d00, 0x94cf, FORCEWAKE_BLITTER),
+ GEN_FW_RANGE(0x94d0, 0x955f, FORCEWAKE_RENDER),
+ GEN_FW_RANGE(0x9560, 0x95ff, 0),
+ GEN_FW_RANGE(0x9600, 0xafff, FORCEWAKE_BLITTER),
GEN_FW_RANGE(0xb000, 0xb47f, FORCEWAKE_RENDER),
GEN_FW_RANGE(0xb480, 0xdeff, FORCEWAKE_BLITTER),
GEN_FW_RANGE(0xdf00, 0xe8ff, FORCEWAKE_RENDER),
GEN_FW_RANGE(0xe900, 0x16dff, FORCEWAKE_BLITTER),
GEN_FW_RANGE(0x16e00, 0x19fff, FORCEWAKE_RENDER),
- GEN_FW_RANGE(0x1a000, 0x243ff, FORCEWAKE_BLITTER),
- GEN_FW_RANGE(0x24400, 0x247ff, FORCEWAKE_RENDER),
- GEN_FW_RANGE(0x24800, 0x3ffff, FORCEWAKE_BLITTER),
+ GEN_FW_RANGE(0x1a000, 0x23fff, FORCEWAKE_BLITTER),
+ GEN_FW_RANGE(0x24000, 0x2407f, 0),
+ GEN_FW_RANGE(0x24080, 0x2417f, FORCEWAKE_BLITTER),
+ GEN_FW_RANGE(0x24180, 0x242ff, FORCEWAKE_RENDER),
+ GEN_FW_RANGE(0x24300, 0x243ff, FORCEWAKE_BLITTER),
+ GEN_FW_RANGE(0x24400, 0x24fff, FORCEWAKE_RENDER),
+ GEN_FW_RANGE(0x25000, 0x3ffff, FORCEWAKE_BLITTER),
GEN_FW_RANGE(0x40000, 0x1bffff, 0),
GEN_FW_RANGE(0x1c0000, 0x1c3fff, FORCEWAKE_MEDIA_VDBOX0),
- GEN_FW_RANGE(0x1c4000, 0x1c7fff, FORCEWAKE_MEDIA_VDBOX1),
- GEN_FW_RANGE(0x1c8000, 0x1cbfff, FORCEWAKE_MEDIA_VEBOX0),
- GEN_FW_RANGE(0x1cc000, 0x1cffff, FORCEWAKE_BLITTER),
+ GEN_FW_RANGE(0x1c4000, 0x1c7fff, 0),
+ GEN_FW_RANGE(0x1c8000, 0x1cffff, FORCEWAKE_MEDIA_VEBOX0),
GEN_FW_RANGE(0x1d0000, 0x1d3fff, FORCEWAKE_MEDIA_VDBOX2),
- GEN_FW_RANGE(0x1d4000, 0x1d7fff, FORCEWAKE_MEDIA_VDBOX3),
- GEN_FW_RANGE(0x1d8000, 0x1dbfff, FORCEWAKE_MEDIA_VEBOX1)
+ GEN_FW_RANGE(0x1d4000, 0x1dbfff, 0)
};
/* *Must* be sorted by offset ranges! See intel_fw_table_check(). */
diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h
index dcfa243892c6..8d3aa8b9acf9 100644
--- a/drivers/gpu/drm/i915/intel_uncore.h
+++ b/drivers/gpu/drm/i915/intel_uncore.h
@@ -209,7 +209,11 @@ void intel_uncore_forcewake_get(struct intel_uncore *uncore,
enum forcewake_domains domains);
void intel_uncore_forcewake_put(struct intel_uncore *uncore,
enum forcewake_domains domains);
-/* Like above but the caller must manage the uncore.lock itself.
+void intel_uncore_forcewake_flush(struct intel_uncore *uncore,
+ enum forcewake_domains fw_domains);
+
+/*
+ * Like above but the caller must manage the uncore.lock itself.
* Must be used with I915_READ_FW and friends.
*/
void intel_uncore_forcewake_get__locked(struct intel_uncore *uncore,
diff --git a/drivers/gpu/drm/i915/intel_wakeref.c b/drivers/gpu/drm/i915/intel_wakeref.c
index 8fbf6f4d3f26..dfd87d082218 100644
--- a/drivers/gpu/drm/i915/intel_wakeref.c
+++ b/drivers/gpu/drm/i915/intel_wakeref.c
@@ -70,11 +70,12 @@ unlock:
void __intel_wakeref_put_last(struct intel_wakeref *wf, unsigned long flags)
{
- INTEL_WAKEREF_BUG_ON(work_pending(&wf->work));
+ INTEL_WAKEREF_BUG_ON(delayed_work_pending(&wf->work));
/* Assume we are not in process context and so cannot sleep. */
if (flags & INTEL_WAKEREF_PUT_ASYNC || !mutex_trylock(&wf->mutex)) {
- schedule_work(&wf->work);
+ mod_delayed_work(system_wq, &wf->work,
+ FIELD_GET(INTEL_WAKEREF_PUT_DELAY, flags));
return;
}
@@ -83,7 +84,7 @@ void __intel_wakeref_put_last(struct intel_wakeref *wf, unsigned long flags)
static void __intel_wakeref_put_work(struct work_struct *wrk)
{
- struct intel_wakeref *wf = container_of(wrk, typeof(*wf), work);
+ struct intel_wakeref *wf = container_of(wrk, typeof(*wf), work.work);
if (atomic_add_unless(&wf->count, -1, 1))
return;
@@ -104,8 +105,9 @@ void __intel_wakeref_init(struct intel_wakeref *wf,
atomic_set(&wf->count, 0);
wf->wakeref = 0;
- INIT_WORK(&wf->work, __intel_wakeref_put_work);
- lockdep_init_map(&wf->work.lockdep_map, "wakeref.work", &key->work, 0);
+ INIT_DELAYED_WORK(&wf->work, __intel_wakeref_put_work);
+ lockdep_init_map(&wf->work.work.lockdep_map,
+ "wakeref.work", &key->work, 0);
}
int intel_wakeref_wait_for_idle(struct intel_wakeref *wf)
diff --git a/drivers/gpu/drm/i915/intel_wakeref.h b/drivers/gpu/drm/i915/intel_wakeref.h
index 7d1e676b71ef..545c8f277c46 100644
--- a/drivers/gpu/drm/i915/intel_wakeref.h
+++ b/drivers/gpu/drm/i915/intel_wakeref.h
@@ -8,6 +8,7 @@
#define INTEL_WAKEREF_H
#include <linux/atomic.h>
+#include <linux/bitfield.h>
#include <linux/bits.h>
#include <linux/lockdep.h>
#include <linux/mutex.h>
@@ -41,7 +42,7 @@ struct intel_wakeref {
struct intel_runtime_pm *rpm;
const struct intel_wakeref_ops *ops;
- struct work_struct work;
+ struct delayed_work work;
};
struct intel_wakeref_lockclass {
@@ -117,6 +118,11 @@ intel_wakeref_get_if_active(struct intel_wakeref *wf)
return atomic_inc_not_zero(&wf->count);
}
+enum {
+ INTEL_WAKEREF_PUT_ASYNC_BIT = 0,
+ __INTEL_WAKEREF_PUT_LAST_BIT__
+};
+
/**
* intel_wakeref_put_flags: Release the wakeref
* @wf: the wakeref
@@ -134,7 +140,9 @@ intel_wakeref_get_if_active(struct intel_wakeref *wf)
*/
static inline void
__intel_wakeref_put(struct intel_wakeref *wf, unsigned long flags)
-#define INTEL_WAKEREF_PUT_ASYNC BIT(0)
+#define INTEL_WAKEREF_PUT_ASYNC BIT(INTEL_WAKEREF_PUT_ASYNC_BIT)
+#define INTEL_WAKEREF_PUT_DELAY \
+ GENMASK(BITS_PER_LONG - 1, __INTEL_WAKEREF_PUT_LAST_BIT__)
{
INTEL_WAKEREF_BUG_ON(atomic_read(&wf->count) <= 0);
if (unlikely(!atomic_add_unless(&wf->count, -1, 1)))
@@ -154,6 +162,14 @@ intel_wakeref_put_async(struct intel_wakeref *wf)
__intel_wakeref_put(wf, INTEL_WAKEREF_PUT_ASYNC);
}
+static inline void
+intel_wakeref_put_delay(struct intel_wakeref *wf, unsigned long delay)
+{
+ __intel_wakeref_put(wf,
+ INTEL_WAKEREF_PUT_ASYNC |
+ FIELD_PREP(INTEL_WAKEREF_PUT_DELAY, delay));
+}
+
/**
* intel_wakeref_lock: Lock the wakeref (mutex)
* @wf: the wakeref
@@ -194,7 +210,7 @@ intel_wakeref_unlock_wait(struct intel_wakeref *wf)
{
mutex_lock(&wf->mutex);
mutex_unlock(&wf->mutex);
- flush_work(&wf->work);
+ flush_delayed_work(&wf->work);
}
/**
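
The intel_wakeref.h hunk above packs an optional delay into the upper bits of the put() flags word, next to the ASYNC bit, via GENMASK/FIELD_PREP/FIELD_GET. Below is a minimal user-space sketch of that packing scheme; the bitfield helpers are re-implemented here only so the example builds outside the kernel (the driver uses <linux/bitfield.h>), and the delay value is a made-up number for illustration.

#include <stdio.h>

#define BITS_PER_LONG	(8 * sizeof(unsigned long))
#define BIT(n)		(1UL << (n))
#define GENMASK(h, l) \
	(((~0UL) << (l)) & (~0UL >> (BITS_PER_LONG - 1 - (h))))

/* Same layout as the patch: bit 0 = async, remaining bits = delay. */
#define PUT_ASYNC_BIT	0
#define PUT_ASYNC	BIT(PUT_ASYNC_BIT)
#define PUT_DELAY	GENMASK(BITS_PER_LONG - 1, 1)

/* Simplified FIELD_PREP/FIELD_GET: shift by the mask's lowest set bit. */
#define FIELD_PREP(mask, val) \
	(((unsigned long)(val) << __builtin_ctzl(mask)) & (mask))
#define FIELD_GET(mask, reg) \
	(((reg) & (mask)) >> __builtin_ctzl(mask))

int main(void)
{
	unsigned long delay = 250;	/* hypothetical delay value */
	unsigned long flags = PUT_ASYNC | FIELD_PREP(PUT_DELAY, delay);

	/* Unpack exactly as __intel_wakeref_put_last() does in the patch. */
	printf("async=%lu delay=%lu\n",
	       flags & PUT_ASYNC, FIELD_GET(PUT_DELAY, flags));
	return 0;
}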
diff --git a/drivers/gpu/drm/i915/intel_wopcm.c b/drivers/gpu/drm/i915/intel_wopcm.c
index 2bb9f9f9a50a..ec776591e1cf 100644
--- a/drivers/gpu/drm/i915/intel_wopcm.c
+++ b/drivers/gpu/drm/i915/intel_wopcm.c
@@ -86,10 +86,10 @@ void intel_wopcm_init_early(struct intel_wopcm *wopcm)
else
wopcm->size = GEN9_WOPCM_SIZE;
- DRM_DEV_DEBUG_DRIVER(i915->drm.dev, "WOPCM: %uK\n", wopcm->size / 1024);
+ drm_dbg(&i915->drm, "WOPCM: %uK\n", wopcm->size / 1024);
}
-static inline u32 context_reserved_size(struct drm_i915_private *i915)
+static u32 context_reserved_size(struct drm_i915_private *i915)
{
if (IS_GEN9_LP(i915))
return BXT_WOPCM_RC6_CTX_RESERVED;
@@ -99,8 +99,8 @@ static inline u32 context_reserved_size(struct drm_i915_private *i915)
return 0;
}
-static inline bool gen9_check_dword_gap(struct drm_i915_private *i915,
- u32 guc_wopcm_base, u32 guc_wopcm_size)
+static bool gen9_check_dword_gap(struct drm_i915_private *i915,
+ u32 guc_wopcm_base, u32 guc_wopcm_size)
{
u32 offset;
@@ -112,7 +112,7 @@ static inline bool gen9_check_dword_gap(struct drm_i915_private *i915,
offset = guc_wopcm_base + GEN9_GUC_WOPCM_OFFSET;
if (offset > guc_wopcm_size ||
(guc_wopcm_size - offset) < sizeof(u32)) {
- dev_err(i915->drm.dev,
+ drm_err(&i915->drm,
"WOPCM: invalid GuC region size: %uK < %uK\n",
guc_wopcm_size / SZ_1K,
(u32)(offset + sizeof(u32)) / SZ_1K);
@@ -122,8 +122,8 @@ static inline bool gen9_check_dword_gap(struct drm_i915_private *i915,
return true;
}
-static inline bool gen9_check_huc_fw_fits(struct drm_i915_private *i915,
- u32 guc_wopcm_size, u32 huc_fw_size)
+static bool gen9_check_huc_fw_fits(struct drm_i915_private *i915,
+ u32 guc_wopcm_size, u32 huc_fw_size)
{
/*
* On Gen9 & CNL A0, hardware requires the total available GuC WOPCM
@@ -131,7 +131,7 @@ static inline bool gen9_check_huc_fw_fits(struct drm_i915_private *i915,
* firmware uploading would fail.
*/
if (huc_fw_size > guc_wopcm_size - GUC_WOPCM_RESERVED) {
- dev_err(i915->drm.dev, "WOPCM: no space for %s: %uK < %uK\n",
+ drm_err(&i915->drm, "WOPCM: no space for %s: %uK < %uK\n",
intel_uc_fw_type_repr(INTEL_UC_FW_TYPE_HUC),
(guc_wopcm_size - GUC_WOPCM_RESERVED) / SZ_1K,
huc_fw_size / 1024);
@@ -141,32 +141,31 @@ static inline bool gen9_check_huc_fw_fits(struct drm_i915_private *i915,
return true;
}
-static inline bool check_hw_restrictions(struct drm_i915_private *i915,
- u32 guc_wopcm_base, u32 guc_wopcm_size,
- u32 huc_fw_size)
+static bool check_hw_restrictions(struct drm_i915_private *i915,
+ u32 guc_wopcm_base, u32 guc_wopcm_size,
+ u32 huc_fw_size)
{
if (IS_GEN(i915, 9) && !gen9_check_dword_gap(i915, guc_wopcm_base,
guc_wopcm_size))
return false;
- if ((IS_GEN(i915, 9) ||
- IS_CNL_REVID(i915, CNL_REVID_A0, CNL_REVID_A0)) &&
+ if (IS_GEN(i915, 9) &&
!gen9_check_huc_fw_fits(i915, guc_wopcm_size, huc_fw_size))
return false;
return true;
}
-static inline bool __check_layout(struct drm_i915_private *i915, u32 wopcm_size,
- u32 guc_wopcm_base, u32 guc_wopcm_size,
- u32 guc_fw_size, u32 huc_fw_size)
+static bool __check_layout(struct drm_i915_private *i915, u32 wopcm_size,
+ u32 guc_wopcm_base, u32 guc_wopcm_size,
+ u32 guc_fw_size, u32 huc_fw_size)
{
const u32 ctx_rsvd = context_reserved_size(i915);
u32 size;
size = wopcm_size - ctx_rsvd;
if (unlikely(range_overflows(guc_wopcm_base, guc_wopcm_size, size))) {
- dev_err(i915->drm.dev,
+ drm_err(&i915->drm,
"WOPCM: invalid GuC region layout: %uK + %uK > %uK\n",
guc_wopcm_base / SZ_1K, guc_wopcm_size / SZ_1K,
size / SZ_1K);
@@ -175,7 +174,7 @@ static inline bool __check_layout(struct drm_i915_private *i915, u32 wopcm_size,
size = guc_fw_size + GUC_WOPCM_RESERVED + GUC_WOPCM_STACK_RESERVED;
if (unlikely(guc_wopcm_size < size)) {
- dev_err(i915->drm.dev, "WOPCM: no space for %s: %uK < %uK\n",
+ drm_err(&i915->drm, "WOPCM: no space for %s: %uK < %uK\n",
intel_uc_fw_type_repr(INTEL_UC_FW_TYPE_GUC),
guc_wopcm_size / SZ_1K, size / SZ_1K);
return false;
@@ -183,7 +182,7 @@ static inline bool __check_layout(struct drm_i915_private *i915, u32 wopcm_size,
size = huc_fw_size + WOPCM_RESERVED_SIZE;
if (unlikely(guc_wopcm_base < size)) {
- dev_err(i915->drm.dev, "WOPCM: no space for %s: %uK < %uK\n",
+ drm_err(&i915->drm, "WOPCM: no space for %s: %uK < %uK\n",
intel_uc_fw_type_repr(INTEL_UC_FW_TYPE_HUC),
guc_wopcm_base / SZ_1K, size / SZ_1K);
return false;
@@ -242,10 +241,8 @@ void intel_wopcm_init(struct intel_wopcm *wopcm)
return;
if (__wopcm_regs_locked(gt->uncore, &guc_wopcm_base, &guc_wopcm_size)) {
- DRM_DEV_DEBUG_DRIVER(i915->drm.dev,
- "GuC WOPCM is already locked [%uK, %uK)\n",
- guc_wopcm_base / SZ_1K,
- guc_wopcm_size / SZ_1K);
+ drm_dbg(&i915->drm, "GuC WOPCM is already locked [%uK, %uK)\n",
+ guc_wopcm_base / SZ_1K, guc_wopcm_size / SZ_1K);
goto check;
}
@@ -266,8 +263,8 @@ void intel_wopcm_init(struct intel_wopcm *wopcm)
guc_wopcm_size = wopcm->size - ctx_rsvd - guc_wopcm_base;
guc_wopcm_size &= GUC_WOPCM_SIZE_MASK;
- DRM_DEV_DEBUG_DRIVER(i915->drm.dev, "Calculated GuC WOPCM [%uK, %uK)\n",
- guc_wopcm_base / SZ_1K, guc_wopcm_size / SZ_1K);
+ drm_dbg(&i915->drm, "Calculated GuC WOPCM [%uK, %uK)\n",
+ guc_wopcm_base / SZ_1K, guc_wopcm_size / SZ_1K);
check:
if (__check_layout(i915, wopcm->size, guc_wopcm_base, guc_wopcm_size,
diff --git a/drivers/gpu/drm/i915/oa/i915_oa_bdw.c b/drivers/gpu/drm/i915/oa/i915_oa_bdw.c
deleted file mode 100644
index 14da5c3b569d..000000000000
--- a/drivers/gpu/drm/i915/oa/i915_oa_bdw.c
+++ /dev/null
@@ -1,90 +0,0 @@
-// SPDX-License-Identifier: MIT
-/*
- * Copyright © 2018-2019 Intel Corporation
- *
- * Autogenerated file by GPU Top : https://github.com/rib/gputop
- * DO NOT EDIT manually!
- */
-
-#include <linux/sysfs.h>
-
-#include "i915_drv.h"
-#include "i915_oa_bdw.h"
-
-static const struct i915_oa_reg b_counter_config_test_oa[] = {
- { _MMIO(0x2740), 0x00000000 },
- { _MMIO(0x2744), 0x00800000 },
- { _MMIO(0x2714), 0xf0800000 },
- { _MMIO(0x2710), 0x00000000 },
- { _MMIO(0x2724), 0xf0800000 },
- { _MMIO(0x2720), 0x00000000 },
- { _MMIO(0x2770), 0x00000004 },
- { _MMIO(0x2774), 0x00000000 },
- { _MMIO(0x2778), 0x00000003 },
- { _MMIO(0x277c), 0x00000000 },
- { _MMIO(0x2780), 0x00000007 },
- { _MMIO(0x2784), 0x00000000 },
- { _MMIO(0x2788), 0x00100002 },
- { _MMIO(0x278c), 0x0000fff7 },
- { _MMIO(0x2790), 0x00100002 },
- { _MMIO(0x2794), 0x0000ffcf },
- { _MMIO(0x2798), 0x00100082 },
- { _MMIO(0x279c), 0x0000ffef },
- { _MMIO(0x27a0), 0x001000c2 },
- { _MMIO(0x27a4), 0x0000ffe7 },
- { _MMIO(0x27a8), 0x00100001 },
- { _MMIO(0x27ac), 0x0000ffe7 },
-};
-
-static const struct i915_oa_reg flex_eu_config_test_oa[] = {
-};
-
-static const struct i915_oa_reg mux_config_test_oa[] = {
- { _MMIO(0x9840), 0x000000a0 },
- { _MMIO(0x9888), 0x198b0000 },
- { _MMIO(0x9888), 0x078b0066 },
- { _MMIO(0x9888), 0x118b0000 },
- { _MMIO(0x9888), 0x258b0000 },
- { _MMIO(0x9888), 0x21850008 },
- { _MMIO(0x9888), 0x0d834000 },
- { _MMIO(0x9888), 0x07844000 },
- { _MMIO(0x9888), 0x17804000 },
- { _MMIO(0x9888), 0x21800000 },
- { _MMIO(0x9888), 0x4f800000 },
- { _MMIO(0x9888), 0x41800000 },
- { _MMIO(0x9888), 0x31800000 },
- { _MMIO(0x9840), 0x00000080 },
-};
-
-static ssize_t
-show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf)
-{
- return sprintf(buf, "1\n");
-}
-
-void
-i915_perf_load_test_config_bdw(struct drm_i915_private *dev_priv)
-{
- strlcpy(dev_priv->perf.test_config.uuid,
- "d6de6f55-e526-4f79-a6a6-d7315c09044e",
- sizeof(dev_priv->perf.test_config.uuid));
- dev_priv->perf.test_config.id = 1;
-
- dev_priv->perf.test_config.mux_regs = mux_config_test_oa;
- dev_priv->perf.test_config.mux_regs_len = ARRAY_SIZE(mux_config_test_oa);
-
- dev_priv->perf.test_config.b_counter_regs = b_counter_config_test_oa;
- dev_priv->perf.test_config.b_counter_regs_len = ARRAY_SIZE(b_counter_config_test_oa);
-
- dev_priv->perf.test_config.flex_regs = flex_eu_config_test_oa;
- dev_priv->perf.test_config.flex_regs_len = ARRAY_SIZE(flex_eu_config_test_oa);
-
- dev_priv->perf.test_config.sysfs_metric.name = "d6de6f55-e526-4f79-a6a6-d7315c09044e";
- dev_priv->perf.test_config.sysfs_metric.attrs = dev_priv->perf.test_config.attrs;
-
- dev_priv->perf.test_config.attrs[0] = &dev_priv->perf.test_config.sysfs_metric_id.attr;
-
- dev_priv->perf.test_config.sysfs_metric_id.attr.name = "id";
- dev_priv->perf.test_config.sysfs_metric_id.attr.mode = 0444;
- dev_priv->perf.test_config.sysfs_metric_id.show = show_test_oa_id;
-}
diff --git a/drivers/gpu/drm/i915/oa/i915_oa_bdw.h b/drivers/gpu/drm/i915/oa/i915_oa_bdw.h
deleted file mode 100644
index 0cee3334f0a6..000000000000
--- a/drivers/gpu/drm/i915/oa/i915_oa_bdw.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright © 2018-2019 Intel Corporation
- *
- * Autogenerated file by GPU Top : https://github.com/rib/gputop
- * DO NOT EDIT manually!
- */
-
-#ifndef __I915_OA_BDW_H__
-#define __I915_OA_BDW_H__
-
-struct drm_i915_private;
-
-void i915_perf_load_test_config_bdw(struct drm_i915_private *dev_priv);
-
-#endif
diff --git a/drivers/gpu/drm/i915/oa/i915_oa_bxt.c b/drivers/gpu/drm/i915/oa/i915_oa_bxt.c
deleted file mode 100644
index 3e785bafcf99..000000000000
--- a/drivers/gpu/drm/i915/oa/i915_oa_bxt.c
+++ /dev/null
@@ -1,88 +0,0 @@
-// SPDX-License-Identifier: MIT
-/*
- * Copyright © 2018-2019 Intel Corporation
- *
- * Autogenerated file by GPU Top : https://github.com/rib/gputop
- * DO NOT EDIT manually!
- */
-
-#include <linux/sysfs.h>
-
-#include "i915_drv.h"
-#include "i915_oa_bxt.h"
-
-static const struct i915_oa_reg b_counter_config_test_oa[] = {
- { _MMIO(0x2740), 0x00000000 },
- { _MMIO(0x2744), 0x00800000 },
- { _MMIO(0x2714), 0xf0800000 },
- { _MMIO(0x2710), 0x00000000 },
- { _MMIO(0x2724), 0xf0800000 },
- { _MMIO(0x2720), 0x00000000 },
- { _MMIO(0x2770), 0x00000004 },
- { _MMIO(0x2774), 0x00000000 },
- { _MMIO(0x2778), 0x00000003 },
- { _MMIO(0x277c), 0x00000000 },
- { _MMIO(0x2780), 0x00000007 },
- { _MMIO(0x2784), 0x00000000 },
- { _MMIO(0x2788), 0x00100002 },
- { _MMIO(0x278c), 0x0000fff7 },
- { _MMIO(0x2790), 0x00100002 },
- { _MMIO(0x2794), 0x0000ffcf },
- { _MMIO(0x2798), 0x00100082 },
- { _MMIO(0x279c), 0x0000ffef },
- { _MMIO(0x27a0), 0x001000c2 },
- { _MMIO(0x27a4), 0x0000ffe7 },
- { _MMIO(0x27a8), 0x00100001 },
- { _MMIO(0x27ac), 0x0000ffe7 },
-};
-
-static const struct i915_oa_reg flex_eu_config_test_oa[] = {
-};
-
-static const struct i915_oa_reg mux_config_test_oa[] = {
- { _MMIO(0x9840), 0x00000080 },
- { _MMIO(0x9888), 0x19800000 },
- { _MMIO(0x9888), 0x07800063 },
- { _MMIO(0x9888), 0x11800000 },
- { _MMIO(0x9888), 0x23810008 },
- { _MMIO(0x9888), 0x1d950400 },
- { _MMIO(0x9888), 0x0f922000 },
- { _MMIO(0x9888), 0x1f908000 },
- { _MMIO(0x9888), 0x37900000 },
- { _MMIO(0x9888), 0x55900000 },
- { _MMIO(0x9888), 0x47900000 },
- { _MMIO(0x9888), 0x33900000 },
-};
-
-static ssize_t
-show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf)
-{
- return sprintf(buf, "1\n");
-}
-
-void
-i915_perf_load_test_config_bxt(struct drm_i915_private *dev_priv)
-{
- strlcpy(dev_priv->perf.test_config.uuid,
- "5ee72f5c-092f-421e-8b70-225f7c3e9612",
- sizeof(dev_priv->perf.test_config.uuid));
- dev_priv->perf.test_config.id = 1;
-
- dev_priv->perf.test_config.mux_regs = mux_config_test_oa;
- dev_priv->perf.test_config.mux_regs_len = ARRAY_SIZE(mux_config_test_oa);
-
- dev_priv->perf.test_config.b_counter_regs = b_counter_config_test_oa;
- dev_priv->perf.test_config.b_counter_regs_len = ARRAY_SIZE(b_counter_config_test_oa);
-
- dev_priv->perf.test_config.flex_regs = flex_eu_config_test_oa;
- dev_priv->perf.test_config.flex_regs_len = ARRAY_SIZE(flex_eu_config_test_oa);
-
- dev_priv->perf.test_config.sysfs_metric.name = "5ee72f5c-092f-421e-8b70-225f7c3e9612";
- dev_priv->perf.test_config.sysfs_metric.attrs = dev_priv->perf.test_config.attrs;
-
- dev_priv->perf.test_config.attrs[0] = &dev_priv->perf.test_config.sysfs_metric_id.attr;
-
- dev_priv->perf.test_config.sysfs_metric_id.attr.name = "id";
- dev_priv->perf.test_config.sysfs_metric_id.attr.mode = 0444;
- dev_priv->perf.test_config.sysfs_metric_id.show = show_test_oa_id;
-}
diff --git a/drivers/gpu/drm/i915/oa/i915_oa_bxt.h b/drivers/gpu/drm/i915/oa/i915_oa_bxt.h
deleted file mode 100644
index 0bdf391323ec..000000000000
--- a/drivers/gpu/drm/i915/oa/i915_oa_bxt.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright © 2018-2019 Intel Corporation
- *
- * Autogenerated file by GPU Top : https://github.com/rib/gputop
- * DO NOT EDIT manually!
- */
-
-#ifndef __I915_OA_BXT_H__
-#define __I915_OA_BXT_H__
-
-struct drm_i915_private;
-
-void i915_perf_load_test_config_bxt(struct drm_i915_private *dev_priv);
-
-#endif
diff --git a/drivers/gpu/drm/i915/oa/i915_oa_cflgt2.c b/drivers/gpu/drm/i915/oa/i915_oa_cflgt2.c
deleted file mode 100644
index 0ea86f70a06c..000000000000
--- a/drivers/gpu/drm/i915/oa/i915_oa_cflgt2.c
+++ /dev/null
@@ -1,89 +0,0 @@
-// SPDX-License-Identifier: MIT
-/*
- * Copyright © 2018-2019 Intel Corporation
- *
- * Autogenerated file by GPU Top : https://github.com/rib/gputop
- * DO NOT EDIT manually!
- */
-
-#include <linux/sysfs.h>
-
-#include "i915_drv.h"
-#include "i915_oa_cflgt2.h"
-
-static const struct i915_oa_reg b_counter_config_test_oa[] = {
- { _MMIO(0x2740), 0x00000000 },
- { _MMIO(0x2744), 0x00800000 },
- { _MMIO(0x2714), 0xf0800000 },
- { _MMIO(0x2710), 0x00000000 },
- { _MMIO(0x2724), 0xf0800000 },
- { _MMIO(0x2720), 0x00000000 },
- { _MMIO(0x2770), 0x00000004 },
- { _MMIO(0x2774), 0x00000000 },
- { _MMIO(0x2778), 0x00000003 },
- { _MMIO(0x277c), 0x00000000 },
- { _MMIO(0x2780), 0x00000007 },
- { _MMIO(0x2784), 0x00000000 },
- { _MMIO(0x2788), 0x00100002 },
- { _MMIO(0x278c), 0x0000fff7 },
- { _MMIO(0x2790), 0x00100002 },
- { _MMIO(0x2794), 0x0000ffcf },
- { _MMIO(0x2798), 0x00100082 },
- { _MMIO(0x279c), 0x0000ffef },
- { _MMIO(0x27a0), 0x001000c2 },
- { _MMIO(0x27a4), 0x0000ffe7 },
- { _MMIO(0x27a8), 0x00100001 },
- { _MMIO(0x27ac), 0x0000ffe7 },
-};
-
-static const struct i915_oa_reg flex_eu_config_test_oa[] = {
-};
-
-static const struct i915_oa_reg mux_config_test_oa[] = {
- { _MMIO(0x9840), 0x00000080 },
- { _MMIO(0x9888), 0x11810000 },
- { _MMIO(0x9888), 0x07810013 },
- { _MMIO(0x9888), 0x1f810000 },
- { _MMIO(0x9888), 0x1d810000 },
- { _MMIO(0x9888), 0x1b930040 },
- { _MMIO(0x9888), 0x07e54000 },
- { _MMIO(0x9888), 0x1f908000 },
- { _MMIO(0x9888), 0x11900000 },
- { _MMIO(0x9888), 0x37900000 },
- { _MMIO(0x9888), 0x53900000 },
- { _MMIO(0x9888), 0x45900000 },
- { _MMIO(0x9888), 0x33900000 },
-};
-
-static ssize_t
-show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf)
-{
- return sprintf(buf, "1\n");
-}
-
-void
-i915_perf_load_test_config_cflgt2(struct drm_i915_private *dev_priv)
-{
- strlcpy(dev_priv->perf.test_config.uuid,
- "74fb4902-d3d3-4237-9e90-cbdc68d0a446",
- sizeof(dev_priv->perf.test_config.uuid));
- dev_priv->perf.test_config.id = 1;
-
- dev_priv->perf.test_config.mux_regs = mux_config_test_oa;
- dev_priv->perf.test_config.mux_regs_len = ARRAY_SIZE(mux_config_test_oa);
-
- dev_priv->perf.test_config.b_counter_regs = b_counter_config_test_oa;
- dev_priv->perf.test_config.b_counter_regs_len = ARRAY_SIZE(b_counter_config_test_oa);
-
- dev_priv->perf.test_config.flex_regs = flex_eu_config_test_oa;
- dev_priv->perf.test_config.flex_regs_len = ARRAY_SIZE(flex_eu_config_test_oa);
-
- dev_priv->perf.test_config.sysfs_metric.name = "74fb4902-d3d3-4237-9e90-cbdc68d0a446";
- dev_priv->perf.test_config.sysfs_metric.attrs = dev_priv->perf.test_config.attrs;
-
- dev_priv->perf.test_config.attrs[0] = &dev_priv->perf.test_config.sysfs_metric_id.attr;
-
- dev_priv->perf.test_config.sysfs_metric_id.attr.name = "id";
- dev_priv->perf.test_config.sysfs_metric_id.attr.mode = 0444;
- dev_priv->perf.test_config.sysfs_metric_id.show = show_test_oa_id;
-}
diff --git a/drivers/gpu/drm/i915/oa/i915_oa_cflgt2.h b/drivers/gpu/drm/i915/oa/i915_oa_cflgt2.h
deleted file mode 100644
index 6b862280ab78..000000000000
--- a/drivers/gpu/drm/i915/oa/i915_oa_cflgt2.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright © 2018-2019 Intel Corporation
- *
- * Autogenerated file by GPU Top : https://github.com/rib/gputop
- * DO NOT EDIT manually!
- */
-
-#ifndef __I915_OA_CFLGT2_H__
-#define __I915_OA_CFLGT2_H__
-
-struct drm_i915_private;
-
-void i915_perf_load_test_config_cflgt2(struct drm_i915_private *dev_priv);
-
-#endif
diff --git a/drivers/gpu/drm/i915/oa/i915_oa_cflgt3.c b/drivers/gpu/drm/i915/oa/i915_oa_cflgt3.c
deleted file mode 100644
index fc632dd890bf..000000000000
--- a/drivers/gpu/drm/i915/oa/i915_oa_cflgt3.c
+++ /dev/null
@@ -1,89 +0,0 @@
-// SPDX-License-Identifier: MIT
-/*
- * Copyright © 2018-2019 Intel Corporation
- *
- * Autogenerated file by GPU Top : https://github.com/rib/gputop
- * DO NOT EDIT manually!
- */
-
-#include <linux/sysfs.h>
-
-#include "i915_drv.h"
-#include "i915_oa_cflgt3.h"
-
-static const struct i915_oa_reg b_counter_config_test_oa[] = {
- { _MMIO(0x2740), 0x00000000 },
- { _MMIO(0x2744), 0x00800000 },
- { _MMIO(0x2714), 0xf0800000 },
- { _MMIO(0x2710), 0x00000000 },
- { _MMIO(0x2724), 0xf0800000 },
- { _MMIO(0x2720), 0x00000000 },
- { _MMIO(0x2770), 0x00000004 },
- { _MMIO(0x2774), 0x00000000 },
- { _MMIO(0x2778), 0x00000003 },
- { _MMIO(0x277c), 0x00000000 },
- { _MMIO(0x2780), 0x00000007 },
- { _MMIO(0x2784), 0x00000000 },
- { _MMIO(0x2788), 0x00100002 },
- { _MMIO(0x278c), 0x0000fff7 },
- { _MMIO(0x2790), 0x00100002 },
- { _MMIO(0x2794), 0x0000ffcf },
- { _MMIO(0x2798), 0x00100082 },
- { _MMIO(0x279c), 0x0000ffef },
- { _MMIO(0x27a0), 0x001000c2 },
- { _MMIO(0x27a4), 0x0000ffe7 },
- { _MMIO(0x27a8), 0x00100001 },
- { _MMIO(0x27ac), 0x0000ffe7 },
-};
-
-static const struct i915_oa_reg flex_eu_config_test_oa[] = {
-};
-
-static const struct i915_oa_reg mux_config_test_oa[] = {
- { _MMIO(0x9840), 0x00000080 },
- { _MMIO(0x9888), 0x11810000 },
- { _MMIO(0x9888), 0x07810013 },
- { _MMIO(0x9888), 0x1f810000 },
- { _MMIO(0x9888), 0x1d810000 },
- { _MMIO(0x9888), 0x1b930040 },
- { _MMIO(0x9888), 0x07e54000 },
- { _MMIO(0x9888), 0x1f908000 },
- { _MMIO(0x9888), 0x11900000 },
- { _MMIO(0x9888), 0x37900000 },
- { _MMIO(0x9888), 0x53900000 },
- { _MMIO(0x9888), 0x45900000 },
- { _MMIO(0x9888), 0x33900000 },
-};
-
-static ssize_t
-show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf)
-{
- return sprintf(buf, "1\n");
-}
-
-void
-i915_perf_load_test_config_cflgt3(struct drm_i915_private *dev_priv)
-{
- strlcpy(dev_priv->perf.test_config.uuid,
- "577e8e2c-3fa0-4875-8743-3538d585e3b0",
- sizeof(dev_priv->perf.test_config.uuid));
- dev_priv->perf.test_config.id = 1;
-
- dev_priv->perf.test_config.mux_regs = mux_config_test_oa;
- dev_priv->perf.test_config.mux_regs_len = ARRAY_SIZE(mux_config_test_oa);
-
- dev_priv->perf.test_config.b_counter_regs = b_counter_config_test_oa;
- dev_priv->perf.test_config.b_counter_regs_len = ARRAY_SIZE(b_counter_config_test_oa);
-
- dev_priv->perf.test_config.flex_regs = flex_eu_config_test_oa;
- dev_priv->perf.test_config.flex_regs_len = ARRAY_SIZE(flex_eu_config_test_oa);
-
- dev_priv->perf.test_config.sysfs_metric.name = "577e8e2c-3fa0-4875-8743-3538d585e3b0";
- dev_priv->perf.test_config.sysfs_metric.attrs = dev_priv->perf.test_config.attrs;
-
- dev_priv->perf.test_config.attrs[0] = &dev_priv->perf.test_config.sysfs_metric_id.attr;
-
- dev_priv->perf.test_config.sysfs_metric_id.attr.name = "id";
- dev_priv->perf.test_config.sysfs_metric_id.attr.mode = 0444;
- dev_priv->perf.test_config.sysfs_metric_id.show = show_test_oa_id;
-}
diff --git a/drivers/gpu/drm/i915/oa/i915_oa_cflgt3.h b/drivers/gpu/drm/i915/oa/i915_oa_cflgt3.h
deleted file mode 100644
index 4ca9d8f89b2f..000000000000
--- a/drivers/gpu/drm/i915/oa/i915_oa_cflgt3.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright © 2018-2019 Intel Corporation
- *
- * Autogenerated file by GPU Top : https://github.com/rib/gputop
- * DO NOT EDIT manually!
- */
-
-#ifndef __I915_OA_CFLGT3_H__
-#define __I915_OA_CFLGT3_H__
-
-struct drm_i915_private;
-
-void i915_perf_load_test_config_cflgt3(struct drm_i915_private *dev_priv);
-
-#endif
diff --git a/drivers/gpu/drm/i915/oa/i915_oa_chv.c b/drivers/gpu/drm/i915/oa/i915_oa_chv.c
deleted file mode 100644
index 6cd4e9921a8a..000000000000
--- a/drivers/gpu/drm/i915/oa/i915_oa_chv.c
+++ /dev/null
@@ -1,89 +0,0 @@
-// SPDX-License-Identifier: MIT
-/*
- * Copyright © 2018-2019 Intel Corporation
- *
- * Autogenerated file by GPU Top : https://github.com/rib/gputop
- * DO NOT EDIT manually!
- */
-
-#include <linux/sysfs.h>
-
-#include "i915_drv.h"
-#include "i915_oa_chv.h"
-
-static const struct i915_oa_reg b_counter_config_test_oa[] = {
- { _MMIO(0x2740), 0x00000000 },
- { _MMIO(0x2744), 0x00800000 },
- { _MMIO(0x2714), 0xf0800000 },
- { _MMIO(0x2710), 0x00000000 },
- { _MMIO(0x2724), 0xf0800000 },
- { _MMIO(0x2720), 0x00000000 },
- { _MMIO(0x2770), 0x00000004 },
- { _MMIO(0x2774), 0x00000000 },
- { _MMIO(0x2778), 0x00000003 },
- { _MMIO(0x277c), 0x00000000 },
- { _MMIO(0x2780), 0x00000007 },
- { _MMIO(0x2784), 0x00000000 },
- { _MMIO(0x2788), 0x00100002 },
- { _MMIO(0x278c), 0x0000fff7 },
- { _MMIO(0x2790), 0x00100002 },
- { _MMIO(0x2794), 0x0000ffcf },
- { _MMIO(0x2798), 0x00100082 },
- { _MMIO(0x279c), 0x0000ffef },
- { _MMIO(0x27a0), 0x001000c2 },
- { _MMIO(0x27a4), 0x0000ffe7 },
- { _MMIO(0x27a8), 0x00100001 },
- { _MMIO(0x27ac), 0x0000ffe7 },
-};
-
-static const struct i915_oa_reg flex_eu_config_test_oa[] = {
-};
-
-static const struct i915_oa_reg mux_config_test_oa[] = {
- { _MMIO(0x9840), 0x000000a0 },
- { _MMIO(0x9888), 0x59800000 },
- { _MMIO(0x9888), 0x59800001 },
- { _MMIO(0x9888), 0x338b0000 },
- { _MMIO(0x9888), 0x258b0066 },
- { _MMIO(0x9888), 0x058b0000 },
- { _MMIO(0x9888), 0x038b0000 },
- { _MMIO(0x9888), 0x03844000 },
- { _MMIO(0x9888), 0x47800080 },
- { _MMIO(0x9888), 0x57800000 },
- { _MMIO(0x1823a4), 0x00000000 },
- { _MMIO(0x9888), 0x59800000 },
- { _MMIO(0x9840), 0x00000080 },
-};
-
-static ssize_t
-show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf)
-{
- return sprintf(buf, "1\n");
-}
-
-void
-i915_perf_load_test_config_chv(struct drm_i915_private *dev_priv)
-{
- strlcpy(dev_priv->perf.test_config.uuid,
- "4a534b07-cba3-414d-8d60-874830e883aa",
- sizeof(dev_priv->perf.test_config.uuid));
- dev_priv->perf.test_config.id = 1;
-
- dev_priv->perf.test_config.mux_regs = mux_config_test_oa;
- dev_priv->perf.test_config.mux_regs_len = ARRAY_SIZE(mux_config_test_oa);
-
- dev_priv->perf.test_config.b_counter_regs = b_counter_config_test_oa;
- dev_priv->perf.test_config.b_counter_regs_len = ARRAY_SIZE(b_counter_config_test_oa);
-
- dev_priv->perf.test_config.flex_regs = flex_eu_config_test_oa;
- dev_priv->perf.test_config.flex_regs_len = ARRAY_SIZE(flex_eu_config_test_oa);
-
- dev_priv->perf.test_config.sysfs_metric.name = "4a534b07-cba3-414d-8d60-874830e883aa";
- dev_priv->perf.test_config.sysfs_metric.attrs = dev_priv->perf.test_config.attrs;
-
- dev_priv->perf.test_config.attrs[0] = &dev_priv->perf.test_config.sysfs_metric_id.attr;
-
- dev_priv->perf.test_config.sysfs_metric_id.attr.name = "id";
- dev_priv->perf.test_config.sysfs_metric_id.attr.mode = 0444;
- dev_priv->perf.test_config.sysfs_metric_id.show = show_test_oa_id;
-}
diff --git a/drivers/gpu/drm/i915/oa/i915_oa_chv.h b/drivers/gpu/drm/i915/oa/i915_oa_chv.h
deleted file mode 100644
index 3cac7bbc9c71..000000000000
--- a/drivers/gpu/drm/i915/oa/i915_oa_chv.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright © 2018-2019 Intel Corporation
- *
- * Autogenerated file by GPU Top : https://github.com/rib/gputop
- * DO NOT EDIT manually!
- */
-
-#ifndef __I915_OA_CHV_H__
-#define __I915_OA_CHV_H__
-
-struct drm_i915_private;
-
-void i915_perf_load_test_config_chv(struct drm_i915_private *dev_priv);
-
-#endif
diff --git a/drivers/gpu/drm/i915/oa/i915_oa_cnl.c b/drivers/gpu/drm/i915/oa/i915_oa_cnl.c
deleted file mode 100644
index 1041e8914993..000000000000
--- a/drivers/gpu/drm/i915/oa/i915_oa_cnl.c
+++ /dev/null
@@ -1,101 +0,0 @@
-// SPDX-License-Identifier: MIT
-/*
- * Copyright © 2018-2019 Intel Corporation
- *
- * Autogenerated file by GPU Top : https://github.com/rib/gputop
- * DO NOT EDIT manually!
- */
-
-#include <linux/sysfs.h>
-
-#include "i915_drv.h"
-#include "i915_oa_cnl.h"
-
-static const struct i915_oa_reg b_counter_config_test_oa[] = {
- { _MMIO(0x2740), 0x00000000 },
- { _MMIO(0x2710), 0x00000000 },
- { _MMIO(0x2714), 0xf0800000 },
- { _MMIO(0x2720), 0x00000000 },
- { _MMIO(0x2724), 0xf0800000 },
- { _MMIO(0x2770), 0x00000004 },
- { _MMIO(0x2774), 0x0000ffff },
- { _MMIO(0x2778), 0x00000003 },
- { _MMIO(0x277c), 0x0000ffff },
- { _MMIO(0x2780), 0x00000007 },
- { _MMIO(0x2784), 0x0000ffff },
- { _MMIO(0x2788), 0x00100002 },
- { _MMIO(0x278c), 0x0000fff7 },
- { _MMIO(0x2790), 0x00100002 },
- { _MMIO(0x2794), 0x0000ffcf },
- { _MMIO(0x2798), 0x00100082 },
- { _MMIO(0x279c), 0x0000ffef },
- { _MMIO(0x27a0), 0x001000c2 },
- { _MMIO(0x27a4), 0x0000ffe7 },
- { _MMIO(0x27a8), 0x00100001 },
- { _MMIO(0x27ac), 0x0000ffe7 },
-};
-
-static const struct i915_oa_reg flex_eu_config_test_oa[] = {
-};
-
-static const struct i915_oa_reg mux_config_test_oa[] = {
- { _MMIO(0xd04), 0x00000200 },
- { _MMIO(0x9884), 0x00000007 },
- { _MMIO(0x9888), 0x17060000 },
- { _MMIO(0x9840), 0x00000000 },
- { _MMIO(0x9884), 0x00000007 },
- { _MMIO(0x9888), 0x13034000 },
- { _MMIO(0x9884), 0x00000007 },
- { _MMIO(0x9888), 0x07060066 },
- { _MMIO(0x9884), 0x00000007 },
- { _MMIO(0x9888), 0x05060000 },
- { _MMIO(0x9884), 0x00000007 },
- { _MMIO(0x9888), 0x0f080040 },
- { _MMIO(0x9884), 0x00000007 },
- { _MMIO(0x9888), 0x07091000 },
- { _MMIO(0x9884), 0x00000007 },
- { _MMIO(0x9888), 0x0f041000 },
- { _MMIO(0x9884), 0x00000007 },
- { _MMIO(0x9888), 0x1d004000 },
- { _MMIO(0x9884), 0x00000007 },
- { _MMIO(0x9888), 0x35000000 },
- { _MMIO(0x9884), 0x00000007 },
- { _MMIO(0x9888), 0x49000000 },
- { _MMIO(0x9884), 0x00000007 },
- { _MMIO(0x9888), 0x3d000000 },
- { _MMIO(0x9884), 0x00000007 },
- { _MMIO(0x9888), 0x31000000 },
-};
-
-static ssize_t
-show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf)
-{
- return sprintf(buf, "1\n");
-}
-
-void
-i915_perf_load_test_config_cnl(struct drm_i915_private *dev_priv)
-{
- strlcpy(dev_priv->perf.test_config.uuid,
- "db41edd4-d8e7-4730-ad11-b9a2d6833503",
- sizeof(dev_priv->perf.test_config.uuid));
- dev_priv->perf.test_config.id = 1;
-
- dev_priv->perf.test_config.mux_regs = mux_config_test_oa;
- dev_priv->perf.test_config.mux_regs_len = ARRAY_SIZE(mux_config_test_oa);
-
- dev_priv->perf.test_config.b_counter_regs = b_counter_config_test_oa;
- dev_priv->perf.test_config.b_counter_regs_len = ARRAY_SIZE(b_counter_config_test_oa);
-
- dev_priv->perf.test_config.flex_regs = flex_eu_config_test_oa;
- dev_priv->perf.test_config.flex_regs_len = ARRAY_SIZE(flex_eu_config_test_oa);
-
- dev_priv->perf.test_config.sysfs_metric.name = "db41edd4-d8e7-4730-ad11-b9a2d6833503";
- dev_priv->perf.test_config.sysfs_metric.attrs = dev_priv->perf.test_config.attrs;
-
- dev_priv->perf.test_config.attrs[0] = &dev_priv->perf.test_config.sysfs_metric_id.attr;
-
- dev_priv->perf.test_config.sysfs_metric_id.attr.name = "id";
- dev_priv->perf.test_config.sysfs_metric_id.attr.mode = 0444;
- dev_priv->perf.test_config.sysfs_metric_id.show = show_test_oa_id;
-}
diff --git a/drivers/gpu/drm/i915/oa/i915_oa_cnl.h b/drivers/gpu/drm/i915/oa/i915_oa_cnl.h
deleted file mode 100644
index db379f5fcbb9..000000000000
--- a/drivers/gpu/drm/i915/oa/i915_oa_cnl.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright © 2018-2019 Intel Corporation
- *
- * Autogenerated file by GPU Top : https://github.com/rib/gputop
- * DO NOT EDIT manually!
- */
-
-#ifndef __I915_OA_CNL_H__
-#define __I915_OA_CNL_H__
-
-struct drm_i915_private;
-
-void i915_perf_load_test_config_cnl(struct drm_i915_private *dev_priv);
-
-#endif
diff --git a/drivers/gpu/drm/i915/oa/i915_oa_glk.c b/drivers/gpu/drm/i915/oa/i915_oa_glk.c
deleted file mode 100644
index bd15ebe9aeeb..000000000000
--- a/drivers/gpu/drm/i915/oa/i915_oa_glk.c
+++ /dev/null
@@ -1,88 +0,0 @@
-// SPDX-License-Identifier: MIT
-/*
- * Copyright © 2018-2019 Intel Corporation
- *
- * Autogenerated file by GPU Top : https://github.com/rib/gputop
- * DO NOT EDIT manually!
- */
-
-#include <linux/sysfs.h>
-
-#include "i915_drv.h"
-#include "i915_oa_glk.h"
-
-static const struct i915_oa_reg b_counter_config_test_oa[] = {
- { _MMIO(0x2740), 0x00000000 },
- { _MMIO(0x2744), 0x00800000 },
- { _MMIO(0x2714), 0xf0800000 },
- { _MMIO(0x2710), 0x00000000 },
- { _MMIO(0x2724), 0xf0800000 },
- { _MMIO(0x2720), 0x00000000 },
- { _MMIO(0x2770), 0x00000004 },
- { _MMIO(0x2774), 0x00000000 },
- { _MMIO(0x2778), 0x00000003 },
- { _MMIO(0x277c), 0x00000000 },
- { _MMIO(0x2780), 0x00000007 },
- { _MMIO(0x2784), 0x00000000 },
- { _MMIO(0x2788), 0x00100002 },
- { _MMIO(0x278c), 0x0000fff7 },
- { _MMIO(0x2790), 0x00100002 },
- { _MMIO(0x2794), 0x0000ffcf },
- { _MMIO(0x2798), 0x00100082 },
- { _MMIO(0x279c), 0x0000ffef },
- { _MMIO(0x27a0), 0x001000c2 },
- { _MMIO(0x27a4), 0x0000ffe7 },
- { _MMIO(0x27a8), 0x00100001 },
- { _MMIO(0x27ac), 0x0000ffe7 },
-};
-
-static const struct i915_oa_reg flex_eu_config_test_oa[] = {
-};
-
-static const struct i915_oa_reg mux_config_test_oa[] = {
- { _MMIO(0x9840), 0x00000080 },
- { _MMIO(0x9888), 0x19800000 },
- { _MMIO(0x9888), 0x07800063 },
- { _MMIO(0x9888), 0x11800000 },
- { _MMIO(0x9888), 0x23810008 },
- { _MMIO(0x9888), 0x1d950400 },
- { _MMIO(0x9888), 0x0f922000 },
- { _MMIO(0x9888), 0x1f908000 },
- { _MMIO(0x9888), 0x37900000 },
- { _MMIO(0x9888), 0x55900000 },
- { _MMIO(0x9888), 0x47900000 },
- { _MMIO(0x9888), 0x33900000 },
-};
-
-static ssize_t
-show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf)
-{
- return sprintf(buf, "1\n");
-}
-
-void
-i915_perf_load_test_config_glk(struct drm_i915_private *dev_priv)
-{
- strlcpy(dev_priv->perf.test_config.uuid,
- "dd3fd789-e783-4204-8cd0-b671bbccb0cf",
- sizeof(dev_priv->perf.test_config.uuid));
- dev_priv->perf.test_config.id = 1;
-
- dev_priv->perf.test_config.mux_regs = mux_config_test_oa;
- dev_priv->perf.test_config.mux_regs_len = ARRAY_SIZE(mux_config_test_oa);
-
- dev_priv->perf.test_config.b_counter_regs = b_counter_config_test_oa;
- dev_priv->perf.test_config.b_counter_regs_len = ARRAY_SIZE(b_counter_config_test_oa);
-
- dev_priv->perf.test_config.flex_regs = flex_eu_config_test_oa;
- dev_priv->perf.test_config.flex_regs_len = ARRAY_SIZE(flex_eu_config_test_oa);
-
- dev_priv->perf.test_config.sysfs_metric.name = "dd3fd789-e783-4204-8cd0-b671bbccb0cf";
- dev_priv->perf.test_config.sysfs_metric.attrs = dev_priv->perf.test_config.attrs;
-
- dev_priv->perf.test_config.attrs[0] = &dev_priv->perf.test_config.sysfs_metric_id.attr;
-
- dev_priv->perf.test_config.sysfs_metric_id.attr.name = "id";
- dev_priv->perf.test_config.sysfs_metric_id.attr.mode = 0444;
- dev_priv->perf.test_config.sysfs_metric_id.show = show_test_oa_id;
-}
diff --git a/drivers/gpu/drm/i915/oa/i915_oa_glk.h b/drivers/gpu/drm/i915/oa/i915_oa_glk.h
deleted file mode 100644
index 779f343efd11..000000000000
--- a/drivers/gpu/drm/i915/oa/i915_oa_glk.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright © 2018-2019 Intel Corporation
- *
- * Autogenerated file by GPU Top : https://github.com/rib/gputop
- * DO NOT EDIT manually!
- */
-
-#ifndef __I915_OA_GLK_H__
-#define __I915_OA_GLK_H__
-
-struct drm_i915_private;
-
-void i915_perf_load_test_config_glk(struct drm_i915_private *dev_priv);
-
-#endif
diff --git a/drivers/gpu/drm/i915/oa/i915_oa_hsw.c b/drivers/gpu/drm/i915/oa/i915_oa_hsw.c
deleted file mode 100644
index 133721a8619f..000000000000
--- a/drivers/gpu/drm/i915/oa/i915_oa_hsw.c
+++ /dev/null
@@ -1,118 +0,0 @@
-// SPDX-License-Identifier: MIT
-/*
- * Copyright © 2018-2019 Intel Corporation
- *
- * Autogenerated file by GPU Top : https://github.com/rib/gputop
- * DO NOT EDIT manually!
- */
-
-#include <linux/sysfs.h>
-
-#include "i915_drv.h"
-#include "i915_oa_hsw.h"
-
-static const struct i915_oa_reg b_counter_config_render_basic[] = {
- { _MMIO(0x2724), 0x00800000 },
- { _MMIO(0x2720), 0x00000000 },
- { _MMIO(0x2714), 0x00800000 },
- { _MMIO(0x2710), 0x00000000 },
-};
-
-static const struct i915_oa_reg flex_eu_config_render_basic[] = {
-};
-
-static const struct i915_oa_reg mux_config_render_basic[] = {
- { _MMIO(0x9840), 0x00000080 },
- { _MMIO(0x253a4), 0x01600000 },
- { _MMIO(0x25440), 0x00100000 },
- { _MMIO(0x25128), 0x00000000 },
- { _MMIO(0x2691c), 0x00000800 },
- { _MMIO(0x26aa0), 0x01500000 },
- { _MMIO(0x26b9c), 0x00006000 },
- { _MMIO(0x2791c), 0x00000800 },
- { _MMIO(0x27aa0), 0x01500000 },
- { _MMIO(0x27b9c), 0x00006000 },
- { _MMIO(0x2641c), 0x00000400 },
- { _MMIO(0x25380), 0x00000010 },
- { _MMIO(0x2538c), 0x00000000 },
- { _MMIO(0x25384), 0x0800aaaa },
- { _MMIO(0x25400), 0x00000004 },
- { _MMIO(0x2540c), 0x06029000 },
- { _MMIO(0x25410), 0x00000002 },
- { _MMIO(0x25404), 0x5c30ffff },
- { _MMIO(0x25100), 0x00000016 },
- { _MMIO(0x25110), 0x00000400 },
- { _MMIO(0x25104), 0x00000000 },
- { _MMIO(0x26804), 0x00001211 },
- { _MMIO(0x26884), 0x00000100 },
- { _MMIO(0x26900), 0x00000002 },
- { _MMIO(0x26908), 0x00700000 },
- { _MMIO(0x26904), 0x00000000 },
- { _MMIO(0x26984), 0x00001022 },
- { _MMIO(0x26a04), 0x00000011 },
- { _MMIO(0x26a80), 0x00000006 },
- { _MMIO(0x26a88), 0x00000c02 },
- { _MMIO(0x26a84), 0x00000000 },
- { _MMIO(0x26b04), 0x00001000 },
- { _MMIO(0x26b80), 0x00000002 },
- { _MMIO(0x26b8c), 0x00000007 },
- { _MMIO(0x26b84), 0x00000000 },
- { _MMIO(0x27804), 0x00004844 },
- { _MMIO(0x27884), 0x00000400 },
- { _MMIO(0x27900), 0x00000002 },
- { _MMIO(0x27908), 0x0e000000 },
- { _MMIO(0x27904), 0x00000000 },
- { _MMIO(0x27984), 0x00004088 },
- { _MMIO(0x27a04), 0x00000044 },
- { _MMIO(0x27a80), 0x00000006 },
- { _MMIO(0x27a88), 0x00018040 },
- { _MMIO(0x27a84), 0x00000000 },
- { _MMIO(0x27b04), 0x00004000 },
- { _MMIO(0x27b80), 0x00000002 },
- { _MMIO(0x27b8c), 0x000000e0 },
- { _MMIO(0x27b84), 0x00000000 },
- { _MMIO(0x26104), 0x00002222 },
- { _MMIO(0x26184), 0x0c006666 },
- { _MMIO(0x26284), 0x04000000 },
- { _MMIO(0x26304), 0x04000000 },
- { _MMIO(0x26400), 0x00000002 },
- { _MMIO(0x26410), 0x000000a0 },
- { _MMIO(0x26404), 0x00000000 },
- { _MMIO(0x25420), 0x04108020 },
- { _MMIO(0x25424), 0x1284a420 },
- { _MMIO(0x2541c), 0x00000000 },
- { _MMIO(0x25428), 0x00042049 },
-};
-
-static ssize_t
-show_render_basic_id(struct device *kdev, struct device_attribute *attr, char *buf)
-{
- return sprintf(buf, "1\n");
-}
-
-void
-i915_perf_load_test_config_hsw(struct drm_i915_private *dev_priv)
-{
- strlcpy(dev_priv->perf.test_config.uuid,
- "403d8832-1a27-4aa6-a64e-f5389ce7b212",
- sizeof(dev_priv->perf.test_config.uuid));
- dev_priv->perf.test_config.id = 1;
-
- dev_priv->perf.test_config.mux_regs = mux_config_render_basic;
- dev_priv->perf.test_config.mux_regs_len = ARRAY_SIZE(mux_config_render_basic);
-
- dev_priv->perf.test_config.b_counter_regs = b_counter_config_render_basic;
- dev_priv->perf.test_config.b_counter_regs_len = ARRAY_SIZE(b_counter_config_render_basic);
-
- dev_priv->perf.test_config.flex_regs = flex_eu_config_render_basic;
- dev_priv->perf.test_config.flex_regs_len = ARRAY_SIZE(flex_eu_config_render_basic);
-
- dev_priv->perf.test_config.sysfs_metric.name = "403d8832-1a27-4aa6-a64e-f5389ce7b212";
- dev_priv->perf.test_config.sysfs_metric.attrs = dev_priv->perf.test_config.attrs;
-
- dev_priv->perf.test_config.attrs[0] = &dev_priv->perf.test_config.sysfs_metric_id.attr;
-
- dev_priv->perf.test_config.sysfs_metric_id.attr.name = "id";
- dev_priv->perf.test_config.sysfs_metric_id.attr.mode = 0444;
- dev_priv->perf.test_config.sysfs_metric_id.show = show_render_basic_id;
-}
diff --git a/drivers/gpu/drm/i915/oa/i915_oa_hsw.h b/drivers/gpu/drm/i915/oa/i915_oa_hsw.h
deleted file mode 100644
index ba97f732f136..000000000000
--- a/drivers/gpu/drm/i915/oa/i915_oa_hsw.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright © 2018-2019 Intel Corporation
- *
- * Autogenerated file by GPU Top : https://github.com/rib/gputop
- * DO NOT EDIT manually!
- */
-
-#ifndef __I915_OA_HSW_H__
-#define __I915_OA_HSW_H__
-
-struct drm_i915_private;
-
-void i915_perf_load_test_config_hsw(struct drm_i915_private *dev_priv);
-
-#endif
diff --git a/drivers/gpu/drm/i915/oa/i915_oa_icl.c b/drivers/gpu/drm/i915/oa/i915_oa_icl.c
deleted file mode 100644
index 2d92041b754f..000000000000
--- a/drivers/gpu/drm/i915/oa/i915_oa_icl.c
+++ /dev/null
@@ -1,98 +0,0 @@
-// SPDX-License-Identifier: MIT
-/*
- * Copyright © 2018-2019 Intel Corporation
- *
- * Autogenerated file by GPU Top : https://github.com/rib/gputop
- * DO NOT EDIT manually!
- */
-
-#include <linux/sysfs.h>
-
-#include "i915_drv.h"
-#include "i915_oa_icl.h"
-
-static const struct i915_oa_reg b_counter_config_test_oa[] = {
- { _MMIO(0x2740), 0x00000000 },
- { _MMIO(0x2710), 0x00000000 },
- { _MMIO(0x2714), 0xf0800000 },
- { _MMIO(0x2720), 0x00000000 },
- { _MMIO(0x2724), 0xf0800000 },
- { _MMIO(0x2770), 0x00000004 },
- { _MMIO(0x2774), 0x0000ffff },
- { _MMIO(0x2778), 0x00000003 },
- { _MMIO(0x277c), 0x0000ffff },
- { _MMIO(0x2780), 0x00000007 },
- { _MMIO(0x2784), 0x0000ffff },
- { _MMIO(0x2788), 0x00100002 },
- { _MMIO(0x278c), 0x0000fff7 },
- { _MMIO(0x2790), 0x00100002 },
- { _MMIO(0x2794), 0x0000ffcf },
- { _MMIO(0x2798), 0x00100082 },
- { _MMIO(0x279c), 0x0000ffef },
- { _MMIO(0x27a0), 0x001000c2 },
- { _MMIO(0x27a4), 0x0000ffe7 },
- { _MMIO(0x27a8), 0x00100001 },
- { _MMIO(0x27ac), 0x0000ffe7 },
-};
-
-static const struct i915_oa_reg flex_eu_config_test_oa[] = {
-};
-
-static const struct i915_oa_reg mux_config_test_oa[] = {
- { _MMIO(0xd04), 0x00000200 },
- { _MMIO(0x9840), 0x00000000 },
- { _MMIO(0x9884), 0x00000000 },
- { _MMIO(0x9888), 0x10060000 },
- { _MMIO(0x9888), 0x22060000 },
- { _MMIO(0x9888), 0x16060000 },
- { _MMIO(0x9888), 0x24060000 },
- { _MMIO(0x9888), 0x18060000 },
- { _MMIO(0x9888), 0x1a060000 },
- { _MMIO(0x9888), 0x12060000 },
- { _MMIO(0x9888), 0x14060000 },
- { _MMIO(0x9888), 0x10060000 },
- { _MMIO(0x9888), 0x22060000 },
- { _MMIO(0x9884), 0x00000003 },
- { _MMIO(0x9888), 0x16130000 },
- { _MMIO(0x9888), 0x24000001 },
- { _MMIO(0x9888), 0x0e130056 },
- { _MMIO(0x9888), 0x10130000 },
- { _MMIO(0x9888), 0x1a130000 },
- { _MMIO(0x9888), 0x541f0001 },
- { _MMIO(0x9888), 0x181f0000 },
- { _MMIO(0x9888), 0x4c1f0000 },
- { _MMIO(0x9888), 0x301f0000 },
-};
-
-static ssize_t
-show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf)
-{
- return sprintf(buf, "1\n");
-}
-
-void
-i915_perf_load_test_config_icl(struct drm_i915_private *dev_priv)
-{
- strlcpy(dev_priv->perf.test_config.uuid,
- "a291665e-244b-4b76-9b9a-01de9d3c8068",
- sizeof(dev_priv->perf.test_config.uuid));
- dev_priv->perf.test_config.id = 1;
-
- dev_priv->perf.test_config.mux_regs = mux_config_test_oa;
- dev_priv->perf.test_config.mux_regs_len = ARRAY_SIZE(mux_config_test_oa);
-
- dev_priv->perf.test_config.b_counter_regs = b_counter_config_test_oa;
- dev_priv->perf.test_config.b_counter_regs_len = ARRAY_SIZE(b_counter_config_test_oa);
-
- dev_priv->perf.test_config.flex_regs = flex_eu_config_test_oa;
- dev_priv->perf.test_config.flex_regs_len = ARRAY_SIZE(flex_eu_config_test_oa);
-
- dev_priv->perf.test_config.sysfs_metric.name = "a291665e-244b-4b76-9b9a-01de9d3c8068";
- dev_priv->perf.test_config.sysfs_metric.attrs = dev_priv->perf.test_config.attrs;
-
- dev_priv->perf.test_config.attrs[0] = &dev_priv->perf.test_config.sysfs_metric_id.attr;
-
- dev_priv->perf.test_config.sysfs_metric_id.attr.name = "id";
- dev_priv->perf.test_config.sysfs_metric_id.attr.mode = 0444;
- dev_priv->perf.test_config.sysfs_metric_id.show = show_test_oa_id;
-}
diff --git a/drivers/gpu/drm/i915/oa/i915_oa_icl.h b/drivers/gpu/drm/i915/oa/i915_oa_icl.h
deleted file mode 100644
index 5c64112d720e..000000000000
--- a/drivers/gpu/drm/i915/oa/i915_oa_icl.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright © 2018-2019 Intel Corporation
- *
- * Autogenerated file by GPU Top : https://github.com/rib/gputop
- * DO NOT EDIT manually!
- */
-
-#ifndef __I915_OA_ICL_H__
-#define __I915_OA_ICL_H__
-
-struct drm_i915_private;
-
-void i915_perf_load_test_config_icl(struct drm_i915_private *dev_priv);
-
-#endif
diff --git a/drivers/gpu/drm/i915/oa/i915_oa_kblgt2.c b/drivers/gpu/drm/i915/oa/i915_oa_kblgt2.c
deleted file mode 100644
index 1c3a67c9cfe0..000000000000
--- a/drivers/gpu/drm/i915/oa/i915_oa_kblgt2.c
+++ /dev/null
@@ -1,89 +0,0 @@
-// SPDX-License-Identifier: MIT
-/*
- * Copyright © 2018-2019 Intel Corporation
- *
- * Autogenerated file by GPU Top : https://github.com/rib/gputop
- * DO NOT EDIT manually!
- */
-
-#include <linux/sysfs.h>
-
-#include "i915_drv.h"
-#include "i915_oa_kblgt2.h"
-
-static const struct i915_oa_reg b_counter_config_test_oa[] = {
- { _MMIO(0x2740), 0x00000000 },
- { _MMIO(0x2744), 0x00800000 },
- { _MMIO(0x2714), 0xf0800000 },
- { _MMIO(0x2710), 0x00000000 },
- { _MMIO(0x2724), 0xf0800000 },
- { _MMIO(0x2720), 0x00000000 },
- { _MMIO(0x2770), 0x00000004 },
- { _MMIO(0x2774), 0x00000000 },
- { _MMIO(0x2778), 0x00000003 },
- { _MMIO(0x277c), 0x00000000 },
- { _MMIO(0x2780), 0x00000007 },
- { _MMIO(0x2784), 0x00000000 },
- { _MMIO(0x2788), 0x00100002 },
- { _MMIO(0x278c), 0x0000fff7 },
- { _MMIO(0x2790), 0x00100002 },
- { _MMIO(0x2794), 0x0000ffcf },
- { _MMIO(0x2798), 0x00100082 },
- { _MMIO(0x279c), 0x0000ffef },
- { _MMIO(0x27a0), 0x001000c2 },
- { _MMIO(0x27a4), 0x0000ffe7 },
- { _MMIO(0x27a8), 0x00100001 },
- { _MMIO(0x27ac), 0x0000ffe7 },
-};
-
-static const struct i915_oa_reg flex_eu_config_test_oa[] = {
-};
-
-static const struct i915_oa_reg mux_config_test_oa[] = {
- { _MMIO(0x9840), 0x00000080 },
- { _MMIO(0x9888), 0x11810000 },
- { _MMIO(0x9888), 0x07810013 },
- { _MMIO(0x9888), 0x1f810000 },
- { _MMIO(0x9888), 0x1d810000 },
- { _MMIO(0x9888), 0x1b930040 },
- { _MMIO(0x9888), 0x07e54000 },
- { _MMIO(0x9888), 0x1f908000 },
- { _MMIO(0x9888), 0x11900000 },
- { _MMIO(0x9888), 0x37900000 },
- { _MMIO(0x9888), 0x53900000 },
- { _MMIO(0x9888), 0x45900000 },
- { _MMIO(0x9888), 0x33900000 },
-};
-
-static ssize_t
-show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf)
-{
- return sprintf(buf, "1\n");
-}
-
-void
-i915_perf_load_test_config_kblgt2(struct drm_i915_private *dev_priv)
-{
- strlcpy(dev_priv->perf.test_config.uuid,
- "baa3c7e4-52b6-4b85-801e-465a94b746dd",
- sizeof(dev_priv->perf.test_config.uuid));
- dev_priv->perf.test_config.id = 1;
-
- dev_priv->perf.test_config.mux_regs = mux_config_test_oa;
- dev_priv->perf.test_config.mux_regs_len = ARRAY_SIZE(mux_config_test_oa);
-
- dev_priv->perf.test_config.b_counter_regs = b_counter_config_test_oa;
- dev_priv->perf.test_config.b_counter_regs_len = ARRAY_SIZE(b_counter_config_test_oa);
-
- dev_priv->perf.test_config.flex_regs = flex_eu_config_test_oa;
- dev_priv->perf.test_config.flex_regs_len = ARRAY_SIZE(flex_eu_config_test_oa);
-
- dev_priv->perf.test_config.sysfs_metric.name = "baa3c7e4-52b6-4b85-801e-465a94b746dd";
- dev_priv->perf.test_config.sysfs_metric.attrs = dev_priv->perf.test_config.attrs;
-
- dev_priv->perf.test_config.attrs[0] = &dev_priv->perf.test_config.sysfs_metric_id.attr;
-
- dev_priv->perf.test_config.sysfs_metric_id.attr.name = "id";
- dev_priv->perf.test_config.sysfs_metric_id.attr.mode = 0444;
- dev_priv->perf.test_config.sysfs_metric_id.show = show_test_oa_id;
-}
diff --git a/drivers/gpu/drm/i915/oa/i915_oa_kblgt2.h b/drivers/gpu/drm/i915/oa/i915_oa_kblgt2.h
deleted file mode 100644
index 810532fa6b63..000000000000
--- a/drivers/gpu/drm/i915/oa/i915_oa_kblgt2.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright © 2018-2019 Intel Corporation
- *
- * Autogenerated file by GPU Top : https://github.com/rib/gputop
- * DO NOT EDIT manually!
- */
-
-#ifndef __I915_OA_KBLGT2_H__
-#define __I915_OA_KBLGT2_H__
-
-struct drm_i915_private;
-
-void i915_perf_load_test_config_kblgt2(struct drm_i915_private *dev_priv);
-
-#endif
diff --git a/drivers/gpu/drm/i915/oa/i915_oa_kblgt3.c b/drivers/gpu/drm/i915/oa/i915_oa_kblgt3.c
deleted file mode 100644
index ebbe5a9c9fdc..000000000000
--- a/drivers/gpu/drm/i915/oa/i915_oa_kblgt3.c
+++ /dev/null
@@ -1,89 +0,0 @@
-// SPDX-License-Identifier: MIT
-/*
- * Copyright © 2018-2019 Intel Corporation
- *
- * Autogenerated file by GPU Top : https://github.com/rib/gputop
- * DO NOT EDIT manually!
- */
-
-#include <linux/sysfs.h>
-
-#include "i915_drv.h"
-#include "i915_oa_kblgt3.h"
-
-static const struct i915_oa_reg b_counter_config_test_oa[] = {
- { _MMIO(0x2740), 0x00000000 },
- { _MMIO(0x2744), 0x00800000 },
- { _MMIO(0x2714), 0xf0800000 },
- { _MMIO(0x2710), 0x00000000 },
- { _MMIO(0x2724), 0xf0800000 },
- { _MMIO(0x2720), 0x00000000 },
- { _MMIO(0x2770), 0x00000004 },
- { _MMIO(0x2774), 0x00000000 },
- { _MMIO(0x2778), 0x00000003 },
- { _MMIO(0x277c), 0x00000000 },
- { _MMIO(0x2780), 0x00000007 },
- { _MMIO(0x2784), 0x00000000 },
- { _MMIO(0x2788), 0x00100002 },
- { _MMIO(0x278c), 0x0000fff7 },
- { _MMIO(0x2790), 0x00100002 },
- { _MMIO(0x2794), 0x0000ffcf },
- { _MMIO(0x2798), 0x00100082 },
- { _MMIO(0x279c), 0x0000ffef },
- { _MMIO(0x27a0), 0x001000c2 },
- { _MMIO(0x27a4), 0x0000ffe7 },
- { _MMIO(0x27a8), 0x00100001 },
- { _MMIO(0x27ac), 0x0000ffe7 },
-};
-
-static const struct i915_oa_reg flex_eu_config_test_oa[] = {
-};
-
-static const struct i915_oa_reg mux_config_test_oa[] = {
- { _MMIO(0x9840), 0x00000080 },
- { _MMIO(0x9888), 0x11810000 },
- { _MMIO(0x9888), 0x07810013 },
- { _MMIO(0x9888), 0x1f810000 },
- { _MMIO(0x9888), 0x1d810000 },
- { _MMIO(0x9888), 0x1b930040 },
- { _MMIO(0x9888), 0x07e54000 },
- { _MMIO(0x9888), 0x1f908000 },
- { _MMIO(0x9888), 0x11900000 },
- { _MMIO(0x9888), 0x37900000 },
- { _MMIO(0x9888), 0x53900000 },
- { _MMIO(0x9888), 0x45900000 },
- { _MMIO(0x9888), 0x33900000 },
-};
-
-static ssize_t
-show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf)
-{
- return sprintf(buf, "1\n");
-}
-
-void
-i915_perf_load_test_config_kblgt3(struct drm_i915_private *dev_priv)
-{
- strlcpy(dev_priv->perf.test_config.uuid,
- "f1792f32-6db2-4b50-b4b2-557128f1688d",
- sizeof(dev_priv->perf.test_config.uuid));
- dev_priv->perf.test_config.id = 1;
-
- dev_priv->perf.test_config.mux_regs = mux_config_test_oa;
- dev_priv->perf.test_config.mux_regs_len = ARRAY_SIZE(mux_config_test_oa);
-
- dev_priv->perf.test_config.b_counter_regs = b_counter_config_test_oa;
- dev_priv->perf.test_config.b_counter_regs_len = ARRAY_SIZE(b_counter_config_test_oa);
-
- dev_priv->perf.test_config.flex_regs = flex_eu_config_test_oa;
- dev_priv->perf.test_config.flex_regs_len = ARRAY_SIZE(flex_eu_config_test_oa);
-
- dev_priv->perf.test_config.sysfs_metric.name = "f1792f32-6db2-4b50-b4b2-557128f1688d";
- dev_priv->perf.test_config.sysfs_metric.attrs = dev_priv->perf.test_config.attrs;
-
- dev_priv->perf.test_config.attrs[0] = &dev_priv->perf.test_config.sysfs_metric_id.attr;
-
- dev_priv->perf.test_config.sysfs_metric_id.attr.name = "id";
- dev_priv->perf.test_config.sysfs_metric_id.attr.mode = 0444;
- dev_priv->perf.test_config.sysfs_metric_id.show = show_test_oa_id;
-}
diff --git a/drivers/gpu/drm/i915/oa/i915_oa_kblgt3.h b/drivers/gpu/drm/i915/oa/i915_oa_kblgt3.h
deleted file mode 100644
index 13d70456fabd..000000000000
--- a/drivers/gpu/drm/i915/oa/i915_oa_kblgt3.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright © 2018-2019 Intel Corporation
- *
- * Autogenerated file by GPU Top : https://github.com/rib/gputop
- * DO NOT EDIT manually!
- */
-
-#ifndef __I915_OA_KBLGT3_H__
-#define __I915_OA_KBLGT3_H__
-
-struct drm_i915_private;
-
-void i915_perf_load_test_config_kblgt3(struct drm_i915_private *dev_priv);
-
-#endif
diff --git a/drivers/gpu/drm/i915/oa/i915_oa_sklgt2.c b/drivers/gpu/drm/i915/oa/i915_oa_sklgt2.c
deleted file mode 100644
index 1bc359ed34e8..000000000000
--- a/drivers/gpu/drm/i915/oa/i915_oa_sklgt2.c
+++ /dev/null
@@ -1,88 +0,0 @@
-// SPDX-License-Identifier: MIT
-/*
- * Copyright © 2018-2019 Intel Corporation
- *
- * Autogenerated file by GPU Top : https://github.com/rib/gputop
- * DO NOT EDIT manually!
- */
-
-#include <linux/sysfs.h>
-
-#include "i915_drv.h"
-#include "i915_oa_sklgt2.h"
-
-static const struct i915_oa_reg b_counter_config_test_oa[] = {
- { _MMIO(0x2740), 0x00000000 },
- { _MMIO(0x2714), 0xf0800000 },
- { _MMIO(0x2710), 0x00000000 },
- { _MMIO(0x2724), 0xf0800000 },
- { _MMIO(0x2720), 0x00000000 },
- { _MMIO(0x2770), 0x00000004 },
- { _MMIO(0x2774), 0x00000000 },
- { _MMIO(0x2778), 0x00000003 },
- { _MMIO(0x277c), 0x00000000 },
- { _MMIO(0x2780), 0x00000007 },
- { _MMIO(0x2784), 0x00000000 },
- { _MMIO(0x2788), 0x00100002 },
- { _MMIO(0x278c), 0x0000fff7 },
- { _MMIO(0x2790), 0x00100002 },
- { _MMIO(0x2794), 0x0000ffcf },
- { _MMIO(0x2798), 0x00100082 },
- { _MMIO(0x279c), 0x0000ffef },
- { _MMIO(0x27a0), 0x001000c2 },
- { _MMIO(0x27a4), 0x0000ffe7 },
- { _MMIO(0x27a8), 0x00100001 },
- { _MMIO(0x27ac), 0x0000ffe7 },
-};
-
-static const struct i915_oa_reg flex_eu_config_test_oa[] = {
-};
-
-static const struct i915_oa_reg mux_config_test_oa[] = {
- { _MMIO(0x9840), 0x00000080 },
- { _MMIO(0x9888), 0x11810000 },
- { _MMIO(0x9888), 0x07810016 },
- { _MMIO(0x9888), 0x1f810000 },
- { _MMIO(0x9888), 0x1d810000 },
- { _MMIO(0x9888), 0x1b930040 },
- { _MMIO(0x9888), 0x07e54000 },
- { _MMIO(0x9888), 0x1f908000 },
- { _MMIO(0x9888), 0x11900000 },
- { _MMIO(0x9888), 0x37900000 },
- { _MMIO(0x9888), 0x53900000 },
- { _MMIO(0x9888), 0x45900000 },
- { _MMIO(0x9888), 0x33900000 },
-};
-
-static ssize_t
-show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf)
-{
- return sprintf(buf, "1\n");
-}
-
-void
-i915_perf_load_test_config_sklgt2(struct drm_i915_private *dev_priv)
-{
- strlcpy(dev_priv->perf.test_config.uuid,
- "1651949f-0ac0-4cb1-a06f-dafd74a407d1",
- sizeof(dev_priv->perf.test_config.uuid));
- dev_priv->perf.test_config.id = 1;
-
- dev_priv->perf.test_config.mux_regs = mux_config_test_oa;
- dev_priv->perf.test_config.mux_regs_len = ARRAY_SIZE(mux_config_test_oa);
-
- dev_priv->perf.test_config.b_counter_regs = b_counter_config_test_oa;
- dev_priv->perf.test_config.b_counter_regs_len = ARRAY_SIZE(b_counter_config_test_oa);
-
- dev_priv->perf.test_config.flex_regs = flex_eu_config_test_oa;
- dev_priv->perf.test_config.flex_regs_len = ARRAY_SIZE(flex_eu_config_test_oa);
-
- dev_priv->perf.test_config.sysfs_metric.name = "1651949f-0ac0-4cb1-a06f-dafd74a407d1";
- dev_priv->perf.test_config.sysfs_metric.attrs = dev_priv->perf.test_config.attrs;
-
- dev_priv->perf.test_config.attrs[0] = &dev_priv->perf.test_config.sysfs_metric_id.attr;
-
- dev_priv->perf.test_config.sysfs_metric_id.attr.name = "id";
- dev_priv->perf.test_config.sysfs_metric_id.attr.mode = 0444;
- dev_priv->perf.test_config.sysfs_metric_id.show = show_test_oa_id;
-}
diff --git a/drivers/gpu/drm/i915/oa/i915_oa_sklgt2.h b/drivers/gpu/drm/i915/oa/i915_oa_sklgt2.h
deleted file mode 100644
index fda70c51a6ec..000000000000
--- a/drivers/gpu/drm/i915/oa/i915_oa_sklgt2.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright © 2018-2019 Intel Corporation
- *
- * Autogenerated file by GPU Top : https://github.com/rib/gputop
- * DO NOT EDIT manually!
- */
-
-#ifndef __I915_OA_SKLGT2_H__
-#define __I915_OA_SKLGT2_H__
-
-struct drm_i915_private;
-
-void i915_perf_load_test_config_sklgt2(struct drm_i915_private *dev_priv);
-
-#endif
diff --git a/drivers/gpu/drm/i915/oa/i915_oa_sklgt3.c b/drivers/gpu/drm/i915/oa/i915_oa_sklgt3.c
deleted file mode 100644
index 6e352f881310..000000000000
--- a/drivers/gpu/drm/i915/oa/i915_oa_sklgt3.c
+++ /dev/null
@@ -1,89 +0,0 @@
-// SPDX-License-Identifier: MIT
-/*
- * Copyright © 2018-2019 Intel Corporation
- *
- * Autogenerated file by GPU Top : https://github.com/rib/gputop
- * DO NOT EDIT manually!
- */
-
-#include <linux/sysfs.h>
-
-#include "i915_drv.h"
-#include "i915_oa_sklgt3.h"
-
-static const struct i915_oa_reg b_counter_config_test_oa[] = {
- { _MMIO(0x2740), 0x00000000 },
- { _MMIO(0x2744), 0x00800000 },
- { _MMIO(0x2714), 0xf0800000 },
- { _MMIO(0x2710), 0x00000000 },
- { _MMIO(0x2724), 0xf0800000 },
- { _MMIO(0x2720), 0x00000000 },
- { _MMIO(0x2770), 0x00000004 },
- { _MMIO(0x2774), 0x00000000 },
- { _MMIO(0x2778), 0x00000003 },
- { _MMIO(0x277c), 0x00000000 },
- { _MMIO(0x2780), 0x00000007 },
- { _MMIO(0x2784), 0x00000000 },
- { _MMIO(0x2788), 0x00100002 },
- { _MMIO(0x278c), 0x0000fff7 },
- { _MMIO(0x2790), 0x00100002 },
- { _MMIO(0x2794), 0x0000ffcf },
- { _MMIO(0x2798), 0x00100082 },
- { _MMIO(0x279c), 0x0000ffef },
- { _MMIO(0x27a0), 0x001000c2 },
- { _MMIO(0x27a4), 0x0000ffe7 },
- { _MMIO(0x27a8), 0x00100001 },
- { _MMIO(0x27ac), 0x0000ffe7 },
-};
-
-static const struct i915_oa_reg flex_eu_config_test_oa[] = {
-};
-
-static const struct i915_oa_reg mux_config_test_oa[] = {
- { _MMIO(0x9840), 0x00000080 },
- { _MMIO(0x9888), 0x11810000 },
- { _MMIO(0x9888), 0x07810013 },
- { _MMIO(0x9888), 0x1f810000 },
- { _MMIO(0x9888), 0x1d810000 },
- { _MMIO(0x9888), 0x1b930040 },
- { _MMIO(0x9888), 0x07e54000 },
- { _MMIO(0x9888), 0x1f908000 },
- { _MMIO(0x9888), 0x11900000 },
- { _MMIO(0x9888), 0x37900000 },
- { _MMIO(0x9888), 0x53900000 },
- { _MMIO(0x9888), 0x45900000 },
- { _MMIO(0x9888), 0x33900000 },
-};
-
-static ssize_t
-show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf)
-{
- return sprintf(buf, "1\n");
-}
-
-void
-i915_perf_load_test_config_sklgt3(struct drm_i915_private *dev_priv)
-{
- strlcpy(dev_priv->perf.test_config.uuid,
- "2b985803-d3c9-4629-8a4f-634bfecba0e8",
- sizeof(dev_priv->perf.test_config.uuid));
- dev_priv->perf.test_config.id = 1;
-
- dev_priv->perf.test_config.mux_regs = mux_config_test_oa;
- dev_priv->perf.test_config.mux_regs_len = ARRAY_SIZE(mux_config_test_oa);
-
- dev_priv->perf.test_config.b_counter_regs = b_counter_config_test_oa;
- dev_priv->perf.test_config.b_counter_regs_len = ARRAY_SIZE(b_counter_config_test_oa);
-
- dev_priv->perf.test_config.flex_regs = flex_eu_config_test_oa;
- dev_priv->perf.test_config.flex_regs_len = ARRAY_SIZE(flex_eu_config_test_oa);
-
- dev_priv->perf.test_config.sysfs_metric.name = "2b985803-d3c9-4629-8a4f-634bfecba0e8";
- dev_priv->perf.test_config.sysfs_metric.attrs = dev_priv->perf.test_config.attrs;
-
- dev_priv->perf.test_config.attrs[0] = &dev_priv->perf.test_config.sysfs_metric_id.attr;
-
- dev_priv->perf.test_config.sysfs_metric_id.attr.name = "id";
- dev_priv->perf.test_config.sysfs_metric_id.attr.mode = 0444;
- dev_priv->perf.test_config.sysfs_metric_id.show = show_test_oa_id;
-}
diff --git a/drivers/gpu/drm/i915/oa/i915_oa_sklgt3.h b/drivers/gpu/drm/i915/oa/i915_oa_sklgt3.h
deleted file mode 100644
index df74eba5799e..000000000000
--- a/drivers/gpu/drm/i915/oa/i915_oa_sklgt3.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright © 2018-2019 Intel Corporation
- *
- * Autogenerated file by GPU Top : https://github.com/rib/gputop
- * DO NOT EDIT manually!
- */
-
-#ifndef __I915_OA_SKLGT3_H__
-#define __I915_OA_SKLGT3_H__
-
-struct drm_i915_private;
-
-void i915_perf_load_test_config_sklgt3(struct drm_i915_private *dev_priv);
-
-#endif
diff --git a/drivers/gpu/drm/i915/oa/i915_oa_sklgt4.c b/drivers/gpu/drm/i915/oa/i915_oa_sklgt4.c
deleted file mode 100644
index 8f345115a306..000000000000
--- a/drivers/gpu/drm/i915/oa/i915_oa_sklgt4.c
+++ /dev/null
@@ -1,89 +0,0 @@
-// SPDX-License-Identifier: MIT
-/*
- * Copyright © 2018-2019 Intel Corporation
- *
- * Autogenerated file by GPU Top : https://github.com/rib/gputop
- * DO NOT EDIT manually!
- */
-
-#include <linux/sysfs.h>
-
-#include "i915_drv.h"
-#include "i915_oa_sklgt4.h"
-
-static const struct i915_oa_reg b_counter_config_test_oa[] = {
- { _MMIO(0x2740), 0x00000000 },
- { _MMIO(0x2744), 0x00800000 },
- { _MMIO(0x2714), 0xf0800000 },
- { _MMIO(0x2710), 0x00000000 },
- { _MMIO(0x2724), 0xf0800000 },
- { _MMIO(0x2720), 0x00000000 },
- { _MMIO(0x2770), 0x00000004 },
- { _MMIO(0x2774), 0x00000000 },
- { _MMIO(0x2778), 0x00000003 },
- { _MMIO(0x277c), 0x00000000 },
- { _MMIO(0x2780), 0x00000007 },
- { _MMIO(0x2784), 0x00000000 },
- { _MMIO(0x2788), 0x00100002 },
- { _MMIO(0x278c), 0x0000fff7 },
- { _MMIO(0x2790), 0x00100002 },
- { _MMIO(0x2794), 0x0000ffcf },
- { _MMIO(0x2798), 0x00100082 },
- { _MMIO(0x279c), 0x0000ffef },
- { _MMIO(0x27a0), 0x001000c2 },
- { _MMIO(0x27a4), 0x0000ffe7 },
- { _MMIO(0x27a8), 0x00100001 },
- { _MMIO(0x27ac), 0x0000ffe7 },
-};
-
-static const struct i915_oa_reg flex_eu_config_test_oa[] = {
-};
-
-static const struct i915_oa_reg mux_config_test_oa[] = {
- { _MMIO(0x9840), 0x00000080 },
- { _MMIO(0x9888), 0x11810000 },
- { _MMIO(0x9888), 0x07810013 },
- { _MMIO(0x9888), 0x1f810000 },
- { _MMIO(0x9888), 0x1d810000 },
- { _MMIO(0x9888), 0x1b930040 },
- { _MMIO(0x9888), 0x07e54000 },
- { _MMIO(0x9888), 0x1f908000 },
- { _MMIO(0x9888), 0x11900000 },
- { _MMIO(0x9888), 0x37900000 },
- { _MMIO(0x9888), 0x53900000 },
- { _MMIO(0x9888), 0x45900000 },
- { _MMIO(0x9888), 0x33900000 },
-};
-
-static ssize_t
-show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf)
-{
- return sprintf(buf, "1\n");
-}
-
-void
-i915_perf_load_test_config_sklgt4(struct drm_i915_private *dev_priv)
-{
- strlcpy(dev_priv->perf.test_config.uuid,
- "882fa433-1f4a-4a67-a962-c741888fe5f5",
- sizeof(dev_priv->perf.test_config.uuid));
- dev_priv->perf.test_config.id = 1;
-
- dev_priv->perf.test_config.mux_regs = mux_config_test_oa;
- dev_priv->perf.test_config.mux_regs_len = ARRAY_SIZE(mux_config_test_oa);
-
- dev_priv->perf.test_config.b_counter_regs = b_counter_config_test_oa;
- dev_priv->perf.test_config.b_counter_regs_len = ARRAY_SIZE(b_counter_config_test_oa);
-
- dev_priv->perf.test_config.flex_regs = flex_eu_config_test_oa;
- dev_priv->perf.test_config.flex_regs_len = ARRAY_SIZE(flex_eu_config_test_oa);
-
- dev_priv->perf.test_config.sysfs_metric.name = "882fa433-1f4a-4a67-a962-c741888fe5f5";
- dev_priv->perf.test_config.sysfs_metric.attrs = dev_priv->perf.test_config.attrs;
-
- dev_priv->perf.test_config.attrs[0] = &dev_priv->perf.test_config.sysfs_metric_id.attr;
-
- dev_priv->perf.test_config.sysfs_metric_id.attr.name = "id";
- dev_priv->perf.test_config.sysfs_metric_id.attr.mode = 0444;
- dev_priv->perf.test_config.sysfs_metric_id.show = show_test_oa_id;
-}
diff --git a/drivers/gpu/drm/i915/oa/i915_oa_sklgt4.h b/drivers/gpu/drm/i915/oa/i915_oa_sklgt4.h
deleted file mode 100644
index 378ab7ab78d5..000000000000
--- a/drivers/gpu/drm/i915/oa/i915_oa_sklgt4.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright © 2018-2019 Intel Corporation
- *
- * Autogenerated file by GPU Top : https://github.com/rib/gputop
- * DO NOT EDIT manually!
- */
-
-#ifndef __I915_OA_SKLGT4_H__
-#define __I915_OA_SKLGT4_H__
-
-struct drm_i915_private;
-
-void i915_perf_load_test_config_sklgt4(struct drm_i915_private *dev_priv);
-
-#endif
diff --git a/drivers/gpu/drm/i915/oa/i915_oa_tgl.c b/drivers/gpu/drm/i915/oa/i915_oa_tgl.c
deleted file mode 100644
index a29d93707345..000000000000
--- a/drivers/gpu/drm/i915/oa/i915_oa_tgl.c
+++ /dev/null
@@ -1,121 +0,0 @@
-// SPDX-License-Identifier: MIT
-/*
- * Copyright © 2018 Intel Corporation
- *
- * Autogenerated file by GPU Top : https://github.com/rib/gputop
- * DO NOT EDIT manually!
- */
-
-#include <linux/sysfs.h>
-
-#include "i915_drv.h"
-#include "i915_oa_tgl.h"
-
-static const struct i915_oa_reg b_counter_config_test_oa[] = {
- { _MMIO(0xD920), 0x00000000 },
- { _MMIO(0xD900), 0x00000000 },
- { _MMIO(0xD904), 0xF0800000 },
- { _MMIO(0xD910), 0x00000000 },
- { _MMIO(0xD914), 0xF0800000 },
- { _MMIO(0xDC40), 0x00FF0000 },
- { _MMIO(0xD940), 0x00000004 },
- { _MMIO(0xD944), 0x0000FFFF },
- { _MMIO(0xDC00), 0x00000004 },
- { _MMIO(0xDC04), 0x0000FFFF },
- { _MMIO(0xD948), 0x00000003 },
- { _MMIO(0xD94C), 0x0000FFFF },
- { _MMIO(0xDC08), 0x00000003 },
- { _MMIO(0xDC0C), 0x0000FFFF },
- { _MMIO(0xD950), 0x00000007 },
- { _MMIO(0xD954), 0x0000FFFF },
- { _MMIO(0xDC10), 0x00000007 },
- { _MMIO(0xDC14), 0x0000FFFF },
- { _MMIO(0xD958), 0x00100002 },
- { _MMIO(0xD95C), 0x0000FFF7 },
- { _MMIO(0xDC18), 0x00100002 },
- { _MMIO(0xDC1C), 0x0000FFF7 },
- { _MMIO(0xD960), 0x00100002 },
- { _MMIO(0xD964), 0x0000FFCF },
- { _MMIO(0xDC20), 0x00100002 },
- { _MMIO(0xDC24), 0x0000FFCF },
- { _MMIO(0xD968), 0x00100082 },
- { _MMIO(0xD96C), 0x0000FFEF },
- { _MMIO(0xDC28), 0x00100082 },
- { _MMIO(0xDC2C), 0x0000FFEF },
- { _MMIO(0xD970), 0x001000C2 },
- { _MMIO(0xD974), 0x0000FFE7 },
- { _MMIO(0xDC30), 0x001000C2 },
- { _MMIO(0xDC34), 0x0000FFE7 },
- { _MMIO(0xD978), 0x00100001 },
- { _MMIO(0xD97C), 0x0000FFE7 },
- { _MMIO(0xDC38), 0x00100001 },
- { _MMIO(0xDC3C), 0x0000FFE7 },
-};
-
-static const struct i915_oa_reg flex_eu_config_test_oa[] = {
-};
-
-static const struct i915_oa_reg mux_config_test_oa[] = {
- { _MMIO(0x0D04), 0x00000200 },
- { _MMIO(0x9840), 0x00000000 },
- { _MMIO(0x9884), 0x00000000 },
- { _MMIO(0x9888), 0x280E0000 },
- { _MMIO(0x9888), 0x1E0E0147 },
- { _MMIO(0x9888), 0x180E0000 },
- { _MMIO(0x9888), 0x160E0000 },
- { _MMIO(0x9888), 0x1E0F1000 },
- { _MMIO(0x9888), 0x1E104000 },
- { _MMIO(0x9888), 0x2E020100 },
- { _MMIO(0x9888), 0x2C030004 },
- { _MMIO(0x9888), 0x38003000 },
- { _MMIO(0x9888), 0x1E0A8000 },
- { _MMIO(0x9884), 0x00000003 },
- { _MMIO(0x9888), 0x49110000 },
- { _MMIO(0x9888), 0x5D101400 },
- { _MMIO(0x9888), 0x1D140020 },
- { _MMIO(0x9888), 0x1D1103A3 },
- { _MMIO(0x9888), 0x01110000 },
- { _MMIO(0x9888), 0x61111000 },
- { _MMIO(0x9888), 0x1F128000 },
- { _MMIO(0x9888), 0x17100000 },
- { _MMIO(0x9888), 0x55100630 },
- { _MMIO(0x9888), 0x57100000 },
- { _MMIO(0x9888), 0x31100000 },
- { _MMIO(0x9884), 0x00000003 },
- { _MMIO(0x9888), 0x65100002 },
- { _MMIO(0x9884), 0x00000000 },
- { _MMIO(0x9888), 0x42000001 },
-};
-
-static ssize_t
-show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf)
-{
- return sprintf(buf, "1\n");
-}
-
-void
-i915_perf_load_test_config_tgl(struct drm_i915_private *dev_priv)
-{
- strlcpy(dev_priv->perf.test_config.uuid,
- "80a833f0-2504-4321-8894-e9277844ce7b",
- sizeof(dev_priv->perf.test_config.uuid));
- dev_priv->perf.test_config.id = 1;
-
- dev_priv->perf.test_config.mux_regs = mux_config_test_oa;
- dev_priv->perf.test_config.mux_regs_len = ARRAY_SIZE(mux_config_test_oa);
-
- dev_priv->perf.test_config.b_counter_regs = b_counter_config_test_oa;
- dev_priv->perf.test_config.b_counter_regs_len = ARRAY_SIZE(b_counter_config_test_oa);
-
- dev_priv->perf.test_config.flex_regs = flex_eu_config_test_oa;
- dev_priv->perf.test_config.flex_regs_len = ARRAY_SIZE(flex_eu_config_test_oa);
-
- dev_priv->perf.test_config.sysfs_metric.name = "80a833f0-2504-4321-8894-e9277844ce7b";
- dev_priv->perf.test_config.sysfs_metric.attrs = dev_priv->perf.test_config.attrs;
-
- dev_priv->perf.test_config.attrs[0] = &dev_priv->perf.test_config.sysfs_metric_id.attr;
-
- dev_priv->perf.test_config.sysfs_metric_id.attr.name = "id";
- dev_priv->perf.test_config.sysfs_metric_id.attr.mode = 0444;
- dev_priv->perf.test_config.sysfs_metric_id.show = show_test_oa_id;
-}
diff --git a/drivers/gpu/drm/i915/oa/i915_oa_tgl.h b/drivers/gpu/drm/i915/oa/i915_oa_tgl.h
deleted file mode 100644
index 4c25f0be825c..000000000000
--- a/drivers/gpu/drm/i915/oa/i915_oa_tgl.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright © 2018 Intel Corporation
- *
- * Autogenerated file by GPU Top : https://github.com/rib/gputop
- * DO NOT EDIT manually!
- */
-
-#ifndef __I915_OA_TGL_H__
-#define __I915_OA_TGL_H__
-
-struct drm_i915_private;
-
-void i915_perf_load_test_config_tgl(struct drm_i915_private *dev_priv);
-
-#endif
diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c
index 68bbb1580162..4002c984c2e0 100644
--- a/drivers/gpu/drm/i915/selftests/i915_active.c
+++ b/drivers/gpu/drm/i915/selftests/i915_active.c
@@ -153,7 +153,7 @@ static int live_active_wait(void *arg)
if (IS_ERR(active))
return PTR_ERR(active);
- i915_active_wait(&active->base);
+ __i915_active_wait(&active->base, TASK_UNINTERRUPTIBLE);
if (!READ_ONCE(active->retired)) {
struct drm_printer p = drm_err_printer(__func__);
@@ -228,11 +228,11 @@ static int live_active_barrier(void *arg)
}
i915_active_release(&active->base);
+ if (err)
+ goto out;
- if (err == 0)
- err = i915_active_wait(&active->base);
-
- if (err == 0 && !READ_ONCE(active->retired)) {
+ __i915_active_wait(&active->base, TASK_UNINTERRUPTIBLE);
+ if (!READ_ONCE(active->retired)) {
pr_err("i915_active not retired after flushing barriers!\n");
err = -EINVAL;
}
@@ -277,7 +277,7 @@ static struct intel_engine_cs *node_to_barrier(struct active_node *it)
void i915_active_print(struct i915_active *ref, struct drm_printer *m)
{
- drm_printf(m, "active %pS:%pS\n", ref->active, ref->retire);
+ drm_printf(m, "active %ps:%ps\n", ref->active, ref->retire);
drm_printf(m, "\tcount: %d\n", atomic_read(&ref->count));
drm_printf(m, "\tpreallocated barriers? %s\n",
yesno(!llist_empty(&ref->preallocated_barriers)));
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c
index 623759b73bb4..88d400b9df88 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem.c
@@ -125,8 +125,6 @@ static void pm_resume(struct drm_i915_private *i915)
*/
with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
i915_ggtt_resume(&i915->ggtt);
- i915_gem_restore_fences(&i915->ggtt);
-
i915_gem_resume(i915);
}
}
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
index 06ef88510209..028baae9631f 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
@@ -45,8 +45,8 @@ static void quirk_add(struct drm_i915_gem_object *obj,
static int populate_ggtt(struct i915_ggtt *ggtt, struct list_head *objects)
{
- unsigned long unbound, bound, count;
struct drm_i915_gem_object *obj;
+ unsigned long count;
count = 0;
do {
@@ -72,30 +72,6 @@ static int populate_ggtt(struct i915_ggtt *ggtt, struct list_head *objects)
pr_debug("Filled GGTT with %lu pages [%llu total]\n",
count, ggtt->vm.total / PAGE_SIZE);
- bound = 0;
- unbound = 0;
- list_for_each_entry(obj, objects, st_link) {
- GEM_BUG_ON(!obj->mm.quirked);
-
- if (atomic_read(&obj->bind_count))
- bound++;
- else
- unbound++;
- }
- GEM_BUG_ON(bound + unbound != count);
-
- if (unbound) {
- pr_err("%s: Found %lu objects unbound, expected %u!\n",
- __func__, unbound, 0);
- return -EINVAL;
- }
-
- if (bound != count) {
- pr_err("%s: Found %lu objects bound, expected %lu!\n",
- __func__, bound, count);
- return -EINVAL;
- }
-
if (list_empty(&ggtt->vm.bound_list)) {
pr_err("No objects on the GGTT inactive list!\n");
return -EINVAL;
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
index b342bef5e7c9..2e471500a646 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
@@ -331,9 +331,6 @@ static void close_object_list(struct list_head *objects,
vma = i915_vma_instance(obj, vm, NULL);
if (!IS_ERR(vma))
ignored = i915_vma_unbind(vma);
- /* Only ppgtt vma may be closed before the object is freed */
- if (!IS_ERR(vma) && !i915_vma_is_ggtt(vma))
- i915_vma_close(vma);
list_del(&obj->st_link);
i915_gem_object_put(obj);
@@ -591,7 +588,7 @@ static int walk_hole(struct i915_address_space *vm,
pr_err("%s bind failed at %llx + %llx [hole %llx- %llx] with err=%d\n",
__func__, addr, vma->size,
hole_start, hole_end, err);
- goto err_close;
+ goto err_put;
}
i915_vma_unpin(vma);
@@ -600,14 +597,14 @@ static int walk_hole(struct i915_address_space *vm,
pr_err("%s incorrect at %llx + %llx\n",
__func__, addr, vma->size);
err = -EINVAL;
- goto err_close;
+ goto err_put;
}
err = i915_vma_unbind(vma);
if (err) {
pr_err("%s unbind failed at %llx + %llx with err=%d\n",
__func__, addr, vma->size, err);
- goto err_close;
+ goto err_put;
}
GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
@@ -616,13 +613,10 @@ static int walk_hole(struct i915_address_space *vm,
"%s timed out at %llx\n",
__func__, addr)) {
err = -EINTR;
- goto err_close;
+ goto err_put;
}
}
-err_close:
- if (!i915_vma_is_ggtt(vma))
- i915_vma_close(vma);
err_put:
i915_gem_object_put(obj);
if (err)
@@ -675,7 +669,7 @@ static int pot_hole(struct i915_address_space *vm,
addr,
hole_start, hole_end,
err);
- goto err;
+ goto err_obj;
}
if (!drm_mm_node_allocated(&vma->node) ||
@@ -685,7 +679,7 @@ static int pot_hole(struct i915_address_space *vm,
i915_vma_unpin(vma);
err = i915_vma_unbind(vma);
err = -EINVAL;
- goto err;
+ goto err_obj;
}
i915_vma_unpin(vma);
@@ -697,13 +691,10 @@ static int pot_hole(struct i915_address_space *vm,
"%s timed out after %d/%d\n",
__func__, pot, fls64(hole_end - 1) - 1)) {
err = -EINTR;
- goto err;
+ goto err_obj;
}
}
-err:
- if (!i915_vma_is_ggtt(vma))
- i915_vma_close(vma);
err_obj:
i915_gem_object_put(obj);
return err;
@@ -778,7 +769,7 @@ static int drunk_hole(struct i915_address_space *vm,
addr, BIT_ULL(size),
hole_start, hole_end,
err);
- goto err;
+ goto err_obj;
}
if (!drm_mm_node_allocated(&vma->node) ||
@@ -788,7 +779,7 @@ static int drunk_hole(struct i915_address_space *vm,
i915_vma_unpin(vma);
err = i915_vma_unbind(vma);
err = -EINVAL;
- goto err;
+ goto err_obj;
}
i915_vma_unpin(vma);
@@ -799,13 +790,10 @@ static int drunk_hole(struct i915_address_space *vm,
"%s timed out after %d/%d\n",
__func__, n, count)) {
err = -EINTR;
- goto err;
+ goto err_obj;
}
}
-err:
- if (!i915_vma_is_ggtt(vma))
- i915_vma_close(vma);
err_obj:
i915_gem_object_put(obj);
kfree(order);
@@ -1229,7 +1217,6 @@ static void track_vma_bind(struct i915_vma *vma)
{
struct drm_i915_gem_object *obj = vma->obj;
- atomic_inc(&obj->bind_count); /* track for eviction later */
__i915_gem_object_pin_pages(obj);
GEM_BUG_ON(vma->pages);
diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
index 0a953bfc0585..5dd5d81646c4 100644
--- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
+++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
@@ -37,6 +37,7 @@ selftest(gem, i915_gem_live_selftests)
selftest(evict, i915_gem_evict_live_selftests)
selftest(hugepages, i915_gem_huge_page_live_selftests)
selftest(gem_contexts, i915_gem_context_live_selftests)
+selftest(gem_execbuf, i915_gem_execbuffer_live_selftests)
selftest(blt, i915_gem_object_blt_live_selftests)
selftest(client, i915_gem_client_blt_live_selftests)
selftest(reset, intel_reset_live_selftests)
diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
index 5b39bab4da1d..6a2be7d0dd95 100644
--- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
+++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
@@ -16,6 +16,7 @@
* Tests are executed in order by igt/drv_selftest
*/
selftest(sanitycheck, i915_mock_sanitycheck) /* keep first (igt selfcheck) */
+selftest(shmem, shmem_utils_mock_selftests)
selftest(fence, i915_sw_fence_mock_selftests)
selftest(scatterlist, scatterlist_mock_selftests)
selftest(syncmap, i915_syncmap_mock_selftests)
diff --git a/drivers/gpu/drm/i915/selftests/i915_perf.c b/drivers/gpu/drm/i915/selftests/i915_perf.c
index d1a1568c47ba..8eb3108f1767 100644
--- a/drivers/gpu/drm/i915/selftests/i915_perf.c
+++ b/drivers/gpu/drm/i915/selftests/i915_perf.c
@@ -14,10 +14,85 @@
#include "igt_flush_test.h"
#include "lib_sw_fence.h"
+#define TEST_OA_CONFIG_UUID "12345678-1234-1234-1234-1234567890ab"
+
+static int
+alloc_empty_config(struct i915_perf *perf)
+{
+ struct i915_oa_config *oa_config;
+
+ oa_config = kzalloc(sizeof(*oa_config), GFP_KERNEL);
+ if (!oa_config)
+ return -ENOMEM;
+
+ oa_config->perf = perf;
+ kref_init(&oa_config->ref);
+
+ strlcpy(oa_config->uuid, TEST_OA_CONFIG_UUID, sizeof(oa_config->uuid));
+
+ mutex_lock(&perf->metrics_lock);
+
+ oa_config->id = idr_alloc(&perf->metrics_idr, oa_config, 2, 0, GFP_KERNEL);
+ if (oa_config->id < 0) {
+ mutex_unlock(&perf->metrics_lock);
+ i915_oa_config_put(oa_config);
+ return -ENOMEM;
+ }
+
+ mutex_unlock(&perf->metrics_lock);
+
+ return 0;
+}
+
+static void
+destroy_empty_config(struct i915_perf *perf)
+{
+ struct i915_oa_config *oa_config = NULL, *tmp;
+ int id;
+
+ mutex_lock(&perf->metrics_lock);
+
+ idr_for_each_entry(&perf->metrics_idr, tmp, id) {
+ if (!strcmp(tmp->uuid, TEST_OA_CONFIG_UUID)) {
+ oa_config = tmp;
+ break;
+ }
+ }
+
+ if (oa_config)
+ idr_remove(&perf->metrics_idr, oa_config->id);
+
+ mutex_unlock(&perf->metrics_lock);
+
+ if (oa_config)
+ i915_oa_config_put(oa_config);
+}
+
+static struct i915_oa_config *
+get_empty_config(struct i915_perf *perf)
+{
+ struct i915_oa_config *oa_config = NULL, *tmp;
+ int id;
+
+ mutex_lock(&perf->metrics_lock);
+
+ idr_for_each_entry(&perf->metrics_idr, tmp, id) {
+ if (!strcmp(tmp->uuid, TEST_OA_CONFIG_UUID)) {
+ oa_config = i915_oa_config_get(tmp);
+ break;
+ }
+ }
+
+ mutex_unlock(&perf->metrics_lock);
+
+ return oa_config;
+}
+
static struct i915_perf_stream *
test_stream(struct i915_perf *perf)
{
struct drm_i915_perf_open_param param = {};
+ struct i915_oa_config *oa_config = get_empty_config(perf);
struct perf_open_properties props = {
.engine = intel_engine_lookup_user(perf->i915,
I915_ENGINE_CLASS_RENDER,
@@ -25,13 +100,19 @@ test_stream(struct i915_perf *perf)
.sample_flags = SAMPLE_OA_REPORT,
.oa_format = IS_GEN(perf->i915, 12) ?
I915_OA_FORMAT_A32u40_A4u32_B8_C8 : I915_OA_FORMAT_C4_B8,
- .metrics_set = 1,
};
struct i915_perf_stream *stream;
+ if (!oa_config)
+ return NULL;
+
+ props.metrics_set = oa_config->id;
+
stream = kzalloc(sizeof(*stream), GFP_KERNEL);
- if (!stream)
+ if (!stream) {
+ i915_oa_config_put(oa_config);
return NULL;
+ }
stream->perf = perf;
@@ -42,6 +123,8 @@ test_stream(struct i915_perf *perf)
}
mutex_unlock(&perf->lock);
+ i915_oa_config_put(oa_config);
+
return stream;
}
@@ -138,8 +221,7 @@ static int live_noa_delay(void *arg)
goto out;
}
- if (rq->engine->emit_init_breadcrumb &&
- i915_request_timeline(rq)->has_initial_breadcrumb) {
+ if (rq->engine->emit_init_breadcrumb) {
err = rq->engine->emit_init_breadcrumb(rq);
if (err) {
i915_request_add(rq);
@@ -180,8 +262,7 @@ static int live_noa_delay(void *arg)
delay = intel_read_status_page(stream->engine, 0x102);
delay -= intel_read_status_page(stream->engine, 0x100);
- delay = div_u64(mul_u32_u32(delay, 1000 * 1000),
- RUNTIME_INFO(i915)->cs_timestamp_frequency_khz);
+ delay = i915_cs_timestamp_ticks_to_ns(i915, delay);
pr_info("GPU delay: %uns, expected %lluns\n",
delay, expected);
@@ -206,6 +287,7 @@ int i915_perf_live_selftests(struct drm_i915_private *i915)
SUBTEST(live_noa_delay),
};
struct i915_perf *perf = &i915->perf;
+ int err;
if (!perf->metrics_kobj || !perf->ops.enable_metric_set)
return 0;
@@ -213,5 +295,13 @@ int i915_perf_live_selftests(struct drm_i915_private *i915)
if (intel_gt_is_wedged(&i915->gt))
return 0;
- return i915_subtests(tests, i915);
+ err = alloc_empty_config(&i915->perf);
+ if (err)
+ return err;
+
+ err = i915_subtests(tests, i915);
+
+ destroy_empty_config(&i915->perf);
+
+ return err;
}
diff --git a/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h b/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h
index 3bf7f53e9924..d8da142985eb 100644
--- a/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h
+++ b/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h
@@ -16,5 +16,6 @@
* Tests are executed in order by igt/i915_selftest
*/
selftest(engine_cs, intel_engine_cs_perf_selftests)
+selftest(request, i915_request_perf_selftests)
selftest(blt, i915_gem_object_blt_perf_selftests)
selftest(region, intel_memory_region_perf_selftests)
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index f89d9c42f1fa..6014e8dfcbb1 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -23,11 +23,13 @@
*/
#include <linux/prime_numbers.h>
+#include <linux/pm_qos.h>
#include "gem/i915_gem_pm.h"
#include "gem/selftests/mock_context.h"
#include "gt/intel_engine_pm.h"
+#include "gt/intel_engine_user.h"
#include "gt/intel_gt.h"
#include "i915_random.h"
@@ -51,6 +53,11 @@ static unsigned int num_uabi_engines(struct drm_i915_private *i915)
return count;
}
+static struct intel_engine_cs *rcs0(struct drm_i915_private *i915)
+{
+ return intel_engine_lookup_user(i915, I915_ENGINE_CLASS_RENDER, 0);
+}
+
static int igt_add_request(void *arg)
{
struct drm_i915_private *i915 = arg;
@@ -58,7 +65,7 @@ static int igt_add_request(void *arg)
/* Basic preliminary test to create a request and let it loose! */
- request = mock_request(i915->engine[RCS0]->kernel_context, HZ / 10);
+ request = mock_request(rcs0(i915)->kernel_context, HZ / 10);
if (!request)
return -ENOMEM;
@@ -76,7 +83,7 @@ static int igt_wait_request(void *arg)
/* Submit a request, then wait upon it */
- request = mock_request(i915->engine[RCS0]->kernel_context, T);
+ request = mock_request(rcs0(i915)->kernel_context, T);
if (!request)
return -ENOMEM;
@@ -145,7 +152,7 @@ static int igt_fence_wait(void *arg)
/* Submit a request, treat it as a fence and wait upon it */
- request = mock_request(i915->engine[RCS0]->kernel_context, T);
+ request = mock_request(rcs0(i915)->kernel_context, T);
if (!request)
return -ENOMEM;
@@ -420,7 +427,7 @@ static int mock_breadcrumbs_smoketest(void *arg)
{
struct drm_i915_private *i915 = arg;
struct smoketest t = {
- .engine = i915->engine[RCS0],
+ .engine = rcs0(i915),
.ncontexts = 1024,
.max_batch = 1024,
.request_alloc = __mock_request_alloc
@@ -809,10 +816,12 @@ static int recursive_batch_resolve(struct i915_vma *batch)
return PTR_ERR(cmd);
*cmd = MI_BATCH_BUFFER_END;
- intel_gt_chipset_flush(batch->vm->gt);
+ __i915_gem_object_flush_map(batch->obj, 0, sizeof(*cmd));
i915_gem_object_unpin_map(batch->obj);
+ intel_gt_chipset_flush(batch->vm->gt);
+
return 0;
}
@@ -858,13 +867,6 @@ static int live_all_engines(void *arg)
goto out_request;
}
- err = engine->emit_bb_start(request[idx],
- batch->node.start,
- batch->node.size,
- 0);
- GEM_BUG_ON(err);
- request[idx]->batch = batch;
-
i915_vma_lock(batch);
err = i915_request_await_object(request[idx], batch->obj, 0);
if (err == 0)
@@ -872,6 +874,13 @@ static int live_all_engines(void *arg)
i915_vma_unlock(batch);
GEM_BUG_ON(err);
+ err = engine->emit_bb_start(request[idx],
+ batch->node.start,
+ batch->node.size,
+ 0);
+ GEM_BUG_ON(err);
+ request[idx]->batch = batch;
+
i915_request_get(request[idx]);
i915_request_add(request[idx]);
idx++;
@@ -986,13 +995,6 @@ static int live_sequential_engines(void *arg)
}
}
- err = engine->emit_bb_start(request[idx],
- batch->node.start,
- batch->node.size,
- 0);
- GEM_BUG_ON(err);
- request[idx]->batch = batch;
-
i915_vma_lock(batch);
err = i915_request_await_object(request[idx],
batch->obj, false);
@@ -1001,6 +1003,13 @@ static int live_sequential_engines(void *arg)
i915_vma_unlock(batch);
GEM_BUG_ON(err);
+ err = engine->emit_bb_start(request[idx],
+ batch->node.start,
+ batch->node.size,
+ 0);
+ GEM_BUG_ON(err);
+ request[idx]->batch = batch;
+
i915_request_get(request[idx]);
i915_request_add(request[idx]);
@@ -1053,9 +1062,12 @@ out_request:
I915_MAP_WC);
if (!IS_ERR(cmd)) {
*cmd = MI_BATCH_BUFFER_END;
- intel_gt_chipset_flush(engine->gt);
+ __i915_gem_object_flush_map(request[idx]->batch->obj,
+ 0, sizeof(*cmd));
i915_gem_object_unpin_map(request[idx]->batch->obj);
+
+ intel_gt_chipset_flush(engine->gt);
}
i915_vma_put(request[idx]->batch);
@@ -1233,7 +1245,7 @@ static int live_parallel_engines(void *arg)
struct igt_live_test t;
unsigned int idx;
- snprintf(name, sizeof(name), "%pS", fn);
+ snprintf(name, sizeof(name), "%ps", *fn);
err = igt_live_test_begin(&t, i915, __func__, name);
if (err)
break;
@@ -1470,3 +1482,572 @@ int i915_request_live_selftests(struct drm_i915_private *i915)
return i915_subtests(tests, i915);
}
+
+static int switch_to_kernel_sync(struct intel_context *ce, int err)
+{
+ struct i915_request *rq;
+ struct dma_fence *fence;
+
+ rq = intel_engine_create_kernel_request(ce->engine);
+ if (IS_ERR(rq))
+ return PTR_ERR(rq);
+
+ fence = i915_active_fence_get(&ce->timeline->last_request);
+ if (fence) {
+ i915_request_await_dma_fence(rq, fence);
+ dma_fence_put(fence);
+ }
+
+ rq = i915_request_get(rq);
+ i915_request_add(rq);
+ if (i915_request_wait(rq, 0, HZ / 2) < 0 && !err)
+ err = -ETIME;
+ i915_request_put(rq);
+
+ while (!err && !intel_engine_is_idle(ce->engine))
+ intel_engine_flush_submission(ce->engine);
+
+ return err;
+}
+
+struct perf_stats {
+ struct intel_engine_cs *engine;
+ unsigned long count;
+ ktime_t time;
+ ktime_t busy;
+ u64 runtime;
+};
+
+struct perf_series {
+ struct drm_i915_private *i915;
+ unsigned int nengines;
+ struct intel_context *ce[];
+};
+
+static int s_sync0(void *arg)
+{
+ struct perf_series *ps = arg;
+ IGT_TIMEOUT(end_time);
+ unsigned int idx = 0;
+ int err = 0;
+
+ GEM_BUG_ON(!ps->nengines);
+ do {
+ struct i915_request *rq;
+
+ rq = i915_request_create(ps->ce[idx]);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ break;
+ }
+
+ i915_request_get(rq);
+ i915_request_add(rq);
+
+ if (i915_request_wait(rq, 0, HZ / 5) < 0)
+ err = -ETIME;
+ i915_request_put(rq);
+ if (err)
+ break;
+
+ if (++idx == ps->nengines)
+ idx = 0;
+ } while (!__igt_timeout(end_time, NULL));
+
+ return err;
+}
+
+static int s_sync1(void *arg)
+{
+ struct perf_series *ps = arg;
+ struct i915_request *prev = NULL;
+ IGT_TIMEOUT(end_time);
+ unsigned int idx = 0;
+ int err = 0;
+
+ GEM_BUG_ON(!ps->nengines);
+ do {
+ struct i915_request *rq;
+
+ rq = i915_request_create(ps->ce[idx]);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ break;
+ }
+
+ i915_request_get(rq);
+ i915_request_add(rq);
+
+ if (prev && i915_request_wait(prev, 0, HZ / 5) < 0)
+ err = -ETIME;
+ i915_request_put(prev);
+ prev = rq;
+ if (err)
+ break;
+
+ if (++idx == ps->nengines)
+ idx = 0;
+ } while (!__igt_timeout(end_time, NULL));
+ i915_request_put(prev);
+
+ return err;
+}
+
+static int s_many(void *arg)
+{
+ struct perf_series *ps = arg;
+ IGT_TIMEOUT(end_time);
+ unsigned int idx = 0;
+
+ GEM_BUG_ON(!ps->nengines);
+ do {
+ struct i915_request *rq;
+
+ rq = i915_request_create(ps->ce[idx]);
+ if (IS_ERR(rq))
+ return PTR_ERR(rq);
+
+ i915_request_add(rq);
+
+ if (++idx == ps->nengines)
+ idx = 0;
+ } while (!__igt_timeout(end_time, NULL));
+
+ return 0;
+}
+
+static int perf_series_engines(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ static int (* const func[])(void *arg) = {
+ s_sync0,
+ s_sync1,
+ s_many,
+ NULL,
+ };
+ const unsigned int nengines = num_uabi_engines(i915);
+ struct intel_engine_cs *engine;
+ int (* const *fn)(void *arg);
+ struct pm_qos_request qos;
+ struct perf_stats *stats;
+ struct perf_series *ps;
+ unsigned int idx;
+ int err = 0;
+
+ stats = kcalloc(nengines, sizeof(*stats), GFP_KERNEL);
+ if (!stats)
+ return -ENOMEM;
+
+ ps = kzalloc(struct_size(ps, ce, nengines), GFP_KERNEL);
+ if (!ps) {
+ kfree(stats);
+ return -ENOMEM;
+ }
+
+ cpu_latency_qos_add_request(&qos, 0); /* disable cstates */
+
+ ps->i915 = i915;
+ ps->nengines = nengines;
+
+ idx = 0;
+ for_each_uabi_engine(engine, i915) {
+ struct intel_context *ce;
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce))
+ goto out;
+
+ err = intel_context_pin(ce);
+ if (err) {
+ intel_context_put(ce);
+ goto out;
+ }
+
+ ps->ce[idx++] = ce;
+ }
+ GEM_BUG_ON(idx != ps->nengines);
+
+ for (fn = func; *fn && !err; fn++) {
+ char name[KSYM_NAME_LEN];
+ struct igt_live_test t;
+
+ snprintf(name, sizeof(name), "%ps", *fn);
+ err = igt_live_test_begin(&t, i915, __func__, name);
+ if (err)
+ break;
+
+ for (idx = 0; idx < nengines; idx++) {
+ struct perf_stats *p =
+ memset(&stats[idx], 0, sizeof(stats[idx]));
+ struct intel_context *ce = ps->ce[idx];
+
+ p->engine = ps->ce[idx]->engine;
+ intel_engine_pm_get(p->engine);
+
+ if (intel_engine_supports_stats(p->engine))
+ p->busy = intel_engine_get_busy_time(p->engine) + 1;
+ p->runtime = -intel_context_get_total_runtime_ns(ce);
+ p->time = ktime_get();
+ }
+
+ err = (*fn)(ps);
+ if (igt_live_test_end(&t))
+ err = -EIO;
+
+ for (idx = 0; idx < nengines; idx++) {
+ struct perf_stats *p = &stats[idx];
+ struct intel_context *ce = ps->ce[idx];
+ int integer, decimal;
+ u64 busy, dt;
+
+ p->time = ktime_sub(ktime_get(), p->time);
+ if (p->busy) {
+ p->busy = ktime_sub(intel_engine_get_busy_time(p->engine),
+ p->busy - 1);
+ }
+
+ err = switch_to_kernel_sync(ce, err);
+ p->runtime += intel_context_get_total_runtime_ns(ce);
+ intel_engine_pm_put(p->engine);
+
+ busy = 100 * ktime_to_ns(p->busy);
+ dt = ktime_to_ns(p->time);
+ if (dt) {
+ integer = div64_u64(busy, dt);
+ busy -= integer * dt;
+ decimal = div64_u64(100 * busy, dt);
+ } else {
+ integer = 0;
+ decimal = 0;
+ }
+
+ pr_info("%s %5s: { seqno:%d, busy:%d.%02d%%, runtime:%lldms, walltime:%lldms }\n",
+ name, p->engine->name, ce->timeline->seqno,
+ integer, decimal,
+ div_u64(p->runtime, 1000 * 1000),
+ div_u64(ktime_to_ns(p->time), 1000 * 1000));
+ }
+ }
+
+out:
+ for (idx = 0; idx < nengines; idx++) {
+ if (IS_ERR_OR_NULL(ps->ce[idx]))
+ break;
+
+ intel_context_unpin(ps->ce[idx]);
+ intel_context_put(ps->ce[idx]);
+ }
+ kfree(ps);
+
+ cpu_latency_qos_remove_request(&qos);
+ kfree(stats);
+ return err;
+}
+
+static int p_sync0(void *arg)
+{
+ struct perf_stats *p = arg;
+ struct intel_engine_cs *engine = p->engine;
+ struct intel_context *ce;
+ IGT_TIMEOUT(end_time);
+ unsigned long count;
+ bool busy;
+ int err = 0;
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce))
+ return PTR_ERR(ce);
+
+ err = intel_context_pin(ce);
+ if (err) {
+ intel_context_put(ce);
+ return err;
+ }
+
+ busy = false;
+ if (intel_engine_supports_stats(engine)) {
+ p->busy = intel_engine_get_busy_time(engine);
+ busy = true;
+ }
+
+ p->time = ktime_get();
+ count = 0;
+ do {
+ struct i915_request *rq;
+
+ rq = i915_request_create(ce);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ break;
+ }
+
+ i915_request_get(rq);
+ i915_request_add(rq);
+
+ err = 0;
+ if (i915_request_wait(rq, 0, HZ / 5) < 0)
+ err = -ETIME;
+ i915_request_put(rq);
+ if (err)
+ break;
+
+ count++;
+ } while (!__igt_timeout(end_time, NULL));
+ p->time = ktime_sub(ktime_get(), p->time);
+
+ if (busy) {
+ p->busy = ktime_sub(intel_engine_get_busy_time(engine),
+ p->busy);
+ }
+
+ err = switch_to_kernel_sync(ce, err);
+ p->runtime = intel_context_get_total_runtime_ns(ce);
+ p->count = count;
+
+ intel_context_unpin(ce);
+ intel_context_put(ce);
+ return err;
+}
+
+static int p_sync1(void *arg)
+{
+ struct perf_stats *p = arg;
+ struct intel_engine_cs *engine = p->engine;
+ struct i915_request *prev = NULL;
+ struct intel_context *ce;
+ IGT_TIMEOUT(end_time);
+ unsigned long count;
+ bool busy;
+ int err = 0;
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce))
+ return PTR_ERR(ce);
+
+ err = intel_context_pin(ce);
+ if (err) {
+ intel_context_put(ce);
+ return err;
+ }
+
+ busy = false;
+ if (intel_engine_supports_stats(engine)) {
+ p->busy = intel_engine_get_busy_time(engine);
+ busy = true;
+ }
+
+ p->time = ktime_get();
+ count = 0;
+ do {
+ struct i915_request *rq;
+
+ rq = i915_request_create(ce);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ break;
+ }
+
+ i915_request_get(rq);
+ i915_request_add(rq);
+
+ err = 0;
+ if (prev && i915_request_wait(prev, 0, HZ / 5) < 0)
+ err = -ETIME;
+ i915_request_put(prev);
+ prev = rq;
+ if (err)
+ break;
+
+ count++;
+ } while (!__igt_timeout(end_time, NULL));
+ i915_request_put(prev);
+ p->time = ktime_sub(ktime_get(), p->time);
+
+ if (busy) {
+ p->busy = ktime_sub(intel_engine_get_busy_time(engine),
+ p->busy);
+ }
+
+ err = switch_to_kernel_sync(ce, err);
+ p->runtime = intel_context_get_total_runtime_ns(ce);
+ p->count = count;
+
+ intel_context_unpin(ce);
+ intel_context_put(ce);
+ return err;
+}
+
+static int p_many(void *arg)
+{
+ struct perf_stats *p = arg;
+ struct intel_engine_cs *engine = p->engine;
+ struct intel_context *ce;
+ IGT_TIMEOUT(end_time);
+ unsigned long count;
+ int err = 0;
+ bool busy;
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce))
+ return PTR_ERR(ce);
+
+ err = intel_context_pin(ce);
+ if (err) {
+ intel_context_put(ce);
+ return err;
+ }
+
+ busy = false;
+ if (intel_engine_supports_stats(engine)) {
+ p->busy = intel_engine_get_busy_time(engine);
+ busy = true;
+ }
+
+ count = 0;
+ p->time = ktime_get();
+ do {
+ struct i915_request *rq;
+
+ rq = i915_request_create(ce);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ break;
+ }
+
+ i915_request_add(rq);
+ count++;
+ } while (!__igt_timeout(end_time, NULL));
+ p->time = ktime_sub(ktime_get(), p->time);
+
+ if (busy) {
+ p->busy = ktime_sub(intel_engine_get_busy_time(engine),
+ p->busy);
+ }
+
+ err = switch_to_kernel_sync(ce, err);
+ p->runtime = intel_context_get_total_runtime_ns(ce);
+ p->count = count;
+
+ intel_context_unpin(ce);
+ intel_context_put(ce);
+ return err;
+}
+
+static int perf_parallel_engines(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ static int (* const func[])(void *arg) = {
+ p_sync0,
+ p_sync1,
+ p_many,
+ NULL,
+ };
+ const unsigned int nengines = num_uabi_engines(i915);
+ struct intel_engine_cs *engine;
+ int (* const *fn)(void *arg);
+ struct pm_qos_request qos;
+ struct {
+ struct perf_stats p;
+ struct task_struct *tsk;
+ } *engines;
+ int err = 0;
+
+ engines = kcalloc(nengines, sizeof(*engines), GFP_KERNEL);
+ if (!engines)
+ return -ENOMEM;
+
+ cpu_latency_qos_add_request(&qos, 0);
+
+ for (fn = func; *fn; fn++) {
+ char name[KSYM_NAME_LEN];
+ struct igt_live_test t;
+ unsigned int idx;
+
+ snprintf(name, sizeof(name), "%ps", *fn);
+ err = igt_live_test_begin(&t, i915, __func__, name);
+ if (err)
+ break;
+
+ atomic_set(&i915->selftest.counter, nengines);
+
+ idx = 0;
+ for_each_uabi_engine(engine, i915) {
+ intel_engine_pm_get(engine);
+
+ memset(&engines[idx].p, 0, sizeof(engines[idx].p));
+ engines[idx].p.engine = engine;
+
+ engines[idx].tsk = kthread_run(*fn, &engines[idx].p,
+ "igt:%s", engine->name);
+ if (IS_ERR(engines[idx].tsk)) {
+ err = PTR_ERR(engines[idx].tsk);
+ intel_engine_pm_put(engine);
+ break;
+ }
+ get_task_struct(engines[idx++].tsk);
+ }
+
+ yield(); /* start all threads before we kthread_stop() */
+
+ idx = 0;
+ for_each_uabi_engine(engine, i915) {
+ int status;
+
+ if (IS_ERR(engines[idx].tsk))
+ break;
+
+ status = kthread_stop(engines[idx].tsk);
+ if (status && !err)
+ err = status;
+
+ intel_engine_pm_put(engine);
+ put_task_struct(engines[idx++].tsk);
+ }
+
+ if (igt_live_test_end(&t))
+ err = -EIO;
+ if (err)
+ break;
+
+ idx = 0;
+ for_each_uabi_engine(engine, i915) {
+ struct perf_stats *p = &engines[idx].p;
+ u64 busy = 100 * ktime_to_ns(p->busy);
+ u64 dt = ktime_to_ns(p->time);
+ int integer, decimal;
+
+ if (dt) {
+ integer = div64_u64(busy, dt);
+ busy -= integer * dt;
+ decimal = div64_u64(100 * busy, dt);
+ } else {
+ integer = 0;
+ decimal = 0;
+ }
+
+ GEM_BUG_ON(engine != p->engine);
+ pr_info("%s %5s: { count:%lu, busy:%d.%02d%%, runtime:%lldms, walltime:%lldms }\n",
+ name, engine->name, p->count, integer, decimal,
+ div_u64(p->runtime, 1000 * 1000),
+ div_u64(ktime_to_ns(p->time), 1000 * 1000));
+ idx++;
+ }
+ }
+
+ cpu_latency_qos_remove_request(&qos);
+ kfree(engines);
+ return err;
+}
+
+int i915_request_perf_selftests(struct drm_i915_private *i915)
+{
+ static const struct i915_subtest tests[] = {
+ SUBTEST(perf_series_engines),
+ SUBTEST(perf_parallel_engines),
+ };
+
+ if (intel_gt_is_wedged(&i915->gt))
+ return 0;
+
+ return i915_subtests(tests, i915);
+}
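
The "busy:%d.%02d%%" figure printed by perf_parallel_engines() above is computed entirely in integer math: the busy time is scaled by 100, split into a whole-percent part and a two-digit fractional part against the wall time. A minimal stand-alone sketch of that fixed-point split (user-space illustration, not part of the patch):

/*
 * Fixed-point "busy percent" split as used in perf_parallel_engines():
 * no floating point, two decimal places.
 */
#include <stdint.h>
#include <stdio.h>

static void print_busy_pct(uint64_t busy_ns, uint64_t wall_ns)
{
	uint64_t busy = 100 * busy_ns;		/* scale to percent */
	unsigned int integer = 0, decimal = 0;

	if (wall_ns) {
		integer = busy / wall_ns;		/* whole percent */
		busy -= (uint64_t)integer * wall_ns;
		decimal = (100 * busy) / wall_ns;	/* two decimals */
	}
	printf("busy:%u.%02u%%\n", integer, decimal);
}

int main(void)
{
	print_busy_pct(437500, 1000000);	/* prints busy:43.75% */
	return 0;
}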
diff --git a/drivers/gpu/drm/i915/selftests/i915_selftest.c b/drivers/gpu/drm/i915/selftests/i915_selftest.c
index d3bf9eefb682..1bc11c09faef 100644
--- a/drivers/gpu/drm/i915/selftests/i915_selftest.c
+++ b/drivers/gpu/drm/i915/selftests/i915_selftest.c
@@ -396,6 +396,35 @@ bool __igt_timeout(unsigned long timeout, const char *fmt, ...)
return true;
}
+void igt_hexdump(const void *buf, size_t len)
+{
+ const size_t rowsize = 8 * sizeof(u32);
+ const void *prev = NULL;
+ bool skip = false;
+ size_t pos;
+
+ for (pos = 0; pos < len; pos += rowsize) {
+ char line[128];
+
+ if (prev && !memcmp(prev, buf + pos, rowsize)) {
+ if (!skip) {
+ pr_info("*\n");
+ skip = true;
+ }
+ continue;
+ }
+
+ WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos,
+ rowsize, sizeof(u32),
+ line, sizeof(line),
+ false) >= sizeof(line));
+ pr_info("[%04zx] %s\n", pos, line);
+
+ prev = buf + pos;
+ skip = false;
+ }
+}
+
module_param_named(st_random_seed, i915_selftest.random_seed, uint, 0400);
module_param_named(st_timeout, i915_selftest.timeout_ms, uint, 0400);
module_param_named(st_filter, i915_selftest.filter, charp, 0400);
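
igt_hexdump() added above suppresses runs of identical rows with a single "*" line, the same convention as "hexdump -C" and the kernel's print_hex_dump(). A small user-space sketch of the same duplicate-row suppression, for illustration only:

#include <stdio.h>
#include <string.h>
#include <stddef.h>

static void hexdump(const unsigned char *buf, size_t len)
{
	const size_t rowsize = 32;
	const unsigned char *prev = NULL;
	int skip = 0;
	size_t pos, i;

	for (pos = 0; pos < len; pos += rowsize) {
		size_t n = len - pos < rowsize ? len - pos : rowsize;

		/* identical full rows collapse into one "*" line */
		if (prev && n == rowsize && !memcmp(prev, buf + pos, rowsize)) {
			if (!skip) {
				puts("*");
				skip = 1;
			}
			continue;
		}

		printf("[%04zx]", pos);
		for (i = 0; i < n; i++)
			printf(" %02x", buf[pos + i]);
		putchar('\n');

		prev = buf + pos;
		skip = 0;
	}
}

int main(void)
{
	unsigned char buf[128] = { [0] = 0xde, [1] = 0xad, [96] = 0xbe };

	hexdump(buf, sizeof(buf));	/* middle all-zero rows print as "*" */
	return 0;
}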
diff --git a/drivers/gpu/drm/i915/selftests/igt_spinner.c b/drivers/gpu/drm/i915/selftests/igt_spinner.c
index 9ad4ab088466..e35ba5f9e73f 100644
--- a/drivers/gpu/drm/i915/selftests/igt_spinner.c
+++ b/drivers/gpu/drm/i915/selftests/igt_spinner.c
@@ -169,8 +169,7 @@ igt_spinner_create_request(struct igt_spinner *spin,
intel_gt_chipset_flush(engine->gt);
- if (engine->emit_init_breadcrumb &&
- i915_request_timeline(rq)->has_initial_breadcrumb) {
+ if (engine->emit_init_breadcrumb) {
err = engine->emit_init_breadcrumb(rq);
if (err)
goto cancel_rq;
diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c
index 2a1d4ba1f9f3..6e80d99048e4 100644
--- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c
+++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c
@@ -594,8 +594,11 @@ create_region_for_mapping(struct intel_memory_region *mr, u64 size, u32 type,
void *addr;
obj = i915_gem_object_create_region(mr, size, 0);
- if (IS_ERR(obj))
+ if (IS_ERR(obj)) {
+ if (PTR_ERR(obj) == -ENOSPC) /* Stolen memory */
+ return ERR_PTR(-ENODEV);
return obj;
+ }
addr = i915_gem_object_pin_map(obj, type);
if (IS_ERR(addr)) {
diff --git a/drivers/gpu/drm/i915/selftests/librapl.c b/drivers/gpu/drm/i915/selftests/librapl.c
new file mode 100644
index 000000000000..58710ac3f979
--- /dev/null
+++ b/drivers/gpu/drm/i915/selftests/librapl.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#include <asm/msr.h>
+
+#include "librapl.h"
+
+u64 librapl_energy_uJ(void)
+{
+ unsigned long long power;
+ u32 units;
+
+ if (rdmsrl_safe(MSR_RAPL_POWER_UNIT, &power))
+ return 0;
+
+ units = (power & 0x1f00) >> 8;
+
+ if (rdmsrl_safe(MSR_PP1_ENERGY_STATUS, &power))
+ return 0;
+
+ return (1000000 * power) >> units; /* convert to uJ */
+}
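
librapl_energy_uJ() reads the RAPL energy-status unit (bits 12:8 of MSR_RAPL_POWER_UNIT) and the PP1 (graphics) energy counter, then scales the counter by 1e6 / 2^units to get microjoules. A rough user-space equivalent can be sketched with the msr driver; the path /dev/cpu/0/msr and the MSR numbers (0x606, 0x641, per the Intel SDM) are assumptions, and root plus the "msr" module are required:

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

static uint64_t rdmsr(int fd, off_t reg)
{
	uint64_t val = 0;

	if (pread(fd, &val, sizeof(val), reg) != sizeof(val))
		return 0;
	return val;
}

int main(void)
{
	int fd = open("/dev/cpu/0/msr", O_RDONLY);	/* needs root + msr module */
	uint64_t unit, energy, units;

	if (fd < 0) {
		perror("open /dev/cpu/0/msr");
		return 1;
	}

	unit = rdmsr(fd, 0x606);		/* MSR_RAPL_POWER_UNIT */
	units = (unit & 0x1f00) >> 8;		/* energy unit: 1/2^units J */
	energy = rdmsr(fd, 0x641);		/* MSR_PP1_ENERGY_STATUS */

	/* same scaling as librapl_energy_uJ(): counter * 1e6 >> units */
	printf("GPU energy: %llu uJ\n",
	       (unsigned long long)((1000000 * energy) >> units));

	close(fd);
	return 0;
}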
diff --git a/drivers/gpu/drm/i915/selftests/librapl.h b/drivers/gpu/drm/i915/selftests/librapl.h
new file mode 100644
index 000000000000..887f3e91dd05
--- /dev/null
+++ b/drivers/gpu/drm/i915/selftests/librapl.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#ifndef SELFTEST_LIBRAPL_H
+#define SELFTEST_LIBRAPL_H
+
+#include <linux/types.h>
+
+u64 librapl_energy_uJ(void);
+
+#endif /* SELFTEST_LIBRAPL_H */
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index 754d0eb6beaa..9b105b811f1f 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -25,6 +25,8 @@
#include <linux/pm_domain.h>
#include <linux/pm_runtime.h>
+#include <drm/drm_managed.h>
+
#include "gt/intel_gt.h"
#include "gt/intel_gt_requests.h"
#include "gt/mock_engine.h"
@@ -55,6 +57,9 @@ static void mock_device_release(struct drm_device *dev)
{
struct drm_i915_private *i915 = to_i915(dev);
+ if (!i915->do_release)
+ goto out;
+
mock_device_flush(i915);
intel_gt_driver_remove(&i915->gt);
@@ -71,8 +76,9 @@ static void mock_device_release(struct drm_device *dev)
drm_mode_config_cleanup(&i915->drm);
- drm_dev_fini(&i915->drm);
+out:
put_device(&i915->drm.pdev->dev);
+ i915->drm.pdev = NULL;
}
static struct drm_driver mock_driver = {
@@ -114,9 +120,14 @@ struct drm_i915_private *mock_gem_device(void)
struct pci_dev *pdev;
int err;
- pdev = kzalloc(sizeof(*pdev) + sizeof(*i915), GFP_KERNEL);
+ pdev = kzalloc(sizeof(*pdev), GFP_KERNEL);
if (!pdev)
- goto err;
+ return NULL;
+ i915 = kzalloc(sizeof(*i915), GFP_KERNEL);
+ if (!i915) {
+ kfree(pdev);
+ return NULL;
+ }
device_initialize(&pdev->dev);
pdev->class = PCI_BASE_CLASS_DISPLAY << 16;
@@ -129,7 +140,6 @@ struct drm_i915_private *mock_gem_device(void)
pdev->dev.archdata.iommu = (void *)-1;
#endif
- i915 = (struct drm_i915_private *)(pdev + 1);
pci_set_drvdata(pdev, i915);
dev_pm_domain_set(&pdev->dev, &pm_domain);
@@ -141,9 +151,13 @@ struct drm_i915_private *mock_gem_device(void)
err = drm_dev_init(&i915->drm, &mock_driver, &pdev->dev);
if (err) {
pr_err("Failed to initialise mock GEM device: err=%d\n", err);
- goto put_device;
+ put_device(&pdev->dev);
+ kfree(i915);
+
+ return NULL;
}
i915->drm.pdev = pdev;
+ drmm_add_final_kfree(&i915->drm, i915);
intel_runtime_pm_init_early(&i915->runtime_pm);
@@ -178,16 +192,18 @@ struct drm_i915_private *mock_gem_device(void)
mkwrite_device_info(i915)->engine_mask = BIT(0);
- i915->engine[RCS0] = mock_engine(i915, "mock", RCS0);
- if (!i915->engine[RCS0])
+ i915->gt.engine[RCS0] = mock_engine(i915, "mock", RCS0);
+ if (!i915->gt.engine[RCS0])
goto err_unlock;
- if (mock_engine_init(i915->engine[RCS0]))
+ if (mock_engine_init(i915->gt.engine[RCS0]))
goto err_context;
__clear_bit(I915_WEDGED, &i915->gt.reset.flags);
intel_engines_driver_register(i915);
+ i915->do_release = true;
+
return i915;
err_context:
@@ -198,9 +214,7 @@ err_drv:
intel_gt_driver_late_release(&i915->gt);
intel_memory_regions_driver_release(i915);
drm_mode_config_cleanup(&i915->drm);
- drm_dev_fini(&i915->drm);
-put_device:
- put_device(&pdev->dev);
-err:
+ drm_dev_put(&i915->drm);
+
return NULL;
}
diff --git a/drivers/gpu/drm/imx/dw_hdmi-imx.c b/drivers/gpu/drm/imx/dw_hdmi-imx.c
index f22cfbf9353e..ba4ca17fd4d8 100644
--- a/drivers/gpu/drm/imx/dw_hdmi-imx.c
+++ b/drivers/gpu/drm/imx/dw_hdmi-imx.c
@@ -18,6 +18,7 @@
#include <drm/drm_edid.h>
#include <drm/drm_encoder.h>
#include <drm/drm_of.h>
+#include <drm/drm_simple_kms_helper.h>
#include "imx-drm.h"
@@ -143,10 +144,6 @@ static const struct drm_encoder_helper_funcs dw_hdmi_imx_encoder_helper_funcs =
.atomic_check = dw_hdmi_imx_atomic_check,
};
-static const struct drm_encoder_funcs dw_hdmi_imx_encoder_funcs = {
- .destroy = drm_encoder_cleanup,
-};
-
static enum drm_mode_status
imx6q_hdmi_mode_valid(struct drm_connector *con,
const struct drm_display_mode *mode)
@@ -236,8 +233,7 @@ static int dw_hdmi_imx_bind(struct device *dev, struct device *master,
return ret;
drm_encoder_helper_add(encoder, &dw_hdmi_imx_encoder_helper_funcs);
- drm_encoder_init(drm, encoder, &dw_hdmi_imx_encoder_funcs,
- DRM_MODE_ENCODER_TMDS, NULL);
+ drm_simple_encoder_init(drm, encoder, DRM_MODE_ENCODER_TMDS);
platform_set_drvdata(pdev, hdmi);
diff --git a/drivers/gpu/drm/imx/imx-drm-core.c b/drivers/gpu/drm/imx/imx-drm-core.c
index da87c70e413b..2e38f1a5cf8d 100644
--- a/drivers/gpu/drm/imx/imx-drm-core.c
+++ b/drivers/gpu/drm/imx/imx-drm-core.c
@@ -42,12 +42,6 @@ void imx_drm_connector_destroy(struct drm_connector *connector)
}
EXPORT_SYMBOL_GPL(imx_drm_connector_destroy);
-void imx_drm_encoder_destroy(struct drm_encoder *encoder)
-{
- drm_encoder_cleanup(encoder);
-}
-EXPORT_SYMBOL_GPL(imx_drm_encoder_destroy);
-
static int imx_drm_atomic_check(struct drm_device *dev,
struct drm_atomic_state *state)
{
@@ -139,8 +133,8 @@ int imx_drm_encoder_parse_of(struct drm_device *drm,
encoder->possible_crtcs = crtc_mask;
- /* FIXME: this is the mask of outputs which can clone this output. */
- encoder->possible_clones = ~0;
+ /* FIXME: cloning support not clear, disable it all for now */
+ encoder->possible_clones = 0;
return 0;
}
diff --git a/drivers/gpu/drm/imx/imx-drm.h b/drivers/gpu/drm/imx/imx-drm.h
index ab9c6f706eb3..c3e1a3f14d30 100644
--- a/drivers/gpu/drm/imx/imx-drm.h
+++ b/drivers/gpu/drm/imx/imx-drm.h
@@ -38,7 +38,6 @@ int imx_drm_encoder_parse_of(struct drm_device *drm,
struct drm_encoder *encoder, struct device_node *np);
void imx_drm_connector_destroy(struct drm_connector *connector);
-void imx_drm_encoder_destroy(struct drm_encoder *encoder);
int ipu_planes_assign_pre(struct drm_device *dev,
struct drm_atomic_state *state);
diff --git a/drivers/gpu/drm/imx/imx-ldb.c b/drivers/gpu/drm/imx/imx-ldb.c
index 4da22a94790c..66ea68e8da87 100644
--- a/drivers/gpu/drm/imx/imx-ldb.c
+++ b/drivers/gpu/drm/imx/imx-ldb.c
@@ -26,6 +26,7 @@
#include <drm/drm_panel.h>
#include <drm/drm_print.h>
#include <drm/drm_probe_helper.h>
+#include <drm/drm_simple_kms_helper.h>
#include "imx-drm.h"
@@ -393,10 +394,6 @@ static const struct drm_connector_helper_funcs imx_ldb_connector_helper_funcs =
.best_encoder = imx_ldb_connector_best_encoder,
};
-static const struct drm_encoder_funcs imx_ldb_encoder_funcs = {
- .destroy = imx_drm_encoder_destroy,
-};
-
static const struct drm_encoder_helper_funcs imx_ldb_encoder_helper_funcs = {
.atomic_mode_set = imx_ldb_encoder_atomic_mode_set,
.enable = imx_ldb_encoder_enable,
@@ -441,8 +438,7 @@ static int imx_ldb_register(struct drm_device *drm,
}
drm_encoder_helper_add(encoder, &imx_ldb_encoder_helper_funcs);
- drm_encoder_init(drm, encoder, &imx_ldb_encoder_funcs,
- DRM_MODE_ENCODER_LVDS, NULL);
+ drm_simple_encoder_init(drm, encoder, DRM_MODE_ENCODER_LVDS);
if (imx_ldb_ch->bridge) {
ret = drm_bridge_attach(&imx_ldb_ch->encoder,
diff --git a/drivers/gpu/drm/imx/imx-tve.c b/drivers/gpu/drm/imx/imx-tve.c
index 5bbfaa2cd0f4..ee63782c77e9 100644
--- a/drivers/gpu/drm/imx/imx-tve.c
+++ b/drivers/gpu/drm/imx/imx-tve.c
@@ -21,6 +21,7 @@
#include <drm/drm_atomic_helper.h>
#include <drm/drm_fb_helper.h>
#include <drm/drm_probe_helper.h>
+#include <drm/drm_simple_kms_helper.h>
#include "imx-drm.h"
@@ -348,10 +349,6 @@ static const struct drm_connector_helper_funcs imx_tve_connector_helper_funcs =
.mode_valid = imx_tve_connector_mode_valid,
};
-static const struct drm_encoder_funcs imx_tve_encoder_funcs = {
- .destroy = imx_drm_encoder_destroy,
-};
-
static const struct drm_encoder_helper_funcs imx_tve_encoder_helper_funcs = {
.mode_set = imx_tve_encoder_mode_set,
.enable = imx_tve_encoder_enable,
@@ -479,8 +476,7 @@ static int imx_tve_register(struct drm_device *drm, struct imx_tve *tve)
return ret;
drm_encoder_helper_add(&tve->encoder, &imx_tve_encoder_helper_funcs);
- drm_encoder_init(drm, &tve->encoder, &imx_tve_encoder_funcs,
- encoder_type, NULL);
+ drm_simple_encoder_init(drm, &tve->encoder, encoder_type);
drm_connector_helper_add(&tve->connector,
&imx_tve_connector_helper_funcs);
diff --git a/drivers/gpu/drm/imx/parallel-display.c b/drivers/gpu/drm/imx/parallel-display.c
index 08fafa4bf8c2..ac916c84a631 100644
--- a/drivers/gpu/drm/imx/parallel-display.c
+++ b/drivers/gpu/drm/imx/parallel-display.c
@@ -18,6 +18,7 @@
#include <drm/drm_of.h>
#include <drm/drm_panel.h>
#include <drm/drm_probe_helper.h>
+#include <drm/drm_simple_kms_helper.h>
#include "imx-drm.h"
@@ -256,10 +257,6 @@ static const struct drm_connector_helper_funcs imx_pd_connector_helper_funcs = {
.best_encoder = imx_pd_connector_best_encoder,
};
-static const struct drm_encoder_funcs imx_pd_encoder_funcs = {
- .destroy = imx_drm_encoder_destroy,
-};
-
static const struct drm_bridge_funcs imx_pd_bridge_funcs = {
.enable = imx_pd_bridge_enable,
.disable = imx_pd_bridge_disable,
@@ -288,8 +285,7 @@ static int imx_pd_register(struct drm_device *drm,
*/
imxpd->connector.dpms = DRM_MODE_DPMS_OFF;
- drm_encoder_init(drm, encoder, &imx_pd_encoder_funcs,
- DRM_MODE_ENCODER_NONE, NULL);
+ drm_simple_encoder_init(drm, encoder, DRM_MODE_ENCODER_NONE);
imxpd->bridge.funcs = &imx_pd_bridge_funcs;
drm_bridge_attach(encoder, &imxpd->bridge, NULL, 0);
diff --git a/drivers/gpu/drm/ingenic/ingenic-drm.c b/drivers/gpu/drm/ingenic/ingenic-drm.c
index 548cc25ea4ab..55b49a31729b 100644
--- a/drivers/gpu/drm/ingenic/ingenic-drm.c
+++ b/drivers/gpu/drm/ingenic/ingenic-drm.c
@@ -23,11 +23,13 @@
#include <drm/drm_fourcc.h>
#include <drm/drm_gem_framebuffer_helper.h>
#include <drm/drm_irq.h>
+#include <drm/drm_managed.h>
#include <drm/drm_of.h>
#include <drm/drm_panel.h>
#include <drm/drm_plane.h>
#include <drm/drm_plane_helper.h>
#include <drm/drm_probe_helper.h>
+#include <drm/drm_simple_kms_helper.h>
#include <drm/drm_vblank.h>
#define JZ_REG_LCD_CFG 0x00
@@ -488,15 +490,6 @@ static irqreturn_t ingenic_drm_irq_handler(int irq, void *arg)
return IRQ_HANDLED;
}
-static void ingenic_drm_release(struct drm_device *drm)
-{
- struct ingenic_drm *priv = drm_device_get_priv(drm);
-
- drm_mode_config_cleanup(drm);
- drm_dev_fini(drm);
- kfree(priv);
-}
-
static int ingenic_drm_enable_vblank(struct drm_crtc *crtc)
{
struct ingenic_drm *priv = drm_crtc_get_priv(crtc);
@@ -540,7 +533,6 @@ static struct drm_driver ingenic_drm_driver_data = {
.gem_prime_mmap = drm_gem_cma_prime_mmap,
.irq_handler = ingenic_drm_irq_handler,
- .release = ingenic_drm_release,
};
static const struct drm_plane_funcs ingenic_drm_primary_plane_funcs = {
@@ -592,10 +584,6 @@ static const struct drm_mode_config_funcs ingenic_drm_mode_config_funcs = {
.atomic_commit = drm_atomic_helper_commit,
};
-static const struct drm_encoder_funcs ingenic_drm_encoder_funcs = {
- .destroy = drm_encoder_cleanup,
-};
-
static void ingenic_drm_free_dma_hwdesc(void *d)
{
struct ingenic_drm *priv = d;
@@ -623,24 +611,21 @@ static int ingenic_drm_probe(struct platform_device *pdev)
return -EINVAL;
}
- priv = kzalloc(sizeof(*priv), GFP_KERNEL);
- if (!priv)
- return -ENOMEM;
+ priv = devm_drm_dev_alloc(dev, &ingenic_drm_driver_data,
+ struct ingenic_drm, drm);
+ if (IS_ERR(priv))
+ return PTR_ERR(priv);
priv->soc_info = soc_info;
priv->dev = dev;
drm = &priv->drm;
- drm->dev_private = priv;
platform_set_drvdata(pdev, priv);
- ret = devm_drm_dev_init(dev, drm, &ingenic_drm_driver_data);
- if (ret) {
- kfree(priv);
+ ret = drmm_mode_config_init(drm);
+ if (ret)
return ret;
- }
- drm_mode_config_init(drm);
drm->mode_config.min_width = 0;
drm->mode_config.min_height = 0;
drm->mode_config.max_width = soc_info->max_width;
@@ -661,10 +646,8 @@ static int ingenic_drm_probe(struct platform_device *pdev)
}
irq = platform_get_irq(pdev, 0);
- if (irq < 0) {
- dev_err(dev, "Failed to get platform irq");
+ if (irq < 0)
return irq;
- }
if (soc_info->needs_dev_clk) {
priv->lcd_clk = devm_clk_get(dev, "lcd");
@@ -730,8 +713,8 @@ static int ingenic_drm_probe(struct platform_device *pdev)
drm_encoder_helper_add(&priv->encoder,
&ingenic_drm_encoder_helper_funcs);
- ret = drm_encoder_init(drm, &priv->encoder, &ingenic_drm_encoder_funcs,
- DRM_MODE_ENCODER_DPI, NULL);
+ ret = drm_simple_encoder_init(drm, &priv->encoder,
+ DRM_MODE_ENCODER_DPI);
if (ret) {
dev_err(dev, "Failed to init encoder: %i", ret);
return ret;
@@ -791,9 +774,7 @@ static int ingenic_drm_probe(struct platform_device *pdev)
goto err_devclk_disable;
}
- ret = drm_fbdev_generic_setup(drm, 32);
- if (ret)
- dev_warn(dev, "Unable to start fbdev emulation: %i", ret);
+ drm_fbdev_generic_setup(drm, 32);
return 0;
diff --git a/drivers/gpu/drm/lima/Kconfig b/drivers/gpu/drm/lima/Kconfig
index d589f09d04d9..fa1d4f5df31e 100644
--- a/drivers/gpu/drm/lima/Kconfig
+++ b/drivers/gpu/drm/lima/Kconfig
@@ -10,5 +10,7 @@ config DRM_LIMA
depends on OF
select DRM_SCHED
select DRM_GEM_SHMEM_HELPER
+ select PM_DEVFREQ
+ select DEVFREQ_GOV_SIMPLE_ONDEMAND
help
DRM driver for ARM Mali 400/450 GPUs.
diff --git a/drivers/gpu/drm/lima/Makefile b/drivers/gpu/drm/lima/Makefile
index a85444b0a1d4..ca2097b8e1ad 100644
--- a/drivers/gpu/drm/lima/Makefile
+++ b/drivers/gpu/drm/lima/Makefile
@@ -14,6 +14,8 @@ lima-y := \
lima_sched.o \
lima_ctx.o \
lima_dlbu.o \
- lima_bcast.o
+ lima_bcast.o \
+ lima_trace.o \
+ lima_devfreq.o
obj-$(CONFIG_DRM_LIMA) += lima.o
diff --git a/drivers/gpu/drm/lima/lima_bcast.c b/drivers/gpu/drm/lima/lima_bcast.c
index 288398027bfa..fbc43f243c54 100644
--- a/drivers/gpu/drm/lima/lima_bcast.c
+++ b/drivers/gpu/drm/lima/lima_bcast.c
@@ -26,18 +26,33 @@ void lima_bcast_enable(struct lima_device *dev, int num_pp)
bcast_write(LIMA_BCAST_BROADCAST_MASK, mask);
}
+static int lima_bcast_hw_init(struct lima_ip *ip)
+{
+ bcast_write(LIMA_BCAST_BROADCAST_MASK, ip->data.mask << 16);
+ bcast_write(LIMA_BCAST_INTERRUPT_MASK, ip->data.mask);
+ return 0;
+}
+
+int lima_bcast_resume(struct lima_ip *ip)
+{
+ return lima_bcast_hw_init(ip);
+}
+
+void lima_bcast_suspend(struct lima_ip *ip)
+{
+
+}
+
int lima_bcast_init(struct lima_ip *ip)
{
- int i, mask = 0;
+ int i;
for (i = lima_ip_pp0; i <= lima_ip_pp7; i++) {
if (ip->dev->ip[i].present)
- mask |= 1 << (i - lima_ip_pp0);
+ ip->data.mask |= 1 << (i - lima_ip_pp0);
}
- bcast_write(LIMA_BCAST_BROADCAST_MASK, mask << 16);
- bcast_write(LIMA_BCAST_INTERRUPT_MASK, mask);
- return 0;
+ return lima_bcast_hw_init(ip);
}
void lima_bcast_fini(struct lima_ip *ip)
diff --git a/drivers/gpu/drm/lima/lima_bcast.h b/drivers/gpu/drm/lima/lima_bcast.h
index c47e58563d0a..465ee587bceb 100644
--- a/drivers/gpu/drm/lima/lima_bcast.h
+++ b/drivers/gpu/drm/lima/lima_bcast.h
@@ -6,6 +6,8 @@
struct lima_ip;
+int lima_bcast_resume(struct lima_ip *ip);
+void lima_bcast_suspend(struct lima_ip *ip);
int lima_bcast_init(struct lima_ip *ip);
void lima_bcast_fini(struct lima_ip *ip);
diff --git a/drivers/gpu/drm/lima/lima_ctx.c b/drivers/gpu/drm/lima/lima_ctx.c
index 22fff6caa961..891d5cd5019a 100644
--- a/drivers/gpu/drm/lima/lima_ctx.c
+++ b/drivers/gpu/drm/lima/lima_ctx.c
@@ -27,6 +27,9 @@ int lima_ctx_create(struct lima_device *dev, struct lima_ctx_mgr *mgr, u32 *id)
if (err < 0)
goto err_out0;
+ ctx->pid = task_pid_nr(current);
+ get_task_comm(ctx->pname, current);
+
return 0;
err_out0:
diff --git a/drivers/gpu/drm/lima/lima_ctx.h b/drivers/gpu/drm/lima/lima_ctx.h
index 6154e5c9bfe4..74e2be09090f 100644
--- a/drivers/gpu/drm/lima/lima_ctx.h
+++ b/drivers/gpu/drm/lima/lima_ctx.h
@@ -5,6 +5,7 @@
#define __LIMA_CTX_H__
#include <linux/xarray.h>
+#include <linux/sched.h>
#include "lima_device.h"
@@ -13,6 +14,10 @@ struct lima_ctx {
struct lima_device *dev;
struct lima_sched_context context[lima_pipe_num];
atomic_t guilty;
+
+ /* debug info */
+ char pname[TASK_COMM_LEN];
+ pid_t pid;
};
struct lima_ctx_mgr {
diff --git a/drivers/gpu/drm/lima/lima_devfreq.c b/drivers/gpu/drm/lima/lima_devfreq.c
new file mode 100644
index 000000000000..bbe02817721b
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_devfreq.c
@@ -0,0 +1,257 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2020 Martin Blumenstingl <martin.blumenstingl@googlemail.com>
+ *
+ * Based on panfrost_devfreq.c:
+ * Copyright 2019 Collabora ltd.
+ */
+#include <linux/clk.h>
+#include <linux/devfreq.h>
+#include <linux/devfreq_cooling.h>
+#include <linux/device.h>
+#include <linux/platform_device.h>
+#include <linux/pm_opp.h>
+#include <linux/property.h>
+
+#include "lima_device.h"
+#include "lima_devfreq.h"
+
+static void lima_devfreq_update_utilization(struct lima_devfreq *devfreq)
+{
+ ktime_t now, last;
+
+ now = ktime_get();
+ last = devfreq->time_last_update;
+
+ if (devfreq->busy_count > 0)
+ devfreq->busy_time += ktime_sub(now, last);
+ else
+ devfreq->idle_time += ktime_sub(now, last);
+
+ devfreq->time_last_update = now;
+}
+
+static int lima_devfreq_target(struct device *dev, unsigned long *freq,
+ u32 flags)
+{
+ struct dev_pm_opp *opp;
+ int err;
+
+ opp = devfreq_recommended_opp(dev, freq, flags);
+ if (IS_ERR(opp))
+ return PTR_ERR(opp);
+ dev_pm_opp_put(opp);
+
+ err = dev_pm_opp_set_rate(dev, *freq);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static void lima_devfreq_reset(struct lima_devfreq *devfreq)
+{
+ devfreq->busy_time = 0;
+ devfreq->idle_time = 0;
+ devfreq->time_last_update = ktime_get();
+}
+
+static int lima_devfreq_get_dev_status(struct device *dev,
+ struct devfreq_dev_status *status)
+{
+ struct lima_device *ldev = dev_get_drvdata(dev);
+ struct lima_devfreq *devfreq = &ldev->devfreq;
+ unsigned long irqflags;
+
+ status->current_frequency = clk_get_rate(ldev->clk_gpu);
+
+ spin_lock_irqsave(&devfreq->lock, irqflags);
+
+ lima_devfreq_update_utilization(devfreq);
+
+ status->total_time = ktime_to_ns(ktime_add(devfreq->busy_time,
+ devfreq->idle_time));
+ status->busy_time = ktime_to_ns(devfreq->busy_time);
+
+ lima_devfreq_reset(devfreq);
+
+ spin_unlock_irqrestore(&devfreq->lock, irqflags);
+
+ dev_dbg(ldev->dev, "busy %lu total %lu %lu %% freq %lu MHz\n",
+ status->busy_time, status->total_time,
+ status->busy_time / (status->total_time / 100),
+ status->current_frequency / 1000 / 1000);
+
+ return 0;
+}
+
+static struct devfreq_dev_profile lima_devfreq_profile = {
+ .polling_ms = 50, /* ~3 frames */
+ .target = lima_devfreq_target,
+ .get_dev_status = lima_devfreq_get_dev_status,
+};
+
+void lima_devfreq_fini(struct lima_device *ldev)
+{
+ struct lima_devfreq *devfreq = &ldev->devfreq;
+
+ if (devfreq->cooling) {
+ devfreq_cooling_unregister(devfreq->cooling);
+ devfreq->cooling = NULL;
+ }
+
+ if (devfreq->devfreq) {
+ devm_devfreq_remove_device(ldev->dev, devfreq->devfreq);
+ devfreq->devfreq = NULL;
+ }
+
+ if (devfreq->opp_of_table_added) {
+ dev_pm_opp_of_remove_table(ldev->dev);
+ devfreq->opp_of_table_added = false;
+ }
+
+ if (devfreq->regulators_opp_table) {
+ dev_pm_opp_put_regulators(devfreq->regulators_opp_table);
+ devfreq->regulators_opp_table = NULL;
+ }
+
+ if (devfreq->clkname_opp_table) {
+ dev_pm_opp_put_clkname(devfreq->clkname_opp_table);
+ devfreq->clkname_opp_table = NULL;
+ }
+}
+
+int lima_devfreq_init(struct lima_device *ldev)
+{
+ struct thermal_cooling_device *cooling;
+ struct device *dev = ldev->dev;
+ struct opp_table *opp_table;
+ struct devfreq *devfreq;
+ struct lima_devfreq *ldevfreq = &ldev->devfreq;
+ struct dev_pm_opp *opp;
+ unsigned long cur_freq;
+ int ret;
+
+ if (!device_property_present(dev, "operating-points-v2"))
+ /* Optional, continue without devfreq */
+ return 0;
+
+ spin_lock_init(&ldevfreq->lock);
+
+ opp_table = dev_pm_opp_set_clkname(dev, "core");
+ if (IS_ERR(opp_table)) {
+ ret = PTR_ERR(opp_table);
+ goto err_fini;
+ }
+
+ ldevfreq->clkname_opp_table = opp_table;
+
+ opp_table = dev_pm_opp_set_regulators(dev,
+ (const char *[]){ "mali" },
+ 1);
+ if (IS_ERR(opp_table)) {
+ ret = PTR_ERR(opp_table);
+
+ /* Continue if the optional regulator is missing */
+ if (ret != -ENODEV)
+ goto err_fini;
+ } else {
+ ldevfreq->regulators_opp_table = opp_table;
+ }
+
+ ret = dev_pm_opp_of_add_table(dev);
+ if (ret)
+ goto err_fini;
+ ldevfreq->opp_of_table_added = true;
+
+ lima_devfreq_reset(ldevfreq);
+
+ cur_freq = clk_get_rate(ldev->clk_gpu);
+
+ opp = devfreq_recommended_opp(dev, &cur_freq, 0);
+ if (IS_ERR(opp)) {
+ ret = PTR_ERR(opp);
+ goto err_fini;
+ }
+
+ lima_devfreq_profile.initial_freq = cur_freq;
+ dev_pm_opp_put(opp);
+
+ devfreq = devm_devfreq_add_device(dev, &lima_devfreq_profile,
+ DEVFREQ_GOV_SIMPLE_ONDEMAND, NULL);
+ if (IS_ERR(devfreq)) {
+ dev_err(dev, "Couldn't initialize GPU devfreq\n");
+ ret = PTR_ERR(devfreq);
+ goto err_fini;
+ }
+
+ ldevfreq->devfreq = devfreq;
+
+ cooling = of_devfreq_cooling_register(dev->of_node, devfreq);
+ if (IS_ERR(cooling))
+ dev_info(dev, "Failed to register cooling device\n");
+ else
+ ldevfreq->cooling = cooling;
+
+ return 0;
+
+err_fini:
+ lima_devfreq_fini(ldev);
+ return ret;
+}
+
+void lima_devfreq_record_busy(struct lima_devfreq *devfreq)
+{
+ unsigned long irqflags;
+
+ if (!devfreq->devfreq)
+ return;
+
+ spin_lock_irqsave(&devfreq->lock, irqflags);
+
+ lima_devfreq_update_utilization(devfreq);
+
+ devfreq->busy_count++;
+
+ spin_unlock_irqrestore(&devfreq->lock, irqflags);
+}
+
+void lima_devfreq_record_idle(struct lima_devfreq *devfreq)
+{
+ unsigned long irqflags;
+
+ if (!devfreq->devfreq)
+ return;
+
+ spin_lock_irqsave(&devfreq->lock, irqflags);
+
+ lima_devfreq_update_utilization(devfreq);
+
+ WARN_ON(--devfreq->busy_count < 0);
+
+ spin_unlock_irqrestore(&devfreq->lock, irqflags);
+}
+
+int lima_devfreq_resume(struct lima_devfreq *devfreq)
+{
+ unsigned long irqflags;
+
+ if (!devfreq->devfreq)
+ return 0;
+
+ spin_lock_irqsave(&devfreq->lock, irqflags);
+
+ lima_devfreq_reset(devfreq);
+
+ spin_unlock_irqrestore(&devfreq->lock, irqflags);
+
+ return devfreq_resume_device(devfreq->devfreq);
+}
+
+int lima_devfreq_suspend(struct lima_devfreq *devfreq)
+{
+ if (!devfreq->devfreq)
+ return 0;
+
+ return devfreq_suspend_device(devfreq->devfreq);
+}
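
The bookkeeping in lima_devfreq_update_utilization() above charges the time since the last update to busy_time while at least one job is in flight (busy_count > 0) and to idle_time otherwise; get_dev_status() then hands busy_time/total_time per polling window to the simple_ondemand governor. A minimal user-space sketch of that accounting with a fake clock, for illustration:

#include <stdint.h>
#include <stdio.h>

struct devfreq_stats {
	uint64_t busy_time, idle_time, last;
	int busy_count;
};

static void update(struct devfreq_stats *s, uint64_t now)
{
	if (s->busy_count > 0)
		s->busy_time += now - s->last;
	else
		s->idle_time += now - s->last;
	s->last = now;
}

static void record_busy(struct devfreq_stats *s, uint64_t now)
{
	update(s, now);
	s->busy_count++;
}

static void record_idle(struct devfreq_stats *s, uint64_t now)
{
	update(s, now);
	s->busy_count--;
}

int main(void)
{
	struct devfreq_stats s = { .last = 0 };

	record_busy(&s, 10);	/* job starts at t=10 */
	record_idle(&s, 70);	/* job ends at t=70   */
	update(&s, 100);	/* poll at t=100      */

	printf("load = %llu%%\n",	/* 60 busy / 100 total -> 60% */
	       (unsigned long long)(100 * s.busy_time /
				    (s.busy_time + s.idle_time)));
	return 0;
}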
diff --git a/drivers/gpu/drm/lima/lima_devfreq.h b/drivers/gpu/drm/lima/lima_devfreq.h
new file mode 100644
index 000000000000..5eed2975a375
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_devfreq.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright 2020 Martin Blumenstingl <martin.blumenstingl@googlemail.com> */
+
+#ifndef __LIMA_DEVFREQ_H__
+#define __LIMA_DEVFREQ_H__
+
+#include <linux/spinlock.h>
+#include <linux/ktime.h>
+
+struct devfreq;
+struct opp_table;
+struct thermal_cooling_device;
+
+struct lima_device;
+
+struct lima_devfreq {
+ struct devfreq *devfreq;
+ struct opp_table *clkname_opp_table;
+ struct opp_table *regulators_opp_table;
+ struct thermal_cooling_device *cooling;
+ bool opp_of_table_added;
+
+ ktime_t busy_time;
+ ktime_t idle_time;
+ ktime_t time_last_update;
+ int busy_count;
+ /*
+ * Protect busy_time, idle_time, time_last_update and busy_count
+ * because these can be updated concurrently, for example by the GP
+ * and PP interrupts.
+ */
+ spinlock_t lock;
+};
+
+int lima_devfreq_init(struct lima_device *ldev);
+void lima_devfreq_fini(struct lima_device *ldev);
+
+void lima_devfreq_record_busy(struct lima_devfreq *devfreq);
+void lima_devfreq_record_idle(struct lima_devfreq *devfreq);
+
+int lima_devfreq_resume(struct lima_devfreq *devfreq);
+int lima_devfreq_suspend(struct lima_devfreq *devfreq);
+
+#endif
diff --git a/drivers/gpu/drm/lima/lima_device.c b/drivers/gpu/drm/lima/lima_device.c
index 19829b543024..65fdca366e41 100644
--- a/drivers/gpu/drm/lima/lima_device.c
+++ b/drivers/gpu/drm/lima/lima_device.c
@@ -25,6 +25,8 @@ struct lima_ip_desc {
int (*init)(struct lima_ip *ip);
void (*fini)(struct lima_ip *ip);
+ int (*resume)(struct lima_ip *ip);
+ void (*suspend)(struct lima_ip *ip);
};
#define LIMA_IP_DESC(ipname, mst0, mst1, off0, off1, func, irq) \
@@ -41,6 +43,8 @@ struct lima_ip_desc {
}, \
.init = lima_##func##_init, \
.fini = lima_##func##_fini, \
+ .resume = lima_##func##_resume, \
+ .suspend = lima_##func##_suspend, \
}
static struct lima_ip_desc lima_ip_desc[lima_ip_num] = {
@@ -77,26 +81,10 @@ const char *lima_ip_name(struct lima_ip *ip)
return lima_ip_desc[ip->id].name;
}
-static int lima_clk_init(struct lima_device *dev)
+static int lima_clk_enable(struct lima_device *dev)
{
int err;
- dev->clk_bus = devm_clk_get(dev->dev, "bus");
- if (IS_ERR(dev->clk_bus)) {
- err = PTR_ERR(dev->clk_bus);
- if (err != -EPROBE_DEFER)
- dev_err(dev->dev, "get bus clk failed %d\n", err);
- return err;
- }
-
- dev->clk_gpu = devm_clk_get(dev->dev, "core");
- if (IS_ERR(dev->clk_gpu)) {
- err = PTR_ERR(dev->clk_gpu);
- if (err != -EPROBE_DEFER)
- dev_err(dev->dev, "get core clk failed %d\n", err);
- return err;
- }
-
err = clk_prepare_enable(dev->clk_bus);
if (err)
return err;
@@ -105,15 +93,7 @@ static int lima_clk_init(struct lima_device *dev)
if (err)
goto error_out0;
- dev->reset = devm_reset_control_array_get_optional_shared(dev->dev);
-
- if (IS_ERR(dev->reset)) {
- err = PTR_ERR(dev->reset);
- if (err != -EPROBE_DEFER)
- dev_err(dev->dev, "get reset controller failed %d\n",
- err);
- goto error_out1;
- } else if (dev->reset != NULL) {
+ if (dev->reset) {
err = reset_control_deassert(dev->reset);
if (err) {
dev_err(dev->dev,
@@ -131,14 +111,76 @@ error_out0:
return err;
}
-static void lima_clk_fini(struct lima_device *dev)
+static void lima_clk_disable(struct lima_device *dev)
{
- if (dev->reset != NULL)
+ if (dev->reset)
reset_control_assert(dev->reset);
clk_disable_unprepare(dev->clk_gpu);
clk_disable_unprepare(dev->clk_bus);
}
+static int lima_clk_init(struct lima_device *dev)
+{
+ int err;
+
+ dev->clk_bus = devm_clk_get(dev->dev, "bus");
+ if (IS_ERR(dev->clk_bus)) {
+ err = PTR_ERR(dev->clk_bus);
+ if (err != -EPROBE_DEFER)
+ dev_err(dev->dev, "get bus clk failed %d\n", err);
+ dev->clk_bus = NULL;
+ return err;
+ }
+
+ dev->clk_gpu = devm_clk_get(dev->dev, "core");
+ if (IS_ERR(dev->clk_gpu)) {
+ err = PTR_ERR(dev->clk_gpu);
+ if (err != -EPROBE_DEFER)
+ dev_err(dev->dev, "get core clk failed %d\n", err);
+ dev->clk_gpu = NULL;
+ return err;
+ }
+
+ dev->reset = devm_reset_control_array_get_optional_shared(dev->dev);
+ if (IS_ERR(dev->reset)) {
+ err = PTR_ERR(dev->reset);
+ if (err != -EPROBE_DEFER)
+ dev_err(dev->dev, "get reset controller failed %d\n",
+ err);
+ dev->reset = NULL;
+ return err;
+ }
+
+ return lima_clk_enable(dev);
+}
+
+static void lima_clk_fini(struct lima_device *dev)
+{
+ lima_clk_disable(dev);
+}
+
+static int lima_regulator_enable(struct lima_device *dev)
+{
+ int ret;
+
+ if (!dev->regulator)
+ return 0;
+
+ ret = regulator_enable(dev->regulator);
+ if (ret < 0) {
+ dev_err(dev->dev, "failed to enable regulator: %d\n", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+static void lima_regulator_disable(struct lima_device *dev)
+{
+ if (dev->regulator)
+ regulator_disable(dev->regulator);
+}
+
static int lima_regulator_init(struct lima_device *dev)
{
int ret;
@@ -154,25 +196,20 @@ static int lima_regulator_init(struct lima_device *dev)
return ret;
}
- ret = regulator_enable(dev->regulator);
- if (ret < 0) {
- dev_err(dev->dev, "failed to enable regulator: %d\n", ret);
- return ret;
- }
-
- return 0;
+ return lima_regulator_enable(dev);
}
static void lima_regulator_fini(struct lima_device *dev)
{
- if (dev->regulator)
- regulator_disable(dev->regulator);
+ lima_regulator_disable(dev);
}
static int lima_init_ip(struct lima_device *dev, int index)
{
+ struct platform_device *pdev = to_platform_device(dev->dev);
struct lima_ip_desc *desc = lima_ip_desc + index;
struct lima_ip *ip = dev->ip + index;
+ const char *irq_name = desc->irq_name;
int offset = desc->offset[dev->id];
bool must = desc->must_have[dev->id];
int err;
@@ -183,8 +220,9 @@ static int lima_init_ip(struct lima_device *dev, int index)
ip->dev = dev;
ip->id = index;
ip->iomem = dev->iomem + offset;
- if (desc->irq_name) {
- err = platform_get_irq_byname(dev->pdev, desc->irq_name);
+ if (irq_name) {
+ err = must ? platform_get_irq_byname(pdev, irq_name) :
+ platform_get_irq_byname_optional(pdev, irq_name);
if (err < 0)
goto out;
ip->irq = err;
@@ -209,11 +247,34 @@ static void lima_fini_ip(struct lima_device *ldev, int index)
desc->fini(ip);
}
+static int lima_resume_ip(struct lima_device *ldev, int index)
+{
+ struct lima_ip_desc *desc = lima_ip_desc + index;
+ struct lima_ip *ip = ldev->ip + index;
+ int ret = 0;
+
+ if (ip->present)
+ ret = desc->resume(ip);
+
+ return ret;
+}
+
+static void lima_suspend_ip(struct lima_device *ldev, int index)
+{
+ struct lima_ip_desc *desc = lima_ip_desc + index;
+ struct lima_ip *ip = ldev->ip + index;
+
+ if (ip->present)
+ desc->suspend(ip);
+}
+
static int lima_init_gp_pipe(struct lima_device *dev)
{
struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_gp;
int err;
+ pipe->ldev = dev;
+
err = lima_sched_pipe_init(pipe, "gp");
if (err)
return err;
@@ -244,6 +305,8 @@ static int lima_init_pp_pipe(struct lima_device *dev)
struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_pp;
int err, i;
+ pipe->ldev = dev;
+
err = lima_sched_pipe_init(pipe, "pp");
if (err)
return err;
@@ -290,8 +353,8 @@ static void lima_fini_pp_pipe(struct lima_device *dev)
int lima_device_init(struct lima_device *ldev)
{
+ struct platform_device *pdev = to_platform_device(ldev->dev);
int err, i;
- struct resource *res;
dma_set_coherent_mask(ldev->dev, DMA_BIT_MASK(32));
@@ -322,8 +385,7 @@ int lima_device_init(struct lima_device *ldev)
} else
ldev->va_end = LIMA_VA_RESERVE_END;
- res = platform_get_resource(ldev->pdev, IORESOURCE_MEM, 0);
- ldev->iomem = devm_ioremap_resource(ldev->dev, res);
+ ldev->iomem = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(ldev->iomem)) {
dev_err(ldev->dev, "fail to ioremap iomem\n");
err = PTR_ERR(ldev->iomem);
@@ -344,6 +406,12 @@ int lima_device_init(struct lima_device *ldev)
if (err)
goto err_out5;
+ ldev->dump.magic = LIMA_DUMP_MAGIC;
+ ldev->dump.version_major = LIMA_DUMP_MAJOR;
+ ldev->dump.version_minor = LIMA_DUMP_MINOR;
+ INIT_LIST_HEAD(&ldev->error_task_list);
+ mutex_init(&ldev->error_task_list_lock);
+
dev_info(ldev->dev, "bus rate = %lu\n", clk_get_rate(ldev->clk_bus));
dev_info(ldev->dev, "mod rate = %lu", clk_get_rate(ldev->clk_gpu));
@@ -370,6 +438,13 @@ err_out0:
void lima_device_fini(struct lima_device *ldev)
{
int i;
+ struct lima_sched_error_task *et, *tmp;
+
+ list_for_each_entry_safe(et, tmp, &ldev->error_task_list, list) {
+ list_del(&et->list);
+ kvfree(et);
+ }
+ mutex_destroy(&ldev->error_task_list_lock);
lima_fini_pp_pipe(ldev);
lima_fini_gp_pipe(ldev);
@@ -387,3 +462,72 @@ void lima_device_fini(struct lima_device *ldev)
lima_clk_fini(ldev);
}
+
+int lima_device_resume(struct device *dev)
+{
+ struct lima_device *ldev = dev_get_drvdata(dev);
+ int i, err;
+
+ err = lima_clk_enable(ldev);
+ if (err) {
+ dev_err(dev, "resume clk fail %d\n", err);
+ return err;
+ }
+
+ err = lima_regulator_enable(ldev);
+ if (err) {
+ dev_err(dev, "resume regulator fail %d\n", err);
+ goto err_out0;
+ }
+
+ for (i = 0; i < lima_ip_num; i++) {
+ err = lima_resume_ip(ldev, i);
+ if (err) {
+ dev_err(dev, "resume ip %d fail\n", i);
+ goto err_out1;
+ }
+ }
+
+ err = lima_devfreq_resume(&ldev->devfreq);
+ if (err) {
+ dev_err(dev, "devfreq resume fail\n");
+ goto err_out1;
+ }
+
+ return 0;
+
+err_out1:
+ while (--i >= 0)
+ lima_suspend_ip(ldev, i);
+ lima_regulator_disable(ldev);
+err_out0:
+ lima_clk_disable(ldev);
+ return err;
+}
+
+int lima_device_suspend(struct device *dev)
+{
+ struct lima_device *ldev = dev_get_drvdata(dev);
+ int i, err;
+
+ /* check any task running */
+ for (i = 0; i < lima_pipe_num; i++) {
+ if (atomic_read(&ldev->pipe[i].base.hw_rq_count))
+ return -EBUSY;
+ }
+
+ err = lima_devfreq_suspend(&ldev->devfreq);
+ if (err) {
+ dev_err(dev, "devfreq suspend fail\n");
+ return err;
+ }
+
+ for (i = lima_ip_num - 1; i >= 0; i--)
+ lima_suspend_ip(ldev, i);
+
+ lima_regulator_disable(ldev);
+
+ lima_clk_disable(ldev);
+
+ return 0;
+}
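
lima_device_resume() above uses the usual reverse-order unwind on failure: if IP block i fails to resume, every block already resumed (0..i-1) is suspended again before the regulator and clocks are dropped. A condensed sketch of the pattern with stand-in enable()/disable() steps (names are placeholders, not driver functions):

#include <stdio.h>

#define NUM_STEPS 4

static int enable(int i)   { printf("enable %d\n", i);  return i == 2 ? -1 : 0; }
static void disable(int i) { printf("disable %d\n", i); }

int main(void)
{
	int i, err = 0;

	for (i = 0; i < NUM_STEPS; i++) {
		err = enable(i);
		if (err)
			break;
	}
	if (err) {
		while (--i >= 0)	/* tear down only what succeeded */
			disable(i);
	}
	return err;
}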
diff --git a/drivers/gpu/drm/lima/lima_device.h b/drivers/gpu/drm/lima/lima_device.h
index 31158d86271c..41b9d7b4bcc7 100644
--- a/drivers/gpu/drm/lima/lima_device.h
+++ b/drivers/gpu/drm/lima/lima_device.h
@@ -6,8 +6,12 @@
#include <drm/drm_device.h>
#include <linux/delay.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
#include "lima_sched.h"
+#include "lima_dump.h"
+#include "lima_devfreq.h"
enum lima_gpu_id {
lima_gpu_mali400 = 0,
@@ -60,6 +64,8 @@ struct lima_ip {
bool async_reset;
/* l2 cache */
spinlock_t lock;
+ /* pmu/bcast */
+ u32 mask;
} data;
};
@@ -72,7 +78,6 @@ enum lima_pipe_id {
struct lima_device {
struct device *dev;
struct drm_device *ddev;
- struct platform_device *pdev;
enum lima_gpu_id id;
u32 gp_version;
@@ -94,6 +99,13 @@ struct lima_device {
u32 *dlbu_cpu;
dma_addr_t dlbu_dma;
+
+ struct lima_devfreq devfreq;
+
+ /* debug info */
+ struct lima_dump_head dump;
+ struct list_head error_task_list;
+ struct mutex error_task_list_lock;
};
static inline struct lima_device *
@@ -128,4 +140,7 @@ static inline int lima_poll_timeout(struct lima_ip *ip, lima_poll_func_t func,
return 0;
}
+int lima_device_suspend(struct device *dev);
+int lima_device_resume(struct device *dev);
+
#endif
diff --git a/drivers/gpu/drm/lima/lima_dlbu.c b/drivers/gpu/drm/lima/lima_dlbu.c
index 8399ceffb94b..c1d5ea35daa7 100644
--- a/drivers/gpu/drm/lima/lima_dlbu.c
+++ b/drivers/gpu/drm/lima/lima_dlbu.c
@@ -42,7 +42,7 @@ void lima_dlbu_set_reg(struct lima_ip *ip, u32 *reg)
dlbu_write(LIMA_DLBU_START_TILE_POS, reg[3]);
}
-int lima_dlbu_init(struct lima_ip *ip)
+static int lima_dlbu_hw_init(struct lima_ip *ip)
{
struct lima_device *dev = ip->dev;
@@ -52,6 +52,21 @@ int lima_dlbu_init(struct lima_ip *ip)
return 0;
}
+int lima_dlbu_resume(struct lima_ip *ip)
+{
+ return lima_dlbu_hw_init(ip);
+}
+
+void lima_dlbu_suspend(struct lima_ip *ip)
+{
+
+}
+
+int lima_dlbu_init(struct lima_ip *ip)
+{
+ return lima_dlbu_hw_init(ip);
+}
+
void lima_dlbu_fini(struct lima_ip *ip)
{
diff --git a/drivers/gpu/drm/lima/lima_dlbu.h b/drivers/gpu/drm/lima/lima_dlbu.h
index 16f877984466..be71daaaee89 100644
--- a/drivers/gpu/drm/lima/lima_dlbu.h
+++ b/drivers/gpu/drm/lima/lima_dlbu.h
@@ -12,6 +12,8 @@ void lima_dlbu_disable(struct lima_device *dev);
void lima_dlbu_set_reg(struct lima_ip *ip, u32 *reg);
+int lima_dlbu_resume(struct lima_ip *ip);
+void lima_dlbu_suspend(struct lima_ip *ip);
int lima_dlbu_init(struct lima_ip *ip);
void lima_dlbu_fini(struct lima_ip *ip);
diff --git a/drivers/gpu/drm/lima/lima_drv.c b/drivers/gpu/drm/lima/lima_drv.c
index 2daac64d8955..a831565af813 100644
--- a/drivers/gpu/drm/lima/lima_drv.c
+++ b/drivers/gpu/drm/lima/lima_drv.c
@@ -5,17 +5,20 @@
#include <linux/of_platform.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
+#include <linux/pm_runtime.h>
#include <drm/drm_ioctl.h>
#include <drm/drm_drv.h>
#include <drm/drm_prime.h>
#include <drm/lima_drm.h>
+#include "lima_device.h"
#include "lima_drv.h"
#include "lima_gem.h"
#include "lima_vm.h"
int lima_sched_timeout_ms;
uint lima_heap_init_nr_pages = 8;
+uint lima_max_error_tasks;
MODULE_PARM_DESC(sched_timeout_ms, "task run timeout in ms");
module_param_named(sched_timeout_ms, lima_sched_timeout_ms, int, 0444);
@@ -23,6 +26,9 @@ module_param_named(sched_timeout_ms, lima_sched_timeout_ms, int, 0444);
MODULE_PARM_DESC(heap_init_nr_pages, "heap buffer init number of pages");
module_param_named(heap_init_nr_pages, lima_heap_init_nr_pages, uint, 0444);
+MODULE_PARM_DESC(max_error_tasks, "max number of error tasks to save");
+module_param_named(max_error_tasks, lima_max_error_tasks, uint, 0644);
+
static int lima_ioctl_get_param(struct drm_device *dev, void *data, struct drm_file *file)
{
struct drm_lima_get_param *args = data;
@@ -272,6 +278,93 @@ static struct drm_driver lima_drm_driver = {
.gem_prime_mmap = drm_gem_prime_mmap,
};
+struct lima_block_reader {
+ void *dst;
+ size_t base;
+ size_t count;
+ size_t off;
+ ssize_t read;
+};
+
+static bool lima_read_block(struct lima_block_reader *reader,
+ void *src, size_t src_size)
+{
+ size_t max_off = reader->base + src_size;
+
+ if (reader->off < max_off) {
+ size_t size = min_t(size_t, max_off - reader->off,
+ reader->count);
+
+ memcpy(reader->dst, src + (reader->off - reader->base), size);
+
+ reader->dst += size;
+ reader->off += size;
+ reader->read += size;
+ reader->count -= size;
+ }
+
+ reader->base = max_off;
+
+ return !!reader->count;
+}
+
+static ssize_t lima_error_state_read(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *attr, char *buf,
+ loff_t off, size_t count)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct lima_device *ldev = dev_get_drvdata(dev);
+ struct lima_sched_error_task *et;
+ struct lima_block_reader reader = {
+ .dst = buf,
+ .count = count,
+ .off = off,
+ };
+
+ mutex_lock(&ldev->error_task_list_lock);
+
+ if (lima_read_block(&reader, &ldev->dump, sizeof(ldev->dump))) {
+ list_for_each_entry(et, &ldev->error_task_list, list) {
+ if (!lima_read_block(&reader, et->data, et->size))
+ break;
+ }
+ }
+
+ mutex_unlock(&ldev->error_task_list_lock);
+ return reader.read;
+}
+
+static ssize_t lima_error_state_write(struct file *file, struct kobject *kobj,
+ struct bin_attribute *attr, char *buf,
+ loff_t off, size_t count)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct lima_device *ldev = dev_get_drvdata(dev);
+ struct lima_sched_error_task *et, *tmp;
+
+ mutex_lock(&ldev->error_task_list_lock);
+
+ list_for_each_entry_safe(et, tmp, &ldev->error_task_list, list) {
+ list_del(&et->list);
+ kvfree(et);
+ }
+
+ ldev->dump.size = 0;
+ ldev->dump.num_tasks = 0;
+
+ mutex_unlock(&ldev->error_task_list_lock);
+
+ return count;
+}
+
+static const struct bin_attribute lima_error_state_attr = {
+ .attr.name = "error",
+ .attr.mode = 0600,
+ .size = 0,
+ .read = lima_error_state_read,
+ .write = lima_error_state_write,
+};
+
static int lima_pdev_probe(struct platform_device *pdev)
{
struct lima_device *ldev;
@@ -288,7 +381,6 @@ static int lima_pdev_probe(struct platform_device *pdev)
goto err_out0;
}
- ldev->pdev = pdev;
ldev->dev = &pdev->dev;
ldev->id = (enum lima_gpu_id)of_device_get_match_data(&pdev->dev);
@@ -306,16 +398,34 @@ static int lima_pdev_probe(struct platform_device *pdev)
if (err)
goto err_out1;
+ err = lima_devfreq_init(ldev);
+ if (err) {
+ dev_err(&pdev->dev, "Fatal error during devfreq init\n");
+ goto err_out2;
+ }
+
+ pm_runtime_set_active(ldev->dev);
+ pm_runtime_mark_last_busy(ldev->dev);
+ pm_runtime_set_autosuspend_delay(ldev->dev, 200);
+ pm_runtime_use_autosuspend(ldev->dev);
+ pm_runtime_enable(ldev->dev);
+
/*
* Register the DRM device with the core and the connectors with
* sysfs.
*/
err = drm_dev_register(ddev, 0);
if (err < 0)
- goto err_out2;
+ goto err_out3;
+
+ if (sysfs_create_bin_file(&ldev->dev->kobj, &lima_error_state_attr))
+ dev_warn(ldev->dev, "fail to create error state sysfs\n");
return 0;
+err_out3:
+ pm_runtime_disable(ldev->dev);
+ lima_devfreq_fini(ldev);
err_out2:
lima_device_fini(ldev);
err_out1:
@@ -330,8 +440,17 @@ static int lima_pdev_remove(struct platform_device *pdev)
struct lima_device *ldev = platform_get_drvdata(pdev);
struct drm_device *ddev = ldev->ddev;
+ sysfs_remove_bin_file(&ldev->dev->kobj, &lima_error_state_attr);
+
drm_dev_unregister(ddev);
+
+ /* stop autosuspend to make sure device is in active state */
+ pm_runtime_set_autosuspend_delay(ldev->dev, -1);
+ pm_runtime_disable(ldev->dev);
+
+ lima_devfreq_fini(ldev);
lima_device_fini(ldev);
+
drm_dev_put(ddev);
lima_sched_slab_fini();
return 0;
@@ -344,26 +463,22 @@ static const struct of_device_id dt_match[] = {
};
MODULE_DEVICE_TABLE(of, dt_match);
+static const struct dev_pm_ops lima_pm_ops = {
+ SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, pm_runtime_force_resume)
+ SET_RUNTIME_PM_OPS(lima_device_suspend, lima_device_resume, NULL)
+};
+
static struct platform_driver lima_platform_driver = {
.probe = lima_pdev_probe,
.remove = lima_pdev_remove,
.driver = {
.name = "lima",
+ .pm = &lima_pm_ops,
.of_match_table = dt_match,
},
};
-static int __init lima_init(void)
-{
- return platform_driver_register(&lima_platform_driver);
-}
-module_init(lima_init);
-
-static void __exit lima_exit(void)
-{
- platform_driver_unregister(&lima_platform_driver);
-}
-module_exit(lima_exit);
+module_platform_driver(lima_platform_driver);
MODULE_AUTHOR("Lima Project Developers");
MODULE_DESCRIPTION("Lima DRM Driver");
diff --git a/drivers/gpu/drm/lima/lima_drv.h b/drivers/gpu/drm/lima/lima_drv.h
index f492ecc6a5d9..fdbd4077c768 100644
--- a/drivers/gpu/drm/lima/lima_drv.h
+++ b/drivers/gpu/drm/lima/lima_drv.h
@@ -10,6 +10,7 @@
extern int lima_sched_timeout_ms;
extern uint lima_heap_init_nr_pages;
+extern uint lima_max_error_tasks;
struct lima_vm;
struct lima_bo;
diff --git a/drivers/gpu/drm/lima/lima_dump.h b/drivers/gpu/drm/lima/lima_dump.h
new file mode 100644
index 000000000000..ca243d99c51b
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_dump.h
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/* Copyright 2020 Qiang Yu <yuq825@gmail.com> */
+
+#ifndef __LIMA_DUMP_H__
+#define __LIMA_DUMP_H__
+
+#include <linux/types.h>
+
+/**
+ * dump file format for all the information to start a lima task
+ *
+ * top level format
+ * | magic code "LIMA" | format version | num tasks | data size |
+ * | reserved | reserved | reserved | reserved |
+ * | task 1 ID | task 1 size | num chunks | reserved | task 1 data |
+ * | task 2 ID | task 2 size | num chunks | reserved | task 2 data |
+ * ...
+ *
+ * task data format
+ * | chunk 1 ID | chunk 1 size | reserved | reserved | chunk 1 data |
+ * | chunk 2 ID | chunk 2 size | reserved | reserved | chunk 2 data |
+ * ...
+ *
+ */
+
+#define LIMA_DUMP_MAJOR 1
+#define LIMA_DUMP_MINOR 0
+
+#define LIMA_DUMP_MAGIC 0x414d494c
+
+struct lima_dump_head {
+ __u32 magic;
+ __u16 version_major;
+ __u16 version_minor;
+ __u32 num_tasks;
+ __u32 size;
+ __u32 reserved[4];
+};
+
+#define LIMA_DUMP_TASK_GP 0
+#define LIMA_DUMP_TASK_PP 1
+#define LIMA_DUMP_TASK_NUM 2
+
+struct lima_dump_task {
+ __u32 id;
+ __u32 size;
+ __u32 num_chunks;
+ __u32 reserved;
+};
+
+#define LIMA_DUMP_CHUNK_FRAME 0
+#define LIMA_DUMP_CHUNK_BUFFER 1
+#define LIMA_DUMP_CHUNK_PROCESS_NAME 2
+#define LIMA_DUMP_CHUNK_PROCESS_ID 3
+#define LIMA_DUMP_CHUNK_NUM 4
+
+struct lima_dump_chunk {
+ __u32 id;
+ __u32 size;
+ __u32 reserved[2];
+};
+
+struct lima_dump_chunk_buffer {
+ __u32 id;
+ __u32 size;
+ __u32 va;
+ __u32 reserved;
+};
+
+struct lima_dump_chunk_pid {
+ __u32 id;
+ __u32 size;
+ __u32 pid;
+ __u32 reserved;
+};
+
+#endif
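
The structures above define the layout of the error-state blob exported through the "error" bin attribute added in lima_drv.c. A hedged user-space sketch that decodes just the top-level lima_dump_head follows; the sysfs path is only an example and depends on the GPU's device-tree node name, and the per-task/per-chunk layout continues as described in the comment at the top of this header:

#include <stdio.h>
#include <stdint.h>

struct lima_dump_head {
	uint32_t magic;
	uint16_t version_major;
	uint16_t version_minor;
	uint32_t num_tasks;
	uint32_t size;
	uint32_t reserved[4];
};

int main(void)
{
	/* example path only; depends on the GPU's DT node */
	FILE *f = fopen("/sys/devices/platform/1c40000.gpu/error", "rb");
	struct lima_dump_head head;

	if (!f || fread(&head, sizeof(head), 1, f) != 1) {
		perror("read lima error state");
		return 1;
	}

	if (head.magic != 0x414d494c) {	/* "LIMA" */
		fprintf(stderr, "bad magic 0x%08x\n", head.magic);
		return 1;
	}

	printf("dump v%u.%u: %u task(s), %u bytes\n",
	       head.version_major, head.version_minor,
	       head.num_tasks, head.size);

	fclose(f);
	return 0;
}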
diff --git a/drivers/gpu/drm/lima/lima_gp.c b/drivers/gpu/drm/lima/lima_gp.c
index d8841c870d90..8dd501b7a3d0 100644
--- a/drivers/gpu/drm/lima/lima_gp.c
+++ b/drivers/gpu/drm/lima/lima_gp.c
@@ -274,6 +274,23 @@ static void lima_gp_print_version(struct lima_ip *ip)
static struct kmem_cache *lima_gp_task_slab;
static int lima_gp_task_slab_refcnt;
+static int lima_gp_hw_init(struct lima_ip *ip)
+{
+ ip->data.async_reset = false;
+ lima_gp_soft_reset_async(ip);
+ return lima_gp_soft_reset_async_wait(ip);
+}
+
+int lima_gp_resume(struct lima_ip *ip)
+{
+ return lima_gp_hw_init(ip);
+}
+
+void lima_gp_suspend(struct lima_ip *ip)
+{
+
+}
+
int lima_gp_init(struct lima_ip *ip)
{
struct lima_device *dev = ip->dev;
@@ -281,9 +298,7 @@ int lima_gp_init(struct lima_ip *ip)
lima_gp_print_version(ip);
- ip->data.async_reset = false;
- lima_gp_soft_reset_async(ip);
- err = lima_gp_soft_reset_async_wait(ip);
+ err = lima_gp_hw_init(ip);
if (err)
return err;
diff --git a/drivers/gpu/drm/lima/lima_gp.h b/drivers/gpu/drm/lima/lima_gp.h
index 516e5c1babbb..02ec9af78a51 100644
--- a/drivers/gpu/drm/lima/lima_gp.h
+++ b/drivers/gpu/drm/lima/lima_gp.h
@@ -7,6 +7,8 @@
struct lima_ip;
struct lima_device;
+int lima_gp_resume(struct lima_ip *ip);
+void lima_gp_suspend(struct lima_ip *ip);
int lima_gp_init(struct lima_ip *ip);
void lima_gp_fini(struct lima_ip *ip);
diff --git a/drivers/gpu/drm/lima/lima_l2_cache.c b/drivers/gpu/drm/lima/lima_l2_cache.c
index 6873a7af5a5c..c4080a02957b 100644
--- a/drivers/gpu/drm/lima/lima_l2_cache.c
+++ b/drivers/gpu/drm/lima/lima_l2_cache.c
@@ -38,9 +38,35 @@ int lima_l2_cache_flush(struct lima_ip *ip)
return ret;
}
+static int lima_l2_cache_hw_init(struct lima_ip *ip)
+{
+ int err;
+
+ err = lima_l2_cache_flush(ip);
+ if (err)
+ return err;
+
+ l2_cache_write(LIMA_L2_CACHE_ENABLE,
+ LIMA_L2_CACHE_ENABLE_ACCESS |
+ LIMA_L2_CACHE_ENABLE_READ_ALLOCATE);
+ l2_cache_write(LIMA_L2_CACHE_MAX_READS, 0x1c);
+
+ return 0;
+}
+
+int lima_l2_cache_resume(struct lima_ip *ip)
+{
+ return lima_l2_cache_hw_init(ip);
+}
+
+void lima_l2_cache_suspend(struct lima_ip *ip)
+{
+
+}
+
int lima_l2_cache_init(struct lima_ip *ip)
{
- int i, err;
+ int i;
u32 size;
struct lima_device *dev = ip->dev;
@@ -63,15 +89,7 @@ int lima_l2_cache_init(struct lima_ip *ip)
1 << (size & 0xff),
1 << ((size >> 24) & 0xff));
- err = lima_l2_cache_flush(ip);
- if (err)
- return err;
-
- l2_cache_write(LIMA_L2_CACHE_ENABLE,
- LIMA_L2_CACHE_ENABLE_ACCESS|LIMA_L2_CACHE_ENABLE_READ_ALLOCATE);
- l2_cache_write(LIMA_L2_CACHE_MAX_READS, 0x1c);
-
- return 0;
+ return lima_l2_cache_hw_init(ip);
}
void lima_l2_cache_fini(struct lima_ip *ip)
diff --git a/drivers/gpu/drm/lima/lima_l2_cache.h b/drivers/gpu/drm/lima/lima_l2_cache.h
index c63fb676ff14..1aeeefd53fb9 100644
--- a/drivers/gpu/drm/lima/lima_l2_cache.h
+++ b/drivers/gpu/drm/lima/lima_l2_cache.h
@@ -6,6 +6,8 @@
struct lima_ip;
+int lima_l2_cache_resume(struct lima_ip *ip);
+void lima_l2_cache_suspend(struct lima_ip *ip);
int lima_l2_cache_init(struct lima_ip *ip);
void lima_l2_cache_fini(struct lima_ip *ip);
diff --git a/drivers/gpu/drm/lima/lima_mmu.c b/drivers/gpu/drm/lima/lima_mmu.c
index f79d2af427e7..a1ae6c252dc2 100644
--- a/drivers/gpu/drm/lima/lima_mmu.c
+++ b/drivers/gpu/drm/lima/lima_mmu.c
@@ -59,12 +59,44 @@ static irqreturn_t lima_mmu_irq_handler(int irq, void *data)
return IRQ_HANDLED;
}
-int lima_mmu_init(struct lima_ip *ip)
+static int lima_mmu_hw_init(struct lima_ip *ip)
{
struct lima_device *dev = ip->dev;
int err;
u32 v;
+ mmu_write(LIMA_MMU_COMMAND, LIMA_MMU_COMMAND_HARD_RESET);
+ err = lima_mmu_send_command(LIMA_MMU_COMMAND_HARD_RESET,
+ LIMA_MMU_DTE_ADDR, v, v == 0);
+ if (err)
+ return err;
+
+ mmu_write(LIMA_MMU_INT_MASK,
+ LIMA_MMU_INT_PAGE_FAULT | LIMA_MMU_INT_READ_BUS_ERROR);
+ mmu_write(LIMA_MMU_DTE_ADDR, dev->empty_vm->pd.dma);
+ return lima_mmu_send_command(LIMA_MMU_COMMAND_ENABLE_PAGING,
+ LIMA_MMU_STATUS, v,
+ v & LIMA_MMU_STATUS_PAGING_ENABLED);
+}
+
+int lima_mmu_resume(struct lima_ip *ip)
+{
+ if (ip->id == lima_ip_ppmmu_bcast)
+ return 0;
+
+ return lima_mmu_hw_init(ip);
+}
+
+void lima_mmu_suspend(struct lima_ip *ip)
+{
+
+}
+
+int lima_mmu_init(struct lima_ip *ip)
+{
+ struct lima_device *dev = ip->dev;
+ int err;
+
if (ip->id == lima_ip_ppmmu_bcast)
return 0;
@@ -74,12 +106,6 @@ int lima_mmu_init(struct lima_ip *ip)
return -EIO;
}
- mmu_write(LIMA_MMU_COMMAND, LIMA_MMU_COMMAND_HARD_RESET);
- err = lima_mmu_send_command(LIMA_MMU_COMMAND_HARD_RESET,
- LIMA_MMU_DTE_ADDR, v, v == 0);
- if (err)
- return err;
-
err = devm_request_irq(dev->dev, ip->irq, lima_mmu_irq_handler,
IRQF_SHARED, lima_ip_name(ip), ip);
if (err) {
@@ -87,11 +113,7 @@ int lima_mmu_init(struct lima_ip *ip)
return err;
}
- mmu_write(LIMA_MMU_INT_MASK, LIMA_MMU_INT_PAGE_FAULT | LIMA_MMU_INT_READ_BUS_ERROR);
- mmu_write(LIMA_MMU_DTE_ADDR, dev->empty_vm->pd.dma);
- return lima_mmu_send_command(LIMA_MMU_COMMAND_ENABLE_PAGING,
- LIMA_MMU_STATUS, v,
- v & LIMA_MMU_STATUS_PAGING_ENABLED);
+ return lima_mmu_hw_init(ip);
}
void lima_mmu_fini(struct lima_ip *ip)
@@ -113,8 +135,7 @@ void lima_mmu_switch_vm(struct lima_ip *ip, struct lima_vm *vm)
LIMA_MMU_STATUS, v,
v & LIMA_MMU_STATUS_STALL_ACTIVE);
- if (vm)
- mmu_write(LIMA_MMU_DTE_ADDR, vm->pd.dma);
+ mmu_write(LIMA_MMU_DTE_ADDR, vm->pd.dma);
/* flush the TLB */
mmu_write(LIMA_MMU_COMMAND, LIMA_MMU_COMMAND_ZAP_CACHE);
diff --git a/drivers/gpu/drm/lima/lima_mmu.h b/drivers/gpu/drm/lima/lima_mmu.h
index 4f8ccbebcba1..f0c97ac75ea0 100644
--- a/drivers/gpu/drm/lima/lima_mmu.h
+++ b/drivers/gpu/drm/lima/lima_mmu.h
@@ -7,6 +7,8 @@
struct lima_ip;
struct lima_vm;
+int lima_mmu_resume(struct lima_ip *ip);
+void lima_mmu_suspend(struct lima_ip *ip);
int lima_mmu_init(struct lima_ip *ip);
void lima_mmu_fini(struct lima_ip *ip);
diff --git a/drivers/gpu/drm/lima/lima_pmu.c b/drivers/gpu/drm/lima/lima_pmu.c
index 571f6d661581..e397e1146e96 100644
--- a/drivers/gpu/drm/lima/lima_pmu.c
+++ b/drivers/gpu/drm/lima/lima_pmu.c
@@ -21,7 +21,7 @@ static int lima_pmu_wait_cmd(struct lima_ip *ip)
v, v & LIMA_PMU_INT_CMD_MASK,
100, 100000);
if (err) {
- dev_err(dev->dev, "timeout wait pmd cmd\n");
+ dev_err(dev->dev, "timeout wait pmu cmd\n");
return err;
}
@@ -29,7 +29,41 @@ static int lima_pmu_wait_cmd(struct lima_ip *ip)
return 0;
}
-int lima_pmu_init(struct lima_ip *ip)
+static u32 lima_pmu_get_ip_mask(struct lima_ip *ip)
+{
+ struct lima_device *dev = ip->dev;
+ u32 ret = 0;
+ int i;
+
+ ret |= LIMA_PMU_POWER_GP0_MASK;
+
+ if (dev->id == lima_gpu_mali400) {
+ ret |= LIMA_PMU_POWER_L2_MASK;
+ for (i = 0; i < 4; i++) {
+ if (dev->ip[lima_ip_pp0 + i].present)
+ ret |= LIMA_PMU_POWER_PP_MASK(i);
+ }
+ } else {
+ if (dev->ip[lima_ip_pp0].present)
+ ret |= LIMA450_PMU_POWER_PP0_MASK;
+ for (i = lima_ip_pp1; i <= lima_ip_pp3; i++) {
+ if (dev->ip[i].present) {
+ ret |= LIMA450_PMU_POWER_PP13_MASK;
+ break;
+ }
+ }
+ for (i = lima_ip_pp4; i <= lima_ip_pp7; i++) {
+ if (dev->ip[i].present) {
+ ret |= LIMA450_PMU_POWER_PP47_MASK;
+ break;
+ }
+ }
+ }
+
+ return ret;
+}
+
+static int lima_pmu_hw_init(struct lima_ip *ip)
{
int err;
u32 stat;
@@ -54,7 +88,44 @@ int lima_pmu_init(struct lima_ip *ip)
return 0;
}
-void lima_pmu_fini(struct lima_ip *ip)
+static void lima_pmu_hw_fini(struct lima_ip *ip)
{
+ u32 stat;
+
+ if (!ip->data.mask)
+ ip->data.mask = lima_pmu_get_ip_mask(ip);
+ stat = ~pmu_read(LIMA_PMU_STATUS) & ip->data.mask;
+ if (stat) {
+ pmu_write(LIMA_PMU_POWER_DOWN, stat);
+
+ /* Don't wait for interrupt on Mali400 if all domains are
+ * powered off because the HW won't generate an interrupt
+ * in this case.
+ */
+ if (ip->dev->id == lima_gpu_mali400)
+ pmu_write(LIMA_PMU_INT_CLEAR, LIMA_PMU_INT_CMD_MASK);
+ else
+ lima_pmu_wait_cmd(ip);
+ }
+}
+
+int lima_pmu_resume(struct lima_ip *ip)
+{
+ return lima_pmu_hw_init(ip);
+}
+
+void lima_pmu_suspend(struct lima_ip *ip)
+{
+ lima_pmu_hw_fini(ip);
+}
+
+int lima_pmu_init(struct lima_ip *ip)
+{
+ return lima_pmu_hw_init(ip);
+}
+
+void lima_pmu_fini(struct lima_ip *ip)
+{
+ lima_pmu_hw_fini(ip);
}
diff --git a/drivers/gpu/drm/lima/lima_pmu.h b/drivers/gpu/drm/lima/lima_pmu.h
index a2a18775eb07..652dc7af3047 100644
--- a/drivers/gpu/drm/lima/lima_pmu.h
+++ b/drivers/gpu/drm/lima/lima_pmu.h
@@ -6,6 +6,8 @@
struct lima_ip;
+int lima_pmu_resume(struct lima_ip *ip);
+void lima_pmu_suspend(struct lima_ip *ip);
int lima_pmu_init(struct lima_ip *ip);
void lima_pmu_fini(struct lima_ip *ip);
diff --git a/drivers/gpu/drm/lima/lima_pp.c b/drivers/gpu/drm/lima/lima_pp.c
index 8fef224b93c8..33f01383409c 100644
--- a/drivers/gpu/drm/lima/lima_pp.c
+++ b/drivers/gpu/drm/lima/lima_pp.c
@@ -223,6 +223,23 @@ static void lima_pp_print_version(struct lima_ip *ip)
lima_ip_name(ip), name, major, minor);
}
+static int lima_pp_hw_init(struct lima_ip *ip)
+{
+ ip->data.async_reset = false;
+ lima_pp_soft_reset_async(ip);
+ return lima_pp_soft_reset_async_wait(ip);
+}
+
+int lima_pp_resume(struct lima_ip *ip)
+{
+ return lima_pp_hw_init(ip);
+}
+
+void lima_pp_suspend(struct lima_ip *ip)
+{
+
+}
+
int lima_pp_init(struct lima_ip *ip)
{
struct lima_device *dev = ip->dev;
@@ -230,9 +247,7 @@ int lima_pp_init(struct lima_ip *ip)
lima_pp_print_version(ip);
- ip->data.async_reset = false;
- lima_pp_soft_reset_async(ip);
- err = lima_pp_soft_reset_async_wait(ip);
+ err = lima_pp_hw_init(ip);
if (err)
return err;
@@ -254,6 +269,16 @@ void lima_pp_fini(struct lima_ip *ip)
}
+int lima_pp_bcast_resume(struct lima_ip *ip)
+{
+ return 0;
+}
+
+void lima_pp_bcast_suspend(struct lima_ip *ip)
+{
+
+}
+
int lima_pp_bcast_init(struct lima_ip *ip)
{
struct lima_device *dev = ip->dev;
diff --git a/drivers/gpu/drm/lima/lima_pp.h b/drivers/gpu/drm/lima/lima_pp.h
index bf60c77b2633..16ec96de15a9 100644
--- a/drivers/gpu/drm/lima/lima_pp.h
+++ b/drivers/gpu/drm/lima/lima_pp.h
@@ -7,9 +7,13 @@
struct lima_ip;
struct lima_device;
+int lima_pp_resume(struct lima_ip *ip);
+void lima_pp_suspend(struct lima_ip *ip);
int lima_pp_init(struct lima_ip *ip);
void lima_pp_fini(struct lima_ip *ip);
+int lima_pp_bcast_resume(struct lima_ip *ip);
+void lima_pp_bcast_suspend(struct lima_ip *ip);
int lima_pp_bcast_init(struct lima_ip *ip);
void lima_pp_bcast_fini(struct lima_ip *ip);
diff --git a/drivers/gpu/drm/lima/lima_sched.c b/drivers/gpu/drm/lima/lima_sched.c
index 3886999b4533..e6cefda00279 100644
--- a/drivers/gpu/drm/lima/lima_sched.c
+++ b/drivers/gpu/drm/lima/lima_sched.c
@@ -3,14 +3,17 @@
#include <linux/kthread.h>
#include <linux/slab.h>
-#include <linux/xarray.h>
+#include <linux/vmalloc.h>
+#include <linux/pm_runtime.h>
+#include "lima_devfreq.h"
#include "lima_drv.h"
#include "lima_sched.h"
#include "lima_vm.h"
#include "lima_mmu.h"
#include "lima_l2_cache.h"
#include "lima_gem.h"
+#include "lima_trace.h"
struct lima_fence {
struct dma_fence base;
@@ -176,6 +179,7 @@ struct dma_fence *lima_sched_context_queue_task(struct lima_sched_context *conte
{
struct dma_fence *fence = dma_fence_get(&task->base.s_fence->finished);
+ trace_lima_task_submit(task);
drm_sched_entity_push_job(&task->base, &context->base);
return fence;
}
@@ -191,14 +195,36 @@ static struct dma_fence *lima_sched_dependency(struct drm_sched_job *job,
return NULL;
}
+static int lima_pm_busy(struct lima_device *ldev)
+{
+ int ret;
+
+ /* resume GPU if it has been suspended by runtime PM */
+ ret = pm_runtime_get_sync(ldev->dev);
+ if (ret < 0)
+ return ret;
+
+ lima_devfreq_record_busy(&ldev->devfreq);
+ return 0;
+}
+
+static void lima_pm_idle(struct lima_device *ldev)
+{
+ lima_devfreq_record_idle(&ldev->devfreq);
+
+ /* GPU can do auto runtime suspend */
+ pm_runtime_mark_last_busy(ldev->dev);
+ pm_runtime_put_autosuspend(ldev->dev);
+}
+
static struct dma_fence *lima_sched_run_job(struct drm_sched_job *job)
{
struct lima_sched_task *task = to_lima_task(job);
struct lima_sched_pipe *pipe = to_lima_pipe(job->sched);
+ struct lima_device *ldev = pipe->ldev;
struct lima_fence *fence;
struct dma_fence *ret;
- struct lima_vm *vm = NULL, *last_vm = NULL;
- int i;
+ int i, err;
/* after GPU reset */
if (job->s_fence->finished.error < 0)
@@ -207,6 +233,13 @@ static struct dma_fence *lima_sched_run_job(struct drm_sched_job *job)
fence = lima_fence_create(pipe);
if (!fence)
return NULL;
+
+ err = lima_pm_busy(ldev);
+ if (err < 0) {
+ dma_fence_put(&fence->base);
+ return NULL;
+ }
+
task->fence = &fence->base;
/* for caller usage of the fence, otherwise irq handler
@@ -234,21 +267,17 @@ static struct dma_fence *lima_sched_run_job(struct drm_sched_job *job)
for (i = 0; i < pipe->num_l2_cache; i++)
lima_l2_cache_flush(pipe->l2_cache[i]);
- if (task->vm != pipe->current_vm) {
- vm = lima_vm_get(task->vm);
- last_vm = pipe->current_vm;
- pipe->current_vm = task->vm;
- }
+ lima_vm_put(pipe->current_vm);
+ pipe->current_vm = lima_vm_get(task->vm);
if (pipe->bcast_mmu)
- lima_mmu_switch_vm(pipe->bcast_mmu, vm);
+ lima_mmu_switch_vm(pipe->bcast_mmu, pipe->current_vm);
else {
for (i = 0; i < pipe->num_mmu; i++)
- lima_mmu_switch_vm(pipe->mmu[i], vm);
+ lima_mmu_switch_vm(pipe->mmu[i], pipe->current_vm);
}
- if (last_vm)
- lima_vm_put(last_vm);
+ trace_lima_task_run(task);
pipe->error = false;
pipe->task_run(pipe, task);
@@ -256,10 +285,139 @@ static struct dma_fence *lima_sched_run_job(struct drm_sched_job *job)
return task->fence;
}
+static void lima_sched_build_error_task_list(struct lima_sched_task *task)
+{
+ struct lima_sched_error_task *et;
+ struct lima_sched_pipe *pipe = to_lima_pipe(task->base.sched);
+ struct lima_ip *ip = pipe->processor[0];
+ int pipe_id = ip->id == lima_ip_gp ? lima_pipe_gp : lima_pipe_pp;
+ struct lima_device *dev = ip->dev;
+ struct lima_sched_context *sched_ctx =
+ container_of(task->base.entity,
+ struct lima_sched_context, base);
+ struct lima_ctx *ctx =
+ container_of(sched_ctx, struct lima_ctx, context[pipe_id]);
+ struct lima_dump_task *dt;
+ struct lima_dump_chunk *chunk;
+ struct lima_dump_chunk_pid *pid_chunk;
+ struct lima_dump_chunk_buffer *buffer_chunk;
+ u32 size, task_size, mem_size;
+ int i;
+
+ mutex_lock(&dev->error_task_list_lock);
+
+ if (dev->dump.num_tasks >= lima_max_error_tasks) {
+ dev_info(dev->dev, "fail to save task state from %s pid %d: "
+ "error task list is full\n", ctx->pname, ctx->pid);
+ goto out;
+ }
+
+ /* frame chunk */
+ size = sizeof(struct lima_dump_chunk) + pipe->frame_size;
+ /* process name chunk */
+ size += sizeof(struct lima_dump_chunk) + sizeof(ctx->pname);
+ /* pid chunk */
+ size += sizeof(struct lima_dump_chunk);
+ /* buffer chunks */
+ for (i = 0; i < task->num_bos; i++) {
+ struct lima_bo *bo = task->bos[i];
+
+ size += sizeof(struct lima_dump_chunk);
+ size += bo->heap_size ? bo->heap_size : lima_bo_size(bo);
+ }
+
+ task_size = size + sizeof(struct lima_dump_task);
+ mem_size = task_size + sizeof(*et);
+ et = kvmalloc(mem_size, GFP_KERNEL);
+ if (!et) {
+ dev_err(dev->dev, "fail to alloc task dump buffer of size %x\n",
+ mem_size);
+ goto out;
+ }
+
+ et->data = et + 1;
+ et->size = task_size;
+
+ dt = et->data;
+ memset(dt, 0, sizeof(*dt));
+ dt->id = pipe_id;
+ dt->size = size;
+
+ chunk = (struct lima_dump_chunk *)(dt + 1);
+ memset(chunk, 0, sizeof(*chunk));
+ chunk->id = LIMA_DUMP_CHUNK_FRAME;
+ chunk->size = pipe->frame_size;
+ memcpy(chunk + 1, task->frame, pipe->frame_size);
+ dt->num_chunks++;
+
+ chunk = (void *)(chunk + 1) + chunk->size;
+ memset(chunk, 0, sizeof(*chunk));
+ chunk->id = LIMA_DUMP_CHUNK_PROCESS_NAME;
+ chunk->size = sizeof(ctx->pname);
+ memcpy(chunk + 1, ctx->pname, sizeof(ctx->pname));
+ dt->num_chunks++;
+
+ pid_chunk = (void *)(chunk + 1) + chunk->size;
+ memset(pid_chunk, 0, sizeof(*pid_chunk));
+ pid_chunk->id = LIMA_DUMP_CHUNK_PROCESS_ID;
+ pid_chunk->pid = ctx->pid;
+ dt->num_chunks++;
+
+ buffer_chunk = (void *)(pid_chunk + 1) + pid_chunk->size;
+ for (i = 0; i < task->num_bos; i++) {
+ struct lima_bo *bo = task->bos[i];
+ void *data;
+
+ memset(buffer_chunk, 0, sizeof(*buffer_chunk));
+ buffer_chunk->id = LIMA_DUMP_CHUNK_BUFFER;
+ buffer_chunk->va = lima_vm_get_va(task->vm, bo);
+
+ if (bo->heap_size) {
+ buffer_chunk->size = bo->heap_size;
+
+ data = vmap(bo->base.pages, bo->heap_size >> PAGE_SHIFT,
+ VM_MAP, pgprot_writecombine(PAGE_KERNEL));
+ if (!data) {
+ kvfree(et);
+ goto out;
+ }
+
+ memcpy(buffer_chunk + 1, data, buffer_chunk->size);
+
+ vunmap(data);
+ } else {
+ buffer_chunk->size = lima_bo_size(bo);
+
+ data = drm_gem_shmem_vmap(&bo->base.base);
+ if (IS_ERR_OR_NULL(data)) {
+ kvfree(et);
+ goto out;
+ }
+
+ memcpy(buffer_chunk + 1, data, buffer_chunk->size);
+
+ drm_gem_shmem_vunmap(&bo->base.base, data);
+ }
+
+ buffer_chunk = (void *)(buffer_chunk + 1) + buffer_chunk->size;
+ dt->num_chunks++;
+ }
+
+ list_add(&et->list, &dev->error_task_list);
+ dev->dump.size += et->size;
+ dev->dump.num_tasks++;
+
+ dev_info(dev->dev, "save error task state success\n");
+
+out:
+ mutex_unlock(&dev->error_task_list_lock);
+}
+
static void lima_sched_timedout_job(struct drm_sched_job *job)
{
struct lima_sched_pipe *pipe = to_lima_pipe(job->sched);
struct lima_sched_task *task = to_lima_task(job);
+ struct lima_device *ldev = pipe->ldev;
if (!pipe->error)
DRM_ERROR("lima job timeout\n");
@@ -268,6 +426,8 @@ static void lima_sched_timedout_job(struct drm_sched_job *job)
drm_sched_increase_karma(&task->base);
+ lima_sched_build_error_task_list(task);
+
pipe->task_error(pipe);
if (pipe->bcast_mmu)
@@ -279,12 +439,12 @@ static void lima_sched_timedout_job(struct drm_sched_job *job)
lima_mmu_page_fault_resume(pipe->mmu[i]);
}
- if (pipe->current_vm)
- lima_vm_put(pipe->current_vm);
-
+ lima_vm_put(pipe->current_vm);
pipe->current_vm = NULL;
pipe->current_task = NULL;
+ lima_pm_idle(ldev);
+
drm_sched_resubmit_jobs(&pipe->base);
drm_sched_start(&pipe->base, true);
}
@@ -355,6 +515,7 @@ void lima_sched_pipe_fini(struct lima_sched_pipe *pipe)
void lima_sched_pipe_task_done(struct lima_sched_pipe *pipe)
{
struct lima_sched_task *task = pipe->current_task;
+ struct lima_device *ldev = pipe->ldev;
if (pipe->error) {
if (task && task->recoverable)
@@ -364,5 +525,7 @@ void lima_sched_pipe_task_done(struct lima_sched_pipe *pipe)
} else {
pipe->task_fini(pipe);
dma_fence_signal(task->fence);
+
+ lima_pm_idle(ldev);
}
}
diff --git a/drivers/gpu/drm/lima/lima_sched.h b/drivers/gpu/drm/lima/lima_sched.h
index d64393fb50a9..90f03c48ef4a 100644
--- a/drivers/gpu/drm/lima/lima_sched.h
+++ b/drivers/gpu/drm/lima/lima_sched.h
@@ -5,9 +5,18 @@
#define __LIMA_SCHED_H__
#include <drm/gpu_scheduler.h>
+#include <linux/list.h>
+#include <linux/xarray.h>
+struct lima_device;
struct lima_vm;
+struct lima_sched_error_task {
+ struct list_head list;
+ void *data;
+ u32 size;
+};
+
struct lima_sched_task {
struct drm_sched_job base;
@@ -44,6 +53,8 @@ struct lima_sched_pipe {
u32 fence_seqno;
spinlock_t fence_lock;
+ struct lima_device *ldev;
+
struct lima_sched_task *current_task;
struct lima_vm *current_vm;
diff --git a/drivers/gpu/drm/lima/lima_trace.c b/drivers/gpu/drm/lima/lima_trace.c
new file mode 100644
index 000000000000..ea1c7289bebc
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_trace.c
@@ -0,0 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/* Copyright 2020 Qiang Yu <yuq825@gmail.com> */
+
+#include "lima_sched.h"
+
+#define CREATE_TRACE_POINTS
+#include "lima_trace.h"
diff --git a/drivers/gpu/drm/lima/lima_trace.h b/drivers/gpu/drm/lima/lima_trace.h
new file mode 100644
index 000000000000..3a430e93d384
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_trace.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/* Copyright 2020 Qiang Yu <yuq825@gmail.com> */
+
+#if !defined(_LIMA_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _LIMA_TRACE_H_
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM lima
+#define TRACE_INCLUDE_FILE lima_trace
+
+DECLARE_EVENT_CLASS(lima_task,
+ TP_PROTO(struct lima_sched_task *task),
+ TP_ARGS(task),
+ TP_STRUCT__entry(
+ __field(uint64_t, task_id)
+ __field(unsigned int, context)
+ __field(unsigned int, seqno)
+ __string(pipe, task->base.sched->name)
+ ),
+
+ TP_fast_assign(
+ __entry->task_id = task->base.id;
+ __entry->context = task->base.s_fence->finished.context;
+ __entry->seqno = task->base.s_fence->finished.seqno;
+ __assign_str(pipe, task->base.sched->name)
+ ),
+
+ TP_printk("task=%llu, context=%u seqno=%u pipe=%s",
+ __entry->task_id, __entry->context, __entry->seqno,
+ __get_str(pipe))
+);
+
+DEFINE_EVENT(lima_task, lima_task_submit,
+ TP_PROTO(struct lima_sched_task *task),
+ TP_ARGS(task)
+);
+
+DEFINE_EVENT(lima_task, lima_task_run,
+ TP_PROTO(struct lima_sched_task *task),
+ TP_ARGS(task)
+);
+
+#endif
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ../../drivers/gpu/drm/lima
+#include <trace/define_trace.h>
diff --git a/drivers/gpu/drm/lima/lima_vm.h b/drivers/gpu/drm/lima/lima_vm.h
index 22aeec77d84d..3a7c74822d8b 100644
--- a/drivers/gpu/drm/lima/lima_vm.h
+++ b/drivers/gpu/drm/lima/lima_vm.h
@@ -54,7 +54,8 @@ static inline struct lima_vm *lima_vm_get(struct lima_vm *vm)
static inline void lima_vm_put(struct lima_vm *vm)
{
- kref_put(&vm->refcount, lima_vm_release);
+ if (vm)
+ kref_put(&vm->refcount, lima_vm_release);
}
void lima_vm_print(struct lima_vm *vm);
diff --git a/drivers/gpu/drm/mcde/mcde_display.c b/drivers/gpu/drm/mcde/mcde_display.c
index e59907e68854..04e1d38d41f7 100644
--- a/drivers/gpu/drm/mcde/mcde_display.c
+++ b/drivers/gpu/drm/mcde/mcde_display.c
@@ -948,7 +948,7 @@ static void mcde_display_disable(struct drm_simple_display_pipe *pipe)
{
struct drm_crtc *crtc = &pipe->crtc;
struct drm_device *drm = crtc->dev;
- struct mcde *mcde = drm->dev_private;
+ struct mcde *mcde = to_mcde(drm);
struct drm_pending_vblank_event *event;
drm_crtc_vblank_off(crtc);
@@ -1020,7 +1020,7 @@ static void mcde_display_update(struct drm_simple_display_pipe *pipe,
{
struct drm_crtc *crtc = &pipe->crtc;
struct drm_device *drm = crtc->dev;
- struct mcde *mcde = drm->dev_private;
+ struct mcde *mcde = to_mcde(drm);
struct drm_pending_vblank_event *event = crtc->state->event;
struct drm_plane *plane = &pipe->plane;
struct drm_plane_state *pstate = plane->state;
@@ -1078,7 +1078,7 @@ static int mcde_display_enable_vblank(struct drm_simple_display_pipe *pipe)
{
struct drm_crtc *crtc = &pipe->crtc;
struct drm_device *drm = crtc->dev;
- struct mcde *mcde = drm->dev_private;
+ struct mcde *mcde = to_mcde(drm);
u32 val;
/* Enable all VBLANK IRQs */
@@ -1097,7 +1097,7 @@ static void mcde_display_disable_vblank(struct drm_simple_display_pipe *pipe)
{
struct drm_crtc *crtc = &pipe->crtc;
struct drm_device *drm = crtc->dev;
- struct mcde *mcde = drm->dev_private;
+ struct mcde *mcde = to_mcde(drm);
/* Disable all VBLANK IRQs */
writel(0, mcde->regs + MCDE_IMSCPP);
@@ -1117,7 +1117,7 @@ static struct drm_simple_display_pipe_funcs mcde_display_funcs = {
int mcde_display_init(struct drm_device *drm)
{
- struct mcde *mcde = drm->dev_private;
+ struct mcde *mcde = to_mcde(drm);
int ret;
static const u32 formats[] = {
DRM_FORMAT_ARGB8888,
diff --git a/drivers/gpu/drm/mcde/mcde_drm.h b/drivers/gpu/drm/mcde/mcde_drm.h
index 80edd6628979..679c2c4e6d9d 100644
--- a/drivers/gpu/drm/mcde/mcde_drm.h
+++ b/drivers/gpu/drm/mcde/mcde_drm.h
@@ -34,6 +34,8 @@ struct mcde {
struct regulator *vana;
};
+#define to_mcde(dev) container_of(dev, struct mcde, drm)
+
bool mcde_dsi_irq(struct mipi_dsi_device *mdsi);
void mcde_dsi_te_request(struct mipi_dsi_device *mdsi);
extern struct platform_driver mcde_dsi_driver;
diff --git a/drivers/gpu/drm/mcde/mcde_drv.c b/drivers/gpu/drm/mcde/mcde_drv.c
index f28cb7a576ba..84f3e2dbd77b 100644
--- a/drivers/gpu/drm/mcde/mcde_drv.c
+++ b/drivers/gpu/drm/mcde/mcde_drv.c
@@ -72,6 +72,7 @@
#include <drm/drm_gem.h>
#include <drm/drm_gem_cma_helper.h>
#include <drm/drm_gem_framebuffer_helper.h>
+#include <drm/drm_managed.h>
#include <drm/drm_of.h>
#include <drm/drm_probe_helper.h>
#include <drm/drm_panel.h>
@@ -163,7 +164,7 @@ static irqreturn_t mcde_irq(int irq, void *data)
static int mcde_modeset_init(struct drm_device *drm)
{
struct drm_mode_config *mode_config;
- struct mcde *mcde = drm->dev_private;
+ struct mcde *mcde = to_mcde(drm);
int ret;
if (!mcde->bridge) {
@@ -183,13 +184,13 @@ static int mcde_modeset_init(struct drm_device *drm)
ret = drm_vblank_init(drm, 1);
if (ret) {
dev_err(drm->dev, "failed to init vblank\n");
- goto out_config;
+ return ret;
}
ret = mcde_display_init(drm);
if (ret) {
dev_err(drm->dev, "failed to init display\n");
- goto out_config;
+ return ret;
}
/*
@@ -203,7 +204,7 @@ static int mcde_modeset_init(struct drm_device *drm)
mcde->bridge);
if (ret) {
dev_err(drm->dev, "failed to attach display output bridge\n");
- goto out_config;
+ return ret;
}
drm_mode_config_reset(drm);
@@ -211,19 +212,6 @@ static int mcde_modeset_init(struct drm_device *drm)
drm_fbdev_generic_setup(drm, 32);
return 0;
-
-out_config:
- drm_mode_config_cleanup(drm);
- return ret;
-}
-
-static void mcde_release(struct drm_device *drm)
-{
- struct mcde *mcde = drm->dev_private;
-
- drm_mode_config_cleanup(drm);
- drm_dev_fini(drm);
- kfree(mcde);
}
DEFINE_DRM_GEM_CMA_FOPS(drm_fops);
@@ -231,7 +219,6 @@ DEFINE_DRM_GEM_CMA_FOPS(drm_fops);
static struct drm_driver mcde_drm_driver = {
.driver_features =
DRIVER_MODESET | DRIVER_GEM | DRIVER_ATOMIC,
- .release = mcde_release,
.lastclose = drm_fb_helper_lastclose,
.ioctls = NULL,
.fops = &drm_fops,
@@ -259,7 +246,9 @@ static int mcde_drm_bind(struct device *dev)
struct drm_device *drm = dev_get_drvdata(dev);
int ret;
- drm_mode_config_init(drm);
+ ret = drmm_mode_config_init(drm);
+ if (ret)
+ return ret;
ret = component_bind_all(drm->dev, drm);
if (ret) {
@@ -318,35 +307,27 @@ static int mcde_probe(struct platform_device *pdev)
int ret;
int i;
- mcde = kzalloc(sizeof(*mcde), GFP_KERNEL);
- if (!mcde)
- return -ENOMEM;
- mcde->dev = dev;
-
- ret = drm_dev_init(&mcde->drm, &mcde_drm_driver, dev);
- if (ret) {
- kfree(mcde);
- return ret;
- }
+ mcde = devm_drm_dev_alloc(dev, &mcde_drm_driver, struct mcde, drm);
+ if (IS_ERR(mcde))
+ return PTR_ERR(mcde);
drm = &mcde->drm;
- drm->dev_private = mcde;
+ mcde->dev = dev;
platform_set_drvdata(pdev, drm);
/* Enable continuous updates: this is what Linux' framebuffer expects */
mcde->oneshot_mode = false;
- drm->dev_private = mcde;
/* First obtain and turn on the main power */
mcde->epod = devm_regulator_get(dev, "epod");
if (IS_ERR(mcde->epod)) {
ret = PTR_ERR(mcde->epod);
dev_err(dev, "can't get EPOD regulator\n");
- goto dev_unref;
+ return ret;
}
ret = regulator_enable(mcde->epod);
if (ret) {
dev_err(dev, "can't enable EPOD regulator\n");
- goto dev_unref;
+ return ret;
}
mcde->vana = devm_regulator_get(dev, "vana");
if (IS_ERR(mcde->vana)) {
@@ -497,8 +478,6 @@ regulator_off:
regulator_disable(mcde->vana);
regulator_epod_off:
regulator_disable(mcde->epod);
-dev_unref:
- drm_dev_put(drm);
return ret;
}
@@ -506,13 +485,12 @@ dev_unref:
static int mcde_remove(struct platform_device *pdev)
{
struct drm_device *drm = platform_get_drvdata(pdev);
- struct mcde *mcde = drm->dev_private;
+ struct mcde *mcde = to_mcde(drm);
component_master_del(&pdev->dev, &mcde_drm_comp_ops);
clk_disable_unprepare(mcde->mcde_clk);
regulator_disable(mcde->vana);
regulator_disable(mcde->epod);
- drm_dev_put(drm);
return 0;
}
diff --git a/drivers/gpu/drm/mcde/mcde_dsi.c b/drivers/gpu/drm/mcde/mcde_dsi.c
index 7af5ebb0c436..f303369305a3 100644
--- a/drivers/gpu/drm/mcde/mcde_dsi.c
+++ b/drivers/gpu/drm/mcde/mcde_dsi.c
@@ -1020,7 +1020,7 @@ static int mcde_dsi_bind(struct device *dev, struct device *master,
void *data)
{
struct drm_device *drm = data;
- struct mcde *mcde = drm->dev_private;
+ struct mcde *mcde = to_mcde(drm);
struct mcde_dsi *d = dev_get_drvdata(dev);
struct device_node *child;
struct drm_panel *panel = NULL;
@@ -1073,10 +1073,9 @@ static int mcde_dsi_bind(struct device *dev, struct device *master,
panel = NULL;
bridge = of_drm_find_bridge(child);
- if (IS_ERR(bridge)) {
- dev_err(dev, "failed to find bridge (%ld)\n",
- PTR_ERR(bridge));
- return PTR_ERR(bridge);
+ if (!bridge) {
+ dev_err(dev, "failed to find bridge\n");
+ return -EINVAL;
}
}
}
diff --git a/drivers/gpu/drm/mediatek/mtk_dpi.c b/drivers/gpu/drm/mediatek/mtk_dpi.c
index 4f0ce4cd5b8c..945c3ac92998 100644
--- a/drivers/gpu/drm/mediatek/mtk_dpi.c
+++ b/drivers/gpu/drm/mediatek/mtk_dpi.c
@@ -10,7 +10,9 @@
#include <linux/kernel.h>
#include <linux/of.h>
#include <linux/of_device.h>
+#include <linux/of_gpio.h>
#include <linux/of_graph.h>
+#include <linux/pinctrl/consumer.h>
#include <linux/platform_device.h>
#include <linux/types.h>
@@ -20,6 +22,7 @@
#include <drm/drm_bridge.h>
#include <drm/drm_crtc.h>
#include <drm/drm_of.h>
+#include <drm/drm_simple_kms_helper.h>
#include "mtk_dpi_regs.h"
#include "mtk_drm_ddp_comp.h"
@@ -74,6 +77,9 @@ struct mtk_dpi {
enum mtk_dpi_out_yc_map yc_map;
enum mtk_dpi_out_bit_num bit_num;
enum mtk_dpi_out_channel_swap channel_swap;
+ struct pinctrl *pinctrl;
+ struct pinctrl_state *pins_gpio;
+ struct pinctrl_state *pins_dpi;
int refcount;
};
@@ -379,6 +385,9 @@ static void mtk_dpi_power_off(struct mtk_dpi *dpi)
if (--dpi->refcount != 0)
return;
+ if (dpi->pinctrl && dpi->pins_gpio)
+ pinctrl_select_state(dpi->pinctrl, dpi->pins_gpio);
+
mtk_dpi_disable(dpi);
clk_disable_unprepare(dpi->pixel_clk);
clk_disable_unprepare(dpi->engine_clk);
@@ -403,6 +412,9 @@ static int mtk_dpi_power_on(struct mtk_dpi *dpi)
goto err_pixel;
}
+ if (dpi->pinctrl && dpi->pins_dpi)
+ pinctrl_select_state(dpi->pinctrl, dpi->pins_dpi);
+
mtk_dpi_enable(dpi);
return 0;
@@ -509,15 +521,6 @@ static int mtk_dpi_set_display_mode(struct mtk_dpi *dpi,
return 0;
}
-static void mtk_dpi_encoder_destroy(struct drm_encoder *encoder)
-{
- drm_encoder_cleanup(encoder);
-}
-
-static const struct drm_encoder_funcs mtk_dpi_encoder_funcs = {
- .destroy = mtk_dpi_encoder_destroy,
-};
-
static bool mtk_dpi_encoder_mode_fixup(struct drm_encoder *encoder,
const struct drm_display_mode *mode,
struct drm_display_mode *adjusted_mode)
@@ -596,8 +599,8 @@ static int mtk_dpi_bind(struct device *dev, struct device *master, void *data)
return ret;
}
- ret = drm_encoder_init(drm_dev, &dpi->encoder, &mtk_dpi_encoder_funcs,
- DRM_MODE_ENCODER_TMDS, NULL);
+ ret = drm_simple_encoder_init(drm_dev, &dpi->encoder,
+ DRM_MODE_ENCODER_TMDS);
if (ret) {
dev_err(dev, "Failed to initialize decoder: %d\n", ret);
goto err_unregister;
@@ -705,6 +708,26 @@ static int mtk_dpi_probe(struct platform_device *pdev)
dpi->dev = dev;
dpi->conf = (struct mtk_dpi_conf *)of_device_get_match_data(dev);
+ dpi->pinctrl = devm_pinctrl_get(&pdev->dev);
+ if (IS_ERR(dpi->pinctrl)) {
+ dpi->pinctrl = NULL;
+ dev_dbg(&pdev->dev, "Cannot find pinctrl!\n");
+ }
+ if (dpi->pinctrl) {
+ dpi->pins_gpio = pinctrl_lookup_state(dpi->pinctrl, "sleep");
+ if (IS_ERR(dpi->pins_gpio)) {
+ dpi->pins_gpio = NULL;
+ dev_dbg(&pdev->dev, "Cannot find pinctrl idle!\n");
+ }
+ if (dpi->pins_gpio)
+ pinctrl_select_state(dpi->pinctrl, dpi->pins_gpio);
+
+ dpi->pins_dpi = pinctrl_lookup_state(dpi->pinctrl, "default");
+ if (IS_ERR(dpi->pins_dpi)) {
+ dpi->pins_dpi = NULL;
+ dev_dbg(&pdev->dev, "Cannot find pinctrl active!\n");
+ }
+ }
mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
dpi->regs = devm_ioremap_resource(dev, mem);
if (IS_ERR(dpi->regs)) {
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c
index 0563c6813333..ce570283b55f 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c
@@ -162,7 +162,9 @@ static int mtk_drm_kms_init(struct drm_device *drm)
}
private->mutex_dev = &pdev->dev;
- drm_mode_config_init(drm);
+ ret = drmm_mode_config_init(drm);
+ if (ret)
+ return ret;
drm->mode_config.min_width = 64;
drm->mode_config.min_height = 64;
@@ -179,7 +181,7 @@ static int mtk_drm_kms_init(struct drm_device *drm)
ret = component_bind_all(drm->dev, drm);
if (ret)
- goto err_config_cleanup;
+ return ret;
/*
* We currently support two fixed data streams, each optional,
@@ -255,8 +257,6 @@ err_unset_dma_parms:
dma_dev->dma_parms = NULL;
err_component_unbind:
component_unbind_all(drm->dev, drm);
-err_config_cleanup:
- drm_mode_config_cleanup(drm);
return ret;
}
@@ -272,7 +272,6 @@ static void mtk_drm_kms_deinit(struct drm_device *drm)
private->dma_dev->dma_parms = NULL;
component_unbind_all(drm->dev, drm);
- drm_mode_config_cleanup(drm);
}
static const struct file_operations mtk_drm_fops = {
@@ -348,9 +347,7 @@ static int mtk_drm_bind(struct device *dev)
if (ret < 0)
goto err_deinit;
- ret = drm_fbdev_generic_setup(drm, 32);
- if (ret)
- DRM_ERROR("Failed to initialize fbdev: %d\n", ret);
+ drm_fbdev_generic_setup(drm, 32);
return 0;
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_gem.c b/drivers/gpu/drm/mediatek/mtk_drm_gem.c
index b04a3c2b111e..f8fd8b98c30e 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_gem.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_gem.c
@@ -224,6 +224,9 @@ struct drm_gem_object *mtk_gem_prime_import_sg_table(struct drm_device *dev,
expected = sg_dma_address(sg->sgl);
for_each_sg(sg->sgl, s, sg->nents, i) {
+ if (!sg_dma_len(s))
+ break;
+
if (sg_dma_address(s) != expected) {
DRM_ERROR("sg_table is not contiguous");
ret = -EINVAL;
diff --git a/drivers/gpu/drm/mediatek/mtk_dsi.c b/drivers/gpu/drm/mediatek/mtk_dsi.c
index 0ede69830a9d..a9a25087112f 100644
--- a/drivers/gpu/drm/mediatek/mtk_dsi.c
+++ b/drivers/gpu/drm/mediatek/mtk_dsi.c
@@ -22,6 +22,7 @@
#include <drm/drm_panel.h>
#include <drm/drm_print.h>
#include <drm/drm_probe_helper.h>
+#include <drm/drm_simple_kms_helper.h>
#include "mtk_drm_ddp_comp.h"
@@ -787,15 +788,6 @@ static void mtk_output_dsi_disable(struct mtk_dsi *dsi)
dsi->enabled = false;
}
-static void mtk_dsi_encoder_destroy(struct drm_encoder *encoder)
-{
- drm_encoder_cleanup(encoder);
-}
-
-static const struct drm_encoder_funcs mtk_dsi_encoder_funcs = {
- .destroy = mtk_dsi_encoder_destroy,
-};
-
static bool mtk_dsi_encoder_mode_fixup(struct drm_encoder *encoder,
const struct drm_display_mode *mode,
struct drm_display_mode *adjusted_mode)
@@ -888,8 +880,8 @@ static int mtk_dsi_create_conn_enc(struct drm_device *drm, struct mtk_dsi *dsi)
{
int ret;
- ret = drm_encoder_init(drm, &dsi->encoder, &mtk_dsi_encoder_funcs,
- DRM_MODE_ENCODER_DSI, NULL);
+ ret = drm_simple_encoder_init(drm, &dsi->encoder,
+ DRM_MODE_ENCODER_DSI);
if (ret) {
DRM_ERROR("Failed to encoder init to drm\n");
return ret;
diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi.c b/drivers/gpu/drm/mediatek/mtk_hdmi.c
index ff43a3d80410..7bc086ec74f7 100644
--- a/drivers/gpu/drm/mediatek/mtk_hdmi.c
+++ b/drivers/gpu/drm/mediatek/mtk_hdmi.c
@@ -311,14 +311,10 @@ static void mtk_hdmi_hw_send_info_frame(struct mtk_hdmi *hdmi, u8 *buffer,
u8 checksum;
int ctrl_frame_en = 0;
- frame_type = *buffer;
- buffer += 1;
- frame_ver = *buffer;
- buffer += 1;
- frame_len = *buffer;
- buffer += 1;
- checksum = *buffer;
- buffer += 1;
+ frame_type = *buffer++;
+ frame_ver = *buffer++;
+ frame_len = *buffer++;
+ checksum = *buffer++;
frame_data = buffer;
dev_dbg(hdmi->dev,
@@ -982,7 +978,7 @@ static int mtk_hdmi_setup_avi_infoframe(struct mtk_hdmi *hdmi,
struct drm_display_mode *mode)
{
struct hdmi_avi_infoframe frame;
- u8 buffer[17];
+ u8 buffer[HDMI_INFOFRAME_HEADER_SIZE + HDMI_AVI_INFOFRAME_SIZE];
ssize_t err;
err = drm_hdmi_avi_infoframe_from_display_mode(&frame,
@@ -1008,7 +1004,7 @@ static int mtk_hdmi_setup_spd_infoframe(struct mtk_hdmi *hdmi,
const char *product)
{
struct hdmi_spd_infoframe frame;
- u8 buffer[29];
+ u8 buffer[HDMI_INFOFRAME_HEADER_SIZE + HDMI_SPD_INFOFRAME_SIZE];
ssize_t err;
err = hdmi_spd_infoframe_init(&frame, vendor, product);
@@ -1031,7 +1027,7 @@ static int mtk_hdmi_setup_spd_infoframe(struct mtk_hdmi *hdmi,
static int mtk_hdmi_setup_audio_infoframe(struct mtk_hdmi *hdmi)
{
struct hdmi_audio_infoframe frame;
- u8 buffer[14];
+ u8 buffer[HDMI_INFOFRAME_HEADER_SIZE + HDMI_AUDIO_INFOFRAME_SIZE];
ssize_t err;
err = hdmi_audio_infoframe_init(&frame);
diff --git a/drivers/gpu/drm/mediatek/mtk_mipi_tx.c b/drivers/gpu/drm/mediatek/mtk_mipi_tx.c
index e4d34484ecc8..8cee2591e728 100644
--- a/drivers/gpu/drm/mediatek/mtk_mipi_tx.c
+++ b/drivers/gpu/drm/mediatek/mtk_mipi_tx.c
@@ -88,6 +88,44 @@ static const struct phy_ops mtk_mipi_tx_ops = {
.owner = THIS_MODULE,
};
+static void mtk_mipi_tx_get_calibration_datal(struct mtk_mipi_tx *mipi_tx)
+{
+ struct nvmem_cell *cell;
+ size_t len;
+ u32 *buf;
+
+ cell = nvmem_cell_get(mipi_tx->dev, "calibration-data");
+ if (IS_ERR(cell)) {
+ dev_info(mipi_tx->dev, "can't get nvmem_cell_get, ignore it\n");
+ return;
+ }
+ buf = (u32 *)nvmem_cell_read(cell, &len);
+ nvmem_cell_put(cell);
+
+ if (IS_ERR(buf)) {
+ dev_info(mipi_tx->dev, "can't get data, ignore it\n");
+ return;
+ }
+
+ if (len < 3 * sizeof(u32)) {
+ dev_info(mipi_tx->dev, "invalid calibration data\n");
+ kfree(buf);
+ return;
+ }
+
+ mipi_tx->rt_code[0] = ((buf[0] >> 6 & 0x1f) << 5) |
+ (buf[0] >> 11 & 0x1f);
+ mipi_tx->rt_code[1] = ((buf[1] >> 27 & 0x1f) << 5) |
+ (buf[0] >> 1 & 0x1f);
+ mipi_tx->rt_code[2] = ((buf[1] >> 17 & 0x1f) << 5) |
+ (buf[1] >> 22 & 0x1f);
+ mipi_tx->rt_code[3] = ((buf[1] >> 7 & 0x1f) << 5) |
+ (buf[1] >> 12 & 0x1f);
+ mipi_tx->rt_code[4] = ((buf[2] >> 27 & 0x1f) << 5) |
+ (buf[1] >> 2 & 0x1f);
+ kfree(buf);
+}
+
static int mtk_mipi_tx_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
@@ -125,6 +163,20 @@ static int mtk_mipi_tx_probe(struct platform_device *pdev)
return ret;
}
+ ret = of_property_read_u32(dev->of_node, "drive-strength-microamp",
+ &mipi_tx->mipitx_drive);
+ /* If can't get the "mipi_tx->mipitx_drive", set it default 0x8 */
+ if (ret < 0)
+ mipi_tx->mipitx_drive = 4600;
+
+ /* check the mipitx_drive valid */
+ if (mipi_tx->mipitx_drive > 6000 || mipi_tx->mipitx_drive < 3000) {
+ dev_warn(dev, "drive-strength-microamp is invalid %d, not in 3000 ~ 6000\n",
+ mipi_tx->mipitx_drive);
+ mipi_tx->mipitx_drive = clamp_val(mipi_tx->mipitx_drive, 3000,
+ 6000);
+ }
+
ref_clk_name = __clk_get_name(ref_clk);
ret = of_property_read_string(dev->of_node, "clock-output-names",
@@ -160,6 +212,8 @@ static int mtk_mipi_tx_probe(struct platform_device *pdev)
mipi_tx->dev = dev;
+ mtk_mipi_tx_get_calibration_datal(mipi_tx);
+
return of_clk_add_provider(dev->of_node, of_clk_src_simple_get,
mipi_tx->pll);
}
diff --git a/drivers/gpu/drm/mediatek/mtk_mipi_tx.h b/drivers/gpu/drm/mediatek/mtk_mipi_tx.h
index 413f35d86219..c76f07c3fdeb 100644
--- a/drivers/gpu/drm/mediatek/mtk_mipi_tx.h
+++ b/drivers/gpu/drm/mediatek/mtk_mipi_tx.h
@@ -12,9 +12,11 @@
#include <linux/delay.h>
#include <linux/io.h>
#include <linux/module.h>
+#include <linux/nvmem-consumer.h>
#include <linux/of_device.h>
#include <linux/platform_device.h>
#include <linux/phy/phy.h>
+#include <linux/slab.h>
struct mtk_mipitx_data {
const u32 mppll_preserve;
@@ -27,6 +29,8 @@ struct mtk_mipi_tx {
struct device *dev;
void __iomem *regs;
u32 data_rate;
+ u32 mipitx_drive;
+ u32 rt_code[5];
const struct mtk_mipitx_data *driver_data;
struct clk_hw pll_hw;
struct clk *pll;
diff --git a/drivers/gpu/drm/mediatek/mtk_mt8183_mipi_tx.c b/drivers/gpu/drm/mediatek/mtk_mt8183_mipi_tx.c
index 91f08a351fd0..9f3e55aeebb2 100644
--- a/drivers/gpu/drm/mediatek/mtk_mt8183_mipi_tx.c
+++ b/drivers/gpu/drm/mediatek/mtk_mt8183_mipi_tx.c
@@ -17,6 +17,9 @@
#define RG_DSI_BG_CORE_EN BIT(7)
#define RG_DSI_PAD_TIEL_SEL BIT(8)
+#define MIPITX_VOLTAGE_SEL 0x0010
+#define RG_DSI_HSTX_LDO_REF_SEL (0xf << 6)
+
#define MIPITX_PLL_PWR 0x0028
#define MIPITX_PLL_CON0 0x002c
#define MIPITX_PLL_CON1 0x0030
@@ -25,6 +28,7 @@
#define MIPITX_PLL_CON4 0x003c
#define RG_DSI_PLL_IBIAS (3 << 10)
+#define MIPITX_D2P_RTCODE 0x0100
#define MIPITX_D2_SW_CTL_EN 0x0144
#define MIPITX_D0_SW_CTL_EN 0x0244
#define MIPITX_CK_CKMODE_EN 0x0328
@@ -105,6 +109,24 @@ static const struct clk_ops mtk_mipi_tx_pll_ops = {
.recalc_rate = mtk_mipi_tx_pll_recalc_rate,
};
+static void mtk_mipi_tx_config_calibration_data(struct mtk_mipi_tx *mipi_tx)
+{
+ int i, j;
+
+ for (i = 0; i < 5; i++) {
+ if ((mipi_tx->rt_code[i] & 0x1f) == 0)
+ mipi_tx->rt_code[i] |= 0x10;
+
+ if ((mipi_tx->rt_code[i] >> 5 & 0x1f) == 0)
+ mipi_tx->rt_code[i] |= 0x10 << 5;
+
+ for (j = 0; j < 10; j++)
+ mtk_mipi_tx_update_bits(mipi_tx,
+ MIPITX_D2P_RTCODE * (i + 1) + j * 4,
+ 1, mipi_tx->rt_code[i] >> j & 1);
+ }
+}
+
static void mtk_mipi_tx_power_on_signal(struct phy *phy)
{
struct mtk_mipi_tx *mipi_tx = phy_get_drvdata(phy);
@@ -123,6 +145,12 @@ static void mtk_mipi_tx_power_on_signal(struct phy *phy)
mtk_mipi_tx_clear_bits(mipi_tx, MIPITX_D3_SW_CTL_EN, DSI_SW_CTL_EN);
mtk_mipi_tx_clear_bits(mipi_tx, MIPITX_CK_SW_CTL_EN, DSI_SW_CTL_EN);
+ mtk_mipi_tx_update_bits(mipi_tx, MIPITX_VOLTAGE_SEL,
+ RG_DSI_HSTX_LDO_REF_SEL,
+ (mipi_tx->mipitx_drive - 3000) / 200 << 6);
+
+ mtk_mipi_tx_config_calibration_data(mipi_tx);
+
mtk_mipi_tx_set_bits(mipi_tx, MIPITX_CK_CKMODE_EN, DSI_CK_CKMODE_EN);
}
diff --git a/drivers/gpu/drm/meson/meson_drv.c b/drivers/gpu/drm/meson/meson_drv.c
index 8c2e1b47e81a..4c5aafcec799 100644
--- a/drivers/gpu/drm/meson/meson_drv.c
+++ b/drivers/gpu/drm/meson/meson_drv.c
@@ -11,6 +11,7 @@
#include <linux/component.h>
#include <linux/module.h>
#include <linux/of_graph.h>
+#include <linux/sys_soc.h>
#include <linux/platform_device.h>
#include <linux/soc/amlogic/meson-canvas.h>
@@ -183,6 +184,24 @@ static void meson_remove_framebuffers(void)
kfree(ap);
}
+struct meson_drm_soc_attr {
+ struct meson_drm_soc_limits limits;
+ const struct soc_device_attribute *attrs;
+};
+
+static const struct meson_drm_soc_attr meson_drm_soc_attrs[] = {
+ /* S805X/S805Y HDMI PLL won't lock for HDMI PHY freq > 1,65GHz */
+ {
+ .limits = {
+ .max_hdmi_phy_freq = 1650000,
+ },
+ .attrs = (const struct soc_device_attribute []) {
+ { .soc_id = "GXL (S805*)", },
+ { /* sentinel */ },
+ }
+ },
+};
+
static int meson_drv_bind_master(struct device *dev, bool has_components)
{
struct platform_device *pdev = to_platform_device(dev);
@@ -191,7 +210,7 @@ static int meson_drv_bind_master(struct device *dev, bool has_components)
struct drm_device *drm;
struct resource *res;
void __iomem *regs;
- int ret;
+ int ret, i;
/* Checks if an output connector is available */
if (!meson_vpu_has_available_connectors(dev)) {
@@ -281,10 +300,20 @@ static int meson_drv_bind_master(struct device *dev, bool has_components)
if (ret)
goto free_drm;
+ /* Assign limits per soc revision/package */
+ for (i = 0 ; i < ARRAY_SIZE(meson_drm_soc_attrs) ; ++i) {
+ if (soc_device_match(meson_drm_soc_attrs[i].attrs)) {
+ priv->limits = &meson_drm_soc_attrs[i].limits;
+ break;
+ }
+ }
+
/* Remove early framebuffers (ie. simplefb) */
meson_remove_framebuffers();
- drm_mode_config_init(drm);
+ ret = drmm_mode_config_init(drm);
+ if (ret)
+ goto free_drm;
drm->mode_config.max_width = 3840;
drm->mode_config.max_height = 2160;
drm->mode_config.funcs = &meson_mode_config_funcs;
@@ -379,7 +408,6 @@ static void meson_drv_unbind(struct device *dev)
drm_dev_unregister(drm);
drm_irq_uninstall(drm);
drm_kms_helper_poll_fini(drm);
- drm_mode_config_cleanup(drm);
drm_dev_put(drm);
}
diff --git a/drivers/gpu/drm/meson/meson_drv.h b/drivers/gpu/drm/meson/meson_drv.h
index 04fdf3826643..5b23704a80d6 100644
--- a/drivers/gpu/drm/meson/meson_drv.h
+++ b/drivers/gpu/drm/meson/meson_drv.h
@@ -30,6 +30,10 @@ struct meson_drm_match_data {
struct meson_afbcd_ops *afbcd_ops;
};
+struct meson_drm_soc_limits {
+ unsigned int max_hdmi_phy_freq;
+};
+
struct meson_drm {
struct device *dev;
enum vpu_compatible compat;
@@ -48,6 +52,8 @@ struct meson_drm {
struct drm_plane *primary_plane;
struct drm_plane *overlay_plane;
+ const struct meson_drm_soc_limits *limits;
+
/* Components Data */
struct {
bool osd1_enabled;
diff --git a/drivers/gpu/drm/meson/meson_dw_hdmi.c b/drivers/gpu/drm/meson/meson_dw_hdmi.c
index 64cb6ba4bc42..24a12c453095 100644
--- a/drivers/gpu/drm/meson/meson_dw_hdmi.c
+++ b/drivers/gpu/drm/meson/meson_dw_hdmi.c
@@ -695,7 +695,7 @@ dw_hdmi_mode_valid(struct drm_connector *connector,
dev_dbg(connector->dev->dev, "%s: vclk:%d phy=%d venc=%d hdmi=%d\n",
__func__, phy_freq, vclk_freq, venc_freq, hdmi_freq);
- return meson_vclk_vic_supported_freq(phy_freq, vclk_freq);
+ return meson_vclk_vic_supported_freq(priv, phy_freq, vclk_freq);
}
/* Encoder */
diff --git a/drivers/gpu/drm/meson/meson_plane.c b/drivers/gpu/drm/meson/meson_plane.c
index d5cbc47835bf..35338ed18209 100644
--- a/drivers/gpu/drm/meson/meson_plane.c
+++ b/drivers/gpu/drm/meson/meson_plane.c
@@ -223,7 +223,7 @@ static void meson_plane_atomic_update(struct drm_plane *plane,
priv->viu.osd1_blk0_cfg[0] |= OSD_BLK_MODE_16 |
OSD_COLOR_MATRIX_16_RGB565;
break;
- };
+ }
}
switch (fb->format->format) {
diff --git a/drivers/gpu/drm/meson/meson_vclk.c b/drivers/gpu/drm/meson/meson_vclk.c
index fdf26dac9fa8..0eb86943a358 100644
--- a/drivers/gpu/drm/meson/meson_vclk.c
+++ b/drivers/gpu/drm/meson/meson_vclk.c
@@ -725,6 +725,13 @@ meson_vclk_dmt_supported_freq(struct meson_drm *priv, unsigned int freq)
/* In DMT mode, path after PLL is always /10 */
freq *= 10;
+ /* Check against soc revision/package limits */
+ if (priv->limits) {
+ if (priv->limits->max_hdmi_phy_freq &&
+ freq > priv->limits->max_hdmi_phy_freq)
+ return MODE_CLOCK_HIGH;
+ }
+
if (meson_hdmi_pll_find_params(priv, freq, &m, &frac, &od))
return MODE_OK;
@@ -762,7 +769,7 @@ static void meson_hdmi_pll_generic_set(struct meson_drm *priv,
}
enum drm_mode_status
-meson_vclk_vic_supported_freq(unsigned int phy_freq,
+meson_vclk_vic_supported_freq(struct meson_drm *priv, unsigned int phy_freq,
unsigned int vclk_freq)
{
int i;
@@ -770,6 +777,13 @@ meson_vclk_vic_supported_freq(unsigned int phy_freq,
DRM_DEBUG_DRIVER("phy_freq = %d vclk_freq = %d\n",
phy_freq, vclk_freq);
+ /* Check against soc revision/package limits */
+ if (priv->limits) {
+ if (priv->limits->max_hdmi_phy_freq &&
+ phy_freq > priv->limits->max_hdmi_phy_freq)
+ return MODE_CLOCK_HIGH;
+ }
+
for (i = 0 ; params[i].pixel_freq ; ++i) {
DRM_DEBUG_DRIVER("i = %d pixel_freq = %d alt = %d\n",
i, params[i].pixel_freq,
diff --git a/drivers/gpu/drm/meson/meson_vclk.h b/drivers/gpu/drm/meson/meson_vclk.h
index aed0ab2efa71..60617aaf18dd 100644
--- a/drivers/gpu/drm/meson/meson_vclk.h
+++ b/drivers/gpu/drm/meson/meson_vclk.h
@@ -25,7 +25,8 @@ enum {
enum drm_mode_status
meson_vclk_dmt_supported_freq(struct meson_drm *priv, unsigned int freq);
enum drm_mode_status
-meson_vclk_vic_supported_freq(unsigned int phy_freq, unsigned int vclk_freq);
+meson_vclk_vic_supported_freq(struct meson_drm *priv, unsigned int phy_freq,
+ unsigned int vclk_freq);
void meson_vclk_setup(struct meson_drm *priv, unsigned int target,
unsigned int phy_freq, unsigned int vclk_freq,
diff --git a/drivers/gpu/drm/mgag200/mgag200_cursor.c b/drivers/gpu/drm/mgag200/mgag200_cursor.c
index d491edd317ff..aebc9ce43d55 100644
--- a/drivers/gpu/drm/mgag200/mgag200_cursor.c
+++ b/drivers/gpu/drm/mgag200/mgag200_cursor.c
@@ -260,7 +260,7 @@ int mgag200_crtc_cursor_set(struct drm_crtc *crtc, struct drm_file *file_priv,
uint32_t handle, uint32_t width, uint32_t height)
{
struct drm_device *dev = crtc->dev;
- struct mga_device *mdev = (struct mga_device *)dev->dev_private;
+ struct mga_device *mdev = to_mga_device(dev);
struct drm_gem_object *obj;
struct drm_gem_vram_object *gbo = NULL;
int ret;
@@ -307,7 +307,7 @@ err_drm_gem_object_put_unlocked:
int mgag200_crtc_cursor_move(struct drm_crtc *crtc, int x, int y)
{
- struct mga_device *mdev = (struct mga_device *)crtc->dev->dev_private;
+ struct mga_device *mdev = to_mga_device(crtc->dev);
/* Our origin is at (64,64) */
x += 64;
diff --git a/drivers/gpu/drm/mgag200/mgag200_drv.c b/drivers/gpu/drm/mgag200/mgag200_drv.c
index 7a5bad2f57d7..c2f0e4b40b05 100644
--- a/drivers/gpu/drm/mgag200/mgag200_drv.c
+++ b/drivers/gpu/drm/mgag200/mgag200_drv.c
@@ -77,6 +77,8 @@ static int mga_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
if (ret)
goto err_mgag200_driver_unload;
+ drm_fbdev_generic_setup(dev, 0);
+
return 0;
err_mgag200_driver_unload:
@@ -118,7 +120,7 @@ int mgag200_driver_dumb_create(struct drm_file *file,
struct drm_device *dev,
struct drm_mode_create_dumb *args)
{
- struct mga_device *mdev = dev->dev_private;
+ struct mga_device *mdev = to_mga_device(dev);
unsigned long pg_align;
if (WARN_ONCE(!dev->vram_mm, "VRAM MM not initialized"))
diff --git a/drivers/gpu/drm/mgag200/mgag200_drv.h b/drivers/gpu/drm/mgag200/mgag200_drv.h
index 9691252d6233..d9b7e96b214f 100644
--- a/drivers/gpu/drm/mgag200/mgag200_drv.h
+++ b/drivers/gpu/drm/mgag200/mgag200_drv.h
@@ -104,11 +104,6 @@ struct mga_crtc {
bool enabled;
};
-struct mga_mode_info {
- bool mode_config_initialized;
- struct mga_crtc *crtc;
-};
-
struct mga_i2c_chan {
struct i2c_adapter adapter;
struct drm_device *dev;
@@ -160,17 +155,14 @@ struct mga_device {
void __iomem *rmmio;
struct mga_mc mc;
- struct mga_mode_info mode_info;
struct mga_cursor cursor;
size_t vram_fb_available;
bool suspended;
- int num_crtc;
enum mga_type type;
int has_sdram;
- struct drm_display_mode mode;
int bpp_shifts[4];
@@ -179,9 +171,15 @@ struct mga_device {
/* SE model number stored in reg 0x1e24 */
u32 unique_rev_id;
+ struct mga_connector connector;
struct drm_encoder encoder;
};
+static inline struct mga_device *to_mga_device(struct drm_device *dev)
+{
+ return dev->dev_private;
+}
+
static inline enum mga_type
mgag200_type_from_driver_data(kernel_ulong_t driver_data)
{
@@ -196,7 +194,6 @@ mgag200_flags_from_driver_data(kernel_ulong_t driver_data)
/* mgag200_mode.c */
int mgag200_modeset_init(struct mga_device *mdev);
-void mgag200_modeset_fini(struct mga_device *mdev);
/* mgag200_main.c */
int mgag200_driver_load(struct drm_device *dev, unsigned long flags);
diff --git a/drivers/gpu/drm/mgag200/mgag200_i2c.c b/drivers/gpu/drm/mgag200/mgag200_i2c.c
index 9f4635916d32..09731e614e46 100644
--- a/drivers/gpu/drm/mgag200/mgag200_i2c.c
+++ b/drivers/gpu/drm/mgag200/mgag200_i2c.c
@@ -61,34 +61,34 @@ static inline void mga_i2c_set(struct mga_device *mdev, int mask, int state)
static void mga_gpio_setsda(void *data, int state)
{
struct mga_i2c_chan *i2c = data;
- struct mga_device *mdev = i2c->dev->dev_private;
+ struct mga_device *mdev = to_mga_device(i2c->dev);
mga_i2c_set(mdev, i2c->data, state);
}
static void mga_gpio_setscl(void *data, int state)
{
struct mga_i2c_chan *i2c = data;
- struct mga_device *mdev = i2c->dev->dev_private;
+ struct mga_device *mdev = to_mga_device(i2c->dev);
mga_i2c_set(mdev, i2c->clock, state);
}
static int mga_gpio_getsda(void *data)
{
struct mga_i2c_chan *i2c = data;
- struct mga_device *mdev = i2c->dev->dev_private;
+ struct mga_device *mdev = to_mga_device(i2c->dev);
return (mga_i2c_read_gpio(mdev) & i2c->data) ? 1 : 0;
}
static int mga_gpio_getscl(void *data)
{
struct mga_i2c_chan *i2c = data;
- struct mga_device *mdev = i2c->dev->dev_private;
+ struct mga_device *mdev = to_mga_device(i2c->dev);
return (mga_i2c_read_gpio(mdev) & i2c->clock) ? 1 : 0;
}
struct mga_i2c_chan *mgag200_i2c_create(struct drm_device *dev)
{
- struct mga_device *mdev = dev->dev_private;
+ struct mga_device *mdev = to_mga_device(dev);
struct mga_i2c_chan *i2c;
int ret;
int data, clock;
diff --git a/drivers/gpu/drm/mgag200/mgag200_main.c b/drivers/gpu/drm/mgag200/mgag200_main.c
index e278b6a547bd..86df799fd38c 100644
--- a/drivers/gpu/drm/mgag200/mgag200_main.c
+++ b/drivers/gpu/drm/mgag200/mgag200_main.c
@@ -10,15 +10,8 @@
#include <linux/pci.h>
-#include <drm/drm_crtc_helper.h>
-#include <drm/drm_gem_framebuffer_helper.h>
-
#include "mgag200_drv.h"
-static const struct drm_mode_config_funcs mga_mode_funcs = {
- .fb_create = drm_gem_fb_create
-};
-
static int mga_probe_vram(struct mga_device *mdev, void __iomem *mem)
{
int offset;
@@ -66,51 +59,54 @@ static int mga_probe_vram(struct mga_device *mdev, void __iomem *mem)
/* Map the framebuffer from the card and configure the core */
static int mga_vram_init(struct mga_device *mdev)
{
+ struct drm_device *dev = mdev->dev;
void __iomem *mem;
/* BAR 0 is VRAM */
- mdev->mc.vram_base = pci_resource_start(mdev->dev->pdev, 0);
- mdev->mc.vram_window = pci_resource_len(mdev->dev->pdev, 0);
+ mdev->mc.vram_base = pci_resource_start(dev->pdev, 0);
+ mdev->mc.vram_window = pci_resource_len(dev->pdev, 0);
- if (!devm_request_mem_region(mdev->dev->dev, mdev->mc.vram_base, mdev->mc.vram_window,
- "mgadrmfb_vram")) {
+ if (!devm_request_mem_region(dev->dev, mdev->mc.vram_base,
+ mdev->mc.vram_window, "mgadrmfb_vram")) {
DRM_ERROR("can't reserve VRAM\n");
return -ENXIO;
}
- mem = pci_iomap(mdev->dev->pdev, 0, 0);
+ mem = pci_iomap(dev->pdev, 0, 0);
if (!mem)
return -ENOMEM;
mdev->mc.vram_size = mga_probe_vram(mdev, mem);
- pci_iounmap(mdev->dev->pdev, mem);
+ pci_iounmap(dev->pdev, mem);
return 0;
}
-static int mgag200_device_init(struct drm_device *dev,
- uint32_t flags)
+int mgag200_driver_load(struct drm_device *dev, unsigned long flags)
{
- struct mga_device *mdev = dev->dev_private;
+ struct mga_device *mdev;
int ret, option;
+ mdev = devm_kzalloc(dev->dev, sizeof(struct mga_device), GFP_KERNEL);
+ if (mdev == NULL)
+ return -ENOMEM;
+ dev->dev_private = (void *)mdev;
+ mdev->dev = dev;
+
mdev->flags = mgag200_flags_from_driver_data(flags);
mdev->type = mgag200_type_from_driver_data(flags);
- /* Hardcode the number of CRTCs to 1 */
- mdev->num_crtc = 1;
-
pci_read_config_dword(dev->pdev, PCI_MGA_OPTION, &option);
mdev->has_sdram = !(option & (1 << 14));
/* BAR 0 is the framebuffer, BAR 1 contains registers */
- mdev->rmmio_base = pci_resource_start(mdev->dev->pdev, 1);
- mdev->rmmio_size = pci_resource_len(mdev->dev->pdev, 1);
+ mdev->rmmio_base = pci_resource_start(dev->pdev, 1);
+ mdev->rmmio_size = pci_resource_len(dev->pdev, 1);
- if (!devm_request_mem_region(mdev->dev->dev, mdev->rmmio_base, mdev->rmmio_size,
- "mgadrmfb_mmio")) {
- DRM_ERROR("can't reserve mmio registers\n");
+ if (!devm_request_mem_region(dev->dev, mdev->rmmio_base,
+ mdev->rmmio_size, "mgadrmfb_mmio")) {
+ drm_err(dev, "can't reserve mmio registers\n");
return -ENOMEM;
}
@@ -121,90 +117,43 @@ static int mgag200_device_init(struct drm_device *dev,
/* stash G200 SE model number for later use */
if (IS_G200_SE(mdev)) {
mdev->unique_rev_id = RREG32(0x1e24);
- DRM_DEBUG("G200 SE unique revision id is 0x%x\n",
- mdev->unique_rev_id);
+ drm_dbg(dev, "G200 SE unique revision id is 0x%x\n",
+ mdev->unique_rev_id);
}
ret = mga_vram_init(mdev);
if (ret)
return ret;
- mdev->bpp_shifts[0] = 0;
- mdev->bpp_shifts[1] = 1;
- mdev->bpp_shifts[2] = 0;
- mdev->bpp_shifts[3] = 2;
- return 0;
-}
-
-/*
- * Functions here will be called by the core once it's bound the driver to
- * a PCI device
- */
-
-
-int mgag200_driver_load(struct drm_device *dev, unsigned long flags)
-{
- struct mga_device *mdev;
- int r;
-
- mdev = devm_kzalloc(dev->dev, sizeof(struct mga_device), GFP_KERNEL);
- if (mdev == NULL)
- return -ENOMEM;
- dev->dev_private = (void *)mdev;
- mdev->dev = dev;
-
- r = mgag200_device_init(dev, flags);
- if (r) {
- dev_err(&dev->pdev->dev, "Fatal error during GPU init: %d\n", r);
- return r;
- }
- r = mgag200_mm_init(mdev);
- if (r)
+ ret = mgag200_mm_init(mdev);
+ if (ret)
goto err_mm;
- drm_mode_config_init(dev);
- dev->mode_config.funcs = (void *)&mga_mode_funcs;
- if (IS_G200_SE(mdev) && mdev->vram_fb_available < (2048*1024))
- dev->mode_config.preferred_depth = 16;
- else
- dev->mode_config.preferred_depth = 32;
- dev->mode_config.prefer_shadow = 1;
-
- r = mgag200_modeset_init(mdev);
- if (r) {
- dev_err(&dev->pdev->dev, "Fatal error during modeset init: %d\n", r);
- goto err_modeset;
+ ret = mgag200_modeset_init(mdev);
+ if (ret) {
+ drm_err(dev, "Fatal error during modeset init: %d\n", ret);
+ goto err_mgag200_mm_fini;
}
- r = mgag200_cursor_init(mdev);
- if (r)
- dev_warn(&dev->pdev->dev,
- "Could not initialize cursors. Not doing hardware cursors.\n");
-
- r = drm_fbdev_generic_setup(mdev->dev, 0);
- if (r)
- goto err_modeset;
+ ret = mgag200_cursor_init(mdev);
+ if (ret)
+ drm_err(dev, "Could not initialize cursors. Not doing hardware cursors.\n");
return 0;
-err_modeset:
- drm_mode_config_cleanup(dev);
- mgag200_cursor_fini(mdev);
+err_mgag200_mm_fini:
mgag200_mm_fini(mdev);
err_mm:
dev->dev_private = NULL;
-
- return r;
+ return ret;
}
void mgag200_driver_unload(struct drm_device *dev)
{
- struct mga_device *mdev = dev->dev_private;
+ struct mga_device *mdev = to_mga_device(dev);
if (mdev == NULL)
return;
- mgag200_modeset_fini(mdev);
- drm_mode_config_cleanup(dev);
mgag200_cursor_fini(mdev);
mgag200_mm_fini(mdev);
dev->dev_private = NULL;
diff --git a/drivers/gpu/drm/mgag200/mgag200_mode.c b/drivers/gpu/drm/mgag200/mgag200_mode.c
index d90e83959fca..5f4ac36a9776 100644
--- a/drivers/gpu/drm/mgag200/mgag200_mode.c
+++ b/drivers/gpu/drm/mgag200/mgag200_mode.c
@@ -13,6 +13,7 @@
#include <drm/drm_crtc_helper.h>
#include <drm/drm_fourcc.h>
+#include <drm/drm_gem_framebuffer_helper.h>
#include <drm/drm_plane_helper.h>
#include <drm/drm_probe_helper.h>
#include <drm/drm_simple_kms_helper.h>
@@ -28,7 +29,7 @@
static void mga_crtc_load_lut(struct drm_crtc *crtc)
{
struct drm_device *dev = crtc->dev;
- struct mga_device *mdev = dev->dev_private;
+ struct mga_device *mdev = to_mga_device(dev);
struct drm_framebuffer *fb = crtc->primary->fb;
u16 *r_ptr, *g_ptr, *b_ptr;
int i;
@@ -728,7 +729,7 @@ static int mga_crtc_set_plls(struct mga_device *mdev, long clock)
static void mga_g200wb_prepare(struct drm_crtc *crtc)
{
- struct mga_device *mdev = crtc->dev->dev_private;
+ struct mga_device *mdev = to_mga_device(crtc->dev);
u8 tmp;
int iter_max;
@@ -783,7 +784,7 @@ static void mga_g200wb_prepare(struct drm_crtc *crtc)
static void mga_g200wb_commit(struct drm_crtc *crtc)
{
u8 tmp;
- struct mga_device *mdev = crtc->dev->dev_private;
+ struct mga_device *mdev = to_mga_device(crtc->dev);
/* 1- The first step is to ensure that the vrsten and hrsten are set */
WREG8(MGAREG_CRTCEXT_INDEX, 1);
@@ -833,7 +834,7 @@ static void mga_g200wb_commit(struct drm_crtc *crtc)
*/
static void mga_set_start_address(struct drm_crtc *crtc, unsigned offset)
{
- struct mga_device *mdev = crtc->dev->dev_private;
+ struct mga_device *mdev = to_mga_device(crtc->dev);
u32 addr;
int count;
u8 crtcext0;
@@ -902,7 +903,7 @@ static int mga_crtc_mode_set(struct drm_crtc *crtc,
int x, int y, struct drm_framebuffer *old_fb)
{
struct drm_device *dev = crtc->dev;
- struct mga_device *mdev = dev->dev_private;
+ struct mga_device *mdev = to_mga_device(dev);
const struct drm_framebuffer *fb = crtc->primary->fb;
int hdisplay, hsyncstart, hsyncend, htotal;
int vdisplay, vsyncstart, vsyncend, vtotal;
@@ -1135,9 +1136,6 @@ static int mga_crtc_mode_set(struct drm_crtc *crtc,
WREG8(MGA_MISC_OUT, misc);
- if (adjusted_mode)
- memcpy(&mdev->mode, mode, sizeof(struct drm_display_mode));
-
mga_crtc_do_set_base(crtc, old_fb, x, y, 0);
/* reset tagfifo */
@@ -1263,7 +1261,7 @@ static int mga_resume(struct drm_crtc *crtc)
static void mga_crtc_dpms(struct drm_crtc *crtc, int mode)
{
struct drm_device *dev = crtc->dev;
- struct mga_device *mdev = dev->dev_private;
+ struct mga_device *mdev = to_mga_device(dev);
u8 seq1 = 0, crtcext1 = 0;
switch (mode) {
@@ -1317,7 +1315,7 @@ static void mga_crtc_dpms(struct drm_crtc *crtc, int mode)
static void mga_crtc_prepare(struct drm_crtc *crtc)
{
struct drm_device *dev = crtc->dev;
- struct mga_device *mdev = dev->dev_private;
+ struct mga_device *mdev = to_mga_device(dev);
u8 tmp;
/* mga_resume(crtc);*/
@@ -1353,7 +1351,7 @@ static void mga_crtc_prepare(struct drm_crtc *crtc)
static void mga_crtc_commit(struct drm_crtc *crtc)
{
struct drm_device *dev = crtc->dev;
- struct mga_device *mdev = dev->dev_private;
+ struct mga_device *mdev = to_mga_device(dev);
const struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private;
u8 tmp;
@@ -1433,6 +1431,7 @@ static const struct drm_crtc_helper_funcs mga_helper_funcs = {
/* CRTC setup */
static void mga_crtc_init(struct mga_device *mdev)
{
+ struct drm_device *dev = mdev->dev;
struct mga_crtc *mga_crtc;
mga_crtc = kzalloc(sizeof(struct mga_crtc) +
@@ -1442,14 +1441,17 @@ static void mga_crtc_init(struct mga_device *mdev)
if (mga_crtc == NULL)
return;
- drm_crtc_init(mdev->dev, &mga_crtc->base, &mga_crtc_funcs);
+ drm_crtc_init(dev, &mga_crtc->base, &mga_crtc_funcs);
drm_mode_crtc_set_gamma_size(&mga_crtc->base, MGAG200_LUT_SIZE);
- mdev->mode_info.crtc = mga_crtc;
drm_crtc_helper_add(&mga_crtc->base, &mga_helper_funcs);
}
+/*
+ * Connector
+ */
+
static int mga_vga_get_modes(struct drm_connector *connector)
{
struct mga_connector *mga_connector = to_mga_connector(connector);
@@ -1495,7 +1497,7 @@ static enum drm_mode_status mga_vga_mode_valid(struct drm_connector *connector,
struct drm_display_mode *mode)
{
struct drm_device *dev = connector->dev;
- struct mga_device *mdev = (struct mga_device*)dev->dev_private;
+ struct mga_device *mdev = to_mga_device(dev);
int bpp = 32;
if (IS_G200_SE(mdev)) {
@@ -1574,7 +1576,6 @@ static void mga_connector_destroy(struct drm_connector *connector)
struct mga_connector *mga_connector = to_mga_connector(connector);
mgag200_i2c_destroy(mga_connector->i2c);
drm_connector_cleanup(connector);
- kfree(connector);
}
static const struct drm_connector_helper_funcs mga_vga_connector_helper_funcs = {
@@ -1588,70 +1589,96 @@ static const struct drm_connector_funcs mga_vga_connector_funcs = {
.destroy = mga_connector_destroy,
};
-static struct drm_connector *mga_vga_init(struct drm_device *dev)
+static int mgag200_vga_connector_init(struct mga_device *mdev)
{
- struct drm_connector *connector;
- struct mga_connector *mga_connector;
-
- mga_connector = kzalloc(sizeof(struct mga_connector), GFP_KERNEL);
- if (!mga_connector)
- return NULL;
-
- connector = &mga_connector->base;
- mga_connector->i2c = mgag200_i2c_create(dev);
- if (!mga_connector->i2c)
- DRM_ERROR("failed to add ddc bus\n");
+ struct drm_device *dev = mdev->dev;
+ struct mga_connector *mconnector = &mdev->connector;
+ struct drm_connector *connector = &mconnector->base;
+ struct mga_i2c_chan *i2c;
+ int ret;
- drm_connector_init_with_ddc(dev, connector,
- &mga_vga_connector_funcs,
- DRM_MODE_CONNECTOR_VGA,
- &mga_connector->i2c->adapter);
+ i2c = mgag200_i2c_create(dev);
+ if (!i2c)
+ drm_warn(dev, "failed to add DDC bus\n");
+ ret = drm_connector_init_with_ddc(dev, connector,
+ &mga_vga_connector_funcs,
+ DRM_MODE_CONNECTOR_VGA,
+ &i2c->adapter);
+ if (ret)
+ goto err_mgag200_i2c_destroy;
drm_connector_helper_add(connector, &mga_vga_connector_helper_funcs);
- drm_connector_register(connector);
+ mconnector->i2c = i2c;
- return connector;
+ return 0;
+
+err_mgag200_i2c_destroy:
+ mgag200_i2c_destroy(i2c);
+ return ret;
}
+static const struct drm_mode_config_funcs mgag200_mode_config_funcs = {
+ .fb_create = drm_gem_fb_create
+};
+
+static unsigned int mgag200_preferred_depth(struct mga_device *mdev)
+{
+ if (IS_G200_SE(mdev) && mdev->vram_fb_available < (2048*1024))
+ return 16;
+ else
+ return 32;
+}
int mgag200_modeset_init(struct mga_device *mdev)
{
+ struct drm_device *dev = mdev->dev;
struct drm_encoder *encoder = &mdev->encoder;
- struct drm_connector *connector;
+ struct drm_connector *connector = &mdev->connector.base;
int ret;
- mdev->mode_info.mode_config_initialized = true;
+ mdev->bpp_shifts[0] = 0;
+ mdev->bpp_shifts[1] = 1;
+ mdev->bpp_shifts[2] = 0;
+ mdev->bpp_shifts[3] = 2;
+
+ ret = drmm_mode_config_init(dev);
+ if (ret) {
+ drm_err(dev, "drmm_mode_config_init() failed, error %d\n",
+ ret);
+ return ret;
+ }
+
+ dev->mode_config.max_width = MGAG200_MAX_FB_WIDTH;
+ dev->mode_config.max_height = MGAG200_MAX_FB_HEIGHT;
- mdev->dev->mode_config.max_width = MGAG200_MAX_FB_WIDTH;
- mdev->dev->mode_config.max_height = MGAG200_MAX_FB_HEIGHT;
+ dev->mode_config.preferred_depth = mgag200_preferred_depth(mdev);
+ dev->mode_config.prefer_shadow = 1;
- mdev->dev->mode_config.fb_base = mdev->mc.vram_base;
+ dev->mode_config.fb_base = mdev->mc.vram_base;
+
+ dev->mode_config.funcs = &mgag200_mode_config_funcs;
mga_crtc_init(mdev);
- ret = drm_simple_encoder_init(mdev->dev, encoder,
- DRM_MODE_ENCODER_DAC);
+ ret = drm_simple_encoder_init(dev, encoder, DRM_MODE_ENCODER_DAC);
if (ret) {
- drm_err(mdev->dev,
+ drm_err(dev,
"drm_simple_encoder_init() failed, error %d\n",
ret);
return ret;
}
encoder->possible_crtcs = 0x1;
- connector = mga_vga_init(mdev->dev);
- if (!connector) {
- DRM_ERROR("mga_vga_init failed\n");
- return -1;
+ ret = mgag200_vga_connector_init(mdev);
+ if (ret) {
+ drm_err(dev,
+ "mgag200_vga_connector_init() failed, error %d\n",
+ ret);
+ return ret;
}
drm_connector_attach_encoder(connector, encoder);
return 0;
}
-
-void mgag200_modeset_fini(struct mga_device *mdev)
-{
-
-}
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_debugfs.c b/drivers/gpu/drm/msm/adreno/a5xx_debugfs.c
index 075ecce4b5e0..8cae2ca4af6b 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_debugfs.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_debugfs.c
@@ -148,27 +148,19 @@ reset_set(void *data, u64 val)
DEFINE_SIMPLE_ATTRIBUTE(reset_fops, NULL, reset_set, "%llx\n");
-int a5xx_debugfs_init(struct msm_gpu *gpu, struct drm_minor *minor)
+void a5xx_debugfs_init(struct msm_gpu *gpu, struct drm_minor *minor)
{
struct drm_device *dev;
- int ret;
if (!minor)
- return 0;
+ return;
dev = minor->dev;
- ret = drm_debugfs_create_files(a5xx_debugfs_list,
- ARRAY_SIZE(a5xx_debugfs_list),
- minor->debugfs_root, minor);
-
- if (ret) {
- DRM_DEV_ERROR(dev->dev, "could not install a5xx_debugfs_list\n");
- return ret;
- }
+ drm_debugfs_create_files(a5xx_debugfs_list,
+ ARRAY_SIZE(a5xx_debugfs_list),
+ minor->debugfs_root, minor);
debugfs_create_file("reset", S_IWUGO, minor->debugfs_root, dev,
&reset_fops);
-
- return 0;
}
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.h b/drivers/gpu/drm/msm/adreno/a5xx_gpu.h
index 833468ce6b6d..54868d4e3958 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.h
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.h
@@ -41,7 +41,7 @@ struct a5xx_gpu {
#define to_a5xx_gpu(x) container_of(x, struct a5xx_gpu, base)
#ifdef CONFIG_DEBUG_FS
-int a5xx_debugfs_init(struct msm_gpu *gpu, struct drm_minor *minor);
+void a5xx_debugfs_init(struct msm_gpu *gpu, struct drm_minor *minor);
#endif
/*
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h
index 211f5de99a44..9aba2910d83a 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h
@@ -170,7 +170,7 @@ struct dpu_global_state
*
* Main debugfs documentation is located at,
*
- * Documentation/filesystems/debugfs.txt
+ * Documentation/filesystems/debugfs.rst
*
* @dpu_debugfs_setup_regset32: Initialize data for dpu_debugfs_create_regset32
* @dpu_debugfs_create_regset32: Create 32-bit register dump file
diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c
index 47b989834af1..c902c6503675 100644
--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c
+++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c
@@ -259,17 +259,9 @@ static struct drm_info_list mdp5_debugfs_list[] = {
static int mdp5_kms_debugfs_init(struct msm_kms *kms, struct drm_minor *minor)
{
- struct drm_device *dev = minor->dev;
- int ret;
-
- ret = drm_debugfs_create_files(mdp5_debugfs_list,
- ARRAY_SIZE(mdp5_debugfs_list),
- minor->debugfs_root, minor);
-
- if (ret) {
- DRM_DEV_ERROR(dev->dev, "could not install mdp5_debugfs_list\n");
- return ret;
- }
+ drm_debugfs_create_files(mdp5_debugfs_list,
+ ARRAY_SIZE(mdp5_debugfs_list),
+ minor->debugfs_root, minor);
return 0;
}
diff --git a/drivers/gpu/drm/msm/msm_debugfs.c b/drivers/gpu/drm/msm/msm_debugfs.c
index 1c74381a4fc9..ee2e270f464c 100644
--- a/drivers/gpu/drm/msm/msm_debugfs.c
+++ b/drivers/gpu/drm/msm/msm_debugfs.c
@@ -214,31 +214,20 @@ int msm_debugfs_late_init(struct drm_device *dev)
return ret;
}
-int msm_debugfs_init(struct drm_minor *minor)
+void msm_debugfs_init(struct drm_minor *minor)
{
struct drm_device *dev = minor->dev;
struct msm_drm_private *priv = dev->dev_private;
- int ret;
-
- ret = drm_debugfs_create_files(msm_debugfs_list,
- ARRAY_SIZE(msm_debugfs_list),
- minor->debugfs_root, minor);
- if (ret) {
- DRM_DEV_ERROR(dev->dev, "could not install msm_debugfs_list\n");
- return ret;
- }
+ drm_debugfs_create_files(msm_debugfs_list,
+ ARRAY_SIZE(msm_debugfs_list),
+ minor->debugfs_root, minor);
debugfs_create_file("gpu", S_IRUSR, minor->debugfs_root,
dev, &msm_gpu_fops);
- if (priv->kms && priv->kms->funcs->debugfs_init) {
- ret = priv->kms->funcs->debugfs_init(priv->kms, minor);
- if (ret)
- return ret;
- }
-
- return ret;
+ if (priv->kms && priv->kms->funcs->debugfs_init)
+ priv->kms->funcs->debugfs_init(priv->kms, minor);
}
#endif
diff --git a/drivers/gpu/drm/msm/msm_debugfs.h b/drivers/gpu/drm/msm/msm_debugfs.h
index 2b91f8c178ad..ef58f66abbb3 100644
--- a/drivers/gpu/drm/msm/msm_debugfs.h
+++ b/drivers/gpu/drm/msm/msm_debugfs.h
@@ -8,7 +8,7 @@
#define __MSM_DEBUGFS_H__
#ifdef CONFIG_DEBUG_FS
-int msm_debugfs_init(struct drm_minor *minor);
+void msm_debugfs_init(struct drm_minor *minor);
#endif
#endif /* __MSM_DEBUGFS_H__ */
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
index be5bc2e8425c..6ccae4ba905c 100644
--- a/drivers/gpu/drm/msm/msm_gpu.h
+++ b/drivers/gpu/drm/msm/msm_gpu.h
@@ -57,7 +57,7 @@ struct msm_gpu_funcs {
void (*show)(struct msm_gpu *gpu, struct msm_gpu_state *state,
struct drm_printer *p);
/* for generation specific debugfs: */
- int (*debugfs_init)(struct msm_gpu *gpu, struct drm_minor *minor);
+ void (*debugfs_init)(struct msm_gpu *gpu, struct drm_minor *minor);
#endif
unsigned long (*gpu_busy)(struct msm_gpu *gpu);
struct msm_gpu_state *(*gpu_state_get)(struct msm_gpu *gpu);
diff --git a/drivers/gpu/drm/nouveau/Kbuild b/drivers/gpu/drm/nouveau/Kbuild
index 7a62fa04272d..49e57fba4925 100644
--- a/drivers/gpu/drm/nouveau/Kbuild
+++ b/drivers/gpu/drm/nouveau/Kbuild
@@ -1,8 +1,10 @@
+NOUVEAU_PATH ?= $(srctree)
+
# SPDX-License-Identifier: MIT
-ccflags-y += -I $(srctree)/$(src)/include
-ccflags-y += -I $(srctree)/$(src)/include/nvkm
-ccflags-y += -I $(srctree)/$(src)/nvkm
-ccflags-y += -I $(srctree)/$(src)
+ccflags-y += -I $(NOUVEAU_PATH)/$(src)/include
+ccflags-y += -I $(NOUVEAU_PATH)/$(src)/include/nvkm
+ccflags-y += -I $(NOUVEAU_PATH)/$(src)/nvkm
+ccflags-y += -I $(NOUVEAU_PATH)/$(src)
# NVKM - HW resource manager
#- code also used by various userspace tools/tests
diff --git a/drivers/gpu/drm/nouveau/dispnv04/crtc.c b/drivers/gpu/drm/nouveau/dispnv04/crtc.c
index 1f08de4241e0..2de589caf508 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/crtc.c
+++ b/drivers/gpu/drm/nouveau/dispnv04/crtc.c
@@ -605,15 +605,16 @@ static int
nv_crtc_swap_fbs(struct drm_crtc *crtc, struct drm_framebuffer *old_fb)
{
struct nv04_display *disp = nv04_display(crtc->dev);
- struct nouveau_framebuffer *nvfb = nouveau_framebuffer(crtc->primary->fb);
+ struct drm_framebuffer *fb = crtc->primary->fb;
+ struct nouveau_bo *nvbo = nouveau_gem_object(fb->obj[0]);
struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
int ret;
- ret = nouveau_bo_pin(nvfb->nvbo, TTM_PL_FLAG_VRAM, false);
+ ret = nouveau_bo_pin(nvbo, TTM_PL_FLAG_VRAM, false);
if (ret == 0) {
if (disp->image[nv_crtc->index])
nouveau_bo_unpin(disp->image[nv_crtc->index]);
- nouveau_bo_ref(nvfb->nvbo, &disp->image[nv_crtc->index]);
+ nouveau_bo_ref(nvbo, &disp->image[nv_crtc->index]);
}
return ret;
@@ -822,8 +823,8 @@ nv04_crtc_do_mode_set_base(struct drm_crtc *crtc,
struct drm_device *dev = crtc->dev;
struct nouveau_drm *drm = nouveau_drm(dev);
struct nv04_crtc_reg *regp = &nv04_display(dev)->mode_reg.crtc_reg[nv_crtc->index];
+ struct nouveau_bo *nvbo;
struct drm_framebuffer *drm_fb;
- struct nouveau_framebuffer *fb;
int arb_burst, arb_lwm;
NV_DEBUG(drm, "index %d\n", nv_crtc->index);
@@ -839,13 +840,12 @@ nv04_crtc_do_mode_set_base(struct drm_crtc *crtc,
*/
if (atomic) {
drm_fb = passed_fb;
- fb = nouveau_framebuffer(passed_fb);
} else {
drm_fb = crtc->primary->fb;
- fb = nouveau_framebuffer(crtc->primary->fb);
}
- nv_crtc->fb.offset = fb->nvbo->bo.offset;
+ nvbo = nouveau_gem_object(drm_fb->obj[0]);
+ nv_crtc->fb.offset = nvbo->bo.offset;
if (nv_crtc->lut.depth != drm_fb->format->depth) {
nv_crtc->lut.depth = drm_fb->format->depth;
@@ -1143,8 +1143,9 @@ nv04_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb,
const int swap_interval = (flags & DRM_MODE_PAGE_FLIP_ASYNC) ? 0 : 1;
struct drm_device *dev = crtc->dev;
struct nouveau_drm *drm = nouveau_drm(dev);
- struct nouveau_bo *old_bo = nouveau_framebuffer(crtc->primary->fb)->nvbo;
- struct nouveau_bo *new_bo = nouveau_framebuffer(fb)->nvbo;
+ struct drm_framebuffer *old_fb = crtc->primary->fb;
+ struct nouveau_bo *old_bo = nouveau_gem_object(old_fb->obj[0]);
+ struct nouveau_bo *new_bo = nouveau_gem_object(fb->obj[0]);
struct nv04_page_flip_state *s;
struct nouveau_channel *chan;
struct nouveau_cli *cli;
diff --git a/drivers/gpu/drm/nouveau/dispnv04/disp.c b/drivers/gpu/drm/nouveau/dispnv04/disp.c
index 44ee82d0c9b6..0f4ebefed1fd 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/disp.c
+++ b/drivers/gpu/drm/nouveau/dispnv04/disp.c
@@ -30,6 +30,7 @@
#include "nouveau_encoder.h"
#include "nouveau_connector.h"
#include "nouveau_bo.h"
+#include "nouveau_gem.h"
#include <nvif/if0004.h>
@@ -52,13 +53,13 @@ nv04_display_fini(struct drm_device *dev, bool suspend)
/* Un-pin FB and cursors so they'll be evicted to system memory. */
list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
- struct nouveau_framebuffer *nouveau_fb;
+ struct drm_framebuffer *fb = crtc->primary->fb;
+ struct nouveau_bo *nvbo;
- nouveau_fb = nouveau_framebuffer(crtc->primary->fb);
- if (!nouveau_fb || !nouveau_fb->nvbo)
+ if (!fb || !fb->obj[0])
continue;
-
- nouveau_bo_unpin(nouveau_fb->nvbo);
+ nvbo = nouveau_gem_object(fb->obj[0]);
+ nouveau_bo_unpin(nvbo);
}
list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
@@ -104,13 +105,13 @@ nv04_display_init(struct drm_device *dev, bool resume, bool runtime)
/* Re-pin FB/cursors. */
list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
- struct nouveau_framebuffer *nouveau_fb;
+ struct drm_framebuffer *fb = crtc->primary->fb;
+ struct nouveau_bo *nvbo;
- nouveau_fb = nouveau_framebuffer(crtc->primary->fb);
- if (!nouveau_fb || !nouveau_fb->nvbo)
+ if (!fb || !fb->obj[0])
continue;
-
- ret = nouveau_bo_pin(nouveau_fb->nvbo, TTM_PL_FLAG_VRAM, true);
+ nvbo = nouveau_gem_object(fb->obj[0]);
+ ret = nouveau_bo_pin(nvbo, TTM_PL_FLAG_VRAM, true);
if (ret)
NV_ERROR(drm, "Could not pin framebuffer\n");
}
diff --git a/drivers/gpu/drm/nouveau/dispnv04/overlay.c b/drivers/gpu/drm/nouveau/dispnv04/overlay.c
index a3a0a73ae8ab..6248fd1dbc6d 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/overlay.c
+++ b/drivers/gpu/drm/nouveau/dispnv04/overlay.c
@@ -31,6 +31,7 @@
#include "nouveau_bo.h"
#include "nouveau_connector.h"
#include "nouveau_display.h"
+#include "nouveau_gem.h"
#include "nvreg.h"
#include "disp.h"
@@ -120,9 +121,9 @@ nv10_update_plane(struct drm_plane *plane, struct drm_crtc *crtc,
struct nvif_object *dev = &drm->client.device.object;
struct nouveau_plane *nv_plane =
container_of(plane, struct nouveau_plane, base);
- struct nouveau_framebuffer *nv_fb = nouveau_framebuffer(fb);
struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
struct nouveau_bo *cur = nv_plane->cur;
+ struct nouveau_bo *nvbo;
bool flip = nv_plane->flip;
int soff = NV_PCRTC0_SIZE * nv_crtc->index;
int soff2 = NV_PCRTC0_SIZE * !nv_crtc->index;
@@ -140,17 +141,18 @@ nv10_update_plane(struct drm_plane *plane, struct drm_crtc *crtc,
if (ret)
return ret;
- ret = nouveau_bo_pin(nv_fb->nvbo, TTM_PL_FLAG_VRAM, false);
+ nvbo = nouveau_gem_object(fb->obj[0]);
+ ret = nouveau_bo_pin(nvbo, TTM_PL_FLAG_VRAM, false);
if (ret)
return ret;
- nv_plane->cur = nv_fb->nvbo;
+ nv_plane->cur = nvbo;
nvif_mask(dev, NV_PCRTC_ENGINE_CTRL + soff, NV_CRTC_FSEL_OVERLAY, NV_CRTC_FSEL_OVERLAY);
nvif_mask(dev, NV_PCRTC_ENGINE_CTRL + soff2, NV_CRTC_FSEL_OVERLAY, 0);
nvif_wr32(dev, NV_PVIDEO_BASE(flip), 0);
- nvif_wr32(dev, NV_PVIDEO_OFFSET_BUFF(flip), nv_fb->nvbo->bo.offset);
+ nvif_wr32(dev, NV_PVIDEO_OFFSET_BUFF(flip), nvbo->bo.offset);
nvif_wr32(dev, NV_PVIDEO_SIZE_IN(flip), src_h << 16 | src_w);
nvif_wr32(dev, NV_PVIDEO_POINT_IN(flip), src_y << 16 | src_x);
nvif_wr32(dev, NV_PVIDEO_DS_DX(flip), (src_w << 20) / crtc_w);
@@ -172,7 +174,7 @@ nv10_update_plane(struct drm_plane *plane, struct drm_crtc *crtc,
if (format & NV_PVIDEO_FORMAT_PLANAR) {
nvif_wr32(dev, NV_PVIDEO_UVPLANE_BASE(flip), 0);
nvif_wr32(dev, NV_PVIDEO_UVPLANE_OFFSET_BUFF(flip),
- nv_fb->nvbo->bo.offset + fb->offsets[1]);
+ nvbo->bo.offset + fb->offsets[1]);
}
nvif_wr32(dev, NV_PVIDEO_FORMAT(flip), format | fb->pitches[0]);
nvif_wr32(dev, NV_PVIDEO_STOP, 0);
@@ -368,8 +370,8 @@ nv04_update_plane(struct drm_plane *plane, struct drm_crtc *crtc,
struct nvif_object *dev = &nouveau_drm(plane->dev)->client.device.object;
struct nouveau_plane *nv_plane =
container_of(plane, struct nouveau_plane, base);
- struct nouveau_framebuffer *nv_fb = nouveau_framebuffer(fb);
struct nouveau_bo *cur = nv_plane->cur;
+ struct nouveau_bo *nvbo;
uint32_t overlay = 1;
int brightness = (nv_plane->brightness - 512) * 62 / 512;
int ret, i;
@@ -384,11 +386,12 @@ nv04_update_plane(struct drm_plane *plane, struct drm_crtc *crtc,
if (ret)
return ret;
- ret = nouveau_bo_pin(nv_fb->nvbo, TTM_PL_FLAG_VRAM, false);
+ nvbo = nouveau_gem_object(fb->obj[0]);
+ ret = nouveau_bo_pin(nvbo, TTM_PL_FLAG_VRAM, false);
if (ret)
return ret;
- nv_plane->cur = nv_fb->nvbo;
+ nv_plane->cur = nvbo;
nvif_wr32(dev, NV_PVIDEO_OE_STATE, 0);
nvif_wr32(dev, NV_PVIDEO_SU_STATE, 0);
@@ -396,7 +399,7 @@ nv04_update_plane(struct drm_plane *plane, struct drm_crtc *crtc,
for (i = 0; i < 2; i++) {
nvif_wr32(dev, NV_PVIDEO_BUFF0_START_ADDRESS + 4 * i,
- nv_fb->nvbo->bo.offset);
+ nvbo->bo.offset);
nvif_wr32(dev, NV_PVIDEO_BUFF0_PITCH_LENGTH + 4 * i,
fb->pitches[0]);
nvif_wr32(dev, NV_PVIDEO_BUFF0_OFFSET + 4 * i, 0);
diff --git a/drivers/gpu/drm/nouveau/dispnv50/base507c.c b/drivers/gpu/drm/nouveau/dispnv50/base507c.c
index ee782151d332..511258bfbcbc 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/base507c.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/base507c.c
@@ -263,7 +263,8 @@ base507c_new_(const struct nv50_wndw_func *func, const u32 *format,
struct nv50_disp_base_channel_dma_v0 args = {
.head = head,
};
- struct nv50_disp *disp = nv50_disp(drm->dev);
+ struct nouveau_display *disp = nouveau_display(drm->dev);
+ struct nv50_disp *disp50 = nv50_disp(drm->dev);
struct nv50_wndw *wndw;
int ret;
@@ -273,9 +274,9 @@ base507c_new_(const struct nv50_wndw_func *func, const u32 *format,
if (*pwndw = wndw, ret)
return ret;
- ret = nv50_dmac_create(&drm->client.device, &disp->disp->object,
+ ret = nv50_dmac_create(&drm->client.device, &disp->disp.object,
&oclass, head, &args, sizeof(args),
- disp->sync->bo.offset, &wndw->wndw);
+ disp50->sync->bo.offset, &wndw->wndw);
if (ret) {
NV_ERROR(drm, "base%04x allocation failed: %d\n", oclass, ret);
return ret;
diff --git a/drivers/gpu/drm/nouveau/dispnv50/core.h b/drivers/gpu/drm/nouveau/dispnv50/core.h
index ff94f3f6f264..99157dc94d23 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/core.h
+++ b/drivers/gpu/drm/nouveau/dispnv50/core.h
@@ -2,6 +2,7 @@
#define __NV50_KMS_CORE_H__
#include "disp.h"
#include "atom.h"
+#include <nouveau_encoder.h>
struct nv50_core {
const struct nv50_core_func *func;
@@ -15,6 +16,7 @@ void nv50_core_del(struct nv50_core **);
struct nv50_core_func {
void (*init)(struct nv50_core *);
void (*ntfy_init)(struct nouveau_bo *, u32 offset);
+ int (*caps_init)(struct nouveau_drm *, struct nv50_disp *);
int (*ntfy_wait_done)(struct nouveau_bo *, u32 offset,
struct nvif_device *);
void (*update)(struct nv50_core *, u32 *interlock, bool ntfy);
@@ -27,6 +29,9 @@ struct nv50_core_func {
const struct nv50_outp_func {
void (*ctrl)(struct nv50_core *, int or, u32 ctrl,
struct nv50_head_atom *);
+ /* XXX: Only used by SORs and PIORs for now */
+ void (*get_caps)(struct nv50_disp *,
+ struct nouveau_encoder *, int or);
} *dac, *pior, *sor;
};
@@ -35,6 +40,7 @@ int core507d_new_(const struct nv50_core_func *, struct nouveau_drm *, s32,
struct nv50_core **);
void core507d_init(struct nv50_core *);
void core507d_ntfy_init(struct nouveau_bo *, u32);
+int core507d_caps_init(struct nouveau_drm *, struct nv50_disp *);
int core507d_ntfy_wait_done(struct nouveau_bo *, u32, struct nvif_device *);
void core507d_update(struct nv50_core *, u32 *, bool);
@@ -51,6 +57,7 @@ extern const struct nv50_outp_func sor907d;
int core917d_new(struct nouveau_drm *, s32, struct nv50_core **);
int corec37d_new(struct nouveau_drm *, s32, struct nv50_core **);
+int corec37d_caps_init(struct nouveau_drm *, struct nv50_disp *);
int corec37d_ntfy_wait_done(struct nouveau_bo *, u32, struct nvif_device *);
void corec37d_update(struct nv50_core *, u32 *, bool);
void corec37d_wndw_owner(struct nv50_core *);
diff --git a/drivers/gpu/drm/nouveau/dispnv50/core507d.c b/drivers/gpu/drm/nouveau/dispnv50/core507d.c
index c5152c39c684..e341f572c269 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/core507d.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/core507d.c
@@ -62,6 +62,20 @@ core507d_ntfy_init(struct nouveau_bo *bo, u32 offset)
nouveau_bo_wr32(bo, offset / 4, 0x00000000);
}
+int
+core507d_caps_init(struct nouveau_drm *drm, struct nv50_disp *disp)
+{
+ u32 *push = evo_wait(&disp->core->chan, 2);
+
+ if (push) {
+ evo_mthd(push, 0x008c, 1);
+ evo_data(push, 0x0);
+ evo_kick(push, &disp->core->chan);
+ }
+
+ return 0;
+}
+
void
core507d_init(struct nv50_core *core)
{
@@ -77,6 +91,7 @@ static const struct nv50_core_func
core507d = {
.init = core507d_init,
.ntfy_init = core507d_ntfy_init,
+ .caps_init = core507d_caps_init,
.ntfy_wait_done = core507d_ntfy_wait_done,
.update = core507d_update,
.head = &head507d,
diff --git a/drivers/gpu/drm/nouveau/dispnv50/core827d.c b/drivers/gpu/drm/nouveau/dispnv50/core827d.c
index 6123a068f836..2e0c1c536afe 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/core827d.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/core827d.c
@@ -26,6 +26,7 @@ static const struct nv50_core_func
core827d = {
.init = core507d_init,
.ntfy_init = core507d_ntfy_init,
+ .caps_init = core507d_caps_init,
.ntfy_wait_done = core507d_ntfy_wait_done,
.update = core507d_update,
.head = &head827d,
diff --git a/drivers/gpu/drm/nouveau/dispnv50/core907d.c b/drivers/gpu/drm/nouveau/dispnv50/core907d.c
index ef822f813435..271629832629 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/core907d.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/core907d.c
@@ -26,6 +26,7 @@ static const struct nv50_core_func
core907d = {
.init = core507d_init,
.ntfy_init = core507d_ntfy_init,
+ .caps_init = core507d_caps_init,
.ntfy_wait_done = core507d_ntfy_wait_done,
.update = core507d_update,
.head = &head907d,
diff --git a/drivers/gpu/drm/nouveau/dispnv50/core917d.c b/drivers/gpu/drm/nouveau/dispnv50/core917d.c
index 392338df5bfd..5cc072d4c30f 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/core917d.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/core917d.c
@@ -26,6 +26,7 @@ static const struct nv50_core_func
core917d = {
.init = core507d_init,
.ntfy_init = core507d_ntfy_init,
+ .caps_init = core507d_caps_init,
.ntfy_wait_done = core507d_ntfy_wait_done,
.update = core507d_update,
.head = &head917d,
diff --git a/drivers/gpu/drm/nouveau/dispnv50/corec37d.c b/drivers/gpu/drm/nouveau/dispnv50/corec37d.c
index c03cb987856b..e0c8811fb8e4 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/corec37d.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/corec37d.c
@@ -22,6 +22,7 @@
#include "core.h"
#include "head.h"
+#include <nvif/class.h>
#include <nouveau_bo.h>
#include <nvif/timer.h>
@@ -87,6 +88,30 @@ corec37d_ntfy_init(struct nouveau_bo *bo, u32 offset)
nouveau_bo_wr32(bo, offset / 4 + 3, 0x00000000);
}
+int corec37d_caps_init(struct nouveau_drm *drm, struct nv50_disp *disp)
+{
+ int ret;
+
+ ret = nvif_object_init(&disp->disp->object, 0, GV100_DISP_CAPS,
+ NULL, 0, &disp->caps);
+ if (ret) {
+ NV_ERROR(drm,
+ "Failed to init notifier caps region: %d\n",
+ ret);
+ return ret;
+ }
+
+ ret = nvif_object_map(&disp->caps, NULL, 0);
+ if (ret) {
+ NV_ERROR(drm,
+ "Failed to map notifier caps region: %d\n",
+ ret);
+ return ret;
+ }
+
+ return 0;
+}
+
static void
corec37d_init(struct nv50_core *core)
{
@@ -111,6 +136,7 @@ static const struct nv50_core_func
corec37d = {
.init = corec37d_init,
.ntfy_init = corec37d_ntfy_init,
+ .caps_init = corec37d_caps_init,
.ntfy_wait_done = corec37d_ntfy_wait_done,
.update = corec37d_update,
.wndw.owner = corec37d_wndw_owner,
diff --git a/drivers/gpu/drm/nouveau/dispnv50/corec57d.c b/drivers/gpu/drm/nouveau/dispnv50/corec57d.c
index 147adcd60937..10ba9e9e4ae6 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/corec57d.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/corec57d.c
@@ -46,6 +46,7 @@ static const struct nv50_core_func
corec57d = {
.init = corec57d_init,
.ntfy_init = corec37d_ntfy_init,
+ .caps_init = corec37d_caps_init,
.ntfy_wait_done = corec37d_ntfy_wait_done,
.update = corec37d_update,
.wndw.owner = corec37d_wndw_owner,
diff --git a/drivers/gpu/drm/nouveau/dispnv50/curs507a.c b/drivers/gpu/drm/nouveau/dispnv50/curs507a.c
index 8c5cf096f69b..658a200ab616 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/curs507a.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/curs507a.c
@@ -32,7 +32,7 @@
bool
curs507a_space(struct nv50_wndw *wndw)
{
- nvif_msec(&nouveau_drm(wndw->plane.dev)->client.device, 2,
+ nvif_msec(&nouveau_drm(wndw->plane.dev)->client.device, 100,
if (nvif_rd32(&wndw->wimm.base.user, 0x0008) >= 4)
return true;
);
diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c
index 6be9df1820c5..7622490d8602 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/disp.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c
@@ -482,15 +482,16 @@ nv50_dac_create(struct drm_connector *connector, struct dcb_output *dcbe)
* audio component binding for ELD notification
*/
static void
-nv50_audio_component_eld_notify(struct drm_audio_component *acomp, int port)
+nv50_audio_component_eld_notify(struct drm_audio_component *acomp, int port,
+ int dev_id)
{
if (acomp && acomp->audio_ops && acomp->audio_ops->pin_eld_notify)
acomp->audio_ops->pin_eld_notify(acomp->audio_ops->audio_ptr,
- port, -1);
+ port, dev_id);
}
static int
-nv50_audio_component_get_eld(struct device *kdev, int port, int pipe,
+nv50_audio_component_get_eld(struct device *kdev, int port, int dev_id,
bool *enabled, unsigned char *buf, int max_bytes)
{
struct drm_device *drm_dev = dev_get_drvdata(kdev);
@@ -506,7 +507,8 @@ nv50_audio_component_get_eld(struct device *kdev, int port, int pipe,
nv_encoder = nouveau_encoder(encoder);
nv_connector = nouveau_encoder_connector_get(nv_encoder);
nv_crtc = nouveau_crtc(encoder->crtc);
- if (!nv_connector || !nv_crtc || nv_crtc->index != port)
+ if (!nv_connector || !nv_crtc || nv_encoder->or != port ||
+ nv_crtc->index != dev_id)
continue;
*enabled = drm_detect_monitor_audio(nv_connector->edid);
if (*enabled) {
@@ -600,7 +602,8 @@ nv50_audio_disable(struct drm_encoder *encoder, struct nouveau_crtc *nv_crtc)
nvif_mthd(&disp->disp->object, 0, &args, sizeof(args));
- nv50_audio_component_eld_notify(drm->audio.component, nv_crtc->index);
+ nv50_audio_component_eld_notify(drm->audio.component, nv_encoder->or,
+ nv_crtc->index);
}
static void
@@ -634,7 +637,8 @@ nv50_audio_enable(struct drm_encoder *encoder, struct drm_display_mode *mode)
nvif_mthd(&disp->disp->object, 0, &args,
sizeof(args.base) + drm_eld_size(args.data));
- nv50_audio_component_eld_notify(drm->audio.component, nv_crtc->index);
+ nv50_audio_component_eld_notify(drm->audio.component, nv_encoder->or,
+ nv_crtc->index);
}
/******************************************************************************
@@ -904,15 +908,9 @@ nv50_msto_atomic_check(struct drm_encoder *encoder,
if (!state->duplicated) {
const int clock = crtc_state->adjusted_mode.clock;
- /*
- * XXX: Since we don't use HDR in userspace quite yet, limit
- * the bpc to 8 to save bandwidth on the topology. In the
- * future, we'll want to properly fix this by dynamically
- * selecting the highest possible bpc that would fit in the
- * topology
- */
- asyh->or.bpc = min(connector->display_info.bpc, 8U);
- asyh->dp.pbn = drm_dp_calc_pbn_mode(clock, asyh->or.bpc * 3, false);
+ asyh->or.bpc = connector->display_info.bpc;
+ asyh->dp.pbn = drm_dp_calc_pbn_mode(clock, asyh->or.bpc * 3,
+ false);
}
slots = drm_dp_atomic_find_vcpi_slots(state, &mstm->mgr, mstc->port,
@@ -1058,7 +1056,14 @@ static enum drm_mode_status
nv50_mstc_mode_valid(struct drm_connector *connector,
struct drm_display_mode *mode)
{
- return MODE_OK;
+ struct nv50_mstc *mstc = nv50_mstc(connector);
+ struct nouveau_encoder *outp = mstc->mstm->outp;
+
+ /* TODO: calculate the PBN from the dotclock and validate against the
+ * MSTB's max possible PBN
+ */
+
+ return nv50_dp_mode_valid(connector, outp, mode, NULL);
}
static int
@@ -1072,8 +1077,17 @@ nv50_mstc_get_modes(struct drm_connector *connector)
if (mstc->edid)
ret = drm_add_edid_modes(&mstc->connector, mstc->edid);
- if (!mstc->connector.display_info.bpc)
- mstc->connector.display_info.bpc = 8;
+ /*
+ * XXX: Since we don't use HDR in userspace quite yet, limit the bpc
+ * to 8 to save bandwidth on the topology. In the future, we'll want
+ * to properly fix this by dynamically selecting the highest possible
+ * bpc that would fit in the topology
+ */
+ if (connector->display_info.bpc)
+ connector->display_info.bpc =
+ clamp(connector->display_info.bpc, 6U, 8U);
+ else
+ connector->display_info.bpc = 8;
if (mstc->native)
drm_mode_destroy(mstc->connector.dev, mstc->native);
@@ -1123,8 +1137,10 @@ nv50_mstc_detect(struct drm_connector *connector,
return connector_status_disconnected;
ret = pm_runtime_get_sync(connector->dev->dev);
- if (ret < 0 && ret != -EACCES)
+ if (ret < 0 && ret != -EACCES) {
+ pm_runtime_put_autosuspend(connector->dev->dev);
return connector_status_disconnected;
+ }
ret = drm_dp_mst_detect_port(connector, ctx, mstc->port->mgr,
mstc->port);
@@ -1659,6 +1675,7 @@ nv50_sor_create(struct drm_connector *connector, struct dcb_output *dcbe)
struct nvkm_i2c *i2c = nvxx_i2c(&drm->client.device);
struct nouveau_encoder *nv_encoder;
struct drm_encoder *encoder;
+ struct nv50_disp *disp = nv50_disp(connector->dev);
int type, ret;
switch (dcbe->type) {
@@ -1685,10 +1702,12 @@ nv50_sor_create(struct drm_connector *connector, struct dcb_output *dcbe)
drm_connector_attach_encoder(connector, encoder);
+ disp->core->func->sor->get_caps(disp, nv_encoder, ffs(dcbe->or) - 1);
+
if (dcbe->type == DCB_OUTPUT_DP) {
- struct nv50_disp *disp = nv50_disp(encoder->dev);
struct nvkm_i2c_aux *aux =
nvkm_i2c_aux_find(i2c, dcbe->i2c_index);
+
if (aux) {
if (disp->disp->object.oclass < GF110_DISP) {
/* HW has no support for address-only
@@ -1801,7 +1820,9 @@ nv50_pior_func = {
static int
nv50_pior_create(struct drm_connector *connector, struct dcb_output *dcbe)
{
- struct nouveau_drm *drm = nouveau_drm(connector->dev);
+ struct drm_device *dev = connector->dev;
+ struct nouveau_drm *drm = nouveau_drm(dev);
+ struct nv50_disp *disp = nv50_disp(dev);
struct nvkm_i2c *i2c = nvxx_i2c(&drm->client.device);
struct nvkm_i2c_bus *bus = NULL;
struct nvkm_i2c_aux *aux = NULL;
@@ -1840,6 +1861,9 @@ nv50_pior_create(struct drm_connector *connector, struct dcb_output *dcbe)
drm_encoder_helper_add(encoder, &nv50_pior_help);
drm_connector_attach_encoder(connector, encoder);
+
+ disp->core->func->pior->get_caps(disp, nv_encoder, ffs(dcbe->or) - 1);
+
return 0;
}
@@ -2369,7 +2393,8 @@ nv50_display_init(struct drm_device *dev, bool resume, bool runtime)
struct drm_encoder *encoder;
struct drm_plane *plane;
- core->func->init(core);
+ if (resume || runtime)
+ core->func->init(core);
list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
if (encoder->encoder_type != DRM_MODE_ENCODER_DPMST) {
@@ -2396,6 +2421,8 @@ nv50_display_destroy(struct drm_device *dev)
nv50_audio_component_fini(nouveau_drm(dev));
+ nvif_object_unmap(&disp->caps);
+ nvif_object_fini(&disp->caps);
nv50_core_del(&disp->core);
nouveau_bo_unmap(disp->sync);
@@ -2456,6 +2483,22 @@ nv50_display_create(struct drm_device *dev)
if (ret)
goto out;
+ disp->core->func->init(disp->core);
+ if (disp->core->func->caps_init) {
+ ret = disp->core->func->caps_init(drm, disp);
+ if (ret)
+ goto out;
+ }
+
+ /* Assign the correct format modifiers */
+ if (disp->disp->object.oclass >= TU102_DISP)
+ nouveau_display(dev)->format_modifiers = wndwc57e_modifiers;
+ else
+ if (disp->disp->object.oclass >= GF110_DISP)
+ nouveau_display(dev)->format_modifiers = disp90xx_modifiers;
+ else
+ nouveau_display(dev)->format_modifiers = disp50xx_modifiers;
+
/* create crtc objects to represent the hw heads */
if (disp->disp->object.oclass >= GV100_DISP)
crtcs = nvif_rd32(&device->object, 0x610060) & 0xff;
@@ -2551,3 +2594,53 @@ out:
nv50_display_destroy(dev);
return ret;
}
+
+/******************************************************************************
+ * Format modifiers
+ *****************************************************************************/
+
+/****************************************************************
+ * Log2(block height) ----------------------------+ *
+ * Page Kind ----------------------------------+ | *
+ * Gob Height/Page Kind Generation ------+ | | *
+ * Sector layout -------+ | | | *
+ * Compression ------+ | | | | */
+const u64 disp50xx_modifiers[] = { /* | | | | | */
+ DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 1, 0x7a, 0),
+ DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 1, 0x7a, 1),
+ DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 1, 0x7a, 2),
+ DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 1, 0x7a, 3),
+ DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 1, 0x7a, 4),
+ DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 1, 0x7a, 5),
+ DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 1, 0x78, 0),
+ DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 1, 0x78, 1),
+ DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 1, 0x78, 2),
+ DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 1, 0x78, 3),
+ DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 1, 0x78, 4),
+ DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 1, 0x78, 5),
+ DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 1, 0x70, 0),
+ DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 1, 0x70, 1),
+ DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 1, 0x70, 2),
+ DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 1, 0x70, 3),
+ DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 1, 0x70, 4),
+ DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 1, 0x70, 5),
+ DRM_FORMAT_MOD_LINEAR,
+ DRM_FORMAT_MOD_INVALID
+};
+
+/****************************************************************
+ * Log2(block height) ----------------------------+ *
+ * Page Kind ----------------------------------+ | *
+ * Gob Height/Page Kind Generation ------+ | | *
+ * Sector layout -------+ | | | *
+ * Compression ------+ | | | | */
+const u64 disp90xx_modifiers[] = { /* | | | | | */
+ DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 0, 0xfe, 0),
+ DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 0, 0xfe, 1),
+ DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 0, 0xfe, 2),
+ DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 0, 0xfe, 3),
+ DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 0, 0xfe, 4),
+ DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 0, 0xfe, 5),
+ DRM_FORMAT_MOD_LINEAR,
+ DRM_FORMAT_MOD_INVALID
+};
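
The modifier tables above (and the wndwc57e table later in this diff) follow the field layout sketched in the column comments. As a rough standalone illustration — not part of the patch, and with the raw value below purely hypothetical — the two fields nouveau actually consumes are the log2 block height in bits 3:0 and the page kind in bits 19:12, the same masks nouveau_decode_mod() applies later in this diff in nouveau_display.c:

#include <stdint.h>
#include <stdio.h>

/* Decode the two DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D fields nouveau uses;
 * bit positions mirror nouveau_decode_mod() in this series.  Vendor bits and
 * the remaining fields from the diagram above are ignored here.
 */
static void decode_block_linear(uint64_t modifier)
{
	uint32_t log2_block_height = modifier & 0xf;   /* bits 3:0   */
	uint32_t page_kind = (modifier >> 12) & 0xff;  /* bits 19:12 */

	printf("log2(block height)=%u, page kind=0x%02x\n",
	       log2_block_height, page_kind);
}

int main(void)
{
	/* hypothetical raw value: kind 0x7a, 2^4-GOB-high blocks */
	decode_block_linear(0x7a004);
	return 0;
}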
diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.h b/drivers/gpu/drm/nouveau/dispnv50/disp.h
index d54fe00ac3a3..696e70a6b98b 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/disp.h
+++ b/drivers/gpu/drm/nouveau/dispnv50/disp.h
@@ -9,6 +9,7 @@ struct nv50_msto;
struct nv50_disp {
struct nvif_disp *disp;
struct nv50_core *core;
+ struct nvif_object caps;
#define NV50_DISP_SYNC(c, o) ((c) * 0x040 + (o))
#define NV50_DISP_CORE_NTFY NV50_DISP_SYNC(0 , 0x00)
@@ -78,6 +79,10 @@ void nv50_dmac_destroy(struct nv50_dmac *);
u32 *evo_wait(struct nv50_dmac *, int nr);
void evo_kick(u32 *, struct nv50_dmac *);
+extern const u64 disp50xx_modifiers[];
+extern const u64 disp90xx_modifiers[];
+extern const u64 wndwc57e_modifiers[];
+
#define evo_mthd(p, m, s) do { \
const u32 _m = (m), _s = (s); \
if (drm_debug_enabled(DRM_UT_KMS)) \
diff --git a/drivers/gpu/drm/nouveau/dispnv50/headc37d.c b/drivers/gpu/drm/nouveau/dispnv50/headc37d.c
index 00011ce109a6..4a9a32b89f74 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/headc37d.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/headc37d.c
@@ -168,14 +168,15 @@ headc37d_mode(struct nv50_head *head, struct nv50_head_atom *asyh)
struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan;
struct nv50_head_mode *m = &asyh->mode;
u32 *push;
- if ((push = evo_wait(core, 12))) {
+ if ((push = evo_wait(core, 13))) {
evo_mthd(push, 0x2064 + (head->base.index * 0x400), 5);
evo_data(push, (m->v.active << 16) | m->h.active );
evo_data(push, (m->v.synce << 16) | m->h.synce );
evo_data(push, (m->v.blanke << 16) | m->h.blanke );
evo_data(push, (m->v.blanks << 16) | m->h.blanks );
evo_data(push, (m->v.blank2e << 16) | m->v.blank2s);
- evo_mthd(push, 0x200c + (head->base.index * 0x400), 1);
+ evo_mthd(push, 0x2008 + (head->base.index * 0x400), 2);
+ evo_data(push, m->interlace);
evo_data(push, m->clock * 1000);
evo_mthd(push, 0x2028 + (head->base.index * 0x400), 1);
evo_data(push, m->clock * 1000);
diff --git a/drivers/gpu/drm/nouveau/dispnv50/headc57d.c b/drivers/gpu/drm/nouveau/dispnv50/headc57d.c
index 938d910a1b1e..859131a8bc3c 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/headc57d.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/headc57d.c
@@ -173,14 +173,15 @@ headc57d_mode(struct nv50_head *head, struct nv50_head_atom *asyh)
struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan;
struct nv50_head_mode *m = &asyh->mode;
u32 *push;
- if ((push = evo_wait(core, 12))) {
+ if ((push = evo_wait(core, 13))) {
evo_mthd(push, 0x2064 + (head->base.index * 0x400), 5);
evo_data(push, (m->v.active << 16) | m->h.active );
evo_data(push, (m->v.synce << 16) | m->h.synce );
evo_data(push, (m->v.blanke << 16) | m->h.blanke );
evo_data(push, (m->v.blanks << 16) | m->h.blanks );
evo_data(push, (m->v.blank2e << 16) | m->v.blank2s);
- evo_mthd(push, 0x200c + (head->base.index * 0x400), 1);
+ evo_mthd(push, 0x2008 + (head->base.index * 0x400), 2);
+ evo_data(push, m->interlace);
evo_data(push, m->clock * 1000);
evo_mthd(push, 0x2028 + (head->base.index * 0x400), 1);
evo_data(push, m->clock * 1000);
diff --git a/drivers/gpu/drm/nouveau/dispnv50/pior507d.c b/drivers/gpu/drm/nouveau/dispnv50/pior507d.c
index d2bac6a341dc..45d8ce7d2c28 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/pior507d.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/pior507d.c
@@ -38,7 +38,15 @@ pior507d_ctrl(struct nv50_core *core, int or, u32 ctrl,
}
}
+static void
+pior507d_get_caps(struct nv50_disp *disp, struct nouveau_encoder *outp,
+ int or)
+{
+ outp->caps.dp_interlace = true;
+}
+
const struct nv50_outp_func
pior507d = {
.ctrl = pior507d_ctrl,
+ .get_caps = pior507d_get_caps,
};
diff --git a/drivers/gpu/drm/nouveau/dispnv50/sor507d.c b/drivers/gpu/drm/nouveau/dispnv50/sor507d.c
index 5222fe6a9b21..9a59fa7da00d 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/sor507d.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/sor507d.c
@@ -38,7 +38,14 @@ sor507d_ctrl(struct nv50_core *core, int or, u32 ctrl,
}
}
+static void
+sor507d_get_caps(struct nv50_disp *core, struct nouveau_encoder *outp, int or)
+{
+ outp->caps.dp_interlace = true;
+}
+
const struct nv50_outp_func
sor507d = {
.ctrl = sor507d_ctrl,
+ .get_caps = sor507d_get_caps,
};
diff --git a/drivers/gpu/drm/nouveau/dispnv50/sor907d.c b/drivers/gpu/drm/nouveau/dispnv50/sor907d.c
index b0314ec11fb3..9577ccf1c809 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/sor907d.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/sor907d.c
@@ -21,6 +21,7 @@
*/
#include "core.h"
+#include <nouveau_bo.h>
#include <nvif/class.h>
static void
@@ -35,7 +36,17 @@ sor907d_ctrl(struct nv50_core *core, int or, u32 ctrl,
}
}
+static void
+sor907d_get_caps(struct nv50_disp *disp, struct nouveau_encoder *outp, int or)
+{
+ const int off = or * 2;
+ u32 tmp = nouveau_bo_rd32(disp->sync, 0x000014 + off);
+
+ outp->caps.dp_interlace = !!(tmp & 0x04000000);
+}
+
const struct nv50_outp_func
sor907d = {
.ctrl = sor907d_ctrl,
+ .get_caps = sor907d_get_caps,
};
diff --git a/drivers/gpu/drm/nouveau/dispnv50/sorc37d.c b/drivers/gpu/drm/nouveau/dispnv50/sorc37d.c
index dff059241c5d..c86ca955fdcd 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/sorc37d.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/sorc37d.c
@@ -33,7 +33,16 @@ sorc37d_ctrl(struct nv50_core *core, int or, u32 ctrl,
}
}
+static void
+sorc37d_get_caps(struct nv50_disp *disp, struct nouveau_encoder *outp, int or)
+{
+ u32 tmp = nvif_rd32(&disp->caps, 0x000144 + (or * 8));
+
+ outp->caps.dp_interlace = !!(tmp & 0x04000000);
+}
+
const struct nv50_outp_func
sorc37d = {
.ctrl = sorc37d_ctrl,
+ .get_caps = sorc37d_get_caps,
};
diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.c b/drivers/gpu/drm/nouveau/dispnv50/wndw.c
index bb737f9281e6..e25ead56052c 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/wndw.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.c
@@ -29,6 +29,7 @@
#include <drm/drm_fourcc.h>
#include "nouveau_bo.h"
+#include "nouveau_gem.h"
static void
nv50_wndw_ctxdma_del(struct nv50_wndw_ctxdma *ctxdma)
@@ -39,12 +40,13 @@ nv50_wndw_ctxdma_del(struct nv50_wndw_ctxdma *ctxdma)
}
static struct nv50_wndw_ctxdma *
-nv50_wndw_ctxdma_new(struct nv50_wndw *wndw, struct nouveau_framebuffer *fb)
+nv50_wndw_ctxdma_new(struct nv50_wndw *wndw, struct drm_framebuffer *fb)
{
- struct nouveau_drm *drm = nouveau_drm(fb->base.dev);
+ struct nouveau_drm *drm = nouveau_drm(fb->dev);
struct nv50_wndw_ctxdma *ctxdma;
- const u8 kind = fb->nvbo->kind;
- const u32 handle = 0xfb000000 | kind;
+ u32 handle;
+ u32 unused;
+ u8 kind;
struct {
struct nv_dma_v0 base;
union {
@@ -56,6 +58,9 @@ nv50_wndw_ctxdma_new(struct nv50_wndw *wndw, struct nouveau_framebuffer *fb)
u32 argc = sizeof(args.base);
int ret;
+ nouveau_framebuffer_get_layout(fb, &unused, &kind);
+ handle = 0xfb000000 | kind;
+
list_for_each_entry(ctxdma, &wndw->ctxdma.list, head) {
if (ctxdma->object.handle == handle)
return ctxdma;
@@ -234,16 +239,20 @@ nv50_wndw_atomic_check_acquire(struct nv50_wndw *wndw, bool modeset,
struct nv50_wndw_atom *asyw,
struct nv50_head_atom *asyh)
{
- struct nouveau_framebuffer *fb = nouveau_framebuffer(asyw->state.fb);
+ struct drm_framebuffer *fb = asyw->state.fb;
struct nouveau_drm *drm = nouveau_drm(wndw->plane.dev);
+ uint8_t kind;
+ uint32_t tile_mode;
int ret;
NV_ATOMIC(drm, "%s acquire\n", wndw->plane.name);
- if (asyw->state.fb != armw->state.fb || !armw->visible || modeset) {
- asyw->image.w = fb->base.width;
- asyw->image.h = fb->base.height;
- asyw->image.kind = fb->nvbo->kind;
+ if (fb != armw->state.fb || !armw->visible || modeset) {
+ nouveau_framebuffer_get_layout(fb, &tile_mode, &kind);
+
+ asyw->image.w = fb->width;
+ asyw->image.h = fb->height;
+ asyw->image.kind = kind;
ret = nv50_wndw_atomic_check_acquire_rgb(asyw);
if (ret) {
@@ -255,16 +264,16 @@ nv50_wndw_atomic_check_acquire(struct nv50_wndw *wndw, bool modeset,
if (asyw->image.kind) {
asyw->image.layout = 0;
if (drm->client.device.info.chipset >= 0xc0)
- asyw->image.blockh = fb->nvbo->mode >> 4;
+ asyw->image.blockh = tile_mode >> 4;
else
- asyw->image.blockh = fb->nvbo->mode;
- asyw->image.blocks[0] = fb->base.pitches[0] / 64;
+ asyw->image.blockh = tile_mode;
+ asyw->image.blocks[0] = fb->pitches[0] / 64;
asyw->image.pitch[0] = 0;
} else {
asyw->image.layout = 1;
asyw->image.blockh = 0;
asyw->image.blocks[0] = 0;
- asyw->image.pitch[0] = fb->base.pitches[0];
+ asyw->image.pitch[0] = fb->pitches[0];
}
if (!asyh->state.async_flip)
@@ -471,47 +480,50 @@ nv50_wndw_atomic_check(struct drm_plane *plane, struct drm_plane_state *state)
static void
nv50_wndw_cleanup_fb(struct drm_plane *plane, struct drm_plane_state *old_state)
{
- struct nouveau_framebuffer *fb = nouveau_framebuffer(old_state->fb);
struct nouveau_drm *drm = nouveau_drm(plane->dev);
+ struct nouveau_bo *nvbo;
NV_ATOMIC(drm, "%s cleanup: %p\n", plane->name, old_state->fb);
if (!old_state->fb)
return;
- nouveau_bo_unpin(fb->nvbo);
+ nvbo = nouveau_gem_object(old_state->fb->obj[0]);
+ nouveau_bo_unpin(nvbo);
}
static int
nv50_wndw_prepare_fb(struct drm_plane *plane, struct drm_plane_state *state)
{
- struct nouveau_framebuffer *fb = nouveau_framebuffer(state->fb);
+ struct drm_framebuffer *fb = state->fb;
struct nouveau_drm *drm = nouveau_drm(plane->dev);
struct nv50_wndw *wndw = nv50_wndw(plane);
struct nv50_wndw_atom *asyw = nv50_wndw_atom(state);
+ struct nouveau_bo *nvbo;
struct nv50_head_atom *asyh;
struct nv50_wndw_ctxdma *ctxdma;
int ret;
- NV_ATOMIC(drm, "%s prepare: %p\n", plane->name, state->fb);
+ NV_ATOMIC(drm, "%s prepare: %p\n", plane->name, fb);
if (!asyw->state.fb)
return 0;
- ret = nouveau_bo_pin(fb->nvbo, TTM_PL_FLAG_VRAM, true);
+ nvbo = nouveau_gem_object(fb->obj[0]);
+ ret = nouveau_bo_pin(nvbo, TTM_PL_FLAG_VRAM, true);
if (ret)
return ret;
if (wndw->ctxdma.parent) {
ctxdma = nv50_wndw_ctxdma_new(wndw, fb);
if (IS_ERR(ctxdma)) {
- nouveau_bo_unpin(fb->nvbo);
+ nouveau_bo_unpin(nvbo);
return PTR_ERR(ctxdma);
}
asyw->image.handle[0] = ctxdma->object.handle;
}
- asyw->state.fence = dma_resv_get_excl_rcu(fb->nvbo->bo.base.resv);
- asyw->image.offset[0] = fb->nvbo->bo.offset;
+ asyw->state.fence = dma_resv_get_excl_rcu(nvbo->bo.base.resv);
+ asyw->image.offset[0] = nvbo->bo.offset;
if (wndw->func->prepare) {
asyh = nv50_head_atom_get(asyw->state.state, asyw->state.crtc);
@@ -603,6 +615,29 @@ nv50_wndw_destroy(struct drm_plane *plane)
kfree(wndw);
}
+/* This function assumes the format has already been validated against the plane
+ * and the modifier was validated against the device-wide modifier list at FB
+ * creation time.
+ */
+static bool nv50_plane_format_mod_supported(struct drm_plane *plane,
+ u32 format, u64 modifier)
+{
+ struct nouveau_drm *drm = nouveau_drm(plane->dev);
+ uint8_t i;
+
+ if (drm->client.device.info.chipset < 0xc0) {
+ const struct drm_format_info *info = drm_format_info(format);
+ const uint8_t kind = (modifier >> 12) & 0xff;
+
+ if (!format) return false;
+
+ for (i = 0; i < info->num_planes; i++)
+ if ((info->cpp[i] != 4) && kind != 0x70) return false;
+ }
+
+ return true;
+}
+
const struct drm_plane_funcs
nv50_wndw = {
.update_plane = drm_atomic_helper_update_plane,
@@ -611,6 +646,7 @@ nv50_wndw = {
.reset = nv50_wndw_reset,
.atomic_duplicate_state = nv50_wndw_atomic_duplicate_state,
.atomic_destroy_state = nv50_wndw_atomic_destroy_state,
+ .format_mod_supported = nv50_plane_format_mod_supported,
};
static int
@@ -658,7 +694,8 @@ nv50_wndw_new_(const struct nv50_wndw_func *func, struct drm_device *dev,
for (nformat = 0; format[nformat]; nformat++);
ret = drm_universal_plane_init(dev, &wndw->plane, heads, &nv50_wndw,
- format, nformat, NULL,
+ format, nformat,
+ nouveau_display(dev)->format_modifiers,
type, "%s-%d", name, index);
if (ret) {
kfree(*pwndw);
diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndwc57e.c b/drivers/gpu/drm/nouveau/dispnv50/wndwc57e.c
index 35c9c52fab26..1d64741595ba 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/wndwc57e.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/wndwc57e.c
@@ -173,6 +173,23 @@ wndwc57e_ilut(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw, int size)
return true;
}
+/****************************************************************
+ * Log2(block height) ----------------------------+ *
+ * Page Kind ----------------------------------+ | *
+ * Gob Height/Page Kind Generation ------+ | | *
+ * Sector layout -------+ | | | *
+ * Compression ------+ | | | | */
+const u64 wndwc57e_modifiers[] = { /* | | | | | */
+ DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 2, 0x06, 0),
+ DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 2, 0x06, 1),
+ DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 2, 0x06, 2),
+ DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 2, 0x06, 3),
+ DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 2, 0x06, 4),
+ DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 2, 0x06, 5),
+ DRM_FORMAT_MOD_LINEAR,
+ DRM_FORMAT_MOD_INVALID
+};
+
static const struct nv50_wndw_func
wndwc57e = {
.acquire = wndwc37e_acquire,
diff --git a/drivers/gpu/drm/nouveau/include/nvif/class.h b/drivers/gpu/drm/nouveau/include/nvif/class.h
index 30659747ffe8..2c79beb41126 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/class.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/class.h
@@ -89,6 +89,8 @@
#define GV100_DISP /* cl5070.h */ 0x0000c370
#define TU102_DISP /* cl5070.h */ 0x0000c570
+#define GV100_DISP_CAPS 0x0000c373
+
#define NV31_MPEG 0x00003174
#define G82_MPEG 0x00008274
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h b/drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h
index 1218f28c14ba..76288c682e9e 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h
@@ -24,6 +24,8 @@ struct nvkm_subdev_func {
};
extern const char *nvkm_subdev_name[NVKM_SUBDEV_NR];
+int nvkm_subdev_new_(const struct nvkm_subdev_func *, struct nvkm_device *,
+ int index, struct nvkm_subdev **);
void nvkm_subdev_ctor(const struct nvkm_subdev_func *, struct nvkm_device *,
int index, struct nvkm_subdev *);
void nvkm_subdev_del(struct nvkm_subdev **);
diff --git a/drivers/gpu/drm/nouveau/nouveau_acpi.c b/drivers/gpu/drm/nouveau/nouveau_acpi.c
index fe3a10255c36..69a84d0197d0 100644
--- a/drivers/gpu/drm/nouveau/nouveau_acpi.c
+++ b/drivers/gpu/drm/nouveau/nouveau_acpi.c
@@ -49,7 +49,6 @@ static struct nouveau_dsm_priv {
bool optimus_flags_detected;
bool optimus_skip_dsm;
acpi_handle dhandle;
- acpi_handle rom_handle;
} nouveau_dsm_priv;
bool nouveau_is_optimus(void) {
@@ -212,37 +211,6 @@ static const struct vga_switcheroo_handler nouveau_dsm_handler = {
.get_client_id = nouveau_dsm_get_client_id,
};
-/*
- * Firmware supporting Windows 8 or later do not use _DSM to put the device into
- * D3cold, they instead rely on disabling power resources on the parent.
- */
-static bool nouveau_pr3_present(struct pci_dev *pdev)
-{
- struct pci_dev *parent_pdev = pci_upstream_bridge(pdev);
- struct acpi_device *parent_adev;
-
- if (!parent_pdev)
- return false;
-
- if (!parent_pdev->bridge_d3) {
- /*
- * Parent PCI bridge is currently not power managed.
- * Since userspace can change these afterwards to be on
- * the safe side we stick with _DSM and prevent usage of
- * _PR3 from the bridge.
- */
- pci_d3cold_disable(pdev);
- return false;
- }
-
- parent_adev = ACPI_COMPANION(&parent_pdev->dev);
- if (!parent_adev)
- return false;
-
- return parent_adev->power.flags.power_resources &&
- acpi_has_method(parent_adev->handle, "_PR3");
-}
-
static void nouveau_dsm_pci_probe(struct pci_dev *pdev, acpi_handle *dhandle_out,
bool *has_mux, bool *has_opt,
bool *has_opt_flags, bool *has_pr3)
@@ -250,6 +218,16 @@ static void nouveau_dsm_pci_probe(struct pci_dev *pdev, acpi_handle *dhandle_out
acpi_handle dhandle;
bool supports_mux;
int optimus_funcs;
+ struct pci_dev *parent_pdev;
+
+ *has_pr3 = false;
+ parent_pdev = pci_upstream_bridge(pdev);
+ if (parent_pdev) {
+ if (parent_pdev->bridge_d3)
+ *has_pr3 = pci_pr3_present(parent_pdev);
+ else
+ pci_d3cold_disable(pdev);
+ }
dhandle = ACPI_HANDLE(&pdev->dev);
if (!dhandle)
@@ -270,7 +248,6 @@ static void nouveau_dsm_pci_probe(struct pci_dev *pdev, acpi_handle *dhandle_out
*has_mux = supports_mux;
*has_opt = !!optimus_funcs;
*has_opt_flags = optimus_funcs & (1 << NOUVEAU_DSM_OPTIMUS_FLAGS);
- *has_pr3 = false;
if (optimus_funcs) {
uint32_t result;
@@ -280,8 +257,6 @@ static void nouveau_dsm_pci_probe(struct pci_dev *pdev, acpi_handle *dhandle_out
(result & OPTIMUS_ENABLED) ? "enabled" : "disabled",
(result & OPTIMUS_DYNAMIC_PWR_CAP) ? "dynamic power, " : "",
(result & OPTIMUS_HDA_CODEC_MASK) ? "hda bios codec supported" : "");
-
- *has_pr3 = nouveau_pr3_present(pdev);
}
}
@@ -385,59 +360,6 @@ void nouveau_unregister_dsm_handler(void) {}
void nouveau_switcheroo_optimus_dsm(void) {}
#endif
-/* retrieve the ROM in 4k blocks */
-static int nouveau_rom_call(acpi_handle rom_handle, uint8_t *bios,
- int offset, int len)
-{
- acpi_status status;
- union acpi_object rom_arg_elements[2], *obj;
- struct acpi_object_list rom_arg;
- struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL};
-
- rom_arg.count = 2;
- rom_arg.pointer = &rom_arg_elements[0];
-
- rom_arg_elements[0].type = ACPI_TYPE_INTEGER;
- rom_arg_elements[0].integer.value = offset;
-
- rom_arg_elements[1].type = ACPI_TYPE_INTEGER;
- rom_arg_elements[1].integer.value = len;
-
- status = acpi_evaluate_object(rom_handle, NULL, &rom_arg, &buffer);
- if (ACPI_FAILURE(status)) {
- pr_info("failed to evaluate ROM got %s\n",
- acpi_format_exception(status));
- return -ENODEV;
- }
- obj = (union acpi_object *)buffer.pointer;
- len = min(len, (int)obj->buffer.length);
- memcpy(bios+offset, obj->buffer.pointer, len);
- kfree(buffer.pointer);
- return len;
-}
-
-bool nouveau_acpi_rom_supported(struct device *dev)
-{
- acpi_status status;
- acpi_handle dhandle, rom_handle;
-
- dhandle = ACPI_HANDLE(dev);
- if (!dhandle)
- return false;
-
- status = acpi_get_handle(dhandle, "_ROM", &rom_handle);
- if (ACPI_FAILURE(status))
- return false;
-
- nouveau_dsm_priv.rom_handle = rom_handle;
- return true;
-}
-
-int nouveau_acpi_get_bios_chunk(uint8_t *bios, int offset, int len)
-{
- return nouveau_rom_call(nouveau_dsm_priv.rom_handle, bios, offset, len);
-}
-
void *
nouveau_acpi_edid(struct drm_device *dev, struct drm_connector *connector)
{
diff --git a/drivers/gpu/drm/nouveau/nouveau_acpi.h b/drivers/gpu/drm/nouveau/nouveau_acpi.h
index 1e6e8a8c0455..330f9b837066 100644
--- a/drivers/gpu/drm/nouveau/nouveau_acpi.h
+++ b/drivers/gpu/drm/nouveau/nouveau_acpi.h
@@ -10,8 +10,6 @@ bool nouveau_is_v1_dsm(void);
void nouveau_register_dsm_handler(void);
void nouveau_unregister_dsm_handler(void);
void nouveau_switcheroo_optimus_dsm(void);
-int nouveau_acpi_get_bios_chunk(uint8_t *bios, int offset, int len);
-bool nouveau_acpi_rom_supported(struct device *);
void *nouveau_acpi_edid(struct drm_device *, struct drm_connector *);
#else
static inline bool nouveau_is_optimus(void) { return false; };
@@ -19,8 +17,6 @@ static inline bool nouveau_is_v1_dsm(void) { return false; };
static inline void nouveau_register_dsm_handler(void) {}
static inline void nouveau_unregister_dsm_handler(void) {}
static inline void nouveau_switcheroo_optimus_dsm(void) {}
-static inline bool nouveau_acpi_rom_supported(struct device *dev) { return false; }
-static inline int nouveau_acpi_get_bios_chunk(uint8_t *bios, int offset, int len) { return -EINVAL; }
static inline void *nouveau_acpi_edid(struct drm_device *dev, struct drm_connector *connector) { return NULL; }
#endif
diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c
index 9a9a7f5003d3..1b383ae0248f 100644
--- a/drivers/gpu/drm/nouveau/nouveau_connector.c
+++ b/drivers/gpu/drm/nouveau/nouveau_connector.c
@@ -38,6 +38,7 @@
#include "nouveau_reg.h"
#include "nouveau_drv.h"
#include "dispnv04/hw.h"
+#include "dispnv50/disp.h"
#include "nouveau_acpi.h"
#include "nouveau_display.h"
@@ -509,7 +510,11 @@ nouveau_connector_set_encoder(struct drm_connector *connector,
nv_connector->detected_encoder = nv_encoder;
if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA) {
- connector->interlace_allowed = true;
+ if (nv_encoder->dcb->type == DCB_OUTPUT_DP)
+ connector->interlace_allowed =
+ nv_encoder->caps.dp_interlace;
+ else
+ connector->interlace_allowed = true;
connector->doublescan_allowed = true;
} else
if (nv_encoder->dcb->type == DCB_OUTPUT_LVDS ||
@@ -1029,6 +1034,29 @@ get_tmds_link_bandwidth(struct drm_connector *connector)
return 112000 * duallink_scale;
}
+enum drm_mode_status
+nouveau_conn_mode_clock_valid(const struct drm_display_mode *mode,
+ const unsigned min_clock,
+ const unsigned max_clock,
+ unsigned int *clock_out)
+{
+ unsigned int clock = mode->clock;
+
+ if ((mode->flags & DRM_MODE_FLAG_3D_MASK) ==
+ DRM_MODE_FLAG_3D_FRAME_PACKING)
+ clock *= 2;
+
+ if (clock < min_clock)
+ return MODE_CLOCK_LOW;
+ if (clock > max_clock)
+ return MODE_CLOCK_HIGH;
+
+ if (clock_out)
+ *clock_out = clock;
+
+ return MODE_OK;
+}
+
static enum drm_mode_status
nouveau_connector_mode_valid(struct drm_connector *connector,
struct drm_display_mode *mode)
@@ -1037,7 +1065,6 @@ nouveau_connector_mode_valid(struct drm_connector *connector,
struct nouveau_encoder *nv_encoder = nv_connector->detected_encoder;
struct drm_encoder *encoder = to_drm_encoder(nv_encoder);
unsigned min_clock = 25000, max_clock = min_clock;
- unsigned clock = mode->clock;
switch (nv_encoder->dcb->type) {
case DCB_OUTPUT_LVDS:
@@ -1060,25 +1087,14 @@ nouveau_connector_mode_valid(struct drm_connector *connector,
case DCB_OUTPUT_TV:
return get_slave_funcs(encoder)->mode_valid(encoder, mode);
case DCB_OUTPUT_DP:
- max_clock = nv_encoder->dp.link_nr;
- max_clock *= nv_encoder->dp.link_bw;
- clock = clock * (connector->display_info.bpc * 3) / 10;
- break;
+ return nv50_dp_mode_valid(connector, nv_encoder, mode, NULL);
default:
BUG();
return MODE_BAD;
}
- if ((mode->flags & DRM_MODE_FLAG_3D_MASK) == DRM_MODE_FLAG_3D_FRAME_PACKING)
- clock *= 2;
-
- if (clock < min_clock)
- return MODE_CLOCK_LOW;
-
- if (clock > max_clock)
- return MODE_CLOCK_HIGH;
-
- return MODE_OK;
+ return nouveau_conn_mode_clock_valid(mode, min_clock, max_clock,
+ NULL);
}
static struct drm_encoder *
diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.h b/drivers/gpu/drm/nouveau/nouveau_connector.h
index de84fb4708c7..9e062c7adec8 100644
--- a/drivers/gpu/drm/nouveau/nouveau_connector.h
+++ b/drivers/gpu/drm/nouveau/nouveau_connector.h
@@ -195,6 +195,11 @@ int nouveau_conn_atomic_get_property(struct drm_connector *,
const struct drm_connector_state *,
struct drm_property *, u64 *);
struct drm_display_mode *nouveau_conn_native_mode(struct drm_connector *);
+enum drm_mode_status
+nouveau_conn_mode_clock_valid(const struct drm_display_mode *,
+ const unsigned min_clock,
+ const unsigned max_clock,
+ unsigned *clock);
#ifdef CONFIG_DRM_NOUVEAU_BACKLIGHT
extern int nouveau_backlight_init(struct drm_connector *);
diff --git a/drivers/gpu/drm/nouveau/nouveau_debugfs.c b/drivers/gpu/drm/nouveau/nouveau_debugfs.c
index 15a3d40edf02..63b5c8cf9ae4 100644
--- a/drivers/gpu/drm/nouveau/nouveau_debugfs.c
+++ b/drivers/gpu/drm/nouveau/nouveau_debugfs.c
@@ -181,8 +181,11 @@ nouveau_debugfs_pstate_set(struct file *file, const char __user *ubuf,
}
ret = pm_runtime_get_sync(drm->dev);
- if (ret < 0 && ret != -EACCES)
+ if (ret < 0 && ret != -EACCES) {
+ pm_runtime_put_autosuspend(drm->dev);
return ret;
+ }
+
ret = nvif_mthd(ctrl, NVIF_CONTROL_PSTATE_USER, &args, sizeof(args));
pm_runtime_put_autosuspend(drm->dev);
if (ret < 0)
@@ -217,7 +220,7 @@ static const struct nouveau_debugfs_files {
{"pstate", &nouveau_pstate_fops},
};
-int
+void
nouveau_drm_debugfs_init(struct drm_minor *minor)
{
struct nouveau_drm *drm = nouveau_drm(minor->dev);
@@ -240,12 +243,10 @@ nouveau_drm_debugfs_init(struct drm_minor *minor)
*/
dentry = debugfs_lookup("vbios.rom", minor->debugfs_root);
if (!dentry)
- return 0;
+ return;
d_inode(dentry)->i_size = drm->vbios.length;
dput(dentry);
-
- return 0;
}
int
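
The error path added to nouveau_debugfs_pstate_set() above (and the matching one in nv50_mstc_detect() earlier in this diff) follows from pm_runtime_get_sync() taking a usage-count reference even when it fails: the failing caller still has to drop that reference. A minimal sketch of the resulting pattern, with an illustrative function name, is:

#include <linux/device.h>
#include <linux/pm_runtime.h>

/* Illustrative only: pm_runtime_get_sync() bumps the usage count even on
 * failure, so the early-return path must drop the reference it took.
 */
static int example_runtime_pm_section(struct device *dev)
{
	int ret = pm_runtime_get_sync(dev);

	if (ret < 0 && ret != -EACCES) {
		pm_runtime_put_autosuspend(dev);
		return ret;
	}

	/* ... touch the hardware while it is guaranteed to be powered ... */

	pm_runtime_put_autosuspend(dev);
	return 0;
}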
diff --git a/drivers/gpu/drm/nouveau/nouveau_debugfs.h b/drivers/gpu/drm/nouveau/nouveau_debugfs.h
index 8909c010e8ea..77f0323b38ba 100644
--- a/drivers/gpu/drm/nouveau/nouveau_debugfs.h
+++ b/drivers/gpu/drm/nouveau/nouveau_debugfs.h
@@ -18,15 +18,13 @@ nouveau_debugfs(struct drm_device *dev)
return nouveau_drm(dev)->debugfs;
}
-extern int nouveau_drm_debugfs_init(struct drm_minor *);
+extern void nouveau_drm_debugfs_init(struct drm_minor *);
extern int nouveau_debugfs_init(struct nouveau_drm *);
extern void nouveau_debugfs_fini(struct nouveau_drm *);
#else
-static inline int
+static inline void
nouveau_drm_debugfs_init(struct drm_minor *minor)
-{
- return 0;
-}
+{}
static inline int
nouveau_debugfs_init(struct nouveau_drm *drm)
diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c
index 700817dc4fa0..496c4621cc78 100644
--- a/drivers/gpu/drm/nouveau/nouveau_display.c
+++ b/drivers/gpu/drm/nouveau/nouveau_display.c
@@ -31,6 +31,7 @@
#include <drm/drm_crtc_helper.h>
#include <drm/drm_fb_helper.h>
#include <drm/drm_fourcc.h>
+#include <drm/drm_gem_framebuffer_helper.h>
#include <drm/drm_probe_helper.h>
#include <drm/drm_vblank.h>
@@ -179,41 +180,164 @@ nouveau_display_vblank_init(struct drm_device *dev)
return 0;
}
+static const struct drm_framebuffer_funcs nouveau_framebuffer_funcs = {
+ .destroy = drm_gem_fb_destroy,
+ .create_handle = drm_gem_fb_create_handle,
+};
+
static void
-nouveau_user_framebuffer_destroy(struct drm_framebuffer *drm_fb)
+nouveau_decode_mod(struct nouveau_drm *drm,
+ uint64_t modifier,
+ uint32_t *tile_mode,
+ uint8_t *kind)
+{
+ BUG_ON(!tile_mode || !kind);
+
+ if (modifier == DRM_FORMAT_MOD_LINEAR) {
+ /* tile_mode will not be used in this case */
+ *tile_mode = 0;
+ *kind = 0;
+ } else {
+ /*
+ * Extract the block height and kind from the corresponding
+ * modifier fields. See drm_fourcc.h for details.
+ */
+ *tile_mode = (uint32_t)(modifier & 0xF);
+ *kind = (uint8_t)((modifier >> 12) & 0xFF);
+
+ if (drm->client.device.info.chipset >= 0xc0)
+ *tile_mode <<= 4;
+ }
+}
+
+void
+nouveau_framebuffer_get_layout(struct drm_framebuffer *fb,
+ uint32_t *tile_mode,
+ uint8_t *kind)
{
- struct nouveau_framebuffer *fb = nouveau_framebuffer(drm_fb);
+ if (fb->flags & DRM_MODE_FB_MODIFIERS) {
+ struct nouveau_drm *drm = nouveau_drm(fb->dev);
- if (fb->nvbo)
- drm_gem_object_put_unlocked(&fb->nvbo->bo.base);
+ nouveau_decode_mod(drm, fb->modifier, tile_mode, kind);
+ } else {
+ const struct nouveau_bo *nvbo = nouveau_gem_object(fb->obj[0]);
- drm_framebuffer_cleanup(drm_fb);
- kfree(fb);
+ *tile_mode = nvbo->mode;
+ *kind = nvbo->kind;
+ }
}
static int
-nouveau_user_framebuffer_create_handle(struct drm_framebuffer *drm_fb,
- struct drm_file *file_priv,
- unsigned int *handle)
+nouveau_validate_decode_mod(struct nouveau_drm *drm,
+ uint64_t modifier,
+ uint32_t *tile_mode,
+ uint8_t *kind)
{
- struct nouveau_framebuffer *fb = nouveau_framebuffer(drm_fb);
+ struct nouveau_display *disp = nouveau_display(drm->dev);
+ int mod;
+
+ if (drm->client.device.info.family < NV_DEVICE_INFO_V0_TESLA) {
+ return -EINVAL;
+ }
- return drm_gem_handle_create(file_priv, &fb->nvbo->bo.base, handle);
+ BUG_ON(!disp->format_modifiers);
+
+ for (mod = 0;
+ (disp->format_modifiers[mod] != DRM_FORMAT_MOD_INVALID) &&
+ (disp->format_modifiers[mod] != modifier);
+ mod++);
+
+ if (disp->format_modifiers[mod] == DRM_FORMAT_MOD_INVALID)
+ return -EINVAL;
+
+ nouveau_decode_mod(drm, modifier, tile_mode, kind);
+
+ return 0;
}
-static const struct drm_framebuffer_funcs nouveau_framebuffer_funcs = {
- .destroy = nouveau_user_framebuffer_destroy,
- .create_handle = nouveau_user_framebuffer_create_handle,
-};
+static inline uint32_t
+nouveau_get_width_in_blocks(uint32_t stride)
+{
+ /* GOBs per block in the x direction is always one, and GOBs are
+ * 64 bytes wide
+ */
+ static const uint32_t log_block_width = 6;
+
+ return (stride + (1 << log_block_width) - 1) >> log_block_width;
+}
+
+static inline uint32_t
+nouveau_get_height_in_blocks(struct nouveau_drm *drm,
+ uint32_t height,
+ uint32_t log_block_height_in_gobs)
+{
+ uint32_t log_gob_height;
+ uint32_t log_block_height;
+
+ BUG_ON(drm->client.device.info.family < NV_DEVICE_INFO_V0_TESLA);
+
+ if (drm->client.device.info.family < NV_DEVICE_INFO_V0_FERMI)
+ log_gob_height = 2;
+ else
+ log_gob_height = 3;
+
+ log_block_height = log_block_height_in_gobs + log_gob_height;
+
+ return (height + (1 << log_block_height) - 1) >> log_block_height;
+}
+
+static int
+nouveau_check_bl_size(struct nouveau_drm *drm, struct nouveau_bo *nvbo,
+ uint32_t offset, uint32_t stride, uint32_t h,
+ uint32_t tile_mode)
+{
+ uint32_t gob_size, bw, bh;
+ uint64_t bl_size;
+
+ BUG_ON(drm->client.device.info.family < NV_DEVICE_INFO_V0_TESLA);
+
+ if (drm->client.device.info.chipset >= 0xc0) {
+ if (tile_mode & 0xF)
+ return -EINVAL;
+ tile_mode >>= 4;
+ }
+
+ if (tile_mode & 0xFFFFFFF0)
+ return -EINVAL;
+
+ if (drm->client.device.info.family < NV_DEVICE_INFO_V0_FERMI)
+ gob_size = 256;
+ else
+ gob_size = 512;
+
+ bw = nouveau_get_width_in_blocks(stride);
+ bh = nouveau_get_height_in_blocks(drm, h, tile_mode);
+
+ bl_size = bw * bh * (1 << tile_mode) * gob_size;
+
+ DRM_DEBUG_KMS("offset=%u stride=%u h=%u tile_mode=0x%02x bw=%u bh=%u gob_size=%u bl_size=%llu size=%lu\n",
+ offset, stride, h, tile_mode, bw, bh, gob_size, bl_size,
+ nvbo->bo.mem.size);
+
+ if (bl_size + offset > nvbo->bo.mem.size)
+ return -ERANGE;
+
+ return 0;
+}
int
nouveau_framebuffer_new(struct drm_device *dev,
const struct drm_mode_fb_cmd2 *mode_cmd,
- struct nouveau_bo *nvbo,
- struct nouveau_framebuffer **pfb)
+ struct drm_gem_object *gem,
+ struct drm_framebuffer **pfb)
{
struct nouveau_drm *drm = nouveau_drm(dev);
- struct nouveau_framebuffer *fb;
+ struct nouveau_bo *nvbo = nouveau_gem_object(gem);
+ struct drm_framebuffer *fb;
+ const struct drm_format_info *info;
+ unsigned int width, height, i;
+ uint32_t tile_mode;
+ uint8_t kind;
int ret;
/* YUV overlays have special requirements pre-NV50 */
@@ -236,13 +360,50 @@ nouveau_framebuffer_new(struct drm_device *dev,
return -EINVAL;
}
+ if (mode_cmd->flags & DRM_MODE_FB_MODIFIERS) {
+ if (nouveau_validate_decode_mod(drm, mode_cmd->modifier[0],
+ &tile_mode, &kind)) {
+ DRM_DEBUG_KMS("Unsupported modifier: 0x%llx\n",
+ mode_cmd->modifier[0]);
+ return -EINVAL;
+ }
+ } else {
+ tile_mode = nvbo->mode;
+ kind = nvbo->kind;
+ }
+
+ info = drm_get_format_info(dev, mode_cmd);
+
+ for (i = 0; i < info->num_planes; i++) {
+ width = drm_format_info_plane_width(info,
+ mode_cmd->width,
+ i);
+ height = drm_format_info_plane_height(info,
+ mode_cmd->height,
+ i);
+
+ if (kind) {
+ ret = nouveau_check_bl_size(drm, nvbo,
+ mode_cmd->offsets[i],
+ mode_cmd->pitches[i],
+ height, tile_mode);
+ if (ret)
+ return ret;
+ } else {
+ uint32_t size = mode_cmd->pitches[i] * height;
+
+ if (size + mode_cmd->offsets[i] > nvbo->bo.mem.size)
+ return -ERANGE;
+ }
+ }
+
if (!(fb = *pfb = kzalloc(sizeof(*fb), GFP_KERNEL)))
return -ENOMEM;
- drm_helper_mode_fill_fb_struct(dev, &fb->base, mode_cmd);
- fb->nvbo = nvbo;
+ drm_helper_mode_fill_fb_struct(dev, fb, mode_cmd);
+ fb->obj[0] = gem;
- ret = drm_framebuffer_init(dev, &fb->base, &nouveau_framebuffer_funcs);
+ ret = drm_framebuffer_init(dev, fb, &nouveau_framebuffer_funcs);
if (ret)
kfree(fb);
return ret;
@@ -253,19 +414,17 @@ nouveau_user_framebuffer_create(struct drm_device *dev,
struct drm_file *file_priv,
const struct drm_mode_fb_cmd2 *mode_cmd)
{
- struct nouveau_framebuffer *fb;
- struct nouveau_bo *nvbo;
+ struct drm_framebuffer *fb;
struct drm_gem_object *gem;
int ret;
gem = drm_gem_object_lookup(file_priv, mode_cmd->handles[0]);
if (!gem)
return ERR_PTR(-ENOENT);
- nvbo = nouveau_gem_object(gem);
- ret = nouveau_framebuffer_new(dev, mode_cmd, nvbo, &fb);
+ ret = nouveau_framebuffer_new(dev, mode_cmd, gem, &fb);
if (ret == 0)
- return &fb->base;
+ return fb;
drm_gem_object_put_unlocked(gem);
return ERR_PTR(ret);
@@ -517,6 +676,7 @@ nouveau_display_create(struct drm_device *dev)
dev->mode_config.preferred_depth = 24;
dev->mode_config.prefer_shadow = 1;
+ dev->mode_config.allow_fb_modifiers = true;
if (drm->client.device.info.chipset < 0x11)
dev->mode_config.async_page_flip = false;
diff --git a/drivers/gpu/drm/nouveau/nouveau_display.h b/drivers/gpu/drm/nouveau/nouveau_display.h
index de004018ab5c..6e0d900441d6 100644
--- a/drivers/gpu/drm/nouveau/nouveau_display.h
+++ b/drivers/gpu/drm/nouveau/nouveau_display.h
@@ -8,26 +8,11 @@
#include <drm/drm_framebuffer.h>
-struct nouveau_framebuffer {
- struct drm_framebuffer base;
- struct nouveau_bo *nvbo;
- struct nouveau_vma *vma;
- u32 r_handle;
- u32 r_format;
- u32 r_pitch;
- struct nvif_object h_base[4];
- struct nvif_object h_core;
-};
-
-static inline struct nouveau_framebuffer *
-nouveau_framebuffer(struct drm_framebuffer *fb)
-{
- return container_of(fb, struct nouveau_framebuffer, base);
-}
-
-int nouveau_framebuffer_new(struct drm_device *,
- const struct drm_mode_fb_cmd2 *,
- struct nouveau_bo *, struct nouveau_framebuffer **);
+int
+nouveau_framebuffer_new(struct drm_device *dev,
+ const struct drm_mode_fb_cmd2 *mode_cmd,
+ struct drm_gem_object *gem,
+ struct drm_framebuffer **pfb);
struct nouveau_display {
void *priv;
@@ -47,6 +32,8 @@ struct nouveau_display {
struct drm_property *color_vibrance_property;
struct drm_atomic_state *suspend;
+
+ const u64 *format_modifiers;
};
static inline struct nouveau_display *
@@ -75,6 +62,10 @@ int nouveau_display_dumb_map_offset(struct drm_file *, struct drm_device *,
void nouveau_hdmi_mode_set(struct drm_encoder *, struct drm_display_mode *);
+void
+nouveau_framebuffer_get_layout(struct drm_framebuffer *fb, uint32_t *tile_mode,
+ uint8_t *kind);
+
struct drm_framebuffer *
nouveau_user_framebuffer_create(struct drm_device *, struct drm_file *,
const struct drm_mode_fb_cmd2 *);
diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c
index ad89e09a0be3..e5c230d9ae24 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dmem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c
@@ -25,12 +25,13 @@
#include "nouveau_dma.h"
#include "nouveau_mem.h"
#include "nouveau_bo.h"
+#include "nouveau_svm.h"
#include <nvif/class.h>
#include <nvif/object.h>
#include <nvif/if000c.h>
#include <nvif/if500b.h>
#include <nvif/if900b.h>
#include <linux/sched/mm.h>
#include <linux/hmm.h>
@@ -54,66 +56,69 @@ enum nouveau_aper {
typedef int (*nouveau_migrate_copy_t)(struct nouveau_drm *drm, u64 npages,
enum nouveau_aper, u64 dst_addr,
enum nouveau_aper, u64 src_addr);
+typedef int (*nouveau_clear_page_t)(struct nouveau_drm *drm, u32 length,
+ enum nouveau_aper, u64 dst_addr);
struct nouveau_dmem_chunk {
struct list_head list;
struct nouveau_bo *bo;
struct nouveau_drm *drm;
- unsigned long pfn_first;
unsigned long callocated;
- unsigned long bitmap[BITS_TO_LONGS(DMEM_CHUNK_NPAGES)];
- spinlock_t lock;
+ struct dev_pagemap pagemap;
};
struct nouveau_dmem_migrate {
nouveau_migrate_copy_t copy_func;
+ nouveau_clear_page_t clear_func;
struct nouveau_channel *chan;
};
struct nouveau_dmem {
struct nouveau_drm *drm;
- struct dev_pagemap pagemap;
struct nouveau_dmem_migrate migrate;
- struct list_head chunk_free;
- struct list_head chunk_full;
- struct list_head chunk_empty;
+ struct list_head chunks;
struct mutex mutex;
+ struct page *free_pages;
+ spinlock_t lock;
};
-static inline struct nouveau_dmem *page_to_dmem(struct page *page)
+static struct nouveau_dmem_chunk *nouveau_page_to_chunk(struct page *page)
{
- return container_of(page->pgmap, struct nouveau_dmem, pagemap);
+ return container_of(page->pgmap, struct nouveau_dmem_chunk, pagemap);
+}
+
+static struct nouveau_drm *page_to_drm(struct page *page)
+{
+ struct nouveau_dmem_chunk *chunk = nouveau_page_to_chunk(page);
+
+ return chunk->drm;
}
-static unsigned long nouveau_dmem_page_addr(struct page *page)
+unsigned long nouveau_dmem_page_addr(struct page *page)
{
- struct nouveau_dmem_chunk *chunk = page->zone_device_data;
- unsigned long idx = page_to_pfn(page) - chunk->pfn_first;
+ struct nouveau_dmem_chunk *chunk = nouveau_page_to_chunk(page);
+ unsigned long off = (page_to_pfn(page) << PAGE_SHIFT) -
+ chunk->pagemap.res.start;
- return (idx << PAGE_SHIFT) + chunk->bo->bo.offset;
+ return chunk->bo->bo.offset + off;
}
static void nouveau_dmem_page_free(struct page *page)
{
- struct nouveau_dmem_chunk *chunk = page->zone_device_data;
- unsigned long idx = page_to_pfn(page) - chunk->pfn_first;
+ struct nouveau_dmem_chunk *chunk = nouveau_page_to_chunk(page);
+ struct nouveau_dmem *dmem = chunk->drm->dmem;
+
+ spin_lock(&dmem->lock);
+ page->zone_device_data = dmem->free_pages;
+ dmem->free_pages = page;
- /*
- * FIXME:
- *
- * This is really a bad example, we need to overhaul nouveau memory
- * management to be more page focus and allow lighter locking scheme
- * to be use in the process.
- */
- spin_lock(&chunk->lock);
- clear_bit(idx, chunk->bitmap);
WARN_ON(!chunk->callocated);
chunk->callocated--;
/*
* FIXME when chunk->callocated reaches 0 we should add the chunk to
* a reclaim list so that it can be freed in case of memory pressure.
*/
- spin_unlock(&chunk->lock);
+ spin_unlock(&dmem->lock);
}
static void nouveau_dmem_fence_done(struct nouveau_fence **fence)
@@ -165,8 +170,8 @@ error_free_page:
static vm_fault_t nouveau_dmem_migrate_to_ram(struct vm_fault *vmf)
{
- struct nouveau_dmem *dmem = page_to_dmem(vmf->page);
- struct nouveau_drm *drm = dmem->drm;
+ struct nouveau_drm *drm = page_to_drm(vmf->page);
+ struct nouveau_dmem *dmem = drm->dmem;
struct nouveau_fence *fence;
unsigned long src = 0, dst = 0;
dma_addr_t dma_addr = 0;
@@ -209,131 +214,105 @@ static const struct dev_pagemap_ops nouveau_dmem_pagemap_ops = {
};
static int
-nouveau_dmem_chunk_alloc(struct nouveau_drm *drm)
+nouveau_dmem_chunk_alloc(struct nouveau_drm *drm, struct page **ppage)
{
struct nouveau_dmem_chunk *chunk;
+ struct resource *res;
+ struct page *page;
+ void *ptr;
+ unsigned long i, pfn_first;
int ret;
- if (drm->dmem == NULL)
- return -EINVAL;
-
- mutex_lock(&drm->dmem->mutex);
- chunk = list_first_entry_or_null(&drm->dmem->chunk_empty,
- struct nouveau_dmem_chunk,
- list);
+ chunk = kzalloc(sizeof(*chunk), GFP_KERNEL);
if (chunk == NULL) {
- mutex_unlock(&drm->dmem->mutex);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto out;
}
- list_del(&chunk->list);
- mutex_unlock(&drm->dmem->mutex);
+ /* Allocate unused physical address space for device private pages. */
+ res = request_free_mem_region(&iomem_resource, DMEM_CHUNK_SIZE,
+ "nouveau_dmem");
+ if (IS_ERR(res)) {
+ ret = PTR_ERR(res);
+ goto out_free;
+ }
+
+ chunk->drm = drm;
+ chunk->pagemap.type = MEMORY_DEVICE_PRIVATE;
+ chunk->pagemap.res = *res;
+ chunk->pagemap.ops = &nouveau_dmem_pagemap_ops;
+ chunk->pagemap.owner = drm->dev;
ret = nouveau_bo_new(&drm->client, DMEM_CHUNK_SIZE, 0,
TTM_PL_FLAG_VRAM, 0, 0, NULL, NULL,
&chunk->bo);
if (ret)
- goto out;
+ goto out_release;
ret = nouveau_bo_pin(chunk->bo, TTM_PL_FLAG_VRAM, false);
- if (ret) {
- nouveau_bo_ref(NULL, &chunk->bo);
- goto out;
- }
+ if (ret)
+ goto out_bo_free;
- bitmap_zero(chunk->bitmap, DMEM_CHUNK_NPAGES);
- spin_lock_init(&chunk->lock);
+ ptr = memremap_pages(&chunk->pagemap, numa_node_id());
+ if (IS_ERR(ptr)) {
+ ret = PTR_ERR(ptr);
+ goto out_bo_unpin;
+ }
-out:
mutex_lock(&drm->dmem->mutex);
- if (chunk->bo)
- list_add(&chunk->list, &drm->dmem->chunk_empty);
- else
- list_add_tail(&chunk->list, &drm->dmem->chunk_empty);
+ list_add(&chunk->list, &drm->dmem->chunks);
mutex_unlock(&drm->dmem->mutex);
- return ret;
-}
-
-static struct nouveau_dmem_chunk *
-nouveau_dmem_chunk_first_free_locked(struct nouveau_drm *drm)
-{
- struct nouveau_dmem_chunk *chunk;
-
- chunk = list_first_entry_or_null(&drm->dmem->chunk_free,
- struct nouveau_dmem_chunk,
- list);
- if (chunk)
- return chunk;
-
- chunk = list_first_entry_or_null(&drm->dmem->chunk_empty,
- struct nouveau_dmem_chunk,
- list);
- if (chunk->bo)
- return chunk;
-
- return NULL;
-}
-
-static int
-nouveau_dmem_pages_alloc(struct nouveau_drm *drm,
- unsigned long npages,
- unsigned long *pages)
-{
- struct nouveau_dmem_chunk *chunk;
- unsigned long c;
- int ret;
-
- memset(pages, 0xff, npages * sizeof(*pages));
-
- mutex_lock(&drm->dmem->mutex);
- for (c = 0; c < npages;) {
- unsigned long i;
-
- chunk = nouveau_dmem_chunk_first_free_locked(drm);
- if (chunk == NULL) {
- mutex_unlock(&drm->dmem->mutex);
- ret = nouveau_dmem_chunk_alloc(drm);
- if (ret) {
- if (c)
- return 0;
- return ret;
- }
- mutex_lock(&drm->dmem->mutex);
- continue;
- }
-
- spin_lock(&chunk->lock);
- i = find_first_zero_bit(chunk->bitmap, DMEM_CHUNK_NPAGES);
- while (i < DMEM_CHUNK_NPAGES && c < npages) {
- pages[c] = chunk->pfn_first + i;
- set_bit(i, chunk->bitmap);
- chunk->callocated++;
- c++;
-
- i = find_next_zero_bit(chunk->bitmap,
- DMEM_CHUNK_NPAGES, i);
- }
- spin_unlock(&chunk->lock);
+ pfn_first = chunk->pagemap.res.start >> PAGE_SHIFT;
+ page = pfn_to_page(pfn_first);
+ spin_lock(&drm->dmem->lock);
+ for (i = 0; i < DMEM_CHUNK_NPAGES - 1; ++i, ++page) {
+ page->zone_device_data = drm->dmem->free_pages;
+ drm->dmem->free_pages = page;
}
- mutex_unlock(&drm->dmem->mutex);
+ *ppage = page;
+ chunk->callocated++;
+ spin_unlock(&drm->dmem->lock);
+
+ NV_INFO(drm, "DMEM: registered %ldMB of device memory\n",
+ DMEM_CHUNK_SIZE >> 20);
return 0;
+
+out_bo_unpin:
+ nouveau_bo_unpin(chunk->bo);
+out_bo_free:
+ nouveau_bo_ref(NULL, &chunk->bo);
+out_release:
+ release_mem_region(chunk->pagemap.res.start,
+ resource_size(&chunk->pagemap.res));
+out_free:
+ kfree(chunk);
+out:
+ return ret;
}
static struct page *
nouveau_dmem_page_alloc_locked(struct nouveau_drm *drm)
{
- unsigned long pfns[1];
- struct page *page;
+ struct nouveau_dmem_chunk *chunk;
+ struct page *page = NULL;
int ret;
- /* FIXME stop all the miss-match API ... */
- ret = nouveau_dmem_pages_alloc(drm, 1, pfns);
- if (ret)
- return NULL;
+ spin_lock(&drm->dmem->lock);
+ if (drm->dmem->free_pages) {
+ page = drm->dmem->free_pages;
+ drm->dmem->free_pages = page->zone_device_data;
+ chunk = nouveau_page_to_chunk(page);
+ chunk->callocated++;
+ spin_unlock(&drm->dmem->lock);
+ } else {
+ spin_unlock(&drm->dmem->lock);
+ ret = nouveau_dmem_chunk_alloc(drm, &page);
+ if (ret)
+ return NULL;
+ }
- page = pfn_to_page(pfns[0]);
get_page(page);
lock_page(page);
return page;
@@ -356,12 +335,7 @@ nouveau_dmem_resume(struct nouveau_drm *drm)
return;
mutex_lock(&drm->dmem->mutex);
- list_for_each_entry (chunk, &drm->dmem->chunk_free, list) {
- ret = nouveau_bo_pin(chunk->bo, TTM_PL_FLAG_VRAM, false);
- /* FIXME handle pin failure */
- WARN_ON(ret);
- }
- list_for_each_entry (chunk, &drm->dmem->chunk_full, list) {
+ list_for_each_entry(chunk, &drm->dmem->chunks, list) {
ret = nouveau_bo_pin(chunk->bo, TTM_PL_FLAG_VRAM, false);
/* FIXME handle pin failure */
WARN_ON(ret);
@@ -378,12 +352,8 @@ nouveau_dmem_suspend(struct nouveau_drm *drm)
return;
mutex_lock(&drm->dmem->mutex);
- list_for_each_entry (chunk, &drm->dmem->chunk_free, list) {
- nouveau_bo_unpin(chunk->bo);
- }
- list_for_each_entry (chunk, &drm->dmem->chunk_full, list) {
+ list_for_each_entry(chunk, &drm->dmem->chunks, list)
nouveau_bo_unpin(chunk->bo);
- }
mutex_unlock(&drm->dmem->mutex);
}
@@ -397,15 +367,13 @@ nouveau_dmem_fini(struct nouveau_drm *drm)
mutex_lock(&drm->dmem->mutex);
- WARN_ON(!list_empty(&drm->dmem->chunk_free));
- WARN_ON(!list_empty(&drm->dmem->chunk_full));
-
- list_for_each_entry_safe (chunk, tmp, &drm->dmem->chunk_empty, list) {
- if (chunk->bo) {
- nouveau_bo_unpin(chunk->bo);
- nouveau_bo_ref(NULL, &chunk->bo);
- }
+ list_for_each_entry_safe(chunk, tmp, &drm->dmem->chunks, list) {
+ nouveau_bo_unpin(chunk->bo);
+ nouveau_bo_ref(NULL, &chunk->bo);
list_del(&chunk->list);
+ memunmap_pages(&chunk->pagemap);
+ release_mem_region(chunk->pagemap.res.start,
+ resource_size(&chunk->pagemap.res));
kfree(chunk);
}
@@ -472,6 +440,52 @@ nvc0b5_migrate_copy(struct nouveau_drm *drm, u64 npages,
}
static int
+nvc0b5_migrate_clear(struct nouveau_drm *drm, u32 length,
+ enum nouveau_aper dst_aper, u64 dst_addr)
+{
+ struct nouveau_channel *chan = drm->dmem->migrate.chan;
+ u32 launch_dma = (1 << 10) /* REMAP_ENABLE_TRUE */ |
+ (1 << 8) /* DST_MEMORY_LAYOUT_PITCH. */ |
+ (1 << 7) /* SRC_MEMORY_LAYOUT_PITCH. */ |
+ (1 << 2) /* FLUSH_ENABLE_TRUE. */ |
+ (2 << 0) /* DATA_TRANSFER_TYPE_NON_PIPELINED. */;
+ u32 remap = (4 << 0) /* DST_X_CONST_A */ |
+ (5 << 4) /* DST_Y_CONST_B */ |
+ (3 << 16) /* COMPONENT_SIZE_FOUR */ |
+ (1 << 24) /* NUM_DST_COMPONENTS_TWO */;
+ int ret;
+
+ ret = RING_SPACE(chan, 12);
+ if (ret)
+ return ret;
+
+ switch (dst_aper) {
+ case NOUVEAU_APER_VRAM:
+ BEGIN_IMC0(chan, NvSubCopy, 0x0264, 0);
+ break;
+ case NOUVEAU_APER_HOST:
+ BEGIN_IMC0(chan, NvSubCopy, 0x0264, 1);
+ break;
+ default:
+ return -EINVAL;
+ }
+ launch_dma |= 0x00002000; /* DST_TYPE_PHYSICAL. */
+
+ BEGIN_NVC0(chan, NvSubCopy, 0x0700, 3);
+ OUT_RING(chan, 0);
+ OUT_RING(chan, 0);
+ OUT_RING(chan, remap);
+ BEGIN_NVC0(chan, NvSubCopy, 0x0408, 2);
+ OUT_RING(chan, upper_32_bits(dst_addr));
+ OUT_RING(chan, lower_32_bits(dst_addr));
+ BEGIN_NVC0(chan, NvSubCopy, 0x0418, 1);
+ OUT_RING(chan, length >> 3);
+ BEGIN_NVC0(chan, NvSubCopy, 0x0300, 1);
+ OUT_RING(chan, launch_dma);
+ return 0;
+}
+
+static int
nouveau_dmem_migrate_init(struct nouveau_drm *drm)
{
switch (drm->ttm.copy.oclass) {
@@ -480,6 +494,7 @@ nouveau_dmem_migrate_init(struct nouveau_drm *drm)
case VOLTA_DMA_COPY_A:
case TURING_DMA_COPY_A:
drm->dmem->migrate.copy_func = nvc0b5_migrate_copy;
+ drm->dmem->migrate.clear_func = nvc0b5_migrate_clear;
drm->dmem->migrate.chan = drm->ttm.chan;
return 0;
default:
@@ -491,9 +506,6 @@ nouveau_dmem_migrate_init(struct nouveau_drm *drm)
void
nouveau_dmem_init(struct nouveau_drm *drm)
{
- struct device *device = drm->dev->dev;
- struct resource *res;
- unsigned long i, size, pfn_first;
int ret;
/* This only makes sense on PASCAL or newer */
@@ -505,84 +517,52 @@ nouveau_dmem_init(struct nouveau_drm *drm)
drm->dmem->drm = drm;
mutex_init(&drm->dmem->mutex);
- INIT_LIST_HEAD(&drm->dmem->chunk_free);
- INIT_LIST_HEAD(&drm->dmem->chunk_full);
- INIT_LIST_HEAD(&drm->dmem->chunk_empty);
-
- size = ALIGN(drm->client.device.info.ram_user, DMEM_CHUNK_SIZE);
+ INIT_LIST_HEAD(&drm->dmem->chunks);
+ spin_lock_init(&drm->dmem->lock);
/* Initialize migration dma helpers before registering memory */
ret = nouveau_dmem_migrate_init(drm);
- if (ret)
- goto out_free;
-
- /*
- * FIXME we need some kind of policy to decide how much VRAM we
- * want to register with HMM. For now just register everything
- * and latter if we want to do thing like over commit then we
- * could revisit this.
- */
- res = devm_request_free_mem_region(device, &iomem_resource, size);
- if (IS_ERR(res))
- goto out_free;
- drm->dmem->pagemap.type = MEMORY_DEVICE_PRIVATE;
- drm->dmem->pagemap.res = *res;
- drm->dmem->pagemap.ops = &nouveau_dmem_pagemap_ops;
- drm->dmem->pagemap.owner = drm->dev;
- if (IS_ERR(devm_memremap_pages(device, &drm->dmem->pagemap)))
- goto out_free;
-
- pfn_first = res->start >> PAGE_SHIFT;
- for (i = 0; i < (size / DMEM_CHUNK_SIZE); ++i) {
- struct nouveau_dmem_chunk *chunk;
- struct page *page;
- unsigned long j;
-
- chunk = kzalloc(sizeof(*chunk), GFP_KERNEL);
- if (chunk == NULL) {
- nouveau_dmem_fini(drm);
- return;
- }
-
- chunk->drm = drm;
- chunk->pfn_first = pfn_first + (i * DMEM_CHUNK_NPAGES);
- list_add_tail(&chunk->list, &drm->dmem->chunk_empty);
-
- page = pfn_to_page(chunk->pfn_first);
- for (j = 0; j < DMEM_CHUNK_NPAGES; ++j, ++page)
- page->zone_device_data = chunk;
+ if (ret) {
+ kfree(drm->dmem);
+ drm->dmem = NULL;
}
-
- NV_INFO(drm, "DMEM: registered %ldMB of device memory\n", size >> 20);
- return;
-out_free:
- kfree(drm->dmem);
- drm->dmem = NULL;
}
static unsigned long nouveau_dmem_migrate_copy_one(struct nouveau_drm *drm,
- unsigned long src, dma_addr_t *dma_addr)
+ unsigned long src, dma_addr_t *dma_addr, u64 *pfn)
{
struct device *dev = drm->dev->dev;
struct page *dpage, *spage;
+ unsigned long paddr;
spage = migrate_pfn_to_page(src);
- if (!spage || !(src & MIGRATE_PFN_MIGRATE))
+ if (!(src & MIGRATE_PFN_MIGRATE))
goto out;
dpage = nouveau_dmem_page_alloc_locked(drm);
if (!dpage)
- return 0;
-
- *dma_addr = dma_map_page(dev, spage, 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
- if (dma_mapping_error(dev, *dma_addr))
- goto out_free_page;
+ goto out;
- if (drm->dmem->migrate.copy_func(drm, 1, NOUVEAU_APER_VRAM,
- nouveau_dmem_page_addr(dpage), NOUVEAU_APER_HOST,
- *dma_addr))
- goto out_dma_unmap;
+ paddr = nouveau_dmem_page_addr(dpage);
+ if (spage) {
+ *dma_addr = dma_map_page(dev, spage, 0, page_size(spage),
+ DMA_BIDIRECTIONAL);
+ if (dma_mapping_error(dev, *dma_addr))
+ goto out_free_page;
+ if (drm->dmem->migrate.copy_func(drm, page_size(spage),
+ NOUVEAU_APER_VRAM, paddr, NOUVEAU_APER_HOST, *dma_addr))
+ goto out_dma_unmap;
+ } else {
+ *dma_addr = DMA_MAPPING_ERROR;
+ if (drm->dmem->migrate.clear_func(drm, page_size(dpage),
+ NOUVEAU_APER_VRAM, paddr))
+ goto out_free_page;
+ }
+ *pfn = NVIF_VMM_PFNMAP_V0_V | NVIF_VMM_PFNMAP_V0_VRAM |
+ ((paddr >> PAGE_SHIFT) << NVIF_VMM_PFNMAP_V0_ADDR_SHIFT);
+ if (src & MIGRATE_PFN_WRITE)
+ *pfn |= NVIF_VMM_PFNMAP_V0_W;
return migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;
out_dma_unmap:
@@ -590,19 +571,21 @@ out_dma_unmap:
out_free_page:
nouveau_dmem_page_free_locked(drm, dpage);
out:
+ *pfn = NVIF_VMM_PFNMAP_V0_NONE;
return 0;
}
static void nouveau_dmem_migrate_chunk(struct nouveau_drm *drm,
- struct migrate_vma *args, dma_addr_t *dma_addrs)
+ struct nouveau_svmm *svmm, struct migrate_vma *args,
+ dma_addr_t *dma_addrs, u64 *pfns)
{
struct nouveau_fence *fence;
unsigned long addr = args->start, nr_dma = 0, i;
for (i = 0; addr < args->end; i++) {
args->dst[i] = nouveau_dmem_migrate_copy_one(drm, args->src[i],
- dma_addrs + nr_dma);
- if (args->dst[i])
+ dma_addrs + nr_dma, pfns + i);
+ if (!dma_mapping_error(drm->dev->dev, dma_addrs[nr_dma]))
nr_dma++;
addr += PAGE_SIZE;
}
@@ -610,20 +593,18 @@ static void nouveau_dmem_migrate_chunk(struct nouveau_drm *drm,
nouveau_fence_new(drm->dmem->migrate.chan, false, &fence);
migrate_vma_pages(args);
nouveau_dmem_fence_done(&fence);
+ nouveau_pfns_map(svmm, args->vma->vm_mm, args->start, pfns, i);
while (nr_dma--) {
dma_unmap_page(drm->dev->dev, dma_addrs[nr_dma], PAGE_SIZE,
DMA_BIDIRECTIONAL);
}
- /*
- * FIXME optimization: update GPU page table to point to newly migrated
- * memory.
- */
migrate_vma_finalize(args);
}
int
nouveau_dmem_migrate_vma(struct nouveau_drm *drm,
+ struct nouveau_svmm *svmm,
struct vm_area_struct *vma,
unsigned long start,
unsigned long end)
@@ -635,9 +616,13 @@ nouveau_dmem_migrate_vma(struct nouveau_drm *drm,
.vma = vma,
.start = start,
};
- unsigned long c, i;
+ unsigned long i;
+ u64 *pfns;
int ret = -ENOMEM;
+ if (drm->dmem == NULL)
+ return -ENODEV;
+
args.src = kcalloc(max, sizeof(*args.src), GFP_KERNEL);
if (!args.src)
goto out;
@@ -649,19 +634,25 @@ nouveau_dmem_migrate_vma(struct nouveau_drm *drm,
if (!dma_addrs)
goto out_free_dst;
- for (i = 0; i < npages; i += c) {
- c = min(SG_MAX_SINGLE_ALLOC, npages);
- args.end = start + (c << PAGE_SHIFT);
+ pfns = nouveau_pfns_alloc(max);
+ if (!pfns)
+ goto out_free_dma;
+
+ for (i = 0; i < npages; i += max) {
+ args.end = start + (max << PAGE_SHIFT);
ret = migrate_vma_setup(&args);
if (ret)
- goto out_free_dma;
+ goto out_free_pfns;
if (args.cpages)
- nouveau_dmem_migrate_chunk(drm, &args, dma_addrs);
+ nouveau_dmem_migrate_chunk(drm, svmm, &args, dma_addrs,
+ pfns);
args.start = args.end;
}
ret = 0;
+out_free_pfns:
+ nouveau_pfns_free(pfns);
out_free_dma:
kfree(dma_addrs);
out_free_dst:
@@ -671,28 +662,3 @@ out_free_src:
out:
return ret;
}
-
-void
-nouveau_dmem_convert_pfn(struct nouveau_drm *drm,
- struct hmm_range *range)
-{
- unsigned long i, npages;
-
- npages = (range->end - range->start) >> PAGE_SHIFT;
- for (i = 0; i < npages; ++i) {
- struct page *page;
- uint64_t addr;
-
- page = hmm_device_entry_to_page(range, range->pfns[i]);
- if (page == NULL)
- continue;
-
- if (!is_device_private_page(page))
- continue;
-
- addr = nouveau_dmem_page_addr(page);
- range->pfns[i] &= ((1UL << range->pfn_shift) - 1);
- range->pfns[i] |= (addr >> PAGE_SHIFT) << range->pfn_shift;
- range->pfns[i] |= NVIF_VMM_PFNMAP_V0_VRAM;
- }
-}
diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.h b/drivers/gpu/drm/nouveau/nouveau_dmem.h
index 92394be5d649..64da5d3635c8 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dmem.h
+++ b/drivers/gpu/drm/nouveau/nouveau_dmem.h
@@ -25,6 +25,7 @@
struct drm_device;
struct drm_file;
struct nouveau_drm;
+struct nouveau_svmm;
struct hmm_range;
#if IS_ENABLED(CONFIG_DRM_NOUVEAU_SVM)
@@ -34,12 +35,12 @@ void nouveau_dmem_suspend(struct nouveau_drm *);
void nouveau_dmem_resume(struct nouveau_drm *);
int nouveau_dmem_migrate_vma(struct nouveau_drm *drm,
+ struct nouveau_svmm *svmm,
struct vm_area_struct *vma,
unsigned long start,
unsigned long end);
+unsigned long nouveau_dmem_page_addr(struct page *page);
-void nouveau_dmem_convert_pfn(struct nouveau_drm *drm,
- struct hmm_range *range);
#else /* IS_ENABLED(CONFIG_DRM_NOUVEAU_SVM) */
static inline void nouveau_dmem_init(struct nouveau_drm *drm) {}
static inline void nouveau_dmem_fini(struct nouveau_drm *drm) {}
diff --git a/drivers/gpu/drm/nouveau/nouveau_dp.c b/drivers/gpu/drm/nouveau/nouveau_dp.c
index 2674f1587457..8a0f7994e1ae 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dp.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dp.c
@@ -98,3 +98,34 @@ nouveau_dp_detect(struct nouveau_encoder *nv_encoder)
return NOUVEAU_DP_SST;
return ret;
}
+
+/* TODO:
+ * - Use the minimum possible BPC here, once we add support for the max bpc
+ * property.
+ * - Validate the mode against downstream port caps (see
+ * drm_dp_downstream_max_clock())
+ * - Validate against the DP caps advertised by the GPU (we don't check these
+ * yet)
+ */
+enum drm_mode_status
+nv50_dp_mode_valid(struct drm_connector *connector,
+ struct nouveau_encoder *outp,
+ const struct drm_display_mode *mode,
+ unsigned *out_clock)
+{
+ const unsigned min_clock = 25000;
+ unsigned max_clock, clock;
+ enum drm_mode_status ret;
+
+ if (mode->flags & DRM_MODE_FLAG_INTERLACE && !outp->caps.dp_interlace)
+ return MODE_NO_INTERLACE;
+
+ max_clock = outp->dp.link_nr * outp->dp.link_bw;
+ clock = mode->clock * (connector->display_info.bpc * 3) / 10;
+
+ ret = nouveau_conn_mode_clock_valid(mode, min_clock, max_clock,
+ &clock);
+ if (out_clock)
+ *out_clock = clock;
+ return ret;
+}
diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
index ca4087f5a15b..ac93d12201dc 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
@@ -681,8 +681,6 @@ static int nouveau_drm_probe(struct pci_dev *pdev,
{
struct nvkm_device *device;
struct drm_device *drm_dev;
- struct apertures_struct *aper;
- bool boot = false;
int ret;
if (vga_switcheroo_client_probe_defer(pdev))
@@ -699,32 +697,9 @@ static int nouveau_drm_probe(struct pci_dev *pdev,
nvkm_device_del(&device);
/* Remove conflicting drivers (vesafb, efifb etc). */
- aper = alloc_apertures(3);
- if (!aper)
- return -ENOMEM;
-
- aper->ranges[0].base = pci_resource_start(pdev, 1);
- aper->ranges[0].size = pci_resource_len(pdev, 1);
- aper->count = 1;
-
- if (pci_resource_len(pdev, 2)) {
- aper->ranges[aper->count].base = pci_resource_start(pdev, 2);
- aper->ranges[aper->count].size = pci_resource_len(pdev, 2);
- aper->count++;
- }
-
- if (pci_resource_len(pdev, 3)) {
- aper->ranges[aper->count].base = pci_resource_start(pdev, 3);
- aper->ranges[aper->count].size = pci_resource_len(pdev, 3);
- aper->count++;
- }
-
-#ifdef CONFIG_X86
- boot = pdev->resource[PCI_ROM_RESOURCE].flags & IORESOURCE_ROM_SHADOW;
-#endif
- if (nouveau_modeset != 2)
- drm_fb_helper_remove_conflicting_framebuffers(aper, "nouveaufb", boot);
- kfree(aper);
+ ret = drm_fb_helper_remove_conflicting_pci_framebuffers(pdev, "nouveaufb");
+ if (ret)
+ return ret;
ret = nvkm_device_pci_new(pdev, nouveau_config, nouveau_debug,
true, true, ~0ULL, &device);
diff --git a/drivers/gpu/drm/nouveau/nouveau_encoder.h b/drivers/gpu/drm/nouveau/nouveau_encoder.h
index 3517f920bf89..de51733b0476 100644
--- a/drivers/gpu/drm/nouveau/nouveau_encoder.h
+++ b/drivers/gpu/drm/nouveau/nouveau_encoder.h
@@ -66,6 +66,10 @@ struct nouveau_encoder {
} dp;
};
+ struct {
+ bool dp_interlace : 1;
+ } caps;
+
void (*enc_save)(struct drm_encoder *encoder);
void (*enc_restore)(struct drm_encoder *encoder);
void (*update)(struct nouveau_encoder *, u8 head,
@@ -100,6 +104,10 @@ enum nouveau_dp_status {
};
int nouveau_dp_detect(struct nouveau_encoder *);
+enum drm_mode_status nv50_dp_mode_valid(struct drm_connector *,
+ struct nouveau_encoder *,
+ const struct drm_display_mode *,
+ unsigned *clock);
struct nouveau_connector *
nouveau_encoder_connector_get(struct nouveau_encoder *encoder);
diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
index 24d543a01f43..3d11b84d4cf9 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
@@ -312,7 +312,7 @@ nouveau_fbcon_create(struct drm_fb_helper *helper,
struct nouveau_drm *drm = nouveau_drm(dev);
struct nvif_device *device = &drm->client.device;
struct fb_info *info;
- struct nouveau_framebuffer *fb;
+ struct drm_framebuffer *fb;
struct nouveau_channel *chan;
struct nouveau_bo *nvbo;
struct drm_mode_fb_cmd2 mode_cmd;
@@ -335,7 +335,7 @@ nouveau_fbcon_create(struct drm_fb_helper *helper,
goto out;
}
- ret = nouveau_framebuffer_new(dev, &mode_cmd, nvbo, &fb);
+ ret = nouveau_framebuffer_new(dev, &mode_cmd, &nvbo->bo.base, &fb);
if (ret)
goto out_unref;
@@ -353,7 +353,7 @@ nouveau_fbcon_create(struct drm_fb_helper *helper,
chan = nouveau_nofbaccel ? NULL : drm->channel;
if (chan && device->info.family >= NV_DEVICE_INFO_V0_TESLA) {
- ret = nouveau_vma_new(nvbo, chan->vmm, &fb->vma);
+ ret = nouveau_vma_new(nvbo, chan->vmm, &fbcon->vma);
if (ret) {
NV_ERROR(drm, "failed to map fb into chan: %d\n", ret);
chan = NULL;
@@ -367,7 +367,7 @@ nouveau_fbcon_create(struct drm_fb_helper *helper,
}
/* setup helper */
- fbcon->helper.fb = &fb->base;
+ fbcon->helper.fb = fb;
if (!chan)
info->flags = FBINFO_HWACCEL_DISABLED;
@@ -376,12 +376,12 @@ nouveau_fbcon_create(struct drm_fb_helper *helper,
FBINFO_HWACCEL_FILLRECT |
FBINFO_HWACCEL_IMAGEBLIT;
info->fbops = &nouveau_fbcon_sw_ops;
- info->fix.smem_start = fb->nvbo->bo.mem.bus.base +
- fb->nvbo->bo.mem.bus.offset;
- info->fix.smem_len = fb->nvbo->bo.mem.num_pages << PAGE_SHIFT;
+ info->fix.smem_start = nvbo->bo.mem.bus.base +
+ nvbo->bo.mem.bus.offset;
+ info->fix.smem_len = nvbo->bo.mem.num_pages << PAGE_SHIFT;
- info->screen_base = nvbo_kmap_obj_iovirtual(fb->nvbo);
- info->screen_size = fb->nvbo->bo.mem.num_pages << PAGE_SHIFT;
+ info->screen_base = nvbo_kmap_obj_iovirtual(nvbo);
+ info->screen_size = nvbo->bo.mem.num_pages << PAGE_SHIFT;
drm_fb_helper_fill_info(info, &fbcon->helper, sizes);
@@ -393,19 +393,19 @@ nouveau_fbcon_create(struct drm_fb_helper *helper,
/* To allow resizing without swapping buffers */
NV_INFO(drm, "allocated %dx%d fb: 0x%llx, bo %p\n",
- fb->base.width, fb->base.height, fb->nvbo->bo.offset, nvbo);
+ fb->width, fb->height, nvbo->bo.offset, nvbo);
vga_switcheroo_client_fb_set(dev->pdev, info);
return 0;
out_unlock:
if (chan)
- nouveau_vma_del(&fb->vma);
- nouveau_bo_unmap(fb->nvbo);
+ nouveau_vma_del(&fbcon->vma);
+ nouveau_bo_unmap(nvbo);
out_unpin:
- nouveau_bo_unpin(fb->nvbo);
+ nouveau_bo_unpin(nvbo);
out_unref:
- nouveau_bo_ref(NULL, &fb->nvbo);
+ nouveau_bo_ref(NULL, &nvbo);
out:
return ret;
}
@@ -413,16 +413,18 @@ out:
static int
nouveau_fbcon_destroy(struct drm_device *dev, struct nouveau_fbdev *fbcon)
{
- struct nouveau_framebuffer *nouveau_fb = nouveau_framebuffer(fbcon->helper.fb);
+ struct drm_framebuffer *fb = fbcon->helper.fb;
+ struct nouveau_bo *nvbo;
drm_fb_helper_unregister_fbi(&fbcon->helper);
drm_fb_helper_fini(&fbcon->helper);
- if (nouveau_fb && nouveau_fb->nvbo) {
- nouveau_vma_del(&nouveau_fb->vma);
- nouveau_bo_unmap(nouveau_fb->nvbo);
- nouveau_bo_unpin(nouveau_fb->nvbo);
- drm_framebuffer_put(&nouveau_fb->base);
+ if (fb && fb->obj[0]) {
+ nvbo = nouveau_gem_object(fb->obj[0]);
+ nouveau_vma_del(&fbcon->vma);
+ nouveau_bo_unmap(nvbo);
+ nouveau_bo_unpin(nvbo);
+ drm_framebuffer_put(fb);
}
return 0;
diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.h b/drivers/gpu/drm/nouveau/nouveau_fbcon.h
index 73a7eeba3973..1796d8824580 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fbcon.h
+++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.h
@@ -31,6 +31,8 @@
#include "nouveau_display.h"
+struct nouveau_vma;
+
struct nouveau_fbdev {
struct drm_fb_helper helper; /* must be first */
unsigned int saved_flags;
@@ -41,6 +43,7 @@ struct nouveau_fbdev {
struct nvif_object gdi;
struct nvif_object blit;
struct nvif_object twod;
+ struct nouveau_vma *vma;
struct mutex hotplug_lock;
bool hotplug_waiting;
diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
index f5ece1f94973..4c3f131ad31d 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
@@ -76,8 +76,10 @@ nouveau_gem_object_open(struct drm_gem_object *gem, struct drm_file *file_priv)
return ret;
ret = pm_runtime_get_sync(dev);
- if (ret < 0 && ret != -EACCES)
+ if (ret < 0 && ret != -EACCES) {
+ pm_runtime_put_autosuspend(dev);
goto out;
+ }
ret = nouveau_vma_new(nvbo, vmm, &vma);
pm_runtime_mark_last_busy(dev);
@@ -157,8 +159,8 @@ nouveau_gem_object_close(struct drm_gem_object *gem, struct drm_file *file_priv)
if (!WARN_ON(ret < 0 && ret != -EACCES)) {
nouveau_gem_object_unmap(nvbo, vma);
pm_runtime_mark_last_busy(dev);
- pm_runtime_put_autosuspend(dev);
}
+ pm_runtime_put_autosuspend(dev);
}
}
ttm_bo_unreserve(&nvbo->bo);
diff --git a/drivers/gpu/drm/nouveau/nouveau_platform.c b/drivers/gpu/drm/nouveau/nouveau_platform.c
index 039e23548e08..23cd43a7fd19 100644
--- a/drivers/gpu/drm/nouveau/nouveau_platform.c
+++ b/drivers/gpu/drm/nouveau/nouveau_platform.c
@@ -95,14 +95,3 @@ struct platform_driver nouveau_platform_driver = {
.probe = nouveau_platform_probe,
.remove = nouveau_platform_remove,
};
-
-#if IS_ENABLED(CONFIG_ARCH_TEGRA_124_SOC) || IS_ENABLED(CONFIG_ARCH_TEGRA_132_SOC)
-MODULE_FIRMWARE("nvidia/gk20a/fecs_data.bin");
-MODULE_FIRMWARE("nvidia/gk20a/fecs_inst.bin");
-MODULE_FIRMWARE("nvidia/gk20a/gpccs_data.bin");
-MODULE_FIRMWARE("nvidia/gk20a/gpccs_inst.bin");
-MODULE_FIRMWARE("nvidia/gk20a/sw_bundle_init.bin");
-MODULE_FIRMWARE("nvidia/gk20a/sw_ctx.bin");
-MODULE_FIRMWARE("nvidia/gk20a/sw_method_init.bin");
-MODULE_FIRMWARE("nvidia/gk20a/sw_nonctx.bin");
-#endif
diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c
index 645fedd77e21..22f054f7ee3e 100644
--- a/drivers/gpu/drm/nouveau/nouveau_svm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_svm.c
@@ -70,6 +70,12 @@ struct nouveau_svm {
#define SVM_DBG(s,f,a...) NV_DEBUG((s)->drm, "svm: "f"\n", ##a)
#define SVM_ERR(s,f,a...) NV_WARN((s)->drm, "svm: "f"\n", ##a)
+struct nouveau_pfnmap_args {
+ struct nvif_ioctl_v0 i;
+ struct nvif_ioctl_mthd_v0 m;
+ struct nvif_vmm_pfnmap_v0 p;
+};
+
struct nouveau_ivmm {
struct nouveau_svmm *svmm;
u64 inst;
@@ -187,7 +193,8 @@ nouveau_svmm_bind(struct drm_device *dev, void *data,
addr = max(addr, vma->vm_start);
next = min(vma->vm_end, end);
/* This is a best effort so we ignore errors */
- nouveau_dmem_migrate_vma(cli->drm, vma, addr, next);
+ nouveau_dmem_migrate_vma(cli->drm, cli->svm.svmm, vma, addr,
+ next);
addr = next;
}
@@ -369,19 +376,6 @@ out_free:
return ret;
}
-static const u64
-nouveau_svm_pfn_flags[HMM_PFN_FLAG_MAX] = {
- [HMM_PFN_VALID ] = NVIF_VMM_PFNMAP_V0_V,
- [HMM_PFN_WRITE ] = NVIF_VMM_PFNMAP_V0_W,
-};
-
-static const u64
-nouveau_svm_pfn_values[HMM_PFN_VALUE_MAX] = {
- [HMM_PFN_ERROR ] = ~NVIF_VMM_PFNMAP_V0_V,
- [HMM_PFN_NONE ] = NVIF_VMM_PFNMAP_V0_NONE,
- [HMM_PFN_SPECIAL] = ~NVIF_VMM_PFNMAP_V0_V,
-};
-
/* Issue fault replay for GPU to retry accesses that faulted previously. */
static void
nouveau_svm_fault_replay(struct nouveau_svm *svm)
@@ -519,9 +513,45 @@ static const struct mmu_interval_notifier_ops nouveau_svm_mni_ops = {
.invalidate = nouveau_svm_range_invalidate,
};
+static void nouveau_hmm_convert_pfn(struct nouveau_drm *drm,
+ struct hmm_range *range, u64 *ioctl_addr)
+{
+ unsigned long i, npages;
+
+ /*
+ * The ioctl_addr prepared here is passed through nvif_object_ioctl()
+ * to an eventual DMA map in something like gp100_vmm_pgt_pfn()
+ *
+ * This is all just encoding the internal hmm representation into a
+ * different nouveau internal representation.
+ */
+ npages = (range->end - range->start) >> PAGE_SHIFT;
+ for (i = 0; i < npages; ++i) {
+ struct page *page;
+
+ if (!(range->hmm_pfns[i] & HMM_PFN_VALID)) {
+ ioctl_addr[i] = 0;
+ continue;
+ }
+
+ page = hmm_pfn_to_page(range->hmm_pfns[i]);
+ if (is_device_private_page(page))
+ ioctl_addr[i] = nouveau_dmem_page_addr(page) |
+ NVIF_VMM_PFNMAP_V0_V |
+ NVIF_VMM_PFNMAP_V0_VRAM;
+ else
+ ioctl_addr[i] = page_to_phys(page) |
+ NVIF_VMM_PFNMAP_V0_V |
+ NVIF_VMM_PFNMAP_V0_HOST;
+ if (range->hmm_pfns[i] & HMM_PFN_WRITE)
+ ioctl_addr[i] |= NVIF_VMM_PFNMAP_V0_W;
+ }
+}
+
static int nouveau_range_fault(struct nouveau_svmm *svmm,
struct nouveau_drm *drm, void *data, u32 size,
- u64 *pfns, struct svm_notifier *notifier)
+ unsigned long hmm_pfns[], u64 *ioctl_addr,
+ struct svm_notifier *notifier)
{
unsigned long timeout =
jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
@@ -530,26 +560,27 @@ static int nouveau_range_fault(struct nouveau_svmm *svmm,
.notifier = &notifier->notifier,
.start = notifier->notifier.interval_tree.start,
.end = notifier->notifier.interval_tree.last + 1,
- .pfns = pfns,
- .flags = nouveau_svm_pfn_flags,
- .values = nouveau_svm_pfn_values,
- .pfn_shift = NVIF_VMM_PFNMAP_V0_ADDR_SHIFT,
+ .pfn_flags_mask = HMM_PFN_REQ_FAULT | HMM_PFN_REQ_WRITE,
+ .hmm_pfns = hmm_pfns,
};
struct mm_struct *mm = notifier->notifier.mm;
- long ret;
+ int ret;
while (true) {
if (time_after(jiffies, timeout))
return -EBUSY;
range.notifier_seq = mmu_interval_read_begin(range.notifier);
- range.default_flags = 0;
- range.pfn_flags_mask = -1UL;
down_read(&mm->mmap_sem);
ret = hmm_range_fault(&range);
up_read(&mm->mmap_sem);
- if (ret <= 0) {
- if (ret == 0 || ret == -EBUSY)
+ if (ret) {
+ /*
+ * FIXME: the input PFN_REQ flags are destroyed on
+ * -EBUSY, we need to regenerate them, also for the
+ * other continue below
+ */
+ if (ret == -EBUSY)
continue;
return ret;
}
@@ -563,7 +594,7 @@ static int nouveau_range_fault(struct nouveau_svmm *svmm,
break;
}
- nouveau_dmem_convert_pfn(drm, &range);
+ nouveau_hmm_convert_pfn(drm, &range, ioctl_addr);
svmm->vmm->vmm.object.client->super = true;
ret = nvif_object_ioctl(&svmm->vmm->vmm.object, data, size, NULL);
@@ -590,6 +621,7 @@ nouveau_svm_fault(struct nvif_notify *notify)
} i;
u64 phys[16];
} args;
+ unsigned long hmm_pfns[ARRAY_SIZE(args.phys)];
struct vm_area_struct *vma;
u64 inst, start, limit;
int fi, fn, pi, fill;
@@ -705,12 +737,17 @@ nouveau_svm_fault(struct nvif_notify *notify)
* access flags.
*XXX: atomic?
*/
- if (buffer->fault[fn]->access != 0 /* READ. */ &&
- buffer->fault[fn]->access != 3 /* PREFETCH. */) {
- args.phys[pi++] = NVIF_VMM_PFNMAP_V0_V |
- NVIF_VMM_PFNMAP_V0_W;
- } else {
- args.phys[pi++] = NVIF_VMM_PFNMAP_V0_V;
+ switch (buffer->fault[fn]->access) {
+ case 0: /* READ. */
+ hmm_pfns[pi++] = HMM_PFN_REQ_FAULT;
+ break;
+ case 3: /* PREFETCH. */
+ hmm_pfns[pi++] = 0;
+ break;
+ default:
+ hmm_pfns[pi++] = HMM_PFN_REQ_FAULT |
+ HMM_PFN_REQ_WRITE;
+ break;
}
args.i.p.size = pi << PAGE_SHIFT;
@@ -738,7 +775,7 @@ nouveau_svm_fault(struct nvif_notify *notify)
fill = (buffer->fault[fn ]->addr -
buffer->fault[fn - 1]->addr) >> PAGE_SHIFT;
while (--fill)
- args.phys[pi++] = NVIF_VMM_PFNMAP_V0_NONE;
+ hmm_pfns[pi++] = 0;
}
SVMM_DBG(svmm, "wndw %016llx-%016llx covering %d fault(s)",
@@ -754,7 +791,7 @@ nouveau_svm_fault(struct nvif_notify *notify)
ret = nouveau_range_fault(
svmm, svm->drm, &args,
sizeof(args.i) + pi * sizeof(args.phys[0]),
- args.phys, &notifier);
+ hmm_pfns, args.phys, &notifier);
mmu_interval_notifier_remove(&notifier.notifier);
}
mmput(mm);
@@ -784,6 +821,56 @@ nouveau_svm_fault(struct nvif_notify *notify)
return NVIF_NOTIFY_KEEP;
}
+static struct nouveau_pfnmap_args *
+nouveau_pfns_to_args(void *pfns)
+{
+ return container_of(pfns, struct nouveau_pfnmap_args, p.phys);
+}
+
+u64 *
+nouveau_pfns_alloc(unsigned long npages)
+{
+ struct nouveau_pfnmap_args *args;
+
+ args = kzalloc(struct_size(args, p.phys, npages), GFP_KERNEL);
+ if (!args)
+ return NULL;
+
+ args->i.type = NVIF_IOCTL_V0_MTHD;
+ args->m.method = NVIF_VMM_V0_PFNMAP;
+ args->p.page = PAGE_SHIFT;
+
+ return args->p.phys;
+}
+
+void
+nouveau_pfns_free(u64 *pfns)
+{
+ struct nouveau_pfnmap_args *args = nouveau_pfns_to_args(pfns);
+
+ kfree(args);
+}
+
+void
+nouveau_pfns_map(struct nouveau_svmm *svmm, struct mm_struct *mm,
+ unsigned long addr, u64 *pfns, unsigned long npages)
+{
+ struct nouveau_pfnmap_args *args = nouveau_pfns_to_args(pfns);
+ int ret;
+
+ args->p.addr = addr;
+ args->p.size = npages << PAGE_SHIFT;
+
+ mutex_lock(&svmm->mutex);
+
+ svmm->vmm->vmm.object.client->super = true;
+ ret = nvif_object_ioctl(&svmm->vmm->vmm.object, args, sizeof(*args) +
+ npages * sizeof(args->p.phys[0]), NULL);
+ svmm->vmm->vmm.object.client->super = false;
+
+ mutex_unlock(&svmm->mutex);
+}
+
static void
nouveau_svm_fault_buffer_fini(struct nouveau_svm *svm, int id)
{
diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.h b/drivers/gpu/drm/nouveau/nouveau_svm.h
index e839d8189461..f0fcd1b72e8b 100644
--- a/drivers/gpu/drm/nouveau/nouveau_svm.h
+++ b/drivers/gpu/drm/nouveau/nouveau_svm.h
@@ -18,6 +18,11 @@ void nouveau_svmm_fini(struct nouveau_svmm **);
int nouveau_svmm_join(struct nouveau_svmm *, u64 inst);
void nouveau_svmm_part(struct nouveau_svmm *, u64 inst);
int nouveau_svmm_bind(struct drm_device *, void *, struct drm_file *);
+
+u64 *nouveau_pfns_alloc(unsigned long npages);
+void nouveau_pfns_free(u64 *pfns);
+void nouveau_pfns_map(struct nouveau_svmm *svmm, struct mm_struct *mm,
+ unsigned long addr, u64 *pfns, unsigned long npages);
#else /* IS_ENABLED(CONFIG_DRM_NOUVEAU_SVM) */
static inline void nouveau_svm_init(struct nouveau_drm *drm) {}
static inline void nouveau_svm_fini(struct nouveau_drm *drm) {}
diff --git a/drivers/gpu/drm/nouveau/nv50_fbcon.c b/drivers/gpu/drm/nouveau/nv50_fbcon.c
index facd18564e0d..47428f79ede8 100644
--- a/drivers/gpu/drm/nouveau/nv50_fbcon.c
+++ b/drivers/gpu/drm/nouveau/nv50_fbcon.c
@@ -149,7 +149,6 @@ int
nv50_fbcon_accel_init(struct fb_info *info)
{
struct nouveau_fbdev *nfbdev = info->par;
- struct nouveau_framebuffer *fb = nouveau_framebuffer(nfbdev->helper.fb);
struct drm_device *dev = nfbdev->helper.dev;
struct nouveau_drm *drm = nouveau_drm(dev);
struct nouveau_channel *chan = drm->channel;
@@ -240,8 +239,8 @@ nv50_fbcon_accel_init(struct fb_info *info)
OUT_RING(chan, info->fix.line_length);
OUT_RING(chan, info->var.xres_virtual);
OUT_RING(chan, info->var.yres_virtual);
- OUT_RING(chan, upper_32_bits(fb->vma->addr));
- OUT_RING(chan, lower_32_bits(fb->vma->addr));
+ OUT_RING(chan, upper_32_bits(nfbdev->vma->addr));
+ OUT_RING(chan, lower_32_bits(nfbdev->vma->addr));
BEGIN_NV04(chan, NvSub2D, 0x0230, 2);
OUT_RING(chan, format);
OUT_RING(chan, 1);
@@ -249,8 +248,8 @@ nv50_fbcon_accel_init(struct fb_info *info)
OUT_RING(chan, info->fix.line_length);
OUT_RING(chan, info->var.xres_virtual);
OUT_RING(chan, info->var.yres_virtual);
- OUT_RING(chan, upper_32_bits(fb->vma->addr));
- OUT_RING(chan, lower_32_bits(fb->vma->addr));
+ OUT_RING(chan, upper_32_bits(nfbdev->vma->addr));
+ OUT_RING(chan, lower_32_bits(nfbdev->vma->addr));
FIRE_RING(chan);
return 0;
diff --git a/drivers/gpu/drm/nouveau/nvc0_fbcon.c b/drivers/gpu/drm/nouveau/nvc0_fbcon.c
index c0deef4fe727..cb56163ed608 100644
--- a/drivers/gpu/drm/nouveau/nvc0_fbcon.c
+++ b/drivers/gpu/drm/nouveau/nvc0_fbcon.c
@@ -150,7 +150,6 @@ nvc0_fbcon_accel_init(struct fb_info *info)
{
struct nouveau_fbdev *nfbdev = info->par;
struct drm_device *dev = nfbdev->helper.dev;
- struct nouveau_framebuffer *fb = nouveau_framebuffer(nfbdev->helper.fb);
struct nouveau_drm *drm = nouveau_drm(dev);
struct nouveau_channel *chan = drm->channel;
int ret, format;
@@ -240,8 +239,8 @@ nvc0_fbcon_accel_init(struct fb_info *info)
OUT_RING (chan, info->fix.line_length);
OUT_RING (chan, info->var.xres_virtual);
OUT_RING (chan, info->var.yres_virtual);
- OUT_RING (chan, upper_32_bits(fb->vma->addr));
- OUT_RING (chan, lower_32_bits(fb->vma->addr));
+ OUT_RING (chan, upper_32_bits(nfbdev->vma->addr));
+ OUT_RING (chan, lower_32_bits(nfbdev->vma->addr));
BEGIN_NVC0(chan, NvSub2D, 0x0230, 10);
OUT_RING (chan, format);
OUT_RING (chan, 1);
@@ -251,8 +250,8 @@ nvc0_fbcon_accel_init(struct fb_info *info)
OUT_RING (chan, info->fix.line_length);
OUT_RING (chan, info->var.xres_virtual);
OUT_RING (chan, info->var.yres_virtual);
- OUT_RING (chan, upper_32_bits(fb->vma->addr));
- OUT_RING (chan, lower_32_bits(fb->vma->addr));
+ OUT_RING (chan, upper_32_bits(nfbdev->vma->addr));
+ OUT_RING (chan, lower_32_bits(nfbdev->vma->addr));
FIRE_RING (chan);
return 0;
diff --git a/drivers/gpu/drm/nouveau/nvkm/core/memory.c b/drivers/gpu/drm/nouveau/nvkm/core/memory.c
index 4cc186262d34..38130ef272d6 100644
--- a/drivers/gpu/drm/nouveau/nvkm/core/memory.c
+++ b/drivers/gpu/drm/nouveau/nvkm/core/memory.c
@@ -140,7 +140,7 @@ nvkm_memory_new(struct nvkm_device *device, enum nvkm_memory_target target,
{
struct nvkm_instmem *imem = device->imem;
struct nvkm_memory *memory;
- int ret = -ENOSYS;
+ int ret;
if (unlikely(target != NVKM_MEM_TARGET_INST || !imem))
return -ENOSYS;
diff --git a/drivers/gpu/drm/nouveau/nvkm/core/subdev.c b/drivers/gpu/drm/nouveau/nvkm/core/subdev.c
index 79a8f9d305c5..49d468b45d3f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/core/subdev.c
+++ b/drivers/gpu/drm/nouveau/nvkm/core/subdev.c
@@ -221,3 +221,14 @@ nvkm_subdev_ctor(const struct nvkm_subdev_func *func,
__mutex_init(&subdev->mutex, name, &nvkm_subdev_lock_class[index]);
subdev->debug = nvkm_dbgopt(device->dbgopt, name);
}
+
+int
+nvkm_subdev_new_(const struct nvkm_subdev_func *func,
+ struct nvkm_device *device, int index,
+ struct nvkm_subdev **psubdev)
+{
+ if (!(*psubdev = kzalloc(sizeof(**psubdev), GFP_KERNEL)))
+ return -ENOMEM;
+ nvkm_subdev_ctor(func, device, index, *psubdev);
+ return 0;
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
index 8ebbe1656008..5b90c2a1bf3d 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
@@ -2924,6 +2924,20 @@ nvkm_device_del(struct nvkm_device **pdevice)
}
}
+static inline bool
+nvkm_device_endianness(struct nvkm_device *device)
+{
+ u32 boot1 = nvkm_rd32(device, 0x000004) & 0x01000001;
+#ifdef __BIG_ENDIAN
+ if (!boot1)
+ return false;
+#else
+ if (boot1)
+ return false;
+#endif
+ return true;
+}
+
int
nvkm_device_ctor(const struct nvkm_device_func *func,
const struct nvkm_device_quirk *quirk,
@@ -2934,8 +2948,7 @@ nvkm_device_ctor(const struct nvkm_device_func *func,
{
struct nvkm_subdev *subdev;
u64 mmio_base, mmio_size;
- u32 boot0, strap;
- void __iomem *map;
+ u32 boot0, boot1, strap;
int ret = -EEXIST, i;
unsigned chipset;
@@ -2961,26 +2974,30 @@ nvkm_device_ctor(const struct nvkm_device_func *func,
mmio_base = device->func->resource_addr(device, 0);
mmio_size = device->func->resource_size(device, 0);
- /* identify the chipset, and determine classes of subdev/engines */
- if (detect) {
- map = ioremap(mmio_base, 0x102000);
- if (ret = -ENOMEM, map == NULL)
+ if (detect || mmio) {
+ device->pri = ioremap(mmio_base, mmio_size);
+ if (device->pri == NULL) {
+ nvdev_error(device, "unable to map PRI\n");
+ ret = -ENOMEM;
goto done;
+ }
+ }
+ /* identify the chipset, and determine classes of subdev/engines */
+ if (detect) {
/* switch mmio to cpu's native endianness */
-#ifndef __BIG_ENDIAN
- if (ioread32_native(map + 0x000004) != 0x00000000) {
-#else
- if (ioread32_native(map + 0x000004) == 0x00000000) {
-#endif
- iowrite32_native(0x01000001, map + 0x000004);
- ioread32_native(map);
+ if (!nvkm_device_endianness(device)) {
+ nvkm_wr32(device, 0x000004, 0x01000001);
+ nvkm_rd32(device, 0x000000);
+ if (!nvkm_device_endianness(device)) {
+ nvdev_error(device,
+ "GPU not supported on big-endian\n");
+ ret = -ENOSYS;
+ goto done;
+ }
}
- /* read boot0 and strapping information */
- boot0 = ioread32_native(map + 0x000000);
- strap = ioread32_native(map + 0x101000);
- iounmap(map);
+ boot0 = nvkm_rd32(device, 0x000000);
/* chipset can be overridden for devel/testing purposes */
chipset = nvkm_longopt(device->cfgopt, "NvChipset", 0);
@@ -3138,6 +3155,17 @@ nvkm_device_ctor(const struct nvkm_device_func *func,
nvdev_info(device, "NVIDIA %s (%08x)\n",
device->chip->name, boot0);
+ /* vGPU detection */
+ boot1 = nvkm_rd32(device, 0x0000004);
+ if (device->card_type >= TU100 && (boot1 & 0x00030000)) {
+ nvdev_info(device, "vGPUs are not supported\n");
+ ret = -ENODEV;
+ goto done;
+ }
+
+ /* read strapping information */
+ strap = nvkm_rd32(device, 0x101000);
+
/* determine frequency of timing crystal */
if ( device->card_type <= NV_10 || device->chipset < 0x17 ||
(device->chipset >= 0x20 && device->chipset < 0x25))
@@ -3158,15 +3186,6 @@ nvkm_device_ctor(const struct nvkm_device_func *func,
if (!device->name)
device->name = device->chip->name;
- if (mmio) {
- device->pri = ioremap(mmio_base, mmio_size);
- if (!device->pri) {
- nvdev_error(device, "unable to map PRI\n");
- ret = -ENOMEM;
- goto done;
- }
- }
-
mutex_init(&device->mutex);
for (i = 0; i < NVKM_SUBDEV_NR; i++) {
@@ -3254,6 +3273,10 @@ nvkm_device_ctor(const struct nvkm_device_func *func,
ret = 0;
done:
+ if (device->pri && (!mmio || ret)) {
+ iounmap(device->pri);
+ device->pri = NULL;
+ }
mutex_unlock(&nv_devices_mutex);
return ret;
}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild
index 0d584d0da59c..571687ba85b8 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild
@@ -47,6 +47,7 @@ nvkm-y += nvkm/engine/disp/dp.o
nvkm-y += nvkm/engine/disp/hdagt215.o
nvkm-y += nvkm/engine/disp/hdagf119.o
+nvkm-y += nvkm/engine/disp/hdagv100.o
nvkm-y += nvkm/engine/disp/hdmi.o
nvkm-y += nvkm/engine/disp/hdmig84.o
@@ -74,6 +75,8 @@ nvkm-y += nvkm/engine/disp/rootgp102.o
nvkm-y += nvkm/engine/disp/rootgv100.o
nvkm-y += nvkm/engine/disp/roottu102.o
+nvkm-y += nvkm/engine/disp/capsgv100.o
+
nvkm-y += nvkm/engine/disp/channv50.o
nvkm-y += nvkm/engine/disp/changf119.o
nvkm-y += nvkm/engine/disp/changv100.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/capsgv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/capsgv100.c
new file mode 100644
index 000000000000..5026e530f4bb
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/capsgv100.c
@@ -0,0 +1,60 @@
+/*
+ * Copyright 2020 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#define gv100_disp_caps(p) container_of((p), struct gv100_disp_caps, object)
+#include "rootnv50.h"
+
+struct gv100_disp_caps {
+ struct nvkm_object object;
+ struct nv50_disp *disp;
+};
+
+static int
+gv100_disp_caps_map(struct nvkm_object *object, void *argv, u32 argc,
+ enum nvkm_object_map *type, u64 *addr, u64 *size)
+{
+ struct gv100_disp_caps *caps = gv100_disp_caps(object);
+ struct nvkm_device *device = caps->disp->base.engine.subdev.device;
+ *type = NVKM_OBJECT_MAP_IO;
+ *addr = 0x640000 + device->func->resource_addr(device, 0);
+ *size = 0x1000;
+ return 0;
+}
+
+static const struct nvkm_object_func
+gv100_disp_caps = {
+ .map = gv100_disp_caps_map,
+};
+
+int
+gv100_disp_caps_new(const struct nvkm_oclass *oclass, void *argv, u32 argc,
+ struct nv50_disp *disp, struct nvkm_object **pobject)
+{
+ struct gv100_disp_caps *caps;
+
+ if (!(caps = kzalloc(sizeof(*caps), GFP_KERNEL)))
+ return -ENOMEM;
+ *pobject = &caps->object;
+
+ nvkm_object_ctor(&gv100_disp_caps, oclass, &caps->object);
+ caps->disp = disp;
+ return 0;
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdagf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdagf119.c
index 0fa0ec0a1de0..19d2d58344e4 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdagf119.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdagf119.c
@@ -24,10 +24,18 @@
#include "ior.h"
void
-gf119_hda_eld(struct nvkm_ior *ior, u8 *data, u8 size)
+gf119_hda_device_entry(struct nvkm_ior *ior, int head)
{
struct nvkm_device *device = ior->disp->engine.subdev.device;
- const u32 soff = 0x030 * ior->id;
+ const u32 hoff = 0x800 * head;
+ nvkm_mask(device, 0x616548 + hoff, 0x00000070, head << 4);
+}
+
+void
+gf119_hda_eld(struct nvkm_ior *ior, int head, u8 *data, u8 size)
+{
+ struct nvkm_device *device = ior->disp->engine.subdev.device;
+ const u32 soff = 0x030 * ior->id + (head * 0x04);
int i;
for (i = 0; i < size; i++)
@@ -41,14 +49,14 @@ void
gf119_hda_hpd(struct nvkm_ior *ior, int head, bool present)
{
struct nvkm_device *device = ior->disp->engine.subdev.device;
- const u32 hoff = 0x800 * head;
+ const u32 soff = 0x030 * ior->id + (head * 0x04);
u32 data = 0x80000000;
u32 mask = 0x80000001;
if (present) {
- nvkm_mask(device, 0x616548 + hoff, 0x00000070, 0x00000000);
+ ior->func->hda.device_entry(ior, head);
data |= 0x00000001;
} else {
mask |= 0x00000002;
}
- nvkm_mask(device, 0x10ec10 + ior->id * 0x030, mask, data);
+ nvkm_mask(device, 0x10ec10 + soff, mask, data);
}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdagt215.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdagt215.c
index 4509d2ba880e..0d1b81fe1093 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdagt215.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdagt215.c
@@ -24,7 +24,7 @@
#include "ior.h"
void
-gt215_hda_eld(struct nvkm_ior *ior, u8 *data, u8 size)
+gt215_hda_eld(struct nvkm_ior *ior, int head, u8 *data, u8 size)
{
struct nvkm_device *device = ior->disp->engine.subdev.device;
const u32 soff = ior->id * 0x800;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdagv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdagv100.c
new file mode 100644
index 000000000000..57d374ecfeef
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdagv100.c
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2020 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "ior.h"
+
+void
+gv100_hda_device_entry(struct nvkm_ior *ior, int head)
+{
+ struct nvkm_device *device = ior->disp->engine.subdev.device;
+ const u32 hoff = 0x800 * head;
+ nvkm_mask(device, 0x616528 + hoff, 0x00000070, head << 4);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h
index 009d3a8b7a50..c1d7a36e4d3c 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h
@@ -87,7 +87,8 @@ struct nvkm_ior_func {
struct {
void (*hpd)(struct nvkm_ior *, int head, bool present);
- void (*eld)(struct nvkm_ior *, u8 *data, u8 size);
+ void (*eld)(struct nvkm_ior *, int head, u8 *data, u8 size);
+ void (*device_entry)(struct nvkm_ior *, int head);
} hda;
};
@@ -158,10 +159,13 @@ void gv100_hdmi_ctrl(struct nvkm_ior *, int, bool, u8, u8, u8 *, u8 , u8 *, u8);
void gm200_hdmi_scdc(struct nvkm_ior *, int, u8);
void gt215_hda_hpd(struct nvkm_ior *, int, bool);
-void gt215_hda_eld(struct nvkm_ior *, u8 *, u8);
+void gt215_hda_eld(struct nvkm_ior *, int, u8 *, u8);
void gf119_hda_hpd(struct nvkm_ior *, int, bool);
-void gf119_hda_eld(struct nvkm_ior *, u8 *, u8);
+void gf119_hda_eld(struct nvkm_ior *, int, u8 *, u8);
+void gf119_hda_device_entry(struct nvkm_ior *, int);
+
+void gv100_hda_device_entry(struct nvkm_ior *, int);
#define IOR_MSG(i,l,f,a...) do { \
struct nvkm_ior *_ior = (i); \
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgv100.c
index 9c658d632d37..47efb48d769a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgv100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgv100.c
@@ -27,6 +27,7 @@
static const struct nv50_disp_root_func
gv100_disp_root = {
.user = {
+ {{-1,-1,GV100_DISP_CAPS }, gv100_disp_caps_new },
{{0,0,GV100_DISP_CURSOR }, gv100_disp_curs_new },
{{0,0,GV100_DISP_WINDOW_IMM_CHANNEL_DMA}, gv100_disp_wimm_new },
{{0,0,GV100_DISP_CORE_CHANNEL_DMA }, gv100_disp_core_new },
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.c
index 5f758948d6e1..a7672ef17d3b 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.c
@@ -155,7 +155,7 @@ nv50_disp_root_mthd_(struct nvkm_object *object, u32 mthd, void *data, u32 size)
if (outp->info.type == DCB_OUTPUT_DP)
ior->func->dp.audio(ior, hidx, true);
ior->func->hda.hpd(ior, hidx, true);
- ior->func->hda.eld(ior, data, size);
+ ior->func->hda.eld(ior, hidx, data, size);
} else {
if (outp->info.type == DCB_OUTPUT_DP)
ior->func->dp.audio(ior, hidx, false);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.h
index a1f942793f98..7070f5408d92 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.h
@@ -24,6 +24,9 @@ int nv50_disp_root_new_(const struct nv50_disp_root_func *, struct nvkm_disp *,
const struct nvkm_oclass *, void *data, u32 size,
struct nvkm_object **);
+int gv100_disp_caps_new(const struct nvkm_oclass *, void *, u32,
+ struct nv50_disp *, struct nvkm_object **);
+
extern const struct nvkm_disp_oclass nv50_disp_root_oclass;
extern const struct nvkm_disp_oclass g84_disp_root_oclass;
extern const struct nvkm_disp_oclass g94_disp_root_oclass;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/roottu102.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/roottu102.c
index 579a5d02308a..d8719d38b98a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/roottu102.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/roottu102.c
@@ -27,6 +27,7 @@
static const struct nv50_disp_root_func
tu102_disp_root = {
.user = {
+ {{-1,-1,GV100_DISP_CAPS }, gv100_disp_caps_new },
{{0,0,TU102_DISP_CURSOR }, gv100_disp_curs_new },
{{0,0,TU102_DISP_WINDOW_IMM_CHANNEL_DMA}, gv100_disp_wimm_new },
{{0,0,TU102_DISP_CORE_CHANNEL_DMA }, gv100_disp_core_new },
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c
index 456a5a143522..3b3643fb1019 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c
@@ -177,6 +177,7 @@ gf119_sor = {
.hda = {
.hpd = gf119_hda_hpd,
.eld = gf119_hda_eld,
+ .device_entry = gf119_hda_device_entry,
},
};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgk104.c
index b94090edaebf..0c0925680790 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgk104.c
@@ -43,6 +43,7 @@ gk104_sor = {
.hda = {
.hpd = gf119_hda_hpd,
.eld = gf119_hda_eld,
+ .device_entry = gf119_hda_device_entry,
},
};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm107.c
index e6965dec09c9..38045c92197f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm107.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm107.c
@@ -57,6 +57,7 @@ gm107_sor = {
.hda = {
.hpd = gf119_hda_hpd,
.eld = gf119_hda_eld,
+ .device_entry = gf119_hda_device_entry,
},
};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm200.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm200.c
index 384f82652bec..cf2075db742a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm200.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm200.c
@@ -115,6 +115,7 @@ gm200_sor = {
.hda = {
.hpd = gf119_hda_hpd,
.eld = gf119_hda_eld,
+ .device_entry = gf119_hda_device_entry,
},
};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgv100.c
index b0597ff9a714..d11a0dff10c6 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgv100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgv100.c
@@ -103,6 +103,7 @@ gv100_sor = {
.hda = {
.hpd = gf119_hda_hpd,
.eld = gf119_hda_eld,
+ .device_entry = gv100_hda_device_entry,
},
};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sortu102.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sortu102.c
index 4d5f3791ea7b..fa6d74251237 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sortu102.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sortu102.c
@@ -88,6 +88,7 @@ tu102_sor = {
.hda = {
.hpd = gf119_hda_hpd,
.eld = gf119_hda_eld,
+ .device_entry = gv100_hda_device_entry,
},
};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c
index 4209b24a46d7..ec330d791d15 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c
@@ -319,6 +319,17 @@ gk20a_gr_load_sw(struct gf100_gr *gr, const char *path, int ver)
return 0;
}
+#if IS_ENABLED(CONFIG_ARCH_TEGRA_124_SOC) || IS_ENABLED(CONFIG_ARCH_TEGRA_132_SOC)
+MODULE_FIRMWARE("nvidia/gk20a/fecs_data.bin");
+MODULE_FIRMWARE("nvidia/gk20a/fecs_inst.bin");
+MODULE_FIRMWARE("nvidia/gk20a/gpccs_data.bin");
+MODULE_FIRMWARE("nvidia/gk20a/gpccs_inst.bin");
+MODULE_FIRMWARE("nvidia/gk20a/sw_bundle_init.bin");
+MODULE_FIRMWARE("nvidia/gk20a/sw_ctx.bin");
+MODULE_FIRMWARE("nvidia/gk20a/sw_method_init.bin");
+MODULE_FIRMWARE("nvidia/gk20a/sw_nonctx.bin");
+#endif
+
static int
gk20a_gr_load(struct gf100_gr *gr, int ver, const struct gf100_gr_fwif *fwif)
{
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/acr/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/acr/base.c
index 8eb2a930a9b5..e4866a02e457 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/acr/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/acr/base.c
@@ -250,6 +250,11 @@ nvkm_acr_oneinit(struct nvkm_subdev *subdev)
list_add_tail(&lsf->head, &acr->lsf);
}
+ /* Ensure the falcon that'll provide ACR functions is booted first. */
+ lsf = nvkm_acr_falcon(device);
+ if (lsf)
+ list_move(&lsf->head, &acr->lsf);
+
if (!acr->wpr_fw || acr->wpr_comp)
wpr_size = acr->func->wpr_layout(acr);
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/acr/hsfw.c b/drivers/gpu/drm/nouveau/nvkm/subdev/acr/hsfw.c
index aecce2dac558..667fa016496e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/acr/hsfw.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/acr/hsfw.c
@@ -100,25 +100,21 @@ nvkm_acr_hsfw_load_image(struct nvkm_acr *acr, const char *name, int ver,
hsfw->data_size = lhdr->data_size;
hsfw->sig.prod.size = fwhdr->sig_prod_size;
- hsfw->sig.prod.data = kmalloc(hsfw->sig.prod.size, GFP_KERNEL);
+ hsfw->sig.prod.data = kmemdup(fw->data + fwhdr->sig_prod_offset + sig,
+ hsfw->sig.prod.size, GFP_KERNEL);
if (!hsfw->sig.prod.data) {
ret = -ENOMEM;
goto done;
}
- memcpy(hsfw->sig.prod.data, fw->data + fwhdr->sig_prod_offset + sig,
- hsfw->sig.prod.size);
-
hsfw->sig.dbg.size = fwhdr->sig_dbg_size;
- hsfw->sig.dbg.data = kmalloc(hsfw->sig.dbg.size, GFP_KERNEL);
+ hsfw->sig.dbg.data = kmemdup(fw->data + fwhdr->sig_dbg_offset + sig,
+ hsfw->sig.dbg.size, GFP_KERNEL);
if (!hsfw->sig.dbg.data) {
ret = -ENOMEM;
goto done;
}
- memcpy(hsfw->sig.dbg.data, fw->data + fwhdr->sig_dbg_offset + sig,
- hsfw->sig.dbg.size);
-
hsfw->sig.patch_loc = loc;
done:
nvkm_firmware_put(fw);
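
The hsfw change above folds each kmalloc() + memcpy() pair into kmemdup(), so the copy step cannot be forgotten on any path. A hedged userspace sketch of the same allocate-and-copy helper (kmemdup() is the in-kernel function the patch switches to; this analogue is for illustration only):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Allocate len bytes and copy src into them; NULL on allocation failure. */
static void *memdup(const void *src, size_t len)
{
	void *p = malloc(len);

	if (p)
		memcpy(p, src, len);
	return p;
}

int main(void)
{
	const unsigned char sig[] = { 0xde, 0xad, 0xbe, 0xef };
	unsigned char *copy = memdup(sig, sizeof(sig));

	if (!copy)
		return 1;
	printf("copied %zu bytes, first 0x%02x\n", sizeof(sig), copy[0]);
	free(copy);
	return 0;
}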
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowacpi.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowacpi.c
index 06572f8ce914..f9c427559538 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowacpi.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowacpi.c
@@ -22,22 +22,39 @@
*/
#include "priv.h"
-#if defined(CONFIG_ACPI) && defined(CONFIG_X86)
-int nouveau_acpi_get_bios_chunk(uint8_t *bios, int offset, int len);
-bool nouveau_acpi_rom_supported(struct device *);
-#else
-static inline bool
-nouveau_acpi_rom_supported(struct device *dev)
+static int
+acpi_read_bios(acpi_handle rom_handle, u8 *bios, u32 offset, u32 length)
{
- return false;
-}
+#if defined(CONFIG_ACPI) && defined(CONFIG_X86)
+ acpi_status status;
+ union acpi_object rom_arg_elements[2], *obj;
+ struct acpi_object_list rom_arg;
+ struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL};
-static inline int
-nouveau_acpi_get_bios_chunk(uint8_t *bios, int offset, int len)
-{
+ rom_arg.count = 2;
+ rom_arg.pointer = &rom_arg_elements[0];
+
+ rom_arg_elements[0].type = ACPI_TYPE_INTEGER;
+ rom_arg_elements[0].integer.value = offset;
+
+ rom_arg_elements[1].type = ACPI_TYPE_INTEGER;
+ rom_arg_elements[1].integer.value = length;
+
+ status = acpi_evaluate_object(rom_handle, NULL, &rom_arg, &buffer);
+ if (ACPI_FAILURE(status)) {
+ pr_info("failed to evaluate ROM got %s\n",
+ acpi_format_exception(status));
+ return -ENODEV;
+ }
+ obj = (union acpi_object *)buffer.pointer;
+ length = min(length, obj->buffer.length);
+ memcpy(bios+offset, obj->buffer.pointer, length);
+ kfree(buffer.pointer);
+ return length;
+#else
return -EINVAL;
-}
#endif
+}
/* This version of the shadow function disobeys the ACPI spec and tries
* to fetch in units of more than 4KiB at a time. This is a LOT faster
@@ -51,7 +68,7 @@ acpi_read_fast(void *data, u32 offset, u32 length, struct nvkm_bios *bios)
u32 fetch = limit - start;
if (nvbios_extend(bios, limit) >= 0) {
- int ret = nouveau_acpi_get_bios_chunk(bios->data, start, fetch);
+ int ret = acpi_read_bios(data, bios->data, start, fetch);
if (ret == fetch)
return fetch;
}
@@ -73,9 +90,8 @@ acpi_read_slow(void *data, u32 offset, u32 length, struct nvkm_bios *bios)
if (nvbios_extend(bios, limit) >= 0) {
while (start + fetch < limit) {
- int ret = nouveau_acpi_get_bios_chunk(bios->data,
- start + fetch,
- 0x1000);
+ int ret = acpi_read_bios(data, bios->data,
+ start + fetch, 0x1000);
if (ret != 0x1000)
break;
fetch += 0x1000;
@@ -88,9 +104,22 @@ acpi_read_slow(void *data, u32 offset, u32 length, struct nvkm_bios *bios)
static void *
acpi_init(struct nvkm_bios *bios, const char *name)
{
- if (!nouveau_acpi_rom_supported(bios->subdev.device->dev))
+#if defined(CONFIG_ACPI) && defined(CONFIG_X86)
+ acpi_status status;
+ acpi_handle dhandle, rom_handle;
+
+ dhandle = ACPI_HANDLE(bios->subdev.device->dev);
+ if (!dhandle)
return ERR_PTR(-ENODEV);
- return NULL;
+
+ status = acpi_get_handle(dhandle, "_ROM", &rom_handle);
+ if (ACPI_FAILURE(status))
+ return ERR_PTR(-ENODEV);
+
+ return rom_handle;
+#else
+ return ERR_PTR(-ENODEV);
+#endif
}
const struct nvbios_source
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gf100.c
index d80dbc8f09b2..2340040942c9 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gf100.c
@@ -114,9 +114,5 @@ int
gf100_ibus_new(struct nvkm_device *device, int index,
struct nvkm_subdev **pibus)
{
- struct nvkm_subdev *ibus;
- if (!(ibus = *pibus = kzalloc(sizeof(*ibus), GFP_KERNEL)))
- return -ENOMEM;
- nvkm_subdev_ctor(&gf100_ibus, device, index, ibus);
- return 0;
+ return nvkm_subdev_new_(&gf100_ibus, device, index, pibus);
}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gf117.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gf117.c
index 3905a80da811..1124dadac145 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gf117.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gf117.c
@@ -43,9 +43,5 @@ int
gf117_ibus_new(struct nvkm_device *device, int index,
struct nvkm_subdev **pibus)
{
- struct nvkm_subdev *ibus;
- if (!(ibus = *pibus = kzalloc(sizeof(*ibus), GFP_KERNEL)))
- return -ENOMEM;
- nvkm_subdev_ctor(&gf117_ibus, device, index, ibus);
- return 0;
+ return nvkm_subdev_new_(&gf117_ibus, device, index, pibus);
}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gk104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gk104.c
index 9025ed1bd2a9..f3915f85838e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gk104.c
@@ -117,9 +117,5 @@ int
gk104_ibus_new(struct nvkm_device *device, int index,
struct nvkm_subdev **pibus)
{
- struct nvkm_subdev *ibus;
- if (!(ibus = *pibus = kzalloc(sizeof(*ibus), GFP_KERNEL)))
- return -ENOMEM;
- nvkm_subdev_ctor(&gk104_ibus, device, index, ibus);
- return 0;
+ return nvkm_subdev_new_(&gk104_ibus, device, index, pibus);
}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gk20a.c
index 1a4ab825852c..187d544378b0 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gk20a.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gk20a.c
@@ -81,9 +81,5 @@ int
gk20a_ibus_new(struct nvkm_device *device, int index,
struct nvkm_subdev **pibus)
{
- struct nvkm_subdev *ibus;
- if (!(ibus = *pibus = kzalloc(sizeof(*ibus), GFP_KERNEL)))
- return -ENOMEM;
- nvkm_subdev_ctor(&gk20a_ibus, device, index, ibus);
- return 0;
+ return nvkm_subdev_new_(&gk20a_ibus, device, index, pibus);
}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gm200.c
index c63328152bfa..0f1f0ad6377e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gm200.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gm200.c
@@ -32,9 +32,5 @@ int
gm200_ibus_new(struct nvkm_device *device, int index,
struct nvkm_subdev **pibus)
{
- struct nvkm_subdev *ibus;
- if (!(ibus = *pibus = kzalloc(sizeof(*ibus), GFP_KERNEL)))
- return -ENOMEM;
- nvkm_subdev_ctor(&gm200_ibus, device, index, ibus);
- return 0;
+ return nvkm_subdev_new_(&gm200_ibus, device, index, pibus);
}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gp10b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gp10b.c
index 39db90aa2c80..0347b367cefe 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gp10b.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gp10b.c
@@ -51,9 +51,5 @@ int
gp10b_ibus_new(struct nvkm_device *device, int index,
struct nvkm_subdev **pibus)
{
- struct nvkm_subdev *ibus;
- if (!(ibus = *pibus = kzalloc(sizeof(*ibus), GFP_KERNEL)))
- return -ENOMEM;
- nvkm_subdev_ctor(&gp10b_ibus, device, index, ibus);
- return 0;
+ return nvkm_subdev_new_(&gp10b_ibus, device, index, pibus);
}
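
All of the ibus constructors above collapse the same kzalloc() + nvkm_subdev_ctor() boilerplate into a single nvkm_subdev_new_() call. A hedged sketch of that refactor pattern with invented types, not the real nvkm structures:

#include <stdlib.h>

struct subdev_func { int id; };
struct subdev { const struct subdev_func *func; };

/* One shared helper: allocate, construct, hand back through **psubdev. */
static int subdev_new_(const struct subdev_func *func, struct subdev **psubdev)
{
	struct subdev *sd = calloc(1, sizeof(*sd));

	if (!sd)
		return -1;	/* would be -ENOMEM in the kernel */
	sd->func = func;
	*psubdev = sd;
	return 0;
}

static const struct subdev_func gf100_like = { .id = 0x100 };

int main(void)
{
	struct subdev *sd;

	return subdev_new_(&gf100_like, &sd) ? 1 : 0;
}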
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c
index 41640e0584ac..199f94e15c5f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c
@@ -580,7 +580,7 @@ nvkm_vmm_iter(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page,
it.pte[it.lvl]++;
}
}
- };
+ }
nvkm_vmm_flush(&it);
return ~0ULL;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h
index 5e55ecbd8005..d3f8f916d0db 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h
@@ -304,7 +304,7 @@ int tu102_vmm_new(struct nvkm_mmu *, bool, u64, u64, void *, u32,
FILL(VMM, PT, PTEI, _ptes, MAP, _addr); \
PTEI += _ptes; \
PTEN -= _ptes; \
- }; \
+ } \
nvkm_done((PT)->memory); \
} while(0)
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/ic.c b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/ic.c
index 03b355dabab3..abf3eda683f0 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/ic.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/ic.c
@@ -36,8 +36,8 @@ probe_monitoring_device(struct nvkm_i2c_bus *bus,
request_module("%s%s", I2C_MODULE_PREFIX, info->type);
- client = i2c_new_device(&bus->i2c, info);
- if (!client)
+ client = i2c_new_client_device(&bus->i2c, info);
+ if (IS_ERR(client))
return false;
if (!client->dev.driver ||
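
The i2c change matters because i2c_new_device() reported failure with NULL while i2c_new_client_device() returns an ERR_PTR(), hence the switch to IS_ERR(). A userspace sketch of the ERR_PTR convention, with the helpers re-declared here purely for illustration:

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_ERRNO 4095

static inline void *ERR_PTR(long err)     { return (void *)err; }
static inline long PTR_ERR(const void *p) { return (long)p; }
static inline int IS_ERR(const void *p)
{
	return (uintptr_t)p >= (uintptr_t)-MAX_ERRNO;
}

int main(void)
{
	void *client = ERR_PTR(-ENOMEM);	/* what the new API may hand back */

	if (IS_ERR(client))
		printf("device creation failed: %ld\n", PTR_ERR(client));
	return 0;
}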
diff --git a/drivers/gpu/drm/omapdrm/dss/dispc.c b/drivers/gpu/drm/omapdrm/dss/dispc.c
index dbb90f2d2ccd..6639ee9b05d3 100644
--- a/drivers/gpu/drm/omapdrm/dss/dispc.c
+++ b/drivers/gpu/drm/omapdrm/dss/dispc.c
@@ -3137,33 +3137,12 @@ static void _dispc_mgr_set_lcd_timings(struct dispc_device *dispc,
dispc_write_reg(dispc, DISPC_TIMING_H(channel), timing_h);
dispc_write_reg(dispc, DISPC_TIMING_V(channel), timing_v);
- if (vm->flags & DISPLAY_FLAGS_VSYNC_HIGH)
- vs = false;
- else
- vs = true;
-
- if (vm->flags & DISPLAY_FLAGS_HSYNC_HIGH)
- hs = false;
- else
- hs = true;
-
- if (vm->flags & DISPLAY_FLAGS_DE_HIGH)
- de = false;
- else
- de = true;
-
- if (vm->flags & DISPLAY_FLAGS_PIXDATA_POSEDGE)
- ipc = false;
- else
- ipc = true;
-
- /* always use the 'rf' setting */
- onoff = true;
-
- if (vm->flags & DISPLAY_FLAGS_SYNC_POSEDGE)
- rf = true;
- else
- rf = false;
+ vs = !!(vm->flags & DISPLAY_FLAGS_VSYNC_LOW);
+ hs = !!(vm->flags & DISPLAY_FLAGS_HSYNC_LOW);
+ de = !!(vm->flags & DISPLAY_FLAGS_DE_LOW);
+ ipc = !!(vm->flags & DISPLAY_FLAGS_PIXDATA_NEGEDGE);
+ onoff = true; /* always use the 'rf' setting */
+ rf = !!(vm->flags & DISPLAY_FLAGS_SYNC_POSEDGE);
l = FLD_VAL(onoff, 17, 17) |
FLD_VAL(rf, 16, 16) |
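
The dispc change replaces each if/else pair with the !!(flags & FLAG) idiom, which normalises a bit test to a clean 0 or 1. A small standalone sketch of the idiom; the flag names here are invented for the example:

#include <stdbool.h>
#include <stdio.h>

#define FLAG_VSYNC_LOW (1u << 0)
#define FLAG_HSYNC_LOW (1u << 1)

int main(void)
{
	unsigned int flags = FLAG_HSYNC_LOW;

	/* !! collapses "set or not" into exactly 0 or 1. */
	bool vs = !!(flags & FLAG_VSYNC_LOW);
	bool hs = !!(flags & FLAG_HSYNC_LOW);

	printf("vs=%d hs=%d\n", vs, hs);
	return 0;
}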
diff --git a/drivers/gpu/drm/omapdrm/dss/venc.c b/drivers/gpu/drm/omapdrm/dss/venc.c
index 766553bb2f87..9701843ccf09 100644
--- a/drivers/gpu/drm/omapdrm/dss/venc.c
+++ b/drivers/gpu/drm/omapdrm/dss/venc.c
@@ -208,49 +208,6 @@ static const struct venc_config venc_config_ntsc_trm = {
.gen_ctrl = 0x00F90000,
};
-static const struct venc_config venc_config_pal_bdghi = {
- .f_control = 0,
- .vidout_ctrl = 0,
- .sync_ctrl = 0,
- .hfltr_ctrl = 0,
- .x_color = 0,
- .line21 = 0,
- .ln_sel = 21,
- .htrigger_vtrigger = 0,
- .tvdetgp_int_start_stop_x = 0x00140001,
- .tvdetgp_int_start_stop_y = 0x00010001,
- .gen_ctrl = 0x00FB0000,
-
- .llen = 864-1,
- .flens = 625-1,
- .cc_carr_wss_carr = 0x2F7625ED,
- .c_phase = 0xDF,
- .gain_u = 0x111,
- .gain_v = 0x181,
- .gain_y = 0x140,
- .black_level = 0x3e,
- .blank_level = 0x3e,
- .m_control = 0<<2 | 1<<1,
- .bstamp_wss_data = 0x42,
- .s_carr = 0x2a098acb,
- .l21__wc_ctl = 0<<13 | 0x16<<8 | 0<<0,
- .savid__eavid = 0x06A70108,
- .flen__fal = 23<<16 | 624<<0,
- .lal__phase_reset = 2<<17 | 310<<0,
- .hs_int_start_stop_x = 0x00920358,
- .hs_ext_start_stop_x = 0x000F035F,
- .vs_int_start_x = 0x1a7<<16,
- .vs_int_stop_x__vs_int_start_y = 0x000601A7,
- .vs_int_stop_y__vs_ext_start_x = 0x01AF0036,
- .vs_ext_stop_x__vs_ext_start_y = 0x27101af,
- .vs_ext_stop_y = 0x05,
- .avid_start_stop_x = 0x03530082,
- .avid_start_stop_y = 0x0270002E,
- .fid_int_start_x__fid_int_start_y = 0x0005008A,
- .fid_int_offset_y__fid_ext_start_x = 0x002E0138,
- .fid_ext_start_y__fid_ext_offset_y = 0x01380005,
-};
-
enum venc_videomode {
VENC_MODE_UNKNOWN,
VENC_MODE_PAL,
diff --git a/drivers/gpu/drm/omapdrm/omap_debugfs.c b/drivers/gpu/drm/omapdrm/omap_debugfs.c
index 34dfb33145b4..b57fbe8a0ac2 100644
--- a/drivers/gpu/drm/omapdrm/omap_debugfs.c
+++ b/drivers/gpu/drm/omapdrm/omap_debugfs.c
@@ -80,31 +80,16 @@ static struct drm_info_list omap_dmm_debugfs_list[] = {
{"tiler_map", tiler_map_show, 0},
};
-int omap_debugfs_init(struct drm_minor *minor)
+void omap_debugfs_init(struct drm_minor *minor)
{
- struct drm_device *dev = minor->dev;
- int ret;
-
- ret = drm_debugfs_create_files(omap_debugfs_list,
- ARRAY_SIZE(omap_debugfs_list),
- minor->debugfs_root, minor);
-
- if (ret) {
- dev_err(dev->dev, "could not install omap_debugfs_list\n");
- return ret;
- }
+ drm_debugfs_create_files(omap_debugfs_list,
+ ARRAY_SIZE(omap_debugfs_list),
+ minor->debugfs_root, minor);
if (dmm_is_available())
- ret = drm_debugfs_create_files(omap_dmm_debugfs_list,
- ARRAY_SIZE(omap_dmm_debugfs_list),
- minor->debugfs_root, minor);
-
- if (ret) {
- dev_err(dev->dev, "could not install omap_dmm_debugfs_list\n");
- return ret;
- }
-
- return ret;
+ drm_debugfs_create_files(omap_dmm_debugfs_list,
+ ARRAY_SIZE(omap_dmm_debugfs_list),
+ minor->debugfs_root, minor);
}
#endif
diff --git a/drivers/gpu/drm/omapdrm/omap_drv.h b/drivers/gpu/drm/omapdrm/omap_drv.h
index 7c4b66efcaa7..8a1fac680138 100644
--- a/drivers/gpu/drm/omapdrm/omap_drv.h
+++ b/drivers/gpu/drm/omapdrm/omap_drv.h
@@ -82,6 +82,6 @@ struct omap_drm_private {
};
-int omap_debugfs_init(struct drm_minor *minor);
+void omap_debugfs_init(struct drm_minor *minor);
#endif /* __OMAPDRM_DRV_H__ */
diff --git a/drivers/gpu/drm/panel/Kconfig b/drivers/gpu/drm/panel/Kconfig
index a1723c1b5fbf..39055c1f0e2f 100644
--- a/drivers/gpu/drm/panel/Kconfig
+++ b/drivers/gpu/drm/panel/Kconfig
@@ -18,6 +18,16 @@ config DRM_PANEL_ARM_VERSATILE
reference designs. The panel is detected using special registers
in the Versatile family syscon registers.
+config DRM_PANEL_ASUS_Z00T_TM5P5_NT35596
+ tristate "ASUS Z00T TM5P5 NT35596 panel"
+ depends on GPIOLIB && OF
+ depends on DRM_MIPI_DSI
+ depends on BACKLIGHT_CLASS_DEVICE
+ help
+ Say Y here if you want to enable support for the ASUS TM5P5
+ NT35596 1080x1920 video mode panel as found in some Asus
+ Zenfone 2 Laser Z00T devices.
+
config DRM_PANEL_BOE_HIMAX8279D
tristate "Boe Himax8279d panel"
depends on OF
@@ -137,6 +147,17 @@ config DRM_PANEL_KINGDISPLAY_KD097D04
24 bit RGB per pixel. It provides a MIPI DSI interface to
the host and has a built-in LED backlight.
+config DRM_PANEL_LEADTEK_LTK050H3146W
+ tristate "Leadtek LTK050H3146W panel"
+ depends on OF
+ depends on DRM_MIPI_DSI
+ depends on BACKLIGHT_CLASS_DEVICE
+ help
+ Say Y here if you want to enable support for Leadtek LTK050H3146W
+ TFT-LCD modules. The panel has a 720x1280 resolution and uses
+ 24 bit RGB per pixel. It provides a MIPI DSI interface to
+ the host and has a built-in LED backlight.
+
config DRM_PANEL_LEADTEK_LTK500HD1829
tristate "Leadtek LTK500HD1829 panel"
depends on OF
@@ -433,6 +454,14 @@ config DRM_PANEL_TRULY_NT35597_WQXGA
Say Y here if you want to enable support for Truly NT35597 WQXGA Dual DSI
Video Mode panel
+config DRM_PANEL_VISIONOX_RM69299
+ tristate "Visionox RM69299"
+ depends on OF
+ depends on DRM_MIPI_DSI
+ help
+ Say Y here if you want to enable support for Visionox
+ RM69299 DSI Video Mode panel.
+
config DRM_PANEL_XINPENG_XPP055C272
tristate "Xinpeng XPP055C272 panel driver"
depends on OF
diff --git a/drivers/gpu/drm/panel/Makefile b/drivers/gpu/drm/panel/Makefile
index 96a883cd6630..de74f282c433 100644
--- a/drivers/gpu/drm/panel/Makefile
+++ b/drivers/gpu/drm/panel/Makefile
@@ -1,5 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_DRM_PANEL_ARM_VERSATILE) += panel-arm-versatile.o
+obj-$(CONFIG_DRM_PANEL_ASUS_Z00T_TM5P5_NT35596) += panel-asus-z00t-tm5p5-n35596.o
obj-$(CONFIG_DRM_PANEL_BOE_HIMAX8279D) += panel-boe-himax8279d.o
obj-$(CONFIG_DRM_PANEL_BOE_TV101WUM_NL6) += panel-boe-tv101wum-nl6.o
obj-$(CONFIG_DRM_PANEL_LVDS) += panel-lvds.o
@@ -12,6 +13,7 @@ obj-$(CONFIG_DRM_PANEL_ILITEK_ILI9881C) += panel-ilitek-ili9881c.o
obj-$(CONFIG_DRM_PANEL_INNOLUX_P079ZCA) += panel-innolux-p079zca.o
obj-$(CONFIG_DRM_PANEL_JDI_LT070ME05000) += panel-jdi-lt070me05000.o
obj-$(CONFIG_DRM_PANEL_KINGDISPLAY_KD097D04) += panel-kingdisplay-kd097d04.o
+obj-$(CONFIG_DRM_PANEL_LEADTEK_LTK050H3146W) += panel-leadtek-ltk050h3146w.o
obj-$(CONFIG_DRM_PANEL_LEADTEK_LTK500HD1829) += panel-leadtek-ltk500hd1829.o
obj-$(CONFIG_DRM_PANEL_LG_LB035Q02) += panel-lg-lb035q02.o
obj-$(CONFIG_DRM_PANEL_LG_LG4573) += panel-lg-lg4573.o
@@ -46,4 +48,5 @@ obj-$(CONFIG_DRM_PANEL_TPO_TD028TTEC1) += panel-tpo-td028ttec1.o
obj-$(CONFIG_DRM_PANEL_TPO_TD043MTEA1) += panel-tpo-td043mtea1.o
obj-$(CONFIG_DRM_PANEL_TPO_TPG110) += panel-tpo-tpg110.o
obj-$(CONFIG_DRM_PANEL_TRULY_NT35597_WQXGA) += panel-truly-nt35597.o
+obj-$(CONFIG_DRM_PANEL_VISIONOX_RM69299) += panel-visionox-rm69299.o
obj-$(CONFIG_DRM_PANEL_XINPENG_XPP055C272) += panel-xinpeng-xpp055c272.o
diff --git a/drivers/gpu/drm/panel/panel-asus-z00t-tm5p5-n35596.c b/drivers/gpu/drm/panel/panel-asus-z00t-tm5p5-n35596.c
new file mode 100644
index 000000000000..39e0f0373f3c
--- /dev/null
+++ b/drivers/gpu/drm/panel/panel-asus-z00t-tm5p5-n35596.c
@@ -0,0 +1,367 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/backlight.h>
+#include <linux/delay.h>
+#include <linux/gpio/consumer.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/regulator/consumer.h>
+
+#include <drm/drm_mipi_dsi.h>
+#include <drm/drm_modes.h>
+#include <drm/drm_panel.h>
+
+struct tm5p5_nt35596 {
+ struct drm_panel panel;
+ struct mipi_dsi_device *dsi;
+ struct regulator_bulk_data supplies[2];
+ struct gpio_desc *reset_gpio;
+ bool prepared;
+};
+
+static inline struct tm5p5_nt35596 *to_tm5p5_nt35596(struct drm_panel *panel)
+{
+ return container_of(panel, struct tm5p5_nt35596, panel);
+}
+
+#define dsi_generic_write_seq(dsi, seq...) do { \
+ static const u8 d[] = { seq }; \
+ int ret; \
+ ret = mipi_dsi_generic_write(dsi, d, ARRAY_SIZE(d)); \
+ if (ret < 0) \
+ return ret; \
+ } while (0)
+
+#define dsi_dcs_write_seq(dsi, seq...) do { \
+ static const u8 d[] = { seq }; \
+ int ret; \
+ ret = mipi_dsi_dcs_write_buffer(dsi, d, ARRAY_SIZE(d)); \
+ if (ret < 0) \
+ return ret; \
+ } while (0)
+
+static void tm5p5_nt35596_reset(struct tm5p5_nt35596 *ctx)
+{
+ gpiod_set_value_cansleep(ctx->reset_gpio, 1);
+ usleep_range(1000, 2000);
+ gpiod_set_value_cansleep(ctx->reset_gpio, 0);
+ usleep_range(1000, 2000);
+ gpiod_set_value_cansleep(ctx->reset_gpio, 1);
+ usleep_range(15000, 16000);
+}
+
+static int tm5p5_nt35596_on(struct tm5p5_nt35596 *ctx)
+{
+ struct mipi_dsi_device *dsi = ctx->dsi;
+
+ dsi_generic_write_seq(dsi, 0xff, 0x05);
+ dsi_generic_write_seq(dsi, 0xfb, 0x01);
+ dsi_generic_write_seq(dsi, 0xc5, 0x31);
+ dsi_generic_write_seq(dsi, 0xff, 0x04);
+ dsi_generic_write_seq(dsi, 0x01, 0x84);
+ dsi_generic_write_seq(dsi, 0x05, 0x25);
+ dsi_generic_write_seq(dsi, 0x06, 0x01);
+ dsi_generic_write_seq(dsi, 0x07, 0x20);
+ dsi_generic_write_seq(dsi, 0x08, 0x06);
+ dsi_generic_write_seq(dsi, 0x09, 0x08);
+ dsi_generic_write_seq(dsi, 0x0a, 0x10);
+ dsi_generic_write_seq(dsi, 0x0b, 0x10);
+ dsi_generic_write_seq(dsi, 0x0c, 0x10);
+ dsi_generic_write_seq(dsi, 0x0d, 0x14);
+ dsi_generic_write_seq(dsi, 0x0e, 0x14);
+ dsi_generic_write_seq(dsi, 0x0f, 0x14);
+ dsi_generic_write_seq(dsi, 0x10, 0x14);
+ dsi_generic_write_seq(dsi, 0x11, 0x14);
+ dsi_generic_write_seq(dsi, 0x12, 0x14);
+ dsi_generic_write_seq(dsi, 0x17, 0xf3);
+ dsi_generic_write_seq(dsi, 0x18, 0xc0);
+ dsi_generic_write_seq(dsi, 0x19, 0xc0);
+ dsi_generic_write_seq(dsi, 0x1a, 0xc0);
+ dsi_generic_write_seq(dsi, 0x1b, 0xb3);
+ dsi_generic_write_seq(dsi, 0x1c, 0xb3);
+ dsi_generic_write_seq(dsi, 0x1d, 0xb3);
+ dsi_generic_write_seq(dsi, 0x1e, 0xb3);
+ dsi_generic_write_seq(dsi, 0x1f, 0xb3);
+ dsi_generic_write_seq(dsi, 0x20, 0xb3);
+ dsi_generic_write_seq(dsi, 0xfb, 0x01);
+ dsi_generic_write_seq(dsi, 0xff, 0x00);
+ dsi_generic_write_seq(dsi, 0xfb, 0x01);
+ dsi_generic_write_seq(dsi, 0x35, 0x01);
+ dsi_generic_write_seq(dsi, 0xd3, 0x06);
+ dsi_generic_write_seq(dsi, 0xd4, 0x04);
+ dsi_generic_write_seq(dsi, 0x5e, 0x0d);
+ dsi_generic_write_seq(dsi, 0x11, 0x00);
+ msleep(100);
+ dsi_generic_write_seq(dsi, 0x29, 0x00);
+ dsi_generic_write_seq(dsi, 0x53, 0x24);
+
+ return 0;
+}
+
+static int tm5p5_nt35596_off(struct tm5p5_nt35596 *ctx)
+{
+ struct mipi_dsi_device *dsi = ctx->dsi;
+ struct device *dev = &dsi->dev;
+ int ret;
+
+ ret = mipi_dsi_dcs_set_display_off(dsi);
+ if (ret < 0) {
+ dev_err(dev, "Failed to set display off: %d\n", ret);
+ return ret;
+ }
+ msleep(60);
+
+ ret = mipi_dsi_dcs_enter_sleep_mode(dsi);
+ if (ret < 0) {
+ dev_err(dev, "Failed to enter sleep mode: %d\n", ret);
+ return ret;
+ }
+
+ dsi_dcs_write_seq(dsi, 0x4f, 0x01);
+
+ return 0;
+}
+
+static int tm5p5_nt35596_prepare(struct drm_panel *panel)
+{
+ struct tm5p5_nt35596 *ctx = to_tm5p5_nt35596(panel);
+ struct device *dev = &ctx->dsi->dev;
+ int ret;
+
+ if (ctx->prepared)
+ return 0;
+
+ ret = regulator_bulk_enable(ARRAY_SIZE(ctx->supplies), ctx->supplies);
+ if (ret < 0) {
+ dev_err(dev, "Failed to enable regulators: %d\n", ret);
+ return ret;
+ }
+
+ tm5p5_nt35596_reset(ctx);
+
+ ret = tm5p5_nt35596_on(ctx);
+ if (ret < 0) {
+ dev_err(dev, "Failed to initialize panel: %d\n", ret);
+ gpiod_set_value_cansleep(ctx->reset_gpio, 0);
+ regulator_bulk_disable(ARRAY_SIZE(ctx->supplies),
+ ctx->supplies);
+ return ret;
+ }
+
+ ctx->prepared = true;
+ return 0;
+}
+
+static int tm5p5_nt35596_unprepare(struct drm_panel *panel)
+{
+ struct tm5p5_nt35596 *ctx = to_tm5p5_nt35596(panel);
+ struct device *dev = &ctx->dsi->dev;
+ int ret;
+
+ if (!ctx->prepared)
+ return 0;
+
+ ret = tm5p5_nt35596_off(ctx);
+ if (ret < 0)
+ dev_err(dev, "Failed to un-initialize panel: %d\n", ret);
+
+ gpiod_set_value_cansleep(ctx->reset_gpio, 0);
+ regulator_bulk_disable(ARRAY_SIZE(ctx->supplies),
+ ctx->supplies);
+
+ ctx->prepared = false;
+ return 0;
+}
+
+static const struct drm_display_mode tm5p5_nt35596_mode = {
+ .clock = (1080 + 100 + 8 + 16) * (1920 + 4 + 2 + 4) * 60 / 1000,
+ .hdisplay = 1080,
+ .hsync_start = 1080 + 100,
+ .hsync_end = 1080 + 100 + 8,
+ .htotal = 1080 + 100 + 8 + 16,
+ .vdisplay = 1920,
+ .vsync_start = 1920 + 4,
+ .vsync_end = 1920 + 4 + 2,
+ .vtotal = 1920 + 4 + 2 + 4,
+ .vrefresh = 60,
+ .width_mm = 68,
+ .height_mm = 121,
+};
+
+static int tm5p5_nt35596_get_modes(struct drm_panel *panel,
+ struct drm_connector *connector)
+{
+ struct drm_display_mode *mode;
+
+ mode = drm_mode_duplicate(connector->dev, &tm5p5_nt35596_mode);
+ if (!mode)
+ return -ENOMEM;
+
+ drm_mode_set_name(mode);
+
+ mode->type = DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED;
+ connector->display_info.width_mm = mode->width_mm;
+ connector->display_info.height_mm = mode->height_mm;
+ drm_mode_probed_add(connector, mode);
+
+ return 1;
+}
+
+static const struct drm_panel_funcs tm5p5_nt35596_panel_funcs = {
+ .prepare = tm5p5_nt35596_prepare,
+ .unprepare = tm5p5_nt35596_unprepare,
+ .get_modes = tm5p5_nt35596_get_modes,
+};
+
+static int tm5p5_nt35596_bl_update_status(struct backlight_device *bl)
+{
+ struct mipi_dsi_device *dsi = bl_get_data(bl);
+ u16 brightness = bl->props.brightness;
+ int ret;
+
+ if (bl->props.power != FB_BLANK_UNBLANK ||
+ bl->props.fb_blank != FB_BLANK_UNBLANK ||
+ bl->props.state & (BL_CORE_SUSPENDED | BL_CORE_FBBLANK))
+ brightness = 0;
+
+ dsi->mode_flags &= ~MIPI_DSI_MODE_LPM;
+
+ ret = mipi_dsi_dcs_set_display_brightness(dsi, brightness);
+ if (ret < 0)
+ return ret;
+
+ dsi->mode_flags |= MIPI_DSI_MODE_LPM;
+
+ return 0;
+}
+
+static int tm5p5_nt35596_bl_get_brightness(struct backlight_device *bl)
+{
+ struct mipi_dsi_device *dsi = bl_get_data(bl);
+ u16 brightness = bl->props.brightness;
+ int ret;
+
+ dsi->mode_flags &= ~MIPI_DSI_MODE_LPM;
+
+ ret = mipi_dsi_dcs_get_display_brightness(dsi, &brightness);
+ if (ret < 0)
+ return ret;
+
+ dsi->mode_flags |= MIPI_DSI_MODE_LPM;
+
+ return brightness & 0xff;
+}
+
+static const struct backlight_ops tm5p5_nt35596_bl_ops = {
+ .update_status = tm5p5_nt35596_bl_update_status,
+ .get_brightness = tm5p5_nt35596_bl_get_brightness,
+};
+
+static struct backlight_device *
+tm5p5_nt35596_create_backlight(struct mipi_dsi_device *dsi)
+{
+ struct device *dev = &dsi->dev;
+ const struct backlight_properties props = {
+ .type = BACKLIGHT_RAW,
+ .brightness = 255,
+ .max_brightness = 255,
+ };
+
+ return devm_backlight_device_register(dev, dev_name(dev), dev, dsi,
+ &tm5p5_nt35596_bl_ops, &props);
+}
+
+static int tm5p5_nt35596_probe(struct mipi_dsi_device *dsi)
+{
+ struct device *dev = &dsi->dev;
+ struct tm5p5_nt35596 *ctx;
+ int ret;
+
+ ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ ctx->supplies[0].supply = "vdd";
+ ctx->supplies[1].supply = "vddio";
+ ret = devm_regulator_bulk_get(dev, ARRAY_SIZE(ctx->supplies),
+ ctx->supplies);
+ if (ret < 0) {
+ dev_err(dev, "Failed to get regulators: %d\n", ret);
+ return ret;
+ }
+
+ ctx->reset_gpio = devm_gpiod_get(dev, "reset", GPIOD_OUT_LOW);
+ if (IS_ERR(ctx->reset_gpio)) {
+ ret = PTR_ERR(ctx->reset_gpio);
+ dev_err(dev, "Failed to get reset-gpios: %d\n", ret);
+ return ret;
+ }
+
+ ctx->dsi = dsi;
+ mipi_dsi_set_drvdata(dsi, ctx);
+
+ dsi->lanes = 4;
+ dsi->format = MIPI_DSI_FMT_RGB888;
+ dsi->mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_BURST |
+ MIPI_DSI_MODE_VIDEO_HSE | MIPI_DSI_MODE_EOT_PACKET |
+ MIPI_DSI_CLOCK_NON_CONTINUOUS | MIPI_DSI_MODE_LPM;
+
+ drm_panel_init(&ctx->panel, dev, &tm5p5_nt35596_panel_funcs,
+ DRM_MODE_CONNECTOR_DSI);
+
+ ctx->panel.backlight = tm5p5_nt35596_create_backlight(dsi);
+ if (IS_ERR(ctx->panel.backlight)) {
+ ret = PTR_ERR(ctx->panel.backlight);
+ dev_err(dev, "Failed to create backlight: %d\n", ret);
+ return ret;
+ }
+
+ ret = drm_panel_add(&ctx->panel);
+ if (ret < 0) {
+ dev_err(dev, "Failed to add panel: %d\n", ret);
+ return ret;
+ }
+
+ ret = mipi_dsi_attach(dsi);
+ if (ret < 0) {
+ dev_err(dev, "Failed to attach to DSI host: %d\n", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+static int tm5p5_nt35596_remove(struct mipi_dsi_device *dsi)
+{
+ struct tm5p5_nt35596 *ctx = mipi_dsi_get_drvdata(dsi);
+ int ret;
+
+ ret = mipi_dsi_detach(dsi);
+ if (ret < 0)
+ dev_err(&dsi->dev,
+ "Failed to detach from DSI host: %d\n", ret);
+
+ drm_panel_remove(&ctx->panel);
+
+ return 0;
+}
+
+static const struct of_device_id tm5p5_nt35596_of_match[] = {
+ { .compatible = "asus,z00t-tm5p5-n35596" },
+ { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, tm5p5_nt35596_of_match);
+
+static struct mipi_dsi_driver tm5p5_nt35596_driver = {
+ .probe = tm5p5_nt35596_probe,
+ .remove = tm5p5_nt35596_remove,
+ .driver = {
+ .name = "panel-tm5p5-nt35596",
+ .of_match_table = tm5p5_nt35596_of_match,
+ },
+};
+module_mipi_dsi_driver(tm5p5_nt35596_driver);
+
+MODULE_AUTHOR("Konrad Dybcio <konradybcio@gmail.com>");
+MODULE_DESCRIPTION("DRM driver for tm5p5 nt35596 1080p video mode dsi panel");
+MODULE_LICENSE("GPL v2");
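
The dsi_generic_write_seq()/dsi_dcs_write_seq() macros in this new panel driver wrap every init write in a do { } while (0) block with an early return on error, keeping the long command tables readable. A hedged userspace sketch of that macro pattern; send_seq() is a stand-in for mipi_dsi_generic_write():

#include <stddef.h>
#include <stdio.h>

static int send_seq(const unsigned char *d, size_t len)
{
	printf("sending %zu byte(s), first 0x%02x\n", len, d[0]);
	return 0;	/* a real transport would return a negative errno */
}

/* do { } while (0) keeps the macro usable as a single statement. */
#define write_seq(seq...) do {					\
		static const unsigned char d[] = { seq };	\
		int ret = send_seq(d, sizeof(d));		\
		if (ret < 0)					\
			return ret;				\
	} while (0)

static int init_sequence(void)
{
	write_seq(0xff, 0x05);
	write_seq(0xfb, 0x01);
	return 0;
}

int main(void)
{
	return init_sequence();
}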
diff --git a/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c b/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c
index 48a164257d18..46fe1805c588 100644
--- a/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c
+++ b/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c
@@ -696,6 +696,34 @@ static const struct panel_desc auo_b101uan08_3_desc = {
.init_cmds = auo_b101uan08_3_init_cmd,
};
+static const struct drm_display_mode boe_tv105wum_nw0_default_mode = {
+ .clock = 159916,
+ .hdisplay = 1200,
+ .hsync_start = 1200 + 80,
+ .hsync_end = 1200 + 80 + 24,
+ .htotal = 1200 + 80 + 24 + 60,
+ .vdisplay = 1920,
+ .vsync_start = 1920 + 20,
+ .vsync_end = 1920 + 20 + 4,
+ .vtotal = 1920 + 20 + 4 + 10,
+ .vrefresh = 60,
+ .type = DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED,
+};
+
+static const struct panel_desc boe_tv105wum_nw0_desc = {
+ .modes = &boe_tv105wum_nw0_default_mode,
+ .bpc = 8,
+ .size = {
+ .width_mm = 141,
+ .height_mm = 226,
+ },
+ .lanes = 4,
+ .format = MIPI_DSI_FMT_RGB888,
+ .mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_SYNC_PULSE |
+ MIPI_DSI_MODE_LPM,
+ .init_cmds = boe_init_cmd,
+};
+
static int boe_panel_get_modes(struct drm_panel *panel,
struct drm_connector *connector)
{
@@ -834,6 +862,9 @@ static const struct of_device_id boe_of_match[] = {
{ .compatible = "auo,b101uan08.3",
.data = &auo_b101uan08_3_desc
},
+ { .compatible = "boe,tv105wum-nw0",
+ .data = &boe_tv105wum_nw0_desc
+ },
{ /* sentinel */ }
};
MODULE_DEVICE_TABLE(of, boe_of_match);
diff --git a/drivers/gpu/drm/panel/panel-ilitek-ili9322.c b/drivers/gpu/drm/panel/panel-ilitek-ili9322.c
index 09935520e606..873b1c7059bd 100644
--- a/drivers/gpu/drm/panel/panel-ilitek-ili9322.c
+++ b/drivers/gpu/drm/panel/panel-ilitek-ili9322.c
@@ -379,7 +379,7 @@ static int ili9322_init(struct drm_panel *panel, struct ili9322 *ili)
"can't set up VCOM amplitude (%d)\n", ret);
return ret;
}
- };
+ }
if (ili->vcom_high != U8_MAX) {
ret = regmap_write(ili->regmap, ILI9322_VCOM_HIGH,
@@ -388,7 +388,7 @@ static int ili9322_init(struct drm_panel *panel, struct ili9322 *ili)
dev_err(ili->dev, "can't set up VCOM high (%d)\n", ret);
return ret;
}
- };
+ }
/* Set up gamma correction */
for (i = 0; i < ARRAY_SIZE(ili->gamma); i++) {
diff --git a/drivers/gpu/drm/panel/panel-leadtek-ltk050h3146w.c b/drivers/gpu/drm/panel/panel-leadtek-ltk050h3146w.c
new file mode 100644
index 000000000000..5a7a31c8513e
--- /dev/null
+++ b/drivers/gpu/drm/panel/panel-leadtek-ltk050h3146w.c
@@ -0,0 +1,691 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020 Theobroma Systems Design und Consulting GmbH
+ */
+
+#include <linux/delay.h>
+#include <linux/gpio/consumer.h>
+#include <linux/media-bus-format.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/regulator/consumer.h>
+
+#include <video/display_timing.h>
+#include <video/mipi_display.h>
+
+#include <drm/drm_mipi_dsi.h>
+#include <drm/drm_modes.h>
+#include <drm/drm_panel.h>
+#include <drm/drm_print.h>
+
+struct ltk050h3146w_cmd {
+ char cmd;
+ char data;
+};
+
+struct ltk050h3146w;
+struct ltk050h3146w_desc {
+ const struct drm_display_mode *mode;
+ int (*init)(struct ltk050h3146w *ctx);
+};
+
+struct ltk050h3146w {
+ struct device *dev;
+ struct drm_panel panel;
+ struct gpio_desc *reset_gpio;
+ struct regulator *vci;
+ struct regulator *iovcc;
+ const struct ltk050h3146w_desc *panel_desc;
+ bool prepared;
+};
+
+static const struct ltk050h3146w_cmd page1_cmds[] = {
+ { 0x22, 0x0A }, /* BGR SS GS */
+ { 0x31, 0x00 }, /* column inversion */
+ { 0x53, 0xA2 }, /* VCOM1 */
+ { 0x55, 0xA2 }, /* VCOM2 */
+ { 0x50, 0x81 }, /* VREG1OUT=5V */
+ { 0x51, 0x85 }, /* VREG2OUT=-5V */
+ { 0x62, 0x0D }, /* EQT Time setting */
+/*
+ * The vendor init selected page 1 here _again_.
+ * Is this supposed to be page 2?
+ */
+ { 0xA0, 0x00 },
+ { 0xA1, 0x1A },
+ { 0xA2, 0x28 },
+ { 0xA3, 0x13 },
+ { 0xA4, 0x16 },
+ { 0xA5, 0x29 },
+ { 0xA6, 0x1D },
+ { 0xA7, 0x1E },
+ { 0xA8, 0x84 },
+ { 0xA9, 0x1C },
+ { 0xAA, 0x28 },
+ { 0xAB, 0x75 },
+ { 0xAC, 0x1A },
+ { 0xAD, 0x19 },
+ { 0xAE, 0x4D },
+ { 0xAF, 0x22 },
+ { 0xB0, 0x28 },
+ { 0xB1, 0x54 },
+ { 0xB2, 0x66 },
+ { 0xB3, 0x39 },
+ { 0xC0, 0x00 },
+ { 0xC1, 0x1A },
+ { 0xC2, 0x28 },
+ { 0xC3, 0x13 },
+ { 0xC4, 0x16 },
+ { 0xC5, 0x29 },
+ { 0xC6, 0x1D },
+ { 0xC7, 0x1E },
+ { 0xC8, 0x84 },
+ { 0xC9, 0x1C },
+ { 0xCA, 0x28 },
+ { 0xCB, 0x75 },
+ { 0xCC, 0x1A },
+ { 0xCD, 0x19 },
+ { 0xCE, 0x4D },
+ { 0xCF, 0x22 },
+ { 0xD0, 0x28 },
+ { 0xD1, 0x54 },
+ { 0xD2, 0x66 },
+ { 0xD3, 0x39 },
+};
+
+static const struct ltk050h3146w_cmd page3_cmds[] = {
+ { 0x01, 0x00 },
+ { 0x02, 0x00 },
+ { 0x03, 0x73 },
+ { 0x04, 0x00 },
+ { 0x05, 0x00 },
+ { 0x06, 0x0a },
+ { 0x07, 0x00 },
+ { 0x08, 0x00 },
+ { 0x09, 0x01 },
+ { 0x0a, 0x00 },
+ { 0x0b, 0x00 },
+ { 0x0c, 0x01 },
+ { 0x0d, 0x00 },
+ { 0x0e, 0x00 },
+ { 0x0f, 0x1d },
+ { 0x10, 0x1d },
+ { 0x11, 0x00 },
+ { 0x12, 0x00 },
+ { 0x13, 0x00 },
+ { 0x14, 0x00 },
+ { 0x15, 0x00 },
+ { 0x16, 0x00 },
+ { 0x17, 0x00 },
+ { 0x18, 0x00 },
+ { 0x19, 0x00 },
+ { 0x1a, 0x00 },
+ { 0x1b, 0x00 },
+ { 0x1c, 0x00 },
+ { 0x1d, 0x00 },
+ { 0x1e, 0x40 },
+ { 0x1f, 0x80 },
+ { 0x20, 0x06 },
+ { 0x21, 0x02 },
+ { 0x22, 0x00 },
+ { 0x23, 0x00 },
+ { 0x24, 0x00 },
+ { 0x25, 0x00 },
+ { 0x26, 0x00 },
+ { 0x27, 0x00 },
+ { 0x28, 0x33 },
+ { 0x29, 0x03 },
+ { 0x2a, 0x00 },
+ { 0x2b, 0x00 },
+ { 0x2c, 0x00 },
+ { 0x2d, 0x00 },
+ { 0x2e, 0x00 },
+ { 0x2f, 0x00 },
+ { 0x30, 0x00 },
+ { 0x31, 0x00 },
+ { 0x32, 0x00 },
+ { 0x33, 0x00 },
+ { 0x34, 0x04 },
+ { 0x35, 0x00 },
+ { 0x36, 0x00 },
+ { 0x37, 0x00 },
+ { 0x38, 0x3C },
+ { 0x39, 0x35 },
+ { 0x3A, 0x01 },
+ { 0x3B, 0x40 },
+ { 0x3C, 0x00 },
+ { 0x3D, 0x01 },
+ { 0x3E, 0x00 },
+ { 0x3F, 0x00 },
+ { 0x40, 0x00 },
+ { 0x41, 0x88 },
+ { 0x42, 0x00 },
+ { 0x43, 0x00 },
+ { 0x44, 0x1F },
+ { 0x50, 0x01 },
+ { 0x51, 0x23 },
+ { 0x52, 0x45 },
+ { 0x53, 0x67 },
+ { 0x54, 0x89 },
+ { 0x55, 0xab },
+ { 0x56, 0x01 },
+ { 0x57, 0x23 },
+ { 0x58, 0x45 },
+ { 0x59, 0x67 },
+ { 0x5a, 0x89 },
+ { 0x5b, 0xab },
+ { 0x5c, 0xcd },
+ { 0x5d, 0xef },
+ { 0x5e, 0x11 },
+ { 0x5f, 0x01 },
+ { 0x60, 0x00 },
+ { 0x61, 0x15 },
+ { 0x62, 0x14 },
+ { 0x63, 0x0E },
+ { 0x64, 0x0F },
+ { 0x65, 0x0C },
+ { 0x66, 0x0D },
+ { 0x67, 0x06 },
+ { 0x68, 0x02 },
+ { 0x69, 0x07 },
+ { 0x6a, 0x02 },
+ { 0x6b, 0x02 },
+ { 0x6c, 0x02 },
+ { 0x6d, 0x02 },
+ { 0x6e, 0x02 },
+ { 0x6f, 0x02 },
+ { 0x70, 0x02 },
+ { 0x71, 0x02 },
+ { 0x72, 0x02 },
+ { 0x73, 0x02 },
+ { 0x74, 0x02 },
+ { 0x75, 0x01 },
+ { 0x76, 0x00 },
+ { 0x77, 0x14 },
+ { 0x78, 0x15 },
+ { 0x79, 0x0E },
+ { 0x7a, 0x0F },
+ { 0x7b, 0x0C },
+ { 0x7c, 0x0D },
+ { 0x7d, 0x06 },
+ { 0x7e, 0x02 },
+ { 0x7f, 0x07 },
+ { 0x80, 0x02 },
+ { 0x81, 0x02 },
+ { 0x82, 0x02 },
+ { 0x83, 0x02 },
+ { 0x84, 0x02 },
+ { 0x85, 0x02 },
+ { 0x86, 0x02 },
+ { 0x87, 0x02 },
+ { 0x88, 0x02 },
+ { 0x89, 0x02 },
+ { 0x8A, 0x02 },
+};
+
+static const struct ltk050h3146w_cmd page4_cmds[] = {
+ { 0x70, 0x00 },
+ { 0x71, 0x00 },
+ { 0x82, 0x0F }, /* VGH_MOD clamp level=15v */
+ { 0x84, 0x0F }, /* VGH clamp level 15V */
+ { 0x85, 0x0D }, /* VGL clamp level (-10V) */
+ { 0x32, 0xAC },
+ { 0x8C, 0x80 },
+ { 0x3C, 0xF5 },
+ { 0xB5, 0x07 }, /* GAMMA OP */
+ { 0x31, 0x45 }, /* SOURCE OP */
+ { 0x3A, 0x24 }, /* PS_EN OFF */
+ { 0x88, 0x33 }, /* LVD */
+};
+
+static inline
+struct ltk050h3146w *panel_to_ltk050h3146w(struct drm_panel *panel)
+{
+ return container_of(panel, struct ltk050h3146w, panel);
+}
+
+#define dsi_dcs_write_seq(dsi, cmd, seq...) do { \
+ static const u8 d[] = { seq }; \
+ int ret; \
+ ret = mipi_dsi_dcs_write(dsi, cmd, d, ARRAY_SIZE(d)); \
+ if (ret < 0) \
+ return ret; \
+ } while (0)
+
+static int ltk050h3146w_init_sequence(struct ltk050h3146w *ctx)
+{
+ struct mipi_dsi_device *dsi = to_mipi_dsi_device(ctx->dev);
+ int ret;
+
+ /*
+ * Init sequence was supplied by the panel vendor without much
+ * documentation.
+ */
+ dsi_dcs_write_seq(dsi, 0xdf, 0x93, 0x65, 0xf8);
+ dsi_dcs_write_seq(dsi, 0xb0, 0x01, 0x03, 0x02, 0x00, 0x64, 0x06,
+ 0x01);
+ dsi_dcs_write_seq(dsi, 0xb2, 0x00, 0xb5);
+ dsi_dcs_write_seq(dsi, 0xb3, 0x00, 0xb5);
+ dsi_dcs_write_seq(dsi, 0xb7, 0x00, 0xbf, 0x00, 0x00, 0xbf, 0x00);
+
+ dsi_dcs_write_seq(dsi, 0xb9, 0x00, 0xc4, 0x23, 0x07);
+ dsi_dcs_write_seq(dsi, 0xbb, 0x02, 0x01, 0x24, 0x00, 0x28, 0x0f,
+ 0x28, 0x04, 0xcc, 0xcc, 0xcc);
+ dsi_dcs_write_seq(dsi, 0xbc, 0x0f, 0x04);
+ dsi_dcs_write_seq(dsi, 0xbe, 0x1e, 0xf2);
+ dsi_dcs_write_seq(dsi, 0xc0, 0x26, 0x03);
+ dsi_dcs_write_seq(dsi, 0xc1, 0x00, 0x12);
+ dsi_dcs_write_seq(dsi, 0xc3, 0x04, 0x02, 0x02, 0x76, 0x01, 0x80,
+ 0x80);
+ dsi_dcs_write_seq(dsi, 0xc4, 0x24, 0x80, 0xb4, 0x81, 0x12, 0x0f,
+ 0x16, 0x00, 0x00);
+ dsi_dcs_write_seq(dsi, 0xc8, 0x7f, 0x72, 0x67, 0x5d, 0x5d, 0x50,
+ 0x56, 0x41, 0x59, 0x57, 0x55, 0x70, 0x5b, 0x5f,
+ 0x4f, 0x47, 0x38, 0x23, 0x08, 0x7f, 0x72, 0x67,
+ 0x5d, 0x5d, 0x50, 0x56, 0x41, 0x59, 0x57, 0x55,
+ 0x70, 0x5b, 0x5f, 0x4f, 0x47, 0x38, 0x23, 0x08);
+ dsi_dcs_write_seq(dsi, 0xd0, 0x1e, 0x1f, 0x57, 0x58, 0x48, 0x4a,
+ 0x44, 0x46, 0x40, 0x1f, 0x42, 0x1f, 0x1f, 0x1f,
+ 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f);
+ dsi_dcs_write_seq(dsi, 0xd1, 0x1e, 0x1f, 0x57, 0x58, 0x49, 0x4b,
+ 0x45, 0x47, 0x41, 0x1f, 0x43, 0x1f, 0x1f, 0x1f,
+ 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f);
+ dsi_dcs_write_seq(dsi, 0xd2, 0x1f, 0x1e, 0x17, 0x18, 0x07, 0x05,
+ 0x0b, 0x09, 0x03, 0x1f, 0x01, 0x1f, 0x1f, 0x1f,
+ 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f);
+ dsi_dcs_write_seq(dsi, 0xd3, 0x1f, 0x1e, 0x17, 0x18, 0x06, 0x04,
+ 0x0a, 0x08, 0x02, 0x1f, 0x00, 0x1f, 0x1f, 0x1f,
+ 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f);
+ dsi_dcs_write_seq(dsi, 0xd4, 0x00, 0x00, 0x00, 0x0c, 0x06, 0x20,
+ 0x01, 0x02, 0x00, 0x60, 0x15, 0xb0, 0x30, 0x03,
+ 0x04, 0x00, 0x60, 0x72, 0x0a, 0x00, 0x60, 0x08);
+ dsi_dcs_write_seq(dsi, 0xd5, 0x00, 0x06, 0x06, 0x00, 0x30, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0xbc, 0x50, 0x00, 0x05,
+ 0x21, 0x00, 0x60);
+ dsi_dcs_write_seq(dsi, 0xdd, 0x2c, 0xa3, 0x00);
+ dsi_dcs_write_seq(dsi, 0xde, 0x02);
+ dsi_dcs_write_seq(dsi, 0xb2, 0x32, 0x1c);
+ dsi_dcs_write_seq(dsi, 0xb7, 0x3b, 0x70, 0x00, 0x04);
+ dsi_dcs_write_seq(dsi, 0xc1, 0x11);
+ dsi_dcs_write_seq(dsi, 0xbb, 0x21, 0x22, 0x23, 0x24, 0x36, 0x37);
+ dsi_dcs_write_seq(dsi, 0xc2, 0x20, 0x38, 0x1e, 0x84);
+ dsi_dcs_write_seq(dsi, 0xde, 0x00);
+
+ ret = mipi_dsi_dcs_set_tear_on(dsi, 1);
+ if (ret < 0) {
+ DRM_DEV_ERROR(ctx->dev, "failed to set tear on: %d\n",
+ ret);
+ return ret;
+ }
+
+ msleep(60);
+
+ return 0;
+}
+
+static const struct drm_display_mode ltk050h3146w_mode = {
+ .hdisplay = 720,
+ .hsync_start = 720 + 42,
+ .hsync_end = 720 + 42 + 8,
+ .htotal = 720 + 42 + 8 + 42,
+ .vdisplay = 1280,
+ .vsync_start = 1280 + 12,
+ .vsync_end = 1280 + 12 + 4,
+ .vtotal = 1280 + 12 + 4 + 18,
+ .clock = 64018,
+ .width_mm = 62,
+ .height_mm = 110,
+};
+
+static const struct ltk050h3146w_desc ltk050h3146w_data = {
+ .mode = &ltk050h3146w_mode,
+ .init = ltk050h3146w_init_sequence,
+};
+
+static int ltk050h3146w_a2_select_page(struct ltk050h3146w *ctx, int page)
+{
+ struct mipi_dsi_device *dsi = to_mipi_dsi_device(ctx->dev);
+ u8 d[3] = { 0x98, 0x81, page };
+
+ return mipi_dsi_dcs_write(dsi, 0xff, d, ARRAY_SIZE(d));
+}
+
+static int ltk050h3146w_a2_write_page(struct ltk050h3146w *ctx, int page,
+ const struct ltk050h3146w_cmd *cmds,
+ int num)
+{
+ struct mipi_dsi_device *dsi = to_mipi_dsi_device(ctx->dev);
+ int i, ret;
+
+ ret = ltk050h3146w_a2_select_page(ctx, page);
+ if (ret < 0) {
+ DRM_DEV_ERROR(ctx->dev, "failed to select page %d: %d\n",
+ page, ret);
+ return ret;
+ }
+
+ for (i = 0; i < num; i++) {
+ ret = mipi_dsi_generic_write(dsi, &cmds[i],
+ sizeof(struct ltk050h3146w_cmd));
+ if (ret < 0) {
+ DRM_DEV_ERROR(ctx->dev,
+ "failed to write page %d init cmds: %d\n",
+ page, ret);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static int ltk050h3146w_a2_init_sequence(struct ltk050h3146w *ctx)
+{
+ struct mipi_dsi_device *dsi = to_mipi_dsi_device(ctx->dev);
+ int ret;
+
+ /*
+ * Init sequence was supplied by the panel vendor without much
+ * documentation.
+ */
+ ret = ltk050h3146w_a2_write_page(ctx, 3, page3_cmds,
+ ARRAY_SIZE(page3_cmds));
+ if (ret < 0)
+ return ret;
+
+ ret = ltk050h3146w_a2_write_page(ctx, 4, page4_cmds,
+ ARRAY_SIZE(page4_cmds));
+ if (ret < 0)
+ return ret;
+
+ ret = ltk050h3146w_a2_write_page(ctx, 1, page1_cmds,
+ ARRAY_SIZE(page1_cmds));
+ if (ret < 0)
+ return ret;
+
+ ret = ltk050h3146w_a2_select_page(ctx, 0);
+ if (ret < 0) {
+ DRM_DEV_ERROR(ctx->dev, "failed to select page 0: %d\n", ret);
+ return ret;
+ }
+
+ /* the vendor code called this without a parameter, where there should be one */
+ ret = mipi_dsi_dcs_set_tear_on(dsi, 0);
+ if (ret < 0) {
+ DRM_DEV_ERROR(ctx->dev, "failed to set tear on: %d\n",
+ ret);
+ return ret;
+ }
+
+ msleep(60);
+
+ return 0;
+}
+
+static const struct drm_display_mode ltk050h3146w_a2_mode = {
+ .hdisplay = 720,
+ .hsync_start = 720 + 42,
+ .hsync_end = 720 + 42 + 10,
+ .htotal = 720 + 42 + 10 + 60,
+ .vdisplay = 1280,
+ .vsync_start = 1280 + 18,
+ .vsync_end = 1280 + 18 + 4,
+ .vtotal = 1280 + 18 + 4 + 12,
+ .clock = 65595,
+ .width_mm = 62,
+ .height_mm = 110,
+};
+
+static const struct ltk050h3146w_desc ltk050h3146w_a2_data = {
+ .mode = &ltk050h3146w_a2_mode,
+ .init = ltk050h3146w_a2_init_sequence,
+};
+
+static int ltk050h3146w_unprepare(struct drm_panel *panel)
+{
+ struct ltk050h3146w *ctx = panel_to_ltk050h3146w(panel);
+ struct mipi_dsi_device *dsi = to_mipi_dsi_device(ctx->dev);
+ int ret;
+
+ if (!ctx->prepared)
+ return 0;
+
+ ret = mipi_dsi_dcs_set_display_off(dsi);
+ if (ret < 0) {
+ DRM_DEV_ERROR(ctx->dev, "failed to set display off: %d\n",
+ ret);
+ return ret;
+ }
+
+ ret = mipi_dsi_dcs_enter_sleep_mode(dsi);
+ if (ret < 0) {
+ DRM_DEV_ERROR(ctx->dev, "failed to enter sleep mode: %d\n",
+ ret);
+ return ret;
+ }
+
+ regulator_disable(ctx->iovcc);
+ regulator_disable(ctx->vci);
+
+ ctx->prepared = false;
+
+ return 0;
+}
+
+static int ltk050h3146w_prepare(struct drm_panel *panel)
+{
+ struct ltk050h3146w *ctx = panel_to_ltk050h3146w(panel);
+ struct mipi_dsi_device *dsi = to_mipi_dsi_device(ctx->dev);
+ int ret;
+
+ if (ctx->prepared)
+ return 0;
+
+ DRM_DEV_DEBUG_DRIVER(ctx->dev, "Resetting the panel\n");
+ ret = regulator_enable(ctx->vci);
+ if (ret < 0) {
+ DRM_DEV_ERROR(ctx->dev,
+ "Failed to enable vci supply: %d\n", ret);
+ return ret;
+ }
+ ret = regulator_enable(ctx->iovcc);
+ if (ret < 0) {
+ DRM_DEV_ERROR(ctx->dev,
+ "Failed to enable iovcc supply: %d\n", ret);
+ goto disable_vci;
+ }
+
+ gpiod_set_value_cansleep(ctx->reset_gpio, 1);
+ usleep_range(5000, 6000);
+ gpiod_set_value_cansleep(ctx->reset_gpio, 0);
+ msleep(20);
+
+ ret = ctx->panel_desc->init(ctx);
+ if (ret < 0) {
+ DRM_DEV_ERROR(ctx->dev, "Panel init sequence failed: %d\n",
+ ret);
+ goto disable_iovcc;
+ }
+
+ ret = mipi_dsi_dcs_exit_sleep_mode(dsi);
+ if (ret < 0) {
+ DRM_DEV_ERROR(ctx->dev, "Failed to exit sleep mode: %d\n", ret);
+ goto disable_iovcc;
+ }
+
+ /* T9: 120ms */
+ msleep(120);
+
+ ret = mipi_dsi_dcs_set_display_on(dsi);
+ if (ret < 0) {
+ DRM_DEV_ERROR(ctx->dev, "Failed to set display on: %d\n", ret);
+ goto disable_iovcc;
+ }
+
+ msleep(50);
+
+ ctx->prepared = true;
+
+ return 0;
+
+disable_iovcc:
+ regulator_disable(ctx->iovcc);
+disable_vci:
+ regulator_disable(ctx->vci);
+ return ret;
+}
+
+static int ltk050h3146w_get_modes(struct drm_panel *panel,
+ struct drm_connector *connector)
+{
+ struct ltk050h3146w *ctx = panel_to_ltk050h3146w(panel);
+ struct drm_display_mode *mode;
+
+ mode = drm_mode_duplicate(connector->dev, ctx->panel_desc->mode);
+ if (!mode)
+ return -ENOMEM;
+
+ drm_mode_set_name(mode);
+
+ mode->type = DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED;
+ connector->display_info.width_mm = mode->width_mm;
+ connector->display_info.height_mm = mode->height_mm;
+ drm_mode_probed_add(connector, mode);
+
+ return 1;
+}
+
+static const struct drm_panel_funcs ltk050h3146w_funcs = {
+ .unprepare = ltk050h3146w_unprepare,
+ .prepare = ltk050h3146w_prepare,
+ .get_modes = ltk050h3146w_get_modes,
+};
+
+static int ltk050h3146w_probe(struct mipi_dsi_device *dsi)
+{
+ struct device *dev = &dsi->dev;
+ struct ltk050h3146w *ctx;
+ int ret;
+
+ ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ ctx->panel_desc = of_device_get_match_data(dev);
+ if (!ctx->panel_desc)
+ return -EINVAL;
+
+ ctx->reset_gpio = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_LOW);
+ if (IS_ERR(ctx->reset_gpio)) {
+ DRM_DEV_ERROR(dev, "cannot get reset gpio\n");
+ return PTR_ERR(ctx->reset_gpio);
+ }
+
+ ctx->vci = devm_regulator_get(dev, "vci");
+ if (IS_ERR(ctx->vci)) {
+ ret = PTR_ERR(ctx->vci);
+ if (ret != -EPROBE_DEFER)
+ DRM_DEV_ERROR(dev,
+ "Failed to request vci regulator: %d\n",
+ ret);
+ return ret;
+ }
+
+ ctx->iovcc = devm_regulator_get(dev, "iovcc");
+ if (IS_ERR(ctx->iovcc)) {
+ ret = PTR_ERR(ctx->iovcc);
+ if (ret != -EPROBE_DEFER)
+ DRM_DEV_ERROR(dev,
+ "Failed to request iovcc regulator: %d\n",
+ ret);
+ return ret;
+ }
+
+ mipi_dsi_set_drvdata(dsi, ctx);
+
+ ctx->dev = dev;
+
+ dsi->lanes = 4;
+ dsi->format = MIPI_DSI_FMT_RGB888;
+ dsi->mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_BURST |
+ MIPI_DSI_MODE_LPM | MIPI_DSI_MODE_EOT_PACKET;
+
+ drm_panel_init(&ctx->panel, &dsi->dev, &ltk050h3146w_funcs,
+ DRM_MODE_CONNECTOR_DSI);
+
+ ret = drm_panel_of_backlight(&ctx->panel);
+ if (ret)
+ return ret;
+
+ drm_panel_add(&ctx->panel);
+
+ ret = mipi_dsi_attach(dsi);
+ if (ret < 0) {
+ DRM_DEV_ERROR(dev, "mipi_dsi_attach failed: %d\n", ret);
+ drm_panel_remove(&ctx->panel);
+ return ret;
+ }
+
+ return 0;
+}
+
+static void ltk050h3146w_shutdown(struct mipi_dsi_device *dsi)
+{
+ struct ltk050h3146w *ctx = mipi_dsi_get_drvdata(dsi);
+ int ret;
+
+ ret = drm_panel_unprepare(&ctx->panel);
+ if (ret < 0)
+ DRM_DEV_ERROR(&dsi->dev, "Failed to unprepare panel: %d\n",
+ ret);
+
+ ret = drm_panel_disable(&ctx->panel);
+ if (ret < 0)
+ DRM_DEV_ERROR(&dsi->dev, "Failed to disable panel: %d\n",
+ ret);
+}
+
+static int ltk050h3146w_remove(struct mipi_dsi_device *dsi)
+{
+ struct ltk050h3146w *ctx = mipi_dsi_get_drvdata(dsi);
+ int ret;
+
+ ltk050h3146w_shutdown(dsi);
+
+ ret = mipi_dsi_detach(dsi);
+ if (ret < 0)
+ DRM_DEV_ERROR(&dsi->dev, "Failed to detach from DSI host: %d\n",
+ ret);
+
+ drm_panel_remove(&ctx->panel);
+
+ return 0;
+}
+
+static const struct of_device_id ltk050h3146w_of_match[] = {
+ {
+ .compatible = "leadtek,ltk050h3146w",
+ .data = &ltk050h3146w_data,
+ },
+ {
+ .compatible = "leadtek,ltk050h3146w-a2",
+ .data = &ltk050h3146w_a2_data,
+ },
+ { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, ltk050h3146w_of_match);
+
+static struct mipi_dsi_driver ltk050h3146w_driver = {
+ .driver = {
+ .name = "panel-leadtek-ltk050h3146w",
+ .of_match_table = ltk050h3146w_of_match,
+ },
+ .probe = ltk050h3146w_probe,
+ .remove = ltk050h3146w_remove,
+ .shutdown = ltk050h3146w_shutdown,
+};
+module_mipi_dsi_driver(ltk050h3146w_driver);
+
+MODULE_AUTHOR("Heiko Stuebner <heiko.stuebner@theobroma-systems.com>");
+MODULE_DESCRIPTION("DRM driver for Leadtek LTK050H3146W MIPI DSI panel");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/gpu/drm/panel/panel-leadtek-ltk500hd1829.c b/drivers/gpu/drm/panel/panel-leadtek-ltk500hd1829.c
index 76ecf2de9c44..113ab9c0396b 100644
--- a/drivers/gpu/drm/panel/panel-leadtek-ltk500hd1829.c
+++ b/drivers/gpu/drm/panel/panel-leadtek-ltk500hd1829.c
@@ -377,7 +377,7 @@ static const struct drm_display_mode default_mode = {
.vsync_end = 1280 + 30 + 4,
.vtotal = 1280 + 30 + 4 + 12,
.vrefresh = 60,
- .clock = 41600,
+ .clock = 69217,
.width_mm = 62,
.height_mm = 110,
};
diff --git a/drivers/gpu/drm/panel/panel-novatek-nt39016.c b/drivers/gpu/drm/panel/panel-novatek-nt39016.c
index a470810f7dbe..05cae8d62d56 100644
--- a/drivers/gpu/drm/panel/panel-novatek-nt39016.c
+++ b/drivers/gpu/drm/panel/panel-novatek-nt39016.c
@@ -49,7 +49,8 @@ enum nt39016_regs {
#define NT39016_SYSTEM_STANDBY BIT(1)
struct nt39016_panel_info {
- struct drm_display_mode display_mode;
+ const struct drm_display_mode *display_modes;
+ unsigned int num_modes;
u16 width_mm, height_mm;
u32 bus_format, bus_flags;
};
@@ -212,15 +213,22 @@ static int nt39016_get_modes(struct drm_panel *drm_panel,
struct nt39016 *panel = to_nt39016(drm_panel);
const struct nt39016_panel_info *panel_info = panel->panel_info;
struct drm_display_mode *mode;
+ unsigned int i;
- mode = drm_mode_duplicate(connector->dev, &panel_info->display_mode);
- if (!mode)
- return -ENOMEM;
+ for (i = 0; i < panel_info->num_modes; i++) {
+ mode = drm_mode_duplicate(connector->dev,
+ &panel_info->display_modes[i]);
+ if (!mode)
+ return -ENOMEM;
+
+ drm_mode_set_name(mode);
- drm_mode_set_name(mode);
+ mode->type = DRM_MODE_TYPE_DRIVER;
+ if (panel_info->num_modes == 1)
+ mode->type |= DRM_MODE_TYPE_PREFERRED;
- mode->type = DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED;
- drm_mode_probed_add(connector, mode);
+ drm_mode_probed_add(connector, mode);
+ }
connector->display_info.bpc = 8;
connector->display_info.width_mm = panel_info->width_mm;
@@ -230,7 +238,7 @@ static int nt39016_get_modes(struct drm_panel *drm_panel,
&panel_info->bus_format, 1);
connector->display_info.bus_flags = panel_info->bus_flags;
- return 1;
+ return panel_info->num_modes;
}
static const struct drm_panel_funcs nt39016_funcs = {
@@ -316,8 +324,8 @@ static int nt39016_remove(struct spi_device *spi)
return 0;
}
-static const struct nt39016_panel_info kd035g6_info = {
- .display_mode = {
+static const struct drm_display_mode kd035g6_display_modes[] = {
+ { /* 60 Hz */
.clock = 6000,
.hdisplay = 320,
.hsync_start = 320 + 10,
@@ -330,6 +338,24 @@ static const struct nt39016_panel_info kd035g6_info = {
.vrefresh = 60,
.flags = DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC,
},
+ { /* 50 Hz */
+ .clock = 5400,
+ .hdisplay = 320,
+ .hsync_start = 320 + 42,
+ .hsync_end = 320 + 42 + 50,
+ .htotal = 320 + 42 + 50 + 20,
+ .vdisplay = 240,
+ .vsync_start = 240 + 5,
+ .vsync_end = 240 + 5 + 1,
+ .vtotal = 240 + 5 + 1 + 4,
+ .vrefresh = 50,
+ .flags = DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC,
+ },
+};
+
+static const struct nt39016_panel_info kd035g6_info = {
+ .display_modes = kd035g6_display_modes,
+ .num_modes = ARRAY_SIZE(kd035g6_display_modes),
.width_mm = 71,
.height_mm = 53,
.bus_format = MEDIA_BUS_FMT_RGB888_1X24,
diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c
index 3ad828eaefe1..b6ecd1552132 100644
--- a/drivers/gpu/drm/panel/panel-simple.c
+++ b/drivers/gpu/drm/panel/panel-simple.c
@@ -23,6 +23,7 @@
#include <linux/delay.h>
#include <linux/gpio/consumer.h>
+#include <linux/iopoll.h>
#include <linux/module.h>
#include <linux/of_platform.h>
#include <linux/platform_device.h>
@@ -108,6 +109,7 @@ struct panel_simple {
struct i2c_adapter *ddc;
struct gpio_desc *enable_gpio;
+ struct gpio_desc *hpd_gpio;
struct drm_display_mode override_mode;
};
@@ -259,11 +261,37 @@ static int panel_simple_unprepare(struct drm_panel *panel)
return 0;
}
+static int panel_simple_get_hpd_gpio(struct device *dev,
+ struct panel_simple *p, bool from_probe)
+{
+ int err;
+
+ p->hpd_gpio = devm_gpiod_get_optional(dev, "hpd", GPIOD_IN);
+ if (IS_ERR(p->hpd_gpio)) {
+ err = PTR_ERR(p->hpd_gpio);
+
+ /*
+ * If we're called from probe we won't consider '-EPROBE_DEFER'
+ * to be an error--we'll leave the error code in "hpd_gpio".
+ * When we try to use it we'll try again. This allows for
+ * circular dependencies where the component providing the
+ * hpd gpio needs the panel to init before probing.
+ */
+ if (err != -EPROBE_DEFER || !from_probe) {
+ dev_err(dev, "failed to get 'hpd' GPIO: %d\n", err);
+ return err;
+ }
+ }
+
+ return 0;
+}
+
static int panel_simple_prepare(struct drm_panel *panel)
{
struct panel_simple *p = to_panel_simple(panel);
unsigned int delay;
int err;
+ int hpd_asserted;
if (p->prepared)
return 0;
@@ -282,6 +310,26 @@ static int panel_simple_prepare(struct drm_panel *panel)
if (delay)
msleep(delay);
+ if (p->hpd_gpio) {
+ if (IS_ERR(p->hpd_gpio)) {
+ err = panel_simple_get_hpd_gpio(panel->dev, p, false);
+ if (err)
+ return err;
+ }
+
+ err = readx_poll_timeout(gpiod_get_value_cansleep, p->hpd_gpio,
+ hpd_asserted, hpd_asserted,
+ 1000, 2000000);
+ if (hpd_asserted < 0)
+ err = hpd_asserted;
+
+ if (err) {
+ dev_err(panel->dev,
+ "error waiting for hpd GPIO: %d\n", err);
+ return err;
+ }
+ }
+
p->prepared = true;
return 0;
@@ -462,6 +510,11 @@ static int panel_simple_probe(struct device *dev, const struct panel_desc *desc)
panel->desc = desc;
panel->no_hpd = of_property_read_bool(dev->of_node, "no-hpd");
+ if (!panel->no_hpd) {
+ err = panel_simple_get_hpd_gpio(dev, panel, true);
+ if (err)
+ return err;
+ }
panel->supply = devm_regulator_get(dev, "power");
if (IS_ERR(panel->supply))
@@ -836,7 +889,8 @@ static const struct panel_desc auo_g101evn010 = {
.width = 216,
.height = 135,
},
- .bus_format = MEDIA_BUS_FMT_RGB666_1X18,
+ .bus_format = MEDIA_BUS_FMT_RGB666_1X7X3_SPWG,
+ .connector_type = DRM_MODE_CONNECTOR_LVDS,
};
static const struct drm_display_mode auo_g104sn02_mode = {
@@ -862,6 +916,31 @@ static const struct panel_desc auo_g104sn02 = {
},
};
+static const struct drm_display_mode auo_g121ean01_mode = {
+ .clock = 66700,
+ .hdisplay = 1280,
+ .hsync_start = 1280 + 58,
+ .hsync_end = 1280 + 58 + 8,
+ .htotal = 1280 + 58 + 8 + 70,
+ .vdisplay = 800,
+ .vsync_start = 800 + 6,
+ .vsync_end = 800 + 6 + 4,
+ .vtotal = 800 + 6 + 4 + 10,
+ .vrefresh = 60,
+};
+
+static const struct panel_desc auo_g121ean01 = {
+ .modes = &auo_g121ean01_mode,
+ .num_modes = 1,
+ .bpc = 8,
+ .size = {
+ .width = 261,
+ .height = 163,
+ },
+ .bus_format = MEDIA_BUS_FMT_RGB888_1X7X4_SPWG,
+ .connector_type = DRM_MODE_CONNECTOR_LVDS,
+};
+
static const struct display_timing auo_g133han01_timings = {
.pixelclock = { 134000000, 141200000, 149000000 },
.hactive = { 1920, 1920, 1920 },
@@ -892,6 +971,31 @@ static const struct panel_desc auo_g133han01 = {
.connector_type = DRM_MODE_CONNECTOR_LVDS,
};
+static const struct drm_display_mode auo_g156xtn01_mode = {
+ .clock = 76000,
+ .hdisplay = 1366,
+ .hsync_start = 1366 + 33,
+ .hsync_end = 1366 + 33 + 67,
+ .htotal = 1560,
+ .vdisplay = 768,
+ .vsync_start = 768 + 4,
+ .vsync_end = 768 + 4 + 4,
+ .vtotal = 806,
+ .vrefresh = 60,
+};
+
+static const struct panel_desc auo_g156xtn01 = {
+ .modes = &auo_g156xtn01_mode,
+ .num_modes = 1,
+ .bpc = 8,
+ .size = {
+ .width = 344,
+ .height = 194,
+ },
+ .bus_format = MEDIA_BUS_FMT_RGB888_1X7X4_SPWG,
+ .connector_type = DRM_MODE_CONNECTOR_LVDS,
+};
+
static const struct display_timing auo_g185han01_timings = {
.pixelclock = { 120000000, 144000000, 175000000 },
.hactive = { 1920, 1920, 1920 },
@@ -922,6 +1026,36 @@ static const struct panel_desc auo_g185han01 = {
.connector_type = DRM_MODE_CONNECTOR_LVDS,
};
+static const struct display_timing auo_g190ean01_timings = {
+ .pixelclock = { 90000000, 108000000, 135000000 },
+ .hactive = { 1280, 1280, 1280 },
+ .hfront_porch = { 126, 184, 1266 },
+ .hback_porch = { 84, 122, 844 },
+ .hsync_len = { 70, 102, 704 },
+ .vactive = { 1024, 1024, 1024 },
+ .vfront_porch = { 4, 26, 76 },
+ .vback_porch = { 2, 8, 25 },
+ .vsync_len = { 2, 8, 25 },
+};
+
+static const struct panel_desc auo_g190ean01 = {
+ .timings = &auo_g190ean01_timings,
+ .num_timings = 1,
+ .bpc = 8,
+ .size = {
+ .width = 376,
+ .height = 301,
+ },
+ .delay = {
+ .prepare = 50,
+ .enable = 200,
+ .disable = 110,
+ .unprepare = 1000,
+ },
+ .bus_format = MEDIA_BUS_FMT_RGB888_1X7X4_SPWG,
+ .connector_type = DRM_MODE_CONNECTOR_LVDS,
+};
+
static const struct display_timing auo_p320hvn03_timings = {
.pixelclock = { 106000000, 148500000, 164000000 },
.hactive = { 1920, 1920, 1920 },
@@ -1092,6 +1226,38 @@ static const struct panel_desc boe_nv101wxmn51 = {
},
};
+/* Also used for boe_nv133fhm_n62 */
+static const struct drm_display_mode boe_nv133fhm_n61_modes = {
+ .clock = 147840,
+ .hdisplay = 1920,
+ .hsync_start = 1920 + 48,
+ .hsync_end = 1920 + 48 + 32,
+ .htotal = 1920 + 48 + 32 + 200,
+ .vdisplay = 1080,
+ .vsync_start = 1080 + 3,
+ .vsync_end = 1080 + 3 + 6,
+ .vtotal = 1080 + 3 + 6 + 31,
+ .vrefresh = 60,
+};
+
+/* Also used for boe_nv133fhm_n62 */
+static const struct panel_desc boe_nv133fhm_n61 = {
+ .modes = &boe_nv133fhm_n61_modes,
+ .num_modes = 1,
+ .bpc = 6,
+ .size = {
+ .width = 294,
+ .height = 165,
+ },
+ .delay = {
+ .hpd_absent_delay = 200,
+ .unprepare = 500,
+ },
+ .bus_format = MEDIA_BUS_FMT_RGB888_1X24,
+ .bus_flags = DRM_BUS_FLAG_DATA_MSB_TO_LSB,
+ .connector_type = DRM_MODE_CONNECTOR_eDP,
+};
+
static const struct drm_display_mode boe_nv140fhmn49_modes[] = {
{
.clock = 148500,
@@ -1980,6 +2146,37 @@ static const struct panel_desc innolux_zj070na_01p = {
},
};
+static const struct drm_display_mode ivo_m133nwf4_r0_mode = {
+ .clock = 138778,
+ .hdisplay = 1920,
+ .hsync_start = 1920 + 24,
+ .hsync_end = 1920 + 24 + 48,
+ .htotal = 1920 + 24 + 48 + 88,
+ .vdisplay = 1080,
+ .vsync_start = 1080 + 3,
+ .vsync_end = 1080 + 3 + 12,
+ .vtotal = 1080 + 3 + 12 + 17,
+ .vrefresh = 60,
+ .flags = DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC,
+};
+
+static const struct panel_desc ivo_m133nwf4_r0 = {
+ .modes = &ivo_m133nwf4_r0_mode,
+ .num_modes = 1,
+ .bpc = 8,
+ .size = {
+ .width = 294,
+ .height = 165,
+ },
+ .delay = {
+ .hpd_absent_delay = 200,
+ .unprepare = 500,
+ },
+ .bus_format = MEDIA_BUS_FMT_RGB888_1X24,
+ .bus_flags = DRM_BUS_FLAG_DATA_MSB_TO_LSB,
+ .connector_type = DRM_MODE_CONNECTOR_eDP,
+};
+
static const struct display_timing koe_tx14d24vm1bpa_timing = {
.pixelclock = { 5580000, 5850000, 6200000 },
.hactive = { 320, 320, 320 },
@@ -2168,6 +2365,7 @@ static const struct panel_desc lg_lp120up1 = {
.width = 267,
.height = 183,
},
+ .connector_type = DRM_MODE_CONNECTOR_eDP,
};
static const struct drm_display_mode lg_lp129qe_mode = {
@@ -3065,6 +3263,32 @@ static const struct panel_desc shelly_sca07010_bfn_lnn = {
.bus_format = MEDIA_BUS_FMT_RGB666_1X18,
};
+static const struct drm_display_mode starry_kr070pe2t_mode = {
+ .clock = 33000,
+ .hdisplay = 800,
+ .hsync_start = 800 + 209,
+ .hsync_end = 800 + 209 + 1,
+ .htotal = 800 + 209 + 1 + 45,
+ .vdisplay = 480,
+ .vsync_start = 480 + 22,
+ .vsync_end = 480 + 22 + 1,
+ .vtotal = 480 + 22 + 1 + 22,
+ .vrefresh = 60,
+};
+
+static const struct panel_desc starry_kr070pe2t = {
+ .modes = &starry_kr070pe2t_mode,
+ .num_modes = 1,
+ .bpc = 8,
+ .size = {
+ .width = 152,
+ .height = 86,
+ },
+ .bus_format = MEDIA_BUS_FMT_RGB888_1X24,
+ .bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_DRIVE_NEGEDGE,
+ .connector_type = DRM_MODE_CONNECTOR_LVDS,
+};
+
static const struct drm_display_mode starry_kr122ea0sra_mode = {
.clock = 147000,
.hdisplay = 1920,
@@ -3455,12 +3679,21 @@ static const struct of_device_id platform_of_match[] = {
.compatible = "auo,g104sn02",
.data = &auo_g104sn02,
}, {
+ .compatible = "auo,g121ean01",
+ .data = &auo_g121ean01,
+ }, {
.compatible = "auo,g133han01",
.data = &auo_g133han01,
}, {
+ .compatible = "auo,g156xtn01",
+ .data = &auo_g156xtn01,
+ }, {
.compatible = "auo,g185han01",
.data = &auo_g185han01,
}, {
+ .compatible = "auo,g190ean01",
+ .data = &auo_g190ean01,
+ }, {
.compatible = "auo,p320hvn03",
.data = &auo_p320hvn03,
}, {
@@ -3479,6 +3712,12 @@ static const struct of_device_id platform_of_match[] = {
.compatible = "boe,nv101wxmn51",
.data = &boe_nv101wxmn51,
}, {
+ .compatible = "boe,nv133fhm-n61",
+ .data = &boe_nv133fhm_n61,
+ }, {
+ .compatible = "boe,nv133fhm-n62",
+ .data = &boe_nv133fhm_n61,
+ }, {
.compatible = "boe,nv140fhmn49",
.data = &boe_nv140fhmn49,
}, {
@@ -3587,6 +3826,9 @@ static const struct of_device_id platform_of_match[] = {
.compatible = "innolux,zj070na-01p",
.data = &innolux_zj070na_01p,
}, {
+ .compatible = "ivo,m133nwf4-r0",
+ .data = &ivo_m133nwf4_r0,
+ }, {
.compatible = "koe,tx14d24vm1bpa",
.data = &koe_tx14d24vm1bpa,
}, {
@@ -3716,6 +3958,9 @@ static const struct of_device_id platform_of_match[] = {
.compatible = "shelly,sca07010-bfn-lnn",
.data = &shelly_sca07010_bfn_lnn,
}, {
+ .compatible = "starry,kr070pe2t",
+ .data = &starry_kr070pe2t,
+ }, {
.compatible = "starry,kr122ea0sra",
.data = &starry_kr122ea0sra,
}, {
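Editor's note on the panel-simple hot-plug handling added above: it hinges on the three possible results of devm_gpiod_get_optional(), which is why prepare() checks the pointer and IS_ERR() separately before polling the line. A minimal sketch of those outcomes; the wrapper function name is hypothetical, the gpiolib behaviour is not:

#include <linux/err.h>
#include <linux/gpio/consumer.h>

/* Hypothetical helper mirroring the driver's decision points. */
static int hpd_lookup_example(struct device *dev)
{
    struct gpio_desc *hpd = devm_gpiod_get_optional(dev, "hpd", GPIOD_IN);

    if (!hpd)
        return 0;               /* no hpd-gpios property: nothing to wait for */
    if (IS_ERR(hpd))
        return PTR_ERR(hpd);    /* e.g. -EPROBE_DEFER: probe keeps it, prepare() retries */
    return 1;                   /* valid descriptor: poll until the panel asserts HPD */
}
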
diff --git a/drivers/gpu/drm/panel/panel-truly-nt35597.c b/drivers/gpu/drm/panel/panel-truly-nt35597.c
index 012ca62bf30e..f0ad6081570f 100644
--- a/drivers/gpu/drm/panel/panel-truly-nt35597.c
+++ b/drivers/gpu/drm/panel/panel-truly-nt35597.c
@@ -490,9 +490,7 @@ static int truly_nt35597_panel_add(struct truly_nt35597 *ctx)
{
struct device *dev = ctx->dev;
int ret, i;
- const struct nt35597_config *config;
- config = ctx->config;
for (i = 0; i < ARRAY_SIZE(ctx->supplies); i++)
ctx->supplies[i].supply = regulator_names[i];
diff --git a/drivers/gpu/drm/panel/panel-visionox-rm69299.c b/drivers/gpu/drm/panel/panel-visionox-rm69299.c
new file mode 100644
index 000000000000..42f299ad3804
--- /dev/null
+++ b/drivers/gpu/drm/panel/panel-visionox-rm69299.c
@@ -0,0 +1,303 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2019, The Linux Foundation. All rights reserved.
+ */
+
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/gpio/consumer.h>
+#include <linux/regulator/consumer.h>
+
+#include <video/mipi_display.h>
+
+#include <drm/drm_mipi_dsi.h>
+#include <drm/drm_modes.h>
+#include <drm/drm_panel.h>
+#include <drm/drm_print.h>
+
+struct visionox_rm69299 {
+ struct drm_panel panel;
+ struct regulator_bulk_data supplies[2];
+ struct gpio_desc *reset_gpio;
+ struct mipi_dsi_device *dsi;
+ bool prepared;
+ bool enabled;
+};
+
+static inline struct visionox_rm69299 *panel_to_ctx(struct drm_panel *panel)
+{
+ return container_of(panel, struct visionox_rm69299, panel);
+}
+
+static int visionox_rm69299_power_on(struct visionox_rm69299 *ctx)
+{
+ int ret;
+
+ ret = regulator_bulk_enable(ARRAY_SIZE(ctx->supplies), ctx->supplies);
+ if (ret < 0)
+ return ret;
+
+ /*
+ * Reset sequence of visionox panel requires the panel to be
+ * out of reset for 10ms, followed by being held in reset
+ * for 10ms and then out again
+ */
+ gpiod_set_value(ctx->reset_gpio, 1);
+ usleep_range(10000, 20000);
+ gpiod_set_value(ctx->reset_gpio, 0);
+ usleep_range(10000, 20000);
+ gpiod_set_value(ctx->reset_gpio, 1);
+ usleep_range(10000, 20000);
+
+ return 0;
+}
+
+static int visionox_rm69299_power_off(struct visionox_rm69299 *ctx)
+{
+ gpiod_set_value(ctx->reset_gpio, 0);
+
+ return regulator_bulk_disable(ARRAY_SIZE(ctx->supplies), ctx->supplies);
+}
+
+static int visionox_rm69299_unprepare(struct drm_panel *panel)
+{
+ struct visionox_rm69299 *ctx = panel_to_ctx(panel);
+ int ret;
+
+ ctx->dsi->mode_flags = 0;
+
+ ret = mipi_dsi_dcs_write(ctx->dsi, MIPI_DCS_SET_DISPLAY_OFF, NULL, 0);
+ if (ret < 0)
+ DRM_DEV_ERROR(ctx->panel.dev,
+ "set_display_off cmd failed ret = %d\n", ret);
+
+ /* 120ms delay required here as per DCS spec */
+ msleep(120);
+
+ ret = mipi_dsi_dcs_write(ctx->dsi, MIPI_DCS_ENTER_SLEEP_MODE, NULL, 0);
+ if (ret < 0) {
+ DRM_DEV_ERROR(ctx->panel.dev,
+ "enter_sleep cmd failed ret = %d\n", ret);
+ }
+
+ ret = visionox_rm69299_power_off(ctx);
+
+ ctx->prepared = false;
+ return ret;
+}
+
+static int visionox_rm69299_prepare(struct drm_panel *panel)
+{
+ struct visionox_rm69299 *ctx = panel_to_ctx(panel);
+ int ret;
+
+ if (ctx->prepared)
+ return 0;
+
+ ret = visionox_rm69299_power_on(ctx);
+ if (ret < 0)
+ return ret;
+
+ ctx->dsi->mode_flags |= MIPI_DSI_MODE_LPM;
+
+ ret = mipi_dsi_dcs_write_buffer(ctx->dsi, (u8[]) { 0xfe, 0x00 }, 2);
+ if (ret < 0) {
+ DRM_DEV_ERROR(ctx->panel.dev,
+ "cmd set tx 0 failed, ret = %d\n", ret);
+ goto power_off;
+ }
+
+ ret = mipi_dsi_dcs_write_buffer(ctx->dsi, (u8[]) { 0xc2, 0x08 }, 2);
+ if (ret < 0) {
+ DRM_DEV_ERROR(ctx->panel.dev,
+ "cmd set tx 1 failed, ret = %d\n", ret);
+ goto power_off;
+ }
+
+ ret = mipi_dsi_dcs_write_buffer(ctx->dsi, (u8[]) { 0x35, 0x00 }, 2);
+ if (ret < 0) {
+ DRM_DEV_ERROR(ctx->panel.dev,
+ "cmd set tx 2 failed, ret = %d\n", ret);
+ goto power_off;
+ }
+
+ ret = mipi_dsi_dcs_write_buffer(ctx->dsi, (u8[]) { 0x51, 0xff }, 2);
+ if (ret < 0) {
+ DRM_DEV_ERROR(ctx->panel.dev,
+ "cmd set tx 3 failed, ret = %d\n", ret);
+ goto power_off;
+ }
+
+ ret = mipi_dsi_dcs_write(ctx->dsi, MIPI_DCS_EXIT_SLEEP_MODE, NULL, 0);
+ if (ret < 0) {
+ DRM_DEV_ERROR(ctx->panel.dev,
+ "exit_sleep_mode cmd failed ret = %d\n", ret);
+ goto power_off;
+ }
+
+ /* Per DSI spec wait 120ms after sending exit sleep DCS command */
+ msleep(120);
+
+ ret = mipi_dsi_dcs_write(ctx->dsi, MIPI_DCS_SET_DISPLAY_ON, NULL, 0);
+ if (ret < 0) {
+ DRM_DEV_ERROR(ctx->panel.dev,
+ "set_display_on cmd failed ret = %d\n", ret);
+ goto power_off;
+ }
+
+ /* Per DSI spec wait 120ms after sending set_display_on DCS command */
+ msleep(120);
+
+ ctx->prepared = true;
+
+ return 0;
+
+power_off:
+ return ret;
+}
+
+static const struct drm_display_mode visionox_rm69299_1080x2248_60hz = {
+ .name = "1080x2248",
+ .clock = 158695,
+ .hdisplay = 1080,
+ .hsync_start = 1080 + 26,
+ .hsync_end = 1080 + 26 + 2,
+ .htotal = 1080 + 26 + 2 + 36,
+ .vdisplay = 2248,
+ .vsync_start = 2248 + 56,
+ .vsync_end = 2248 + 56 + 4,
+ .vtotal = 2248 + 56 + 4 + 4,
+ .vrefresh = 60,
+ .flags = 0,
+};
+
+static int visionox_rm69299_get_modes(struct drm_panel *panel,
+ struct drm_connector *connector)
+{
+ struct visionox_rm69299 *ctx = panel_to_ctx(panel);
+ struct drm_display_mode *mode;
+
+ mode = drm_mode_create(connector->dev);
+ if (!mode) {
+ DRM_DEV_ERROR(ctx->panel.dev,
+ "failed to create a new display mode\n");
+ return 0;
+ }
+
+ connector->display_info.width_mm = 74;
+ connector->display_info.height_mm = 131;
+ drm_mode_copy(mode, &visionox_rm69299_1080x2248_60hz);
+ mode->type = DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED;
+ drm_mode_probed_add(connector, mode);
+
+ return 1;
+}
+
+static const struct drm_panel_funcs visionox_rm69299_drm_funcs = {
+ .unprepare = visionox_rm69299_unprepare,
+ .prepare = visionox_rm69299_prepare,
+ .get_modes = visionox_rm69299_get_modes,
+};
+
+static int visionox_rm69299_probe(struct mipi_dsi_device *dsi)
+{
+ struct device *dev = &dsi->dev;
+ struct visionox_rm69299 *ctx;
+ int ret;
+
+ ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ mipi_dsi_set_drvdata(dsi, ctx);
+
+ ctx->panel.dev = dev;
+ ctx->dsi = dsi;
+
+ ctx->supplies[0].supply = "vdda";
+ ctx->supplies[1].supply = "vdd3p3";
+
+ ret = devm_regulator_bulk_get(ctx->panel.dev, ARRAY_SIZE(ctx->supplies),
+ ctx->supplies);
+ if (ret < 0)
+ return ret;
+
+ ctx->reset_gpio = devm_gpiod_get(ctx->panel.dev,
+ "reset", GPIOD_OUT_LOW);
+ if (IS_ERR(ctx->reset_gpio)) {
+ DRM_DEV_ERROR(dev, "cannot get reset gpio %ld\n",
+ PTR_ERR(ctx->reset_gpio));
+ return PTR_ERR(ctx->reset_gpio);
+ }
+
+ drm_panel_init(&ctx->panel, dev, &visionox_rm69299_drm_funcs,
+ DRM_MODE_CONNECTOR_DSI);
+ ctx->panel.dev = dev;
+ ctx->panel.funcs = &visionox_rm69299_drm_funcs;
+ drm_panel_add(&ctx->panel);
+
+ dsi->lanes = 4;
+ dsi->format = MIPI_DSI_FMT_RGB888;
+ dsi->mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_LPM |
+ MIPI_DSI_CLOCK_NON_CONTINUOUS;
+ ret = mipi_dsi_attach(dsi);
+ if (ret < 0) {
+ DRM_DEV_ERROR(dev, "dsi attach failed ret = %d\n", ret);
+ goto err_dsi_attach;
+ }
+
+ ret = regulator_set_load(ctx->supplies[0].consumer, 32000);
+ if (ret) {
+ DRM_DEV_ERROR(dev,
+ "regulator set load failed for vdda supply ret = %d\n",
+ ret);
+ goto err_set_load;
+ }
+
+ ret = regulator_set_load(ctx->supplies[1].consumer, 13200);
+ if (ret) {
+ DRM_DEV_ERROR(dev,
+ "regulator set load failed for vdd3p3 supply ret = %d\n",
+ ret);
+ goto err_set_load;
+ }
+
+ return 0;
+
+err_set_load:
+ mipi_dsi_detach(dsi);
+err_dsi_attach:
+ drm_panel_remove(&ctx->panel);
+ return ret;
+}
+
+static int visionox_rm69299_remove(struct mipi_dsi_device *dsi)
+{
+ struct visionox_rm69299 *ctx = mipi_dsi_get_drvdata(dsi);
+
+ mipi_dsi_detach(ctx->dsi);
+ mipi_dsi_device_unregister(ctx->dsi);
+
+ drm_panel_remove(&ctx->panel);
+ return 0;
+}
+
+static const struct of_device_id visionox_rm69299_of_match[] = {
+ { .compatible = "visionox,rm69299-1080p-display", },
+ { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, visionox_rm69299_of_match);
+
+static struct mipi_dsi_driver visionox_rm69299_driver = {
+ .driver = {
+ .name = "panel-visionox-rm69299",
+ .of_match_table = visionox_rm69299_of_match,
+ },
+ .probe = visionox_rm69299_probe,
+ .remove = visionox_rm69299_remove,
+};
+module_mipi_dsi_driver(visionox_rm69299_driver);
+
+MODULE_DESCRIPTION("Visionox RM69299 DSI Panel Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/gpu/drm/pl111/Makefile b/drivers/gpu/drm/pl111/Makefile
index 0c70f0e91d21..67d430d433e0 100644
--- a/drivers/gpu/drm/pl111/Makefile
+++ b/drivers/gpu/drm/pl111/Makefile
@@ -3,7 +3,6 @@ pl111_drm-y += pl111_display.o \
pl111_versatile.o \
pl111_drv.o
-pl111_drm-$(CONFIG_ARCH_VEXPRESS) += pl111_vexpress.o
pl111_drm-$(CONFIG_ARCH_NOMADIK) += pl111_nomadik.o
pl111_drm-$(CONFIG_DEBUG_FS) += pl111_debugfs.o
diff --git a/drivers/gpu/drm/pl111/pl111_debugfs.c b/drivers/gpu/drm/pl111/pl111_debugfs.c
index 3c8e82016854..26ca8cdf3e60 100644
--- a/drivers/gpu/drm/pl111/pl111_debugfs.c
+++ b/drivers/gpu/drm/pl111/pl111_debugfs.c
@@ -51,10 +51,10 @@ static const struct drm_info_list pl111_debugfs_list[] = {
{"regs", pl111_debugfs_regs, 0},
};
-int
+void
pl111_debugfs_init(struct drm_minor *minor)
{
- return drm_debugfs_create_files(pl111_debugfs_list,
- ARRAY_SIZE(pl111_debugfs_list),
- minor->debugfs_root, minor);
+ drm_debugfs_create_files(pl111_debugfs_list,
+ ARRAY_SIZE(pl111_debugfs_list),
+ minor->debugfs_root, minor);
}
diff --git a/drivers/gpu/drm/pl111/pl111_drm.h b/drivers/gpu/drm/pl111/pl111_drm.h
index 77d2da9a8a7c..ba399bcb792f 100644
--- a/drivers/gpu/drm/pl111/pl111_drm.h
+++ b/drivers/gpu/drm/pl111/pl111_drm.h
@@ -84,6 +84,6 @@ struct pl111_drm_dev_private {
int pl111_display_init(struct drm_device *dev);
irqreturn_t pl111_irq(int irq, void *data);
-int pl111_debugfs_init(struct drm_minor *minor);
+void pl111_debugfs_init(struct drm_minor *minor);
#endif /* _PL111_DRM_H_ */
diff --git a/drivers/gpu/drm/pl111/pl111_drv.c b/drivers/gpu/drm/pl111/pl111_drv.c
index aa8aa8d9e405..da0c39dae874 100644
--- a/drivers/gpu/drm/pl111/pl111_drv.c
+++ b/drivers/gpu/drm/pl111/pl111_drv.c
@@ -90,10 +90,13 @@ static int pl111_modeset_init(struct drm_device *dev)
struct drm_panel *panel = NULL;
struct drm_bridge *bridge = NULL;
bool defer = false;
- int ret = 0;
+ int ret;
int i;
- drm_mode_config_init(dev);
+ ret = drmm_mode_config_init(dev);
+ if (ret)
+ return ret;
+
mode_config = &dev->mode_config;
mode_config->funcs = &mode_config_funcs;
mode_config->min_width = 1;
@@ -154,7 +157,7 @@ static int pl111_modeset_init(struct drm_device *dev)
DRM_MODE_CONNECTOR_Unknown);
if (IS_ERR(bridge)) {
ret = PTR_ERR(bridge);
- goto out_config;
+ goto finish;
}
} else if (bridge) {
dev_info(dev->dev, "Using non-panel bridge\n");
@@ -197,8 +200,6 @@ static int pl111_modeset_init(struct drm_device *dev)
out_bridge:
if (panel)
drm_panel_bridge_remove(bridge);
-out_config:
- drm_mode_config_cleanup(dev);
finish:
return ret;
}
@@ -343,7 +344,6 @@ static int pl111_amba_remove(struct amba_device *amba_dev)
drm_dev_unregister(drm);
if (priv->panel)
drm_panel_bridge_remove(priv->bridge);
- drm_mode_config_cleanup(drm);
drm_dev_put(drm);
of_reserved_mem_device_release(dev);
@@ -444,6 +444,7 @@ static const struct amba_id pl111_id_table[] = {
},
{0, 0},
};
+MODULE_DEVICE_TABLE(amba, pl111_id_table);
static struct amba_driver pl111_amba_driver __maybe_unused = {
.drv = {
diff --git a/drivers/gpu/drm/pl111/pl111_versatile.c b/drivers/gpu/drm/pl111/pl111_versatile.c
index 4f325c410b5d..64f01a4e6767 100644
--- a/drivers/gpu/drm/pl111/pl111_versatile.c
+++ b/drivers/gpu/drm/pl111/pl111_versatile.c
@@ -8,9 +8,9 @@
#include <linux/of.h>
#include <linux/of_platform.h>
#include <linux/regmap.h>
+#include <linux/vexpress.h>
#include "pl111_versatile.h"
-#include "pl111_vexpress.h"
#include "pl111_drm.h"
static struct regmap *versatile_syscon_map;
@@ -361,13 +361,110 @@ static const struct pl111_variant_data pl111_vexpress = {
.broken_clockdivider = true,
};
+#define VEXPRESS_FPGAMUX_MOTHERBOARD 0x00
+#define VEXPRESS_FPGAMUX_DAUGHTERBOARD_1 0x01
+#define VEXPRESS_FPGAMUX_DAUGHTERBOARD_2 0x02
+
+static int pl111_vexpress_clcd_init(struct device *dev, struct device_node *np,
+ struct pl111_drm_dev_private *priv)
+{
+ struct platform_device *pdev;
+ struct device_node *root;
+ struct device_node *child;
+ struct device_node *ct_clcd = NULL;
+ struct regmap *map;
+ bool has_coretile_clcd = false;
+ bool has_coretile_hdlcd = false;
+ bool mux_motherboard = true;
+ u32 val;
+ int ret;
+
+ if (!IS_ENABLED(CONFIG_VEXPRESS_CONFIG))
+ return -ENODEV;
+
+ /*
+ * Check if we have a CLCD or HDLCD on the core tile by checking if a
+ * CLCD or HDLCD is available in the root of the device tree.
+ */
+ root = of_find_node_by_path("/");
+ if (!root)
+ return -EINVAL;
+
+ for_each_available_child_of_node(root, child) {
+ if (of_device_is_compatible(child, "arm,pl111")) {
+ has_coretile_clcd = true;
+ ct_clcd = child;
+ break;
+ }
+ if (of_device_is_compatible(child, "arm,hdlcd")) {
+ has_coretile_hdlcd = true;
+ of_node_put(child);
+ break;
+ }
+ }
+
+ of_node_put(root);
+
+ /*
+ * If there is a coretile HDLCD and it has a driver,
+ * do not mux the CLCD on the motherboard to the DVI.
+ */
+ if (has_coretile_hdlcd && IS_ENABLED(CONFIG_DRM_HDLCD))
+ mux_motherboard = false;
+
+ /*
+ * On the Vexpress CA9 we let the CLCD on the coretile
+ * take precedence, so also in this case do not mux the
+ * motherboard to the DVI.
+ */
+ if (has_coretile_clcd)
+ mux_motherboard = false;
+
+ if (mux_motherboard) {
+ dev_info(dev, "DVI muxed to motherboard CLCD\n");
+ val = VEXPRESS_FPGAMUX_MOTHERBOARD;
+ } else if (ct_clcd == dev->of_node) {
+ dev_info(dev,
+ "DVI muxed to daughterboard 1 (core tile) CLCD\n");
+ val = VEXPRESS_FPGAMUX_DAUGHTERBOARD_1;
+ } else {
+ dev_info(dev, "core tile graphics present\n");
+ dev_info(dev, "this device will be deactivated\n");
+ return -ENODEV;
+ }
+
+ /* Call into deep Vexpress configuration API */
+ pdev = of_find_device_by_node(np);
+ if (!pdev) {
+ dev_err(dev, "can't find the sysreg device, deferring\n");
+ return -EPROBE_DEFER;
+ }
+
+ map = devm_regmap_init_vexpress_config(&pdev->dev);
+ if (IS_ERR(map)) {
+ platform_device_put(pdev);
+ return PTR_ERR(map);
+ }
+
+ ret = regmap_write(map, 0, val);
+ platform_device_put(pdev);
+ if (ret) {
+ dev_err(dev, "error setting DVI muxmode\n");
+ return -ENODEV;
+ }
+
+ priv->variant = &pl111_vexpress;
+ dev_info(dev, "initializing Versatile Express PL111\n");
+
+ return 0;
+}
+
int pl111_versatile_init(struct device *dev, struct pl111_drm_dev_private *priv)
{
const struct of_device_id *clcd_id;
enum versatile_clcd versatile_clcd_type;
struct device_node *np;
struct regmap *map;
- int ret;
np = of_find_matching_node_and_match(NULL, versatile_clcd_of_match,
&clcd_id);
@@ -378,6 +475,15 @@ int pl111_versatile_init(struct device *dev, struct pl111_drm_dev_private *priv)
versatile_clcd_type = (enum versatile_clcd)clcd_id->data;
+ /* Versatile Express special handling */
+ if (versatile_clcd_type == VEXPRESS_CLCD_V2M) {
+ int ret = pl111_vexpress_clcd_init(dev, np, priv);
+ of_node_put(np);
+ if (ret)
+ dev_err(dev, "Versatile Express init failed - %d", ret);
+ return ret;
+ }
+
/*
* On the Integrator, check if we should use the IM-PD1 instead,
* if we find it, it will take precedence. This is on the Integrator/AP
@@ -390,37 +496,8 @@ int pl111_versatile_init(struct device *dev, struct pl111_drm_dev_private *priv)
versatile_clcd_type = (enum versatile_clcd)clcd_id->data;
}
- /* Versatile Express special handling */
- if (versatile_clcd_type == VEXPRESS_CLCD_V2M) {
- struct platform_device *pdev;
-
- /* Registers a driver for the muxfpga */
- ret = vexpress_muxfpga_init();
- if (ret) {
- dev_err(dev, "unable to initialize muxfpga driver\n");
- of_node_put(np);
- return ret;
- }
-
- /* Call into deep Vexpress configuration API */
- pdev = of_find_device_by_node(np);
- if (!pdev) {
- dev_err(dev, "can't find the sysreg device, deferring\n");
- of_node_put(np);
- return -EPROBE_DEFER;
- }
- map = dev_get_drvdata(&pdev->dev);
- if (!map) {
- dev_err(dev, "sysreg has not yet probed\n");
- platform_device_put(pdev);
- of_node_put(np);
- return -EPROBE_DEFER;
- }
- } else {
- map = syscon_node_to_regmap(np);
- }
+ map = syscon_node_to_regmap(np);
of_node_put(np);
-
if (IS_ERR(map)) {
dev_err(dev, "no Versatile syscon regmap\n");
return PTR_ERR(map);
@@ -466,13 +543,6 @@ int pl111_versatile_init(struct device *dev, struct pl111_drm_dev_private *priv)
priv->variant_display_disable = pl111_realview_clcd_disable;
dev_info(dev, "set up callbacks for RealView PL111\n");
break;
- case VEXPRESS_CLCD_V2M:
- priv->variant = &pl111_vexpress;
- dev_info(dev, "initializing Versatile Express PL111\n");
- ret = pl111_vexpress_clcd_init(dev, priv, map);
- if (ret)
- return ret;
- break;
default:
dev_info(dev, "unknown Versatile system controller\n");
break;
diff --git a/drivers/gpu/drm/pl111/pl111_vexpress.c b/drivers/gpu/drm/pl111/pl111_vexpress.c
deleted file mode 100644
index 350570fe06b5..000000000000
--- a/drivers/gpu/drm/pl111/pl111_vexpress.c
+++ /dev/null
@@ -1,138 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Versatile Express PL111 handling
- * Copyright (C) 2018 Linus Walleij
- *
- * This module binds to the "arm,vexpress-muxfpga" device on the
- * Versatile Express configuration bus and sets up which CLCD instance
- * gets muxed out on the DVI bridge.
- */
-#include <linux/device.h>
-#include <linux/module.h>
-#include <linux/regmap.h>
-#include <linux/vexpress.h>
-#include <linux/platform_device.h>
-#include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/of_platform.h>
-#include "pl111_drm.h"
-#include "pl111_vexpress.h"
-
-#define VEXPRESS_FPGAMUX_MOTHERBOARD 0x00
-#define VEXPRESS_FPGAMUX_DAUGHTERBOARD_1 0x01
-#define VEXPRESS_FPGAMUX_DAUGHTERBOARD_2 0x02
-
-int pl111_vexpress_clcd_init(struct device *dev,
- struct pl111_drm_dev_private *priv,
- struct regmap *map)
-{
- struct device_node *root;
- struct device_node *child;
- struct device_node *ct_clcd = NULL;
- bool has_coretile_clcd = false;
- bool has_coretile_hdlcd = false;
- bool mux_motherboard = true;
- u32 val;
- int ret;
-
- /*
- * Check if we have a CLCD or HDLCD on the core tile by checking if a
- * CLCD or HDLCD is available in the root of the device tree.
- */
- root = of_find_node_by_path("/");
- if (!root)
- return -EINVAL;
-
- for_each_available_child_of_node(root, child) {
- if (of_device_is_compatible(child, "arm,pl111")) {
- has_coretile_clcd = true;
- ct_clcd = child;
- break;
- }
- if (of_device_is_compatible(child, "arm,hdlcd")) {
- has_coretile_hdlcd = true;
- of_node_put(child);
- break;
- }
- }
-
- of_node_put(root);
-
- /*
- * If there is a coretile HDLCD and it has a driver,
- * do not mux the CLCD on the motherboard to the DVI.
- */
- if (has_coretile_hdlcd && IS_ENABLED(CONFIG_DRM_HDLCD))
- mux_motherboard = false;
-
- /*
- * On the Vexpress CA9 we let the CLCD on the coretile
- * take precedence, so also in this case do not mux the
- * motherboard to the DVI.
- */
- if (has_coretile_clcd)
- mux_motherboard = false;
-
- if (mux_motherboard) {
- dev_info(dev, "DVI muxed to motherboard CLCD\n");
- val = VEXPRESS_FPGAMUX_MOTHERBOARD;
- } else if (ct_clcd == dev->of_node) {
- dev_info(dev,
- "DVI muxed to daughterboard 1 (core tile) CLCD\n");
- val = VEXPRESS_FPGAMUX_DAUGHTERBOARD_1;
- } else {
- dev_info(dev, "core tile graphics present\n");
- dev_info(dev, "this device will be deactivated\n");
- return -ENODEV;
- }
-
- ret = regmap_write(map, 0, val);
- if (ret) {
- dev_err(dev, "error setting DVI muxmode\n");
- return -ENODEV;
- }
-
- return 0;
-}
-
-/*
- * This sets up the regmap pointer that will then be retrieved by
- * the detection code in pl111_versatile.c and passed in to the
- * pl111_vexpress_clcd_init() function above.
- */
-static int vexpress_muxfpga_probe(struct platform_device *pdev)
-{
- struct device *dev = &pdev->dev;
- struct regmap *map;
-
- map = devm_regmap_init_vexpress_config(&pdev->dev);
- if (IS_ERR(map))
- return PTR_ERR(map);
- dev_set_drvdata(dev, map);
-
- return 0;
-}
-
-static const struct of_device_id vexpress_muxfpga_match[] = {
- { .compatible = "arm,vexpress-muxfpga", },
- {}
-};
-
-static struct platform_driver vexpress_muxfpga_driver = {
- .driver = {
- .name = "vexpress-muxfpga",
- .of_match_table = of_match_ptr(vexpress_muxfpga_match),
- },
- .probe = vexpress_muxfpga_probe,
-};
-
-int vexpress_muxfpga_init(void)
-{
- int ret;
-
- ret = platform_driver_register(&vexpress_muxfpga_driver);
- /* -EBUSY just means this driver is already registered */
- if (ret == -EBUSY)
- ret = 0;
- return ret;
-}
diff --git a/drivers/gpu/drm/pl111/pl111_vexpress.h b/drivers/gpu/drm/pl111/pl111_vexpress.h
deleted file mode 100644
index 5d3681bb4c00..000000000000
--- a/drivers/gpu/drm/pl111/pl111_vexpress.h
+++ /dev/null
@@ -1,29 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-struct device;
-struct pl111_drm_dev_private;
-struct regmap;
-
-#ifdef CONFIG_ARCH_VEXPRESS
-
-int pl111_vexpress_clcd_init(struct device *dev,
- struct pl111_drm_dev_private *priv,
- struct regmap *map);
-
-int vexpress_muxfpga_init(void);
-
-#else
-
-static inline int pl111_vexpress_clcd_init(struct device *dev,
- struct pl111_drm_dev_private *priv,
- struct regmap *map)
-{
- return -ENODEV;
-}
-
-static inline int vexpress_muxfpga_init(void)
-{
- return 0;
-}
-
-#endif
diff --git a/drivers/gpu/drm/qxl/qxl_debugfs.c b/drivers/gpu/drm/qxl/qxl_debugfs.c
index a4f4175bbdbe..524d35b648d8 100644
--- a/drivers/gpu/drm/qxl/qxl_debugfs.c
+++ b/drivers/gpu/drm/qxl/qxl_debugfs.c
@@ -39,7 +39,7 @@ static int
qxl_debugfs_irq_received(struct seq_file *m, void *data)
{
struct drm_info_node *node = (struct drm_info_node *) m->private;
- struct qxl_device *qdev = node->minor->dev->dev_private;
+ struct qxl_device *qdev = to_qxl(node->minor->dev);
seq_printf(m, "%d\n", atomic_read(&qdev->irq_received));
seq_printf(m, "%d\n", atomic_read(&qdev->irq_received_display));
@@ -53,7 +53,7 @@ static int
qxl_debugfs_buffers_info(struct seq_file *m, void *data)
{
struct drm_info_node *node = (struct drm_info_node *) m->private;
- struct qxl_device *qdev = node->minor->dev->dev_private;
+ struct qxl_device *qdev = to_qxl(node->minor->dev);
struct qxl_bo *bo;
list_for_each_entry(bo, &qdev->gem.objects, list) {
@@ -79,36 +79,29 @@ static struct drm_info_list qxl_debugfs_list[] = {
#define QXL_DEBUGFS_ENTRIES ARRAY_SIZE(qxl_debugfs_list)
#endif
-int
+void
qxl_debugfs_init(struct drm_minor *minor)
{
#if defined(CONFIG_DEBUG_FS)
- int r;
- struct qxl_device *dev =
- (struct qxl_device *) minor->dev->dev_private;
+ struct qxl_device *dev = to_qxl(minor->dev);
drm_debugfs_create_files(qxl_debugfs_list, QXL_DEBUGFS_ENTRIES,
minor->debugfs_root, minor);
- r = qxl_ttm_debugfs_init(dev);
- if (r) {
- DRM_ERROR("Failed to init TTM debugfs\n");
- return r;
- }
+ qxl_ttm_debugfs_init(dev);
#endif
- return 0;
}
-int qxl_debugfs_add_files(struct qxl_device *qdev,
- struct drm_info_list *files,
- unsigned int nfiles)
+void qxl_debugfs_add_files(struct qxl_device *qdev,
+ struct drm_info_list *files,
+ unsigned int nfiles)
{
unsigned int i;
for (i = 0; i < qdev->debugfs_count; i++) {
if (qdev->debugfs[i].files == files) {
/* Already registered */
- return 0;
+ return;
}
}
@@ -116,7 +109,7 @@ int qxl_debugfs_add_files(struct qxl_device *qdev,
if (i > QXL_DEBUGFS_MAX_COMPONENTS) {
DRM_ERROR("Reached maximum number of debugfs components.\n");
DRM_ERROR("Report so we increase QXL_DEBUGFS_MAX_COMPONENTS.\n");
- return -EINVAL;
+ return;
}
qdev->debugfs[qdev->debugfs_count].files = files;
qdev->debugfs[qdev->debugfs_count].num_files = nfiles;
@@ -126,5 +119,4 @@ int qxl_debugfs_add_files(struct qxl_device *qdev,
qdev->ddev.primary->debugfs_root,
qdev->ddev.primary);
#endif
- return 0;
}
diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c
index 91f398d51cfa..9d45d5a4278f 100644
--- a/drivers/gpu/drm/qxl/qxl_display.c
+++ b/drivers/gpu/drm/qxl/qxl_display.c
@@ -221,7 +221,7 @@ static int qxl_add_mode(struct drm_connector *connector,
bool preferred)
{
struct drm_device *dev = connector->dev;
- struct qxl_device *qdev = dev->dev_private;
+ struct qxl_device *qdev = to_qxl(dev);
struct drm_display_mode *mode = NULL;
int rc;
@@ -242,7 +242,7 @@ static int qxl_add_mode(struct drm_connector *connector,
static int qxl_add_monitors_config_modes(struct drm_connector *connector)
{
struct drm_device *dev = connector->dev;
- struct qxl_device *qdev = dev->dev_private;
+ struct qxl_device *qdev = to_qxl(dev);
struct qxl_output *output = drm_connector_to_qxl_output(connector);
int h = output->index;
struct qxl_head *head;
@@ -310,7 +310,7 @@ static void qxl_crtc_update_monitors_config(struct drm_crtc *crtc,
const char *reason)
{
struct drm_device *dev = crtc->dev;
- struct qxl_device *qdev = dev->dev_private;
+ struct qxl_device *qdev = to_qxl(dev);
struct qxl_crtc *qcrtc = to_qxl_crtc(crtc);
struct qxl_head head;
int oldcount, i = qcrtc->index;
@@ -400,7 +400,7 @@ static int qxl_framebuffer_surface_dirty(struct drm_framebuffer *fb,
unsigned int num_clips)
{
/* TODO: vmwgfx where this was cribbed from had locking. Why? */
- struct qxl_device *qdev = fb->dev->dev_private;
+ struct qxl_device *qdev = to_qxl(fb->dev);
struct drm_clip_rect norect;
struct qxl_bo *qobj;
bool is_primary;
@@ -462,7 +462,7 @@ static const struct drm_crtc_helper_funcs qxl_crtc_helper_funcs = {
static int qxl_primary_atomic_check(struct drm_plane *plane,
struct drm_plane_state *state)
{
- struct qxl_device *qdev = plane->dev->dev_private;
+ struct qxl_device *qdev = to_qxl(plane->dev);
struct qxl_bo *bo;
if (!state->crtc || !state->fb)
@@ -476,7 +476,7 @@ static int qxl_primary_atomic_check(struct drm_plane *plane,
static int qxl_primary_apply_cursor(struct drm_plane *plane)
{
struct drm_device *dev = plane->dev;
- struct qxl_device *qdev = dev->dev_private;
+ struct qxl_device *qdev = to_qxl(dev);
struct drm_framebuffer *fb = plane->state->fb;
struct qxl_crtc *qcrtc = to_qxl_crtc(plane->state->crtc);
struct qxl_cursor_cmd *cmd;
@@ -523,7 +523,7 @@ out_free_release:
static void qxl_primary_atomic_update(struct drm_plane *plane,
struct drm_plane_state *old_state)
{
- struct qxl_device *qdev = plane->dev->dev_private;
+ struct qxl_device *qdev = to_qxl(plane->dev);
struct qxl_bo *bo = gem_to_qxl_bo(plane->state->fb->obj[0]);
struct qxl_bo *primary;
struct drm_clip_rect norect = {
@@ -554,7 +554,7 @@ static void qxl_primary_atomic_update(struct drm_plane *plane,
static void qxl_primary_atomic_disable(struct drm_plane *plane,
struct drm_plane_state *old_state)
{
- struct qxl_device *qdev = plane->dev->dev_private;
+ struct qxl_device *qdev = to_qxl(plane->dev);
if (old_state->fb) {
struct qxl_bo *bo = gem_to_qxl_bo(old_state->fb->obj[0]);
@@ -570,7 +570,7 @@ static void qxl_cursor_atomic_update(struct drm_plane *plane,
struct drm_plane_state *old_state)
{
struct drm_device *dev = plane->dev;
- struct qxl_device *qdev = dev->dev_private;
+ struct qxl_device *qdev = to_qxl(dev);
struct drm_framebuffer *fb = plane->state->fb;
struct qxl_crtc *qcrtc = to_qxl_crtc(plane->state->crtc);
struct qxl_release *release;
@@ -679,7 +679,7 @@ out_free_release:
static void qxl_cursor_atomic_disable(struct drm_plane *plane,
struct drm_plane_state *old_state)
{
- struct qxl_device *qdev = plane->dev->dev_private;
+ struct qxl_device *qdev = to_qxl(plane->dev);
struct qxl_release *release;
struct qxl_cursor_cmd *cmd;
int ret;
@@ -762,7 +762,7 @@ static void qxl_calc_dumb_shadow(struct qxl_device *qdev,
static int qxl_plane_prepare_fb(struct drm_plane *plane,
struct drm_plane_state *new_state)
{
- struct qxl_device *qdev = plane->dev->dev_private;
+ struct qxl_device *qdev = to_qxl(plane->dev);
struct drm_gem_object *obj;
struct qxl_bo *user_bo;
struct qxl_surface surf;
@@ -923,7 +923,7 @@ static int qdev_crtc_init(struct drm_device *dev, int crtc_id)
{
struct qxl_crtc *qxl_crtc;
struct drm_plane *primary, *cursor;
- struct qxl_device *qdev = dev->dev_private;
+ struct qxl_device *qdev = to_qxl(dev);
int r;
qxl_crtc = kzalloc(sizeof(struct qxl_crtc), GFP_KERNEL);
@@ -965,7 +965,7 @@ free_mem:
static int qxl_conn_get_modes(struct drm_connector *connector)
{
struct drm_device *dev = connector->dev;
- struct qxl_device *qdev = dev->dev_private;
+ struct qxl_device *qdev = to_qxl(dev);
struct qxl_output *output = drm_connector_to_qxl_output(connector);
unsigned int pwidth = 1024;
unsigned int pheight = 768;
@@ -991,7 +991,7 @@ static enum drm_mode_status qxl_conn_mode_valid(struct drm_connector *connector,
struct drm_display_mode *mode)
{
struct drm_device *ddev = connector->dev;
- struct qxl_device *qdev = ddev->dev_private;
+ struct qxl_device *qdev = to_qxl(ddev);
if (qxl_check_mode(qdev, mode->hdisplay, mode->vdisplay) != 0)
return MODE_BAD;
@@ -1021,7 +1021,7 @@ static enum drm_connector_status qxl_conn_detect(
struct qxl_output *output =
drm_connector_to_qxl_output(connector);
struct drm_device *ddev = connector->dev;
- struct qxl_device *qdev = ddev->dev_private;
+ struct qxl_device *qdev = to_qxl(ddev);
bool connected = false;
/* The first monitor is always connected */
@@ -1071,7 +1071,7 @@ static int qxl_mode_create_hotplug_mode_update_property(struct qxl_device *qdev)
static int qdev_output_init(struct drm_device *dev, int num_output)
{
- struct qxl_device *qdev = dev->dev_private;
+ struct qxl_device *qdev = to_qxl(dev);
struct qxl_output *qxl_output;
struct drm_connector *connector;
struct drm_encoder *encoder;
diff --git a/drivers/gpu/drm/qxl/qxl_drv.c b/drivers/gpu/drm/qxl/qxl_drv.c
index 4fda3f9b29f4..13872b882775 100644
--- a/drivers/gpu/drm/qxl/qxl_drv.c
+++ b/drivers/gpu/drm/qxl/qxl_drv.c
@@ -81,13 +81,16 @@ qxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
return -EINVAL; /* TODO: ENODEV ? */
}
- qdev = kzalloc(sizeof(struct qxl_device), GFP_KERNEL);
- if (!qdev)
+ qdev = devm_drm_dev_alloc(&pdev->dev, &qxl_driver,
+ struct qxl_device, ddev);
+ if (IS_ERR(qdev)) {
+ pr_err("Unable to init drm dev");
return -ENOMEM;
+ }
ret = pci_enable_device(pdev);
if (ret)
- goto free_dev;
+ return ret;
ret = drm_fb_helper_remove_conflicting_pci_framebuffers(pdev, "qxl");
if (ret)
@@ -101,7 +104,7 @@ qxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
}
}
- ret = qxl_device_init(qdev, &qxl_driver, pdev);
+ ret = qxl_device_init(qdev, pdev);
if (ret)
goto put_vga;
@@ -128,14 +131,13 @@ put_vga:
vga_put(pdev, VGA_RSRC_LEGACY_IO);
disable_pci:
pci_disable_device(pdev);
-free_dev:
- kfree(qdev);
+
return ret;
}
static void qxl_drm_release(struct drm_device *dev)
{
- struct qxl_device *qdev = dev->dev_private;
+ struct qxl_device *qdev = to_qxl(dev);
/*
* TODO: qxl_device_fini() call should be in qxl_pci_remove(),
@@ -144,8 +146,6 @@ static void qxl_drm_release(struct drm_device *dev)
*/
qxl_modeset_fini(qdev);
qxl_device_fini(qdev);
- dev->dev_private = NULL;
- kfree(qdev);
}
static void
@@ -157,7 +157,6 @@ qxl_pci_remove(struct pci_dev *pdev)
drm_atomic_helper_shutdown(dev);
if (is_vga(pdev))
vga_put(pdev, VGA_RSRC_LEGACY_IO);
- drm_dev_put(dev);
}
DEFINE_DRM_GEM_FOPS(qxl_fops);
@@ -165,7 +164,7 @@ DEFINE_DRM_GEM_FOPS(qxl_fops);
static int qxl_drm_freeze(struct drm_device *dev)
{
struct pci_dev *pdev = dev->pdev;
- struct qxl_device *qdev = dev->dev_private;
+ struct qxl_device *qdev = to_qxl(dev);
int ret;
ret = drm_mode_config_helper_suspend(dev);
@@ -187,7 +186,7 @@ static int qxl_drm_freeze(struct drm_device *dev)
static int qxl_drm_resume(struct drm_device *dev, bool thaw)
{
- struct qxl_device *qdev = dev->dev_private;
+ struct qxl_device *qdev = to_qxl(dev);
qdev->ram_header->int_mask = QXL_INTERRUPT_MASK;
if (!thaw) {
@@ -246,7 +245,7 @@ static int qxl_pm_restore(struct device *dev)
{
struct pci_dev *pdev = to_pci_dev(dev);
struct drm_device *drm_dev = pci_get_drvdata(pdev);
- struct qxl_device *qdev = drm_dev->dev_private;
+ struct qxl_device *qdev = to_qxl(drm_dev);
qxl_io_reset(qdev);
return qxl_drm_resume(drm_dev, false);
diff --git a/drivers/gpu/drm/qxl/qxl_drv.h b/drivers/gpu/drm/qxl/qxl_drv.h
index 27e45a2d6b52..31e35f787df2 100644
--- a/drivers/gpu/drm/qxl/qxl_drv.h
+++ b/drivers/gpu/drm/qxl/qxl_drv.h
@@ -190,13 +190,8 @@ struct qxl_debugfs {
unsigned int num_files;
};
-int qxl_debugfs_add_files(struct qxl_device *rdev,
- struct drm_info_list *files,
- unsigned int nfiles);
int qxl_debugfs_fence_init(struct qxl_device *rdev);
-struct qxl_device;
-
struct qxl_device {
struct drm_device ddev;
@@ -276,11 +271,12 @@ struct qxl_device {
int monitors_config_height;
};
+#define to_qxl(dev) container_of(dev, struct qxl_device, ddev)
+
extern const struct drm_ioctl_desc qxl_ioctls[];
extern int qxl_max_ioctl;
-int qxl_device_init(struct qxl_device *qdev, struct drm_driver *drv,
- struct pci_dev *pdev);
+int qxl_device_init(struct qxl_device *qdev, struct pci_dev *pdev);
void qxl_device_fini(struct qxl_device *qdev);
int qxl_modeset_init(struct qxl_device *qdev);
@@ -442,8 +438,8 @@ int qxl_garbage_collect(struct qxl_device *qdev);
/* debugfs */
-int qxl_debugfs_init(struct drm_minor *minor);
-int qxl_ttm_debugfs_init(struct qxl_device *qdev);
+void qxl_debugfs_init(struct drm_minor *minor);
+void qxl_ttm_debugfs_init(struct qxl_device *qdev);
/* qxl_prime.c */
int qxl_gem_prime_pin(struct drm_gem_object *obj);
@@ -461,9 +457,9 @@ int qxl_gem_prime_mmap(struct drm_gem_object *obj,
int qxl_irq_init(struct qxl_device *qdev);
irqreturn_t qxl_irq_handler(int irq, void *arg);
-int qxl_debugfs_add_files(struct qxl_device *qdev,
- struct drm_info_list *files,
- unsigned int nfiles);
+void qxl_debugfs_add_files(struct qxl_device *qdev,
+ struct drm_info_list *files,
+ unsigned int nfiles);
int qxl_surface_id_alloc(struct qxl_device *qdev,
struct qxl_bo *surf);
diff --git a/drivers/gpu/drm/qxl/qxl_dumb.c b/drivers/gpu/drm/qxl/qxl_dumb.c
index 272d19b677d8..24e903383aa1 100644
--- a/drivers/gpu/drm/qxl/qxl_dumb.c
+++ b/drivers/gpu/drm/qxl/qxl_dumb.c
@@ -32,7 +32,7 @@ int qxl_mode_dumb_create(struct drm_file *file_priv,
struct drm_device *dev,
struct drm_mode_create_dumb *args)
{
- struct qxl_device *qdev = dev->dev_private;
+ struct qxl_device *qdev = to_qxl(dev);
struct qxl_bo *qobj;
uint32_t handle;
int r;
diff --git a/drivers/gpu/drm/qxl/qxl_gem.c b/drivers/gpu/drm/qxl/qxl_gem.c
index 69f37db1027a..5ff6fa9b799c 100644
--- a/drivers/gpu/drm/qxl/qxl_gem.c
+++ b/drivers/gpu/drm/qxl/qxl_gem.c
@@ -34,7 +34,7 @@ void qxl_gem_object_free(struct drm_gem_object *gobj)
struct qxl_device *qdev;
struct ttm_buffer_object *tbo;
- qdev = (struct qxl_device *)gobj->dev->dev_private;
+ qdev = to_qxl(gobj->dev);
qxl_surface_evict(qdev, qobj, false);
diff --git a/drivers/gpu/drm/qxl/qxl_ioctl.c b/drivers/gpu/drm/qxl/qxl_ioctl.c
index 72f3f1bbb40c..13bd1d11c703 100644
--- a/drivers/gpu/drm/qxl/qxl_ioctl.c
+++ b/drivers/gpu/drm/qxl/qxl_ioctl.c
@@ -36,7 +36,7 @@
static int qxl_alloc_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv)
{
- struct qxl_device *qdev = dev->dev_private;
+ struct qxl_device *qdev = to_qxl(dev);
struct drm_qxl_alloc *qxl_alloc = data;
int ret;
struct qxl_bo *qobj;
@@ -64,7 +64,7 @@ static int qxl_alloc_ioctl(struct drm_device *dev, void *data,
static int qxl_map_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv)
{
- struct qxl_device *qdev = dev->dev_private;
+ struct qxl_device *qdev = to_qxl(dev);
struct drm_qxl_map *qxl_map = data;
return qxl_mode_dumb_mmap(file_priv, &qdev->ddev, qxl_map->handle,
@@ -276,7 +276,7 @@ out_free_reloc:
static int qxl_execbuffer_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv)
{
- struct qxl_device *qdev = dev->dev_private;
+ struct qxl_device *qdev = to_qxl(dev);
struct drm_qxl_execbuffer *execbuffer = data;
struct drm_qxl_command user_cmd;
int cmd_num;
@@ -301,7 +301,7 @@ static int qxl_execbuffer_ioctl(struct drm_device *dev, void *data,
static int qxl_update_area_ioctl(struct drm_device *dev, void *data,
struct drm_file *file)
{
- struct qxl_device *qdev = dev->dev_private;
+ struct qxl_device *qdev = to_qxl(dev);
struct drm_qxl_update_area *update_area = data;
struct qxl_rect area = {.left = update_area->left,
.top = update_area->top,
@@ -351,7 +351,7 @@ out:
static int qxl_getparam_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv)
{
- struct qxl_device *qdev = dev->dev_private;
+ struct qxl_device *qdev = to_qxl(dev);
struct drm_qxl_getparam *param = data;
switch (param->param) {
@@ -370,7 +370,7 @@ static int qxl_getparam_ioctl(struct drm_device *dev, void *data,
static int qxl_clientcap_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv)
{
- struct qxl_device *qdev = dev->dev_private;
+ struct qxl_device *qdev = to_qxl(dev);
struct drm_qxl_clientcap *param = data;
int byte, idx;
@@ -391,7 +391,7 @@ static int qxl_clientcap_ioctl(struct drm_device *dev, void *data,
static int qxl_alloc_surf_ioctl(struct drm_device *dev, void *data,
struct drm_file *file)
{
- struct qxl_device *qdev = dev->dev_private;
+ struct qxl_device *qdev = to_qxl(dev);
struct drm_qxl_alloc_surf *param = data;
struct qxl_bo *qobj;
int handle;
diff --git a/drivers/gpu/drm/qxl/qxl_irq.c b/drivers/gpu/drm/qxl/qxl_irq.c
index 8435af108632..1ba5a702d763 100644
--- a/drivers/gpu/drm/qxl/qxl_irq.c
+++ b/drivers/gpu/drm/qxl/qxl_irq.c
@@ -32,7 +32,7 @@
irqreturn_t qxl_irq_handler(int irq, void *arg)
{
struct drm_device *dev = (struct drm_device *) arg;
- struct qxl_device *qdev = (struct qxl_device *)dev->dev_private;
+ struct qxl_device *qdev = to_qxl(dev);
uint32_t pending;
pending = xchg(&qdev->ram_header->int_pending, 0);
diff --git a/drivers/gpu/drm/qxl/qxl_kms.c b/drivers/gpu/drm/qxl/qxl_kms.c
index 70b20ee4741a..a6d873052cd4 100644
--- a/drivers/gpu/drm/qxl/qxl_kms.c
+++ b/drivers/gpu/drm/qxl/qxl_kms.c
@@ -27,6 +27,7 @@
#include <linux/pci.h>
#include <drm/drm_drv.h>
+#include <drm/drm_managed.h>
#include <drm/drm_probe_helper.h>
#include "qxl_drv.h"
@@ -107,20 +108,12 @@ static void qxl_gc_work(struct work_struct *work)
}
int qxl_device_init(struct qxl_device *qdev,
- struct drm_driver *drv,
struct pci_dev *pdev)
{
int r, sb;
- r = drm_dev_init(&qdev->ddev, drv, &pdev->dev);
- if (r) {
- pr_err("Unable to init drm dev");
- goto error;
- }
-
qdev->ddev.pdev = pdev;
pci_set_drvdata(pdev, &qdev->ddev);
- qdev->ddev.dev_private = qdev;
mutex_init(&qdev->gem.mutex);
mutex_init(&qdev->update_area_mutex);
@@ -136,8 +129,7 @@ int qxl_device_init(struct qxl_device *qdev,
qdev->vram_mapping = io_mapping_create_wc(qdev->vram_base, pci_resource_len(pdev, 0));
if (!qdev->vram_mapping) {
pr_err("Unable to create vram_mapping");
- r = -ENOMEM;
- goto error;
+ return -ENOMEM;
}
if (pci_resource_len(pdev, 4) > 0) {
@@ -218,7 +210,7 @@ int qxl_device_init(struct qxl_device *qdev,
&(qdev->ram_header->cursor_ring_hdr),
sizeof(struct qxl_command),
QXL_CURSOR_RING_SIZE,
- qdev->io_base + QXL_IO_NOTIFY_CMD,
+ qdev->io_base + QXL_IO_NOTIFY_CURSOR,
false,
&qdev->cursor_event);
@@ -291,7 +283,6 @@ surface_mapping_free:
io_mapping_free(qdev->surface_mapping);
vram_mapping_free:
io_mapping_free(qdev->vram_mapping);
-error:
return r;
}
diff --git a/drivers/gpu/drm/qxl/qxl_object.c b/drivers/gpu/drm/qxl/qxl_object.c
index ab72dc3476e9..edc8a9916872 100644
--- a/drivers/gpu/drm/qxl/qxl_object.c
+++ b/drivers/gpu/drm/qxl/qxl_object.c
@@ -33,7 +33,7 @@ static void qxl_ttm_bo_destroy(struct ttm_buffer_object *tbo)
struct qxl_device *qdev;
bo = to_qxl_bo(tbo);
- qdev = (struct qxl_device *)bo->tbo.base.dev->dev_private;
+ qdev = to_qxl(bo->tbo.base.dev);
qxl_surface_evict(qdev, bo, false);
WARN_ON_ONCE(bo->map_count > 0);
diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c
index 2feca734c7b1..4fae3e393da1 100644
--- a/drivers/gpu/drm/qxl/qxl_release.c
+++ b/drivers/gpu/drm/qxl/qxl_release.c
@@ -243,7 +243,7 @@ static int qxl_release_validate_bo(struct qxl_bo *bo)
return ret;
/* allocate a surface for reserved + validated buffers */
- ret = qxl_bo_check_id(bo->tbo.base.dev->dev_private, bo);
+ ret = qxl_bo_check_id(to_qxl(bo->tbo.base.dev), bo);
if (ret)
return ret;
return 0;
diff --git a/drivers/gpu/drm/qxl/qxl_ttm.c b/drivers/gpu/drm/qxl/qxl_ttm.c
index 62a5e424971b..f09a712b1ed2 100644
--- a/drivers/gpu/drm/qxl/qxl_ttm.c
+++ b/drivers/gpu/drm/qxl/qxl_ttm.c
@@ -243,7 +243,7 @@ static void qxl_bo_move_notify(struct ttm_buffer_object *bo,
if (!qxl_ttm_bo_is_qxl_bo(bo))
return;
qbo = to_qxl_bo(bo);
- qdev = qbo->tbo.base.dev->dev_private;
+ qdev = to_qxl(qbo->tbo.base.dev);
if (bo->mem.mem_type == TTM_PL_PRIV && qbo->surface_id)
qxl_surface_evict(qdev, qbo, new_mem ? true : false);
@@ -322,7 +322,7 @@ static int qxl_mm_dump_table(struct seq_file *m, void *data)
}
#endif
-int qxl_ttm_debugfs_init(struct qxl_device *qdev)
+void qxl_ttm_debugfs_init(struct qxl_device *qdev)
{
#if defined(CONFIG_DEBUG_FS)
static struct drm_info_list qxl_mem_types_list[QXL_DEBUGFS_MEM_TYPES];
@@ -343,8 +343,6 @@ int qxl_ttm_debugfs_init(struct qxl_device *qdev)
qxl_mem_types_list[i].data = qdev->mman.bdev.man[TTM_PL_PRIV].priv;
}
- return qxl_debugfs_add_files(qdev, qxl_mem_types_list, i);
-#else
- return 0;
+ qxl_debugfs_add_files(qdev, qxl_mem_types_list, i);
#endif
}
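Editor's aside on the recurring qxl conversions above: dropping dev_private in favour of to_qxl() works because struct qxl_device embeds its struct drm_device, so container_of() can recover the containing structure from the pointer the DRM core hands back to callbacks. A self-contained user-space sketch of the same upcast; the *_like types are made up for illustration:

#include <stddef.h>
#include <stdio.h>

/* Minimal stand-in for the kernel's container_of(). */
#define container_of(ptr, type, member) \
    ((type *)((char *)(ptr) - offsetof(type, member)))

struct drm_device_like { int id; };

struct qxl_device_like {
    struct drm_device_like ddev;    /* embedded, as in the real struct qxl_device */
    int irq_received;
};

#define to_qxl(dev) container_of(dev, struct qxl_device_like, ddev)

int main(void)
{
    struct qxl_device_like qdev = { .ddev = { .id = 1 }, .irq_received = 5 };
    struct drm_device_like *dev = &qdev.ddev;   /* what DRM callbacks receive */

    printf("%d\n", to_qxl(dev)->irq_received);  /* prints 5, no dev_private needed */
    return 0;
}
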
diff --git a/drivers/gpu/drm/r128/ati_pcigart.c b/drivers/gpu/drm/r128/ati_pcigart.c
index 9b4072f97215..3e76ae5a17ee 100644
--- a/drivers/gpu/drm/r128/ati_pcigart.c
+++ b/drivers/gpu/drm/r128/ati_pcigart.c
@@ -32,9 +32,10 @@
*/
#include <linux/export.h>
+#include <linux/pci.h>
#include <drm/drm_device.h>
-#include <drm/drm_pci.h>
+#include <drm/drm_legacy.h>
#include <drm/drm_print.h>
#include "ati_pcigart.h"
diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile
index c693b2ca0329..11c97edde54d 100644
--- a/drivers/gpu/drm/radeon/Makefile
+++ b/drivers/gpu/drm/radeon/Makefile
@@ -3,42 +3,13 @@
# Makefile for the drm device driver. This driver provides support for the
# Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher.
-ccflags-y := -Idrivers/gpu/drm/amd/include
-
hostprogs := mkregtable
-clean-files := rn50_reg_safe.h r100_reg_safe.h r200_reg_safe.h rv515_reg_safe.h r300_reg_safe.h r420_reg_safe.h rs600_reg_safe.h r600_reg_safe.h evergreen_reg_safe.h cayman_reg_safe.h
+targets := rn50_reg_safe.h r100_reg_safe.h r200_reg_safe.h rv515_reg_safe.h r300_reg_safe.h r420_reg_safe.h rs600_reg_safe.h r600_reg_safe.h evergreen_reg_safe.h cayman_reg_safe.h
-quiet_cmd_mkregtable = MKREGTABLE $@
+quiet_cmd_mkregtable = MKREG $@
cmd_mkregtable = $(obj)/mkregtable $< > $@
-$(obj)/rn50_reg_safe.h: $(src)/reg_srcs/rn50 $(obj)/mkregtable
- $(call if_changed,mkregtable)
-
-$(obj)/r100_reg_safe.h: $(src)/reg_srcs/r100 $(obj)/mkregtable
- $(call if_changed,mkregtable)
-
-$(obj)/r200_reg_safe.h: $(src)/reg_srcs/r200 $(obj)/mkregtable
- $(call if_changed,mkregtable)
-
-$(obj)/rv515_reg_safe.h: $(src)/reg_srcs/rv515 $(obj)/mkregtable
- $(call if_changed,mkregtable)
-
-$(obj)/r300_reg_safe.h: $(src)/reg_srcs/r300 $(obj)/mkregtable
- $(call if_changed,mkregtable)
-
-$(obj)/r420_reg_safe.h: $(src)/reg_srcs/r420 $(obj)/mkregtable
- $(call if_changed,mkregtable)
-
-$(obj)/rs600_reg_safe.h: $(src)/reg_srcs/rs600 $(obj)/mkregtable
- $(call if_changed,mkregtable)
-
-$(obj)/r600_reg_safe.h: $(src)/reg_srcs/r600 $(obj)/mkregtable
- $(call if_changed,mkregtable)
-
-$(obj)/evergreen_reg_safe.h: $(src)/reg_srcs/evergreen $(obj)/mkregtable
- $(call if_changed,mkregtable)
-
-$(obj)/cayman_reg_safe.h: $(src)/reg_srcs/cayman $(obj)/mkregtable
+$(obj)/%_reg_safe.h: $(src)/reg_srcs/% $(obj)/mkregtable FORCE
$(call if_changed,mkregtable)
$(obj)/r100.o: $(obj)/r100_reg_safe.h $(obj)/rn50_reg_safe.h
diff --git a/drivers/gpu/drm/radeon/atom.c b/drivers/gpu/drm/radeon/atom.c
index 2c27627b6659..f15b20da5315 100644
--- a/drivers/gpu/drm/radeon/atom.c
+++ b/drivers/gpu/drm/radeon/atom.c
@@ -1211,8 +1211,7 @@ static int atom_execute_table_locked(struct atom_context *ctx, int index, uint32
SDEBUG("<<\n");
free:
- if (ws)
- kfree(ectx.ws);
+ kfree(ectx.ws);
return ret;
}
diff --git a/drivers/gpu/drm/radeon/ci_dpm.c b/drivers/gpu/drm/radeon/ci_dpm.c
index a9257bed3484..134aa2b01f90 100644
--- a/drivers/gpu/drm/radeon/ci_dpm.c
+++ b/drivers/gpu/drm/radeon/ci_dpm.c
@@ -65,13 +65,6 @@ static const struct ci_pt_defaults defaults_bonaire_xt =
{ 0x17C, 0x172, 0x180, 0x1BC, 0x1B3, 0x1BD, 0x206, 0x200, 0x203, 0x25D, 0x25A, 0x255, 0x2C3, 0x2C5, 0x2B4 }
};
-static const struct ci_pt_defaults defaults_bonaire_pro =
-{
- 1, 0xF, 0xFD, 0x19, 5, 45, 0, 0x65062,
- { 0x8C, 0x23F, 0x244, 0xA6, 0x83, 0x85, 0x86, 0x86, 0x83, 0xDB, 0xDB, 0xDA, 0x67, 0x60, 0x5F },
- { 0x187, 0x193, 0x193, 0x1C7, 0x1D1, 0x1D1, 0x210, 0x219, 0x219, 0x266, 0x26C, 0x26C, 0x2C9, 0x2CB, 0x2CB }
-};
-
static const struct ci_pt_defaults defaults_saturn_xt =
{
1, 0xF, 0xFD, 0x19, 5, 55, 0, 0x70000,
@@ -79,13 +72,6 @@ static const struct ci_pt_defaults defaults_saturn_xt =
{ 0x187, 0x187, 0x187, 0x1C7, 0x1C7, 0x1C7, 0x210, 0x210, 0x210, 0x266, 0x266, 0x266, 0x2C9, 0x2C9, 0x2C9 }
};
-static const struct ci_pt_defaults defaults_saturn_pro =
-{
- 1, 0xF, 0xFD, 0x19, 5, 55, 0, 0x30000,
- { 0x96, 0x21D, 0x23B, 0xA1, 0x85, 0x87, 0x83, 0x84, 0x81, 0xE6, 0xE6, 0xE6, 0x71, 0x6A, 0x6A },
- { 0x193, 0x19E, 0x19E, 0x1D2, 0x1DC, 0x1DC, 0x21A, 0x223, 0x223, 0x26E, 0x27E, 0x274, 0x2CF, 0x2D2, 0x2D2 }
-};
-
static const struct ci_pt_config_reg didt_config_ci[] =
{
{ 0x10, 0x000000ff, 0, 0x0, CISLANDS_CONFIGREG_DIDT_IND },
diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c
index 848ef68d9086..5d2591725189 100644
--- a/drivers/gpu/drm/radeon/radeon_atombios.c
+++ b/drivers/gpu/drm/radeon/radeon_atombios.c
@@ -2111,7 +2111,7 @@ static int radeon_atombios_parse_power_table_1_3(struct radeon_device *rdev)
ucOverdriveThermalController];
info.addr = power_info->info.ucOverdriveControllerAddress >> 1;
strlcpy(info.type, name, sizeof(info.type));
- i2c_new_device(&rdev->pm.i2c_bus->adapter, &info);
+ i2c_new_client_device(&rdev->pm.i2c_bus->adapter, &info);
}
}
num_modes = power_info->info.ucNumOfPowerModeEntries;
@@ -2351,7 +2351,7 @@ static void radeon_atombios_add_pplib_thermal_controller(struct radeon_device *r
const char *name = pp_lib_thermal_controller_names[controller->ucType];
info.addr = controller->ucI2cAddress >> 1;
strlcpy(info.type, name, sizeof(info.type));
- i2c_new_device(&rdev->pm.i2c_bus->adapter, &info);
+ i2c_new_client_device(&rdev->pm.i2c_bus->adapter, &info);
}
} else {
DRM_INFO("Unknown thermal controller type %d at 0x%02x %s fan control\n",
diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c
index c3e49c973812..d3c04df7e75d 100644
--- a/drivers/gpu/drm/radeon/radeon_combios.c
+++ b/drivers/gpu/drm/radeon/radeon_combios.c
@@ -2704,7 +2704,7 @@ void radeon_combios_get_power_modes(struct radeon_device *rdev)
const char *name = thermal_controller_names[thermal_controller];
info.addr = i2c_addr >> 1;
strlcpy(info.type, name, sizeof(info.type));
- i2c_new_device(&rdev->pm.i2c_bus->adapter, &info);
+ i2c_new_client_device(&rdev->pm.i2c_bus->adapter, &info);
}
}
} else {
@@ -2721,7 +2721,7 @@ void radeon_combios_get_power_modes(struct radeon_device *rdev)
const char *name = "f75375";
info.addr = 0x28;
strlcpy(info.type, name, sizeof(info.type));
- i2c_new_device(&rdev->pm.i2c_bus->adapter, &info);
+ i2c_new_client_device(&rdev->pm.i2c_bus->adapter, &info);
DRM_INFO("Possible %s thermal controller at 0x%02x\n",
name, info.addr);
}
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
index 59f8186a2415..bbb0883e8ce6 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -36,6 +36,7 @@
#include <linux/pm_runtime.h>
#include <linux/vga_switcheroo.h>
#include <linux/mmu_notifier.h>
+#include <linux/pci.h>
#include <drm/drm_agpsupport.h>
#include <drm/drm_crtc_helper.h>
@@ -44,7 +45,6 @@
#include <drm/drm_file.h>
#include <drm/drm_gem.h>
#include <drm/drm_ioctl.h>
-#include <drm/drm_pci.h>
#include <drm/drm_pciids.h>
#include <drm/drm_probe_helper.h>
#include <drm/drm_vblank.h>
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
index 58176db85952..c5d1dc9618a4 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -158,7 +158,7 @@ int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags)
}
if (radeon_is_px(dev)) {
- dev_pm_set_driver_flags(dev->dev, DPM_FLAG_NEVER_SKIP);
+ dev_pm_set_driver_flags(dev->dev, DPM_FLAG_NO_DIRECT_COMPLETE);
pm_runtime_use_autosuspend(dev->dev);
pm_runtime_set_autosuspend_delay(dev->dev, 5000);
pm_runtime_set_active(dev->dev);
@@ -828,7 +828,7 @@ int radeon_enable_vblank_kms(struct drm_crtc *crtc)
unsigned long irqflags;
int r;
- if (pipe < 0 || pipe >= rdev->num_crtc) {
+ if (pipe >= rdev->num_crtc) {
DRM_ERROR("Invalid crtc %d\n", pipe);
return -EINVAL;
}
@@ -854,7 +854,7 @@ void radeon_disable_vblank_kms(struct drm_crtc *crtc)
struct radeon_device *rdev = dev->dev_private;
unsigned long irqflags;
- if (pipe < 0 || pipe >= rdev->num_crtc) {
+ if (pipe >= rdev->num_crtc) {
DRM_ERROR("Invalid crtc %d\n", pipe);
return;
}
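
The radeon_kms.c hunks above make two independent changes: the runtime-PM driver flag is switched to its new name (DPM_FLAG_NEVER_SKIP was renamed to DPM_FLAG_NO_DIRECT_COMPLETE), and the "pipe < 0" half of the vblank range check is dropped. The latter is safe because the crtc index comes from drm_crtc_index() and is unsigned, so the comparison could never be true and only provoked compiler warnings. A tiny sketch of the remaining check, with an illustrative function name:

/* Sketch only: the upper-bound test alone is sufficient for an
 * unsigned crtc index.
 */
#include <linux/types.h>

static bool example_crtc_index_valid(unsigned int pipe,
                                     unsigned int num_crtc)
{
        return pipe < num_crtc;
}
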
diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c
index 2cb85dbe728f..a167e1c36d24 100644
--- a/drivers/gpu/drm/radeon/si_dpm.c
+++ b/drivers/gpu/drm/radeon/si_dpm.c
@@ -252,24 +252,6 @@ static const struct si_dte_data dte_data_tahiti =
false
};
-static const struct si_dte_data dte_data_tahiti_le =
-{
- { 0x1E8480, 0x7A1200, 0x2160EC0, 0x3938700, 0 },
- { 0x7D, 0x7D, 0x4E4, 0xB00, 0 },
- 0x5,
- 0xAFC8,
- 0x64,
- 0x32,
- 1,
- 0,
- 0x10,
- { 0x78, 0x7C, 0x82, 0x88, 0x8E, 0x94, 0x9A, 0xA0, 0xA6, 0xAC, 0xB0, 0xB4, 0xB8, 0xBC, 0xC0, 0xC4 },
- { 0x3938700, 0x3938700, 0x3938700, 0x3938700, 0x3938700, 0x3938700, 0x3938700, 0x3938700, 0x3938700, 0x3938700, 0x3938700, 0x3938700, 0x3938700, 0x3938700, 0x3938700, 0x3938700 },
- { 0x2AF8, 0x2AF8, 0x29BB, 0x27F9, 0x2637, 0x2475, 0x22B3, 0x20F1, 0x1F2F, 0x1D6D, 0x1734, 0x1414, 0x10F4, 0xDD4, 0xAB4, 0x794 },
- 85,
- true
-};
-
static const struct si_dte_data dte_data_tahiti_pro =
{
{ 0x1E8480, 0x3D0900, 0x989680, 0x2625A00, 0x0 },
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_drv.c b/drivers/gpu/drm/rcar-du/rcar_du_drv.c
index 654e2dd08146..3e67cf70f040 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_drv.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_drv.c
@@ -530,7 +530,6 @@ static int rcar_du_remove(struct platform_device *pdev)
drm_dev_unregister(ddev);
drm_kms_helper_poll_fini(ddev);
- drm_mode_config_cleanup(ddev);
drm_dev_put(ddev);
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_encoder.c b/drivers/gpu/drm/rcar-du/rcar_du_encoder.c
index c07c6a88aff0..b0335da0c161 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_encoder.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_encoder.c
@@ -13,6 +13,7 @@
#include <drm/drm_crtc.h>
#include <drm/drm_modeset_helper_vtables.h>
#include <drm/drm_panel.h>
+#include <drm/drm_simple_kms_helper.h>
#include "rcar_du_drv.h"
#include "rcar_du_encoder.h"
@@ -23,13 +24,6 @@
* Encoder
*/
-static const struct drm_encoder_helper_funcs encoder_helper_funcs = {
-};
-
-static const struct drm_encoder_funcs encoder_funcs = {
- .destroy = drm_encoder_cleanup,
-};
-
static unsigned int rcar_du_encoder_count_ports(struct device_node *node)
{
struct device_node *ports;
@@ -110,13 +104,11 @@ int rcar_du_encoder_init(struct rcar_du_device *rcdu,
}
}
- ret = drm_encoder_init(rcdu->ddev, encoder, &encoder_funcs,
- DRM_MODE_ENCODER_NONE, NULL);
+ ret = drm_simple_encoder_init(rcdu->ddev, encoder,
+ DRM_MODE_ENCODER_NONE);
if (ret < 0)
goto done;
- drm_encoder_helper_add(encoder, &encoder_helper_funcs);
-
/*
* Attach the bridge to the encoder. The bridge will create the
* connector.
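
The rcar-du change above is the first of many identical conversions in this diff: drivers whose drm_encoder_funcs only ever set .destroy = drm_encoder_cleanup can call drm_simple_encoder_init() instead and delete that boilerplate (rcar-du additionally drops an empty drm_encoder_helper_funcs, which was never needed). A hedged before/after sketch with illustrative names:

/* Before/after sketch of the encoder-init conversion; not taken
 * verbatim from any driver in this diff.
 */
#include <drm/drm_device.h>
#include <drm/drm_encoder.h>
#include <drm/drm_simple_kms_helper.h>

/* Before: each driver duplicated this funcs struct. */
static const struct drm_encoder_funcs example_encoder_funcs = {
        .destroy = drm_encoder_cleanup,
};

static int example_encoder_create_old(struct drm_device *drm,
                                      struct drm_encoder *encoder)
{
        return drm_encoder_init(drm, encoder, &example_encoder_funcs,
                                DRM_MODE_ENCODER_NONE, NULL);
}

/* After: drm_simple_encoder_init() supplies the same .destroy hook. */
static int example_encoder_create_new(struct drm_device *drm,
                                      struct drm_encoder *encoder)
{
        return drm_simple_encoder_init(drm, encoder, DRM_MODE_ENCODER_NONE);
}
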
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_kms.c b/drivers/gpu/drm/rcar-du/rcar_du_kms.c
index fcfd916227d1..482329102f19 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_kms.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_kms.c
@@ -712,7 +712,9 @@ int rcar_du_modeset_init(struct rcar_du_device *rcdu)
unsigned int i;
int ret;
- drm_mode_config_init(dev);
+ ret = drmm_mode_config_init(dev);
+ if (ret)
+ return ret;
dev->mode_config.min_width = 0;
dev->mode_config.min_height = 0;
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_plane.c b/drivers/gpu/drm/rcar-du/rcar_du_plane.c
index c6430027169f..a0021fc25b27 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_plane.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_plane.c
@@ -785,13 +785,15 @@ int rcar_du_planes_init(struct rcar_du_group *rgrp)
drm_plane_create_alpha_property(&plane->plane);
- if (type == DRM_PLANE_TYPE_PRIMARY)
- continue;
-
- drm_object_attach_property(&plane->plane.base,
- rcdu->props.colorkey,
- RCAR_DU_COLORKEY_NONE);
- drm_plane_create_zpos_property(&plane->plane, 1, 1, 7);
+ if (type == DRM_PLANE_TYPE_PRIMARY) {
+ drm_plane_create_zpos_immutable_property(&plane->plane,
+ 0);
+ } else {
+ drm_object_attach_property(&plane->plane.base,
+ rcdu->props.colorkey,
+ RCAR_DU_COLORKEY_NONE);
+ drm_plane_create_zpos_property(&plane->plane, 1, 1, 7);
+ }
}
return 0;
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_vsp.c b/drivers/gpu/drm/rcar-du/rcar_du_vsp.c
index 5e4faf258c31..f1a81c9b184d 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_vsp.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_vsp.c
@@ -392,12 +392,14 @@ int rcar_du_vsp_init(struct rcar_du_vsp *vsp, struct device_node *np,
drm_plane_helper_add(&plane->plane,
&rcar_du_vsp_plane_helper_funcs);
- if (type == DRM_PLANE_TYPE_PRIMARY)
- continue;
-
- drm_plane_create_alpha_property(&plane->plane);
- drm_plane_create_zpos_property(&plane->plane, 1, 1,
- vsp->num_planes - 1);
+ if (type == DRM_PLANE_TYPE_PRIMARY) {
+ drm_plane_create_zpos_immutable_property(&plane->plane,
+ 0);
+ } else {
+ drm_plane_create_alpha_property(&plane->plane);
+ drm_plane_create_zpos_property(&plane->plane, 1, 1,
+ vsp->num_planes - 1);
+ }
}
return 0;
diff --git a/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c b/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c
index ce98c08aa8b4..ade2327a10e2 100644
--- a/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c
+++ b/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c
@@ -26,6 +26,7 @@
#include <drm/drm_of.h>
#include <drm/drm_panel.h>
#include <drm/drm_probe_helper.h>
+#include <drm/drm_simple_kms_helper.h>
#include "rockchip_drm_drv.h"
#include "rockchip_drm_vop.h"
@@ -258,10 +259,6 @@ static struct drm_encoder_helper_funcs rockchip_dp_encoder_helper_funcs = {
.atomic_check = rockchip_dp_drm_encoder_atomic_check,
};
-static struct drm_encoder_funcs rockchip_dp_encoder_funcs = {
- .destroy = drm_encoder_cleanup,
-};
-
static int rockchip_dp_of_probe(struct rockchip_dp_device *dp)
{
struct device *dev = dp->dev;
@@ -309,8 +306,8 @@ static int rockchip_dp_drm_create_encoder(struct rockchip_dp_device *dp)
dev->of_node);
DRM_DEBUG_KMS("possible_crtcs = 0x%x\n", encoder->possible_crtcs);
- ret = drm_encoder_init(drm_dev, encoder, &rockchip_dp_encoder_funcs,
- DRM_MODE_ENCODER_TMDS, NULL);
+ ret = drm_simple_encoder_init(drm_dev, encoder,
+ DRM_MODE_ENCODER_TMDS);
if (ret) {
DRM_ERROR("failed to initialize encoder with drm\n");
return ret;
diff --git a/drivers/gpu/drm/rockchip/cdn-dp-core.c b/drivers/gpu/drm/rockchip/cdn-dp-core.c
index eed594bd38d3..c634b95b50f7 100644
--- a/drivers/gpu/drm/rockchip/cdn-dp-core.c
+++ b/drivers/gpu/drm/rockchip/cdn-dp-core.c
@@ -20,6 +20,7 @@
#include <drm/drm_edid.h>
#include <drm/drm_of.h>
#include <drm/drm_probe_helper.h>
+#include <drm/drm_simple_kms_helper.h>
#include "cdn-dp-core.h"
#include "cdn-dp-reg.h"
@@ -689,10 +690,6 @@ static const struct drm_encoder_helper_funcs cdn_dp_encoder_helper_funcs = {
.atomic_check = cdn_dp_encoder_atomic_check,
};
-static const struct drm_encoder_funcs cdn_dp_encoder_funcs = {
- .destroy = drm_encoder_cleanup,
-};
-
static int cdn_dp_parse_dt(struct cdn_dp_device *dp)
{
struct device *dev = dp->dev;
@@ -1030,8 +1027,8 @@ static int cdn_dp_bind(struct device *dev, struct device *master, void *data)
dev->of_node);
DRM_DEBUG_KMS("possible_crtcs = 0x%x\n", encoder->possible_crtcs);
- ret = drm_encoder_init(drm_dev, encoder, &cdn_dp_encoder_funcs,
- DRM_MODE_ENCODER_TMDS, NULL);
+ ret = drm_simple_encoder_init(drm_dev, encoder,
+ DRM_MODE_ENCODER_TMDS);
if (ret) {
DRM_ERROR("failed to initialize encoder with drm\n");
return ret;
@@ -1109,7 +1106,7 @@ static const struct component_ops cdn_dp_component_ops = {
.unbind = cdn_dp_unbind,
};
-int cdn_dp_suspend(struct device *dev)
+static int cdn_dp_suspend(struct device *dev)
{
struct cdn_dp_device *dp = dev_get_drvdata(dev);
int ret = 0;
@@ -1123,7 +1120,7 @@ int cdn_dp_suspend(struct device *dev)
return ret;
}
-int cdn_dp_resume(struct device *dev)
+static int cdn_dp_resume(struct device *dev)
{
struct cdn_dp_device *dp = dev_get_drvdata(dev);
diff --git a/drivers/gpu/drm/rockchip/cdn-dp-reg.c b/drivers/gpu/drm/rockchip/cdn-dp-reg.c
index 7361c07cb4a7..9d2163ef4d6e 100644
--- a/drivers/gpu/drm/rockchip/cdn-dp-reg.c
+++ b/drivers/gpu/drm/rockchip/cdn-dp-reg.c
@@ -601,7 +601,7 @@ static int cdn_dp_get_msa_misc(struct video_info *video,
case YCBCR_4_2_0:
val[0] = 5;
break;
- };
+ }
switch (video->color_depth) {
case 6:
@@ -619,7 +619,7 @@ static int cdn_dp_get_msa_misc(struct video_info *video,
case 16:
val[1] = 4;
break;
- };
+ }
msa_misc = 2 * val[0] + 32 * val[1] +
((video->color_fmt == Y_ONLY) ? (1 << 14) : 0);
@@ -700,7 +700,7 @@ int cdn_dp_config_video(struct cdn_dp_device *dp)
case 16:
val = BCS_16;
break;
- };
+ }
val += video->color_fmt << 8;
ret = cdn_dp_reg_write(dp, DP_FRAMER_PXL_REPR, val);
diff --git a/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c b/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c
index 6e1270e45f97..3feff0c45b3f 100644
--- a/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c
+++ b/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c
@@ -21,6 +21,7 @@
#include <drm/bridge/dw_mipi_dsi.h>
#include <drm/drm_mipi_dsi.h>
#include <drm/drm_of.h>
+#include <drm/drm_simple_kms_helper.h>
#include "rockchip_drm_drv.h"
#include "rockchip_drm_vop.h"
@@ -789,10 +790,6 @@ dw_mipi_dsi_encoder_helper_funcs = {
.disable = dw_mipi_dsi_encoder_disable,
};
-static const struct drm_encoder_funcs dw_mipi_dsi_encoder_funcs = {
- .destroy = drm_encoder_cleanup,
-};
-
static int rockchip_dsi_drm_create_encoder(struct dw_mipi_dsi_rockchip *dsi,
struct drm_device *drm_dev)
{
@@ -802,8 +799,7 @@ static int rockchip_dsi_drm_create_encoder(struct dw_mipi_dsi_rockchip *dsi,
encoder->possible_crtcs = drm_of_find_possible_crtcs(drm_dev,
dsi->dev->of_node);
- ret = drm_encoder_init(drm_dev, encoder, &dw_mipi_dsi_encoder_funcs,
- DRM_MODE_ENCODER_DSI, NULL);
+ ret = drm_simple_encoder_init(drm_dev, encoder, DRM_MODE_ENCODER_DSI);
if (ret) {
DRM_ERROR("Failed to initialize encoder with drm\n");
return ret;
diff --git a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c
index 7f56d8c3491d..121aa8a63a76 100644
--- a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c
+++ b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c
@@ -14,6 +14,7 @@
#include <drm/drm_edid.h>
#include <drm/drm_of.h>
#include <drm/drm_probe_helper.h>
+#include <drm/drm_simple_kms_helper.h>
#include "rockchip_drm_drv.h"
#include "rockchip_drm_vop.h"
@@ -237,10 +238,6 @@ dw_hdmi_rockchip_mode_valid(struct drm_connector *connector,
return (valid) ? MODE_OK : MODE_BAD;
}
-static const struct drm_encoder_funcs dw_hdmi_rockchip_encoder_funcs = {
- .destroy = drm_encoder_cleanup,
-};
-
static void dw_hdmi_rockchip_encoder_disable(struct drm_encoder *encoder)
{
}
@@ -546,8 +543,7 @@ static int dw_hdmi_rockchip_bind(struct device *dev, struct device *master,
}
drm_encoder_helper_add(encoder, &dw_hdmi_rockchip_encoder_helper_funcs);
- drm_encoder_init(drm, encoder, &dw_hdmi_rockchip_encoder_funcs,
- DRM_MODE_ENCODER_TMDS, NULL);
+ drm_simple_encoder_init(drm, encoder, DRM_MODE_ENCODER_TMDS);
platform_set_drvdata(pdev, hdmi);
diff --git a/drivers/gpu/drm/rockchip/inno_hdmi.c b/drivers/gpu/drm/rockchip/inno_hdmi.c
index e5864e823020..7afdc54eb3ec 100644
--- a/drivers/gpu/drm/rockchip/inno_hdmi.c
+++ b/drivers/gpu/drm/rockchip/inno_hdmi.c
@@ -19,6 +19,7 @@
#include <drm/drm_edid.h>
#include <drm/drm_of.h>
#include <drm/drm_probe_helper.h>
+#include <drm/drm_simple_kms_helper.h>
#include "rockchip_drm_drv.h"
#include "rockchip_drm_vop.h"
@@ -532,10 +533,6 @@ static struct drm_encoder_helper_funcs inno_hdmi_encoder_helper_funcs = {
.atomic_check = inno_hdmi_encoder_atomic_check,
};
-static struct drm_encoder_funcs inno_hdmi_encoder_funcs = {
- .destroy = drm_encoder_cleanup,
-};
-
static enum drm_connector_status
inno_hdmi_connector_detect(struct drm_connector *connector, bool force)
{
@@ -617,8 +614,7 @@ static int inno_hdmi_register(struct drm_device *drm, struct inno_hdmi *hdmi)
return -EPROBE_DEFER;
drm_encoder_helper_add(encoder, &inno_hdmi_encoder_helper_funcs);
- drm_encoder_init(drm, encoder, &inno_hdmi_encoder_funcs,
- DRM_MODE_ENCODER_TMDS, NULL);
+ drm_simple_encoder_init(drm, encoder, DRM_MODE_ENCODER_TMDS);
hdmi->connector.polled = DRM_CONNECTOR_POLL_HPD;
diff --git a/drivers/gpu/drm/rockchip/rk3066_hdmi.c b/drivers/gpu/drm/rockchip/rk3066_hdmi.c
index fe203d38664e..1c546c3a8998 100644
--- a/drivers/gpu/drm/rockchip/rk3066_hdmi.c
+++ b/drivers/gpu/drm/rockchip/rk3066_hdmi.c
@@ -6,6 +6,7 @@
#include <drm/drm_of.h>
#include <drm/drm_probe_helper.h>
+#include <drm/drm_simple_kms_helper.h>
#include <linux/clk.h>
#include <linux/mfd/syscon.h>
@@ -451,10 +452,6 @@ struct drm_encoder_helper_funcs rk3066_hdmi_encoder_helper_funcs = {
.atomic_check = rk3066_hdmi_encoder_atomic_check,
};
-static const struct drm_encoder_funcs rk3066_hdmi_encoder_funcs = {
- .destroy = drm_encoder_cleanup,
-};
-
static enum drm_connector_status
rk3066_hdmi_connector_detect(struct drm_connector *connector, bool force)
{
@@ -557,8 +554,7 @@ rk3066_hdmi_register(struct drm_device *drm, struct rk3066_hdmi *hdmi)
return -EPROBE_DEFER;
drm_encoder_helper_add(encoder, &rk3066_hdmi_encoder_helper_funcs);
- drm_encoder_init(drm, encoder, &rk3066_hdmi_encoder_funcs,
- DRM_MODE_ENCODER_TMDS, NULL);
+ drm_simple_encoder_init(drm, encoder, DRM_MODE_ENCODER_TMDS);
hdmi->connector.polled = DRM_CONNECTOR_POLL_HPD;
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
index 20ecb1508a22..0f3eb392fe39 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
@@ -135,14 +135,16 @@ static int rockchip_drm_bind(struct device *dev)
if (ret)
goto err_free;
- drm_mode_config_init(drm_dev);
+ ret = drmm_mode_config_init(drm_dev);
+ if (ret)
+ goto err_iommu_cleanup;
rockchip_drm_mode_config_init(drm_dev);
/* Try to bind all sub drivers. */
ret = component_bind_all(dev, drm_dev);
if (ret)
- goto err_mode_config_cleanup;
+ goto err_iommu_cleanup;
ret = drm_vblank_init(drm_dev, drm_dev->mode_config.num_crtc);
if (ret)
@@ -173,12 +175,9 @@ err_kms_helper_poll_fini:
rockchip_drm_fbdev_fini(drm_dev);
err_unbind_all:
component_unbind_all(dev, drm_dev);
-err_mode_config_cleanup:
- drm_mode_config_cleanup(drm_dev);
+err_iommu_cleanup:
rockchip_iommu_cleanup(drm_dev);
err_free:
- drm_dev->dev_private = NULL;
- dev_set_drvdata(dev, NULL);
drm_dev_put(drm_dev);
return ret;
}
@@ -194,11 +193,8 @@ static void rockchip_drm_unbind(struct device *dev)
drm_atomic_helper_shutdown(drm_dev);
component_unbind_all(dev, drm_dev);
- drm_mode_config_cleanup(drm_dev);
rockchip_iommu_cleanup(drm_dev);
- drm_dev->dev_private = NULL;
- dev_set_drvdata(dev, NULL);
drm_dev_put(drm_dev);
}
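
The drm_mode_config_init() to drmm_mode_config_init() conversions in this diff (rcar-du above, rockchip here, shmobile and stm below) move mode-config teardown to the managed (drmm) infrastructure: cleanup is registered as a release action on the drm_device, which is why the explicit drm_mode_config_cleanup() calls disappear from the error paths and from unbind. A minimal sketch of an init path written against that model; the limits and function name are illustrative.

/* Sketch of KMS init relying on managed mode-config cleanup. */
#include <drm/drm_device.h>
#include <drm/drm_mode_config.h>

static int example_modeset_init(struct drm_device *drm)
{
        int ret;

        /*
         * Registers drm_mode_config_cleanup() as a managed release
         * action, so error paths and unbind need no explicit cleanup.
         */
        ret = drmm_mode_config_init(drm);
        if (ret)
                return ret;

        drm->mode_config.min_width = 0;
        drm->mode_config.min_height = 0;
        drm->mode_config.max_width = 4096;
        drm->mode_config.max_height = 4096;

        return 0;
}
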
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.h b/drivers/gpu/drm/rockchip/rockchip_drm_drv.h
index c5b06048124e..e33c2dcd0d4b 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.h
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.h
@@ -30,6 +30,7 @@ struct rockchip_crtc_state {
int output_mode;
int output_bpc;
int output_flags;
+ bool enable_afbc;
};
#define to_rockchip_crtc_state(s) \
container_of(s, struct rockchip_crtc_state, base)
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_fb.c b/drivers/gpu/drm/rockchip/rockchip_drm_fb.c
index 221e72e71432..9b13c784b347 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_fb.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_fb.c
@@ -57,8 +57,49 @@ static const struct drm_mode_config_helper_funcs rockchip_mode_config_helpers =
.atomic_commit_tail = drm_atomic_helper_commit_tail_rpm,
};
+static struct drm_framebuffer *
+rockchip_fb_create(struct drm_device *dev, struct drm_file *file,
+ const struct drm_mode_fb_cmd2 *mode_cmd)
+{
+ struct drm_afbc_framebuffer *afbc_fb;
+ const struct drm_format_info *info;
+ int ret;
+
+ info = drm_get_format_info(dev, mode_cmd);
+ if (!info)
+ return ERR_PTR(-ENOMEM);
+
+ afbc_fb = kzalloc(sizeof(*afbc_fb), GFP_KERNEL);
+ if (!afbc_fb)
+ return ERR_PTR(-ENOMEM);
+
+ ret = drm_gem_fb_init_with_funcs(dev, &afbc_fb->base, file, mode_cmd,
+ &rockchip_drm_fb_funcs);
+ if (ret) {
+ kfree(afbc_fb);
+ return ERR_PTR(ret);
+ }
+
+ if (drm_is_afbc(mode_cmd->modifier[0])) {
+ int ret, i;
+
+ ret = drm_gem_fb_afbc_init(dev, mode_cmd, afbc_fb);
+ if (ret) {
+ struct drm_gem_object **obj = afbc_fb->base.obj;
+
+ for (i = 0; i < info->num_planes; ++i)
+ drm_gem_object_put_unlocked(obj[i]);
+
+ kfree(afbc_fb);
+ return ERR_PTR(ret);
+ }
+ }
+
+ return &afbc_fb->base;
+}
+
static const struct drm_mode_config_funcs rockchip_drm_mode_config_funcs = {
- .fb_create = drm_gem_fb_create_with_dirty,
+ .fb_create = rockchip_fb_create,
.output_poll_changed = drm_fb_helper_output_poll_changed,
.atomic_check = drm_atomic_helper_check,
.atomic_commit = drm_atomic_helper_commit,
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
index cecb2cc781f5..33463b79a37b 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
@@ -91,9 +91,22 @@
#define VOP_WIN_TO_INDEX(vop_win) \
((vop_win) - (vop_win)->vop->win)
+#define VOP_AFBC_SET(vop, name, v) \
+ do { \
+ if ((vop)->data->afbc) \
+ vop_reg_set((vop), &(vop)->data->afbc->name, \
+ 0, ~0, v, #name); \
+ } while (0)
+
#define to_vop(x) container_of(x, struct vop, crtc)
#define to_vop_win(x) container_of(x, struct vop_win, base)
+#define AFBC_FMT_RGB565 0x0
+#define AFBC_FMT_U8U8U8U8 0x5
+#define AFBC_FMT_U8U8U8 0x4
+
+#define AFBC_TILE_16x16 BIT(4)
+
/*
* The coefficients of the following matrix are all fixed points.
* The format is S2.10 for the 3x3 part of the matrix, and S9.12 for the offsets.
@@ -274,6 +287,29 @@ static enum vop_data_format vop_convert_format(uint32_t format)
}
}
+static int vop_convert_afbc_format(uint32_t format)
+{
+ switch (format) {
+ case DRM_FORMAT_XRGB8888:
+ case DRM_FORMAT_ARGB8888:
+ case DRM_FORMAT_XBGR8888:
+ case DRM_FORMAT_ABGR8888:
+ return AFBC_FMT_U8U8U8U8;
+ case DRM_FORMAT_RGB888:
+ case DRM_FORMAT_BGR888:
+ return AFBC_FMT_U8U8U8;
+ case DRM_FORMAT_RGB565:
+ case DRM_FORMAT_BGR565:
+ return AFBC_FMT_RGB565;
+ /* either of the below should not be reachable */
+ default:
+ DRM_WARN_ONCE("unsupported AFBC format[%08x]\n", format);
+ return -EINVAL;
+ }
+
+ return -EINVAL;
+}
+
static uint16_t scl_vop_cal_scale(enum scale_mode mode, uint32_t src,
uint32_t dst, bool is_horizontal,
int vsu_mode, int *vskiplines)
@@ -598,6 +634,17 @@ static int vop_enable(struct drm_crtc *crtc, struct drm_crtc_state *old_state)
vop_win_disable(vop, vop_win);
}
}
+
+ if (vop->data->afbc) {
+ struct rockchip_crtc_state *s;
+ /*
+ * Disable AFBC and forget there was a vop window with AFBC
+ */
+ VOP_AFBC_SET(vop, enable, 0);
+ s = to_rockchip_crtc_state(crtc->state);
+ s->enable_afbc = false;
+ }
+
spin_unlock(&vop->reg_lock);
vop_cfg_done(vop);
@@ -710,6 +757,26 @@ static void vop_plane_destroy(struct drm_plane *plane)
drm_plane_cleanup(plane);
}
+static inline bool rockchip_afbc(u64 modifier)
+{
+ return modifier == ROCKCHIP_AFBC_MOD;
+}
+
+static bool rockchip_mod_supported(struct drm_plane *plane,
+ u32 format, u64 modifier)
+{
+ if (modifier == DRM_FORMAT_MOD_LINEAR)
+ return true;
+
+ if (!rockchip_afbc(modifier)) {
+ DRM_DEBUG_KMS("Unsupported format modifier 0x%llx\n", modifier);
+
+ return false;
+ }
+
+ return vop_convert_afbc_format(format) >= 0;
+}
+
static int vop_plane_atomic_check(struct drm_plane *plane,
struct drm_plane_state *state)
{
@@ -758,6 +825,30 @@ static int vop_plane_atomic_check(struct drm_plane *plane,
return -EINVAL;
}
+ if (rockchip_afbc(fb->modifier)) {
+ struct vop *vop = to_vop(crtc);
+
+ if (!vop->data->afbc) {
+ DRM_ERROR("vop does not support AFBC\n");
+ return -EINVAL;
+ }
+
+ ret = vop_convert_afbc_format(fb->format->format);
+ if (ret < 0)
+ return ret;
+
+ if (state->src.x1 || state->src.y1) {
+ DRM_ERROR("AFBC does not support offset display, xpos=%d, ypos=%d, offset=%d\n", state->src.x1, state->src.y1, fb->offsets[0]);
+ return -EINVAL;
+ }
+
+ if (state->rotation && state->rotation != DRM_MODE_ROTATE_0) {
+ DRM_ERROR("No rotation support in AFBC, rotation=%d\n",
+ state->rotation);
+ return -EINVAL;
+ }
+ }
+
return 0;
}
@@ -846,6 +937,16 @@ static void vop_plane_atomic_update(struct drm_plane *plane,
spin_lock(&vop->reg_lock);
+ if (rockchip_afbc(fb->modifier)) {
+ int afbc_format = vop_convert_afbc_format(fb->format->format);
+
+ VOP_AFBC_SET(vop, format, afbc_format | AFBC_TILE_16x16);
+ VOP_AFBC_SET(vop, hreg_block_split, 0);
+ VOP_AFBC_SET(vop, win_sel, VOP_WIN_TO_INDEX(vop_win));
+ VOP_AFBC_SET(vop, hdr_ptr, dma_addr);
+ VOP_AFBC_SET(vop, pic_size, act_info);
+ }
+
VOP_WIN_SET(vop, win, format, format);
VOP_WIN_SET(vop, win, yrgb_vir, DIV_ROUND_UP(fb->pitches[0], 4));
VOP_WIN_SET(vop, win, yrgb_mst, dma_addr);
@@ -1001,6 +1102,7 @@ static const struct drm_plane_funcs vop_plane_funcs = {
.reset = drm_atomic_helper_plane_reset,
.atomic_duplicate_state = drm_atomic_helper_plane_duplicate_state,
.atomic_destroy_state = drm_atomic_helper_plane_destroy_state,
+ .format_mod_supported = rockchip_mod_supported,
};
static int vop_crtc_enable_vblank(struct drm_crtc *crtc)
@@ -1310,6 +1412,10 @@ static int vop_crtc_atomic_check(struct drm_crtc *crtc,
struct drm_crtc_state *crtc_state)
{
struct vop *vop = to_vop(crtc);
+ struct drm_plane *plane;
+ struct drm_plane_state *plane_state;
+ struct rockchip_crtc_state *s;
+ int afbc_planes = 0;
if (vop->lut_regs && crtc_state->color_mgmt_changed &&
crtc_state->gamma_lut) {
@@ -1323,6 +1429,27 @@ static int vop_crtc_atomic_check(struct drm_crtc *crtc,
}
}
+ drm_atomic_crtc_state_for_each_plane(plane, crtc_state) {
+ plane_state =
+ drm_atomic_get_plane_state(crtc_state->state, plane);
+ if (IS_ERR(plane_state)) {
+ DRM_DEBUG_KMS("Cannot get plane state for plane %s\n",
+ plane->name);
+ return PTR_ERR(plane_state);
+ }
+
+ if (drm_is_afbc(plane_state->fb->modifier))
+ ++afbc_planes;
+ }
+
+ if (afbc_planes > 1) {
+ DRM_DEBUG_KMS("Invalid number of AFBC planes; got %d, expected at most 1\n", afbc_planes);
+ return -EINVAL;
+ }
+
+ s = to_rockchip_crtc_state(crtc_state);
+ s->enable_afbc = afbc_planes > 0;
+
return 0;
}
@@ -1333,6 +1460,7 @@ static void vop_crtc_atomic_flush(struct drm_crtc *crtc,
struct drm_plane_state *old_plane_state, *new_plane_state;
struct vop *vop = to_vop(crtc);
struct drm_plane *plane;
+ struct rockchip_crtc_state *s;
int i;
if (WARN_ON(!vop->is_enabled))
@@ -1340,6 +1468,9 @@ static void vop_crtc_atomic_flush(struct drm_crtc *crtc,
spin_lock(&vop->reg_lock);
+ /* Enable AFBC if there is some AFBC window, disable otherwise. */
+ s = to_rockchip_crtc_state(crtc->state);
+ VOP_AFBC_SET(vop, enable, s->enable_afbc);
vop_cfg_done(vop);
spin_unlock(&vop->reg_lock);
@@ -1634,7 +1765,8 @@ static int vop_create_crtc(struct vop *vop)
0, &vop_plane_funcs,
win_data->phy->data_formats,
win_data->phy->nformats,
- NULL, win_data->type, NULL);
+ win_data->phy->format_modifiers,
+ win_data->type, NULL);
if (ret) {
DRM_DEV_ERROR(vop->dev, "failed to init plane %d\n",
ret);
@@ -1678,7 +1810,8 @@ static int vop_create_crtc(struct vop *vop)
&vop_plane_funcs,
win_data->phy->data_formats,
win_data->phy->nformats,
- NULL, win_data->type, NULL);
+ win_data->phy->format_modifiers,
+ win_data->type, NULL);
if (ret) {
DRM_DEV_ERROR(vop->dev, "failed to init overlay %d\n",
ret);
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.h b/drivers/gpu/drm/rockchip/rockchip_drm_vop.h
index cc672620d6e0..d03bdb531ef2 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.h
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.h
@@ -17,6 +17,11 @@
#define NUM_YUV2YUV_COEFFICIENTS 12
+#define ROCKCHIP_AFBC_MOD \
+ DRM_FORMAT_MOD_ARM_AFBC( \
+ AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 | AFBC_FORMAT_MOD_SPARSE \
+ )
+
enum vop_data_format {
VOP_FMT_ARGB8888 = 0,
VOP_FMT_RGB888,
@@ -34,6 +39,16 @@ struct vop_reg {
bool relaxed;
};
+struct vop_afbc {
+ struct vop_reg enable;
+ struct vop_reg win_sel;
+ struct vop_reg format;
+ struct vop_reg hreg_block_split;
+ struct vop_reg pic_size;
+ struct vop_reg hdr_ptr;
+ struct vop_reg rstn;
+};
+
struct vop_modeset {
struct vop_reg htotal_pw;
struct vop_reg hact_st_end;
@@ -134,6 +149,7 @@ struct vop_win_phy {
const struct vop_scl_regs *scl;
const uint32_t *data_formats;
uint32_t nformats;
+ const uint64_t *format_modifiers;
struct vop_reg enable;
struct vop_reg gate;
@@ -173,6 +189,7 @@ struct vop_data {
const struct vop_misc *misc;
const struct vop_modeset *modeset;
const struct vop_output *output;
+ const struct vop_afbc *afbc;
const struct vop_win_yuv2yuv_data *win_yuv2yuv;
const struct vop_win_data *win;
unsigned int win_size;
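
The new vop_afbc register block and ROCKCHIP_AFBC_MOD tie the VOP's AFBC support to one specific ARM AFBC layout: 16x16 superblocks with the sparse flag, decoded on a single window at a time. For context, here is a hedged userspace-side sketch of requesting that layout when creating a framebuffer through libdrm; the handle, pitch and pixel format are placeholders and nothing in it is derived from the kernel code above.

/* Userspace sketch (libdrm), illustrative values only. */
#include <stdint.h>
#include <drm_fourcc.h>
#include <xf86drmMode.h>

static int example_add_afbc_fb(int fd, uint32_t width, uint32_t height,
                               uint32_t bo_handle, uint32_t pitch,
                               uint32_t *fb_id)
{
        const uint64_t afbc_mod =
                DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |
                                        AFBC_FORMAT_MOD_SPARSE);
        uint32_t handles[4] = { bo_handle };
        uint32_t pitches[4] = { pitch };
        uint32_t offsets[4] = { 0 };
        uint64_t modifiers[4] = { afbc_mod };

        return drmModeAddFB2WithModifiers(fd, width, height,
                                          DRM_FORMAT_ARGB8888,
                                          handles, pitches, offsets,
                                          modifiers, fb_id,
                                          DRM_MODE_FB_MODIFIERS);
}
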
diff --git a/drivers/gpu/drm/rockchip/rockchip_lvds.c b/drivers/gpu/drm/rockchip/rockchip_lvds.c
index 449a62908d21..63f967902c2d 100644
--- a/drivers/gpu/drm/rockchip/rockchip_lvds.c
+++ b/drivers/gpu/drm/rockchip/rockchip_lvds.c
@@ -16,13 +16,14 @@
#include <linux/pm_runtime.h>
#include <linux/regmap.h>
#include <linux/reset.h>
+
#include <drm/drm_atomic_helper.h>
#include <drm/drm_bridge.h>
-
#include <drm/drm_dp_helper.h>
#include <drm/drm_of.h>
#include <drm/drm_panel.h>
#include <drm/drm_probe_helper.h>
+#include <drm/drm_simple_kms_helper.h>
#include "rockchip_drm_drv.h"
#include "rockchip_drm_vop.h"
@@ -435,10 +436,6 @@ struct drm_encoder_helper_funcs px30_lvds_encoder_helper_funcs = {
.atomic_check = rockchip_lvds_encoder_atomic_check,
};
-static const struct drm_encoder_funcs rockchip_lvds_encoder_funcs = {
- .destroy = drm_encoder_cleanup,
-};
-
static int rk3288_lvds_probe(struct platform_device *pdev,
struct rockchip_lvds *lvds)
{
@@ -607,8 +604,7 @@ static int rockchip_lvds_bind(struct device *dev, struct device *master,
encoder->possible_crtcs = drm_of_find_possible_crtcs(drm_dev,
dev->of_node);
- ret = drm_encoder_init(drm_dev, encoder, &rockchip_lvds_encoder_funcs,
- DRM_MODE_ENCODER_LVDS, NULL);
+ ret = drm_simple_encoder_init(drm_dev, encoder, DRM_MODE_ENCODER_LVDS);
if (ret < 0) {
DRM_DEV_ERROR(drm_dev->dev,
"failed to initialize encoder: %d\n", ret);
diff --git a/drivers/gpu/drm/rockchip/rockchip_rgb.c b/drivers/gpu/drm/rockchip/rockchip_rgb.c
index 90784781e515..9a771af5d0c9 100644
--- a/drivers/gpu/drm/rockchip/rockchip_rgb.c
+++ b/drivers/gpu/drm/rockchip/rockchip_rgb.c
@@ -14,6 +14,7 @@
#include <drm/drm_of.h>
#include <drm/drm_panel.h>
#include <drm/drm_probe_helper.h>
+#include <drm/drm_simple_kms_helper.h>
#include "rockchip_drm_drv.h"
#include "rockchip_drm_vop.h"
@@ -67,10 +68,6 @@ struct drm_encoder_helper_funcs rockchip_rgb_encoder_helper_funcs = {
.atomic_check = rockchip_rgb_encoder_atomic_check,
};
-static const struct drm_encoder_funcs rockchip_rgb_encoder_funcs = {
- .destroy = drm_encoder_cleanup,
-};
-
struct rockchip_rgb *rockchip_rgb_init(struct device *dev,
struct drm_crtc *crtc,
struct drm_device *drm_dev)
@@ -126,8 +123,7 @@ struct rockchip_rgb *rockchip_rgb_init(struct device *dev,
encoder = &rgb->encoder;
encoder->possible_crtcs = drm_crtc_mask(crtc);
- ret = drm_encoder_init(drm_dev, encoder, &rockchip_rgb_encoder_funcs,
- DRM_MODE_ENCODER_NONE, NULL);
+ ret = drm_simple_encoder_init(drm_dev, encoder, DRM_MODE_ENCODER_NONE);
if (ret < 0) {
DRM_DEV_ERROR(drm_dev->dev,
"failed to initialize encoder: %d\n", ret);
diff --git a/drivers/gpu/drm/rockchip/rockchip_vop_reg.c b/drivers/gpu/drm/rockchip/rockchip_vop_reg.c
index 7a9d979c8d5d..2413deded22c 100644
--- a/drivers/gpu/drm/rockchip/rockchip_vop_reg.c
+++ b/drivers/gpu/drm/rockchip/rockchip_vop_reg.c
@@ -50,6 +50,17 @@ static const uint32_t formats_win_full[] = {
DRM_FORMAT_NV24,
};
+static const uint64_t format_modifiers_win_full[] = {
+ DRM_FORMAT_MOD_LINEAR,
+ DRM_FORMAT_MOD_INVALID,
+};
+
+static const uint64_t format_modifiers_win_full_afbc[] = {
+ ROCKCHIP_AFBC_MOD,
+ DRM_FORMAT_MOD_LINEAR,
+ DRM_FORMAT_MOD_INVALID,
+};
+
static const uint32_t formats_win_lite[] = {
DRM_FORMAT_XRGB8888,
DRM_FORMAT_ARGB8888,
@@ -61,6 +72,11 @@ static const uint32_t formats_win_lite[] = {
DRM_FORMAT_BGR565,
};
+static const uint64_t format_modifiers_win_lite[] = {
+ DRM_FORMAT_MOD_LINEAR,
+ DRM_FORMAT_MOD_INVALID,
+};
+
static const struct vop_scl_regs rk3036_win_scl = {
.scale_yrgb_x = VOP_REG(RK3036_WIN0_SCL_FACTOR_YRGB, 0xffff, 0x0),
.scale_yrgb_y = VOP_REG(RK3036_WIN0_SCL_FACTOR_YRGB, 0xffff, 16),
@@ -72,6 +88,7 @@ static const struct vop_win_phy rk3036_win0_data = {
.scl = &rk3036_win_scl,
.data_formats = formats_win_full,
.nformats = ARRAY_SIZE(formats_win_full),
+ .format_modifiers = format_modifiers_win_full,
.enable = VOP_REG(RK3036_SYS_CTRL, 0x1, 0),
.format = VOP_REG(RK3036_SYS_CTRL, 0x7, 3),
.rb_swap = VOP_REG(RK3036_SYS_CTRL, 0x1, 15),
@@ -87,6 +104,7 @@ static const struct vop_win_phy rk3036_win0_data = {
static const struct vop_win_phy rk3036_win1_data = {
.data_formats = formats_win_lite,
.nformats = ARRAY_SIZE(formats_win_lite),
+ .format_modifiers = format_modifiers_win_lite,
.enable = VOP_REG(RK3036_SYS_CTRL, 0x1, 1),
.format = VOP_REG(RK3036_SYS_CTRL, 0x7, 6),
.rb_swap = VOP_REG(RK3036_SYS_CTRL, 0x1, 19),
@@ -153,6 +171,7 @@ static const struct vop_data rk3036_vop = {
static const struct vop_win_phy rk3126_win1_data = {
.data_formats = formats_win_lite,
.nformats = ARRAY_SIZE(formats_win_lite),
+ .format_modifiers = format_modifiers_win_lite,
.enable = VOP_REG(RK3036_SYS_CTRL, 0x1, 1),
.format = VOP_REG(RK3036_SYS_CTRL, 0x7, 6),
.rb_swap = VOP_REG(RK3036_SYS_CTRL, 0x1, 19),
@@ -234,6 +253,7 @@ static const struct vop_win_phy px30_win0_data = {
.scl = &px30_win_scl,
.data_formats = formats_win_full,
.nformats = ARRAY_SIZE(formats_win_full),
+ .format_modifiers = format_modifiers_win_full,
.enable = VOP_REG(PX30_WIN0_CTRL0, 0x1, 0),
.format = VOP_REG(PX30_WIN0_CTRL0, 0x7, 1),
.rb_swap = VOP_REG(PX30_WIN0_CTRL0, 0x1, 12),
@@ -249,6 +269,7 @@ static const struct vop_win_phy px30_win0_data = {
static const struct vop_win_phy px30_win1_data = {
.data_formats = formats_win_lite,
.nformats = ARRAY_SIZE(formats_win_lite),
+ .format_modifiers = format_modifiers_win_lite,
.enable = VOP_REG(PX30_WIN1_CTRL0, 0x1, 0),
.format = VOP_REG(PX30_WIN1_CTRL0, 0x7, 4),
.rb_swap = VOP_REG(PX30_WIN1_CTRL0, 0x1, 12),
@@ -261,6 +282,7 @@ static const struct vop_win_phy px30_win1_data = {
static const struct vop_win_phy px30_win2_data = {
.data_formats = formats_win_lite,
.nformats = ARRAY_SIZE(formats_win_lite),
+ .format_modifiers = format_modifiers_win_lite,
.gate = VOP_REG(PX30_WIN2_CTRL0, 0x1, 4),
.enable = VOP_REG(PX30_WIN2_CTRL0, 0x1, 0),
.format = VOP_REG(PX30_WIN2_CTRL0, 0x3, 5),
@@ -316,6 +338,7 @@ static const struct vop_win_phy rk3066_win0_data = {
.scl = &rk3066_win_scl,
.data_formats = formats_win_full,
.nformats = ARRAY_SIZE(formats_win_full),
+ .format_modifiers = format_modifiers_win_full,
.enable = VOP_REG(RK3066_SYS_CTRL1, 0x1, 0),
.format = VOP_REG(RK3066_SYS_CTRL0, 0x7, 4),
.rb_swap = VOP_REG(RK3066_SYS_CTRL0, 0x1, 19),
@@ -332,6 +355,7 @@ static const struct vop_win_phy rk3066_win1_data = {
.scl = &rk3066_win_scl,
.data_formats = formats_win_full,
.nformats = ARRAY_SIZE(formats_win_full),
+ .format_modifiers = format_modifiers_win_full,
.enable = VOP_REG(RK3066_SYS_CTRL1, 0x1, 1),
.format = VOP_REG(RK3066_SYS_CTRL0, 0x7, 7),
.rb_swap = VOP_REG(RK3066_SYS_CTRL0, 0x1, 23),
@@ -347,6 +371,7 @@ static const struct vop_win_phy rk3066_win1_data = {
static const struct vop_win_phy rk3066_win2_data = {
.data_formats = formats_win_lite,
.nformats = ARRAY_SIZE(formats_win_lite),
+ .format_modifiers = format_modifiers_win_lite,
.enable = VOP_REG(RK3066_SYS_CTRL1, 0x1, 2),
.format = VOP_REG(RK3066_SYS_CTRL0, 0x7, 10),
.rb_swap = VOP_REG(RK3066_SYS_CTRL0, 0x1, 27),
@@ -426,6 +451,7 @@ static const struct vop_win_phy rk3188_win0_data = {
.scl = &rk3188_win_scl,
.data_formats = formats_win_full,
.nformats = ARRAY_SIZE(formats_win_full),
+ .format_modifiers = format_modifiers_win_full,
.enable = VOP_REG(RK3188_SYS_CTRL, 0x1, 0),
.format = VOP_REG(RK3188_SYS_CTRL, 0x7, 3),
.rb_swap = VOP_REG(RK3188_SYS_CTRL, 0x1, 15),
@@ -440,6 +466,7 @@ static const struct vop_win_phy rk3188_win0_data = {
static const struct vop_win_phy rk3188_win1_data = {
.data_formats = formats_win_lite,
.nformats = ARRAY_SIZE(formats_win_lite),
+ .format_modifiers = format_modifiers_win_lite,
.enable = VOP_REG(RK3188_SYS_CTRL, 0x1, 1),
.format = VOP_REG(RK3188_SYS_CTRL, 0x7, 6),
.rb_swap = VOP_REG(RK3188_SYS_CTRL, 0x1, 19),
@@ -545,6 +572,7 @@ static const struct vop_win_phy rk3288_win01_data = {
.scl = &rk3288_win_full_scl,
.data_formats = formats_win_full,
.nformats = ARRAY_SIZE(formats_win_full),
+ .format_modifiers = format_modifiers_win_full,
.enable = VOP_REG(RK3288_WIN0_CTRL0, 0x1, 0),
.format = VOP_REG(RK3288_WIN0_CTRL0, 0x7, 1),
.rb_swap = VOP_REG(RK3288_WIN0_CTRL0, 0x1, 12),
@@ -563,6 +591,7 @@ static const struct vop_win_phy rk3288_win01_data = {
static const struct vop_win_phy rk3288_win23_data = {
.data_formats = formats_win_lite,
.nformats = ARRAY_SIZE(formats_win_lite),
+ .format_modifiers = format_modifiers_win_lite,
.enable = VOP_REG(RK3288_WIN2_CTRL0, 0x1, 4),
.gate = VOP_REG(RK3288_WIN2_CTRL0, 0x1, 0),
.format = VOP_REG(RK3288_WIN2_CTRL0, 0x7, 1),
@@ -677,6 +706,7 @@ static const struct vop_win_phy rk3368_win01_data = {
.scl = &rk3288_win_full_scl,
.data_formats = formats_win_full,
.nformats = ARRAY_SIZE(formats_win_full),
+ .format_modifiers = format_modifiers_win_full,
.enable = VOP_REG(RK3368_WIN0_CTRL0, 0x1, 0),
.format = VOP_REG(RK3368_WIN0_CTRL0, 0x7, 1),
.rb_swap = VOP_REG(RK3368_WIN0_CTRL0, 0x1, 12),
@@ -697,6 +727,7 @@ static const struct vop_win_phy rk3368_win01_data = {
static const struct vop_win_phy rk3368_win23_data = {
.data_formats = formats_win_lite,
.nformats = ARRAY_SIZE(formats_win_lite),
+ .format_modifiers = format_modifiers_win_lite,
.gate = VOP_REG(RK3368_WIN2_CTRL0, 0x1, 0),
.enable = VOP_REG(RK3368_WIN2_CTRL0, 0x1, 4),
.format = VOP_REG(RK3368_WIN2_CTRL0, 0x3, 5),
@@ -817,6 +848,53 @@ static const struct vop_win_yuv2yuv_data rk3399_vop_big_win_yuv2yuv_data[] = {
.y2r_en = VOP_REG(RK3399_YUV2YUV_WIN, 0x1, 9) },
{ .base = 0xC0, .phy = &rk3399_yuv2yuv_win23_data },
{ .base = 0x120, .phy = &rk3399_yuv2yuv_win23_data },
+
+};
+
+static const struct vop_win_phy rk3399_win01_data = {
+ .scl = &rk3288_win_full_scl,
+ .data_formats = formats_win_full,
+ .nformats = ARRAY_SIZE(formats_win_full),
+ .format_modifiers = format_modifiers_win_full_afbc,
+ .enable = VOP_REG(RK3288_WIN0_CTRL0, 0x1, 0),
+ .format = VOP_REG(RK3288_WIN0_CTRL0, 0x7, 1),
+ .rb_swap = VOP_REG(RK3288_WIN0_CTRL0, 0x1, 12),
+ .y_mir_en = VOP_REG(RK3288_WIN0_CTRL0, 0x1, 22),
+ .act_info = VOP_REG(RK3288_WIN0_ACT_INFO, 0x1fff1fff, 0),
+ .dsp_info = VOP_REG(RK3288_WIN0_DSP_INFO, 0x0fff0fff, 0),
+ .dsp_st = VOP_REG(RK3288_WIN0_DSP_ST, 0x1fff1fff, 0),
+ .yrgb_mst = VOP_REG(RK3288_WIN0_YRGB_MST, 0xffffffff, 0),
+ .uv_mst = VOP_REG(RK3288_WIN0_CBR_MST, 0xffffffff, 0),
+ .yrgb_vir = VOP_REG(RK3288_WIN0_VIR, 0x3fff, 0),
+ .uv_vir = VOP_REG(RK3288_WIN0_VIR, 0x3fff, 16),
+ .src_alpha_ctl = VOP_REG(RK3288_WIN0_SRC_ALPHA_CTRL, 0xff, 0),
+ .dst_alpha_ctl = VOP_REG(RK3288_WIN0_DST_ALPHA_CTRL, 0xff, 0),
+};
+
+/*
+ * rk3399 vop big windows register layout is same as rk3288, but we
+ * have a separate rk3399 win data array here so that we can advertise
+ * AFBC on the primary plane.
+ */
+static const struct vop_win_data rk3399_vop_win_data[] = {
+ { .base = 0x00, .phy = &rk3399_win01_data,
+ .type = DRM_PLANE_TYPE_PRIMARY },
+ { .base = 0x40, .phy = &rk3288_win01_data,
+ .type = DRM_PLANE_TYPE_OVERLAY },
+ { .base = 0x00, .phy = &rk3288_win23_data,
+ .type = DRM_PLANE_TYPE_OVERLAY },
+ { .base = 0x50, .phy = &rk3288_win23_data,
+ .type = DRM_PLANE_TYPE_CURSOR },
+};
+
+static const struct vop_afbc rk3399_vop_afbc = {
+ .rstn = VOP_REG(RK3399_AFBCD0_CTRL, 0x1, 3),
+ .enable = VOP_REG(RK3399_AFBCD0_CTRL, 0x1, 0),
+ .win_sel = VOP_REG(RK3399_AFBCD0_CTRL, 0x3, 1),
+ .format = VOP_REG(RK3399_AFBCD0_CTRL, 0x1f, 16),
+ .hreg_block_split = VOP_REG(RK3399_AFBCD0_CTRL, 0x1, 21),
+ .hdr_ptr = VOP_REG(RK3399_AFBCD0_HDR_PTR, 0xffffffff, 0),
+ .pic_size = VOP_REG(RK3399_AFBCD0_PIC_SIZE, 0xffffffff, 0),
};
static const struct vop_data rk3399_vop_big = {
@@ -826,9 +904,10 @@ static const struct vop_data rk3399_vop_big = {
.common = &rk3288_common,
.modeset = &rk3288_modeset,
.output = &rk3399_output,
+ .afbc = &rk3399_vop_afbc,
.misc = &rk3368_misc,
- .win = rk3368_vop_win_data,
- .win_size = ARRAY_SIZE(rk3368_vop_win_data),
+ .win = rk3399_vop_win_data,
+ .win_size = ARRAY_SIZE(rk3399_vop_win_data),
.win_yuv2yuv = rk3399_vop_big_win_yuv2yuv_data,
};
diff --git a/drivers/gpu/drm/shmobile/shmob_drm_crtc.c b/drivers/gpu/drm/shmobile/shmob_drm_crtc.c
index 75a752d59ef1..03556dbfcafb 100644
--- a/drivers/gpu/drm/shmobile/shmob_drm_crtc.c
+++ b/drivers/gpu/drm/shmobile/shmob_drm_crtc.c
@@ -17,6 +17,7 @@
#include <drm/drm_gem_cma_helper.h>
#include <drm/drm_plane_helper.h>
#include <drm/drm_probe_helper.h>
+#include <drm/drm_simple_kms_helper.h>
#include <drm/drm_vblank.h>
#include "shmob_drm_backlight.h"
@@ -558,15 +559,6 @@ static const struct drm_encoder_helper_funcs encoder_helper_funcs = {
.mode_set = shmob_drm_encoder_mode_set,
};
-static void shmob_drm_encoder_destroy(struct drm_encoder *encoder)
-{
- drm_encoder_cleanup(encoder);
-}
-
-static const struct drm_encoder_funcs encoder_funcs = {
- .destroy = shmob_drm_encoder_destroy,
-};
-
int shmob_drm_encoder_create(struct shmob_drm_device *sdev)
{
struct drm_encoder *encoder = &sdev->encoder.encoder;
@@ -576,8 +568,8 @@ int shmob_drm_encoder_create(struct shmob_drm_device *sdev)
encoder->possible_crtcs = 1;
- ret = drm_encoder_init(sdev->ddev, encoder, &encoder_funcs,
- DRM_MODE_ENCODER_LVDS, NULL);
+ ret = drm_simple_encoder_init(sdev->ddev, encoder,
+ DRM_MODE_ENCODER_LVDS);
if (ret < 0)
return ret;
diff --git a/drivers/gpu/drm/shmobile/shmob_drm_drv.c b/drivers/gpu/drm/shmobile/shmob_drm_drv.c
index b8c0930959c7..ae9d6b8d3ca8 100644
--- a/drivers/gpu/drm/shmobile/shmob_drm_drv.c
+++ b/drivers/gpu/drm/shmobile/shmob_drm_drv.c
@@ -192,7 +192,6 @@ static int shmob_drm_remove(struct platform_device *pdev)
drm_dev_unregister(ddev);
drm_kms_helper_poll_fini(ddev);
- drm_mode_config_cleanup(ddev);
drm_irq_uninstall(ddev);
drm_dev_put(ddev);
@@ -288,7 +287,6 @@ err_irq_uninstall:
drm_irq_uninstall(ddev);
err_modeset_cleanup:
drm_kms_helper_poll_fini(ddev);
- drm_mode_config_cleanup(ddev);
err_free_drm_dev:
drm_dev_put(ddev);
diff --git a/drivers/gpu/drm/shmobile/shmob_drm_kms.c b/drivers/gpu/drm/shmobile/shmob_drm_kms.c
index c51197b6fd85..7a866d6ce6bb 100644
--- a/drivers/gpu/drm/shmobile/shmob_drm_kms.c
+++ b/drivers/gpu/drm/shmobile/shmob_drm_kms.c
@@ -126,7 +126,11 @@ static const struct drm_mode_config_funcs shmob_drm_mode_config_funcs = {
int shmob_drm_modeset_init(struct shmob_drm_device *sdev)
{
- drm_mode_config_init(sdev->ddev);
+ int ret;
+
+ ret = drmm_mode_config_init(sdev->ddev);
+ if (ret)
+ return ret;
shmob_drm_crtc_create(sdev);
shmob_drm_encoder_create(sdev);
diff --git a/drivers/gpu/drm/sti/sti_compositor.c b/drivers/gpu/drm/sti/sti_compositor.c
index c7652584255d..319962a2c17b 100644
--- a/drivers/gpu/drm/sti/sti_compositor.c
+++ b/drivers/gpu/drm/sti/sti_compositor.c
@@ -42,8 +42,8 @@ static const struct sti_compositor_data stih407_compositor_data = {
},
};
-int sti_compositor_debugfs_init(struct sti_compositor *compo,
- struct drm_minor *minor)
+void sti_compositor_debugfs_init(struct sti_compositor *compo,
+ struct drm_minor *minor)
{
unsigned int i;
@@ -54,8 +54,6 @@ int sti_compositor_debugfs_init(struct sti_compositor *compo,
for (i = 0; i < STI_MAX_MIXER; i++)
if (compo->mixer[i])
sti_mixer_debugfs_init(compo->mixer[i], minor);
-
- return 0;
}
static int sti_compositor_bind(struct device *dev,
diff --git a/drivers/gpu/drm/sti/sti_compositor.h b/drivers/gpu/drm/sti/sti_compositor.h
index ac4bb3834810..25bb01bdd013 100644
--- a/drivers/gpu/drm/sti/sti_compositor.h
+++ b/drivers/gpu/drm/sti/sti_compositor.h
@@ -79,7 +79,7 @@ struct sti_compositor {
struct notifier_block vtg_vblank_nb[STI_MAX_MIXER];
};
-int sti_compositor_debugfs_init(struct sti_compositor *compo,
- struct drm_minor *minor);
+void sti_compositor_debugfs_init(struct sti_compositor *compo,
+ struct drm_minor *minor);
#endif
diff --git a/drivers/gpu/drm/sti/sti_crtc.c b/drivers/gpu/drm/sti/sti_crtc.c
index 49e6cb8f5836..6f37c104c46f 100644
--- a/drivers/gpu/drm/sti/sti_crtc.c
+++ b/drivers/gpu/drm/sti/sti_crtc.c
@@ -319,7 +319,7 @@ static int sti_crtc_late_register(struct drm_crtc *crtc)
struct sti_compositor *compo = dev_get_drvdata(mixer->dev);
if (drm_crtc_index(crtc) == 0)
- return sti_compositor_debugfs_init(compo, crtc->dev->primary);
+ sti_compositor_debugfs_init(compo, crtc->dev->primary);
return 0;
}
diff --git a/drivers/gpu/drm/sti/sti_cursor.c b/drivers/gpu/drm/sti/sti_cursor.c
index ea64c1dcaf63..a98057431023 100644
--- a/drivers/gpu/drm/sti/sti_cursor.c
+++ b/drivers/gpu/drm/sti/sti_cursor.c
@@ -131,17 +131,17 @@ static struct drm_info_list cursor_debugfs_files[] = {
{ "cursor", cursor_dbg_show, 0, NULL },
};
-static int cursor_debugfs_init(struct sti_cursor *cursor,
- struct drm_minor *minor)
+static void cursor_debugfs_init(struct sti_cursor *cursor,
+ struct drm_minor *minor)
{
unsigned int i;
for (i = 0; i < ARRAY_SIZE(cursor_debugfs_files); i++)
cursor_debugfs_files[i].data = cursor;
- return drm_debugfs_create_files(cursor_debugfs_files,
- ARRAY_SIZE(cursor_debugfs_files),
- minor->debugfs_root, minor);
+ drm_debugfs_create_files(cursor_debugfs_files,
+ ARRAY_SIZE(cursor_debugfs_files),
+ minor->debugfs_root, minor);
}
static void sti_cursor_argb8888_to_clut8(struct sti_cursor *cursor, u32 *src)
@@ -342,7 +342,9 @@ static int sti_cursor_late_register(struct drm_plane *drm_plane)
struct sti_plane *plane = to_sti_plane(drm_plane);
struct sti_cursor *cursor = to_sti_cursor(plane);
- return cursor_debugfs_init(cursor, drm_plane->dev->primary);
+ cursor_debugfs_init(cursor, drm_plane->dev->primary);
+
+ return 0;
}
static const struct drm_plane_funcs sti_cursor_plane_helpers_funcs = {
diff --git a/drivers/gpu/drm/sti/sti_drv.c b/drivers/gpu/drm/sti/sti_drv.c
index 50870d8cbb76..3f9db3e3f397 100644
--- a/drivers/gpu/drm/sti/sti_drv.c
+++ b/drivers/gpu/drm/sti/sti_drv.c
@@ -92,24 +92,16 @@ static struct drm_info_list sti_drm_dbg_list[] = {
{"fps_get", sti_drm_fps_dbg_show, 0},
};
-static int sti_drm_dbg_init(struct drm_minor *minor)
+static void sti_drm_dbg_init(struct drm_minor *minor)
{
- int ret;
-
- ret = drm_debugfs_create_files(sti_drm_dbg_list,
- ARRAY_SIZE(sti_drm_dbg_list),
- minor->debugfs_root, minor);
- if (ret)
- goto err;
+ drm_debugfs_create_files(sti_drm_dbg_list,
+ ARRAY_SIZE(sti_drm_dbg_list),
+ minor->debugfs_root, minor);
debugfs_create_file("fps_show", S_IRUGO | S_IWUSR, minor->debugfs_root,
minor->dev, &sti_drm_fps_fops);
DRM_INFO("%s: debugfs installed\n", DRIVER_NAME);
- return 0;
-err:
- DRM_ERROR("%s: cannot install debugfs\n", DRIVER_NAME);
- return ret;
}
static const struct drm_mode_config_funcs sti_mode_config_funcs = {
diff --git a/drivers/gpu/drm/sti/sti_dvo.c b/drivers/gpu/drm/sti/sti_dvo.c
index 3d04bfca21a0..de4af7735c46 100644
--- a/drivers/gpu/drm/sti/sti_dvo.c
+++ b/drivers/gpu/drm/sti/sti_dvo.c
@@ -196,16 +196,16 @@ static struct drm_info_list dvo_debugfs_files[] = {
{ "dvo", dvo_dbg_show, 0, NULL },
};
-static int dvo_debugfs_init(struct sti_dvo *dvo, struct drm_minor *minor)
+static void dvo_debugfs_init(struct sti_dvo *dvo, struct drm_minor *minor)
{
unsigned int i;
for (i = 0; i < ARRAY_SIZE(dvo_debugfs_files); i++)
dvo_debugfs_files[i].data = dvo;
- return drm_debugfs_create_files(dvo_debugfs_files,
- ARRAY_SIZE(dvo_debugfs_files),
- minor->debugfs_root, minor);
+ drm_debugfs_create_files(dvo_debugfs_files,
+ ARRAY_SIZE(dvo_debugfs_files),
+ minor->debugfs_root, minor);
}
static void sti_dvo_disable(struct drm_bridge *bridge)
@@ -405,10 +405,7 @@ static int sti_dvo_late_register(struct drm_connector *connector)
= to_sti_dvo_connector(connector);
struct sti_dvo *dvo = dvo_connector->dvo;
- if (dvo_debugfs_init(dvo, dvo->drm_dev->primary)) {
- DRM_ERROR("DVO debugfs setup failed\n");
- return -EINVAL;
- }
+ dvo_debugfs_init(dvo, dvo->drm_dev->primary);
return 0;
}
diff --git a/drivers/gpu/drm/sti/sti_gdp.c b/drivers/gpu/drm/sti/sti_gdp.c
index 11595c748844..2d5a2b5b78b8 100644
--- a/drivers/gpu/drm/sti/sti_gdp.c
+++ b/drivers/gpu/drm/sti/sti_gdp.c
@@ -343,9 +343,10 @@ static int gdp_debugfs_init(struct sti_gdp *gdp, struct drm_minor *minor)
for (i = 0; i < nb_files; i++)
gdp_debugfs_files[i].data = gdp;
- return drm_debugfs_create_files(gdp_debugfs_files,
- nb_files,
- minor->debugfs_root, minor);
+ drm_debugfs_create_files(gdp_debugfs_files,
+ nb_files,
+ minor->debugfs_root, minor);
+ return 0;
}
static int sti_gdp_fourcc2format(int fourcc)
diff --git a/drivers/gpu/drm/sti/sti_hda.c b/drivers/gpu/drm/sti/sti_hda.c
index f3f28d79b0e4..a1ec891eaf3a 100644
--- a/drivers/gpu/drm/sti/sti_hda.c
+++ b/drivers/gpu/drm/sti/sti_hda.c
@@ -367,16 +367,16 @@ static struct drm_info_list hda_debugfs_files[] = {
{ "hda", hda_dbg_show, 0, NULL },
};
-static int hda_debugfs_init(struct sti_hda *hda, struct drm_minor *minor)
+static void hda_debugfs_init(struct sti_hda *hda, struct drm_minor *minor)
{
unsigned int i;
for (i = 0; i < ARRAY_SIZE(hda_debugfs_files); i++)
hda_debugfs_files[i].data = hda;
- return drm_debugfs_create_files(hda_debugfs_files,
- ARRAY_SIZE(hda_debugfs_files),
- minor->debugfs_root, minor);
+ drm_debugfs_create_files(hda_debugfs_files,
+ ARRAY_SIZE(hda_debugfs_files),
+ minor->debugfs_root, minor);
}
/**
@@ -643,10 +643,7 @@ static int sti_hda_late_register(struct drm_connector *connector)
= to_sti_hda_connector(connector);
struct sti_hda *hda = hda_connector->hda;
- if (hda_debugfs_init(hda, hda->drm_dev->primary)) {
- DRM_ERROR("HDA debugfs setup failed\n");
- return -EINVAL;
- }
+ hda_debugfs_init(hda, hda->drm_dev->primary);
return 0;
}
diff --git a/drivers/gpu/drm/sti/sti_hdmi.c b/drivers/gpu/drm/sti/sti_hdmi.c
index 18eaf786ffa4..5b15c4974e6b 100644
--- a/drivers/gpu/drm/sti/sti_hdmi.c
+++ b/drivers/gpu/drm/sti/sti_hdmi.c
@@ -727,16 +727,16 @@ static struct drm_info_list hdmi_debugfs_files[] = {
{ "hdmi", hdmi_dbg_show, 0, NULL },
};
-static int hdmi_debugfs_init(struct sti_hdmi *hdmi, struct drm_minor *minor)
+static void hdmi_debugfs_init(struct sti_hdmi *hdmi, struct drm_minor *minor)
{
unsigned int i;
for (i = 0; i < ARRAY_SIZE(hdmi_debugfs_files); i++)
hdmi_debugfs_files[i].data = hdmi;
- return drm_debugfs_create_files(hdmi_debugfs_files,
- ARRAY_SIZE(hdmi_debugfs_files),
- minor->debugfs_root, minor);
+ drm_debugfs_create_files(hdmi_debugfs_files,
+ ARRAY_SIZE(hdmi_debugfs_files),
+ minor->debugfs_root, minor);
}
static void sti_hdmi_disable(struct drm_bridge *bridge)
@@ -1113,10 +1113,7 @@ static int sti_hdmi_late_register(struct drm_connector *connector)
= to_sti_hdmi_connector(connector);
struct sti_hdmi *hdmi = hdmi_connector->hdmi;
- if (hdmi_debugfs_init(hdmi, hdmi->drm_dev->primary)) {
- DRM_ERROR("HDMI debugfs setup failed\n");
- return -EINVAL;
- }
+ hdmi_debugfs_init(hdmi, hdmi->drm_dev->primary);
return 0;
}
diff --git a/drivers/gpu/drm/sti/sti_hqvdp.c b/drivers/gpu/drm/sti/sti_hqvdp.c
index 1015abe0ce08..5a4e12194a77 100644
--- a/drivers/gpu/drm/sti/sti_hqvdp.c
+++ b/drivers/gpu/drm/sti/sti_hqvdp.c
@@ -639,16 +639,16 @@ static struct drm_info_list hqvdp_debugfs_files[] = {
{ "hqvdp", hqvdp_dbg_show, 0, NULL },
};
-static int hqvdp_debugfs_init(struct sti_hqvdp *hqvdp, struct drm_minor *minor)
+static void hqvdp_debugfs_init(struct sti_hqvdp *hqvdp, struct drm_minor *minor)
{
unsigned int i;
for (i = 0; i < ARRAY_SIZE(hqvdp_debugfs_files); i++)
hqvdp_debugfs_files[i].data = hqvdp;
- return drm_debugfs_create_files(hqvdp_debugfs_files,
- ARRAY_SIZE(hqvdp_debugfs_files),
- minor->debugfs_root, minor);
+ drm_debugfs_create_files(hqvdp_debugfs_files,
+ ARRAY_SIZE(hqvdp_debugfs_files),
+ minor->debugfs_root, minor);
}
/**
@@ -1274,7 +1274,9 @@ static int sti_hqvdp_late_register(struct drm_plane *drm_plane)
struct sti_plane *plane = to_sti_plane(drm_plane);
struct sti_hqvdp *hqvdp = to_sti_hqvdp(plane);
- return hqvdp_debugfs_init(hqvdp, drm_plane->dev->primary);
+ hqvdp_debugfs_init(hqvdp, drm_plane->dev->primary);
+
+ return 0;
}
static const struct drm_plane_funcs sti_hqvdp_plane_helpers_funcs = {
diff --git a/drivers/gpu/drm/sti/sti_mixer.c b/drivers/gpu/drm/sti/sti_mixer.c
index c3a3e1e5fc8a..7e5f14646625 100644
--- a/drivers/gpu/drm/sti/sti_mixer.c
+++ b/drivers/gpu/drm/sti/sti_mixer.c
@@ -178,7 +178,7 @@ static struct drm_info_list mixer1_debugfs_files[] = {
{ "mixer_aux", mixer_dbg_show, 0, NULL },
};
-int sti_mixer_debugfs_init(struct sti_mixer *mixer, struct drm_minor *minor)
+void sti_mixer_debugfs_init(struct sti_mixer *mixer, struct drm_minor *minor)
{
unsigned int i;
struct drm_info_list *mixer_debugfs_files;
@@ -194,15 +194,15 @@ int sti_mixer_debugfs_init(struct sti_mixer *mixer, struct drm_minor *minor)
nb_files = ARRAY_SIZE(mixer1_debugfs_files);
break;
default:
- return -EINVAL;
+ return;
}
for (i = 0; i < nb_files; i++)
mixer_debugfs_files[i].data = mixer;
- return drm_debugfs_create_files(mixer_debugfs_files,
- nb_files,
- minor->debugfs_root, minor);
+ drm_debugfs_create_files(mixer_debugfs_files,
+ nb_files,
+ minor->debugfs_root, minor);
}
void sti_mixer_set_background_status(struct sti_mixer *mixer, bool enable)
diff --git a/drivers/gpu/drm/sti/sti_mixer.h b/drivers/gpu/drm/sti/sti_mixer.h
index d9544246913a..ab06beb7b258 100644
--- a/drivers/gpu/drm/sti/sti_mixer.h
+++ b/drivers/gpu/drm/sti/sti_mixer.h
@@ -58,7 +58,7 @@ int sti_mixer_active_video_area(struct sti_mixer *mixer,
void sti_mixer_set_background_status(struct sti_mixer *mixer, bool enable);
-int sti_mixer_debugfs_init(struct sti_mixer *mixer, struct drm_minor *minor);
+void sti_mixer_debugfs_init(struct sti_mixer *mixer, struct drm_minor *minor);
/* depth in Cross-bar control = z order */
#define GAM_MIXER_NB_DEPTH_LEVEL 6
diff --git a/drivers/gpu/drm/sti/sti_tvout.c b/drivers/gpu/drm/sti/sti_tvout.c
index c36a8da373cb..df3817f0fd30 100644
--- a/drivers/gpu/drm/sti/sti_tvout.c
+++ b/drivers/gpu/drm/sti/sti_tvout.c
@@ -570,16 +570,16 @@ static struct drm_info_list tvout_debugfs_files[] = {
{ "tvout", tvout_dbg_show, 0, NULL },
};
-static int tvout_debugfs_init(struct sti_tvout *tvout, struct drm_minor *minor)
+static void tvout_debugfs_init(struct sti_tvout *tvout, struct drm_minor *minor)
{
unsigned int i;
for (i = 0; i < ARRAY_SIZE(tvout_debugfs_files); i++)
tvout_debugfs_files[i].data = tvout;
- return drm_debugfs_create_files(tvout_debugfs_files,
- ARRAY_SIZE(tvout_debugfs_files),
- minor->debugfs_root, minor);
+ drm_debugfs_create_files(tvout_debugfs_files,
+ ARRAY_SIZE(tvout_debugfs_files),
+ minor->debugfs_root, minor);
}
static void sti_tvout_encoder_dpms(struct drm_encoder *encoder, int mode)
@@ -603,14 +603,11 @@ static void sti_tvout_encoder_destroy(struct drm_encoder *encoder)
static int sti_tvout_late_register(struct drm_encoder *encoder)
{
struct sti_tvout *tvout = to_sti_tvout(encoder);
- int ret;
if (tvout->debugfs_registered)
return 0;
- ret = tvout_debugfs_init(tvout, encoder->dev->primary);
- if (ret)
- return ret;
+ tvout_debugfs_init(tvout, encoder->dev->primary);
tvout->debugfs_registered = true;
return 0;
diff --git a/drivers/gpu/drm/sti/sti_vid.c b/drivers/gpu/drm/sti/sti_vid.c
index 2d4230410464..2d818397918d 100644
--- a/drivers/gpu/drm/sti/sti_vid.c
+++ b/drivers/gpu/drm/sti/sti_vid.c
@@ -124,16 +124,16 @@ static struct drm_info_list vid_debugfs_files[] = {
{ "vid", vid_dbg_show, 0, NULL },
};
-int vid_debugfs_init(struct sti_vid *vid, struct drm_minor *minor)
+void vid_debugfs_init(struct sti_vid *vid, struct drm_minor *minor)
{
unsigned int i;
for (i = 0; i < ARRAY_SIZE(vid_debugfs_files); i++)
vid_debugfs_files[i].data = vid;
- return drm_debugfs_create_files(vid_debugfs_files,
- ARRAY_SIZE(vid_debugfs_files),
- minor->debugfs_root, minor);
+ drm_debugfs_create_files(vid_debugfs_files,
+ ARRAY_SIZE(vid_debugfs_files),
+ minor->debugfs_root, minor);
}
void sti_vid_commit(struct sti_vid *vid,
diff --git a/drivers/gpu/drm/sti/sti_vid.h b/drivers/gpu/drm/sti/sti_vid.h
index 9dbd78461de1..991849ba50b5 100644
--- a/drivers/gpu/drm/sti/sti_vid.h
+++ b/drivers/gpu/drm/sti/sti_vid.h
@@ -26,6 +26,6 @@ void sti_vid_disable(struct sti_vid *vid);
struct sti_vid *sti_vid_create(struct device *dev, struct drm_device *drm_dev,
int id, void __iomem *baseaddr);
-int vid_debugfs_init(struct sti_vid *vid, struct drm_minor *minor);
+void vid_debugfs_init(struct sti_vid *vid, struct drm_minor *minor);
#endif
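
All of the sti debugfs helpers above change from int to void for the same reason: drm_debugfs_create_files() cannot meaningfully fail from the caller's point of view (debugfs errors are intentionally ignored), so propagating a return value only created dead error paths. A minimal sketch of the resulting pattern, with illustrative names:

/* Sketch of a void-returning debugfs init helper. */
#include <linux/kernel.h>
#include <linux/seq_file.h>
#include <drm/drm_debugfs.h>
#include <drm/drm_file.h>

static int example_dbg_show(struct seq_file *s, void *data)
{
        seq_puts(s, "example\n");
        return 0;
}

static struct drm_info_list example_debugfs_files[] = {
        { "example", example_dbg_show, 0, NULL },
};

static void example_debugfs_init(void *priv, struct drm_minor *minor)
{
        unsigned int i;

        for (i = 0; i < ARRAY_SIZE(example_debugfs_files); i++)
                example_debugfs_files[i].data = priv;

        /* Any debugfs error is ignored by design. */
        drm_debugfs_create_files(example_debugfs_files,
                                 ARRAY_SIZE(example_debugfs_files),
                                 minor->debugfs_root, minor);
}
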
diff --git a/drivers/gpu/drm/stm/drv.c b/drivers/gpu/drm/stm/drv.c
index ea9fcbdc68b3..0f85dd86cafa 100644
--- a/drivers/gpu/drm/stm/drv.c
+++ b/drivers/gpu/drm/stm/drv.c
@@ -88,7 +88,9 @@ static int drv_load(struct drm_device *ddev)
ddev->dev_private = (void *)ldev;
- drm_mode_config_init(ddev);
+ ret = drmm_mode_config_init(ddev);
+ if (ret)
+ return ret;
/*
* set max width and height as default value.
@@ -103,7 +105,7 @@ static int drv_load(struct drm_device *ddev)
ret = ltdc_load(ddev);
if (ret)
- goto err;
+ return ret;
drm_mode_config_reset(ddev);
drm_kms_helper_poll_init(ddev);
@@ -111,9 +113,6 @@ static int drv_load(struct drm_device *ddev)
platform_set_drvdata(pdev, ddev);
return 0;
-err:
- drm_mode_config_cleanup(ddev);
- return ret;
}
static void drv_unload(struct drm_device *ddev)
@@ -122,7 +121,6 @@ static void drv_unload(struct drm_device *ddev)
drm_kms_helper_poll_fini(ddev);
ltdc_unload(ddev);
- drm_mode_config_cleanup(ddev);
}
static __maybe_unused int drv_suspend(struct device *dev)
diff --git a/drivers/gpu/drm/stm/ltdc.c b/drivers/gpu/drm/stm/ltdc.c
index df585fe64f61..f894968d6e45 100644
--- a/drivers/gpu/drm/stm/ltdc.c
+++ b/drivers/gpu/drm/stm/ltdc.c
@@ -42,8 +42,6 @@
#define MAX_IRQ 4
-#define MAX_ENDPOINTS 2
-
#define HWVER_10200 0x010200
#define HWVER_10300 0x010300
#define HWVER_20101 0x020101
@@ -1201,36 +1199,20 @@ int ltdc_load(struct drm_device *ddev)
struct ltdc_device *ldev = ddev->dev_private;
struct device *dev = ddev->dev;
struct device_node *np = dev->of_node;
- struct drm_bridge *bridge[MAX_ENDPOINTS] = {NULL};
- struct drm_panel *panel[MAX_ENDPOINTS] = {NULL};
+ struct drm_bridge *bridge;
+ struct drm_panel *panel;
struct drm_crtc *crtc;
struct reset_control *rstc;
struct resource *res;
- int irq, ret, i, endpoint_not_ready = -ENODEV;
+ int irq, i, nb_endpoints;
+ int ret = -ENODEV;
DRM_DEBUG_DRIVER("\n");
- /* Get endpoints if any */
- for (i = 0; i < MAX_ENDPOINTS; i++) {
- ret = drm_of_find_panel_or_bridge(np, 0, i, &panel[i],
- &bridge[i]);
-
- /*
- * If at least one endpoint is -EPROBE_DEFER, defer probing,
- * else if at least one endpoint is ready, continue probing.
- */
- if (ret == -EPROBE_DEFER)
- return ret;
- else if (!ret)
- endpoint_not_ready = 0;
- }
-
- if (endpoint_not_ready)
- return endpoint_not_ready;
-
- rstc = devm_reset_control_get_exclusive(dev, NULL);
-
- mutex_init(&ldev->err_lock);
+ /* Get number of endpoints */
+ nb_endpoints = of_graph_get_endpoint_count(np);
+ if (!nb_endpoints)
+ return -ENODEV;
ldev->pixel_clk = devm_clk_get(dev, "lcd");
if (IS_ERR(ldev->pixel_clk)) {
@@ -1244,6 +1226,43 @@ int ltdc_load(struct drm_device *ddev)
return -ENODEV;
}
+ /* Get endpoints if any */
+ for (i = 0; i < nb_endpoints; i++) {
+ ret = drm_of_find_panel_or_bridge(np, 0, i, &panel, &bridge);
+
+ /*
+ * If at least one endpoint is -ENODEV, continue probing,
+ * else if at least one endpoint returned an error
+ * (ie -EPROBE_DEFER) then stop probing.
+ */
+ if (ret == -ENODEV)
+ continue;
+ else if (ret)
+ goto err;
+
+ if (panel) {
+ bridge = drm_panel_bridge_add_typed(panel,
+ DRM_MODE_CONNECTOR_DPI);
+ if (IS_ERR(bridge)) {
+ DRM_ERROR("panel-bridge endpoint %d\n", i);
+ ret = PTR_ERR(bridge);
+ goto err;
+ }
+ }
+
+ if (bridge) {
+ ret = ltdc_encoder_init(ddev, bridge);
+ if (ret) {
+ DRM_ERROR("init encoder endpoint %d\n", i);
+ goto err;
+ }
+ }
+ }
+
+ rstc = devm_reset_control_get_exclusive(dev, NULL);
+
+ mutex_init(&ldev->err_lock);
+
if (!IS_ERR(rstc)) {
reset_control_assert(rstc);
usleep_range(10, 20);
@@ -1285,27 +1304,7 @@ int ltdc_load(struct drm_device *ddev)
DRM_ERROR("Failed to register LTDC interrupt\n");
goto err;
}
- }
- /* Add endpoints panels or bridges if any */
- for (i = 0; i < MAX_ENDPOINTS; i++) {
- if (panel[i]) {
- bridge[i] = drm_panel_bridge_add_typed(panel[i],
- DRM_MODE_CONNECTOR_DPI);
- if (IS_ERR(bridge[i])) {
- DRM_ERROR("panel-bridge endpoint %d\n", i);
- ret = PTR_ERR(bridge[i]);
- goto err;
- }
- }
-
- if (bridge[i]) {
- ret = ltdc_encoder_init(ddev, bridge[i]);
- if (ret) {
- DRM_ERROR("init encoder endpoint %d\n", i);
- goto err;
- }
- }
}
crtc = devm_kzalloc(dev, sizeof(*crtc), GFP_KERNEL);
@@ -1340,8 +1339,8 @@ int ltdc_load(struct drm_device *ddev)
return 0;
err:
- for (i = 0; i < MAX_ENDPOINTS; i++)
- drm_panel_bridge_remove(bridge[i]);
+ for (i = 0; i < nb_endpoints; i++)
+ drm_of_panel_bridge_remove(ddev->dev->of_node, 0, i);
clk_disable_unprepare(ldev->pixel_clk);
@@ -1350,11 +1349,14 @@ err:
void ltdc_unload(struct drm_device *ddev)
{
- int i;
+ struct device *dev = ddev->dev;
+ int nb_endpoints, i;
DRM_DEBUG_DRIVER("\n");
- for (i = 0; i < MAX_ENDPOINTS; i++)
+ nb_endpoints = of_graph_get_endpoint_count(dev->of_node);
+
+ for (i = 0; i < nb_endpoints; i++)
drm_of_panel_bridge_remove(ddev->dev->of_node, 0, i);
pm_runtime_disable(ddev->dev);
diff --git a/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c b/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c
index 68d4644ac2dc..e324d7db7b7d 100644
--- a/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c
+++ b/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c
@@ -22,6 +22,7 @@
#include <drm/drm_panel.h>
#include <drm/drm_print.h>
#include <drm/drm_probe_helper.h>
+#include <drm/drm_simple_kms_helper.h>
#include "sun4i_backend.h"
#include "sun4i_crtc.h"
@@ -204,10 +205,6 @@ static const struct drm_encoder_helper_funcs sun4i_hdmi_helper_funcs = {
.mode_valid = sun4i_hdmi_mode_valid,
};
-static const struct drm_encoder_funcs sun4i_hdmi_funcs = {
- .destroy = drm_encoder_cleanup,
-};
-
static int sun4i_hdmi_get_modes(struct drm_connector *connector)
{
struct sun4i_hdmi *hdmi = drm_connector_to_sun4i_hdmi(connector);
@@ -611,11 +608,8 @@ static int sun4i_hdmi_bind(struct device *dev, struct device *master,
drm_encoder_helper_add(&hdmi->encoder,
&sun4i_hdmi_helper_funcs);
- ret = drm_encoder_init(drm,
- &hdmi->encoder,
- &sun4i_hdmi_funcs,
- DRM_MODE_ENCODER_TMDS,
- NULL);
+ ret = drm_simple_encoder_init(drm, &hdmi->encoder,
+ DRM_MODE_ENCODER_TMDS);
if (ret) {
dev_err(dev, "Couldn't initialise the HDMI encoder\n");
goto err_put_ddc_i2c;
diff --git a/drivers/gpu/drm/sun4i/sun4i_lvds.c b/drivers/gpu/drm/sun4i/sun4i_lvds.c
index 26e5c7ceb8ff..ffda3184aa12 100644
--- a/drivers/gpu/drm/sun4i/sun4i_lvds.c
+++ b/drivers/gpu/drm/sun4i/sun4i_lvds.c
@@ -12,6 +12,7 @@
#include <drm/drm_panel.h>
#include <drm/drm_print.h>
#include <drm/drm_probe_helper.h>
+#include <drm/drm_simple_kms_helper.h>
#include "sun4i_crtc.h"
#include "sun4i_tcon.h"
@@ -96,10 +97,6 @@ static const struct drm_encoder_helper_funcs sun4i_lvds_enc_helper_funcs = {
.enable = sun4i_lvds_encoder_enable,
};
-static const struct drm_encoder_funcs sun4i_lvds_enc_funcs = {
- .destroy = drm_encoder_cleanup,
-};
-
int sun4i_lvds_init(struct drm_device *drm, struct sun4i_tcon *tcon)
{
struct drm_encoder *encoder;
@@ -121,11 +118,8 @@ int sun4i_lvds_init(struct drm_device *drm, struct sun4i_tcon *tcon)
drm_encoder_helper_add(&lvds->encoder,
&sun4i_lvds_enc_helper_funcs);
- ret = drm_encoder_init(drm,
- &lvds->encoder,
- &sun4i_lvds_enc_funcs,
- DRM_MODE_ENCODER_LVDS,
- NULL);
+ ret = drm_simple_encoder_init(drm, &lvds->encoder,
+ DRM_MODE_ENCODER_LVDS);
if (ret) {
dev_err(drm->dev, "Couldn't initialise the lvds encoder\n");
goto err_out;
diff --git a/drivers/gpu/drm/sun4i/sun4i_rgb.c b/drivers/gpu/drm/sun4i/sun4i_rgb.c
index 3b23d5be3cf3..5a7d43939ae6 100644
--- a/drivers/gpu/drm/sun4i/sun4i_rgb.c
+++ b/drivers/gpu/drm/sun4i/sun4i_rgb.c
@@ -14,6 +14,7 @@
#include <drm/drm_panel.h>
#include <drm/drm_print.h>
#include <drm/drm_probe_helper.h>
+#include <drm/drm_simple_kms_helper.h>
#include "sun4i_crtc.h"
#include "sun4i_tcon.h"
@@ -188,15 +189,6 @@ static struct drm_encoder_helper_funcs sun4i_rgb_enc_helper_funcs = {
.mode_valid = sun4i_rgb_mode_valid,
};
-static void sun4i_rgb_enc_destroy(struct drm_encoder *encoder)
-{
- drm_encoder_cleanup(encoder);
-}
-
-static struct drm_encoder_funcs sun4i_rgb_enc_funcs = {
- .destroy = sun4i_rgb_enc_destroy,
-};
-
int sun4i_rgb_init(struct drm_device *drm, struct sun4i_tcon *tcon)
{
struct drm_encoder *encoder;
@@ -218,11 +210,8 @@ int sun4i_rgb_init(struct drm_device *drm, struct sun4i_tcon *tcon)
drm_encoder_helper_add(&rgb->encoder,
&sun4i_rgb_enc_helper_funcs);
- ret = drm_encoder_init(drm,
- &rgb->encoder,
- &sun4i_rgb_enc_funcs,
- DRM_MODE_ENCODER_NONE,
- NULL);
+ ret = drm_simple_encoder_init(drm, &rgb->encoder,
+ DRM_MODE_ENCODER_NONE);
if (ret) {
dev_err(drm->dev, "Couldn't initialise the rgb encoder\n");
goto err_out;
diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.c b/drivers/gpu/drm/sun4i/sun4i_tcon.c
index 624437b27cdc..359b56e43b83 100644
--- a/drivers/gpu/drm/sun4i/sun4i_tcon.c
+++ b/drivers/gpu/drm/sun4i/sun4i_tcon.c
@@ -812,10 +812,8 @@ static int sun4i_tcon_init_irq(struct device *dev,
int irq, ret;
irq = platform_get_irq(pdev, 0);
- if (irq < 0) {
- dev_err(dev, "Couldn't retrieve the TCON interrupt\n");
+ if (irq < 0)
return irq;
- }
ret = devm_request_irq(dev, irq, sun4i_tcon_handler, 0,
dev_name(dev), tcon);
diff --git a/drivers/gpu/drm/sun4i/sun4i_tv.c b/drivers/gpu/drm/sun4i/sun4i_tv.c
index 39c15282e448..63f4428ac3bf 100644
--- a/drivers/gpu/drm/sun4i/sun4i_tv.c
+++ b/drivers/gpu/drm/sun4i/sun4i_tv.c
@@ -19,6 +19,7 @@
#include <drm/drm_panel.h>
#include <drm/drm_print.h>
#include <drm/drm_probe_helper.h>
+#include <drm/drm_simple_kms_helper.h>
#include "sun4i_crtc.h"
#include "sun4i_drv.h"
@@ -473,15 +474,6 @@ static struct drm_encoder_helper_funcs sun4i_tv_helper_funcs = {
.mode_set = sun4i_tv_mode_set,
};
-static void sun4i_tv_destroy(struct drm_encoder *encoder)
-{
- drm_encoder_cleanup(encoder);
-}
-
-static struct drm_encoder_funcs sun4i_tv_funcs = {
- .destroy = sun4i_tv_destroy,
-};
-
static int sun4i_tv_comp_get_modes(struct drm_connector *connector)
{
int i;
@@ -592,11 +584,8 @@ static int sun4i_tv_bind(struct device *dev, struct device *master,
drm_encoder_helper_add(&tv->encoder,
&sun4i_tv_helper_funcs);
- ret = drm_encoder_init(drm,
- &tv->encoder,
- &sun4i_tv_funcs,
- DRM_MODE_ENCODER_TVDAC,
- NULL);
+ ret = drm_simple_encoder_init(drm, &tv->encoder,
+ DRM_MODE_ENCODER_TVDAC);
if (ret) {
dev_err(dev, "Couldn't initialise the TV encoder\n");
goto err_disable_clk;
diff --git a/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c b/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c
index 3eb89f1eb0e1..aa67cb037e9d 100644
--- a/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c
+++ b/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c
@@ -24,6 +24,7 @@
#include <drm/drm_panel.h>
#include <drm/drm_print.h>
#include <drm/drm_probe_helper.h>
+#include <drm/drm_simple_kms_helper.h>
#include "sun4i_crtc.h"
#include "sun4i_tcon.h"
@@ -846,10 +847,6 @@ static const struct drm_encoder_helper_funcs sun6i_dsi_enc_helper_funcs = {
.enable = sun6i_dsi_encoder_enable,
};
-static const struct drm_encoder_funcs sun6i_dsi_enc_funcs = {
- .destroy = drm_encoder_cleanup,
-};
-
static u32 sun6i_dsi_dcs_build_pkt_hdr(struct sun6i_dsi *dsi,
const struct mipi_dsi_msg *msg)
{
@@ -1062,11 +1059,8 @@ static int sun6i_dsi_bind(struct device *dev, struct device *master,
drm_encoder_helper_add(&dsi->encoder,
&sun6i_dsi_enc_helper_funcs);
- ret = drm_encoder_init(drm,
- &dsi->encoder,
- &sun6i_dsi_enc_funcs,
- DRM_MODE_ENCODER_DSI,
- NULL);
+ ret = drm_simple_encoder_init(drm, &dsi->encoder,
+ DRM_MODE_ENCODER_DSI);
if (ret) {
dev_err(dsi->dev, "Couldn't initialise the DSI encoder\n");
return ret;
diff --git a/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c b/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c
index e8a317d5ba19..972682bb8000 100644
--- a/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c
+++ b/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c
@@ -10,6 +10,7 @@
#include <drm/drm_crtc_helper.h>
#include <drm/drm_of.h>
+#include <drm/drm_simple_kms_helper.h>
#include "sun8i_dw_hdmi.h"
#include "sun8i_tcon_top.h"
@@ -29,10 +30,6 @@ sun8i_dw_hdmi_encoder_helper_funcs = {
.mode_set = sun8i_dw_hdmi_encoder_mode_set,
};
-static const struct drm_encoder_funcs sun8i_dw_hdmi_encoder_funcs = {
- .destroy = drm_encoder_cleanup,
-};
-
static enum drm_mode_status
sun8i_dw_hdmi_mode_valid_a83t(struct drm_connector *connector,
const struct drm_display_mode *mode)
@@ -220,8 +217,7 @@ static int sun8i_dw_hdmi_bind(struct device *dev, struct device *master,
}
drm_encoder_helper_add(encoder, &sun8i_dw_hdmi_encoder_helper_funcs);
- drm_encoder_init(drm, encoder, &sun8i_dw_hdmi_encoder_funcs,
- DRM_MODE_ENCODER_TMDS, NULL);
+ drm_simple_encoder_init(drm, encoder, DRM_MODE_ENCODER_TMDS);
sun8i_hdmi_phy_init(hdmi->phy);
diff --git a/drivers/gpu/drm/sun4i/sun8i_mixer.c b/drivers/gpu/drm/sun4i/sun8i_mixer.c
index 4a64f7ae437a..56cc037fd312 100644
--- a/drivers/gpu/drm/sun4i/sun8i_mixer.c
+++ b/drivers/gpu/drm/sun4i/sun8i_mixer.c
@@ -27,314 +27,225 @@
#include "sun8i_vi_layer.h"
#include "sunxi_engine.h"
+struct de2_fmt_info {
+ u32 drm_fmt;
+ u32 de2_fmt;
+};
+
static const struct de2_fmt_info de2_formats[] = {
{
.drm_fmt = DRM_FORMAT_ARGB8888,
.de2_fmt = SUN8I_MIXER_FBFMT_ARGB8888,
- .rgb = true,
- .csc = SUN8I_CSC_MODE_OFF,
},
{
.drm_fmt = DRM_FORMAT_ABGR8888,
.de2_fmt = SUN8I_MIXER_FBFMT_ABGR8888,
- .rgb = true,
- .csc = SUN8I_CSC_MODE_OFF,
},
{
.drm_fmt = DRM_FORMAT_RGBA8888,
.de2_fmt = SUN8I_MIXER_FBFMT_RGBA8888,
- .rgb = true,
- .csc = SUN8I_CSC_MODE_OFF,
},
{
.drm_fmt = DRM_FORMAT_BGRA8888,
.de2_fmt = SUN8I_MIXER_FBFMT_BGRA8888,
- .rgb = true,
- .csc = SUN8I_CSC_MODE_OFF,
},
{
.drm_fmt = DRM_FORMAT_XRGB8888,
.de2_fmt = SUN8I_MIXER_FBFMT_XRGB8888,
- .rgb = true,
- .csc = SUN8I_CSC_MODE_OFF,
},
{
.drm_fmt = DRM_FORMAT_XBGR8888,
.de2_fmt = SUN8I_MIXER_FBFMT_XBGR8888,
- .rgb = true,
- .csc = SUN8I_CSC_MODE_OFF,
},
{
.drm_fmt = DRM_FORMAT_RGBX8888,
.de2_fmt = SUN8I_MIXER_FBFMT_RGBX8888,
- .rgb = true,
- .csc = SUN8I_CSC_MODE_OFF,
},
{
.drm_fmt = DRM_FORMAT_BGRX8888,
.de2_fmt = SUN8I_MIXER_FBFMT_BGRX8888,
- .rgb = true,
- .csc = SUN8I_CSC_MODE_OFF,
},
{
.drm_fmt = DRM_FORMAT_RGB888,
.de2_fmt = SUN8I_MIXER_FBFMT_RGB888,
- .rgb = true,
- .csc = SUN8I_CSC_MODE_OFF,
},
{
.drm_fmt = DRM_FORMAT_BGR888,
.de2_fmt = SUN8I_MIXER_FBFMT_BGR888,
- .rgb = true,
- .csc = SUN8I_CSC_MODE_OFF,
},
{
.drm_fmt = DRM_FORMAT_RGB565,
.de2_fmt = SUN8I_MIXER_FBFMT_RGB565,
- .rgb = true,
- .csc = SUN8I_CSC_MODE_OFF,
},
{
.drm_fmt = DRM_FORMAT_BGR565,
.de2_fmt = SUN8I_MIXER_FBFMT_BGR565,
- .rgb = true,
- .csc = SUN8I_CSC_MODE_OFF,
},
{
.drm_fmt = DRM_FORMAT_ARGB4444,
.de2_fmt = SUN8I_MIXER_FBFMT_ARGB4444,
- .rgb = true,
- .csc = SUN8I_CSC_MODE_OFF,
},
{
/* for DE2 VI layer which ignores alpha */
.drm_fmt = DRM_FORMAT_XRGB4444,
.de2_fmt = SUN8I_MIXER_FBFMT_ARGB4444,
- .rgb = true,
- .csc = SUN8I_CSC_MODE_OFF,
},
{
.drm_fmt = DRM_FORMAT_ABGR4444,
.de2_fmt = SUN8I_MIXER_FBFMT_ABGR4444,
- .rgb = true,
- .csc = SUN8I_CSC_MODE_OFF,
},
{
/* for DE2 VI layer which ignores alpha */
.drm_fmt = DRM_FORMAT_XBGR4444,
.de2_fmt = SUN8I_MIXER_FBFMT_ABGR4444,
- .rgb = true,
- .csc = SUN8I_CSC_MODE_OFF,
},
{
.drm_fmt = DRM_FORMAT_RGBA4444,
.de2_fmt = SUN8I_MIXER_FBFMT_RGBA4444,
- .rgb = true,
- .csc = SUN8I_CSC_MODE_OFF,
},
{
/* for DE2 VI layer which ignores alpha */
.drm_fmt = DRM_FORMAT_RGBX4444,
.de2_fmt = SUN8I_MIXER_FBFMT_RGBA4444,
- .rgb = true,
- .csc = SUN8I_CSC_MODE_OFF,
},
{
.drm_fmt = DRM_FORMAT_BGRA4444,
.de2_fmt = SUN8I_MIXER_FBFMT_BGRA4444,
- .rgb = true,
- .csc = SUN8I_CSC_MODE_OFF,
},
{
/* for DE2 VI layer which ignores alpha */
.drm_fmt = DRM_FORMAT_BGRX4444,
.de2_fmt = SUN8I_MIXER_FBFMT_BGRA4444,
- .rgb = true,
- .csc = SUN8I_CSC_MODE_OFF,
},
{
.drm_fmt = DRM_FORMAT_ARGB1555,
.de2_fmt = SUN8I_MIXER_FBFMT_ARGB1555,
- .rgb = true,
- .csc = SUN8I_CSC_MODE_OFF,
},
{
/* for DE2 VI layer which ignores alpha */
.drm_fmt = DRM_FORMAT_XRGB1555,
.de2_fmt = SUN8I_MIXER_FBFMT_ARGB1555,
- .rgb = true,
- .csc = SUN8I_CSC_MODE_OFF,
},
{
.drm_fmt = DRM_FORMAT_ABGR1555,
.de2_fmt = SUN8I_MIXER_FBFMT_ABGR1555,
- .rgb = true,
- .csc = SUN8I_CSC_MODE_OFF,
},
{
/* for DE2 VI layer which ignores alpha */
.drm_fmt = DRM_FORMAT_XBGR1555,
.de2_fmt = SUN8I_MIXER_FBFMT_ABGR1555,
- .rgb = true,
- .csc = SUN8I_CSC_MODE_OFF,
},
{
.drm_fmt = DRM_FORMAT_RGBA5551,
.de2_fmt = SUN8I_MIXER_FBFMT_RGBA5551,
- .rgb = true,
- .csc = SUN8I_CSC_MODE_OFF,
},
{
/* for DE2 VI layer which ignores alpha */
.drm_fmt = DRM_FORMAT_RGBX5551,
.de2_fmt = SUN8I_MIXER_FBFMT_RGBA5551,
- .rgb = true,
- .csc = SUN8I_CSC_MODE_OFF,
},
{
.drm_fmt = DRM_FORMAT_BGRA5551,
.de2_fmt = SUN8I_MIXER_FBFMT_BGRA5551,
- .rgb = true,
- .csc = SUN8I_CSC_MODE_OFF,
},
{
/* for DE2 VI layer which ignores alpha */
.drm_fmt = DRM_FORMAT_BGRX5551,
.de2_fmt = SUN8I_MIXER_FBFMT_BGRA5551,
- .rgb = true,
- .csc = SUN8I_CSC_MODE_OFF,
},
{
.drm_fmt = DRM_FORMAT_ARGB2101010,
.de2_fmt = SUN8I_MIXER_FBFMT_ARGB2101010,
- .rgb = true,
- .csc = SUN8I_CSC_MODE_OFF,
},
{
.drm_fmt = DRM_FORMAT_ABGR2101010,
.de2_fmt = SUN8I_MIXER_FBFMT_ABGR2101010,
- .rgb = true,
- .csc = SUN8I_CSC_MODE_OFF,
},
{
.drm_fmt = DRM_FORMAT_RGBA1010102,
.de2_fmt = SUN8I_MIXER_FBFMT_RGBA1010102,
- .rgb = true,
- .csc = SUN8I_CSC_MODE_OFF,
},
{
.drm_fmt = DRM_FORMAT_BGRA1010102,
.de2_fmt = SUN8I_MIXER_FBFMT_BGRA1010102,
- .rgb = true,
- .csc = SUN8I_CSC_MODE_OFF,
},
{
.drm_fmt = DRM_FORMAT_UYVY,
.de2_fmt = SUN8I_MIXER_FBFMT_UYVY,
- .rgb = false,
- .csc = SUN8I_CSC_MODE_YUV2RGB,
},
{
.drm_fmt = DRM_FORMAT_VYUY,
.de2_fmt = SUN8I_MIXER_FBFMT_VYUY,
- .rgb = false,
- .csc = SUN8I_CSC_MODE_YUV2RGB,
},
{
.drm_fmt = DRM_FORMAT_YUYV,
.de2_fmt = SUN8I_MIXER_FBFMT_YUYV,
- .rgb = false,
- .csc = SUN8I_CSC_MODE_YUV2RGB,
},
{
.drm_fmt = DRM_FORMAT_YVYU,
.de2_fmt = SUN8I_MIXER_FBFMT_YVYU,
- .rgb = false,
- .csc = SUN8I_CSC_MODE_YUV2RGB,
},
{
.drm_fmt = DRM_FORMAT_NV16,
.de2_fmt = SUN8I_MIXER_FBFMT_NV16,
- .rgb = false,
- .csc = SUN8I_CSC_MODE_YUV2RGB,
},
{
.drm_fmt = DRM_FORMAT_NV61,
.de2_fmt = SUN8I_MIXER_FBFMT_NV61,
- .rgb = false,
- .csc = SUN8I_CSC_MODE_YUV2RGB,
},
{
.drm_fmt = DRM_FORMAT_NV12,
.de2_fmt = SUN8I_MIXER_FBFMT_NV12,
- .rgb = false,
- .csc = SUN8I_CSC_MODE_YUV2RGB,
},
{
.drm_fmt = DRM_FORMAT_NV21,
.de2_fmt = SUN8I_MIXER_FBFMT_NV21,
- .rgb = false,
- .csc = SUN8I_CSC_MODE_YUV2RGB,
},
{
.drm_fmt = DRM_FORMAT_YUV422,
.de2_fmt = SUN8I_MIXER_FBFMT_YUV422,
- .rgb = false,
- .csc = SUN8I_CSC_MODE_YUV2RGB,
},
{
.drm_fmt = DRM_FORMAT_YUV420,
.de2_fmt = SUN8I_MIXER_FBFMT_YUV420,
- .rgb = false,
- .csc = SUN8I_CSC_MODE_YUV2RGB,
},
{
.drm_fmt = DRM_FORMAT_YUV411,
.de2_fmt = SUN8I_MIXER_FBFMT_YUV411,
- .rgb = false,
- .csc = SUN8I_CSC_MODE_YUV2RGB,
},
{
.drm_fmt = DRM_FORMAT_YVU422,
.de2_fmt = SUN8I_MIXER_FBFMT_YUV422,
- .rgb = false,
- .csc = SUN8I_CSC_MODE_YVU2RGB,
},
{
.drm_fmt = DRM_FORMAT_YVU420,
.de2_fmt = SUN8I_MIXER_FBFMT_YUV420,
- .rgb = false,
- .csc = SUN8I_CSC_MODE_YVU2RGB,
},
{
.drm_fmt = DRM_FORMAT_YVU411,
.de2_fmt = SUN8I_MIXER_FBFMT_YUV411,
- .rgb = false,
- .csc = SUN8I_CSC_MODE_YVU2RGB,
},
{
.drm_fmt = DRM_FORMAT_P010,
.de2_fmt = SUN8I_MIXER_FBFMT_P010_YUV,
- .rgb = false,
- .csc = SUN8I_CSC_MODE_YUV2RGB,
},
{
.drm_fmt = DRM_FORMAT_P210,
.de2_fmt = SUN8I_MIXER_FBFMT_P210_YUV,
- .rgb = false,
- .csc = SUN8I_CSC_MODE_YUV2RGB,
},
};
-const struct de2_fmt_info *sun8i_mixer_format_info(u32 format)
+int sun8i_mixer_drm_format_to_hw(u32 format, u32 *hw_format)
{
unsigned int i;
for (i = 0; i < ARRAY_SIZE(de2_formats); ++i)
- if (de2_formats[i].drm_fmt == format)
- return &de2_formats[i];
+ if (de2_formats[i].drm_fmt == format) {
+ *hw_format = de2_formats[i].de2_fmt;
+ return 0;
+ }
- return NULL;
+ return -EINVAL;
}
static void sun8i_mixer_commit(struct sunxi_engine *engine)
diff --git a/drivers/gpu/drm/sun4i/sun8i_mixer.h b/drivers/gpu/drm/sun4i/sun8i_mixer.h
index 345b28b0a80a..7576b523fdbb 100644
--- a/drivers/gpu/drm/sun4i/sun8i_mixer.h
+++ b/drivers/gpu/drm/sun4i/sun8i_mixer.h
@@ -10,7 +10,6 @@
#include <linux/regmap.h>
#include <linux/reset.h>
-#include "sun8i_csc.h"
#include "sunxi_engine.h"
#define SUN8I_MIXER_SIZE(w, h) (((h) - 1) << 16 | ((w) - 1))
@@ -144,13 +143,6 @@
#define SUN50I_MIXER_CDC0_EN 0xd0000
#define SUN50I_MIXER_CDC1_EN 0xd8000
-struct de2_fmt_info {
- u32 drm_fmt;
- u32 de2_fmt;
- bool rgb;
- enum sun8i_csc_mode csc;
-};
-
/**
* struct sun8i_mixer_cfg - mixer HW configuration
* @vi_num: number of VI channels
@@ -210,5 +202,5 @@ sun8i_channel_base(struct sun8i_mixer *mixer, int channel)
return DE2_CH_BASE + channel * DE2_CH_SIZE;
}
-const struct de2_fmt_info *sun8i_mixer_format_info(u32 format);
+int sun8i_mixer_drm_format_to_hw(u32 format, u32 *hw_format);
#endif /* _SUN8I_MIXER_H_ */
diff --git a/drivers/gpu/drm/sun4i/sun8i_ui_layer.c b/drivers/gpu/drm/sun4i/sun8i_ui_layer.c
index c87fd842918e..54f937a7d5e7 100644
--- a/drivers/gpu/drm/sun4i/sun8i_ui_layer.c
+++ b/drivers/gpu/drm/sun4i/sun8i_ui_layer.c
@@ -19,8 +19,8 @@
#include <drm/drm_plane_helper.h>
#include <drm/drm_probe_helper.h>
-#include "sun8i_ui_layer.h"
#include "sun8i_mixer.h"
+#include "sun8i_ui_layer.h"
#include "sun8i_ui_scaler.h"
static void sun8i_ui_layer_enable(struct sun8i_mixer *mixer, int channel,
@@ -174,18 +174,20 @@ static int sun8i_ui_layer_update_formats(struct sun8i_mixer *mixer, int channel,
int overlay, struct drm_plane *plane)
{
struct drm_plane_state *state = plane->state;
- const struct de2_fmt_info *fmt_info;
- u32 val, ch_base;
+ const struct drm_format_info *fmt;
+ u32 val, ch_base, hw_fmt;
+ int ret;
ch_base = sun8i_channel_base(mixer, channel);
- fmt_info = sun8i_mixer_format_info(state->fb->format->format);
- if (!fmt_info || !fmt_info->rgb) {
+ fmt = state->fb->format;
+ ret = sun8i_mixer_drm_format_to_hw(fmt->format, &hw_fmt);
+ if (ret || fmt->is_yuv) {
DRM_DEBUG_DRIVER("Invalid format\n");
return -EINVAL;
}
- val = fmt_info->de2_fmt << SUN8I_MIXER_CHAN_UI_LAYER_ATTR_FBFMT_OFFSET;
+ val = hw_fmt << SUN8I_MIXER_CHAN_UI_LAYER_ATTR_FBFMT_OFFSET;
regmap_update_bits(mixer->engine.regs,
SUN8I_MIXER_CHAN_UI_LAYER_ATTR(ch_base, overlay),
SUN8I_MIXER_CHAN_UI_LAYER_ATTR_FBFMT_MASK, val);
diff --git a/drivers/gpu/drm/sun4i/sun8i_vi_layer.c b/drivers/gpu/drm/sun4i/sun8i_vi_layer.c
index b8398ca18b0f..22c8c5375d0d 100644
--- a/drivers/gpu/drm/sun4i/sun8i_vi_layer.c
+++ b/drivers/gpu/drm/sun4i/sun8i_vi_layer.c
@@ -12,8 +12,9 @@
#include <drm/drm_plane_helper.h>
#include <drm/drm_probe_helper.h>
-#include "sun8i_vi_layer.h"
+#include "sun8i_csc.h"
#include "sun8i_mixer.h"
+#include "sun8i_vi_layer.h"
#include "sun8i_vi_scaler.h"
static void sun8i_vi_layer_enable(struct sun8i_mixer *mixer, int channel,
@@ -210,28 +211,47 @@ static int sun8i_vi_layer_update_coord(struct sun8i_mixer *mixer, int channel,
return 0;
}
+static u32 sun8i_vi_layer_get_csc_mode(const struct drm_format_info *format)
+{
+ if (!format->is_yuv)
+ return SUN8I_CSC_MODE_OFF;
+
+ switch (format->format) {
+ case DRM_FORMAT_YVU411:
+ case DRM_FORMAT_YVU420:
+ case DRM_FORMAT_YVU422:
+ case DRM_FORMAT_YVU444:
+ return SUN8I_CSC_MODE_YVU2RGB;
+ default:
+ return SUN8I_CSC_MODE_YUV2RGB;
+ }
+}
+
static int sun8i_vi_layer_update_formats(struct sun8i_mixer *mixer, int channel,
int overlay, struct drm_plane *plane)
{
struct drm_plane_state *state = plane->state;
- const struct de2_fmt_info *fmt_info;
- u32 val, ch_base;
+ u32 val, ch_base, csc_mode, hw_fmt;
+ const struct drm_format_info *fmt;
+ int ret;
ch_base = sun8i_channel_base(mixer, channel);
- fmt_info = sun8i_mixer_format_info(state->fb->format->format);
- if (!fmt_info) {
+ fmt = state->fb->format;
+ ret = sun8i_mixer_drm_format_to_hw(fmt->format, &hw_fmt);
+ if (ret) {
DRM_DEBUG_DRIVER("Invalid format\n");
- return -EINVAL;
+ return ret;
}
- val = fmt_info->de2_fmt << SUN8I_MIXER_CHAN_VI_LAYER_ATTR_FBFMT_OFFSET;
+ val = hw_fmt << SUN8I_MIXER_CHAN_VI_LAYER_ATTR_FBFMT_OFFSET;
regmap_update_bits(mixer->engine.regs,
SUN8I_MIXER_CHAN_VI_LAYER_ATTR(ch_base, overlay),
SUN8I_MIXER_CHAN_VI_LAYER_ATTR_FBFMT_MASK, val);
- if (fmt_info->csc != SUN8I_CSC_MODE_OFF) {
- sun8i_csc_set_ccsc_coefficients(mixer, channel, fmt_info->csc,
+ csc_mode = sun8i_vi_layer_get_csc_mode(fmt);
+ if (csc_mode != SUN8I_CSC_MODE_OFF) {
+ sun8i_csc_set_ccsc_coefficients(mixer, channel, csc_mode,
state->color_encoding,
state->color_range);
sun8i_csc_enable_ccsc(mixer, channel, true);
@@ -239,7 +259,7 @@ static int sun8i_vi_layer_update_formats(struct sun8i_mixer *mixer, int channel,
sun8i_csc_enable_ccsc(mixer, channel, false);
}
- if (fmt_info->rgb)
+ if (!fmt->is_yuv)
val = SUN8I_MIXER_CHAN_VI_LAYER_ATTR_RGB_MODE;
else
val = 0;
diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c
index 1a7b08f35776..83f31c6e891c 100644
--- a/drivers/gpu/drm/tegra/dc.c
+++ b/drivers/gpu/drm/tegra/dc.c
@@ -1496,7 +1496,6 @@ static int tegra_dc_late_register(struct drm_crtc *crtc)
struct drm_minor *minor = crtc->dev->primary;
struct dentry *root;
struct tegra_dc *dc = to_tegra_dc(crtc);
- int err;
#ifdef CONFIG_DEBUG_FS
root = crtc->debugfs_entry;
@@ -1512,17 +1511,9 @@ static int tegra_dc_late_register(struct drm_crtc *crtc)
for (i = 0; i < count; i++)
dc->debugfs_files[i].data = dc;
- err = drm_debugfs_create_files(dc->debugfs_files, count, root, minor);
- if (err < 0)
- goto free;
+ drm_debugfs_create_files(dc->debugfs_files, count, root, minor);
return 0;
-
-free:
- kfree(dc->debugfs_files);
- dc->debugfs_files = NULL;
-
- return err;
}
static void tegra_dc_early_unregister(struct drm_crtc *crtc)
diff --git a/drivers/gpu/drm/tegra/dpaux.c b/drivers/gpu/drm/tegra/dpaux.c
index 7dfb50f65067..105fb9cdbb3b 100644
--- a/drivers/gpu/drm/tegra/dpaux.c
+++ b/drivers/gpu/drm/tegra/dpaux.c
@@ -5,12 +5,10 @@
#include <linux/clk.h>
#include <linux/delay.h>
-#include <linux/gpio.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/module.h>
#include <linux/of_device.h>
-#include <linux/of_gpio.h>
#include <linux/pinctrl/pinconf-generic.h>
#include <linux/pinctrl/pinctrl.h>
#include <linux/pinctrl/pinmux.h>
diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index 583cd6e0ae27..211906347f3f 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -839,11 +839,11 @@ static struct drm_info_list tegra_debugfs_list[] = {
{ "iova", tegra_debugfs_iova, 0 },
};
-static int tegra_debugfs_init(struct drm_minor *minor)
+static void tegra_debugfs_init(struct drm_minor *minor)
{
- return drm_debugfs_create_files(tegra_debugfs_list,
- ARRAY_SIZE(tegra_debugfs_list),
- minor->debugfs_root, minor);
+ drm_debugfs_create_files(tegra_debugfs_list,
+ ARRAY_SIZE(tegra_debugfs_list),
+ minor->debugfs_root, minor);
}
#endif
diff --git a/drivers/gpu/drm/tegra/drm.h b/drivers/gpu/drm/tegra/drm.h
index ed99b67deb29..b25443255be6 100644
--- a/drivers/gpu/drm/tegra/drm.h
+++ b/drivers/gpu/drm/tegra/drm.h
@@ -9,7 +9,7 @@
#include <linux/host1x.h>
#include <linux/iova.h>
-#include <linux/of_gpio.h>
+#include <linux/gpio/consumer.h>
#include <drm/drm_atomic.h>
#include <drm/drm_edid.h>
@@ -152,8 +152,6 @@ enum drm_connector_status
tegra_output_connector_detect(struct drm_connector *connector, bool force);
void tegra_output_connector_destroy(struct drm_connector *connector);
-void tegra_output_encoder_destroy(struct drm_encoder *encoder);
-
/* from dpaux.c */
struct drm_dp_aux *drm_dp_aux_find_by_of_node(struct device_node *np);
enum drm_connector_status drm_dp_aux_detect(struct drm_dp_aux *aux);
diff --git a/drivers/gpu/drm/tegra/dsi.c b/drivers/gpu/drm/tegra/dsi.c
index 88b9d64c77bf..38beab9ab4f8 100644
--- a/drivers/gpu/drm/tegra/dsi.c
+++ b/drivers/gpu/drm/tegra/dsi.c
@@ -22,6 +22,7 @@
#include <drm/drm_file.h>
#include <drm/drm_mipi_dsi.h>
#include <drm/drm_panel.h>
+#include <drm/drm_simple_kms_helper.h>
#include "dc.h"
#include "drm.h"
@@ -234,7 +235,6 @@ static int tegra_dsi_late_register(struct drm_connector *connector)
struct drm_minor *minor = connector->dev->primary;
struct dentry *root = connector->debugfs_entry;
struct tegra_dsi *dsi = to_dsi(output);
- int err;
dsi->debugfs_files = kmemdup(debugfs_files, sizeof(debugfs_files),
GFP_KERNEL);
@@ -244,17 +244,9 @@ static int tegra_dsi_late_register(struct drm_connector *connector)
for (i = 0; i < count; i++)
dsi->debugfs_files[i].data = dsi;
- err = drm_debugfs_create_files(dsi->debugfs_files, count, root, minor);
- if (err < 0)
- goto free;
+ drm_debugfs_create_files(dsi->debugfs_files, count, root, minor);
return 0;
-
-free:
- kfree(dsi->debugfs_files);
- dsi->debugfs_files = NULL;
-
- return err;
}
static void tegra_dsi_early_unregister(struct drm_connector *connector)
@@ -824,10 +816,6 @@ static const struct drm_connector_helper_funcs tegra_dsi_connector_helper_funcs
.mode_valid = tegra_dsi_connector_mode_valid,
};
-static const struct drm_encoder_funcs tegra_dsi_encoder_funcs = {
- .destroy = tegra_output_encoder_destroy,
-};
-
static void tegra_dsi_unprepare(struct tegra_dsi *dsi)
{
int err;
@@ -1058,9 +1046,8 @@ static int tegra_dsi_init(struct host1x_client *client)
&tegra_dsi_connector_helper_funcs);
dsi->output.connector.dpms = DRM_MODE_DPMS_OFF;
- drm_encoder_init(drm, &dsi->output.encoder,
- &tegra_dsi_encoder_funcs,
- DRM_MODE_ENCODER_DSI, NULL);
+ drm_simple_encoder_init(drm, &dsi->output.encoder,
+ DRM_MODE_ENCODER_DSI);
drm_encoder_helper_add(&dsi->output.encoder,
&tegra_dsi_encoder_helper_funcs);
diff --git a/drivers/gpu/drm/tegra/fb.c b/drivers/gpu/drm/tegra/fb.c
index b8a328f53862..2b0666ac681b 100644
--- a/drivers/gpu/drm/tegra/fb.c
+++ b/drivers/gpu/drm/tegra/fb.c
@@ -4,7 +4,7 @@
* Copyright (C) 2012 NVIDIA CORPORATION. All rights reserved.
*
* Based on the KMS/FB CMA helpers
- * Copyright (C) 2012 Analog Device Inc.
+ * Copyright (C) 2012 Analog Devices Inc.
*/
#include <linux/console.h>
diff --git a/drivers/gpu/drm/tegra/hdmi.c b/drivers/gpu/drm/tegra/hdmi.c
index 38252c0f068d..d09a24931c87 100644
--- a/drivers/gpu/drm/tegra/hdmi.c
+++ b/drivers/gpu/drm/tegra/hdmi.c
@@ -7,7 +7,6 @@
#include <linux/clk.h>
#include <linux/debugfs.h>
#include <linux/delay.h>
-#include <linux/gpio.h>
#include <linux/hdmi.h>
#include <linux/math64.h>
#include <linux/module.h>
@@ -22,6 +21,7 @@
#include <drm/drm_file.h>
#include <drm/drm_fourcc.h>
#include <drm/drm_probe_helper.h>
+#include <drm/drm_simple_kms_helper.h>
#include "hda.h"
#include "hdmi.h"
@@ -1064,7 +1064,6 @@ static int tegra_hdmi_late_register(struct drm_connector *connector)
struct drm_minor *minor = connector->dev->primary;
struct dentry *root = connector->debugfs_entry;
struct tegra_hdmi *hdmi = to_hdmi(output);
- int err;
hdmi->debugfs_files = kmemdup(debugfs_files, sizeof(debugfs_files),
GFP_KERNEL);
@@ -1074,17 +1073,9 @@ static int tegra_hdmi_late_register(struct drm_connector *connector)
for (i = 0; i < count; i++)
hdmi->debugfs_files[i].data = hdmi;
- err = drm_debugfs_create_files(hdmi->debugfs_files, count, root, minor);
- if (err < 0)
- goto free;
+ drm_debugfs_create_files(hdmi->debugfs_files, count, root, minor);
return 0;
-
-free:
- kfree(hdmi->debugfs_files);
- hdmi->debugfs_files = NULL;
-
- return err;
}
static void tegra_hdmi_early_unregister(struct drm_connector *connector)
@@ -1136,10 +1127,6 @@ tegra_hdmi_connector_helper_funcs = {
.mode_valid = tegra_hdmi_connector_mode_valid,
};
-static const struct drm_encoder_funcs tegra_hdmi_encoder_funcs = {
- .destroy = tegra_output_encoder_destroy,
-};
-
static void tegra_hdmi_encoder_disable(struct drm_encoder *encoder)
{
struct tegra_output *output = encoder_to_output(encoder);
@@ -1445,8 +1432,8 @@ static int tegra_hdmi_init(struct host1x_client *client)
&tegra_hdmi_connector_helper_funcs);
hdmi->output.connector.dpms = DRM_MODE_DPMS_OFF;
- drm_encoder_init(drm, &hdmi->output.encoder, &tegra_hdmi_encoder_funcs,
- DRM_MODE_ENCODER_TMDS, NULL);
+ drm_simple_encoder_init(drm, &hdmi->output.encoder,
+ DRM_MODE_ENCODER_TMDS);
drm_encoder_helper_add(&hdmi->output.encoder,
&tegra_hdmi_encoder_helper_funcs);
diff --git a/drivers/gpu/drm/tegra/output.c b/drivers/gpu/drm/tegra/output.c
index a264259b97a2..e36e5e7c2f69 100644
--- a/drivers/gpu/drm/tegra/output.c
+++ b/drivers/gpu/drm/tegra/output.c
@@ -6,6 +6,7 @@
#include <drm/drm_atomic_helper.h>
#include <drm/drm_panel.h>
+#include <drm/drm_simple_kms_helper.h>
#include "drm.h"
#include "dc.h"
@@ -79,11 +80,6 @@ void tegra_output_connector_destroy(struct drm_connector *connector)
drm_connector_cleanup(connector);
}
-void tegra_output_encoder_destroy(struct drm_encoder *encoder)
-{
- drm_encoder_cleanup(encoder);
-}
-
static irqreturn_t hpd_irq(int irq, void *data)
{
struct tegra_output *output = data;
diff --git a/drivers/gpu/drm/tegra/rgb.c b/drivers/gpu/drm/tegra/rgb.c
index 4be4dfd4a68a..0562a7eb793f 100644
--- a/drivers/gpu/drm/tegra/rgb.c
+++ b/drivers/gpu/drm/tegra/rgb.c
@@ -8,6 +8,7 @@
#include <drm/drm_atomic_helper.h>
#include <drm/drm_panel.h>
+#include <drm/drm_simple_kms_helper.h>
#include "drm.h"
#include "dc.h"
@@ -110,10 +111,6 @@ static const struct drm_connector_helper_funcs tegra_rgb_connector_helper_funcs
.mode_valid = tegra_rgb_connector_mode_valid,
};
-static const struct drm_encoder_funcs tegra_rgb_encoder_funcs = {
- .destroy = tegra_output_encoder_destroy,
-};
-
static void tegra_rgb_encoder_disable(struct drm_encoder *encoder)
{
struct tegra_output *output = encoder_to_output(encoder);
@@ -281,8 +278,7 @@ int tegra_dc_rgb_init(struct drm_device *drm, struct tegra_dc *dc)
&tegra_rgb_connector_helper_funcs);
output->connector.dpms = DRM_MODE_DPMS_OFF;
- drm_encoder_init(drm, &output->encoder, &tegra_rgb_encoder_funcs,
- DRM_MODE_ENCODER_LVDS, NULL);
+ drm_simple_encoder_init(drm, &output->encoder, DRM_MODE_ENCODER_LVDS);
drm_encoder_helper_add(&output->encoder,
&tegra_rgb_encoder_helper_funcs);
diff --git a/drivers/gpu/drm/tegra/sor.c b/drivers/gpu/drm/tegra/sor.c
index 81226a4953c1..7cbcf9617f5e 100644
--- a/drivers/gpu/drm/tegra/sor.c
+++ b/drivers/gpu/drm/tegra/sor.c
@@ -6,7 +6,6 @@
#include <linux/clk.h>
#include <linux/clk-provider.h>
#include <linux/debugfs.h>
-#include <linux/gpio.h>
#include <linux/io.h>
#include <linux/module.h>
#include <linux/of_device.h>
@@ -23,6 +22,7 @@
#include <drm/drm_file.h>
#include <drm/drm_panel.h>
#include <drm/drm_scdc_helper.h>
+#include <drm/drm_simple_kms_helper.h>
#include "dc.h"
#include "dp.h"
@@ -1687,7 +1687,6 @@ static int tegra_sor_late_register(struct drm_connector *connector)
struct drm_minor *minor = connector->dev->primary;
struct dentry *root = connector->debugfs_entry;
struct tegra_sor *sor = to_sor(output);
- int err;
sor->debugfs_files = kmemdup(debugfs_files, sizeof(debugfs_files),
GFP_KERNEL);
@@ -1697,17 +1696,9 @@ static int tegra_sor_late_register(struct drm_connector *connector)
for (i = 0; i < count; i++)
sor->debugfs_files[i].data = sor;
- err = drm_debugfs_create_files(sor->debugfs_files, count, root, minor);
- if (err < 0)
- goto free;
+ drm_debugfs_create_files(sor->debugfs_files, count, root, minor);
return 0;
-
-free:
- kfree(sor->debugfs_files);
- sor->debugfs_files = NULL;
-
- return err;
}
static void tegra_sor_early_unregister(struct drm_connector *connector)
@@ -1805,10 +1796,6 @@ static const struct drm_connector_helper_funcs tegra_sor_connector_helper_funcs
.mode_valid = tegra_sor_connector_mode_valid,
};
-static const struct drm_encoder_funcs tegra_sor_encoder_funcs = {
- .destroy = tegra_output_encoder_destroy,
-};
-
static int
tegra_sor_encoder_atomic_check(struct drm_encoder *encoder,
struct drm_crtc_state *crtc_state,
@@ -3102,8 +3089,7 @@ static int tegra_sor_init(struct host1x_client *client)
&tegra_sor_connector_helper_funcs);
sor->output.connector.dpms = DRM_MODE_DPMS_OFF;
- drm_encoder_init(drm, &sor->output.encoder, &tegra_sor_encoder_funcs,
- encoder, NULL);
+ drm_simple_encoder_init(drm, &sor->output.encoder, encoder);
drm_encoder_helper_add(&sor->output.encoder, helpers);
drm_connector_attach_encoder(&sor->output.connector,
diff --git a/drivers/gpu/drm/tidss/tidss_crtc.c b/drivers/gpu/drm/tidss/tidss_crtc.c
index 3221a707e073..89a226912de8 100644
--- a/drivers/gpu/drm/tidss/tidss_crtc.c
+++ b/drivers/gpu/drm/tidss/tidss_crtc.c
@@ -24,7 +24,7 @@
static void tidss_crtc_finish_page_flip(struct tidss_crtc *tcrtc)
{
struct drm_device *ddev = tcrtc->crtc.dev;
- struct tidss_device *tidss = ddev->dev_private;
+ struct tidss_device *tidss = to_tidss(ddev);
struct drm_pending_vblank_event *event;
unsigned long flags;
bool busy;
@@ -88,7 +88,7 @@ static int tidss_crtc_atomic_check(struct drm_crtc *crtc,
struct drm_crtc_state *state)
{
struct drm_device *ddev = crtc->dev;
- struct tidss_device *tidss = ddev->dev_private;
+ struct tidss_device *tidss = to_tidss(ddev);
struct dispc_device *dispc = tidss->dispc;
struct tidss_crtc *tcrtc = to_tidss_crtc(crtc);
u32 hw_videoport = tcrtc->hw_videoport;
@@ -165,7 +165,7 @@ static void tidss_crtc_atomic_flush(struct drm_crtc *crtc,
{
struct tidss_crtc *tcrtc = to_tidss_crtc(crtc);
struct drm_device *ddev = crtc->dev;
- struct tidss_device *tidss = ddev->dev_private;
+ struct tidss_device *tidss = to_tidss(ddev);
unsigned long flags;
dev_dbg(ddev->dev,
@@ -216,7 +216,7 @@ static void tidss_crtc_atomic_enable(struct drm_crtc *crtc,
{
struct tidss_crtc *tcrtc = to_tidss_crtc(crtc);
struct drm_device *ddev = crtc->dev;
- struct tidss_device *tidss = ddev->dev_private;
+ struct tidss_device *tidss = to_tidss(ddev);
const struct drm_display_mode *mode = &crtc->state->adjusted_mode;
unsigned long flags;
int r;
@@ -259,7 +259,7 @@ static void tidss_crtc_atomic_disable(struct drm_crtc *crtc,
{
struct tidss_crtc *tcrtc = to_tidss_crtc(crtc);
struct drm_device *ddev = crtc->dev;
- struct tidss_device *tidss = ddev->dev_private;
+ struct tidss_device *tidss = to_tidss(ddev);
unsigned long flags;
dev_dbg(ddev->dev, "%s, event %p\n", __func__, crtc->state->event);
@@ -295,7 +295,7 @@ enum drm_mode_status tidss_crtc_mode_valid(struct drm_crtc *crtc,
{
struct tidss_crtc *tcrtc = to_tidss_crtc(crtc);
struct drm_device *ddev = crtc->dev;
- struct tidss_device *tidss = ddev->dev_private;
+ struct tidss_device *tidss = to_tidss(ddev);
return dispc_vp_mode_valid(tidss->dispc, tcrtc->hw_videoport, mode);
}
@@ -314,7 +314,7 @@ static const struct drm_crtc_helper_funcs tidss_crtc_helper_funcs = {
static int tidss_crtc_enable_vblank(struct drm_crtc *crtc)
{
struct drm_device *ddev = crtc->dev;
- struct tidss_device *tidss = ddev->dev_private;
+ struct tidss_device *tidss = to_tidss(ddev);
dev_dbg(ddev->dev, "%s\n", __func__);
@@ -328,7 +328,7 @@ static int tidss_crtc_enable_vblank(struct drm_crtc *crtc)
static void tidss_crtc_disable_vblank(struct drm_crtc *crtc)
{
struct drm_device *ddev = crtc->dev;
- struct tidss_device *tidss = ddev->dev_private;
+ struct tidss_device *tidss = to_tidss(ddev);
dev_dbg(ddev->dev, "%s\n", __func__);
diff --git a/drivers/gpu/drm/tidss/tidss_dispc.c b/drivers/gpu/drm/tidss/tidss_dispc.c
index 29f42768e294..629dd06393f6 100644
--- a/drivers/gpu/drm/tidss/tidss_dispc.c
+++ b/drivers/gpu/drm/tidss/tidss_dispc.c
@@ -181,10 +181,6 @@ const struct dispc_features dispc_am65x_feats = {
.vid_name = { "vid", "vidl1" },
.vid_lite = { false, true, },
.vid_order = { 1, 0 },
-
- .errata = {
- .i2000 = true,
- },
};
static const u16 tidss_j721e_common_regs[DISPC_COMMON_REG_TABLE_LEN] = {
@@ -2674,12 +2670,9 @@ int dispc_init(struct tidss_device *tidss)
return -ENOMEM;
num_fourccs = 0;
- for (i = 0; i < ARRAY_SIZE(dispc_color_formats); ++i) {
- if (feat->errata.i2000 &&
- dispc_fourcc_is_yuv(dispc_color_formats[i].fourcc))
- continue;
+ for (i = 0; i < ARRAY_SIZE(dispc_color_formats); ++i)
dispc->fourccs[num_fourccs++] = dispc_color_formats[i].fourcc;
- }
+
dispc->num_fourccs = num_fourccs;
dispc->tidss = tidss;
dispc->dev = dev;
diff --git a/drivers/gpu/drm/tidss/tidss_dispc.h b/drivers/gpu/drm/tidss/tidss_dispc.h
index a4a68249e44b..902e612ff7ac 100644
--- a/drivers/gpu/drm/tidss/tidss_dispc.h
+++ b/drivers/gpu/drm/tidss/tidss_dispc.h
@@ -46,10 +46,6 @@ struct dispc_features_scaling {
u32 xinc_max;
};
-struct dispc_errata {
- bool i2000; /* DSS Does Not Support YUV Pixel Data Formats */
-};
-
enum dispc_vp_bus_type {
DISPC_VP_DPI, /* DPI output */
DISPC_VP_OLDI, /* OLDI (LVDS) output */
@@ -83,8 +79,6 @@ struct dispc_features {
const char *vid_name[TIDSS_MAX_PLANES]; /* Should match dt reg names */
bool vid_lite[TIDSS_MAX_PLANES];
u32 vid_order[TIDSS_MAX_PLANES];
-
- struct dispc_errata errata;
};
extern const struct dispc_features dispc_k2g_feats;
diff --git a/drivers/gpu/drm/tidss/tidss_drv.c b/drivers/gpu/drm/tidss/tidss_drv.c
index d95e4be2c7b9..99edc66ebdef 100644
--- a/drivers/gpu/drm/tidss/tidss_drv.c
+++ b/drivers/gpu/drm/tidss/tidss_drv.c
@@ -17,6 +17,7 @@
#include <drm/drm_fb_helper.h>
#include <drm/drm_gem_cma_helper.h>
#include <drm/drm_irq.h>
+#include <drm/drm_managed.h>
#include <drm/drm_probe_helper.h>
#include "tidss_dispc.h"
@@ -102,15 +103,7 @@ static const struct dev_pm_ops tidss_pm_ops = {
static void tidss_release(struct drm_device *ddev)
{
- struct tidss_device *tidss = ddev->dev_private;
-
drm_kms_helper_poll_fini(ddev);
-
- tidss_modeset_cleanup(tidss);
-
- drm_dev_fini(ddev);
-
- kfree(tidss);
}
DEFINE_DRM_GEM_CMA_FOPS(tidss_fops);
@@ -142,26 +135,18 @@ static int tidss_probe(struct platform_device *pdev)
dev_dbg(dev, "%s\n", __func__);
- /* Can't use devm_* since drm_device's lifetime may exceed dev's */
- tidss = kzalloc(sizeof(*tidss), GFP_KERNEL);
- if (!tidss)
- return -ENOMEM;
+ tidss = devm_drm_dev_alloc(&pdev->dev, &tidss_driver,
+ struct tidss_device, ddev);
+ if (IS_ERR(tidss))
+ return PTR_ERR(tidss);
ddev = &tidss->ddev;
- ret = devm_drm_dev_init(&pdev->dev, ddev, &tidss_driver);
- if (ret) {
- kfree(ddev);
- return ret;
- }
-
tidss->dev = dev;
tidss->feat = of_device_get_match_data(dev);
platform_set_drvdata(pdev, tidss);
- ddev->dev_private = tidss;
-
ret = dispc_init(tidss);
if (ret) {
dev_err(dev, "failed to initialize dispc: %d\n", ret);
diff --git a/drivers/gpu/drm/tidss/tidss_drv.h b/drivers/gpu/drm/tidss/tidss_drv.h
index e2aa6436ad18..3b0a3d87b7c4 100644
--- a/drivers/gpu/drm/tidss/tidss_drv.h
+++ b/drivers/gpu/drm/tidss/tidss_drv.h
@@ -29,10 +29,10 @@ struct tidss_device {
spinlock_t wait_lock; /* protects the irq masks */
dispc_irq_t irq_mask; /* enabled irqs in addition to wait_list */
-
- struct drm_atomic_state *saved_state;
};
+#define to_tidss(__dev) container_of(__dev, struct tidss_device, ddev)
+
int tidss_runtime_get(struct tidss_device *tidss);
void tidss_runtime_put(struct tidss_device *tidss);
diff --git a/drivers/gpu/drm/tidss/tidss_irq.c b/drivers/gpu/drm/tidss/tidss_irq.c
index 612c046738e5..1b80f2d62e0a 100644
--- a/drivers/gpu/drm/tidss/tidss_irq.c
+++ b/drivers/gpu/drm/tidss/tidss_irq.c
@@ -23,7 +23,7 @@ static void tidss_irq_update(struct tidss_device *tidss)
void tidss_irq_enable_vblank(struct drm_crtc *crtc)
{
struct drm_device *ddev = crtc->dev;
- struct tidss_device *tidss = ddev->dev_private;
+ struct tidss_device *tidss = to_tidss(ddev);
struct tidss_crtc *tcrtc = to_tidss_crtc(crtc);
u32 hw_videoport = tcrtc->hw_videoport;
unsigned long flags;
@@ -38,7 +38,7 @@ void tidss_irq_enable_vblank(struct drm_crtc *crtc)
void tidss_irq_disable_vblank(struct drm_crtc *crtc)
{
struct drm_device *ddev = crtc->dev;
- struct tidss_device *tidss = ddev->dev_private;
+ struct tidss_device *tidss = to_tidss(ddev);
struct tidss_crtc *tcrtc = to_tidss_crtc(crtc);
u32 hw_videoport = tcrtc->hw_videoport;
unsigned long flags;
@@ -53,7 +53,7 @@ void tidss_irq_disable_vblank(struct drm_crtc *crtc)
irqreturn_t tidss_irq_handler(int irq, void *arg)
{
struct drm_device *ddev = (struct drm_device *)arg;
- struct tidss_device *tidss = ddev->dev_private;
+ struct tidss_device *tidss = to_tidss(ddev);
unsigned int id;
dispc_irq_t irqstatus;
@@ -95,7 +95,7 @@ void tidss_irq_resume(struct tidss_device *tidss)
void tidss_irq_preinstall(struct drm_device *ddev)
{
- struct tidss_device *tidss = ddev->dev_private;
+ struct tidss_device *tidss = to_tidss(ddev);
spin_lock_init(&tidss->wait_lock);
@@ -109,7 +109,7 @@ void tidss_irq_preinstall(struct drm_device *ddev)
int tidss_irq_postinstall(struct drm_device *ddev)
{
- struct tidss_device *tidss = ddev->dev_private;
+ struct tidss_device *tidss = to_tidss(ddev);
unsigned long flags;
unsigned int i;
@@ -138,7 +138,7 @@ int tidss_irq_postinstall(struct drm_device *ddev)
void tidss_irq_uninstall(struct drm_device *ddev)
{
- struct tidss_device *tidss = ddev->dev_private;
+ struct tidss_device *tidss = to_tidss(ddev);
tidss_runtime_get(tidss);
dispc_set_irqenable(tidss->dispc, 0);
diff --git a/drivers/gpu/drm/tidss/tidss_kms.c b/drivers/gpu/drm/tidss/tidss_kms.c
index 7d419960b030..4b99e9fa84a5 100644
--- a/drivers/gpu/drm/tidss/tidss_kms.c
+++ b/drivers/gpu/drm/tidss/tidss_kms.c
@@ -25,7 +25,7 @@
static void tidss_atomic_commit_tail(struct drm_atomic_state *old_state)
{
struct drm_device *ddev = old_state->dev;
- struct tidss_device *tidss = ddev->dev_private;
+ struct tidss_device *tidss = to_tidss(ddev);
dev_dbg(ddev->dev, "%s\n", __func__);
@@ -258,7 +258,9 @@ int tidss_modeset_init(struct tidss_device *tidss)
dev_dbg(tidss->dev, "%s\n", __func__);
- drm_mode_config_init(ddev);
+ ret = drmm_mode_config_init(ddev);
+ if (ret)
+ return ret;
ddev->mode_config.min_width = 8;
ddev->mode_config.min_height = 8;
@@ -270,11 +272,11 @@ int tidss_modeset_init(struct tidss_device *tidss)
ret = tidss_dispc_modeset_init(tidss);
if (ret)
- goto err_mode_config_cleanup;
+ return ret;
ret = drm_vblank_init(ddev, tidss->num_crtcs);
if (ret)
- goto err_mode_config_cleanup;
+ return ret;
/* Start with vertical blanking interrupt reporting disabled. */
for (i = 0; i < tidss->num_crtcs; ++i)
@@ -285,15 +287,4 @@ int tidss_modeset_init(struct tidss_device *tidss)
dev_dbg(tidss->dev, "%s done\n", __func__);
return 0;
-
-err_mode_config_cleanup:
- drm_mode_config_cleanup(ddev);
- return ret;
-}
-
-void tidss_modeset_cleanup(struct tidss_device *tidss)
-{
- struct drm_device *ddev = &tidss->ddev;
-
- drm_mode_config_cleanup(ddev);
}
diff --git a/drivers/gpu/drm/tidss/tidss_kms.h b/drivers/gpu/drm/tidss/tidss_kms.h
index dda5625d0128..99aaff099f22 100644
--- a/drivers/gpu/drm/tidss/tidss_kms.h
+++ b/drivers/gpu/drm/tidss/tidss_kms.h
@@ -10,6 +10,5 @@
struct tidss_device;
int tidss_modeset_init(struct tidss_device *tidss);
-void tidss_modeset_cleanup(struct tidss_device *tidss);
#endif
diff --git a/drivers/gpu/drm/tidss/tidss_plane.c b/drivers/gpu/drm/tidss/tidss_plane.c
index 798488948fc5..0a563eabcbb9 100644
--- a/drivers/gpu/drm/tidss/tidss_plane.c
+++ b/drivers/gpu/drm/tidss/tidss_plane.c
@@ -22,7 +22,7 @@ static int tidss_plane_atomic_check(struct drm_plane *plane,
struct drm_plane_state *state)
{
struct drm_device *ddev = plane->dev;
- struct tidss_device *tidss = ddev->dev_private;
+ struct tidss_device *tidss = to_tidss(ddev);
struct tidss_plane *tplane = to_tidss_plane(plane);
const struct drm_format_info *finfo;
struct drm_crtc_state *crtc_state;
@@ -101,7 +101,7 @@ static void tidss_plane_atomic_update(struct drm_plane *plane,
struct drm_plane_state *old_state)
{
struct drm_device *ddev = plane->dev;
- struct tidss_device *tidss = ddev->dev_private;
+ struct tidss_device *tidss = to_tidss(ddev);
struct tidss_plane *tplane = to_tidss_plane(plane);
struct drm_plane_state *state = plane->state;
u32 hw_videoport;
@@ -133,7 +133,7 @@ static void tidss_plane_atomic_disable(struct drm_plane *plane,
struct drm_plane_state *old_state)
{
struct drm_device *ddev = plane->dev;
- struct tidss_device *tidss = ddev->dev_private;
+ struct tidss_device *tidss = to_tidss(ddev);
struct tidss_plane *tplane = to_tidss_plane(plane);
dev_dbg(ddev->dev, "%s\n", __func__);
diff --git a/drivers/gpu/drm/tilcdc/tilcdc_drv.c b/drivers/gpu/drm/tilcdc/tilcdc_drv.c
index 0791a0200cc3..a5e9ee4c7fbf 100644
--- a/drivers/gpu/drm/tilcdc/tilcdc_drv.c
+++ b/drivers/gpu/drm/tilcdc/tilcdc_drv.c
@@ -390,10 +390,9 @@ static int tilcdc_init(struct drm_driver *ddrv, struct device *dev)
ret = drm_dev_register(ddev, 0);
if (ret)
goto init_failed;
+ priv->is_registered = true;
drm_fbdev_generic_setup(ddev, bpp);
-
- priv->is_registered = true;
return 0;
init_failed:
@@ -478,26 +477,17 @@ static struct drm_info_list tilcdc_debugfs_list[] = {
{ "mm", tilcdc_mm_show, 0 },
};
-static int tilcdc_debugfs_init(struct drm_minor *minor)
+static void tilcdc_debugfs_init(struct drm_minor *minor)
{
- struct drm_device *dev = minor->dev;
struct tilcdc_module *mod;
- int ret;
- ret = drm_debugfs_create_files(tilcdc_debugfs_list,
- ARRAY_SIZE(tilcdc_debugfs_list),
- minor->debugfs_root, minor);
+ drm_debugfs_create_files(tilcdc_debugfs_list,
+ ARRAY_SIZE(tilcdc_debugfs_list),
+ minor->debugfs_root, minor);
list_for_each_entry(mod, &module_list, list)
if (mod->funcs->debugfs_init)
mod->funcs->debugfs_init(mod, minor);
-
- if (ret) {
- dev_err(dev->dev, "could not install tilcdc_debugfs_list\n");
- return ret;
- }
-
- return ret;
}
#endif
diff --git a/drivers/gpu/drm/tilcdc/tilcdc_external.c b/drivers/gpu/drm/tilcdc/tilcdc_external.c
index 28b7f703236e..b177525588c1 100644
--- a/drivers/gpu/drm/tilcdc/tilcdc_external.c
+++ b/drivers/gpu/drm/tilcdc/tilcdc_external.c
@@ -10,6 +10,7 @@
#include <drm/drm_atomic_helper.h>
#include <drm/drm_bridge.h>
#include <drm/drm_of.h>
+#include <drm/drm_simple_kms_helper.h>
#include "tilcdc_drv.h"
#include "tilcdc_external.h"
@@ -83,10 +84,6 @@ int tilcdc_add_component_encoder(struct drm_device *ddev)
return 0;
}
-static const struct drm_encoder_funcs tilcdc_external_encoder_funcs = {
- .destroy = drm_encoder_cleanup,
-};
-
static
int tilcdc_attach_bridge(struct drm_device *ddev, struct drm_bridge *bridge)
{
@@ -131,9 +128,8 @@ int tilcdc_attach_external_device(struct drm_device *ddev)
if (!priv->external_encoder)
return -ENOMEM;
- ret = drm_encoder_init(ddev, priv->external_encoder,
- &tilcdc_external_encoder_funcs,
- DRM_MODE_ENCODER_NONE, NULL);
+ ret = drm_simple_encoder_init(ddev, priv->external_encoder,
+ DRM_MODE_ENCODER_NONE);
if (ret) {
dev_err(ddev->dev, "drm_encoder_init() failed %d\n", ret);
return ret;
diff --git a/drivers/gpu/drm/tilcdc/tilcdc_panel.c b/drivers/gpu/drm/tilcdc/tilcdc_panel.c
index 5584e656b857..12823d60c4e8 100644
--- a/drivers/gpu/drm/tilcdc/tilcdc_panel.c
+++ b/drivers/gpu/drm/tilcdc/tilcdc_panel.c
@@ -16,6 +16,7 @@
#include <drm/drm_connector.h>
#include <drm/drm_modeset_helper_vtables.h>
#include <drm/drm_probe_helper.h>
+#include <drm/drm_simple_kms_helper.h>
#include "tilcdc_drv.h"
#include "tilcdc_panel.h"
@@ -74,10 +75,6 @@ static void panel_encoder_mode_set(struct drm_encoder *encoder,
/* nothing needed */
}
-static const struct drm_encoder_funcs panel_encoder_funcs = {
- .destroy = drm_encoder_cleanup,
-};
-
static const struct drm_encoder_helper_funcs panel_encoder_helper_funcs = {
.dpms = panel_encoder_dpms,
.prepare = panel_encoder_prepare,
@@ -102,8 +99,7 @@ static struct drm_encoder *panel_encoder_create(struct drm_device *dev,
encoder = &panel_encoder->base;
encoder->possible_crtcs = 1;
- ret = drm_encoder_init(dev, encoder, &panel_encoder_funcs,
- DRM_MODE_ENCODER_LVDS, NULL);
+ ret = drm_simple_encoder_init(dev, encoder, DRM_MODE_ENCODER_LVDS);
if (ret < 0)
goto fail;
diff --git a/drivers/gpu/drm/tiny/Kconfig b/drivers/gpu/drm/tiny/Kconfig
index 4160e74e4751..2b6414f0fa75 100644
--- a/drivers/gpu/drm/tiny/Kconfig
+++ b/drivers/gpu/drm/tiny/Kconfig
@@ -1,5 +1,24 @@
# SPDX-License-Identifier: GPL-2.0-only
+config DRM_CIRRUS_QEMU
+ tristate "Cirrus driver for QEMU emulated device"
+ depends on DRM && PCI && MMU
+ select DRM_KMS_HELPER
+ select DRM_GEM_SHMEM_HELPER
+ help
+ This is a KMS driver for the emulated Cirrus device in QEMU.
+ It is *NOT* intended for real Cirrus devices. This requires
+ the modesetting userspace X.org driver.
+
+ Cirrus is obsolete; the hardware was designed in the '90s
+ and can't keep up with today's needs. More background:
+ https://www.kraxel.org/blog/2014/10/qemu-using-cirrus-considered-harmful/
+
+ Better alternatives are:
+ - stdvga (DRM_BOCHS, qemu -vga std, default in qemu 2.2+)
+ - qxl (DRM_QXL, qemu -vga qxl, works best with spice)
+ - virtio (DRM_VIRTIO_GPU, qemu -vga virtio)
+
config DRM_GM12U320
tristate "GM12U320 driver for USB projectors"
depends on DRM && USB
diff --git a/drivers/gpu/drm/tiny/Makefile b/drivers/gpu/drm/tiny/Makefile
index c96ceee71453..6ae4e9e5a35f 100644
--- a/drivers/gpu/drm/tiny/Makefile
+++ b/drivers/gpu/drm/tiny/Makefile
@@ -1,5 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_DRM_CIRRUS_QEMU) += cirrus.o
obj-$(CONFIG_DRM_GM12U320) += gm12u320.o
obj-$(CONFIG_TINYDRM_HX8357D) += hx8357d.o
obj-$(CONFIG_TINYDRM_ILI9225) += ili9225.o
diff --git a/drivers/gpu/drm/cirrus/cirrus.c b/drivers/gpu/drm/tiny/cirrus.c
index d2ff63ce8eaf..744a8e337e41 100644
--- a/drivers/gpu/drm/cirrus/cirrus.c
+++ b/drivers/gpu/drm/tiny/cirrus.c
@@ -35,6 +35,7 @@
#include <drm/drm_gem_shmem_helper.h>
#include <drm/drm_gem_framebuffer_helper.h>
#include <drm/drm_ioctl.h>
+#include <drm/drm_managed.h>
#include <drm/drm_modeset_helper_vtables.h>
#include <drm/drm_probe_helper.h>
#include <drm/drm_simple_kms_helper.h>
@@ -58,6 +59,8 @@ struct cirrus_device {
void __iomem *mmio;
};
+#define to_cirrus(_dev) container_of(_dev, struct cirrus_device, dev)
+
/* ------------------------------------------------------------------ */
/*
* The meat of this driver. The core passes us a mode and we have to program
@@ -310,7 +313,7 @@ static int cirrus_mode_set(struct cirrus_device *cirrus,
static int cirrus_fb_blit_rect(struct drm_framebuffer *fb,
struct drm_rect *rect)
{
- struct cirrus_device *cirrus = fb->dev->dev_private;
+ struct cirrus_device *cirrus = to_cirrus(fb->dev);
void *vmap;
int idx, ret;
@@ -435,7 +438,7 @@ static void cirrus_pipe_enable(struct drm_simple_display_pipe *pipe,
struct drm_crtc_state *crtc_state,
struct drm_plane_state *plane_state)
{
- struct cirrus_device *cirrus = pipe->crtc.dev->dev_private;
+ struct cirrus_device *cirrus = to_cirrus(pipe->crtc.dev);
cirrus_mode_set(cirrus, &crtc_state->mode, plane_state->fb);
cirrus_fb_blit_fullscreen(plane_state->fb);
@@ -444,7 +447,7 @@ static void cirrus_pipe_enable(struct drm_simple_display_pipe *pipe,
static void cirrus_pipe_update(struct drm_simple_display_pipe *pipe,
struct drm_plane_state *old_state)
{
- struct cirrus_device *cirrus = pipe->crtc.dev->dev_private;
+ struct cirrus_device *cirrus = to_cirrus(pipe->crtc.dev);
struct drm_plane_state *state = pipe->plane.state;
struct drm_crtc *crtc = &pipe->crtc;
struct drm_rect rect;
@@ -509,11 +512,15 @@ static const struct drm_mode_config_funcs cirrus_mode_config_funcs = {
.atomic_commit = drm_atomic_helper_commit,
};
-static void cirrus_mode_config_init(struct cirrus_device *cirrus)
+static int cirrus_mode_config_init(struct cirrus_device *cirrus)
{
struct drm_device *dev = &cirrus->dev;
+ int ret;
+
+ ret = drmm_mode_config_init(dev);
+ if (ret)
+ return ret;
- drm_mode_config_init(dev);
dev->mode_config.min_width = 0;
dev->mode_config.min_height = 0;
dev->mode_config.max_width = CIRRUS_MAX_PITCH / 2;
@@ -521,18 +528,12 @@ static void cirrus_mode_config_init(struct cirrus_device *cirrus)
dev->mode_config.preferred_depth = 16;
dev->mode_config.prefer_shadow = 0;
dev->mode_config.funcs = &cirrus_mode_config_funcs;
+
+ return 0;
}
/* ------------------------------------------------------------------ */
-static void cirrus_release(struct drm_device *dev)
-{
- struct cirrus_device *cirrus = dev->dev_private;
-
- drm_mode_config_cleanup(dev);
- kfree(cirrus);
-}
-
DEFINE_DRM_GEM_FOPS(cirrus_fops);
static struct drm_driver cirrus_driver = {
@@ -546,7 +547,6 @@ static struct drm_driver cirrus_driver = {
.fops = &cirrus_fops,
DRM_GEM_SHMEM_DRIVER_OPS,
- .release = cirrus_release,
};
static int cirrus_pci_probe(struct pci_dev *pdev,
@@ -560,7 +560,7 @@ static int cirrus_pci_probe(struct pci_dev *pdev,
if (ret)
return ret;
- ret = pci_enable_device(pdev);
+ ret = pcim_enable_device(pdev);
if (ret)
return ret;
@@ -569,36 +569,34 @@ static int cirrus_pci_probe(struct pci_dev *pdev,
return ret;
ret = -ENOMEM;
- cirrus = kzalloc(sizeof(*cirrus), GFP_KERNEL);
- if (cirrus == NULL)
- goto err_pci_release;
+ cirrus = devm_drm_dev_alloc(&pdev->dev, &cirrus_driver,
+ struct cirrus_device, dev);
+ if (IS_ERR(cirrus))
+ return PTR_ERR(cirrus);
dev = &cirrus->dev;
- ret = drm_dev_init(dev, &cirrus_driver, &pdev->dev);
- if (ret)
- goto err_free_cirrus;
- dev->dev_private = cirrus;
- ret = -ENOMEM;
- cirrus->vram = ioremap(pci_resource_start(pdev, 0),
- pci_resource_len(pdev, 0));
+ cirrus->vram = devm_ioremap(&pdev->dev, pci_resource_start(pdev, 0),
+ pci_resource_len(pdev, 0));
if (cirrus->vram == NULL)
- goto err_dev_put;
+ return -ENOMEM;
- cirrus->mmio = ioremap(pci_resource_start(pdev, 1),
- pci_resource_len(pdev, 1));
+ cirrus->mmio = devm_ioremap(&pdev->dev, pci_resource_start(pdev, 1),
+ pci_resource_len(pdev, 1));
if (cirrus->mmio == NULL)
- goto err_unmap_vram;
+ return -ENOMEM;
- cirrus_mode_config_init(cirrus);
+ ret = cirrus_mode_config_init(cirrus);
+ if (ret)
+ return ret;
ret = cirrus_conn_init(cirrus);
if (ret < 0)
- goto err_cleanup;
+ return ret;
ret = cirrus_pipe_init(cirrus);
if (ret < 0)
- goto err_cleanup;
+ return ret;
drm_mode_config_reset(dev);
@@ -606,36 +604,18 @@ static int cirrus_pci_probe(struct pci_dev *pdev,
pci_set_drvdata(pdev, dev);
ret = drm_dev_register(dev, 0);
if (ret)
- goto err_cleanup;
+ return ret;
drm_fbdev_generic_setup(dev, dev->mode_config.preferred_depth);
return 0;
-
-err_cleanup:
- drm_mode_config_cleanup(dev);
- iounmap(cirrus->mmio);
-err_unmap_vram:
- iounmap(cirrus->vram);
-err_dev_put:
- drm_dev_put(dev);
-err_free_cirrus:
- kfree(cirrus);
-err_pci_release:
- pci_release_regions(pdev);
- return ret;
}
static void cirrus_pci_remove(struct pci_dev *pdev)
{
struct drm_device *dev = pci_get_drvdata(pdev);
- struct cirrus_device *cirrus = dev->dev_private;
drm_dev_unplug(dev);
drm_atomic_helper_shutdown(dev);
- iounmap(cirrus->mmio);
- iounmap(cirrus->vram);
- drm_dev_put(dev);
- pci_release_regions(pdev);
}
static const struct pci_device_id pciidlist[] = {
diff --git a/drivers/gpu/drm/tiny/gm12u320.c b/drivers/gpu/drm/tiny/gm12u320.c
index a48173441ae0..cc397671f689 100644
--- a/drivers/gpu/drm/tiny/gm12u320.c
+++ b/drivers/gpu/drm/tiny/gm12u320.c
@@ -19,6 +19,7 @@
#include <drm/drm_gem_shmem_helper.h>
#include <drm/drm_gem_framebuffer_helper.h>
#include <drm/drm_ioctl.h>
+#include <drm/drm_managed.h>
#include <drm/drm_modeset_helper_vtables.h>
#include <drm/drm_probe_helper.h>
#include <drm/drm_simple_kms_helper.h>
@@ -87,18 +88,18 @@ struct gm12u320_device {
struct usb_device *udev;
unsigned char *cmd_buf;
unsigned char *data_buf[GM12U320_BLOCK_COUNT];
- bool pipe_enabled;
struct {
- bool run;
- struct workqueue_struct *workq;
- struct work_struct work;
- wait_queue_head_t waitq;
+ struct delayed_work work;
struct mutex lock;
struct drm_framebuffer *fb;
struct drm_rect rect;
+ int frame;
+ int draw_status_timeout;
} fb_update;
};
+#define to_gm12u320(__dev) container_of(__dev, struct gm12u320_device, dev)
+
static const char cmd_data[CMD_SIZE] = {
0x55, 0x53, 0x42, 0x43, 0x00, 0x00, 0x00, 0x00,
0x68, 0xfc, 0x00, 0x00, 0x00, 0x00, 0x10, 0xff,
@@ -159,7 +160,7 @@ static int gm12u320_usb_alloc(struct gm12u320_device *gm12u320)
int i, block_size;
const char *hdr;
- gm12u320->cmd_buf = kmalloc(CMD_SIZE, GFP_KERNEL);
+ gm12u320->cmd_buf = drmm_kmalloc(&gm12u320->dev, CMD_SIZE, GFP_KERNEL);
if (!gm12u320->cmd_buf)
return -ENOMEM;
@@ -172,7 +173,8 @@ static int gm12u320_usb_alloc(struct gm12u320_device *gm12u320)
hdr = data_block_header;
}
- gm12u320->data_buf[i] = kzalloc(block_size, GFP_KERNEL);
+ gm12u320->data_buf[i] = drmm_kzalloc(&gm12u320->dev,
+ block_size, GFP_KERNEL);
if (!gm12u320->data_buf[i])
return -ENOMEM;
@@ -182,26 +184,9 @@ static int gm12u320_usb_alloc(struct gm12u320_device *gm12u320)
data_block_footer, DATA_BLOCK_FOOTER_SIZE);
}
- gm12u320->fb_update.workq = create_singlethread_workqueue(DRIVER_NAME);
- if (!gm12u320->fb_update.workq)
- return -ENOMEM;
-
return 0;
}
-static void gm12u320_usb_free(struct gm12u320_device *gm12u320)
-{
- int i;
-
- if (gm12u320->fb_update.workq)
- destroy_workqueue(gm12u320->fb_update.workq);
-
- for (i = 0; i < GM12U320_BLOCK_COUNT; i++)
- kfree(gm12u320->data_buf[i]);
-
- kfree(gm12u320->cmd_buf);
-}
-
static int gm12u320_misc_request(struct gm12u320_device *gm12u320,
u8 req_a, u8 req_b,
u8 arg_a, u8 arg_b, u8 arg_c, u8 arg_d)
@@ -344,80 +329,77 @@ unlock:
static void gm12u320_fb_update_work(struct work_struct *work)
{
struct gm12u320_device *gm12u320 =
- container_of(work, struct gm12u320_device, fb_update.work);
- int draw_status_timeout = FIRST_FRAME_TIMEOUT;
+ container_of(to_delayed_work(work), struct gm12u320_device,
+ fb_update.work);
int block, block_size, len;
- int frame = 0;
int ret = 0;
- while (gm12u320->fb_update.run) {
- gm12u320_copy_fb_to_blocks(gm12u320);
-
- for (block = 0; block < GM12U320_BLOCK_COUNT; block++) {
- if (block == GM12U320_BLOCK_COUNT - 1)
- block_size = DATA_LAST_BLOCK_SIZE;
- else
- block_size = DATA_BLOCK_SIZE;
-
- /* Send data command to device */
- memcpy(gm12u320->cmd_buf, cmd_data, CMD_SIZE);
- gm12u320->cmd_buf[8] = block_size & 0xff;
- gm12u320->cmd_buf[9] = block_size >> 8;
- gm12u320->cmd_buf[20] = 0xfc - block * 4;
- gm12u320->cmd_buf[21] = block | (frame << 7);
-
- ret = usb_bulk_msg(gm12u320->udev,
- usb_sndbulkpipe(gm12u320->udev, DATA_SND_EPT),
- gm12u320->cmd_buf, CMD_SIZE, &len,
- CMD_TIMEOUT);
- if (ret || len != CMD_SIZE)
- goto err;
-
- /* Send data block to device */
- ret = usb_bulk_msg(gm12u320->udev,
- usb_sndbulkpipe(gm12u320->udev, DATA_SND_EPT),
- gm12u320->data_buf[block], block_size,
- &len, DATA_TIMEOUT);
- if (ret || len != block_size)
- goto err;
-
- /* Read status */
- ret = usb_bulk_msg(gm12u320->udev,
- usb_rcvbulkpipe(gm12u320->udev, DATA_RCV_EPT),
- gm12u320->cmd_buf, READ_STATUS_SIZE, &len,
- CMD_TIMEOUT);
- if (ret || len != READ_STATUS_SIZE)
- goto err;
- }
+ gm12u320_copy_fb_to_blocks(gm12u320);
+
+ for (block = 0; block < GM12U320_BLOCK_COUNT; block++) {
+ if (block == GM12U320_BLOCK_COUNT - 1)
+ block_size = DATA_LAST_BLOCK_SIZE;
+ else
+ block_size = DATA_BLOCK_SIZE;
+
+ /* Send data command to device */
+ memcpy(gm12u320->cmd_buf, cmd_data, CMD_SIZE);
+ gm12u320->cmd_buf[8] = block_size & 0xff;
+ gm12u320->cmd_buf[9] = block_size >> 8;
+ gm12u320->cmd_buf[20] = 0xfc - block * 4;
+ gm12u320->cmd_buf[21] =
+ block | (gm12u320->fb_update.frame << 7);
- /* Send draw command to device */
- memcpy(gm12u320->cmd_buf, cmd_draw, CMD_SIZE);
ret = usb_bulk_msg(gm12u320->udev,
usb_sndbulkpipe(gm12u320->udev, DATA_SND_EPT),
- gm12u320->cmd_buf, CMD_SIZE, &len, CMD_TIMEOUT);
+ gm12u320->cmd_buf, CMD_SIZE, &len,
+ CMD_TIMEOUT);
if (ret || len != CMD_SIZE)
goto err;
+ /* Send data block to device */
+ ret = usb_bulk_msg(gm12u320->udev,
+ usb_sndbulkpipe(gm12u320->udev, DATA_SND_EPT),
+ gm12u320->data_buf[block], block_size,
+ &len, DATA_TIMEOUT);
+ if (ret || len != block_size)
+ goto err;
+
/* Read status */
ret = usb_bulk_msg(gm12u320->udev,
usb_rcvbulkpipe(gm12u320->udev, DATA_RCV_EPT),
gm12u320->cmd_buf, READ_STATUS_SIZE, &len,
- draw_status_timeout);
+ CMD_TIMEOUT);
if (ret || len != READ_STATUS_SIZE)
goto err;
-
- draw_status_timeout = CMD_TIMEOUT;
- frame = !frame;
-
- /*
- * We must draw a frame every 2s otherwise the projector
- * switches back to showing its logo.
- */
- wait_event_timeout(gm12u320->fb_update.waitq,
- !gm12u320->fb_update.run ||
- gm12u320->fb_update.fb != NULL,
- IDLE_TIMEOUT);
}
+
+ /* Send draw command to device */
+ memcpy(gm12u320->cmd_buf, cmd_draw, CMD_SIZE);
+ ret = usb_bulk_msg(gm12u320->udev,
+ usb_sndbulkpipe(gm12u320->udev, DATA_SND_EPT),
+ gm12u320->cmd_buf, CMD_SIZE, &len, CMD_TIMEOUT);
+ if (ret || len != CMD_SIZE)
+ goto err;
+
+ /* Read status */
+ ret = usb_bulk_msg(gm12u320->udev,
+ usb_rcvbulkpipe(gm12u320->udev, DATA_RCV_EPT),
+ gm12u320->cmd_buf, READ_STATUS_SIZE, &len,
+ gm12u320->fb_update.draw_status_timeout);
+ if (ret || len != READ_STATUS_SIZE)
+ goto err;
+
+ gm12u320->fb_update.draw_status_timeout = CMD_TIMEOUT;
+ gm12u320->fb_update.frame = !gm12u320->fb_update.frame;
+
+ /*
+ * We must draw a frame every 2s otherwise the projector
+ * switches back to showing its logo.
+ */
+ queue_delayed_work(system_long_wq, &gm12u320->fb_update.work,
+ IDLE_TIMEOUT);
+
return;
err:
/* Do not log errors caused by module unload or device unplug */
@@ -428,7 +410,7 @@ err:
static void gm12u320_fb_mark_dirty(struct drm_framebuffer *fb,
struct drm_rect *dirty)
{
- struct gm12u320_device *gm12u320 = fb->dev->dev_private;
+ struct gm12u320_device *gm12u320 = to_gm12u320(fb->dev);
struct drm_framebuffer *old_fb = NULL;
bool wakeup = false;
@@ -452,36 +434,24 @@ static void gm12u320_fb_mark_dirty(struct drm_framebuffer *fb,
mutex_unlock(&gm12u320->fb_update.lock);
if (wakeup)
- wake_up(&gm12u320->fb_update.waitq);
+ mod_delayed_work(system_long_wq, &gm12u320->fb_update.work, 0);
if (old_fb)
drm_framebuffer_put(old_fb);
}
-static void gm12u320_start_fb_update(struct gm12u320_device *gm12u320)
-{
- mutex_lock(&gm12u320->fb_update.lock);
- gm12u320->fb_update.run = true;
- mutex_unlock(&gm12u320->fb_update.lock);
-
- queue_work(gm12u320->fb_update.workq, &gm12u320->fb_update.work);
-}
-
static void gm12u320_stop_fb_update(struct gm12u320_device *gm12u320)
{
- mutex_lock(&gm12u320->fb_update.lock);
- gm12u320->fb_update.run = false;
- mutex_unlock(&gm12u320->fb_update.lock);
+ struct drm_framebuffer *old_fb;
- wake_up(&gm12u320->fb_update.waitq);
- cancel_work_sync(&gm12u320->fb_update.work);
+ cancel_delayed_work_sync(&gm12u320->fb_update.work);
mutex_lock(&gm12u320->fb_update.lock);
- if (gm12u320->fb_update.fb) {
- drm_framebuffer_put(gm12u320->fb_update.fb);
- gm12u320->fb_update.fb = NULL;
- }
+ old_fb = gm12u320->fb_update.fb;
+ gm12u320->fb_update.fb = NULL;
mutex_unlock(&gm12u320->fb_update.lock);
+
+ drm_framebuffer_put(old_fb);
}
static int gm12u320_set_ecomode(struct gm12u320_device *gm12u320)
@@ -589,20 +559,18 @@ static void gm12u320_pipe_enable(struct drm_simple_display_pipe *pipe,
struct drm_crtc_state *crtc_state,
struct drm_plane_state *plane_state)
{
- struct gm12u320_device *gm12u320 = pipe->crtc.dev->dev_private;
struct drm_rect rect = { 0, 0, GM12U320_USER_WIDTH, GM12U320_HEIGHT };
+ struct gm12u320_device *gm12u320 = to_gm12u320(pipe->crtc.dev);
+ gm12u320->fb_update.draw_status_timeout = FIRST_FRAME_TIMEOUT;
gm12u320_fb_mark_dirty(plane_state->fb, &rect);
- gm12u320_start_fb_update(gm12u320);
- gm12u320->pipe_enabled = true;
}
static void gm12u320_pipe_disable(struct drm_simple_display_pipe *pipe)
{
- struct gm12u320_device *gm12u320 = pipe->crtc.dev->dev_private;
+ struct gm12u320_device *gm12u320 = to_gm12u320(pipe->crtc.dev);
gm12u320_stop_fb_update(gm12u320);
- gm12u320->pipe_enabled = false;
}
static void gm12u320_pipe_update(struct drm_simple_display_pipe *pipe,
@@ -630,16 +598,6 @@ static const uint64_t gm12u320_pipe_modifiers[] = {
DRM_FORMAT_MOD_INVALID
};
-static void gm12u320_driver_release(struct drm_device *dev)
-{
- struct gm12u320_device *gm12u320 = dev->dev_private;
-
- gm12u320_usb_free(gm12u320);
- drm_mode_config_cleanup(dev);
- drm_dev_fini(dev);
- kfree(gm12u320);
-}
-
DEFINE_DRM_GEM_FOPS(gm12u320_fops);
static struct drm_driver gm12u320_drm_driver = {
@@ -651,7 +609,6 @@ static struct drm_driver gm12u320_drm_driver = {
.major = DRIVER_MAJOR,
.minor = DRIVER_MINOR,
- .release = gm12u320_driver_release,
.fops = &gm12u320_fops,
DRM_GEM_SHMEM_DRIVER_OPS,
};
@@ -676,24 +633,21 @@ static int gm12u320_usb_probe(struct usb_interface *interface,
if (interface->cur_altsetting->desc.bInterfaceNumber != 0)
return -ENODEV;
- gm12u320 = kzalloc(sizeof(*gm12u320), GFP_KERNEL);
- if (gm12u320 == NULL)
- return -ENOMEM;
+ gm12u320 = devm_drm_dev_alloc(&interface->dev, &gm12u320_drm_driver,
+ struct gm12u320_device, dev);
+ if (IS_ERR(gm12u320))
+ return PTR_ERR(gm12u320);
gm12u320->udev = interface_to_usbdev(interface);
- INIT_WORK(&gm12u320->fb_update.work, gm12u320_fb_update_work);
+ INIT_DELAYED_WORK(&gm12u320->fb_update.work, gm12u320_fb_update_work);
mutex_init(&gm12u320->fb_update.lock);
- init_waitqueue_head(&gm12u320->fb_update.waitq);
dev = &gm12u320->dev;
- ret = drm_dev_init(dev, &gm12u320_drm_driver, &interface->dev);
- if (ret) {
- kfree(gm12u320);
+
+ ret = drmm_mode_config_init(dev);
+ if (ret)
return ret;
- }
- dev->dev_private = gm12u320;
- drm_mode_config_init(dev);
dev->mode_config.min_width = GM12U320_USER_WIDTH;
dev->mode_config.max_width = GM12U320_USER_WIDTH;
dev->mode_config.min_height = GM12U320_HEIGHT;
@@ -702,15 +656,15 @@ static int gm12u320_usb_probe(struct usb_interface *interface,
ret = gm12u320_usb_alloc(gm12u320);
if (ret)
- goto err_put;
+ return ret;
ret = gm12u320_set_ecomode(gm12u320);
if (ret)
- goto err_put;
+ return ret;
ret = gm12u320_conn_init(gm12u320);
if (ret)
- goto err_put;
+ return ret;
ret = drm_simple_display_pipe_init(&gm12u320->dev,
&gm12u320->pipe,
@@ -720,56 +674,44 @@ static int gm12u320_usb_probe(struct usb_interface *interface,
gm12u320_pipe_modifiers,
&gm12u320->conn);
if (ret)
- goto err_put;
+ return ret;
drm_mode_config_reset(dev);
usb_set_intfdata(interface, dev);
ret = drm_dev_register(dev, 0);
if (ret)
- goto err_put;
+ return ret;
drm_fbdev_generic_setup(dev, 0);
return 0;
-
-err_put:
- drm_dev_put(dev);
- return ret;
}
static void gm12u320_usb_disconnect(struct usb_interface *interface)
{
struct drm_device *dev = usb_get_intfdata(interface);
- struct gm12u320_device *gm12u320 = dev->dev_private;
- gm12u320_stop_fb_update(gm12u320);
drm_dev_unplug(dev);
- drm_dev_put(dev);
+ drm_atomic_helper_shutdown(dev);
}
static __maybe_unused int gm12u320_suspend(struct usb_interface *interface,
pm_message_t message)
{
struct drm_device *dev = usb_get_intfdata(interface);
- struct gm12u320_device *gm12u320 = dev->dev_private;
- if (gm12u320->pipe_enabled)
- gm12u320_stop_fb_update(gm12u320);
-
- return 0;
+ return drm_mode_config_helper_suspend(dev);
}
static __maybe_unused int gm12u320_resume(struct usb_interface *interface)
{
struct drm_device *dev = usb_get_intfdata(interface);
- struct gm12u320_device *gm12u320 = dev->dev_private;
+ struct gm12u320_device *gm12u320 = to_gm12u320(dev);
gm12u320_set_ecomode(gm12u320);
- if (gm12u320->pipe_enabled)
- gm12u320_start_fb_update(gm12u320);
- return 0;
+ return drm_mode_config_helper_resume(dev);
}
static const struct usb_device_id id_table[] = {
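
The gm12u320 conversion above follows the pattern used by most drivers in this series: the driver structure embeds struct drm_device, is allocated with devm_drm_dev_alloc(), and its buffers and mode-config teardown are handed to the drmm_* managed-resource helpers, so the .release callback and the err_put unwind paths go away. A minimal sketch of that pattern, using made-up foo_* names rather than the actual driver code:

	#include <linux/err.h>
	#include <drm/drm_drv.h>
	#include <drm/drm_managed.h>
	#include <drm/drm_mode_config.h>

	struct foo_device {
		struct drm_device drm;		/* embedded, not a pointer */
		void *cmd_buf;
	};

	#define to_foo(__dev) container_of(__dev, struct foo_device, drm)

	static struct drm_driver foo_driver;	/* fops etc. omitted */

	static int foo_probe(struct device *parent)
	{
		struct foo_device *foo;
		int ret;

		/*
		 * Allocates foo around an initialized drm_device; the final
		 * drm_dev_put() frees it, so no kfree() in a release hook.
		 */
		foo = devm_drm_dev_alloc(parent, &foo_driver,
					 struct foo_device, drm);
		if (IS_ERR(foo))
			return PTR_ERR(foo);

		/* Managed mode config: no drm_mode_config_cleanup() needed. */
		ret = drmm_mode_config_init(&foo->drm);
		if (ret)
			return ret;

		/* Managed allocation tied to the drm_device lifetime. */
		foo->cmd_buf = drmm_kzalloc(&foo->drm, 64, GFP_KERNEL);
		if (!foo->cmd_buf)
			return -ENOMEM;

		return drm_dev_register(&foo->drm, 0);
	}

Because the allocation, the mode config and the buffers are all released together with the drm_device, the error paths collapse into plain returns, which is what the deleted err_put/goto labels in the hunks above reflect.
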
diff --git a/drivers/gpu/drm/tiny/hx8357d.c b/drivers/gpu/drm/tiny/hx8357d.c
index 9af8ff84974f..b4bc358a3269 100644
--- a/drivers/gpu/drm/tiny/hx8357d.c
+++ b/drivers/gpu/drm/tiny/hx8357d.c
@@ -21,6 +21,7 @@
#include <drm/drm_fb_helper.h>
#include <drm/drm_gem_cma_helper.h>
#include <drm/drm_gem_framebuffer_helper.h>
+#include <drm/drm_managed.h>
#include <drm/drm_mipi_dbi.h>
#include <drm/drm_modeset_helper.h>
#include <video/mipi_display.h>
@@ -195,7 +196,6 @@ DEFINE_DRM_GEM_CMA_FOPS(hx8357d_fops);
static struct drm_driver hx8357d_driver = {
.driver_features = DRIVER_GEM | DRIVER_MODESET | DRIVER_ATOMIC,
.fops = &hx8357d_fops,
- .release = mipi_dbi_release,
DRM_GEM_CMA_VMAP_DRIVER_OPS,
.debugfs_init = mipi_dbi_debugfs_init,
.name = "hx8357d",
@@ -226,18 +226,12 @@ static int hx8357d_probe(struct spi_device *spi)
u32 rotation = 0;
int ret;
- dbidev = kzalloc(sizeof(*dbidev), GFP_KERNEL);
- if (!dbidev)
- return -ENOMEM;
+ dbidev = devm_drm_dev_alloc(dev, &hx8357d_driver,
+ struct mipi_dbi_dev, drm);
+ if (IS_ERR(dbidev))
+ return PTR_ERR(dbidev);
drm = &dbidev->drm;
- ret = devm_drm_dev_init(dev, drm, &hx8357d_driver);
- if (ret) {
- kfree(dbidev);
- return ret;
- }
-
- drm_mode_config_init(drm);
dc = devm_gpiod_get(dev, "dc", GPIOD_OUT_LOW);
if (IS_ERR(dc)) {
diff --git a/drivers/gpu/drm/tiny/ili9225.c b/drivers/gpu/drm/tiny/ili9225.c
index 802fb8dde1b6..d1a5ab6747d5 100644
--- a/drivers/gpu/drm/tiny/ili9225.c
+++ b/drivers/gpu/drm/tiny/ili9225.c
@@ -24,6 +24,7 @@
#include <drm/drm_fourcc.h>
#include <drm/drm_gem_cma_helper.h>
#include <drm/drm_gem_framebuffer_helper.h>
+#include <drm/drm_managed.h>
#include <drm/drm_mipi_dbi.h>
#include <drm/drm_rect.h>
@@ -345,7 +346,6 @@ DEFINE_DRM_GEM_CMA_FOPS(ili9225_fops);
static struct drm_driver ili9225_driver = {
.driver_features = DRIVER_GEM | DRIVER_MODESET | DRIVER_ATOMIC,
.fops = &ili9225_fops,
- .release = mipi_dbi_release,
DRM_GEM_CMA_VMAP_DRIVER_OPS,
.name = "ili9225",
.desc = "Ilitek ILI9225",
@@ -376,19 +376,13 @@ static int ili9225_probe(struct spi_device *spi)
u32 rotation = 0;
int ret;
- dbidev = kzalloc(sizeof(*dbidev), GFP_KERNEL);
- if (!dbidev)
- return -ENOMEM;
+ dbidev = devm_drm_dev_alloc(dev, &ili9225_driver,
+ struct mipi_dbi_dev, drm);
+ if (IS_ERR(dbidev))
+ return PTR_ERR(dbidev);
dbi = &dbidev->dbi;
drm = &dbidev->drm;
- ret = devm_drm_dev_init(dev, drm, &ili9225_driver);
- if (ret) {
- kfree(dbidev);
- return ret;
- }
-
- drm_mode_config_init(drm);
dbi->reset = devm_gpiod_get(dev, "reset", GPIOD_OUT_HIGH);
if (IS_ERR(dbi->reset)) {
diff --git a/drivers/gpu/drm/tiny/ili9341.c b/drivers/gpu/drm/tiny/ili9341.c
index 33b51dc7faa8..bb819f45a5d3 100644
--- a/drivers/gpu/drm/tiny/ili9341.c
+++ b/drivers/gpu/drm/tiny/ili9341.c
@@ -20,6 +20,7 @@
#include <drm/drm_fb_helper.h>
#include <drm/drm_gem_cma_helper.h>
#include <drm/drm_gem_framebuffer_helper.h>
+#include <drm/drm_managed.h>
#include <drm/drm_mipi_dbi.h>
#include <drm/drm_modeset_helper.h>
#include <video/mipi_display.h>
@@ -151,7 +152,6 @@ DEFINE_DRM_GEM_CMA_FOPS(ili9341_fops);
static struct drm_driver ili9341_driver = {
.driver_features = DRIVER_GEM | DRIVER_MODESET | DRIVER_ATOMIC,
.fops = &ili9341_fops,
- .release = mipi_dbi_release,
DRM_GEM_CMA_VMAP_DRIVER_OPS,
.debugfs_init = mipi_dbi_debugfs_init,
.name = "ili9341",
@@ -183,19 +183,13 @@ static int ili9341_probe(struct spi_device *spi)
u32 rotation = 0;
int ret;
- dbidev = kzalloc(sizeof(*dbidev), GFP_KERNEL);
- if (!dbidev)
- return -ENOMEM;
+ dbidev = devm_drm_dev_alloc(dev, &ili9341_driver,
+ struct mipi_dbi_dev, drm);
+ if (IS_ERR(dbidev))
+ return PTR_ERR(dbidev);
dbi = &dbidev->dbi;
drm = &dbidev->drm;
- ret = devm_drm_dev_init(dev, drm, &ili9341_driver);
- if (ret) {
- kfree(dbidev);
- return ret;
- }
-
- drm_mode_config_init(drm);
dbi->reset = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_HIGH);
if (IS_ERR(dbi->reset)) {
diff --git a/drivers/gpu/drm/tiny/ili9486.c b/drivers/gpu/drm/tiny/ili9486.c
index 532560aebb1e..2702ea557d29 100644
--- a/drivers/gpu/drm/tiny/ili9486.c
+++ b/drivers/gpu/drm/tiny/ili9486.c
@@ -19,6 +19,7 @@
#include <drm/drm_fb_helper.h>
#include <drm/drm_gem_cma_helper.h>
#include <drm/drm_gem_framebuffer_helper.h>
+#include <drm/drm_managed.h>
#include <drm/drm_mipi_dbi.h>
#include <drm/drm_modeset_helper.h>
@@ -164,7 +165,6 @@ DEFINE_DRM_GEM_CMA_FOPS(ili9486_fops);
static struct drm_driver ili9486_driver = {
.driver_features = DRIVER_GEM | DRIVER_MODESET | DRIVER_ATOMIC,
.fops = &ili9486_fops,
- .release = mipi_dbi_release,
DRM_GEM_CMA_VMAP_DRIVER_OPS,
.debugfs_init = mipi_dbi_debugfs_init,
.name = "ili9486",
@@ -197,19 +197,13 @@ static int ili9486_probe(struct spi_device *spi)
u32 rotation = 0;
int ret;
- dbidev = kzalloc(sizeof(*dbidev), GFP_KERNEL);
- if (!dbidev)
- return -ENOMEM;
+ dbidev = devm_drm_dev_alloc(dev, &ili9486_driver,
+ struct mipi_dbi_dev, drm);
+ if (IS_ERR(dbidev))
+ return PTR_ERR(dbidev);
dbi = &dbidev->dbi;
drm = &dbidev->drm;
- ret = devm_drm_dev_init(dev, drm, &ili9486_driver);
- if (ret) {
- kfree(dbidev);
- return ret;
- }
-
- drm_mode_config_init(drm);
dbi->reset = devm_gpiod_get(dev, "reset", GPIOD_OUT_HIGH);
if (IS_ERR(dbi->reset)) {
diff --git a/drivers/gpu/drm/tiny/mi0283qt.c b/drivers/gpu/drm/tiny/mi0283qt.c
index e2cfd9a17143..08ac549ab0f7 100644
--- a/drivers/gpu/drm/tiny/mi0283qt.c
+++ b/drivers/gpu/drm/tiny/mi0283qt.c
@@ -18,6 +18,7 @@
#include <drm/drm_fb_helper.h>
#include <drm/drm_gem_cma_helper.h>
#include <drm/drm_gem_framebuffer_helper.h>
+#include <drm/drm_managed.h>
#include <drm/drm_mipi_dbi.h>
#include <drm/drm_modeset_helper.h>
#include <video/mipi_display.h>
@@ -155,7 +156,6 @@ DEFINE_DRM_GEM_CMA_FOPS(mi0283qt_fops);
static struct drm_driver mi0283qt_driver = {
.driver_features = DRIVER_GEM | DRIVER_MODESET | DRIVER_ATOMIC,
.fops = &mi0283qt_fops,
- .release = mipi_dbi_release,
DRM_GEM_CMA_VMAP_DRIVER_OPS,
.debugfs_init = mipi_dbi_debugfs_init,
.name = "mi0283qt",
@@ -187,19 +187,13 @@ static int mi0283qt_probe(struct spi_device *spi)
u32 rotation = 0;
int ret;
- dbidev = kzalloc(sizeof(*dbidev), GFP_KERNEL);
- if (!dbidev)
- return -ENOMEM;
+ dbidev = devm_drm_dev_alloc(dev, &mi0283qt_driver,
+ struct mipi_dbi_dev, drm);
+ if (IS_ERR(dbidev))
+ return PTR_ERR(dbidev);
dbi = &dbidev->dbi;
drm = &dbidev->drm;
- ret = devm_drm_dev_init(dev, drm, &mi0283qt_driver);
- if (ret) {
- kfree(dbidev);
- return ret;
- }
-
- drm_mode_config_init(drm);
dbi->reset = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_HIGH);
if (IS_ERR(dbi->reset)) {
diff --git a/drivers/gpu/drm/tiny/repaper.c b/drivers/gpu/drm/tiny/repaper.c
index f5ebcaf7ee3a..1c0e7169545b 100644
--- a/drivers/gpu/drm/tiny/repaper.c
+++ b/drivers/gpu/drm/tiny/repaper.c
@@ -31,6 +31,7 @@
#include <drm/drm_format_helper.h>
#include <drm/drm_gem_cma_helper.h>
#include <drm/drm_gem_framebuffer_helper.h>
+#include <drm/drm_managed.h>
#include <drm/drm_modes.h>
#include <drm/drm_rect.h>
#include <drm/drm_probe_helper.h>
@@ -908,17 +909,6 @@ static const struct drm_mode_config_funcs repaper_mode_config_funcs = {
.atomic_commit = drm_atomic_helper_commit,
};
-static void repaper_release(struct drm_device *drm)
-{
- struct repaper_epd *epd = drm_to_epd(drm);
-
- DRM_DEBUG_DRIVER("\n");
-
- drm_mode_config_cleanup(drm);
- drm_dev_fini(drm);
- kfree(epd);
-}
-
static const uint32_t repaper_formats[] = {
DRM_FORMAT_XRGB8888,
};
@@ -956,7 +946,6 @@ DEFINE_DRM_GEM_CMA_FOPS(repaper_fops);
static struct drm_driver repaper_driver = {
.driver_features = DRIVER_GEM | DRIVER_MODESET | DRIVER_ATOMIC,
.fops = &repaper_fops,
- .release = repaper_release,
DRM_GEM_CMA_VMAP_DRIVER_OPS,
.name = "repaper",
.desc = "Pervasive Displays RePaper e-ink panels",
@@ -1013,19 +1002,16 @@ static int repaper_probe(struct spi_device *spi)
}
}
- epd = kzalloc(sizeof(*epd), GFP_KERNEL);
- if (!epd)
- return -ENOMEM;
+ epd = devm_drm_dev_alloc(dev, &repaper_driver,
+ struct repaper_epd, drm);
+ if (IS_ERR(epd))
+ return PTR_ERR(epd);
drm = &epd->drm;
- ret = devm_drm_dev_init(dev, drm, &repaper_driver);
- if (ret) {
- kfree(epd);
+ ret = drmm_mode_config_init(drm);
+ if (ret)
return ret;
- }
-
- drm_mode_config_init(drm);
drm->mode_config.funcs = &repaper_mode_config_funcs;
epd->spi = spi;
diff --git a/drivers/gpu/drm/tiny/st7586.c b/drivers/gpu/drm/tiny/st7586.c
index 9ef559dd3191..2a1fae422f7a 100644
--- a/drivers/gpu/drm/tiny/st7586.c
+++ b/drivers/gpu/drm/tiny/st7586.c
@@ -21,6 +21,7 @@
#include <drm/drm_format_helper.h>
#include <drm/drm_gem_cma_helper.h>
#include <drm/drm_gem_framebuffer_helper.h>
+#include <drm/drm_managed.h>
#include <drm/drm_mipi_dbi.h>
#include <drm/drm_rect.h>
@@ -284,7 +285,6 @@ DEFINE_DRM_GEM_CMA_FOPS(st7586_fops);
static struct drm_driver st7586_driver = {
.driver_features = DRIVER_GEM | DRIVER_MODESET | DRIVER_ATOMIC,
.fops = &st7586_fops,
- .release = mipi_dbi_release,
DRM_GEM_CMA_VMAP_DRIVER_OPS,
.debugfs_init = mipi_dbi_debugfs_init,
.name = "st7586",
@@ -317,19 +317,13 @@ static int st7586_probe(struct spi_device *spi)
size_t bufsize;
int ret;
- dbidev = kzalloc(sizeof(*dbidev), GFP_KERNEL);
- if (!dbidev)
- return -ENOMEM;
+ dbidev = devm_drm_dev_alloc(dev, &st7586_driver,
+ struct mipi_dbi_dev, drm);
+ if (IS_ERR(dbidev))
+ return PTR_ERR(dbidev);
dbi = &dbidev->dbi;
drm = &dbidev->drm;
- ret = devm_drm_dev_init(dev, drm, &st7586_driver);
- if (ret) {
- kfree(dbidev);
- return ret;
- }
-
- drm_mode_config_init(drm);
bufsize = (st7586_mode.vdisplay + 2) / 3 * st7586_mode.hdisplay;
diff --git a/drivers/gpu/drm/tiny/st7735r.c b/drivers/gpu/drm/tiny/st7735r.c
index 3cd9b8d9888d..0af1b15efdf8 100644
--- a/drivers/gpu/drm/tiny/st7735r.c
+++ b/drivers/gpu/drm/tiny/st7735r.c
@@ -21,6 +21,7 @@
#include <drm/drm_fb_helper.h>
#include <drm/drm_gem_cma_helper.h>
#include <drm/drm_gem_framebuffer_helper.h>
+#include <drm/drm_managed.h>
#include <drm/drm_mipi_dbi.h>
#define ST7735R_FRMCTR1 0xb1
@@ -156,7 +157,6 @@ DEFINE_DRM_GEM_CMA_FOPS(st7735r_fops);
static struct drm_driver st7735r_driver = {
.driver_features = DRIVER_GEM | DRIVER_MODESET | DRIVER_ATOMIC,
.fops = &st7735r_fops,
- .release = mipi_dbi_release,
DRM_GEM_CMA_VMAP_DRIVER_OPS,
.debugfs_init = mipi_dbi_debugfs_init,
.name = "st7735r",
@@ -195,22 +195,16 @@ static int st7735r_probe(struct spi_device *spi)
if (!cfg)
cfg = (void *)spi_get_device_id(spi)->driver_data;
- priv = kzalloc(sizeof(*priv), GFP_KERNEL);
- if (!priv)
- return -ENOMEM;
+ priv = devm_drm_dev_alloc(dev, &st7735r_driver,
+ struct st7735r_priv, dbidev.drm);
+ if (IS_ERR(priv))
+ return PTR_ERR(priv);
dbidev = &priv->dbidev;
priv->cfg = cfg;
dbi = &dbidev->dbi;
drm = &dbidev->drm;
- ret = devm_drm_dev_init(dev, drm, &st7735r_driver);
- if (ret) {
- kfree(dbidev);
- return ret;
- }
-
- drm_mode_config_init(drm);
dbi->reset = devm_gpiod_get(dev, "reset", GPIOD_OUT_HIGH);
if (IS_ERR(dbi->reset)) {
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 9e07c3f75156..f73b81c2576e 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -588,7 +588,8 @@ static void ttm_bo_release(struct kref *kref)
ttm_mem_io_unlock(man);
}
- if (!dma_resv_test_signaled_rcu(bo->base.resv, true)) {
+ if (!dma_resv_test_signaled_rcu(bo->base.resv, true) ||
+ !dma_resv_trylock(bo->base.resv)) {
/* The BO is not idle, resurrect it for delayed destroy */
ttm_bo_flush_all_fences(bo);
bo->deleted = true;
@@ -621,6 +622,7 @@ static void ttm_bo_release(struct kref *kref)
spin_unlock(&ttm_bo_glob.lru_lock);
ttm_bo_cleanup_memtype_use(bo);
+ dma_resv_unlock(bo->base.resv);
BUG_ON(bo->mem.mm_node != NULL);
atomic_dec(&ttm_bo_glob.bo_count);
diff --git a/drivers/gpu/drm/udl/udl_connector.c b/drivers/gpu/drm/udl/udl_connector.c
index 0afdfb0d1fe1..cdc1c42e1669 100644
--- a/drivers/gpu/drm/udl/udl_connector.c
+++ b/drivers/gpu/drm/udl/udl_connector.c
@@ -59,7 +59,7 @@ static int udl_get_modes(struct drm_connector *connector)
static enum drm_mode_status udl_mode_valid(struct drm_connector *connector,
struct drm_display_mode *mode)
{
- struct udl_device *udl = connector->dev->dev_private;
+ struct udl_device *udl = to_udl(connector->dev);
if (!udl->sku_pixel_limit)
return 0;
@@ -72,7 +72,7 @@ static enum drm_mode_status udl_mode_valid(struct drm_connector *connector,
static enum drm_connector_status
udl_detect(struct drm_connector *connector, bool force)
{
- struct udl_device *udl = connector->dev->dev_private;
+ struct udl_device *udl = to_udl(connector->dev);
struct udl_drm_connector *udl_connector =
container_of(connector,
struct udl_drm_connector,
diff --git a/drivers/gpu/drm/udl/udl_drv.c b/drivers/gpu/drm/udl/udl_drv.c
index e6c1cd77d4d4..d1aa50fd6d65 100644
--- a/drivers/gpu/drm/udl/udl_drv.c
+++ b/drivers/gpu/drm/udl/udl_drv.c
@@ -10,6 +10,7 @@
#include <drm/drm_fb_helper.h>
#include <drm/drm_file.h>
#include <drm/drm_gem_shmem_helper.h>
+#include <drm/drm_managed.h>
#include <drm/drm_ioctl.h>
#include <drm/drm_probe_helper.h>
#include <drm/drm_print.h>
@@ -33,17 +34,8 @@ static int udl_usb_resume(struct usb_interface *interface)
DEFINE_DRM_GEM_FOPS(udl_driver_fops);
-static void udl_driver_release(struct drm_device *dev)
-{
- udl_fini(dev);
- udl_modeset_cleanup(dev);
- drm_dev_fini(dev);
- kfree(dev);
-}
-
static struct drm_driver driver = {
.driver_features = DRIVER_ATOMIC | DRIVER_GEM | DRIVER_MODESET,
- .release = udl_driver_release,
/* gem hooks */
.gem_create_object = udl_driver_gem_create_object,
@@ -65,27 +57,19 @@ static struct udl_device *udl_driver_create(struct usb_interface *interface)
struct udl_device *udl;
int r;
- udl = kzalloc(sizeof(*udl), GFP_KERNEL);
- if (!udl)
- return ERR_PTR(-ENOMEM);
-
- r = drm_dev_init(&udl->drm, &driver, &interface->dev);
- if (r) {
- kfree(udl);
- return ERR_PTR(r);
- }
+ udl = devm_drm_dev_alloc(&interface->dev, &driver,
+ struct udl_device, drm);
+ if (IS_ERR(udl))
+ return udl;
udl->udev = udev;
- udl->drm.dev_private = udl;
r = udl_init(udl);
- if (r) {
- drm_dev_fini(&udl->drm);
- kfree(udl);
+ if (r)
return ERR_PTR(r);
- }
usb_set_intfdata(interface, udl);
+
return udl;
}
@@ -101,31 +85,22 @@ static int udl_usb_probe(struct usb_interface *interface,
r = drm_dev_register(&udl->drm, 0);
if (r)
- goto err_free;
+ return r;
DRM_INFO("Initialized udl on minor %d\n", udl->drm.primary->index);
- r = drm_fbdev_generic_setup(&udl->drm, 0);
- if (r)
- goto err_drm_dev_unregister;
+ drm_fbdev_generic_setup(&udl->drm, 0);
return 0;
-
-err_drm_dev_unregister:
- drm_dev_unregister(&udl->drm);
-err_free:
- drm_dev_put(&udl->drm);
- return r;
}
static void udl_usb_disconnect(struct usb_interface *interface)
{
struct drm_device *dev = usb_get_intfdata(interface);
- drm_kms_helper_poll_disable(dev);
+ drm_kms_helper_poll_fini(dev);
udl_drop_usb(dev);
drm_dev_unplug(dev);
- drm_dev_put(dev);
}
/*
diff --git a/drivers/gpu/drm/udl/udl_drv.h b/drivers/gpu/drm/udl/udl_drv.h
index e67227c44cc4..2642f94a63fc 100644
--- a/drivers/gpu/drm/udl/udl_drv.h
+++ b/drivers/gpu/drm/udl/udl_drv.h
@@ -68,7 +68,6 @@ struct udl_device {
/* modeset */
int udl_modeset_init(struct drm_device *dev);
-void udl_modeset_cleanup(struct drm_device *dev);
struct drm_connector *udl_connector_init(struct drm_device *dev);
struct urb *udl_get_urb(struct drm_device *dev);
@@ -77,7 +76,6 @@ int udl_submit_urb(struct drm_device *dev, struct urb *urb, size_t len);
void udl_urb_completion(struct urb *urb);
int udl_init(struct udl_device *udl);
-void udl_fini(struct drm_device *dev);
int udl_render_hline(struct drm_device *dev, int log_bpp, struct urb **urb_ptr,
const char *front, char **urb_buf_ptr,
diff --git a/drivers/gpu/drm/udl/udl_main.c b/drivers/gpu/drm/udl/udl_main.c
index 538718919916..f5d27f2a5654 100644
--- a/drivers/gpu/drm/udl/udl_main.c
+++ b/drivers/gpu/drm/udl/udl_main.c
@@ -351,13 +351,3 @@ int udl_drop_usb(struct drm_device *dev)
udl_free_urb_list(dev);
return 0;
}
-
-void udl_fini(struct drm_device *dev)
-{
- struct udl_device *udl = to_udl(dev);
-
- drm_kms_helper_poll_fini(dev);
-
- if (udl->urbs.count)
- udl_free_urb_list(dev);
-}
diff --git a/drivers/gpu/drm/udl/udl_modeset.c b/drivers/gpu/drm/udl/udl_modeset.c
index d59ebac70b15..fef43f4e3bac 100644
--- a/drivers/gpu/drm/udl/udl_modeset.c
+++ b/drivers/gpu/drm/udl/udl_modeset.c
@@ -215,7 +215,7 @@ static char *udl_dummy_render(char *wrptr)
static int udl_crtc_write_mode_to_hw(struct drm_crtc *crtc)
{
struct drm_device *dev = crtc->dev;
- struct udl_device *udl = dev->dev_private;
+ struct udl_device *udl = to_udl(dev);
struct urb *urb;
char *buf;
int retval;
@@ -266,8 +266,8 @@ static int udl_aligned_damage_clip(struct drm_rect *clip, int x, int y,
return 0;
}
-int udl_handle_damage(struct drm_framebuffer *fb, int x, int y,
- int width, int height)
+static int udl_handle_damage(struct drm_framebuffer *fb, int x, int y,
+ int width, int height)
{
struct drm_device *dev = fb->dev;
struct dma_buf_attachment *import_attach = fb->obj[0]->import_attach;
@@ -369,7 +369,7 @@ udl_simple_display_pipe_enable(struct drm_simple_display_pipe *pipe,
struct drm_crtc *crtc = &pipe->crtc;
struct drm_device *dev = crtc->dev;
struct drm_framebuffer *fb = plane_state->fb;
- struct udl_device *udl = dev->dev_private;
+ struct udl_device *udl = to_udl(dev);
struct drm_display_mode *mode = &crtc_state->mode;
char *buf;
char *wrptr;
@@ -464,11 +464,13 @@ static const struct drm_mode_config_funcs udl_mode_funcs = {
int udl_modeset_init(struct drm_device *dev)
{
size_t format_count = ARRAY_SIZE(udl_simple_display_pipe_formats);
- struct udl_device *udl = dev->dev_private;
+ struct udl_device *udl = to_udl(dev);
struct drm_connector *connector;
int ret;
- drm_mode_config_init(dev);
+ ret = drmm_mode_config_init(dev);
+ if (ret)
+ return ret;
dev->mode_config.min_width = 640;
dev->mode_config.min_height = 480;
@@ -482,10 +484,8 @@ int udl_modeset_init(struct drm_device *dev)
dev->mode_config.funcs = &udl_mode_funcs;
connector = udl_connector_init(dev);
- if (IS_ERR(connector)) {
- ret = PTR_ERR(connector);
- goto err_drm_mode_config_cleanup;
- }
+ if (IS_ERR(connector))
+ return PTR_ERR(connector);
format_count = ARRAY_SIZE(udl_simple_display_pipe_formats);
@@ -494,18 +494,9 @@ int udl_modeset_init(struct drm_device *dev)
udl_simple_display_pipe_formats,
format_count, NULL, connector);
if (ret)
- goto err_drm_mode_config_cleanup;
+ return ret;
drm_mode_config_reset(dev);
return 0;
-
-err_drm_mode_config_cleanup:
- drm_mode_config_cleanup(dev);
- return ret;
-}
-
-void udl_modeset_cleanup(struct drm_device *dev)
-{
- drm_mode_config_cleanup(dev);
}
diff --git a/drivers/gpu/drm/v3d/v3d_debugfs.c b/drivers/gpu/drm/v3d/v3d_debugfs.c
index 9e953ce64ef7..e76b24bb8828 100644
--- a/drivers/gpu/drm/v3d/v3d_debugfs.c
+++ b/drivers/gpu/drm/v3d/v3d_debugfs.c
@@ -132,7 +132,7 @@ static int v3d_v3d_debugfs_ident(struct seq_file *m, void *unused)
u32 ident0, ident1, ident2, ident3, cores;
int ret, core;
- ret = pm_runtime_get_sync(v3d->dev);
+ ret = pm_runtime_get_sync(v3d->drm.dev);
if (ret < 0)
return ret;
@@ -187,8 +187,8 @@ static int v3d_v3d_debugfs_ident(struct seq_file *m, void *unused)
(misccfg & V3D_MISCCFG_OVRTMUOUT) != 0);
}
- pm_runtime_mark_last_busy(v3d->dev);
- pm_runtime_put_autosuspend(v3d->dev);
+ pm_runtime_mark_last_busy(v3d->drm.dev);
+ pm_runtime_put_autosuspend(v3d->drm.dev);
return 0;
}
@@ -219,7 +219,7 @@ static int v3d_measure_clock(struct seq_file *m, void *unused)
int measure_ms = 1000;
int ret;
- ret = pm_runtime_get_sync(v3d->dev);
+ ret = pm_runtime_get_sync(v3d->drm.dev);
if (ret < 0)
return ret;
@@ -245,8 +245,8 @@ static int v3d_measure_clock(struct seq_file *m, void *unused)
cycles / (measure_ms * 1000),
(cycles / (measure_ms * 100)) % 10);
- pm_runtime_mark_last_busy(v3d->dev);
- pm_runtime_put_autosuspend(v3d->dev);
+ pm_runtime_mark_last_busy(v3d->drm.dev);
+ pm_runtime_put_autosuspend(v3d->drm.dev);
return 0;
}
@@ -258,10 +258,10 @@ static const struct drm_info_list v3d_debugfs_list[] = {
{"bo_stats", v3d_debugfs_bo_stats, 0},
};
-int
+void
v3d_debugfs_init(struct drm_minor *minor)
{
- return drm_debugfs_create_files(v3d_debugfs_list,
- ARRAY_SIZE(v3d_debugfs_list),
- minor->debugfs_root, minor);
+ drm_debugfs_create_files(v3d_debugfs_list,
+ ARRAY_SIZE(v3d_debugfs_list),
+ minor->debugfs_root, minor);
}
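
The v3d change above (repeated for vc4 and virtio-gpu further down) reflects the drm_driver.debugfs_init hook changing from int to void: failures from drm_debugfs_create_files() are no longer propagated, so the hooks simply create their files and return. A short, hypothetical hook in the new shape, with illustrative foo_* names only:

	#include <linux/kernel.h>
	#include <linux/seq_file.h>
	#include <drm/drm_debugfs.h>
	#include <drm/drm_file.h>

	static int foo_debugfs_show(struct seq_file *m, void *unused)
	{
		seq_puts(m, "foo state\n");
		return 0;
	}

	static const struct drm_info_list foo_debugfs_list[] = {
		{ "foo_info", foo_debugfs_show, 0 },
	};

	/* The .debugfs_init member of struct drm_driver now returns void. */
	static void foo_debugfs_init(struct drm_minor *minor)
	{
		drm_debugfs_create_files(foo_debugfs_list,
					 ARRAY_SIZE(foo_debugfs_list),
					 minor->debugfs_root, minor);
	}
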
diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c
index eaa8e9682373..82a7dfdd14c2 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.c
+++ b/drivers/gpu/drm/v3d/v3d_drv.c
@@ -25,6 +25,7 @@
#include <drm/drm_drv.h>
#include <drm/drm_fb_cma_helper.h>
#include <drm/drm_fb_helper.h>
+#include <drm/drm_managed.h>
#include <uapi/drm/v3d_drm.h>
#include "v3d_drv.h"
@@ -104,7 +105,7 @@ static int v3d_get_param_ioctl(struct drm_device *dev, void *data,
if (args->value != 0)
return -EINVAL;
- ret = pm_runtime_get_sync(v3d->dev);
+ ret = pm_runtime_get_sync(v3d->drm.dev);
if (ret < 0)
return ret;
if (args->param >= DRM_V3D_PARAM_V3D_CORE0_IDENT0 &&
@@ -113,8 +114,8 @@ static int v3d_get_param_ioctl(struct drm_device *dev, void *data,
} else {
args->value = V3D_READ(offset);
}
- pm_runtime_mark_last_busy(v3d->dev);
- pm_runtime_put_autosuspend(v3d->dev);
+ pm_runtime_mark_last_busy(v3d->drm.dev);
+ pm_runtime_put_autosuspend(v3d->drm.dev);
return 0;
}
@@ -234,9 +235,9 @@ static int
map_regs(struct v3d_dev *v3d, void __iomem **regs, const char *name)
{
struct resource *res =
- platform_get_resource_byname(v3d->pdev, IORESOURCE_MEM, name);
+ platform_get_resource_byname(v3d_to_pdev(v3d), IORESOURCE_MEM, name);
- *regs = devm_ioremap_resource(v3d->dev, res);
+ *regs = devm_ioremap_resource(v3d->drm.dev, res);
return PTR_ERR_OR_ZERO(*regs);
}
@@ -250,20 +251,21 @@ static int v3d_platform_drm_probe(struct platform_device *pdev)
u32 ident1;
- v3d = kzalloc(sizeof(*v3d), GFP_KERNEL);
- if (!v3d)
- return -ENOMEM;
- v3d->dev = dev;
- v3d->pdev = pdev;
+ v3d = devm_drm_dev_alloc(dev, &v3d_drm_driver, struct v3d_dev, drm);
+ if (IS_ERR(v3d))
+ return PTR_ERR(v3d);
+
drm = &v3d->drm;
+ platform_set_drvdata(pdev, drm);
+
ret = map_regs(v3d, &v3d->hub_regs, "hub");
if (ret)
- goto dev_free;
+ return ret;
ret = map_regs(v3d, &v3d->core_regs[0], "core0");
if (ret)
- goto dev_free;
+ return ret;
mmu_debug = V3D_READ(V3D_MMU_DEBUG_INFO);
dev->coherent_dma_mask =
@@ -281,45 +283,37 @@ static int v3d_platform_drm_probe(struct platform_device *pdev)
ret = PTR_ERR(v3d->reset);
if (ret == -EPROBE_DEFER)
- goto dev_free;
+ return ret;
v3d->reset = NULL;
ret = map_regs(v3d, &v3d->bridge_regs, "bridge");
if (ret) {
dev_err(dev,
"Failed to get reset control or bridge regs\n");
- goto dev_free;
+ return ret;
}
}
if (v3d->ver < 41) {
ret = map_regs(v3d, &v3d->gca_regs, "gca");
if (ret)
- goto dev_free;
+ return ret;
}
v3d->mmu_scratch = dma_alloc_wc(dev, 4096, &v3d->mmu_scratch_paddr,
GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO);
if (!v3d->mmu_scratch) {
dev_err(dev, "Failed to allocate MMU scratch page\n");
- ret = -ENOMEM;
- goto dev_free;
+ return -ENOMEM;
}
pm_runtime_use_autosuspend(dev);
pm_runtime_set_autosuspend_delay(dev, 50);
pm_runtime_enable(dev);
- ret = drm_dev_init(&v3d->drm, &v3d_drm_driver, dev);
- if (ret)
- goto dma_free;
-
- platform_set_drvdata(pdev, drm);
- drm->dev_private = v3d;
-
ret = v3d_gem_init(drm);
if (ret)
- goto dev_destroy;
+ goto dma_free;
ret = v3d_irq_init(v3d);
if (ret)
@@ -335,12 +329,8 @@ irq_disable:
v3d_irq_disable(v3d);
gem_destroy:
v3d_gem_destroy(drm);
-dev_destroy:
- drm_dev_put(drm);
dma_free:
dma_free_wc(dev, 4096, v3d->mmu_scratch, v3d->mmu_scratch_paddr);
-dev_free:
- kfree(v3d);
return ret;
}
@@ -353,9 +343,8 @@ static int v3d_platform_drm_remove(struct platform_device *pdev)
v3d_gem_destroy(drm);
- drm_dev_put(drm);
-
- dma_free_wc(v3d->dev, 4096, v3d->mmu_scratch, v3d->mmu_scratch_paddr);
+ dma_free_wc(v3d->drm.dev, 4096, v3d->mmu_scratch,
+ v3d->mmu_scratch_paddr);
return 0;
}
diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h
index ac2603334587..8a390738d65b 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.h
+++ b/drivers/gpu/drm/v3d/v3d_drv.h
@@ -14,7 +14,6 @@
#include "uapi/drm/v3d_drm.h"
struct clk;
-struct device;
struct platform_device;
struct reset_control;
@@ -47,8 +46,6 @@ struct v3d_dev {
int ver;
bool single_irq_line;
- struct device *dev;
- struct platform_device *pdev;
void __iomem *hub_regs;
void __iomem *core_regs[3];
void __iomem *bridge_regs;
@@ -121,7 +118,7 @@ struct v3d_dev {
static inline struct v3d_dev *
to_v3d_dev(struct drm_device *dev)
{
- return (struct v3d_dev *)dev->dev_private;
+ return container_of(dev, struct v3d_dev, drm);
}
static inline bool
@@ -130,6 +127,8 @@ v3d_has_csd(struct v3d_dev *v3d)
return v3d->ver >= 41;
}
+#define v3d_to_pdev(v3d) to_platform_device((v3d)->drm.dev)
+
/* The per-fd struct, which tracks the MMU mappings. */
struct v3d_file_priv {
struct v3d_dev *v3d;
@@ -316,7 +315,7 @@ struct drm_gem_object *v3d_prime_import_sg_table(struct drm_device *dev,
struct sg_table *sgt);
/* v3d_debugfs.c */
-int v3d_debugfs_init(struct drm_minor *minor);
+void v3d_debugfs_init(struct drm_minor *minor);
/* v3d_fence.c */
extern const struct dma_fence_ops v3d_fence_ops;
diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
index 549dde83408b..09a7639cf161 100644
--- a/drivers/gpu/drm/v3d/v3d_gem.c
+++ b/drivers/gpu/drm/v3d/v3d_gem.c
@@ -370,8 +370,8 @@ v3d_job_free(struct kref *ref)
dma_fence_put(job->irq_fence);
dma_fence_put(job->done_fence);
- pm_runtime_mark_last_busy(job->v3d->dev);
- pm_runtime_put_autosuspend(job->v3d->dev);
+ pm_runtime_mark_last_busy(job->v3d->drm.dev);
+ pm_runtime_put_autosuspend(job->v3d->drm.dev);
kfree(job);
}
@@ -439,7 +439,7 @@ v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv,
job->v3d = v3d;
job->free = free;
- ret = pm_runtime_get_sync(v3d->dev);
+ ret = pm_runtime_get_sync(v3d->drm.dev);
if (ret < 0)
return ret;
@@ -458,7 +458,7 @@ v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv,
return 0;
fail:
xa_destroy(&job->deps);
- pm_runtime_put_autosuspend(v3d->dev);
+ pm_runtime_put_autosuspend(v3d->drm.dev);
return ret;
}
@@ -886,12 +886,12 @@ v3d_gem_init(struct drm_device *dev)
*/
drm_mm_init(&v3d->mm, 1, pt_size / sizeof(u32) - 1);
- v3d->pt = dma_alloc_wc(v3d->dev, pt_size,
+ v3d->pt = dma_alloc_wc(v3d->drm.dev, pt_size,
&v3d->pt_paddr,
GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO);
if (!v3d->pt) {
drm_mm_takedown(&v3d->mm);
- dev_err(v3d->dev,
+ dev_err(v3d->drm.dev,
"Failed to allocate page tables. "
"Please ensure you have CMA enabled.\n");
return -ENOMEM;
@@ -903,7 +903,7 @@ v3d_gem_init(struct drm_device *dev)
ret = v3d_sched_init(v3d);
if (ret) {
drm_mm_takedown(&v3d->mm);
- dma_free_coherent(v3d->dev, 4096 * 1024, (void *)v3d->pt,
+ dma_free_coherent(v3d->drm.dev, 4096 * 1024, (void *)v3d->pt,
v3d->pt_paddr);
}
@@ -925,5 +925,6 @@ v3d_gem_destroy(struct drm_device *dev)
drm_mm_takedown(&v3d->mm);
- dma_free_coherent(v3d->dev, 4096 * 1024, (void *)v3d->pt, v3d->pt_paddr);
+ dma_free_coherent(v3d->drm.dev, 4096 * 1024, (void *)v3d->pt,
+ v3d->pt_paddr);
}
diff --git a/drivers/gpu/drm/v3d/v3d_irq.c b/drivers/gpu/drm/v3d/v3d_irq.c
index 662e67279a7b..51b65263c657 100644
--- a/drivers/gpu/drm/v3d/v3d_irq.c
+++ b/drivers/gpu/drm/v3d/v3d_irq.c
@@ -128,7 +128,7 @@ v3d_irq(int irq, void *arg)
* always-allowed mode.
*/
if (intsts & V3D_INT_GMPV)
- dev_err(v3d->dev, "GMP violation\n");
+ dev_err(v3d->drm.dev, "GMP violation\n");
/* V3D 4.2 wires the hub and core IRQs together, so if we &
* didn't see the common one then check hub for MMU IRQs.
@@ -189,7 +189,7 @@ v3d_hub_irq(int irq, void *arg)
client = v3d41_axi_ids[axi_id];
}
- dev_err(v3d->dev, "MMU error from client %s (%d) at 0x%llx%s%s%s\n",
+ dev_err(v3d->drm.dev, "MMU error from client %s (%d) at 0x%llx%s%s%s\n",
client, axi_id, (long long)vio_addr,
((intsts & V3D_HUB_INT_MMU_WRV) ?
", write violation" : ""),
@@ -217,16 +217,17 @@ v3d_irq_init(struct v3d_dev *v3d)
V3D_CORE_WRITE(core, V3D_CTL_INT_CLR, V3D_CORE_IRQS);
V3D_WRITE(V3D_HUB_INT_CLR, V3D_HUB_IRQS);
- irq1 = platform_get_irq(v3d->pdev, 1);
+ irq1 = platform_get_irq(v3d_to_pdev(v3d), 1);
if (irq1 == -EPROBE_DEFER)
return irq1;
if (irq1 > 0) {
- ret = devm_request_irq(v3d->dev, irq1,
+ ret = devm_request_irq(v3d->drm.dev, irq1,
v3d_irq, IRQF_SHARED,
"v3d_core0", v3d);
if (ret)
goto fail;
- ret = devm_request_irq(v3d->dev, platform_get_irq(v3d->pdev, 0),
+ ret = devm_request_irq(v3d->drm.dev,
+ platform_get_irq(v3d_to_pdev(v3d), 0),
v3d_hub_irq, IRQF_SHARED,
"v3d_hub", v3d);
if (ret)
@@ -234,7 +235,8 @@ v3d_irq_init(struct v3d_dev *v3d)
} else {
v3d->single_irq_line = true;
- ret = devm_request_irq(v3d->dev, platform_get_irq(v3d->pdev, 0),
+ ret = devm_request_irq(v3d->drm.dev,
+ platform_get_irq(v3d_to_pdev(v3d), 0),
v3d_irq, IRQF_SHARED,
"v3d", v3d);
if (ret)
@@ -246,7 +248,7 @@ v3d_irq_init(struct v3d_dev *v3d)
fail:
if (ret != -EPROBE_DEFER)
- dev_err(v3d->dev, "IRQ setup failed: %d\n", ret);
+ dev_err(v3d->drm.dev, "IRQ setup failed: %d\n", ret);
return ret;
}
diff --git a/drivers/gpu/drm/v3d/v3d_mmu.c b/drivers/gpu/drm/v3d/v3d_mmu.c
index 395e81d97163..3b81ea28c0bb 100644
--- a/drivers/gpu/drm/v3d/v3d_mmu.c
+++ b/drivers/gpu/drm/v3d/v3d_mmu.c
@@ -40,7 +40,7 @@ static int v3d_mmu_flush_all(struct v3d_dev *v3d)
ret = wait_for(!(V3D_READ(V3D_MMU_CTL) &
V3D_MMU_CTL_TLB_CLEARING), 100);
if (ret)
- dev_err(v3d->dev, "TLB clear wait idle pre-wait failed\n");
+ dev_err(v3d->drm.dev, "TLB clear wait idle pre-wait failed\n");
V3D_WRITE(V3D_MMU_CTL, V3D_READ(V3D_MMU_CTL) |
V3D_MMU_CTL_TLB_CLEAR);
@@ -52,14 +52,14 @@ static int v3d_mmu_flush_all(struct v3d_dev *v3d)
ret = wait_for(!(V3D_READ(V3D_MMU_CTL) &
V3D_MMU_CTL_TLB_CLEARING), 100);
if (ret) {
- dev_err(v3d->dev, "TLB clear wait idle failed\n");
+ dev_err(v3d->drm.dev, "TLB clear wait idle failed\n");
return ret;
}
ret = wait_for(!(V3D_READ(V3D_MMUC_CONTROL) &
V3D_MMUC_CONTROL_FLUSHING), 100);
if (ret)
- dev_err(v3d->dev, "MMUC flush wait idle failed\n");
+ dev_err(v3d->drm.dev, "MMUC flush wait idle failed\n");
return ret;
}
@@ -109,7 +109,7 @@ void v3d_mmu_insert_ptes(struct v3d_bo *bo)
shmem_obj->base.size >> V3D_MMU_PAGE_SHIFT);
if (v3d_mmu_flush_all(v3d))
- dev_err(v3d->dev, "MMU flush timeout\n");
+ dev_err(v3d->drm.dev, "MMU flush timeout\n");
}
void v3d_mmu_remove_ptes(struct v3d_bo *bo)
@@ -122,5 +122,5 @@ void v3d_mmu_remove_ptes(struct v3d_bo *bo)
v3d->pt[page] = 0;
if (v3d_mmu_flush_all(v3d))
- dev_err(v3d->dev, "MMU flush timeout\n");
+ dev_err(v3d->drm.dev, "MMU flush timeout\n");
}
diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c
index 8c2df6d95283..0747614a78f0 100644
--- a/drivers/gpu/drm/v3d/v3d_sched.c
+++ b/drivers/gpu/drm/v3d/v3d_sched.c
@@ -403,7 +403,7 @@ v3d_sched_init(struct v3d_dev *v3d)
msecs_to_jiffies(hang_limit_ms),
"v3d_bin");
if (ret) {
- dev_err(v3d->dev, "Failed to create bin scheduler: %d.", ret);
+ dev_err(v3d->drm.dev, "Failed to create bin scheduler: %d.", ret);
return ret;
}
@@ -413,7 +413,7 @@ v3d_sched_init(struct v3d_dev *v3d)
msecs_to_jiffies(hang_limit_ms),
"v3d_render");
if (ret) {
- dev_err(v3d->dev, "Failed to create render scheduler: %d.",
+ dev_err(v3d->drm.dev, "Failed to create render scheduler: %d.",
ret);
v3d_sched_fini(v3d);
return ret;
@@ -425,7 +425,7 @@ v3d_sched_init(struct v3d_dev *v3d)
msecs_to_jiffies(hang_limit_ms),
"v3d_tfu");
if (ret) {
- dev_err(v3d->dev, "Failed to create TFU scheduler: %d.",
+ dev_err(v3d->drm.dev, "Failed to create TFU scheduler: %d.",
ret);
v3d_sched_fini(v3d);
return ret;
@@ -438,7 +438,7 @@ v3d_sched_init(struct v3d_dev *v3d)
msecs_to_jiffies(hang_limit_ms),
"v3d_csd");
if (ret) {
- dev_err(v3d->dev, "Failed to create CSD scheduler: %d.",
+ dev_err(v3d->drm.dev, "Failed to create CSD scheduler: %d.",
ret);
v3d_sched_fini(v3d);
return ret;
@@ -450,7 +450,7 @@ v3d_sched_init(struct v3d_dev *v3d)
msecs_to_jiffies(hang_limit_ms),
"v3d_cache_clean");
if (ret) {
- dev_err(v3d->dev, "Failed to create CACHE_CLEAN scheduler: %d.",
+ dev_err(v3d->drm.dev, "Failed to create CACHE_CLEAN scheduler: %d.",
ret);
v3d_sched_fini(v3d);
return ret;
diff --git a/drivers/gpu/drm/vboxvideo/vbox_drv.c b/drivers/gpu/drm/vboxvideo/vbox_drv.c
index ac8f75db2ecd..cf2e3e6a2388 100644
--- a/drivers/gpu/drm/vboxvideo/vbox_drv.c
+++ b/drivers/gpu/drm/vboxvideo/vbox_drv.c
@@ -17,6 +17,7 @@
#include <drm/drm_fb_helper.h>
#include <drm/drm_file.h>
#include <drm/drm_ioctl.h>
+#include <drm/drm_managed.h>
#include "vbox_drv.h"
@@ -45,28 +46,22 @@ static int vbox_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
if (ret)
return ret;
- vbox = kzalloc(sizeof(*vbox), GFP_KERNEL);
- if (!vbox)
- return -ENOMEM;
-
- ret = drm_dev_init(&vbox->ddev, &driver, &pdev->dev);
- if (ret) {
- kfree(vbox);
- return ret;
- }
+ vbox = devm_drm_dev_alloc(&pdev->dev, &driver,
+ struct vbox_private, ddev);
+ if (IS_ERR(vbox))
+ return PTR_ERR(vbox);
vbox->ddev.pdev = pdev;
- vbox->ddev.dev_private = vbox;
pci_set_drvdata(pdev, vbox);
mutex_init(&vbox->hw_mutex);
- ret = pci_enable_device(pdev);
+ ret = pcim_enable_device(pdev);
if (ret)
- goto err_dev_put;
+ return ret;
ret = vbox_hw_init(vbox);
if (ret)
- goto err_pci_disable;
+ return ret;
ret = vbox_mm_init(vbox);
if (ret)
@@ -80,14 +75,12 @@ static int vbox_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
if (ret)
goto err_mode_fini;
- ret = drm_fbdev_generic_setup(&vbox->ddev, 32);
- if (ret)
- goto err_irq_fini;
-
ret = drm_dev_register(&vbox->ddev, 0);
if (ret)
goto err_irq_fini;
+ drm_fbdev_generic_setup(&vbox->ddev, 32);
+
return 0;
err_irq_fini:
@@ -98,10 +91,6 @@ err_mm_fini:
vbox_mm_fini(vbox);
err_hw_fini:
vbox_hw_fini(vbox);
-err_pci_disable:
- pci_disable_device(pdev);
-err_dev_put:
- drm_dev_put(&vbox->ddev);
return ret;
}
@@ -114,7 +103,6 @@ static void vbox_pci_remove(struct pci_dev *pdev)
vbox_mode_fini(vbox);
vbox_mm_fini(vbox);
vbox_hw_fini(vbox);
- drm_dev_put(&vbox->ddev);
}
#ifdef CONFIG_PM_SLEEP
diff --git a/drivers/gpu/drm/vboxvideo/vbox_drv.h b/drivers/gpu/drm/vboxvideo/vbox_drv.h
index 87421903816c..ac7c2effc46f 100644
--- a/drivers/gpu/drm/vboxvideo/vbox_drv.h
+++ b/drivers/gpu/drm/vboxvideo/vbox_drv.h
@@ -127,6 +127,7 @@ struct vbox_encoder {
#define to_vbox_crtc(x) container_of(x, struct vbox_crtc, base)
#define to_vbox_connector(x) container_of(x, struct vbox_connector, base)
#define to_vbox_encoder(x) container_of(x, struct vbox_encoder, base)
+#define to_vbox_dev(x) container_of(x, struct vbox_private, ddev)
bool vbox_check_supported(u16 id);
int vbox_hw_init(struct vbox_private *vbox);
diff --git a/drivers/gpu/drm/vboxvideo/vbox_irq.c b/drivers/gpu/drm/vboxvideo/vbox_irq.c
index 16a1e29f5292..631657fa554f 100644
--- a/drivers/gpu/drm/vboxvideo/vbox_irq.c
+++ b/drivers/gpu/drm/vboxvideo/vbox_irq.c
@@ -34,7 +34,7 @@ void vbox_report_hotplug(struct vbox_private *vbox)
irqreturn_t vbox_irq_handler(int irq, void *arg)
{
struct drm_device *dev = (struct drm_device *)arg;
- struct vbox_private *vbox = (struct vbox_private *)dev->dev_private;
+ struct vbox_private *vbox = to_vbox_dev(dev);
u32 host_flags = vbox_get_flags(vbox);
if (!(host_flags & HGSMIHOSTFLAGS_IRQ))
diff --git a/drivers/gpu/drm/vboxvideo/vbox_main.c b/drivers/gpu/drm/vboxvideo/vbox_main.c
index 9dcab115a261..d68d9bad7674 100644
--- a/drivers/gpu/drm/vboxvideo/vbox_main.c
+++ b/drivers/gpu/drm/vboxvideo/vbox_main.c
@@ -71,8 +71,6 @@ static void vbox_accel_fini(struct vbox_private *vbox)
for (i = 0; i < vbox->num_crtcs; ++i)
vbva_disable(&vbox->vbva_info[i], vbox->guest_pool, i);
-
- pci_iounmap(vbox->ddev.pdev, vbox->vbva_buffers);
}
/* Do we support the 4.3 plus mode hint reporting interface? */
@@ -123,21 +121,22 @@ int vbox_hw_init(struct vbox_private *vbox)
return -ENOMEM;
/* Create guest-heap mem-pool use 2^4 = 16 byte chunks */
- vbox->guest_pool = gen_pool_create(4, -1);
+ vbox->guest_pool = devm_gen_pool_create(vbox->ddev.dev, 4, -1,
+ "vboxvideo-accel");
if (!vbox->guest_pool)
- goto err_unmap_guest_heap;
+ return -ENOMEM;
ret = gen_pool_add_virt(vbox->guest_pool,
(unsigned long)vbox->guest_heap,
GUEST_HEAP_OFFSET(vbox),
GUEST_HEAP_USABLE_SIZE, -1);
if (ret)
- goto err_destroy_guest_pool;
+ return ret;
ret = hgsmi_test_query_conf(vbox->guest_pool);
if (ret) {
DRM_ERROR("vboxvideo: hgsmi_test_query_conf failed\n");
- goto err_destroy_guest_pool;
+ return ret;
}
/* Reduce available VRAM size to reflect the guest heap. */
@@ -149,33 +148,23 @@ int vbox_hw_init(struct vbox_private *vbox)
if (!have_hgsmi_mode_hints(vbox)) {
ret = -ENOTSUPP;
- goto err_destroy_guest_pool;
+ return ret;
}
vbox->last_mode_hints = devm_kcalloc(vbox->ddev.dev, vbox->num_crtcs,
sizeof(struct vbva_modehint),
GFP_KERNEL);
- if (!vbox->last_mode_hints) {
- ret = -ENOMEM;
- goto err_destroy_guest_pool;
- }
+ if (!vbox->last_mode_hints)
+ return -ENOMEM;
ret = vbox_accel_init(vbox);
if (ret)
- goto err_destroy_guest_pool;
+ return ret;
return 0;
-
-err_destroy_guest_pool:
- gen_pool_destroy(vbox->guest_pool);
-err_unmap_guest_heap:
- pci_iounmap(vbox->ddev.pdev, vbox->guest_heap);
- return ret;
}
void vbox_hw_fini(struct vbox_private *vbox)
{
vbox_accel_fini(vbox);
- gen_pool_destroy(vbox->guest_pool);
- pci_iounmap(vbox->ddev.pdev, vbox->guest_heap);
}
diff --git a/drivers/gpu/drm/vboxvideo/vbox_mode.c b/drivers/gpu/drm/vboxvideo/vbox_mode.c
index 0883a435e62b..d9a5af62af89 100644
--- a/drivers/gpu/drm/vboxvideo/vbox_mode.c
+++ b/drivers/gpu/drm/vboxvideo/vbox_mode.c
@@ -36,7 +36,7 @@ static void vbox_do_modeset(struct drm_crtc *crtc)
u16 flags;
s32 x_offset, y_offset;
- vbox = crtc->dev->dev_private;
+ vbox = to_vbox_dev(crtc->dev);
width = vbox_crtc->width ? vbox_crtc->width : 640;
height = vbox_crtc->height ? vbox_crtc->height : 480;
bpp = fb ? fb->format->cpp[0] * 8 : 32;
@@ -77,7 +77,7 @@ static void vbox_do_modeset(struct drm_crtc *crtc)
static int vbox_set_view(struct drm_crtc *crtc)
{
struct vbox_crtc *vbox_crtc = to_vbox_crtc(crtc);
- struct vbox_private *vbox = crtc->dev->dev_private;
+ struct vbox_private *vbox = to_vbox_dev(crtc->dev);
struct vbva_infoview *p;
/*
@@ -174,7 +174,7 @@ static void vbox_crtc_set_base_and_mode(struct drm_crtc *crtc,
int x, int y)
{
struct drm_gem_vram_object *gbo = drm_gem_vram_of_gem(fb->obj[0]);
- struct vbox_private *vbox = crtc->dev->dev_private;
+ struct vbox_private *vbox = to_vbox_dev(crtc->dev);
struct vbox_crtc *vbox_crtc = to_vbox_crtc(crtc);
bool needs_modeset = drm_atomic_crtc_needs_modeset(crtc->state);
@@ -272,7 +272,7 @@ static void vbox_primary_atomic_update(struct drm_plane *plane,
{
struct drm_crtc *crtc = plane->state->crtc;
struct drm_framebuffer *fb = plane->state->fb;
- struct vbox_private *vbox = fb->dev->dev_private;
+ struct vbox_private *vbox = to_vbox_dev(fb->dev);
struct drm_mode_rect *clips;
uint32_t num_clips, i;
@@ -704,7 +704,7 @@ static int vbox_get_modes(struct drm_connector *connector)
int preferred_width, preferred_height;
vbox_connector = to_vbox_connector(connector);
- vbox = connector->dev->dev_private;
+ vbox = to_vbox_dev(connector->dev);
hgsmi_report_flags_location(vbox->guest_pool, GUEST_HEAP_OFFSET(vbox) +
HOST_FLAGS_OFFSET);
diff --git a/drivers/gpu/drm/vboxvideo/vbox_ttm.c b/drivers/gpu/drm/vboxvideo/vbox_ttm.c
index 976423d0c3cc..f5a06675da43 100644
--- a/drivers/gpu/drm/vboxvideo/vbox_ttm.c
+++ b/drivers/gpu/drm/vboxvideo/vbox_ttm.c
@@ -24,25 +24,13 @@ int vbox_mm_init(struct vbox_private *vbox)
return ret;
}
-#ifdef DRM_MTRR_WC
- vbox->fb_mtrr = drm_mtrr_add(pci_resource_start(dev->pdev, 0),
- pci_resource_len(dev->pdev, 0),
- DRM_MTRR_WC);
-#else
vbox->fb_mtrr = arch_phys_wc_add(pci_resource_start(dev->pdev, 0),
pci_resource_len(dev->pdev, 0));
-#endif
return 0;
}
void vbox_mm_fini(struct vbox_private *vbox)
{
-#ifdef DRM_MTRR_WC
- drm_mtrr_del(vbox->fb_mtrr,
- pci_resource_start(vbox->ddev.pdev, 0),
- pci_resource_len(vbox->ddev.pdev, 0), DRM_MTRR_WC);
-#else
arch_phys_wc_del(vbox->fb_mtrr);
-#endif
drm_vram_helper_release_mm(&vbox->ddev);
}
diff --git a/drivers/gpu/drm/vc4/vc4_debugfs.c b/drivers/gpu/drm/vc4/vc4_debugfs.c
index b61b2d3407b5..4fbbf980a299 100644
--- a/drivers/gpu/drm/vc4/vc4_debugfs.c
+++ b/drivers/gpu/drm/vc4/vc4_debugfs.c
@@ -20,7 +20,7 @@ struct vc4_debugfs_info_entry {
* Called at drm_dev_register() time on each of the minors registered
* by the DRM device, to attach the debugfs files.
*/
-int
+void
vc4_debugfs_init(struct drm_minor *minor)
{
struct vc4_dev *vc4 = to_vc4_dev(minor->dev);
@@ -30,14 +30,9 @@ vc4_debugfs_init(struct drm_minor *minor)
minor->debugfs_root, &vc4->load_tracker_enabled);
list_for_each_entry(entry, &vc4->debugfs_list, link) {
- int ret = drm_debugfs_create_files(&entry->info, 1,
- minor->debugfs_root, minor);
-
- if (ret)
- return ret;
+ drm_debugfs_create_files(&entry->info, 1,
+ minor->debugfs_root, minor);
}
-
- return 0;
}
static int vc4_debugfs_regset32(struct seq_file *m, void *unused)
diff --git a/drivers/gpu/drm/vc4/vc4_dpi.c b/drivers/gpu/drm/vc4/vc4_dpi.c
index 6dfede03396e..a90f2545baee 100644
--- a/drivers/gpu/drm/vc4/vc4_dpi.c
+++ b/drivers/gpu/drm/vc4/vc4_dpi.c
@@ -17,6 +17,7 @@
#include <drm/drm_of.h>
#include <drm/drm_panel.h>
#include <drm/drm_probe_helper.h>
+#include <drm/drm_simple_kms_helper.h>
#include <linux/clk.h>
#include <linux/component.h>
#include <linux/of_graph.h>
@@ -114,10 +115,6 @@ static const struct debugfs_reg32 dpi_regs[] = {
VC4_REG32(DPI_ID),
};
-static const struct drm_encoder_funcs vc4_dpi_encoder_funcs = {
- .destroy = drm_encoder_cleanup,
-};
-
static void vc4_dpi_encoder_disable(struct drm_encoder *encoder)
{
struct vc4_dpi_encoder *vc4_encoder = to_vc4_dpi_encoder(encoder);
@@ -309,8 +306,7 @@ static int vc4_dpi_bind(struct device *dev, struct device *master, void *data)
if (ret)
DRM_ERROR("Failed to turn on core clock: %d\n", ret);
- drm_encoder_init(drm, dpi->encoder, &vc4_dpi_encoder_funcs,
- DRM_MODE_ENCODER_DPI, NULL);
+ drm_simple_encoder_init(drm, dpi->encoder, DRM_MODE_ENCODER_DPI);
drm_encoder_helper_add(dpi->encoder, &vc4_dpi_encoder_helper_funcs);
ret = vc4_dpi_init_bridge(dpi);
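
The encoder changes in vc4 (and in virtio-gpu below) all replace a drm_encoder_funcs struct whose only callback was .destroy = drm_encoder_cleanup with drm_simple_encoder_init(), which supplies exactly that default. A hedged sketch of the new call sequence, with foo_* placeholders:

	#include <drm/drm_modeset_helper_vtables.h>
	#include <drm/drm_simple_kms_helper.h>

	static const struct drm_encoder_helper_funcs foo_encoder_helper_funcs = {
		/* .enable/.disable etc. stay as before; only the funcs struct goes away */
	};

	static int foo_encoder_init(struct drm_device *drm,
				    struct drm_encoder *encoder)
	{
		int ret;

		/* Registers a default .destroy = drm_encoder_cleanup internally. */
		ret = drm_simple_encoder_init(drm, encoder, DRM_MODE_ENCODER_DPI);
		if (ret)
			return ret;

		drm_encoder_helper_add(encoder, &foo_encoder_helper_funcs);
		return 0;
	}
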
diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h
index 139d25a8328e..3b1f02efefbe 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.h
+++ b/drivers/gpu/drm/vc4/vc4_drv.h
@@ -759,7 +759,7 @@ void vc4_crtc_get_margins(struct drm_crtc_state *state,
unsigned int *top, unsigned int *bottom);
/* vc4_debugfs.c */
-int vc4_debugfs_init(struct drm_minor *minor);
+void vc4_debugfs_init(struct drm_minor *minor);
#ifdef CONFIG_DEBUG_FS
void vc4_debugfs_add_file(struct drm_device *drm,
const char *filename,
diff --git a/drivers/gpu/drm/vc4/vc4_dsi.c b/drivers/gpu/drm/vc4/vc4_dsi.c
index d99b1d526651..eaf276978ee7 100644
--- a/drivers/gpu/drm/vc4/vc4_dsi.c
+++ b/drivers/gpu/drm/vc4/vc4_dsi.c
@@ -37,6 +37,7 @@
#include <drm/drm_of.h>
#include <drm/drm_panel.h>
#include <drm/drm_probe_helper.h>
+#include <drm/drm_simple_kms_helper.h>
#include "vc4_drv.h"
#include "vc4_regs.h"
@@ -652,15 +653,6 @@ static const struct debugfs_reg32 dsi1_regs[] = {
VC4_REG32(DSI1_ID),
};
-static void vc4_dsi_encoder_destroy(struct drm_encoder *encoder)
-{
- drm_encoder_cleanup(encoder);
-}
-
-static const struct drm_encoder_funcs vc4_dsi_encoder_funcs = {
- .destroy = vc4_dsi_encoder_destroy,
-};
-
static void vc4_dsi_latch_ulps(struct vc4_dsi *dsi, bool latch)
{
u32 afec0 = DSI_PORT_READ(PHY_AFEC0);
@@ -1615,8 +1607,7 @@ static int vc4_dsi_bind(struct device *dev, struct device *master, void *data)
if (dsi->port == 1)
vc4->dsi1 = dsi;
- drm_encoder_init(drm, dsi->encoder, &vc4_dsi_encoder_funcs,
- DRM_MODE_ENCODER_DSI, NULL);
+ drm_simple_encoder_init(drm, dsi->encoder, DRM_MODE_ENCODER_DSI);
drm_encoder_helper_add(dsi->encoder, &vc4_dsi_encoder_helper_funcs);
ret = drm_bridge_attach(dsi->encoder, dsi->bridge, NULL, 0);
@@ -1656,7 +1647,7 @@ static void vc4_dsi_unbind(struct device *dev, struct device *master,
* normally.
*/
list_splice_init(&dsi->bridge_chain, &dsi->encoder->bridge_chain);
- vc4_dsi_encoder_destroy(dsi->encoder);
+ drm_encoder_cleanup(dsi->encoder);
if (dsi->port == 1)
vc4->dsi1 = NULL;
diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c
index 340719238753..625bfcf52dc4 100644
--- a/drivers/gpu/drm/vc4/vc4_hdmi.c
+++ b/drivers/gpu/drm/vc4/vc4_hdmi.c
@@ -34,6 +34,7 @@
#include <drm/drm_atomic_helper.h>
#include <drm/drm_edid.h>
#include <drm/drm_probe_helper.h>
+#include <drm/drm_simple_kms_helper.h>
#include <linux/clk.h>
#include <linux/component.h>
#include <linux/i2c.h>
@@ -306,15 +307,6 @@ static struct drm_connector *vc4_hdmi_connector_init(struct drm_device *dev,
return connector;
}
-static void vc4_hdmi_encoder_destroy(struct drm_encoder *encoder)
-{
- drm_encoder_cleanup(encoder);
-}
-
-static const struct drm_encoder_funcs vc4_hdmi_encoder_funcs = {
- .destroy = vc4_hdmi_encoder_destroy,
-};
-
static int vc4_hdmi_stop_packet(struct drm_encoder *encoder,
enum hdmi_infoframe_type type)
{
@@ -1406,8 +1398,7 @@ static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data)
}
pm_runtime_enable(dev);
- drm_encoder_init(drm, hdmi->encoder, &vc4_hdmi_encoder_funcs,
- DRM_MODE_ENCODER_TMDS, NULL);
+ drm_simple_encoder_init(drm, hdmi->encoder, DRM_MODE_ENCODER_TMDS);
drm_encoder_helper_add(hdmi->encoder, &vc4_hdmi_encoder_helper_funcs);
hdmi->connector =
@@ -1465,7 +1456,7 @@ err_destroy_conn:
vc4_hdmi_connector_destroy(hdmi->connector);
#endif
err_destroy_encoder:
- vc4_hdmi_encoder_destroy(hdmi->encoder);
+ drm_encoder_cleanup(hdmi->encoder);
err_unprepare_hsm:
clk_disable_unprepare(hdmi->hsm_clock);
pm_runtime_disable(dev);
@@ -1484,7 +1475,7 @@ static void vc4_hdmi_unbind(struct device *dev, struct device *master,
cec_unregister_adapter(hdmi->cec_adap);
vc4_hdmi_connector_destroy(hdmi->connector);
- vc4_hdmi_encoder_destroy(hdmi->encoder);
+ drm_encoder_cleanup(hdmi->encoder);
clk_disable_unprepare(hdmi->hsm_clock);
pm_runtime_disable(dev);
diff --git a/drivers/gpu/drm/vc4/vc4_vec.c b/drivers/gpu/drm/vc4/vc4_vec.c
index 7402bc768664..bd5b8eb58b18 100644
--- a/drivers/gpu/drm/vc4/vc4_vec.c
+++ b/drivers/gpu/drm/vc4/vc4_vec.c
@@ -17,6 +17,7 @@
#include <drm/drm_edid.h>
#include <drm/drm_panel.h>
#include <drm/drm_probe_helper.h>
+#include <drm/drm_simple_kms_helper.h>
#include <linux/clk.h>
#include <linux/component.h>
#include <linux/of_graph.h>
@@ -374,10 +375,6 @@ static struct drm_connector *vc4_vec_connector_init(struct drm_device *dev,
return connector;
}
-static const struct drm_encoder_funcs vc4_vec_encoder_funcs = {
- .destroy = drm_encoder_cleanup,
-};
-
static void vc4_vec_encoder_disable(struct drm_encoder *encoder)
{
struct vc4_vec_encoder *vc4_vec_encoder = to_vc4_vec_encoder(encoder);
@@ -566,8 +563,7 @@ static int vc4_vec_bind(struct device *dev, struct device *master, void *data)
pm_runtime_enable(dev);
- drm_encoder_init(drm, vec->encoder, &vc4_vec_encoder_funcs,
- DRM_MODE_ENCODER_TVDAC, NULL);
+ drm_simple_encoder_init(drm, vec->encoder, DRM_MODE_ENCODER_TVDAC);
drm_encoder_helper_add(vec->encoder, &vc4_vec_encoder_helper_funcs);
vec->connector = vc4_vec_connector_init(drm, vec);
diff --git a/drivers/gpu/drm/vgem/vgem_drv.c b/drivers/gpu/drm/vgem/vgem_drv.c
index 909eba43664a..ec1a8ebb6f1b 100644
--- a/drivers/gpu/drm/vgem/vgem_drv.c
+++ b/drivers/gpu/drm/vgem/vgem_drv.c
@@ -39,6 +39,7 @@
#include <drm/drm_drv.h>
#include <drm/drm_file.h>
#include <drm/drm_ioctl.h>
+#include <drm/drm_managed.h>
#include <drm/drm_prime.h>
#include "vgem_drv.h"
@@ -431,9 +432,6 @@ static void vgem_release(struct drm_device *dev)
struct vgem_device *vgem = container_of(dev, typeof(*vgem), drm);
platform_device_unregister(vgem->platform);
- drm_dev_fini(&vgem->drm);
-
- kfree(vgem);
}
static struct drm_driver vgem_driver = {
@@ -489,16 +487,19 @@ static int __init vgem_init(void)
&vgem_device->platform->dev);
if (ret)
goto out_unregister;
+ drmm_add_final_kfree(&vgem_device->drm, vgem_device);
/* Final step: expose the device/driver to userspace */
- ret = drm_dev_register(&vgem_device->drm, 0);
+ ret = drm_dev_register(&vgem_device->drm, 0);
if (ret)
- goto out_fini;
+ goto out_put;
return 0;
-out_fini:
- drm_dev_fini(&vgem_device->drm);
+out_put:
+ drm_dev_put(&vgem_device->drm);
+ return ret;
+
out_unregister:
platform_device_unregister(vgem_device->platform);
out_free:
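vgem (and vkms below) moves from open-coded teardown to managed teardown: drmm_add_final_kfree() registers the embedding structure with the DRM core so the final drm_dev_put() frees it, which is why drm_dev_fini() and the explicit kfree() disappear from the release callback. A rough sketch of the resulting init flow, assuming a drm_dev_init()-style setup (labels and the parent pointer are illustrative):

	ret = drm_dev_init(&vgem->drm, &vgem_driver, parent);
	if (ret)
		goto out_unregister;
	drmm_add_final_kfree(&vgem->drm, vgem);	/* kfree(vgem) on last put */

	ret = drm_dev_register(&vgem->drm, 0);
	if (ret) {
		drm_dev_put(&vgem->drm);	/* runs ->release(), then the final kfree */
		return ret;
	}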
diff --git a/drivers/gpu/drm/virtio/virtgpu_debugfs.c b/drivers/gpu/drm/virtio/virtgpu_debugfs.c
index e27120d512b0..3221520f61f0 100644
--- a/drivers/gpu/drm/virtio/virtgpu_debugfs.c
+++ b/drivers/gpu/drm/virtio/virtgpu_debugfs.c
@@ -72,11 +72,10 @@ static struct drm_info_list virtio_gpu_debugfs_list[] = {
#define VIRTIO_GPU_DEBUGFS_ENTRIES ARRAY_SIZE(virtio_gpu_debugfs_list)
-int
+void
virtio_gpu_debugfs_init(struct drm_minor *minor)
{
drm_debugfs_create_files(virtio_gpu_debugfs_list,
VIRTIO_GPU_DEBUGFS_ENTRIES,
minor->debugfs_root, minor);
- return 0;
}
diff --git a/drivers/gpu/drm/virtio/virtgpu_display.c b/drivers/gpu/drm/virtio/virtgpu_display.c
index 2b7e6ae65546..cc7fd957a307 100644
--- a/drivers/gpu/drm/virtio/virtgpu_display.c
+++ b/drivers/gpu/drm/virtio/virtgpu_display.c
@@ -30,6 +30,7 @@
#include <drm/drm_fourcc.h>
#include <drm/drm_gem_framebuffer_helper.h>
#include <drm/drm_probe_helper.h>
+#include <drm/drm_simple_kms_helper.h>
#include "virtgpu_drv.h"
@@ -240,10 +241,6 @@ static const struct drm_connector_funcs virtio_gpu_connector_funcs = {
.atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
};
-static const struct drm_encoder_funcs virtio_gpu_enc_funcs = {
- .destroy = drm_encoder_cleanup,
-};
-
static int vgdev_output_init(struct virtio_gpu_device *vgdev, int index)
{
struct drm_device *dev = vgdev->ddev;
@@ -276,8 +273,7 @@ static int vgdev_output_init(struct virtio_gpu_device *vgdev, int index)
if (vgdev->has_edid)
drm_connector_attach_edid_property(connector);
- drm_encoder_init(dev, encoder, &virtio_gpu_enc_funcs,
- DRM_MODE_ENCODER_VIRTUAL, NULL);
+ drm_simple_encoder_init(dev, encoder, DRM_MODE_ENCODER_VIRTUAL);
drm_encoder_helper_add(encoder, &virtio_gpu_enc_helper_funcs);
encoder->possible_crtcs = 1 << index;
diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h
index 7879ff58236f..9ff9f4ac0522 100644
--- a/drivers/gpu/drm/virtio/virtgpu_drv.h
+++ b/drivers/gpu/drm/virtio/virtgpu_drv.h
@@ -218,27 +218,19 @@ struct virtio_gpu_fpriv {
struct mutex context_lock;
};
-/* virtio_ioctl.c */
+/* virtgpu_ioctl.c */
#define DRM_VIRTIO_NUM_IOCTLS 10
extern struct drm_ioctl_desc virtio_gpu_ioctls[DRM_VIRTIO_NUM_IOCTLS];
void virtio_gpu_create_context(struct drm_device *dev, struct drm_file *file);
-/* virtio_kms.c */
+/* virtgpu_kms.c */
int virtio_gpu_init(struct drm_device *dev);
void virtio_gpu_deinit(struct drm_device *dev);
void virtio_gpu_release(struct drm_device *dev);
int virtio_gpu_driver_open(struct drm_device *dev, struct drm_file *file);
void virtio_gpu_driver_postclose(struct drm_device *dev, struct drm_file *file);
-/* virtio_gem.c */
-void virtio_gpu_gem_free_object(struct drm_gem_object *gem_obj);
-int virtio_gpu_gem_init(struct virtio_gpu_device *vgdev);
-void virtio_gpu_gem_fini(struct virtio_gpu_device *vgdev);
-int virtio_gpu_gem_create(struct drm_file *file,
- struct drm_device *dev,
- struct virtio_gpu_object_params *params,
- struct drm_gem_object **obj_p,
- uint32_t *handle_p);
+/* virtgpu_gem.c */
int virtio_gpu_gem_object_open(struct drm_gem_object *obj,
struct drm_file *file);
void virtio_gpu_gem_object_close(struct drm_gem_object *obj,
@@ -264,7 +256,7 @@ void virtio_gpu_array_put_free_delayed(struct virtio_gpu_device *vgdev,
struct virtio_gpu_object_array *objs);
void virtio_gpu_array_put_free_work(struct work_struct *work);
-/* virtio vg */
+/* virtgpu_vq.c */
int virtio_gpu_alloc_vbufs(struct virtio_gpu_device *vgdev);
void virtio_gpu_free_vbufs(struct virtio_gpu_device *vgdev);
void virtio_gpu_cmd_create_resource(struct virtio_gpu_device *vgdev,
@@ -288,10 +280,10 @@ void virtio_gpu_cmd_set_scanout(struct virtio_gpu_device *vgdev,
uint32_t scanout_id, uint32_t resource_id,
uint32_t width, uint32_t height,
uint32_t x, uint32_t y);
-int virtio_gpu_object_attach(struct virtio_gpu_device *vgdev,
- struct virtio_gpu_object *obj,
- struct virtio_gpu_mem_entry *ents,
- unsigned int nents);
+void virtio_gpu_object_attach(struct virtio_gpu_device *vgdev,
+ struct virtio_gpu_object *obj,
+ struct virtio_gpu_mem_entry *ents,
+ unsigned int nents);
int virtio_gpu_attach_status_page(struct virtio_gpu_device *vgdev);
int virtio_gpu_detach_status_page(struct virtio_gpu_device *vgdev);
void virtio_gpu_cursor_ping(struct virtio_gpu_device *vgdev,
@@ -344,17 +336,17 @@ void virtio_gpu_dequeue_fence_func(struct work_struct *work);
void virtio_gpu_notify(struct virtio_gpu_device *vgdev);
-/* virtio_gpu_display.c */
+/* virtgpu_display.c */
void virtio_gpu_modeset_init(struct virtio_gpu_device *vgdev);
void virtio_gpu_modeset_fini(struct virtio_gpu_device *vgdev);
-/* virtio_gpu_plane.c */
+/* virtgpu_plane.c */
uint32_t virtio_gpu_translate_format(uint32_t drm_fourcc);
struct drm_plane *virtio_gpu_plane_init(struct virtio_gpu_device *vgdev,
enum drm_plane_type type,
int index);
-/* virtio_gpu_fence.c */
+/* virtgpu_fence.c */
struct virtio_gpu_fence *virtio_gpu_fence_alloc(
struct virtio_gpu_device *vgdev);
void virtio_gpu_fence_emit(struct virtio_gpu_device *vgdev,
@@ -363,7 +355,7 @@ void virtio_gpu_fence_emit(struct virtio_gpu_device *vgdev,
void virtio_gpu_fence_event_process(struct virtio_gpu_device *vdev,
u64 last_seq);
-/* virtio_gpu_object */
+/* virtgpu_object.c */
void virtio_gpu_cleanup_object(struct virtio_gpu_object *bo);
struct drm_gem_object *virtio_gpu_create_object(struct drm_device *dev,
size_t size);
@@ -379,7 +371,7 @@ struct drm_gem_object *virtgpu_gem_prime_import_sg_table(
struct drm_device *dev, struct dma_buf_attachment *attach,
struct sg_table *sgt);
-/* virgl debugfs */
-int virtio_gpu_debugfs_init(struct drm_minor *minor);
+/* virtgpu_debugfs.c */
+void virtio_gpu_debugfs_init(struct drm_minor *minor);
#endif
diff --git a/drivers/gpu/drm/virtio/virtgpu_gem.c b/drivers/gpu/drm/virtio/virtgpu_gem.c
index f0d5a8974677..d6cb350ae52a 100644
--- a/drivers/gpu/drm/virtio/virtgpu_gem.c
+++ b/drivers/gpu/drm/virtio/virtgpu_gem.c
@@ -28,11 +28,11 @@
#include "virtgpu_drv.h"
-int virtio_gpu_gem_create(struct drm_file *file,
- struct drm_device *dev,
- struct virtio_gpu_object_params *params,
- struct drm_gem_object **obj_p,
- uint32_t *handle_p)
+static int virtio_gpu_gem_create(struct drm_file *file,
+ struct drm_device *dev,
+ struct virtio_gpu_object_params *params,
+ struct drm_gem_object **obj_p,
+ uint32_t *handle_p)
{
struct virtio_gpu_device *vgdev = dev->dev_private;
struct virtio_gpu_object *obj;
@@ -117,7 +117,7 @@ int virtio_gpu_gem_object_open(struct drm_gem_object *obj,
struct virtio_gpu_object_array *objs;
if (!vgdev->has_virgl_3d)
- return 0;
+ goto out_notify;
objs = virtio_gpu_array_alloc(1);
if (!objs)
@@ -126,6 +126,7 @@ int virtio_gpu_gem_object_open(struct drm_gem_object *obj,
virtio_gpu_cmd_context_attach_resource(vgdev, vfpriv->ctx_id,
objs);
+out_notify:
virtio_gpu_notify(vgdev);
return 0;
}
diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
index 512daff92038..5df722072ba0 100644
--- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c
+++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
@@ -47,7 +47,6 @@ void virtio_gpu_create_context(struct drm_device *dev, struct drm_file *file)
get_task_comm(dbgname, current);
virtio_gpu_cmd_context_create(vgdev, vfpriv->ctx_id,
strlen(dbgname), dbgname);
- virtio_gpu_notify(vgdev);
vfpriv->context_created = true;
out_unlock:
diff --git a/drivers/gpu/drm/virtio/virtgpu_object.c b/drivers/gpu/drm/virtio/virtgpu_object.c
index d9039bb7c5e3..6ccbd01cd888 100644
--- a/drivers/gpu/drm/virtio/virtgpu_object.c
+++ b/drivers/gpu/drm/virtio/virtgpu_object.c
@@ -235,13 +235,8 @@ int virtio_gpu_object_create(struct virtio_gpu_device *vgdev,
return ret;
}
- ret = virtio_gpu_object_attach(vgdev, bo, ents, nents);
- if (ret != 0) {
- virtio_gpu_free_object(&shmem_obj->base);
- return ret;
- }
+ virtio_gpu_object_attach(vgdev, bo, ents, nents);
- virtio_gpu_notify(vgdev);
*bo_ptr = bo;
return 0;
diff --git a/drivers/gpu/drm/virtio/virtgpu_vq.c b/drivers/gpu/drm/virtio/virtgpu_vq.c
index 73854915ec34..9e663a5d9952 100644
--- a/drivers/gpu/drm/virtio/virtgpu_vq.c
+++ b/drivers/gpu/drm/virtio/virtgpu_vq.c
@@ -1087,14 +1087,13 @@ void virtio_gpu_cmd_submit(struct virtio_gpu_device *vgdev,
virtio_gpu_queue_fenced_ctrl_buffer(vgdev, vbuf, fence);
}
-int virtio_gpu_object_attach(struct virtio_gpu_device *vgdev,
- struct virtio_gpu_object *obj,
- struct virtio_gpu_mem_entry *ents,
- unsigned int nents)
+void virtio_gpu_object_attach(struct virtio_gpu_device *vgdev,
+ struct virtio_gpu_object *obj,
+ struct virtio_gpu_mem_entry *ents,
+ unsigned int nents)
{
virtio_gpu_cmd_resource_attach_backing(vgdev, obj->hw_res_handle,
ents, nents, NULL);
- return 0;
}
void virtio_gpu_cursor_ping(struct virtio_gpu_device *vgdev,
diff --git a/drivers/gpu/drm/vkms/vkms_drv.c b/drivers/gpu/drm/vkms/vkms_drv.c
index 860de052e820..1e8b2169d834 100644
--- a/drivers/gpu/drm/vkms/vkms_drv.c
+++ b/drivers/gpu/drm/vkms/vkms_drv.c
@@ -21,6 +21,7 @@
#include <drm/drm_file.h>
#include <drm/drm_gem_framebuffer_helper.h>
#include <drm/drm_ioctl.h>
+#include <drm/drm_managed.h>
#include <drm/drm_probe_helper.h>
#include <drm/drm_vblank.h>
@@ -34,7 +35,7 @@
static struct vkms_device *vkms_device;
-bool enable_cursor;
+bool enable_cursor = true;
module_param_named(enable_cursor, enable_cursor, bool, 0444);
MODULE_PARM_DESC(enable_cursor, "Enable/Disable cursor support");
@@ -63,7 +64,6 @@ static void vkms_release(struct drm_device *dev)
platform_device_unregister(vkms->platform);
drm_atomic_helper_shutdown(&vkms->drm);
drm_mode_config_cleanup(&vkms->drm);
- drm_dev_fini(&vkms->drm);
destroy_workqueue(vkms->output.composer_workq);
}
@@ -158,13 +158,14 @@ static int __init vkms_init(void)
&vkms_device->platform->dev);
if (ret)
goto out_unregister;
+ drmm_add_final_kfree(&vkms_device->drm, vkms_device);
ret = dma_coerce_mask_and_coherent(vkms_device->drm.dev,
DMA_BIT_MASK(64));
if (ret) {
DRM_ERROR("Could not initialize DMA support\n");
- goto out_fini;
+ goto out_put;
}
vkms_device->drm.irq_enabled = true;
@@ -172,25 +173,25 @@ static int __init vkms_init(void)
ret = drm_vblank_init(&vkms_device->drm, 1);
if (ret) {
DRM_ERROR("Failed to vblank\n");
- goto out_fini;
+ goto out_put;
}
ret = vkms_modeset_init(vkms_device);
if (ret)
- goto out_fini;
+ goto out_put;
ret = drm_dev_register(&vkms_device->drm, 0);
if (ret)
- goto out_fini;
+ goto out_put;
return 0;
-out_fini:
- drm_dev_fini(&vkms_device->drm);
+out_put:
+ drm_dev_put(&vkms_device->drm);
+ return ret;
out_unregister:
platform_device_unregister(vkms_device->platform);
-
out_free:
kfree(vkms_device);
return ret;
@@ -205,8 +206,6 @@ static void __exit vkms_exit(void)
drm_dev_unregister(&vkms_device->drm);
drm_dev_put(&vkms_device->drm);
-
- kfree(vkms_device);
}
module_init(vkms_init);
diff --git a/drivers/gpu/drm/vkms/vkms_drv.h b/drivers/gpu/drm/vkms/vkms_drv.h
index eda04ffba7b1..f4036bb0b9a8 100644
--- a/drivers/gpu/drm/vkms/vkms_drv.h
+++ b/drivers/gpu/drm/vkms/vkms_drv.h
@@ -117,11 +117,6 @@ struct drm_plane *vkms_plane_init(struct vkms_device *vkmsdev,
enum drm_plane_type type, int index);
/* Gem stuff */
-struct drm_gem_object *vkms_gem_create(struct drm_device *dev,
- struct drm_file *file,
- u32 *handle,
- u64 size);
-
vm_fault_t vkms_gem_fault(struct vm_fault *vmf);
int vkms_dumb_create(struct drm_file *file, struct drm_device *dev,
diff --git a/drivers/gpu/drm/vkms/vkms_gem.c b/drivers/gpu/drm/vkms/vkms_gem.c
index 2e01186fb943..c541fec57566 100644
--- a/drivers/gpu/drm/vkms/vkms_gem.c
+++ b/drivers/gpu/drm/vkms/vkms_gem.c
@@ -97,10 +97,10 @@ vm_fault_t vkms_gem_fault(struct vm_fault *vmf)
return ret;
}
-struct drm_gem_object *vkms_gem_create(struct drm_device *dev,
- struct drm_file *file,
- u32 *handle,
- u64 size)
+static struct drm_gem_object *vkms_gem_create(struct drm_device *dev,
+ struct drm_file *file,
+ u32 *handle,
+ u64 size)
{
struct vkms_gem_object *obj;
int ret;
@@ -113,7 +113,6 @@ struct drm_gem_object *vkms_gem_create(struct drm_device *dev,
return ERR_CAST(obj);
ret = drm_gem_handle_create(file, &obj->gem, handle);
- drm_gem_object_put_unlocked(&obj->gem);
if (ret)
return ERR_PTR(ret);
@@ -142,6 +141,8 @@ int vkms_dumb_create(struct drm_file *file, struct drm_device *dev,
args->size = gem_obj->size;
args->pitch = pitch;
+ drm_gem_object_put_unlocked(gem_obj);
+
DRM_DEBUG_DRIVER("Created object of size %lld\n", size);
return 0;
diff --git a/drivers/gpu/drm/vkms/vkms_output.c b/drivers/gpu/drm/vkms/vkms_output.c
index fb1941a6522c..85afb77e97f0 100644
--- a/drivers/gpu/drm/vkms/vkms_output.c
+++ b/drivers/gpu/drm/vkms/vkms_output.c
@@ -3,6 +3,7 @@
#include "vkms_drv.h"
#include <drm/drm_atomic_helper.h>
#include <drm/drm_probe_helper.h>
+#include <drm/drm_simple_kms_helper.h>
static void vkms_connector_destroy(struct drm_connector *connector)
{
@@ -17,10 +18,6 @@ static const struct drm_connector_funcs vkms_connector_funcs = {
.atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
};
-static const struct drm_encoder_funcs vkms_encoder_funcs = {
- .destroy = drm_encoder_cleanup,
-};
-
static int vkms_conn_get_modes(struct drm_connector *connector)
{
int count;
@@ -70,8 +67,7 @@ int vkms_output_init(struct vkms_device *vkmsdev, int index)
drm_connector_helper_add(connector, &vkms_conn_helper_funcs);
- ret = drm_encoder_init(dev, encoder, &vkms_encoder_funcs,
- DRM_MODE_ENCODER_VIRTUAL, NULL);
+ ret = drm_simple_encoder_init(dev, encoder, DRM_MODE_ENCODER_VIRTUAL);
if (ret) {
DRM_ERROR("Failed to init encoder\n");
goto err_encoder;
diff --git a/drivers/gpu/drm/xen/xen_drm_front.c b/drivers/gpu/drm/xen/xen_drm_front.c
index 374142018171..1fd458e877ca 100644
--- a/drivers/gpu/drm/xen/xen_drm_front.c
+++ b/drivers/gpu/drm/xen/xen_drm_front.c
@@ -460,9 +460,6 @@ static void xen_drm_drv_release(struct drm_device *dev)
drm_atomic_helper_shutdown(dev);
drm_mode_config_cleanup(dev);
- drm_dev_fini(dev);
- kfree(dev);
-
if (front_info->cfg.be_alloc)
xenbus_switch_state(front_info->xb_dev,
XenbusStateInitialising);
@@ -561,6 +558,7 @@ fail_register:
fail_modeset:
drm_kms_helper_poll_fini(drm_dev);
drm_mode_config_cleanup(drm_dev);
+ drm_dev_put(drm_dev);
fail:
kfree(drm_info);
return ret;
diff --git a/drivers/gpu/drm/zte/zx_hdmi.c b/drivers/gpu/drm/zte/zx_hdmi.c
index b98a1420dcd3..76a16d997a23 100644
--- a/drivers/gpu/drm/zte/zx_hdmi.c
+++ b/drivers/gpu/drm/zte/zx_hdmi.c
@@ -20,6 +20,7 @@
#include <drm/drm_of.h>
#include <drm/drm_probe_helper.h>
#include <drm/drm_print.h>
+#include <drm/drm_simple_kms_helper.h>
#include <sound/hdmi-codec.h>
@@ -254,10 +255,6 @@ static const struct drm_encoder_helper_funcs zx_hdmi_encoder_helper_funcs = {
.mode_set = zx_hdmi_encoder_mode_set,
};
-static const struct drm_encoder_funcs zx_hdmi_encoder_funcs = {
- .destroy = drm_encoder_cleanup,
-};
-
static int zx_hdmi_connector_get_modes(struct drm_connector *connector)
{
struct zx_hdmi *hdmi = to_zx_hdmi(connector);
@@ -313,8 +310,7 @@ static int zx_hdmi_register(struct drm_device *drm, struct zx_hdmi *hdmi)
encoder->possible_crtcs = VOU_CRTC_MASK;
- drm_encoder_init(drm, encoder, &zx_hdmi_encoder_funcs,
- DRM_MODE_ENCODER_TMDS, NULL);
+ drm_simple_encoder_init(drm, encoder, DRM_MODE_ENCODER_TMDS);
drm_encoder_helper_add(encoder, &zx_hdmi_encoder_helper_funcs);
hdmi->connector.polled = DRM_CONNECTOR_POLL_HPD;
diff --git a/drivers/gpu/drm/zte/zx_tvenc.c b/drivers/gpu/drm/zte/zx_tvenc.c
index c598b7daf1f1..d8a89ba383bc 100644
--- a/drivers/gpu/drm/zte/zx_tvenc.c
+++ b/drivers/gpu/drm/zte/zx_tvenc.c
@@ -14,6 +14,7 @@
#include <drm/drm_atomic_helper.h>
#include <drm/drm_print.h>
#include <drm/drm_probe_helper.h>
+#include <drm/drm_simple_kms_helper.h>
#include "zx_drm_drv.h"
#include "zx_tvenc_regs.h"
@@ -218,10 +219,6 @@ static const struct drm_encoder_helper_funcs zx_tvenc_encoder_helper_funcs = {
.mode_set = zx_tvenc_encoder_mode_set,
};
-static const struct drm_encoder_funcs zx_tvenc_encoder_funcs = {
- .destroy = drm_encoder_cleanup,
-};
-
static int zx_tvenc_connector_get_modes(struct drm_connector *connector)
{
struct zx_tvenc *tvenc = to_zx_tvenc(connector);
@@ -285,8 +282,7 @@ static int zx_tvenc_register(struct drm_device *drm, struct zx_tvenc *tvenc)
*/
encoder->possible_crtcs = BIT(1);
- drm_encoder_init(drm, encoder, &zx_tvenc_encoder_funcs,
- DRM_MODE_ENCODER_TVDAC, NULL);
+ drm_simple_encoder_init(drm, encoder, DRM_MODE_ENCODER_TVDAC);
drm_encoder_helper_add(encoder, &zx_tvenc_encoder_helper_funcs);
connector->interlace_allowed = true;
diff --git a/drivers/gpu/drm/zte/zx_vga.c b/drivers/gpu/drm/zte/zx_vga.c
index c4fa3bbaba78..a7ed7f5ca837 100644
--- a/drivers/gpu/drm/zte/zx_vga.c
+++ b/drivers/gpu/drm/zte/zx_vga.c
@@ -14,6 +14,7 @@
#include <drm/drm_atomic_helper.h>
#include <drm/drm_print.h>
#include <drm/drm_probe_helper.h>
+#include <drm/drm_simple_kms_helper.h>
#include "zx_drm_drv.h"
#include "zx_vga_regs.h"
@@ -72,10 +73,6 @@ static const struct drm_encoder_helper_funcs zx_vga_encoder_helper_funcs = {
.disable = zx_vga_encoder_disable,
};
-static const struct drm_encoder_funcs zx_vga_encoder_funcs = {
- .destroy = drm_encoder_cleanup,
-};
-
static int zx_vga_connector_get_modes(struct drm_connector *connector)
{
struct zx_vga *vga = to_zx_vga(connector);
@@ -154,8 +151,7 @@ static int zx_vga_register(struct drm_device *drm, struct zx_vga *vga)
encoder->possible_crtcs = VOU_CRTC_MASK;
- ret = drm_encoder_init(drm, encoder, &zx_vga_encoder_funcs,
- DRM_MODE_ENCODER_DAC, NULL);
+ ret = drm_simple_encoder_init(drm, encoder, DRM_MODE_ENCODER_DAC);
if (ret) {
DRM_DEV_ERROR(dev, "failed to init encoder: %d\n", ret);
return ret;
diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index 23f358cb7f49..90070b337c10 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -290,6 +290,34 @@ int vmbus_send_tl_connect_request(const guid_t *shv_guest_servie_id,
EXPORT_SYMBOL_GPL(vmbus_send_tl_connect_request);
/*
+ * Set/change the vCPU (@target_vp) the channel (@child_relid) will interrupt.
+ *
+ * CHANNELMSG_MODIFYCHANNEL messages are asynchronous. Also, Hyper-V does not
+ * ACK such messages. IOW we can't know when the host will stop interrupting
+ * the "old" vCPU and start interrupting the "new" vCPU for the given channel.
+ *
+ * The CHANNELMSG_MODIFYCHANNEL message type is supported since VMBus version
+ * VERSION_WIN10_V4_1.
+ */
+int vmbus_send_modifychannel(u32 child_relid, u32 target_vp)
+{
+ struct vmbus_channel_modifychannel conn_msg;
+ int ret;
+
+ memset(&conn_msg, 0, sizeof(conn_msg));
+ conn_msg.header.msgtype = CHANNELMSG_MODIFYCHANNEL;
+ conn_msg.child_relid = child_relid;
+ conn_msg.target_vp = target_vp;
+
+ ret = vmbus_post_msg(&conn_msg, sizeof(conn_msg), true);
+
+ trace_vmbus_send_modifychannel(&conn_msg, ret);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(vmbus_send_modifychannel);
+
+/*
* create_gpadl_header - Creates a gpadl for the specified buffer
*/
static int create_gpadl_header(void *kbuffer, u32 size,
@@ -594,35 +622,31 @@ post_msg_err:
}
EXPORT_SYMBOL_GPL(vmbus_teardown_gpadl);
-static void reset_channel_cb(void *arg)
-{
- struct vmbus_channel *channel = arg;
-
- channel->onchannel_callback = NULL;
-}
-
void vmbus_reset_channel_cb(struct vmbus_channel *channel)
{
+ unsigned long flags;
+
/*
* vmbus_on_event(), running in the per-channel tasklet, can race
* with vmbus_close_internal() in the case of SMP guest, e.g., when
* the former is accessing channel->inbound.ring_buffer, the latter
* could be freeing the ring_buffer pages, so here we must stop it
* first.
+ *
+ * vmbus_chan_sched() might call the netvsc driver callback function
+ * that ends up scheduling NAPI work that accesses the ring buffer.
+ * At this point, we have to ensure that any such work is completed
+ * and that the channel ring buffer is no longer being accessed, cf.
+ * the calls to napi_disable() in netvsc_device_remove().
*/
tasklet_disable(&channel->callback_event);
- channel->sc_creation_callback = NULL;
+ /* See the inline comments in vmbus_chan_sched(). */
+ spin_lock_irqsave(&channel->sched_lock, flags);
+ channel->onchannel_callback = NULL;
+ spin_unlock_irqrestore(&channel->sched_lock, flags);
- /* Stop the callback asap */
- if (channel->target_cpu != get_cpu()) {
- put_cpu();
- smp_call_function_single(channel->target_cpu, reset_channel_cb,
- channel, true);
- } else {
- reset_channel_cb(channel);
- put_cpu();
- }
+ channel->sc_creation_callback = NULL;
/* Re-enable tasklet for use on re-open */
tasklet_enable(&channel->callback_event);
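vmbus_send_modifychannel() above is new exported API for retargeting a channel's interrupt to another vCPU. Because the message is fire-and-forget (the host does not ACK it), a hypothetical caller can only update its own bookkeeping after a successful post and must keep the old CPU able to service interrupts for a while; sketched:

	/* Hypothetical caller: move the channel's interrupt to new_cpu. */
	ret = vmbus_send_modifychannel(channel->offermsg.child_relid,
				       hv_cpu_number_to_vp_number(new_cpu));
	if (ret)
		return ret;		/* host never saw the request */

	/* No ACK: interrupts may still arrive on the old vCPU for a while. */
	channel->target_cpu = new_cpu;
	channel->target_vp = hv_cpu_number_to_vp_number(new_cpu);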
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index 501c43c5851d..417a95e5094d 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -18,14 +18,15 @@
#include <linux/module.h>
#include <linux/completion.h>
#include <linux/delay.h>
+#include <linux/cpu.h>
#include <linux/hyperv.h>
#include <asm/mshyperv.h>
#include "hyperv_vmbus.h"
-static void init_vp_index(struct vmbus_channel *channel, u16 dev_type);
+static void init_vp_index(struct vmbus_channel *channel);
-static const struct vmbus_device vmbus_devs[] = {
+const struct vmbus_device vmbus_devs[] = {
/* IDE */
{ .dev_type = HV_IDE,
HV_IDE_GUID,
@@ -315,11 +316,11 @@ static struct vmbus_channel *alloc_channel(void)
if (!channel)
return NULL;
+ spin_lock_init(&channel->sched_lock);
spin_lock_init(&channel->lock);
init_completion(&channel->rescind_event);
INIT_LIST_HEAD(&channel->sc_list);
- INIT_LIST_HEAD(&channel->percpu_list);
tasklet_init(&channel->callback_event,
vmbus_on_event, (unsigned long)channel);
@@ -340,23 +341,49 @@ static void free_channel(struct vmbus_channel *channel)
kobject_put(&channel->kobj);
}
-static void percpu_channel_enq(void *arg)
+void vmbus_channel_map_relid(struct vmbus_channel *channel)
{
- struct vmbus_channel *channel = arg;
- struct hv_per_cpu_context *hv_cpu
- = this_cpu_ptr(hv_context.cpu_context);
-
- list_add_tail_rcu(&channel->percpu_list, &hv_cpu->chan_list);
+ if (WARN_ON(channel->offermsg.child_relid >= MAX_CHANNEL_RELIDS))
+ return;
+ /*
+ * The mapping of the channel's relid is visible from the CPUs that
+ * execute vmbus_chan_sched() by the time that vmbus_chan_sched() will
+ * execute:
+ *
+ * (a) In the "normal (i.e., not resuming from hibernation)" path,
+ * the full barrier in smp_store_mb() guarantees that the store
+ * is propagated to all CPUs before the add_channel_work work
+ * is queued. In turn, add_channel_work is queued before the
+ * channel's ring buffer is allocated/initialized and the
+ * OPENCHANNEL message for the channel is sent in vmbus_open().
+ * Hyper-V won't start sending the interrupts for the channel
+ * before the OPENCHANNEL message is acked. The memory barrier
+ * in vmbus_chan_sched() -> sync_test_and_clear_bit() ensures
+ * that vmbus_chan_sched() must find the channel's relid in
+ * recv_int_page before retrieving the channel pointer from the
+ * array of channels.
+ *
+ * (b) In the "resuming from hibernation" path, the smp_store_mb()
+ * guarantees that the store is propagated to all CPUs before
+ * the VMBus connection is marked as ready for the resume event
+ * (cf. check_ready_for_resume_event()). The interrupt handler
+ * of the VMBus driver and vmbus_chan_sched() can not run before
+ * vmbus_bus_resume() has completed execution (cf. resume_noirq).
+ */
+ smp_store_mb(
+ vmbus_connection.channels[channel->offermsg.child_relid],
+ channel);
}
-static void percpu_channel_deq(void *arg)
+void vmbus_channel_unmap_relid(struct vmbus_channel *channel)
{
- struct vmbus_channel *channel = arg;
-
- list_del_rcu(&channel->percpu_list);
+ if (WARN_ON(channel->offermsg.child_relid >= MAX_CHANNEL_RELIDS))
+ return;
+ WRITE_ONCE(
+ vmbus_connection.channels[channel->offermsg.child_relid],
+ NULL);
}
-
static void vmbus_release_relid(u32 relid)
{
struct vmbus_channel_relid_released msg;
@@ -373,39 +400,43 @@ static void vmbus_release_relid(u32 relid)
void hv_process_channel_removal(struct vmbus_channel *channel)
{
- struct vmbus_channel *primary_channel;
unsigned long flags;
- BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex));
+ lockdep_assert_held(&vmbus_connection.channel_mutex);
BUG_ON(!channel->rescind);
- if (channel->target_cpu != get_cpu()) {
- put_cpu();
- smp_call_function_single(channel->target_cpu,
- percpu_channel_deq, channel, true);
- } else {
- percpu_channel_deq(channel);
- put_cpu();
- }
+ /*
+ * hv_process_channel_removal() could find INVALID_RELID only for
+ * hv_sock channels. See the inline comments in vmbus_onoffer().
+ */
+ WARN_ON(channel->offermsg.child_relid == INVALID_RELID &&
+ !is_hvsock_channel(channel));
+
+ /*
+ * Upon suspend, an in-use hv_sock channel is removed from the array of
+ * channels and the relid is invalidated. After hibernation, when the
+ * user-space application destroys the channel, it's unnecessary and
+ * unsafe to remove the channel from the array of channels. See also
+ * the inline comments before the call of vmbus_release_relid() below.
+ */
+ if (channel->offermsg.child_relid != INVALID_RELID)
+ vmbus_channel_unmap_relid(channel);
if (channel->primary_channel == NULL) {
list_del(&channel->listentry);
-
- primary_channel = channel;
} else {
- primary_channel = channel->primary_channel;
+ struct vmbus_channel *primary_channel = channel->primary_channel;
spin_lock_irqsave(&primary_channel->lock, flags);
list_del(&channel->sc_list);
spin_unlock_irqrestore(&primary_channel->lock, flags);
}
/*
- * We need to free the bit for init_vp_index() to work in the case
- * of sub-channel, when we reload drivers like hv_netvsc.
+ * If this is a "perf" channel, updates the hv_numa_map[] masks so that
+ * init_vp_index() can (re-)use the CPU.
*/
- if (channel->affinity_policy == HV_LOCALIZED)
- cpumask_clear_cpu(channel->target_cpu,
- &primary_channel->alloced_cpus_in_node);
+ if (hv_is_perf_channel(channel))
+ hv_clear_alloced_cpu(channel->target_cpu);
/*
* Upon suspend, an in-use hv_sock channel is marked as "rescinded" and
@@ -440,23 +471,8 @@ static void vmbus_add_channel_work(struct work_struct *work)
container_of(work, struct vmbus_channel, add_channel_work);
struct vmbus_channel *primary_channel = newchannel->primary_channel;
unsigned long flags;
- u16 dev_type;
int ret;
- dev_type = hv_get_dev_type(newchannel);
-
- init_vp_index(newchannel, dev_type);
-
- if (newchannel->target_cpu != get_cpu()) {
- put_cpu();
- smp_call_function_single(newchannel->target_cpu,
- percpu_channel_enq,
- newchannel, true);
- } else {
- percpu_channel_enq(newchannel);
- put_cpu();
- }
-
/*
* This state is used to indicate a successful open
* so that when we do close the channel normally, we
@@ -488,7 +504,7 @@ static void vmbus_add_channel_work(struct work_struct *work)
if (!newchannel->device_obj)
goto err_deq_chan;
- newchannel->device_obj->device_id = dev_type;
+ newchannel->device_obj->device_id = newchannel->device_id;
/*
* Add the new device to the bus. This will kick off device-driver
* binding which eventually invokes the device driver's AddDevice()
@@ -523,17 +539,10 @@ err_deq_chan:
spin_unlock_irqrestore(&primary_channel->lock, flags);
}
- mutex_unlock(&vmbus_connection.channel_mutex);
+ /* vmbus_process_offer() has mapped the channel. */
+ vmbus_channel_unmap_relid(newchannel);
- if (newchannel->target_cpu != get_cpu()) {
- put_cpu();
- smp_call_function_single(newchannel->target_cpu,
- percpu_channel_deq,
- newchannel, true);
- } else {
- percpu_channel_deq(newchannel);
- put_cpu();
- }
+ mutex_unlock(&vmbus_connection.channel_mutex);
vmbus_release_relid(newchannel->offermsg.child_relid);
@@ -551,8 +560,35 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
unsigned long flags;
bool fnew = true;
+ /*
+ * Synchronize vmbus_process_offer() and CPU hotplugging:
+ *
+ * CPU1 CPU2
+ *
+ * [vmbus_process_offer()] [Hot removal of the CPU]
+ *
+ * CPUS_READ_LOCK CPUS_WRITE_LOCK
+ * LOAD cpu_online_mask SEARCH chn_list
+ * STORE target_cpu LOAD target_cpu
+ * INSERT chn_list STORE cpu_online_mask
+ * CPUS_READ_UNLOCK CPUS_WRITE_UNLOCK
+ *
+ * Forbids: CPU1's LOAD from *not* seeing CPU2's STORE &&
+ * CPU2's SEARCH from *not* seeing CPU1's INSERT
+ *
+ * Forbids: CPU2's SEARCH from seeing CPU1's INSERT &&
+ * CPU2's LOAD from *not* seeing CPU1's STORE
+ */
+ cpus_read_lock();
+
+ /*
+ * Serializes the modifications of the chn_list list as well as
+ * the accesses to next_numa_node_id in init_vp_index().
+ */
mutex_lock(&vmbus_connection.channel_mutex);
+ init_vp_index(newchannel);
+
/* Remember the channels that should be cleaned up upon suspend. */
if (is_hvsock_channel(newchannel) || is_sub_channel(newchannel))
atomic_inc(&vmbus_connection.nr_chan_close_on_suspend);
@@ -599,7 +635,10 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
spin_unlock_irqrestore(&channel->lock, flags);
}
+ vmbus_channel_map_relid(newchannel);
+
mutex_unlock(&vmbus_connection.channel_mutex);
+ cpus_read_unlock();
/*
* vmbus_process_offer() mustn't call channel->sc_creation_callback()
@@ -632,73 +671,61 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
* We use this state to statically distribute the channel interrupt load.
*/
static int next_numa_node_id;
-/*
- * init_vp_index() accesses global variables like next_numa_node_id, and
- * it can run concurrently for primary channels and sub-channels: see
- * vmbus_process_offer(), so we need the lock to protect the global
- * variables.
- */
-static DEFINE_SPINLOCK(bind_channel_to_cpu_lock);
/*
* Starting with Win8, we can statically distribute the incoming
* channel interrupt load by binding a channel to VCPU.
- * We distribute the interrupt loads to one or more NUMA nodes based on
- * the channel's affinity_policy.
*
* For pre-win8 hosts or non-performance critical channels we assign the
- * first CPU in the first NUMA node.
+ * VMBUS_CONNECT_CPU.
+ *
+ * Starting with win8, performance critical channels will be distributed
+ * evenly among all the available NUMA nodes. Once the node is assigned,
+ * we will assign the CPU based on a simple round robin scheme.
*/
-static void init_vp_index(struct vmbus_channel *channel, u16 dev_type)
+static void init_vp_index(struct vmbus_channel *channel)
{
- u32 cur_cpu;
- bool perf_chn = vmbus_devs[dev_type].perf_device;
- struct vmbus_channel *primary = channel->primary_channel;
- int next_node;
+ bool perf_chn = hv_is_perf_channel(channel);
cpumask_var_t available_mask;
struct cpumask *alloced_mask;
+ u32 target_cpu;
+ int numa_node;
if ((vmbus_proto_version == VERSION_WS2008) ||
(vmbus_proto_version == VERSION_WIN7) || (!perf_chn) ||
!alloc_cpumask_var(&available_mask, GFP_KERNEL)) {
/*
* Prior to win8, all channel interrupts are
- * delivered on cpu 0.
+ * delivered on VMBUS_CONNECT_CPU.
* Also if the channel is not a performance critical
- * channel, bind it to cpu 0.
- * In case alloc_cpumask_var() fails, bind it to cpu 0.
+ * channel, bind it to VMBUS_CONNECT_CPU.
+ * In case alloc_cpumask_var() fails, bind it to
+ * VMBUS_CONNECT_CPU.
*/
- channel->numa_node = 0;
- channel->target_cpu = 0;
- channel->target_vp = hv_cpu_number_to_vp_number(0);
+ channel->numa_node = cpu_to_node(VMBUS_CONNECT_CPU);
+ channel->target_cpu = VMBUS_CONNECT_CPU;
+ channel->target_vp =
+ hv_cpu_number_to_vp_number(VMBUS_CONNECT_CPU);
+ if (perf_chn)
+ hv_set_alloced_cpu(VMBUS_CONNECT_CPU);
return;
}
- spin_lock(&bind_channel_to_cpu_lock);
-
- /*
- * Based on the channel affinity policy, we will assign the NUMA
- * nodes.
- */
-
- if ((channel->affinity_policy == HV_BALANCED) || (!primary)) {
- while (true) {
- next_node = next_numa_node_id++;
- if (next_node == nr_node_ids) {
- next_node = next_numa_node_id = 0;
- continue;
- }
- if (cpumask_empty(cpumask_of_node(next_node)))
- continue;
- break;
+ while (true) {
+ numa_node = next_numa_node_id++;
+ if (numa_node == nr_node_ids) {
+ next_numa_node_id = 0;
+ continue;
}
- channel->numa_node = next_node;
- primary = channel;
+ if (cpumask_empty(cpumask_of_node(numa_node)))
+ continue;
+ break;
}
- alloced_mask = &hv_context.hv_numa_map[primary->numa_node];
+ channel->numa_node = numa_node;
+ alloced_mask = &hv_context.hv_numa_map[numa_node];
if (cpumask_weight(alloced_mask) ==
- cpumask_weight(cpumask_of_node(primary->numa_node))) {
+ cpumask_weight(cpumask_of_node(numa_node))) {
/*
* We have cycled through all the CPUs in the node;
* reset the alloced map.
@@ -706,59 +733,13 @@ static void init_vp_index(struct vmbus_channel *channel, u16 dev_type)
cpumask_clear(alloced_mask);
}
- cpumask_xor(available_mask, alloced_mask,
- cpumask_of_node(primary->numa_node));
-
- cur_cpu = -1;
-
- if (primary->affinity_policy == HV_LOCALIZED) {
- /*
- * Normally Hyper-V host doesn't create more subchannels
- * than there are VCPUs on the node but it is possible when not
- * all present VCPUs on the node are initialized by guest.
- * Clear the alloced_cpus_in_node to start over.
- */
- if (cpumask_equal(&primary->alloced_cpus_in_node,
- cpumask_of_node(primary->numa_node)))
- cpumask_clear(&primary->alloced_cpus_in_node);
- }
-
- while (true) {
- cur_cpu = cpumask_next(cur_cpu, available_mask);
- if (cur_cpu >= nr_cpu_ids) {
- cur_cpu = -1;
- cpumask_copy(available_mask,
- cpumask_of_node(primary->numa_node));
- continue;
- }
-
- if (primary->affinity_policy == HV_LOCALIZED) {
- /*
- * NOTE: in the case of sub-channel, we clear the
- * sub-channel related bit(s) in
- * primary->alloced_cpus_in_node in
- * hv_process_channel_removal(), so when we
- * reload drivers like hv_netvsc in SMP guest, here
- * we're able to re-allocate
- * bit from primary->alloced_cpus_in_node.
- */
- if (!cpumask_test_cpu(cur_cpu,
- &primary->alloced_cpus_in_node)) {
- cpumask_set_cpu(cur_cpu,
- &primary->alloced_cpus_in_node);
- cpumask_set_cpu(cur_cpu, alloced_mask);
- break;
- }
- } else {
- cpumask_set_cpu(cur_cpu, alloced_mask);
- break;
- }
- }
+ cpumask_xor(available_mask, alloced_mask, cpumask_of_node(numa_node));
- channel->target_cpu = cur_cpu;
- channel->target_vp = hv_cpu_number_to_vp_number(cur_cpu);
+ target_cpu = cpumask_first(available_mask);
+ cpumask_set_cpu(target_cpu, alloced_mask);
- spin_unlock(&bind_channel_to_cpu_lock);
+ channel->target_cpu = target_cpu;
+ channel->target_vp = hv_cpu_number_to_vp_number(target_cpu);
free_cpumask_var(available_mask);
}
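Stripped of the cpumask plumbing, the new assignment is: pick the next NUMA node round-robin (skipping nodes with no CPUs), then take the first not-yet-used CPU in that node. The node-selection step as a standalone sketch, with a hypothetical node_has_cpus() predicate standing in for cpumask_of_node()/cpumask_empty():

	static int next_node_id;	/* plays the role of next_numa_node_id */

	/* Round-robin over [0, nr_nodes), wrapping and skipping empty nodes. */
	static int pick_numa_node(int nr_nodes, bool (*node_has_cpus)(int node))
	{
		for (;;) {
			int node = next_node_id++;

			if (node >= nr_nodes) {		/* wrap around */
				next_node_id = 0;
				continue;
			}
			if (!node_has_cpus(node))	/* skip empty nodes */
				continue;
			return node;
		}
	}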
@@ -890,6 +871,7 @@ static void vmbus_setup_channel_state(struct vmbus_channel *channel,
sizeof(struct vmbus_channel_offer_channel));
channel->monitor_grp = (u8)offer->monitorid / 32;
channel->monitor_bit = (u8)offer->monitorid % 32;
+ channel->device_id = hv_get_dev_type(channel);
}
/*
@@ -940,8 +922,6 @@ static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
oldchannel = find_primary_channel_by_offer(offer);
if (oldchannel != NULL) {
- atomic_dec(&vmbus_connection.offer_in_progress);
-
/*
* We're resuming from hibernation: all the sub-channel and
* hv_sock channels we had before the hibernation should have
@@ -949,36 +929,65 @@ static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
* primary channel that we had before the hibernation.
*/
+ /*
+ * { Initially: channel relid = INVALID_RELID,
+ * channels[valid_relid] = NULL }
+ *
+ * CPU1 CPU2
+ *
+ * [vmbus_onoffer()] [vmbus_device_release()]
+ *
+ * LOCK channel_mutex LOCK channel_mutex
+ * STORE channel relid = valid_relid LOAD r1 = channel relid
+ * MAP_RELID channel if (r1 != INVALID_RELID)
+ * UNLOCK channel_mutex UNMAP_RELID channel
+ * UNLOCK channel_mutex
+ *
+ * Forbids: r1 == valid_relid &&
+ * channels[valid_relid] == channel
+ *
+ * Note. r1 can be INVALID_RELID only for an hv_sock channel.
+ * None of the hv_sock channels which were present before the
+ * suspend are re-offered upon the resume. See the WARN_ON()
+ * in hv_process_channel_removal().
+ */
+ mutex_lock(&vmbus_connection.channel_mutex);
+
+ atomic_dec(&vmbus_connection.offer_in_progress);
+
WARN_ON(oldchannel->offermsg.child_relid != INVALID_RELID);
/* Fix up the relid. */
oldchannel->offermsg.child_relid = offer->child_relid;
offer_sz = sizeof(*offer);
- if (memcmp(offer, &oldchannel->offermsg, offer_sz) == 0) {
- check_ready_for_resume_event();
- return;
+ if (memcmp(offer, &oldchannel->offermsg, offer_sz) != 0) {
+ /*
+ * This is not an error, since the host can also change
+ * the other field(s) of the offer, e.g. on WS RS5
+ * (Build 17763), the offer->connection_id of the
+ * Mellanox VF vmbus device can change when the host
+ * reoffers the device upon resume.
+ */
+ pr_debug("vmbus offer changed: relid=%d\n",
+ offer->child_relid);
+
+ print_hex_dump_debug("Old vmbus offer: ",
+ DUMP_PREFIX_OFFSET, 16, 4,
+ &oldchannel->offermsg, offer_sz,
+ false);
+ print_hex_dump_debug("New vmbus offer: ",
+ DUMP_PREFIX_OFFSET, 16, 4,
+ offer, offer_sz, false);
+
+ /* Fix up the old channel. */
+ vmbus_setup_channel_state(oldchannel, offer);
}
- /*
- * This is not an error, since the host can also change the
- * other field(s) of the offer, e.g. on WS RS5 (Build 17763),
- * the offer->connection_id of the Mellanox VF vmbus device
- * can change when the host reoffers the device upon resume.
- */
- pr_debug("vmbus offer changed: relid=%d\n",
- offer->child_relid);
-
- print_hex_dump_debug("Old vmbus offer: ", DUMP_PREFIX_OFFSET,
- 16, 4, &oldchannel->offermsg, offer_sz,
- false);
- print_hex_dump_debug("New vmbus offer: ", DUMP_PREFIX_OFFSET,
- 16, 4, offer, offer_sz, false);
-
- /* Fix up the old channel. */
- vmbus_setup_channel_state(oldchannel, offer);
-
+ /* Add the channel back to the array of channels. */
+ vmbus_channel_map_relid(oldchannel);
check_ready_for_resume_event();
+ mutex_unlock(&vmbus_connection.channel_mutex);
return;
}
@@ -1028,11 +1037,22 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
* offer comes in first and then the rescind.
* Since we process these events in work elements,
* and with preemption, we may end up processing
- * the events out of order. Given that we handle these
- * work elements on the same CPU, this is possible only
- * in the case of preemption. In any case wait here
- * until the offer processing has moved beyond the
- * point where the channel is discoverable.
+ * the events out of order. We rely on the synchronization
+ * provided by offer_in_progress and by channel_mutex for
+ * ordering these events:
+ *
+ * { Initially: offer_in_progress = 1 }
+ *
+ * CPU1 CPU2
+ *
+ * [vmbus_onoffer()] [vmbus_onoffer_rescind()]
+ *
+ * LOCK channel_mutex WAIT_ON offer_in_progress == 0
+ * DECREMENT offer_in_progress LOCK channel_mutex
+ * STORE channels[] LOAD channels[]
+ * UNLOCK channel_mutex UNLOCK channel_mutex
+ *
+ * Forbids: CPU2's LOAD from *not* seeing CPU1's STORE
*/
while (atomic_read(&vmbus_connection.offer_in_progress) != 0) {
@@ -1332,30 +1352,36 @@ static void vmbus_onversion_response(
/* Channel message dispatch table */
const struct vmbus_channel_message_table_entry
channel_message_table[CHANNELMSG_COUNT] = {
- { CHANNELMSG_INVALID, 0, NULL },
- { CHANNELMSG_OFFERCHANNEL, 0, vmbus_onoffer },
- { CHANNELMSG_RESCIND_CHANNELOFFER, 0, vmbus_onoffer_rescind },
- { CHANNELMSG_REQUESTOFFERS, 0, NULL },
- { CHANNELMSG_ALLOFFERS_DELIVERED, 1, vmbus_onoffers_delivered },
- { CHANNELMSG_OPENCHANNEL, 0, NULL },
- { CHANNELMSG_OPENCHANNEL_RESULT, 1, vmbus_onopen_result },
- { CHANNELMSG_CLOSECHANNEL, 0, NULL },
- { CHANNELMSG_GPADL_HEADER, 0, NULL },
- { CHANNELMSG_GPADL_BODY, 0, NULL },
- { CHANNELMSG_GPADL_CREATED, 1, vmbus_ongpadl_created },
- { CHANNELMSG_GPADL_TEARDOWN, 0, NULL },
- { CHANNELMSG_GPADL_TORNDOWN, 1, vmbus_ongpadl_torndown },
- { CHANNELMSG_RELID_RELEASED, 0, NULL },
- { CHANNELMSG_INITIATE_CONTACT, 0, NULL },
- { CHANNELMSG_VERSION_RESPONSE, 1, vmbus_onversion_response },
- { CHANNELMSG_UNLOAD, 0, NULL },
- { CHANNELMSG_UNLOAD_RESPONSE, 1, vmbus_unload_response },
- { CHANNELMSG_18, 0, NULL },
- { CHANNELMSG_19, 0, NULL },
- { CHANNELMSG_20, 0, NULL },
- { CHANNELMSG_TL_CONNECT_REQUEST, 0, NULL },
- { CHANNELMSG_22, 0, NULL },
- { CHANNELMSG_TL_CONNECT_RESULT, 0, NULL },
+ { CHANNELMSG_INVALID, 0, NULL, 0},
+ { CHANNELMSG_OFFERCHANNEL, 0, vmbus_onoffer,
+ sizeof(struct vmbus_channel_offer_channel)},
+ { CHANNELMSG_RESCIND_CHANNELOFFER, 0, vmbus_onoffer_rescind,
+ sizeof(struct vmbus_channel_rescind_offer) },
+ { CHANNELMSG_REQUESTOFFERS, 0, NULL, 0},
+ { CHANNELMSG_ALLOFFERS_DELIVERED, 1, vmbus_onoffers_delivered, 0},
+ { CHANNELMSG_OPENCHANNEL, 0, NULL, 0},
+ { CHANNELMSG_OPENCHANNEL_RESULT, 1, vmbus_onopen_result,
+ sizeof(struct vmbus_channel_open_result)},
+ { CHANNELMSG_CLOSECHANNEL, 0, NULL, 0},
+ { CHANNELMSG_GPADL_HEADER, 0, NULL, 0},
+ { CHANNELMSG_GPADL_BODY, 0, NULL, 0},
+ { CHANNELMSG_GPADL_CREATED, 1, vmbus_ongpadl_created,
+ sizeof(struct vmbus_channel_gpadl_created)},
+ { CHANNELMSG_GPADL_TEARDOWN, 0, NULL, 0},
+ { CHANNELMSG_GPADL_TORNDOWN, 1, vmbus_ongpadl_torndown,
+ sizeof(struct vmbus_channel_gpadl_torndown) },
+ { CHANNELMSG_RELID_RELEASED, 0, NULL, 0},
+ { CHANNELMSG_INITIATE_CONTACT, 0, NULL, 0},
+ { CHANNELMSG_VERSION_RESPONSE, 1, vmbus_onversion_response,
+ sizeof(struct vmbus_channel_version_response)},
+ { CHANNELMSG_UNLOAD, 0, NULL, 0},
+ { CHANNELMSG_UNLOAD_RESPONSE, 1, vmbus_unload_response, 0},
+ { CHANNELMSG_18, 0, NULL, 0},
+ { CHANNELMSG_19, 0, NULL, 0},
+ { CHANNELMSG_20, 0, NULL, 0},
+ { CHANNELMSG_TL_CONNECT_REQUEST, 0, NULL, 0},
+ { CHANNELMSG_MODIFYCHANNEL, 0, NULL, 0},
+ { CHANNELMSG_TL_CONNECT_RESULT, 0, NULL, 0},
};
/*
@@ -1363,13 +1389,8 @@ channel_message_table[CHANNELMSG_COUNT] = {
*
* This is invoked in the vmbus worker thread context.
*/
-void vmbus_onmessage(void *context)
+void vmbus_onmessage(struct vmbus_channel_message_header *hdr)
{
- struct hv_message *msg = context;
- struct vmbus_channel_message_header *hdr;
-
- hdr = (struct vmbus_channel_message_header *)msg->u.payload;
-
trace_vmbus_on_message(hdr);
/*
diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index 74e77de89b4f..11170d9a2e1a 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -69,7 +69,6 @@ MODULE_PARM_DESC(max_version,
int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, u32 version)
{
int ret = 0;
- unsigned int cur_cpu;
struct vmbus_channel_initiate_contact *msg;
unsigned long flags;
@@ -102,24 +101,7 @@ int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, u32 version)
msg->monitor_page1 = virt_to_phys(vmbus_connection.monitor_pages[0]);
msg->monitor_page2 = virt_to_phys(vmbus_connection.monitor_pages[1]);
- /*
- * We want all channel messages to be delivered on CPU 0.
- * This has been the behavior pre-win8. This is not
- * perf issue and having all channel messages delivered on CPU 0
- * would be ok.
- * For post win8 hosts, we support receiving channel messagges on
- * all the CPUs. This is needed for kexec to work correctly where
- * the CPU attempting to connect may not be CPU 0.
- */
- if (version >= VERSION_WIN8_1) {
- cur_cpu = get_cpu();
- msg->target_vcpu = hv_cpu_number_to_vp_number(cur_cpu);
- vmbus_connection.connect_cpu = cur_cpu;
- put_cpu();
- } else {
- msg->target_vcpu = 0;
- vmbus_connection.connect_cpu = 0;
- }
+ msg->target_vcpu = hv_cpu_number_to_vp_number(VMBUS_CONNECT_CPU);
/*
* Add to list before we send the request since we may
@@ -266,6 +248,14 @@ int vmbus_connect(void)
pr_info("Vmbus version:%d.%d\n",
version >> 16, version & 0xFFFF);
+ vmbus_connection.channels = kcalloc(MAX_CHANNEL_RELIDS,
+ sizeof(struct vmbus_channel *),
+ GFP_KERNEL);
+ if (vmbus_connection.channels == NULL) {
+ ret = -ENOMEM;
+ goto cleanup;
+ }
+
kfree(msginfo);
return 0;
@@ -313,33 +303,9 @@ void vmbus_disconnect(void)
*/
struct vmbus_channel *relid2channel(u32 relid)
{
- struct vmbus_channel *channel;
- struct vmbus_channel *found_channel = NULL;
- struct list_head *cur, *tmp;
- struct vmbus_channel *cur_sc;
-
- BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex));
-
- list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
- if (channel->offermsg.child_relid == relid) {
- found_channel = channel;
- break;
- } else if (!list_empty(&channel->sc_list)) {
- /*
- * Deal with sub-channels.
- */
- list_for_each_safe(cur, tmp, &channel->sc_list) {
- cur_sc = list_entry(cur, struct vmbus_channel,
- sc_list);
- if (cur_sc->offermsg.child_relid == relid) {
- found_channel = cur_sc;
- break;
- }
- }
- }
- }
-
- return found_channel;
+ if (WARN_ON(relid >= MAX_CHANNEL_RELIDS))
+ return NULL;
+ return READ_ONCE(vmbus_connection.channels[relid]);
}
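relid2channel() is now an O(1), bounds-checked array lookup; vmbus_channel_map_relid()/vmbus_channel_unmap_relid() in channel_mgmt.c above are the paired writers. Reduced to its essentials, the publish/read pairing is:

	/* writer (map): full barrier before the relid can be signalled */
	smp_store_mb(vmbus_connection.channels[relid], channel);

	/* writer (unmap) */
	WRITE_ONCE(vmbus_connection.channels[relid], NULL);

	/* reader (relid2channel, called under rcu_read_lock in vmbus_chan_sched) */
	if (WARN_ON(relid >= MAX_CHANNEL_RELIDS))
		return NULL;
	return READ_ONCE(vmbus_connection.channels[relid]);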
/*
diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
index 533c8b82b344..857290dcfd95 100644
--- a/drivers/hv/hv.c
+++ b/drivers/hv/hv.c
@@ -117,8 +117,6 @@ int hv_synic_alloc(void)
pr_err("Unable to allocate post msg page\n");
goto err;
}
-
- INIT_LIST_HEAD(&hv_cpu->chan_list);
}
return 0;
@@ -246,10 +244,18 @@ int hv_synic_cleanup(unsigned int cpu)
unsigned long flags;
/*
+ * Hyper-V does not provide a way to change the connect CPU once
+ * it is set; we must prevent the connect CPU from going offline.
+ */
+ if (cpu == VMBUS_CONNECT_CPU)
+ return -EBUSY;
+
+ /*
* Search for channels which are bound to the CPU we're about to
- * cleanup. In case we find one and vmbus is still connected we need to
- * fail, this will effectively prevent CPU offlining. There is no way
- * we can re-bind channels to different CPUs for now.
+ * clean up. In case we find one and vmbus is still connected, we
+ * fail; this will effectively prevent CPU offlining.
+ *
+ * TODO: Re-bind the channels to different CPUs.
*/
mutex_lock(&vmbus_connection.channel_mutex);
list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
diff --git a/drivers/hv/hv_fcopy.c b/drivers/hv/hv_fcopy.c
index bb9ba3f7c794..5040d7e0cd9e 100644
--- a/drivers/hv/hv_fcopy.c
+++ b/drivers/hv/hv_fcopy.c
@@ -71,7 +71,7 @@ static void fcopy_poll_wrapper(void *channel)
{
/* Transaction is finished, reset the state here to avoid races. */
fcopy_transaction.state = HVUTIL_READY;
- hv_fcopy_onchannelcallback(channel);
+ tasklet_schedule(&((struct vmbus_channel *)channel)->callback_event);
}
static void fcopy_timeout_func(struct work_struct *dummy)
diff --git a/drivers/hv/hv_snapshot.c b/drivers/hv/hv_snapshot.c
index 1c75b38f0d6d..783779e4cc1a 100644
--- a/drivers/hv/hv_snapshot.c
+++ b/drivers/hv/hv_snapshot.c
@@ -80,7 +80,7 @@ static void vss_poll_wrapper(void *channel)
{
/* Transaction is finished, reset the state here to avoid races. */
vss_transaction.state = HVUTIL_READY;
- hv_vss_onchannelcallback(channel);
+ tasklet_schedule(&((struct vmbus_channel *)channel)->callback_event);
}
/*
diff --git a/drivers/hv/hv_trace.h b/drivers/hv/hv_trace.h
index f9d14db980cb..6063bb21bb13 100644
--- a/drivers/hv/hv_trace.h
+++ b/drivers/hv/hv_trace.h
@@ -44,10 +44,8 @@ TRACE_EVENT(vmbus_onoffer,
__entry->monitorid = offer->monitorid;
__entry->is_ddc_int = offer->is_dedicated_interrupt;
__entry->connection_id = offer->connection_id;
- memcpy(__entry->if_type,
- &offer->offer.if_type.b, 16);
- memcpy(__entry->if_instance,
- &offer->offer.if_instance.b, 16);
+ export_guid(__entry->if_type, &offer->offer.if_type);
+ export_guid(__entry->if_instance, &offer->offer.if_instance);
__entry->chn_flags = offer->offer.chn_flags;
__entry->mmio_mb = offer->offer.mmio_megabytes;
__entry->sub_idx = offer->offer.sub_channel_index;
@@ -296,6 +294,25 @@ TRACE_EVENT(vmbus_send_tl_connect_request,
)
);
+TRACE_EVENT(vmbus_send_modifychannel,
+ TP_PROTO(const struct vmbus_channel_modifychannel *msg,
+ int ret),
+ TP_ARGS(msg, ret),
+ TP_STRUCT__entry(
+ __field(u32, child_relid)
+ __field(u32, target_vp)
+ __field(int, ret)
+ ),
+ TP_fast_assign(
+ __entry->child_relid = msg->child_relid;
+ __entry->target_vp = msg->target_vp;
+ __entry->ret = ret;
+ ),
+ TP_printk("binding child_relid 0x%x to target_vp 0x%x, ret %d",
+ __entry->child_relid, __entry->target_vp, __entry->ret
+ )
+ );
+
DECLARE_EVENT_CLASS(vmbus_channel,
TP_PROTO(const struct vmbus_channel *channel),
TP_ARGS(channel),
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index 70b30e223a57..40e2b9f91163 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -132,12 +132,6 @@ struct hv_per_cpu_context {
* basis.
*/
struct tasklet_struct msg_dpc;
-
- /*
- * To optimize the mapping of relid to channel, maintain
- * per-cpu list of the channels based on their CPU affinity.
- */
- struct list_head chan_list;
};
struct hv_context {
@@ -202,6 +196,8 @@ int hv_ringbuffer_read(struct vmbus_channel *channel,
/* TODO: Need to make this configurable */
#define MAX_NUM_CHANNELS_SUPPORTED 256
+#define MAX_CHANNEL_RELIDS \
+ max(MAX_NUM_CHANNELS_SUPPORTED, HV_EVENT_FLAGS_COUNT)
enum vmbus_connect_state {
DISCONNECTED,
@@ -212,12 +208,13 @@ enum vmbus_connect_state {
#define MAX_SIZE_CHANNEL_MESSAGE HV_MESSAGE_PAYLOAD_BYTE_COUNT
-struct vmbus_connection {
- /*
- * CPU on which the initial host contact was made.
- */
- int connect_cpu;
+/*
+ * The CPU that Hyper-V will interrupt for VMBUS messages, such as
+ * CHANNELMSG_OFFERCHANNEL and CHANNELMSG_RESCIND_CHANNELOFFER.
+ */
+#define VMBUS_CONNECT_CPU 0
+struct vmbus_connection {
u32 msg_conn_id;
atomic_t offer_in_progress;
@@ -250,6 +247,9 @@ struct vmbus_connection {
struct list_head chn_list;
struct mutex channel_mutex;
+ /* Array of channels */
+ struct vmbus_channel **channels;
+
/*
* An offer message is handled first on the work_queue, and then
* is further handled on handle_primary_chan_wq or
@@ -317,6 +317,7 @@ struct vmbus_channel_message_table_entry {
enum vmbus_channel_message_type message_type;
enum vmbus_message_handler_type handler_type;
void (*message_handler)(struct vmbus_channel_message_header *msg);
+ u32 min_payload_len;
};
extern const struct vmbus_channel_message_table_entry
@@ -336,6 +337,9 @@ int vmbus_add_channel_kobj(struct hv_device *device_obj,
void vmbus_remove_channel_attr_group(struct vmbus_channel *channel);
+void vmbus_channel_map_relid(struct vmbus_channel *channel);
+void vmbus_channel_unmap_relid(struct vmbus_channel *channel);
+
struct vmbus_channel *relid2channel(u32 relid);
void vmbus_free_channels(void);
@@ -374,12 +378,7 @@ static inline void hv_poll_channel(struct vmbus_channel *channel,
{
if (!channel)
return;
-
- if (in_interrupt() && (channel->target_cpu == smp_processor_id())) {
- cb(channel);
- return;
- }
- smp_call_function_single(channel->target_cpu, cb, channel, true);
+ cb(channel);
}
enum hvutil_device_state {
@@ -396,6 +395,54 @@ enum delay {
MESSAGE_DELAY = 1,
};
+extern const struct vmbus_device vmbus_devs[];
+
+static inline bool hv_is_perf_channel(struct vmbus_channel *channel)
+{
+ return vmbus_devs[channel->device_id].perf_device;
+}
+
+static inline bool hv_is_alloced_cpu(unsigned int cpu)
+{
+ struct vmbus_channel *channel, *sc;
+
+ lockdep_assert_held(&vmbus_connection.channel_mutex);
+ /*
+ * List additions/deletions as well as updates of the target CPUs are
+ * protected by channel_mutex.
+ */
+ list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
+ if (!hv_is_perf_channel(channel))
+ continue;
+ if (channel->target_cpu == cpu)
+ return true;
+ list_for_each_entry(sc, &channel->sc_list, sc_list) {
+ if (sc->target_cpu == cpu)
+ return true;
+ }
+ }
+ return false;
+}
+
+static inline void hv_set_alloced_cpu(unsigned int cpu)
+{
+ cpumask_set_cpu(cpu, &hv_context.hv_numa_map[cpu_to_node(cpu)]);
+}
+
+static inline void hv_clear_alloced_cpu(unsigned int cpu)
+{
+ if (hv_is_alloced_cpu(cpu))
+ return;
+ cpumask_clear_cpu(cpu, &hv_context.hv_numa_map[cpu_to_node(cpu)]);
+}
+
+static inline void hv_update_alloced_cpus(unsigned int old_cpu,
+ unsigned int new_cpu)
+{
+ hv_set_alloced_cpu(new_cpu);
+ hv_clear_alloced_cpu(old_cpu);
+}
+
#ifdef CONFIG_HYPERV_TESTING
int hv_debug_add_dev_dir(struct hv_device *dev);
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index e06c6b9555cf..9147ee9d5f7d 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -117,14 +117,6 @@ static int vmbus_exists(void)
return 0;
}
-#define VMBUS_ALIAS_LEN ((sizeof((struct hv_vmbus_device_id *)0)->guid) * 2)
-static void print_alias_name(struct hv_device *hv_dev, char *alias_name)
-{
- int i;
- for (i = 0; i < VMBUS_ALIAS_LEN; i += 2)
- sprintf(&alias_name[i], "%02x", hv_dev->dev_type.b[i/2]);
-}
-
static u8 channel_monitor_group(const struct vmbus_channel *channel)
{
return (u8)channel->offermsg.monitorid / 32;
@@ -201,7 +193,7 @@ static ssize_t class_id_show(struct device *dev,
if (!hv_dev->channel)
return -ENODEV;
return sprintf(buf, "{%pUl}\n",
- hv_dev->channel->offermsg.offer.if_type.b);
+ &hv_dev->channel->offermsg.offer.if_type);
}
static DEVICE_ATTR_RO(class_id);
@@ -213,7 +205,7 @@ static ssize_t device_id_show(struct device *dev,
if (!hv_dev->channel)
return -ENODEV;
return sprintf(buf, "{%pUl}\n",
- hv_dev->channel->offermsg.offer.if_instance.b);
+ &hv_dev->channel->offermsg.offer.if_instance);
}
static DEVICE_ATTR_RO(device_id);
@@ -221,10 +213,8 @@ static ssize_t modalias_show(struct device *dev,
struct device_attribute *dev_attr, char *buf)
{
struct hv_device *hv_dev = device_to_hv_device(dev);
- char alias_name[VMBUS_ALIAS_LEN + 1];
- print_alias_name(hv_dev, alias_name);
- return sprintf(buf, "vmbus:%s\n", alias_name);
+ return sprintf(buf, "vmbus:%*phN\n", UUID_SIZE, &hv_dev->dev_type);
}
static DEVICE_ATTR_RO(modalias);
@@ -693,12 +683,9 @@ __ATTRIBUTE_GROUPS(vmbus_dev);
static int vmbus_uevent(struct device *device, struct kobj_uevent_env *env)
{
struct hv_device *dev = device_to_hv_device(device);
- int ret;
- char alias_name[VMBUS_ALIAS_LEN + 1];
+ const char *format = "MODALIAS=vmbus:%*phN";
- print_alias_name(dev, alias_name);
- ret = add_uevent_var(env, "MODALIAS=vmbus:%s", alias_name);
- return ret;
+ return add_uevent_var(env, format, UUID_SIZE, &dev->dev_type);
}
static const struct hv_vmbus_device_id *
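The modalias is simply the 16 device-type GUID bytes rendered as 32 lowercase hex digits; %*phN is printk's hex-dump specifier and replaces the hand-rolled per-byte sprintf loop. Outside the kernel the equivalent output can be produced with a plain loop (illustrative only):

	#include <stdio.h>
	#include <stdint.h>

	/* Print "vmbus:<32 hex digits>" for 16 raw GUID bytes. */
	static void print_modalias(const uint8_t guid[16])
	{
		char buf[2 * 16 + 1];
		int i;

		for (i = 0; i < 16; i++)
			sprintf(&buf[2 * i], "%02x", guid[i]);
		printf("vmbus:%s\n", buf);
	}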
@@ -1033,7 +1020,10 @@ static struct bus_type hv_bus = {
struct onmessage_work_context {
struct work_struct work;
- struct hv_message msg;
+ struct {
+ struct hv_message_header header;
+ u8 payload[];
+ } msg;
};
static void vmbus_onmessage_work(struct work_struct *work)
@@ -1046,7 +1036,8 @@ static void vmbus_onmessage_work(struct work_struct *work)
ctx = container_of(work, struct onmessage_work_context,
work);
- vmbus_onmessage(&ctx->msg);
+ vmbus_onmessage((struct vmbus_channel_message_header *)
+ &ctx->msg.payload);
kfree(ctx);
}
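struct onmessage_work_context now ends in a flexible payload array, so each work item is allocated at exactly header-plus-payload size instead of always embedding a full struct hv_message. The allocation/copy idiom, sketched in plain C with malloc() standing in for kmalloc() and a simplified header type:

	#include <stdlib.h>
	#include <string.h>
	#include <stdint.h>

	struct msg_hdr {
		uint32_t type;
		uint32_t payload_size;
	};

	struct msg_ctx {
		struct msg_hdr header;
		uint8_t payload[];		/* flexible array member */
	};

	static struct msg_ctx *ctx_alloc(const struct msg_hdr *hdr,
					 const void *payload)
	{
		struct msg_ctx *ctx = malloc(sizeof(*ctx) + hdr->payload_size);

		if (!ctx)
			return NULL;
		ctx->header = *hdr;
		memcpy(ctx->payload, payload, hdr->payload_size);
		return ctx;
	}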
@@ -1061,6 +1052,13 @@ void vmbus_on_msg_dpc(unsigned long data)
struct onmessage_work_context *ctx;
u32 message_type = msg->header.message_type;
+ /*
+ * 'enum vmbus_channel_message_type' is supposed to always be 'u32' as
+ * it is being used in 'struct vmbus_channel_message_header' definition
+ * which is supposed to match hypervisor ABI.
+ */
+ BUILD_BUG_ON(sizeof(enum vmbus_channel_message_type) != sizeof(u32));
+
if (message_type == HVMSG_NONE)
/* no msg */
return;
@@ -1074,41 +1072,88 @@ void vmbus_on_msg_dpc(unsigned long data)
goto msg_handled;
}
+ if (msg->header.payload_size > HV_MESSAGE_PAYLOAD_BYTE_COUNT) {
+ WARN_ONCE(1, "payload size is too large (%d)\n",
+ msg->header.payload_size);
+ goto msg_handled;
+ }
+
entry = &channel_message_table[hdr->msgtype];
if (!entry->message_handler)
goto msg_handled;
+ if (msg->header.payload_size < entry->min_payload_len) {
+ WARN_ONCE(1, "message too short: msgtype=%d len=%d\n",
+ hdr->msgtype, msg->header.payload_size);
+ goto msg_handled;
+ }
+
if (entry->handler_type == VMHT_BLOCKING) {
- ctx = kmalloc(sizeof(*ctx), GFP_ATOMIC);
+ ctx = kmalloc(sizeof(*ctx) + msg->header.payload_size,
+ GFP_ATOMIC);
if (ctx == NULL)
return;
INIT_WORK(&ctx->work, vmbus_onmessage_work);
- memcpy(&ctx->msg, msg, sizeof(*msg));
+ memcpy(&ctx->msg, msg, sizeof(msg->header) +
+ msg->header.payload_size);
/*
* The host can generate a rescind message while we
* may still be handling the original offer. We deal with
- * this condition by ensuring the processing is done on the
- * same CPU.
+ * this condition by relying on the synchronization provided
+ * by offer_in_progress and by channel_mutex. See also the
+ * inline comments in vmbus_onoffer_rescind().
*/
switch (hdr->msgtype) {
case CHANNELMSG_RESCIND_CHANNELOFFER:
/*
* If we are handling the rescind message;
* schedule the work on the global work queue.
+ *
+ * The OFFER message and the RESCIND message should
+ * not be handled by the same serialized work queue,
+ * because the OFFER handler may call vmbus_open(),
+ * which tries to open the channel by sending an
+ * OPEN_CHANNEL message to the host and waits for
+ * the host's response; however, if the host has
+ * rescinded the channel before it receives the
+ * OPEN_CHANNEL message, the host just silently
+ * ignores the OPEN_CHANNEL message; as a result,
+ * the guest's OFFER handler hangs forever, if we
+ * handle the RESCIND message in the same serialized
+ * work queue: the RESCIND handler can not start to
+ * run before the OFFER handler finishes.
*/
- schedule_work_on(vmbus_connection.connect_cpu,
- &ctx->work);
+ schedule_work(&ctx->work);
break;
case CHANNELMSG_OFFERCHANNEL:
+ /*
+ * The host sends the offer message of a given channel
+ * before sending the rescind message of the same
+ * channel. These messages are sent to the guest's
+ * connect CPU; the guest then starts processing them
+ * in the tasklet handler on this CPU:
+ *
+ * VMBUS_CONNECT_CPU
+ *
+ * [vmbus_on_msg_dpc()]
+ * atomic_inc() // CHANNELMSG_OFFERCHANNEL
+ * queue_work()
+ * ...
+ * [vmbus_on_msg_dpc()]
+ * schedule_work() // CHANNELMSG_RESCIND_CHANNELOFFER
+ *
+ * We rely on the memory-ordering properties of the
+ * queue_work() and schedule_work() primitives, which
+ * guarantee that the atomic increment will be visible
+ * to the CPUs which will execute the offer & rescind
+ * works by the time these works will start execution.
+ */
atomic_inc(&vmbus_connection.offer_in_progress);
- queue_work_on(vmbus_connection.connect_cpu,
- vmbus_connection.work_queue,
- &ctx->work);
- break;
+ fallthrough;
default:
queue_work(vmbus_connection.work_queue, &ctx->work);
@@ -1133,10 +1178,11 @@ static void vmbus_force_channel_rescinded(struct vmbus_channel *channel)
WARN_ON(!is_hvsock_channel(channel));
/*
- * sizeof(*ctx) is small and the allocation should really not fail,
+ * Allocation size is small and the allocation should really not fail,
* otherwise the state of the hv_sock connections ends up in limbo.
*/
- ctx = kzalloc(sizeof(*ctx), GFP_KERNEL | __GFP_NOFAIL);
+ ctx = kzalloc(sizeof(*ctx) + sizeof(*rescind),
+ GFP_KERNEL | __GFP_NOFAIL);
/*
* So far, these are not really used by Linux. Just set them to the
@@ -1146,31 +1192,17 @@ static void vmbus_force_channel_rescinded(struct vmbus_channel *channel)
ctx->msg.header.payload_size = sizeof(*rescind);
/* These values are actually used by Linux. */
- rescind = (struct vmbus_channel_rescind_offer *)ctx->msg.u.payload;
+ rescind = (struct vmbus_channel_rescind_offer *)ctx->msg.payload;
rescind->header.msgtype = CHANNELMSG_RESCIND_CHANNELOFFER;
rescind->child_relid = channel->offermsg.child_relid;
INIT_WORK(&ctx->work, vmbus_onmessage_work);
- queue_work_on(vmbus_connection.connect_cpu,
- vmbus_connection.work_queue,
- &ctx->work);
+ queue_work(vmbus_connection.work_queue, &ctx->work);
}
#endif /* CONFIG_PM_SLEEP */
/*
- * Direct callback for channels using other deferred processing
- */
-static void vmbus_channel_isr(struct vmbus_channel *channel)
-{
- void (*callback_fn)(void *);
-
- callback_fn = READ_ONCE(channel->onchannel_callback);
- if (likely(callback_fn != NULL))
- (*callback_fn)(channel->channel_callback_context);
-}
-
-/*
* Schedule all channels with events pending
*/
static void vmbus_chan_sched(struct hv_per_cpu_context *hv_cpu)
@@ -1200,6 +1232,7 @@ static void vmbus_chan_sched(struct hv_per_cpu_context *hv_cpu)
return;
for_each_set_bit(relid, recv_int_page, maxbits) {
+ void (*callback_fn)(void *context);
struct vmbus_channel *channel;
if (!sync_test_and_clear_bit(relid, recv_int_page))
@@ -1209,33 +1242,54 @@ static void vmbus_chan_sched(struct hv_per_cpu_context *hv_cpu)
if (relid == 0)
continue;
+ /*
+ * Pairs with the kfree_rcu() in vmbus_chan_release().
+ * Guarantees that the channel data structure doesn't
+ * get freed while the channel pointer below is being
+ * dereferenced.
+ */
rcu_read_lock();
/* Find channel based on relid */
- list_for_each_entry_rcu(channel, &hv_cpu->chan_list, percpu_list) {
- if (channel->offermsg.child_relid != relid)
- continue;
+ channel = relid2channel(relid);
+ if (channel == NULL)
+ goto sched_unlock_rcu;
- if (channel->rescind)
- continue;
+ if (channel->rescind)
+ goto sched_unlock_rcu;
- trace_vmbus_chan_sched(channel);
+ /*
+ * Make sure that the ring buffer data structure doesn't get
+ * freed while we dereference the ring buffer pointer. Test
+ * for the channel's onchannel_callback being NULL within a
+ * sched_lock critical section. See also the inline comments
+ * in vmbus_reset_channel_cb().
+ */
+ spin_lock(&channel->sched_lock);
- ++channel->interrupts;
+ callback_fn = channel->onchannel_callback;
+ if (unlikely(callback_fn == NULL))
+ goto sched_unlock;
- switch (channel->callback_mode) {
- case HV_CALL_ISR:
- vmbus_channel_isr(channel);
- break;
+ trace_vmbus_chan_sched(channel);
- case HV_CALL_BATCHED:
- hv_begin_read(&channel->inbound);
- /* fallthrough */
- case HV_CALL_DIRECT:
- tasklet_schedule(&channel->callback_event);
- }
+ ++channel->interrupts;
+
+ switch (channel->callback_mode) {
+ case HV_CALL_ISR:
+ (*callback_fn)(channel->channel_callback_context);
+ break;
+
+ case HV_CALL_BATCHED:
+ hv_begin_read(&channel->inbound);
+ fallthrough;
+ case HV_CALL_DIRECT:
+ tasklet_schedule(&channel->callback_event);
}
+sched_unlock:
+ spin_unlock(&channel->sched_lock);
+sched_unlock_rcu:
rcu_read_unlock();
}
}
@@ -1364,7 +1418,6 @@ static int vmbus_bus_init(void)
{
int ret;
- /* Hypervisor initialization...setup hypercall page..etc */
ret = hv_init();
if (ret != 0) {
pr_err("Unable to initialize the hypervisor - 0x%x\n", ret);
@@ -1553,8 +1606,24 @@ static ssize_t vmbus_chan_attr_show(struct kobject *kobj,
return attribute->show(chan, buf);
}
+static ssize_t vmbus_chan_attr_store(struct kobject *kobj,
+ struct attribute *attr, const char *buf,
+ size_t count)
+{
+ const struct vmbus_chan_attribute *attribute
+ = container_of(attr, struct vmbus_chan_attribute, attr);
+ struct vmbus_channel *chan
+ = container_of(kobj, struct vmbus_channel, kobj);
+
+ if (!attribute->store)
+ return -EIO;
+
+ return attribute->store(chan, buf, count);
+}
+
static const struct sysfs_ops vmbus_chan_sysfs_ops = {
.show = vmbus_chan_attr_show,
+ .store = vmbus_chan_attr_store,
};
static ssize_t out_mask_show(struct vmbus_channel *channel, char *buf)
@@ -1625,11 +1694,110 @@ static ssize_t write_avail_show(struct vmbus_channel *channel, char *buf)
}
static VMBUS_CHAN_ATTR_RO(write_avail);
-static ssize_t show_target_cpu(struct vmbus_channel *channel, char *buf)
+static ssize_t target_cpu_show(struct vmbus_channel *channel, char *buf)
{
return sprintf(buf, "%u\n", channel->target_cpu);
}
-static VMBUS_CHAN_ATTR(cpu, S_IRUGO, show_target_cpu, NULL);
+static ssize_t target_cpu_store(struct vmbus_channel *channel,
+ const char *buf, size_t count)
+{
+ u32 target_cpu, origin_cpu;
+ ssize_t ret = count;
+
+ if (vmbus_proto_version < VERSION_WIN10_V4_1)
+ return -EIO;
+
+ if (sscanf(buf, "%uu", &target_cpu) != 1)
+ return -EIO;
+
+ /* Validate target_cpu for the cpumask_test_cpu() operation below. */
+ if (target_cpu >= nr_cpumask_bits)
+ return -EINVAL;
+
+ /* No CPUs should come up or down during this. */
+ cpus_read_lock();
+
+ if (!cpumask_test_cpu(target_cpu, cpu_online_mask)) {
+ cpus_read_unlock();
+ return -EINVAL;
+ }
+
+ /*
+ * Synchronizes target_cpu_store() and channel closure:
+ *
+ * { Initially: state = CHANNEL_OPENED }
+ *
+ * CPU1 CPU2
+ *
+ * [target_cpu_store()] [vmbus_disconnect_ring()]
+ *
+ * LOCK channel_mutex LOCK channel_mutex
+ * LOAD r1 = state LOAD r2 = state
+ * IF (r1 == CHANNEL_OPENED) IF (r2 == CHANNEL_OPENED)
+ * SEND MODIFYCHANNEL STORE state = CHANNEL_OPEN
+ * [...] SEND CLOSECHANNEL
+ * UNLOCK channel_mutex UNLOCK channel_mutex
+ *
+ * Forbids: r1 == r2 == CHANNEL_OPENED (i.e., CPU1's LOCK precedes
+ * CPU2's LOCK) && CPU2's SEND precedes CPU1's SEND
+ *
+ * Note. The host processes the channel messages "sequentially", in
+ * the order in which they are received on a per-partition basis.
+ */
+ mutex_lock(&vmbus_connection.channel_mutex);
+
+ /*
+ * Hyper-V will ignore MODIFYCHANNEL messages for "non-open" channels;
+ * avoid sending the message and fail here for such channels.
+ */
+ if (channel->state != CHANNEL_OPENED_STATE) {
+ ret = -EIO;
+ goto cpu_store_unlock;
+ }
+
+ origin_cpu = channel->target_cpu;
+ if (target_cpu == origin_cpu)
+ goto cpu_store_unlock;
+
+ if (vmbus_send_modifychannel(channel->offermsg.child_relid,
+ hv_cpu_number_to_vp_number(target_cpu))) {
+ ret = -EIO;
+ goto cpu_store_unlock;
+ }
+
+ /*
+ * Warning. At this point, there is *no* guarantee that the host will
+ * have successfully processed the vmbus_send_modifychannel() request.
+ * See the header comment of vmbus_send_modifychannel() for more info.
+ *
+ * Lags in the processing of the above vmbus_send_modifychannel() can
+ * result in missed interrupts if the "old" target CPU is taken offline
+ * before Hyper-V starts sending interrupts to the "new" target CPU.
+ * But apart from this offlining scenario, the code tolerates such
+ * lags. It will function correctly even if a channel interrupt comes
+ * in on a CPU that is different from the channel target_cpu value.
+ */
+
+ channel->target_cpu = target_cpu;
+ channel->target_vp = hv_cpu_number_to_vp_number(target_cpu);
+ channel->numa_node = cpu_to_node(target_cpu);
+
+ /* See init_vp_index(). */
+ if (hv_is_perf_channel(channel))
+ hv_update_alloced_cpus(origin_cpu, target_cpu);
+
+ /* Currently set only for storvsc channels. */
+ if (channel->change_target_cpu_callback) {
+ (*channel->change_target_cpu_callback)(channel,
+ origin_cpu, target_cpu);
+ }
+
+cpu_store_unlock:
+ mutex_unlock(&vmbus_connection.channel_mutex);
+ cpus_read_unlock();
+ return ret;
+}
+static VMBUS_CHAN_ATTR(cpu, 0644, target_cpu_show, target_cpu_store);
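With the attribute mode changed to 0644, an administrator can retarget a channel's interrupts at run time by writing a CPU number to the channel's "cpu" file under the device's channels directory in sysfs (exact path per the VMBus sysfs ABI); as the code above shows, the write is rejected with -EIO on hosts older than WIN10_V4_1 or when the channel is not in CHANNEL_OPENED_STATE, and with -EINVAL for an offline or out-of-range CPU.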
static ssize_t channel_pending_show(struct vmbus_channel *channel,
char *buf)
@@ -1830,7 +1998,7 @@ int vmbus_device_register(struct hv_device *child_device_obj)
int ret;
dev_set_name(&child_device_obj->device, "%pUl",
- child_device_obj->channel->offermsg.offer.if_instance.b);
+ &child_device_obj->channel->offermsg.offer.if_instance);
child_device_obj->device.bus = &hv_bus;
child_device_obj->device.parent = &hv_acpi_dev->dev;
@@ -2221,9 +2389,12 @@ static int vmbus_bus_suspend(struct device *dev)
list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
/*
- * Invalidate the field. Upon resume, vmbus_onoffer() will fix
- * up the field, and the other fields (if necessary).
+ * Remove the channel from the array of channels and invalidate
+ * the channel's relid. Upon resume, vmbus_onoffer() will fix
+ * up the relid (and other fields, if necessary) and add the
+ * channel back to the array.
*/
+ vmbus_channel_unmap_relid(channel);
channel->offermsg.child_relid = INVALID_RELID;
if (is_hvsock_channel(channel)) {
@@ -2470,6 +2641,7 @@ static void __exit vmbus_exit(void)
hv_debug_rm_all_dir();
vmbus_free_channels();
+ kfree(vmbus_connection.channels);
if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) {
kmsg_dump_unregister(&hv_kmsg_dumper);
diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index 4c62f900bf7e..288ae9f63588 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -324,6 +324,16 @@ config SENSORS_FAM15H_POWER
This driver can also be built as a module. If so, the module
will be called fam15h_power.
+config SENSORS_AMD_ENERGY
+ tristate "AMD RAPL MSR based Energy driver"
+ depends on X86
+ help
+ If you say yes here you get support for core and package energy
+ sensors, based on RAPL MSR for AMD family 17h and above CPUs.
+
+ This driver can also be built as a module. If so, the module
+	  will be called amd_energy.
+
config SENSORS_APPLESMC
tristate "Apple SMC (Motion sensor, light sensor, keyboard backlight)"
depends on INPUT && X86
@@ -404,6 +414,31 @@ config SENSORS_ATXP1
This driver can also be built as a module. If so, the module
will be called atxp1.
+config SENSORS_BT1_PVT
+ tristate "Baikal-T1 Process, Voltage, Temperature sensor driver"
+ depends on MIPS_BAIKAL_T1 || COMPILE_TEST
+ help
+	  If you say yes here you get support for the Baikal-T1 PVT sensor
+	  embedded into the SoC.
+
+ This driver can also be built as a module. If so, the module will be
+ called bt1-pvt.
+
+config SENSORS_BT1_PVT_ALARMS
+ bool "Enable Baikal-T1 PVT sensor alarms"
+ depends on SENSORS_BT1_PVT
+ help
+	  Baikal-T1 PVT IP-block provides threshold registers for each
+	  supported sensor, but the corresponding interrupts can be generated
+	  by the thresholds comparator only in synchronization with a data
+	  conversion, and only one sensor can be converted at a time. This
+	  makes it impossible to implement the hwmon alarms without
+	  periodically switching between the PVT sensors. By default the data
+	  conversion is performed on demand from user-space. If this config is
+	  enabled, the data conversion is performed periodically and the
+	  results are saved in the internal driver cache.
+
config SENSORS_DRIVETEMP
tristate "Hard disk drives with temperature sensors"
depends on SCSI && ATA
@@ -523,6 +558,15 @@ config SENSORS_F75375S
This driver can also be built as a module. If so, the module
will be called f75375s.
+config SENSORS_GSC
+ tristate "Gateworks System Controller ADC"
+ depends on MFD_GATEWORKS_GSC
+ help
+ Support for the Gateworks System Controller A/D converters.
+
+ To compile this driver as a module, choose M here:
+ the module will be called gsc-hwmon.
+
config SENSORS_MC13783_ADC
tristate "Freescale MC13783/MC13892 ADC"
depends on MFD_MC13XXX
@@ -1198,10 +1242,11 @@ config SENSORS_LM90
help
If you say yes here you get support for National Semiconductor LM90,
LM86, LM89 and LM99, Analog Devices ADM1032, ADT7461, and ADT7461A,
- Maxim MAX6646, MAX6647, MAX6648, MAX6649, MAX6657, MAX6658, MAX6659,
- MAX6680, MAX6681, MAX6692, MAX6695, MAX6696, ON Semiconductor NCT1008,
- Winbond/Nuvoton W83L771W/G/AWG/ASG, Philips SA56004, GMT G781, and
- Texas Instruments TMP451 sensor chips.
+ Maxim MAX6646, MAX6647, MAX6648, MAX6649, MAX6654, MAX6657, MAX6658,
+ MAX6659, MAX6680, MAX6681, MAX6692, MAX6695, MAX6696,
+ ON Semiconductor NCT1008, Winbond/Nuvoton W83L771W/G/AWG/ASG,
+ Philips SA56004, GMT G781, and Texas Instruments TMP451
+ sensor chips.
This driver can also be built as a module. If so, the module
will be called lm90.
@@ -1340,10 +1385,12 @@ config SENSORS_NCT7802
config SENSORS_NCT7904
tristate "Nuvoton NCT7904"
- depends on I2C
+ depends on I2C && WATCHDOG
+ select WATCHDOG_CORE
help
If you say yes here you get support for the Nuvoton NCT7904
- hardware monitoring chip, including manual fan speed control.
+ hardware monitoring chip, including manual fan speed control
+ and support for the integrated watchdog.
This driver can also be built as a module. If so, the module
will be called nct7904.
diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile
index b0b9c8e57176..3e32c21f5efe 100644
--- a/drivers/hwmon/Makefile
+++ b/drivers/hwmon/Makefile
@@ -45,6 +45,7 @@ obj-$(CONFIG_SENSORS_ADT7411) += adt7411.o
obj-$(CONFIG_SENSORS_ADT7462) += adt7462.o
obj-$(CONFIG_SENSORS_ADT7470) += adt7470.o
obj-$(CONFIG_SENSORS_ADT7475) += adt7475.o
+obj-$(CONFIG_SENSORS_AMD_ENERGY) += amd_energy.o
obj-$(CONFIG_SENSORS_APPLESMC) += applesmc.o
obj-$(CONFIG_SENSORS_ARM_SCMI) += scmi-hwmon.o
obj-$(CONFIG_SENSORS_ARM_SCPI) += scpi-hwmon.o
@@ -53,6 +54,7 @@ obj-$(CONFIG_SENSORS_ASC7621) += asc7621.o
obj-$(CONFIG_SENSORS_ASPEED) += aspeed-pwm-tacho.o
obj-$(CONFIG_SENSORS_ATXP1) += atxp1.o
obj-$(CONFIG_SENSORS_AXI_FAN_CONTROL) += axi-fan-control.o
+obj-$(CONFIG_SENSORS_BT1_PVT) += bt1-pvt.o
obj-$(CONFIG_SENSORS_CORETEMP) += coretemp.o
obj-$(CONFIG_SENSORS_DA9052_ADC)+= da9052-hwmon.o
obj-$(CONFIG_SENSORS_DA9055)+= da9055-hwmon.o
@@ -74,6 +76,7 @@ obj-$(CONFIG_SENSORS_G760A) += g760a.o
obj-$(CONFIG_SENSORS_G762) += g762.o
obj-$(CONFIG_SENSORS_GL518SM) += gl518sm.o
obj-$(CONFIG_SENSORS_GL520SM) += gl520sm.o
+obj-$(CONFIG_SENSORS_GSC) += gsc-hwmon.o
obj-$(CONFIG_SENSORS_GPIO_FAN) += gpio-fan.o
obj-$(CONFIG_SENSORS_HIH6130) += hih6130.o
obj-$(CONFIG_SENSORS_ULTRA45) += ultra45_env.o
diff --git a/drivers/hwmon/adt7411.c b/drivers/hwmon/adt7411.c
index c7010b91bc13..5a839cc2ed1c 100644
--- a/drivers/hwmon/adt7411.c
+++ b/drivers/hwmon/adt7411.c
@@ -716,7 +716,6 @@ static struct i2c_driver adt7411_driver = {
module_i2c_driver(adt7411_driver);
-MODULE_AUTHOR("Sascha Hauer <s.hauer@pengutronix.de> and "
- "Wolfram Sang <w.sang@pengutronix.de>");
+MODULE_AUTHOR("Sascha Hauer, Wolfram Sang <kernel@pengutronix.de>");
MODULE_DESCRIPTION("ADT7411 driver");
MODULE_LICENSE("GPL v2");
diff --git a/drivers/hwmon/amd_energy.c b/drivers/hwmon/amd_energy.c
new file mode 100644
index 000000000000..e95b7426106e
--- /dev/null
+++ b/drivers/hwmon/amd_energy.c
@@ -0,0 +1,408 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright (C) 2020 Advanced Micro Devices, Inc.
+ */
+#include <asm/cpu_device_id.h>
+
+#include <linux/bits.h>
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/hwmon.h>
+#include <linux/kernel.h>
+#include <linux/kthread.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/processor.h>
+#include <linux/platform_device.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/topology.h>
+#include <linux/types.h>
+
+#define DRVNAME "amd_energy"
+
+#define ENERGY_PWR_UNIT_MSR 0xC0010299
+#define ENERGY_CORE_MSR 0xC001029A
+#define ENERGY_PKG_MSR 0xC001029B
+
+#define AMD_ENERGY_UNIT_MASK 0x01F00
+#define AMD_ENERGY_MASK 0xFFFFFFFF
+
+struct sensor_accumulator {
+ u64 energy_ctr;
+ u64 prev_value;
+ char label[10];
+};
+
+struct amd_energy_data {
+ struct hwmon_channel_info energy_info;
+ const struct hwmon_channel_info *info[2];
+ struct hwmon_chip_info chip;
+ struct task_struct *wrap_accumulate;
+ /* Lock around the accumulator */
+ struct mutex lock;
+ /* An accumulator for each core and socket */
+ struct sensor_accumulator *accums;
+ /* Energy Status Units */
+ u64 energy_units;
+ int nr_cpus;
+ int nr_socks;
+ int core_id;
+};
+
+static int amd_energy_read_labels(struct device *dev,
+ enum hwmon_sensor_types type,
+ u32 attr, int channel,
+ const char **str)
+{
+ struct amd_energy_data *data = dev_get_drvdata(dev);
+
+ *str = data->accums[channel].label;
+ return 0;
+}
+
+static void get_energy_units(struct amd_energy_data *data)
+{
+ u64 rapl_units;
+
+ rdmsrl_safe(ENERGY_PWR_UNIT_MSR, &rapl_units);
+ data->energy_units = (rapl_units & AMD_ENERGY_UNIT_MASK) >> 8;
+}
+
+static void accumulate_socket_delta(struct amd_energy_data *data,
+ int sock, int cpu)
+{
+ struct sensor_accumulator *s_accum;
+ u64 input;
+
+ mutex_lock(&data->lock);
+ rdmsrl_safe_on_cpu(cpu, ENERGY_PKG_MSR, &input);
+ input &= AMD_ENERGY_MASK;
+
+ s_accum = &data->accums[data->nr_cpus + sock];
+ if (input >= s_accum->prev_value)
+ s_accum->energy_ctr +=
+ input - s_accum->prev_value;
+ else
+ s_accum->energy_ctr += UINT_MAX -
+ s_accum->prev_value + input;
+
+ s_accum->prev_value = input;
+ mutex_unlock(&data->lock);
+}
+
+static void accumulate_core_delta(struct amd_energy_data *data)
+{
+ struct sensor_accumulator *c_accum;
+ u64 input;
+ int cpu;
+
+ mutex_lock(&data->lock);
+ if (data->core_id >= data->nr_cpus)
+ data->core_id = 0;
+
+ cpu = data->core_id;
+
+ if (!cpu_online(cpu))
+ goto out;
+
+ rdmsrl_safe_on_cpu(cpu, ENERGY_CORE_MSR, &input);
+ input &= AMD_ENERGY_MASK;
+
+ c_accum = &data->accums[cpu];
+
+ if (input >= c_accum->prev_value)
+ c_accum->energy_ctr +=
+ input - c_accum->prev_value;
+ else
+ c_accum->energy_ctr += UINT_MAX -
+ c_accum->prev_value + input;
+
+ c_accum->prev_value = input;
+
+out:
+ data->core_id++;
+ mutex_unlock(&data->lock);
+}
+
+static void read_accumulate(struct amd_energy_data *data)
+{
+ int sock;
+
+ for (sock = 0; sock < data->nr_socks; sock++) {
+ int cpu;
+
+ cpu = cpumask_first_and(cpu_online_mask,
+ cpumask_of_node(sock));
+
+ accumulate_socket_delta(data, sock, cpu);
+ }
+
+ accumulate_core_delta(data);
+}
+
+static void amd_add_delta(struct amd_energy_data *data, int ch,
+ int cpu, long *val, bool is_core)
+{
+ struct sensor_accumulator *s_accum, *c_accum;
+ u64 input;
+
+ mutex_lock(&data->lock);
+ if (!is_core) {
+ rdmsrl_safe_on_cpu(cpu, ENERGY_PKG_MSR, &input);
+ input &= AMD_ENERGY_MASK;
+
+ s_accum = &data->accums[ch];
+ if (input >= s_accum->prev_value)
+ input += s_accum->energy_ctr -
+ s_accum->prev_value;
+ else
+ input += UINT_MAX - s_accum->prev_value +
+ s_accum->energy_ctr;
+ } else {
+ rdmsrl_safe_on_cpu(cpu, ENERGY_CORE_MSR, &input);
+ input &= AMD_ENERGY_MASK;
+
+ c_accum = &data->accums[ch];
+ if (input >= c_accum->prev_value)
+ input += c_accum->energy_ctr -
+ c_accum->prev_value;
+ else
+ input += UINT_MAX - c_accum->prev_value +
+ c_accum->energy_ctr;
+ }
+
+ /* Energy consumed = (1/(2^ESU) * RAW * 1000000UL) μJoules */
+ *val = div64_ul(input * 1000000UL, BIT(data->energy_units));
+
+ mutex_unlock(&data->lock);
+}
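The final line above scales the accumulated raw counter into microjoules. A self-contained sketch of the same conversion; the 2^-16 J (ESU = 16) resolution is only the usual default used for illustration, the driver always reads the real value from the MSR:

    #include <stdint.h>

    /* energy [uJ] = raw * 1000000 / 2^esu; with esu = 16 one raw LSB is
     * ~15.3 uJ, so raw = 1000 reads back as 15258 uJ.
     */
    static uint64_t raw_to_microjoules(uint64_t raw, unsigned int esu)
    {
            return (raw * 1000000ULL) >> esu;
    }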
+
+static int amd_energy_read(struct device *dev,
+ enum hwmon_sensor_types type,
+ u32 attr, int channel, long *val)
+{
+ struct amd_energy_data *data = dev_get_drvdata(dev);
+ int cpu;
+
+ if (channel >= data->nr_cpus) {
+ cpu = cpumask_first_and(cpu_online_mask,
+ cpumask_of_node
+ (channel - data->nr_cpus));
+ amd_add_delta(data, channel, cpu, val, false);
+ } else {
+ cpu = channel;
+ if (!cpu_online(cpu))
+ return -ENODEV;
+
+ amd_add_delta(data, channel, cpu, val, true);
+ }
+
+ return 0;
+}
+
+static umode_t amd_energy_is_visible(const void *_data,
+ enum hwmon_sensor_types type,
+ u32 attr, int channel)
+{
+ return 0444;
+}
+
+static int energy_accumulator(void *p)
+{
+ struct amd_energy_data *data = (struct amd_energy_data *)p;
+
+ while (!kthread_should_stop()) {
+ /*
+ * Ignoring the conditions such as
+ * cpu being offline or rdmsr failure
+ */
+ read_accumulate(data);
+
+ set_current_state(TASK_INTERRUPTIBLE);
+ if (kthread_should_stop())
+ break;
+
+ /*
+		 * On a 240 W system, with the default resolution the
+		 * socket energy status register may wrap around in
+		 * 2^32 * 15.3e-6 / 240 = 273.8 secs (~4.5 mins),
+		 *
+		 * so let us accumulate every 100 secs.
+ */
+ schedule_timeout(msecs_to_jiffies(100000));
+ }
+ return 0;
+}
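A back-of-envelope check of the wrap-around figure quoted in the comment above; the 240 W load and the 2^-16 J default resolution are the comment's own assumptions:

    /* Full scale of the 32-bit counter at 2^-16 J per LSB is
     * 2^32 / 2^16 = 65536 J; at a constant 240 W draw that is
     * 65536 / 240 ~= 273 s, so sampling every 100 s sees at most one
     * wrap between reads, which the accumulation logic handles.
     */
    static unsigned long pkg_wrap_seconds(unsigned long watts)
    {
            return 65536UL / watts;
    }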
+
+static const struct hwmon_ops amd_energy_ops = {
+ .is_visible = amd_energy_is_visible,
+ .read = amd_energy_read,
+ .read_string = amd_energy_read_labels,
+};
+
+static int amd_create_sensor(struct device *dev,
+ struct amd_energy_data *data,
+ u8 type, u32 config)
+{
+ struct hwmon_channel_info *info = &data->energy_info;
+ struct sensor_accumulator *accums;
+ int i, num_siblings, cpus, sockets;
+ u32 *s_config;
+
+ /* Identify the number of siblings per core */
+ num_siblings = ((cpuid_ebx(0x8000001e) >> 8) & 0xff) + 1;
+
+ sockets = num_possible_nodes();
+
+ /*
+ * Energy counter register is accessed at core level.
+	 * Hence, filter out the siblings.
+ */
+ cpus = num_present_cpus() / num_siblings;
+
+ s_config = devm_kcalloc(dev, cpus + sockets,
+ sizeof(u32), GFP_KERNEL);
+ if (!s_config)
+ return -ENOMEM;
+
+ accums = devm_kcalloc(dev, cpus + sockets,
+ sizeof(struct sensor_accumulator),
+ GFP_KERNEL);
+ if (!accums)
+ return -ENOMEM;
+
+ info->type = type;
+ info->config = s_config;
+
+ data->nr_cpus = cpus;
+ data->nr_socks = sockets;
+ data->accums = accums;
+
+ for (i = 0; i < cpus + sockets; i++) {
+ s_config[i] = config;
+ if (i < cpus)
+ scnprintf(accums[i].label, 10,
+ "Ecore%03u", i);
+ else
+ scnprintf(accums[i].label, 10,
+ "Esocket%u", (i - cpus));
+ }
+
+ return 0;
+}
+
+static int amd_energy_probe(struct platform_device *pdev)
+{
+ struct device *hwmon_dev;
+ struct amd_energy_data *data;
+ struct device *dev = &pdev->dev;
+
+ data = devm_kzalloc(dev,
+ sizeof(struct amd_energy_data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ data->chip.ops = &amd_energy_ops;
+ data->chip.info = data->info;
+
+ dev_set_drvdata(dev, data);
+ /* Populate per-core energy reporting */
+ data->info[0] = &data->energy_info;
+ amd_create_sensor(dev, data, hwmon_energy,
+ HWMON_E_INPUT | HWMON_E_LABEL);
+
+ mutex_init(&data->lock);
+ get_energy_units(data);
+
+ hwmon_dev = devm_hwmon_device_register_with_info(dev, DRVNAME,
+ data,
+ &data->chip,
+ NULL);
+ if (IS_ERR(hwmon_dev))
+ return PTR_ERR(hwmon_dev);
+
+ data->wrap_accumulate = kthread_run(energy_accumulator, data,
+ "%s", dev_name(hwmon_dev));
+ if (IS_ERR(data->wrap_accumulate))
+ return PTR_ERR(data->wrap_accumulate);
+
+ return PTR_ERR_OR_ZERO(data->wrap_accumulate);
+}
+
+static int amd_energy_remove(struct platform_device *pdev)
+{
+ struct amd_energy_data *data = dev_get_drvdata(&pdev->dev);
+
+ if (data && data->wrap_accumulate)
+ kthread_stop(data->wrap_accumulate);
+
+ return 0;
+}
+
+static const struct platform_device_id amd_energy_ids[] = {
+ { .name = DRVNAME, },
+ {}
+};
+MODULE_DEVICE_TABLE(platform, amd_energy_ids);
+
+static struct platform_driver amd_energy_driver = {
+ .probe = amd_energy_probe,
+ .remove = amd_energy_remove,
+ .id_table = amd_energy_ids,
+ .driver = {
+ .name = DRVNAME,
+ },
+};
+
+static struct platform_device *amd_energy_platdev;
+
+static const struct x86_cpu_id cpu_ids[] __initconst = {
+ X86_MATCH_VENDOR_FAM(AMD, 0x17, NULL),
+ {}
+};
+MODULE_DEVICE_TABLE(x86cpu, cpu_ids);
+
+static int __init amd_energy_init(void)
+{
+ int ret;
+
+ if (!x86_match_cpu(cpu_ids))
+ return -ENODEV;
+
+ ret = platform_driver_register(&amd_energy_driver);
+ if (ret)
+ return ret;
+
+ amd_energy_platdev = platform_device_alloc(DRVNAME, 0);
+ if (!amd_energy_platdev) {
+ platform_driver_unregister(&amd_energy_driver);
+ return -ENOMEM;
+ }
+
+ ret = platform_device_add(amd_energy_platdev);
+ if (ret) {
+ platform_device_put(amd_energy_platdev);
+ platform_driver_unregister(&amd_energy_driver);
+ return ret;
+ }
+
+ return ret;
+}
+
+static void __exit amd_energy_exit(void)
+{
+ platform_device_unregister(amd_energy_platdev);
+ platform_driver_unregister(&amd_energy_driver);
+}
+
+module_init(amd_energy_init);
+module_exit(amd_energy_exit);
+
+MODULE_DESCRIPTION("Driver for AMD Energy reporting from RAPL MSR via HWMON interface");
+MODULE_AUTHOR("Naveen Krishna Chatradhi <nchatrad@amd.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c
index ec93b8d673f5..316618409315 100644
--- a/drivers/hwmon/applesmc.c
+++ b/drivers/hwmon/applesmc.c
@@ -156,14 +156,19 @@ static struct workqueue_struct *applesmc_led_wq;
*/
static int wait_read(void)
{
+ unsigned long end = jiffies + (APPLESMC_MAX_WAIT * HZ) / USEC_PER_SEC;
u8 status;
int us;
+
for (us = APPLESMC_MIN_WAIT; us < APPLESMC_MAX_WAIT; us <<= 1) {
- udelay(us);
+ usleep_range(us, us * 16);
status = inb(APPLESMC_CMD_PORT);
/* read: wait for smc to settle */
if (status & 0x01)
return 0;
+ /* timeout: give up */
+ if (time_after(jiffies, end))
+ break;
}
pr_warn("wait_read() fail: 0x%02x\n", status);
@@ -178,10 +183,11 @@ static int send_byte(u8 cmd, u16 port)
{
u8 status;
int us;
+ unsigned long end = jiffies + (APPLESMC_MAX_WAIT * HZ) / USEC_PER_SEC;
outb(cmd, port);
for (us = APPLESMC_MIN_WAIT; us < APPLESMC_MAX_WAIT; us <<= 1) {
- udelay(us);
+ usleep_range(us, us * 16);
status = inb(APPLESMC_CMD_PORT);
/* write: wait for smc to settle */
if (status & 0x02)
@@ -190,7 +196,7 @@ static int send_byte(u8 cmd, u16 port)
if (status & 0x04)
return 0;
/* timeout: give up */
- if (us << 1 == APPLESMC_MAX_WAIT)
+ if (time_after(jiffies, end))
break;
/* busy: long wait and resend */
udelay(APPLESMC_RETRY_WAIT);
diff --git a/drivers/hwmon/bt1-pvt.c b/drivers/hwmon/bt1-pvt.c
new file mode 100644
index 000000000000..1a9772fb1f73
--- /dev/null
+++ b/drivers/hwmon/bt1-pvt.c
@@ -0,0 +1,1146 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020 BAIKAL ELECTRONICS, JSC
+ *
+ * Authors:
+ * Maxim Kaurkin <maxim.kaurkin@baikalelectronics.ru>
+ * Serge Semin <Sergey.Semin@baikalelectronics.ru>
+ *
+ * Baikal-T1 Process, Voltage, Temperature sensor driver
+ */
+
+#include <linux/bitfield.h>
+#include <linux/bitops.h>
+#include <linux/clk.h>
+#include <linux/completion.h>
+#include <linux/device.h>
+#include <linux/hwmon-sysfs.h>
+#include <linux/hwmon.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/ktime.h>
+#include <linux/limits.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/seqlock.h>
+#include <linux/sysfs.h>
+#include <linux/types.h>
+
+#include "bt1-pvt.h"
+
+/*
+ * For the sake of code simplification we keep a sensors info table with the
+ * sensor names, activation modes, threshold registers base addresses and the
+ * threshold bit fields.
+ */
+static const struct pvt_sensor_info pvt_info[] = {
+ PVT_SENSOR_INFO(0, "CPU Core Temperature", hwmon_temp, TEMP, TTHRES),
+ PVT_SENSOR_INFO(0, "CPU Core Voltage", hwmon_in, VOLT, VTHRES),
+ PVT_SENSOR_INFO(1, "CPU Core Low-Vt", hwmon_in, LVT, LTHRES),
+ PVT_SENSOR_INFO(2, "CPU Core High-Vt", hwmon_in, HVT, HTHRES),
+ PVT_SENSOR_INFO(3, "CPU Core Standard-Vt", hwmon_in, SVT, STHRES),
+};
+
+/*
+ * The original translation formulae of the temperature (in degrees of Celsius)
+ * to PVT data and vice-versa are following:
+ * N = 1.8322e-8*(T^4) + 2.343e-5*(T^3) + 8.7018e-3*(T^2) + 3.9269*(T^1) +
+ * 1.7204e2,
+ * T = -1.6743e-11*(N^4) + 8.1542e-8*(N^3) + -1.8201e-4*(N^2) +
+ * 3.1020e-1*(N^1) - 4.838e1,
+ * where T = [-48.380, 147.438]C and N = [0, 1023].
+ * They must be altered accordingly to be suitable for integer arithmetic.
+ * The technique is called 'factor redistribution', which just makes sure the
+ * multiplications and divisions are performed so that the result of each
+ * operation stays within the integer limits. In addition we need to translate
+ * the formulae to accept millidegrees of Celsius. Here is what they look like
+ * after the alterations:
+ * N = (18322e-20*(T^4) + 2343e-13*(T^3) + 87018e-9*(T^2) + 39269e-3*T +
+ * 17204e2) / 1e4,
+ * T = -16743e-12*(D^4) + 81542e-9*(D^3) - 182010e-6*(D^2) + 310200e-3*D -
+ * 48380,
+ * where T = [-48380, 147438] mC and N = [0, 1023].
+ */
+static const struct pvt_poly poly_temp_to_N = {
+ .total_divider = 10000,
+ .terms = {
+ {4, 18322, 10000, 10000},
+ {3, 2343, 10000, 10},
+ {2, 87018, 10000, 10},
+ {1, 39269, 1000, 1},
+ {0, 1720400, 1, 1}
+ }
+};
+
+static const struct pvt_poly poly_N_to_temp = {
+ .total_divider = 1,
+ .terms = {
+ {4, -16743, 1000, 1},
+ {3, 81542, 1000, 1},
+ {2, -182010, 1000, 1},
+ {1, 310200, 1000, 1},
+ {0, -48380, 1, 1}
+ }
+};
+
+/*
+ * Similar alterations are performed for the voltage conversion equations.
+ * The original formulae are:
+ * N = 1.8658e3*V - 1.1572e3,
+ * V = (N + 1.1572e3) / 1.8658e3,
+ * where V = [0.620, 1.168] V and N = [0, 1023].
+ * After the optimization they look as follows:
+ * N = (18658e-3*V - 11572) / 10,
+ * V = N * 10^5 / 18658 + 11572 * 10^4 / 18658.
+ */
+static const struct pvt_poly poly_volt_to_N = {
+ .total_divider = 10,
+ .terms = {
+ {1, 18658, 1000, 1},
+ {0, -11572, 1, 1}
+ }
+};
+
+static const struct pvt_poly poly_N_to_volt = {
+ .total_divider = 10,
+ .terms = {
+ {1, 100000, 18658, 1},
+ {0, 115720000, 1, 18658}
+ }
+};
+
+/*
+ * Here is the polynomial calculation function, which performs the
+ * redistributed terms calculations. It's pretty straightforward. We walk
+ * over each degree term up to the free one, and perform the redistributed
+ * multiplication of the term coefficient, its divider (as for the rationale
+ * fraction representation), data power and the rational fraction divider
+ * leftover. Then all of this is collected in a total sum variable, which
+ * value is normalized by the total divider before being returned.
+ */
+static long pvt_calc_poly(const struct pvt_poly *poly, long data)
+{
+ const struct pvt_poly_term *term = poly->terms;
+ long tmp, ret = 0;
+ int deg;
+
+ do {
+ tmp = term->coef;
+ for (deg = 0; deg < term->deg; ++deg)
+ tmp = mult_frac(tmp, data, term->divider);
+ ret += tmp / term->divider_leftover;
+ } while ((term++)->deg);
+
+ return ret / poly->total_divider;
+}
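A hedged usage sketch of the helper above; the numbers are hand-checked approximations, not driver output:

    /* Raw sample N = 512 through the voltage polynomial:
     * 100000*512/18658 + 115720000/18658 = 2744 + 6202 = 8946, then
     * divided by total_divider (10) gives 894 mV, matching the original
     * formula (512 + 1157.2) / 1.8658 ~= 894.6 mV.
     */
    static void pvt_poly_example(void)
    {
            long volt_mv = pvt_calc_poly(&poly_N_to_volt, 512);     /* ~894 */
            long raw_n = pvt_calc_poly(&poly_temp_to_N, 25000);     /* mC -> N */

            (void)volt_mv;
            (void)raw_n;
    }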
+
+static inline u32 pvt_update(void __iomem *reg, u32 mask, u32 data)
+{
+ u32 old;
+
+ old = readl_relaxed(reg);
+ writel((old & ~mask) | (data & mask), reg);
+
+ return old & mask;
+}
+
+/*
+ * Baikal-T1 PVT mode can be updated only when the controller is disabled.
+ * So first we disable it, then set the new mode together with the controller
+ * getting back enabled. The same concerns the temperature trim and
+ * measurements timeout. If necessary, the interface mutex is supposed
+ * to be held while these operations are performed.
+ */
+static inline void pvt_set_mode(struct pvt_hwmon *pvt, u32 mode)
+{
+ u32 old;
+
+ mode = FIELD_PREP(PVT_CTRL_MODE_MASK, mode);
+
+ old = pvt_update(pvt->regs + PVT_CTRL, PVT_CTRL_EN, 0);
+ pvt_update(pvt->regs + PVT_CTRL, PVT_CTRL_MODE_MASK | PVT_CTRL_EN,
+ mode | old);
+}
+
+static inline u32 pvt_calc_trim(long temp)
+{
+ temp = clamp_val(temp, 0, PVT_TRIM_TEMP);
+
+ return DIV_ROUND_UP(temp, PVT_TRIM_STEP);
+}
+
+static inline void pvt_set_trim(struct pvt_hwmon *pvt, u32 trim)
+{
+ u32 old;
+
+ trim = FIELD_PREP(PVT_CTRL_TRIM_MASK, trim);
+
+ old = pvt_update(pvt->regs + PVT_CTRL, PVT_CTRL_EN, 0);
+ pvt_update(pvt->regs + PVT_CTRL, PVT_CTRL_TRIM_MASK | PVT_CTRL_EN,
+ trim | old);
+}
+
+static inline void pvt_set_tout(struct pvt_hwmon *pvt, u32 tout)
+{
+ u32 old;
+
+ old = pvt_update(pvt->regs + PVT_CTRL, PVT_CTRL_EN, 0);
+ writel(tout, pvt->regs + PVT_TTIMEOUT);
+ pvt_update(pvt->regs + PVT_CTRL, PVT_CTRL_EN, old);
+}
+
+/*
+ * This driver can optionally provide the hwmon alarms for each sensor the PVT
+ * controller supports. The alarms functionality is made compile-time
+ * configurable because of a peculiarity of the hardware interface described
+ * further in this comment. So if alarms are unnecessary in your system
+ * design, it's recommended to keep them disabled to prevent the PVT IRQs
+ * from being periodically raised just to keep the data cache and alarms
+ * status up to date.
+ *
+ * The Baikal-T1 PVT embedded controller is based on the Analog Bits PVT
+ * sensor, but is equipped with a dedicated control wrapper. It exposes the
+ * PVT sub-block registers space via the APB3 bus. In addition the wrapper
+ * provides a common interrupt vector for the sensors conversion completion
+ * events and threshold value alarms. Alas, the wrapper interface hasn't been
+ * fully thought through: only one sensor can be activated at a time, and its
+ * thresholds comparator is enabled right after the data conversion is
+ * completed. So if alarms need to be implemented for all available sensors,
+ * we can't just set the thresholds and enable the interrupts; we need to
+ * enable the sensors one after another and let the controller detect the
+ * alarms by itself at each conversion. This also makes it pointless to
+ * handle the alarms interrupts, since they occur synchronously with data
+ * conversion completion. The best driver design is therefore to enable only
+ * the completion interrupts and keep the converted value in the driver data
+ * cache. This solution is implemented if hwmon alarms are enabled in this
+ * driver. If the alarms are disabled, the conversion is performed on demand
+ * when a sensor's input file is read.
+ */
+
+#if defined(CONFIG_SENSORS_BT1_PVT_ALARMS)
+
+#define pvt_hard_isr NULL
+
+static irqreturn_t pvt_soft_isr(int irq, void *data)
+{
+ const struct pvt_sensor_info *info;
+ struct pvt_hwmon *pvt = data;
+ struct pvt_cache *cache;
+ u32 val, thres_sts, old;
+
+ /*
+ * DVALID bit will be cleared by reading the data. We need to save the
+ * status before the next conversion happens. Threshold events will be
+ * handled a bit later.
+ */
+ thres_sts = readl(pvt->regs + PVT_RAW_INTR_STAT);
+
+ /*
+	 * Then let's recharge the PVT interface with the next sampling mode.
+ * Lock the interface mutex to serialize trim, timeouts and alarm
+ * thresholds settings.
+ */
+ cache = &pvt->cache[pvt->sensor];
+ info = &pvt_info[pvt->sensor];
+ pvt->sensor = (pvt->sensor == PVT_SENSOR_LAST) ?
+ PVT_SENSOR_FIRST : (pvt->sensor + 1);
+
+ /*
+ * For some reason we have to mask the interrupt before changing the
+ * mode, otherwise sometimes the temperature mode doesn't get
+ * activated even though the actual mode in the ctrl register
+ * corresponds to one. Then we read the data. By doing so we also
+ * recharge the data conversion. After this the mode corresponding
+ * to the next sensor in the row is set. Finally we enable the
+ * interrupts back.
+ */
+ mutex_lock(&pvt->iface_mtx);
+
+ old = pvt_update(pvt->regs + PVT_INTR_MASK, PVT_INTR_DVALID,
+ PVT_INTR_DVALID);
+
+ val = readl(pvt->regs + PVT_DATA);
+
+ pvt_set_mode(pvt, pvt_info[pvt->sensor].mode);
+
+ pvt_update(pvt->regs + PVT_INTR_MASK, PVT_INTR_DVALID, old);
+
+ mutex_unlock(&pvt->iface_mtx);
+
+ /*
+ * We can now update the data cache with data just retrieved from the
+	 * sensor. Lock the write-seqlock to make sure the reader sees
+	 * coherent data.
+ */
+ write_seqlock(&cache->data_seqlock);
+
+ cache->data = FIELD_GET(PVT_DATA_DATA_MASK, val);
+
+ write_sequnlock(&cache->data_seqlock);
+
+ /*
+ * While PVT core is doing the next mode data conversion, we'll check
+ * whether the alarms were triggered for the current sensor. Note that
+	 * set at a time, which is why an if-else statement is used.
+ * set at a time, that's why if-else statement is utilized.
+ */
+ if ((thres_sts & info->thres_sts_lo) ^ cache->thres_sts_lo) {
+ WRITE_ONCE(cache->thres_sts_lo, thres_sts & info->thres_sts_lo);
+ hwmon_notify_event(pvt->hwmon, info->type, info->attr_min_alarm,
+ info->channel);
+ } else if ((thres_sts & info->thres_sts_hi) ^ cache->thres_sts_hi) {
+ WRITE_ONCE(cache->thres_sts_hi, thres_sts & info->thres_sts_hi);
+ hwmon_notify_event(pvt->hwmon, info->type, info->attr_max_alarm,
+ info->channel);
+ }
+
+ return IRQ_HANDLED;
+}
+
+inline umode_t pvt_limit_is_visible(enum pvt_sensor_type type)
+{
+ return 0644;
+}
+
+inline umode_t pvt_alarm_is_visible(enum pvt_sensor_type type)
+{
+ return 0444;
+}
+
+static int pvt_read_data(struct pvt_hwmon *pvt, enum pvt_sensor_type type,
+ long *val)
+{
+ struct pvt_cache *cache = &pvt->cache[type];
+ unsigned int seq;
+ u32 data;
+
+ do {
+ seq = read_seqbegin(&cache->data_seqlock);
+ data = cache->data;
+ } while (read_seqretry(&cache->data_seqlock, seq));
+
+ if (type == PVT_TEMP)
+ *val = pvt_calc_poly(&poly_N_to_temp, data);
+ else
+ *val = pvt_calc_poly(&poly_N_to_volt, data);
+
+ return 0;
+}
+
+static int pvt_read_limit(struct pvt_hwmon *pvt, enum pvt_sensor_type type,
+ bool is_low, long *val)
+{
+ u32 data;
+
+	/* No need for serialization, since it is just read from MMIO. */
+ data = readl(pvt->regs + pvt_info[type].thres_base);
+
+ if (is_low)
+ data = FIELD_GET(PVT_THRES_LO_MASK, data);
+ else
+ data = FIELD_GET(PVT_THRES_HI_MASK, data);
+
+ if (type == PVT_TEMP)
+ *val = pvt_calc_poly(&poly_N_to_temp, data);
+ else
+ *val = pvt_calc_poly(&poly_N_to_volt, data);
+
+ return 0;
+}
+
+static int pvt_write_limit(struct pvt_hwmon *pvt, enum pvt_sensor_type type,
+ bool is_low, long val)
+{
+ u32 data, limit, mask;
+ int ret;
+
+ if (type == PVT_TEMP) {
+ val = clamp(val, PVT_TEMP_MIN, PVT_TEMP_MAX);
+ data = pvt_calc_poly(&poly_temp_to_N, val);
+ } else {
+ val = clamp(val, PVT_VOLT_MIN, PVT_VOLT_MAX);
+ data = pvt_calc_poly(&poly_volt_to_N, val);
+ }
+
+ /* Serialize limit update, since a part of the register is changed. */
+ ret = mutex_lock_interruptible(&pvt->iface_mtx);
+ if (ret)
+ return ret;
+
+ /* Make sure the upper and lower ranges don't intersect. */
+ limit = readl(pvt->regs + pvt_info[type].thres_base);
+ if (is_low) {
+ limit = FIELD_GET(PVT_THRES_HI_MASK, limit);
+ data = clamp_val(data, PVT_DATA_MIN, limit);
+ data = FIELD_PREP(PVT_THRES_LO_MASK, data);
+ mask = PVT_THRES_LO_MASK;
+ } else {
+ limit = FIELD_GET(PVT_THRES_LO_MASK, limit);
+ data = clamp_val(data, limit, PVT_DATA_MAX);
+ data = FIELD_PREP(PVT_THRES_HI_MASK, data);
+ mask = PVT_THRES_HI_MASK;
+ }
+
+ pvt_update(pvt->regs + pvt_info[type].thres_base, mask, data);
+
+ mutex_unlock(&pvt->iface_mtx);
+
+ return 0;
+}
+
+static int pvt_read_alarm(struct pvt_hwmon *pvt, enum pvt_sensor_type type,
+ bool is_low, long *val)
+{
+ if (is_low)
+ *val = !!READ_ONCE(pvt->cache[type].thres_sts_lo);
+ else
+ *val = !!READ_ONCE(pvt->cache[type].thres_sts_hi);
+
+ return 0;
+}
+
+static const struct hwmon_channel_info *pvt_channel_info[] = {
+ HWMON_CHANNEL_INFO(chip,
+ HWMON_C_REGISTER_TZ | HWMON_C_UPDATE_INTERVAL),
+ HWMON_CHANNEL_INFO(temp,
+ HWMON_T_INPUT | HWMON_T_TYPE | HWMON_T_LABEL |
+ HWMON_T_MIN | HWMON_T_MIN_ALARM |
+ HWMON_T_MAX | HWMON_T_MAX_ALARM |
+ HWMON_T_OFFSET),
+ HWMON_CHANNEL_INFO(in,
+ HWMON_I_INPUT | HWMON_I_LABEL |
+ HWMON_I_MIN | HWMON_I_MIN_ALARM |
+ HWMON_I_MAX | HWMON_I_MAX_ALARM,
+ HWMON_I_INPUT | HWMON_I_LABEL |
+ HWMON_I_MIN | HWMON_I_MIN_ALARM |
+ HWMON_I_MAX | HWMON_I_MAX_ALARM,
+ HWMON_I_INPUT | HWMON_I_LABEL |
+ HWMON_I_MIN | HWMON_I_MIN_ALARM |
+ HWMON_I_MAX | HWMON_I_MAX_ALARM,
+ HWMON_I_INPUT | HWMON_I_LABEL |
+ HWMON_I_MIN | HWMON_I_MIN_ALARM |
+ HWMON_I_MAX | HWMON_I_MAX_ALARM),
+ NULL
+};
+
+#else /* !CONFIG_SENSORS_BT1_PVT_ALARMS */
+
+static irqreturn_t pvt_hard_isr(int irq, void *data)
+{
+ struct pvt_hwmon *pvt = data;
+ struct pvt_cache *cache;
+ u32 val;
+
+ /*
+	 * Mask the DVALID interrupt so that a repeated conversion won't
+	 * happen after exiting from the handler.
+ */
+ pvt_update(pvt->regs + PVT_INTR_MASK, PVT_INTR_DVALID,
+ PVT_INTR_DVALID);
+
+ /*
+ * Nothing special for alarm-less driver. Just read the data, update
+ * the cache and notify a waiter of this event.
+ */
+ val = readl(pvt->regs + PVT_DATA);
+ if (!(val & PVT_DATA_VALID)) {
+ dev_err(pvt->dev, "Got IRQ when data isn't valid\n");
+ return IRQ_HANDLED;
+ }
+
+ cache = &pvt->cache[pvt->sensor];
+
+ WRITE_ONCE(cache->data, FIELD_GET(PVT_DATA_DATA_MASK, val));
+
+ complete(&cache->conversion);
+
+ return IRQ_HANDLED;
+}
+
+#define pvt_soft_isr NULL
+
+inline umode_t pvt_limit_is_visible(enum pvt_sensor_type type)
+{
+ return 0;
+}
+
+inline umode_t pvt_alarm_is_visible(enum pvt_sensor_type type)
+{
+ return 0;
+}
+
+static int pvt_read_data(struct pvt_hwmon *pvt, enum pvt_sensor_type type,
+ long *val)
+{
+ struct pvt_cache *cache = &pvt->cache[type];
+ u32 data;
+ int ret;
+
+ /*
+ * Lock PVT conversion interface until data cache is updated. The
+	 * data read procedure is as follows: set the requested PVT sensor
+ * mode, enable IRQ and conversion, wait until conversion is finished,
+ * then disable conversion and IRQ, and read the cached data.
+ */
+ ret = mutex_lock_interruptible(&pvt->iface_mtx);
+ if (ret)
+ return ret;
+
+ pvt->sensor = type;
+ pvt_set_mode(pvt, pvt_info[type].mode);
+
+ /*
+ * Unmask the DVALID interrupt and enable the sensors conversions.
+ * Do the reverse procedure when conversion is done.
+ */
+ pvt_update(pvt->regs + PVT_INTR_MASK, PVT_INTR_DVALID, 0);
+ pvt_update(pvt->regs + PVT_CTRL, PVT_CTRL_EN, PVT_CTRL_EN);
+
+ wait_for_completion(&cache->conversion);
+
+ pvt_update(pvt->regs + PVT_CTRL, PVT_CTRL_EN, 0);
+ pvt_update(pvt->regs + PVT_INTR_MASK, PVT_INTR_DVALID,
+ PVT_INTR_DVALID);
+
+ data = READ_ONCE(cache->data);
+
+ mutex_unlock(&pvt->iface_mtx);
+
+ if (type == PVT_TEMP)
+ *val = pvt_calc_poly(&poly_N_to_temp, data);
+ else
+ *val = pvt_calc_poly(&poly_N_to_volt, data);
+
+ return 0;
+}
+
+static int pvt_read_limit(struct pvt_hwmon *pvt, enum pvt_sensor_type type,
+ bool is_low, long *val)
+{
+ return -EOPNOTSUPP;
+}
+
+static int pvt_write_limit(struct pvt_hwmon *pvt, enum pvt_sensor_type type,
+ bool is_low, long val)
+{
+ return -EOPNOTSUPP;
+}
+
+static int pvt_read_alarm(struct pvt_hwmon *pvt, enum pvt_sensor_type type,
+ bool is_low, long *val)
+{
+ return -EOPNOTSUPP;
+}
+
+static const struct hwmon_channel_info *pvt_channel_info[] = {
+ HWMON_CHANNEL_INFO(chip,
+ HWMON_C_REGISTER_TZ | HWMON_C_UPDATE_INTERVAL),
+ HWMON_CHANNEL_INFO(temp,
+ HWMON_T_INPUT | HWMON_T_TYPE | HWMON_T_LABEL |
+ HWMON_T_OFFSET),
+ HWMON_CHANNEL_INFO(in,
+ HWMON_I_INPUT | HWMON_I_LABEL,
+ HWMON_I_INPUT | HWMON_I_LABEL,
+ HWMON_I_INPUT | HWMON_I_LABEL,
+ HWMON_I_INPUT | HWMON_I_LABEL),
+ NULL
+};
+
+#endif /* !CONFIG_SENSORS_BT1_PVT_ALARMS */
+
+static inline bool pvt_hwmon_channel_is_valid(enum hwmon_sensor_types type,
+ int ch)
+{
+ switch (type) {
+ case hwmon_temp:
+ if (ch < 0 || ch >= PVT_TEMP_CHS)
+ return false;
+ break;
+ case hwmon_in:
+ if (ch < 0 || ch >= PVT_VOLT_CHS)
+ return false;
+ break;
+ default:
+ break;
+ }
+
+	/* The rest of the types are independent of the channel number. */
+ return true;
+}
+
+static umode_t pvt_hwmon_is_visible(const void *data,
+ enum hwmon_sensor_types type,
+ u32 attr, int ch)
+{
+ if (!pvt_hwmon_channel_is_valid(type, ch))
+ return 0;
+
+ switch (type) {
+ case hwmon_chip:
+ switch (attr) {
+ case hwmon_chip_update_interval:
+ return 0644;
+ }
+ break;
+ case hwmon_temp:
+ switch (attr) {
+ case hwmon_temp_input:
+ case hwmon_temp_type:
+ case hwmon_temp_label:
+ return 0444;
+ case hwmon_temp_min:
+ case hwmon_temp_max:
+ return pvt_limit_is_visible(ch);
+ case hwmon_temp_min_alarm:
+ case hwmon_temp_max_alarm:
+ return pvt_alarm_is_visible(ch);
+ case hwmon_temp_offset:
+ return 0644;
+ }
+ break;
+ case hwmon_in:
+ switch (attr) {
+ case hwmon_in_input:
+ case hwmon_in_label:
+ return 0444;
+ case hwmon_in_min:
+ case hwmon_in_max:
+ return pvt_limit_is_visible(PVT_VOLT + ch);
+ case hwmon_in_min_alarm:
+ case hwmon_in_max_alarm:
+ return pvt_alarm_is_visible(PVT_VOLT + ch);
+ }
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int pvt_read_trim(struct pvt_hwmon *pvt, long *val)
+{
+ u32 data;
+
+ data = readl(pvt->regs + PVT_CTRL);
+ *val = FIELD_GET(PVT_CTRL_TRIM_MASK, data) * PVT_TRIM_STEP;
+
+ return 0;
+}
+
+static int pvt_write_trim(struct pvt_hwmon *pvt, long val)
+{
+ u32 trim;
+ int ret;
+
+ /*
+ * Serialize trim update, since a part of the register is changed and
+ * the controller is supposed to be disabled during this operation.
+ */
+ ret = mutex_lock_interruptible(&pvt->iface_mtx);
+ if (ret)
+ return ret;
+
+ trim = pvt_calc_trim(val);
+ pvt_set_trim(pvt, trim);
+
+ mutex_unlock(&pvt->iface_mtx);
+
+ return 0;
+}
+
+static int pvt_read_timeout(struct pvt_hwmon *pvt, long *val)
+{
+ unsigned long rate;
+ ktime_t kt;
+ u32 data;
+
+ rate = clk_get_rate(pvt->clks[PVT_CLOCK_REF].clk);
+ if (!rate)
+ return -ENODEV;
+
+ /*
+ * Don't bother with mutex here, since we just read data from MMIO.
+	 * We also have to scale the ticks timeout up to compensate for the
+	 * ms-ns-data translations.
+ */
+ data = readl(pvt->regs + PVT_TTIMEOUT) + 1;
+
+ /*
+ * Calculate ref-clock based delay (Ttotal) between two consecutive
+ * data samples of the same sensor. So we first must calculate the
+ * delay introduced by the internal ref-clock timer (Tref * Fclk).
+	 * Then add the constant timeout caused by each conversion latency
+	 * (Tmin). The basic formula for each conversion is:
+	 *   Ttotal = Tref * Fclk + Tmin
+	 * Note if alarms are enabled the sensors are polled one after
+	 * another, so in order to have the delay applicable to each
+	 * sensor the requested value must be equally redistributed.
+ */
+#if defined(CONFIG_SENSORS_BT1_PVT_ALARMS)
+ kt = ktime_set(PVT_SENSORS_NUM * (u64)data, 0);
+ kt = ktime_divns(kt, rate);
+ kt = ktime_add_ns(kt, PVT_SENSORS_NUM * PVT_TOUT_MIN);
+#else
+ kt = ktime_set(data, 0);
+ kt = ktime_divns(kt, rate);
+ kt = ktime_add_ns(kt, PVT_TOUT_MIN);
+#endif
+
+ /* Return the result in msec as hwmon sysfs interface requires. */
+ *val = ktime_to_ms(kt);
+
+ return 0;
+}
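A minimal sketch of the per-sample delay the function above reports, written as N / Fclk + Tmin since the PVT_TTIMEOUT register holds a tick count; the 25 MHz ref-clock figure in the comment is a hypothetical example, the driver reads the real rate from the "ref" clock:

    #include <linux/math64.h>

    /* e.g. N = 25000 ticks at a hypothetical 25 MHz ref clock is 1 ms
     * plus the constant PVT_TOUT_MIN conversion latency.
     */
    static u64 pvt_sample_delay_ns(u32 ticks, unsigned long rate_hz)
    {
            return div64_ul((u64)ticks * NSEC_PER_SEC, rate_hz) + PVT_TOUT_MIN;
    }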
+
+static int pvt_write_timeout(struct pvt_hwmon *pvt, long val)
+{
+ unsigned long rate;
+ ktime_t kt;
+ u32 data;
+ int ret;
+
+ rate = clk_get_rate(pvt->clks[PVT_CLOCK_REF].clk);
+ if (!rate)
+ return -ENODEV;
+
+ /*
+ * If alarms are enabled, the requested timeout must be divided
+ * between all available sensors to have the requested delay
+ * applicable to each individual sensor.
+ */
+ kt = ms_to_ktime(val);
+#if defined(CONFIG_SENSORS_BT1_PVT_ALARMS)
+ kt = ktime_divns(kt, PVT_SENSORS_NUM);
+#endif
+
+ /*
+ * Subtract a constant lag, which always persists due to the limited
+ * PVT sampling rate. Make sure the timeout is not negative.
+ */
+ kt = ktime_sub_ns(kt, PVT_TOUT_MIN);
+ if (ktime_to_ns(kt) < 0)
+ kt = ktime_set(0, 0);
+
+ /*
+ * Finally recalculate the timeout in terms of the reference clock
+ * period.
+ */
+ data = ktime_divns(kt * rate, NSEC_PER_SEC);
+
+ /*
+ * Update the measurements delay, but lock the interface first, since
+ * we have to disable PVT in order to have the new delay actually
+ * updated.
+ */
+ ret = mutex_lock_interruptible(&pvt->iface_mtx);
+ if (ret)
+ return ret;
+
+ pvt_set_tout(pvt, data);
+
+ mutex_unlock(&pvt->iface_mtx);
+
+ return 0;
+}
+
+static int pvt_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
+ u32 attr, int ch, long *val)
+{
+ struct pvt_hwmon *pvt = dev_get_drvdata(dev);
+
+ if (!pvt_hwmon_channel_is_valid(type, ch))
+ return -EINVAL;
+
+ switch (type) {
+ case hwmon_chip:
+ switch (attr) {
+ case hwmon_chip_update_interval:
+ return pvt_read_timeout(pvt, val);
+ }
+ break;
+ case hwmon_temp:
+ switch (attr) {
+ case hwmon_temp_input:
+ return pvt_read_data(pvt, ch, val);
+ case hwmon_temp_type:
+ *val = 1;
+ return 0;
+ case hwmon_temp_min:
+ return pvt_read_limit(pvt, ch, true, val);
+ case hwmon_temp_max:
+ return pvt_read_limit(pvt, ch, false, val);
+ case hwmon_temp_min_alarm:
+ return pvt_read_alarm(pvt, ch, true, val);
+ case hwmon_temp_max_alarm:
+ return pvt_read_alarm(pvt, ch, false, val);
+ case hwmon_temp_offset:
+ return pvt_read_trim(pvt, val);
+ }
+ break;
+ case hwmon_in:
+ switch (attr) {
+ case hwmon_in_input:
+ return pvt_read_data(pvt, PVT_VOLT + ch, val);
+ case hwmon_in_min:
+ return pvt_read_limit(pvt, PVT_VOLT + ch, true, val);
+ case hwmon_in_max:
+ return pvt_read_limit(pvt, PVT_VOLT + ch, false, val);
+ case hwmon_in_min_alarm:
+ return pvt_read_alarm(pvt, PVT_VOLT + ch, true, val);
+ case hwmon_in_max_alarm:
+ return pvt_read_alarm(pvt, PVT_VOLT + ch, false, val);
+ }
+ break;
+ default:
+ break;
+ }
+
+ return -EOPNOTSUPP;
+}
+
+static int pvt_hwmon_read_string(struct device *dev,
+ enum hwmon_sensor_types type,
+ u32 attr, int ch, const char **str)
+{
+ if (!pvt_hwmon_channel_is_valid(type, ch))
+ return -EINVAL;
+
+ switch (type) {
+ case hwmon_temp:
+ switch (attr) {
+ case hwmon_temp_label:
+ *str = pvt_info[ch].label;
+ return 0;
+ }
+ break;
+ case hwmon_in:
+ switch (attr) {
+ case hwmon_in_label:
+ *str = pvt_info[PVT_VOLT + ch].label;
+ return 0;
+ }
+ break;
+ default:
+ break;
+ }
+
+ return -EOPNOTSUPP;
+}
+
+static int pvt_hwmon_write(struct device *dev, enum hwmon_sensor_types type,
+ u32 attr, int ch, long val)
+{
+ struct pvt_hwmon *pvt = dev_get_drvdata(dev);
+
+ if (!pvt_hwmon_channel_is_valid(type, ch))
+ return -EINVAL;
+
+ switch (type) {
+ case hwmon_chip:
+ switch (attr) {
+ case hwmon_chip_update_interval:
+ return pvt_write_timeout(pvt, val);
+ }
+ break;
+ case hwmon_temp:
+ switch (attr) {
+ case hwmon_temp_min:
+ return pvt_write_limit(pvt, ch, true, val);
+ case hwmon_temp_max:
+ return pvt_write_limit(pvt, ch, false, val);
+ case hwmon_temp_offset:
+ return pvt_write_trim(pvt, val);
+ }
+ break;
+ case hwmon_in:
+ switch (attr) {
+ case hwmon_in_min:
+ return pvt_write_limit(pvt, PVT_VOLT + ch, true, val);
+ case hwmon_in_max:
+ return pvt_write_limit(pvt, PVT_VOLT + ch, false, val);
+ }
+ break;
+ default:
+ break;
+ }
+
+ return -EOPNOTSUPP;
+}
+
+static const struct hwmon_ops pvt_hwmon_ops = {
+ .is_visible = pvt_hwmon_is_visible,
+ .read = pvt_hwmon_read,
+ .read_string = pvt_hwmon_read_string,
+ .write = pvt_hwmon_write
+};
+
+static const struct hwmon_chip_info pvt_hwmon_info = {
+ .ops = &pvt_hwmon_ops,
+ .info = pvt_channel_info
+};
+
+static void pvt_clear_data(void *data)
+{
+ struct pvt_hwmon *pvt = data;
+#if !defined(CONFIG_SENSORS_BT1_PVT_ALARMS)
+ int idx;
+
+ for (idx = 0; idx < PVT_SENSORS_NUM; ++idx)
+ complete_all(&pvt->cache[idx].conversion);
+#endif
+
+ mutex_destroy(&pvt->iface_mtx);
+}
+
+static struct pvt_hwmon *pvt_create_data(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct pvt_hwmon *pvt;
+ int ret, idx;
+
+ pvt = devm_kzalloc(dev, sizeof(*pvt), GFP_KERNEL);
+ if (!pvt)
+ return ERR_PTR(-ENOMEM);
+
+ ret = devm_add_action(dev, pvt_clear_data, pvt);
+ if (ret) {
+ dev_err(dev, "Can't add PVT data clear action\n");
+ return ERR_PTR(ret);
+ }
+
+ pvt->dev = dev;
+ pvt->sensor = PVT_SENSOR_FIRST;
+ mutex_init(&pvt->iface_mtx);
+
+#if defined(CONFIG_SENSORS_BT1_PVT_ALARMS)
+ for (idx = 0; idx < PVT_SENSORS_NUM; ++idx)
+ seqlock_init(&pvt->cache[idx].data_seqlock);
+#else
+ for (idx = 0; idx < PVT_SENSORS_NUM; ++idx)
+ init_completion(&pvt->cache[idx].conversion);
+#endif
+
+ return pvt;
+}
+
+static int pvt_request_regs(struct pvt_hwmon *pvt)
+{
+ struct platform_device *pdev = to_platform_device(pvt->dev);
+ struct resource *res;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!res) {
+ dev_err(pvt->dev, "Couldn't find PVT memresource\n");
+ return -EINVAL;
+ }
+
+ pvt->regs = devm_ioremap_resource(pvt->dev, res);
+ if (IS_ERR(pvt->regs)) {
+ dev_err(pvt->dev, "Couldn't map PVT registers\n");
+ return PTR_ERR(pvt->regs);
+ }
+
+ return 0;
+}
+
+static void pvt_disable_clks(void *data)
+{
+ struct pvt_hwmon *pvt = data;
+
+ clk_bulk_disable_unprepare(PVT_CLOCK_NUM, pvt->clks);
+}
+
+static int pvt_request_clks(struct pvt_hwmon *pvt)
+{
+ int ret;
+
+ pvt->clks[PVT_CLOCK_APB].id = "pclk";
+ pvt->clks[PVT_CLOCK_REF].id = "ref";
+
+ ret = devm_clk_bulk_get(pvt->dev, PVT_CLOCK_NUM, pvt->clks);
+ if (ret) {
+ dev_err(pvt->dev, "Couldn't get PVT clocks descriptors\n");
+ return ret;
+ }
+
+ ret = clk_bulk_prepare_enable(PVT_CLOCK_NUM, pvt->clks);
+ if (ret) {
+ dev_err(pvt->dev, "Couldn't enable the PVT clocks\n");
+ return ret;
+ }
+
+ ret = devm_add_action_or_reset(pvt->dev, pvt_disable_clks, pvt);
+ if (ret) {
+ dev_err(pvt->dev, "Can't add PVT clocks disable action\n");
+ return ret;
+ }
+
+ return 0;
+}
+
+static void pvt_init_iface(struct pvt_hwmon *pvt)
+{
+ u32 trim, temp;
+
+ /*
+	 * Make sure all interrupts and the controller are disabled so as not
+	 * to accidentally have the ISR executed before the driver data is fully
+ * initialized. Clear the IRQ status as well.
+ */
+ pvt_update(pvt->regs + PVT_INTR_MASK, PVT_INTR_ALL, PVT_INTR_ALL);
+ pvt_update(pvt->regs + PVT_CTRL, PVT_CTRL_EN, 0);
+ readl(pvt->regs + PVT_CLR_INTR);
+ readl(pvt->regs + PVT_DATA);
+
+ /* Setup default sensor mode, timeout and temperature trim. */
+ pvt_set_mode(pvt, pvt_info[pvt->sensor].mode);
+ pvt_set_tout(pvt, PVT_TOUT_DEF);
+
+ trim = PVT_TRIM_DEF;
+ if (!of_property_read_u32(pvt->dev->of_node,
+ "baikal,pvt-temp-offset-millicelsius", &temp))
+ trim = pvt_calc_trim(temp);
+
+ pvt_set_trim(pvt, trim);
+}
+
+static int pvt_request_irq(struct pvt_hwmon *pvt)
+{
+ struct platform_device *pdev = to_platform_device(pvt->dev);
+ int ret;
+
+ pvt->irq = platform_get_irq(pdev, 0);
+ if (pvt->irq < 0)
+ return pvt->irq;
+
+ ret = devm_request_threaded_irq(pvt->dev, pvt->irq,
+ pvt_hard_isr, pvt_soft_isr,
+#if defined(CONFIG_SENSORS_BT1_PVT_ALARMS)
+ IRQF_SHARED | IRQF_TRIGGER_HIGH |
+ IRQF_ONESHOT,
+#else
+ IRQF_SHARED | IRQF_TRIGGER_HIGH,
+#endif
+ "pvt", pvt);
+ if (ret) {
+ dev_err(pvt->dev, "Couldn't request PVT IRQ\n");
+ return ret;
+ }
+
+ return 0;
+}
+
+static int pvt_create_hwmon(struct pvt_hwmon *pvt)
+{
+ pvt->hwmon = devm_hwmon_device_register_with_info(pvt->dev, "pvt", pvt,
+ &pvt_hwmon_info, NULL);
+ if (IS_ERR(pvt->hwmon)) {
+ dev_err(pvt->dev, "Couldn't create hwmon device\n");
+ return PTR_ERR(pvt->hwmon);
+ }
+
+ return 0;
+}
+
+#if defined(CONFIG_SENSORS_BT1_PVT_ALARMS)
+
+static void pvt_disable_iface(void *data)
+{
+ struct pvt_hwmon *pvt = data;
+
+ mutex_lock(&pvt->iface_mtx);
+ pvt_update(pvt->regs + PVT_CTRL, PVT_CTRL_EN, 0);
+ pvt_update(pvt->regs + PVT_INTR_MASK, PVT_INTR_DVALID,
+ PVT_INTR_DVALID);
+ mutex_unlock(&pvt->iface_mtx);
+}
+
+static int pvt_enable_iface(struct pvt_hwmon *pvt)
+{
+ int ret;
+
+ ret = devm_add_action(pvt->dev, pvt_disable_iface, pvt);
+ if (ret) {
+ dev_err(pvt->dev, "Can't add PVT disable interface action\n");
+ return ret;
+ }
+
+ /*
+ * Enable sensors data conversion and IRQ. We need to lock the
+ * interface mutex since hwmon has just been created and the
+ * corresponding sysfs files are accessible from user-space,
+ * which theoretically may cause races.
+ */
+ mutex_lock(&pvt->iface_mtx);
+ pvt_update(pvt->regs + PVT_INTR_MASK, PVT_INTR_DVALID, 0);
+ pvt_update(pvt->regs + PVT_CTRL, PVT_CTRL_EN, PVT_CTRL_EN);
+ mutex_unlock(&pvt->iface_mtx);
+
+ return 0;
+}
+
+#else /* !CONFIG_SENSORS_BT1_PVT_ALARMS */
+
+static int pvt_enable_iface(struct pvt_hwmon *pvt)
+{
+ return 0;
+}
+
+#endif /* !CONFIG_SENSORS_BT1_PVT_ALARMS */
+
+static int pvt_probe(struct platform_device *pdev)
+{
+ struct pvt_hwmon *pvt;
+ int ret;
+
+ pvt = pvt_create_data(pdev);
+ if (IS_ERR(pvt))
+ return PTR_ERR(pvt);
+
+ ret = pvt_request_regs(pvt);
+ if (ret)
+ return ret;
+
+ ret = pvt_request_clks(pvt);
+ if (ret)
+ return ret;
+
+ pvt_init_iface(pvt);
+
+ ret = pvt_request_irq(pvt);
+ if (ret)
+ return ret;
+
+ ret = pvt_create_hwmon(pvt);
+ if (ret)
+ return ret;
+
+ ret = pvt_enable_iface(pvt);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static const struct of_device_id pvt_of_match[] = {
+ { .compatible = "baikal,bt1-pvt" },
+ { }
+};
+MODULE_DEVICE_TABLE(of, pvt_of_match);
+
+static struct platform_driver pvt_driver = {
+ .probe = pvt_probe,
+ .driver = {
+ .name = "bt1-pvt",
+ .of_match_table = pvt_of_match
+ }
+};
+module_platform_driver(pvt_driver);
+
+MODULE_AUTHOR("Maxim Kaurkin <maxim.kaurkin@baikalelectronics.ru>");
+MODULE_DESCRIPTION("Baikal-T1 PVT driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/hwmon/bt1-pvt.h b/drivers/hwmon/bt1-pvt.h
new file mode 100644
index 000000000000..5eac73e94885
--- /dev/null
+++ b/drivers/hwmon/bt1-pvt.h
@@ -0,0 +1,244 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020 BAIKAL ELECTRONICS, JSC
+ *
+ * Baikal-T1 Process, Voltage, Temperature sensor driver
+ */
+#ifndef __HWMON_BT1_PVT_H__
+#define __HWMON_BT1_PVT_H__
+
+#include <linux/completion.h>
+#include <linux/hwmon.h>
+#include <linux/kernel.h>
+#include <linux/mutex.h>
+#include <linux/seqlock.h>
+
+/* Baikal-T1 PVT registers and their bitfields */
+#define PVT_CTRL 0x00
+#define PVT_CTRL_EN BIT(0)
+#define PVT_CTRL_MODE_FLD 1
+#define PVT_CTRL_MODE_MASK GENMASK(3, PVT_CTRL_MODE_FLD)
+#define PVT_CTRL_MODE_TEMP 0x0
+#define PVT_CTRL_MODE_VOLT 0x1
+#define PVT_CTRL_MODE_LVT 0x2
+#define PVT_CTRL_MODE_HVT 0x4
+#define PVT_CTRL_MODE_SVT 0x6
+#define PVT_CTRL_TRIM_FLD 4
+#define PVT_CTRL_TRIM_MASK GENMASK(8, PVT_CTRL_TRIM_FLD)
+#define PVT_DATA 0x04
+#define PVT_DATA_VALID BIT(10)
+#define PVT_DATA_DATA_FLD 0
+#define PVT_DATA_DATA_MASK GENMASK(9, PVT_DATA_DATA_FLD)
+#define PVT_TTHRES 0x08
+#define PVT_VTHRES 0x0C
+#define PVT_LTHRES 0x10
+#define PVT_HTHRES 0x14
+#define PVT_STHRES 0x18
+#define PVT_THRES_LO_FLD 0
+#define PVT_THRES_LO_MASK GENMASK(9, PVT_THRES_LO_FLD)
+#define PVT_THRES_HI_FLD 10
+#define PVT_THRES_HI_MASK GENMASK(19, PVT_THRES_HI_FLD)
+#define PVT_TTIMEOUT 0x1C
+#define PVT_INTR_STAT 0x20
+#define PVT_INTR_MASK 0x24
+#define PVT_RAW_INTR_STAT 0x28
+#define PVT_INTR_DVALID BIT(0)
+#define PVT_INTR_TTHRES_LO BIT(1)
+#define PVT_INTR_TTHRES_HI BIT(2)
+#define PVT_INTR_VTHRES_LO BIT(3)
+#define PVT_INTR_VTHRES_HI BIT(4)
+#define PVT_INTR_LTHRES_LO BIT(5)
+#define PVT_INTR_LTHRES_HI BIT(6)
+#define PVT_INTR_HTHRES_LO BIT(7)
+#define PVT_INTR_HTHRES_HI BIT(8)
+#define PVT_INTR_STHRES_LO BIT(9)
+#define PVT_INTR_STHRES_HI BIT(10)
+#define PVT_INTR_ALL GENMASK(10, 0)
+#define PVT_CLR_INTR 0x2C
+
+/*
+ * PVT sensors-related limits and default values
+ * @PVT_TEMP_MIN: Minimal temperature in millidegrees Celsius.
+ * @PVT_TEMP_MAX: Maximal temperature in millidegrees Celsius.
+ * @PVT_TEMP_CHS: Number of temperature hwmon channels.
+ * @PVT_VOLT_MIN: Minimal voltage in mV.
+ * @PVT_VOLT_MAX: Maximal voltage in mV.
+ * @PVT_VOLT_CHS: Number of voltage hwmon channels.
+ * @PVT_DATA_MIN: Minimal PVT raw data value.
+ * @PVT_DATA_MAX: Maximal PVT raw data value.
+ * @PVT_TRIM_MIN: Minimal temperature sensor trim value.
+ * @PVT_TRIM_MAX: Maximal temperature sensor trim value.
+ * @PVT_TRIM_DEF: Default temperature sensor trim value (set a proper value
+ * when one is determined for Baikal-T1 SoC).
+ * @PVT_TRIM_TEMP: Maximum temperature encoded by the trim factor.
+ * @PVT_TRIM_STEP: Temperature stride corresponding to the trim value.
+ * @PVT_TOUT_MIN: Minimal timeout between samples in nanoseconds.
+ * @PVT_TOUT_DEF: Default data measurements timeout. In case the alarms are
+ * activated, the PVT IRQ is raised after each conversion in
+ * order to have the thresholds checked and the converted value
+ * cached. Too frequent conversions may overload the system CPU,
+ * so a 50ms delay between them is set by default to prevent
+ * this.
+ */
+#define PVT_TEMP_MIN -48380L
+#define PVT_TEMP_MAX 147438L
+#define PVT_TEMP_CHS 1
+#define PVT_VOLT_MIN 620L
+#define PVT_VOLT_MAX 1168L
+#define PVT_VOLT_CHS 4
+#define PVT_DATA_MIN 0
+#define PVT_DATA_MAX (PVT_DATA_DATA_MASK >> PVT_DATA_DATA_FLD)
+#define PVT_TRIM_MIN 0
+#define PVT_TRIM_MAX (PVT_CTRL_TRIM_MASK >> PVT_CTRL_TRIM_FLD)
+#define PVT_TRIM_TEMP 7130
+#define PVT_TRIM_STEP (PVT_TRIM_TEMP / PVT_TRIM_MAX)
+#define PVT_TRIM_DEF 0
+#define PVT_TOUT_MIN (NSEC_PER_SEC / 3000)
+#if defined(CONFIG_SENSORS_BT1_PVT_ALARMS)
+# define PVT_TOUT_DEF 60000
+#else
+# define PVT_TOUT_DEF 0
+#endif
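+
+/*
+ * For reference, with the definitions above PVT_TRIM_MAX evaluates to
+ * GENMASK(8, 4) >> 4 = 31 and PVT_TRIM_STEP to 7130 / 31 = 230, i.e. one
+ * trim code is worth roughly 0.23 degrees Celsius of temperature offset.
+ */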
+
+/*
+ * enum pvt_sensor_type - Baikal-T1 PVT sensor types (correspond to each PVT
+ * sampling mode)
+ * @PVT_SENSOR*: helpers to traverse the sensors in loops.
+ * @PVT_TEMP: PVT Temperature sensor.
+ * @PVT_VOLT: PVT Voltage sensor.
+ * @PVT_LVT: PVT Low-Voltage threshold sensor.
+ * @PVT_HVT: PVT High-Voltage threshold sensor.
+ * @PVT_SVT: PVT Standard-Voltage threshold sensor.
+ */
+enum pvt_sensor_type {
+ PVT_SENSOR_FIRST,
+ PVT_TEMP = PVT_SENSOR_FIRST,
+ PVT_VOLT,
+ PVT_LVT,
+ PVT_HVT,
+ PVT_SVT,
+ PVT_SENSOR_LAST = PVT_SVT,
+ PVT_SENSORS_NUM
+};
+
+/*
+ * enum pvt_clock_type - Baikal-T1 PVT clocks.
+ * @PVT_CLOCK_APB: APB clock.
+ * @PVT_CLOCK_REF: PVT reference clock.
+ */
+enum pvt_clock_type {
+ PVT_CLOCK_APB,
+ PVT_CLOCK_REF,
+ PVT_CLOCK_NUM
+};
+
+/*
+ * struct pvt_sensor_info - Baikal-T1 PVT sensor informational structure
+ * @channel: Sensor channel ID.
+ * @label: hwmon sensor label.
+ * @mode: PVT mode corresponding to the channel.
+ * @thres_base: upper and lower threshold values of the sensor.
+ * @thres_sts_lo: low threshold status bitfield.
+ * @thres_sts_hi: high threshold status bitfield.
+ * @type: Sensor type.
+ * @attr_min_alarm: Min alarm attribute ID.
+ * @attr_max_alarm: Max alarm attribute ID.
+ */
+struct pvt_sensor_info {
+ int channel;
+ const char *label;
+ u32 mode;
+ unsigned long thres_base;
+ u32 thres_sts_lo;
+ u32 thres_sts_hi;
+ enum hwmon_sensor_types type;
+ u32 attr_min_alarm;
+ u32 attr_max_alarm;
+};
+
+#define PVT_SENSOR_INFO(_ch, _label, _type, _mode, _thres) \
+ { \
+ .channel = _ch, \
+ .label = _label, \
+ .mode = PVT_CTRL_MODE_ ##_mode, \
+ .thres_base = PVT_ ##_thres, \
+ .thres_sts_lo = PVT_INTR_ ##_thres## _LO, \
+ .thres_sts_hi = PVT_INTR_ ##_thres## _HI, \
+ .type = _type, \
+ .attr_min_alarm = _type## _min, \
+ .attr_max_alarm = _type## _max, \
+ }
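+
+/*
+ * For instance (the channel id and label here are hypothetical),
+ * PVT_SENSOR_INFO(0, "CPU Temperature", hwmon_temp, TEMP, TTHRES) expands to
+ * { 0, "CPU Temperature", PVT_CTRL_MODE_TEMP, PVT_TTHRES, PVT_INTR_TTHRES_LO,
+ * PVT_INTR_TTHRES_HI, hwmon_temp, hwmon_temp_min, hwmon_temp_max }.
+ */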
+
+/*
+ * struct pvt_cache - PVT sensors data cache
+ * @data: data cache in raw format.
+ * @thres_sts_lo: low threshold status saved on the previous data conversion.
+ * @thres_sts_hi: high threshold status saved on the previous data conversion.
+ * @data_seqlock: cached data seq-lock.
+ * @conversion: data conversion completion.
+ */
+struct pvt_cache {
+ u32 data;
+#if defined(CONFIG_SENSORS_BT1_PVT_ALARMS)
+ seqlock_t data_seqlock;
+ u32 thres_sts_lo;
+ u32 thres_sts_hi;
+#else
+ struct completion conversion;
+#endif
+};
+
+/*
+ * struct pvt_hwmon - Baikal-T1 PVT private data
+ * @dev: device structure of the PVT platform device.
+ * @hwmon: hwmon device structure.
+ * @regs: pointer to the Baikal-T1 PVT registers region.
+ * @irq: PVT events IRQ number.
+ * @clks: Array of the PVT clock descriptors (APB/ref clocks).
+ * @iface_mtx: Generic interface mutex (used to lock the alarm registers
+ * when the alarms are enabled, or the data conversion interface
+ * if alarms are disabled).
+ * @sensor: current PVT sensor the data conversion is being performed for.
+ * @cache: data cache descriptor.
+ */
+struct pvt_hwmon {
+ struct device *dev;
+ struct device *hwmon;
+
+ void __iomem *regs;
+ int irq;
+
+ struct clk_bulk_data clks[PVT_CLOCK_NUM];
+
+ struct mutex iface_mtx;
+ enum pvt_sensor_type sensor;
+ struct pvt_cache cache[PVT_SENSORS_NUM];
+};
+
+/*
+ * struct pvt_poly_term - a term descriptor of the PVT data translation
+ * polynomial
+ * @deg: degree of the term.
+ * @coef: multiplication factor of the term.
+ * @divider: divider distributed per degree.
+ * @divider_leftover: divider leftover, which couldn't be redistributed.
+ */
+struct pvt_poly_term {
+ unsigned int deg;
+ long coef;
+ long divider;
+ long divider_leftover;
+};
+
+/*
+ * struct pvt_poly - PVT data translation polynomial descriptor
+ * @total_divider: total data divider.
+ * @terms: polynomial terms up to a free one.
+ */
+struct pvt_poly {
+ long total_divider;
+ struct pvt_poly_term terms[];
+};
+
+#endif /* __HWMON_BT1_PVT_H__ */
diff --git a/drivers/hwmon/dell-smm-hwmon.c b/drivers/hwmon/dell-smm-hwmon.c
index ab719d372b0d..16be012a95ed 100644
--- a/drivers/hwmon/dell-smm-hwmon.c
+++ b/drivers/hwmon/dell-smm-hwmon.c
@@ -1073,13 +1073,6 @@ static const struct dmi_system_id i8k_dmi_table[] __initconst = {
},
},
{
- .ident = "Dell XPS421",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
- DMI_MATCH(DMI_PRODUCT_NAME, "XPS L421X"),
- },
- },
- {
.ident = "Dell Studio",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
@@ -1088,14 +1081,6 @@ static const struct dmi_system_id i8k_dmi_table[] __initconst = {
.driver_data = (void *)&i8k_config_data[DELL_STUDIO],
},
{
- .ident = "Dell XPS 13",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
- DMI_MATCH(DMI_PRODUCT_NAME, "XPS13"),
- },
- .driver_data = (void *)&i8k_config_data[DELL_XPS],
- },
- {
.ident = "Dell XPS M140",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
@@ -1104,17 +1089,10 @@ static const struct dmi_system_id i8k_dmi_table[] __initconst = {
.driver_data = (void *)&i8k_config_data[DELL_XPS],
},
{
- .ident = "Dell XPS 15 9560",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
- DMI_MATCH(DMI_PRODUCT_NAME, "XPS 15 9560"),
- },
- },
- {
- .ident = "Dell XPS 15 9570",
+ .ident = "Dell XPS",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
- DMI_MATCH(DMI_PRODUCT_NAME, "XPS 15 9570"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "XPS"),
},
},
{ }
diff --git a/drivers/hwmon/gsc-hwmon.c b/drivers/hwmon/gsc-hwmon.c
new file mode 100644
index 000000000000..2137bc65829d
--- /dev/null
+++ b/drivers/hwmon/gsc-hwmon.c
@@ -0,0 +1,390 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Driver for Gateworks System Controller Hardware Monitor module
+ *
+ * Copyright (C) 2020 Gateworks Corporation
+ */
+#include <linux/hwmon.h>
+#include <linux/hwmon-sysfs.h>
+#include <linux/mfd/gsc.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+
+#include <linux/platform_data/gsc_hwmon.h>
+
+#define GSC_HWMON_MAX_TEMP_CH 16
+#define GSC_HWMON_MAX_IN_CH 16
+
+#define GSC_HWMON_RESOLUTION 12
+#define GSC_HWMON_VREF 2500
+
+struct gsc_hwmon_data {
+ struct gsc_dev *gsc;
+ struct gsc_hwmon_platform_data *pdata;
+ struct regmap *regmap;
+ const struct gsc_hwmon_channel *temp_ch[GSC_HWMON_MAX_TEMP_CH];
+ const struct gsc_hwmon_channel *in_ch[GSC_HWMON_MAX_IN_CH];
+ u32 temp_config[GSC_HWMON_MAX_TEMP_CH + 1];
+ u32 in_config[GSC_HWMON_MAX_IN_CH + 1];
+ struct hwmon_channel_info temp_info;
+ struct hwmon_channel_info in_info;
+ const struct hwmon_channel_info *info[3];
+ struct hwmon_chip_info chip;
+};
+
+static struct regmap_bus gsc_hwmon_regmap_bus = {
+ .reg_read = gsc_read,
+ .reg_write = gsc_write,
+};
+
+static const struct regmap_config gsc_hwmon_regmap_config = {
+ .reg_bits = 8,
+ .val_bits = 8,
+ .cache_type = REGCACHE_NONE,
+};
+
+static ssize_t pwm_auto_point_temp_show(struct device *dev,
+ struct device_attribute *devattr,
+ char *buf)
+{
+ struct gsc_hwmon_data *hwmon = dev_get_drvdata(dev);
+ struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
+ u8 reg = hwmon->pdata->fan_base + (2 * attr->index);
+ u8 regs[2];
+ int ret;
+
+ ret = regmap_bulk_read(hwmon->regmap, reg, regs, 2);
+ if (ret)
+ return ret;
+
+ ret = regs[0] | regs[1] << 8;
+ return sprintf(buf, "%d\n", ret * 10);
+}
+
+static ssize_t pwm_auto_point_temp_store(struct device *dev,
+ struct device_attribute *devattr,
+ const char *buf, size_t count)
+{
+ struct gsc_hwmon_data *hwmon = dev_get_drvdata(dev);
+ struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
+ u8 reg = hwmon->pdata->fan_base + (2 * attr->index);
+ u8 regs[2];
+ long temp;
+ int err;
+
+ if (kstrtol(buf, 10, &temp))
+ return -EINVAL;
+
+ temp = clamp_val(temp, 0, 10000);
+ temp = DIV_ROUND_CLOSEST(temp, 10);
+
+ regs[0] = temp & 0xff;
+ regs[1] = (temp >> 8) & 0xff;
+ err = regmap_bulk_write(hwmon->regmap, reg, regs, 2);
+ if (err)
+ return err;
+
+ return count;
+}
+
+static ssize_t pwm_auto_point_pwm_show(struct device *dev,
+ struct device_attribute *devattr,
+ char *buf)
+{
+ struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
+
+ return sprintf(buf, "%d\n", 255 * (50 + (attr->index * 10)) / 100);
+}
+
+static SENSOR_DEVICE_ATTR_RO(pwm1_auto_point1_pwm, pwm_auto_point_pwm, 0);
+static SENSOR_DEVICE_ATTR_RW(pwm1_auto_point1_temp, pwm_auto_point_temp, 0);
+
+static SENSOR_DEVICE_ATTR_RO(pwm1_auto_point2_pwm, pwm_auto_point_pwm, 1);
+static SENSOR_DEVICE_ATTR_RW(pwm1_auto_point2_temp, pwm_auto_point_temp, 1);
+
+static SENSOR_DEVICE_ATTR_RO(pwm1_auto_point3_pwm, pwm_auto_point_pwm, 2);
+static SENSOR_DEVICE_ATTR_RW(pwm1_auto_point3_temp, pwm_auto_point_temp, 2);
+
+static SENSOR_DEVICE_ATTR_RO(pwm1_auto_point4_pwm, pwm_auto_point_pwm, 3);
+static SENSOR_DEVICE_ATTR_RW(pwm1_auto_point4_temp, pwm_auto_point_temp, 3);
+
+static SENSOR_DEVICE_ATTR_RO(pwm1_auto_point5_pwm, pwm_auto_point_pwm, 4);
+static SENSOR_DEVICE_ATTR_RW(pwm1_auto_point5_temp, pwm_auto_point_temp, 4);
+
+static SENSOR_DEVICE_ATTR_RO(pwm1_auto_point6_pwm, pwm_auto_point_pwm, 5);
+static SENSOR_DEVICE_ATTR_RW(pwm1_auto_point6_temp, pwm_auto_point_temp, 5);
+
+static struct attribute *gsc_hwmon_attributes[] = {
+ &sensor_dev_attr_pwm1_auto_point1_pwm.dev_attr.attr,
+ &sensor_dev_attr_pwm1_auto_point1_temp.dev_attr.attr,
+ &sensor_dev_attr_pwm1_auto_point2_pwm.dev_attr.attr,
+ &sensor_dev_attr_pwm1_auto_point2_temp.dev_attr.attr,
+ &sensor_dev_attr_pwm1_auto_point3_pwm.dev_attr.attr,
+ &sensor_dev_attr_pwm1_auto_point3_temp.dev_attr.attr,
+ &sensor_dev_attr_pwm1_auto_point4_pwm.dev_attr.attr,
+ &sensor_dev_attr_pwm1_auto_point4_temp.dev_attr.attr,
+ &sensor_dev_attr_pwm1_auto_point5_pwm.dev_attr.attr,
+ &sensor_dev_attr_pwm1_auto_point5_temp.dev_attr.attr,
+ &sensor_dev_attr_pwm1_auto_point6_pwm.dev_attr.attr,
+ &sensor_dev_attr_pwm1_auto_point6_temp.dev_attr.attr,
+ NULL
+};
+
+static const struct attribute_group gsc_hwmon_group = {
+ .attrs = gsc_hwmon_attributes,
+};
+__ATTRIBUTE_GROUPS(gsc_hwmon);
+
+static int
+gsc_hwmon_read(struct device *dev, enum hwmon_sensor_types type, u32 attr,
+ int channel, long *val)
+{
+ struct gsc_hwmon_data *hwmon = dev_get_drvdata(dev);
+ const struct gsc_hwmon_channel *ch;
+ int sz, ret;
+ long tmp;
+ u8 buf[3];
+
+ switch (type) {
+ case hwmon_in:
+ ch = hwmon->in_ch[channel];
+ break;
+ case hwmon_temp:
+ ch = hwmon->temp_ch[channel];
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ sz = (ch->mode == mode_voltage) ? 3 : 2;
+ ret = regmap_bulk_read(hwmon->regmap, ch->reg, buf, sz);
+ if (ret)
+ return ret;
+
+ tmp = 0;
+ while (sz-- > 0)
+ tmp |= (buf[sz] << (8 * sz));
+
+ switch (ch->mode) {
+ case mode_temperature:
+ if (tmp > 0x8000)
+ tmp -= 0xffff;
+ break;
+ case mode_voltage_raw:
+ tmp = clamp_val(tmp, 0, BIT(GSC_HWMON_RESOLUTION));
+ /* scale based on ref voltage and ADC resolution */
+ tmp *= GSC_HWMON_VREF;
+ tmp >>= GSC_HWMON_RESOLUTION;
+ /* scale based on optional voltage divider */
+ if (ch->vdiv[0] && ch->vdiv[1]) {
+ tmp *= (ch->vdiv[0] + ch->vdiv[1]);
+ tmp /= ch->vdiv[1];
+ }
+ /* adjust by mV offset */
+ tmp += ch->mvoffset;
+ break;
+ case mode_voltage:
+ /* no adjustment needed */
+ break;
+ }
+
+ *val = tmp;
+
+ return 0;
+}
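+
+/*
+ * Worked example for the mode_voltage_raw scaling above (the divider values
+ * are hypothetical): a raw reading of 2048 becomes 2048 * 2500 >> 12 =
+ * 1250 mV, and a 10k/10k "gw,voltage-divider-ohms" divider then scales it to
+ * 1250 * (10000 + 10000) / 10000 = 2500 mV before the offset is applied.
+ */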
+
+static int
+gsc_hwmon_read_string(struct device *dev, enum hwmon_sensor_types type,
+ u32 attr, int channel, const char **buf)
+{
+ struct gsc_hwmon_data *hwmon = dev_get_drvdata(dev);
+
+ switch (type) {
+ case hwmon_in:
+ *buf = hwmon->in_ch[channel]->name;
+ break;
+ case hwmon_temp:
+ *buf = hwmon->temp_ch[channel]->name;
+ break;
+ default:
+ return -ENOTSUPP;
+ }
+
+ return 0;
+}
+
+static umode_t
+gsc_hwmon_is_visible(const void *_data, enum hwmon_sensor_types type, u32 attr,
+ int ch)
+{
+ return 0444;
+}
+
+static const struct hwmon_ops gsc_hwmon_ops = {
+ .is_visible = gsc_hwmon_is_visible,
+ .read = gsc_hwmon_read,
+ .read_string = gsc_hwmon_read_string,
+};
+
+static struct gsc_hwmon_platform_data *
+gsc_hwmon_get_devtree_pdata(struct device *dev)
+{
+ struct gsc_hwmon_platform_data *pdata;
+ struct gsc_hwmon_channel *ch;
+ struct fwnode_handle *child;
+ struct device_node *fan;
+ int nchannels;
+
+ nchannels = device_get_child_node_count(dev);
+ if (nchannels == 0)
+ return ERR_PTR(-ENODEV);
+
+ pdata = devm_kzalloc(dev,
+ sizeof(*pdata) + nchannels * sizeof(*ch),
+ GFP_KERNEL);
+ if (!pdata)
+ return ERR_PTR(-ENOMEM);
+ ch = (struct gsc_hwmon_channel *)(pdata + 1);
+ pdata->channels = ch;
+ pdata->nchannels = nchannels;
+
+ /* fan controller base address */
+ fan = of_find_compatible_node(dev->parent->of_node, NULL, "gw,gsc-fan");
+ if (fan && of_property_read_u32(fan, "reg", &pdata->fan_base)) {
+ dev_err(dev, "fan node without base\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ /* allocate structures for channels and count instances of each type */
+ device_for_each_child_node(dev, child) {
+ if (fwnode_property_read_string(child, "label", &ch->name)) {
+ dev_err(dev, "channel without label\n");
+ fwnode_handle_put(child);
+ return ERR_PTR(-EINVAL);
+ }
+ if (fwnode_property_read_u32(child, "reg", &ch->reg)) {
+ dev_err(dev, "channel without reg\n");
+ fwnode_handle_put(child);
+ return ERR_PTR(-EINVAL);
+ }
+ if (fwnode_property_read_u32(child, "gw,mode", &ch->mode)) {
+ dev_err(dev, "channel without mode\n");
+ fwnode_handle_put(child);
+ return ERR_PTR(-EINVAL);
+ }
+ if (ch->mode > mode_max) {
+ dev_err(dev, "invalid channel mode\n");
+ fwnode_handle_put(child);
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (!fwnode_property_read_u32(child,
+ "gw,voltage-offset-microvolt",
+ &ch->mvoffset))
+ ch->mvoffset /= 1000;
+ fwnode_property_read_u32_array(child,
+ "gw,voltage-divider-ohms",
+ ch->vdiv, ARRAY_SIZE(ch->vdiv));
+ ch++;
+ }
+
+ return pdata;
+}
+
+static int gsc_hwmon_probe(struct platform_device *pdev)
+{
+ struct gsc_dev *gsc = dev_get_drvdata(pdev->dev.parent);
+ struct device *dev = &pdev->dev;
+ struct device *hwmon_dev;
+ struct gsc_hwmon_platform_data *pdata = dev_get_platdata(dev);
+ struct gsc_hwmon_data *hwmon;
+ const struct attribute_group **groups;
+ int i, i_in, i_temp;
+
+ if (!pdata) {
+ pdata = gsc_hwmon_get_devtree_pdata(dev);
+ if (IS_ERR(pdata))
+ return PTR_ERR(pdata);
+ }
+
+ hwmon = devm_kzalloc(dev, sizeof(*hwmon), GFP_KERNEL);
+ if (!hwmon)
+ return -ENOMEM;
+ hwmon->gsc = gsc;
+ hwmon->pdata = pdata;
+
+ hwmon->regmap = devm_regmap_init(dev, &gsc_hwmon_regmap_bus,
+ gsc->i2c_hwmon,
+ &gsc_hwmon_regmap_config);
+ if (IS_ERR(hwmon->regmap))
+ return PTR_ERR(hwmon->regmap);
+
+ for (i = 0, i_in = 0, i_temp = 0; i < hwmon->pdata->nchannels; i++) {
+ const struct gsc_hwmon_channel *ch = &pdata->channels[i];
+
+ switch (ch->mode) {
+ case mode_temperature:
+ if (i_temp == GSC_HWMON_MAX_TEMP_CH) {
+ dev_err(gsc->dev, "too many temp channels\n");
+ return -EINVAL;
+ }
+ hwmon->temp_ch[i_temp] = ch;
+ hwmon->temp_config[i_temp] = HWMON_T_INPUT |
+ HWMON_T_LABEL;
+ i_temp++;
+ break;
+ case mode_voltage:
+ case mode_voltage_raw:
+ if (i_in == GSC_HWMON_MAX_IN_CH) {
+ dev_err(gsc->dev, "too many input channels\n");
+ return -EINVAL;
+ }
+ hwmon->in_ch[i_in] = ch;
+ hwmon->in_config[i_in] =
+ HWMON_I_INPUT | HWMON_I_LABEL;
+ i_in++;
+ break;
+ default:
+ dev_err(gsc->dev, "invalid mode: %d\n", ch->mode);
+ return -EINVAL;
+ }
+ }
+
+ /* setup config structures */
+ hwmon->chip.ops = &gsc_hwmon_ops;
+ hwmon->chip.info = hwmon->info;
+ hwmon->info[0] = &hwmon->temp_info;
+ hwmon->info[1] = &hwmon->in_info;
+ hwmon->temp_info.type = hwmon_temp;
+ hwmon->temp_info.config = hwmon->temp_config;
+ hwmon->in_info.type = hwmon_in;
+ hwmon->in_info.config = hwmon->in_config;
+
+ groups = pdata->fan_base ? gsc_hwmon_groups : NULL;
+ hwmon_dev = devm_hwmon_device_register_with_info(dev,
+ KBUILD_MODNAME, hwmon,
+ &hwmon->chip, groups);
+ return PTR_ERR_OR_ZERO(hwmon_dev);
+}
+
+static const struct of_device_id gsc_hwmon_of_match[] = {
+ { .compatible = "gw,gsc-adc", },
+ {}
+};
+
+static struct platform_driver gsc_hwmon_driver = {
+ .driver = {
+ .name = "gsc-hwmon",
+ .of_match_table = gsc_hwmon_of_match,
+ },
+ .probe = gsc_hwmon_probe,
+};
+
+module_platform_driver(gsc_hwmon_driver);
+
+MODULE_AUTHOR("Tim Harvey <tharvey@gateworks.com>");
+MODULE_DESCRIPTION("GSC hardware monitor driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/hwmon/hwmon.c b/drivers/hwmon/hwmon.c
index 6a30fb453f7a..3f596a5328da 100644
--- a/drivers/hwmon/hwmon.c
+++ b/drivers/hwmon/hwmon.c
@@ -15,6 +15,7 @@
#include <linux/gfp.h>
#include <linux/hwmon.h>
#include <linux/idr.h>
+#include <linux/list.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/slab.h>
@@ -31,7 +32,7 @@ struct hwmon_device {
const char *name;
struct device dev;
const struct hwmon_chip_info *chip;
-
+ struct list_head tzdata;
struct attribute_group group;
const struct attribute_group **groups;
};
@@ -55,12 +56,12 @@ struct hwmon_device_attribute {
/*
* Thermal zone information
- * In addition to the reference to the hwmon device,
- * also provides the sensor index.
*/
struct hwmon_thermal_data {
+ struct list_head node; /* hwmon tzdata list entry */
struct device *dev; /* Reference to hwmon device */
int index; /* sensor index */
+ struct thermal_zone_device *tzd;/* thermal zone device */
};
static ssize_t
@@ -156,10 +157,17 @@ static const struct thermal_zone_of_device_ops hwmon_thermal_ops = {
.get_temp = hwmon_thermal_get_temp,
};
+static void hwmon_thermal_remove_sensor(void *data)
+{
+ list_del(data);
+}
+
static int hwmon_thermal_add_sensor(struct device *dev, int index)
{
+ struct hwmon_device *hwdev = to_hwmon_device(dev);
struct hwmon_thermal_data *tdata;
struct thermal_zone_device *tzd;
+ int err;
tdata = devm_kzalloc(dev, sizeof(*tdata), GFP_KERNEL);
if (!tdata)
@@ -177,13 +185,68 @@ static int hwmon_thermal_add_sensor(struct device *dev, int index)
if (IS_ERR(tzd) && (PTR_ERR(tzd) != -ENODEV))
return PTR_ERR(tzd);
+ err = devm_add_action(dev, hwmon_thermal_remove_sensor, &tdata->node);
+ if (err)
+ return err;
+
+ tdata->tzd = tzd;
+ list_add(&tdata->node, &hwdev->tzdata);
+
return 0;
}
+
+static int hwmon_thermal_register_sensors(struct device *dev)
+{
+ struct hwmon_device *hwdev = to_hwmon_device(dev);
+ const struct hwmon_chip_info *chip = hwdev->chip;
+ const struct hwmon_channel_info **info = chip->info;
+ void *drvdata = dev_get_drvdata(dev);
+ int i;
+
+ for (i = 1; info[i]; i++) {
+ int j;
+
+ if (info[i]->type != hwmon_temp)
+ continue;
+
+ for (j = 0; info[i]->config[j]; j++) {
+ int err;
+
+ if (!(info[i]->config[j] & HWMON_T_INPUT) ||
+ !chip->ops->is_visible(drvdata, hwmon_temp,
+ hwmon_temp_input, j))
+ continue;
+
+ err = hwmon_thermal_add_sensor(dev, j);
+ if (err)
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+static void hwmon_thermal_notify(struct device *dev, int index)
+{
+ struct hwmon_device *hwdev = to_hwmon_device(dev);
+ struct hwmon_thermal_data *tzdata;
+
+ list_for_each_entry(tzdata, &hwdev->tzdata, node) {
+ if (tzdata->index == index) {
+ thermal_zone_device_update(tzdata->tzd,
+ THERMAL_EVENT_UNSPECIFIED);
+ }
+ }
+}
+
#else
-static int hwmon_thermal_add_sensor(struct device *dev, int index)
+static int hwmon_thermal_register_sensors(struct device *dev)
{
return 0;
}
+
+static void hwmon_thermal_notify(struct device *dev, int index) { }
+
#endif /* IS_REACHABLE(CONFIG_THERMAL) && ... */
static int hwmon_attr_base(enum hwmon_sensor_types type)
@@ -511,6 +574,35 @@ static const int __templates_size[] = {
[hwmon_intrusion] = ARRAY_SIZE(hwmon_intrusion_attr_templates),
};
+int hwmon_notify_event(struct device *dev, enum hwmon_sensor_types type,
+ u32 attr, int channel)
+{
+ char sattr[MAX_SYSFS_ATTR_NAME_LENGTH];
+ const char * const *templates;
+ const char *template;
+ int base;
+
+ if (type >= ARRAY_SIZE(__templates))
+ return -EINVAL;
+ if (attr >= __templates_size[type])
+ return -EINVAL;
+
+ templates = __templates[type];
+ template = templates[attr];
+
+ base = hwmon_attr_base(type);
+
+ scnprintf(sattr, MAX_SYSFS_ATTR_NAME_LENGTH, template, base + channel);
+ sysfs_notify(&dev->kobj, NULL, sattr);
+ kobject_uevent(&dev->kobj, KOBJ_CHANGE);
+
+ if (type == hwmon_temp)
+ hwmon_thermal_notify(dev, channel);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(hwmon_notify_event);
+
static int hwmon_num_channel_attrs(const struct hwmon_channel_info *info)
{
int i, n;
@@ -596,7 +688,7 @@ __hwmon_device_register(struct device *dev, const char *name, void *drvdata,
{
struct hwmon_device *hwdev;
struct device *hdev;
- int i, j, err, id;
+ int i, err, id;
/* Complain about invalid characters in hwmon name attribute */
if (name && (!strlen(name) || strpbrk(name, "-* \t\n")))
@@ -661,33 +753,19 @@ __hwmon_device_register(struct device *dev, const char *name, void *drvdata,
if (err)
goto free_hwmon;
+ INIT_LIST_HEAD(&hwdev->tzdata);
+
if (dev && dev->of_node && chip && chip->ops->read &&
chip->info[0]->type == hwmon_chip &&
(chip->info[0]->config[0] & HWMON_C_REGISTER_TZ)) {
- const struct hwmon_channel_info **info = chip->info;
-
- for (i = 1; info[i]; i++) {
- if (info[i]->type != hwmon_temp)
- continue;
-
- for (j = 0; info[i]->config[j]; j++) {
- if (!chip->ops->is_visible(drvdata, hwmon_temp,
- hwmon_temp_input, j))
- continue;
- if (info[i]->config[j] & HWMON_T_INPUT) {
- err = hwmon_thermal_add_sensor(hdev, j);
- if (err) {
- device_unregister(hdev);
- /*
- * Don't worry about hwdev;
- * hwmon_dev_release(), called
- * from device_unregister(),
- * will free it.
- */
- goto ida_remove;
- }
- }
- }
+ err = hwmon_thermal_register_sensors(hdev);
+ if (err) {
+ device_unregister(hdev);
+ /*
+ * Don't worry about hwdev; hwmon_dev_release(), called
+ * from device_unregister(), will free it.
+ */
+ goto ida_remove;
}
}
diff --git a/drivers/hwmon/ina2xx.c b/drivers/hwmon/ina2xx.c
index e9e78c0b7212..55d474ec7c35 100644
--- a/drivers/hwmon/ina2xx.c
+++ b/drivers/hwmon/ina2xx.c
@@ -74,6 +74,17 @@
#define INA226_READ_AVG(reg) (((reg) & INA226_AVG_RD_MASK) >> 9)
#define INA226_SHIFT_AVG(val) ((val) << 9)
+/* bit number of alert functions in Mask/Enable Register */
+#define INA226_SHUNT_OVER_VOLTAGE_BIT 15
+#define INA226_SHUNT_UNDER_VOLTAGE_BIT 14
+#define INA226_BUS_OVER_VOLTAGE_BIT 13
+#define INA226_BUS_UNDER_VOLTAGE_BIT 12
+#define INA226_POWER_OVER_LIMIT_BIT 11
+
+/* bit mask for alert config bits of Mask/Enable Register */
+#define INA226_ALERT_CONFIG_MASK 0xFC00
+#define INA226_ALERT_FUNCTION_FLAG BIT(4)
+
/* common attrs, ina226 attrs and NULL */
#define INA2XX_MAX_ATTRIBUTE_GROUPS 3
@@ -303,6 +314,145 @@ static ssize_t ina2xx_value_show(struct device *dev,
ina2xx_get_value(data, attr->index, regval));
}
+static int ina226_reg_to_alert(struct ina2xx_data *data, u8 bit, u16 regval)
+{
+ int reg;
+
+ switch (bit) {
+ case INA226_SHUNT_OVER_VOLTAGE_BIT:
+ case INA226_SHUNT_UNDER_VOLTAGE_BIT:
+ reg = INA2XX_SHUNT_VOLTAGE;
+ break;
+ case INA226_BUS_OVER_VOLTAGE_BIT:
+ case INA226_BUS_UNDER_VOLTAGE_BIT:
+ reg = INA2XX_BUS_VOLTAGE;
+ break;
+ case INA226_POWER_OVER_LIMIT_BIT:
+ reg = INA2XX_POWER;
+ break;
+ default:
+ /* programmer goofed */
+ WARN_ON_ONCE(1);
+ return 0;
+ }
+
+ return ina2xx_get_value(data, reg, regval);
+}
+
+/*
+ * Turns alert limit values into register values.
+ * Opposite of the formula in ina2xx_get_value().
+ */
+static s16 ina226_alert_to_reg(struct ina2xx_data *data, u8 bit, int val)
+{
+ switch (bit) {
+ case INA226_SHUNT_OVER_VOLTAGE_BIT:
+ case INA226_SHUNT_UNDER_VOLTAGE_BIT:
+ val *= data->config->shunt_div;
+ return clamp_val(val, SHRT_MIN, SHRT_MAX);
+ case INA226_BUS_OVER_VOLTAGE_BIT:
+ case INA226_BUS_UNDER_VOLTAGE_BIT:
+ val = (val * 1000) << data->config->bus_voltage_shift;
+ val = DIV_ROUND_CLOSEST(val, data->config->bus_voltage_lsb);
+ return clamp_val(val, 0, SHRT_MAX);
+ case INA226_POWER_OVER_LIMIT_BIT:
+ val = DIV_ROUND_CLOSEST(val, data->power_lsb_uW);
+ return clamp_val(val, 0, USHRT_MAX);
+ default:
+ /* programmer goofed */
+ WARN_ON_ONCE(1);
+ return 0;
+ }
+}
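+
+/*
+ * Illustrative example, assuming the usual INA226 scaling (1.25 mV bus
+ * voltage LSB, no shift): writing a 36000 mV limit to in1_crit stores
+ * DIV_ROUND_CLOSEST(36000 * 1000, 1250) = 28800 in the alert limit register.
+ */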
+
+static ssize_t ina226_alert_show(struct device *dev,
+ struct device_attribute *da, char *buf)
+{
+ struct sensor_device_attribute *attr = to_sensor_dev_attr(da);
+ struct ina2xx_data *data = dev_get_drvdata(dev);
+ int regval;
+ int val = 0;
+ int ret;
+
+ mutex_lock(&data->config_lock);
+ ret = regmap_read(data->regmap, INA226_MASK_ENABLE, &regval);
+ if (ret)
+ goto abort;
+
+ if (regval & BIT(attr->index)) {
+ ret = regmap_read(data->regmap, INA226_ALERT_LIMIT, &regval);
+ if (ret)
+ goto abort;
+ val = ina226_reg_to_alert(data, attr->index, regval);
+ }
+
+ ret = snprintf(buf, PAGE_SIZE, "%d\n", val);
+abort:
+ mutex_unlock(&data->config_lock);
+ return ret;
+}
+
+static ssize_t ina226_alert_store(struct device *dev,
+ struct device_attribute *da,
+ const char *buf, size_t count)
+{
+ struct sensor_device_attribute *attr = to_sensor_dev_attr(da);
+ struct ina2xx_data *data = dev_get_drvdata(dev);
+ unsigned long val;
+ int ret;
+
+ ret = kstrtoul(buf, 10, &val);
+ if (ret < 0)
+ return ret;
+
+ /*
+ * Clear all alerts first to avoid accidentally triggering the ALERT
+ * pin due to the register write sequence. Then, only enable the alert
+ * if the value is non-zero.
+ */
+ mutex_lock(&data->config_lock);
+ ret = regmap_update_bits(data->regmap, INA226_MASK_ENABLE,
+ INA226_ALERT_CONFIG_MASK, 0);
+ if (ret < 0)
+ goto abort;
+
+ ret = regmap_write(data->regmap, INA226_ALERT_LIMIT,
+ ina226_alert_to_reg(data, attr->index, val));
+ if (ret < 0)
+ goto abort;
+
+ if (val != 0) {
+ ret = regmap_update_bits(data->regmap, INA226_MASK_ENABLE,
+ INA226_ALERT_CONFIG_MASK,
+ BIT(attr->index));
+ if (ret < 0)
+ goto abort;
+ }
+
+ ret = count;
+abort:
+ mutex_unlock(&data->config_lock);
+ return ret;
+}
+
+static ssize_t ina226_alarm_show(struct device *dev,
+ struct device_attribute *da, char *buf)
+{
+ struct sensor_device_attribute *attr = to_sensor_dev_attr(da);
+ struct ina2xx_data *data = dev_get_drvdata(dev);
+ int regval;
+ int alarm = 0;
+ int ret;
+
+ ret = regmap_read(data->regmap, INA226_MASK_ENABLE, &regval);
+ if (ret)
+ return ret;
+
+ alarm = (regval & BIT(attr->index)) &&
+ (regval & INA226_ALERT_FUNCTION_FLAG);
+ return snprintf(buf, PAGE_SIZE, "%d\n", alarm);
+}
+
/*
* In order to keep calibration register value fixed, the product
* of current_lsb and shunt_resistor should also be fixed and equal
@@ -392,15 +542,38 @@ static ssize_t ina226_interval_show(struct device *dev,
/* shunt voltage */
static SENSOR_DEVICE_ATTR_RO(in0_input, ina2xx_value, INA2XX_SHUNT_VOLTAGE);
+/* shunt voltage over/under voltage alert setting and alarm */
+static SENSOR_DEVICE_ATTR_RW(in0_crit, ina226_alert,
+ INA226_SHUNT_OVER_VOLTAGE_BIT);
+static SENSOR_DEVICE_ATTR_RW(in0_lcrit, ina226_alert,
+ INA226_SHUNT_UNDER_VOLTAGE_BIT);
+static SENSOR_DEVICE_ATTR_RO(in0_crit_alarm, ina226_alarm,
+ INA226_SHUNT_OVER_VOLTAGE_BIT);
+static SENSOR_DEVICE_ATTR_RO(in0_lcrit_alarm, ina226_alarm,
+ INA226_SHUNT_UNDER_VOLTAGE_BIT);
/* bus voltage */
static SENSOR_DEVICE_ATTR_RO(in1_input, ina2xx_value, INA2XX_BUS_VOLTAGE);
+/* bus voltage over/under voltage alert setting and alarm */
+static SENSOR_DEVICE_ATTR_RW(in1_crit, ina226_alert,
+ INA226_BUS_OVER_VOLTAGE_BIT);
+static SENSOR_DEVICE_ATTR_RW(in1_lcrit, ina226_alert,
+ INA226_BUS_UNDER_VOLTAGE_BIT);
+static SENSOR_DEVICE_ATTR_RO(in1_crit_alarm, ina226_alarm,
+ INA226_BUS_OVER_VOLTAGE_BIT);
+static SENSOR_DEVICE_ATTR_RO(in1_lcrit_alarm, ina226_alarm,
+ INA226_BUS_UNDER_VOLTAGE_BIT);
/* calculated current */
static SENSOR_DEVICE_ATTR_RO(curr1_input, ina2xx_value, INA2XX_CURRENT);
/* calculated power */
static SENSOR_DEVICE_ATTR_RO(power1_input, ina2xx_value, INA2XX_POWER);
+/* over-limit power alert setting and alarm */
+static SENSOR_DEVICE_ATTR_RW(power1_crit, ina226_alert,
+ INA226_POWER_OVER_LIMIT_BIT);
+static SENSOR_DEVICE_ATTR_RO(power1_crit_alarm, ina226_alarm,
+ INA226_POWER_OVER_LIMIT_BIT);
/* shunt resistance */
static SENSOR_DEVICE_ATTR_RW(shunt_resistor, ina2xx_shunt, INA2XX_CALIBRATION);
@@ -423,6 +596,16 @@ static const struct attribute_group ina2xx_group = {
};
static struct attribute *ina226_attrs[] = {
+ &sensor_dev_attr_in0_crit.dev_attr.attr,
+ &sensor_dev_attr_in0_lcrit.dev_attr.attr,
+ &sensor_dev_attr_in0_crit_alarm.dev_attr.attr,
+ &sensor_dev_attr_in0_lcrit_alarm.dev_attr.attr,
+ &sensor_dev_attr_in1_crit.dev_attr.attr,
+ &sensor_dev_attr_in1_lcrit.dev_attr.attr,
+ &sensor_dev_attr_in1_crit_alarm.dev_attr.attr,
+ &sensor_dev_attr_in1_lcrit_alarm.dev_attr.attr,
+ &sensor_dev_attr_power1_crit.dev_attr.attr,
+ &sensor_dev_attr_power1_crit_alarm.dev_attr.attr,
&sensor_dev_attr_update_interval.dev_attr.attr,
NULL,
};
diff --git a/drivers/hwmon/lm70.c b/drivers/hwmon/lm70.c
index 4122e59f0bb4..ae2b84263a44 100644
--- a/drivers/hwmon/lm70.c
+++ b/drivers/hwmon/lm70.c
@@ -25,7 +25,7 @@
#include <linux/spi/spi.h>
#include <linux/slab.h>
#include <linux/of_device.h>
-
+#include <linux/acpi.h>
#define DRVNAME "lm70"
@@ -148,18 +148,50 @@ static const struct of_device_id lm70_of_ids[] = {
MODULE_DEVICE_TABLE(of, lm70_of_ids);
#endif
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id lm70_acpi_ids[] = {
+ {
+ .id = "LM000070",
+ .driver_data = LM70_CHIP_LM70,
+ },
+ {
+ .id = "TMP00121",
+ .driver_data = LM70_CHIP_TMP121,
+ },
+ {
+ .id = "LM000071",
+ .driver_data = LM70_CHIP_LM71,
+ },
+ {
+ .id = "LM000074",
+ .driver_data = LM70_CHIP_LM74,
+ },
+ {},
+};
+MODULE_DEVICE_TABLE(acpi, lm70_acpi_ids);
+#endif
+
static int lm70_probe(struct spi_device *spi)
{
- const struct of_device_id *match;
+ const struct of_device_id *of_match;
struct device *hwmon_dev;
struct lm70 *p_lm70;
int chip;
- match = of_match_device(lm70_of_ids, &spi->dev);
- if (match)
- chip = (int)(uintptr_t)match->data;
- else
- chip = spi_get_device_id(spi)->driver_data;
+ of_match = of_match_device(lm70_of_ids, &spi->dev);
+ if (of_match)
+ chip = (int)(uintptr_t)of_match->data;
+ else {
+#ifdef CONFIG_ACPI
+ const struct acpi_device_id *acpi_match;
+
+ acpi_match = acpi_match_device(lm70_acpi_ids, &spi->dev);
+ if (acpi_match)
+ chip = (int)(uintptr_t)acpi_match->driver_data;
+ else
+#endif
+ chip = spi_get_device_id(spi)->driver_data;
+ }
/* signaling is SPI_MODE_0 */
if (spi->mode & (SPI_CPOL | SPI_CPHA))
@@ -195,6 +227,7 @@ static struct spi_driver lm70_driver = {
.driver = {
.name = "lm70",
.of_match_table = of_match_ptr(lm70_of_ids),
+ .acpi_match_table = ACPI_PTR(lm70_acpi_ids),
},
.id_table = lm70_ids,
.probe = lm70_probe,
diff --git a/drivers/hwmon/lm75.c b/drivers/hwmon/lm75.c
index 5e6392294c03..ba0be48aeadd 100644
--- a/drivers/hwmon/lm75.c
+++ b/drivers/hwmon/lm75.c
@@ -797,8 +797,10 @@ static int lm75_detect(struct i2c_client *new_client,
/* First check for LM75A */
if (i2c_smbus_read_byte_data(new_client, 7) == LM75A_ID) {
- /* LM75A returns 0xff on unused registers so
- just to be sure we check for that too. */
+ /*
+ * LM75A returns 0xff on unused registers so
+ * just to be sure we check for that too.
+ */
if (i2c_smbus_read_byte_data(new_client, 4) != 0xff
|| i2c_smbus_read_byte_data(new_client, 5) != 0xff
|| i2c_smbus_read_byte_data(new_client, 6) != 0xff)
@@ -849,6 +851,7 @@ static int lm75_suspend(struct device *dev)
{
int status;
struct i2c_client *client = to_i2c_client(dev);
+
status = i2c_smbus_read_byte_data(client, LM75_REG_CONF);
if (status < 0) {
dev_dbg(&client->dev, "Can't read config? %d\n", status);
@@ -863,6 +866,7 @@ static int lm75_resume(struct device *dev)
{
int status;
struct i2c_client *client = to_i2c_client(dev);
+
status = i2c_smbus_read_byte_data(client, LM75_REG_CONF);
if (status < 0) {
dev_dbg(&client->dev, "Can't read config? %d\n", status);
diff --git a/drivers/hwmon/lm75.h b/drivers/hwmon/lm75.h
index b614e6328566..a398171162a8 100644
--- a/drivers/hwmon/lm75.h
+++ b/drivers/hwmon/lm75.h
@@ -1,17 +1,15 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
- lm75.h - Part of lm_sensors, Linux kernel modules for hardware
- monitoring
- Copyright (c) 2003 Mark M. Hoffman <mhoffman@lightlink.com>
-
-*/
+ * lm75.h - Part of lm_sensors, Linux kernel modules for hardware monitoring
+ * Copyright (c) 2003 Mark M. Hoffman <mhoffman@lightlink.com>
+ */
/*
- This file contains common code for encoding/decoding LM75 type
- temperature readings, which are emulated by many of the chips
- we support. As the user is unlikely to load more than one driver
- which contains this code, we don't worry about the wasted space.
-*/
+ * This file contains common code for encoding/decoding LM75 type
+ * temperature readings, which are emulated by many of the chips
+ * we support. As the user is unlikely to load more than one driver
+ * which contains this code, we don't worry about the wasted space.
+ */
#include <linux/kernel.h>
@@ -20,18 +18,23 @@
#define LM75_TEMP_MAX 125000
#define LM75_SHUTDOWN 0x01
-/* TEMP: 0.001C/bit (-55C to +125C)
- REG: (0.5C/bit, two's complement) << 7 */
+/*
+ * TEMP: 0.001C/bit (-55C to +125C)
+ * REG: (0.5C/bit, two's complement) << 7
+ */
static inline u16 LM75_TEMP_TO_REG(long temp)
{
int ntemp = clamp_val(temp, LM75_TEMP_MIN, LM75_TEMP_MAX);
+
ntemp += (ntemp < 0 ? -250 : 250);
return (u16)((ntemp / 500) << 7);
}
static inline int LM75_TEMP_FROM_REG(u16 reg)
{
- /* use integer division instead of equivalent right shift to
- guarantee arithmetic shift and preserve the sign */
+ /*
+ * use integer division instead of equivalent right shift to
+ * guarantee arithmetic shift and preserve the sign
+ */
return ((s16)reg / 128) * 500;
}
diff --git a/drivers/hwmon/lm90.c b/drivers/hwmon/lm90.c
index 9b3c9f390ef8..7bdc664af55b 100644
--- a/drivers/hwmon/lm90.c
+++ b/drivers/hwmon/lm90.c
@@ -35,6 +35,14 @@
* explicitly as max6659, or if its address is not 0x4c.
* These chips lack the remote temperature offset feature.
*
+ * This driver also supports the MAX6654 chip made by Maxim. This chip can
+ * be at 9 different addresses, similar to MAX6680/MAX6681. The MAX6654 is
+ * otherwise similar to MAX6657/MAX6658/MAX6659. Extended range is available
+ * by setting the configuration register accordingly, and is done during
+ * initialization. Extended precision is only available at conversion rates
+ * of 1 Hz and slower. Note that extended precision is not enabled by
+ * default, as this driver initializes all chips to 2 Hz by design.
+ *
* This driver also supports the MAX6646, MAX6647, MAX6648, MAX6649 and
* MAX6692 chips made by Maxim. These are again similar to the LM86,
* but they use unsigned temperature values and can report temperatures
@@ -94,8 +102,8 @@
* have address 0x4d.
* MAX6647 has address 0x4e.
* MAX6659 can have address 0x4c, 0x4d or 0x4e.
- * MAX6680 and MAX6681 can have address 0x18, 0x19, 0x1a, 0x29, 0x2a, 0x2b,
- * 0x4c, 0x4d or 0x4e.
+ * MAX6654, MAX6680, and MAX6681 can have address 0x18, 0x19, 0x1a, 0x29,
+ * 0x2a, 0x2b, 0x4c, 0x4d or 0x4e.
* SA56004 can have address 0x48 through 0x4F.
*/
@@ -104,7 +112,7 @@ static const unsigned short normal_i2c[] = {
0x4d, 0x4e, 0x4f, I2C_CLIENT_END };
enum chips { lm90, adm1032, lm99, lm86, max6657, max6659, adt7461, max6680,
- max6646, w83l771, max6696, sa56004, g781, tmp451 };
+ max6646, w83l771, max6696, sa56004, g781, tmp451, max6654 };
/*
* The LM90 registers
@@ -145,7 +153,7 @@ enum chips { lm90, adm1032, lm99, lm86, max6657, max6659, adt7461, max6680,
#define LM90_REG_R_TCRIT_HYST 0x21
#define LM90_REG_W_TCRIT_HYST 0x21
-/* MAX6646/6647/6649/6657/6658/6659/6695/6696 registers */
+/* MAX6646/6647/6649/6654/6657/6658/6659/6695/6696 registers */
#define MAX6657_REG_R_LOCAL_TEMPL 0x11
#define MAX6696_REG_R_STATUS2 0x12
@@ -209,6 +217,7 @@ static const struct i2c_device_id lm90_id[] = {
{ "max6646", max6646 },
{ "max6647", max6646 },
{ "max6649", max6646 },
+ { "max6654", max6654 },
{ "max6657", max6657 },
{ "max6658", max6657 },
{ "max6659", max6659 },
@@ -270,6 +279,10 @@ static const struct of_device_id __maybe_unused lm90_of_match[] = {
.data = (void *)max6646
},
{
+ .compatible = "dallas,max6654",
+ .data = (void *)max6654
+ },
+ {
.compatible = "dallas,max6657",
.data = (void *)max6657
},
@@ -367,6 +380,11 @@ static const struct lm90_params lm90_params[] = {
.max_convrate = 6,
.reg_local_ext = MAX6657_REG_R_LOCAL_TEMPL,
},
+ [max6654] = {
+ .alert_alarms = 0x7c,
+ .max_convrate = 7,
+ .reg_local_ext = MAX6657_REG_R_LOCAL_TEMPL,
+ },
[max6657] = {
.flags = LM90_PAUSE_FOR_CONFIG,
.alert_alarms = 0x7c,
@@ -1557,6 +1575,16 @@ static int lm90_detect(struct i2c_client *client,
&& (config1 & 0x3f) == 0x00
&& convrate <= 0x07) {
name = "max6646";
+ } else
+ /*
+ * The chip_id of the MAX6654 holds the revision of the chip.
+ * The lowest 3 bits of the config1 register are unused and
+ * should return zero when read.
+ */
+ if (chip_id == 0x08
+ && (config1 & 0x07) == 0x00
+ && convrate <= 0x07) {
+ name = "max6654";
}
} else
if (address == 0x4C
@@ -1661,6 +1689,15 @@ static int lm90_init_client(struct i2c_client *client, struct lm90_data *data)
config |= 0x18;
/*
+ * Put MAX6654 into extended range (0x20, extend minimum range from
+ * 0 degrees to -64 degrees). Note that extended resolution is not
+ * possible on the MAX6654 unless conversion rate is set to 1 Hz or
+ * slower, which is intentionally not done by default.
+ */
+ if (data->kind == max6654)
+ config |= 0x20;
+
+ /*
* Select external channel 0 for max6695/96
*/
if (data->kind == max6696)
diff --git a/drivers/hwmon/nct6775.c b/drivers/hwmon/nct6775.c
index 7efa6bfef060..e7e1ddc1d631 100644
--- a/drivers/hwmon/nct6775.c
+++ b/drivers/hwmon/nct6775.c
@@ -2047,7 +2047,7 @@ store_temp_beep(struct device *dev, struct device_attribute *attr,
static umode_t nct6775_in_is_visible(struct kobject *kobj,
struct attribute *attr, int index)
{
- struct device *dev = container_of(kobj, struct device, kobj);
+ struct device *dev = kobj_to_dev(kobj);
struct nct6775_data *data = dev_get_drvdata(dev);
int in = index / 5; /* voltage index */
@@ -2253,7 +2253,7 @@ store_fan_pulses(struct device *dev, struct device_attribute *attr,
static umode_t nct6775_fan_is_visible(struct kobject *kobj,
struct attribute *attr, int index)
{
- struct device *dev = container_of(kobj, struct device, kobj);
+ struct device *dev = kobj_to_dev(kobj);
struct nct6775_data *data = dev_get_drvdata(dev);
int fan = index / 6; /* fan index */
int nr = index % 6; /* attribute index */
@@ -2440,7 +2440,7 @@ store_temp_type(struct device *dev, struct device_attribute *attr,
static umode_t nct6775_temp_is_visible(struct kobject *kobj,
struct attribute *attr, int index)
{
- struct device *dev = container_of(kobj, struct device, kobj);
+ struct device *dev = kobj_to_dev(kobj);
struct nct6775_data *data = dev_get_drvdata(dev);
int temp = index / 10; /* temp index */
int nr = index % 10; /* attribute index */
@@ -3257,7 +3257,7 @@ store_auto_temp(struct device *dev, struct device_attribute *attr,
static umode_t nct6775_pwm_is_visible(struct kobject *kobj,
struct attribute *attr, int index)
{
- struct device *dev = container_of(kobj, struct device, kobj);
+ struct device *dev = kobj_to_dev(kobj);
struct nct6775_data *data = dev_get_drvdata(dev);
int pwm = index / 36; /* pwm index */
int nr = index % 36; /* attribute index */
@@ -3459,7 +3459,7 @@ static SENSOR_DEVICE_ATTR(beep_enable, S_IWUSR | S_IRUGO, show_beep,
static umode_t nct6775_other_is_visible(struct kobject *kobj,
struct attribute *attr, int index)
{
- struct device *dev = container_of(kobj, struct device, kobj);
+ struct device *dev = kobj_to_dev(kobj);
struct nct6775_data *data = dev_get_drvdata(dev);
if (index == 0 && !data->have_vid)
diff --git a/drivers/hwmon/nct7802.c b/drivers/hwmon/nct7802.c
index 2e97e56c72c7..570df8eb5272 100644
--- a/drivers/hwmon/nct7802.c
+++ b/drivers/hwmon/nct7802.c
@@ -679,7 +679,7 @@ static struct attribute *nct7802_temp_attrs[] = {
static umode_t nct7802_temp_is_visible(struct kobject *kobj,
struct attribute *attr, int index)
{
- struct device *dev = container_of(kobj, struct device, kobj);
+ struct device *dev = kobj_to_dev(kobj);
struct nct7802_data *data = dev_get_drvdata(dev);
unsigned int reg;
int err;
@@ -778,7 +778,7 @@ static struct attribute *nct7802_in_attrs[] = {
static umode_t nct7802_in_is_visible(struct kobject *kobj,
struct attribute *attr, int index)
{
- struct device *dev = container_of(kobj, struct device, kobj);
+ struct device *dev = kobj_to_dev(kobj);
struct nct7802_data *data = dev_get_drvdata(dev);
unsigned int reg;
int err;
@@ -853,7 +853,7 @@ static struct attribute *nct7802_fan_attrs[] = {
static umode_t nct7802_fan_is_visible(struct kobject *kobj,
struct attribute *attr, int index)
{
- struct device *dev = container_of(kobj, struct device, kobj);
+ struct device *dev = kobj_to_dev(kobj);
struct nct7802_data *data = dev_get_drvdata(dev);
int fan = index / 4; /* 4 attributes per fan */
unsigned int reg;
diff --git a/drivers/hwmon/nct7904.c b/drivers/hwmon/nct7904.c
index a7eb10d2a053..b0425694f702 100644
--- a/drivers/hwmon/nct7904.c
+++ b/drivers/hwmon/nct7904.c
@@ -8,6 +8,9 @@
* Copyright (c) 2019 Advantech
* Author: Amy.Shih <amy.shih@advantech.com.tw>
*
+ * Copyright (c) 2020 Advantech
+ * Author: Yuechao Zhao <yuechao.zhao@advantech.com.cn>
+ *
* Supports the following chips:
*
* Chip #vin #fan #pwm #temp #dts chip ID
@@ -20,6 +23,7 @@
#include <linux/i2c.h>
#include <linux/mutex.h>
#include <linux/hwmon.h>
+#include <linux/watchdog.h>
#define VENDOR_ID_REG 0x7A /* Any bank */
#define NUVOTON_ID 0x50
@@ -88,18 +92,42 @@
#define FANCTL1_FMR_REG 0x00 /* Bank 3; 1 reg per channel */
#define FANCTL1_OUT_REG 0x10 /* Bank 3; 1 reg per channel */
+#define WDT_LOCK_REG 0xE0 /* W/O Lock Watchdog Register */
+#define WDT_EN_REG 0xE1 /* R/O Watchdog Enable Register */
+#define WDT_STS_REG 0xE2 /* R/O Watchdog Status Register */
+#define WDT_TIMER_REG 0xE3 /* R/W Watchdog Timer Register */
+#define WDT_SOFT_EN 0x55 /* Enable soft watchdog timer */
+#define WDT_SOFT_DIS 0xAA /* Disable soft watchdog timer */
+
#define VOLT_MONITOR_MODE 0x0
#define THERMAL_DIODE_MODE 0x1
#define THERMISTOR_MODE 0x3
#define ENABLE_TSI BIT(1)
+#define WATCHDOG_TIMEOUT 1 /* 1 minute default timeout */
+
+/* The timeout range is 1-255 minutes */
+#define MIN_TIMEOUT (1 * 60)
+#define MAX_TIMEOUT (255 * 60)
+
+static int timeout;
+module_param(timeout, int, 0);
+MODULE_PARM_DESC(timeout, "Watchdog timeout in minutes. 1 <= timeout <= 255, default="
+ __MODULE_STRING(WATCHDOG_TIMEOUT) ".");
+
+static bool nowayout = WATCHDOG_NOWAYOUT;
+module_param(nowayout, bool, 0);
+MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default="
+ __MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
+
static const unsigned short normal_i2c[] = {
0x2d, 0x2e, I2C_CLIENT_END
};
struct nct7904_data {
struct i2c_client *client;
+ struct watchdog_device wdt;
struct mutex bank_lock;
int bank_sel;
u32 fanin_mask;
@@ -892,6 +920,95 @@ static const struct hwmon_chip_info nct7904_chip_info = {
.info = nct7904_info,
};
+/*
+ * Watchdog Function
+ */
+static int nct7904_wdt_start(struct watchdog_device *wdt)
+{
+ struct nct7904_data *data = watchdog_get_drvdata(wdt);
+
+ /* Enable soft watchdog timer */
+ return nct7904_write_reg(data, BANK_0, WDT_LOCK_REG, WDT_SOFT_EN);
+}
+
+static int nct7904_wdt_stop(struct watchdog_device *wdt)
+{
+ struct nct7904_data *data = watchdog_get_drvdata(wdt);
+
+ return nct7904_write_reg(data, BANK_0, WDT_LOCK_REG, WDT_SOFT_DIS);
+}
+
+static int nct7904_wdt_set_timeout(struct watchdog_device *wdt,
+ unsigned int timeout)
+{
+ struct nct7904_data *data = watchdog_get_drvdata(wdt);
+ /*
+ * The NCT7904 is very special in watchdog function.
+ * Its minimum unit is minutes. And wdt->timeout needs
+ * to match the actual timeout selected. So, this needs
+ * to be: wdt->timeout = timeout / 60 * 60.
+ * For example, if the user configures a timeout of
+ * 119 seconds, the actual timeout will be 60 seconds.
+ * So, wdt->timeout must then be set to 60 seconds.
+ */
+ wdt->timeout = timeout / 60 * 60;
+
+ return nct7904_write_reg(data, BANK_0, WDT_TIMER_REG,
+ wdt->timeout / 60);
+}
+
+static int nct7904_wdt_ping(struct watchdog_device *wdt)
+{
+ /*
+ * Note:
+ * The NCT7904 does not support refreshing the WDT_TIMER_REG register
+ * while the watchdog is active, so the watchdog has to be disabled
+ * before it is fed and then enabled again.
+ */
+ struct nct7904_data *data = watchdog_get_drvdata(wdt);
+ int ret;
+
+ /* Disable soft watchdog timer */
+ ret = nct7904_write_reg(data, BANK_0, WDT_LOCK_REG, WDT_SOFT_DIS);
+ if (ret < 0)
+ return ret;
+
+ /* feed watchdog */
+ ret = nct7904_write_reg(data, BANK_0, WDT_TIMER_REG, wdt->timeout / 60);
+ if (ret < 0)
+ return ret;
+
+ /* Enable soft watchdog timer */
+ return nct7904_write_reg(data, BANK_0, WDT_LOCK_REG, WDT_SOFT_EN);
+}
+
+static unsigned int nct7904_wdt_get_timeleft(struct watchdog_device *wdt)
+{
+ struct nct7904_data *data = watchdog_get_drvdata(wdt);
+ int ret;
+
+ ret = nct7904_read_reg(data, BANK_0, WDT_TIMER_REG);
+ if (ret < 0)
+ return 0;
+
+ return ret * 60;
+}
+
+static const struct watchdog_info nct7904_wdt_info = {
+ .options = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING |
+ WDIOF_MAGICCLOSE,
+ .identity = "nct7904 watchdog",
+};
+
+static const struct watchdog_ops nct7904_wdt_ops = {
+ .owner = THIS_MODULE,
+ .start = nct7904_wdt_start,
+ .stop = nct7904_wdt_stop,
+ .ping = nct7904_wdt_ping,
+ .set_timeout = nct7904_wdt_set_timeout,
+ .get_timeleft = nct7904_wdt_get_timeleft,
+};
+
static int nct7904_probe(struct i2c_client *client,
const struct i2c_device_id *id)
{
@@ -1022,7 +1139,26 @@ static int nct7904_probe(struct i2c_client *client,
hwmon_dev =
devm_hwmon_device_register_with_info(dev, client->name, data,
&nct7904_chip_info, NULL);
- return PTR_ERR_OR_ZERO(hwmon_dev);
+ ret = PTR_ERR_OR_ZERO(hwmon_dev);
+ if (ret)
+ return ret;
+
+ /* Watchdog initialization */
+ data->wdt.ops = &nct7904_wdt_ops;
+ data->wdt.info = &nct7904_wdt_info;
+
+ data->wdt.timeout = WATCHDOG_TIMEOUT * 60; /* Set default timeout */
+ data->wdt.min_timeout = MIN_TIMEOUT;
+ data->wdt.max_timeout = MAX_TIMEOUT;
+ data->wdt.parent = &client->dev;
+
+ watchdog_init_timeout(&data->wdt, timeout * 60, &client->dev);
+ watchdog_set_nowayout(&data->wdt, nowayout);
+ watchdog_set_drvdata(&data->wdt, data);
+
+ watchdog_stop_on_unregister(&data->wdt);
+
+ return devm_watchdog_register_device(dev, &data->wdt);
}
static const struct i2c_device_id nct7904_id[] = {
diff --git a/drivers/hwmon/pmbus/Kconfig b/drivers/hwmon/pmbus/Kconfig
index de12a565006d..a337195b1c39 100644
--- a/drivers/hwmon/pmbus/Kconfig
+++ b/drivers/hwmon/pmbus/Kconfig
@@ -146,6 +146,15 @@ config SENSORS_MAX16064
This driver can also be built as a module. If so, the module will
be called max16064.
+config SENSORS_MAX16601
+ tristate "Maxim MAX16601"
+ help
+ If you say yes here you get hardware monitoring support for Maxim
+ MAX16601.
+
+ This driver can also be built as a module. If so, the module will
+ be called max16601.
+
config SENSORS_MAX20730
tristate "Maxim MAX20730, MAX20734, MAX20743"
help
diff --git a/drivers/hwmon/pmbus/Makefile b/drivers/hwmon/pmbus/Makefile
index 5feb45806123..c4b15db996ad 100644
--- a/drivers/hwmon/pmbus/Makefile
+++ b/drivers/hwmon/pmbus/Makefile
@@ -17,6 +17,7 @@ obj-$(CONFIG_SENSORS_LM25066) += lm25066.o
obj-$(CONFIG_SENSORS_LTC2978) += ltc2978.o
obj-$(CONFIG_SENSORS_LTC3815) += ltc3815.o
obj-$(CONFIG_SENSORS_MAX16064) += max16064.o
+obj-$(CONFIG_SENSORS_MAX16601) += max16601.o
obj-$(CONFIG_SENSORS_MAX20730) += max20730.o
obj-$(CONFIG_SENSORS_MAX20751) += max20751.o
obj-$(CONFIG_SENSORS_MAX31785) += max31785.o
diff --git a/drivers/hwmon/pmbus/max16601.c b/drivers/hwmon/pmbus/max16601.c
new file mode 100644
index 000000000000..51cdfaf9023c
--- /dev/null
+++ b/drivers/hwmon/pmbus/max16601.c
@@ -0,0 +1,314 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Hardware monitoring driver for Maxim MAX16601
+ *
+ * Implementation notes:
+ *
+ * This chip supports two rails, VCORE and VSA. Telemetry information for the
+ * two rails is reported in two subsequent I2C addresses. The driver
+ * instantiates a dummy I2C client at the second I2C address to report
+ * information for the VSA rail in a single instance of the driver.
+ * Telemetry for the VSA rail is reported to the PMBus core in PMBus page 2.
+ *
+ * The chip reports input current using two separate methods. The input current
+ * reported with the standard READ_IIN command is derived from the output
+ * current, while the second method reads the input current directly from a
+ * sensor element. The first method is reported to the PMBus core with PMBus
+ * page 0, the second method with PMBus page 1.
+ *
+ * The chip supports reading per-phase temperatures and per-phase input/output
+ * currents for VCORE. Telemetry is reported in vendor specific registers.
+ * The driver translates the vendor specific register values to PMBus standard
+ * register values and reports per-phase information in PMBus page 0.
+ *
+ * Copyright 2019, 2020 Google LLC.
+ */
+
+#include <linux/bits.h>
+#include <linux/i2c.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#include "pmbus.h"
+
+#define REG_SETPT_DVID 0xd1
+#define DAC_10MV_MODE BIT(4)
+#define REG_IOUT_AVG_PK 0xee
+#define REG_IIN_SENSOR 0xf1
+#define REG_TOTAL_INPUT_POWER 0xf2
+#define REG_PHASE_ID 0xf3
+#define CORE_RAIL_INDICATOR BIT(7)
+#define REG_PHASE_REPORTING 0xf4
+
+struct max16601_data {
+ struct pmbus_driver_info info;
+ struct i2c_client *vsa;
+ int iout_avg_pkg;
+};
+
+#define to_max16601_data(x) container_of(x, struct max16601_data, info)
+
+static int max16601_read_byte(struct i2c_client *client, int page, int reg)
+{
+ const struct pmbus_driver_info *info = pmbus_get_driver_info(client);
+ struct max16601_data *data = to_max16601_data(info);
+
+ if (page > 0) {
+ if (page == 2) /* VSA */
+ return i2c_smbus_read_byte_data(data->vsa, reg);
+ return -EOPNOTSUPP;
+ }
+ return -ENODATA;
+}
+
+static int max16601_read_word(struct i2c_client *client, int page, int phase,
+ int reg)
+{
+ const struct pmbus_driver_info *info = pmbus_get_driver_info(client);
+ struct max16601_data *data = to_max16601_data(info);
+ u8 buf[I2C_SMBUS_BLOCK_MAX + 1];
+ int ret;
+
+ switch (page) {
+ case 0: /* VCORE */
+ if (phase == 0xff)
+ return -ENODATA;
+ switch (reg) {
+ case PMBUS_READ_IIN:
+ case PMBUS_READ_IOUT:
+ case PMBUS_READ_TEMPERATURE_1:
+ ret = i2c_smbus_write_byte_data(client, REG_PHASE_ID,
+ phase);
+ if (ret)
+ return ret;
+ ret = i2c_smbus_read_block_data(client,
+ REG_PHASE_REPORTING,
+ buf);
+ if (ret < 0)
+ return ret;
+ if (ret < 6)
+ return -EIO;
+ switch (reg) {
+ case PMBUS_READ_TEMPERATURE_1:
+ return buf[1] << 8 | buf[0];
+ case PMBUS_READ_IOUT:
+ return buf[3] << 8 | buf[2];
+ case PMBUS_READ_IIN:
+ return buf[5] << 8 | buf[4];
+ default:
+ break;
+ }
+ }
+ return -EOPNOTSUPP;
+ case 1: /* VCORE, read IIN/PIN from sensor element */
+ switch (reg) {
+ case PMBUS_READ_IIN:
+ return i2c_smbus_read_word_data(client, REG_IIN_SENSOR);
+ case PMBUS_READ_PIN:
+ return i2c_smbus_read_word_data(client,
+ REG_TOTAL_INPUT_POWER);
+ default:
+ break;
+ }
+ return -EOPNOTSUPP;
+ case 2: /* VSA */
+ switch (reg) {
+ case PMBUS_VIRT_READ_IOUT_MAX:
+ ret = i2c_smbus_read_word_data(data->vsa,
+ REG_IOUT_AVG_PK);
+ if (ret < 0)
+ return ret;
+ if (sign_extend32(ret, 10) >
+ sign_extend32(data->iout_avg_pkg, 10))
+ data->iout_avg_pkg = ret;
+ return data->iout_avg_pkg;
+ case PMBUS_VIRT_RESET_IOUT_HISTORY:
+ return 0;
+ case PMBUS_IOUT_OC_FAULT_LIMIT:
+ case PMBUS_IOUT_OC_WARN_LIMIT:
+ case PMBUS_OT_FAULT_LIMIT:
+ case PMBUS_OT_WARN_LIMIT:
+ case PMBUS_READ_IIN:
+ case PMBUS_READ_IOUT:
+ case PMBUS_READ_TEMPERATURE_1:
+ case PMBUS_STATUS_WORD:
+ return i2c_smbus_read_word_data(data->vsa, reg);
+ default:
+ return -EOPNOTSUPP;
+ }
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int max16601_write_byte(struct i2c_client *client, int page, u8 reg)
+{
+ const struct pmbus_driver_info *info = pmbus_get_driver_info(client);
+ struct max16601_data *data = to_max16601_data(info);
+
+ if (page == 2) {
+ if (reg == PMBUS_CLEAR_FAULTS)
+ return i2c_smbus_write_byte(data->vsa, reg);
+ return -EOPNOTSUPP;
+ }
+ return -ENODATA;
+}
+
+static int max16601_write_word(struct i2c_client *client, int page, int reg,
+ u16 value)
+{
+ const struct pmbus_driver_info *info = pmbus_get_driver_info(client);
+ struct max16601_data *data = to_max16601_data(info);
+
+ switch (page) {
+ case 0: /* VCORE */
+ return -ENODATA;
+ case 1: /* VCORE IIN/PIN from sensor element */
+ default:
+ return -EOPNOTSUPP;
+ case 2: /* VSA */
+ switch (reg) {
+ case PMBUS_VIRT_RESET_IOUT_HISTORY:
+ data->iout_avg_pkg = 0xfc00;
+ return 0;
+ case PMBUS_IOUT_OC_FAULT_LIMIT:
+ case PMBUS_IOUT_OC_WARN_LIMIT:
+ case PMBUS_OT_FAULT_LIMIT:
+ case PMBUS_OT_WARN_LIMIT:
+ return i2c_smbus_write_word_data(data->vsa, reg, value);
+ default:
+ return -EOPNOTSUPP;
+ }
+ }
+}
+
+static int max16601_identify(struct i2c_client *client,
+ struct pmbus_driver_info *info)
+{
+ int reg;
+
+ reg = i2c_smbus_read_byte_data(client, REG_SETPT_DVID);
+ if (reg < 0)
+ return reg;
+ if (reg & DAC_10MV_MODE)
+ info->vrm_version[0] = vr13;
+ else
+ info->vrm_version[0] = vr12;
+
+ return 0;
+}
+
+static struct pmbus_driver_info max16601_info = {
+ .pages = 3,
+ .format[PSC_VOLTAGE_IN] = linear,
+ .format[PSC_VOLTAGE_OUT] = vid,
+ .format[PSC_CURRENT_IN] = linear,
+ .format[PSC_CURRENT_OUT] = linear,
+ .format[PSC_TEMPERATURE] = linear,
+ .format[PSC_POWER] = linear,
+ .func[0] = PMBUS_HAVE_VIN | PMBUS_HAVE_IIN | PMBUS_HAVE_PIN |
+ PMBUS_HAVE_STATUS_INPUT |
+ PMBUS_HAVE_VOUT | PMBUS_HAVE_STATUS_VOUT |
+ PMBUS_HAVE_IOUT | PMBUS_HAVE_STATUS_IOUT |
+ PMBUS_HAVE_TEMP | PMBUS_HAVE_STATUS_TEMP |
+ PMBUS_HAVE_POUT | PMBUS_PAGE_VIRTUAL | PMBUS_PHASE_VIRTUAL,
+ .func[1] = PMBUS_HAVE_IIN | PMBUS_HAVE_PIN | PMBUS_PAGE_VIRTUAL,
+ .func[2] = PMBUS_HAVE_IIN | PMBUS_HAVE_STATUS_INPUT |
+ PMBUS_HAVE_IOUT | PMBUS_HAVE_STATUS_IOUT |
+ PMBUS_HAVE_TEMP | PMBUS_HAVE_STATUS_TEMP | PMBUS_PAGE_VIRTUAL,
+ .phases[0] = 8,
+ .pfunc[0] = PMBUS_HAVE_IIN | PMBUS_HAVE_IOUT | PMBUS_HAVE_TEMP,
+ .pfunc[1] = PMBUS_HAVE_IIN | PMBUS_HAVE_IOUT,
+ .pfunc[2] = PMBUS_HAVE_IIN | PMBUS_HAVE_IOUT | PMBUS_HAVE_TEMP,
+ .pfunc[3] = PMBUS_HAVE_IIN | PMBUS_HAVE_IOUT,
+ .pfunc[4] = PMBUS_HAVE_IIN | PMBUS_HAVE_IOUT | PMBUS_HAVE_TEMP,
+ .pfunc[5] = PMBUS_HAVE_IIN | PMBUS_HAVE_IOUT,
+ .pfunc[6] = PMBUS_HAVE_IIN | PMBUS_HAVE_IOUT | PMBUS_HAVE_TEMP,
+ .pfunc[7] = PMBUS_HAVE_IIN | PMBUS_HAVE_IOUT,
+ .identify = max16601_identify,
+ .read_byte_data = max16601_read_byte,
+ .read_word_data = max16601_read_word,
+ .write_byte = max16601_write_byte,
+ .write_word_data = max16601_write_word,
+};
+
+static void max16601_remove(void *_data)
+{
+ struct max16601_data *data = _data;
+
+ i2c_unregister_device(data->vsa);
+}
+
+static int max16601_probe(struct i2c_client *client,
+ const struct i2c_device_id *id)
+{
+ struct device *dev = &client->dev;
+ u8 buf[I2C_SMBUS_BLOCK_MAX + 1];
+ struct max16601_data *data;
+ int ret;
+
+ if (!i2c_check_functionality(client->adapter,
+ I2C_FUNC_SMBUS_READ_BYTE_DATA |
+ I2C_FUNC_SMBUS_READ_BLOCK_DATA))
+ return -ENODEV;
+
+ ret = i2c_smbus_read_block_data(client, PMBUS_IC_DEVICE_ID, buf);
+ if (ret < 0)
+ return -ENODEV;
+
+ /* PMBUS_IC_DEVICE_ID is expected to return "MAX16601y.xx" */
+ if (ret < 11 || strncmp(buf, "MAX16601", 8)) {
+ buf[ret] = '\0';
+ dev_err(dev, "Unsupported chip '%s'\n", buf);
+ return -ENODEV;
+ }
+
+ ret = i2c_smbus_read_byte_data(client, REG_PHASE_ID);
+ if (ret < 0)
+ return ret;
+ if (!(ret & CORE_RAIL_INDICATOR)) {
+ dev_err(dev,
+ "Driver must be instantiated on CORE rail I2C address\n");
+ return -ENODEV;
+ }
+
+ data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ data->iout_avg_pkg = 0xfc00;
+ data->vsa = i2c_new_dummy_device(client->adapter, client->addr + 1);
+ if (IS_ERR(data->vsa)) {
+ dev_err(dev, "Failed to register VSA client\n");
+ return PTR_ERR(data->vsa);
+ }
+ ret = devm_add_action_or_reset(dev, max16601_remove, data);
+ if (ret)
+ return ret;
+
+ data->info = max16601_info;
+
+ return pmbus_do_probe(client, id, &data->info);
+}
+
+static const struct i2c_device_id max16601_id[] = {
+ {"max16601", 0},
+ {}
+};
+
+MODULE_DEVICE_TABLE(i2c, max16601_id);
+
+static struct i2c_driver max16601_driver = {
+ .driver = {
+ .name = "max16601",
+ },
+ .probe = max16601_probe,
+ .remove = pmbus_do_remove,
+ .id_table = max16601_id,
+};
+
+module_i2c_driver(max16601_driver);
+
+MODULE_AUTHOR("Guenter Roeck <linux@roeck-us.net>");
+MODULE_DESCRIPTION("PMBus driver for Maxim MAX16601");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/hwmon/pmbus/pmbus_core.c b/drivers/hwmon/pmbus/pmbus_core.c
index 8d321bf7d15b..a420877ba533 100644
--- a/drivers/hwmon/pmbus/pmbus_core.c
+++ b/drivers/hwmon/pmbus/pmbus_core.c
@@ -109,8 +109,8 @@ struct pmbus_data {
bool has_status_word; /* device uses STATUS_WORD register */
int (*read_status)(struct i2c_client *client, int page);
- u8 currpage;
- u8 currphase; /* current phase, 0xff for all */
+ s16 currpage; /* current page, -1 for unknown/unset */
+ s16 currphase; /* current phase, 0xff for all, -1 for unknown/unset */
};
struct pmbus_debugfs_entry {
@@ -2529,8 +2529,8 @@ int pmbus_do_probe(struct i2c_client *client, const struct i2c_device_id *id,
if (pdata)
data->flags = pdata->flags;
data->info = info;
- data->currpage = 0xff;
- data->currphase = 0xfe;
+ data->currpage = -1;
+ data->currphase = -1;
ret = pmbus_init_common(client, data, info);
if (ret < 0)
diff --git a/drivers/hwtracing/coresight/Kconfig b/drivers/hwtracing/coresight/Kconfig
index 83e841be1081..02dbb5ca3bcf 100644
--- a/drivers/hwtracing/coresight/Kconfig
+++ b/drivers/hwtracing/coresight/Kconfig
@@ -107,7 +107,7 @@ config CORESIGHT_CPU_DEBUG
can quickly get to know program counter (PC), secure state,
exception level, etc. Before use debugging functionality, platform
needs to ensure the clock domain and power domain are enabled
- properly, please refer Documentation/trace/coresight-cpu-debug.rst
+ properly, please refer Documentation/trace/coresight/coresight-cpu-debug.rst
for detailed description and the example for usage.
config CORESIGHT_CTI
diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c
index 5536673060cc..c429d664f655 100644
--- a/drivers/i2c/busses/i2c-designware-platdrv.c
+++ b/drivers/i2c/busses/i2c-designware-platdrv.c
@@ -357,12 +357,12 @@ static int dw_i2c_plat_probe(struct platform_device *pdev)
if (dev->flags & ACCESS_NO_IRQ_SUSPEND) {
dev_pm_set_driver_flags(&pdev->dev,
DPM_FLAG_SMART_PREPARE |
- DPM_FLAG_LEAVE_SUSPENDED);
+ DPM_FLAG_MAY_SKIP_RESUME);
} else {
dev_pm_set_driver_flags(&pdev->dev,
DPM_FLAG_SMART_PREPARE |
DPM_FLAG_SMART_SUSPEND |
- DPM_FLAG_LEAVE_SUSPENDED);
+ DPM_FLAG_MAY_SKIP_RESUME);
}
/* The code below assumes runtime PM to be disabled. */
diff --git a/drivers/i3c/master.c b/drivers/i3c/master.c
index d79cd6d54b3a..97f2e29265da 100644
--- a/drivers/i3c/master.c
+++ b/drivers/i3c/master.c
@@ -1008,7 +1008,6 @@ static int i3c_master_getmrl_locked(struct i3c_master_controller *master,
struct i3c_device_info *info)
{
struct i3c_ccc_cmd_dest dest;
- unsigned int expected_len;
struct i3c_ccc_mrl *mrl;
struct i3c_ccc_cmd cmd;
int ret;
@@ -1024,22 +1023,23 @@ static int i3c_master_getmrl_locked(struct i3c_master_controller *master,
if (!(info->bcr & I3C_BCR_IBI_PAYLOAD))
dest.payload.len -= 1;
- expected_len = dest.payload.len;
i3c_ccc_cmd_init(&cmd, true, I3C_CCC_GETMRL, &dest, 1);
ret = i3c_master_send_ccc_cmd_locked(master, &cmd);
if (ret)
goto out;
- if (dest.payload.len != expected_len) {
+ switch (dest.payload.len) {
+ case 3:
+ info->max_ibi_len = mrl->ibi_len;
+ fallthrough;
+ case 2:
+ info->max_read_len = be16_to_cpu(mrl->read_len);
+ break;
+ default:
ret = -EIO;
goto out;
}
- info->max_read_len = be16_to_cpu(mrl->read_len);
-
- if (info->bcr & I3C_BCR_IBI_PAYLOAD)
- info->max_ibi_len = mrl->ibi_len;
-
out:
i3c_ccc_cmd_dest_cleanup(&dest);
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index dcf8b51b47fd..7f17f8303988 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -1034,8 +1034,8 @@ static int cdrom_read_capacity(ide_drive_t *drive, unsigned long *capacity,
return 0;
}
-static int cdrom_read_tocentry(ide_drive_t *drive, int trackno, int msf_flag,
- int format, char *buf, int buflen)
+static int ide_cdrom_read_tocentry(ide_drive_t *drive, int trackno,
+ int msf_flag, int format, char *buf, int buflen)
{
unsigned char cmd[BLK_MAX_CDB];
@@ -1104,7 +1104,7 @@ int ide_cd_read_toc(ide_drive_t *drive)
sectors_per_frame << SECTOR_SHIFT);
/* first read just the header, so we know how long the TOC is */
- stat = cdrom_read_tocentry(drive, 0, 1, 0, (char *) &toc->hdr,
+ stat = ide_cdrom_read_tocentry(drive, 0, 1, 0, (char *) &toc->hdr,
sizeof(struct atapi_toc_header));
if (stat)
return stat;
@@ -1121,7 +1121,7 @@ int ide_cd_read_toc(ide_drive_t *drive)
ntracks = MAX_TRACKS;
/* now read the whole schmeer */
- stat = cdrom_read_tocentry(drive, toc->hdr.first_track, 1, 0,
+ stat = ide_cdrom_read_tocentry(drive, toc->hdr.first_track, 1, 0,
(char *)&toc->hdr,
sizeof(struct atapi_toc_header) +
(ntracks + 1) *
@@ -1141,7 +1141,7 @@ int ide_cd_read_toc(ide_drive_t *drive)
* Heiko Eißfeldt.
*/
ntracks = 0;
- stat = cdrom_read_tocentry(drive, CDROM_LEADOUT, 1, 0,
+ stat = ide_cdrom_read_tocentry(drive, CDROM_LEADOUT, 1, 0,
(char *)&toc->hdr,
sizeof(struct atapi_toc_header) +
(ntracks + 1) *
@@ -1181,7 +1181,7 @@ int ide_cd_read_toc(ide_drive_t *drive)
if (toc->hdr.first_track != CDROM_LEADOUT) {
/* read the multisession information */
- stat = cdrom_read_tocentry(drive, 0, 0, 1, (char *)&ms_tmp,
+ stat = ide_cdrom_read_tocentry(drive, 0, 0, 1, (char *)&ms_tmp,
sizeof(ms_tmp));
if (stat)
return stat;
@@ -1195,7 +1195,7 @@ int ide_cd_read_toc(ide_drive_t *drive)
if (drive->atapi_flags & IDE_AFLAG_TOCADDR_AS_BCD) {
/* re-read multisession information using MSF format */
- stat = cdrom_read_tocentry(drive, 0, 1, 1, (char *)&ms_tmp,
+ stat = ide_cdrom_read_tocentry(drive, 0, 1, 1, (char *)&ms_tmp,
sizeof(ms_tmp));
if (stat)
return stat;
@@ -1305,8 +1305,7 @@ static int ide_cdrom_register(ide_drive_t *drive, int nslots)
if (drive->atapi_flags & IDE_AFLAG_NO_SPEED_SELECT)
devinfo->mask |= CDC_SELECT_SPEED;
- devinfo->disk = info->disk;
- return register_cdrom(devinfo);
+ return register_cdrom(info->disk, devinfo);
}
static int ide_cdrom_probe_capabilities(ide_drive_t *drive)
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index b137f27a34d5..c31f1d2b3b07 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -233,10 +233,13 @@ static ide_startstop_t do_special(ide_drive_t *drive)
void ide_map_sg(ide_drive_t *drive, struct ide_cmd *cmd)
{
ide_hwif_t *hwif = drive->hwif;
- struct scatterlist *sg = hwif->sg_table;
+ struct scatterlist *sg = hwif->sg_table, *last_sg = NULL;
struct request *rq = cmd->rq;
- cmd->sg_nents = blk_rq_map_sg(drive->queue, rq, sg);
+ cmd->sg_nents = __blk_rq_map_sg(drive->queue, rq, sg, &last_sg);
+ if (blk_rq_bytes(rq) && (blk_rq_bytes(rq) & rq->q->dma_pad_mask))
+ last_sg->length +=
+ (rq->q->dma_pad_mask & ~blk_rq_bytes(rq)) + 1;
}
EXPORT_SYMBOL_GPL(ide_map_sg);
diff --git a/drivers/iio/dac/ad5761.c b/drivers/iio/dac/ad5761.c
index 4fb42b743f0f..7468fbd11684 100644
--- a/drivers/iio/dac/ad5761.c
+++ b/drivers/iio/dac/ad5761.c
@@ -3,7 +3,7 @@
* AD5721, AD5721R, AD5761, AD5761R, Voltage Output Digital to Analog Converter
*
* Copyright 2016 Qtechnology A/S
- * 2016 Ricardo Ribalda <ricardo.ribalda@gmail.com>
+ * 2016 Ricardo Ribalda <ribalda@kernel.org>
*/
#include <linux/kernel.h>
#include <linux/module.h>
@@ -423,6 +423,6 @@ static struct spi_driver ad5761_driver = {
};
module_spi_driver(ad5761_driver);
-MODULE_AUTHOR("Ricardo Ribalda <ricardo.ribalda@gmail.com>");
+MODULE_AUTHOR("Ricardo Ribalda <ribalda@kernel.org>");
MODULE_DESCRIPTION("Analog Devices AD5721, AD5721R, AD5761, AD5761R driver");
MODULE_LICENSE("GPL v2");
diff --git a/drivers/iio/dac/ti-dac7612.c b/drivers/iio/dac/ti-dac7612.c
index c46805144dd4..de0c6573cd97 100644
--- a/drivers/iio/dac/ti-dac7612.c
+++ b/drivers/iio/dac/ti-dac7612.c
@@ -3,7 +3,7 @@
* DAC7612 Dual, 12-Bit Serial input Digital-to-Analog Converter
*
* Copyright 2019 Qtechnology A/S
- * 2019 Ricardo Ribalda <ricardo@ribalda.com>
+ * 2019 Ricardo Ribalda <ribalda@kernel.org>
*
* Licensed under the GPL-2.
*/
@@ -179,6 +179,6 @@ static struct spi_driver dac7612_driver = {
};
module_spi_driver(dac7612_driver);
-MODULE_AUTHOR("Ricardo Ribalda <ricardo@ribalda.com>");
+MODULE_AUTHOR("Ricardo Ribalda <ribalda@kernel.org>");
MODULE_DESCRIPTION("Texas Instruments DAC7612 DAC driver");
MODULE_LICENSE("GPL v2");
diff --git a/drivers/iio/dummy/iio_dummy_evgen.c b/drivers/iio/dummy/iio_dummy_evgen.c
index a6edf30567aa..ee85d596e528 100644
--- a/drivers/iio/dummy/iio_dummy_evgen.c
+++ b/drivers/iio/dummy/iio_dummy_evgen.c
@@ -37,8 +37,7 @@ struct iio_dummy_eventgen {
struct iio_dummy_regs regs[IIO_EVENTGEN_NO];
struct mutex lock;
bool inuse[IIO_EVENTGEN_NO];
- struct irq_sim irq_sim;
- int base;
+ struct irq_domain *irq_sim_domain;
};
/* We can only ever have one instance of this 'device' */
@@ -52,13 +51,14 @@ static int iio_dummy_evgen_create(void)
if (!iio_evgen)
return -ENOMEM;
- ret = irq_sim_init(&iio_evgen->irq_sim, IIO_EVENTGEN_NO);
- if (ret < 0) {
+ iio_evgen->irq_sim_domain = irq_domain_create_sim(NULL,
+ IIO_EVENTGEN_NO);
+ if (IS_ERR(iio_evgen->irq_sim_domain)) {
+ ret = PTR_ERR(iio_evgen->irq_sim_domain);
kfree(iio_evgen);
return ret;
}
- iio_evgen->base = irq_sim_irqnum(&iio_evgen->irq_sim, 0);
mutex_init(&iio_evgen->lock);
return 0;
@@ -80,7 +80,7 @@ int iio_dummy_evgen_get_irq(void)
mutex_lock(&iio_evgen->lock);
for (i = 0; i < IIO_EVENTGEN_NO; i++) {
if (!iio_evgen->inuse[i]) {
- ret = irq_sim_irqnum(&iio_evgen->irq_sim, i);
+ ret = irq_create_mapping(iio_evgen->irq_sim_domain, i);
iio_evgen->inuse[i] = true;
break;
}
@@ -101,21 +101,27 @@ EXPORT_SYMBOL_GPL(iio_dummy_evgen_get_irq);
*/
void iio_dummy_evgen_release_irq(int irq)
{
+ struct irq_data *irqd = irq_get_irq_data(irq);
+
mutex_lock(&iio_evgen->lock);
- iio_evgen->inuse[irq - iio_evgen->base] = false;
+ iio_evgen->inuse[irqd_to_hwirq(irqd)] = false;
+ irq_dispose_mapping(irq);
mutex_unlock(&iio_evgen->lock);
}
EXPORT_SYMBOL_GPL(iio_dummy_evgen_release_irq);
struct iio_dummy_regs *iio_dummy_evgen_get_regs(int irq)
{
- return &iio_evgen->regs[irq - iio_evgen->base];
+ struct irq_data *irqd = irq_get_irq_data(irq);
+
+ return &iio_evgen->regs[irqd_to_hwirq(irqd)];
+
}
EXPORT_SYMBOL_GPL(iio_dummy_evgen_get_regs);
static void iio_dummy_evgen_free(void)
{
- irq_sim_fini(&iio_evgen->irq_sim);
+ irq_domain_remove_sim(iio_evgen->irq_sim_domain);
kfree(iio_evgen);
}
@@ -131,7 +137,7 @@ static ssize_t iio_evgen_poke(struct device *dev,
{
struct iio_dev_attr *this_attr = to_iio_dev_attr(attr);
unsigned long event;
- int ret;
+ int ret, irq;
ret = kstrtoul(buf, 10, &event);
if (ret)
@@ -140,7 +146,10 @@ static ssize_t iio_evgen_poke(struct device *dev,
iio_evgen->regs[this_attr->address].reg_id = this_attr->address;
iio_evgen->regs[this_attr->address].reg_data = event;
- irq_sim_fire(&iio_evgen->irq_sim, this_attr->address);
+ irq = irq_find_mapping(iio_evgen->irq_sim_domain, this_attr->address);
+ ret = irq_set_irqchip_state(irq, IRQCHIP_STATE_PENDING, true);
+ if (ret)
+ return ret;
return len;
}
diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
index 4da03f823474..f81ca20f4b69 100644
--- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c
+++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
@@ -206,13 +206,6 @@ static int pin_rcv_pages(struct hfi1_filedata *fd, struct tid_user_buf *tidbuf)
return -EINVAL;
}
- /* Verify that access is OK for the user buffer */
- if (!access_ok((void __user *)vaddr,
- npages * PAGE_SIZE)) {
- dd_dev_err(dd, "Fail vaddr %p, %u pages, !access_ok\n",
- (void *)vaddr, npages);
- return -EFAULT;
- }
/* Allocate the array of struct page pointers needed for pinning */
pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL);
if (!pages)
diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
index a85aada04a64..66b9a68f5e9f 100644
--- a/drivers/irqchip/Kconfig
+++ b/drivers/irqchip/Kconfig
@@ -532,4 +532,31 @@ config LOONGSON_HTPIC
help
Support for the Loongson-3 HyperTransport PIC Controller.
+config LOONGSON_HTVEC
+ bool "Loongson3 HyperTransport Interrupt Vector Controller"
+ depends on MACH_LOONGSON64
+ default MACH_LOONGSON64
+ select IRQ_DOMAIN_HIERARCHY
+ help
+ Support for the Loongson3 HyperTransport Interrupt Vector Controller.
+
+config LOONGSON_PCH_PIC
+ bool "Loongson PCH PIC Controller"
+ depends on MACH_LOONGSON64 || COMPILE_TEST
+ default MACH_LOONGSON64
+ select IRQ_DOMAIN_HIERARCHY
+ select IRQ_FASTEOI_HIERARCHY_HANDLERS
+ help
+ Support for the Loongson PCH PIC Controller.
+
+config LOONGSON_PCH_MSI
+ bool "Loongson PCH PIC Controller"
+ depends on MACH_LOONGSON64 || COMPILE_TEST
+ depends on PCI
+ default MACH_LOONGSON64
+ select IRQ_DOMAIN_HIERARCHY
+ select PCI_MSI
+ help
+ Support for the Loongson PCH MSI Controller.
+
endmenu
diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile
index 37bbe39bf909..3a4ce283189a 100644
--- a/drivers/irqchip/Makefile
+++ b/drivers/irqchip/Makefile
@@ -107,3 +107,6 @@ obj-$(CONFIG_TI_SCI_INTR_IRQCHIP) += irq-ti-sci-intr.o
obj-$(CONFIG_TI_SCI_INTA_IRQCHIP) += irq-ti-sci-inta.o
obj-$(CONFIG_LOONGSON_LIOINTC) += irq-loongson-liointc.o
obj-$(CONFIG_LOONGSON_HTPIC) += irq-loongson-htpic.o
+obj-$(CONFIG_LOONGSON_HTVEC) += irq-loongson-htvec.o
+obj-$(CONFIG_LOONGSON_PCH_PIC) += irq-loongson-pch-pic.o
+obj-$(CONFIG_LOONGSON_PCH_MSI) += irq-loongson-pch-msi.o
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 124251b0ccba..cd685f521c77 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -174,6 +174,13 @@ static struct {
int next_victim;
} vpe_proxy;
+struct cpu_lpi_count {
+ atomic_t managed;
+ atomic_t unmanaged;
+};
+
+static DEFINE_PER_CPU(struct cpu_lpi_count, cpu_lpi_count);
+
static LIST_HEAD(its_nodes);
static DEFINE_RAW_SPINLOCK(its_lock);
static struct rdists *gic_rdists;
@@ -1510,42 +1517,159 @@ static void its_unmask_irq(struct irq_data *d)
lpi_update_config(d, 0, LPI_PROP_ENABLED);
}
+static __maybe_unused u32 its_read_lpi_count(struct irq_data *d, int cpu)
+{
+ if (irqd_affinity_is_managed(d))
+ return atomic_read(&per_cpu_ptr(&cpu_lpi_count, cpu)->managed);
+
+ return atomic_read(&per_cpu_ptr(&cpu_lpi_count, cpu)->unmanaged);
+}
+
+static void its_inc_lpi_count(struct irq_data *d, int cpu)
+{
+ if (irqd_affinity_is_managed(d))
+ atomic_inc(&per_cpu_ptr(&cpu_lpi_count, cpu)->managed);
+ else
+ atomic_inc(&per_cpu_ptr(&cpu_lpi_count, cpu)->unmanaged);
+}
+
+static void its_dec_lpi_count(struct irq_data *d, int cpu)
+{
+ if (irqd_affinity_is_managed(d))
+ atomic_dec(&per_cpu_ptr(&cpu_lpi_count, cpu)->managed);
+ else
+ atomic_dec(&per_cpu_ptr(&cpu_lpi_count, cpu)->unmanaged);
+}
+
+static unsigned int cpumask_pick_least_loaded(struct irq_data *d,
+ const struct cpumask *cpu_mask)
+{
+ unsigned int cpu = nr_cpu_ids, tmp;
+ int count = S32_MAX;
+
+ for_each_cpu(tmp, cpu_mask) {
+ int this_count = its_read_lpi_count(d, tmp);
+ if (this_count < count) {
+ cpu = tmp;
+ count = this_count;
+ }
+ }
+
+ return cpu;
+}
+
+/*
+ * As suggested by Thomas Gleixner in:
+ * https://lore.kernel.org/r/87h80q2aoc.fsf@nanos.tec.linutronix.de
+ */
+static int its_select_cpu(struct irq_data *d,
+ const struct cpumask *aff_mask)
+{
+ struct its_device *its_dev = irq_data_get_irq_chip_data(d);
+ cpumask_var_t tmpmask;
+ int cpu, node;
+
+ if (!alloc_cpumask_var(&tmpmask, GFP_ATOMIC))
+ return -ENOMEM;
+
+ node = its_dev->its->numa_node;
+
+ if (!irqd_affinity_is_managed(d)) {
+ /* First try the NUMA node */
+ if (node != NUMA_NO_NODE) {
+ /*
+ * Try the intersection of the affinity mask and the
+ * node mask (and the online mask, just to be safe).
+ */
+ cpumask_and(tmpmask, cpumask_of_node(node), aff_mask);
+ cpumask_and(tmpmask, tmpmask, cpu_online_mask);
+
+ /*
+ * Ideally, we would check if the mask is empty, and
+ * try again on the full node here.
+ *
+ * But it turns out that the way ACPI describes the
+ * affinity for ITSs only deals with memory, and
+ * not target CPUs, so it cannot describe a single
+ * ITS placed next to two NUMA nodes.
+ *
+ * Instead, just fall back on the online mask. This
+ * diverges from Thomas' suggestion above.
+ */
+ cpu = cpumask_pick_least_loaded(d, tmpmask);
+ if (cpu < nr_cpu_ids)
+ goto out;
+
+ /* If we can't cross sockets, give up */
+ if ((its_dev->its->flags & ITS_FLAGS_WORKAROUND_CAVIUM_23144))
+ goto out;
+
+ /* If the above failed, expand the search */
+ }
+
+ /* Try the intersection of the affinity and online masks */
+ cpumask_and(tmpmask, aff_mask, cpu_online_mask);
+
+ /* If that doesn't fly, the online mask is the last resort */
+ if (cpumask_empty(tmpmask))
+ cpumask_copy(tmpmask, cpu_online_mask);
+
+ cpu = cpumask_pick_least_loaded(d, tmpmask);
+ } else {
+ cpumask_and(tmpmask, irq_data_get_affinity_mask(d), cpu_online_mask);
+
+ /* If we cannot cross sockets, limit the search to that node */
+ if ((its_dev->its->flags & ITS_FLAGS_WORKAROUND_CAVIUM_23144) &&
+ node != NUMA_NO_NODE)
+ cpumask_and(tmpmask, tmpmask, cpumask_of_node(node));
+
+ cpu = cpumask_pick_least_loaded(d, tmpmask);
+ }
+out:
+ free_cpumask_var(tmpmask);
+
+ pr_debug("IRQ%d -> %*pbl CPU%d\n", d->irq, cpumask_pr_args(aff_mask), cpu);
+ return cpu;
+}
+
static int its_set_affinity(struct irq_data *d, const struct cpumask *mask_val,
bool force)
{
- unsigned int cpu;
- const struct cpumask *cpu_mask = cpu_online_mask;
struct its_device *its_dev = irq_data_get_irq_chip_data(d);
struct its_collection *target_col;
u32 id = its_get_event_id(d);
+ int cpu, prev_cpu;
/* A forwarded interrupt should use irq_set_vcpu_affinity */
if (irqd_is_forwarded_to_vcpu(d))
return -EINVAL;
- /* lpi cannot be routed to a redistributor that is on a foreign node */
- if (its_dev->its->flags & ITS_FLAGS_WORKAROUND_CAVIUM_23144) {
- if (its_dev->its->numa_node >= 0) {
- cpu_mask = cpumask_of_node(its_dev->its->numa_node);
- if (!cpumask_intersects(mask_val, cpu_mask))
- return -EINVAL;
- }
- }
+ prev_cpu = its_dev->event_map.col_map[id];
+ its_dec_lpi_count(d, prev_cpu);
- cpu = cpumask_any_and(mask_val, cpu_mask);
+ if (!force)
+ cpu = its_select_cpu(d, mask_val);
+ else
+ cpu = cpumask_pick_least_loaded(d, mask_val);
- if (cpu >= nr_cpu_ids)
- return -EINVAL;
+ if (cpu < 0 || cpu >= nr_cpu_ids)
+ goto err;
/* don't set the affinity when the target cpu is same as current one */
- if (cpu != its_dev->event_map.col_map[id]) {
+ if (cpu != prev_cpu) {
target_col = &its_dev->its->collections[cpu];
its_send_movi(its_dev, target_col, id);
its_dev->event_map.col_map[id] = cpu;
irq_data_update_effective_affinity(d, cpumask_of(cpu));
}
+ its_inc_lpi_count(d, cpu);
+
return IRQ_SET_MASK_OK_DONE;
+
+err:
+ its_inc_lpi_count(d, prev_cpu);
+ return -EINVAL;
}
static u64 its_irq_get_msi_base(struct its_device *its_dev)
@@ -3432,22 +3556,13 @@ static int its_irq_domain_activate(struct irq_domain *domain,
{
struct its_device *its_dev = irq_data_get_irq_chip_data(d);
u32 event = its_get_event_id(d);
- const struct cpumask *cpu_mask = cpu_online_mask;
int cpu;
- /* get the cpu_mask of local node */
- if (its_dev->its->numa_node >= 0)
- cpu_mask = cpumask_of_node(its_dev->its->numa_node);
-
- /* Bind the LPI to the first possible CPU */
- cpu = cpumask_first_and(cpu_mask, cpu_online_mask);
- if (cpu >= nr_cpu_ids) {
- if (its_dev->its->flags & ITS_FLAGS_WORKAROUND_CAVIUM_23144)
- return -EINVAL;
-
- cpu = cpumask_first(cpu_online_mask);
- }
+ cpu = its_select_cpu(d, cpu_online_mask);
+ if (cpu < 0 || cpu >= nr_cpu_ids)
+ return -EINVAL;
+ its_inc_lpi_count(d, cpu);
its_dev->event_map.col_map[event] = cpu;
irq_data_update_effective_affinity(d, cpumask_of(cpu));
@@ -3462,6 +3577,7 @@ static void its_irq_domain_deactivate(struct irq_domain *domain,
struct its_device *its_dev = irq_data_get_irq_chip_data(d);
u32 event = its_get_event_id(d);
+ its_dec_lpi_count(d, its_dev->event_map.col_map[event]);
/* Stop the delivery of interrupts */
its_send_discard(its_dev, event);
}
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index d7006ef18a0d..cc46bc2d634b 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -1150,7 +1150,7 @@ static void gic_raise_softirq(const struct cpumask *mask, unsigned int irq)
isb();
}
-static void gic_smp_init(void)
+static void __init gic_smp_init(void)
{
set_smp_cross_call(gic_raise_softirq);
cpuhp_setup_state_nocalls(CPUHP_AP_IRQ_GIC_STARTING,
@@ -1282,7 +1282,6 @@ static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq,
irq_set_percpu_devid(irq);
irq_domain_set_info(d, irq, hw, chip, d->host_data,
handle_percpu_devid_irq, NULL, NULL);
- irq_set_status_flags(irq, IRQ_NOAUTOEN);
break;
case SPI_RANGE:
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index 30ab623343d3..00de05abd3c3 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -982,7 +982,6 @@ static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq,
irq_set_percpu_devid(irq);
irq_domain_set_info(d, irq, hw, &gic->chip, d->host_data,
handle_percpu_devid_irq, NULL, NULL);
- irq_set_status_flags(irq, IRQ_NOAUTOEN);
} else {
irq_domain_set_info(d, irq, hw, &gic->chip, d->host_data,
handle_fasteoi_irq, NULL, NULL);
diff --git a/drivers/irqchip/irq-loongson-htvec.c b/drivers/irqchip/irq-loongson-htvec.c
new file mode 100644
index 000000000000..1ece9337c78d
--- /dev/null
+++ b/drivers/irqchip/irq-loongson-htvec.c
@@ -0,0 +1,214 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020, Jiaxun Yang <jiaxun.yang@flygoat.com>
+ * Loongson HyperTransport Interrupt Vector support
+ */
+
+#define pr_fmt(fmt) "htvec: " fmt
+
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/irqchip.h>
+#include <linux/irqdomain.h>
+#include <linux/irqchip/chained_irq.h>
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/of_platform.h>
+
+/* Registers */
+#define HTVEC_EN_OFF 0x20
+#define HTVEC_MAX_PARENT_IRQ 4
+
+#define VEC_COUNT_PER_REG 32
+#define VEC_REG_COUNT 4
+#define VEC_COUNT (VEC_COUNT_PER_REG * VEC_REG_COUNT)
+#define VEC_REG_IDX(irq_id) ((irq_id) / VEC_COUNT_PER_REG)
+#define VEC_REG_BIT(irq_id) ((irq_id) % VEC_COUNT_PER_REG)
+
+struct htvec {
+ void __iomem *base;
+ struct irq_domain *htvec_domain;
+ raw_spinlock_t htvec_lock;
+};
+
+static void htvec_irq_dispatch(struct irq_desc *desc)
+{
+ int i;
+ u32 pending;
+ bool handled = false;
+ struct irq_chip *chip = irq_desc_get_chip(desc);
+ struct htvec *priv = irq_desc_get_handler_data(desc);
+
+ chained_irq_enter(chip, desc);
+
+ for (i = 0; i < VEC_REG_COUNT; i++) {
+ pending = readl(priv->base + 4 * i);
+ while (pending) {
+ int bit = __ffs(pending);
+
+ generic_handle_irq(irq_linear_revmap(priv->htvec_domain, bit +
+ VEC_COUNT_PER_REG * i));
+ pending &= ~BIT(bit);
+ handled = true;
+ }
+ }
+
+ if (!handled)
+ spurious_interrupt();
+
+ chained_irq_exit(chip, desc);
+}
+
+static void htvec_ack_irq(struct irq_data *d)
+{
+ struct htvec *priv = irq_data_get_irq_chip_data(d);
+
+ writel(BIT(VEC_REG_BIT(d->hwirq)),
+ priv->base + VEC_REG_IDX(d->hwirq) * 4);
+}
+
+static void htvec_mask_irq(struct irq_data *d)
+{
+ u32 reg;
+ void __iomem *addr;
+ struct htvec *priv = irq_data_get_irq_chip_data(d);
+
+ raw_spin_lock(&priv->htvec_lock);
+ addr = priv->base + HTVEC_EN_OFF;
+ addr += VEC_REG_IDX(d->hwirq) * 4;
+ reg = readl(addr);
+ reg &= ~BIT(VEC_REG_BIT(d->hwirq));
+ writel(reg, addr);
+ raw_spin_unlock(&priv->htvec_lock);
+}
+
+static void htvec_unmask_irq(struct irq_data *d)
+{
+ u32 reg;
+ void __iomem *addr;
+ struct htvec *priv = irq_data_get_irq_chip_data(d);
+
+ raw_spin_lock(&priv->htvec_lock);
+ addr = priv->base + HTVEC_EN_OFF;
+ addr += VEC_REG_IDX(d->hwirq) * 4;
+ reg = readl(addr);
+ reg |= BIT(VEC_REG_BIT(d->hwirq));
+ writel(reg, addr);
+ raw_spin_unlock(&priv->htvec_lock);
+}
+
+static struct irq_chip htvec_irq_chip = {
+ .name = "LOONGSON_HTVEC",
+ .irq_mask = htvec_mask_irq,
+ .irq_unmask = htvec_unmask_irq,
+ .irq_ack = htvec_ack_irq,
+};
+
+static int htvec_domain_alloc(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs, void *arg)
+{
+ unsigned long hwirq;
+ unsigned int type, i;
+ struct htvec *priv = domain->host_data;
+
+ irq_domain_translate_onecell(domain, arg, &hwirq, &type);
+
+ for (i = 0; i < nr_irqs; i++) {
+ irq_domain_set_info(domain, virq + i, hwirq + i, &htvec_irq_chip,
+ priv, handle_edge_irq, NULL, NULL);
+ }
+
+ return 0;
+}
+
+static void htvec_domain_free(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs)
+{
+ int i;
+
+ for (i = 0; i < nr_irqs; i++) {
+ struct irq_data *d = irq_domain_get_irq_data(domain, virq + i);
+
+ irq_set_handler(virq + i, NULL);
+ irq_domain_reset_irq_data(d);
+ }
+}
+
+static const struct irq_domain_ops htvec_domain_ops = {
+ .translate = irq_domain_translate_onecell,
+ .alloc = htvec_domain_alloc,
+ .free = htvec_domain_free,
+};
+
+static void htvec_reset(struct htvec *priv)
+{
+ u32 idx;
+
+ /* Clear IRQ cause registers, mask all interrupts */
+ for (idx = 0; idx < VEC_REG_COUNT; idx++) {
+ writel_relaxed(0x0, priv->base + HTVEC_EN_OFF + 4 * idx);
+ writel_relaxed(0xFFFFFFFF, priv->base);
+ }
+}
+
+static int htvec_of_init(struct device_node *node,
+ struct device_node *parent)
+{
+ struct htvec *priv;
+ int err, parent_irq[4], num_parents = 0, i;
+
+ priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ raw_spin_lock_init(&priv->htvec_lock);
+ priv->base = of_iomap(node, 0);
+ if (!priv->base) {
+ err = -ENOMEM;
+ goto free_priv;
+ }
+
+ /* Interrupts may come from any of the 4 interrupt lines */
+ for (i = 0; i < HTVEC_MAX_PARENT_IRQ; i++) {
+ parent_irq[i] = irq_of_parse_and_map(node, i);
+ if (parent_irq[i] <= 0)
+ break;
+
+ num_parents++;
+ }
+
+ if (!num_parents) {
+ pr_err("Failed to get parent irqs\n");
+ err = -ENODEV;
+ goto iounmap_base;
+ }
+
+ priv->htvec_domain = irq_domain_create_linear(of_node_to_fwnode(node),
+ VEC_COUNT,
+ &htvec_domain_ops,
+ priv);
+ if (!priv->htvec_domain) {
+ pr_err("Failed to create IRQ domain\n");
+ err = -ENOMEM;
+ goto iounmap_base;
+ }
+
+ htvec_reset(priv);
+
+ for (i = 0; i < num_parents; i++)
+ irq_set_chained_handler_and_data(parent_irq[i],
+ htvec_irq_dispatch, priv);
+
+ return 0;
+
+iounmap_base:
+ iounmap(priv->base);
+free_priv:
+ kfree(priv);
+
+ return err;
+}
+
+IRQCHIP_DECLARE(htvec, "loongson,htvec-1.0", htvec_of_init);
diff --git a/drivers/irqchip/irq-loongson-pch-msi.c b/drivers/irqchip/irq-loongson-pch-msi.c
new file mode 100644
index 000000000000..50becd21008c
--- /dev/null
+++ b/drivers/irqchip/irq-loongson-pch-msi.c
@@ -0,0 +1,255 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020, Jiaxun Yang <jiaxun.yang@flygoat.com>
+ * Loongson PCH MSI support
+ */
+
+#define pr_fmt(fmt) "pch-msi: " fmt
+
+#include <linux/irqchip.h>
+#include <linux/msi.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/of_pci.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+
+struct pch_msi_data {
+ struct mutex msi_map_lock;
+ phys_addr_t doorbell;
+ u32 irq_first; /* The vector number where MSIs start */
+ u32 num_irqs; /* The number of vectors for MSIs */
+ unsigned long *msi_map;
+};
+
+static void pch_msi_mask_msi_irq(struct irq_data *d)
+{
+ pci_msi_mask_irq(d);
+ irq_chip_mask_parent(d);
+}
+
+static void pch_msi_unmask_msi_irq(struct irq_data *d)
+{
+ irq_chip_unmask_parent(d);
+ pci_msi_unmask_irq(d);
+}
+
+static struct irq_chip pch_msi_irq_chip = {
+ .name = "PCH PCI MSI",
+ .irq_mask = pch_msi_mask_msi_irq,
+ .irq_unmask = pch_msi_unmask_msi_irq,
+ .irq_ack = irq_chip_ack_parent,
+ .irq_set_affinity = irq_chip_set_affinity_parent,
+};
+
+static int pch_msi_allocate_hwirq(struct pch_msi_data *priv, int num_req)
+{
+ int first;
+
+ mutex_lock(&priv->msi_map_lock);
+
+ first = bitmap_find_free_region(priv->msi_map, priv->num_irqs,
+ get_count_order(num_req));
+ if (first < 0) {
+ mutex_unlock(&priv->msi_map_lock);
+ return -ENOSPC;
+ }
+
+ mutex_unlock(&priv->msi_map_lock);
+
+ return priv->irq_first + first;
+}
+
+static void pch_msi_free_hwirq(struct pch_msi_data *priv,
+ int hwirq, int num_req)
+{
+ int first = hwirq - priv->irq_first;
+
+ mutex_lock(&priv->msi_map_lock);
+ bitmap_release_region(priv->msi_map, first, get_count_order(num_req));
+ mutex_unlock(&priv->msi_map_lock);
+}
+
+static void pch_msi_compose_msi_msg(struct irq_data *data,
+ struct msi_msg *msg)
+{
+ struct pch_msi_data *priv = irq_data_get_irq_chip_data(data);
+
+ msg->address_hi = upper_32_bits(priv->doorbell);
+ msg->address_lo = lower_32_bits(priv->doorbell);
+ msg->data = data->hwirq;
+}
+
+static struct msi_domain_info pch_msi_domain_info = {
+ .flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
+ MSI_FLAG_MULTI_PCI_MSI | MSI_FLAG_PCI_MSIX,
+ .chip = &pch_msi_irq_chip,
+};
+
+static struct irq_chip middle_irq_chip = {
+ .name = "PCH MSI",
+ .irq_mask = irq_chip_mask_parent,
+ .irq_unmask = irq_chip_unmask_parent,
+ .irq_ack = irq_chip_ack_parent,
+ .irq_set_affinity = irq_chip_set_affinity_parent,
+ .irq_compose_msi_msg = pch_msi_compose_msi_msg,
+};
+
+static int pch_msi_parent_domain_alloc(struct irq_domain *domain,
+ unsigned int virq, int hwirq)
+{
+ struct irq_fwspec fwspec;
+ int ret;
+
+ fwspec.fwnode = domain->parent->fwnode;
+ fwspec.param_count = 1;
+ fwspec.param[0] = hwirq;
+
+ ret = irq_domain_alloc_irqs_parent(domain, virq, 1, &fwspec);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static int pch_msi_middle_domain_alloc(struct irq_domain *domain,
+ unsigned int virq,
+ unsigned int nr_irqs, void *args)
+{
+ struct pch_msi_data *priv = domain->host_data;
+ int hwirq, err, i;
+
+ hwirq = pch_msi_allocate_hwirq(priv, nr_irqs);
+ if (hwirq < 0)
+ return hwirq;
+
+ for (i = 0; i < nr_irqs; i++) {
+ err = pch_msi_parent_domain_alloc(domain, virq + i, hwirq + i);
+ if (err)
+ goto err_hwirq;
+
+ irq_domain_set_hwirq_and_chip(domain, virq + i, hwirq + i,
+ &middle_irq_chip, priv);
+ }
+
+ return 0;
+
+err_hwirq:
+ pch_msi_free_hwirq(priv, hwirq, nr_irqs);
+ irq_domain_free_irqs_parent(domain, virq, i - 1);
+
+ return err;
+}
+
+static void pch_msi_middle_domain_free(struct irq_domain *domain,
+ unsigned int virq,
+ unsigned int nr_irqs)
+{
+ struct irq_data *d = irq_domain_get_irq_data(domain, virq);
+ struct pch_msi_data *priv = irq_data_get_irq_chip_data(d);
+
+ irq_domain_free_irqs_parent(domain, virq, nr_irqs);
+ pch_msi_free_hwirq(priv, d->hwirq, nr_irqs);
+}
+
+static const struct irq_domain_ops pch_msi_middle_domain_ops = {
+ .alloc = pch_msi_middle_domain_alloc,
+ .free = pch_msi_middle_domain_free,
+};
+
+static int pch_msi_init_domains(struct pch_msi_data *priv,
+ struct device_node *node,
+ struct irq_domain *parent)
+{
+ struct irq_domain *middle_domain, *msi_domain;
+
+ middle_domain = irq_domain_create_linear(of_node_to_fwnode(node),
+ priv->num_irqs,
+ &pch_msi_middle_domain_ops,
+ priv);
+ if (!middle_domain) {
+ pr_err("Failed to create the MSI middle domain\n");
+ return -ENOMEM;
+ }
+
+ middle_domain->parent = parent;
+ irq_domain_update_bus_token(middle_domain, DOMAIN_BUS_NEXUS);
+
+ msi_domain = pci_msi_create_irq_domain(of_node_to_fwnode(node),
+ &pch_msi_domain_info,
+ middle_domain);
+ if (!msi_domain) {
+ pr_err("Failed to create PCI MSI domain\n");
+ irq_domain_remove(middle_domain);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static int pch_msi_init(struct device_node *node,
+ struct device_node *parent)
+{
+ struct pch_msi_data *priv;
+ struct irq_domain *parent_domain;
+ struct resource res;
+ int ret;
+
+ parent_domain = irq_find_host(parent);
+ if (!parent_domain) {
+ pr_err("Failed to find the parent domain\n");
+ return -ENXIO;
+ }
+
+ priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ mutex_init(&priv->msi_map_lock);
+
+ ret = of_address_to_resource(node, 0, &res);
+ if (ret) {
+ pr_err("Failed to allocate resource\n");
+ goto err_priv;
+ }
+
+ priv->doorbell = res.start;
+
+ if (of_property_read_u32(node, "loongson,msi-base-vec",
+ &priv->irq_first)) {
+ pr_err("Unable to parse MSI vec base\n");
+ ret = -EINVAL;
+ goto err_priv;
+ }
+
+ if (of_property_read_u32(node, "loongson,msi-num-vecs",
+ &priv->num_irqs)) {
+ pr_err("Unable to parse MSI vec number\n");
+ ret = -EINVAL;
+ goto err_priv;
+ }
+
+ priv->msi_map = bitmap_alloc(priv->num_irqs, GFP_KERNEL);
+ if (!priv->msi_map) {
+ ret = -ENOMEM;
+ goto err_priv;
+ }
+
+ pr_debug("Registering %d MSIs, starting at %d\n",
+ priv->num_irqs, priv->irq_first);
+
+ ret = pch_msi_init_domains(priv, node, parent_domain);
+ if (ret)
+ goto err_map;
+
+ return 0;
+
+err_map:
+ kfree(priv->msi_map);
+err_priv:
+ kfree(priv);
+ return ret;
+}
+
+IRQCHIP_DECLARE(pch_msi, "loongson,pch-msi-1.0", pch_msi_init);
diff --git a/drivers/irqchip/irq-loongson-pch-pic.c b/drivers/irqchip/irq-loongson-pch-pic.c
new file mode 100644
index 000000000000..2a05b9305012
--- /dev/null
+++ b/drivers/irqchip/irq-loongson-pch-pic.c
@@ -0,0 +1,243 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020, Jiaxun Yang <jiaxun.yang@flygoat.com>
+ * Loongson PCH PIC support
+ */
+
+#define pr_fmt(fmt) "pch-pic: " fmt
+
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/irqchip.h>
+#include <linux/irqdomain.h>
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/of_platform.h>
+
+/* Registers */
+#define PCH_PIC_MASK 0x20
+#define PCH_PIC_HTMSI_EN 0x40
+#define PCH_PIC_EDGE 0x60
+#define PCH_PIC_CLR 0x80
+#define PCH_PIC_AUTO0 0xc0
+#define PCH_PIC_AUTO1 0xe0
+#define PCH_INT_ROUTE(irq) (0x100 + irq)
+#define PCH_INT_HTVEC(irq) (0x200 + irq)
+#define PCH_PIC_POL 0x3e0
+
+#define PIC_COUNT_PER_REG 32
+#define PIC_REG_COUNT 2
+#define PIC_COUNT (PIC_COUNT_PER_REG * PIC_REG_COUNT)
+#define PIC_REG_IDX(irq_id) ((irq_id) / PIC_COUNT_PER_REG)
+#define PIC_REG_BIT(irq_id) ((irq_id) % PIC_COUNT_PER_REG)
+
+struct pch_pic {
+ void __iomem *base;
+ struct irq_domain *pic_domain;
+ u32 ht_vec_base;
+ raw_spinlock_t pic_lock;
+};
+
+static void pch_pic_bitset(struct pch_pic *priv, int offset, int bit)
+{
+ u32 reg;
+ void __iomem *addr = priv->base + offset + PIC_REG_IDX(bit) * 4;
+
+ raw_spin_lock(&priv->pic_lock);
+ reg = readl(addr);
+ reg |= BIT(PIC_REG_BIT(bit));
+ writel(reg, addr);
+ raw_spin_unlock(&priv->pic_lock);
+}
+
+static void pch_pic_bitclr(struct pch_pic *priv, int offset, int bit)
+{
+ u32 reg;
+ void __iomem *addr = priv->base + offset + PIC_REG_IDX(bit) * 4;
+
+ raw_spin_lock(&priv->pic_lock);
+ reg = readl(addr);
+ reg &= ~BIT(PIC_REG_BIT(bit));
+ writel(reg, addr);
+ raw_spin_unlock(&priv->pic_lock);
+}
+
+static void pch_pic_eoi_irq(struct irq_data *d)
+{
+ u32 idx = PIC_REG_IDX(d->hwirq);
+ struct pch_pic *priv = irq_data_get_irq_chip_data(d);
+
+ writel(BIT(PIC_REG_BIT(d->hwirq)),
+ priv->base + PCH_PIC_CLR + idx * 4);
+}
+
+static void pch_pic_mask_irq(struct irq_data *d)
+{
+ struct pch_pic *priv = irq_data_get_irq_chip_data(d);
+
+ pch_pic_bitset(priv, PCH_PIC_MASK, d->hwirq);
+ irq_chip_mask_parent(d);
+}
+
+static void pch_pic_unmask_irq(struct irq_data *d)
+{
+ struct pch_pic *priv = irq_data_get_irq_chip_data(d);
+
+ irq_chip_unmask_parent(d);
+ pch_pic_bitclr(priv, PCH_PIC_MASK, d->hwirq);
+}
+
+static int pch_pic_set_type(struct irq_data *d, unsigned int type)
+{
+ struct pch_pic *priv = irq_data_get_irq_chip_data(d);
+ int ret = 0;
+
+ switch (type) {
+ case IRQ_TYPE_EDGE_RISING:
+ pch_pic_bitset(priv, PCH_PIC_EDGE, d->hwirq);
+ pch_pic_bitclr(priv, PCH_PIC_POL, d->hwirq);
+ break;
+ case IRQ_TYPE_EDGE_FALLING:
+ pch_pic_bitset(priv, PCH_PIC_EDGE, d->hwirq);
+ pch_pic_bitset(priv, PCH_PIC_POL, d->hwirq);
+ break;
+ case IRQ_TYPE_LEVEL_HIGH:
+ pch_pic_bitclr(priv, PCH_PIC_EDGE, d->hwirq);
+ pch_pic_bitclr(priv, PCH_PIC_POL, d->hwirq);
+ break;
+ case IRQ_TYPE_LEVEL_LOW:
+ pch_pic_bitclr(priv, PCH_PIC_EDGE, d->hwirq);
+ pch_pic_bitset(priv, PCH_PIC_POL, d->hwirq);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+static struct irq_chip pch_pic_irq_chip = {
+ .name = "PCH PIC",
+ .irq_mask = pch_pic_mask_irq,
+ .irq_unmask = pch_pic_unmask_irq,
+ .irq_ack = irq_chip_ack_parent,
+ .irq_eoi = pch_pic_eoi_irq,
+ .irq_set_affinity = irq_chip_set_affinity_parent,
+ .irq_set_type = pch_pic_set_type,
+};
+
+static int pch_pic_alloc(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs, void *arg)
+{
+ int err;
+ unsigned int type;
+ unsigned long hwirq;
+ struct irq_fwspec fwspec;
+ struct pch_pic *priv = domain->host_data;
+
+ irq_domain_translate_twocell(domain, arg, &hwirq, &type);
+
+ fwspec.fwnode = domain->parent->fwnode;
+ fwspec.param_count = 1;
+ fwspec.param[0] = hwirq + priv->ht_vec_base;
+
+ err = irq_domain_alloc_irqs_parent(domain, virq, 1, &fwspec);
+ if (err)
+ return err;
+
+ irq_domain_set_info(domain, virq, hwirq,
+ &pch_pic_irq_chip, priv,
+ handle_fasteoi_ack_irq, NULL, NULL);
+ irq_set_probe(virq);
+
+ return 0;
+}
+
+static const struct irq_domain_ops pch_pic_domain_ops = {
+ .translate = irq_domain_translate_twocell,
+ .alloc = pch_pic_alloc,
+ .free = irq_domain_free_irqs_parent,
+};
+
+static void pch_pic_reset(struct pch_pic *priv)
+{
+ int i;
+
+ for (i = 0; i < PIC_COUNT; i++) {
+ /* Write vector ID */
+ writeb(priv->ht_vec_base + i, priv->base + PCH_INT_HTVEC(i));
+ /* Hardcode route to HT0 Lo */
+ writeb(1, priv->base + PCH_INT_ROUTE(i));
+ }
+
+ for (i = 0; i < PIC_REG_COUNT; i++) {
+ /* Clear IRQ cause registers, mask all interrupts */
+ writel_relaxed(0xFFFFFFFF, priv->base + PCH_PIC_MASK + 4 * i);
+ writel_relaxed(0xFFFFFFFF, priv->base + PCH_PIC_CLR + 4 * i);
+ /* Clear auto bounce, we don't need that */
+ writel_relaxed(0, priv->base + PCH_PIC_AUTO0 + 4 * i);
+ writel_relaxed(0, priv->base + PCH_PIC_AUTO1 + 4 * i);
+ /* Enable HTMSI transformer */
+ writel_relaxed(0xFFFFFFFF, priv->base + PCH_PIC_HTMSI_EN + 4 * i);
+ }
+}
+
+static int pch_pic_of_init(struct device_node *node,
+ struct device_node *parent)
+{
+ struct pch_pic *priv;
+ struct irq_domain *parent_domain;
+ int err;
+
+ priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ raw_spin_lock_init(&priv->pic_lock);
+ priv->base = of_iomap(node, 0);
+ if (!priv->base) {
+ err = -ENOMEM;
+ goto free_priv;
+ }
+
+ parent_domain = irq_find_host(parent);
+ if (!parent_domain) {
+ pr_err("Failed to find the parent domain\n");
+ err = -ENXIO;
+ goto iounmap_base;
+ }
+
+ if (of_property_read_u32(node, "loongson,pic-base-vec",
+ &priv->ht_vec_base)) {
+ pr_err("Failed to determine pic-base-vec\n");
+ err = -EINVAL;
+ goto iounmap_base;
+ }
+
+ priv->pic_domain = irq_domain_create_hierarchy(parent_domain, 0,
+ PIC_COUNT,
+ of_node_to_fwnode(node),
+ &pch_pic_domain_ops,
+ priv);
+ if (!priv->pic_domain) {
+ pr_err("Failed to create IRQ domain\n");
+ err = -ENOMEM;
+ goto iounmap_base;
+ }
+
+ pch_pic_reset(priv);
+
+ return 0;
+
+iounmap_base:
+ iounmap(priv->base);
+free_priv:
+ kfree(priv);
+
+ return err;
+}
+
+IRQCHIP_DECLARE(pch_pic, "loongson,pch-pic-1.0", pch_pic_of_init);
diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c
index d0a71febdadc..d9c53f85a68e 100644
--- a/drivers/irqchip/irq-sifive-plic.c
+++ b/drivers/irqchip/irq-sifive-plic.c
@@ -76,6 +76,7 @@ struct plic_handler {
void __iomem *enable_base;
struct plic_priv *priv;
};
+static bool plic_cpuhp_setup_done;
static DEFINE_PER_CPU(struct plic_handler, plic_handlers);
static inline void plic_toggle(struct plic_handler *handler,
@@ -176,9 +177,12 @@ static struct irq_chip plic_chip = {
static int plic_irqdomain_map(struct irq_domain *d, unsigned int irq,
irq_hw_number_t hwirq)
{
+ struct plic_priv *priv = d->host_data;
+
irq_domain_set_info(d, irq, hwirq, &plic_chip, d->host_data,
handle_fasteoi_irq, NULL, NULL);
irq_set_noprobe(irq);
+ irq_set_affinity(irq, &priv->lmask);
return 0;
}
@@ -282,6 +286,7 @@ static int __init plic_init(struct device_node *node,
int error = 0, nr_contexts, nr_handlers = 0, i;
u32 nr_irqs;
struct plic_priv *priv;
+ struct plic_handler *handler;
priv = kzalloc(sizeof(*priv), GFP_KERNEL);
if (!priv)
@@ -301,8 +306,6 @@ static int __init plic_init(struct device_node *node,
nr_contexts = of_irq_count(node);
if (WARN_ON(!nr_contexts))
goto out_iounmap;
- if (WARN_ON(nr_contexts < num_possible_cpus()))
- goto out_iounmap;
error = -ENOMEM;
priv->irqdomain = irq_domain_add_linear(node, nr_irqs + 1,
@@ -312,7 +315,6 @@ static int __init plic_init(struct device_node *node,
for (i = 0; i < nr_contexts; i++) {
struct of_phandle_args parent;
- struct plic_handler *handler;
irq_hw_number_t hwirq;
int cpu, hartid;
@@ -366,11 +368,20 @@ done:
nr_handlers++;
}
- cpuhp_setup_state(CPUHP_AP_IRQ_SIFIVE_PLIC_STARTING,
+ /*
+ * We can have multiple PLIC instances so set up the cpuhp state only
+ * when the context handler for the current/boot CPU is present.
+ */
+ handler = this_cpu_ptr(&plic_handlers);
+ if (handler->present && !plic_cpuhp_setup_done) {
+ cpuhp_setup_state(CPUHP_AP_IRQ_SIFIVE_PLIC_STARTING,
"irqchip/sifive/plic:starting",
plic_starting_cpu, plic_dying_cpu);
- pr_info("mapped %d interrupts with %d handlers for %d contexts.\n",
- nr_irqs, nr_handlers, nr_contexts);
+ plic_cpuhp_setup_done = true;
+ }
+
+ pr_info("%pOFP: mapped %d interrupts with %d handlers for"
+ " %d contexts.\n", node, nr_irqs, nr_handlers, nr_contexts);
set_handle_irq(plic_handle_irq);
return 0;
diff --git a/drivers/leds/leds-pca963x.c b/drivers/leds/leds-pca963x.c
index 66cdc003b8f4..d288acbc99c7 100644
--- a/drivers/leds/leds-pca963x.c
+++ b/drivers/leds/leds-pca963x.c
@@ -4,7 +4,7 @@
* Copyright 2013 Qtechnology/AS
*
* Author: Peter Meerwald <p.meerwald@bct-electronic.com>
- * Author: Ricardo Ribalda <ricardo.ribalda@gmail.com>
+ * Author: Ricardo Ribalda <ribalda@kernel.org>
*
* Based on leds-pca955x.c
*
diff --git a/drivers/lightnvm/pblk-cache.c b/drivers/lightnvm/pblk-cache.c
index 5c1034c22197..f185f1a00008 100644
--- a/drivers/lightnvm/pblk-cache.c
+++ b/drivers/lightnvm/pblk-cache.c
@@ -21,16 +21,14 @@
void pblk_write_to_cache(struct pblk *pblk, struct bio *bio,
unsigned long flags)
{
- struct request_queue *q = pblk->dev->q;
struct pblk_w_ctx w_ctx;
sector_t lba = pblk_get_lba(bio);
- unsigned long start_time = jiffies;
+ unsigned long start_time;
unsigned int bpos, pos;
int nr_entries = pblk_get_secs(bio);
int i, ret;
- generic_start_io_acct(q, REQ_OP_WRITE, bio_sectors(bio),
- &pblk->disk->part0);
+ start_time = bio_start_io_acct(bio);
/* Update the write buffer head (mem) with the entries that we can
* write. The write in itself cannot fail, so there is no need to
@@ -79,7 +77,7 @@ retry:
pblk_rl_inserted(&pblk->rl, nr_entries);
out:
- generic_end_io_acct(q, REQ_OP_WRITE, &pblk->disk->part0, start_time);
+ bio_end_io_acct(bio, start_time);
pblk_write_should_kick(pblk);
if (ret == NVM_IO_DONE)
diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c
index 9a967a2e83dd..6e677ff62cc9 100644
--- a/drivers/lightnvm/pblk-init.c
+++ b/drivers/lightnvm/pblk-init.c
@@ -145,9 +145,8 @@ static int pblk_l2p_init(struct pblk *pblk, bool factory_init)
int ret = 0;
map_size = pblk_trans_map_size(pblk);
- pblk->trans_map = __vmalloc(map_size, GFP_KERNEL | __GFP_NOWARN
- | __GFP_RETRY_MAYFAIL | __GFP_HIGHMEM,
- PAGE_KERNEL);
+ pblk->trans_map = __vmalloc(map_size, GFP_KERNEL | __GFP_NOWARN |
+ __GFP_RETRY_MAYFAIL | __GFP_HIGHMEM);
if (!pblk->trans_map) {
pblk_err(pblk, "failed to allocate L2P (need %zu of memory)\n",
map_size);
diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c
index 8efd14e683dc..140927ebf41e 100644
--- a/drivers/lightnvm/pblk-read.c
+++ b/drivers/lightnvm/pblk-read.c
@@ -187,12 +187,11 @@ static void pblk_end_user_read(struct bio *bio, int error)
static void __pblk_end_io_read(struct pblk *pblk, struct nvm_rq *rqd,
bool put_line)
{
- struct nvm_tgt_dev *dev = pblk->dev;
struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
struct bio *int_bio = rqd->bio;
unsigned long start_time = r_ctx->start_time;
- generic_end_io_acct(dev->q, REQ_OP_READ, &pblk->disk->part0, start_time);
+ bio_end_io_acct(int_bio, start_time);
if (rqd->error)
pblk_log_read_err(pblk, rqd);
@@ -263,17 +262,15 @@ retry:
void pblk_submit_read(struct pblk *pblk, struct bio *bio)
{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct request_queue *q = dev->q;
sector_t blba = pblk_get_lba(bio);
unsigned int nr_secs = pblk_get_secs(bio);
bool from_cache;
struct pblk_g_ctx *r_ctx;
struct nvm_rq *rqd;
struct bio *int_bio, *split_bio;
+ unsigned long start_time;
- generic_start_io_acct(q, REQ_OP_READ, bio_sectors(bio),
- &pblk->disk->part0);
+ start_time = bio_start_io_acct(bio);
rqd = pblk_alloc_rqd(pblk, PBLK_READ);
@@ -283,7 +280,7 @@ void pblk_submit_read(struct pblk *pblk, struct bio *bio)
rqd->end_io = pblk_end_io_read;
r_ctx = nvm_rq_to_pdu(rqd);
- r_ctx->start_time = jiffies;
+ r_ctx->start_time = start_time;
r_ctx->lba = blba;
if (pblk_alloc_rqd_meta(pblk, rqd)) {
diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c
index 83eb05bf85ff..8450d7c008d0 100644
--- a/drivers/macintosh/via-pmu.c
+++ b/drivers/macintosh/via-pmu.c
@@ -2184,8 +2184,6 @@ pmu_read(struct file *file, char __user *buf,
if (count < 1 || !pp)
return -EINVAL;
- if (!access_ok(buf, count))
- return -EFAULT;
spin_lock_irqsave(&pp->lock, flags);
add_wait_queue(&pp->wait, &wait);
diff --git a/drivers/md/bcache/Kconfig b/drivers/md/bcache/Kconfig
index 6dfa653d30db..bf7dd96db9b3 100644
--- a/drivers/md/bcache/Kconfig
+++ b/drivers/md/bcache/Kconfig
@@ -26,3 +26,12 @@ config BCACHE_CLOSURES_DEBUG
Keeps all active closures in a linked list and provides a debugfs
interface to list them, which makes it possible to see asynchronous
operations that get stuck.
+
+config BCACHE_ASYNC_REGISTRAION
+ bool "Asynchronous device registration (EXPERIMENTAL)"
+ depends on BCACHE
+ help
+ Add a sysfs file /sys/fs/bcache/register_async. Writing a device
+ path into this file returns immediately; the real registration
+ work is handled asynchronously in a kernel workqueue.
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 74a9849ea164..221e0191b687 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -176,7 +176,7 @@
* - updates to non leaf nodes just happen synchronously (see btree_split()).
*/
-#define pr_fmt(fmt) "bcache: %s() " fmt "\n", __func__
+#define pr_fmt(fmt) "bcache: %s() " fmt, __func__
#include <linux/bcache.h>
#include <linux/bio.h>
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index 4385303836d8..4995fcaefe29 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -6,7 +6,7 @@
* Copyright 2012 Google, Inc.
*/
-#define pr_fmt(fmt) "bcache: %s() " fmt "\n", __func__
+#define pr_fmt(fmt) "bcache: %s() " fmt, __func__
#include "util.h"
#include "bset.h"
@@ -31,7 +31,7 @@ void bch_dump_bset(struct btree_keys *b, struct bset *i, unsigned int set)
if (b->ops->key_dump)
b->ops->key_dump(b, k);
else
- pr_err("%llu:%llu\n", KEY_INODE(k), KEY_OFFSET(k));
+ pr_cont("%llu:%llu\n", KEY_INODE(k), KEY_OFFSET(k));
if (next < bset_bkey_last(i) &&
bkey_cmp(k, b->ops->is_extents ?
@@ -1225,7 +1225,7 @@ static void btree_mergesort(struct btree_keys *b, struct bset *out,
out->keys = last ? (uint64_t *) bkey_next(last) - out->d : 0;
- pr_debug("sorted %i keys", out->keys);
+ pr_debug("sorted %i keys\n", out->keys);
}
static void __btree_sort(struct btree_keys *b, struct btree_iter *iter,
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 72856e5f23a3..39de94edd73a 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -619,7 +619,7 @@ retry:
* and BTREE_NODE_journal_flush bit cleared by btree_flush_write().
*/
if (btree_node_journal_flush(b)) {
- pr_debug("bnode %p is flushing by journal, retry", b);
+ pr_debug("bnode %p is flushing by journal, retry\n", b);
mutex_unlock(&b->write_lock);
udelay(1);
goto retry;
@@ -802,7 +802,7 @@ int bch_btree_cache_alloc(struct cache_set *c)
c->shrink.batch = c->btree_pages * 2;
if (register_shrinker(&c->shrink))
- pr_warn("bcache: %s: could not register shrinker",
+ pr_warn("bcache: %s: could not register shrinker\n",
__func__);
return 0;
@@ -1054,7 +1054,7 @@ retry:
*/
if (btree_node_journal_flush(b)) {
mutex_unlock(&b->write_lock);
- pr_debug("bnode %p journal_flush set, retry", b);
+ pr_debug("bnode %p journal_flush set, retry\n", b);
udelay(1);
goto retry;
}
@@ -1798,7 +1798,7 @@ static void bch_btree_gc(struct cache_set *c)
schedule_timeout_interruptible(msecs_to_jiffies
(GC_SLEEP_MS));
else if (ret)
- pr_warn("gc failed!");
+ pr_warn("gc failed!\n");
} while (ret && !test_bit(CACHE_SET_IO_DISABLE, &c->flags));
bch_btree_gc_finish(c);
@@ -1907,10 +1907,8 @@ static int bch_btree_check_thread(void *arg)
struct btree_iter iter;
struct bkey *k, *p;
int cur_idx, prev_idx, skip_nr;
- int i, n;
k = p = NULL;
- i = n = 0;
cur_idx = prev_idx = 0;
ret = 0;
@@ -2045,7 +2043,7 @@ int bch_btree_check(struct cache_set *c)
&check_state->infos[i],
name);
if (IS_ERR(check_state->infos[i].thread)) {
- pr_err("fails to run thread bch_btrchk[%d]", i);
+ pr_err("fails to run thread bch_btrchk[%d]\n", i);
for (--i; i >= 0; i--)
kthread_stop(check_state->infos[i].thread);
ret = -ENOMEM;
@@ -2456,7 +2454,7 @@ int bch_btree_insert(struct cache_set *c, struct keylist *keys,
if (ret) {
struct bkey *k;
- pr_err("error %i", ret);
+ pr_err("error %i\n", ret);
while ((k = bch_keylist_pop(keys)))
bkey_put(c, k);
@@ -2744,7 +2742,7 @@ struct keybuf_key *bch_keybuf_next_rescan(struct cache_set *c,
break;
if (bkey_cmp(&buf->last_scanned, end) >= 0) {
- pr_debug("scan finished");
+ pr_debug("scan finished\n");
break;
}
diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c
index 886710043025..9162af5bb6ec 100644
--- a/drivers/md/bcache/extents.c
+++ b/drivers/md/bcache/extents.c
@@ -130,18 +130,18 @@ static void bch_bkey_dump(struct btree_keys *keys, const struct bkey *k)
char buf[80];
bch_extent_to_text(buf, sizeof(buf), k);
- pr_err(" %s", buf);
+ pr_cont(" %s", buf);
for (j = 0; j < KEY_PTRS(k); j++) {
size_t n = PTR_BUCKET_NR(b->c, k, j);
- pr_err(" bucket %zu", n);
+ pr_cont(" bucket %zu", n);
if (n >= b->c->sb.first_bucket && n < b->c->sb.nbuckets)
- pr_err(" prio %i",
- PTR_BUCKET(b->c, k, j)->prio);
+ pr_cont(" prio %i",
+ PTR_BUCKET(b->c, k, j)->prio);
}
- pr_err(" %s\n", bch_ptr_status(b->c, k));
+ pr_cont(" %s\n", bch_ptr_status(b->c, k));
}
/* Btree ptrs */
@@ -553,7 +553,7 @@ static bool bch_extent_bad(struct btree_keys *bk, const struct bkey *k)
if (stale && KEY_DIRTY(k)) {
bch_extent_to_text(buf, sizeof(buf), k);
- pr_info("stale dirty pointer, stale %u, key: %s",
+ pr_info("stale dirty pointer, stale %u, key: %s\n",
stale, buf);
}
diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
index 4d93f07f63e5..b25ee33b0d0b 100644
--- a/drivers/md/bcache/io.c
+++ b/drivers/md/bcache/io.c
@@ -65,14 +65,14 @@ void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio)
* we shouldn't count failed REQ_RAHEAD bio to dc->io_errors.
*/
if (bio->bi_opf & REQ_RAHEAD) {
- pr_warn_ratelimited("%s: Read-ahead I/O failed on backing device, ignore",
+ pr_warn_ratelimited("%s: Read-ahead I/O failed on backing device, ignore\n",
dc->backing_dev_name);
return;
}
errors = atomic_add_return(1, &dc->io_errors);
if (errors < dc->error_limit)
- pr_err("%s: IO error on backing device, unrecoverable",
+ pr_err("%s: IO error on backing device, unrecoverable\n",
dc->backing_dev_name);
else
bch_cached_dev_error(dc);
@@ -123,12 +123,12 @@ void bch_count_io_errors(struct cache *ca,
errors >>= IO_ERROR_SHIFT;
if (errors < ca->set->error_limit)
- pr_err("%s: IO error on %s%s",
+ pr_err("%s: IO error on %s%s\n",
ca->cache_dev_name, m,
is_read ? ", recovering." : ".");
else
bch_cache_set_error(ca->set,
- "%s: too many IO errors %s",
+ "%s: too many IO errors %s\n",
ca->cache_dev_name, m);
}
}
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index 0e3ff9745ac7..90aac4e2333f 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -47,7 +47,7 @@ static int journal_read_bucket(struct cache *ca, struct list_head *list,
closure_init_stack(&cl);
- pr_debug("reading %u", bucket_index);
+ pr_debug("reading %u\n", bucket_index);
while (offset < ca->sb.bucket_size) {
reread: left = ca->sb.bucket_size - offset;
@@ -78,13 +78,13 @@ reread: left = ca->sb.bucket_size - offset;
size_t blocks, bytes = set_bytes(j);
if (j->magic != jset_magic(&ca->sb)) {
- pr_debug("%u: bad magic", bucket_index);
+ pr_debug("%u: bad magic\n", bucket_index);
return ret;
}
if (bytes > left << 9 ||
bytes > PAGE_SIZE << JSET_BITS) {
- pr_info("%u: too big, %zu bytes, offset %u",
+ pr_info("%u: too big, %zu bytes, offset %u\n",
bucket_index, bytes, offset);
return ret;
}
@@ -93,7 +93,7 @@ reread: left = ca->sb.bucket_size - offset;
goto reread;
if (j->csum != csum_set(j)) {
- pr_info("%u: bad csum, %zu bytes, offset %u",
+ pr_info("%u: bad csum, %zu bytes, offset %u\n",
bucket_index, bytes, offset);
return ret;
}
@@ -190,7 +190,7 @@ int bch_journal_read(struct cache_set *c, struct list_head *list)
uint64_t seq;
bitmap_zero(bitmap, SB_JOURNAL_BUCKETS);
- pr_debug("%u journal buckets", ca->sb.njournal_buckets);
+ pr_debug("%u journal buckets\n", ca->sb.njournal_buckets);
/*
* Read journal buckets ordered by golden ratio hash to quickly
@@ -215,7 +215,7 @@ int bch_journal_read(struct cache_set *c, struct list_head *list)
* If that fails, check all the buckets we haven't checked
* already
*/
- pr_debug("falling back to linear search");
+ pr_debug("falling back to linear search\n");
for (l = find_first_zero_bit(bitmap, ca->sb.njournal_buckets);
l < ca->sb.njournal_buckets;
@@ -233,7 +233,7 @@ bsearch:
/* Binary search */
m = l;
r = find_next_bit(bitmap, ca->sb.njournal_buckets, l + 1);
- pr_debug("starting binary search, l %u r %u", l, r);
+ pr_debug("starting binary search, l %u r %u\n", l, r);
while (l + 1 < r) {
seq = list_entry(list->prev, struct journal_replay,
@@ -253,7 +253,7 @@ bsearch:
* Read buckets in reverse order until we stop finding more
* journal entries
*/
- pr_debug("finishing up: m %u njournal_buckets %u",
+ pr_debug("finishing up: m %u njournal_buckets %u\n",
m, ca->sb.njournal_buckets);
l = m;
@@ -370,10 +370,10 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list)
if (n != i->j.seq) {
if (n == start && is_discard_enabled(s))
- pr_info("bcache: journal entries %llu-%llu may be discarded! (replaying %llu-%llu)",
+ pr_info("journal entries %llu-%llu may be discarded! (replaying %llu-%llu)\n",
n, i->j.seq - 1, start, end);
else {
- pr_err("bcache: journal entries %llu-%llu missing! (replaying %llu-%llu)",
+ pr_err("journal entries %llu-%llu missing! (replaying %llu-%llu)\n",
n, i->j.seq - 1, start, end);
ret = -EIO;
goto err;
@@ -403,7 +403,7 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list)
entries++;
}
- pr_info("journal replay done, %i keys in %i entries, seq %llu",
+ pr_info("journal replay done, %i keys in %i entries, seq %llu\n",
keys, entries, end);
err:
while (!list_empty(list)) {
@@ -481,7 +481,7 @@ static void btree_flush_write(struct cache_set *c)
break;
if (btree_node_journal_flush(b))
- pr_err("BUG: flush_write bit should not be set here!");
+ pr_err("BUG: flush_write bit should not be set here!\n");
mutex_lock(&b->write_lock);
@@ -534,13 +534,13 @@ static void btree_flush_write(struct cache_set *c)
for (i = 0; i < nr; i++) {
b = btree_nodes[i];
if (!b) {
- pr_err("BUG: btree_nodes[%d] is NULL", i);
+ pr_err("BUG: btree_nodes[%d] is NULL\n", i);
continue;
}
/* safe to check without holding b->write_lock */
if (!btree_node_journal_flush(b)) {
- pr_err("BUG: bnode %p: journal_flush bit cleaned", b);
+ pr_err("BUG: bnode %p: journal_flush bit cleaned\n", b);
continue;
}
@@ -548,14 +548,14 @@ static void btree_flush_write(struct cache_set *c)
if (!btree_current_write(b)->journal) {
clear_bit(BTREE_NODE_journal_flush, &b->flags);
mutex_unlock(&b->write_lock);
- pr_debug("bnode %p: written by others", b);
+ pr_debug("bnode %p: written by others\n", b);
continue;
}
if (!btree_node_dirty(b)) {
clear_bit(BTREE_NODE_journal_flush, &b->flags);
mutex_unlock(&b->write_lock);
- pr_debug("bnode %p: dirty bit cleaned by others", b);
+ pr_debug("bnode %p: dirty bit cleaned by others\n", b);
continue;
}
@@ -716,7 +716,7 @@ void bch_journal_next(struct journal *j)
j->cur->data->keys = 0;
if (fifo_full(&j->pin))
- pr_debug("journal_pin full (%zu)", fifo_used(&j->pin));
+ pr_debug("journal_pin full (%zu)\n", fifo_used(&j->pin));
}
static void journal_write_endio(struct bio *bio)
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 71a90fbec314..7acf024e99f3 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -110,7 +110,7 @@ static void bch_data_invalidate(struct closure *cl)
struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);
struct bio *bio = op->bio;
- pr_debug("invalidating %i sectors from %llu",
+ pr_debug("invalidating %i sectors from %llu\n",
bio_sectors(bio), (uint64_t) bio->bi_iter.bi_sector);
while (bio_sectors(bio)) {
@@ -396,7 +396,7 @@ static bool check_should_bypass(struct cached_dev *dc, struct bio *bio)
if (bio->bi_iter.bi_sector & (c->sb.block_size - 1) ||
bio_sectors(bio) & (c->sb.block_size - 1)) {
- pr_debug("skipping unaligned io");
+ pr_debug("skipping unaligned io\n");
goto skip;
}
@@ -650,7 +650,7 @@ static void backing_request_endio(struct bio *bio)
*/
if (unlikely(s->iop.writeback &&
bio->bi_opf & REQ_PREFLUSH)) {
- pr_err("Can't flush %s: returned bi_status %i",
+ pr_err("Can't flush %s: returned bi_status %i\n",
dc->backing_dev_name, bio->bi_status);
} else {
/* set to orig_bio->bi_status in bio_complete() */
@@ -668,9 +668,7 @@ static void backing_request_endio(struct bio *bio)
static void bio_complete(struct search *s)
{
if (s->orig_bio) {
- generic_end_io_acct(s->d->disk->queue, bio_op(s->orig_bio),
- &s->d->disk->part0, s->start_time);
-
+ bio_end_io_acct(s->orig_bio, s->start_time);
trace_bcache_request_end(s->d, s->orig_bio);
s->orig_bio->bi_status = s->iop.status;
bio_endio(s->orig_bio);
@@ -730,7 +728,7 @@ static inline struct search *search_alloc(struct bio *bio,
s->recoverable = 1;
s->write = op_is_write(bio_op(bio));
s->read_dirty_data = 0;
- s->start_time = jiffies;
+ s->start_time = bio_start_io_acct(bio);
s->iop.c = d->c;
s->iop.bio = NULL;
@@ -1082,8 +1080,7 @@ static void detached_dev_end_io(struct bio *bio)
bio->bi_end_io = ddip->bi_end_io;
bio->bi_private = ddip->bi_private;
- generic_end_io_acct(ddip->d->disk->queue, bio_op(bio),
- &ddip->d->disk->part0, ddip->start_time);
+ bio_end_io_acct(bio, ddip->start_time);
if (bio->bi_status) {
struct cached_dev *dc = container_of(ddip->d,
@@ -1108,7 +1105,7 @@ static void detached_dev_do_request(struct bcache_device *d, struct bio *bio)
*/
ddip = kzalloc(sizeof(struct detached_dev_io_private), GFP_NOIO);
ddip->d = d;
- ddip->start_time = jiffies;
+ ddip->start_time = bio_start_io_acct(bio);
ddip->bi_end_io = bio->bi_end_io;
ddip->bi_private = bio->bi_private;
bio->bi_end_io = detached_dev_end_io;
@@ -1190,11 +1187,6 @@ blk_qc_t cached_dev_make_request(struct request_queue *q, struct bio *bio)
}
}
- generic_start_io_acct(q,
- bio_op(bio),
- bio_sectors(bio),
- &d->disk->part0);
-
bio_set_dev(bio, dc->bdev);
bio->bi_iter.bi_sector += dc->sb.data_offset;
@@ -1311,8 +1303,6 @@ blk_qc_t flash_dev_make_request(struct request_queue *q, struct bio *bio)
return BLK_QC_T_NONE;
}
- generic_start_io_acct(q, bio_op(bio), bio_sectors(bio), &d->disk->part0);
-
s = search_alloc(bio, d);
cl = &s->cl;
bio = &s->bio.bio;
@@ -1372,7 +1362,6 @@ void bch_flash_dev_request_init(struct bcache_device *d)
{
struct gendisk *g = d->disk;
- g->queue->make_request_fn = flash_dev_make_request;
g->queue->backing_dev_info->congested_fn = flash_dev_congested;
d->cache_miss = flash_dev_cache_miss;
d->ioctl = flash_dev_ioctl;
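
The request.c changes above drop the queue/partition based accounting calls in favour of the bio based helpers. A rough sketch of how a bio based driver pairs them (names are placeholders, not bcache code):

    static void my_submit_bio(struct bio *bio)
    {
            unsigned long start = bio_start_io_acct(bio); /* records start time and in-flight count */

            /* ... service the request ... */

            bio_end_io_acct(bio, start);                  /* completes the accounting */
            bio_endio(bio);
    }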
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index d98354fa28e3..f9975c22bf7e 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -89,7 +89,7 @@ static const char *read_super(struct cache_sb *sb, struct block_device *bdev,
for (i = 0; i < SB_JOURNAL_BUCKETS; i++)
sb->d[i] = le64_to_cpu(s->d[i]);
- pr_debug("read sb version %llu, flags %llu, seq %llu, journal size %u",
+ pr_debug("read sb version %llu, flags %llu, seq %llu, journal size %u\n",
sb->version, sb->flags, sb->seq, sb->keys);
err = "Not a bcache superblock (bad offset)";
@@ -234,7 +234,7 @@ static void __write_super(struct cache_sb *sb, struct cache_sb_disk *out,
out->csum = csum_set(out);
- pr_debug("ver %llu, flags %llu, seq %llu",
+ pr_debug("ver %llu, flags %llu, seq %llu\n",
sb->version, sb->flags, sb->seq);
submit_bio(bio);
@@ -365,11 +365,11 @@ static void uuid_io(struct cache_set *c, int op, unsigned long op_flags,
}
bch_extent_to_text(buf, sizeof(buf), k);
- pr_debug("%s UUIDs at %s", op == REQ_OP_WRITE ? "wrote" : "read", buf);
+ pr_debug("%s UUIDs at %s\n", op == REQ_OP_WRITE ? "wrote" : "read", buf);
for (u = c->uuids; u < c->uuids + c->nr_uuids; u++)
if (!bch_is_zero(u->uuid, 16))
- pr_debug("Slot %zi: %pU: %s: 1st: %u last: %u inv: %u",
+ pr_debug("Slot %zi: %pU: %s: 1st: %u last: %u inv: %u\n",
u - c->uuids, u->uuid, u->label,
u->first_reg, u->last_reg, u->invalidated);
@@ -534,7 +534,7 @@ int bch_prio_write(struct cache *ca, bool wait)
struct bucket *b;
struct closure cl;
- pr_debug("free_prio=%zu, free_none=%zu, free_inc=%zu",
+ pr_debug("free_prio=%zu, free_none=%zu, free_inc=%zu\n",
fifo_used(&ca->free[RESERVE_PRIO]),
fifo_used(&ca->free[RESERVE_NONE]),
fifo_used(&ca->free_inc));
@@ -629,12 +629,12 @@ static int prio_read(struct cache *ca, uint64_t bucket)
if (p->csum !=
bch_crc64(&p->magic, bucket_bytes(ca) - 8)) {
- pr_warn("bad csum reading priorities");
+ pr_warn("bad csum reading priorities\n");
goto out;
}
if (p->magic != pset_magic(&ca->sb)) {
- pr_warn("bad magic reading priorities");
+ pr_warn("bad magic reading priorities\n");
goto out;
}
@@ -728,11 +728,11 @@ static void bcache_device_link(struct bcache_device *d, struct cache_set *c,
ret = sysfs_create_link(&d->kobj, &c->kobj, "cache");
if (ret < 0)
- pr_err("Couldn't create device -> cache set symlink");
+ pr_err("Couldn't create device -> cache set symlink\n");
ret = sysfs_create_link(&c->kobj, &d->kobj, d->name);
if (ret < 0)
- pr_err("Couldn't create cache set -> device symlink");
+ pr_err("Couldn't create cache set -> device symlink\n");
clear_bit(BCACHE_DEV_UNLINK_DONE, &d->flags);
}
@@ -789,15 +789,17 @@ static void bcache_device_free(struct bcache_device *d)
lockdep_assert_held(&bch_register_lock);
if (disk)
- pr_info("%s stopped", disk->disk_name);
+ pr_info("%s stopped\n", disk->disk_name);
else
- pr_err("bcache device (NULL gendisk) stopped");
+ pr_err("bcache device (NULL gendisk) stopped\n");
if (d->c)
bcache_device_detach(d);
if (disk) {
- if (disk->flags & GENHD_FL_UP)
+ bool disk_added = (disk->flags & GENHD_FL_UP) != 0;
+
+ if (disk_added)
del_gendisk(disk);
if (disk->queue)
@@ -805,7 +807,8 @@ static void bcache_device_free(struct bcache_device *d)
ida_simple_remove(&bcache_device_idx,
first_minor_to_idx(disk->first_minor));
- put_disk(disk);
+ if (disk_added)
+ put_disk(disk);
}
bioset_exit(&d->bio_split);
@@ -830,7 +833,7 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
d->nr_stripes = DIV_ROUND_UP_ULL(sectors, d->stripe_size);
if (!d->nr_stripes || d->nr_stripes > max_stripes) {
- pr_err("nr_stripes too large or invalid: %u (start sector beyond end of disk?)",
+ pr_err("nr_stripes too large or invalid: %u (start sector beyond end of disk?)\n",
(unsigned int)d->nr_stripes);
return -ENOMEM;
}
@@ -928,11 +931,11 @@ static int cached_dev_status_update(void *arg)
dc->offline_seconds = 0;
if (dc->offline_seconds >= BACKING_DEV_OFFLINE_TIMEOUT) {
- pr_err("%s: device offline for %d seconds",
+ pr_err("%s: device offline for %d seconds\n",
dc->backing_dev_name,
BACKING_DEV_OFFLINE_TIMEOUT);
- pr_err("%s: disable I/O request due to backing "
- "device offline", dc->disk.name);
+ pr_err("%s: disable I/O request due to backing device offline\n",
+ dc->disk.name);
dc->io_disable = true;
/* let others know earlier that io_disable is true */
smp_mb();
@@ -959,7 +962,7 @@ int bch_cached_dev_run(struct cached_dev *dc)
};
if (dc->io_disable) {
- pr_err("I/O disabled on cached dev %s",
+ pr_err("I/O disabled on cached dev %s\n",
dc->backing_dev_name);
kfree(env[1]);
kfree(env[2]);
@@ -971,7 +974,7 @@ int bch_cached_dev_run(struct cached_dev *dc)
kfree(env[1]);
kfree(env[2]);
kfree(buf);
- pr_info("cached dev %s is running already",
+ pr_info("cached dev %s is running already\n",
dc->backing_dev_name);
return -EBUSY;
}
@@ -1001,16 +1004,14 @@ int bch_cached_dev_run(struct cached_dev *dc)
if (sysfs_create_link(&d->kobj, &disk_to_dev(d->disk)->kobj, "dev") ||
sysfs_create_link(&disk_to_dev(d->disk)->kobj,
&d->kobj, "bcache")) {
- pr_err("Couldn't create bcache dev <-> disk sysfs symlinks");
+ pr_err("Couldn't create bcache dev <-> disk sysfs symlinks\n");
return -ENOMEM;
}
dc->status_update_thread = kthread_run(cached_dev_status_update,
dc, "bcache_status_update");
if (IS_ERR(dc->status_update_thread)) {
- pr_warn("failed to create bcache_status_update kthread, "
- "continue to run without monitoring backing "
- "device status");
+ pr_warn("failed to create bcache_status_update kthread, continue to run without monitoring backing device status\n");
}
return 0;
@@ -1036,7 +1037,7 @@ static void cancel_writeback_rate_update_dwork(struct cached_dev *dc)
} while (time_out > 0);
if (time_out == 0)
- pr_warn("give up waiting for dc->writeback_write_update to quit");
+ pr_warn("give up waiting for dc->writeback_write_update to quit\n");
cancel_delayed_work_sync(&dc->writeback_rate_update);
}
@@ -1077,7 +1078,7 @@ static void cached_dev_detach_finish(struct work_struct *w)
mutex_unlock(&bch_register_lock);
- pr_info("Caching disabled for %s", dc->backing_dev_name);
+ pr_info("Caching disabled for %s\n", dc->backing_dev_name);
/* Drop ref we took in cached_dev_detach() */
closure_put(&dc->disk.cl);
@@ -1117,20 +1118,20 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
return -ENOENT;
if (dc->disk.c) {
- pr_err("Can't attach %s: already attached",
+ pr_err("Can't attach %s: already attached\n",
dc->backing_dev_name);
return -EINVAL;
}
if (test_bit(CACHE_SET_STOPPING, &c->flags)) {
- pr_err("Can't attach %s: shutting down",
+ pr_err("Can't attach %s: shutting down\n",
dc->backing_dev_name);
return -EINVAL;
}
if (dc->sb.block_size < c->sb.block_size) {
/* Will die */
- pr_err("Couldn't attach %s: block size less than set's block size",
+ pr_err("Couldn't attach %s: block size less than set's block size\n",
dc->backing_dev_name);
return -EINVAL;
}
@@ -1138,7 +1139,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
/* Check whether already attached */
list_for_each_entry_safe(exist_dc, t, &c->cached_devs, list) {
if (!memcmp(dc->sb.uuid, exist_dc->sb.uuid, 16)) {
- pr_err("Tried to attach %s but duplicate UUID already attached",
+ pr_err("Tried to attach %s but duplicate UUID already attached\n",
dc->backing_dev_name);
return -EINVAL;
@@ -1157,14 +1158,14 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
if (!u) {
if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) {
- pr_err("Couldn't find uuid for %s in set",
+ pr_err("Couldn't find uuid for %s in set\n",
dc->backing_dev_name);
return -ENOENT;
}
u = uuid_find_empty(c);
if (!u) {
- pr_err("Not caching %s, no room for UUID",
+ pr_err("Not caching %s, no room for UUID\n",
dc->backing_dev_name);
return -EINVAL;
}
@@ -1210,7 +1211,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
down_write(&dc->writeback_lock);
if (bch_cached_dev_writeback_start(dc)) {
up_write(&dc->writeback_lock);
- pr_err("Couldn't start writeback facilities for %s",
+ pr_err("Couldn't start writeback facilities for %s\n",
dc->disk.disk->disk_name);
return -ENOMEM;
}
@@ -1233,7 +1234,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
*/
kthread_stop(dc->writeback_thread);
cancel_writeback_rate_update_dwork(dc);
- pr_err("Couldn't run cached device %s",
+ pr_err("Couldn't run cached device %s\n",
dc->backing_dev_name);
return ret;
}
@@ -1244,7 +1245,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
/* Allow the writeback thread to proceed */
up_write(&dc->writeback_lock);
- pr_info("Caching %s as %s on set %pU",
+ pr_info("Caching %s as %s on set %pU\n",
dc->backing_dev_name,
dc->disk.disk->disk_name,
dc->disk.c->sb.set_uuid);
@@ -1384,7 +1385,7 @@ static int register_bdev(struct cache_sb *sb, struct cache_sb_disk *sb_disk,
if (bch_cache_accounting_add_kobjs(&dc->accounting, &dc->disk.kobj))
goto err;
- pr_info("registered backing device %s", dc->backing_dev_name);
+ pr_info("registered backing device %s\n", dc->backing_dev_name);
list_add(&dc->list, &uncached_devices);
/* attach to a matched cache set if it exists */
@@ -1401,7 +1402,7 @@ static int register_bdev(struct cache_sb *sb, struct cache_sb_disk *sb_disk,
return 0;
err:
- pr_notice("error %s: %s", dc->backing_dev_name, err);
+ pr_notice("error %s: %s\n", dc->backing_dev_name, err);
bcache_device_stop(&dc->disk);
return ret;
}
@@ -1497,7 +1498,7 @@ int bch_flash_dev_create(struct cache_set *c, uint64_t size)
u = uuid_find_empty(c);
if (!u) {
- pr_err("Can't create volume, no room for UUID");
+ pr_err("Can't create volume, no room for UUID\n");
return -EINVAL;
}
@@ -1523,7 +1524,7 @@ bool bch_cached_dev_error(struct cached_dev *dc)
smp_mb();
pr_err("stop %s: too many IO errors on backing device %s\n",
- dc->disk.disk->disk_name, dc->backing_dev_name);
+ dc->disk.disk->disk_name, dc->backing_dev_name);
bcache_device_stop(&dc->disk);
return true;
@@ -1534,6 +1535,7 @@ bool bch_cached_dev_error(struct cached_dev *dc)
__printf(2, 3)
bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...)
{
+ struct va_format vaf;
va_list args;
if (c->on_error != ON_ERROR_PANIC &&
@@ -1541,20 +1543,22 @@ bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...)
return false;
if (test_and_set_bit(CACHE_SET_IO_DISABLE, &c->flags))
- pr_info("CACHE_SET_IO_DISABLE already set");
+ pr_info("CACHE_SET_IO_DISABLE already set\n");
/*
* XXX: we can be called from atomic context
* acquire_console_sem();
*/
- pr_err("bcache: error on %pU: ", c->sb.set_uuid);
-
va_start(args, fmt);
- vprintk(fmt, args);
- va_end(args);
- pr_err(", disabling caching\n");
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ pr_err("error on %pU: %pV, disabling caching\n",
+ c->sb.set_uuid, &vaf);
+
+ va_end(args);
if (c->on_error == ON_ERROR_PANIC)
panic("panic forced after error\n");
@@ -1606,7 +1610,7 @@ static void cache_set_free(struct closure *cl)
list_del(&c->list);
mutex_unlock(&bch_register_lock);
- pr_info("Cache set %pU unregistered", c->sb.set_uuid);
+ pr_info("Cache set %pU unregistered\n", c->sb.set_uuid);
wake_up(&unregister_wait);
closure_debug_destroy(&c->cl);
@@ -1677,7 +1681,7 @@ static void conditional_stop_bcache_device(struct cache_set *c,
struct cached_dev *dc)
{
if (dc->stop_when_cache_set_failed == BCH_CACHED_DEV_STOP_ALWAYS) {
- pr_warn("stop_when_cache_set_failed of %s is \"always\", stop it for failed cache set %pU.",
+ pr_warn("stop_when_cache_set_failed of %s is \"always\", stop it for failed cache set %pU.\n",
d->disk->disk_name, c->sb.set_uuid);
bcache_device_stop(d);
} else if (atomic_read(&dc->has_dirty)) {
@@ -1685,7 +1689,7 @@ static void conditional_stop_bcache_device(struct cache_set *c,
* dc->stop_when_cache_set_failed == BCH_CACHED_STOP_AUTO
* and dc->has_dirty == 1
*/
- pr_warn("stop_when_cache_set_failed of %s is \"auto\" and cache is dirty, stop it to avoid potential data corruption.",
+ pr_warn("stop_when_cache_set_failed of %s is \"auto\" and cache is dirty, stop it to avoid potential data corruption.\n",
d->disk->disk_name);
/*
* There might be a small time gap that cache set is
@@ -1707,7 +1711,7 @@ static void conditional_stop_bcache_device(struct cache_set *c,
* dc->stop_when_cache_set_failed == BCH_CACHED_STOP_AUTO
* and dc->has_dirty == 0
*/
- pr_warn("stop_when_cache_set_failed of %s is \"auto\" and cache is clean, keep it alive.",
+ pr_warn("stop_when_cache_set_failed of %s is \"auto\" and cache is clean, keep it alive.\n",
d->disk->disk_name);
}
}
@@ -1874,7 +1878,7 @@ static int run_cache_set(struct cache_set *c)
if (bch_journal_read(c, &journal))
goto err;
- pr_debug("btree_journal_read() done");
+ pr_debug("btree_journal_read() done\n");
err = "no journal entries found";
if (list_empty(&journal))
@@ -1920,7 +1924,7 @@ static int run_cache_set(struct cache_set *c)
bch_journal_mark(c, &journal);
bch_initial_gc_finish(c);
- pr_debug("btree_check() done");
+ pr_debug("btree_check() done\n");
/*
* bcache_journal_next() can't happen sooner, or
@@ -1951,7 +1955,7 @@ static int run_cache_set(struct cache_set *c)
if (bch_journal_replay(c, &journal))
goto err;
} else {
- pr_notice("invalidating existing data");
+ pr_notice("invalidating existing data\n");
for_each_cache(ca, c, i) {
unsigned int j;
@@ -2085,7 +2089,7 @@ found:
memcpy(c->sb.set_uuid, ca->sb.set_uuid, 16);
c->sb.flags = ca->sb.flags;
c->sb.seq = ca->sb.seq;
- pr_debug("set version = %llu", c->sb.version);
+ pr_debug("set version = %llu\n", c->sb.version);
}
kobject_get(&ca->kobj);
@@ -2247,7 +2251,7 @@ err_btree_alloc:
err_free:
module_put(THIS_MODULE);
if (err)
- pr_notice("error %s: %s", ca->cache_dev_name, err);
+ pr_notice("error %s: %s\n", ca->cache_dev_name, err);
return ret;
}
@@ -2301,14 +2305,14 @@ static int register_cache(struct cache_sb *sb, struct cache_sb_disk *sb_disk,
goto out;
}
- pr_info("registered cache device %s", ca->cache_dev_name);
+ pr_info("registered cache device %s\n", ca->cache_dev_name);
out:
kobject_put(&ca->kobj);
err:
if (err)
- pr_notice("error %s: %s", ca->cache_dev_name, err);
+ pr_notice("error %s: %s\n", ca->cache_dev_name, err);
return ret;
}
@@ -2323,6 +2327,7 @@ static ssize_t bch_pending_bdevs_cleanup(struct kobject *k,
kobj_attribute_write(register, register_bcache);
kobj_attribute_write(register_quiet, register_bcache);
+kobj_attribute_write(register_async, register_bcache);
kobj_attribute_write(pendings_cleanup, bch_pending_bdevs_cleanup);
static bool bch_is_open_backing(struct block_device *bdev)
@@ -2358,6 +2363,83 @@ static bool bch_is_open(struct block_device *bdev)
return bch_is_open_cache(bdev) || bch_is_open_backing(bdev);
}
+struct async_reg_args {
+ struct work_struct reg_work;
+ char *path;
+ struct cache_sb *sb;
+ struct cache_sb_disk *sb_disk;
+ struct block_device *bdev;
+};
+
+static void register_bdev_worker(struct work_struct *work)
+{
+ bool fail = false;
+ struct async_reg_args *args =
+ container_of(work, struct async_reg_args, reg_work);
+ struct cached_dev *dc;
+
+ dc = kzalloc(sizeof(*dc), GFP_KERNEL);
+ if (!dc) {
+ fail = true;
+ put_page(virt_to_page(args->sb_disk));
+ blkdev_put(args->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
+ goto out;
+ }
+
+ mutex_lock(&bch_register_lock);
+ if (register_bdev(args->sb, args->sb_disk, args->bdev, dc) < 0)
+ fail = true;
+ mutex_unlock(&bch_register_lock);
+
+out:
+ if (fail)
+ pr_info("error %s: fail to register backing device\n",
+ args->path);
+ kfree(args->sb);
+ kfree(args->path);
+ kfree(args);
+ module_put(THIS_MODULE);
+}
+
+static void register_cache_worker(struct work_struct *work)
+{
+ bool fail = false;
+ struct async_reg_args *args =
+ container_of(work, struct async_reg_args, reg_work);
+ struct cache *ca;
+
+ ca = kzalloc(sizeof(*ca), GFP_KERNEL);
+ if (!ca) {
+ fail = true;
+ put_page(virt_to_page(args->sb_disk));
+ blkdev_put(args->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
+ goto out;
+ }
+
+ /* blkdev_put() will be called in bch_cache_release() */
+ if (register_cache(args->sb, args->sb_disk, args->bdev, ca) != 0)
+ fail = true;
+
+out:
+ if (fail)
+ pr_info("error %s: fail to register cache device\n",
+ args->path);
+ kfree(args->sb);
+ kfree(args->path);
+ kfree(args);
+ module_put(THIS_MODULE);
+}
+
+static void register_device_async(struct async_reg_args *args)
+{
+ if (SB_IS_BDEV(args->sb))
+ INIT_WORK(&args->reg_work, register_bdev_worker);
+ else
+ INIT_WORK(&args->reg_work, register_cache_worker);
+
+ queue_work(system_wq, &args->reg_work);
+}
+
static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
const char *buffer, size_t size)
{
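
The asynchronous registration path added above hands the slow superblock and disk setup off to a workqueue so the sysfs write can return immediately. A stripped-down sketch of the defer-to-worker idiom it relies on (struct and function names are placeholders):

    struct my_args {
            struct work_struct work;
            char *path;
    };

    static void my_worker(struct work_struct *work)
    {
            struct my_args *args = container_of(work, struct my_args, work);

            pr_info("registering %s\n", args->path); /* slow part runs in process context here */
            kfree(args);
    }

    /* caller: fill in args, then queue the work and return */
    INIT_WORK(&args->work, my_worker);
    queue_work(system_wq, &args->work);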
@@ -2420,6 +2502,26 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
goto out_blkdev_put;
err = "failed to register device";
+ if (attr == &ksysfs_register_async) {
+ /* register in asynchronous way */
+ struct async_reg_args *args =
+ kzalloc(sizeof(struct async_reg_args), GFP_KERNEL);
+
+ if (!args) {
+ ret = -ENOMEM;
+ err = "cannot allocate memory";
+ goto out_put_sb_page;
+ }
+
+ args->path = path;
+ args->sb = sb;
+ args->sb_disk = sb_disk;
+ args->bdev = bdev;
+ register_device_async(args);
+ /* Don't wait; return to user space right away */
+ goto async_done;
+ }
+
if (SB_IS_BDEV(sb)) {
struct cached_dev *dc = kzalloc(sizeof(*dc), GFP_KERNEL);
@@ -2447,6 +2549,7 @@ done:
kfree(sb);
kfree(path);
module_put(THIS_MODULE);
+async_done:
return size;
out_put_sb_page:
@@ -2461,7 +2564,7 @@ out_free_path:
out_module_put:
module_put(THIS_MODULE);
out:
- pr_info("error %s: %s", path?path:"", err);
+ pr_info("error %s: %s\n", path?path:"", err);
return ret;
}
@@ -2506,7 +2609,7 @@ static ssize_t bch_pending_bdevs_cleanup(struct kobject *k,
mutex_unlock(&bch_register_lock);
list_for_each_entry_safe(pdev, tpdev, &pending_devs, list) {
- pr_info("delete pdev %p", pdev);
+ pr_info("delete pdev %p\n", pdev);
list_del(&pdev->list);
bcache_device_stop(&pdev->dc->disk);
kfree(pdev);
@@ -2549,7 +2652,7 @@ static int bcache_reboot(struct notifier_block *n, unsigned long code, void *x)
mutex_unlock(&bch_register_lock);
- pr_info("Stopping all devices:");
+ pr_info("Stopping all devices:\n");
/*
* The reason bch_register_lock is not held to call
@@ -2599,9 +2702,9 @@ static int bcache_reboot(struct notifier_block *n, unsigned long code, void *x)
finish_wait(&unregister_wait, &wait);
if (stopped)
- pr_info("All devices stopped");
+ pr_info("All devices stopped\n");
else
- pr_notice("Timeout waiting for devices to be closed");
+ pr_notice("Timeout waiting for devices to be closed\n");
out:
mutex_unlock(&bch_register_lock);
}
@@ -2637,7 +2740,7 @@ static void check_module_parameters(void)
if (bch_cutoff_writeback_sync == 0)
bch_cutoff_writeback_sync = CUTOFF_WRITEBACK_SYNC;
else if (bch_cutoff_writeback_sync > CUTOFF_WRITEBACK_SYNC_MAX) {
- pr_warn("set bch_cutoff_writeback_sync (%u) to max value %u",
+ pr_warn("set bch_cutoff_writeback_sync (%u) to max value %u\n",
bch_cutoff_writeback_sync, CUTOFF_WRITEBACK_SYNC_MAX);
bch_cutoff_writeback_sync = CUTOFF_WRITEBACK_SYNC_MAX;
}
@@ -2645,13 +2748,13 @@ static void check_module_parameters(void)
if (bch_cutoff_writeback == 0)
bch_cutoff_writeback = CUTOFF_WRITEBACK;
else if (bch_cutoff_writeback > CUTOFF_WRITEBACK_MAX) {
- pr_warn("set bch_cutoff_writeback (%u) to max value %u",
+ pr_warn("set bch_cutoff_writeback (%u) to max value %u\n",
bch_cutoff_writeback, CUTOFF_WRITEBACK_MAX);
bch_cutoff_writeback = CUTOFF_WRITEBACK_MAX;
}
if (bch_cutoff_writeback > bch_cutoff_writeback_sync) {
- pr_warn("set bch_cutoff_writeback (%u) to %u",
+ pr_warn("set bch_cutoff_writeback (%u) to %u\n",
bch_cutoff_writeback, bch_cutoff_writeback_sync);
bch_cutoff_writeback = bch_cutoff_writeback_sync;
}
@@ -2662,6 +2765,9 @@ static int __init bcache_init(void)
static const struct attribute *files[] = {
&ksysfs_register.attr,
&ksysfs_register_quiet.attr,
+#ifdef CONFIG_BCACHE_ASYNC_REGISTRAION
+ &ksysfs_register_async.attr,
+#endif
&ksysfs_pendings_cleanup.attr,
NULL
};
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 323276994aab..0dadec5a78f6 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -421,7 +421,7 @@ STORE(__cached_dev)
return size;
}
if (v == -ENOENT)
- pr_err("Can't attach %s: cache set not found", buf);
+ pr_err("Can't attach %s: cache set not found\n", buf);
return v;
}
@@ -455,7 +455,7 @@ STORE(bch_cached_dev)
*/
if (dc->writeback_running) {
dc->writeback_running = false;
- pr_err("%s: failed to run non-existent writeback thread",
+ pr_err("%s: failed to run non-existent writeback thread\n",
dc->disk.disk->disk_name);
}
} else
@@ -872,11 +872,11 @@ STORE(__bch_cache_set)
if (v) {
if (test_and_set_bit(CACHE_SET_IO_DISABLE,
&c->flags))
- pr_warn("CACHE_SET_IO_DISABLE already set");
+ pr_warn("CACHE_SET_IO_DISABLE already set\n");
} else {
if (!test_and_clear_bit(CACHE_SET_IO_DISABLE,
&c->flags))
- pr_warn("CACHE_SET_IO_DISABLE already cleared");
+ pr_warn("CACHE_SET_IO_DISABLE already cleared\n");
}
}
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 3f7641fb28d5..1cf1e5016cb9 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -809,7 +809,7 @@ static int bch_root_node_dirty_init(struct cache_set *c,
schedule_timeout_interruptible(
msecs_to_jiffies(INIT_KEYS_SLEEP_MS));
else if (ret < 0) {
- pr_warn("sectors dirty init failed, ret=%d!", ret);
+ pr_warn("sectors dirty init failed, ret=%d!\n", ret);
break;
}
} while (ret == -EAGAIN);
@@ -917,7 +917,7 @@ void bch_sectors_dirty_init(struct bcache_device *d)
state = kzalloc(sizeof(struct bch_dirty_init_state), GFP_KERNEL);
if (!state) {
- pr_warn("sectors dirty init failed: cannot allocate memory");
+ pr_warn("sectors dirty init failed: cannot allocate memory\n");
return;
}
@@ -945,7 +945,7 @@ void bch_sectors_dirty_init(struct bcache_device *d)
&state->infos[i],
name);
if (IS_ERR(state->infos[i].thread)) {
- pr_err("fails to run thread bch_dirty_init[%d]", i);
+ pr_err("fails to run thread bch_dirty_init[%d]\n", i);
for (--i; i >= 0; i--)
kthread_stop(state->infos[i].thread);
goto out;
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index 2d519c223562..d1786cfd7f22 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -400,13 +400,13 @@ static void *alloc_buffer_data(struct dm_bufio_client *c, gfp_t gfp_mask,
*/
if (gfp_mask & __GFP_NORETRY) {
unsigned noio_flag = memalloc_noio_save();
- void *ptr = __vmalloc(c->block_size, gfp_mask, PAGE_KERNEL);
+ void *ptr = __vmalloc(c->block_size, gfp_mask);
memalloc_noio_restore(noio_flag);
return ptr;
}
- return __vmalloc(c->block_size, gfp_mask, PAGE_KERNEL);
+ return __vmalloc(c->block_size, gfp_mask);
}
/*
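
The dm-bufio hunk follows the interface change that dropped the page-protection argument from __vmalloc(); the mapping is always PAGE_KERNEL now. Before/after sketch (assuming a kernel from around this series):

    ptr = __vmalloc(size, GFP_NOIO | __GFP_NORETRY, PAGE_KERNEL); /* old three-argument form */
    ptr = __vmalloc(size, GFP_NOIO | __GFP_NORETRY);              /* new two-argument form   */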
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index 4094c47eca7f..84cb04904fab 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -2657,7 +2657,7 @@ static void bitmap_flush_work(struct work_struct *work)
dm_integrity_flush_buffers(ic);
if (ic->meta_dev)
- blkdev_issue_flush(ic->dev->bdev, GFP_NOIO, NULL);
+ blkdev_issue_flush(ic->dev->bdev, GFP_NOIO);
limit = ic->provided_data_sectors;
if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) {
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index 3f8577e2c13b..f60c02512121 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -547,7 +547,7 @@ int dm_mq_init_request_queue(struct mapped_device *md, struct dm_table *t)
md->tag_set->ops = &dm_mq_ops;
md->tag_set->queue_depth = dm_get_blk_mq_queue_depth();
md->tag_set->numa_node = md->numa_node_id;
- md->tag_set->flags = BLK_MQ_F_SHOULD_MERGE;
+ md->tag_set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_STACKING;
md->tag_set->nr_hw_queues = dm_get_blk_mq_nr_hw_queues();
md->tag_set->driver_data = md;
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 0a2cc197f62b..8277b959e00b 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -279,7 +279,6 @@ static struct dm_dev_internal *find_device(struct list_head *l, dev_t dev)
static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev,
sector_t start, sector_t len, void *data)
{
- struct request_queue *q;
struct queue_limits *limits = data;
struct block_device *bdev = dev->bdev;
sector_t dev_size =
@@ -288,22 +287,6 @@ static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev,
limits->logical_block_size >> SECTOR_SHIFT;
char b[BDEVNAME_SIZE];
- /*
- * Some devices exist without request functions,
- * such as loop devices not yet bound to backing files.
- * Forbid the use of such devices.
- */
- q = bdev_get_queue(bdev);
- if (!q || !q->make_request_fn) {
- DMWARN("%s: %s is not yet initialised: "
- "start=%llu, len=%llu, dev_size=%llu",
- dm_device_name(ti->table->md), bdevname(bdev, b),
- (unsigned long long)start,
- (unsigned long long)len,
- (unsigned long long)dev_size);
- return 1;
- }
-
if (!dev_size)
return 0;
diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c
index 369de15c4e80..bf2245370305 100644
--- a/drivers/md/dm-zoned-metadata.c
+++ b/drivers/md/dm-zoned-metadata.c
@@ -661,7 +661,7 @@ static int dmz_write_sb(struct dmz_metadata *zmd, unsigned int set)
ret = dmz_rdwr_block(zmd, REQ_OP_WRITE, block, mblk->page);
if (ret == 0)
- ret = blkdev_issue_flush(zmd->dev->bdev, GFP_NOIO, NULL);
+ ret = blkdev_issue_flush(zmd->dev->bdev, GFP_NOIO);
return ret;
}
@@ -703,7 +703,7 @@ static int dmz_write_dirty_mblocks(struct dmz_metadata *zmd,
/* Flush drive cache (this will also sync data) */
if (ret == 0)
- ret = blkdev_issue_flush(zmd->dev->bdev, GFP_NOIO, NULL);
+ ret = blkdev_issue_flush(zmd->dev->bdev, GFP_NOIO);
return ret;
}
@@ -772,7 +772,7 @@ int dmz_flush_metadata(struct dmz_metadata *zmd)
/* If there are no dirty metadata blocks, just flush the device cache */
if (list_empty(&write_list)) {
- ret = blkdev_issue_flush(zmd->dev->bdev, GFP_NOIO, NULL);
+ ret = blkdev_issue_flush(zmd->dev->bdev, GFP_NOIO);
goto err;
}
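
The dm-integrity and dm-zoned hunks track the removal of blkdev_issue_flush()'s error_sector output parameter; any failure now comes back only through the return value. Minimal sketch:

    int err = blkdev_issue_flush(bdev, GFP_NOIO); /* flush the device's volatile write cache */
    if (err)
            pr_warn("cache flush failed: %d\n", err);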
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index db9e46114653..3f39fa1ac756 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -26,6 +26,7 @@
#include <linux/pr.h>
#include <linux/refcount.h>
#include <linux/part_stat.h>
+#include <linux/blk-crypto.h>
#define DM_MSG_PREFIX "core"
@@ -680,11 +681,7 @@ static void start_io_acct(struct dm_io *io)
struct mapped_device *md = io->md;
struct bio *bio = io->orig_bio;
- io->start_time = jiffies;
-
- generic_start_io_acct(md->queue, bio_op(bio), bio_sectors(bio),
- &dm_disk(md)->part0);
-
+ io->start_time = bio_start_io_acct(bio);
if (unlikely(dm_stats_used(&md->stats)))
dm_stats_account_io(&md->stats, bio_data_dir(bio),
bio->bi_iter.bi_sector, bio_sectors(bio),
@@ -697,8 +694,7 @@ static void end_io_acct(struct dm_io *io)
struct bio *bio = io->orig_bio;
unsigned long duration = jiffies - io->start_time;
- generic_end_io_acct(md->queue, bio_op(bio), &dm_disk(md)->part0,
- io->start_time);
+ bio_end_io_acct(bio, io->start_time);
if (unlikely(dm_stats_used(&md->stats)))
dm_stats_account_io(&md->stats, bio_data_dir(bio),
@@ -1334,6 +1330,8 @@ static int clone_bio(struct dm_target_io *tio, struct bio *bio,
__bio_clone_fast(clone, bio);
+ bio_crypt_clone(clone, bio, GFP_NOIO);
+
if (bio_integrity(bio)) {
int r;
@@ -1788,6 +1786,18 @@ static blk_qc_t dm_make_request(struct request_queue *q, struct bio *bio)
int srcu_idx;
struct dm_table *map;
+ if (dm_get_md_type(md) == DM_TYPE_REQUEST_BASED) {
+ /*
+ * We are called with a live reference on q_usage_counter, but
+ * that one will be released as soon as we return. Grab an
+ * extra one as blk_mq_make_request expects to be able to
+ * consume a reference (which lives until the request is freed
+ * in case a request is allocated).
+ */
+ percpu_ref_get(&q->q_usage_counter);
+ return blk_mq_make_request(q, bio);
+ }
+
map = dm_get_live_table(md, &srcu_idx);
/* if we're suspended, we have to queue this io for later */
diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c
index b952bd45bd6a..95a5f3757fa3 100644
--- a/drivers/md/md-bitmap.c
+++ b/drivers/md/md-bitmap.c
@@ -324,14 +324,6 @@ static void end_bitmap_write(struct buffer_head *bh, int uptodate)
wake_up(&bitmap->write_wait);
}
-/* copied from buffer.c */
-static void
-__clear_page_buffers(struct page *page)
-{
- ClearPagePrivate(page);
- set_page_private(page, 0);
- put_page(page);
-}
static void free_buffers(struct page *page)
{
struct buffer_head *bh;
@@ -345,7 +337,7 @@ static void free_buffers(struct page *page)
free_buffer_head(bh);
bh = next;
}
- __clear_page_buffers(page);
+ detach_page_private(page);
put_page(page);
}
@@ -374,7 +366,7 @@ static int read_page(struct file *file, unsigned long index,
ret = -ENOMEM;
goto out;
}
- attach_page_buffers(page, bh);
+ attach_page_private(page, bh);
blk_cur = index << (PAGE_SHIFT - inode->i_blkbits);
while (bh) {
block = blk_cur;
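
attach_page_private()/detach_page_private() replace the open-coded ClearPagePrivate/set_page_private/put_page sequence that the removed __clear_page_buffers() helper carried. Rough sketch of the pairing (bh stands in for whatever the driver stores):

    attach_page_private(page, bh);     /* stash bh in page->private, takes a page reference       */
    /* ... */
    bh = detach_page_private(page);    /* returns the stored pointer and drops that reference */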
diff --git a/drivers/md/md-linear.h b/drivers/md/md-linear.h
index 8381d651d4ed..24e97db50ebb 100644
--- a/drivers/md/md-linear.h
+++ b/drivers/md/md-linear.h
@@ -12,6 +12,6 @@ struct linear_conf
struct rcu_head rcu;
sector_t array_sectors;
int raid_disks; /* a copy of mddev->raid_disks */
- struct dev_info disks[0];
+ struct dev_info disks[];
};
#endif
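
Turning disks[0] into a C99 flexible array member makes the trailing allocation explicit and lets struct_size() compute it safely. Sketch (nr and conf are placeholders):

    struct linear_conf *conf;

    /* header plus nr trailing dev_info entries in a single allocation */
    conf = kzalloc(struct_size(conf, disks, nr), GFP_KERNEL);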
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 271e8a587354..f567f536b529 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -89,6 +89,7 @@ static struct module *md_cluster_mod;
static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
static struct workqueue_struct *md_wq;
static struct workqueue_struct *md_misc_wq;
+static struct workqueue_struct *md_rdev_misc_wq;
static int remove_and_add_spares(struct mddev *mddev,
struct md_rdev *this);
@@ -227,13 +228,13 @@ void mddev_create_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
goto abort;
if (mddev->serial_info_pool == NULL) {
- unsigned int noio_flag;
-
- noio_flag = memalloc_noio_save();
+ /*
+ * already in memalloc noio context by
+ * mddev_suspend()
+ */
mddev->serial_info_pool =
mempool_create_kmalloc_pool(NR_SERIAL_INFOS,
sizeof(struct serial_info));
- memalloc_noio_restore(noio_flag);
if (!mddev->serial_info_pool) {
rdevs_uninit_serial(mddev);
pr_err("can't alloc memory pool for serialization\n");
@@ -466,7 +467,7 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
{
const int rw = bio_data_dir(bio);
const int sgrp = op_stat_group(bio_op(bio));
- struct mddev *mddev = q->queuedata;
+ struct mddev *mddev = bio->bi_disk->private_data;
unsigned int sectors;
if (unlikely(test_bit(MD_BROKEN, &mddev->flags)) && (rw == WRITE)) {
@@ -527,11 +528,15 @@ void mddev_suspend(struct mddev *mddev)
wait_event(mddev->sb_wait, !test_bit(MD_UPDATING_SB, &mddev->flags));
del_timer_sync(&mddev->safemode_timer);
+ /* restrict memory reclaim I/O while the raid array is suspended */
+ mddev->noio_flag = memalloc_noio_save();
}
EXPORT_SYMBOL_GPL(mddev_suspend);
void mddev_resume(struct mddev *mddev)
{
+ /* exit the memalloc noio scope entered in mddev_suspend() */
+ memalloc_noio_restore(mddev->noio_flag);
lockdep_assert_held(&mddev->reconfig_mutex);
if (--mddev->suspended)
return;
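
Entering a memalloc noio scope for the whole suspend window means any GFP_KERNEL allocation made while the array is suspended is implicitly treated as GFP_NOIO, so it cannot recurse into reclaim I/O against the suspended array. The scope API in isolation:

    unsigned int noio_flag;

    noio_flag = memalloc_noio_save();   /* from here on, allocations won't issue reclaim I/O */
    ptr = kmalloc(len, GFP_KERNEL);     /* behaves like GFP_NOIO inside the scope            */
    memalloc_noio_restore(noio_flag);   /* leave the scope                                   */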
@@ -2454,7 +2459,7 @@ static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev)
return err;
}
-static void md_delayed_delete(struct work_struct *ws)
+static void rdev_delayed_delete(struct work_struct *ws)
{
struct md_rdev *rdev = container_of(ws, struct md_rdev, del_work);
kobject_del(&rdev->kobj);
@@ -2479,9 +2484,9 @@ static void unbind_rdev_from_array(struct md_rdev *rdev)
* to delay it due to rcu usage.
*/
synchronize_rcu();
- INIT_WORK(&rdev->del_work, md_delayed_delete);
+ INIT_WORK(&rdev->del_work, rdev_delayed_delete);
kobject_get(&rdev->kobj);
- queue_work(md_misc_wq, &rdev->del_work);
+ queue_work(md_rdev_misc_wq, &rdev->del_work);
}
/*
@@ -3191,8 +3196,7 @@ slot_store(struct md_rdev *rdev, const char *buf, size_t len)
rdev->saved_raid_disk = -1;
clear_bit(In_sync, &rdev->flags);
clear_bit(Bitmap_sync, &rdev->flags);
- err = rdev->mddev->pers->
- hot_add_disk(rdev->mddev, rdev);
+ err = rdev->mddev->pers->hot_add_disk(rdev->mddev, rdev);
if (err) {
rdev->raid_disk = -1;
return err;
@@ -4514,6 +4518,20 @@ null_show(struct mddev *mddev, char *page)
return -EINVAL;
}
+/* need to ensure rdev_delayed_delete() has completed */
+static void flush_rdev_wq(struct mddev *mddev)
+{
+ struct md_rdev *rdev;
+
+ rcu_read_lock();
+ rdev_for_each_rcu(rdev, mddev)
+ if (work_pending(&rdev->del_work)) {
+ flush_workqueue(md_rdev_misc_wq);
+ break;
+ }
+ rcu_read_unlock();
+}
+
static ssize_t
new_dev_store(struct mddev *mddev, const char *buf, size_t len)
{
@@ -4541,8 +4559,7 @@ new_dev_store(struct mddev *mddev, const char *buf, size_t len)
minor != MINOR(dev))
return -EOVERFLOW;
- flush_workqueue(md_misc_wq);
-
+ flush_rdev_wq(mddev);
err = mddev_lock(mddev);
if (err)
return err;
@@ -4780,7 +4797,8 @@ action_store(struct mddev *mddev, const char *page, size_t len)
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
mddev_lock(mddev) == 0) {
- flush_workqueue(md_misc_wq);
+ if (work_pending(&mddev->del_work))
+ flush_workqueue(md_misc_wq);
if (mddev->sync_thread) {
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
md_reap_sync_thread(mddev);
@@ -5626,7 +5644,6 @@ static int md_alloc(dev_t dev, char *name)
mddev->queue = blk_alloc_queue(md_make_request, NUMA_NO_NODE);
if (!mddev->queue)
goto abort;
- mddev->queue->queuedata = mddev;
blk_set_stacking_limits(&mddev->queue->limits);
@@ -6147,7 +6164,8 @@ static void md_clean(struct mddev *mddev)
static void __md_stop_writes(struct mddev *mddev)
{
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
- flush_workqueue(md_misc_wq);
+ if (work_pending(&mddev->del_work))
+ flush_workqueue(md_misc_wq);
if (mddev->sync_thread) {
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
md_reap_sync_thread(mddev);
@@ -6200,7 +6218,8 @@ static void __md_stop(struct mddev *mddev)
md_bitmap_destroy(mddev);
mddev_detach(mddev);
/* Ensure ->event_work is done */
- flush_workqueue(md_misc_wq);
+ if (mddev->event_work.func)
+ flush_workqueue(md_misc_wq);
spin_lock(&mddev->lock);
mddev->pers = NULL;
spin_unlock(&mddev->lock);
@@ -7495,9 +7514,8 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
}
- if (cmd == ADD_NEW_DISK)
- /* need to ensure md_delayed_delete() has completed */
- flush_workqueue(md_misc_wq);
+ if (cmd == ADD_NEW_DISK || cmd == HOT_ADD_DISK)
+ flush_rdev_wq(mddev);
if (cmd == HOT_REMOVE_DISK)
/* need to ensure recovery thread has run */
@@ -7752,7 +7770,8 @@ static int md_open(struct block_device *bdev, fmode_t mode)
*/
mddev_put(mddev);
/* Wait until bdev->bd_disk is definitely gone */
- flush_workqueue(md_misc_wq);
+ if (work_pending(&mddev->del_work))
+ flush_workqueue(md_misc_wq);
/* Then retry the open from the top */
return -ERESTARTSYS;
}
@@ -9040,8 +9059,7 @@ static int remove_and_add_spares(struct mddev *mddev,
rdev->recovery_offset = 0;
}
- if (mddev->pers->
- hot_add_disk(mddev, rdev) == 0) {
+ if (mddev->pers->hot_add_disk(mddev, rdev) == 0) {
if (sysfs_link_rdev(mddev, rdev))
/* failure here is OK */;
if (!test_bit(Journal, &rdev->flags))
@@ -9469,6 +9487,10 @@ static int __init md_init(void)
if (!md_misc_wq)
goto err_misc_wq;
+ md_rdev_misc_wq = alloc_workqueue("md_rdev_misc", 0, 0);
+ if (!md_rdev_misc_wq)
+ goto err_rdev_misc_wq;
+
if ((ret = register_blkdev(MD_MAJOR, "md")) < 0)
goto err_md;
@@ -9490,6 +9512,8 @@ static int __init md_init(void)
err_mdp:
unregister_blkdev(MD_MAJOR, "md");
err_md:
+ destroy_workqueue(md_rdev_misc_wq);
+err_rdev_misc_wq:
destroy_workqueue(md_misc_wq);
err_misc_wq:
destroy_workqueue(md_wq);
@@ -9776,6 +9800,7 @@ static __exit void md_exit(void)
* destroy_workqueue() below will wait for that to complete.
*/
}
+ destroy_workqueue(md_rdev_misc_wq);
destroy_workqueue(md_misc_wq);
destroy_workqueue(md_wq);
}
@@ -9785,7 +9810,7 @@ module_exit(md_exit)
static int get_ro(char *buffer, const struct kernel_param *kp)
{
- return sprintf(buffer, "%d", start_readonly);
+ return sprintf(buffer, "%d\n", start_readonly);
}
static int set_ro(const char *val, const struct kernel_param *kp)
{
diff --git a/drivers/md/md.h b/drivers/md/md.h
index acd681939112..612814d07d35 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -497,6 +497,7 @@ struct mddev {
void (*sync_super)(struct mddev *mddev, struct md_rdev *rdev);
struct md_cluster_info *cluster_info;
unsigned int good_device_nr; /* good device num within cluster raid */
+ unsigned int noio_flag; /* for memalloc scope API */
bool has_superblocks:1;
bool fail_last_dev:1;
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index cd810e195086..dcd27f3da84e 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -296,22 +296,17 @@ static void reschedule_retry(struct r1bio *r1_bio)
static void call_bio_endio(struct r1bio *r1_bio)
{
struct bio *bio = r1_bio->master_bio;
- struct r1conf *conf = r1_bio->mddev->private;
if (!test_bit(R1BIO_Uptodate, &r1_bio->state))
bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
- /*
- * Wake up any possible resync thread that waits for the device
- * to go idle.
- */
- allow_barrier(conf, r1_bio->sector);
}
static void raid_end_bio_io(struct r1bio *r1_bio)
{
struct bio *bio = r1_bio->master_bio;
+ struct r1conf *conf = r1_bio->mddev->private;
/* if nobody has done the final endio yet, do it now */
if (!test_and_set_bit(R1BIO_Returned, &r1_bio->state)) {
@@ -322,6 +317,12 @@ static void raid_end_bio_io(struct r1bio *r1_bio)
call_bio_endio(r1_bio);
}
+ /*
+ * Wake up any possible resync thread that waits for the device
+ * to go idle. All I/Os, even write-behind writes, are done.
+ */
+ allow_barrier(conf, r1_bio->sector);
+
free_r1bio(r1_bio);
}
diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h
index e7ccad898736..b7eb09e8c025 100644
--- a/drivers/md/raid1.h
+++ b/drivers/md/raid1.h
@@ -180,7 +180,7 @@ struct r1bio {
* if the IO is in WRITE direction, then multiple bios are used.
* We choose the number when they are allocated.
*/
- struct bio *bios[0];
+ struct bio *bios[];
/* DO NOT PUT ANY NEW FIELDS HERE - bios array is contiguously alloced*/
};
diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h
index d3eaaf3eb1bc..79cd2b7d3128 100644
--- a/drivers/md/raid10.h
+++ b/drivers/md/raid10.h
@@ -153,7 +153,7 @@ struct r10bio {
};
sector_t addr;
int devnum;
- } devs[0];
+ } devs[];
};
/* bits for r10bio.state */
diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c
index d50238d0a85d..a750f4bbb5d9 100644
--- a/drivers/md/raid5-ppl.c
+++ b/drivers/md/raid5-ppl.c
@@ -1037,7 +1037,7 @@ static int ppl_recover(struct ppl_log *log, struct ppl_header *pplhdr,
}
/* flush the disk cache after recovery if necessary */
- ret = blkdev_issue_flush(rdev->bdev, GFP_KERNEL, NULL);
+ ret = blkdev_issue_flush(rdev->bdev, GFP_KERNEL);
out:
__free_page(page);
return ret;
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index ba00e9877f02..ab8067f9ce8c 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2215,10 +2215,13 @@ static int grow_stripes(struct r5conf *conf, int num)
}
/**
- * scribble_len - return the required size of the scribble region
+ * scribble_alloc - allocate percpu scribble buffer for required size
+ * of the scribble region
+ * @percpu - from for_each_present_cpu() of the caller
* @num - total number of disks in the array
+ * @cnt - scribble objs count for required size of the scribble region
*
- * The size must be enough to contain:
+ * The scribble buffer size must be enough to contain:
* 1/ a struct page pointer for each device in the array +2
* 2/ room to convert each entry in (1) to its corresponding dma
* (dma_map_page()) or page (page_address()) address.
@@ -2228,14 +2231,19 @@ static int grow_stripes(struct r5conf *conf, int num)
* of the P and Q blocks.
*/
static int scribble_alloc(struct raid5_percpu *percpu,
- int num, int cnt, gfp_t flags)
+ int num, int cnt)
{
size_t obj_size =
sizeof(struct page *) * (num+2) +
sizeof(addr_conv_t) * (num+2);
void *scribble;
- scribble = kvmalloc_array(cnt, obj_size, flags);
+ /*
+ * If this is called from the raid array suspend context, it is
+ * already inside a memalloc noio scope, so GFP_KERNEL allocations
+ * here cannot recurse into memory-reclaim I/O.
+ */
+ scribble = kvmalloc_array(cnt, obj_size, GFP_KERNEL);
if (!scribble)
return -ENOMEM;
@@ -2267,8 +2275,7 @@ static int resize_chunks(struct r5conf *conf, int new_disks, int new_sectors)
percpu = per_cpu_ptr(conf->percpu, cpu);
err = scribble_alloc(percpu, new_disks,
- new_sectors / STRIPE_SECTORS,
- GFP_NOIO);
+ new_sectors / STRIPE_SECTORS);
if (err)
break;
}
@@ -6759,8 +6766,7 @@ static int alloc_scratch_buffer(struct r5conf *conf, struct raid5_percpu *percpu
conf->previous_raid_disks),
max(conf->chunk_sectors,
conf->prev_chunk_sectors)
- / STRIPE_SECTORS,
- GFP_KERNEL)) {
+ / STRIPE_SECTORS)) {
free_scratch_buffer(conf, percpu);
return -ENOMEM;
}
diff --git a/drivers/media/cec/cec-notifier.c b/drivers/media/cec/cec-notifier.c
index e748cd54b45d..517e0035fc99 100644
--- a/drivers/media/cec/cec-notifier.c
+++ b/drivers/media/cec/cec-notifier.c
@@ -2,7 +2,7 @@
/*
* cec-notifier.c - notify CEC drivers of physical address changes
*
- * Copyright 2016 Russell King <rmk+kernel@arm.linux.org.uk>
+ * Copyright 2016 Russell King.
* Copyright 2016-2017 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
*/
diff --git a/drivers/media/common/videobuf2/videobuf2-dma-sg.c b/drivers/media/common/videobuf2/videobuf2-dma-sg.c
index 6db60e9d5183..92072a08af25 100644
--- a/drivers/media/common/videobuf2/videobuf2-dma-sg.c
+++ b/drivers/media/common/videobuf2/videobuf2-dma-sg.c
@@ -309,8 +309,7 @@ static void *vb2_dma_sg_vaddr(void *buf_priv)
if (buf->db_attach)
buf->vaddr = dma_buf_vmap(buf->db_attach->dmabuf);
else
- buf->vaddr = vm_map_ram(buf->pages,
- buf->num_pages, -1, PAGE_KERNEL);
+ buf->vaddr = vm_map_ram(buf->pages, buf->num_pages, -1);
}
/* add offset in case userptr is not page-aligned */
diff --git a/drivers/media/common/videobuf2/videobuf2-vmalloc.c b/drivers/media/common/videobuf2/videobuf2-vmalloc.c
index 1a4f0ca87c7c..c66fda4a65e4 100644
--- a/drivers/media/common/videobuf2/videobuf2-vmalloc.c
+++ b/drivers/media/common/videobuf2/videobuf2-vmalloc.c
@@ -107,8 +107,7 @@ static void *vb2_vmalloc_get_userptr(struct device *dev, unsigned long vaddr,
buf->vaddr = (__force void *)
ioremap(__pfn_to_phys(nums[0]), size + offset);
} else {
- buf->vaddr = vm_map_ram(frame_vector_pages(vec), n_pages, -1,
- PAGE_KERNEL);
+ buf->vaddr = vm_map_ram(frame_vector_pages(vec), n_pages, -1);
}
if (!buf->vaddr)
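
Like __vmalloc(), vm_map_ram() lost its page-protection argument; mappings are always PAGE_KERNEL. Before/after sketch:

    vaddr = vm_map_ram(pages, n_pages, -1, PAGE_KERNEL); /* old form                          */
    vaddr = vm_map_ram(pages, n_pages, -1);              /* new form; node = -1 means any node */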
diff --git a/drivers/media/i2c/imx214.c b/drivers/media/i2c/imx214.c
index 4175d06ffd47..1ef5af9a8c8b 100644
--- a/drivers/media/i2c/imx214.c
+++ b/drivers/media/i2c/imx214.c
@@ -4,7 +4,7 @@
*
* Copyright 2018 Qtechnology A/S
*
- * Ricardo Ribalda <ricardo.ribalda@gmail.com>
+ * Ricardo Ribalda <ribalda@kernel.org>
*/
#include <linux/clk.h>
#include <linux/delay.h>
@@ -1120,5 +1120,5 @@ static struct i2c_driver imx214_i2c_driver = {
module_i2c_driver(imx214_i2c_driver);
MODULE_DESCRIPTION("Sony IMX214 Camera driver");
-MODULE_AUTHOR("Ricardo Ribalda <ricardo.ribalda@gmail.com>");
+MODULE_AUTHOR("Ricardo Ribalda <ribalda@kernel.org>");
MODULE_LICENSE("GPL v2");
diff --git a/drivers/media/mmc/siano/smssdio.c b/drivers/media/mmc/siano/smssdio.c
index def5e93849d2..065b572e0272 100644
--- a/drivers/media/mmc/siano/smssdio.c
+++ b/drivers/media/mmc/siano/smssdio.c
@@ -58,15 +58,15 @@ static const struct sdio_device_id smssdio_ids[] = {
.driver_data = SMS1XXX_BOARD_SIANO_VEGA},
{SDIO_DEVICE(SDIO_VENDOR_ID_SIANO, SDIO_DEVICE_ID_SIANO_VENICE),
.driver_data = SMS1XXX_BOARD_SIANO_VEGA},
- {SDIO_DEVICE(SDIO_VENDOR_ID_SIANO, 0x302),
+ {SDIO_DEVICE(SDIO_VENDOR_ID_SIANO, SDIO_DEVICE_ID_SIANO_MING),
.driver_data = SMS1XXX_BOARD_SIANO_MING},
- {SDIO_DEVICE(SDIO_VENDOR_ID_SIANO, 0x500),
+ {SDIO_DEVICE(SDIO_VENDOR_ID_SIANO, SDIO_DEVICE_ID_SIANO_PELE),
.driver_data = SMS1XXX_BOARD_SIANO_PELE},
- {SDIO_DEVICE(SDIO_VENDOR_ID_SIANO, 0x600),
+ {SDIO_DEVICE(SDIO_VENDOR_ID_SIANO, SDIO_DEVICE_ID_SIANO_RIO),
.driver_data = SMS1XXX_BOARD_SIANO_RIO},
- {SDIO_DEVICE(SDIO_VENDOR_ID_SIANO, 0x700),
+ {SDIO_DEVICE(SDIO_VENDOR_ID_SIANO, SDIO_DEVICE_ID_SIANO_DENVER_2160),
.driver_data = SMS1XXX_BOARD_SIANO_DENVER_2160},
- {SDIO_DEVICE(SDIO_VENDOR_ID_SIANO, 0x800),
+ {SDIO_DEVICE(SDIO_VENDOR_ID_SIANO, SDIO_DEVICE_ID_SIANO_DENVER_1530),
.driver_data = SMS1XXX_BOARD_SIANO_DENVER_1530},
{ /* end: all zeroes */ },
};
diff --git a/drivers/media/pci/ivtv/ivtv-udma.c b/drivers/media/pci/ivtv/ivtv-udma.c
index 5f8883031c9c..0d8372cc364a 100644
--- a/drivers/media/pci/ivtv/ivtv-udma.c
+++ b/drivers/media/pci/ivtv/ivtv-udma.c
@@ -92,7 +92,7 @@ int ivtv_udma_setup(struct ivtv *itv, unsigned long ivtv_dest_addr,
{
struct ivtv_dma_page_info user_dma;
struct ivtv_user_dma *dma = &itv->udma;
- int i, err;
+ int err;
IVTV_DEBUG_DMA("ivtv_udma_setup, dst: 0x%08x\n", (unsigned int)ivtv_dest_addr);
@@ -111,16 +111,15 @@ int ivtv_udma_setup(struct ivtv *itv, unsigned long ivtv_dest_addr,
return -EINVAL;
}
- /* Get user pages for DMA Xfer */
- err = get_user_pages_unlocked(user_dma.uaddr, user_dma.page_count,
+ /* Pin user pages for DMA Xfer */
+ err = pin_user_pages_unlocked(user_dma.uaddr, user_dma.page_count,
dma->map, FOLL_FORCE);
if (user_dma.page_count != err) {
IVTV_DEBUG_WARN("failed to map user pages, returned %d instead of %d\n",
err, user_dma.page_count);
if (err >= 0) {
- for (i = 0; i < err; i++)
- put_page(dma->map[i]);
+ unpin_user_pages(dma->map, err);
return -EINVAL;
}
return err;
@@ -130,9 +129,7 @@ int ivtv_udma_setup(struct ivtv *itv, unsigned long ivtv_dest_addr,
/* Fill SG List with new values */
if (ivtv_udma_fill_sg_list(dma, &user_dma, 0) < 0) {
- for (i = 0; i < dma->page_count; i++) {
- put_page(dma->map[i]);
- }
+ unpin_user_pages(dma->map, dma->page_count);
dma->page_count = 0;
return -ENOMEM;
}
@@ -153,7 +150,6 @@ int ivtv_udma_setup(struct ivtv *itv, unsigned long ivtv_dest_addr,
void ivtv_udma_unmap(struct ivtv *itv)
{
struct ivtv_user_dma *dma = &itv->udma;
- int i;
IVTV_DEBUG_INFO("ivtv_unmap_user_dma\n");
@@ -169,10 +165,7 @@ void ivtv_udma_unmap(struct ivtv *itv)
/* sync DMA */
ivtv_udma_sync_for_cpu(itv);
- /* Release User Pages */
- for (i = 0; i < dma->page_count; i++) {
- put_page(dma->map[i]);
- }
+ unpin_user_pages(dma->map, dma->page_count);
dma->page_count = 0;
}
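
The ivtv conversion swaps the get_user_pages*/put_page loops for the pin_user_pages*/unpin_user_pages pair, the expected pairing for pages that are DMA targets. Sketch of the pattern (uaddr and the page count are placeholders):

    struct page *pages[16];
    long pinned;

    pinned = pin_user_pages_unlocked(uaddr, 16, pages, FOLL_FORCE);
    if (pinned > 0) {
            /* ... set up and run the DMA transfer ... */
            unpin_user_pages(pages, pinned);  /* releases all pins in one call */
    }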
diff --git a/drivers/media/pci/ivtv/ivtv-yuv.c b/drivers/media/pci/ivtv/ivtv-yuv.c
index cd2fe2d444c0..5f7dc9771f8d 100644
--- a/drivers/media/pci/ivtv/ivtv-yuv.c
+++ b/drivers/media/pci/ivtv/ivtv-yuv.c
@@ -30,7 +30,6 @@ static int ivtv_yuv_prep_user_dma(struct ivtv *itv, struct ivtv_user_dma *dma,
struct yuv_playback_info *yi = &itv->yuv_info;
u8 frame = yi->draw_frame;
struct yuv_frame_info *f = &yi->new_frame_info[frame];
- int i;
int y_pages, uv_pages;
unsigned long y_buffer_offset, uv_buffer_offset;
int y_decode_height, uv_decode_height, y_size;
@@ -62,12 +61,12 @@ static int ivtv_yuv_prep_user_dma(struct ivtv *itv, struct ivtv_user_dma *dma,
ivtv_udma_get_page_info (&y_dma, (unsigned long)args->y_source, 720 * y_decode_height);
ivtv_udma_get_page_info (&uv_dma, (unsigned long)args->uv_source, 360 * uv_decode_height);
- /* Get user pages for DMA Xfer */
- y_pages = get_user_pages_unlocked(y_dma.uaddr,
+ /* Pin user pages for DMA Xfer */
+ y_pages = pin_user_pages_unlocked(y_dma.uaddr,
y_dma.page_count, &dma->map[0], FOLL_FORCE);
uv_pages = 0; /* silence gcc. value is set and consumed only if: */
if (y_pages == y_dma.page_count) {
- uv_pages = get_user_pages_unlocked(uv_dma.uaddr,
+ uv_pages = pin_user_pages_unlocked(uv_dma.uaddr,
uv_dma.page_count, &dma->map[y_pages],
FOLL_FORCE);
}
@@ -81,8 +80,7 @@ static int ivtv_yuv_prep_user_dma(struct ivtv *itv, struct ivtv_user_dma *dma,
uv_pages, uv_dma.page_count);
if (uv_pages >= 0) {
- for (i = 0; i < uv_pages; i++)
- put_page(dma->map[y_pages + i]);
+ unpin_user_pages(&dma->map[y_pages], uv_pages);
rc = -EFAULT;
} else {
rc = uv_pages;
@@ -93,8 +91,7 @@ static int ivtv_yuv_prep_user_dma(struct ivtv *itv, struct ivtv_user_dma *dma,
y_pages, y_dma.page_count);
}
if (y_pages >= 0) {
- for (i = 0; i < y_pages; i++)
- put_page(dma->map[i]);
+ unpin_user_pages(dma->map, y_pages);
/*
* Inherit the -EFAULT from rc's
* initialization, but allow it to be
@@ -112,9 +109,7 @@ static int ivtv_yuv_prep_user_dma(struct ivtv *itv, struct ivtv_user_dma *dma,
/* Fill & map SG List */
if (ivtv_udma_fill_sg_list (dma, &uv_dma, ivtv_udma_fill_sg_list (dma, &y_dma, 0)) < 0) {
IVTV_DEBUG_WARN("could not allocate bounce buffers for highmem userspace buffers\n");
- for (i = 0; i < dma->page_count; i++) {
- put_page(dma->map[i]);
- }
+ unpin_user_pages(dma->map, dma->page_count);
dma->page_count = 0;
return -ENOMEM;
}
diff --git a/drivers/media/pci/ivtv/ivtvfb.c b/drivers/media/pci/ivtv/ivtvfb.c
index 0c2859844081..e2d56dca5be4 100644
--- a/drivers/media/pci/ivtv/ivtvfb.c
+++ b/drivers/media/pci/ivtv/ivtvfb.c
@@ -281,10 +281,10 @@ static int ivtvfb_prep_dec_dma_to_device(struct ivtv *itv,
/* Map User DMA */
if (ivtv_udma_setup(itv, ivtv_dest_addr, userbuf, size_in_bytes) <= 0) {
mutex_unlock(&itv->udma.lock);
- IVTVFB_WARN("ivtvfb_prep_dec_dma_to_device, Error with get_user_pages: %d bytes, %d pages returned\n",
+ IVTVFB_WARN("ivtvfb_prep_dec_dma_to_device, Error with pin_user_pages: %d bytes, %d pages returned\n",
size_in_bytes, itv->udma.page_count);
- /* get_user_pages must have failed completely */
+ /* pin_user_pages must have failed completely */
return -EIO;
}
diff --git a/drivers/media/usb/pulse8-cec/pulse8-cec.c b/drivers/media/usb/pulse8-cec/pulse8-cec.c
index 0655aa9ecf28..beae6aa12638 100644
--- a/drivers/media/usb/pulse8-cec/pulse8-cec.c
+++ b/drivers/media/usb/pulse8-cec/pulse8-cec.c
@@ -661,7 +661,6 @@ static int pulse8_setup(struct pulse8 *pulse8, struct serio *serio,
u8 *data = pulse8->data + 1;
u8 cmd[2];
int err;
- struct tm tm;
time64_t date;
pulse8->vers = 0;
@@ -682,10 +681,7 @@ static int pulse8_setup(struct pulse8 *pulse8, struct serio *serio,
if (err)
return err;
date = (data[0] << 24) | (data[1] << 16) | (data[2] << 8) | data[3];
- time64_to_tm(date, 0, &tm);
- dev_info(pulse8->dev, "Firmware build date %04ld.%02d.%02d %02d:%02d:%02d\n",
- tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
- tm.tm_hour, tm.tm_min, tm.tm_sec);
+ dev_info(pulse8->dev, "Firmware build date %ptT\n", &date);
dev_dbg(pulse8->dev, "Persistent config:\n");
cmd[0] = MSGCODE_GET_AUTO_ENABLED;
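
The pulse8-cec change relies on the %ptT vsprintf extension, which prints a time64_t directly instead of unpacking it with time64_to_tm(). A hedged sketch, where "dev" and the value are illustrative:

	time64_t build_date = ktime_get_real_seconds();

	/* %ptT renders a time64_t as an ISO 8601 style date/time string */
	dev_info(dev, "Firmware build date %ptT\n", &build_date);
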
diff --git a/drivers/media/v4l2-core/v4l2-fwnode.c b/drivers/media/v4l2-core/v4l2-fwnode.c
index 97f0f8b23b5d..8a1e1b95b379 100644
--- a/drivers/media/v4l2-core/v4l2-fwnode.c
+++ b/drivers/media/v4l2-core/v4l2-fwnode.c
@@ -980,7 +980,7 @@ static int v4l2_fwnode_reference_parse(struct device *dev,
*
* THIS EXAMPLE EXISTS MERELY TO DOCUMENT THIS FUNCTION. DO NOT USE IT AS A
* REFERENCE IN HOW ACPI TABLES SHOULD BE WRITTEN!! See documentation under
- * Documentation/acpi/dsd instead and especially graph.txt,
+ * Documentation/firmware-guide/acpi/dsd/ instead and especially graph.txt,
* data-node-references.txt and leds.txt .
*
* Scope (\_SB.PCI0.I2C2)
diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 0a59249198d3..687e9c848053 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -407,6 +407,21 @@ config MFD_EXYNOS_LPASS
Select this option to enable support for Samsung Exynos Low Power
Audio Subsystem.
+config MFD_GATEWORKS_GSC
+ tristate "Gateworks System Controller"
+ depends on (I2C && OF)
+ select MFD_CORE
+ select REGMAP_I2C
+ select REGMAP_IRQ
+ help
+ Enable support for the Gateworks System Controller (GSC) found
+ on Gateworks Single Board Computers supporting system functions
+	  such as push-button monitor, multiple ADCs for voltage and
+ temperature monitoring, fan controller and watchdog monitor.
+ This driver provides common support for accessing the device.
+ Additional drivers must be enabled in order to use the
+ functionality of the device.
+
config MFD_MC13XXX
tristate
depends on (SPI_MASTER || I2C)
@@ -551,7 +566,7 @@ config INTEL_SOC_PMIC
config INTEL_SOC_PMIC_BXTWC
tristate "Support for Intel Broxton Whiskey Cove PMIC"
- depends on INTEL_PMC_IPC
+ depends on MFD_INTEL_PMC_BXT
select MFD_CORE
select REGMAP_IRQ
help
@@ -593,7 +608,7 @@ config INTEL_SOC_PMIC_MRFLD
tristate "Support for Intel Merrifield Basin Cove PMIC"
depends on GPIOLIB
depends on ACPI
- depends on INTEL_SCU_IPC
+ depends on INTEL_SCU
select MFD_CORE
select REGMAP_IRQ
help
@@ -625,13 +640,27 @@ config MFD_INTEL_LPSS_PCI
config MFD_INTEL_MSIC
bool "Intel MSIC"
- depends on INTEL_SCU_IPC
+ depends on INTEL_SCU
select MFD_CORE
help
Select this option to enable access to Intel MSIC (Avatele
Passage) chip. This chip embeds audio, battery, GPIO, etc.
devices used in Intel Medfield platforms.
+config MFD_INTEL_PMC_BXT
+ tristate "Intel PMC Driver for Broxton"
+ depends on X86
+ depends on X86_PLATFORM_DEVICES
+ depends on ACPI
+ select INTEL_SCU_IPC
+ select MFD_CORE
+ help
+ This driver provides support for the PMC (Power Management
+ Controller) on Intel Broxton and Apollo Lake. The PMC is a
+ multi-function device that exposes IPC, General Control
+	  Register and P-unit access. In addition, this creates devices
+	  for the iTCO watchdog and telemetry that are part of the PMC.
+
config MFD_IPAQ_MICRO
bool "Atmel Micro ASIC (iPAQ h3100/h3600/h3700) Support"
depends on SA1100_H3100 || SA1100_H3600
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index f935d10cbf0f..bea2be419822 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -15,6 +15,7 @@ obj-$(CONFIG_MFD_BCM590XX) += bcm590xx.o
obj-$(CONFIG_MFD_BD9571MWV) += bd9571mwv.o
obj-$(CONFIG_MFD_CROS_EC_DEV) += cros_ec_dev.o
obj-$(CONFIG_MFD_EXYNOS_LPASS) += exynos-lpass.o
+obj-$(CONFIG_MFD_GATEWORKS_GSC) += gateworks-gsc.o
obj-$(CONFIG_HTC_PASIC3) += htc-pasic3.o
obj-$(CONFIG_HTC_I2CPLD) += htc-i2cpld.o
@@ -212,6 +213,7 @@ obj-$(CONFIG_MFD_INTEL_LPSS) += intel-lpss.o
obj-$(CONFIG_MFD_INTEL_LPSS_PCI) += intel-lpss-pci.o
obj-$(CONFIG_MFD_INTEL_LPSS_ACPI) += intel-lpss-acpi.o
obj-$(CONFIG_MFD_INTEL_MSIC) += intel_msic.o
+obj-$(CONFIG_MFD_INTEL_PMC_BXT) += intel_pmc_bxt.o
obj-$(CONFIG_MFD_PALMAS) += palmas.o
obj-$(CONFIG_MFD_VIPERBOARD) += viperboard.o
obj-$(CONFIG_MFD_RC5T583) += rc5t583.o rc5t583-irq.o
diff --git a/drivers/mfd/gateworks-gsc.c b/drivers/mfd/gateworks-gsc.c
new file mode 100644
index 000000000000..576da62fbb0c
--- /dev/null
+++ b/drivers/mfd/gateworks-gsc.c
@@ -0,0 +1,277 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * The Gateworks System Controller (GSC) is a multi-function
+ * device designed for use in Gateworks Single Board Computers.
+ * The control interface is I2C, with an interrupt. The device supports
+ * system functions such as push-button monitoring, multiple ADCs for
+ * voltage and temperature monitoring, fan controller and watchdog monitor.
+ *
+ * Copyright (C) 2020 Gateworks Corporation
+ */
+
+#include <linux/device.h>
+#include <linux/i2c.h>
+#include <linux/interrupt.h>
+#include <linux/mfd/gsc.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+
+#include <asm/unaligned.h>
+
+/*
+ * The GSC suffers from an erratum where it can occasionally NAK I2C
+ * transactions during ADC cycles. To ensure reliable register access,
+ * every register access is retried a few times.
+ */
+#define I2C_RETRIES 3
+
+int gsc_write(void *context, unsigned int reg, unsigned int val)
+{
+ struct i2c_client *client = context;
+ int retry, ret;
+
+ for (retry = 0; retry < I2C_RETRIES; retry++) {
+ ret = i2c_smbus_write_byte_data(client, reg, val);
+ /*
+ * -EAGAIN returned when the i2c host controller is busy
+ * -EIO returned when i2c device is busy
+ */
+ if (ret != -EAGAIN && ret != -EIO)
+ break;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(gsc_write);
+
+int gsc_read(void *context, unsigned int reg, unsigned int *val)
+{
+ struct i2c_client *client = context;
+ int retry, ret;
+
+ for (retry = 0; retry < I2C_RETRIES; retry++) {
+ ret = i2c_smbus_read_byte_data(client, reg);
+ /*
+ * -EAGAIN returned when the i2c host controller is busy
+ * -EIO returned when i2c device is busy
+ */
+ if (ret != -EAGAIN && ret != -EIO)
+ break;
+ }
+ *val = ret & 0xff;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(gsc_read);
+
+/*
+ * gsc_powerdown - API to use GSC to power down board for a specific time
+ *
+ * secs - number of seconds to remain powered off
+ */
+static int gsc_powerdown(struct gsc_dev *gsc, unsigned long secs)
+{
+ int ret;
+ unsigned char regs[4];
+
+ dev_info(&gsc->i2c->dev, "GSC powerdown for %ld seconds\n",
+ secs);
+
+ put_unaligned_le32(secs, regs);
+ ret = regmap_bulk_write(gsc->regmap, GSC_TIME_ADD, regs, 4);
+ if (ret)
+ return ret;
+
+ ret = regmap_update_bits(gsc->regmap, GSC_CTRL_1,
+ BIT(GSC_CTRL_1_SLEEP_ADD),
+ BIT(GSC_CTRL_1_SLEEP_ADD));
+ if (ret)
+ return ret;
+
+ ret = regmap_update_bits(gsc->regmap, GSC_CTRL_1,
+ BIT(GSC_CTRL_1_SLEEP_ACTIVATE) |
+ BIT(GSC_CTRL_1_SLEEP_ENABLE),
+ BIT(GSC_CTRL_1_SLEEP_ACTIVATE) |
+ BIT(GSC_CTRL_1_SLEEP_ENABLE));
+
+ return ret;
+}
+
+static ssize_t gsc_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct gsc_dev *gsc = dev_get_drvdata(dev);
+ const char *name = attr->attr.name;
+ int rz = 0;
+
+ if (strcasecmp(name, "fw_version") == 0)
+ rz = sprintf(buf, "%d\n", gsc->fwver);
+ else if (strcasecmp(name, "fw_crc") == 0)
+ rz = sprintf(buf, "0x%04x\n", gsc->fwcrc);
+ else
+ dev_err(dev, "invalid command: '%s'\n", name);
+
+ return rz;
+}
+
+static ssize_t gsc_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct gsc_dev *gsc = dev_get_drvdata(dev);
+ const char *name = attr->attr.name;
+ long value;
+
+ if (strcasecmp(name, "powerdown") == 0) {
+ if (kstrtol(buf, 0, &value) == 0)
+ gsc_powerdown(gsc, value);
+ } else {
+		dev_err(dev, "invalid command: '%s'\n", name);
+ }
+
+ return count;
+}
+
+static struct device_attribute attr_fwver =
+ __ATTR(fw_version, 0440, gsc_show, NULL);
+static struct device_attribute attr_fwcrc =
+ __ATTR(fw_crc, 0440, gsc_show, NULL);
+static struct device_attribute attr_pwrdown =
+ __ATTR(powerdown, 0220, NULL, gsc_store);
+
+static struct attribute *gsc_attrs[] = {
+ &attr_fwver.attr,
+ &attr_fwcrc.attr,
+ &attr_pwrdown.attr,
+ NULL,
+};
+
+static struct attribute_group attr_group = {
+ .attrs = gsc_attrs,
+};
+
+static const struct of_device_id gsc_of_match[] = {
+ { .compatible = "gw,gsc", },
+ { }
+};
+MODULE_DEVICE_TABLE(of, gsc_of_match);
+
+static struct regmap_bus gsc_regmap_bus = {
+ .reg_read = gsc_read,
+ .reg_write = gsc_write,
+};
+
+static const struct regmap_config gsc_regmap_config = {
+ .reg_bits = 8,
+ .val_bits = 8,
+ .cache_type = REGCACHE_NONE,
+ .max_register = GSC_WP,
+};
+
+static const struct regmap_irq gsc_irqs[] = {
+ REGMAP_IRQ_REG(GSC_IRQ_PB, 0, BIT(GSC_IRQ_PB)),
+ REGMAP_IRQ_REG(GSC_IRQ_KEY_ERASED, 0, BIT(GSC_IRQ_KEY_ERASED)),
+ REGMAP_IRQ_REG(GSC_IRQ_EEPROM_WP, 0, BIT(GSC_IRQ_EEPROM_WP)),
+ REGMAP_IRQ_REG(GSC_IRQ_RESV, 0, BIT(GSC_IRQ_RESV)),
+ REGMAP_IRQ_REG(GSC_IRQ_GPIO, 0, BIT(GSC_IRQ_GPIO)),
+ REGMAP_IRQ_REG(GSC_IRQ_TAMPER, 0, BIT(GSC_IRQ_TAMPER)),
+ REGMAP_IRQ_REG(GSC_IRQ_WDT_TIMEOUT, 0, BIT(GSC_IRQ_WDT_TIMEOUT)),
+ REGMAP_IRQ_REG(GSC_IRQ_SWITCH_HOLD, 0, BIT(GSC_IRQ_SWITCH_HOLD)),
+};
+
+static const struct regmap_irq_chip gsc_irq_chip = {
+ .name = "gateworks-gsc",
+ .irqs = gsc_irqs,
+ .num_irqs = ARRAY_SIZE(gsc_irqs),
+ .num_regs = 1,
+ .status_base = GSC_IRQ_STATUS,
+ .mask_base = GSC_IRQ_ENABLE,
+ .mask_invert = true,
+ .ack_base = GSC_IRQ_STATUS,
+ .ack_invert = true,
+};
+
+static int gsc_probe(struct i2c_client *client)
+{
+ struct device *dev = &client->dev;
+ struct gsc_dev *gsc;
+ struct regmap_irq_chip_data *irq_data;
+ int ret;
+ unsigned int reg;
+
+ gsc = devm_kzalloc(dev, sizeof(*gsc), GFP_KERNEL);
+ if (!gsc)
+ return -ENOMEM;
+
+ gsc->dev = &client->dev;
+ gsc->i2c = client;
+ i2c_set_clientdata(client, gsc);
+
+ gsc->regmap = devm_regmap_init(dev, &gsc_regmap_bus, client,
+ &gsc_regmap_config);
+ if (IS_ERR(gsc->regmap))
+ return PTR_ERR(gsc->regmap);
+
+ if (regmap_read(gsc->regmap, GSC_FW_VER, &reg))
+ return -EIO;
+ gsc->fwver = reg;
+
+ regmap_read(gsc->regmap, GSC_FW_CRC, &reg);
+ gsc->fwcrc = reg;
+ regmap_read(gsc->regmap, GSC_FW_CRC + 1, &reg);
+ gsc->fwcrc |= reg << 8;
+
+ gsc->i2c_hwmon = devm_i2c_new_dummy_device(dev, client->adapter,
+ GSC_HWMON);
+ if (IS_ERR(gsc->i2c_hwmon)) {
+ dev_err(dev, "Failed to allocate I2C device for HWMON\n");
+ return PTR_ERR(gsc->i2c_hwmon);
+ }
+
+ ret = devm_regmap_add_irq_chip(dev, gsc->regmap, client->irq,
+ IRQF_ONESHOT | IRQF_SHARED |
+ IRQF_TRIGGER_FALLING, 0,
+ &gsc_irq_chip, &irq_data);
+ if (ret)
+ return ret;
+
+ dev_info(dev, "Gateworks System Controller v%d: fw 0x%04x\n",
+ gsc->fwver, gsc->fwcrc);
+
+ ret = sysfs_create_group(&dev->kobj, &attr_group);
+ if (ret)
+ dev_err(dev, "failed to create sysfs attrs\n");
+
+ ret = devm_of_platform_populate(dev);
+ if (ret) {
+ sysfs_remove_group(&dev->kobj, &attr_group);
+ return ret;
+ }
+
+ return 0;
+}
+
+static int gsc_remove(struct i2c_client *client)
+{
+ sysfs_remove_group(&client->dev.kobj, &attr_group);
+
+ return 0;
+}
+
+static struct i2c_driver gsc_driver = {
+ .driver = {
+ .name = "gateworks-gsc",
+ .of_match_table = gsc_of_match,
+ },
+ .probe_new = gsc_probe,
+ .remove = gsc_remove,
+};
+module_i2c_driver(gsc_driver);
+
+MODULE_AUTHOR("Tim Harvey <tharvey@gateworks.com>");
+MODULE_DESCRIPTION("I2C Core interface for GSC");
+MODULE_LICENSE("GPL v2");
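
The exported gsc_read()/gsc_write() accessors are meant to be shared with the GSC sub-drivers (hwmon, watchdog and so on). A hypothetical consumer might look like the sketch below; "client" and GSC_SOME_REG are placeholders, not names from this patch:

	unsigned int val;
	int ret;

	/* gsc_read() already hides the I2C NAK-retry erratum handling */
	ret = gsc_read(client, GSC_SOME_REG, &val);
	if (ret)
		return ret;
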
diff --git a/drivers/mfd/intel_pmc_bxt.c b/drivers/mfd/intel_pmc_bxt.c
new file mode 100644
index 000000000000..9f01d38acc7f
--- /dev/null
+++ b/drivers/mfd/intel_pmc_bxt.c
@@ -0,0 +1,468 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Driver for the Intel Broxton PMC
+ *
+ * (C) Copyright 2014 - 2020 Intel Corporation
+ *
+ * This driver is based on Intel SCU IPC driver (intel_scu_ipc.c) by
+ * Sreedhara DS <sreedhara.ds@intel.com>
+ *
+ * The PMC (Power Management Controller) running on the ARC processor
+ * communicates with another entity running in the IA (Intel Architecture)
+ * core through an IPC (Intel Processor Communications) mechanism which in
+ * turn sends messages between the IA and the PMC.
+ */
+
+#include <linux/acpi.h>
+#include <linux/delay.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/mfd/core.h>
+#include <linux/mfd/intel_pmc_bxt.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/platform_data/itco_wdt.h>
+
+#include <asm/intel_scu_ipc.h>
+
+/* Residency with clock rate at 19.2MHz to usecs */
+#define S0IX_RESIDENCY_IN_USECS(d, s) \
+({ \
+ u64 result = 10ull * ((d) + (s)); \
+ do_div(result, 192); \
+ result; \
+})
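+/*
+ * For example, a combined (deep + shallow) count of 19200 ticks at
+ * 19.2 MHz corresponds to 10 * 19200 / 192 = 1000 usecs of S0ix residency.
+ */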
+
+/* Resources exported from IFWI */
+#define PLAT_RESOURCE_IPC_INDEX 0
+#define PLAT_RESOURCE_IPC_SIZE 0x1000
+#define PLAT_RESOURCE_GCR_OFFSET 0x1000
+#define PLAT_RESOURCE_GCR_SIZE 0x1000
+#define PLAT_RESOURCE_BIOS_DATA_INDEX 1
+#define PLAT_RESOURCE_BIOS_IFACE_INDEX 2
+#define PLAT_RESOURCE_TELEM_SSRAM_INDEX 3
+#define PLAT_RESOURCE_ISP_DATA_INDEX 4
+#define PLAT_RESOURCE_ISP_IFACE_INDEX 5
+#define PLAT_RESOURCE_GTD_DATA_INDEX 6
+#define PLAT_RESOURCE_GTD_IFACE_INDEX 7
+#define PLAT_RESOURCE_ACPI_IO_INDEX 0
+
+/*
+ * BIOS does not create an ACPI device for each PMC function, but
+ * exports multiple resources from one ACPI device (IPC) for multiple
+ * functions. This driver is responsible for creating a child device and
+ * to export resources for those functions.
+ */
+#define SMI_EN_OFFSET 0x0040
+#define SMI_EN_SIZE 4
+#define TCO_BASE_OFFSET 0x0060
+#define TCO_REGS_SIZE 16
+#define TELEM_SSRAM_SIZE 240
+#define TELEM_PMC_SSRAM_OFFSET 0x1b00
+#define TELEM_PUNIT_SSRAM_OFFSET 0x1a00
+
+/* Commands */
+#define PMC_NORTHPEAK_CTRL 0xed
+
+static inline bool is_gcr_valid(u32 offset)
+{
+ return offset < PLAT_RESOURCE_GCR_SIZE - 8;
+}
+
+/**
+ * intel_pmc_gcr_read64() - Read a 64-bit PMC GCR register
+ * @pmc: PMC device pointer
+ * @offset: offset of GCR register from GCR address base
+ * @data: data pointer for storing the register output
+ *
+ * Reads the 64-bit PMC GCR register at given offset.
+ *
+ * Return: Negative value on error or 0 on success.
+ */
+int intel_pmc_gcr_read64(struct intel_pmc_dev *pmc, u32 offset, u64 *data)
+{
+ if (!is_gcr_valid(offset))
+ return -EINVAL;
+
+ spin_lock(&pmc->gcr_lock);
+ *data = readq(pmc->gcr_mem_base + offset);
+ spin_unlock(&pmc->gcr_lock);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(intel_pmc_gcr_read64);
+
+/**
+ * intel_pmc_gcr_update() - Update PMC GCR register bits
+ * @pmc: PMC device pointer
+ * @offset: offset of GCR register from GCR address base
+ * @mask: bit mask for update operation
+ * @val: update value
+ *
+ * Updates the bits of given GCR register as specified by
+ * @mask and @val.
+ *
+ * Return: Negative value on error or 0 on success.
+ */
+int intel_pmc_gcr_update(struct intel_pmc_dev *pmc, u32 offset, u32 mask, u32 val)
+{
+ u32 new_val;
+
+ if (!is_gcr_valid(offset))
+ return -EINVAL;
+
+ spin_lock(&pmc->gcr_lock);
+ new_val = readl(pmc->gcr_mem_base + offset);
+
+ new_val = (new_val & ~mask) | (val & mask);
+ writel(new_val, pmc->gcr_mem_base + offset);
+
+ new_val = readl(pmc->gcr_mem_base + offset);
+ spin_unlock(&pmc->gcr_lock);
+
+ /* Check whether the bit update is successful */
+ return (new_val & mask) != (val & mask) ? -EIO : 0;
+}
+EXPORT_SYMBOL_GPL(intel_pmc_gcr_update);
+
+/**
+ * intel_pmc_s0ix_counter_read() - Read S0ix residency
+ * @pmc: PMC device pointer
+ * @data: Out param that contains current S0ix residency count.
+ *
+ * Writes to @data how many usecs the system has been in low-power S0ix
+ * state.
+ *
+ * Return: An error code or 0 on success.
+ */
+int intel_pmc_s0ix_counter_read(struct intel_pmc_dev *pmc, u64 *data)
+{
+ u64 deep, shlw;
+
+ spin_lock(&pmc->gcr_lock);
+ deep = readq(pmc->gcr_mem_base + PMC_GCR_TELEM_DEEP_S0IX_REG);
+ shlw = readq(pmc->gcr_mem_base + PMC_GCR_TELEM_SHLW_S0IX_REG);
+ spin_unlock(&pmc->gcr_lock);
+
+ *data = S0IX_RESIDENCY_IN_USECS(deep, shlw);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(intel_pmc_s0ix_counter_read);
+
+/**
+ * simplecmd_store() - Send a simple IPC command
+ * @dev: Device the attribute is attached to
+ * @attr: Attribute in question
+ * @buf: Buffer holding data to be stored to the attribute
+ * @count: Number of bytes in @buf
+ *
+ * Expects a string with two integers separated by a space. These two
+ * values hold the command and subcommand that are sent to the PMC.
+ *
+ * Return: Number of bytes written (@count) or a negative errno in case
+ * of error.
+ */
+static ssize_t simplecmd_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct intel_pmc_dev *pmc = dev_get_drvdata(dev);
+ struct intel_scu_ipc_dev *scu = pmc->scu;
+ int subcmd;
+ int cmd;
+ int ret;
+
+ ret = sscanf(buf, "%d %d", &cmd, &subcmd);
+ if (ret != 2) {
+ dev_err(dev, "Invalid values, expected: cmd subcmd\n");
+ return -EINVAL;
+ }
+
+ ret = intel_scu_ipc_dev_simple_command(scu, cmd, subcmd);
+ if (ret)
+ return ret;
+
+ return count;
+}
+static DEVICE_ATTR_WO(simplecmd);
+
+/**
+ * northpeak_store() - Enable or disable Northpeak
+ * @dev: Device the attribute is attached to
+ * @attr: Attribute in question
+ * @buf: Buffer holding data to be stored to the attribute
+ * @count: Number of bytes in @buf
+ *
+ * Expects an unsigned integer. Non-zero enables Northpeak and zero
+ * disables it.
+ *
+ * Return: Number of bytes written (@count) or a negative errno in case
+ * of error.
+ */
+static ssize_t northpeak_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct intel_pmc_dev *pmc = dev_get_drvdata(dev);
+ struct intel_scu_ipc_dev *scu = pmc->scu;
+ unsigned long val;
+ int subcmd;
+ int ret;
+
+ ret = kstrtoul(buf, 0, &val);
+ if (ret)
+ return ret;
+
+ /* Northpeak is enabled if subcmd == 1 and disabled if it is 0 */
+ if (val)
+ subcmd = 1;
+ else
+ subcmd = 0;
+
+ ret = intel_scu_ipc_dev_simple_command(scu, PMC_NORTHPEAK_CTRL, subcmd);
+ if (ret)
+ return ret;
+
+ return count;
+}
+static DEVICE_ATTR_WO(northpeak);
+
+static struct attribute *intel_pmc_attrs[] = {
+ &dev_attr_northpeak.attr,
+ &dev_attr_simplecmd.attr,
+ NULL
+};
+
+static const struct attribute_group intel_pmc_group = {
+ .attrs = intel_pmc_attrs,
+};
+
+static const struct attribute_group *intel_pmc_groups[] = {
+ &intel_pmc_group,
+ NULL
+};
+
+static struct resource punit_res[6];
+
+static struct mfd_cell punit = {
+ .name = "intel_punit_ipc",
+ .resources = punit_res,
+};
+
+static struct itco_wdt_platform_data tco_pdata = {
+ .name = "Apollo Lake SoC",
+ .version = 5,
+ .no_reboot_use_pmc = true,
+};
+
+static struct resource tco_res[2];
+
+static const struct mfd_cell tco = {
+ .name = "iTCO_wdt",
+ .ignore_resource_conflicts = true,
+ .resources = tco_res,
+ .num_resources = ARRAY_SIZE(tco_res),
+ .platform_data = &tco_pdata,
+ .pdata_size = sizeof(tco_pdata),
+};
+
+static const struct resource telem_res[] = {
+ DEFINE_RES_MEM(TELEM_PUNIT_SSRAM_OFFSET, TELEM_SSRAM_SIZE),
+ DEFINE_RES_MEM(TELEM_PMC_SSRAM_OFFSET, TELEM_SSRAM_SIZE),
+};
+
+static const struct mfd_cell telem = {
+ .name = "intel_telemetry",
+ .resources = telem_res,
+ .num_resources = ARRAY_SIZE(telem_res),
+};
+
+static int intel_pmc_get_tco_resources(struct platform_device *pdev)
+{
+ struct resource *res;
+
+ if (acpi_has_watchdog())
+ return 0;
+
+ res = platform_get_resource(pdev, IORESOURCE_IO,
+ PLAT_RESOURCE_ACPI_IO_INDEX);
+ if (!res) {
+ dev_err(&pdev->dev, "Failed to get IO resource\n");
+ return -EINVAL;
+ }
+
+ tco_res[0].flags = IORESOURCE_IO;
+ tco_res[0].start = res->start + TCO_BASE_OFFSET;
+ tco_res[0].end = tco_res[0].start + TCO_REGS_SIZE - 1;
+ tco_res[1].flags = IORESOURCE_IO;
+ tco_res[1].start = res->start + SMI_EN_OFFSET;
+ tco_res[1].end = tco_res[1].start + SMI_EN_SIZE - 1;
+
+ return 0;
+}
+
+static int intel_pmc_get_resources(struct platform_device *pdev,
+ struct intel_pmc_dev *pmc,
+ struct intel_scu_ipc_data *scu_data)
+{
+ struct resource gcr_res;
+ size_t npunit_res = 0;
+ struct resource *res;
+ int ret;
+
+ scu_data->irq = platform_get_irq_optional(pdev, 0);
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM,
+ PLAT_RESOURCE_IPC_INDEX);
+ if (!res) {
+ dev_err(&pdev->dev, "Failed to get IPC resource\n");
+ return -EINVAL;
+ }
+
+ /* IPC registers */
+ scu_data->mem.flags = res->flags;
+ scu_data->mem.start = res->start;
+ scu_data->mem.end = res->start + PLAT_RESOURCE_IPC_SIZE - 1;
+
+ /* GCR registers */
+ gcr_res.flags = res->flags;
+ gcr_res.start = res->start + PLAT_RESOURCE_GCR_OFFSET;
+ gcr_res.end = gcr_res.start + PLAT_RESOURCE_GCR_SIZE - 1;
+
+ pmc->gcr_mem_base = devm_ioremap_resource(&pdev->dev, &gcr_res);
+ if (IS_ERR(pmc->gcr_mem_base))
+ return PTR_ERR(pmc->gcr_mem_base);
+
+ /* Only register iTCO watchdog if there is no WDAT ACPI table */
+ ret = intel_pmc_get_tco_resources(pdev);
+ if (ret)
+ return ret;
+
+ /* BIOS data register */
+ res = platform_get_resource(pdev, IORESOURCE_MEM,
+ PLAT_RESOURCE_BIOS_DATA_INDEX);
+ if (!res) {
+ dev_err(&pdev->dev, "Failed to get resource of P-unit BIOS data\n");
+ return -EINVAL;
+ }
+ punit_res[npunit_res++] = *res;
+
+ /* BIOS interface register */
+ res = platform_get_resource(pdev, IORESOURCE_MEM,
+ PLAT_RESOURCE_BIOS_IFACE_INDEX);
+ if (!res) {
+ dev_err(&pdev->dev, "Failed to get resource of P-unit BIOS interface\n");
+ return -EINVAL;
+ }
+ punit_res[npunit_res++] = *res;
+
+ /* ISP data register, optional */
+ res = platform_get_resource(pdev, IORESOURCE_MEM,
+ PLAT_RESOURCE_ISP_DATA_INDEX);
+ if (res)
+ punit_res[npunit_res++] = *res;
+
+ /* ISP interface register, optional */
+ res = platform_get_resource(pdev, IORESOURCE_MEM,
+ PLAT_RESOURCE_ISP_IFACE_INDEX);
+ if (res)
+ punit_res[npunit_res++] = *res;
+
+ /* GTD data register, optional */
+ res = platform_get_resource(pdev, IORESOURCE_MEM,
+ PLAT_RESOURCE_GTD_DATA_INDEX);
+ if (res)
+ punit_res[npunit_res++] = *res;
+
+ /* GTD interface register, optional */
+ res = platform_get_resource(pdev, IORESOURCE_MEM,
+ PLAT_RESOURCE_GTD_IFACE_INDEX);
+ if (res)
+ punit_res[npunit_res++] = *res;
+
+ punit.num_resources = npunit_res;
+
+ /* Telemetry SSRAM is optional */
+ res = platform_get_resource(pdev, IORESOURCE_MEM,
+ PLAT_RESOURCE_TELEM_SSRAM_INDEX);
+ if (res)
+ pmc->telem_base = res;
+
+ return 0;
+}
+
+static int intel_pmc_create_devices(struct intel_pmc_dev *pmc)
+{
+ int ret;
+
+ if (!acpi_has_watchdog()) {
+ ret = devm_mfd_add_devices(pmc->dev, PLATFORM_DEVID_AUTO, &tco,
+ 1, NULL, 0, NULL);
+ if (ret)
+ return ret;
+ }
+
+ ret = devm_mfd_add_devices(pmc->dev, PLATFORM_DEVID_AUTO, &punit, 1,
+ NULL, 0, NULL);
+ if (ret)
+ return ret;
+
+ if (pmc->telem_base) {
+ ret = devm_mfd_add_devices(pmc->dev, PLATFORM_DEVID_AUTO,
+ &telem, 1, pmc->telem_base, 0, NULL);
+ }
+
+ return ret;
+}
+
+static const struct acpi_device_id intel_pmc_acpi_ids[] = {
+ { "INT34D2" },
+ { }
+};
+MODULE_DEVICE_TABLE(acpi, intel_pmc_acpi_ids);
+
+static int intel_pmc_probe(struct platform_device *pdev)
+{
+ struct intel_scu_ipc_data scu_data = {};
+ struct intel_pmc_dev *pmc;
+ int ret;
+
+ pmc = devm_kzalloc(&pdev->dev, sizeof(*pmc), GFP_KERNEL);
+ if (!pmc)
+ return -ENOMEM;
+
+ pmc->dev = &pdev->dev;
+ spin_lock_init(&pmc->gcr_lock);
+
+ ret = intel_pmc_get_resources(pdev, pmc, &scu_data);
+ if (ret) {
+ dev_err(&pdev->dev, "Failed to request resources\n");
+ return ret;
+ }
+
+ pmc->scu = devm_intel_scu_ipc_register(&pdev->dev, &scu_data);
+ if (IS_ERR(pmc->scu))
+ return PTR_ERR(pmc->scu);
+
+ platform_set_drvdata(pdev, pmc);
+
+ ret = intel_pmc_create_devices(pmc);
+ if (ret)
+ dev_err(&pdev->dev, "Failed to create PMC devices\n");
+
+ return ret;
+}
+
+static struct platform_driver intel_pmc_driver = {
+ .probe = intel_pmc_probe,
+ .driver = {
+ .name = "intel_pmc_bxt",
+ .acpi_match_table = intel_pmc_acpi_ids,
+ .dev_groups = intel_pmc_groups,
+ },
+};
+module_platform_driver(intel_pmc_driver);
+
+MODULE_AUTHOR("Mika Westerberg <mika.westerberg@linux.intel.com>");
+MODULE_AUTHOR("Zha Qipeng <qipeng.zha@intel.com>");
+MODULE_DESCRIPTION("Intel Broxton PMC driver");
+MODULE_LICENSE("GPL v2");
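
For context, the functions exported above (intel_pmc_gcr_read64(), intel_pmc_gcr_update(), intel_pmc_s0ix_counter_read()) are intended for the PMC child devices. Below is a hedged sketch of a consumer reading the S0ix residency; obtaining "pmc" via the parent's drvdata is an assumption for illustration only:

	struct intel_pmc_dev *pmc = dev_get_drvdata(pdev->dev.parent);
	u64 residency_us;

	if (!intel_pmc_s0ix_counter_read(pmc, &residency_us))
		dev_dbg(&pdev->dev, "S0ix residency: %llu us\n", residency_us);
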
diff --git a/drivers/mfd/intel_soc_pmic_bxtwc.c b/drivers/mfd/intel_soc_pmic_bxtwc.c
index 739cfb5b69fe..eba89780dbe7 100644
--- a/drivers/mfd/intel_soc_pmic_bxtwc.c
+++ b/drivers/mfd/intel_soc_pmic_bxtwc.c
@@ -15,7 +15,7 @@
#include <linux/mfd/intel_soc_pmic_bxtwc.h>
#include <linux/module.h>
-#include <asm/intel_pmc_ipc.h>
+#include <asm/intel_scu_ipc.h>
/* PMIC device registers */
#define REG_ADDR_MASK 0xFF00
@@ -58,6 +58,10 @@
/* Whiskey Cove PMIC share same ACPI ID between different platforms */
#define BROXTON_PMIC_WC_HRV 4
+#define PMC_PMIC_ACCESS 0xFF
+#define PMC_PMIC_READ 0x0
+#define PMC_PMIC_WRITE 0x1
+
enum bxtwc_irqs {
BXTWC_PWRBTN_LVL1_IRQ = 0,
BXTWC_TMU_LVL1_IRQ,
@@ -288,13 +292,12 @@ static int regmap_ipc_byte_reg_read(void *context, unsigned int reg,
ipc_in[0] = reg;
ipc_in[1] = i2c_addr;
- ret = intel_pmc_ipc_command(PMC_IPC_PMIC_ACCESS,
- PMC_IPC_PMIC_ACCESS_READ,
- ipc_in, sizeof(ipc_in), (u32 *)ipc_out, 1);
- if (ret) {
- dev_err(pmic->dev, "Failed to read from PMIC\n");
+ ret = intel_scu_ipc_dev_command(pmic->scu, PMC_PMIC_ACCESS,
+ PMC_PMIC_READ, ipc_in, sizeof(ipc_in),
+ ipc_out, sizeof(ipc_out));
+ if (ret)
return ret;
- }
+
*val = ipc_out[0];
return 0;
@@ -303,7 +306,6 @@ static int regmap_ipc_byte_reg_read(void *context, unsigned int reg,
static int regmap_ipc_byte_reg_write(void *context, unsigned int reg,
unsigned int val)
{
- int ret;
int i2c_addr;
u8 ipc_in[3];
struct intel_soc_pmic *pmic = context;
@@ -321,15 +323,9 @@ static int regmap_ipc_byte_reg_write(void *context, unsigned int reg,
ipc_in[0] = reg;
ipc_in[1] = i2c_addr;
ipc_in[2] = val;
- ret = intel_pmc_ipc_command(PMC_IPC_PMIC_ACCESS,
- PMC_IPC_PMIC_ACCESS_WRITE,
- ipc_in, sizeof(ipc_in), NULL, 0);
- if (ret) {
- dev_err(pmic->dev, "Failed to write to PMIC\n");
- return ret;
- }
-
- return 0;
+ return intel_scu_ipc_dev_command(pmic->scu, PMC_PMIC_ACCESS,
+ PMC_PMIC_WRITE, ipc_in, sizeof(ipc_in),
+ NULL, 0);
}
/* sysfs interfaces to r/w PMIC registers, required by initial script */
@@ -457,6 +453,10 @@ static int bxtwc_probe(struct platform_device *pdev)
dev_set_drvdata(&pdev->dev, pmic);
pmic->dev = &pdev->dev;
+ pmic->scu = devm_intel_scu_ipc_dev_get(&pdev->dev);
+ if (!pmic->scu)
+ return -EPROBE_DEFER;
+
pmic->regmap = devm_regmap_init(&pdev->dev, NULL, pmic,
&bxtwc_regmap_config);
if (IS_ERR(pmic->regmap)) {
diff --git a/drivers/mfd/intel_soc_pmic_mrfld.c b/drivers/mfd/intel_soc_pmic_mrfld.c
index 26a1551c5faf..bd94c989d232 100644
--- a/drivers/mfd/intel_soc_pmic_mrfld.c
+++ b/drivers/mfd/intel_soc_pmic_mrfld.c
@@ -74,10 +74,11 @@ static const struct mfd_cell bcove_dev[] = {
static int bcove_ipc_byte_reg_read(void *context, unsigned int reg,
unsigned int *val)
{
+ struct intel_soc_pmic *pmic = context;
u8 ipc_out;
int ret;
- ret = intel_scu_ipc_ioread8(reg, &ipc_out);
+ ret = intel_scu_ipc_dev_ioread8(pmic->scu, reg, &ipc_out);
if (ret)
return ret;
@@ -88,10 +89,11 @@ static int bcove_ipc_byte_reg_read(void *context, unsigned int reg,
static int bcove_ipc_byte_reg_write(void *context, unsigned int reg,
unsigned int val)
{
+ struct intel_soc_pmic *pmic = context;
u8 ipc_in = val;
int ret;
- ret = intel_scu_ipc_iowrite8(reg, ipc_in);
+ ret = intel_scu_ipc_dev_iowrite8(pmic->scu, reg, ipc_in);
if (ret)
return ret;
@@ -117,6 +119,10 @@ static int bcove_probe(struct platform_device *pdev)
if (!pmic)
return -ENOMEM;
+ pmic->scu = devm_intel_scu_ipc_dev_get(dev);
+ if (!pmic->scu)
+ return -ENOMEM;
+
platform_set_drvdata(pdev, pmic);
pmic->dev = &pdev->dev;
diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c
index a1ed375fed37..71f795b510ce 100644
--- a/drivers/misc/mei/pci-me.c
+++ b/drivers/misc/mei/pci-me.c
@@ -241,7 +241,7 @@ static int mei_me_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
* MEI requires to resume from runtime suspend mode
* in order to perform link reset flow upon system suspend.
*/
- dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_NEVER_SKIP);
+ dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_NO_DIRECT_COMPLETE);
/*
* ME maps runtime suspend/resume to D0i states,
diff --git a/drivers/misc/mei/pci-txe.c b/drivers/misc/mei/pci-txe.c
index beacf2a2f2b5..4bf26ce61044 100644
--- a/drivers/misc/mei/pci-txe.c
+++ b/drivers/misc/mei/pci-txe.c
@@ -128,7 +128,7 @@ static int mei_txe_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
* MEI requires to resume from runtime suspend mode
* in order to perform link reset flow upon system suspend.
*/
- dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_NEVER_SKIP);
+ dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_NO_DIRECT_COMPLETE);
/*
* TXE maps runtime suspend/resume to own power gating states,
diff --git a/drivers/misc/vmw_vmci/vmci_host.c b/drivers/misc/vmw_vmci/vmci_host.c
index ce16d6b99295..2d8328d928d5 100644
--- a/drivers/misc/vmw_vmci/vmci_host.c
+++ b/drivers/misc/vmw_vmci/vmci_host.c
@@ -233,8 +233,6 @@ static int vmci_host_setup_notify(struct vmci_ctx *context,
* about the size.
*/
BUILD_BUG_ON(sizeof(bool) != sizeof(u8));
- if (!access_ok((void __user *)uva, sizeof(u8)))
- return VMCI_ERROR_GENERIC;
/*
* Lock physical page backing a given user VA.
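
The access_ok() check can be dropped because the page-pinning call that follows on the same user VA performs its own address validation and fails safely on a bad pointer. An illustrative sketch of that kind of check, not the driver's exact code:

	/* pinning validates the user address; a bad VA simply fails */
	retval = get_user_pages_fast(uva, 1, FOLL_WRITE, &page);
	if (retval != 1)
		return VMCI_ERROR_GENERIC;
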
diff --git a/drivers/mmc/core/bus.c b/drivers/mmc/core/bus.c
index 74de3f2dda38..70207f11a654 100644
--- a/drivers/mmc/core/bus.c
+++ b/drivers/mmc/core/bus.c
@@ -93,6 +93,20 @@ mmc_bus_uevent(struct device *dev, struct kobj_uevent_env *env)
return retval;
}
+ if (card->type == MMC_TYPE_SDIO || card->type == MMC_TYPE_SD_COMBO) {
+ retval = add_uevent_var(env, "SDIO_ID=%04X:%04X",
+ card->cis.vendor, card->cis.device);
+ if (retval)
+ return retval;
+ }
+
+ /*
+	 * SDIO (non-combo) cards are not handled by the mmc_block driver and do
+	 * not have an accessible CID register, which is used by mmc_card_name().
+ */
+ if (card->type == MMC_TYPE_SDIO)
+ return 0;
+
retval = add_uevent_var(env, "MMC_NAME=%s", mmc_card_name(card));
if (retval)
return retval;
diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 4c5de6d37ac7..8d2b808e9b58 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -1815,8 +1815,7 @@ int mmc_erase(struct mmc_card *card, unsigned int from, unsigned int nr,
unsigned int rem, to = from + nr;
int err;
- if (!(card->host->caps & MMC_CAP_ERASE) ||
- !(card->csd.cmdclass & CCC_ERASE))
+ if (!(card->csd.cmdclass & CCC_ERASE))
return -EOPNOTSUPP;
if (!card->erase_size)
@@ -1872,8 +1871,7 @@ EXPORT_SYMBOL(mmc_erase);
int mmc_can_erase(struct mmc_card *card)
{
- if ((card->host->caps & MMC_CAP_ERASE) &&
- (card->csd.cmdclass & CCC_ERASE) && card->erase_size)
+ if (card->csd.cmdclass & CCC_ERASE && card->erase_size)
return 1;
return 0;
}
diff --git a/drivers/mmc/core/debugfs.c b/drivers/mmc/core/debugfs.c
index 09e0c7659469..9ec84c86c46a 100644
--- a/drivers/mmc/core/debugfs.c
+++ b/drivers/mmc/core/debugfs.c
@@ -219,7 +219,7 @@ static int mmc_clock_opt_set(void *data, u64 val)
return 0;
}
-DEFINE_SIMPLE_ATTRIBUTE(mmc_clock_fops, mmc_clock_opt_get, mmc_clock_opt_set,
+DEFINE_DEBUGFS_ATTRIBUTE(mmc_clock_fops, mmc_clock_opt_get, mmc_clock_opt_set,
"%llu\n");
void mmc_add_host_debugfs(struct mmc_host *host)
@@ -232,8 +232,8 @@ void mmc_add_host_debugfs(struct mmc_host *host)
debugfs_create_file("ios", S_IRUSR, root, host, &mmc_ios_fops);
debugfs_create_x32("caps", S_IRUSR, root, &host->caps);
debugfs_create_x32("caps2", S_IRUSR, root, &host->caps2);
- debugfs_create_file("clock", S_IRUSR | S_IWUSR, root, host,
- &mmc_clock_fops);
+ debugfs_create_file_unsafe("clock", S_IRUSR | S_IWUSR, root, host,
+ &mmc_clock_fops);
#ifdef CONFIG_FAIL_MMC_REQUEST
if (fail_request)
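
The debugfs change above follows the recommended pairing: attributes defined with DEFINE_DEBUGFS_ATTRIBUTE() use debugfs_file_get()/debugfs_file_put() internally, so they are registered with debugfs_create_file_unsafe() to skip the extra proxy fops. A minimal, illustrative sketch (the foo_* names are not from this driver):

	static int foo_get(void *data, u64 *val)
	{
		*val = *(u64 *)data;
		return 0;
	}

	static int foo_set(void *data, u64 val)
	{
		*(u64 *)data = val;
		return 0;
	}
	DEFINE_DEBUGFS_ATTRIBUTE(foo_fops, foo_get, foo_set, "%llu\n");

	/* registration, e.g. from a driver's debugfs init */
	debugfs_create_file_unsafe("foo", 0600, parent, &foo_value, &foo_fops);
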
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index de94fbe629bd..4203303f946a 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -647,6 +647,9 @@ static int mmc_decode_ext_csd(struct mmc_card *card, u8 *ext_csd)
mmc_hostname(card->host),
card->ext_csd.cmdq_depth);
}
+ card->ext_csd.enhanced_rpmb_supported =
+ (card->ext_csd.rel_param &
+ EXT_CSD_WR_REL_PARAM_EN_RPMB_REL_WR);
}
out:
return err;
@@ -786,6 +789,8 @@ MMC_DEV_ATTR(enhanced_area_offset, "%llu\n",
card->ext_csd.enhanced_area_offset);
MMC_DEV_ATTR(enhanced_area_size, "%u\n", card->ext_csd.enhanced_area_size);
MMC_DEV_ATTR(raw_rpmb_size_mult, "%#x\n", card->ext_csd.raw_rpmb_size_mult);
+MMC_DEV_ATTR(enhanced_rpmb_supported, "%#x\n",
+ card->ext_csd.enhanced_rpmb_supported);
MMC_DEV_ATTR(rel_sectors, "%#x\n", card->ext_csd.rel_sectors);
MMC_DEV_ATTR(ocr, "0x%08x\n", card->ocr);
MMC_DEV_ATTR(rca, "0x%04x\n", card->rca);
@@ -843,6 +848,7 @@ static struct attribute *mmc_std_attrs[] = {
&dev_attr_enhanced_area_offset.attr,
&dev_attr_enhanced_area_size.attr,
&dev_attr_raw_rpmb_size_mult.attr,
+ &dev_attr_enhanced_rpmb_supported.attr,
&dev_attr_rel_sectors.attr,
&dev_attr_ocr.attr,
&dev_attr_rca.attr,
diff --git a/drivers/mmc/core/quirks.h b/drivers/mmc/core/quirks.h
index 3dba15bccce2..472fa2fdcf13 100644
--- a/drivers/mmc/core/quirks.h
+++ b/drivers/mmc/core/quirks.h
@@ -139,7 +139,7 @@ static const struct mmc_fixup sdio_fixup_methods[] = {
SDIO_FIXUP(SDIO_VENDOR_ID_MARVELL, SDIO_DEVICE_ID_MARVELL_8797_F0,
add_quirk, MMC_QUIRK_BROKEN_IRQ_POLLING),
- SDIO_FIXUP(SDIO_VENDOR_ID_MARVELL, SDIO_DEVICE_ID_MARVELL_8887WLAN,
+ SDIO_FIXUP(SDIO_VENDOR_ID_MARVELL, SDIO_DEVICE_ID_MARVELL_8887_F0,
add_limit_rate_quirk, 150000000),
END_FIXUP
diff --git a/drivers/mmc/core/regulator.c b/drivers/mmc/core/regulator.c
index b6febbcf8978..96b1d15045d6 100644
--- a/drivers/mmc/core/regulator.c
+++ b/drivers/mmc/core/regulator.c
@@ -136,6 +136,8 @@ static int mmc_regulator_set_voltage_if_supported(struct regulator *regulator,
int min_uV, int target_uV,
int max_uV)
{
+ int current_uV;
+
/*
* Check if supported first to avoid errors since we may try several
* signal levels during power up and don't want to show errors.
@@ -143,6 +145,14 @@ static int mmc_regulator_set_voltage_if_supported(struct regulator *regulator,
if (!regulator_is_supported_voltage(regulator, min_uV, max_uV))
return -EINVAL;
+ /*
+ * The voltage is already set, no need to switch.
+ * Return 1 to indicate that no switch happened.
+ */
+ current_uV = regulator_get_voltage(regulator);
+ if (current_uV == target_uV)
+ return 1;
+
return regulator_set_voltage_triplet(regulator, min_uV, target_uV,
max_uV);
}
@@ -198,9 +208,10 @@ int mmc_regulator_set_vqmmc(struct mmc_host *mmc, struct mmc_ios *ios)
* voltage in two steps and try to stay close to vmmc
* with a 0.3V tolerance at first.
*/
- if (!mmc_regulator_set_voltage_if_supported(mmc->supply.vqmmc,
- min_uV, volt, max_uV))
- return 0;
+ ret = mmc_regulator_set_voltage_if_supported(mmc->supply.vqmmc,
+ min_uV, volt, max_uV);
+ if (ret >= 0)
+ return ret;
return mmc_regulator_set_voltage_if_supported(mmc->supply.vqmmc,
2700000, volt, 3600000);
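
With this change mmc_regulator_set_vqmmc() can return 1 when the requested voltage was already programmed, so callers must treat only negative values as errors; the host driver hunks further down in this diff do exactly that. A short sketch of the expected caller pattern, with illustrative names:

	ret = mmc_regulator_set_vqmmc(mmc, ios);
	if (ret < 0)
		dev_err(dev, "failed to set vqmmc: %d\n", ret);
	else if (ret > 0)
		dev_dbg(dev, "vqmmc was already at the requested voltage\n");
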
diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c
index 76c7add367d5..5a2210c25aa7 100644
--- a/drivers/mmc/core/sd.c
+++ b/drivers/mmc/core/sd.c
@@ -376,11 +376,11 @@ int mmc_sd_switch_hs(struct mmc_card *card)
if (!status)
return -ENOMEM;
- err = mmc_sd_switch(card, 1, 0, 1, status);
+ err = mmc_sd_switch(card, 1, 0, HIGH_SPEED_BUS_SPEED, status);
if (err)
goto out;
- if ((status[16] & 0xF) != 1) {
+ if ((status[16] & 0xF) != HIGH_SPEED_BUS_SPEED) {
pr_warn("%s: Problem switching card into high-speed mode!\n",
mmc_hostname(card->host));
err = 0;
@@ -707,7 +707,12 @@ static ssize_t mmc_dsr_show(struct device *dev,
static DEVICE_ATTR(dsr, S_IRUGO, mmc_dsr_show, NULL);
+MMC_DEV_ATTR(vendor, "0x%04x\n", card->cis.vendor);
+MMC_DEV_ATTR(device, "0x%04x\n", card->cis.device);
+
static struct attribute *sd_std_attrs[] = {
+ &dev_attr_vendor.attr,
+ &dev_attr_device.attr,
&dev_attr_cid.attr,
&dev_attr_csd.attr,
&dev_attr_scr.attr,
@@ -726,7 +731,26 @@ static struct attribute *sd_std_attrs[] = {
&dev_attr_dsr.attr,
NULL,
};
-ATTRIBUTE_GROUPS(sd_std);
+
+static umode_t sd_std_is_visible(struct kobject *kobj, struct attribute *attr,
+ int index)
+{
+ struct device *dev = container_of(kobj, struct device, kobj);
+ struct mmc_card *card = mmc_dev_to_card(dev);
+
+ /* CIS vendor and device ids are available only for Combo cards */
+ if ((attr == &dev_attr_vendor.attr || attr == &dev_attr_device.attr) &&
+ card->type != MMC_TYPE_SD_COMBO)
+ return 0;
+
+ return attr->mode;
+}
+
+static const struct attribute_group sd_std_group = {
+ .attrs = sd_std_attrs,
+ .is_visible = sd_std_is_visible,
+};
+__ATTRIBUTE_GROUPS(sd_std);
struct device_type sd_type = {
.groups = sd_std_groups,
diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c
index ebb387aa5158..b65b26f76d71 100644
--- a/drivers/mmc/core/sdio.c
+++ b/drivers/mmc/core/sdio.c
@@ -27,6 +27,24 @@
#include "sdio_ops.h"
#include "sdio_cis.h"
+MMC_DEV_ATTR(vendor, "0x%04x\n", card->cis.vendor);
+MMC_DEV_ATTR(device, "0x%04x\n", card->cis.device);
+MMC_DEV_ATTR(ocr, "0x%08x\n", card->ocr);
+MMC_DEV_ATTR(rca, "0x%04x\n", card->rca);
+
+static struct attribute *sdio_std_attrs[] = {
+ &dev_attr_vendor.attr,
+ &dev_attr_device.attr,
+ &dev_attr_ocr.attr,
+ &dev_attr_rca.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(sdio_std);
+
+static struct device_type sdio_type = {
+ .groups = sdio_std_groups,
+};
+
static int sdio_read_fbr(struct sdio_func *func)
{
int ret;
@@ -543,13 +561,33 @@ out:
return err;
}
-static void mmc_sdio_resend_if_cond(struct mmc_host *host,
- struct mmc_card *card)
+static int mmc_sdio_pre_init(struct mmc_host *host, u32 ocr,
+ struct mmc_card *card)
{
+ if (card)
+ mmc_remove_card(card);
+
+ /*
+ * Reset the card by performing the same steps that are taken by
+ * mmc_rescan_try_freq() and mmc_attach_sdio() during a "normal" probe.
+ *
+ * sdio_reset() is technically not needed. Having just powered up the
+ * hardware, it should already be in reset state. However, some
+ * platforms (such as SD8686 on OLPC) do not instantly cut power,
+ * meaning that a reset is required when restoring power soon after
+ * powering off. It is harmless in other cases.
+ *
+ * The CMD5 reset (mmc_send_io_op_cond()), according to the SDIO spec,
+ * is not necessary for non-removable cards. However, it is required
+ * for OLPC SD8686 (which expects a [CMD5,5,3,7] init sequence), and
+ * harmless in other situations.
+ *
+ */
+
sdio_reset(host);
mmc_go_idle(host);
- mmc_send_if_cond(host, host->ocr_avail);
- mmc_remove_card(card);
+ mmc_send_if_cond(host, ocr);
+ return mmc_send_io_op_cond(host, 0, NULL);
}
/*
@@ -584,7 +622,7 @@ try_again:
*/
err = mmc_send_io_op_cond(host, ocr, &rocr);
if (err)
- goto err;
+ return err;
/*
* For SPI, enable CRC as appropriate.
@@ -592,17 +630,15 @@ try_again:
if (mmc_host_is_spi(host)) {
err = mmc_spi_set_crc(host, use_spi_crc);
if (err)
- goto err;
+ return err;
}
/*
* Allocate card structure.
*/
- card = mmc_alloc_card(host, NULL);
- if (IS_ERR(card)) {
- err = PTR_ERR(card);
- goto err;
- }
+ card = mmc_alloc_card(host, &sdio_type);
+ if (IS_ERR(card))
+ return PTR_ERR(card);
if ((rocr & R4_MEMORY_PRESENT) &&
mmc_sd_get_cid(host, ocr & rocr, card->raw_cid, NULL) == 0) {
@@ -610,19 +646,15 @@ try_again:
if (oldcard && (oldcard->type != MMC_TYPE_SD_COMBO ||
memcmp(card->raw_cid, oldcard->raw_cid, sizeof(card->raw_cid)) != 0)) {
- mmc_remove_card(card);
- pr_debug("%s: Perhaps the card was replaced\n",
- mmc_hostname(host));
- return -ENOENT;
+ err = -ENOENT;
+ goto mismatch;
}
} else {
card->type = MMC_TYPE_SDIO;
if (oldcard && oldcard->type != MMC_TYPE_SDIO) {
- mmc_remove_card(card);
- pr_debug("%s: Perhaps the card was replaced\n",
- mmc_hostname(host));
- return -ENOENT;
+ err = -ENOENT;
+ goto mismatch;
}
}
@@ -646,7 +678,7 @@ try_again:
if (rocr & ocr & R4_18V_PRESENT) {
err = mmc_set_uhs_voltage(host, ocr_card);
if (err == -EAGAIN) {
- mmc_sdio_resend_if_cond(host, card);
+ mmc_sdio_pre_init(host, ocr_card, card);
retries--;
goto try_again;
} else if (err) {
@@ -677,7 +709,7 @@ try_again:
if (!oldcard && card->type == MMC_TYPE_SD_COMBO) {
err = mmc_sd_get_csd(host, card);
if (err)
- return err;
+ goto remove;
mmc_decode_cid(card);
}
@@ -704,7 +736,12 @@ try_again:
mmc_set_timing(card->host, MMC_TIMING_SD_HS);
}
- goto finish;
+ if (oldcard)
+ mmc_remove_card(card);
+ else
+ host->card = card;
+
+ return 0;
}
/*
@@ -713,14 +750,13 @@ try_again:
*/
err = sdio_read_cccr(card, ocr);
if (err) {
- mmc_sdio_resend_if_cond(host, card);
+ mmc_sdio_pre_init(host, ocr_card, card);
if (ocr & R4_18V_PRESENT) {
/* Retry init sequence, but without R4_18V_PRESENT. */
retries = 0;
goto try_again;
- } else {
- goto remove;
}
+ return err;
}
/*
@@ -731,16 +767,14 @@ try_again:
goto remove;
if (oldcard) {
- int same = (card->cis.vendor == oldcard->cis.vendor &&
- card->cis.device == oldcard->cis.device);
- mmc_remove_card(card);
- if (!same) {
- pr_debug("%s: Perhaps the card was replaced\n",
- mmc_hostname(host));
- return -ENOENT;
+ if (card->cis.vendor == oldcard->cis.vendor &&
+ card->cis.device == oldcard->cis.device) {
+ mmc_remove_card(card);
+ card = oldcard;
+ } else {
+ err = -ENOENT;
+ goto mismatch;
}
-
- card = oldcard;
}
card->ocr = ocr_card;
mmc_fixup_device(card, sdio_fixup_methods);
@@ -801,16 +835,15 @@ try_again:
err = -EINVAL;
goto remove;
}
-finish:
- if (!oldcard)
- host->card = card;
+
+ host->card = card;
return 0;
+mismatch:
+ pr_debug("%s: Perhaps the card was replaced\n", mmc_hostname(host));
remove:
- if (!oldcard)
+ if (oldcard != card)
mmc_remove_card(card);
-
-err:
return err;
}
@@ -818,28 +851,7 @@ static int mmc_sdio_reinit_card(struct mmc_host *host)
{
int ret;
- /*
- * Reset the card by performing the same steps that are taken by
- * mmc_rescan_try_freq() and mmc_attach_sdio() during a "normal" probe.
- *
- * sdio_reset() is technically not needed. Having just powered up the
- * hardware, it should already be in reset state. However, some
- * platforms (such as SD8686 on OLPC) do not instantly cut power,
- * meaning that a reset is required when restoring power soon after
- * powering off. It is harmless in other cases.
- *
- * The CMD5 reset (mmc_send_io_op_cond()), according to the SDIO spec,
- * is not necessary for non-removable cards. However, it is required
- * for OLPC SD8686 (which expects a [CMD5,5,3,7] init sequence), and
- * harmless in other situations.
- *
- */
-
- sdio_reset(host);
- mmc_go_idle(host);
- mmc_send_if_cond(host, host->card->ocr);
-
- ret = mmc_send_io_op_cond(host, 0, NULL);
+ ret = mmc_sdio_pre_init(host, host->card->ocr, NULL);
if (ret)
return ret;
diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig
index 462b5352fea7..0ce332ad986b 100644
--- a/drivers/mmc/host/Kconfig
+++ b/drivers/mmc/host/Kconfig
@@ -171,7 +171,7 @@ config MMC_SDHCI_OF_ASPEED
config MMC_SDHCI_OF_AT91
tristate "SDHCI OF support for the Atmel SDMMC controller"
depends on MMC_SDHCI_PLTFM
- depends on OF
+ depends on OF && HAVE_CLK
help
This selects the Atmel SDMMC driver
@@ -235,6 +235,19 @@ config MMC_SDHCI_CNS3XXX
If unsure, say N.
+config MMC_SDHCI_ESDHC_MCF
+ tristate "SDHCI support for the Freescale eSDHC ColdFire controller"
+ depends on M5441x
+ depends on MMC_SDHCI_PLTFM
+ select MMC_SDHCI_IO_ACCESSORS
+ help
+ This selects the Freescale eSDHC controller support for
+ ColdFire mcf5441x devices.
+
+ If you have a controller with this interface, say Y or M here.
+
+ If unsure, say N.
+
config MMC_SDHCI_ESDHC_IMX
tristate "SDHCI support for the Freescale eSDHC/uSDHC i.MX controller"
depends on ARCH_MXC
@@ -405,6 +418,20 @@ config MMC_MESON_GX
If you have a controller with this interface, say Y here.
+config MMC_MESON_MX_SDHC
+ tristate "Amlogic Meson SDHC Host Controller support"
+ depends on (ARM && ARCH_MESON) || COMPILE_TEST
+ depends on COMMON_CLK
+ depends on OF
+ help
+ This selects support for the SDHC Host Controller on
+ Amlogic Meson6, Meson8, Meson8b and Meson8m2 SoCs.
+ The controller supports the SD/SDIO Spec 3.x and eMMC Spec 4.5x
+ with 1, 4, and 8 bit bus widths.
+
+ If you have a controller with this interface, say Y or M here.
+ If unsure, say N.
+
config MMC_MESON_MX_SDIO
tristate "Amlogic Meson6/Meson8/Meson8b SD/MMC Host Controller support"
depends on ARCH_MESON || COMPILE_TEST
diff --git a/drivers/mmc/host/Makefile b/drivers/mmc/host/Makefile
index b929ef941208..4d5bcb0144a0 100644
--- a/drivers/mmc/host/Makefile
+++ b/drivers/mmc/host/Makefile
@@ -68,6 +68,8 @@ obj-$(CONFIG_MMC_VUB300) += vub300.o
obj-$(CONFIG_MMC_USHC) += ushc.o
obj-$(CONFIG_MMC_WMT) += wmt-sdmmc.o
obj-$(CONFIG_MMC_MESON_GX) += meson-gx-mmc.o
+meson-mx-sdhc-objs := meson-mx-sdhc-clkc.o meson-mx-sdhc-mmc.o
+obj-$(CONFIG_MMC_MESON_MX_SDHC) += meson-mx-sdhc.o
obj-$(CONFIG_MMC_MESON_MX_SDIO) += meson-mx-sdio.o
obj-$(CONFIG_MMC_MOXART) += moxart-mmc.o
obj-$(CONFIG_MMC_SUNXI) += sunxi-mmc.o
@@ -82,6 +84,7 @@ obj-$(CONFIG_MMC_REALTEK_USB) += rtsx_usb_sdmmc.o
obj-$(CONFIG_MMC_SDHCI_PLTFM) += sdhci-pltfm.o
obj-$(CONFIG_MMC_SDHCI_CADENCE) += sdhci-cadence.o
obj-$(CONFIG_MMC_SDHCI_CNS3XXX) += sdhci-cns3xxx.o
+obj-$(CONFIG_MMC_SDHCI_ESDHC_MCF) += sdhci-esdhc-mcf.o
obj-$(CONFIG_MMC_SDHCI_ESDHC_IMX) += sdhci-esdhc-imx.o
obj-$(CONFIG_MMC_SDHCI_DOVE) += sdhci-dove.o
obj-$(CONFIG_MMC_SDHCI_TEGRA) += sdhci-tegra.o
diff --git a/drivers/mmc/host/android-goldfish.c b/drivers/mmc/host/android-goldfish.c
index 914e17bab3be..ceb4924e02d0 100644
--- a/drivers/mmc/host/android-goldfish.c
+++ b/drivers/mmc/host/android-goldfish.c
@@ -27,7 +27,6 @@
#include <linux/mutex.h>
#include <linux/scatterlist.h>
#include <linux/mmc/mmc.h>
-#include <linux/mmc/sdio.h>
#include <linux/mmc/host.h>
#include <linux/mmc/card.h>
@@ -404,14 +403,6 @@ static void goldfish_mmc_request(struct mmc_host *mmc, struct mmc_request *req)
host->mrq = req;
goldfish_mmc_prepare_data(host, req);
goldfish_mmc_start_command(host, req->cmd);
-
- /*
- * This is to avoid accidentally being detected as an SDIO card
- * in mmc_attach_sdio().
- */
- if (req->cmd->opcode == SD_IO_SEND_OP_COND &&
- req->cmd->flags == (MMC_RSP_SPI_R4 | MMC_RSP_R4 | MMC_CMD_BCR))
- req->cmd->error = -EINVAL;
}
static void goldfish_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
@@ -482,6 +473,7 @@ static int goldfish_mmc_probe(struct platform_device *pdev)
mmc->f_max = 24000000;
mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34;
mmc->caps = MMC_CAP_4_BIT_DATA;
+ mmc->caps2 = MMC_CAP2_NO_SDIO;
/* Use scatterlist DMA to reduce per-transfer costs.
* NOTE max_seg_size assumption that small blocks aren't
diff --git a/drivers/mmc/host/atmel-mci.c b/drivers/mmc/host/atmel-mci.c
index aeaaa5314924..5cb692687698 100644
--- a/drivers/mmc/host/atmel-mci.c
+++ b/drivers/mmc/host/atmel-mci.c
@@ -169,6 +169,7 @@
#define atmci_writel(port, reg, value) \
__raw_writel((value), (port)->regs + reg)
+#define ATMCI_CMD_TIMEOUT_MS 2000
#define AUTOSUSPEND_DELAY 50
#define ATMCI_DATA_ERROR_FLAGS (ATMCI_DCRCE | ATMCI_DTOE | ATMCI_OVRE | ATMCI_UNRE)
@@ -808,6 +809,9 @@ static u32 atmci_prepare_command(struct mmc_host *mmc,
static void atmci_send_command(struct atmel_mci *host,
struct mmc_command *cmd, u32 cmd_flags)
{
+ unsigned int timeout_ms = cmd->busy_timeout ? cmd->busy_timeout :
+ ATMCI_CMD_TIMEOUT_MS;
+
WARN_ON(host->cmd);
host->cmd = cmd;
@@ -817,6 +821,8 @@ static void atmci_send_command(struct atmel_mci *host,
atmci_writel(host, ATMCI_ARGR, cmd->arg);
atmci_writel(host, ATMCI_CMDR, cmd_flags);
+
+ mod_timer(&host->timer, jiffies + msecs_to_jiffies(timeout_ms));
}
static void atmci_send_stop_cmd(struct atmel_mci *host, struct mmc_data *data)
@@ -1314,8 +1320,6 @@ static void atmci_start_request(struct atmel_mci *host,
* prepared yet.)
*/
atmci_writel(host, ATMCI_IER, iflags);
-
- mod_timer(&host->timer, jiffies + msecs_to_jiffies(2000));
}
static void atmci_queue_request(struct atmel_mci *host,
@@ -1557,6 +1561,8 @@ static void atmci_request_end(struct atmel_mci *host, struct mmc_request *mrq)
WARN_ON(host->cmd || host->data);
+ del_timer(&host->timer);
+
/*
* Update the MMC clock rate if necessary. This may be
* necessary if set_ios() is called when a different slot is
@@ -1583,8 +1589,6 @@ static void atmci_request_end(struct atmel_mci *host, struct mmc_request *mrq)
host->state = STATE_IDLE;
}
- del_timer(&host->timer);
-
spin_unlock(&host->lock);
mmc_request_done(prev_mmc, mrq);
spin_lock(&host->lock);
diff --git a/drivers/mmc/host/au1xmmc.c b/drivers/mmc/host/au1xmmc.c
index 8823680ca42c..9bb1910268ca 100644
--- a/drivers/mmc/host/au1xmmc.c
+++ b/drivers/mmc/host/au1xmmc.c
@@ -259,7 +259,7 @@ static void au1xmmc_tasklet_finish(unsigned long param)
au1xmmc_finish_request(host);
}
-static int au1xmmc_send_command(struct au1xmmc_host *host, int wait,
+static int au1xmmc_send_command(struct au1xmmc_host *host,
struct mmc_command *cmd, struct mmc_data *data)
{
u32 mmccmd = (cmd->opcode << SD_CMD_CI_SHIFT);
@@ -302,9 +302,6 @@ static int au1xmmc_send_command(struct au1xmmc_host *host, int wait,
__raw_writel(cmd->arg, HOST_CMDARG(host));
wmb(); /* drain writebuffer */
- if (wait)
- IRQ_OFF(host, SD_CONFIG_CR);
-
__raw_writel((mmccmd | SD_CMD_GO), HOST_CMD(host));
wmb(); /* drain writebuffer */
@@ -312,19 +309,6 @@ static int au1xmmc_send_command(struct au1xmmc_host *host, int wait,
while (__raw_readl(HOST_CMD(host)) & SD_CMD_GO)
/* nop */;
- /* Wait for the command to come back */
- if (wait) {
- u32 status = __raw_readl(HOST_STATUS(host));
-
- while (!(status & SD_STATUS_CR))
- status = __raw_readl(HOST_STATUS(host));
-
- /* Clear the CR status */
- __raw_writel(SD_STATUS_CR, HOST_STATUS(host));
-
- IRQ_ON(host, SD_CONFIG_CR);
- }
-
return 0;
}
@@ -711,7 +695,7 @@ static void au1xmmc_request(struct mmc_host* mmc, struct mmc_request* mrq)
}
if (!ret)
- ret = au1xmmc_send_command(host, 0, mrq->cmd, mrq->data);
+ ret = au1xmmc_send_command(host, mrq->cmd, mrq->data);
if (ret) {
mrq->cmd->error = ret;
diff --git a/drivers/mmc/host/bcm2835.c b/drivers/mmc/host/bcm2835.c
index c3d949847cbd..a0767790a826 100644
--- a/drivers/mmc/host/bcm2835.c
+++ b/drivers/mmc/host/bcm2835.c
@@ -1280,8 +1280,7 @@ static int bcm2835_add_host(struct bcm2835_host *host)
/* host controller capabilities */
mmc->caps |= MMC_CAP_SD_HIGHSPEED | MMC_CAP_MMC_HIGHSPEED |
- MMC_CAP_NEEDS_POLL | MMC_CAP_HW_RESET | MMC_CAP_ERASE |
- MMC_CAP_CMD23;
+ MMC_CAP_NEEDS_POLL | MMC_CAP_HW_RESET | MMC_CAP_CMD23;
spin_lock_init(&host->lock);
mutex_init(&host->mutex);
diff --git a/drivers/mmc/host/cavium.c b/drivers/mmc/host/cavium.c
index 89deb451e0ac..c5da3aaee334 100644
--- a/drivers/mmc/host/cavium.c
+++ b/drivers/mmc/host/cavium.c
@@ -1038,8 +1038,7 @@ int cvm_mmc_of_slot_probe(struct device *dev, struct cvm_mmc_host *host)
* Disable bounce buffers for max_segs = 1
*/
mmc->caps |= MMC_CAP_MMC_HIGHSPEED | MMC_CAP_SD_HIGHSPEED |
- MMC_CAP_ERASE | MMC_CAP_CMD23 | MMC_CAP_POWER_OFF_CARD |
- MMC_CAP_3_3V_DDR;
+ MMC_CAP_CMD23 | MMC_CAP_POWER_OFF_CARD | MMC_CAP_3_3V_DDR;
if (host->use_sg)
mmc->max_segs = 16;
diff --git a/drivers/mmc/host/cb710-mmc.c b/drivers/mmc/host/cb710-mmc.c
index e33270e40539..e84ed84ea4cc 100644
--- a/drivers/mmc/host/cb710-mmc.c
+++ b/drivers/mmc/host/cb710-mmc.c
@@ -10,6 +10,8 @@
#include <linux/delay.h>
#include "cb710-mmc.h"
+#define CB710_MMC_REQ_TIMEOUT_MS 2000
+
static const u8 cb710_clock_divider_log2[8] = {
/* 1, 2, 4, 8, 16, 32, 128, 512 */
0, 1, 2, 3, 4, 5, 7, 9
@@ -707,6 +709,12 @@ static int cb710_mmc_init(struct platform_device *pdev)
mmc->f_min = val >> cb710_clock_divider_log2[CB710_MAX_DIVIDER_IDX];
mmc->ocr_avail = MMC_VDD_32_33|MMC_VDD_33_34;
mmc->caps = MMC_CAP_4_BIT_DATA;
+ /*
+ * In cb710_wait_for_event() we use a fixed timeout of ~2s, hence let's
+ * inform the core about it. A future improvement should instead make
+ * use of the cmd->busy_timeout.
+ */
+ mmc->max_busy_timeout = CB710_MMC_REQ_TIMEOUT_MS;
reader = mmc_priv(mmc);
diff --git a/drivers/mmc/host/dw_mmc-k3.c b/drivers/mmc/host/dw_mmc-k3.c
index 23b6f65b3785..50977ff18074 100644
--- a/drivers/mmc/host/dw_mmc-k3.c
+++ b/drivers/mmc/host/dw_mmc-k3.c
@@ -424,7 +424,7 @@ static int dw_mci_hi3660_switch_voltage(struct mmc_host *mmc,
if (!IS_ERR(mmc->supply.vqmmc)) {
ret = mmc_regulator_set_vqmmc(mmc, ios);
- if (ret) {
+ if (ret < 0) {
dev_err(host->dev, "Regulator set error %d\n", ret);
return ret;
}
diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c
index bc5278ab5707..35ae5737c622 100644
--- a/drivers/mmc/host/dw_mmc.c
+++ b/drivers/mmc/host/dw_mmc.c
@@ -1546,8 +1546,7 @@ static int dw_mci_switch_voltage(struct mmc_host *mmc, struct mmc_ios *ios)
if (!IS_ERR(mmc->supply.vqmmc)) {
ret = mmc_regulator_set_vqmmc(mmc, ios);
-
- if (ret) {
+ if (ret < 0) {
dev_dbg(&mmc->class_dev,
"Regulator set error %d - %s V\n",
ret, uhs & v18 ? "1.8" : "3.3");
@@ -2752,12 +2751,6 @@ static int dw_mci_init_slot_caps(struct dw_mci_slot *slot)
if (host->pdata->caps)
mmc->caps = host->pdata->caps;
- /*
- * Support MMC_CAP_ERASE by default.
- * It needs to use trim/discard/erase commands.
- */
- mmc->caps |= MMC_CAP_ERASE;
-
if (host->pdata->pm_caps)
mmc->pm_caps = host->pdata->pm_caps;
diff --git a/drivers/mmc/host/jz4740_mmc.c b/drivers/mmc/host/jz4740_mmc.c
index fbae87d1f017..cba7a6fcd178 100644
--- a/drivers/mmc/host/jz4740_mmc.c
+++ b/drivers/mmc/host/jz4740_mmc.c
@@ -108,6 +108,7 @@
#define JZ_MMC_LPM_LOW_POWER_MODE_EN BIT(0)
#define JZ_MMC_CLK_RATE 24000000
+#define JZ_MMC_REQ_TIMEOUT_MS 5000
enum jz4740_mmc_version {
JZ_MMC_JZ4740,
@@ -440,7 +441,8 @@ static unsigned int jz4740_mmc_poll_irq(struct jz4740_mmc_host *host,
if (timeout == 0) {
set_bit(0, &host->waiting);
- mod_timer(&host->timeout_timer, jiffies + 5*HZ);
+ mod_timer(&host->timeout_timer,
+ jiffies + msecs_to_jiffies(JZ_MMC_REQ_TIMEOUT_MS));
jz4740_mmc_set_irq_enabled(host, irq, true);
return true;
}
@@ -893,7 +895,8 @@ static void jz4740_mmc_request(struct mmc_host *mmc, struct mmc_request *req)
host->state = JZ4740_MMC_STATE_READ_RESPONSE;
set_bit(0, &host->waiting);
- mod_timer(&host->timeout_timer, jiffies + 5*HZ);
+ mod_timer(&host->timeout_timer,
+ jiffies + msecs_to_jiffies(JZ_MMC_REQ_TIMEOUT_MS));
jz4740_mmc_send_command(host, req->cmd);
}
@@ -1023,6 +1026,12 @@ static int jz4740_mmc_probe(struct platform_device* pdev)
mmc->f_min = mmc->f_max / 128;
mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34;
+ /*
+ * We use a fixed timeout of 5s, hence inform the core about it. A
+ * future improvement should instead respect the cmd->busy_timeout.
+ */
+ mmc->max_busy_timeout = JZ_MMC_REQ_TIMEOUT_MS;
+
mmc->max_blk_size = (1 << 10) - 1;
mmc->max_blk_count = (1 << 15) - 1;
mmc->max_req_size = mmc->max_blk_size * mmc->max_blk_count;
diff --git a/drivers/mmc/host/meson-gx-mmc.c b/drivers/mmc/host/meson-gx-mmc.c
index 35400cf2a2e4..7eb38d7482c6 100644
--- a/drivers/mmc/host/meson-gx-mmc.c
+++ b/drivers/mmc/host/meson-gx-mmc.c
@@ -1004,6 +1004,8 @@ static int meson_mmc_card_busy(struct mmc_host *mmc)
static int meson_mmc_voltage_switch(struct mmc_host *mmc, struct mmc_ios *ios)
{
+ int ret;
+
/* vqmmc regulator is available */
if (!IS_ERR(mmc->supply.vqmmc)) {
/*
@@ -1013,7 +1015,8 @@ static int meson_mmc_voltage_switch(struct mmc_host *mmc, struct mmc_ios *ios)
* to 1.8v. Please make sure the regulator framework is aware
* of your own regulator constraints
*/
- return mmc_regulator_set_vqmmc(mmc, ios);
+ ret = mmc_regulator_set_vqmmc(mmc, ios);
+ return ret < 0 ? ret : 0;
}
/* no vqmmc regulator, assume fixed regulator at 3/3.3V */
diff --git a/drivers/mmc/host/meson-mx-sdhc-clkc.c b/drivers/mmc/host/meson-mx-sdhc-clkc.c
new file mode 100644
index 000000000000..e1f29b279123
--- /dev/null
+++ b/drivers/mmc/host/meson-mx-sdhc-clkc.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Amlogic Meson SDHC clock controller
+ *
+ * Copyright (C) 2020 Martin Blumenstingl <martin.blumenstingl@googlemail.com>
+ */
+
+#include <linux/clk.h>
+#include <linux/clk-provider.h>
+#include <linux/device.h>
+#include <linux/platform_device.h>
+
+#include "meson-mx-sdhc.h"
+
+#define MESON_SDHC_NUM_BUILTIN_CLKS 6
+
+struct meson_mx_sdhc_clkc {
+ struct clk_mux src_sel;
+ struct clk_divider div;
+ struct clk_gate mod_clk_en;
+ struct clk_gate tx_clk_en;
+ struct clk_gate rx_clk_en;
+ struct clk_gate sd_clk_en;
+};
+
+static const struct clk_parent_data meson_mx_sdhc_src_sel_parents[4] = {
+ { .fw_name = "clkin0" },
+ { .fw_name = "clkin1" },
+ { .fw_name = "clkin2" },
+ { .fw_name = "clkin3" },
+};
+
+static const struct clk_div_table meson_mx_sdhc_div_table[] = {
+ { .div = 6, .val = 5, },
+ { .div = 8, .val = 7, },
+ { .div = 9, .val = 8, },
+ { .div = 10, .val = 9, },
+ { .div = 12, .val = 11, },
+ { .div = 16, .val = 15, },
+ { .div = 18, .val = 17, },
+ { .div = 34, .val = 33, },
+ { .div = 142, .val = 141, },
+ { .div = 850, .val = 849, },
+ { .div = 2126, .val = 2125, },
+ { .div = 4096, .val = 4095, },
+ { /* sentinel */ }
+};
+
+static int meson_mx_sdhc_clk_hw_register(struct device *dev,
+ const char *name_suffix,
+ const struct clk_parent_data *parents,
+ unsigned int num_parents,
+ const struct clk_ops *ops,
+ struct clk_hw *hw)
+{
+ struct clk_init_data init = { };
+ char clk_name[32];
+
+ snprintf(clk_name, sizeof(clk_name), "%s#%s", dev_name(dev),
+ name_suffix);
+
+ init.name = clk_name;
+ init.ops = ops;
+ init.flags = CLK_SET_RATE_PARENT;
+ init.parent_data = parents;
+ init.num_parents = num_parents;
+
+ hw->init = &init;
+
+ return devm_clk_hw_register(dev, hw);
+}
+
+static int meson_mx_sdhc_gate_clk_hw_register(struct device *dev,
+ const char *name_suffix,
+ struct clk_hw *parent,
+ struct clk_hw *hw)
+{
+ struct clk_parent_data parent_data = { .hw = parent };
+
+ return meson_mx_sdhc_clk_hw_register(dev, name_suffix, &parent_data, 1,
+ &clk_gate_ops, hw);
+}
+
+int meson_mx_sdhc_register_clkc(struct device *dev, void __iomem *base,
+ struct clk_bulk_data *clk_bulk_data)
+{
+ struct clk_parent_data div_parent = { };
+ struct meson_mx_sdhc_clkc *clkc_data;
+ int ret;
+
+ clkc_data = devm_kzalloc(dev, sizeof(*clkc_data), GFP_KERNEL);
+ if (!clkc_data)
+ return -ENOMEM;
+
+ clkc_data->src_sel.reg = base + MESON_SDHC_CLKC;
+ clkc_data->src_sel.mask = 0x3;
+ clkc_data->src_sel.shift = 16;
+ ret = meson_mx_sdhc_clk_hw_register(dev, "src_sel",
+ meson_mx_sdhc_src_sel_parents, 4,
+ &clk_mux_ops,
+ &clkc_data->src_sel.hw);
+ if (ret)
+ return ret;
+
+ clkc_data->div.reg = base + MESON_SDHC_CLKC;
+ clkc_data->div.shift = 0;
+ clkc_data->div.width = 12;
+ clkc_data->div.table = meson_mx_sdhc_div_table;
+ div_parent.hw = &clkc_data->src_sel.hw;
+ ret = meson_mx_sdhc_clk_hw_register(dev, "div", &div_parent, 1,
+ &clk_divider_ops,
+ &clkc_data->div.hw);
+ if (ret)
+ return ret;
+
+ clkc_data->mod_clk_en.reg = base + MESON_SDHC_CLKC;
+ clkc_data->mod_clk_en.bit_idx = 15;
+ ret = meson_mx_sdhc_gate_clk_hw_register(dev, "mod_clk_on",
+ &clkc_data->div.hw,
+ &clkc_data->mod_clk_en.hw);
+ if (ret)
+ return ret;
+
+ clkc_data->tx_clk_en.reg = base + MESON_SDHC_CLKC;
+ clkc_data->tx_clk_en.bit_idx = 14;
+ ret = meson_mx_sdhc_gate_clk_hw_register(dev, "tx_clk_on",
+ &clkc_data->div.hw,
+ &clkc_data->tx_clk_en.hw);
+ if (ret)
+ return ret;
+
+ clkc_data->rx_clk_en.reg = base + MESON_SDHC_CLKC;
+ clkc_data->rx_clk_en.bit_idx = 13;
+ ret = meson_mx_sdhc_gate_clk_hw_register(dev, "rx_clk_on",
+ &clkc_data->div.hw,
+ &clkc_data->rx_clk_en.hw);
+ if (ret)
+ return ret;
+
+ clkc_data->sd_clk_en.reg = base + MESON_SDHC_CLKC;
+ clkc_data->sd_clk_en.bit_idx = 12;
+ ret = meson_mx_sdhc_gate_clk_hw_register(dev, "sd_clk_on",
+ &clkc_data->div.hw,
+ &clkc_data->sd_clk_en.hw);
+ if (ret)
+ return ret;
+
+ /*
+ * TODO: Replace clk_hw.clk with devm_clk_hw_get_clk() once that is
+ * available.
+ */
+ clk_bulk_data[0].clk = clkc_data->mod_clk_en.hw.clk;
+ clk_bulk_data[1].clk = clkc_data->sd_clk_en.hw.clk;
+ clk_bulk_data[2].clk = clkc_data->tx_clk_en.hw.clk;
+ clk_bulk_data[3].clk = clkc_data->rx_clk_en.hw.clk;
+
+ return 0;
+}
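
For reference, the MMC driver added below consumes the four gate clocks registered here through the clk_bulk API; a condensed usage sketch mirroring meson_mx_sdhc_enable_clks()/meson_mx_sdhc_disable_clks():

	ret = meson_mx_sdhc_register_clkc(dev, base, host->bulk_clks);
	if (ret)
		return ret;

	ret = clk_bulk_prepare_enable(MESON_SDHC_NUM_BULK_CLKS, host->bulk_clks);
	if (ret)
		return ret;
	/* ... */
	clk_bulk_disable_unprepare(MESON_SDHC_NUM_BULK_CLKS, host->bulk_clks);
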
diff --git a/drivers/mmc/host/meson-mx-sdhc-mmc.c b/drivers/mmc/host/meson-mx-sdhc-mmc.c
new file mode 100644
index 000000000000..53e3f6a4245a
--- /dev/null
+++ b/drivers/mmc/host/meson-mx-sdhc-mmc.c
@@ -0,0 +1,914 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Amlogic Meson6/Meson8/Meson8b/Meson8m2 SDHC MMC host controller driver.
+ *
+ * Copyright (C) 2020 Martin Blumenstingl <martin.blumenstingl@googlemail.com>
+ */
+
+#include <linux/clk.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/iopoll.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/property.h>
+#include <linux/regmap.h>
+#include <linux/regulator/consumer.h>
+#include <linux/types.h>
+
+#include <linux/mmc/host.h>
+#include <linux/mmc/mmc.h>
+#include <linux/mmc/sdio.h>
+#include <linux/mmc/slot-gpio.h>
+
+#include "meson-mx-sdhc.h"
+
+#define MESON_SDHC_NUM_BULK_CLKS 4
+#define MESON_SDHC_MAX_BLK_SIZE 512
+#define MESON_SDHC_NUM_TUNING_TRIES 10
+
+#define MESON_SDHC_WAIT_CMD_READY_SLEEP_US 1
+#define MESON_SDHC_WAIT_CMD_READY_TIMEOUT_US 100000
+#define MESON_SDHC_WAIT_BEFORE_SEND_SLEEP_US 1
+#define MESON_SDHC_WAIT_BEFORE_SEND_TIMEOUT_US 200
+
+struct meson_mx_sdhc_data {
+ void (*init_hw)(struct mmc_host *mmc);
+ void (*set_pdma)(struct mmc_host *mmc);
+ void (*wait_before_send)(struct mmc_host *mmc);
+ bool hardware_flush_all_cmds;
+};
+
+struct meson_mx_sdhc_host {
+ struct mmc_host *mmc;
+
+ struct mmc_request *mrq;
+ struct mmc_command *cmd;
+ int error;
+
+ struct regmap *regmap;
+
+ struct clk *pclk;
+ struct clk *sd_clk;
+ struct clk_bulk_data bulk_clks[MESON_SDHC_NUM_BULK_CLKS];
+ bool bulk_clks_enabled;
+
+ const struct meson_mx_sdhc_data *platform;
+};
+
+static const struct regmap_config meson_mx_sdhc_regmap_config = {
+ .reg_bits = 8,
+ .val_bits = 32,
+ .reg_stride = 4,
+ .max_register = MESON_SDHC_CLK2,
+};
+
+static void meson_mx_sdhc_hw_reset(struct mmc_host *mmc)
+{
+ struct meson_mx_sdhc_host *host = mmc_priv(mmc);
+
+ regmap_write(host->regmap, MESON_SDHC_SRST, MESON_SDHC_SRST_MAIN_CTRL |
+ MESON_SDHC_SRST_RXFIFO | MESON_SDHC_SRST_TXFIFO |
+ MESON_SDHC_SRST_DPHY_RX | MESON_SDHC_SRST_DPHY_TX |
+ MESON_SDHC_SRST_DMA_IF);
+ usleep_range(10, 100);
+
+ regmap_write(host->regmap, MESON_SDHC_SRST, 0);
+ usleep_range(10, 100);
+}
+
+static void meson_mx_sdhc_clear_fifo(struct mmc_host *mmc)
+{
+ struct meson_mx_sdhc_host *host = mmc_priv(mmc);
+ u32 stat;
+
+ regmap_read(host->regmap, MESON_SDHC_STAT, &stat);
+ if (!FIELD_GET(MESON_SDHC_STAT_RXFIFO_CNT, stat) &&
+ !FIELD_GET(MESON_SDHC_STAT_TXFIFO_CNT, stat))
+ return;
+
+ regmap_write(host->regmap, MESON_SDHC_SRST, MESON_SDHC_SRST_RXFIFO |
+ MESON_SDHC_SRST_TXFIFO | MESON_SDHC_SRST_MAIN_CTRL);
+ udelay(5);
+
+ regmap_read(host->regmap, MESON_SDHC_STAT, &stat);
+ if (FIELD_GET(MESON_SDHC_STAT_RXFIFO_CNT, stat) ||
+ FIELD_GET(MESON_SDHC_STAT_TXFIFO_CNT, stat))
+ dev_warn(mmc_dev(host->mmc),
+ "Failed to clear FIFOs, RX: %lu, TX: %lu\n",
+ FIELD_GET(MESON_SDHC_STAT_RXFIFO_CNT, stat),
+ FIELD_GET(MESON_SDHC_STAT_TXFIFO_CNT, stat));
+}
+
+static void meson_mx_sdhc_wait_cmd_ready(struct mmc_host *mmc)
+{
+ struct meson_mx_sdhc_host *host = mmc_priv(mmc);
+ u32 stat, esta;
+ int ret;
+
+ ret = regmap_read_poll_timeout(host->regmap, MESON_SDHC_STAT, stat,
+ !(stat & MESON_SDHC_STAT_CMD_BUSY),
+ MESON_SDHC_WAIT_CMD_READY_SLEEP_US,
+ MESON_SDHC_WAIT_CMD_READY_TIMEOUT_US);
+ if (ret) {
+ dev_warn(mmc_dev(mmc),
+ "Failed to poll for CMD_BUSY while processing CMD%d\n",
+ host->cmd->opcode);
+ meson_mx_sdhc_hw_reset(mmc);
+ }
+
+ ret = regmap_read_poll_timeout(host->regmap, MESON_SDHC_ESTA, esta,
+ !(esta & MESON_SDHC_ESTA_11_13),
+ MESON_SDHC_WAIT_CMD_READY_SLEEP_US,
+ MESON_SDHC_WAIT_CMD_READY_TIMEOUT_US);
+ if (ret) {
+ dev_warn(mmc_dev(mmc),
+ "Failed to poll for ESTA[13:11] while processing CMD%d\n",
+ host->cmd->opcode);
+ meson_mx_sdhc_hw_reset(mmc);
+ }
+}
+
+static void meson_mx_sdhc_start_cmd(struct mmc_host *mmc,
+ struct mmc_command *cmd)
+{
+ struct meson_mx_sdhc_host *host = mmc_priv(mmc);
+ u32 ictl, send;
+ int pack_len;
+
+ host->cmd = cmd;
+
+ ictl = MESON_SDHC_ICTL_DATA_TIMEOUT | MESON_SDHC_ICTL_DATA_ERR_CRC |
+ MESON_SDHC_ICTL_RXFIFO_FULL | MESON_SDHC_ICTL_TXFIFO_EMPTY |
+ MESON_SDHC_ICTL_RESP_TIMEOUT | MESON_SDHC_ICTL_RESP_ERR_CRC;
+
+ send = FIELD_PREP(MESON_SDHC_SEND_CMD_INDEX, cmd->opcode);
+
+ if (cmd->data) {
+ send |= MESON_SDHC_SEND_CMD_HAS_DATA;
+ send |= FIELD_PREP(MESON_SDHC_SEND_TOTAL_PACK,
+ cmd->data->blocks - 1);
+
+ if (cmd->data->blksz < MESON_SDHC_MAX_BLK_SIZE)
+ pack_len = cmd->data->blksz;
+ else
+ pack_len = 0;
+
+ if (cmd->data->flags & MMC_DATA_WRITE)
+ send |= MESON_SDHC_SEND_DATA_DIR;
+
+ /*
+ * For a command without data, just wait for the response done
+ * interrupt (int[0]); for a command with a data transfer, just
+ * wait for the DMA done interrupt (int[11]). There is no need to
+ * care whether DAT0 is busy or not.
+ */
+ if (host->platform->hardware_flush_all_cmds ||
+ cmd->data->flags & MMC_DATA_WRITE)
+ /* hardware flush: */
+ ictl |= MESON_SDHC_ICTL_DMA_DONE;
+ else
+ /* software flush: */
+ ictl |= MESON_SDHC_ICTL_DATA_XFER_OK;
+ } else {
+ pack_len = 0;
+
+ ictl |= MESON_SDHC_ICTL_RESP_OK;
+ }
+
+ if (cmd->opcode == MMC_STOP_TRANSMISSION)
+ send |= MESON_SDHC_SEND_DATA_STOP;
+
+ if (cmd->flags & MMC_RSP_PRESENT)
+ send |= MESON_SDHC_SEND_CMD_HAS_RESP;
+
+ if (cmd->flags & MMC_RSP_136) {
+ send |= MESON_SDHC_SEND_RESP_LEN;
+ send |= MESON_SDHC_SEND_RESP_NO_CRC;
+ }
+
+ if (!(cmd->flags & MMC_RSP_CRC))
+ send |= MESON_SDHC_SEND_RESP_NO_CRC;
+
+ if (cmd->flags & MMC_RSP_BUSY)
+ send |= MESON_SDHC_SEND_R1B;
+
+ /* enable the new IRQs and mask all pending ones */
+ regmap_write(host->regmap, MESON_SDHC_ICTL, ictl);
+ regmap_write(host->regmap, MESON_SDHC_ISTA, MESON_SDHC_ISTA_ALL_IRQS);
+
+ regmap_write(host->regmap, MESON_SDHC_ARGU, cmd->arg);
+
+ regmap_update_bits(host->regmap, MESON_SDHC_CTRL,
+ MESON_SDHC_CTRL_PACK_LEN,
+ FIELD_PREP(MESON_SDHC_CTRL_PACK_LEN, pack_len));
+
+ if (cmd->data)
+ regmap_write(host->regmap, MESON_SDHC_ADDR,
+ sg_dma_address(cmd->data->sg));
+
+ meson_mx_sdhc_wait_cmd_ready(mmc);
+
+ if (cmd->data)
+ host->platform->set_pdma(mmc);
+
+ if (host->platform->wait_before_send)
+ host->platform->wait_before_send(mmc);
+
+ regmap_write(host->regmap, MESON_SDHC_SEND, send);
+}
+
+static void meson_mx_sdhc_disable_clks(struct mmc_host *mmc)
+{
+ struct meson_mx_sdhc_host *host = mmc_priv(mmc);
+
+ if (!host->bulk_clks_enabled)
+ return;
+
+ clk_bulk_disable_unprepare(MESON_SDHC_NUM_BULK_CLKS, host->bulk_clks);
+
+ host->bulk_clks_enabled = false;
+}
+
+static int meson_mx_sdhc_enable_clks(struct mmc_host *mmc)
+{
+ struct meson_mx_sdhc_host *host = mmc_priv(mmc);
+ int ret;
+
+ if (host->bulk_clks_enabled)
+ return 0;
+
+ ret = clk_bulk_prepare_enable(MESON_SDHC_NUM_BULK_CLKS,
+ host->bulk_clks);
+ if (ret)
+ return ret;
+
+ host->bulk_clks_enabled = true;
+
+ return 0;
+}
+
+static int meson_mx_sdhc_set_clk(struct mmc_host *mmc, struct mmc_ios *ios)
+{
+ struct meson_mx_sdhc_host *host = mmc_priv(mmc);
+ u32 rx_clk_phase;
+ int ret;
+
+ meson_mx_sdhc_disable_clks(mmc);
+
+ if (ios->clock) {
+ ret = clk_set_rate(host->sd_clk, ios->clock);
+ if (ret) {
+ dev_warn(mmc_dev(mmc),
+ "Failed to set MMC clock to %uHz: %d\n",
+ ios->clock, host->error);
+ return ret;
+ }
+
+ ret = meson_mx_sdhc_enable_clks(mmc);
+ if (ret)
+ return ret;
+
+ mmc->actual_clock = clk_get_rate(host->sd_clk);
+
+ /*
+ * According to Amlogic, the following latching points were
+ * selected empirically; there is no (known) formula to
+ * calculate them.
+ */
+ if (mmc->actual_clock > 100000000) {
+ rx_clk_phase = 1;
+ } else if (mmc->actual_clock > 45000000) {
+ if (ios->signal_voltage == MMC_SIGNAL_VOLTAGE_330)
+ rx_clk_phase = 15;
+ else
+ rx_clk_phase = 11;
+ } else if (mmc->actual_clock >= 25000000) {
+ rx_clk_phase = 15;
+ } else if (mmc->actual_clock > 5000000) {
+ rx_clk_phase = 23;
+ } else if (mmc->actual_clock > 1000000) {
+ rx_clk_phase = 55;
+ } else {
+ rx_clk_phase = 1061;
+ }
+
+ regmap_update_bits(host->regmap, MESON_SDHC_CLK2,
+ MESON_SDHC_CLK2_RX_CLK_PHASE,
+ FIELD_PREP(MESON_SDHC_CLK2_RX_CLK_PHASE,
+ rx_clk_phase));
+ } else {
+ mmc->actual_clock = 0;
+ }
+
+ return 0;
+}
+
+static void meson_mx_sdhc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
+{
+ struct meson_mx_sdhc_host *host = mmc_priv(mmc);
+ unsigned short vdd = ios->vdd;
+
+ switch (ios->power_mode) {
+ case MMC_POWER_OFF:
+ vdd = 0;
+ fallthrough;
+
+ case MMC_POWER_UP:
+ if (!IS_ERR(mmc->supply.vmmc)) {
+ host->error = mmc_regulator_set_ocr(mmc,
+ mmc->supply.vmmc,
+ vdd);
+ if (host->error)
+ return;
+ }
+
+ break;
+
+ case MMC_POWER_ON:
+ break;
+ }
+
+ host->error = meson_mx_sdhc_set_clk(mmc, ios);
+ if (host->error)
+ return;
+
+ switch (ios->bus_width) {
+ case MMC_BUS_WIDTH_1:
+ regmap_update_bits(host->regmap, MESON_SDHC_CTRL,
+ MESON_SDHC_CTRL_DAT_TYPE,
+ FIELD_PREP(MESON_SDHC_CTRL_DAT_TYPE, 0));
+ break;
+
+ case MMC_BUS_WIDTH_4:
+ regmap_update_bits(host->regmap, MESON_SDHC_CTRL,
+ MESON_SDHC_CTRL_DAT_TYPE,
+ FIELD_PREP(MESON_SDHC_CTRL_DAT_TYPE, 1));
+ break;
+
+ case MMC_BUS_WIDTH_8:
+ regmap_update_bits(host->regmap, MESON_SDHC_CTRL,
+ MESON_SDHC_CTRL_DAT_TYPE,
+ FIELD_PREP(MESON_SDHC_CTRL_DAT_TYPE, 2));
+ break;
+
+ default:
+ dev_err(mmc_dev(mmc), "unsupported bus width: %d\n",
+ ios->bus_width);
+ host->error = -EINVAL;
+ return;
+ }
+}
+
+static int meson_mx_sdhc_map_dma(struct mmc_host *mmc, struct mmc_request *mrq)
+{
+ struct mmc_data *data = mrq->data;
+ int dma_len;
+
+ if (!data)
+ return 0;
+
+ dma_len = dma_map_sg(mmc_dev(mmc), data->sg, data->sg_len,
+ mmc_get_dma_dir(data));
+ if (dma_len <= 0) {
+ dev_err(mmc_dev(mmc), "dma_map_sg failed\n");
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void meson_mx_sdhc_request(struct mmc_host *mmc, struct mmc_request *mrq)
+{
+ struct meson_mx_sdhc_host *host = mmc_priv(mmc);
+ struct mmc_command *cmd = mrq->cmd;
+
+ if (!host->error)
+ host->error = meson_mx_sdhc_map_dma(mmc, mrq);
+
+ if (host->error) {
+ cmd->error = host->error;
+ mmc_request_done(mmc, mrq);
+ return;
+ }
+
+ host->mrq = mrq;
+
+ meson_mx_sdhc_start_cmd(mmc, mrq->cmd);
+}
+
+static int meson_mx_sdhc_card_busy(struct mmc_host *mmc)
+{
+ struct meson_mx_sdhc_host *host = mmc_priv(mmc);
+ u32 stat;
+
+ regmap_read(host->regmap, MESON_SDHC_STAT, &stat);
+ return FIELD_GET(MESON_SDHC_STAT_DAT3_0, stat) == 0;
+}
+
+static bool meson_mx_sdhc_tuning_point_matches(struct mmc_host *mmc,
+ u32 opcode)
+{
+ unsigned int i, num_matches = 0;
+ int ret;
+
+ for (i = 0; i < MESON_SDHC_NUM_TUNING_TRIES; i++) {
+ ret = mmc_send_tuning(mmc, opcode, NULL);
+ if (!ret)
+ num_matches++;
+ }
+
+ return num_matches == MESON_SDHC_NUM_TUNING_TRIES;
+}
+
+static int meson_mx_sdhc_execute_tuning(struct mmc_host *mmc, u32 opcode)
+{
+ struct meson_mx_sdhc_host *host = mmc_priv(mmc);
+ int div, start, len, best_start, best_len;
+ int curr_phase, old_phase, new_phase;
+ u32 val;
+
+ len = 0;
+ start = 0;
+ best_len = 0;
+
+ regmap_read(host->regmap, MESON_SDHC_CLK2, &val);
+ old_phase = FIELD_GET(MESON_SDHC_CLK2_RX_CLK_PHASE, val);
+
+ regmap_read(host->regmap, MESON_SDHC_CLKC, &val);
+ div = FIELD_GET(MESON_SDHC_CLKC_CLK_DIV, val);
+
+ for (curr_phase = 0; curr_phase <= div; curr_phase++) {
+ regmap_update_bits(host->regmap, MESON_SDHC_CLK2,
+ MESON_SDHC_CLK2_RX_CLK_PHASE,
+ FIELD_PREP(MESON_SDHC_CLK2_RX_CLK_PHASE,
+ curr_phase));
+
+ if (meson_mx_sdhc_tuning_point_matches(mmc, opcode)) {
+ if (!len) {
+ start = curr_phase;
+
+ dev_dbg(mmc_dev(mmc),
+ "New RX phase window starts at %u\n",
+ start);
+ }
+
+ len++;
+ } else {
+ if (len > best_len) {
+ best_start = start;
+ best_len = len;
+
+ dev_dbg(mmc_dev(mmc),
+ "New best RX phase window: %u - %u\n",
+ best_start, best_start + best_len);
+ }
+
+ /* reset the current window */
+ len = 0;
+ }
+ }
+
+ if (len > best_len)
+ /* the last window is the best (or possibly only) window */
+ new_phase = start + (len / 2);
+ else if (best_len)
+ /* there was a better window than the last */
+ new_phase = best_start + (best_len / 2);
+ else
+ /* no window was found at all, reset to the original phase */
+ new_phase = old_phase;
+
+ regmap_update_bits(host->regmap, MESON_SDHC_CLK2,
+ MESON_SDHC_CLK2_RX_CLK_PHASE,
+ FIELD_PREP(MESON_SDHC_CLK2_RX_CLK_PHASE,
+ new_phase));
+
+ if (!len && !best_len)
+ return -EIO;
+
+ dev_dbg(mmc_dev(mmc), "Tuned RX clock phase to %u\n", new_phase);
+
+ return 0;
+}
+
+static const struct mmc_host_ops meson_mx_sdhc_ops = {
+ .hw_reset = meson_mx_sdhc_hw_reset,
+ .request = meson_mx_sdhc_request,
+ .set_ios = meson_mx_sdhc_set_ios,
+ .card_busy = meson_mx_sdhc_card_busy,
+ .execute_tuning = meson_mx_sdhc_execute_tuning,
+ .get_cd = mmc_gpio_get_cd,
+ .get_ro = mmc_gpio_get_ro,
+};
+
+static void meson_mx_sdhc_request_done(struct meson_mx_sdhc_host *host)
+{
+ struct mmc_request *mrq = host->mrq;
+ struct mmc_host *mmc = host->mmc;
+
+ /* disable interrupts and mask all pending ones */
+ regmap_update_bits(host->regmap, MESON_SDHC_ICTL,
+ MESON_SDHC_ICTL_ALL_IRQS, 0);
+ regmap_update_bits(host->regmap, MESON_SDHC_ISTA,
+ MESON_SDHC_ISTA_ALL_IRQS, MESON_SDHC_ISTA_ALL_IRQS);
+
+ host->mrq = NULL;
+ host->cmd = NULL;
+
+ mmc_request_done(mmc, mrq);
+}
+
+static u32 meson_mx_sdhc_read_response(struct meson_mx_sdhc_host *host, u8 idx)
+{
+ u32 val;
+
+ regmap_update_bits(host->regmap, MESON_SDHC_PDMA,
+ MESON_SDHC_PDMA_DMA_MODE, 0);
+
+ regmap_update_bits(host->regmap, MESON_SDHC_PDMA,
+ MESON_SDHC_PDMA_PIO_RDRESP,
+ FIELD_PREP(MESON_SDHC_PDMA_PIO_RDRESP, idx));
+
+ regmap_read(host->regmap, MESON_SDHC_ARGU, &val);
+
+ return val;
+}
+
+static irqreturn_t meson_mx_sdhc_irq(int irq, void *data)
+{
+ struct meson_mx_sdhc_host *host = data;
+ struct mmc_command *cmd = host->cmd;
+ u32 ictl, ista;
+
+ regmap_read(host->regmap, MESON_SDHC_ICTL, &ictl);
+ regmap_read(host->regmap, MESON_SDHC_ISTA, &ista);
+
+ if (!(ictl & ista))
+ return IRQ_NONE;
+
+ if (ista & MESON_SDHC_ISTA_RXFIFO_FULL ||
+ ista & MESON_SDHC_ISTA_TXFIFO_EMPTY)
+ cmd->error = -EIO;
+ else if (ista & MESON_SDHC_ISTA_RESP_ERR_CRC)
+ cmd->error = -EILSEQ;
+ else if (ista & MESON_SDHC_ISTA_RESP_TIMEOUT)
+ cmd->error = -ETIMEDOUT;
+
+ if (cmd->data) {
+ if (ista & MESON_SDHC_ISTA_DATA_ERR_CRC)
+ cmd->data->error = -EILSEQ;
+ else if (ista & MESON_SDHC_ISTA_DATA_TIMEOUT)
+ cmd->data->error = -ETIMEDOUT;
+ }
+
+ if (cmd->error || (cmd->data && cmd->data->error))
+ dev_dbg(mmc_dev(host->mmc), "CMD%d error, ISTA: 0x%08x\n",
+ cmd->opcode, ista);
+
+ return IRQ_WAKE_THREAD;
+}
+
+static irqreturn_t meson_mx_sdhc_irq_thread(int irq, void *irq_data)
+{
+ struct meson_mx_sdhc_host *host = irq_data;
+ struct mmc_command *cmd;
+ u32 val;
+
+ cmd = host->cmd;
+ if (WARN_ON(!cmd))
+ return IRQ_HANDLED;
+
+ if (cmd->data && !cmd->data->error) {
+ if (!host->platform->hardware_flush_all_cmds &&
+ cmd->data->flags & MMC_DATA_READ) {
+ meson_mx_sdhc_wait_cmd_ready(host->mmc);
+
+ /*
+ * If MESON_SDHC_PDMA_RXFIFO_MANUAL_FLUSH was
+ * previously 0x1 then it has to be set to 0x3. If it
+ * was 0x0 before then it has to be set to 0x2. Without
+ * this, reading SD cards sometimes transfers garbage,
+ * which results in cards not being detected due to:
+ * unrecognised SCR structure version <random number>
+ */
+ val = FIELD_PREP(MESON_SDHC_PDMA_RXFIFO_MANUAL_FLUSH,
+ 2);
+ regmap_update_bits(host->regmap, MESON_SDHC_PDMA, val,
+ val);
+ }
+
+ dma_unmap_sg(mmc_dev(host->mmc), cmd->data->sg,
+ cmd->data->sg_len, mmc_get_dma_dir(cmd->data));
+
+ cmd->data->bytes_xfered = cmd->data->blksz * cmd->data->blocks;
+ }
+
+ meson_mx_sdhc_wait_cmd_ready(host->mmc);
+
+ if (cmd->flags & MMC_RSP_136) {
+ cmd->resp[0] = meson_mx_sdhc_read_response(host, 4);
+ cmd->resp[1] = meson_mx_sdhc_read_response(host, 3);
+ cmd->resp[2] = meson_mx_sdhc_read_response(host, 2);
+ cmd->resp[3] = meson_mx_sdhc_read_response(host, 1);
+ } else {
+ cmd->resp[0] = meson_mx_sdhc_read_response(host, 0);
+ }
+
+ if (cmd->error == -EIO || cmd->error == -ETIMEDOUT)
+ meson_mx_sdhc_hw_reset(host->mmc);
+ else if (cmd->data)
+ /*
+ * Clear the FIFOs after completing data transfers to prevent
+ * corrupting data on write access. It's not clear why this is
+ * needed (for reads and writes), but it mimics what the BSP
+ * kernel did.
+ */
+ meson_mx_sdhc_clear_fifo(host->mmc);
+
+ meson_mx_sdhc_request_done(host);
+
+ return IRQ_HANDLED;
+}
+
+static void meson_mx_sdhc_init_hw_meson8(struct mmc_host *mmc)
+{
+ struct meson_mx_sdhc_host *host = mmc_priv(mmc);
+
+ regmap_write(host->regmap, MESON_SDHC_MISC,
+ FIELD_PREP(MESON_SDHC_MISC_TXSTART_THRES, 7) |
+ FIELD_PREP(MESON_SDHC_MISC_WCRC_ERR_PATT, 5) |
+ FIELD_PREP(MESON_SDHC_MISC_WCRC_OK_PATT, 2));
+
+ regmap_write(host->regmap, MESON_SDHC_ENHC,
+ FIELD_PREP(MESON_SDHC_ENHC_RXFIFO_TH, 63) |
+ MESON_SDHC_ENHC_MESON6_DMA_WR_RESP |
+ FIELD_PREP(MESON_SDHC_ENHC_MESON6_RX_TIMEOUT, 255) |
+ FIELD_PREP(MESON_SDHC_ENHC_SDIO_IRQ_PERIOD, 12));
+};
+
+static void meson_mx_sdhc_set_pdma_meson8(struct mmc_host *mmc)
+{
+ struct meson_mx_sdhc_host *host = mmc_priv(mmc);
+
+ if (host->cmd->data->flags & MMC_DATA_WRITE)
+ regmap_update_bits(host->regmap, MESON_SDHC_PDMA,
+ MESON_SDHC_PDMA_DMA_MODE |
+ MESON_SDHC_PDMA_RD_BURST |
+ MESON_SDHC_PDMA_TXFIFO_FILL,
+ MESON_SDHC_PDMA_DMA_MODE |
+ FIELD_PREP(MESON_SDHC_PDMA_RD_BURST, 31) |
+ MESON_SDHC_PDMA_TXFIFO_FILL);
+ else
+ regmap_update_bits(host->regmap, MESON_SDHC_PDMA,
+ MESON_SDHC_PDMA_DMA_MODE |
+ MESON_SDHC_PDMA_RXFIFO_MANUAL_FLUSH,
+ MESON_SDHC_PDMA_DMA_MODE |
+ FIELD_PREP(MESON_SDHC_PDMA_RXFIFO_MANUAL_FLUSH,
+ 1));
+
+ if (host->cmd->data->flags & MMC_DATA_WRITE)
+ regmap_update_bits(host->regmap, MESON_SDHC_PDMA,
+ MESON_SDHC_PDMA_RD_BURST,
+ FIELD_PREP(MESON_SDHC_PDMA_RD_BURST, 15));
+}
+
+static void meson_mx_sdhc_wait_before_send_meson8(struct mmc_host *mmc)
+{
+ struct meson_mx_sdhc_host *host = mmc_priv(mmc);
+ u32 val;
+ int ret;
+
+ ret = regmap_read_poll_timeout(host->regmap, MESON_SDHC_ESTA, val,
+ val == 0,
+ MESON_SDHC_WAIT_BEFORE_SEND_SLEEP_US,
+ MESON_SDHC_WAIT_BEFORE_SEND_TIMEOUT_US);
+ if (ret)
+ dev_warn(mmc_dev(mmc),
+ "Failed to wait for ESTA to clear: 0x%08x\n", val);
+
+ if (host->cmd->data && host->cmd->data->flags & MMC_DATA_WRITE) {
+ ret = regmap_read_poll_timeout(host->regmap, MESON_SDHC_STAT,
+ val, val & MESON_SDHC_STAT_TXFIFO_CNT,
+ MESON_SDHC_WAIT_BEFORE_SEND_SLEEP_US,
+ MESON_SDHC_WAIT_BEFORE_SEND_TIMEOUT_US);
+ if (ret)
+ dev_warn(mmc_dev(mmc),
+ "Failed to wait for TX FIFO to fill\n");
+ }
+}
+
+static void meson_mx_sdhc_init_hw_meson8m2(struct mmc_host *mmc)
+{
+ struct meson_mx_sdhc_host *host = mmc_priv(mmc);
+
+ regmap_write(host->regmap, MESON_SDHC_MISC,
+ FIELD_PREP(MESON_SDHC_MISC_TXSTART_THRES, 6) |
+ FIELD_PREP(MESON_SDHC_MISC_WCRC_ERR_PATT, 5) |
+ FIELD_PREP(MESON_SDHC_MISC_WCRC_OK_PATT, 2));
+
+ regmap_write(host->regmap, MESON_SDHC_ENHC,
+ FIELD_PREP(MESON_SDHC_ENHC_RXFIFO_TH, 64) |
+ FIELD_PREP(MESON_SDHC_ENHC_MESON8M2_DEBUG, 1) |
+ MESON_SDHC_ENHC_MESON8M2_WRRSP_MODE |
+ FIELD_PREP(MESON_SDHC_ENHC_SDIO_IRQ_PERIOD, 12));
+}
+
+static void meson_mx_sdhc_set_pdma_meson8m2(struct mmc_host *mmc)
+{
+ struct meson_mx_sdhc_host *host = mmc_priv(mmc);
+
+ regmap_update_bits(host->regmap, MESON_SDHC_PDMA,
+ MESON_SDHC_PDMA_DMA_MODE, MESON_SDHC_PDMA_DMA_MODE);
+}
+
+static void meson_mx_sdhc_init_hw(struct mmc_host *mmc)
+{
+ struct meson_mx_sdhc_host *host = mmc_priv(mmc);
+
+ meson_mx_sdhc_hw_reset(mmc);
+
+ regmap_write(host->regmap, MESON_SDHC_CTRL,
+ FIELD_PREP(MESON_SDHC_CTRL_RX_PERIOD, 0xf) |
+ FIELD_PREP(MESON_SDHC_CTRL_RX_TIMEOUT, 0x7f) |
+ FIELD_PREP(MESON_SDHC_CTRL_RX_ENDIAN, 0x7) |
+ FIELD_PREP(MESON_SDHC_CTRL_TX_ENDIAN, 0x7));
+
+ /*
+ * start with a valid divider and enable the memory (un-setting
+ * MESON_SDHC_CLKC_MEM_PWR_OFF).
+ */
+ regmap_write(host->regmap, MESON_SDHC_CLKC, MESON_SDHC_CLKC_CLK_DIV);
+
+ regmap_write(host->regmap, MESON_SDHC_CLK2,
+ FIELD_PREP(MESON_SDHC_CLK2_SD_CLK_PHASE, 1));
+
+ regmap_write(host->regmap, MESON_SDHC_PDMA,
+ MESON_SDHC_PDMA_DMA_URGENT |
+ FIELD_PREP(MESON_SDHC_PDMA_WR_BURST, 7) |
+ FIELD_PREP(MESON_SDHC_PDMA_TXFIFO_TH, 49) |
+ FIELD_PREP(MESON_SDHC_PDMA_RD_BURST, 15) |
+ FIELD_PREP(MESON_SDHC_PDMA_RXFIFO_TH, 7));
+
+ /* some initialization bits depend on the SoC: */
+ host->platform->init_hw(mmc);
+
+ /* disable and mask all interrupts: */
+ regmap_write(host->regmap, MESON_SDHC_ICTL, 0);
+ regmap_write(host->regmap, MESON_SDHC_ISTA, MESON_SDHC_ISTA_ALL_IRQS);
+}
+
+static int meson_mx_sdhc_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct meson_mx_sdhc_host *host;
+ struct mmc_host *mmc;
+ void __iomem *base;
+ int ret, irq;
+
+ mmc = mmc_alloc_host(sizeof(*host), dev);
+ if (!mmc)
+ return -ENOMEM;
+
+ ret = devm_add_action_or_reset(dev, (void(*)(void *))mmc_free_host,
+ mmc);
+ if (ret) {
+ dev_err(dev, "Failed to register mmc_free_host action\n");
+ return ret;
+ }
+
+ host = mmc_priv(mmc);
+ host->mmc = mmc;
+
+ platform_set_drvdata(pdev, host);
+
+ host->platform = device_get_match_data(dev);
+ if (!host->platform)
+ return -EINVAL;
+
+ base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(base))
+ return PTR_ERR(base);
+
+ host->regmap = devm_regmap_init_mmio(dev, base,
+ &meson_mx_sdhc_regmap_config);
+ if (IS_ERR(host->regmap))
+ return PTR_ERR(host->regmap);
+
+ host->pclk = devm_clk_get(dev, "pclk");
+ if (IS_ERR(host->pclk))
+ return PTR_ERR(host->pclk);
+
+ /* accessing any register requires the module clock to be enabled: */
+ ret = clk_prepare_enable(host->pclk);
+ if (ret) {
+ dev_err(dev, "Failed to enable 'pclk' clock\n");
+ return ret;
+ }
+
+ meson_mx_sdhc_init_hw(mmc);
+
+ ret = meson_mx_sdhc_register_clkc(dev, base, host->bulk_clks);
+ if (ret)
+ goto err_disable_pclk;
+
+ host->sd_clk = host->bulk_clks[1].clk;
+
+ /* Get regulators and the supported OCR mask */
+ ret = mmc_regulator_get_supply(mmc);
+ if (ret)
+ goto err_disable_pclk;
+
+ mmc->max_req_size = SZ_128K;
+ mmc->max_seg_size = mmc->max_req_size;
+ mmc->max_blk_count = FIELD_GET(MESON_SDHC_SEND_TOTAL_PACK, ~0);
+ mmc->max_blk_size = MESON_SDHC_MAX_BLK_SIZE;
+ mmc->max_busy_timeout = 30 * MSEC_PER_SEC;
+ mmc->f_min = clk_round_rate(host->sd_clk, 1);
+ mmc->f_max = clk_round_rate(host->sd_clk, ULONG_MAX);
+ mmc->max_current_180 = 300;
+ mmc->max_current_330 = 300;
+ mmc->caps |= MMC_CAP_WAIT_WHILE_BUSY | MMC_CAP_HW_RESET;
+ mmc->ops = &meson_mx_sdhc_ops;
+
+ ret = mmc_of_parse(mmc);
+ if (ret)
+ goto err_disable_pclk;
+
+ irq = platform_get_irq(pdev, 0);
+ ret = devm_request_threaded_irq(dev, irq, meson_mx_sdhc_irq,
+ meson_mx_sdhc_irq_thread, IRQF_ONESHOT,
+ NULL, host);
+ if (ret)
+ goto err_disable_pclk;
+
+ ret = mmc_add_host(mmc);
+ if (ret)
+ goto err_disable_pclk;
+
+ return 0;
+
+err_disable_pclk:
+ clk_disable_unprepare(host->pclk);
+ return ret;
+}
+
+static int meson_mx_sdhc_remove(struct platform_device *pdev)
+{
+ struct meson_mx_sdhc_host *host = platform_get_drvdata(pdev);
+
+ mmc_remove_host(host->mmc);
+
+ meson_mx_sdhc_disable_clks(host->mmc);
+
+ clk_disable_unprepare(host->pclk);
+
+ return 0;
+}
+
+static const struct meson_mx_sdhc_data meson_mx_sdhc_data_meson8 = {
+ .init_hw = meson_mx_sdhc_init_hw_meson8,
+ .set_pdma = meson_mx_sdhc_set_pdma_meson8,
+ .wait_before_send = meson_mx_sdhc_wait_before_send_meson8,
+ .hardware_flush_all_cmds = false,
+};
+
+static const struct meson_mx_sdhc_data meson_mx_sdhc_data_meson8m2 = {
+ .init_hw = meson_mx_sdhc_init_hw_meson8m2,
+ .set_pdma = meson_mx_sdhc_set_pdma_meson8m2,
+ .hardware_flush_all_cmds = true,
+};
+
+static const struct of_device_id meson_mx_sdhc_of_match[] = {
+ {
+ .compatible = "amlogic,meson8-sdhc",
+ .data = &meson_mx_sdhc_data_meson8
+ },
+ {
+ .compatible = "amlogic,meson8b-sdhc",
+ .data = &meson_mx_sdhc_data_meson8
+ },
+ {
+ .compatible = "amlogic,meson8m2-sdhc",
+ .data = &meson_mx_sdhc_data_meson8m2
+ },
+ { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, meson_mx_sdhc_of_match);
+
+static struct platform_driver meson_mx_sdhc_driver = {
+ .probe = meson_mx_sdhc_probe,
+ .remove = meson_mx_sdhc_remove,
+ .driver = {
+ .name = "meson-mx-sdhc",
+ .of_match_table = of_match_ptr(meson_mx_sdhc_of_match),
+ },
+};
+
+module_platform_driver(meson_mx_sdhc_driver);
+
+MODULE_DESCRIPTION("Meson6, Meson8, Meson8b and Meson8m2 SDHC Host Driver");
+MODULE_AUTHOR("Martin Blumenstingl <martin.blumenstingl@googlemail.com>");
+MODULE_LICENSE("GPL v2");
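
The RX clock phase tuning above scans every divider phase, keeps the longest run of phases for which mmc_send_tuning() succeeds repeatedly, and programs the centre of that run. A standalone sketch of the selection step, assuming a plain array of per-phase results (illustrative only, not part of the patch):

static unsigned int pick_centre_of_best_window(const bool *ok, unsigned int n,
					       unsigned int fallback)
{
	unsigned int i, start = 0, len = 0, best_start = 0, best_len = 0;

	for (i = 0; i < n; i++) {
		if (ok[i]) {
			if (!len)
				start = i;
			len++;
			continue;
		}
		if (len > best_len) {
			best_start = start;
			best_len = len;
		}
		len = 0;
	}

	/* the last window may run to the end of the scan */
	if (len > best_len) {
		best_start = start;
		best_len = len;
	}

	return best_len ? best_start + best_len / 2 : fallback;
}
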
diff --git a/drivers/mmc/host/meson-mx-sdhc.h b/drivers/mmc/host/meson-mx-sdhc.h
new file mode 100644
index 000000000000..230e8fbe6b3f
--- /dev/null
+++ b/drivers/mmc/host/meson-mx-sdhc.h
@@ -0,0 +1,141 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright (C) 2020 Martin Blumenstingl <martin.blumenstingl@googlemail.com>
+ */
+
+#ifndef _MESON_MX_SDHC_H_
+#define _MESON_MX_SDHC_H_
+
+#include <linux/bitfield.h>
+
+#define MESON_SDHC_ARGU 0x00
+
+#define MESON_SDHC_SEND 0x04
+ #define MESON_SDHC_SEND_CMD_INDEX GENMASK(5, 0)
+ #define MESON_SDHC_SEND_CMD_HAS_RESP BIT(6)
+ #define MESON_SDHC_SEND_CMD_HAS_DATA BIT(7)
+ #define MESON_SDHC_SEND_RESP_LEN BIT(8)
+ #define MESON_SDHC_SEND_RESP_NO_CRC BIT(9)
+ #define MESON_SDHC_SEND_DATA_DIR BIT(10)
+ #define MESON_SDHC_SEND_DATA_STOP BIT(11)
+ #define MESON_SDHC_SEND_R1B BIT(12)
+ #define MESON_SDHC_SEND_TOTAL_PACK GENMASK(31, 16)
+
+#define MESON_SDHC_CTRL 0x08
+ #define MESON_SDHC_CTRL_DAT_TYPE GENMASK(1, 0)
+ #define MESON_SDHC_CTRL_DDR_MODE BIT(2)
+ #define MESON_SDHC_CTRL_TX_CRC_NOCHECK BIT(3)
+ #define MESON_SDHC_CTRL_PACK_LEN GENMASK(12, 4)
+ #define MESON_SDHC_CTRL_RX_TIMEOUT GENMASK(19, 13)
+ #define MESON_SDHC_CTRL_RX_PERIOD GENMASK(23, 20)
+ #define MESON_SDHC_CTRL_RX_ENDIAN GENMASK(26, 24)
+ #define MESON_SDHC_CTRL_SDIO_IRQ_MODE BIT(27)
+ #define MESON_SDHC_CTRL_DAT0_IRQ_SEL BIT(28)
+ #define MESON_SDHC_CTRL_TX_ENDIAN GENMASK(31, 29)
+
+#define MESON_SDHC_STAT 0x0c
+ #define MESON_SDHC_STAT_CMD_BUSY BIT(0)
+ #define MESON_SDHC_STAT_DAT3_0 GENMASK(4, 1)
+ #define MESON_SDHC_STAT_CMD BIT(5)
+ #define MESON_SDHC_STAT_RXFIFO_CNT GENMASK(12, 6)
+ #define MESON_SDHC_STAT_TXFIFO_CNT GENMASK(19, 13)
+ #define MESON_SDHC_STAT_DAT7_4 GENMASK(23, 20)
+
+#define MESON_SDHC_CLKC 0x10
+ #define MESON_SDHC_CLKC_CLK_DIV GENMASK(11, 0)
+ #define MESON_SDHC_CLKC_CLK_JIC BIT(24)
+ #define MESON_SDHC_CLKC_MEM_PWR_OFF GENMASK(26, 25)
+
+#define MESON_SDHC_ADDR 0x14
+
+#define MESON_SDHC_PDMA 0x18
+ #define MESON_SDHC_PDMA_DMA_MODE BIT(0)
+ #define MESON_SDHC_PDMA_PIO_RDRESP GENMASK(3, 1)
+ #define MESON_SDHC_PDMA_DMA_URGENT BIT(4)
+ #define MESON_SDHC_PDMA_WR_BURST GENMASK(9, 5)
+ #define MESON_SDHC_PDMA_RD_BURST GENMASK(14, 10)
+ #define MESON_SDHC_PDMA_RXFIFO_TH GENMASK(21, 15)
+ #define MESON_SDHC_PDMA_TXFIFO_TH GENMASK(28, 22)
+ #define MESON_SDHC_PDMA_RXFIFO_MANUAL_FLUSH GENMASK(30, 29)
+ #define MESON_SDHC_PDMA_TXFIFO_FILL BIT(31)
+
+#define MESON_SDHC_MISC 0x1c
+ #define MESON_SDHC_MISC_WCRC_ERR_PATT GENMASK(6, 4)
+ #define MESON_SDHC_MISC_WCRC_OK_PATT GENMASK(9, 7)
+ #define MESON_SDHC_MISC_BURST_NUM GENMASK(21, 16)
+ #define MESON_SDHC_MISC_THREAD_ID GENMASK(27, 22)
+ #define MESON_SDHC_MISC_MANUAL_STOP BIT(28)
+ #define MESON_SDHC_MISC_TXSTART_THRES GENMASK(31, 29)
+
+#define MESON_SDHC_DATA 0x20
+
+#define MESON_SDHC_ICTL 0x24
+ #define MESON_SDHC_ICTL_RESP_OK BIT(0)
+ #define MESON_SDHC_ICTL_RESP_TIMEOUT BIT(1)
+ #define MESON_SDHC_ICTL_RESP_ERR_CRC BIT(2)
+ #define MESON_SDHC_ICTL_RESP_OK_NOCLEAR BIT(3)
+ #define MESON_SDHC_ICTL_DATA_1PACK_OK BIT(4)
+ #define MESON_SDHC_ICTL_DATA_TIMEOUT BIT(5)
+ #define MESON_SDHC_ICTL_DATA_ERR_CRC BIT(6)
+ #define MESON_SDHC_ICTL_DATA_XFER_OK BIT(7)
+ #define MESON_SDHC_ICTL_RX_HIGHER BIT(8)
+ #define MESON_SDHC_ICTL_RX_LOWER BIT(9)
+ #define MESON_SDHC_ICTL_DAT1_IRQ BIT(10)
+ #define MESON_SDHC_ICTL_DMA_DONE BIT(11)
+ #define MESON_SDHC_ICTL_RXFIFO_FULL BIT(12)
+ #define MESON_SDHC_ICTL_TXFIFO_EMPTY BIT(13)
+ #define MESON_SDHC_ICTL_ADDI_DAT1_IRQ BIT(14)
+ #define MESON_SDHC_ICTL_ALL_IRQS GENMASK(14, 0)
+ #define MESON_SDHC_ICTL_DAT1_IRQ_DELAY GENMASK(17, 16)
+
+#define MESON_SDHC_ISTA 0x28
+ #define MESON_SDHC_ISTA_RESP_OK BIT(0)
+ #define MESON_SDHC_ISTA_RESP_TIMEOUT BIT(1)
+ #define MESON_SDHC_ISTA_RESP_ERR_CRC BIT(2)
+ #define MESON_SDHC_ISTA_RESP_OK_NOCLEAR BIT(3)
+ #define MESON_SDHC_ISTA_DATA_1PACK_OK BIT(4)
+ #define MESON_SDHC_ISTA_DATA_TIMEOUT BIT(5)
+ #define MESON_SDHC_ISTA_DATA_ERR_CRC BIT(6)
+ #define MESON_SDHC_ISTA_DATA_XFER_OK BIT(7)
+ #define MESON_SDHC_ISTA_RX_HIGHER BIT(8)
+ #define MESON_SDHC_ISTA_RX_LOWER BIT(9)
+ #define MESON_SDHC_ISTA_DAT1_IRQ BIT(10)
+ #define MESON_SDHC_ISTA_DMA_DONE BIT(11)
+ #define MESON_SDHC_ISTA_RXFIFO_FULL BIT(12)
+ #define MESON_SDHC_ISTA_TXFIFO_EMPTY BIT(13)
+ #define MESON_SDHC_ISTA_ADDI_DAT1_IRQ BIT(14)
+ #define MESON_SDHC_ISTA_ALL_IRQS GENMASK(14, 0)
+
+#define MESON_SDHC_SRST 0x2c
+ #define MESON_SDHC_SRST_MAIN_CTRL BIT(0)
+ #define MESON_SDHC_SRST_RXFIFO BIT(1)
+ #define MESON_SDHC_SRST_TXFIFO BIT(2)
+ #define MESON_SDHC_SRST_DPHY_RX BIT(3)
+ #define MESON_SDHC_SRST_DPHY_TX BIT(4)
+ #define MESON_SDHC_SRST_DMA_IF BIT(5)
+
+#define MESON_SDHC_ESTA 0x30
+ #define MESON_SDHC_ESTA_11_13 GENMASK(13, 11)
+
+#define MESON_SDHC_ENHC 0x34
+ #define MESON_SDHC_ENHC_MESON8M2_WRRSP_MODE BIT(0)
+ #define MESON_SDHC_ENHC_MESON8M2_CHK_WRRSP BIT(1)
+ #define MESON_SDHC_ENHC_MESON8M2_CHK_DMA BIT(2)
+ #define MESON_SDHC_ENHC_MESON8M2_DEBUG GENMASK(5, 3)
+ #define MESON_SDHC_ENHC_MESON6_RX_TIMEOUT GENMASK(7, 0)
+ #define MESON_SDHC_ENHC_MESON6_DMA_RD_RESP BIT(16)
+ #define MESON_SDHC_ENHC_MESON6_DMA_WR_RESP BIT(17)
+ #define MESON_SDHC_ENHC_SDIO_IRQ_PERIOD GENMASK(15, 8)
+ #define MESON_SDHC_ENHC_RXFIFO_TH GENMASK(24, 18)
+ #define MESON_SDHC_ENHC_TXFIFO_TH GENMASK(31, 25)
+
+#define MESON_SDHC_CLK2 0x38
+ #define MESON_SDHC_CLK2_RX_CLK_PHASE GENMASK(11, 0)
+ #define MESON_SDHC_CLK2_SD_CLK_PHASE GENMASK(23, 12)
+
+struct clk_bulk_data;
+
+int meson_mx_sdhc_register_clkc(struct device *dev, void __iomem *base,
+ struct clk_bulk_data *clk_bulk_data);
+
+#endif /* _MESON_MX_SDHC_H_ */
diff --git a/drivers/mmc/host/meson-mx-sdio.c b/drivers/mmc/host/meson-mx-sdio.c
index 2e58743d83bb..9b2cf7afc246 100644
--- a/drivers/mmc/host/meson-mx-sdio.c
+++ b/drivers/mmc/host/meson-mx-sdio.c
@@ -246,6 +246,9 @@ static void meson_mx_mmc_request_done(struct meson_mx_mmc_host *host)
mrq = host->mrq;
+ if (host->cmd->error)
+ meson_mx_mmc_soft_reset(host);
+
host->mrq = NULL;
host->cmd = NULL;
@@ -561,7 +564,7 @@ static int meson_mx_mmc_add_host(struct meson_mx_mmc_host *host)
mmc->f_max = clk_round_rate(host->cfg_div_clk,
clk_get_rate(host->parent_clk));
- mmc->caps |= MMC_CAP_ERASE | MMC_CAP_CMD23 | MMC_CAP_WAIT_WHILE_BUSY;
+ mmc->caps |= MMC_CAP_CMD23 | MMC_CAP_WAIT_WHILE_BUSY;
mmc->ops = &meson_mx_mmc_ops;
ret = mmc_of_parse(mmc);
diff --git a/drivers/mmc/host/mmc_hsq.c b/drivers/mmc/host/mmc_hsq.c
index b90b2c97b6cf..a5e05ed0fda3 100644
--- a/drivers/mmc/host/mmc_hsq.c
+++ b/drivers/mmc/host/mmc_hsq.c
@@ -16,11 +16,20 @@
#define HSQ_NUM_SLOTS 64
#define HSQ_INVALID_TAG HSQ_NUM_SLOTS
+static void mmc_hsq_retry_handler(struct work_struct *work)
+{
+ struct mmc_hsq *hsq = container_of(work, struct mmc_hsq, retry_work);
+ struct mmc_host *mmc = hsq->mmc;
+
+ mmc->ops->request(mmc, hsq->mrq);
+}
+
static void mmc_hsq_pump_requests(struct mmc_hsq *hsq)
{
struct mmc_host *mmc = hsq->mmc;
struct hsq_slot *slot;
unsigned long flags;
+ int ret = 0;
spin_lock_irqsave(&hsq->lock, flags);
@@ -42,7 +51,24 @@ static void mmc_hsq_pump_requests(struct mmc_hsq *hsq)
spin_unlock_irqrestore(&hsq->lock, flags);
- mmc->ops->request(mmc, hsq->mrq);
+ if (mmc->ops->request_atomic)
+ ret = mmc->ops->request_atomic(mmc, hsq->mrq);
+ else
+ mmc->ops->request(mmc, hsq->mrq);
+
+ /*
+ * If request_atomic() returns -EBUSY, the card may be busy right now,
+ * so change to a non-atomic context and try again for this unusual
+ * case, to avoid time-consuming operations in the atomic context.
+ *
+ * Note: we just give a warning for other error cases, since the host
+ * driver will handle them.
+ */
+ if (ret == -EBUSY)
+ schedule_work(&hsq->retry_work);
+ else
+ WARN_ON_ONCE(ret);
}
static void mmc_hsq_update_next_tag(struct mmc_hsq *hsq, int remains)
@@ -325,6 +351,7 @@ int mmc_hsq_init(struct mmc_hsq *hsq, struct mmc_host *mmc)
hsq->mmc->cqe_private = hsq;
mmc->cqe_ops = &mmc_hsq_ops;
+ INIT_WORK(&hsq->retry_work, mmc_hsq_retry_handler);
spin_lock_init(&hsq->lock);
init_waitqueue_head(&hsq->wait_queue);
diff --git a/drivers/mmc/host/mmc_hsq.h b/drivers/mmc/host/mmc_hsq.h
index 18b9cf55925f..ffdd9cd172c3 100644
--- a/drivers/mmc/host/mmc_hsq.h
+++ b/drivers/mmc/host/mmc_hsq.h
@@ -12,6 +12,7 @@ struct mmc_hsq {
wait_queue_head_t wait_queue;
struct hsq_slot *slot;
spinlock_t lock;
+ struct work_struct retry_work;
int next_tag;
int num_slots;
diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c
index 951f76dc1ddd..39bb1e30c2d7 100644
--- a/drivers/mmc/host/mmc_spi.c
+++ b/drivers/mmc/host/mmc_spi.c
@@ -77,14 +77,8 @@
#define MMC_SPI_BLOCKSIZE 512
-
-/* These fixed timeouts come from the latest SD specs, which say to ignore
- * the CSD values. The R1B value is for card erase (e.g. the "I forgot the
- * card's password" scenario); it's mostly applied to STOP_TRANSMISSION after
- * reads which takes nowhere near that long. Older cards may be able to use
- * shorter timeouts ... but why bother?
- */
-#define r1b_timeout (HZ * 3)
+#define MMC_SPI_R1B_TIMEOUT_MS 3000
+#define MMC_SPI_INIT_TIMEOUT_MS 3000
/* One of the critical speed parameters is the amount of data which may
* be transferred in one command. If this value is too low, the SD card
@@ -248,6 +242,7 @@ static char *maptype(struct mmc_command *cmd)
static int mmc_spi_response_get(struct mmc_spi_host *host,
struct mmc_command *cmd, int cs_on)
{
+ unsigned long timeout_ms;
u8 *cp = host->data->status;
u8 *end = cp + host->t.len;
int value = 0;
@@ -346,8 +341,11 @@ checkstatus:
/* maybe we read all the busy tokens already */
while (cp < end && *cp == 0)
cp++;
- if (cp == end)
- mmc_spi_wait_unbusy(host, r1b_timeout);
+ if (cp == end) {
+ timeout_ms = cmd->busy_timeout ? cmd->busy_timeout :
+ MMC_SPI_R1B_TIMEOUT_MS;
+ mmc_spi_wait_unbusy(host, msecs_to_jiffies(timeout_ms));
+ }
break;
/* SPI R2 == R1 + second status byte; SEND_STATUS
@@ -1118,7 +1116,7 @@ static void mmc_spi_initsequence(struct mmc_spi_host *host)
/* Try to be very sure any previous command has completed;
* wait till not-busy, skip debris from any old commands.
*/
- mmc_spi_wait_unbusy(host, r1b_timeout);
+ mmc_spi_wait_unbusy(host, msecs_to_jiffies(MMC_SPI_INIT_TIMEOUT_MS));
mmc_spi_readbytes(host, 10);
/*
diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
index 647567def612..a69d6a0c2e15 100644
--- a/drivers/mmc/host/mmci.c
+++ b/drivers/mmc/host/mmci.c
@@ -1861,31 +1861,17 @@ static int mmci_get_cd(struct mmc_host *mmc)
static int mmci_sig_volt_switch(struct mmc_host *mmc, struct mmc_ios *ios)
{
struct mmci_host *host = mmc_priv(mmc);
- int ret = 0;
-
- if (!IS_ERR(mmc->supply.vqmmc)) {
+ int ret;
- switch (ios->signal_voltage) {
- case MMC_SIGNAL_VOLTAGE_330:
- ret = regulator_set_voltage(mmc->supply.vqmmc,
- 2700000, 3600000);
- break;
- case MMC_SIGNAL_VOLTAGE_180:
- ret = regulator_set_voltage(mmc->supply.vqmmc,
- 1700000, 1950000);
- break;
- case MMC_SIGNAL_VOLTAGE_120:
- ret = regulator_set_voltage(mmc->supply.vqmmc,
- 1100000, 1300000);
- break;
- }
+ ret = mmc_regulator_set_vqmmc(mmc, ios);
- if (!ret && host->ops && host->ops->post_sig_volt_switch)
- ret = host->ops->post_sig_volt_switch(host, ios);
+ if (!ret && host->ops && host->ops->post_sig_volt_switch)
+ ret = host->ops->post_sig_volt_switch(host, ios);
+ else if (ret)
+ ret = 0;
- if (ret)
- dev_warn(mmc_dev(mmc), "Voltage switch failed\n");
- }
+ if (ret < 0)
+ dev_warn(mmc_dev(mmc), "Voltage switch failed\n");
return ret;
}
diff --git a/drivers/mmc/host/mmci_stm32_sdmmc.c b/drivers/mmc/host/mmci_stm32_sdmmc.c
index d33e62bd6153..51db30acf4dc 100644
--- a/drivers/mmc/host/mmci_stm32_sdmmc.c
+++ b/drivers/mmc/host/mmci_stm32_sdmmc.c
@@ -119,20 +119,19 @@ static void sdmmc_idma_unprep_data(struct mmci_host *host,
static int sdmmc_idma_setup(struct mmci_host *host)
{
struct sdmmc_idma *idma;
+ struct device *dev = mmc_dev(host->mmc);
- idma = devm_kzalloc(mmc_dev(host->mmc), sizeof(*idma), GFP_KERNEL);
+ idma = devm_kzalloc(dev, sizeof(*idma), GFP_KERNEL);
if (!idma)
return -ENOMEM;
host->dma_priv = idma;
if (host->variant->dma_lli) {
- idma->sg_cpu = dmam_alloc_coherent(mmc_dev(host->mmc),
- SDMMC_LLI_BUF_LEN,
+ idma->sg_cpu = dmam_alloc_coherent(dev, SDMMC_LLI_BUF_LEN,
&idma->sg_dma, GFP_KERNEL);
if (!idma->sg_cpu) {
- dev_err(mmc_dev(host->mmc),
- "Failed to alloc IDMA descriptor\n");
+ dev_err(dev, "Failed to alloc IDMA descriptor\n");
return -ENOMEM;
}
host->mmc->max_segs = SDMMC_LLI_BUF_LEN /
@@ -143,7 +142,7 @@ static int sdmmc_idma_setup(struct mmci_host *host)
host->mmc->max_seg_size = host->mmc->max_req_size;
}
- return 0;
+ return dma_set_max_seg_size(dev, host->mmc->max_seg_size);
}
static int sdmmc_idma_start(struct mmci_host *host, unsigned int *datactrl)
@@ -188,6 +187,9 @@ static int sdmmc_idma_start(struct mmci_host *host, unsigned int *datactrl)
static void sdmmc_idma_finalize(struct mmci_host *host, struct mmc_data *data)
{
writel_relaxed(0, host->base + MMCI_STM32_IDMACTRLR);
+
+ if (!data->host_cookie)
+ sdmmc_idma_unprep_data(host, data, 0);
}
static void mmci_sdmmc_set_clkreg(struct mmci_host *host, unsigned int desired)
@@ -519,6 +521,7 @@ void sdmmc_variant_init(struct mmci_host *host)
struct sdmmc_dlyb *dlyb;
host->ops = &sdmmc_variant_ops;
+ host->pwr_reg = readl_relaxed(host->base + MMCIPOWER);
base_dlyb = devm_of_iomap(mmc_dev(host->mmc), np, 1, NULL);
if (IS_ERR(base_dlyb))
diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c
index b221c02cc71f..39e7fc54c438 100644
--- a/drivers/mmc/host/mtk-sd.c
+++ b/drivers/mmc/host/mtk-sd.c
@@ -1369,7 +1369,7 @@ static void msdc_set_buswidth(struct msdc_host *host, u32 width)
static int msdc_ops_switch_volt(struct mmc_host *mmc, struct mmc_ios *ios)
{
struct msdc_host *host = mmc_priv(mmc);
- int ret = 0;
+ int ret;
if (!IS_ERR(mmc->supply.vqmmc)) {
if (ios->signal_voltage != MMC_SIGNAL_VOLTAGE_330 &&
@@ -1379,18 +1379,19 @@ static int msdc_ops_switch_volt(struct mmc_host *mmc, struct mmc_ios *ios)
}
ret = mmc_regulator_set_vqmmc(mmc, ios);
- if (ret) {
+ if (ret < 0) {
dev_dbg(host->dev, "Regulator set error %d (%d)\n",
ret, ios->signal_voltage);
- } else {
- /* Apply different pinctrl settings for different signal voltage */
- if (ios->signal_voltage == MMC_SIGNAL_VOLTAGE_180)
- pinctrl_select_state(host->pinctrl, host->pins_uhs);
- else
- pinctrl_select_state(host->pinctrl, host->pins_default);
+ return ret;
}
+
+ /* Apply different pinctrl settings for different signal voltage */
+ if (ios->signal_voltage == MMC_SIGNAL_VOLTAGE_180)
+ pinctrl_select_state(host->pinctrl, host->pins_uhs);
+ else
+ pinctrl_select_state(host->pinctrl, host->pins_default);
}
- return ret;
+ return 0;
}
static int msdc_card_busy(struct mmc_host *mmc)
@@ -2325,7 +2326,7 @@ static int msdc_drv_probe(struct platform_device *pdev)
if (mmc->caps & MMC_CAP_SDIO_IRQ)
mmc->caps2 |= MMC_CAP2_SDIO_IRQ_NOTHREAD;
- mmc->caps |= MMC_CAP_ERASE | MMC_CAP_CMD23;
+ mmc->caps |= MMC_CAP_CMD23;
/* MMC core transfer sizes tunable parameters */
mmc->max_segs = MAX_BD_NUM;
if (host->dev_comp->support_64g)
diff --git a/drivers/mmc/host/mvsdio.c b/drivers/mmc/host/mvsdio.c
index 203b61712601..cc0752a9df6d 100644
--- a/drivers/mmc/host/mvsdio.c
+++ b/drivers/mmc/host/mvsdio.c
@@ -752,8 +752,6 @@ static int mvsd_probe(struct platform_device *pdev)
if (maxfreq)
mmc->f_max = maxfreq;
- mmc->caps |= MMC_CAP_ERASE;
-
spin_lock_init(&host->lock);
host->base = devm_platform_ioremap_resource(pdev, 0);
diff --git a/drivers/mmc/host/mxs-mmc.c b/drivers/mmc/host/mxs-mmc.c
index d82674aed447..b1820def36c0 100644
--- a/drivers/mmc/host/mxs-mmc.c
+++ b/drivers/mmc/host/mxs-mmc.c
@@ -634,8 +634,7 @@ static int mxs_mmc_probe(struct platform_device *pdev)
/* set mmc core parameters */
mmc->ops = &mxs_mmc_ops;
mmc->caps = MMC_CAP_SD_HIGHSPEED | MMC_CAP_MMC_HIGHSPEED |
- MMC_CAP_SDIO_IRQ | MMC_CAP_NEEDS_POLL | MMC_CAP_CMD23 |
- MMC_CAP_ERASE;
+ MMC_CAP_SDIO_IRQ | MMC_CAP_NEEDS_POLL | MMC_CAP_CMD23;
host->broken_cd = of_property_read_bool(np, "broken-cd");
diff --git a/drivers/mmc/host/omap.c b/drivers/mmc/host/omap.c
index d74e73c95fdf..33d7af7c7762 100644
--- a/drivers/mmc/host/omap.c
+++ b/drivers/mmc/host/omap.c
@@ -1244,7 +1244,7 @@ static int mmc_omap_new_slot(struct mmc_omap_host *host, int id)
mmc->caps = 0;
if (host->pdata->slots[id].wires >= 4)
- mmc->caps |= MMC_CAP_4_BIT_DATA | MMC_CAP_ERASE;
+ mmc->caps |= MMC_CAP_4_BIT_DATA;
mmc->ops = &mmc_omap_ops;
mmc->f_min = 400000;
diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index a379c45b985c..37b8740513f5 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c
@@ -1922,7 +1922,7 @@ static int omap_hsmmc_probe(struct platform_device *pdev)
mmc->max_req_size = mmc->max_blk_size * mmc->max_blk_count;
mmc->caps |= MMC_CAP_MMC_HIGHSPEED | MMC_CAP_SD_HIGHSPEED |
- MMC_CAP_WAIT_WHILE_BUSY | MMC_CAP_ERASE | MMC_CAP_CMD23;
+ MMC_CAP_WAIT_WHILE_BUSY | MMC_CAP_CMD23;
mmc->caps |= mmc_pdata(host)->caps;
if (mmc->caps & MMC_CAP_8_BIT_DATA)
diff --git a/drivers/mmc/host/owl-mmc.c b/drivers/mmc/host/owl-mmc.c
index 01ffe51f413d..5e20c099fe03 100644
--- a/drivers/mmc/host/owl-mmc.c
+++ b/drivers/mmc/host/owl-mmc.c
@@ -92,6 +92,8 @@
#define OWL_SD_STATE_RC16ER BIT(1)
#define OWL_SD_STATE_CRC7ER BIT(0)
+#define OWL_CMD_TIMEOUT_MS 30000
+
struct owl_mmc_host {
struct device *dev;
struct reset_control *reset;
@@ -172,6 +174,7 @@ static void owl_mmc_send_cmd(struct owl_mmc_host *owl_host,
struct mmc_command *cmd,
struct mmc_data *data)
{
+ unsigned long timeout;
u32 mode, state, resp[2];
u32 cmd_rsp_mask = 0;
@@ -239,7 +242,10 @@ static void owl_mmc_send_cmd(struct owl_mmc_host *owl_host,
if (data)
return;
- if (!wait_for_completion_timeout(&owl_host->sdc_complete, 30 * HZ)) {
+ timeout = msecs_to_jiffies(cmd->busy_timeout ? cmd->busy_timeout :
+ OWL_CMD_TIMEOUT_MS);
+
+ if (!wait_for_completion_timeout(&owl_host->sdc_complete, timeout)) {
dev_err(owl_host->dev, "CMD interrupt timeout\n");
cmd->error = -ETIMEDOUT;
return;
diff --git a/drivers/mmc/host/renesas_sdhi.h b/drivers/mmc/host/renesas_sdhi.h
index 2a4c83a5f32e..14c64caefc64 100644
--- a/drivers/mmc/host/renesas_sdhi.h
+++ b/drivers/mmc/host/renesas_sdhi.h
@@ -36,6 +36,7 @@ struct renesas_sdhi_of_data {
struct renesas_sdhi_quirks {
bool hs400_disabled;
bool hs400_4taps;
+ u32 hs400_bad_taps;
};
struct tmio_mmc_dma {
@@ -61,8 +62,10 @@ struct renesas_sdhi {
/* Tuning values: 1 for success, 0 for failure */
DECLARE_BITMAP(taps, BITS_PER_LONG);
+ /* Sampling data comparison: 1 for match, 0 for mismatch */
+ DECLARE_BITMAP(smpcmp, BITS_PER_LONG);
unsigned int tap_num;
- unsigned long tap_set;
+ unsigned int tap_set;
};
#define host_to_priv(host) \
diff --git a/drivers/mmc/host/renesas_sdhi_core.c b/drivers/mmc/host/renesas_sdhi_core.c
index df826661366f..15e21894bd44 100644
--- a/drivers/mmc/host/renesas_sdhi_core.c
+++ b/drivers/mmc/host/renesas_sdhi_core.c
@@ -24,6 +24,7 @@
#include <linux/module.h>
#include <linux/of_device.h>
#include <linux/platform_device.h>
+#include <linux/pm_domain.h>
#include <linux/mmc/host.h>
#include <linux/mmc/slot-gpio.h>
#include <linux/mfd/tmio.h>
@@ -82,16 +83,11 @@ static int renesas_sdhi_clk_enable(struct tmio_mmc_host *host)
{
struct mmc_host *mmc = host->mmc;
struct renesas_sdhi *priv = host_to_priv(host);
- int ret = clk_prepare_enable(priv->clk);
-
- if (ret < 0)
- return ret;
+ int ret;
ret = clk_prepare_enable(priv->clk_cd);
- if (ret < 0) {
- clk_disable_unprepare(priv->clk);
+ if (ret < 0)
return ret;
- }
/*
* The clock driver may not know what maximum frequency
@@ -197,7 +193,6 @@ static void renesas_sdhi_clk_disable(struct tmio_mmc_host *host)
{
struct renesas_sdhi *priv = host_to_priv(host);
- clk_disable_unprepare(priv->clk);
clk_disable_unprepare(priv->clk_cd);
}
@@ -237,7 +232,7 @@ static int renesas_sdhi_start_signal_voltage_switch(struct mmc_host *mmc,
MMC_SIGNAL_VOLTAGE_330 ? 0 : -EINVAL;
ret = mmc_regulator_set_vqmmc(host->mmc, ios);
- if (ret)
+ if (ret < 0)
return ret;
return pinctrl_select_state(priv->pinctrl, pin_state);
@@ -325,6 +320,8 @@ static void renesas_sdhi_hs400_complete(struct mmc_host *mmc)
{
struct tmio_mmc_host *host = mmc_priv(mmc);
struct renesas_sdhi *priv = host_to_priv(host);
+ u32 bad_taps = priv->quirks ? priv->quirks->hs400_bad_taps : 0;
+ bool use_4tap = priv->quirks && priv->quirks->hs400_4taps;
sd_ctrl_write16(host, CTL_SD_CARD_CLK_CTL, ~CLK_CTL_SCLKEN &
sd_ctrl_read16(host, CTL_SD_CARD_CLK_CTL));
@@ -352,10 +349,23 @@ static void renesas_sdhi_hs400_complete(struct mmc_host *mmc)
SH_MOBILE_SDHI_SCC_DTCNTL_TAPEN |
0x4 << SH_MOBILE_SDHI_SCC_DTCNTL_TAPNUM_SHIFT);
+ /* Avoid bad TAP */
+ if (bad_taps & BIT(priv->tap_set)) {
+ u32 new_tap = (priv->tap_set + 1) % priv->tap_num;
+
+ if (bad_taps & BIT(new_tap))
+ new_tap = (priv->tap_set - 1) % priv->tap_num;
- if (priv->quirks && priv->quirks->hs400_4taps)
- sd_scc_write32(host, priv, SH_MOBILE_SDHI_SCC_TAPSET,
- priv->tap_set / 2);
+ if (bad_taps & BIT(new_tap)) {
+ new_tap = priv->tap_set;
+ dev_dbg(&host->pdev->dev, "Can't handle three bad taps in a row\n");
+ }
+
+ priv->tap_set = new_tap;
+ }
+
+ sd_scc_write32(host, priv, SH_MOBILE_SDHI_SCC_TAPSET,
+ priv->tap_set / (use_4tap ? 2 : 1));
sd_scc_write32(host, priv, SH_MOBILE_SDHI_SCC_CKSEL,
SH_MOBILE_SDHI_SCC_CKSEL_DTSEL |
@@ -422,20 +432,16 @@ static int renesas_sdhi_prepare_hs400_tuning(struct mmc_host *mmc, struct mmc_io
return 0;
}
-#define SH_MOBILE_SDHI_MAX_TAP 3
+#define SH_MOBILE_SDHI_MIN_TAP_ROW 3
static int renesas_sdhi_select_tuning(struct tmio_mmc_host *host)
{
struct renesas_sdhi *priv = host_to_priv(host);
- unsigned long tap_cnt; /* counter of tuning success */
- unsigned long tap_start;/* start position of tuning success */
- unsigned long tap_end; /* end position of tuning success */
- unsigned long ntap; /* temporary counter of tuning success */
- unsigned long i;
+ unsigned int tap_start = 0, tap_end = 0, tap_cnt = 0, rs, re, i;
+ unsigned int taps_size = priv->tap_num * 2, min_tap_row;
+ unsigned long *bitmap;
priv->doing_tune = false;
-
- /* Clear SCC_RVSREQ */
sd_scc_write32(host, priv, SH_MOBILE_SDHI_SCC_RVSREQ, 0);
/*
@@ -443,42 +449,42 @@ static int renesas_sdhi_select_tuning(struct tmio_mmc_host *host)
* result requiring the tap to be good in both runs before
* considering it for tuning selection.
*/
- for (i = 0; i < priv->tap_num * 2; i++) {
+ for (i = 0; i < taps_size; i++) {
int offset = priv->tap_num * (i < priv->tap_num ? 1 : -1);
if (!test_bit(i, priv->taps))
clear_bit(i + offset, priv->taps);
+
+ if (!test_bit(i, priv->smpcmp))
+ clear_bit(i + offset, priv->smpcmp);
}
/*
- * Find the longest consecutive run of successful probes. If that
- * is more than SH_MOBILE_SDHI_MAX_TAP probes long then use the
- * center index as the tap.
+ * If all TAPs are OK, the sampling clock position is selected by
+ * identifying the data change point.
*/
- tap_cnt = 0;
- ntap = 0;
- tap_start = 0;
- tap_end = 0;
- for (i = 0; i < priv->tap_num * 2; i++) {
- if (test_bit(i, priv->taps)) {
- ntap++;
- } else {
- if (ntap > tap_cnt) {
- tap_start = i - ntap;
- tap_end = i - 1;
- tap_cnt = ntap;
- }
- ntap = 0;
- }
+ if (bitmap_full(priv->taps, taps_size)) {
+ bitmap = priv->smpcmp;
+ min_tap_row = 1;
+ } else {
+ bitmap = priv->taps;
+ min_tap_row = SH_MOBILE_SDHI_MIN_TAP_ROW;
}
- if (ntap > tap_cnt) {
- tap_start = i - ntap;
- tap_end = i - 1;
- tap_cnt = ntap;
+ /*
+ * Find the longest consecutive run of successful probes. If that
+ * is at least SH_MOBILE_SDHI_MIN_TAP_ROW probes long then use the
+ * center index as the tap, otherwise bail out.
+ */
+ bitmap_for_each_set_region(bitmap, rs, re, 0, taps_size) {
+ if (re - rs > tap_cnt) {
+ tap_end = re;
+ tap_start = rs;
+ tap_cnt = tap_end - tap_start;
+ }
}
- if (tap_cnt >= SH_MOBILE_SDHI_MAX_TAP)
+ if (tap_cnt >= min_tap_row)
priv->tap_set = (tap_start + tap_end) / 2 % priv->tap_num;
else
return -EIO;
@@ -511,6 +517,7 @@ static int renesas_sdhi_execute_tuning(struct tmio_mmc_host *host, u32 opcode)
priv->doing_tune = true;
bitmap_zero(priv->taps, priv->tap_num * 2);
+ bitmap_zero(priv->smpcmp, priv->tap_num * 2);
/* Issue CMD19 twice for each tap */
for (i = 0; i < 2 * priv->tap_num; i++) {
@@ -519,6 +526,9 @@ static int renesas_sdhi_execute_tuning(struct tmio_mmc_host *host, u32 opcode)
if (mmc_send_tuning(host->mmc, opcode, NULL) == 0)
set_bit(i, priv->taps);
+
+ if (sd_scc_read32(host, priv, SH_MOBILE_SDHI_SCC_SMPCMP) == 0)
+ set_bit(i, priv->smpcmp);
}
return renesas_sdhi_select_tuning(host);
@@ -527,7 +537,7 @@ static int renesas_sdhi_execute_tuning(struct tmio_mmc_host *host, u32 opcode)
static bool renesas_sdhi_manual_correction(struct tmio_mmc_host *host, bool use_4tap)
{
struct renesas_sdhi *priv = host_to_priv(host);
- unsigned long new_tap = priv->tap_set;
+ unsigned int new_tap = priv->tap_set, error_tap = priv->tap_set;
u32 val;
val = sd_scc_read32(host, priv, SH_MOBILE_SDHI_SCC_RVSREQ);
@@ -539,20 +549,32 @@ static bool renesas_sdhi_manual_correction(struct tmio_mmc_host *host, bool use_
/* Change TAP position according to correction status */
if (sd_ctrl_read16(host, CTL_VERSION) == SDHI_VER_GEN3_SDMMC &&
host->mmc->ios.timing == MMC_TIMING_MMC_HS400) {
+ u32 bad_taps = priv->quirks ? priv->quirks->hs400_bad_taps : 0;
/*
* With HS400, the DAT signal is based on DS, not CLK.
* Therefore, use only CMD status.
*/
u32 smpcmp = sd_scc_read32(host, priv, SH_MOBILE_SDHI_SCC_SMPCMP) &
SH_MOBILE_SDHI_SCC_SMPCMP_CMD_ERR;
- if (!smpcmp)
+ if (!smpcmp) {
return false; /* no error in CMD signal */
- else if (smpcmp == SH_MOBILE_SDHI_SCC_SMPCMP_CMD_REQUP)
+ } else if (smpcmp == SH_MOBILE_SDHI_SCC_SMPCMP_CMD_REQUP) {
new_tap++;
- else if (smpcmp == SH_MOBILE_SDHI_SCC_SMPCMP_CMD_REQDOWN)
+ error_tap--;
+ } else if (smpcmp == SH_MOBILE_SDHI_SCC_SMPCMP_CMD_REQDOWN) {
new_tap--;
- else
+ error_tap++;
+ } else {
return true; /* need retune */
+ }
+
+ /*
+ * When new_tap is a bad tap, we cannot move to it. In that case,
+ * compare with the HS200 tuning result: when smpcmp[error_tap] is
+ * OK, we can at least retune.
+ */
+ if (bad_taps & BIT(new_tap % priv->tap_num))
+ return test_bit(error_tap % priv->tap_num, priv->smpcmp);
} else {
if (val & SH_MOBILE_SDHI_SCC_RVSREQ_RVSERR)
return true; /* need retune */
@@ -705,17 +727,35 @@ static const struct renesas_sdhi_quirks sdhi_quirks_4tap_nohs400 = {
static const struct renesas_sdhi_quirks sdhi_quirks_4tap = {
.hs400_4taps = true,
+ .hs400_bad_taps = BIT(2) | BIT(3) | BIT(6) | BIT(7),
};
static const struct renesas_sdhi_quirks sdhi_quirks_nohs400 = {
.hs400_disabled = true,
};
+static const struct renesas_sdhi_quirks sdhi_quirks_bad_taps1357 = {
+ .hs400_bad_taps = BIT(1) | BIT(3) | BIT(5) | BIT(7),
+};
+
+static const struct renesas_sdhi_quirks sdhi_quirks_bad_taps2367 = {
+ .hs400_bad_taps = BIT(2) | BIT(3) | BIT(6) | BIT(7),
+};
+
+/*
+ * Note for r8a7796 / r8a774a1: we can't distinguish ES1.1 and 1.2 as of now.
+ * So, we want to treat them equally and only have a match for ES1.2 to enforce
+ * this if there will ever be a way to distinguish ES1.2.
+ */
static const struct soc_device_attribute sdhi_quirks_match[] = {
{ .soc_id = "r8a774a1", .revision = "ES1.[012]", .data = &sdhi_quirks_4tap_nohs400 },
{ .soc_id = "r8a7795", .revision = "ES1.*", .data = &sdhi_quirks_4tap_nohs400 },
{ .soc_id = "r8a7795", .revision = "ES2.0", .data = &sdhi_quirks_4tap },
+ { .soc_id = "r8a7795", .revision = "ES3.*", .data = &sdhi_quirks_bad_taps2367 },
{ .soc_id = "r8a7796", .revision = "ES1.[012]", .data = &sdhi_quirks_4tap_nohs400 },
+ { .soc_id = "r8a7796", .revision = "ES1.*", .data = &sdhi_quirks_4tap },
+ { .soc_id = "r8a7796", .revision = "ES3.*", .data = &sdhi_quirks_bad_taps1357 },
+ { .soc_id = "r8a77965", .data = &sdhi_quirks_bad_taps2367 },
{ .soc_id = "r8a77980", .data = &sdhi_quirks_nohs400 },
{ /* Sentinel. */ },
};
@@ -860,6 +900,8 @@ int renesas_sdhi_probe(struct platform_device *pdev,
/* All SDHI have SDIO status bits which must be 1 */
mmc_data->flags |= TMIO_MMC_SDIO_STATUS_SETBITS;
+ dev_pm_domain_start(&pdev->dev);
+
ret = renesas_sdhi_clk_enable(host);
if (ret)
goto efree;
@@ -933,10 +975,8 @@ int renesas_sdhi_probe(struct platform_device *pdev,
goto eirq;
}
- dev_info(&pdev->dev, "%s base at 0x%08lx max clock rate %u MHz\n",
- mmc_hostname(host->mmc), (unsigned long)
- (platform_get_resource(pdev, IORESOURCE_MEM, 0)->start),
- host->mmc->f_max / 1000000);
+ dev_info(&pdev->dev, "%s base at %pa, max clock rate %u MHz\n",
+ mmc_hostname(host->mmc), &res->start, host->mmc->f_max / 1000000);
return ret;
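
The sdhi_quirks_match[] table above is keyed by SoC id and revision; in the probe path such a table is typically consumed through soc_device_match(), roughly as in this sketch (the exact assignment in the driver may differ slightly):

	const struct soc_device_attribute *attr;

	attr = soc_device_match(sdhi_quirks_match);
	if (attr)
		priv->quirks = attr->data;
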
diff --git a/drivers/mmc/host/rtsx_pci_sdmmc.c b/drivers/mmc/host/rtsx_pci_sdmmc.c
index 11087976ab19..5a71f6678fd3 100644
--- a/drivers/mmc/host/rtsx_pci_sdmmc.c
+++ b/drivers/mmc/host/rtsx_pci_sdmmc.c
@@ -1347,7 +1347,7 @@ static void realtek_init_host(struct realtek_pci_sdmmc *host)
mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34 | MMC_VDD_165_195;
mmc->caps = MMC_CAP_4_BIT_DATA | MMC_CAP_SD_HIGHSPEED |
MMC_CAP_MMC_HIGHSPEED | MMC_CAP_BUS_WIDTH_TEST |
- MMC_CAP_UHS_SDR12 | MMC_CAP_UHS_SDR25 | MMC_CAP_ERASE;
+ MMC_CAP_UHS_SDR12 | MMC_CAP_UHS_SDR25;
mmc->caps2 = MMC_CAP2_NO_PRESCAN_POWERUP | MMC_CAP2_FULL_PWR_CYCLE;
mmc->max_current_330 = 400;
mmc->max_current_180 = 800;
diff --git a/drivers/mmc/host/rtsx_usb_sdmmc.c b/drivers/mmc/host/rtsx_usb_sdmmc.c
index 81d0dfe553a8..a7084c50ad65 100644
--- a/drivers/mmc/host/rtsx_usb_sdmmc.c
+++ b/drivers/mmc/host/rtsx_usb_sdmmc.c
@@ -1314,7 +1314,7 @@ static void rtsx_usb_init_host(struct rtsx_usb_sdmmc *host)
mmc->caps = MMC_CAP_4_BIT_DATA | MMC_CAP_SD_HIGHSPEED |
MMC_CAP_MMC_HIGHSPEED | MMC_CAP_BUS_WIDTH_TEST |
MMC_CAP_UHS_SDR12 | MMC_CAP_UHS_SDR25 | MMC_CAP_UHS_SDR50 |
- MMC_CAP_ERASE | MMC_CAP_SYNC_RUNTIME_PM;
+ MMC_CAP_SYNC_RUNTIME_PM;
mmc->caps2 = MMC_CAP2_NO_PRESCAN_POWERUP | MMC_CAP2_FULL_PWR_CYCLE |
MMC_CAP2_NO_SDIO;
diff --git a/drivers/mmc/host/s3cmci.c b/drivers/mmc/host/s3cmci.c
index 1e616ae56b13..444b2769ae2c 100644
--- a/drivers/mmc/host/s3cmci.c
+++ b/drivers/mmc/host/s3cmci.c
@@ -958,13 +958,6 @@ static int s3cmci_setup_data(struct s3cmci_host *host, struct mmc_data *data)
{
u32 dcon, imsk, stoptries = 3;
- /* write DCON register */
-
- if (!data) {
- writel(0, host->base + S3C2410_SDIDCON);
- return 0;
- }
-
if ((data->blksz & 3) != 0) {
/* We cannot deal with unaligned blocks with more than
* one block being transferred. */
diff --git a/drivers/mmc/host/sdhci-cadence.c b/drivers/mmc/host/sdhci-cadence.c
index 6da6d4fb5edd..4a6c9ba82538 100644
--- a/drivers/mmc/host/sdhci-cadence.c
+++ b/drivers/mmc/host/sdhci-cadence.c
@@ -97,6 +97,11 @@ static int sdhci_cdns_write_phy_reg(struct sdhci_cdns_priv *priv,
u32 tmp;
int ret;
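+ /* Wait for the ACK bit to clear so a new PHY register access can start */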
+ ret = readl_poll_timeout(reg, tmp, !(tmp & SDHCI_CDNS_HRS04_ACK),
+ 0, 10);
+ if (ret)
+ return ret;
+
tmp = FIELD_PREP(SDHCI_CDNS_HRS04_WDATA, data) |
FIELD_PREP(SDHCI_CDNS_HRS04_ADDR, addr);
writel(tmp, reg);
@@ -111,7 +116,10 @@ static int sdhci_cdns_write_phy_reg(struct sdhci_cdns_priv *priv,
tmp &= ~SDHCI_CDNS_HRS04_WR;
writel(tmp, reg);
- return 0;
+ ret = readl_poll_timeout(reg, tmp, !(tmp & SDHCI_CDNS_HRS04_ACK),
+ 0, 10);
+
+ return ret;
}
static unsigned int sdhci_cdns_phy_param_count(struct device_node *np)
diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c
index 5ec8e4bf1ac7..1d7f84b23a22 100644
--- a/drivers/mmc/host/sdhci-esdhc-imx.c
+++ b/drivers/mmc/host/sdhci-esdhc-imx.c
@@ -8,6 +8,7 @@
* Author: Wolfram Sang <kernel@pengutronix.de>
*/
+#include <linux/bitfield.h>
#include <linux/io.h>
#include <linux/iopoll.h>
#include <linux/delay.h>
@@ -89,7 +90,8 @@
#define ESDHC_STD_TUNING_EN (1 << 24)
/* NOTE: the minimum valid tuning start tap for mx6sl is 1 */
#define ESDHC_TUNING_START_TAP_DEFAULT 0x1
-#define ESDHC_TUNING_START_TAP_MASK 0xff
+#define ESDHC_TUNING_START_TAP_MASK 0x7f
+#define ESDHC_TUNING_CMD_CRC_CHECK_DISABLE (1 << 7)
#define ESDHC_TUNING_STEP_MASK 0x00070000
#define ESDHC_TUNING_STEP_SHIFT 16
@@ -214,6 +216,7 @@ static const struct esdhc_soc_data usdhc_imx6sl_data = {
static const struct esdhc_soc_data usdhc_imx6sll_data = {
.flags = ESDHC_FLAG_USDHC | ESDHC_FLAG_STD_TUNING
| ESDHC_FLAG_HAVE_CAP1 | ESDHC_FLAG_HS200
+ | ESDHC_FLAG_HS400
| ESDHC_FLAG_STATE_LOST_IN_LPMODE,
};
@@ -399,7 +402,8 @@ static u32 esdhc_readl_le(struct sdhci_host *host, int reg)
val = SDHCI_SUPPORT_DDR50 | SDHCI_SUPPORT_SDR104
| SDHCI_SUPPORT_SDR50
| SDHCI_USE_SDR50_TUNING
- | (SDHCI_TUNING_MODE_3 << SDHCI_RETUNING_MODE_SHIFT);
+ | FIELD_PREP(SDHCI_RETUNING_MODE_MASK,
+ SDHCI_TUNING_MODE_3);
if (imx_data->socdata->flags & ESDHC_FLAG_HS400)
val |= SDHCI_SUPPORT_HS400;
@@ -417,9 +421,9 @@ static u32 esdhc_readl_le(struct sdhci_host *host, int reg)
if (unlikely(reg == SDHCI_MAX_CURRENT) && esdhc_is_usdhc(imx_data)) {
val = 0;
- val |= 0xFF << SDHCI_MAX_CURRENT_330_SHIFT;
- val |= 0xFF << SDHCI_MAX_CURRENT_300_SHIFT;
- val |= 0xFF << SDHCI_MAX_CURRENT_180_SHIFT;
+ val |= FIELD_PREP(SDHCI_MAX_CURRENT_330_MASK, 0xFF);
+ val |= FIELD_PREP(SDHCI_MAX_CURRENT_300_MASK, 0xFF);
+ val |= FIELD_PREP(SDHCI_MAX_CURRENT_180_MASK, 0xFF);
}
if (unlikely(reg == SDHCI_INT_STATUS)) {
@@ -1313,6 +1317,18 @@ static void sdhci_esdhc_imx_hwinit(struct sdhci_host *host)
tmp |= imx_data->boarddata.tuning_step
<< ESDHC_TUNING_STEP_SHIFT;
}
+
+ /* Disable the CMD CRC check for tuning. Otherwise a delay would be
+ * needed after every tuning command: the standard hardware tuning
+ * logic moves on to the next step as soon as it detects a CMD CRC
+ * error, without waiting for the card to finish sending the tuning
+ * data, and it raises the buffer read ready interrupt immediately.
+ * If the uSDHC then sends the next tuning command, some eMMC cards
+ * get stuck and stop responding, which blocks the tuning procedure
+ * or leaves the first command after tuning without any response.
+ */
+ tmp |= ESDHC_TUNING_CMD_CRC_CHECK_DISABLE;
writel(tmp, host->ioaddr + ESDHC_TUNING_CTRL);
} else if (imx_data->socdata->flags & ESDHC_FLAG_MAN_TUNING) {
/*
@@ -1596,6 +1612,10 @@ static int sdhci_esdhc_imx_probe(struct platform_device *pdev)
if (esdhc_is_usdhc(imx_data)) {
host->quirks2 |= SDHCI_QUIRK2_PRESET_VALUE_BROKEN;
host->mmc->caps |= MMC_CAP_1_8V_DDR | MMC_CAP_3_3V_DDR;
+
+ /* GPIO CD can be set as a wakeup source */
+ host->mmc->caps |= MMC_CAP_CD_WAKE;
+
if (!(imx_data->socdata->flags & ESDHC_FLAG_HS200))
host->quirks2 |= SDHCI_QUIRK2_BROKEN_HS200;
@@ -1653,8 +1673,6 @@ static int sdhci_esdhc_imx_probe(struct platform_device *pdev)
if (err)
goto disable_ahb_clk;
- host->tuning_delay = 1;
-
sdhci_esdhc_imx_hwinit(host);
err = sdhci_add_host(host);
@@ -1731,8 +1749,14 @@ static int sdhci_esdhc_suspend(struct device *dev)
mmc_retune_needed(host->mmc);
ret = sdhci_suspend_host(host);
- if (!ret)
- return pinctrl_pm_select_sleep_state(dev);
+ if (ret)
+ return ret;
+
+ ret = pinctrl_pm_select_sleep_state(dev);
+ if (ret)
+ return ret;
+
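+ /* Let the GPIO card-detect line wake the system while suspended */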
+ ret = mmc_gpio_set_cd_wake(host->mmc, true);
return ret;
}
@@ -1756,6 +1780,9 @@ static int sdhci_esdhc_resume(struct device *dev)
if (host->mmc->caps2 & MMC_CAP2_CQE)
ret = cqhci_resume(host->mmc);
+ if (!ret)
+ ret = mmc_gpio_set_cd_wake(host->mmc, false);
+
return ret;
}
#endif
diff --git a/drivers/mmc/host/sdhci-esdhc-mcf.c b/drivers/mmc/host/sdhci-esdhc-mcf.c
new file mode 100644
index 000000000000..71bf086a9812
--- /dev/null
+++ b/drivers/mmc/host/sdhci-esdhc-mcf.c
@@ -0,0 +1,521 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Freescale eSDHC ColdFire family controller driver, platform bus.
+ *
+ * Copyright (c) 2020 Timesys Corporation
+ * Author: Angelo Dureghello <angelo.dureghello@timesys.it>
+ */
+
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/platform_data/mmc-esdhc-mcf.h>
+#include <linux/mmc/mmc.h>
+#include "sdhci-pltfm.h"
+#include "sdhci-esdhc.h"
+
+#define ESDHC_PROCTL_D3CD 0x08
+#define ESDHC_SYS_CTRL_DTOCV_MASK 0x0f
+#define ESDHC_DEFAULT_HOST_CONTROL 0x28
+
+/*
+ * Freescale eSDHC has the DMA ERR flag at bit 28, not at bit 25 as the
+ * standard spec says.
+ */
+#define ESDHC_INT_VENDOR_SPEC_DMA_ERR BIT(28)
+
+struct pltfm_mcf_data {
+ struct clk *clk_ipg;
+ struct clk *clk_ahb;
+ struct clk *clk_per;
+ int aside;
+ int current_bus_width;
+};
+
+static inline void esdhc_mcf_buffer_swap32(u32 *buf, int len)
+{
+ int i;
+ u32 temp;
+
+ len = (len + 3) >> 2;
+
+ for (i = 0; i < len; i++) {
+ temp = swab32(*buf);
+ *buf++ = temp;
+ }
+}
+
+static inline void esdhc_clrset_be(struct sdhci_host *host,
+ u32 mask, u32 val, int reg)
+{
+ void __iomem *base = host->ioaddr + (reg & ~3);
+ u8 shift = (reg & 3) << 3;
+
+ mask <<= shift;
+ val <<= shift;
+
+ if (reg == SDHCI_HOST_CONTROL)
+ val |= ESDHC_PROCTL_D3CD;
+
+ writel((readl(base) & ~mask) | val, base);
+}
+
+/*
+ * Note: mcf is big-endian; single bytes need to be accessed at big-endian
+ * offsets.
+ */
+static void esdhc_mcf_writeb_be(struct sdhci_host *host, u8 val, int reg)
+{
+ void __iomem *base = host->ioaddr + (reg & ~3);
+ u8 shift = (reg & 3) << 3;
+ u32 mask = ~(0xff << shift);
+
+ if (reg == SDHCI_HOST_CONTROL) {
+ u32 host_ctrl = ESDHC_DEFAULT_HOST_CONTROL;
+ u8 dma_bits = (val & SDHCI_CTRL_DMA_MASK) >> 3;
+ u8 tmp = readb(host->ioaddr + SDHCI_HOST_CONTROL + 1);
+
+ tmp &= ~0x03;
+ tmp |= dma_bits;
+
+ /*
+ * The register has to be recomposed: always restore the endianness
+ * setting and keep D3CD and AI, only the bus width actually changes.
+ */
+ host_ctrl |= val;
+ host_ctrl |= (dma_bits << 8);
+ writel(host_ctrl, host->ioaddr + SDHCI_HOST_CONTROL);
+
+ return;
+ }
+
+ writel((readl(base) & mask) | (val << shift), base);
+}
+
+static void esdhc_mcf_writew_be(struct sdhci_host *host, u16 val, int reg)
+{
+ struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+ struct pltfm_mcf_data *mcf_data = sdhci_pltfm_priv(pltfm_host);
+ void __iomem *base = host->ioaddr + (reg & ~3);
+ u8 shift = (reg & 3) << 3;
+ u32 mask = ~(0xffff << shift);
+
+ switch (reg) {
+ case SDHCI_TRANSFER_MODE:
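+ /* Stash the transfer mode; it is written together with the command below */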
+ mcf_data->aside = val;
+ return;
+ case SDHCI_COMMAND:
+ if (host->cmd->opcode == MMC_STOP_TRANSMISSION)
+ val |= SDHCI_CMD_ABORTCMD;
+
+ /*
+ * As in the fsl driver, the command and transfer mode have to be
+ * written in a single 32-bit access here.
+ */
+ writel(val << 16 | mcf_data->aside,
+ host->ioaddr + SDHCI_TRANSFER_MODE);
+ return;
+ }
+
+ writel((readl(base) & mask) | (val << shift), base);
+}
+
+static void esdhc_mcf_writel_be(struct sdhci_host *host, u32 val, int reg)
+{
+ writel(val, host->ioaddr + reg);
+}
+
+static u8 esdhc_mcf_readb_be(struct sdhci_host *host, int reg)
+{
+ if (reg == SDHCI_HOST_CONTROL) {
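+ /*
+ * Rebuild the standard HOST_CONTROL byte, moving the DMA select
+ * bits back into their SDHCI positions.
+ */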
+ u8 __iomem *base = host->ioaddr + (reg & ~3);
+ u16 val = readw(base + 2);
+ u8 dma_bits = (val >> 5) & SDHCI_CTRL_DMA_MASK;
+ u8 host_ctrl = val & 0xff;
+
+ host_ctrl &= ~SDHCI_CTRL_DMA_MASK;
+ host_ctrl |= dma_bits;
+
+ return host_ctrl;
+ }
+
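+ /* XOR with 3 maps the byte offset to its big-endian lane */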
+ return readb(host->ioaddr + (reg ^ 0x3));
+}
+
+static u16 esdhc_mcf_readw_be(struct sdhci_host *host, int reg)
+{
+ /*
+ * The SDHCI spec defines SDHCI_HOST_VERSION at offset 0xFE,
+ * which is the wrong offset for us; ours is at 0xFC.
+ */
+ if (reg == SDHCI_HOST_VERSION)
+ reg -= 2;
+
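+ /* XOR with 2 maps the halfword offset to its big-endian lane */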
+ return readw(host->ioaddr + (reg ^ 0x2));
+}
+
+static u32 esdhc_mcf_readl_be(struct sdhci_host *host, int reg)
+{
+ u32 val;
+
+ val = readl(host->ioaddr + reg);
+
+ /*
+ * RM (25.3.9): the sd pin clock must never exceed 25 MHz,
+ * so force legacy (non-high-speed) mode at 25 MHz.
+ */
+ if (unlikely(reg == SDHCI_CAPABILITIES))
+ val &= ~SDHCI_CAN_DO_HISPD;
+
+ if (unlikely(reg == SDHCI_INT_STATUS)) {
+ if (val & ESDHC_INT_VENDOR_SPEC_DMA_ERR) {
+ val &= ~ESDHC_INT_VENDOR_SPEC_DMA_ERR;
+ val |= SDHCI_INT_ADMA_ERROR;
+ }
+ }
+
+ return val;
+}
+
+static unsigned int esdhc_mcf_get_max_timeout_count(struct sdhci_host *host)
+{
+ return 1 << 27;
+}
+
+static void esdhc_mcf_set_timeout(struct sdhci_host *host,
+ struct mmc_command *cmd)
+{
+ /* Use maximum timeout counter */
+ esdhc_clrset_be(host, ESDHC_SYS_CTRL_DTOCV_MASK, 0xE,
+ SDHCI_TIMEOUT_CONTROL);
+}
+
+static void esdhc_mcf_reset(struct sdhci_host *host, u8 mask)
+{
+ struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+ struct pltfm_mcf_data *mcf_data = sdhci_pltfm_priv(pltfm_host);
+
+ sdhci_reset(host, mask);
+
+ esdhc_clrset_be(host, ESDHC_CTRL_BUSWIDTH_MASK,
+ mcf_data->current_bus_width, SDHCI_HOST_CONTROL);
+
+ sdhci_writel(host, host->ier, SDHCI_INT_ENABLE);
+ sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE);
+}
+
+static unsigned int esdhc_mcf_pltfm_get_max_clock(struct sdhci_host *host)
+{
+ struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+
+ return pltfm_host->clock;
+}
+
+static unsigned int esdhc_mcf_pltfm_get_min_clock(struct sdhci_host *host)
+{
+ struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+
+ return pltfm_host->clock / 256 / 16;
+}
+
+static void esdhc_mcf_pltfm_set_clock(struct sdhci_host *host,
+ unsigned int clock)
+{
+ struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+ unsigned long *pll_dr = (unsigned long *)MCF_PLL_DR;
+ u32 fvco, fsys, fesdhc, temp;
+ const int sdclkfs[] = {2, 4, 8, 16, 32, 64, 128, 256};
+ int delta, old_delta = clock;
+ int i, q, ri, rq;
+
+ if (clock == 0) {
+ host->mmc->actual_clock = 0;
+ return;
+ }
+
+ /*
+ * ColdFire eSDHC clocks
+ *
+ * pll -+-> / outdiv1 --> fsys
+ * +-> / outdiv3 --> eSDHC clock ---> / SDCCLKFS / DVS
+ *
+ * mcf5441x datasheet says:
+ * (8.1.2) eSDHC should be 40 MHz max
+ * (25.3.9) eSDHC input is, for example, 96 MHz ...
+ * (25.3.9) sd pin clock must never exceed 25 MHz
+ *
+ * fvco = fsys * (outdiv1 + 1)
+ * fesdhc = fvco / (outdiv3 + 1)
+ */
+ temp = readl(pll_dr);
+ fsys = pltfm_host->clock;
+ fvco = fsys * ((temp & 0x1f) + 1);
+ fesdhc = fvco / (((temp >> 10) & 0x1f) + 1);
+
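+ /*
+ * Exhaustive search over the eight SDCLKFS prescaler values and the
+ * sixteen DVS divisor values for the pair closest to the requested
+ * clock.
+ */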
+ for (i = 0; i < 8; ++i) {
+ int result = fesdhc / sdclkfs[i];
+
+ for (q = 1; q < 17; ++q) {
+ int finale = result / q;
+
+ delta = abs(clock - finale);
+
+ if (delta < old_delta) {
+ old_delta = delta;
+ ri = i;
+ rq = q;
+ }
+ }
+ }
+
+ /*
+ * Apply divisors and re-enable all the clocks
+ */
+ temp = ((sdclkfs[ri] >> 1) << 8) | ((rq - 1) << 4) |
+ (ESDHC_CLOCK_IPGEN | ESDHC_CLOCK_HCKEN | ESDHC_CLOCK_PEREN);
+ esdhc_clrset_be(host, 0x0000fff7, temp, SDHCI_CLOCK_CONTROL);
+
+ host->mmc->actual_clock = clock;
+
+ mdelay(1);
+}
+
+static void esdhc_mcf_pltfm_set_bus_width(struct sdhci_host *host, int width)
+{
+ struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+ struct pltfm_mcf_data *mcf_data = sdhci_pltfm_priv(pltfm_host);
+
+ switch (width) {
+ case MMC_BUS_WIDTH_4:
+ mcf_data->current_bus_width = ESDHC_CTRL_4BITBUS;
+ break;
+ default:
+ mcf_data->current_bus_width = 0;
+ break;
+ }
+
+ esdhc_clrset_be(host, ESDHC_CTRL_BUSWIDTH_MASK,
+ mcf_data->current_bus_width, SDHCI_HOST_CONTROL);
+}
+
+static void esdhc_mcf_request_done(struct sdhci_host *host,
+ struct mmc_request *mrq)
+{
+ struct scatterlist *sg;
+ u32 *buffer;
+ int i;
+
+ if (!mrq->data || !mrq->data->bytes_xfered)
+ goto exit_done;
+
+ if (mmc_get_dma_dir(mrq->data) != DMA_FROM_DEVICE)
+ goto exit_done;
+
+ /*
+ * On mcf5441x there is no hw sdma option/flag to select the dma
+ * transfer endianness. A swap after the transfer is needed.
+ */
+ for_each_sg(mrq->data->sg, sg, mrq->data->sg_len, i) {
+ buffer = (u32 *)sg_virt(sg);
+ esdhc_mcf_buffer_swap32(buffer, sg->length);
+ }
+
+exit_done:
+ mmc_request_done(host->mmc, mrq);
+}
+
+static void esdhc_mcf_copy_to_bounce_buffer(struct sdhci_host *host,
+ struct mmc_data *data,
+ unsigned int length)
+{
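+ /*
+ * There is no HW control for SDMA endianness: swap the outgoing
+ * data after copying it into the bounce buffer.
+ */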
+ sg_copy_to_buffer(data->sg, data->sg_len,
+ host->bounce_buffer, length);
+
+ esdhc_mcf_buffer_swap32((u32 *)host->bounce_buffer,
+ data->blksz * data->blocks);
+}
+
+static struct sdhci_ops sdhci_esdhc_ops = {
+ .reset = esdhc_mcf_reset,
+ .set_clock = esdhc_mcf_pltfm_set_clock,
+ .get_max_clock = esdhc_mcf_pltfm_get_max_clock,
+ .get_min_clock = esdhc_mcf_pltfm_get_min_clock,
+ .set_bus_width = esdhc_mcf_pltfm_set_bus_width,
+ .get_max_timeout_count = esdhc_mcf_get_max_timeout_count,
+ .set_timeout = esdhc_mcf_set_timeout,
+ .write_b = esdhc_mcf_writeb_be,
+ .write_w = esdhc_mcf_writew_be,
+ .write_l = esdhc_mcf_writel_be,
+ .read_b = esdhc_mcf_readb_be,
+ .read_w = esdhc_mcf_readw_be,
+ .read_l = esdhc_mcf_readl_be,
+ .copy_to_bounce_buffer = esdhc_mcf_copy_to_bounce_buffer,
+ .request_done = esdhc_mcf_request_done,
+};
+
+static const struct sdhci_pltfm_data sdhci_esdhc_mcf_pdata = {
+ .ops = &sdhci_esdhc_ops,
+ .quirks = ESDHC_DEFAULT_QUIRKS | SDHCI_QUIRK_FORCE_DMA,
+ /*
+ * Mandatory quirk: the controller does not support CMD23.
+ * Without it, CMD23 would be used on cards larger than 8 GB
+ * and the driver would time out.
+ */
+ .quirks2 = SDHCI_QUIRK2_HOST_NO_CMD23,
+};
+
+static int esdhc_mcf_plat_init(struct sdhci_host *host,
+ struct pltfm_mcf_data *mcf_data)
+{
+ struct mcf_esdhc_platform_data *plat_data;
+
+ if (!host->mmc->parent->platform_data) {
+ dev_err(mmc_dev(host->mmc), "no platform data!\n");
+ return -EINVAL;
+ }
+
+ plat_data = (struct mcf_esdhc_platform_data *)
+ host->mmc->parent->platform_data;
+
+ /* Card_detect */
+ switch (plat_data->cd_type) {
+ default:
+ case ESDHC_CD_CONTROLLER:
+ /* We have a working card_detect back */
+ host->quirks &= ~SDHCI_QUIRK_BROKEN_CARD_DETECTION;
+ break;
+ case ESDHC_CD_PERMANENT:
+ host->mmc->caps |= MMC_CAP_NONREMOVABLE;
+ break;
+ case ESDHC_CD_NONE:
+ break;
+ }
+
+ switch (plat_data->max_bus_width) {
+ case 4:
+ host->mmc->caps |= MMC_CAP_4_BIT_DATA;
+ break;
+ case 1:
+ default:
+ host->quirks |= SDHCI_QUIRK_FORCE_1_BIT_DATA;
+ break;
+ }
+
+ return 0;
+}
+
+static int sdhci_esdhc_mcf_probe(struct platform_device *pdev)
+{
+ struct sdhci_host *host;
+ struct sdhci_pltfm_host *pltfm_host;
+ struct pltfm_mcf_data *mcf_data;
+ int err;
+
+ host = sdhci_pltfm_init(pdev, &sdhci_esdhc_mcf_pdata,
+ sizeof(*mcf_data));
+
+ if (IS_ERR(host))
+ return PTR_ERR(host);
+
+ pltfm_host = sdhci_priv(host);
+ mcf_data = sdhci_pltfm_priv(pltfm_host);
+
+ host->sdma_boundary = 0;
+
+ host->flags |= SDHCI_AUTO_CMD12;
+
+ mcf_data->clk_ipg = devm_clk_get(&pdev->dev, "ipg");
+ if (IS_ERR(mcf_data->clk_ipg)) {
+ err = PTR_ERR(mcf_data->clk_ipg);
+ goto err_exit;
+ }
+
+ mcf_data->clk_ahb = devm_clk_get(&pdev->dev, "ahb");
+ if (IS_ERR(mcf_data->clk_ahb)) {
+ err = PTR_ERR(mcf_data->clk_ahb);
+ goto err_exit;
+ }
+
+ mcf_data->clk_per = devm_clk_get(&pdev->dev, "per");
+ if (IS_ERR(mcf_data->clk_per)) {
+ err = PTR_ERR(mcf_data->clk_per);
+ goto err_exit;
+ }
+
+ pltfm_host->clk = mcf_data->clk_per;
+ pltfm_host->clock = clk_get_rate(pltfm_host->clk);
+ err = clk_prepare_enable(mcf_data->clk_per);
+ if (err)
+ goto err_exit;
+
+ err = clk_prepare_enable(mcf_data->clk_ipg);
+ if (err)
+ goto unprep_per;
+
+ err = clk_prepare_enable(mcf_data->clk_ahb);
+ if (err)
+ goto unprep_ipg;
+
+ err = esdhc_mcf_plat_init(host, mcf_data);
+ if (err)
+ goto unprep_ahb;
+
+ err = sdhci_setup_host(host);
+ if (err)
+ goto unprep_ahb;
+
+ if (!host->bounce_buffer) {
+ dev_err(&pdev->dev, "bounce buffer not allocated");
+ err = -ENOMEM;
+ goto cleanup;
+ }
+
+ err = __sdhci_add_host(host);
+ if (err)
+ goto cleanup;
+
+ return 0;
+
+cleanup:
+ sdhci_cleanup_host(host);
+unprep_ahb:
+ clk_disable_unprepare(mcf_data->clk_ahb);
+unprep_ipg:
+ clk_disable_unprepare(mcf_data->clk_ipg);
+unprep_per:
+ clk_disable_unprepare(mcf_data->clk_per);
+err_exit:
+ sdhci_pltfm_free(pdev);
+
+ return err;
+}
+
+static int sdhci_esdhc_mcf_remove(struct platform_device *pdev)
+{
+ struct sdhci_host *host = platform_get_drvdata(pdev);
+ struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+ struct pltfm_mcf_data *mcf_data = sdhci_pltfm_priv(pltfm_host);
+
+ sdhci_remove_host(host, 0);
+
+ clk_disable_unprepare(mcf_data->clk_ipg);
+ clk_disable_unprepare(mcf_data->clk_ahb);
+ clk_disable_unprepare(mcf_data->clk_per);
+
+ sdhci_pltfm_free(pdev);
+
+ return 0;
+}
+
+static struct platform_driver sdhci_esdhc_mcf_driver = {
+ .driver = {
+ .name = "sdhci-esdhc-mcf",
+ },
+ .probe = sdhci_esdhc_mcf_probe,
+ .remove = sdhci_esdhc_mcf_remove,
+};
+
+module_platform_driver(sdhci_esdhc_mcf_driver);
+
+MODULE_DESCRIPTION("SDHCI driver for Freescale ColdFire eSDHC");
+MODULE_AUTHOR("Angelo Dureghello <angelo.dureghello@timesys.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/mmc/host/sdhci-esdhc.h b/drivers/mmc/host/sdhci-esdhc.h
index 947212f16bc6..a30796e79b1c 100644
--- a/drivers/mmc/host/sdhci-esdhc.h
+++ b/drivers/mmc/host/sdhci-esdhc.h
@@ -5,7 +5,7 @@
* Copyright (c) 2007 Freescale Semiconductor, Inc.
* Copyright (c) 2009 MontaVista Software, Inc.
* Copyright (c) 2010 Pengutronix e.K.
- * Author: Wolfram Sang <w.sang@pengutronix.de>
+ * Author: Wolfram Sang <kernel@pengutronix.de>
*/
#ifndef _DRIVERS_MMC_SDHCI_ESDHC_H
diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c
index a8bcb3f16aa4..b277dd7fbdb5 100644
--- a/drivers/mmc/host/sdhci-msm.c
+++ b/drivers/mmc/host/sdhci-msm.c
@@ -10,6 +10,7 @@
#include <linux/delay.h>
#include <linux/mmc/mmc.h>
#include <linux/pm_runtime.h>
+#include <linux/pm_opp.h>
#include <linux/slab.h>
#include <linux/iopoll.h>
#include <linux/regulator/consumer.h>
@@ -56,19 +57,27 @@
#define CORE_FLL_CYCLE_CNT BIT(18)
#define CORE_DLL_CLOCK_DISABLE BIT(21)
-#define CORE_VENDOR_SPEC_POR_VAL 0xa1c
+#define DLL_USR_CTL_POR_VAL 0x10800
+#define ENABLE_DLL_LOCK_STATUS BIT(26)
+#define FINE_TUNE_MODE_EN BIT(27)
+#define BIAS_OK_SIGNAL BIT(29)
+
+#define DLL_CONFIG_3_LOW_FREQ_VAL 0x08
+#define DLL_CONFIG_3_HIGH_FREQ_VAL 0x10
+
+#define CORE_VENDOR_SPEC_POR_VAL 0xa9c
#define CORE_CLK_PWRSAVE BIT(1)
#define CORE_HC_MCLK_SEL_DFLT (2 << 8)
#define CORE_HC_MCLK_SEL_HS400 (3 << 8)
#define CORE_HC_MCLK_SEL_MASK (3 << 8)
-#define CORE_IO_PAD_PWR_SWITCH_EN (1 << 15)
-#define CORE_IO_PAD_PWR_SWITCH (1 << 16)
+#define CORE_IO_PAD_PWR_SWITCH_EN BIT(15)
+#define CORE_IO_PAD_PWR_SWITCH BIT(16)
#define CORE_HC_SELECT_IN_EN BIT(18)
#define CORE_HC_SELECT_IN_HS400 (6 << 19)
#define CORE_HC_SELECT_IN_MASK (7 << 19)
-#define CORE_3_0V_SUPPORT (1 << 25)
-#define CORE_1_8V_SUPPORT (1 << 26)
+#define CORE_3_0V_SUPPORT BIT(25)
+#define CORE_1_8V_SUPPORT BIT(26)
#define CORE_VOLT_SUPPORT (CORE_3_0V_SUPPORT | CORE_1_8V_SUPPORT)
#define CORE_CSR_CDC_CTLR_CFG0 0x130
@@ -156,6 +165,7 @@ struct sdhci_msm_offset {
u32 core_dll_config_3;
u32 core_ddr_config_old; /* Applicable to sdcc minor ver < 0x49 */
u32 core_ddr_config;
+ u32 core_dll_usr_ctl; /* Present on SDCC5.1 onwards */
};
static const struct sdhci_msm_offset sdhci_msm_v5_offset = {
@@ -185,6 +195,7 @@ static const struct sdhci_msm_offset sdhci_msm_v5_offset = {
.core_dll_config_2 = 0x254,
.core_dll_config_3 = 0x258,
.core_ddr_config = 0x25c,
+ .core_dll_usr_ctl = 0x388,
};
static const struct sdhci_msm_offset sdhci_msm_mci_offset = {
@@ -230,6 +241,7 @@ struct sdhci_msm_variant_ops {
struct sdhci_msm_variant_info {
bool mci_removed;
bool restore_dll_config;
+ bool uses_tassadar_dll;
const struct sdhci_msm_variant_ops *var_ops;
const struct sdhci_msm_offset *offset;
};
@@ -243,6 +255,8 @@ struct sdhci_msm_host {
struct clk_bulk_data bulk_clks[4]; /* core, iface, cal, sleep clocks */
unsigned long clk_rate;
struct mmc_host *mmc;
+ struct opp_table *opp_table;
+ bool has_opp_table;
bool use_14lpp_dll_reset;
bool tuning_done;
bool calibration_done;
@@ -260,6 +274,9 @@ struct sdhci_msm_host {
bool use_cdr;
u32 transfer_mode;
bool updated_ddr_cfg;
+ bool uses_tassadar_dll;
+ u32 dll_config;
+ u32 ddr_config;
};
static const struct sdhci_msm_offset *sdhci_priv_msm_offset(struct sdhci_host *host)
@@ -332,7 +349,7 @@ static void msm_set_clock_rate_for_bus_mode(struct sdhci_host *host,
int rc;
clock = msm_get_clock_rate_for_bus_mode(host, clock);
- rc = clk_set_rate(core_clk, clock);
+ rc = dev_pm_opp_set_rate(mmc_dev(host->mmc), clock);
if (rc) {
pr_err("%s: Failed to set clock at rate %u at timing %d\n",
mmc_hostname(host->mmc), clock,
@@ -601,6 +618,9 @@ static int msm_init_cm_dll(struct sdhci_host *host)
config &= ~CORE_CLK_PWRSAVE;
writel_relaxed(config, host->ioaddr + msm_offset->core_vendor_spec);
+ config = msm_host->dll_config;
+ writel_relaxed(config, host->ioaddr + msm_offset->core_dll_config);
+
if (msm_host->use_14lpp_dll_reset) {
config = readl_relaxed(host->ioaddr +
msm_offset->core_dll_config);
@@ -626,7 +646,9 @@ static int msm_init_cm_dll(struct sdhci_host *host)
config |= CORE_DLL_PDN;
writel_relaxed(config, host->ioaddr +
msm_offset->core_dll_config);
- msm_cm_dll_set_freq(host);
+
+ if (!msm_host->dll_config)
+ msm_cm_dll_set_freq(host);
if (msm_host->use_14lpp_dll_reset &&
!IS_ERR_OR_NULL(msm_host->xo_clk)) {
@@ -666,7 +688,8 @@ static int msm_init_cm_dll(struct sdhci_host *host)
msm_offset->core_dll_config);
if (msm_host->use_14lpp_dll_reset) {
- msm_cm_dll_set_freq(host);
+ if (!msm_host->dll_config)
+ msm_cm_dll_set_freq(host);
config = readl_relaxed(host->ioaddr +
msm_offset->core_dll_config_2);
config &= ~CORE_DLL_CLOCK_DISABLE;
@@ -674,6 +697,27 @@ static int msm_init_cm_dll(struct sdhci_host *host)
msm_offset->core_dll_config_2);
}
+ /*
+ * Configure DLL user control register to enable DLL status.
+ * This setting is applicable to SDCC v5.1 onwards only.
+ */
+ if (msm_host->uses_tassadar_dll) {
+ config = DLL_USR_CTL_POR_VAL | FINE_TUNE_MODE_EN |
+ ENABLE_DLL_LOCK_STATUS | BIAS_OK_SIGNAL;
+ writel_relaxed(config, host->ioaddr +
+ msm_offset->core_dll_usr_ctl);
+
+ config = readl_relaxed(host->ioaddr +
+ msm_offset->core_dll_config_3);
+ config &= ~0xFF;
+ if (msm_host->clk_rate < 150000000)
+ config |= DLL_CONFIG_3_LOW_FREQ_VAL;
+ else
+ config |= DLL_CONFIG_3_HIGH_FREQ_VAL;
+ writel_relaxed(config, host->ioaddr +
+ msm_offset->core_dll_config_3);
+ }
+
config = readl_relaxed(host->ioaddr +
msm_offset->core_dll_config);
config |= CORE_DLL_EN;
@@ -951,7 +995,7 @@ static int sdhci_msm_cm_dll_sdc4_calibration(struct sdhci_host *host)
ddr_cfg_offset = msm_offset->core_ddr_config;
else
ddr_cfg_offset = msm_offset->core_ddr_config_old;
- writel_relaxed(DDR_CONFIG_POR_VAL, host->ioaddr + ddr_cfg_offset);
+ writel_relaxed(msm_host->ddr_config, host->ioaddr + ddr_cfg_offset);
if (mmc->ios.enhanced_strobe) {
config = readl_relaxed(host->ioaddr +
@@ -1130,6 +1174,12 @@ static int sdhci_msm_execute_tuning(struct mmc_host *mmc, u32 opcode)
msm_host->use_cdr = true;
/*
+ * Clear tuning_done flag before tuning to ensure proper
+ * HS400 settings.
+ */
+ msm_host->tuning_done = 0;
+
+ /*
* For HS400 tuning in HS200 timing requires:
* - select MCLK/2 in VENDOR_SPEC
* - program MCLK to 400MHz (or nearest supported) in GCC
@@ -1830,6 +1880,36 @@ static void sdhci_msm_reset(struct sdhci_host *host, u8 mask)
sdhci_reset(host, mask);
}
+#define DRIVER_NAME "sdhci_msm"
+#define SDHCI_MSM_DUMP(f, x...) \
+ pr_err("%s: " DRIVER_NAME ": " f, mmc_hostname(host->mmc), ## x)
+
+void sdhci_msm_dump_vendor_regs(struct sdhci_host *host)
+{
+ struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+ struct sdhci_msm_host *msm_host = sdhci_pltfm_priv(pltfm_host);
+ const struct sdhci_msm_offset *msm_offset = msm_host->offset;
+
+ SDHCI_MSM_DUMP("----------- VENDOR REGISTER DUMP -----------\n");
+
+ SDHCI_MSM_DUMP(
+ "DLL sts: 0x%08x | DLL cfg: 0x%08x | DLL cfg2: 0x%08x\n",
+ readl_relaxed(host->ioaddr + msm_offset->core_dll_status),
+ readl_relaxed(host->ioaddr + msm_offset->core_dll_config),
+ readl_relaxed(host->ioaddr + msm_offset->core_dll_config_2));
+ SDHCI_MSM_DUMP(
+ "DLL cfg3: 0x%08x | DLL usr ctl: 0x%08x | DDR cfg: 0x%08x\n",
+ readl_relaxed(host->ioaddr + msm_offset->core_dll_config_3),
+ readl_relaxed(host->ioaddr + msm_offset->core_dll_usr_ctl),
+ readl_relaxed(host->ioaddr + msm_offset->core_ddr_config));
+ SDHCI_MSM_DUMP(
+ "Vndr func: 0x%08x | Vndr func2 : 0x%08x Vndr func3: 0x%08x\n",
+ readl_relaxed(host->ioaddr + msm_offset->core_vendor_spec),
+ readl_relaxed(host->ioaddr +
+ msm_offset->core_vendor_spec_func2),
+ readl_relaxed(host->ioaddr + msm_offset->core_vendor_spec3));
+}
+
static const struct sdhci_msm_variant_ops mci_var_ops = {
.msm_readl_relaxed = sdhci_msm_mci_variant_readl_relaxed,
.msm_writel_relaxed = sdhci_msm_mci_variant_writel_relaxed,
@@ -1858,10 +1938,18 @@ static const struct sdhci_msm_variant_info sdm845_sdhci_var = {
.offset = &sdhci_msm_v5_offset,
};
+static const struct sdhci_msm_variant_info sm8250_sdhci_var = {
+ .mci_removed = true,
+ .uses_tassadar_dll = true,
+ .var_ops = &v5_var_ops,
+ .offset = &sdhci_msm_v5_offset,
+};
+
static const struct of_device_id sdhci_msm_dt_match[] = {
{.compatible = "qcom,sdhci-msm-v4", .data = &sdhci_msm_mci_var},
{.compatible = "qcom,sdhci-msm-v5", .data = &sdhci_msm_v5_var},
{.compatible = "qcom,sdm845-sdhci", .data = &sdm845_sdhci_var},
+ {.compatible = "qcom,sm8250-sdhci", .data = &sm8250_sdhci_var},
{},
};
@@ -1877,16 +1965,34 @@ static const struct sdhci_ops sdhci_msm_ops = {
.write_w = sdhci_msm_writew,
.write_b = sdhci_msm_writeb,
.irq = sdhci_msm_cqe_irq,
+ .dump_vendor_regs = sdhci_msm_dump_vendor_regs,
};
static const struct sdhci_pltfm_data sdhci_msm_pdata = {
.quirks = SDHCI_QUIRK_BROKEN_CARD_DETECTION |
SDHCI_QUIRK_SINGLE_POWER_WRITE |
- SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN,
+ SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN |
+ SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12,
+
.quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN,
.ops = &sdhci_msm_ops,
};
+static inline void sdhci_msm_get_of_property(struct platform_device *pdev,
+ struct sdhci_host *host)
+{
+ struct device_node *node = pdev->dev.of_node;
+ struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+ struct sdhci_msm_host *msm_host = sdhci_pltfm_priv(pltfm_host);
+
+ if (of_property_read_u32(node, "qcom,ddr-config",
+ &msm_host->ddr_config))
+ msm_host->ddr_config = DDR_CONFIG_POR_VAL;
+
+ of_property_read_u32(node, "qcom,dll-config", &msm_host->dll_config);
+}
+
+
static int sdhci_msm_probe(struct platform_device *pdev)
{
struct sdhci_host *host;
@@ -1925,10 +2031,12 @@ static int sdhci_msm_probe(struct platform_device *pdev)
msm_host->restore_dll_config = var_info->restore_dll_config;
msm_host->var_ops = var_info->var_ops;
msm_host->offset = var_info->offset;
+ msm_host->uses_tassadar_dll = var_info->uses_tassadar_dll;
msm_offset = msm_host->offset;
sdhci_get_of_property(pdev);
+ sdhci_msm_get_of_property(pdev, host);
msm_host->saved_tuning_phase = INVALID_TUNING_PHASE;
@@ -1962,8 +2070,23 @@ static int sdhci_msm_probe(struct platform_device *pdev)
}
msm_host->bulk_clks[0].clk = clk;
+ msm_host->opp_table = dev_pm_opp_set_clkname(&pdev->dev, "core");
+ if (IS_ERR(msm_host->opp_table)) {
+ ret = PTR_ERR(msm_host->opp_table);
+ goto bus_clk_disable;
+ }
+
+ /* OPP table is optional */
+ ret = dev_pm_opp_of_add_table(&pdev->dev);
+ if (!ret) {
+ msm_host->has_opp_table = true;
+ } else if (ret != -ENODEV) {
+ dev_err(&pdev->dev, "Invalid OPP table in Device tree\n");
+ goto opp_cleanup;
+ }
+
/* Vote for maximum clock rate for maximum performance */
- ret = clk_set_rate(clk, INT_MAX);
+ ret = dev_pm_opp_set_rate(&pdev->dev, INT_MAX);
if (ret)
dev_warn(&pdev->dev, "core clock boost failed\n");
@@ -1980,7 +2103,7 @@ static int sdhci_msm_probe(struct platform_device *pdev)
ret = clk_bulk_prepare_enable(ARRAY_SIZE(msm_host->bulk_clks),
msm_host->bulk_clks);
if (ret)
- goto bus_clk_disable;
+ goto opp_cleanup;
/*
* xo clock is needed for FLL feature of cm_dll.
@@ -2117,6 +2240,10 @@ pm_runtime_disable:
clk_disable:
clk_bulk_disable_unprepare(ARRAY_SIZE(msm_host->bulk_clks),
msm_host->bulk_clks);
+opp_cleanup:
+ if (msm_host->has_opp_table)
+ dev_pm_opp_of_remove_table(&pdev->dev);
+ dev_pm_opp_put_clkname(msm_host->opp_table);
bus_clk_disable:
if (!IS_ERR(msm_host->bus_clk))
clk_disable_unprepare(msm_host->bus_clk);
@@ -2135,6 +2262,9 @@ static int sdhci_msm_remove(struct platform_device *pdev)
sdhci_remove_host(host, dead);
+ if (msm_host->has_opp_table)
+ dev_pm_opp_of_remove_table(&pdev->dev);
+ dev_pm_opp_put_clkname(msm_host->opp_table);
pm_runtime_get_sync(&pdev->dev);
pm_runtime_disable(&pdev->dev);
pm_runtime_put_noidle(&pdev->dev);
@@ -2153,6 +2283,8 @@ static __maybe_unused int sdhci_msm_runtime_suspend(struct device *dev)
struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
struct sdhci_msm_host *msm_host = sdhci_pltfm_priv(pltfm_host);
+ /* Drop the performance vote */
+ dev_pm_opp_set_rate(dev, 0);
clk_bulk_disable_unprepare(ARRAY_SIZE(msm_host->bulk_clks),
msm_host->bulk_clks);
@@ -2175,9 +2307,11 @@ static __maybe_unused int sdhci_msm_runtime_resume(struct device *dev)
* restore the SDR DLL settings when the clock is ungated.
*/
if (msm_host->restore_dll_config && msm_host->clk_rate)
- return sdhci_msm_restore_sdr_dll_config(host);
+ ret = sdhci_msm_restore_sdr_dll_config(host);
- return 0;
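+ /* Restore the OPP performance vote that runtime suspend dropped */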
+ dev_pm_opp_set_rate(dev, msm_host->clk_rate);
+
+ return ret;
}
static const struct dev_pm_ops sdhci_msm_pm_ops = {
diff --git a/drivers/mmc/host/sdhci-of-arasan.c b/drivers/mmc/host/sdhci-of-arasan.c
index d4905c106c06..2a4c8a2f3e64 100644
--- a/drivers/mmc/host/sdhci-of-arasan.c
+++ b/drivers/mmc/host/sdhci-of-arasan.c
@@ -28,15 +28,26 @@
#include "sdhci-pltfm.h"
#define SDHCI_ARASAN_VENDOR_REGISTER 0x78
+
+#define SDHCI_ARASAN_ITAPDLY_REGISTER 0xF0F8
+#define SDHCI_ARASAN_OTAPDLY_REGISTER 0xF0FC
+
#define SDHCI_ARASAN_CQE_BASE_ADDR 0x200
#define VENDOR_ENHANCED_STROBE BIT(0)
#define PHY_CLK_TOO_SLOW_HZ 400000
+#define SDHCI_ITAPDLY_CHGWIN 0x200
+#define SDHCI_ITAPDLY_ENABLE 0x100
+#define SDHCI_OTAPDLY_ENABLE 0x40
+
/* Default settings for ZynqMP Clock Phases */
#define ZYNQMP_ICLK_PHASE {0, 63, 63, 0, 63, 0, 0, 183, 54, 0, 0}
#define ZYNQMP_OCLK_PHASE {0, 72, 60, 0, 60, 72, 135, 48, 72, 135, 0}
+#define VERSAL_ICLK_PHASE {0, 132, 132, 0, 132, 0, 0, 162, 90, 0, 0}
+#define VERSAL_OCLK_PHASE {0, 60, 48, 0, 48, 72, 90, 36, 60, 90, 0}
+
/*
* On some SoCs the syscon area has a feature where the upper 16-bits of
* each 32-bit register act as a write mask for the lower 16-bits. This allows
@@ -62,22 +73,36 @@ struct sdhci_arasan_soc_ctl_field {
/**
* struct sdhci_arasan_soc_ctl_map - Map in syscon to corecfg registers
*
- * It's up to the licensee of the Arsan IP block to make these available
- * somewhere if needed. Presumably these will be scattered somewhere that's
- * accessible via the syscon API.
- *
* @baseclkfreq: Where to find corecfg_baseclkfreq
* @clockmultiplier: Where to find corecfg_clockmultiplier
+ * @support64b: Where to find SUPPORT64B bit
* @hiword_update: If true, use HIWORD_UPDATE to access the syscon
+ *
+ * It's up to the licensee of the Arasan IP block to make these available
+ * somewhere if needed. Presumably these will be scattered somewhere that's
+ * accessible via the syscon API.
*/
struct sdhci_arasan_soc_ctl_map {
struct sdhci_arasan_soc_ctl_field baseclkfreq;
struct sdhci_arasan_soc_ctl_field clockmultiplier;
+ struct sdhci_arasan_soc_ctl_field support64b;
bool hiword_update;
};
/**
- * struct sdhci_arasan_clk_data
+ * struct sdhci_arasan_clk_ops - Clock Operations for Arasan SD controller
+ *
+ * @sdcardclk_ops: The output clock related operations
+ * @sampleclk_ops: The sample clock related operations
+ */
+struct sdhci_arasan_clk_ops {
+ const struct clk_ops *sdcardclk_ops;
+ const struct clk_ops *sampleclk_ops;
+};
+
+/**
+ * struct sdhci_arasan_clk_data - Arasan Controller Clock Data.
+ *
* @sdcardclk_hw: Struct for the clock we might provide to a PHY.
* @sdcardclk: Pointer to normal 'struct clock' for sdcardclk_hw.
* @sampleclk_hw: Struct for the clock we might provide to a PHY.
@@ -103,14 +128,18 @@ struct sdhci_arasan_zynqmp_clk_data {
};
/**
- * struct sdhci_arasan_data
+ * struct sdhci_arasan_data - Arasan Controller Data
+ *
* @host: Pointer to the main SDHCI host structure.
* @clk_ahb: Pointer to the AHB clock
* @phy: Pointer to the generic phy
* @is_phy_on: True if the PHY is on; false if not.
+ * @has_cqe: True if controller has command queuing engine.
* @clk_data: Struct for the Arasan Controller Clock Data.
+ * @clk_ops: Struct for the Arasan Controller Clock Operations.
* @soc_ctl_base: Pointer to regmap for syscon for soc_ctl registers.
* @soc_ctl_map: Map to get offsets into soc_ctl registers.
+ * @quirks: Arasan deviations from spec.
*/
struct sdhci_arasan_data {
struct sdhci_host *host;
@@ -120,10 +149,11 @@ struct sdhci_arasan_data {
bool has_cqe;
struct sdhci_arasan_clk_data clk_data;
+ const struct sdhci_arasan_clk_ops *clk_ops;
struct regmap *soc_ctl_base;
const struct sdhci_arasan_soc_ctl_map *soc_ctl_map;
- unsigned int quirks; /* Arasan deviations from spec */
+ unsigned int quirks;
/* Controller does not have CD wired and will not function normally without */
#define SDHCI_ARASAN_QUIRK_FORCE_CDTEST BIT(0)
@@ -135,6 +165,7 @@ struct sdhci_arasan_data {
struct sdhci_arasan_of_data {
const struct sdhci_arasan_soc_ctl_map *soc_ctl_map;
const struct sdhci_pltfm_data *pdata;
+ const struct sdhci_arasan_clk_ops *clk_ops;
};
static const struct sdhci_arasan_soc_ctl_map rk3399_soc_ctl_map = {
@@ -155,17 +186,26 @@ static const struct sdhci_arasan_soc_ctl_map intel_lgm_sdxc_soc_ctl_map = {
.hiword_update = false,
};
+static const struct sdhci_arasan_soc_ctl_map intel_keembay_soc_ctl_map = {
+ .baseclkfreq = { .reg = 0x0, .width = 8, .shift = 14 },
+ .clockmultiplier = { .reg = 0x4, .width = 8, .shift = 14 },
+ .support64b = { .reg = 0x4, .width = 1, .shift = 24 },
+ .hiword_update = false,
+};
+
/**
* sdhci_arasan_syscon_write - Write to a field in soc_ctl registers
*
+ * @host: The sdhci_host
+ * @fld: The field to write to
+ * @val: The value to write
+ *
* This function allows writing to fields in sdhci_arasan_soc_ctl_map.
* Note that if a field is specified as not available (shift < 0) then
* this function will silently return an error code. It will be noisy
* and print errors for any other (unexpected) errors.
*
- * @host: The sdhci_host
- * @fld: The field to write to
- * @val: The value to write
+ * Return: 0 on success and error value on error
*/
static int sdhci_arasan_syscon_write(struct sdhci_host *host,
const struct sdhci_arasan_soc_ctl_field *fld,
@@ -335,29 +375,6 @@ static const struct sdhci_ops sdhci_arasan_ops = {
.set_power = sdhci_set_power_and_bus_voltage,
};
-static const struct sdhci_pltfm_data sdhci_arasan_pdata = {
- .ops = &sdhci_arasan_ops,
- .quirks = SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN,
- .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN |
- SDHCI_QUIRK2_CLOCK_DIV_ZERO_BROKEN |
- SDHCI_QUIRK2_STOP_WITH_TC,
-};
-
-static struct sdhci_arasan_of_data sdhci_arasan_data = {
- .pdata = &sdhci_arasan_pdata,
-};
-
-static const struct sdhci_pltfm_data sdhci_arasan_zynqmp_pdata = {
- .ops = &sdhci_arasan_ops,
- .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN |
- SDHCI_QUIRK2_CLOCK_DIV_ZERO_BROKEN |
- SDHCI_QUIRK2_STOP_WITH_TC,
-};
-
-static struct sdhci_arasan_of_data sdhci_arasan_zynqmp_data = {
- .pdata = &sdhci_arasan_zynqmp_pdata,
-};
-
static u32 sdhci_arasan_cqhci_irq(struct sdhci_host *host, u32 intmask)
{
int cmd_error = 0;
@@ -414,28 +431,14 @@ static const struct sdhci_pltfm_data sdhci_arasan_cqe_pdata = {
SDHCI_QUIRK2_CLOCK_DIV_ZERO_BROKEN,
};
-static struct sdhci_arasan_of_data sdhci_arasan_rk3399_data = {
- .soc_ctl_map = &rk3399_soc_ctl_map,
- .pdata = &sdhci_arasan_cqe_pdata,
-};
-
-static struct sdhci_arasan_of_data intel_lgm_emmc_data = {
- .soc_ctl_map = &intel_lgm_emmc_soc_ctl_map,
- .pdata = &sdhci_arasan_cqe_pdata,
-};
-
-static struct sdhci_arasan_of_data intel_lgm_sdxc_data = {
- .soc_ctl_map = &intel_lgm_sdxc_soc_ctl_map,
- .pdata = &sdhci_arasan_cqe_pdata,
-};
-
#ifdef CONFIG_PM_SLEEP
/**
* sdhci_arasan_suspend - Suspend method for the driver
* @dev: Address of the device structure
- * Returns 0 on success and error value on error
*
* Put the device in a low power state.
+ *
+ * Return: 0 on success and error value on error
*/
static int sdhci_arasan_suspend(struct device *dev)
{
@@ -476,9 +479,10 @@ static int sdhci_arasan_suspend(struct device *dev)
/**
* sdhci_arasan_resume - Resume method for the driver
* @dev: Address of the device structure
- * Returns 0 on success and error value on error
*
* Resume operation after suspend
+ *
+ * Return: 0 on success and error value on error
*/
static int sdhci_arasan_resume(struct device *dev)
{
@@ -524,54 +528,19 @@ static int sdhci_arasan_resume(struct device *dev)
static SIMPLE_DEV_PM_OPS(sdhci_arasan_dev_pm_ops, sdhci_arasan_suspend,
sdhci_arasan_resume);
-static const struct of_device_id sdhci_arasan_of_match[] = {
- /* SoC-specific compatible strings w/ soc_ctl_map */
- {
- .compatible = "rockchip,rk3399-sdhci-5.1",
- .data = &sdhci_arasan_rk3399_data,
- },
- {
- .compatible = "intel,lgm-sdhci-5.1-emmc",
- .data = &intel_lgm_emmc_data,
- },
- {
- .compatible = "intel,lgm-sdhci-5.1-sdxc",
- .data = &intel_lgm_sdxc_data,
- },
- /* Generic compatible below here */
- {
- .compatible = "arasan,sdhci-8.9a",
- .data = &sdhci_arasan_data,
- },
- {
- .compatible = "arasan,sdhci-5.1",
- .data = &sdhci_arasan_data,
- },
- {
- .compatible = "arasan,sdhci-4.9a",
- .data = &sdhci_arasan_data,
- },
- {
- .compatible = "xlnx,zynqmp-8.9a",
- .data = &sdhci_arasan_zynqmp_data,
- },
- { /* sentinel */ }
-};
-MODULE_DEVICE_TABLE(of, sdhci_arasan_of_match);
-
/**
* sdhci_arasan_sdcardclk_recalc_rate - Return the card clock rate
*
+ * @hw: Pointer to the hardware clock structure.
+ * @parent_rate: The parent rate (should be rate of clk_xin).
+ *
* Return the current actual rate of the SD card clock. This can be used
* to communicate with out PHY.
*
- * @hw: Pointer to the hardware clock structure.
- * @parent_rate The parent rate (should be rate of clk_xin).
- * Returns the card clock rate.
+ * Return: The card clock rate.
*/
static unsigned long sdhci_arasan_sdcardclk_recalc_rate(struct clk_hw *hw,
unsigned long parent_rate)
-
{
struct sdhci_arasan_clk_data *clk_data =
container_of(hw, struct sdhci_arasan_clk_data, sdcardclk_hw);
@@ -589,16 +558,16 @@ static const struct clk_ops arasan_sdcardclk_ops = {
/**
* sdhci_arasan_sampleclk_recalc_rate - Return the sampling clock rate
*
+ * @hw: Pointer to the hardware clock structure.
+ * @parent_rate: The parent rate (should be rate of clk_xin).
+ *
* Return the current actual rate of the sampling clock. This can be used
* to communicate with out PHY.
*
- * @hw: Pointer to the hardware clock structure.
- * @parent_rate The parent rate (should be rate of clk_xin).
- * Returns the sample clock rate.
+ * Return: The sample clock rate.
*/
static unsigned long sdhci_arasan_sampleclk_recalc_rate(struct clk_hw *hw,
unsigned long parent_rate)
-
{
struct sdhci_arasan_clk_data *clk_data =
container_of(hw, struct sdhci_arasan_clk_data, sampleclk_hw);
@@ -616,14 +585,14 @@ static const struct clk_ops arasan_sampleclk_ops = {
/**
* sdhci_zynqmp_sdcardclk_set_phase - Set the SD Output Clock Tap Delays
*
+ * @hw: Pointer to the hardware clock structure.
+ * @degrees: The clock phase shift between 0 - 359.
+ *
* Set the SD Output Clock Tap Delays for Output path
*
- * @hw: Pointer to the hardware clock structure.
- * @degrees The clock phase shift between 0 - 359.
* Return: 0 on success and error value on error
*/
static int sdhci_zynqmp_sdcardclk_set_phase(struct clk_hw *hw, int degrees)
-
{
struct sdhci_arasan_clk_data *clk_data =
container_of(hw, struct sdhci_arasan_clk_data, sdcardclk_hw);
@@ -688,14 +657,14 @@ static const struct clk_ops zynqmp_sdcardclk_ops = {
/**
* sdhci_zynqmp_sampleclk_set_phase - Set the SD Input Clock Tap Delays
*
+ * @hw: Pointer to the hardware clock structure.
+ * @degrees: The clock phase shift between 0 - 359.
+ *
* Set the SD Input Clock Tap Delays for Input path
*
- * @hw: Pointer to the hardware clock structure.
- * @degrees The clock phase shift between 0 - 359.
* Return: 0 on success and error value on error
*/
static int sdhci_zynqmp_sampleclk_set_phase(struct clk_hw *hw, int degrees)
-
{
struct sdhci_arasan_clk_data *clk_data =
container_of(hw, struct sdhci_arasan_clk_data, sampleclk_hw);
@@ -757,6 +726,152 @@ static const struct clk_ops zynqmp_sampleclk_ops = {
.set_phase = sdhci_zynqmp_sampleclk_set_phase,
};
+/**
+ * sdhci_versal_sdcardclk_set_phase - Set the SD Output Clock Tap Delays
+ *
+ * @hw: Pointer to the hardware clock structure.
+ * @degrees: The clock phase shift between 0 - 359.
+ *
+ * Set the SD Output Clock Tap Delays for Output path
+ *
+ * Return: 0 on success and error value on error
+ */
+static int sdhci_versal_sdcardclk_set_phase(struct clk_hw *hw, int degrees)
+{
+ struct sdhci_arasan_clk_data *clk_data =
+ container_of(hw, struct sdhci_arasan_clk_data, sdcardclk_hw);
+ struct sdhci_arasan_data *sdhci_arasan =
+ container_of(clk_data, struct sdhci_arasan_data, clk_data);
+ struct sdhci_host *host = sdhci_arasan->host;
+ u8 tap_delay, tap_max = 0;
+
+ /*
+ * This is applicable for SDHCI_SPEC_300 and above
+ * Versal does not set phase for <=25MHz clock.
+ * If degrees is zero, no need to do anything.
+ */
+ if (host->version < SDHCI_SPEC_300 ||
+ host->timing == MMC_TIMING_LEGACY ||
+ host->timing == MMC_TIMING_UHS_SDR12 || !degrees)
+ return 0;
+
+ switch (host->timing) {
+ case MMC_TIMING_MMC_HS:
+ case MMC_TIMING_SD_HS:
+ case MMC_TIMING_UHS_SDR25:
+ case MMC_TIMING_UHS_DDR50:
+ case MMC_TIMING_MMC_DDR52:
+ /* For 50MHz clock, 30 Taps are available */
+ tap_max = 30;
+ break;
+ case MMC_TIMING_UHS_SDR50:
+ /* For 100MHz clock, 15 Taps are available */
+ tap_max = 15;
+ break;
+ case MMC_TIMING_UHS_SDR104:
+ case MMC_TIMING_MMC_HS200:
+ /* For 200MHz clock, 8 Taps are available */
+ tap_max = 8;
+ default:
+ break;
+ }
+
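+ /* Convert the requested phase (0-359 degrees) into a tap count for this timing */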
+ tap_delay = (degrees * tap_max) / 360;
+
+ /* Set the Clock Phase */
+ if (tap_delay) {
+ u32 regval;
+
+ regval = sdhci_readl(host, SDHCI_ARASAN_OTAPDLY_REGISTER);
+ regval |= SDHCI_OTAPDLY_ENABLE;
+ sdhci_writel(host, regval, SDHCI_ARASAN_OTAPDLY_REGISTER);
+ regval |= tap_delay;
+ sdhci_writel(host, regval, SDHCI_ARASAN_OTAPDLY_REGISTER);
+ }
+
+ return 0;
+}
+
+static const struct clk_ops versal_sdcardclk_ops = {
+ .recalc_rate = sdhci_arasan_sdcardclk_recalc_rate,
+ .set_phase = sdhci_versal_sdcardclk_set_phase,
+};
+
+/**
+ * sdhci_versal_sampleclk_set_phase - Set the SD Input Clock Tap Delays
+ *
+ * @hw: Pointer to the hardware clock structure.
+ * @degrees: The clock phase shift between 0 - 359.
+ *
+ * Set the SD Input Clock Tap Delays for Input path
+ *
+ * Return: 0 on success and error value on error
+ */
+static int sdhci_versal_sampleclk_set_phase(struct clk_hw *hw, int degrees)
+{
+ struct sdhci_arasan_clk_data *clk_data =
+ container_of(hw, struct sdhci_arasan_clk_data, sampleclk_hw);
+ struct sdhci_arasan_data *sdhci_arasan =
+ container_of(clk_data, struct sdhci_arasan_data, clk_data);
+ struct sdhci_host *host = sdhci_arasan->host;
+ u8 tap_delay, tap_max = 0;
+
+ /*
+ * This is applicable for SDHCI_SPEC_300 and above
+ * Versal does not set phase for <=25MHz clock.
+ * If degrees is zero, no need to do anything.
+ */
+ if (host->version < SDHCI_SPEC_300 ||
+ host->timing == MMC_TIMING_LEGACY ||
+ host->timing == MMC_TIMING_UHS_SDR12 || !degrees)
+ return 0;
+
+ switch (host->timing) {
+ case MMC_TIMING_MMC_HS:
+ case MMC_TIMING_SD_HS:
+ case MMC_TIMING_UHS_SDR25:
+ case MMC_TIMING_UHS_DDR50:
+ case MMC_TIMING_MMC_DDR52:
+ /* For 50MHz clock, 120 Taps are available */
+ tap_max = 120;
+ break;
+ case MMC_TIMING_UHS_SDR50:
+ /* For 100MHz clock, 60 Taps are available */
+ tap_max = 60;
+ break;
+ case MMC_TIMING_UHS_SDR104:
+ case MMC_TIMING_MMC_HS200:
+ /* For 200MHz clock, 30 Taps are available */
+ tap_max = 30;
+ default:
+ break;
+ }
+
+ tap_delay = (degrees * tap_max) / 360;
+
+ /* Set the Clock Phase */
+ if (tap_delay) {
+ u32 regval;
+
+ regval = sdhci_readl(host, SDHCI_ARASAN_ITAPDLY_REGISTER);
+ regval |= SDHCI_ITAPDLY_CHGWIN;
+ sdhci_writel(host, regval, SDHCI_ARASAN_ITAPDLY_REGISTER);
+ regval |= SDHCI_ITAPDLY_ENABLE;
+ sdhci_writel(host, regval, SDHCI_ARASAN_ITAPDLY_REGISTER);
+ regval |= tap_delay;
+ sdhci_writel(host, regval, SDHCI_ARASAN_ITAPDLY_REGISTER);
+ regval &= ~SDHCI_ITAPDLY_CHGWIN;
+ sdhci_writel(host, regval, SDHCI_ARASAN_ITAPDLY_REGISTER);
+ }
+
+ return 0;
+}
+
+static const struct clk_ops versal_sampleclk_ops = {
+ .recalc_rate = sdhci_arasan_sampleclk_recalc_rate,
+ .set_phase = sdhci_versal_sampleclk_set_phase,
+};
+
static void arasan_zynqmp_dll_reset(struct sdhci_host *host, u32 deviceid)
{
struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
@@ -804,6 +919,9 @@ static int arasan_zynqmp_execute_tuning(struct mmc_host *mmc, u32 opcode)
/**
* sdhci_arasan_update_clockmultiplier - Set corecfg_clockmultiplier
*
+ * @host: The sdhci_host
+ * @value: The value to write
+ *
* The corecfg_clockmultiplier is supposed to contain clock multiplier
* value of programmable clock generator.
*
@@ -815,8 +933,6 @@ static int arasan_zynqmp_execute_tuning(struct mmc_host *mmc, u32 opcode)
* - The value of corecfg_clockmultiplier should sync with that of corresponding
* value reading from sdhci_capability_register. So this function is called
* once at probe time and never called again.
- *
- * @host: The sdhci_host
*/
static void sdhci_arasan_update_clockmultiplier(struct sdhci_host *host,
u32 value)
@@ -843,6 +959,8 @@ static void sdhci_arasan_update_clockmultiplier(struct sdhci_host *host,
/**
* sdhci_arasan_update_baseclkfreq - Set corecfg_baseclkfreq
*
+ * @host: The sdhci_host
+ *
* The corecfg_baseclkfreq is supposed to contain the MHz of clk_xin. This
* function can be used to make that happen.
*
@@ -854,8 +972,6 @@ static void sdhci_arasan_update_clockmultiplier(struct sdhci_host *host,
* - It's assumed that clk_xin is not dynamic and that we use the SDHCI divider
* to achieve lower clock rates. That means that this function is called once
* at probe time and never called again.
- *
- * @host: The sdhci_host
*/
static void sdhci_arasan_update_baseclkfreq(struct sdhci_host *host)
{
@@ -919,10 +1035,10 @@ static void arasan_dt_read_clk_phase(struct device *dev,
/**
* arasan_dt_parse_clk_phases - Read Clock Delay values from DT
*
- * Called at initialization to parse the values of Clock Delays.
- *
* @dev: Pointer to our struct device.
* @clk_data: Pointer to the Clock Data structure
+ *
+ * Called at initialization to parse the values of Clock Delays.
*/
static void arasan_dt_parse_clk_phases(struct device *dev,
struct sdhci_arasan_clk_data *clk_data)
@@ -954,6 +1070,16 @@ static void arasan_dt_parse_clk_phases(struct device *dev,
}
}
+ if (of_device_is_compatible(dev->of_node, "xlnx,versal-8.9a")) {
+ iclk_phase = (int [MMC_TIMING_MMC_HS400 + 1]) VERSAL_ICLK_PHASE;
+ oclk_phase = (int [MMC_TIMING_MMC_HS400 + 1]) VERSAL_OCLK_PHASE;
+
+ for (i = 0; i <= MMC_TIMING_MMC_HS400; i++) {
+ clk_data->clk_phase_in[i] = iclk_phase[i];
+ clk_data->clk_phase_out[i] = oclk_phase[i];
+ }
+ }
+
arasan_dt_read_clk_phase(dev, clk_data, MMC_TIMING_LEGACY,
"clk-phase-legacy");
arasan_dt_read_clk_phase(dev, clk_data, MMC_TIMING_MMC_HS,
@@ -978,17 +1104,191 @@ static void arasan_dt_parse_clk_phases(struct device *dev,
"clk-phase-mmc-hs400");
}
+static const struct sdhci_pltfm_data sdhci_arasan_pdata = {
+ .ops = &sdhci_arasan_ops,
+ .quirks = SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN,
+ .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN |
+ SDHCI_QUIRK2_CLOCK_DIV_ZERO_BROKEN |
+ SDHCI_QUIRK2_STOP_WITH_TC,
+};
+
+static const struct sdhci_arasan_clk_ops arasan_clk_ops = {
+ .sdcardclk_ops = &arasan_sdcardclk_ops,
+ .sampleclk_ops = &arasan_sampleclk_ops,
+};
+
+static struct sdhci_arasan_of_data sdhci_arasan_generic_data = {
+ .pdata = &sdhci_arasan_pdata,
+ .clk_ops = &arasan_clk_ops,
+};
+
+static const struct sdhci_pltfm_data sdhci_keembay_emmc_pdata = {
+ .ops = &sdhci_arasan_cqe_ops,
+ .quirks = SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN |
+ SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC |
+ SDHCI_QUIRK_NO_LED |
+ SDHCI_QUIRK_32BIT_DMA_ADDR |
+ SDHCI_QUIRK_32BIT_DMA_SIZE |
+ SDHCI_QUIRK_32BIT_ADMA_SIZE,
+ .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN |
+ SDHCI_QUIRK2_CLOCK_DIV_ZERO_BROKEN |
+ SDHCI_QUIRK2_CAPS_BIT63_FOR_HS400 |
+ SDHCI_QUIRK2_STOP_WITH_TC |
+ SDHCI_QUIRK2_BROKEN_64_BIT_DMA,
+};
+
+static const struct sdhci_pltfm_data sdhci_keembay_sd_pdata = {
+ .ops = &sdhci_arasan_ops,
+ .quirks = SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN |
+ SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC |
+ SDHCI_QUIRK_NO_LED |
+ SDHCI_QUIRK_32BIT_DMA_ADDR |
+ SDHCI_QUIRK_32BIT_DMA_SIZE |
+ SDHCI_QUIRK_32BIT_ADMA_SIZE,
+ .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN |
+ SDHCI_QUIRK2_CLOCK_DIV_ZERO_BROKEN |
+ SDHCI_QUIRK2_CARD_ON_NEEDS_BUS_ON |
+ SDHCI_QUIRK2_STOP_WITH_TC |
+ SDHCI_QUIRK2_BROKEN_64_BIT_DMA,
+};
+
+static const struct sdhci_pltfm_data sdhci_keembay_sdio_pdata = {
+ .ops = &sdhci_arasan_ops,
+ .quirks = SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN |
+ SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC |
+ SDHCI_QUIRK_NO_LED |
+ SDHCI_QUIRK_32BIT_DMA_ADDR |
+ SDHCI_QUIRK_32BIT_DMA_SIZE |
+ SDHCI_QUIRK_32BIT_ADMA_SIZE,
+ .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN |
+ SDHCI_QUIRK2_CLOCK_DIV_ZERO_BROKEN |
+ SDHCI_QUIRK2_HOST_OFF_CARD_ON |
+ SDHCI_QUIRK2_BROKEN_64_BIT_DMA,
+};
+
+static struct sdhci_arasan_of_data sdhci_arasan_rk3399_data = {
+ .soc_ctl_map = &rk3399_soc_ctl_map,
+ .pdata = &sdhci_arasan_cqe_pdata,
+ .clk_ops = &arasan_clk_ops,
+};
+
+static struct sdhci_arasan_of_data intel_lgm_emmc_data = {
+ .soc_ctl_map = &intel_lgm_emmc_soc_ctl_map,
+ .pdata = &sdhci_arasan_cqe_pdata,
+ .clk_ops = &arasan_clk_ops,
+};
+
+static struct sdhci_arasan_of_data intel_lgm_sdxc_data = {
+ .soc_ctl_map = &intel_lgm_sdxc_soc_ctl_map,
+ .pdata = &sdhci_arasan_cqe_pdata,
+ .clk_ops = &arasan_clk_ops,
+};
+
+static const struct sdhci_pltfm_data sdhci_arasan_zynqmp_pdata = {
+ .ops = &sdhci_arasan_ops,
+ .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN |
+ SDHCI_QUIRK2_CLOCK_DIV_ZERO_BROKEN |
+ SDHCI_QUIRK2_STOP_WITH_TC,
+};
+
+static const struct sdhci_arasan_clk_ops zynqmp_clk_ops = {
+ .sdcardclk_ops = &zynqmp_sdcardclk_ops,
+ .sampleclk_ops = &zynqmp_sampleclk_ops,
+};
+
+static struct sdhci_arasan_of_data sdhci_arasan_zynqmp_data = {
+ .pdata = &sdhci_arasan_zynqmp_pdata,
+ .clk_ops = &zynqmp_clk_ops,
+};
+
+static const struct sdhci_arasan_clk_ops versal_clk_ops = {
+ .sdcardclk_ops = &versal_sdcardclk_ops,
+ .sampleclk_ops = &versal_sampleclk_ops,
+};
+
+static struct sdhci_arasan_of_data sdhci_arasan_versal_data = {
+ .pdata = &sdhci_arasan_zynqmp_pdata,
+ .clk_ops = &versal_clk_ops,
+};
+
+static struct sdhci_arasan_of_data intel_keembay_emmc_data = {
+ .soc_ctl_map = &intel_keembay_soc_ctl_map,
+ .pdata = &sdhci_keembay_emmc_pdata,
+};
+
+static struct sdhci_arasan_of_data intel_keembay_sd_data = {
+ .soc_ctl_map = &intel_keembay_soc_ctl_map,
+ .pdata = &sdhci_keembay_sd_pdata,
+};
+
+static struct sdhci_arasan_of_data intel_keembay_sdio_data = {
+ .soc_ctl_map = &intel_keembay_soc_ctl_map,
+ .pdata = &sdhci_keembay_sdio_pdata,
+};
+
+static const struct of_device_id sdhci_arasan_of_match[] = {
+ /* SoC-specific compatible strings w/ soc_ctl_map */
+ {
+ .compatible = "rockchip,rk3399-sdhci-5.1",
+ .data = &sdhci_arasan_rk3399_data,
+ },
+ {
+ .compatible = "intel,lgm-sdhci-5.1-emmc",
+ .data = &intel_lgm_emmc_data,
+ },
+ {
+ .compatible = "intel,lgm-sdhci-5.1-sdxc",
+ .data = &intel_lgm_sdxc_data,
+ },
+ {
+ .compatible = "intel,keembay-sdhci-5.1-emmc",
+ .data = &intel_keembay_emmc_data,
+ },
+ {
+ .compatible = "intel,keembay-sdhci-5.1-sd",
+ .data = &intel_keembay_sd_data,
+ },
+ {
+ .compatible = "intel,keembay-sdhci-5.1-sdio",
+ .data = &intel_keembay_sdio_data,
+ },
+ /* Generic compatible below here */
+ {
+ .compatible = "arasan,sdhci-8.9a",
+ .data = &sdhci_arasan_generic_data,
+ },
+ {
+ .compatible = "arasan,sdhci-5.1",
+ .data = &sdhci_arasan_generic_data,
+ },
+ {
+ .compatible = "arasan,sdhci-4.9a",
+ .data = &sdhci_arasan_generic_data,
+ },
+ {
+ .compatible = "xlnx,zynqmp-8.9a",
+ .data = &sdhci_arasan_zynqmp_data,
+ },
+ {
+ .compatible = "xlnx,versal-8.9a",
+ .data = &sdhci_arasan_versal_data,
+ },
+ { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, sdhci_arasan_of_match);
+
/**
* sdhci_arasan_register_sdcardclk - Register the sdcardclk for a PHY to use
*
+ * @sdhci_arasan: Our private data structure.
+ * @clk_xin: Pointer to the functional clock
+ * @dev: Pointer to our struct device.
+ *
* Some PHY devices need to know what the actual card clock is. In order for
* them to find out, we'll provide a clock through the common clock framework
* for them to query.
*
- * @sdhci_arasan: Our private data structure.
- * @clk_xin: Pointer to the functional clock
- * @dev: Pointer to our struct device.
- * Returns 0 on success and error value on error
+ * Return: 0 on success and error value on error
*/
static int
sdhci_arasan_register_sdcardclk(struct sdhci_arasan_data *sdhci_arasan,
@@ -1012,10 +1312,7 @@ sdhci_arasan_register_sdcardclk(struct sdhci_arasan_data *sdhci_arasan,
sdcardclk_init.parent_names = &parent_clk_name;
sdcardclk_init.num_parents = 1;
sdcardclk_init.flags = CLK_GET_RATE_NOCACHE;
- if (of_device_is_compatible(np, "xlnx,zynqmp-8.9a"))
- sdcardclk_init.ops = &zynqmp_sdcardclk_ops;
- else
- sdcardclk_init.ops = &arasan_sdcardclk_ops;
+ sdcardclk_init.ops = sdhci_arasan->clk_ops->sdcardclk_ops;
clk_data->sdcardclk_hw.init = &sdcardclk_init;
clk_data->sdcardclk =
@@ -1033,14 +1330,15 @@ sdhci_arasan_register_sdcardclk(struct sdhci_arasan_data *sdhci_arasan,
/**
* sdhci_arasan_register_sampleclk - Register the sampleclk for a PHY to use
*
+ * @sdhci_arasan: Our private data structure.
+ * @clk_xin: Pointer to the functional clock
+ * @dev: Pointer to our struct device.
+ *
* Some PHY devices need to know what the actual card clock is. In order for
* them to find out, we'll provide a clock through the common clock framework
* for them to query.
*
- * @sdhci_arasan: Our private data structure.
- * @clk_xin: Pointer to the functional clock
- * @dev: Pointer to our struct device.
- * Returns 0 on success and error value on error
+ * Return: 0 on success and error value on error
*/
static int
sdhci_arasan_register_sampleclk(struct sdhci_arasan_data *sdhci_arasan,
@@ -1064,10 +1362,7 @@ sdhci_arasan_register_sampleclk(struct sdhci_arasan_data *sdhci_arasan,
sampleclk_init.parent_names = &parent_clk_name;
sampleclk_init.num_parents = 1;
sampleclk_init.flags = CLK_GET_RATE_NOCACHE;
- if (of_device_is_compatible(np, "xlnx,zynqmp-8.9a"))
- sampleclk_init.ops = &zynqmp_sampleclk_ops;
- else
- sampleclk_init.ops = &arasan_sampleclk_ops;
+ sampleclk_init.ops = sdhci_arasan->clk_ops->sampleclk_ops;
clk_data->sampleclk_hw.init = &sampleclk_init;
clk_data->sampleclk =
@@ -1085,10 +1380,10 @@ sdhci_arasan_register_sampleclk(struct sdhci_arasan_data *sdhci_arasan,
/**
* sdhci_arasan_unregister_sdclk - Undoes sdhci_arasan_register_sdclk()
*
+ * @dev: Pointer to our struct device.
+ *
* Should be called any time we're exiting and sdhci_arasan_register_sdclk()
* returned success.
- *
- * @dev: Pointer to our struct device.
*/
static void sdhci_arasan_unregister_sdclk(struct device *dev)
{
@@ -1101,8 +1396,46 @@ static void sdhci_arasan_unregister_sdclk(struct device *dev)
}
/**
+ * sdhci_arasan_update_support64b - Set SUPPORT_64B (64-bit System Bus Support)
+ *
+ * This should be set based on the System Address Bus.
+ * 0: the Core supports only 32-bit System Address Bus.
+ * 1: the Core supports 64-bit System Address Bus.
+ *
+ * NOTES:
+ * - For Keem Bay, it is required to clear this bit. Its default value is 1'b1.
+ * Keem Bay does not support 64-bit access.
+ *
+ * @host: The sdhci_host
+ * @value: The value (0 or 1) to write to the SUPPORT_64B field
+ */
+static void sdhci_arasan_update_support64b(struct sdhci_host *host, u32 value)
+{
+ struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+ struct sdhci_arasan_data *sdhci_arasan = sdhci_pltfm_priv(pltfm_host);
+ const struct sdhci_arasan_soc_ctl_map *soc_ctl_map =
+ sdhci_arasan->soc_ctl_map;
+
+ /* Having a map is optional */
+ if (!soc_ctl_map)
+ return;
+
+ /* If we have a map, we expect to have a syscon */
+ if (!sdhci_arasan->soc_ctl_base) {
+ pr_warn("%s: Have regmap, but no soc-ctl-syscon\n",
+ mmc_hostname(host->mmc));
+ return;
+ }
+
+ sdhci_arasan_syscon_write(host, &soc_ctl_map->support64b, value);
+}
+
+/**
* sdhci_arasan_register_sdclk - Register the sdcardclk for a PHY to use
*
+ * @sdhci_arasan: Our private data structure.
+ * @clk_xin: Pointer to the functional clock
+ * @dev: Pointer to our struct device.
+ *
* Some PHY devices need to know what the actual card clock is. In order for
* them to find out, we'll provide a clock through the common clock framework
* for them to query.
@@ -1115,10 +1448,7 @@ static void sdhci_arasan_unregister_sdclk(struct device *dev)
* to create nice clean device tree bindings and later (if needed) we can try
* re-architecting SDHCI if we see some benefit to it.
*
- * @sdhci_arasan: Our private data structure.
- * @clk_xin: Pointer to the functional clock
- * @dev: Pointer to our struct device.
- * Returns 0 on success and error value on error
+ * Return: 0 on success and error value on error
*/
static int sdhci_arasan_register_sdclk(struct sdhci_arasan_data *sdhci_arasan,
struct clk *clk_xin,
@@ -1215,6 +1545,7 @@ static int sdhci_arasan_probe(struct platform_device *pdev)
sdhci_arasan->host = host;
sdhci_arasan->soc_ctl_map = data->soc_ctl_map;
+ sdhci_arasan->clk_ops = data->clk_ops;
node = of_parse_phandle(pdev->dev.of_node, "arasan,soc-ctl-syscon", 0);
if (node) {
@@ -1270,6 +1601,15 @@ static int sdhci_arasan_probe(struct platform_device *pdev)
"rockchip,rk3399-sdhci-5.1"))
sdhci_arasan_update_clockmultiplier(host, 0x0);
+ if (of_device_is_compatible(np, "intel,keembay-sdhci-5.1-emmc") ||
+ of_device_is_compatible(np, "intel,keembay-sdhci-5.1-sd") ||
+ of_device_is_compatible(np, "intel,keembay-sdhci-5.1-sdio")) {
+ sdhci_arasan_update_clockmultiplier(host, 0x0);
+ sdhci_arasan_update_support64b(host, 0x0);
+
+ host->mmc->caps |= MMC_CAP_WAIT_WHILE_BUSY;
+ }
+
sdhci_arasan_update_baseclkfreq(host);
ret = sdhci_arasan_register_sdclk(sdhci_arasan, clk_xin, &pdev->dev);
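
The Arasan changes above select the sdcardclk/sampleclk ops from a per-SoC clk_ops table and keep exposing the card clock through the common clock framework so a PHY can query the live rate (hence CLK_GET_RATE_NOCACHE). A minimal consumer sketch, assuming a PHY whose device tree node references that exposed clock under a hypothetical "cardclk" clock-names entry (the name is not taken from this patch):

#include <linux/clk.h>
#include <linux/device.h>
#include <linux/err.h>

/* Hypothetical PHY probe fragment: look up and query the card clock. */
static int example_phy_probe(struct device *dev)
{
	struct clk *cardclk;

	/* "cardclk" is an assumed clock-names entry, not defined here. */
	cardclk = devm_clk_get(dev, "cardclk");
	if (IS_ERR(cardclk))
		return PTR_ERR(cardclk);

	/* CLK_GET_RATE_NOCACHE makes this reflect the current card clock. */
	dev_info(dev, "card clock is %lu Hz\n", clk_get_rate(cardclk));

	return 0;
}
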
diff --git a/drivers/mmc/host/sdhci-of-at91.c b/drivers/mmc/host/sdhci-of-at91.c
index c79bff5e2280..1ece2c50042c 100644
--- a/drivers/mmc/host/sdhci-of-at91.c
+++ b/drivers/mmc/host/sdhci-of-at91.c
@@ -6,6 +6,7 @@
* 2015 Ludovic Desroches <ludovic.desroches@atmel.com>
*/
+#include <linux/bitfield.h>
#include <linux/clk.h>
#include <linux/delay.h>
#include <linux/err.h>
@@ -120,9 +121,12 @@ static void sdhci_at91_reset(struct sdhci_host *host, u8 mask)
|| mmc_gpio_get_cd(host->mmc) >= 0)
sdhci_at91_set_force_card_detect(host);
- if (priv->cal_always_on && (mask & SDHCI_RESET_ALL))
- sdhci_writel(host, SDMMC_CALCR_ALWYSON | SDMMC_CALCR_EN,
+ if (priv->cal_always_on && (mask & SDHCI_RESET_ALL)) {
+ u32 calcr = sdhci_readl(host, SDMMC_CALCR);
+
+ sdhci_writel(host, calcr | SDMMC_CALCR_ALWYSON | SDMMC_CALCR_EN,
SDMMC_CALCR);
+ }
}
static const struct sdhci_ops sdhci_at91_sama5d2_ops = {
@@ -179,9 +183,9 @@ static int sdhci_at91_set_clks_presets(struct device *dev)
clk_mul = gck_rate / clk_base_rate - 1;
caps0 &= ~SDHCI_CLOCK_V3_BASE_MASK;
- caps0 |= (clk_base << SDHCI_CLOCK_BASE_SHIFT) & SDHCI_CLOCK_V3_BASE_MASK;
+ caps0 |= FIELD_PREP(SDHCI_CLOCK_V3_BASE_MASK, clk_base);
caps1 &= ~SDHCI_CLOCK_MUL_MASK;
- caps1 |= (clk_mul << SDHCI_CLOCK_MUL_SHIFT) & SDHCI_CLOCK_MUL_MASK;
+ caps1 |= FIELD_PREP(SDHCI_CLOCK_MUL_MASK, clk_mul);
/* Set capabilities in r/w mode. */
writel(SDMMC_CACR_KEY | SDMMC_CACR_CAPWREN, host->ioaddr + SDMMC_CACR);
writel(caps0, host->ioaddr + SDHCI_CAPABILITIES);
diff --git a/drivers/mmc/host/sdhci-of-dwcmshc.c b/drivers/mmc/host/sdhci-of-dwcmshc.c
index a5137845a1c7..64ac0dbee95c 100644
--- a/drivers/mmc/host/sdhci-of-dwcmshc.c
+++ b/drivers/mmc/host/sdhci-of-dwcmshc.c
@@ -16,6 +16,9 @@
#include "sdhci-pltfm.h"
+/* DWCMSHC specific Mode Select value */
+#define DWCMSHC_CTRL_HS400 0x7
+
#define BOUNDARY_OK(addr, len) \
((addr | (SZ_128M - 1)) == ((addr + len - 1) | (SZ_128M - 1)))
@@ -46,10 +49,36 @@ static void dwcmshc_adma_write_desc(struct sdhci_host *host, void **desc,
sdhci_adma_write_desc(host, desc, addr, len, cmd);
}
+static void dwcmshc_set_uhs_signaling(struct sdhci_host *host,
+ unsigned int timing)
+{
+ u16 ctrl_2;
+
+ ctrl_2 = sdhci_readw(host, SDHCI_HOST_CONTROL2);
+ /* Select Bus Speed Mode for host */
+ ctrl_2 &= ~SDHCI_CTRL_UHS_MASK;
+ if ((timing == MMC_TIMING_MMC_HS200) ||
+ (timing == MMC_TIMING_UHS_SDR104))
+ ctrl_2 |= SDHCI_CTRL_UHS_SDR104;
+ else if (timing == MMC_TIMING_UHS_SDR12)
+ ctrl_2 |= SDHCI_CTRL_UHS_SDR12;
+ else if ((timing == MMC_TIMING_UHS_SDR25) ||
+ (timing == MMC_TIMING_MMC_HS))
+ ctrl_2 |= SDHCI_CTRL_UHS_SDR25;
+ else if (timing == MMC_TIMING_UHS_SDR50)
+ ctrl_2 |= SDHCI_CTRL_UHS_SDR50;
+ else if ((timing == MMC_TIMING_UHS_DDR50) ||
+ (timing == MMC_TIMING_MMC_DDR52))
+ ctrl_2 |= SDHCI_CTRL_UHS_DDR50;
+ else if (timing == MMC_TIMING_MMC_HS400)
+ ctrl_2 |= DWCMSHC_CTRL_HS400;
+ sdhci_writew(host, ctrl_2, SDHCI_HOST_CONTROL2);
+}
+
static const struct sdhci_ops sdhci_dwcmshc_ops = {
.set_clock = sdhci_set_clock,
.set_bus_width = sdhci_set_bus_width,
- .set_uhs_signaling = sdhci_set_uhs_signaling,
+ .set_uhs_signaling = dwcmshc_set_uhs_signaling,
.get_max_clock = sdhci_pltfm_clk_get_max_clock,
.reset = sdhci_reset,
.adma_write_desc = dwcmshc_adma_write_desc,
@@ -134,6 +163,48 @@ static int dwcmshc_remove(struct platform_device *pdev)
return 0;
}
+#ifdef CONFIG_PM_SLEEP
+static int dwcmshc_suspend(struct device *dev)
+{
+ struct sdhci_host *host = dev_get_drvdata(dev);
+ struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+ struct dwcmshc_priv *priv = sdhci_pltfm_priv(pltfm_host);
+ int ret;
+
+ ret = sdhci_suspend_host(host);
+ if (ret)
+ return ret;
+
+ clk_disable_unprepare(pltfm_host->clk);
+ if (!IS_ERR(priv->bus_clk))
+ clk_disable_unprepare(priv->bus_clk);
+
+ return ret;
+}
+
+static int dwcmshc_resume(struct device *dev)
+{
+ struct sdhci_host *host = dev_get_drvdata(dev);
+ struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+ struct dwcmshc_priv *priv = sdhci_pltfm_priv(pltfm_host);
+ int ret;
+
+ ret = clk_prepare_enable(pltfm_host->clk);
+ if (ret)
+ return ret;
+
+ if (!IS_ERR(priv->bus_clk)) {
+ ret = clk_prepare_enable(priv->bus_clk);
+ if (ret)
+ return ret;
+ }
+
+ return sdhci_resume_host(host);
+}
+#endif
+
+static SIMPLE_DEV_PM_OPS(dwcmshc_pmops, dwcmshc_suspend, dwcmshc_resume);
+
static const struct of_device_id sdhci_dwcmshc_dt_ids[] = {
{ .compatible = "snps,dwcmshc-sdhci" },
{}
@@ -144,6 +215,7 @@ static struct platform_driver sdhci_dwcmshc_driver = {
.driver = {
.name = "sdhci-dwcmshc",
.of_match_table = sdhci_dwcmshc_dt_ids,
+ .pm = &dwcmshc_pmops,
},
.probe = dwcmshc_probe,
.remove = dwcmshc_remove,
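
The dwcmshc ADMA helper shown above relies on BOUNDARY_OK() to decide whether a descriptor must be split so that no transfer crosses a 128 MB (SZ_128M) window. A worked example with illustrative addresses:

/*
 * BOUNDARY_OK(addr, len) compares the 128 MB window of the first and the
 * last byte of the transfer (SZ_128M - 1 == 0x07FFFFFF):
 *
 *   addr = 0x07F00000, len = 0x10000:
 *     0x07F00000 | 0x07FFFFFF == 0x07FFFFFF
 *     0x07F0FFFF | 0x07FFFFFF == 0x07FFFFFF   -> same window, no split
 *
 *   addr = 0x07FF0000, len = 0x20000:
 *     0x07FF0000 | 0x07FFFFFF == 0x07FFFFFF
 *     0x0800FFFF | 0x07FFFFFF == 0x0FFFFFFF   -> crosses, descriptor split
 */
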
diff --git a/drivers/mmc/host/sdhci-of-esdhc.c b/drivers/mmc/host/sdhci-of-esdhc.c
index 5d8dd870bd44..7c73d243dc6c 100644
--- a/drivers/mmc/host/sdhci-of-esdhc.c
+++ b/drivers/mmc/host/sdhci-of-esdhc.c
@@ -1135,6 +1135,40 @@ static int esdhc_execute_tuning(struct mmc_host *mmc, u32 opcode)
static void esdhc_set_uhs_signaling(struct sdhci_host *host,
unsigned int timing)
{
+ u32 val;
+
+ /*
+ * There are specific register settings for HS400 mode.
+ * Clear all of them when the controller is in HS400 mode, so
+ * that HS400 is exited before any other speed mode is set.
+ */
+ val = sdhci_readl(host, ESDHC_TBCTL);
+ if (val & ESDHC_HS400_MODE) {
+ val = sdhci_readl(host, ESDHC_SDTIMNGCTL);
+ val &= ~ESDHC_FLW_CTL_BG;
+ sdhci_writel(host, val, ESDHC_SDTIMNGCTL);
+
+ val = sdhci_readl(host, ESDHC_SDCLKCTL);
+ val &= ~ESDHC_CMD_CLK_CTL;
+ sdhci_writel(host, val, ESDHC_SDCLKCTL);
+
+ esdhc_clock_enable(host, false);
+ val = sdhci_readl(host, ESDHC_TBCTL);
+ val &= ~ESDHC_HS400_MODE;
+ sdhci_writel(host, val, ESDHC_TBCTL);
+ esdhc_clock_enable(host, true);
+
+ val = sdhci_readl(host, ESDHC_DLLCFG0);
+ val &= ~(ESDHC_DLL_ENABLE | ESDHC_DLL_FREQ_SEL);
+ sdhci_writel(host, val, ESDHC_DLLCFG0);
+
+ val = sdhci_readl(host, ESDHC_TBCTL);
+ val &= ~ESDHC_HS400_WNDW_ADJUST;
+ sdhci_writel(host, val, ESDHC_TBCTL);
+
+ esdhc_tuning_block_enable(host, false);
+ }
+
if (timing == MMC_TIMING_MMC_HS400)
esdhc_tuning_block_enable(host, true);
else
diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c
index 2527244c2ae1..bb6802448b2f 100644
--- a/drivers/mmc/host/sdhci-pci-core.c
+++ b/drivers/mmc/host/sdhci-pci-core.c
@@ -249,12 +249,8 @@ static int ricoh_probe(struct sdhci_pci_chip *chip)
static int ricoh_mmc_probe_slot(struct sdhci_pci_slot *slot)
{
slot->host->caps =
- ((0x21 << SDHCI_TIMEOUT_CLK_SHIFT)
- & SDHCI_TIMEOUT_CLK_MASK) |
-
- ((0x21 << SDHCI_CLOCK_BASE_SHIFT)
- & SDHCI_CLOCK_BASE_MASK) |
-
+ FIELD_PREP(SDHCI_TIMEOUT_CLK_MASK, 0x21) |
+ FIELD_PREP(SDHCI_CLOCK_BASE_MASK, 0x21) |
SDHCI_TIMEOUT_CLK_UNIT |
SDHCI_CAN_VDD_330 |
SDHCI_CAN_DO_HISPD |
@@ -1749,6 +1745,7 @@ static const struct pci_device_id pci_ids[] = {
SDHCI_PCI_DEVICE(SYNOPSYS, DWC_MSHC, snps),
SDHCI_PCI_DEVICE(GLI, 9750, gl9750),
SDHCI_PCI_DEVICE(GLI, 9755, gl9755),
+ SDHCI_PCI_DEVICE(GLI, 9763E, gl9763e),
SDHCI_PCI_DEVICE_CLASS(AMD, SYSTEM_SDHCI, PCI_CLASS_MASK, amd),
/* Generic SD host controller */
{PCI_DEVICE_CLASS(SYSTEM_SDHCI, PCI_CLASS_MASK)},
diff --git a/drivers/mmc/host/sdhci-pci-gli.c b/drivers/mmc/host/sdhci-pci-gli.c
index fd76aa672e02..ca0166d9bf82 100644
--- a/drivers/mmc/host/sdhci-pci-gli.c
+++ b/drivers/mmc/host/sdhci-pci-gli.c
@@ -63,6 +63,19 @@
#define SDHCI_GLI_9750_TUNING_PARAMETERS_RX_DLY GENMASK(2, 0)
#define GLI_9750_TUNING_PARAMETERS_RX_DLY_VALUE 0x1
+#define SDHCI_GLI_9763E_CTRL_HS400 0x7
+
+#define SDHCI_GLI_9763E_HS400_ES_REG 0x52C
+#define SDHCI_GLI_9763E_HS400_ES_BIT BIT(8)
+
+#define PCIE_GLI_9763E_VHS 0x884
+#define GLI_9763E_VHS_REV GENMASK(19, 16)
+#define GLI_9763E_VHS_REV_R 0x0
+#define GLI_9763E_VHS_REV_M 0x1
+#define GLI_9763E_VHS_REV_W 0x2
+#define PCIE_GLI_9763E_SCR 0x8E0
+#define GLI_9763E_SCR_AXI_REQ BIT(9)
+
#define GLI_MAX_TUNING_LOOP 40
/* Genesys Logic chipset */
@@ -351,6 +364,81 @@ static int sdhci_pci_gli_resume(struct sdhci_pci_chip *chip)
}
#endif
+static void gl9763e_hs400_enhanced_strobe(struct mmc_host *mmc,
+ struct mmc_ios *ios)
+{
+ struct sdhci_host *host = mmc_priv(mmc);
+ u32 val;
+
+ val = sdhci_readl(host, SDHCI_GLI_9763E_HS400_ES_REG);
+ if (ios->enhanced_strobe)
+ val |= SDHCI_GLI_9763E_HS400_ES_BIT;
+ else
+ val &= ~SDHCI_GLI_9763E_HS400_ES_BIT;
+
+ sdhci_writel(host, val, SDHCI_GLI_9763E_HS400_ES_REG);
+}
+
+static void sdhci_set_gl9763e_signaling(struct sdhci_host *host,
+ unsigned int timing)
+{
+ u16 ctrl_2;
+
+ ctrl_2 = sdhci_readw(host, SDHCI_HOST_CONTROL2);
+ ctrl_2 &= ~SDHCI_CTRL_UHS_MASK;
+ if (timing == MMC_TIMING_MMC_HS200)
+ ctrl_2 |= SDHCI_CTRL_UHS_SDR104;
+ else if (timing == MMC_TIMING_MMC_HS)
+ ctrl_2 |= SDHCI_CTRL_UHS_SDR25;
+ else if (timing == MMC_TIMING_MMC_DDR52)
+ ctrl_2 |= SDHCI_CTRL_UHS_DDR50;
+ else if (timing == MMC_TIMING_MMC_HS400)
+ ctrl_2 |= SDHCI_GLI_9763E_CTRL_HS400;
+
+ sdhci_writew(host, ctrl_2, SDHCI_HOST_CONTROL2);
+}
+
+static void gli_set_gl9763e(struct sdhci_pci_slot *slot)
+{
+ struct pci_dev *pdev = slot->chip->pdev;
+ u32 value;
+
+ pci_read_config_dword(pdev, PCIE_GLI_9763E_VHS, &value);
+ value &= ~GLI_9763E_VHS_REV;
+ value |= FIELD_PREP(GLI_9763E_VHS_REV, GLI_9763E_VHS_REV_W);
+ pci_write_config_dword(pdev, PCIE_GLI_9763E_VHS, value);
+
+ pci_read_config_dword(pdev, PCIE_GLI_9763E_SCR, &value);
+ value |= GLI_9763E_SCR_AXI_REQ;
+ pci_write_config_dword(pdev, PCIE_GLI_9763E_SCR, value);
+
+ pci_read_config_dword(pdev, PCIE_GLI_9763E_VHS, &value);
+ value &= ~GLI_9763E_VHS_REV;
+ value |= FIELD_PREP(GLI_9763E_VHS_REV, GLI_9763E_VHS_REV_R);
+ pci_write_config_dword(pdev, PCIE_GLI_9763E_VHS, value);
+}
+
+static int gli_probe_slot_gl9763e(struct sdhci_pci_slot *slot)
+{
+ struct sdhci_host *host = slot->host;
+
+ host->mmc->caps |= MMC_CAP_8_BIT_DATA |
+ MMC_CAP_1_8V_DDR |
+ MMC_CAP_NONREMOVABLE;
+ host->mmc->caps2 |= MMC_CAP2_HS200_1_8V_SDR |
+ MMC_CAP2_HS400_1_8V |
+ MMC_CAP2_HS400_ES |
+ MMC_CAP2_NO_SDIO |
+ MMC_CAP2_NO_SD;
+ gli_pcie_enable_msi(slot);
+ host->mmc_host_ops.hs400_enhanced_strobe =
+ gl9763e_hs400_enhanced_strobe;
+ gli_set_gl9763e(slot);
+ sdhci_enable_v4_mode(host);
+
+ return 0;
+}
+
static const struct sdhci_ops sdhci_gl9755_ops = {
.set_clock = sdhci_set_clock,
.enable_dma = sdhci_pci_enable_dma,
@@ -390,3 +478,21 @@ const struct sdhci_pci_fixes sdhci_gl9750 = {
.resume = sdhci_pci_gli_resume,
#endif
};
+
+static const struct sdhci_ops sdhci_gl9763e_ops = {
+ .set_clock = sdhci_set_clock,
+ .enable_dma = sdhci_pci_enable_dma,
+ .set_bus_width = sdhci_set_bus_width,
+ .reset = sdhci_reset,
+ .set_uhs_signaling = sdhci_set_gl9763e_signaling,
+ .voltage_switch = sdhci_gli_voltage_switch,
+};
+
+const struct sdhci_pci_fixes sdhci_gl9763e = {
+ .quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC,
+ .probe_slot = gli_probe_slot_gl9763e,
+ .ops = &sdhci_gl9763e_ops,
+#ifdef CONFIG_PM_SLEEP
+ .resume = sdhci_pci_gli_resume,
+#endif
+};
diff --git a/drivers/mmc/host/sdhci-pci-o2micro.c b/drivers/mmc/host/sdhci-pci-o2micro.c
index fa8105087d68..e2a846885902 100644
--- a/drivers/mmc/host/sdhci-pci-o2micro.c
+++ b/drivers/mmc/host/sdhci-pci-o2micro.c
@@ -494,7 +494,7 @@ static void sdhci_o2_enable_clk(struct sdhci_host *host, u16 clk)
}
}
-void sdhci_pci_o2_set_clock(struct sdhci_host *host, unsigned int clock)
+static void sdhci_pci_o2_set_clock(struct sdhci_host *host, unsigned int clock)
{
u16 clk;
@@ -509,7 +509,7 @@ void sdhci_pci_o2_set_clock(struct sdhci_host *host, unsigned int clock)
sdhci_o2_enable_clk(host, clk);
}
-int sdhci_pci_o2_probe_slot(struct sdhci_pci_slot *slot)
+static int sdhci_pci_o2_probe_slot(struct sdhci_pci_slot *slot)
{
struct sdhci_pci_chip *chip;
struct sdhci_host *host;
@@ -578,7 +578,7 @@ int sdhci_pci_o2_probe_slot(struct sdhci_pci_slot *slot)
return 0;
}
-int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip)
+static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip)
{
int ret;
u8 scratch;
@@ -783,7 +783,7 @@ int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip)
}
#ifdef CONFIG_PM_SLEEP
-int sdhci_pci_o2_resume(struct sdhci_pci_chip *chip)
+static int sdhci_pci_o2_resume(struct sdhci_pci_chip *chip)
{
sdhci_pci_o2_probe(chip);
return sdhci_pci_resume_host(chip);
diff --git a/drivers/mmc/host/sdhci-pci.h b/drivers/mmc/host/sdhci-pci.h
index 42ccd123b046..d0ed232af0eb 100644
--- a/drivers/mmc/host/sdhci-pci.h
+++ b/drivers/mmc/host/sdhci-pci.h
@@ -72,6 +72,7 @@
#define PCI_DEVICE_ID_GLI_9755 0x9755
#define PCI_DEVICE_ID_GLI_9750 0x9750
+#define PCI_DEVICE_ID_GLI_9763E 0xe763
/*
* PCI device class and mask
@@ -195,5 +196,6 @@ extern const struct sdhci_pci_fixes sdhci_snps;
extern const struct sdhci_pci_fixes sdhci_o2;
extern const struct sdhci_pci_fixes sdhci_gl9750;
extern const struct sdhci_pci_fixes sdhci_gl9755;
+extern const struct sdhci_pci_fixes sdhci_gl9763e;
#endif /* __SDHCI_PCI_H */
diff --git a/drivers/mmc/host/sdhci-sprd.c b/drivers/mmc/host/sdhci-sprd.c
index 2ab42c59e4f8..a910cb461ed7 100644
--- a/drivers/mmc/host/sdhci-sprd.c
+++ b/drivers/mmc/host/sdhci-sprd.c
@@ -406,7 +406,8 @@ static struct sdhci_ops sdhci_sprd_ops = {
.request_done = sdhci_sprd_request_done,
};
-static void sdhci_sprd_request(struct mmc_host *mmc, struct mmc_request *mrq)
+static void sdhci_sprd_check_auto_cmd23(struct mmc_host *mmc,
+ struct mmc_request *mrq)
{
struct sdhci_host *host = mmc_priv(mmc);
struct sdhci_sprd_host *sprd_host = TO_SPRD_HOST(host);
@@ -422,10 +423,23 @@ static void sdhci_sprd_request(struct mmc_host *mmc, struct mmc_request *mrq)
mrq->sbc && (mrq->sbc->arg & SDHCI_SPRD_ARG2_STUFF) &&
(host->flags & SDHCI_AUTO_CMD23))
host->flags &= ~SDHCI_AUTO_CMD23;
+}
+
+static void sdhci_sprd_request(struct mmc_host *mmc, struct mmc_request *mrq)
+{
+ sdhci_sprd_check_auto_cmd23(mmc, mrq);
sdhci_request(mmc, mrq);
}
+static int sdhci_sprd_request_atomic(struct mmc_host *mmc,
+ struct mmc_request *mrq)
+{
+ sdhci_sprd_check_auto_cmd23(mmc, mrq);
+
+ return sdhci_request_atomic(mmc, mrq);
+}
+
static int sdhci_sprd_voltage_switch(struct mmc_host *mmc, struct mmc_ios *ios)
{
struct sdhci_host *host = mmc_priv(mmc);
@@ -434,7 +448,7 @@ static int sdhci_sprd_voltage_switch(struct mmc_host *mmc, struct mmc_ios *ios)
if (!IS_ERR(mmc->supply.vqmmc)) {
ret = mmc_regulator_set_vqmmc(mmc, ios);
- if (ret) {
+ if (ret < 0) {
pr_err("%s: Switching signalling voltage failed\n",
mmc_hostname(mmc));
return ret;
@@ -556,11 +570,17 @@ static int sdhci_sprd_probe(struct platform_device *pdev)
sdhci_sprd_voltage_switch;
host->mmc->caps = MMC_CAP_SD_HIGHSPEED | MMC_CAP_MMC_HIGHSPEED |
- MMC_CAP_ERASE | MMC_CAP_CMD23 | MMC_CAP_WAIT_WHILE_BUSY;
+ MMC_CAP_WAIT_WHILE_BUSY;
+
ret = mmc_of_parse(host->mmc);
if (ret)
goto pltfm_free;
+ if (!mmc_card_is_removable(host->mmc))
+ host->mmc_host_ops.request_atomic = sdhci_sprd_request_atomic;
+ else
+ host->always_defer_done = true;
+
sprd_host = TO_SPRD_HOST(host);
sdhci_sprd_phy_param_parse(sprd_host, pdev->dev.of_node);
@@ -654,8 +674,6 @@ static int sdhci_sprd_probe(struct platform_device *pdev)
if (ret)
goto err_cleanup_host;
- host->always_defer_done = true;
-
ret = __sdhci_add_host(host);
if (ret)
goto err_cleanup_host;
diff --git a/drivers/mmc/host/sdhci-tegra.c b/drivers/mmc/host/sdhci-tegra.c
index 3e2c5101291d..3a372ab3d12e 100644
--- a/drivers/mmc/host/sdhci-tegra.c
+++ b/drivers/mmc/host/sdhci-tegra.c
@@ -605,6 +605,39 @@ static void tegra_sdhci_parse_pad_autocal_dt(struct sdhci_host *host)
autocal->pull_down_1v8 = 0;
err = device_property_read_u32(host->mmc->parent,
+ "nvidia,pad-autocal-pull-up-offset-sdr104",
+ &autocal->pull_up_sdr104);
+ if (err)
+ autocal->pull_up_sdr104 = autocal->pull_up_1v8;
+
+ err = device_property_read_u32(host->mmc->parent,
+ "nvidia,pad-autocal-pull-down-offset-sdr104",
+ &autocal->pull_down_sdr104);
+ if (err)
+ autocal->pull_down_sdr104 = autocal->pull_down_1v8;
+
+ err = device_property_read_u32(host->mmc->parent,
+ "nvidia,pad-autocal-pull-up-offset-hs400",
+ &autocal->pull_up_hs400);
+ if (err)
+ autocal->pull_up_hs400 = autocal->pull_up_1v8;
+
+ err = device_property_read_u32(host->mmc->parent,
+ "nvidia,pad-autocal-pull-down-offset-hs400",
+ &autocal->pull_down_hs400);
+ if (err)
+ autocal->pull_down_hs400 = autocal->pull_down_1v8;
+
+ /*
+ * Different fail-safe drive strength values based on the signaling
+ * voltage are applicable for SoCs supporting 3V3 and 1V8 pad controls.
+ * So, avoid reading the device tree properties below for SoCs that
+ * don't have NVQUIRK_NEEDS_PAD_CONTROL.
+ */
+ if (!(tegra_host->soc_data->nvquirks & NVQUIRK_NEEDS_PAD_CONTROL))
+ return;
+
+ err = device_property_read_u32(host->mmc->parent,
"nvidia,pad-autocal-pull-up-offset-3v3-timeout",
&autocal->pull_up_3v3_timeout);
if (err) {
@@ -647,30 +680,6 @@ static void tegra_sdhci_parse_pad_autocal_dt(struct sdhci_host *host)
mmc_hostname(host->mmc));
autocal->pull_down_1v8_timeout = 0;
}
-
- err = device_property_read_u32(host->mmc->parent,
- "nvidia,pad-autocal-pull-up-offset-sdr104",
- &autocal->pull_up_sdr104);
- if (err)
- autocal->pull_up_sdr104 = autocal->pull_up_1v8;
-
- err = device_property_read_u32(host->mmc->parent,
- "nvidia,pad-autocal-pull-down-offset-sdr104",
- &autocal->pull_down_sdr104);
- if (err)
- autocal->pull_down_sdr104 = autocal->pull_down_1v8;
-
- err = device_property_read_u32(host->mmc->parent,
- "nvidia,pad-autocal-pull-up-offset-hs400",
- &autocal->pull_up_hs400);
- if (err)
- autocal->pull_up_hs400 = autocal->pull_up_1v8;
-
- err = device_property_read_u32(host->mmc->parent,
- "nvidia,pad-autocal-pull-down-offset-hs400",
- &autocal->pull_down_hs400);
- if (err)
- autocal->pull_down_hs400 = autocal->pull_down_1v8;
}
static void tegra_sdhci_request(struct mmc_host *mmc, struct mmc_request *mrq)
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index e368f2dabf20..37b1158c1c0c 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -48,10 +48,10 @@
static unsigned int debug_quirks = 0;
static unsigned int debug_quirks2;
-static void sdhci_finish_data(struct sdhci_host *);
-
static void sdhci_enable_preset_value(struct sdhci_host *host, bool enable);
+static bool sdhci_send_command(struct sdhci_host *host, struct mmc_command *cmd);
+
void sdhci_dumpregs(struct sdhci_host *host)
{
SDHCI_DUMP("============ SDHCI REGISTER DUMP ===========\n");
@@ -111,6 +111,9 @@ void sdhci_dumpregs(struct sdhci_host *host)
}
}
+ if (host->ops->dump_vendor_regs)
+ host->ops->dump_vendor_regs(host);
+
SDHCI_DUMP("============================================\n");
}
EXPORT_SYMBOL_GPL(sdhci_dumpregs);
@@ -317,6 +320,7 @@ out:
static void sdhci_init(struct sdhci_host *host, int soft)
{
struct mmc_host *mmc = host->mmc;
+ unsigned long flags;
if (soft)
sdhci_do_reset(host, SDHCI_RESET_CMD | SDHCI_RESET_DATA);
@@ -326,7 +330,9 @@ static void sdhci_init(struct sdhci_host *host, int soft)
if (host->v4_mode)
sdhci_do_enable_v4_mode(host);
+ spin_lock_irqsave(&host->lock, flags);
sdhci_set_default_irqs(host);
+ spin_unlock_irqrestore(&host->lock, flags);
host->cqe_on = false;
@@ -634,9 +640,13 @@ static int sdhci_pre_dma_transfer(struct sdhci_host *host,
}
if (mmc_get_dma_dir(data) == DMA_TO_DEVICE) {
/* Copy the data to the bounce buffer */
- sg_copy_to_buffer(data->sg, data->sg_len,
- host->bounce_buffer,
- length);
+ if (host->ops->copy_to_bounce_buffer) {
+ host->ops->copy_to_bounce_buffer(host,
+ data, length);
+ } else {
+ sg_copy_to_buffer(data->sg, data->sg_len,
+ host->bounce_buffer, length);
+ }
}
/* Switch ownership to the DMA */
dma_sync_single_for_device(host->mmc->parent,
@@ -1350,13 +1360,25 @@ static inline bool sdhci_auto_cmd12(struct sdhci_host *host,
!mrq->cap_cmd_during_tfr;
}
+static inline bool sdhci_auto_cmd23(struct sdhci_host *host,
+ struct mmc_request *mrq)
+{
+ return mrq->sbc && (host->flags & SDHCI_AUTO_CMD23);
+}
+
+static inline bool sdhci_manual_cmd23(struct sdhci_host *host,
+ struct mmc_request *mrq)
+{
+ return mrq->sbc && !(host->flags & SDHCI_AUTO_CMD23);
+}
+
static inline void sdhci_auto_cmd_select(struct sdhci_host *host,
struct mmc_command *cmd,
u16 *mode)
{
bool use_cmd12 = sdhci_auto_cmd12(host, cmd->mrq) &&
(cmd->opcode != SD_IO_RW_EXTENDED);
- bool use_cmd23 = cmd->mrq->sbc && (host->flags & SDHCI_AUTO_CMD23);
+ bool use_cmd23 = sdhci_auto_cmd23(host, cmd->mrq);
u16 ctrl2;
/*
@@ -1416,7 +1438,7 @@ static void sdhci_set_transfer_mode(struct sdhci_host *host,
if (mmc_op_multi(cmd->opcode) || data->blocks > 1) {
mode = SDHCI_TRNS_BLK_CNT_EN | SDHCI_TRNS_MULTI;
sdhci_auto_cmd_select(host, cmd, &mode);
- if (cmd->mrq->sbc && (host->flags & SDHCI_AUTO_CMD23))
+ if (sdhci_auto_cmd23(host, cmd->mrq))
sdhci_writel(host, cmd->mrq->sbc->arg, SDHCI_ARGUMENT2);
}
@@ -1466,6 +1488,9 @@ static void __sdhci_finish_mrq(struct sdhci_host *host, struct mmc_request *mrq)
if (host->data_cmd && host->data_cmd->mrq == mrq)
host->data_cmd = NULL;
+ if (host->deferred_cmd && host->deferred_cmd->mrq == mrq)
+ host->deferred_cmd = NULL;
+
if (host->data && host->data->mrq == mrq)
host->data = NULL;
@@ -1487,7 +1512,7 @@ static void sdhci_finish_mrq(struct sdhci_host *host, struct mmc_request *mrq)
queue_work(host->complete_wq, &host->complete_work);
}
-static void sdhci_finish_data(struct sdhci_host *host)
+static void __sdhci_finish_data(struct sdhci_host *host, bool sw_data_timeout)
{
struct mmc_command *data_cmd = host->data_cmd;
struct mmc_data *data = host->data;
@@ -1539,14 +1564,31 @@ static void sdhci_finish_data(struct sdhci_host *host)
} else {
/* Avoid triggering warning in sdhci_send_command() */
host->cmd = NULL;
- sdhci_send_command(host, data->stop);
+ if (!sdhci_send_command(host, data->stop)) {
+ if (sw_data_timeout) {
+ /*
+ * This is anyway a sw data timeout, so
+ * give up now.
+ */
+ data->stop->error = -EIO;
+ __sdhci_finish_mrq(host, data->mrq);
+ } else {
+ WARN_ON(host->deferred_cmd);
+ host->deferred_cmd = data->stop;
+ }
+ }
}
} else {
__sdhci_finish_mrq(host, data->mrq);
}
}
-void sdhci_send_command(struct sdhci_host *host, struct mmc_command *cmd)
+static void sdhci_finish_data(struct sdhci_host *host)
+{
+ __sdhci_finish_data(host, false);
+}
+
+static bool sdhci_send_command(struct sdhci_host *host, struct mmc_command *cmd)
{
int flags;
u32 mask;
@@ -1561,9 +1603,6 @@ void sdhci_send_command(struct sdhci_host *host, struct mmc_command *cmd)
cmd->opcode == MMC_STOP_TRANSMISSION)
cmd->flags |= MMC_RSP_BUSY;
- /* Wait max 10 ms */
- timeout = 10;
-
mask = SDHCI_CMD_INHIBIT;
if (sdhci_data_line_cmd(cmd))
mask |= SDHCI_DATA_INHIBIT;
@@ -1573,18 +1612,8 @@ void sdhci_send_command(struct sdhci_host *host, struct mmc_command *cmd)
if (cmd->mrq->data && (cmd == cmd->mrq->data->stop))
mask &= ~SDHCI_DATA_INHIBIT;
- while (sdhci_readl(host, SDHCI_PRESENT_STATE) & mask) {
- if (timeout == 0) {
- pr_err("%s: Controller never released inhibit bit(s).\n",
- mmc_hostname(host->mmc));
- sdhci_dumpregs(host);
- cmd->error = -EIO;
- sdhci_finish_mrq(host, cmd->mrq);
- return;
- }
- timeout--;
- mdelay(1);
- }
+ if (sdhci_readl(host, SDHCI_PRESENT_STATE) & mask)
+ return false;
host->cmd = cmd;
host->data_timeout = 0;
@@ -1606,11 +1635,13 @@ void sdhci_send_command(struct sdhci_host *host, struct mmc_command *cmd)
sdhci_set_transfer_mode(host, cmd);
if ((cmd->flags & MMC_RSP_136) && (cmd->flags & MMC_RSP_BUSY)) {
- pr_err("%s: Unsupported response type!\n",
- mmc_hostname(host->mmc));
- cmd->error = -EINVAL;
- sdhci_finish_mrq(host, cmd->mrq);
- return;
+ WARN_ONCE(1, "Unsupported response type!\n");
+ /*
+ * This does not happen in practice because 136-bit response
+ * commands never have busy waiting, so rather than complicate
+ * the error path, just remove busy waiting and continue.
+ */
+ cmd->flags &= ~MMC_RSP_BUSY;
}
if (!(cmd->flags & MMC_RSP_PRESENT))
@@ -1645,8 +1676,61 @@ void sdhci_send_command(struct sdhci_host *host, struct mmc_command *cmd)
sdhci_external_dma_pre_transfer(host, cmd);
sdhci_writew(host, SDHCI_MAKE_CMD(cmd->opcode, flags), SDHCI_COMMAND);
+
+ return true;
+}
+
+static bool sdhci_present_error(struct sdhci_host *host,
+ struct mmc_command *cmd, bool present)
+{
+ if (!present || host->flags & SDHCI_DEVICE_DEAD) {
+ cmd->error = -ENOMEDIUM;
+ return true;
+ }
+
+ return false;
+}
+
+static bool sdhci_send_command_retry(struct sdhci_host *host,
+ struct mmc_command *cmd,
+ unsigned long flags)
+ __releases(host->lock)
+ __acquires(host->lock)
+{
+ struct mmc_command *deferred_cmd = host->deferred_cmd;
+ int timeout = 10; /* Approx. 10 ms */
+ bool present;
+
+ while (!sdhci_send_command(host, cmd)) {
+ if (!timeout--) {
+ pr_err("%s: Controller never released inhibit bit(s).\n",
+ mmc_hostname(host->mmc));
+ sdhci_dumpregs(host);
+ cmd->error = -EIO;
+ return false;
+ }
+
+ spin_unlock_irqrestore(&host->lock, flags);
+
+ usleep_range(1000, 1250);
+
+ present = host->mmc->ops->get_cd(host->mmc);
+
+ spin_lock_irqsave(&host->lock, flags);
+
+ /* A deferred command might disappear, handle that */
+ if (cmd == deferred_cmd && cmd != host->deferred_cmd)
+ return true;
+
+ if (sdhci_present_error(host, cmd, present))
+ return false;
+ }
+
+ if (cmd == host->deferred_cmd)
+ host->deferred_cmd = NULL;
+
+ return true;
}
-EXPORT_SYMBOL_GPL(sdhci_send_command);
static void sdhci_read_rsp_136(struct sdhci_host *host, struct mmc_command *cmd)
{
@@ -1707,7 +1791,10 @@ static void sdhci_finish_command(struct sdhci_host *host)
/* Finished CMD23, now send actual command. */
if (cmd == cmd->mrq->sbc) {
- sdhci_send_command(host, cmd->mrq->cmd);
+ if (!sdhci_send_command(host, cmd->mrq->cmd)) {
+ WARN_ON(host->deferred_cmd);
+ host->deferred_cmd = cmd->mrq->cmd;
+ }
} else {
/* Processed actual command. */
@@ -2037,11 +2124,10 @@ EXPORT_SYMBOL_GPL(sdhci_set_power_and_bus_voltage);
void sdhci_request(struct mmc_host *mmc, struct mmc_request *mrq)
{
- struct sdhci_host *host;
- int present;
+ struct sdhci_host *host = mmc_priv(mmc);
+ struct mmc_command *cmd;
unsigned long flags;
-
- host = mmc_priv(mmc);
+ bool present;
/* Firstly check card presence */
present = mmc->ops->get_cd(mmc);
@@ -2050,19 +2136,57 @@ void sdhci_request(struct mmc_host *mmc, struct mmc_request *mrq)
sdhci_led_activate(host);
- if (!present || host->flags & SDHCI_DEVICE_DEAD) {
- mrq->cmd->error = -ENOMEDIUM;
+ if (sdhci_present_error(host, mrq->cmd, present))
+ goto out_finish;
+
+ cmd = sdhci_manual_cmd23(host, mrq) ? mrq->sbc : mrq->cmd;
+
+ if (!sdhci_send_command_retry(host, cmd, flags))
+ goto out_finish;
+
+ spin_unlock_irqrestore(&host->lock, flags);
+
+ return;
+
+out_finish:
+ sdhci_finish_mrq(host, mrq);
+ spin_unlock_irqrestore(&host->lock, flags);
+}
+EXPORT_SYMBOL_GPL(sdhci_request);
+
+int sdhci_request_atomic(struct mmc_host *mmc, struct mmc_request *mrq)
+{
+ struct sdhci_host *host = mmc_priv(mmc);
+ struct mmc_command *cmd;
+ unsigned long flags;
+ int ret = 0;
+
+ spin_lock_irqsave(&host->lock, flags);
+
+ if (sdhci_present_error(host, mrq->cmd, true)) {
sdhci_finish_mrq(host, mrq);
- } else {
- if (mrq->sbc && !(host->flags & SDHCI_AUTO_CMD23))
- sdhci_send_command(host, mrq->sbc);
- else
- sdhci_send_command(host, mrq->cmd);
+ goto out_finish;
}
+ cmd = sdhci_manual_cmd23(host, mrq) ? mrq->sbc : mrq->cmd;
+
+ /*
+ * The HSQ may send a command in interrupt context without polling
+ * the busy signaling, so return -EBUSY if the controller has not
+ * released the inhibit bits; HSQ can then retry the request in
+ * non-atomic context. Do not finish the request here.
+ */
+ if (!sdhci_send_command(host, cmd))
+ ret = -EBUSY;
+ else
+ sdhci_led_activate(host);
+
+out_finish:
spin_unlock_irqrestore(&host->lock, flags);
+ return ret;
}
-EXPORT_SYMBOL_GPL(sdhci_request);
+EXPORT_SYMBOL_GPL(sdhci_request_atomic);
void sdhci_set_bus_width(struct sdhci_host *host, int width)
{
@@ -2411,7 +2535,7 @@ int sdhci_start_signal_voltage_switch(struct mmc_host *mmc,
if (!IS_ERR(mmc->supply.vqmmc)) {
ret = mmc_regulator_set_vqmmc(mmc, ios);
- if (ret) {
+ if (ret < 0) {
pr_warn("%s: Switching to 3.3V signalling voltage failed\n",
mmc_hostname(mmc));
return -EIO;
@@ -2434,7 +2558,7 @@ int sdhci_start_signal_voltage_switch(struct mmc_host *mmc,
return -EINVAL;
if (!IS_ERR(mmc->supply.vqmmc)) {
ret = mmc_regulator_set_vqmmc(mmc, ios);
- if (ret) {
+ if (ret < 0) {
pr_warn("%s: Switching to 1.8V signalling voltage failed\n",
mmc_hostname(mmc));
return -EIO;
@@ -2466,7 +2590,7 @@ int sdhci_start_signal_voltage_switch(struct mmc_host *mmc,
return -EINVAL;
if (!IS_ERR(mmc->supply.vqmmc)) {
ret = mmc_regulator_set_vqmmc(mmc, ios);
- if (ret) {
+ if (ret < 0) {
pr_warn("%s: Switching to 1.2V signalling voltage failed\n",
mmc_hostname(mmc));
return -EIO;
@@ -2600,7 +2724,11 @@ void sdhci_send_tuning(struct sdhci_host *host, u32 opcode)
*/
sdhci_writew(host, SDHCI_TRNS_READ, SDHCI_TRANSFER_MODE);
- sdhci_send_command(host, &cmd);
+ if (!sdhci_send_command_retry(host, &cmd, flags)) {
+ spin_unlock_irqrestore(&host->lock, flags);
+ host->tuning_done = 0;
+ return;
+ }
host->cmd = NULL;
@@ -3018,7 +3146,7 @@ static void sdhci_timeout_data_timer(struct timer_list *t)
if (host->data) {
host->data->error = -ETIMEDOUT;
- sdhci_finish_data(host);
+ __sdhci_finish_data(host, true);
queue_work(host->complete_wq, &host->complete_work);
} else if (host->data_cmd) {
host->data_cmd->error = -ETIMEDOUT;
@@ -3390,6 +3518,9 @@ cont:
}
}
out:
+ if (host->deferred_cmd)
+ result = IRQ_WAKE_THREAD;
+
spin_unlock(&host->lock);
/* Process mrqs ready for immediate completion */
@@ -3415,6 +3546,7 @@ out:
static irqreturn_t sdhci_thread_irq(int irq, void *dev_id)
{
struct sdhci_host *host = dev_id;
+ struct mmc_command *cmd;
unsigned long flags;
u32 isr;
@@ -3422,8 +3554,14 @@ static irqreturn_t sdhci_thread_irq(int irq, void *dev_id)
;
spin_lock_irqsave(&host->lock, flags);
+
isr = host->thread_isr;
host->thread_isr = 0;
+
+ cmd = host->deferred_cmd;
+ if (cmd && !sdhci_send_command_retry(host, cmd, flags))
+ sdhci_finish_mrq(host, cmd->mrq);
+
spin_unlock_irqrestore(&host->lock, flags);
if (isr & (SDHCI_INT_CARD_INSERT | SDHCI_INT_CARD_REMOVE)) {
@@ -4114,11 +4252,9 @@ int sdhci_setup_host(struct sdhci_host *host)
}
if (host->version >= SDHCI_SPEC_300)
- host->max_clk = (host->caps & SDHCI_CLOCK_V3_BASE_MASK)
- >> SDHCI_CLOCK_BASE_SHIFT;
+ host->max_clk = FIELD_GET(SDHCI_CLOCK_V3_BASE_MASK, host->caps);
else
- host->max_clk = (host->caps & SDHCI_CLOCK_BASE_MASK)
- >> SDHCI_CLOCK_BASE_SHIFT;
+ host->max_clk = FIELD_GET(SDHCI_CLOCK_BASE_MASK, host->caps);
host->max_clk *= 1000000;
if (host->max_clk == 0 || host->quirks &
@@ -4136,8 +4272,7 @@ int sdhci_setup_host(struct sdhci_host *host)
* In case of Host Controller v3.00, find out whether clock
* multiplier is supported.
*/
- host->clk_mul = (host->caps1 & SDHCI_CLOCK_MUL_MASK) >>
- SDHCI_CLOCK_MUL_SHIFT;
+ host->clk_mul = FIELD_GET(SDHCI_CLOCK_MUL_MASK, host->caps1);
/*
* In case the value in Clock Multiplier is 0, then programmable
@@ -4170,8 +4305,7 @@ int sdhci_setup_host(struct sdhci_host *host)
mmc->f_max = max_clk;
if (!(host->quirks & SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK)) {
- host->timeout_clk = (host->caps & SDHCI_TIMEOUT_CLK_MASK) >>
- SDHCI_TIMEOUT_CLK_SHIFT;
+ host->timeout_clk = FIELD_GET(SDHCI_TIMEOUT_CLK_MASK, host->caps);
if (host->caps & SDHCI_TIMEOUT_CLK_UNIT)
host->timeout_clk *= 1000;
@@ -4201,7 +4335,7 @@ int sdhci_setup_host(struct sdhci_host *host)
!host->ops->get_max_timeout_count)
mmc->max_busy_timeout = 0;
- mmc->caps |= MMC_CAP_SDIO_IRQ | MMC_CAP_ERASE | MMC_CAP_CMD23;
+ mmc->caps |= MMC_CAP_SDIO_IRQ | MMC_CAP_CMD23;
mmc->caps2 |= MMC_CAP2_SDIO_IRQ_NOTHREAD;
if (host->quirks & SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12)
@@ -4323,8 +4457,8 @@ int sdhci_setup_host(struct sdhci_host *host)
mmc->caps |= MMC_CAP_DRIVER_TYPE_D;
/* Initial value for re-tuning timer count */
- host->tuning_count = (host->caps1 & SDHCI_RETUNING_TIMER_COUNT_MASK) >>
- SDHCI_RETUNING_TIMER_COUNT_SHIFT;
+ host->tuning_count = FIELD_GET(SDHCI_RETUNING_TIMER_COUNT_MASK,
+ host->caps1);
/*
* In case Re-tuning Timer is not disabled, the actual value of
@@ -4334,8 +4468,7 @@ int sdhci_setup_host(struct sdhci_host *host)
host->tuning_count = 1 << (host->tuning_count - 1);
/* Re-tuning mode supported by the Host Controller */
- host->tuning_mode = (host->caps1 & SDHCI_RETUNING_MODE_MASK) >>
- SDHCI_RETUNING_MODE_SHIFT;
+ host->tuning_mode = FIELD_GET(SDHCI_RETUNING_MODE_MASK, host->caps1);
ocr_avail = 0;
@@ -4357,35 +4490,32 @@ int sdhci_setup_host(struct sdhci_host *host)
curr = min_t(u32, curr, SDHCI_MAX_CURRENT_LIMIT);
max_current_caps =
- (curr << SDHCI_MAX_CURRENT_330_SHIFT) |
- (curr << SDHCI_MAX_CURRENT_300_SHIFT) |
- (curr << SDHCI_MAX_CURRENT_180_SHIFT);
+ FIELD_PREP(SDHCI_MAX_CURRENT_330_MASK, curr) |
+ FIELD_PREP(SDHCI_MAX_CURRENT_300_MASK, curr) |
+ FIELD_PREP(SDHCI_MAX_CURRENT_180_MASK, curr);
}
}
if (host->caps & SDHCI_CAN_VDD_330) {
ocr_avail |= MMC_VDD_32_33 | MMC_VDD_33_34;
- mmc->max_current_330 = ((max_current_caps &
- SDHCI_MAX_CURRENT_330_MASK) >>
- SDHCI_MAX_CURRENT_330_SHIFT) *
- SDHCI_MAX_CURRENT_MULTIPLIER;
+ mmc->max_current_330 = FIELD_GET(SDHCI_MAX_CURRENT_330_MASK,
+ max_current_caps) *
+ SDHCI_MAX_CURRENT_MULTIPLIER;
}
if (host->caps & SDHCI_CAN_VDD_300) {
ocr_avail |= MMC_VDD_29_30 | MMC_VDD_30_31;
- mmc->max_current_300 = ((max_current_caps &
- SDHCI_MAX_CURRENT_300_MASK) >>
- SDHCI_MAX_CURRENT_300_SHIFT) *
- SDHCI_MAX_CURRENT_MULTIPLIER;
+ mmc->max_current_300 = FIELD_GET(SDHCI_MAX_CURRENT_300_MASK,
+ max_current_caps) *
+ SDHCI_MAX_CURRENT_MULTIPLIER;
}
if (host->caps & SDHCI_CAN_VDD_180) {
ocr_avail |= MMC_VDD_165_195;
- mmc->max_current_180 = ((max_current_caps &
- SDHCI_MAX_CURRENT_180_MASK) >>
- SDHCI_MAX_CURRENT_180_SHIFT) *
- SDHCI_MAX_CURRENT_MULTIPLIER;
+ mmc->max_current_180 = FIELD_GET(SDHCI_MAX_CURRENT_180_MASK,
+ max_current_caps) *
+ SDHCI_MAX_CURRENT_MULTIPLIER;
}
/* If OCR set by host, use it instead. */
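
sdhci_request_atomic() above deliberately returns -EBUSY rather than sleeping when the inhibit bits are still set, so a host software queue (HSQ) caller in interrupt context can retry from non-atomic context. Glue drivers opt in the same way the Spreadtrum change does; a minimal probe fragment mirroring that wiring:

/* Sketch of how a glue driver selects the atomic request path. */
static void example_setup_request_path(struct sdhci_host *host)
{
	/*
	 * Only non-removable (eMMC-style) slots issue requests atomically;
	 * removable cards keep deferring request completion to the IRQ
	 * thread via always_defer_done.
	 */
	if (!mmc_card_is_removable(host->mmc))
		host->mmc_host_ops.request_atomic = sdhci_request_atomic;
	else
		host->always_defer_done = true;
}
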
diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h
index 79dffbb731d3..0008bbd27127 100644
--- a/drivers/mmc/host/sdhci.h
+++ b/drivers/mmc/host/sdhci.h
@@ -200,12 +200,10 @@
#define SDHCI_CTRL_PRESET_VAL_ENABLE 0x8000
#define SDHCI_CAPABILITIES 0x40
-#define SDHCI_TIMEOUT_CLK_MASK 0x0000003F
-#define SDHCI_TIMEOUT_CLK_SHIFT 0
+#define SDHCI_TIMEOUT_CLK_MASK GENMASK(5, 0)
#define SDHCI_TIMEOUT_CLK_UNIT 0x00000080
-#define SDHCI_CLOCK_BASE_MASK 0x00003F00
-#define SDHCI_CLOCK_V3_BASE_MASK 0x0000FF00
-#define SDHCI_CLOCK_BASE_SHIFT 8
+#define SDHCI_CLOCK_BASE_MASK GENMASK(13, 8)
+#define SDHCI_CLOCK_V3_BASE_MASK GENMASK(15, 8)
#define SDHCI_MAX_BLOCK_MASK 0x00030000
#define SDHCI_MAX_BLOCK_SHIFT 16
#define SDHCI_CAN_DO_8BIT 0x00040000
@@ -220,32 +218,25 @@
#define SDHCI_CAN_64BIT_V4 0x08000000
#define SDHCI_CAN_64BIT 0x10000000
+#define SDHCI_CAPABILITIES_1 0x44
#define SDHCI_SUPPORT_SDR50 0x00000001
#define SDHCI_SUPPORT_SDR104 0x00000002
#define SDHCI_SUPPORT_DDR50 0x00000004
#define SDHCI_DRIVER_TYPE_A 0x00000010
#define SDHCI_DRIVER_TYPE_C 0x00000020
#define SDHCI_DRIVER_TYPE_D 0x00000040
-#define SDHCI_RETUNING_TIMER_COUNT_MASK 0x00000F00
-#define SDHCI_RETUNING_TIMER_COUNT_SHIFT 8
+#define SDHCI_RETUNING_TIMER_COUNT_MASK GENMASK(11, 8)
#define SDHCI_USE_SDR50_TUNING 0x00002000
-#define SDHCI_RETUNING_MODE_MASK 0x0000C000
-#define SDHCI_RETUNING_MODE_SHIFT 14
-#define SDHCI_CLOCK_MUL_MASK 0x00FF0000
-#define SDHCI_CLOCK_MUL_SHIFT 16
+#define SDHCI_RETUNING_MODE_MASK GENMASK(15, 14)
+#define SDHCI_CLOCK_MUL_MASK GENMASK(23, 16)
#define SDHCI_CAN_DO_ADMA3 0x08000000
#define SDHCI_SUPPORT_HS400 0x80000000 /* Non-standard */
-#define SDHCI_CAPABILITIES_1 0x44
-
#define SDHCI_MAX_CURRENT 0x48
-#define SDHCI_MAX_CURRENT_LIMIT 0xFF
-#define SDHCI_MAX_CURRENT_330_MASK 0x0000FF
-#define SDHCI_MAX_CURRENT_330_SHIFT 0
-#define SDHCI_MAX_CURRENT_300_MASK 0x00FF00
-#define SDHCI_MAX_CURRENT_300_SHIFT 8
-#define SDHCI_MAX_CURRENT_180_MASK 0xFF0000
-#define SDHCI_MAX_CURRENT_180_SHIFT 16
+#define SDHCI_MAX_CURRENT_LIMIT GENMASK(7, 0)
+#define SDHCI_MAX_CURRENT_330_MASK GENMASK(7, 0)
+#define SDHCI_MAX_CURRENT_300_MASK GENMASK(15, 8)
+#define SDHCI_MAX_CURRENT_180_MASK GENMASK(23, 16)
#define SDHCI_MAX_CURRENT_MULTIPLIER 4
/* 4C-4F reserved for more max current */
@@ -540,6 +531,7 @@ struct sdhci_host {
struct mmc_request *mrqs_done[SDHCI_MAX_MRQS]; /* Requests done */
struct mmc_command *cmd; /* Current command */
struct mmc_command *data_cmd; /* Current data command */
+ struct mmc_command *deferred_cmd; /* Deferred command */
struct mmc_data *data; /* Current data request */
unsigned int data_early:1; /* Data finished before cmd */
@@ -653,8 +645,12 @@ struct sdhci_ops {
void (*voltage_switch)(struct sdhci_host *host);
void (*adma_write_desc)(struct sdhci_host *host, void **desc,
dma_addr_t addr, int len, unsigned int cmd);
+ void (*copy_to_bounce_buffer)(struct sdhci_host *host,
+ struct mmc_data *data,
+ unsigned int length);
void (*request_done)(struct sdhci_host *host,
struct mmc_request *mrq);
+ void (*dump_vendor_regs)(struct sdhci_host *host);
};
#ifdef CONFIG_MMC_SDHCI_IO_ACCESSORS
@@ -757,7 +753,6 @@ void sdhci_cleanup_host(struct sdhci_host *host);
int __sdhci_add_host(struct sdhci_host *host);
int sdhci_add_host(struct sdhci_host *host);
void sdhci_remove_host(struct sdhci_host *host, int dead);
-void sdhci_send_command(struct sdhci_host *host, struct mmc_command *cmd);
static inline void sdhci_read_caps(struct sdhci_host *host)
{
@@ -776,6 +771,7 @@ void sdhci_set_power_and_bus_voltage(struct sdhci_host *host,
void sdhci_set_power_noreg(struct sdhci_host *host, unsigned char mode,
unsigned short vdd);
void sdhci_request(struct mmc_host *mmc, struct mmc_request *mrq);
+int sdhci_request_atomic(struct mmc_host *mmc, struct mmc_request *mrq);
void sdhci_set_bus_width(struct sdhci_host *host, int width);
void sdhci_reset(struct sdhci_host *host, u8 mask);
void sdhci_set_uhs_signaling(struct sdhci_host *host, unsigned timing);
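
The capability register shifts in sdhci.h are replaced with GENMASK() masks so that FIELD_GET()/FIELD_PREP() from <linux/bitfield.h> do the shifting. A small illustration of the same pattern (the caps value is an example only, and sdhci.h is assumed to be included for the mask):

#include <linux/bitfield.h>
#include <linux/types.h>

/* Round-trip the v3 base-clock field, as sdhci_setup_host() now reads it. */
static u32 example_base_clock_field(u32 caps)
{
	/* e.g. caps = 0x0000C800 -> base_clk = 0xC8 = 200 (MHz) */
	u32 base_clk = FIELD_GET(SDHCI_CLOCK_V3_BASE_MASK, caps);

	return FIELD_PREP(SDHCI_CLOCK_V3_BASE_MASK, base_clk);
}
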
diff --git a/drivers/mmc/host/sdricoh_cs.c b/drivers/mmc/host/sdricoh_cs.c
index a38b8b2a4e5c..76a8cd3a186f 100644
--- a/drivers/mmc/host/sdricoh_cs.c
+++ b/drivers/mmc/host/sdricoh_cs.c
@@ -15,6 +15,7 @@
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/ioport.h>
+#include <linux/iopoll.h>
#include <linux/scatterlist.h>
#include <pcmcia/cistpl.h>
@@ -22,6 +23,7 @@
#include <linux/io.h>
#include <linux/mmc/host.h>
+#include <linux/mmc/mmc.h>
#define DRIVER_NAME "sdricoh_cs"
@@ -57,10 +59,8 @@ static unsigned int switchlocked;
#define STATUS_BUSY 0x40000000
/* timeouts */
-#define INIT_TIMEOUT 100
-#define CMD_TIMEOUT 100000
-#define TRANSFER_TIMEOUT 100000
-#define BUSY_TIMEOUT 32767
+#define SDRICOH_CMD_TIMEOUT_US 1000000
+#define SDRICOH_DATA_TIMEOUT_US 1000000
/* list of supported pcmcia devices */
static const struct pcmcia_device_id pcmcia_ids[] = {
@@ -124,19 +124,24 @@ static inline unsigned int sdricoh_readb(struct sdricoh_host *host,
return value;
}
-static int sdricoh_query_status(struct sdricoh_host *host, unsigned int wanted,
- unsigned int timeout){
- unsigned int loop;
+static bool sdricoh_status_ok(struct sdricoh_host *host, unsigned int status,
+ unsigned int wanted)
+{
+ sdricoh_writel(host, R2E4_STATUS_RESP, status);
+ return status & wanted;
+}
+
+static int sdricoh_query_status(struct sdricoh_host *host, unsigned int wanted)
+{
+ int ret;
unsigned int status = 0;
struct device *dev = host->dev;
- for (loop = 0; loop < timeout; loop++) {
- status = sdricoh_readl(host, R21C_STATUS);
- sdricoh_writel(host, R2E4_STATUS_RESP, status);
- if (status & wanted)
- break;
- }
- if (loop == timeout) {
+ ret = read_poll_timeout(sdricoh_readl, status,
+ sdricoh_status_ok(host, status, wanted),
+ 32, SDRICOH_DATA_TIMEOUT_US, false,
+ host, R21C_STATUS);
+ if (ret) {
dev_err(dev, "query_status: timeout waiting for %x\n", wanted);
return -ETIMEDOUT;
}
@@ -150,35 +155,46 @@ static int sdricoh_query_status(struct sdricoh_host *host, unsigned int wanted,
}
-static int sdricoh_mmc_cmd(struct sdricoh_host *host, unsigned char opcode,
- unsigned int arg)
+static int sdricoh_mmc_cmd(struct sdricoh_host *host, struct mmc_command *cmd)
{
- unsigned int status;
- int result = 0;
- unsigned int loop = 0;
+ unsigned int status, timeout_us;
+ int ret;
+ unsigned char opcode = cmd->opcode;
+
/* reset status reg? */
sdricoh_writel(host, R21C_STATUS, 0x18);
+
+ /* MMC_APP_CMDs need some special handling */
+ if (host->app_cmd) {
+ opcode |= 64;
+ host->app_cmd = 0;
+ } else if (opcode == MMC_APP_CMD)
+ host->app_cmd = 1;
+
/* fill parameters */
- sdricoh_writel(host, R204_CMD_ARG, arg);
+ sdricoh_writel(host, R204_CMD_ARG, cmd->arg);
sdricoh_writel(host, R200_CMD, (0x10000 << 8) | opcode);
+
/* wait for command completion */
- if (opcode) {
- for (loop = 0; loop < CMD_TIMEOUT; loop++) {
- status = sdricoh_readl(host, R21C_STATUS);
- sdricoh_writel(host, R2E4_STATUS_RESP, status);
- if (status & STATUS_CMD_FINISHED)
- break;
- }
- /* don't check for timeout in the loop it is not always
- reset correctly
- */
- if (loop == CMD_TIMEOUT || status & STATUS_CMD_TIMEOUT)
- result = -ETIMEDOUT;
+ if (!opcode)
+ return 0;
- }
+ timeout_us = cmd->busy_timeout ? cmd->busy_timeout * 1000 :
+ SDRICOH_CMD_TIMEOUT_US;
- return result;
+ ret = read_poll_timeout(sdricoh_readl, status,
+ sdricoh_status_ok(host, status, STATUS_CMD_FINISHED),
+ 32, timeout_us, false,
+ host, R21C_STATUS);
+
+ /*
+ * Don't check for timeout status in the loop, as it's not always reset
+ * correctly.
+ */
+ if (ret || status & STATUS_CMD_TIMEOUT)
+ return -ETIMEDOUT;
+ return 0;
}
static int sdricoh_reset(struct sdricoh_host *host)
@@ -207,8 +223,7 @@ static int sdricoh_blockio(struct sdricoh_host *host, int read,
u32 data = 0;
/* wait until the data is available */
if (read) {
- if (sdricoh_query_status(host, STATUS_READY_TO_READ,
- TRANSFER_TIMEOUT))
+ if (sdricoh_query_status(host, STATUS_READY_TO_READ))
return -ETIMEDOUT;
sdricoh_writel(host, R21C_STATUS, 0x18);
/* read data */
@@ -224,8 +239,7 @@ static int sdricoh_blockio(struct sdricoh_host *host, int read,
}
}
} else {
- if (sdricoh_query_status(host, STATUS_READY_TO_WRITE,
- TRANSFER_TIMEOUT))
+ if (sdricoh_query_status(host, STATUS_READY_TO_WRITE))
return -ETIMEDOUT;
sdricoh_writel(host, R21C_STATUS, 0x18);
/* write data */
@@ -251,28 +265,20 @@ static void sdricoh_request(struct mmc_host *mmc, struct mmc_request *mrq)
struct mmc_command *cmd = mrq->cmd;
struct mmc_data *data = cmd->data;
struct device *dev = host->dev;
- unsigned char opcode = cmd->opcode;
int i;
dev_dbg(dev, "=============================\n");
- dev_dbg(dev, "sdricoh_request opcode=%i\n", opcode);
+ dev_dbg(dev, "sdricoh_request opcode=%i\n", cmd->opcode);
sdricoh_writel(host, R21C_STATUS, 0x18);
- /* MMC_APP_CMDs need some special handling */
- if (host->app_cmd) {
- opcode |= 64;
- host->app_cmd = 0;
- } else if (opcode == 55)
- host->app_cmd = 1;
-
/* read/write commands seem to require this */
if (data) {
sdricoh_writew(host, R226_BLOCKSIZE, data->blksz);
sdricoh_writel(host, R208_DATAIO, 0);
}
- cmd->error = sdricoh_mmc_cmd(host, opcode, cmd->arg);
+ cmd->error = sdricoh_mmc_cmd(host, cmd);
/* read response buffer */
if (cmd->flags & MMC_RSP_PRESENT) {
@@ -323,8 +329,7 @@ static void sdricoh_request(struct mmc_host *mmc, struct mmc_request *mrq)
sdricoh_writel(host, R208_DATAIO, 1);
- if (sdricoh_query_status(host, STATUS_TRANSFER_FINISHED,
- TRANSFER_TIMEOUT)) {
+ if (sdricoh_query_status(host, STATUS_TRANSFER_FINISHED)) {
dev_err(dev, "sdricoh_request: transfer end error\n");
cmd->error = -EINVAL;
}
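
The sdricoh conversion replaces open-coded busy loops with read_poll_timeout() from <linux/iopoll.h>, which re-reads R21C_STATUS on a fixed cadence until the condition holds or the timeout expires. Roughly, the call above behaves like the hand-written loop below (a simplified sketch that ignores the exact sleep pattern and the sleep_before_read argument):

/* Approximate, driver-local expansion of the read_poll_timeout() call. */
static int example_poll_status(struct sdricoh_host *host, unsigned int wanted,
			       unsigned int timeout_us)
{
	unsigned int status, waited_us = 0;

	for (;;) {
		status = sdricoh_readl(host, R21C_STATUS);
		if (sdricoh_status_ok(host, status, wanted))
			return 0;
		if (waited_us >= timeout_us)
			return -ETIMEDOUT;
		usleep_range(8, 32);	/* poll interval of up to 32 us */
		waited_us += 32;
	}
}
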
diff --git a/drivers/mmc/host/sunxi-mmc.c b/drivers/mmc/host/sunxi-mmc.c
index f87d7967457f..5e95bbc51644 100644
--- a/drivers/mmc/host/sunxi-mmc.c
+++ b/drivers/mmc/host/sunxi-mmc.c
@@ -951,9 +951,13 @@ static void sunxi_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
static int sunxi_mmc_volt_switch(struct mmc_host *mmc, struct mmc_ios *ios)
{
+ int ret;
+
/* vqmmc regulator is available */
- if (!IS_ERR(mmc->supply.vqmmc))
- return mmc_regulator_set_vqmmc(mmc, ios);
+ if (!IS_ERR(mmc->supply.vqmmc)) {
+ ret = mmc_regulator_set_vqmmc(mmc, ios);
+ return ret < 0 ? ret : 0;
+ }
/* no vqmmc regulator, assume fixed regulator at 3/3.3V */
if (mmc->ios.signal_voltage == MMC_SIGNAL_VOLTAGE_330)
@@ -1390,7 +1394,7 @@ static int sunxi_mmc_probe(struct platform_device *pdev)
mmc->f_min = 400000;
mmc->f_max = 52000000;
mmc->caps |= MMC_CAP_MMC_HIGHSPEED | MMC_CAP_SD_HIGHSPEED |
- MMC_CAP_ERASE | MMC_CAP_SDIO_IRQ;
+ MMC_CAP_SDIO_IRQ;
/*
* Some H5 devices do not have signal traces precise enough to
diff --git a/drivers/mmc/host/tifm_sd.c b/drivers/mmc/host/tifm_sd.c
index 54271b92ee59..5987656e0474 100644
--- a/drivers/mmc/host/tifm_sd.c
+++ b/drivers/mmc/host/tifm_sd.c
@@ -73,6 +73,8 @@ module_param(fixed_timeout, bool, 0644);
#define TIFM_MMCSD_MAX_BLOCK_SIZE 0x0800UL
+#define TIFM_MMCSD_REQ_TIMEOUT_MS 1000
+
enum {
CMD_READY = 0x0001,
FIFO_READY = 0x0002,
@@ -959,7 +961,12 @@ static int tifm_sd_probe(struct tifm_dev *sock)
host = mmc_priv(mmc);
tifm_set_drvdata(sock, mmc);
host->dev = sock;
- host->timeout_jiffies = msecs_to_jiffies(1000);
+ host->timeout_jiffies = msecs_to_jiffies(TIFM_MMCSD_REQ_TIMEOUT_MS);
+ /*
+ * We use a fixed request timeout of 1s, hence inform the core about it.
+ * A future improvement should instead respect the cmd->busy_timeout.
+ */
+ mmc->max_busy_timeout = TIFM_MMCSD_REQ_TIMEOUT_MS;
tasklet_init(&host->finish_tasklet, tifm_sd_end_cmd,
(unsigned long)host);
diff --git a/drivers/mmc/host/tmio_mmc_core.c b/drivers/mmc/host/tmio_mmc_core.c
index 9520bd94cf43..d7fde57c78c1 100644
--- a/drivers/mmc/host/tmio_mmc_core.c
+++ b/drivers/mmc/host/tmio_mmc_core.c
@@ -39,7 +39,6 @@
#include <linux/module.h>
#include <linux/pagemap.h>
#include <linux/platform_device.h>
-#include <linux/pm_domain.h>
#include <linux/pm_qos.h>
#include <linux/pm_runtime.h>
#include <linux/regulator/consumer.h>
@@ -1128,7 +1127,7 @@ int tmio_mmc_host_probe(struct tmio_mmc_host *_host)
if (ret == -EPROBE_DEFER)
return ret;
- mmc->caps |= MMC_CAP_ERASE | MMC_CAP_4_BIT_DATA | pdata->capabilities;
+ mmc->caps |= MMC_CAP_4_BIT_DATA | pdata->capabilities;
mmc->caps2 |= pdata->capabilities2;
mmc->max_segs = pdata->max_segs ? : 32;
mmc->max_blk_size = TMIO_MAX_BLK_SIZE;
@@ -1192,7 +1191,6 @@ int tmio_mmc_host_probe(struct tmio_mmc_host *_host)
/* See if we also get DMA */
tmio_mmc_request_dma(_host, pdata);
- dev_pm_domain_start(&pdev->dev);
pm_runtime_get_noresume(&pdev->dev);
pm_runtime_set_active(&pdev->dev);
pm_runtime_set_autosuspend_delay(&pdev->dev, 50);
@@ -1231,12 +1229,14 @@ void tmio_mmc_host_remove(struct tmio_mmc_host *host)
cancel_work_sync(&host->done);
cancel_delayed_work_sync(&host->delayed_reset_work);
tmio_mmc_release_dma(host);
+ tmio_mmc_disable_mmc_irqs(host, TMIO_MASK_ALL);
- pm_runtime_dont_use_autosuspend(&pdev->dev);
if (host->native_hotplug)
pm_runtime_put_noidle(&pdev->dev);
- pm_runtime_put_sync(&pdev->dev);
+
pm_runtime_disable(&pdev->dev);
+ pm_runtime_dont_use_autosuspend(&pdev->dev);
+ pm_runtime_put_noidle(&pdev->dev);
}
EXPORT_SYMBOL_GPL(tmio_mmc_host_remove);
diff --git a/drivers/mmc/host/uniphier-sd.c b/drivers/mmc/host/uniphier-sd.c
index a1683c49cb90..f82baf99fd69 100644
--- a/drivers/mmc/host/uniphier-sd.c
+++ b/drivers/mmc/host/uniphier-sd.c
@@ -610,11 +610,6 @@ static int uniphier_sd_probe(struct platform_device *pdev)
}
}
- ret = devm_request_irq(dev, irq, tmio_mmc_irq, IRQF_SHARED,
- dev_name(dev), host);
- if (ret)
- goto free_host;
-
if (priv->caps & UNIPHIER_SD_CAP_EXTENDED_IP)
host->dma_ops = &uniphier_sd_internal_dma_ops;
else
@@ -642,8 +637,15 @@ static int uniphier_sd_probe(struct platform_device *pdev)
if (ret)
goto free_host;
+ ret = devm_request_irq(dev, irq, tmio_mmc_irq, IRQF_SHARED,
+ dev_name(dev), host);
+ if (ret)
+ goto remove_host;
+
return 0;
+remove_host:
+ tmio_mmc_host_remove(host);
free_host:
tmio_mmc_host_free(host);
diff --git a/drivers/mmc/host/usdhi6rol0.c b/drivers/mmc/host/usdhi6rol0.c
index 9a0b1e4e405d..369b8dee2e3d 100644
--- a/drivers/mmc/host/usdhi6rol0.c
+++ b/drivers/mmc/host/usdhi6rol0.c
@@ -136,6 +136,8 @@
#define USDHI6_MIN_DMA 64
+#define USDHI6_REQ_TIMEOUT_MS 4000
+
enum usdhi6_wait_for {
USDHI6_WAIT_FOR_REQUEST,
USDHI6_WAIT_FOR_CMD,
@@ -1763,7 +1765,12 @@ static int usdhi6_probe(struct platform_device *pdev)
host = mmc_priv(mmc);
host->mmc = mmc;
host->wait = USDHI6_WAIT_FOR_REQUEST;
- host->timeout = msecs_to_jiffies(4000);
+ host->timeout = msecs_to_jiffies(USDHI6_REQ_TIMEOUT_MS);
+ /*
+ * We use a fixed timeout of 4s, hence inform the core about it. A
+ * future improvement should instead respect the cmd->busy_timeout.
+ */
+ mmc->max_busy_timeout = USDHI6_REQ_TIMEOUT_MS;
host->pinctrl = devm_pinctrl_get(&pdev->dev);
if (IS_ERR(host->pinctrl)) {
diff --git a/drivers/mmc/host/via-sdmmc.c b/drivers/mmc/host/via-sdmmc.c
index e48bddd95ce6..ef95bce50889 100644
--- a/drivers/mmc/host/via-sdmmc.c
+++ b/drivers/mmc/host/via-sdmmc.c
@@ -319,6 +319,8 @@ struct via_crdr_mmc_host {
/* some devices need a very long delay for power to stabilize */
#define VIA_CRDR_QUIRK_300MS_PWRDELAY 0x0001
+#define VIA_CMD_TIMEOUT_MS 1000
+
static const struct pci_device_id via_ids[] = {
{PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_9530,
PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0,},
@@ -551,14 +553,17 @@ static void via_sdc_send_command(struct via_crdr_mmc_host *host,
{
void __iomem *addrbase;
struct mmc_data *data;
+ unsigned int timeout_ms;
u32 cmdctrl = 0;
WARN_ON(host->cmd);
data = cmd->data;
- mod_timer(&host->timer, jiffies + HZ);
host->cmd = cmd;
+ timeout_ms = cmd->busy_timeout ? cmd->busy_timeout : VIA_CMD_TIMEOUT_MS;
+ mod_timer(&host->timer, jiffies + msecs_to_jiffies(timeout_ms));
+
/*Command index*/
cmdctrl = cmd->opcode << 8;
diff --git a/drivers/mmc/host/wbsd.c b/drivers/mmc/host/wbsd.c
index 740179f42cf2..67f917d6ecd3 100644
--- a/drivers/mmc/host/wbsd.c
+++ b/drivers/mmc/host/wbsd.c
@@ -28,6 +28,8 @@
#include <linux/pnp.h>
#include <linux/highmem.h>
#include <linux/mmc/host.h>
+#include <linux/mmc/mmc.h>
+#include <linux/mmc/sd.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>
@@ -770,22 +772,22 @@ static void wbsd_request(struct mmc_host *mmc, struct mmc_request *mrq)
* interrupts.
*/
switch (cmd->opcode) {
- case 11:
- case 17:
- case 18:
- case 20:
- case 24:
- case 25:
- case 26:
- case 27:
- case 30:
- case 42:
- case 56:
+ case SD_SWITCH_VOLTAGE:
+ case MMC_READ_SINGLE_BLOCK:
+ case MMC_READ_MULTIPLE_BLOCK:
+ case MMC_WRITE_DAT_UNTIL_STOP:
+ case MMC_WRITE_BLOCK:
+ case MMC_WRITE_MULTIPLE_BLOCK:
+ case MMC_PROGRAM_CID:
+ case MMC_PROGRAM_CSD:
+ case MMC_SEND_WRITE_PROT:
+ case MMC_LOCK_UNLOCK:
+ case MMC_GEN_CMD:
break;
/* ACMDs. We don't keep track of state, so we just treat them
* like any other command. */
- case 51:
+ case SD_APP_SEND_SCR:
break;
default:
diff --git a/drivers/mtd/Kconfig b/drivers/mtd/Kconfig
index 42d401ea60ee..6ddab796216d 100644
--- a/drivers/mtd/Kconfig
+++ b/drivers/mtd/Kconfig
@@ -170,6 +170,16 @@ config MTD_OOPS
buffer in a flash partition where it can be read back at some
later point.
+config MTD_PSTORE
+ tristate "Log panic/oops to an MTD buffer based on pstore"
+ depends on PSTORE_BLK
+ help
+ This enables panic and oops messages to be logged to a circular
+ buffer in a flash partition, where they can be read back as files
+ after mounting the pstore filesystem.
+
+ If unsure, say N.
+
config MTD_SWAP
tristate "Swap on MTD device support"
depends on MTD && SWAP
diff --git a/drivers/mtd/Makefile b/drivers/mtd/Makefile
index 56cc60ccc477..593d0593a038 100644
--- a/drivers/mtd/Makefile
+++ b/drivers/mtd/Makefile
@@ -20,6 +20,7 @@ obj-$(CONFIG_RFD_FTL) += rfd_ftl.o
obj-$(CONFIG_SSFDC) += ssfdc.o
obj-$(CONFIG_SM_FTL) += sm_ftl.o
obj-$(CONFIG_MTD_OOPS) += mtdoops.o
+obj-$(CONFIG_MTD_PSTORE) += mtdpstore.o
obj-$(CONFIG_MTD_SWAP) += mtdswap.o
nftl-objs := nftlcore.o nftlmount.o
diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index 29d41003d6e0..b47691e1b81c 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -2036,11 +2036,10 @@ static struct backing_dev_info * __init mtd_bdi_init(char *name)
struct backing_dev_info *bdi;
int ret;
- bdi = bdi_alloc(GFP_KERNEL);
+ bdi = bdi_alloc(NUMA_NO_NODE);
if (!bdi)
return ERR_PTR(-ENOMEM);
- bdi->name = name;
/*
* We put '-0' suffix to the name to get the same name format as we
* used to get. Since this is called only once, we get a unique name.
diff --git a/drivers/mtd/mtdpstore.c b/drivers/mtd/mtdpstore.c
new file mode 100644
index 000000000000..a4fe6060b960
--- /dev/null
+++ b/drivers/mtd/mtdpstore.c
@@ -0,0 +1,578 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define dev_fmt(fmt) "mtdoops-pstore: " fmt
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pstore_blk.h>
+#include <linux/mtd/mtd.h>
+#include <linux/bitops.h>
+
+static struct mtdpstore_context {
+ int index;
+ struct pstore_blk_config info;
+ struct pstore_device_info dev;
+ struct mtd_info *mtd;
+ unsigned long *rmmap; /* removed bit map */
+ unsigned long *usedmap; /* used bit map */
+ /*
+ * Used for panic write.
+ * As block_isbad() cannot be called in the panic path, keep this
+ * status ahead of time so that panic_write does not fail.
+ */
+ unsigned long *badmap; /* bad block bit map */
+} oops_cxt;
+
+static int mtdpstore_block_isbad(struct mtdpstore_context *cxt, loff_t off)
+{
+ int ret;
+ struct mtd_info *mtd = cxt->mtd;
+ u64 blknum;
+
+ off = ALIGN_DOWN(off, mtd->erasesize);
+ blknum = div_u64(off, mtd->erasesize);
+
+ if (test_bit(blknum, cxt->badmap))
+ return true;
+ ret = mtd_block_isbad(mtd, off);
+ if (ret < 0) {
+ dev_err(&mtd->dev, "mtd_block_isbad failed, aborting\n");
+ return ret;
+ } else if (ret > 0) {
+ set_bit(blknum, cxt->badmap);
+ return true;
+ }
+ return false;
+}
+
+static inline int mtdpstore_panic_block_isbad(struct mtdpstore_context *cxt,
+ loff_t off)
+{
+ struct mtd_info *mtd = cxt->mtd;
+ u64 blknum;
+
+ off = ALIGN_DOWN(off, mtd->erasesize);
+ blknum = div_u64(off, mtd->erasesize);
+ return test_bit(blknum, cxt->badmap);
+}
+
+static inline void mtdpstore_mark_used(struct mtdpstore_context *cxt,
+ loff_t off)
+{
+ struct mtd_info *mtd = cxt->mtd;
+ u64 zonenum = div_u64(off, cxt->info.kmsg_size);
+
+ dev_dbg(&mtd->dev, "mark zone %llu used\n", zonenum);
+ set_bit(zonenum, cxt->usedmap);
+}
+
+static inline void mtdpstore_mark_unused(struct mtdpstore_context *cxt,
+ loff_t off)
+{
+ struct mtd_info *mtd = cxt->mtd;
+ u64 zonenum = div_u64(off, cxt->info.kmsg_size);
+
+ dev_dbg(&mtd->dev, "mark zone %llu unused\n", zonenum);
+ clear_bit(zonenum, cxt->usedmap);
+}
+
+static inline void mtdpstore_block_mark_unused(struct mtdpstore_context *cxt,
+ loff_t off)
+{
+ struct mtd_info *mtd = cxt->mtd;
+ u32 zonecnt = mtd->erasesize / cxt->info.kmsg_size;
+ u64 zonenum;
+
+ off = ALIGN_DOWN(off, mtd->erasesize);
+ zonenum = div_u64(off, cxt->info.kmsg_size);
+ while (zonecnt > 0) {
+ dev_dbg(&mtd->dev, "mark zone %llu unused\n", zonenum);
+ clear_bit(zonenum, cxt->usedmap);
+ zonenum++;
+ zonecnt--;
+ }
+}
+
+static inline int mtdpstore_is_used(struct mtdpstore_context *cxt, loff_t off)
+{
+ u64 zonenum = div_u64(off, cxt->info.kmsg_size);
+ u64 blknum = div_u64(off, cxt->mtd->erasesize);
+
+ if (test_bit(blknum, cxt->badmap))
+ return true;
+ return test_bit(zonenum, cxt->usedmap);
+}
+
+static int mtdpstore_block_is_used(struct mtdpstore_context *cxt,
+ loff_t off)
+{
+ struct mtd_info *mtd = cxt->mtd;
+ u32 zonecnt = mtd->erasesize / cxt->info.kmsg_size;
+ u64 zonenum;
+
+ off = ALIGN_DOWN(off, mtd->erasesize);
+ zonenum = div_u64(off, cxt->info.kmsg_size);
+ while (zonecnt > 0) {
+ if (test_bit(zonenum, cxt->usedmap))
+ return true;
+ zonenum++;
+ zonecnt--;
+ }
+ return false;
+}
+
+static int mtdpstore_is_empty(struct mtdpstore_context *cxt, char *buf,
+ size_t size)
+{
+ struct mtd_info *mtd = cxt->mtd;
+ size_t sz;
+ int i;
+
+ sz = min_t(uint32_t, size, mtd->writesize / 4);
+ for (i = 0; i < sz; i++) {
+ if (buf[i] != (char)0xFF)
+ return false;
+ }
+ return true;
+}
+
+static void mtdpstore_mark_removed(struct mtdpstore_context *cxt, loff_t off)
+{
+ struct mtd_info *mtd = cxt->mtd;
+ u64 zonenum = div_u64(off, cxt->info.kmsg_size);
+
+ dev_dbg(&mtd->dev, "mark zone %llu removed\n", zonenum);
+ set_bit(zonenum, cxt->rmmap);
+}
+
+static void mtdpstore_block_clear_removed(struct mtdpstore_context *cxt,
+ loff_t off)
+{
+ struct mtd_info *mtd = cxt->mtd;
+ u32 zonecnt = mtd->erasesize / cxt->info.kmsg_size;
+ u64 zonenum;
+
+ off = ALIGN_DOWN(off, mtd->erasesize);
+ zonenum = div_u64(off, cxt->info.kmsg_size);
+ while (zonecnt > 0) {
+ clear_bit(zonenum, cxt->rmmap);
+ zonenum++;
+ zonecnt--;
+ }
+}
+
+static int mtdpstore_block_is_removed(struct mtdpstore_context *cxt,
+ loff_t off)
+{
+ struct mtd_info *mtd = cxt->mtd;
+ u32 zonecnt = mtd->erasesize / cxt->info.kmsg_size;
+ u64 zonenum;
+
+ off = ALIGN_DOWN(off, mtd->erasesize);
+ zonenum = div_u64(off, cxt->info.kmsg_size);
+ while (zonecnt > 0) {
+ if (test_bit(zonenum, cxt->rmmap))
+ return true;
+ zonenum++;
+ zonecnt--;
+ }
+ return false;
+}
+
+static int mtdpstore_erase_do(struct mtdpstore_context *cxt, loff_t off)
+{
+ struct mtd_info *mtd = cxt->mtd;
+ struct erase_info erase;
+ int ret;
+
+ off = ALIGN_DOWN(off, cxt->mtd->erasesize);
+ dev_dbg(&mtd->dev, "try to erase off 0x%llx\n", off);
+ erase.len = cxt->mtd->erasesize;
+ erase.addr = off;
+ ret = mtd_erase(cxt->mtd, &erase);
+ if (!ret)
+ mtdpstore_block_clear_removed(cxt, off);
+ else
+ dev_err(&mtd->dev, "erase of region [0x%llx, 0x%llx] on \"%s\" failed\n",
+ (unsigned long long)erase.addr,
+ (unsigned long long)erase.len, cxt->info.device);
+ return ret;
+}
+
+/*
+ * Called while a file is being removed.
+ *
+ * To avoid over-erasing, erase a block only when the whole block is unused.
+ * If the block still contains valid logs, the erase is done lazily from
+ * flush_removed() at unregister time.
+ */
+static ssize_t mtdpstore_erase(size_t size, loff_t off)
+{
+ struct mtdpstore_context *cxt = &oops_cxt;
+
+ if (mtdpstore_block_isbad(cxt, off))
+ return -EIO;
+
+ mtdpstore_mark_unused(cxt, off);
+
+ /* If the block still has valid data, mtdpstore erases it lazily */
+ if (likely(mtdpstore_block_is_used(cxt, off))) {
+ mtdpstore_mark_removed(cxt, off);
+ return 0;
+ }
+
+ /* all zones are unused, erase it */
+ return mtdpstore_erase_do(cxt, off);
+}
+
+/*
+ * What does "security" mean for mtdpstore?
+ * Since nothing can be erased in the panic path, at least one zone must be
+ * kept writable, otherwise panic_write will fail.
+ * If a zone is already used, the write operation returns -ENOMSG and
+ * pstore/blk keeps trying zones one by one until it finds an empty one. So
+ * the next zone does not have to be empty, as long as at least one zone is.
+ */
+static int mtdpstore_security(struct mtdpstore_context *cxt, loff_t off)
+{
+ int ret = 0, i;
+ struct mtd_info *mtd = cxt->mtd;
+ u32 zonenum = (u32)div_u64(off, cxt->info.kmsg_size);
+ u32 zonecnt = (u32)div_u64(cxt->mtd->size, cxt->info.kmsg_size);
+ u32 blkcnt = (u32)div_u64(cxt->mtd->size, cxt->mtd->erasesize);
+ u32 erasesize = cxt->mtd->erasesize;
+
+ for (i = 0; i < zonecnt; i++) {
+ u32 num = (zonenum + i) % zonecnt;
+
+ /* found empty zone */
+ if (!test_bit(num, cxt->usedmap))
+ return 0;
+ }
+
+ /* If there is no empty zone at all, there is no choice but to erase */
+ while (blkcnt--) {
+ div64_u64_rem(off + erasesize, cxt->mtd->size, (u64 *)&off);
+
+ if (mtdpstore_block_isbad(cxt, off))
+ continue;
+
+ ret = mtdpstore_erase_do(cxt, off);
+ if (!ret) {
+ mtdpstore_block_mark_unused(cxt, off);
+ break;
+ }
+ }
+
+ if (ret)
+ dev_err(&mtd->dev, "all blocks bad!\n");
+ dev_dbg(&mtd->dev, "end security\n");
+ return ret;
+}
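
The -ENOMSG convention described above is the contract between this backend and pstore/blk. The sketch below is illustrative pseudocaller code only, showing how a pstore/blk-style caller could consume that convention; the real zone management lives on the pstore/blk side and differs in detail.

/* Illustrative only: walk the zones and stop at the first one the backend
 * accepts; -ENOMSG means "occupied or bad, try the next zone". */
static ssize_t write_first_free_zone(const char *buf, size_t size,
				     loff_t start, u32 zonecnt, u32 zonesize)
{
	u32 i;

	for (i = 0; i < zonecnt; i++) {
		loff_t off = start + (loff_t)i * zonesize;
		ssize_t ret = mtdpstore_write(buf, size, off);

		if (ret != -ENOMSG)	/* success or a hard error */
			return ret;
	}
	return -ENOSPC;		/* every zone was occupied */
}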
+
+static ssize_t mtdpstore_write(const char *buf, size_t size, loff_t off)
+{
+ struct mtdpstore_context *cxt = &oops_cxt;
+ struct mtd_info *mtd = cxt->mtd;
+ size_t retlen;
+ int ret;
+
+ if (mtdpstore_block_isbad(cxt, off))
+ return -ENOMSG;
+
+ /* zone is used, please try next one */
+ if (mtdpstore_is_used(cxt, off))
+ return -ENOMSG;
+
+ dev_dbg(&mtd->dev, "try to write off 0x%llx size %zu\n", off, size);
+ ret = mtd_write(cxt->mtd, off, size, &retlen, (u_char *)buf);
+ if (ret < 0 || retlen != size) {
+ dev_err(&mtd->dev, "write failure at %lld (%zu of %zu written), err %d\n",
+ off, retlen, size, ret);
+ return -EIO;
+ }
+ mtdpstore_mark_used(cxt, off);
+
+ mtdpstore_security(cxt, off);
+ return retlen;
+}
+
+static inline bool mtdpstore_is_io_error(int ret)
+{
+ return ret < 0 && !mtd_is_bitflip(ret) && !mtd_is_eccerr(ret);
+}
+
+/*
+ * All zones will be read, since pstore/blk reads the zones one by one
+ * during recovery.
+ */
+static ssize_t mtdpstore_read(char *buf, size_t size, loff_t off)
+{
+ struct mtdpstore_context *cxt = &oops_cxt;
+ struct mtd_info *mtd = cxt->mtd;
+ size_t retlen, done;
+ int ret;
+
+ if (mtdpstore_block_isbad(cxt, off))
+ return -ENOMSG;
+
+ dev_dbg(&mtd->dev, "try to read off 0x%llx size %zu\n", off, size);
+ for (done = 0, retlen = 0; done < size; done += retlen) {
+ retlen = 0;
+
+ ret = mtd_read(cxt->mtd, off + done, size - done, &retlen,
+ (u_char *)buf + done);
+ if (mtdpstore_is_io_error(ret)) {
+ dev_err(&mtd->dev, "read failure at %lld (%zu of %zu read), err %d\n",
+ off + done, retlen, size - done, ret);
+ /* the zone may be broken, try next one */
+ return -ENOMSG;
+ }
+
+ /*
+ * ECC error. The impact on the log data is usually small and the
+ * data may still be readable and useful, so mtdpstore just hands
+ * over whatever it got and lets the user judge whether the data
+ * is valid.
+ */
+ if (mtd_is_eccerr(ret)) {
+ dev_err(&mtd->dev, "ecc error at %lld (%zu of %zu read), err %d\n",
+ off + done, retlen, size - done, ret);
+ /* driver may not set retlen when ecc error */
+ retlen = retlen == 0 ? size - done : retlen;
+ }
+ }
+
+ if (mtdpstore_is_empty(cxt, buf, size))
+ mtdpstore_mark_unused(cxt, off);
+ else
+ mtdpstore_mark_used(cxt, off);
+
+ mtdpstore_security(cxt, off);
+ return retlen;
+}
+
+static ssize_t mtdpstore_panic_write(const char *buf, size_t size, loff_t off)
+{
+ struct mtdpstore_context *cxt = &oops_cxt;
+ struct mtd_info *mtd = cxt->mtd;
+ size_t retlen;
+ int ret;
+
+ if (mtdpstore_panic_block_isbad(cxt, off))
+ return -ENOMSG;
+
+ /* zone is used, please try next one */
+ if (mtdpstore_is_used(cxt, off))
+ return -ENOMSG;
+
+ ret = mtd_panic_write(cxt->mtd, off, size, &retlen, (u_char *)buf);
+ if (ret < 0 || size != retlen) {
+ dev_err(&mtd->dev, "panic write failure at %lld (%zu of %zu written), err %d\n",
+ off, retlen, size, ret);
+ return -EIO;
+ }
+ mtdpstore_mark_used(cxt, off);
+
+ return retlen;
+}
+
+static void mtdpstore_notify_add(struct mtd_info *mtd)
+{
+ int ret;
+ struct mtdpstore_context *cxt = &oops_cxt;
+ struct pstore_blk_config *info = &cxt->info;
+ unsigned long longcnt;
+
+ if (!strcmp(mtd->name, info->device))
+ cxt->index = mtd->index;
+
+ if (mtd->index != cxt->index || cxt->index < 0)
+ return;
+
+ dev_dbg(&mtd->dev, "found matching MTD device %s\n", mtd->name);
+
+ if (mtd->size < info->kmsg_size * 2) {
+ dev_err(&mtd->dev, "MTD partition %d not big enough\n",
+ mtd->index);
+ return;
+ }
+ /*
+ * kmsg_size must be aligned to 4096 bytes, a limit imposed by
+ * pstore/blk. The default kmsg_size is 64KB. If kmsg_size is
+ * larger than erasesize, errors will occur, since mtdpstore is
+ * designed around that assumption.
+ */
+ if (mtd->erasesize < info->kmsg_size) {
+ dev_err(&mtd->dev, "eraseblock size of MTD partition %d too small\n",
+ mtd->index);
+ return;
+ }
+ if (unlikely(info->kmsg_size % mtd->writesize)) {
+ dev_err(&mtd->dev, "record size %lu KB must align to write size %d KB\n",
+ info->kmsg_size / 1024,
+ mtd->writesize / 1024);
+ return;
+ }
+
+ longcnt = BITS_TO_LONGS(div_u64(mtd->size, info->kmsg_size));
+ cxt->rmmap = kcalloc(longcnt, sizeof(long), GFP_KERNEL);
+ cxt->usedmap = kcalloc(longcnt, sizeof(long), GFP_KERNEL);
+
+ longcnt = BITS_TO_LONGS(div_u64(mtd->size, mtd->erasesize));
+ cxt->badmap = kcalloc(longcnt, sizeof(long), GFP_KERNEL);
+
+ cxt->dev.total_size = mtd->size;
+ /* just support dmesg right now */
+ cxt->dev.flags = PSTORE_FLAGS_DMESG;
+ cxt->dev.read = mtdpstore_read;
+ cxt->dev.write = mtdpstore_write;
+ cxt->dev.erase = mtdpstore_erase;
+ cxt->dev.panic_write = mtdpstore_panic_write;
+
+ ret = register_pstore_device(&cxt->dev);
+ if (ret) {
+ dev_err(&mtd->dev, "mtd%d register to psblk failed\n",
+ mtd->index);
+ return;
+ }
+ cxt->mtd = mtd;
+ dev_info(&mtd->dev, "Attached to MTD device %d\n", mtd->index);
+}
+
+static int mtdpstore_flush_removed_do(struct mtdpstore_context *cxt,
+ loff_t off, size_t size)
+{
+ struct mtd_info *mtd = cxt->mtd;
+ u_char *buf;
+ int ret;
+ size_t retlen;
+ struct erase_info erase;
+
+ buf = kmalloc(mtd->erasesize, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ /* 1st. read to cache */
+ ret = mtd_read(mtd, off, mtd->erasesize, &retlen, buf);
+ if (mtdpstore_is_io_error(ret))
+ goto free;
+
+ /* 2nd. erase block */
+ erase.len = mtd->erasesize;
+ erase.addr = off;
+ ret = mtd_erase(mtd, &erase);
+ if (ret)
+ goto free;
+
+ /* 3rd. write back */
+ while (size) {
+ unsigned int zonesize = cxt->info.kmsg_size;
+
+ /* there is valid data on block, write back */
+ if (mtdpstore_is_used(cxt, off)) {
+ ret = mtd_write(mtd, off, zonesize, &retlen, buf);
+ if (ret)
+ dev_err(&mtd->dev, "write failure at %lld (%zu of %u written), err %d\n",
+ off, retlen, zonesize, ret);
+ }
+
+ off += zonesize;
+ size -= min_t(unsigned int, zonesize, size);
+ }
+
+free:
+ kfree(buf);
+ return ret;
+}
+
+/*
+ * What does mtdpstore_flush_removed() do?
+ * When the user removes a log file from the pstore filesystem, mtdpstore must
+ * make sure the log is really gone. If the whole block is no longer used, it
+ * simply erases the block. If the block still contains valid logs, all
+ * mtdpstore can do is erase the block and write the valid logs back.
+ */
+static int mtdpstore_flush_removed(struct mtdpstore_context *cxt)
+{
+ struct mtd_info *mtd = cxt->mtd;
+ int ret;
+ loff_t off;
+ u32 blkcnt = (u32)div_u64(mtd->size, mtd->erasesize);
+
+ for (off = 0; blkcnt > 0; blkcnt--, off += mtd->erasesize) {
+ ret = mtdpstore_block_isbad(cxt, off);
+ if (ret)
+ continue;
+
+ ret = mtdpstore_block_is_removed(cxt, off);
+ if (!ret)
+ continue;
+
+ ret = mtdpstore_flush_removed_do(cxt, off, mtd->erasesize);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
+static void mtdpstore_notify_remove(struct mtd_info *mtd)
+{
+ struct mtdpstore_context *cxt = &oops_cxt;
+
+ if (mtd->index != cxt->index || cxt->index < 0)
+ return;
+
+ mtdpstore_flush_removed(cxt);
+
+ unregister_pstore_device(&cxt->dev);
+ kfree(cxt->badmap);
+ kfree(cxt->usedmap);
+ kfree(cxt->rmmap);
+ cxt->mtd = NULL;
+ cxt->index = -1;
+}
+
+static struct mtd_notifier mtdpstore_notifier = {
+ .add = mtdpstore_notify_add,
+ .remove = mtdpstore_notify_remove,
+};
+
+static int __init mtdpstore_init(void)
+{
+ int ret;
+ struct mtdpstore_context *cxt = &oops_cxt;
+ struct pstore_blk_config *info = &cxt->info;
+
+ ret = pstore_blk_get_config(info);
+ if (unlikely(ret))
+ return ret;
+
+ if (strlen(info->device) == 0) {
+ pr_err("mtd device must be supplied (device name is empty)\n");
+ return -EINVAL;
+ }
+ if (!info->kmsg_size) {
+ pr_err("no backend enabled (kmsg_size is 0)\n");
+ return -EINVAL;
+ }
+
+ /* Setup the MTD device to use */
+ ret = kstrtoint((char *)info->device, 0, &cxt->index);
+ if (ret)
+ cxt->index = -1;
+
+ register_mtd_user(&mtdpstore_notifier);
+ return 0;
+}
+module_init(mtdpstore_init);
+
+static void __exit mtdpstore_exit(void)
+{
+ unregister_mtd_user(&mtdpstore_notifier);
+}
+module_exit(mtdpstore_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("WeiXiong Liao <liaoweixiong@allwinnertech.com>");
+MODULE_DESCRIPTION("MTD backend for pstore/blk");
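
Usage note: mtdpstore takes no parameters of its own; mtdpstore_init() pulls everything from pstore/blk via pstore_blk_get_config(), so the target MTD partition (given either by name or as an index, per the kstrtoint() fallback above) and the record size are configured through pstore/blk. The exact option spellings (for instance pstore_blk.blkdev= and pstore_blk.kmsg_size= on the kernel command line) are assumptions here; Documentation/admin-guide/pstore-blk.rst, added in this same series, is the authoritative reference.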
diff --git a/drivers/mtd/ubi/io.c b/drivers/mtd/ubi/io.c
index b57b84fb97d0..14d890b00d2c 100644
--- a/drivers/mtd/ubi/io.c
+++ b/drivers/mtd/ubi/io.c
@@ -1297,7 +1297,7 @@ static int self_check_write(struct ubi_device *ubi, const void *buf, int pnum,
if (!ubi_dbg_chk_io(ubi))
return 0;
- buf1 = __vmalloc(len, GFP_NOFS, PAGE_KERNEL);
+ buf1 = __vmalloc(len, GFP_NOFS);
if (!buf1) {
ubi_err(ubi, "cannot allocate memory to check writes");
return 0;
@@ -1361,7 +1361,7 @@ int ubi_self_check_all_ff(struct ubi_device *ubi, int pnum, int offset, int len)
if (!ubi_dbg_chk_io(ubi))
return 0;
- buf = __vmalloc(len, GFP_NOFS, PAGE_KERNEL);
+ buf = __vmalloc(len, GFP_NOFS);
if (!buf) {
ubi_err(ubi, "cannot allocate memory to check for 0xFFs");
return 0;
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index 444532292588..a279f4fa9962 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -7646,7 +7646,7 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
e1000_print_device_info(adapter);
- dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_NEVER_SKIP);
+ dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_NO_DIRECT_COMPLETE);
if (pci_dev_run_wake(pdev) && hw->mac.type < e1000_pch_cnp)
pm_runtime_put_noidle(&pdev->dev);
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index b46bff8fe056..8bb3db2cbd41 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -3445,7 +3445,7 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
}
}
- dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_NEVER_SKIP);
+ dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_NO_DIRECT_COMPLETE);
pm_runtime_put_noidle(&pdev->dev);
return 0;
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 43fcabb5c023..6919c50e449a 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -5184,7 +5184,7 @@ static int igc_probe(struct pci_dev *pdev,
pcie_print_link_status(pdev);
netdev_info(netdev, "MAC: %pM\n", netdev->dev_addr);
- dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_NEVER_SKIP);
+ dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_NO_DIRECT_COMPLETE);
pm_runtime_put_noidle(&pdev->dev);
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index ca68aa1df801..41f5cf0bb997 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -636,9 +636,12 @@ void netvsc_device_remove(struct hv_device *device)
RCU_INIT_POINTER(net_device_ctx->nvdev, NULL);
- /* And disassociate NAPI context from device */
- for (i = 0; i < net_device->num_chn; i++)
+ /* Disable NAPI and disassociate its context from the device. */
+ for (i = 0; i < net_device->num_chn; i++) {
+ /* See also vmbus_reset_channel_cb(). */
+ napi_disable(&net_device->chan_table[i].napi);
netif_napi_del(&net_device->chan_table[i].napi);
+ }
/*
* At this point, no one should be accessing net_device
diff --git a/drivers/net/phy/swphy.c b/drivers/net/phy/swphy.c
index 53c214a22b95..59f1ba4d49bc 100644
--- a/drivers/net/phy/swphy.c
+++ b/drivers/net/phy/swphy.c
@@ -2,7 +2,7 @@
/*
* Software PHY emulation
*
- * Code taken from fixed_phy.c by Russell King <rmk+kernel@arm.linux.org.uk>
+ * Code taken from fixed_phy.c by Russell King.
*
* Author: Vitaly Bordug <vbordug@ru.mvista.com>
* Anton Vorontsov <avorontsov@ru.mvista.com>
diff --git a/drivers/net/wireless/ath/ath10k/sdio.c b/drivers/net/wireless/ath/ath10k/sdio.c
index e2aff2254a40..63f882c690bf 100644
--- a/drivers/net/wireless/ath/ath10k/sdio.c
+++ b/drivers/net/wireless/ath/ath10k/sdio.c
@@ -1083,10 +1083,10 @@ static void ath10k_sdio_set_mbox_info(struct ath10k *ar)
mbox_info->ext_info[0].htc_ext_addr = ATH10K_HIF_MBOX0_EXT_BASE_ADDR;
- dev_id_base = FIELD_GET(QCA_MANUFACTURER_ID_BASE, device);
- dev_id_chiprev = FIELD_GET(QCA_MANUFACTURER_ID_REV_MASK, device);
+ dev_id_base = (device & 0x0F00);
+ dev_id_chiprev = (device & 0x00FF);
switch (dev_id_base) {
- case QCA_MANUFACTURER_ID_AR6005_BASE:
+ case (SDIO_DEVICE_ID_ATHEROS_AR6005 & 0x0F00):
if (dev_id_chiprev < 4)
mbox_info->ext_info[0].htc_ext_sz =
ATH10K_HIF_MBOX0_EXT_WIDTH;
@@ -1097,7 +1097,7 @@ static void ath10k_sdio_set_mbox_info(struct ath10k *ar)
mbox_info->ext_info[0].htc_ext_sz =
ATH10K_HIF_MBOX0_EXT_WIDTH_ROME_2_0;
break;
- case QCA_MANUFACTURER_ID_QCA9377_BASE:
+ case (SDIO_DEVICE_ID_ATHEROS_QCA9377 & 0x0F00):
mbox_info->ext_info[0].htc_ext_sz =
ATH10K_HIF_MBOX0_EXT_WIDTH_ROME_2_0;
break;
@@ -2270,19 +2270,16 @@ static int ath10k_sdio_probe(struct sdio_func *func,
skb_queue_head_init(&ar_sdio->rx_head);
INIT_WORK(&ar_sdio->async_work_rx, ath10k_rx_indication_async_work);
- dev_id_base = FIELD_GET(QCA_MANUFACTURER_ID_BASE, id->device);
- switch (dev_id_base) {
- case QCA_MANUFACTURER_ID_AR6005_BASE:
- case QCA_MANUFACTURER_ID_QCA9377_BASE:
- ar->dev_id = QCA9377_1_0_DEVICE_ID;
- break;
- default:
+ dev_id_base = (id->device & 0x0F00);
+ if (dev_id_base != (SDIO_DEVICE_ID_ATHEROS_AR6005 & 0x0F00) &&
+ dev_id_base != (SDIO_DEVICE_ID_ATHEROS_QCA9377 & 0x0F00)) {
ret = -ENODEV;
ath10k_err(ar, "unsupported device id %u (0x%x)\n",
dev_id_base, id->device);
goto err_free_wq;
}
+ ar->dev_id = QCA9377_1_0_DEVICE_ID;
ar->id.vendor = id->vendor;
ar->id.device = id->device;
@@ -2333,10 +2330,8 @@ static void ath10k_sdio_remove(struct sdio_func *func)
}
static const struct sdio_device_id ath10k_sdio_devices[] = {
- {SDIO_DEVICE(QCA_MANUFACTURER_CODE,
- (QCA_SDIO_ID_AR6005_BASE | 0xA))},
- {SDIO_DEVICE(QCA_MANUFACTURER_CODE,
- (QCA_SDIO_ID_QCA9377_BASE | 0x1))},
+ {SDIO_DEVICE(SDIO_VENDOR_ID_ATHEROS, SDIO_DEVICE_ID_ATHEROS_AR6005)},
+ {SDIO_DEVICE(SDIO_VENDOR_ID_ATHEROS, SDIO_DEVICE_ID_ATHEROS_QCA9377)},
{},
};
diff --git a/drivers/net/wireless/ath/ath10k/sdio.h b/drivers/net/wireless/ath/ath10k/sdio.h
index 29523600887d..b6ac927628b1 100644
--- a/drivers/net/wireless/ath/ath10k/sdio.h
+++ b/drivers/net/wireless/ath/ath10k/sdio.h
@@ -10,14 +10,6 @@
#define ATH10K_HIF_MBOX_BLOCK_SIZE 256
-#define QCA_MANUFACTURER_ID_BASE GENMASK(11, 8)
-#define QCA_MANUFACTURER_ID_AR6005_BASE 0x5
-#define QCA_MANUFACTURER_ID_QCA9377_BASE 0x7
-#define QCA_SDIO_ID_AR6005_BASE 0x500
-#define QCA_SDIO_ID_QCA9377_BASE 0x700
-#define QCA_MANUFACTURER_ID_REV_MASK 0x00FF
-#define QCA_MANUFACTURER_CODE 0x271 /* Qualcomm/Atheros */
-
#define ATH10K_SDIO_MAX_BUFFER_SIZE 4096 /*Unsure of this constant*/
/* Mailbox address in SDIO address space */
diff --git a/drivers/net/wireless/ath/ath6kl/hif.h b/drivers/net/wireless/ath/ath6kl/hif.h
index aea7fea2a81e..f9d3f3a5edfe 100644
--- a/drivers/net/wireless/ath/ath6kl/hif.h
+++ b/drivers/net/wireless/ath/ath6kl/hif.h
@@ -35,12 +35,6 @@
#define MAX_SCATTER_ENTRIES_PER_REQ 16
#define MAX_SCATTER_REQ_TRANSFER_SIZE (32 * 1024)
-#define MANUFACTURER_ID_AR6003_BASE 0x300
-#define MANUFACTURER_ID_AR6004_BASE 0x400
- /* SDIO manufacturer ID and Codes */
-#define MANUFACTURER_ID_ATH6KL_BASE_MASK 0xFF00
-#define MANUFACTURER_CODE 0x271 /* Atheros */
-
/* Mailbox address in SDIO address space */
#define HIF_MBOX_BASE_ADDR 0x800
#define HIF_MBOX_WIDTH 0x800
diff --git a/drivers/net/wireless/ath/ath6kl/sdio.c b/drivers/net/wireless/ath/ath6kl/sdio.c
index bb50680580f3..6b51a2dceadc 100644
--- a/drivers/net/wireless/ath/ath6kl/sdio.c
+++ b/drivers/net/wireless/ath/ath6kl/sdio.c
@@ -799,8 +799,7 @@ static int ath6kl_sdio_config(struct ath6kl *ar)
sdio_claim_host(func);
- if ((ar_sdio->id->device & MANUFACTURER_ID_ATH6KL_BASE_MASK) >=
- MANUFACTURER_ID_AR6003_BASE) {
+ if (ar_sdio->id->device >= SDIO_DEVICE_ID_ATHEROS_AR6003_00) {
/* enable 4-bit ASYNC interrupt on AR6003 or later */
ret = ath6kl_sdio_func0_cmd52_wr_byte(func->card,
CCCR_SDIO_IRQ_MODE_REG,
@@ -1409,13 +1408,13 @@ static void ath6kl_sdio_remove(struct sdio_func *func)
}
static const struct sdio_device_id ath6kl_sdio_devices[] = {
- {SDIO_DEVICE(MANUFACTURER_CODE, (MANUFACTURER_ID_AR6003_BASE | 0x0))},
- {SDIO_DEVICE(MANUFACTURER_CODE, (MANUFACTURER_ID_AR6003_BASE | 0x1))},
- {SDIO_DEVICE(MANUFACTURER_CODE, (MANUFACTURER_ID_AR6004_BASE | 0x0))},
- {SDIO_DEVICE(MANUFACTURER_CODE, (MANUFACTURER_ID_AR6004_BASE | 0x1))},
- {SDIO_DEVICE(MANUFACTURER_CODE, (MANUFACTURER_ID_AR6004_BASE | 0x2))},
- {SDIO_DEVICE(MANUFACTURER_CODE, (MANUFACTURER_ID_AR6004_BASE | 0x18))},
- {SDIO_DEVICE(MANUFACTURER_CODE, (MANUFACTURER_ID_AR6004_BASE | 0x19))},
+ {SDIO_DEVICE(SDIO_VENDOR_ID_ATHEROS, SDIO_DEVICE_ID_ATHEROS_AR6003_00)},
+ {SDIO_DEVICE(SDIO_VENDOR_ID_ATHEROS, SDIO_DEVICE_ID_ATHEROS_AR6003_01)},
+ {SDIO_DEVICE(SDIO_VENDOR_ID_ATHEROS, SDIO_DEVICE_ID_ATHEROS_AR6004_00)},
+ {SDIO_DEVICE(SDIO_VENDOR_ID_ATHEROS, SDIO_DEVICE_ID_ATHEROS_AR6004_01)},
+ {SDIO_DEVICE(SDIO_VENDOR_ID_ATHEROS, SDIO_DEVICE_ID_ATHEROS_AR6004_02)},
+ {SDIO_DEVICE(SDIO_VENDOR_ID_ATHEROS, SDIO_DEVICE_ID_ATHEROS_AR6004_18)},
+ {SDIO_DEVICE(SDIO_VENDOR_ID_ATHEROS, SDIO_DEVICE_ID_ATHEROS_AR6004_19)},
{},
};
diff --git a/drivers/net/wireless/broadcom/b43/sdio.c b/drivers/net/wireless/broadcom/b43/sdio.c
index 881a7938c494..02b0cfd535ab 100644
--- a/drivers/net/wireless/broadcom/b43/sdio.c
+++ b/drivers/net/wireless/broadcom/b43/sdio.c
@@ -180,8 +180,8 @@ static void b43_sdio_remove(struct sdio_func *func)
}
static const struct sdio_device_id b43_sdio_ids[] = {
- { SDIO_DEVICE(0x02d0, 0x044b) }, /* Nintendo Wii WLAN daughter card */
- { SDIO_DEVICE(0x0092, 0x0004) }, /* C-guys, Inc. EW-CG1102GC */
+ { SDIO_DEVICE(SDIO_VENDOR_ID_BROADCOM, SDIO_DEVICE_ID_BROADCOM_NINTENDO_WII) },
+ { SDIO_DEVICE(SDIO_VENDOR_ID_CGUYS, SDIO_DEVICE_ID_CGUYS_EW_CG1102GC) },
{ },
};
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c
index b1a66320ba54..46346cb3bc84 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c
@@ -912,7 +912,7 @@ static int brcmf_sdiod_probe(struct brcmf_sdio_dev *sdiodev)
goto out;
}
switch (sdiodev->func2->device) {
- case SDIO_DEVICE_ID_CYPRESS_4373:
+ case SDIO_DEVICE_ID_BROADCOM_CYPRESS_4373:
f2_blksz = SDIO_4373_FUNC2_BLOCKSIZE;
break;
case SDIO_DEVICE_ID_BROADCOM_4359:
@@ -986,9 +986,9 @@ static const struct sdio_device_id brcmf_sdmmc_ids[] = {
BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_4354),
BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_4356),
BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_4359),
- BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_CYPRESS_4373),
- BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_CYPRESS_43012),
- BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_CYPRESS_89359),
+ BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_CYPRESS_4373),
+ BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_CYPRESS_43012),
+ BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_CYPRESS_89359),
{ /* end: all zeroes */ }
};
MODULE_DEVICE_TABLE(sdio, brcmf_sdmmc_ids);
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
index 760b7737e745..310d8075f5d7 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
@@ -4197,7 +4197,7 @@ static void brcmf_sdio_firmware_callback(struct device *dev, int err,
bus->hostintmask, NULL);
switch (sdiod->func1->device) {
- case SDIO_DEVICE_ID_CYPRESS_4373:
+ case SDIO_DEVICE_ID_BROADCOM_CYPRESS_4373:
brcmf_dbg(INFO, "set F2 watermark to 0x%x*4 bytes\n",
CY_4373_F2_WATERMARK);
brcmf_sdiod_writeb(sdiod, SBSDIO_WATERMARK,
@@ -4210,7 +4210,7 @@ static void brcmf_sdio_firmware_callback(struct device *dev, int err,
brcmf_sdiod_writeb(sdiod, SBSDIO_FUNC1_MESBUSYCTRL,
CY_4373_F1_MESBUSYCTRL, &err);
break;
- case SDIO_DEVICE_ID_CYPRESS_43012:
+ case SDIO_DEVICE_ID_BROADCOM_CYPRESS_43012:
brcmf_dbg(INFO, "set F2 watermark to 0x%x*4 bytes\n",
CY_43012_F2_WATERMARK);
brcmf_sdiod_writeb(sdiod, SBSDIO_WATERMARK,
diff --git a/drivers/net/wireless/marvell/libertas/if_sdio.c b/drivers/net/wireless/marvell/libertas/if_sdio.c
index acf61b93b782..44fbd0acb87a 100644
--- a/drivers/net/wireless/marvell/libertas/if_sdio.c
+++ b/drivers/net/wireless/marvell/libertas/if_sdio.c
@@ -65,7 +65,7 @@ static const struct sdio_device_id if_sdio_ids[] = {
{ SDIO_DEVICE(SDIO_VENDOR_ID_MARVELL,
SDIO_DEVICE_ID_MARVELL_LIBERTAS) },
{ SDIO_DEVICE(SDIO_VENDOR_ID_MARVELL,
- SDIO_DEVICE_ID_MARVELL_8688WLAN) },
+ SDIO_DEVICE_ID_MARVELL_8688_WLAN) },
{ /* end: all zeroes */ },
};
diff --git a/drivers/net/wireless/marvell/mwifiex/sdio.c b/drivers/net/wireless/marvell/mwifiex/sdio.c
index 6a2dcb01caf4..a042965962a2 100644
--- a/drivers/net/wireless/marvell/mwifiex/sdio.c
+++ b/drivers/net/wireless/marvell/mwifiex/sdio.c
@@ -480,45 +480,25 @@ static void mwifiex_sdio_coredump(struct device *dev)
schedule_work(&card->work);
}
-/* Device ID for SD8786 */
-#define SDIO_DEVICE_ID_MARVELL_8786 (0x9116)
-/* Device ID for SD8787 */
-#define SDIO_DEVICE_ID_MARVELL_8787 (0x9119)
-/* Device ID for SD8797 */
-#define SDIO_DEVICE_ID_MARVELL_8797 (0x9129)
-/* Device ID for SD8897 */
-#define SDIO_DEVICE_ID_MARVELL_8897 (0x912d)
-/* Device ID for SD8887 */
-#define SDIO_DEVICE_ID_MARVELL_8887 (0x9135)
-/* Device ID for SD8801 */
-#define SDIO_DEVICE_ID_MARVELL_8801 (0x9139)
-/* Device ID for SD8977 */
-#define SDIO_DEVICE_ID_MARVELL_8977 (0x9145)
-/* Device ID for SD8987 */
-#define SDIO_DEVICE_ID_MARVELL_8987 (0x9149)
-/* Device ID for SD8997 */
-#define SDIO_DEVICE_ID_MARVELL_8997 (0x9141)
-
-
/* WLAN IDs */
static const struct sdio_device_id mwifiex_ids[] = {
- {SDIO_DEVICE(SDIO_VENDOR_ID_MARVELL, SDIO_DEVICE_ID_MARVELL_8786),
+ {SDIO_DEVICE(SDIO_VENDOR_ID_MARVELL, SDIO_DEVICE_ID_MARVELL_8786_WLAN),
.driver_data = (unsigned long) &mwifiex_sdio_sd8786},
- {SDIO_DEVICE(SDIO_VENDOR_ID_MARVELL, SDIO_DEVICE_ID_MARVELL_8787),
+ {SDIO_DEVICE(SDIO_VENDOR_ID_MARVELL, SDIO_DEVICE_ID_MARVELL_8787_WLAN),
.driver_data = (unsigned long) &mwifiex_sdio_sd8787},
- {SDIO_DEVICE(SDIO_VENDOR_ID_MARVELL, SDIO_DEVICE_ID_MARVELL_8797),
+ {SDIO_DEVICE(SDIO_VENDOR_ID_MARVELL, SDIO_DEVICE_ID_MARVELL_8797_WLAN),
.driver_data = (unsigned long) &mwifiex_sdio_sd8797},
- {SDIO_DEVICE(SDIO_VENDOR_ID_MARVELL, SDIO_DEVICE_ID_MARVELL_8897),
+ {SDIO_DEVICE(SDIO_VENDOR_ID_MARVELL, SDIO_DEVICE_ID_MARVELL_8897_WLAN),
.driver_data = (unsigned long) &mwifiex_sdio_sd8897},
- {SDIO_DEVICE(SDIO_VENDOR_ID_MARVELL, SDIO_DEVICE_ID_MARVELL_8887),
+ {SDIO_DEVICE(SDIO_VENDOR_ID_MARVELL, SDIO_DEVICE_ID_MARVELL_8887_WLAN),
.driver_data = (unsigned long)&mwifiex_sdio_sd8887},
- {SDIO_DEVICE(SDIO_VENDOR_ID_MARVELL, SDIO_DEVICE_ID_MARVELL_8801),
+ {SDIO_DEVICE(SDIO_VENDOR_ID_MARVELL, SDIO_DEVICE_ID_MARVELL_8801_WLAN),
.driver_data = (unsigned long)&mwifiex_sdio_sd8801},
- {SDIO_DEVICE(SDIO_VENDOR_ID_MARVELL, SDIO_DEVICE_ID_MARVELL_8977),
+ {SDIO_DEVICE(SDIO_VENDOR_ID_MARVELL, SDIO_DEVICE_ID_MARVELL_8977_WLAN),
.driver_data = (unsigned long)&mwifiex_sdio_sd8977},
- {SDIO_DEVICE(SDIO_VENDOR_ID_MARVELL, SDIO_DEVICE_ID_MARVELL_8987),
+ {SDIO_DEVICE(SDIO_VENDOR_ID_MARVELL, SDIO_DEVICE_ID_MARVELL_8987_WLAN),
.driver_data = (unsigned long)&mwifiex_sdio_sd8987},
- {SDIO_DEVICE(SDIO_VENDOR_ID_MARVELL, SDIO_DEVICE_ID_MARVELL_8997),
+ {SDIO_DEVICE(SDIO_VENDOR_ID_MARVELL, SDIO_DEVICE_ID_MARVELL_8997_WLAN),
.driver_data = (unsigned long)&mwifiex_sdio_sd8997},
{},
};
diff --git a/drivers/nfc/s3fwrn5/firmware.c b/drivers/nfc/s3fwrn5/firmware.c
index de613c623a2c..69857f080704 100644
--- a/drivers/nfc/s3fwrn5/firmware.c
+++ b/drivers/nfc/s3fwrn5/firmware.c
@@ -434,15 +434,7 @@ int s3fwrn5_fw_download(struct s3fwrn5_fw_info *fw_info)
goto out;
}
- {
- SHASH_DESC_ON_STACK(desc, tfm);
-
- desc->tfm = tfm;
-
- ret = crypto_shash_digest(desc, fw->image, image_size,
- hash_data);
- shash_desc_zero(desc);
- }
+ ret = crypto_shash_tfm_digest(tfm, fw->image, image_size, hash_data);
crypto_free_shash(tfm);
if (ret) {
diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c
index 43751fab9d36..036e23aef9b0 100644
--- a/drivers/nvdimm/blk.c
+++ b/drivers/nvdimm/blk.c
@@ -178,7 +178,9 @@ static blk_qc_t nd_blk_make_request(struct request_queue *q, struct bio *bio)
bip = bio_integrity(bio);
nsblk = q->queuedata;
rw = bio_data_dir(bio);
- do_acct = nd_iostat_start(bio, &start);
+ do_acct = blk_queue_io_stat(bio->bi_disk->queue);
+ if (do_acct)
+ start = bio_start_io_acct(bio);
bio_for_each_segment(bvec, bio, iter) {
unsigned int len = bvec.bv_len;
@@ -195,7 +197,7 @@ static blk_qc_t nd_blk_make_request(struct request_queue *q, struct bio *bio)
}
}
if (do_acct)
- nd_iostat_end(bio, start);
+ bio_end_io_acct(bio, start);
bio_endio(bio);
return BLK_QC_T_NONE;
diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index 3b09419218d6..90c0c4bbe77b 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -1452,7 +1452,9 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio)
if (!bio_integrity_prep(bio))
return BLK_QC_T_NONE;
- do_acct = nd_iostat_start(bio, &start);
+ do_acct = blk_queue_io_stat(bio->bi_disk->queue);
+ if (do_acct)
+ start = bio_start_io_acct(bio);
bio_for_each_segment(bvec, bio, iter) {
unsigned int len = bvec.bv_len;
@@ -1477,7 +1479,7 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio)
}
}
if (do_acct)
- nd_iostat_end(bio, start);
+ bio_end_io_acct(bio, start);
bio_endio(bio);
return BLK_QC_T_NONE;
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index 85dbb2a322b9..85c1ae813ea3 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -396,25 +396,6 @@ static inline int nvdimm_setup_pfn(struct nd_pfn *nd_pfn,
#endif
int nd_blk_region_init(struct nd_region *nd_region);
int nd_region_activate(struct nd_region *nd_region);
-void __nd_iostat_start(struct bio *bio, unsigned long *start);
-static inline bool nd_iostat_start(struct bio *bio, unsigned long *start)
-{
- struct gendisk *disk = bio->bi_disk;
-
- if (!blk_queue_io_stat(disk->queue))
- return false;
-
- *start = jiffies;
- generic_start_io_acct(disk->queue, bio_op(bio), bio_sectors(bio),
- &disk->part0);
- return true;
-}
-static inline void nd_iostat_end(struct bio *bio, unsigned long start)
-{
- struct gendisk *disk = bio->bi_disk;
-
- generic_end_io_acct(disk->queue, bio_op(bio), &disk->part0, start);
-}
static inline bool is_bad_pmem(struct badblocks *bb, sector_t sector,
unsigned int len)
{
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 2df6994acf83..97f948f8f4e6 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -202,7 +202,9 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
if (bio->bi_opf & REQ_PREFLUSH)
ret = nvdimm_flush(nd_region, bio);
- do_acct = nd_iostat_start(bio, &start);
+ do_acct = blk_queue_io_stat(bio->bi_disk->queue);
+ if (do_acct)
+ start = bio_start_io_acct(bio);
bio_for_each_segment(bvec, bio, iter) {
if (op_is_write(bio_op(bio)))
rc = pmem_do_write(pmem, bvec.bv_page, bvec.bv_offset,
@@ -216,7 +218,7 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
}
}
if (do_acct)
- nd_iostat_end(bio, start);
+ bio_end_io_acct(bio, start);
if (bio->bi_opf & REQ_FUA)
ret = nvdimm_flush(nd_region, bio);
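
The blk, btt and pmem hunks above all make the same conversion: the open-coded nd_iostat_start()/nd_iostat_end() helpers removed from nd.h are replaced by the generic block layer accounting helpers. A condensed sketch of the resulting pattern, using only names that appear in the hunks (the function name itself is hypothetical):

/* Sketch of the io-accounting pattern the nvdimm drivers switch to. */
static blk_qc_t example_make_request(struct request_queue *q, struct bio *bio)
{
	bool do_acct;
	unsigned long start;

	do_acct = blk_queue_io_stat(bio->bi_disk->queue);
	if (do_acct)
		start = bio_start_io_acct(bio);

	/* ... per-segment I/O happens here ... */

	if (do_acct)
		bio_end_io_acct(bio, start);
	bio_endio(bio);
	return BLK_QC_T_NONE;
}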
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index f3c037f5a9ba..0585efa47d8f 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -19,7 +19,6 @@
#include <linux/pr.h>
#include <linux/ptrace.h>
#include <linux/nvme_ioctl.h>
-#include <linux/t10-pi.h>
#include <linux/pm_qos.h>
#include <asm/unaligned.h>
@@ -204,11 +203,6 @@ static void nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl)
nvme_put_ctrl(ctrl);
}
-static inline bool nvme_ns_has_pi(struct nvme_ns *ns)
-{
- return ns->pi_type && ns->ms == sizeof(struct t10_pi_tuple);
-}
-
static blk_status_t nvme_error_status(u16 status)
{
switch (status & 0x7ff) {
@@ -310,7 +304,7 @@ bool nvme_cancel_request(struct request *req, void *data, bool reserved)
return true;
nvme_req(req)->status = NVME_SC_HOST_ABORTED_CMD;
- blk_mq_complete_request(req);
+ blk_mq_force_complete_rq(req);
return true;
}
EXPORT_SYMBOL_GPL(nvme_cancel_request);
@@ -433,7 +427,6 @@ static void nvme_free_ns_head(struct kref *ref)
nvme_mpath_remove_disk(head);
ida_simple_remove(&head->subsys->ns_ida, head->instance);
- list_del_init(&head->entry);
cleanup_srcu_struct(&head->srcu);
nvme_put_subsystem(head->subsys);
kfree(head);
@@ -530,7 +523,7 @@ static int nvme_get_stream_params(struct nvme_ctrl *ctrl,
c.directive.opcode = nvme_admin_directive_recv;
c.directive.nsid = cpu_to_le32(nsid);
- c.directive.numd = cpu_to_le32((sizeof(*s) >> 2) - 1);
+ c.directive.numd = cpu_to_le32(nvme_bytes_to_numd(sizeof(*s)));
c.directive.doper = NVME_DIR_RCV_ST_OP_PARAM;
c.directive.dtype = NVME_DIR_STREAMS;
@@ -553,19 +546,22 @@ static int nvme_configure_directives(struct nvme_ctrl *ctrl)
ret = nvme_get_stream_params(ctrl, &s, NVME_NSID_ALL);
if (ret)
- return ret;
+ goto out_disable_stream;
ctrl->nssa = le16_to_cpu(s.nssa);
if (ctrl->nssa < BLK_MAX_WRITE_HINTS - 1) {
dev_info(ctrl->device, "too few streams (%u) available\n",
ctrl->nssa);
- nvme_disable_streams(ctrl);
- return 0;
+ goto out_disable_stream;
}
ctrl->nr_streams = min_t(unsigned, ctrl->nssa, BLK_MAX_WRITE_HINTS - 1);
dev_info(ctrl->device, "Using %u streams\n", ctrl->nr_streams);
return 0;
+
+out_disable_stream:
+ nvme_disable_streams(ctrl);
+ return ret;
}
/*
@@ -1027,6 +1023,19 @@ void nvme_stop_keep_alive(struct nvme_ctrl *ctrl)
}
EXPORT_SYMBOL_GPL(nvme_stop_keep_alive);
+/*
+ * In NVMe 1.0 the CNS field was just a binary controller or namespace
+ * flag, thus sending any new CNS opcodes has a big chance of not working.
+ * Qemu unfortunately had that bug after reporting a 1.1 version compliance
+ * (but not for any later version).
+ */
+static bool nvme_ctrl_limited_cns(struct nvme_ctrl *ctrl)
+{
+ if (ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS)
+ return ctrl->vs < NVME_VS(1, 2, 0);
+ return ctrl->vs < NVME_VS(1, 1, 0);
+}
+
static int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
{
struct nvme_command c = { };
@@ -1290,7 +1299,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
meta_len = (io.nblocks + 1) * ns->ms;
metadata = nvme_to_user_ptr(io.metadata);
- if (ns->ext) {
+ if (ns->features & NVME_NS_EXT_LBAS) {
length += meta_len;
meta_len = 0;
} else if (meta_len) {
@@ -1392,8 +1401,10 @@ static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects)
}
if (effects & NVME_CMD_EFFECTS_CCC)
nvme_init_identify(ctrl);
- if (effects & (NVME_CMD_EFFECTS_NIC | NVME_CMD_EFFECTS_NCC))
+ if (effects & (NVME_CMD_EFFECTS_NIC | NVME_CMD_EFFECTS_NCC)) {
nvme_queue_scan(ctrl);
+ flush_work(&ctrl->scan_work);
+ }
}
static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
@@ -1682,7 +1693,8 @@ static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo)
}
#ifdef CONFIG_BLK_DEV_INTEGRITY
-static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type)
+static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type,
+ u32 max_integrity_segments)
{
struct blk_integrity integrity;
@@ -1705,20 +1717,15 @@ static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type)
}
integrity.tuple_size = ms;
blk_integrity_register(disk, &integrity);
- blk_queue_max_integrity_segments(disk->queue, 1);
+ blk_queue_max_integrity_segments(disk->queue, max_integrity_segments);
}
#else
-static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type)
+static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type,
+ u32 max_integrity_segments)
{
}
#endif /* CONFIG_BLK_DEV_INTEGRITY */
-static void nvme_set_chunk_size(struct nvme_ns *ns)
-{
- u32 chunk_size = nvme_lba_to_sect(ns, ns->noiob);
- blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(chunk_size));
-}
-
static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns)
{
struct nvme_ctrl *ctrl = ns->ctrl;
@@ -1804,12 +1811,37 @@ static bool nvme_ns_ids_equal(struct nvme_ns_ids *a, struct nvme_ns_ids *b)
memcmp(&a->eui64, &b->eui64, sizeof(a->eui64)) == 0;
}
+static int nvme_setup_streams_ns(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
+ u32 *phys_bs, u32 *io_opt)
+{
+ struct streams_directive_params s;
+ int ret;
+
+ if (!ctrl->nr_streams)
+ return 0;
+
+ ret = nvme_get_stream_params(ctrl, &s, ns->head->ns_id);
+ if (ret)
+ return ret;
+
+ ns->sws = le32_to_cpu(s.sws);
+ ns->sgs = le16_to_cpu(s.sgs);
+
+ if (ns->sws) {
+ *phys_bs = ns->sws * (1 << ns->lba_shift);
+ if (ns->sgs)
+ *io_opt = *phys_bs * ns->sgs;
+ }
+
+ return 0;
+}
+
static void nvme_update_disk_info(struct gendisk *disk,
struct nvme_ns *ns, struct nvme_id_ns *id)
{
sector_t capacity = nvme_lba_to_sect(ns, le64_to_cpu(id->nsze));
unsigned short bs = 1 << ns->lba_shift;
- u32 atomic_bs, phys_bs, io_opt;
+ u32 atomic_bs, phys_bs, io_opt = 0;
if (ns->lba_shift > PAGE_SHIFT) {
/* unsupported block size, set capacity to 0 later */
@@ -1818,26 +1850,25 @@ static void nvme_update_disk_info(struct gendisk *disk,
blk_mq_freeze_queue(disk->queue);
blk_integrity_unregister(disk);
+ atomic_bs = phys_bs = bs;
+ nvme_setup_streams_ns(ns->ctrl, ns, &phys_bs, &io_opt);
if (id->nabo == 0) {
/*
* Bit 1 indicates whether NAWUPF is defined for this namespace
* and whether it should be used instead of AWUPF. If NAWUPF ==
* 0 then AWUPF must be used instead.
*/
- if (id->nsfeat & (1 << 1) && id->nawupf)
+ if (id->nsfeat & NVME_NS_FEAT_ATOMICS && id->nawupf)
atomic_bs = (1 + le16_to_cpu(id->nawupf)) * bs;
else
atomic_bs = (1 + ns->ctrl->subsys->awupf) * bs;
- } else {
- atomic_bs = bs;
}
- phys_bs = bs;
- io_opt = bs;
- if (id->nsfeat & (1 << 4)) {
+
+ if (id->nsfeat & NVME_NS_FEAT_IO_OPT) {
/* NPWG = Namespace Preferred Write Granularity */
- phys_bs *= 1 + le16_to_cpu(id->npwg);
+ phys_bs = bs * (1 + le16_to_cpu(id->npwg));
/* NOWS = Namespace Optimal Write Size */
- io_opt *= 1 + le16_to_cpu(id->nows);
+ io_opt = bs * (1 + le16_to_cpu(id->nows));
}
blk_queue_logical_block_size(disk->queue, bs);
@@ -1850,19 +1881,34 @@ static void nvme_update_disk_info(struct gendisk *disk,
blk_queue_io_min(disk->queue, phys_bs);
blk_queue_io_opt(disk->queue, io_opt);
- if (ns->ms && !ns->ext &&
- (ns->ctrl->ops->flags & NVME_F_METADATA_SUPPORTED))
- nvme_init_integrity(disk, ns->ms, ns->pi_type);
- if ((ns->ms && !nvme_ns_has_pi(ns) && !blk_get_integrity(disk)) ||
- ns->lba_shift > PAGE_SHIFT)
+ /*
+ * The block layer can't support LBA sizes larger than the page size
+ * yet, so catch this early and don't allow block I/O.
+ */
+ if (ns->lba_shift > PAGE_SHIFT)
capacity = 0;
+ /*
+ * Register a metadata profile for PI, or the plain non-integrity NVMe
+ * metadata masquerading as Type 0 if supported, otherwise reject block
+ * I/O to namespaces with metadata except when the namespace supports
+ * PI, as it can strip/insert in that case.
+ */
+ if (ns->ms) {
+ if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) &&
+ (ns->features & NVME_NS_METADATA_SUPPORTED))
+ nvme_init_integrity(disk, ns->ms, ns->pi_type,
+ ns->ctrl->max_integrity_segments);
+ else if (!nvme_ns_has_pi(ns))
+ capacity = 0;
+ }
+
set_capacity_revalidate_and_notify(disk, capacity, false);
nvme_config_discard(disk, ns);
nvme_config_write_zeroes(disk, ns);
- if (id->nsattr & (1 << 0))
+ if (id->nsattr & NVME_NS_ATTR_RO)
set_disk_ro(disk, true);
else
set_disk_ro(disk, false);
@@ -1870,9 +1916,11 @@ static void nvme_update_disk_info(struct gendisk *disk,
blk_mq_unfreeze_queue(disk->queue);
}
-static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
+static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
{
struct nvme_ns *ns = disk->private_data;
+ struct nvme_ctrl *ctrl = ns->ctrl;
+ u32 iob;
/*
* If identify namespace failed, use default 512 byte block size so
@@ -1881,32 +1929,55 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
ns->lba_shift = id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ds;
if (ns->lba_shift == 0)
ns->lba_shift = 9;
- ns->noiob = le16_to_cpu(id->noiob);
+
+ if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) &&
+ is_power_of_2(ctrl->max_hw_sectors))
+ iob = ctrl->max_hw_sectors;
+ else
+ iob = nvme_lba_to_sect(ns, le16_to_cpu(id->noiob));
+
+ ns->features = 0;
ns->ms = le16_to_cpu(id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ms);
- ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT);
/* the PI implementation requires metadata equal t10 pi tuple size */
if (ns->ms == sizeof(struct t10_pi_tuple))
ns->pi_type = id->dps & NVME_NS_DPS_PI_MASK;
else
ns->pi_type = 0;
- if (ns->noiob)
- nvme_set_chunk_size(ns);
+ if (ns->ms) {
+ /*
+ * For PCIe only the separate metadata pointer is supported,
+ * as the block layer supplies metadata in a separate bio_vec
+ * chain. For Fabrics, only metadata as part of extended data
+ * LBA is supported on the wire per the Fabrics specification,
+ * but the HBA/HCA will do the remapping from the separate
+ * metadata buffers for us.
+ */
+ if (id->flbas & NVME_NS_FLBAS_META_EXT) {
+ ns->features |= NVME_NS_EXT_LBAS;
+ if ((ctrl->ops->flags & NVME_F_FABRICS) &&
+ (ctrl->ops->flags & NVME_F_METADATA_SUPPORTED) &&
+ ctrl->max_integrity_segments)
+ ns->features |= NVME_NS_METADATA_SUPPORTED;
+ } else {
+ if (WARN_ON_ONCE(ctrl->ops->flags & NVME_F_FABRICS))
+ return -EINVAL;
+ if (ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)
+ ns->features |= NVME_NS_METADATA_SUPPORTED;
+ }
+ }
+
+ if (iob)
+ blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(iob));
nvme_update_disk_info(disk, ns, id);
#ifdef CONFIG_NVME_MULTIPATH
if (ns->head->disk) {
nvme_update_disk_info(ns->head->disk, ns, id);
blk_queue_stack_limits(ns->head->disk->queue, ns->queue);
- if (bdi_cap_stable_pages_required(ns->queue->backing_dev_info)) {
- struct backing_dev_info *info =
- ns->head->disk->queue->backing_dev_info;
-
- info->capabilities |= BDI_CAP_STABLE_WRITES;
- }
-
revalidate_disk(ns->head->disk);
}
#endif
+ return 0;
}
static int nvme_revalidate_disk(struct gendisk *disk)
@@ -1931,7 +2002,6 @@ static int nvme_revalidate_disk(struct gendisk *disk)
goto free_id;
}
- __nvme_revalidate_disk(disk, id);
ret = nvme_report_ns_ids(ctrl, ns->head->ns_id, id, &ids);
if (ret)
goto free_id;
@@ -1940,8 +2010,10 @@ static int nvme_revalidate_disk(struct gendisk *disk)
dev_err(ctrl->device,
"identifiers changed for nsid %d\n", ns->head->ns_id);
ret = -ENODEV;
+ goto free_id;
}
+ ret = __nvme_revalidate_disk(disk, id);
free_id:
kfree(id);
out:
@@ -2249,10 +2321,8 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors);
blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX));
}
- if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) &&
- is_power_of_2(ctrl->max_hw_sectors))
- blk_queue_chunk_sectors(q, ctrl->max_hw_sectors);
blk_queue_virt_boundary(q, ctrl->page_size - 1);
+ blk_queue_dma_alignment(q, 7);
if (ctrl->vwc & NVME_CTRL_VWC_PRESENT)
vwc = true;
blk_queue_write_cache(q, vwc, vwc);
@@ -2655,7 +2725,7 @@ static bool nvme_validate_cntlid(struct nvme_subsystem *subsys,
return false;
}
- if ((id->cmic & (1 << 1)) ||
+ if ((id->cmic & NVME_CTRL_CMIC_MULTI_CTRL) ||
(ctrl->opts && ctrl->opts->discovery_nqn))
continue;
@@ -2746,7 +2816,7 @@ int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp,
void *log, size_t size, u64 offset)
{
struct nvme_command c = { };
- unsigned long dwlen = size / 4 - 1;
+ u32 dwlen = nvme_bytes_to_numd(size);
c.get_log_page.opcode = nvme_admin_get_log_page;
c.get_log_page.nsid = cpu_to_le32(nsid);
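
nvme_bytes_to_numd() is introduced elsewhere in this series; judging from the expressions it replaces here and in nvme_get_stream_params() above (size / 4 - 1), it is presumably the zero's-based dword-count conversion, roughly:

/* Presumed helper (NUMD is a zero's-based count of dwords); the
 * authoritative definition lives in the nvme host headers. */
static inline u32 nvme_bytes_to_numd(size_t len)
{
	return (len >> 2) - 1;	/* bytes -> dwords, zero's based */
}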
@@ -3401,7 +3471,6 @@ static int __nvme_check_ids(struct nvme_subsystem *subsys,
list_for_each_entry(h, &subsys->nsheads, entry) {
if (nvme_ns_ids_valid(&new->ids) &&
- !list_empty(&h->list) &&
nvme_ns_ids_equal(&new->ids, &h->ids))
return -EINVAL;
}
@@ -3410,8 +3479,7 @@ static int __nvme_check_ids(struct nvme_subsystem *subsys,
}
static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
- unsigned nsid, struct nvme_id_ns *id,
- struct nvme_ns_ids *ids)
+ unsigned nsid, struct nvme_ns_ids *ids)
{
struct nvme_ns_head *head;
size_t size = sizeof(*head);
@@ -3469,42 +3537,51 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid,
struct nvme_id_ns *id)
{
struct nvme_ctrl *ctrl = ns->ctrl;
- bool is_shared = id->nmic & (1 << 0);
+ bool is_shared = id->nmic & NVME_NS_NMIC_SHARED;
struct nvme_ns_head *head = NULL;
struct nvme_ns_ids ids;
int ret = 0;
ret = nvme_report_ns_ids(ctrl, nsid, id, &ids);
- if (ret)
- goto out;
+ if (ret) {
+ if (ret < 0)
+ return ret;
+ return blk_status_to_errno(nvme_error_status(ret));
+ }
mutex_lock(&ctrl->subsys->lock);
- if (is_shared)
- head = nvme_find_ns_head(ctrl->subsys, nsid);
+ head = nvme_find_ns_head(ctrl->subsys, nsid);
if (!head) {
- head = nvme_alloc_ns_head(ctrl, nsid, id, &ids);
+ head = nvme_alloc_ns_head(ctrl, nsid, &ids);
if (IS_ERR(head)) {
ret = PTR_ERR(head);
goto out_unlock;
}
+ head->shared = is_shared;
} else {
+ ret = -EINVAL;
+ if (!is_shared || !head->shared) {
+ dev_err(ctrl->device,
+ "Duplicate unshared namespace %d\n", nsid);
+ goto out_put_ns_head;
+ }
if (!nvme_ns_ids_equal(&head->ids, &ids)) {
dev_err(ctrl->device,
"IDs don't match for shared namespace %d\n",
nsid);
- ret = -EINVAL;
- goto out_unlock;
+ goto out_put_ns_head;
}
}
list_add_tail(&ns->siblings, &head->list);
ns->head = head;
+ mutex_unlock(&ctrl->subsys->lock);
+ return 0;
+out_put_ns_head:
+ nvme_put_ns_head(head);
out_unlock:
mutex_unlock(&ctrl->subsys->lock);
-out:
- if (ret > 0)
- ret = blk_status_to_errno(nvme_error_status(ret));
return ret;
}
@@ -3535,32 +3612,6 @@ static struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid)
return ret;
}
-static int nvme_setup_streams_ns(struct nvme_ctrl *ctrl, struct nvme_ns *ns)
-{
- struct streams_directive_params s;
- int ret;
-
- if (!ctrl->nr_streams)
- return 0;
-
- ret = nvme_get_stream_params(ctrl, &s, ns->head->ns_id);
- if (ret)
- return ret;
-
- ns->sws = le32_to_cpu(s.sws);
- ns->sgs = le16_to_cpu(s.sgs);
-
- if (ns->sws) {
- unsigned int bs = 1 << ns->lba_shift;
-
- blk_queue_io_min(ns->queue, bs * ns->sws);
- if (ns->sgs)
- blk_queue_io_opt(ns->queue, bs * ns->sws * ns->sgs);
- }
-
- return 0;
-}
-
static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
{
struct nvme_ns *ns;
@@ -3604,7 +3655,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
ret = nvme_init_ns_head(ns, nsid, id);
if (ret)
goto out_free_id;
- nvme_setup_streams_ns(ctrl, ns);
nvme_set_disk_name(disk_name, ns, ctrl, &flags);
disk = alloc_disk_node(0, node);
@@ -3618,7 +3668,8 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
memcpy(disk->disk_name, disk_name, DISK_NAME_LEN);
ns->disk = disk;
- __nvme_revalidate_disk(disk, id);
+ if (__nvme_revalidate_disk(disk, id))
+ goto out_free_disk;
if ((ctrl->quirks & NVME_QUIRK_LIGHTNVM) && id->vs[0] == 0x1) {
ret = nvme_nvm_register(ns, disk_name, node);
@@ -3645,9 +3696,13 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
/* prevent double queue cleanup */
ns->disk->queue = NULL;
put_disk(ns->disk);
+ out_free_disk:
+ del_gendisk(ns->disk);
out_unlink_ns:
mutex_lock(&ctrl->subsys->lock);
list_del_rcu(&ns->siblings);
+ if (list_empty(&ns->head->list))
+ list_del_init(&ns->head->entry);
mutex_unlock(&ctrl->subsys->lock);
nvme_put_ns_head(ns->head);
out_free_id:
@@ -3667,7 +3722,10 @@ static void nvme_ns_remove(struct nvme_ns *ns)
mutex_lock(&ns->ctrl->subsys->lock);
list_del_rcu(&ns->siblings);
+ if (list_empty(&ns->head->list))
+ list_del_init(&ns->head->entry);
mutex_unlock(&ns->ctrl->subsys->lock);
+
synchronize_rcu(); /* guarantee not available in head->list */
nvme_mpath_clear_current_path(ns);
synchronize_srcu(&ns->head->srcu); /* wait for concurrent submissions */
@@ -3687,6 +3745,16 @@ static void nvme_ns_remove(struct nvme_ns *ns)
nvme_put_ns(ns);
}
+static void nvme_ns_remove_by_nsid(struct nvme_ctrl *ctrl, u32 nsid)
+{
+ struct nvme_ns *ns = nvme_find_get_ns(ctrl, nsid);
+
+ if (ns) {
+ nvme_ns_remove(ns);
+ nvme_put_ns(ns);
+ }
+}
+
static void nvme_validate_ns(struct nvme_ctrl *ctrl, unsigned nsid)
{
struct nvme_ns *ns;
@@ -3718,39 +3786,34 @@ static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl,
}
-static int nvme_scan_ns_list(struct nvme_ctrl *ctrl, unsigned nn)
+static int nvme_scan_ns_list(struct nvme_ctrl *ctrl)
{
- struct nvme_ns *ns;
+ const int nr_entries = NVME_IDENTIFY_DATA_SIZE / sizeof(__le32);
__le32 *ns_list;
- unsigned i, j, nsid, prev = 0;
- unsigned num_lists = DIV_ROUND_UP_ULL((u64)nn, 1024);
- int ret = 0;
+ u32 prev = 0;
+ int ret = 0, i;
+
+ if (nvme_ctrl_limited_cns(ctrl))
+ return -EOPNOTSUPP;
ns_list = kzalloc(NVME_IDENTIFY_DATA_SIZE, GFP_KERNEL);
if (!ns_list)
return -ENOMEM;
- for (i = 0; i < num_lists; i++) {
+ for (;;) {
ret = nvme_identify_ns_list(ctrl, prev, ns_list);
if (ret)
goto free;
- for (j = 0; j < min(nn, 1024U); j++) {
- nsid = le32_to_cpu(ns_list[j]);
- if (!nsid)
- goto out;
+ for (i = 0; i < nr_entries; i++) {
+ u32 nsid = le32_to_cpu(ns_list[i]);
+ if (!nsid) /* end of the list? */
+ goto out;
nvme_validate_ns(ctrl, nsid);
-
- while (++prev < nsid) {
- ns = nvme_find_get_ns(ctrl, prev);
- if (ns) {
- nvme_ns_remove(ns);
- nvme_put_ns(ns);
- }
- }
+ while (++prev < nsid)
+ nvme_ns_remove_by_nsid(ctrl, prev);
}
- nn -= j;
}
out:
nvme_remove_invalid_namespaces(ctrl, prev);
@@ -3759,9 +3822,15 @@ static int nvme_scan_ns_list(struct nvme_ctrl *ctrl, unsigned nn)
return ret;
}
-static void nvme_scan_ns_sequential(struct nvme_ctrl *ctrl, unsigned nn)
+static void nvme_scan_ns_sequential(struct nvme_ctrl *ctrl)
{
- unsigned i;
+ struct nvme_id_ctrl *id;
+ u32 nn, i;
+
+ if (nvme_identify_ctrl(ctrl, &id))
+ return;
+ nn = le32_to_cpu(id->nn);
+ kfree(id);
for (i = 1; i <= nn; i++)
nvme_validate_ns(ctrl, i);
@@ -3798,8 +3867,6 @@ static void nvme_scan_work(struct work_struct *work)
{
struct nvme_ctrl *ctrl =
container_of(work, struct nvme_ctrl, scan_work);
- struct nvme_id_ctrl *id;
- unsigned nn;
/* No tagset on a live ctrl means IO queues could not created */
if (ctrl->state != NVME_CTRL_LIVE || !ctrl->tagset)
@@ -3810,20 +3877,11 @@ static void nvme_scan_work(struct work_struct *work)
nvme_clear_changed_ns_log(ctrl);
}
- if (nvme_identify_ctrl(ctrl, &id))
- return;
-
mutex_lock(&ctrl->scan_lock);
- nn = le32_to_cpu(id->nn);
- if (ctrl->vs >= NVME_VS(1, 1, 0) &&
- !(ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS)) {
- if (!nvme_scan_ns_list(ctrl, nn))
- goto out_free_id;
- }
- nvme_scan_ns_sequential(ctrl, nn);
-out_free_id:
+ if (nvme_scan_ns_list(ctrl) != 0)
+ nvme_scan_ns_sequential(ctrl);
mutex_unlock(&ctrl->scan_lock);
- kfree(id);
+
down_write(&ctrl->namespaces_rwsem);
list_sort(NULL, &ctrl->namespaces, ns_cmp);
up_write(&ctrl->namespaces_rwsem);
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 7dfc4a2ecf1e..cb0007592c12 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -14,6 +14,7 @@
#include "fabrics.h"
#include <linux/nvme-fc-driver.h>
#include <linux/nvme-fc.h>
+#include "fc.h"
#include <scsi/scsi_transport_fc.h>
/* *************************** Data Structures/Defines ****************** */
@@ -61,6 +62,17 @@ struct nvmefc_ls_req_op {
bool req_queued;
};
+struct nvmefc_ls_rcv_op {
+ struct nvme_fc_rport *rport;
+ struct nvmefc_ls_rsp *lsrsp;
+ union nvmefc_ls_requests *rqstbuf;
+ union nvmefc_ls_responses *rspbuf;
+ u16 rqstdatalen;
+ bool handled;
+ dma_addr_t rspdma;
+ struct list_head lsrcv_list; /* rport->ls_rcv_list */
+} __aligned(sizeof(u64)); /* alignment for other things alloc'd with */
+
enum nvme_fcpop_state {
FCPOP_STATE_UNINIT = 0,
FCPOP_STATE_IDLE = 1,
@@ -96,7 +108,7 @@ struct nvme_fc_fcp_op {
struct nvme_fcp_op_w_sgl {
struct nvme_fc_fcp_op op;
struct scatterlist sgl[NVME_INLINE_SG_CNT];
- uint8_t priv[0];
+ uint8_t priv[];
};
struct nvme_fc_lport {
@@ -117,6 +129,7 @@ struct nvme_fc_rport {
struct list_head endp_list; /* for lport->endp_list */
struct list_head ctrl_list;
struct list_head ls_req_list;
+ struct list_head ls_rcv_list;
struct list_head disc_list;
struct device *dev; /* physical device for dma */
struct nvme_fc_lport *lport;
@@ -124,11 +137,12 @@ struct nvme_fc_rport {
struct kref ref;
atomic_t act_ctrl_cnt;
unsigned long dev_loss_end;
+ struct work_struct lsrcv_work;
} __aligned(sizeof(u64)); /* alignment for other things alloc'd with */
-enum nvme_fcctrl_flags {
- FCCTRL_TERMIO = (1 << 0),
-};
+/* fc_ctrl flags values - specified as bit positions */
+#define ASSOC_ACTIVE 0
+#define FCCTRL_TERMIO 1
struct nvme_fc_ctrl {
spinlock_t lock;
@@ -139,9 +153,9 @@ struct nvme_fc_ctrl {
u32 cnum;
bool ioq_live;
- bool assoc_active;
atomic_t err_work_active;
u64 association_id;
+ struct nvmefc_ls_rcv_op *rcv_disconn;
struct list_head ctrl_list; /* rport->ctrl_list */
@@ -152,7 +166,7 @@ struct nvme_fc_ctrl {
struct work_struct err_work;
struct kref ref;
- u32 flags;
+ unsigned long flags;
u32 iocnt;
wait_queue_head_t ioabort_wait;
@@ -219,6 +233,9 @@ static struct device *fc_udev_device;
static void __nvme_fc_delete_hw_queue(struct nvme_fc_ctrl *,
struct nvme_fc_queue *, unsigned int);
+static void nvme_fc_handle_ls_rqst_work(struct work_struct *work);
+
+
static void
nvme_fc_free_lport(struct kref *ref)
{
@@ -394,7 +411,10 @@ nvme_fc_register_localport(struct nvme_fc_port_info *pinfo,
newrec->ops = template;
newrec->dev = dev;
ida_init(&newrec->endp_cnt);
- newrec->localport.private = &newrec[1];
+ if (template->local_priv_sz)
+ newrec->localport.private = &newrec[1];
+ else
+ newrec->localport.private = NULL;
newrec->localport.node_name = pinfo->node_name;
newrec->localport.port_name = pinfo->port_name;
newrec->localport.port_role = pinfo->port_role;
@@ -701,9 +721,13 @@ nvme_fc_register_remoteport(struct nvme_fc_local_port *localport,
atomic_set(&newrec->act_ctrl_cnt, 0);
spin_lock_init(&newrec->lock);
newrec->remoteport.localport = &lport->localport;
+ INIT_LIST_HEAD(&newrec->ls_rcv_list);
newrec->dev = lport->dev;
newrec->lport = lport;
- newrec->remoteport.private = &newrec[1];
+ if (lport->ops->remote_priv_sz)
+ newrec->remoteport.private = &newrec[1];
+ else
+ newrec->remoteport.private = NULL;
newrec->remoteport.port_role = pinfo->port_role;
newrec->remoteport.node_name = pinfo->node_name;
newrec->remoteport.port_name = pinfo->port_name;
@@ -711,6 +735,7 @@ nvme_fc_register_remoteport(struct nvme_fc_local_port *localport,
newrec->remoteport.port_state = FC_OBJSTATE_ONLINE;
newrec->remoteport.port_num = idx;
__nvme_fc_set_dev_loss_tmo(newrec, pinfo);
+ INIT_WORK(&newrec->lsrcv_work, nvme_fc_handle_ls_rqst_work);
spin_lock_irqsave(&nvme_fc_lock, flags);
list_add_tail(&newrec->endp_list, &lport->endp_list);
@@ -1000,6 +1025,7 @@ fc_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
static void nvme_fc_ctrl_put(struct nvme_fc_ctrl *);
static int nvme_fc_ctrl_get(struct nvme_fc_ctrl *);
+static void nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg);
static void
__nvme_fc_finish_ls_req(struct nvmefc_ls_req_op *lsop)
@@ -1140,41 +1166,6 @@ nvme_fc_send_ls_req_async(struct nvme_fc_rport *rport,
return __nvme_fc_send_ls_req(rport, lsop, done);
}
-/* Validation Error indexes into the string table below */
-enum {
- VERR_NO_ERROR = 0,
- VERR_LSACC = 1,
- VERR_LSDESC_RQST = 2,
- VERR_LSDESC_RQST_LEN = 3,
- VERR_ASSOC_ID = 4,
- VERR_ASSOC_ID_LEN = 5,
- VERR_CONN_ID = 6,
- VERR_CONN_ID_LEN = 7,
- VERR_CR_ASSOC = 8,
- VERR_CR_ASSOC_ACC_LEN = 9,
- VERR_CR_CONN = 10,
- VERR_CR_CONN_ACC_LEN = 11,
- VERR_DISCONN = 12,
- VERR_DISCONN_ACC_LEN = 13,
-};
-
-static char *validation_errors[] = {
- "OK",
- "Not LS_ACC",
- "Not LSDESC_RQST",
- "Bad LSDESC_RQST Length",
- "Not Association ID",
- "Bad Association ID Length",
- "Not Connection ID",
- "Bad Connection ID Length",
- "Not CR_ASSOC Rqst",
- "Bad CR_ASSOC ACC Length",
- "Not CR_CONN Rqst",
- "Bad CR_CONN ACC Length",
- "Not Disconnect Rqst",
- "Bad Disconnect ACC Length",
-};
-
static int
nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl,
struct nvme_fc_queue *queue, u16 qsize, u16 ersp_ratio)
@@ -1183,21 +1174,27 @@ nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl,
struct nvmefc_ls_req *lsreq;
struct fcnvme_ls_cr_assoc_rqst *assoc_rqst;
struct fcnvme_ls_cr_assoc_acc *assoc_acc;
+ unsigned long flags;
int ret, fcret = 0;
lsop = kzalloc((sizeof(*lsop) +
- ctrl->lport->ops->lsrqst_priv_sz +
- sizeof(*assoc_rqst) + sizeof(*assoc_acc)), GFP_KERNEL);
+ sizeof(*assoc_rqst) + sizeof(*assoc_acc) +
+ ctrl->lport->ops->lsrqst_priv_sz), GFP_KERNEL);
if (!lsop) {
+ dev_info(ctrl->ctrl.device,
+ "NVME-FC{%d}: send Create Association failed: ENOMEM\n",
+ ctrl->cnum);
ret = -ENOMEM;
goto out_no_memory;
}
- lsreq = &lsop->ls_req;
- lsreq->private = (void *)&lsop[1];
- assoc_rqst = (struct fcnvme_ls_cr_assoc_rqst *)
- (lsreq->private + ctrl->lport->ops->lsrqst_priv_sz);
+ assoc_rqst = (struct fcnvme_ls_cr_assoc_rqst *)&lsop[1];
assoc_acc = (struct fcnvme_ls_cr_assoc_acc *)&assoc_rqst[1];
+ lsreq = &lsop->ls_req;
+ if (ctrl->lport->ops->lsrqst_priv_sz)
+ lsreq->private = &assoc_acc[1];
+ else
+ lsreq->private = NULL;
assoc_rqst->w0.ls_cmd = FCNVME_LS_CREATE_ASSOCIATION;
assoc_rqst->desc_list_len =
@@ -1267,11 +1264,13 @@ nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl,
"q %d Create Association LS failed: %s\n",
queue->qnum, validation_errors[fcret]);
} else {
+ spin_lock_irqsave(&ctrl->lock, flags);
ctrl->association_id =
be64_to_cpu(assoc_acc->associd.association_id);
queue->connection_id =
be64_to_cpu(assoc_acc->connectid.connection_id);
set_bit(NVME_FC_Q_CONNECTED, &queue->flags);
+ spin_unlock_irqrestore(&ctrl->lock, flags);
}
out_free_buffer:
@@ -1295,18 +1294,23 @@ nvme_fc_connect_queue(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
int ret, fcret = 0;
lsop = kzalloc((sizeof(*lsop) +
- ctrl->lport->ops->lsrqst_priv_sz +
- sizeof(*conn_rqst) + sizeof(*conn_acc)), GFP_KERNEL);
+ sizeof(*conn_rqst) + sizeof(*conn_acc) +
+ ctrl->lport->ops->lsrqst_priv_sz), GFP_KERNEL);
if (!lsop) {
+ dev_info(ctrl->ctrl.device,
+ "NVME-FC{%d}: send Create Connection failed: ENOMEM\n",
+ ctrl->cnum);
ret = -ENOMEM;
goto out_no_memory;
}
- lsreq = &lsop->ls_req;
- lsreq->private = (void *)&lsop[1];
- conn_rqst = (struct fcnvme_ls_cr_conn_rqst *)
- (lsreq->private + ctrl->lport->ops->lsrqst_priv_sz);
+ conn_rqst = (struct fcnvme_ls_cr_conn_rqst *)&lsop[1];
conn_acc = (struct fcnvme_ls_cr_conn_acc *)&conn_rqst[1];
+ lsreq = &lsop->ls_req;
+ if (ctrl->lport->ops->lsrqst_priv_sz)
+ lsreq->private = (void *)&conn_acc[1];
+ else
+ lsreq->private = NULL;
conn_rqst->w0.ls_cmd = FCNVME_LS_CREATE_CONNECTION;
conn_rqst->desc_list_len = cpu_to_be32(
@@ -1420,54 +1424,385 @@ nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl)
int ret;
lsop = kzalloc((sizeof(*lsop) +
- ctrl->lport->ops->lsrqst_priv_sz +
- sizeof(*discon_rqst) + sizeof(*discon_acc)),
- GFP_KERNEL);
- if (!lsop)
- /* couldn't sent it... too bad */
+ sizeof(*discon_rqst) + sizeof(*discon_acc) +
+ ctrl->lport->ops->lsrqst_priv_sz), GFP_KERNEL);
+ if (!lsop) {
+ dev_info(ctrl->ctrl.device,
+ "NVME-FC{%d}: send Disconnect Association "
+ "failed: ENOMEM\n",
+ ctrl->cnum);
return;
+ }
+ discon_rqst = (struct fcnvme_ls_disconnect_assoc_rqst *)&lsop[1];
+ discon_acc = (struct fcnvme_ls_disconnect_assoc_acc *)&discon_rqst[1];
lsreq = &lsop->ls_req;
+ if (ctrl->lport->ops->lsrqst_priv_sz)
+ lsreq->private = (void *)&discon_acc[1];
+ else
+ lsreq->private = NULL;
- lsreq->private = (void *)&lsop[1];
- discon_rqst = (struct fcnvme_ls_disconnect_assoc_rqst *)
- (lsreq->private + ctrl->lport->ops->lsrqst_priv_sz);
- discon_acc = (struct fcnvme_ls_disconnect_assoc_acc *)&discon_rqst[1];
+ nvmefc_fmt_lsreq_discon_assoc(lsreq, discon_rqst, discon_acc,
+ ctrl->association_id);
- discon_rqst->w0.ls_cmd = FCNVME_LS_DISCONNECT_ASSOC;
- discon_rqst->desc_list_len = cpu_to_be32(
- sizeof(struct fcnvme_lsdesc_assoc_id) +
- sizeof(struct fcnvme_lsdesc_disconn_cmd));
+ ret = nvme_fc_send_ls_req_async(ctrl->rport, lsop,
+ nvme_fc_disconnect_assoc_done);
+ if (ret)
+ kfree(lsop);
+}
- discon_rqst->associd.desc_tag = cpu_to_be32(FCNVME_LSDESC_ASSOC_ID);
- discon_rqst->associd.desc_len =
- fcnvme_lsdesc_len(
- sizeof(struct fcnvme_lsdesc_assoc_id));
+static void
+nvme_fc_xmt_ls_rsp_done(struct nvmefc_ls_rsp *lsrsp)
+{
+ struct nvmefc_ls_rcv_op *lsop = lsrsp->nvme_fc_private;
+ struct nvme_fc_rport *rport = lsop->rport;
+ struct nvme_fc_lport *lport = rport->lport;
+ unsigned long flags;
+
+ spin_lock_irqsave(&rport->lock, flags);
+ list_del(&lsop->lsrcv_list);
+ spin_unlock_irqrestore(&rport->lock, flags);
+
+ fc_dma_sync_single_for_cpu(lport->dev, lsop->rspdma,
+ sizeof(*lsop->rspbuf), DMA_TO_DEVICE);
+ fc_dma_unmap_single(lport->dev, lsop->rspdma,
+ sizeof(*lsop->rspbuf), DMA_TO_DEVICE);
+
+ kfree(lsop);
+
+ nvme_fc_rport_put(rport);
+}
+
+static void
+nvme_fc_xmt_ls_rsp(struct nvmefc_ls_rcv_op *lsop)
+{
+ struct nvme_fc_rport *rport = lsop->rport;
+ struct nvme_fc_lport *lport = rport->lport;
+ struct fcnvme_ls_rqst_w0 *w0 = &lsop->rqstbuf->w0;
+ int ret;
+
+ fc_dma_sync_single_for_device(lport->dev, lsop->rspdma,
+ sizeof(*lsop->rspbuf), DMA_TO_DEVICE);
+
+ ret = lport->ops->xmt_ls_rsp(&lport->localport, &rport->remoteport,
+ lsop->lsrsp);
+ if (ret) {
+ dev_warn(lport->dev,
+ "LLDD rejected LS RSP xmt: LS %d status %d\n",
+ w0->ls_cmd, ret);
+ nvme_fc_xmt_ls_rsp_done(lsop->lsrsp);
+ return;
+ }
+}
+
+static struct nvme_fc_ctrl *
+nvme_fc_match_disconn_ls(struct nvme_fc_rport *rport,
+ struct nvmefc_ls_rcv_op *lsop)
+{
+ struct fcnvme_ls_disconnect_assoc_rqst *rqst =
+ &lsop->rqstbuf->rq_dis_assoc;
+ struct nvme_fc_ctrl *ctrl, *ret = NULL;
+ struct nvmefc_ls_rcv_op *oldls = NULL;
+ u64 association_id = be64_to_cpu(rqst->associd.association_id);
+ unsigned long flags;
+
+ spin_lock_irqsave(&rport->lock, flags);
+
+ list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) {
+ if (!nvme_fc_ctrl_get(ctrl))
+ continue;
+ spin_lock(&ctrl->lock);
+ if (association_id == ctrl->association_id) {
+ oldls = ctrl->rcv_disconn;
+ ctrl->rcv_disconn = lsop;
+ ret = ctrl;
+ }
+ spin_unlock(&ctrl->lock);
+ if (ret)
+ /* leave the ctrl get reference */
+ break;
+ nvme_fc_ctrl_put(ctrl);
+ }
+
+ spin_unlock_irqrestore(&rport->lock, flags);
+
+ /* transmit a response for anything that was pending */
+ if (oldls) {
+ dev_info(rport->lport->dev,
+ "NVME-FC{%d}: Multiple Disconnect Association "
+ "LS's received\n", ctrl->cnum);
+ /* overwrite good response with bogus failure */
+ oldls->lsrsp->rsplen = nvme_fc_format_rjt(oldls->rspbuf,
+ sizeof(*oldls->rspbuf),
+ rqst->w0.ls_cmd,
+ FCNVME_RJT_RC_UNAB,
+ FCNVME_RJT_EXP_NONE, 0);
+ nvme_fc_xmt_ls_rsp(oldls);
+ }
+
+ return ret;
+}
+
+/*
+ * returns true to mean LS handled and ls_rsp can be sent
+ * returns false to defer ls_rsp xmt (will be done as part of
+ * association termination)
+ */
+static bool
+nvme_fc_ls_disconnect_assoc(struct nvmefc_ls_rcv_op *lsop)
+{
+ struct nvme_fc_rport *rport = lsop->rport;
+ struct fcnvme_ls_disconnect_assoc_rqst *rqst =
+ &lsop->rqstbuf->rq_dis_assoc;
+ struct fcnvme_ls_disconnect_assoc_acc *acc =
+ &lsop->rspbuf->rsp_dis_assoc;
+ struct nvme_fc_ctrl *ctrl = NULL;
+ int ret = 0;
+
+ memset(acc, 0, sizeof(*acc));
+
+ ret = nvmefc_vldt_lsreq_discon_assoc(lsop->rqstdatalen, rqst);
+ if (!ret) {
+ /* match an active association */
+ ctrl = nvme_fc_match_disconn_ls(rport, lsop);
+ if (!ctrl)
+ ret = VERR_NO_ASSOC;
+ }
+
+ if (ret) {
+ dev_info(rport->lport->dev,
+ "Disconnect LS failed: %s\n",
+ validation_errors[ret]);
+ lsop->lsrsp->rsplen = nvme_fc_format_rjt(acc,
+ sizeof(*acc), rqst->w0.ls_cmd,
+ (ret == VERR_NO_ASSOC) ?
+ FCNVME_RJT_RC_INV_ASSOC :
+ FCNVME_RJT_RC_LOGIC,
+ FCNVME_RJT_EXP_NONE, 0);
+ return true;
+ }
- discon_rqst->associd.association_id = cpu_to_be64(ctrl->association_id);
+ /* format an ACCept response */
- discon_rqst->discon_cmd.desc_tag = cpu_to_be32(
- FCNVME_LSDESC_DISCONN_CMD);
- discon_rqst->discon_cmd.desc_len =
+ lsop->lsrsp->rsplen = sizeof(*acc);
+
+ nvme_fc_format_rsp_hdr(acc, FCNVME_LS_ACC,
fcnvme_lsdesc_len(
- sizeof(struct fcnvme_lsdesc_disconn_cmd));
+ sizeof(struct fcnvme_ls_disconnect_assoc_acc)),
+ FCNVME_LS_DISCONNECT_ASSOC);
- lsreq->rqstaddr = discon_rqst;
- lsreq->rqstlen = sizeof(*discon_rqst);
- lsreq->rspaddr = discon_acc;
- lsreq->rsplen = sizeof(*discon_acc);
- lsreq->timeout = NVME_FC_LS_TIMEOUT_SEC;
+ /*
+ * the transmit of the response will occur after the exchanges
+ * for the association have been ABTS'd by
+ * nvme_fc_delete_association().
+ */
- ret = nvme_fc_send_ls_req_async(ctrl->rport, lsop,
- nvme_fc_disconnect_assoc_done);
- if (ret)
- kfree(lsop);
+ /* fail the association */
+ nvme_fc_error_recovery(ctrl, "Disconnect Association LS received");
+
+ /* release the reference taken by nvme_fc_match_disconn_ls() */
+ nvme_fc_ctrl_put(ctrl);
+
+ return false;
}
+/*
+ * Actual Processing routine for received FC-NVME LS Requests from the LLD
+ * returns true if a response should be sent afterward, false if rsp will
+ * be sent asynchronously.
+ */
+static bool
+nvme_fc_handle_ls_rqst(struct nvmefc_ls_rcv_op *lsop)
+{
+ struct fcnvme_ls_rqst_w0 *w0 = &lsop->rqstbuf->w0;
+ bool ret = true;
+
+ lsop->lsrsp->nvme_fc_private = lsop;
+ lsop->lsrsp->rspbuf = lsop->rspbuf;
+ lsop->lsrsp->rspdma = lsop->rspdma;
+ lsop->lsrsp->done = nvme_fc_xmt_ls_rsp_done;
+ /* Be preventative: handlers will later set rsplen to a valid length */
+ lsop->lsrsp->rsplen = 0;
-/* *********************** NVME Ctrl Routines **************************** */
+ /*
+ * handlers:
+ * parse request input, execute the request, and format the
+ * LS response
+ */
+ switch (w0->ls_cmd) {
+ case FCNVME_LS_DISCONNECT_ASSOC:
+ ret = nvme_fc_ls_disconnect_assoc(lsop);
+ break;
+ case FCNVME_LS_DISCONNECT_CONN:
+ lsop->lsrsp->rsplen = nvme_fc_format_rjt(lsop->rspbuf,
+ sizeof(*lsop->rspbuf), w0->ls_cmd,
+ FCNVME_RJT_RC_UNSUP, FCNVME_RJT_EXP_NONE, 0);
+ break;
+ case FCNVME_LS_CREATE_ASSOCIATION:
+ case FCNVME_LS_CREATE_CONNECTION:
+ lsop->lsrsp->rsplen = nvme_fc_format_rjt(lsop->rspbuf,
+ sizeof(*lsop->rspbuf), w0->ls_cmd,
+ FCNVME_RJT_RC_LOGIC, FCNVME_RJT_EXP_NONE, 0);
+ break;
+ default:
+ lsop->lsrsp->rsplen = nvme_fc_format_rjt(lsop->rspbuf,
+ sizeof(*lsop->rspbuf), w0->ls_cmd,
+ FCNVME_RJT_RC_INVAL, FCNVME_RJT_EXP_NONE, 0);
+ break;
+ }
-static void nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg);
+ return ret;
+}
+
+static void
+nvme_fc_handle_ls_rqst_work(struct work_struct *work)
+{
+ struct nvme_fc_rport *rport =
+ container_of(work, struct nvme_fc_rport, lsrcv_work);
+ struct fcnvme_ls_rqst_w0 *w0;
+ struct nvmefc_ls_rcv_op *lsop;
+ unsigned long flags;
+ bool sendrsp;
+
+restart:
+ sendrsp = true;
+ spin_lock_irqsave(&rport->lock, flags);
+ list_for_each_entry(lsop, &rport->ls_rcv_list, lsrcv_list) {
+ if (lsop->handled)
+ continue;
+
+ lsop->handled = true;
+ if (rport->remoteport.port_state == FC_OBJSTATE_ONLINE) {
+ spin_unlock_irqrestore(&rport->lock, flags);
+ sendrsp = nvme_fc_handle_ls_rqst(lsop);
+ } else {
+ spin_unlock_irqrestore(&rport->lock, flags);
+ w0 = &lsop->rqstbuf->w0;
+ lsop->lsrsp->rsplen = nvme_fc_format_rjt(
+ lsop->rspbuf,
+ sizeof(*lsop->rspbuf),
+ w0->ls_cmd,
+ FCNVME_RJT_RC_UNAB,
+ FCNVME_RJT_EXP_NONE, 0);
+ }
+ if (sendrsp)
+ nvme_fc_xmt_ls_rsp(lsop);
+ goto restart;
+ }
+ spin_unlock_irqrestore(&rport->lock, flags);
+}
+
+/**
+ * nvme_fc_rcv_ls_req - transport entry point called by an LLDD
+ * upon the reception of an NVME LS request.
+ *
+ * The nvme-fc layer will copy payload to an internal structure for
+ * processing. As such, upon completion of the routine, the LLDD may
+ * immediately free/reuse the LS request buffer passed in the call.
+ *
+ * If this routine returns an error, the LLDD should abort the exchange.
+ *
+ * @portptr: pointer to the (registered) remote port that the LS
+ * was received from. The remoteport is associated with
+ * a specific localport.
+ * @lsrsp: pointer to a nvmefc_ls_rsp response structure to be
+ * used to reference the exchange corresponding to the LS
+ * when issuing an ls response.
+ * @lsreqbuf: pointer to the buffer containing the LS Request
+ * @lsreqbuf_len: length, in bytes, of the received LS request
+ */
+int
+nvme_fc_rcv_ls_req(struct nvme_fc_remote_port *portptr,
+ struct nvmefc_ls_rsp *lsrsp,
+ void *lsreqbuf, u32 lsreqbuf_len)
+{
+ struct nvme_fc_rport *rport = remoteport_to_rport(portptr);
+ struct nvme_fc_lport *lport = rport->lport;
+ struct fcnvme_ls_rqst_w0 *w0 = (struct fcnvme_ls_rqst_w0 *)lsreqbuf;
+ struct nvmefc_ls_rcv_op *lsop;
+ unsigned long flags;
+ int ret;
+
+ nvme_fc_rport_get(rport);
+
+ /* validate there's a routine to transmit a response */
+ if (!lport->ops->xmt_ls_rsp) {
+ dev_info(lport->dev,
+ "RCV %s LS failed: no LLDD xmt_ls_rsp\n",
+ (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ?
+ nvmefc_ls_names[w0->ls_cmd] : "");
+ ret = -EINVAL;
+ goto out_put;
+ }
+
+ if (lsreqbuf_len > sizeof(union nvmefc_ls_requests)) {
+ dev_info(lport->dev,
+ "RCV %s LS failed: payload too large\n",
+ (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ?
+ nvmefc_ls_names[w0->ls_cmd] : "");
+ ret = -E2BIG;
+ goto out_put;
+ }
+
+ lsop = kzalloc(sizeof(*lsop) +
+ sizeof(union nvmefc_ls_requests) +
+ sizeof(union nvmefc_ls_responses),
+ GFP_KERNEL);
+ if (!lsop) {
+ dev_info(lport->dev,
+ "RCV %s LS failed: No memory\n",
+ (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ?
+ nvmefc_ls_names[w0->ls_cmd] : "");
+ ret = -ENOMEM;
+ goto out_put;
+ }
+ lsop->rqstbuf = (union nvmefc_ls_requests *)&lsop[1];
+ lsop->rspbuf = (union nvmefc_ls_responses *)&lsop->rqstbuf[1];
+
+ lsop->rspdma = fc_dma_map_single(lport->dev, lsop->rspbuf,
+ sizeof(*lsop->rspbuf),
+ DMA_TO_DEVICE);
+ if (fc_dma_mapping_error(lport->dev, lsop->rspdma)) {
+ dev_info(lport->dev,
+ "RCV %s LS failed: DMA mapping failure\n",
+ (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ?
+ nvmefc_ls_names[w0->ls_cmd] : "");
+ ret = -EFAULT;
+ goto out_free;
+ }
+
+ lsop->rport = rport;
+ lsop->lsrsp = lsrsp;
+
+ memcpy(lsop->rqstbuf, lsreqbuf, lsreqbuf_len);
+ lsop->rqstdatalen = lsreqbuf_len;
+
+ spin_lock_irqsave(&rport->lock, flags);
+ if (rport->remoteport.port_state != FC_OBJSTATE_ONLINE) {
+ spin_unlock_irqrestore(&rport->lock, flags);
+ ret = -ENOTCONN;
+ goto out_unmap;
+ }
+ list_add_tail(&lsop->lsrcv_list, &rport->ls_rcv_list);
+ spin_unlock_irqrestore(&rport->lock, flags);
+
+ schedule_work(&rport->lsrcv_work);
+
+ return 0;
+
+out_unmap:
+ fc_dma_unmap_single(lport->dev, lsop->rspdma,
+ sizeof(*lsop->rspbuf), DMA_TO_DEVICE);
+out_free:
+ kfree(lsop);
+out_put:
+ nvme_fc_rport_put(rport);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nvme_fc_rcv_ls_req);
+
+
+/* *********************** NVME Ctrl Routines **************************** */
static void
__nvme_fc_exit_request(struct nvme_fc_ctrl *ctrl,
@@ -1500,7 +1835,7 @@ __nvme_fc_abort_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_fcp_op *op)
opstate = atomic_xchg(&op->state, FCPOP_STATE_ABORTED);
if (opstate != FCPOP_STATE_ACTIVE)
atomic_set(&op->state, opstate);
- else if (ctrl->flags & FCCTRL_TERMIO)
+ else if (test_bit(FCCTRL_TERMIO, &ctrl->flags))
ctrl->iocnt++;
spin_unlock_irqrestore(&ctrl->lock, flags);
@@ -1537,7 +1872,7 @@ __nvme_fc_fcpop_chk_teardowns(struct nvme_fc_ctrl *ctrl,
if (opstate == FCPOP_STATE_ABORTED) {
spin_lock_irqsave(&ctrl->lock, flags);
- if (ctrl->flags & FCCTRL_TERMIO) {
+ if (test_bit(FCCTRL_TERMIO, &ctrl->flags)) {
if (!--ctrl->iocnt)
wake_up(&ctrl->ioabort_wait);
}
@@ -1771,7 +2106,7 @@ nvme_fc_init_request(struct blk_mq_tag_set *set, struct request *rq,
res = __nvme_fc_init_request(ctrl, queue, &op->op, rq, queue->rqcnt++);
if (res)
return res;
- op->op.fcp_req.first_sgl = &op->sgl[0];
+ op->op.fcp_req.first_sgl = op->sgl;
op->op.fcp_req.private = &op->priv[0];
nvme_req(rq)->ctrl = &ctrl->ctrl;
return res;
@@ -1783,15 +2118,17 @@ nvme_fc_init_aen_ops(struct nvme_fc_ctrl *ctrl)
struct nvme_fc_fcp_op *aen_op;
struct nvme_fc_cmd_iu *cmdiu;
struct nvme_command *sqe;
- void *private;
+ void *private = NULL;
int i, ret;
aen_op = ctrl->aen_ops;
for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++) {
- private = kzalloc(ctrl->lport->ops->fcprqst_priv_sz,
+ if (ctrl->lport->ops->fcprqst_priv_sz) {
+ private = kzalloc(ctrl->lport->ops->fcprqst_priv_sz,
GFP_KERNEL);
- if (!private)
- return -ENOMEM;
+ if (!private)
+ return -ENOMEM;
+ }
cmdiu = &aen_op->cmd_iu;
sqe = &cmdiu->sqe;
@@ -1822,9 +2159,6 @@ nvme_fc_term_aen_ops(struct nvme_fc_ctrl *ctrl)
aen_op = ctrl->aen_ops;
for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++) {
- if (!aen_op->fcp_req.private)
- continue;
-
__nvme_fc_exit_request(ctrl, aen_op);
kfree(aen_op->fcp_req.private);
@@ -2366,16 +2700,9 @@ nvme_fc_submit_async_event(struct nvme_ctrl *arg)
{
struct nvme_fc_ctrl *ctrl = to_fc_ctrl(arg);
struct nvme_fc_fcp_op *aen_op;
- unsigned long flags;
- bool terminating = false;
blk_status_t ret;
- spin_lock_irqsave(&ctrl->lock, flags);
- if (ctrl->flags & FCCTRL_TERMIO)
- terminating = true;
- spin_unlock_irqrestore(&ctrl->lock, flags);
-
- if (terminating)
+ if (test_bit(FCCTRL_TERMIO, &ctrl->flags))
return;
aen_op = &ctrl->aen_ops[0];
@@ -2584,10 +2911,9 @@ nvme_fc_ctlr_active_on_rport(struct nvme_fc_ctrl *ctrl)
struct nvme_fc_rport *rport = ctrl->rport;
u32 cnt;
- if (ctrl->assoc_active)
+ if (test_and_set_bit(ASSOC_ACTIVE, &ctrl->flags))
return 1;
- ctrl->assoc_active = true;
cnt = atomic_inc_return(&rport->act_ctrl_cnt);
if (cnt == 1)
nvme_fc_rport_active_on_lport(rport);
@@ -2602,7 +2928,7 @@ nvme_fc_ctlr_inactive_on_rport(struct nvme_fc_ctrl *ctrl)
struct nvme_fc_lport *lport = rport->lport;
u32 cnt;
- /* ctrl->assoc_active=false will be set independently */
+ /* clearing of ctrl->flags ASSOC_ACTIVE bit is in association delete */
cnt = atomic_dec_return(&rport->act_ctrl_cnt);
if (cnt == 0) {
@@ -2622,6 +2948,8 @@ static int
nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
{
struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
+ struct nvmefc_ls_rcv_op *disls = NULL;
+ unsigned long flags;
int ret;
bool changed;
@@ -2739,12 +3067,18 @@ out_term_aen_ops:
out_disconnect_admin_queue:
/* send a Disconnect(association) LS to fc-nvme target */
nvme_fc_xmt_disconnect_assoc(ctrl);
+ spin_lock_irqsave(&ctrl->lock, flags);
ctrl->association_id = 0;
+ disls = ctrl->rcv_disconn;
+ ctrl->rcv_disconn = NULL;
+ spin_unlock_irqrestore(&ctrl->lock, flags);
+ if (disls)
+ nvme_fc_xmt_ls_rsp(disls);
out_delete_hw_queue:
__nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0);
out_free_queue:
nvme_fc_free_queue(&ctrl->queues[0]);
- ctrl->assoc_active = false;
+ clear_bit(ASSOC_ACTIVE, &ctrl->flags);
nvme_fc_ctlr_inactive_on_rport(ctrl);
return ret;
@@ -2759,14 +3093,14 @@ out_free_queue:
static void
nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
{
+ struct nvmefc_ls_rcv_op *disls = NULL;
unsigned long flags;
- if (!ctrl->assoc_active)
+ if (!test_and_clear_bit(ASSOC_ACTIVE, &ctrl->flags))
return;
- ctrl->assoc_active = false;
spin_lock_irqsave(&ctrl->lock, flags);
- ctrl->flags |= FCCTRL_TERMIO;
+ set_bit(FCCTRL_TERMIO, &ctrl->flags);
ctrl->iocnt = 0;
spin_unlock_irqrestore(&ctrl->lock, flags);
@@ -2817,7 +3151,7 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
/* wait for all io that had to be aborted */
spin_lock_irq(&ctrl->lock);
wait_event_lock_irq(ctrl->ioabort_wait, ctrl->iocnt == 0, ctrl->lock);
- ctrl->flags &= ~FCCTRL_TERMIO;
+ clear_bit(FCCTRL_TERMIO, &ctrl->flags);
spin_unlock_irq(&ctrl->lock);
nvme_fc_term_aen_ops(ctrl);
@@ -2831,7 +3165,17 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
if (ctrl->association_id)
nvme_fc_xmt_disconnect_assoc(ctrl);
+ spin_lock_irqsave(&ctrl->lock, flags);
ctrl->association_id = 0;
+ disls = ctrl->rcv_disconn;
+ ctrl->rcv_disconn = NULL;
+ spin_unlock_irqrestore(&ctrl->lock, flags);
+ if (disls)
+ /*
+ * if a Disconnect Request was waiting for a response, send
+ * now that all ABTS's have been issued (and are complete).
+ */
+ nvme_fc_xmt_ls_rsp(disls);
if (ctrl->ctrl.tagset) {
nvme_fc_delete_hw_io_queues(ctrl);
@@ -2902,7 +3246,9 @@ nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status)
dev_warn(ctrl->ctrl.device,
"NVME-FC{%d}: dev_loss_tmo (%d) expired "
"while waiting for remoteport connectivity.\n",
- ctrl->cnum, portptr->dev_loss_tmo);
+ ctrl->cnum, min_t(int, portptr->dev_loss_tmo,
+ (ctrl->ctrl.opts->max_reconnects *
+ ctrl->ctrl.opts->reconnect_delay)));
WARN_ON(nvme_delete_ctrl(&ctrl->ctrl));
}
}
@@ -3089,7 +3435,6 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
ctrl->dev = lport->dev;
ctrl->cnum = idx;
ctrl->ioq_live = false;
- ctrl->assoc_active = false;
atomic_set(&ctrl->err_work_active, 0);
init_waitqueue_head(&ctrl->ioabort_wait);
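For orientation, here is a minimal sketch (not part of the patch) of how an FC LLDD might feed a received LS frame into the nvme_fc_rcv_ls_req() entry point added above; struct my_fc_exchange, its fields, and my_fc_abort_exchange() are hypothetical placeholders rather than any real LLDD API.

/* Hypothetical LLDD glue; only nvme_fc_rcv_ls_req() comes from this patch. */
static void my_fc_recv_nvme_ls(struct my_fc_exchange *xchg,
			       void *payload, u32 payload_len)
{
	int ret;

	/* nvme_remoteport was saved when nvme_fc_register_remoteport() succeeded */
	ret = nvme_fc_rcv_ls_req(xchg->nvme_remoteport, &xchg->ls_rsp,
				 payload, payload_len);
	if (ret)
		/* the transport did not queue the LS; abort the exchange */
		my_fc_abort_exchange(xchg);
	/* either way, the payload buffer may be freed/reused on return */
}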
diff --git a/drivers/nvme/host/fc.h b/drivers/nvme/host/fc.h
new file mode 100644
index 000000000000..05ce566f2caf
--- /dev/null
+++ b/drivers/nvme/host/fc.h
@@ -0,0 +1,227 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2016, Avago Technologies
+ */
+
+#ifndef _NVME_FC_TRANSPORT_H
+#define _NVME_FC_TRANSPORT_H 1
+
+
+/*
+ * Common definitions between the nvme_fc (host) transport and
+ * nvmet_fc (target) transport implementation.
+ */
+
+/*
+ * ****************** FC-NVME LS HANDLING ******************
+ */
+
+union nvmefc_ls_requests {
+ struct fcnvme_ls_rqst_w0 w0;
+ struct fcnvme_ls_cr_assoc_rqst rq_cr_assoc;
+ struct fcnvme_ls_cr_conn_rqst rq_cr_conn;
+ struct fcnvme_ls_disconnect_assoc_rqst rq_dis_assoc;
+ struct fcnvme_ls_disconnect_conn_rqst rq_dis_conn;
+} __aligned(128); /* alignment for other things alloc'd with */
+
+union nvmefc_ls_responses {
+ struct fcnvme_ls_rjt rsp_rjt;
+ struct fcnvme_ls_cr_assoc_acc rsp_cr_assoc;
+ struct fcnvme_ls_cr_conn_acc rsp_cr_conn;
+ struct fcnvme_ls_disconnect_assoc_acc rsp_dis_assoc;
+ struct fcnvme_ls_disconnect_conn_acc rsp_dis_conn;
+} __aligned(128); /* alignment for other things alloc'd with */
+
+static inline void
+nvme_fc_format_rsp_hdr(void *buf, u8 ls_cmd, __be32 desc_len, u8 rqst_ls_cmd)
+{
+ struct fcnvme_ls_acc_hdr *acc = buf;
+
+ acc->w0.ls_cmd = ls_cmd;
+ acc->desc_list_len = desc_len;
+ acc->rqst.desc_tag = cpu_to_be32(FCNVME_LSDESC_RQST);
+ acc->rqst.desc_len =
+ fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rqst));
+ acc->rqst.w0.ls_cmd = rqst_ls_cmd;
+}
+
+static inline int
+nvme_fc_format_rjt(void *buf, u16 buflen, u8 ls_cmd,
+ u8 reason, u8 explanation, u8 vendor)
+{
+ struct fcnvme_ls_rjt *rjt = buf;
+
+ nvme_fc_format_rsp_hdr(buf, FCNVME_LSDESC_RQST,
+ fcnvme_lsdesc_len(sizeof(struct fcnvme_ls_rjt)),
+ ls_cmd);
+ rjt->rjt.desc_tag = cpu_to_be32(FCNVME_LSDESC_RJT);
+ rjt->rjt.desc_len = fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rjt));
+ rjt->rjt.reason_code = reason;
+ rjt->rjt.reason_explanation = explanation;
+ rjt->rjt.vendor = vendor;
+
+ return sizeof(struct fcnvme_ls_rjt);
+}
+
+/* Validation Error indexes into the string table below */
+enum {
+ VERR_NO_ERROR = 0,
+ VERR_CR_ASSOC_LEN = 1,
+ VERR_CR_ASSOC_RQST_LEN = 2,
+ VERR_CR_ASSOC_CMD = 3,
+ VERR_CR_ASSOC_CMD_LEN = 4,
+ VERR_ERSP_RATIO = 5,
+ VERR_ASSOC_ALLOC_FAIL = 6,
+ VERR_QUEUE_ALLOC_FAIL = 7,
+ VERR_CR_CONN_LEN = 8,
+ VERR_CR_CONN_RQST_LEN = 9,
+ VERR_ASSOC_ID = 10,
+ VERR_ASSOC_ID_LEN = 11,
+ VERR_NO_ASSOC = 12,
+ VERR_CONN_ID = 13,
+ VERR_CONN_ID_LEN = 14,
+ VERR_INVAL_CONN = 15,
+ VERR_CR_CONN_CMD = 16,
+ VERR_CR_CONN_CMD_LEN = 17,
+ VERR_DISCONN_LEN = 18,
+ VERR_DISCONN_RQST_LEN = 19,
+ VERR_DISCONN_CMD = 20,
+ VERR_DISCONN_CMD_LEN = 21,
+ VERR_DISCONN_SCOPE = 22,
+ VERR_RS_LEN = 23,
+ VERR_RS_RQST_LEN = 24,
+ VERR_RS_CMD = 25,
+ VERR_RS_CMD_LEN = 26,
+ VERR_RS_RCTL = 27,
+ VERR_RS_RO = 28,
+ VERR_LSACC = 29,
+ VERR_LSDESC_RQST = 30,
+ VERR_LSDESC_RQST_LEN = 31,
+ VERR_CR_ASSOC = 32,
+ VERR_CR_ASSOC_ACC_LEN = 33,
+ VERR_CR_CONN = 34,
+ VERR_CR_CONN_ACC_LEN = 35,
+ VERR_DISCONN = 36,
+ VERR_DISCONN_ACC_LEN = 37,
+};
+
+static char *validation_errors[] = {
+ "OK",
+ "Bad CR_ASSOC Length",
+ "Bad CR_ASSOC Rqst Length",
+ "Not CR_ASSOC Cmd",
+ "Bad CR_ASSOC Cmd Length",
+ "Bad Ersp Ratio",
+ "Association Allocation Failed",
+ "Queue Allocation Failed",
+ "Bad CR_CONN Length",
+ "Bad CR_CONN Rqst Length",
+ "Not Association ID",
+ "Bad Association ID Length",
+ "No Association",
+ "Not Connection ID",
+ "Bad Connection ID Length",
+ "Invalid Connection ID",
+ "Not CR_CONN Cmd",
+ "Bad CR_CONN Cmd Length",
+ "Bad DISCONN Length",
+ "Bad DISCONN Rqst Length",
+ "Not DISCONN Cmd",
+ "Bad DISCONN Cmd Length",
+ "Bad Disconnect Scope",
+ "Bad RS Length",
+ "Bad RS Rqst Length",
+ "Not RS Cmd",
+ "Bad RS Cmd Length",
+ "Bad RS R_CTL",
+ "Bad RS Relative Offset",
+ "Not LS_ACC",
+ "Not LSDESC_RQST",
+ "Bad LSDESC_RQST Length",
+ "Not CR_ASSOC Rqst",
+ "Bad CR_ASSOC ACC Length",
+ "Not CR_CONN Rqst",
+ "Bad CR_CONN ACC Length",
+ "Not Disconnect Rqst",
+ "Bad Disconnect ACC Length",
+};
+
+#define NVME_FC_LAST_LS_CMD_VALUE FCNVME_LS_DISCONNECT_CONN
+
+static char *nvmefc_ls_names[] = {
+ "Reserved (0)",
+ "RJT (1)",
+ "ACC (2)",
+ "Create Association",
+ "Create Connection",
+ "Disconnect Association",
+ "Disconnect Connection",
+};
+
+static inline void
+nvmefc_fmt_lsreq_discon_assoc(struct nvmefc_ls_req *lsreq,
+ struct fcnvme_ls_disconnect_assoc_rqst *discon_rqst,
+ struct fcnvme_ls_disconnect_assoc_acc *discon_acc,
+ u64 association_id)
+{
+ lsreq->rqstaddr = discon_rqst;
+ lsreq->rqstlen = sizeof(*discon_rqst);
+ lsreq->rspaddr = discon_acc;
+ lsreq->rsplen = sizeof(*discon_acc);
+ lsreq->timeout = NVME_FC_LS_TIMEOUT_SEC;
+
+ discon_rqst->w0.ls_cmd = FCNVME_LS_DISCONNECT_ASSOC;
+ discon_rqst->desc_list_len = cpu_to_be32(
+ sizeof(struct fcnvme_lsdesc_assoc_id) +
+ sizeof(struct fcnvme_lsdesc_disconn_cmd));
+
+ discon_rqst->associd.desc_tag = cpu_to_be32(FCNVME_LSDESC_ASSOC_ID);
+ discon_rqst->associd.desc_len =
+ fcnvme_lsdesc_len(
+ sizeof(struct fcnvme_lsdesc_assoc_id));
+
+ discon_rqst->associd.association_id = cpu_to_be64(association_id);
+
+ discon_rqst->discon_cmd.desc_tag = cpu_to_be32(
+ FCNVME_LSDESC_DISCONN_CMD);
+ discon_rqst->discon_cmd.desc_len =
+ fcnvme_lsdesc_len(
+ sizeof(struct fcnvme_lsdesc_disconn_cmd));
+}
+
+static inline int
+nvmefc_vldt_lsreq_discon_assoc(u32 rqstlen,
+ struct fcnvme_ls_disconnect_assoc_rqst *rqst)
+{
+ int ret = 0;
+
+ if (rqstlen < sizeof(struct fcnvme_ls_disconnect_assoc_rqst))
+ ret = VERR_DISCONN_LEN;
+ else if (rqst->desc_list_len !=
+ fcnvme_lsdesc_len(
+ sizeof(struct fcnvme_ls_disconnect_assoc_rqst)))
+ ret = VERR_DISCONN_RQST_LEN;
+ else if (rqst->associd.desc_tag != cpu_to_be32(FCNVME_LSDESC_ASSOC_ID))
+ ret = VERR_ASSOC_ID;
+ else if (rqst->associd.desc_len !=
+ fcnvme_lsdesc_len(
+ sizeof(struct fcnvme_lsdesc_assoc_id)))
+ ret = VERR_ASSOC_ID_LEN;
+ else if (rqst->discon_cmd.desc_tag !=
+ cpu_to_be32(FCNVME_LSDESC_DISCONN_CMD))
+ ret = VERR_DISCONN_CMD;
+ else if (rqst->discon_cmd.desc_len !=
+ fcnvme_lsdesc_len(
+ sizeof(struct fcnvme_lsdesc_disconn_cmd)))
+ ret = VERR_DISCONN_CMD_LEN;
+ /*
+ * As the standard changed on the LS, check if the old format is used
+ * and the scope is something other than Association (e.g. 0).
+ */
+ else if (rqst->discon_cmd.rsvd8[0])
+ ret = VERR_DISCONN_SCOPE;
+
+ return ret;
+}
+
+#endif /* _NVME_FC_TRANSPORT_H */
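As a hedged illustration of the shared helpers above, a handler could reject an unsupported LS as follows; the rspbuf argument is assumed to be the per-LS union nvmefc_ls_responses allocated in nvme_fc_rcv_ls_req().

/* Sketch only: format an LS_RJT with reason "command not supported". */
static int my_reject_unsupported_ls(union nvmefc_ls_responses *rspbuf,
				    u8 ls_cmd)
{
	/* returns the number of response bytes to hand back to the LLDD */
	return nvme_fc_format_rjt(rspbuf, sizeof(*rspbuf), ls_cmd,
				  FCNVME_RJT_RC_UNSUP, FCNVME_RJT_EXP_NONE, 0);
}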
diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
index ec46693f6b64..69608755d415 100644
--- a/drivers/nvme/host/lightnvm.c
+++ b/drivers/nvme/host/lightnvm.c
@@ -171,7 +171,7 @@ struct nvme_nvm_bb_tbl {
__le32 tdresv;
__le32 thresv;
__le32 rsvd2[8];
- __u8 blk[0];
+ __u8 blk[];
};
struct nvme_nvm_id20_addrf {
@@ -961,7 +961,10 @@ int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node)
geo = &dev->geo;
geo->csecs = 1 << ns->lba_shift;
geo->sos = ns->ms;
- geo->ext = ns->ext;
+ if (ns->features & NVME_NS_EXT_LBAS)
+ geo->ext = true;
+ else
+ geo->ext = false;
geo->mdts = ns->ctrl->max_hw_sectors;
dev->q = q;
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 54603bd3e02d..da78e499947a 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -3,6 +3,7 @@
* Copyright (c) 2017-2018 Christoph Hellwig.
*/
+#include <linux/backing-dev.h>
#include <linux/moduleparam.h>
#include <trace/events/block.h>
#include "nvme.h"
@@ -293,7 +294,7 @@ static bool nvme_available_path(struct nvme_ns_head *head)
static blk_qc_t nvme_ns_head_make_request(struct request_queue *q,
struct bio *bio)
{
- struct nvme_ns_head *head = q->queuedata;
+ struct nvme_ns_head *head = bio->bi_disk->private_data;
struct device *dev = disk_to_dev(head->disk);
struct nvme_ns *ns;
blk_qc_t ret = BLK_QC_T_NONE;
@@ -371,13 +372,12 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
* We also do this for private namespaces as the namespace sharing data could
* change after a rescan.
*/
- if (!(ctrl->subsys->cmic & (1 << 1)) || !multipath)
+ if (!(ctrl->subsys->cmic & NVME_CTRL_CMIC_MULTI_CTRL) || !multipath)
return 0;
q = blk_alloc_queue(nvme_ns_head_make_request, ctrl->numa_node);
if (!q)
goto out;
- q->queuedata = head;
blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
/* set to a default value for 512 until disk is validated */
blk_queue_logical_block_size(q, 512);
@@ -666,6 +666,13 @@ void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id)
nvme_mpath_set_live(ns);
mutex_unlock(&ns->head->lock);
}
+
+ if (bdi_cap_stable_pages_required(ns->queue->backing_dev_info)) {
+ struct backing_dev_info *info =
+ ns->head->disk->queue->backing_dev_info;
+
+ info->capabilities |= BDI_CAP_STABLE_WRITES;
+ }
}
void nvme_mpath_remove_disk(struct nvme_ns_head *head)
@@ -687,7 +694,8 @@ int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
int error;
/* check if multipath is enabled and we have the capability */
- if (!multipath || !ctrl->subsys || !(ctrl->subsys->cmic & (1 << 3)))
+ if (!multipath || !ctrl->subsys ||
+ !(ctrl->subsys->cmic & NVME_CTRL_CMIC_ANA))
return 0;
ctrl->anacap = id->anacap;
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 2e04a36296d9..fa5c75501049 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -16,6 +16,7 @@
#include <linux/fault-inject.h>
#include <linux/rcupdate.h>
#include <linux/wait.h>
+#include <linux/t10-pi.h>
#include <trace/events/block.h>
@@ -30,8 +31,10 @@ extern unsigned int admin_timeout;
#ifdef CONFIG_ARCH_NO_SG_CHAIN
#define NVME_INLINE_SG_CNT 0
+#define NVME_INLINE_METADATA_SG_CNT 0
#else
#define NVME_INLINE_SG_CNT 2
+#define NVME_INLINE_METADATA_SG_CNT 1
#endif
extern struct workqueue_struct *nvme_wq;
@@ -228,6 +231,7 @@ struct nvme_ctrl {
u32 page_size;
u32 max_hw_sectors;
u32 max_segments;
+ u32 max_integrity_segments;
u16 crdt[3];
u16 oncs;
u16 oacs;
@@ -352,6 +356,7 @@ struct nvme_ns_head {
struct nvme_ns_ids ids;
struct list_head entry;
struct kref ref;
+ bool shared;
int instance;
#ifdef CONFIG_NVME_MULTIPATH
struct gendisk *disk;
@@ -363,6 +368,11 @@ struct nvme_ns_head {
#endif
};
+enum nvme_ns_features {
+ NVME_NS_EXT_LBAS = 1 << 0, /* support extended LBA format */
+ NVME_NS_METADATA_SUPPORTED = 1 << 1, /* support getting generated md */
+};
+
struct nvme_ns {
struct list_head list;
@@ -382,18 +392,23 @@ struct nvme_ns {
u16 ms;
u16 sgs;
u32 sws;
- bool ext;
u8 pi_type;
+ unsigned long features;
unsigned long flags;
#define NVME_NS_REMOVING 0
#define NVME_NS_DEAD 1
#define NVME_NS_ANA_PENDING 2
- u16 noiob;
struct nvme_fault_inject fault_inject;
};
+/* NVMe ns supports metadata actions by the controller (generate/strip) */
+static inline bool nvme_ns_has_pi(struct nvme_ns *ns)
+{
+ return ns->pi_type && ns->ms == sizeof(struct t10_pi_tuple);
+}
+
struct nvme_ctrl_ops {
const char *name;
struct module *module;
@@ -449,6 +464,14 @@ static inline sector_t nvme_lba_to_sect(struct nvme_ns *ns, u64 lba)
return lba << (ns->lba_shift - SECTOR_SHIFT);
}
+/*
+ * Convert byte length to nvme's 0-based num dwords
+ */
+static inline u32 nvme_bytes_to_numd(size_t len)
+{
+ return (len >> 2) - 1;
+}
+
static inline void nvme_end_request(struct request *req, __le16 status,
union nvme_result result)
{
@@ -489,7 +512,6 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
void nvme_uninit_ctrl(struct nvme_ctrl *ctrl);
void nvme_start_ctrl(struct nvme_ctrl *ctrl);
void nvme_stop_ctrl(struct nvme_ctrl *ctrl);
-void nvme_put_ctrl(struct nvme_ctrl *ctrl);
int nvme_init_identify(struct nvme_ctrl *ctrl);
void nvme_remove_namespaces(struct nvme_ctrl *ctrl);
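A quick sanity check of the new nvme_bytes_to_numd() helper (illustrative only, not part of the patch): a 4 KiB buffer is 1024 dwords, which the spec encodes as the 0-based value 1023.

/* Illustrative only: (4096 >> 2) - 1 == 1023 */
static inline u32 example_numd_for_4k(void)
{
	return nvme_bytes_to_numd(4096);
}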
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index cc46e250fcac..d690d5593a80 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -68,14 +68,30 @@ static int io_queue_depth = 1024;
module_param_cb(io_queue_depth, &io_queue_depth_ops, &io_queue_depth, 0644);
MODULE_PARM_DESC(io_queue_depth, "set io queue depth, should >= 2");
+static int io_queue_count_set(const char *val, const struct kernel_param *kp)
+{
+ unsigned int n;
+ int ret;
+
+ ret = kstrtouint(val, 10, &n);
+ if (ret != 0 || n > num_possible_cpus())
+ return -EINVAL;
+ return param_set_uint(val, kp);
+}
+
+static const struct kernel_param_ops io_queue_count_ops = {
+ .set = io_queue_count_set,
+ .get = param_get_uint,
+};
+
static unsigned int write_queues;
-module_param(write_queues, uint, 0644);
+module_param_cb(write_queues, &io_queue_count_ops, &write_queues, 0644);
MODULE_PARM_DESC(write_queues,
"Number of queues to use for writes. If not set, reads and writes "
"will share a queue set.");
static unsigned int poll_queues;
-module_param(poll_queues, uint, 0644);
+module_param_cb(poll_queues, &io_queue_count_ops, &poll_queues, 0644);
MODULE_PARM_DESC(poll_queues, "Number of queues to use for polled IO.");
struct nvme_dev;
@@ -128,6 +144,9 @@ struct nvme_dev {
dma_addr_t host_mem_descs_dma;
struct nvme_host_mem_buf_desc *host_mem_descs;
void **host_mem_desc_bufs;
+ unsigned int nr_allocated_queues;
+ unsigned int nr_write_queues;
+ unsigned int nr_poll_queues;
};
static int io_queue_depth_set(const char *val, const struct kernel_param *kp)
@@ -166,14 +185,13 @@ struct nvme_queue {
void *sq_cmds;
/* only used for poll queues: */
spinlock_t cq_poll_lock ____cacheline_aligned_in_smp;
- volatile struct nvme_completion *cqes;
+ struct nvme_completion *cqes;
dma_addr_t sq_dma_addr;
dma_addr_t cq_dma_addr;
u32 __iomem *q_db;
u16 q_depth;
u16 cq_vector;
u16 sq_tail;
- u16 last_sq_tail;
u16 cq_head;
u16 qid;
u8 cq_phase;
@@ -209,25 +227,14 @@ struct nvme_iod {
struct scatterlist *sg;
};
-static unsigned int max_io_queues(void)
+static inline unsigned int nvme_dbbuf_size(struct nvme_dev *dev)
{
- return num_possible_cpus() + write_queues + poll_queues;
-}
-
-static unsigned int max_queue_count(void)
-{
- /* IO queues + admin queue */
- return 1 + max_io_queues();
-}
-
-static inline unsigned int nvme_dbbuf_size(u32 stride)
-{
- return (max_queue_count() * 8 * stride);
+ return dev->nr_allocated_queues * 8 * dev->db_stride;
}
static int nvme_dbbuf_dma_alloc(struct nvme_dev *dev)
{
- unsigned int mem_size = nvme_dbbuf_size(dev->db_stride);
+ unsigned int mem_size = nvme_dbbuf_size(dev);
if (dev->dbbuf_dbs)
return 0;
@@ -252,7 +259,7 @@ static int nvme_dbbuf_dma_alloc(struct nvme_dev *dev)
static void nvme_dbbuf_dma_free(struct nvme_dev *dev)
{
- unsigned int mem_size = nvme_dbbuf_size(dev->db_stride);
+ unsigned int mem_size = nvme_dbbuf_size(dev);
if (dev->dbbuf_dbs) {
dma_free_coherent(dev->dev, mem_size,
@@ -446,24 +453,11 @@ static int nvme_pci_map_queues(struct blk_mq_tag_set *set)
return 0;
}
-/*
- * Write sq tail if we are asked to, or if the next command would wrap.
- */
-static inline void nvme_write_sq_db(struct nvme_queue *nvmeq, bool write_sq)
+static inline void nvme_write_sq_db(struct nvme_queue *nvmeq)
{
- if (!write_sq) {
- u16 next_tail = nvmeq->sq_tail + 1;
-
- if (next_tail == nvmeq->q_depth)
- next_tail = 0;
- if (next_tail != nvmeq->last_sq_tail)
- return;
- }
-
if (nvme_dbbuf_update_and_check_event(nvmeq->sq_tail,
nvmeq->dbbuf_sq_db, nvmeq->dbbuf_sq_ei))
writel(nvmeq->sq_tail, nvmeq->q_db);
- nvmeq->last_sq_tail = nvmeq->sq_tail;
}
/**
@@ -480,7 +474,8 @@ static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd,
cmd, sizeof(*cmd));
if (++nvmeq->sq_tail == nvmeq->q_depth)
nvmeq->sq_tail = 0;
- nvme_write_sq_db(nvmeq, write_sq);
+ if (write_sq)
+ nvme_write_sq_db(nvmeq);
spin_unlock(&nvmeq->sq_lock);
}
@@ -489,8 +484,7 @@ static void nvme_commit_rqs(struct blk_mq_hw_ctx *hctx)
struct nvme_queue *nvmeq = hctx->driver_data;
spin_lock(&nvmeq->sq_lock);
- if (nvmeq->sq_tail != nvmeq->last_sq_tail)
- nvme_write_sq_db(nvmeq, true);
+ nvme_write_sq_db(nvmeq);
spin_unlock(&nvmeq->sq_lock);
}
@@ -922,8 +916,9 @@ static void nvme_pci_complete_rq(struct request *req)
/* We read the CQE phase first to check if the rest of the entry is valid */
static inline bool nvme_cqe_pending(struct nvme_queue *nvmeq)
{
- return (le16_to_cpu(nvmeq->cqes[nvmeq->cq_head].status) & 1) ==
- nvmeq->cq_phase;
+ struct nvme_completion *hcqe = &nvmeq->cqes[nvmeq->cq_head];
+
+ return (le16_to_cpu(READ_ONCE(hcqe->status)) & 1) == nvmeq->cq_phase;
}
static inline void nvme_ring_cq_doorbell(struct nvme_queue *nvmeq)
@@ -944,7 +939,7 @@ static inline struct blk_mq_tags *nvme_queue_tagset(struct nvme_queue *nvmeq)
static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx)
{
- volatile struct nvme_completion *cqe = &nvmeq->cqes[idx];
+ struct nvme_completion *cqe = &nvmeq->cqes[idx];
struct request *req;
if (unlikely(cqe->command_id >= nvmeq->q_depth)) {
@@ -1501,7 +1496,6 @@ static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid)
struct nvme_dev *dev = nvmeq->dev;
nvmeq->sq_tail = 0;
- nvmeq->last_sq_tail = 0;
nvmeq->cq_head = 0;
nvmeq->cq_phase = 1;
nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
@@ -2003,7 +1997,7 @@ static int nvme_setup_host_mem(struct nvme_dev *dev)
static void nvme_calc_irq_sets(struct irq_affinity *affd, unsigned int nrirqs)
{
struct nvme_dev *dev = affd->priv;
- unsigned int nr_read_queues;
+ unsigned int nr_read_queues, nr_write_queues = dev->nr_write_queues;
/*
* If there is no interrupt available for queues, ensure that
@@ -2019,12 +2013,12 @@ static void nvme_calc_irq_sets(struct irq_affinity *affd, unsigned int nrirqs)
if (!nrirqs) {
nrirqs = 1;
nr_read_queues = 0;
- } else if (nrirqs == 1 || !write_queues) {
+ } else if (nrirqs == 1 || !nr_write_queues) {
nr_read_queues = 0;
- } else if (write_queues >= nrirqs) {
+ } else if (nr_write_queues >= nrirqs) {
nr_read_queues = 1;
} else {
- nr_read_queues = nrirqs - write_queues;
+ nr_read_queues = nrirqs - nr_write_queues;
}
dev->io_queues[HCTX_TYPE_DEFAULT] = nrirqs - nr_read_queues;
@@ -2048,7 +2042,7 @@ static int nvme_setup_irqs(struct nvme_dev *dev, unsigned int nr_io_queues)
* Poll queues don't need interrupts, but we need at least one IO
* queue left over for non-polled IO.
*/
- this_p_queues = poll_queues;
+ this_p_queues = dev->nr_poll_queues;
if (this_p_queues >= nr_io_queues) {
this_p_queues = nr_io_queues - 1;
irq_queues = 1;
@@ -2078,14 +2072,25 @@ static void nvme_disable_io_queues(struct nvme_dev *dev)
__nvme_disable_io_queues(dev, nvme_admin_delete_cq);
}
+static unsigned int nvme_max_io_queues(struct nvme_dev *dev)
+{
+ return num_possible_cpus() + dev->nr_write_queues + dev->nr_poll_queues;
+}
+
static int nvme_setup_io_queues(struct nvme_dev *dev)
{
struct nvme_queue *adminq = &dev->queues[0];
struct pci_dev *pdev = to_pci_dev(dev->dev);
- int result, nr_io_queues;
+ unsigned int nr_io_queues;
unsigned long size;
+ int result;
- nr_io_queues = max_io_queues();
+ /*
+ * Sample the module parameters once at reset time so that we have
+ * stable values to work with.
+ */
+ dev->nr_write_queues = write_queues;
+ dev->nr_poll_queues = poll_queues;
/*
* If tags are shared with admin queue (Apple bug), then
@@ -2093,6 +2098,9 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
*/
if (dev->ctrl.quirks & NVME_QUIRK_SHARED_TAGS)
nr_io_queues = 1;
+ else
+ nr_io_queues = min(nvme_max_io_queues(dev),
+ dev->nr_allocated_queues - 1);
result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues);
if (result < 0)
@@ -2565,6 +2573,12 @@ static void nvme_reset_work(struct work_struct *work)
goto out;
}
+ /*
+ * We do not support an SGL for metadata (yet), so we are limited to a
+ * single integrity segment for the separate metadata pointer.
+ */
+ dev->ctrl.max_integrity_segments = 1;
+
result = nvme_init_identify(&dev->ctrl);
if (result)
goto out;
@@ -2767,8 +2781,11 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (!dev)
return -ENOMEM;
- dev->queues = kcalloc_node(max_queue_count(), sizeof(struct nvme_queue),
- GFP_KERNEL, node);
+ dev->nr_write_queues = write_queues;
+ dev->nr_poll_queues = poll_queues;
+ dev->nr_allocated_queues = nvme_max_io_queues(dev) + 1;
+ dev->queues = kcalloc_node(dev->nr_allocated_queues,
+ sizeof(struct nvme_queue), GFP_KERNEL, node);
if (!dev->queues)
goto free;
@@ -3131,8 +3148,6 @@ static int __init nvme_init(void)
BUILD_BUG_ON(sizeof(struct nvme_delete_queue) != 64);
BUILD_BUG_ON(IRQ_AFFINITY_MAX_SETS < 2);
- write_queues = min(write_queues, num_possible_cpus());
- poll_queues = min(poll_queues, num_possible_cpus());
return pci_register_driver(&nvme_driver);
}
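To make the new queue accounting concrete, a worked example under assumed values (8 possible CPUs, the nvme module loaded with write_queues=2 and poll_queues=2); the figures follow nvme_max_io_queues() and nvme_dbbuf_size() as changed above and are illustrative only.

/*
 * nr_allocated_queues  = num_possible_cpus() + nr_write_queues + nr_poll_queues
 *                        + 1 (admin queue)  =  8 + 2 + 2 + 1  =  13
 * doorbell buffer size = nr_allocated_queues * 8 * db_stride
 *                      = 13 * 8 * db_stride bytes
 */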
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index cac8a930396a..f8f856dc0c67 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -34,6 +34,11 @@
#define NVME_RDMA_MAX_INLINE_SEGMENTS 4
+#define NVME_RDMA_DATA_SGL_SIZE \
+ (sizeof(struct scatterlist) * NVME_INLINE_SG_CNT)
+#define NVME_RDMA_METADATA_SGL_SIZE \
+ (sizeof(struct scatterlist) * NVME_INLINE_METADATA_SG_CNT)
+
struct nvme_rdma_device {
struct ib_device *dev;
struct ib_pd *pd;
@@ -48,6 +53,11 @@ struct nvme_rdma_qe {
u64 dma;
};
+struct nvme_rdma_sgl {
+ int nents;
+ struct sg_table sg_table;
+};
+
struct nvme_rdma_queue;
struct nvme_rdma_request {
struct nvme_request req;
@@ -58,12 +68,12 @@ struct nvme_rdma_request {
refcount_t ref;
struct ib_sge sge[1 + NVME_RDMA_MAX_INLINE_SEGMENTS];
u32 num_sge;
- int nents;
struct ib_reg_wr reg_wr;
struct ib_cqe reg_cqe;
struct nvme_rdma_queue *queue;
- struct sg_table sg_table;
- struct scatterlist first_sgl[];
+ struct nvme_rdma_sgl data_sgl;
+ struct nvme_rdma_sgl *metadata_sgl;
+ bool use_sig_mr;
};
enum nvme_rdma_queue_flags {
@@ -85,6 +95,7 @@ struct nvme_rdma_queue {
struct rdma_cm_id *cm_id;
int cm_error;
struct completion cm_done;
+ bool pi_support;
};
struct nvme_rdma_ctrl {
@@ -261,6 +272,8 @@ static int nvme_rdma_create_qp(struct nvme_rdma_queue *queue, const int factor)
init_attr.qp_type = IB_QPT_RC;
init_attr.send_cq = queue->ib_cq;
init_attr.recv_cq = queue->ib_cq;
+ if (queue->pi_support)
+ init_attr.create_flags |= IB_QP_CREATE_INTEGRITY_EN;
ret = rdma_create_qp(queue->cm_id, dev->pd, &init_attr);
@@ -290,6 +303,12 @@ static int nvme_rdma_init_request(struct blk_mq_tag_set *set,
if (!req->sqe.data)
return -ENOMEM;
+ /* metadata nvme_rdma_sgl struct is located after command's data SGL */
+ if (queue->pi_support)
+ req->metadata_sgl = (void *)nvme_req(rq) +
+ sizeof(struct nvme_rdma_request) +
+ NVME_RDMA_DATA_SGL_SIZE;
+
req->queue = queue;
return 0;
@@ -400,6 +419,8 @@ static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue)
dev = queue->device;
ibdev = dev->dev;
+ if (queue->pi_support)
+ ib_mr_pool_destroy(queue->qp, &queue->qp->sig_mrs);
ib_mr_pool_destroy(queue->qp, &queue->qp->rdma_mrs);
/*
@@ -416,10 +437,16 @@ static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue)
nvme_rdma_dev_put(dev);
}
-static int nvme_rdma_get_max_fr_pages(struct ib_device *ibdev)
+static int nvme_rdma_get_max_fr_pages(struct ib_device *ibdev, bool pi_support)
{
- return min_t(u32, NVME_RDMA_MAX_SEGMENTS,
- ibdev->attrs.max_fast_reg_page_list_len - 1);
+ u32 max_page_list_len;
+
+ if (pi_support)
+ max_page_list_len = ibdev->attrs.max_pi_fast_reg_page_list_len;
+ else
+ max_page_list_len = ibdev->attrs.max_fast_reg_page_list_len;
+
+ return min_t(u32, NVME_RDMA_MAX_SEGMENTS, max_page_list_len - 1);
}
static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
@@ -476,7 +503,7 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
* misaligned we'll end up using two entries for a single data page,
* so one additional entry is required.
*/
- pages_per_mr = nvme_rdma_get_max_fr_pages(ibdev) + 1;
+ pages_per_mr = nvme_rdma_get_max_fr_pages(ibdev, queue->pi_support) + 1;
ret = ib_mr_pool_init(queue->qp, &queue->qp->rdma_mrs,
queue->queue_size,
IB_MR_TYPE_MEM_REG,
@@ -488,10 +515,24 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
goto out_destroy_ring;
}
+ if (queue->pi_support) {
+ ret = ib_mr_pool_init(queue->qp, &queue->qp->sig_mrs,
+ queue->queue_size, IB_MR_TYPE_INTEGRITY,
+ pages_per_mr, pages_per_mr);
+ if (ret) {
+ dev_err(queue->ctrl->ctrl.device,
+ "failed to initialize PI MR pool sized %d for QID %d\n",
+ queue->queue_size, idx);
+ goto out_destroy_mr_pool;
+ }
+ }
+
set_bit(NVME_RDMA_Q_TR_READY, &queue->flags);
return 0;
+out_destroy_mr_pool:
+ ib_mr_pool_destroy(queue->qp, &queue->qp->rdma_mrs);
out_destroy_ring:
nvme_rdma_free_ring(ibdev, queue->rsp_ring, queue->queue_size,
sizeof(struct nvme_completion), DMA_FROM_DEVICE);
@@ -513,6 +554,10 @@ static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl,
queue = &ctrl->queues[idx];
queue->ctrl = ctrl;
+ if (idx && ctrl->ctrl.max_integrity_segments)
+ queue->pi_support = true;
+ else
+ queue->pi_support = false;
init_completion(&queue->cm_done);
if (idx > 0)
@@ -723,7 +768,7 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl,
set->reserved_tags = 2; /* connect + keep-alive */
set->numa_node = nctrl->numa_node;
set->cmd_size = sizeof(struct nvme_rdma_request) +
- NVME_INLINE_SG_CNT * sizeof(struct scatterlist);
+ NVME_RDMA_DATA_SGL_SIZE;
set->driver_data = ctrl;
set->nr_hw_queues = 1;
set->timeout = ADMIN_TIMEOUT;
@@ -737,7 +782,10 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl,
set->numa_node = nctrl->numa_node;
set->flags = BLK_MQ_F_SHOULD_MERGE;
set->cmd_size = sizeof(struct nvme_rdma_request) +
- NVME_INLINE_SG_CNT * sizeof(struct scatterlist);
+ NVME_RDMA_DATA_SGL_SIZE;
+ if (nctrl->max_integrity_segments)
+ set->cmd_size += sizeof(struct nvme_rdma_sgl) +
+ NVME_RDMA_METADATA_SGL_SIZE;
set->driver_data = ctrl;
set->nr_hw_queues = nctrl->queue_count - 1;
set->timeout = NVME_IO_TIMEOUT;
@@ -770,6 +818,7 @@ static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl,
static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
bool new)
{
+ bool pi_capable = false;
int error;
error = nvme_rdma_alloc_queue(ctrl, 0, NVME_AQ_DEPTH);
@@ -779,7 +828,13 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
ctrl->device = ctrl->queues[0].device;
ctrl->ctrl.numa_node = dev_to_node(ctrl->device->dev->dma_device);
- ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ctrl->device->dev);
+ /* T10-PI support */
+ if (ctrl->device->dev->attrs.device_cap_flags &
+ IB_DEVICE_INTEGRITY_HANDOVER)
+ pi_capable = true;
+
+ ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ctrl->device->dev,
+ pi_capable);
/*
* Bind the async event SQE DMA mapping to the admin queue lifetime.
@@ -821,6 +876,10 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
ctrl->ctrl.max_segments = ctrl->max_fr_pages;
ctrl->ctrl.max_hw_sectors = ctrl->max_fr_pages << (ilog2(SZ_4K) - 9);
+ if (pi_capable)
+ ctrl->ctrl.max_integrity_segments = ctrl->max_fr_pages;
+ else
+ ctrl->ctrl.max_integrity_segments = 0;
blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
@@ -1149,17 +1208,29 @@ static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue,
struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
struct nvme_rdma_device *dev = queue->device;
struct ib_device *ibdev = dev->dev;
+ struct list_head *pool = &queue->qp->rdma_mrs;
if (!blk_rq_nr_phys_segments(rq))
return;
+ if (blk_integrity_rq(rq)) {
+ ib_dma_unmap_sg(ibdev, req->metadata_sgl->sg_table.sgl,
+ req->metadata_sgl->nents, rq_dma_dir(rq));
+ sg_free_table_chained(&req->metadata_sgl->sg_table,
+ NVME_INLINE_METADATA_SG_CNT);
+ }
+
+ if (req->use_sig_mr)
+ pool = &queue->qp->sig_mrs;
+
if (req->mr) {
- ib_mr_pool_put(queue->qp, &queue->qp->rdma_mrs, req->mr);
+ ib_mr_pool_put(queue->qp, pool, req->mr);
req->mr = NULL;
}
- ib_dma_unmap_sg(ibdev, req->sg_table.sgl, req->nents, rq_dma_dir(rq));
- sg_free_table_chained(&req->sg_table, NVME_INLINE_SG_CNT);
+ ib_dma_unmap_sg(ibdev, req->data_sgl.sg_table.sgl, req->data_sgl.nents,
+ rq_dma_dir(rq));
+ sg_free_table_chained(&req->data_sgl.sg_table, NVME_INLINE_SG_CNT);
}
static int nvme_rdma_set_sg_null(struct nvme_command *c)
@@ -1178,7 +1249,7 @@ static int nvme_rdma_map_sg_inline(struct nvme_rdma_queue *queue,
int count)
{
struct nvme_sgl_desc *sg = &c->common.dptr.sgl;
- struct scatterlist *sgl = req->sg_table.sgl;
+ struct scatterlist *sgl = req->data_sgl.sg_table.sgl;
struct ib_sge *sge = &req->sge[1];
u32 len = 0;
int i;
@@ -1203,8 +1274,8 @@ static int nvme_rdma_map_sg_single(struct nvme_rdma_queue *queue,
{
struct nvme_keyed_sgl_desc *sg = &c->common.dptr.ksgl;
- sg->addr = cpu_to_le64(sg_dma_address(req->sg_table.sgl));
- put_unaligned_le24(sg_dma_len(req->sg_table.sgl), sg->length);
+ sg->addr = cpu_to_le64(sg_dma_address(req->data_sgl.sg_table.sgl));
+ put_unaligned_le24(sg_dma_len(req->data_sgl.sg_table.sgl), sg->length);
put_unaligned_le32(queue->device->pd->unsafe_global_rkey, sg->key);
sg->type = NVME_KEY_SGL_FMT_DATA_DESC << 4;
return 0;
@@ -1225,7 +1296,8 @@ static int nvme_rdma_map_sg_fr(struct nvme_rdma_queue *queue,
* Align the MR to a 4K page size to match the ctrl page size and
* the block virtual boundary.
*/
- nr = ib_map_mr_sg(req->mr, req->sg_table.sgl, count, NULL, SZ_4K);
+ nr = ib_map_mr_sg(req->mr, req->data_sgl.sg_table.sgl, count, NULL,
+ SZ_4K);
if (unlikely(nr < count)) {
ib_mr_pool_put(queue->qp, &queue->qp->rdma_mrs, req->mr);
req->mr = NULL;
@@ -1256,12 +1328,125 @@ static int nvme_rdma_map_sg_fr(struct nvme_rdma_queue *queue,
return 0;
}
+static void nvme_rdma_set_sig_domain(struct blk_integrity *bi,
+ struct nvme_command *cmd, struct ib_sig_domain *domain,
+ u16 control, u8 pi_type)
+{
+ domain->sig_type = IB_SIG_TYPE_T10_DIF;
+ domain->sig.dif.bg_type = IB_T10DIF_CRC;
+ domain->sig.dif.pi_interval = 1 << bi->interval_exp;
+ domain->sig.dif.ref_tag = le32_to_cpu(cmd->rw.reftag);
+ if (control & NVME_RW_PRINFO_PRCHK_REF)
+ domain->sig.dif.ref_remap = true;
+
+ domain->sig.dif.app_tag = le16_to_cpu(cmd->rw.apptag);
+ domain->sig.dif.apptag_check_mask = le16_to_cpu(cmd->rw.appmask);
+ domain->sig.dif.app_escape = true;
+ if (pi_type == NVME_NS_DPS_PI_TYPE3)
+ domain->sig.dif.ref_escape = true;
+}
+
+static void nvme_rdma_set_sig_attrs(struct blk_integrity *bi,
+ struct nvme_command *cmd, struct ib_sig_attrs *sig_attrs,
+ u8 pi_type)
+{
+ u16 control = le16_to_cpu(cmd->rw.control);
+
+ memset(sig_attrs, 0, sizeof(*sig_attrs));
+ if (control & NVME_RW_PRINFO_PRACT) {
+ /* for WRITE_INSERT/READ_STRIP no memory domain */
+ sig_attrs->mem.sig_type = IB_SIG_TYPE_NONE;
+ nvme_rdma_set_sig_domain(bi, cmd, &sig_attrs->wire, control,
+ pi_type);
+ /* Clear the PRACT bit since HCA will generate/verify the PI */
+ control &= ~NVME_RW_PRINFO_PRACT;
+ cmd->rw.control = cpu_to_le16(control);
+ } else {
+ /* for WRITE_PASS/READ_PASS both wire/memory domains exist */
+ nvme_rdma_set_sig_domain(bi, cmd, &sig_attrs->wire, control,
+ pi_type);
+ nvme_rdma_set_sig_domain(bi, cmd, &sig_attrs->mem, control,
+ pi_type);
+ }
+}
+
+static void nvme_rdma_set_prot_checks(struct nvme_command *cmd, u8 *mask)
+{
+ *mask = 0;
+ if (le16_to_cpu(cmd->rw.control) & NVME_RW_PRINFO_PRCHK_REF)
+ *mask |= IB_SIG_CHECK_REFTAG;
+ if (le16_to_cpu(cmd->rw.control) & NVME_RW_PRINFO_PRCHK_GUARD)
+ *mask |= IB_SIG_CHECK_GUARD;
+}
+
+static void nvme_rdma_sig_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+ if (unlikely(wc->status != IB_WC_SUCCESS))
+ nvme_rdma_wr_error(cq, wc, "SIG");
+}
+
+static int nvme_rdma_map_sg_pi(struct nvme_rdma_queue *queue,
+ struct nvme_rdma_request *req, struct nvme_command *c,
+ int count, int pi_count)
+{
+ struct nvme_rdma_sgl *sgl = &req->data_sgl;
+ struct ib_reg_wr *wr = &req->reg_wr;
+ struct request *rq = blk_mq_rq_from_pdu(req);
+ struct nvme_ns *ns = rq->q->queuedata;
+ struct bio *bio = rq->bio;
+ struct nvme_keyed_sgl_desc *sg = &c->common.dptr.ksgl;
+ int nr;
+
+ req->mr = ib_mr_pool_get(queue->qp, &queue->qp->sig_mrs);
+ if (WARN_ON_ONCE(!req->mr))
+ return -EAGAIN;
+
+ nr = ib_map_mr_sg_pi(req->mr, sgl->sg_table.sgl, count, NULL,
+ req->metadata_sgl->sg_table.sgl, pi_count, NULL,
+ SZ_4K);
+ if (unlikely(nr))
+ goto mr_put;
+
+ nvme_rdma_set_sig_attrs(blk_get_integrity(bio->bi_disk), c,
+ req->mr->sig_attrs, ns->pi_type);
+ nvme_rdma_set_prot_checks(c, &req->mr->sig_attrs->check_mask);
+
+ ib_update_fast_reg_key(req->mr, ib_inc_rkey(req->mr->rkey));
+
+ req->reg_cqe.done = nvme_rdma_sig_done;
+ memset(wr, 0, sizeof(*wr));
+ wr->wr.opcode = IB_WR_REG_MR_INTEGRITY;
+ wr->wr.wr_cqe = &req->reg_cqe;
+ wr->wr.num_sge = 0;
+ wr->wr.send_flags = 0;
+ wr->mr = req->mr;
+ wr->key = req->mr->rkey;
+ wr->access = IB_ACCESS_LOCAL_WRITE |
+ IB_ACCESS_REMOTE_READ |
+ IB_ACCESS_REMOTE_WRITE;
+
+ sg->addr = cpu_to_le64(req->mr->iova);
+ put_unaligned_le24(req->mr->length, sg->length);
+ put_unaligned_le32(req->mr->rkey, sg->key);
+ sg->type = NVME_KEY_SGL_FMT_DATA_DESC << 4;
+
+ return 0;
+
+mr_put:
+ ib_mr_pool_put(queue->qp, &queue->qp->sig_mrs, req->mr);
+ req->mr = NULL;
+ if (nr < 0)
+ return nr;
+ return -EINVAL;
+}
+
static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
struct request *rq, struct nvme_command *c)
{
struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
struct nvme_rdma_device *dev = queue->device;
struct ib_device *ibdev = dev->dev;
+ int pi_count = 0;
int count, ret;
req->num_sge = 1;
@@ -1272,22 +1457,52 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
if (!blk_rq_nr_phys_segments(rq))
return nvme_rdma_set_sg_null(c);
- req->sg_table.sgl = req->first_sgl;
- ret = sg_alloc_table_chained(&req->sg_table,
- blk_rq_nr_phys_segments(rq), req->sg_table.sgl,
+ req->data_sgl.sg_table.sgl = (struct scatterlist *)(req + 1);
+ ret = sg_alloc_table_chained(&req->data_sgl.sg_table,
+ blk_rq_nr_phys_segments(rq), req->data_sgl.sg_table.sgl,
NVME_INLINE_SG_CNT);
if (ret)
return -ENOMEM;
- req->nents = blk_rq_map_sg(rq->q, rq, req->sg_table.sgl);
+ req->data_sgl.nents = blk_rq_map_sg(rq->q, rq,
+ req->data_sgl.sg_table.sgl);
- count = ib_dma_map_sg(ibdev, req->sg_table.sgl, req->nents,
- rq_dma_dir(rq));
+ count = ib_dma_map_sg(ibdev, req->data_sgl.sg_table.sgl,
+ req->data_sgl.nents, rq_dma_dir(rq));
if (unlikely(count <= 0)) {
ret = -EIO;
goto out_free_table;
}
+ if (blk_integrity_rq(rq)) {
+ req->metadata_sgl->sg_table.sgl =
+ (struct scatterlist *)(req->metadata_sgl + 1);
+ ret = sg_alloc_table_chained(&req->metadata_sgl->sg_table,
+ blk_rq_count_integrity_sg(rq->q, rq->bio),
+ req->metadata_sgl->sg_table.sgl,
+ NVME_INLINE_METADATA_SG_CNT);
+ if (unlikely(ret)) {
+ ret = -ENOMEM;
+ goto out_unmap_sg;
+ }
+
+ req->metadata_sgl->nents = blk_rq_map_integrity_sg(rq->q,
+ rq->bio, req->metadata_sgl->sg_table.sgl);
+ pi_count = ib_dma_map_sg(ibdev,
+ req->metadata_sgl->sg_table.sgl,
+ req->metadata_sgl->nents,
+ rq_dma_dir(rq));
+ if (unlikely(pi_count <= 0)) {
+ ret = -EIO;
+ goto out_free_pi_table;
+ }
+ }
+
+ if (req->use_sig_mr) {
+ ret = nvme_rdma_map_sg_pi(queue, req, c, count, pi_count);
+ goto out;
+ }
+
if (count <= dev->num_inline_segments) {
if (rq_data_dir(rq) == WRITE && nvme_rdma_queue_idx(queue) &&
queue->ctrl->use_inline_data &&
@@ -1306,14 +1521,23 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
ret = nvme_rdma_map_sg_fr(queue, req, c, count);
out:
if (unlikely(ret))
- goto out_unmap_sg;
+ goto out_unmap_pi_sg;
return 0;
+out_unmap_pi_sg:
+ if (blk_integrity_rq(rq))
+ ib_dma_unmap_sg(ibdev, req->metadata_sgl->sg_table.sgl,
+ req->metadata_sgl->nents, rq_dma_dir(rq));
+out_free_pi_table:
+ if (blk_integrity_rq(rq))
+ sg_free_table_chained(&req->metadata_sgl->sg_table,
+ NVME_INLINE_METADATA_SG_CNT);
out_unmap_sg:
- ib_dma_unmap_sg(ibdev, req->sg_table.sgl, req->nents, rq_dma_dir(rq));
+ ib_dma_unmap_sg(ibdev, req->data_sgl.sg_table.sgl, req->data_sgl.nents,
+ rq_dma_dir(rq));
out_free_table:
- sg_free_table_chained(&req->sg_table, NVME_INLINE_SG_CNT);
+ sg_free_table_chained(&req->data_sgl.sg_table, NVME_INLINE_SG_CNT);
return ret;
}
@@ -1761,6 +1985,15 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
blk_mq_start_request(rq);
+ if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) &&
+ queue->pi_support &&
+ (c->common.opcode == nvme_cmd_write ||
+ c->common.opcode == nvme_cmd_read) &&
+ nvme_ns_has_pi(ns))
+ req->use_sig_mr = true;
+ else
+ req->use_sig_mr = false;
+
err = nvme_rdma_map_data(queue, rq, c);
if (unlikely(err < 0)) {
dev_err(queue->ctrl->ctrl.device,
@@ -1801,12 +2034,46 @@ static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx)
return ib_process_cq_direct(queue->ib_cq, -1);
}
+static void nvme_rdma_check_pi_status(struct nvme_rdma_request *req)
+{
+ struct request *rq = blk_mq_rq_from_pdu(req);
+ struct ib_mr_status mr_status;
+ int ret;
+
+ ret = ib_check_mr_status(req->mr, IB_MR_CHECK_SIG_STATUS, &mr_status);
+ if (ret) {
+ pr_err("ib_check_mr_status failed, ret %d\n", ret);
+ nvme_req(rq)->status = NVME_SC_INVALID_PI;
+ return;
+ }
+
+ if (mr_status.fail_status & IB_MR_CHECK_SIG_STATUS) {
+ switch (mr_status.sig_err.err_type) {
+ case IB_SIG_BAD_GUARD:
+ nvme_req(rq)->status = NVME_SC_GUARD_CHECK;
+ break;
+ case IB_SIG_BAD_REFTAG:
+ nvme_req(rq)->status = NVME_SC_REFTAG_CHECK;
+ break;
+ case IB_SIG_BAD_APPTAG:
+ nvme_req(rq)->status = NVME_SC_APPTAG_CHECK;
+ break;
+ }
+ pr_err("PI error found type %d expected 0x%x vs actual 0x%x\n",
+ mr_status.sig_err.err_type, mr_status.sig_err.expected,
+ mr_status.sig_err.actual);
+ }
+}
+
static void nvme_rdma_complete_rq(struct request *rq)
{
struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
struct nvme_rdma_queue *queue = req->queue;
struct ib_device *ibdev = queue->device->dev;
+ if (req->use_sig_mr)
+ nvme_rdma_check_pi_status(req);
+
nvme_rdma_unmap_data(queue, rq);
ib_dma_unmap_single(ibdev, req->sqe.dma, sizeof(struct nvme_command),
DMA_TO_DEVICE);
@@ -1926,7 +2193,7 @@ out_fail:
static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
.name = "rdma",
.module = THIS_MODULE,
- .flags = NVME_F_FABRICS,
+ .flags = NVME_F_FABRICS | NVME_F_METADATA_SUPPORTED,
.reg_read32 = nvmf_reg_read32,
.reg_read64 = nvmf_reg_read64,
.reg_write32 = nvmf_reg_write32,
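The data and metadata scatterlists set up in nvme_rdma_map_data() above come out of a single per-request allocation: the inline data SG entries start immediately after the request structure ((struct scatterlist *)(req + 1)), and the metadata SG entries follow the metadata_sgl header the same way, so small requests need no separate scatterlist allocation and sg_alloc_table_chained() only chains extra entries when the inline ones run out. A minimal userspace sketch of that layout trick, with invented struct names, purely for illustration:

#include <stdio.h>
#include <stdlib.h>

struct sg_entry {		/* stand-in for struct scatterlist */
	void *page;
	unsigned int len;
};

struct demo_req {
	int tag;
	/* inline data SG entries live directly after this struct */
};

int main(void)
{
	enum { INLINE_SG = 4 };
	struct demo_req *req = calloc(1, sizeof(*req) +
				      INLINE_SG * sizeof(struct sg_entry));
	if (!req)
		return 1;

	/* same pointer arithmetic as (struct scatterlist *)(req + 1) */
	struct sg_entry *sgl = (struct sg_entry *)(req + 1);
	sgl[0].len = 4096;

	printf("req at %p, first inline entry at %p\n",
	       (void *)req, (void *)sgl);
	free(req);
	return 0;
}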
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 4c972d8abf31..1843110ec34f 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -60,6 +60,7 @@ struct nvme_tcp_request {
enum nvme_tcp_queue_flags {
NVME_TCP_Q_ALLOCATED = 0,
NVME_TCP_Q_LIVE = 1,
+ NVME_TCP_Q_POLLING = 2,
};
enum nvme_tcp_recv_state {
@@ -75,6 +76,7 @@ struct nvme_tcp_queue {
int io_cpu;
spinlock_t lock;
+ struct mutex send_mutex;
struct list_head send_list;
/* recv state */
@@ -131,6 +133,7 @@ static DEFINE_MUTEX(nvme_tcp_ctrl_mutex);
static struct workqueue_struct *nvme_tcp_wq;
static struct blk_mq_ops nvme_tcp_mq_ops;
static struct blk_mq_ops nvme_tcp_admin_mq_ops;
+static int nvme_tcp_try_send(struct nvme_tcp_queue *queue);
static inline struct nvme_tcp_ctrl *to_tcp_ctrl(struct nvme_ctrl *ctrl)
{
@@ -257,15 +260,29 @@ static inline void nvme_tcp_advance_req(struct nvme_tcp_request *req,
}
}
-static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req)
+static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req,
+ bool sync)
{
struct nvme_tcp_queue *queue = req->queue;
+ bool empty;
spin_lock(&queue->lock);
+ empty = list_empty(&queue->send_list) && !queue->request;
list_add_tail(&req->entry, &queue->send_list);
spin_unlock(&queue->lock);
- queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
+ /*
+ * If we're the first on the send_list, try to send directly;
+ * otherwise queue io_work. Only do the direct send if we are
+ * on the same cpu, so we don't introduce contention.
+ */
+ if (queue->io_cpu == smp_processor_id() &&
+ sync && empty && mutex_trylock(&queue->send_mutex)) {
+ nvme_tcp_try_send(queue);
+ mutex_unlock(&queue->send_mutex);
+ } else {
+ queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
+ }
}
static inline struct nvme_tcp_request *
@@ -578,7 +595,7 @@ static int nvme_tcp_handle_r2t(struct nvme_tcp_queue *queue,
req->state = NVME_TCP_SEND_H2C_PDU;
req->offset = 0;
- nvme_tcp_queue_request(req);
+ nvme_tcp_queue_request(req, false);
return 0;
}
@@ -794,11 +811,12 @@ static void nvme_tcp_data_ready(struct sock *sk)
{
struct nvme_tcp_queue *queue;
- read_lock(&sk->sk_callback_lock);
+ read_lock_bh(&sk->sk_callback_lock);
queue = sk->sk_user_data;
- if (likely(queue && queue->rd_enabled))
+ if (likely(queue && queue->rd_enabled) &&
+ !test_bit(NVME_TCP_Q_POLLING, &queue->flags))
queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
- read_unlock(&sk->sk_callback_lock);
+ read_unlock_bh(&sk->sk_callback_lock);
}
static void nvme_tcp_write_space(struct sock *sk)
@@ -867,7 +885,7 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
if (last && !queue->data_digest)
flags |= MSG_EOR;
else
- flags |= MSG_MORE;
+ flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;
/* can't zcopy slab pages */
if (unlikely(PageSlab(page))) {
@@ -906,11 +924,16 @@ static int nvme_tcp_try_send_cmd_pdu(struct nvme_tcp_request *req)
struct nvme_tcp_queue *queue = req->queue;
struct nvme_tcp_cmd_pdu *pdu = req->pdu;
bool inline_data = nvme_tcp_has_inline_data(req);
- int flags = MSG_DONTWAIT | (inline_data ? MSG_MORE : MSG_EOR);
u8 hdgst = nvme_tcp_hdgst_len(queue);
int len = sizeof(*pdu) + hdgst - req->offset;
+ int flags = MSG_DONTWAIT;
int ret;
+ if (inline_data)
+ flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;
+ else
+ flags |= MSG_EOR;
+
if (queue->hdr_digest && !req->offset)
nvme_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu));
@@ -949,7 +972,7 @@ static int nvme_tcp_try_send_data_pdu(struct nvme_tcp_request *req)
ret = kernel_sendpage(queue->sock, virt_to_page(pdu),
offset_in_page(pdu) + req->offset, len,
- MSG_DONTWAIT | MSG_MORE);
+ MSG_DONTWAIT | MSG_MORE | MSG_SENDPAGE_NOTLAST);
if (unlikely(ret <= 0))
return ret;
@@ -1063,11 +1086,14 @@ static void nvme_tcp_io_work(struct work_struct *w)
bool pending = false;
int result;
- result = nvme_tcp_try_send(queue);
- if (result > 0)
- pending = true;
- else if (unlikely(result < 0))
- break;
+ if (mutex_trylock(&queue->send_mutex)) {
+ result = nvme_tcp_try_send(queue);
+ mutex_unlock(&queue->send_mutex);
+ if (result > 0)
+ pending = true;
+ else if (unlikely(result < 0))
+ break;
+ }
result = nvme_tcp_try_recv(queue);
if (result > 0)
@@ -1318,6 +1344,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
queue->ctrl = ctrl;
INIT_LIST_HEAD(&queue->send_list);
spin_lock_init(&queue->lock);
+ mutex_init(&queue->send_mutex);
INIT_WORK(&queue->io_work, nvme_tcp_io_work);
queue->queue_size = queue_size;
@@ -1506,6 +1533,7 @@ static struct blk_mq_tag_set *nvme_tcp_alloc_tagset(struct nvme_ctrl *nctrl,
set->queue_depth = NVME_AQ_MQ_TAG_DEPTH;
set->reserved_tags = 2; /* connect + keep-alive */
set->numa_node = NUMA_NO_NODE;
+ set->flags = BLK_MQ_F_BLOCKING;
set->cmd_size = sizeof(struct nvme_tcp_request);
set->driver_data = ctrl;
set->nr_hw_queues = 1;
@@ -1517,7 +1545,7 @@ static struct blk_mq_tag_set *nvme_tcp_alloc_tagset(struct nvme_ctrl *nctrl,
set->queue_depth = nctrl->sqsize + 1;
set->reserved_tags = 1; /* fabric connect */
set->numa_node = NUMA_NO_NODE;
- set->flags = BLK_MQ_F_SHOULD_MERGE;
+ set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING;
set->cmd_size = sizeof(struct nvme_tcp_request);
set->driver_data = ctrl;
set->nr_hw_queues = nctrl->queue_count - 1;
@@ -2076,7 +2104,7 @@ static void nvme_tcp_submit_async_event(struct nvme_ctrl *arg)
ctrl->async_req.curr_bio = NULL;
ctrl->async_req.data_len = 0;
- nvme_tcp_queue_request(&ctrl->async_req);
+ nvme_tcp_queue_request(&ctrl->async_req, true);
}
static enum blk_eh_timer_return
@@ -2207,7 +2235,7 @@ static blk_status_t nvme_tcp_queue_rq(struct blk_mq_hw_ctx *hctx,
blk_mq_start_request(rq);
- nvme_tcp_queue_request(req);
+ nvme_tcp_queue_request(req, true);
return BLK_STS_OK;
}
@@ -2265,9 +2293,11 @@ static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx)
if (!test_bit(NVME_TCP_Q_LIVE, &queue->flags))
return 0;
+ set_bit(NVME_TCP_Q_POLLING, &queue->flags);
if (sk_can_busy_loop(sk) && skb_queue_empty_lockless(&sk->sk_receive_queue))
sk_busy_loop(sk, true);
nvme_tcp_try_recv(queue);
+ clear_bit(NVME_TCP_Q_POLLING, &queue->flags);
return queue->nr_cqe;
}
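The send path above puts each request on send_list and, when the submitter is already on the queue's io_cpu, the list was empty and send_mutex can be taken with a trylock, sends inline instead of bouncing through io_work; if io_work already owns the mutex the submitter simply queues the work item. A simplified userspace sketch of that try-inline-or-defer pattern (helper names are invented for the example):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t send_mutex = PTHREAD_MUTEX_INITIALIZER;

static void do_send(void)
{
	printf("submitter: sending inline\n");
}

static void schedule_io_work(void)
{
	printf("submitter: deferring to io_work\n");
}

/* Rough analogue of the queue-or-send-inline decision. */
static void queue_request(bool sync, bool list_was_empty, bool same_cpu)
{
	if (sync && list_was_empty && same_cpu &&
	    pthread_mutex_trylock(&send_mutex) == 0) {
		do_send();			/* fast path */
		pthread_mutex_unlock(&send_mutex);
	} else {
		schedule_io_work();		/* slow path */
	}
}

int main(void)
{
	queue_request(true, true, true);	/* sends inline */
	queue_request(true, false, true);	/* others queued: io_work */
	return 0;
}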
diff --git a/drivers/nvme/target/Kconfig b/drivers/nvme/target/Kconfig
index d7f48c0fb311..4474952d64c6 100644
--- a/drivers/nvme/target/Kconfig
+++ b/drivers/nvme/target/Kconfig
@@ -4,6 +4,7 @@ config NVME_TARGET
tristate "NVMe Target support"
depends on BLOCK
depends on CONFIGFS_FS
+ select BLK_DEV_INTEGRITY_T10 if BLK_DEV_INTEGRITY
select SGL_ALLOC
help
This enables target side support for the NVMe protocol, that is
diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index 9d6f75cfa77c..1db8c0498668 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -295,7 +295,7 @@ out:
static void nvmet_execute_get_log_page(struct nvmet_req *req)
{
- if (!nvmet_check_data_len(req, nvmet_get_log_page_len(req->cmd)))
+ if (!nvmet_check_transfer_len(req, nvmet_get_log_page_len(req->cmd)))
return;
switch (req->cmd->get_log_page.lid) {
@@ -341,6 +341,7 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
{
struct nvmet_ctrl *ctrl = req->sq->ctrl;
struct nvme_id_ctrl *id;
+ u32 cmd_capsule_size;
u16 status = 0;
id = kzalloc(sizeof(*id), GFP_KERNEL);
@@ -433,9 +434,15 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
strlcpy(id->subnqn, ctrl->subsys->subsysnqn, sizeof(id->subnqn));
- /* Max command capsule size is sqe + single page of in-capsule data */
- id->ioccsz = cpu_to_le32((sizeof(struct nvme_command) +
- req->port->inline_data_size) / 16);
+ /*
+ * Max command capsule size is sqe + in-capsule data size.
+ * Disable in-capsule data for metadata-capable controllers.
+ */
+ cmd_capsule_size = sizeof(struct nvme_command);
+ if (!ctrl->pi_support)
+ cmd_capsule_size += req->port->inline_data_size;
+ id->ioccsz = cpu_to_le32(cmd_capsule_size / 16);
+
/* Max response capsule size is cqe */
id->iorcsz = cpu_to_le32(sizeof(struct nvme_completion) / 16);
@@ -465,6 +472,7 @@ out:
static void nvmet_execute_identify_ns(struct nvmet_req *req)
{
+ struct nvmet_ctrl *ctrl = req->sq->ctrl;
struct nvmet_ns *ns;
struct nvme_id_ns *id;
u16 status = 0;
@@ -482,10 +490,12 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req)
}
/* return an all zeroed buffer if we can't find an active namespace */
- ns = nvmet_find_namespace(req->sq->ctrl, req->cmd->identify.nsid);
+ ns = nvmet_find_namespace(ctrl, req->cmd->identify.nsid);
if (!ns)
goto done;
+ nvmet_ns_revalidate(ns);
+
/*
* nuse = ncap = nsze isn't always true, but we have no way to find
* that out from the underlying device.
@@ -521,6 +531,16 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req)
id->lbaf[0].ds = ns->blksize_shift;
+ if (ctrl->pi_support && nvmet_ns_has_pi(ns)) {
+ id->dpc = NVME_NS_DPC_PI_FIRST | NVME_NS_DPC_PI_LAST |
+ NVME_NS_DPC_PI_TYPE1 | NVME_NS_DPC_PI_TYPE2 |
+ NVME_NS_DPC_PI_TYPE3;
+ id->mc = NVME_MC_EXTENDED_LBA;
+ id->dps = ns->pi_type;
+ id->flbas = NVME_NS_FLBAS_META_EXT;
+ id->lbaf[0].ms = cpu_to_le16(ns->metadata_size);
+ }
+
if (ns->readonly)
id->nsattr |= (1 << 0);
nvmet_put_namespace(ns);
@@ -625,7 +645,7 @@ out:
static void nvmet_execute_identify(struct nvmet_req *req)
{
- if (!nvmet_check_data_len(req, NVME_IDENTIFY_DATA_SIZE))
+ if (!nvmet_check_transfer_len(req, NVME_IDENTIFY_DATA_SIZE))
return;
switch (req->cmd->identify.cns) {
@@ -654,7 +674,7 @@ static void nvmet_execute_identify(struct nvmet_req *req)
*/
static void nvmet_execute_abort(struct nvmet_req *req)
{
- if (!nvmet_check_data_len(req, 0))
+ if (!nvmet_check_transfer_len(req, 0))
return;
nvmet_set_result(req, 1);
nvmet_req_complete(req, 0);
@@ -743,7 +763,7 @@ static void nvmet_execute_set_features(struct nvmet_req *req)
u16 nsqr;
u16 ncqr;
- if (!nvmet_check_data_len(req, 0))
+ if (!nvmet_check_transfer_len(req, 0))
return;
switch (cdw10 & 0xff) {
@@ -815,7 +835,7 @@ static void nvmet_execute_get_features(struct nvmet_req *req)
u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10);
u16 status = 0;
- if (!nvmet_check_data_len(req, nvmet_feat_data_len(req, cdw10)))
+ if (!nvmet_check_transfer_len(req, nvmet_feat_data_len(req, cdw10)))
return;
switch (cdw10 & 0xff) {
@@ -882,7 +902,7 @@ void nvmet_execute_async_event(struct nvmet_req *req)
{
struct nvmet_ctrl *ctrl = req->sq->ctrl;
- if (!nvmet_check_data_len(req, 0))
+ if (!nvmet_check_transfer_len(req, 0))
return;
mutex_lock(&ctrl->lock);
@@ -901,7 +921,7 @@ void nvmet_execute_keep_alive(struct nvmet_req *req)
{
struct nvmet_ctrl *ctrl = req->sq->ctrl;
- if (!nvmet_check_data_len(req, 0))
+ if (!nvmet_check_transfer_len(req, 0))
return;
pr_debug("ctrl %d update keep-alive timer for %d secs\n",
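IOCCSZ in the identify data above is reported in 16-byte units, so a 64-byte SQE alone gives 4, and adding a port's in-capsule data (16 KB here, an example value only) gives (64 + 16384) / 16 = 1028. A tiny sketch of that arithmetic:

#include <stdio.h>

#define SQE_SIZE		64	/* sizeof(struct nvme_command) */
#define INLINE_DATA_SIZE	16384	/* hypothetical inline_data_size */

int main(void)
{
	unsigned int capsule_no_pi = SQE_SIZE + INLINE_DATA_SIZE;
	unsigned int capsule_pi = SQE_SIZE;	/* in-capsule data disabled */

	/* IOCCSZ is expressed in 16-byte units */
	printf("ioccsz without PI: %u\n", capsule_no_pi / 16);	/* 1028 */
	printf("ioccsz with PI:    %u\n", capsule_pi / 16);	/* 4 */
	return 0;
}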
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index 58cabd7b6fc5..419e0d4ce79b 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -20,61 +20,71 @@ static const struct config_item_type nvmet_subsys_type;
static LIST_HEAD(nvmet_ports_list);
struct list_head *nvmet_ports = &nvmet_ports_list;
-static const struct nvmet_transport_name {
+struct nvmet_type_name_map {
u8 type;
const char *name;
-} nvmet_transport_names[] = {
+};
+
+static struct nvmet_type_name_map nvmet_transport[] = {
{ NVMF_TRTYPE_RDMA, "rdma" },
{ NVMF_TRTYPE_FC, "fc" },
{ NVMF_TRTYPE_TCP, "tcp" },
{ NVMF_TRTYPE_LOOP, "loop" },
};
+static const struct nvmet_type_name_map nvmet_addr_family[] = {
+ { NVMF_ADDR_FAMILY_PCI, "pcie" },
+ { NVMF_ADDR_FAMILY_IP4, "ipv4" },
+ { NVMF_ADDR_FAMILY_IP6, "ipv6" },
+ { NVMF_ADDR_FAMILY_IB, "ib" },
+ { NVMF_ADDR_FAMILY_FC, "fc" },
+ { NVMF_ADDR_FAMILY_LOOP, "loop" },
+};
+
+static bool nvmet_is_port_enabled(struct nvmet_port *p, const char *caller)
+{
+ if (p->enabled)
+ pr_err("Disable port '%u' before changing attribute in %s\n",
+ le16_to_cpu(p->disc_addr.portid), caller);
+ return p->enabled;
+}
+
/*
* nvmet_port Generic ConfigFS definitions.
* Used in any place in the ConfigFS tree that refers to an address.
*/
-static ssize_t nvmet_addr_adrfam_show(struct config_item *item,
- char *page)
+static ssize_t nvmet_addr_adrfam_show(struct config_item *item, char *page)
{
- switch (to_nvmet_port(item)->disc_addr.adrfam) {
- case NVMF_ADDR_FAMILY_IP4:
- return sprintf(page, "ipv4\n");
- case NVMF_ADDR_FAMILY_IP6:
- return sprintf(page, "ipv6\n");
- case NVMF_ADDR_FAMILY_IB:
- return sprintf(page, "ib\n");
- case NVMF_ADDR_FAMILY_FC:
- return sprintf(page, "fc\n");
- default:
- return sprintf(page, "\n");
+ u8 adrfam = to_nvmet_port(item)->disc_addr.adrfam;
+ int i;
+
+ for (i = 1; i < ARRAY_SIZE(nvmet_addr_family); i++) {
+ if (nvmet_addr_family[i].type == adrfam)
+ return sprintf(page, "%s\n", nvmet_addr_family[i].name);
}
+
+ return sprintf(page, "\n");
}
static ssize_t nvmet_addr_adrfam_store(struct config_item *item,
const char *page, size_t count)
{
struct nvmet_port *port = to_nvmet_port(item);
+ int i;
- if (port->enabled) {
- pr_err("Cannot modify address while enabled\n");
- pr_err("Disable the address before modifying\n");
+ if (nvmet_is_port_enabled(port, __func__))
return -EACCES;
- }
- if (sysfs_streq(page, "ipv4")) {
- port->disc_addr.adrfam = NVMF_ADDR_FAMILY_IP4;
- } else if (sysfs_streq(page, "ipv6")) {
- port->disc_addr.adrfam = NVMF_ADDR_FAMILY_IP6;
- } else if (sysfs_streq(page, "ib")) {
- port->disc_addr.adrfam = NVMF_ADDR_FAMILY_IB;
- } else if (sysfs_streq(page, "fc")) {
- port->disc_addr.adrfam = NVMF_ADDR_FAMILY_FC;
- } else {
- pr_err("Invalid value '%s' for adrfam\n", page);
- return -EINVAL;
+ for (i = 1; i < ARRAY_SIZE(nvmet_addr_family); i++) {
+ if (sysfs_streq(page, nvmet_addr_family[i].name))
+ goto found;
}
+ pr_err("Invalid value '%s' for adrfam\n", page);
+ return -EINVAL;
+
+found:
+ port->disc_addr.adrfam = nvmet_addr_family[i].type;
return count;
}
@@ -100,11 +110,9 @@ static ssize_t nvmet_addr_portid_store(struct config_item *item,
return -EINVAL;
}
- if (port->enabled) {
- pr_err("Cannot modify address while enabled\n");
- pr_err("Disable the address before modifying\n");
+ if (nvmet_is_port_enabled(port, __func__))
return -EACCES;
- }
+
port->disc_addr.portid = cpu_to_le16(portid);
return count;
}
@@ -130,11 +138,8 @@ static ssize_t nvmet_addr_traddr_store(struct config_item *item,
return -EINVAL;
}
- if (port->enabled) {
- pr_err("Cannot modify address while enabled\n");
- pr_err("Disable the address before modifying\n");
+ if (nvmet_is_port_enabled(port, __func__))
return -EACCES;
- }
if (sscanf(page, "%s\n", port->disc_addr.traddr) != 1)
return -EINVAL;
@@ -143,20 +148,24 @@ static ssize_t nvmet_addr_traddr_store(struct config_item *item,
CONFIGFS_ATTR(nvmet_, addr_traddr);
-static ssize_t nvmet_addr_treq_show(struct config_item *item,
- char *page)
+static const struct nvmet_type_name_map nvmet_addr_treq[] = {
+ { NVMF_TREQ_NOT_SPECIFIED, "not specified" },
+ { NVMF_TREQ_REQUIRED, "required" },
+ { NVMF_TREQ_NOT_REQUIRED, "not required" },
+};
+
+static ssize_t nvmet_addr_treq_show(struct config_item *item, char *page)
{
- switch (to_nvmet_port(item)->disc_addr.treq &
- NVME_TREQ_SECURE_CHANNEL_MASK) {
- case NVMF_TREQ_NOT_SPECIFIED:
- return sprintf(page, "not specified\n");
- case NVMF_TREQ_REQUIRED:
- return sprintf(page, "required\n");
- case NVMF_TREQ_NOT_REQUIRED:
- return sprintf(page, "not required\n");
- default:
- return sprintf(page, "\n");
+ u8 treq = to_nvmet_port(item)->disc_addr.treq &
+ NVME_TREQ_SECURE_CHANNEL_MASK;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(nvmet_addr_treq); i++) {
+ if (treq == nvmet_addr_treq[i].type)
+ return sprintf(page, "%s\n", nvmet_addr_treq[i].name);
}
+
+ return sprintf(page, "\n");
}
static ssize_t nvmet_addr_treq_store(struct config_item *item,
@@ -164,25 +173,22 @@ static ssize_t nvmet_addr_treq_store(struct config_item *item,
{
struct nvmet_port *port = to_nvmet_port(item);
u8 treq = port->disc_addr.treq & ~NVME_TREQ_SECURE_CHANNEL_MASK;
+ int i;
- if (port->enabled) {
- pr_err("Cannot modify address while enabled\n");
- pr_err("Disable the address before modifying\n");
+ if (nvmet_is_port_enabled(port, __func__))
return -EACCES;
- }
- if (sysfs_streq(page, "not specified")) {
- treq |= NVMF_TREQ_NOT_SPECIFIED;
- } else if (sysfs_streq(page, "required")) {
- treq |= NVMF_TREQ_REQUIRED;
- } else if (sysfs_streq(page, "not required")) {
- treq |= NVMF_TREQ_NOT_REQUIRED;
- } else {
- pr_err("Invalid value '%s' for treq\n", page);
- return -EINVAL;
+ for (i = 0; i < ARRAY_SIZE(nvmet_addr_treq); i++) {
+ if (sysfs_streq(page, nvmet_addr_treq[i].name))
+ goto found;
}
- port->disc_addr.treq = treq;
+ pr_err("Invalid value '%s' for treq\n", page);
+ return -EINVAL;
+
+found:
+ treq |= nvmet_addr_treq[i].type;
+ port->disc_addr.treq = treq;
return count;
}
@@ -206,11 +212,8 @@ static ssize_t nvmet_addr_trsvcid_store(struct config_item *item,
pr_err("Invalid value '%s' for trsvcid\n", page);
return -EINVAL;
}
- if (port->enabled) {
- pr_err("Cannot modify address while enabled\n");
- pr_err("Disable the address before modifying\n");
+ if (nvmet_is_port_enabled(port, __func__))
return -EACCES;
- }
if (sscanf(page, "%s\n", port->disc_addr.trsvcid) != 1)
return -EINVAL;
@@ -233,11 +236,8 @@ static ssize_t nvmet_param_inline_data_size_store(struct config_item *item,
struct nvmet_port *port = to_nvmet_port(item);
int ret;
- if (port->enabled) {
- pr_err("Cannot modify inline_data_size while port enabled\n");
- pr_err("Disable the port before modifying\n");
+ if (nvmet_is_port_enabled(port, __func__))
return -EACCES;
- }
ret = kstrtoint(page, 0, &port->inline_data_size);
if (ret) {
pr_err("Invalid value '%s' for inline_data_size\n", page);
@@ -248,16 +248,45 @@ static ssize_t nvmet_param_inline_data_size_store(struct config_item *item,
CONFIGFS_ATTR(nvmet_, param_inline_data_size);
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+static ssize_t nvmet_param_pi_enable_show(struct config_item *item,
+ char *page)
+{
+ struct nvmet_port *port = to_nvmet_port(item);
+
+ return snprintf(page, PAGE_SIZE, "%d\n", port->pi_enable);
+}
+
+static ssize_t nvmet_param_pi_enable_store(struct config_item *item,
+ const char *page, size_t count)
+{
+ struct nvmet_port *port = to_nvmet_port(item);
+ bool val;
+
+ if (strtobool(page, &val))
+ return -EINVAL;
+
+ if (port->enabled) {
+ pr_err("Disable port before setting pi_enable value.\n");
+ return -EACCES;
+ }
+
+ port->pi_enable = val;
+ return count;
+}
+
+CONFIGFS_ATTR(nvmet_, param_pi_enable);
+#endif
+
static ssize_t nvmet_addr_trtype_show(struct config_item *item,
char *page)
{
struct nvmet_port *port = to_nvmet_port(item);
int i;
- for (i = 0; i < ARRAY_SIZE(nvmet_transport_names); i++) {
- if (port->disc_addr.trtype != nvmet_transport_names[i].type)
- continue;
- return sprintf(page, "%s\n", nvmet_transport_names[i].name);
+ for (i = 0; i < ARRAY_SIZE(nvmet_transport); i++) {
+ if (port->disc_addr.trtype == nvmet_transport[i].type)
+ return sprintf(page, "%s\n", nvmet_transport[i].name);
}
return sprintf(page, "\n");
@@ -276,22 +305,20 @@ static ssize_t nvmet_addr_trtype_store(struct config_item *item,
struct nvmet_port *port = to_nvmet_port(item);
int i;
- if (port->enabled) {
- pr_err("Cannot modify address while enabled\n");
- pr_err("Disable the address before modifying\n");
+ if (nvmet_is_port_enabled(port, __func__))
return -EACCES;
- }
- for (i = 0; i < ARRAY_SIZE(nvmet_transport_names); i++) {
- if (sysfs_streq(page, nvmet_transport_names[i].name))
+ for (i = 0; i < ARRAY_SIZE(nvmet_transport); i++) {
+ if (sysfs_streq(page, nvmet_transport[i].name))
goto found;
}
pr_err("Invalid value '%s' for trtype\n", page);
return -EINVAL;
+
found:
memset(&port->disc_addr.tsas, 0, NVMF_TSAS_SIZE);
- port->disc_addr.trtype = nvmet_transport_names[i].type;
+ port->disc_addr.trtype = nvmet_transport[i].type;
if (port->disc_addr.trtype == NVMF_TRTYPE_RDMA)
nvmet_port_init_tsas_rdma(port);
return count;
@@ -327,7 +354,7 @@ static ssize_t nvmet_ns_device_path_store(struct config_item *item,
kfree(ns->device_path);
ret = -ENOMEM;
- ns->device_path = kstrndup(page, len, GFP_KERNEL);
+ ns->device_path = kmemdup_nul(page, len, GFP_KERNEL);
if (!ns->device_path)
goto out_unlock;
@@ -543,6 +570,31 @@ static ssize_t nvmet_ns_buffered_io_store(struct config_item *item,
CONFIGFS_ATTR(nvmet_ns_, buffered_io);
+static ssize_t nvmet_ns_revalidate_size_store(struct config_item *item,
+ const char *page, size_t count)
+{
+ struct nvmet_ns *ns = to_nvmet_ns(item);
+ bool val;
+
+ if (strtobool(page, &val))
+ return -EINVAL;
+
+ if (!val)
+ return -EINVAL;
+
+ mutex_lock(&ns->subsys->lock);
+ if (!ns->enabled) {
+ pr_err("enable ns before revalidate.\n");
+ mutex_unlock(&ns->subsys->lock);
+ return -EINVAL;
+ }
+ nvmet_ns_revalidate(ns);
+ mutex_unlock(&ns->subsys->lock);
+ return count;
+}
+
+CONFIGFS_ATTR_WO(nvmet_ns_, revalidate_size);
+
static struct configfs_attribute *nvmet_ns_attrs[] = {
&nvmet_ns_attr_device_path,
&nvmet_ns_attr_device_nguid,
@@ -550,6 +602,7 @@ static struct configfs_attribute *nvmet_ns_attrs[] = {
&nvmet_ns_attr_ana_grpid,
&nvmet_ns_attr_enable,
&nvmet_ns_attr_buffered_io,
+ &nvmet_ns_attr_revalidate_size,
#ifdef CONFIG_PCI_P2PDMA
&nvmet_ns_attr_p2pmem,
#endif
@@ -963,7 +1016,7 @@ static ssize_t nvmet_subsys_attr_model_store(struct config_item *item,
return -EINVAL;
}
- new_model_number = kstrndup(page, len, GFP_KERNEL);
+ new_model_number = kmemdup_nul(page, len, GFP_KERNEL);
if (!new_model_number)
return -ENOMEM;
@@ -987,6 +1040,28 @@ static ssize_t nvmet_subsys_attr_model_store(struct config_item *item,
}
CONFIGFS_ATTR(nvmet_subsys_, attr_model);
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+static ssize_t nvmet_subsys_attr_pi_enable_show(struct config_item *item,
+ char *page)
+{
+ return snprintf(page, PAGE_SIZE, "%d\n", to_subsys(item)->pi_support);
+}
+
+static ssize_t nvmet_subsys_attr_pi_enable_store(struct config_item *item,
+ const char *page, size_t count)
+{
+ struct nvmet_subsys *subsys = to_subsys(item);
+ bool pi_enable;
+
+ if (strtobool(page, &pi_enable))
+ return -EINVAL;
+
+ subsys->pi_support = pi_enable;
+ return count;
+}
+CONFIGFS_ATTR(nvmet_subsys_, attr_pi_enable);
+#endif
+
static struct configfs_attribute *nvmet_subsys_attrs[] = {
&nvmet_subsys_attr_attr_allow_any_host,
&nvmet_subsys_attr_attr_version,
@@ -994,6 +1069,9 @@ static struct configfs_attribute *nvmet_subsys_attrs[] = {
&nvmet_subsys_attr_attr_cntlid_min,
&nvmet_subsys_attr_attr_cntlid_max,
&nvmet_subsys_attr_attr_model,
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+ &nvmet_subsys_attr_attr_pi_enable,
+#endif
NULL,
};
@@ -1149,10 +1227,7 @@ static const struct config_item_type nvmet_referrals_type = {
.ct_group_ops = &nvmet_referral_group_ops,
};
-static struct {
- enum nvme_ana_state state;
- const char *name;
-} nvmet_ana_state_names[] = {
+static struct nvmet_type_name_map nvmet_ana_state[] = {
{ NVME_ANA_OPTIMIZED, "optimized" },
{ NVME_ANA_NONOPTIMIZED, "non-optimized" },
{ NVME_ANA_INACCESSIBLE, "inaccessible" },
@@ -1167,10 +1242,9 @@ static ssize_t nvmet_ana_group_ana_state_show(struct config_item *item,
enum nvme_ana_state state = grp->port->ana_state[grp->grpid];
int i;
- for (i = 0; i < ARRAY_SIZE(nvmet_ana_state_names); i++) {
- if (state != nvmet_ana_state_names[i].state)
- continue;
- return sprintf(page, "%s\n", nvmet_ana_state_names[i].name);
+ for (i = 0; i < ARRAY_SIZE(nvmet_ana_state); i++) {
+ if (state == nvmet_ana_state[i].type)
+ return sprintf(page, "%s\n", nvmet_ana_state[i].name);
}
return sprintf(page, "\n");
@@ -1180,10 +1254,11 @@ static ssize_t nvmet_ana_group_ana_state_store(struct config_item *item,
const char *page, size_t count)
{
struct nvmet_ana_group *grp = to_ana_group(item);
+ enum nvme_ana_state *ana_state = grp->port->ana_state;
int i;
- for (i = 0; i < ARRAY_SIZE(nvmet_ana_state_names); i++) {
- if (sysfs_streq(page, nvmet_ana_state_names[i].name))
+ for (i = 0; i < ARRAY_SIZE(nvmet_ana_state); i++) {
+ if (sysfs_streq(page, nvmet_ana_state[i].name))
goto found;
}
@@ -1192,10 +1267,9 @@ static ssize_t nvmet_ana_group_ana_state_store(struct config_item *item,
found:
down_write(&nvmet_ana_sem);
- grp->port->ana_state[grp->grpid] = nvmet_ana_state_names[i].state;
+ ana_state[grp->grpid] = (enum nvme_ana_state) nvmet_ana_state[i].type;
nvmet_ana_chgcnt++;
up_write(&nvmet_ana_sem);
-
nvmet_port_send_ana_event(grp->port);
return count;
}
@@ -1297,6 +1371,9 @@ static struct configfs_attribute *nvmet_port_attrs[] = {
&nvmet_attr_addr_trsvcid,
&nvmet_attr_addr_trtype,
&nvmet_attr_param_inline_data_size,
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+ &nvmet_attr_param_pi_enable,
+#endif
NULL,
};
@@ -1346,6 +1423,7 @@ static struct config_group *nvmet_ports_make(struct config_group *group,
port->inline_data_size = -1; /* < 0 == let the transport choose */
port->disc_addr.portid = cpu_to_le16(portid);
+ port->disc_addr.adrfam = NVMF_ADDR_FAMILY_MAX;
port->disc_addr.treq = NVMF_TREQ_DISABLE_SQFLOW;
config_group_init_type_name(&port->group, name, &nvmet_port_type);
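The configfs changes above replace per-attribute switch statements with small type/name tables that both the show and store paths walk. A generic userspace sketch of the same lookup pattern (the numeric values are placeholders, not the real NVMF_TRTYPE_* constants):

#include <stdio.h>
#include <string.h>

struct type_name_map {
	unsigned char type;
	const char *name;
};

static const struct type_name_map trtype_map[] = {
	{ 1, "rdma" },
	{ 2, "fc" },
	{ 3, "tcp" },
};

#define MAP_SIZE (sizeof(trtype_map) / sizeof(trtype_map[0]))

/* "show" direction: numeric type to name, empty string if unknown */
static const char *trtype_name(unsigned char type)
{
	for (size_t i = 0; i < MAP_SIZE; i++)
		if (trtype_map[i].type == type)
			return trtype_map[i].name;
	return "";
}

/* "store" direction: name back to numeric type, -1 if invalid;
 * the kernel uses sysfs_streq() here to tolerate a trailing newline. */
static int trtype_from_name(const char *name)
{
	for (size_t i = 0; i < MAP_SIZE; i++)
		if (!strcmp(name, trtype_map[i].name))
			return trtype_map[i].type;
	return -1;
}

int main(void)
{
	printf("%s %d\n", trtype_name(3), trtype_from_name("rdma"));
	return 0;
}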
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index b685f99d56a1..6392bcd30bd7 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -134,15 +134,10 @@ static void nvmet_async_events_process(struct nvmet_ctrl *ctrl, u16 status)
struct nvmet_async_event *aen;
struct nvmet_req *req;
- while (1) {
- mutex_lock(&ctrl->lock);
- aen = list_first_entry_or_null(&ctrl->async_events,
- struct nvmet_async_event, entry);
- if (!aen || !ctrl->nr_async_event_cmds) {
- mutex_unlock(&ctrl->lock);
- break;
- }
-
+ mutex_lock(&ctrl->lock);
+ while (ctrl->nr_async_event_cmds && !list_empty(&ctrl->async_events)) {
+ aen = list_first_entry(&ctrl->async_events,
+ struct nvmet_async_event, entry);
req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
if (status == 0)
nvmet_set_result(req, nvmet_async_event_result(aen));
@@ -151,20 +146,21 @@ static void nvmet_async_events_process(struct nvmet_ctrl *ctrl, u16 status)
kfree(aen);
mutex_unlock(&ctrl->lock);
+ trace_nvmet_async_event(ctrl, req->cqe->result.u32);
nvmet_req_complete(req, status);
+ mutex_lock(&ctrl->lock);
}
+ mutex_unlock(&ctrl->lock);
}
static void nvmet_async_events_free(struct nvmet_ctrl *ctrl)
{
- struct nvmet_req *req;
+ struct nvmet_async_event *aen, *tmp;
mutex_lock(&ctrl->lock);
- while (ctrl->nr_async_event_cmds) {
- req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
- mutex_unlock(&ctrl->lock);
- nvmet_req_complete(req, NVME_SC_INTERNAL | NVME_SC_DNR);
- mutex_lock(&ctrl->lock);
+ list_for_each_entry_safe(aen, tmp, &ctrl->async_events, entry) {
+ list_del(&aen->entry);
+ kfree(aen);
}
mutex_unlock(&ctrl->lock);
}
@@ -322,12 +318,21 @@ int nvmet_enable_port(struct nvmet_port *port)
if (!try_module_get(ops->owner))
return -EINVAL;
- ret = ops->add_port(port);
- if (ret) {
- module_put(ops->owner);
- return ret;
+ /*
+ * If the user requested PI support and the transport isn't pi capable,
+ * don't enable the port.
+ */
+ if (port->pi_enable && !ops->metadata_support) {
+ pr_err("T10-PI is not supported by transport type %d\n",
+ port->disc_addr.trtype);
+ ret = -EINVAL;
+ goto out_put;
}
+ ret = ops->add_port(port);
+ if (ret)
+ goto out_put;
+
/* If the transport didn't set inline_data_size, then disable it. */
if (port->inline_data_size < 0)
port->inline_data_size = 0;
@@ -335,6 +340,10 @@ int nvmet_enable_port(struct nvmet_port *port)
port->enabled = true;
port->tr_ops = ops;
return 0;
+
+out_put:
+ module_put(ops->owner);
+ return ret;
}
void nvmet_disable_port(struct nvmet_port *port)
@@ -514,6 +523,19 @@ static void nvmet_p2pmem_ns_add_p2p(struct nvmet_ctrl *ctrl,
ns->nsid);
}
+void nvmet_ns_revalidate(struct nvmet_ns *ns)
+{
+ loff_t oldsize = ns->size;
+
+ if (ns->bdev)
+ nvmet_bdev_ns_revalidate(ns);
+ else
+ nvmet_file_ns_revalidate(ns);
+
+ if (oldsize != ns->size)
+ nvmet_ns_changed(ns->subsys, ns->nsid);
+}
+
int nvmet_ns_enable(struct nvmet_ns *ns)
{
struct nvmet_subsys *subsys = ns->subsys;
@@ -764,10 +786,8 @@ void nvmet_sq_destroy(struct nvmet_sq *sq)
* If this is the admin queue, complete all AERs so that our
* queue doesn't have outstanding requests on it.
*/
- if (ctrl && ctrl->sqs && ctrl->sqs[0] == sq) {
+ if (ctrl && ctrl->sqs && ctrl->sqs[0] == sq)
nvmet_async_events_process(ctrl, status);
- nvmet_async_events_free(ctrl);
- }
percpu_ref_kill_and_confirm(&sq->ref, nvmet_confirm_sq);
wait_for_completion(&sq->confirm_done);
wait_for_completion(&sq->free_done);
@@ -873,8 +893,11 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
req->sq = sq;
req->ops = ops;
req->sg = NULL;
+ req->metadata_sg = NULL;
req->sg_cnt = 0;
+ req->metadata_sg_cnt = 0;
req->transfer_len = 0;
+ req->metadata_len = 0;
req->cqe->status = 0;
req->cqe->sq_head = 0;
req->ns = NULL;
@@ -936,9 +959,9 @@ void nvmet_req_uninit(struct nvmet_req *req)
}
EXPORT_SYMBOL_GPL(nvmet_req_uninit);
-bool nvmet_check_data_len(struct nvmet_req *req, size_t data_len)
+bool nvmet_check_transfer_len(struct nvmet_req *req, size_t len)
{
- if (unlikely(data_len != req->transfer_len)) {
+ if (unlikely(len != req->transfer_len)) {
req->error_loc = offsetof(struct nvme_common_command, dptr);
nvmet_req_complete(req, NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR);
return false;
@@ -946,7 +969,7 @@ bool nvmet_check_data_len(struct nvmet_req *req, size_t data_len)
return true;
}
-EXPORT_SYMBOL_GPL(nvmet_check_data_len);
+EXPORT_SYMBOL_GPL(nvmet_check_transfer_len);
bool nvmet_check_data_len_lte(struct nvmet_req *req, size_t data_len)
{
@@ -959,50 +982,90 @@ bool nvmet_check_data_len_lte(struct nvmet_req *req, size_t data_len)
return true;
}
-int nvmet_req_alloc_sgl(struct nvmet_req *req)
+static unsigned int nvmet_data_transfer_len(struct nvmet_req *req)
{
- struct pci_dev *p2p_dev = NULL;
+ return req->transfer_len - req->metadata_len;
+}
- if (IS_ENABLED(CONFIG_PCI_P2PDMA)) {
- if (req->sq->ctrl && req->ns)
- p2p_dev = radix_tree_lookup(&req->sq->ctrl->p2p_ns_map,
- req->ns->nsid);
+static int nvmet_req_alloc_p2pmem_sgls(struct nvmet_req *req)
+{
+ req->sg = pci_p2pmem_alloc_sgl(req->p2p_dev, &req->sg_cnt,
+ nvmet_data_transfer_len(req));
+ if (!req->sg)
+ goto out_err;
- req->p2p_dev = NULL;
- if (req->sq->qid && p2p_dev) {
- req->sg = pci_p2pmem_alloc_sgl(p2p_dev, &req->sg_cnt,
- req->transfer_len);
- if (req->sg) {
- req->p2p_dev = p2p_dev;
- return 0;
- }
- }
+ if (req->metadata_len) {
+ req->metadata_sg = pci_p2pmem_alloc_sgl(req->p2p_dev,
+ &req->metadata_sg_cnt, req->metadata_len);
+ if (!req->metadata_sg)
+ goto out_free_sg;
+ }
+ return 0;
+out_free_sg:
+ pci_p2pmem_free_sgl(req->p2p_dev, req->sg);
+out_err:
+ return -ENOMEM;
+}
- /*
- * If no P2P memory was available we fallback to using
- * regular memory
- */
+static bool nvmet_req_find_p2p_dev(struct nvmet_req *req)
+{
+ if (!IS_ENABLED(CONFIG_PCI_P2PDMA))
+ return false;
+
+ if (req->sq->ctrl && req->sq->qid && req->ns) {
+ req->p2p_dev = radix_tree_lookup(&req->sq->ctrl->p2p_ns_map,
+ req->ns->nsid);
+ if (req->p2p_dev)
+ return true;
}
- req->sg = sgl_alloc(req->transfer_len, GFP_KERNEL, &req->sg_cnt);
+ req->p2p_dev = NULL;
+ return false;
+}
+
+int nvmet_req_alloc_sgls(struct nvmet_req *req)
+{
+ if (nvmet_req_find_p2p_dev(req) && !nvmet_req_alloc_p2pmem_sgls(req))
+ return 0;
+
+ req->sg = sgl_alloc(nvmet_data_transfer_len(req), GFP_KERNEL,
+ &req->sg_cnt);
if (unlikely(!req->sg))
- return -ENOMEM;
+ goto out;
+
+ if (req->metadata_len) {
+ req->metadata_sg = sgl_alloc(req->metadata_len, GFP_KERNEL,
+ &req->metadata_sg_cnt);
+ if (unlikely(!req->metadata_sg))
+ goto out_free;
+ }
return 0;
+out_free:
+ sgl_free(req->sg);
+out:
+ return -ENOMEM;
}
-EXPORT_SYMBOL_GPL(nvmet_req_alloc_sgl);
+EXPORT_SYMBOL_GPL(nvmet_req_alloc_sgls);
-void nvmet_req_free_sgl(struct nvmet_req *req)
+void nvmet_req_free_sgls(struct nvmet_req *req)
{
- if (req->p2p_dev)
+ if (req->p2p_dev) {
pci_p2pmem_free_sgl(req->p2p_dev, req->sg);
- else
+ if (req->metadata_sg)
+ pci_p2pmem_free_sgl(req->p2p_dev, req->metadata_sg);
+ } else {
sgl_free(req->sg);
+ if (req->metadata_sg)
+ sgl_free(req->metadata_sg);
+ }
req->sg = NULL;
+ req->metadata_sg = NULL;
req->sg_cnt = 0;
+ req->metadata_sg_cnt = 0;
}
-EXPORT_SYMBOL_GPL(nvmet_req_free_sgl);
+EXPORT_SYMBOL_GPL(nvmet_req_free_sgls);
static inline bool nvmet_cc_en(u32 cc)
{
@@ -1357,6 +1420,7 @@ static void nvmet_ctrl_free(struct kref *ref)
ida_simple_remove(&cntlid_ida, ctrl->cntlid);
+ nvmet_async_events_free(ctrl);
kfree(ctrl->sqs);
kfree(ctrl->cqs);
kfree(ctrl->changed_ns_list);
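nvmet_req_alloc_sgls() above sizes the data SGL as transfer_len minus metadata_len and, when metadata is present, allocates a second SGL, unwinding the first if that fails. A minimal userspace sketch of the allocate-then-unwind pattern, with plain buffers standing in for scatterlists:

#include <stdio.h>
#include <stdlib.h>

static int alloc_buffers(size_t transfer_len, size_t metadata_len,
			 void **data, void **metadata)
{
	size_t data_len = transfer_len - metadata_len;

	*metadata = NULL;
	*data = malloc(data_len);
	if (!*data)
		return -1;

	if (metadata_len) {
		*metadata = malloc(metadata_len);
		if (!*metadata) {
			free(*data);	/* unwind the data allocation */
			*data = NULL;
			return -1;
		}
	}
	return 0;
}

int main(void)
{
	void *data, *meta;

	/* e.g. one 4 KB block plus an 8-byte T10-PI tuple of metadata */
	if (!alloc_buffers(4096 + 8, 8, &data, &meta)) {
		printf("data %p metadata %p\n", data, meta);
		free(meta);
		free(data);
	}
	return 0;
}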
diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c
index 0c2274b21e15..40cf0b6e6c9d 100644
--- a/drivers/nvme/target/discovery.c
+++ b/drivers/nvme/target/discovery.c
@@ -171,7 +171,7 @@ static void nvmet_execute_disc_get_log_page(struct nvmet_req *req)
u16 status = 0;
void *buffer;
- if (!nvmet_check_data_len(req, data_len))
+ if (!nvmet_check_transfer_len(req, data_len))
return;
if (req->cmd->get_log_page.lid != NVME_LOG_DISC) {
@@ -244,7 +244,7 @@ static void nvmet_execute_disc_identify(struct nvmet_req *req)
const char model[] = "Linux";
u16 status = 0;
- if (!nvmet_check_data_len(req, NVME_IDENTIFY_DATA_SIZE))
+ if (!nvmet_check_transfer_len(req, NVME_IDENTIFY_DATA_SIZE))
return;
if (req->cmd->identify.cns != NVME_ID_CNS_CTRL) {
@@ -298,7 +298,7 @@ static void nvmet_execute_disc_set_features(struct nvmet_req *req)
u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10);
u16 stat;
- if (!nvmet_check_data_len(req, 0))
+ if (!nvmet_check_transfer_len(req, 0))
return;
switch (cdw10 & 0xff) {
@@ -324,7 +324,7 @@ static void nvmet_execute_disc_get_features(struct nvmet_req *req)
u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10);
u16 stat = 0;
- if (!nvmet_check_data_len(req, 0))
+ if (!nvmet_check_transfer_len(req, 0))
return;
switch (cdw10 & 0xff) {
diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c
index feef15c38ec9..42bd12b8bf00 100644
--- a/drivers/nvme/target/fabrics-cmd.c
+++ b/drivers/nvme/target/fabrics-cmd.c
@@ -12,7 +12,7 @@ static void nvmet_execute_prop_set(struct nvmet_req *req)
u64 val = le64_to_cpu(req->cmd->prop_set.value);
u16 status = 0;
- if (!nvmet_check_data_len(req, 0))
+ if (!nvmet_check_transfer_len(req, 0))
return;
if (req->cmd->prop_set.attrib & 1) {
@@ -41,7 +41,7 @@ static void nvmet_execute_prop_get(struct nvmet_req *req)
u16 status = 0;
u64 val = 0;
- if (!nvmet_check_data_len(req, 0))
+ if (!nvmet_check_transfer_len(req, 0))
return;
if (req->cmd->prop_get.attrib & 1) {
@@ -156,7 +156,7 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req)
struct nvmet_ctrl *ctrl = NULL;
u16 status = 0;
- if (!nvmet_check_data_len(req, sizeof(struct nvmf_connect_data)))
+ if (!nvmet_check_transfer_len(req, sizeof(struct nvmf_connect_data)))
return;
d = kmalloc(sizeof(*d), GFP_KERNEL);
@@ -197,6 +197,8 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req)
goto out;
}
+ ctrl->pi_support = ctrl->port->pi_enable && ctrl->subsys->pi_support;
+
uuid_copy(&ctrl->hostid, &d->hostid);
status = nvmet_install_queue(ctrl, req);
@@ -205,8 +207,9 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req)
goto out;
}
- pr_info("creating controller %d for subsystem %s for NQN %s.\n",
- ctrl->cntlid, ctrl->subsys->subsysnqn, ctrl->hostnqn);
+ pr_info("creating controller %d for subsystem %s for NQN %s%s.\n",
+ ctrl->cntlid, ctrl->subsys->subsysnqn, ctrl->hostnqn,
+ ctrl->pi_support ? " T10-PI is enabled" : "");
req->cqe->result.u16 = cpu_to_le16(ctrl->cntlid);
out:
@@ -223,7 +226,7 @@ static void nvmet_execute_io_connect(struct nvmet_req *req)
u16 qid = le16_to_cpu(c->qid);
u16 status = 0;
- if (!nvmet_check_data_len(req, sizeof(struct nvmf_connect_data)))
+ if (!nvmet_check_transfer_len(req, sizeof(struct nvmf_connect_data)))
return;
d = kmalloc(sizeof(*d), GFP_KERNEL);
diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index a8ceb7721640..27fd3b5aa621 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -14,6 +14,7 @@
#include "nvmet.h"
#include <linux/nvme-fc-driver.h>
#include <linux/nvme-fc.h>
+#include "../host/fc.h"
/* *************************** Data Structures/Defines ****************** */
@@ -21,23 +22,21 @@
#define NVMET_LS_CTX_COUNT 256
-/* for this implementation, assume small single frame rqst/rsp */
-#define NVME_FC_MAX_LS_BUFFER_SIZE 2048
-
struct nvmet_fc_tgtport;
struct nvmet_fc_tgt_assoc;
-struct nvmet_fc_ls_iod {
- struct nvmefc_tgt_ls_req *lsreq;
+struct nvmet_fc_ls_iod { /* for an LS RQST RCV */
+ struct nvmefc_ls_rsp *lsrsp;
struct nvmefc_tgt_fcp_req *fcpreq; /* only if RS */
- struct list_head ls_list; /* tgtport->ls_list */
+ struct list_head ls_rcv_list; /* tgtport->ls_rcv_list */
struct nvmet_fc_tgtport *tgtport;
struct nvmet_fc_tgt_assoc *assoc;
+ void *hosthandle;
- u8 *rqstbuf;
- u8 *rspbuf;
+ union nvmefc_ls_requests *rqstbuf;
+ union nvmefc_ls_responses *rspbuf;
u16 rqstdatalen;
dma_addr_t rspdma;
@@ -46,6 +45,18 @@ struct nvmet_fc_ls_iod {
struct work_struct work;
} __aligned(sizeof(unsigned long long));
+struct nvmet_fc_ls_req_op { /* for an LS RQST XMT */
+ struct nvmefc_ls_req ls_req;
+
+ struct nvmet_fc_tgtport *tgtport;
+ void *hosthandle;
+
+ int ls_error;
+ struct list_head lsreq_list; /* tgtport->ls_req_list */
+ bool req_queued;
+};
+
+
/* desired maximum for a single sequence - if sg list allows it */
#define NVMET_FC_MAX_SEQ_LENGTH (256 * 1024)
@@ -83,7 +94,6 @@ struct nvmet_fc_fcp_iod {
};
struct nvmet_fc_tgtport {
-
struct nvmet_fc_target_port fc_target_port;
struct list_head tgt_list; /* nvmet_fc_target_list */
@@ -92,9 +102,11 @@ struct nvmet_fc_tgtport {
struct nvmet_fc_ls_iod *iod;
spinlock_t lock;
- struct list_head ls_list;
+ struct list_head ls_rcv_list;
+ struct list_head ls_req_list;
struct list_head ls_busylist;
struct list_head assoc_list;
+ struct list_head host_list;
struct ida assoc_cnt;
struct nvmet_fc_port_entry *pe;
struct kref ref;
@@ -136,14 +148,26 @@ struct nvmet_fc_tgt_queue {
struct nvmet_fc_fcp_iod fod[]; /* array of fcp_iods */
} __aligned(sizeof(unsigned long long));
+struct nvmet_fc_hostport {
+ struct nvmet_fc_tgtport *tgtport;
+ void *hosthandle;
+ struct list_head host_list;
+ struct kref ref;
+ u8 invalid;
+};
+
struct nvmet_fc_tgt_assoc {
u64 association_id;
u32 a_id;
+ atomic_t terminating;
struct nvmet_fc_tgtport *tgtport;
+ struct nvmet_fc_hostport *hostport;
+ struct nvmet_fc_ls_iod *rcv_disconn;
struct list_head a_list;
struct nvmet_fc_tgt_queue *queues[NVMET_NR_QUEUES + 1];
struct kref ref;
struct work_struct del_work;
+ atomic_t del_work_active;
};
@@ -227,6 +251,8 @@ static int nvmet_fc_tgtport_get(struct nvmet_fc_tgtport *tgtport);
static void nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport,
struct nvmet_fc_fcp_iod *fod);
static void nvmet_fc_delete_target_assoc(struct nvmet_fc_tgt_assoc *assoc);
+static void nvmet_fc_xmt_ls_rsp(struct nvmet_fc_tgtport *tgtport,
+ struct nvmet_fc_ls_iod *iod);
/* *********************** FC-NVME DMA Handling **************************** */
@@ -318,6 +344,188 @@ fc_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
}
+/* ********************** FC-NVME LS XMT Handling ************************* */
+
+
+static void
+__nvmet_fc_finish_ls_req(struct nvmet_fc_ls_req_op *lsop)
+{
+ struct nvmet_fc_tgtport *tgtport = lsop->tgtport;
+ struct nvmefc_ls_req *lsreq = &lsop->ls_req;
+ unsigned long flags;
+
+ spin_lock_irqsave(&tgtport->lock, flags);
+
+ if (!lsop->req_queued) {
+ spin_unlock_irqrestore(&tgtport->lock, flags);
+ return;
+ }
+
+ list_del(&lsop->lsreq_list);
+
+ lsop->req_queued = false;
+
+ spin_unlock_irqrestore(&tgtport->lock, flags);
+
+ fc_dma_unmap_single(tgtport->dev, lsreq->rqstdma,
+ (lsreq->rqstlen + lsreq->rsplen),
+ DMA_BIDIRECTIONAL);
+
+ nvmet_fc_tgtport_put(tgtport);
+}
+
+static int
+__nvmet_fc_send_ls_req(struct nvmet_fc_tgtport *tgtport,
+ struct nvmet_fc_ls_req_op *lsop,
+ void (*done)(struct nvmefc_ls_req *req, int status))
+{
+ struct nvmefc_ls_req *lsreq = &lsop->ls_req;
+ unsigned long flags;
+ int ret = 0;
+
+ if (!tgtport->ops->ls_req)
+ return -EOPNOTSUPP;
+
+ if (!nvmet_fc_tgtport_get(tgtport))
+ return -ESHUTDOWN;
+
+ lsreq->done = done;
+ lsop->req_queued = false;
+ INIT_LIST_HEAD(&lsop->lsreq_list);
+
+ lsreq->rqstdma = fc_dma_map_single(tgtport->dev, lsreq->rqstaddr,
+ lsreq->rqstlen + lsreq->rsplen,
+ DMA_BIDIRECTIONAL);
+ if (fc_dma_mapping_error(tgtport->dev, lsreq->rqstdma)) {
+ ret = -EFAULT;
+ goto out_puttgtport;
+ }
+ lsreq->rspdma = lsreq->rqstdma + lsreq->rqstlen;
+
+ spin_lock_irqsave(&tgtport->lock, flags);
+
+ list_add_tail(&lsop->lsreq_list, &tgtport->ls_req_list);
+
+ lsop->req_queued = true;
+
+ spin_unlock_irqrestore(&tgtport->lock, flags);
+
+ ret = tgtport->ops->ls_req(&tgtport->fc_target_port, lsop->hosthandle,
+ lsreq);
+ if (ret)
+ goto out_unlink;
+
+ return 0;
+
+out_unlink:
+ lsop->ls_error = ret;
+ spin_lock_irqsave(&tgtport->lock, flags);
+ lsop->req_queued = false;
+ list_del(&lsop->lsreq_list);
+ spin_unlock_irqrestore(&tgtport->lock, flags);
+ fc_dma_unmap_single(tgtport->dev, lsreq->rqstdma,
+ (lsreq->rqstlen + lsreq->rsplen),
+ DMA_BIDIRECTIONAL);
+out_puttgtport:
+ nvmet_fc_tgtport_put(tgtport);
+
+ return ret;
+}
+
+static int
+nvmet_fc_send_ls_req_async(struct nvmet_fc_tgtport *tgtport,
+ struct nvmet_fc_ls_req_op *lsop,
+ void (*done)(struct nvmefc_ls_req *req, int status))
+{
+ /* don't wait for completion */
+
+ return __nvmet_fc_send_ls_req(tgtport, lsop, done);
+}
+
+static void
+nvmet_fc_disconnect_assoc_done(struct nvmefc_ls_req *lsreq, int status)
+{
+ struct nvmet_fc_ls_req_op *lsop =
+ container_of(lsreq, struct nvmet_fc_ls_req_op, ls_req);
+
+ __nvmet_fc_finish_ls_req(lsop);
+
+ /* fc-nvme target doesn't care about success or failure of cmd */
+
+ kfree(lsop);
+}
+
+/*
+ * This routine sends a FC-NVME LS to disconnect (aka terminate)
+ * the FC-NVME Association. Terminating the association also
+ * terminates the FC-NVME connections (per queue, both admin and io
+ * queues) that are part of the association. E.g. things are torn
+ * down, and the related FC-NVME Association ID and Connection IDs
+ * become invalid.
+ *
+ * The behavior of the fc-nvme target is such that its
+ * understanding of the association and connections will implicitly
+ * be torn down. The action is implicit as it may be due to a loss of
+ * connectivity with the fc-nvme host, so the target may never get a
+ * response even if it tried. As such, the action of this routine
+ * is to asynchronously send the LS, ignore any results of the LS, and
+ * continue on with terminating the association. If the fc-nvme host
+ * is present and receives the LS, it too can tear down.
+ */
+static void
+nvmet_fc_xmt_disconnect_assoc(struct nvmet_fc_tgt_assoc *assoc)
+{
+ struct nvmet_fc_tgtport *tgtport = assoc->tgtport;
+ struct fcnvme_ls_disconnect_assoc_rqst *discon_rqst;
+ struct fcnvme_ls_disconnect_assoc_acc *discon_acc;
+ struct nvmet_fc_ls_req_op *lsop;
+ struct nvmefc_ls_req *lsreq;
+ int ret;
+
+ /*
+ * If ls_req is NULL or there is no hosthandle, it's an older lldd
+ * and sending no message is normal. Otherwise, send unless the
+ * hostport has already been invalidated by the lldd.
+ */
+ if (!tgtport->ops->ls_req || !assoc->hostport ||
+ assoc->hostport->invalid)
+ return;
+
+ lsop = kzalloc((sizeof(*lsop) +
+ sizeof(*discon_rqst) + sizeof(*discon_acc) +
+ tgtport->ops->lsrqst_priv_sz), GFP_KERNEL);
+ if (!lsop) {
+ dev_info(tgtport->dev,
+ "{%d:%d} send Disconnect Association failed: ENOMEM\n",
+ tgtport->fc_target_port.port_num, assoc->a_id);
+ return;
+ }
+
+ discon_rqst = (struct fcnvme_ls_disconnect_assoc_rqst *)&lsop[1];
+ discon_acc = (struct fcnvme_ls_disconnect_assoc_acc *)&discon_rqst[1];
+ lsreq = &lsop->ls_req;
+ if (tgtport->ops->lsrqst_priv_sz)
+ lsreq->private = (void *)&discon_acc[1];
+ else
+ lsreq->private = NULL;
+
+ lsop->tgtport = tgtport;
+ lsop->hosthandle = assoc->hostport->hosthandle;
+
+ nvmefc_fmt_lsreq_discon_assoc(lsreq, discon_rqst, discon_acc,
+ assoc->association_id);
+
+ ret = nvmet_fc_send_ls_req_async(tgtport, lsop,
+ nvmet_fc_disconnect_assoc_done);
+ if (ret) {
+ dev_info(tgtport->dev,
+ "{%d:%d} XMT Disconnect Association failed: %d\n",
+ tgtport->fc_target_port.port_num, assoc->a_id, ret);
+ kfree(lsop);
+ }
+}
+
+
/* *********************** FC-NVME Port Management ************************ */
@@ -337,17 +545,18 @@ nvmet_fc_alloc_ls_iodlist(struct nvmet_fc_tgtport *tgtport)
for (i = 0; i < NVMET_LS_CTX_COUNT; iod++, i++) {
INIT_WORK(&iod->work, nvmet_fc_handle_ls_rqst_work);
iod->tgtport = tgtport;
- list_add_tail(&iod->ls_list, &tgtport->ls_list);
+ list_add_tail(&iod->ls_rcv_list, &tgtport->ls_rcv_list);
- iod->rqstbuf = kcalloc(2, NVME_FC_MAX_LS_BUFFER_SIZE,
- GFP_KERNEL);
+ iod->rqstbuf = kzalloc(sizeof(union nvmefc_ls_requests) +
+ sizeof(union nvmefc_ls_responses),
+ GFP_KERNEL);
if (!iod->rqstbuf)
goto out_fail;
- iod->rspbuf = iod->rqstbuf + NVME_FC_MAX_LS_BUFFER_SIZE;
+ iod->rspbuf = (union nvmefc_ls_responses *)&iod->rqstbuf[1];
iod->rspdma = fc_dma_map_single(tgtport->dev, iod->rspbuf,
- NVME_FC_MAX_LS_BUFFER_SIZE,
+ sizeof(*iod->rspbuf),
DMA_TO_DEVICE);
if (fc_dma_mapping_error(tgtport->dev, iod->rspdma))
goto out_fail;
@@ -357,12 +566,12 @@ nvmet_fc_alloc_ls_iodlist(struct nvmet_fc_tgtport *tgtport)
out_fail:
kfree(iod->rqstbuf);
- list_del(&iod->ls_list);
+ list_del(&iod->ls_rcv_list);
for (iod--, i--; i >= 0; iod--, i--) {
fc_dma_unmap_single(tgtport->dev, iod->rspdma,
- NVME_FC_MAX_LS_BUFFER_SIZE, DMA_TO_DEVICE);
+ sizeof(*iod->rspbuf), DMA_TO_DEVICE);
kfree(iod->rqstbuf);
- list_del(&iod->ls_list);
+ list_del(&iod->ls_rcv_list);
}
kfree(iod);
@@ -378,10 +587,10 @@ nvmet_fc_free_ls_iodlist(struct nvmet_fc_tgtport *tgtport)
for (i = 0; i < NVMET_LS_CTX_COUNT; iod++, i++) {
fc_dma_unmap_single(tgtport->dev,
- iod->rspdma, NVME_FC_MAX_LS_BUFFER_SIZE,
+ iod->rspdma, sizeof(*iod->rspbuf),
DMA_TO_DEVICE);
kfree(iod->rqstbuf);
- list_del(&iod->ls_list);
+ list_del(&iod->ls_rcv_list);
}
kfree(tgtport->iod);
}
@@ -393,10 +602,10 @@ nvmet_fc_alloc_ls_iod(struct nvmet_fc_tgtport *tgtport)
unsigned long flags;
spin_lock_irqsave(&tgtport->lock, flags);
- iod = list_first_entry_or_null(&tgtport->ls_list,
- struct nvmet_fc_ls_iod, ls_list);
+ iod = list_first_entry_or_null(&tgtport->ls_rcv_list,
+ struct nvmet_fc_ls_iod, ls_rcv_list);
if (iod)
- list_move_tail(&iod->ls_list, &tgtport->ls_busylist);
+ list_move_tail(&iod->ls_rcv_list, &tgtport->ls_busylist);
spin_unlock_irqrestore(&tgtport->lock, flags);
return iod;
}
@@ -409,7 +618,7 @@ nvmet_fc_free_ls_iod(struct nvmet_fc_tgtport *tgtport,
unsigned long flags;
spin_lock_irqsave(&tgtport->lock, flags);
- list_move(&iod->ls_list, &tgtport->ls_list);
+ list_move(&iod->ls_rcv_list, &tgtport->ls_rcv_list);
spin_unlock_irqrestore(&tgtport->lock, flags);
}
@@ -678,31 +887,33 @@ nvmet_fc_delete_target_queue(struct nvmet_fc_tgt_queue *queue)
struct nvmet_fc_fcp_iod *fod = queue->fod;
struct nvmet_fc_defer_fcp_req *deferfcp, *tempptr;
unsigned long flags;
- int i, writedataactive;
+ int i;
bool disconnect;
disconnect = atomic_xchg(&queue->connected, 0);
+ /* if not connected, nothing to do */
+ if (!disconnect)
+ return;
+
spin_lock_irqsave(&queue->qlock, flags);
/* abort outstanding io's */
for (i = 0; i < queue->sqsize; fod++, i++) {
if (fod->active) {
spin_lock(&fod->flock);
fod->abort = true;
- writedataactive = fod->writedataactive;
- spin_unlock(&fod->flock);
/*
* only call lldd abort routine if waiting for
* writedata. other outstanding ops should finish
* on their own.
*/
- if (writedataactive) {
- spin_lock(&fod->flock);
+ if (fod->writedataactive) {
fod->aborted = true;
spin_unlock(&fod->flock);
tgtport->ops->fcp_abort(
&tgtport->fc_target_port, fod->fcpreq);
- }
+ } else
+ spin_unlock(&fod->flock);
}
}
@@ -742,8 +953,7 @@ nvmet_fc_delete_target_queue(struct nvmet_fc_tgt_queue *queue)
flush_workqueue(queue->work_q);
- if (disconnect)
- nvmet_sq_destroy(&queue->nvme_sq);
+ nvmet_sq_destroy(&queue->nvme_sq);
nvmet_fc_tgt_q_put(queue);
}
@@ -778,17 +988,114 @@ nvmet_fc_find_target_queue(struct nvmet_fc_tgtport *tgtport,
}
static void
+nvmet_fc_hostport_free(struct kref *ref)
+{
+ struct nvmet_fc_hostport *hostport =
+ container_of(ref, struct nvmet_fc_hostport, ref);
+ struct nvmet_fc_tgtport *tgtport = hostport->tgtport;
+ unsigned long flags;
+
+ spin_lock_irqsave(&tgtport->lock, flags);
+ list_del(&hostport->host_list);
+ spin_unlock_irqrestore(&tgtport->lock, flags);
+ if (tgtport->ops->host_release && hostport->invalid)
+ tgtport->ops->host_release(hostport->hosthandle);
+ kfree(hostport);
+ nvmet_fc_tgtport_put(tgtport);
+}
+
+static void
+nvmet_fc_hostport_put(struct nvmet_fc_hostport *hostport)
+{
+ kref_put(&hostport->ref, nvmet_fc_hostport_free);
+}
+
+static int
+nvmet_fc_hostport_get(struct nvmet_fc_hostport *hostport)
+{
+ return kref_get_unless_zero(&hostport->ref);
+}
+
+static void
+nvmet_fc_free_hostport(struct nvmet_fc_hostport *hostport)
+{
+ /* if LLDD not implemented, leave as NULL */
+ if (!hostport->hosthandle)
+ return;
+
+ nvmet_fc_hostport_put(hostport);
+}
+
+static struct nvmet_fc_hostport *
+nvmet_fc_alloc_hostport(struct nvmet_fc_tgtport *tgtport, void *hosthandle)
+{
+ struct nvmet_fc_hostport *newhost, *host, *match = NULL;
+ unsigned long flags;
+
+ /* if LLDD not implemented, leave as NULL */
+ if (!hosthandle)
+ return NULL;
+
+ /* take reference for what will be the newly allocated hostport */
+ if (!nvmet_fc_tgtport_get(tgtport))
+ return ERR_PTR(-EINVAL);
+
+ newhost = kzalloc(sizeof(*newhost), GFP_KERNEL);
+ if (!newhost) {
+ spin_lock_irqsave(&tgtport->lock, flags);
+ list_for_each_entry(host, &tgtport->host_list, host_list) {
+ if (host->hosthandle == hosthandle && !host->invalid) {
+ if (nvmet_fc_hostport_get(host)) {
+ match = host;
+ break;
+ }
+ }
+ }
+ spin_unlock_irqrestore(&tgtport->lock, flags);
+ /* no allocation - release reference */
+ nvmet_fc_tgtport_put(tgtport);
+ return (match) ? match : ERR_PTR(-ENOMEM);
+ }
+
+ newhost->tgtport = tgtport;
+ newhost->hosthandle = hosthandle;
+ INIT_LIST_HEAD(&newhost->host_list);
+ kref_init(&newhost->ref);
+
+ spin_lock_irqsave(&tgtport->lock, flags);
+ list_for_each_entry(host, &tgtport->host_list, host_list) {
+ if (host->hosthandle == hosthandle && !host->invalid) {
+ if (nvmet_fc_hostport_get(host)) {
+ match = host;
+ break;
+ }
+ }
+ }
+ if (match) {
+ kfree(newhost);
+ newhost = NULL;
+ /* releasing allocation - release reference */
+ nvmet_fc_tgtport_put(tgtport);
+ } else
+ list_add_tail(&newhost->host_list, &tgtport->host_list);
+ spin_unlock_irqrestore(&tgtport->lock, flags);
+
+ return (match) ? match : newhost;
+}
+
+static void
nvmet_fc_delete_assoc(struct work_struct *work)
{
struct nvmet_fc_tgt_assoc *assoc =
container_of(work, struct nvmet_fc_tgt_assoc, del_work);
nvmet_fc_delete_target_assoc(assoc);
+ atomic_set(&assoc->del_work_active, 0);
nvmet_fc_tgt_a_put(assoc);
}
static struct nvmet_fc_tgt_assoc *
-nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport)
+nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport, void *hosthandle)
{
struct nvmet_fc_tgt_assoc *assoc, *tmpassoc;
unsigned long flags;
@@ -805,13 +1112,19 @@ nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport)
goto out_free_assoc;
if (!nvmet_fc_tgtport_get(tgtport))
- goto out_ida_put;
+ goto out_ida;
+
+ assoc->hostport = nvmet_fc_alloc_hostport(tgtport, hosthandle);
+ if (IS_ERR(assoc->hostport))
+ goto out_put;
assoc->tgtport = tgtport;
assoc->a_id = idx;
INIT_LIST_HEAD(&assoc->a_list);
kref_init(&assoc->ref);
INIT_WORK(&assoc->del_work, nvmet_fc_delete_assoc);
+ atomic_set(&assoc->del_work_active, 0);
+ atomic_set(&assoc->terminating, 0);
while (needrandom) {
get_random_bytes(&ran, sizeof(ran) - BYTES_FOR_QID);
@@ -819,11 +1132,12 @@ nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport)
spin_lock_irqsave(&tgtport->lock, flags);
needrandom = false;
- list_for_each_entry(tmpassoc, &tgtport->assoc_list, a_list)
+ list_for_each_entry(tmpassoc, &tgtport->assoc_list, a_list) {
if (ran == tmpassoc->association_id) {
needrandom = true;
break;
}
+ }
if (!needrandom) {
assoc->association_id = ran;
list_add_tail(&assoc->a_list, &tgtport->assoc_list);
@@ -833,7 +1147,9 @@ nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport)
return assoc;
-out_ida_put:
+out_put:
+ nvmet_fc_tgtport_put(tgtport);
+out_ida:
ida_simple_remove(&tgtport->assoc_cnt, idx);
out_free_assoc:
kfree(assoc);
@@ -846,12 +1162,24 @@ nvmet_fc_target_assoc_free(struct kref *ref)
struct nvmet_fc_tgt_assoc *assoc =
container_of(ref, struct nvmet_fc_tgt_assoc, ref);
struct nvmet_fc_tgtport *tgtport = assoc->tgtport;
+ struct nvmet_fc_ls_iod *oldls;
unsigned long flags;
+ /* Send Disconnect now that all i/o has completed */
+ nvmet_fc_xmt_disconnect_assoc(assoc);
+
+ nvmet_fc_free_hostport(assoc->hostport);
spin_lock_irqsave(&tgtport->lock, flags);
list_del(&assoc->a_list);
+ oldls = assoc->rcv_disconn;
spin_unlock_irqrestore(&tgtport->lock, flags);
+ /* if pending Rcv Disconnect Association LS, send rsp now */
+ if (oldls)
+ nvmet_fc_xmt_ls_rsp(tgtport, oldls);
ida_simple_remove(&tgtport->assoc_cnt, assoc->a_id);
+ dev_info(tgtport->dev,
+ "{%d:%d} Association freed\n",
+ tgtport->fc_target_port.port_num, assoc->a_id);
kfree(assoc);
nvmet_fc_tgtport_put(tgtport);
}
@@ -874,7 +1202,13 @@ nvmet_fc_delete_target_assoc(struct nvmet_fc_tgt_assoc *assoc)
struct nvmet_fc_tgtport *tgtport = assoc->tgtport;
struct nvmet_fc_tgt_queue *queue;
unsigned long flags;
- int i;
+ int i, terminating;
+
+ terminating = atomic_xchg(&assoc->terminating, 1);
+
+ /* if already terminating, do nothing */
+ if (terminating)
+ return;
spin_lock_irqsave(&tgtport->lock, flags);
for (i = NVMET_NR_QUEUES; i >= 0; i--) {
@@ -890,6 +1224,10 @@ nvmet_fc_delete_target_assoc(struct nvmet_fc_tgt_assoc *assoc)
}
spin_unlock_irqrestore(&tgtport->lock, flags);
+ dev_info(tgtport->dev,
+ "{%d:%d} Association deleted\n",
+ tgtport->fc_target_port.port_num, assoc->a_id);
+
nvmet_fc_tgt_a_put(assoc);
}
@@ -1048,16 +1386,21 @@ nvmet_fc_register_targetport(struct nvmet_fc_port_info *pinfo,
newrec->fc_target_port.node_name = pinfo->node_name;
newrec->fc_target_port.port_name = pinfo->port_name;
- newrec->fc_target_port.private = &newrec[1];
+ if (template->target_priv_sz)
+ newrec->fc_target_port.private = &newrec[1];
+ else
+ newrec->fc_target_port.private = NULL;
newrec->fc_target_port.port_id = pinfo->port_id;
newrec->fc_target_port.port_num = idx;
INIT_LIST_HEAD(&newrec->tgt_list);
newrec->dev = dev;
newrec->ops = template;
spin_lock_init(&newrec->lock);
- INIT_LIST_HEAD(&newrec->ls_list);
+ INIT_LIST_HEAD(&newrec->ls_rcv_list);
+ INIT_LIST_HEAD(&newrec->ls_req_list);
INIT_LIST_HEAD(&newrec->ls_busylist);
INIT_LIST_HEAD(&newrec->assoc_list);
+ INIT_LIST_HEAD(&newrec->host_list);
kref_init(&newrec->ref);
ida_init(&newrec->assoc_cnt);
newrec->max_sg_cnt = template->max_sgl_segments;
@@ -1134,17 +1477,90 @@ __nvmet_fc_free_assocs(struct nvmet_fc_tgtport *tgtport)
{
struct nvmet_fc_tgt_assoc *assoc, *next;
unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&tgtport->lock, flags);
+ list_for_each_entry_safe(assoc, next,
+ &tgtport->assoc_list, a_list) {
+ if (!nvmet_fc_tgt_a_get(assoc))
+ continue;
+ ret = atomic_cmpxchg(&assoc->del_work_active, 0, 1);
+ if (ret == 0) {
+ if (!schedule_work(&assoc->del_work))
+ nvmet_fc_tgt_a_put(assoc);
+ } else {
+ /* already deleting - release local reference */
+ nvmet_fc_tgt_a_put(assoc);
+ }
+ }
+ spin_unlock_irqrestore(&tgtport->lock, flags);
+}
+
+/**
+ * nvmet_fc_invalidate_host - transport entry point called by an LLDD
+ * to remove references to a hosthandle for LS's.
+ *
+ * The nvmet-fc layer ensures that any references to the hosthandle
+ * on the targetport are forgotten (set to NULL). The LLDD will
+ * typically call this when a login with a remote host port has been
+ * lost, thus LS's for the remote host port are no longer possible.
+ *
+ * If an LS request is outstanding to the targetport/hosthandle (or
+ * issued concurrently with the call to invalidate the host), the
+ * LLDD is responsible for terminating/aborting the LS and completing
+ * the LS request. It is recommended that these terminations/aborts
+ * occur after the call to invalidate the host handle, to avoid additional
+ * retries by the nvmet-fc transport. The nvmet-fc transport may
+ * continue to reference the host handle while it cleans up outstanding
+ * NVME associations. The nvmet-fc transport will call the
+ * ops->host_release() callback to notify the LLDD that all references
+ * are complete and the related host handle can be recovered.
+ * Note: if there are no references, the callback may be called before
+ * the invalidate host call returns.
+ *
+ * @target_port: pointer to the (registered) target port that a prior
+ * LS was received on and which supplied the transport the
+ * hosthandle.
+ * @hosthandle: the handle (pointer) that represents the host port
+ * that no longer has connectivity and that LS's should
+ * no longer be directed to.
+ */
+void
+nvmet_fc_invalidate_host(struct nvmet_fc_target_port *target_port,
+ void *hosthandle)
+{
+ struct nvmet_fc_tgtport *tgtport = targetport_to_tgtport(target_port);
+ struct nvmet_fc_tgt_assoc *assoc, *next;
+ unsigned long flags;
+ bool noassoc = true;
+ int ret;
spin_lock_irqsave(&tgtport->lock, flags);
list_for_each_entry_safe(assoc, next,
&tgtport->assoc_list, a_list) {
+ if (!assoc->hostport ||
+ assoc->hostport->hosthandle != hosthandle)
+ continue;
if (!nvmet_fc_tgt_a_get(assoc))
continue;
- if (!schedule_work(&assoc->del_work))
+ assoc->hostport->invalid = 1;
+ noassoc = false;
+ ret = atomic_cmpxchg(&assoc->del_work_active, 0, 1);
+ if (ret == 0) {
+ if (!schedule_work(&assoc->del_work))
+ nvmet_fc_tgt_a_put(assoc);
+ } else {
+ /* already deleting - release local reference */
nvmet_fc_tgt_a_put(assoc);
+ }
}
spin_unlock_irqrestore(&tgtport->lock, flags);
+
+ /* if there's nothing to wait for - call the callback */
+ if (noassoc && tgtport->ops->host_release)
+ tgtport->ops->host_release(hosthandle);
}
+EXPORT_SYMBOL_GPL(nvmet_fc_invalidate_host);
/*
* nvmet layer has called to terminate an association
@@ -1157,6 +1573,7 @@ nvmet_fc_delete_ctrl(struct nvmet_ctrl *ctrl)
struct nvmet_fc_tgt_queue *queue;
unsigned long flags;
bool found_ctrl = false;
+ int ret;
/* this is a bit ugly, but don't want to make locks layered */
spin_lock_irqsave(&nvmet_fc_tgtlock, flags);
@@ -1180,8 +1597,14 @@ nvmet_fc_delete_ctrl(struct nvmet_ctrl *ctrl)
nvmet_fc_tgtport_put(tgtport);
if (found_ctrl) {
- if (!schedule_work(&assoc->del_work))
+ ret = atomic_cmpxchg(&assoc->del_work_active, 0, 1);
+ if (ret == 0) {
+ if (!schedule_work(&assoc->del_work))
+ nvmet_fc_tgt_a_put(assoc);
+ } else {
+ /* already deleting - release local reference */
nvmet_fc_tgt_a_put(assoc);
+ }
return;
}
@@ -1211,6 +1634,13 @@ nvmet_fc_unregister_targetport(struct nvmet_fc_target_port *target_port)
/* terminate any outstanding associations */
__nvmet_fc_free_assocs(tgtport);
+ /*
+ * should terminate LS's as well. However, LS's will be generated
+ * at the tail end of association termination, so they likely don't
+ * exist yet. And even if they did, it's worthwhile to just let
+ * them finish and targetport ref counting will clean things up.
+ */
+
nvmet_fc_tgtport_put(tgtport);
return 0;
@@ -1218,113 +1648,15 @@ nvmet_fc_unregister_targetport(struct nvmet_fc_target_port *target_port)
EXPORT_SYMBOL_GPL(nvmet_fc_unregister_targetport);
-/* *********************** FC-NVME LS Handling **************************** */
-
-
-static void
-nvmet_fc_format_rsp_hdr(void *buf, u8 ls_cmd, __be32 desc_len, u8 rqst_ls_cmd)
-{
- struct fcnvme_ls_acc_hdr *acc = buf;
-
- acc->w0.ls_cmd = ls_cmd;
- acc->desc_list_len = desc_len;
- acc->rqst.desc_tag = cpu_to_be32(FCNVME_LSDESC_RQST);
- acc->rqst.desc_len =
- fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rqst));
- acc->rqst.w0.ls_cmd = rqst_ls_cmd;
-}
+/* ********************** FC-NVME LS RCV Handling ************************* */
-static int
-nvmet_fc_format_rjt(void *buf, u16 buflen, u8 ls_cmd,
- u8 reason, u8 explanation, u8 vendor)
-{
- struct fcnvme_ls_rjt *rjt = buf;
-
- nvmet_fc_format_rsp_hdr(buf, FCNVME_LSDESC_RQST,
- fcnvme_lsdesc_len(sizeof(struct fcnvme_ls_rjt)),
- ls_cmd);
- rjt->rjt.desc_tag = cpu_to_be32(FCNVME_LSDESC_RJT);
- rjt->rjt.desc_len = fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rjt));
- rjt->rjt.reason_code = reason;
- rjt->rjt.reason_explanation = explanation;
- rjt->rjt.vendor = vendor;
-
- return sizeof(struct fcnvme_ls_rjt);
-}
-
-/* Validation Error indexes into the string table below */
-enum {
- VERR_NO_ERROR = 0,
- VERR_CR_ASSOC_LEN = 1,
- VERR_CR_ASSOC_RQST_LEN = 2,
- VERR_CR_ASSOC_CMD = 3,
- VERR_CR_ASSOC_CMD_LEN = 4,
- VERR_ERSP_RATIO = 5,
- VERR_ASSOC_ALLOC_FAIL = 6,
- VERR_QUEUE_ALLOC_FAIL = 7,
- VERR_CR_CONN_LEN = 8,
- VERR_CR_CONN_RQST_LEN = 9,
- VERR_ASSOC_ID = 10,
- VERR_ASSOC_ID_LEN = 11,
- VERR_NO_ASSOC = 12,
- VERR_CONN_ID = 13,
- VERR_CONN_ID_LEN = 14,
- VERR_NO_CONN = 15,
- VERR_CR_CONN_CMD = 16,
- VERR_CR_CONN_CMD_LEN = 17,
- VERR_DISCONN_LEN = 18,
- VERR_DISCONN_RQST_LEN = 19,
- VERR_DISCONN_CMD = 20,
- VERR_DISCONN_CMD_LEN = 21,
- VERR_DISCONN_SCOPE = 22,
- VERR_RS_LEN = 23,
- VERR_RS_RQST_LEN = 24,
- VERR_RS_CMD = 25,
- VERR_RS_CMD_LEN = 26,
- VERR_RS_RCTL = 27,
- VERR_RS_RO = 28,
-};
-
-static char *validation_errors[] = {
- "OK",
- "Bad CR_ASSOC Length",
- "Bad CR_ASSOC Rqst Length",
- "Not CR_ASSOC Cmd",
- "Bad CR_ASSOC Cmd Length",
- "Bad Ersp Ratio",
- "Association Allocation Failed",
- "Queue Allocation Failed",
- "Bad CR_CONN Length",
- "Bad CR_CONN Rqst Length",
- "Not Association ID",
- "Bad Association ID Length",
- "No Association",
- "Not Connection ID",
- "Bad Connection ID Length",
- "No Connection",
- "Not CR_CONN Cmd",
- "Bad CR_CONN Cmd Length",
- "Bad DISCONN Length",
- "Bad DISCONN Rqst Length",
- "Not DISCONN Cmd",
- "Bad DISCONN Cmd Length",
- "Bad Disconnect Scope",
- "Bad RS Length",
- "Bad RS Rqst Length",
- "Not RS Cmd",
- "Bad RS Cmd Length",
- "Bad RS R_CTL",
- "Bad RS Relative Offset",
-};
static void
nvmet_fc_ls_create_association(struct nvmet_fc_tgtport *tgtport,
struct nvmet_fc_ls_iod *iod)
{
- struct fcnvme_ls_cr_assoc_rqst *rqst =
- (struct fcnvme_ls_cr_assoc_rqst *)iod->rqstbuf;
- struct fcnvme_ls_cr_assoc_acc *acc =
- (struct fcnvme_ls_cr_assoc_acc *)iod->rspbuf;
+ struct fcnvme_ls_cr_assoc_rqst *rqst = &iod->rqstbuf->rq_cr_assoc;
+ struct fcnvme_ls_cr_assoc_acc *acc = &iod->rspbuf->rsp_cr_assoc;
struct nvmet_fc_tgt_queue *queue;
int ret = 0;
@@ -1356,7 +1688,8 @@ nvmet_fc_ls_create_association(struct nvmet_fc_tgtport *tgtport,
else {
/* new association w/ admin queue */
- iod->assoc = nvmet_fc_alloc_target_assoc(tgtport);
+ iod->assoc = nvmet_fc_alloc_target_assoc(
+ tgtport, iod->hosthandle);
if (!iod->assoc)
ret = VERR_ASSOC_ALLOC_FAIL;
else {
@@ -1371,8 +1704,8 @@ nvmet_fc_ls_create_association(struct nvmet_fc_tgtport *tgtport,
dev_err(tgtport->dev,
"Create Association LS failed: %s\n",
validation_errors[ret]);
- iod->lsreq->rsplen = nvmet_fc_format_rjt(acc,
- NVME_FC_MAX_LS_BUFFER_SIZE, rqst->w0.ls_cmd,
+ iod->lsrsp->rsplen = nvme_fc_format_rjt(acc,
+ sizeof(*acc), rqst->w0.ls_cmd,
FCNVME_RJT_RC_LOGIC,
FCNVME_RJT_EXP_NONE, 0);
return;
@@ -1382,11 +1715,15 @@ nvmet_fc_ls_create_association(struct nvmet_fc_tgtport *tgtport,
atomic_set(&queue->connected, 1);
queue->sqhd = 0; /* best place to init value */
+ dev_info(tgtport->dev,
+ "{%d:%d} Association created\n",
+ tgtport->fc_target_port.port_num, iod->assoc->a_id);
+
/* format a response */
- iod->lsreq->rsplen = sizeof(*acc);
+ iod->lsrsp->rsplen = sizeof(*acc);
- nvmet_fc_format_rsp_hdr(acc, FCNVME_LS_ACC,
+ nvme_fc_format_rsp_hdr(acc, FCNVME_LS_ACC,
fcnvme_lsdesc_len(
sizeof(struct fcnvme_ls_cr_assoc_acc)),
FCNVME_LS_CREATE_ASSOCIATION);
@@ -1407,10 +1744,8 @@ static void
nvmet_fc_ls_create_connection(struct nvmet_fc_tgtport *tgtport,
struct nvmet_fc_ls_iod *iod)
{
- struct fcnvme_ls_cr_conn_rqst *rqst =
- (struct fcnvme_ls_cr_conn_rqst *)iod->rqstbuf;
- struct fcnvme_ls_cr_conn_acc *acc =
- (struct fcnvme_ls_cr_conn_acc *)iod->rspbuf;
+ struct fcnvme_ls_cr_conn_rqst *rqst = &iod->rqstbuf->rq_cr_conn;
+ struct fcnvme_ls_cr_conn_acc *acc = &iod->rspbuf->rsp_cr_conn;
struct nvmet_fc_tgt_queue *queue;
int ret = 0;
@@ -1462,8 +1797,8 @@ nvmet_fc_ls_create_connection(struct nvmet_fc_tgtport *tgtport,
dev_err(tgtport->dev,
"Create Connection LS failed: %s\n",
validation_errors[ret]);
- iod->lsreq->rsplen = nvmet_fc_format_rjt(acc,
- NVME_FC_MAX_LS_BUFFER_SIZE, rqst->w0.ls_cmd,
+ iod->lsrsp->rsplen = nvme_fc_format_rjt(acc,
+ sizeof(*acc), rqst->w0.ls_cmd,
(ret == VERR_NO_ASSOC) ?
FCNVME_RJT_RC_INV_ASSOC :
FCNVME_RJT_RC_LOGIC,
@@ -1477,9 +1812,9 @@ nvmet_fc_ls_create_connection(struct nvmet_fc_tgtport *tgtport,
/* format a response */
- iod->lsreq->rsplen = sizeof(*acc);
+ iod->lsrsp->rsplen = sizeof(*acc);
- nvmet_fc_format_rsp_hdr(acc, FCNVME_LS_ACC,
+ nvme_fc_format_rsp_hdr(acc, FCNVME_LS_ACC,
fcnvme_lsdesc_len(sizeof(struct fcnvme_ls_cr_conn_acc)),
FCNVME_LS_CREATE_CONNECTION);
acc->connectid.desc_tag = cpu_to_be32(FCNVME_LSDESC_CONN_ID);
@@ -1491,46 +1826,28 @@ nvmet_fc_ls_create_connection(struct nvmet_fc_tgtport *tgtport,
be16_to_cpu(rqst->connect_cmd.qid)));
}
-static void
+/*
+ * Returns true if the LS response is to be transmitted
+ * Returns false if the LS response is to be delayed
+ */
+static int
nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport,
struct nvmet_fc_ls_iod *iod)
{
struct fcnvme_ls_disconnect_assoc_rqst *rqst =
- (struct fcnvme_ls_disconnect_assoc_rqst *)iod->rqstbuf;
+ &iod->rqstbuf->rq_dis_assoc;
struct fcnvme_ls_disconnect_assoc_acc *acc =
- (struct fcnvme_ls_disconnect_assoc_acc *)iod->rspbuf;
- struct nvmet_fc_tgt_assoc *assoc;
+ &iod->rspbuf->rsp_dis_assoc;
+ struct nvmet_fc_tgt_assoc *assoc = NULL;
+ struct nvmet_fc_ls_iod *oldls = NULL;
+ unsigned long flags;
int ret = 0;
memset(acc, 0, sizeof(*acc));
- if (iod->rqstdatalen < sizeof(struct fcnvme_ls_disconnect_assoc_rqst))
- ret = VERR_DISCONN_LEN;
- else if (rqst->desc_list_len !=
- fcnvme_lsdesc_len(
- sizeof(struct fcnvme_ls_disconnect_assoc_rqst)))
- ret = VERR_DISCONN_RQST_LEN;
- else if (rqst->associd.desc_tag != cpu_to_be32(FCNVME_LSDESC_ASSOC_ID))
- ret = VERR_ASSOC_ID;
- else if (rqst->associd.desc_len !=
- fcnvme_lsdesc_len(
- sizeof(struct fcnvme_lsdesc_assoc_id)))
- ret = VERR_ASSOC_ID_LEN;
- else if (rqst->discon_cmd.desc_tag !=
- cpu_to_be32(FCNVME_LSDESC_DISCONN_CMD))
- ret = VERR_DISCONN_CMD;
- else if (rqst->discon_cmd.desc_len !=
- fcnvme_lsdesc_len(
- sizeof(struct fcnvme_lsdesc_disconn_cmd)))
- ret = VERR_DISCONN_CMD_LEN;
- /*
- * As the standard changed on the LS, check if old format and scope
- * something other than Association (e.g. 0).
- */
- else if (rqst->discon_cmd.rsvd8[0])
- ret = VERR_DISCONN_SCOPE;
- else {
- /* match an active association */
+ ret = nvmefc_vldt_lsreq_discon_assoc(iod->rqstdatalen, rqst);
+ if (!ret) {
+ /* match an active association - takes an assoc ref if !NULL */
assoc = nvmet_fc_find_target_assoc(tgtport,
be64_to_cpu(rqst->associd.association_id));
iod->assoc = assoc;
@@ -1538,34 +1855,63 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport,
ret = VERR_NO_ASSOC;
}
- if (ret) {
+ if (ret || !assoc) {
dev_err(tgtport->dev,
"Disconnect LS failed: %s\n",
validation_errors[ret]);
- iod->lsreq->rsplen = nvmet_fc_format_rjt(acc,
- NVME_FC_MAX_LS_BUFFER_SIZE, rqst->w0.ls_cmd,
+ iod->lsrsp->rsplen = nvme_fc_format_rjt(acc,
+ sizeof(*acc), rqst->w0.ls_cmd,
(ret == VERR_NO_ASSOC) ?
FCNVME_RJT_RC_INV_ASSOC :
- (ret == VERR_NO_CONN) ?
- FCNVME_RJT_RC_INV_CONN :
- FCNVME_RJT_RC_LOGIC,
+ FCNVME_RJT_RC_LOGIC,
FCNVME_RJT_EXP_NONE, 0);
- return;
+ return true;
}
/* format a response */
- iod->lsreq->rsplen = sizeof(*acc);
+ iod->lsrsp->rsplen = sizeof(*acc);
- nvmet_fc_format_rsp_hdr(acc, FCNVME_LS_ACC,
+ nvme_fc_format_rsp_hdr(acc, FCNVME_LS_ACC,
fcnvme_lsdesc_len(
sizeof(struct fcnvme_ls_disconnect_assoc_acc)),
FCNVME_LS_DISCONNECT_ASSOC);
/* release get taken in nvmet_fc_find_target_assoc */
- nvmet_fc_tgt_a_put(iod->assoc);
+ nvmet_fc_tgt_a_put(assoc);
+
+ /*
+ * The rules for LS responses say the response cannot
+ * go back until ABTS's have been sent for all outstanding
+ * I/O and a Disconnect Association LS has been sent.
+ * So... save off the Disconnect LS to send the response
+ * later. If there was a prior LS already saved, replace
+ * it with the newer one and send a can't perform reject
+ * on the older one.
+ */
+ spin_lock_irqsave(&tgtport->lock, flags);
+ oldls = assoc->rcv_disconn;
+ assoc->rcv_disconn = iod;
+ spin_unlock_irqrestore(&tgtport->lock, flags);
+
+ nvmet_fc_delete_target_assoc(assoc);
- nvmet_fc_delete_target_assoc(iod->assoc);
+ if (oldls) {
+ dev_info(tgtport->dev,
+ "{%d:%d} Multiple Disconnect Association LS's "
+ "received\n",
+ tgtport->fc_target_port.port_num, assoc->a_id);
+ /* overwrite good response with bogus failure */
+ oldls->lsrsp->rsplen = nvme_fc_format_rjt(oldls->rspbuf,
+ sizeof(*iod->rspbuf),
+ /* ok to use rqst, LS is same */
+ rqst->w0.ls_cmd,
+ FCNVME_RJT_RC_UNAB,
+ FCNVME_RJT_EXP_NONE, 0);
+ nvmet_fc_xmt_ls_rsp(tgtport, oldls);
+ }
+
+ return false;
}
@@ -1577,13 +1923,13 @@ static void nvmet_fc_fcp_nvme_cmd_done(struct nvmet_req *nvme_req);
static const struct nvmet_fabrics_ops nvmet_fc_tgt_fcp_ops;
static void
-nvmet_fc_xmt_ls_rsp_done(struct nvmefc_tgt_ls_req *lsreq)
+nvmet_fc_xmt_ls_rsp_done(struct nvmefc_ls_rsp *lsrsp)
{
- struct nvmet_fc_ls_iod *iod = lsreq->nvmet_fc_private;
+ struct nvmet_fc_ls_iod *iod = lsrsp->nvme_fc_private;
struct nvmet_fc_tgtport *tgtport = iod->tgtport;
fc_dma_sync_single_for_cpu(tgtport->dev, iod->rspdma,
- NVME_FC_MAX_LS_BUFFER_SIZE, DMA_TO_DEVICE);
+ sizeof(*iod->rspbuf), DMA_TO_DEVICE);
nvmet_fc_free_ls_iod(tgtport, iod);
nvmet_fc_tgtport_put(tgtport);
}
@@ -1595,11 +1941,11 @@ nvmet_fc_xmt_ls_rsp(struct nvmet_fc_tgtport *tgtport,
int ret;
fc_dma_sync_single_for_device(tgtport->dev, iod->rspdma,
- NVME_FC_MAX_LS_BUFFER_SIZE, DMA_TO_DEVICE);
+ sizeof(*iod->rspbuf), DMA_TO_DEVICE);
- ret = tgtport->ops->xmt_ls_rsp(&tgtport->fc_target_port, iod->lsreq);
+ ret = tgtport->ops->xmt_ls_rsp(&tgtport->fc_target_port, iod->lsrsp);
if (ret)
- nvmet_fc_xmt_ls_rsp_done(iod->lsreq);
+ nvmet_fc_xmt_ls_rsp_done(iod->lsrsp);
}
/*
@@ -1609,15 +1955,15 @@ static void
nvmet_fc_handle_ls_rqst(struct nvmet_fc_tgtport *tgtport,
struct nvmet_fc_ls_iod *iod)
{
- struct fcnvme_ls_rqst_w0 *w0 =
- (struct fcnvme_ls_rqst_w0 *)iod->rqstbuf;
+ struct fcnvme_ls_rqst_w0 *w0 = &iod->rqstbuf->rq_cr_assoc.w0;
+ bool sendrsp = true;
- iod->lsreq->nvmet_fc_private = iod;
- iod->lsreq->rspbuf = iod->rspbuf;
- iod->lsreq->rspdma = iod->rspdma;
- iod->lsreq->done = nvmet_fc_xmt_ls_rsp_done;
+ iod->lsrsp->nvme_fc_private = iod;
+ iod->lsrsp->rspbuf = iod->rspbuf;
+ iod->lsrsp->rspdma = iod->rspdma;
+ iod->lsrsp->done = nvmet_fc_xmt_ls_rsp_done;
/* Be preventative. handlers will later set to valid length */
- iod->lsreq->rsplen = 0;
+ iod->lsrsp->rsplen = 0;
iod->assoc = NULL;
@@ -1637,15 +1983,16 @@ nvmet_fc_handle_ls_rqst(struct nvmet_fc_tgtport *tgtport,
break;
case FCNVME_LS_DISCONNECT_ASSOC:
/* Terminate a Queue/Connection or the Association */
- nvmet_fc_ls_disconnect(tgtport, iod);
+ sendrsp = nvmet_fc_ls_disconnect(tgtport, iod);
break;
default:
- iod->lsreq->rsplen = nvmet_fc_format_rjt(iod->rspbuf,
- NVME_FC_MAX_LS_BUFFER_SIZE, w0->ls_cmd,
+ iod->lsrsp->rsplen = nvme_fc_format_rjt(iod->rspbuf,
+ sizeof(*iod->rspbuf), w0->ls_cmd,
FCNVME_RJT_RC_INVAL, FCNVME_RJT_EXP_NONE, 0);
}
- nvmet_fc_xmt_ls_rsp(tgtport, iod);
+ if (sendrsp)
+ nvmet_fc_xmt_ls_rsp(tgtport, iod);
}
/*
@@ -1674,35 +2021,53 @@ nvmet_fc_handle_ls_rqst_work(struct work_struct *work)
*
* @target_port: pointer to the (registered) target port the LS was
* received on.
- * @lsreq: pointer to a lsreq request structure to be used to reference
+ * @lsrsp: pointer to a lsrsp structure to be used to reference
* the exchange corresponding to the LS.
* @lsreqbuf: pointer to the buffer containing the LS Request
* @lsreqbuf_len: length, in bytes, of the received LS request
*/
int
nvmet_fc_rcv_ls_req(struct nvmet_fc_target_port *target_port,
- struct nvmefc_tgt_ls_req *lsreq,
+ void *hosthandle,
+ struct nvmefc_ls_rsp *lsrsp,
void *lsreqbuf, u32 lsreqbuf_len)
{
struct nvmet_fc_tgtport *tgtport = targetport_to_tgtport(target_port);
struct nvmet_fc_ls_iod *iod;
-
- if (lsreqbuf_len > NVME_FC_MAX_LS_BUFFER_SIZE)
+ struct fcnvme_ls_rqst_w0 *w0 = (struct fcnvme_ls_rqst_w0 *)lsreqbuf;
+
+ if (lsreqbuf_len > sizeof(union nvmefc_ls_requests)) {
+ dev_info(tgtport->dev,
+ "RCV %s LS failed: payload too large (%d)\n",
+ (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ?
+ nvmefc_ls_names[w0->ls_cmd] : "",
+ lsreqbuf_len);
return -E2BIG;
+ }
- if (!nvmet_fc_tgtport_get(tgtport))
+ if (!nvmet_fc_tgtport_get(tgtport)) {
+ dev_info(tgtport->dev,
+ "RCV %s LS failed: target deleting\n",
+ (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ?
+ nvmefc_ls_names[w0->ls_cmd] : "");
return -ESHUTDOWN;
+ }
iod = nvmet_fc_alloc_ls_iod(tgtport);
if (!iod) {
+ dev_info(tgtport->dev,
+ "RCV %s LS failed: context allocation failed\n",
+ (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ?
+ nvmefc_ls_names[w0->ls_cmd] : "");
nvmet_fc_tgtport_put(tgtport);
return -ENOENT;
}
- iod->lsreq = lsreq;
+ iod->lsrsp = lsrsp;
iod->fcpreq = NULL;
memcpy(iod->rqstbuf, lsreqbuf, lsreqbuf_len);
iod->rqstdatalen = lsreqbuf_len;
+ iod->hosthandle = hosthandle;
schedule_work(&iod->work);
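
The fc.c changes above give the LLDD three cooperating hooks: nvmet_fc_rcv_ls_req() now takes an opaque hosthandle alongside the nvmefc_ls_rsp, nvmet_fc_invalidate_host() tells the transport to forget that handle, and the optional ops->host_release() callback reports when the last reference is dropped. A minimal sketch of how a driver might wire these together is shown below; it is illustrative only, and every my_* identifier, the rport fields, and the embedded ls_rsp member are assumptions rather than code from this series (fcloop, in the next file, is the in-tree example and simply passes its rport pointer as the hosthandle).

	/* Hypothetical LLDD glue -- all my_* names are assumptions. */
	static void my_lldd_rcv_nvme_ls(struct my_rport *rport,
					void *lsreqbuf, u32 lsreqbuf_len)
	{
		/* Hand the LS to nvmet-fc, using the rport pointer as hosthandle. */
		if (nvmet_fc_rcv_ls_req(rport->targetport, rport,
					&rport->ls_rsp, lsreqbuf, lsreqbuf_len))
			my_lldd_reject_ls_exchange(rport);	/* LLDD-specific cleanup */
	}

	static void my_lldd_connectivity_lost(struct my_rport *rport)
	{
		/*
		 * Ask nvmet-fc to drop its references to the handle; associations
		 * are torn down asynchronously and host_release() may fire even
		 * before this call returns if nothing was outstanding.
		 */
		nvmet_fc_invalidate_host(rport->targetport, rport);
	}

	static void my_lldd_host_release(void *hosthandle)
	{
		struct my_rport *rport = hosthandle;

		my_rport_put(rport);	/* login context may now be reclaimed */
	}
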
diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c
index f69ce66e2d44..2ff1d1334a03 100644
--- a/drivers/nvme/target/fcloop.c
+++ b/drivers/nvme/target/fcloop.c
@@ -208,10 +208,13 @@ struct fcloop_rport {
};
struct fcloop_tport {
- struct nvmet_fc_target_port *targetport;
- struct nvme_fc_remote_port *remoteport;
- struct fcloop_nport *nport;
- struct fcloop_lport *lport;
+ struct nvmet_fc_target_port *targetport;
+ struct nvme_fc_remote_port *remoteport;
+ struct fcloop_nport *nport;
+ struct fcloop_lport *lport;
+ spinlock_t lock;
+ struct list_head ls_list;
+ struct work_struct ls_work;
};
struct fcloop_nport {
@@ -228,7 +231,8 @@ struct fcloop_nport {
struct fcloop_lsreq {
struct nvmefc_ls_req *lsreq;
- struct nvmefc_tgt_ls_req tgt_ls_req;
+ struct nvmefc_ls_rsp ls_rsp;
+ int lsdir; /* H2T or T2H */
int status;
struct list_head ls_list; /* fcloop_rport->ls_list */
};
@@ -267,9 +271,9 @@ struct fcloop_ini_fcpreq {
};
static inline struct fcloop_lsreq *
-tgt_ls_req_to_lsreq(struct nvmefc_tgt_ls_req *tgt_lsreq)
+ls_rsp_to_lsreq(struct nvmefc_ls_rsp *lsrsp)
{
- return container_of(tgt_lsreq, struct fcloop_lsreq, tgt_ls_req);
+ return container_of(lsrsp, struct fcloop_lsreq, ls_rsp);
}
static inline struct fcloop_fcpreq *
@@ -323,7 +327,7 @@ fcloop_rport_lsrqst_work(struct work_struct *work)
}
static int
-fcloop_ls_req(struct nvme_fc_local_port *localport,
+fcloop_h2t_ls_req(struct nvme_fc_local_port *localport,
struct nvme_fc_remote_port *remoteport,
struct nvmefc_ls_req *lsreq)
{
@@ -344,27 +348,28 @@ fcloop_ls_req(struct nvme_fc_local_port *localport,
}
tls_req->status = 0;
- ret = nvmet_fc_rcv_ls_req(rport->targetport, &tls_req->tgt_ls_req,
- lsreq->rqstaddr, lsreq->rqstlen);
+ ret = nvmet_fc_rcv_ls_req(rport->targetport, rport,
+ &tls_req->ls_rsp,
+ lsreq->rqstaddr, lsreq->rqstlen);
return ret;
}
static int
-fcloop_xmt_ls_rsp(struct nvmet_fc_target_port *targetport,
- struct nvmefc_tgt_ls_req *tgt_lsreq)
+fcloop_h2t_xmt_ls_rsp(struct nvmet_fc_target_port *targetport,
+ struct nvmefc_ls_rsp *lsrsp)
{
- struct fcloop_lsreq *tls_req = tgt_ls_req_to_lsreq(tgt_lsreq);
+ struct fcloop_lsreq *tls_req = ls_rsp_to_lsreq(lsrsp);
struct nvmefc_ls_req *lsreq = tls_req->lsreq;
struct fcloop_tport *tport = targetport->private;
struct nvme_fc_remote_port *remoteport = tport->remoteport;
struct fcloop_rport *rport;
- memcpy(lsreq->rspaddr, tgt_lsreq->rspbuf,
- ((lsreq->rsplen < tgt_lsreq->rsplen) ?
- lsreq->rsplen : tgt_lsreq->rsplen));
+ memcpy(lsreq->rspaddr, lsrsp->rspbuf,
+ ((lsreq->rsplen < lsrsp->rsplen) ?
+ lsreq->rsplen : lsrsp->rsplen));
- tgt_lsreq->done(tgt_lsreq);
+ lsrsp->done(lsrsp);
if (remoteport) {
rport = remoteport->private;
@@ -377,6 +382,99 @@ fcloop_xmt_ls_rsp(struct nvmet_fc_target_port *targetport,
return 0;
}
+static void
+fcloop_tport_lsrqst_work(struct work_struct *work)
+{
+ struct fcloop_tport *tport =
+ container_of(work, struct fcloop_tport, ls_work);
+ struct fcloop_lsreq *tls_req;
+
+ spin_lock(&tport->lock);
+ for (;;) {
+ tls_req = list_first_entry_or_null(&tport->ls_list,
+ struct fcloop_lsreq, ls_list);
+ if (!tls_req)
+ break;
+
+ list_del(&tls_req->ls_list);
+ spin_unlock(&tport->lock);
+
+ tls_req->lsreq->done(tls_req->lsreq, tls_req->status);
+ /*
+ * callee may free memory containing tls_req.
+ * do not reference lsreq after this.
+ */
+
+ spin_lock(&tport->lock);
+ }
+ spin_unlock(&tport->lock);
+}
+
+static int
+fcloop_t2h_ls_req(struct nvmet_fc_target_port *targetport, void *hosthandle,
+ struct nvmefc_ls_req *lsreq)
+{
+ struct fcloop_lsreq *tls_req = lsreq->private;
+ struct fcloop_tport *tport = targetport->private;
+ int ret = 0;
+
+ /*
+ * hosthandle should be the dst.rport value.
+ * hosthandle ignored as fcloop currently is
+ * 1:1 tgtport vs remoteport
+ */
+ tls_req->lsreq = lsreq;
+ INIT_LIST_HEAD(&tls_req->ls_list);
+
+ if (!tport->remoteport) {
+ tls_req->status = -ECONNREFUSED;
+ spin_lock(&tport->lock);
+ list_add_tail(&tls_req->ls_list, &tport->ls_list);
+ spin_unlock(&tport->lock);
+ schedule_work(&tport->ls_work);
+ return ret;
+ }
+
+ tls_req->status = 0;
+ ret = nvme_fc_rcv_ls_req(tport->remoteport, &tls_req->ls_rsp,
+ lsreq->rqstaddr, lsreq->rqstlen);
+
+ return ret;
+}
+
+static int
+fcloop_t2h_xmt_ls_rsp(struct nvme_fc_local_port *localport,
+ struct nvme_fc_remote_port *remoteport,
+ struct nvmefc_ls_rsp *lsrsp)
+{
+ struct fcloop_lsreq *tls_req = ls_rsp_to_lsreq(lsrsp);
+ struct nvmefc_ls_req *lsreq = tls_req->lsreq;
+ struct fcloop_rport *rport = remoteport->private;
+ struct nvmet_fc_target_port *targetport = rport->targetport;
+ struct fcloop_tport *tport;
+
+ memcpy(lsreq->rspaddr, lsrsp->rspbuf,
+ ((lsreq->rsplen < lsrsp->rsplen) ?
+ lsreq->rsplen : lsrsp->rsplen));
+ lsrsp->done(lsrsp);
+
+ if (targetport) {
+ tport = targetport->private;
+ spin_lock(&tport->lock);
+ list_add_tail(&tls_req->ls_list, &tport->ls_list);
+ spin_unlock(&tport->lock);
+ schedule_work(&tport->ls_work);
+ }
+
+ return 0;
+}
+
+static void
+fcloop_t2h_host_release(void *hosthandle)
+{
+ /* host handle ignored for now */
+}
+
/*
 * Simulate reception of an RSCN and convert it to an initiator transport
* call to rescan a remote port.
@@ -762,13 +860,19 @@ fcloop_fcp_req_release(struct nvmet_fc_target_port *tgtport,
}
static void
-fcloop_ls_abort(struct nvme_fc_local_port *localport,
+fcloop_h2t_ls_abort(struct nvme_fc_local_port *localport,
struct nvme_fc_remote_port *remoteport,
struct nvmefc_ls_req *lsreq)
{
}
static void
+fcloop_t2h_ls_abort(struct nvmet_fc_target_port *targetport,
+ void *hosthandle, struct nvmefc_ls_req *lsreq)
+{
+}
+
+static void
fcloop_fcp_abort(struct nvme_fc_local_port *localport,
struct nvme_fc_remote_port *remoteport,
void *hw_queue_handle,
@@ -867,6 +971,7 @@ fcloop_targetport_delete(struct nvmet_fc_target_port *targetport)
{
struct fcloop_tport *tport = targetport->private;
+ flush_work(&tport->ls_work);
fcloop_nport_put(tport->nport);
}
@@ -879,10 +984,11 @@ static struct nvme_fc_port_template fctemplate = {
.remoteport_delete = fcloop_remoteport_delete,
.create_queue = fcloop_create_queue,
.delete_queue = fcloop_delete_queue,
- .ls_req = fcloop_ls_req,
+ .ls_req = fcloop_h2t_ls_req,
.fcp_io = fcloop_fcp_req,
- .ls_abort = fcloop_ls_abort,
+ .ls_abort = fcloop_h2t_ls_abort,
.fcp_abort = fcloop_fcp_abort,
+ .xmt_ls_rsp = fcloop_t2h_xmt_ls_rsp,
.max_hw_queues = FCLOOP_HW_QUEUES,
.max_sgl_segments = FCLOOP_SGL_SEGS,
.max_dif_sgl_segments = FCLOOP_SGL_SEGS,
@@ -896,11 +1002,14 @@ static struct nvme_fc_port_template fctemplate = {
static struct nvmet_fc_target_template tgttemplate = {
.targetport_delete = fcloop_targetport_delete,
- .xmt_ls_rsp = fcloop_xmt_ls_rsp,
+ .xmt_ls_rsp = fcloop_h2t_xmt_ls_rsp,
.fcp_op = fcloop_fcp_op,
.fcp_abort = fcloop_tgt_fcp_abort,
.fcp_req_release = fcloop_fcp_req_release,
.discovery_event = fcloop_tgt_discovery_evt,
+ .ls_req = fcloop_t2h_ls_req,
+ .ls_abort = fcloop_t2h_ls_abort,
+ .host_release = fcloop_t2h_host_release,
.max_hw_queues = FCLOOP_HW_QUEUES,
.max_sgl_segments = FCLOOP_SGL_SEGS,
.max_dif_sgl_segments = FCLOOP_SGL_SEGS,
@@ -909,6 +1018,7 @@ static struct nvmet_fc_target_template tgttemplate = {
.target_features = 0,
/* sizes of additional private data for data structures */
.target_priv_sz = sizeof(struct fcloop_tport),
+ .lsrqst_priv_sz = sizeof(struct fcloop_lsreq),
};
static ssize_t
@@ -1258,6 +1368,9 @@ fcloop_create_target_port(struct device *dev, struct device_attribute *attr,
tport->nport = nport;
tport->lport = nport->lport;
nport->tport = tport;
+ spin_lock_init(&tport->lock);
+ INIT_WORK(&tport->ls_work, fcloop_tport_lsrqst_work);
+ INIT_LIST_HEAD(&tport->ls_list);
return count;
}
diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c
index ea0e596be15d..3dd6f566a240 100644
--- a/drivers/nvme/target/io-cmd-bdev.c
+++ b/drivers/nvme/target/io-cmd-bdev.c
@@ -47,6 +47,22 @@ void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id)
id->nows = to0based(ql->io_opt / ql->logical_block_size);
}
+static void nvmet_bdev_ns_enable_integrity(struct nvmet_ns *ns)
+{
+ struct blk_integrity *bi = bdev_get_integrity(ns->bdev);
+
+ if (bi) {
+ ns->metadata_size = bi->tuple_size;
+ if (bi->profile == &t10_pi_type1_crc)
+ ns->pi_type = NVME_NS_DPS_PI_TYPE1;
+ else if (bi->profile == &t10_pi_type3_crc)
+ ns->pi_type = NVME_NS_DPS_PI_TYPE3;
+ else
+ /* Unsupported metadata type */
+ ns->metadata_size = 0;
+ }
+}
+
int nvmet_bdev_ns_enable(struct nvmet_ns *ns)
{
int ret;
@@ -64,6 +80,12 @@ int nvmet_bdev_ns_enable(struct nvmet_ns *ns)
}
ns->size = i_size_read(ns->bdev->bd_inode);
ns->blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev));
+
+ ns->pi_type = 0;
+ ns->metadata_size = 0;
+ if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY_T10))
+ nvmet_bdev_ns_enable_integrity(ns);
+
return 0;
}
@@ -75,6 +97,11 @@ void nvmet_bdev_ns_disable(struct nvmet_ns *ns)
}
}
+void nvmet_bdev_ns_revalidate(struct nvmet_ns *ns)
+{
+ ns->size = i_size_read(ns->bdev->bd_inode);
+}
+
static u16 blk_to_nvme_status(struct nvmet_req *req, blk_status_t blk_sts)
{
u16 status = NVME_SC_SUCCESS;
@@ -142,6 +169,61 @@ static void nvmet_bio_done(struct bio *bio)
bio_put(bio);
}
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+static int nvmet_bdev_alloc_bip(struct nvmet_req *req, struct bio *bio,
+ struct sg_mapping_iter *miter)
+{
+ struct blk_integrity *bi;
+ struct bio_integrity_payload *bip;
+ struct block_device *bdev = req->ns->bdev;
+ int rc;
+ size_t resid, len;
+
+ bi = bdev_get_integrity(bdev);
+ if (unlikely(!bi)) {
+ pr_err("Unable to locate bio_integrity\n");
+ return -ENODEV;
+ }
+
+ bip = bio_integrity_alloc(bio, GFP_NOIO,
+ min_t(unsigned int, req->metadata_sg_cnt, BIO_MAX_PAGES));
+ if (IS_ERR(bip)) {
+ pr_err("Unable to allocate bio_integrity_payload\n");
+ return PTR_ERR(bip);
+ }
+
+ bip->bip_iter.bi_size = bio_integrity_bytes(bi, bio_sectors(bio));
+ /* virtual start sector must be in integrity interval units */
+ bip_set_seed(bip, bio->bi_iter.bi_sector >>
+ (bi->interval_exp - SECTOR_SHIFT));
+
+ resid = bip->bip_iter.bi_size;
+ while (resid > 0 && sg_miter_next(miter)) {
+ len = min_t(size_t, miter->length, resid);
+ rc = bio_integrity_add_page(bio, miter->page, len,
+ offset_in_page(miter->addr));
+ if (unlikely(rc != len)) {
+ pr_err("bio_integrity_add_page() failed; %d\n", rc);
+ sg_miter_stop(miter);
+ return -ENOMEM;
+ }
+
+ resid -= len;
+ if (len < miter->length)
+ miter->consumed -= miter->length - len;
+ }
+ sg_miter_stop(miter);
+
+ return 0;
+}
+#else
+static int nvmet_bdev_alloc_bip(struct nvmet_req *req, struct bio *bio,
+ struct sg_mapping_iter *miter)
+{
+ return -EINVAL;
+}
+#endif /* CONFIG_BLK_DEV_INTEGRITY */
+
static void nvmet_bdev_execute_rw(struct nvmet_req *req)
{
int sg_cnt = req->sg_cnt;
@@ -149,9 +231,12 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
struct scatterlist *sg;
struct blk_plug plug;
sector_t sector;
- int op, i;
+ int op, i, rc;
+ struct sg_mapping_iter prot_miter;
+ unsigned int iter_flags;
+ unsigned int total_len = nvmet_rw_data_len(req) + req->metadata_len;
- if (!nvmet_check_data_len(req, nvmet_rw_len(req)))
+ if (!nvmet_check_transfer_len(req, total_len))
return;
if (!req->sg_cnt) {
@@ -163,8 +248,10 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
op = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE;
if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA))
op |= REQ_FUA;
+ iter_flags = SG_MITER_TO_SG;
} else {
op = REQ_OP_READ;
+ iter_flags = SG_MITER_FROM_SG;
}
if (is_pci_p2pdma_page(sg_page(req->sg)))
@@ -186,11 +273,24 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
bio->bi_opf = op;
blk_start_plug(&plug);
+ if (req->metadata_len)
+ sg_miter_start(&prot_miter, req->metadata_sg,
+ req->metadata_sg_cnt, iter_flags);
+
for_each_sg(req->sg, sg, req->sg_cnt, i) {
while (bio_add_page(bio, sg_page(sg), sg->length, sg->offset)
!= sg->length) {
struct bio *prev = bio;
+ if (req->metadata_len) {
+ rc = nvmet_bdev_alloc_bip(req, bio,
+ &prot_miter);
+ if (unlikely(rc)) {
+ bio_io_error(bio);
+ return;
+ }
+ }
+
bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES));
bio_set_dev(bio, req->ns->bdev);
bio->bi_iter.bi_sector = sector;
@@ -204,6 +304,14 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
sg_cnt--;
}
+ if (req->metadata_len) {
+ rc = nvmet_bdev_alloc_bip(req, bio, &prot_miter);
+ if (unlikely(rc)) {
+ bio_io_error(bio);
+ return;
+ }
+ }
+
submit_bio(bio);
blk_finish_plug(&plug);
}
@@ -212,7 +320,7 @@ static void nvmet_bdev_execute_flush(struct nvmet_req *req)
{
struct bio *bio = &req->b.inline_bio;
- if (!nvmet_check_data_len(req, 0))
+ if (!nvmet_check_transfer_len(req, 0))
return;
bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
@@ -226,7 +334,7 @@ static void nvmet_bdev_execute_flush(struct nvmet_req *req)
u16 nvmet_bdev_flush(struct nvmet_req *req)
{
- if (blkdev_issue_flush(req->ns->bdev, GFP_KERNEL, NULL))
+ if (blkdev_issue_flush(req->ns->bdev, GFP_KERNEL))
return NVME_SC_INTERNAL | NVME_SC_DNR;
return 0;
}
@@ -304,7 +412,7 @@ static void nvmet_bdev_execute_write_zeroes(struct nvmet_req *req)
sector_t nr_sector;
int ret;
- if (!nvmet_check_data_len(req, 0))
+ if (!nvmet_check_transfer_len(req, 0))
return;
sector = le64_to_cpu(write_zeroes->slba) <<
@@ -331,6 +439,8 @@ u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req)
case nvme_cmd_read:
case nvme_cmd_write:
req->execute = nvmet_bdev_execute_rw;
+ if (req->sq->ctrl->pi_support && nvmet_ns_has_pi(req->ns))
+ req->metadata_len = nvmet_rw_metadata_len(req);
return 0;
case nvme_cmd_flush:
req->execute = nvmet_bdev_execute_flush;
diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c
index cd5670b83118..0abbefd9925e 100644
--- a/drivers/nvme/target/io-cmd-file.c
+++ b/drivers/nvme/target/io-cmd-file.c
@@ -13,6 +13,18 @@
#define NVMET_MAX_MPOOL_BVEC 16
#define NVMET_MIN_MPOOL_OBJ 16
+int nvmet_file_ns_revalidate(struct nvmet_ns *ns)
+{
+ struct kstat stat;
+ int ret;
+
+ ret = vfs_getattr(&ns->file->f_path, &stat, STATX_SIZE,
+ AT_STATX_FORCE_SYNC);
+ if (!ret)
+ ns->size = stat.size;
+ return ret;
+}
+
void nvmet_file_ns_disable(struct nvmet_ns *ns)
{
if (ns->file) {
@@ -30,7 +42,6 @@ void nvmet_file_ns_disable(struct nvmet_ns *ns)
int nvmet_file_ns_enable(struct nvmet_ns *ns)
{
int flags = O_RDWR | O_LARGEFILE;
- struct kstat stat;
int ret;
if (!ns->buffered_io)
@@ -43,12 +54,10 @@ int nvmet_file_ns_enable(struct nvmet_ns *ns)
return PTR_ERR(ns->file);
}
- ret = vfs_getattr(&ns->file->f_path,
- &stat, STATX_SIZE, AT_STATX_FORCE_SYNC);
+ ret = nvmet_file_ns_revalidate(ns);
if (ret)
goto err;
- ns->size = stat.size;
/*
* i_blkbits can be greater than the universally accepted upper bound,
* so make sure we export a sane namespace lba_shift.
@@ -232,7 +241,7 @@ static void nvmet_file_execute_rw(struct nvmet_req *req)
{
ssize_t nr_bvec = req->sg_cnt;
- if (!nvmet_check_data_len(req, nvmet_rw_len(req)))
+ if (!nvmet_check_transfer_len(req, nvmet_rw_data_len(req)))
return;
if (!req->sg_cnt || !nr_bvec) {
@@ -276,7 +285,7 @@ static void nvmet_file_flush_work(struct work_struct *w)
static void nvmet_file_execute_flush(struct nvmet_req *req)
{
- if (!nvmet_check_data_len(req, 0))
+ if (!nvmet_check_transfer_len(req, 0))
return;
INIT_WORK(&req->f.work, nvmet_file_flush_work);
schedule_work(&req->f.work);
@@ -366,7 +375,7 @@ static void nvmet_file_write_zeroes_work(struct work_struct *w)
static void nvmet_file_execute_write_zeroes(struct nvmet_req *req)
{
- if (!nvmet_check_data_len(req, 0))
+ if (!nvmet_check_transfer_len(req, 0))
return;
INIT_WORK(&req->f.work, nvmet_file_write_zeroes_work);
schedule_work(&req->f.work);
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 421dff3ea143..809691291e73 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -19,6 +19,7 @@
#include <linux/rcupdate.h>
#include <linux/blkdev.h>
#include <linux/radix-tree.h>
+#include <linux/t10-pi.h>
#define NVMET_ASYNC_EVENTS 4
#define NVMET_ERROR_LOG_SLOTS 128
@@ -77,6 +78,8 @@ struct nvmet_ns {
int use_p2pmem;
struct pci_dev *p2p_dev;
+ int pi_type;
+ int metadata_size;
};
static inline struct nvmet_ns *to_nvmet_ns(struct config_item *item)
@@ -142,6 +145,7 @@ struct nvmet_port {
bool enabled;
int inline_data_size;
const struct nvmet_fabrics_ops *tr_ops;
+ bool pi_enable;
};
static inline struct nvmet_port *to_nvmet_port(struct config_item *item)
@@ -201,6 +205,7 @@ struct nvmet_ctrl {
spinlock_t error_lock;
u64 err_counter;
struct nvme_error_slot slots[NVMET_ERROR_LOG_SLOTS];
+ bool pi_support;
};
struct nvmet_subsys_model {
@@ -230,6 +235,7 @@ struct nvmet_subsys {
u64 ver;
u64 serial;
char *subsysnqn;
+ bool pi_support;
struct config_group group;
@@ -281,6 +287,7 @@ struct nvmet_fabrics_ops {
unsigned int type;
unsigned int msdbd;
bool has_keyed_sgls : 1;
+ bool metadata_support : 1;
void (*queue_response)(struct nvmet_req *req);
int (*add_port)(struct nvmet_port *port);
void (*remove_port)(struct nvmet_port *port);
@@ -302,6 +309,7 @@ struct nvmet_req {
struct nvmet_cq *cq;
struct nvmet_ns *ns;
struct scatterlist *sg;
+ struct scatterlist *metadata_sg;
struct bio_vec inline_bvec[NVMET_MAX_INLINE_BIOVEC];
union {
struct {
@@ -315,8 +323,10 @@ struct nvmet_req {
} f;
};
int sg_cnt;
+ int metadata_sg_cnt;
/* data length as parsed from the SGL descriptor: */
size_t transfer_len;
+ size_t metadata_len;
struct nvmet_port *port;
@@ -384,11 +394,11 @@ u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req);
bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
struct nvmet_sq *sq, const struct nvmet_fabrics_ops *ops);
void nvmet_req_uninit(struct nvmet_req *req);
-bool nvmet_check_data_len(struct nvmet_req *req, size_t data_len);
+bool nvmet_check_transfer_len(struct nvmet_req *req, size_t len);
bool nvmet_check_data_len_lte(struct nvmet_req *req, size_t data_len);
void nvmet_req_complete(struct nvmet_req *req, u16 status);
-int nvmet_req_alloc_sgl(struct nvmet_req *req);
-void nvmet_req_free_sgl(struct nvmet_req *req);
+int nvmet_req_alloc_sgls(struct nvmet_req *req);
+void nvmet_req_free_sgls(struct nvmet_req *req);
void nvmet_execute_keep_alive(struct nvmet_req *req);
@@ -498,13 +508,24 @@ void nvmet_file_ns_disable(struct nvmet_ns *ns);
u16 nvmet_bdev_flush(struct nvmet_req *req);
u16 nvmet_file_flush(struct nvmet_req *req);
void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid);
+void nvmet_bdev_ns_revalidate(struct nvmet_ns *ns);
+int nvmet_file_ns_revalidate(struct nvmet_ns *ns);
+void nvmet_ns_revalidate(struct nvmet_ns *ns);
-static inline u32 nvmet_rw_len(struct nvmet_req *req)
+static inline u32 nvmet_rw_data_len(struct nvmet_req *req)
{
return ((u32)le16_to_cpu(req->cmd->rw.length) + 1) <<
req->ns->blksize_shift;
}
+static inline u32 nvmet_rw_metadata_len(struct nvmet_req *req)
+{
+ if (!IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY))
+ return 0;
+ return ((u32)le16_to_cpu(req->cmd->rw.length) + 1) *
+ req->ns->metadata_size;
+}
+
static inline u32 nvmet_dsm_len(struct nvmet_req *req)
{
return (le32_to_cpu(req->cmd->dsm.nr) + 1) *
@@ -519,4 +540,11 @@ static inline __le16 to0based(u32 a)
return cpu_to_le16(max(1U, min(1U << 16, a)) - 1);
}
+static inline bool nvmet_ns_has_pi(struct nvmet_ns *ns)
+{
+ if (!IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY))
+ return false;
+ return ns->pi_type && ns->metadata_size == sizeof(struct t10_pi_tuple);
+}
+
#endif /* _NVMET_H */
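
To make the new metadata-length helper above concrete, here is a small worked example with hypothetical values (not taken from the patch): nvmet_rw_metadata_len() multiplies the one-based block count by the namespace metadata size, so an 8-block I/O against a namespace carrying 8-byte T10 PI tuples moves 64 bytes of metadata in addition to the data payload reported by nvmet_rw_data_len().

	/*
	 * Worked example (hypothetical values):
	 *   req->cmd->rw.length = cpu_to_le16(7);   0-based field, i.e. 8 blocks
	 *   ns->metadata_size   = 8;                sizeof(struct t10_pi_tuple)
	 *   ns->blksize_shift   = 12;               4 KiB logical blocks
	 *
	 *   nvmet_rw_data_len(req)     = (7 + 1) << 12 = 32768 bytes
	 *   nvmet_rw_metadata_len(req) = (7 + 1) * 8   =    64 bytes
	 *
	 * nvmet_bdev_execute_rw() checks the transfer against the sum of the
	 * two when the controller has pi_support and nvmet_ns_has_pi() is true.
	 */
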
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index fd47de0e4e4e..d5141780592e 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -33,6 +33,9 @@
/* Assume mpsmin == device_page_size == 4KB */
#define NVMET_RDMA_MAX_MDTS 8
+#define NVMET_RDMA_MAX_METADATA_MDTS 5
+
+struct nvmet_rdma_srq;
struct nvmet_rdma_cmd {
struct ib_sge sge[NVMET_RDMA_MAX_INLINE_SGE + 1];
@@ -41,6 +44,7 @@ struct nvmet_rdma_cmd {
struct scatterlist inline_sg[NVMET_RDMA_MAX_INLINE_SGE];
struct nvme_command *nvme_cmd;
struct nvmet_rdma_queue *queue;
+ struct nvmet_rdma_srq *nsrq;
};
enum {
@@ -57,6 +61,7 @@ struct nvmet_rdma_rsp {
struct nvmet_rdma_queue *queue;
struct ib_cqe read_cqe;
+ struct ib_cqe write_cqe;
struct rdma_rw_ctx rw;
struct nvmet_req req;
@@ -83,6 +88,7 @@ struct nvmet_rdma_queue {
struct ib_cq *cq;
atomic_t sq_wr_avail;
struct nvmet_rdma_device *dev;
+ struct nvmet_rdma_srq *nsrq;
spinlock_t state_lock;
enum nvmet_rdma_queue_state state;
struct nvmet_cq nvme_cq;
@@ -100,6 +106,7 @@ struct nvmet_rdma_queue {
int idx;
int host_qid;
+ int comp_vector;
int recv_queue_size;
int send_queue_size;
@@ -113,11 +120,17 @@ struct nvmet_rdma_port {
struct delayed_work repair_work;
};
+struct nvmet_rdma_srq {
+ struct ib_srq *srq;
+ struct nvmet_rdma_cmd *cmds;
+ struct nvmet_rdma_device *ndev;
+};
+
struct nvmet_rdma_device {
struct ib_device *device;
struct ib_pd *pd;
- struct ib_srq *srq;
- struct nvmet_rdma_cmd *srq_cmds;
+ struct nvmet_rdma_srq **srqs;
+ int srq_count;
size_t srq_size;
struct kref ref;
struct list_head entry;
@@ -129,6 +142,16 @@ static bool nvmet_rdma_use_srq;
module_param_named(use_srq, nvmet_rdma_use_srq, bool, 0444);
MODULE_PARM_DESC(use_srq, "Use shared receive queue.");
+static int srq_size_set(const char *val, const struct kernel_param *kp);
+static const struct kernel_param_ops srq_size_ops = {
+ .set = srq_size_set,
+ .get = param_get_int,
+};
+
+static int nvmet_rdma_srq_size = 1024;
+module_param_cb(srq_size, &srq_size_ops, &nvmet_rdma_srq_size, 0644);
+MODULE_PARM_DESC(srq_size, "set Shared Receive Queue (SRQ) size, should be >= 256 (default: 1024)");
+
static DEFINE_IDA(nvmet_rdma_queue_ida);
static LIST_HEAD(nvmet_rdma_queue_list);
static DEFINE_MUTEX(nvmet_rdma_queue_mutex);
@@ -140,6 +163,7 @@ static bool nvmet_rdma_execute_command(struct nvmet_rdma_rsp *rsp);
static void nvmet_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc);
static void nvmet_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc);
static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc);
+static void nvmet_rdma_write_data_done(struct ib_cq *cq, struct ib_wc *wc);
static void nvmet_rdma_qp_event(struct ib_event *event, void *priv);
static void nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue);
static void nvmet_rdma_free_rsp(struct nvmet_rdma_device *ndev,
@@ -149,6 +173,17 @@ static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev,
static const struct nvmet_fabrics_ops nvmet_rdma_ops;
+static int srq_size_set(const char *val, const struct kernel_param *kp)
+{
+ int n = 0, ret;
+
+ ret = kstrtoint(val, 10, &n);
+ if (ret != 0 || n < 256)
+ return -EINVAL;
+
+ return param_set_int(val, kp);
+}
+
static int num_pages(int len)
{
return 1 + (((len - 1) & PAGE_MASK) >> PAGE_SHIFT);
@@ -391,6 +426,9 @@ static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev,
/* Data In / RDMA READ */
r->read_cqe.done = nvmet_rdma_read_data_done;
+ /* Data Out / RDMA WRITE */
+ r->write_cqe.done = nvmet_rdma_write_data_done;
+
return 0;
out_free_rsp:
@@ -466,8 +504,8 @@ static int nvmet_rdma_post_recv(struct nvmet_rdma_device *ndev,
cmd->sge[0].addr, cmd->sge[0].length,
DMA_FROM_DEVICE);
- if (ndev->srq)
- ret = ib_post_srq_recv(ndev->srq, &cmd->wr, NULL);
+ if (cmd->nsrq)
+ ret = ib_post_srq_recv(cmd->nsrq->srq, &cmd->wr, NULL);
else
ret = ib_post_recv(cmd->queue->qp, &cmd->wr, NULL);
@@ -500,6 +538,129 @@ static void nvmet_rdma_process_wr_wait_list(struct nvmet_rdma_queue *queue)
spin_unlock(&queue->rsp_wr_wait_lock);
}
+static u16 nvmet_rdma_check_pi_status(struct ib_mr *sig_mr)
+{
+ struct ib_mr_status mr_status;
+ int ret;
+ u16 status = 0;
+
+ ret = ib_check_mr_status(sig_mr, IB_MR_CHECK_SIG_STATUS, &mr_status);
+ if (ret) {
+ pr_err("ib_check_mr_status failed, ret %d\n", ret);
+ return NVME_SC_INVALID_PI;
+ }
+
+ if (mr_status.fail_status & IB_MR_CHECK_SIG_STATUS) {
+ switch (mr_status.sig_err.err_type) {
+ case IB_SIG_BAD_GUARD:
+ status = NVME_SC_GUARD_CHECK;
+ break;
+ case IB_SIG_BAD_REFTAG:
+ status = NVME_SC_REFTAG_CHECK;
+ break;
+ case IB_SIG_BAD_APPTAG:
+ status = NVME_SC_APPTAG_CHECK;
+ break;
+ }
+ pr_err("PI error found type %d expected 0x%x vs actual 0x%x\n",
+ mr_status.sig_err.err_type,
+ mr_status.sig_err.expected,
+ mr_status.sig_err.actual);
+ }
+
+ return status;
+}
+
+static void nvmet_rdma_set_sig_domain(struct blk_integrity *bi,
+ struct nvme_command *cmd, struct ib_sig_domain *domain,
+ u16 control, u8 pi_type)
+{
+ domain->sig_type = IB_SIG_TYPE_T10_DIF;
+ domain->sig.dif.bg_type = IB_T10DIF_CRC;
+ domain->sig.dif.pi_interval = 1 << bi->interval_exp;
+ domain->sig.dif.ref_tag = le32_to_cpu(cmd->rw.reftag);
+ if (control & NVME_RW_PRINFO_PRCHK_REF)
+ domain->sig.dif.ref_remap = true;
+
+ domain->sig.dif.app_tag = le16_to_cpu(cmd->rw.apptag);
+ domain->sig.dif.apptag_check_mask = le16_to_cpu(cmd->rw.appmask);
+ domain->sig.dif.app_escape = true;
+ if (pi_type == NVME_NS_DPS_PI_TYPE3)
+ domain->sig.dif.ref_escape = true;
+}
+
+static void nvmet_rdma_set_sig_attrs(struct nvmet_req *req,
+ struct ib_sig_attrs *sig_attrs)
+{
+ struct nvme_command *cmd = req->cmd;
+ u16 control = le16_to_cpu(cmd->rw.control);
+ u8 pi_type = req->ns->pi_type;
+ struct blk_integrity *bi;
+
+ bi = bdev_get_integrity(req->ns->bdev);
+
+ memset(sig_attrs, 0, sizeof(*sig_attrs));
+
+ if (control & NVME_RW_PRINFO_PRACT) {
+ /* for WRITE_INSERT/READ_STRIP no wire domain */
+ sig_attrs->wire.sig_type = IB_SIG_TYPE_NONE;
+ nvmet_rdma_set_sig_domain(bi, cmd, &sig_attrs->mem, control,
+ pi_type);
+ /* Clear the PRACT bit since HCA will generate/verify the PI */
+ control &= ~NVME_RW_PRINFO_PRACT;
+ cmd->rw.control = cpu_to_le16(control);
+ /* PI is added by the HW */
+ req->transfer_len += req->metadata_len;
+ } else {
+ /* for WRITE_PASS/READ_PASS both wire/memory domains exist */
+ nvmet_rdma_set_sig_domain(bi, cmd, &sig_attrs->wire, control,
+ pi_type);
+ nvmet_rdma_set_sig_domain(bi, cmd, &sig_attrs->mem, control,
+ pi_type);
+ }
+
+ if (control & NVME_RW_PRINFO_PRCHK_REF)
+ sig_attrs->check_mask |= IB_SIG_CHECK_REFTAG;
+ if (control & NVME_RW_PRINFO_PRCHK_GUARD)
+ sig_attrs->check_mask |= IB_SIG_CHECK_GUARD;
+ if (control & NVME_RW_PRINFO_PRCHK_APP)
+ sig_attrs->check_mask |= IB_SIG_CHECK_APPTAG;
+}
+
+static int nvmet_rdma_rw_ctx_init(struct nvmet_rdma_rsp *rsp, u64 addr, u32 key,
+ struct ib_sig_attrs *sig_attrs)
+{
+ struct rdma_cm_id *cm_id = rsp->queue->cm_id;
+ struct nvmet_req *req = &rsp->req;
+ int ret;
+
+ if (req->metadata_len)
+ ret = rdma_rw_ctx_signature_init(&rsp->rw, cm_id->qp,
+ cm_id->port_num, req->sg, req->sg_cnt,
+ req->metadata_sg, req->metadata_sg_cnt, sig_attrs,
+ addr, key, nvmet_data_dir(req));
+ else
+ ret = rdma_rw_ctx_init(&rsp->rw, cm_id->qp, cm_id->port_num,
+ req->sg, req->sg_cnt, 0, addr, key,
+ nvmet_data_dir(req));
+
+ return ret;
+}
+
+static void nvmet_rdma_rw_ctx_destroy(struct nvmet_rdma_rsp *rsp)
+{
+ struct rdma_cm_id *cm_id = rsp->queue->cm_id;
+ struct nvmet_req *req = &rsp->req;
+
+ if (req->metadata_len)
+ rdma_rw_ctx_destroy_signature(&rsp->rw, cm_id->qp,
+ cm_id->port_num, req->sg, req->sg_cnt,
+ req->metadata_sg, req->metadata_sg_cnt,
+ nvmet_data_dir(req));
+ else
+ rdma_rw_ctx_destroy(&rsp->rw, cm_id->qp, cm_id->port_num,
+ req->sg, req->sg_cnt, nvmet_data_dir(req));
+}
static void nvmet_rdma_release_rsp(struct nvmet_rdma_rsp *rsp)
{
@@ -507,14 +668,11 @@ static void nvmet_rdma_release_rsp(struct nvmet_rdma_rsp *rsp)
atomic_add(1 + rsp->n_rdma, &queue->sq_wr_avail);
- if (rsp->n_rdma) {
- rdma_rw_ctx_destroy(&rsp->rw, queue->qp,
- queue->cm_id->port_num, rsp->req.sg,
- rsp->req.sg_cnt, nvmet_data_dir(&rsp->req));
- }
+ if (rsp->n_rdma)
+ nvmet_rdma_rw_ctx_destroy(rsp);
if (rsp->req.sg != rsp->cmd->inline_sg)
- nvmet_req_free_sgl(&rsp->req);
+ nvmet_req_free_sgls(&rsp->req);
if (unlikely(!list_empty_careful(&queue->rsp_wr_wait_list)))
nvmet_rdma_process_wr_wait_list(queue);
@@ -566,11 +724,16 @@ static void nvmet_rdma_queue_response(struct nvmet_req *req)
rsp->send_wr.opcode = IB_WR_SEND;
}
- if (nvmet_rdma_need_data_out(rsp))
- first_wr = rdma_rw_ctx_wrs(&rsp->rw, cm_id->qp,
- cm_id->port_num, NULL, &rsp->send_wr);
- else
+ if (nvmet_rdma_need_data_out(rsp)) {
+ if (rsp->req.metadata_len)
+ first_wr = rdma_rw_ctx_wrs(&rsp->rw, cm_id->qp,
+ cm_id->port_num, &rsp->write_cqe, NULL);
+ else
+ first_wr = rdma_rw_ctx_wrs(&rsp->rw, cm_id->qp,
+ cm_id->port_num, NULL, &rsp->send_wr);
+ } else {
first_wr = &rsp->send_wr;
+ }
nvmet_rdma_post_recv(rsp->queue->dev, rsp->cmd);
@@ -589,15 +752,14 @@ static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc)
struct nvmet_rdma_rsp *rsp =
container_of(wc->wr_cqe, struct nvmet_rdma_rsp, read_cqe);
struct nvmet_rdma_queue *queue = cq->cq_context;
+ u16 status = 0;
WARN_ON(rsp->n_rdma <= 0);
atomic_add(rsp->n_rdma, &queue->sq_wr_avail);
- rdma_rw_ctx_destroy(&rsp->rw, queue->qp,
- queue->cm_id->port_num, rsp->req.sg,
- rsp->req.sg_cnt, nvmet_data_dir(&rsp->req));
rsp->n_rdma = 0;
if (unlikely(wc->status != IB_WC_SUCCESS)) {
+ nvmet_rdma_rw_ctx_destroy(rsp);
nvmet_req_uninit(&rsp->req);
nvmet_rdma_release_rsp(rsp);
if (wc->status != IB_WC_WR_FLUSH_ERR) {
@@ -608,7 +770,58 @@ static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc)
return;
}
- rsp->req.execute(&rsp->req);
+ if (rsp->req.metadata_len)
+ status = nvmet_rdma_check_pi_status(rsp->rw.reg->mr);
+ nvmet_rdma_rw_ctx_destroy(rsp);
+
+ if (unlikely(status))
+ nvmet_req_complete(&rsp->req, status);
+ else
+ rsp->req.execute(&rsp->req);
+}
+
+static void nvmet_rdma_write_data_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+ struct nvmet_rdma_rsp *rsp =
+ container_of(wc->wr_cqe, struct nvmet_rdma_rsp, write_cqe);
+ struct nvmet_rdma_queue *queue = cq->cq_context;
+ struct rdma_cm_id *cm_id = rsp->queue->cm_id;
+ u16 status;
+
+ if (!IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY))
+ return;
+
+ WARN_ON(rsp->n_rdma <= 0);
+ atomic_add(rsp->n_rdma, &queue->sq_wr_avail);
+ rsp->n_rdma = 0;
+
+ if (unlikely(wc->status != IB_WC_SUCCESS)) {
+ nvmet_rdma_rw_ctx_destroy(rsp);
+ nvmet_req_uninit(&rsp->req);
+ nvmet_rdma_release_rsp(rsp);
+ if (wc->status != IB_WC_WR_FLUSH_ERR) {
+ pr_info("RDMA WRITE for CQE 0x%p failed with status %s (%d).\n",
+ wc->wr_cqe, ib_wc_status_msg(wc->status),
+ wc->status);
+ nvmet_rdma_error_comp(queue);
+ }
+ return;
+ }
+
+ /*
+ * Upon RDMA completion check the signature status
+ * - if succeeded send good NVMe response
+ * - if failed send bad NVMe response with appropriate error
+ */
+ status = nvmet_rdma_check_pi_status(rsp->rw.reg->mr);
+ if (unlikely(status))
+ rsp->req.cqe->status = cpu_to_le16(status << 1);
+ nvmet_rdma_rw_ctx_destroy(rsp);
+
+ if (unlikely(ib_post_send(cm_id->qp, &rsp->send_wr, NULL))) {
+ pr_err("sending cmd response failed\n");
+ nvmet_rdma_release_rsp(rsp);
+ }
}
static void nvmet_rdma_use_inline_sg(struct nvmet_rdma_rsp *rsp, u32 len,
@@ -665,9 +878,9 @@ static u16 nvmet_rdma_map_sgl_inline(struct nvmet_rdma_rsp *rsp)
static u16 nvmet_rdma_map_sgl_keyed(struct nvmet_rdma_rsp *rsp,
struct nvme_keyed_sgl_desc *sgl, bool invalidate)
{
- struct rdma_cm_id *cm_id = rsp->queue->cm_id;
u64 addr = le64_to_cpu(sgl->addr);
u32 key = get_unaligned_le32(sgl->key);
+ struct ib_sig_attrs sig_attrs;
int ret;
rsp->req.transfer_len = get_unaligned_le24(sgl->length);
@@ -676,13 +889,14 @@ static u16 nvmet_rdma_map_sgl_keyed(struct nvmet_rdma_rsp *rsp,
if (!rsp->req.transfer_len)
return 0;
- ret = nvmet_req_alloc_sgl(&rsp->req);
+ if (rsp->req.metadata_len)
+ nvmet_rdma_set_sig_attrs(&rsp->req, &sig_attrs);
+
+ ret = nvmet_req_alloc_sgls(&rsp->req);
if (unlikely(ret < 0))
goto error_out;
- ret = rdma_rw_ctx_init(&rsp->rw, cm_id->qp, cm_id->port_num,
- rsp->req.sg, rsp->req.sg_cnt, 0, addr, key,
- nvmet_data_dir(&rsp->req));
+ ret = nvmet_rdma_rw_ctx_init(rsp, addr, key, &sig_attrs);
if (unlikely(ret < 0))
goto error_out;
rsp->n_rdma += ret;
@@ -845,23 +1059,40 @@ static void nvmet_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc)
nvmet_rdma_handle_command(queue, rsp);
}
-static void nvmet_rdma_destroy_srq(struct nvmet_rdma_device *ndev)
+static void nvmet_rdma_destroy_srq(struct nvmet_rdma_srq *nsrq)
{
- if (!ndev->srq)
+ nvmet_rdma_free_cmds(nsrq->ndev, nsrq->cmds, nsrq->ndev->srq_size,
+ false);
+ ib_destroy_srq(nsrq->srq);
+
+ kfree(nsrq);
+}
+
+static void nvmet_rdma_destroy_srqs(struct nvmet_rdma_device *ndev)
+{
+ int i;
+
+ if (!ndev->srqs)
return;
- nvmet_rdma_free_cmds(ndev, ndev->srq_cmds, ndev->srq_size, false);
- ib_destroy_srq(ndev->srq);
+ for (i = 0; i < ndev->srq_count; i++)
+ nvmet_rdma_destroy_srq(ndev->srqs[i]);
+
+ kfree(ndev->srqs);
}
-static int nvmet_rdma_init_srq(struct nvmet_rdma_device *ndev)
+static struct nvmet_rdma_srq *
+nvmet_rdma_init_srq(struct nvmet_rdma_device *ndev)
{
struct ib_srq_init_attr srq_attr = { NULL, };
+ size_t srq_size = ndev->srq_size;
+ struct nvmet_rdma_srq *nsrq;
struct ib_srq *srq;
- size_t srq_size;
int ret, i;
- srq_size = 4095; /* XXX: tune */
+ nsrq = kzalloc(sizeof(*nsrq), GFP_KERNEL);
+ if (!nsrq)
+ return ERR_PTR(-ENOMEM);
srq_attr.attr.max_wr = srq_size;
srq_attr.attr.max_sge = 1 + ndev->inline_page_count;
@@ -869,35 +1100,73 @@ static int nvmet_rdma_init_srq(struct nvmet_rdma_device *ndev)
srq_attr.srq_type = IB_SRQT_BASIC;
srq = ib_create_srq(ndev->pd, &srq_attr);
if (IS_ERR(srq)) {
- /*
- * If SRQs aren't supported we just go ahead and use normal
- * non-shared receive queues.
- */
- pr_info("SRQ requested but not supported.\n");
- return 0;
+ ret = PTR_ERR(srq);
+ goto out_free;
}
- ndev->srq_cmds = nvmet_rdma_alloc_cmds(ndev, srq_size, false);
- if (IS_ERR(ndev->srq_cmds)) {
- ret = PTR_ERR(ndev->srq_cmds);
+ nsrq->cmds = nvmet_rdma_alloc_cmds(ndev, srq_size, false);
+ if (IS_ERR(nsrq->cmds)) {
+ ret = PTR_ERR(nsrq->cmds);
goto out_destroy_srq;
}
- ndev->srq = srq;
- ndev->srq_size = srq_size;
+ nsrq->srq = srq;
+ nsrq->ndev = ndev;
for (i = 0; i < srq_size; i++) {
- ret = nvmet_rdma_post_recv(ndev, &ndev->srq_cmds[i]);
+ nsrq->cmds[i].nsrq = nsrq;
+ ret = nvmet_rdma_post_recv(ndev, &nsrq->cmds[i]);
if (ret)
goto out_free_cmds;
}
- return 0;
+ return nsrq;
out_free_cmds:
- nvmet_rdma_free_cmds(ndev, ndev->srq_cmds, ndev->srq_size, false);
+ nvmet_rdma_free_cmds(ndev, nsrq->cmds, srq_size, false);
out_destroy_srq:
ib_destroy_srq(srq);
+out_free:
+ kfree(nsrq);
+ return ERR_PTR(ret);
+}
+
+static int nvmet_rdma_init_srqs(struct nvmet_rdma_device *ndev)
+{
+ int i, ret;
+
+ if (!ndev->device->attrs.max_srq_wr || !ndev->device->attrs.max_srq) {
+ /*
+ * If SRQs aren't supported we just go ahead and use normal
+ * non-shared receive queues.
+ */
+ pr_info("SRQ requested but not supported.\n");
+ return 0;
+ }
+
+ ndev->srq_size = min(ndev->device->attrs.max_srq_wr,
+ nvmet_rdma_srq_size);
+ ndev->srq_count = min(ndev->device->num_comp_vectors,
+ ndev->device->attrs.max_srq);
+
+ ndev->srqs = kcalloc(ndev->srq_count, sizeof(*ndev->srqs), GFP_KERNEL);
+ if (!ndev->srqs)
+ return -ENOMEM;
+
+ for (i = 0; i < ndev->srq_count; i++) {
+ ndev->srqs[i] = nvmet_rdma_init_srq(ndev);
+ if (IS_ERR(ndev->srqs[i])) {
+ ret = PTR_ERR(ndev->srqs[i]);
+ goto err_srq;
+ }
+ }
+
+ return 0;
+
+err_srq:
+ while (--i >= 0)
+ nvmet_rdma_destroy_srq(ndev->srqs[i]);
+ kfree(ndev->srqs);
return ret;
}
@@ -910,7 +1179,7 @@ static void nvmet_rdma_free_dev(struct kref *ref)
list_del(&ndev->entry);
mutex_unlock(&device_list_mutex);
- nvmet_rdma_destroy_srq(ndev);
+ nvmet_rdma_destroy_srqs(ndev);
ib_dealloc_pd(ndev->pd);
kfree(ndev);
@@ -957,7 +1226,7 @@ nvmet_rdma_find_get_device(struct rdma_cm_id *cm_id)
goto out_free_dev;
if (nvmet_rdma_use_srq) {
- ret = nvmet_rdma_init_srq(ndev);
+ ret = nvmet_rdma_init_srqs(ndev);
if (ret)
goto out_free_pd;
}
@@ -981,14 +1250,7 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue)
{
struct ib_qp_init_attr qp_attr;
struct nvmet_rdma_device *ndev = queue->dev;
- int comp_vector, nr_cqe, ret, i, factor;
-
- /*
- * Spread the io queues across completion vectors,
- * but still keep all admin queues on vector 0.
- */
- comp_vector = !queue->host_qid ? 0 :
- queue->idx % ndev->device->num_comp_vectors;
+ int nr_cqe, ret, i, factor;
/*
* Reserve CQ slots for RECV + RDMA_READ/RDMA_WRITE + RDMA_SEND.
@@ -996,7 +1258,7 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue)
nr_cqe = queue->recv_queue_size + 2 * queue->send_queue_size;
queue->cq = ib_alloc_cq(ndev->device, queue,
- nr_cqe + 1, comp_vector,
+ nr_cqe + 1, queue->comp_vector,
IB_POLL_WORKQUEUE);
if (IS_ERR(queue->cq)) {
ret = PTR_ERR(queue->cq);
@@ -1020,14 +1282,17 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue)
qp_attr.cap.max_send_sge = max(ndev->device->attrs.max_sge_rd,
ndev->device->attrs.max_send_sge);
- if (ndev->srq) {
- qp_attr.srq = ndev->srq;
+ if (queue->nsrq) {
+ qp_attr.srq = queue->nsrq->srq;
} else {
/* +1 for drain */
qp_attr.cap.max_recv_wr = 1 + queue->recv_queue_size;
qp_attr.cap.max_recv_sge = 1 + ndev->inline_page_count;
}
+ if (queue->port->pi_enable && queue->host_qid)
+ qp_attr.create_flags |= IB_QP_CREATE_INTEGRITY_EN;
+
ret = rdma_create_qp(queue->cm_id, ndev->pd, &qp_attr);
if (ret) {
pr_err("failed to create_qp ret= %d\n", ret);
@@ -1041,7 +1306,7 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue)
__func__, queue->cq->cqe, qp_attr.cap.max_send_sge,
qp_attr.cap.max_send_wr, queue->cm_id);
- if (!ndev->srq) {
+ if (!queue->nsrq) {
for (i = 0; i < queue->recv_queue_size; i++) {
queue->cmds[i].queue = queue;
ret = nvmet_rdma_post_recv(ndev, &queue->cmds[i]);
@@ -1076,7 +1341,7 @@ static void nvmet_rdma_free_queue(struct nvmet_rdma_queue *queue)
nvmet_sq_destroy(&queue->nvme_sq);
nvmet_rdma_destroy_queue_ib(queue);
- if (!queue->dev->srq) {
+ if (!queue->nsrq) {
nvmet_rdma_free_cmds(queue->dev, queue->cmds,
queue->recv_queue_size,
!queue->host_qid);
@@ -1146,6 +1411,7 @@ nvmet_rdma_alloc_queue(struct nvmet_rdma_device *ndev,
struct rdma_cm_id *cm_id,
struct rdma_cm_event *event)
{
+ struct nvmet_rdma_port *port = cm_id->context;
struct nvmet_rdma_queue *queue;
int ret;
@@ -1172,6 +1438,7 @@ nvmet_rdma_alloc_queue(struct nvmet_rdma_device *ndev,
INIT_WORK(&queue->release_work, nvmet_rdma_release_queue_work);
queue->dev = ndev;
queue->cm_id = cm_id;
+ queue->port = port->nport;
spin_lock_init(&queue->state_lock);
queue->state = NVMET_RDMA_Q_CONNECTING;
@@ -1188,13 +1455,23 @@ nvmet_rdma_alloc_queue(struct nvmet_rdma_device *ndev,
goto out_destroy_sq;
}
+ /*
+ * Spread the io queues across completion vectors,
+ * but still keep all admin queues on vector 0.
+ */
+ queue->comp_vector = !queue->host_qid ? 0 :
+ queue->idx % ndev->device->num_comp_vectors;
+
+
ret = nvmet_rdma_alloc_rsps(queue);
if (ret) {
ret = NVME_RDMA_CM_NO_RSC;
goto out_ida_remove;
}
- if (!ndev->srq) {
+ if (ndev->srqs) {
+ queue->nsrq = ndev->srqs[queue->comp_vector % ndev->srq_count];
+ } else {
queue->cmds = nvmet_rdma_alloc_cmds(ndev,
queue->recv_queue_size,
!queue->host_qid);
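The queue setup above keeps admin queues on completion vector 0, spreads I/O queues across vectors by queue index, and hands each queue the SRQ that belongs to its vector. A stand-alone sketch of the index arithmetic, not part of the patch; the vector and SRQ counts are made-up example values:

#include <stdio.h>

int main(void)
{
	int num_comp_vectors = 8, srq_count = 4;	/* assumed example values */

	/* Admin queue: host_qid == 0, so it stays on vector 0 and srqs[0]. */
	int admin_vector = 0;

	/* I/O queue with idx == 5: vector 5 % 8 = 5, SRQ index 5 % 4 = 1. */
	int io_vector = 5 % num_comp_vectors;
	int io_srq = io_vector % srq_count;

	printf("admin -> vector %d/srqs[0], io idx 5 -> vector %d/srqs[%d]\n",
	       admin_vector, io_vector, io_srq);
	return 0;
}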
@@ -1215,7 +1492,7 @@ nvmet_rdma_alloc_queue(struct nvmet_rdma_device *ndev,
return queue;
out_free_cmds:
- if (!ndev->srq) {
+ if (!queue->nsrq) {
nvmet_rdma_free_cmds(queue->dev, queue->cmds,
queue->recv_queue_size,
!queue->host_qid);
@@ -1241,6 +1518,10 @@ static void nvmet_rdma_qp_event(struct ib_event *event, void *priv)
case IB_EVENT_COMM_EST:
rdma_notify(queue->cm_id, event->event);
break;
+ case IB_EVENT_QP_LAST_WQE_REACHED:
+ pr_debug("received last WQE reached event for queue=0x%p\n",
+ queue);
+ break;
default:
pr_err("received IB QP event: %s (%d)\n",
ib_event_msg(event->event), event->event);
@@ -1275,7 +1556,6 @@ static int nvmet_rdma_cm_accept(struct rdma_cm_id *cm_id,
static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id,
struct rdma_cm_event *event)
{
- struct nvmet_rdma_port *port = cm_id->context;
struct nvmet_rdma_device *ndev;
struct nvmet_rdma_queue *queue;
int ret = -EINVAL;
@@ -1291,7 +1571,6 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id,
ret = -ENOMEM;
goto put_device;
}
- queue->port = port->nport;
if (queue->host_qid == 0) {
/* Let inflight controller teardown complete */
@@ -1563,6 +1842,14 @@ static int nvmet_rdma_enable_port(struct nvmet_rdma_port *port)
goto out_destroy_id;
}
+ if (port->nport->pi_enable &&
+ !(cm_id->device->attrs.device_cap_flags &
+ IB_DEVICE_INTEGRITY_HANDOVER)) {
+ pr_err("T10-PI is not supported for %pISpcs\n", addr);
+ ret = -EINVAL;
+ goto out_destroy_id;
+ }
+
port->cm_id = cm_id;
return 0;
@@ -1672,6 +1959,8 @@ static void nvmet_rdma_disc_port_addr(struct nvmet_req *req,
static u8 nvmet_rdma_get_mdts(const struct nvmet_ctrl *ctrl)
{
+ if (ctrl->pi_support)
+ return NVMET_RDMA_MAX_METADATA_MDTS;
return NVMET_RDMA_MAX_MDTS;
}
@@ -1680,6 +1969,7 @@ static const struct nvmet_fabrics_ops nvmet_rdma_ops = {
.type = NVMF_TRTYPE_RDMA,
.msdbd = 1,
.has_keyed_sgls = 1,
+ .metadata_support = 1,
.add_port = nvmet_rdma_add_port,
.remove_port = nvmet_rdma_remove_port,
.queue_response = nvmet_rdma_queue_response,
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index 4546049a96b3..1669177cd26c 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -325,6 +325,14 @@ static void nvmet_tcp_fatal_error(struct nvmet_tcp_queue *queue)
kernel_sock_shutdown(queue->sock, SHUT_RDWR);
}
+static void nvmet_tcp_socket_error(struct nvmet_tcp_queue *queue, int status)
+{
+ if (status == -EPIPE || status == -ECONNRESET)
+ kernel_sock_shutdown(queue->sock, SHUT_RDWR);
+ else
+ nvmet_tcp_fatal_error(queue);
+}
+
static int nvmet_tcp_map_data(struct nvmet_tcp_cmd *cmd)
{
struct nvme_sgl_desc *sgl = &cmd->req.cmd->common.dptr.sgl;
@@ -510,7 +518,7 @@ static int nvmet_try_send_data_pdu(struct nvmet_tcp_cmd *cmd)
ret = kernel_sendpage(cmd->queue->sock, virt_to_page(cmd->data_pdu),
offset_in_page(cmd->data_pdu) + cmd->offset,
- left, MSG_DONTWAIT | MSG_MORE);
+ left, MSG_DONTWAIT | MSG_MORE | MSG_SENDPAGE_NOTLAST);
if (ret <= 0)
return ret;
@@ -538,7 +546,7 @@ static int nvmet_try_send_data(struct nvmet_tcp_cmd *cmd, bool last_in_batch)
if ((!last_in_batch && cmd->queue->send_list_len) ||
cmd->wbytes_done + left < cmd->req.transfer_len ||
queue->data_digest || !queue->nvme_sq.sqhd_disabled)
- flags |= MSG_MORE;
+ flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;
ret = kernel_sendpage(cmd->queue->sock, page, cmd->offset,
left, flags);
@@ -585,7 +593,7 @@ static int nvmet_try_send_response(struct nvmet_tcp_cmd *cmd,
int ret;
if (!last_in_batch && cmd->queue->send_list_len)
- flags |= MSG_MORE;
+ flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;
else
flags |= MSG_EOR;
@@ -614,7 +622,7 @@ static int nvmet_try_send_r2t(struct nvmet_tcp_cmd *cmd, bool last_in_batch)
int ret;
if (!last_in_batch && cmd->queue->send_list_len)
- flags |= MSG_MORE;
+ flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;
else
flags |= MSG_EOR;
@@ -644,6 +652,8 @@ static int nvmet_try_send_ddgst(struct nvmet_tcp_cmd *cmd, bool last_in_batch)
if (!last_in_batch && cmd->queue->send_list_len)
msg.msg_flags |= MSG_MORE;
+ else
+ msg.msg_flags |= MSG_EOR;
ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len);
if (unlikely(ret <= 0))
@@ -716,11 +726,15 @@ static int nvmet_tcp_try_send(struct nvmet_tcp_queue *queue,
for (i = 0; i < budget; i++) {
ret = nvmet_tcp_try_send_one(queue, i == budget - 1);
- if (ret <= 0)
+ if (unlikely(ret < 0)) {
+ nvmet_tcp_socket_error(queue, ret);
+ goto done;
+ } else if (ret == 0) {
break;
+ }
(*sends)++;
}
-
+done:
return ret;
}
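The reworked loop above distinguishes a socket error (negative return, reported once via nvmet_tcp_socket_error() and then abandoned) from simply running out of work (zero return). A stand-alone sketch of that budget-loop shape, not part of the patch; try_one() merely simulates the possible outcomes:

#include <stdio.h>

static int try_one(int i)
{
	return i < 3 ? 1 : 0;	/* three units of work, then "nothing left" */
}

int main(void)
{
	int budget = 8, ops = 0, ret = 0;

	for (int i = 0; i < budget; i++) {
		ret = try_one(i);
		if (ret < 0) {		/* socket error: handle once, stop */
			printf("socket error %d\n", ret);
			break;
		} else if (ret == 0) {	/* nothing left to do */
			break;
		}
		ops++;
	}
	printf("completed %d ops, last ret=%d\n", ops, ret);
	return 0;
}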
@@ -1157,11 +1171,15 @@ static int nvmet_tcp_try_recv(struct nvmet_tcp_queue *queue,
for (i = 0; i < budget; i++) {
ret = nvmet_tcp_try_recv_one(queue);
- if (ret <= 0)
+ if (unlikely(ret < 0)) {
+ nvmet_tcp_socket_error(queue, ret);
+ goto done;
+ } else if (ret == 0) {
break;
+ }
(*recvs)++;
}
-
+done:
return ret;
}
@@ -1186,27 +1204,16 @@ static void nvmet_tcp_io_work(struct work_struct *w)
pending = false;
ret = nvmet_tcp_try_recv(queue, NVMET_TCP_RECV_BUDGET, &ops);
- if (ret > 0) {
+ if (ret > 0)
pending = true;
- } else if (ret < 0) {
- if (ret == -EPIPE || ret == -ECONNRESET)
- kernel_sock_shutdown(queue->sock, SHUT_RDWR);
- else
- nvmet_tcp_fatal_error(queue);
+ else if (ret < 0)
return;
- }
ret = nvmet_tcp_try_send(queue, NVMET_TCP_SEND_BUDGET, &ops);
- if (ret > 0) {
- /* transmitted message/data */
+ if (ret > 0)
pending = true;
- } else if (ret < 0) {
- if (ret == -EPIPE || ret == -ECONNRESET)
- kernel_sock_shutdown(queue->sock, SHUT_RDWR);
- else
- nvmet_tcp_fatal_error(queue);
+ else if (ret < 0)
return;
- }
} while (pending && ops < NVMET_TCP_IO_WORK_BUDGET);
diff --git a/drivers/nvme/target/trace.h b/drivers/nvme/target/trace.h
index e645caa882dd..0458046d6501 100644
--- a/drivers/nvme/target/trace.h
+++ b/drivers/nvme/target/trace.h
@@ -130,6 +130,34 @@ TRACE_EVENT(nvmet_req_complete,
);
+#define aer_name(aer) { aer, #aer }
+
+TRACE_EVENT(nvmet_async_event,
+ TP_PROTO(struct nvmet_ctrl *ctrl, __le32 result),
+ TP_ARGS(ctrl, result),
+ TP_STRUCT__entry(
+ __field(int, ctrl_id)
+ __field(u32, result)
+ ),
+ TP_fast_assign(
+ __entry->ctrl_id = ctrl->cntlid;
+ __entry->result = (le32_to_cpu(result) & 0xff00) >> 8;
+ ),
+ TP_printk("nvmet%d: NVME_AEN=%#08x [%s]",
+ __entry->ctrl_id, __entry->result,
+ __print_symbolic(__entry->result,
+ aer_name(NVME_AER_NOTICE_NS_CHANGED),
+ aer_name(NVME_AER_NOTICE_ANA),
+ aer_name(NVME_AER_NOTICE_FW_ACT_STARTING),
+ aer_name(NVME_AER_NOTICE_DISC_CHANGED),
+ aer_name(NVME_AER_ERROR),
+ aer_name(NVME_AER_SMART),
+ aer_name(NVME_AER_CSS),
+ aer_name(NVME_AER_VS))
+ )
+);
+#undef aer_name
+
#endif /* _TRACE_NVMET_H */
#undef TRACE_INCLUDE_PATH
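The aer_name() helper above pairs each AEN constant with its stringified name, which is exactly what __print_symbolic() needs to turn the numeric result field back into a readable label. A stand-alone sketch of the same trick, not part of the patch; the enum values are placeholders rather than the real NVMe constants:

#include <stdio.h>

enum { NVME_AER_ERROR_X = 0, NVME_AER_SMART_X = 1, NVME_AER_NOTICE_X = 2 };
#define aer_name(aer) { aer, #aer }

struct sym { int val; const char *name; };

int main(void)
{
	static const struct sym tbl[] = {
		aer_name(NVME_AER_ERROR_X),
		aer_name(NVME_AER_SMART_X),
		aer_name(NVME_AER_NOTICE_X),
	};
	int result = 1;	/* pretend this came out of the trace record */

	for (unsigned int i = 0; i < sizeof(tbl) / sizeof(tbl[0]); i++)
		if (tbl[i].val == result)
			printf("NVME_AEN [%s]\n", tbl[i].name);
	return 0;
}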
diff --git a/drivers/oprofile/event_buffer.c b/drivers/oprofile/event_buffer.c
index 12ea4a4ad607..6c9edc8bbc95 100644
--- a/drivers/oprofile/event_buffer.c
+++ b/drivers/oprofile/event_buffer.c
@@ -113,7 +113,7 @@ static int event_buffer_open(struct inode *inode, struct file *file)
{
int err = -EPERM;
- if (!capable(CAP_SYS_ADMIN))
+ if (!perfmon_capable())
return -EPERM;
if (test_and_set_bit_lock(0, &buffer_opened))
diff --git a/drivers/pci/controller/Kconfig b/drivers/pci/controller/Kconfig
index 91bfdb784829..ae36edb1d7db 100644
--- a/drivers/pci/controller/Kconfig
+++ b/drivers/pci/controller/Kconfig
@@ -258,6 +258,16 @@ config PCI_HYPERV_INTERFACE
The Hyper-V PCI Interface is a helper driver that allows other drivers to
have a common interface with the Hyper-V PCI frontend driver.
+config PCI_LOONGSON
+ bool "LOONGSON PCI Controller"
+ depends on MACH_LOONGSON64 || COMPILE_TEST
+ depends on OF
+ depends on PCI_QUIRKS
+ default MACH_LOONGSON64
+ help
+ Say Y here if you want to enable PCI controller support on
+ Loongson systems.
+
source "drivers/pci/controller/dwc/Kconfig"
source "drivers/pci/controller/mobiveil/Kconfig"
source "drivers/pci/controller/cadence/Kconfig"
diff --git a/drivers/pci/controller/Makefile b/drivers/pci/controller/Makefile
index 158c59771824..fbac4b0190a0 100644
--- a/drivers/pci/controller/Makefile
+++ b/drivers/pci/controller/Makefile
@@ -28,6 +28,7 @@ obj-$(CONFIG_PCIE_MEDIATEK) += pcie-mediatek.o
obj-$(CONFIG_PCIE_TANGO_SMP8759) += pcie-tango.o
obj-$(CONFIG_VMD) += vmd.o
obj-$(CONFIG_PCIE_BRCMSTB) += pcie-brcmstb.o
+obj-$(CONFIG_PCI_LOONGSON) += pci-loongson.o
# pcie-hisi.o quirks are needed even without CONFIG_PCIE_DW
obj-y += dwc/
obj-y += mobiveil/
diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
index e15022ff63e3..222ff5639ebe 100644
--- a/drivers/pci/controller/pci-hyperv.c
+++ b/drivers/pci/controller/pci-hyperv.c
@@ -1356,11 +1356,11 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
{
struct irq_cfg *cfg = irqd_cfg(data);
struct hv_pcibus_device *hbus;
+ struct vmbus_channel *channel;
struct hv_pci_dev *hpdev;
struct pci_bus *pbus;
struct pci_dev *pdev;
struct cpumask *dest;
- unsigned long flags;
struct compose_comp_ctxt comp;
struct tran_int_desc *int_desc;
struct {
@@ -1378,6 +1378,7 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
dest = irq_data_get_effective_affinity_mask(data);
pbus = pdev->bus;
hbus = container_of(pbus->sysdata, struct hv_pcibus_device, sysdata);
+ channel = hbus->hdev->channel;
hpdev = get_pcichild_wslot(hbus, devfn_to_wslot(pdev->devfn));
if (!hpdev)
goto return_null_message;
@@ -1436,42 +1437,51 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
}
/*
+ * Prevents hv_pci_onchannelcallback() from running concurrently
+ * in the tasklet.
+ */
+ tasklet_disable(&channel->callback_event);
+
+ /*
* Since this function is called with IRQ locks held, can't
* do normal wait for completion; instead poll.
*/
while (!try_wait_for_completion(&comp.comp_pkt.host_event)) {
+ unsigned long flags;
+
/* 0xFFFF means an invalid PCI VENDOR ID. */
if (hv_pcifront_get_vendor_id(hpdev) == 0xFFFF) {
dev_err_once(&hbus->hdev->device,
"the device has gone\n");
- goto free_int_desc;
+ goto enable_tasklet;
}
/*
- * When the higher level interrupt code calls us with
- * interrupt disabled, we must poll the channel by calling
- * the channel callback directly when channel->target_cpu is
- * the current CPU. When the higher level interrupt code
- * calls us with interrupt enabled, let's add the
- * local_irq_save()/restore() to avoid race:
- * hv_pci_onchannelcallback() can also run in tasklet.
+ * Make sure that the ring buffer data structure doesn't get
+ * freed while we dereference the ring buffer pointer. Test
+ * for the channel's onchannel_callback being NULL within a
+ * sched_lock critical section. See also the inline comments
+ * in vmbus_reset_channel_cb().
*/
- local_irq_save(flags);
-
- if (hbus->hdev->channel->target_cpu == smp_processor_id())
- hv_pci_onchannelcallback(hbus);
-
- local_irq_restore(flags);
+ spin_lock_irqsave(&channel->sched_lock, flags);
+ if (unlikely(channel->onchannel_callback == NULL)) {
+ spin_unlock_irqrestore(&channel->sched_lock, flags);
+ goto enable_tasklet;
+ }
+ hv_pci_onchannelcallback(hbus);
+ spin_unlock_irqrestore(&channel->sched_lock, flags);
if (hpdev->state == hv_pcichild_ejecting) {
dev_err_once(&hbus->hdev->device,
"the device is being ejected\n");
- goto free_int_desc;
+ goto enable_tasklet;
}
udelay(100);
}
+ tasklet_enable(&channel->callback_event);
+
if (comp.comp_pkt.completion_status < 0) {
dev_err(&hbus->hdev->device,
"Request for interrupt failed: 0x%x",
@@ -1495,6 +1505,8 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
put_pcichild(hpdev);
return;
+enable_tasklet:
+ tasklet_enable(&channel->callback_event);
free_int_desc:
kfree(int_desc);
drop_reference:
diff --git a/drivers/pci/controller/pci-loongson.c b/drivers/pci/controller/pci-loongson.c
new file mode 100644
index 000000000000..459009c8a4a0
--- /dev/null
+++ b/drivers/pci/controller/pci-loongson.c
@@ -0,0 +1,247 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Loongson PCI Host Controller Driver
+ *
+ * Copyright (C) 2020 Jiaxun Yang <jiaxun.yang@flygoat.com>
+ */
+
+#include <linux/of_device.h>
+#include <linux/of_pci.h>
+#include <linux/pci.h>
+#include <linux/pci_ids.h>
+
+#include "../pci.h"
+
+/* Device IDs */
+#define DEV_PCIE_PORT_0 0x7a09
+#define DEV_PCIE_PORT_1 0x7a19
+#define DEV_PCIE_PORT_2 0x7a29
+
+#define DEV_LS2K_APB 0x7a02
+#define DEV_LS7A_CONF 0x7a10
+#define DEV_LS7A_LPC 0x7a0c
+
+#define FLAG_CFG0 BIT(0)
+#define FLAG_CFG1 BIT(1)
+#define FLAG_DEV_FIX BIT(2)
+
+struct loongson_pci {
+ void __iomem *cfg0_base;
+ void __iomem *cfg1_base;
+ struct platform_device *pdev;
+ u32 flags;
+};
+
+/* Fixup wrong class code in PCIe bridges */
+static void bridge_class_quirk(struct pci_dev *dev)
+{
+ dev->class = PCI_CLASS_BRIDGE_PCI << 8;
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_LOONGSON,
+ DEV_PCIE_PORT_0, bridge_class_quirk);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_LOONGSON,
+ DEV_PCIE_PORT_1, bridge_class_quirk);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_LOONGSON,
+ DEV_PCIE_PORT_2, bridge_class_quirk);
+
+static void system_bus_quirk(struct pci_dev *pdev)
+{
+ /*
+ * The address space consumed by these devices is outside the
+ * resources of the host bridge.
+ */
+ pdev->mmio_always_on = 1;
+ pdev->non_compliant_bars = 1;
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_LOONGSON,
+ DEV_LS2K_APB, system_bus_quirk);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_LOONGSON,
+ DEV_LS7A_CONF, system_bus_quirk);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_LOONGSON,
+ DEV_LS7A_LPC, system_bus_quirk);
+
+static void loongson_mrrs_quirk(struct pci_dev *dev)
+{
+ struct pci_bus *bus = dev->bus;
+ struct pci_dev *bridge;
+ static const struct pci_device_id bridge_devids[] = {
+ { PCI_VDEVICE(LOONGSON, DEV_PCIE_PORT_0) },
+ { PCI_VDEVICE(LOONGSON, DEV_PCIE_PORT_1) },
+ { PCI_VDEVICE(LOONGSON, DEV_PCIE_PORT_2) },
+ { 0, },
+ };
+
+ /* look for the matching bridge */
+ while (!pci_is_root_bus(bus)) {
+ bridge = bus->self;
+ bus = bus->parent;
+ /*
+ * Some Loongson PCIe ports have a h/w limitation of
+ * 256 bytes maximum read request size. They can't handle
+ * anything larger than this. So force this limit on
+ * any devices attached under these ports.
+ */
+ if (pci_match_id(bridge_devids, bridge)) {
+ if (pcie_get_readrq(dev) > 256) {
+ pci_info(dev, "limiting MRRS to 256\n");
+ pcie_set_readrq(dev, 256);
+ }
+ break;
+ }
+ }
+}
+DECLARE_PCI_FIXUP_ENABLE(PCI_ANY_ID, PCI_ANY_ID, loongson_mrrs_quirk);
+
+static void __iomem *cfg1_map(struct loongson_pci *priv, int bus,
+ unsigned int devfn, int where)
+{
+ unsigned long addroff = 0x0;
+
+ if (bus != 0)
+ addroff |= BIT(28); /* Type 1 Access */
+ addroff |= (where & 0xff) | ((where & 0xf00) << 16);
+ addroff |= (bus << 16) | (devfn << 8);
+ return priv->cfg1_base + addroff;
+}
+
+static void __iomem *cfg0_map(struct loongson_pci *priv, int bus,
+ unsigned int devfn, int where)
+{
+ unsigned long addroff = 0x0;
+
+ if (bus != 0)
+ addroff |= BIT(24); /* Type 1 Access */
+ addroff |= (bus << 16) | (devfn << 8) | where;
+ return priv->cfg0_base + addroff;
+}
+
+static void __iomem *pci_loongson_map_bus(struct pci_bus *bus, unsigned int devfn,
+ int where)
+{
+ unsigned char busnum = bus->number;
+ struct pci_host_bridge *bridge = pci_find_host_bridge(bus);
+ struct loongson_pci *priv = pci_host_bridge_priv(bridge);
+
+ /*
+ * On buses other than the host bus, do not access more than
+ * one device (slot 0 only). For our hardware the root bus is always bus 0.
+ */
+ if (priv->flags & FLAG_DEV_FIX && busnum != 0 &&
+ PCI_SLOT(devfn) > 0)
+ return NULL;
+
+ /* CFG0 can only access standard space */
+ if (where < PCI_CFG_SPACE_SIZE && priv->cfg0_base)
+ return cfg0_map(priv, busnum, devfn, where);
+
+ /* CFG1 can access extended space */
+ if (where < PCI_CFG_SPACE_EXP_SIZE && priv->cfg1_base)
+ return cfg1_map(priv, busnum, devfn, where);
+
+ return NULL;
+}
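The two mapping helpers above pack bus/devfn/register into a flat offset within the CFG0 or CFG1 window, with a high bit marking type-1 (non-root-bus) accesses. A stand-alone sketch of the CFG1 encoding, not part of the patch; the bus/slot/register values are arbitrary examples:

#include <stdio.h>

#define BIT(n)			(1UL << (n))
#define PCI_DEVFN(slot, fn)	((((slot) & 0x1f) << 3) | ((fn) & 0x07))

static unsigned long cfg1_offset(int bus, unsigned int devfn, int where)
{
	unsigned long addroff = 0;

	if (bus != 0)
		addroff |= BIT(28);	/* type 1 access */
	addroff |= (where & 0xff) | ((where & 0xf00) << 16);
	addroff |= (bus << 16) | (devfn << 8);
	return addroff;
}

int main(void)
{
	/* Bus 1, slot 2, function 0, extended config register 0x104 */
	printf("0x%lx\n", cfg1_offset(1, PCI_DEVFN(2, 0), 0x104));	/* prints 0x11011004 */
	return 0;
}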
+
+static int loongson_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+{
+ int irq;
+ u8 val;
+
+ irq = of_irq_parse_and_map_pci(dev, slot, pin);
+ if (irq > 0)
+ return irq;
+
+ /* Take care of i8259 legacy systems */
+ pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &val);
+ /* The i8259 only has 15 IRQs */
+ if (val > 15)
+ return 0;
+
+ return val;
+}
+
+/* H/w only accepts 32-bit PCI operations */
+static struct pci_ops loongson_pci_ops = {
+ .map_bus = pci_loongson_map_bus,
+ .read = pci_generic_config_read32,
+ .write = pci_generic_config_write32,
+};
+
+static const struct of_device_id loongson_pci_of_match[] = {
+ { .compatible = "loongson,ls2k-pci",
+ .data = (void *)(FLAG_CFG0 | FLAG_CFG1 | FLAG_DEV_FIX), },
+ { .compatible = "loongson,ls7a-pci",
+ .data = (void *)(FLAG_CFG0 | FLAG_CFG1 | FLAG_DEV_FIX), },
+ { .compatible = "loongson,rs780e-pci",
+ .data = (void *)(FLAG_CFG0), },
+ {}
+};
+
+static int loongson_pci_probe(struct platform_device *pdev)
+{
+ struct loongson_pci *priv;
+ struct device *dev = &pdev->dev;
+ struct device_node *node = dev->of_node;
+ struct pci_host_bridge *bridge;
+ struct resource *regs;
+ int err;
+
+ if (!node)
+ return -ENODEV;
+
+ bridge = devm_pci_alloc_host_bridge(dev, sizeof(*priv));
+ if (!bridge)
+ return -ENODEV;
+
+ priv = pci_host_bridge_priv(bridge);
+ priv->pdev = pdev;
+ priv->flags = (unsigned long)of_device_get_match_data(dev);
+
+ regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!regs) {
+ dev_err(dev, "missing mem resources for cfg0\n");
+ return -EINVAL;
+ }
+
+ priv->cfg0_base = devm_pci_remap_cfg_resource(dev, regs);
+ if (IS_ERR(priv->cfg0_base))
+ return PTR_ERR(priv->cfg0_base);
+
+ /* CFG1 is optional */
+ if (priv->flags & FLAG_CFG1) {
+ regs = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+ if (!regs)
+ dev_info(dev, "missing mem resource for cfg1\n");
+ else {
+ priv->cfg1_base = devm_pci_remap_cfg_resource(dev, regs);
+ if (IS_ERR(priv->cfg1_base))
+ priv->cfg1_base = NULL;
+ }
+ }
+
+ err = pci_parse_request_of_pci_ranges(dev, &bridge->windows,
+ &bridge->dma_ranges, NULL);
+ if (err) {
+ dev_err(dev, "failed to get bridge resources\n");
+ return err;
+ }
+
+ bridge->dev.parent = dev;
+ bridge->sysdata = priv;
+ bridge->ops = &loongson_pci_ops;
+ bridge->map_irq = loongson_map_irq;
+
+ err = pci_host_probe(bridge);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static struct platform_driver loongson_pci_driver = {
+ .driver = {
+ .name = "loongson-pci",
+ .of_match_table = loongson_pci_of_match,
+ },
+ .probe = loongson_pci_probe,
+};
+builtin_platform_driver(loongson_pci_driver);
diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c
index b3869951c0eb..b4c92cee13f8 100644
--- a/drivers/pci/hotplug/acpiphp_glue.c
+++ b/drivers/pci/hotplug/acpiphp_glue.c
@@ -385,19 +385,12 @@ static unsigned char acpiphp_max_busnr(struct pci_bus *bus)
static void acpiphp_set_acpi_region(struct acpiphp_slot *slot)
{
struct acpiphp_func *func;
- union acpi_object params[2];
- struct acpi_object_list arg_list;
list_for_each_entry(func, &slot->funcs, sibling) {
- arg_list.count = 2;
- arg_list.pointer = params;
- params[0].type = ACPI_TYPE_INTEGER;
- params[0].integer.value = ACPI_ADR_SPACE_PCI_CONFIG;
- params[1].type = ACPI_TYPE_INTEGER;
- params[1].integer.value = 1;
/* _REG is optional, we don't care if it fails */
- acpi_evaluate_object(func_to_handle(func), "_REG", &arg_list,
- NULL);
+ acpi_evaluate_reg(func_to_handle(func),
+ ACPI_ADR_SPACE_PCI_CONFIG,
+ ACPI_REG_CONNECT);
}
}
diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c
index 312cc45c44c7..bf779f291f15 100644
--- a/drivers/pci/hotplug/pciehp_core.c
+++ b/drivers/pci/hotplug/pciehp_core.c
@@ -275,7 +275,7 @@ static int pciehp_suspend(struct pcie_device *dev)
* If the port is already runtime suspended we can keep it that
* way.
*/
- if (dev_pm_smart_suspend_and_suspended(&dev->port->dev))
+ if (dev_pm_skip_suspend(&dev->port->dev))
return 0;
pciehp_disable_interrupt(dev);
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index 0454ca0e4e3f..da6510af1221 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -776,7 +776,7 @@ static int pci_pm_suspend(struct device *dev)
static int pci_pm_suspend_late(struct device *dev)
{
- if (dev_pm_smart_suspend_and_suspended(dev))
+ if (dev_pm_skip_suspend(dev))
return 0;
pci_fixup_device(pci_fixup_suspend, to_pci_dev(dev));
@@ -789,10 +789,8 @@ static int pci_pm_suspend_noirq(struct device *dev)
struct pci_dev *pci_dev = to_pci_dev(dev);
const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
- if (dev_pm_smart_suspend_and_suspended(dev)) {
- dev->power.may_skip_resume = true;
+ if (dev_pm_skip_suspend(dev))
return 0;
- }
if (pci_has_legacy_pm_support(pci_dev))
return pci_legacy_suspend_late(dev, PMSG_SUSPEND);
@@ -880,8 +878,8 @@ Fixup:
* pci_pm_complete() to take care of fixing up the device's state
* anyway, if need be.
*/
- dev->power.may_skip_resume = device_may_wakeup(dev) ||
- !device_can_wakeup(dev);
+ if (device_can_wakeup(dev) && !device_may_wakeup(dev))
+ dev->power.may_skip_resume = false;
return 0;
}
@@ -893,18 +891,10 @@ static int pci_pm_resume_noirq(struct device *dev)
pci_power_t prev_state = pci_dev->current_state;
bool skip_bus_pm = pci_dev->skip_bus_pm;
- if (dev_pm_may_skip_resume(dev))
+ if (dev_pm_skip_resume(dev))
return 0;
/*
- * Devices with DPM_FLAG_SMART_SUSPEND may be left in runtime suspend
- * during system suspend, so update their runtime PM status to "active"
- * as they are going to be put into D0 shortly.
- */
- if (dev_pm_smart_suspend_and_suspended(dev))
- pm_runtime_set_active(dev);
-
- /*
* In the suspend-to-idle case, devices left in D0 during suspend will
* stay in D0, so it is not necessary to restore or update their
* configuration here and attempting to put them into D0 again is
@@ -928,6 +918,14 @@ static int pci_pm_resume_noirq(struct device *dev)
return 0;
}
+static int pci_pm_resume_early(struct device *dev)
+{
+ if (dev_pm_skip_resume(dev))
+ return 0;
+
+ return pm_generic_resume_early(dev);
+}
+
static int pci_pm_resume(struct device *dev)
{
struct pci_dev *pci_dev = to_pci_dev(dev);
@@ -961,6 +959,7 @@ static int pci_pm_resume(struct device *dev)
#define pci_pm_suspend_late NULL
#define pci_pm_suspend_noirq NULL
#define pci_pm_resume NULL
+#define pci_pm_resume_early NULL
#define pci_pm_resume_noirq NULL
#endif /* !CONFIG_SUSPEND */
@@ -1127,7 +1126,7 @@ static int pci_pm_poweroff(struct device *dev)
static int pci_pm_poweroff_late(struct device *dev)
{
- if (dev_pm_smart_suspend_and_suspended(dev))
+ if (dev_pm_skip_suspend(dev))
return 0;
pci_fixup_device(pci_fixup_suspend, to_pci_dev(dev));
@@ -1140,7 +1139,7 @@ static int pci_pm_poweroff_noirq(struct device *dev)
struct pci_dev *pci_dev = to_pci_dev(dev);
const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
- if (dev_pm_smart_suspend_and_suspended(dev))
+ if (dev_pm_skip_suspend(dev))
return 0;
if (pci_has_legacy_pm_support(pci_dev))
@@ -1358,6 +1357,7 @@ static const struct dev_pm_ops pci_dev_pm_ops = {
.suspend = pci_pm_suspend,
.suspend_late = pci_pm_suspend_late,
.resume = pci_pm_resume,
+ .resume_early = pci_pm_resume_early,
.freeze = pci_pm_freeze,
.thaw = pci_pm_thaw,
.poweroff = pci_pm_poweroff,
diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c
index 160d67c59310..3acf151ae015 100644
--- a/drivers/pci/pcie/portdrv_pci.c
+++ b/drivers/pci/pcie/portdrv_pci.c
@@ -115,7 +115,7 @@ static int pcie_portdrv_probe(struct pci_dev *dev,
pci_save_state(dev);
- dev_pm_set_driver_flags(&dev->dev, DPM_FLAG_NEVER_SKIP |
+ dev_pm_set_driver_flags(&dev->dev, DPM_FLAG_NO_DIRECT_COMPLETE |
DPM_FLAG_SMART_SUSPEND);
if (pci_bridge_d3_possible(dev)) {
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 77b8a145c39b..d9c2c3301a8a 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -1822,7 +1822,7 @@ int pci_setup_device(struct pci_dev *dev)
/* Device class may be changed after fixup */
class = dev->class >> 8;
- if (dev->non_compliant_bars) {
+ if (dev->non_compliant_bars && !dev->mmio_always_on) {
pci_read_config_word(dev, PCI_COMMAND, &cmd);
if (cmd & (PCI_COMMAND_IO | PCI_COMMAND_MEMORY)) {
pci_info(dev, "device has non-compliant BARs; disabling IO/MEM decoding\n");
diff --git a/drivers/pcmcia/electra_cf.c b/drivers/pcmcia/electra_cf.c
index f2741c04289d..35158cfd9c1a 100644
--- a/drivers/pcmcia/electra_cf.c
+++ b/drivers/pcmcia/electra_cf.c
@@ -178,10 +178,9 @@ static int electra_cf_probe(struct platform_device *ofdev)
struct device_node *np = ofdev->dev.of_node;
struct electra_cf_socket *cf;
struct resource mem, io;
- int status;
+ int status = -ENOMEM;
const unsigned int *prop;
int err;
- struct vm_struct *area;
err = of_address_to_resource(np, 0, &mem);
if (err)
@@ -202,30 +201,19 @@ static int electra_cf_probe(struct platform_device *ofdev)
cf->mem_phys = mem.start;
cf->mem_size = PAGE_ALIGN(resource_size(&mem));
cf->mem_base = ioremap(cf->mem_phys, cf->mem_size);
+ if (!cf->mem_base)
+ goto out_free_cf;
cf->io_size = PAGE_ALIGN(resource_size(&io));
-
- area = __get_vm_area(cf->io_size, 0, PHB_IO_BASE, PHB_IO_END);
- if (area == NULL) {
- status = -ENOMEM;
- goto fail1;
- }
-
- cf->io_virt = (void __iomem *)(area->addr);
+ cf->io_virt = ioremap_phb(io.start, cf->io_size);
+ if (!cf->io_virt)
+ goto out_unmap_mem;
cf->gpio_base = ioremap(0xfc103000, 0x1000);
+ if (!cf->gpio_base)
+ goto out_unmap_virt;
dev_set_drvdata(device, cf);
- if (!cf->mem_base || !cf->io_virt || !cf->gpio_base ||
- (__ioremap_at(io.start, cf->io_virt, cf->io_size,
- pgprot_noncached(PAGE_KERNEL)) == NULL)) {
- dev_err(device, "can't ioremap ranges\n");
- status = -ENOMEM;
- goto fail1;
- }
-
-
cf->io_base = (unsigned long)cf->io_virt - VMALLOC_END;
-
cf->iomem.start = (unsigned long)cf->mem_base;
cf->iomem.end = (unsigned long)cf->mem_base + (mem.end - mem.start);
cf->iomem.flags = IORESOURCE_MEM;
@@ -305,14 +293,13 @@ fail1:
if (cf->irq)
free_irq(cf->irq, cf);
- if (cf->io_virt)
- __iounmap_at(cf->io_virt, cf->io_size);
- if (cf->mem_base)
- iounmap(cf->mem_base);
- if (cf->gpio_base)
- iounmap(cf->gpio_base);
- if (area)
- device_init_wakeup(&ofdev->dev, 0);
+ iounmap(cf->gpio_base);
+out_unmap_virt:
+ device_init_wakeup(&ofdev->dev, 0);
+ iounmap(cf->io_virt);
+out_unmap_mem:
+ iounmap(cf->mem_base);
+out_free_cf:
kfree(cf);
return status;
@@ -330,7 +317,7 @@ static int electra_cf_remove(struct platform_device *ofdev)
free_irq(cf->irq, cf);
del_timer_sync(&cf->timer);
- __iounmap_at(cf->io_virt, cf->io_size);
+ iounmap(cf->io_virt);
iounmap(cf->mem_base);
iounmap(cf->gpio_base);
release_mem_region(cf->mem_phys, cf->mem_size);
diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
index 09ae8a970880..a9261cf48293 100644
--- a/drivers/perf/Kconfig
+++ b/drivers/perf/Kconfig
@@ -79,13 +79,6 @@ config FSL_IMX8_DDR_PMU
can give information about memory throughput and other related
events.
-config HISI_PMU
- bool "HiSilicon SoC PMU"
- depends on ARM64 && ACPI
- help
- Support for HiSilicon SoC uncore performance monitoring
- unit (PMU), such as: L3C, HHA and DDRC.
-
config QCOM_L2_PMU
bool "Qualcomm Technologies L2-cache PMU"
depends on ARCH_QCOM && ARM64 && ACPI
@@ -129,4 +122,6 @@ config ARM_SPE_PMU
Extension, which provides periodic sampling of operations in
the CPU pipeline and reports this via the perf AUX interface.
+source "drivers/perf/hisilicon/Kconfig"
+
endmenu
diff --git a/drivers/perf/arm_dsu_pmu.c b/drivers/perf/arm_dsu_pmu.c
index 70968c8c09d7..518d0603e24f 100644
--- a/drivers/perf/arm_dsu_pmu.c
+++ b/drivers/perf/arm_dsu_pmu.c
@@ -690,10 +690,8 @@ static int dsu_pmu_device_probe(struct platform_device *pdev)
}
irq = platform_get_irq(pdev, 0);
- if (irq < 0) {
- dev_warn(&pdev->dev, "Failed to find IRQ\n");
+ if (irq < 0)
return -EINVAL;
- }
name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "%s_%d",
PMUNAME, atomic_inc_return(&pmu_idx));
diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c
index f01a57e5a5f3..48e28ef93a70 100644
--- a/drivers/perf/arm_smmuv3_pmu.c
+++ b/drivers/perf/arm_smmuv3_pmu.c
@@ -814,7 +814,7 @@ static int smmu_pmu_probe(struct platform_device *pdev)
if (err) {
dev_err(dev, "Error %d registering hotplug, PMU @%pa\n",
err, &res_0->start);
- return err;
+ goto out_clear_affinity;
}
err = perf_pmu_register(&smmu_pmu->pmu, name, -1);
@@ -833,6 +833,8 @@ static int smmu_pmu_probe(struct platform_device *pdev)
out_unregister:
cpuhp_state_remove_instance_nocalls(cpuhp_state_num, &smmu_pmu->node);
+out_clear_affinity:
+ irq_set_affinity_hint(smmu_pmu->irq, NULL);
return err;
}
@@ -842,6 +844,7 @@ static int smmu_pmu_remove(struct platform_device *pdev)
perf_pmu_unregister(&smmu_pmu->pmu);
cpuhp_state_remove_instance_nocalls(cpuhp_state_num, &smmu_pmu->node);
+ irq_set_affinity_hint(smmu_pmu->irq, NULL);
return 0;
}
diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c
index b72c04852599..d80f48798bce 100644
--- a/drivers/perf/arm_spe_pmu.c
+++ b/drivers/perf/arm_spe_pmu.c
@@ -274,7 +274,7 @@ static u64 arm_spe_event_to_pmscr(struct perf_event *event)
if (!attr->exclude_kernel)
reg |= BIT(SYS_PMSCR_EL1_E1SPE_SHIFT);
- if (IS_ENABLED(CONFIG_PID_IN_CONTEXTIDR) && capable(CAP_SYS_ADMIN))
+ if (IS_ENABLED(CONFIG_PID_IN_CONTEXTIDR) && perfmon_capable())
reg |= BIT(SYS_PMSCR_EL1_CX_SHIFT);
return reg;
@@ -700,7 +700,7 @@ static int arm_spe_pmu_event_init(struct perf_event *event)
return -EOPNOTSUPP;
reg = arm_spe_event_to_pmscr(event);
- if (!capable(CAP_SYS_ADMIN) &&
+ if (!perfmon_capable() &&
(reg & (BIT(SYS_PMSCR_EL1_PA_SHIFT) |
BIT(SYS_PMSCR_EL1_CX_SHIFT) |
BIT(SYS_PMSCR_EL1_PCT_SHIFT))))
@@ -1133,10 +1133,8 @@ static int arm_spe_pmu_irq_probe(struct arm_spe_pmu *spe_pmu)
struct platform_device *pdev = spe_pmu->pdev;
int irq = platform_get_irq(pdev, 0);
- if (irq < 0) {
- dev_err(&pdev->dev, "failed to get IRQ (%d)\n", irq);
+ if (irq < 0)
return -ENXIO;
- }
if (!irq_is_percpu(irq)) {
dev_err(&pdev->dev, "expected PPI but got SPI (%d)\n", irq);
diff --git a/drivers/perf/hisilicon/Kconfig b/drivers/perf/hisilicon/Kconfig
new file mode 100644
index 000000000000..c5d1b7019fff
--- /dev/null
+++ b/drivers/perf/hisilicon/Kconfig
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config HISI_PMU
+ tristate "HiSilicon SoC PMU drivers"
+ depends on ARM64 && ACPI
+ help
+ Support for HiSilicon SoC L3 Cache performance monitor, Hydra Home
+ Agent performance monitor and DDR Controller performance monitor.
diff --git a/drivers/perf/hisilicon/Makefile b/drivers/perf/hisilicon/Makefile
index c3a96ec2bf66..e8377061845f 100644
--- a/drivers/perf/hisilicon/Makefile
+++ b/drivers/perf/hisilicon/Makefile
@@ -1,2 +1,3 @@
# SPDX-License-Identifier: GPL-2.0-only
-obj-$(CONFIG_HISI_PMU) += hisi_uncore_pmu.o hisi_uncore_l3c_pmu.o hisi_uncore_hha_pmu.o hisi_uncore_ddrc_pmu.o
+obj-$(CONFIG_HISI_PMU) += hisi_uncore_pmu.o hisi_uncore_l3c_pmu.o \
+ hisi_uncore_hha_pmu.o hisi_uncore_ddrc_pmu.o
diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c
index 453f1c6a16ca..15713faaa07e 100644
--- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c
@@ -394,8 +394,9 @@ static int hisi_ddrc_pmu_probe(struct platform_device *pdev)
ret = perf_pmu_register(&ddrc_pmu->pmu, name, -1);
if (ret) {
dev_err(ddrc_pmu->dev, "DDRC PMU register failed!\n");
- cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE,
- &ddrc_pmu->node);
+ cpuhp_state_remove_instance_nocalls(
+ CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE, &ddrc_pmu->node);
+ irq_set_affinity_hint(ddrc_pmu->irq, NULL);
}
return ret;
@@ -406,8 +407,9 @@ static int hisi_ddrc_pmu_remove(struct platform_device *pdev)
struct hisi_pmu *ddrc_pmu = platform_get_drvdata(pdev);
perf_pmu_unregister(&ddrc_pmu->pmu);
- cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE,
- &ddrc_pmu->node);
+ cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE,
+ &ddrc_pmu->node);
+ irq_set_affinity_hint(ddrc_pmu->irq, NULL);
return 0;
}
diff --git a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c
index 6a1dd72d8abb..dcc5600788a9 100644
--- a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c
@@ -283,7 +283,7 @@ static struct attribute *hisi_hha_pmu_events_attr[] = {
HISI_PMU_EVENT_ATTR(rx_wbip, 0x05),
HISI_PMU_EVENT_ATTR(rx_wtistash, 0x11),
HISI_PMU_EVENT_ATTR(rd_ddr_64b, 0x1c),
- HISI_PMU_EVENT_ATTR(wr_dr_64b, 0x1d),
+ HISI_PMU_EVENT_ATTR(wr_ddr_64b, 0x1d),
HISI_PMU_EVENT_ATTR(rd_ddr_128b, 0x1e),
HISI_PMU_EVENT_ATTR(wr_ddr_128b, 0x1f),
HISI_PMU_EVENT_ATTR(spill_num, 0x20),
@@ -406,8 +406,9 @@ static int hisi_hha_pmu_probe(struct platform_device *pdev)
ret = perf_pmu_register(&hha_pmu->pmu, name, -1);
if (ret) {
dev_err(hha_pmu->dev, "HHA PMU register failed!\n");
- cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HISI_HHA_ONLINE,
- &hha_pmu->node);
+ cpuhp_state_remove_instance_nocalls(
+ CPUHP_AP_PERF_ARM_HISI_HHA_ONLINE, &hha_pmu->node);
+ irq_set_affinity_hint(hha_pmu->irq, NULL);
}
return ret;
@@ -418,8 +419,9 @@ static int hisi_hha_pmu_remove(struct platform_device *pdev)
struct hisi_pmu *hha_pmu = platform_get_drvdata(pdev);
perf_pmu_unregister(&hha_pmu->pmu);
- cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HISI_HHA_ONLINE,
- &hha_pmu->node);
+ cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HISI_HHA_ONLINE,
+ &hha_pmu->node);
+ irq_set_affinity_hint(hha_pmu->irq, NULL);
return 0;
}
diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
index 1151e99b241c..8dd1278bec04 100644
--- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
@@ -396,8 +396,9 @@ static int hisi_l3c_pmu_probe(struct platform_device *pdev)
ret = perf_pmu_register(&l3c_pmu->pmu, name, -1);
if (ret) {
dev_err(l3c_pmu->dev, "L3C PMU register failed!\n");
- cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HISI_L3_ONLINE,
- &l3c_pmu->node);
+ cpuhp_state_remove_instance_nocalls(
+ CPUHP_AP_PERF_ARM_HISI_L3_ONLINE, &l3c_pmu->node);
+ irq_set_affinity_hint(l3c_pmu->irq, NULL);
}
return ret;
@@ -408,8 +409,9 @@ static int hisi_l3c_pmu_remove(struct platform_device *pdev)
struct hisi_pmu *l3c_pmu = platform_get_drvdata(pdev);
perf_pmu_unregister(&l3c_pmu->pmu);
- cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HISI_L3_ONLINE,
- &l3c_pmu->node);
+ cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HISI_L3_ONLINE,
+ &l3c_pmu->node);
+ irq_set_affinity_hint(l3c_pmu->irq, NULL);
return 0;
}
diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c
index 584de8f807cc..97aff877a4e7 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c
@@ -35,6 +35,7 @@ ssize_t hisi_format_sysfs_show(struct device *dev,
return sprintf(buf, "%s\n", (char *)eattr->var);
}
+EXPORT_SYMBOL_GPL(hisi_format_sysfs_show);
/*
* PMU event attributes
@@ -48,6 +49,7 @@ ssize_t hisi_event_sysfs_show(struct device *dev,
return sprintf(page, "config=0x%lx\n", (unsigned long)eattr->var);
}
+EXPORT_SYMBOL_GPL(hisi_event_sysfs_show);
/*
* sysfs cpumask attributes. For uncore PMU, we only have a single CPU to show
@@ -59,6 +61,7 @@ ssize_t hisi_cpumask_sysfs_show(struct device *dev,
return sprintf(buf, "%d\n", hisi_pmu->on_cpu);
}
+EXPORT_SYMBOL_GPL(hisi_cpumask_sysfs_show);
static bool hisi_validate_event_group(struct perf_event *event)
{
@@ -97,6 +100,7 @@ int hisi_uncore_pmu_counter_valid(struct hisi_pmu *hisi_pmu, int idx)
{
return idx >= 0 && idx < hisi_pmu->num_counters;
}
+EXPORT_SYMBOL_GPL(hisi_uncore_pmu_counter_valid);
int hisi_uncore_pmu_get_event_idx(struct perf_event *event)
{
@@ -113,6 +117,7 @@ int hisi_uncore_pmu_get_event_idx(struct perf_event *event)
return idx;
}
+EXPORT_SYMBOL_GPL(hisi_uncore_pmu_get_event_idx);
static void hisi_uncore_pmu_clear_event_idx(struct hisi_pmu *hisi_pmu, int idx)
{
@@ -173,6 +178,7 @@ int hisi_uncore_pmu_event_init(struct perf_event *event)
return 0;
}
+EXPORT_SYMBOL_GPL(hisi_uncore_pmu_event_init);
/*
* Set the counter to count the event that we're interested in,
@@ -220,6 +226,7 @@ void hisi_uncore_pmu_set_event_period(struct perf_event *event)
/* Write start value to the hardware event counter */
hisi_pmu->ops->write_counter(hisi_pmu, hwc, val);
}
+EXPORT_SYMBOL_GPL(hisi_uncore_pmu_set_event_period);
void hisi_uncore_pmu_event_update(struct perf_event *event)
{
@@ -240,6 +247,7 @@ void hisi_uncore_pmu_event_update(struct perf_event *event)
HISI_MAX_PERIOD(hisi_pmu->counter_bits);
local64_add(delta, &event->count);
}
+EXPORT_SYMBOL_GPL(hisi_uncore_pmu_event_update);
void hisi_uncore_pmu_start(struct perf_event *event, int flags)
{
@@ -262,6 +270,7 @@ void hisi_uncore_pmu_start(struct perf_event *event, int flags)
hisi_uncore_pmu_enable_event(event);
perf_event_update_userpage(event);
}
+EXPORT_SYMBOL_GPL(hisi_uncore_pmu_start);
void hisi_uncore_pmu_stop(struct perf_event *event, int flags)
{
@@ -278,6 +287,7 @@ void hisi_uncore_pmu_stop(struct perf_event *event, int flags)
hisi_uncore_pmu_event_update(event);
hwc->state |= PERF_HES_UPTODATE;
}
+EXPORT_SYMBOL_GPL(hisi_uncore_pmu_stop);
int hisi_uncore_pmu_add(struct perf_event *event, int flags)
{
@@ -300,6 +310,7 @@ int hisi_uncore_pmu_add(struct perf_event *event, int flags)
return 0;
}
+EXPORT_SYMBOL_GPL(hisi_uncore_pmu_add);
void hisi_uncore_pmu_del(struct perf_event *event, int flags)
{
@@ -311,12 +322,14 @@ void hisi_uncore_pmu_del(struct perf_event *event, int flags)
perf_event_update_userpage(event);
hisi_pmu->pmu_events.hw_events[hwc->idx] = NULL;
}
+EXPORT_SYMBOL_GPL(hisi_uncore_pmu_del);
void hisi_uncore_pmu_read(struct perf_event *event)
{
/* Read hardware counter and update the perf counter statistics */
hisi_uncore_pmu_event_update(event);
}
+EXPORT_SYMBOL_GPL(hisi_uncore_pmu_read);
void hisi_uncore_pmu_enable(struct pmu *pmu)
{
@@ -329,6 +342,7 @@ void hisi_uncore_pmu_enable(struct pmu *pmu)
hisi_pmu->ops->start_counters(hisi_pmu);
}
+EXPORT_SYMBOL_GPL(hisi_uncore_pmu_enable);
void hisi_uncore_pmu_disable(struct pmu *pmu)
{
@@ -336,6 +350,7 @@ void hisi_uncore_pmu_disable(struct pmu *pmu)
hisi_pmu->ops->stop_counters(hisi_pmu);
}
+EXPORT_SYMBOL_GPL(hisi_uncore_pmu_disable);
/*
@@ -414,10 +429,11 @@ int hisi_uncore_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
hisi_pmu->on_cpu = cpu;
/* Overflow interrupt also should use the same CPU */
- WARN_ON(irq_set_affinity(hisi_pmu->irq, cpumask_of(cpu)));
+ WARN_ON(irq_set_affinity_hint(hisi_pmu->irq, cpumask_of(cpu)));
return 0;
}
+EXPORT_SYMBOL_GPL(hisi_uncore_pmu_online_cpu);
int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
{
@@ -446,7 +462,10 @@ int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
perf_pmu_migrate_context(&hisi_pmu->pmu, cpu, target);
/* Use this CPU for event counting */
hisi_pmu->on_cpu = target;
- WARN_ON(irq_set_affinity(hisi_pmu->irq, cpumask_of(target)));
+ WARN_ON(irq_set_affinity_hint(hisi_pmu->irq, cpumask_of(target)));
return 0;
}
+EXPORT_SYMBOL_GPL(hisi_uncore_pmu_offline_cpu);
+
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/platform/chrome/chromeos_pstore.c b/drivers/platform/chrome/chromeos_pstore.c
index d13770785fb5..fa51153688b4 100644
--- a/drivers/platform/chrome/chromeos_pstore.c
+++ b/drivers/platform/chrome/chromeos_pstore.c
@@ -57,7 +57,7 @@ static struct ramoops_platform_data chromeos_ramoops_data = {
.record_size = 0x40000,
.console_size = 0x20000,
.ftrace_size = 0x20000,
- .dump_oops = 1,
+ .max_reason = KMSG_DUMP_OOPS,
};
static struct platform_device chromeos_ramoops = {
diff --git a/drivers/platform/mips/Kconfig b/drivers/platform/mips/Kconfig
index 5e77b0dc5fd6..8ac149173c64 100644
--- a/drivers/platform/mips/Kconfig
+++ b/drivers/platform/mips/Kconfig
@@ -24,4 +24,10 @@ config CPU_HWMON
help
Loongson-3A/3B CPU Hwmon (temperature sensor) driver.
+config RS780E_ACPI
+ bool "Loongson RS780E ACPI Controller"
+ depends on MACH_LOONGSON64 || COMPILE_TEST
+ help
+ Loongson RS780E PCH ACPI Controller driver.
+
endif # MIPS_PLATFORM_DEVICES
diff --git a/drivers/platform/mips/Makefile b/drivers/platform/mips/Makefile
index be8146c20dc8..178149098777 100644
--- a/drivers/platform/mips/Makefile
+++ b/drivers/platform/mips/Makefile
@@ -1,2 +1,3 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_CPU_HWMON) += cpu_hwmon.o
+obj-$(CONFIG_RS780E_ACPI) += rs780e-acpi.o
diff --git a/arch/mips/loongson64/acpi_init.c b/drivers/platform/mips/rs780e-acpi.c
index 8d7c119ddf91..e5a643b78ac9 100644
--- a/arch/mips/loongson64/acpi_init.c
+++ b/drivers/platform/mips/rs780e-acpi.c
@@ -3,32 +3,23 @@
#include <linux/init.h>
#include <linux/ioport.h>
#include <linux/export.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
-#define SBX00_ACPI_IO_BASE 0x800
-#define SBX00_ACPI_IO_SIZE 0x100
+static unsigned long acpi_iobase;
-#define ACPI_PM_EVT_BLK (SBX00_ACPI_IO_BASE + 0x00) /* 4 bytes */
-#define ACPI_PM_CNT_BLK (SBX00_ACPI_IO_BASE + 0x04) /* 2 bytes */
-#define ACPI_PMA_CNT_BLK (SBX00_ACPI_IO_BASE + 0x0F) /* 1 byte */
-#define ACPI_PM_TMR_BLK (SBX00_ACPI_IO_BASE + 0x18) /* 4 bytes */
-#define ACPI_GPE0_BLK (SBX00_ACPI_IO_BASE + 0x10) /* 8 bytes */
-#define ACPI_END (SBX00_ACPI_IO_BASE + 0x80)
+#define ACPI_PM_EVT_BLK (acpi_iobase + 0x00) /* 4 bytes */
+#define ACPI_PM_CNT_BLK (acpi_iobase + 0x04) /* 2 bytes */
+#define ACPI_PMA_CNT_BLK (acpi_iobase + 0x0F) /* 1 byte */
+#define ACPI_PM_TMR_BLK (acpi_iobase + 0x18) /* 4 bytes */
+#define ACPI_GPE0_BLK (acpi_iobase + 0x10) /* 8 bytes */
+#define ACPI_END (acpi_iobase + 0x80)
#define PM_INDEX 0xCD6
#define PM_DATA 0xCD7
#define PM2_INDEX 0xCD0
#define PM2_DATA 0xCD1
-/*
- * SCI interrupt need acpi space, allocate here
- */
-
-static int __init register_acpi_resource(void)
-{
- request_region(SBX00_ACPI_IO_BASE, SBX00_ACPI_IO_SIZE, "acpi");
- return 0;
-}
-
static void pmio_write_index(u16 index, u8 reg, u8 value)
{
outb(reg, index);
@@ -141,11 +132,38 @@ void acpi_registers_setup(void)
pm2_iowrite(0xf8, value);
}
-int __init sbx00_acpi_init(void)
+static int rs780e_acpi_probe(struct platform_device *pdev)
{
- register_acpi_resource();
+ struct resource *res;
+
+ res = platform_get_resource(pdev, IORESOURCE_IO, 0);
+ if (!res)
+ return -ENODEV;
+
+ /* The SCI interrupt needs ACPI I/O space; allocate it here */
+ if (!request_region(res->start, resource_size(res), "acpi")) {
+ pr_err("RS780E-ACPI: Failed to request IO Region\n");
+ return -EBUSY;
+ }
+
+ acpi_iobase = res->start;
+
acpi_registers_setup();
acpi_hw_clear_status();
return 0;
}
+
+static const struct of_device_id rs780e_acpi_match[] = {
+ { .compatible = "loongson,rs780e-acpi" },
+ {},
+};
+
+static struct platform_driver rs780e_acpi_driver = {
+ .probe = rs780e_acpi_probe,
+ .driver = {
+ .name = "RS780E-ACPI",
+ .of_match_table = rs780e_acpi_match,
+ },
+};
+builtin_platform_driver(rs780e_acpi_driver);
diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
index 0ad7ad8cf8e1..fb739b242796 100644
--- a/drivers/platform/x86/Kconfig
+++ b/drivers/platform/x86/Kconfig
@@ -78,6 +78,16 @@ config HUAWEI_WMI
To compile this driver as a module, choose M here: the module
will be called huawei-wmi.
+config INTEL_WMI_SBL_FW_UPDATE
+ tristate "Intel WMI Slim Bootloader firmware update signaling driver"
+ depends on ACPI_WMI
+ help
+ Say Y here if you want to be able to use the WMI interface to signal
+ Slim Bootloader to trigger an update on the next reboot.
+
+ To compile this driver as a module, choose M here: the module will
+ be called intel-wmi-sbl-fw-update.
+
config INTEL_WMI_THUNDERBOLT
tristate "Intel WMI thunderbolt force power driver"
depends on ACPI_WMI
@@ -1269,7 +1279,8 @@ config INTEL_UNCORE_FREQ_CONTROL
config INTEL_BXTWC_PMIC_TMU
tristate "Intel BXT Whiskey Cove TMU Driver"
depends on REGMAP
- depends on INTEL_SOC_PMIC_BXTWC && INTEL_PMC_IPC
+ depends on MFD_INTEL_PMC_BXT
+ depends on INTEL_SOC_PMIC_BXTWC
---help---
Select this driver to use Intel BXT Whiskey Cove PMIC TMU feature.
This driver enables the alarm wakeup functionality in the TMU unit
@@ -1295,7 +1306,7 @@ config INTEL_MFLD_THERMAL
config INTEL_MID_POWER_BUTTON
tristate "power button driver for Intel MID platforms"
- depends on INTEL_SCU_IPC && INPUT
+ depends on INTEL_SCU && INPUT
help
This driver handles the power button on the Intel MID platforms.
@@ -1327,14 +1338,6 @@ config INTEL_PMC_CORE
- LTR Ignore
- MPHY/PLL gating status (Sunrisepoint PCH only)
-config INTEL_PMC_IPC
- tristate "Intel PMC IPC Driver"
- depends on ACPI && PCI
- ---help---
- This driver provides support for PMC control on some Intel platforms.
- The PMC is an ARC processor which defines IPC commands for communication
- with other entities in the CPU.
-
config INTEL_PUNIT_IPC
tristate "Intel P-Unit IPC Driver"
---help---
@@ -1342,17 +1345,39 @@ config INTEL_PUNIT_IPC
which is used to bridge the communications between kernel and P-Unit.
config INTEL_SCU_IPC
- bool "Intel SCU IPC Support"
- depends on X86_INTEL_MID
- default y
- ---help---
- IPC is used to bridge the communications between kernel and SCU on
- some embedded Intel x86 platforms. This is not needed for PC-type
- machines.
+ bool
+
+config INTEL_SCU
+ bool
+ select INTEL_SCU_IPC
+
+config INTEL_SCU_PCI
+ bool "Intel SCU PCI driver"
+ depends on PCI
+ select INTEL_SCU
+ help
+ This driver is used to bridge the communications between kernel
+ and SCU on some embedded Intel x86 platforms. It also creates
+ devices that are connected to the SoC through the SCU.
+ Platforms supported:
+ Medfield
+ Clovertrail
+ Merrifield
+ Broxton
+ Apollo Lake
+
+config INTEL_SCU_PLATFORM
+ tristate "Intel SCU platform driver"
+ depends on ACPI
+ select INTEL_SCU
+ help
+ This driver is used to bridge the communications between kernel
+ and SCU (sometimes called PMC as well). The driver currently
+ supports Intel Elkhart Lake and compatible platforms.
config INTEL_SCU_IPC_UTIL
tristate "Intel SCU IPC utility driver"
- depends on INTEL_SCU_IPC
+ depends on INTEL_SCU
---help---
The IPC Util driver provides an interface with the SCU enabling
low level access for debug work and updating the firmware. Say
@@ -1360,7 +1385,9 @@ config INTEL_SCU_IPC_UTIL
config INTEL_TELEMETRY
tristate "Intel SoC Telemetry Driver"
- depends on INTEL_PMC_IPC && INTEL_PUNIT_IPC && X86_64
+ depends on X86_64
+ depends on MFD_INTEL_PMC_BXT
+ depends on INTEL_PUNIT_IPC
---help---
This driver provides interfaces to configure and use
telemetry for INTEL SoC from APL onwards. It is also
diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile
index 53408d965874..2b85852a1a87 100644
--- a/drivers/platform/x86/Makefile
+++ b/drivers/platform/x86/Makefile
@@ -11,6 +11,7 @@ obj-$(CONFIG_WMI_BMOF) += wmi-bmof.o
# WMI drivers
obj-$(CONFIG_ALIENWARE_WMI) += alienware-wmi.o
obj-$(CONFIG_HUAWEI_WMI) += huawei-wmi.o
+obj-$(CONFIG_INTEL_WMI_SBL_FW_UPDATE) += intel-wmi-sbl-fw-update.o
obj-$(CONFIG_INTEL_WMI_THUNDERBOLT) += intel-wmi-thunderbolt.o
obj-$(CONFIG_MXM_WMI) += mxm-wmi.o
obj-$(CONFIG_PEAQ_WMI) += peaq-wmi.o
@@ -138,9 +139,10 @@ obj-$(CONFIG_INTEL_MFLD_THERMAL) += intel_mid_thermal.o
obj-$(CONFIG_INTEL_MID_POWER_BUTTON) += intel_mid_powerbtn.o
obj-$(CONFIG_INTEL_MRFLD_PWRBTN) += intel_mrfld_pwrbtn.o
obj-$(CONFIG_INTEL_PMC_CORE) += intel_pmc_core.o intel_pmc_core_pltdrv.o
-obj-$(CONFIG_INTEL_PMC_IPC) += intel_pmc_ipc.o
obj-$(CONFIG_INTEL_PUNIT_IPC) += intel_punit_ipc.o
obj-$(CONFIG_INTEL_SCU_IPC) += intel_scu_ipc.o
+obj-$(CONFIG_INTEL_SCU_PCI) += intel_scu_pcidrv.o
+obj-$(CONFIG_INTEL_SCU_PLATFORM) += intel_scu_pltdrv.o
obj-$(CONFIG_INTEL_SCU_IPC_UTIL) += intel_scu_ipcutil.o
obj-$(CONFIG_INTEL_TELEMETRY) += intel_telemetry_core.o \
intel_telemetry_pltdrv.o \
diff --git a/drivers/platform/x86/acerhdf.c b/drivers/platform/x86/acerhdf.c
index 8cc86f4e3ac1..4df7609b4aa9 100644
--- a/drivers/platform/x86/acerhdf.c
+++ b/drivers/platform/x86/acerhdf.c
@@ -827,7 +827,7 @@ MODULE_ALIAS("dmi:*:*Packard*Bell*:pnDOTMU*:");
MODULE_ALIAS("dmi:*:*Packard*Bell*:pnENBFT*:");
MODULE_ALIAS("dmi:*:*Packard*Bell*:pnDOTMA*:");
MODULE_ALIAS("dmi:*:*Packard*Bell*:pnDOTVR46*:");
-MODULE_ALIAS("dmi:*:*Acer*:pnExtensa 5420*:");
+MODULE_ALIAS("dmi:*:*Acer*:pnExtensa*5420*:");
module_init(acerhdf_init);
module_exit(acerhdf_exit);
diff --git a/drivers/platform/x86/asus-laptop.c b/drivers/platform/x86/asus-laptop.c
index a666fbc2e73b..0edafe687fa9 100644
--- a/drivers/platform/x86/asus-laptop.c
+++ b/drivers/platform/x86/asus-laptop.c
@@ -640,22 +640,15 @@ static enum led_brightness asus_kled_cdev_get(struct led_classdev *led_cdev)
static void asus_led_exit(struct asus_laptop *asus)
{
- if (!IS_ERR_OR_NULL(asus->wled.led.dev))
- led_classdev_unregister(&asus->wled.led);
- if (!IS_ERR_OR_NULL(asus->bled.led.dev))
- led_classdev_unregister(&asus->bled.led);
- if (!IS_ERR_OR_NULL(asus->mled.led.dev))
- led_classdev_unregister(&asus->mled.led);
- if (!IS_ERR_OR_NULL(asus->tled.led.dev))
- led_classdev_unregister(&asus->tled.led);
- if (!IS_ERR_OR_NULL(asus->pled.led.dev))
- led_classdev_unregister(&asus->pled.led);
- if (!IS_ERR_OR_NULL(asus->rled.led.dev))
- led_classdev_unregister(&asus->rled.led);
- if (!IS_ERR_OR_NULL(asus->gled.led.dev))
- led_classdev_unregister(&asus->gled.led);
- if (!IS_ERR_OR_NULL(asus->kled.led.dev))
- led_classdev_unregister(&asus->kled.led);
+ led_classdev_unregister(&asus->wled.led);
+ led_classdev_unregister(&asus->bled.led);
+ led_classdev_unregister(&asus->mled.led);
+ led_classdev_unregister(&asus->tled.led);
+ led_classdev_unregister(&asus->pled.led);
+ led_classdev_unregister(&asus->rled.led);
+ led_classdev_unregister(&asus->gled.led);
+ led_classdev_unregister(&asus->kled.led);
+
if (asus->led_workqueue) {
destroy_workqueue(asus->led_workqueue);
asus->led_workqueue = NULL;
diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c
index c4404d9c1de4..8c4d00482ef0 100644
--- a/drivers/platform/x86/asus-nb-wmi.c
+++ b/drivers/platform/x86/asus-nb-wmi.c
@@ -472,6 +472,7 @@ static const struct key_entry asus_nb_wmi_keymap[] = {
{ KE_KEY, 0x6B, { KEY_TOUCHPAD_TOGGLE } },
{ KE_IGNORE, 0x6E, }, /* Low Battery notification */
{ KE_KEY, 0x71, { KEY_F13 } }, /* General-purpose button */
+ { KE_IGNORE, 0x79, }, /* Charger type detection notification */
{ KE_KEY, 0x7a, { KEY_ALS_TOGGLE } }, /* Ambient Light Sensor Toggle */
{ KE_KEY, 0x7c, { KEY_MICMUTE } },
{ KE_KEY, 0x7D, { KEY_BLUETOOTH } }, /* Bluetooth Enable */
diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c
index bb7c529d7d16..877aade19497 100644
--- a/drivers/platform/x86/asus-wmi.c
+++ b/drivers/platform/x86/asus-wmi.c
@@ -57,6 +57,7 @@ MODULE_LICENSE("GPL");
#define NOTIFY_BRNDOWN_MIN 0x20
#define NOTIFY_BRNDOWN_MAX 0x2e
#define NOTIFY_FNLOCK_TOGGLE 0x4e
+#define NOTIFY_KBD_DOCK_CHANGE 0x75
#define NOTIFY_KBD_BRTUP 0xc4
#define NOTIFY_KBD_BRTDWN 0xc5
#define NOTIFY_KBD_BRTTOGGLE 0xc7
@@ -116,6 +117,8 @@ struct bios_args {
u32 arg0;
u32 arg1;
u32 arg2; /* At least TUF Gaming series uses 3 dword input buffer. */
+ u32 arg4;
+ u32 arg5;
} __packed;
/*
@@ -222,45 +225,6 @@ struct asus_wmi {
struct asus_wmi_driver *driver;
};
-/* Input **********************************************************************/
-
-static int asus_wmi_input_init(struct asus_wmi *asus)
-{
- int err;
-
- asus->inputdev = input_allocate_device();
- if (!asus->inputdev)
- return -ENOMEM;
-
- asus->inputdev->name = asus->driver->input_name;
- asus->inputdev->phys = asus->driver->input_phys;
- asus->inputdev->id.bustype = BUS_HOST;
- asus->inputdev->dev.parent = &asus->platform_device->dev;
- set_bit(EV_REP, asus->inputdev->evbit);
-
- err = sparse_keymap_setup(asus->inputdev, asus->driver->keymap, NULL);
- if (err)
- goto err_free_dev;
-
- err = input_register_device(asus->inputdev);
- if (err)
- goto err_free_dev;
-
- return 0;
-
-err_free_dev:
- input_free_device(asus->inputdev);
- return err;
-}
-
-static void asus_wmi_input_exit(struct asus_wmi *asus)
-{
- if (asus->inputdev)
- input_unregister_device(asus->inputdev);
-
- asus->inputdev = NULL;
-}
-
/* WMI ************************************************************************/
static int asus_wmi_evaluate_method3(u32 method_id,
@@ -309,7 +273,7 @@ static int asus_wmi_evaluate_method_agfn(const struct acpi_buffer args)
struct acpi_buffer input;
u64 phys_addr;
u32 retval;
- u32 status = -1;
+ u32 status;
/*
* Copy to dma capable address otherwise memory corruption occurs as
@@ -381,6 +345,53 @@ static bool asus_wmi_dev_is_present(struct asus_wmi *asus, u32 dev_id)
return status == 0 && (retval & ASUS_WMI_DSTS_PRESENCE_BIT);
}
+/* Input **********************************************************************/
+
+static int asus_wmi_input_init(struct asus_wmi *asus)
+{
+ int err, result;
+
+ asus->inputdev = input_allocate_device();
+ if (!asus->inputdev)
+ return -ENOMEM;
+
+ asus->inputdev->name = asus->driver->input_name;
+ asus->inputdev->phys = asus->driver->input_phys;
+ asus->inputdev->id.bustype = BUS_HOST;
+ asus->inputdev->dev.parent = &asus->platform_device->dev;
+ set_bit(EV_REP, asus->inputdev->evbit);
+
+ err = sparse_keymap_setup(asus->inputdev, asus->driver->keymap, NULL);
+ if (err)
+ goto err_free_dev;
+
+ result = asus_wmi_get_devstate_simple(asus, ASUS_WMI_DEVID_KBD_DOCK);
+ if (result >= 0) {
+ input_set_capability(asus->inputdev, EV_SW, SW_TABLET_MODE);
+ input_report_switch(asus->inputdev, SW_TABLET_MODE, !result);
+ } else if (result != -ENODEV) {
+ pr_err("Error checking for keyboard-dock: %d\n", result);
+ }
+
+ err = input_register_device(asus->inputdev);
+ if (err)
+ goto err_free_dev;
+
+ return 0;
+
+err_free_dev:
+ input_free_device(asus->inputdev);
+ return err;
+}
+
+static void asus_wmi_input_exit(struct asus_wmi *asus)
+{
+ if (asus->inputdev)
+ input_unregister_device(asus->inputdev);
+
+ asus->inputdev = NULL;
+}
+
/* Battery ********************************************************************/
/* The battery maximum charging percentage */
@@ -675,14 +686,11 @@ static enum led_brightness lightbar_led_get(struct led_classdev *led_cdev)
static void asus_wmi_led_exit(struct asus_wmi *asus)
{
- if (!IS_ERR_OR_NULL(asus->kbd_led.dev))
- led_classdev_unregister(&asus->kbd_led);
- if (!IS_ERR_OR_NULL(asus->tpd_led.dev))
- led_classdev_unregister(&asus->tpd_led);
- if (!IS_ERR_OR_NULL(asus->wlan_led.dev))
- led_classdev_unregister(&asus->wlan_led);
- if (!IS_ERR_OR_NULL(asus->lightbar_led.dev))
- led_classdev_unregister(&asus->lightbar_led);
+ led_classdev_unregister(&asus->kbd_led);
+ led_classdev_unregister(&asus->tpd_led);
+ led_classdev_unregister(&asus->wlan_led);
+ led_classdev_unregister(&asus->lightbar_led);
+
if (asus->led_workqueue)
destroy_workqueue(asus->led_workqueue);
}
@@ -2058,9 +2066,9 @@ static int asus_wmi_get_event_code(u32 value)
static void asus_wmi_handle_event_code(int code, struct asus_wmi *asus)
{
- int orig_code;
unsigned int key_value = 1;
bool autorelease = 1;
+ int result, orig_code;
orig_code = code;
@@ -2105,6 +2113,17 @@ static void asus_wmi_handle_event_code(int code, struct asus_wmi *asus)
return;
}
+ if (code == NOTIFY_KBD_DOCK_CHANGE) {
+ result = asus_wmi_get_devstate_simple(asus,
+ ASUS_WMI_DEVID_KBD_DOCK);
+ if (result >= 0) {
+ input_report_switch(asus->inputdev, SW_TABLET_MODE,
+ !result);
+ input_sync(asus->inputdev);
+ }
+ return;
+ }
+
if (asus->fan_boost_mode_available && code == NOTIFY_KBD_FBM) {
fan_boost_mode_switch_next(asus);
return;
diff --git a/drivers/platform/x86/dcdbas.c b/drivers/platform/x86/dcdbas.c
index 84f4cc839cc3..d513a59a5d47 100644
--- a/drivers/platform/x86/dcdbas.c
+++ b/drivers/platform/x86/dcdbas.c
@@ -15,6 +15,7 @@
#include <linux/platform_device.h>
#include <linux/acpi.h>
#include <linux/dma-mapping.h>
+#include <linux/dmi.h>
#include <linux/errno.h>
#include <linux/cpu.h>
#include <linux/gfp.h>
@@ -34,7 +35,7 @@
#include "dcdbas.h"
#define DRIVER_NAME "dcdbas"
-#define DRIVER_VERSION "5.6.0-3.3"
+#define DRIVER_VERSION "5.6.0-3.4"
#define DRIVER_DESCRIPTION "Dell Systems Management Base Driver"
static struct platform_device *dcdbas_pdev;
@@ -45,7 +46,7 @@ static unsigned long smi_data_buf_size;
static unsigned long max_smi_data_buf_size = MAX_SMI_DATA_BUF_SIZE;
static u32 smi_data_buf_phys_addr;
static DEFINE_MUTEX(smi_data_lock);
-static u8 *eps_buffer;
+static u8 *bios_buffer;
static unsigned int host_control_action;
static unsigned int host_control_smi_type;
@@ -518,8 +519,10 @@ static inline struct smm_eps_table *check_eps_table(u8 *addr)
static int dcdbas_check_wsmt(void)
{
+ const struct dmi_device *dev = NULL;
struct acpi_table_wsmt *wsmt = NULL;
struct smm_eps_table *eps = NULL;
+ u64 bios_buf_paddr;
u64 remap_size;
u8 *addr;
@@ -532,6 +535,17 @@ static int dcdbas_check_wsmt(void)
!(wsmt->protection_flags & ACPI_WSMT_COMM_BUFFER_NESTED_PTR_PROTECTION))
return 0;
+ /*
+ * BIOS could provide the address/size of the protected buffer
+ * in an SMBIOS string or in an EPS structure in 0xFxxxx.
+ */
+
+ /* Check SMBIOS for buffer address */
+ while ((dev = dmi_find_device(DMI_DEV_TYPE_OEM_STRING, NULL, dev)))
+ if (sscanf(dev->name, "30[%16llx;%8llx]", &bios_buf_paddr,
+ &remap_size) == 2)
+ goto remap;
+
/* Scan for EPS (entry point structure) */
for (addr = (u8 *)__va(0xf0000);
addr < (u8 *)__va(0x100000 - sizeof(struct smm_eps_table));
@@ -542,34 +556,37 @@ static int dcdbas_check_wsmt(void)
}
if (!eps) {
- dev_dbg(&dcdbas_pdev->dev, "found WSMT, but no EPS found\n");
+ dev_dbg(&dcdbas_pdev->dev, "found WSMT, but no firmware buffer found\n");
return -ENODEV;
}
+ bios_buf_paddr = eps->smm_comm_buff_addr;
+ remap_size = eps->num_of_4k_pages * PAGE_SIZE;
+remap:
/*
* Get physical address of buffer and map to virtual address.
* Table gives size in 4K pages, regardless of actual system page size.
*/
- if (upper_32_bits(eps->smm_comm_buff_addr + 8)) {
- dev_warn(&dcdbas_pdev->dev, "found WSMT, but EPS buffer address is above 4GB\n");
+ if (upper_32_bits(bios_buf_paddr + 8)) {
+ dev_warn(&dcdbas_pdev->dev, "found WSMT, but buffer address is above 4GB\n");
return -EINVAL;
}
/*
* Limit remap size to MAX_SMI_DATA_BUF_SIZE + 8 (since the first 8
* bytes are used for a semaphore, not the data buffer itself).
*/
- remap_size = eps->num_of_4k_pages * PAGE_SIZE;
if (remap_size > MAX_SMI_DATA_BUF_SIZE + 8)
remap_size = MAX_SMI_DATA_BUF_SIZE + 8;
- eps_buffer = memremap(eps->smm_comm_buff_addr, remap_size, MEMREMAP_WB);
- if (!eps_buffer) {
- dev_warn(&dcdbas_pdev->dev, "found WSMT, but failed to map EPS buffer\n");
+
+ bios_buffer = memremap(bios_buf_paddr, remap_size, MEMREMAP_WB);
+ if (!bios_buffer) {
+ dev_warn(&dcdbas_pdev->dev, "found WSMT, but failed to map buffer\n");
return -ENOMEM;
}
/* First 8 bytes is for a semaphore, not part of the smi_data_buf */
- smi_data_buf_phys_addr = eps->smm_comm_buff_addr + 8;
- smi_data_buf = eps_buffer + 8;
+ smi_data_buf_phys_addr = bios_buf_paddr + 8;
+ smi_data_buf = bios_buffer + 8;
smi_data_buf_size = remap_size - 8;
max_smi_data_buf_size = smi_data_buf_size;
wsmt_enabled = true;
@@ -736,8 +753,8 @@ static void __exit dcdbas_exit(void)
*/
if (dcdbas_pdev)
smi_data_buf_free();
- if (eps_buffer)
- memunmap(eps_buffer);
+ if (bios_buffer)
+ memunmap(bios_buffer);
platform_device_unregister(dcdbas_pdev_reg);
platform_driver_unregister(&dcdbas_driver);
}
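For reference, the WSMT hunk above scans DMI OEM strings for a pattern of the form 30[<paddr>;<size>]. Below is a standalone sketch of that parsing with an invented example string, assuming the same sscanf format as the patch.

/* Editorial illustration only; the OEM string below is hypothetical. */
#include <stdio.h>

int main(void)
{
	const char *oem = "30[00000000DEADB000;00002000]";
	unsigned long long paddr, size;

	/* Same format string as the dcdbas hunk above. */
	if (sscanf(oem, "30[%16llx;%8llx]", &paddr, &size) == 2)
		printf("protected buffer at 0x%llx, size 0x%llx bytes\n",
		       paddr, size);
	return 0;
}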
diff --git a/drivers/platform/x86/dell-laptop.c b/drivers/platform/x86/dell-laptop.c
index f8d3e3bd1bb5..5e9c2296931c 100644
--- a/drivers/platform/x86/dell-laptop.c
+++ b/drivers/platform/x86/dell-laptop.c
@@ -2204,10 +2204,13 @@ static int __init dell_init(void)
dell_laptop_register_notifier(&dell_laptop_notifier);
- micmute_led_cdev.brightness = ledtrig_audio_get(LED_AUDIO_MICMUTE);
- ret = led_classdev_register(&platform_device->dev, &micmute_led_cdev);
- if (ret < 0)
- goto fail_led;
+ if (dell_smbios_find_token(GLOBAL_MIC_MUTE_DISABLE) &&
+ dell_smbios_find_token(GLOBAL_MIC_MUTE_ENABLE)) {
+ micmute_led_cdev.brightness = ledtrig_audio_get(LED_AUDIO_MICMUTE);
+ ret = led_classdev_register(&platform_device->dev, &micmute_led_cdev);
+ if (ret < 0)
+ goto fail_led;
+ }
if (acpi_video_get_backlight_type() != acpi_backlight_vendor)
return 0;
diff --git a/drivers/platform/x86/dell-wmi.c b/drivers/platform/x86/dell-wmi.c
index 86e8dd6a8b33..c25a4286d766 100644
--- a/drivers/platform/x86/dell-wmi.c
+++ b/drivers/platform/x86/dell-wmi.c
@@ -310,6 +310,16 @@ static const struct key_entry dell_wmi_keymap_type_0011[] = {
/* Battery inserted */
{ KE_IGNORE, 0xfff1, { KEY_RESERVED } },
+ /*
+ * Detachable keyboard detached / undocked
+ * Note SW_TABLET_MODE is already reported through the intel_vbtn
+ * driver for this, so we ignore it.
+ */
+ { KE_IGNORE, 0xfff2, { KEY_RESERVED } },
+
+ /* Detachable keyboard attached / docked */
+ { KE_IGNORE, 0xfff3, { KEY_RESERVED } },
+
/* Keyboard backlight level changed */
{ KE_IGNORE, KBD_LED_OFF_TOKEN, { KEY_RESERVED } },
{ KE_IGNORE, KBD_LED_ON_TOKEN, { KEY_RESERVED } },
diff --git a/drivers/platform/x86/eeepc-laptop.c b/drivers/platform/x86/eeepc-laptop.c
index 776868d5e458..ba08c9235f76 100644
--- a/drivers/platform/x86/eeepc-laptop.c
+++ b/drivers/platform/x86/eeepc-laptop.c
@@ -541,13 +541,11 @@ static int eeepc_led_init(struct eeepc_laptop *eeepc)
static void eeepc_led_exit(struct eeepc_laptop *eeepc)
{
- if (!IS_ERR_OR_NULL(eeepc->tpd_led.dev))
- led_classdev_unregister(&eeepc->tpd_led);
+ led_classdev_unregister(&eeepc->tpd_led);
if (eeepc->led_workqueue)
destroy_workqueue(eeepc->led_workqueue);
}
-
/*
* PCI hotplug (for wlan rfkill)
*/
diff --git a/drivers/platform/x86/hp-wmi.c b/drivers/platform/x86/hp-wmi.c
index a881b709af25..1762f335bac9 100644
--- a/drivers/platform/x86/hp-wmi.c
+++ b/drivers/platform/x86/hp-wmi.c
@@ -111,10 +111,10 @@ enum hp_wireless2_bits {
HPWMI_POWER_SOFT = 0x02,
HPWMI_POWER_BIOS = 0x04,
HPWMI_POWER_HARD = 0x08,
+ HPWMI_POWER_FW_OR_HW = HPWMI_POWER_BIOS | HPWMI_POWER_HARD,
};
-#define IS_HWBLOCKED(x) ((x & (HPWMI_POWER_BIOS | HPWMI_POWER_HARD)) \
- != (HPWMI_POWER_BIOS | HPWMI_POWER_HARD))
+#define IS_HWBLOCKED(x) ((x & HPWMI_POWER_FW_OR_HW) != HPWMI_POWER_FW_OR_HW)
#define IS_SWBLOCKED(x) !(x & HPWMI_POWER_SOFT)
struct bios_rfkill2_device_state {
@@ -461,8 +461,14 @@ static ssize_t postcode_show(struct device *dev, struct device_attribute *attr,
static ssize_t als_store(struct device *dev, struct device_attribute *attr,
const char *buf, size_t count)
{
- u32 tmp = simple_strtoul(buf, NULL, 10);
- int ret = hp_wmi_perform_query(HPWMI_ALS_QUERY, HPWMI_WRITE, &tmp,
+ u32 tmp;
+ int ret;
+
+ ret = kstrtou32(buf, 10, &tmp);
+ if (ret)
+ return ret;
+
+ ret = hp_wmi_perform_query(HPWMI_ALS_QUERY, HPWMI_WRITE, &tmp,
sizeof(tmp), sizeof(tmp));
if (ret)
return ret < 0 ? ret : -EINVAL;
@@ -473,22 +479,20 @@ static ssize_t als_store(struct device *dev, struct device_attribute *attr,
static ssize_t postcode_store(struct device *dev, struct device_attribute *attr,
const char *buf, size_t count)
{
- long unsigned int tmp2;
+ u32 tmp = 1;
+ bool clear;
int ret;
- u32 tmp;
- ret = kstrtoul(buf, 10, &tmp2);
- if (!ret && tmp2 != 1)
- ret = -EINVAL;
+ ret = kstrtobool(buf, &clear);
if (ret)
- goto out;
+ return ret;
+
+ if (!clear)
+ return -EINVAL;
 /* Clear the POST error code. It is kept until cleared. */
- tmp = (u32) tmp2;
ret = hp_wmi_perform_query(HPWMI_POSTCODEERROR_QUERY, HPWMI_WRITE, &tmp,
sizeof(tmp), sizeof(tmp));
-
-out:
if (ret)
return ret < 0 ? ret : -EINVAL;
diff --git a/drivers/platform/x86/intel-hid.c b/drivers/platform/x86/intel-hid.c
index cc7dd4d87cce..9ee79b74311c 100644
--- a/drivers/platform/x86/intel-hid.c
+++ b/drivers/platform/x86/intel-hid.c
@@ -79,6 +79,13 @@ static const struct dmi_system_id button_array_table[] = {
DMI_MATCH(DMI_PRODUCT_NAME, "Wacom MobileStudio Pro 16"),
},
},
+ {
+ .ident = "HP Spectre x2 (2015)",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "HP"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "HP Spectre x2 Detachable"),
+ },
+ },
{ }
};
diff --git a/drivers/platform/x86/intel-vbtn.c b/drivers/platform/x86/intel-vbtn.c
index b5880936d785..0487b606a274 100644
--- a/drivers/platform/x86/intel-vbtn.c
+++ b/drivers/platform/x86/intel-vbtn.c
@@ -40,28 +40,70 @@ static const struct key_entry intel_vbtn_keymap[] = {
{ KE_IGNORE, 0xC7, { KEY_VOLUMEDOWN } }, /* volume-down key release */
{ KE_KEY, 0xC8, { KEY_ROTATE_LOCK_TOGGLE } }, /* rotate-lock key press */
{ KE_KEY, 0xC9, { KEY_ROTATE_LOCK_TOGGLE } }, /* rotate-lock key release */
+};
+
+static const struct key_entry intel_vbtn_switchmap[] = {
{ KE_SW, 0xCA, { .sw = { SW_DOCK, 1 } } }, /* Docked */
{ KE_SW, 0xCB, { .sw = { SW_DOCK, 0 } } }, /* Undocked */
{ KE_SW, 0xCC, { .sw = { SW_TABLET_MODE, 1 } } }, /* Tablet */
{ KE_SW, 0xCD, { .sw = { SW_TABLET_MODE, 0 } } }, /* Laptop */
- { KE_END },
};
+#define KEYMAP_LEN \
+ (ARRAY_SIZE(intel_vbtn_keymap) + ARRAY_SIZE(intel_vbtn_switchmap) + 1)
+
struct intel_vbtn_priv {
+ struct key_entry keymap[KEYMAP_LEN];
struct input_dev *input_dev;
+ bool has_buttons;
+ bool has_switches;
bool wakeup_mode;
};
+static void detect_tablet_mode(struct platform_device *device)
+{
+ struct intel_vbtn_priv *priv = dev_get_drvdata(&device->dev);
+ acpi_handle handle = ACPI_HANDLE(&device->dev);
+ unsigned long long vgbs;
+ acpi_status status;
+ int m;
+
+ status = acpi_evaluate_integer(handle, "VGBS", NULL, &vgbs);
+ if (ACPI_FAILURE(status))
+ return;
+
+ m = !(vgbs & TABLET_MODE_FLAG);
+ input_report_switch(priv->input_dev, SW_TABLET_MODE, m);
+ m = (vgbs & DOCK_MODE_FLAG) ? 1 : 0;
+ input_report_switch(priv->input_dev, SW_DOCK, m);
+}
+
static int intel_vbtn_input_setup(struct platform_device *device)
{
struct intel_vbtn_priv *priv = dev_get_drvdata(&device->dev);
- int ret;
+ int ret, keymap_len = 0;
+
+ if (priv->has_buttons) {
+ memcpy(&priv->keymap[keymap_len], intel_vbtn_keymap,
+ ARRAY_SIZE(intel_vbtn_keymap) *
+ sizeof(struct key_entry));
+ keymap_len += ARRAY_SIZE(intel_vbtn_keymap);
+ }
+
+ if (priv->has_switches) {
+ memcpy(&priv->keymap[keymap_len], intel_vbtn_switchmap,
+ ARRAY_SIZE(intel_vbtn_switchmap) *
+ sizeof(struct key_entry));
+ keymap_len += ARRAY_SIZE(intel_vbtn_switchmap);
+ }
+
+ priv->keymap[keymap_len].type = KE_END;
priv->input_dev = devm_input_allocate_device(&device->dev);
if (!priv->input_dev)
return -ENOMEM;
- ret = sparse_keymap_setup(priv->input_dev, intel_vbtn_keymap, NULL);
+ ret = sparse_keymap_setup(priv->input_dev, priv->keymap, NULL);
if (ret)
return ret;
@@ -69,6 +111,9 @@ static int intel_vbtn_input_setup(struct platform_device *device)
priv->input_dev->name = "Intel Virtual Button driver";
priv->input_dev->id.bustype = BUS_HOST;
+ if (priv->has_switches)
+ detect_tablet_mode(device);
+
return input_register_device(priv->input_dev);
}
@@ -114,44 +159,46 @@ out_unknown:
dev_dbg(&device->dev, "unknown event index 0x%x\n", event);
}
-static void detect_tablet_mode(struct platform_device *device)
+static bool intel_vbtn_has_buttons(acpi_handle handle)
{
- const char *chassis_type = dmi_get_system_info(DMI_CHASSIS_TYPE);
- struct intel_vbtn_priv *priv = dev_get_drvdata(&device->dev);
- acpi_handle handle = ACPI_HANDLE(&device->dev);
- struct acpi_buffer vgbs_output = { ACPI_ALLOCATE_BUFFER, NULL };
- union acpi_object *obj;
acpi_status status;
- int m;
- if (!(chassis_type && strcmp(chassis_type, "31") == 0))
- goto out;
+ status = acpi_evaluate_object(handle, "VBDL", NULL, NULL);
+ return ACPI_SUCCESS(status);
+}
- status = acpi_evaluate_object(handle, "VGBS", NULL, &vgbs_output);
- if (ACPI_FAILURE(status))
- goto out;
+static bool intel_vbtn_has_switches(acpi_handle handle)
+{
+ const char *chassis_type = dmi_get_system_info(DMI_CHASSIS_TYPE);
+ unsigned long long vgbs;
+ acpi_status status;
- obj = vgbs_output.pointer;
- if (!(obj && obj->type == ACPI_TYPE_INTEGER))
- goto out;
+ /*
+ * Some normal laptops have a VGBS method despite being non-convertible,
+ * and their VGBS method always returns 0, causing detect_tablet_mode()
+ * to report SW_TABLET_MODE=1 to userspace, which causes issues.
+ * These laptops have a DMI chassis_type of 9 ("Laptop"), so do not
+ * report switches on any device with a DMI chassis_type of 9.
+ */
+ if (chassis_type && strcmp(chassis_type, "9") == 0)
+ return false;
- m = !(obj->integer.value & TABLET_MODE_FLAG);
- input_report_switch(priv->input_dev, SW_TABLET_MODE, m);
- m = (obj->integer.value & DOCK_MODE_FLAG) ? 1 : 0;
- input_report_switch(priv->input_dev, SW_DOCK, m);
-out:
- kfree(vgbs_output.pointer);
+ status = acpi_evaluate_integer(handle, "VGBS", NULL, &vgbs);
+ return ACPI_SUCCESS(status);
}
static int intel_vbtn_probe(struct platform_device *device)
{
acpi_handle handle = ACPI_HANDLE(&device->dev);
+ bool has_buttons, has_switches;
struct intel_vbtn_priv *priv;
acpi_status status;
int err;
- status = acpi_evaluate_object(handle, "VBDL", NULL, NULL);
- if (ACPI_FAILURE(status)) {
+ has_buttons = intel_vbtn_has_buttons(handle);
+ has_switches = intel_vbtn_has_switches(handle);
+
+ if (!has_buttons && !has_switches) {
dev_warn(&device->dev, "failed to read Intel Virtual Button driver\n");
return -ENODEV;
}
@@ -161,14 +208,15 @@ static int intel_vbtn_probe(struct platform_device *device)
return -ENOMEM;
dev_set_drvdata(&device->dev, priv);
+ priv->has_buttons = has_buttons;
+ priv->has_switches = has_switches;
+
err = intel_vbtn_input_setup(device);
if (err) {
pr_err("Failed to setup Intel Virtual Button\n");
return err;
}
- detect_tablet_mode(device);
-
status = acpi_install_notify_handler(handle,
ACPI_DEVICE_NOTIFY,
notify_handler,
diff --git a/drivers/platform/x86/intel-wmi-sbl-fw-update.c b/drivers/platform/x86/intel-wmi-sbl-fw-update.c
new file mode 100644
index 000000000000..ea87fa0786e8
--- /dev/null
+++ b/drivers/platform/x86/intel-wmi-sbl-fw-update.c
@@ -0,0 +1,145 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Slim Bootloader (SBL) firmware update signaling driver
+ *
+ * Slim Bootloader is a small, open-source, non-UEFI-compliant boot firmware
+ * optimized for running on certain Intel platforms.
+ *
+ * SBL exposes an ACPI-WMI device via /sys/bus/wmi/devices/<INTEL_WMI_SBL_GUID>.
+ * This driver additionally exposes a "firmware_update_request" device
+ * attribute. The attribute normally reads as 0; userspace can signal SBL to
+ * update the firmware on the next reboot by writing a value of 1.
+ *
+ * More details of the SBL firmware update process are available at:
+ * https://slimbootloader.github.io/security/firmware-update.html
+ */
+
+#include <linux/acpi.h>
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/sysfs.h>
+#include <linux/wmi.h>
+
+#define INTEL_WMI_SBL_GUID "44FADEB1-B204-40F2-8581-394BBDC1B651"
+
+static int get_fwu_request(struct device *dev, u32 *out)
+{
+ struct acpi_buffer result = {ACPI_ALLOCATE_BUFFER, NULL};
+ union acpi_object *obj;
+ acpi_status status;
+
+ status = wmi_query_block(INTEL_WMI_SBL_GUID, 0, &result);
+ if (ACPI_FAILURE(status)) {
+ dev_err(dev, "wmi_query_block failed\n");
+ return -ENODEV;
+ }
+
+ obj = (union acpi_object *)result.pointer;
+ if (!obj || obj->type != ACPI_TYPE_INTEGER) {
+ dev_warn(dev, "wmi_query_block returned invalid value\n");
+ kfree(obj);
+ return -EINVAL;
+ }
+
+ *out = obj->integer.value;
+ kfree(obj);
+
+ return 0;
+}
+
+static int set_fwu_request(struct device *dev, u32 in)
+{
+ struct acpi_buffer input;
+ acpi_status status;
+ u32 value;
+
+ value = in;
+ input.length = sizeof(u32);
+ input.pointer = &value;
+
+ status = wmi_set_block(INTEL_WMI_SBL_GUID, 0, &input);
+ if (ACPI_FAILURE(status)) {
+ dev_err(dev, "wmi_set_block failed\n");
+ return -ENODEV;
+ }
+
+ return 0;
+}
+
+static ssize_t firmware_update_request_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ u32 val;
+ int ret;
+
+ ret = get_fwu_request(dev, &val);
+ if (ret)
+ return ret;
+
+ return sprintf(buf, "%u\n", val);
+}
+
+static ssize_t firmware_update_request_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ unsigned int val;
+ int ret;
+
+ ret = kstrtouint(buf, 0, &val);
+ if (ret)
+ return ret;
+
+ /* May later be extended to support values other than 0 and 1 */
+ if (val > 1)
+ return -ERANGE;
+
+ ret = set_fwu_request(dev, val);
+ if (ret)
+ return ret;
+
+ return count;
+}
+static DEVICE_ATTR_RW(firmware_update_request);
+
+static struct attribute *firmware_update_attrs[] = {
+ &dev_attr_firmware_update_request.attr,
+ NULL
+};
+ATTRIBUTE_GROUPS(firmware_update);
+
+static int intel_wmi_sbl_fw_update_probe(struct wmi_device *wdev,
+ const void *context)
+{
+ dev_info(&wdev->dev, "Slim Bootloader signaling driver attached\n");
+ return 0;
+}
+
+static int intel_wmi_sbl_fw_update_remove(struct wmi_device *wdev)
+{
+ dev_info(&wdev->dev, "Slim Bootloader signaling driver removed\n");
+ return 0;
+}
+
+static const struct wmi_device_id intel_wmi_sbl_id_table[] = {
+ { .guid_string = INTEL_WMI_SBL_GUID },
+ {}
+};
+MODULE_DEVICE_TABLE(wmi, intel_wmi_sbl_id_table);
+
+static struct wmi_driver intel_wmi_sbl_fw_update_driver = {
+ .driver = {
+ .name = "intel-wmi-sbl-fw-update",
+ .dev_groups = firmware_update_groups,
+ },
+ .probe = intel_wmi_sbl_fw_update_probe,
+ .remove = intel_wmi_sbl_fw_update_remove,
+ .id_table = intel_wmi_sbl_id_table,
+};
+module_wmi_driver(intel_wmi_sbl_fw_update_driver);
+
+MODULE_AUTHOR("Jithu Joseph <jithu.joseph@intel.com>");
+MODULE_DESCRIPTION("Slim Bootloader firmware update signaling driver");
+MODULE_LICENSE("GPL v2");
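A hedged user-space sketch of how the firmware_update_request attribute added by this new driver might be used; the exact sysfs path is an assumption derived from the GUID above and is not spelled out in the patch.

/* Editorial illustration only -- not part of the patch. */
#include <stdio.h>

int main(void)
{
	const char *path = "/sys/bus/wmi/devices/"
			   "44FADEB1-B204-40F2-8581-394BBDC1B651/"
			   "firmware_update_request";
	FILE *f = fopen(path, "w");

	if (!f) {
		perror("fopen");
		return 1;
	}
	/* Writing 1 signals Slim Bootloader to update firmware on next reboot. */
	fputs("1\n", f);
	fclose(f);
	return 0;
}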
diff --git a/drivers/platform/x86/intel_cht_int33fe_typec.c b/drivers/platform/x86/intel_cht_int33fe_typec.c
index 04138215956b..48638d1c56e5 100644
--- a/drivers/platform/x86/intel_cht_int33fe_typec.c
+++ b/drivers/platform/x86/intel_cht_int33fe_typec.c
@@ -6,14 +6,14 @@
*
* Some Intel Cherry Trail based device which ship with Windows 10, have
* this weird INT33FE ACPI device with a CRS table with 4 I2cSerialBusV2
- * resources, for 4 different chips attached to various i2c busses:
- * 1. The Whiskey Cove pmic, which is also described by the INT34D3 ACPI device
+ * resources, for 4 different chips attached to various I²C buses:
+ * 1. The Whiskey Cove PMIC, which is also described by the INT34D3 ACPI device
* 2. Maxim MAX17047 Fuel Gauge Controller
* 3. FUSB302 USB Type-C Controller
* 4. PI3USB30532 USB switch
*
* So this driver is a stub / pseudo driver whose only purpose is to
- * instantiate i2c-clients for chips 2 - 4, so that standard i2c drivers
+ * instantiate I²C clients for chips 2 - 4, so that standard I²C drivers
* for these chips can bind to the them.
*/
@@ -21,43 +21,32 @@
#include <linux/interrupt.h>
#include <linux/pci.h>
#include <linux/platform_device.h>
+#include <linux/property.h>
#include <linux/regulator/consumer.h>
#include <linux/slab.h>
#include <linux/usb/pd.h>
#include "intel_cht_int33fe_common.h"
-enum {
- INT33FE_NODE_FUSB302,
- INT33FE_NODE_MAX17047,
- INT33FE_NODE_PI3USB30532,
- INT33FE_NODE_DISPLAYPORT,
- INT33FE_NODE_USB_CONNECTOR,
- INT33FE_NODE_MAX,
-};
-
/*
- * Grrr I severly dislike buggy BIOS-es. At least one BIOS enumerates
+ * Grrr, I severely dislike buggy BIOS-es. At least one BIOS enumerates
* the max17047 both through the INT33FE ACPI device (it is right there
* in the resources table) as well as through a separate MAX17047 device.
*
- * These helpers are used to work around this by checking if an i2c-client
+ * These helpers are used to work around this by checking if an I²C client
* for the max17047 has already been registered.
*/
static int cht_int33fe_check_for_max17047(struct device *dev, void *data)
{
struct i2c_client **max17047 = data;
struct acpi_device *adev;
- const char *hid;
adev = ACPI_COMPANION(dev);
if (!adev)
return 0;
- hid = acpi_device_hid(adev);
-
 /* The MAX17047 ACPI node doesn't have a UID, so we don't check that */
- if (strcmp(hid, "MAX17047"))
+ if (!acpi_dev_hid_uid_match(adev, "MAX17047", NULL))
return 0;
*max17047 = to_i2c_client(dev);
@@ -66,11 +55,16 @@ static int cht_int33fe_check_for_max17047(struct device *dev, void *data)
static const char * const max17047_suppliers[] = { "bq24190-charger" };
-static const struct property_entry max17047_props[] = {
+static const struct property_entry max17047_properties[] = {
PROPERTY_ENTRY_STRING_ARRAY("supplied-from", max17047_suppliers),
{ }
};
+static const struct software_node max17047_node = {
+ .name = "max17047",
+ .properties = max17047_properties,
+};
+
/*
* We are not using inline property here because those are constant,
* and we need to adjust this one at runtime to point to real
@@ -80,12 +74,17 @@ static struct software_node_ref_args fusb302_mux_refs[] = {
{ .node = NULL },
};
-static const struct property_entry fusb302_props[] = {
+static const struct property_entry fusb302_properties[] = {
PROPERTY_ENTRY_STRING("linux,extcon-name", "cht_wcove_pwrsrc"),
PROPERTY_ENTRY_REF_ARRAY("usb-role-switch", fusb302_mux_refs),
{ }
};
+static const struct software_node fusb302_node = {
+ .name = "fusb302",
+ .properties = fusb302_properties,
+};
+
#define PDO_FIXED_FLAGS \
(PDO_FIXED_DUAL_ROLE | PDO_FIXED_DATA_SWAP | PDO_FIXED_USB_COMM)
@@ -98,31 +97,40 @@ static const u32 snk_pdo[] = {
PDO_VAR(5000, 12000, 3000),
};
-static const struct software_node nodes[];
+static const struct software_node pi3usb30532_node = {
+ .name = "pi3usb30532",
+};
+
+static const struct software_node displayport_node = {
+ .name = "displayport",
+};
-static const struct property_entry usb_connector_props[] = {
+static const struct property_entry usb_connector_properties[] = {
PROPERTY_ENTRY_STRING("data-role", "dual"),
PROPERTY_ENTRY_STRING("power-role", "dual"),
PROPERTY_ENTRY_STRING("try-power-role", "sink"),
PROPERTY_ENTRY_U32_ARRAY("source-pdos", src_pdo),
PROPERTY_ENTRY_U32_ARRAY("sink-pdos", snk_pdo),
PROPERTY_ENTRY_U32("op-sink-microwatt", 2500000),
- PROPERTY_ENTRY_REF("orientation-switch",
- &nodes[INT33FE_NODE_PI3USB30532]),
- PROPERTY_ENTRY_REF("mode-switch",
- &nodes[INT33FE_NODE_PI3USB30532]),
- PROPERTY_ENTRY_REF("displayport",
- &nodes[INT33FE_NODE_DISPLAYPORT]),
+ PROPERTY_ENTRY_REF("orientation-switch", &pi3usb30532_node),
+ PROPERTY_ENTRY_REF("mode-switch", &pi3usb30532_node),
+ PROPERTY_ENTRY_REF("displayport", &displayport_node),
{ }
};
-static const struct software_node nodes[] = {
- { "fusb302", NULL, fusb302_props },
- { "max17047", NULL, max17047_props },
- { "pi3usb30532" },
- { "displayport" },
- { "connector", &nodes[0], usb_connector_props },
- { }
+static const struct software_node usb_connector_node = {
+ .name = "connector",
+ .parent = &fusb302_node,
+ .properties = usb_connector_properties,
+};
+
+static const struct software_node *node_group[] = {
+ &fusb302_node,
+ &max17047_node,
+ &pi3usb30532_node,
+ &displayport_node,
+ &usb_connector_node,
+ NULL
};
static int cht_int33fe_setup_dp(struct cht_int33fe_data *data)
@@ -130,7 +138,7 @@ static int cht_int33fe_setup_dp(struct cht_int33fe_data *data)
struct fwnode_handle *fwnode;
struct pci_dev *pdev;
- fwnode = software_node_fwnode(&nodes[INT33FE_NODE_DISPLAYPORT]);
+ fwnode = software_node_fwnode(&displayport_node);
if (!fwnode)
return -ENODEV;
@@ -155,11 +163,10 @@ static int cht_int33fe_setup_dp(struct cht_int33fe_data *data)
static void cht_int33fe_remove_nodes(struct cht_int33fe_data *data)
{
- software_node_unregister_nodes(nodes);
+ software_node_unregister_node_group(node_group);
if (fusb302_mux_refs[0].node) {
- fwnode_handle_put(
- software_node_fwnode(fusb302_mux_refs[0].node));
+ fwnode_handle_put(software_node_fwnode(fusb302_mux_refs[0].node));
fusb302_mux_refs[0].node = NULL;
}
@@ -192,7 +199,7 @@ static int cht_int33fe_add_nodes(struct cht_int33fe_data *data)
*/
fusb302_mux_refs[0].node = mux_ref_node;
- ret = software_node_register_nodes(nodes);
+ ret = software_node_register_node_group(node_group);
if (ret)
return ret;
@@ -222,16 +229,15 @@ cht_int33fe_register_max17047(struct device *dev, struct cht_int33fe_data *data)
struct fwnode_handle *fwnode;
int ret;
- fwnode = software_node_fwnode(&nodes[INT33FE_NODE_MAX17047]);
+ fwnode = software_node_fwnode(&max17047_node);
if (!fwnode)
return -ENODEV;
i2c_for_each_dev(&max17047, cht_int33fe_check_for_max17047);
if (max17047) {
- /* Pre-existing i2c-client for the max17047, add device-props */
- fwnode->secondary = ERR_PTR(-ENODEV);
- max17047->dev.fwnode->secondary = fwnode;
- /* And re-probe to get the new device-props applied. */
+ /* Pre-existing I²C client for the max17047, add device properties */
+ set_secondary_fwnode(&max17047->dev, fwnode);
+ /* And re-probe to get the new device properties applied */
ret = device_reprobe(&max17047->dev);
if (ret)
dev_warn(dev, "Reprobing max17047 error: %d\n", ret);
@@ -266,7 +272,7 @@ int cht_int33fe_typec_probe(struct cht_int33fe_data *data)
* must be registered before the fusb302 is instantiated, otherwise
* it will end up with a dummy-regulator.
* Note "cht_wc_usb_typec_vbus" comes from the regulator_init_data
- * which is defined in i2c-cht-wc.c from where the bq24292i i2c-client
+ * which is defined in i2c-cht-wc.c from where the bq24292i I²C client
* gets instantiated. We use regulator_get_optional here so that we
* don't end up getting a dummy-regulator ourselves.
*/
@@ -277,7 +283,7 @@ int cht_int33fe_typec_probe(struct cht_int33fe_data *data)
}
regulator_put(regulator);
- /* The FUSB302 uses the irq at index 1 and is the only irq user */
+ /* The FUSB302 uses the IRQ at index 1 and is the only IRQ user */
fusb302_irq = acpi_dev_gpio_irq_get(ACPI_COMPANION(dev), 1);
if (fusb302_irq < 0) {
if (fusb302_irq != -EPROBE_DEFER)
@@ -289,12 +295,12 @@ int cht_int33fe_typec_probe(struct cht_int33fe_data *data)
if (ret)
return ret;
- /* Work around BIOS bug, see comment on cht_int33fe_check_for_max17047 */
+ /* Work around BIOS bug, see comment on cht_int33fe_check_for_max17047() */
ret = cht_int33fe_register_max17047(dev, data);
if (ret)
goto out_remove_nodes;
- fwnode = software_node_fwnode(&nodes[INT33FE_NODE_FUSB302]);
+ fwnode = software_node_fwnode(&fusb302_node);
if (!fwnode) {
ret = -ENODEV;
goto out_unregister_max17047;
@@ -312,7 +318,7 @@ int cht_int33fe_typec_probe(struct cht_int33fe_data *data)
goto out_unregister_max17047;
}
- fwnode = software_node_fwnode(&nodes[INT33FE_NODE_PI3USB30532]);
+ fwnode = software_node_fwnode(&pi3usb30532_node);
if (!fwnode) {
ret = -ENODEV;
goto out_unregister_fusb302;
diff --git a/drivers/platform/x86/intel_mid_powerbtn.c b/drivers/platform/x86/intel_mid_powerbtn.c
index 9c9f209c8a33..df434abbb66f 100644
--- a/drivers/platform/x86/intel_mid_powerbtn.c
+++ b/drivers/platform/x86/intel_mid_powerbtn.c
@@ -46,6 +46,7 @@ struct mid_pb_ddata {
unsigned short mirqlvl1_addr;
unsigned short pbstat_addr;
u8 pbstat_mask;
+ struct intel_scu_ipc_dev *scu;
int (*setup)(struct mid_pb_ddata *ddata);
};
@@ -55,7 +56,8 @@ static int mid_pbstat(struct mid_pb_ddata *ddata, int *value)
int ret;
u8 pbstat;
- ret = intel_scu_ipc_ioread8(ddata->pbstat_addr, &pbstat);
+ ret = intel_scu_ipc_dev_ioread8(ddata->scu, ddata->pbstat_addr,
+ &pbstat);
if (ret)
return ret;
@@ -67,14 +69,15 @@ static int mid_pbstat(struct mid_pb_ddata *ddata, int *value)
static int mid_irq_ack(struct mid_pb_ddata *ddata)
{
- return intel_scu_ipc_update_register(ddata->mirqlvl1_addr, 0, MSIC_PWRBTNM);
+ return intel_scu_ipc_dev_update(ddata->scu, ddata->mirqlvl1_addr, 0,
+ MSIC_PWRBTNM);
}
static int mrfld_setup(struct mid_pb_ddata *ddata)
{
/* Unmask the PBIRQ and MPBIRQ on Tangier */
- intel_scu_ipc_update_register(BCOVE_PBIRQ, 0, MSIC_PWRBTNM);
- intel_scu_ipc_update_register(BCOVE_PBIRQMASK, 0, MSIC_PWRBTNM);
+ intel_scu_ipc_dev_update(ddata->scu, BCOVE_PBIRQ, 0, MSIC_PWRBTNM);
+ intel_scu_ipc_dev_update(ddata->scu, BCOVE_PBIRQMASK, 0, MSIC_PWRBTNM);
return 0;
}
@@ -161,6 +164,10 @@ static int mid_pb_probe(struct platform_device *pdev)
return error;
}
+ ddata->scu = devm_intel_scu_ipc_dev_get(&pdev->dev);
+ if (!ddata->scu)
+ return -EPROBE_DEFER;
+
error = devm_request_threaded_irq(&pdev->dev, irq, NULL, mid_pb_isr,
IRQF_ONESHOT, DRIVER_NAME, ddata);
if (error) {
diff --git a/drivers/platform/x86/intel_pmc_ipc.c b/drivers/platform/x86/intel_pmc_ipc.c
deleted file mode 100644
index 2433bf73f1ed..000000000000
--- a/drivers/platform/x86/intel_pmc_ipc.c
+++ /dev/null
@@ -1,949 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Driver for the Intel PMC IPC mechanism
- *
- * (C) Copyright 2014-2015 Intel Corporation
- *
- * This driver is based on Intel SCU IPC driver(intel_scu_ipc.c) by
- * Sreedhara DS <sreedhara.ds@intel.com>
- *
- * PMC running in ARC processor communicates with other entity running in IA
- * core through IPC mechanism which in turn messaging between IA core ad PMC.
- */
-
-#include <linux/acpi.h>
-#include <linux/delay.h>
-#include <linux/errno.h>
-#include <linux/interrupt.h>
-#include <linux/io-64-nonatomic-lo-hi.h>
-#include <linux/module.h>
-#include <linux/pci.h>
-#include <linux/platform_device.h>
-
-#include <asm/intel_pmc_ipc.h>
-
-#include <linux/platform_data/itco_wdt.h>
-
-/*
- * IPC registers
- * The IA write to IPC_CMD command register triggers an interrupt to the ARC,
- * The ARC handles the interrupt and services it, writing optional data to
- * the IPC1 registers, updates the IPC_STS response register with the status.
- */
-#define IPC_CMD 0x00
-#define IPC_CMD_MSI BIT(8)
-#define IPC_CMD_SIZE 16
-#define IPC_CMD_SUBCMD 12
-#define IPC_STATUS 0x04
-#define IPC_STATUS_IRQ BIT(2)
-#define IPC_STATUS_ERR BIT(1)
-#define IPC_STATUS_BUSY BIT(0)
-#define IPC_SPTR 0x08
-#define IPC_DPTR 0x0C
-#define IPC_WRITE_BUFFER 0x80
-#define IPC_READ_BUFFER 0x90
-
-/* Residency with clock rate at 19.2MHz to usecs */
-#define S0IX_RESIDENCY_IN_USECS(d, s) \
-({ \
- u64 result = 10ull * ((d) + (s)); \
- do_div(result, 192); \
- result; \
-})
-
-/*
- * 16-byte buffer for sending data associated with IPC command.
- */
-#define IPC_DATA_BUFFER_SIZE 16
-
-#define IPC_LOOP_CNT 3000000
-#define IPC_MAX_SEC 3
-
-#define IPC_TRIGGER_MODE_IRQ true
-
-/* exported resources from IFWI */
-#define PLAT_RESOURCE_IPC_INDEX 0
-#define PLAT_RESOURCE_IPC_SIZE 0x1000
-#define PLAT_RESOURCE_GCR_OFFSET 0x1000
-#define PLAT_RESOURCE_GCR_SIZE 0x1000
-#define PLAT_RESOURCE_BIOS_DATA_INDEX 1
-#define PLAT_RESOURCE_BIOS_IFACE_INDEX 2
-#define PLAT_RESOURCE_TELEM_SSRAM_INDEX 3
-#define PLAT_RESOURCE_ISP_DATA_INDEX 4
-#define PLAT_RESOURCE_ISP_IFACE_INDEX 5
-#define PLAT_RESOURCE_GTD_DATA_INDEX 6
-#define PLAT_RESOURCE_GTD_IFACE_INDEX 7
-#define PLAT_RESOURCE_ACPI_IO_INDEX 0
-
-/*
- * BIOS does not create an ACPI device for each PMC function,
- * but exports multiple resources from one ACPI device(IPC) for
- * multiple functions. This driver is responsible to create a
- * platform device and to export resources for those functions.
- */
-#define TCO_DEVICE_NAME "iTCO_wdt"
-#define SMI_EN_OFFSET 0x40
-#define SMI_EN_SIZE 4
-#define TCO_BASE_OFFSET 0x60
-#define TCO_REGS_SIZE 16
-#define PUNIT_DEVICE_NAME "intel_punit_ipc"
-#define TELEMETRY_DEVICE_NAME "intel_telemetry"
-#define TELEM_SSRAM_SIZE 240
-#define TELEM_PMC_SSRAM_OFFSET 0x1B00
-#define TELEM_PUNIT_SSRAM_OFFSET 0x1A00
-#define TCO_PMC_OFFSET 0x08
-#define TCO_PMC_SIZE 0x04
-
-/* PMC register bit definitions */
-
-/* PMC_CFG_REG bit masks */
-#define PMC_CFG_NO_REBOOT_MASK BIT_MASK(4)
-#define PMC_CFG_NO_REBOOT_EN (1 << 4)
-#define PMC_CFG_NO_REBOOT_DIS (0 << 4)
-
-static struct intel_pmc_ipc_dev {
- struct device *dev;
- void __iomem *ipc_base;
- bool irq_mode;
- int irq;
- int cmd;
- struct completion cmd_complete;
-
- /* The following PMC BARs share the same ACPI device with the IPC */
- resource_size_t acpi_io_base;
- int acpi_io_size;
- struct platform_device *tco_dev;
-
- /* gcr */
- void __iomem *gcr_mem_base;
- bool has_gcr_regs;
- spinlock_t gcr_lock;
-
- /* punit */
- struct platform_device *punit_dev;
- unsigned int punit_res_count;
-
- /* Telemetry */
- resource_size_t telem_pmc_ssram_base;
- resource_size_t telem_punit_ssram_base;
- int telem_pmc_ssram_size;
- int telem_punit_ssram_size;
- u8 telem_res_inval;
- struct platform_device *telemetry_dev;
-} ipcdev;
-
-static char *ipc_err_sources[] = {
- [IPC_ERR_NONE] =
- "no error",
- [IPC_ERR_CMD_NOT_SUPPORTED] =
- "command not supported",
- [IPC_ERR_CMD_NOT_SERVICED] =
- "command not serviced",
- [IPC_ERR_UNABLE_TO_SERVICE] =
- "unable to service",
- [IPC_ERR_CMD_INVALID] =
- "command invalid",
- [IPC_ERR_CMD_FAILED] =
- "command failed",
- [IPC_ERR_EMSECURITY] =
- "Invalid Battery",
- [IPC_ERR_UNSIGNEDKERNEL] =
- "Unsigned kernel",
-};
-
-/* Prevent concurrent calls to the PMC */
-static DEFINE_MUTEX(ipclock);
-
-static inline void ipc_send_command(u32 cmd)
-{
- ipcdev.cmd = cmd;
- if (ipcdev.irq_mode) {
- reinit_completion(&ipcdev.cmd_complete);
- cmd |= IPC_CMD_MSI;
- }
- writel(cmd, ipcdev.ipc_base + IPC_CMD);
-}
-
-static inline u32 ipc_read_status(void)
-{
- return readl(ipcdev.ipc_base + IPC_STATUS);
-}
-
-static inline void ipc_data_writel(u32 data, u32 offset)
-{
- writel(data, ipcdev.ipc_base + IPC_WRITE_BUFFER + offset);
-}
-
-static inline u32 ipc_data_readl(u32 offset)
-{
- return readl(ipcdev.ipc_base + IPC_READ_BUFFER + offset);
-}
-
-static inline u64 gcr_data_readq(u32 offset)
-{
- return readq(ipcdev.gcr_mem_base + offset);
-}
-
-static inline int is_gcr_valid(u32 offset)
-{
- if (!ipcdev.has_gcr_regs)
- return -EACCES;
-
- if (offset > PLAT_RESOURCE_GCR_SIZE)
- return -EINVAL;
-
- return 0;
-}
-
-/**
- * intel_pmc_gcr_read64() - Read a 64-bit PMC GCR register
- * @offset: offset of GCR register from GCR address base
- * @data: data pointer for storing the register output
- *
- * Reads the 64-bit PMC GCR register at given offset.
- *
- * Return: negative value on error or 0 on success.
- */
-int intel_pmc_gcr_read64(u32 offset, u64 *data)
-{
- int ret;
-
- spin_lock(&ipcdev.gcr_lock);
-
- ret = is_gcr_valid(offset);
- if (ret < 0) {
- spin_unlock(&ipcdev.gcr_lock);
- return ret;
- }
-
- *data = readq(ipcdev.gcr_mem_base + offset);
-
- spin_unlock(&ipcdev.gcr_lock);
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(intel_pmc_gcr_read64);
-
-/**
- * intel_pmc_gcr_update() - Update PMC GCR register bits
- * @offset: offset of GCR register from GCR address base
- * @mask: bit mask for update operation
- * @val: update value
- *
- * Updates the bits of given GCR register as specified by
- * @mask and @val.
- *
- * Return: negative value on error or 0 on success.
- */
-static int intel_pmc_gcr_update(u32 offset, u32 mask, u32 val)
-{
- u32 new_val;
- int ret = 0;
-
- spin_lock(&ipcdev.gcr_lock);
-
- ret = is_gcr_valid(offset);
- if (ret < 0)
- goto gcr_ipc_unlock;
-
- new_val = readl(ipcdev.gcr_mem_base + offset);
-
- new_val &= ~mask;
- new_val |= val & mask;
-
- writel(new_val, ipcdev.gcr_mem_base + offset);
-
- new_val = readl(ipcdev.gcr_mem_base + offset);
-
- /* check whether the bit update is successful */
- if ((new_val & mask) != (val & mask)) {
- ret = -EIO;
- goto gcr_ipc_unlock;
- }
-
-gcr_ipc_unlock:
- spin_unlock(&ipcdev.gcr_lock);
- return ret;
-}
-
-static int update_no_reboot_bit(void *priv, bool set)
-{
- u32 value = set ? PMC_CFG_NO_REBOOT_EN : PMC_CFG_NO_REBOOT_DIS;
-
- return intel_pmc_gcr_update(PMC_GCR_PMC_CFG_REG,
- PMC_CFG_NO_REBOOT_MASK, value);
-}
-
-static int intel_pmc_ipc_check_status(void)
-{
- int status;
- int ret = 0;
-
- if (ipcdev.irq_mode) {
- if (0 == wait_for_completion_timeout(
- &ipcdev.cmd_complete, IPC_MAX_SEC * HZ))
- ret = -ETIMEDOUT;
- } else {
- int loop_count = IPC_LOOP_CNT;
-
- while ((ipc_read_status() & IPC_STATUS_BUSY) && --loop_count)
- udelay(1);
- if (loop_count == 0)
- ret = -ETIMEDOUT;
- }
-
- status = ipc_read_status();
- if (ret == -ETIMEDOUT) {
- dev_err(ipcdev.dev,
- "IPC timed out, TS=0x%x, CMD=0x%x\n",
- status, ipcdev.cmd);
- return ret;
- }
-
- if (status & IPC_STATUS_ERR) {
- int i;
-
- ret = -EIO;
- i = (status >> IPC_CMD_SIZE) & 0xFF;
- if (i < ARRAY_SIZE(ipc_err_sources))
- dev_err(ipcdev.dev,
- "IPC failed: %s, STS=0x%x, CMD=0x%x\n",
- ipc_err_sources[i], status, ipcdev.cmd);
- else
- dev_err(ipcdev.dev,
- "IPC failed: unknown, STS=0x%x, CMD=0x%x\n",
- status, ipcdev.cmd);
- if ((i == IPC_ERR_UNSIGNEDKERNEL) || (i == IPC_ERR_EMSECURITY))
- ret = -EACCES;
- }
-
- return ret;
-}
-
-/**
- * intel_pmc_ipc_simple_command() - Simple IPC command
- * @cmd: IPC command code.
- * @sub: IPC command sub type.
- *
- * Send a simple IPC command to PMC when don't need to specify
- * input/output data and source/dest pointers.
- *
- * Return: an IPC error code or 0 on success.
- */
-static int intel_pmc_ipc_simple_command(int cmd, int sub)
-{
- int ret;
-
- mutex_lock(&ipclock);
- if (ipcdev.dev == NULL) {
- mutex_unlock(&ipclock);
- return -ENODEV;
- }
- ipc_send_command(sub << IPC_CMD_SUBCMD | cmd);
- ret = intel_pmc_ipc_check_status();
- mutex_unlock(&ipclock);
-
- return ret;
-}
-
-/**
- * intel_pmc_ipc_raw_cmd() - IPC command with data and pointers
- * @cmd: IPC command code.
- * @sub: IPC command sub type.
- * @in: input data of this IPC command.
- * @inlen: input data length in bytes.
- * @out: output data of this IPC command.
- * @outlen: output data length in dwords.
- * @sptr: data writing to SPTR register.
- * @dptr: data writing to DPTR register.
- *
- * Send an IPC command to PMC with input/output data and source/dest pointers.
- *
- * Return: an IPC error code or 0 on success.
- */
-static int intel_pmc_ipc_raw_cmd(u32 cmd, u32 sub, u8 *in, u32 inlen, u32 *out,
- u32 outlen, u32 dptr, u32 sptr)
-{
- u32 wbuf[4] = { 0 };
- int ret;
- int i;
-
- if (inlen > IPC_DATA_BUFFER_SIZE || outlen > IPC_DATA_BUFFER_SIZE / 4)
- return -EINVAL;
-
- mutex_lock(&ipclock);
- if (ipcdev.dev == NULL) {
- mutex_unlock(&ipclock);
- return -ENODEV;
- }
- memcpy(wbuf, in, inlen);
- writel(dptr, ipcdev.ipc_base + IPC_DPTR);
- writel(sptr, ipcdev.ipc_base + IPC_SPTR);
- /* The input data register is 32bit register and inlen is in Byte */
- for (i = 0; i < ((inlen + 3) / 4); i++)
- ipc_data_writel(wbuf[i], 4 * i);
- ipc_send_command((inlen << IPC_CMD_SIZE) |
- (sub << IPC_CMD_SUBCMD) | cmd);
- ret = intel_pmc_ipc_check_status();
- if (!ret) {
- /* out is read from 32bit register and outlen is in 32bit */
- for (i = 0; i < outlen; i++)
- *out++ = ipc_data_readl(4 * i);
- }
- mutex_unlock(&ipclock);
-
- return ret;
-}
-
-/**
- * intel_pmc_ipc_command() - IPC command with input/output data
- * @cmd: IPC command code.
- * @sub: IPC command sub type.
- * @in: input data of this IPC command.
- * @inlen: input data length in bytes.
- * @out: output data of this IPC command.
- * @outlen: output data length in dwords.
- *
- * Send an IPC command to PMC with input/output data.
- *
- * Return: an IPC error code or 0 on success.
- */
-int intel_pmc_ipc_command(u32 cmd, u32 sub, u8 *in, u32 inlen,
- u32 *out, u32 outlen)
-{
- return intel_pmc_ipc_raw_cmd(cmd, sub, in, inlen, out, outlen, 0, 0);
-}
-EXPORT_SYMBOL_GPL(intel_pmc_ipc_command);
-
-static irqreturn_t ioc(int irq, void *dev_id)
-{
- int status;
-
- if (ipcdev.irq_mode) {
- status = ipc_read_status();
- writel(status | IPC_STATUS_IRQ, ipcdev.ipc_base + IPC_STATUS);
- }
- complete(&ipcdev.cmd_complete);
-
- return IRQ_HANDLED;
-}
-
-static int ipc_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
-{
- struct intel_pmc_ipc_dev *pmc = &ipcdev;
- int ret;
-
- /* Only one PMC is supported */
- if (pmc->dev)
- return -EBUSY;
-
- pmc->irq_mode = IPC_TRIGGER_MODE_IRQ;
-
- spin_lock_init(&ipcdev.gcr_lock);
-
- ret = pcim_enable_device(pdev);
- if (ret)
- return ret;
-
- ret = pcim_iomap_regions(pdev, 1 << 0, pci_name(pdev));
- if (ret)
- return ret;
-
- init_completion(&pmc->cmd_complete);
-
- pmc->ipc_base = pcim_iomap_table(pdev)[0];
-
- ret = devm_request_irq(&pdev->dev, pdev->irq, ioc, 0, "intel_pmc_ipc",
- pmc);
- if (ret) {
- dev_err(&pdev->dev, "Failed to request irq\n");
- return ret;
- }
-
- pmc->dev = &pdev->dev;
-
- pci_set_drvdata(pdev, pmc);
-
- return 0;
-}
-
-static const struct pci_device_id ipc_pci_ids[] = {
- {PCI_VDEVICE(INTEL, 0x0a94), 0},
- {PCI_VDEVICE(INTEL, 0x1a94), 0},
- {PCI_VDEVICE(INTEL, 0x5a94), 0},
- { 0,}
-};
-MODULE_DEVICE_TABLE(pci, ipc_pci_ids);
-
-static struct pci_driver ipc_pci_driver = {
- .name = "intel_pmc_ipc",
- .id_table = ipc_pci_ids,
- .probe = ipc_pci_probe,
-};
-
-static ssize_t intel_pmc_ipc_simple_cmd_store(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t count)
-{
- int subcmd;
- int cmd;
- int ret;
-
- ret = sscanf(buf, "%d %d", &cmd, &subcmd);
- if (ret != 2) {
- dev_err(dev, "Error args\n");
- return -EINVAL;
- }
-
- ret = intel_pmc_ipc_simple_command(cmd, subcmd);
- if (ret) {
- dev_err(dev, "command %d error with %d\n", cmd, ret);
- return ret;
- }
- return (ssize_t)count;
-}
-static DEVICE_ATTR(simplecmd, 0200, NULL, intel_pmc_ipc_simple_cmd_store);
-
-static ssize_t intel_pmc_ipc_northpeak_store(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t count)
-{
- unsigned long val;
- int subcmd;
- int ret;
-
- ret = kstrtoul(buf, 0, &val);
- if (ret)
- return ret;
-
- if (val)
- subcmd = 1;
- else
- subcmd = 0;
- ret = intel_pmc_ipc_simple_command(PMC_IPC_NORTHPEAK_CTRL, subcmd);
- if (ret) {
- dev_err(dev, "command north %d error with %d\n", subcmd, ret);
- return ret;
- }
- return (ssize_t)count;
-}
-static DEVICE_ATTR(northpeak, 0200, NULL, intel_pmc_ipc_northpeak_store);
-
-static struct attribute *intel_ipc_attrs[] = {
- &dev_attr_northpeak.attr,
- &dev_attr_simplecmd.attr,
- NULL
-};
-
-static const struct attribute_group intel_ipc_group = {
- .attrs = intel_ipc_attrs,
-};
-
-static const struct attribute_group *intel_ipc_groups[] = {
- &intel_ipc_group,
- NULL
-};
-
-static struct resource punit_res_array[] = {
- /* Punit BIOS */
- {
- .flags = IORESOURCE_MEM,
- },
- {
- .flags = IORESOURCE_MEM,
- },
- /* Punit ISP */
- {
- .flags = IORESOURCE_MEM,
- },
- {
- .flags = IORESOURCE_MEM,
- },
- /* Punit GTD */
- {
- .flags = IORESOURCE_MEM,
- },
- {
- .flags = IORESOURCE_MEM,
- },
-};
-
-#define TCO_RESOURCE_ACPI_IO 0
-#define TCO_RESOURCE_SMI_EN_IO 1
-#define TCO_RESOURCE_GCR_MEM 2
-static struct resource tco_res[] = {
- /* ACPI - TCO */
- {
- .flags = IORESOURCE_IO,
- },
- /* ACPI - SMI */
- {
- .flags = IORESOURCE_IO,
- },
-};
-
-static struct itco_wdt_platform_data tco_info = {
- .name = "Apollo Lake SoC",
- .version = 5,
- .no_reboot_priv = &ipcdev,
- .update_no_reboot_bit = update_no_reboot_bit,
-};
-
-#define TELEMETRY_RESOURCE_PUNIT_SSRAM 0
-#define TELEMETRY_RESOURCE_PMC_SSRAM 1
-static struct resource telemetry_res[] = {
- /*Telemetry*/
- {
- .flags = IORESOURCE_MEM,
- },
- {
- .flags = IORESOURCE_MEM,
- },
-};
-
-static int ipc_create_punit_device(void)
-{
- struct platform_device *pdev;
- const struct platform_device_info pdevinfo = {
- .parent = ipcdev.dev,
- .name = PUNIT_DEVICE_NAME,
- .id = -1,
- .res = punit_res_array,
- .num_res = ipcdev.punit_res_count,
- };
-
- pdev = platform_device_register_full(&pdevinfo);
- if (IS_ERR(pdev))
- return PTR_ERR(pdev);
-
- ipcdev.punit_dev = pdev;
-
- return 0;
-}
-
-static int ipc_create_tco_device(void)
-{
- struct platform_device *pdev;
- struct resource *res;
- const struct platform_device_info pdevinfo = {
- .parent = ipcdev.dev,
- .name = TCO_DEVICE_NAME,
- .id = -1,
- .res = tco_res,
- .num_res = ARRAY_SIZE(tco_res),
- .data = &tco_info,
- .size_data = sizeof(tco_info),
- };
-
- res = tco_res + TCO_RESOURCE_ACPI_IO;
- res->start = ipcdev.acpi_io_base + TCO_BASE_OFFSET;
- res->end = res->start + TCO_REGS_SIZE - 1;
-
- res = tco_res + TCO_RESOURCE_SMI_EN_IO;
- res->start = ipcdev.acpi_io_base + SMI_EN_OFFSET;
- res->end = res->start + SMI_EN_SIZE - 1;
-
- pdev = platform_device_register_full(&pdevinfo);
- if (IS_ERR(pdev))
- return PTR_ERR(pdev);
-
- ipcdev.tco_dev = pdev;
-
- return 0;
-}
-
-static int ipc_create_telemetry_device(void)
-{
- struct platform_device *pdev;
- struct resource *res;
- const struct platform_device_info pdevinfo = {
- .parent = ipcdev.dev,
- .name = TELEMETRY_DEVICE_NAME,
- .id = -1,
- .res = telemetry_res,
- .num_res = ARRAY_SIZE(telemetry_res),
- };
-
- res = telemetry_res + TELEMETRY_RESOURCE_PUNIT_SSRAM;
- res->start = ipcdev.telem_punit_ssram_base;
- res->end = res->start + ipcdev.telem_punit_ssram_size - 1;
-
- res = telemetry_res + TELEMETRY_RESOURCE_PMC_SSRAM;
- res->start = ipcdev.telem_pmc_ssram_base;
- res->end = res->start + ipcdev.telem_pmc_ssram_size - 1;
-
- pdev = platform_device_register_full(&pdevinfo);
- if (IS_ERR(pdev))
- return PTR_ERR(pdev);
-
- ipcdev.telemetry_dev = pdev;
-
- return 0;
-}
-
-static int ipc_create_pmc_devices(void)
-{
- int ret;
-
- /* If we have ACPI based watchdog use that instead */
- if (!acpi_has_watchdog()) {
- ret = ipc_create_tco_device();
- if (ret) {
- dev_err(ipcdev.dev, "Failed to add tco platform device\n");
- return ret;
- }
- }
-
- ret = ipc_create_punit_device();
- if (ret) {
- dev_err(ipcdev.dev, "Failed to add punit platform device\n");
- platform_device_unregister(ipcdev.tco_dev);
- return ret;
- }
-
- if (!ipcdev.telem_res_inval) {
- ret = ipc_create_telemetry_device();
- if (ret) {
- dev_warn(ipcdev.dev,
- "Failed to add telemetry platform device\n");
- platform_device_unregister(ipcdev.punit_dev);
- platform_device_unregister(ipcdev.tco_dev);
- }
- }
-
- return ret;
-}
-
-static int ipc_plat_get_res(struct platform_device *pdev)
-{
- struct resource *res, *punit_res = punit_res_array;
- void __iomem *addr;
- int size;
-
- res = platform_get_resource(pdev, IORESOURCE_IO,
- PLAT_RESOURCE_ACPI_IO_INDEX);
- if (!res) {
- dev_err(&pdev->dev, "Failed to get io resource\n");
- return -ENXIO;
- }
- size = resource_size(res);
- ipcdev.acpi_io_base = res->start;
- ipcdev.acpi_io_size = size;
- dev_info(&pdev->dev, "io res: %pR\n", res);
-
- ipcdev.punit_res_count = 0;
-
- /* This is index 0 to cover BIOS data register */
- res = platform_get_resource(pdev, IORESOURCE_MEM,
- PLAT_RESOURCE_BIOS_DATA_INDEX);
- if (!res) {
- dev_err(&pdev->dev, "Failed to get res of punit BIOS data\n");
- return -ENXIO;
- }
- punit_res[ipcdev.punit_res_count++] = *res;
- dev_info(&pdev->dev, "punit BIOS data res: %pR\n", res);
-
- /* This is index 1 to cover BIOS interface register */
- res = platform_get_resource(pdev, IORESOURCE_MEM,
- PLAT_RESOURCE_BIOS_IFACE_INDEX);
- if (!res) {
- dev_err(&pdev->dev, "Failed to get res of punit BIOS iface\n");
- return -ENXIO;
- }
- punit_res[ipcdev.punit_res_count++] = *res;
- dev_info(&pdev->dev, "punit BIOS interface res: %pR\n", res);
-
- /* This is index 2 to cover ISP data register, optional */
- res = platform_get_resource(pdev, IORESOURCE_MEM,
- PLAT_RESOURCE_ISP_DATA_INDEX);
- if (res) {
- punit_res[ipcdev.punit_res_count++] = *res;
- dev_info(&pdev->dev, "punit ISP data res: %pR\n", res);
- }
-
- /* This is index 3 to cover ISP interface register, optional */
- res = platform_get_resource(pdev, IORESOURCE_MEM,
- PLAT_RESOURCE_ISP_IFACE_INDEX);
- if (res) {
- punit_res[ipcdev.punit_res_count++] = *res;
- dev_info(&pdev->dev, "punit ISP interface res: %pR\n", res);
- }
-
- /* This is index 4 to cover GTD data register, optional */
- res = platform_get_resource(pdev, IORESOURCE_MEM,
- PLAT_RESOURCE_GTD_DATA_INDEX);
- if (res) {
- punit_res[ipcdev.punit_res_count++] = *res;
- dev_info(&pdev->dev, "punit GTD data res: %pR\n", res);
- }
-
- /* This is index 5 to cover GTD interface register, optional */
- res = platform_get_resource(pdev, IORESOURCE_MEM,
- PLAT_RESOURCE_GTD_IFACE_INDEX);
- if (res) {
- punit_res[ipcdev.punit_res_count++] = *res;
- dev_info(&pdev->dev, "punit GTD interface res: %pR\n", res);
- }
-
- res = platform_get_resource(pdev, IORESOURCE_MEM,
- PLAT_RESOURCE_IPC_INDEX);
- if (!res) {
- dev_err(&pdev->dev, "Failed to get ipc resource\n");
- return -ENXIO;
- }
- size = PLAT_RESOURCE_IPC_SIZE + PLAT_RESOURCE_GCR_SIZE;
- res->end = res->start + size - 1;
-
- addr = devm_ioremap_resource(&pdev->dev, res);
- if (IS_ERR(addr))
- return PTR_ERR(addr);
-
- ipcdev.ipc_base = addr;
-
- ipcdev.gcr_mem_base = addr + PLAT_RESOURCE_GCR_OFFSET;
- dev_info(&pdev->dev, "ipc res: %pR\n", res);
-
- ipcdev.telem_res_inval = 0;
- res = platform_get_resource(pdev, IORESOURCE_MEM,
- PLAT_RESOURCE_TELEM_SSRAM_INDEX);
- if (!res) {
- dev_err(&pdev->dev, "Failed to get telemetry ssram resource\n");
- ipcdev.telem_res_inval = 1;
- } else {
- ipcdev.telem_punit_ssram_base = res->start +
- TELEM_PUNIT_SSRAM_OFFSET;
- ipcdev.telem_punit_ssram_size = TELEM_SSRAM_SIZE;
- ipcdev.telem_pmc_ssram_base = res->start +
- TELEM_PMC_SSRAM_OFFSET;
- ipcdev.telem_pmc_ssram_size = TELEM_SSRAM_SIZE;
- dev_info(&pdev->dev, "telemetry ssram res: %pR\n", res);
- }
-
- return 0;
-}
-
-/**
- * intel_pmc_s0ix_counter_read() - Read S0ix residency.
- * @data: Out param that contains current S0ix residency count.
- *
- * Return: an error code or 0 on success.
- */
-int intel_pmc_s0ix_counter_read(u64 *data)
-{
- u64 deep, shlw;
-
- if (!ipcdev.has_gcr_regs)
- return -EACCES;
-
- deep = gcr_data_readq(PMC_GCR_TELEM_DEEP_S0IX_REG);
- shlw = gcr_data_readq(PMC_GCR_TELEM_SHLW_S0IX_REG);
-
- *data = S0IX_RESIDENCY_IN_USECS(deep, shlw);
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(intel_pmc_s0ix_counter_read);
-
-#ifdef CONFIG_ACPI
-static const struct acpi_device_id ipc_acpi_ids[] = {
- { "INT34D2", 0},
- { }
-};
-MODULE_DEVICE_TABLE(acpi, ipc_acpi_ids);
-#endif
-
-static int ipc_plat_probe(struct platform_device *pdev)
-{
- int ret;
-
- ipcdev.dev = &pdev->dev;
- ipcdev.irq_mode = IPC_TRIGGER_MODE_IRQ;
- init_completion(&ipcdev.cmd_complete);
- spin_lock_init(&ipcdev.gcr_lock);
-
- ipcdev.irq = platform_get_irq(pdev, 0);
- if (ipcdev.irq < 0)
- return -EINVAL;
-
- ret = ipc_plat_get_res(pdev);
- if (ret) {
- dev_err(&pdev->dev, "Failed to request resource\n");
- return ret;
- }
-
- ret = ipc_create_pmc_devices();
- if (ret) {
- dev_err(&pdev->dev, "Failed to create pmc devices\n");
- return ret;
- }
-
- if (devm_request_irq(&pdev->dev, ipcdev.irq, ioc, IRQF_NO_SUSPEND,
- "intel_pmc_ipc", &ipcdev)) {
- dev_err(&pdev->dev, "Failed to request irq\n");
- ret = -EBUSY;
- goto err_irq;
- }
-
- ipcdev.has_gcr_regs = true;
-
- return 0;
-
-err_irq:
- platform_device_unregister(ipcdev.tco_dev);
- platform_device_unregister(ipcdev.punit_dev);
- platform_device_unregister(ipcdev.telemetry_dev);
-
- return ret;
-}
-
-static int ipc_plat_remove(struct platform_device *pdev)
-{
- devm_free_irq(&pdev->dev, ipcdev.irq, &ipcdev);
- platform_device_unregister(ipcdev.tco_dev);
- platform_device_unregister(ipcdev.punit_dev);
- platform_device_unregister(ipcdev.telemetry_dev);
- ipcdev.dev = NULL;
- return 0;
-}
-
-static struct platform_driver ipc_plat_driver = {
- .remove = ipc_plat_remove,
- .probe = ipc_plat_probe,
- .driver = {
- .name = "pmc-ipc-plat",
- .acpi_match_table = ACPI_PTR(ipc_acpi_ids),
- .dev_groups = intel_ipc_groups,
- },
-};
-
-static int __init intel_pmc_ipc_init(void)
-{
- int ret;
-
- ret = platform_driver_register(&ipc_plat_driver);
- if (ret) {
- pr_err("Failed to register PMC ipc platform driver\n");
- return ret;
- }
- ret = pci_register_driver(&ipc_pci_driver);
- if (ret) {
- pr_err("Failed to register PMC ipc pci driver\n");
- platform_driver_unregister(&ipc_plat_driver);
- return ret;
- }
- return ret;
-}
-
-static void __exit intel_pmc_ipc_exit(void)
-{
- pci_unregister_driver(&ipc_pci_driver);
- platform_driver_unregister(&ipc_plat_driver);
-}
-
-MODULE_AUTHOR("Zha Qipeng <qipeng.zha@intel.com>");
-MODULE_DESCRIPTION("Intel PMC IPC driver");
-MODULE_LICENSE("GPL v2");
-
-/* Some modules are dependent on this, so init earlier */
-fs_initcall(intel_pmc_ipc_init);
-module_exit(intel_pmc_ipc_exit);
diff --git a/drivers/platform/x86/intel_scu_ipc.c b/drivers/platform/x86/intel_scu_ipc.c
index 3d7da5266136..d9cf7f7602b0 100644
--- a/drivers/platform/x86/intel_scu_ipc.c
+++ b/drivers/platform/x86/intel_scu_ipc.c
@@ -18,11 +18,10 @@
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/interrupt.h>
-#include <linux/pci.h>
-#include <linux/pm.h>
-#include <linux/sfi.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/slab.h>
-#include <asm/intel-mid.h>
#include <asm/intel_scu_ipc.h>
/* IPC defines the following message types */
@@ -55,14 +54,14 @@
#define IPC_IOC 0x100 /* IPC command register IOC bit */
struct intel_scu_ipc_dev {
- struct device *dev;
+ struct device dev;
+ struct resource mem;
+ struct module *owner;
+ int irq;
void __iomem *ipc_base;
struct completion cmd_complete;
- u8 irq_mode;
};
-static struct intel_scu_ipc_dev ipcdev; /* Only one for now */
-
#define IPC_STATUS 0x04
#define IPC_STATUS_IRQ BIT(2)
#define IPC_STATUS_ERR BIT(1)
@@ -78,8 +77,110 @@ static struct intel_scu_ipc_dev ipcdev; /* Only one for now */
/* Timeout in jiffies */
#define IPC_TIMEOUT (3 * HZ)
+static struct intel_scu_ipc_dev *ipcdev; /* Only one for now */
static DEFINE_MUTEX(ipclock); /* lock used to prevent multiple call to SCU */
+static struct class intel_scu_ipc_class = {
+ .name = "intel_scu_ipc",
+ .owner = THIS_MODULE,
+};
+
+/**
+ * intel_scu_ipc_dev_get() - Get SCU IPC instance
+ *
+ * The recommended new API takes the SCU IPC instance as a parameter, and
+ * a driver can call this function to get that instance. This also makes
+ * sure the driver providing the IPC functionality cannot be unloaded
+ * while the caller has the instance.
+ *
+ * Call intel_scu_ipc_dev_put() to release the instance.
+ *
+ * Returns %NULL if SCU IPC is not currently available.
+ */
+struct intel_scu_ipc_dev *intel_scu_ipc_dev_get(void)
+{
+ struct intel_scu_ipc_dev *scu = NULL;
+
+ mutex_lock(&ipclock);
+ if (ipcdev) {
+ get_device(&ipcdev->dev);
+ /*
+ * Prevent the IPC provider from being unloaded while it
+ * is being used.
+ */
+ if (!try_module_get(ipcdev->owner))
+ put_device(&ipcdev->dev);
+ else
+ scu = ipcdev;
+ }
+
+ mutex_unlock(&ipclock);
+ return scu;
+}
+EXPORT_SYMBOL_GPL(intel_scu_ipc_dev_get);
+
+/**
+ * intel_scu_ipc_dev_put() - Put SCU IPC instance
+ * @scu: SCU IPC instance
+ *
+ * This function releases the SCU IPC instance retrieved from
+ * intel_scu_ipc_dev_get() and allows the driver providing IPC to be
+ * unloaded.
+ */
+void intel_scu_ipc_dev_put(struct intel_scu_ipc_dev *scu)
+{
+ if (scu) {
+ module_put(scu->owner);
+ put_device(&scu->dev);
+ }
+}
+EXPORT_SYMBOL_GPL(intel_scu_ipc_dev_put);
+
+struct intel_scu_ipc_devres {
+ struct intel_scu_ipc_dev *scu;
+};
+
+static void devm_intel_scu_ipc_dev_release(struct device *dev, void *res)
+{
+ struct intel_scu_ipc_devres *dr = res;
+ struct intel_scu_ipc_dev *scu = dr->scu;
+
+ intel_scu_ipc_dev_put(scu);
+}
+
+/**
+ * devm_intel_scu_ipc_dev_get() - Allocate managed SCU IPC device
+ * @dev: Device requesting the SCU IPC device
+ *
+ * The recommended new API takes the SCU IPC instance as a parameter, and
+ * a driver can call this function to get that instance. This also makes
+ * sure the driver providing the IPC functionality cannot be unloaded
+ * while the caller has the instance.
+ *
+ * Returns %NULL if SCU IPC is not currently available.
+ */
+struct intel_scu_ipc_dev *devm_intel_scu_ipc_dev_get(struct device *dev)
+{
+ struct intel_scu_ipc_devres *dr;
+ struct intel_scu_ipc_dev *scu;
+
+ dr = devres_alloc(devm_intel_scu_ipc_dev_release, sizeof(*dr), GFP_KERNEL);
+ if (!dr)
+ return NULL;
+
+ scu = intel_scu_ipc_dev_get();
+ if (!scu) {
+ devres_free(dr);
+ return NULL;
+ }
+
+ dr->scu = scu;
+ devres_add(dev, dr);
+
+ return scu;
+}
+EXPORT_SYMBOL_GPL(devm_intel_scu_ipc_dev_get);
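To make the new instance-based API concrete, here is a minimal consumer sketch (assumed usage, mirroring the intel_mid_powerbtn conversion earlier in this series); the register offset is arbitrary and for illustration only.

/* Editorial illustration only -- not part of the patch. */
#include <linux/module.h>
#include <linux/platform_device.h>
#include <asm/intel_scu_ipc.h>

static int example_probe(struct platform_device *pdev)
{
	struct intel_scu_ipc_dev *scu;
	u8 val;
	int ret;

	/* Managed get: released automatically when the device is unbound. */
	scu = devm_intel_scu_ipc_dev_get(&pdev->dev);
	if (!scu)
		return -EPROBE_DEFER;	/* SCU IPC provider not ready yet */

	ret = intel_scu_ipc_dev_ioread8(scu, 0x3f, &val);	/* offset 0x3f is arbitrary */
	if (ret)
		return ret;

	dev_info(&pdev->dev, "SCU register 0x3f = %#x\n", val);
	return 0;
}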
+
/*
* Send ipc command
* Command Register (Write Only):
@@ -143,7 +244,6 @@ static inline int busy_loop(struct intel_scu_ipc_dev *scu)
usleep_range(50, 100);
} while (time_before(jiffies, end));
- dev_err(scu->dev, "IPC timed out");
return -ETIMEDOUT;
}
@@ -152,10 +252,8 @@ static inline int ipc_wait_for_interrupt(struct intel_scu_ipc_dev *scu)
{
int status;
- if (!wait_for_completion_timeout(&scu->cmd_complete, IPC_TIMEOUT)) {
- dev_err(scu->dev, "IPC timed out\n");
+ if (!wait_for_completion_timeout(&scu->cmd_complete, IPC_TIMEOUT))
return -ETIMEDOUT;
- }
status = ipc_read_status(scu);
if (status & IPC_STATUS_ERR)
@@ -166,13 +264,13 @@ static inline int ipc_wait_for_interrupt(struct intel_scu_ipc_dev *scu)
static int intel_scu_ipc_check_status(struct intel_scu_ipc_dev *scu)
{
- return scu->irq_mode ? ipc_wait_for_interrupt(scu) : busy_loop(scu);
+ return scu->irq > 0 ? ipc_wait_for_interrupt(scu) : busy_loop(scu);
}
/* Read/Write power control(PMIC in Langwell, MSIC in PenWell) registers */
-static int pwr_reg_rdwr(u16 *addr, u8 *data, u32 count, u32 op, u32 id)
+static int pwr_reg_rdwr(struct intel_scu_ipc_dev *scu, u16 *addr, u8 *data,
+ u32 count, u32 op, u32 id)
{
- struct intel_scu_ipc_dev *scu = &ipcdev;
int nc;
u32 offset = 0;
int err;
@@ -182,8 +280,9 @@ static int pwr_reg_rdwr(u16 *addr, u8 *data, u32 count, u32 op, u32 id)
memset(cbuf, 0, sizeof(cbuf));
mutex_lock(&ipclock);
-
- if (scu->dev == NULL) {
+ if (!scu)
+ scu = ipcdev;
+ if (!scu) {
mutex_unlock(&ipclock);
return -ENODEV;
}
@@ -222,7 +321,8 @@ static int pwr_reg_rdwr(u16 *addr, u8 *data, u32 count, u32 op, u32 id)
}
/**
- * intel_scu_ipc_ioread8 - read a word via the SCU
+ * intel_scu_ipc_dev_ioread8() - Read a byte via the SCU
+ * @scu: Optional SCU IPC instance
* @addr: Register on SCU
* @data: Return pointer for read byte
*
@@ -231,14 +331,15 @@ static int pwr_reg_rdwr(u16 *addr, u8 *data, u32 count, u32 op, u32 id)
*
* This function may sleep.
*/
-int intel_scu_ipc_ioread8(u16 addr, u8 *data)
+int intel_scu_ipc_dev_ioread8(struct intel_scu_ipc_dev *scu, u16 addr, u8 *data)
{
- return pwr_reg_rdwr(&addr, data, 1, IPCMSG_PCNTRL, IPC_CMD_PCNTRL_R);
+ return pwr_reg_rdwr(scu, &addr, data, 1, IPCMSG_PCNTRL, IPC_CMD_PCNTRL_R);
}
-EXPORT_SYMBOL(intel_scu_ipc_ioread8);
+EXPORT_SYMBOL(intel_scu_ipc_dev_ioread8);
/**
- * intel_scu_ipc_iowrite8 - write a byte via the SCU
+ * intel_scu_ipc_dev_iowrite8() - Write a byte via the SCU
+ * @scu: Optional SCU IPC instance
* @addr: Register on SCU
* @data: Byte to write
*
@@ -247,14 +348,15 @@ EXPORT_SYMBOL(intel_scu_ipc_ioread8);
*
* This function may sleep.
*/
-int intel_scu_ipc_iowrite8(u16 addr, u8 data)
+int intel_scu_ipc_dev_iowrite8(struct intel_scu_ipc_dev *scu, u16 addr, u8 data)
{
- return pwr_reg_rdwr(&addr, &data, 1, IPCMSG_PCNTRL, IPC_CMD_PCNTRL_W);
+ return pwr_reg_rdwr(scu, &addr, &data, 1, IPCMSG_PCNTRL, IPC_CMD_PCNTRL_W);
}
-EXPORT_SYMBOL(intel_scu_ipc_iowrite8);
+EXPORT_SYMBOL(intel_scu_ipc_dev_iowrite8);
/**
- * intel_scu_ipc_readvv - read a set of registers
+ * intel_scu_ipc_dev_readv() - Read a set of registers
+ * @scu: Optional SCU IPC instance
* @addr: Register list
* @data: Bytes to return
* @len: Length of array
@@ -266,14 +368,16 @@ EXPORT_SYMBOL(intel_scu_ipc_iowrite8);
*
* This function may sleep.
*/
-int intel_scu_ipc_readv(u16 *addr, u8 *data, int len)
+int intel_scu_ipc_dev_readv(struct intel_scu_ipc_dev *scu, u16 *addr, u8 *data,
+ size_t len)
{
- return pwr_reg_rdwr(addr, data, len, IPCMSG_PCNTRL, IPC_CMD_PCNTRL_R);
+ return pwr_reg_rdwr(scu, addr, data, len, IPCMSG_PCNTRL, IPC_CMD_PCNTRL_R);
}
-EXPORT_SYMBOL(intel_scu_ipc_readv);
+EXPORT_SYMBOL(intel_scu_ipc_dev_readv);
/**
- * intel_scu_ipc_writev - write a set of registers
+ * intel_scu_ipc_dev_writev() - Write a set of registers
+ * @scu: Optional SCU IPC instance
* @addr: Register list
* @data: Bytes to write
* @len: Length of array
@@ -285,16 +389,18 @@ EXPORT_SYMBOL(intel_scu_ipc_readv);
*
* This function may sleep.
*/
-int intel_scu_ipc_writev(u16 *addr, u8 *data, int len)
+int intel_scu_ipc_dev_writev(struct intel_scu_ipc_dev *scu, u16 *addr, u8 *data,
+ size_t len)
{
- return pwr_reg_rdwr(addr, data, len, IPCMSG_PCNTRL, IPC_CMD_PCNTRL_W);
+ return pwr_reg_rdwr(scu, addr, data, len, IPCMSG_PCNTRL, IPC_CMD_PCNTRL_W);
}
-EXPORT_SYMBOL(intel_scu_ipc_writev);
+EXPORT_SYMBOL(intel_scu_ipc_dev_writev);
/**
- * intel_scu_ipc_update_register - r/m/w a register
+ * intel_scu_ipc_dev_update() - Update a register
+ * @scu: Optional SCU IPC instance
* @addr: Register address
- * @bits: Bits to update
+ * @data: Bits to update
* @mask: Mask of bits to update
*
* Read-modify-write power control unit register. The first data argument
@@ -305,15 +411,17 @@ EXPORT_SYMBOL(intel_scu_ipc_writev);
* This function may sleep. Locking between SCU accesses is handled
* for the caller.
*/
-int intel_scu_ipc_update_register(u16 addr, u8 bits, u8 mask)
+int intel_scu_ipc_dev_update(struct intel_scu_ipc_dev *scu, u16 addr, u8 data,
+ u8 mask)
{
- u8 data[2] = { bits, mask };
- return pwr_reg_rdwr(&addr, data, 1, IPCMSG_PCNTRL, IPC_CMD_PCNTRL_M);
+ u8 tmp[2] = { data, mask };
+ return pwr_reg_rdwr(scu, &addr, tmp, 1, IPCMSG_PCNTRL, IPC_CMD_PCNTRL_M);
}
-EXPORT_SYMBOL(intel_scu_ipc_update_register);
+EXPORT_SYMBOL(intel_scu_ipc_dev_update);
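
To show how the byte accessors relate, a small sketch under the assumption
that 0x31 is some PMIC register of interest (the address is purely
illustrative):

	u8 val;
	int ret;

	ret = intel_scu_ipc_dev_ioread8(scu, 0x31, &val);	/* read one register */
	if (ret)
		return ret;

	ret = intel_scu_ipc_dev_iowrite8(scu, 0x31, val | BIT(0));	/* write it back */
	if (ret)
		return ret;

	/* or let the SCU do the read-modify-write in a single transaction */
	return intel_scu_ipc_dev_update(scu, 0x31, BIT(0), BIT(0));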
/**
- * intel_scu_ipc_simple_command - send a simple command
+ * intel_scu_ipc_dev_simple_command() - Send a simple command
+ * @scu: Optional SCU IPC instance
* @cmd: Command
* @sub: Sub type
*
@@ -324,62 +432,89 @@ EXPORT_SYMBOL(intel_scu_ipc_update_register);
* This function may sleep. Locking for SCU accesses is handled for the
* caller.
*/
-int intel_scu_ipc_simple_command(int cmd, int sub)
+int intel_scu_ipc_dev_simple_command(struct intel_scu_ipc_dev *scu, int cmd,
+ int sub)
{
- struct intel_scu_ipc_dev *scu = &ipcdev;
+ u32 cmdval;
int err;
mutex_lock(&ipclock);
- if (scu->dev == NULL) {
+ if (!scu)
+ scu = ipcdev;
+ if (!scu) {
mutex_unlock(&ipclock);
return -ENODEV;
}
- ipc_command(scu, sub << 12 | cmd);
+ cmdval = sub << 12 | cmd;
+ ipc_command(scu, cmdval);
err = intel_scu_ipc_check_status(scu);
mutex_unlock(&ipclock);
+ if (err)
+ dev_err(&scu->dev, "IPC command %#x failed with %d\n", cmdval, err);
return err;
}
-EXPORT_SYMBOL(intel_scu_ipc_simple_command);
+EXPORT_SYMBOL(intel_scu_ipc_dev_simple_command);
/**
- * intel_scu_ipc_command - command with data
+ * intel_scu_ipc_command_with_size() - Command with data
+ * @scu: Optional SCU IPC instance
* @cmd: Command
* @sub: Sub type
* @in: Input data
- * @inlen: Input length in dwords
+ * @inlen: Input length in bytes
+ * @size: Input size written to the IPC command register in whatever
+ * units (dword, byte) the particular firmware requires. Normally
+ * should be the same as @inlen.
* @out: Output data
- * @outlen: Output length in dwords
+ * @outlen: Output length in bytes
*
* Issue a command to the SCU which involves data transfers. Do the
* data copies under the lock but leave it for the caller to interpret.
*/
-int intel_scu_ipc_command(int cmd, int sub, u32 *in, int inlen,
- u32 *out, int outlen)
+int intel_scu_ipc_dev_command_with_size(struct intel_scu_ipc_dev *scu, int cmd,
+ int sub, const void *in, size_t inlen,
+ size_t size, void *out, size_t outlen)
{
- struct intel_scu_ipc_dev *scu = &ipcdev;
+ size_t outbuflen = DIV_ROUND_UP(outlen, sizeof(u32));
+ size_t inbuflen = DIV_ROUND_UP(inlen, sizeof(u32));
+ u32 cmdval, inbuf[4] = {};
int i, err;
+ if (inbuflen > 4 || outbuflen > 4)
+ return -EINVAL;
+
mutex_lock(&ipclock);
- if (scu->dev == NULL) {
+ if (!scu)
+ scu = ipcdev;
+ if (!scu) {
mutex_unlock(&ipclock);
return -ENODEV;
}
- for (i = 0; i < inlen; i++)
- ipc_data_writel(scu, *in++, 4 * i);
+ memcpy(inbuf, in, inlen);
+ for (i = 0; i < inbuflen; i++)
+ ipc_data_writel(scu, inbuf[i], 4 * i);
- ipc_command(scu, (inlen << 16) | (sub << 12) | cmd);
+ cmdval = (size << 16) | (sub << 12) | cmd;
+ ipc_command(scu, cmdval);
err = intel_scu_ipc_check_status(scu);
if (!err) {
- for (i = 0; i < outlen; i++)
- *out++ = ipc_data_readl(scu, 4 * i);
+ u32 outbuf[4] = {};
+
+ for (i = 0; i < outbuflen; i++)
+ outbuf[i] = ipc_data_readl(scu, 4 * i);
+
+ memcpy(out, outbuf, outlen);
}
mutex_unlock(&ipclock);
+ if (err)
+ dev_err(&scu->dev, "IPC command %#x failed with %d\n", cmdval, err);
return err;
}
-EXPORT_SYMBOL(intel_scu_ipc_command);
+EXPORT_SYMBOL(intel_scu_ipc_dev_command_with_size);
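
To illustrate the @inlen/@size split, a sketch with made-up command,
sub-command and payload values: four bytes are copied into the data registers
while the firmware is told a size of three. Presumably the header also
provides intel_scu_ipc_dev_command(), a wrapper passing @size == @inlen,
which is what the telemetry conversion below uses:

	u32 data = 0x1234;	/* hypothetical payload */
	u32 reply;
	int ret;

	ret = intel_scu_ipc_dev_command_with_size(scu, 0xeb, 0x1,
						  &data, sizeof(data), 3,
						  &reply, sizeof(reply));
	if (ret)
		return ret;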
/*
* Interrupt handler gets called when ioc bit of IPC_COMMAND_REG set to 1
@@ -399,61 +534,179 @@ static irqreturn_t ioc(int irq, void *dev_id)
return IRQ_HANDLED;
}
+static void intel_scu_ipc_release(struct device *dev)
+{
+ struct intel_scu_ipc_dev *scu;
+
+ scu = container_of(dev, struct intel_scu_ipc_dev, dev);
+ if (scu->irq > 0)
+ free_irq(scu->irq, scu);
+ iounmap(scu->ipc_base);
+ release_mem_region(scu->mem.start, resource_size(&scu->mem));
+ kfree(scu);
+}
+
/**
- * ipc_probe - probe an Intel SCU IPC
- * @pdev: the PCI device matching
- * @id: entry in the match table
+ * __intel_scu_ipc_register() - Register SCU IPC device
+ * @parent: Parent device
+ * @scu_data: Data used to configure SCU IPC
+ * @owner: Module registering the SCU IPC device
*
- * Enable and install an intel SCU IPC. This appears in the PCI space
- * but uses some hard coded addresses as well.
+ * Call this function to register SCU IPC mechanism under @parent.
+ * Returns pointer to the new SCU IPC device or ERR_PTR() in case of
+ * failure. The caller may use the returned instance if it needs to do
+ * SCU IPC calls itself.
*/
-static int ipc_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+struct intel_scu_ipc_dev *
+__intel_scu_ipc_register(struct device *parent,
+ const struct intel_scu_ipc_data *scu_data,
+ struct module *owner)
{
int err;
- struct intel_scu_ipc_dev *scu = &ipcdev;
+ struct intel_scu_ipc_dev *scu;
+ void __iomem *ipc_base;
- if (scu->dev) /* We support only one SCU */
- return -EBUSY;
+ mutex_lock(&ipclock);
+ /* We support only one IPC */
+ if (ipcdev) {
+ err = -EBUSY;
+ goto err_unlock;
+ }
- err = pcim_enable_device(pdev);
- if (err)
- return err;
+ scu = kzalloc(sizeof(*scu), GFP_KERNEL);
+ if (!scu) {
+ err = -ENOMEM;
+ goto err_unlock;
+ }
- err = pcim_iomap_regions(pdev, 1 << 0, pci_name(pdev));
- if (err)
- return err;
+ scu->owner = owner;
+ scu->dev.parent = parent;
+ scu->dev.class = &intel_scu_ipc_class;
+ scu->dev.release = intel_scu_ipc_release;
+ dev_set_name(&scu->dev, "intel_scu_ipc");
+
+ if (!request_mem_region(scu_data->mem.start, resource_size(&scu_data->mem),
+ "intel_scu_ipc")) {
+ err = -EBUSY;
+ goto err_free;
+ }
+ ipc_base = ioremap(scu_data->mem.start, resource_size(&scu_data->mem));
+ if (!ipc_base) {
+ err = -ENOMEM;
+ goto err_release;
+ }
+
+ scu->ipc_base = ipc_base;
+ scu->mem = scu_data->mem;
+ scu->irq = scu_data->irq;
init_completion(&scu->cmd_complete);
- scu->ipc_base = pcim_iomap_table(pdev)[0];
+ if (scu->irq > 0) {
+ err = request_irq(scu->irq, ioc, 0, "intel_scu_ipc", scu);
+ if (err)
+ goto err_unmap;
+ }
- err = devm_request_irq(&pdev->dev, pdev->irq, ioc, 0, "intel_scu_ipc",
- scu);
- if (err)
- return err;
+ /*
+ * After this point intel_scu_ipc_release() takes care of
+ * releasing the SCU IPC resources once the refcount drops to zero.
+ */
+ err = device_register(&scu->dev);
+ if (err) {
+ put_device(&scu->dev);
+ goto err_unlock;
+ }
/* Assign device at last */
- scu->dev = &pdev->dev;
+ ipcdev = scu;
+ mutex_unlock(&ipclock);
- intel_scu_devices_create();
+ return scu;
- pci_set_drvdata(pdev, scu);
- return 0;
+err_unmap:
+ iounmap(ipc_base);
+err_release:
+ release_mem_region(scu_data->mem.start, resource_size(&scu_data->mem));
+err_free:
+ kfree(scu);
+err_unlock:
+ mutex_unlock(&ipclock);
+
+ return ERR_PTR(err);
}
+EXPORT_SYMBOL_GPL(__intel_scu_ipc_register);
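
The header presumably wraps this in intel_scu_ipc_register() and
devm_intel_scu_ipc_register() macros that supply THIS_MODULE as @owner, which
is how the new PCI and platform drivers below call it. A provider-side sketch
with illustrative resource values:

	struct intel_scu_ipc_data scu_data = {
		.mem = DEFINE_RES_MEM(0xff000000, SZ_4K),	/* illustrative */
		.irq = 42,					/* illustrative */
	};
	struct intel_scu_ipc_dev *scu;

	scu = intel_scu_ipc_register(parent, &scu_data);
	if (IS_ERR(scu))
		return PTR_ERR(scu);

	/* ... later, when the provider goes away ... */
	intel_scu_ipc_unregister(scu);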
-static const struct pci_device_id pci_ids[] = {
- { PCI_VDEVICE(INTEL, 0x080e) },
- { PCI_VDEVICE(INTEL, 0x08ea) },
- { PCI_VDEVICE(INTEL, 0x11a0) },
- {}
-};
+/**
+ * intel_scu_ipc_unregister() - Unregister SCU IPC
+ * @scu: SCU IPC handle
+ *
+ * This unregisters the SCU IPC device and releases the acquired
+ * resources once the refcount goes to zero.
+ */
+void intel_scu_ipc_unregister(struct intel_scu_ipc_dev *scu)
+{
+ mutex_lock(&ipclock);
+ if (!WARN_ON(!ipcdev)) {
+ ipcdev = NULL;
+ device_unregister(&scu->dev);
+ }
+ mutex_unlock(&ipclock);
+}
+EXPORT_SYMBOL_GPL(intel_scu_ipc_unregister);
-static struct pci_driver ipc_driver = {
- .driver = {
- .suppress_bind_attrs = true,
- },
- .name = "intel_scu_ipc",
- .id_table = pci_ids,
- .probe = ipc_probe,
-};
-builtin_pci_driver(ipc_driver);
+static void devm_intel_scu_ipc_unregister(struct device *dev, void *res)
+{
+ struct intel_scu_ipc_devres *dr = res;
+ struct intel_scu_ipc_dev *scu = dr->scu;
+
+ intel_scu_ipc_unregister(scu);
+}
+
+/**
+ * __devm_intel_scu_ipc_register() - Register managed SCU IPC device
+ * @parent: Parent device
+ * @scu_data: Data used to configure SCU IPC
+ * @owner: Module registering the SCU IPC device
+ *
+ * Call this function to register managed SCU IPC mechanism under
+ * @parent. Returns pointer to the new SCU IPC device or ERR_PTR() in
+ * case of failure. The caller may use the returned instance if it needs
+ * to do SCU IPC calls itself.
+ */
+struct intel_scu_ipc_dev *
+__devm_intel_scu_ipc_register(struct device *parent,
+ const struct intel_scu_ipc_data *scu_data,
+ struct module *owner)
+{
+ struct intel_scu_ipc_devres *dr;
+ struct intel_scu_ipc_dev *scu;
+
+ dr = devres_alloc(devm_intel_scu_ipc_unregister, sizeof(*dr), GFP_KERNEL);
+ if (!dr)
+ return NULL;
+
+ scu = __intel_scu_ipc_register(parent, scu_data, owner);
+ if (IS_ERR(scu)) {
+ devres_free(dr);
+ return scu;
+ }
+
+ dr->scu = scu;
+ devres_add(parent, dr);
+
+ return scu;
+}
+EXPORT_SYMBOL_GPL(__devm_intel_scu_ipc_register);
+
+static int __init intel_scu_ipc_init(void)
+{
+ return class_register(&intel_scu_ipc_class);
+}
+subsys_initcall(intel_scu_ipc_init);
+
+static void __exit intel_scu_ipc_exit(void)
+{
+ class_unregister(&intel_scu_ipc_class);
+}
+module_exit(intel_scu_ipc_exit);
diff --git a/drivers/platform/x86/intel_scu_ipcutil.c b/drivers/platform/x86/intel_scu_ipcutil.c
index 8afe6fa06d7b..b7c10c15a3d6 100644
--- a/drivers/platform/x86/intel_scu_ipcutil.c
+++ b/drivers/platform/x86/intel_scu_ipcutil.c
@@ -22,6 +22,9 @@
static int major;
+static struct intel_scu_ipc_dev *scu;
+static DEFINE_MUTEX(scu_lock);
+
/* IOCTL commands */
#define INTE_SCU_IPC_REGISTER_READ 0
#define INTE_SCU_IPC_REGISTER_WRITE 1
@@ -52,12 +55,12 @@ static int scu_reg_access(u32 cmd, struct scu_ipc_data *data)
switch (cmd) {
case INTE_SCU_IPC_REGISTER_READ:
- return intel_scu_ipc_readv(data->addr, data->data, count);
+ return intel_scu_ipc_dev_readv(scu, data->addr, data->data, count);
case INTE_SCU_IPC_REGISTER_WRITE:
- return intel_scu_ipc_writev(data->addr, data->data, count);
+ return intel_scu_ipc_dev_writev(scu, data->addr, data->data, count);
case INTE_SCU_IPC_REGISTER_UPDATE:
- return intel_scu_ipc_update_register(data->addr[0],
- data->data[0], data->mask);
+ return intel_scu_ipc_dev_update(scu, data->addr[0], data->data[0],
+ data->mask);
default:
return -ENOTTY;
}
@@ -91,8 +94,40 @@ static long scu_ipc_ioctl(struct file *fp, unsigned int cmd,
return 0;
}
+static int scu_ipc_open(struct inode *inode, struct file *file)
+{
+ int ret = 0;
+
+ /* Only a single open at a time */
+ mutex_lock(&scu_lock);
+ if (scu) {
+ ret = -EBUSY;
+ goto unlock;
+ }
+
+ scu = intel_scu_ipc_dev_get();
+ if (!scu)
+ ret = -ENODEV;
+
+unlock:
+ mutex_unlock(&scu_lock);
+ return ret;
+}
+
+static int scu_ipc_release(struct inode *inode, struct file *file)
+{
+ mutex_lock(&scu_lock);
+ intel_scu_ipc_dev_put(scu);
+ scu = NULL;
+ mutex_unlock(&scu_lock);
+
+ return 0;
+}
+
static const struct file_operations scu_ipc_fops = {
.unlocked_ioctl = scu_ipc_ioctl,
+ .open = scu_ipc_open,
+ .release = scu_ipc_release,
};
static int __init ipc_module_init(void)
diff --git a/drivers/platform/x86/intel_scu_pcidrv.c b/drivers/platform/x86/intel_scu_pcidrv.c
new file mode 100644
index 000000000000..8c5fd8240da9
--- /dev/null
+++ b/drivers/platform/x86/intel_scu_pcidrv.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCI driver for the Intel SCU.
+ *
+ * Copyright (C) 2008-2010, 2015, 2020 Intel Corporation
+ * Authors: Sreedhara DS (sreedhara.ds@intel.com)
+ * Mika Westerberg <mika.westerberg@linux.intel.com>
+ */
+
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+
+#include <asm/intel-mid.h>
+#include <asm/intel_scu_ipc.h>
+
+static int intel_scu_pci_probe(struct pci_dev *pdev,
+ const struct pci_device_id *id)
+{
+ void (*setup_fn)(void) = (void (*)(void))id->driver_data;
+ struct intel_scu_ipc_data scu_data = {};
+ struct intel_scu_ipc_dev *scu;
+ int ret;
+
+ ret = pcim_enable_device(pdev);
+ if (ret)
+ return ret;
+
+ scu_data.mem = pdev->resource[0];
+ scu_data.irq = pdev->irq;
+
+ scu = intel_scu_ipc_register(&pdev->dev, &scu_data);
+ if (IS_ERR(scu))
+ return PTR_ERR(scu);
+
+ if (setup_fn)
+ setup_fn();
+ return 0;
+}
+
+static void intel_mid_scu_setup(void)
+{
+ intel_scu_devices_create();
+}
+
+static const struct pci_device_id pci_ids[] = {
+ { PCI_VDEVICE(INTEL, 0x080e),
+ .driver_data = (kernel_ulong_t)intel_mid_scu_setup },
+ { PCI_VDEVICE(INTEL, 0x08ea),
+ .driver_data = (kernel_ulong_t)intel_mid_scu_setup },
+ { PCI_VDEVICE(INTEL, 0x0a94) },
+ { PCI_VDEVICE(INTEL, 0x11a0),
+ .driver_data = (kernel_ulong_t)intel_mid_scu_setup },
+ { PCI_VDEVICE(INTEL, 0x1a94) },
+ { PCI_VDEVICE(INTEL, 0x5a94) },
+ {}
+};
+
+static struct pci_driver intel_scu_pci_driver = {
+ .driver = {
+ .suppress_bind_attrs = true,
+ },
+ .name = "intel_scu",
+ .id_table = pci_ids,
+ .probe = intel_scu_pci_probe,
+};
+
+builtin_pci_driver(intel_scu_pci_driver);
diff --git a/drivers/platform/x86/intel_scu_pltdrv.c b/drivers/platform/x86/intel_scu_pltdrv.c
new file mode 100644
index 000000000000..56ec6ae4c824
--- /dev/null
+++ b/drivers/platform/x86/intel_scu_pltdrv.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Platform driver for the Intel SCU.
+ *
+ * Copyright (C) 2019, Intel Corporation
+ * Authors: Divya Sasidharan <divya.s.sasidharan@intel.com>
+ * Mika Westerberg <mika.westerberg@linux.intel.com>
+ * Rajmohan Mani <rajmohan.mani@intel.com>
+ */
+
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/ioport.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+
+#include <asm/intel_scu_ipc.h>
+
+static int intel_scu_platform_probe(struct platform_device *pdev)
+{
+ struct intel_scu_ipc_data scu_data = {};
+ struct intel_scu_ipc_dev *scu;
+ const struct resource *res;
+
+ scu_data.irq = platform_get_irq_optional(pdev, 0);
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!res)
+ return -ENOMEM;
+
+ scu_data.mem = *res;
+
+ scu = devm_intel_scu_ipc_register(&pdev->dev, &scu_data);
+ if (IS_ERR(scu))
+ return PTR_ERR(scu);
+
+ platform_set_drvdata(pdev, scu);
+ return 0;
+}
+
+static const struct acpi_device_id intel_scu_acpi_ids[] = {
+ { "INTC1026" },
+ {}
+};
+MODULE_DEVICE_TABLE(acpi, intel_scu_acpi_ids);
+
+static struct platform_driver intel_scu_platform_driver = {
+ .probe = intel_scu_platform_probe,
+ .driver = {
+ .name = "intel_scu",
+ .acpi_match_table = intel_scu_acpi_ids,
+ },
+};
+module_platform_driver(intel_scu_platform_driver);
+
+MODULE_AUTHOR("Divya Sasidharan <divya.s.sasidharan@intel.com>");
+MODULE_AUTHOR("Mika Westerberg <mika.westerberg@linux.intel.com");
+MODULE_AUTHOR("Rajmohan Mani <rajmohan.mani@intel.com>");
+MODULE_DESCRIPTION("Intel SCU platform driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/platform/x86/intel_speed_select_if/isst_if_mbox_pci.c b/drivers/platform/x86/intel_speed_select_if/isst_if_mbox_pci.c
index de4169d0796b..d84e2174cbde 100644
--- a/drivers/platform/x86/intel_speed_select_if/isst_if_mbox_pci.c
+++ b/drivers/platform/x86/intel_speed_select_if/isst_if_mbox_pci.c
@@ -21,13 +21,12 @@
#define PUNIT_MAILBOX_BUSY_BIT 31
/*
- * Commands has variable amount of processing time. Most of the commands will
- * be done in 0-3 tries, but some takes up to 50.
- * The real processing time was observed as 25us for the most of the commands
- * at 2GHz. It is possible to optimize this count taking samples on customer
- * systems.
+ * The average time to complete some commands is about 40us, and the
+ * current count is enough to cover that. But when the firmware is very
+ * busy, this occasionally causes a timeout, so increase the count to deal
+ * with some worst-case scenarios. Most commands still complete in a few us.
*/
-#define OS_MAILBOX_RETRY_COUNT 50
+#define OS_MAILBOX_RETRY_COUNT 100
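
Not the driver's exact code, but roughly the bounded polling pattern this
count governs, assuming the mailbox exposes the busy bit named above
(mbox_base stands for an already mapped mailbox interface register):

	u32 data;
	int retries = 0;

	do {
		data = readl(mbox_base);	/* mailbox interface register, assumed mapped */
		if (!(data & BIT(PUNIT_MAILBOX_BUSY_BIT)))
			break;			/* firmware finished the command */
	} while (++retries < OS_MAILBOX_RETRY_COUNT);

	if (retries == OS_MAILBOX_RETRY_COUNT)
		return -EBUSY;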
struct isst_if_device {
struct mutex mutex;
diff --git a/drivers/platform/x86/intel_telemetry_core.c b/drivers/platform/x86/intel_telemetry_core.c
index d4040bb222b4..fdf55b5d6948 100644
--- a/drivers/platform/x86/intel_telemetry_core.c
+++ b/drivers/platform/x86/intel_telemetry_core.c
@@ -353,21 +353,16 @@ int telemetry_clear_pltdata(void)
EXPORT_SYMBOL_GPL(telemetry_clear_pltdata);
/**
- * telemetry_pltconfig_valid() - Checkif platform config is valid
+ * telemetry_get_pltdata() - Return telemetry platform config
*
- * Usage by other than telemetry module is invalid
- *
- * Return: 0 success, < 0 for failure
+ * May be used by other telemetry modules to get the platform-specific
+ * configuration.
*/
-int telemetry_pltconfig_valid(void)
+struct telemetry_plt_config *telemetry_get_pltdata(void)
{
- if (telm_core_conf.plt_config)
- return 0;
-
- else
- return -EINVAL;
+ return telm_core_conf.plt_config;
}
-EXPORT_SYMBOL_GPL(telemetry_pltconfig_valid);
+EXPORT_SYMBOL_GPL(telemetry_get_pltdata);
static inline int telemetry_get_pssevtname(enum telemetry_unit telem_unit,
const char **name, int len)
diff --git a/drivers/platform/x86/intel_telemetry_debugfs.c b/drivers/platform/x86/intel_telemetry_debugfs.c
index 8a53d3b485b3..1d4d0fbfd63c 100644
--- a/drivers/platform/x86/intel_telemetry_debugfs.c
+++ b/drivers/platform/x86/intel_telemetry_debugfs.c
@@ -15,6 +15,7 @@
*/
#include <linux/debugfs.h>
#include <linux/device.h>
+#include <linux/mfd/intel_pmc_bxt.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/seq_file.h>
@@ -22,7 +23,6 @@
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
-#include <asm/intel_pmc_ipc.h>
#include <asm/intel_telemetry.h>
#define DRIVER_NAME "telemetry_soc_debugfs"
@@ -647,10 +647,11 @@ DEFINE_SHOW_ATTRIBUTE(telem_soc_states);
static int telem_s0ix_res_get(void *data, u64 *val)
{
+ struct telemetry_plt_config *plt_config = telemetry_get_pltdata();
u64 s0ix_total_res;
int ret;
- ret = intel_pmc_s0ix_counter_read(&s0ix_total_res);
+ ret = intel_pmc_s0ix_counter_read(plt_config->pmc, &s0ix_total_res);
if (ret) {
pr_err("Failed to read S0ix residency");
return ret;
@@ -837,12 +838,15 @@ static int pm_suspend_exit_cb(void)
*/
if (suspend_shlw_ctr_exit == suspend_shlw_ctr_temp &&
suspend_deep_ctr_exit == suspend_deep_ctr_temp) {
- ret = intel_pmc_gcr_read64(PMC_GCR_TELEM_SHLW_S0IX_REG,
+ struct telemetry_plt_config *plt_config = telemetry_get_pltdata();
+ struct intel_pmc_dev *pmc = plt_config->pmc;
+
+ ret = intel_pmc_gcr_read64(pmc, PMC_GCR_TELEM_SHLW_S0IX_REG,
&suspend_shlw_res_exit);
if (ret < 0)
goto out;
- ret = intel_pmc_gcr_read64(PMC_GCR_TELEM_DEEP_S0IX_REG,
+ ret = intel_pmc_gcr_read64(pmc, PMC_GCR_TELEM_DEEP_S0IX_REG,
&suspend_deep_res_exit);
if (ret < 0)
goto out;
@@ -910,8 +914,7 @@ static int __init telemetry_debugfs_init(void)
debugfs_conf = (struct telemetry_debugfs_conf *)id->driver_data;
- err = telemetry_pltconfig_valid();
- if (err < 0) {
+ if (!telemetry_get_pltdata()) {
pr_info("Invalid pltconfig, ensure IPC1 device is enabled in BIOS\n");
return -ENODEV;
}
diff --git a/drivers/platform/x86/intel_telemetry_pltdrv.c b/drivers/platform/x86/intel_telemetry_pltdrv.c
index 987a24e3344e..405dea87de6b 100644
--- a/drivers/platform/x86/intel_telemetry_pltdrv.c
+++ b/drivers/platform/x86/intel_telemetry_pltdrv.c
@@ -15,7 +15,6 @@
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
-#include <asm/intel_pmc_ipc.h>
#include <asm/intel_punit_ipc.h>
#include <asm/intel_telemetry.h>
@@ -35,6 +34,7 @@
#define TELEM_SSRAM_STARTTIME_OFFSET 8
#define TELEM_SSRAM_EVTLOG_OFFSET 16
+#define IOSS_TELEM 0xeb
#define IOSS_TELEM_EVENT_READ 0x0
#define IOSS_TELEM_EVENT_WRITE 0x1
#define IOSS_TELEM_INFO_READ 0x2
@@ -42,9 +42,6 @@
#define IOSS_TELEM_TRACE_CTL_WRITE 0x6
#define IOSS_TELEM_EVENT_CTL_READ 0x7
#define IOSS_TELEM_EVENT_CTL_WRITE 0x8
-#define IOSS_TELEM_EVT_CTRL_WRITE_SIZE 0x4
-#define IOSS_TELEM_READ_WORD 0x1
-#define IOSS_TELEM_WRITE_FOURBYTES 0x4
#define IOSS_TELEM_EVT_WRITE_SIZE 0x3
#define TELEM_INFO_SRAMEVTS_MASK 0xFF00
@@ -250,17 +247,14 @@ static int telemetry_check_evtid(enum telemetry_unit telem_unit,
static inline int telemetry_plt_config_ioss_event(u32 evt_id, int index)
{
u32 write_buf;
- int ret;
write_buf = evt_id | TELEM_EVENT_ENABLE;
write_buf <<= BITS_PER_BYTE;
write_buf |= index;
- ret = intel_pmc_ipc_command(PMC_IPC_PMC_TELEMTRY,
- IOSS_TELEM_EVENT_WRITE, (u8 *)&write_buf,
- IOSS_TELEM_EVT_WRITE_SIZE, NULL, 0);
-
- return ret;
+ return intel_scu_ipc_dev_command(telm_conf->scu, IOSS_TELEM,
+ IOSS_TELEM_EVENT_WRITE, &write_buf,
+ IOSS_TELEM_EVT_WRITE_SIZE, NULL, 0);
}
static inline int telemetry_plt_config_pss_event(u32 evt_id, int index)
@@ -278,6 +272,7 @@ static inline int telemetry_plt_config_pss_event(u32 evt_id, int index)
static int telemetry_setup_iossevtconfig(struct telemetry_evtconfig evtconfig,
enum telemetry_action action)
{
+ struct intel_scu_ipc_dev *scu = telm_conf->scu;
u8 num_ioss_evts, ioss_period;
int ret, index, idx;
u32 *ioss_evtmap;
@@ -288,9 +283,9 @@ static int telemetry_setup_iossevtconfig(struct telemetry_evtconfig evtconfig,
ioss_evtmap = evtconfig.evtmap;
/* Get telemetry EVENT CTL */
- ret = intel_pmc_ipc_command(PMC_IPC_PMC_TELEMTRY,
+ ret = intel_scu_ipc_dev_command(scu, IOSS_TELEM,
IOSS_TELEM_EVENT_CTL_READ, NULL, 0,
- &telem_ctrl, IOSS_TELEM_READ_WORD);
+ &telem_ctrl, sizeof(telem_ctrl));
if (ret) {
pr_err("IOSS TELEM_CTRL Read Failed\n");
return ret;
@@ -299,11 +294,9 @@ static int telemetry_setup_iossevtconfig(struct telemetry_evtconfig evtconfig,
/* Disable Telemetry */
TELEM_DISABLE(telem_ctrl);
- ret = intel_pmc_ipc_command(PMC_IPC_PMC_TELEMTRY,
- IOSS_TELEM_EVENT_CTL_WRITE,
- (u8 *)&telem_ctrl,
- IOSS_TELEM_EVT_CTRL_WRITE_SIZE,
- NULL, 0);
+ ret = intel_scu_ipc_dev_command(scu, IOSS_TELEM,
+ IOSS_TELEM_EVENT_CTL_WRITE, &telem_ctrl,
+ sizeof(telem_ctrl), NULL, 0);
if (ret) {
pr_err("IOSS TELEM_CTRL Event Disable Write Failed\n");
return ret;
@@ -315,10 +308,9 @@ static int telemetry_setup_iossevtconfig(struct telemetry_evtconfig evtconfig,
/* Clear All Events */
TELEM_CLEAR_EVENTS(telem_ctrl);
- ret = intel_pmc_ipc_command(PMC_IPC_PMC_TELEMTRY,
+ ret = intel_scu_ipc_dev_command(scu, IOSS_TELEM,
IOSS_TELEM_EVENT_CTL_WRITE,
- (u8 *)&telem_ctrl,
- IOSS_TELEM_EVT_CTRL_WRITE_SIZE,
+ &telem_ctrl, sizeof(telem_ctrl),
NULL, 0);
if (ret) {
pr_err("IOSS TELEM_CTRL Event Disable Write Failed\n");
@@ -344,10 +336,9 @@ static int telemetry_setup_iossevtconfig(struct telemetry_evtconfig evtconfig,
/* Clear All Events */
TELEM_CLEAR_EVENTS(telem_ctrl);
- ret = intel_pmc_ipc_command(PMC_IPC_PMC_TELEMTRY,
+ ret = intel_scu_ipc_dev_command(scu, IOSS_TELEM,
IOSS_TELEM_EVENT_CTL_WRITE,
- (u8 *)&telem_ctrl,
- IOSS_TELEM_EVT_CTRL_WRITE_SIZE,
+ &telem_ctrl, sizeof(telem_ctrl),
NULL, 0);
if (ret) {
pr_err("IOSS TELEM_CTRL Event Disable Write Failed\n");
@@ -396,10 +387,9 @@ static int telemetry_setup_iossevtconfig(struct telemetry_evtconfig evtconfig,
TELEM_ENABLE_PERIODIC(telem_ctrl);
telem_ctrl |= ioss_period;
- ret = intel_pmc_ipc_command(PMC_IPC_PMC_TELEMTRY,
+ ret = intel_scu_ipc_dev_command(scu, IOSS_TELEM,
IOSS_TELEM_EVENT_CTL_WRITE,
- (u8 *)&telem_ctrl,
- IOSS_TELEM_EVT_CTRL_WRITE_SIZE, NULL, 0);
+ &telem_ctrl, sizeof(telem_ctrl), NULL, 0);
if (ret) {
pr_err("IOSS TELEM_CTRL Event Enable Write Failed\n");
return ret;
@@ -586,8 +576,9 @@ static int telemetry_setup(struct platform_device *pdev)
u32 read_buf, events, event_regs;
int ret;
- ret = intel_pmc_ipc_command(PMC_IPC_PMC_TELEMTRY, IOSS_TELEM_INFO_READ,
- NULL, 0, &read_buf, IOSS_TELEM_READ_WORD);
+ ret = intel_scu_ipc_dev_command(telm_conf->scu, IOSS_TELEM,
+ IOSS_TELEM_INFO_READ, NULL, 0,
+ &read_buf, sizeof(read_buf));
if (ret) {
dev_err(&pdev->dev, "IOSS TELEM_INFO Read Failed\n");
return ret;
@@ -681,6 +672,8 @@ static int telemetry_plt_set_sampling_period(u8 pss_period, u8 ioss_period)
mutex_lock(&(telm_conf->telem_lock));
if (ioss_period) {
+ struct intel_scu_ipc_dev *scu = telm_conf->scu;
+
if (TELEM_SAMPLE_PERIOD_INVALID(ioss_period)) {
pr_err("IOSS Sampling Period Out of Range\n");
ret = -EINVAL;
@@ -688,9 +681,9 @@ static int telemetry_plt_set_sampling_period(u8 pss_period, u8 ioss_period)
}
/* Get telemetry EVENT CTL */
- ret = intel_pmc_ipc_command(PMC_IPC_PMC_TELEMTRY,
+ ret = intel_scu_ipc_dev_command(scu, IOSS_TELEM,
IOSS_TELEM_EVENT_CTL_READ, NULL, 0,
- &telem_ctrl, IOSS_TELEM_READ_WORD);
+ &telem_ctrl, sizeof(telem_ctrl));
if (ret) {
pr_err("IOSS TELEM_CTRL Read Failed\n");
goto out;
@@ -699,11 +692,10 @@ static int telemetry_plt_set_sampling_period(u8 pss_period, u8 ioss_period)
/* Disable Telemetry */
TELEM_DISABLE(telem_ctrl);
- ret = intel_pmc_ipc_command(PMC_IPC_PMC_TELEMTRY,
- IOSS_TELEM_EVENT_CTL_WRITE,
- (u8 *)&telem_ctrl,
- IOSS_TELEM_EVT_CTRL_WRITE_SIZE,
- NULL, 0);
+ ret = intel_scu_ipc_dev_command(scu, IOSS_TELEM,
+ IOSS_TELEM_EVENT_CTL_WRITE,
+ &telem_ctrl, sizeof(telem_ctrl),
+ NULL, 0);
if (ret) {
pr_err("IOSS TELEM_CTRL Event Disable Write Failed\n");
goto out;
@@ -715,11 +707,10 @@ static int telemetry_plt_set_sampling_period(u8 pss_period, u8 ioss_period)
TELEM_ENABLE_PERIODIC(telem_ctrl);
telem_ctrl |= ioss_period;
- ret = intel_pmc_ipc_command(PMC_IPC_PMC_TELEMTRY,
- IOSS_TELEM_EVENT_CTL_WRITE,
- (u8 *)&telem_ctrl,
- IOSS_TELEM_EVT_CTRL_WRITE_SIZE,
- NULL, 0);
+ ret = intel_scu_ipc_dev_command(scu, IOSS_TELEM,
+ IOSS_TELEM_EVENT_CTL_WRITE,
+ &telem_ctrl, sizeof(telem_ctrl),
+ NULL, 0);
if (ret) {
pr_err("IOSS TELEM_CTRL Event Enable Write Failed\n");
goto out;
@@ -1014,9 +1005,9 @@ static int telemetry_plt_get_trace_verbosity(enum telemetry_unit telem_unit,
break;
case TELEM_IOSS:
- ret = intel_pmc_ipc_command(PMC_IPC_PMC_TELEMTRY,
- IOSS_TELEM_TRACE_CTL_READ, NULL, 0, &temp,
- IOSS_TELEM_READ_WORD);
+ ret = intel_scu_ipc_dev_command(telm_conf->scu,
+ IOSS_TELEM, IOSS_TELEM_TRACE_CTL_READ,
+ NULL, 0, &temp, sizeof(temp));
if (ret) {
pr_err("IOSS TRACE_CTL Read Failed\n");
goto out;
@@ -1068,9 +1059,9 @@ static int telemetry_plt_set_trace_verbosity(enum telemetry_unit telem_unit,
break;
case TELEM_IOSS:
- ret = intel_pmc_ipc_command(PMC_IPC_PMC_TELEMTRY,
- IOSS_TELEM_TRACE_CTL_READ, NULL, 0, &temp,
- IOSS_TELEM_READ_WORD);
+ ret = intel_scu_ipc_dev_command(telm_conf->scu, IOSS_TELEM,
+ IOSS_TELEM_TRACE_CTL_READ,
+ NULL, 0, &temp, sizeof(temp));
if (ret) {
pr_err("IOSS TRACE_CTL Read Failed\n");
goto out;
@@ -1079,9 +1070,9 @@ static int telemetry_plt_set_trace_verbosity(enum telemetry_unit telem_unit,
TELEM_CLEAR_VERBOSITY_BITS(temp);
TELEM_SET_VERBOSITY_BITS(temp, verbosity);
- ret = intel_pmc_ipc_command(PMC_IPC_PMC_TELEMTRY,
- IOSS_TELEM_TRACE_CTL_WRITE, (u8 *)&temp,
- IOSS_TELEM_WRITE_FOURBYTES, NULL, 0);
+ ret = intel_scu_ipc_dev_command(telm_conf->scu, IOSS_TELEM,
+ IOSS_TELEM_TRACE_CTL_WRITE,
+ &temp, sizeof(temp), NULL, 0);
if (ret) {
pr_err("IOSS TRACE_CTL Verbosity Set Failed\n");
goto out;
@@ -1124,6 +1115,8 @@ static int telemetry_pltdrv_probe(struct platform_device *pdev)
telm_conf = (struct telemetry_plt_config *)id->driver_data;
+ telm_conf->pmc = dev_get_drvdata(pdev->dev.parent);
+
mem = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(mem))
return PTR_ERR(mem);
@@ -1136,6 +1129,12 @@ static int telemetry_pltdrv_probe(struct platform_device *pdev)
telm_conf->ioss_config.regmap = mem;
+ telm_conf->scu = devm_intel_scu_ipc_dev_get(&pdev->dev);
+ if (!telm_conf->scu) {
+ ret = -EPROBE_DEFER;
+ goto out;
+ }
+
mutex_init(&telm_conf->telem_lock);
mutex_init(&telm_conf->telem_trace_lock);
diff --git a/drivers/platform/x86/lg-laptop.c b/drivers/platform/x86/lg-laptop.c
index c0bb1f864dfe..dd900a76d8de 100644
--- a/drivers/platform/x86/lg-laptop.c
+++ b/drivers/platform/x86/lg-laptop.c
@@ -67,9 +67,7 @@ static u32 inited;
#define INIT_INPUT_WMI_0 0x01
#define INIT_INPUT_WMI_2 0x02
#define INIT_INPUT_ACPI 0x04
-#define INIT_TPAD_LED 0x08
-#define INIT_KBD_LED 0x10
-#define INIT_SPARSE_KEYMAP 0x80
+#define INIT_SPARSE_KEYMAP 0x80
static const struct key_entry wmi_keymap[] = {
{KE_KEY, 0x70, {KEY_F15} }, /* LG control panel (F1) */
@@ -626,11 +624,9 @@ static int acpi_add(struct acpi_device *device)
if (ret)
goto out_platform_device;
- if (!led_classdev_register(&pf_device->dev, &kbd_backlight))
- inited |= INIT_KBD_LED;
-
- if (!led_classdev_register(&pf_device->dev, &tpad_led))
- inited |= INIT_TPAD_LED;
+ /* LEDs are optional */
+ led_classdev_register(&pf_device->dev, &kbd_backlight);
+ led_classdev_register(&pf_device->dev, &tpad_led);
wmi_input_setup();
@@ -646,11 +642,9 @@ out_platform_registered:
static int acpi_remove(struct acpi_device *device)
{
sysfs_remove_group(&pf_device->dev.kobj, &dev_attribute_group);
- if (inited & INIT_KBD_LED)
- led_classdev_unregister(&kbd_backlight);
- if (inited & INIT_TPAD_LED)
- led_classdev_unregister(&tpad_led);
+ led_classdev_unregister(&tpad_led);
+ led_classdev_unregister(&kbd_backlight);
wmi_input_destroy();
platform_device_unregister(pf_device);
diff --git a/drivers/platform/x86/samsung-laptop.c b/drivers/platform/x86/samsung-laptop.c
index 23e40aa2176e..d5cec6e35bb8 100644
--- a/drivers/platform/x86/samsung-laptop.c
+++ b/drivers/platform/x86/samsung-laptop.c
@@ -1138,8 +1138,7 @@ static enum led_brightness kbd_led_get(struct led_classdev *led_cdev)
static void samsung_leds_exit(struct samsung_laptop *samsung)
{
- if (!IS_ERR_OR_NULL(samsung->kbd_led.dev))
- led_classdev_unregister(&samsung->kbd_led);
+ led_classdev_unregister(&samsung->kbd_led);
if (samsung->led_workqueue)
destroy_workqueue(samsung->led_workqueue);
}
diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c
index 51309f7ceede..e5a1b5533408 100644
--- a/drivers/platform/x86/sony-laptop.c
+++ b/drivers/platform/x86/sony-laptop.c
@@ -757,33 +757,6 @@ static union acpi_object *__call_snc_method(acpi_handle handle, char *method,
return result;
}
-static int sony_nc_int_call(acpi_handle handle, char *name, int *value,
- int *result)
-{
- union acpi_object *object = NULL;
- if (value) {
- u64 v = *value;
- object = __call_snc_method(handle, name, &v);
- } else
- object = __call_snc_method(handle, name, NULL);
-
- if (!object)
- return -EINVAL;
-
- if (object->type != ACPI_TYPE_INTEGER) {
- pr_warn("Invalid acpi_object: expected 0x%x got 0x%x\n",
- ACPI_TYPE_INTEGER, object->type);
- kfree(object);
- return -EINVAL;
- }
-
- if (result)
- *result = object->integer.value;
-
- kfree(object);
- return 0;
-}
-
#define MIN(a, b) (a > b ? b : a)
static int sony_nc_buffer_call(acpi_handle handle, char *name, u64 *value,
void *buffer, size_t buflen)
@@ -795,17 +768,20 @@ static int sony_nc_buffer_call(acpi_handle handle, char *name, u64 *value,
if (!object)
return -EINVAL;
- if (object->type == ACPI_TYPE_BUFFER) {
+ if (!buffer) {
+ /* do nothing */
+ } else if (object->type == ACPI_TYPE_BUFFER) {
len = MIN(buflen, object->buffer.length);
+ memset(buffer, 0, buflen);
memcpy(buffer, object->buffer.pointer, len);
} else if (object->type == ACPI_TYPE_INTEGER) {
len = MIN(buflen, sizeof(object->integer.value));
+ memset(buffer, 0, buflen);
memcpy(buffer, &object->integer.value, len);
} else {
- pr_warn("Invalid acpi_object: expected 0x%x got 0x%x\n",
- ACPI_TYPE_BUFFER, object->type);
+ pr_warn("Unexpected acpi_object: 0x%x\n", object->type);
ret = -EINVAL;
}
@@ -813,6 +789,23 @@ static int sony_nc_buffer_call(acpi_handle handle, char *name, u64 *value,
return ret;
}
+static int sony_nc_int_call(acpi_handle handle, char *name, int *value,
+ int *result)
+{
+ int ret;
+
+ if (value) {
+ u64 v = *value;
+
+ ret = sony_nc_buffer_call(handle, name, &v, result,
+ sizeof(*result));
+ } else {
+ ret = sony_nc_buffer_call(handle, name, NULL, result,
+ sizeof(*result));
+ }
+ return ret;
+}
+
struct sony_nc_handles {
u16 cap[0x10];
struct device_attribute devattr;
@@ -2295,7 +2288,12 @@ static void sony_nc_thermal_cleanup(struct platform_device *pd)
#ifdef CONFIG_PM_SLEEP
static void sony_nc_thermal_resume(void)
{
- unsigned int status = sony_nc_thermal_mode_get();
+ int status;
+
+ if (!th_handle)
+ return;
+
+ status = sony_nc_thermal_mode_get();
if (status != th_handle->mode)
sony_nc_thermal_mode_set(th_handle->mode);
diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index 0f704484ae1d..ff7f0a4f2475 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -318,6 +318,7 @@ static struct {
u32 uwb:1;
u32 fan_ctrl_status_undef:1;
u32 second_fan:1;
+ u32 second_fan_ctl:1;
u32 beep_needs_two_args:1;
u32 mixer_no_level_control:1;
u32 battery_force_primary:1;
@@ -884,20 +885,11 @@ static ssize_t dispatch_proc_write(struct file *file,
if (!ibm || !ibm->write)
return -EINVAL;
- if (count > PAGE_SIZE - 2)
- return -EINVAL;
-
- kernbuf = kmalloc(count + 2, GFP_KERNEL);
- if (!kernbuf)
- return -ENOMEM;
- if (copy_from_user(kernbuf, userbuf, count)) {
- kfree(kernbuf);
- return -EFAULT;
- }
+ kernbuf = strndup_user(userbuf, PAGE_SIZE);
+ if (IS_ERR(kernbuf))
+ return PTR_ERR(kernbuf);
- kernbuf[count] = 0;
- strcat(kernbuf, ",");
ret = ibm->write(kernbuf);
if (ret == 0)
ret = count;
@@ -915,23 +907,6 @@ static const struct proc_ops dispatch_proc_ops = {
.proc_write = dispatch_proc_write,
};
-static char *next_cmd(char **cmds)
-{
- char *start = *cmds;
- char *end;
-
- while ((end = strchr(start, ',')) && end == start)
- start = end + 1;
-
- if (!end)
- return NULL;
-
- *end = 0;
- *cmds = end + 1;
- return start;
-}
-
-
/****************************************************************************
****************************************************************************
*
@@ -1422,7 +1397,7 @@ static int tpacpi_rfk_procfs_write(const enum tpacpi_rfk_id id, char *buf)
if (id >= TPACPI_RFK_SW_MAX)
return -ENODEV;
- while ((cmd = next_cmd(&buf))) {
+ while ((cmd = strsep(&buf, ","))) {
if (strlencmp(cmd, "enable") == 0)
status = TPACPI_RFK_RADIO_ON;
else if (strlencmp(cmd, "disable") == 0)
@@ -4305,7 +4280,7 @@ static int hotkey_write(char *buf)
mask = hotkey_user_mask;
res = 0;
- while ((cmd = next_cmd(&buf))) {
+ while ((cmd = strsep(&buf, ","))) {
if (strlencmp(cmd, "enable") == 0) {
hotkey_enabledisable_warn(1);
} else if (strlencmp(cmd, "disable") == 0) {
@@ -5232,7 +5207,7 @@ static int video_write(char *buf)
enable = 0;
disable = 0;
- while ((cmd = next_cmd(&buf))) {
+ while ((cmd = strsep(&buf, ","))) {
if (strlencmp(cmd, "lcd_enable") == 0) {
enable |= TP_ACPI_VIDEO_S_LCD;
} else if (strlencmp(cmd, "lcd_disable") == 0) {
@@ -5433,8 +5408,7 @@ static int __init kbdlight_init(struct ibm_init_struct *iibm)
static void kbdlight_exit(void)
{
- if (tp_features.kbdlight)
- led_classdev_unregister(&tpacpi_led_kbdlight.led_classdev);
+ led_classdev_unregister(&tpacpi_led_kbdlight.led_classdev);
}
static int kbdlight_set_level_and_update(int level)
@@ -5472,23 +5446,18 @@ static int kbdlight_read(struct seq_file *m)
static int kbdlight_write(char *buf)
{
char *cmd;
- int level = -1;
+ int res, level = -EINVAL;
if (!tp_features.kbdlight)
return -ENODEV;
- while ((cmd = next_cmd(&buf))) {
- if (strlencmp(cmd, "0") == 0)
- level = 0;
- else if (strlencmp(cmd, "1") == 0)
- level = 1;
- else if (strlencmp(cmd, "2") == 0)
- level = 2;
- else
- return -EINVAL;
+ while ((cmd = strsep(&buf, ","))) {
+ res = kstrtoint(cmd, 10, &level);
+ if (res < 0)
+ return res;
}
- if (level == -1)
+ if (level >= 3 || level < 0)
return -EINVAL;
return kbdlight_set_level_and_update(level);
@@ -5657,7 +5626,7 @@ static int light_write(char *buf)
if (!tp_features.light)
return -ENODEV;
- while ((cmd = next_cmd(&buf))) {
+ while ((cmd = strsep(&buf, ","))) {
if (strlencmp(cmd, "on") == 0) {
newstatus = 1;
} else if (strlencmp(cmd, "off") == 0) {
@@ -5742,7 +5711,7 @@ static int cmos_write(char *buf)
char *cmd;
int cmos_cmd, res;
- while ((cmd = next_cmd(&buf))) {
+ while ((cmd = strsep(&buf, ","))) {
if (sscanf(cmd, "%u", &cmos_cmd) == 1 &&
cmos_cmd >= 0 && cmos_cmd <= 21) {
/* cmos_cmd set */
@@ -5948,20 +5917,14 @@ static void led_exit(void)
{
unsigned int i;
- for (i = 0; i < TPACPI_LED_NUMLEDS; i++) {
- if (tpacpi_leds[i].led_classdev.name)
- led_classdev_unregister(&tpacpi_leds[i].led_classdev);
- }
+ for (i = 0; i < TPACPI_LED_NUMLEDS; i++)
+ led_classdev_unregister(&tpacpi_leds[i].led_classdev);
kfree(tpacpi_leds);
}
static int __init tpacpi_init_led(unsigned int led)
{
- int rc;
-
- tpacpi_leds[led].led = led;
-
/* LEDs with no name don't get registered */
if (!tpacpi_led_names[led])
return 0;
@@ -5969,17 +5932,12 @@ static int __init tpacpi_init_led(unsigned int led)
tpacpi_leds[led].led_classdev.brightness_set_blocking = &led_sysfs_set;
tpacpi_leds[led].led_classdev.blink_set = &led_sysfs_blink_set;
if (led_supported == TPACPI_LED_570)
- tpacpi_leds[led].led_classdev.brightness_get =
- &led_sysfs_get;
+ tpacpi_leds[led].led_classdev.brightness_get = &led_sysfs_get;
tpacpi_leds[led].led_classdev.name = tpacpi_led_names[led];
+ tpacpi_leds[led].led = led;
- rc = led_classdev_register(&tpacpi_pdev->dev,
- &tpacpi_leds[led].led_classdev);
- if (rc < 0)
- tpacpi_leds[led].led_classdev.name = NULL;
-
- return rc;
+ return led_classdev_register(&tpacpi_pdev->dev, &tpacpi_leds[led].led_classdev);
}
static const struct tpacpi_quirk led_useful_qtable[] __initconst = {
@@ -6089,8 +6047,7 @@ static int __init led_init(struct ibm_init_struct *iibm)
for (i = 0; i < TPACPI_LED_NUMLEDS; i++) {
tpacpi_leds[i].led = -1;
- if (!tpacpi_is_led_restricted(i) &&
- test_bit(i, &useful_leds)) {
+ if (!tpacpi_is_led_restricted(i) && test_bit(i, &useful_leds)) {
rc = tpacpi_init_led(i);
if (rc < 0) {
led_exit();
@@ -6143,12 +6100,14 @@ static int led_write(char *buf)
if (!led_supported)
return -ENODEV;
- while ((cmd = next_cmd(&buf))) {
+ while ((cmd = strsep(&buf, ","))) {
if (sscanf(cmd, "%d", &led) != 1)
return -EINVAL;
- if (led < 0 || led > (TPACPI_LED_NUMLEDS - 1) ||
- tpacpi_leds[led].led < 0)
+ if (led < 0 || led > (TPACPI_LED_NUMLEDS - 1))
+ return -ENODEV;
+
+ if (tpacpi_leds[led].led < 0)
return -ENODEV;
if (strstr(cmd, "off")) {
@@ -6228,7 +6187,7 @@ static int beep_write(char *buf)
if (!beep_handle)
return -ENODEV;
- while ((cmd = next_cmd(&buf))) {
+ while ((cmd = strsep(&buf, ","))) {
if (sscanf(cmd, "%u", &beep_cmd) == 1 &&
beep_cmd >= 0 && beep_cmd <= 17) {
/* beep_cmd set */
@@ -7116,7 +7075,7 @@ static int brightness_write(char *buf)
if (level < 0)
return level;
- while ((cmd = next_cmd(&buf))) {
+ while ((cmd = strsep(&buf, ","))) {
if (strlencmp(cmd, "up") == 0) {
if (level < bright_maxlvl)
level++;
@@ -7868,7 +7827,7 @@ static int volume_write(char *buf)
new_level = s & TP_EC_AUDIO_LVL_MSK;
new_mute = s & TP_EC_AUDIO_MUTESW_MSK;
- while ((cmd = next_cmd(&buf))) {
+ while ((cmd = strsep(&buf, ","))) {
if (!tp_features.mixer_no_level_control) {
if (strlencmp(cmd, "up") == 0) {
if (new_mute)
@@ -8324,11 +8283,19 @@ static int fan_set_level(int level)
switch (fan_control_access_mode) {
case TPACPI_FAN_WR_ACPI_SFAN:
- if (level >= 0 && level <= 7) {
- if (!acpi_evalf(sfan_handle, NULL, NULL, "vd", level))
- return -EIO;
- } else
+ if ((level < 0) || (level > 7))
return -EINVAL;
+
+ if (tp_features.second_fan_ctl) {
+ if (!fan_select_fan2() ||
+ !acpi_evalf(sfan_handle, NULL, NULL, "vd", level)) {
+ pr_warn("Couldn't set 2nd fan level, disabling support\n");
+ tp_features.second_fan_ctl = 0;
+ }
+ fan_select_fan1();
+ }
+ if (!acpi_evalf(sfan_handle, NULL, NULL, "vd", level))
+ return -EIO;
break;
case TPACPI_FAN_WR_ACPI_FANS:
@@ -8345,6 +8312,15 @@ static int fan_set_level(int level)
else if (level & TP_EC_FAN_AUTO)
level |= 4; /* safety min speed 4 */
+ if (tp_features.second_fan_ctl) {
+ if (!fan_select_fan2() ||
+ !acpi_ec_write(fan_status_offset, level)) {
+ pr_warn("Couldn't set 2nd fan level, disabling support\n");
+ tp_features.second_fan_ctl = 0;
+ }
+ fan_select_fan1();
+
+ }
if (!acpi_ec_write(fan_status_offset, level))
return -EIO;
else
@@ -8763,6 +8739,7 @@ static const struct attribute_group fan_attr_group = {
#define TPACPI_FAN_Q1 0x0001 /* Unitialized HFSP */
#define TPACPI_FAN_2FAN 0x0002 /* EC 0x31 bit 0 selects fan2 */
+#define TPACPI_FAN_2CTL 0x0004 /* selects fan2 control */
static const struct tpacpi_quirk fan_quirk_table[] __initconst = {
TPACPI_QEC_IBM('1', 'Y', TPACPI_FAN_Q1),
@@ -8771,6 +8748,13 @@ static const struct tpacpi_quirk fan_quirk_table[] __initconst = {
TPACPI_QEC_IBM('7', '0', TPACPI_FAN_Q1),
TPACPI_QEC_LNV('7', 'M', TPACPI_FAN_2FAN),
TPACPI_Q_LNV('N', '1', TPACPI_FAN_2FAN),
+ TPACPI_Q_LNV3('N', '1', 'D', TPACPI_FAN_2CTL), /* P70 */
+ TPACPI_Q_LNV3('N', '1', 'E', TPACPI_FAN_2CTL), /* P50 */
+ TPACPI_Q_LNV3('N', '1', 'T', TPACPI_FAN_2CTL), /* P71 */
+ TPACPI_Q_LNV3('N', '1', 'U', TPACPI_FAN_2CTL), /* P51 */
+ TPACPI_Q_LNV3('N', '2', 'C', TPACPI_FAN_2CTL), /* P52 / P72 */
+ TPACPI_Q_LNV3('N', '2', 'E', TPACPI_FAN_2CTL), /* P1 / X1 Extreme (1st gen) */
+ TPACPI_Q_LNV3('N', '2', 'O', TPACPI_FAN_2CTL), /* P1 / X1 Extreme (2nd gen) */
};
static int __init fan_init(struct ibm_init_struct *iibm)
@@ -8788,6 +8772,7 @@ static int __init fan_init(struct ibm_init_struct *iibm)
fan_watchdog_maxinterval = 0;
tp_features.fan_ctrl_status_undef = 0;
tp_features.second_fan = 0;
+ tp_features.second_fan_ctl = 0;
fan_control_desired_level = 7;
if (tpacpi_is_ibm()) {
@@ -8812,8 +8797,12 @@ static int __init fan_init(struct ibm_init_struct *iibm)
fan_quirk1_setup();
if (quirks & TPACPI_FAN_2FAN) {
tp_features.second_fan = 1;
- dbg_printk(TPACPI_DBG_INIT | TPACPI_DBG_FAN,
- "secondary fan support enabled\n");
+ pr_info("secondary fan support enabled\n");
+ }
+ if (quirks & TPACPI_FAN_2CTL) {
+ tp_features.second_fan = 1;
+ tp_features.second_fan_ctl = 1;
+ pr_info("secondary fan control enabled\n");
}
} else {
pr_err("ThinkPad ACPI EC access misbehaving, fan status and control unavailable\n");
@@ -9148,7 +9137,7 @@ static int fan_write(char *buf)
char *cmd;
int rc = 0;
- while (!rc && (cmd = next_cmd(&buf))) {
+ while (!rc && (cmd = strsep(&buf, ","))) {
if (!((fan_control_commands & TPACPI_FAN_CMD_LEVEL) &&
fan_write_cmd_level(cmd, &rc)) &&
!((fan_control_commands & TPACPI_FAN_CMD_ENABLE) &&
@@ -9271,10 +9260,8 @@ static int mute_led_init(struct ibm_init_struct *iibm)
mute_led_cdev[i].brightness = ledtrig_audio_get(i);
err = led_classdev_register(&tpacpi_pdev->dev, &mute_led_cdev[i]);
if (err < 0) {
- while (i--) {
- if (led_tables[i].state >= 0)
- led_classdev_unregister(&mute_led_cdev[i]);
- }
+ while (i--)
+ led_classdev_unregister(&mute_led_cdev[i]);
return err;
}
}
@@ -9286,10 +9273,8 @@ static void mute_led_exit(void)
int i;
for (i = 0; i < TPACPI_LED_MAX; i++) {
- if (led_tables[i].state >= 0) {
- led_classdev_unregister(&mute_led_cdev[i]);
- tpacpi_led_set(i, false);
- }
+ led_classdev_unregister(&mute_led_cdev[i]);
+ tpacpi_led_set(i, false);
}
}
@@ -9786,19 +9771,18 @@ static int lcdshadow_read(struct seq_file *m)
static int lcdshadow_write(char *buf)
{
char *cmd;
- int state = -1;
+ int res, state = -EINVAL;
if (lcdshadow_state < 0)
return -ENODEV;
- while ((cmd = next_cmd(&buf))) {
- if (strlencmp(cmd, "0") == 0)
- state = 0;
- else if (strlencmp(cmd, "1") == 0)
- state = 1;
+ while ((cmd = strsep(&buf, ","))) {
+ res = kstrtoint(cmd, 10, &state);
+ if (res < 0)
+ return res;
}
- if (state == -1)
+ if (state >= 2 || state < 0)
return -EINVAL;
return lcdshadow_set(state);
@@ -10314,10 +10298,9 @@ static int __init set_ibm_param(const char *val, const struct kernel_param *kp)
continue;
if (strcmp(ibm->name, kp->name) == 0 && ibm->write) {
- if (strlen(val) > sizeof(ibms_init[i].param) - 2)
+ if (strlen(val) > sizeof(ibms_init[i].param) - 1)
return -ENOSPC;
strcpy(ibms_init[i].param, val);
- strcat(ibms_init[i].param, ",");
return 0;
}
}
diff --git a/drivers/platform/x86/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c
index 808944546739..1ddab5a6dead 100644
--- a/drivers/platform/x86/toshiba_acpi.c
+++ b/drivers/platform/x86/toshiba_acpi.c
@@ -205,9 +205,6 @@ struct toshiba_acpi_dev {
unsigned int special_functions;
bool kbd_event_generated;
- bool kbd_led_registered;
- bool illumination_led_registered;
- bool eco_led_registered;
bool killswitch;
};
@@ -458,7 +455,6 @@ static void toshiba_illumination_available(struct toshiba_acpi_dev *dev)
acpi_status status;
dev->illumination_supported = 0;
- dev->illumination_led_registered = false;
if (!sci_open(dev))
return;
@@ -528,7 +524,6 @@ static void toshiba_kbd_illum_available(struct toshiba_acpi_dev *dev)
acpi_status status;
dev->kbd_illum_supported = 0;
- dev->kbd_led_registered = false;
dev->kbd_event_generated = false;
if (!sci_open(dev))
@@ -673,7 +668,6 @@ static void toshiba_eco_mode_available(struct toshiba_acpi_dev *dev)
acpi_status status;
dev->eco_supported = 0;
- dev->eco_led_registered = false;
status = tci_raw(dev, in, out);
if (ACPI_FAILURE(status)) {
@@ -2993,14 +2987,9 @@ static int toshiba_acpi_remove(struct acpi_device *acpi_dev)
backlight_device_unregister(dev->backlight_dev);
- if (dev->illumination_led_registered)
- led_classdev_unregister(&dev->led_dev);
-
- if (dev->kbd_led_registered)
- led_classdev_unregister(&dev->kbd_led);
-
- if (dev->eco_led_registered)
- led_classdev_unregister(&dev->eco_led);
+ led_classdev_unregister(&dev->led_dev);
+ led_classdev_unregister(&dev->kbd_led);
+ led_classdev_unregister(&dev->eco_led);
if (dev->wwan_rfk) {
rfkill_unregister(dev->wwan_rfk);
@@ -3092,8 +3081,7 @@ static int toshiba_acpi_add(struct acpi_device *acpi_dev)
dev->led_dev.max_brightness = 1;
dev->led_dev.brightness_set = toshiba_illumination_set;
dev->led_dev.brightness_get = toshiba_illumination_get;
- if (!led_classdev_register(&acpi_dev->dev, &dev->led_dev))
- dev->illumination_led_registered = true;
+ led_classdev_register(&acpi_dev->dev, &dev->led_dev);
}
toshiba_eco_mode_available(dev);
@@ -3102,8 +3090,7 @@ static int toshiba_acpi_add(struct acpi_device *acpi_dev)
dev->eco_led.max_brightness = 1;
dev->eco_led.brightness_set = toshiba_eco_mode_set_status;
dev->eco_led.brightness_get = toshiba_eco_mode_get_status;
- if (!led_classdev_register(&dev->acpi_dev->dev, &dev->eco_led))
- dev->eco_led_registered = true;
+ led_classdev_register(&dev->acpi_dev->dev, &dev->eco_led);
}
toshiba_kbd_illum_available(dev);
@@ -3119,8 +3106,7 @@ static int toshiba_acpi_add(struct acpi_device *acpi_dev)
dev->kbd_led.max_brightness = 1;
dev->kbd_led.brightness_set = toshiba_kbd_backlight_set;
dev->kbd_led.brightness_get = toshiba_kbd_backlight_get;
- if (!led_classdev_register(&dev->acpi_dev->dev, &dev->kbd_led))
- dev->kbd_led_registered = true;
+ led_classdev_register(&dev->acpi_dev->dev, &dev->kbd_led);
}
ret = toshiba_touchpad_get(dev, &dummy);
diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c
index 6ec8923dec1a..5c223015ee71 100644
--- a/drivers/platform/x86/touchscreen_dmi.c
+++ b/drivers/platform/x86/touchscreen_dmi.c
@@ -373,6 +373,23 @@ static const struct ts_dmi_data jumper_ezpad_mini3_data = {
.properties = jumper_ezpad_mini3_props,
};
+static const struct property_entry mpman_mpwin895cl_props[] = {
+ PROPERTY_ENTRY_U32("touchscreen-min-x", 3),
+ PROPERTY_ENTRY_U32("touchscreen-min-y", 9),
+ PROPERTY_ENTRY_U32("touchscreen-size-x", 1728),
+ PROPERTY_ENTRY_U32("touchscreen-size-y", 1150),
+ PROPERTY_ENTRY_BOOL("touchscreen-inverted-y"),
+ PROPERTY_ENTRY_STRING("firmware-name", "gsl3680-mpman-mpwin895cl.fw"),
+ PROPERTY_ENTRY_U32("silead,max-fingers", 10),
+ PROPERTY_ENTRY_BOOL("silead,home-button"),
+ { }
+};
+
+static const struct ts_dmi_data mpman_mpwin895cl_data = {
+ .acpi_name = "MSSL1680:00",
+ .properties = mpman_mpwin895cl_props,
+};
+
static const struct property_entry myria_my8307_props[] = {
PROPERTY_ENTRY_U32("touchscreen-size-x", 1720),
PROPERTY_ENTRY_U32("touchscreen-size-y", 1140),
@@ -448,6 +465,24 @@ static const struct ts_dmi_data onda_v820w_32g_data = {
.properties = onda_v820w_32g_props,
};
+static const struct property_entry onda_v891_v5_props[] = {
+ PROPERTY_ENTRY_U32("touchscreen-size-x", 1715),
+ PROPERTY_ENTRY_U32("touchscreen-size-y", 1140),
+ PROPERTY_ENTRY_BOOL("touchscreen-inverted-x"),
+ PROPERTY_ENTRY_BOOL("touchscreen-inverted-y"),
+ PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"),
+ PROPERTY_ENTRY_STRING("firmware-name",
+ "gsl3676-onda-v891-v5.fw"),
+ PROPERTY_ENTRY_U32("silead,max-fingers", 10),
+ PROPERTY_ENTRY_BOOL("silead,home-button"),
+ { }
+};
+
+static const struct ts_dmi_data onda_v891_v5_data = {
+ .acpi_name = "MSSL1680:00",
+ .properties = onda_v891_v5_props,
+};
+
static const struct property_entry onda_v891w_v1_props[] = {
PROPERTY_ENTRY_U32("touchscreen-min-x", 46),
PROPERTY_ENTRY_U32("touchscreen-min-y", 8),
@@ -588,6 +623,22 @@ static const struct ts_dmi_data schneider_sct101ctm_data = {
.properties = schneider_sct101ctm_props,
};
+static const struct property_entry techbite_arc_11_6_props[] = {
+ PROPERTY_ENTRY_U32("touchscreen-min-x", 5),
+ PROPERTY_ENTRY_U32("touchscreen-min-y", 7),
+ PROPERTY_ENTRY_U32("touchscreen-size-x", 1981),
+ PROPERTY_ENTRY_U32("touchscreen-size-y", 1270),
+ PROPERTY_ENTRY_BOOL("touchscreen-inverted-y"),
+ PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-techbite-arc-11-6.fw"),
+ PROPERTY_ENTRY_U32("silead,max-fingers", 10),
+ { }
+};
+
+static const struct ts_dmi_data techbite_arc_11_6_data = {
+ .acpi_name = "MSSL1680:00",
+ .properties = techbite_arc_11_6_props,
+};
+
static const struct property_entry teclast_x3_plus_props[] = {
PROPERTY_ENTRY_U32("touchscreen-size-x", 1980),
PROPERTY_ENTRY_U32("touchscreen-size-y", 1500),
@@ -662,11 +713,14 @@ static const struct ts_dmi_data trekstor_primetab_t13b_data = {
};
static const struct property_entry trekstor_surftab_twin_10_1_props[] = {
- PROPERTY_ENTRY_U32("touchscreen-size-x", 1900),
+ PROPERTY_ENTRY_U32("touchscreen-min-x", 20),
+ PROPERTY_ENTRY_U32("touchscreen-min-y", 0),
+ PROPERTY_ENTRY_U32("touchscreen-size-x", 1890),
PROPERTY_ENTRY_U32("touchscreen-size-y", 1280),
PROPERTY_ENTRY_U32("touchscreen-inverted-y", 1),
PROPERTY_ENTRY_STRING("firmware-name", "gsl3670-surftab-twin-10-1-st10432-8.fw"),
PROPERTY_ENTRY_U32("silead,max-fingers", 10),
+ PROPERTY_ENTRY_BOOL("silead,home-button"),
{ }
};
@@ -691,6 +745,20 @@ static const struct ts_dmi_data trekstor_surftab_wintron70_data = {
.properties = trekstor_surftab_wintron70_props,
};
+static const struct property_entry vinga_twizzle_j116_props[] = {
+ PROPERTY_ENTRY_U32("touchscreen-size-x", 1920),
+ PROPERTY_ENTRY_U32("touchscreen-size-y", 1280),
+ PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-vinga-twizzle_j116.fw"),
+ PROPERTY_ENTRY_U32("silead,max-fingers", 10),
+ PROPERTY_ENTRY_BOOL("silead,home-button"),
+ { }
+};
+
+static const struct ts_dmi_data vinga_twizzle_j116_data = {
+ .acpi_name = "MSSL1680:00",
+ .properties = vinga_twizzle_j116_props,
+};
+
/* NOTE: Please keep this table sorted alphabetically */
const struct dmi_system_id touchscreen_dmi_table[] = {
{
@@ -909,6 +977,14 @@ const struct dmi_system_id touchscreen_dmi_table[] = {
},
},
{
+ /* MP Man MPWIN895CL */
+ .driver_data = (void *)&mpman_mpwin895cl_data,
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "MPMAN"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "MPWIN8900CL"),
+ },
+ },
+ {
/* Myria MY8307 */
.driver_data = (void *)&myria_my8307_data,
.matches = {
@@ -941,6 +1017,15 @@ const struct dmi_system_id touchscreen_dmi_table[] = {
},
},
{
+ /* ONDA V891 v5 */
+ .driver_data = (void *)&onda_v891_v5_data,
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "ONDA"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "ONDA Tablet"),
+ DMI_MATCH(DMI_BIOS_VERSION, "ONDA.D869CJABNRBA06"),
+ },
+ },
+ {
/* ONDA V891w revision P891WBEBV1B00 aka v1 */
.driver_data = (void *)&onda_v891w_v1_data,
.matches = {
@@ -1030,6 +1115,15 @@ const struct dmi_system_id touchscreen_dmi_table[] = {
},
},
{
+ /* Techbite Arc 11.6 */
+ .driver_data = (void *)&techbite_arc_11_6_data,
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "mPTech"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "techBite Arc 11.6"),
+ DMI_MATCH(DMI_BOARD_NAME, "G8316_272B"),
+ },
+ },
+ {
/* Teclast X3 Plus */
.driver_data = (void *)&teclast_x3_plus_data,
.matches = {
@@ -1107,6 +1201,21 @@ const struct dmi_system_id touchscreen_dmi_table[] = {
},
},
{
+ /* Trekstor Yourbook C11B (same touchscreen as the Primebook C11) */
+ .driver_data = (void *)&trekstor_primebook_c11_data,
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "TREKSTOR"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "YOURBOOK C11B"),
+ },
+ },
+ {
+ /* Vinga Twizzle J116 */
+ .driver_data = (void *)&vinga_twizzle_j116_data,
+ .matches = {
+ DMI_MATCH(DMI_PRODUCT_NAME, "VINGA Twizzle J116"),
+ },
+ },
+ {
/* Yours Y8W81, same case and touchscreen as Chuwi Vi8 */
.driver_data = (void *)&chuwi_vi8_data,
.matches = {
@@ -1114,7 +1223,7 @@ const struct dmi_system_id touchscreen_dmi_table[] = {
DMI_MATCH(DMI_PRODUCT_NAME, "Y8W81"),
},
},
- { },
+ { }
};
static const struct ts_dmi_data *ts_data;
diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c
index 941739db7199..d88f388a3450 100644
--- a/drivers/platform/x86/wmi.c
+++ b/drivers/platform/x86/wmi.c
@@ -111,11 +111,11 @@ static struct platform_driver acpi_wmi_driver = {
static bool find_guid(const char *guid_string, struct wmi_block **out)
{
- uuid_le guid_input;
+ guid_t guid_input;
struct wmi_block *wblock;
struct guid_block *block;
- if (uuid_le_to_bin(guid_string, &guid_input))
+ if (guid_parse(guid_string, &guid_input))
return false;
list_for_each_entry(wblock, &wmi_block_list, list) {
@@ -134,7 +134,7 @@ static const void *find_guid_context(struct wmi_block *wblock,
struct wmi_driver *wdriver)
{
const struct wmi_device_id *id;
- uuid_le guid_input;
+ guid_t guid_input;
if (wblock == NULL || wdriver == NULL)
return NULL;
@@ -143,7 +143,7 @@ static const void *find_guid_context(struct wmi_block *wblock,
id = wdriver->id_table;
while (*id->guid_string) {
- if (uuid_le_to_bin(id->guid_string, &guid_input))
+ if (guid_parse(id->guid_string, &guid_input))
continue;
if (!memcmp(wblock->gblock.guid, &guid_input, 16))
return id->context;
@@ -202,7 +202,7 @@ static acpi_status wmi_method_enable(struct wmi_block *wblock, int enable)
/**
* set_required_buffer_size - Sets the buffer size needed for performing IOCTL
* @wdev: A wmi bus device from a driver
- * @instance: Instance index
+ * @length: Required buffer size
*
* Allocates memory needed for buffer, stores the buffer size in that memory
*/
@@ -222,8 +222,8 @@ EXPORT_SYMBOL_GPL(set_required_buffer_size);
* @guid_string: 36 char string of the form fa50ff2b-f2e8-45de-83fa-65417f2f49ba
* @instance: Instance index
* @method_id: Method ID to call
- * &in: Buffer containing input for the method call
- * &out: Empty buffer to return the method results
+ * @in: Buffer containing input for the method call
+ * @out: Empty buffer to return the method results
*
* Call an ACPI-WMI method
*/
@@ -244,8 +244,8 @@ EXPORT_SYMBOL_GPL(wmi_evaluate_method);
* @wdev: A wmi bus device from a driver
* @instance: Instance index
* @method_id: Method ID to call
- * &in: Buffer containing input for the method call
- * &out: Empty buffer to return the method results
+ * @in: Buffer containing input for the method call
+ * @out: Empty buffer to return the method results
*
* Call an ACPI-WMI method
*/
@@ -364,7 +364,7 @@ static acpi_status __query_block(struct wmi_block *wblock, u8 instance,
* wmi_query_block - Return contents of a WMI block (deprecated)
* @guid_string: 36 char string of the form fa50ff2b-f2e8-45de-83fa-65417f2f49ba
* @instance: Instance index
- * &out: Empty buffer to return the contents of the data block to
+ * @out: Empty buffer to return the contents of the data block to
*
* Return the contents of an ACPI-WMI data block to a buffer
*/
@@ -399,7 +399,7 @@ EXPORT_SYMBOL_GPL(wmidev_block_query);
* wmi_set_block - Write to a WMI block
* @guid_string: 36 char string of the form fa50ff2b-f2e8-45de-83fa-65417f2f49ba
* @instance: Instance index
- * &in: Buffer containing new values for the data block
+ * @in: Buffer containing new values for the data block
*
* Write the contents of the input buffer to an ACPI-WMI data block
*/
@@ -510,6 +510,7 @@ static void wmi_notify_debug(u32 value, void *context)
/**
* wmi_install_notify_handler - Register handler for WMI events
+ * @guid: 36 char string of the form fa50ff2b-f2e8-45de-83fa-65417f2f49ba
* @handler: Function to handle notifications
* @data: Data to be returned to handler when event is fired
*
@@ -520,12 +521,12 @@ wmi_notify_handler handler, void *data)
{
struct wmi_block *block;
acpi_status status = AE_NOT_EXIST;
- uuid_le guid_input;
+ guid_t guid_input;
if (!guid || !handler)
return AE_BAD_PARAMETER;
- if (uuid_le_to_bin(guid, &guid_input))
+ if (guid_parse(guid, &guid_input))
return AE_BAD_PARAMETER;
list_for_each_entry(block, &wmi_block_list, list) {
@@ -552,6 +553,7 @@ EXPORT_SYMBOL_GPL(wmi_install_notify_handler);
/**
* wmi_uninstall_notify_handler - Unregister handler for WMI events
+ * @guid: 36 char string of the form fa50ff2b-f2e8-45de-83fa-65417f2f49ba
*
* Unregister handler for events sent to the ACPI-WMI mapper device.
*/
@@ -559,12 +561,12 @@ acpi_status wmi_remove_notify_handler(const char *guid)
{
struct wmi_block *block;
acpi_status status = AE_NOT_EXIST;
- uuid_le guid_input;
+ guid_t guid_input;
if (!guid)
return AE_BAD_PARAMETER;
- if (uuid_le_to_bin(guid, &guid_input))
+ if (guid_parse(guid, &guid_input))
return AE_BAD_PARAMETER;
list_for_each_entry(block, &wmi_block_list, list) {
@@ -795,9 +797,9 @@ static int wmi_dev_match(struct device *dev, struct device_driver *driver)
return 0;
while (*id->guid_string) {
- uuid_le driver_guid;
+ guid_t driver_guid;
- if (WARN_ON(uuid_le_to_bin(id->guid_string, &driver_guid)))
+ if (WARN_ON(guid_parse(id->guid_string, &driver_guid)))
continue;
if (!memcmp(&driver_guid, wblock->gblock.guid, 16))
return 1;
@@ -1116,8 +1118,7 @@ static void wmi_free_devices(struct acpi_device *device)
}
}
-static bool guid_already_parsed(struct acpi_device *device,
- const u8 *guid)
+static bool guid_already_parsed(struct acpi_device *device, const u8 *guid)
{
struct wmi_block *wblock;
@@ -1327,10 +1328,8 @@ static void acpi_wmi_notify_handler(acpi_handle handle, u32 event,
wblock->handler(event, wblock->handler_data);
}
- if (debug_event) {
- pr_info("DEBUG Event GUID: %pUL\n",
- wblock->gblock.guid);
- }
+ if (debug_event)
+ pr_info("DEBUG Event GUID: %pUL\n", wblock->gblock.guid);
acpi_bus_generate_netlink_event(
wblock->acpi_device->pnp.device_class,
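
The wmi.c conversion swaps the deprecated uuid_le type and uuid_le_to_bin() for guid_t and guid_parse(); both parsers take the 36-character textual form and return 0 on success, so only the type names change at the call sites. A minimal sketch of the new-style comparison, assuming a raw 16-byte GUID such as the one kept in struct guid_block:

/* Sketch only: does guid_string match the raw 16-byte GUID in 'raw'? */
static bool example_guid_matches(const char *guid_string, const u8 *raw)
{
	guid_t guid;

	if (guid_parse(guid_string, &guid))	/* non-zero means malformed string */
		return false;

	return memcmp(&guid, raw, 16) == 0;
}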
diff --git a/drivers/pnp/pnpbios/pnpbios.h b/drivers/pnp/pnpbios/pnpbios.h
index 37acb8378f39..2ce739ff9c1a 100644
--- a/drivers/pnp/pnpbios/pnpbios.h
+++ b/drivers/pnp/pnpbios/pnpbios.h
@@ -107,7 +107,7 @@ struct pnp_bios_node {
__u32 eisa_id;
__u8 type_code[3];
__u16 flags;
- __u8 data[0];
+ __u8 data[];
};
#pragma pack()
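
The pnpbios.h hunk is the tree-wide replacement of GCC zero-length arrays (data[0]) with C99 flexible array members (data[]); the struct layout is unchanged, but the compiler can now diagnose misuse. The allocation size for such a struct is typically computed with struct_size() from <linux/overflow.h>; a minimal sketch, assuming 'len' bytes of trailing node data:

struct pnp_bios_node *node;

/* Sketch: one allocation covering the header plus 'len' bytes of data[]. */
node = kzalloc(struct_size(node, data, len), GFP_KERNEL);
if (!node)
	return -ENOMEM;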
diff --git a/drivers/power/supply/bd70528-charger.c b/drivers/power/supply/bd70528-charger.c
index b8e1ec106627..3b820110ecfa 100644
--- a/drivers/power/supply/bd70528-charger.c
+++ b/drivers/power/supply/bd70528-charger.c
@@ -335,14 +335,14 @@ static int bd70528_get_present(struct bd70528_psy *bdpsy, int *val)
return 0;
}
-struct linear_range {
+struct bd70528_linear_range {
int min;
int step;
int vals;
int low_sel;
};
-static const struct linear_range current_limit_ranges[] = {
+static const struct bd70528_linear_range current_limit_ranges[] = {
{
.min = 5,
.step = 1,
@@ -374,7 +374,7 @@ static const struct linear_range current_limit_ranges[] = {
* voltage for low temperatures. The driver currently only reads
* the charge current at room temperature. We do set both though.
*/
-static const struct linear_range warm_charge_curr[] = {
+static const struct bd70528_linear_range warm_charge_curr[] = {
{
.min = 10,
.step = 10,
@@ -398,7 +398,7 @@ static const struct linear_range warm_charge_curr[] = {
#define MAX_WARM_CHG_CURR_SEL 0x1f
#define MIN_CHG_CURR_SEL 0x0
-static int find_value_for_selector_low(const struct linear_range *r,
+static int find_value_for_selector_low(const struct bd70528_linear_range *r,
int selectors, unsigned int sel,
unsigned int *val)
{
@@ -420,7 +420,7 @@ static int find_value_for_selector_low(const struct linear_range *r,
* I guess it is enough if we use voltage/current which is closest (below)
* the requested?
*/
-static int find_selector_for_value_low(const struct linear_range *r,
+static int find_selector_for_value_low(const struct bd70528_linear_range *r,
int selectors, unsigned int val,
unsigned int *sel, bool *found)
{
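
The bd70528-charger.c change only renames the driver's private range type to bd70528_linear_range so it no longer collides with the new generic struct linear_range; the lookup semantics ("closest value at or below the request", per the comment above) are untouched. A condensed sketch of that selection idea over the renamed type, assuming the {min, step, vals, low_sel} fields shown above; it mirrors the intent of find_selector_for_value_low(), it is not the driver code:

static int example_sel_below(const struct bd70528_linear_range *r, int ranges,
			     unsigned int val, unsigned int *sel)
{
	bool found = false;
	int i;

	for (i = 0; i < ranges; i++) {
		unsigned int last = r[i].min + (r[i].vals - 1) * r[i].step;

		if (val < (unsigned int)r[i].min)
			continue;	/* below this range, try the next one */
		if (val >= last) {
			*sel = r[i].low_sel + r[i].vals - 1;	/* clamp to range top */
			found = true;
			continue;
		}
		*sel = r[i].low_sel + (val - r[i].min) / r[i].step;
		return 0;
	}
	return found ? 0 : -EINVAL;
}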
diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c
index eb328655bc01..61a63a16b5e7 100644
--- a/drivers/powercap/intel_rapl_common.c
+++ b/drivers/powercap/intel_rapl_common.c
@@ -26,9 +26,6 @@
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
-/* Local defines */
-#define MSR_PLATFORM_POWER_LIMIT 0x0000065C
-
/* bitmasks for RAPL MSRs, used by primitive access functions */
#define ENERGY_STATUS_MASK 0xffffffff
@@ -989,6 +986,7 @@ static const struct x86_cpu_id rapl_ids[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, &rapl_defaults_core),
X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS, &rapl_defaults_core),
X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D, &rapl_defaults_core),
+ X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT, &rapl_defaults_core),
X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &rapl_defaults_core),
X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_L, &rapl_defaults_core),
diff --git a/drivers/regulator/88pg86x.c b/drivers/regulator/88pg86x.c
index d5ef55c81185..71cfa2c5de5e 100644
--- a/drivers/regulator/88pg86x.c
+++ b/drivers/regulator/88pg86x.c
@@ -11,13 +11,13 @@ static const struct regulator_ops pg86x_ops = {
.list_voltage = regulator_list_voltage_linear_range,
};
-static const struct regulator_linear_range pg86x_buck1_ranges[] = {
+static const struct linear_range pg86x_buck1_ranges[] = {
REGULATOR_LINEAR_RANGE( 0, 0, 10, 0),
REGULATOR_LINEAR_RANGE(1000000, 11, 34, 25000),
REGULATOR_LINEAR_RANGE(1600000, 35, 47, 50000),
};
-static const struct regulator_linear_range pg86x_buck2_ranges[] = {
+static const struct linear_range pg86x_buck2_ranges[] = {
REGULATOR_LINEAR_RANGE( 0, 0, 15, 0),
REGULATOR_LINEAR_RANGE(1000000, 16, 39, 25000),
REGULATOR_LINEAR_RANGE(1600000, 40, 52, 50000),
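
From here on, the regulator patches are a mechanical rename of struct regulator_linear_range to the generic struct linear_range; REGULATOR_LINEAR_RANGE(min_uV, min_sel, max_sel, step_uV) keeps its argument order, so the tables themselves do not change. As a worked example of what one of these tables encodes: in pg86x_buck1_ranges above, selector 20 falls in the second range (selectors 11..34), so it maps to 1000000 + (20 - 11) * 25000 = 1225000 uV. A hedged sketch of the same mapping through the generic helper (header name assumed to be <linux/linear_range.h>):

unsigned int uV;
int ret;

/* Sketch: selector 20 of pg86x_buck1_ranges -> uV == 1225000 on success (ret == 0). */
ret = linear_range_get_value_array(pg86x_buck1_ranges,
				   ARRAY_SIZE(pg86x_buck1_ranges), 20, &uV);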
diff --git a/drivers/regulator/88pm800-regulator.c b/drivers/regulator/88pm800-regulator.c
index 69ae25886181..d08ee81ed1ac 100644
--- a/drivers/regulator/88pm800-regulator.c
+++ b/drivers/regulator/88pm800-regulator.c
@@ -134,13 +134,13 @@ struct pm800_regulator_info {
}
/* Ranges are sorted in ascending order. */
-static const struct regulator_linear_range buck1_volt_range[] = {
+static const struct linear_range buck1_volt_range[] = {
REGULATOR_LINEAR_RANGE(600000, 0, 0x4f, 12500),
REGULATOR_LINEAR_RANGE(1600000, 0x50, 0x54, 50000),
};
/* BUCK 2~5 have same ranges. */
-static const struct regulator_linear_range buck2_5_volt_range[] = {
+static const struct linear_range buck2_5_volt_range[] = {
REGULATOR_LINEAR_RANGE(600000, 0, 0x4f, 12500),
REGULATOR_LINEAR_RANGE(1600000, 0x50, 0x72, 50000),
};
diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig
index f4b72cb098ef..8f677f5d79b4 100644
--- a/drivers/regulator/Kconfig
+++ b/drivers/regulator/Kconfig
@@ -1,6 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only
menuconfig REGULATOR
bool "Voltage and Current Regulator Support"
+ select LINEAR_RANGES
help
Generic Voltage and Current Regulator support.
@@ -585,6 +586,16 @@ config REGULATOR_MAX77802
Exynos5420/Exynos5800 SoCs to control various voltages.
It includes support for control of voltage and ramp speed.
+config REGULATOR_MAX77826
+ tristate "Maxim 77826 regulator"
+ depends on I2C
+ select REGMAP_I2C
+ help
+ This driver controls a Maxim 77826 regulator via I2C bus.
+	  The regulator includes 15 LDOs, a BUCK and a BUCK-BOOST regulator.
+ It includes support for control of output voltage. This
+ regulator is found on the Samsung Galaxy S5 (klte) smartphone.
+
config REGULATOR_MC13XXX_CORE
tristate
diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile
index 6610ee001d9a..e8f163371071 100644
--- a/drivers/regulator/Makefile
+++ b/drivers/regulator/Makefile
@@ -74,6 +74,7 @@ obj-$(CONFIG_REGULATOR_MAX8998) += max8998.o
obj-$(CONFIG_REGULATOR_MAX77686) += max77686-regulator.o
obj-$(CONFIG_REGULATOR_MAX77693) += max77693-regulator.o
obj-$(CONFIG_REGULATOR_MAX77802) += max77802-regulator.o
+obj-$(CONFIG_REGULATOR_MAX77826) += max77826-regulator.o
obj-$(CONFIG_REGULATOR_MC13783) += mc13783-regulator.o
obj-$(CONFIG_REGULATOR_MC13892) += mc13892-regulator.o
obj-$(CONFIG_REGULATOR_MC13XXX_CORE) += mc13xxx-regulator-core.o
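
With the Kconfig entry and Makefile rule above in place, the new MAX77826 driver builds like any other regulator module; a minimal config fragment, assuming a modular build:

CONFIG_REGULATOR=y
CONFIG_REGULATOR_MAX77826=m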
diff --git a/drivers/regulator/ab8500.c b/drivers/regulator/ab8500.c
index f60e1b26c2d2..716ca5bb178e 100644
--- a/drivers/regulator/ab8500.c
+++ b/drivers/regulator/ab8500.c
@@ -139,17 +139,6 @@ static const unsigned int ldo_vintcore_voltages[] = {
1350000,
};
-static const unsigned int ldo_sdio_voltages[] = {
- 1160000,
- 1050000,
- 1100000,
- 1500000,
- 1800000,
- 2200000,
- 2910000,
- 3050000,
-};
-
static const unsigned int fixed_1200000_voltage[] = {
1200000,
};
@@ -166,10 +155,6 @@ static const unsigned int fixed_2050000_voltage[] = {
2050000,
};
-static const unsigned int fixed_3300000_voltage[] = {
- 3300000,
-};
-
static const unsigned int ldo_vana_voltages[] = {
1050000,
1075000,
@@ -192,13 +177,6 @@ static const unsigned int ldo_vaudio_voltages[] = {
2600000, /* Duplicated in Vaudio and IsoUicc Control register. */
};
-static const unsigned int ldo_vdmic_voltages[] = {
- 1800000,
- 1900000,
- 2000000,
- 2850000,
-};
-
static DEFINE_MUTEX(shared_mode_mutex);
static struct ab8500_shared_mode ldo_anamic1_shared;
static struct ab8500_shared_mode ldo_anamic2_shared;
diff --git a/drivers/regulator/act8865-regulator.c b/drivers/regulator/act8865-regulator.c
index 0fa97f934df4..19b9742c9ecc 100644
--- a/drivers/regulator/act8865-regulator.c
+++ b/drivers/regulator/act8865-regulator.c
@@ -220,13 +220,13 @@ static const struct regmap_config act8865_regmap_config = {
.val_bits = 8,
};
-static const struct regulator_linear_range act8865_voltage_ranges[] = {
+static const struct linear_range act8865_voltage_ranges[] = {
REGULATOR_LINEAR_RANGE(600000, 0, 23, 25000),
REGULATOR_LINEAR_RANGE(1200000, 24, 47, 50000),
REGULATOR_LINEAR_RANGE(2400000, 48, 63, 100000),
};
-static const struct regulator_linear_range act8600_sudcdc_voltage_ranges[] = {
+static const struct linear_range act8600_sudcdc_voltage_ranges[] = {
REGULATOR_LINEAR_RANGE(3000000, 0, 63, 0),
REGULATOR_LINEAR_RANGE(3000000, 64, 159, 100000),
REGULATOR_LINEAR_RANGE(12600000, 160, 191, 200000),
diff --git a/drivers/regulator/act8945a-regulator.c b/drivers/regulator/act8945a-regulator.c
index d2f804dbc785..6a62f946ccae 100644
--- a/drivers/regulator/act8945a-regulator.c
+++ b/drivers/regulator/act8945a-regulator.c
@@ -73,7 +73,7 @@ struct act8945a_pmic {
u32 op_mode[ACT8945A_ID_MAX];
};
-static const struct regulator_linear_range act8945a_voltage_ranges[] = {
+static const struct linear_range act8945a_voltage_ranges[] = {
REGULATOR_LINEAR_RANGE(600000, 0, 23, 25000),
REGULATOR_LINEAR_RANGE(1200000, 24, 47, 50000),
REGULATOR_LINEAR_RANGE(2400000, 48, 63, 100000),
diff --git a/drivers/regulator/arizona-ldo1.c b/drivers/regulator/arizona-ldo1.c
index 1a3d7b720f5e..ade0bef4569d 100644
--- a/drivers/regulator/arizona-ldo1.c
+++ b/drivers/regulator/arizona-ldo1.c
@@ -87,7 +87,7 @@ static const struct regulator_ops arizona_ldo1_hc_ops = {
.set_bypass = regulator_set_bypass_regmap,
};
-static const struct regulator_linear_range arizona_ldo1_hc_ranges[] = {
+static const struct linear_range arizona_ldo1_hc_ranges[] = {
REGULATOR_LINEAR_RANGE(900000, 0, 0x6, 50000),
REGULATOR_LINEAR_RANGE(1800000, 0x7, 0x7, 0),
};
diff --git a/drivers/regulator/arizona-micsupp.c b/drivers/regulator/arizona-micsupp.c
index ae1a5de3e57d..f6cfd3f6f0dd 100644
--- a/drivers/regulator/arizona-micsupp.c
+++ b/drivers/regulator/arizona-micsupp.c
@@ -125,7 +125,7 @@ static const struct regulator_ops arizona_micsupp_ops = {
.set_bypass = arizona_micsupp_set_bypass,
};
-static const struct regulator_linear_range arizona_micsupp_ranges[] = {
+static const struct linear_range arizona_micsupp_ranges[] = {
REGULATOR_LINEAR_RANGE(1700000, 0, 0x1e, 50000),
REGULATOR_LINEAR_RANGE(3300000, 0x1f, 0x1f, 0),
};
@@ -152,7 +152,7 @@ static const struct regulator_desc arizona_micsupp = {
.owner = THIS_MODULE,
};
-static const struct regulator_linear_range arizona_micsupp_ext_ranges[] = {
+static const struct linear_range arizona_micsupp_ext_ranges[] = {
REGULATOR_LINEAR_RANGE(900000, 0, 0x14, 25000),
REGULATOR_LINEAR_RANGE(1500000, 0x15, 0x27, 100000),
};
diff --git a/drivers/regulator/as3711-regulator.c b/drivers/regulator/as3711-regulator.c
index ece88103f2fd..b6b9206969ae 100644
--- a/drivers/regulator/as3711-regulator.c
+++ b/drivers/regulator/as3711-regulator.c
@@ -103,18 +103,18 @@ static const struct regulator_ops as3711_dldo_ops = {
.map_voltage = regulator_map_voltage_linear_range,
};
-static const struct regulator_linear_range as3711_sd_ranges[] = {
+static const struct linear_range as3711_sd_ranges[] = {
REGULATOR_LINEAR_RANGE(612500, 0x1, 0x40, 12500),
REGULATOR_LINEAR_RANGE(1425000, 0x41, 0x70, 25000),
REGULATOR_LINEAR_RANGE(2650000, 0x71, 0x7f, 50000),
};
-static const struct regulator_linear_range as3711_aldo_ranges[] = {
+static const struct linear_range as3711_aldo_ranges[] = {
REGULATOR_LINEAR_RANGE(1200000, 0, 0xf, 50000),
REGULATOR_LINEAR_RANGE(1800000, 0x10, 0x1f, 100000),
};
-static const struct regulator_linear_range as3711_dldo_ranges[] = {
+static const struct linear_range as3711_dldo_ranges[] = {
REGULATOR_LINEAR_RANGE(900000, 0, 0x10, 50000),
REGULATOR_LINEAR_RANGE(1750000, 0x20, 0x3f, 50000),
};
diff --git a/drivers/regulator/as3722-regulator.c b/drivers/regulator/as3722-regulator.c
index bd5d0bacb08d..33ca197860b3 100644
--- a/drivers/regulator/as3722-regulator.c
+++ b/drivers/regulator/as3722-regulator.c
@@ -389,7 +389,7 @@ static const struct regulator_ops as3722_ldo6_extcntrl_ops = {
.set_bypass = regulator_set_bypass_regmap,
};
-static const struct regulator_linear_range as3722_ldo_ranges[] = {
+static const struct linear_range as3722_ldo_ranges[] = {
REGULATOR_LINEAR_RANGE(0, 0x00, 0x00, 0),
REGULATOR_LINEAR_RANGE(825000, 0x01, 0x24, 25000),
REGULATOR_LINEAR_RANGE(1725000, 0x40, 0x7F, 25000),
@@ -487,7 +487,7 @@ static bool as3722_sd0_is_low_voltage(struct as3722_regulators *as3722_regs)
return false;
}
-static const struct regulator_linear_range as3722_sd2345_ranges[] = {
+static const struct linear_range as3722_sd2345_ranges[] = {
REGULATOR_LINEAR_RANGE(0, 0x00, 0x00, 0),
REGULATOR_LINEAR_RANGE(612500, 0x01, 0x40, 12500),
REGULATOR_LINEAR_RANGE(1425000, 0x41, 0x70, 25000),
diff --git a/drivers/regulator/axp20x-regulator.c b/drivers/regulator/axp20x-regulator.c
index 1e6eb5b1f8d8..fbc95cadaf53 100644
--- a/drivers/regulator/axp20x-regulator.c
+++ b/drivers/regulator/axp20x-regulator.c
@@ -510,7 +510,7 @@ static const struct regulator_ops axp20x_ops_sw = {
.is_enabled = regulator_is_enabled_regmap,
};
-static const struct regulator_linear_range axp20x_ldo4_ranges[] = {
+static const struct linear_range axp20x_ldo4_ranges[] = {
REGULATOR_LINEAR_RANGE(1250000,
AXP20X_LDO4_V_OUT_1250mV_START,
AXP20X_LDO4_V_OUT_1250mV_END,
@@ -638,7 +638,7 @@ static const struct regulator_desc axp22x_drivevbus_regulator = {
};
/* DCDC ranges shared with AXP813 */
-static const struct regulator_linear_range axp803_dcdc234_ranges[] = {
+static const struct linear_range axp803_dcdc234_ranges[] = {
REGULATOR_LINEAR_RANGE(500000,
AXP803_DCDC234_500mV_START,
AXP803_DCDC234_500mV_END,
@@ -649,7 +649,7 @@ static const struct regulator_linear_range axp803_dcdc234_ranges[] = {
20000),
};
-static const struct regulator_linear_range axp803_dcdc5_ranges[] = {
+static const struct linear_range axp803_dcdc5_ranges[] = {
REGULATOR_LINEAR_RANGE(800000,
AXP803_DCDC5_800mV_START,
AXP803_DCDC5_800mV_END,
@@ -660,7 +660,7 @@ static const struct regulator_linear_range axp803_dcdc5_ranges[] = {
20000),
};
-static const struct regulator_linear_range axp803_dcdc6_ranges[] = {
+static const struct linear_range axp803_dcdc6_ranges[] = {
REGULATOR_LINEAR_RANGE(600000,
AXP803_DCDC6_600mV_START,
AXP803_DCDC6_600mV_END,
@@ -672,7 +672,7 @@ static const struct regulator_linear_range axp803_dcdc6_ranges[] = {
};
/* AXP806's CLDO2 and AXP809's DLDO1 share the same range */
-static const struct regulator_linear_range axp803_dldo2_ranges[] = {
+static const struct linear_range axp803_dldo2_ranges[] = {
REGULATOR_LINEAR_RANGE(700000,
AXP803_DLDO2_700mV_START,
AXP803_DLDO2_700mV_END,
@@ -758,7 +758,7 @@ static const struct regulator_desc axp803_regulators[] = {
AXP_DESC_FIXED(AXP803, RTC_LDO, "rtc-ldo", "ips", 3000),
};
-static const struct regulator_linear_range axp806_dcdca_ranges[] = {
+static const struct linear_range axp806_dcdca_ranges[] = {
REGULATOR_LINEAR_RANGE(600000,
AXP806_DCDCA_600mV_START,
AXP806_DCDCA_600mV_END,
@@ -769,7 +769,7 @@ static const struct regulator_linear_range axp806_dcdca_ranges[] = {
20000),
};
-static const struct regulator_linear_range axp806_dcdcd_ranges[] = {
+static const struct linear_range axp806_dcdcd_ranges[] = {
REGULATOR_LINEAR_RANGE(600000,
AXP806_DCDCD_600mV_START,
AXP806_DCDCD_600mV_END,
@@ -834,7 +834,7 @@ static const struct regulator_desc axp806_regulators[] = {
AXP806_PWR_OUT_CTRL2, AXP806_PWR_OUT_SW_MASK),
};
-static const struct regulator_linear_range axp809_dcdc4_ranges[] = {
+static const struct linear_range axp809_dcdc4_ranges[] = {
REGULATOR_LINEAR_RANGE(600000,
AXP809_DCDC4_600mV_START,
AXP809_DCDC4_600mV_END,
diff --git a/drivers/regulator/bcm590xx-regulator.c b/drivers/regulator/bcm590xx-regulator.c
index 8c98c3f07660..65e23fc5f9c3 100644
--- a/drivers/regulator/bcm590xx-regulator.c
+++ b/drivers/regulator/bcm590xx-regulator.c
@@ -116,14 +116,14 @@ static const unsigned int ldo_vbus[] = {
};
/* DCDC group CSR: supported voltages in microvolts */
-static const struct regulator_linear_range dcdc_csr_ranges[] = {
+static const struct linear_range dcdc_csr_ranges[] = {
REGULATOR_LINEAR_RANGE(860000, 2, 50, 10000),
REGULATOR_LINEAR_RANGE(1360000, 51, 55, 20000),
REGULATOR_LINEAR_RANGE(900000, 56, 63, 0),
};
/* DCDC group IOSR1: supported voltages in microvolts */
-static const struct regulator_linear_range dcdc_iosr1_ranges[] = {
+static const struct linear_range dcdc_iosr1_ranges[] = {
REGULATOR_LINEAR_RANGE(860000, 2, 51, 10000),
REGULATOR_LINEAR_RANGE(1500000, 52, 52, 0),
REGULATOR_LINEAR_RANGE(1800000, 53, 53, 0),
@@ -131,7 +131,7 @@ static const struct regulator_linear_range dcdc_iosr1_ranges[] = {
};
/* DCDC group SDSR1: supported voltages in microvolts */
-static const struct regulator_linear_range dcdc_sdsr1_ranges[] = {
+static const struct linear_range dcdc_sdsr1_ranges[] = {
REGULATOR_LINEAR_RANGE(860000, 2, 50, 10000),
REGULATOR_LINEAR_RANGE(1340000, 51, 51, 0),
REGULATOR_LINEAR_RANGE(900000, 52, 63, 0),
@@ -143,7 +143,7 @@ struct bcm590xx_info {
u8 n_voltages;
const unsigned int *volt_table;
u8 n_linear_ranges;
- const struct regulator_linear_range *linear_ranges;
+ const struct linear_range *linear_ranges;
};
#define BCM590XX_REG_TABLE(_name, _table) \
diff --git a/drivers/regulator/bd70528-regulator.c b/drivers/regulator/bd70528-regulator.c
index 5bf8a2dc5fe7..d44adf7e875a 100644
--- a/drivers/regulator/bd70528-regulator.c
+++ b/drivers/regulator/bd70528-regulator.c
@@ -20,22 +20,22 @@
#define BUCK_RAMPRATE_125MV 1
#define BUCK_RAMP_MAX 250
-static const struct regulator_linear_range bd70528_buck1_volts[] = {
+static const struct linear_range bd70528_buck1_volts[] = {
REGULATOR_LINEAR_RANGE(1200000, 0x00, 0x1, 600000),
REGULATOR_LINEAR_RANGE(2750000, 0x2, 0xf, 50000),
};
-static const struct regulator_linear_range bd70528_buck2_volts[] = {
+static const struct linear_range bd70528_buck2_volts[] = {
REGULATOR_LINEAR_RANGE(1200000, 0x00, 0x1, 300000),
REGULATOR_LINEAR_RANGE(1550000, 0x2, 0xd, 50000),
REGULATOR_LINEAR_RANGE(3000000, 0xe, 0xf, 300000),
};
-static const struct regulator_linear_range bd70528_buck3_volts[] = {
+static const struct linear_range bd70528_buck3_volts[] = {
REGULATOR_LINEAR_RANGE(800000, 0x00, 0xd, 50000),
REGULATOR_LINEAR_RANGE(1800000, 0xe, 0xf, 0),
};
/* All LDOs have same voltage ranges */
-static const struct regulator_linear_range bd70528_ldo_volts[] = {
+static const struct linear_range bd70528_ldo_volts[] = {
REGULATOR_LINEAR_RANGE(1650000, 0x0, 0x07, 50000),
REGULATOR_LINEAR_RANGE(2100000, 0x8, 0x0f, 100000),
REGULATOR_LINEAR_RANGE(2850000, 0x10, 0x19, 50000),
diff --git a/drivers/regulator/bd71828-regulator.c b/drivers/regulator/bd71828-regulator.c
index b2fa17be4988..85c0b9000963 100644
--- a/drivers/regulator/bd71828-regulator.c
+++ b/drivers/regulator/bd71828-regulator.c
@@ -65,27 +65,27 @@ static const struct reg_init buck7_inits[] = {
},
};
-static const struct regulator_linear_range bd71828_buck1267_volts[] = {
+static const struct linear_range bd71828_buck1267_volts[] = {
REGULATOR_LINEAR_RANGE(500000, 0x00, 0xef, 6250),
REGULATOR_LINEAR_RANGE(2000000, 0xf0, 0xff, 0),
};
-static const struct regulator_linear_range bd71828_buck3_volts[] = {
+static const struct linear_range bd71828_buck3_volts[] = {
REGULATOR_LINEAR_RANGE(1200000, 0x00, 0x0f, 50000),
REGULATOR_LINEAR_RANGE(2000000, 0x10, 0x1f, 0),
};
-static const struct regulator_linear_range bd71828_buck4_volts[] = {
+static const struct linear_range bd71828_buck4_volts[] = {
REGULATOR_LINEAR_RANGE(1000000, 0x00, 0x1f, 25000),
REGULATOR_LINEAR_RANGE(1800000, 0x20, 0x3f, 0),
};
-static const struct regulator_linear_range bd71828_buck5_volts[] = {
+static const struct linear_range bd71828_buck5_volts[] = {
REGULATOR_LINEAR_RANGE(2500000, 0x00, 0x0f, 50000),
REGULATOR_LINEAR_RANGE(3300000, 0x10, 0x1f, 0),
};
-static const struct regulator_linear_range bd71828_ldo_volts[] = {
+static const struct linear_range bd71828_ldo_volts[] = {
REGULATOR_LINEAR_RANGE(800000, 0x00, 0x31, 50000),
REGULATOR_LINEAR_RANGE(3300000, 0x32, 0x3f, 0),
};
diff --git a/drivers/regulator/bd718x7-regulator.c b/drivers/regulator/bd718x7-regulator.c
index cf3872837abc..7b311389f925 100644
--- a/drivers/regulator/bd718x7-regulator.c
+++ b/drivers/regulator/bd718x7-regulator.c
@@ -55,12 +55,20 @@ static int bd718xx_buck1234_set_ramp_delay(struct regulator_dev *rdev,
BUCK_RAMPRATE_MASK, ramp_value << 6);
}
-/* Bucks 1 to 4 support DVS. PWM mode is used when voltage is changed.
+/*
+ * On BD71837 (not on BD71847, BD71850, ...)
+ * Bucks 1 to 4 support DVS. PWM mode is used when voltage is changed.
* Bucks 5 to 8 and LDOs can use PFM and must be disabled when voltage
* is changed. Hence we return -EBUSY for these if voltage is changed
* when BUCK/LDO is enabled.
+ *
+ * On BD71847, BD71850, ... the LDO voltage can be changed while the LDO is
+ * enabled. But if the voltage is increased, the LDO power-good monitoring
+ * must be disabled for the duration of the change plus 1 ms, to ensure the
+ * voltage has reached the higher level before the HW runs its next
+ * under-voltage detection cycle.
*/
-static int bd718xx_set_voltage_sel_restricted(struct regulator_dev *rdev,
+static int bd71837_set_voltage_sel_restricted(struct regulator_dev *rdev,
unsigned int sel)
{
if (regulator_is_enabled_regmap(rdev))
@@ -69,9 +77,124 @@ static int bd718xx_set_voltage_sel_restricted(struct regulator_dev *rdev,
return regulator_set_voltage_sel_regmap(rdev, sel);
}
+static void voltage_change_done(struct regulator_dev *rdev, unsigned int sel,
+ unsigned int *mask)
+{
+ int ret;
+
+ if (*mask) {
+ /*
+ * Let's allow scheduling as we use I2C anyway. We just need to
+ * guarantee a minimum of 1 ms sleep - it shouldn't matter if we
+ * exceed it due to scheduling.
+ */
+ msleep(1);
+ /*
+ * Note for the next hacker: PWRGOOD should not be masked on
+ * BD71847, so we just unconditionally re-enable detection
+ * when the voltage is set.
+ * If someone wants to disable PWRGOOD they must implement
+ * caching and restoring of the old value here. I am not
+ * aware of such use-cases, so for the sake of simplicity
+ * we just always enable PWRGOOD here.
+ */
+ ret = regmap_update_bits(rdev->regmap, BD718XX_REG_MVRFLTMASK2,
+ *mask, 0);
+ if (ret)
+ dev_err(&rdev->dev,
+ "Failed to re-enable voltage monitoring (%d)\n",
+ ret);
+ }
+}
+
+static int voltage_change_prepare(struct regulator_dev *rdev, unsigned int sel,
+ unsigned int *mask)
+{
+ int ret;
+
+ *mask = 0;
+ if (regulator_is_enabled_regmap(rdev)) {
+ int now, new;
+
+ now = rdev->desc->ops->get_voltage_sel(rdev);
+ if (now < 0)
+ return now;
+
+ now = rdev->desc->ops->list_voltage(rdev, now);
+ if (now < 0)
+ return now;
+
+ new = rdev->desc->ops->list_voltage(rdev, sel);
+ if (new < 0)
+ return new;
+
+ /*
+ * If we increase the LDO voltage while the LDO is enabled we need to
+ * disable power-good detection until the voltage has reached the new
+ * level. According to HW colleagues the maximum time this takes is
+ * 1000 us. I assume that on systems with a light load it might be
+ * less - and we could probably use DT to give a system-specific
+ * delay value if performance matters.
+ *
+ * Well, knowing we use I2C here and can tolerate scheduling delays,
+ * I don't think it is worth the hassle, so I just add a fixed 1 ms
+ * sleep here (and allow scheduling). If this turns out to be a
+ * problem we can change it to a delay and make the delay time
+ * configurable.
+ */
+ if (new > now) {
+ int ldo_offset = rdev->desc->id - BD718XX_LDO1;
+
+ *mask = BD718XX_LDO1_VRMON80 << ldo_offset;
+ ret = regmap_update_bits(rdev->regmap,
+ BD718XX_REG_MVRFLTMASK2,
+ *mask, *mask);
+ if (ret) {
+ dev_err(&rdev->dev,
+ "Failed to stop voltage monitoring\n");
+ return ret;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int bd718xx_set_voltage_sel_restricted(struct regulator_dev *rdev,
+ unsigned int sel)
+{
+ int ret;
+ int mask;
+
+ ret = voltage_change_prepare(rdev, sel, &mask);
+ if (ret)
+ return ret;
+
+ ret = regulator_set_voltage_sel_regmap(rdev, sel);
+ voltage_change_done(rdev, sel, &mask);
+
+ return ret;
+}
+
static int bd718xx_set_voltage_sel_pickable_restricted(
struct regulator_dev *rdev, unsigned int sel)
{
+ int ret;
+ int mask;
+
+ ret = voltage_change_prepare(rdev, sel, &mask);
+ if (ret)
+ return ret;
+
+ ret = regulator_set_voltage_sel_pickable_regmap(rdev, sel);
+ voltage_change_done(rdev, sel, &mask);
+
+ return ret;
+}
+
+static int bd71837_set_voltage_sel_pickable_restricted(
+ struct regulator_dev *rdev, unsigned int sel)
+{
if (regulator_is_enabled_regmap(rdev))
return -EBUSY;
@@ -85,6 +208,16 @@ static const struct regulator_ops bd718xx_pickable_range_ldo_ops = {
.list_voltage = regulator_list_voltage_pickable_linear_range,
.set_voltage_sel = bd718xx_set_voltage_sel_pickable_restricted,
.get_voltage_sel = regulator_get_voltage_sel_pickable_regmap,
+
+};
+
+static const struct regulator_ops bd71837_pickable_range_ldo_ops = {
+ .enable = regulator_enable_regmap,
+ .disable = regulator_disable_regmap,
+ .is_enabled = regulator_is_enabled_regmap,
+ .list_voltage = regulator_list_voltage_pickable_linear_range,
+ .set_voltage_sel = bd71837_set_voltage_sel_pickable_restricted,
+ .get_voltage_sel = regulator_get_voltage_sel_pickable_regmap,
};
static const struct regulator_ops bd718xx_pickable_range_buck_ops = {
@@ -92,11 +225,30 @@ static const struct regulator_ops bd718xx_pickable_range_buck_ops = {
.disable = regulator_disable_regmap,
.is_enabled = regulator_is_enabled_regmap,
.list_voltage = regulator_list_voltage_pickable_linear_range,
- .set_voltage_sel = bd718xx_set_voltage_sel_pickable_restricted,
+ .set_voltage_sel = regulator_set_voltage_sel_pickable_regmap,
+ .get_voltage_sel = regulator_get_voltage_sel_pickable_regmap,
+ .set_voltage_time_sel = regulator_set_voltage_time_sel,
+};
+
+static const struct regulator_ops bd71837_pickable_range_buck_ops = {
+ .enable = regulator_enable_regmap,
+ .disable = regulator_disable_regmap,
+ .is_enabled = regulator_is_enabled_regmap,
+ .list_voltage = regulator_list_voltage_pickable_linear_range,
+ .set_voltage_sel = bd71837_set_voltage_sel_pickable_restricted,
.get_voltage_sel = regulator_get_voltage_sel_pickable_regmap,
.set_voltage_time_sel = regulator_set_voltage_time_sel,
};
+static const struct regulator_ops bd71837_ldo_regulator_ops = {
+ .enable = regulator_enable_regmap,
+ .disable = regulator_disable_regmap,
+ .is_enabled = regulator_is_enabled_regmap,
+ .list_voltage = regulator_list_voltage_linear_range,
+ .set_voltage_sel = bd71837_set_voltage_sel_restricted,
+ .get_voltage_sel = regulator_get_voltage_sel_regmap,
+};
+
static const struct regulator_ops bd718xx_ldo_regulator_ops = {
.enable = regulator_enable_regmap,
.disable = regulator_disable_regmap,
@@ -106,6 +258,15 @@ static const struct regulator_ops bd718xx_ldo_regulator_ops = {
.get_voltage_sel = regulator_get_voltage_sel_regmap,
};
+static const struct regulator_ops bd71837_ldo_regulator_nolinear_ops = {
+ .enable = regulator_enable_regmap,
+ .disable = regulator_disable_regmap,
+ .is_enabled = regulator_is_enabled_regmap,
+ .list_voltage = regulator_list_voltage_table,
+ .set_voltage_sel = bd71837_set_voltage_sel_restricted,
+ .get_voltage_sel = regulator_get_voltage_sel_regmap,
+};
+
static const struct regulator_ops bd718xx_ldo_regulator_nolinear_ops = {
.enable = regulator_enable_regmap,
.disable = regulator_disable_regmap,
@@ -120,7 +281,17 @@ static const struct regulator_ops bd718xx_buck_regulator_ops = {
.disable = regulator_disable_regmap,
.is_enabled = regulator_is_enabled_regmap,
.list_voltage = regulator_list_voltage_linear_range,
- .set_voltage_sel = bd718xx_set_voltage_sel_restricted,
+ .set_voltage_sel = regulator_set_voltage_sel_regmap,
+ .get_voltage_sel = regulator_get_voltage_sel_regmap,
+ .set_voltage_time_sel = regulator_set_voltage_time_sel,
+};
+
+static const struct regulator_ops bd71837_buck_regulator_ops = {
+ .enable = regulator_enable_regmap,
+ .disable = regulator_disable_regmap,
+ .is_enabled = regulator_is_enabled_regmap,
+ .list_voltage = regulator_list_voltage_linear_range,
+ .set_voltage_sel = bd71837_set_voltage_sel_restricted,
.get_voltage_sel = regulator_get_voltage_sel_regmap,
.set_voltage_time_sel = regulator_set_voltage_time_sel,
};
@@ -131,6 +302,17 @@ static const struct regulator_ops bd718xx_buck_regulator_nolinear_ops = {
.is_enabled = regulator_is_enabled_regmap,
.list_voltage = regulator_list_voltage_table,
.map_voltage = regulator_map_voltage_ascend,
+ .set_voltage_sel = regulator_set_voltage_sel_regmap,
+ .get_voltage_sel = regulator_get_voltage_sel_regmap,
+ .set_voltage_time_sel = regulator_set_voltage_time_sel,
+};
+
+static const struct regulator_ops bd71837_buck_regulator_nolinear_ops = {
+ .enable = regulator_enable_regmap,
+ .disable = regulator_disable_regmap,
+ .is_enabled = regulator_is_enabled_regmap,
+ .list_voltage = regulator_list_voltage_table,
+ .map_voltage = regulator_map_voltage_ascend,
.set_voltage_sel = bd718xx_set_voltage_sel_restricted,
.get_voltage_sel = regulator_get_voltage_sel_regmap,
.set_voltage_time_sel = regulator_set_voltage_time_sel,
@@ -152,7 +334,7 @@ static const struct regulator_ops bd718xx_dvs_buck_regulator_ops = {
* BD71847 BUCK1/2
* 0.70 to 1.30V (10mV step)
*/
-static const struct regulator_linear_range bd718xx_dvs_buck_volts[] = {
+static const struct linear_range bd718xx_dvs_buck_volts[] = {
REGULATOR_LINEAR_RANGE(700000, 0x00, 0x3C, 10000),
REGULATOR_LINEAR_RANGE(1300000, 0x3D, 0x3F, 0),
};
@@ -163,7 +345,7 @@ static const struct regulator_linear_range bd718xx_dvs_buck_volts[] = {
* and
* 0.675 to 1.325 (range 1)
*/
-static const struct regulator_linear_range bd71837_buck5_volts[] = {
+static const struct linear_range bd71837_buck5_volts[] = {
/* Ranges when VOLT_SEL bit is 0 */
REGULATOR_LINEAR_RANGE(700000, 0x00, 0x03, 100000),
REGULATOR_LINEAR_RANGE(1050000, 0x04, 0x05, 50000),
@@ -185,7 +367,7 @@ static const unsigned int bd71837_buck5_volt_range_sel[] = {
/*
* BD71847 BUCK3
*/
-static const struct regulator_linear_range bd71847_buck3_volts[] = {
+static const struct linear_range bd71847_buck3_volts[] = {
/* Ranges when VOLT_SEL bits are 00 */
REGULATOR_LINEAR_RANGE(700000, 0x00, 0x03, 100000),
REGULATOR_LINEAR_RANGE(1050000, 0x04, 0x05, 50000),
@@ -202,7 +384,7 @@ static const unsigned int bd71847_buck3_volt_range_sel[] = {
0x0, 0x0, 0x0, 0x40, 0x80, 0x80, 0x80
};
-static const struct regulator_linear_range bd71847_buck4_volts[] = {
+static const struct linear_range bd71847_buck4_volts[] = {
REGULATOR_LINEAR_RANGE(3000000, 0x00, 0x03, 100000),
REGULATOR_LINEAR_RANGE(2600000, 0x00, 0x03, 100000),
};
@@ -213,7 +395,7 @@ static const unsigned int bd71847_buck4_volt_range_sel[] = { 0x0, 0x40 };
* BUCK6
* 3.0V to 3.3V (step 100mV)
*/
-static const struct regulator_linear_range bd71837_buck6_volts[] = {
+static const struct linear_range bd71837_buck6_volts[] = {
REGULATOR_LINEAR_RANGE(3000000, 0x00, 0x03, 100000),
};
@@ -237,7 +419,7 @@ static const unsigned int bd718xx_3rd_nodvs_buck_volts[] = {
* BUCK8
* 0.8V to 1.40V (step 10mV)
*/
-static const struct regulator_linear_range bd718xx_4th_nodvs_buck_volts[] = {
+static const struct linear_range bd718xx_4th_nodvs_buck_volts[] = {
REGULATOR_LINEAR_RANGE(800000, 0x00, 0x3C, 10000),
};
@@ -245,7 +427,7 @@ static const struct regulator_linear_range bd718xx_4th_nodvs_buck_volts[] = {
* LDO1
* 3.0 to 3.3V (100mV step)
*/
-static const struct regulator_linear_range bd718xx_ldo1_volts[] = {
+static const struct linear_range bd718xx_ldo1_volts[] = {
REGULATOR_LINEAR_RANGE(3000000, 0x00, 0x03, 100000),
REGULATOR_LINEAR_RANGE(1600000, 0x00, 0x03, 100000),
};
@@ -264,7 +446,7 @@ static const unsigned int ldo_2_volts[] = {
* LDO3
* 1.8 to 3.3V (100mV step)
*/
-static const struct regulator_linear_range bd718xx_ldo3_volts[] = {
+static const struct linear_range bd718xx_ldo3_volts[] = {
REGULATOR_LINEAR_RANGE(1800000, 0x00, 0x0F, 100000),
};
@@ -272,7 +454,7 @@ static const struct regulator_linear_range bd718xx_ldo3_volts[] = {
* LDO4
* 0.9 to 1.8V (100mV step)
*/
-static const struct regulator_linear_range bd718xx_ldo4_volts[] = {
+static const struct linear_range bd718xx_ldo4_volts[] = {
REGULATOR_LINEAR_RANGE(900000, 0x00, 0x09, 100000),
};
@@ -280,7 +462,7 @@ static const struct regulator_linear_range bd718xx_ldo4_volts[] = {
* LDO5 for BD71837
* 1.8 to 3.3V (100mV step)
*/
-static const struct regulator_linear_range bd71837_ldo5_volts[] = {
+static const struct linear_range bd71837_ldo5_volts[] = {
REGULATOR_LINEAR_RANGE(1800000, 0x00, 0x0F, 100000),
};
@@ -288,7 +470,7 @@ static const struct regulator_linear_range bd71837_ldo5_volts[] = {
* LDO5 for BD71837
* 1.8 to 3.3V (100mV step)
*/
-static const struct regulator_linear_range bd71847_ldo5_volts[] = {
+static const struct linear_range bd71847_ldo5_volts[] = {
REGULATOR_LINEAR_RANGE(1800000, 0x00, 0x0F, 100000),
REGULATOR_LINEAR_RANGE(800000, 0x00, 0x0F, 100000),
};
@@ -299,7 +481,7 @@ static const unsigned int bd71847_ldo5_volt_range_sel[] = { 0x0, 0x20 };
* LDO6
* 0.9 to 1.8V (100mV step)
*/
-static const struct regulator_linear_range bd718xx_ldo6_volts[] = {
+static const struct linear_range bd718xx_ldo6_volts[] = {
REGULATOR_LINEAR_RANGE(900000, 0x00, 0x09, 100000),
};
@@ -307,7 +489,7 @@ static const struct regulator_linear_range bd718xx_ldo6_volts[] = {
* LDO7
* 1.8 to 3.3V (100mV step)
*/
-static const struct regulator_linear_range bd71837_ldo7_volts[] = {
+static const struct linear_range bd71837_ldo7_volts[] = {
REGULATOR_LINEAR_RANGE(1800000, 0x00, 0x0F, 100000),
};
@@ -805,7 +987,7 @@ static const struct bd718xx_regulator_data bd71837_regulators[] = {
.of_match = of_match_ptr("BUCK5"),
.regulators_node = of_match_ptr("regulators"),
.id = BD718XX_BUCK5,
- .ops = &bd718xx_pickable_range_buck_ops,
+ .ops = &bd71837_pickable_range_buck_ops,
.type = REGULATOR_VOLTAGE,
.n_voltages = BD71837_BUCK5_VOLTAGE_NUM,
.linear_ranges = bd71837_buck5_volts,
@@ -832,7 +1014,7 @@ static const struct bd718xx_regulator_data bd71837_regulators[] = {
.of_match = of_match_ptr("BUCK6"),
.regulators_node = of_match_ptr("regulators"),
.id = BD718XX_BUCK6,
- .ops = &bd718xx_buck_regulator_ops,
+ .ops = &bd71837_buck_regulator_ops,
.type = REGULATOR_VOLTAGE,
.n_voltages = BD71837_BUCK6_VOLTAGE_NUM,
.linear_ranges = bd71837_buck6_volts,
@@ -856,7 +1038,7 @@ static const struct bd718xx_regulator_data bd71837_regulators[] = {
.of_match = of_match_ptr("BUCK7"),
.regulators_node = of_match_ptr("regulators"),
.id = BD718XX_BUCK7,
- .ops = &bd718xx_buck_regulator_nolinear_ops,
+ .ops = &bd71837_buck_regulator_nolinear_ops,
.type = REGULATOR_VOLTAGE,
.volt_table = &bd718xx_3rd_nodvs_buck_volts[0],
.n_voltages = ARRAY_SIZE(bd718xx_3rd_nodvs_buck_volts),
@@ -878,7 +1060,7 @@ static const struct bd718xx_regulator_data bd71837_regulators[] = {
.of_match = of_match_ptr("BUCK8"),
.regulators_node = of_match_ptr("regulators"),
.id = BD718XX_BUCK8,
- .ops = &bd718xx_buck_regulator_ops,
+ .ops = &bd71837_buck_regulator_ops,
.type = REGULATOR_VOLTAGE,
.n_voltages = BD718XX_4TH_NODVS_BUCK_VOLTAGE_NUM,
.linear_ranges = bd718xx_4th_nodvs_buck_volts,
@@ -902,7 +1084,7 @@ static const struct bd718xx_regulator_data bd71837_regulators[] = {
.of_match = of_match_ptr("LDO1"),
.regulators_node = of_match_ptr("regulators"),
.id = BD718XX_LDO1,
- .ops = &bd718xx_pickable_range_ldo_ops,
+ .ops = &bd71837_pickable_range_ldo_ops,
.type = REGULATOR_VOLTAGE,
.n_voltages = BD718XX_LDO1_VOLTAGE_NUM,
.linear_ranges = bd718xx_ldo1_volts,
@@ -928,7 +1110,7 @@ static const struct bd718xx_regulator_data bd71837_regulators[] = {
.of_match = of_match_ptr("LDO2"),
.regulators_node = of_match_ptr("regulators"),
.id = BD718XX_LDO2,
- .ops = &bd718xx_ldo_regulator_nolinear_ops,
+ .ops = &bd71837_ldo_regulator_nolinear_ops,
.type = REGULATOR_VOLTAGE,
.volt_table = &ldo_2_volts[0],
.vsel_reg = BD718XX_REG_LDO2_VOLT,
@@ -950,7 +1132,7 @@ static const struct bd718xx_regulator_data bd71837_regulators[] = {
.of_match = of_match_ptr("LDO3"),
.regulators_node = of_match_ptr("regulators"),
.id = BD718XX_LDO3,
- .ops = &bd718xx_ldo_regulator_ops,
+ .ops = &bd71837_ldo_regulator_ops,
.type = REGULATOR_VOLTAGE,
.n_voltages = BD718XX_LDO3_VOLTAGE_NUM,
.linear_ranges = bd718xx_ldo3_volts,
@@ -973,7 +1155,7 @@ static const struct bd718xx_regulator_data bd71837_regulators[] = {
.of_match = of_match_ptr("LDO4"),
.regulators_node = of_match_ptr("regulators"),
.id = BD718XX_LDO4,
- .ops = &bd718xx_ldo_regulator_ops,
+ .ops = &bd71837_ldo_regulator_ops,
.type = REGULATOR_VOLTAGE,
.n_voltages = BD718XX_LDO4_VOLTAGE_NUM,
.linear_ranges = bd718xx_ldo4_volts,
@@ -996,7 +1178,7 @@ static const struct bd718xx_regulator_data bd71837_regulators[] = {
.of_match = of_match_ptr("LDO5"),
.regulators_node = of_match_ptr("regulators"),
.id = BD718XX_LDO5,
- .ops = &bd718xx_ldo_regulator_ops,
+ .ops = &bd71837_ldo_regulator_ops,
.type = REGULATOR_VOLTAGE,
.n_voltages = BD71837_LDO5_VOLTAGE_NUM,
.linear_ranges = bd71837_ldo5_volts,
@@ -1023,7 +1205,7 @@ static const struct bd718xx_regulator_data bd71837_regulators[] = {
.of_match = of_match_ptr("LDO6"),
.regulators_node = of_match_ptr("regulators"),
.id = BD718XX_LDO6,
- .ops = &bd718xx_ldo_regulator_ops,
+ .ops = &bd71837_ldo_regulator_ops,
.type = REGULATOR_VOLTAGE,
.n_voltages = BD718XX_LDO6_VOLTAGE_NUM,
.linear_ranges = bd718xx_ldo6_volts,
@@ -1050,7 +1232,7 @@ static const struct bd718xx_regulator_data bd71837_regulators[] = {
.of_match = of_match_ptr("LDO7"),
.regulators_node = of_match_ptr("regulators"),
.id = BD718XX_LDO7,
- .ops = &bd718xx_ldo_regulator_ops,
+ .ops = &bd71837_ldo_regulator_ops,
.type = REGULATOR_VOLTAGE,
.n_voltages = BD71837_LDO7_VOLTAGE_NUM,
.linear_ranges = bd71837_ldo7_volts,
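
The helpers added in this file (voltage_change_prepare()/voltage_change_done()) implement the quirk the comments describe: when an enabled BD71847/BD71850 LDO is stepped up, its under-voltage (VRMON) comparator is masked, the new selector is written, at least 1 ms is allowed for the rail to settle, and monitoring is then unconditionally re-enabled. A condensed sketch of just that mask/settle/unmask sequence, reusing the register and bit names from the hunk above and assuming a 'regmap' handle for the PMIC:

/* Sketch only: raise LDO3 while its power-good monitor is masked. */
unsigned int mask = BD718XX_LDO1_VRMON80 << (BD718XX_LDO3 - BD718XX_LDO1);

regmap_update_bits(regmap, BD718XX_REG_MVRFLTMASK2, mask, mask);	/* mask monitor */
/* ... write the new, higher voltage selector here ... */
msleep(1);								/* >= 1 ms settle time */
regmap_update_bits(regmap, BD718XX_REG_MVRFLTMASK2, mask, 0);		/* unmask again */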
diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index 7486f6e4e613..03154f5b939f 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -3642,36 +3642,19 @@ finish:
return done;
}
-static int regulator_balance_voltage(struct regulator_dev *rdev,
- suspend_state_t state)
+int regulator_do_balance_voltage(struct regulator_dev *rdev,
+ suspend_state_t state, bool skip_coupled)
{
struct regulator_dev **c_rdevs;
struct regulator_dev *best_rdev;
struct coupling_desc *c_desc = &rdev->coupling_desc;
- struct regulator_coupler *coupler = c_desc->coupler;
int i, ret, n_coupled, best_min_uV, best_max_uV, best_c_rdev;
unsigned int delta, best_delta;
unsigned long c_rdev_done = 0;
bool best_c_rdev_done;
c_rdevs = c_desc->coupled_rdevs;
- n_coupled = c_desc->n_coupled;
-
- /*
- * If system is in a state other than PM_SUSPEND_ON, don't check
- * other coupled regulators.
- */
- if (state != PM_SUSPEND_ON)
- n_coupled = 1;
-
- if (c_desc->n_resolved < n_coupled) {
- rdev_err(rdev, "Not all coupled regulators registered\n");
- return -EPERM;
- }
-
- /* Invoke custom balancer for customized couplers */
- if (coupler && coupler->balance_voltage)
- return coupler->balance_voltage(coupler, rdev, state);
+ n_coupled = skip_coupled ? 1 : c_desc->n_coupled;
/*
* Find the best possible voltage change on each loop. Leave the loop
@@ -3742,6 +3725,32 @@ out:
return ret;
}
+static int regulator_balance_voltage(struct regulator_dev *rdev,
+ suspend_state_t state)
+{
+ struct coupling_desc *c_desc = &rdev->coupling_desc;
+ struct regulator_coupler *coupler = c_desc->coupler;
+ bool skip_coupled = false;
+
+ /*
+ * If system is in a state other than PM_SUSPEND_ON, don't check
+ * other coupled regulators.
+ */
+ if (state != PM_SUSPEND_ON)
+ skip_coupled = true;
+
+ if (c_desc->n_resolved < c_desc->n_coupled) {
+ rdev_err(rdev, "Not all coupled regulators registered\n");
+ return -EPERM;
+ }
+
+ /* Invoke custom balancer for customized couplers */
+ if (coupler && coupler->balance_voltage)
+ return coupler->balance_voltage(coupler, rdev, state);
+
+ return regulator_do_balance_voltage(rdev, state, skip_coupled);
+}
+
/**
* regulator_set_voltage - set regulator output voltage
* @regulator: regulator source
@@ -4312,6 +4321,7 @@ EXPORT_SYMBOL_GPL(regulator_set_load);
int regulator_allow_bypass(struct regulator *regulator, bool enable)
{
struct regulator_dev *rdev = regulator->rdev;
+ const char *name = rdev_get_name(rdev);
int ret = 0;
if (!rdev->desc->ops->set_bypass)
@@ -4326,18 +4336,26 @@ int regulator_allow_bypass(struct regulator *regulator, bool enable)
rdev->bypass_count++;
if (rdev->bypass_count == rdev->open_count) {
+ trace_regulator_bypass_enable(name);
+
ret = rdev->desc->ops->set_bypass(rdev, enable);
if (ret != 0)
rdev->bypass_count--;
+ else
+ trace_regulator_bypass_enable_complete(name);
}
} else if (!enable && regulator->bypass) {
rdev->bypass_count--;
if (rdev->bypass_count != rdev->open_count) {
+ trace_regulator_bypass_disable(name);
+
ret = rdev->desc->ops->set_bypass(rdev, enable);
if (ret != 0)
rdev->bypass_count++;
+ else
+ trace_regulator_bypass_disable_complete(name);
}
}
@@ -5496,6 +5514,7 @@ static void regulator_summary_show_subtree(struct seq_file *s,
seq_printf(s, "%*s%-*s ",
(level + 1) * 3 + 1, "",
30 - (level + 1) * 3,
+ consumer->supply_name ? consumer->supply_name :
consumer->dev ? dev_name(consumer->dev) : "deviceless");
switch (rdev->desc->type) {
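
The core.c rework pulls the sanity checks and the custom-coupler dispatch into regulator_balance_voltage() and leaves the balancing loop in regulator_do_balance_voltage(), with skip_coupled = true standing in for the old "only consider this regulator while suspending" special case; the bypass hunks additionally wrap the set_bypass() calls in tracepoints. The consumer-facing API is unchanged; a hedged reminder of how the newly traced path is reached (the supply name "vdda" is a placeholder):

/* Sketch: a consumer opting its supply into bypass; error handling trimmed. */
struct regulator *supply = devm_regulator_get(dev, "vdda");

if (!IS_ERR(supply))
	regulator_allow_bypass(supply, true);	/* trace fires once every open consumer allows bypass */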
diff --git a/drivers/regulator/da903x.c b/drivers/regulator/da903x.c
index 5493c3a86426..770e694824ac 100644
--- a/drivers/regulator/da903x.c
+++ b/drivers/regulator/da903x.c
@@ -248,7 +248,7 @@ static int da9034_set_dvc_voltage_sel(struct regulator_dev *rdev,
return ret;
}
-static const struct regulator_linear_range da9034_ldo12_ranges[] = {
+static const struct linear_range da9034_ldo12_ranges[] = {
REGULATOR_LINEAR_RANGE(1700000, 0, 7, 50000),
REGULATOR_LINEAR_RANGE(2700000, 8, 15, 50000),
};
diff --git a/drivers/regulator/db8500-prcmu.c b/drivers/regulator/db8500-prcmu.c
index 88a2dcb9fe8a..0ce6ec4933af 100644
--- a/drivers/regulator/db8500-prcmu.c
+++ b/drivers/regulator/db8500-prcmu.c
@@ -181,7 +181,7 @@ static int db8500_regulator_switch_disable(struct regulator_dev *rdev)
goto out;
}
- info->is_enabled = 0;
+ info->is_enabled = false;
out:
return ret;
}
diff --git a/drivers/regulator/helpers.c b/drivers/regulator/helpers.c
index bb16c465426e..e970e9d2f8be 100644
--- a/drivers/regulator/helpers.c
+++ b/drivers/regulator/helpers.c
@@ -131,10 +131,11 @@ int regulator_get_voltage_sel_pickable_regmap(struct regulator_dev *rdev)
unsigned int r_val;
int range;
unsigned int val;
- int ret, i;
- unsigned int voltages_in_range = 0;
+ int ret;
+ unsigned int voltages = 0;
+ const struct linear_range *r = rdev->desc->linear_ranges;
- if (!rdev->desc->linear_ranges)
+ if (!r)
return -EINVAL;
ret = regmap_read(rdev->regmap, rdev->desc->vsel_reg, &val);
@@ -152,11 +153,9 @@ int regulator_get_voltage_sel_pickable_regmap(struct regulator_dev *rdev)
if (range < 0)
return -EINVAL;
- for (i = 0; i < range; i++)
- voltages_in_range += (rdev->desc->linear_ranges[i].max_sel -
- rdev->desc->linear_ranges[i].min_sel) + 1;
+ voltages = linear_range_values_in_range_array(r, range);
- return val + voltages_in_range;
+ return val + voltages;
}
EXPORT_SYMBOL_GPL(regulator_get_voltage_sel_pickable_regmap);
@@ -179,8 +178,11 @@ int regulator_set_voltage_sel_pickable_regmap(struct regulator_dev *rdev,
unsigned int voltages_in_range = 0;
for (i = 0; i < rdev->desc->n_linear_ranges; i++) {
- voltages_in_range = (rdev->desc->linear_ranges[i].max_sel -
- rdev->desc->linear_ranges[i].min_sel) + 1;
+ const struct linear_range *r;
+
+ r = &rdev->desc->linear_ranges[i];
+ voltages_in_range = linear_range_values_in_range(r);
+
if (sel < voltages_in_range)
break;
sel -= voltages_in_range;
@@ -405,8 +407,10 @@ EXPORT_SYMBOL_GPL(regulator_map_voltage_linear);
int regulator_map_voltage_linear_range(struct regulator_dev *rdev,
int min_uV, int max_uV)
{
- const struct regulator_linear_range *range;
+ const struct linear_range *range;
int ret = -EINVAL;
+ unsigned int sel;
+ bool found;
int voltage, i;
if (!rdev->desc->n_linear_ranges) {
@@ -415,35 +419,19 @@ int regulator_map_voltage_linear_range(struct regulator_dev *rdev,
}
for (i = 0; i < rdev->desc->n_linear_ranges; i++) {
- int linear_max_uV;
-
range = &rdev->desc->linear_ranges[i];
- linear_max_uV = range->min_uV +
- (range->max_sel - range->min_sel) * range->uV_step;
- if (!(min_uV <= linear_max_uV && max_uV >= range->min_uV))
+ ret = linear_range_get_selector_high(range, min_uV, &sel,
+ &found);
+ if (ret)
continue;
-
- if (min_uV <= range->min_uV)
- min_uV = range->min_uV;
-
- /* range->uV_step == 0 means fixed voltage range */
- if (range->uV_step == 0) {
- ret = 0;
- } else {
- ret = DIV_ROUND_UP(min_uV - range->min_uV,
- range->uV_step);
- if (ret < 0)
- return ret;
- }
-
- ret += range->min_sel;
+ ret = sel;
/*
* Map back into a voltage to verify we're still in bounds.
* If we are not, then continue checking rest of the ranges.
*/
- voltage = rdev->desc->ops->list_voltage(rdev, ret);
+ voltage = rdev->desc->ops->list_voltage(rdev, sel);
if (voltage >= min_uV && voltage <= max_uV)
break;
}
@@ -468,7 +456,7 @@ EXPORT_SYMBOL_GPL(regulator_map_voltage_linear_range);
int regulator_map_voltage_pickable_linear_range(struct regulator_dev *rdev,
int min_uV, int max_uV)
{
- const struct regulator_linear_range *range;
+ const struct linear_range *range;
int ret = -EINVAL;
int voltage, i;
unsigned int selector = 0;
@@ -480,30 +468,25 @@ int regulator_map_voltage_pickable_linear_range(struct regulator_dev *rdev,
for (i = 0; i < rdev->desc->n_linear_ranges; i++) {
int linear_max_uV;
+ bool found;
+ unsigned int sel;
range = &rdev->desc->linear_ranges[i];
- linear_max_uV = range->min_uV +
- (range->max_sel - range->min_sel) * range->uV_step;
+ linear_max_uV = linear_range_get_max_value(range);
- if (!(min_uV <= linear_max_uV && max_uV >= range->min_uV)) {
- selector += (range->max_sel - range->min_sel + 1);
+ if (!(min_uV <= linear_max_uV && max_uV >= range->min)) {
+ selector += linear_range_values_in_range(range);
continue;
}
- if (min_uV <= range->min_uV)
- min_uV = range->min_uV;
-
- /* range->uV_step == 0 means fixed voltage range */
- if (range->uV_step == 0) {
- ret = 0;
- } else {
- ret = DIV_ROUND_UP(min_uV - range->min_uV,
- range->uV_step);
- if (ret < 0)
- return ret;
+ ret = linear_range_get_selector_high(range, min_uV, &sel,
+ &found);
+ if (ret) {
+ selector += linear_range_values_in_range(range);
+ continue;
}
- ret += selector;
+ ret = selector + sel;
voltage = rdev->desc->ops->list_voltage(rdev, ret);
@@ -513,7 +496,7 @@ int regulator_map_voltage_pickable_linear_range(struct regulator_dev *rdev,
* exit but retry until we have checked all ranges.
*/
if (voltage < min_uV || voltage > max_uV)
- selector += (range->max_sel - range->min_sel + 1);
+ selector += linear_range_values_in_range(range);
else
break;
}
@@ -561,7 +544,7 @@ EXPORT_SYMBOL_GPL(regulator_list_voltage_linear);
int regulator_list_voltage_pickable_linear_range(struct regulator_dev *rdev,
unsigned int selector)
{
- const struct regulator_linear_range *range;
+ const struct linear_range *range;
int i;
unsigned int all_sels = 0;
@@ -571,18 +554,28 @@ int regulator_list_voltage_pickable_linear_range(struct regulator_dev *rdev,
}
for (i = 0; i < rdev->desc->n_linear_ranges; i++) {
- unsigned int sels_in_range;
+ unsigned int sel_indexes;
range = &rdev->desc->linear_ranges[i];
- sels_in_range = range->max_sel - range->min_sel;
+ sel_indexes = linear_range_values_in_range(range) - 1;
- if (all_sels + sels_in_range >= selector) {
+ if (all_sels + sel_indexes >= selector) {
selector -= all_sels;
- return range->min_uV + (range->uV_step * selector);
+ /*
+ * As we see here, pickable ranges work only as
+ * long as the first selector for each pickable
+ * range is 0, and each subsequent range for
+ * this 'pick' follows immediately at the next
+ * unused selector (i.e. there are no gaps
+ * between ranges). I think this is fine but it
+ * probably should be documented. OTOH, the whole
+ * pickable-range mechanism might benefit from
+ * some documentation.
+ return range->min + (range->step * selector);
}
- all_sels += (sels_in_range + 1);
+ all_sels += (sel_indexes + 1);
}
return -EINVAL;
@@ -604,27 +597,18 @@ EXPORT_SYMBOL_GPL(regulator_list_voltage_pickable_linear_range);
int regulator_desc_list_voltage_linear_range(const struct regulator_desc *desc,
unsigned int selector)
{
- const struct regulator_linear_range *range;
- int i;
-
- if (!desc->n_linear_ranges) {
- BUG_ON(!desc->n_linear_ranges);
- return -EINVAL;
- }
-
- for (i = 0; i < desc->n_linear_ranges; i++) {
- range = &desc->linear_ranges[i];
-
- if (!(selector >= range->min_sel &&
- selector <= range->max_sel))
- continue;
+ unsigned int val;
+ int ret;
- selector -= range->min_sel;
+ BUG_ON(!desc->n_linear_ranges);
- return range->min_uV + (range->uV_step * selector);
- }
+ ret = linear_range_get_value_array(desc->linear_ranges,
+ desc->n_linear_ranges, selector,
+ &val);
+ if (ret)
+ return ret;
- return -EINVAL;
+ return val;
}
EXPORT_SYMBOL_GPL(regulator_desc_list_voltage_linear_range);
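
The helpers.c rework replaces the open-coded range arithmetic with the lib/linear_ranges helpers: linear_range_values_in_range()/..._array() count selectors, linear_range_get_selector_high() returns the lowest selector whose value is at or above the request, and linear_range_get_value_array() maps a selector back to a value. A hedged sketch of the selector lookup on its own, using the same numbers as the ldo0_ldo1_ranges table earlier in this series (0.8 V base, 100 mV steps, selectors 0x0..0x19):

static const struct linear_range example_range = {
	.min = 800000, .min_sel = 0x0, .max_sel = 0x19, .step = 100000,
};

unsigned int sel;
bool found;
int ret;

/* Smallest selector reaching at least 1.25 V: DIV_ROUND_UP(1250000 - 800000, 100000) = 5. */
ret = linear_range_get_selector_high(&example_range, 1250000, &sel, &found);
/* Expect ret == 0, found == true, sel == 5 (0.8 V + 5 * 0.1 V = 1.3 V >= 1.25 V). */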
diff --git a/drivers/regulator/hi6421-regulator.c b/drivers/regulator/hi6421-regulator.c
index 5ac3d7c29725..66219d8dfc1a 100644
--- a/drivers/regulator/hi6421-regulator.c
+++ b/drivers/regulator/hi6421-regulator.c
@@ -87,7 +87,7 @@ static const unsigned int ldo_8_voltages[] = {
};
/* Ranges are sorted in ascending order. */
-static const struct regulator_linear_range ldo_audio_volt_range[] = {
+static const struct linear_range ldo_audio_volt_range[] = {
REGULATOR_LINEAR_RANGE(2800000, 0, 3, 50000),
REGULATOR_LINEAR_RANGE(3000000, 4, 7, 100000),
};
@@ -195,7 +195,7 @@ static const struct regulator_ops hi6421_buck345_ops;
* _id - LDO id name string
* _match - of match name string
* n_volt - number of voltages available
- * volt_ranges - array of regulator_linear_range
+ * volt_ranges - array of linear_range
* vstep - voltage increase in each linear step in uV
* vreg - voltage select register
* vmask - voltage select mask
diff --git a/drivers/regulator/lochnagar-regulator.c b/drivers/regulator/lochnagar-regulator.c
index 9b05e03ba830..5ea3e4141684 100644
--- a/drivers/regulator/lochnagar-regulator.c
+++ b/drivers/regulator/lochnagar-regulator.c
@@ -36,7 +36,7 @@ static const struct regulator_ops lochnagar_micvdd_ops = {
.set_voltage_sel = regulator_set_voltage_sel_regmap,
};
-static const struct regulator_linear_range lochnagar_micvdd_ranges[] = {
+static const struct linear_range lochnagar_micvdd_ranges[] = {
REGULATOR_LINEAR_RANGE(1000000, 0, 0xC, 50000),
REGULATOR_LINEAR_RANGE(1700000, 0xD, 0x1F, 100000),
};
@@ -97,7 +97,7 @@ static const struct regulator_ops lochnagar_vddcore_ops = {
.set_voltage_sel = regulator_set_voltage_sel_regmap,
};
-static const struct regulator_linear_range lochnagar_vddcore_ranges[] = {
+static const struct linear_range lochnagar_vddcore_ranges[] = {
REGULATOR_LINEAR_RANGE(600000, 0x8, 0x41, 12500),
};
diff --git a/drivers/regulator/lp873x-regulator.c b/drivers/regulator/lp873x-regulator.c
index b55de293ca7a..fe049b67e7d5 100644
--- a/drivers/regulator/lp873x-regulator.c
+++ b/drivers/regulator/lp873x-regulator.c
@@ -54,14 +54,14 @@ struct lp873x_regulator {
static const struct lp873x_regulator regulators[];
-static const struct regulator_linear_range buck0_buck1_ranges[] = {
+static const struct linear_range buck0_buck1_ranges[] = {
REGULATOR_LINEAR_RANGE(0, 0x0, 0x13, 0),
REGULATOR_LINEAR_RANGE(700000, 0x14, 0x17, 10000),
REGULATOR_LINEAR_RANGE(735000, 0x18, 0x9d, 5000),
REGULATOR_LINEAR_RANGE(1420000, 0x9e, 0xff, 20000),
};
-static const struct regulator_linear_range ldo0_ldo1_ranges[] = {
+static const struct linear_range ldo0_ldo1_ranges[] = {
REGULATOR_LINEAR_RANGE(800000, 0x0, 0x19, 100000),
};
diff --git a/drivers/regulator/lp87565-regulator.c b/drivers/regulator/lp87565-regulator.c
index 4ae12ac1f4c6..5d525dacf959 100644
--- a/drivers/regulator/lp87565-regulator.c
+++ b/drivers/regulator/lp87565-regulator.c
@@ -46,7 +46,7 @@ struct lp87565_regulator {
static const struct lp87565_regulator regulators[];
-static const struct regulator_linear_range buck0_1_2_3_ranges[] = {
+static const struct linear_range buck0_1_2_3_ranges[] = {
REGULATOR_LINEAR_RANGE(600000, 0xA, 0x17, 10000),
REGULATOR_LINEAR_RANGE(735000, 0x18, 0x9d, 5000),
REGULATOR_LINEAR_RANGE(1420000, 0x9e, 0xff, 20000),
diff --git a/drivers/regulator/lp8788-buck.c b/drivers/regulator/lp8788-buck.c
index 222502a29658..74b7b496b12d 100644
--- a/drivers/regulator/lp8788-buck.c
+++ b/drivers/regulator/lp8788-buck.c
@@ -92,7 +92,7 @@ struct lp8788_buck {
};
/* BUCK 1 ~ 4 voltage ranges */
-static const struct regulator_linear_range buck_volt_ranges[] = {
+static const struct linear_range buck_volt_ranges[] = {
REGULATOR_LINEAR_RANGE(500000, 0, 0, 0),
REGULATOR_LINEAR_RANGE(800000, 1, 25, 50000),
};
diff --git a/drivers/regulator/max77650-regulator.c b/drivers/regulator/max77650-regulator.c
index ac89a412f665..ca08f94a368d 100644
--- a/drivers/regulator/max77650-regulator.c
+++ b/drivers/regulator/max77650-regulator.c
@@ -49,7 +49,7 @@ static const unsigned int max77651_sbb1_volt_range_sel[] = {
0x0, 0x1, 0x2, 0x3
};
-static const struct regulator_linear_range max77651_sbb1_volt_ranges[] = {
+static const struct linear_range max77651_sbb1_volt_ranges[] = {
/* range index 0 */
REGULATOR_LINEAR_RANGE(2400000, 0x00, 0x0f, 50000),
/* range index 1 */
diff --git a/drivers/regulator/max77826-regulator.c b/drivers/regulator/max77826-regulator.c
new file mode 100644
index 000000000000..502ab6afc814
--- /dev/null
+++ b/drivers/regulator/max77826-regulator.c
@@ -0,0 +1,301 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+//
+// max77826-regulator.c - regulator driver for Maxim MAX77826
+//
+// Author: Iskren Chernev <iskren.chernev@gmail.com>
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/err.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/regulator/driver.h>
+#include <linux/regulator/of_regulator.h>
+#include <linux/i2c.h>
+#include <linux/regmap.h>
+
+enum max77826_registers {
+ MAX77826_REG_INT_SRC = 0x00,
+ MAX77826_REG_SYS_INT,
+ MAX77826_REG_INT1,
+ MAX77826_REG_INT2,
+ MAX77826_REG_BB_INT,
+ MAX77826_REG_INT_SRC_M,
+ MAX77826_REG_TOPSYS_INT_M,
+ MAX77826_REG_INT1_M,
+ MAX77826_REG_INT2_M,
+ MAX77826_REG_BB_INT_M,
+ MAX77826_REG_TOPSYS_STAT,
+ MAX77826_REG_STAT1,
+ MAX77826_REG_STAT2,
+ MAX77826_REG_BB_STAT,
+ /* 0x0E - 0x0F: Reserved */
+ MAX77826_REG_LDO_OPMD1 = 0x10,
+ MAX77826_REG_LDO_OPMD2,
+ MAX77826_REG_LDO_OPMD3,
+ MAX77826_REG_LDO_OPMD4,
+ MAX77826_REG_B_BB_OPMD,
+ /* 0x15 - 0x1F: Reserved */
+ MAX77826_REG_LDO1_CFG = 0x20,
+ MAX77826_REG_LDO2_CFG,
+ MAX77826_REG_LDO3_CFG,
+ MAX77826_REG_LDO4_CFG,
+ MAX77826_REG_LDO5_CFG,
+ MAX77826_REG_LDO6_CFG,
+ MAX77826_REG_LDO7_CFG,
+ MAX77826_REG_LDO8_CFG,
+ MAX77826_REG_LDO9_CFG,
+ MAX77826_REG_LDO10_CFG,
+ MAX77826_REG_LDO11_CFG,
+ MAX77826_REG_LDO12_CFG,
+ MAX77826_REG_LDO13_CFG,
+ MAX77826_REG_LDO14_CFG,
+ MAX77826_REG_LDO15_CFG,
+ /* 0x2F: Reserved */
+ MAX77826_REG_BUCK_CFG = 0x30,
+ MAX77826_REG_BUCK_VOUT,
+ MAX77826_REG_BB_CFG,
+ MAX77826_REG_BB_VOUT,
+ /* 0x34 - 0x3F: Reserved */
+ MAX77826_REG_BUCK_SS_FREQ = 0x40,
+ MAX77826_REG_UVLO_FALL,
+ /* 0x42 - 0xCE: Reserved */
+ MAX77826_REG_DEVICE_ID = 0xCF,
+};
+
+enum max77826_regulators {
+ MAX77826_LDO1 = 0,
+ MAX77826_LDO2,
+ MAX77826_LDO3,
+ MAX77826_LDO4,
+ MAX77826_LDO5,
+ MAX77826_LDO6,
+ MAX77826_LDO7,
+ MAX77826_LDO8,
+ MAX77826_LDO9,
+ MAX77826_LDO10,
+ MAX77826_LDO11,
+ MAX77826_LDO12,
+ MAX77826_LDO13,
+ MAX77826_LDO14,
+ MAX77826_LDO15,
+ MAX77826_BUCK,
+ MAX77826_BUCKBOOST,
+ MAX77826_MAX_REGULATORS,
+};
+
+#define MAX77826_MASK_LDO 0x7f
+#define MAX77826_MASK_BUCK 0xff
+#define MAX77826_MASK_BUCKBOOST 0x7f
+#define MAX77826_BUCK_RAMP_DELAY 12500
+
+/* values in mV */
+/* for LDO1-3 */
+#define MAX77826_NMOS_LDO_VOLT_MIN 600000
+#define MAX77826_NMOS_LDO_VOLT_MAX 2187500
+#define MAX77826_NMOS_LDO_VOLT_STEP 12500
+
+/* for LDO4-15 */
+#define MAX77826_PMOS_LDO_VOLT_MIN 800000
+#define MAX77826_PMOS_LDO_VOLT_MAX 3975000
+#define MAX77826_PMOS_LDO_VOLT_STEP 25000
+
+/* for BUCK */
+#define MAX77826_BUCK_VOLT_MIN 500000
+#define MAX77826_BUCK_VOLT_MAX 1800000
+#define MAX77826_BUCK_VOLT_STEP 6250
+
+/* for BUCKBOOST */
+#define MAX77826_BUCKBOOST_VOLT_MIN 2600000
+#define MAX77826_BUCKBOOST_VOLT_MAX 4187500
+#define MAX77826_BUCKBOOST_VOLT_STEP 12500
+#define MAX77826_VOLT_RANGE(_type) \
+ ((MAX77826_ ## _type ## _VOLT_MAX - \
+ MAX77826_ ## _type ## _VOLT_MIN) / \
+ MAX77826_ ## _type ## _VOLT_STEP + 1)
+
+#define MAX77826_LDO(_id, _type) \
+ [MAX77826_LDO ## _id] = { \
+ .id = MAX77826_LDO ## _id, \
+ .name = "LDO"#_id, \
+ .of_match = of_match_ptr("LDO"#_id), \
+ .regulators_node = "regulators", \
+ .ops = &max77826_most_ops, \
+ .min_uV = MAX77826_ ## _type ## _LDO_VOLT_MIN, \
+ .uV_step = MAX77826_ ## _type ## _LDO_VOLT_STEP, \
+ .n_voltages = MAX77826_VOLT_RANGE(_type ## _LDO), \
+ .enable_reg = MAX77826_REG_LDO_OPMD1 + (_id - 1) / 4, \
+ .enable_mask = BIT(((_id - 1) % 4) * 2 + 1), \
+ .vsel_reg = MAX77826_REG_LDO1_CFG + (_id - 1), \
+ .vsel_mask = MAX77826_MASK_LDO, \
+ .owner = THIS_MODULE, \
+ }
+
+#define MAX77826_BUCK(_idx, _id, _ops) \
+ [MAX77826_ ## _id] = { \
+ .id = MAX77826_ ## _id, \
+ .name = #_id, \
+ .of_match = of_match_ptr(#_id), \
+ .regulators_node = "regulators", \
+ .ops = &_ops, \
+ .min_uV = MAX77826_ ## _id ## _VOLT_MIN, \
+ .uV_step = MAX77826_ ## _id ## _VOLT_STEP, \
+ .n_voltages = MAX77826_VOLT_RANGE(_id), \
+ .enable_reg = MAX77826_REG_B_BB_OPMD, \
+ .enable_mask = BIT(_idx * 2 + 1), \
+ .vsel_reg = MAX77826_REG_BUCK_VOUT + _idx * 2, \
+ .vsel_mask = MAX77826_MASK_ ## _id, \
+ .owner = THIS_MODULE, \
+ }
+
+
+
+struct max77826_regulator_info {
+ struct regmap *regmap;
+ struct regulator_desc *rdesc;
+};
+
+static const struct regmap_config max77826_regmap_config = {
+ .reg_bits = 8,
+ .val_bits = 8,
+ .max_register = MAX77826_REG_DEVICE_ID,
+};
+
+static int max77826_set_voltage_time_sel(struct regulator_dev *,
+ unsigned int old_selector,
+ unsigned int new_selector);
+
+static const struct regulator_ops max77826_most_ops = {
+ .enable = regulator_enable_regmap,
+ .disable = regulator_disable_regmap,
+ .is_enabled = regulator_is_enabled_regmap,
+ .list_voltage = regulator_list_voltage_linear,
+ .map_voltage = regulator_map_voltage_linear,
+ .get_voltage_sel = regulator_get_voltage_sel_regmap,
+ .set_voltage_sel = regulator_set_voltage_sel_regmap,
+};
+
+static const struct regulator_ops max77826_buck_ops = {
+ .enable = regulator_enable_regmap,
+ .disable = regulator_disable_regmap,
+ .is_enabled = regulator_is_enabled_regmap,
+ .list_voltage = regulator_list_voltage_linear,
+ .map_voltage = regulator_map_voltage_linear,
+ .get_voltage_sel = regulator_get_voltage_sel_regmap,
+ .set_voltage_sel = regulator_set_voltage_sel_regmap,
+ .set_voltage_time_sel = max77826_set_voltage_time_sel,
+};
+
+static struct regulator_desc max77826_regulators_desc[] = {
+ MAX77826_LDO(1, NMOS),
+ MAX77826_LDO(2, NMOS),
+ MAX77826_LDO(3, NMOS),
+ MAX77826_LDO(4, PMOS),
+ MAX77826_LDO(5, PMOS),
+ MAX77826_LDO(6, PMOS),
+ MAX77826_LDO(7, PMOS),
+ MAX77826_LDO(8, PMOS),
+ MAX77826_LDO(9, PMOS),
+ MAX77826_LDO(10, PMOS),
+ MAX77826_LDO(11, PMOS),
+ MAX77826_LDO(12, PMOS),
+ MAX77826_LDO(13, PMOS),
+ MAX77826_LDO(14, PMOS),
+ MAX77826_LDO(15, PMOS),
+ MAX77826_BUCK(0, BUCK, max77826_buck_ops),
+ MAX77826_BUCK(1, BUCKBOOST, max77826_most_ops),
+};
+
+static int max77826_set_voltage_time_sel(struct regulator_dev *rdev,
+ unsigned int old_selector,
+ unsigned int new_selector)
+{
+ if (new_selector > old_selector) {
+ return DIV_ROUND_UP(MAX77826_BUCK_VOLT_STEP *
+ (new_selector - old_selector),
+ MAX77826_BUCK_RAMP_DELAY);
+ }
+
+ return 0;
+}
+
+static int max77826_read_device_id(struct regmap *regmap, struct device *dev)
+{
+ unsigned int device_id;
+ int res;
+
+ res = regmap_read(regmap, MAX77826_REG_DEVICE_ID, &device_id);
+ if (!res)
+ dev_dbg(dev, "DEVICE_ID: 0x%x\n", device_id);
+
+ return res;
+}
+
+static int max77826_i2c_probe(struct i2c_client *client)
+{
+ struct device *dev = &client->dev;
+ struct max77826_regulator_info *info;
+ struct regulator_config config = {};
+ struct regulator_dev *rdev;
+ struct regmap *regmap;
+ int i;
+
+ info = devm_kzalloc(dev, sizeof(struct max77826_regulator_info),
+ GFP_KERNEL);
+ if (!info)
+ return -ENOMEM;
+
+ info->rdesc = max77826_regulators_desc;
+ regmap = devm_regmap_init_i2c(client, &max77826_regmap_config);
+ if (IS_ERR(regmap)) {
+ dev_err(dev, "Failed to allocate regmap!\n");
+ return PTR_ERR(regmap);
+ }
+
+ info->regmap = regmap;
+ i2c_set_clientdata(client, info);
+
+ config.dev = dev;
+ config.regmap = regmap;
+ config.driver_data = info;
+
+ for (i = 0; i < MAX77826_MAX_REGULATORS; i++) {
+ rdev = devm_regulator_register(dev,
+ &max77826_regulators_desc[i],
+ &config);
+ if (IS_ERR(rdev)) {
+ dev_err(dev, "Failed to register regulator!\n");
+ return PTR_ERR(rdev);
+ }
+ }
+
+ return max77826_read_device_id(regmap, dev);
+}
+
+static const struct of_device_id max77826_of_match[] = {
+ { .compatible = "maxim,max77826" },
+ { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, max77826_of_match);
+
+static const struct i2c_device_id max77826_id[] = {
+ { "max77826-regulator" },
+ { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(i2c, max77826_id);
+
+static struct i2c_driver max77826_regulator_driver = {
+ .driver = {
+ .name = "max77826",
+ .of_match_table = of_match_ptr(max77826_of_match),
+ },
+ .probe_new = max77826_i2c_probe,
+ .id_table = max77826_id,
+};
+module_i2c_driver(max77826_regulator_driver);
+
+MODULE_AUTHOR("Iskren Chernev <iskren.chernev@gmail.com>");
+MODULE_DESCRIPTION("MAX77826 PMIC regulator driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/regulator/max8998.c b/drivers/regulator/max8998.c
index 60599c3bb845..340413bba0c5 100644
--- a/drivers/regulator/max8998.c
+++ b/drivers/regulator/max8998.c
@@ -33,6 +33,10 @@ struct max8998_data {
unsigned int buck2_idx;
};
+static const unsigned int charger_current_table[] = {
+ 90000, 380000, 475000, 550000, 570000, 600000, 700000, 800000,
+};
+
static int max8998_get_enable_register(struct regulator_dev *rdev,
int *reg, int *shift)
{
@@ -63,6 +67,10 @@ static int max8998_get_enable_register(struct regulator_dev *rdev,
*reg = MAX8998_REG_CHGR2;
*shift = 7 - (ldo - MAX8998_ESAFEOUT1);
break;
+ case MAX8998_CHARGER:
+ *reg = MAX8998_REG_CHGR2;
+ *shift = 0;
+ break;
default:
return -EINVAL;
}
@@ -88,6 +96,11 @@ static int max8998_ldo_is_enabled(struct regulator_dev *rdev)
return val & (1 << shift);
}
+static int max8998_ldo_is_enabled_inverted(struct regulator_dev *rdev)
+{
+ return (!max8998_ldo_is_enabled(rdev));
+}
+
static int max8998_ldo_enable(struct regulator_dev *rdev)
{
struct max8998_data *max8998 = rdev_get_drvdata(rdev);
@@ -358,6 +371,74 @@ static int max8998_set_voltage_buck_time_sel(struct regulator_dev *rdev,
return 0;
}
+static int max8998_set_current_limit(struct regulator_dev *rdev,
+ int min_uA, int max_uA)
+{
+ struct max8998_data *max8998 = rdev_get_drvdata(rdev);
+ struct i2c_client *i2c = max8998->iodev->i2c;
+ unsigned int n_currents = rdev->desc->n_current_limits;
+ int i, sel = -1;
+
+ if (n_currents == 0)
+ return -EINVAL;
+
+ if (rdev->desc->curr_table) {
+ const unsigned int *curr_table = rdev->desc->curr_table;
+ bool ascend = curr_table[n_currents - 1] > curr_table[0];
+
+ /* search for closest to maximum */
+ if (ascend) {
+ for (i = n_currents - 1; i >= 0; i--) {
+ if (min_uA <= curr_table[i] &&
+ curr_table[i] <= max_uA) {
+ sel = i;
+ break;
+ }
+ }
+ } else {
+ for (i = 0; i < n_currents; i++) {
+ if (min_uA <= curr_table[i] &&
+ curr_table[i] <= max_uA) {
+ sel = i;
+ break;
+ }
+ }
+ }
+ }
+
+ if (sel < 0)
+ return -EINVAL;
+
+ sel <<= ffs(rdev->desc->csel_mask) - 1;
+
+ return max8998_update_reg(i2c, rdev->desc->csel_reg,
+ sel, rdev->desc->csel_mask);
+}
+
+int max8998_get_current_limit(struct regulator_dev *rdev)
+{
+ struct max8998_data *max8998 = rdev_get_drvdata(rdev);
+ struct i2c_client *i2c = max8998->iodev->i2c;
+ u8 val;
+ int ret;
+
+ ret = max8998_read_reg(i2c, rdev->desc->csel_reg, &val);
+ if (ret != 0)
+ return ret;
+
+ val &= rdev->desc->csel_mask;
+ val >>= ffs(rdev->desc->csel_mask) - 1;
+
+ if (rdev->desc->curr_table) {
+ if (val >= rdev->desc->n_current_limits)
+ return -EINVAL;
+
+ return rdev->desc->curr_table[val];
+ }
+
+ return -EINVAL;
+}
+
static const struct regulator_ops max8998_ldo_ops = {
.list_voltage = regulator_list_voltage_linear,
.map_voltage = regulator_map_voltage_linear,
@@ -379,6 +460,15 @@ static const struct regulator_ops max8998_buck_ops = {
.set_voltage_time_sel = max8998_set_voltage_buck_time_sel,
};
+static const struct regulator_ops max8998_charger_ops = {
+ .set_current_limit = max8998_set_current_limit,
+ .get_current_limit = max8998_get_current_limit,
+ .is_enabled = max8998_ldo_is_enabled_inverted,
+ /* Swapped as register is inverted */
+ .enable = max8998_ldo_disable,
+ .disable = max8998_ldo_enable,
+};
+
static const struct regulator_ops max8998_others_ops = {
.is_enabled = max8998_ldo_is_enabled,
.enable = max8998_ldo_enable,
@@ -397,6 +487,19 @@ static const struct regulator_ops max8998_others_ops = {
.owner = THIS_MODULE, \
}
+#define MAX8998_CURRENT_REG(_name, _ops, _table, _reg, _mask) \
+ { \
+ .name = #_name, \
+ .id = MAX8998_##_name, \
+ .ops = _ops, \
+ .curr_table = _table, \
+ .n_current_limits = ARRAY_SIZE(_table), \
+ .csel_reg = _reg, \
+ .csel_mask = _mask, \
+ .type = REGULATOR_CURRENT, \
+ .owner = THIS_MODULE, \
+ }
+
#define MAX8998_OTHERS_REG(_name, _id) \
{ \
.name = #_name, \
@@ -432,6 +535,8 @@ static const struct regulator_desc regulators[] = {
MAX8998_OTHERS_REG(ENVICHG, MAX8998_ENVICHG),
MAX8998_OTHERS_REG(ESAFEOUT1, MAX8998_ESAFEOUT1),
MAX8998_OTHERS_REG(ESAFEOUT2, MAX8998_ESAFEOUT2),
+ MAX8998_CURRENT_REG(CHARGER, &max8998_charger_ops,
+ charger_current_table, MAX8998_REG_CHGR1, 0x7),
};
static int max8998_pmic_dt_parse_dvs_gpio(struct max8998_dev *iodev,
diff --git a/drivers/regulator/mcp16502.c b/drivers/regulator/mcp16502.c
index e5a02711cb46..6d0ad74935b3 100644
--- a/drivers/regulator/mcp16502.c
+++ b/drivers/regulator/mcp16502.c
@@ -391,11 +391,11 @@ static const struct of_device_id mcp16502_ids[] = {
};
MODULE_DEVICE_TABLE(of, mcp16502_ids);
-static const struct regulator_linear_range b1l12_ranges[] = {
+static const struct linear_range b1l12_ranges[] = {
REGULATOR_LINEAR_RANGE(1200000, VDD_LOW_SEL, VDD_HIGH_SEL, 50000),
};
-static const struct regulator_linear_range b234_ranges[] = {
+static const struct linear_range b234_ranges[] = {
REGULATOR_LINEAR_RANGE(600000, VDD_LOW_SEL, VDD_HIGH_SEL, 25000),
};
diff --git a/drivers/regulator/mp8859.c b/drivers/regulator/mp8859.c
index 6ed987648188..f2300714d5a9 100644
--- a/drivers/regulator/mp8859.c
+++ b/drivers/regulator/mp8859.c
@@ -73,7 +73,7 @@ static int mp8859_get_voltage_sel(struct regulator_dev *rdev)
return val;
}
-static const struct regulator_linear_range mp8859_dcdc_ranges[] = {
+static const struct linear_range mp8859_dcdc_ranges[] = {
REGULATOR_LINEAR_RANGE(0, VOL_MIN_IDX, VOL_MAX_IDX, 10000),
};
diff --git a/drivers/regulator/mt6323-regulator.c b/drivers/regulator/mt6323-regulator.c
index 893ea190788a..ff9016170db3 100644
--- a/drivers/regulator/mt6323-regulator.c
+++ b/drivers/regulator/mt6323-regulator.c
@@ -102,15 +102,15 @@ struct mt6323_regulator_info {
.modeset_mask = _modeset_mask, \
}
-static const struct regulator_linear_range buck_volt_range1[] = {
+static const struct linear_range buck_volt_range1[] = {
REGULATOR_LINEAR_RANGE(700000, 0, 0x7f, 6250),
};
-static const struct regulator_linear_range buck_volt_range2[] = {
+static const struct linear_range buck_volt_range2[] = {
REGULATOR_LINEAR_RANGE(1400000, 0, 0x7f, 12500),
};
-static const struct regulator_linear_range buck_volt_range3[] = {
+static const struct linear_range buck_volt_range3[] = {
REGULATOR_LINEAR_RANGE(500000, 0, 0x3f, 50000),
};
diff --git a/drivers/regulator/mt6358-regulator.c b/drivers/regulator/mt6358-regulator.c
index ba42682e06f3..13cb6ac9a892 100644
--- a/drivers/regulator/mt6358-regulator.c
+++ b/drivers/regulator/mt6358-regulator.c
@@ -137,19 +137,19 @@ struct mt6358_regulator_info {
.qi = BIT(15), \
}
-static const struct regulator_linear_range buck_volt_range1[] = {
+static const struct linear_range buck_volt_range1[] = {
REGULATOR_LINEAR_RANGE(500000, 0, 0x7f, 6250),
};
-static const struct regulator_linear_range buck_volt_range2[] = {
+static const struct linear_range buck_volt_range2[] = {
REGULATOR_LINEAR_RANGE(500000, 0, 0x7f, 12500),
};
-static const struct regulator_linear_range buck_volt_range3[] = {
+static const struct linear_range buck_volt_range3[] = {
REGULATOR_LINEAR_RANGE(500000, 0, 0x3f, 50000),
};
-static const struct regulator_linear_range buck_volt_range4[] = {
+static const struct linear_range buck_volt_range4[] = {
REGULATOR_LINEAR_RANGE(1000000, 0, 0x7f, 12500),
};
diff --git a/drivers/regulator/mt6380-regulator.c b/drivers/regulator/mt6380-regulator.c
index b6aed090b5e0..9efd8710a6f3 100644
--- a/drivers/regulator/mt6380-regulator.c
+++ b/drivers/regulator/mt6380-regulator.c
@@ -152,15 +152,15 @@ struct mt6380_regulator_info {
.modeset_mask = _modeset_mask, \
}
-static const struct regulator_linear_range buck_volt_range1[] = {
+static const struct linear_range buck_volt_range1[] = {
REGULATOR_LINEAR_RANGE(600000, 0, 0xfe, 6250),
};
-static const struct regulator_linear_range buck_volt_range2[] = {
+static const struct linear_range buck_volt_range2[] = {
REGULATOR_LINEAR_RANGE(600000, 0, 0xfe, 6250),
};
-static const struct regulator_linear_range buck_volt_range3[] = {
+static const struct linear_range buck_volt_range3[] = {
REGULATOR_LINEAR_RANGE(1200000, 0, 0x3c, 25000),
};
diff --git a/drivers/regulator/mt6397-regulator.c b/drivers/regulator/mt6397-regulator.c
index fd9ed864a0c1..269c2a6028e8 100644
--- a/drivers/regulator/mt6397-regulator.c
+++ b/drivers/regulator/mt6397-regulator.c
@@ -102,15 +102,15 @@ struct mt6397_regulator_info {
.qi = BIT(15), \
}
-static const struct regulator_linear_range buck_volt_range1[] = {
+static const struct linear_range buck_volt_range1[] = {
REGULATOR_LINEAR_RANGE(700000, 0, 0x7f, 6250),
};
-static const struct regulator_linear_range buck_volt_range2[] = {
+static const struct linear_range buck_volt_range2[] = {
REGULATOR_LINEAR_RANGE(800000, 0, 0x7f, 6250),
};
-static const struct regulator_linear_range buck_volt_range3[] = {
+static const struct linear_range buck_volt_range3[] = {
REGULATOR_LINEAR_RANGE(1500000, 0, 0x1f, 20000),
};
diff --git a/drivers/regulator/palmas-regulator.c b/drivers/regulator/palmas-regulator.c
index 31325912d311..337dd614695e 100644
--- a/drivers/regulator/palmas-regulator.c
+++ b/drivers/regulator/palmas-regulator.c
@@ -22,14 +22,14 @@
#include <linux/of_platform.h>
#include <linux/regulator/of_regulator.h>
-static const struct regulator_linear_range smps_low_ranges[] = {
+static const struct linear_range smps_low_ranges[] = {
REGULATOR_LINEAR_RANGE(0, 0x0, 0x0, 0),
REGULATOR_LINEAR_RANGE(500000, 0x1, 0x6, 0),
REGULATOR_LINEAR_RANGE(510000, 0x7, 0x79, 10000),
REGULATOR_LINEAR_RANGE(1650000, 0x7A, 0x7f, 0),
};
-static const struct regulator_linear_range smps_high_ranges[] = {
+static const struct linear_range smps_high_ranges[] = {
REGULATOR_LINEAR_RANGE(0, 0x0, 0x0, 0),
REGULATOR_LINEAR_RANGE(1000000, 0x1, 0x6, 0),
REGULATOR_LINEAR_RANGE(1020000, 0x7, 0x79, 20000),
diff --git a/drivers/regulator/qcom-rpmh-regulator.c b/drivers/regulator/qcom-rpmh-regulator.c
index c86ad40015ce..79bdc129cb50 100644
--- a/drivers/regulator/qcom-rpmh-regulator.c
+++ b/drivers/regulator/qcom-rpmh-regulator.c
@@ -86,7 +86,7 @@ enum rpmh_regulator_type {
struct rpmh_vreg_hw_data {
enum rpmh_regulator_type regulator_type;
const struct regulator_ops *ops;
- const struct regulator_linear_range voltage_range;
+ const struct linear_range voltage_range;
int n_voltages;
int hpm_min_load_uA;
const int *pmic_mode_map;
@@ -832,11 +832,11 @@ static const struct rpmh_vreg_init_data pm8150_vreg_data[] = {
RPMH_VREG("ldo10", "ldo%s10", &pmic5_pldo, "vdd-l2-l10"),
RPMH_VREG("ldo11", "ldo%s11", &pmic5_nldo, "vdd-l1-l8-l11"),
RPMH_VREG("ldo12", "ldo%s12", &pmic5_pldo_lv, "vdd-l7-l12-l14-l15"),
- RPMH_VREG("ldo13", "ldo%s13", &pmic5_pldo, "vdd-l13-l6-l17"),
+ RPMH_VREG("ldo13", "ldo%s13", &pmic5_pldo, "vdd-l13-l16-l17"),
RPMH_VREG("ldo14", "ldo%s14", &pmic5_pldo_lv, "vdd-l7-l12-l14-l15"),
RPMH_VREG("ldo15", "ldo%s15", &pmic5_pldo_lv, "vdd-l7-l12-l14-l15"),
- RPMH_VREG("ldo16", "ldo%s16", &pmic5_pldo, "vdd-l13-l6-l17"),
- RPMH_VREG("ldo17", "ldo%s17", &pmic5_pldo, "vdd-l13-l6-l17"),
+ RPMH_VREG("ldo16", "ldo%s16", &pmic5_pldo, "vdd-l13-l16-l17"),
+ RPMH_VREG("ldo17", "ldo%s17", &pmic5_pldo, "vdd-l13-l16-l17"),
RPMH_VREG("ldo18", "ldo%s18", &pmic5_nldo, "vdd-l3-l4-l5-l18"),
{},
};
@@ -857,7 +857,7 @@ static const struct rpmh_vreg_init_data pm8150l_vreg_data[] = {
RPMH_VREG("ldo5", "ldo%s5", &pmic5_pldo, "vdd-l4-l5-l6"),
RPMH_VREG("ldo6", "ldo%s6", &pmic5_pldo, "vdd-l4-l5-l6"),
RPMH_VREG("ldo7", "ldo%s7", &pmic5_pldo, "vdd-l7-l11"),
- RPMH_VREG("ldo8", "ldo%s8", &pmic5_pldo_lv, "vdd-l1-l8-l11"),
+ RPMH_VREG("ldo8", "ldo%s8", &pmic5_pldo_lv, "vdd-l1-l8"),
RPMH_VREG("ldo9", "ldo%s9", &pmic5_pldo, "vdd-l9-l10"),
RPMH_VREG("ldo10", "ldo%s10", &pmic5_pldo, "vdd-l9-l10"),
RPMH_VREG("ldo11", "ldo%s11", &pmic5_pldo, "vdd-l7-l11"),
diff --git a/drivers/regulator/qcom_rpm-regulator.c b/drivers/regulator/qcom_rpm-regulator.c
index 7fc97f23fcf4..0066f850f15d 100644
--- a/drivers/regulator/qcom_rpm-regulator.c
+++ b/drivers/regulator/qcom_rpm-regulator.c
@@ -148,41 +148,41 @@ static const struct rpm_reg_parts rpm8960_ncp_parts = {
/*
* Physically available PMIC regulator voltage ranges
*/
-static const struct regulator_linear_range pldo_ranges[] = {
+static const struct linear_range pldo_ranges[] = {
REGULATOR_LINEAR_RANGE( 750000, 0, 59, 12500),
REGULATOR_LINEAR_RANGE(1500000, 60, 123, 25000),
REGULATOR_LINEAR_RANGE(3100000, 124, 160, 50000),
};
-static const struct regulator_linear_range nldo_ranges[] = {
+static const struct linear_range nldo_ranges[] = {
REGULATOR_LINEAR_RANGE( 750000, 0, 63, 12500),
};
-static const struct regulator_linear_range nldo1200_ranges[] = {
+static const struct linear_range nldo1200_ranges[] = {
REGULATOR_LINEAR_RANGE( 375000, 0, 59, 6250),
REGULATOR_LINEAR_RANGE( 750000, 60, 123, 12500),
};
-static const struct regulator_linear_range smps_ranges[] = {
+static const struct linear_range smps_ranges[] = {
REGULATOR_LINEAR_RANGE( 375000, 0, 29, 12500),
REGULATOR_LINEAR_RANGE( 750000, 30, 89, 12500),
REGULATOR_LINEAR_RANGE(1500000, 90, 153, 25000),
};
-static const struct regulator_linear_range ftsmps_ranges[] = {
+static const struct linear_range ftsmps_ranges[] = {
REGULATOR_LINEAR_RANGE( 350000, 0, 6, 50000),
REGULATOR_LINEAR_RANGE( 700000, 7, 63, 12500),
REGULATOR_LINEAR_RANGE(1500000, 64, 100, 50000),
};
-static const struct regulator_linear_range smb208_ranges[] = {
+static const struct linear_range smb208_ranges[] = {
REGULATOR_LINEAR_RANGE( 375000, 0, 29, 12500),
REGULATOR_LINEAR_RANGE( 750000, 30, 89, 12500),
REGULATOR_LINEAR_RANGE(1500000, 90, 153, 25000),
REGULATOR_LINEAR_RANGE(3100000, 154, 234, 25000),
};
-static const struct regulator_linear_range ncp_ranges[] = {
+static const struct linear_range ncp_ranges[] = {
REGULATOR_LINEAR_RANGE(1500000, 0, 31, 50000),
};
@@ -604,16 +604,6 @@ static const struct qcom_rpm_reg pm8921_smps = {
.supports_force_mode_bypass = false,
};
-static const struct qcom_rpm_reg pm8921_ftsmps = {
- .desc.linear_ranges = ftsmps_ranges,
- .desc.n_linear_ranges = ARRAY_SIZE(ftsmps_ranges),
- .desc.n_voltages = 101,
- .desc.ops = &uV_ops,
- .parts = &rpm8960_smps_parts,
- .supports_force_mode_auto = true,
- .supports_force_mode_bypass = false,
-};
-
static const struct qcom_rpm_reg pm8921_ncp = {
.desc.linear_ranges = ncp_ranges,
.desc.n_linear_ranges = ARRAY_SIZE(ncp_ranges),
diff --git a/drivers/regulator/qcom_smd-regulator.c b/drivers/regulator/qcom_smd-regulator.c
index fdde4195cefb..53a64d856926 100644
--- a/drivers/regulator/qcom_smd-regulator.c
+++ b/drivers/regulator/qcom_smd-regulator.c
@@ -199,7 +199,7 @@ static const struct regulator_ops rpm_bob_ops = {
};
static const struct regulator_desc pma8084_hfsmps = {
- .linear_ranges = (struct regulator_linear_range[]) {
+ .linear_ranges = (struct linear_range[]) {
REGULATOR_LINEAR_RANGE(375000, 0, 95, 12500),
REGULATOR_LINEAR_RANGE(1550000, 96, 158, 25000),
},
@@ -209,7 +209,7 @@ static const struct regulator_desc pma8084_hfsmps = {
};
static const struct regulator_desc pma8084_ftsmps = {
- .linear_ranges = (struct regulator_linear_range[]) {
+ .linear_ranges = (struct linear_range[]) {
REGULATOR_LINEAR_RANGE(350000, 0, 184, 5000),
REGULATOR_LINEAR_RANGE(1280000, 185, 261, 10000),
},
@@ -219,7 +219,7 @@ static const struct regulator_desc pma8084_ftsmps = {
};
static const struct regulator_desc pma8084_pldo = {
- .linear_ranges = (struct regulator_linear_range[]) {
+ .linear_ranges = (struct linear_range[]) {
REGULATOR_LINEAR_RANGE( 750000, 0, 63, 12500),
REGULATOR_LINEAR_RANGE(1550000, 64, 126, 25000),
REGULATOR_LINEAR_RANGE(3100000, 127, 163, 50000),
@@ -230,7 +230,7 @@ static const struct regulator_desc pma8084_pldo = {
};
static const struct regulator_desc pma8084_nldo = {
- .linear_ranges = (struct regulator_linear_range[]) {
+ .linear_ranges = (struct linear_range[]) {
REGULATOR_LINEAR_RANGE(750000, 0, 63, 12500),
},
.n_linear_ranges = 1,
@@ -243,7 +243,7 @@ static const struct regulator_desc pma8084_switch = {
};
static const struct regulator_desc pm8x41_hfsmps = {
- .linear_ranges = (struct regulator_linear_range[]) {
+ .linear_ranges = (struct linear_range[]) {
REGULATOR_LINEAR_RANGE( 375000, 0, 95, 12500),
REGULATOR_LINEAR_RANGE(1575000, 96, 158, 25000),
},
@@ -253,7 +253,7 @@ static const struct regulator_desc pm8x41_hfsmps = {
};
static const struct regulator_desc pm8841_ftsmps = {
- .linear_ranges = (struct regulator_linear_range[]) {
+ .linear_ranges = (struct linear_range[]) {
REGULATOR_LINEAR_RANGE(350000, 0, 184, 5000),
REGULATOR_LINEAR_RANGE(1280000, 185, 261, 10000),
},
@@ -263,7 +263,7 @@ static const struct regulator_desc pm8841_ftsmps = {
};
static const struct regulator_desc pm8941_boost = {
- .linear_ranges = (struct regulator_linear_range[]) {
+ .linear_ranges = (struct linear_range[]) {
REGULATOR_LINEAR_RANGE(4000000, 0, 30, 50000),
},
.n_linear_ranges = 1,
@@ -272,7 +272,7 @@ static const struct regulator_desc pm8941_boost = {
};
static const struct regulator_desc pm8941_pldo = {
- .linear_ranges = (struct regulator_linear_range[]) {
+ .linear_ranges = (struct linear_range[]) {
REGULATOR_LINEAR_RANGE( 750000, 0, 63, 12500),
REGULATOR_LINEAR_RANGE(1550000, 64, 126, 25000),
REGULATOR_LINEAR_RANGE(3100000, 127, 163, 50000),
@@ -283,7 +283,7 @@ static const struct regulator_desc pm8941_pldo = {
};
static const struct regulator_desc pm8941_nldo = {
- .linear_ranges = (struct regulator_linear_range[]) {
+ .linear_ranges = (struct linear_range[]) {
REGULATOR_LINEAR_RANGE(750000, 0, 63, 12500),
},
.n_linear_ranges = 1,
@@ -302,7 +302,7 @@ static const struct regulator_desc pm8941_switch = {
};
static const struct regulator_desc pm8916_pldo = {
- .linear_ranges = (struct regulator_linear_range[]) {
+ .linear_ranges = (struct linear_range[]) {
REGULATOR_LINEAR_RANGE(750000, 0, 208, 12500),
},
.n_linear_ranges = 1,
@@ -311,7 +311,7 @@ static const struct regulator_desc pm8916_pldo = {
};
static const struct regulator_desc pm8916_nldo = {
- .linear_ranges = (struct regulator_linear_range[]) {
+ .linear_ranges = (struct linear_range[]) {
REGULATOR_LINEAR_RANGE(375000, 0, 93, 12500),
},
.n_linear_ranges = 1,
@@ -320,7 +320,7 @@ static const struct regulator_desc pm8916_nldo = {
};
static const struct regulator_desc pm8916_buck_lvo_smps = {
- .linear_ranges = (struct regulator_linear_range[]) {
+ .linear_ranges = (struct linear_range[]) {
REGULATOR_LINEAR_RANGE(375000, 0, 95, 12500),
REGULATOR_LINEAR_RANGE(750000, 96, 127, 25000),
},
@@ -330,7 +330,7 @@ static const struct regulator_desc pm8916_buck_lvo_smps = {
};
static const struct regulator_desc pm8916_buck_hvo_smps = {
- .linear_ranges = (struct regulator_linear_range[]) {
+ .linear_ranges = (struct linear_range[]) {
REGULATOR_LINEAR_RANGE(1550000, 0, 31, 25000),
},
.n_linear_ranges = 1,
@@ -339,7 +339,7 @@ static const struct regulator_desc pm8916_buck_hvo_smps = {
};
static const struct regulator_desc pm8950_hfsmps = {
- .linear_ranges = (struct regulator_linear_range[]) {
+ .linear_ranges = (struct linear_range[]) {
REGULATOR_LINEAR_RANGE(375000, 0, 95, 12500),
REGULATOR_LINEAR_RANGE(1550000, 96, 127, 25000),
},
@@ -349,7 +349,7 @@ static const struct regulator_desc pm8950_hfsmps = {
};
static const struct regulator_desc pm8950_ftsmps2p5 = {
- .linear_ranges = (struct regulator_linear_range[]) {
+ .linear_ranges = (struct linear_range[]) {
REGULATOR_LINEAR_RANGE(80000, 0, 255, 5000),
REGULATOR_LINEAR_RANGE(160000, 256, 460, 10000),
},
@@ -359,7 +359,7 @@ static const struct regulator_desc pm8950_ftsmps2p5 = {
};
static const struct regulator_desc pm8950_ult_nldo = {
- .linear_ranges = (struct regulator_linear_range[]) {
+ .linear_ranges = (struct linear_range[]) {
REGULATOR_LINEAR_RANGE(375000, 0, 202, 12500),
},
.n_linear_ranges = 1,
@@ -368,7 +368,7 @@ static const struct regulator_desc pm8950_ult_nldo = {
};
static const struct regulator_desc pm8950_ult_pldo = {
- .linear_ranges = (struct regulator_linear_range[]) {
+ .linear_ranges = (struct linear_range[]) {
REGULATOR_LINEAR_RANGE(1750000, 0, 127, 12500),
},
.n_linear_ranges = 1,
@@ -377,7 +377,7 @@ static const struct regulator_desc pm8950_ult_pldo = {
};
static const struct regulator_desc pm8950_pldo_lv = {
- .linear_ranges = (struct regulator_linear_range[]) {
+ .linear_ranges = (struct linear_range[]) {
REGULATOR_LINEAR_RANGE(1500000, 0, 16, 25000),
},
.n_linear_ranges = 1,
@@ -386,7 +386,7 @@ static const struct regulator_desc pm8950_pldo_lv = {
};
static const struct regulator_desc pm8950_pldo = {
- .linear_ranges = (struct regulator_linear_range[]) {
+ .linear_ranges = (struct linear_range[]) {
REGULATOR_LINEAR_RANGE(975000, 0, 164, 12500),
},
.n_linear_ranges = 1,
@@ -396,7 +396,7 @@ static const struct regulator_desc pm8950_pldo = {
static const struct regulator_desc pm8994_hfsmps = {
- .linear_ranges = (struct regulator_linear_range[]) {
+ .linear_ranges = (struct linear_range[]) {
REGULATOR_LINEAR_RANGE( 375000, 0, 95, 12500),
REGULATOR_LINEAR_RANGE(1550000, 96, 158, 25000),
},
@@ -406,7 +406,7 @@ static const struct regulator_desc pm8994_hfsmps = {
};
static const struct regulator_desc pm8994_ftsmps = {
- .linear_ranges = (struct regulator_linear_range[]) {
+ .linear_ranges = (struct linear_range[]) {
REGULATOR_LINEAR_RANGE(350000, 0, 199, 5000),
REGULATOR_LINEAR_RANGE(700000, 200, 349, 10000),
},
@@ -416,7 +416,7 @@ static const struct regulator_desc pm8994_ftsmps = {
};
static const struct regulator_desc pm8994_nldo = {
- .linear_ranges = (struct regulator_linear_range[]) {
+ .linear_ranges = (struct linear_range[]) {
REGULATOR_LINEAR_RANGE(750000, 0, 63, 12500),
},
.n_linear_ranges = 1,
@@ -425,7 +425,7 @@ static const struct regulator_desc pm8994_nldo = {
};
static const struct regulator_desc pm8994_pldo = {
- .linear_ranges = (struct regulator_linear_range[]) {
+ .linear_ranges = (struct linear_range[]) {
REGULATOR_LINEAR_RANGE( 750000, 0, 63, 12500),
REGULATOR_LINEAR_RANGE(1550000, 64, 126, 25000),
REGULATOR_LINEAR_RANGE(3100000, 127, 163, 50000),
@@ -446,7 +446,7 @@ static const struct regulator_desc pm8994_lnldo = {
};
static const struct regulator_desc pmi8994_ftsmps = {
- .linear_ranges = (struct regulator_linear_range[]) {
+ .linear_ranges = (struct linear_range[]) {
REGULATOR_LINEAR_RANGE(350000, 0, 199, 5000),
REGULATOR_LINEAR_RANGE(700000, 200, 349, 10000),
},
@@ -456,7 +456,7 @@ static const struct regulator_desc pmi8994_ftsmps = {
};
static const struct regulator_desc pmi8994_hfsmps = {
- .linear_ranges = (struct regulator_linear_range[]) {
+ .linear_ranges = (struct linear_range[]) {
REGULATOR_LINEAR_RANGE(350000, 0, 80, 12500),
REGULATOR_LINEAR_RANGE(700000, 81, 141, 25000),
},
@@ -466,7 +466,7 @@ static const struct regulator_desc pmi8994_hfsmps = {
};
static const struct regulator_desc pmi8994_bby = {
- .linear_ranges = (struct regulator_linear_range[]) {
+ .linear_ranges = (struct linear_range[]) {
REGULATOR_LINEAR_RANGE(3000000, 0, 44, 50000),
},
.n_linear_ranges = 1,
@@ -475,7 +475,7 @@ static const struct regulator_desc pmi8994_bby = {
};
static const struct regulator_desc pmi8994_boost = {
- .linear_ranges = (struct regulator_linear_range[]) {
+ .linear_ranges = (struct linear_range[]) {
REGULATOR_LINEAR_RANGE(4000000, 0, 30, 50000),
},
.n_linear_ranges = 1,
@@ -484,7 +484,7 @@ static const struct regulator_desc pmi8994_boost = {
};
static const struct regulator_desc pm8998_ftsmps = {
- .linear_ranges = (struct regulator_linear_range[]) {
+ .linear_ranges = (struct linear_range[]) {
REGULATOR_LINEAR_RANGE(320000, 0, 258, 4000),
},
.n_linear_ranges = 1,
@@ -493,7 +493,7 @@ static const struct regulator_desc pm8998_ftsmps = {
};
static const struct regulator_desc pm8998_hfsmps = {
- .linear_ranges = (struct regulator_linear_range[]) {
+ .linear_ranges = (struct linear_range[]) {
REGULATOR_LINEAR_RANGE(320000, 0, 215, 8000),
},
.n_linear_ranges = 1,
@@ -502,7 +502,7 @@ static const struct regulator_desc pm8998_hfsmps = {
};
static const struct regulator_desc pm8998_nldo = {
- .linear_ranges = (struct regulator_linear_range[]) {
+ .linear_ranges = (struct linear_range[]) {
REGULATOR_LINEAR_RANGE(312000, 0, 127, 8000),
},
.n_linear_ranges = 1,
@@ -511,7 +511,7 @@ static const struct regulator_desc pm8998_nldo = {
};
static const struct regulator_desc pm8998_pldo = {
- .linear_ranges = (struct regulator_linear_range[]) {
+ .linear_ranges = (struct linear_range[]) {
REGULATOR_LINEAR_RANGE(1664000, 0, 255, 8000),
},
.n_linear_ranges = 1,
@@ -520,7 +520,7 @@ static const struct regulator_desc pm8998_pldo = {
};
static const struct regulator_desc pm8998_pldo_lv = {
- .linear_ranges = (struct regulator_linear_range[]) {
+ .linear_ranges = (struct linear_range[]) {
REGULATOR_LINEAR_RANGE(1256000, 0, 127, 8000),
},
.n_linear_ranges = 1,
@@ -533,7 +533,7 @@ static const struct regulator_desc pm8998_switch = {
};
static const struct regulator_desc pmi8998_bob = {
- .linear_ranges = (struct regulator_linear_range[]) {
+ .linear_ranges = (struct linear_range[]) {
REGULATOR_LINEAR_RANGE(1824000, 0, 83, 32000),
},
.n_linear_ranges = 1,
@@ -542,7 +542,7 @@ static const struct regulator_desc pmi8998_bob = {
};
static const struct regulator_desc pms405_hfsmps3 = {
- .linear_ranges = (struct regulator_linear_range[]) {
+ .linear_ranges = (struct linear_range[]) {
REGULATOR_LINEAR_RANGE(320000, 0, 215, 8000),
},
.n_linear_ranges = 1,
@@ -551,7 +551,7 @@ static const struct regulator_desc pms405_hfsmps3 = {
};
static const struct regulator_desc pms405_nldo300 = {
- .linear_ranges = (struct regulator_linear_range[]) {
+ .linear_ranges = (struct linear_range[]) {
REGULATOR_LINEAR_RANGE(312000, 0, 127, 8000),
},
.n_linear_ranges = 1,
@@ -560,7 +560,7 @@ static const struct regulator_desc pms405_nldo300 = {
};
static const struct regulator_desc pms405_nldo1200 = {
- .linear_ranges = (struct regulator_linear_range[]) {
+ .linear_ranges = (struct linear_range[]) {
REGULATOR_LINEAR_RANGE(312000, 0, 127, 8000),
},
.n_linear_ranges = 1,
@@ -569,7 +569,7 @@ static const struct regulator_desc pms405_nldo1200 = {
};
static const struct regulator_desc pms405_pldo50 = {
- .linear_ranges = (struct regulator_linear_range[]) {
+ .linear_ranges = (struct linear_range[]) {
REGULATOR_LINEAR_RANGE(1664000, 0, 128, 16000),
},
.n_linear_ranges = 1,
@@ -578,7 +578,7 @@ static const struct regulator_desc pms405_pldo50 = {
};
static const struct regulator_desc pms405_pldo150 = {
- .linear_ranges = (struct regulator_linear_range[]) {
+ .linear_ranges = (struct linear_range[]) {
REGULATOR_LINEAR_RANGE(1664000, 0, 128, 16000),
},
.n_linear_ranges = 1,
@@ -587,7 +587,7 @@ static const struct regulator_desc pms405_pldo150 = {
};
static const struct regulator_desc pms405_pldo600 = {
- .linear_ranges = (struct regulator_linear_range[]) {
+ .linear_ranges = (struct linear_range[]) {
REGULATOR_LINEAR_RANGE(1256000, 0, 98, 8000),
},
.n_linear_ranges = 1,
diff --git a/drivers/regulator/rk808-regulator.c b/drivers/regulator/rk808-regulator.c
index 31f79fda3238..e926c1a85846 100644
--- a/drivers/regulator/rk808-regulator.c
+++ b/drivers/regulator/rk808-regulator.c
@@ -165,14 +165,14 @@ static const int rk808_buck_config_regs[] = {
RK808_BUCK4_CONFIG_REG,
};
-static const struct regulator_linear_range rk808_ldo3_voltage_ranges[] = {
+static const struct linear_range rk808_ldo3_voltage_ranges[] = {
REGULATOR_LINEAR_RANGE(800000, 0, 13, 100000),
REGULATOR_LINEAR_RANGE(2500000, 15, 15, 0),
};
#define RK809_BUCK5_SEL_CNT (8)
-static const struct regulator_linear_range rk809_buck5_voltage_ranges[] = {
+static const struct linear_range rk809_buck5_voltage_ranges[] = {
REGULATOR_LINEAR_RANGE(1500000, 0, 0, 0),
REGULATOR_LINEAR_RANGE(1800000, 1, 3, 200000),
REGULATOR_LINEAR_RANGE(2800000, 4, 5, 200000),
@@ -201,14 +201,14 @@ static const struct regulator_linear_range rk809_buck5_voltage_ranges[] = {
#define RK817_BUCK1_SEL_CNT (RK817_BUCK1_SEL0 + RK817_BUCK1_SEL1 + 1)
#define RK817_BUCK3_SEL_CNT (RK817_BUCK1_SEL0 + RK817_BUCK3_SEL1 + 1)
-static const struct regulator_linear_range rk817_buck1_voltage_ranges[] = {
+static const struct linear_range rk817_buck1_voltage_ranges[] = {
REGULATOR_LINEAR_RANGE(RK817_BUCK1_MIN0, 0,
RK817_BUCK1_SEL0, RK817_BUCK1_STP0),
REGULATOR_LINEAR_RANGE(RK817_BUCK1_MIN1, RK817_BUCK1_SEL0 + 1,
RK817_BUCK1_SEL_CNT, RK817_BUCK1_STP1),
};
-static const struct regulator_linear_range rk817_buck3_voltage_ranges[] = {
+static const struct linear_range rk817_buck3_voltage_ranges[] = {
REGULATOR_LINEAR_RANGE(RK817_BUCK1_MIN0, 0,
RK817_BUCK1_SEL0, RK817_BUCK1_STP0),
REGULATOR_LINEAR_RANGE(RK817_BUCK1_MIN1, RK817_BUCK1_SEL0 + 1,
@@ -665,7 +665,7 @@ static const struct regulator_ops rk808_switch_ops = {
.set_suspend_disable = rk808_set_suspend_disable,
};
-static const struct regulator_linear_range rk805_buck_1_2_voltage_ranges[] = {
+static const struct linear_range rk805_buck_1_2_voltage_ranges[] = {
REGULATOR_LINEAR_RANGE(712500, 0, 59, 12500),
REGULATOR_LINEAR_RANGE(1800000, 60, 62, 200000),
REGULATOR_LINEAR_RANGE(2300000, 63, 63, 0),
diff --git a/drivers/regulator/s2mps11.c b/drivers/regulator/s2mps11.c
index 23d288278957..33cf84bce05a 100644
--- a/drivers/regulator/s2mps11.c
+++ b/drivers/regulator/s2mps11.c
@@ -749,37 +749,37 @@ static const struct regulator_ops s2mps15_reg_buck_ops = {
}
/* voltage range for s2mps15 LDO 3, 5, 15, 16, 18, 20, 23 and 27 */
-static const struct regulator_linear_range s2mps15_ldo_voltage_ranges1[] = {
+static const struct linear_range s2mps15_ldo_voltage_ranges1[] = {
REGULATOR_LINEAR_RANGE(1000000, 0xc, 0x38, 25000),
};
/* voltage range for s2mps15 LDO 2, 6, 14, 17, 19, 21, 24 and 25 */
-static const struct regulator_linear_range s2mps15_ldo_voltage_ranges2[] = {
+static const struct linear_range s2mps15_ldo_voltage_ranges2[] = {
REGULATOR_LINEAR_RANGE(1800000, 0x0, 0x3f, 25000),
};
/* voltage range for s2mps15 LDO 4, 11, 12, 13, 22 and 26 */
-static const struct regulator_linear_range s2mps15_ldo_voltage_ranges3[] = {
+static const struct linear_range s2mps15_ldo_voltage_ranges3[] = {
REGULATOR_LINEAR_RANGE(700000, 0x0, 0x34, 12500),
};
/* voltage range for s2mps15 LDO 7, 8, 9 and 10 */
-static const struct regulator_linear_range s2mps15_ldo_voltage_ranges4[] = {
+static const struct linear_range s2mps15_ldo_voltage_ranges4[] = {
REGULATOR_LINEAR_RANGE(700000, 0x10, 0x20, 25000),
};
/* voltage range for s2mps15 LDO 1 */
-static const struct regulator_linear_range s2mps15_ldo_voltage_ranges5[] = {
+static const struct linear_range s2mps15_ldo_voltage_ranges5[] = {
REGULATOR_LINEAR_RANGE(500000, 0x0, 0x20, 12500),
};
/* voltage range for s2mps15 BUCK 1, 2, 3, 4, 5, 6 and 7 */
-static const struct regulator_linear_range s2mps15_buck_voltage_ranges1[] = {
+static const struct linear_range s2mps15_buck_voltage_ranges1[] = {
REGULATOR_LINEAR_RANGE(500000, 0x20, 0xc0, 6250),
};
/* voltage range for s2mps15 BUCK 8, 9 and 10 */
-static const struct regulator_linear_range s2mps15_buck_voltage_ranges2[] = {
+static const struct linear_range s2mps15_buck_voltage_ranges2[] = {
REGULATOR_LINEAR_RANGE(1000000, 0x20, 0x78, 12500),
};
diff --git a/drivers/regulator/sky81452-regulator.c b/drivers/regulator/sky81452-regulator.c
index 177dede82a61..37658affe072 100644
--- a/drivers/regulator/sky81452-regulator.c
+++ b/drivers/regulator/sky81452-regulator.c
@@ -32,7 +32,7 @@ static const struct regulator_ops sky81452_reg_ops = {
.is_enabled = regulator_is_enabled_regmap,
};
-static const struct regulator_linear_range sky81452_reg_ranges[] = {
+static const struct linear_range sky81452_reg_ranges[] = {
REGULATOR_LINEAR_RANGE(4500000, 0, 14, 250000),
REGULATOR_LINEAR_RANGE(9000000, 15, 31, 1000000),
};
diff --git a/drivers/regulator/stpmic1_regulator.c b/drivers/regulator/stpmic1_regulator.c
index f3d7d007ecbb..adc9973d1b2f 100644
--- a/drivers/regulator/stpmic1_regulator.c
+++ b/drivers/regulator/stpmic1_regulator.c
@@ -57,13 +57,13 @@ enum {
/* Ramp delay worst case is (2250uV/uS) */
#define PMIC_RAMP_DELAY 2200
-static const struct regulator_linear_range buck1_ranges[] = {
+static const struct linear_range buck1_ranges[] = {
REGULATOR_LINEAR_RANGE(725000, 0, 4, 0),
REGULATOR_LINEAR_RANGE(725000, 5, 36, 25000),
REGULATOR_LINEAR_RANGE(1500000, 37, 63, 0),
};
-static const struct regulator_linear_range buck2_ranges[] = {
+static const struct linear_range buck2_ranges[] = {
REGULATOR_LINEAR_RANGE(1000000, 0, 17, 0),
REGULATOR_LINEAR_RANGE(1050000, 18, 19, 0),
REGULATOR_LINEAR_RANGE(1100000, 20, 21, 0),
@@ -77,7 +77,7 @@ static const struct regulator_linear_range buck2_ranges[] = {
REGULATOR_LINEAR_RANGE(1500000, 36, 63, 0),
};
-static const struct regulator_linear_range buck3_ranges[] = {
+static const struct linear_range buck3_ranges[] = {
REGULATOR_LINEAR_RANGE(1000000, 0, 19, 0),
REGULATOR_LINEAR_RANGE(1100000, 20, 23, 0),
REGULATOR_LINEAR_RANGE(1200000, 24, 27, 0),
@@ -87,7 +87,7 @@ static const struct regulator_linear_range buck3_ranges[] = {
REGULATOR_LINEAR_RANGE(3400000, 56, 63, 0),
};
-static const struct regulator_linear_range buck4_ranges[] = {
+static const struct linear_range buck4_ranges[] = {
REGULATOR_LINEAR_RANGE(600000, 0, 27, 25000),
REGULATOR_LINEAR_RANGE(1300000, 28, 29, 0),
REGULATOR_LINEAR_RANGE(1350000, 30, 31, 0),
@@ -97,19 +97,19 @@ static const struct regulator_linear_range buck4_ranges[] = {
REGULATOR_LINEAR_RANGE(3900000, 61, 63, 0),
};
-static const struct regulator_linear_range ldo1_ranges[] = {
+static const struct linear_range ldo1_ranges[] = {
REGULATOR_LINEAR_RANGE(1700000, 0, 7, 0),
REGULATOR_LINEAR_RANGE(1700000, 8, 24, 100000),
REGULATOR_LINEAR_RANGE(3300000, 25, 31, 0),
};
-static const struct regulator_linear_range ldo2_ranges[] = {
+static const struct linear_range ldo2_ranges[] = {
REGULATOR_LINEAR_RANGE(1700000, 0, 7, 0),
REGULATOR_LINEAR_RANGE(1700000, 8, 24, 100000),
REGULATOR_LINEAR_RANGE(3300000, 25, 30, 0),
};
-static const struct regulator_linear_range ldo3_ranges[] = {
+static const struct linear_range ldo3_ranges[] = {
REGULATOR_LINEAR_RANGE(1700000, 0, 7, 0),
REGULATOR_LINEAR_RANGE(1700000, 8, 24, 100000),
REGULATOR_LINEAR_RANGE(3300000, 25, 30, 0),
@@ -117,13 +117,13 @@ static const struct regulator_linear_range ldo3_ranges[] = {
REGULATOR_LINEAR_RANGE(500000, 31, 31, 0),
};
-static const struct regulator_linear_range ldo5_ranges[] = {
+static const struct linear_range ldo5_ranges[] = {
REGULATOR_LINEAR_RANGE(1700000, 0, 7, 0),
REGULATOR_LINEAR_RANGE(1700000, 8, 30, 100000),
REGULATOR_LINEAR_RANGE(3900000, 31, 31, 0),
};
-static const struct regulator_linear_range ldo6_ranges[] = {
+static const struct linear_range ldo6_ranges[] = {
REGULATOR_LINEAR_RANGE(900000, 0, 24, 100000),
REGULATOR_LINEAR_RANGE(3300000, 25, 31, 0),
};
diff --git a/drivers/regulator/tps65086-regulator.c b/drivers/regulator/tps65086-regulator.c
index 5a5e9b5bf4be..9910e949373c 100644
--- a/drivers/regulator/tps65086-regulator.c
+++ b/drivers/regulator/tps65086-regulator.c
@@ -71,23 +71,23 @@ struct tps65086_regulator {
unsigned int decay_mask;
};
-static const struct regulator_linear_range tps65086_10mv_ranges[] = {
+static const struct linear_range tps65086_10mv_ranges[] = {
REGULATOR_LINEAR_RANGE(0, 0x0, 0x0, 0),
REGULATOR_LINEAR_RANGE(410000, 0x1, 0x7F, 10000),
};
-static const struct regulator_linear_range tps65086_buck126_25mv_ranges[] = {
+static const struct linear_range tps65086_buck126_25mv_ranges[] = {
REGULATOR_LINEAR_RANGE(0, 0x0, 0x0, 0),
REGULATOR_LINEAR_RANGE(1000000, 0x1, 0x18, 0),
REGULATOR_LINEAR_RANGE(1025000, 0x19, 0x7F, 25000),
};
-static const struct regulator_linear_range tps65086_buck345_25mv_ranges[] = {
+static const struct linear_range tps65086_buck345_25mv_ranges[] = {
REGULATOR_LINEAR_RANGE(0, 0x0, 0x0, 0),
REGULATOR_LINEAR_RANGE(425000, 0x1, 0x7F, 25000),
};
-static const struct regulator_linear_range tps65086_ldoa1_ranges[] = {
+static const struct linear_range tps65086_ldoa1_ranges[] = {
REGULATOR_LINEAR_RANGE(1350000, 0x0, 0x0, 0),
REGULATOR_LINEAR_RANGE(1500000, 0x1, 0x7, 100000),
REGULATOR_LINEAR_RANGE(2300000, 0x8, 0xB, 100000),
@@ -95,7 +95,7 @@ static const struct regulator_linear_range tps65086_ldoa1_ranges[] = {
REGULATOR_LINEAR_RANGE(3300000, 0xE, 0xE, 0),
};
-static const struct regulator_linear_range tps65086_ldoa23_ranges[] = {
+static const struct linear_range tps65086_ldoa23_ranges[] = {
REGULATOR_LINEAR_RANGE(700000, 0x0, 0xD, 50000),
REGULATOR_LINEAR_RANGE(1400000, 0xE, 0xF, 100000),
};
diff --git a/drivers/regulator/tps65217-regulator.c b/drivers/regulator/tps65217-regulator.c
index 67ba78da77ec..d27dbbafcf72 100644
--- a/drivers/regulator/tps65217-regulator.c
+++ b/drivers/regulator/tps65217-regulator.c
@@ -56,14 +56,14 @@ static const unsigned int LDO1_VSEL_table[] = {
2800000, 3000000, 3100000, 3300000,
};
-static const struct regulator_linear_range tps65217_uv1_ranges[] = {
+static const struct linear_range tps65217_uv1_ranges[] = {
REGULATOR_LINEAR_RANGE(900000, 0, 24, 25000),
REGULATOR_LINEAR_RANGE(1550000, 25, 52, 50000),
REGULATOR_LINEAR_RANGE(3000000, 53, 55, 100000),
REGULATOR_LINEAR_RANGE(3300000, 56, 63, 0),
};
-static const struct regulator_linear_range tps65217_uv2_ranges[] = {
+static const struct linear_range tps65217_uv2_ranges[] = {
REGULATOR_LINEAR_RANGE(1500000, 0, 8, 50000),
REGULATOR_LINEAR_RANGE(2000000, 9, 13, 100000),
REGULATOR_LINEAR_RANGE(2450000, 14, 31, 50000),
diff --git a/drivers/regulator/tps65218-regulator.c b/drivers/regulator/tps65218-regulator.c
index b72035610013..05d13f807918 100644
--- a/drivers/regulator/tps65218-regulator.c
+++ b/drivers/regulator/tps65218-regulator.c
@@ -56,17 +56,17 @@
.bypass_mask = _sm, \
} \
-static const struct regulator_linear_range dcdc1_dcdc2_ranges[] = {
+static const struct linear_range dcdc1_dcdc2_ranges[] = {
REGULATOR_LINEAR_RANGE(850000, 0x0, 0x32, 10000),
REGULATOR_LINEAR_RANGE(1375000, 0x33, 0x3f, 25000),
};
-static const struct regulator_linear_range ldo1_dcdc3_ranges[] = {
+static const struct linear_range ldo1_dcdc3_ranges[] = {
REGULATOR_LINEAR_RANGE(900000, 0x0, 0x1a, 25000),
REGULATOR_LINEAR_RANGE(1600000, 0x1b, 0x3f, 50000),
};
-static const struct regulator_linear_range dcdc4_ranges[] = {
+static const struct linear_range dcdc4_ranges[] = {
REGULATOR_LINEAR_RANGE(1175000, 0x0, 0xf, 25000),
REGULATOR_LINEAR_RANGE(1600000, 0x10, 0x34, 50000),
};
diff --git a/drivers/regulator/tps65912-regulator.c b/drivers/regulator/tps65912-regulator.c
index 276faeddc370..15c79931ea89 100644
--- a/drivers/regulator/tps65912-regulator.c
+++ b/drivers/regulator/tps65912-regulator.c
@@ -46,11 +46,11 @@ enum tps65912_regulators { DCDC1, DCDC2, DCDC3, DCDC4, LDO1, LDO2, LDO3,
.n_linear_ranges = ARRAY_SIZE(_lr), \
}
-static const struct regulator_linear_range tps65912_dcdc_ranges[] = {
+static const struct linear_range tps65912_dcdc_ranges[] = {
REGULATOR_LINEAR_RANGE(500000, 0x0, 0x3f, 50000),
};
-static const struct regulator_linear_range tps65912_ldo_ranges[] = {
+static const struct linear_range tps65912_ldo_ranges[] = {
REGULATOR_LINEAR_RANGE(800000, 0x0, 0x20, 25000),
REGULATOR_LINEAR_RANGE(1650000, 0x21, 0x3c, 50000),
REGULATOR_LINEAR_RANGE(3100000, 0x3d, 0x3f, 100000),
diff --git a/drivers/regulator/tps80031-regulator.c b/drivers/regulator/tps80031-regulator.c
index 85a6a8ca8c1b..a29e65230132 100644
--- a/drivers/regulator/tps80031-regulator.c
+++ b/drivers/regulator/tps80031-regulator.c
@@ -271,7 +271,7 @@ static int tps80031_vbus_is_enabled(struct regulator_dev *rdev)
{
struct tps80031_regulator *ri = rdev_get_drvdata(rdev);
struct device *parent = to_tps80031_dev(rdev);
- int ret = -EIO;
+ int ret;
uint8_t ctrl1 = 0;
uint8_t ctrl3 = 0;
@@ -322,7 +322,7 @@ static int tps80031_vbus_disable(struct regulator_dev *rdev)
{
struct tps80031_regulator *ri = rdev_get_drvdata(rdev);
struct device *parent = to_tps80031_dev(rdev);
- int ret = 0;
+ int ret;
if (ri->config_flags & TPS80031_VBUS_DISCHRG_EN_PDN) {
ret = tps80031_write(parent, TPS80031_SLAVE_ID2,
@@ -530,7 +530,8 @@ static int tps80031_regulator_config(struct device *parent,
case TPS80031_REGULATOR_LDOUSB:
if (ri->config_flags & (TPS80031_USBLDO_INPUT_VSYS |
TPS80031_USBLDO_INPUT_PMID)) {
- unsigned val = 0;
+ unsigned val;
+
if (ri->config_flags & TPS80031_USBLDO_INPUT_VSYS)
val = MISC2_LDOUSB_IN_VSYS;
else
diff --git a/drivers/regulator/twl-regulator.c b/drivers/regulator/twl-regulator.c
index 866b4dd01da9..4a51cfea45ac 100644
--- a/drivers/regulator/twl-regulator.c
+++ b/drivers/regulator/twl-regulator.c
@@ -360,12 +360,12 @@ static const u16 VINTANA2_VSEL_table[] = {
};
/* 600mV to 1450mV in 12.5 mV steps */
-static const struct regulator_linear_range VDD1_ranges[] = {
+static const struct linear_range VDD1_ranges[] = {
REGULATOR_LINEAR_RANGE(600000, 0, 68, 12500)
};
/* 600mV to 1450mV in 12.5 mV steps, everything above = 1500mV */
-static const struct regulator_linear_range VDD2_ranges[] = {
+static const struct linear_range VDD2_ranges[] = {
REGULATOR_LINEAR_RANGE(600000, 0, 68, 12500),
REGULATOR_LINEAR_RANGE(1500000, 69, 69, 12500)
};
diff --git a/drivers/regulator/twl6030-regulator.c b/drivers/regulator/twl6030-regulator.c
index b8100c3cedad..f7db250a7583 100644
--- a/drivers/regulator/twl6030-regulator.c
+++ b/drivers/regulator/twl6030-regulator.c
@@ -495,7 +495,7 @@ static const struct regulator_ops twlsmps_ops = {
};
/*----------------------------------------------------------------------*/
-static const struct regulator_linear_range twl6030ldo_linear_range[] = {
+static const struct linear_range twl6030ldo_linear_range[] = {
REGULATOR_LINEAR_RANGE(0, 0, 0, 0),
REGULATOR_LINEAR_RANGE(1000000, 1, 24, 100000),
REGULATOR_LINEAR_RANGE(2750000, 31, 31, 0),
diff --git a/drivers/regulator/wm831x-dcdc.c b/drivers/regulator/wm831x-dcdc.c
index 018dbbd96771..ad2203d11a88 100644
--- a/drivers/regulator/wm831x-dcdc.c
+++ b/drivers/regulator/wm831x-dcdc.c
@@ -204,7 +204,7 @@ static irqreturn_t wm831x_dcdc_oc_irq(int irq, void *data)
* BUCKV specifics
*/
-static const struct regulator_linear_range wm831x_buckv_ranges[] = {
+static const struct linear_range wm831x_buckv_ranges[] = {
REGULATOR_LINEAR_RANGE(600000, 0, 0x7, 0),
REGULATOR_LINEAR_RANGE(600000, 0x8, 0x68, 12500),
};
diff --git a/drivers/regulator/wm831x-ldo.c b/drivers/regulator/wm831x-ldo.c
index 56754686c982..7b6cf4810cb7 100644
--- a/drivers/regulator/wm831x-ldo.c
+++ b/drivers/regulator/wm831x-ldo.c
@@ -59,7 +59,7 @@ static irqreturn_t wm831x_ldo_uv_irq(int irq, void *data)
* General purpose LDOs
*/
-static const struct regulator_linear_range wm831x_gp_ldo_ranges[] = {
+static const struct linear_range wm831x_gp_ldo_ranges[] = {
REGULATOR_LINEAR_RANGE(900000, 0, 14, 50000),
REGULATOR_LINEAR_RANGE(1700000, 15, 31, 100000),
};
@@ -312,7 +312,7 @@ static struct platform_driver wm831x_gp_ldo_driver = {
* Analogue LDOs
*/
-static const struct regulator_linear_range wm831x_aldo_ranges[] = {
+static const struct linear_range wm831x_aldo_ranges[] = {
REGULATOR_LINEAR_RANGE(1000000, 0, 12, 50000),
REGULATOR_LINEAR_RANGE(1700000, 13, 31, 100000),
};
diff --git a/drivers/regulator/wm8350-regulator.c b/drivers/regulator/wm8350-regulator.c
index 56d6168a888d..ae5f0e7fce8b 100644
--- a/drivers/regulator/wm8350-regulator.c
+++ b/drivers/regulator/wm8350-regulator.c
@@ -470,7 +470,7 @@ static int wm8350_dcdc_set_suspend_mode(struct regulator_dev *rdev,
return 0;
}
-static const struct regulator_linear_range wm8350_ldo_ranges[] = {
+static const struct linear_range wm8350_ldo_ranges[] = {
REGULATOR_LINEAR_RANGE(900000, 0, 15, 50000),
REGULATOR_LINEAR_RANGE(1800000, 16, 31, 100000),
};
diff --git a/drivers/regulator/wm8400-regulator.c b/drivers/regulator/wm8400-regulator.c
index 6f331b51e479..4cb1fbb59722 100644
--- a/drivers/regulator/wm8400-regulator.c
+++ b/drivers/regulator/wm8400-regulator.c
@@ -13,7 +13,7 @@
#include <linux/regulator/driver.h>
#include <linux/mfd/wm8400-private.h>
-static const struct regulator_linear_range wm8400_ldo_ranges[] = {
+static const struct linear_range wm8400_ldo_ranges[] = {
REGULATOR_LINEAR_RANGE(900000, 0, 14, 50000),
REGULATOR_LINEAR_RANGE(1700000, 15, 31, 100000),
};
diff --git a/drivers/s390/block/dasd_genhd.c b/drivers/s390/block/dasd_genhd.c
index 7d079154f849..af5b0ecb8f89 100644
--- a/drivers/s390/block/dasd_genhd.c
+++ b/drivers/s390/block/dasd_genhd.c
@@ -143,9 +143,6 @@ int dasd_scan_partitions(struct dasd_block *block)
*/
void dasd_destroy_partitions(struct dasd_block *block)
{
- /* The two structs have 168/176 byte on 31/64 bit. */
- struct blkpg_partition bpart;
- struct blkpg_ioctl_arg barg;
struct block_device *bdev;
/*
@@ -155,19 +152,10 @@ void dasd_destroy_partitions(struct dasd_block *block)
bdev = block->bdev;
block->bdev = NULL;
- /*
- * See fs/partition/check.c:delete_partition
- * Can't call delete_partitions directly. Use ioctl.
- * The ioctl also does locking and invalidation.
- */
- memset(&bpart, 0, sizeof(struct blkpg_partition));
- memset(&barg, 0, sizeof(struct blkpg_ioctl_arg));
- barg.data = (void __force __user *) &bpart;
- barg.op = BLKPG_DEL_PARTITION;
- for (bpart.pno = block->gdp->minors - 1; bpart.pno > 0; bpart.pno--)
- ioctl_by_bdev(bdev, BLKPG, (unsigned long) &barg);
-
- invalidate_partition(block->gdp, 0);
+ mutex_lock(&bdev->bd_mutex);
+ blk_drop_partitions(bdev);
+ mutex_unlock(&bdev->bd_mutex);
+
/* Matching blkdev_put to the blkdev_get in dasd_scan_partitions. */
blkdev_put(bdev, FMODE_READ);
set_capacity(block->gdp, 0);
diff --git a/drivers/s390/block/dasd_ioctl.c b/drivers/s390/block/dasd_ioctl.c
index 9a5f3add325f..777734d1b4e5 100644
--- a/drivers/s390/block/dasd_ioctl.c
+++ b/drivers/s390/block/dasd_ioctl.c
@@ -22,6 +22,7 @@
#include <asm/schid.h>
#include <asm/cmb.h>
#include <linux/uaccess.h>
+#include <linux/dasd_mod.h>
/* This is ugly... */
#define PRINTK_HEADER "dasd_ioctl:"
@@ -457,10 +458,9 @@ static int dasd_ioctl_read_profile(struct dasd_block *block, void __user *argp)
/*
* Return dasd information. Used for BIODASDINFO and BIODASDINFO2.
*/
-static int dasd_ioctl_information(struct dasd_block *block,
- unsigned int cmd, void __user *argp)
+static int __dasd_ioctl_information(struct dasd_block *block,
+ struct dasd_information2_t *dasd_info)
{
- struct dasd_information2_t *dasd_info;
struct subchannel_id sch_id;
struct ccw_dev_id dev_id;
struct dasd_device *base;
@@ -473,15 +473,9 @@ static int dasd_ioctl_information(struct dasd_block *block,
if (!base->discipline || !base->discipline->fill_info)
return -EINVAL;
- dasd_info = kzalloc(sizeof(struct dasd_information2_t), GFP_KERNEL);
- if (dasd_info == NULL)
- return -ENOMEM;
-
rc = base->discipline->fill_info(base, dasd_info);
- if (rc) {
- kfree(dasd_info);
+ if (rc)
return rc;
- }
cdev = base->cdev;
ccw_device_get_id(cdev, &dev_id);
@@ -520,15 +514,24 @@ static int dasd_ioctl_information(struct dasd_block *block,
list_for_each(l, &base->ccw_queue)
dasd_info->chanq_len++;
spin_unlock_irqrestore(&block->queue_lock, flags);
+ return 0;
+}
- rc = 0;
- if (copy_to_user(argp, dasd_info,
- ((cmd == (unsigned int) BIODASDINFO2) ?
- sizeof(struct dasd_information2_t) :
- sizeof(struct dasd_information_t))))
- rc = -EFAULT;
+static int dasd_ioctl_information(struct dasd_block *block, void __user *argp,
+ size_t copy_size)
+{
+ struct dasd_information2_t *dasd_info;
+ int error;
+
+ dasd_info = kzalloc(sizeof(*dasd_info), GFP_KERNEL);
+ if (!dasd_info)
+ return -ENOMEM;
+
+ error = __dasd_ioctl_information(block, dasd_info);
+ if (!error && copy_to_user(argp, dasd_info, copy_size))
+ error = -EFAULT;
kfree(dasd_info);
- return rc;
+ return error;
}
/*
@@ -622,10 +625,12 @@ int dasd_ioctl(struct block_device *bdev, fmode_t mode,
rc = dasd_ioctl_check_format(bdev, argp);
break;
case BIODASDINFO:
- rc = dasd_ioctl_information(block, cmd, argp);
+ rc = dasd_ioctl_information(block, argp,
+ sizeof(struct dasd_information_t));
break;
case BIODASDINFO2:
- rc = dasd_ioctl_information(block, cmd, argp);
+ rc = dasd_ioctl_information(block, argp,
+ sizeof(struct dasd_information2_t));
break;
case BIODASDPRRD:
rc = dasd_ioctl_read_profile(block, argp);
@@ -660,3 +665,36 @@ int dasd_ioctl(struct block_device *bdev, fmode_t mode,
dasd_put_device(base);
return rc;
}
+
+
+/**
+ * dasd_biodasdinfo() - fill out the dasd information structure
+ * @disk [in]: pointer to gendisk structure that references a DASD
+ * @info [out]: pointer to the dasd_information2_t structure
+ *
+ * Provide access to DASD-specific information.
+ * The gendisk structure is checked to determine whether it belongs to the
+ * DASD driver by comparing the gendisk->fops pointer.
+ * If it does not belong to the DASD driver, -EINVAL is returned.
+ * Otherwise the provided dasd_information2_t structure is filled out.
+ *
+ * Returns:
+ * %0 on success and a negative error value on failure.
+ */
+int dasd_biodasdinfo(struct gendisk *disk, struct dasd_information2_t *info)
+{
+ struct dasd_device *base;
+ int error;
+
+ if (disk->fops != &dasd_device_operations)
+ return -EINVAL;
+
+ base = dasd_device_from_gendisk(disk);
+ if (!base)
+ return -ENODEV;
+ error = __dasd_ioctl_information(base->block, info);
+ dasd_put_device(base);
+ return error;
+}
+/* exported so that symbol_get() in partition detection is possible */
+EXPORT_SYMBOL_GPL(dasd_biodasdinfo);
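The export comment above refers to consumers that resolve dasd_biodasdinfo() at run time via symbol_get(), so partition detection can work without a hard link-time dependency on dasd_mod. A minimal sketch of that call pattern is shown below; the helper name get_dasd_info() is hypothetical and not part of this patch, and it assumes <linux/dasd_mod.h> provides the prototype and struct dasd_information2_t definitions.

#include <linux/module.h>
#include <linux/genhd.h>
#include <linux/dasd_mod.h>

/* Hypothetical helper: call dasd_biodasdinfo() only if dasd_mod is loaded. */
static int get_dasd_info(struct gendisk *disk, struct dasd_information2_t *info)
{
	int (*fn)(struct gendisk *disk, struct dasd_information2_t *info);
	int rc;

	fn = symbol_get(dasd_biodasdinfo);
	if (!fn)
		return -ENODEV;		/* dasd_mod not present */

	rc = fn(disk, info);		/* 0 on success, negative errno on failure */
	symbol_put(dasd_biodasdinfo);
	return rc;
}

The symbol_get()/symbol_put() pair also pins the module for the duration of the call, which is why the export is sufficient for a built-in partition parser.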
diff --git a/drivers/scsi/esas2r/esas2r_ioctl.c b/drivers/scsi/esas2r/esas2r_ioctl.c
index 442c5e70a7b4..cc620f10eabc 100644
--- a/drivers/scsi/esas2r/esas2r_ioctl.c
+++ b/drivers/scsi/esas2r/esas2r_ioctl.c
@@ -1510,7 +1510,7 @@ ioctl_done:
}
/* Always copy the buffer back, if only to pick up the status */
- err = __copy_to_user(arg, ioctl, sizeof(struct atto_express_ioctl));
+ err = copy_to_user(arg, ioctl, sizeof(struct atto_express_ioctl));
if (err != 0) {
esas2r_log(ESAS2R_LOG_WARN,
"ioctl_handler copy_to_user didn't copy everything (err %d, cmd %u)",
diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h
index 8e2a356911a9..62e96d4fdcc6 100644
--- a/drivers/scsi/lpfc/lpfc.h
+++ b/drivers/scsi/lpfc/lpfc.h
@@ -143,7 +143,7 @@ struct lpfc_dmabuf {
struct lpfc_nvmet_ctxbuf {
struct list_head list;
- struct lpfc_nvmet_rcv_ctx *context;
+ struct lpfc_async_xchg_ctx *context;
struct lpfc_iocbq *iocbq;
struct lpfc_sglq *sglq;
struct work_struct defer_work;
diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c
index 1354c141d614..f089867674cb 100644
--- a/drivers/scsi/lpfc/lpfc_attr.c
+++ b/drivers/scsi/lpfc/lpfc_attr.c
@@ -37,8 +37,6 @@
#include <scsi/scsi_transport_fc.h>
#include <scsi/fc/fc_fs.h>
-#include <linux/nvme-fc-driver.h>
-
#include "lpfc_hw4.h"
#include "lpfc_hw.h"
#include "lpfc_sli.h"
@@ -48,7 +46,6 @@
#include "lpfc.h"
#include "lpfc_scsi.h"
#include "lpfc_nvme.h"
-#include "lpfc_nvmet.h"
#include "lpfc_logmsg.h"
#include "lpfc_version.h"
#include "lpfc_compat.h"
diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h
index 76dc8d9493d2..9ee6b930a655 100644
--- a/drivers/scsi/lpfc/lpfc_crtn.h
+++ b/drivers/scsi/lpfc/lpfc_crtn.h
@@ -24,7 +24,6 @@ typedef int (*node_filter)(struct lpfc_nodelist *, void *);
struct fc_rport;
struct fc_frame_header;
-struct lpfc_nvmet_rcv_ctx;
void lpfc_down_link(struct lpfc_hba *, LPFC_MBOXQ_t *);
void lpfc_sli_read_link_ste(struct lpfc_hba *);
void lpfc_dump_mem(struct lpfc_hba *, LPFC_MBOXQ_t *, uint16_t, uint16_t);
@@ -564,12 +563,16 @@ void lpfc_nvme_update_localport(struct lpfc_vport *vport);
int lpfc_nvmet_create_targetport(struct lpfc_hba *phba);
int lpfc_nvmet_update_targetport(struct lpfc_hba *phba);
void lpfc_nvmet_destroy_targetport(struct lpfc_hba *phba);
-void lpfc_nvmet_unsol_ls_event(struct lpfc_hba *phba,
- struct lpfc_sli_ring *pring, struct lpfc_iocbq *piocb);
+int lpfc_nvme_handle_lsreq(struct lpfc_hba *phba,
+ struct lpfc_async_xchg_ctx *axchg);
+int lpfc_nvmet_handle_lsreq(struct lpfc_hba *phba,
+ struct lpfc_async_xchg_ctx *axchg);
void lpfc_nvmet_unsol_fcp_event(struct lpfc_hba *phba, uint32_t idx,
struct rqb_dmabuf *nvmebuf, uint64_t isr_ts,
uint8_t cqflag);
void lpfc_nvme_mod_param_dep(struct lpfc_hba *phba);
+void lpfc_nvmet_invalidate_host(struct lpfc_hba *phba,
+ struct lpfc_nodelist *ndlp);
void lpfc_nvme_abort_fcreq_cmpl(struct lpfc_hba *phba,
struct lpfc_iocbq *cmdiocb,
struct lpfc_wcqe_complete *abts_cmpl);
diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c
index 2aa578d20f8c..196f6ae9952e 100644
--- a/drivers/scsi/lpfc/lpfc_ct.c
+++ b/drivers/scsi/lpfc/lpfc_ct.c
@@ -44,7 +44,6 @@
#include "lpfc_disc.h"
#include "lpfc.h"
#include "lpfc_scsi.h"
-#include "lpfc_nvme.h"
#include "lpfc_logmsg.h"
#include "lpfc_crtn.h"
#include "lpfc_version.h"
diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c
index 8a6e02aa553f..4daae90e0c99 100644
--- a/drivers/scsi/lpfc/lpfc_debugfs.c
+++ b/drivers/scsi/lpfc/lpfc_debugfs.c
@@ -39,8 +39,6 @@
#include <scsi/scsi_transport_fc.h>
#include <scsi/fc/fc_fs.h>
-#include <linux/nvme-fc-driver.h>
-
#include "lpfc_hw4.h"
#include "lpfc_hw.h"
#include "lpfc_sli.h"
@@ -50,7 +48,6 @@
#include "lpfc.h"
#include "lpfc_scsi.h"
#include "lpfc_nvme.h"
-#include "lpfc_nvmet.h"
#include "lpfc_logmsg.h"
#include "lpfc_crtn.h"
#include "lpfc_vport.h"
@@ -1035,7 +1032,7 @@ lpfc_debugfs_nvmestat_data(struct lpfc_vport *vport, char *buf, int size)
{
struct lpfc_hba *phba = vport->phba;
struct lpfc_nvmet_tgtport *tgtp;
- struct lpfc_nvmet_rcv_ctx *ctxp, *next_ctxp;
+ struct lpfc_async_xchg_ctx *ctxp, *next_ctxp;
struct nvme_fc_local_port *localport;
struct lpfc_fc4_ctrl_stat *cstat;
struct lpfc_nvme_lport *lport;
@@ -2166,10 +2163,6 @@ lpfc_debugfs_lockstat_write(struct file *file, const char __user *buf,
char *pbuf;
int i;
- /* Protect copy from user */
- if (!access_ok(buf, nbytes))
- return -EFAULT;
-
memset(mybuf, 0, sizeof(mybuf));
if (copy_from_user(mybuf, buf, nbytes))
@@ -2621,10 +2614,6 @@ lpfc_debugfs_multixripools_write(struct file *file, const char __user *buf,
if (nbytes > 64)
nbytes = 64;
- /* Protect copy from user */
- if (!access_ok(buf, nbytes))
- return -EFAULT;
-
memset(mybuf, 0, sizeof(mybuf));
if (copy_from_user(mybuf, buf, nbytes))
@@ -2787,10 +2776,6 @@ lpfc_debugfs_scsistat_write(struct file *file, const char __user *buf,
char mybuf[6] = {0};
int i;
- /* Protect copy from user */
- if (!access_ok(buf, nbytes))
- return -EFAULT;
-
if (copy_from_user(mybuf, buf, (nbytes >= sizeof(mybuf)) ?
(sizeof(mybuf) - 1) : nbytes))
return -EFAULT;
diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c
index 789eecbf32eb..f5952f8cd4b5 100644
--- a/drivers/scsi/lpfc/lpfc_hbadisc.c
+++ b/drivers/scsi/lpfc/lpfc_hbadisc.c
@@ -36,8 +36,6 @@
#include <scsi/scsi_transport_fc.h>
#include <scsi/fc/fc_fs.h>
-#include <linux/nvme-fc-driver.h>
-
#include "lpfc_hw4.h"
#include "lpfc_hw.h"
#include "lpfc_nl.h"
@@ -825,6 +823,12 @@ lpfc_cleanup_rpis(struct lpfc_vport *vport, int remove)
if ((phba->sli_rev < LPFC_SLI_REV4) &&
(!remove && ndlp->nlp_type & NLP_FABRIC))
continue;
+
+ /* Notify transport of connectivity loss to trigger cleanup. */
+ if (phba->nvmet_support &&
+ ndlp->nlp_state == NLP_STE_UNMAPPED_NODE)
+ lpfc_nvmet_invalidate_host(phba, ndlp);
+
lpfc_disc_state_machine(vport, ndlp, NULL,
remove
? NLP_EVT_DEVICE_RM
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 4104bdcdbb6f..ea99483345f2 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -50,8 +50,6 @@
#include <scsi/scsi_tcq.h>
#include <scsi/fc/fc_fs.h>
-#include <linux/nvme-fc-driver.h>
-
#include "lpfc_hw4.h"
#include "lpfc_hw.h"
#include "lpfc_sli.h"
@@ -61,7 +59,6 @@
#include "lpfc.h"
#include "lpfc_scsi.h"
#include "lpfc_nvme.h"
-#include "lpfc_nvmet.h"
#include "lpfc_logmsg.h"
#include "lpfc_crtn.h"
#include "lpfc_vport.h"
@@ -1032,7 +1029,7 @@ static int
lpfc_hba_down_post_s4(struct lpfc_hba *phba)
{
struct lpfc_io_buf *psb, *psb_next;
- struct lpfc_nvmet_rcv_ctx *ctxp, *ctxp_next;
+ struct lpfc_async_xchg_ctx *ctxp, *ctxp_next;
struct lpfc_sli4_hdw_queue *qp;
LIST_HEAD(aborts);
LIST_HEAD(nvme_aborts);
@@ -1099,7 +1096,7 @@ lpfc_hba_down_post_s4(struct lpfc_hba *phba)
&nvmet_aborts);
spin_unlock_irq(&phba->sli4_hba.abts_nvmet_buf_list_lock);
list_for_each_entry_safe(ctxp, ctxp_next, &nvmet_aborts, list) {
- ctxp->flag &= ~(LPFC_NVMET_XBUSY | LPFC_NVMET_ABORT_OP);
+ ctxp->flag &= ~(LPFC_NVME_XBUSY | LPFC_NVME_ABORT_OP);
lpfc_nvmet_ctxbuf_post(phba, ctxp->ctxbuf);
}
}
diff --git a/drivers/scsi/lpfc/lpfc_mem.c b/drivers/scsi/lpfc/lpfc_mem.c
index 7082279e4c01..726f6619230f 100644
--- a/drivers/scsi/lpfc/lpfc_mem.c
+++ b/drivers/scsi/lpfc/lpfc_mem.c
@@ -31,8 +31,6 @@
#include <scsi/scsi_transport_fc.h>
#include <scsi/fc/fc_fs.h>
-#include <linux/nvme-fc-driver.h>
-
#include "lpfc_hw4.h"
#include "lpfc_hw.h"
#include "lpfc_sli.h"
@@ -41,8 +39,6 @@
#include "lpfc_disc.h"
#include "lpfc.h"
#include "lpfc_scsi.h"
-#include "lpfc_nvme.h"
-#include "lpfc_nvmet.h"
#include "lpfc_crtn.h"
#include "lpfc_logmsg.h"
diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c
index a024e5a3918f..d8501bd959e7 100644
--- a/drivers/scsi/lpfc/lpfc_nportdisc.c
+++ b/drivers/scsi/lpfc/lpfc_nportdisc.c
@@ -32,8 +32,6 @@
#include <scsi/scsi_transport_fc.h>
#include <scsi/fc/fc_fs.h>
-#include <linux/nvme-fc-driver.h>
-
#include "lpfc_hw4.h"
#include "lpfc_hw.h"
#include "lpfc_sli.h"
@@ -491,6 +489,11 @@ lpfc_rcv_plogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
(unsigned long long)
wwn_to_u64(sp->portName.u.wwn));
+ /* Notify transport of connectivity loss to trigger cleanup. */
+ if (phba->nvmet_support &&
+ ndlp->nlp_state == NLP_STE_UNMAPPED_NODE)
+ lpfc_nvmet_invalidate_host(phba, ndlp);
+
ndlp->nlp_prev_state = ndlp->nlp_state;
/* rport needs to be unregistered first */
lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE);
@@ -841,6 +844,12 @@ lpfc_rcv_logo(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
lpfc_els_rsp_acc(vport, ELS_CMD_PRLO, cmdiocb, ndlp, NULL);
else
lpfc_els_rsp_acc(vport, ELS_CMD_ACC, cmdiocb, ndlp, NULL);
+
+ /* Notify transport of connectivity loss to trigger cleanup. */
+ if (phba->nvmet_support &&
+ ndlp->nlp_state == NLP_STE_UNMAPPED_NODE)
+ lpfc_nvmet_invalidate_host(phba, ndlp);
+
if (ndlp->nlp_DID == Fabric_DID) {
if (vport->port_state <= LPFC_FDISC)
goto out;
diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c
index a45936e08031..b46ba70f78da 100644
--- a/drivers/scsi/lpfc/lpfc_nvme.c
+++ b/drivers/scsi/lpfc/lpfc_nvme.c
@@ -36,9 +36,6 @@
#include <scsi/scsi_transport_fc.h>
#include <scsi/fc/fc_fs.h>
-#include <linux/nvme.h>
-#include <linux/nvme-fc-driver.h>
-#include <linux/nvme-fc.h>
#include "lpfc_version.h"
#include "lpfc_hw4.h"
#include "lpfc_hw.h"
@@ -396,43 +393,100 @@ lpfc_nvme_remoteport_delete(struct nvme_fc_remote_port *remoteport)
return;
}
-static void
-lpfc_nvme_cmpl_gen_req(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
- struct lpfc_wcqe_complete *wcqe)
+/**
+ * lpfc_nvme_handle_lsreq - Process an unsolicited NVME LS request
+ * @phba: pointer to lpfc hba data structure.
+ * @axchg: pointer to exchange context for the NVME LS request
+ *
+ * This routine is used for processing an asynchronously received NVME LS
+ * request. Any remaining validation is done and the LS is then forwarded
+ * to the nvme-fc transport via nvme_fc_rcv_ls_req().
+ *
+ * The calling sequence should be: nvme_fc_rcv_ls_req() -> (processing)
+ * -> lpfc_nvme_xmt_ls_rsp/cmp -> req->done.
+ * __lpfc_nvme_xmt_ls_rsp_cmp should free the allocated axchg.
+ *
+ * Returns 0 if LS was handled and delivered to the transport
+ * Returns 1 if LS failed to be handled and should be dropped
+ */
+int
+lpfc_nvme_handle_lsreq(struct lpfc_hba *phba,
+ struct lpfc_async_xchg_ctx *axchg)
{
- struct lpfc_vport *vport = cmdwqe->vport;
+#if (IS_ENABLED(CONFIG_NVME_FC))
+ struct lpfc_vport *vport;
+ struct lpfc_nvme_rport *lpfc_rport;
+ struct nvme_fc_remote_port *remoteport;
struct lpfc_nvme_lport *lport;
- uint32_t status;
+ uint32_t *payload = axchg->payload;
+ int rc;
+
+ vport = axchg->ndlp->vport;
+ lpfc_rport = axchg->ndlp->nrport;
+ if (!lpfc_rport)
+ return -EINVAL;
+
+ remoteport = lpfc_rport->remoteport;
+ if (!vport->localport)
+ return -EINVAL;
+
+ lport = vport->localport->private;
+ if (!lport)
+ return -EINVAL;
+
+ rc = nvme_fc_rcv_ls_req(remoteport, &axchg->ls_rsp, axchg->payload,
+ axchg->size);
+
+ lpfc_printf_log(phba, KERN_INFO, LOG_NVME_DISC,
+ "6205 NVME Unsol rcv: sz %d rc %d: %08x %08x %08x "
+ "%08x %08x %08x\n",
+ axchg->size, rc,
+ *payload, *(payload+1), *(payload+2),
+ *(payload+3), *(payload+4), *(payload+5));
+
+ if (!rc)
+ return 0;
+#endif
+ return 1;
+}
+
+/**
+ * __lpfc_nvme_ls_req_cmp - Generic completion handler for a NVME
+ * LS request.
+ * @phba: Pointer to HBA context object
+ * @vport: The local port that issued the LS
+ * @cmdwqe: Pointer to driver command WQE object.
+ * @wcqe: Pointer to driver response CQE object.
+ *
+ * This function is the generic completion handler for NVME LS requests.
+ * The function updates any states and statistics, calls the transport
+ * ls_req done() routine, then tears down the command and buffers used
+ * for the LS request.
+ **/
+void
+__lpfc_nvme_ls_req_cmp(struct lpfc_hba *phba, struct lpfc_vport *vport,
+ struct lpfc_iocbq *cmdwqe,
+ struct lpfc_wcqe_complete *wcqe)
+{
struct nvmefc_ls_req *pnvme_lsreq;
struct lpfc_dmabuf *buf_ptr;
struct lpfc_nodelist *ndlp;
+ uint32_t status;
pnvme_lsreq = (struct nvmefc_ls_req *)cmdwqe->context2;
+ ndlp = (struct lpfc_nodelist *)cmdwqe->context1;
status = bf_get(lpfc_wcqe_c_status, wcqe) & LPFC_IOCB_STATUS_MASK;
- if (vport->localport) {
- lport = (struct lpfc_nvme_lport *)vport->localport->private;
- if (lport) {
- atomic_inc(&lport->fc4NvmeLsCmpls);
- if (status) {
- if (bf_get(lpfc_wcqe_c_xb, wcqe))
- atomic_inc(&lport->cmpl_ls_xb);
- atomic_inc(&lport->cmpl_ls_err);
- }
- }
- }
-
- ndlp = (struct lpfc_nodelist *)cmdwqe->context1;
lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC,
- "6047 nvme cmpl Enter "
- "Data %px DID %x Xri: %x status %x reason x%x "
- "cmd:x%px lsreg:x%px bmp:x%px ndlp:x%px\n",
+ "6047 NVMEx LS REQ %px cmpl DID %x Xri: %x "
+ "status %x reason x%x cmd:x%px lsreg:x%px bmp:x%px "
+ "ndlp:x%px\n",
pnvme_lsreq, ndlp ? ndlp->nlp_DID : 0,
cmdwqe->sli4_xritag, status,
(wcqe->parameter & 0xffff),
cmdwqe, pnvme_lsreq, cmdwqe->context3, ndlp);
- lpfc_nvmeio_data(phba, "NVME LS CMPL: xri x%x stat x%x parm x%x\n",
+ lpfc_nvmeio_data(phba, "NVMEx LS CMPL: xri x%x stat x%x parm x%x\n",
cmdwqe->sli4_xritag, status, wcqe->parameter);
if (cmdwqe->context3) {
@@ -445,7 +499,7 @@ lpfc_nvme_cmpl_gen_req(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
pnvme_lsreq->done(pnvme_lsreq, status);
else
lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC,
- "6046 nvme cmpl without done call back? "
+ "6046 NVMEx cmpl without done call back? "
"Data %px DID %x Xri: %x status %x\n",
pnvme_lsreq, ndlp ? ndlp->nlp_DID : 0,
cmdwqe->sli4_xritag, status);
@@ -456,6 +510,31 @@ lpfc_nvme_cmpl_gen_req(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
lpfc_sli_release_iocbq(phba, cmdwqe);
}
+static void
+lpfc_nvme_ls_req_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
+ struct lpfc_wcqe_complete *wcqe)
+{
+ struct lpfc_vport *vport = cmdwqe->vport;
+ struct lpfc_nvme_lport *lport;
+ uint32_t status;
+
+ status = bf_get(lpfc_wcqe_c_status, wcqe) & LPFC_IOCB_STATUS_MASK;
+
+ if (vport->localport) {
+ lport = (struct lpfc_nvme_lport *)vport->localport->private;
+ if (lport) {
+ atomic_inc(&lport->fc4NvmeLsCmpls);
+ if (status) {
+ if (bf_get(lpfc_wcqe_c_xb, wcqe))
+ atomic_inc(&lport->cmpl_ls_xb);
+ atomic_inc(&lport->cmpl_ls_err);
+ }
+ }
+ }
+
+ __lpfc_nvme_ls_req_cmp(phba, vport, cmdwqe, wcqe);
+}
+
static int
lpfc_nvme_gen_req(struct lpfc_vport *vport, struct lpfc_dmabuf *bmp,
struct lpfc_dmabuf *inp,
@@ -557,13 +636,6 @@ lpfc_nvme_gen_req(struct lpfc_vport *vport, struct lpfc_dmabuf *bmp,
/* Issue GEN REQ WQE for NPORT <did> */
- lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS,
- "6050 Issue GEN REQ WQE to NPORT x%x "
- "Data: x%x x%x wq:x%px lsreq:x%px bmp:x%px "
- "xmit:%d 1st:%d\n",
- ndlp->nlp_DID, genwqe->iotag,
- vport->port_state,
- genwqe, pnvme_lsreq, bmp, xmit_len, first_len);
genwqe->wqe_cmpl = cmpl;
genwqe->iocb_cmpl = NULL;
genwqe->drvrTimeout = tmo + LPFC_DRVR_TIMEOUT;
@@ -575,105 +647,108 @@ lpfc_nvme_gen_req(struct lpfc_vport *vport, struct lpfc_dmabuf *bmp,
rc = lpfc_sli4_issue_wqe(phba, &phba->sli4_hba.hdwq[0], genwqe);
if (rc) {
- lpfc_printf_vlog(vport, KERN_ERR, LOG_ELS,
+ lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC | LOG_ELS,
"6045 Issue GEN REQ WQE to NPORT x%x "
- "Data: x%x x%x\n",
+ "Data: x%x x%x rc x%x\n",
ndlp->nlp_DID, genwqe->iotag,
- vport->port_state);
+ vport->port_state, rc);
lpfc_sli_release_iocbq(phba, genwqe);
return 1;
}
+
+ lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC | LOG_ELS,
+ "6050 Issue GEN REQ WQE to NPORT x%x "
+ "Data: oxid: x%x state: x%x wq:x%px lsreq:x%px "
+ "bmp:x%px xmit:%d 1st:%d\n",
+ ndlp->nlp_DID, genwqe->sli4_xritag,
+ vport->port_state,
+ genwqe, pnvme_lsreq, bmp, xmit_len, first_len);
return 0;
}
+
/**
- * lpfc_nvme_ls_req - Issue an Link Service request
- * @lpfc_pnvme: Pointer to the driver's nvme instance data
- * @lpfc_nvme_lport: Pointer to the driver's local port data
- * @lpfc_nvme_rport: Pointer to the rport getting the @lpfc_nvme_ereq
+ * __lpfc_nvme_ls_req - Generic service routine to issue an NVME LS request
+ * @vport: The local port issuing the LS
+ * @ndlp: The remote port to send the LS to
+ * @pnvme_lsreq: Pointer to LS request structure from the transport
*
- * Driver registers this routine to handle any link service request
- * from the nvme_fc transport to a remote nvme-aware port.
+ * Routine validates the ndlp, builds buffers and sends a GEN_REQUEST
+ * WQE to perform the LS operation.
*
* Return value :
* 0 - Success
- * TODO: What are the failure codes.
+ * non-zero: various error codes, in form of -Exxx
**/
-static int
-lpfc_nvme_ls_req(struct nvme_fc_local_port *pnvme_lport,
- struct nvme_fc_remote_port *pnvme_rport,
- struct nvmefc_ls_req *pnvme_lsreq)
+int
+__lpfc_nvme_ls_req(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
+ struct nvmefc_ls_req *pnvme_lsreq,
+ void (*gen_req_cmp)(struct lpfc_hba *phba,
+ struct lpfc_iocbq *cmdwqe,
+ struct lpfc_wcqe_complete *wcqe))
{
- int ret = 0;
- struct lpfc_nvme_lport *lport;
- struct lpfc_nvme_rport *rport;
- struct lpfc_vport *vport;
- struct lpfc_nodelist *ndlp;
- struct ulp_bde64 *bpl;
struct lpfc_dmabuf *bmp;
+ struct ulp_bde64 *bpl;
+ int ret;
uint16_t ntype, nstate;
- /* there are two dma buf in the request, actually there is one and
- * the second one is just the start address + cmd size.
- * Before calling lpfc_nvme_gen_req these buffers need to be wrapped
- * in a lpfc_dmabuf struct. When freeing we just free the wrapper
- * because the nvem layer owns the data bufs.
- * We do not have to break these packets open, we don't care what is in
- * them. And we do not have to look at the resonse data, we only care
- * that we got a response. All of the caring is going to happen in the
- * nvme-fc layer.
- */
-
- lport = (struct lpfc_nvme_lport *)pnvme_lport->private;
- rport = (struct lpfc_nvme_rport *)pnvme_rport->private;
- if (unlikely(!lport) || unlikely(!rport))
- return -EINVAL;
-
- vport = lport->vport;
-
- if (vport->load_flag & FC_UNLOADING)
- return -ENODEV;
-
- /* Need the ndlp. It is stored in the driver's rport. */
- ndlp = rport->ndlp;
if (!ndlp || !NLP_CHK_NODE_ACT(ndlp)) {
- lpfc_printf_vlog(vport, KERN_ERR, LOG_NODE | LOG_NVME_IOERR,
- "6051 Remoteport x%px, rport has invalid ndlp. "
- "Failing LS Req\n", pnvme_rport);
+ lpfc_printf_vlog(vport, KERN_ERR,
+ LOG_NVME_DISC | LOG_NODE | LOG_NVME_IOERR,
+ "6051 NVMEx LS REQ: Bad NDLP x%px, Failing "
+ "LS Req\n",
+ ndlp);
return -ENODEV;
}
- /* The remote node has to be a mapped nvme target or an
- * unmapped nvme initiator or it's an error.
- */
ntype = ndlp->nlp_type;
nstate = ndlp->nlp_state;
if ((ntype & NLP_NVME_TARGET && nstate != NLP_STE_MAPPED_NODE) ||
(ntype & NLP_NVME_INITIATOR && nstate != NLP_STE_UNMAPPED_NODE)) {
- lpfc_printf_vlog(vport, KERN_ERR, LOG_NODE | LOG_NVME_IOERR,
- "6088 DID x%06x not ready for "
- "IO. State x%x, Type x%x\n",
- pnvme_rport->port_id,
- ndlp->nlp_state, ndlp->nlp_type);
+ lpfc_printf_vlog(vport, KERN_ERR,
+ LOG_NVME_DISC | LOG_NODE | LOG_NVME_IOERR,
+ "6088 NVMEx LS REQ: Fail DID x%06x not "
+ "ready for IO. Type x%x, State x%x\n",
+ ndlp->nlp_DID, ntype, nstate);
return -ENODEV;
}
- bmp = kmalloc(sizeof(struct lpfc_dmabuf), GFP_KERNEL);
+
+ /*
+ * There are two DMA buffers in the request; actually there is one and
+ * the second one is just the start address + cmd size.
+ * Before calling lpfc_nvme_gen_req these buffers need to be wrapped
+ * in a lpfc_dmabuf struct. When freeing we just free the wrapper
+ * because the nvme layer owns the data bufs.
+ * We do not have to break these packets open, we don't care what is
+ * in them. And we do not have to look at the response data, we only
+ * care that we got a response. All of the caring is going to happen
+ * in the nvme-fc layer.
+ */
+
+ bmp = kmalloc(sizeof(*bmp), GFP_KERNEL);
if (!bmp) {
- lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC,
- "6044 Could not find node for DID %x\n",
- pnvme_rport->port_id);
- return 2;
+ lpfc_printf_vlog(vport, KERN_ERR,
+ LOG_NVME_DISC | LOG_NVME_IOERR,
+ "6044 NVMEx LS REQ: Could not alloc LS buf "
+ "for DID %x\n",
+ ndlp->nlp_DID);
+ return -ENOMEM;
}
- INIT_LIST_HEAD(&bmp->list);
+
bmp->virt = lpfc_mbuf_alloc(vport->phba, MEM_PRI, &(bmp->phys));
if (!bmp->virt) {
- lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC,
- "6042 Could not find node for DID %x\n",
- pnvme_rport->port_id);
+ lpfc_printf_vlog(vport, KERN_ERR,
+ LOG_NVME_DISC | LOG_NVME_IOERR,
+ "6042 NVMEx LS REQ: Could not alloc mbuf "
+ "for DID %x\n",
+ ndlp->nlp_DID);
kfree(bmp);
- return 3;
+ return -ENOMEM;
}
+
+ INIT_LIST_HEAD(&bmp->list);
+
bpl = (struct ulp_bde64 *)bmp->virt;
bpl->addrHigh = le32_to_cpu(putPaddrHigh(pnvme_lsreq->rqstdma));
bpl->addrLow = le32_to_cpu(putPaddrLow(pnvme_lsreq->rqstdma));
@@ -688,118 +763,206 @@ lpfc_nvme_ls_req(struct nvme_fc_local_port *pnvme_lport,
bpl->tus.f.bdeSize = pnvme_lsreq->rsplen;
bpl->tus.w = le32_to_cpu(bpl->tus.w);
- /* Expand print to include key fields. */
lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC,
- "6149 Issue LS Req to DID 0x%06x lport x%px, "
- "rport x%px lsreq x%px rqstlen:%d rsplen:%d "
- "%pad %pad\n",
- ndlp->nlp_DID, pnvme_lport, pnvme_rport,
- pnvme_lsreq, pnvme_lsreq->rqstlen,
- pnvme_lsreq->rsplen, &pnvme_lsreq->rqstdma,
- &pnvme_lsreq->rspdma);
-
- atomic_inc(&lport->fc4NvmeLsRequests);
+ "6149 NVMEx LS REQ: Issue to DID 0x%06x lsreq x%px, "
+ "rqstlen:%d rsplen:%d %pad %pad\n",
+ ndlp->nlp_DID, pnvme_lsreq, pnvme_lsreq->rqstlen,
+ pnvme_lsreq->rsplen, &pnvme_lsreq->rqstdma,
+ &pnvme_lsreq->rspdma);
- /* Hardcode the wait to 30 seconds. Connections are failing otherwise.
- * This code allows it all to work.
- */
ret = lpfc_nvme_gen_req(vport, bmp, pnvme_lsreq->rqstaddr,
- pnvme_lsreq, lpfc_nvme_cmpl_gen_req,
- ndlp, 2, 30, 0);
+ pnvme_lsreq, gen_req_cmp, ndlp, 2,
+ LPFC_NVME_LS_TIMEOUT, 0);
if (ret != WQE_SUCCESS) {
- atomic_inc(&lport->xmt_ls_err);
- lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC,
- "6052 EXIT. issue ls wqe failed lport x%px, "
- "rport x%px lsreq x%px Status %x DID %x\n",
- pnvme_lport, pnvme_rport, pnvme_lsreq,
- ret, ndlp->nlp_DID);
+ lpfc_printf_vlog(vport, KERN_ERR,
+ LOG_NVME_DISC | LOG_NVME_IOERR,
+ "6052 NVMEx REQ: EXIT. issue ls wqe failed "
+ "lsreq x%px Status %x DID %x\n",
+ pnvme_lsreq, ret, ndlp->nlp_DID);
lpfc_mbuf_free(vport->phba, bmp->virt, bmp->phys);
kfree(bmp);
- return ret;
+ return -EIO;
}
- /* Stub in routine and return 0 for now. */
- return ret;
+ return 0;
}
/**
- * lpfc_nvme_ls_abort - Issue an Link Service request
- * @lpfc_pnvme: Pointer to the driver's nvme instance data
- * @lpfc_nvme_lport: Pointer to the driver's local port data
- * @lpfc_nvme_rport: Pointer to the rport getting the @lpfc_nvme_ereq
+ * lpfc_nvme_ls_req - Issue an NVME Link Service request
+ * @lpfc_nvme_lport: Transport localport that LS is to be issued from.
+ * @lpfc_nvme_rport: Transport remoteport that LS is to be sent to.
+ * @pnvme_lsreq: the transport nvmefc_ls_req structure for the LS
*
* Driver registers this routine to handle any link service request
* from the nvme_fc transport to a remote nvme-aware port.
*
* Return value :
* 0 - Success
- * TODO: What are the failure codes.
+ * non-zero: various error codes, in form of -Exxx
**/
-static void
-lpfc_nvme_ls_abort(struct nvme_fc_local_port *pnvme_lport,
- struct nvme_fc_remote_port *pnvme_rport,
- struct nvmefc_ls_req *pnvme_lsreq)
+static int
+lpfc_nvme_ls_req(struct nvme_fc_local_port *pnvme_lport,
+ struct nvme_fc_remote_port *pnvme_rport,
+ struct nvmefc_ls_req *pnvme_lsreq)
{
struct lpfc_nvme_lport *lport;
+ struct lpfc_nvme_rport *rport;
struct lpfc_vport *vport;
- struct lpfc_hba *phba;
- struct lpfc_nodelist *ndlp;
- LIST_HEAD(abort_list);
- struct lpfc_sli_ring *pring;
- struct lpfc_iocbq *wqe, *next_wqe;
+ int ret;
lport = (struct lpfc_nvme_lport *)pnvme_lport->private;
- if (unlikely(!lport))
- return;
- vport = lport->vport;
- phba = vport->phba;
+ rport = (struct lpfc_nvme_rport *)pnvme_rport->private;
+ if (unlikely(!lport) || unlikely(!rport))
+ return -EINVAL;
+ vport = lport->vport;
if (vport->load_flag & FC_UNLOADING)
- return;
+ return -ENODEV;
+
+ atomic_inc(&lport->fc4NvmeLsRequests);
+
+ ret = __lpfc_nvme_ls_req(vport, rport->ndlp, pnvme_lsreq,
+ lpfc_nvme_ls_req_cmp);
+ if (ret)
+ atomic_inc(&lport->xmt_ls_err);
+
+ return ret;
+}
+
+/**
+ * __lpfc_nvme_ls_abort - Generic service routine to abort a prior
+ * NVME LS request
+ * @vport: The local port that issued the LS
+ * @ndlp: The remote port the LS was sent to
+ * @pnvme_lsreq: Pointer to LS request structure from the transport
+ *
+ * The driver validates the ndlp, looks for the LS, and aborts the
+ * LS if found.
+ *
+ * Returns:
+ * 0 : if LS found and aborted
+ * non-zero: various error conditions in form -Exxx
+ **/
+int
+__lpfc_nvme_ls_abort(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
+ struct nvmefc_ls_req *pnvme_lsreq)
+{
+ struct lpfc_hba *phba = vport->phba;
+ struct lpfc_sli_ring *pring;
+ struct lpfc_iocbq *wqe, *next_wqe;
+ bool foundit = false;
- ndlp = lpfc_findnode_did(vport, pnvme_rport->port_id);
if (!ndlp) {
- lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS,
- "6049 Could not find node for DID %x\n",
- pnvme_rport->port_id);
- return;
+ lpfc_printf_log(phba, KERN_ERR,
+ LOG_NVME_DISC | LOG_NODE |
+ LOG_NVME_IOERR | LOG_NVME_ABTS,
+ "6049 NVMEx LS REQ Abort: Bad NDLP x%px DID "
+ "x%06x, Failing LS Req\n",
+ ndlp, ndlp ? ndlp->nlp_DID : 0);
+ return -EINVAL;
}
- /* Expand print to include key fields. */
- lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_ABTS,
- "6040 ENTER. lport x%px, rport x%px lsreq x%px rqstlen:%d "
- "rsplen:%d %pad %pad\n",
- pnvme_lport, pnvme_rport,
+ lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC | LOG_NVME_ABTS,
+ "6040 NVMEx LS REQ Abort: Issue LS_ABORT for lsreq "
+ "x%p rqstlen:%d rsplen:%d %pad %pad\n",
pnvme_lsreq, pnvme_lsreq->rqstlen,
pnvme_lsreq->rsplen, &pnvme_lsreq->rqstdma,
&pnvme_lsreq->rspdma);
/*
- * Lock the ELS ring txcmplq and build a local list of all ELS IOs
- * that need an ABTS. The IOs need to stay on the txcmplq so that
- * the abort operation completes them successfully.
+ * Lock the ELS ring txcmplq and look for the wqe that matches
+ * this ELS. If found, issue an abort on the wqe.
*/
pring = phba->sli4_hba.nvmels_wq->pring;
spin_lock_irq(&phba->hbalock);
spin_lock(&pring->ring_lock);
list_for_each_entry_safe(wqe, next_wqe, &pring->txcmplq, list) {
- /* Add to abort_list on on NDLP match. */
- if (lpfc_check_sli_ndlp(phba, pring, wqe, ndlp)) {
+ if (wqe->context2 == pnvme_lsreq) {
wqe->iocb_flag |= LPFC_DRIVER_ABORTED;
- list_add_tail(&wqe->dlist, &abort_list);
+ foundit = true;
+ break;
}
}
spin_unlock(&pring->ring_lock);
- spin_unlock_irq(&phba->hbalock);
- /* Abort the targeted IOs and remove them from the abort list. */
- list_for_each_entry_safe(wqe, next_wqe, &abort_list, dlist) {
- atomic_inc(&lport->xmt_ls_abort);
- spin_lock_irq(&phba->hbalock);
- list_del_init(&wqe->dlist);
+ if (foundit)
lpfc_sli_issue_abort_iotag(phba, pring, wqe);
- spin_unlock_irq(&phba->hbalock);
+ spin_unlock_irq(&phba->hbalock);
+
+ if (foundit)
+ return 0;
+
+ lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC | LOG_NVME_ABTS,
+ "6213 NVMEx LS REQ Abort: Unable to locate req x%p\n",
+ pnvme_lsreq);
+ return -EINVAL;
+}
+
+static int
+lpfc_nvme_xmt_ls_rsp(struct nvme_fc_local_port *localport,
+ struct nvme_fc_remote_port *remoteport,
+ struct nvmefc_ls_rsp *ls_rsp)
+{
+ struct lpfc_async_xchg_ctx *axchg =
+ container_of(ls_rsp, struct lpfc_async_xchg_ctx, ls_rsp);
+ struct lpfc_nvme_lport *lport;
+ int rc;
+
+ if (axchg->phba->pport->load_flag & FC_UNLOADING)
+ return -ENODEV;
+
+ lport = (struct lpfc_nvme_lport *)localport->private;
+
+ rc = __lpfc_nvme_xmt_ls_rsp(axchg, ls_rsp, __lpfc_nvme_xmt_ls_rsp_cmp);
+
+ if (rc) {
+ /*
+ * unless the failure is due to having already sent
+ * the response, an abort will be generated for the
+ * exchange if the rsp can't be sent.
+ */
+ if (rc != -EALREADY)
+ atomic_inc(&lport->xmt_ls_abort);
+ return rc;
}
+
+ return 0;
+}
+
+/**
+ * lpfc_nvme_ls_abort - Abort a prior NVME LS request
+ * @lpfc_nvme_lport: Transport localport that LS is to be issued from.
+ * @lpfc_nvme_rport: Transport remoteport that LS is to be sent to.
+ * @pnvme_lsreq: the transport nvmefc_ls_req structure for the LS
+ *
+ * Driver registers this routine to abort an NVME LS request that is
+ * in progress (from the transport's perspective).
+ **/
+static void
+lpfc_nvme_ls_abort(struct nvme_fc_local_port *pnvme_lport,
+ struct nvme_fc_remote_port *pnvme_rport,
+ struct nvmefc_ls_req *pnvme_lsreq)
+{
+ struct lpfc_nvme_lport *lport;
+ struct lpfc_vport *vport;
+ struct lpfc_hba *phba;
+ struct lpfc_nodelist *ndlp;
+ int ret;
+
+ lport = (struct lpfc_nvme_lport *)pnvme_lport->private;
+ if (unlikely(!lport))
+ return;
+ vport = lport->vport;
+ phba = vport->phba;
+
+ if (vport->load_flag & FC_UNLOADING)
+ return;
+
+ ndlp = lpfc_findnode_did(vport, pnvme_rport->port_id);
+
+ ret = __lpfc_nvme_ls_abort(vport, ndlp, pnvme_lsreq);
+ if (!ret)
+ atomic_inc(&lport->xmt_ls_abort);
}
/* Fix up the existing sgls for NVME IO. */
@@ -1911,6 +2074,7 @@ static struct nvme_fc_port_template lpfc_nvme_template = {
.fcp_io = lpfc_nvme_fcp_io_submit,
.ls_abort = lpfc_nvme_ls_abort,
.fcp_abort = lpfc_nvme_fcp_abort,
+ .xmt_ls_rsp = lpfc_nvme_xmt_ls_rsp,
.max_hw_queues = 1,
.max_sgl_segments = LPFC_NVME_DEFAULT_SEGS,
@@ -2106,6 +2270,7 @@ lpfc_nvme_create_localport(struct lpfc_vport *vport)
atomic_set(&lport->cmpl_fcp_err, 0);
atomic_set(&lport->cmpl_ls_xb, 0);
atomic_set(&lport->cmpl_ls_err, 0);
+
atomic_set(&lport->fc4NvmeLsRequests, 0);
atomic_set(&lport->fc4NvmeLsCmpls, 0);
}
diff --git a/drivers/scsi/lpfc/lpfc_nvme.h b/drivers/scsi/lpfc/lpfc_nvme.h
index 593c48ff634e..4a4c3f780e1f 100644
--- a/drivers/scsi/lpfc/lpfc_nvme.h
+++ b/drivers/scsi/lpfc/lpfc_nvme.h
@@ -21,6 +21,10 @@
* included with this package. *
********************************************************************/
+#include <linux/nvme.h>
+#include <linux/nvme-fc-driver.h>
+#include <linux/nvme-fc.h>
+
#define LPFC_NVME_DEFAULT_SEGS (64 + 1) /* 256K IOs */
#define LPFC_NVME_ERSP_LEN 0x20
@@ -74,3 +78,179 @@ struct lpfc_nvme_rport {
struct lpfc_nvme_fcpreq_priv {
struct lpfc_io_buf *nvme_buf;
};
+
+/*
+ * Set NVME LS request timeouts to 30s. This is larger than the 2*R_A_TOV
+ * set by the spec, which appears to be too short for some devices.
+ */
+#define LPFC_NVME_LS_TIMEOUT 30
+
+
+#define LPFC_NVMET_DEFAULT_SEGS (64 + 1) /* 256K IOs */
+#define LPFC_NVMET_RQE_MIN_POST 128
+#define LPFC_NVMET_RQE_DEF_POST 512
+#define LPFC_NVMET_RQE_DEF_COUNT 2048
+#define LPFC_NVMET_SUCCESS_LEN 12
+
+#define LPFC_NVMET_MRQ_AUTO 0
+#define LPFC_NVMET_MRQ_MAX 16
+
+#define LPFC_NVMET_WAIT_TMO (5 * MSEC_PER_SEC)
+
+/* Used for NVME Target */
+#define LPFC_NVMET_INV_HOST_ACTIVE 1
+
+struct lpfc_nvmet_tgtport {
+ struct lpfc_hba *phba;
+ struct completion *tport_unreg_cmp;
+ atomic_t state; /* tracks nvmet hosthandle invalidation */
+
+ /* Stats counters - lpfc_nvmet_unsol_ls_buffer */
+ atomic_t rcv_ls_req_in;
+ atomic_t rcv_ls_req_out;
+ atomic_t rcv_ls_req_drop;
+ atomic_t xmt_ls_abort;
+ atomic_t xmt_ls_abort_cmpl;
+
+ /* Stats counters - lpfc_nvmet_xmt_ls_rsp */
+ atomic_t xmt_ls_rsp;
+ atomic_t xmt_ls_drop;
+
+ /* Stats counters - lpfc_nvmet_xmt_ls_rsp_cmp */
+ atomic_t xmt_ls_rsp_error;
+ atomic_t xmt_ls_rsp_aborted;
+ atomic_t xmt_ls_rsp_xb_set;
+ atomic_t xmt_ls_rsp_cmpl;
+
+ /* Stats counters - lpfc_nvmet_unsol_fcp_buffer */
+ atomic_t rcv_fcp_cmd_in;
+ atomic_t rcv_fcp_cmd_out;
+ atomic_t rcv_fcp_cmd_drop;
+ atomic_t rcv_fcp_cmd_defer;
+ atomic_t xmt_fcp_release;
+
+ /* Stats counters - lpfc_nvmet_xmt_fcp_op */
+ atomic_t xmt_fcp_drop;
+ atomic_t xmt_fcp_read_rsp;
+ atomic_t xmt_fcp_read;
+ atomic_t xmt_fcp_write;
+ atomic_t xmt_fcp_rsp;
+
+ /* Stats counters - lpfc_nvmet_xmt_fcp_op_cmp */
+ atomic_t xmt_fcp_rsp_xb_set;
+ atomic_t xmt_fcp_rsp_cmpl;
+ atomic_t xmt_fcp_rsp_error;
+ atomic_t xmt_fcp_rsp_aborted;
+ atomic_t xmt_fcp_rsp_drop;
+
+ /* Stats counters - lpfc_nvmet_xmt_fcp_abort */
+ atomic_t xmt_fcp_xri_abort_cqe;
+ atomic_t xmt_fcp_abort;
+ atomic_t xmt_fcp_abort_cmpl;
+ atomic_t xmt_abort_sol;
+ atomic_t xmt_abort_unsol;
+ atomic_t xmt_abort_rsp;
+ atomic_t xmt_abort_rsp_error;
+
+ /* Stats counters - defer IO */
+ atomic_t defer_ctx;
+ atomic_t defer_fod;
+ atomic_t defer_wqfull;
+};
+
+struct lpfc_nvmet_ctx_info {
+ struct list_head nvmet_ctx_list;
+ spinlock_t nvmet_ctx_list_lock; /* lock per CPU */
+ struct lpfc_nvmet_ctx_info *nvmet_ctx_next_cpu;
+ struct lpfc_nvmet_ctx_info *nvmet_ctx_start_cpu;
+ uint16_t nvmet_ctx_list_cnt;
+ char pad[16]; /* pad to a cache-line */
+};
+
+/* This retrieves the context info associated with the specified cpu / mrq */
+#define lpfc_get_ctx_list(phba, cpu, mrq) \
+ (phba->sli4_hba.nvmet_ctx_info + ((cpu * phba->cfg_nvmet_mrq) + mrq))
+
+/* Values for state field of struct lpfc_async_xchg_ctx */
+#define LPFC_NVME_STE_LS_RCV 1
+#define LPFC_NVME_STE_LS_ABORT 2
+#define LPFC_NVME_STE_LS_RSP 3
+#define LPFC_NVME_STE_RCV 4
+#define LPFC_NVME_STE_DATA 5
+#define LPFC_NVME_STE_ABORT 6
+#define LPFC_NVME_STE_DONE 7
+#define LPFC_NVME_STE_FREE 0xff
+
+/* Values for flag field of struct lpfc_async_xchg_ctx */
+#define LPFC_NVME_IO_INP 0x1 /* IO is in progress on exchange */
+#define LPFC_NVME_ABORT_OP 0x2 /* Abort WQE issued on exchange */
+#define LPFC_NVME_XBUSY 0x4 /* XB bit set on IO cmpl */
+#define LPFC_NVME_CTX_RLS 0x8 /* ctx free requested */
+#define LPFC_NVME_ABTS_RCV 0x10 /* ABTS received on exchange */
+#define LPFC_NVME_CTX_REUSE_WQ 0x20 /* ctx reused via WQ */
+#define LPFC_NVME_DEFER_WQFULL 0x40 /* Waiting on a free WQE */
+#define LPFC_NVME_TNOTIFY 0x80 /* notify transport of abts */
+
+struct lpfc_async_xchg_ctx {
+ union {
+ struct nvmefc_tgt_fcp_req fcp_req;
+ } hdlrctx;
+ struct list_head list;
+ struct lpfc_hba *phba;
+ struct lpfc_nodelist *ndlp;
+ struct nvmefc_ls_req *ls_req;
+ struct nvmefc_ls_rsp ls_rsp;
+ struct lpfc_iocbq *wqeq;
+ struct lpfc_iocbq *abort_wqeq;
+ spinlock_t ctxlock; /* protect flag access */
+ uint32_t sid;
+ uint32_t offset;
+ uint16_t oxid;
+ uint16_t size;
+ uint16_t entry_cnt;
+ uint16_t cpu;
+ uint16_t idx;
+ uint16_t state;
+ uint16_t flag;
+ void *payload;
+ struct rqb_dmabuf *rqb_buffer;
+ struct lpfc_nvmet_ctxbuf *ctxbuf;
+ struct lpfc_sli4_hdw_queue *hdwq;
+
+#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
+ uint64_t ts_isr_cmd;
+ uint64_t ts_cmd_nvme;
+ uint64_t ts_nvme_data;
+ uint64_t ts_data_wqput;
+ uint64_t ts_isr_data;
+ uint64_t ts_data_nvme;
+ uint64_t ts_nvme_status;
+ uint64_t ts_status_wqput;
+ uint64_t ts_isr_status;
+ uint64_t ts_status_nvme;
+#endif
+};
+
+
+/* routines found in lpfc_nvme.c */
+int __lpfc_nvme_ls_req(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
+ struct nvmefc_ls_req *pnvme_lsreq,
+ void (*gen_req_cmp)(struct lpfc_hba *phba,
+ struct lpfc_iocbq *cmdwqe,
+ struct lpfc_wcqe_complete *wcqe));
+void __lpfc_nvme_ls_req_cmp(struct lpfc_hba *phba, struct lpfc_vport *vport,
+ struct lpfc_iocbq *cmdwqe, struct lpfc_wcqe_complete *wcqe);
+int __lpfc_nvme_ls_abort(struct lpfc_vport *vport,
+ struct lpfc_nodelist *ndlp, struct nvmefc_ls_req *pnvme_lsreq);
+
+/* routines found in lpfc_nvmet.c */
+int lpfc_nvme_unsol_ls_issue_abort(struct lpfc_hba *phba,
+ struct lpfc_async_xchg_ctx *ctxp, uint32_t sid,
+ uint16_t xri);
+int __lpfc_nvme_xmt_ls_rsp(struct lpfc_async_xchg_ctx *axchg,
+ struct nvmefc_ls_rsp *ls_rsp,
+ void (*xmt_ls_rsp_cmp)(struct lpfc_hba *phba,
+ struct lpfc_iocbq *cmdwqe,
+ struct lpfc_wcqe_complete *wcqe));
+void __lpfc_nvme_xmt_ls_rsp_cmp(struct lpfc_hba *phba,
+ struct lpfc_iocbq *cmdwqe, struct lpfc_wcqe_complete *wcqe);
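The declarations above split LS handling into shared __lpfc_nvme_* service routines plus thin per-role wrappers that only add their own statistics and completion handling; lpfc_nvme.c and lpfc_nvmet.c both follow that shape later in this patch. A minimal sketch of the intended wrapping, with hypothetical example_* names (not part of the patch), might look like:

/* Hypothetical per-role completion wrapper: account, then call the shared cmp. */
static void example_ls_req_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
			       struct lpfc_wcqe_complete *wcqe)
{
	/* role-specific statistics would be updated here */
	__lpfc_nvme_ls_req_cmp(phba, cmdwqe->vport, cmdwqe, wcqe);
}

/* Hypothetical per-role LS issue path: all validation and WQE setup is shared. */
static int example_ls_req(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
			  struct nvmefc_ls_req *lsreq)
{
	return __lpfc_nvme_ls_req(vport, ndlp, lsreq, example_ls_req_cmp);
}

The signatures mirror the prototypes declared above; the only per-role choice is which completion routine is passed to __lpfc_nvme_ls_req().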
diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c
index 565419bf8d74..32eb5e873e9b 100644
--- a/drivers/scsi/lpfc/lpfc_nvmet.c
+++ b/drivers/scsi/lpfc/lpfc_nvmet.c
@@ -36,10 +36,6 @@
#include <scsi/scsi_transport_fc.h>
#include <scsi/fc/fc_fs.h>
-#include <linux/nvme.h>
-#include <linux/nvme-fc-driver.h>
-#include <linux/nvme-fc.h>
-
#include "lpfc_version.h"
#include "lpfc_hw4.h"
#include "lpfc_hw.h"
@@ -50,29 +46,25 @@
#include "lpfc.h"
#include "lpfc_scsi.h"
#include "lpfc_nvme.h"
-#include "lpfc_nvmet.h"
#include "lpfc_logmsg.h"
#include "lpfc_crtn.h"
#include "lpfc_vport.h"
#include "lpfc_debugfs.h"
static struct lpfc_iocbq *lpfc_nvmet_prep_ls_wqe(struct lpfc_hba *,
- struct lpfc_nvmet_rcv_ctx *,
+ struct lpfc_async_xchg_ctx *,
dma_addr_t rspbuf,
uint16_t rspsize);
static struct lpfc_iocbq *lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *,
- struct lpfc_nvmet_rcv_ctx *);
+ struct lpfc_async_xchg_ctx *);
static int lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *,
- struct lpfc_nvmet_rcv_ctx *,
+ struct lpfc_async_xchg_ctx *,
uint32_t, uint16_t);
static int lpfc_nvmet_unsol_fcp_issue_abort(struct lpfc_hba *,
- struct lpfc_nvmet_rcv_ctx *,
+ struct lpfc_async_xchg_ctx *,
uint32_t, uint16_t);
-static int lpfc_nvmet_unsol_ls_issue_abort(struct lpfc_hba *,
- struct lpfc_nvmet_rcv_ctx *,
- uint32_t, uint16_t);
static void lpfc_nvmet_wqfull_flush(struct lpfc_hba *, struct lpfc_queue *,
- struct lpfc_nvmet_rcv_ctx *);
+ struct lpfc_async_xchg_ctx *);
static void lpfc_nvmet_fcp_rqst_defer_work(struct work_struct *);
static void lpfc_nvmet_process_rcv_fcp_req(struct lpfc_nvmet_ctxbuf *ctx_buf);
@@ -221,10 +213,10 @@ lpfc_nvmet_cmd_template(void)
}
#if (IS_ENABLED(CONFIG_NVME_TARGET_FC))
-static struct lpfc_nvmet_rcv_ctx *
+static struct lpfc_async_xchg_ctx *
lpfc_nvmet_get_ctx_for_xri(struct lpfc_hba *phba, u16 xri)
{
- struct lpfc_nvmet_rcv_ctx *ctxp;
+ struct lpfc_async_xchg_ctx *ctxp;
unsigned long iflag;
bool found = false;
@@ -243,10 +235,10 @@ lpfc_nvmet_get_ctx_for_xri(struct lpfc_hba *phba, u16 xri)
return NULL;
}
-static struct lpfc_nvmet_rcv_ctx *
+static struct lpfc_async_xchg_ctx *
lpfc_nvmet_get_ctx_for_oxid(struct lpfc_hba *phba, u16 oxid, u32 sid)
{
- struct lpfc_nvmet_rcv_ctx *ctxp;
+ struct lpfc_async_xchg_ctx *ctxp;
unsigned long iflag;
bool found = false;
@@ -267,7 +259,8 @@ lpfc_nvmet_get_ctx_for_oxid(struct lpfc_hba *phba, u16 oxid, u32 sid)
#endif
static void
-lpfc_nvmet_defer_release(struct lpfc_hba *phba, struct lpfc_nvmet_rcv_ctx *ctxp)
+lpfc_nvmet_defer_release(struct lpfc_hba *phba,
+ struct lpfc_async_xchg_ctx *ctxp)
{
lockdep_assert_held(&ctxp->ctxlock);
@@ -275,10 +268,10 @@ lpfc_nvmet_defer_release(struct lpfc_hba *phba, struct lpfc_nvmet_rcv_ctx *ctxp)
"6313 NVMET Defer ctx release oxid x%x flg x%x\n",
ctxp->oxid, ctxp->flag);
- if (ctxp->flag & LPFC_NVMET_CTX_RLS)
+ if (ctxp->flag & LPFC_NVME_CTX_RLS)
return;
- ctxp->flag |= LPFC_NVMET_CTX_RLS;
+ ctxp->flag |= LPFC_NVME_CTX_RLS;
spin_lock(&phba->sli4_hba.t_active_list_lock);
list_del(&ctxp->list);
spin_unlock(&phba->sli4_hba.t_active_list_lock);
@@ -288,6 +281,53 @@ lpfc_nvmet_defer_release(struct lpfc_hba *phba, struct lpfc_nvmet_rcv_ctx *ctxp)
}
/**
+ * __lpfc_nvme_xmt_ls_rsp_cmp - Generic completion handler for the
+ * transmission of an NVME LS response.
+ * @phba: Pointer to HBA context object.
+ * @cmdwqe: Pointer to driver command WQE object.
+ * @wcqe: Pointer to driver response CQE object.
+ *
+ * The function is called from the SLI ring event handler with no
+ * lock held. It frees the memory resources used by the command
+ * that sent the NVME LS RSP.
+ **/
+void
+__lpfc_nvme_xmt_ls_rsp_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
+ struct lpfc_wcqe_complete *wcqe)
+{
+ struct lpfc_async_xchg_ctx *axchg = cmdwqe->context2;
+ struct nvmefc_ls_rsp *ls_rsp = &axchg->ls_rsp;
+ uint32_t status, result;
+
+ status = bf_get(lpfc_wcqe_c_status, wcqe) & LPFC_IOCB_STATUS_MASK;
+ result = wcqe->parameter;
+
+ if (axchg->state != LPFC_NVME_STE_LS_RSP || axchg->entry_cnt != 2) {
+ lpfc_printf_log(phba, KERN_ERR, LOG_NVME_DISC | LOG_NVME_IOERR,
+ "6410 NVMEx LS cmpl state mismatch IO x%x: "
+ "%d %d\n",
+ axchg->oxid, axchg->state, axchg->entry_cnt);
+ }
+
+ lpfc_nvmeio_data(phba, "NVMEx LS CMPL: xri x%x stat x%x result x%x\n",
+ axchg->oxid, status, result);
+
+ lpfc_printf_log(phba, KERN_INFO, LOG_NVME_DISC,
+ "6038 NVMEx LS rsp cmpl: %d %d oxid x%x\n",
+ status, result, axchg->oxid);
+
+ lpfc_nlp_put(cmdwqe->context1);
+ cmdwqe->context2 = NULL;
+ cmdwqe->context3 = NULL;
+ lpfc_sli_release_iocbq(phba, cmdwqe);
+ ls_rsp->done(ls_rsp);
+ lpfc_printf_log(phba, KERN_INFO, LOG_NVME_DISC,
+ "6200 NVMEx LS rsp cmpl done status %d oxid x%x\n",
+ status, axchg->oxid);
+ kfree(axchg);
+}
+
+/**
* lpfc_nvmet_xmt_ls_rsp_cmp - Completion handler for LS Response
* @phba: Pointer to HBA context object.
* @cmdwqe: Pointer to driver command WQE object.
@@ -295,33 +335,23 @@ lpfc_nvmet_defer_release(struct lpfc_hba *phba, struct lpfc_nvmet_rcv_ctx *ctxp)
*
* The function is called from SLI ring event handler with no
* lock held. This function is the completion handler for NVME LS commands
- * The function frees memory resources used for the NVME commands.
+ * The function updates any states and statistics, then calls the
+ * generic completion handler to free resources.
**/
static void
lpfc_nvmet_xmt_ls_rsp_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
struct lpfc_wcqe_complete *wcqe)
{
struct lpfc_nvmet_tgtport *tgtp;
- struct nvmefc_tgt_ls_req *rsp;
- struct lpfc_nvmet_rcv_ctx *ctxp;
uint32_t status, result;
- status = bf_get(lpfc_wcqe_c_status, wcqe);
- result = wcqe->parameter;
- ctxp = cmdwqe->context2;
-
- if (ctxp->state != LPFC_NVMET_STE_LS_RSP || ctxp->entry_cnt != 2) {
- lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
- "6410 NVMET LS cmpl state mismatch IO x%x: "
- "%d %d\n",
- ctxp->oxid, ctxp->state, ctxp->entry_cnt);
- }
-
if (!phba->targetport)
- goto out;
+ goto finish;
- tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
+ status = bf_get(lpfc_wcqe_c_status, wcqe) & LPFC_IOCB_STATUS_MASK;
+ result = wcqe->parameter;
+ tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
if (tgtp) {
if (status) {
atomic_inc(&tgtp->xmt_ls_rsp_error);
@@ -334,22 +364,8 @@ lpfc_nvmet_xmt_ls_rsp_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
}
}
-out:
- rsp = &ctxp->ctx.ls_req;
-
- lpfc_nvmeio_data(phba, "NVMET LS CMPL: xri x%x stat x%x result x%x\n",
- ctxp->oxid, status, result);
-
- lpfc_printf_log(phba, KERN_INFO, LOG_NVME_DISC,
- "6038 NVMET LS rsp cmpl: %d %d oxid x%x\n",
- status, result, ctxp->oxid);
-
- lpfc_nlp_put(cmdwqe->context1);
- cmdwqe->context2 = NULL;
- cmdwqe->context3 = NULL;
- lpfc_sli_release_iocbq(phba, cmdwqe);
- rsp->done(rsp);
- kfree(ctxp);
+finish:
+ __lpfc_nvme_xmt_ls_rsp_cmp(phba, cmdwqe, wcqe);
}
/**
@@ -369,7 +385,7 @@ void
lpfc_nvmet_ctxbuf_post(struct lpfc_hba *phba, struct lpfc_nvmet_ctxbuf *ctx_buf)
{
#if (IS_ENABLED(CONFIG_NVME_TARGET_FC))
- struct lpfc_nvmet_rcv_ctx *ctxp = ctx_buf->context;
+ struct lpfc_async_xchg_ctx *ctxp = ctx_buf->context;
struct lpfc_nvmet_tgtport *tgtp;
struct fc_frame_header *fc_hdr;
struct rqb_dmabuf *nvmebuf;
@@ -378,7 +394,7 @@ lpfc_nvmet_ctxbuf_post(struct lpfc_hba *phba, struct lpfc_nvmet_ctxbuf *ctx_buf)
int cpu;
unsigned long iflag;
- if (ctxp->state == LPFC_NVMET_STE_FREE) {
+ if (ctxp->state == LPFC_NVME_STE_FREE) {
lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
"6411 NVMET free, already free IO x%x: %d %d\n",
ctxp->oxid, ctxp->state, ctxp->entry_cnt);
@@ -390,8 +406,8 @@ lpfc_nvmet_ctxbuf_post(struct lpfc_hba *phba, struct lpfc_nvmet_ctxbuf *ctx_buf)
/* check if freed in another path whilst acquiring lock */
if (nvmebuf) {
ctxp->rqb_buffer = NULL;
- if (ctxp->flag & LPFC_NVMET_CTX_REUSE_WQ) {
- ctxp->flag &= ~LPFC_NVMET_CTX_REUSE_WQ;
+ if (ctxp->flag & LPFC_NVME_CTX_REUSE_WQ) {
+ ctxp->flag &= ~LPFC_NVME_CTX_REUSE_WQ;
spin_unlock_irqrestore(&ctxp->ctxlock, iflag);
nvmebuf->hrq->rqbp->rqb_free_buffer(phba,
nvmebuf);
@@ -404,7 +420,7 @@ lpfc_nvmet_ctxbuf_post(struct lpfc_hba *phba, struct lpfc_nvmet_ctxbuf *ctx_buf)
spin_unlock_irqrestore(&ctxp->ctxlock, iflag);
}
}
- ctxp->state = LPFC_NVMET_STE_FREE;
+ ctxp->state = LPFC_NVME_STE_FREE;
spin_lock_irqsave(&phba->sli4_hba.nvmet_io_wait_lock, iflag);
if (phba->sli4_hba.nvmet_io_wait_cnt) {
@@ -421,14 +437,14 @@ lpfc_nvmet_ctxbuf_post(struct lpfc_hba *phba, struct lpfc_nvmet_ctxbuf *ctx_buf)
size = nvmebuf->bytes_recv;
sid = sli4_sid_from_fc_hdr(fc_hdr);
- ctxp = (struct lpfc_nvmet_rcv_ctx *)ctx_buf->context;
+ ctxp = (struct lpfc_async_xchg_ctx *)ctx_buf->context;
ctxp->wqeq = NULL;
ctxp->offset = 0;
ctxp->phba = phba;
ctxp->size = size;
ctxp->oxid = oxid;
ctxp->sid = sid;
- ctxp->state = LPFC_NVMET_STE_RCV;
+ ctxp->state = LPFC_NVME_STE_RCV;
ctxp->entry_cnt = 1;
ctxp->flag = 0;
ctxp->ctxbuf = ctx_buf;
@@ -453,7 +469,7 @@ lpfc_nvmet_ctxbuf_post(struct lpfc_hba *phba, struct lpfc_nvmet_ctxbuf *ctx_buf)
/* Indicate that a replacement buffer has been posted */
spin_lock_irqsave(&ctxp->ctxlock, iflag);
- ctxp->flag |= LPFC_NVMET_CTX_REUSE_WQ;
+ ctxp->flag |= LPFC_NVME_CTX_REUSE_WQ;
spin_unlock_irqrestore(&ctxp->ctxlock, iflag);
if (!queue_work(phba->wq, &ctx_buf->defer_work)) {
@@ -495,7 +511,7 @@ lpfc_nvmet_ctxbuf_post(struct lpfc_hba *phba, struct lpfc_nvmet_ctxbuf *ctx_buf)
#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
static void
lpfc_nvmet_ktime(struct lpfc_hba *phba,
- struct lpfc_nvmet_rcv_ctx *ctxp)
+ struct lpfc_async_xchg_ctx *ctxp)
{
uint64_t seg1, seg2, seg3, seg4, seg5;
uint64_t seg6, seg7, seg8, seg9, seg10;
@@ -704,16 +720,16 @@ lpfc_nvmet_xmt_fcp_op_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
{
struct lpfc_nvmet_tgtport *tgtp;
struct nvmefc_tgt_fcp_req *rsp;
- struct lpfc_nvmet_rcv_ctx *ctxp;
+ struct lpfc_async_xchg_ctx *ctxp;
uint32_t status, result, op, start_clean, logerr;
#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
int id;
#endif
ctxp = cmdwqe->context2;
- ctxp->flag &= ~LPFC_NVMET_IO_INP;
+ ctxp->flag &= ~LPFC_NVME_IO_INP;
- rsp = &ctxp->ctx.fcp_req;
+ rsp = &ctxp->hdlrctx.fcp_req;
op = rsp->op;
status = bf_get(lpfc_wcqe_c_status, wcqe);
@@ -740,13 +756,13 @@ lpfc_nvmet_xmt_fcp_op_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
/* pick up SLI4 exhange busy condition */
if (bf_get(lpfc_wcqe_c_xb, wcqe)) {
- ctxp->flag |= LPFC_NVMET_XBUSY;
+ ctxp->flag |= LPFC_NVME_XBUSY;
logerr |= LOG_NVME_ABTS;
if (tgtp)
atomic_inc(&tgtp->xmt_fcp_rsp_xb_set);
} else {
- ctxp->flag &= ~LPFC_NVMET_XBUSY;
+ ctxp->flag &= ~LPFC_NVME_XBUSY;
}
lpfc_printf_log(phba, KERN_INFO, logerr,
@@ -768,7 +784,7 @@ lpfc_nvmet_xmt_fcp_op_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
if ((op == NVMET_FCOP_READDATA_RSP) ||
(op == NVMET_FCOP_RSP)) {
/* Sanity check */
- ctxp->state = LPFC_NVMET_STE_DONE;
+ ctxp->state = LPFC_NVME_STE_DONE;
ctxp->entry_cnt++;
#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
@@ -826,17 +842,32 @@ lpfc_nvmet_xmt_fcp_op_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
#endif
}
-static int
-lpfc_nvmet_xmt_ls_rsp(struct nvmet_fc_target_port *tgtport,
- struct nvmefc_tgt_ls_req *rsp)
+/**
+ * __lpfc_nvme_xmt_ls_rsp - Generic service routine to transmit
+ * an NVME LS rsp for a prior NVME LS request that was received.
+ * @axchg: pointer to exchange context for the NVME LS request the response
+ * is for.
+ * @ls_rsp: pointer to the transport LS RSP that is to be sent
+ * @xmt_ls_rsp_cmp: completion routine to call upon RSP transmit done
+ *
+ * This routine is used to format and send a WQE to transmit a NVME LS
+ * Response. The response is for a prior NVME LS request that was
+ * received and posted to the transport.
+ *
+ * Returns:
+ * 0 : if the response was successfully transmitted
+ * non-zero : if the response failed to transmit, of the form -Exxx.
+ **/
+int
+__lpfc_nvme_xmt_ls_rsp(struct lpfc_async_xchg_ctx *axchg,
+ struct nvmefc_ls_rsp *ls_rsp,
+ void (*xmt_ls_rsp_cmp)(struct lpfc_hba *phba,
+ struct lpfc_iocbq *cmdwqe,
+ struct lpfc_wcqe_complete *wcqe))
{
- struct lpfc_nvmet_rcv_ctx *ctxp =
- container_of(rsp, struct lpfc_nvmet_rcv_ctx, ctx.ls_req);
- struct lpfc_hba *phba = ctxp->phba;
- struct hbq_dmabuf *nvmebuf =
- (struct hbq_dmabuf *)ctxp->rqb_buffer;
+ struct lpfc_hba *phba = axchg->phba;
+ struct hbq_dmabuf *nvmebuf = (struct hbq_dmabuf *)axchg->rqb_buffer;
struct lpfc_iocbq *nvmewqeq;
- struct lpfc_nvmet_tgtport *nvmep = tgtport->private;
struct lpfc_dmabuf dmabuf;
struct ulp_bde64 bpl;
int rc;
@@ -844,34 +875,28 @@ lpfc_nvmet_xmt_ls_rsp(struct nvmet_fc_target_port *tgtport,
if (phba->pport->load_flag & FC_UNLOADING)
return -ENODEV;
- if (phba->pport->load_flag & FC_UNLOADING)
- return -ENODEV;
-
lpfc_printf_log(phba, KERN_INFO, LOG_NVME_DISC,
- "6023 NVMET LS rsp oxid x%x\n", ctxp->oxid);
+ "6023 NVMEx LS rsp oxid x%x\n", axchg->oxid);
- if ((ctxp->state != LPFC_NVMET_STE_LS_RCV) ||
- (ctxp->entry_cnt != 1)) {
- lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
- "6412 NVMET LS rsp state mismatch "
+ if (axchg->state != LPFC_NVME_STE_LS_RCV || axchg->entry_cnt != 1) {
+ lpfc_printf_log(phba, KERN_ERR, LOG_NVME_DISC | LOG_NVME_IOERR,
+ "6412 NVMEx LS rsp state mismatch "
"oxid x%x: %d %d\n",
- ctxp->oxid, ctxp->state, ctxp->entry_cnt);
+ axchg->oxid, axchg->state, axchg->entry_cnt);
+ return -EALREADY;
}
- ctxp->state = LPFC_NVMET_STE_LS_RSP;
- ctxp->entry_cnt++;
+ axchg->state = LPFC_NVME_STE_LS_RSP;
+ axchg->entry_cnt++;
- nvmewqeq = lpfc_nvmet_prep_ls_wqe(phba, ctxp, rsp->rspdma,
- rsp->rsplen);
+ nvmewqeq = lpfc_nvmet_prep_ls_wqe(phba, axchg, ls_rsp->rspdma,
+ ls_rsp->rsplen);
if (nvmewqeq == NULL) {
- atomic_inc(&nvmep->xmt_ls_drop);
- lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
- "6150 LS Drop IO x%x: Prep\n",
- ctxp->oxid);
- lpfc_in_buf_free(phba, &nvmebuf->dbuf);
- atomic_inc(&nvmep->xmt_ls_abort);
- lpfc_nvmet_unsol_ls_issue_abort(phba, ctxp,
- ctxp->sid, ctxp->oxid);
- return -ENOMEM;
+ lpfc_printf_log(phba, KERN_ERR,
+ LOG_NVME_DISC | LOG_NVME_IOERR | LOG_NVME_ABTS,
+ "6150 NVMEx LS Drop Rsp x%x: Prep\n",
+ axchg->oxid);
+ rc = -ENOMEM;
+ goto out_free_buf;
}
/* Save numBdes for bpl2sgl */
@@ -881,39 +906,106 @@ lpfc_nvmet_xmt_ls_rsp(struct nvmet_fc_target_port *tgtport,
dmabuf.virt = &bpl;
bpl.addrLow = nvmewqeq->wqe.xmit_sequence.bde.addrLow;
bpl.addrHigh = nvmewqeq->wqe.xmit_sequence.bde.addrHigh;
- bpl.tus.f.bdeSize = rsp->rsplen;
+ bpl.tus.f.bdeSize = ls_rsp->rsplen;
bpl.tus.f.bdeFlags = 0;
bpl.tus.w = le32_to_cpu(bpl.tus.w);
+ /*
+ * Note: although we're using stack space for the dmabuf, the
+ * call to lpfc_sli4_issue_wqe is synchronous, so the dmabuf will
+ * not be referenced after the call returns to this routine.
+ */
- nvmewqeq->wqe_cmpl = lpfc_nvmet_xmt_ls_rsp_cmp;
+ nvmewqeq->wqe_cmpl = xmt_ls_rsp_cmp;
nvmewqeq->iocb_cmpl = NULL;
- nvmewqeq->context2 = ctxp;
+ nvmewqeq->context2 = axchg;
- lpfc_nvmeio_data(phba, "NVMET LS RESP: xri x%x wqidx x%x len x%x\n",
- ctxp->oxid, nvmewqeq->hba_wqidx, rsp->rsplen);
+ lpfc_nvmeio_data(phba, "NVMEx LS RSP: xri x%x wqidx x%x len x%x\n",
+ axchg->oxid, nvmewqeq->hba_wqidx, ls_rsp->rsplen);
+
+ rc = lpfc_sli4_issue_wqe(phba, axchg->hdwq, nvmewqeq);
+
+ /* clear to be sure there's no reference */
+ nvmewqeq->context3 = NULL;
- rc = lpfc_sli4_issue_wqe(phba, ctxp->hdwq, nvmewqeq);
if (rc == WQE_SUCCESS) {
/*
* Okay to repost buffer here, but wait till cmpl
* before freeing ctxp and iocbq.
*/
lpfc_in_buf_free(phba, &nvmebuf->dbuf);
- atomic_inc(&nvmep->xmt_ls_rsp);
return 0;
}
- /* Give back resources */
- atomic_inc(&nvmep->xmt_ls_drop);
- lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
- "6151 LS Drop IO x%x: Issue %d\n",
- ctxp->oxid, rc);
+
+ lpfc_printf_log(phba, KERN_ERR,
+ LOG_NVME_DISC | LOG_NVME_IOERR | LOG_NVME_ABTS,
+ "6151 NVMEx LS RSP x%x: failed to transmit %d\n",
+ axchg->oxid, rc);
+
+ rc = -ENXIO;
lpfc_nlp_put(nvmewqeq->context1);
+out_free_buf:
+ /* Give back resources */
lpfc_in_buf_free(phba, &nvmebuf->dbuf);
- atomic_inc(&nvmep->xmt_ls_abort);
- lpfc_nvmet_unsol_ls_issue_abort(phba, ctxp, ctxp->sid, ctxp->oxid);
- return -ENXIO;
+
+ /*
+ * As the transport doesn't track completions of responses, if the rsp
+ * fails to send, the transport will effectively ignore the rsp
+ * and consider the LS done. However, the driver has an active
+ * exchange open for the LS - so be sure to abort the exchange
+ * if the response isn't sent.
+ */
+ lpfc_nvme_unsol_ls_issue_abort(phba, axchg, axchg->sid, axchg->oxid);
+ return rc;
+}
+
+/**
+ * lpfc_nvmet_xmt_ls_rsp - Transmit NVME LS response
+ * @tgtport: pointer to target port that NVME LS is to be transmit from.
+ * @ls_rsp: pointer to the transport LS RSP that is to be sent
+ *
+ * Driver registers this routine to transmit responses for received NVME
+ * LS requests.
+ *
+ * This routine is used to format and send a WQE to transmit a NVME LS
+ * Response. The ls_rsp is used to reverse-map the LS to the original
+ * NVME LS request sequence, which provides addressing information for
+ * the remote port the LS is to be sent to, as well as the exchange id
+ * that the LS is bound to.
+ *
+ * Returns:
+ * 0 : if the response was successfully transmitted
+ * non-zero : if the response failed to transmit, of the form -Exxx.
+ **/
+static int
+lpfc_nvmet_xmt_ls_rsp(struct nvmet_fc_target_port *tgtport,
+ struct nvmefc_ls_rsp *ls_rsp)
+{
+ struct lpfc_async_xchg_ctx *axchg =
+ container_of(ls_rsp, struct lpfc_async_xchg_ctx, ls_rsp);
+ struct lpfc_nvmet_tgtport *nvmep = tgtport->private;
+ int rc;
+
+ if (axchg->phba->pport->load_flag & FC_UNLOADING)
+ return -ENODEV;
+
+ rc = __lpfc_nvme_xmt_ls_rsp(axchg, ls_rsp, lpfc_nvmet_xmt_ls_rsp_cmp);
+
+ if (rc) {
+ atomic_inc(&nvmep->xmt_ls_drop);
+ /*
+ * unless the failure is due to having already sent
+ * the response, an abort will be generated for the
+ * exchange if the rsp can't be sent.
+ */
+ if (rc != -EALREADY)
+ atomic_inc(&nvmep->xmt_ls_abort);
+ return rc;
+ }
+
+ atomic_inc(&nvmep->xmt_ls_rsp);
+ return 0;
}
static int
@@ -921,8 +1013,8 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport,
struct nvmefc_tgt_fcp_req *rsp)
{
struct lpfc_nvmet_tgtport *lpfc_nvmep = tgtport->private;
- struct lpfc_nvmet_rcv_ctx *ctxp =
- container_of(rsp, struct lpfc_nvmet_rcv_ctx, ctx.fcp_req);
+ struct lpfc_async_xchg_ctx *ctxp =
+ container_of(rsp, struct lpfc_async_xchg_ctx, hdlrctx.fcp_req);
struct lpfc_hba *phba = ctxp->phba;
struct lpfc_queue *wq;
struct lpfc_iocbq *nvmewqeq;
@@ -968,8 +1060,8 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport,
#endif
/* Sanity check */
- if ((ctxp->flag & LPFC_NVMET_ABTS_RCV) ||
- (ctxp->state == LPFC_NVMET_STE_ABORT)) {
+ if ((ctxp->flag & LPFC_NVME_ABTS_RCV) ||
+ (ctxp->state == LPFC_NVME_STE_ABORT)) {
atomic_inc(&lpfc_nvmep->xmt_fcp_drop);
lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
"6102 IO oxid x%x aborted\n",
@@ -997,7 +1089,7 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport,
lpfc_nvmeio_data(phba, "NVMET FCP CMND: xri x%x op x%x len x%x\n",
ctxp->oxid, rsp->op, rsp->rsplen);
- ctxp->flag |= LPFC_NVMET_IO_INP;
+ ctxp->flag |= LPFC_NVME_IO_INP;
rc = lpfc_sli4_issue_wqe(phba, ctxp->hdwq, nvmewqeq);
if (rc == WQE_SUCCESS) {
#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
@@ -1016,7 +1108,7 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport,
* WQ was full, so queue nvmewqeq to be sent after
* WQE release CQE
*/
- ctxp->flag |= LPFC_NVMET_DEFER_WQFULL;
+ ctxp->flag |= LPFC_NVME_DEFER_WQFULL;
wq = ctxp->hdwq->io_wq;
pring = wq->pring;
spin_lock_irqsave(&pring->ring_lock, iflags);
@@ -1056,8 +1148,8 @@ lpfc_nvmet_xmt_fcp_abort(struct nvmet_fc_target_port *tgtport,
struct nvmefc_tgt_fcp_req *req)
{
struct lpfc_nvmet_tgtport *lpfc_nvmep = tgtport->private;
- struct lpfc_nvmet_rcv_ctx *ctxp =
- container_of(req, struct lpfc_nvmet_rcv_ctx, ctx.fcp_req);
+ struct lpfc_async_xchg_ctx *ctxp =
+ container_of(req, struct lpfc_async_xchg_ctx, hdlrctx.fcp_req);
struct lpfc_hba *phba = ctxp->phba;
struct lpfc_queue *wq;
unsigned long flags;
@@ -1085,13 +1177,13 @@ lpfc_nvmet_xmt_fcp_abort(struct nvmet_fc_target_port *tgtport,
/* Since iaab/iaar are NOT set, we need to check
* if the firmware is in process of aborting IO
*/
- if (ctxp->flag & (LPFC_NVMET_XBUSY | LPFC_NVMET_ABORT_OP)) {
+ if (ctxp->flag & (LPFC_NVME_XBUSY | LPFC_NVME_ABORT_OP)) {
spin_unlock_irqrestore(&ctxp->ctxlock, flags);
return;
}
- ctxp->flag |= LPFC_NVMET_ABORT_OP;
+ ctxp->flag |= LPFC_NVME_ABORT_OP;
- if (ctxp->flag & LPFC_NVMET_DEFER_WQFULL) {
+ if (ctxp->flag & LPFC_NVME_DEFER_WQFULL) {
spin_unlock_irqrestore(&ctxp->ctxlock, flags);
lpfc_nvmet_unsol_fcp_issue_abort(phba, ctxp, ctxp->sid,
ctxp->oxid);
@@ -1101,11 +1193,11 @@ lpfc_nvmet_xmt_fcp_abort(struct nvmet_fc_target_port *tgtport,
}
spin_unlock_irqrestore(&ctxp->ctxlock, flags);
- /* An state of LPFC_NVMET_STE_RCV means we have just received
+ /* A state of LPFC_NVME_STE_RCV means we have just received
* the NVME command and have not started processing it.
* (by issuing any IO WQEs on this exchange yet)
*/
- if (ctxp->state == LPFC_NVMET_STE_RCV)
+ if (ctxp->state == LPFC_NVME_STE_RCV)
lpfc_nvmet_unsol_fcp_issue_abort(phba, ctxp, ctxp->sid,
ctxp->oxid);
else
@@ -1118,26 +1210,26 @@ lpfc_nvmet_xmt_fcp_release(struct nvmet_fc_target_port *tgtport,
struct nvmefc_tgt_fcp_req *rsp)
{
struct lpfc_nvmet_tgtport *lpfc_nvmep = tgtport->private;
- struct lpfc_nvmet_rcv_ctx *ctxp =
- container_of(rsp, struct lpfc_nvmet_rcv_ctx, ctx.fcp_req);
+ struct lpfc_async_xchg_ctx *ctxp =
+ container_of(rsp, struct lpfc_async_xchg_ctx, hdlrctx.fcp_req);
struct lpfc_hba *phba = ctxp->phba;
unsigned long flags;
bool aborting = false;
spin_lock_irqsave(&ctxp->ctxlock, flags);
- if (ctxp->flag & LPFC_NVMET_XBUSY)
+ if (ctxp->flag & LPFC_NVME_XBUSY)
lpfc_printf_log(phba, KERN_INFO, LOG_NVME_IOERR,
"6027 NVMET release with XBUSY flag x%x"
" oxid x%x\n",
ctxp->flag, ctxp->oxid);
- else if (ctxp->state != LPFC_NVMET_STE_DONE &&
- ctxp->state != LPFC_NVMET_STE_ABORT)
+ else if (ctxp->state != LPFC_NVME_STE_DONE &&
+ ctxp->state != LPFC_NVME_STE_ABORT)
lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
"6413 NVMET release bad state %d %d oxid x%x\n",
ctxp->state, ctxp->entry_cnt, ctxp->oxid);
- if ((ctxp->flag & LPFC_NVMET_ABORT_OP) ||
- (ctxp->flag & LPFC_NVMET_XBUSY)) {
+ if ((ctxp->flag & LPFC_NVME_ABORT_OP) ||
+ (ctxp->flag & LPFC_NVME_XBUSY)) {
aborting = true;
/* let the abort path do the real release */
lpfc_nvmet_defer_release(phba, ctxp);
@@ -1148,7 +1240,7 @@ lpfc_nvmet_xmt_fcp_release(struct nvmet_fc_target_port *tgtport,
ctxp->state, aborting);
atomic_inc(&lpfc_nvmep->xmt_fcp_release);
- ctxp->flag &= ~LPFC_NVMET_TNOTIFY;
+ ctxp->flag &= ~LPFC_NVME_TNOTIFY;
if (aborting)
return;
@@ -1161,8 +1253,8 @@ lpfc_nvmet_defer_rcv(struct nvmet_fc_target_port *tgtport,
struct nvmefc_tgt_fcp_req *rsp)
{
struct lpfc_nvmet_tgtport *tgtp;
- struct lpfc_nvmet_rcv_ctx *ctxp =
- container_of(rsp, struct lpfc_nvmet_rcv_ctx, ctx.fcp_req);
+ struct lpfc_async_xchg_ctx *ctxp =
+ container_of(rsp, struct lpfc_async_xchg_ctx, hdlrctx.fcp_req);
struct rqb_dmabuf *nvmebuf = ctxp->rqb_buffer;
struct lpfc_hba *phba = ctxp->phba;
unsigned long iflag;
@@ -1190,6 +1282,116 @@ lpfc_nvmet_defer_rcv(struct nvmet_fc_target_port *tgtport,
spin_unlock_irqrestore(&ctxp->ctxlock, iflag);
}
+/**
+ * lpfc_nvmet_ls_req_cmp - completion handler for a nvme ls request
+ * @phba: Pointer to HBA context object
+ * @cmdwqe: Pointer to driver command WQE object.
+ * @wcqe: Pointer to driver response CQE object.
+ *
+ * This function is the completion handler for NVME LS requests.
+ * The function updates any states and statistics, then calls the
+ * generic completion handler to finish completion of the request.
+ **/
+static void
+lpfc_nvmet_ls_req_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
+ struct lpfc_wcqe_complete *wcqe)
+{
+ __lpfc_nvme_ls_req_cmp(phba, cmdwqe->vport, cmdwqe, wcqe);
+}
+
+/**
+ * lpfc_nvmet_ls_req - Issue a Link Service request
+ * @targetport: pointer to target instance registered with nvmet transport.
+ * @hosthandle: hosthandle set by the driver in a prior ls_rqst_rcv.
+ *              Driver sets this value to the ndlp pointer.
+ * @pnvme_lsreq: the transport nvme_ls_req structure for the LS
+ *
+ * Driver registers this routine to handle any link service request
+ * from the nvme_fc transport to a remote nvme-aware port.
+ *
+ * Return value :
+ * 0 - Success
+ * non-zero: various error codes, in the form of -Exxx
+ **/
+static int
+lpfc_nvmet_ls_req(struct nvmet_fc_target_port *targetport,
+ void *hosthandle,
+ struct nvmefc_ls_req *pnvme_lsreq)
+{
+ struct lpfc_nvmet_tgtport *lpfc_nvmet = targetport->private;
+ struct lpfc_hba *phba;
+ struct lpfc_nodelist *ndlp;
+ int ret;
+ u32 hstate;
+
+ if (!lpfc_nvmet)
+ return -EINVAL;
+
+ phba = lpfc_nvmet->phba;
+ if (phba->pport->load_flag & FC_UNLOADING)
+ return -EINVAL;
+
+ hstate = atomic_read(&lpfc_nvmet->state);
+ if (hstate == LPFC_NVMET_INV_HOST_ACTIVE)
+ return -EACCES;
+
+ ndlp = (struct lpfc_nodelist *)hosthandle;
+
+ ret = __lpfc_nvme_ls_req(phba->pport, ndlp, pnvme_lsreq,
+ lpfc_nvmet_ls_req_cmp);
+
+ return ret;
+}
+
+/**
+ * lpfc_nvmet_ls_abort - Abort a prior NVME LS request
+ * @targetport: Transport targetport that the LS was issued from.
+ * @hosthandle: hosthandle set by the driver in a prior ls_rqst_rcv.
+ *              Driver sets this value to the ndlp pointer.
+ * @pnvme_lsreq: the transport nvme_ls_req structure for the LS to be aborted
+ *
+ * Driver registers this routine to abort an NVME LS request that is
+ * in progress (from the transport's perspective).
+ **/
+static void
+lpfc_nvmet_ls_abort(struct nvmet_fc_target_port *targetport,
+ void *hosthandle,
+ struct nvmefc_ls_req *pnvme_lsreq)
+{
+ struct lpfc_nvmet_tgtport *lpfc_nvmet = targetport->private;
+ struct lpfc_hba *phba;
+ struct lpfc_nodelist *ndlp;
+ int ret;
+
+ phba = lpfc_nvmet->phba;
+ if (phba->pport->load_flag & FC_UNLOADING)
+ return;
+
+ ndlp = (struct lpfc_nodelist *)hosthandle;
+
+ ret = __lpfc_nvme_ls_abort(phba->pport, ndlp, pnvme_lsreq);
+ if (!ret)
+ atomic_inc(&lpfc_nvmet->xmt_ls_abort);
+}
+
+static void
+lpfc_nvmet_host_release(void *hosthandle)
+{
+ struct lpfc_nodelist *ndlp = hosthandle;
+ struct lpfc_hba *phba = NULL;
+ struct lpfc_nvmet_tgtport *tgtp;
+
+ phba = ndlp->phba;
+ if (!phba->targetport || !phba->targetport->private)
+ return;
+
+ lpfc_printf_log(phba, KERN_ERR, LOG_NVME,
+ "6202 NVMET XPT releasing hosthandle x%px\n",
+ hosthandle);
+ tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
+ atomic_set(&tgtp->state, 0);
+}
+
static void
lpfc_nvmet_discovery_event(struct nvmet_fc_target_port *tgtport)
{
@@ -1214,6 +1416,9 @@ static struct nvmet_fc_target_template lpfc_tgttemplate = {
.fcp_req_release = lpfc_nvmet_xmt_fcp_release,
.defer_rcv = lpfc_nvmet_defer_rcv,
.discovery_event = lpfc_nvmet_discovery_event,
+ .ls_req = lpfc_nvmet_ls_req,
+ .ls_abort = lpfc_nvmet_ls_abort,
+ .host_release = lpfc_nvmet_host_release,
.max_hw_queues = 1,
.max_sgl_segments = LPFC_NVMET_DEFAULT_SEGS,
@@ -1224,6 +1429,7 @@ static struct nvmet_fc_target_template lpfc_tgttemplate = {
.target_features = 0,
/* sizes of additional private data for data structures */
.target_priv_sz = sizeof(struct lpfc_nvmet_tgtport),
+ .lsrqst_priv_sz = 0,
};
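
The template above hands the new LS handlers to the nvmet-fc transport as entries in an ops table. A much-reduced, hypothetical sketch of that pattern (types and names invented; not the nvmet-fc API):

/* Hypothetical ops-table sketch; these types are invented, not nvmet-fc. */
#include <stdio.h>

struct tiny_template {
	int  (*ls_req)(void *hosthandle, void *lsreq);
	void (*ls_abort)(void *hosthandle, void *lsreq);
	void (*host_release)(void *hosthandle);
};

static int  demo_ls_req(void *hh, void *ls)   { (void)hh; (void)ls; puts("ls_req");   return 0; }
static void demo_ls_abort(void *hh, void *ls) { (void)hh; (void)ls; puts("ls_abort"); }
static void demo_host_release(void *hh)       { (void)hh; puts("host_release"); }

/* designated initializers, as in lpfc_tgttemplate above */
static struct tiny_template demo_template = {
	.ls_req       = demo_ls_req,
	.ls_abort     = demo_ls_abort,
	.host_release = demo_host_release,
};

int main(void)
{
	/* the transport would invoke these through the registered template */
	demo_template.ls_req(NULL, NULL);
	demo_template.ls_abort(NULL, NULL);
	demo_template.host_release(NULL);
	return 0;
}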
static void
@@ -1368,7 +1574,7 @@ lpfc_nvmet_setup_io_context(struct lpfc_hba *phba)
return -ENOMEM;
}
ctx_buf->context->ctxbuf = ctx_buf;
- ctx_buf->context->state = LPFC_NVMET_STE_FREE;
+ ctx_buf->context->state = LPFC_NVME_STE_FREE;
ctx_buf->iocbq = lpfc_sli_get_iocbq(phba);
if (!ctx_buf->iocbq) {
@@ -1568,7 +1774,7 @@ lpfc_sli4_nvmet_xri_aborted(struct lpfc_hba *phba,
#if (IS_ENABLED(CONFIG_NVME_TARGET_FC))
uint16_t xri = bf_get(lpfc_wcqe_xa_xri, axri);
uint16_t rxid = bf_get(lpfc_wcqe_xa_remote_xid, axri);
- struct lpfc_nvmet_rcv_ctx *ctxp, *next_ctxp;
+ struct lpfc_async_xchg_ctx *ctxp, *next_ctxp;
struct lpfc_nvmet_tgtport *tgtp;
struct nvmefc_tgt_fcp_req *req = NULL;
struct lpfc_nodelist *ndlp;
@@ -1599,12 +1805,12 @@ lpfc_sli4_nvmet_xri_aborted(struct lpfc_hba *phba,
/* Check if we already received a free context call
* and we have completed processing an abort situation.
*/
- if (ctxp->flag & LPFC_NVMET_CTX_RLS &&
- !(ctxp->flag & LPFC_NVMET_ABORT_OP)) {
+ if (ctxp->flag & LPFC_NVME_CTX_RLS &&
+ !(ctxp->flag & LPFC_NVME_ABORT_OP)) {
list_del_init(&ctxp->list);
released = true;
}
- ctxp->flag &= ~LPFC_NVMET_XBUSY;
+ ctxp->flag &= ~LPFC_NVME_XBUSY;
spin_unlock(&ctxp->ctxlock);
spin_unlock(&phba->sli4_hba.abts_nvmet_buf_list_lock);
@@ -1646,15 +1852,15 @@ lpfc_sli4_nvmet_xri_aborted(struct lpfc_hba *phba,
rxid);
spin_lock_irqsave(&ctxp->ctxlock, iflag);
- ctxp->flag |= LPFC_NVMET_ABTS_RCV;
- ctxp->state = LPFC_NVMET_STE_ABORT;
+ ctxp->flag |= LPFC_NVME_ABTS_RCV;
+ ctxp->state = LPFC_NVME_STE_ABORT;
spin_unlock_irqrestore(&ctxp->ctxlock, iflag);
lpfc_nvmeio_data(phba,
"NVMET ABTS RCV: xri x%x CPU %02x rjt %d\n",
xri, raw_smp_processor_id(), 0);
- req = &ctxp->ctx.fcp_req;
+ req = &ctxp->hdlrctx.fcp_req;
if (req)
nvmet_fc_rcv_fcp_abort(phba->targetport, req);
}
@@ -1667,7 +1873,7 @@ lpfc_nvmet_rcv_unsol_abort(struct lpfc_vport *vport,
{
#if (IS_ENABLED(CONFIG_NVME_TARGET_FC))
struct lpfc_hba *phba = vport->phba;
- struct lpfc_nvmet_rcv_ctx *ctxp, *next_ctxp;
+ struct lpfc_async_xchg_ctx *ctxp, *next_ctxp;
struct nvmefc_tgt_fcp_req *rsp;
uint32_t sid;
uint16_t oxid, xri;
@@ -1690,7 +1896,7 @@ lpfc_nvmet_rcv_unsol_abort(struct lpfc_vport *vport,
spin_unlock_irqrestore(&phba->hbalock, iflag);
spin_lock_irqsave(&ctxp->ctxlock, iflag);
- ctxp->flag |= LPFC_NVMET_ABTS_RCV;
+ ctxp->flag |= LPFC_NVME_ABTS_RCV;
spin_unlock_irqrestore(&ctxp->ctxlock, iflag);
lpfc_nvmeio_data(phba,
@@ -1700,7 +1906,7 @@ lpfc_nvmet_rcv_unsol_abort(struct lpfc_vport *vport,
lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS,
"6319 NVMET Rcv ABTS:acc xri x%x\n", xri);
- rsp = &ctxp->ctx.fcp_req;
+ rsp = &ctxp->hdlrctx.fcp_req;
nvmet_fc_rcv_fcp_abort(phba->targetport, rsp);
/* Respond with BA_ACC accordingly */
@@ -1759,7 +1965,7 @@ lpfc_nvmet_rcv_unsol_abort(struct lpfc_vport *vport,
xri = ctxp->ctxbuf->sglq->sli4_xritag;
spin_lock_irqsave(&ctxp->ctxlock, iflag);
- ctxp->flag |= (LPFC_NVMET_ABTS_RCV | LPFC_NVMET_ABORT_OP);
+ ctxp->flag |= (LPFC_NVME_ABTS_RCV | LPFC_NVME_ABORT_OP);
spin_unlock_irqrestore(&ctxp->ctxlock, iflag);
lpfc_nvmeio_data(phba,
@@ -1771,10 +1977,10 @@ lpfc_nvmet_rcv_unsol_abort(struct lpfc_vport *vport,
"flag x%x state x%x\n",
ctxp->oxid, xri, ctxp->flag, ctxp->state);
- if (ctxp->flag & LPFC_NVMET_TNOTIFY) {
+ if (ctxp->flag & LPFC_NVME_TNOTIFY) {
/* Notify the transport */
nvmet_fc_rcv_fcp_abort(phba->targetport,
- &ctxp->ctx.fcp_req);
+ &ctxp->hdlrctx.fcp_req);
} else {
cancel_work_sync(&ctxp->ctxbuf->defer_work);
spin_lock_irqsave(&ctxp->ctxlock, iflag);
@@ -1802,7 +2008,7 @@ lpfc_nvmet_rcv_unsol_abort(struct lpfc_vport *vport,
static void
lpfc_nvmet_wqfull_flush(struct lpfc_hba *phba, struct lpfc_queue *wq,
- struct lpfc_nvmet_rcv_ctx *ctxp)
+ struct lpfc_async_xchg_ctx *ctxp)
{
struct lpfc_sli_ring *pring;
struct lpfc_iocbq *nvmewqeq;
@@ -1853,7 +2059,7 @@ lpfc_nvmet_wqfull_process(struct lpfc_hba *phba,
#if (IS_ENABLED(CONFIG_NVME_TARGET_FC))
struct lpfc_sli_ring *pring;
struct lpfc_iocbq *nvmewqeq;
- struct lpfc_nvmet_rcv_ctx *ctxp;
+ struct lpfc_async_xchg_ctx *ctxp;
unsigned long iflags;
int rc;
@@ -1867,7 +2073,7 @@ lpfc_nvmet_wqfull_process(struct lpfc_hba *phba,
list_remove_head(&wq->wqfull_list, nvmewqeq, struct lpfc_iocbq,
list);
spin_unlock_irqrestore(&pring->ring_lock, iflags);
- ctxp = (struct lpfc_nvmet_rcv_ctx *)nvmewqeq->context2;
+ ctxp = (struct lpfc_async_xchg_ctx *)nvmewqeq->context2;
rc = lpfc_sli4_issue_wqe(phba, ctxp->hdwq, nvmewqeq);
spin_lock_irqsave(&pring->ring_lock, iflags);
if (rc == -EBUSY) {
@@ -1879,7 +2085,7 @@ lpfc_nvmet_wqfull_process(struct lpfc_hba *phba,
if (rc == WQE_SUCCESS) {
#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
if (ctxp->ts_cmd_nvme) {
- if (ctxp->ctx.fcp_req.op == NVMET_FCOP_RSP)
+ if (ctxp->hdlrctx.fcp_req.op == NVMET_FCOP_RSP)
ctxp->ts_status_wqput = ktime_get_ns();
else
ctxp->ts_data_wqput = ktime_get_ns();
@@ -1926,114 +2132,61 @@ lpfc_nvmet_destroy_targetport(struct lpfc_hba *phba)
}
/**
- * lpfc_nvmet_unsol_ls_buffer - Process an unsolicited event data buffer
+ * lpfc_nvmet_handle_lsreq - Process an NVME LS request
* @phba: pointer to lpfc hba data structure.
- * @pring: pointer to a SLI ring.
- * @nvmebuf: pointer to lpfc nvme command HBQ data structure.
+ * @axchg: pointer to exchange context for the NVME LS request
*
- * This routine is used for processing the WQE associated with a unsolicited
- * event. It first determines whether there is an existing ndlp that matches
- * the DID from the unsolicited WQE. If not, it will create a new one with
- * the DID from the unsolicited WQE. The ELS command from the unsolicited
- * WQE is then used to invoke the proper routine and to set up proper state
- * of the discovery state machine.
- **/
-static void
-lpfc_nvmet_unsol_ls_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
- struct hbq_dmabuf *nvmebuf)
+ * This routine is used for processing an asynchronously received NVME LS
+ * request. Any remaining validation is done and the LS is then forwarded
+ * to the nvmet-fc transport via nvmet_fc_rcv_ls_req().
+ *
+ * The calling sequence should be: nvmet_fc_rcv_ls_req() -> (processing)
+ * -> lpfc_nvmet_xmt_ls_rsp/cmp -> req->done.
+ * lpfc_nvme_xmt_ls_rsp_cmp should free the allocated axchg.
+ *
+ * Returns 0 if LS was handled and delivered to the transport
+ * Returns 1 if LS failed to be handled and should be dropped
+ */
+int
+lpfc_nvmet_handle_lsreq(struct lpfc_hba *phba,
+ struct lpfc_async_xchg_ctx *axchg)
{
#if (IS_ENABLED(CONFIG_NVME_TARGET_FC))
- struct lpfc_nvmet_tgtport *tgtp;
- struct fc_frame_header *fc_hdr;
- struct lpfc_nvmet_rcv_ctx *ctxp;
- uint32_t *payload;
- uint32_t size, oxid, sid, rc;
-
-
- if (!nvmebuf || !phba->targetport) {
- lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
- "6154 LS Drop IO\n");
- oxid = 0;
- size = 0;
- sid = 0;
- ctxp = NULL;
- goto dropit;
- }
-
- fc_hdr = (struct fc_frame_header *)(nvmebuf->hbuf.virt);
- oxid = be16_to_cpu(fc_hdr->fh_ox_id);
-
- tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
- payload = (uint32_t *)(nvmebuf->dbuf.virt);
- size = bf_get(lpfc_rcqe_length, &nvmebuf->cq_event.cqe.rcqe_cmpl);
- sid = sli4_sid_from_fc_hdr(fc_hdr);
+ struct lpfc_nvmet_tgtport *tgtp = phba->targetport->private;
+ uint32_t *payload = axchg->payload;
+ int rc;
- ctxp = kzalloc(sizeof(struct lpfc_nvmet_rcv_ctx), GFP_ATOMIC);
- if (ctxp == NULL) {
- atomic_inc(&tgtp->rcv_ls_req_drop);
- lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
- "6155 LS Drop IO x%x: Alloc\n",
- oxid);
-dropit:
- lpfc_nvmeio_data(phba, "NVMET LS DROP: "
- "xri x%x sz %d from %06x\n",
- oxid, size, sid);
- lpfc_in_buf_free(phba, &nvmebuf->dbuf);
- return;
- }
- ctxp->phba = phba;
- ctxp->size = size;
- ctxp->oxid = oxid;
- ctxp->sid = sid;
- ctxp->wqeq = NULL;
- ctxp->state = LPFC_NVMET_STE_LS_RCV;
- ctxp->entry_cnt = 1;
- ctxp->rqb_buffer = (void *)nvmebuf;
- ctxp->hdwq = &phba->sli4_hba.hdwq[0];
+ atomic_inc(&tgtp->rcv_ls_req_in);
- lpfc_nvmeio_data(phba, "NVMET LS RCV: xri x%x sz %d from %06x\n",
- oxid, size, sid);
/*
- * The calling sequence should be:
- * nvmet_fc_rcv_ls_req -> lpfc_nvmet_xmt_ls_rsp/cmp ->_req->done
- * lpfc_nvmet_xmt_ls_rsp_cmp should free the allocated ctxp.
+ * Driver passes the ndlp as the hosthandle argument allowing
+ * the transport to generate LS requests for any associations
+ * that are created.
*/
- atomic_inc(&tgtp->rcv_ls_req_in);
- rc = nvmet_fc_rcv_ls_req(phba->targetport, &ctxp->ctx.ls_req,
- payload, size);
+ rc = nvmet_fc_rcv_ls_req(phba->targetport, axchg->ndlp, &axchg->ls_rsp,
+ axchg->payload, axchg->size);
lpfc_printf_log(phba, KERN_INFO, LOG_NVME_DISC,
"6037 NVMET Unsol rcv: sz %d rc %d: %08x %08x %08x "
- "%08x %08x %08x\n", size, rc,
+ "%08x %08x %08x\n", axchg->size, rc,
*payload, *(payload+1), *(payload+2),
*(payload+3), *(payload+4), *(payload+5));
- if (rc == 0) {
+ if (!rc) {
atomic_inc(&tgtp->rcv_ls_req_out);
- return;
+ return 0;
}
- lpfc_nvmeio_data(phba, "NVMET LS DROP: xri x%x sz %d from %06x\n",
- oxid, size, sid);
-
atomic_inc(&tgtp->rcv_ls_req_drop);
- lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
- "6156 LS Drop IO x%x: nvmet_fc_rcv_ls_req %d\n",
- ctxp->oxid, rc);
-
- /* We assume a rcv'ed cmd ALWAYs fits into 1 buffer */
- lpfc_in_buf_free(phba, &nvmebuf->dbuf);
-
- atomic_inc(&tgtp->xmt_ls_abort);
- lpfc_nvmet_unsol_ls_issue_abort(phba, ctxp, sid, oxid);
#endif
+ return 1;
}
static void
lpfc_nvmet_process_rcv_fcp_req(struct lpfc_nvmet_ctxbuf *ctx_buf)
{
#if (IS_ENABLED(CONFIG_NVME_TARGET_FC))
- struct lpfc_nvmet_rcv_ctx *ctxp = ctx_buf->context;
+ struct lpfc_async_xchg_ctx *ctxp = ctx_buf->context;
struct lpfc_hba *phba = ctxp->phba;
struct rqb_dmabuf *nvmebuf = ctxp->rqb_buffer;
struct lpfc_nvmet_tgtport *tgtp;
@@ -2054,7 +2207,7 @@ lpfc_nvmet_process_rcv_fcp_req(struct lpfc_nvmet_ctxbuf *ctx_buf)
return;
}
- if (ctxp->flag & LPFC_NVMET_ABTS_RCV) {
+ if (ctxp->flag & LPFC_NVME_ABTS_RCV) {
lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
"6324 IO oxid x%x aborted\n",
ctxp->oxid);
@@ -2063,7 +2216,7 @@ lpfc_nvmet_process_rcv_fcp_req(struct lpfc_nvmet_ctxbuf *ctx_buf)
payload = (uint32_t *)(nvmebuf->dbuf.virt);
tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
- ctxp->flag |= LPFC_NVMET_TNOTIFY;
+ ctxp->flag |= LPFC_NVME_TNOTIFY;
#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
if (ctxp->ts_isr_cmd)
ctxp->ts_cmd_nvme = ktime_get_ns();
@@ -2077,13 +2230,13 @@ lpfc_nvmet_process_rcv_fcp_req(struct lpfc_nvmet_ctxbuf *ctx_buf)
* A buffer has already been reposted for this IO, so just free
* the nvmebuf.
*/
- rc = nvmet_fc_rcv_fcp_req(phba->targetport, &ctxp->ctx.fcp_req,
+ rc = nvmet_fc_rcv_fcp_req(phba->targetport, &ctxp->hdlrctx.fcp_req,
payload, ctxp->size);
/* Process FCP command */
if (rc == 0) {
atomic_inc(&tgtp->rcv_fcp_cmd_out);
spin_lock_irqsave(&ctxp->ctxlock, iflags);
- if ((ctxp->flag & LPFC_NVMET_CTX_REUSE_WQ) ||
+ if ((ctxp->flag & LPFC_NVME_CTX_REUSE_WQ) ||
(nvmebuf != ctxp->rqb_buffer)) {
spin_unlock_irqrestore(&ctxp->ctxlock, iflags);
return;
@@ -2102,7 +2255,7 @@ lpfc_nvmet_process_rcv_fcp_req(struct lpfc_nvmet_ctxbuf *ctx_buf)
atomic_inc(&tgtp->rcv_fcp_cmd_out);
atomic_inc(&tgtp->defer_fod);
spin_lock_irqsave(&ctxp->ctxlock, iflags);
- if (ctxp->flag & LPFC_NVMET_CTX_REUSE_WQ) {
+ if (ctxp->flag & LPFC_NVME_CTX_REUSE_WQ) {
spin_unlock_irqrestore(&ctxp->ctxlock, iflags);
return;
}
@@ -2117,7 +2270,7 @@ lpfc_nvmet_process_rcv_fcp_req(struct lpfc_nvmet_ctxbuf *ctx_buf)
phba->sli4_hba.nvmet_mrq_data[qno], 1, qno);
return;
}
- ctxp->flag &= ~LPFC_NVMET_TNOTIFY;
+ ctxp->flag &= ~LPFC_NVME_TNOTIFY;
atomic_inc(&tgtp->rcv_fcp_cmd_drop);
lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
"2582 FCP Drop IO x%x: err x%x: x%x x%x x%x\n",
@@ -2224,7 +2377,7 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba,
uint64_t isr_timestamp,
uint8_t cqflag)
{
- struct lpfc_nvmet_rcv_ctx *ctxp;
+ struct lpfc_async_xchg_ctx *ctxp;
struct lpfc_nvmet_tgtport *tgtp;
struct fc_frame_header *fc_hdr;
struct lpfc_nvmet_ctxbuf *ctx_buf;
@@ -2306,11 +2459,11 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba,
sid = sli4_sid_from_fc_hdr(fc_hdr);
- ctxp = (struct lpfc_nvmet_rcv_ctx *)ctx_buf->context;
+ ctxp = (struct lpfc_async_xchg_ctx *)ctx_buf->context;
spin_lock_irqsave(&phba->sli4_hba.t_active_list_lock, iflag);
list_add_tail(&ctxp->list, &phba->sli4_hba.t_active_ctx_list);
spin_unlock_irqrestore(&phba->sli4_hba.t_active_list_lock, iflag);
- if (ctxp->state != LPFC_NVMET_STE_FREE) {
+ if (ctxp->state != LPFC_NVME_STE_FREE) {
lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
"6414 NVMET Context corrupt %d %d oxid x%x\n",
ctxp->state, ctxp->entry_cnt, ctxp->oxid);
@@ -2322,7 +2475,7 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba,
ctxp->oxid = oxid;
ctxp->sid = sid;
ctxp->idx = idx;
- ctxp->state = LPFC_NVMET_STE_RCV;
+ ctxp->state = LPFC_NVME_STE_RCV;
ctxp->entry_cnt = 1;
ctxp->flag = 0;
ctxp->ctxbuf = ctx_buf;
@@ -2369,40 +2522,6 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba,
}
/**
- * lpfc_nvmet_unsol_ls_event - Process an unsolicited event from an nvme nport
- * @phba: pointer to lpfc hba data structure.
- * @pring: pointer to a SLI ring.
- * @nvmebuf: pointer to received nvme data structure.
- *
- * This routine is used to process an unsolicited event received from a SLI
- * (Service Level Interface) ring. The actual processing of the data buffer
- * associated with the unsolicited event is done by invoking the routine
- * lpfc_nvmet_unsol_ls_buffer() after properly set up the buffer from the
- * SLI RQ on which the unsolicited event was received.
- **/
-void
-lpfc_nvmet_unsol_ls_event(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
- struct lpfc_iocbq *piocb)
-{
- struct lpfc_dmabuf *d_buf;
- struct hbq_dmabuf *nvmebuf;
-
- d_buf = piocb->context2;
- nvmebuf = container_of(d_buf, struct hbq_dmabuf, dbuf);
-
- if (!nvmebuf) {
- lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
- "3015 LS Drop IO\n");
- return;
- }
- if (phba->nvmet_support == 0) {
- lpfc_in_buf_free(phba, &nvmebuf->dbuf);
- return;
- }
- lpfc_nvmet_unsol_ls_buffer(phba, pring, nvmebuf);
-}
-
-/**
* lpfc_nvmet_unsol_fcp_event - Process an unsolicited event from an nvme nport
* @phba: pointer to lpfc hba data structure.
* @idx: relative index of MRQ vector
@@ -2462,7 +2581,7 @@ lpfc_nvmet_unsol_fcp_event(struct lpfc_hba *phba,
**/
static struct lpfc_iocbq *
lpfc_nvmet_prep_ls_wqe(struct lpfc_hba *phba,
- struct lpfc_nvmet_rcv_ctx *ctxp,
+ struct lpfc_async_xchg_ctx *ctxp,
dma_addr_t rspbuf, uint16_t rspsize)
{
struct lpfc_nodelist *ndlp;
@@ -2584,9 +2703,9 @@ nvme_wqe_free_wqeq_exit:
static struct lpfc_iocbq *
lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *phba,
- struct lpfc_nvmet_rcv_ctx *ctxp)
+ struct lpfc_async_xchg_ctx *ctxp)
{
- struct nvmefc_tgt_fcp_req *rsp = &ctxp->ctx.fcp_req;
+ struct nvmefc_tgt_fcp_req *rsp = &ctxp->hdlrctx.fcp_req;
struct lpfc_nvmet_tgtport *tgtp;
struct sli4_sge *sgl;
struct lpfc_nodelist *ndlp;
@@ -2647,9 +2766,9 @@ lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *phba,
}
/* Sanity check */
- if (((ctxp->state == LPFC_NVMET_STE_RCV) &&
+ if (((ctxp->state == LPFC_NVME_STE_RCV) &&
(ctxp->entry_cnt == 1)) ||
- (ctxp->state == LPFC_NVMET_STE_DATA)) {
+ (ctxp->state == LPFC_NVME_STE_DATA)) {
wqe = &nvmewqe->wqe;
} else {
lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
@@ -2912,7 +3031,7 @@ lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *phba,
sgl++;
ctxp->offset += cnt;
}
- ctxp->state = LPFC_NVMET_STE_DATA;
+ ctxp->state = LPFC_NVME_STE_DATA;
ctxp->entry_cnt++;
return nvmewqe;
}
@@ -2931,7 +3050,7 @@ static void
lpfc_nvmet_sol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
struct lpfc_wcqe_complete *wcqe)
{
- struct lpfc_nvmet_rcv_ctx *ctxp;
+ struct lpfc_async_xchg_ctx *ctxp;
struct lpfc_nvmet_tgtport *tgtp;
uint32_t result;
unsigned long flags;
@@ -2941,23 +3060,23 @@ lpfc_nvmet_sol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
result = wcqe->parameter;
tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
- if (ctxp->flag & LPFC_NVMET_ABORT_OP)
+ if (ctxp->flag & LPFC_NVME_ABORT_OP)
atomic_inc(&tgtp->xmt_fcp_abort_cmpl);
spin_lock_irqsave(&ctxp->ctxlock, flags);
- ctxp->state = LPFC_NVMET_STE_DONE;
+ ctxp->state = LPFC_NVME_STE_DONE;
/* Check if we already received a free context call
* and we have completed processing an abort situation.
*/
- if ((ctxp->flag & LPFC_NVMET_CTX_RLS) &&
- !(ctxp->flag & LPFC_NVMET_XBUSY)) {
+ if ((ctxp->flag & LPFC_NVME_CTX_RLS) &&
+ !(ctxp->flag & LPFC_NVME_XBUSY)) {
spin_lock(&phba->sli4_hba.abts_nvmet_buf_list_lock);
list_del_init(&ctxp->list);
spin_unlock(&phba->sli4_hba.abts_nvmet_buf_list_lock);
released = true;
}
- ctxp->flag &= ~LPFC_NVMET_ABORT_OP;
+ ctxp->flag &= ~LPFC_NVME_ABORT_OP;
spin_unlock_irqrestore(&ctxp->ctxlock, flags);
atomic_inc(&tgtp->xmt_abort_rsp);
@@ -2981,7 +3100,7 @@ lpfc_nvmet_sol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
lpfc_sli_release_iocbq(phba, cmdwqe);
/* Since iaab/iaar are NOT set, there is no work left.
- * For LPFC_NVMET_XBUSY, lpfc_sli4_nvmet_xri_aborted
+ * For LPFC_NVME_XBUSY, lpfc_sli4_nvmet_xri_aborted
* should have been called already.
*/
}
@@ -3000,7 +3119,7 @@ static void
lpfc_nvmet_unsol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
struct lpfc_wcqe_complete *wcqe)
{
- struct lpfc_nvmet_rcv_ctx *ctxp;
+ struct lpfc_async_xchg_ctx *ctxp;
struct lpfc_nvmet_tgtport *tgtp;
unsigned long flags;
uint32_t result;
@@ -3020,11 +3139,11 @@ lpfc_nvmet_unsol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
spin_lock_irqsave(&ctxp->ctxlock, flags);
- if (ctxp->flag & LPFC_NVMET_ABORT_OP)
+ if (ctxp->flag & LPFC_NVME_ABORT_OP)
atomic_inc(&tgtp->xmt_fcp_abort_cmpl);
/* Sanity check */
- if (ctxp->state != LPFC_NVMET_STE_ABORT) {
+ if (ctxp->state != LPFC_NVME_STE_ABORT) {
lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS,
"6112 ABTS Wrong state:%d oxid x%x\n",
ctxp->state, ctxp->oxid);
@@ -3033,15 +3152,15 @@ lpfc_nvmet_unsol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
/* Check if we already received a free context call
* and we have completed processing an abort situation.
*/
- ctxp->state = LPFC_NVMET_STE_DONE;
- if ((ctxp->flag & LPFC_NVMET_CTX_RLS) &&
- !(ctxp->flag & LPFC_NVMET_XBUSY)) {
+ ctxp->state = LPFC_NVME_STE_DONE;
+ if ((ctxp->flag & LPFC_NVME_CTX_RLS) &&
+ !(ctxp->flag & LPFC_NVME_XBUSY)) {
spin_lock(&phba->sli4_hba.abts_nvmet_buf_list_lock);
list_del_init(&ctxp->list);
spin_unlock(&phba->sli4_hba.abts_nvmet_buf_list_lock);
released = true;
}
- ctxp->flag &= ~LPFC_NVMET_ABORT_OP;
+ ctxp->flag &= ~LPFC_NVME_ABORT_OP;
spin_unlock_irqrestore(&ctxp->ctxlock, flags);
atomic_inc(&tgtp->xmt_abort_rsp);
@@ -3062,7 +3181,7 @@ lpfc_nvmet_unsol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
lpfc_nvmet_ctxbuf_post(phba, ctxp->ctxbuf);
/* Since iaab/iaar are NOT set, there is no work left.
- * For LPFC_NVMET_XBUSY, lpfc_sli4_nvmet_xri_aborted
+ * For LPFC_NVME_XBUSY, lpfc_sli4_nvmet_xri_aborted
* should have been called already.
*/
}
@@ -3081,15 +3200,17 @@ static void
lpfc_nvmet_xmt_ls_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
struct lpfc_wcqe_complete *wcqe)
{
- struct lpfc_nvmet_rcv_ctx *ctxp;
+ struct lpfc_async_xchg_ctx *ctxp;
struct lpfc_nvmet_tgtport *tgtp;
uint32_t result;
ctxp = cmdwqe->context2;
result = wcqe->parameter;
- tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
- atomic_inc(&tgtp->xmt_ls_abort_cmpl);
+ if (phba->nvmet_support) {
+ tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
+ atomic_inc(&tgtp->xmt_ls_abort_cmpl);
+ }
lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS,
"6083 Abort cmpl: ctx x%px WCQE:%08x %08x %08x %08x\n",
@@ -3107,7 +3228,7 @@ lpfc_nvmet_xmt_ls_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
return;
}
- if (ctxp->state != LPFC_NVMET_STE_LS_ABORT) {
+ if (ctxp->state != LPFC_NVME_STE_LS_ABORT) {
lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
"6416 NVMET LS abort cmpl state mismatch: "
"oxid x%x: %d %d\n",
@@ -3122,10 +3243,10 @@ lpfc_nvmet_xmt_ls_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
static int
lpfc_nvmet_unsol_issue_abort(struct lpfc_hba *phba,
- struct lpfc_nvmet_rcv_ctx *ctxp,
+ struct lpfc_async_xchg_ctx *ctxp,
uint32_t sid, uint16_t xri)
{
- struct lpfc_nvmet_tgtport *tgtp;
+ struct lpfc_nvmet_tgtport *tgtp = NULL;
struct lpfc_iocbq *abts_wqeq;
union lpfc_wqe128 *wqe_abts;
struct lpfc_nodelist *ndlp;
@@ -3134,13 +3255,15 @@ lpfc_nvmet_unsol_issue_abort(struct lpfc_hba *phba,
"6067 ABTS: sid %x xri x%x/x%x\n",
sid, xri, ctxp->wqeq->sli4_xritag);
- tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
+ if (phba->nvmet_support && phba->targetport)
+ tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
ndlp = lpfc_findnode_did(phba->pport, sid);
if (!ndlp || !NLP_CHK_NODE_ACT(ndlp) ||
((ndlp->nlp_state != NLP_STE_UNMAPPED_NODE) &&
(ndlp->nlp_state != NLP_STE_MAPPED_NODE))) {
- atomic_inc(&tgtp->xmt_abort_rsp_error);
+ if (tgtp)
+ atomic_inc(&tgtp->xmt_abort_rsp_error);
lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS,
"6134 Drop ABTS - wrong NDLP state x%x.\n",
(ndlp) ? ndlp->nlp_state : NLP_STE_MAX_STATE);
@@ -3217,7 +3340,7 @@ lpfc_nvmet_unsol_issue_abort(struct lpfc_hba *phba,
static int
lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba,
- struct lpfc_nvmet_rcv_ctx *ctxp,
+ struct lpfc_async_xchg_ctx *ctxp,
uint32_t sid, uint16_t xri)
{
struct lpfc_nvmet_tgtport *tgtp;
@@ -3244,7 +3367,7 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba,
/* No failure to an ABTS request. */
spin_lock_irqsave(&ctxp->ctxlock, flags);
- ctxp->flag &= ~LPFC_NVMET_ABORT_OP;
+ ctxp->flag &= ~LPFC_NVME_ABORT_OP;
spin_unlock_irqrestore(&ctxp->ctxlock, flags);
return 0;
}
@@ -3258,13 +3381,13 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba,
"6161 ABORT failed: No wqeqs: "
"xri: x%x\n", ctxp->oxid);
/* No failure to an ABTS request. */
- ctxp->flag &= ~LPFC_NVMET_ABORT_OP;
+ ctxp->flag &= ~LPFC_NVME_ABORT_OP;
spin_unlock_irqrestore(&ctxp->ctxlock, flags);
return 0;
}
abts_wqeq = ctxp->abort_wqeq;
- ctxp->state = LPFC_NVMET_STE_ABORT;
- opt = (ctxp->flag & LPFC_NVMET_ABTS_RCV) ? INHIBIT_ABORT : 0;
+ ctxp->state = LPFC_NVME_STE_ABORT;
+ opt = (ctxp->flag & LPFC_NVME_ABTS_RCV) ? INHIBIT_ABORT : 0;
spin_unlock_irqrestore(&ctxp->ctxlock, flags);
/* Announce entry to new IO submit field. */
@@ -3287,7 +3410,7 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba,
phba->hba_flag, ctxp->oxid);
lpfc_sli_release_iocbq(phba, abts_wqeq);
spin_lock_irqsave(&ctxp->ctxlock, flags);
- ctxp->flag &= ~LPFC_NVMET_ABORT_OP;
+ ctxp->flag &= ~LPFC_NVME_ABORT_OP;
spin_unlock_irqrestore(&ctxp->ctxlock, flags);
return 0;
}
@@ -3302,7 +3425,7 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba,
ctxp->oxid);
lpfc_sli_release_iocbq(phba, abts_wqeq);
spin_lock_irqsave(&ctxp->ctxlock, flags);
- ctxp->flag &= ~LPFC_NVMET_ABORT_OP;
+ ctxp->flag &= ~LPFC_NVME_ABORT_OP;
spin_unlock_irqrestore(&ctxp->ctxlock, flags);
return 0;
}
@@ -3331,7 +3454,7 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba,
atomic_inc(&tgtp->xmt_abort_rsp_error);
spin_lock_irqsave(&ctxp->ctxlock, flags);
- ctxp->flag &= ~LPFC_NVMET_ABORT_OP;
+ ctxp->flag &= ~LPFC_NVME_ABORT_OP;
spin_unlock_irqrestore(&ctxp->ctxlock, flags);
lpfc_sli_release_iocbq(phba, abts_wqeq);
lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS,
@@ -3343,7 +3466,7 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba,
static int
lpfc_nvmet_unsol_fcp_issue_abort(struct lpfc_hba *phba,
- struct lpfc_nvmet_rcv_ctx *ctxp,
+ struct lpfc_async_xchg_ctx *ctxp,
uint32_t sid, uint16_t xri)
{
struct lpfc_nvmet_tgtport *tgtp;
@@ -3358,14 +3481,14 @@ lpfc_nvmet_unsol_fcp_issue_abort(struct lpfc_hba *phba,
ctxp->wqeq->hba_wqidx = 0;
}
- if (ctxp->state == LPFC_NVMET_STE_FREE) {
+ if (ctxp->state == LPFC_NVME_STE_FREE) {
lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
"6417 NVMET ABORT ctx freed %d %d oxid x%x\n",
ctxp->state, ctxp->entry_cnt, ctxp->oxid);
rc = WQE_BUSY;
goto aerr;
}
- ctxp->state = LPFC_NVMET_STE_ABORT;
+ ctxp->state = LPFC_NVME_STE_ABORT;
ctxp->entry_cnt++;
rc = lpfc_nvmet_unsol_issue_abort(phba, ctxp, sid, xri);
if (rc == 0)
@@ -3387,13 +3510,13 @@ lpfc_nvmet_unsol_fcp_issue_abort(struct lpfc_hba *phba,
aerr:
spin_lock_irqsave(&ctxp->ctxlock, flags);
- if (ctxp->flag & LPFC_NVMET_CTX_RLS) {
+ if (ctxp->flag & LPFC_NVME_CTX_RLS) {
spin_lock(&phba->sli4_hba.abts_nvmet_buf_list_lock);
list_del_init(&ctxp->list);
spin_unlock(&phba->sli4_hba.abts_nvmet_buf_list_lock);
released = true;
}
- ctxp->flag &= ~(LPFC_NVMET_ABORT_OP | LPFC_NVMET_CTX_RLS);
+ ctxp->flag &= ~(LPFC_NVME_ABORT_OP | LPFC_NVME_CTX_RLS);
spin_unlock_irqrestore(&ctxp->ctxlock, flags);
atomic_inc(&tgtp->xmt_abort_rsp_error);
@@ -3406,29 +3529,39 @@ aerr:
return 1;
}
-static int
-lpfc_nvmet_unsol_ls_issue_abort(struct lpfc_hba *phba,
- struct lpfc_nvmet_rcv_ctx *ctxp,
+/**
+ * lpfc_nvme_unsol_ls_issue_abort - issue an ABTS on an exchange that was
+ * received asynchronously but could not be handled.
+ * @phba: pointer to adapter structure
+ * @ctxp: pointer to the asynchronously received sequence
+ * @sid: address of the remote port to send the ABTS to
+ * @xri: oxid value for the ABTS (other side's exchange id).
+ **/
+int
+lpfc_nvme_unsol_ls_issue_abort(struct lpfc_hba *phba,
+ struct lpfc_async_xchg_ctx *ctxp,
uint32_t sid, uint16_t xri)
{
- struct lpfc_nvmet_tgtport *tgtp;
+ struct lpfc_nvmet_tgtport *tgtp = NULL;
struct lpfc_iocbq *abts_wqeq;
unsigned long flags;
int rc;
- if ((ctxp->state == LPFC_NVMET_STE_LS_RCV && ctxp->entry_cnt == 1) ||
- (ctxp->state == LPFC_NVMET_STE_LS_RSP && ctxp->entry_cnt == 2)) {
- ctxp->state = LPFC_NVMET_STE_LS_ABORT;
+ if ((ctxp->state == LPFC_NVME_STE_LS_RCV && ctxp->entry_cnt == 1) ||
+ (ctxp->state == LPFC_NVME_STE_LS_RSP && ctxp->entry_cnt == 2)) {
+ ctxp->state = LPFC_NVME_STE_LS_ABORT;
ctxp->entry_cnt++;
} else {
lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
"6418 NVMET LS abort state mismatch "
"IO x%x: %d %d\n",
ctxp->oxid, ctxp->state, ctxp->entry_cnt);
- ctxp->state = LPFC_NVMET_STE_LS_ABORT;
+ ctxp->state = LPFC_NVME_STE_LS_ABORT;
}
- tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
+ if (phba->nvmet_support && phba->targetport)
+ tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
+
if (!ctxp->wqeq) {
/* Issue ABTS for this WQE based on iotag */
ctxp->wqeq = lpfc_sli_get_iocbq(phba);
@@ -3455,16 +3588,44 @@ lpfc_nvmet_unsol_ls_issue_abort(struct lpfc_hba *phba,
rc = lpfc_sli4_issue_wqe(phba, ctxp->hdwq, abts_wqeq);
spin_unlock_irqrestore(&phba->hbalock, flags);
if (rc == WQE_SUCCESS) {
- atomic_inc(&tgtp->xmt_abort_unsol);
+ if (tgtp)
+ atomic_inc(&tgtp->xmt_abort_unsol);
return 0;
}
out:
- atomic_inc(&tgtp->xmt_abort_rsp_error);
+ if (tgtp)
+ atomic_inc(&tgtp->xmt_abort_rsp_error);
abts_wqeq->context2 = NULL;
abts_wqeq->context3 = NULL;
lpfc_sli_release_iocbq(phba, abts_wqeq);
- kfree(ctxp);
lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS,
"6056 Failed to Issue ABTS. Status x%x\n", rc);
- return 0;
+ return 1;
+}
+
+/**
+ * lpfc_nvmet_invalidate_host
+ *
+ * @phba: pointer to the driver instance bound to an adapter port.
+ * @ndlp: pointer to an lpfc_nodelist type
+ *
+ * This routine upcalls the nvmet transport to invalidate an NVME
+ * host to which this target instance had active connections.
+ */
+void
+lpfc_nvmet_invalidate_host(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp)
+{
+ struct lpfc_nvmet_tgtport *tgtp;
+
+ lpfc_printf_log(phba, KERN_INFO, LOG_NVME | LOG_NVME_ABTS,
+ "6203 Invalidating hosthandle x%px\n",
+ ndlp);
+
+ tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
+ atomic_set(&tgtp->state, LPFC_NVMET_INV_HOST_ACTIVE);
+
+#if (IS_ENABLED(CONFIG_NVME_TARGET_FC))
+ /* Need to get the nvmet_fc_target_port pointer here.*/
+ nvmet_fc_invalidate_host(phba->targetport, ndlp);
+#endif
}
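
Taken together with lpfc_nvmet_ls_req() and lpfc_nvmet_host_release() earlier in this file, the invalidate path forms a small gate on the hosthandle: once a host is invalidated, new LS requests are refused with -EACCES until the transport calls host_release(). A compressed userspace model of that gate (the state constant and helpers below are stand-ins, not lpfc code):

/* Hypothetical, compressed model of the invalidate/release gate. */
#include <stdatomic.h>
#include <stdio.h>
#include <errno.h>

#define INV_HOST_ACTIVE 1            /* stand-in for LPFC_NVMET_INV_HOST_ACTIVE */

static atomic_int tgtport_state;     /* stands in for tgtp->state */

static int ls_req(void)
{
	if (atomic_load(&tgtport_state) == INV_HOST_ACTIVE)
		return -EACCES;      /* host invalidated: refuse new LS */
	return 0;                    /* would issue the LS here */
}

static void invalidate_host(void)
{
	atomic_store(&tgtport_state, INV_HOST_ACTIVE);
}

static void host_release(void)
{
	atomic_store(&tgtport_state, 0); /* transport done with hosthandle */
}

int main(void)
{
	printf("before invalidate: %d\n", ls_req());  /* 0 */
	invalidate_host();
	printf("after invalidate:  %d\n", ls_req());  /* -EACCES */
	host_release();
	printf("after release:     %d\n", ls_req());  /* 0 */
	return 0;
}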
diff --git a/drivers/scsi/lpfc/lpfc_nvmet.h b/drivers/scsi/lpfc/lpfc_nvmet.h
deleted file mode 100644
index b80b1639b9a7..000000000000
--- a/drivers/scsi/lpfc/lpfc_nvmet.h
+++ /dev/null
@@ -1,158 +0,0 @@
-/*******************************************************************
- * This file is part of the Emulex Linux Device Driver for *
- * Fibre Channel Host Bus Adapters. *
- * Copyright (C) 2017-2019 Broadcom. All Rights Reserved. The term *
- * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. *
- * Copyright (C) 2004-2016 Emulex. All rights reserved. *
- * EMULEX and SLI are trademarks of Emulex. *
- * www.broadcom.com *
- * Portions Copyright (C) 2004-2005 Christoph Hellwig *
- * *
- * This program is free software; you can redistribute it and/or *
- * modify it under the terms of version 2 of the GNU General *
- * Public License as published by the Free Software Foundation. *
- * This program is distributed in the hope that it will be useful. *
- * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND *
- * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, *
- * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE *
- * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
- * TO BE LEGALLY INVALID. See the GNU General Public License for *
- * more details, a copy of which can be found in the file COPYING *
- * included with this package. *
- ********************************************************************/
-
-#define LPFC_NVMET_DEFAULT_SEGS (64 + 1) /* 256K IOs */
-#define LPFC_NVMET_RQE_MIN_POST 128
-#define LPFC_NVMET_RQE_DEF_POST 512
-#define LPFC_NVMET_RQE_DEF_COUNT 2048
-#define LPFC_NVMET_SUCCESS_LEN 12
-
-#define LPFC_NVMET_MRQ_AUTO 0
-#define LPFC_NVMET_MRQ_MAX 16
-
-#define LPFC_NVMET_WAIT_TMO (5 * MSEC_PER_SEC)
-
-/* Used for NVME Target */
-struct lpfc_nvmet_tgtport {
- struct lpfc_hba *phba;
- struct completion *tport_unreg_cmp;
-
- /* Stats counters - lpfc_nvmet_unsol_ls_buffer */
- atomic_t rcv_ls_req_in;
- atomic_t rcv_ls_req_out;
- atomic_t rcv_ls_req_drop;
- atomic_t xmt_ls_abort;
- atomic_t xmt_ls_abort_cmpl;
-
- /* Stats counters - lpfc_nvmet_xmt_ls_rsp */
- atomic_t xmt_ls_rsp;
- atomic_t xmt_ls_drop;
-
- /* Stats counters - lpfc_nvmet_xmt_ls_rsp_cmp */
- atomic_t xmt_ls_rsp_error;
- atomic_t xmt_ls_rsp_aborted;
- atomic_t xmt_ls_rsp_xb_set;
- atomic_t xmt_ls_rsp_cmpl;
-
- /* Stats counters - lpfc_nvmet_unsol_fcp_buffer */
- atomic_t rcv_fcp_cmd_in;
- atomic_t rcv_fcp_cmd_out;
- atomic_t rcv_fcp_cmd_drop;
- atomic_t rcv_fcp_cmd_defer;
- atomic_t xmt_fcp_release;
-
- /* Stats counters - lpfc_nvmet_xmt_fcp_op */
- atomic_t xmt_fcp_drop;
- atomic_t xmt_fcp_read_rsp;
- atomic_t xmt_fcp_read;
- atomic_t xmt_fcp_write;
- atomic_t xmt_fcp_rsp;
-
- /* Stats counters - lpfc_nvmet_xmt_fcp_op_cmp */
- atomic_t xmt_fcp_rsp_xb_set;
- atomic_t xmt_fcp_rsp_cmpl;
- atomic_t xmt_fcp_rsp_error;
- atomic_t xmt_fcp_rsp_aborted;
- atomic_t xmt_fcp_rsp_drop;
-
- /* Stats counters - lpfc_nvmet_xmt_fcp_abort */
- atomic_t xmt_fcp_xri_abort_cqe;
- atomic_t xmt_fcp_abort;
- atomic_t xmt_fcp_abort_cmpl;
- atomic_t xmt_abort_sol;
- atomic_t xmt_abort_unsol;
- atomic_t xmt_abort_rsp;
- atomic_t xmt_abort_rsp_error;
-
- /* Stats counters - defer IO */
- atomic_t defer_ctx;
- atomic_t defer_fod;
- atomic_t defer_wqfull;
-};
-
-struct lpfc_nvmet_ctx_info {
- struct list_head nvmet_ctx_list;
- spinlock_t nvmet_ctx_list_lock; /* lock per CPU */
- struct lpfc_nvmet_ctx_info *nvmet_ctx_next_cpu;
- struct lpfc_nvmet_ctx_info *nvmet_ctx_start_cpu;
- uint16_t nvmet_ctx_list_cnt;
- char pad[16]; /* pad to a cache-line */
-};
-
-/* This retrieves the context info associated with the specified cpu / mrq */
-#define lpfc_get_ctx_list(phba, cpu, mrq) \
- (phba->sli4_hba.nvmet_ctx_info + ((cpu * phba->cfg_nvmet_mrq) + mrq))
-
-struct lpfc_nvmet_rcv_ctx {
- union {
- struct nvmefc_tgt_ls_req ls_req;
- struct nvmefc_tgt_fcp_req fcp_req;
- } ctx;
- struct list_head list;
- struct lpfc_hba *phba;
- struct lpfc_iocbq *wqeq;
- struct lpfc_iocbq *abort_wqeq;
- spinlock_t ctxlock; /* protect flag access */
- uint32_t sid;
- uint32_t offset;
- uint16_t oxid;
- uint16_t size;
- uint16_t entry_cnt;
- uint16_t cpu;
- uint16_t idx;
- uint16_t state;
- /* States */
-#define LPFC_NVMET_STE_LS_RCV 1
-#define LPFC_NVMET_STE_LS_ABORT 2
-#define LPFC_NVMET_STE_LS_RSP 3
-#define LPFC_NVMET_STE_RCV 4
-#define LPFC_NVMET_STE_DATA 5
-#define LPFC_NVMET_STE_ABORT 6
-#define LPFC_NVMET_STE_DONE 7
-#define LPFC_NVMET_STE_FREE 0xff
- uint16_t flag;
-#define LPFC_NVMET_IO_INP 0x1 /* IO is in progress on exchange */
-#define LPFC_NVMET_ABORT_OP 0x2 /* Abort WQE issued on exchange */
-#define LPFC_NVMET_XBUSY 0x4 /* XB bit set on IO cmpl */
-#define LPFC_NVMET_CTX_RLS 0x8 /* ctx free requested */
-#define LPFC_NVMET_ABTS_RCV 0x10 /* ABTS received on exchange */
-#define LPFC_NVMET_CTX_REUSE_WQ 0x20 /* ctx reused via WQ */
-#define LPFC_NVMET_DEFER_WQFULL 0x40 /* Waiting on a free WQE */
-#define LPFC_NVMET_TNOTIFY 0x80 /* notify transport of abts */
- struct rqb_dmabuf *rqb_buffer;
- struct lpfc_nvmet_ctxbuf *ctxbuf;
- struct lpfc_sli4_hdw_queue *hdwq;
-
-#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
- uint64_t ts_isr_cmd;
- uint64_t ts_cmd_nvme;
- uint64_t ts_nvme_data;
- uint64_t ts_data_wqput;
- uint64_t ts_isr_data;
- uint64_t ts_data_nvme;
- uint64_t ts_nvme_status;
- uint64_t ts_status_wqput;
- uint64_t ts_isr_status;
- uint64_t ts_status_nvme;
-#endif
-};
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index b6fb665e6ec4..9e21c4f3b009 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -39,8 +39,6 @@
#include <asm/set_memory.h>
#endif
-#include <linux/nvme-fc-driver.h>
-
#include "lpfc_hw4.h"
#include "lpfc_hw.h"
#include "lpfc_sli.h"
@@ -50,7 +48,6 @@
#include "lpfc.h"
#include "lpfc_scsi.h"
#include "lpfc_nvme.h"
-#include "lpfc_nvmet.h"
#include "lpfc_crtn.h"
#include "lpfc_logmsg.h"
#include "lpfc_compat.h"
@@ -2796,6 +2793,123 @@ lpfc_sli_get_buff(struct lpfc_hba *phba,
}
/**
+ * lpfc_nvme_unsol_ls_handler - Process an unsolicited event data buffer
+ * containing a NVME LS request.
+ * @phba: pointer to lpfc hba data structure.
+ * @piocb: pointer to the iocbq struct representing the sequence starting
+ * frame.
+ *
+ * This routine first validates the NVME LS frame and that there is a login
+ * with the port that sent the LS, and then calls the appropriate NVMe host
+ * or target LS request handler.
+ **/
+static void
+lpfc_nvme_unsol_ls_handler(struct lpfc_hba *phba, struct lpfc_iocbq *piocb)
+{
+ struct lpfc_nodelist *ndlp;
+ struct lpfc_dmabuf *d_buf;
+ struct hbq_dmabuf *nvmebuf;
+ struct fc_frame_header *fc_hdr;
+ struct lpfc_async_xchg_ctx *axchg = NULL;
+ char *failwhy = NULL;
+ uint32_t oxid, sid, did, fctl, size;
+ int ret = 1;
+
+ d_buf = piocb->context2;
+
+ nvmebuf = container_of(d_buf, struct hbq_dmabuf, dbuf);
+ fc_hdr = nvmebuf->hbuf.virt;
+ oxid = be16_to_cpu(fc_hdr->fh_ox_id);
+ sid = sli4_sid_from_fc_hdr(fc_hdr);
+ did = sli4_did_from_fc_hdr(fc_hdr);
+ fctl = (fc_hdr->fh_f_ctl[0] << 16 |
+ fc_hdr->fh_f_ctl[1] << 8 |
+ fc_hdr->fh_f_ctl[2]);
+ size = bf_get(lpfc_rcqe_length, &nvmebuf->cq_event.cqe.rcqe_cmpl);
+
+ lpfc_nvmeio_data(phba, "NVME LS RCV: xri x%x sz %d from %06x\n",
+ oxid, size, sid);
+
+ if (phba->pport->load_flag & FC_UNLOADING) {
+ failwhy = "Driver Unloading";
+ } else if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME)) {
+ failwhy = "NVME FC4 Disabled";
+ } else if (!phba->nvmet_support && !phba->pport->localport) {
+ failwhy = "No Localport";
+ } else if (phba->nvmet_support && !phba->targetport) {
+ failwhy = "No Targetport";
+ } else if (unlikely(fc_hdr->fh_r_ctl != FC_RCTL_ELS4_REQ)) {
+ failwhy = "Bad NVME LS R_CTL";
+ } else if (unlikely((fctl & 0x00FF0000) !=
+ (FC_FC_FIRST_SEQ | FC_FC_END_SEQ | FC_FC_SEQ_INIT))) {
+ failwhy = "Bad NVME LS F_CTL";
+ } else {
+ axchg = kzalloc(sizeof(*axchg), GFP_ATOMIC);
+ if (!axchg)
+ failwhy = "No CTX memory";
+ }
+
+ if (unlikely(failwhy)) {
+ lpfc_printf_log(phba, KERN_ERR, LOG_NVME_DISC | LOG_NVME_IOERR,
+ "6154 Drop NVME LS: SID %06X OXID x%X: %s\n",
+ sid, oxid, failwhy);
+ goto out_fail;
+ }
+
+ /* validate the source of the LS is logged in */
+ ndlp = lpfc_findnode_did(phba->pport, sid);
+ if (!ndlp || !NLP_CHK_NODE_ACT(ndlp) ||
+ ((ndlp->nlp_state != NLP_STE_UNMAPPED_NODE) &&
+ (ndlp->nlp_state != NLP_STE_MAPPED_NODE))) {
+ lpfc_printf_log(phba, KERN_ERR, LOG_NVME_DISC,
+ "6216 NVME Unsol rcv: No ndlp: "
+ "NPort_ID x%x oxid x%x\n",
+ sid, oxid);
+ goto out_fail;
+ }
+
+ axchg->phba = phba;
+ axchg->ndlp = ndlp;
+ axchg->size = size;
+ axchg->oxid = oxid;
+ axchg->sid = sid;
+ axchg->wqeq = NULL;
+ axchg->state = LPFC_NVME_STE_LS_RCV;
+ axchg->entry_cnt = 1;
+ axchg->rqb_buffer = (void *)nvmebuf;
+ axchg->hdwq = &phba->sli4_hba.hdwq[0];
+ axchg->payload = nvmebuf->dbuf.virt;
+ INIT_LIST_HEAD(&axchg->list);
+
+ if (phba->nvmet_support)
+ ret = lpfc_nvmet_handle_lsreq(phba, axchg);
+ else
+ ret = lpfc_nvme_handle_lsreq(phba, axchg);
+
+ /* if zero, LS was successfully handled. If non-zero, LS not handled */
+ if (!ret)
+ return;
+
+ lpfc_printf_log(phba, KERN_ERR, LOG_NVME_DISC | LOG_NVME_IOERR,
+ "6155 Drop NVME LS from DID %06X: SID %06X OXID x%X "
+ "NVMe%s handler failed %d\n",
+ did, sid, oxid,
+ (phba->nvmet_support) ? "T" : "I", ret);
+
+out_fail:
+
+ /* recycle receive buffer */
+ lpfc_in_buf_free(phba, &nvmebuf->dbuf);
+
+ /* If start of new exchange, abort it */
+ if (axchg && (fctl & FC_FC_FIRST_SEQ && !(fctl & FC_FC_EX_CTX)))
+ ret = lpfc_nvme_unsol_ls_issue_abort(phba, axchg, sid, oxid);
+
+ if (ret)
+ kfree(axchg);
+}
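
The handler above folds the three F_CTL bytes of the FC header into one 24-bit value before testing the sequence-control bits. A small userspace sketch of that arithmetic (bit values mirror <scsi/fc/fc_fs.h>; the sample header bytes are made up):

/* Userspace sketch of the F_CTL check; sample header bytes are invented. */
#include <stdint.h>
#include <stdio.h>

#define FC_FC_FIRST_SEQ (1 << 21)   /* first sequence of the exchange */
#define FC_FC_END_SEQ   (1 << 19)   /* last frame of the sequence */
#define FC_FC_SEQ_INIT  (1 << 16)   /* sequence initiative transferred */

int main(void)
{
	/* fh_f_ctl[] as it appears in the FC frame header (sample values) */
	uint8_t fh_f_ctl[3] = { 0x29, 0x00, 0x00 };

	uint32_t fctl = (fh_f_ctl[0] << 16) | (fh_f_ctl[1] << 8) | fh_f_ctl[2];

	/* an unsolicited LS must be first+last frame and carry seq initiative */
	if ((fctl & 0x00FF0000) ==
	    (FC_FC_FIRST_SEQ | FC_FC_END_SEQ | FC_FC_SEQ_INIT))
		printf("F_CTL ok: x%06x\n", fctl);
	else
		printf("bad NVME LS F_CTL: x%06x\n", fctl);
	return 0;
}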
+
+/**
* lpfc_complete_unsol_iocb - Complete an unsolicited sequence
* @phba: Pointer to HBA context object.
* @pring: Pointer to driver SLI ring object.
@@ -2816,7 +2930,7 @@ lpfc_complete_unsol_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
switch (fch_type) {
case FC_TYPE_NVME:
- lpfc_nvmet_unsol_ls_event(phba, pring, saveq);
+ lpfc_nvme_unsol_ls_handler(phba, saveq);
return 1;
default:
break;
@@ -13981,8 +14095,8 @@ lpfc_sli4_nvmet_handle_rcqe(struct lpfc_hba *phba, struct lpfc_queue *cq,
/* Just some basic sanity checks on FCP Command frame */
fctl = (fc_hdr->fh_f_ctl[0] << 16 |
- fc_hdr->fh_f_ctl[1] << 8 |
- fc_hdr->fh_f_ctl[2]);
+ fc_hdr->fh_f_ctl[1] << 8 |
+ fc_hdr->fh_f_ctl[2]);
if (((fctl &
(FC_FC_FIRST_SEQ | FC_FC_END_SEQ | FC_FC_SEQ_INIT)) !=
(FC_FC_FIRST_SEQ | FC_FC_END_SEQ | FC_FC_SEQ_INIT)) ||
@@ -19891,7 +20005,7 @@ lpfc_sli4_issue_wqe(struct lpfc_hba *phba, struct lpfc_sli4_hdw_queue *qp,
struct lpfc_iocbq *pwqe)
{
union lpfc_wqe128 *wqe = &pwqe->wqe;
- struct lpfc_nvmet_rcv_ctx *ctxp;
+ struct lpfc_async_xchg_ctx *ctxp;
struct lpfc_queue *wq;
struct lpfc_sglq *sglq;
struct lpfc_sli_ring *pring;
diff --git a/drivers/scsi/scsi_ioctl.c b/drivers/scsi/scsi_ioctl.c
index 8f3af87b6bb0..45d04b7b2643 100644
--- a/drivers/scsi/scsi_ioctl.c
+++ b/drivers/scsi/scsi_ioctl.c
@@ -211,18 +211,18 @@ static int scsi_ioctl_common(struct scsi_device *sdev, int cmd, void __user *arg
}
switch (cmd) {
- case SCSI_IOCTL_GET_IDLUN:
- if (!access_ok(arg, sizeof(struct scsi_idlun)))
+ case SCSI_IOCTL_GET_IDLUN: {
+ struct scsi_idlun v = {
+ .dev_id = (sdev->id & 0xff)
+ + ((sdev->lun & 0xff) << 8)
+ + ((sdev->channel & 0xff) << 16)
+ + ((sdev->host->host_no & 0xff) << 24),
+ .host_unique_id = sdev->host->unique_id
+ };
+ if (copy_to_user(arg, &v, sizeof(struct scsi_idlun)))
return -EFAULT;
-
- __put_user((sdev->id & 0xff)
- + ((sdev->lun & 0xff) << 8)
- + ((sdev->channel & 0xff) << 16)
- + ((sdev->host->host_no & 0xff) << 24),
- &((struct scsi_idlun __user *)arg)->dev_id);
- __put_user(sdev->host->unique_id,
- &((struct scsi_idlun __user *)arg)->host_unique_id);
return 0;
+ }
case SCSI_IOCTL_GET_BUS_NUMBER:
return put_user(sdev->host->host_no, (int __user *)arg);
case SCSI_IOCTL_PROBE_HOST:
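
The reworked SCSI_IOCTL_GET_IDLUN case fills a struct scsi_idlun on the stack and copies it out with one copy_to_user() instead of two unchecked __put_user() calls. The dev_id packing itself is byte-field arithmetic, sketched below with invented device numbers (OR is used instead of the driver's '+'; the fields do not overlap, so the result is the same):

/* Illustration of the dev_id packing used by SCSI_IOCTL_GET_IDLUN;
 * the id/lun/channel/host numbers are sample values.
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t pack_dev_id(unsigned id, unsigned lun,
			    unsigned channel, unsigned host_no)
{
	return (id & 0xff) |
	       ((lun & 0xff) << 8) |
	       ((channel & 0xff) << 16) |
	       ((host_no & 0xff) << 24);
}

int main(void)
{
	/* e.g. host 2, channel 0, id 3, lun 1 */
	uint32_t dev_id = pack_dev_id(3, 1, 0, 2);

	printf("dev_id = 0x%08x\n", dev_id);   /* 0x02000103 */
	return 0;
}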
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 06c260f6cdae..df4905df5cd4 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -978,28 +978,12 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
scsi_io_completion_action(cmd, result);
}
-static blk_status_t scsi_init_sgtable(struct request *req,
- struct scsi_data_buffer *sdb)
+static inline bool scsi_cmd_needs_dma_drain(struct scsi_device *sdev,
+ struct request *rq)
{
- int count;
-
- /*
- * If sg table allocation fails, requeue request later.
- */
- if (unlikely(sg_alloc_table_chained(&sdb->table,
- blk_rq_nr_phys_segments(req), sdb->table.sgl,
- SCSI_INLINE_SG_CNT)))
- return BLK_STS_RESOURCE;
-
- /*
- * Next, walk the list, and fill in the addresses and sizes of
- * each segment.
- */
- count = blk_rq_map_sg(req->q, req, sdb->table.sgl);
- BUG_ON(count > sdb->table.nents);
- sdb->table.nents = count;
- sdb->length = blk_rq_payload_bytes(req);
- return BLK_STS_OK;
+ return sdev->dma_drain_len && blk_rq_is_passthrough(rq) &&
+ !op_is_write(req_op(rq)) &&
+ sdev->host->hostt->dma_need_drain(rq);
}
/*
@@ -1015,19 +999,62 @@ static blk_status_t scsi_init_sgtable(struct request *req,
*/
blk_status_t scsi_init_io(struct scsi_cmnd *cmd)
{
+ struct scsi_device *sdev = cmd->device;
struct request *rq = cmd->request;
+ unsigned short nr_segs = blk_rq_nr_phys_segments(rq);
+ struct scatterlist *last_sg = NULL;
blk_status_t ret;
+ bool need_drain = scsi_cmd_needs_dma_drain(sdev, rq);
+ int count;
- if (WARN_ON_ONCE(!blk_rq_nr_phys_segments(rq)))
+ if (WARN_ON_ONCE(!nr_segs))
return BLK_STS_IOERR;
- ret = scsi_init_sgtable(rq, &cmd->sdb);
- if (ret)
- return ret;
+ /*
+ * Make sure there is space for the drain. The driver must adjust
+ * max_hw_segments to be prepared for this.
+ */
+ if (need_drain)
+ nr_segs++;
+
+ /*
+ * If sg table allocation fails, requeue request later.
+ */
+ if (unlikely(sg_alloc_table_chained(&cmd->sdb.table, nr_segs,
+ cmd->sdb.table.sgl, SCSI_INLINE_SG_CNT)))
+ return BLK_STS_RESOURCE;
+
+ /*
+ * Next, walk the list, and fill in the addresses and sizes of
+ * each segment.
+ */
+ count = __blk_rq_map_sg(rq->q, rq, cmd->sdb.table.sgl, &last_sg);
+
+ if (blk_rq_bytes(rq) & rq->q->dma_pad_mask) {
+ unsigned int pad_len =
+ (rq->q->dma_pad_mask & ~blk_rq_bytes(rq)) + 1;
+
+ last_sg->length += pad_len;
+ cmd->extra_len += pad_len;
+ }
+
+ if (need_drain) {
+ sg_unmark_end(last_sg);
+ last_sg = sg_next(last_sg);
+ sg_set_buf(last_sg, sdev->dma_drain_buf, sdev->dma_drain_len);
+ sg_mark_end(last_sg);
+
+ cmd->extra_len += sdev->dma_drain_len;
+ count++;
+ }
+
+ BUG_ON(count > cmd->sdb.table.nents);
+ cmd->sdb.table.nents = count;
+ cmd->sdb.length = blk_rq_payload_bytes(rq);
if (blk_integrity_rq(rq)) {
struct scsi_data_buffer *prot_sdb = cmd->prot_sdb;
- int ivecs, count;
+ int ivecs;
if (WARN_ON_ONCE(!prot_sdb)) {
/*
@@ -1610,12 +1637,7 @@ static bool scsi_mq_get_budget(struct blk_mq_hw_ctx *hctx)
struct request_queue *q = hctx->queue;
struct scsi_device *sdev = q->queuedata;
- if (scsi_dev_queue_ready(q, sdev))
- return true;
-
- if (atomic_read(&sdev->device_busy) == 0 && !scsi_device_blocked(sdev))
- blk_mq_delay_run_hw_queue(hctx, SCSI_QUEUE_DELAY);
- return false;
+ return scsi_dev_queue_ready(q, sdev);
}
static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
@@ -1684,6 +1706,7 @@ out_put_budget:
case BLK_STS_OK:
break;
case BLK_STS_RESOURCE:
+ case BLK_STS_ZONE_RESOURCE:
if (atomic_read(&sdev->device_busy) ||
scsi_device_blocked(sdev))
ret = BLK_STS_DEV_RESOURCE;
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index a793cb08d025..7b0383e42b4c 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -1206,6 +1206,12 @@ static blk_status_t sd_setup_read_write_cmnd(struct scsi_cmnd *cmd)
}
}
+ if (req_op(rq) == REQ_OP_ZONE_APPEND) {
+ ret = sd_zbc_prepare_zone_append(cmd, &lba, nr_blocks);
+ if (ret)
+ return ret;
+ }
+
fua = rq->cmd_flags & REQ_FUA ? 0x8 : 0;
dix = scsi_prot_sg_count(cmd);
dif = scsi_host_dif_capable(cmd->device->host, sdkp->protection_type);
@@ -1287,6 +1293,7 @@ static blk_status_t sd_init_command(struct scsi_cmnd *cmd)
return sd_setup_flush_cmnd(cmd);
case REQ_OP_READ:
case REQ_OP_WRITE:
+ case REQ_OP_ZONE_APPEND:
return sd_setup_read_write_cmnd(cmd);
case REQ_OP_ZONE_RESET:
return sd_zbc_setup_zone_mgmt_cmnd(cmd, ZO_RESET_WRITE_POINTER,
@@ -2055,7 +2062,7 @@ static int sd_done(struct scsi_cmnd *SCpnt)
out:
if (sd_is_zoned(sdkp))
- sd_zbc_complete(SCpnt, good_bytes, &sshdr);
+ good_bytes = sd_zbc_complete(SCpnt, good_bytes, &sshdr);
SCSI_LOG_HLCOMPLETE(1, scmd_printk(KERN_INFO, SCpnt,
"sd_done: completed %d of %d bytes\n",
@@ -3372,6 +3379,10 @@ static int sd_probe(struct device *dev)
sdkp->first_scan = 1;
sdkp->max_medium_access_timeouts = SD_MAX_MEDIUM_TIMEOUTS;
+ error = sd_zbc_init_disk(sdkp);
+ if (error)
+ goto out_free_index;
+
sd_revalidate_disk(gd);
gd->flags = GENHD_FL_EXT_DEVT;
@@ -3409,6 +3420,7 @@ static int sd_probe(struct device *dev)
out_put:
put_disk(gd);
out_free:
+ sd_zbc_release_disk(sdkp);
kfree(sdkp);
out:
scsi_autopm_put_device(sdp);
@@ -3485,6 +3497,8 @@ static void scsi_disk_release(struct device *dev)
put_disk(disk);
put_device(&sdkp->device->sdev_gendev);
+ sd_zbc_release_disk(sdkp);
+
kfree(sdkp);
}
diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
index 50fff0bf8c8e..3a74f4b45134 100644
--- a/drivers/scsi/sd.h
+++ b/drivers/scsi/sd.h
@@ -79,6 +79,12 @@ struct scsi_disk {
u32 zones_optimal_open;
u32 zones_optimal_nonseq;
u32 zones_max_open;
+ u32 *zones_wp_offset;
+ spinlock_t zones_wp_offset_lock;
+ u32 *rev_wp_offset;
+ struct mutex rev_mutex;
+ struct work_struct zone_wp_offset_work;
+ char *zone_wp_update_buf;
#endif
atomic_t openers;
sector_t capacity; /* size in logical blocks */
@@ -207,17 +213,35 @@ static inline int sd_is_zoned(struct scsi_disk *sdkp)
#ifdef CONFIG_BLK_DEV_ZONED
+int sd_zbc_init_disk(struct scsi_disk *sdkp);
+void sd_zbc_release_disk(struct scsi_disk *sdkp);
extern int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buffer);
extern void sd_zbc_print_zones(struct scsi_disk *sdkp);
blk_status_t sd_zbc_setup_zone_mgmt_cmnd(struct scsi_cmnd *cmd,
unsigned char op, bool all);
-extern void sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes,
- struct scsi_sense_hdr *sshdr);
+unsigned int sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes,
+ struct scsi_sense_hdr *sshdr);
int sd_zbc_report_zones(struct gendisk *disk, sector_t sector,
unsigned int nr_zones, report_zones_cb cb, void *data);
+blk_status_t sd_zbc_prepare_zone_append(struct scsi_cmnd *cmd, sector_t *lba,
+ unsigned int nr_blocks);
+
#else /* CONFIG_BLK_DEV_ZONED */
+static inline int sd_zbc_init(void)
+{
+ return 0;
+}
+
+static inline int sd_zbc_init_disk(struct scsi_disk *sdkp)
+{
+ return 0;
+}
+
+static inline void sd_zbc_exit(void) {}
+static inline void sd_zbc_release_disk(struct scsi_disk *sdkp) {}
+
static inline int sd_zbc_read_zones(struct scsi_disk *sdkp,
unsigned char *buf)
{
@@ -233,9 +257,18 @@ static inline blk_status_t sd_zbc_setup_zone_mgmt_cmnd(struct scsi_cmnd *cmd,
return BLK_STS_TARGET;
}
-static inline void sd_zbc_complete(struct scsi_cmnd *cmd,
- unsigned int good_bytes,
- struct scsi_sense_hdr *sshdr) {}
+static inline unsigned int sd_zbc_complete(struct scsi_cmnd *cmd,
+ unsigned int good_bytes, struct scsi_sense_hdr *sshdr)
+{
+ return 0;
+}
+
+static inline blk_status_t sd_zbc_prepare_zone_append(struct scsi_cmnd *cmd,
+ sector_t *lba,
+ unsigned int nr_blocks)
+{
+ return BLK_STS_TARGET;
+}
#define sd_zbc_report_zones NULL
diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c
index f45c22b09726..6f7eba66687e 100644
--- a/drivers/scsi/sd_zbc.c
+++ b/drivers/scsi/sd_zbc.c
@@ -11,6 +11,7 @@
#include <linux/blkdev.h>
#include <linux/vmalloc.h>
#include <linux/sched/mm.h>
+#include <linux/mutex.h>
#include <asm/unaligned.h>
@@ -19,11 +20,36 @@
#include "sd.h"
+static unsigned int sd_zbc_get_zone_wp_offset(struct blk_zone *zone)
+{
+ if (zone->type == ZBC_ZONE_TYPE_CONV)
+ return 0;
+
+ switch (zone->cond) {
+ case BLK_ZONE_COND_IMP_OPEN:
+ case BLK_ZONE_COND_EXP_OPEN:
+ case BLK_ZONE_COND_CLOSED:
+ return zone->wp - zone->start;
+ case BLK_ZONE_COND_FULL:
+ return zone->len;
+ case BLK_ZONE_COND_EMPTY:
+ case BLK_ZONE_COND_OFFLINE:
+ case BLK_ZONE_COND_READONLY:
+ default:
+ /*
+ * Offline and read-only zones do not have a valid
+ * write pointer. Use 0 as for an empty zone.
+ */
+ return 0;
+ }
+}
+
static int sd_zbc_parse_report(struct scsi_disk *sdkp, u8 *buf,
unsigned int idx, report_zones_cb cb, void *data)
{
struct scsi_device *sdp = sdkp->device;
struct blk_zone zone = { 0 };
+ int ret;
zone.type = buf[0] & 0x0f;
zone.cond = (buf[1] >> 4) & 0xf;
@@ -39,7 +65,14 @@ static int sd_zbc_parse_report(struct scsi_disk *sdkp, u8 *buf,
zone.cond == ZBC_ZONE_COND_FULL)
zone.wp = zone.start + zone.len;
- return cb(&zone, idx, data);
+ ret = cb(&zone, idx, data);
+ if (ret)
+ return ret;
+
+ if (sdkp->rev_wp_offset)
+ sdkp->rev_wp_offset[idx] = sd_zbc_get_zone_wp_offset(&zone);
+
+ return 0;
}
/**
@@ -136,8 +169,7 @@ static void *sd_zbc_alloc_report_buffer(struct scsi_disk *sdkp,
while (bufsize >= SECTOR_SIZE) {
buf = __vmalloc(bufsize,
- GFP_KERNEL | __GFP_ZERO | __GFP_NORETRY,
- PAGE_KERNEL);
+ GFP_KERNEL | __GFP_ZERO | __GFP_NORETRY);
if (buf) {
*buflen = bufsize;
return buf;
@@ -209,6 +241,136 @@ out:
return ret;
}
+static blk_status_t sd_zbc_cmnd_checks(struct scsi_cmnd *cmd)
+{
+ struct request *rq = cmd->request;
+ struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
+ sector_t sector = blk_rq_pos(rq);
+
+ if (!sd_is_zoned(sdkp))
+ /* Not a zoned device */
+ return BLK_STS_IOERR;
+
+ if (sdkp->device->changed)
+ return BLK_STS_IOERR;
+
+ if (sector & (sd_zbc_zone_sectors(sdkp) - 1))
+ /* Unaligned request */
+ return BLK_STS_IOERR;
+
+ return BLK_STS_OK;
+}
+
+#define SD_ZBC_INVALID_WP_OFST (~0u)
+#define SD_ZBC_UPDATING_WP_OFST (SD_ZBC_INVALID_WP_OFST - 1)
+
+static int sd_zbc_update_wp_offset_cb(struct blk_zone *zone, unsigned int idx,
+ void *data)
+{
+ struct scsi_disk *sdkp = data;
+
+ lockdep_assert_held(&sdkp->zones_wp_offset_lock);
+
+ sdkp->zones_wp_offset[idx] = sd_zbc_get_zone_wp_offset(zone);
+
+ return 0;
+}
+
+static void sd_zbc_update_wp_offset_workfn(struct work_struct *work)
+{
+ struct scsi_disk *sdkp;
+ unsigned int zno;
+ int ret;
+
+ sdkp = container_of(work, struct scsi_disk, zone_wp_offset_work);
+
+ spin_lock_bh(&sdkp->zones_wp_offset_lock);
+ for (zno = 0; zno < sdkp->nr_zones; zno++) {
+ if (sdkp->zones_wp_offset[zno] != SD_ZBC_UPDATING_WP_OFST)
+ continue;
+
+ spin_unlock_bh(&sdkp->zones_wp_offset_lock);
+ ret = sd_zbc_do_report_zones(sdkp, sdkp->zone_wp_update_buf,
+ SD_BUF_SIZE,
+ zno * sdkp->zone_blocks, true);
+ spin_lock_bh(&sdkp->zones_wp_offset_lock);
+ if (!ret)
+ sd_zbc_parse_report(sdkp, sdkp->zone_wp_update_buf + 64,
+ zno, sd_zbc_update_wp_offset_cb,
+ sdkp);
+ }
+ spin_unlock_bh(&sdkp->zones_wp_offset_lock);
+
+ scsi_device_put(sdkp->device);
+}
+
+/**
+ * sd_zbc_prepare_zone_append() - Prepare an emulated ZONE_APPEND command.
+ * @cmd: the command to setup
+ * @lba: the LBA to patch
+ * @nr_blocks: the number of LBAs to be written
+ *
+ * Called from sd_setup_read_write_cmnd() for REQ_OP_ZONE_APPEND.
+ * sd_zbc_prepare_zone_append() handles the necessary zone write locking and
+ * patching of the lba for an emulated ZONE_APPEND command.
+ *
+ * In case the cached write pointer offset is %SD_ZBC_INVALID_WP_OFST it will
+ * schedule a REPORT ZONES command and return BLK_STS_DEV_RESOURCE.
+ */
+blk_status_t sd_zbc_prepare_zone_append(struct scsi_cmnd *cmd, sector_t *lba,
+ unsigned int nr_blocks)
+{
+ struct request *rq = cmd->request;
+ struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
+ unsigned int wp_offset, zno = blk_rq_zone_no(rq);
+ blk_status_t ret;
+
+ ret = sd_zbc_cmnd_checks(cmd);
+ if (ret != BLK_STS_OK)
+ return ret;
+
+ if (!blk_rq_zone_is_seq(rq))
+ return BLK_STS_IOERR;
+
+ /* Unlock of the write lock will happen in sd_zbc_complete() */
+ if (!blk_req_zone_write_trylock(rq))
+ return BLK_STS_ZONE_RESOURCE;
+
+ spin_lock_bh(&sdkp->zones_wp_offset_lock);
+ wp_offset = sdkp->zones_wp_offset[zno];
+ switch (wp_offset) {
+ case SD_ZBC_INVALID_WP_OFST:
+ /*
+ * We are about to schedule work to update a zone write pointer
+ * offset, which will cause the zone append command to be
+ * requeued. So make sure that the scsi device does not go away
+ * while the work is being processed.
+ */
+ if (scsi_device_get(sdkp->device)) {
+ ret = BLK_STS_IOERR;
+ break;
+ }
+ sdkp->zones_wp_offset[zno] = SD_ZBC_UPDATING_WP_OFST;
+ schedule_work(&sdkp->zone_wp_offset_work);
+ fallthrough;
+ case SD_ZBC_UPDATING_WP_OFST:
+ ret = BLK_STS_DEV_RESOURCE;
+ break;
+ default:
+ wp_offset = sectors_to_logical(sdkp->device, wp_offset);
+ if (wp_offset + nr_blocks > sdkp->zone_blocks) {
+ ret = BLK_STS_IOERR;
+ break;
+ }
+
+ *lba += wp_offset;
+ }
+ spin_unlock_bh(&sdkp->zones_wp_offset_lock);
+ if (ret)
+ blk_req_zone_write_unlock(rq);
+ return ret;
+}
+
/**
* sd_zbc_setup_zone_mgmt_cmnd - Prepare a zone ZBC_OUT command. The operations
* can be RESET WRITE POINTER, OPEN, CLOSE or FINISH.
@@ -223,20 +385,14 @@ blk_status_t sd_zbc_setup_zone_mgmt_cmnd(struct scsi_cmnd *cmd,
unsigned char op, bool all)
{
struct request *rq = cmd->request;
- struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
sector_t sector = blk_rq_pos(rq);
+ struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
sector_t block = sectors_to_logical(sdkp->device, sector);
+ blk_status_t ret;
- if (!sd_is_zoned(sdkp))
- /* Not a zoned device */
- return BLK_STS_IOERR;
-
- if (sdkp->device->changed)
- return BLK_STS_IOERR;
-
- if (sector & (sd_zbc_zone_sectors(sdkp) - 1))
- /* Unaligned request */
- return BLK_STS_IOERR;
+ ret = sd_zbc_cmnd_checks(cmd);
+ if (ret != BLK_STS_OK)
+ return ret;
cmd->cmd_len = 16;
memset(cmd->cmnd, 0, cmd->cmd_len);
@@ -255,16 +411,105 @@ blk_status_t sd_zbc_setup_zone_mgmt_cmnd(struct scsi_cmnd *cmd,
return BLK_STS_OK;
}
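+/*
+ * Check if a completed request modified a zone write pointer and thus needs
+ * its cached write pointer offset updated: zone append, finish and reset
+ * operations always do, while regular writes do only when they target a
+ * sequential write required zone.
+ */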
+static bool sd_zbc_need_zone_wp_update(struct request *rq)
+{
+ switch (req_op(rq)) {
+ case REQ_OP_ZONE_APPEND:
+ case REQ_OP_ZONE_FINISH:
+ case REQ_OP_ZONE_RESET:
+ case REQ_OP_ZONE_RESET_ALL:
+ return true;
+ case REQ_OP_WRITE:
+ case REQ_OP_WRITE_ZEROES:
+ case REQ_OP_WRITE_SAME:
+ return blk_rq_zone_is_seq(rq);
+ default:
+ return false;
+ }
+}
+
+/**
+ * sd_zbc_zone_wp_update - Update cached zone write pointer upon cmd completion
+ * @cmd: Completed command
+ * @good_bytes: Command reply bytes
+ *
+ * Called from sd_zbc_complete() to handle the update of the cached zone write
+ * pointer value in case an update is needed.
+ */
+static unsigned int sd_zbc_zone_wp_update(struct scsi_cmnd *cmd,
+ unsigned int good_bytes)
+{
+ int result = cmd->result;
+ struct request *rq = cmd->request;
+ struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
+ unsigned int zno = blk_rq_zone_no(rq);
+ enum req_opf op = req_op(rq);
+
+ /*
+ * If we got an error for a command that needs updating the write
+ * pointer offset cache, we must mark the zone wp offset entry as
+ * invalid to force an update from disk the next time a zone append
+ * command is issued.
+ */
+ spin_lock_bh(&sdkp->zones_wp_offset_lock);
+
+ if (result && op != REQ_OP_ZONE_RESET_ALL) {
+ if (op == REQ_OP_ZONE_APPEND) {
+ /* Force complete completion (no retry) */
+ good_bytes = 0;
+ scsi_set_resid(cmd, blk_rq_bytes(rq));
+ }
+
+ /*
+ * Force an update of the zone write pointer offset on
+ * the next zone append access.
+ */
+ if (sdkp->zones_wp_offset[zno] != SD_ZBC_UPDATING_WP_OFST)
+ sdkp->zones_wp_offset[zno] = SD_ZBC_INVALID_WP_OFST;
+ goto unlock_wp_offset;
+ }
+
+ switch (op) {
+ case REQ_OP_ZONE_APPEND:
+ rq->__sector += sdkp->zones_wp_offset[zno];
+ fallthrough;
+ case REQ_OP_WRITE_ZEROES:
+ case REQ_OP_WRITE_SAME:
+ case REQ_OP_WRITE:
+ if (sdkp->zones_wp_offset[zno] < sd_zbc_zone_sectors(sdkp))
+ sdkp->zones_wp_offset[zno] +=
+ good_bytes >> SECTOR_SHIFT;
+ break;
+ case REQ_OP_ZONE_RESET:
+ sdkp->zones_wp_offset[zno] = 0;
+ break;
+ case REQ_OP_ZONE_FINISH:
+ sdkp->zones_wp_offset[zno] = sd_zbc_zone_sectors(sdkp);
+ break;
+ case REQ_OP_ZONE_RESET_ALL:
+ memset(sdkp->zones_wp_offset, 0,
+ sdkp->nr_zones * sizeof(unsigned int));
+ break;
+ default:
+ break;
+ }
+
+unlock_wp_offset:
+ spin_unlock_bh(&sdkp->zones_wp_offset_lock);
+
+ return good_bytes;
+}
+
/**
* sd_zbc_complete - ZBC command post processing.
* @cmd: Completed command
* @good_bytes: Command reply bytes
* @sshdr: command sense header
*
- * Called from sd_done(). Process report zones reply and handle reset zone
- * and write commands errors.
+ * Called from sd_done() to handle zone command errors and updates to the
+ * device queue zone write pointer offset cache.
*/
-void sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes,
+unsigned int sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes,
struct scsi_sense_hdr *sshdr)
{
int result = cmd->result;
@@ -280,7 +525,13 @@ void sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes,
* so be quiet about the error.
*/
rq->rq_flags |= RQF_QUIET;
- }
+ } else if (sd_zbc_need_zone_wp_update(rq))
+ good_bytes = sd_zbc_zone_wp_update(cmd, good_bytes);
+
+ if (req_op(rq) == REQ_OP_ZONE_APPEND)
+ blk_req_zone_write_unlock(rq);
+
+ return good_bytes;
}
/**
@@ -382,11 +633,67 @@ static int sd_zbc_check_capacity(struct scsi_disk *sdkp, unsigned char *buf,
return 0;
}
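+/*
+ * Callback of blk_revalidate_disk_zones(): install the write pointer offset
+ * array filled during the zone report (rev_wp_offset) as the new
+ * zones_wp_offset cache.
+ */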
+static void sd_zbc_revalidate_zones_cb(struct gendisk *disk)
+{
+ struct scsi_disk *sdkp = scsi_disk(disk);
+
+ swap(sdkp->zones_wp_offset, sdkp->rev_wp_offset);
+}
+
+static int sd_zbc_revalidate_zones(struct scsi_disk *sdkp,
+ u32 zone_blocks,
+ unsigned int nr_zones)
+{
+ struct gendisk *disk = sdkp->disk;
+ int ret = 0;
+
+ /*
+ * Make sure zone revalidation is serialized to ensure exclusive
+ * updates of the scsi disk data.
+ */
+ mutex_lock(&sdkp->rev_mutex);
+
+ /*
+ * Revalidate the disk zones to update the device request queue zone
+ * bitmaps and the zone write pointer offset array. Do this only once
+ * the device capacity is set on the second revalidate execution for
+ * disk scan or if something changed when executing a normal revalidate.
+ */
+ if (sdkp->first_scan) {
+ sdkp->zone_blocks = zone_blocks;
+ sdkp->nr_zones = nr_zones;
+ goto unlock;
+ }
+
+ if (sdkp->zone_blocks == zone_blocks &&
+ sdkp->nr_zones == nr_zones &&
+ disk->queue->nr_zones == nr_zones)
+ goto unlock;
+
+ sdkp->rev_wp_offset = kvcalloc(nr_zones, sizeof(u32), GFP_NOIO);
+ if (!sdkp->rev_wp_offset) {
+ ret = -ENOMEM;
+ goto unlock;
+ }
+
+ ret = blk_revalidate_disk_zones(disk, sd_zbc_revalidate_zones_cb);
+
+ kvfree(sdkp->rev_wp_offset);
+ sdkp->rev_wp_offset = NULL;
+
+unlock:
+ mutex_unlock(&sdkp->rev_mutex);
+
+ return ret;
+}
+
int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf)
{
struct gendisk *disk = sdkp->disk;
+ struct request_queue *q = disk->queue;
unsigned int nr_zones;
u32 zone_blocks = 0;
+ u32 max_append;
int ret;
if (!sd_is_zoned(sdkp))
@@ -407,35 +714,31 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf)
goto err;
/* The drive satisfies the kernel restrictions: set it up */
- blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, sdkp->disk->queue);
- blk_queue_required_elevator_features(sdkp->disk->queue,
- ELEVATOR_F_ZBD_SEQ_WRITE);
+ blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
+ blk_queue_required_elevator_features(q, ELEVATOR_F_ZBD_SEQ_WRITE);
nr_zones = round_up(sdkp->capacity, zone_blocks) >> ilog2(zone_blocks);
/* READ16/WRITE16 is mandatory for ZBC disks */
sdkp->device->use_16_for_rw = 1;
sdkp->device->use_10_for_rw = 0;
+ ret = sd_zbc_revalidate_zones(sdkp, zone_blocks, nr_zones);
+ if (ret)
+ goto err;
+
/*
- * Revalidate the disk zone bitmaps once the block device capacity is
- * set on the second revalidate execution during disk scan and if
- * something changed when executing a normal revalidate.
+ * On the first scan 'chunk_sectors' isn't set up yet, so calling
+ * blk_queue_max_zone_append_sectors() will result in a WARN(). Defer
+ * this setting to the second scan.
*/
- if (sdkp->first_scan) {
- sdkp->zone_blocks = zone_blocks;
- sdkp->nr_zones = nr_zones;
+ if (sdkp->first_scan)
return 0;
- }
- if (sdkp->zone_blocks != zone_blocks ||
- sdkp->nr_zones != nr_zones ||
- disk->queue->nr_zones != nr_zones) {
- ret = blk_revalidate_disk_zones(disk);
- if (ret != 0)
- goto err;
- sdkp->zone_blocks = zone_blocks;
- sdkp->nr_zones = nr_zones;
- }
+ max_append = min_t(u32, logical_to_sectors(sdkp->device, zone_blocks),
+ q->limits.max_segments << (PAGE_SHIFT - 9));
+ max_append = min_t(u32, max_append, queue_max_hw_sectors(q));
+
+ blk_queue_max_zone_append_sectors(q, max_append);
return 0;
@@ -461,3 +764,28 @@ void sd_zbc_print_zones(struct scsi_disk *sdkp)
sdkp->nr_zones,
sdkp->zone_blocks);
}
+
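+/*
+ * Allocate and initialize the zoned disk state: the write pointer offset
+ * cache and its lock, the revalidation mutex, the write pointer update work
+ * and the REPORT ZONES buffer it uses.
+ */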
+int sd_zbc_init_disk(struct scsi_disk *sdkp)
+{
+ if (!sd_is_zoned(sdkp))
+ return 0;
+
+ sdkp->zones_wp_offset = NULL;
+ spin_lock_init(&sdkp->zones_wp_offset_lock);
+ sdkp->rev_wp_offset = NULL;
+ mutex_init(&sdkp->rev_mutex);
+ INIT_WORK(&sdkp->zone_wp_offset_work, sd_zbc_update_wp_offset_workfn);
+ sdkp->zone_wp_update_buf = kzalloc(SD_BUF_SIZE, GFP_KERNEL);
+ if (!sdkp->zone_wp_update_buf)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void sd_zbc_release_disk(struct scsi_disk *sdkp)
+{
+ kvfree(sdkp->zones_wp_offset);
+ sdkp->zones_wp_offset = NULL;
+ kfree(sdkp->zone_wp_update_buf);
+ sdkp->zone_wp_update_buf = NULL;
+}
diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index d2fe3fa470f9..f9b589d60a46 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -794,9 +794,8 @@ static int sr_probe(struct device *dev)
set_capacity(disk, cd->capacity);
disk->private_data = &cd->driver;
disk->queue = sdev->request_queue;
- cd->cdi.disk = disk;
- if (register_cdrom(&cd->cdi))
+ if (register_cdrom(disk, &cd->cdi))
goto fail_put;
/*
diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index fb41636519ee..072ed8728657 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -621,6 +621,64 @@ get_in_err:
}
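+/*
+ * Set as the channels' change_target_cpu_callback: when a channel moves from
+ * the "old" CPU to the "new" one, update the per-CPU channel map (stor_chns)
+ * and the alloced_cpus mask so that storvsc_do_io() keeps selecting valid
+ * outgoing channels.
+ */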
+static void storvsc_change_target_cpu(struct vmbus_channel *channel, u32 old,
+ u32 new)
+{
+ struct storvsc_device *stor_device;
+ struct vmbus_channel *cur_chn;
+ bool old_is_alloced = false;
+ struct hv_device *device;
+ unsigned long flags;
+ int cpu;
+
+ device = channel->primary_channel ?
+ channel->primary_channel->device_obj
+ : channel->device_obj;
+ stor_device = get_out_stor_device(device);
+ if (!stor_device)
+ return;
+
+ /* See storvsc_do_io() -> get_og_chn(). */
+ spin_lock_irqsave(&device->channel->lock, flags);
+
+ /*
+ * Determine whether the storvsc device has other channels assigned to
+ * the "old" CPU to update the alloced_cpus mask and the stor_chns
+ * array.
+ */
+ if (device->channel != channel && device->channel->target_cpu == old) {
+ cur_chn = device->channel;
+ old_is_alloced = true;
+ goto old_is_alloced;
+ }
+ list_for_each_entry(cur_chn, &device->channel->sc_list, sc_list) {
+ if (cur_chn == channel)
+ continue;
+ if (cur_chn->target_cpu == old) {
+ old_is_alloced = true;
+ goto old_is_alloced;
+ }
+ }
+
+old_is_alloced:
+ if (old_is_alloced)
+ WRITE_ONCE(stor_device->stor_chns[old], cur_chn);
+ else
+ cpumask_clear_cpu(old, &stor_device->alloced_cpus);
+
+ /* "Flush" the stor_chns array. */
+ for_each_possible_cpu(cpu) {
+ if (stor_device->stor_chns[cpu] && !cpumask_test_cpu(
+ cpu, &stor_device->alloced_cpus))
+ WRITE_ONCE(stor_device->stor_chns[cpu], NULL);
+ }
+
+ WRITE_ONCE(stor_device->stor_chns[new], channel);
+ cpumask_set_cpu(new, &stor_device->alloced_cpus);
+
+ spin_unlock_irqrestore(&device->channel->lock, flags);
+}
+
static void handle_sc_creation(struct vmbus_channel *new_sc)
{
struct hv_device *device = new_sc->primary_channel->device_obj;
@@ -648,6 +706,8 @@ static void handle_sc_creation(struct vmbus_channel *new_sc)
return;
}
+ new_sc->change_target_cpu_callback = storvsc_change_target_cpu;
+
/* Add the sub-channel to the array of available channels. */
stor_device->stor_chns[new_sc->target_cpu] = new_sc;
cpumask_set_cpu(new_sc->target_cpu, &stor_device->alloced_cpus);
@@ -876,6 +936,8 @@ static int storvsc_channel_init(struct hv_device *device, bool is_fc)
if (stor_device->stor_chns == NULL)
return -ENOMEM;
+ device->channel->change_target_cpu_callback = storvsc_change_target_cpu;
+
stor_device->stor_chns[device->channel->target_cpu] = device->channel;
cpumask_set_cpu(device->channel->target_cpu,
&stor_device->alloced_cpus);
@@ -1248,8 +1310,10 @@ static struct vmbus_channel *get_og_chn(struct storvsc_device *stor_device,
const struct cpumask *node_mask;
int num_channels, tgt_cpu;
- if (stor_device->num_sc == 0)
+ if (stor_device->num_sc == 0) {
+ stor_device->stor_chns[q_num] = stor_device->device->channel;
return stor_device->device->channel;
+ }
/*
* Our channel array is sparsley populated and we
@@ -1258,7 +1322,6 @@ static struct vmbus_channel *get_og_chn(struct storvsc_device *stor_device,
* The strategy is simple:
* I. Ensure NUMA locality
* II. Distribute evenly (best effort)
- * III. Mapping is persistent.
*/
node_mask = cpumask_of_node(cpu_to_node(q_num));
@@ -1268,8 +1331,10 @@ static struct vmbus_channel *get_og_chn(struct storvsc_device *stor_device,
if (cpumask_test_cpu(tgt_cpu, node_mask))
num_channels++;
}
- if (num_channels == 0)
+ if (num_channels == 0) {
+ stor_device->stor_chns[q_num] = stor_device->device->channel;
return stor_device->device->channel;
+ }
hash_qnum = q_num;
while (hash_qnum >= num_channels)
@@ -1295,6 +1360,7 @@ static int storvsc_do_io(struct hv_device *device,
struct storvsc_device *stor_device;
struct vstor_packet *vstor_packet;
struct vmbus_channel *outgoing_channel, *channel;
+ unsigned long flags;
int ret = 0;
const struct cpumask *node_mask;
int tgt_cpu;
@@ -1308,10 +1374,11 @@ static int storvsc_do_io(struct hv_device *device,
request->device = device;
/*
- * Select an an appropriate channel to send the request out.
+ * Select an appropriate channel to send the request out.
*/
- if (stor_device->stor_chns[q_num] != NULL) {
- outgoing_channel = stor_device->stor_chns[q_num];
+ /* See storvsc_change_target_cpu(). */
+ outgoing_channel = READ_ONCE(stor_device->stor_chns[q_num]);
+ if (outgoing_channel != NULL) {
if (outgoing_channel->target_cpu == q_num) {
/*
* Ideally, we want to pick a different channel if
@@ -1324,7 +1391,10 @@ static int storvsc_do_io(struct hv_device *device,
continue;
if (tgt_cpu == q_num)
continue;
- channel = stor_device->stor_chns[tgt_cpu];
+ channel = READ_ONCE(
+ stor_device->stor_chns[tgt_cpu]);
+ if (channel == NULL)
+ continue;
if (hv_get_avail_to_write_percent(
&channel->outbound)
> ring_avail_percent_lowater) {
@@ -1350,7 +1420,10 @@ static int storvsc_do_io(struct hv_device *device,
for_each_cpu(tgt_cpu, &stor_device->alloced_cpus) {
if (cpumask_test_cpu(tgt_cpu, node_mask))
continue;
- channel = stor_device->stor_chns[tgt_cpu];
+ channel = READ_ONCE(
+ stor_device->stor_chns[tgt_cpu]);
+ if (channel == NULL)
+ continue;
if (hv_get_avail_to_write_percent(
&channel->outbound)
> ring_avail_percent_lowater) {
@@ -1360,7 +1433,14 @@ static int storvsc_do_io(struct hv_device *device,
}
}
} else {
+ spin_lock_irqsave(&device->channel->lock, flags);
+ outgoing_channel = stor_device->stor_chns[q_num];
+ if (outgoing_channel != NULL) {
+ spin_unlock_irqrestore(&device->channel->lock, flags);
+ goto found_channel;
+ }
outgoing_channel = get_og_chn(stor_device, q_num);
+ spin_unlock_irqrestore(&device->channel->lock, flags);
}
found_channel:
diff --git a/drivers/soc/qcom/Kconfig b/drivers/soc/qcom/Kconfig
index bf42a17a45de..285baa7e474e 100644
--- a/drivers/soc/qcom/Kconfig
+++ b/drivers/soc/qcom/Kconfig
@@ -80,16 +80,6 @@ config QCOM_PDR_HELPERS
tristate
select QCOM_QMI_HELPERS
-config QCOM_PM
- bool "Qualcomm Power Management"
- depends on ARCH_QCOM && !ARM64
- select ARM_CPU_SUSPEND
- select QCOM_SCM
- help
- QCOM Platform specific power driver to manage cores and L2 low power
- modes. It interface with various system drivers to put the cores in
- low power modes.
-
config QCOM_QMI_HELPERS
tristate
depends on NET
diff --git a/drivers/soc/qcom/Makefile b/drivers/soc/qcom/Makefile
index 5d6b83dc58e8..92cc4232d72c 100644
--- a/drivers/soc/qcom/Makefile
+++ b/drivers/soc/qcom/Makefile
@@ -8,7 +8,6 @@ obj-$(CONFIG_QCOM_GSBI) += qcom_gsbi.o
obj-$(CONFIG_QCOM_MDT_LOADER) += mdt_loader.o
obj-$(CONFIG_QCOM_OCMEM) += ocmem.o
obj-$(CONFIG_QCOM_PDR_HELPERS) += pdr_interface.o
-obj-$(CONFIG_QCOM_PM) += spm.o
obj-$(CONFIG_QCOM_QMI_HELPERS) += qmi_helpers.o
qmi_helpers-y += qmi_encdec.o qmi_interface.o
obj-$(CONFIG_QCOM_RMTFS_MEM) += rmtfs_mem.o
diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index 741b9140992a..8f1f8fca79e3 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -226,17 +226,20 @@ config SPI_DESIGNWARE
help
general driver for SPI controller core from DesignWare
+if SPI_DESIGNWARE
+
+config SPI_DW_DMA
+ bool "DMA support for DW SPI controller"
+
config SPI_DW_PCI
tristate "PCI interface driver for DW SPI core"
- depends on SPI_DESIGNWARE && PCI
-
-config SPI_DW_MID_DMA
- bool "DMA support for DW SPI controller on Intel MID platform"
- depends on SPI_DW_PCI && DW_DMAC_PCI
+ depends on PCI
config SPI_DW_MMIO
tristate "Memory-mapped io interface driver for DW SPI core"
- depends on SPI_DESIGNWARE
+ depends on HAS_IOMEM
+
+endif
config SPI_DLN2
tristate "Diolan DLN-2 USB SPI adapter"
@@ -844,6 +847,7 @@ config SPI_TXX9
config SPI_UNIPHIER
tristate "Socionext UniPhier SPI Controller"
depends on (ARCH_UNIPHIER || COMPILE_TEST) && OF
+ depends on HAS_IOMEM
help
This enables a driver for the Socionext UniPhier SoC SCSSI SPI controller.
@@ -910,6 +914,12 @@ config SPI_ZYNQMP_GQSPI
help
Enables Xilinx GQSPI controller driver for Zynq UltraScale+ MPSoC.
+config SPI_AMD
+ tristate "AMD SPI controller"
+ depends on SPI_MASTER || COMPILE_TEST
+ help
+ Enables the SPI controller driver for AMD SoCs.
+
#
# Add new SPI master controllers in alphabetical order above this line
#
diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile
index 28f601327f8c..d2e41d3d464a 100644
--- a/drivers/spi/Makefile
+++ b/drivers/spi/Makefile
@@ -36,9 +36,10 @@ obj-$(CONFIG_SPI_COLDFIRE_QSPI) += spi-coldfire-qspi.o
obj-$(CONFIG_SPI_DAVINCI) += spi-davinci.o
obj-$(CONFIG_SPI_DLN2) += spi-dln2.o
obj-$(CONFIG_SPI_DESIGNWARE) += spi-dw.o
+spi-dw-y := spi-dw-core.o
+spi-dw-$(CONFIG_SPI_DW_DMA) += spi-dw-dma.o
obj-$(CONFIG_SPI_DW_MMIO) += spi-dw-mmio.o
-obj-$(CONFIG_SPI_DW_PCI) += spi-dw-midpci.o
-spi-dw-midpci-objs := spi-dw-pci.o spi-dw-mid.o
+obj-$(CONFIG_SPI_DW_PCI) += spi-dw-pci.o
obj-$(CONFIG_SPI_EFM32) += spi-efm32.o
obj-$(CONFIG_SPI_EP93XX) += spi-ep93xx.o
obj-$(CONFIG_SPI_FALCON) += spi-falcon.o
@@ -127,6 +128,7 @@ obj-$(CONFIG_SPI_XLP) += spi-xlp.o
obj-$(CONFIG_SPI_XTENSA_XTFPGA) += spi-xtensa-xtfpga.o
obj-$(CONFIG_SPI_ZYNQ_QSPI) += spi-zynq-qspi.o
obj-$(CONFIG_SPI_ZYNQMP_GQSPI) += spi-zynqmp-gqspi.o
+obj-$(CONFIG_SPI_AMD) += spi-amd.o
# SPI slave protocol handlers
obj-$(CONFIG_SPI_SLAVE_TIME) += spi-slave-time.o
diff --git a/drivers/spi/spi-amd.c b/drivers/spi/spi-amd.c
new file mode 100644
index 000000000000..d0aacd4de1b9
--- /dev/null
+++ b/drivers/spi/spi-amd.c
@@ -0,0 +1,315 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+//
+// AMD SPI controller driver
+//
+// Copyright (c) 2020, Advanced Micro Devices, Inc.
+//
+// Author: Sanjay R Mehta <sanju.mehta@amd.com>
+
+#include <linux/acpi.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/delay.h>
+#include <linux/spi/spi.h>
+
+#define AMD_SPI_CTRL0_REG 0x00
+#define AMD_SPI_EXEC_CMD BIT(16)
+#define AMD_SPI_FIFO_CLEAR BIT(20)
+#define AMD_SPI_BUSY BIT(31)
+
+#define AMD_SPI_OPCODE_MASK 0xFF
+
+#define AMD_SPI_ALT_CS_REG 0x1D
+#define AMD_SPI_ALT_CS_MASK 0x3
+
+#define AMD_SPI_FIFO_BASE 0x80
+#define AMD_SPI_TX_COUNT_REG 0x48
+#define AMD_SPI_RX_COUNT_REG 0x4B
+#define AMD_SPI_STATUS_REG 0x4C
+
+#define AMD_SPI_MEM_SIZE 200
+
+/* M_CMD OP codes for SPI */
+#define AMD_SPI_XFER_TX 1
+#define AMD_SPI_XFER_RX 2
+
+struct amd_spi {
+ void __iomem *io_remap_addr;
+ unsigned long io_base_addr;
+ u32 rom_addr;
+ u8 chip_select;
+};
+
+static inline u8 amd_spi_readreg8(struct spi_master *master, int idx)
+{
+ struct amd_spi *amd_spi = spi_master_get_devdata(master);
+
+ return ioread8((u8 __iomem *)amd_spi->io_remap_addr + idx);
+}
+
+static inline void amd_spi_writereg8(struct spi_master *master, int idx,
+ u8 val)
+{
+ struct amd_spi *amd_spi = spi_master_get_devdata(master);
+
+ iowrite8(val, ((u8 __iomem *)amd_spi->io_remap_addr + idx));
+}
+
+static inline void amd_spi_setclear_reg8(struct spi_master *master, int idx,
+ u8 set, u8 clear)
+{
+ u8 tmp = amd_spi_readreg8(master, idx);
+
+ tmp = (tmp & ~clear) | set;
+ amd_spi_writereg8(master, idx, tmp);
+}
+
+static inline u32 amd_spi_readreg32(struct spi_master *master, int idx)
+{
+ struct amd_spi *amd_spi = spi_master_get_devdata(master);
+
+ return ioread32((u8 __iomem *)amd_spi->io_remap_addr + idx);
+}
+
+static inline void amd_spi_writereg32(struct spi_master *master, int idx,
+ u32 val)
+{
+ struct amd_spi *amd_spi = spi_master_get_devdata(master);
+
+ iowrite32(val, ((u8 __iomem *)amd_spi->io_remap_addr + idx));
+}
+
+static inline void amd_spi_setclear_reg32(struct spi_master *master, int idx,
+ u32 set, u32 clear)
+{
+ u32 tmp = amd_spi_readreg32(master, idx);
+
+ tmp = (tmp & ~clear) | set;
+ amd_spi_writereg32(master, idx, tmp);
+}
+
+static void amd_spi_select_chip(struct spi_master *master)
+{
+ struct amd_spi *amd_spi = spi_master_get_devdata(master);
+ u8 chip_select = amd_spi->chip_select;
+
+ amd_spi_setclear_reg8(master, AMD_SPI_ALT_CS_REG, chip_select,
+ AMD_SPI_ALT_CS_MASK);
+}
+
+static void amd_spi_clear_fifo_ptr(struct spi_master *master)
+{
+ amd_spi_setclear_reg32(master, AMD_SPI_CTRL0_REG, AMD_SPI_FIFO_CLEAR,
+ AMD_SPI_FIFO_CLEAR);
+}
+
+static void amd_spi_set_opcode(struct spi_master *master, u8 cmd_opcode)
+{
+ amd_spi_setclear_reg32(master, AMD_SPI_CTRL0_REG, cmd_opcode,
+ AMD_SPI_OPCODE_MASK);
+}
+
+static inline void amd_spi_set_rx_count(struct spi_master *master,
+ u8 rx_count)
+{
+ amd_spi_setclear_reg8(master, AMD_SPI_RX_COUNT_REG, rx_count, 0xff);
+}
+
+static inline void amd_spi_set_tx_count(struct spi_master *master,
+ u8 tx_count)
+{
+ amd_spi_setclear_reg8(master, AMD_SPI_TX_COUNT_REG, tx_count, 0xff);
+}
+
+static inline int amd_spi_busy_wait(struct amd_spi *amd_spi)
+{
+ bool spi_busy;
+ int timeout = 100000;
+
+ /* poll for SPI bus to become idle */
+ spi_busy = (ioread32((u8 __iomem *)amd_spi->io_remap_addr +
+ AMD_SPI_CTRL0_REG) & AMD_SPI_BUSY) == AMD_SPI_BUSY;
+ while (spi_busy) {
+ usleep_range(10, 20);
+ if (timeout-- < 0)
+ return -ETIMEDOUT;
+
+ spi_busy = (ioread32((u8 __iomem *)amd_spi->io_remap_addr +
+ AMD_SPI_CTRL0_REG) & AMD_SPI_BUSY) == AMD_SPI_BUSY;
+ }
+
+ return 0;
+}
+
+static void amd_spi_execute_opcode(struct spi_master *master)
+{
+ struct amd_spi *amd_spi = spi_master_get_devdata(master);
+
+ /* Set ExecuteOpCode bit in the CTRL0 register */
+ amd_spi_setclear_reg32(master, AMD_SPI_CTRL0_REG, AMD_SPI_EXEC_CMD,
+ AMD_SPI_EXEC_CMD);
+
+ amd_spi_busy_wait(amd_spi);
+}
+
+static int amd_spi_master_setup(struct spi_device *spi)
+{
+ struct spi_master *master = spi->master;
+
+ amd_spi_clear_fifo_ptr(master);
+
+ return 0;
+}
+
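+/*
+ * Process the transfers of a message: the first byte of a TX buffer is taken
+ * as the command opcode and the remaining bytes are written to the FIFO;
+ * RX data is read back from the FIFO after the opcode has been executed.
+ */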
+static inline int amd_spi_fifo_xfer(struct amd_spi *amd_spi,
+ struct spi_master *master,
+ struct spi_message *message)
+{
+ struct spi_transfer *xfer = NULL;
+ u8 cmd_opcode;
+ u8 *buf = NULL;
+ u32 m_cmd = 0;
+ u32 i = 0;
+ u32 tx_len = 0, rx_len = 0;
+
+ list_for_each_entry(xfer, &message->transfers,
+ transfer_list) {
+ if (xfer->rx_buf)
+ m_cmd = AMD_SPI_XFER_RX;
+ if (xfer->tx_buf)
+ m_cmd = AMD_SPI_XFER_TX;
+
+ if (m_cmd & AMD_SPI_XFER_TX) {
+ buf = (u8 *)xfer->tx_buf;
+ tx_len = xfer->len - 1;
+ cmd_opcode = *(u8 *)xfer->tx_buf;
+ buf++;
+ amd_spi_set_opcode(master, cmd_opcode);
+
+ /* Write data into the FIFO. */
+ for (i = 0; i < tx_len; i++) {
+ iowrite8(buf[i],
+ ((u8 __iomem *)amd_spi->io_remap_addr +
+ AMD_SPI_FIFO_BASE + i));
+ }
+
+ amd_spi_set_tx_count(master, tx_len);
+ amd_spi_clear_fifo_ptr(master);
+ /* Execute command */
+ amd_spi_execute_opcode(master);
+ }
+ if (m_cmd & AMD_SPI_XFER_RX) {
+ /*
+ * Store the number of bytes to be received
+ * from the FIFO.
+ */
+ rx_len = xfer->len;
+ buf = (u8 *)xfer->rx_buf;
+ amd_spi_set_rx_count(master, rx_len);
+ amd_spi_clear_fifo_ptr(master);
+ /* Execute command */
+ amd_spi_execute_opcode(master);
+ /* Read data from FIFO to receive buffer */
+ for (i = 0; i < rx_len; i++)
+ buf[i] = amd_spi_readreg8(master,
+ AMD_SPI_FIFO_BASE +
+ tx_len + i);
+ }
+ }
+
+ /* Update statistics */
+ message->actual_length = tx_len + rx_len + 1;
+ /* complete the transaction */
+ message->status = 0;
+ spi_finalize_current_message(master);
+
+ return 0;
+}
+
+static int amd_spi_master_transfer(struct spi_master *master,
+ struct spi_message *msg)
+{
+ struct amd_spi *amd_spi = spi_master_get_devdata(master);
+ struct spi_device *spi = msg->spi;
+
+ amd_spi->chip_select = spi->chip_select;
+ amd_spi_select_chip(master);
+
+ /*
+ * Extract spi_transfers from the spi message and
+ * program the controller.
+ */
+ amd_spi_fifo_xfer(amd_spi, master, msg);
+
+ return 0;
+}
+
+static int amd_spi_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct spi_master *master;
+ struct amd_spi *amd_spi;
+ struct resource *res;
+ int err = 0;
+
+ /* Allocate storage for spi_master and driver private data */
+ master = spi_alloc_master(dev, sizeof(struct amd_spi));
+ if (!master) {
+ dev_err(dev, "Error allocating SPI master\n");
+ return -ENOMEM;
+ }
+
+ amd_spi = spi_master_get_devdata(master);
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ amd_spi->io_remap_addr = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(amd_spi->io_remap_addr)) {
+ err = PTR_ERR(amd_spi->io_remap_addr);
+ dev_err(dev, "error %d ioremap of SPI registers failed\n", err);
+ goto err_free_master;
+ }
+ dev_dbg(dev, "io_remap_address: %p\n", amd_spi->io_remap_addr);
+
+ /* Initialize the spi_master fields */
+ master->bus_num = 0;
+ master->num_chipselect = 4;
+ master->mode_bits = 0;
+ master->flags = SPI_MASTER_HALF_DUPLEX;
+ master->setup = amd_spi_master_setup;
+ master->transfer_one_message = amd_spi_master_transfer;
+
+ /* Register the controller with SPI framework */
+ err = devm_spi_register_master(dev, master);
+ if (err) {
+ dev_err(dev, "error %d registering SPI controller\n", err);
+ goto err_free_master;
+ }
+
+ return 0;
+
+err_free_master:
+ spi_master_put(master);
+
+ return err;
+}
+
+static const struct acpi_device_id spi_acpi_match[] = {
+ { "AMDI0061", 0 },
+ {},
+};
+MODULE_DEVICE_TABLE(acpi, spi_acpi_match);
+
+static struct platform_driver amd_spi_driver = {
+ .driver = {
+ .name = "amd_spi",
+ .acpi_match_table = ACPI_PTR(spi_acpi_match),
+ },
+ .probe = amd_spi_probe,
+};
+
+module_platform_driver(amd_spi_driver);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Sanjay Mehta <sanju.mehta@amd.com>");
+MODULE_DESCRIPTION("AMD SPI Master Controller Driver");
diff --git a/drivers/spi/spi-armada-3700.c b/drivers/spi/spi-armada-3700.c
index e450ee17787f..fcde419e480c 100644
--- a/drivers/spi/spi-armada-3700.c
+++ b/drivers/spi/spi-armada-3700.c
@@ -276,11 +276,11 @@ static int a3700_spi_fifo_flush(struct a3700_spi *a3700_spi)
return -ETIMEDOUT;
}
-static int a3700_spi_init(struct a3700_spi *a3700_spi)
+static void a3700_spi_init(struct a3700_spi *a3700_spi)
{
struct spi_master *master = a3700_spi->master;
u32 val;
- int i, ret = 0;
+ int i;
/* Reset SPI unit */
val = spireg_read(a3700_spi, A3700_SPI_IF_CFG_REG);
@@ -311,8 +311,6 @@ static int a3700_spi_init(struct a3700_spi *a3700_spi)
/* Mask the interrupts and clear cause bits */
spireg_write(a3700_spi, A3700_SPI_INT_MASK_REG, 0);
spireg_write(a3700_spi, A3700_SPI_INT_STAT_REG, ~0U);
-
- return ret;
}
static irqreturn_t a3700_spi_interrupt(int irq, void *dev_id)
@@ -886,9 +884,7 @@ static int a3700_spi_probe(struct platform_device *pdev)
master->min_speed_hz = DIV_ROUND_UP(clk_get_rate(spi->clk),
A3700_SPI_MAX_PRESCALE);
- ret = a3700_spi_init(spi);
- if (ret)
- goto error_clk;
+ a3700_spi_init(spi);
ret = devm_request_irq(dev, spi->irq, a3700_spi_interrupt, 0,
dev_name(dev), master);
diff --git a/drivers/spi/spi-atmel.c b/drivers/spi/spi-atmel.c
index 013458cabe3c..57ee8c3b7972 100644
--- a/drivers/spi/spi-atmel.c
+++ b/drivers/spi/spi-atmel.c
@@ -706,6 +706,7 @@ static void atmel_spi_next_xfer_pio(struct spi_master *master,
static int atmel_spi_next_xfer_dma_submit(struct spi_master *master,
struct spi_transfer *xfer,
u32 *plen)
+ __must_hold(&as->lock)
{
struct atmel_spi *as = spi_master_get_devdata(master);
struct dma_chan *rxchan = master->dma_rx;
diff --git a/drivers/spi/spi-axi-spi-engine.c b/drivers/spi/spi-axi-spi-engine.c
index eb9b78a90dcf..af86e6d6e16b 100644
--- a/drivers/spi/spi-axi-spi-engine.c
+++ b/drivers/spi/spi-axi-spi-engine.c
@@ -489,22 +489,6 @@ static int spi_engine_probe(struct platform_device *pdev)
spin_lock_init(&spi_engine->lock);
- spi_engine->base = devm_platform_ioremap_resource(pdev, 0);
- if (IS_ERR(spi_engine->base)) {
- ret = PTR_ERR(spi_engine->base);
- goto err_put_master;
- }
-
- version = readl(spi_engine->base + SPI_ENGINE_REG_VERSION);
- if (SPI_ENGINE_VERSION_MAJOR(version) != 1) {
- dev_err(&pdev->dev, "Unsupported peripheral version %u.%u.%c\n",
- SPI_ENGINE_VERSION_MAJOR(version),
- SPI_ENGINE_VERSION_MINOR(version),
- SPI_ENGINE_VERSION_PATCH(version));
- ret = -ENODEV;
- goto err_put_master;
- }
-
spi_engine->clk = devm_clk_get(&pdev->dev, "s_axi_aclk");
if (IS_ERR(spi_engine->clk)) {
ret = PTR_ERR(spi_engine->clk);
@@ -525,6 +509,22 @@ static int spi_engine_probe(struct platform_device *pdev)
if (ret)
goto err_clk_disable;
+ spi_engine->base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(spi_engine->base)) {
+ ret = PTR_ERR(spi_engine->base);
+ goto err_ref_clk_disable;
+ }
+
+ version = readl(spi_engine->base + SPI_ENGINE_REG_VERSION);
+ if (SPI_ENGINE_VERSION_MAJOR(version) != 1) {
+ dev_err(&pdev->dev, "Unsupported peripheral version %u.%u.%c\n",
+ SPI_ENGINE_VERSION_MAJOR(version),
+ SPI_ENGINE_VERSION_MINOR(version),
+ SPI_ENGINE_VERSION_PATCH(version));
+ ret = -ENODEV;
+ goto err_ref_clk_disable;
+ }
+
writel_relaxed(0x00, spi_engine->base + SPI_ENGINE_REG_RESET);
writel_relaxed(0xff, spi_engine->base + SPI_ENGINE_REG_INT_PENDING);
writel_relaxed(0x00, spi_engine->base + SPI_ENGINE_REG_INT_ENABLE);
diff --git a/drivers/spi/spi-bcm-qspi.c b/drivers/spi/spi-bcm-qspi.c
index 23d295f36c80..681d09085175 100644
--- a/drivers/spi/spi-bcm-qspi.c
+++ b/drivers/spi/spi-bcm-qspi.c
@@ -91,6 +91,7 @@
#define MSPI_MSPI_STATUS 0x020
#define MSPI_CPTQP 0x024
#define MSPI_SPCR3 0x028
+#define MSPI_REV 0x02c
#define MSPI_TXRAM 0x040
#define MSPI_RXRAM 0x0c0
#define MSPI_CDRAM 0x140
@@ -106,14 +107,22 @@
#define MSPI_SPCR2_SPE BIT(6)
#define MSPI_SPCR2_CONT_AFTER_CMD BIT(7)
+#define MSPI_SPCR3_FASTBR BIT(0)
+#define MSPI_SPCR3_FASTDT BIT(1)
+#define MSPI_SPCR3_SYSCLKSEL_MASK GENMASK(11, 10)
+#define MSPI_SPCR3_SYSCLKSEL_27 (MSPI_SPCR3_SYSCLKSEL_MASK & \
+ ~(BIT(10) | BIT(11)))
+#define MSPI_SPCR3_SYSCLKSEL_108 (MSPI_SPCR3_SYSCLKSEL_MASK & \
+ BIT(11))
+
#define MSPI_MSPI_STATUS_SPIF BIT(0)
#define INTR_BASE_BIT_SHIFT 0x02
#define INTR_COUNT 0x07
#define NUM_CHIPSELECT 4
-#define QSPI_SPBR_MIN 8U
#define QSPI_SPBR_MAX 255U
+#define MSPI_BASE_FREQ 27000000UL
#define OPCODE_DIOR 0xBB
#define OPCODE_QIOR 0xEB
@@ -217,6 +226,9 @@ struct bcm_qspi {
struct bcm_qspi_dev_id *dev_ids;
struct completion mspi_done;
struct completion bspi_done;
+ u8 mspi_maj_rev;
+ u8 mspi_min_rev;
+ bool mspi_spcr3_sysclk;
};
static inline bool has_bspi(struct bcm_qspi *qspi)
@@ -224,6 +236,36 @@ static inline bool has_bspi(struct bcm_qspi *qspi)
return qspi->bspi_mode;
}
+/* hardware supports spcr3 and fast baud-rate */
+static inline bool bcm_qspi_has_fastbr(struct bcm_qspi *qspi)
+{
+ if (!has_bspi(qspi) &&
+ ((qspi->mspi_maj_rev >= 1) &&
+ (qspi->mspi_min_rev >= 5)))
+ return true;
+
+ return false;
+}
+
+/* hardware supports a 108 MHz system clock */
+static inline bool bcm_qspi_has_sysclk_108(struct bcm_qspi *qspi)
+{
+ if (!has_bspi(qspi) && (qspi->mspi_spcr3_sysclk ||
+ ((qspi->mspi_maj_rev >= 1) &&
+ (qspi->mspi_min_rev >= 6))))
+ return true;
+
+ return false;
+}
+
+static inline int bcm_qspi_spbr_min(struct bcm_qspi *qspi)
+{
+ if (bcm_qspi_has_fastbr(qspi))
+ return 1;
+ else
+ return 8;
+}
+
/* Read qspi controller register*/
static inline u32 bcm_qspi_read(struct bcm_qspi *qspi, enum base_type type,
unsigned int offset)
@@ -531,16 +573,39 @@ static void bcm_qspi_hw_set_parms(struct bcm_qspi *qspi,
if (xp->speed_hz)
spbr = qspi->base_clk / (2 * xp->speed_hz);
- spcr = clamp_val(spbr, QSPI_SPBR_MIN, QSPI_SPBR_MAX);
+ spcr = clamp_val(spbr, bcm_qspi_spbr_min(qspi), QSPI_SPBR_MAX);
bcm_qspi_write(qspi, MSPI, MSPI_SPCR0_LSB, spcr);
- spcr = MSPI_MASTER_BIT;
+ if (!qspi->mspi_maj_rev)
+ /* legacy controller */
+ spcr = MSPI_MASTER_BIT;
+ else
+ spcr = 0;
+
/* for 16 bit the data should be zero */
if (xp->bits_per_word != 16)
spcr |= xp->bits_per_word << 2;
spcr |= xp->mode & 3;
+
bcm_qspi_write(qspi, MSPI, MSPI_SPCR0_MSB, spcr);
+ if (bcm_qspi_has_fastbr(qspi)) {
+ spcr = 0;
+
+ /* enable fastbr */
+ spcr |= MSPI_SPCR3_FASTBR;
+
+ if (bcm_qspi_has_sysclk_108(qspi)) {
+ /* SYSCLK_108 */
+ spcr |= MSPI_SPCR3_SYSCLKSEL_108;
+ qspi->base_clk = MSPI_BASE_FREQ * 4;
+ /* Change spbr as we changed sysclk */
+ bcm_qspi_write(qspi, MSPI, MSPI_SPCR0_LSB, 4);
+ }
+
+ bcm_qspi_write(qspi, MSPI, MSPI_SPCR3, spcr);
+ }
+
qspi->last_parms = *xp;
}
@@ -612,19 +677,15 @@ static int update_qspi_trans_byte_count(struct bcm_qspi *qspi,
if (qt->trans->cs_change &&
(flags & TRANS_STATUS_BREAK_CS_CHANGE))
ret |= TRANS_STATUS_BREAK_CS_CHANGE;
- if (ret)
- goto done;
- dev_dbg(&qspi->pdev->dev, "advance msg exit\n");
if (bcm_qspi_mspi_transfer_is_last(qspi, qt))
- ret = TRANS_STATUS_BREAK_EOM;
+ ret |= TRANS_STATUS_BREAK_EOM;
else
- ret = TRANS_STATUS_BREAK_NO_BYTES;
+ ret |= TRANS_STATUS_BREAK_NO_BYTES;
qt->trans = NULL;
}
-done:
dev_dbg(&qspi->pdev->dev, "trans %p len %d byte %d ret %x\n",
qt->trans, qt->trans ? qt->trans->len : 0, qt->byte, ret);
return ret;
@@ -670,7 +731,7 @@ static void read_from_hw(struct bcm_qspi *qspi, int slots)
if (buf)
buf[tp.byte] = read_rxram_slot_u8(qspi, slot);
dev_dbg(&qspi->pdev->dev, "RD %02x\n",
- buf ? buf[tp.byte] : 0xff);
+ buf ? buf[tp.byte] : 0x0);
} else {
u16 *buf = tp.trans->rx_buf;
@@ -678,7 +739,7 @@ static void read_from_hw(struct bcm_qspi *qspi, int slots)
buf[tp.byte / 2] = read_rxram_slot_u16(qspi,
slot);
dev_dbg(&qspi->pdev->dev, "RD %04x\n",
- buf ? buf[tp.byte] : 0xffff);
+ buf ? buf[tp.byte / 2] : 0x0);
}
update_qspi_trans_byte_count(qspi, &tp,
@@ -733,13 +794,13 @@ static int write_to_hw(struct bcm_qspi *qspi, struct spi_device *spi)
while (!tstatus && slot < MSPI_NUM_CDRAM) {
if (tp.trans->bits_per_word <= 8) {
const u8 *buf = tp.trans->tx_buf;
- u8 val = buf ? buf[tp.byte] : 0xff;
+ u8 val = buf ? buf[tp.byte] : 0x00;
write_txram_slot_u8(qspi, slot, val);
dev_dbg(&qspi->pdev->dev, "WR %02x\n", val);
} else {
const u16 *buf = tp.trans->tx_buf;
- u16 val = buf ? buf[tp.byte / 2] : 0xffff;
+ u16 val = buf ? buf[tp.byte / 2] : 0x0000;
write_txram_slot_u16(qspi, slot, val);
dev_dbg(&qspi->pdev->dev, "WR %04x\n", val);
@@ -771,7 +832,16 @@ static int write_to_hw(struct bcm_qspi *qspi, struct spi_device *spi)
bcm_qspi_write(qspi, MSPI, MSPI_NEWQP, 0);
bcm_qspi_write(qspi, MSPI, MSPI_ENDQP, slot - 1);
- if (tstatus & TRANS_STATUS_BREAK_DESELECT) {
+ /*
+ * case 1) EOM =1, cs_change =0: SSb inactive
+ * case 2) EOM =1, cs_change =1: SSb stay active
+ * case 3) EOM =0, cs_change =0: SSb stay active
+ * case 4) EOM =0, cs_change =1: SSb inactive
+ */
+ if (((tstatus & TRANS_STATUS_BREAK_DESELECT)
+ == TRANS_STATUS_BREAK_CS_CHANGE) ||
+ ((tstatus & TRANS_STATUS_BREAK_DESELECT)
+ == TRANS_STATUS_BREAK_EOM)) {
mspi_cdram = read_cdram_slot(qspi, slot - 1) &
~MSPI_CDRAM_CONT_BIT;
write_cdram_slot(qspi, slot - 1, mspi_cdram);
@@ -1190,8 +1260,51 @@ static const struct spi_controller_mem_ops bcm_qspi_mem_ops = {
.exec_op = bcm_qspi_exec_mem_op,
};
+struct bcm_qspi_data {
+ bool has_mspi_rev;
+ bool has_spcr3_sysclk;
+};
+
+static const struct bcm_qspi_data bcm_qspi_no_rev_data = {
+ .has_mspi_rev = false,
+ .has_spcr3_sysclk = false,
+};
+
+static const struct bcm_qspi_data bcm_qspi_rev_data = {
+ .has_mspi_rev = true,
+ .has_spcr3_sysclk = false,
+};
+
+static const struct bcm_qspi_data bcm_qspi_spcr3_data = {
+ .has_mspi_rev = true,
+ .has_spcr3_sysclk = true,
+};
+
static const struct of_device_id bcm_qspi_of_match[] = {
- { .compatible = "brcm,spi-bcm-qspi" },
+ {
+ .compatible = "brcm,spi-bcm7425-qspi",
+ .data = &bcm_qspi_no_rev_data,
+ },
+ {
+ .compatible = "brcm,spi-bcm7429-qspi",
+ .data = &bcm_qspi_no_rev_data,
+ },
+ {
+ .compatible = "brcm,spi-bcm7435-qspi",
+ .data = &bcm_qspi_no_rev_data,
+ },
+ {
+ .compatible = "brcm,spi-bcm-qspi",
+ .data = &bcm_qspi_rev_data,
+ },
+ {
+ .compatible = "brcm,spi-bcm7216-qspi",
+ .data = &bcm_qspi_spcr3_data,
+ },
+ {
+ .compatible = "brcm,spi-bcm7278-qspi",
+ .data = &bcm_qspi_spcr3_data,
+ },
{},
};
MODULE_DEVICE_TABLE(of, bcm_qspi_of_match);
@@ -1199,12 +1312,15 @@ MODULE_DEVICE_TABLE(of, bcm_qspi_of_match);
int bcm_qspi_probe(struct platform_device *pdev,
struct bcm_qspi_soc_intc *soc_intc)
{
+ const struct of_device_id *of_id = NULL;
+ const struct bcm_qspi_data *data;
struct device *dev = &pdev->dev;
struct bcm_qspi *qspi;
struct spi_master *master;
struct resource *res;
int irq, ret = 0, num_ints = 0;
u32 val;
+ u32 rev = 0;
const char *name = NULL;
int num_irqs = ARRAY_SIZE(qspi_irq_tab);
@@ -1212,9 +1328,12 @@ int bcm_qspi_probe(struct platform_device *pdev,
if (!dev->of_node)
return -ENODEV;
- if (!of_match_node(bcm_qspi_of_match, dev->of_node))
+ of_id = of_match_node(bcm_qspi_of_match, dev->of_node);
+ if (!of_id)
return -ENODEV;
+ data = of_id->data;
+
master = spi_alloc_master(dev, sizeof(struct bcm_qspi));
if (!master) {
dev_err(dev, "error allocating spi_master\n");
@@ -1222,6 +1341,11 @@ int bcm_qspi_probe(struct platform_device *pdev,
}
qspi = spi_master_get_devdata(master);
+
+ qspi->clk = devm_clk_get_optional(&pdev->dev, NULL);
+ if (IS_ERR(qspi->clk))
+ return PTR_ERR(qspi->clk);
+
qspi->pdev = pdev;
qspi->trans_pos.trans = NULL;
qspi->trans_pos.byte = 0;
@@ -1335,13 +1459,6 @@ int bcm_qspi_probe(struct platform_device *pdev,
qspi->soc_intc = NULL;
}
- qspi->clk = devm_clk_get(&pdev->dev, NULL);
- if (IS_ERR(qspi->clk)) {
- dev_warn(dev, "unable to get clock\n");
- ret = PTR_ERR(qspi->clk);
- goto qspi_probe_err;
- }
-
ret = clk_prepare_enable(qspi->clk);
if (ret) {
dev_err(dev, "failed to prepare clock\n");
@@ -1349,7 +1466,19 @@ int bcm_qspi_probe(struct platform_device *pdev,
}
qspi->base_clk = clk_get_rate(qspi->clk);
- qspi->max_speed_hz = qspi->base_clk / (QSPI_SPBR_MIN * 2);
+
+ if (data->has_mspi_rev) {
+ rev = bcm_qspi_read(qspi, MSPI, MSPI_REV);
+ /* some older revs do not have a MSPI_REV register */
+ if ((rev & 0xff) == 0xff)
+ rev = 0;
+ }
+
+ qspi->mspi_maj_rev = (rev >> 4) & 0xf;
+ qspi->mspi_min_rev = rev & 0xf;
+ qspi->mspi_spcr3_sysclk = data->has_spcr3_sysclk;
+
+ qspi->max_speed_hz = qspi->base_clk / (bcm_qspi_spbr_min(qspi) * 2);
bcm_qspi_hw_init(qspi);
init_completion(&qspi->mspi_done);
@@ -1406,7 +1535,7 @@ static int __maybe_unused bcm_qspi_suspend(struct device *dev)
bcm_qspi_read(qspi, BSPI, BSPI_STRAP_OVERRIDE_CTRL);
spi_master_suspend(qspi->master);
- clk_disable(qspi->clk);
+ clk_disable_unprepare(qspi->clk);
bcm_qspi_hw_uninit(qspi);
return 0;
@@ -1424,7 +1553,7 @@ static int __maybe_unused bcm_qspi_resume(struct device *dev)
qspi->soc_intc->bcm_qspi_int_set(qspi->soc_intc, MSPI_DONE,
true);
- ret = clk_enable(qspi->clk);
+ ret = clk_prepare_enable(qspi->clk);
if (!ret)
spi_master_resume(qspi->master);
diff --git a/drivers/spi/spi-bcm2835.c b/drivers/spi/spi-bcm2835.c
index 11c235879bb7..237bd306c268 100644
--- a/drivers/spi/spi-bcm2835.c
+++ b/drivers/spi/spi-bcm2835.c
@@ -191,12 +191,12 @@ static void bcm2835_debugfs_remove(struct bcm2835_spi *bs)
}
#endif /* CONFIG_DEBUG_FS */
-static inline u32 bcm2835_rd(struct bcm2835_spi *bs, unsigned reg)
+static inline u32 bcm2835_rd(struct bcm2835_spi *bs, unsigned int reg)
{
return readl(bs->regs + reg);
}
-static inline void bcm2835_wr(struct bcm2835_spi *bs, unsigned reg, u32 val)
+static inline void bcm2835_wr(struct bcm2835_spi *bs, unsigned int reg, u32 val)
{
writel(val, bs->regs + reg);
}
@@ -940,6 +940,7 @@ static int bcm2835_dma_init(struct spi_controller *ctlr, struct device *dev,
if (dma_mapping_error(ctlr->dma_tx->device->dev, bs->fill_tx_addr)) {
dev_err(dev, "cannot map zero page - not using DMA mode\n");
bs->fill_tx_addr = 0;
+ ret = -ENOMEM;
goto err_release;
}
@@ -949,6 +950,7 @@ static int bcm2835_dma_init(struct spi_controller *ctlr, struct device *dev,
DMA_MEM_TO_DEV, 0);
if (!bs->fill_tx_desc) {
dev_err(dev, "cannot prepare fill_tx_desc - not using DMA mode\n");
+ ret = -ENOMEM;
goto err_release;
}
@@ -979,6 +981,7 @@ static int bcm2835_dma_init(struct spi_controller *ctlr, struct device *dev,
if (dma_mapping_error(ctlr->dma_rx->device->dev, bs->clear_rx_addr)) {
dev_err(dev, "cannot map clear_rx_cs - not using DMA mode\n");
bs->clear_rx_addr = 0;
+ ret = -ENOMEM;
goto err_release;
}
@@ -989,6 +992,7 @@ static int bcm2835_dma_init(struct spi_controller *ctlr, struct device *dev,
DMA_MEM_TO_DEV, 0);
if (!bs->clear_rx_desc[i]) {
dev_err(dev, "cannot prepare clear_rx_desc - not using DMA mode\n");
+ ret = -ENOMEM;
goto err_release;
}
@@ -1347,7 +1351,7 @@ static int bcm2835_spi_probe(struct platform_device *pdev)
goto out_dma_release;
}
- err = devm_spi_register_controller(&pdev->dev, ctlr);
+ err = spi_register_controller(ctlr);
if (err) {
dev_err(&pdev->dev, "could not register SPI controller: %d\n",
err);
@@ -1374,17 +1378,28 @@ static int bcm2835_spi_remove(struct platform_device *pdev)
bcm2835_debugfs_remove(bs);
+ spi_unregister_controller(ctlr);
+
+ bcm2835_dma_release(ctlr, bs);
+
/* Clear FIFOs, and disable the HW block */
bcm2835_wr(bs, BCM2835_SPI_CS,
BCM2835_SPI_CS_CLEAR_RX | BCM2835_SPI_CS_CLEAR_TX);
clk_disable_unprepare(bs->clk);
- bcm2835_dma_release(ctlr, bs);
-
return 0;
}
+static void bcm2835_spi_shutdown(struct platform_device *pdev)
+{
+ int ret;
+
+ ret = bcm2835_spi_remove(pdev);
+ if (ret)
+ dev_err(&pdev->dev, "failed to shutdown\n");
+}
+
static const struct of_device_id bcm2835_spi_match[] = {
{ .compatible = "brcm,bcm2835-spi", },
{}
@@ -1398,6 +1413,7 @@ static struct platform_driver bcm2835_spi_driver = {
},
.probe = bcm2835_spi_probe,
.remove = bcm2835_spi_remove,
+ .shutdown = bcm2835_spi_shutdown,
};
module_platform_driver(bcm2835_spi_driver);
diff --git a/drivers/spi/spi-bcm2835aux.c b/drivers/spi/spi-bcm2835aux.c
index a2162ff56a12..c331efd6e86b 100644
--- a/drivers/spi/spi-bcm2835aux.c
+++ b/drivers/spi/spi-bcm2835aux.c
@@ -569,7 +569,7 @@ static int bcm2835aux_spi_probe(struct platform_device *pdev)
goto out_clk_disable;
}
- err = devm_spi_register_master(&pdev->dev, master);
+ err = spi_register_master(master);
if (err) {
dev_err(&pdev->dev, "could not register SPI master: %d\n", err);
goto out_clk_disable;
@@ -593,6 +593,8 @@ static int bcm2835aux_spi_remove(struct platform_device *pdev)
bcm2835aux_debugfs_remove(bs);
+ spi_unregister_master(master);
+
bcm2835aux_spi_reset_hw(bs);
/* disable the HW block by releasing the clock */
diff --git a/drivers/spi/spi-dw.c b/drivers/spi/spi-dw-core.c
index 31e3f866d11a..323c66c5db50 100644
--- a/drivers/spi/spi-dw.c
+++ b/drivers/spi/spi-dw-core.c
@@ -24,74 +24,34 @@ struct chip_data {
u8 tmode; /* TR/TO/RO/EEPROM */
u8 type; /* SPI/SSP/MicroWire */
- u8 poll_mode; /* 1 means use poll mode */
-
u16 clk_div; /* baud rate divider */
u32 speed_hz; /* baud rate */
- void (*cs_control)(u32 command);
};
#ifdef CONFIG_DEBUG_FS
-#define SPI_REGS_BUFSIZE 1024
-static ssize_t dw_spi_show_regs(struct file *file, char __user *user_buf,
- size_t count, loff_t *ppos)
-{
- struct dw_spi *dws = file->private_data;
- char *buf;
- u32 len = 0;
- ssize_t ret;
-
- buf = kzalloc(SPI_REGS_BUFSIZE, GFP_KERNEL);
- if (!buf)
- return 0;
-
- len += scnprintf(buf + len, SPI_REGS_BUFSIZE - len,
- "%s registers:\n", dev_name(&dws->master->dev));
- len += scnprintf(buf + len, SPI_REGS_BUFSIZE - len,
- "=================================\n");
- len += scnprintf(buf + len, SPI_REGS_BUFSIZE - len,
- "CTRL0: \t\t0x%08x\n", dw_readl(dws, DW_SPI_CTRL0));
- len += scnprintf(buf + len, SPI_REGS_BUFSIZE - len,
- "CTRL1: \t\t0x%08x\n", dw_readl(dws, DW_SPI_CTRL1));
- len += scnprintf(buf + len, SPI_REGS_BUFSIZE - len,
- "SSIENR: \t0x%08x\n", dw_readl(dws, DW_SPI_SSIENR));
- len += scnprintf(buf + len, SPI_REGS_BUFSIZE - len,
- "SER: \t\t0x%08x\n", dw_readl(dws, DW_SPI_SER));
- len += scnprintf(buf + len, SPI_REGS_BUFSIZE - len,
- "BAUDR: \t\t0x%08x\n", dw_readl(dws, DW_SPI_BAUDR));
- len += scnprintf(buf + len, SPI_REGS_BUFSIZE - len,
- "TXFTLR: \t0x%08x\n", dw_readl(dws, DW_SPI_TXFLTR));
- len += scnprintf(buf + len, SPI_REGS_BUFSIZE - len,
- "RXFTLR: \t0x%08x\n", dw_readl(dws, DW_SPI_RXFLTR));
- len += scnprintf(buf + len, SPI_REGS_BUFSIZE - len,
- "TXFLR: \t\t0x%08x\n", dw_readl(dws, DW_SPI_TXFLR));
- len += scnprintf(buf + len, SPI_REGS_BUFSIZE - len,
- "RXFLR: \t\t0x%08x\n", dw_readl(dws, DW_SPI_RXFLR));
- len += scnprintf(buf + len, SPI_REGS_BUFSIZE - len,
- "SR: \t\t0x%08x\n", dw_readl(dws, DW_SPI_SR));
- len += scnprintf(buf + len, SPI_REGS_BUFSIZE - len,
- "IMR: \t\t0x%08x\n", dw_readl(dws, DW_SPI_IMR));
- len += scnprintf(buf + len, SPI_REGS_BUFSIZE - len,
- "ISR: \t\t0x%08x\n", dw_readl(dws, DW_SPI_ISR));
- len += scnprintf(buf + len, SPI_REGS_BUFSIZE - len,
- "DMACR: \t\t0x%08x\n", dw_readl(dws, DW_SPI_DMACR));
- len += scnprintf(buf + len, SPI_REGS_BUFSIZE - len,
- "DMATDLR: \t0x%08x\n", dw_readl(dws, DW_SPI_DMATDLR));
- len += scnprintf(buf + len, SPI_REGS_BUFSIZE - len,
- "DMARDLR: \t0x%08x\n", dw_readl(dws, DW_SPI_DMARDLR));
- len += scnprintf(buf + len, SPI_REGS_BUFSIZE - len,
- "=================================\n");
-
- ret = simple_read_from_buffer(user_buf, count, ppos, buf, len);
- kfree(buf);
- return ret;
+
+#define DW_SPI_DBGFS_REG(_name, _off) \
+{ \
+ .name = _name, \
+ .offset = _off, \
}
-static const struct file_operations dw_spi_regs_ops = {
- .owner = THIS_MODULE,
- .open = simple_open,
- .read = dw_spi_show_regs,
- .llseek = default_llseek,
+static const struct debugfs_reg32 dw_spi_dbgfs_regs[] = {
+ DW_SPI_DBGFS_REG("CTRLR0", DW_SPI_CTRLR0),
+ DW_SPI_DBGFS_REG("CTRLR1", DW_SPI_CTRLR1),
+ DW_SPI_DBGFS_REG("SSIENR", DW_SPI_SSIENR),
+ DW_SPI_DBGFS_REG("SER", DW_SPI_SER),
+ DW_SPI_DBGFS_REG("BAUDR", DW_SPI_BAUDR),
+ DW_SPI_DBGFS_REG("TXFTLR", DW_SPI_TXFTLR),
+ DW_SPI_DBGFS_REG("RXFTLR", DW_SPI_RXFTLR),
+ DW_SPI_DBGFS_REG("TXFLR", DW_SPI_TXFLR),
+ DW_SPI_DBGFS_REG("RXFLR", DW_SPI_RXFLR),
+ DW_SPI_DBGFS_REG("SR", DW_SPI_SR),
+ DW_SPI_DBGFS_REG("IMR", DW_SPI_IMR),
+ DW_SPI_DBGFS_REG("ISR", DW_SPI_ISR),
+ DW_SPI_DBGFS_REG("DMACR", DW_SPI_DMACR),
+ DW_SPI_DBGFS_REG("DMATDLR", DW_SPI_DMATDLR),
+ DW_SPI_DBGFS_REG("DMARDLR", DW_SPI_DMARDLR),
};
static int dw_spi_debugfs_init(struct dw_spi *dws)
@@ -103,8 +63,11 @@ static int dw_spi_debugfs_init(struct dw_spi *dws)
if (!dws->debugfs)
return -ENOMEM;
- debugfs_create_file("registers", S_IFREG | S_IRUGO,
- dws->debugfs, (void *)dws, &dw_spi_regs_ops);
+ dws->regset.regs = dw_spi_dbgfs_regs;
+ dws->regset.nregs = ARRAY_SIZE(dw_spi_dbgfs_regs);
+ dws->regset.base = dws->regs;
+ debugfs_create_regset32("registers", 0400, dws->debugfs, &dws->regset);
+
return 0;
}
@@ -127,13 +90,16 @@ static inline void dw_spi_debugfs_remove(struct dw_spi *dws)
void dw_spi_set_cs(struct spi_device *spi, bool enable)
{
struct dw_spi *dws = spi_controller_get_devdata(spi->controller);
- struct chip_data *chip = spi_get_ctldata(spi);
-
- /* Chip select logic is inverted from spi_set_cs() */
- if (chip && chip->cs_control)
- chip->cs_control(!enable);
+ bool cs_high = !!(spi->mode & SPI_CS_HIGH);
- if (!enable)
+ /*
+ * DW SPI controller demands any native CS being set in order to
+ * proceed with data transfer. So in order to activate the SPI
+ * communications we must set a corresponding bit in the Slave
+ * Enable register no matter whether the SPI core is configured to
+ * support active-high or active-low CS level.
+ */
+ if (cs_high == enable)
dw_writel(dws, DW_SPI_SER, BIT(spi->chip_select));
else if (dws->cs_override)
dw_writel(dws, DW_SPI_SER, 0);
@@ -265,17 +231,56 @@ static irqreturn_t dw_spi_irq(int irq, void *dev_id)
return dws->transfer_handler(dws);
}
-/* Must be called inside pump_transfers() */
-static int poll_transfer(struct dw_spi *dws)
+/* Configure CTRLR0 for DW_apb_ssi */
+u32 dw_spi_update_cr0(struct spi_controller *master, struct spi_device *spi,
+ struct spi_transfer *transfer)
{
- do {
- dw_writer(dws);
- dw_reader(dws);
- cpu_relax();
- } while (dws->rx_end > dws->rx);
+ struct chip_data *chip = spi_get_ctldata(spi);
+ u32 cr0;
- return 0;
+ /* Default SPI mode is SCPOL = 0, SCPH = 0 */
+ cr0 = (transfer->bits_per_word - 1)
+ | (chip->type << SPI_FRF_OFFSET)
+ | ((((spi->mode & SPI_CPOL) ? 1 : 0) << SPI_SCOL_OFFSET) |
+ (((spi->mode & SPI_CPHA) ? 1 : 0) << SPI_SCPH_OFFSET) |
+ (((spi->mode & SPI_LOOP) ? 1 : 0) << SPI_SRL_OFFSET))
+ | (chip->tmode << SPI_TMOD_OFFSET);
+
+ return cr0;
+}
+EXPORT_SYMBOL_GPL(dw_spi_update_cr0);
+
+/* Configure CTRLR0 for DWC_ssi */
+u32 dw_spi_update_cr0_v1_01a(struct spi_controller *master,
+ struct spi_device *spi,
+ struct spi_transfer *transfer)
+{
+ struct chip_data *chip = spi_get_ctldata(spi);
+ u32 cr0;
+
+ /* CTRLR0[ 4: 0] Data Frame Size */
+ cr0 = (transfer->bits_per_word - 1);
+
+ /* CTRLR0[ 7: 6] Frame Format */
+ cr0 |= chip->type << DWC_SSI_CTRLR0_FRF_OFFSET;
+
+ /*
+ * SPI mode (SCPOL|SCPH)
+ * CTRLR0[ 8] Serial Clock Phase
+ * CTRLR0[ 9] Serial Clock Polarity
+ */
+ cr0 |= ((spi->mode & SPI_CPOL) ? 1 : 0) << DWC_SSI_CTRLR0_SCPOL_OFFSET;
+ cr0 |= ((spi->mode & SPI_CPHA) ? 1 : 0) << DWC_SSI_CTRLR0_SCPH_OFFSET;
+
+ /* CTRLR0[11:10] Transfer Mode */
+ cr0 |= chip->tmode << DWC_SSI_CTRLR0_TMOD_OFFSET;
+
+ /* CTRLR0[13] Shift Register Loop */
+ cr0 |= ((spi->mode & SPI_LOOP) ? 1 : 0) << DWC_SSI_CTRLR0_SRL_OFFSET;
+
+ return cr0;
}
+EXPORT_SYMBOL_GPL(dw_spi_update_cr0_v1_01a);
static int dw_spi_transfer_one(struct spi_controller *master,
struct spi_device *spi, struct spi_transfer *transfer)
@@ -313,34 +318,11 @@ static int dw_spi_transfer_one(struct spi_controller *master,
spi_set_clk(dws, chip->clk_div);
}
+ transfer->effective_speed_hz = dws->max_freq / chip->clk_div;
dws->n_bytes = DIV_ROUND_UP(transfer->bits_per_word, BITS_PER_BYTE);
- dws->dma_width = DIV_ROUND_UP(transfer->bits_per_word, BITS_PER_BYTE);
-
- /* Default SPI mode is SCPOL = 0, SCPH = 0 */
- cr0 = (transfer->bits_per_word - 1)
- | (chip->type << SPI_FRF_OFFSET)
- | ((((spi->mode & SPI_CPOL) ? 1 : 0) << SPI_SCOL_OFFSET) |
- (((spi->mode & SPI_CPHA) ? 1 : 0) << SPI_SCPH_OFFSET) |
- (((spi->mode & SPI_LOOP) ? 1 : 0) << SPI_SRL_OFFSET))
- | (chip->tmode << SPI_TMOD_OFFSET);
- /*
- * Adjust transfer mode if necessary. Requires platform dependent
- * chipselect mechanism.
- */
- if (chip->cs_control) {
- if (dws->rx && dws->tx)
- chip->tmode = SPI_TMOD_TR;
- else if (dws->rx)
- chip->tmode = SPI_TMOD_RO;
- else
- chip->tmode = SPI_TMOD_TO;
-
- cr0 &= ~SPI_TMOD_MASK;
- cr0 |= (chip->tmode << SPI_TMOD_OFFSET);
- }
-
- dw_writel(dws, DW_SPI_CTRL0, cr0);
+ cr0 = dws->update_cr0(master, spi, transfer);
+ dw_writel(dws, DW_SPI_CTRLR0, cr0);
/* Check if current transfer is a DMA transaction */
if (master->can_dma && master->can_dma(master, spi, transfer))
@@ -359,9 +341,9 @@ static int dw_spi_transfer_one(struct spi_controller *master,
spi_enable_chip(dws, 1);
return ret;
}
- } else if (!chip->poll_mode) {
+ } else {
txlevel = min_t(u16, dws->fifo_len / 2, dws->len / dws->n_bytes);
- dw_writel(dws, DW_SPI_TXFLTR, txlevel);
+ dw_writel(dws, DW_SPI_TXFTLR, txlevel);
/* Set the interrupt mask */
imask |= SPI_INT_TXEI | SPI_INT_TXOI |
@@ -373,14 +355,8 @@ static int dw_spi_transfer_one(struct spi_controller *master,
spi_enable_chip(dws, 1);
- if (dws->dma_mapped) {
- ret = dws->dma_ops->dma_transfer(dws, transfer);
- if (ret < 0)
- return ret;
- }
-
- if (chip->poll_mode)
- return poll_transfer(dws);
+ if (dws->dma_mapped)
+ return dws->dma_ops->dma_transfer(dws, transfer);
return 1;
}
@@ -399,7 +375,6 @@ static void dw_spi_handle_err(struct spi_controller *master,
/* This may be called twice for each spi dev */
static int dw_spi_setup(struct spi_device *spi)
{
- struct dw_spi_chip *chip_info = NULL;
struct chip_data *chip;
/* Only alloc on first setup */
@@ -411,21 +386,6 @@ static int dw_spi_setup(struct spi_device *spi)
spi_set_ctldata(spi, chip);
}
- /*
- * Protocol drivers may change the chip settings, so...
- * if chip_info exists, use it
- */
- chip_info = spi->controller_data;
-
- /* chip_info doesn't always exist */
- if (chip_info) {
- if (chip_info->cs_control)
- chip->cs_control = chip_info->cs_control;
-
- chip->poll_mode = chip_info->poll_mode;
- chip->type = chip_info->type;
- }
-
chip->tmode = SPI_TMOD_TR;
return 0;
@@ -452,11 +412,11 @@ static void spi_hw_init(struct device *dev, struct dw_spi *dws)
u32 fifo;
for (fifo = 1; fifo < 256; fifo++) {
- dw_writel(dws, DW_SPI_TXFLTR, fifo);
- if (fifo != dw_readl(dws, DW_SPI_TXFLTR))
+ dw_writel(dws, DW_SPI_TXFTLR, fifo);
+ if (fifo != dw_readl(dws, DW_SPI_TXFTLR))
break;
}
- dw_writel(dws, DW_SPI_TXFLTR, 0);
+ dw_writel(dws, DW_SPI_TXFTLR, 0);
dws->fifo_len = (fifo == 1) ? 0 : fifo;
dev_dbg(dev, "Detected FIFO size: %u bytes\n", dws->fifo_len);
@@ -481,7 +441,6 @@ int dw_spi_add_host(struct device *dev, struct dw_spi *dws)
dws->master = master;
dws->type = SSI_MOTO_SPI;
- dws->dma_inited = 0;
dws->dma_addr = (dma_addr_t)(dws->paddr + DW_SPI_DR);
spin_lock_init(&dws->buf_lock);
@@ -517,16 +476,16 @@ int dw_spi_add_host(struct device *dev, struct dw_spi *dws)
spi_hw_init(dev, dws);
if (dws->dma_ops && dws->dma_ops->dma_init) {
- ret = dws->dma_ops->dma_init(dws);
+ ret = dws->dma_ops->dma_init(dev, dws);
if (ret) {
dev_warn(dev, "DMA init failed\n");
- dws->dma_inited = 0;
} else {
master->can_dma = dws->dma_ops->can_dma;
+ master->flags |= SPI_CONTROLLER_MUST_TX;
}
}
- ret = devm_spi_register_controller(dev, master);
+ ret = spi_register_controller(master);
if (ret) {
dev_err(&master->dev, "problem registering spi master\n");
goto err_dma_exit;
@@ -550,6 +509,8 @@ void dw_spi_remove_host(struct dw_spi *dws)
{
dw_spi_debugfs_remove(dws);
+ spi_unregister_controller(dws->master);
+
if (dws->dma_ops && dws->dma_ops->dma_exit)
dws->dma_ops->dma_exit(dws);
diff --git a/drivers/spi/spi-dw-dma.c b/drivers/spi/spi-dw-dma.c
new file mode 100644
index 000000000000..5986c520b196
--- /dev/null
+++ b/drivers/spi/spi-dw-dma.c
@@ -0,0 +1,480 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Special handling for DW DMA core
+ *
+ * Copyright (c) 2009, 2014 Intel Corporation.
+ */
+
+#include <linux/completion.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+#include <linux/irqreturn.h>
+#include <linux/jiffies.h>
+#include <linux/pci.h>
+#include <linux/platform_data/dma-dw.h>
+#include <linux/spi/spi.h>
+#include <linux/types.h>
+
+#include "spi-dw.h"
+
+#define WAIT_RETRIES 5
+#define RX_BUSY 0
+#define RX_BURST_LEVEL 16
+#define TX_BUSY 1
+#define TX_BURST_LEVEL 16
+
+static bool dw_spi_dma_chan_filter(struct dma_chan *chan, void *param)
+{
+ struct dw_dma_slave *s = param;
+
+ if (s->dma_dev != chan->device->dev)
+ return false;
+
+ chan->private = s;
+ return true;
+}
+
+static void dw_spi_dma_maxburst_init(struct dw_spi *dws)
+{
+ struct dma_slave_caps caps;
+ u32 max_burst, def_burst;
+ int ret;
+
+ def_burst = dws->fifo_len / 2;
+
+ ret = dma_get_slave_caps(dws->rxchan, &caps);
+ if (!ret && caps.max_burst)
+ max_burst = caps.max_burst;
+ else
+ max_burst = RX_BURST_LEVEL;
+
+ dws->rxburst = min(max_burst, def_burst);
+
+ ret = dma_get_slave_caps(dws->txchan, &caps);
+ if (!ret && caps.max_burst)
+ max_burst = caps.max_burst;
+ else
+ max_burst = TX_BURST_LEVEL;
+
+ dws->txburst = min(max_burst, def_burst);
+}
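+
+/*
+ * A rough sketch of what dw_spi_dma_maxburst_init() ends up with (the
+ * figures below are illustrative assumptions, not taken from this patch):
+ * with a 32-entry FIFO, def_burst is 16; if the DMA engine reports
+ * caps.max_burst = 8, both rxburst and txburst become min(8, 16) = 8.
+ */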
+
+static int dw_spi_dma_init_mfld(struct device *dev, struct dw_spi *dws)
+{
+ struct dw_dma_slave dma_tx = { .dst_id = 1 }, *tx = &dma_tx;
+ struct dw_dma_slave dma_rx = { .src_id = 0 }, *rx = &dma_rx;
+ struct pci_dev *dma_dev;
+ dma_cap_mask_t mask;
+
+ /*
+	 * Get the PCI device of the DMA controller; currently it can only
+	 * be the Medfield DMA controller
+ */
+ dma_dev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x0827, NULL);
+ if (!dma_dev)
+ return -ENODEV;
+
+ dma_cap_zero(mask);
+ dma_cap_set(DMA_SLAVE, mask);
+
+ /* 1. Init rx channel */
+ rx->dma_dev = &dma_dev->dev;
+ dws->rxchan = dma_request_channel(mask, dw_spi_dma_chan_filter, rx);
+ if (!dws->rxchan)
+ goto err_exit;
+
+ /* 2. Init tx channel */
+ tx->dma_dev = &dma_dev->dev;
+ dws->txchan = dma_request_channel(mask, dw_spi_dma_chan_filter, tx);
+ if (!dws->txchan)
+ goto free_rxchan;
+
+ dws->master->dma_rx = dws->rxchan;
+ dws->master->dma_tx = dws->txchan;
+
+ init_completion(&dws->dma_completion);
+
+ dw_spi_dma_maxburst_init(dws);
+
+ return 0;
+
+free_rxchan:
+ dma_release_channel(dws->rxchan);
+ dws->rxchan = NULL;
+err_exit:
+ return -EBUSY;
+}
+
+static int dw_spi_dma_init_generic(struct device *dev, struct dw_spi *dws)
+{
+ dws->rxchan = dma_request_slave_channel(dev, "rx");
+ if (!dws->rxchan)
+ return -ENODEV;
+
+ dws->txchan = dma_request_slave_channel(dev, "tx");
+ if (!dws->txchan) {
+ dma_release_channel(dws->rxchan);
+ dws->rxchan = NULL;
+ return -ENODEV;
+ }
+
+ dws->master->dma_rx = dws->rxchan;
+ dws->master->dma_tx = dws->txchan;
+
+ init_completion(&dws->dma_completion);
+
+ dw_spi_dma_maxburst_init(dws);
+
+ return 0;
+}
+
+static void dw_spi_dma_exit(struct dw_spi *dws)
+{
+ if (dws->txchan) {
+ dmaengine_terminate_sync(dws->txchan);
+ dma_release_channel(dws->txchan);
+ }
+
+ if (dws->rxchan) {
+ dmaengine_terminate_sync(dws->rxchan);
+ dma_release_channel(dws->rxchan);
+ }
+
+ dw_writel(dws, DW_SPI_DMACR, 0);
+}
+
+static irqreturn_t dw_spi_dma_transfer_handler(struct dw_spi *dws)
+{
+ u16 irq_status = dw_readl(dws, DW_SPI_ISR);
+
+ if (!irq_status)
+ return IRQ_NONE;
+
+ dw_readl(dws, DW_SPI_ICR);
+ spi_reset_chip(dws);
+
+ dev_err(&dws->master->dev, "%s: FIFO overrun/underrun\n", __func__);
+ dws->master->cur_msg->status = -EIO;
+ complete(&dws->dma_completion);
+ return IRQ_HANDLED;
+}
+
+static bool dw_spi_can_dma(struct spi_controller *master,
+ struct spi_device *spi, struct spi_transfer *xfer)
+{
+ struct dw_spi *dws = spi_controller_get_devdata(master);
+
+ return xfer->len > dws->fifo_len;
+}
+
+static enum dma_slave_buswidth dw_spi_dma_convert_width(u8 n_bytes)
+{
+ if (n_bytes == 1)
+ return DMA_SLAVE_BUSWIDTH_1_BYTE;
+ else if (n_bytes == 2)
+ return DMA_SLAVE_BUSWIDTH_2_BYTES;
+
+ return DMA_SLAVE_BUSWIDTH_UNDEFINED;
+}
+
+static int dw_spi_dma_wait(struct dw_spi *dws, struct spi_transfer *xfer)
+{
+ unsigned long long ms;
+
+ ms = xfer->len * MSEC_PER_SEC * BITS_PER_BYTE;
+ do_div(ms, xfer->effective_speed_hz);
+ ms += ms + 200;
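+
+	/*
+	 * Worked example (illustrative figures, not part of this change): a
+	 * 4096-byte transfer at 10 MHz needs 4096 * 8 / 10000000 s ~= 3 ms;
+	 * after doubling and adding the 200 ms margin the timeout is ~206 ms.
+	 */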
+
+ if (ms > UINT_MAX)
+ ms = UINT_MAX;
+
+ ms = wait_for_completion_timeout(&dws->dma_completion,
+ msecs_to_jiffies(ms));
+
+ if (ms == 0) {
+ dev_err(&dws->master->cur_msg->spi->dev,
+ "DMA transaction timed out\n");
+ return -ETIMEDOUT;
+ }
+
+ return 0;
+}
+
+static inline bool dw_spi_dma_tx_busy(struct dw_spi *dws)
+{
+ return !(dw_readl(dws, DW_SPI_SR) & SR_TF_EMPT);
+}
+
+static int dw_spi_dma_wait_tx_done(struct dw_spi *dws,
+ struct spi_transfer *xfer)
+{
+ int retry = WAIT_RETRIES;
+ struct spi_delay delay;
+ u32 nents;
+
+ nents = dw_readl(dws, DW_SPI_TXFLR);
+ delay.unit = SPI_DELAY_UNIT_SCK;
+ delay.value = nents * dws->n_bytes * BITS_PER_BYTE;
+
+ while (dw_spi_dma_tx_busy(dws) && retry--)
+ spi_delay_exec(&delay, xfer);
+
+ if (retry < 0) {
+ dev_err(&dws->master->dev, "Tx hanged up\n");
+ return -EIO;
+ }
+
+ return 0;
+}
+
+/*
+ * dws->dma_chan_busy is set before the dma transfer starts, callback for tx
+ * channel will clear a corresponding bit.
+ */
+static void dw_spi_dma_tx_done(void *arg)
+{
+ struct dw_spi *dws = arg;
+
+ clear_bit(TX_BUSY, &dws->dma_chan_busy);
+ if (test_bit(RX_BUSY, &dws->dma_chan_busy))
+ return;
+
+ dw_writel(dws, DW_SPI_DMACR, 0);
+ complete(&dws->dma_completion);
+}
+
+static struct dma_async_tx_descriptor *
+dw_spi_dma_prepare_tx(struct dw_spi *dws, struct spi_transfer *xfer)
+{
+ struct dma_slave_config txconf;
+ struct dma_async_tx_descriptor *txdesc;
+
+ if (!xfer->tx_buf)
+ return NULL;
+
+ memset(&txconf, 0, sizeof(txconf));
+ txconf.direction = DMA_MEM_TO_DEV;
+ txconf.dst_addr = dws->dma_addr;
+ txconf.dst_maxburst = dws->txburst;
+ txconf.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+ txconf.dst_addr_width = dw_spi_dma_convert_width(dws->n_bytes);
+ txconf.device_fc = false;
+
+ dmaengine_slave_config(dws->txchan, &txconf);
+
+ txdesc = dmaengine_prep_slave_sg(dws->txchan,
+ xfer->tx_sg.sgl,
+ xfer->tx_sg.nents,
+ DMA_MEM_TO_DEV,
+ DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+ if (!txdesc)
+ return NULL;
+
+ txdesc->callback = dw_spi_dma_tx_done;
+ txdesc->callback_param = dws;
+
+ return txdesc;
+}
+
+static inline bool dw_spi_dma_rx_busy(struct dw_spi *dws)
+{
+ return !!(dw_readl(dws, DW_SPI_SR) & SR_RF_NOT_EMPT);
+}
+
+static int dw_spi_dma_wait_rx_done(struct dw_spi *dws)
+{
+ int retry = WAIT_RETRIES;
+ struct spi_delay delay;
+ unsigned long ns, us;
+ u32 nents;
+
+ /*
+	 * It's unlikely that the DMA engine is still doing the data fetching,
+	 * but if it is, let's give it some reasonable time. The timeout calculation
+ * is based on the synchronous APB/SSI reference clock rate, on a
+ * number of data entries left in the Rx FIFO, times a number of clock
+ * periods normally needed for a single APB read/write transaction
+ * without PREADY signal utilized (which is true for the DW APB SSI
+ * controller).
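+	 *
+	 * A rough worked example (assumed figures, not taken from this patch):
+	 * with max_freq = 50 MHz and 16 entries left in the Rx FIFO, the delay
+	 * evaluates to 4 * NSEC_PER_SEC / 50000000 * 16 = 1280 ns, which gets
+	 * rounded up to a 2 us wait per retry.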
+ */
+ nents = dw_readl(dws, DW_SPI_RXFLR);
+ ns = 4U * NSEC_PER_SEC / dws->max_freq * nents;
+ if (ns <= NSEC_PER_USEC) {
+ delay.unit = SPI_DELAY_UNIT_NSECS;
+ delay.value = ns;
+ } else {
+ us = DIV_ROUND_UP(ns, NSEC_PER_USEC);
+ delay.unit = SPI_DELAY_UNIT_USECS;
+ delay.value = clamp_val(us, 0, USHRT_MAX);
+ }
+
+ while (dw_spi_dma_rx_busy(dws) && retry--)
+ spi_delay_exec(&delay, NULL);
+
+ if (retry < 0) {
+ dev_err(&dws->master->dev, "Rx hanged up\n");
+ return -EIO;
+ }
+
+ return 0;
+}
+
+/*
+ * dws->dma_chan_busy is set before the dma transfer starts, callback for rx
+ * channel will clear a corresponding bit.
+ */
+static void dw_spi_dma_rx_done(void *arg)
+{
+ struct dw_spi *dws = arg;
+
+ clear_bit(RX_BUSY, &dws->dma_chan_busy);
+ if (test_bit(TX_BUSY, &dws->dma_chan_busy))
+ return;
+
+ dw_writel(dws, DW_SPI_DMACR, 0);
+ complete(&dws->dma_completion);
+}
+
+static struct dma_async_tx_descriptor *dw_spi_dma_prepare_rx(struct dw_spi *dws,
+ struct spi_transfer *xfer)
+{
+ struct dma_slave_config rxconf;
+ struct dma_async_tx_descriptor *rxdesc;
+
+ if (!xfer->rx_buf)
+ return NULL;
+
+ memset(&rxconf, 0, sizeof(rxconf));
+ rxconf.direction = DMA_DEV_TO_MEM;
+ rxconf.src_addr = dws->dma_addr;
+ rxconf.src_maxburst = dws->rxburst;
+ rxconf.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+ rxconf.src_addr_width = dw_spi_dma_convert_width(dws->n_bytes);
+ rxconf.device_fc = false;
+
+ dmaengine_slave_config(dws->rxchan, &rxconf);
+
+ rxdesc = dmaengine_prep_slave_sg(dws->rxchan,
+ xfer->rx_sg.sgl,
+ xfer->rx_sg.nents,
+ DMA_DEV_TO_MEM,
+ DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+ if (!rxdesc)
+ return NULL;
+
+ rxdesc->callback = dw_spi_dma_rx_done;
+ rxdesc->callback_param = dws;
+
+ return rxdesc;
+}
+
+static int dw_spi_dma_setup(struct dw_spi *dws, struct spi_transfer *xfer)
+{
+ u16 imr = 0, dma_ctrl = 0;
+
+ dw_writel(dws, DW_SPI_DMARDLR, dws->rxburst - 1);
+ dw_writel(dws, DW_SPI_DMATDLR, dws->fifo_len - dws->txburst);
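+
+	/*
+	 * Threshold sketch with assumed figures (not from this patch): with a
+	 * 32-entry FIFO and 16-word bursts, DMARDLR = 15 asserts the Rx DMA
+	 * request once at least 16 entries sit in the Rx FIFO, and
+	 * DMATDLR = 16 asserts the Tx request once at least 16 Tx slots are
+	 * free.
+	 */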
+
+ if (xfer->tx_buf)
+ dma_ctrl |= SPI_DMA_TDMAE;
+ if (xfer->rx_buf)
+ dma_ctrl |= SPI_DMA_RDMAE;
+ dw_writel(dws, DW_SPI_DMACR, dma_ctrl);
+
+ /* Set the interrupt mask */
+ if (xfer->tx_buf)
+ imr |= SPI_INT_TXOI;
+ if (xfer->rx_buf)
+ imr |= SPI_INT_RXUI | SPI_INT_RXOI;
+ spi_umask_intr(dws, imr);
+
+ reinit_completion(&dws->dma_completion);
+
+ dws->transfer_handler = dw_spi_dma_transfer_handler;
+
+ return 0;
+}
+
+static int dw_spi_dma_transfer(struct dw_spi *dws, struct spi_transfer *xfer)
+{
+ struct dma_async_tx_descriptor *txdesc, *rxdesc;
+ int ret;
+
+ /* Prepare the TX dma transfer */
+ txdesc = dw_spi_dma_prepare_tx(dws, xfer);
+
+ /* Prepare the RX dma transfer */
+ rxdesc = dw_spi_dma_prepare_rx(dws, xfer);
+
+ /* rx must be started before tx due to spi instinct */
+ if (rxdesc) {
+ set_bit(RX_BUSY, &dws->dma_chan_busy);
+ dmaengine_submit(rxdesc);
+ dma_async_issue_pending(dws->rxchan);
+ }
+
+ if (txdesc) {
+ set_bit(TX_BUSY, &dws->dma_chan_busy);
+ dmaengine_submit(txdesc);
+ dma_async_issue_pending(dws->txchan);
+ }
+
+ ret = dw_spi_dma_wait(dws, xfer);
+ if (ret)
+ return ret;
+
+ if (txdesc && dws->master->cur_msg->status == -EINPROGRESS) {
+ ret = dw_spi_dma_wait_tx_done(dws, xfer);
+ if (ret)
+ return ret;
+ }
+
+ if (rxdesc && dws->master->cur_msg->status == -EINPROGRESS)
+ ret = dw_spi_dma_wait_rx_done(dws);
+
+ return ret;
+}
+
+static void dw_spi_dma_stop(struct dw_spi *dws)
+{
+ if (test_bit(TX_BUSY, &dws->dma_chan_busy)) {
+ dmaengine_terminate_sync(dws->txchan);
+ clear_bit(TX_BUSY, &dws->dma_chan_busy);
+ }
+ if (test_bit(RX_BUSY, &dws->dma_chan_busy)) {
+ dmaengine_terminate_sync(dws->rxchan);
+ clear_bit(RX_BUSY, &dws->dma_chan_busy);
+ }
+
+ dw_writel(dws, DW_SPI_DMACR, 0);
+}
+
+static const struct dw_spi_dma_ops dw_spi_dma_mfld_ops = {
+ .dma_init = dw_spi_dma_init_mfld,
+ .dma_exit = dw_spi_dma_exit,
+ .dma_setup = dw_spi_dma_setup,
+ .can_dma = dw_spi_can_dma,
+ .dma_transfer = dw_spi_dma_transfer,
+ .dma_stop = dw_spi_dma_stop,
+};
+
+void dw_spi_dma_setup_mfld(struct dw_spi *dws)
+{
+ dws->dma_ops = &dw_spi_dma_mfld_ops;
+}
+EXPORT_SYMBOL_GPL(dw_spi_dma_setup_mfld);
+
+static const struct dw_spi_dma_ops dw_spi_dma_generic_ops = {
+ .dma_init = dw_spi_dma_init_generic,
+ .dma_exit = dw_spi_dma_exit,
+ .dma_setup = dw_spi_dma_setup,
+ .can_dma = dw_spi_can_dma,
+ .dma_transfer = dw_spi_dma_transfer,
+ .dma_stop = dw_spi_dma_stop,
+};
+
+void dw_spi_dma_setup_generic(struct dw_spi *dws)
+{
+ dws->dma_ops = &dw_spi_dma_generic_ops;
+}
+EXPORT_SYMBOL_GPL(dw_spi_dma_setup_generic);
diff --git a/drivers/spi/spi-dw-mid.c b/drivers/spi/spi-dw-mid.c
deleted file mode 100644
index 0d86c37e0aeb..000000000000
--- a/drivers/spi/spi-dw-mid.c
+++ /dev/null
@@ -1,322 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Special handling for DW core on Intel MID platform
- *
- * Copyright (c) 2009, 2014 Intel Corporation.
- */
-
-#include <linux/dma-mapping.h>
-#include <linux/dmaengine.h>
-#include <linux/interrupt.h>
-#include <linux/slab.h>
-#include <linux/spi/spi.h>
-#include <linux/types.h>
-
-#include "spi-dw.h"
-
-#ifdef CONFIG_SPI_DW_MID_DMA
-#include <linux/pci.h>
-#include <linux/platform_data/dma-dw.h>
-
-#define RX_BUSY 0
-#define TX_BUSY 1
-
-static struct dw_dma_slave mid_dma_tx = { .dst_id = 1 };
-static struct dw_dma_slave mid_dma_rx = { .src_id = 0 };
-
-static bool mid_spi_dma_chan_filter(struct dma_chan *chan, void *param)
-{
- struct dw_dma_slave *s = param;
-
- if (s->dma_dev != chan->device->dev)
- return false;
-
- chan->private = s;
- return true;
-}
-
-static int mid_spi_dma_init(struct dw_spi *dws)
-{
- struct pci_dev *dma_dev;
- struct dw_dma_slave *tx = dws->dma_tx;
- struct dw_dma_slave *rx = dws->dma_rx;
- dma_cap_mask_t mask;
-
- /*
- * Get pci device for DMA controller, currently it could only
- * be the DMA controller of Medfield
- */
- dma_dev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x0827, NULL);
- if (!dma_dev)
- return -ENODEV;
-
- dma_cap_zero(mask);
- dma_cap_set(DMA_SLAVE, mask);
-
- /* 1. Init rx channel */
- rx->dma_dev = &dma_dev->dev;
- dws->rxchan = dma_request_channel(mask, mid_spi_dma_chan_filter, rx);
- if (!dws->rxchan)
- goto err_exit;
- dws->master->dma_rx = dws->rxchan;
-
- /* 2. Init tx channel */
- tx->dma_dev = &dma_dev->dev;
- dws->txchan = dma_request_channel(mask, mid_spi_dma_chan_filter, tx);
- if (!dws->txchan)
- goto free_rxchan;
- dws->master->dma_tx = dws->txchan;
-
- dws->dma_inited = 1;
- return 0;
-
-free_rxchan:
- dma_release_channel(dws->rxchan);
-err_exit:
- return -EBUSY;
-}
-
-static void mid_spi_dma_exit(struct dw_spi *dws)
-{
- if (!dws->dma_inited)
- return;
-
- dmaengine_terminate_sync(dws->txchan);
- dma_release_channel(dws->txchan);
-
- dmaengine_terminate_sync(dws->rxchan);
- dma_release_channel(dws->rxchan);
-}
-
-static irqreturn_t dma_transfer(struct dw_spi *dws)
-{
- u16 irq_status = dw_readl(dws, DW_SPI_ISR);
-
- if (!irq_status)
- return IRQ_NONE;
-
- dw_readl(dws, DW_SPI_ICR);
- spi_reset_chip(dws);
-
- dev_err(&dws->master->dev, "%s: FIFO overrun/underrun\n", __func__);
- dws->master->cur_msg->status = -EIO;
- spi_finalize_current_transfer(dws->master);
- return IRQ_HANDLED;
-}
-
-static bool mid_spi_can_dma(struct spi_controller *master,
- struct spi_device *spi, struct spi_transfer *xfer)
-{
- struct dw_spi *dws = spi_controller_get_devdata(master);
-
- if (!dws->dma_inited)
- return false;
-
- return xfer->len > dws->fifo_len;
-}
-
-static enum dma_slave_buswidth convert_dma_width(u32 dma_width) {
- if (dma_width == 1)
- return DMA_SLAVE_BUSWIDTH_1_BYTE;
- else if (dma_width == 2)
- return DMA_SLAVE_BUSWIDTH_2_BYTES;
-
- return DMA_SLAVE_BUSWIDTH_UNDEFINED;
-}
-
-/*
- * dws->dma_chan_busy is set before the dma transfer starts, callback for tx
- * channel will clear a corresponding bit.
- */
-static void dw_spi_dma_tx_done(void *arg)
-{
- struct dw_spi *dws = arg;
-
- clear_bit(TX_BUSY, &dws->dma_chan_busy);
- if (test_bit(RX_BUSY, &dws->dma_chan_busy))
- return;
- spi_finalize_current_transfer(dws->master);
-}
-
-static struct dma_async_tx_descriptor *dw_spi_dma_prepare_tx(struct dw_spi *dws,
- struct spi_transfer *xfer)
-{
- struct dma_slave_config txconf;
- struct dma_async_tx_descriptor *txdesc;
-
- if (!xfer->tx_buf)
- return NULL;
-
- txconf.direction = DMA_MEM_TO_DEV;
- txconf.dst_addr = dws->dma_addr;
- txconf.dst_maxburst = 16;
- txconf.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
- txconf.dst_addr_width = convert_dma_width(dws->dma_width);
- txconf.device_fc = false;
-
- dmaengine_slave_config(dws->txchan, &txconf);
-
- txdesc = dmaengine_prep_slave_sg(dws->txchan,
- xfer->tx_sg.sgl,
- xfer->tx_sg.nents,
- DMA_MEM_TO_DEV,
- DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
- if (!txdesc)
- return NULL;
-
- txdesc->callback = dw_spi_dma_tx_done;
- txdesc->callback_param = dws;
-
- return txdesc;
-}
-
-/*
- * dws->dma_chan_busy is set before the dma transfer starts, callback for rx
- * channel will clear a corresponding bit.
- */
-static void dw_spi_dma_rx_done(void *arg)
-{
- struct dw_spi *dws = arg;
-
- clear_bit(RX_BUSY, &dws->dma_chan_busy);
- if (test_bit(TX_BUSY, &dws->dma_chan_busy))
- return;
- spi_finalize_current_transfer(dws->master);
-}
-
-static struct dma_async_tx_descriptor *dw_spi_dma_prepare_rx(struct dw_spi *dws,
- struct spi_transfer *xfer)
-{
- struct dma_slave_config rxconf;
- struct dma_async_tx_descriptor *rxdesc;
-
- if (!xfer->rx_buf)
- return NULL;
-
- rxconf.direction = DMA_DEV_TO_MEM;
- rxconf.src_addr = dws->dma_addr;
- rxconf.src_maxburst = 16;
- rxconf.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
- rxconf.src_addr_width = convert_dma_width(dws->dma_width);
- rxconf.device_fc = false;
-
- dmaengine_slave_config(dws->rxchan, &rxconf);
-
- rxdesc = dmaengine_prep_slave_sg(dws->rxchan,
- xfer->rx_sg.sgl,
- xfer->rx_sg.nents,
- DMA_DEV_TO_MEM,
- DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
- if (!rxdesc)
- return NULL;
-
- rxdesc->callback = dw_spi_dma_rx_done;
- rxdesc->callback_param = dws;
-
- return rxdesc;
-}
-
-static int mid_spi_dma_setup(struct dw_spi *dws, struct spi_transfer *xfer)
-{
- u16 dma_ctrl = 0;
-
- dw_writel(dws, DW_SPI_DMARDLR, 0xf);
- dw_writel(dws, DW_SPI_DMATDLR, 0x10);
-
- if (xfer->tx_buf)
- dma_ctrl |= SPI_DMA_TDMAE;
- if (xfer->rx_buf)
- dma_ctrl |= SPI_DMA_RDMAE;
- dw_writel(dws, DW_SPI_DMACR, dma_ctrl);
-
- /* Set the interrupt mask */
- spi_umask_intr(dws, SPI_INT_TXOI | SPI_INT_RXUI | SPI_INT_RXOI);
-
- dws->transfer_handler = dma_transfer;
-
- return 0;
-}
-
-static int mid_spi_dma_transfer(struct dw_spi *dws, struct spi_transfer *xfer)
-{
- struct dma_async_tx_descriptor *txdesc, *rxdesc;
-
- /* Prepare the TX dma transfer */
- txdesc = dw_spi_dma_prepare_tx(dws, xfer);
-
- /* Prepare the RX dma transfer */
- rxdesc = dw_spi_dma_prepare_rx(dws, xfer);
-
- /* rx must be started before tx due to spi instinct */
- if (rxdesc) {
- set_bit(RX_BUSY, &dws->dma_chan_busy);
- dmaengine_submit(rxdesc);
- dma_async_issue_pending(dws->rxchan);
- }
-
- if (txdesc) {
- set_bit(TX_BUSY, &dws->dma_chan_busy);
- dmaengine_submit(txdesc);
- dma_async_issue_pending(dws->txchan);
- }
-
- return 0;
-}
-
-static void mid_spi_dma_stop(struct dw_spi *dws)
-{
- if (test_bit(TX_BUSY, &dws->dma_chan_busy)) {
- dmaengine_terminate_sync(dws->txchan);
- clear_bit(TX_BUSY, &dws->dma_chan_busy);
- }
- if (test_bit(RX_BUSY, &dws->dma_chan_busy)) {
- dmaengine_terminate_sync(dws->rxchan);
- clear_bit(RX_BUSY, &dws->dma_chan_busy);
- }
-}
-
-static const struct dw_spi_dma_ops mid_dma_ops = {
- .dma_init = mid_spi_dma_init,
- .dma_exit = mid_spi_dma_exit,
- .dma_setup = mid_spi_dma_setup,
- .can_dma = mid_spi_can_dma,
- .dma_transfer = mid_spi_dma_transfer,
- .dma_stop = mid_spi_dma_stop,
-};
-#endif
-
-/* Some specific info for SPI0 controller on Intel MID */
-
-/* HW info for MRST Clk Control Unit, 32b reg per controller */
-#define MRST_SPI_CLK_BASE 100000000 /* 100m */
-#define MRST_CLK_SPI_REG 0xff11d86c
-#define CLK_SPI_BDIV_OFFSET 0
-#define CLK_SPI_BDIV_MASK 0x00000007
-#define CLK_SPI_CDIV_OFFSET 9
-#define CLK_SPI_CDIV_MASK 0x00000e00
-#define CLK_SPI_DISABLE_OFFSET 8
-
-int dw_spi_mid_init(struct dw_spi *dws)
-{
- void __iomem *clk_reg;
- u32 clk_cdiv;
-
- clk_reg = ioremap(MRST_CLK_SPI_REG, 16);
- if (!clk_reg)
- return -ENOMEM;
-
- /* Get SPI controller operating freq info */
- clk_cdiv = readl(clk_reg + dws->bus_num * sizeof(u32));
- clk_cdiv &= CLK_SPI_CDIV_MASK;
- clk_cdiv >>= CLK_SPI_CDIV_OFFSET;
- dws->max_freq = MRST_SPI_CLK_BASE / (clk_cdiv + 1);
-
- iounmap(clk_reg);
-
-#ifdef CONFIG_SPI_DW_MID_DMA
- dws->dma_tx = &mid_dma_tx;
- dws->dma_rx = &mid_dma_rx;
- dws->dma_ops = &mid_dma_ops;
-#endif
- return 0;
-}
diff --git a/drivers/spi/spi-dw-mmio.c b/drivers/spi/spi-dw-mmio.c
index 384a3ab6dc2d..403403deae66 100644
--- a/drivers/spi/spi-dw-mmio.c
+++ b/drivers/spi/spi-dw-mmio.c
@@ -7,7 +7,6 @@
#include <linux/clk.h>
#include <linux/err.h>
-#include <linux/interrupt.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
#include <linux/slab.h>
@@ -20,6 +19,7 @@
#include <linux/acpi.h>
#include <linux/property.h>
#include <linux/regmap.h>
+#include <linux/reset.h>
#include "spi-dw.h"
@@ -30,6 +30,7 @@ struct dw_spi_mmio {
struct clk *clk;
struct clk *pclk;
void *priv;
+ struct reset_control *rstc;
};
#define MSCC_CPU_SYSTEM_CTRL_GENERAL_CTRL 0x24
@@ -44,6 +45,13 @@ struct dw_spi_mmio {
#define MSCC_SPI_MST_SW_MODE_SW_PIN_CTRL_MODE BIT(13)
#define MSCC_SPI_MST_SW_MODE_SW_SPI_CS(x) (x << 5)
+/*
+ * For Keem Bay, CTRLR0[31] is used to select controller mode.
+ * 0: SSI is slave
+ * 1: SSI is master
+ */
+#define KEEMBAY_CTRLR0_SSIC_IS_MST BIT(31)
+
struct dw_spi_mscc {
struct regmap *syscon;
void __iomem *spi_mst;
@@ -106,6 +114,9 @@ static int dw_spi_mscc_init(struct platform_device *pdev,
dwsmmio->dws.set_cs = dw_spi_mscc_set_cs;
dwsmmio->priv = dwsmscc;
+ /* Register hook to configure CTRLR0 */
+ dwsmmio->dws.update_cr0 = dw_spi_update_cr0;
+
return 0;
}
@@ -128,6 +139,49 @@ static int dw_spi_alpine_init(struct platform_device *pdev,
{
dwsmmio->dws.cs_override = 1;
+ /* Register hook to configure CTRLR0 */
+ dwsmmio->dws.update_cr0 = dw_spi_update_cr0;
+
+ return 0;
+}
+
+static int dw_spi_dw_apb_init(struct platform_device *pdev,
+ struct dw_spi_mmio *dwsmmio)
+{
+ /* Register hook to configure CTRLR0 */
+ dwsmmio->dws.update_cr0 = dw_spi_update_cr0;
+
+ dw_spi_dma_setup_generic(&dwsmmio->dws);
+
+ return 0;
+}
+
+static int dw_spi_dwc_ssi_init(struct platform_device *pdev,
+ struct dw_spi_mmio *dwsmmio)
+{
+ /* Register hook to configure CTRLR0 */
+ dwsmmio->dws.update_cr0 = dw_spi_update_cr0_v1_01a;
+
+ dw_spi_dma_setup_generic(&dwsmmio->dws);
+
+ return 0;
+}
+
+static u32 dw_spi_update_cr0_keembay(struct spi_controller *master,
+ struct spi_device *spi,
+ struct spi_transfer *transfer)
+{
+ u32 cr0 = dw_spi_update_cr0_v1_01a(master, spi, transfer);
+
+ return cr0 | KEEMBAY_CTRLR0_SSIC_IS_MST;
+}
+
+static int dw_spi_keembay_init(struct platform_device *pdev,
+ struct dw_spi_mmio *dwsmmio)
+{
+ /* Register hook to configure CTRLR0 */
+ dwsmmio->dws.update_cr0 = dw_spi_update_cr0_keembay;
+
return 0;
}
@@ -136,6 +190,7 @@ static int dw_spi_mmio_probe(struct platform_device *pdev)
int (*init_func)(struct platform_device *pdev,
struct dw_spi_mmio *dwsmmio);
struct dw_spi_mmio *dwsmmio;
+ struct resource *mem;
struct dw_spi *dws;
int ret;
int num_cs;
@@ -148,11 +203,11 @@ static int dw_spi_mmio_probe(struct platform_device *pdev)
dws = &dwsmmio->dws;
/* Get basic io resource and map it */
- dws->regs = devm_platform_ioremap_resource(pdev, 0);
- if (IS_ERR(dws->regs)) {
- dev_err(&pdev->dev, "SPI region map failed\n");
+ dws->regs = devm_platform_get_and_ioremap_resource(pdev, 0, &mem);
+ if (IS_ERR(dws->regs))
return PTR_ERR(dws->regs);
- }
+
+ dws->paddr = mem->start;
dws->irq = platform_get_irq(pdev, 0);
if (dws->irq < 0)
@@ -175,6 +230,14 @@ static int dw_spi_mmio_probe(struct platform_device *pdev)
if (ret)
goto out_clk;
+ /* find an optional reset controller */
+ dwsmmio->rstc = devm_reset_control_get_optional_exclusive(&pdev->dev, "spi");
+ if (IS_ERR(dwsmmio->rstc)) {
+ ret = PTR_ERR(dwsmmio->rstc);
+ goto out_clk;
+ }
+ reset_control_deassert(dwsmmio->rstc);
+
dws->bus_num = pdev->id;
dws->max_freq = clk_get_rate(dwsmmio->clk);
@@ -208,6 +271,8 @@ out:
clk_disable_unprepare(dwsmmio->pclk);
out_clk:
clk_disable_unprepare(dwsmmio->clk);
+ reset_control_assert(dwsmmio->rstc);
+
return ret;
}
@@ -219,25 +284,30 @@ static int dw_spi_mmio_remove(struct platform_device *pdev)
pm_runtime_disable(&pdev->dev);
clk_disable_unprepare(dwsmmio->pclk);
clk_disable_unprepare(dwsmmio->clk);
+ reset_control_assert(dwsmmio->rstc);
return 0;
}
static const struct of_device_id dw_spi_mmio_of_match[] = {
- { .compatible = "snps,dw-apb-ssi", },
+ { .compatible = "snps,dw-apb-ssi", .data = dw_spi_dw_apb_init},
{ .compatible = "mscc,ocelot-spi", .data = dw_spi_mscc_ocelot_init},
{ .compatible = "mscc,jaguar2-spi", .data = dw_spi_mscc_jaguar2_init},
{ .compatible = "amazon,alpine-dw-apb-ssi", .data = dw_spi_alpine_init},
- { .compatible = "renesas,rzn1-spi", },
+ { .compatible = "renesas,rzn1-spi", .data = dw_spi_dw_apb_init},
+ { .compatible = "snps,dwc-ssi-1.01a", .data = dw_spi_dwc_ssi_init},
+ { .compatible = "intel,keembay-ssi", .data = dw_spi_keembay_init},
{ /* end of table */}
};
MODULE_DEVICE_TABLE(of, dw_spi_mmio_of_match);
+#ifdef CONFIG_ACPI
static const struct acpi_device_id dw_spi_mmio_acpi_match[] = {
- {"HISI0173", 0},
+ {"HISI0173", (kernel_ulong_t)dw_spi_dw_apb_init},
{},
};
MODULE_DEVICE_TABLE(acpi, dw_spi_mmio_acpi_match);
+#endif
static struct platform_driver dw_spi_mmio_driver = {
.probe = dw_spi_mmio_probe,
diff --git a/drivers/spi/spi-dw-pci.c b/drivers/spi/spi-dw-pci.c
index 12c131b5fb4e..2ea73809ca34 100644
--- a/drivers/spi/spi-dw-pci.c
+++ b/drivers/spi/spi-dw-pci.c
@@ -5,7 +5,6 @@
* Copyright (c) 2009, 2014 Intel Corporation.
*/
-#include <linux/interrupt.h>
#include <linux/pci.h>
#include <linux/pm_runtime.h>
#include <linux/slab.h>
@@ -16,6 +15,15 @@
#define DRIVER_NAME "dw_spi_pci"
+/* HW info for MRST Clk Control Unit, 32b reg per controller */
+#define MRST_SPI_CLK_BASE	100000000	/* 100 MHz */
+#define MRST_CLK_SPI_REG 0xff11d86c
+#define CLK_SPI_BDIV_OFFSET 0
+#define CLK_SPI_BDIV_MASK 0x00000007
+#define CLK_SPI_CDIV_OFFSET 9
+#define CLK_SPI_CDIV_MASK 0x00000e00
+#define CLK_SPI_DISABLE_OFFSET 8
+
struct spi_pci_desc {
int (*setup)(struct dw_spi *);
u16 num_cs;
@@ -23,19 +31,55 @@ struct spi_pci_desc {
u32 max_freq;
};
+static int spi_mid_init(struct dw_spi *dws)
+{
+ void __iomem *clk_reg;
+ u32 clk_cdiv;
+
+ clk_reg = ioremap(MRST_CLK_SPI_REG, 16);
+ if (!clk_reg)
+ return -ENOMEM;
+
+ /* Get SPI controller operating freq info */
+ clk_cdiv = readl(clk_reg + dws->bus_num * sizeof(u32));
+ clk_cdiv &= CLK_SPI_CDIV_MASK;
+ clk_cdiv >>= CLK_SPI_CDIV_OFFSET;
+ dws->max_freq = MRST_SPI_CLK_BASE / (clk_cdiv + 1);
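+
+	/*
+	 * Example (assumed CDIV value, for illustration only): a CDIV field
+	 * of 3 gives max_freq = 100 MHz / (3 + 1) = 25 MHz.
+	 */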
+
+ iounmap(clk_reg);
+
+ /* Register hook to configure CTRLR0 */
+ dws->update_cr0 = dw_spi_update_cr0;
+
+ dw_spi_dma_setup_mfld(dws);
+
+ return 0;
+}
+
+static int spi_generic_init(struct dw_spi *dws)
+{
+ /* Register hook to configure CTRLR0 */
+ dws->update_cr0 = dw_spi_update_cr0;
+
+ dw_spi_dma_setup_generic(dws);
+
+ return 0;
+}
+
static struct spi_pci_desc spi_pci_mid_desc_1 = {
- .setup = dw_spi_mid_init,
+ .setup = spi_mid_init,
.num_cs = 5,
.bus_num = 0,
};
static struct spi_pci_desc spi_pci_mid_desc_2 = {
- .setup = dw_spi_mid_init,
+ .setup = spi_mid_init,
.num_cs = 2,
.bus_num = 1,
};
static struct spi_pci_desc spi_pci_ehl_desc = {
+ .setup = spi_generic_init,
.num_cs = 2,
.bus_num = -1,
.max_freq = 100000000,
diff --git a/drivers/spi/spi-dw.h b/drivers/spi/spi-dw.h
index 1bf5713e047d..151ba316619e 100644
--- a/drivers/spi/spi-dw.h
+++ b/drivers/spi/spi-dw.h
@@ -2,18 +2,21 @@
#ifndef DW_SPI_HEADER_H
#define DW_SPI_HEADER_H
+#include <linux/completion.h>
+#include <linux/debugfs.h>
+#include <linux/irqreturn.h>
#include <linux/io.h>
#include <linux/scatterlist.h>
/* Register offsets */
-#define DW_SPI_CTRL0 0x00
-#define DW_SPI_CTRL1 0x04
+#define DW_SPI_CTRLR0 0x00
+#define DW_SPI_CTRLR1 0x04
#define DW_SPI_SSIENR 0x08
#define DW_SPI_MWCR 0x0c
#define DW_SPI_SER 0x10
#define DW_SPI_BAUDR 0x14
-#define DW_SPI_TXFLTR 0x18
-#define DW_SPI_RXFLTR 0x1c
+#define DW_SPI_TXFTLR 0x18
+#define DW_SPI_RXFTLR 0x1c
#define DW_SPI_TXFLR 0x20
#define DW_SPI_RXFLR 0x24
#define DW_SPI_SR 0x28
@@ -57,6 +60,15 @@
#define SPI_SRL_OFFSET 11
#define SPI_CFS_OFFSET 12
+/* Bit fields in CTRLR0 based on DWC_ssi_databook.pdf v1.01a */
+#define DWC_SSI_CTRLR0_SRL_OFFSET 13
+#define DWC_SSI_CTRLR0_TMOD_OFFSET 10
+#define DWC_SSI_CTRLR0_TMOD_MASK GENMASK(11, 10)
+#define DWC_SSI_CTRLR0_SCPOL_OFFSET 9
+#define DWC_SSI_CTRLR0_SCPH_OFFSET 8
+#define DWC_SSI_CTRLR0_FRF_OFFSET 6
+#define DWC_SSI_CTRLR0_DFS_OFFSET 0
+
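+/*
+ * Illustrative sketch only (not part of this patch): using the DWC SSI
+ * v1.01a layout above, an 8-bit, SPI mode 3, transmit-and-receive transfer
+ * could be encoded as
+ *   cr0 = (7 << DWC_SSI_CTRLR0_DFS_OFFSET)
+ *       | (1 << DWC_SSI_CTRLR0_SCPH_OFFSET)
+ *       | (1 << DWC_SSI_CTRLR0_SCPOL_OFFSET)
+ *       | (0 << DWC_SSI_CTRLR0_TMOD_OFFSET);
+ * assuming DFS holds "bits per word - 1" and TMOD 0 selects transmit and
+ * receive, as in the classic DW APB SSI register layout.
+ */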
/* Bit fields in SR, 7 bits */
#define SR_MASK 0x7f /* cover 7 bits */
#define SR_BUSY (1 << 0)
@@ -90,7 +102,7 @@ enum dw_ssi_type {
struct dw_spi;
struct dw_spi_dma_ops {
- int (*dma_init)(struct dw_spi *dws);
+ int (*dma_init)(struct device *dev, struct dw_spi *dws);
void (*dma_exit)(struct dw_spi *dws);
int (*dma_setup)(struct dw_spi *dws, struct spi_transfer *xfer);
bool (*can_dma)(struct spi_controller *master, struct spi_device *spi,
@@ -114,6 +126,8 @@ struct dw_spi {
u16 bus_num;
u16 num_cs; /* supported slave numbers */
void (*set_cs)(struct spi_device *spi, bool enable);
+ u32 (*update_cr0)(struct spi_controller *master, struct spi_device *spi,
+ struct spi_transfer *transfer);
/* Current message transfer state info */
size_t len;
@@ -124,24 +138,22 @@ struct dw_spi {
void *rx_end;
int dma_mapped;
u8 n_bytes; /* current is a 1/2 bytes op */
- u32 dma_width;
irqreturn_t (*transfer_handler)(struct dw_spi *dws);
u32 current_freq; /* frequency in hz */
/* DMA info */
- int dma_inited;
struct dma_chan *txchan;
+ u32 txburst;
struct dma_chan *rxchan;
+ u32 rxburst;
unsigned long dma_chan_busy;
dma_addr_t dma_addr; /* phy address of the Data register */
const struct dw_spi_dma_ops *dma_ops;
- void *dma_tx;
- void *dma_rx;
+ struct completion dma_completion;
- /* Bus interface info */
- void *priv;
#ifdef CONFIG_DEBUG_FS
struct dentry *debugfs;
+ struct debugfs_regset32 regset;
#endif
};
@@ -235,24 +247,28 @@ static inline void spi_shutdown_chip(struct dw_spi *dws)
spi_set_clk(dws, 0);
}
-/*
- * Each SPI slave device to work with dw_api controller should
- * has such a structure claiming its working mode (poll or PIO/DMA),
- * which can be save in the "controller_data" member of the
- * struct spi_device.
- */
-struct dw_spi_chip {
- u8 poll_mode; /* 1 for controller polling mode */
- u8 type; /* SPI/SSP/MicroWire */
- void (*cs_control)(u32 command);
-};
-
extern void dw_spi_set_cs(struct spi_device *spi, bool enable);
extern int dw_spi_add_host(struct device *dev, struct dw_spi *dws);
extern void dw_spi_remove_host(struct dw_spi *dws);
extern int dw_spi_suspend_host(struct dw_spi *dws);
extern int dw_spi_resume_host(struct dw_spi *dws);
+extern u32 dw_spi_update_cr0(struct spi_controller *master,
+ struct spi_device *spi,
+ struct spi_transfer *transfer);
+extern u32 dw_spi_update_cr0_v1_01a(struct spi_controller *master,
+ struct spi_device *spi,
+ struct spi_transfer *transfer);
+
+#ifdef CONFIG_SPI_DW_DMA
+
+extern void dw_spi_dma_setup_mfld(struct dw_spi *dws);
+extern void dw_spi_dma_setup_generic(struct dw_spi *dws);
+
+#else
+
+static inline void dw_spi_dma_setup_mfld(struct dw_spi *dws) {}
+static inline void dw_spi_dma_setup_generic(struct dw_spi *dws) {}
+
+#endif /* !CONFIG_SPI_DW_DMA */
-/* platform related setup */
-extern int dw_spi_mid_init(struct dw_spi *dws); /* Intel MID platforms */
#endif /* DW_SPI_HEADER_H */
diff --git a/drivers/spi/spi-ep93xx.c b/drivers/spi/spi-ep93xx.c
index 4e1ccd4e52b6..8c854b187b1d 100644
--- a/drivers/spi/spi-ep93xx.c
+++ b/drivers/spi/spi-ep93xx.c
@@ -31,7 +31,8 @@
#include <linux/platform_data/spi-ep93xx.h>
#define SSPCR0 0x0000
-#define SSPCR0_MODE_SHIFT 6
+#define SSPCR0_SPO BIT(6)
+#define SSPCR0_SPH BIT(7)
#define SSPCR0_SCR_SHIFT 8
#define SSPCR1 0x0004
@@ -159,7 +160,10 @@ static int ep93xx_spi_chip_setup(struct spi_master *master,
return err;
cr0 = div_scr << SSPCR0_SCR_SHIFT;
- cr0 |= (spi->mode & (SPI_CPHA | SPI_CPOL)) << SSPCR0_MODE_SHIFT;
+ if (spi->mode & SPI_CPOL)
+ cr0 |= SSPCR0_SPO;
+ if (spi->mode & SPI_CPHA)
+ cr0 |= SSPCR0_SPH;
cr0 |= dss;
dev_dbg(&master->dev, "setup: mode %d, cpsr %d, scr %d, dss %d\n",
diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c
index 50e41f66a2d7..a35faced0456 100644
--- a/drivers/spi/spi-fsl-dspi.c
+++ b/drivers/spi/spi-fsl-dspi.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0+
//
// Copyright 2013 Freescale Semiconductor, Inc.
+// Copyright 2020 NXP
//
// Freescale DSPI driver
// This file contains a driver for the Freescale DSPI
@@ -26,6 +27,9 @@
#define SPI_MCR_CLR_TXF BIT(11)
#define SPI_MCR_CLR_RXF BIT(10)
#define SPI_MCR_XSPI BIT(3)
+#define SPI_MCR_DIS_TXF BIT(13)
+#define SPI_MCR_DIS_RXF BIT(12)
+#define SPI_MCR_HALT BIT(0)
#define SPI_TCR 0x08
#define SPI_TCR_GET_TCNT(x) (((x) & GENMASK(31, 16)) >> 16)
@@ -246,13 +250,33 @@ struct fsl_dspi {
static void dspi_native_host_to_dev(struct fsl_dspi *dspi, u32 *txdata)
{
- memcpy(txdata, dspi->tx, dspi->oper_word_size);
+ switch (dspi->oper_word_size) {
+ case 1:
+ *txdata = *(u8 *)dspi->tx;
+ break;
+ case 2:
+ *txdata = *(u16 *)dspi->tx;
+ break;
+ case 4:
+ *txdata = *(u32 *)dspi->tx;
+ break;
+ }
dspi->tx += dspi->oper_word_size;
}
static void dspi_native_dev_to_host(struct fsl_dspi *dspi, u32 rxdata)
{
- memcpy(dspi->rx, &rxdata, dspi->oper_word_size);
+ switch (dspi->oper_word_size) {
+ case 1:
+ *(u8 *)dspi->rx = rxdata;
+ break;
+ case 2:
+ *(u16 *)dspi->rx = rxdata;
+ break;
+ case 4:
+ *(u32 *)dspi->rx = rxdata;
+ break;
+ }
dspi->rx += dspi->oper_word_size;
}
@@ -1417,6 +1441,24 @@ static int dspi_remove(struct platform_device *pdev)
return 0;
}
+static void dspi_shutdown(struct platform_device *pdev)
+{
+ struct spi_controller *ctlr = platform_get_drvdata(pdev);
+ struct fsl_dspi *dspi = spi_controller_get_devdata(ctlr);
+
+ /* Disable RX and TX */
+ regmap_update_bits(dspi->regmap, SPI_MCR,
+ SPI_MCR_DIS_TXF | SPI_MCR_DIS_RXF,
+ SPI_MCR_DIS_TXF | SPI_MCR_DIS_RXF);
+
+ /* Stop Running */
+ regmap_update_bits(dspi->regmap, SPI_MCR, SPI_MCR_HALT, SPI_MCR_HALT);
+
+ dspi_release_dma(dspi);
+ clk_disable_unprepare(dspi->clk);
+ spi_unregister_controller(dspi->ctlr);
+}
+
static struct platform_driver fsl_dspi_driver = {
.driver.name = DRIVER_NAME,
.driver.of_match_table = fsl_dspi_dt_ids,
@@ -1424,6 +1466,7 @@ static struct platform_driver fsl_dspi_driver = {
.driver.pm = &dspi_pm,
.probe = dspi_probe,
.remove = dspi_remove,
+ .shutdown = dspi_shutdown,
};
module_platform_driver(fsl_dspi_driver);
diff --git a/drivers/spi/spi-fsl-lpspi.c b/drivers/spi/spi-fsl-lpspi.c
index 8b41b70f6f5c..1552b28b9515 100644
--- a/drivers/spi/spi-fsl-lpspi.c
+++ b/drivers/spi/spi-fsl-lpspi.c
@@ -186,14 +186,13 @@ static bool fsl_lpspi_can_dma(struct spi_controller *controller,
bytes_per_word = fsl_lpspi_bytes_per_word(transfer->bits_per_word);
- switch (bytes_per_word)
- {
- case 1:
- case 2:
- case 4:
- break;
- default:
- return false;
+ switch (bytes_per_word) {
+ case 1:
+ case 2:
+ case 4:
+ break;
+ default:
+ return false;
}
return true;
@@ -941,7 +940,7 @@ static int fsl_lpspi_probe(struct platform_device *pdev)
ret = pm_runtime_get_sync(fsl_lpspi->dev);
if (ret < 0) {
dev_err(fsl_lpspi->dev, "failed to enable clock\n");
- goto out_controller_put;
+ goto out_pm_get;
}
temp = readl(fsl_lpspi->base + IMX7ULP_PARAM);
@@ -950,13 +949,15 @@ static int fsl_lpspi_probe(struct platform_device *pdev)
ret = fsl_lpspi_dma_init(&pdev->dev, fsl_lpspi, controller);
if (ret == -EPROBE_DEFER)
- goto out_controller_put;
+ goto out_pm_get;
if (ret < 0)
dev_err(&pdev->dev, "dma setup error %d, use pio\n", ret);
return 0;
+out_pm_get:
+ pm_runtime_put_noidle(fsl_lpspi->dev);
out_controller_put:
spi_controller_put(controller);
diff --git a/drivers/spi/spi-fsl-qspi.c b/drivers/spi/spi-fsl-qspi.c
index 02e5cba0a5bb..6766262d7e75 100644
--- a/drivers/spi/spi-fsl-qspi.c
+++ b/drivers/spi/spi-fsl-qspi.c
@@ -876,14 +876,15 @@ static int fsl_qspi_probe(struct platform_device *pdev)
res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
"QuadSPI-memory");
- q->ahb_addr = devm_ioremap_resource(dev, res);
- if (IS_ERR(q->ahb_addr)) {
- ret = PTR_ERR(q->ahb_addr);
+ q->memmap_phy = res->start;
+	/* Since there are 4 chip selects, the required map size is 4 times ahb_buf_size */
+ q->ahb_addr = devm_ioremap(dev, q->memmap_phy,
+ (q->devtype_data->ahb_buf_size * 4));
+ if (!q->ahb_addr) {
+ ret = -ENOMEM;
goto err_put_ctrl;
}
- q->memmap_phy = res->start;
-
/* find the clocks */
q->clk_en = devm_clk_get(dev, "qspi_en");
if (IS_ERR(q->clk_en)) {
diff --git a/drivers/spi/spi-fsl-spi.c b/drivers/spi/spi-fsl-spi.c
index 3b81772fea0d..67f022b8c81d 100644
--- a/drivers/spi/spi-fsl-spi.c
+++ b/drivers/spi/spi-fsl-spi.c
@@ -588,7 +588,7 @@ static void fsl_spi_grlib_probe(struct device *dev)
pdata->cs_control = fsl_spi_grlib_cs_control;
}
-static struct spi_master * fsl_spi_probe(struct device *dev,
+static struct spi_master *fsl_spi_probe(struct device *dev,
struct resource *mem, unsigned int irq)
{
struct fsl_spi_platform_data *pdata = dev_get_platdata(dev);
diff --git a/drivers/spi/spi-hisi-sfc-v3xx.c b/drivers/spi/spi-hisi-sfc-v3xx.c
index e3b57252d075..64a18d08a4d9 100644
--- a/drivers/spi/spi-hisi-sfc-v3xx.c
+++ b/drivers/spi/spi-hisi-sfc-v3xx.c
@@ -17,6 +17,11 @@
#define HISI_SFC_V3XX_VERSION (0x1f8)
+#define HISI_SFC_V3XX_INT_STAT (0x120)
+#define HISI_SFC_V3XX_INT_STAT_PP_ERR BIT(2)
+#define HISI_SFC_V3XX_INT_STAT_ADDR_IACCES BIT(5)
+#define HISI_SFC_V3XX_INT_CLR (0x12c)
+#define HISI_SFC_V3XX_INT_CLR_CLEAR (0xff)
#define HISI_SFC_V3XX_CMD_CFG (0x300)
#define HISI_SFC_V3XX_CMD_CFG_DUAL_IN_DUAL_OUT (1 << 17)
#define HISI_SFC_V3XX_CMD_CFG_DUAL_IO (2 << 17)
@@ -163,7 +168,7 @@ static int hisi_sfc_v3xx_generic_exec_op(struct hisi_sfc_v3xx_host *host,
u8 chip_select)
{
int ret, len = op->data.nbytes;
- u32 config = 0;
+ u32 int_stat, config = 0;
if (op->addr.nbytes)
config |= HISI_SFC_V3XX_CMD_CFG_ADDR_EN_MSK;
@@ -228,6 +233,25 @@ static int hisi_sfc_v3xx_generic_exec_op(struct hisi_sfc_v3xx_host *host,
if (ret)
return ret;
+ /*
+	 * The interrupt status register indicates whether an error occurred
+	 * during the operation. Check it, and clear the interrupts so the
+	 * next operation can be judged from a clean state.
+ */
+ int_stat = readl(host->regbase + HISI_SFC_V3XX_INT_STAT);
+ writel(HISI_SFC_V3XX_INT_CLR_CLEAR,
+ host->regbase + HISI_SFC_V3XX_INT_CLR);
+
+ if (int_stat & HISI_SFC_V3XX_INT_STAT_ADDR_IACCES) {
+ dev_err(host->dev, "fail to access protected address\n");
+ return -EIO;
+ }
+
+ if (int_stat & HISI_SFC_V3XX_INT_STAT_PP_ERR) {
+ dev_err(host->dev, "page program operation failed\n");
+ return -EIO;
+ }
+
if (op->data.dir == SPI_MEM_DATA_IN)
hisi_sfc_v3xx_read_databuf(host, op->data.buf.in, len);
diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c
index f4f28a400a96..b7a85e3fe1c1 100644
--- a/drivers/spi/spi-imx.c
+++ b/drivers/spi/spi-imx.c
@@ -71,6 +71,7 @@ struct spi_imx_devtype_data {
void (*reset)(struct spi_imx_data *);
void (*setup_wml)(struct spi_imx_data *);
void (*disable)(struct spi_imx_data *);
+ void (*disable_dma)(struct spi_imx_data *);
bool has_dmamode;
bool has_slavemode;
unsigned int fifo_size;
@@ -485,6 +486,11 @@ static void mx51_ecspi_trigger(struct spi_imx_data *spi_imx)
writel(reg, spi_imx->base + MX51_ECSPI_CTRL);
}
+static void mx51_disable_dma(struct spi_imx_data *spi_imx)
+{
+ writel(0, spi_imx->base + MX51_ECSPI_DMA);
+}
+
static void mx51_ecspi_disable(struct spi_imx_data *spi_imx)
{
u32 ctrl;
@@ -987,6 +993,7 @@ static struct spi_imx_devtype_data imx51_ecspi_devtype_data = {
.rx_available = mx51_ecspi_rx_available,
.reset = mx51_ecspi_reset,
.setup_wml = mx51_setup_wml,
+ .disable_dma = mx51_disable_dma,
.fifo_size = 64,
.has_dmamode = true,
.dynamic_burst = true,
@@ -1001,6 +1008,7 @@ static struct spi_imx_devtype_data imx53_ecspi_devtype_data = {
.prepare_transfer = mx51_ecspi_prepare_transfer,
.trigger = mx51_ecspi_trigger,
.rx_available = mx51_ecspi_rx_available,
+ .disable_dma = mx51_disable_dma,
.reset = mx51_ecspi_reset,
.fifo_size = 64,
.has_dmamode = true,
@@ -1385,6 +1393,7 @@ static int spi_imx_dma_transfer(struct spi_imx_data *spi_imx,
DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
if (!desc_tx) {
dmaengine_terminate_all(master->dma_tx);
+ dmaengine_terminate_all(master->dma_rx);
return -EINVAL;
}
@@ -1498,6 +1507,7 @@ static int spi_imx_transfer(struct spi_device *spi,
struct spi_transfer *transfer)
{
struct spi_imx_data *spi_imx = spi_master_get_devdata(spi->master);
+ int ret;
/* flush rxfifo before transfer */
while (spi_imx->devtype_data->rx_available(spi_imx))
@@ -1506,10 +1516,23 @@ static int spi_imx_transfer(struct spi_device *spi,
if (spi_imx->slave_mode)
return spi_imx_pio_transfer_slave(spi, transfer);
- if (spi_imx->usedma)
- return spi_imx_dma_transfer(spi_imx, transfer);
- else
- return spi_imx_pio_transfer(spi, transfer);
+ /*
+	 * Fall back to PIO mode if a DMA setup error happens, for example when
+	 * the SDMA firmware has not been updated as ERR009165 requires.
+ */
+ if (spi_imx->usedma) {
+ ret = spi_imx_dma_transfer(spi_imx, transfer);
+ if (ret != -EINVAL)
+ return ret;
+
+ spi_imx->devtype_data->disable_dma(spi_imx);
+
+ spi_imx->usedma = false;
+ spi_imx->dynamic_burst = spi_imx->devtype_data->dynamic_burst;
+ dev_dbg(&spi->dev, "Fallback to PIO mode\n");
+ }
+
+ return spi_imx_pio_transfer(spi, transfer);
}
static int spi_imx_setup(struct spi_device *spi)
diff --git a/drivers/spi/spi-mem.c b/drivers/spi/spi-mem.c
index adaa0c49f966..9a86cc27fcc0 100644
--- a/drivers/spi/spi-mem.c
+++ b/drivers/spi/spi-mem.c
@@ -108,15 +108,17 @@ static int spi_check_buswidth_req(struct spi_mem *mem, u8 buswidth, bool tx)
return 0;
case 2:
- if ((tx && (mode & (SPI_TX_DUAL | SPI_TX_QUAD))) ||
- (!tx && (mode & (SPI_RX_DUAL | SPI_RX_QUAD))))
+ if ((tx &&
+ (mode & (SPI_TX_DUAL | SPI_TX_QUAD | SPI_TX_OCTAL))) ||
+ (!tx &&
+ (mode & (SPI_RX_DUAL | SPI_RX_QUAD | SPI_RX_OCTAL))))
return 0;
break;
case 4:
- if ((tx && (mode & SPI_TX_QUAD)) ||
- (!tx && (mode & SPI_RX_QUAD)))
+ if ((tx && (mode & (SPI_TX_QUAD | SPI_TX_OCTAL))) ||
+ (!tx && (mode & (SPI_RX_QUAD | SPI_RX_OCTAL))))
return 0;
break;
diff --git a/drivers/spi/spi-mtk-nor.c b/drivers/spi/spi-mtk-nor.c
index c15a9910549f..7bc302b50396 100644
--- a/drivers/spi/spi-mtk-nor.c
+++ b/drivers/spi/spi-mtk-nor.c
@@ -391,7 +391,7 @@ static int mtk_nor_pp_unbuffered(struct mtk_nor *sp,
return mtk_nor_cmd_exec(sp, MTK_NOR_CMD_WRITE, 6 * BITS_PER_BYTE);
}
-int mtk_nor_exec_op(struct spi_mem *mem, const struct spi_mem_op *op)
+static int mtk_nor_exec_op(struct spi_mem *mem, const struct spi_mem_op *op)
{
struct mtk_nor *sp = spi_controller_get_devdata(mem->spi->master);
int ret;
diff --git a/drivers/spi/spi-mux.c b/drivers/spi/spi-mux.c
index 4f94c9127fc1..cc9ef371db14 100644
--- a/drivers/spi/spi-mux.c
+++ b/drivers/spi/spi-mux.c
@@ -51,6 +51,10 @@ static int spi_mux_select(struct spi_device *spi)
struct spi_mux_priv *priv = spi_controller_get_devdata(spi->controller);
int ret;
+ ret = mux_control_select(priv->mux, spi->chip_select);
+ if (ret)
+ return ret;
+
if (priv->current_cs == spi->chip_select)
return 0;
@@ -62,10 +66,6 @@ static int spi_mux_select(struct spi_device *spi)
priv->spi->mode = spi->mode;
priv->spi->bits_per_word = spi->bits_per_word;
- ret = mux_control_select(priv->mux, spi->chip_select);
- if (ret)
- return ret;
-
priv->current_cs = spi->chip_select;
return 0;
diff --git a/drivers/spi/spi-orion.c b/drivers/spi/spi-orion.c
index 1f59beb7d27e..43f73db22f21 100644
--- a/drivers/spi/spi-orion.c
+++ b/drivers/spi/spi-orion.c
@@ -17,10 +17,8 @@
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_device.h>
-#include <linux/of_gpio.h>
#include <linux/clk.h>
#include <linux/sizes.h>
-#include <linux/gpio.h>
#include <asm/unaligned.h>
#define DRIVER_NAME "orion_spi"
@@ -98,7 +96,6 @@ struct orion_spi {
struct clk *clk;
struct clk *axi_clk;
const struct orion_spi_dev *devdata;
- int unused_hw_gpio;
struct orion_child_options child[ORION_NUM_CHIPSELECTS];
};
@@ -325,20 +322,27 @@ orion_spi_setup_transfer(struct spi_device *spi, struct spi_transfer *t)
static void orion_spi_set_cs(struct spi_device *spi, bool enable)
{
struct orion_spi *orion_spi;
- int cs;
orion_spi = spi_master_get_devdata(spi->master);
- if (gpio_is_valid(spi->cs_gpio))
- cs = orion_spi->unused_hw_gpio;
- else
- cs = spi->chip_select;
-
+ /*
+ * If this line is using a GPIO to control chip select, this internal
+ * .set_cs() function will still be called, so we clear any previous
+	 * chip select. The CS we activate will not have any electrical effect,
+ * as it is handled by a GPIO, but that doesn't matter. What we need
+ * is to deassert the old chip select and assert some other chip select.
+ */
orion_spi_clrbits(orion_spi, ORION_SPI_IF_CTRL_REG, ORION_SPI_CS_MASK);
orion_spi_setbits(orion_spi, ORION_SPI_IF_CTRL_REG,
- ORION_SPI_CS(cs));
+ ORION_SPI_CS(spi->chip_select));
- /* Chip select logic is inverted from spi_set_cs */
+ /*
+ * Chip select logic is inverted from spi_set_cs(). For lines using a
+ * GPIO to do chip select SPI_CS_HIGH is enforced and inversion happens
+ * in the GPIO library, but we don't care about that, because in those
+	 * cases we are dealing with an unused native CS anyway, so the polarity
+ * doesn't matter.
+ */
if (!enable)
orion_spi_setbits(orion_spi, ORION_SPI_IF_CTRL_REG, 0x1);
else
@@ -503,9 +507,6 @@ static int orion_spi_transfer_one(struct spi_master *master,
static int orion_spi_setup(struct spi_device *spi)
{
- if (gpio_is_valid(spi->cs_gpio)) {
- gpio_direction_output(spi->cs_gpio, !(spi->mode & SPI_CS_HIGH));
- }
return orion_spi_setup_transfer(spi, NULL);
}
@@ -622,13 +623,13 @@ static int orion_spi_probe(struct platform_device *pdev)
master->setup = orion_spi_setup;
master->bits_per_word_mask = SPI_BPW_MASK(8) | SPI_BPW_MASK(16);
master->auto_runtime_pm = true;
+ master->use_gpio_descriptors = true;
master->flags = SPI_MASTER_GPIO_SS;
platform_set_drvdata(pdev, master);
spi = spi_master_get_devdata(master);
spi->master = master;
- spi->unused_hw_gpio = -1;
of_id = of_match_device(orion_spi_of_match_table, &pdev->dev);
devdata = (of_id) ? of_id->data : &orion_spi_dev_data;
@@ -683,7 +684,6 @@ static int orion_spi_probe(struct platform_device *pdev)
for_each_available_child_of_node(pdev->dev.of_node, np) {
struct orion_direct_acc *dir_acc;
u32 cs;
- int cs_gpio;
/* Get chip-select number from the "reg" property */
status = of_property_read_u32(np, "reg", &cs);
@@ -695,44 +695,6 @@ static int orion_spi_probe(struct platform_device *pdev)
}
/*
- * Initialize the CS GPIO:
- * - properly request the actual GPIO signal
- * - de-assert the logical signal so that all GPIO CS lines
- * are inactive when probing for slaves
- * - find an unused physical CS which will be driven for any
- * slave which uses a CS GPIO
- */
- cs_gpio = of_get_named_gpio(pdev->dev.of_node, "cs-gpios", cs);
- if (cs_gpio > 0) {
- char *gpio_name;
- int cs_flags;
-
- if (spi->unused_hw_gpio == -1) {
- dev_info(&pdev->dev,
- "Selected unused HW CS#%d for any GPIO CSes\n",
- cs);
- spi->unused_hw_gpio = cs;
- }
-
- gpio_name = devm_kasprintf(&pdev->dev, GFP_KERNEL,
- "%s-CS%d", dev_name(&pdev->dev), cs);
- if (!gpio_name) {
- status = -ENOMEM;
- goto out_rel_axi_clk;
- }
-
- cs_flags = of_property_read_bool(np, "spi-cs-high") ?
- GPIOF_OUT_INIT_LOW : GPIOF_OUT_INIT_HIGH;
- status = devm_gpio_request_one(&pdev->dev, cs_gpio,
- cs_flags, gpio_name);
- if (status) {
- dev_err(&pdev->dev,
- "Can't request GPIO for CS %d\n", cs);
- goto out_rel_axi_clk;
- }
- }
-
- /*
* Check if an address is configured for this SPI device. If
* not, the MBus mapping via the 'ranges' property in the 'soc'
* node is not configured and this device should not use the
diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c
index 73d2a65d0b6e..6721910e5f2a 100644
--- a/drivers/spi/spi-pxa2xx.c
+++ b/drivers/spi/spi-pxa2xx.c
@@ -150,6 +150,7 @@ static const struct lpss_config lpss_platforms[] = {
.tx_threshold_hi = 48,
.cs_sel_shift = 8,
.cs_sel_mask = 3 << 8,
+ .cs_clk_stays_gated = true,
},
{ /* LPSS_CNL_SSP */
.offset = 0x200,
@@ -1884,7 +1885,7 @@ static int pxa2xx_spi_probe(struct platform_device *pdev)
/* Register with the SPI framework */
platform_set_drvdata(pdev, drv_data);
- status = devm_spi_register_controller(&pdev->dev, controller);
+ status = spi_register_controller(controller);
if (status != 0) {
dev_err(&pdev->dev, "problem registering spi controller\n");
goto out_error_pm_runtime_enabled;
@@ -1893,7 +1894,6 @@ static int pxa2xx_spi_probe(struct platform_device *pdev)
return status;
out_error_pm_runtime_enabled:
- pm_runtime_put_noidle(&pdev->dev);
pm_runtime_disable(&pdev->dev);
out_error_clock_enabled:
@@ -1916,6 +1916,8 @@ static int pxa2xx_spi_remove(struct platform_device *pdev)
pm_runtime_get_sync(&pdev->dev);
+ spi_unregister_controller(drv_data->controller);
+
/* Disable the SSP at the peripheral and SOC level */
pxa2xx_spi_write(drv_data, SSCR0, 0);
clk_disable_unprepare(ssp->clk);
diff --git a/drivers/spi/spi-rb4xx.c b/drivers/spi/spi-rb4xx.c
index 4c9620e0d18c..8aa51beb4ff3 100644
--- a/drivers/spi/spi-rb4xx.c
+++ b/drivers/spi/spi-rb4xx.c
@@ -14,6 +14,7 @@
#include <linux/platform_device.h>
#include <linux/clk.h>
#include <linux/spi/spi.h>
+#include <linux/of.h>
#include <asm/mach-ath79/ar71xx_regs.h>
@@ -150,6 +151,7 @@ static int rb4xx_spi_probe(struct platform_device *pdev)
if (IS_ERR(ahb_clk))
return PTR_ERR(ahb_clk);
+ master->dev.of_node = pdev->dev.of_node;
master->bus_num = 0;
master->num_chipselect = 3;
master->mode_bits = SPI_TX_DUAL;
@@ -158,6 +160,11 @@ static int rb4xx_spi_probe(struct platform_device *pdev)
master->transfer_one = rb4xx_transfer_one;
master->set_cs = rb4xx_set_cs;
+ rbspi = spi_master_get_devdata(master);
+ rbspi->base = spi_base;
+ rbspi->clk = ahb_clk;
+ platform_set_drvdata(pdev, rbspi);
+
err = devm_spi_register_master(&pdev->dev, master);
if (err) {
dev_err(&pdev->dev, "failed to register SPI master\n");
@@ -168,11 +175,6 @@ static int rb4xx_spi_probe(struct platform_device *pdev)
if (err)
return err;
- rbspi = spi_master_get_devdata(master);
- rbspi->base = spi_base;
- rbspi->clk = ahb_clk;
- platform_set_drvdata(pdev, rbspi);
-
/* Enable SPI */
rb4xx_write(rbspi, AR71XX_SPI_REG_FS, AR71XX_SPI_FS_GPIO);
@@ -188,11 +190,18 @@ static int rb4xx_spi_remove(struct platform_device *pdev)
return 0;
}
+static const struct of_device_id rb4xx_spi_dt_match[] = {
+ { .compatible = "mikrotik,rb4xx-spi" },
+ { },
+};
+MODULE_DEVICE_TABLE(of, rb4xx_spi_dt_match);
+
static struct platform_driver rb4xx_spi_drv = {
.probe = rb4xx_spi_probe,
.remove = rb4xx_spi_remove,
.driver = {
.name = "rb4xx-spi",
+ .of_match_table = of_match_ptr(rb4xx_spi_dt_match),
},
};
diff --git a/drivers/spi/spi-rockchip.c b/drivers/spi/spi-rockchip.c
index 70ef63e0b6b8..9b8a5e1233c0 100644
--- a/drivers/spi/spi-rockchip.c
+++ b/drivers/spi/spi-rockchip.c
@@ -183,6 +183,8 @@ struct rockchip_spi {
u8 rsd;
bool cs_asserted[ROCKCHIP_SPI_MAX_CS_NUM];
+
+ bool slave_abort;
};
static inline void spi_enable_chip(struct rockchip_spi *rs, bool enable)
@@ -219,8 +221,8 @@ static u32 get_fifo_len(struct rockchip_spi *rs)
static void rockchip_spi_set_cs(struct spi_device *spi, bool enable)
{
- struct spi_master *master = spi->master;
- struct rockchip_spi *rs = spi_master_get_devdata(master);
+ struct spi_controller *ctlr = spi->controller;
+ struct rockchip_spi *rs = spi_controller_get_devdata(ctlr);
bool cs_asserted = !enable;
/* Return immediately for no-op */
@@ -244,10 +246,10 @@ static void rockchip_spi_set_cs(struct spi_device *spi, bool enable)
rs->cs_asserted[spi->chip_select] = cs_asserted;
}
-static void rockchip_spi_handle_err(struct spi_master *master,
+static void rockchip_spi_handle_err(struct spi_controller *ctlr,
struct spi_message *msg)
{
- struct rockchip_spi *rs = spi_master_get_devdata(master);
+ struct rockchip_spi *rs = spi_controller_get_devdata(ctlr);
/* stop running spi transfer
* this also flushes both rx and tx fifos
@@ -258,10 +260,10 @@ static void rockchip_spi_handle_err(struct spi_master *master,
writel_relaxed(0, rs->regs + ROCKCHIP_SPI_IMR);
if (atomic_read(&rs->state) & TXDMA)
- dmaengine_terminate_async(master->dma_tx);
+ dmaengine_terminate_async(ctlr->dma_tx);
if (atomic_read(&rs->state) & RXDMA)
- dmaengine_terminate_async(master->dma_rx);
+ dmaengine_terminate_async(ctlr->dma_rx);
}
static void rockchip_spi_pio_writer(struct rockchip_spi *rs)
@@ -319,8 +321,8 @@ static void rockchip_spi_pio_reader(struct rockchip_spi *rs)
static irqreturn_t rockchip_spi_isr(int irq, void *dev_id)
{
- struct spi_master *master = dev_id;
- struct rockchip_spi *rs = spi_master_get_devdata(master);
+ struct spi_controller *ctlr = dev_id;
+ struct rockchip_spi *rs = spi_controller_get_devdata(ctlr);
if (rs->tx_left)
rockchip_spi_pio_writer(rs);
@@ -329,7 +331,7 @@ static irqreturn_t rockchip_spi_isr(int irq, void *dev_id)
if (!rs->rx_left) {
spi_enable_chip(rs, false);
writel_relaxed(0, rs->regs + ROCKCHIP_SPI_IMR);
- spi_finalize_current_transfer(master);
+ spi_finalize_current_transfer(ctlr);
}
return IRQ_HANDLED;
@@ -355,35 +357,35 @@ static int rockchip_spi_prepare_irq(struct rockchip_spi *rs,
static void rockchip_spi_dma_rxcb(void *data)
{
- struct spi_master *master = data;
- struct rockchip_spi *rs = spi_master_get_devdata(master);
+ struct spi_controller *ctlr = data;
+ struct rockchip_spi *rs = spi_controller_get_devdata(ctlr);
int state = atomic_fetch_andnot(RXDMA, &rs->state);
- if (state & TXDMA)
+ if (state & TXDMA && !rs->slave_abort)
return;
spi_enable_chip(rs, false);
- spi_finalize_current_transfer(master);
+ spi_finalize_current_transfer(ctlr);
}
static void rockchip_spi_dma_txcb(void *data)
{
- struct spi_master *master = data;
- struct rockchip_spi *rs = spi_master_get_devdata(master);
+ struct spi_controller *ctlr = data;
+ struct rockchip_spi *rs = spi_controller_get_devdata(ctlr);
int state = atomic_fetch_andnot(TXDMA, &rs->state);
- if (state & RXDMA)
+ if (state & RXDMA && !rs->slave_abort)
return;
 	/* Wait until the FIFO drains completely. */
wait_for_idle(rs);
spi_enable_chip(rs, false);
- spi_finalize_current_transfer(master);
+ spi_finalize_current_transfer(ctlr);
}
static int rockchip_spi_prepare_dma(struct rockchip_spi *rs,
- struct spi_master *master, struct spi_transfer *xfer)
+ struct spi_controller *ctlr, struct spi_transfer *xfer)
{
struct dma_async_tx_descriptor *rxdesc, *txdesc;
@@ -398,17 +400,17 @@ static int rockchip_spi_prepare_dma(struct rockchip_spi *rs,
.src_maxburst = 1,
};
- dmaengine_slave_config(master->dma_rx, &rxconf);
+ dmaengine_slave_config(ctlr->dma_rx, &rxconf);
rxdesc = dmaengine_prep_slave_sg(
- master->dma_rx,
+ ctlr->dma_rx,
xfer->rx_sg.sgl, xfer->rx_sg.nents,
DMA_DEV_TO_MEM, DMA_PREP_INTERRUPT);
if (!rxdesc)
return -EINVAL;
rxdesc->callback = rockchip_spi_dma_rxcb;
- rxdesc->callback_param = master;
+ rxdesc->callback_param = ctlr;
}
txdesc = NULL;
@@ -420,27 +422,27 @@ static int rockchip_spi_prepare_dma(struct rockchip_spi *rs,
.dst_maxburst = rs->fifo_len / 4,
};
- dmaengine_slave_config(master->dma_tx, &txconf);
+ dmaengine_slave_config(ctlr->dma_tx, &txconf);
txdesc = dmaengine_prep_slave_sg(
- master->dma_tx,
+ ctlr->dma_tx,
xfer->tx_sg.sgl, xfer->tx_sg.nents,
DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT);
if (!txdesc) {
if (rxdesc)
- dmaengine_terminate_sync(master->dma_rx);
+ dmaengine_terminate_sync(ctlr->dma_rx);
return -EINVAL;
}
txdesc->callback = rockchip_spi_dma_txcb;
- txdesc->callback_param = master;
+ txdesc->callback_param = ctlr;
}
/* rx must be started before tx due to spi instinct */
if (rxdesc) {
atomic_or(RXDMA, &rs->state);
dmaengine_submit(rxdesc);
- dma_async_issue_pending(master->dma_rx);
+ dma_async_issue_pending(ctlr->dma_rx);
}
spi_enable_chip(rs, true);
@@ -448,7 +450,7 @@ static int rockchip_spi_prepare_dma(struct rockchip_spi *rs,
if (txdesc) {
atomic_or(TXDMA, &rs->state);
dmaengine_submit(txdesc);
- dma_async_issue_pending(master->dma_tx);
+ dma_async_issue_pending(ctlr->dma_tx);
}
/* 1 means the transfer is in progress */
@@ -457,7 +459,7 @@ static int rockchip_spi_prepare_dma(struct rockchip_spi *rs,
static void rockchip_spi_config(struct rockchip_spi *rs,
struct spi_device *spi, struct spi_transfer *xfer,
- bool use_dma)
+ bool use_dma, bool slave_mode)
{
u32 cr0 = CR0_FRF_SPI << CR0_FRF_OFFSET
| CR0_BHT_8BIT << CR0_BHT_OFFSET
@@ -466,6 +468,10 @@ static void rockchip_spi_config(struct rockchip_spi *rs,
u32 cr1;
u32 dmacr = 0;
+ if (slave_mode)
+ cr0 |= CR0_OPM_SLAVE << CR0_OPM_OFFSET;
+ rs->slave_abort = false;
+
cr0 |= rs->rsd << CR0_RSD_OFFSET;
cr0 |= (spi->mode & 0x3U) << CR0_SCPH_OFFSET;
if (spi->mode & SPI_LSB_FIRST)
@@ -493,7 +499,7 @@ static void rockchip_spi_config(struct rockchip_spi *rs,
break;
default:
/* we only whitelist 4, 8 and 16 bit words in
- * master->bits_per_word_mask, so this shouldn't
+ * ctlr->bits_per_word_mask, so this shouldn't
* happen
*/
unreachable();
@@ -535,12 +541,22 @@ static size_t rockchip_spi_max_transfer_size(struct spi_device *spi)
return ROCKCHIP_SPI_MAX_TRANLEN;
}
+static int rockchip_spi_slave_abort(struct spi_controller *ctlr)
+{
+ struct rockchip_spi *rs = spi_controller_get_devdata(ctlr);
+
+ rs->slave_abort = true;
+ complete(&ctlr->xfer_completion);
+
+ return 0;
+}
+
static int rockchip_spi_transfer_one(
- struct spi_master *master,
+ struct spi_controller *ctlr,
struct spi_device *spi,
struct spi_transfer *xfer)
{
- struct rockchip_spi *rs = spi_master_get_devdata(master);
+ struct rockchip_spi *rs = spi_controller_get_devdata(ctlr);
bool use_dma;
WARN_ON(readl_relaxed(rs->regs + ROCKCHIP_SPI_SSIENR) &&
@@ -558,21 +574,21 @@ static int rockchip_spi_transfer_one(
rs->n_bytes = xfer->bits_per_word <= 8 ? 1 : 2;
- use_dma = master->can_dma ? master->can_dma(master, spi, xfer) : false;
+ use_dma = ctlr->can_dma ? ctlr->can_dma(ctlr, spi, xfer) : false;
- rockchip_spi_config(rs, spi, xfer, use_dma);
+ rockchip_spi_config(rs, spi, xfer, use_dma, ctlr->slave);
if (use_dma)
- return rockchip_spi_prepare_dma(rs, master, xfer);
+ return rockchip_spi_prepare_dma(rs, ctlr, xfer);
return rockchip_spi_prepare_irq(rs, xfer);
}
-static bool rockchip_spi_can_dma(struct spi_master *master,
+static bool rockchip_spi_can_dma(struct spi_controller *ctlr,
struct spi_device *spi,
struct spi_transfer *xfer)
{
- struct rockchip_spi *rs = spi_master_get_devdata(master);
+ struct rockchip_spi *rs = spi_controller_get_devdata(ctlr);
unsigned int bytes_per_word = xfer->bits_per_word <= 8 ? 1 : 2;
/* if the number of spi words to transfer is less than the fifo
@@ -586,44 +602,55 @@ static int rockchip_spi_probe(struct platform_device *pdev)
{
int ret;
struct rockchip_spi *rs;
- struct spi_master *master;
+ struct spi_controller *ctlr;
struct resource *mem;
+ struct device_node *np = pdev->dev.of_node;
u32 rsd_nsecs;
+ bool slave_mode;
+
+ slave_mode = of_property_read_bool(np, "spi-slave");
+
+ if (slave_mode)
+ ctlr = spi_alloc_slave(&pdev->dev,
+ sizeof(struct rockchip_spi));
+ else
+ ctlr = spi_alloc_master(&pdev->dev,
+ sizeof(struct rockchip_spi));
- master = spi_alloc_master(&pdev->dev, sizeof(struct rockchip_spi));
- if (!master)
+ if (!ctlr)
return -ENOMEM;
- platform_set_drvdata(pdev, master);
+ platform_set_drvdata(pdev, ctlr);
- rs = spi_master_get_devdata(master);
+ rs = spi_controller_get_devdata(ctlr);
+ ctlr->slave = slave_mode;
/* Get basic io resource and map it */
mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
rs->regs = devm_ioremap_resource(&pdev->dev, mem);
if (IS_ERR(rs->regs)) {
ret = PTR_ERR(rs->regs);
- goto err_put_master;
+ goto err_put_ctlr;
}
rs->apb_pclk = devm_clk_get(&pdev->dev, "apb_pclk");
if (IS_ERR(rs->apb_pclk)) {
dev_err(&pdev->dev, "Failed to get apb_pclk\n");
ret = PTR_ERR(rs->apb_pclk);
- goto err_put_master;
+ goto err_put_ctlr;
}
rs->spiclk = devm_clk_get(&pdev->dev, "spiclk");
if (IS_ERR(rs->spiclk)) {
dev_err(&pdev->dev, "Failed to get spi_pclk\n");
ret = PTR_ERR(rs->spiclk);
- goto err_put_master;
+ goto err_put_ctlr;
}
ret = clk_prepare_enable(rs->apb_pclk);
if (ret < 0) {
dev_err(&pdev->dev, "Failed to enable apb_pclk\n");
- goto err_put_master;
+ goto err_put_ctlr;
}
ret = clk_prepare_enable(rs->spiclk);
@@ -639,7 +666,7 @@ static int rockchip_spi_probe(struct platform_device *pdev)
goto err_disable_spiclk;
ret = devm_request_threaded_irq(&pdev->dev, ret, rockchip_spi_isr, NULL,
- IRQF_ONESHOT, dev_name(&pdev->dev), master);
+ IRQF_ONESHOT, dev_name(&pdev->dev), ctlr);
if (ret)
goto err_disable_spiclk;
@@ -673,78 +700,90 @@ static int rockchip_spi_probe(struct platform_device *pdev)
pm_runtime_set_active(&pdev->dev);
pm_runtime_enable(&pdev->dev);
- master->auto_runtime_pm = true;
- master->bus_num = pdev->id;
- master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_LOOP | SPI_LSB_FIRST;
- master->num_chipselect = ROCKCHIP_SPI_MAX_CS_NUM;
- master->dev.of_node = pdev->dev.of_node;
- master->bits_per_word_mask = SPI_BPW_MASK(16) | SPI_BPW_MASK(8) | SPI_BPW_MASK(4);
- master->min_speed_hz = rs->freq / BAUDR_SCKDV_MAX;
- master->max_speed_hz = min(rs->freq / BAUDR_SCKDV_MIN, MAX_SCLK_OUT);
-
- master->set_cs = rockchip_spi_set_cs;
- master->transfer_one = rockchip_spi_transfer_one;
- master->max_transfer_size = rockchip_spi_max_transfer_size;
- master->handle_err = rockchip_spi_handle_err;
- master->flags = SPI_MASTER_GPIO_SS;
-
- master->dma_tx = dma_request_chan(rs->dev, "tx");
- if (IS_ERR(master->dma_tx)) {
+ ctlr->auto_runtime_pm = true;
+ ctlr->bus_num = pdev->id;
+ ctlr->mode_bits = SPI_CPOL | SPI_CPHA | SPI_LOOP | SPI_LSB_FIRST;
+ if (slave_mode) {
+ ctlr->mode_bits |= SPI_NO_CS;
+ ctlr->slave_abort = rockchip_spi_slave_abort;
+ } else {
+ ctlr->flags = SPI_MASTER_GPIO_SS;
+ ctlr->max_native_cs = ROCKCHIP_SPI_MAX_CS_NUM;
+ /*
+ * rk spi0 has two native cs, spi1..5 one cs only
+ * if num-cs is missing in the dts, default to 1
+ */
+ if (of_property_read_u16(np, "num-cs", &ctlr->num_chipselect))
+ ctlr->num_chipselect = 1;
+ ctlr->use_gpio_descriptors = true;
+ }
+ ctlr->dev.of_node = pdev->dev.of_node;
+ ctlr->bits_per_word_mask = SPI_BPW_MASK(16) | SPI_BPW_MASK(8) | SPI_BPW_MASK(4);
+ ctlr->min_speed_hz = rs->freq / BAUDR_SCKDV_MAX;
+ ctlr->max_speed_hz = min(rs->freq / BAUDR_SCKDV_MIN, MAX_SCLK_OUT);
+
+ ctlr->set_cs = rockchip_spi_set_cs;
+ ctlr->transfer_one = rockchip_spi_transfer_one;
+ ctlr->max_transfer_size = rockchip_spi_max_transfer_size;
+ ctlr->handle_err = rockchip_spi_handle_err;
+
+ ctlr->dma_tx = dma_request_chan(rs->dev, "tx");
+ if (IS_ERR(ctlr->dma_tx)) {
/* Check tx to see if we need to defer probing the driver */
- if (PTR_ERR(master->dma_tx) == -EPROBE_DEFER) {
+ if (PTR_ERR(ctlr->dma_tx) == -EPROBE_DEFER) {
ret = -EPROBE_DEFER;
goto err_disable_pm_runtime;
}
dev_warn(rs->dev, "Failed to request TX DMA channel\n");
- master->dma_tx = NULL;
+ ctlr->dma_tx = NULL;
}
- master->dma_rx = dma_request_chan(rs->dev, "rx");
- if (IS_ERR(master->dma_rx)) {
- if (PTR_ERR(master->dma_rx) == -EPROBE_DEFER) {
+ ctlr->dma_rx = dma_request_chan(rs->dev, "rx");
+ if (IS_ERR(ctlr->dma_rx)) {
+ if (PTR_ERR(ctlr->dma_rx) == -EPROBE_DEFER) {
ret = -EPROBE_DEFER;
goto err_free_dma_tx;
}
dev_warn(rs->dev, "Failed to request RX DMA channel\n");
- master->dma_rx = NULL;
+ ctlr->dma_rx = NULL;
}
- if (master->dma_tx && master->dma_rx) {
+ if (ctlr->dma_tx && ctlr->dma_rx) {
rs->dma_addr_tx = mem->start + ROCKCHIP_SPI_TXDR;
rs->dma_addr_rx = mem->start + ROCKCHIP_SPI_RXDR;
- master->can_dma = rockchip_spi_can_dma;
+ ctlr->can_dma = rockchip_spi_can_dma;
}
- ret = devm_spi_register_master(&pdev->dev, master);
+ ret = devm_spi_register_controller(&pdev->dev, ctlr);
if (ret < 0) {
- dev_err(&pdev->dev, "Failed to register master\n");
+ dev_err(&pdev->dev, "Failed to register controller\n");
goto err_free_dma_rx;
}
return 0;
err_free_dma_rx:
- if (master->dma_rx)
- dma_release_channel(master->dma_rx);
+ if (ctlr->dma_rx)
+ dma_release_channel(ctlr->dma_rx);
err_free_dma_tx:
- if (master->dma_tx)
- dma_release_channel(master->dma_tx);
+ if (ctlr->dma_tx)
+ dma_release_channel(ctlr->dma_tx);
err_disable_pm_runtime:
pm_runtime_disable(&pdev->dev);
err_disable_spiclk:
clk_disable_unprepare(rs->spiclk);
err_disable_apbclk:
clk_disable_unprepare(rs->apb_pclk);
-err_put_master:
- spi_master_put(master);
+err_put_ctlr:
+ spi_controller_put(ctlr);
return ret;
}
static int rockchip_spi_remove(struct platform_device *pdev)
{
- struct spi_master *master = spi_master_get(platform_get_drvdata(pdev));
- struct rockchip_spi *rs = spi_master_get_devdata(master);
+ struct spi_controller *ctlr = spi_controller_get(platform_get_drvdata(pdev));
+ struct rockchip_spi *rs = spi_controller_get_devdata(ctlr);
pm_runtime_get_sync(&pdev->dev);
@@ -755,12 +794,12 @@ static int rockchip_spi_remove(struct platform_device *pdev)
pm_runtime_disable(&pdev->dev);
pm_runtime_set_suspended(&pdev->dev);
- if (master->dma_tx)
- dma_release_channel(master->dma_tx);
- if (master->dma_rx)
- dma_release_channel(master->dma_rx);
+ if (ctlr->dma_tx)
+ dma_release_channel(ctlr->dma_tx);
+ if (ctlr->dma_rx)
+ dma_release_channel(ctlr->dma_rx);
- spi_master_put(master);
+ spi_controller_put(ctlr);
return 0;
}
@@ -769,9 +808,9 @@ static int rockchip_spi_remove(struct platform_device *pdev)
static int rockchip_spi_suspend(struct device *dev)
{
int ret;
- struct spi_master *master = dev_get_drvdata(dev);
+ struct spi_controller *ctlr = dev_get_drvdata(dev);
- ret = spi_master_suspend(master);
+ ret = spi_controller_suspend(ctlr);
if (ret < 0)
return ret;
@@ -787,8 +826,8 @@ static int rockchip_spi_suspend(struct device *dev)
static int rockchip_spi_resume(struct device *dev)
{
int ret;
- struct spi_master *master = dev_get_drvdata(dev);
- struct rockchip_spi *rs = spi_master_get_devdata(master);
+ struct spi_controller *ctlr = dev_get_drvdata(dev);
+ struct rockchip_spi *rs = spi_controller_get_devdata(ctlr);
pinctrl_pm_select_default_state(dev);
@@ -796,7 +835,7 @@ static int rockchip_spi_resume(struct device *dev)
if (ret < 0)
return ret;
- ret = spi_master_resume(master);
+ ret = spi_controller_resume(ctlr);
if (ret < 0) {
clk_disable_unprepare(rs->spiclk);
clk_disable_unprepare(rs->apb_pclk);
@@ -809,8 +848,8 @@ static int rockchip_spi_resume(struct device *dev)
#ifdef CONFIG_PM
static int rockchip_spi_runtime_suspend(struct device *dev)
{
- struct spi_master *master = dev_get_drvdata(dev);
- struct rockchip_spi *rs = spi_master_get_devdata(master);
+ struct spi_controller *ctlr = dev_get_drvdata(dev);
+ struct rockchip_spi *rs = spi_controller_get_devdata(ctlr);
clk_disable_unprepare(rs->spiclk);
clk_disable_unprepare(rs->apb_pclk);
@@ -821,8 +860,8 @@ static int rockchip_spi_runtime_suspend(struct device *dev)
static int rockchip_spi_runtime_resume(struct device *dev)
{
int ret;
- struct spi_master *master = dev_get_drvdata(dev);
- struct rockchip_spi *rs = spi_master_get_devdata(master);
+ struct spi_controller *ctlr = dev_get_drvdata(dev);
+ struct rockchip_spi *rs = spi_controller_get_devdata(ctlr);
ret = clk_prepare_enable(rs->apb_pclk);
if (ret < 0)
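
The rockchip hunks above move the driver from the spi_master helpers to the generic spi_controller API and add an optional slave role selected by the "spi-slave" device-tree property. A minimal, hedged sketch of that probe-time pattern follows; the function and structure names are generic placeholders, not the driver's literal code.

#include <linux/module.h>
#include <linux/of.h>
#include <linux/platform_device.h>
#include <linux/spi/spi.h>

struct example_priv { int unused; };	/* placeholder private data */

static int example_spi_probe(struct platform_device *pdev)
{
	bool slave_mode = of_property_read_bool(pdev->dev.of_node, "spi-slave");
	struct spi_controller *ctlr;
	int ret;

	/* Both helpers return a struct spi_controller; only the role differs. */
	if (slave_mode)
		ctlr = spi_alloc_slave(&pdev->dev, sizeof(struct example_priv));
	else
		ctlr = spi_alloc_master(&pdev->dev, sizeof(struct example_priv));
	if (!ctlr)
		return -ENOMEM;

	ctlr->slave = slave_mode;		/* lets spi_controller_is_slave() report the role */
	ctlr->dev.of_node = pdev->dev.of_node;

	ret = devm_spi_register_controller(&pdev->dev, ctlr);
	if (ret)
		spi_controller_put(ctlr);	/* drop the allocation reference on failure */
	return ret;
}
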
diff --git a/drivers/spi/spi-sc18is602.c b/drivers/spi/spi-sc18is602.c
index 5497eeb3bf3e..ee0f3edf49cd 100644
--- a/drivers/spi/spi-sc18is602.c
+++ b/drivers/spi/spi-sc18is602.c
@@ -345,6 +345,6 @@ static struct i2c_driver sc18is602_driver = {
module_i2c_driver(sc18is602_driver);
-MODULE_DESCRIPTION("SC18IC602/603 SPI Master Driver");
+MODULE_DESCRIPTION("SC18IS602/603 SPI Master Driver");
MODULE_AUTHOR("Guenter Roeck");
MODULE_LICENSE("GPL");
diff --git a/drivers/spi/spi-sh-msiof.c b/drivers/spi/spi-sh-msiof.c
index 1c11a00a2c36..b2579af0e3eb 100644
--- a/drivers/spi/spi-sh-msiof.c
+++ b/drivers/spi/spi-sh-msiof.c
@@ -1398,7 +1398,7 @@ static int sh_msiof_spi_resume(struct device *dev)
static SIMPLE_DEV_PM_OPS(sh_msiof_spi_pm_ops, sh_msiof_spi_suspend,
sh_msiof_spi_resume);
-#define DEV_PM_OPS &sh_msiof_spi_pm_ops
+#define DEV_PM_OPS (&sh_msiof_spi_pm_ops)
#else
#define DEV_PM_OPS NULL
#endif /* CONFIG_PM_SLEEP */
diff --git a/drivers/spi/spi-sprd-adi.c b/drivers/spi/spi-sprd-adi.c
index 87dadb6b8ebf..88e6543648cb 100644
--- a/drivers/spi/spi-sprd-adi.c
+++ b/drivers/spi/spi-sprd-adi.c
@@ -319,7 +319,7 @@ static int sprd_adi_transfer_one(struct spi_controller *ctlr,
static void sprd_adi_set_wdt_rst_mode(struct sprd_adi *sadi)
{
-#ifdef CONFIG_SPRD_WATCHDOG
+#if IS_ENABLED(CONFIG_SPRD_WATCHDOG)
u32 val;
/* Set default watchdog reboot mode */
diff --git a/drivers/spi/spi-stm32-qspi.c b/drivers/spi/spi-stm32-qspi.c
index d066f5144c3e..3c44bb2fd9b1 100644
--- a/drivers/spi/spi-stm32-qspi.c
+++ b/drivers/spi/spi-stm32-qspi.c
@@ -16,6 +16,7 @@
#include <linux/of.h>
#include <linux/of_device.h>
#include <linux/pinctrl/consumer.h>
+#include <linux/pm_runtime.h>
#include <linux/platform_device.h>
#include <linux/reset.h>
#include <linux/sizes.h>
@@ -87,6 +88,7 @@
#define STM32_BUSY_TIMEOUT_US 100000
#define STM32_ABT_TIMEOUT_US 100000
#define STM32_COMP_TIMEOUT_MS 1000
+#define STM32_AUTOSUSPEND_DELAY -1
struct stm32_qspi_flash {
struct stm32_qspi *qspi;
@@ -431,10 +433,17 @@ static int stm32_qspi_exec_op(struct spi_mem *mem, const struct spi_mem_op *op)
struct stm32_qspi *qspi = spi_controller_get_devdata(mem->spi->master);
int ret;
+ ret = pm_runtime_get_sync(qspi->dev);
+ if (ret < 0)
+ return ret;
+
mutex_lock(&qspi->lock);
ret = stm32_qspi_send(mem, op);
mutex_unlock(&qspi->lock);
+ pm_runtime_mark_last_busy(qspi->dev);
+ pm_runtime_put_autosuspend(qspi->dev);
+
return ret;
}
@@ -444,6 +453,7 @@ static int stm32_qspi_setup(struct spi_device *spi)
struct stm32_qspi *qspi = spi_controller_get_devdata(ctrl);
struct stm32_qspi_flash *flash;
u32 presc;
+ int ret;
if (ctrl->busy)
return -EBUSY;
@@ -451,6 +461,10 @@ static int stm32_qspi_setup(struct spi_device *spi)
if (!spi->max_speed_hz)
return -EINVAL;
+ ret = pm_runtime_get_sync(qspi->dev);
+ if (ret < 0)
+ return ret;
+
presc = DIV_ROUND_UP(qspi->clk_rate, spi->max_speed_hz) - 1;
flash = &qspi->flash[spi->chip_select];
@@ -467,6 +481,9 @@ static int stm32_qspi_setup(struct spi_device *spi)
writel_relaxed(qspi->dcr_reg, qspi->io_base + QSPI_DCR);
mutex_unlock(&qspi->lock);
+ pm_runtime_mark_last_busy(qspi->dev);
+ pm_runtime_put_autosuspend(qspi->dev);
+
return 0;
}
@@ -538,10 +555,15 @@ static const struct spi_controller_mem_ops stm32_qspi_mem_ops = {
static void stm32_qspi_release(struct stm32_qspi *qspi)
{
+ pm_runtime_get_sync(qspi->dev);
/* disable qspi */
writel_relaxed(0, qspi->io_base + QSPI_CR);
stm32_qspi_dma_free(qspi);
mutex_destroy(&qspi->lock);
+ pm_runtime_put_noidle(qspi->dev);
+ pm_runtime_disable(qspi->dev);
+ pm_runtime_set_suspended(qspi->dev);
+ pm_runtime_dont_use_autosuspend(qspi->dev);
clk_disable_unprepare(qspi->clk);
}
@@ -643,9 +665,20 @@ static int stm32_qspi_probe(struct platform_device *pdev)
ctrl->num_chipselect = STM32_QSPI_MAX_NORCHIP;
ctrl->dev.of_node = dev->of_node;
+ pm_runtime_set_autosuspend_delay(dev, STM32_AUTOSUSPEND_DELAY);
+ pm_runtime_use_autosuspend(dev);
+ pm_runtime_set_active(dev);
+ pm_runtime_enable(dev);
+ pm_runtime_get_noresume(dev);
+
ret = devm_spi_register_master(dev, ctrl);
- if (!ret)
- return 0;
+ if (ret)
+ goto err_qspi_release;
+
+ pm_runtime_mark_last_busy(dev);
+ pm_runtime_put_autosuspend(dev);
+
+ return 0;
err_qspi_release:
stm32_qspi_release(qspi);
@@ -660,14 +693,28 @@ static int stm32_qspi_remove(struct platform_device *pdev)
struct stm32_qspi *qspi = platform_get_drvdata(pdev);
stm32_qspi_release(qspi);
+
return 0;
}
-static int __maybe_unused stm32_qspi_suspend(struct device *dev)
+static int __maybe_unused stm32_qspi_runtime_suspend(struct device *dev)
{
struct stm32_qspi *qspi = dev_get_drvdata(dev);
clk_disable_unprepare(qspi->clk);
+
+ return 0;
+}
+
+static int __maybe_unused stm32_qspi_runtime_resume(struct device *dev)
+{
+ struct stm32_qspi *qspi = dev_get_drvdata(dev);
+
+ return clk_prepare_enable(qspi->clk);
+}
+
+static int __maybe_unused stm32_qspi_suspend(struct device *dev)
+{
pinctrl_pm_select_sleep_state(dev);
return 0;
@@ -683,10 +730,17 @@ static int __maybe_unused stm32_qspi_resume(struct device *dev)
writel_relaxed(qspi->cr_reg, qspi->io_base + QSPI_CR);
writel_relaxed(qspi->dcr_reg, qspi->io_base + QSPI_DCR);
+ pm_runtime_mark_last_busy(qspi->dev);
+ pm_runtime_put_autosuspend(qspi->dev);
+
return 0;
}
-static SIMPLE_DEV_PM_OPS(stm32_qspi_pm_ops, stm32_qspi_suspend, stm32_qspi_resume);
+static const struct dev_pm_ops stm32_qspi_pm_ops = {
+ SET_RUNTIME_PM_OPS(stm32_qspi_runtime_suspend,
+ stm32_qspi_runtime_resume, NULL)
+ SET_SYSTEM_SLEEP_PM_OPS(stm32_qspi_suspend, stm32_qspi_resume)
+};
static const struct of_device_id stm32_qspi_match[] = {
{.compatible = "st,stm32f469-qspi"},
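
The stm32-qspi changes above bracket every hardware access with a runtime-PM reference and enable autosuspend. A hedged sketch of that bracket, with an illustrative function name and generic device pointer; the put_noidle on the error path is the common idiom and an assumption here, not copied from the hunk:

#include <linux/device.h>
#include <linux/pm_runtime.h>

static int my_hw_access(struct device *dev)
{
	int ret;

	ret = pm_runtime_get_sync(dev);		/* resume the hardware if it is suspended */
	if (ret < 0) {
		pm_runtime_put_noidle(dev);	/* balance the usage count on failure (assumed idiom) */
		return ret;
	}

	/* ... program the controller registers here ... */

	pm_runtime_mark_last_busy(dev);
	pm_runtime_put_autosuspend(dev);	/* let the device idle after the autosuspend delay */
	return 0;
}
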
diff --git a/drivers/spi/spi-stm32.c b/drivers/spi/spi-stm32.c
index 44ac6eb3298d..4c643dfc7fbb 100644
--- a/drivers/spi/spi-stm32.c
+++ b/drivers/spi/spi-stm32.c
@@ -811,7 +811,9 @@ static irqreturn_t stm32f4_spi_irq_event(int irq, void *dev_id)
mask |= STM32F4_SPI_SR_TXE;
}
- if (!spi->cur_usedma && spi->cur_comm == SPI_FULL_DUPLEX) {
+ if (!spi->cur_usedma && (spi->cur_comm == SPI_FULL_DUPLEX ||
+ spi->cur_comm == SPI_SIMPLEX_RX ||
+ spi->cur_comm == SPI_3WIRE_RX)) {
/* TXE flag is set and is handled when RXNE flag occurs */
sr &= ~STM32F4_SPI_SR_TXE;
mask |= STM32F4_SPI_SR_RXNE | STM32F4_SPI_SR_OVR;
@@ -850,7 +852,7 @@ static irqreturn_t stm32f4_spi_irq_event(int irq, void *dev_id)
stm32f4_spi_read_rx(spi);
if (spi->rx_len == 0)
end = true;
- else /* Load data for discontinuous mode */
+ else if (spi->tx_buf) /* Load data for discontinuous mode */
stm32f4_spi_write_tx(spi);
}
@@ -1151,7 +1153,9 @@ static int stm32f4_spi_transfer_one_irq(struct stm32_spi *spi)
/* Enable the interrupts relative to the current communication mode */
if (spi->cur_comm == SPI_SIMPLEX_TX || spi->cur_comm == SPI_3WIRE_TX) {
cr2 |= STM32F4_SPI_CR2_TXEIE;
- } else if (spi->cur_comm == SPI_FULL_DUPLEX) {
+ } else if (spi->cur_comm == SPI_FULL_DUPLEX ||
+ spi->cur_comm == SPI_SIMPLEX_RX ||
+ spi->cur_comm == SPI_3WIRE_RX) {
/* In transmit-only mode, the OVR flag is set in the SR register
* since the received data are never read. Therefore set OVR
* interrupt only when rx buffer is available.
@@ -1462,10 +1466,16 @@ static int stm32f4_spi_set_mode(struct stm32_spi *spi, unsigned int comm_type)
stm32_spi_set_bits(spi, STM32F4_SPI_CR1,
STM32F4_SPI_CR1_BIDIMODE |
STM32F4_SPI_CR1_BIDIOE);
- } else if (comm_type == SPI_FULL_DUPLEX) {
+ } else if (comm_type == SPI_FULL_DUPLEX ||
+ comm_type == SPI_SIMPLEX_RX) {
stm32_spi_clr_bits(spi, STM32F4_SPI_CR1,
STM32F4_SPI_CR1_BIDIMODE |
STM32F4_SPI_CR1_BIDIOE);
+ } else if (comm_type == SPI_3WIRE_RX) {
+ stm32_spi_set_bits(spi, STM32F4_SPI_CR1,
+ STM32F4_SPI_CR1_BIDIMODE);
+ stm32_spi_clr_bits(spi, STM32F4_SPI_CR1,
+ STM32F4_SPI_CR1_BIDIOE);
} else {
return -EINVAL;
}
@@ -1906,6 +1916,7 @@ static int stm32_spi_probe(struct platform_device *pdev)
master->prepare_message = stm32_spi_prepare_msg;
master->transfer_one = stm32_spi_transfer_one;
master->unprepare_message = stm32_spi_unprepare_msg;
+ master->flags = SPI_MASTER_MUST_TX;
spi->dma_tx = dma_request_chan(spi->dev, "tx");
if (IS_ERR(spi->dma_tx)) {
diff --git a/drivers/spi/spi-sun6i.c b/drivers/spi/spi-sun6i.c
index ec7967be9e2f..ecea15534c42 100644
--- a/drivers/spi/spi-sun6i.c
+++ b/drivers/spi/spi-sun6i.c
@@ -470,6 +470,7 @@ static int sun6i_spi_probe(struct platform_device *pdev)
master->max_speed_hz = 100 * 1000 * 1000;
master->min_speed_hz = 3 * 1000;
+ master->use_gpio_descriptors = true;
master->set_cs = sun6i_spi_set_cs;
master->transfer_one = sun6i_spi_transfer_one;
master->num_chipselect = 4;
diff --git a/drivers/spi/spi-tegra114.c b/drivers/spi/spi-tegra114.c
index 83edabdb41ad..c2c58871a947 100644
--- a/drivers/spi/spi-tegra114.c
+++ b/drivers/spi/spi-tegra114.c
@@ -1398,6 +1398,7 @@ static int tegra_spi_probe(struct platform_device *pdev)
ret = pm_runtime_get_sync(&pdev->dev);
if (ret < 0) {
dev_err(&pdev->dev, "pm runtime get failed, e = %d\n", ret);
+ pm_runtime_put_noidle(&pdev->dev);
goto exit_pm_disable;
}
diff --git a/drivers/spi/spi-tegra20-sflash.c b/drivers/spi/spi-tegra20-sflash.c
index 514429379206..02cf5f463ba6 100644
--- a/drivers/spi/spi-tegra20-sflash.c
+++ b/drivers/spi/spi-tegra20-sflash.c
@@ -491,6 +491,7 @@ static int tegra_sflash_probe(struct platform_device *pdev)
ret = pm_runtime_get_sync(&pdev->dev);
if (ret < 0) {
dev_err(&pdev->dev, "pm runtime get failed, e = %d\n", ret);
+ pm_runtime_put_noidle(&pdev->dev);
goto exit_pm_disable;
}
diff --git a/drivers/spi/spi-tegra20-slink.c b/drivers/spi/spi-tegra20-slink.c
index 7f4d932dade7..a07b72e9c344 100644
--- a/drivers/spi/spi-tegra20-slink.c
+++ b/drivers/spi/spi-tegra20-slink.c
@@ -1118,6 +1118,7 @@ static int tegra_slink_probe(struct platform_device *pdev)
ret = pm_runtime_get_sync(&pdev->dev);
if (ret < 0) {
dev_err(&pdev->dev, "pm runtime get failed, e = %d\n", ret);
+ pm_runtime_put_noidle(&pdev->dev);
goto exit_pm_disable;
}
tspi->def_command_reg = SLINK_M_S;
diff --git a/drivers/spi/spi-uniphier.c b/drivers/spi/spi-uniphier.c
index 0fa50979644d..6a9ef8ee3cc9 100644
--- a/drivers/spi/spi-uniphier.c
+++ b/drivers/spi/spi-uniphier.c
@@ -659,8 +659,7 @@ static int uniphier_spi_probe(struct platform_device *pdev)
priv->master = master;
priv->is_save_param = false;
- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- priv->base = devm_ioremap_resource(&pdev->dev, res);
+ priv->base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
if (IS_ERR(priv->base)) {
ret = PTR_ERR(priv->base);
goto out_master_put;
@@ -716,8 +715,10 @@ static int uniphier_spi_probe(struct platform_device *pdev)
master->dma_tx = dma_request_chan(&pdev->dev, "tx");
if (IS_ERR_OR_NULL(master->dma_tx)) {
- if (PTR_ERR(master->dma_tx) == -EPROBE_DEFER)
+ if (PTR_ERR(master->dma_tx) == -EPROBE_DEFER) {
+ ret = -EPROBE_DEFER;
goto out_disable_clk;
+ }
master->dma_tx = NULL;
dma_tx_burst = INT_MAX;
} else {
@@ -732,8 +733,10 @@ static int uniphier_spi_probe(struct platform_device *pdev)
master->dma_rx = dma_request_chan(&pdev->dev, "rx");
if (IS_ERR_OR_NULL(master->dma_rx)) {
- if (PTR_ERR(master->dma_rx) == -EPROBE_DEFER)
+ if (PTR_ERR(master->dma_rx) == -EPROBE_DEFER) {
+ ret = -EPROBE_DEFER;
goto out_disable_clk;
+ }
master->dma_rx = NULL;
dma_rx_burst = INT_MAX;
} else {
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index c92c89467e7e..8158e281f354 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -1023,7 +1023,8 @@ static int spi_map_msg(struct spi_controller *ctlr, struct spi_message *msg)
void *tmp;
unsigned int max_tx, max_rx;
- if (ctlr->flags & (SPI_CONTROLLER_MUST_RX | SPI_CONTROLLER_MUST_TX)) {
+ if ((ctlr->flags & (SPI_CONTROLLER_MUST_RX | SPI_CONTROLLER_MUST_TX))
+ && !(msg->spi->mode & SPI_3WIRE)) {
max_tx = 0;
max_rx = 0;
@@ -1075,7 +1076,7 @@ static int spi_transfer_wait(struct spi_controller *ctlr,
{
struct spi_statistics *statm = &ctlr->statistics;
struct spi_statistics *stats = &msg->spi->statistics;
- unsigned long long ms = 1;
+ unsigned long long ms;
if (spi_controller_is_slave(ctlr)) {
if (wait_for_completion_interruptible(&ctlr->xfer_completion)) {
@@ -1160,6 +1161,8 @@ int spi_delay_exec(struct spi_delay *_delay, struct spi_transfer *xfer)
{
int delay;
+ might_sleep();
+
if (!_delay)
return -EINVAL;
@@ -2111,6 +2114,7 @@ static int acpi_spi_add_resource(struct acpi_resource *ares, void *data)
}
lookup->max_speed_hz = sb->connection_speed;
+ lookup->bits_per_word = sb->data_bit_length;
if (sb->clock_phase == ACPI_SPI_SECOND_PHASE)
lookup->mode |= SPI_CPHA;
@@ -2760,6 +2764,8 @@ void spi_unregister_controller(struct spi_controller *ctlr)
struct spi_controller *found;
int id = ctlr->bus_num;
+ device_for_each_child(&ctlr->dev, NULL, __unregister);
+
/* First make sure that this controller was ever added */
mutex_lock(&board_lock);
found = idr_find(&spi_master_idr, id);
@@ -2772,7 +2778,6 @@ void spi_unregister_controller(struct spi_controller *ctlr)
list_del(&ctlr->list);
mutex_unlock(&board_lock);
- device_for_each_child(&ctlr->dev, NULL, __unregister);
device_unregister(&ctlr->dev);
/* free bus id */
mutex_lock(&board_lock);
@@ -3853,8 +3858,7 @@ static u8 *buf;
* is zero for success, else a negative errno status code.
* This call may only be used from a context that may sleep.
*
- * Parameters to this routine are always copied using a small buffer;
- * portable code should never use this for more than 32 bytes.
+ * Parameters to this routine are always copied using a small buffer.
* Performance-sensitive or bulk transfer code should instead use
* spi_{async,sync}() calls with dma-safe buffers.
*
diff --git a/drivers/spi/spidev.c b/drivers/spi/spidev.c
index 80dd1025b953..d753df700e9e 100644
--- a/drivers/spi/spidev.c
+++ b/drivers/spi/spidev.c
@@ -62,7 +62,8 @@ static DECLARE_BITMAP(minors, N_SPI_MINORS);
#define SPI_MODE_MASK (SPI_CPHA | SPI_CPOL | SPI_CS_HIGH \
| SPI_LSB_FIRST | SPI_3WIRE | SPI_LOOP \
| SPI_NO_CS | SPI_READY | SPI_TX_DUAL \
- | SPI_TX_QUAD | SPI_RX_DUAL | SPI_RX_QUAD)
+ | SPI_TX_QUAD | SPI_TX_OCTAL | SPI_RX_DUAL \
+ | SPI_RX_QUAD | SPI_RX_OCTAL)
struct spidev_data {
dev_t devt;
diff --git a/drivers/staging/android/ion/ion_heap.c b/drivers/staging/android/ion/ion_heap.c
index 473b465724f1..0755b11348ed 100644
--- a/drivers/staging/android/ion/ion_heap.c
+++ b/drivers/staging/android/ion/ion_heap.c
@@ -99,12 +99,12 @@ int ion_heap_map_user(struct ion_heap *heap, struct ion_buffer *buffer,
static int ion_heap_clear_pages(struct page **pages, int num, pgprot_t pgprot)
{
- void *addr = vm_map_ram(pages, num, -1, pgprot);
+ void *addr = vmap(pages, num, VM_MAP, pgprot);
if (!addr)
return -ENOMEM;
memset(addr, 0, PAGE_SIZE * num);
- vm_unmap_ram(addr, num);
+ vunmap(addr);
return 0;
}
diff --git a/drivers/staging/comedi/Makefile b/drivers/staging/comedi/Makefile
index 6af5da3b4315..072ed83a5a6a 100644
--- a/drivers/staging/comedi/Makefile
+++ b/drivers/staging/comedi/Makefile
@@ -4,7 +4,6 @@ ccflags-$(CONFIG_COMEDI_DEBUG) := -DDEBUG
comedi-y := comedi_fops.o range.o drivers.o \
comedi_buf.o
comedi-$(CONFIG_PROC_FS) += proc.o
-comedi-$(CONFIG_COMPAT) += comedi_compat32.o
obj-$(CONFIG_COMEDI_PCI_DRIVERS) += comedi_pci.o
obj-$(CONFIG_COMEDI_PCMCIA_DRIVERS) += comedi_pcmcia.o
diff --git a/drivers/staging/comedi/comedi_compat32.c b/drivers/staging/comedi/comedi_compat32.c
deleted file mode 100644
index 36a3564ba1fb..000000000000
--- a/drivers/staging/comedi/comedi_compat32.c
+++ /dev/null
@@ -1,455 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- * comedi/comedi_compat32.c
- * 32-bit ioctl compatibility for 64-bit comedi kernel module.
- *
- * Author: Ian Abbott, MEV Ltd. <abbotti@mev.co.uk>
- * Copyright (C) 2007 MEV Ltd. <http://www.mev.co.uk/>
- *
- * COMEDI - Linux Control and Measurement Device Interface
- * Copyright (C) 1997-2007 David A. Schleef <ds@schleef.org>
- */
-
-#include <linux/uaccess.h>
-#include <linux/compat.h>
-#include <linux/fs.h>
-#include "comedi.h"
-#include "comedi_compat32.h"
-
-#define COMEDI32_CHANINFO _IOR(CIO, 3, struct comedi32_chaninfo_struct)
-#define COMEDI32_RANGEINFO _IOR(CIO, 8, struct comedi32_rangeinfo_struct)
-/*
- * N.B. COMEDI32_CMD and COMEDI_CMD ought to use _IOWR, not _IOR.
- * It's too late to change it now, but it only affects the command number.
- */
-#define COMEDI32_CMD _IOR(CIO, 9, struct comedi32_cmd_struct)
-/*
- * N.B. COMEDI32_CMDTEST and COMEDI_CMDTEST ought to use _IOWR, not _IOR.
- * It's too late to change it now, but it only affects the command number.
- */
-#define COMEDI32_CMDTEST _IOR(CIO, 10, struct comedi32_cmd_struct)
-#define COMEDI32_INSNLIST _IOR(CIO, 11, struct comedi32_insnlist_struct)
-#define COMEDI32_INSN _IOR(CIO, 12, struct comedi32_insn_struct)
-
-struct comedi32_chaninfo_struct {
- unsigned int subdev;
- compat_uptr_t maxdata_list; /* 32-bit 'unsigned int *' */
- compat_uptr_t flaglist; /* 32-bit 'unsigned int *' */
- compat_uptr_t rangelist; /* 32-bit 'unsigned int *' */
- unsigned int unused[4];
-};
-
-struct comedi32_rangeinfo_struct {
- unsigned int range_type;
- compat_uptr_t range_ptr; /* 32-bit 'void *' */
-};
-
-struct comedi32_cmd_struct {
- unsigned int subdev;
- unsigned int flags;
- unsigned int start_src;
- unsigned int start_arg;
- unsigned int scan_begin_src;
- unsigned int scan_begin_arg;
- unsigned int convert_src;
- unsigned int convert_arg;
- unsigned int scan_end_src;
- unsigned int scan_end_arg;
- unsigned int stop_src;
- unsigned int stop_arg;
- compat_uptr_t chanlist; /* 32-bit 'unsigned int *' */
- unsigned int chanlist_len;
- compat_uptr_t data; /* 32-bit 'short *' */
- unsigned int data_len;
-};
-
-struct comedi32_insn_struct {
- unsigned int insn;
- unsigned int n;
- compat_uptr_t data; /* 32-bit 'unsigned int *' */
- unsigned int subdev;
- unsigned int chanspec;
- unsigned int unused[3];
-};
-
-struct comedi32_insnlist_struct {
- unsigned int n_insns;
- compat_uptr_t insns; /* 32-bit 'struct comedi_insn *' */
-};
-
-/* Handle translated ioctl. */
-static int translated_ioctl(struct file *file, unsigned int cmd,
- unsigned long arg)
-{
- if (file->f_op->unlocked_ioctl)
- return file->f_op->unlocked_ioctl(file, cmd, arg);
-
- return -ENOTTY;
-}
-
-/* Handle 32-bit COMEDI_CHANINFO ioctl. */
-static int compat_chaninfo(struct file *file, unsigned long arg)
-{
- struct comedi_chaninfo __user *chaninfo;
- struct comedi32_chaninfo_struct __user *chaninfo32;
- int err;
- union {
- unsigned int uint;
- compat_uptr_t uptr;
- } temp;
-
- chaninfo32 = compat_ptr(arg);
- chaninfo = compat_alloc_user_space(sizeof(*chaninfo));
-
- /* Copy chaninfo structure. Ignore unused members. */
- if (!access_ok(chaninfo32, sizeof(*chaninfo32)) ||
- !access_ok(chaninfo, sizeof(*chaninfo)))
- return -EFAULT;
-
- err = 0;
- err |= __get_user(temp.uint, &chaninfo32->subdev);
- err |= __put_user(temp.uint, &chaninfo->subdev);
- err |= __get_user(temp.uptr, &chaninfo32->maxdata_list);
- err |= __put_user(compat_ptr(temp.uptr), &chaninfo->maxdata_list);
- err |= __get_user(temp.uptr, &chaninfo32->flaglist);
- err |= __put_user(compat_ptr(temp.uptr), &chaninfo->flaglist);
- err |= __get_user(temp.uptr, &chaninfo32->rangelist);
- err |= __put_user(compat_ptr(temp.uptr), &chaninfo->rangelist);
- if (err)
- return -EFAULT;
-
- return translated_ioctl(file, COMEDI_CHANINFO, (unsigned long)chaninfo);
-}
-
-/* Handle 32-bit COMEDI_RANGEINFO ioctl. */
-static int compat_rangeinfo(struct file *file, unsigned long arg)
-{
- struct comedi_rangeinfo __user *rangeinfo;
- struct comedi32_rangeinfo_struct __user *rangeinfo32;
- int err;
- union {
- unsigned int uint;
- compat_uptr_t uptr;
- } temp;
-
- rangeinfo32 = compat_ptr(arg);
- rangeinfo = compat_alloc_user_space(sizeof(*rangeinfo));
-
- /* Copy rangeinfo structure. */
- if (!access_ok(rangeinfo32, sizeof(*rangeinfo32)) ||
- !access_ok(rangeinfo, sizeof(*rangeinfo)))
- return -EFAULT;
-
- err = 0;
- err |= __get_user(temp.uint, &rangeinfo32->range_type);
- err |= __put_user(temp.uint, &rangeinfo->range_type);
- err |= __get_user(temp.uptr, &rangeinfo32->range_ptr);
- err |= __put_user(compat_ptr(temp.uptr), &rangeinfo->range_ptr);
- if (err)
- return -EFAULT;
-
- return translated_ioctl(file, COMEDI_RANGEINFO,
- (unsigned long)rangeinfo);
-}
-
-/* Copy 32-bit cmd structure to native cmd structure. */
-static int get_compat_cmd(struct comedi_cmd __user *cmd,
- struct comedi32_cmd_struct __user *cmd32)
-{
- int err;
- union {
- unsigned int uint;
- compat_uptr_t uptr;
- } temp;
-
- /* Copy cmd structure. */
- if (!access_ok(cmd32, sizeof(*cmd32)) ||
- !access_ok(cmd, sizeof(*cmd)))
- return -EFAULT;
-
- err = 0;
- err |= __get_user(temp.uint, &cmd32->subdev);
- err |= __put_user(temp.uint, &cmd->subdev);
- err |= __get_user(temp.uint, &cmd32->flags);
- err |= __put_user(temp.uint, &cmd->flags);
- err |= __get_user(temp.uint, &cmd32->start_src);
- err |= __put_user(temp.uint, &cmd->start_src);
- err |= __get_user(temp.uint, &cmd32->start_arg);
- err |= __put_user(temp.uint, &cmd->start_arg);
- err |= __get_user(temp.uint, &cmd32->scan_begin_src);
- err |= __put_user(temp.uint, &cmd->scan_begin_src);
- err |= __get_user(temp.uint, &cmd32->scan_begin_arg);
- err |= __put_user(temp.uint, &cmd->scan_begin_arg);
- err |= __get_user(temp.uint, &cmd32->convert_src);
- err |= __put_user(temp.uint, &cmd->convert_src);
- err |= __get_user(temp.uint, &cmd32->convert_arg);
- err |= __put_user(temp.uint, &cmd->convert_arg);
- err |= __get_user(temp.uint, &cmd32->scan_end_src);
- err |= __put_user(temp.uint, &cmd->scan_end_src);
- err |= __get_user(temp.uint, &cmd32->scan_end_arg);
- err |= __put_user(temp.uint, &cmd->scan_end_arg);
- err |= __get_user(temp.uint, &cmd32->stop_src);
- err |= __put_user(temp.uint, &cmd->stop_src);
- err |= __get_user(temp.uint, &cmd32->stop_arg);
- err |= __put_user(temp.uint, &cmd->stop_arg);
- err |= __get_user(temp.uptr, &cmd32->chanlist);
- err |= __put_user((unsigned int __force *)compat_ptr(temp.uptr),
- &cmd->chanlist);
- err |= __get_user(temp.uint, &cmd32->chanlist_len);
- err |= __put_user(temp.uint, &cmd->chanlist_len);
- err |= __get_user(temp.uptr, &cmd32->data);
- err |= __put_user(compat_ptr(temp.uptr), &cmd->data);
- err |= __get_user(temp.uint, &cmd32->data_len);
- err |= __put_user(temp.uint, &cmd->data_len);
- return err ? -EFAULT : 0;
-}
-
-/* Copy native cmd structure to 32-bit cmd structure. */
-static int put_compat_cmd(struct comedi32_cmd_struct __user *cmd32,
- struct comedi_cmd __user *cmd)
-{
- int err;
- unsigned int temp;
-
- /*
- * Copy back most of cmd structure.
- *
- * Assume the pointer values are already valid.
- * (Could use ptr_to_compat() to set them.)
- */
- if (!access_ok(cmd, sizeof(*cmd)) ||
- !access_ok(cmd32, sizeof(*cmd32)))
- return -EFAULT;
-
- err = 0;
- err |= __get_user(temp, &cmd->subdev);
- err |= __put_user(temp, &cmd32->subdev);
- err |= __get_user(temp, &cmd->flags);
- err |= __put_user(temp, &cmd32->flags);
- err |= __get_user(temp, &cmd->start_src);
- err |= __put_user(temp, &cmd32->start_src);
- err |= __get_user(temp, &cmd->start_arg);
- err |= __put_user(temp, &cmd32->start_arg);
- err |= __get_user(temp, &cmd->scan_begin_src);
- err |= __put_user(temp, &cmd32->scan_begin_src);
- err |= __get_user(temp, &cmd->scan_begin_arg);
- err |= __put_user(temp, &cmd32->scan_begin_arg);
- err |= __get_user(temp, &cmd->convert_src);
- err |= __put_user(temp, &cmd32->convert_src);
- err |= __get_user(temp, &cmd->convert_arg);
- err |= __put_user(temp, &cmd32->convert_arg);
- err |= __get_user(temp, &cmd->scan_end_src);
- err |= __put_user(temp, &cmd32->scan_end_src);
- err |= __get_user(temp, &cmd->scan_end_arg);
- err |= __put_user(temp, &cmd32->scan_end_arg);
- err |= __get_user(temp, &cmd->stop_src);
- err |= __put_user(temp, &cmd32->stop_src);
- err |= __get_user(temp, &cmd->stop_arg);
- err |= __put_user(temp, &cmd32->stop_arg);
- /* Assume chanlist pointer is unchanged. */
- err |= __get_user(temp, &cmd->chanlist_len);
- err |= __put_user(temp, &cmd32->chanlist_len);
- /* Assume data pointer is unchanged. */
- err |= __get_user(temp, &cmd->data_len);
- err |= __put_user(temp, &cmd32->data_len);
- return err ? -EFAULT : 0;
-}
-
-/* Handle 32-bit COMEDI_CMD ioctl. */
-static int compat_cmd(struct file *file, unsigned long arg)
-{
- struct comedi_cmd __user *cmd;
- struct comedi32_cmd_struct __user *cmd32;
- int rc, err;
-
- cmd32 = compat_ptr(arg);
- cmd = compat_alloc_user_space(sizeof(*cmd));
-
- rc = get_compat_cmd(cmd, cmd32);
- if (rc)
- return rc;
-
- rc = translated_ioctl(file, COMEDI_CMD, (unsigned long)cmd);
- if (rc == -EAGAIN) {
- /* Special case: copy cmd back to user. */
- err = put_compat_cmd(cmd32, cmd);
- if (err)
- rc = err;
- }
-
- return rc;
-}
-
-/* Handle 32-bit COMEDI_CMDTEST ioctl. */
-static int compat_cmdtest(struct file *file, unsigned long arg)
-{
- struct comedi_cmd __user *cmd;
- struct comedi32_cmd_struct __user *cmd32;
- int rc, err;
-
- cmd32 = compat_ptr(arg);
- cmd = compat_alloc_user_space(sizeof(*cmd));
-
- rc = get_compat_cmd(cmd, cmd32);
- if (rc)
- return rc;
-
- rc = translated_ioctl(file, COMEDI_CMDTEST, (unsigned long)cmd);
- if (rc < 0)
- return rc;
-
- err = put_compat_cmd(cmd32, cmd);
- if (err)
- rc = err;
-
- return rc;
-}
-
-/* Copy 32-bit insn structure to native insn structure. */
-static int get_compat_insn(struct comedi_insn __user *insn,
- struct comedi32_insn_struct __user *insn32)
-{
- int err;
- union {
- unsigned int uint;
- compat_uptr_t uptr;
- } temp;
-
- /* Copy insn structure. Ignore the unused members. */
- err = 0;
- if (!access_ok(insn32, sizeof(*insn32)) ||
- !access_ok(insn, sizeof(*insn)))
- return -EFAULT;
-
- err |= __get_user(temp.uint, &insn32->insn);
- err |= __put_user(temp.uint, &insn->insn);
- err |= __get_user(temp.uint, &insn32->n);
- err |= __put_user(temp.uint, &insn->n);
- err |= __get_user(temp.uptr, &insn32->data);
- err |= __put_user(compat_ptr(temp.uptr), &insn->data);
- err |= __get_user(temp.uint, &insn32->subdev);
- err |= __put_user(temp.uint, &insn->subdev);
- err |= __get_user(temp.uint, &insn32->chanspec);
- err |= __put_user(temp.uint, &insn->chanspec);
- return err ? -EFAULT : 0;
-}
-
-/* Handle 32-bit COMEDI_INSNLIST ioctl. */
-static int compat_insnlist(struct file *file, unsigned long arg)
-{
- struct combined_insnlist {
- struct comedi_insnlist insnlist;
- struct comedi_insn insn[1];
- } __user *s;
- struct comedi32_insnlist_struct __user *insnlist32;
- struct comedi32_insn_struct __user *insn32;
- compat_uptr_t uptr;
- unsigned int n_insns, n;
- int err, rc;
-
- insnlist32 = compat_ptr(arg);
-
- /* Get 32-bit insnlist structure. */
- if (!access_ok(insnlist32, sizeof(*insnlist32)))
- return -EFAULT;
-
- err = 0;
- err |= __get_user(n_insns, &insnlist32->n_insns);
- err |= __get_user(uptr, &insnlist32->insns);
- insn32 = compat_ptr(uptr);
- if (err)
- return -EFAULT;
-
- /* Allocate user memory to copy insnlist and insns into. */
- s = compat_alloc_user_space(offsetof(struct combined_insnlist,
- insn[n_insns]));
-
- /* Set native insnlist structure. */
- if (!access_ok(&s->insnlist, sizeof(s->insnlist)))
- return -EFAULT;
-
- err |= __put_user(n_insns, &s->insnlist.n_insns);
- err |= __put_user(&s->insn[0], &s->insnlist.insns);
- if (err)
- return -EFAULT;
-
- /* Copy insn structures. */
- for (n = 0; n < n_insns; n++) {
- rc = get_compat_insn(&s->insn[n], &insn32[n]);
- if (rc)
- return rc;
- }
-
- return translated_ioctl(file, COMEDI_INSNLIST,
- (unsigned long)&s->insnlist);
-}
-
-/* Handle 32-bit COMEDI_INSN ioctl. */
-static int compat_insn(struct file *file, unsigned long arg)
-{
- struct comedi_insn __user *insn;
- struct comedi32_insn_struct __user *insn32;
- int rc;
-
- insn32 = compat_ptr(arg);
- insn = compat_alloc_user_space(sizeof(*insn));
-
- rc = get_compat_insn(insn, insn32);
- if (rc)
- return rc;
-
- return translated_ioctl(file, COMEDI_INSN, (unsigned long)insn);
-}
-
-/*
- * compat_ioctl file operation.
- *
- * Returns -ENOIOCTLCMD for unrecognised ioctl codes.
- */
-long comedi_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
-{
- int rc;
-
- switch (cmd) {
- case COMEDI_DEVCONFIG:
- case COMEDI_DEVINFO:
- case COMEDI_SUBDINFO:
- case COMEDI_BUFCONFIG:
- case COMEDI_BUFINFO:
- /* Just need to translate the pointer argument. */
- arg = (unsigned long)compat_ptr(arg);
- rc = translated_ioctl(file, cmd, arg);
- break;
- case COMEDI_LOCK:
- case COMEDI_UNLOCK:
- case COMEDI_CANCEL:
- case COMEDI_POLL:
- case COMEDI_SETRSUBD:
- case COMEDI_SETWSUBD:
- /* No translation needed. */
- rc = translated_ioctl(file, cmd, arg);
- break;
- case COMEDI32_CHANINFO:
- rc = compat_chaninfo(file, arg);
- break;
- case COMEDI32_RANGEINFO:
- rc = compat_rangeinfo(file, arg);
- break;
- case COMEDI32_CMD:
- rc = compat_cmd(file, arg);
- break;
- case COMEDI32_CMDTEST:
- rc = compat_cmdtest(file, arg);
- break;
- case COMEDI32_INSNLIST:
- rc = compat_insnlist(file, arg);
- break;
- case COMEDI32_INSN:
- rc = compat_insn(file, arg);
- break;
- default:
- rc = -ENOIOCTLCMD;
- break;
- }
- return rc;
-}
diff --git a/drivers/staging/comedi/comedi_compat32.h b/drivers/staging/comedi/comedi_compat32.h
deleted file mode 100644
index dc3e2a9442c7..000000000000
--- a/drivers/staging/comedi/comedi_compat32.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0+ */
-/*
- * comedi/comedi_compat32.h
- * 32-bit ioctl compatibility for 64-bit comedi kernel module.
- *
- * Author: Ian Abbott, MEV Ltd. <abbotti@mev.co.uk>
- * Copyright (C) 2007 MEV Ltd. <http://www.mev.co.uk/>
- *
- * COMEDI - Linux Control and Measurement Device Interface
- * Copyright (C) 1997-2007 David A. Schleef <ds@schleef.org>
- */
-
-#ifndef _COMEDI_COMPAT32_H
-#define _COMEDI_COMPAT32_H
-
-#ifdef CONFIG_COMPAT
-
-struct file;
-long comedi_compat_ioctl(struct file *file, unsigned int cmd,
- unsigned long arg);
-
-#else /* CONFIG_COMPAT */
-
-#define comedi_compat_ioctl NULL
-
-#endif /* CONFIG_COMPAT */
-
-#endif /* _COMEDI_COMPAT32_H */
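
The comedi_fops.c rework that follows folds the old compat_alloc_user_space() helpers into the native ioctl paths: each 32-bit layout is copied into kernel memory and its compat_uptr_t members are widened with compat_ptr(). A hedged sketch of that conversion style, with made-up structure and function names:

#include <linux/compat.h>
#include <linux/uaccess.h>

struct native_req { unsigned int len; unsigned int __user *data; };
struct compat_req { unsigned int len; compat_uptr_t data; };

static int get_compat_req(struct native_req *req, unsigned long arg)
{
	struct compat_req v32;

	if (copy_from_user(&v32, compat_ptr(arg), sizeof(v32)))
		return -EFAULT;

	req->len  = v32.len;
	req->data = compat_ptr(v32.data);	/* widen the 32-bit user pointer */
	return 0;
}
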
diff --git a/drivers/staging/comedi/comedi_fops.c b/drivers/staging/comedi/comedi_fops.c
index e84b4fb493d6..a56c8f74a27b 100644
--- a/drivers/staging/comedi/comedi_fops.c
+++ b/drivers/staging/comedi/comedi_fops.c
@@ -4,13 +4,14 @@
* comedi kernel module
*
* COMEDI - Linux Control and Measurement Device Interface
- * Copyright (C) 1997-2000 David A. Schleef <ds@schleef.org>
+ * Copyright (C) 1997-2007 David A. Schleef <ds@schleef.org>
+ * compat ioctls:
+ * Author: Ian Abbott, MEV Ltd. <abbotti@mev.co.uk>
+ * Copyright (C) 2007 MEV Ltd. <http://www.mev.co.uk/>
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include "comedi_compat32.h"
-
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/kernel.h>
@@ -27,6 +28,7 @@
#include <linux/io.h>
#include <linux/uaccess.h>
+#include <linux/compat.h>
#include "comedi_internal.h"
@@ -1047,31 +1049,28 @@ static int do_subdinfo_ioctl(struct comedi_device *dev,
* array of range table lengths to chaninfo->range_table_list if requested
*/
static int do_chaninfo_ioctl(struct comedi_device *dev,
- struct comedi_chaninfo __user *arg)
+ struct comedi_chaninfo *it)
{
struct comedi_subdevice *s;
- struct comedi_chaninfo it;
lockdep_assert_held(&dev->mutex);
- if (copy_from_user(&it, arg, sizeof(it)))
- return -EFAULT;
- if (it.subdev >= dev->n_subdevices)
+ if (it->subdev >= dev->n_subdevices)
return -EINVAL;
- s = &dev->subdevices[it.subdev];
+ s = &dev->subdevices[it->subdev];
- if (it.maxdata_list) {
+ if (it->maxdata_list) {
if (s->maxdata || !s->maxdata_list)
return -EINVAL;
- if (copy_to_user(it.maxdata_list, s->maxdata_list,
+ if (copy_to_user(it->maxdata_list, s->maxdata_list,
s->n_chan * sizeof(unsigned int)))
return -EFAULT;
}
- if (it.flaglist)
+ if (it->flaglist)
return -EINVAL; /* flaglist not supported */
- if (it.rangelist) {
+ if (it->rangelist) {
int i;
if (!s->range_table_list)
@@ -1079,9 +1078,9 @@ static int do_chaninfo_ioctl(struct comedi_device *dev,
for (i = 0; i < s->n_chan; i++) {
int x;
- x = (dev->minor << 28) | (it.subdev << 24) | (i << 16) |
+ x = (dev->minor << 28) | (it->subdev << 24) | (i << 16) |
(s->range_table_list[i]->length);
- if (put_user(x, it.rangelist + i))
+ if (put_user(x, it->rangelist + i))
return -EFAULT;
}
}
@@ -1521,34 +1520,19 @@ out:
#define MIN_SAMPLES 16
#define MAX_SAMPLES 65536
static int do_insnlist_ioctl(struct comedi_device *dev,
- struct comedi_insnlist __user *arg, void *file)
+ struct comedi_insn *insns,
+ unsigned int n_insns,
+ void *file)
{
- struct comedi_insnlist insnlist;
- struct comedi_insn *insns = NULL;
unsigned int *data = NULL;
unsigned int max_n_data_required = MIN_SAMPLES;
int i = 0;
int ret = 0;
lockdep_assert_held(&dev->mutex);
- if (copy_from_user(&insnlist, arg, sizeof(insnlist)))
- return -EFAULT;
-
- insns = kcalloc(insnlist.n_insns, sizeof(*insns), GFP_KERNEL);
- if (!insns) {
- ret = -ENOMEM;
- goto error;
- }
-
- if (copy_from_user(insns, insnlist.insns,
- sizeof(*insns) * insnlist.n_insns)) {
- dev_dbg(dev->class_dev, "copy_from_user failed\n");
- ret = -EFAULT;
- goto error;
- }
/* Determine maximum memory needed for all instructions. */
- for (i = 0; i < insnlist.n_insns; ++i) {
+ for (i = 0; i < n_insns; ++i) {
if (insns[i].n > MAX_SAMPLES) {
dev_dbg(dev->class_dev,
"number of samples too large\n");
@@ -1566,7 +1550,7 @@ static int do_insnlist_ioctl(struct comedi_device *dev,
goto error;
}
- for (i = 0; i < insnlist.n_insns; ++i) {
+ for (i = 0; i < n_insns; ++i) {
if (insns[i].insn & INSN_MASK_WRITE) {
if (copy_from_user(data, insns[i].data,
insns[i].n * sizeof(unsigned int))) {
@@ -1593,7 +1577,6 @@ static int do_insnlist_ioctl(struct comedi_device *dev,
}
error:
- kfree(insns);
kfree(data);
if (ret < 0)
@@ -1616,22 +1599,19 @@ error:
* data (for reads) to insn->data pointer
*/
static int do_insn_ioctl(struct comedi_device *dev,
- struct comedi_insn __user *arg, void *file)
+ struct comedi_insn *insn, void *file)
{
- struct comedi_insn insn;
unsigned int *data = NULL;
unsigned int n_data = MIN_SAMPLES;
int ret = 0;
lockdep_assert_held(&dev->mutex);
- if (copy_from_user(&insn, arg, sizeof(insn)))
- return -EFAULT;
- n_data = max(n_data, insn.n);
+ n_data = max(n_data, insn->n);
/* This is where the behavior of insn and insnlist deviate. */
- if (insn.n > MAX_SAMPLES) {
- insn.n = MAX_SAMPLES;
+ if (insn->n > MAX_SAMPLES) {
+ insn->n = MAX_SAMPLES;
n_data = MAX_SAMPLES;
}
@@ -1641,26 +1621,26 @@ static int do_insn_ioctl(struct comedi_device *dev,
goto error;
}
- if (insn.insn & INSN_MASK_WRITE) {
+ if (insn->insn & INSN_MASK_WRITE) {
if (copy_from_user(data,
- insn.data,
- insn.n * sizeof(unsigned int))) {
+ insn->data,
+ insn->n * sizeof(unsigned int))) {
ret = -EFAULT;
goto error;
}
}
- ret = parse_insn(dev, &insn, data, file);
+ ret = parse_insn(dev, insn, data, file);
if (ret < 0)
goto error;
- if (insn.insn & INSN_MASK_READ) {
- if (copy_to_user(insn.data,
+ if (insn->insn & INSN_MASK_READ) {
+ if (copy_to_user(insn->data,
data,
- insn.n * sizeof(unsigned int))) {
+ insn->n * sizeof(unsigned int))) {
ret = -EFAULT;
goto error;
}
}
- ret = insn.n;
+ ret = insn->n;
error:
kfree(data);
@@ -1669,17 +1649,11 @@ error:
}
static int __comedi_get_user_cmd(struct comedi_device *dev,
- struct comedi_cmd __user *arg,
struct comedi_cmd *cmd)
{
struct comedi_subdevice *s;
lockdep_assert_held(&dev->mutex);
- if (copy_from_user(cmd, arg, sizeof(*cmd))) {
- dev_dbg(dev->class_dev, "bad cmd address\n");
- return -EFAULT;
- }
-
if (cmd->subdev >= dev->n_subdevices) {
dev_dbg(dev->class_dev, "%d no such subdevice\n", cmd->subdev);
return -ENODEV;
@@ -1767,9 +1741,8 @@ static int __comedi_get_user_chanlist(struct comedi_device *dev,
* possibly modified comedi_cmd structure (when -EAGAIN returned)
*/
static int do_cmd_ioctl(struct comedi_device *dev,
- struct comedi_cmd __user *arg, void *file)
+ struct comedi_cmd *cmd, bool *copy, void *file)
{
- struct comedi_cmd cmd;
struct comedi_subdevice *s;
struct comedi_async *async;
unsigned int __user *user_chanlist;
@@ -1777,15 +1750,15 @@ static int do_cmd_ioctl(struct comedi_device *dev,
lockdep_assert_held(&dev->mutex);
- /* get the user's cmd and do some simple validation */
- ret = __comedi_get_user_cmd(dev, arg, &cmd);
+ /* do some simple cmd validation */
+ ret = __comedi_get_user_cmd(dev, cmd);
if (ret)
return ret;
/* save user's chanlist pointer so it can be restored later */
- user_chanlist = (unsigned int __user *)cmd.chanlist;
+ user_chanlist = (unsigned int __user *)cmd->chanlist;
- s = &dev->subdevices[cmd.subdev];
+ s = &dev->subdevices[cmd->subdev];
async = s->async;
/* are we locked? (ioctl lock) */
@@ -1801,13 +1774,13 @@ static int do_cmd_ioctl(struct comedi_device *dev,
}
/* make sure channel/gain list isn't too short */
- if (cmd.chanlist_len < 1) {
+ if (cmd->chanlist_len < 1) {
dev_dbg(dev->class_dev, "channel/gain list too short %u < 1\n",
- cmd.chanlist_len);
+ cmd->chanlist_len);
return -EINVAL;
}
- async->cmd = cmd;
+ async->cmd = *cmd;
async->cmd.data = NULL;
/* load channel/gain list */
@@ -1819,15 +1792,11 @@ static int do_cmd_ioctl(struct comedi_device *dev,
if (async->cmd.flags & CMDF_BOGUS || ret) {
dev_dbg(dev->class_dev, "test returned %d\n", ret);
- cmd = async->cmd;
+ *cmd = async->cmd;
/* restore chanlist pointer before copying back */
- cmd.chanlist = (unsigned int __force *)user_chanlist;
- cmd.data = NULL;
- if (copy_to_user(arg, &cmd, sizeof(cmd))) {
- dev_dbg(dev->class_dev, "fault writing cmd\n");
- ret = -EFAULT;
- goto cleanup;
- }
+ cmd->chanlist = (unsigned int __force *)user_chanlist;
+ cmd->data = NULL;
+ *copy = true;
ret = -EAGAIN;
goto cleanup;
}
@@ -1877,44 +1846,39 @@ cleanup:
* possibly modified comedi_cmd structure
*/
static int do_cmdtest_ioctl(struct comedi_device *dev,
- struct comedi_cmd __user *arg, void *file)
+ struct comedi_cmd *cmd, bool *copy, void *file)
{
- struct comedi_cmd cmd;
struct comedi_subdevice *s;
unsigned int __user *user_chanlist;
int ret;
lockdep_assert_held(&dev->mutex);
- /* get the user's cmd and do some simple validation */
- ret = __comedi_get_user_cmd(dev, arg, &cmd);
+ /* do some simple cmd validation */
+ ret = __comedi_get_user_cmd(dev, cmd);
if (ret)
return ret;
/* save user's chanlist pointer so it can be restored later */
- user_chanlist = (unsigned int __user *)cmd.chanlist;
+ user_chanlist = (unsigned int __user *)cmd->chanlist;
- s = &dev->subdevices[cmd.subdev];
+ s = &dev->subdevices[cmd->subdev];
/* user_chanlist can be NULL for COMEDI_CMDTEST ioctl */
if (user_chanlist) {
/* load channel/gain list */
- ret = __comedi_get_user_chanlist(dev, s, user_chanlist, &cmd);
+ ret = __comedi_get_user_chanlist(dev, s, user_chanlist, cmd);
if (ret)
return ret;
}
- ret = s->do_cmdtest(dev, s, &cmd);
+ ret = s->do_cmdtest(dev, s, cmd);
- kfree(cmd.chanlist); /* free kernel copy of user chanlist */
+ kfree(cmd->chanlist); /* free kernel copy of user chanlist */
/* restore chanlist pointer before copying back */
- cmd.chanlist = (unsigned int __force *)user_chanlist;
-
- if (copy_to_user(arg, &cmd, sizeof(cmd))) {
- dev_dbg(dev->class_dev, "bad cmd address\n");
- ret = -EFAULT;
- }
+ cmd->chanlist = (unsigned int __force *)user_chanlist;
+ *copy = true;
return ret;
}
@@ -2203,12 +2167,22 @@ static long comedi_unlocked_ioctl(struct file *file, unsigned int cmd,
(struct comedi_subdinfo __user *)arg,
file);
break;
- case COMEDI_CHANINFO:
- rc = do_chaninfo_ioctl(dev, (void __user *)arg);
+ case COMEDI_CHANINFO: {
+ struct comedi_chaninfo it;
+ if (copy_from_user(&it, (void __user *)arg, sizeof(it)))
+ rc = -EFAULT;
+ else
+ rc = do_chaninfo_ioctl(dev, &it);
break;
- case COMEDI_RANGEINFO:
- rc = do_rangeinfo_ioctl(dev, (void __user *)arg);
+ }
+ case COMEDI_RANGEINFO: {
+ struct comedi_rangeinfo it;
+ if (copy_from_user(&it, (void __user *)arg, sizeof(it)))
+ rc = -EFAULT;
+ else
+ rc = do_rangeinfo_ioctl(dev, &it);
break;
+ }
case COMEDI_BUFINFO:
rc = do_bufinfo_ioctl(dev,
(struct comedi_bufinfo __user *)arg,
@@ -2223,22 +2197,64 @@ static long comedi_unlocked_ioctl(struct file *file, unsigned int cmd,
case COMEDI_CANCEL:
rc = do_cancel_ioctl(dev, arg, file);
break;
- case COMEDI_CMD:
- rc = do_cmd_ioctl(dev, (struct comedi_cmd __user *)arg, file);
+ case COMEDI_CMD: {
+ struct comedi_cmd cmd;
+ bool copy = false;
+
+ if (copy_from_user(&cmd, (void __user *)arg, sizeof(cmd))) {
+ rc = -EFAULT;
+ break;
+ }
+ rc = do_cmd_ioctl(dev, &cmd, &copy, file);
+ if (copy && copy_to_user((void __user *)arg, &cmd, sizeof(cmd)))
+ rc = -EFAULT;
break;
- case COMEDI_CMDTEST:
- rc = do_cmdtest_ioctl(dev, (struct comedi_cmd __user *)arg,
- file);
+ }
+ case COMEDI_CMDTEST: {
+ struct comedi_cmd cmd;
+ bool copy = false;
+
+ if (copy_from_user(&cmd, (void __user *)arg, sizeof(cmd))) {
+ rc = -EFAULT;
+ break;
+ }
+ rc = do_cmdtest_ioctl(dev, &cmd, &copy, file);
+ if (copy && copy_to_user((void __user *)arg, &cmd, sizeof(cmd)))
+ rc = -EFAULT;
break;
- case COMEDI_INSNLIST:
- rc = do_insnlist_ioctl(dev,
- (struct comedi_insnlist __user *)arg,
- file);
+ }
+ case COMEDI_INSNLIST: {
+ struct comedi_insnlist insnlist;
+ struct comedi_insn *insns = NULL;
+
+ if (copy_from_user(&insnlist, (void __user *)arg,
+ sizeof(insnlist))) {
+ rc = -EFAULT;
+ break;
+ }
+ insns = kcalloc(insnlist.n_insns, sizeof(*insns), GFP_KERNEL);
+ if (!insns) {
+ rc = -ENOMEM;
+ break;
+ }
+ if (copy_from_user(insns, insnlist.insns,
+ sizeof(*insns) * insnlist.n_insns)) {
+ rc = -EFAULT;
+ kfree(insns);
+ break;
+ }
+ rc = do_insnlist_ioctl(dev, insns, insnlist.n_insns, file);
+ kfree(insns);
break;
- case COMEDI_INSN:
- rc = do_insn_ioctl(dev, (struct comedi_insn __user *)arg,
- file);
+ }
+ case COMEDI_INSN: {
+ struct comedi_insn insn;
+ if (copy_from_user(&insn, (void __user *)arg, sizeof(insn)))
+ rc = -EFAULT;
+ else
+ rc = do_insn_ioctl(dev, &insn, file);
break;
+ }
case COMEDI_POLL:
rc = do_poll_ioctl(dev, arg, file);
break;
@@ -2808,6 +2824,344 @@ static int comedi_close(struct inode *inode, struct file *file)
return 0;
}
+#ifdef CONFIG_COMPAT
+
+#define COMEDI32_CHANINFO _IOR(CIO, 3, struct comedi32_chaninfo_struct)
+#define COMEDI32_RANGEINFO _IOR(CIO, 8, struct comedi32_rangeinfo_struct)
+/*
+ * N.B. COMEDI32_CMD and COMEDI_CMD ought to use _IOWR, not _IOR.
+ * It's too late to change it now, but it only affects the command number.
+ */
+#define COMEDI32_CMD _IOR(CIO, 9, struct comedi32_cmd_struct)
+/*
+ * N.B. COMEDI32_CMDTEST and COMEDI_CMDTEST ought to use _IOWR, not _IOR.
+ * It's too late to change it now, but it only affects the command number.
+ */
+#define COMEDI32_CMDTEST _IOR(CIO, 10, struct comedi32_cmd_struct)
+#define COMEDI32_INSNLIST _IOR(CIO, 11, struct comedi32_insnlist_struct)
+#define COMEDI32_INSN _IOR(CIO, 12, struct comedi32_insn_struct)
+
+struct comedi32_chaninfo_struct {
+ unsigned int subdev;
+ compat_uptr_t maxdata_list; /* 32-bit 'unsigned int *' */
+ compat_uptr_t flaglist; /* 32-bit 'unsigned int *' */
+ compat_uptr_t rangelist; /* 32-bit 'unsigned int *' */
+ unsigned int unused[4];
+};
+
+struct comedi32_rangeinfo_struct {
+ unsigned int range_type;
+ compat_uptr_t range_ptr; /* 32-bit 'void *' */
+};
+
+struct comedi32_cmd_struct {
+ unsigned int subdev;
+ unsigned int flags;
+ unsigned int start_src;
+ unsigned int start_arg;
+ unsigned int scan_begin_src;
+ unsigned int scan_begin_arg;
+ unsigned int convert_src;
+ unsigned int convert_arg;
+ unsigned int scan_end_src;
+ unsigned int scan_end_arg;
+ unsigned int stop_src;
+ unsigned int stop_arg;
+ compat_uptr_t chanlist; /* 32-bit 'unsigned int *' */
+ unsigned int chanlist_len;
+ compat_uptr_t data; /* 32-bit 'short *' */
+ unsigned int data_len;
+};
+
+struct comedi32_insn_struct {
+ unsigned int insn;
+ unsigned int n;
+ compat_uptr_t data; /* 32-bit 'unsigned int *' */
+ unsigned int subdev;
+ unsigned int chanspec;
+ unsigned int unused[3];
+};
+
+struct comedi32_insnlist_struct {
+ unsigned int n_insns;
+ compat_uptr_t insns; /* 32-bit 'struct comedi_insn *' */
+};
+
+/* Handle 32-bit COMEDI_CHANINFO ioctl. */
+static int compat_chaninfo(struct file *file, unsigned long arg)
+{
+ struct comedi_file *cfp = file->private_data;
+ struct comedi_device *dev = cfp->dev;
+ struct comedi32_chaninfo_struct chaninfo32;
+ struct comedi_chaninfo chaninfo;
+ int err;
+
+ if (copy_from_user(&chaninfo32, compat_ptr(arg), sizeof(chaninfo32)))
+ return -EFAULT;
+
+ memset(&chaninfo, 0, sizeof(chaninfo));
+ chaninfo.subdev = chaninfo32.subdev;
+ chaninfo.maxdata_list = compat_ptr(chaninfo32.maxdata_list);
+ chaninfo.flaglist = compat_ptr(chaninfo32.flaglist);
+ chaninfo.rangelist = compat_ptr(chaninfo32.rangelist);
+
+ mutex_lock(&dev->mutex);
+ err = do_chaninfo_ioctl(dev, &chaninfo);
+ mutex_unlock(&dev->mutex);
+ return err;
+}
+
+/* Handle 32-bit COMEDI_RANGEINFO ioctl. */
+static int compat_rangeinfo(struct file *file, unsigned long arg)
+{
+ struct comedi_file *cfp = file->private_data;
+ struct comedi_device *dev = cfp->dev;
+ struct comedi32_rangeinfo_struct rangeinfo32;
+ struct comedi_rangeinfo rangeinfo;
+ int err;
+
+ if (copy_from_user(&rangeinfo32, compat_ptr(arg), sizeof(rangeinfo32)))
+ return -EFAULT;
+ memset(&rangeinfo, 0, sizeof(rangeinfo));
+ rangeinfo.range_type = rangeinfo32.range_type;
+ rangeinfo.range_ptr = compat_ptr(rangeinfo32.range_ptr);
+
+ mutex_lock(&dev->mutex);
+ err = do_rangeinfo_ioctl(dev, &rangeinfo);
+ mutex_unlock(&dev->mutex);
+ return err;
+}
+
+/* Copy 32-bit cmd structure to native cmd structure. */
+static int get_compat_cmd(struct comedi_cmd *cmd,
+ struct comedi32_cmd_struct __user *cmd32)
+{
+ struct comedi32_cmd_struct v32;
+
+ if (copy_from_user(&v32, cmd32, sizeof(v32)))
+ return -EFAULT;
+
+ cmd->subdev = v32.subdev;
+ cmd->flags = v32.flags;
+ cmd->start_src = v32.start_src;
+ cmd->start_arg = v32.start_arg;
+ cmd->scan_begin_src = v32.scan_begin_src;
+ cmd->scan_begin_arg = v32.scan_begin_arg;
+ cmd->convert_src = v32.convert_src;
+ cmd->convert_arg = v32.convert_arg;
+ cmd->scan_end_src = v32.scan_end_src;
+ cmd->scan_end_arg = v32.scan_end_arg;
+ cmd->stop_src = v32.stop_src;
+ cmd->stop_arg = v32.stop_arg;
+ cmd->chanlist = compat_ptr(v32.chanlist);
+ cmd->chanlist_len = v32.chanlist_len;
+ cmd->data = compat_ptr(v32.data);
+ cmd->data_len = v32.data_len;
+ return 0;
+}
+
+/* Copy native cmd structure to 32-bit cmd structure. */
+static int put_compat_cmd(struct comedi32_cmd_struct __user *cmd32,
+ struct comedi_cmd *cmd)
+{
+ struct comedi32_cmd_struct v32;
+
+ memset(&v32, 0, sizeof(v32));
+ v32.subdev = cmd->subdev;
+ v32.flags = cmd->flags;
+ v32.start_src = cmd->start_src;
+ v32.start_arg = cmd->start_arg;
+ v32.scan_begin_src = cmd->scan_begin_src;
+ v32.scan_begin_arg = cmd->scan_begin_arg;
+ v32.convert_src = cmd->convert_src;
+ v32.convert_arg = cmd->convert_arg;
+ v32.scan_end_src = cmd->scan_end_src;
+ v32.scan_end_arg = cmd->scan_end_arg;
+ v32.stop_src = cmd->stop_src;
+ v32.stop_arg = cmd->stop_arg;
+ /* Assume chanlist pointer is unchanged. */
+ v32.chanlist = ptr_to_compat(cmd->chanlist);
+ v32.chanlist_len = cmd->chanlist_len;
+ v32.data = ptr_to_compat(cmd->data);
+ v32.data_len = cmd->data_len;
+ return copy_to_user(cmd32, &v32, sizeof(v32));
+}
+
+/* Handle 32-bit COMEDI_CMD ioctl. */
+static int compat_cmd(struct file *file, unsigned long arg)
+{
+ struct comedi_file *cfp = file->private_data;
+ struct comedi_device *dev = cfp->dev;
+ struct comedi_cmd cmd;
+ bool copy = false;
+ int rc, err;
+
+ rc = get_compat_cmd(&cmd, compat_ptr(arg));
+ if (rc)
+ return rc;
+
+ mutex_lock(&dev->mutex);
+ rc = do_cmd_ioctl(dev, &cmd, &copy, file);
+ mutex_unlock(&dev->mutex);
+ if (copy) {
+ /* Special case: copy cmd back to user. */
+ err = put_compat_cmd(compat_ptr(arg), &cmd);
+ if (err)
+ rc = err;
+ }
+ return rc;
+}
+
+/* Handle 32-bit COMEDI_CMDTEST ioctl. */
+static int compat_cmdtest(struct file *file, unsigned long arg)
+{
+ struct comedi_file *cfp = file->private_data;
+ struct comedi_device *dev = cfp->dev;
+ struct comedi_cmd cmd;
+ bool copy = false;
+ int rc, err;
+
+ rc = get_compat_cmd(&cmd, compat_ptr(arg));
+ if (rc)
+ return rc;
+
+ mutex_lock(&dev->mutex);
+ rc = do_cmdtest_ioctl(dev, &cmd, &copy, file);
+ mutex_unlock(&dev->mutex);
+ if (copy) {
+ err = put_compat_cmd(compat_ptr(arg), &cmd);
+ if (err)
+ rc = err;
+ }
+ return rc;
+}
+
+/* Copy 32-bit insn structure to native insn structure. */
+static int get_compat_insn(struct comedi_insn *insn,
+ struct comedi32_insn_struct __user *insn32)
+{
+ struct comedi32_insn_struct v32;
+
+ /* Copy insn structure. Ignore the unused members. */
+ if (copy_from_user(&v32, insn32, sizeof(v32)))
+ return -EFAULT;
+ memset(insn, 0, sizeof(*insn));
+ insn->insn = v32.insn;
+ insn->n = v32.n;
+ insn->data = compat_ptr(v32.data);
+ insn->subdev = v32.subdev;
+ insn->chanspec = v32.chanspec;
+ return 0;
+}
+
+/* Handle 32-bit COMEDI_INSNLIST ioctl. */
+static int compat_insnlist(struct file *file, unsigned long arg)
+{
+ struct comedi_file *cfp = file->private_data;
+ struct comedi_device *dev = cfp->dev;
+ struct comedi32_insnlist_struct insnlist32;
+ struct comedi32_insn_struct __user *insn32;
+ struct comedi_insn *insns;
+ unsigned int n;
+ int rc;
+
+ if (copy_from_user(&insnlist32, compat_ptr(arg), sizeof(insnlist32)))
+ return -EFAULT;
+
+ insns = kcalloc(insnlist32.n_insns, sizeof(*insns), GFP_KERNEL);
+ if (!insns)
+ return -ENOMEM;
+
+ /* Copy insn structures. */
+ insn32 = compat_ptr(insnlist32.insns);
+ for (n = 0; n < insnlist32.n_insns; n++) {
+ rc = get_compat_insn(insns + n, insn32 + n);
+ if (rc) {
+ kfree(insns);
+ return rc;
+ }
+ }
+
+ mutex_lock(&dev->mutex);
+ rc = do_insnlist_ioctl(dev, insns, insnlist32.n_insns, file);
+ mutex_unlock(&dev->mutex);
+ return rc;
+}
+
+/* Handle 32-bit COMEDI_INSN ioctl. */
+static int compat_insn(struct file *file, unsigned long arg)
+{
+ struct comedi_file *cfp = file->private_data;
+ struct comedi_device *dev = cfp->dev;
+ struct comedi_insn insn;
+ int rc;
+
+ rc = get_compat_insn(&insn, (void __user *)arg);
+ if (rc)
+ return rc;
+
+ mutex_lock(&dev->mutex);
+ rc = do_insn_ioctl(dev, &insn, file);
+ mutex_unlock(&dev->mutex);
+ return rc;
+}
+
+/*
+ * compat_ioctl file operation.
+ *
+ * Returns -ENOIOCTLCMD for unrecognised ioctl codes.
+ */
+static long comedi_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ int rc;
+
+ switch (cmd) {
+ case COMEDI_DEVCONFIG:
+ case COMEDI_DEVINFO:
+ case COMEDI_SUBDINFO:
+ case COMEDI_BUFCONFIG:
+ case COMEDI_BUFINFO:
+ /* Just need to translate the pointer argument. */
+ arg = (unsigned long)compat_ptr(arg);
+ rc = comedi_unlocked_ioctl(file, cmd, arg);
+ break;
+ case COMEDI_LOCK:
+ case COMEDI_UNLOCK:
+ case COMEDI_CANCEL:
+ case COMEDI_POLL:
+ case COMEDI_SETRSUBD:
+ case COMEDI_SETWSUBD:
+ /* No translation needed. */
+ rc = comedi_unlocked_ioctl(file, cmd, arg);
+ break;
+ case COMEDI32_CHANINFO:
+ rc = compat_chaninfo(file, arg);
+ break;
+ case COMEDI32_RANGEINFO:
+ rc = compat_rangeinfo(file, arg);
+ break;
+ case COMEDI32_CMD:
+ rc = compat_cmd(file, arg);
+ break;
+ case COMEDI32_CMDTEST:
+ rc = compat_cmdtest(file, arg);
+ break;
+ case COMEDI32_INSNLIST:
+ rc = compat_insnlist(file, arg);
+ break;
+ case COMEDI32_INSN:
+ rc = compat_insn(file, arg);
+ break;
+ default:
+ rc = -ENOIOCTLCMD;
+ break;
+ }
+ return rc;
+}
+#else
+#define comedi_compat_ioctl NULL
+#endif
+
static const struct file_operations comedi_fops = {
.owner = THIS_MODULE,
.unlocked_ioctl = comedi_unlocked_ioctl,
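For readers unfamiliar with the compat path above: the handlers work because the 32-bit ABI stores user pointers as compat_uptr_t values that must be widened with compat_ptr() before the native handler can use them. A minimal illustrative sketch follows; the struct layout and names are assumptions for illustration, not copied from the comedi headers (needs <linux/compat.h>).

/* Illustrative only: layout assumed, not taken from comedi headers. */
struct example32_chaninfo {
	unsigned int subdev;
	compat_uptr_t maxdata_list;	/* 32-bit user pointer */
	compat_uptr_t flaglist;
	compat_uptr_t rangelist;
	unsigned int unused[4];
};

/*
 * A native handler expects real __user pointers, so a compat wrapper copies
 * the packed 32-bit struct in and widens each member with compat_ptr(),
 * exactly as compat_chaninfo() does above.
 */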
diff --git a/drivers/staging/comedi/comedi_internal.h b/drivers/staging/comedi/comedi_internal.h
index 515f293a5d26..7c8f18f55122 100644
--- a/drivers/staging/comedi/comedi_internal.h
+++ b/drivers/staging/comedi/comedi_internal.h
@@ -18,7 +18,7 @@ struct comedi_subdevice;
struct device;
int do_rangeinfo_ioctl(struct comedi_device *dev,
- struct comedi_rangeinfo __user *arg);
+ struct comedi_rangeinfo *it);
struct comedi_device *comedi_alloc_board_minor(struct device *hardware_device);
void comedi_release_hardware_device(struct device *hardware_device);
int comedi_alloc_subdevice_minor(struct comedi_subdevice *s);
diff --git a/drivers/staging/comedi/range.c b/drivers/staging/comedi/range.c
index 89d599877445..a4e6fe0fb729 100644
--- a/drivers/staging/comedi/range.c
+++ b/drivers/staging/comedi/range.c
@@ -46,17 +46,14 @@ EXPORT_SYMBOL_GPL(range_unknown);
* array of comedi_krange structures to rangeinfo->range_ptr pointer
*/
int do_rangeinfo_ioctl(struct comedi_device *dev,
- struct comedi_rangeinfo __user *arg)
+ struct comedi_rangeinfo *it)
{
- struct comedi_rangeinfo it;
int subd, chan;
const struct comedi_lrange *lr;
struct comedi_subdevice *s;
- if (copy_from_user(&it, arg, sizeof(struct comedi_rangeinfo)))
- return -EFAULT;
- subd = (it.range_type >> 24) & 0xf;
- chan = (it.range_type >> 16) & 0xff;
+ subd = (it->range_type >> 24) & 0xf;
+ chan = (it->range_type >> 16) & 0xff;
if (!dev->attached)
return -EINVAL;
@@ -73,15 +70,15 @@ int do_rangeinfo_ioctl(struct comedi_device *dev,
return -EINVAL;
}
- if (RANGE_LENGTH(it.range_type) != lr->length) {
+ if (RANGE_LENGTH(it->range_type) != lr->length) {
dev_dbg(dev->class_dev,
"wrong length %d should be %d (0x%08x)\n",
- RANGE_LENGTH(it.range_type),
- lr->length, it.range_type);
+ RANGE_LENGTH(it->range_type),
+ lr->length, it->range_type);
return -EINVAL;
}
- if (copy_to_user(it.range_ptr, lr->range,
+ if (copy_to_user(it->range_ptr, lr->range,
sizeof(struct comedi_krange) * lr->length))
return -EFAULT;
diff --git a/drivers/staging/greybus/sdio.c b/drivers/staging/greybus/sdio.c
index 68c5718be827..0939f4a4c963 100644
--- a/drivers/staging/greybus/sdio.c
+++ b/drivers/staging/greybus/sdio.c
@@ -67,7 +67,6 @@ static void _gb_sdio_set_host_caps(struct gb_sdio_host *host, u32 r)
((r & GB_SDIO_CAP_8_BIT_DATA) ? MMC_CAP_8_BIT_DATA : 0) |
((r & GB_SDIO_CAP_MMC_HS) ? MMC_CAP_MMC_HIGHSPEED : 0) |
((r & GB_SDIO_CAP_SD_HS) ? MMC_CAP_SD_HIGHSPEED : 0) |
- ((r & GB_SDIO_CAP_ERASE) ? MMC_CAP_ERASE : 0) |
((r & GB_SDIO_CAP_1_2V_DDR) ? MMC_CAP_1_2V_DDR : 0) |
((r & GB_SDIO_CAP_1_8V_DDR) ? MMC_CAP_1_8V_DDR : 0) |
((r & GB_SDIO_CAP_POWER_OFF_CARD) ? MMC_CAP_POWER_OFF_CARD : 0) |
@@ -411,6 +410,7 @@ static int gb_sdio_command(struct gb_sdio_host *host, struct mmc_command *cmd)
struct gb_sdio_command_request request = {0};
struct gb_sdio_command_response response;
struct mmc_data *data = host->mrq->data;
+ unsigned int timeout_ms;
u8 cmd_flags;
u8 cmd_type;
int i;
@@ -469,9 +469,12 @@ static int gb_sdio_command(struct gb_sdio_host *host, struct mmc_command *cmd)
request.data_blksz = cpu_to_le16(data->blksz);
}
- ret = gb_operation_sync(host->connection, GB_SDIO_TYPE_COMMAND,
- &request, sizeof(request), &response,
- sizeof(response));
+ timeout_ms = cmd->busy_timeout ? cmd->busy_timeout :
+ GB_OPERATION_TIMEOUT_DEFAULT;
+
+ ret = gb_operation_sync_timeout(host->connection, GB_SDIO_TYPE_COMMAND,
+ &request, sizeof(request), &response,
+ sizeof(response), timeout_ms);
if (ret < 0)
goto out;
diff --git a/drivers/staging/media/ipu3/ipu3-css-pool.h b/drivers/staging/media/ipu3/ipu3-css-pool.h
index f4a60b41401b..a8ccd4f70320 100644
--- a/drivers/staging/media/ipu3/ipu3-css-pool.h
+++ b/drivers/staging/media/ipu3/ipu3-css-pool.h
@@ -15,14 +15,12 @@ struct imgu_device;
* @size: size of the buffer in bytes.
* @vaddr: kernel virtual address.
* @daddr: iova dma address to access IPU3.
- * @vma: private, a pointer to &struct vm_struct,
- * used for imgu_dmamap_free.
*/
struct imgu_css_map {
size_t size;
void *vaddr;
dma_addr_t daddr;
- struct vm_struct *vma;
+ struct page **pages;
};
/**
diff --git a/drivers/staging/media/ipu3/ipu3-dmamap.c b/drivers/staging/media/ipu3/ipu3-dmamap.c
index 7431322379f6..8a19b0024152 100644
--- a/drivers/staging/media/ipu3/ipu3-dmamap.c
+++ b/drivers/staging/media/ipu3/ipu3-dmamap.c
@@ -96,6 +96,7 @@ void *imgu_dmamap_alloc(struct imgu_device *imgu, struct imgu_css_map *map,
unsigned long shift = iova_shift(&imgu->iova_domain);
struct device *dev = &imgu->pci_dev->dev;
size_t size = PAGE_ALIGN(len);
+ int count = size >> PAGE_SHIFT;
struct page **pages;
dma_addr_t iovaddr;
struct iova *iova;
@@ -114,7 +115,7 @@ void *imgu_dmamap_alloc(struct imgu_device *imgu, struct imgu_css_map *map,
/* Call IOMMU driver to setup pgt */
iovaddr = iova_dma_addr(&imgu->iova_domain, iova);
- for (i = 0; i < size / PAGE_SIZE; ++i) {
+ for (i = 0; i < count; ++i) {
rval = imgu_mmu_map(imgu->mmu, iovaddr,
page_to_phys(pages[i]), PAGE_SIZE);
if (rval)
@@ -123,33 +124,23 @@ void *imgu_dmamap_alloc(struct imgu_device *imgu, struct imgu_css_map *map,
iovaddr += PAGE_SIZE;
}
- /* Now grab a virtual region */
- map->vma = __get_vm_area(size, VM_USERMAP, VMALLOC_START, VMALLOC_END);
- if (!map->vma)
+ map->vaddr = vmap(pages, count, VM_USERMAP, PAGE_KERNEL);
+ if (!map->vaddr)
goto out_unmap;
- map->vma->pages = pages;
- /* And map it in KVA */
- if (map_vm_area(map->vma, PAGE_KERNEL, pages))
- goto out_vunmap;
-
+ map->pages = pages;
map->size = size;
map->daddr = iova_dma_addr(&imgu->iova_domain, iova);
- map->vaddr = map->vma->addr;
dev_dbg(dev, "%s: allocated %zu @ IOVA %pad @ VA %p\n", __func__,
- size, &map->daddr, map->vma->addr);
-
- return map->vma->addr;
+ size, &map->daddr, map->vaddr);
-out_vunmap:
- vunmap(map->vma->addr);
+ return map->vaddr;
out_unmap:
imgu_dmamap_free_buffer(pages, size);
imgu_mmu_unmap(imgu->mmu, iova_dma_addr(&imgu->iova_domain, iova),
i * PAGE_SIZE);
- map->vma = NULL;
out_free_iova:
__free_iova(&imgu->iova_domain, iova);
@@ -177,8 +168,6 @@ void imgu_dmamap_unmap(struct imgu_device *imgu, struct imgu_css_map *map)
*/
void imgu_dmamap_free(struct imgu_device *imgu, struct imgu_css_map *map)
{
- struct vm_struct *area = map->vma;
-
dev_dbg(&imgu->pci_dev->dev, "%s: freeing %zu @ IOVA %pad @ VA %p\n",
__func__, map->size, &map->daddr, map->vaddr);
@@ -187,11 +176,8 @@ void imgu_dmamap_free(struct imgu_device *imgu, struct imgu_css_map *map)
imgu_dmamap_unmap(imgu, map);
- if (WARN_ON(!area) || WARN_ON(!area->pages))
- return;
-
- imgu_dmamap_free_buffer(area->pages, map->size);
vunmap(map->vaddr);
+ imgu_dmamap_free_buffer(map->pages, map->size);
map->vaddr = NULL;
}
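The ipu3-dmamap hunk above replaces the removed __get_vm_area()/map_vm_area() pair with a single vmap() call and keeps the page array around for the free path. A minimal sketch of that pattern, using a hypothetical helper that is not part of the driver (needs <linux/mm.h>, <linux/slab.h>, <linux/vmalloc.h>):

static void *example_vmap_alloc(struct page ***ret_pages, int count)
{
	struct page **pages;
	void *vaddr;
	int i;

	pages = kcalloc(count, sizeof(*pages), GFP_KERNEL);
	if (!pages)
		return NULL;
	for (i = 0; i < count; i++) {
		pages[i] = alloc_page(GFP_KERNEL);
		if (!pages[i])
			goto err;
	}
	/* One call maps the page array into a contiguous kernel VA range. */
	vaddr = vmap(pages, count, VM_MAP, PAGE_KERNEL);
	if (!vaddr)
		goto err;
	/* Keep the array so the free path can vunmap() and __free_page(). */
	*ret_pages = pages;
	return vaddr;
err:
	while (i--)
		__free_page(pages[i]);
	kfree(pages);
	return NULL;
}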
diff --git a/drivers/tty/n_hdlc.c b/drivers/tty/n_hdlc.c
index 991f49ee4026..b09eac4b6d64 100644
--- a/drivers/tty/n_hdlc.c
+++ b/drivers/tty/n_hdlc.c
@@ -423,13 +423,6 @@ static ssize_t n_hdlc_tty_read(struct tty_struct *tty, struct file *file,
struct n_hdlc_buf *rbuf;
DECLARE_WAITQUEUE(wait, current);
- /* verify user access to buffer */
- if (!access_ok(buf, nr)) {
- pr_warn("%s(%d) %s() can't verify user buffer\n",
- __FILE__, __LINE__, __func__);
- return -EFAULT;
- }
-
add_wait_queue(&tty->read_wait, &wait);
for (;;) {
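The access_ok() removal here, like the similar removals in the USB and fbdev hunks below, relies on copy_to_user()/copy_from_user() performing the same user-range check internally and failing when any bytes cannot be copied. A hedged sketch of why the explicit check is redundant; example_read() is a hypothetical helper, not code from the patch:

static ssize_t example_read(char __user *buf, const char *kbuf, size_t n)
{
	/* copy_to_user() already validates the user range itself. */
	if (copy_to_user(buf, kbuf, n))
		return -EFAULT;
	return n;
}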
diff --git a/drivers/tty/serial/8250/8250_early.c b/drivers/tty/serial/8250/8250_early.c
index 5cd8c36c8fcc..70d7826788f5 100644
--- a/drivers/tty/serial/8250/8250_early.c
+++ b/drivers/tty/serial/8250/8250_early.c
@@ -109,6 +109,28 @@ static void early_serial8250_write(struct console *console,
uart_console_write(port, s, count, serial_putc);
}
+#ifdef CONFIG_CONSOLE_POLL
+static int early_serial8250_read(struct console *console,
+ char *s, unsigned int count)
+{
+ struct earlycon_device *device = console->data;
+ struct uart_port *port = &device->port;
+ unsigned int status;
+ int num_read = 0;
+
+ while (num_read < count) {
+ status = serial8250_early_in(port, UART_LSR);
+ if (!(status & UART_LSR_DR))
+ break;
+ s[num_read++] = serial8250_early_in(port, UART_RX);
+ }
+
+ return num_read;
+}
+#else
+#define early_serial8250_read NULL
+#endif
+
static void __init init_port(struct earlycon_device *device)
{
struct uart_port *port = &device->port;
@@ -149,6 +171,7 @@ int __init early_serial8250_setup(struct earlycon_device *device,
init_port(device);
device->con->write = early_serial8250_write;
+ device->con->read = early_serial8250_read;
return 0;
}
EARLYCON_DECLARE(uart8250, early_serial8250_setup);
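The ->read hook wired up here (and for pl011 and qcom_geni below) exists for polled debugging: a consumer such as kgdboc_earlycon pulls one character at a time and treats an empty FIFO as "nothing to read". A short sketch of such a consumer; the helper name is made up, only the console ->read callback and NO_POLL_CHAR are taken from this series:

static int example_earlycon_poll_char(struct console *con)
{
	char c;

	/* Ask the earlycon for a single byte; 0 means the RX FIFO is empty. */
	if (!con->read || con->read(con, &c, 1) < 1)
		return NO_POLL_CHAR;
	return c;
}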
diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c
index 2296bb0f9578..c010f639298d 100644
--- a/drivers/tty/serial/amba-pl011.c
+++ b/drivers/tty/serial/amba-pl011.c
@@ -2435,6 +2435,37 @@ static void pl011_early_write(struct console *con, const char *s, unsigned n)
uart_console_write(&dev->port, s, n, pl011_putc);
}
+#ifdef CONFIG_CONSOLE_POLL
+static int pl011_getc(struct uart_port *port)
+{
+ if (readl(port->membase + UART01x_FR) & UART01x_FR_RXFE)
+ return NO_POLL_CHAR;
+
+ if (port->iotype == UPIO_MEM32)
+ return readl(port->membase + UART01x_DR);
+ else
+ return readb(port->membase + UART01x_DR);
+}
+
+static int pl011_early_read(struct console *con, char *s, unsigned int n)
+{
+ struct earlycon_device *dev = con->data;
+ int ch, num_read = 0;
+
+ while (num_read < n) {
+ ch = pl011_getc(&dev->port);
+ if (ch == NO_POLL_CHAR)
+ break;
+
+ s[num_read++] = ch;
+ }
+
+ return num_read;
+}
+#else
+#define pl011_early_read NULL
+#endif
+
/*
* On non-ACPI systems, earlycon is enabled by specifying
* "earlycon=pl011,<address>" on the kernel command line.
@@ -2454,6 +2485,7 @@ static int __init pl011_early_console_setup(struct earlycon_device *device,
return -ENODEV;
device->con->write = pl011_early_write;
+ device->con->read = pl011_early_read;
return 0;
}
diff --git a/drivers/tty/serial/kgdboc.c b/drivers/tty/serial/kgdboc.c
index c9f94fa82be4..41396982e9e0 100644
--- a/drivers/tty/serial/kgdboc.c
+++ b/drivers/tty/serial/kgdboc.c
@@ -20,6 +20,8 @@
#include <linux/vt_kern.h>
#include <linux/input.h>
#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/serial_core.h>
#define MAX_CONFIG_LEN 40
@@ -27,6 +29,7 @@ static struct kgdb_io kgdboc_io_ops;
/* -1 = init not run yet, 0 = unconfigured, 1 = configured. */
static int configured = -1;
+static DEFINE_MUTEX(config_mutex);
static char config[MAX_CONFIG_LEN];
static struct kparam_string kps = {
@@ -38,6 +41,14 @@ static int kgdboc_use_kms; /* 1 if we use kernel mode switching */
static struct tty_driver *kgdb_tty_driver;
static int kgdb_tty_line;
+static struct platform_device *kgdboc_pdev;
+
+#if IS_BUILTIN(CONFIG_KGDB_SERIAL_CONSOLE)
+static struct kgdb_io kgdboc_earlycon_io_ops;
+static struct console *earlycon;
+static int (*earlycon_orig_exit)(struct console *con);
+#endif /* IS_BUILTIN(CONFIG_KGDB_SERIAL_CONSOLE) */
+
#ifdef CONFIG_KDB_KEYBOARD
static int kgdboc_reset_connect(struct input_handler *handler,
struct input_dev *dev,
@@ -131,13 +142,27 @@ static void kgdboc_unregister_kbd(void)
#define kgdboc_restore_input()
#endif /* ! CONFIG_KDB_KEYBOARD */
+#if IS_BUILTIN(CONFIG_KGDB_SERIAL_CONSOLE)
+static void cleanup_earlycon(void)
+{
+ if (earlycon)
+ kgdb_unregister_io_module(&kgdboc_earlycon_io_ops);
+}
+#else /* !IS_BUILTIN(CONFIG_KGDB_SERIAL_CONSOLE) */
+static inline void cleanup_earlycon(void) { }
+#endif /* !IS_BUILTIN(CONFIG_KGDB_SERIAL_CONSOLE) */
+
static void cleanup_kgdboc(void)
{
+ cleanup_earlycon();
+
+ if (configured != 1)
+ return;
+
if (kgdb_unregister_nmi_console())
return;
kgdboc_unregister_kbd();
- if (configured == 1)
- kgdb_unregister_io_module(&kgdboc_io_ops);
+ kgdb_unregister_io_module(&kgdboc_io_ops);
}
static int configure_kgdboc(void)
@@ -198,20 +223,79 @@ nmi_con_failed:
kgdb_unregister_io_module(&kgdboc_io_ops);
noconfig:
kgdboc_unregister_kbd();
- config[0] = 0;
configured = 0;
- cleanup_kgdboc();
return err;
}
+static int kgdboc_probe(struct platform_device *pdev)
+{
+ int ret = 0;
+
+ mutex_lock(&config_mutex);
+ if (configured != 1) {
+ ret = configure_kgdboc();
+
+ /* Convert "no device" to "defer" so we'll keep trying */
+ if (ret == -ENODEV)
+ ret = -EPROBE_DEFER;
+ }
+ mutex_unlock(&config_mutex);
+
+ return ret;
+}
+
+static struct platform_driver kgdboc_platform_driver = {
+ .probe = kgdboc_probe,
+ .driver = {
+ .name = "kgdboc",
+ .suppress_bind_attrs = true,
+ },
+};
+
static int __init init_kgdboc(void)
{
- /* Already configured? */
- if (configured == 1)
+ int ret;
+
+ /*
+ * kgdboc is a little bit of an odd "platform_driver". It can be
+ * up and running long before the platform_driver object is
+ * created and thus doesn't actually store anything in it. There's
+ * only one instance of kgdb so anything is stored as global state.
+ * The platform_driver is only created so that we can leverage the
+ * kernel's mechanisms (like -EPROBE_DEFER) to call us when our
+ * underlying tty is ready. Here we init our platform driver and
+ * then create the single kgdboc instance.
+ */
+ ret = platform_driver_register(&kgdboc_platform_driver);
+ if (ret)
+ return ret;
+
+ kgdboc_pdev = platform_device_alloc("kgdboc", PLATFORM_DEVID_NONE);
+ if (!kgdboc_pdev) {
+ ret = -ENOMEM;
+ goto err_did_register;
+ }
+
+ ret = platform_device_add(kgdboc_pdev);
+ if (!ret)
return 0;
- return configure_kgdboc();
+ platform_device_put(kgdboc_pdev);
+
+err_did_register:
+ platform_driver_unregister(&kgdboc_platform_driver);
+ return ret;
+}
+
+static void exit_kgdboc(void)
+{
+ mutex_lock(&config_mutex);
+ cleanup_kgdboc();
+ mutex_unlock(&config_mutex);
+
+ platform_device_unregister(kgdboc_pdev);
+ platform_driver_unregister(&kgdboc_platform_driver);
}
static int kgdboc_get_char(void)
@@ -234,24 +318,20 @@ static int param_set_kgdboc_var(const char *kmessage,
const struct kernel_param *kp)
{
size_t len = strlen(kmessage);
+ int ret = 0;
if (len >= MAX_CONFIG_LEN) {
pr_err("config string too long\n");
return -ENOSPC;
}
- /* Only copy in the string if the init function has not run yet */
- if (configured < 0) {
- strcpy(config, kmessage);
- return 0;
- }
-
if (kgdb_connected) {
pr_err("Cannot reconfigure while KGDB is connected.\n");
-
return -EBUSY;
}
+ mutex_lock(&config_mutex);
+
strcpy(config, kmessage);
/* Chop out \n char as a result of echo */
if (len && config[len - 1] == '\n')
@@ -260,8 +340,30 @@ static int param_set_kgdboc_var(const char *kmessage,
if (configured == 1)
cleanup_kgdboc();
- /* Go and configure with the new params. */
- return configure_kgdboc();
+ /*
+ * Configure with the new params as long as init already ran.
+ * Note that we can get called before init if someone loads us
+ * with "modprobe kgdboc kgdboc=..." or if they happen to use the
+ * the odd syntax of "kgdboc.kgdboc=..." on the kernel command.
+ */
+ if (configured >= 0)
+ ret = configure_kgdboc();
+
+ /*
+ * If we couldn't configure then clear out the config. Note that
+ * specifying an invalid config on the kernel command line vs.
+ * through sysfs have slightly different behaviors. If we fail
+ * to configure what was specified on the kernel command line
+ * we'll leave it in the 'config' and return -EPROBE_DEFER from
+ * our probe. When specified through sysfs userspace is
+ * responsible for loading the tty driver before setting up.
+ */
+ if (ret)
+ config[0] = '\0';
+
+ mutex_unlock(&config_mutex);
+
+ return ret;
}
static int dbg_restore_graphics;
@@ -275,14 +377,10 @@ static void kgdboc_pre_exp_handler(void)
/* Increment the module count when the debugger is active */
if (!kgdb_connected)
try_module_get(THIS_MODULE);
-
- atomic_inc(&ignore_console_lock_warning);
}
static void kgdboc_post_exp_handler(void)
{
- atomic_dec(&ignore_console_lock_warning);
-
/* decrement the module count when the debugger detaches */
if (!kgdb_connected)
module_put(THIS_MODULE);
@@ -301,7 +399,7 @@ static struct kgdb_io kgdboc_io_ops = {
.post_exception = kgdboc_post_exp_handler,
};
-#ifdef CONFIG_KGDB_SERIAL_CONSOLE
+#if IS_BUILTIN(CONFIG_KGDB_SERIAL_CONSOLE)
static int kgdboc_option_setup(char *opt)
{
if (!opt) {
@@ -324,23 +422,181 @@ __setup("kgdboc=", kgdboc_option_setup);
/* This is only available if kgdboc is a built in for early debugging */
static int __init kgdboc_early_init(char *opt)
{
- /* save the first character of the config string because the
- * init routine can destroy it.
- */
- char save_ch;
-
kgdboc_option_setup(opt);
- save_ch = config[0];
- init_kgdboc();
- config[0] = save_ch;
+ configure_kgdboc();
return 0;
}
early_param("ekgdboc", kgdboc_early_init);
-#endif /* CONFIG_KGDB_SERIAL_CONSOLE */
+
+static int kgdboc_earlycon_get_char(void)
+{
+ char c;
+
+ if (!earlycon->read(earlycon, &c, 1))
+ return NO_POLL_CHAR;
+
+ return c;
+}
+
+static void kgdboc_earlycon_put_char(u8 chr)
+{
+ earlycon->write(earlycon, &chr, 1);
+}
+
+static void kgdboc_earlycon_pre_exp_handler(void)
+{
+ struct console *con;
+ static bool already_warned;
+
+ if (already_warned)
+ return;
+
+ /*
+ * When the first normal console comes up the kernel will take all
+ * the boot consoles out of the list. Really, we should stop using
+ * the boot console when it does that but until a TTY is registered
+ * we have no other choice so we keep using it. Since not all
+ * serial drivers might be OK with this, print a warning once per
+ * boot if we detect this case.
+ */
+ for_each_console(con)
+ if (con == earlycon)
+ return;
+
+ already_warned = true;
+ pr_warn("kgdboc_earlycon is still using bootconsole\n");
+}
+
+static int kgdboc_earlycon_deferred_exit(struct console *con)
+{
+ /*
+ * If we get here it means the boot console is going away but we
+ * don't yet have a suitable replacement. Don't pass through to
+ * the original exit routine. We'll call it later in our deinit()
+ * function. For now, restore the original exit() function pointer
+ * as a sentinel that we've hit this point.
+ */
+ con->exit = earlycon_orig_exit;
+
+ return 0;
+}
+
+static void kgdboc_earlycon_deinit(void)
+{
+ if (!earlycon)
+ return;
+
+ if (earlycon->exit == kgdboc_earlycon_deferred_exit)
+ /*
+ * kgdboc_earlycon is exiting but original boot console exit
+ * was never called (AKA kgdboc_earlycon_deferred_exit()
+ * didn't ever run). Undo our trap.
+ */
+ earlycon->exit = earlycon_orig_exit;
+ else if (earlycon->exit)
+ /*
+ * We skipped calling the exit() routine so we could try to
+ * keep using the boot console even after it went away. We're
+ * finally done so call the function now.
+ */
+ earlycon->exit(earlycon);
+
+ earlycon = NULL;
+}
+
+static struct kgdb_io kgdboc_earlycon_io_ops = {
+ .name = "kgdboc_earlycon",
+ .read_char = kgdboc_earlycon_get_char,
+ .write_char = kgdboc_earlycon_put_char,
+ .pre_exception = kgdboc_earlycon_pre_exp_handler,
+ .deinit = kgdboc_earlycon_deinit,
+ .is_console = true,
+};
+
+#define MAX_CONSOLE_NAME_LEN (sizeof((struct console *) 0)->name)
+static char kgdboc_earlycon_param[MAX_CONSOLE_NAME_LEN] __initdata;
+static bool kgdboc_earlycon_late_enable __initdata;
+
+static int __init kgdboc_earlycon_init(char *opt)
+{
+ struct console *con;
+
+ kdb_init(KDB_INIT_EARLY);
+
+ /*
+ * Look for a matching console, or if the name was left blank just
+ * pick the first one we find.
+ */
+ console_lock();
+ for_each_console(con) {
+ if (con->write && con->read &&
+ (con->flags & (CON_BOOT | CON_ENABLED)) &&
+ (!opt || !opt[0] || strcmp(con->name, opt) == 0))
+ break;
+ }
+
+ if (!con) {
+ /*
+ * Both earlycon and kgdboc_earlycon are initialized during
+ * early parameter parsing. We cannot guarantee earlycon gets
+ * in first and, in any case, on ACPI systems earlycon may
+ * defer its own initialization (usually to somewhere within
+ * setup_arch() ). To cope with either of these situations
+ * we can defer our own initialization to a little later in
+ * the boot.
+ */
+ if (!kgdboc_earlycon_late_enable) {
+ pr_info("No suitable earlycon yet, will try later\n");
+ if (opt)
+ strscpy(kgdboc_earlycon_param, opt,
+ sizeof(kgdboc_earlycon_param));
+ kgdboc_earlycon_late_enable = true;
+ } else {
+ pr_info("Couldn't find kgdb earlycon\n");
+ }
+ goto unlock;
+ }
+
+ earlycon = con;
+ pr_info("Going to register kgdb with earlycon '%s'\n", con->name);
+ if (kgdb_register_io_module(&kgdboc_earlycon_io_ops) != 0) {
+ earlycon = NULL;
+ pr_info("Failed to register kgdb with earlycon\n");
+ } else {
+ /* Trap exit so we can keep earlycon longer if needed. */
+ earlycon_orig_exit = con->exit;
+ con->exit = kgdboc_earlycon_deferred_exit;
+ }
+
+unlock:
+ console_unlock();
+
+ /* Non-zero means malformed option so we always return zero */
+ return 0;
+}
+
+early_param("kgdboc_earlycon", kgdboc_earlycon_init);
+
+/*
+ * This is only intended for the late adoption of an early console.
+ *
+ * It is not a reliable way to adopt regular consoles because we can not
+ * control what order console initcalls are made and, in any case, many
+ * regular consoles are registered much later in the boot process than
+ * the console initcalls!
+ */
+static int __init kgdboc_earlycon_late_init(void)
+{
+ if (kgdboc_earlycon_late_enable)
+ kgdboc_earlycon_init(kgdboc_earlycon_param);
+ return 0;
+}
+console_initcall(kgdboc_earlycon_late_init);
+
+#endif /* IS_BUILTIN(CONFIG_KGDB_SERIAL_CONSOLE) */
module_init(init_kgdboc);
-module_exit(cleanup_kgdboc);
+module_exit(exit_kgdboc);
module_param_call(kgdboc, param_set_kgdboc_var, param_get_string, &kps, 0644);
MODULE_PARM_DESC(kgdboc, "<serial_device>[,baud]");
MODULE_DESCRIPTION("KGDB Console TTY Driver");
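The kgdboc rework above leans on the driver core's probe-deferral machinery: when the configured tty driver is not registered yet, returning -EPROBE_DEFER from probe puts the device on the deferred list and the core retries the probe as other drivers register. A minimal sketch under that assumption; example_configure() is a hypothetical stand-in for configure_kgdboc():

static int example_probe(struct platform_device *pdev)
{
	int ret = example_configure();

	/* "No such device" now may succeed later, so ask the core to retry. */
	if (ret == -ENODEV)
		ret = -EPROBE_DEFER;
	return ret;
}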
diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c
index 6119090ce045..6bace1c6bb09 100644
--- a/drivers/tty/serial/qcom_geni_serial.c
+++ b/drivers/tty/serial/qcom_geni_serial.c
@@ -1090,6 +1090,36 @@ static void qcom_geni_serial_earlycon_write(struct console *con,
__qcom_geni_serial_console_write(&dev->port, s, n);
}
+#ifdef CONFIG_CONSOLE_POLL
+static int qcom_geni_serial_earlycon_read(struct console *con,
+ char *s, unsigned int n)
+{
+ struct earlycon_device *dev = con->data;
+ struct uart_port *uport = &dev->port;
+ int num_read = 0;
+ int ch;
+
+ while (num_read < n) {
+ ch = qcom_geni_serial_get_char(uport);
+ if (ch == NO_POLL_CHAR)
+ break;
+ s[num_read++] = ch;
+ }
+
+ return num_read;
+}
+
+static void __init qcom_geni_serial_enable_early_read(struct geni_se *se,
+ struct console *con)
+{
+ geni_se_setup_s_cmd(se, UART_START_READ, 0);
+ con->read = qcom_geni_serial_earlycon_read;
+}
+#else
+static inline void qcom_geni_serial_enable_early_read(struct geni_se *se,
+ struct console *con) { }
+#endif
+
static int __init qcom_geni_serial_earlycon_setup(struct earlycon_device *dev,
const char *opt)
{
@@ -1136,6 +1166,8 @@ static int __init qcom_geni_serial_earlycon_setup(struct earlycon_device *dev,
dev->con->write = qcom_geni_serial_earlycon_write;
dev->con->setup = NULL;
+ qcom_geni_serial_enable_early_read(&se, dev->con);
+
return 0;
}
OF_EARLYCON_DECLARE(qcom_geni, "qcom,geni-debug-uart",
diff --git a/drivers/usb/core/devices.c b/drivers/usb/core/devices.c
index 44f28a114c2b..94b6fa6e585e 100644
--- a/drivers/usb/core/devices.c
+++ b/drivers/usb/core/devices.c
@@ -598,8 +598,6 @@ static ssize_t usb_device_read(struct file *file, char __user *buf,
return -EINVAL;
if (nbytes <= 0)
return 0;
- if (!access_ok(buf, nbytes))
- return -EFAULT;
mutex_lock(&usb_bus_idr_lock);
/* print devices for all busses */
diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c
index d93d94d7ff50..96d4507d988a 100644
--- a/drivers/usb/core/devio.c
+++ b/drivers/usb/core/devio.c
@@ -1136,11 +1136,6 @@ static int proc_control(struct usb_dev_state *ps, void __user *arg)
ctrl.bRequestType, ctrl.bRequest, ctrl.wValue,
ctrl.wIndex, ctrl.wLength);
if (ctrl.bRequestType & 0x80) {
- if (ctrl.wLength && !access_ok(ctrl.data,
- ctrl.wLength)) {
- ret = -EINVAL;
- goto done;
- }
pipe = usb_rcvctrlpipe(dev, 0);
snoop_urb(dev, NULL, pipe, ctrl.wLength, tmo, SUBMIT, NULL, 0);
@@ -1225,10 +1220,6 @@ static int proc_bulk(struct usb_dev_state *ps, void __user *arg)
}
tmo = bulk.timeout;
if (bulk.ep & 0x80) {
- if (len1 && !access_ok(bulk.data, len1)) {
- ret = -EINVAL;
- goto done;
- }
snoop_urb(dev, NULL, pipe, len1, tmo, SUBMIT, NULL, 0);
usb_unlock_device(dev);
diff --git a/drivers/usb/gadget/function/f_hid.c b/drivers/usb/gadget/function/f_hid.c
index f3816a5c861e..df671acdd464 100644
--- a/drivers/usb/gadget/function/f_hid.c
+++ b/drivers/usb/gadget/function/f_hid.c
@@ -252,9 +252,6 @@ static ssize_t f_hidg_read(struct file *file, char __user *buffer,
if (!count)
return 0;
- if (!access_ok(buffer, count))
- return -EFAULT;
-
spin_lock_irqsave(&hidg->read_spinlock, flags);
#define READ_COND (!list_empty(&hidg->completed_out_req))
@@ -339,9 +336,6 @@ static ssize_t f_hidg_write(struct file *file, const char __user *buffer,
unsigned long flags;
ssize_t status = -ENOMEM;
- if (!access_ok(buffer, count))
- return -EFAULT;
-
spin_lock_irqsave(&hidg->write_spinlock, flags);
#define WRITE_COND (!hidg->write_pending)
diff --git a/drivers/usb/typec/mux/Kconfig b/drivers/usb/typec/mux/Kconfig
index 77eb97b2aa86..a4dbd11f8ee2 100644
--- a/drivers/usb/typec/mux/Kconfig
+++ b/drivers/usb/typec/mux/Kconfig
@@ -11,7 +11,7 @@ config TYPEC_MUX_PI3USB30532
config TYPEC_MUX_INTEL_PMC
tristate "Intel PMC mux control"
- depends on INTEL_PMC_IPC
+ depends on INTEL_SCU_IPC
select USB_ROLE_SWITCH
help
Driver for USB muxes controlled by Intel PMC FW. Intel PMC FW can
diff --git a/drivers/usb/typec/mux/intel_pmc_mux.c b/drivers/usb/typec/mux/intel_pmc_mux.c
index c22e5c4bbf1a..1ac0a3eb7dd8 100644
--- a/drivers/usb/typec/mux/intel_pmc_mux.c
+++ b/drivers/usb/typec/mux/intel_pmc_mux.c
@@ -15,7 +15,7 @@
#include <linux/usb/typec_dp.h>
#include <linux/usb/typec_tbt.h>
-#include <asm/intel_pmc_ipc.h>
+#include <asm/intel_scu_ipc.h>
#define PMC_USBC_CMD 0xa7
@@ -97,6 +97,7 @@ struct pmc_usb_port {
struct pmc_usb {
u8 num_ports;
struct device *dev;
+ struct intel_scu_ipc_dev *ipc;
struct pmc_usb_port *port;
};
@@ -108,9 +109,8 @@ static int pmc_usb_command(struct pmc_usb_port *port, u8 *msg, u32 len)
* Error bit will always be 0 with the USBC command.
* Status can be checked from the response message.
*/
- intel_pmc_ipc_command(PMC_USBC_CMD, 0, msg, len,
- (void *)response, 1);
-
+ intel_scu_ipc_dev_command(port->pmc->ipc, PMC_USBC_CMD, 0, msg, len,
+ response, sizeof(response));
if (response[2]) {
if (response[2] & BIT(1))
return -EIO;
@@ -374,6 +374,10 @@ static int pmc_usb_probe(struct platform_device *pdev)
if (!pmc->port)
return -ENOMEM;
+ pmc->ipc = devm_intel_scu_ipc_dev_get(&pdev->dev);
+ if (!pmc->ipc)
+ return -ENODEV;
+
pmc->dev = &pdev->dev;
/*
diff --git a/drivers/usb/typec/tcpm/Kconfig b/drivers/usb/typec/tcpm/Kconfig
index 5b986d6c801d..fa3f39336246 100644
--- a/drivers/usb/typec/tcpm/Kconfig
+++ b/drivers/usb/typec/tcpm/Kconfig
@@ -41,8 +41,8 @@ config TYPEC_FUSB302
config TYPEC_WCOVE
tristate "Intel WhiskeyCove PMIC USB Type-C PHY driver"
depends on ACPI
+ depends on MFD_INTEL_PMC_BXT
depends on INTEL_SOC_PMIC
- depends on INTEL_PMC_IPC
depends on BXT_WC_PMIC_OPREGION
help
This driver adds support for USB Type-C on Intel Broxton platforms
diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig
index 91b0a719d221..fa88e8b9a83d 100644
--- a/drivers/video/fbdev/Kconfig
+++ b/drivers/video/fbdev/Kconfig
@@ -472,7 +472,7 @@ config FB_OF
config FB_CONTROL
bool "Apple \"control\" display support"
- depends on (FB = y) && PPC_PMAC && PPC32
+ depends on (FB = y) && ((PPC_PMAC && PPC32) || COMPILE_TEST)
select FB_CFB_FILLRECT
select FB_CFB_COPYAREA
select FB_CFB_IMAGEBLIT
diff --git a/drivers/video/fbdev/amifb.c b/drivers/video/fbdev/amifb.c
index 20e03e00b66d..6062104f3afb 100644
--- a/drivers/video/fbdev/amifb.c
+++ b/drivers/video/fbdev/amifb.c
@@ -1855,8 +1855,6 @@ static int ami_get_var_cursorinfo(struct fb_var_cursorinfo *var,
var->yspot = par->crsr.spot_y;
if (size > var->height * var->width)
return -ENAMETOOLONG;
- if (!access_ok(data, size))
- return -EFAULT;
delta = 1 << par->crsr.fmode;
lspr = lofsprite + (delta << 1);
if (par->bplcon0 & BPC0_LACE)
@@ -1935,8 +1933,6 @@ static int ami_set_var_cursorinfo(struct fb_var_cursorinfo *var,
return -EINVAL;
if (!var->height)
return -EINVAL;
- if (!access_ok(data, var->width * var->height))
- return -EFAULT;
delta = 1 << fmode;
lofsprite = shfsprite = (u_short *)spritememory;
lspr = lofsprite + (delta << 1);
diff --git a/drivers/video/fbdev/arcfb.c b/drivers/video/fbdev/arcfb.c
index 314ab82e01c0..6f7838979f0a 100644
--- a/drivers/video/fbdev/arcfb.c
+++ b/drivers/video/fbdev/arcfb.c
@@ -544,10 +544,6 @@ static int arcfb_probe(struct platform_device *dev)
par->cslut[1] = 0x06;
info->flags = FBINFO_FLAG_DEFAULT;
spin_lock_init(&par->lock);
- retval = register_framebuffer(info);
- if (retval < 0)
- goto err1;
- platform_set_drvdata(dev, info);
if (irq) {
par->irq = irq;
if (request_irq(par->irq, &arcfb_interrupt, IRQF_SHARED,
@@ -558,6 +554,10 @@ static int arcfb_probe(struct platform_device *dev)
goto err1;
}
}
+ retval = register_framebuffer(info);
+ if (retval < 0)
+ goto err1;
+ platform_set_drvdata(dev, info);
fb_info(info, "Arc frame buffer device, using %dK of video memory\n",
videomemorysize >> 10);
@@ -593,6 +593,8 @@ static int arcfb_remove(struct platform_device *dev)
if (info) {
unregister_framebuffer(info);
+ if (irq)
+ free_irq(((struct arcfb_par *)(info->par))->irq, info);
vfree((void __force *)info->screen_base);
framebuffer_release(info);
}
diff --git a/drivers/video/fbdev/atmel_lcdfb.c b/drivers/video/fbdev/atmel_lcdfb.c
index d567f5d56c13..1e252192569a 100644
--- a/drivers/video/fbdev/atmel_lcdfb.c
+++ b/drivers/video/fbdev/atmel_lcdfb.c
@@ -1114,7 +1114,6 @@ static int __init atmel_lcdfb_probe(struct platform_device *pdev)
sinfo->irq_base = platform_get_irq(pdev, 0);
if (sinfo->irq_base < 0) {
- dev_err(dev, "unable to get irq\n");
ret = sinfo->irq_base;
goto stop_clk;
}
diff --git a/drivers/video/fbdev/aty/aty128fb.c b/drivers/video/fbdev/aty/aty128fb.c
index d7e41c8dd533..d05d4195acad 100644
--- a/drivers/video/fbdev/aty/aty128fb.c
+++ b/drivers/video/fbdev/aty/aty128fb.c
@@ -334,20 +334,6 @@ static const struct aty128_meminfo sdr_128 = {
.name = "128-bit SDR SGRAM (1:1)",
};
-static const struct aty128_meminfo sdr_64 = {
- .ML = 4,
- .MB = 8,
- .Trcd = 3,
- .Trp = 3,
- .Twr = 1,
- .CL = 3,
- .Tr2w = 1,
- .LoopLatency = 17,
- .DspOn = 46,
- .Rloop = 17,
- .name = "64-bit SDR SGRAM (1:1)",
-};
-
static const struct aty128_meminfo sdr_sgram = {
.ML = 4,
.MB = 4,
diff --git a/drivers/video/fbdev/aty/atyfb_base.c b/drivers/video/fbdev/aty/atyfb_base.c
index 175d2598f28e..b0ac895e5ac9 100644
--- a/drivers/video/fbdev/aty/atyfb_base.c
+++ b/drivers/video/fbdev/aty/atyfb_base.c
@@ -126,7 +126,7 @@
#ifdef DEBUG
#define DPRINTK(fmt, args...) printk(KERN_DEBUG "atyfb: " fmt, ## args)
#else
-#define DPRINTK(fmt, args...)
+#define DPRINTK(fmt, args...) no_printk(fmt, ##args)
#endif
#define PRINTKI(fmt, args...) printk(KERN_INFO "atyfb: " fmt, ## args)
@@ -3819,9 +3819,9 @@ static int __init atyfb_setup(char *options)
while ((this_opt = strsep(&options, ",")) != NULL) {
if (!strncmp(this_opt, "noaccel", 7)) {
- noaccel = 1;
+ noaccel = true;
} else if (!strncmp(this_opt, "nomtrr", 6)) {
- nomtrr = 1;
+ nomtrr = true;
} else if (!strncmp(this_opt, "vram:", 5))
vram = simple_strtoul(this_opt + 5, NULL, 0);
else if (!strncmp(this_opt, "pll:", 4))
diff --git a/drivers/video/fbdev/controlfb.c b/drivers/video/fbdev/controlfb.c
index 38b61cdb5ca4..9c4f1be856ec 100644
--- a/drivers/video/fbdev/controlfb.c
+++ b/drivers/video/fbdev/controlfb.c
@@ -31,7 +31,6 @@
* more details.
*/
-#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
@@ -48,12 +47,37 @@
#include <linux/nvram.h>
#include <linux/adb.h>
#include <linux/cuda.h>
+#ifdef CONFIG_PPC_PMAC
#include <asm/prom.h>
#include <asm/btext.h>
+#endif
#include "macmodes.h"
#include "controlfb.h"
+#if !defined(CONFIG_PPC_PMAC) || !defined(CONFIG_PPC32)
+#define invalid_vram_cache(addr)
+#undef in_8
+#undef out_8
+#undef in_le32
+#undef out_le32
+#define in_8(addr) 0
+#define out_8(addr, val)
+#define in_le32(addr) 0
+#define out_le32(addr, val)
+#define pgprot_cached_wthru(prot) (prot)
+#else
+static void invalid_vram_cache(void __force *addr)
+{
+ eieio();
+ dcbf(addr);
+ mb();
+ eieio();
+ dcbf(addr);
+ mb();
+}
+#endif
+
struct fb_par_control {
int vmode, cmode;
int xres, yres;
@@ -117,38 +141,6 @@ struct fb_info_control {
#define CNTRL_REG(INFO,REG) (&(((INFO)->control_regs->REG).r))
-/******************** Prototypes for exported functions ********************/
-/*
- * struct fb_ops
- */
-static int controlfb_pan_display(struct fb_var_screeninfo *var,
- struct fb_info *info);
-static int controlfb_setcolreg(u_int regno, u_int red, u_int green, u_int blue,
- u_int transp, struct fb_info *info);
-static int controlfb_blank(int blank_mode, struct fb_info *info);
-static int controlfb_mmap(struct fb_info *info,
- struct vm_area_struct *vma);
-static int controlfb_set_par (struct fb_info *info);
-static int controlfb_check_var (struct fb_var_screeninfo *var, struct fb_info *info);
-
-/******************** Prototypes for internal functions **********************/
-
-static void set_control_clock(unsigned char *params);
-static int init_control(struct fb_info_control *p);
-static void control_set_hardware(struct fb_info_control *p,
- struct fb_par_control *par);
-static int control_of_init(struct device_node *dp);
-static void find_vram_size(struct fb_info_control *p);
-static int read_control_sense(struct fb_info_control *p);
-static int calc_clock_params(unsigned long clk, unsigned char *param);
-static int control_var_to_par(struct fb_var_screeninfo *var,
- struct fb_par_control *par, const struct fb_info *fb_info);
-static inline void control_par_to_var(struct fb_par_control *par,
- struct fb_var_screeninfo *var);
-static void control_init_info(struct fb_info *info, struct fb_info_control *p);
-static void control_cleanup(void);
-
-
/************************** Internal variables *******************************/
static struct fb_info_control *control_fb;
@@ -157,189 +149,6 @@ static int default_vmode __initdata = VMODE_NVRAM;
static int default_cmode __initdata = CMODE_NVRAM;
-static const struct fb_ops controlfb_ops = {
- .owner = THIS_MODULE,
- .fb_check_var = controlfb_check_var,
- .fb_set_par = controlfb_set_par,
- .fb_setcolreg = controlfb_setcolreg,
- .fb_pan_display = controlfb_pan_display,
- .fb_blank = controlfb_blank,
- .fb_mmap = controlfb_mmap,
- .fb_fillrect = cfb_fillrect,
- .fb_copyarea = cfb_copyarea,
- .fb_imageblit = cfb_imageblit,
-};
-
-
-/******************** The functions for controlfb_ops ********************/
-
-#ifdef MODULE
-MODULE_LICENSE("GPL");
-
-int init_module(void)
-{
- struct device_node *dp;
- int ret = -ENXIO;
-
- dp = of_find_node_by_name(NULL, "control");
- if (dp && !control_of_init(dp))
- ret = 0;
- of_node_put(dp);
-
- return ret;
-}
-
-void cleanup_module(void)
-{
- control_cleanup();
-}
-#endif
-
-/*
- * Checks a var structure
- */
-static int controlfb_check_var (struct fb_var_screeninfo *var, struct fb_info *info)
-{
- struct fb_par_control par;
- int err;
-
- err = control_var_to_par(var, &par, info);
- if (err)
- return err;
- control_par_to_var(&par, var);
-
- return 0;
-}
-
-/*
- * Applies current var to display
- */
-static int controlfb_set_par (struct fb_info *info)
-{
- struct fb_info_control *p =
- container_of(info, struct fb_info_control, info);
- struct fb_par_control par;
- int err;
-
- if((err = control_var_to_par(&info->var, &par, info))) {
- printk (KERN_ERR "controlfb_set_par: error calling"
- " control_var_to_par: %d.\n", err);
- return err;
- }
-
- control_set_hardware(p, &par);
-
- info->fix.visual = (p->par.cmode == CMODE_8) ?
- FB_VISUAL_PSEUDOCOLOR : FB_VISUAL_DIRECTCOLOR;
- info->fix.line_length = p->par.pitch;
- info->fix.xpanstep = 32 >> p->par.cmode;
- info->fix.ypanstep = 1;
-
- return 0;
-}
-
-/*
- * Set screen start address according to var offset values
- */
-static inline void set_screen_start(int xoffset, int yoffset,
- struct fb_info_control *p)
-{
- struct fb_par_control *par = &p->par;
-
- par->xoffset = xoffset;
- par->yoffset = yoffset;
- out_le32(CNTRL_REG(p,start_addr),
- par->yoffset * par->pitch + (par->xoffset << par->cmode));
-}
-
-
-static int controlfb_pan_display(struct fb_var_screeninfo *var,
- struct fb_info *info)
-{
- unsigned int xoffset, hstep;
- struct fb_info_control *p =
- container_of(info, struct fb_info_control, info);
- struct fb_par_control *par = &p->par;
-
- /*
- * make sure start addr will be 32-byte aligned
- */
- hstep = 0x1f >> par->cmode;
- xoffset = (var->xoffset + hstep) & ~hstep;
-
- if (xoffset+par->xres > par->vxres ||
- var->yoffset+par->yres > par->vyres)
- return -EINVAL;
-
- set_screen_start(xoffset, var->yoffset, p);
-
- return 0;
-}
-
-
-/*
- * Private mmap since we want to have a different caching on the framebuffer
- * for controlfb.
- * Note there's no locking in here; it's done in fb_mmap() in fbmem.c.
- */
-static int controlfb_mmap(struct fb_info *info,
- struct vm_area_struct *vma)
-{
- unsigned long mmio_pgoff;
- unsigned long start;
- u32 len;
-
- start = info->fix.smem_start;
- len = info->fix.smem_len;
- mmio_pgoff = PAGE_ALIGN((start & ~PAGE_MASK) + len) >> PAGE_SHIFT;
- if (vma->vm_pgoff >= mmio_pgoff) {
- if (info->var.accel_flags)
- return -EINVAL;
- vma->vm_pgoff -= mmio_pgoff;
- start = info->fix.mmio_start;
- len = info->fix.mmio_len;
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
- } else {
- /* framebuffer */
- vma->vm_page_prot = pgprot_cached_wthru(vma->vm_page_prot);
- }
-
- return vm_iomap_memory(vma, start, len);
-}
-
-static int controlfb_blank(int blank_mode, struct fb_info *info)
-{
- struct fb_info_control *p =
- container_of(info, struct fb_info_control, info);
- unsigned ctrl;
-
- ctrl = le32_to_cpup(CNTRL_REG(p,ctrl));
- if (blank_mode > 0)
- switch (blank_mode) {
- case FB_BLANK_VSYNC_SUSPEND:
- ctrl &= ~3;
- break;
- case FB_BLANK_HSYNC_SUSPEND:
- ctrl &= ~0x30;
- break;
- case FB_BLANK_POWERDOWN:
- ctrl &= ~0x33;
- /* fall through */
- case FB_BLANK_NORMAL:
- ctrl |= 0x400;
- break;
- default:
- break;
- }
- else {
- ctrl &= ~0x400;
- ctrl |= 0x33;
- }
- out_le32(CNTRL_REG(p,ctrl), ctrl);
-
- return 0;
-}
-
static int controlfb_setcolreg(u_int regno, u_int red, u_int green, u_int blue,
u_int transp, struct fb_info *info)
{
@@ -396,75 +205,18 @@ static void set_control_clock(unsigned char *params)
#endif
}
-
/*
- * finish off the driver initialization and register
+ * Set screen start address according to var offset values
*/
-static int __init init_control(struct fb_info_control *p)
+static inline void set_screen_start(int xoffset, int yoffset,
+ struct fb_info_control *p)
{
- int full, sense, vmode, cmode, vyres;
- struct fb_var_screeninfo var;
- int rc;
-
- printk(KERN_INFO "controlfb: ");
-
- full = p->total_vram == 0x400000;
-
- /* Try to pick a video mode out of NVRAM if we have one. */
- cmode = default_cmode;
- if (IS_REACHABLE(CONFIG_NVRAM) && cmode == CMODE_NVRAM)
- cmode = nvram_read_byte(NV_CMODE);
- if (cmode < CMODE_8 || cmode > CMODE_32)
- cmode = CMODE_8;
-
- vmode = default_vmode;
- if (IS_REACHABLE(CONFIG_NVRAM) && vmode == VMODE_NVRAM)
- vmode = nvram_read_byte(NV_VMODE);
- if (vmode < 1 || vmode > VMODE_MAX ||
- control_mac_modes[vmode - 1].m[full] < cmode) {
- sense = read_control_sense(p);
- printk(KERN_CONT "Monitor sense value = 0x%x, ", sense);
- vmode = mac_map_monitor_sense(sense);
- if (control_mac_modes[vmode - 1].m[full] < 0)
- vmode = VMODE_640_480_60;
- cmode = min(cmode, control_mac_modes[vmode - 1].m[full]);
- }
-
- /* Initialize info structure */
- control_init_info(&p->info, p);
-
- /* Setup default var */
- if (mac_vmode_to_var(vmode, cmode, &var) < 0) {
- /* This shouldn't happen! */
- printk("mac_vmode_to_var(%d, %d,) failed\n", vmode, cmode);
-try_again:
- vmode = VMODE_640_480_60;
- cmode = CMODE_8;
- if (mac_vmode_to_var(vmode, cmode, &var) < 0) {
- printk(KERN_ERR "controlfb: mac_vmode_to_var() failed\n");
- return -ENXIO;
- }
- printk(KERN_INFO "controlfb: ");
- }
- printk("using video mode %d and color mode %d.\n", vmode, cmode);
-
- vyres = (p->total_vram - CTRLFB_OFF) / (var.xres << cmode);
- if (vyres > var.yres)
- var.yres_virtual = vyres;
-
- /* Apply default var */
- var.activate = FB_ACTIVATE_NOW;
- rc = fb_set_var(&p->info, &var);
- if (rc && (vmode != VMODE_640_480_60 || cmode != CMODE_8))
- goto try_again;
-
- /* Register with fbdev layer */
- if (register_framebuffer(&p->info) < 0)
- return -ENXIO;
-
- fb_info(&p->info, "control display adapter\n");
+ struct fb_par_control *par = &p->par;
- return 0;
+ par->xoffset = xoffset;
+ par->yoffset = yoffset;
+ out_le32(CNTRL_REG(p,start_addr),
+ par->yoffset * par->pitch + (par->xoffset << par->cmode));
}
#define RADACAL_WRITE(a,d) \
@@ -528,67 +280,6 @@ static void control_set_hardware(struct fb_info_control *p, struct fb_par_contro
#endif /* CONFIG_BOOTX_TEXT */
}
-
-/*
- * Parse user specified options (`video=controlfb:')
- */
-static void __init control_setup(char *options)
-{
- char *this_opt;
-
- if (!options || !*options)
- return;
-
- while ((this_opt = strsep(&options, ",")) != NULL) {
- if (!strncmp(this_opt, "vmode:", 6)) {
- int vmode = simple_strtoul(this_opt+6, NULL, 0);
- if (vmode > 0 && vmode <= VMODE_MAX &&
- control_mac_modes[vmode - 1].m[1] >= 0)
- default_vmode = vmode;
- } else if (!strncmp(this_opt, "cmode:", 6)) {
- int depth = simple_strtoul(this_opt+6, NULL, 0);
- switch (depth) {
- case CMODE_8:
- case CMODE_16:
- case CMODE_32:
- default_cmode = depth;
- break;
- case 8:
- default_cmode = CMODE_8;
- break;
- case 15:
- case 16:
- default_cmode = CMODE_16;
- break;
- case 24:
- case 32:
- default_cmode = CMODE_32;
- break;
- }
- }
- }
-}
-
-static int __init control_init(void)
-{
- struct device_node *dp;
- char *option = NULL;
- int ret = -ENXIO;
-
- if (fb_get_options("controlfb", &option))
- return -ENODEV;
- control_setup(option);
-
- dp = of_find_node_by_name(NULL, "control");
- if (dp && !control_of_init(dp))
- ret = 0;
- of_node_put(dp);
-
- return ret;
-}
-
-module_init(control_init);
-
/* Work out which banks of VRAM we have installed. */
/* danj: I guess the card just ignores writes to nonexistent VRAM... */
@@ -605,12 +296,7 @@ static void __init find_vram_size(struct fb_info_control *p)
out_8(&p->frame_buffer[0x600000], 0xb3);
out_8(&p->frame_buffer[0x600001], 0x71);
- asm volatile("eieio; dcbf 0,%0" : : "r" (&p->frame_buffer[0x600000])
- : "memory" );
- mb();
- asm volatile("eieio; dcbi 0,%0" : : "r" (&p->frame_buffer[0x600000])
- : "memory" );
- mb();
+ invalid_vram_cache(&p->frame_buffer[0x600000]);
bank2 = (in_8(&p->frame_buffer[0x600000]) == 0xb3)
&& (in_8(&p->frame_buffer[0x600001]) == 0x71);
@@ -624,12 +310,7 @@ static void __init find_vram_size(struct fb_info_control *p)
out_8(&p->frame_buffer[0], 0x5a);
out_8(&p->frame_buffer[1], 0xc7);
- asm volatile("eieio; dcbf 0,%0" : : "r" (&p->frame_buffer[0])
- : "memory" );
- mb();
- asm volatile("eieio; dcbi 0,%0" : : "r" (&p->frame_buffer[0])
- : "memory" );
- mb();
+ invalid_vram_cache(&p->frame_buffer[0]);
bank1 = (in_8(&p->frame_buffer[0]) == 0x5a)
&& (in_8(&p->frame_buffer[1]) == 0xc7);
@@ -663,78 +344,6 @@ static void __init find_vram_size(struct fb_info_control *p)
(bank1 + bank2) << 1, bank1 << 1, bank2 << 1);
}
-
-/*
- * find "control" and initialize
- */
-static int __init control_of_init(struct device_node *dp)
-{
- struct fb_info_control *p;
- struct resource fb_res, reg_res;
-
- if (control_fb) {
- printk(KERN_ERR "controlfb: only one control is supported\n");
- return -ENXIO;
- }
-
- if (of_pci_address_to_resource(dp, 2, &fb_res) ||
- of_pci_address_to_resource(dp, 1, &reg_res)) {
- printk(KERN_ERR "can't get 2 addresses for control\n");
- return -ENXIO;
- }
- p = kzalloc(sizeof(*p), GFP_KERNEL);
- if (!p)
- return -ENOMEM;
- control_fb = p; /* save it for cleanups */
-
- /* Map in frame buffer and registers */
- p->fb_orig_base = fb_res.start;
- p->fb_orig_size = resource_size(&fb_res);
- /* use the big-endian aperture (??) */
- p->frame_buffer_phys = fb_res.start + 0x800000;
- p->control_regs_phys = reg_res.start;
- p->control_regs_size = resource_size(&reg_res);
-
- if (!p->fb_orig_base ||
- !request_mem_region(p->fb_orig_base,p->fb_orig_size,"controlfb")) {
- p->fb_orig_base = 0;
- goto error_out;
- }
- /* map at most 8MB for the frame buffer */
- p->frame_buffer = ioremap_wt(p->frame_buffer_phys, 0x800000);
-
- if (!p->control_regs_phys ||
- !request_mem_region(p->control_regs_phys, p->control_regs_size,
- "controlfb regs")) {
- p->control_regs_phys = 0;
- goto error_out;
- }
- p->control_regs = ioremap(p->control_regs_phys, p->control_regs_size);
-
- p->cmap_regs_phys = 0xf301b000; /* XXX not in prom? */
- if (!request_mem_region(p->cmap_regs_phys, 0x1000, "controlfb cmap")) {
- p->cmap_regs_phys = 0;
- goto error_out;
- }
- p->cmap_regs = ioremap(p->cmap_regs_phys, 0x1000);
-
- if (!p->cmap_regs || !p->control_regs || !p->frame_buffer)
- goto error_out;
-
- find_vram_size(p);
- if (!p->total_vram)
- goto error_out;
-
- if (init_control(p) < 0)
- goto error_out;
-
- return 0;
-
-error_out:
- control_cleanup();
- return -ENXIO;
-}
-
/*
* Get the monitor sense value.
* Note that this can be called before calibrate_delay,
@@ -1019,6 +628,150 @@ static void control_par_to_var(struct fb_par_control *par, struct fb_var_screeni
var->pixclock >>= par->regvals.clock_params[2];
}
+/******************** The functions for controlfb_ops ********************/
+
+/*
+ * Checks a var structure
+ */
+static int controlfb_check_var (struct fb_var_screeninfo *var, struct fb_info *info)
+{
+ struct fb_par_control par;
+ int err;
+
+ err = control_var_to_par(var, &par, info);
+ if (err)
+ return err;
+ control_par_to_var(&par, var);
+
+ return 0;
+}
+
+/*
+ * Applies current var to display
+ */
+static int controlfb_set_par (struct fb_info *info)
+{
+ struct fb_info_control *p =
+ container_of(info, struct fb_info_control, info);
+ struct fb_par_control par;
+ int err;
+
+ if((err = control_var_to_par(&info->var, &par, info))) {
+ printk (KERN_ERR "controlfb_set_par: error calling"
+ " control_var_to_par: %d.\n", err);
+ return err;
+ }
+
+ control_set_hardware(p, &par);
+
+ info->fix.visual = (p->par.cmode == CMODE_8) ?
+ FB_VISUAL_PSEUDOCOLOR : FB_VISUAL_DIRECTCOLOR;
+ info->fix.line_length = p->par.pitch;
+ info->fix.xpanstep = 32 >> p->par.cmode;
+ info->fix.ypanstep = 1;
+
+ return 0;
+}
+
+static int controlfb_pan_display(struct fb_var_screeninfo *var,
+ struct fb_info *info)
+{
+ unsigned int xoffset, hstep;
+ struct fb_info_control *p =
+ container_of(info, struct fb_info_control, info);
+ struct fb_par_control *par = &p->par;
+
+ /*
+ * make sure start addr will be 32-byte aligned
+ */
+ hstep = 0x1f >> par->cmode;
+ xoffset = (var->xoffset + hstep) & ~hstep;
+
+ if (xoffset+par->xres > par->vxres ||
+ var->yoffset+par->yres > par->vyres)
+ return -EINVAL;
+
+ set_screen_start(xoffset, var->yoffset, p);
+
+ return 0;
+}
+
+static int controlfb_blank(int blank_mode, struct fb_info *info)
+{
+ struct fb_info_control __maybe_unused *p =
+ container_of(info, struct fb_info_control, info);
+ unsigned ctrl;
+
+ ctrl = in_le32(CNTRL_REG(p, ctrl));
+ if (blank_mode > 0)
+ switch (blank_mode) {
+ case FB_BLANK_VSYNC_SUSPEND:
+ ctrl &= ~3;
+ break;
+ case FB_BLANK_HSYNC_SUSPEND:
+ ctrl &= ~0x30;
+ break;
+ case FB_BLANK_POWERDOWN:
+ ctrl &= ~0x33;
+ /* fall through */
+ case FB_BLANK_NORMAL:
+ ctrl |= 0x400;
+ break;
+ default:
+ break;
+ }
+ else {
+ ctrl &= ~0x400;
+ ctrl |= 0x33;
+ }
+ out_le32(CNTRL_REG(p,ctrl), ctrl);
+
+ return 0;
+}
+
+/*
+ * Private mmap since we want to have a different caching on the framebuffer
+ * for controlfb.
+ * Note there's no locking in here; it's done in fb_mmap() in fbmem.c.
+ */
+static int controlfb_mmap(struct fb_info *info,
+ struct vm_area_struct *vma)
+{
+ unsigned long mmio_pgoff;
+ unsigned long start;
+ u32 len;
+
+ start = info->fix.smem_start;
+ len = info->fix.smem_len;
+ mmio_pgoff = PAGE_ALIGN((start & ~PAGE_MASK) + len) >> PAGE_SHIFT;
+ if (vma->vm_pgoff >= mmio_pgoff) {
+ if (info->var.accel_flags)
+ return -EINVAL;
+ vma->vm_pgoff -= mmio_pgoff;
+ start = info->fix.mmio_start;
+ len = info->fix.mmio_len;
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ } else {
+ /* framebuffer */
+ vma->vm_page_prot = pgprot_cached_wthru(vma->vm_page_prot);
+ }
+
+ return vm_iomap_memory(vma, start, len);
+}
+
+static const struct fb_ops controlfb_ops = {
+ .owner = THIS_MODULE,
+ .fb_check_var = controlfb_check_var,
+ .fb_set_par = controlfb_set_par,
+ .fb_setcolreg = controlfb_setcolreg,
+ .fb_pan_display = controlfb_pan_display,
+ .fb_blank = controlfb_blank,
+ .fb_mmap = controlfb_mmap,
+ .fb_fillrect = cfb_fillrect,
+ .fb_copyarea = cfb_copyarea,
+ .fb_imageblit = cfb_imageblit,
+};
+
/*
* Set misc info vars for this driver
*/
@@ -1045,6 +798,115 @@ static void __init control_init_info(struct fb_info *info, struct fb_info_contro
info->fix.accel = FB_ACCEL_NONE;
}
+/*
+ * Parse user specified options (`video=controlfb:')
+ */
+static void __init control_setup(char *options)
+{
+ char *this_opt;
+
+ if (!options || !*options)
+ return;
+
+ while ((this_opt = strsep(&options, ",")) != NULL) {
+ if (!strncmp(this_opt, "vmode:", 6)) {
+ int vmode = simple_strtoul(this_opt+6, NULL, 0);
+ if (vmode > 0 && vmode <= VMODE_MAX &&
+ control_mac_modes[vmode - 1].m[1] >= 0)
+ default_vmode = vmode;
+ } else if (!strncmp(this_opt, "cmode:", 6)) {
+ int depth = simple_strtoul(this_opt+6, NULL, 0);
+ switch (depth) {
+ case CMODE_8:
+ case CMODE_16:
+ case CMODE_32:
+ default_cmode = depth;
+ break;
+ case 8:
+ default_cmode = CMODE_8;
+ break;
+ case 15:
+ case 16:
+ default_cmode = CMODE_16;
+ break;
+ case 24:
+ case 32:
+ default_cmode = CMODE_32;
+ break;
+ }
+ }
+ }
+}
+
+/*
+ * finish off the driver initialization and register
+ */
+static int __init init_control(struct fb_info_control *p)
+{
+ int full, sense, vmode, cmode, vyres;
+ struct fb_var_screeninfo var;
+ int rc;
+
+ printk(KERN_INFO "controlfb: ");
+
+ full = p->total_vram == 0x400000;
+
+ /* Try to pick a video mode out of NVRAM if we have one. */
+ cmode = default_cmode;
+ if (IS_REACHABLE(CONFIG_NVRAM) && cmode == CMODE_NVRAM)
+ cmode = nvram_read_byte(NV_CMODE);
+ if (cmode < CMODE_8 || cmode > CMODE_32)
+ cmode = CMODE_8;
+
+ vmode = default_vmode;
+ if (IS_REACHABLE(CONFIG_NVRAM) && vmode == VMODE_NVRAM)
+ vmode = nvram_read_byte(NV_VMODE);
+ if (vmode < 1 || vmode > VMODE_MAX ||
+ control_mac_modes[vmode - 1].m[full] < cmode) {
+ sense = read_control_sense(p);
+ printk(KERN_CONT "Monitor sense value = 0x%x, ", sense);
+ vmode = mac_map_monitor_sense(sense);
+ if (control_mac_modes[vmode - 1].m[full] < 0)
+ vmode = VMODE_640_480_60;
+ cmode = min(cmode, control_mac_modes[vmode - 1].m[full]);
+ }
+
+ /* Initialize info structure */
+ control_init_info(&p->info, p);
+
+ /* Setup default var */
+ if (mac_vmode_to_var(vmode, cmode, &var) < 0) {
+ /* This shouldn't happen! */
+ printk("mac_vmode_to_var(%d, %d,) failed\n", vmode, cmode);
+try_again:
+ vmode = VMODE_640_480_60;
+ cmode = CMODE_8;
+ if (mac_vmode_to_var(vmode, cmode, &var) < 0) {
+ printk(KERN_ERR "controlfb: mac_vmode_to_var() failed\n");
+ return -ENXIO;
+ }
+ printk(KERN_INFO "controlfb: ");
+ }
+ printk("using video mode %d and color mode %d.\n", vmode, cmode);
+
+ vyres = (p->total_vram - CTRLFB_OFF) / (var.xres << cmode);
+ if (vyres > var.yres)
+ var.yres_virtual = vyres;
+
+ /* Apply default var */
+ var.activate = FB_ACTIVATE_NOW;
+ rc = fb_set_var(&p->info, &var);
+ if (rc && (vmode != VMODE_640_480_60 || cmode != CMODE_8))
+ goto try_again;
+
+ /* Register with fbdev layer */
+ if (register_framebuffer(&p->info) < 0)
+ return -ENXIO;
+
+ fb_info(&p->info, "control display adapter\n");
+
+ return 0;
+}
static void control_cleanup(void)
{
@@ -1071,4 +933,93 @@ static void control_cleanup(void)
kfree(p);
}
+/*
+ * find "control" and initialize
+ */
+static int __init control_of_init(struct device_node *dp)
+{
+ struct fb_info_control *p;
+ struct resource fb_res, reg_res;
+
+ if (control_fb) {
+ printk(KERN_ERR "controlfb: only one control is supported\n");
+ return -ENXIO;
+ }
+
+ if (of_pci_address_to_resource(dp, 2, &fb_res) ||
+ of_pci_address_to_resource(dp, 1, &reg_res)) {
+ printk(KERN_ERR "can't get 2 addresses for control\n");
+ return -ENXIO;
+ }
+ p = kzalloc(sizeof(*p), GFP_KERNEL);
+ if (!p)
+ return -ENOMEM;
+ control_fb = p; /* save it for cleanups */
+
+ /* Map in frame buffer and registers */
+ p->fb_orig_base = fb_res.start;
+ p->fb_orig_size = resource_size(&fb_res);
+ /* use the big-endian aperture (??) */
+ p->frame_buffer_phys = fb_res.start + 0x800000;
+ p->control_regs_phys = reg_res.start;
+ p->control_regs_size = resource_size(&reg_res);
+
+ if (!p->fb_orig_base ||
+ !request_mem_region(p->fb_orig_base,p->fb_orig_size,"controlfb")) {
+ p->fb_orig_base = 0;
+ goto error_out;
+ }
+ /* map at most 8MB for the frame buffer */
+ p->frame_buffer = ioremap_wt(p->frame_buffer_phys, 0x800000);
+
+ if (!p->control_regs_phys ||
+ !request_mem_region(p->control_regs_phys, p->control_regs_size,
+ "controlfb regs")) {
+ p->control_regs_phys = 0;
+ goto error_out;
+ }
+ p->control_regs = ioremap(p->control_regs_phys, p->control_regs_size);
+
+ p->cmap_regs_phys = 0xf301b000; /* XXX not in prom? */
+ if (!request_mem_region(p->cmap_regs_phys, 0x1000, "controlfb cmap")) {
+ p->cmap_regs_phys = 0;
+ goto error_out;
+ }
+ p->cmap_regs = ioremap(p->cmap_regs_phys, 0x1000);
+
+ if (!p->cmap_regs || !p->control_regs || !p->frame_buffer)
+ goto error_out;
+
+ find_vram_size(p);
+ if (!p->total_vram)
+ goto error_out;
+
+ if (init_control(p) < 0)
+ goto error_out;
+
+ return 0;
+
+error_out:
+ control_cleanup();
+ return -ENXIO;
+}
+
+static int __init control_init(void)
+{
+ struct device_node *dp;
+ char *option = NULL;
+ int ret = -ENXIO;
+
+ if (fb_get_options("controlfb", &option))
+ return -ENODEV;
+ control_setup(option);
+
+ dp = of_find_node_by_name(NULL, "control");
+ if (dp && !control_of_init(dp))
+ ret = 0;
+ of_node_put(dp);
+
+ return ret;
+}
+device_initcall(control_init);
diff --git a/drivers/video/fbdev/core/fbmon.c b/drivers/video/fbdev/core/fbmon.c
index 8e2e19f3bf44..d62a1e43864e 100644
--- a/drivers/video/fbdev/core/fbmon.c
+++ b/drivers/video/fbdev/core/fbmon.c
@@ -44,7 +44,7 @@
#ifdef DEBUG
#define DPRINTK(fmt, args...) printk(fmt,## args)
#else
-#define DPRINTK(fmt, args...)
+#define DPRINTK(fmt, args...) no_printk(fmt, ##args)
#endif
#define FBMON_FIX_HEADER 1
diff --git a/drivers/video/fbdev/cyber2000fb.c b/drivers/video/fbdev/cyber2000fb.c
index 460826a7ad55..513f58f28b0f 100644
--- a/drivers/video/fbdev/cyber2000fb.c
+++ b/drivers/video/fbdev/cyber2000fb.c
@@ -1160,12 +1160,14 @@ EXPORT_SYMBOL(cyber2000fb_detach);
#define DDC_SDA_IN (1 << 6)
static void cyber2000fb_enable_ddc(struct cfb_info *cfb)
+ __acquires(&cfb->reg_b0_lock)
{
spin_lock(&cfb->reg_b0_lock);
cyber2000fb_writew(0x1bf, 0x3ce, cfb);
}
static void cyber2000fb_disable_ddc(struct cfb_info *cfb)
+ __releases(&cfb->reg_b0_lock)
{
cyber2000fb_writew(0x0bf, 0x3ce, cfb);
spin_unlock(&cfb->reg_b0_lock);
diff --git a/drivers/video/fbdev/i810/i810_main.c b/drivers/video/fbdev/i810/i810_main.c
index aa7583d963ac..13bbf7fe13bf 100644
--- a/drivers/video/fbdev/i810/i810_main.c
+++ b/drivers/video/fbdev/i810/i810_main.c
@@ -1966,13 +1966,13 @@ static int i810fb_setup(char *options)
while ((this_opt = strsep(&options, ",")) != NULL) {
if (!strncmp(this_opt, "mtrr", 4))
- mtrr = 1;
+ mtrr = true;
else if (!strncmp(this_opt, "accel", 5))
- accel = 1;
+ accel = true;
else if (!strncmp(this_opt, "extvga", 6))
- extvga = 1;
+ extvga = true;
else if (!strncmp(this_opt, "sync", 4))
- sync = 1;
+ sync = true;
else if (!strncmp(this_opt, "vram:", 5))
vram = (simple_strtoul(this_opt+5, NULL, 0));
else if (!strncmp(this_opt, "voffset:", 8))
@@ -1998,7 +1998,7 @@ static int i810fb_setup(char *options)
else if (!strncmp(this_opt, "vsync2:", 7))
vsync2 = simple_strtoul(this_opt+7, NULL, 0);
else if (!strncmp(this_opt, "dcolor", 6))
- dcolor = 1;
+ dcolor = true;
else if (!strncmp(this_opt, "ddc3", 4))
ddc3 = true;
else
diff --git a/drivers/video/fbdev/imxfb.c b/drivers/video/fbdev/imxfb.c
index 370bf2553d43..884b16efa7e8 100644
--- a/drivers/video/fbdev/imxfb.c
+++ b/drivers/video/fbdev/imxfb.c
@@ -172,6 +172,7 @@ struct imxfb_info {
int num_modes;
struct regulator *lcd_pwr;
+ int lcd_pwr_enabled;
};
static const struct platform_device_id imxfb_devtype[] = {
@@ -801,16 +802,30 @@ static int imxfb_lcd_get_power(struct lcd_device *lcddev)
return FB_BLANK_UNBLANK;
}
+static int imxfb_regulator_set(struct imxfb_info *fbi, int enable)
+{
+ int ret;
+
+ if (enable == fbi->lcd_pwr_enabled)
+ return 0;
+
+ if (enable)
+ ret = regulator_enable(fbi->lcd_pwr);
+ else
+ ret = regulator_disable(fbi->lcd_pwr);
+
+ if (ret == 0)
+ fbi->lcd_pwr_enabled = enable;
+
+ return ret;
+}
+
static int imxfb_lcd_set_power(struct lcd_device *lcddev, int power)
{
struct imxfb_info *fbi = dev_get_drvdata(&lcddev->dev);
- if (!IS_ERR(fbi->lcd_pwr)) {
- if (power == FB_BLANK_UNBLANK)
- return regulator_enable(fbi->lcd_pwr);
- else
- return regulator_disable(fbi->lcd_pwr);
- }
+ if (!IS_ERR(fbi->lcd_pwr))
+ return imxfb_regulator_set(fbi, power == FB_BLANK_UNBLANK);
return 0;
}
diff --git a/drivers/video/fbdev/matrox/g450_pll.c b/drivers/video/fbdev/matrox/g450_pll.c
index c15f8a57498e..ff8e321a22ce 100644
--- a/drivers/video/fbdev/matrox/g450_pll.c
+++ b/drivers/video/fbdev/matrox/g450_pll.c
@@ -333,11 +333,9 @@ static int __g450_setclk(struct matrox_fb_info *minfo, unsigned int fout,
unsigned int *deltaarray)
{
unsigned int mnpcount;
- unsigned int pixel_vco;
const struct matrox_pll_limits* pi;
struct matrox_pll_cache* ci;
- pixel_vco = 0;
switch (pll) {
case M_PIXEL_PLL_A:
case M_PIXEL_PLL_B:
@@ -420,7 +418,6 @@ static int __g450_setclk(struct matrox_fb_info *minfo, unsigned int fout,
mnp = matroxfb_DAC_in(minfo, M1064_XPIXPLLCM) << 16;
mnp |= matroxfb_DAC_in(minfo, M1064_XPIXPLLCN) << 8;
- pixel_vco = g450_mnp2vco(minfo, mnp);
matroxfb_DAC_unlock_irqrestore(flags);
}
pi = &minfo->limits.video;
@@ -441,25 +438,6 @@ static int __g450_setclk(struct matrox_fb_info *minfo, unsigned int fout,
unsigned int delta;
vco = g450_mnp2vco(minfo, mnp);
-#if 0
- if (pll == M_VIDEO_PLL) {
- unsigned int big, small;
-
- if (vco < pixel_vco) {
- small = vco;
- big = pixel_vco;
- } else {
- small = pixel_vco;
- big = vco;
- }
- while (big > small) {
- big >>= 1;
- }
- if (big == small) {
- continue;
- }
- }
-#endif
delta = pll_freq_delta(fout, g450_vco2f(mnp, vco));
for (idx = mnpcount; idx > 0; idx--) {
/* == is important; due to nextpll algorithm we get
diff --git a/drivers/video/fbdev/matrox/matroxfb_base.h b/drivers/video/fbdev/matrox/matroxfb_base.h
index f85ad25659e5..759dee996af1 100644
--- a/drivers/video/fbdev/matrox/matroxfb_base.h
+++ b/drivers/video/fbdev/matrox/matroxfb_base.h
@@ -86,7 +86,7 @@
#ifdef DEBUG
#define dprintk(X...) printk(X)
#else
-#define dprintk(X...)
+#define dprintk(X...) no_printk(X)
#endif
#ifndef PCI_SS_VENDOR_ID_SIEMENS_NIXDORF
diff --git a/drivers/video/fbdev/mb862xx/mb862xxfb_accel.c b/drivers/video/fbdev/mb862xx/mb862xxfb_accel.c
index 42569264801f..d40b806461ca 100644
--- a/drivers/video/fbdev/mb862xx/mb862xxfb_accel.c
+++ b/drivers/video/fbdev/mb862xx/mb862xxfb_accel.c
@@ -184,7 +184,6 @@ static void mb86290fb_imageblit16(u32 *cmd, u16 step, u16 dx, u16 dy,
static void mb86290fb_imageblit(struct fb_info *info,
const struct fb_image *image)
{
- int mdr;
u32 *cmd = NULL;
void (*cmdfn) (u32 *, u16, u16, u16, u16, u16, u32, u32,
const struct fb_image *, struct fb_info *) = NULL;
@@ -196,7 +195,6 @@ static void mb86290fb_imageblit(struct fb_info *info,
u16 dx = image->dx, dy = image->dy;
int x2, y2, vxres, vyres;
- mdr = (GDC_ROP_COPY << 9);
x2 = image->dx + image->width;
y2 = image->dy + image->height;
vxres = info->var.xres_virtual;
diff --git a/drivers/video/fbdev/mx3fb.c b/drivers/video/fbdev/mx3fb.c
index 4af28e4421e5..603731a5a72e 100644
--- a/drivers/video/fbdev/mx3fb.c
+++ b/drivers/video/fbdev/mx3fb.c
@@ -509,7 +509,7 @@ static int sdc_init_panel(struct mx3fb_data *mx3fb, enum ipu_panel panel,
uint16_t h_start_width, uint16_t h_sync_width,
uint16_t h_end_width, uint16_t v_start_width,
uint16_t v_sync_width, uint16_t v_end_width,
- struct ipu_di_signal_cfg sig)
+ const struct ipu_di_signal_cfg *sig)
{
unsigned long lock_flags;
uint32_t reg;
@@ -591,17 +591,17 @@ static int sdc_init_panel(struct mx3fb_data *mx3fb, enum ipu_panel panel,
/* DI settings */
old_conf = mx3fb_read_reg(mx3fb, DI_DISP_IF_CONF) & 0x78FFFFFF;
- old_conf |= sig.datamask_en << DI_D3_DATAMSK_SHIFT |
- sig.clksel_en << DI_D3_CLK_SEL_SHIFT |
- sig.clkidle_en << DI_D3_CLK_IDLE_SHIFT;
+ old_conf |= sig->datamask_en << DI_D3_DATAMSK_SHIFT |
+ sig->clksel_en << DI_D3_CLK_SEL_SHIFT |
+ sig->clkidle_en << DI_D3_CLK_IDLE_SHIFT;
mx3fb_write_reg(mx3fb, old_conf, DI_DISP_IF_CONF);
old_conf = mx3fb_read_reg(mx3fb, DI_DISP_SIG_POL) & 0xE0FFFFFF;
- old_conf |= sig.data_pol << DI_D3_DATA_POL_SHIFT |
- sig.clk_pol << DI_D3_CLK_POL_SHIFT |
- sig.enable_pol << DI_D3_DRDY_SHARP_POL_SHIFT |
- sig.Hsync_pol << DI_D3_HSYNC_POL_SHIFT |
- sig.Vsync_pol << DI_D3_VSYNC_POL_SHIFT;
+ old_conf |= sig->data_pol << DI_D3_DATA_POL_SHIFT |
+ sig->clk_pol << DI_D3_CLK_POL_SHIFT |
+ sig->enable_pol << DI_D3_DRDY_SHARP_POL_SHIFT |
+ sig->Hsync_pol << DI_D3_HSYNC_POL_SHIFT |
+ sig->Vsync_pol << DI_D3_VSYNC_POL_SHIFT;
mx3fb_write_reg(mx3fb, old_conf, DI_DISP_SIG_POL);
map = &di_mappings[mx3fb->disp_data_fmt];
@@ -855,7 +855,7 @@ static int __set_par(struct fb_info *fbi, bool lock)
fbi->var.upper_margin,
fbi->var.vsync_len,
fbi->var.lower_margin +
- fbi->var.vsync_len, sig_cfg) != 0) {
+ fbi->var.vsync_len, &sig_cfg) != 0) {
dev_err(fbi->device,
"mx3fb: Error initializing panel.\n");
return -EINVAL;
diff --git a/drivers/video/fbdev/omap/omapfb_main.c b/drivers/video/fbdev/omap/omapfb_main.c
index e8a304f84ea8..1a9d6242916e 100644
--- a/drivers/video/fbdev/omap/omapfb_main.c
+++ b/drivers/video/fbdev/omap/omapfb_main.c
@@ -1247,7 +1247,7 @@ static ssize_t omapfb_show_caps_num(struct device *dev,
size = 0;
while (size < PAGE_SIZE && plane < OMAPFB_PLANE_NUM) {
omapfb_get_caps(fbdev, plane, &caps);
- size += snprintf(&buf[size], PAGE_SIZE - size,
+ size += scnprintf(&buf[size], PAGE_SIZE - size,
"plane#%d %#010x %#010x %#010x\n",
plane, caps.ctrl, caps.plane_color, caps.wnd_color);
plane++;
@@ -1268,28 +1268,28 @@ static ssize_t omapfb_show_caps_text(struct device *dev,
size = 0;
while (size < PAGE_SIZE && plane < OMAPFB_PLANE_NUM) {
omapfb_get_caps(fbdev, plane, &caps);
- size += snprintf(&buf[size], PAGE_SIZE - size,
+ size += scnprintf(&buf[size], PAGE_SIZE - size,
"plane#%d:\n", plane);
for (i = 0; i < ARRAY_SIZE(ctrl_caps) &&
size < PAGE_SIZE; i++) {
if (ctrl_caps[i].flag & caps.ctrl)
- size += snprintf(&buf[size], PAGE_SIZE - size,
+ size += scnprintf(&buf[size], PAGE_SIZE - size,
" %s\n", ctrl_caps[i].name);
}
- size += snprintf(&buf[size], PAGE_SIZE - size,
+ size += scnprintf(&buf[size], PAGE_SIZE - size,
" plane colors:\n");
for (i = 0; i < ARRAY_SIZE(color_caps) &&
size < PAGE_SIZE; i++) {
if (color_caps[i].flag & caps.plane_color)
- size += snprintf(&buf[size], PAGE_SIZE - size,
+ size += scnprintf(&buf[size], PAGE_SIZE - size,
" %s\n", color_caps[i].name);
}
- size += snprintf(&buf[size], PAGE_SIZE - size,
+ size += scnprintf(&buf[size], PAGE_SIZE - size,
" window colors:\n");
for (i = 0; i < ARRAY_SIZE(color_caps) &&
size < PAGE_SIZE; i++) {
if (color_caps[i].flag & caps.wnd_color)
- size += snprintf(&buf[size], PAGE_SIZE - size,
+ size += scnprintf(&buf[size], PAGE_SIZE - size,
" %s\n", color_caps[i].name);
}
diff --git a/drivers/video/fbdev/omap2/omapfb/dss/dispc.c b/drivers/video/fbdev/omap2/omapfb/dss/dispc.c
index ce37da85cc45..4a16798b2ecd 100644
--- a/drivers/video/fbdev/omap2/omapfb/dss/dispc.c
+++ b/drivers/video/fbdev/omap2/omapfb/dss/dispc.c
@@ -557,11 +557,6 @@ u32 dispc_mgr_get_sync_lost_irq(enum omap_channel channel)
}
EXPORT_SYMBOL(dispc_mgr_get_sync_lost_irq);
-u32 dispc_wb_get_framedone_irq(void)
-{
- return DISPC_IRQ_FRAMEDONEWB;
-}
-
bool dispc_mgr_go_busy(enum omap_channel channel)
{
return mgr_fld_read(channel, DISPC_MGR_FLD_GO) == 1;
@@ -579,30 +574,6 @@ void dispc_mgr_go(enum omap_channel channel)
}
EXPORT_SYMBOL(dispc_mgr_go);
-bool dispc_wb_go_busy(void)
-{
- return REG_GET(DISPC_CONTROL2, 6, 6) == 1;
-}
-
-void dispc_wb_go(void)
-{
- enum omap_plane plane = OMAP_DSS_WB;
- bool enable, go;
-
- enable = REG_GET(DISPC_OVL_ATTRIBUTES(plane), 0, 0) == 1;
-
- if (!enable)
- return;
-
- go = REG_GET(DISPC_CONTROL2, 6, 6) == 1;
- if (go) {
- DSSERR("GO bit not down for WB\n");
- return;
- }
-
- REG_FLD_MOD(DISPC_CONTROL2, 1, 6, 6);
-}
-
static void dispc_ovl_write_firh_reg(enum omap_plane plane, int reg, u32 value)
{
dispc_write_reg(DISPC_OVL_FIR_COEF_H(plane, reg), value);
@@ -1028,13 +999,6 @@ static enum omap_channel dispc_ovl_get_channel_out(enum omap_plane plane)
}
}
-void dispc_wb_set_channel_in(enum dss_writeback_channel channel)
-{
- enum omap_plane plane = OMAP_DSS_WB;
-
- REG_FLD_MOD(DISPC_OVL_ATTRIBUTES(plane), channel, 18, 16);
-}
-
static void dispc_ovl_set_burst_size(enum omap_plane plane,
enum omap_burst_size burst_size)
{
@@ -2805,74 +2769,6 @@ int dispc_ovl_setup(enum omap_plane plane, const struct omap_overlay_info *oi,
}
EXPORT_SYMBOL(dispc_ovl_setup);
-int dispc_wb_setup(const struct omap_dss_writeback_info *wi,
- bool mem_to_mem, const struct omap_video_timings *mgr_timings)
-{
- int r;
- u32 l;
- enum omap_plane plane = OMAP_DSS_WB;
- const int pos_x = 0, pos_y = 0;
- const u8 zorder = 0, global_alpha = 0;
- const bool replication = false;
- bool truncation;
- int in_width = mgr_timings->x_res;
- int in_height = mgr_timings->y_res;
- enum omap_overlay_caps caps =
- OMAP_DSS_OVL_CAP_SCALE | OMAP_DSS_OVL_CAP_PRE_MULT_ALPHA;
-
- DSSDBG("dispc_wb_setup, pa %x, pa_uv %x, %d,%d -> %dx%d, cmode %x, "
- "rot %d, mir %d\n", wi->paddr, wi->p_uv_addr, in_width,
- in_height, wi->width, wi->height, wi->color_mode, wi->rotation,
- wi->mirror);
-
- r = dispc_ovl_setup_common(plane, caps, wi->paddr, wi->p_uv_addr,
- wi->buf_width, pos_x, pos_y, in_width, in_height, wi->width,
- wi->height, wi->color_mode, wi->rotation, wi->mirror, zorder,
- wi->pre_mult_alpha, global_alpha, wi->rotation_type,
- replication, mgr_timings, mem_to_mem);
-
- switch (wi->color_mode) {
- case OMAP_DSS_COLOR_RGB16:
- case OMAP_DSS_COLOR_RGB24P:
- case OMAP_DSS_COLOR_ARGB16:
- case OMAP_DSS_COLOR_RGBA16:
- case OMAP_DSS_COLOR_RGB12U:
- case OMAP_DSS_COLOR_ARGB16_1555:
- case OMAP_DSS_COLOR_XRGB16_1555:
- case OMAP_DSS_COLOR_RGBX16:
- truncation = true;
- break;
- default:
- truncation = false;
- break;
- }
-
- /* setup extra DISPC_WB_ATTRIBUTES */
- l = dispc_read_reg(DISPC_OVL_ATTRIBUTES(plane));
- l = FLD_MOD(l, truncation, 10, 10); /* TRUNCATIONENABLE */
- l = FLD_MOD(l, mem_to_mem, 19, 19); /* WRITEBACKMODE */
- if (mem_to_mem)
- l = FLD_MOD(l, 1, 26, 24); /* CAPTUREMODE */
- else
- l = FLD_MOD(l, 0, 26, 24); /* CAPTUREMODE */
- dispc_write_reg(DISPC_OVL_ATTRIBUTES(plane), l);
-
- if (mem_to_mem) {
- /* WBDELAYCOUNT */
- REG_FLD_MOD(DISPC_OVL_ATTRIBUTES2(plane), 0, 7, 0);
- } else {
- int wbdelay;
-
- wbdelay = min(mgr_timings->vfp + mgr_timings->vsw +
- mgr_timings->vbp, 255);
-
- /* WBDELAYCOUNT */
- REG_FLD_MOD(DISPC_OVL_ATTRIBUTES2(plane), wbdelay, 7, 0);
- }
-
- return r;
-}
-
int dispc_ovl_enable(enum omap_plane plane, bool enable)
{
DSSDBG("dispc_enable_plane %d, %d\n", plane, enable);
@@ -2903,16 +2799,6 @@ bool dispc_mgr_is_enabled(enum omap_channel channel)
}
EXPORT_SYMBOL(dispc_mgr_is_enabled);
-void dispc_wb_enable(bool enable)
-{
- dispc_ovl_enable(OMAP_DSS_WB, enable);
-}
-
-bool dispc_wb_is_enabled(void)
-{
- return dispc_ovl_enabled(OMAP_DSS_WB);
-}
-
static void dispc_lcd_enable_signal_polarity(bool act_high)
{
if (!dss_has_feature(FEAT_LCDENABLEPOL))
diff --git a/drivers/video/fbdev/omap2/omapfb/dss/dss.h b/drivers/video/fbdev/omap2/omapfb/dss/dss.h
index a2269008590f..21cfcbf74a6d 100644
--- a/drivers/video/fbdev/omap2/omapfb/dss/dss.h
+++ b/drivers/video/fbdev/omap2/omapfb/dss/dss.h
@@ -89,17 +89,6 @@ enum dss_dsi_content_type {
DSS_DSI_CONTENT_GENERIC,
};
-enum dss_writeback_channel {
- DSS_WB_LCD1_MGR = 0,
- DSS_WB_LCD2_MGR = 1,
- DSS_WB_TV_MGR = 2,
- DSS_WB_OVL0 = 3,
- DSS_WB_OVL1 = 4,
- DSS_WB_OVL2 = 5,
- DSS_WB_OVL3 = 6,
- DSS_WB_LCD3_MGR = 7,
-};
-
enum dss_pll_id {
DSS_PLL_DSI1,
DSS_PLL_DSI2,
@@ -403,15 +392,6 @@ int dispc_mgr_get_clock_div(enum omap_channel channel,
struct dispc_clock_info *cinfo);
void dispc_set_tv_pclk(unsigned long pclk);
-u32 dispc_wb_get_framedone_irq(void);
-bool dispc_wb_go_busy(void);
-void dispc_wb_go(void);
-void dispc_wb_enable(bool enable);
-bool dispc_wb_is_enabled(void);
-void dispc_wb_set_channel_in(enum dss_writeback_channel channel);
-int dispc_wb_setup(const struct omap_dss_writeback_info *wi,
- bool mem_to_mem, const struct omap_video_timings *timings);
-
u32 dispc_read_irqstatus(void);
void dispc_clear_irqstatus(u32 mask);
u32 dispc_read_irqenable(void);
diff --git a/drivers/video/fbdev/omap2/omapfb/dss/venc.c b/drivers/video/fbdev/omap2/omapfb/dss/venc.c
index f81e2a46366d..d5404d56c922 100644
--- a/drivers/video/fbdev/omap2/omapfb/dss/venc.c
+++ b/drivers/video/fbdev/omap2/omapfb/dss/venc.c
@@ -209,49 +209,6 @@ static const struct venc_config venc_config_ntsc_trm = {
.gen_ctrl = 0x00F90000,
};
-static const struct venc_config venc_config_pal_bdghi = {
- .f_control = 0,
- .vidout_ctrl = 0,
- .sync_ctrl = 0,
- .hfltr_ctrl = 0,
- .x_color = 0,
- .line21 = 0,
- .ln_sel = 21,
- .htrigger_vtrigger = 0,
- .tvdetgp_int_start_stop_x = 0x00140001,
- .tvdetgp_int_start_stop_y = 0x00010001,
- .gen_ctrl = 0x00FB0000,
-
- .llen = 864-1,
- .flens = 625-1,
- .cc_carr_wss_carr = 0x2F7625ED,
- .c_phase = 0xDF,
- .gain_u = 0x111,
- .gain_v = 0x181,
- .gain_y = 0x140,
- .black_level = 0x3e,
- .blank_level = 0x3e,
- .m_control = 0<<2 | 1<<1,
- .bstamp_wss_data = 0x42,
- .s_carr = 0x2a098acb,
- .l21__wc_ctl = 0<<13 | 0x16<<8 | 0<<0,
- .savid__eavid = 0x06A70108,
- .flen__fal = 23<<16 | 624<<0,
- .lal__phase_reset = 2<<17 | 310<<0,
- .hs_int_start_stop_x = 0x00920358,
- .hs_ext_start_stop_x = 0x000F035F,
- .vs_int_start_x = 0x1a7<<16,
- .vs_int_stop_x__vs_int_start_y = 0x000601A7,
- .vs_int_stop_y__vs_ext_start_x = 0x01AF0036,
- .vs_ext_stop_x__vs_ext_start_y = 0x27101af,
- .vs_ext_stop_y = 0x05,
- .avid_start_stop_x = 0x03530082,
- .avid_start_stop_y = 0x0270002E,
- .fid_int_start_x__fid_int_start_y = 0x0005008A,
- .fid_int_offset_y__fid_ext_start_x = 0x002E0138,
- .fid_ext_start_y__fid_ext_offset_y = 0x01380005,
-};
-
const struct omap_video_timings omap_dss_pal_timings = {
.x_res = 720,
.y_res = 574,
diff --git a/drivers/video/fbdev/omap2/omapfb/omapfb-ioctl.c b/drivers/video/fbdev/omap2/omapfb/omapfb-ioctl.c
index 56995f44e76d..f40be68d5aac 100644
--- a/drivers/video/fbdev/omap2/omapfb/omapfb-ioctl.c
+++ b/drivers/video/fbdev/omap2/omapfb/omapfb-ioctl.c
@@ -482,9 +482,6 @@ static int omapfb_memory_read(struct fb_info *fbi,
if (!display || !display->driver->memory_read)
return -ENOENT;
- if (!access_ok(mr->buffer, mr->buffer_size))
- return -EFAULT;
-
if (mr->w > 4096 || mr->h > 4096)
return -EINVAL;
diff --git a/drivers/video/fbdev/omap2/omapfb/omapfb-sysfs.c b/drivers/video/fbdev/omap2/omapfb/omapfb-sysfs.c
index 4a5db170ef59..2d39dbfa742e 100644
--- a/drivers/video/fbdev/omap2/omapfb/omapfb-sysfs.c
+++ b/drivers/video/fbdev/omap2/omapfb/omapfb-sysfs.c
@@ -147,11 +147,11 @@ static ssize_t show_overlays(struct device *dev,
if (ovl == fbdev->overlays[ovlnum])
break;
- l += snprintf(buf + l, PAGE_SIZE - l, "%s%d",
+ l += scnprintf(buf + l, PAGE_SIZE - l, "%s%d",
t == 0 ? "" : ",", ovlnum);
}
- l += snprintf(buf + l, PAGE_SIZE - l, "\n");
+ l += scnprintf(buf + l, PAGE_SIZE - l, "\n");
omapfb_unlock(fbdev);
unlock_fb_info(fbi);
@@ -328,11 +328,11 @@ static ssize_t show_overlays_rotate(struct device *dev,
lock_fb_info(fbi);
for (t = 0; t < ofbi->num_overlays; t++) {
- l += snprintf(buf + l, PAGE_SIZE - l, "%s%d",
+ l += scnprintf(buf + l, PAGE_SIZE - l, "%s%d",
t == 0 ? "" : ",", ofbi->rotation[t]);
}
- l += snprintf(buf + l, PAGE_SIZE - l, "\n");
+ l += scnprintf(buf + l, PAGE_SIZE - l, "\n");
unlock_fb_info(fbi);
diff --git a/drivers/video/fbdev/pm2fb.c b/drivers/video/fbdev/pm2fb.c
index fe2cadeb1b66..c7c98d8e2359 100644
--- a/drivers/video/fbdev/pm2fb.c
+++ b/drivers/video/fbdev/pm2fb.c
@@ -54,7 +54,7 @@
#define DPRINTK(a, b...) \
printk(KERN_DEBUG "pm2fb: %s: " a, __func__ , ## b)
#else
-#define DPRINTK(a, b...)
+#define DPRINTK(a, b...) no_printk(a, ##b)
#endif
#define PM2_PIXMAP_SIZE (1600 * 4)
diff --git a/drivers/video/fbdev/pm3fb.c b/drivers/video/fbdev/pm3fb.c
index 2f5e23c8f8ec..7497bd36334c 100644
--- a/drivers/video/fbdev/pm3fb.c
+++ b/drivers/video/fbdev/pm3fb.c
@@ -44,7 +44,7 @@
#define DPRINTK(a, b...) \
printk(KERN_DEBUG "pm3fb: %s: " a, __func__ , ## b)
#else
-#define DPRINTK(a, b...)
+#define DPRINTK(a, b...) no_printk(a, ##b)
#endif
#define PM3_PIXMAP_SIZE (2048 * 4)
@@ -306,7 +306,7 @@ static void pm3fb_init_engine(struct fb_info *info)
PM3PixelSize_GLOBAL_32BIT);
break;
default:
- DPRINTK(1, "Unsupported depth %d\n",
+ DPRINTK("Unsupported depth %d\n",
info->var.bits_per_pixel);
break;
}
@@ -349,8 +349,8 @@ static void pm3fb_init_engine(struct fb_info *info)
(1 << 10) | (0 << 3));
break;
default:
- DPRINTK(1, "Unsupported depth %d\n",
- info->current_par->depth);
+ DPRINTK("Unsupported depth %d\n",
+ info->var.bits_per_pixel);
break;
}
}
diff --git a/drivers/video/fbdev/pxa168fb.c b/drivers/video/fbdev/pxa168fb.c
index aef8a3042590..eedfbd3572a8 100644
--- a/drivers/video/fbdev/pxa168fb.c
+++ b/drivers/video/fbdev/pxa168fb.c
@@ -557,12 +557,11 @@ static const struct fb_ops pxa168fb_ops = {
.fb_imageblit = cfb_imageblit,
};
-static int pxa168fb_init_mode(struct fb_info *info,
+static void pxa168fb_init_mode(struct fb_info *info,
struct pxa168fb_mach_info *mi)
{
struct pxa168fb_info *fbi = info->par;
struct fb_var_screeninfo *var = &info->var;
- int ret = 0;
u32 total_w, total_h, refresh;
u64 div_result;
const struct fb_videomode *m;
@@ -593,8 +592,6 @@ static int pxa168fb_init_mode(struct fb_info *info,
div_result = 1000000000000ll;
do_div(div_result, total_w * total_h * refresh);
var->pixclock = (u32)div_result;
-
- return ret;
}
static int pxa168fb_probe(struct platform_device *pdev)
diff --git a/drivers/video/fbdev/riva/riva_hw.c b/drivers/video/fbdev/riva/riva_hw.c
index 0601c13f2105..08c9ee46978e 100644
--- a/drivers/video/fbdev/riva/riva_hw.c
+++ b/drivers/video/fbdev/riva/riva_hw.c
@@ -1343,24 +1343,6 @@ int CalcStateExt
/*
* Load fixed function state and pre-calculated/stored state.
*/
-#if 0
-#define LOAD_FIXED_STATE(tbl,dev) \
- for (i = 0; i < sizeof(tbl##Table##dev)/8; i++) \
- chip->dev[tbl##Table##dev[i][0]] = tbl##Table##dev[i][1]
-#define LOAD_FIXED_STATE_8BPP(tbl,dev) \
- for (i = 0; i < sizeof(tbl##Table##dev##_8BPP)/8; i++) \
- chip->dev[tbl##Table##dev##_8BPP[i][0]] = tbl##Table##dev##_8BPP[i][1]
-#define LOAD_FIXED_STATE_15BPP(tbl,dev) \
- for (i = 0; i < sizeof(tbl##Table##dev##_15BPP)/8; i++) \
- chip->dev[tbl##Table##dev##_15BPP[i][0]] = tbl##Table##dev##_15BPP[i][1]
-#define LOAD_FIXED_STATE_16BPP(tbl,dev) \
- for (i = 0; i < sizeof(tbl##Table##dev##_16BPP)/8; i++) \
- chip->dev[tbl##Table##dev##_16BPP[i][0]] = tbl##Table##dev##_16BPP[i][1]
-#define LOAD_FIXED_STATE_32BPP(tbl,dev) \
- for (i = 0; i < sizeof(tbl##Table##dev##_32BPP)/8; i++) \
- chip->dev[tbl##Table##dev##_32BPP[i][0]] = tbl##Table##dev##_32BPP[i][1]
-#endif
-
#define LOAD_FIXED_STATE(tbl,dev) \
for (i = 0; i < sizeof(tbl##Table##dev)/8; i++) \
NV_WR32(&chip->dev[tbl##Table##dev[i][0]], 0, tbl##Table##dev[i][1])
diff --git a/drivers/video/fbdev/s1d13xxxfb.c b/drivers/video/fbdev/s1d13xxxfb.c
index eaea8c373753..4541afcf9386 100644
--- a/drivers/video/fbdev/s1d13xxxfb.c
+++ b/drivers/video/fbdev/s1d13xxxfb.c
@@ -721,9 +721,7 @@ static void s1d13xxxfb_fetch_hw_state(struct fb_info *info)
xres, yres, xres_virtual, yres_virtual, is_color, is_dual, is_tft);
}
-
-static int
-s1d13xxxfb_remove(struct platform_device *pdev)
+static void __s1d13xxxfb_remove(struct platform_device *pdev)
{
struct fb_info *info = platform_get_drvdata(pdev);
struct s1d13xxxfb_par *par = NULL;
@@ -749,6 +747,14 @@ s1d13xxxfb_remove(struct platform_device *pdev)
resource_size(&pdev->resource[0]));
release_mem_region(pdev->resource[1].start,
resource_size(&pdev->resource[1]));
+}
+
+static int s1d13xxxfb_remove(struct platform_device *pdev)
+{
+ struct fb_info *info = platform_get_drvdata(pdev);
+
+ unregister_framebuffer(info);
+ __s1d13xxxfb_remove(pdev);
return 0;
}
@@ -895,7 +901,7 @@ static int s1d13xxxfb_probe(struct platform_device *pdev)
return 0;
bail:
- s1d13xxxfb_remove(pdev);
+ __s1d13xxxfb_remove(pdev);
return ret;
}
diff --git a/drivers/video/fbdev/sa1100fb.c b/drivers/video/fbdev/sa1100fb.c
index 2d285cc384cf..3e6e13f7a831 100644
--- a/drivers/video/fbdev/sa1100fb.c
+++ b/drivers/video/fbdev/sa1100fb.c
@@ -173,7 +173,7 @@
#include <linux/init.h>
#include <linux/ioport.h>
#include <linux/cpufreq.h>
-#include <linux/gpio.h>
+#include <linux/gpio/consumer.h>
#include <linux/platform_device.h>
#include <linux/dma-mapping.h>
#include <linux/mutex.h>
@@ -799,8 +799,8 @@ static void sa1100fb_enable_controller(struct sa1100fb_info *fbi)
writel_relaxed(fbi->dbar2, fbi->base + DBAR2);
writel_relaxed(fbi->reg_lccr0 | LCCR0_LEN, fbi->base + LCCR0);
- if (machine_is_shannon())
- gpio_set_value(SHANNON_GPIO_DISP_EN, 1);
+ if (fbi->shannon_lcden)
+ gpiod_set_value(fbi->shannon_lcden, 1);
dev_dbg(fbi->dev, "DBAR1: 0x%08x\n", readl_relaxed(fbi->base + DBAR1));
dev_dbg(fbi->dev, "DBAR2: 0x%08x\n", readl_relaxed(fbi->base + DBAR2));
@@ -817,8 +817,8 @@ static void sa1100fb_disable_controller(struct sa1100fb_info *fbi)
dev_dbg(fbi->dev, "Disabling LCD controller\n");
- if (machine_is_shannon())
- gpio_set_value(SHANNON_GPIO_DISP_EN, 0);
+ if (fbi->shannon_lcden)
+ gpiod_set_value(fbi->shannon_lcden, 0);
set_current_state(TASK_UNINTERRUPTIBLE);
add_wait_queue(&fbi->ctrlr_wait, &wait);
@@ -1173,12 +1173,10 @@ static int sa1100fb_probe(struct platform_device *pdev)
return ret;
}
- if (machine_is_shannon()) {
- ret = devm_gpio_request_one(&pdev->dev, SHANNON_GPIO_DISP_EN,
- GPIOF_OUT_INIT_LOW, "display enable");
- if (ret)
- return ret;
- }
+ fbi->shannon_lcden = gpiod_get_optional(&pdev->dev, "shannon-lcden",
+ GPIOD_OUT_LOW);
+ if (IS_ERR(fbi->shannon_lcden))
+ return PTR_ERR(fbi->shannon_lcden);
/* Initialize video memory */
ret = sa1100fb_map_video_memory(fbi);
diff --git a/drivers/video/fbdev/sa1100fb.h b/drivers/video/fbdev/sa1100fb.h
index d0aa33b0b88a..b4363444fa5d 100644
--- a/drivers/video/fbdev/sa1100fb.h
+++ b/drivers/video/fbdev/sa1100fb.h
@@ -10,6 +10,8 @@
* for more details.
*/
+struct gpio_desc;
+
#define LCCR0 0x0000 /* LCD Control Reg. 0 */
#define LCSR 0x0004 /* LCD Status Reg. */
#define DBAR1 0x0010 /* LCD DMA Base Address Reg. channel 1 */
@@ -33,6 +35,7 @@ struct sa1100fb_info {
struct device *dev;
const struct sa1100fb_rgb *rgb[NR_RGB];
void __iomem *base;
+ struct gpio_desc *shannon_lcden;
/*
* These are the addresses we mapped
diff --git a/drivers/video/fbdev/savage/savagefb.h b/drivers/video/fbdev/savage/savagefb.h
index aba04afe712d..3314d5b6b43b 100644
--- a/drivers/video/fbdev/savage/savagefb.h
+++ b/drivers/video/fbdev/savage/savagefb.h
@@ -21,7 +21,7 @@
#ifdef SAVAGEFB_DEBUG
# define DBG(x) printk (KERN_DEBUG "savagefb: %s\n", (x));
#else
-# define DBG(x)
+# define DBG(x) no_printk(x)
# define SavagePrintRegs(...)
#endif
diff --git a/drivers/video/fbdev/ssd1307fb.c b/drivers/video/fbdev/ssd1307fb.c
index 12fa1050f3eb..8e06ba912d60 100644
--- a/drivers/video/fbdev/ssd1307fb.c
+++ b/drivers/video/fbdev/ssd1307fb.c
@@ -12,8 +12,7 @@
#include <linux/i2c.h>
#include <linux/kernel.h>
#include <linux/module.h>
-#include <linux/of_device.h>
-#include <linux/of_gpio.h>
+#include <linux/property.h>
#include <linux/pwm.h>
#include <linux/uaccess.h>
#include <linux/regulator/consumer.h>
@@ -49,8 +48,6 @@
static u_int refreshrate = REFRESHRATE;
module_param(refreshrate, uint, 0);
-struct ssd1307fb_par;
-
struct ssd1307fb_deviceinfo {
u32 default_vcomh;
u32 default_dclk_div;
@@ -80,7 +77,6 @@ struct ssd1307fb_par {
u32 prechargep1;
u32 prechargep2;
struct pwm_device *pwm;
- u32 pwm_period;
struct gpio_desc *reset;
struct regulator *vbat_reg;
u32 vcomh;
@@ -298,9 +294,9 @@ static void ssd1307fb_deferred_io(struct fb_info *info,
static int ssd1307fb_init(struct ssd1307fb_par *par)
{
+ struct pwm_state pwmstate;
int ret;
u32 precharge, dclk, com_invdir, compins;
- struct pwm_args pargs;
if (par->device_info->need_pwm) {
par->pwm = pwm_get(&par->client->dev, NULL);
@@ -309,21 +305,15 @@ static int ssd1307fb_init(struct ssd1307fb_par *par)
return PTR_ERR(par->pwm);
}
- /*
- * FIXME: pwm_apply_args() should be removed when switching to
- * the atomic PWM API.
- */
- pwm_apply_args(par->pwm);
-
- pwm_get_args(par->pwm, &pargs);
+ pwm_init_state(par->pwm, &pwmstate);
+ pwm_set_relative_duty_cycle(&pwmstate, 50, 100);
+ pwm_apply_state(par->pwm, &pwmstate);
- par->pwm_period = pargs.period;
/* Enable the PWM */
- pwm_config(par->pwm, par->pwm_period / 2, par->pwm_period);
pwm_enable(par->pwm);
dev_dbg(&par->client->dev, "Using PWM%d with a %dns period.\n",
- par->pwm->pwm, par->pwm_period);
+ par->pwm->pwm, pwm_get_period(par->pwm));
}
/* Set initial contrast */
@@ -586,25 +576,19 @@ static const struct of_device_id ssd1307fb_of_match[] = {
};
MODULE_DEVICE_TABLE(of, ssd1307fb_of_match);
-static int ssd1307fb_probe(struct i2c_client *client,
- const struct i2c_device_id *id)
+static int ssd1307fb_probe(struct i2c_client *client)
{
+ struct device *dev = &client->dev;
struct backlight_device *bl;
char bl_name[12];
struct fb_info *info;
- struct device_node *node = client->dev.of_node;
struct fb_deferred_io *ssd1307fb_defio;
u32 vmem_size;
struct ssd1307fb_par *par;
void *vmem;
int ret;
- if (!node) {
- dev_err(&client->dev, "No device tree data found!\n");
- return -EINVAL;
- }
-
- info = framebuffer_alloc(sizeof(struct ssd1307fb_par), &client->dev);
+ info = framebuffer_alloc(sizeof(struct ssd1307fb_par), dev);
if (!info)
return -ENOMEM;
@@ -612,67 +596,65 @@ static int ssd1307fb_probe(struct i2c_client *client,
par->info = info;
par->client = client;
- par->device_info = of_device_get_match_data(&client->dev);
+ par->device_info = device_get_match_data(dev);
- par->reset = devm_gpiod_get_optional(&client->dev, "reset",
- GPIOD_OUT_LOW);
+ par->reset = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_LOW);
if (IS_ERR(par->reset)) {
- dev_err(&client->dev, "failed to get reset gpio: %ld\n",
+ dev_err(dev, "failed to get reset gpio: %ld\n",
PTR_ERR(par->reset));
ret = PTR_ERR(par->reset);
goto fb_alloc_error;
}
- par->vbat_reg = devm_regulator_get_optional(&client->dev, "vbat");
+ par->vbat_reg = devm_regulator_get_optional(dev, "vbat");
if (IS_ERR(par->vbat_reg)) {
ret = PTR_ERR(par->vbat_reg);
if (ret == -ENODEV) {
par->vbat_reg = NULL;
} else {
- dev_err(&client->dev, "failed to get VBAT regulator: %d\n",
- ret);
+ dev_err(dev, "failed to get VBAT regulator: %d\n", ret);
goto fb_alloc_error;
}
}
- if (of_property_read_u32(node, "solomon,width", &par->width))
+ if (device_property_read_u32(dev, "solomon,width", &par->width))
par->width = 96;
- if (of_property_read_u32(node, "solomon,height", &par->height))
+ if (device_property_read_u32(dev, "solomon,height", &par->height))
par->height = 16;
- if (of_property_read_u32(node, "solomon,page-offset", &par->page_offset))
+ if (device_property_read_u32(dev, "solomon,page-offset", &par->page_offset))
par->page_offset = 1;
- if (of_property_read_u32(node, "solomon,com-offset", &par->com_offset))
+ if (device_property_read_u32(dev, "solomon,com-offset", &par->com_offset))
par->com_offset = 0;
- if (of_property_read_u32(node, "solomon,prechargep1", &par->prechargep1))
+ if (device_property_read_u32(dev, "solomon,prechargep1", &par->prechargep1))
par->prechargep1 = 2;
- if (of_property_read_u32(node, "solomon,prechargep2", &par->prechargep2))
+ if (device_property_read_u32(dev, "solomon,prechargep2", &par->prechargep2))
par->prechargep2 = 2;
- if (!of_property_read_u8_array(node, "solomon,lookup-table",
- par->lookup_table,
- ARRAY_SIZE(par->lookup_table)))
+ if (!device_property_read_u8_array(dev, "solomon,lookup-table",
+ par->lookup_table,
+ ARRAY_SIZE(par->lookup_table)))
par->lookup_table_set = 1;
- par->seg_remap = !of_property_read_bool(node, "solomon,segment-no-remap");
- par->com_seq = of_property_read_bool(node, "solomon,com-seq");
- par->com_lrremap = of_property_read_bool(node, "solomon,com-lrremap");
- par->com_invdir = of_property_read_bool(node, "solomon,com-invdir");
+ par->seg_remap = !device_property_read_bool(dev, "solomon,segment-no-remap");
+ par->com_seq = device_property_read_bool(dev, "solomon,com-seq");
+ par->com_lrremap = device_property_read_bool(dev, "solomon,com-lrremap");
+ par->com_invdir = device_property_read_bool(dev, "solomon,com-invdir");
par->area_color_enable =
- of_property_read_bool(node, "solomon,area-color-enable");
- par->low_power = of_property_read_bool(node, "solomon,low-power");
+ device_property_read_bool(dev, "solomon,area-color-enable");
+ par->low_power = device_property_read_bool(dev, "solomon,low-power");
par->contrast = 127;
par->vcomh = par->device_info->default_vcomh;
/* Setup display timing */
- if (of_property_read_u32(node, "solomon,dclk-div", &par->dclk_div))
+ if (device_property_read_u32(dev, "solomon,dclk-div", &par->dclk_div))
par->dclk_div = par->device_info->default_dclk_div;
- if (of_property_read_u32(node, "solomon,dclk-frq", &par->dclk_frq))
+ if (device_property_read_u32(dev, "solomon,dclk-frq", &par->dclk_frq))
par->dclk_frq = par->device_info->default_dclk_frq;
vmem_size = DIV_ROUND_UP(par->width, 8) * par->height;
@@ -680,15 +662,15 @@ static int ssd1307fb_probe(struct i2c_client *client,
vmem = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
get_order(vmem_size));
if (!vmem) {
- dev_err(&client->dev, "Couldn't allocate graphical memory.\n");
+ dev_err(dev, "Couldn't allocate graphical memory.\n");
ret = -ENOMEM;
goto fb_alloc_error;
}
- ssd1307fb_defio = devm_kzalloc(&client->dev, sizeof(*ssd1307fb_defio),
+ ssd1307fb_defio = devm_kzalloc(dev, sizeof(*ssd1307fb_defio),
GFP_KERNEL);
if (!ssd1307fb_defio) {
- dev_err(&client->dev, "Couldn't allocate deferred io.\n");
+ dev_err(dev, "Couldn't allocate deferred io.\n");
ret = -ENOMEM;
goto fb_alloc_error;
}
@@ -726,8 +708,7 @@ static int ssd1307fb_probe(struct i2c_client *client,
if (par->vbat_reg) {
ret = regulator_enable(par->vbat_reg);
if (ret) {
- dev_err(&client->dev, "failed to enable VBAT: %d\n",
- ret);
+ dev_err(dev, "failed to enable VBAT: %d\n", ret);
goto reset_oled_error;
}
}
@@ -738,17 +719,16 @@ static int ssd1307fb_probe(struct i2c_client *client,
ret = register_framebuffer(info);
if (ret) {
- dev_err(&client->dev, "Couldn't register the framebuffer\n");
+ dev_err(dev, "Couldn't register the framebuffer\n");
goto panel_init_error;
}
snprintf(bl_name, sizeof(bl_name), "ssd1307fb%d", info->node);
- bl = backlight_device_register(bl_name, &client->dev, par,
- &ssd1307fb_bl_ops, NULL);
+ bl = backlight_device_register(bl_name, dev, par, &ssd1307fb_bl_ops,
+ NULL);
if (IS_ERR(bl)) {
ret = PTR_ERR(bl);
- dev_err(&client->dev, "unable to register backlight device: %d\n",
- ret);
+ dev_err(dev, "unable to register backlight device: %d\n", ret);
goto bl_init_error;
}
@@ -756,7 +736,7 @@ static int ssd1307fb_probe(struct i2c_client *client,
bl->props.max_brightness = MAX_CONTRAST;
info->bl_dev = bl;
- dev_info(&client->dev, "fb%d: %s framebuffer device registered, using %d bytes of video memory\n", info->node, info->fix.id, vmem_size);
+ dev_info(dev, "fb%d: %s framebuffer device registered, using %d bytes of video memory\n", info->node, info->fix.id, vmem_size);
return 0;
@@ -810,7 +790,7 @@ static const struct i2c_device_id ssd1307fb_i2c_id[] = {
MODULE_DEVICE_TABLE(i2c, ssd1307fb_i2c_id);
static struct i2c_driver ssd1307fb_driver = {
- .probe = ssd1307fb_probe,
+ .probe_new = ssd1307fb_probe,
.remove = ssd1307fb_remove,
.id_table = ssd1307fb_i2c_id,
.driver = {
diff --git a/drivers/video/fbdev/udlfb.c b/drivers/video/fbdev/udlfb.c
index 07905d385949..5b014b479f83 100644
--- a/drivers/video/fbdev/udlfb.c
+++ b/drivers/video/fbdev/udlfb.c
@@ -64,9 +64,9 @@ static const struct usb_device_id id_table[] = {
MODULE_DEVICE_TABLE(usb, id_table);
/* module options */
-static bool console = 1; /* Allow fbcon to open framebuffer */
-static bool fb_defio = 1; /* Detect mmap writes using page faults */
-static bool shadow = 1; /* Optionally disable shadow framebuffer */
+static bool console = true; /* Allow fbcon to open framebuffer */
+static bool fb_defio = true; /* Detect mmap writes using page faults */
+static bool shadow = true; /* Optionally disable shadow framebuffer */
static int pixel_limit; /* Optionally force a pixel resolution limit */
struct dlfb_deferred_free {
diff --git a/drivers/video/fbdev/uvesafb.c b/drivers/video/fbdev/uvesafb.c
index 53d08d1b56f5..bee29aadc646 100644
--- a/drivers/video/fbdev/uvesafb.c
+++ b/drivers/video/fbdev/uvesafb.c
@@ -45,7 +45,7 @@ static const struct fb_fix_screeninfo uvesafb_fix = {
};
static int mtrr = 3; /* enable mtrr by default */
-static bool blank = 1; /* enable blanking by default */
+static bool blank = true; /* enable blanking by default */
static int ypan = 1; /* 0: scroll, 1: ypan, 2: ywrap */
static bool pmi_setpal = true; /* use PMI for palette changes */
static bool nocrtc; /* ignore CRTC settings */
@@ -1560,7 +1560,7 @@ static ssize_t uvesafb_show_vbe_modes(struct device *dev,
int ret = 0, i;
for (i = 0; i < par->vbe_modes_cnt && ret < PAGE_SIZE; i++) {
- ret += snprintf(buf + ret, PAGE_SIZE - ret,
+ ret += scnprintf(buf + ret, PAGE_SIZE - ret,
"%dx%d-%d, 0x%.4x\n",
par->vbe_modes[i].x_res, par->vbe_modes[i].y_res,
par->vbe_modes[i].depth, par->vbe_modes[i].mode_id);
@@ -1824,19 +1824,19 @@ static int uvesafb_setup(char *options)
else if (!strcmp(this_opt, "ywrap"))
ypan = 2;
else if (!strcmp(this_opt, "vgapal"))
- pmi_setpal = 0;
+ pmi_setpal = false;
else if (!strcmp(this_opt, "pmipal"))
- pmi_setpal = 1;
+ pmi_setpal = true;
else if (!strncmp(this_opt, "mtrr:", 5))
mtrr = simple_strtoul(this_opt+5, NULL, 0);
else if (!strcmp(this_opt, "nomtrr"))
mtrr = 0;
else if (!strcmp(this_opt, "nocrtc"))
- nocrtc = 1;
+ nocrtc = true;
else if (!strcmp(this_opt, "noedid"))
- noedid = 1;
+ noedid = true;
else if (!strcmp(this_opt, "noblank"))
- blank = 0;
+			blank = false;
else if (!strncmp(this_opt, "vtotal:", 7))
vram_total = simple_strtoul(this_opt + 7, NULL, 0);
else if (!strncmp(this_opt, "vremap:", 7))
diff --git a/drivers/video/fbdev/valkyriefb.c b/drivers/video/fbdev/valkyriefb.c
index 4d20c4603e5a..8425afe37d7c 100644
--- a/drivers/video/fbdev/valkyriefb.c
+++ b/drivers/video/fbdev/valkyriefb.c
@@ -331,7 +331,7 @@ int __init valkyriefb_init(void)
struct resource r;
dp = of_find_node_by_name(NULL, "valkyrie");
- if (dp == 0)
+ if (!dp)
return 0;
if (of_address_to_resource(dp, 0, &r)) {
@@ -345,7 +345,7 @@ int __init valkyriefb_init(void)
#endif /* ppc (!CONFIG_MAC) */
p = kzalloc(sizeof(*p), GFP_ATOMIC);
- if (p == 0)
+ if (!p)
return -ENOMEM;
/* Map in frame buffer and registers */
diff --git a/drivers/video/fbdev/vesafb.c b/drivers/video/fbdev/vesafb.c
index a1fe24ea869b..df6de5a9dd4c 100644
--- a/drivers/video/fbdev/vesafb.c
+++ b/drivers/video/fbdev/vesafb.c
@@ -32,6 +32,7 @@
struct vesafb_par {
u32 pseudo_palette[256];
int wc_cookie;
+ struct resource *region;
};
static struct fb_var_screeninfo vesafb_defined = {
@@ -411,7 +412,7 @@ static int vesafb_probe(struct platform_device *dev)
/* request failure does not faze us, as vgacon probably has this
* region already (FIXME) */
- request_region(0x3c0, 32, "vesafb");
+ par->region = request_region(0x3c0, 32, "vesafb");
if (mtrr == 3) {
unsigned int temp_size = size_total;
@@ -439,7 +440,7 @@ static int vesafb_probe(struct platform_device *dev)
"vesafb: abort, cannot ioremap video memory 0x%x @ 0x%lx\n",
vesafb_fix.smem_len, vesafb_fix.smem_start);
err = -EIO;
- goto err;
+ goto err_release_region;
}
printk(KERN_INFO "vesafb: framebuffer at 0x%lx, mapped to 0x%p, "
@@ -458,19 +459,22 @@ static int vesafb_probe(struct platform_device *dev)
if (fb_alloc_cmap(&info->cmap, 256, 0) < 0) {
err = -ENOMEM;
- goto err;
+ goto err_release_region;
}
if (register_framebuffer(info)<0) {
err = -EINVAL;
fb_dealloc_cmap(&info->cmap);
- goto err;
+ goto err_release_region;
}
fb_info(info, "%s frame buffer device\n", info->fix.id);
return 0;
-err:
+err_release_region:
arch_phys_wc_del(par->wc_cookie);
if (info->screen_base)
iounmap(info->screen_base);
+ if (par->region)
+ release_region(0x3c0, 32);
+err:
framebuffer_release(info);
release_mem_region(vesafb_fix.smem_start, size_total);
return err;
@@ -481,6 +485,8 @@ static int vesafb_remove(struct platform_device *pdev)
struct fb_info *info = platform_get_drvdata(pdev);
unregister_framebuffer(info);
+ if (((struct vesafb_par *)(info->par))->region)
+ release_region(0x3c0, 32);
framebuffer_release(info);
return 0;
diff --git a/drivers/video/fbdev/via/debug.h b/drivers/video/fbdev/via/debug.h
index 6a320bd76936..80fdfe4171c5 100644
--- a/drivers/video/fbdev/via/debug.h
+++ b/drivers/video/fbdev/via/debug.h
@@ -7,6 +7,8 @@
#ifndef __DEBUG_H__
#define __DEBUG_H__
+#include <linux/printk.h>
+
#ifndef VIAFB_DEBUG
#define VIAFB_DEBUG 0
#endif
@@ -14,14 +16,14 @@
#if VIAFB_DEBUG
#define DEBUG_MSG(f, a...) printk(f, ## a)
#else
-#define DEBUG_MSG(f, a...)
+#define DEBUG_MSG(f, a...) no_printk(f, ## a)
#endif
#define VIAFB_WARN 0
#if VIAFB_WARN
#define WARN_MSG(f, a...) printk(f, ## a)
#else
-#define WARN_MSG(f, a...)
+#define WARN_MSG(f, a...) no_printk(f, ## a)
#endif
#endif /* __DEBUG_H__ */
diff --git a/drivers/video/fbdev/via/viafbdev.c b/drivers/video/fbdev/via/viafbdev.c
index 852673c40a2f..22deb340a048 100644
--- a/drivers/video/fbdev/via/viafbdev.c
+++ b/drivers/video/fbdev/via/viafbdev.c
@@ -1144,7 +1144,7 @@ static ssize_t viafb_dvp0_proc_write(struct file *file,
if (value != NULL) {
if (kstrtou8(value, 0, &reg_val) < 0)
return -EINVAL;
- DEBUG_MSG(KERN_INFO "DVP0:reg_val[%l]=:%x\n", i,
+ DEBUG_MSG(KERN_INFO "DVP0:reg_val[%lu]=:%x\n", i,
reg_val);
switch (i) {
case 0:
diff --git a/drivers/video/fbdev/vt8500lcdfb.c b/drivers/video/fbdev/vt8500lcdfb.c
index f744479dc7df..c61476247ba8 100644
--- a/drivers/video/fbdev/vt8500lcdfb.c
+++ b/drivers/video/fbdev/vt8500lcdfb.c
@@ -230,6 +230,7 @@ static int vt8500lcd_blank(int blank, struct fb_info *info)
info->fix.visual == FB_VISUAL_STATIC_PSEUDOCOLOR)
for (i = 0; i < 256; i++)
vt8500lcd_setcolreg(i, 0, 0, 0, 0, info);
+ fallthrough;
case FB_BLANK_UNBLANK:
if (info->fix.visual == FB_VISUAL_PSEUDOCOLOR ||
info->fix.visual == FB_VISUAL_STATIC_PSEUDOCOLOR)
diff --git a/drivers/video/fbdev/w100fb.c b/drivers/video/fbdev/w100fb.c
index 2d6e2738b792..d96ab28f8ce4 100644
--- a/drivers/video/fbdev/w100fb.c
+++ b/drivers/video/fbdev/w100fb.c
@@ -588,6 +588,7 @@ static void w100fb_restore_vidmem(struct w100fb_par *par)
memsize=par->mach->mem->size;
memcpy_toio(remapped_fbuf + (W100_FB_BASE-MEM_WINDOW_BASE), par->saved_extmem, memsize);
vfree(par->saved_extmem);
+ par->saved_extmem = NULL;
}
if (par->saved_intmem) {
memsize=MEM_INT_SIZE;
@@ -596,6 +597,7 @@ static void w100fb_restore_vidmem(struct w100fb_par *par)
else
memcpy_toio(remapped_fbuf + (W100_FB_BASE-MEM_WINDOW_BASE), par->saved_intmem, memsize);
vfree(par->saved_intmem);
+ par->saved_intmem = NULL;
}
}
diff --git a/drivers/video/hdmi.c b/drivers/video/hdmi.c
index 856a8c4e84a2..e70792b3e367 100644
--- a/drivers/video/hdmi.c
+++ b/drivers/video/hdmi.c
@@ -1768,20 +1768,21 @@ hdmi_vendor_any_infoframe_unpack(union hdmi_vendor_any_infoframe *frame,
}
/**
- * hdmi_drm_infoframe_unpack() - unpack binary buffer to a HDMI DRM infoframe
+ * hdmi_drm_infoframe_unpack_only() - unpack binary buffer of CTA-861-G DRM
+ * infoframe DataBytes to a HDMI DRM
+ * infoframe
* @frame: HDMI DRM infoframe
* @buffer: source buffer
* @size: size of buffer
*
- * Unpacks the information contained in binary @buffer into a structured
- * @frame of the HDMI Dynamic Range and Mastering (DRM) information frame.
- * Also verifies the checksum as required by section 5.3.5 of the HDMI 1.4
- * specification.
+ * Unpacks CTA-861-G DRM infoframe DataBytes contained in the binary @buffer
+ * into a structured @frame of the HDMI Dynamic Range and Mastering (DRM)
+ * infoframe.
*
* Returns 0 on success or a negative error code on failure.
*/
-static int hdmi_drm_infoframe_unpack(struct hdmi_drm_infoframe *frame,
- const void *buffer, size_t size)
+int hdmi_drm_infoframe_unpack_only(struct hdmi_drm_infoframe *frame,
+ const void *buffer, size_t size)
{
const u8 *ptr = buffer;
const u8 *temp;
@@ -1790,23 +1791,13 @@ static int hdmi_drm_infoframe_unpack(struct hdmi_drm_infoframe *frame,
int ret;
int i;
- if (size < HDMI_INFOFRAME_SIZE(DRM))
- return -EINVAL;
-
- if (ptr[0] != HDMI_INFOFRAME_TYPE_DRM ||
- ptr[1] != 1 ||
- ptr[2] != HDMI_DRM_INFOFRAME_SIZE)
- return -EINVAL;
-
- if (hdmi_infoframe_checksum(buffer, HDMI_INFOFRAME_SIZE(DRM)) != 0)
+ if (size < HDMI_DRM_INFOFRAME_SIZE)
return -EINVAL;
ret = hdmi_drm_infoframe_init(frame);
if (ret)
return ret;
- ptr += HDMI_INFOFRAME_HEADER_SIZE;
-
frame->eotf = ptr[0] & 0x7;
frame->metadata_type = ptr[1] & 0x7;
@@ -1814,7 +1805,7 @@ static int hdmi_drm_infoframe_unpack(struct hdmi_drm_infoframe *frame,
for (i = 0; i < 3; i++) {
x_lsb = *temp++;
x_msb = *temp++;
- frame->display_primaries[i].x = (x_msb << 8) | x_lsb;
+ frame->display_primaries[i].x = (x_msb << 8) | x_lsb;
y_lsb = *temp++;
y_msb = *temp++;
frame->display_primaries[i].y = (y_msb << 8) | y_lsb;
@@ -1830,6 +1821,42 @@ static int hdmi_drm_infoframe_unpack(struct hdmi_drm_infoframe *frame,
return 0;
}
+EXPORT_SYMBOL(hdmi_drm_infoframe_unpack_only);
+
+/**
+ * hdmi_drm_infoframe_unpack() - unpack binary buffer to a HDMI DRM infoframe
+ * @frame: HDMI DRM infoframe
+ * @buffer: source buffer
+ * @size: size of buffer
+ *
+ * Unpacks the CTA-861-G DRM infoframe contained in the binary @buffer into
+ * a structured @frame of the HDMI Dynamic Range and Mastering (DRM)
+ * infoframe. It also verifies the checksum as required by section 5.3.5 of
+ * the HDMI 1.4 specification.
+ *
+ * Returns 0 on success or a negative error code on failure.
+ */
+static int hdmi_drm_infoframe_unpack(struct hdmi_drm_infoframe *frame,
+ const void *buffer, size_t size)
+{
+ const u8 *ptr = buffer;
+ int ret;
+
+ if (size < HDMI_INFOFRAME_SIZE(DRM))
+ return -EINVAL;
+
+ if (ptr[0] != HDMI_INFOFRAME_TYPE_DRM ||
+ ptr[1] != 1 ||
+ ptr[2] != HDMI_DRM_INFOFRAME_SIZE)
+ return -EINVAL;
+
+ if (hdmi_infoframe_checksum(buffer, HDMI_INFOFRAME_SIZE(DRM)) != 0)
+ return -EINVAL;
+
+ ret = hdmi_drm_infoframe_unpack_only(frame, ptr + HDMI_INFOFRAME_HEADER_SIZE,
+ size - HDMI_INFOFRAME_HEADER_SIZE);
+ return ret;
+}
/**
* hdmi_infoframe_unpack() - unpack binary buffer to a HDMI infoframe
diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index 0663c604bd64..b739c476955b 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig
@@ -1217,6 +1217,7 @@ config ITCO_WDT
depends on (X86 || IA64) && PCI
select WATCHDOG_CORE
depends on I2C || I2C=n
+ depends on MFD_INTEL_PMC_BXT || !MFD_INTEL_PMC_BXT
select LPC_ICH if !EXPERT
select I2C_I801 if !EXPERT && I2C
---help---
diff --git a/drivers/watchdog/iTCO_wdt.c b/drivers/watchdog/iTCO_wdt.c
index e707c4797f76..a370a185a41c 100644
--- a/drivers/watchdog/iTCO_wdt.c
+++ b/drivers/watchdog/iTCO_wdt.c
@@ -64,6 +64,7 @@
#include <linux/uaccess.h> /* For copy_to_user/put_user/... */
#include <linux/io.h> /* For inb/outb/... */
#include <linux/platform_data/itco_wdt.h>
+#include <linux/mfd/intel_pmc_bxt.h>
#include "iTCO_vendor.h"
@@ -233,12 +234,24 @@ static int update_no_reboot_bit_cnt(void *priv, bool set)
return val != newval ? -EIO : 0;
}
+static int update_no_reboot_bit_pmc(void *priv, bool set)
+{
+ struct intel_pmc_dev *pmc = priv;
+ u32 bits = PMC_CFG_NO_REBOOT_EN;
+ u32 value = set ? bits : 0;
+
+ return intel_pmc_gcr_update(pmc, PMC_GCR_PMC_CFG_REG, bits, value);
+}
+
static void iTCO_wdt_no_reboot_bit_setup(struct iTCO_wdt_private *p,
- struct itco_wdt_platform_data *pdata)
+ struct platform_device *pdev,
+ struct itco_wdt_platform_data *pdata)
{
- if (pdata->update_no_reboot_bit) {
- p->update_no_reboot_bit = pdata->update_no_reboot_bit;
- p->no_reboot_priv = pdata->no_reboot_priv;
+ if (pdata->no_reboot_use_pmc) {
+ struct intel_pmc_dev *pmc = dev_get_drvdata(pdev->dev.parent);
+
+ p->update_no_reboot_bit = update_no_reboot_bit_pmc;
+ p->no_reboot_priv = pmc;
return;
}
@@ -478,14 +491,14 @@ static int iTCO_wdt_probe(struct platform_device *pdev)
return -ENODEV;
}
- iTCO_wdt_no_reboot_bit_setup(p, pdata);
+ iTCO_wdt_no_reboot_bit_setup(p, pdev, pdata);
/*
* Get the Memory-Mapped GCS or PMC register, we need it for the
* NO_REBOOT flag (TCO v2 and v3).
*/
if (p->iTCO_version >= 2 && p->iTCO_version < 6 &&
- !pdata->update_no_reboot_bit) {
+ !pdata->no_reboot_use_pmc) {
p->gcs_pmc_res = platform_get_resource(pdev,
IORESOURCE_MEM,
ICH_RES_MEM_GCS_PMC);
diff --git a/drivers/watchdog/intel-mid_wdt.c b/drivers/watchdog/intel-mid_wdt.c
index 470213abfd3d..1ae03b64ef8b 100644
--- a/drivers/watchdog/intel-mid_wdt.c
+++ b/drivers/watchdog/intel-mid_wdt.c
@@ -33,14 +33,24 @@ enum {
SCU_WATCHDOG_KEEPALIVE,
};
-static inline int wdt_command(int sub, u32 *in, int inlen)
+struct mid_wdt {
+ struct watchdog_device wd;
+ struct device *dev;
+ struct intel_scu_ipc_dev *scu;
+};
+
+static inline int
+wdt_command(struct mid_wdt *mid, int sub, const void *in, size_t inlen, size_t size)
{
- return intel_scu_ipc_command(IPC_WATCHDOG, sub, in, inlen, NULL, 0);
+ struct intel_scu_ipc_dev *scu = mid->scu;
+
+ return intel_scu_ipc_dev_command_with_size(scu, IPC_WATCHDOG, sub, in,
+ inlen, size, NULL, 0);
}
static int wdt_start(struct watchdog_device *wd)
{
- struct device *dev = watchdog_get_drvdata(wd);
+ struct mid_wdt *mid = watchdog_get_drvdata(wd);
int ret, in_size;
int timeout = wd->timeout;
struct ipc_wd_start {
@@ -49,38 +59,41 @@ static int wdt_start(struct watchdog_device *wd)
} ipc_wd_start = { timeout - MID_WDT_PRETIMEOUT, timeout };
/*
- * SCU expects the input size for watchdog IPC to
- * be based on 4 bytes
+	 * SCU expects the input size for watchdog IPC to be 2, which is the
+	 * size of the structure in dwords. SCU IPC normally takes bytes,
+	 * but this is a special case where the size passed differs from
+	 * inlen.
*/
in_size = DIV_ROUND_UP(sizeof(ipc_wd_start), 4);
- ret = wdt_command(SCU_WATCHDOG_START, (u32 *)&ipc_wd_start, in_size);
+ ret = wdt_command(mid, SCU_WATCHDOG_START, &ipc_wd_start,
+ sizeof(ipc_wd_start), in_size);
if (ret)
- dev_crit(dev, "error starting watchdog: %d\n", ret);
+ dev_crit(mid->dev, "error starting watchdog: %d\n", ret);
return ret;
}
static int wdt_ping(struct watchdog_device *wd)
{
- struct device *dev = watchdog_get_drvdata(wd);
+ struct mid_wdt *mid = watchdog_get_drvdata(wd);
int ret;
- ret = wdt_command(SCU_WATCHDOG_KEEPALIVE, NULL, 0);
+ ret = wdt_command(mid, SCU_WATCHDOG_KEEPALIVE, NULL, 0, 0);
if (ret)
- dev_crit(dev, "Error executing keepalive: %d\n", ret);
+ dev_crit(mid->dev, "Error executing keepalive: %d\n", ret);
return ret;
}
static int wdt_stop(struct watchdog_device *wd)
{
- struct device *dev = watchdog_get_drvdata(wd);
+ struct mid_wdt *mid = watchdog_get_drvdata(wd);
int ret;
- ret = wdt_command(SCU_WATCHDOG_STOP, NULL, 0);
+ ret = wdt_command(mid, SCU_WATCHDOG_STOP, NULL, 0, 0);
if (ret)
- dev_crit(dev, "Error stopping watchdog: %d\n", ret);
+ dev_crit(mid->dev, "Error stopping watchdog: %d\n", ret);
return ret;
}
@@ -110,6 +123,7 @@ static int mid_wdt_probe(struct platform_device *pdev)
struct device *dev = &pdev->dev;
struct watchdog_device *wdt_dev;
struct intel_mid_wdt_pdata *pdata = dev->platform_data;
+ struct mid_wdt *mid;
int ret;
if (!pdata) {
@@ -123,10 +137,13 @@ static int mid_wdt_probe(struct platform_device *pdev)
return ret;
}
- wdt_dev = devm_kzalloc(dev, sizeof(*wdt_dev), GFP_KERNEL);
- if (!wdt_dev)
+ mid = devm_kzalloc(dev, sizeof(*mid), GFP_KERNEL);
+ if (!mid)
return -ENOMEM;
+ mid->dev = dev;
+ wdt_dev = &mid->wd;
+
wdt_dev->info = &mid_wdt_info;
wdt_dev->ops = &mid_wdt_ops;
wdt_dev->min_timeout = MID_WDT_TIMEOUT_MIN;
@@ -135,7 +152,7 @@ static int mid_wdt_probe(struct platform_device *pdev)
wdt_dev->parent = dev;
watchdog_set_nowayout(wdt_dev, WATCHDOG_NOWAYOUT);
- watchdog_set_drvdata(wdt_dev, dev);
+ watchdog_set_drvdata(wdt_dev, mid);
ret = devm_request_irq(dev, pdata->irq, mid_wdt_irq,
IRQF_SHARED | IRQF_NO_SUSPEND, "watchdog",
@@ -145,6 +162,10 @@ static int mid_wdt_probe(struct platform_device *pdev)
return ret;
}
+ mid->scu = devm_intel_scu_ipc_dev_get(dev);
+ if (!mid->scu)
+ return -EPROBE_DEFER;
+
/*
* The firmware followed by U-Boot leaves the watchdog running
* with the default threshold which may vary. When we get here
diff --git a/fs/Kconfig b/fs/Kconfig
index f08fbbfafd9a..d1ad3935fb85 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -166,7 +166,7 @@ config TMPFS
space. If you unmount a tmpfs instance, everything stored therein is
lost.
- See <file:Documentation/filesystems/tmpfs.txt> for details.
+ See <file:Documentation/filesystems/tmpfs.rst> for details.
config TMPFS_POSIX_ACL
bool "Tmpfs POSIX Access Control Lists"
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index 62dc4f577ba1..04f86b8c100e 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -36,6 +36,12 @@ config COMPAT_BINFMT_ELF
config ARCH_BINFMT_ELF_STATE
bool
+config ARCH_HAVE_ELF_PROT
+ bool
+
+config ARCH_USE_GNU_PROPERTY
+ bool
+
config BINFMT_ELF_FDPIC
bool "Kernel support for FDPIC ELF binaries"
default y if !BINFMT_ELF
@@ -72,7 +78,7 @@ config CORE_DUMP_DEFAULT_ELF_HEADERS
The core dump behavior can be controlled per process using
the /proc/PID/coredump_filter pseudo-file; this setting is
- inherited. See Documentation/filesystems/proc.txt for details.
+ inherited. See Documentation/filesystems/proc.rst for details.
This config option changes the default setting of coredump_filter
seen at boot time. If unsure, say Y.
diff --git a/fs/adfs/Kconfig b/fs/adfs/Kconfig
index df4650dccf68..44738fed6625 100644
--- a/fs/adfs/Kconfig
+++ b/fs/adfs/Kconfig
@@ -12,7 +12,7 @@ config ADFS_FS
The ADFS partition should be the first partition (i.e.,
/dev/[hs]d?1) on each of your drives. Please read the file
- <file:Documentation/filesystems/adfs.txt> for further details.
+ <file:Documentation/filesystems/adfs.rst> for further details.
To compile this code as a module, choose M here: the module will be
called adfs.
diff --git a/fs/affs/Kconfig b/fs/affs/Kconfig
index 84c46b9025c5..eb9d0ab850cb 100644
--- a/fs/affs/Kconfig
+++ b/fs/affs/Kconfig
@@ -9,7 +9,7 @@ config AFFS_FS
FFS partition on your hard drive. Amiga floppies however cannot be
read with this driver due to an incompatibility of the floppy
controller used in an Amiga and the standard floppy controller in
- PCs and workstations. Read <file:Documentation/filesystems/affs.txt>
+ PCs and workstations. Read <file:Documentation/filesystems/affs.rst>
and <file:fs/affs/Changes>.
With this driver you can also mount disk files used by Bernd
diff --git a/fs/afs/Kconfig b/fs/afs/Kconfig
index 3fb1f559e317..1ad211d72b3b 100644
--- a/fs/afs/Kconfig
+++ b/fs/afs/Kconfig
@@ -8,7 +8,7 @@ config AFS_FS
If you say Y here, you will get an experimental Andrew File System
driver. It currently only supports unsecured read-only AFS access.
- See <file:Documentation/filesystems/afs.txt> for more information.
+ See <file:Documentation/filesystems/afs.rst> for more information.
If unsure, say N.
@@ -18,7 +18,7 @@ config AFS_DEBUG
help
Say Y here to make runtime controllable debugging messages appear.
- See <file:Documentation/filesystems/afs.txt> for more information.
+ See <file:Documentation/filesystems/afs.rst> for more information.
If unsure, say N.
@@ -37,6 +37,6 @@ config AFS_DEBUG_CURSOR
the dmesg log if the server rotation algorithm fails to successfully
contact a server.
- See <file:Documentation/filesystems/afs.txt> for more information.
+ See <file:Documentation/filesystems/afs.rst> for more information.
If unsure, say N.
diff --git a/fs/aio.c b/fs/aio.c
index 5f3d3d814928..6483f9274d5e 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -176,6 +176,7 @@ struct fsync_iocb {
struct file *file;
struct work_struct work;
bool datasync;
+ struct cred *creds;
};
struct poll_iocb {
@@ -1589,8 +1590,11 @@ static int aio_write(struct kiocb *req, const struct iocb *iocb,
static void aio_fsync_work(struct work_struct *work)
{
struct aio_kiocb *iocb = container_of(work, struct aio_kiocb, fsync.work);
+ const struct cred *old_cred = override_creds(iocb->fsync.creds);
iocb->ki_res.res = vfs_fsync(iocb->fsync.file, iocb->fsync.datasync);
+ revert_creds(old_cred);
+ put_cred(iocb->fsync.creds);
iocb_put(iocb);
}
@@ -1604,6 +1608,10 @@ static int aio_fsync(struct fsync_iocb *req, const struct iocb *iocb,
if (unlikely(!req->file->f_op->fsync))
return -EINVAL;
+ req->creds = prepare_creds();
+ if (!req->creds)
+ return -ENOMEM;
+
req->datasync = datasync;
INIT_WORK(&req->work, aio_fsync_work);
schedule_work(&req->work);
diff --git a/fs/bfs/Kconfig b/fs/bfs/Kconfig
index 3e1247f07913..3a757805b585 100644
--- a/fs/bfs/Kconfig
+++ b/fs/bfs/Kconfig
@@ -11,7 +11,7 @@ config BFS_FS
on your /stand slice from within Linux. You then also need to say Y
to "UnixWare slices support", below. More information about the BFS
file system is contained in the file
- <file:Documentation/filesystems/bfs.txt>.
+ <file:Documentation/filesystems/bfs.rst>.
If you don't know what this is about, say N.
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 25d489bc9453..8945671fe0e5 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -40,12 +40,18 @@
#include <linux/sched/coredump.h>
#include <linux/sched/task_stack.h>
#include <linux/sched/cputime.h>
+#include <linux/sizes.h>
+#include <linux/types.h>
#include <linux/cred.h>
#include <linux/dax.h>
#include <linux/uaccess.h>
#include <asm/param.h>
#include <asm/page.h>
+#ifndef ELF_COMPAT
+#define ELF_COMPAT 0
+#endif
+
#ifndef user_long_t
#define user_long_t long
#endif
@@ -539,7 +545,8 @@ static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp,
#endif /* !CONFIG_ARCH_BINFMT_ELF_STATE */
-static inline int make_prot(u32 p_flags)
+static inline int make_prot(u32 p_flags, struct arch_elf_state *arch_state,
+ bool has_interp, bool is_interp)
{
int prot = 0;
@@ -549,7 +556,8 @@ static inline int make_prot(u32 p_flags)
prot |= PROT_WRITE;
if (p_flags & PF_X)
prot |= PROT_EXEC;
- return prot;
+
+ return arch_elf_adjust_prot(prot, arch_state, has_interp, is_interp);
}
/* This is much more generalized than the library routine read function,
@@ -559,7 +567,8 @@ static inline int make_prot(u32 p_flags)
static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
struct file *interpreter,
- unsigned long no_base, struct elf_phdr *interp_elf_phdata)
+ unsigned long no_base, struct elf_phdr *interp_elf_phdata,
+ struct arch_elf_state *arch_state)
{
struct elf_phdr *eppnt;
unsigned long load_addr = 0;
@@ -591,7 +600,8 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
if (eppnt->p_type == PT_LOAD) {
int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
- int elf_prot = make_prot(eppnt->p_flags);
+ int elf_prot = make_prot(eppnt->p_flags, arch_state,
+ true, true);
unsigned long vaddr = 0;
unsigned long k, map_addr;
@@ -682,6 +692,111 @@ out:
* libraries. There is no binary dependent code anywhere else.
*/
+static int parse_elf_property(const char *data, size_t *off, size_t datasz,
+ struct arch_elf_state *arch,
+ bool have_prev_type, u32 *prev_type)
+{
+ size_t o, step;
+ const struct gnu_property *pr;
+ int ret;
+
+ if (*off == datasz)
+ return -ENOENT;
+
+ if (WARN_ON_ONCE(*off > datasz || *off % ELF_GNU_PROPERTY_ALIGN))
+ return -EIO;
+ o = *off;
+ datasz -= *off;
+
+ if (datasz < sizeof(*pr))
+ return -ENOEXEC;
+ pr = (const struct gnu_property *)(data + o);
+ o += sizeof(*pr);
+ datasz -= sizeof(*pr);
+
+ if (pr->pr_datasz > datasz)
+ return -ENOEXEC;
+
+ WARN_ON_ONCE(o % ELF_GNU_PROPERTY_ALIGN);
+ step = round_up(pr->pr_datasz, ELF_GNU_PROPERTY_ALIGN);
+ if (step > datasz)
+ return -ENOEXEC;
+
+ /* Properties are supposed to be unique and sorted on pr_type: */
+ if (have_prev_type && pr->pr_type <= *prev_type)
+ return -ENOEXEC;
+ *prev_type = pr->pr_type;
+
+ ret = arch_parse_elf_property(pr->pr_type, data + o,
+ pr->pr_datasz, ELF_COMPAT, arch);
+ if (ret)
+ return ret;
+
+ *off = o + step;
+ return 0;
+}
+
+#define NOTE_DATA_SZ SZ_1K
+#define GNU_PROPERTY_TYPE_0_NAME "GNU"
+#define NOTE_NAME_SZ (sizeof(GNU_PROPERTY_TYPE_0_NAME))
+
+static int parse_elf_properties(struct file *f, const struct elf_phdr *phdr,
+ struct arch_elf_state *arch)
+{
+ union {
+ struct elf_note nhdr;
+ char data[NOTE_DATA_SZ];
+ } note;
+ loff_t pos;
+ ssize_t n;
+ size_t off, datasz;
+ int ret;
+ bool have_prev_type;
+ u32 prev_type;
+
+ if (!IS_ENABLED(CONFIG_ARCH_USE_GNU_PROPERTY) || !phdr)
+ return 0;
+
+ /* load_elf_binary() shouldn't call us unless this is true... */
+ if (WARN_ON_ONCE(phdr->p_type != PT_GNU_PROPERTY))
+ return -ENOEXEC;
+
+ /* If the properties are crazy large, that's too bad (for now): */
+ if (phdr->p_filesz > sizeof(note))
+ return -ENOEXEC;
+
+ pos = phdr->p_offset;
+ n = kernel_read(f, &note, phdr->p_filesz, &pos);
+
+ BUILD_BUG_ON(sizeof(note) < sizeof(note.nhdr) + NOTE_NAME_SZ);
+ if (n < 0 || n < sizeof(note.nhdr) + NOTE_NAME_SZ)
+ return -EIO;
+
+ if (note.nhdr.n_type != NT_GNU_PROPERTY_TYPE_0 ||
+ note.nhdr.n_namesz != NOTE_NAME_SZ ||
+ strncmp(note.data + sizeof(note.nhdr),
+ GNU_PROPERTY_TYPE_0_NAME, n - sizeof(note.nhdr)))
+ return -ENOEXEC;
+
+ off = round_up(sizeof(note.nhdr) + NOTE_NAME_SZ,
+ ELF_GNU_PROPERTY_ALIGN);
+ if (off > n)
+ return -ENOEXEC;
+
+ if (note.nhdr.n_descsz > n - off)
+ return -ENOEXEC;
+ datasz = off + note.nhdr.n_descsz;
+
+ have_prev_type = false;
+ do {
+ ret = parse_elf_property(note.data, &off, datasz, arch,
+ have_prev_type, &prev_type);
+ have_prev_type = true;
+ } while (!ret);
+
+ return ret == -ENOENT ? 0 : ret;
+}
+
static int load_elf_binary(struct linux_binprm *bprm)
{
struct file *interpreter = NULL; /* to shut gcc up */
@@ -689,6 +804,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
int load_addr_set = 0;
unsigned long error;
struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
+ struct elf_phdr *elf_property_phdata = NULL;
unsigned long elf_bss, elf_brk;
int bss_prot = 0;
int retval, i;
@@ -726,6 +842,11 @@ static int load_elf_binary(struct linux_binprm *bprm)
for (i = 0; i < elf_ex->e_phnum; i++, elf_ppnt++) {
char *elf_interpreter;
+ if (elf_ppnt->p_type == PT_GNU_PROPERTY) {
+ elf_property_phdata = elf_ppnt;
+ continue;
+ }
+
if (elf_ppnt->p_type != PT_INTERP)
continue;
@@ -819,9 +940,14 @@ out_free_interp:
goto out_free_dentry;
/* Pass PT_LOPROC..PT_HIPROC headers to arch code */
+ elf_property_phdata = NULL;
elf_ppnt = interp_elf_phdata;
for (i = 0; i < interp_elf_ex->e_phnum; i++, elf_ppnt++)
switch (elf_ppnt->p_type) {
+ case PT_GNU_PROPERTY:
+ elf_property_phdata = elf_ppnt;
+ break;
+
case PT_LOPROC ... PT_HIPROC:
retval = arch_elf_pt_proc(interp_elf_ex,
elf_ppnt, interpreter,
@@ -832,6 +958,11 @@ out_free_interp:
}
}
+ retval = parse_elf_properties(interpreter ?: bprm->file,
+ elf_property_phdata, &arch_state);
+ if (retval)
+ goto out_free_dentry;
+
/*
* Allow arch code to reject the ELF at this point, whilst it's
* still possible to return an error to the code that invoked
@@ -913,7 +1044,8 @@ out_free_interp:
}
}
- elf_prot = make_prot(elf_ppnt->p_flags);
+ elf_prot = make_prot(elf_ppnt->p_flags, &arch_state,
+ !!interpreter, false);
elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
@@ -1056,7 +1188,8 @@ out_free_interp:
if (interpreter) {
elf_entry = load_elf_interp(interp_elf_ex,
interpreter,
- load_bias, interp_elf_phdata);
+ load_bias, interp_elf_phdata,
+ &arch_state);
if (!IS_ERR((void *)elf_entry)) {
/*
* load_elf_interp() returns relocation
@@ -1355,7 +1488,6 @@ static unsigned long vma_dump_size(struct vm_area_struct *vma,
vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
u32 __user *header = (u32 __user *) vma->vm_start;
u32 word;
- mm_segment_t fs = get_fs();
/*
* Doing it this way gets the constant folded by GCC.
*/
@@ -1368,14 +1500,8 @@ static unsigned long vma_dump_size(struct vm_area_struct *vma,
magic.elfmag[EI_MAG1] = ELFMAG1;
magic.elfmag[EI_MAG2] = ELFMAG2;
magic.elfmag[EI_MAG3] = ELFMAG3;
- /*
- * Switch to the user "segment" for get_user(),
- * then put back what elf_core_dump() had in place.
- */
- set_fs(USER_DS);
if (unlikely(get_user(word, header)))
word = 0;
- set_fs(fs);
if (word == magic.cmp)
return PAGE_SIZE;
}
@@ -1556,10 +1682,7 @@ static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
const kernel_siginfo_t *siginfo)
{
- mm_segment_t old_fs = get_fs();
- set_fs(KERNEL_DS);
- copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
- set_fs(old_fs);
+ copy_siginfo_to_external(csigdata, siginfo);
fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
}
@@ -2186,7 +2309,6 @@ static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
static int elf_core_dump(struct coredump_params *cprm)
{
int has_dumped = 0;
- mm_segment_t fs;
int segs, i;
size_t vma_data_size = 0;
struct vm_area_struct *vma, *gate_vma;
@@ -2235,13 +2357,10 @@ static int elf_core_dump(struct coredump_params *cprm)
* notes. This also sets up the file header.
*/
if (!fill_note_info(&elf, e_phnum, &info, cprm->siginfo, cprm->regs))
- goto cleanup;
+ goto end_coredump;
has_dumped = 1;
- fs = get_fs();
- set_fs(KERNEL_DS);
-
offset += sizeof(elf); /* Elf header */
offset += segs * sizeof(struct elf_phdr); /* Program headers */
@@ -2369,9 +2488,6 @@ static int elf_core_dump(struct coredump_params *cprm)
}
end_coredump:
- set_fs(fs);
-
-cleanup:
free_note_info(&info);
kfree(shdr4extnum);
kvfree(vma_filesz);
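
parse_elf_properties() above treats the PT_GNU_PROPERTY segment as a single NT_GNU_PROPERTY_TYPE_0 note: an elf_note header, the "GNU" name padded to ELF_GNU_PROPERTY_ALIGN, then a descriptor that is itself a sorted array of (pr_type, pr_datasz, payload) entries, each padded to the same alignment. A rough userspace sketch of that descriptor walk; the struct layout mirrors the UAPI header, the alignment is hard-coded to the ELF64 value of 8, and walk_properties() is a made-up name:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Mirrors struct gnu_property from include/uapi/linux/elf.h */
struct gnu_property {
	uint32_t pr_type;
	uint32_t pr_datasz;
};

/* ELF_GNU_PROPERTY_ALIGN is arch-defined; 8 is the ELF64 value */
static size_t align_up(size_t x, size_t a)
{
	return (x + a - 1) & ~(a - 1);
}

/*
 * Walk the descriptor of an NT_GNU_PROPERTY_TYPE_0 note, stepping the same
 * way parse_elf_property() does: entry header, then payload rounded up to
 * the property alignment.
 */
static void walk_properties(const unsigned char *desc, size_t descsz)
{
	size_t off = 0;

	while (off + sizeof(struct gnu_property) <= descsz) {
		const struct gnu_property *pr =
			(const struct gnu_property *)(desc + off);
		size_t step = align_up(pr->pr_datasz, 8);

		if (off + sizeof(*pr) + step > descsz)
			break;
		printf("property type 0x%x, %u bytes of data\n",
		       (unsigned int)pr->pr_type, (unsigned int)pr->pr_datasz);
		off += sizeof(*pr) + step;
	}
}
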
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 240f66663543..d9501a86cec9 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1549,7 +1549,6 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
{
#define NUM_NOTES 6
int has_dumped = 0;
- mm_segment_t fs;
int segs;
int i;
struct vm_area_struct *vma;
@@ -1589,31 +1588,31 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
/* alloc memory for large data structures: too large to be on stack */
elf = kmalloc(sizeof(*elf), GFP_KERNEL);
if (!elf)
- goto cleanup;
+ goto end_coredump;
prstatus = kzalloc(sizeof(*prstatus), GFP_KERNEL);
if (!prstatus)
- goto cleanup;
+ goto end_coredump;
psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
if (!psinfo)
- goto cleanup;
+ goto end_coredump;
notes = kmalloc_array(NUM_NOTES, sizeof(struct memelfnote),
GFP_KERNEL);
if (!notes)
- goto cleanup;
+ goto end_coredump;
fpu = kmalloc(sizeof(*fpu), GFP_KERNEL);
if (!fpu)
- goto cleanup;
+ goto end_coredump;
#ifdef ELF_CORE_COPY_XFPREGS
xfpu = kmalloc(sizeof(*xfpu), GFP_KERNEL);
if (!xfpu)
- goto cleanup;
+ goto end_coredump;
#endif
for (ct = current->mm->core_state->dumper.next;
ct; ct = ct->next) {
tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
if (!tmp)
- goto cleanup;
+ goto end_coredump;
tmp->thread = ct->task;
list_add(&tmp->list, &thread_list);
@@ -1678,9 +1677,6 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
"LINUX", ELF_CORE_XFPREG_TYPE, sizeof(*xfpu), xfpu);
#endif
- fs = get_fs();
- set_fs(KERNEL_DS);
-
offset += sizeof(*elf); /* Elf header */
offset += segs * sizeof(struct elf_phdr); /* Program headers */
@@ -1788,9 +1784,6 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
}
end_coredump:
- set_fs(fs);
-
-cleanup:
while (!list_empty(&thread_list)) {
struct list_head *tmp = thread_list.next;
list_del(tmp);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 93672c3f1c78..47860e589388 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -255,7 +255,7 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
break;
if (!(iocb->ki_flags & IOCB_HIPRI) ||
!blk_poll(bdev_get_queue(bdev), qc, true))
- io_schedule();
+ blk_io_schedule();
}
__set_current_state(TASK_RUNNING);
@@ -449,7 +449,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
if (!(iocb->ki_flags & IOCB_HIPRI) ||
!blk_poll(bdev_get_queue(bdev), qc, true))
- io_schedule();
+ blk_io_schedule();
}
__set_current_state(TASK_RUNNING);
@@ -614,10 +614,9 @@ static int blkdev_readpage(struct file * file, struct page * page)
return block_read_full_page(page, blkdev_get_block);
}
-static int blkdev_readpages(struct file *file, struct address_space *mapping,
- struct list_head *pages, unsigned nr_pages)
+static void blkdev_readahead(struct readahead_control *rac)
{
- return mpage_readpages(mapping, pages, nr_pages, blkdev_get_block);
+ mpage_readahead(rac, blkdev_get_block);
}
static int blkdev_write_begin(struct file *file, struct address_space *mapping,
@@ -672,7 +671,7 @@ int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
* i_mutex and doing so causes performance issues with concurrent
* O_SYNC writers to a block device.
*/
- error = blkdev_issue_flush(bdev, GFP_KERNEL, NULL);
+ error = blkdev_issue_flush(bdev, GFP_KERNEL);
if (error == -EOPNOTSUPP)
error = 0;
@@ -713,7 +712,6 @@ int bdev_read_page(struct block_device *bdev, sector_t sector,
blk_queue_exit(bdev->bd_queue);
return result;
}
-EXPORT_SYMBOL_GPL(bdev_read_page);
/**
* bdev_write_page() - Start writing a page to a block device
@@ -758,7 +756,6 @@ int bdev_write_page(struct block_device *bdev, sector_t sector,
blk_queue_exit(bdev->bd_queue);
return result;
}
-EXPORT_SYMBOL_GPL(bdev_write_page);
/*
* pseudo-fs
@@ -882,21 +879,6 @@ static int bdev_set(struct inode *inode, void *data)
static LIST_HEAD(all_bdevs);
-/*
- * If there is a bdev inode for this device, unhash it so that it gets evicted
- * as soon as last inode reference is dropped.
- */
-void bdev_unhash_inode(dev_t dev)
-{
- struct inode *inode;
-
- inode = ilookup5(blockdev_superblock, hash(dev), bdev_test, &dev);
- if (inode) {
- remove_inode_hash(inode);
- iput(inode);
- }
-}
-
struct block_device *bdget(dev_t dev)
{
struct block_device *bdev;
@@ -1516,7 +1498,7 @@ int bdev_disk_changed(struct block_device *bdev, bool invalidate)
lockdep_assert_held(&bdev->bd_mutex);
rescan:
- ret = blk_drop_partitions(disk, bdev);
+ ret = blk_drop_partitions(bdev);
if (ret)
return ret;
@@ -2023,8 +2005,7 @@ ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (bdev_read_only(I_BDEV(bd_inode)))
return -EPERM;
- /* uswsusp needs write permission to the swap */
- if (IS_SWAPFILE(bd_inode) && !hibernation_available())
+ if (IS_SWAPFILE(bd_inode) && !is_hibernate_resume_dev(bd_inode))
return -ETXTBSY;
if (!iov_iter_count(from))
@@ -2085,7 +2066,7 @@ static int blkdev_writepages(struct address_space *mapping,
static const struct address_space_operations def_blk_aops = {
.readpage = blkdev_readpage,
- .readpages = blkdev_readpages,
+ .readahead = blkdev_readahead,
.writepage = blkdev_writepage,
.write_begin = blkdev_write_begin,
.write_end = blkdev_write_end,
@@ -2183,18 +2164,6 @@ const struct file_operations def_blk_fops = {
.fallocate = blkdev_fallocate,
};
-int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg)
-{
- int res;
- mm_segment_t old_fs = get_fs();
- set_fs(KERNEL_DS);
- res = blkdev_ioctl(bdev, 0, cmd, arg);
- set_fs(old_fs);
- return res;
-}
-
-EXPORT_SYMBOL(ioctl_by_bdev);
-
/**
* lookup_bdev - lookup a struct block_device by name
* @pathname: special file representing the block device
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
index 575636f6491e..68b95ad82126 100644
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -14,6 +14,7 @@ config BTRFS_FS
select LZO_DECOMPRESS
select ZSTD_COMPRESS
select ZSTD_DECOMPRESS
+ select FS_IOMAP
select RAID6_PQ
select XOR_BLOCKS
select SRCU
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 0cc02577577b..d888e71e66b6 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -13,6 +13,7 @@
#include "transaction.h"
#include "delayed-ref.h"
#include "locking.h"
+#include "misc.h"
/* Just an arbitrary number so we can be sure this happened */
#define BACKREF_FOUND_SHARED 6
@@ -537,18 +538,13 @@ static int resolve_indirect_ref(struct btrfs_fs_info *fs_info,
const u64 *extent_item_pos, bool ignore_offset)
{
struct btrfs_root *root;
- struct btrfs_key root_key;
struct extent_buffer *eb;
int ret = 0;
int root_level;
int level = ref->level;
struct btrfs_key search_key = ref->key_for_search;
- root_key.objectid = ref->root_id;
- root_key.type = BTRFS_ROOT_ITEM_KEY;
- root_key.offset = (u64)-1;
-
- root = btrfs_get_fs_root(fs_info, &root_key, false);
+ root = btrfs_get_fs_root(fs_info, ref->root_id, false);
if (IS_ERR(root)) {
ret = PTR_ERR(root);
goto out_free;
@@ -2295,3 +2291,832 @@ void free_ipath(struct inode_fs_paths *ipath)
kvfree(ipath->fspath);
kfree(ipath);
}
+
+struct btrfs_backref_iter *btrfs_backref_iter_alloc(
+ struct btrfs_fs_info *fs_info, gfp_t gfp_flag)
+{
+ struct btrfs_backref_iter *ret;
+
+ ret = kzalloc(sizeof(*ret), gfp_flag);
+ if (!ret)
+ return NULL;
+
+ ret->path = btrfs_alloc_path();
+ if (!ret->path) {
+ kfree(ret);
+ return NULL;
+ }
+
+ /* Current backref iterator only supports iteration in commit root */
+ ret->path->search_commit_root = 1;
+ ret->path->skip_locking = 1;
+ ret->fs_info = fs_info;
+
+ return ret;
+}
+
+int btrfs_backref_iter_start(struct btrfs_backref_iter *iter, u64 bytenr)
+{
+ struct btrfs_fs_info *fs_info = iter->fs_info;
+ struct btrfs_path *path = iter->path;
+ struct btrfs_extent_item *ei;
+ struct btrfs_key key;
+ int ret;
+
+ key.objectid = bytenr;
+ key.type = BTRFS_METADATA_ITEM_KEY;
+ key.offset = (u64)-1;
+ iter->bytenr = bytenr;
+
+ ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0);
+ if (ret < 0)
+ return ret;
+ if (ret == 0) {
+ ret = -EUCLEAN;
+ goto release;
+ }
+ if (path->slots[0] == 0) {
+ WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
+ ret = -EUCLEAN;
+ goto release;
+ }
+ path->slots[0]--;
+
+ btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+ if ((key.type != BTRFS_EXTENT_ITEM_KEY &&
+ key.type != BTRFS_METADATA_ITEM_KEY) || key.objectid != bytenr) {
+ ret = -ENOENT;
+ goto release;
+ }
+ memcpy(&iter->cur_key, &key, sizeof(key));
+ iter->item_ptr = (u32)btrfs_item_ptr_offset(path->nodes[0],
+ path->slots[0]);
+ iter->end_ptr = (u32)(iter->item_ptr +
+ btrfs_item_size_nr(path->nodes[0], path->slots[0]));
+ ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
+ struct btrfs_extent_item);
+
+ /*
+ * Only iteration of tree backrefs is supported for now.
+ *
+ * This is an extra precaution for non skinny-metadata, where
+ * EXTENT_ITEM is also used for tree blocks; in that case we can only
+ * use extent flags to determine if it's a tree block.
+ */
+ if (btrfs_extent_flags(path->nodes[0], ei) & BTRFS_EXTENT_FLAG_DATA) {
+ ret = -ENOTSUPP;
+ goto release;
+ }
+ iter->cur_ptr = (u32)(iter->item_ptr + sizeof(*ei));
+
+ /* If there is no inline backref, go search for keyed backref */
+ if (iter->cur_ptr >= iter->end_ptr) {
+ ret = btrfs_next_item(fs_info->extent_root, path);
+
+ /* No inline nor keyed ref */
+ if (ret > 0) {
+ ret = -ENOENT;
+ goto release;
+ }
+ if (ret < 0)
+ goto release;
+
+ btrfs_item_key_to_cpu(path->nodes[0], &iter->cur_key,
+ path->slots[0]);
+ if (iter->cur_key.objectid != bytenr ||
+ (iter->cur_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
+ iter->cur_key.type != BTRFS_TREE_BLOCK_REF_KEY)) {
+ ret = -ENOENT;
+ goto release;
+ }
+ iter->cur_ptr = (u32)btrfs_item_ptr_offset(path->nodes[0],
+ path->slots[0]);
+ iter->item_ptr = iter->cur_ptr;
+ iter->end_ptr = (u32)(iter->item_ptr + btrfs_item_size_nr(
+ path->nodes[0], path->slots[0]));
+ }
+
+ return 0;
+release:
+ btrfs_backref_iter_release(iter);
+ return ret;
+}
+
+/*
+ * Go to the next backref item of current bytenr, can be either inlined or
+ * keyed.
+ *
+ * Caller needs to check whether it's inline ref or not by iter->cur_key.
+ *
+ * Return 0 if we get the next backref without problem.
+ * Return >0 if there is no extra backref for this bytenr.
+ * Return <0 if something went wrong.
+ */
+int btrfs_backref_iter_next(struct btrfs_backref_iter *iter)
+{
+ struct extent_buffer *eb = btrfs_backref_get_eb(iter);
+ struct btrfs_path *path = iter->path;
+ struct btrfs_extent_inline_ref *iref;
+ int ret;
+ u32 size;
+
+ if (btrfs_backref_iter_is_inline_ref(iter)) {
+ /* We're still inside the inline refs */
+ ASSERT(iter->cur_ptr < iter->end_ptr);
+
+ if (btrfs_backref_has_tree_block_info(iter)) {
+ /* First tree block info */
+ size = sizeof(struct btrfs_tree_block_info);
+ } else {
+ /* Use inline ref type to determine the size */
+ int type;
+
+ iref = (struct btrfs_extent_inline_ref *)
+ ((unsigned long)iter->cur_ptr);
+ type = btrfs_extent_inline_ref_type(eb, iref);
+
+ size = btrfs_extent_inline_ref_size(type);
+ }
+ iter->cur_ptr += size;
+ if (iter->cur_ptr < iter->end_ptr)
+ return 0;
+
+ /* All inline items iterated, fall through */
+ }
+
+ /* We're at keyed items, there is no inline item, go to the next one */
+ ret = btrfs_next_item(iter->fs_info->extent_root, iter->path);
+ if (ret)
+ return ret;
+
+ btrfs_item_key_to_cpu(path->nodes[0], &iter->cur_key, path->slots[0]);
+ if (iter->cur_key.objectid != iter->bytenr ||
+ (iter->cur_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
+ iter->cur_key.type != BTRFS_SHARED_BLOCK_REF_KEY))
+ return 1;
+ iter->item_ptr = (u32)btrfs_item_ptr_offset(path->nodes[0],
+ path->slots[0]);
+ iter->cur_ptr = iter->item_ptr;
+ iter->end_ptr = iter->item_ptr + (u32)btrfs_item_size_nr(path->nodes[0],
+ path->slots[0]);
+ return 0;
+}
+
+void btrfs_backref_init_cache(struct btrfs_fs_info *fs_info,
+ struct btrfs_backref_cache *cache, int is_reloc)
+{
+ int i;
+
+ cache->rb_root = RB_ROOT;
+ for (i = 0; i < BTRFS_MAX_LEVEL; i++)
+ INIT_LIST_HEAD(&cache->pending[i]);
+ INIT_LIST_HEAD(&cache->changed);
+ INIT_LIST_HEAD(&cache->detached);
+ INIT_LIST_HEAD(&cache->leaves);
+ INIT_LIST_HEAD(&cache->pending_edge);
+ INIT_LIST_HEAD(&cache->useless_node);
+ cache->fs_info = fs_info;
+ cache->is_reloc = is_reloc;
+}
+
+struct btrfs_backref_node *btrfs_backref_alloc_node(
+ struct btrfs_backref_cache *cache, u64 bytenr, int level)
+{
+ struct btrfs_backref_node *node;
+
+ ASSERT(level >= 0 && level < BTRFS_MAX_LEVEL);
+ node = kzalloc(sizeof(*node), GFP_NOFS);
+ if (!node)
+ return node;
+
+ INIT_LIST_HEAD(&node->list);
+ INIT_LIST_HEAD(&node->upper);
+ INIT_LIST_HEAD(&node->lower);
+ RB_CLEAR_NODE(&node->rb_node);
+ cache->nr_nodes++;
+ node->level = level;
+ node->bytenr = bytenr;
+
+ return node;
+}
+
+struct btrfs_backref_edge *btrfs_backref_alloc_edge(
+ struct btrfs_backref_cache *cache)
+{
+ struct btrfs_backref_edge *edge;
+
+ edge = kzalloc(sizeof(*edge), GFP_NOFS);
+ if (edge)
+ cache->nr_edges++;
+ return edge;
+}
+
+/*
+ * Drop the backref node from cache, also cleaning up all its
+ * upper edges and any uncached nodes in the path.
+ *
+ * This cleanup happens bottom up, thus the node should either
+ * be the lowest node in the cache or a detached node.
+ */
+void btrfs_backref_cleanup_node(struct btrfs_backref_cache *cache,
+ struct btrfs_backref_node *node)
+{
+ struct btrfs_backref_node *upper;
+ struct btrfs_backref_edge *edge;
+
+ if (!node)
+ return;
+
+ BUG_ON(!node->lowest && !node->detached);
+ while (!list_empty(&node->upper)) {
+ edge = list_entry(node->upper.next, struct btrfs_backref_edge,
+ list[LOWER]);
+ upper = edge->node[UPPER];
+ list_del(&edge->list[LOWER]);
+ list_del(&edge->list[UPPER]);
+ btrfs_backref_free_edge(cache, edge);
+
+ if (RB_EMPTY_NODE(&upper->rb_node)) {
+ BUG_ON(!list_empty(&node->upper));
+ btrfs_backref_drop_node(cache, node);
+ node = upper;
+ node->lowest = 1;
+ continue;
+ }
+ /*
+ * Add the node to the leaf node list if no other child block is
+ * cached.
+ */
+ if (list_empty(&upper->lower)) {
+ list_add_tail(&upper->lower, &cache->leaves);
+ upper->lowest = 1;
+ }
+ }
+
+ btrfs_backref_drop_node(cache, node);
+}
+
+/*
+ * Release all nodes/edges from current cache
+ */
+void btrfs_backref_release_cache(struct btrfs_backref_cache *cache)
+{
+ struct btrfs_backref_node *node;
+ int i;
+
+ while (!list_empty(&cache->detached)) {
+ node = list_entry(cache->detached.next,
+ struct btrfs_backref_node, list);
+ btrfs_backref_cleanup_node(cache, node);
+ }
+
+ while (!list_empty(&cache->leaves)) {
+ node = list_entry(cache->leaves.next,
+ struct btrfs_backref_node, lower);
+ btrfs_backref_cleanup_node(cache, node);
+ }
+
+ cache->last_trans = 0;
+
+ for (i = 0; i < BTRFS_MAX_LEVEL; i++)
+ ASSERT(list_empty(&cache->pending[i]));
+ ASSERT(list_empty(&cache->pending_edge));
+ ASSERT(list_empty(&cache->useless_node));
+ ASSERT(list_empty(&cache->changed));
+ ASSERT(list_empty(&cache->detached));
+ ASSERT(RB_EMPTY_ROOT(&cache->rb_root));
+ ASSERT(!cache->nr_nodes);
+ ASSERT(!cache->nr_edges);
+}
+
+/*
+ * Handle direct tree backref
+ *
+ * Direct tree backref means the backref item shows its parent bytenr
+ * directly. This is for SHARED_BLOCK_REF backref (keyed or inlined).
+ *
+ * @ref_key: The converted backref key.
+ * For keyed backref, it's the item key.
+ * For inlined backref, objectid is the bytenr,
+ * type is btrfs_inline_ref_type, offset is
+ * btrfs_inline_ref_offset.
+ */
+static int handle_direct_tree_backref(struct btrfs_backref_cache *cache,
+ struct btrfs_key *ref_key,
+ struct btrfs_backref_node *cur)
+{
+ struct btrfs_backref_edge *edge;
+ struct btrfs_backref_node *upper;
+ struct rb_node *rb_node;
+
+ ASSERT(ref_key->type == BTRFS_SHARED_BLOCK_REF_KEY);
+
+ /* Only reloc root uses backref pointing to itself */
+ if (ref_key->objectid == ref_key->offset) {
+ struct btrfs_root *root;
+
+ cur->is_reloc_root = 1;
+ /* Only reloc backref cache cares about a specific root */
+ if (cache->is_reloc) {
+ root = find_reloc_root(cache->fs_info, cur->bytenr);
+ if (WARN_ON(!root))
+ return -ENOENT;
+ cur->root = root;
+ } else {
+ /*
+ * For generic purpose backref cache, reloc root node
+ * is useless.
+ */
+ list_add(&cur->list, &cache->useless_node);
+ }
+ return 0;
+ }
+
+ edge = btrfs_backref_alloc_edge(cache);
+ if (!edge)
+ return -ENOMEM;
+
+ rb_node = rb_simple_search(&cache->rb_root, ref_key->offset);
+ if (!rb_node) {
+ /* Parent node not yet cached */
+ upper = btrfs_backref_alloc_node(cache, ref_key->offset,
+ cur->level + 1);
+ if (!upper) {
+ btrfs_backref_free_edge(cache, edge);
+ return -ENOMEM;
+ }
+
+ /*
+ * Backrefs for the upper level block aren't cached, add the
+ * block to the pending list
+ */
+ list_add_tail(&edge->list[UPPER], &cache->pending_edge);
+ } else {
+ /* Parent node already cached */
+ upper = rb_entry(rb_node, struct btrfs_backref_node, rb_node);
+ ASSERT(upper->checked);
+ INIT_LIST_HEAD(&edge->list[UPPER]);
+ }
+ btrfs_backref_link_edge(edge, cur, upper, LINK_LOWER);
+ return 0;
+}
+
+/*
+ * Handle indirect tree backref
+ *
+ * Indirect tree backref means we only know which tree the node belongs to.
+ * We still need to do a tree search to find out the parents. This is for
+ * TREE_BLOCK_REF backref (keyed or inlined).
+ *
+ * @ref_key: The same as @ref_key in handle_direct_tree_backref()
+ * @tree_key: The first key of this tree block.
+ * @path: A clean (released) path, to avoid allocating a new path every
+ * time the function gets called.
+ */
+static int handle_indirect_tree_backref(struct btrfs_backref_cache *cache,
+ struct btrfs_path *path,
+ struct btrfs_key *ref_key,
+ struct btrfs_key *tree_key,
+ struct btrfs_backref_node *cur)
+{
+ struct btrfs_fs_info *fs_info = cache->fs_info;
+ struct btrfs_backref_node *upper;
+ struct btrfs_backref_node *lower;
+ struct btrfs_backref_edge *edge;
+ struct extent_buffer *eb;
+ struct btrfs_root *root;
+ struct rb_node *rb_node;
+ int level;
+ bool need_check = true;
+ int ret;
+
+ root = btrfs_get_fs_root(fs_info, ref_key->offset, false);
+ if (IS_ERR(root))
+ return PTR_ERR(root);
+ if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
+ cur->cowonly = 1;
+
+ if (btrfs_root_level(&root->root_item) == cur->level) {
+ /* Tree root */
+ ASSERT(btrfs_root_bytenr(&root->root_item) == cur->bytenr);
+ /*
+ * For reloc backref cache, we may ignore reloc root. But for
+ * general purpose backref cache, we can't rely on
+ * btrfs_should_ignore_reloc_root() as it may conflict with
+ * current running relocation and lead to missing root.
+ *
+ * For general purpose backref cache, reloc root detection is
+ * completely relying on direct backref (key->offset is parent
+ * bytenr), thus only do such check for reloc cache.
+ */
+ if (btrfs_should_ignore_reloc_root(root) && cache->is_reloc) {
+ btrfs_put_root(root);
+ list_add(&cur->list, &cache->useless_node);
+ } else {
+ cur->root = root;
+ }
+ return 0;
+ }
+
+ level = cur->level + 1;
+
+ /* Search the tree to find parent blocks referring to the block */
+ path->search_commit_root = 1;
+ path->skip_locking = 1;
+ path->lowest_level = level;
+ ret = btrfs_search_slot(NULL, root, tree_key, path, 0, 0);
+ path->lowest_level = 0;
+ if (ret < 0) {
+ btrfs_put_root(root);
+ return ret;
+ }
+ if (ret > 0 && path->slots[level] > 0)
+ path->slots[level]--;
+
+ eb = path->nodes[level];
+ if (btrfs_node_blockptr(eb, path->slots[level]) != cur->bytenr) {
+ btrfs_err(fs_info,
+"couldn't find block (%llu) (level %d) in tree (%llu) with key (%llu %u %llu)",
+ cur->bytenr, level - 1, root->root_key.objectid,
+ tree_key->objectid, tree_key->type, tree_key->offset);
+ btrfs_put_root(root);
+ ret = -ENOENT;
+ goto out;
+ }
+ lower = cur;
+
+ /* Add all nodes and edges in the path */
+ for (; level < BTRFS_MAX_LEVEL; level++) {
+ if (!path->nodes[level]) {
+ ASSERT(btrfs_root_bytenr(&root->root_item) ==
+ lower->bytenr);
+ /* Same as previous should_ignore_reloc_root() call */
+ if (btrfs_should_ignore_reloc_root(root) &&
+ cache->is_reloc) {
+ btrfs_put_root(root);
+ list_add(&lower->list, &cache->useless_node);
+ } else {
+ lower->root = root;
+ }
+ break;
+ }
+
+ edge = btrfs_backref_alloc_edge(cache);
+ if (!edge) {
+ btrfs_put_root(root);
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ eb = path->nodes[level];
+ rb_node = rb_simple_search(&cache->rb_root, eb->start);
+ if (!rb_node) {
+ upper = btrfs_backref_alloc_node(cache, eb->start,
+ lower->level + 1);
+ if (!upper) {
+ btrfs_put_root(root);
+ btrfs_backref_free_edge(cache, edge);
+ ret = -ENOMEM;
+ goto out;
+ }
+ upper->owner = btrfs_header_owner(eb);
+ if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
+ upper->cowonly = 1;
+
+ /*
+ * If we know the block isn't shared we can avoid
+ * checking its backrefs.
+ */
+ if (btrfs_block_can_be_shared(root, eb))
+ upper->checked = 0;
+ else
+ upper->checked = 1;
+
+ /*
+ * Add the block to pending list if we need to check its
+ * backrefs; we only do this once while walking up a
+ * tree as we will catch anything else later on.
+ */
+ if (!upper->checked && need_check) {
+ need_check = false;
+ list_add_tail(&edge->list[UPPER],
+ &cache->pending_edge);
+ } else {
+ if (upper->checked)
+ need_check = true;
+ INIT_LIST_HEAD(&edge->list[UPPER]);
+ }
+ } else {
+ upper = rb_entry(rb_node, struct btrfs_backref_node,
+ rb_node);
+ ASSERT(upper->checked);
+ INIT_LIST_HEAD(&edge->list[UPPER]);
+ if (!upper->owner)
+ upper->owner = btrfs_header_owner(eb);
+ }
+ btrfs_backref_link_edge(edge, lower, upper, LINK_LOWER);
+
+ if (rb_node) {
+ btrfs_put_root(root);
+ break;
+ }
+ lower = upper;
+ upper = NULL;
+ }
+out:
+ btrfs_release_path(path);
+ return ret;
+}
+
+/*
+ * Add backref node @cur into @cache.
+ *
+ * NOTE: Even if the function returned 0, @cur is not yet cached as its upper
+ * links aren't yet bi-directional. The caller must finish such linkage
+ * with btrfs_backref_finish_upper_links().
+ *
+ * @path: Released path for indirect tree backref lookup
+ * @iter: Released backref iter for extent tree search
+ * @node_key: The first key of the tree block
+ */
+int btrfs_backref_add_tree_node(struct btrfs_backref_cache *cache,
+ struct btrfs_path *path,
+ struct btrfs_backref_iter *iter,
+ struct btrfs_key *node_key,
+ struct btrfs_backref_node *cur)
+{
+ struct btrfs_fs_info *fs_info = cache->fs_info;
+ struct btrfs_backref_edge *edge;
+ struct btrfs_backref_node *exist;
+ int ret;
+
+ ret = btrfs_backref_iter_start(iter, cur->bytenr);
+ if (ret < 0)
+ return ret;
+ /*
+ * We skip the first btrfs_tree_block_info, as we don't use the key
+ * stored in it, but fetch it from the tree block
+ */
+ if (btrfs_backref_has_tree_block_info(iter)) {
+ ret = btrfs_backref_iter_next(iter);
+ if (ret < 0)
+ goto out;
+ /* No extra backref? This means the tree block is corrupted */
+ if (ret > 0) {
+ ret = -EUCLEAN;
+ goto out;
+ }
+ }
+ WARN_ON(cur->checked);
+ if (!list_empty(&cur->upper)) {
+ /*
+ * The backref was added previously when processing backref of
+ * type BTRFS_TREE_BLOCK_REF_KEY
+ */
+ ASSERT(list_is_singular(&cur->upper));
+ edge = list_entry(cur->upper.next, struct btrfs_backref_edge,
+ list[LOWER]);
+ ASSERT(list_empty(&edge->list[UPPER]));
+ exist = edge->node[UPPER];
+ /*
+ * Add the upper level block to the pending list if we need to check
+ * its backrefs
+ */
+ if (!exist->checked)
+ list_add_tail(&edge->list[UPPER], &cache->pending_edge);
+ } else {
+ exist = NULL;
+ }
+
+ for (; ret == 0; ret = btrfs_backref_iter_next(iter)) {
+ struct extent_buffer *eb;
+ struct btrfs_key key;
+ int type;
+
+ cond_resched();
+ eb = btrfs_backref_get_eb(iter);
+
+ key.objectid = iter->bytenr;
+ if (btrfs_backref_iter_is_inline_ref(iter)) {
+ struct btrfs_extent_inline_ref *iref;
+
+ /* Update key for inline backref */
+ iref = (struct btrfs_extent_inline_ref *)
+ ((unsigned long)iter->cur_ptr);
+ type = btrfs_get_extent_inline_ref_type(eb, iref,
+ BTRFS_REF_TYPE_BLOCK);
+ if (type == BTRFS_REF_TYPE_INVALID) {
+ ret = -EUCLEAN;
+ goto out;
+ }
+ key.type = type;
+ key.offset = btrfs_extent_inline_ref_offset(eb, iref);
+ } else {
+ key.type = iter->cur_key.type;
+ key.offset = iter->cur_key.offset;
+ }
+
+ /*
+ * Parent node found and matches current inline ref, no need to
+ * rebuild this node for this inline ref
+ */
+ if (exist &&
+ ((key.type == BTRFS_TREE_BLOCK_REF_KEY &&
+ exist->owner == key.offset) ||
+ (key.type == BTRFS_SHARED_BLOCK_REF_KEY &&
+ exist->bytenr == key.offset))) {
+ exist = NULL;
+ continue;
+ }
+
+ /* SHARED_BLOCK_REF means key.offset is the parent bytenr */
+ if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
+ ret = handle_direct_tree_backref(cache, &key, cur);
+ if (ret < 0)
+ goto out;
+ continue;
+ } else if (unlikely(key.type == BTRFS_EXTENT_REF_V0_KEY)) {
+ ret = -EINVAL;
+ btrfs_print_v0_err(fs_info);
+ btrfs_handle_fs_error(fs_info, ret, NULL);
+ goto out;
+ } else if (key.type != BTRFS_TREE_BLOCK_REF_KEY) {
+ continue;
+ }
+
+ /*
+ * key.type == BTRFS_TREE_BLOCK_REF_KEY, inline ref offset
+ * means the root objectid. We need to search the tree to get
+ * its parent bytenr.
+ */
+ ret = handle_indirect_tree_backref(cache, path, &key, node_key,
+ cur);
+ if (ret < 0)
+ goto out;
+ }
+ ret = 0;
+ cur->checked = 1;
+ WARN_ON(exist);
+out:
+ btrfs_backref_iter_release(iter);
+ return ret;
+}
+
+/*
+ * Finish the upwards linkage created by btrfs_backref_add_tree_node()
+ */
+int btrfs_backref_finish_upper_links(struct btrfs_backref_cache *cache,
+ struct btrfs_backref_node *start)
+{
+ struct list_head *useless_node = &cache->useless_node;
+ struct btrfs_backref_edge *edge;
+ struct rb_node *rb_node;
+ LIST_HEAD(pending_edge);
+
+ ASSERT(start->checked);
+
+ /* Insert this node to cache if it's not COW-only */
+ if (!start->cowonly) {
+ rb_node = rb_simple_insert(&cache->rb_root, start->bytenr,
+ &start->rb_node);
+ if (rb_node)
+ btrfs_backref_panic(cache->fs_info, start->bytenr,
+ -EEXIST);
+ list_add_tail(&start->lower, &cache->leaves);
+ }
+
+ /*
+ * Use breadth first search to iterate all related edges.
+ *
+ * The starting points are all the edges of this node
+ */
+ list_for_each_entry(edge, &start->upper, list[LOWER])
+ list_add_tail(&edge->list[UPPER], &pending_edge);
+
+ while (!list_empty(&pending_edge)) {
+ struct btrfs_backref_node *upper;
+ struct btrfs_backref_node *lower;
+ struct rb_node *rb_node;
+
+ edge = list_first_entry(&pending_edge,
+ struct btrfs_backref_edge, list[UPPER]);
+ list_del_init(&edge->list[UPPER]);
+ upper = edge->node[UPPER];
+ lower = edge->node[LOWER];
+
+ /* Parent is detached, no need to keep any edges */
+ if (upper->detached) {
+ list_del(&edge->list[LOWER]);
+ btrfs_backref_free_edge(cache, edge);
+
+ /* Lower node is orphan, queue for cleanup */
+ if (list_empty(&lower->upper))
+ list_add(&lower->list, useless_node);
+ continue;
+ }
+
+ /*
+ * All new nodes added in current build_backref_tree() haven't
+ * been linked to the cache rb tree.
+ * So if we have upper->rb_node populated, this means a cache
+ * hit. We only need to link the edge, as @upper and all its
+ * parents have already been linked.
+ */
+ if (!RB_EMPTY_NODE(&upper->rb_node)) {
+ if (upper->lowest) {
+ list_del_init(&upper->lower);
+ upper->lowest = 0;
+ }
+
+ list_add_tail(&edge->list[UPPER], &upper->lower);
+ continue;
+ }
+
+ /* Sanity check, we shouldn't have any unchecked nodes */
+ if (!upper->checked) {
+ ASSERT(0);
+ return -EUCLEAN;
+ }
+
+ /* Sanity check, COW-only node has non-COW-only parent */
+ if (start->cowonly != upper->cowonly) {
+ ASSERT(0);
+ return -EUCLEAN;
+ }
+
+ /* Only cache non-COW-only (subvolume trees) tree blocks */
+ if (!upper->cowonly) {
+ rb_node = rb_simple_insert(&cache->rb_root, upper->bytenr,
+ &upper->rb_node);
+ if (rb_node) {
+ btrfs_backref_panic(cache->fs_info,
+ upper->bytenr, -EEXIST);
+ return -EUCLEAN;
+ }
+ }
+
+ list_add_tail(&edge->list[UPPER], &upper->lower);
+
+ /*
+ * Also queue all the parent edges of this uncached node
+ * to finish the upper linkage
+ */
+ list_for_each_entry(edge, &upper->upper, list[LOWER])
+ list_add_tail(&edge->list[UPPER], &pending_edge);
+ }
+ return 0;
+}
+
+void btrfs_backref_error_cleanup(struct btrfs_backref_cache *cache,
+ struct btrfs_backref_node *node)
+{
+ struct btrfs_backref_node *lower;
+ struct btrfs_backref_node *upper;
+ struct btrfs_backref_edge *edge;
+
+ while (!list_empty(&cache->useless_node)) {
+ lower = list_first_entry(&cache->useless_node,
+ struct btrfs_backref_node, list);
+ list_del_init(&lower->list);
+ }
+ while (!list_empty(&cache->pending_edge)) {
+ edge = list_first_entry(&cache->pending_edge,
+ struct btrfs_backref_edge, list[UPPER]);
+ list_del(&edge->list[UPPER]);
+ list_del(&edge->list[LOWER]);
+ lower = edge->node[LOWER];
+ upper = edge->node[UPPER];
+ btrfs_backref_free_edge(cache, edge);
+
+ /*
+ * Lower is no longer linked to any upper backref nodes and
+ * isn't in the cache, we can free it ourselves.
+ */
+ if (list_empty(&lower->upper) &&
+ RB_EMPTY_NODE(&lower->rb_node))
+ list_add(&lower->list, &cache->useless_node);
+
+ if (!RB_EMPTY_NODE(&upper->rb_node))
+ continue;
+
+ /* Add this guy's upper edges to the list to process */
+ list_for_each_entry(edge, &upper->upper, list[LOWER])
+ list_add_tail(&edge->list[UPPER],
+ &cache->pending_edge);
+ if (list_empty(&upper->upper))
+ list_add(&upper->list, &cache->useless_node);
+ }
+
+ while (!list_empty(&cache->useless_node)) {
+ lower = list_first_entry(&cache->useless_node,
+ struct btrfs_backref_node, list);
+ list_del_init(&lower->list);
+ if (lower == node)
+ node = NULL;
+ btrfs_backref_free_node(cache, lower);
+ }
+
+ btrfs_backref_cleanup_node(cache, node);
+ ASSERT(list_empty(&cache->useless_node) &&
+ list_empty(&cache->pending_edge));
+}
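
The btrfs_backref_iter API added above is consumed in roughly the following way, mirroring the loop in btrfs_backref_add_tree_node(). This is only a sketch: the wrapper function name is made up and error handling is trimmed.

static int dump_tree_block_backrefs(struct btrfs_fs_info *fs_info, u64 bytenr)
{
	struct btrfs_backref_iter *iter;
	int ret;

	iter = btrfs_backref_iter_alloc(fs_info, GFP_NOFS);
	if (!iter)
		return -ENOMEM;

	/* Position at the first backref item (inline or keyed) of @bytenr */
	ret = btrfs_backref_iter_start(iter, bytenr);
	if (ret < 0)
		goto out;

	for (; ret == 0; ret = btrfs_backref_iter_next(iter)) {
		if (btrfs_backref_iter_is_inline_ref(iter)) {
			/* Inline ref: decode the btrfs_extent_inline_ref at cur_ptr */
		} else {
			/* Keyed ref: iter->cur_key holds the backref key */
		}
	}
	/* A return value > 0 only means there are no more backrefs */
	if (ret > 0)
		ret = 0;
out:
	btrfs_backref_iter_release(iter);
	btrfs_backref_iter_free(iter);	/* also frees the path */
	return ret;
}
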
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h
index 723d6da99114..ff705cc564a9 100644
--- a/fs/btrfs/backref.h
+++ b/fs/btrfs/backref.h
@@ -8,6 +8,7 @@
#include <linux/btrfs.h>
#include "ulist.h"
+#include "disk-io.h"
#include "extent_io.h"
struct inode_fs_paths {
@@ -78,4 +79,300 @@ struct prelim_ref {
u64 wanted_disk_byte;
};
+/*
+ * Iterate backrefs of one extent.
+ *
+ * Now it only supports iteration of tree blocks in the commit root.
+ */
+struct btrfs_backref_iter {
+ u64 bytenr;
+ struct btrfs_path *path;
+ struct btrfs_fs_info *fs_info;
+ struct btrfs_key cur_key;
+ u32 item_ptr;
+ u32 cur_ptr;
+ u32 end_ptr;
+};
+
+struct btrfs_backref_iter *btrfs_backref_iter_alloc(
+ struct btrfs_fs_info *fs_info, gfp_t gfp_flag);
+
+static inline void btrfs_backref_iter_free(struct btrfs_backref_iter *iter)
+{
+ if (!iter)
+ return;
+ btrfs_free_path(iter->path);
+ kfree(iter);
+}
+
+static inline struct extent_buffer *btrfs_backref_get_eb(
+ struct btrfs_backref_iter *iter)
+{
+ if (!iter)
+ return NULL;
+ return iter->path->nodes[0];
+}
+
+/*
+ * For metadata with EXTENT_ITEM key (non-skinny) case, the first inline data
+ * is btrfs_tree_block_info, without a btrfs_extent_inline_ref header.
+ *
+ * This helper determines if that's the case.
+ */
+static inline bool btrfs_backref_has_tree_block_info(
+ struct btrfs_backref_iter *iter)
+{
+ if (iter->cur_key.type == BTRFS_EXTENT_ITEM_KEY &&
+ iter->cur_ptr - iter->item_ptr == sizeof(struct btrfs_extent_item))
+ return true;
+ return false;
+}
+
+int btrfs_backref_iter_start(struct btrfs_backref_iter *iter, u64 bytenr);
+
+int btrfs_backref_iter_next(struct btrfs_backref_iter *iter);
+
+static inline bool btrfs_backref_iter_is_inline_ref(
+ struct btrfs_backref_iter *iter)
+{
+ if (iter->cur_key.type == BTRFS_EXTENT_ITEM_KEY ||
+ iter->cur_key.type == BTRFS_METADATA_ITEM_KEY)
+ return true;
+ return false;
+}
+
+static inline void btrfs_backref_iter_release(struct btrfs_backref_iter *iter)
+{
+ iter->bytenr = 0;
+ iter->item_ptr = 0;
+ iter->cur_ptr = 0;
+ iter->end_ptr = 0;
+ btrfs_release_path(iter->path);
+ memset(&iter->cur_key, 0, sizeof(iter->cur_key));
+}
+
+/*
+ * Backref cache related structures
+ *
+ * The whole objective of backref_cache is to build a bi-directional map
+ * of tree blocks (represented by backref_node) and all their parents.
+ */
+
+/*
+ * Represent a tree block in the backref cache
+ */
+struct btrfs_backref_node {
+ struct {
+ struct rb_node rb_node;
+ u64 bytenr;
+ }; /* Use rb_simple_node for search/insert */
+
+ u64 new_bytenr;
+ /* Objectid of the tree block owner, may not be up to date */
+ u64 owner;
+ /* Link to pending, changed or detached list */
+ struct list_head list;
+
+ /* List of upper level edges, which link this node to its parents */
+ struct list_head upper;
+ /* List of lower level edges, which link this node to its children */
+ struct list_head lower;
+
+ /* NULL if this node is not tree root */
+ struct btrfs_root *root;
+ /* Extent buffer got by COWing the block */
+ struct extent_buffer *eb;
+ /* Level of the tree block */
+ unsigned int level:8;
+ /* Is the block in a non-shareable tree */
+ unsigned int cowonly:1;
+ /* 1 if no child node is in the cache */
+ unsigned int lowest:1;
+ /* Is the extent buffer locked */
+ unsigned int locked:1;
+ /* Has the block been processed */
+ unsigned int processed:1;
+ /* Have backrefs of this block been checked */
+ unsigned int checked:1;
+ /*
+ * 1 if corresponding block has been COWed but some upper level block
+ * pointers may not point to the new location
+ */
+ unsigned int pending:1;
+ /* 1 if the backref node isn't connected to any other backref node */
+ unsigned int detached:1;
+
+ /*
+ * For generic purpose backref cache, we only care whether it's a reloc
+ * root, not about the source subvolid.
+ */
+ unsigned int is_reloc_root:1;
+};
+
+#define LOWER 0
+#define UPPER 1
+
+/*
+ * Represent an edge connecting upper and lower backref nodes.
+ */
+struct btrfs_backref_edge {
+ /*
+ * list[LOWER] is linked to btrfs_backref_node::upper of lower level
+ * node, and list[UPPER] is linked to btrfs_backref_node::lower of
+ * upper level node.
+ *
+ * Also, build_backref_tree() uses list[UPPER] for pending edges, before
+ * linking list[UPPER] to its upper level nodes.
+ */
+ struct list_head list[2];
+
+ /* Two related nodes */
+ struct btrfs_backref_node *node[2];
+};
+
+struct btrfs_backref_cache {
+ /* Red black tree of all backref nodes in the cache */
+ struct rb_root rb_root;
+ /* For passing backref nodes to btrfs_reloc_cow_block */
+ struct btrfs_backref_node *path[BTRFS_MAX_LEVEL];
+ /*
+ * List of blocks that have been COWed but some block pointers in upper
+ * level blocks may not reflect the new location
+ */
+ struct list_head pending[BTRFS_MAX_LEVEL];
+ /* List of backref nodes with no child node */
+ struct list_head leaves;
+ /* List of blocks that have been COWed in current transaction */
+ struct list_head changed;
+ /* List of detached backref nodes. */
+ struct list_head detached;
+
+ u64 last_trans;
+
+ int nr_nodes;
+ int nr_edges;
+
+ /* List of unchecked backref edges during backref cache build */
+ struct list_head pending_edge;
+
+ /* List of useless backref nodes during backref cache build */
+ struct list_head useless_node;
+
+ struct btrfs_fs_info *fs_info;
+
+ /*
+ * Whether this cache is for relocation
+ *
+ * Relocation backref cache requires more info for reloc roots compared
+ * to the generic backref cache.
+ */
+ unsigned int is_reloc;
+};
+
+void btrfs_backref_init_cache(struct btrfs_fs_info *fs_info,
+ struct btrfs_backref_cache *cache, int is_reloc);
+struct btrfs_backref_node *btrfs_backref_alloc_node(
+ struct btrfs_backref_cache *cache, u64 bytenr, int level);
+struct btrfs_backref_edge *btrfs_backref_alloc_edge(
+ struct btrfs_backref_cache *cache);
+
+#define LINK_LOWER (1 << 0)
+#define LINK_UPPER (1 << 1)
+static inline void btrfs_backref_link_edge(struct btrfs_backref_edge *edge,
+ struct btrfs_backref_node *lower,
+ struct btrfs_backref_node *upper,
+ int link_which)
+{
+ ASSERT(upper && lower && upper->level == lower->level + 1);
+ edge->node[LOWER] = lower;
+ edge->node[UPPER] = upper;
+ if (link_which & LINK_LOWER)
+ list_add_tail(&edge->list[LOWER], &lower->upper);
+ if (link_which & LINK_UPPER)
+ list_add_tail(&edge->list[UPPER], &upper->lower);
+}
+
+static inline void btrfs_backref_free_node(struct btrfs_backref_cache *cache,
+ struct btrfs_backref_node *node)
+{
+ if (node) {
+ cache->nr_nodes--;
+ btrfs_put_root(node->root);
+ kfree(node);
+ }
+}
+
+static inline void btrfs_backref_free_edge(struct btrfs_backref_cache *cache,
+ struct btrfs_backref_edge *edge)
+{
+ if (edge) {
+ cache->nr_edges--;
+ kfree(edge);
+ }
+}
+
+static inline void btrfs_backref_unlock_node_buffer(
+ struct btrfs_backref_node *node)
+{
+ if (node->locked) {
+ btrfs_tree_unlock(node->eb);
+ node->locked = 0;
+ }
+}
+
+static inline void btrfs_backref_drop_node_buffer(
+ struct btrfs_backref_node *node)
+{
+ if (node->eb) {
+ btrfs_backref_unlock_node_buffer(node);
+ free_extent_buffer(node->eb);
+ node->eb = NULL;
+ }
+}
+
+/*
+ * Drop the backref node from cache without cleaning up its child
+ * edges.
+ *
+ * This can only be called on a node without parent edges.
+ * The child edges are still kept as is.
+ */
+static inline void btrfs_backref_drop_node(struct btrfs_backref_cache *tree,
+ struct btrfs_backref_node *node)
+{
+ BUG_ON(!list_empty(&node->upper));
+
+ btrfs_backref_drop_node_buffer(node);
+ list_del(&node->list);
+ list_del(&node->lower);
+ if (!RB_EMPTY_NODE(&node->rb_node))
+ rb_erase(&node->rb_node, &tree->rb_root);
+ btrfs_backref_free_node(tree, node);
+}
+
+void btrfs_backref_cleanup_node(struct btrfs_backref_cache *cache,
+ struct btrfs_backref_node *node);
+
+void btrfs_backref_release_cache(struct btrfs_backref_cache *cache);
+
+static inline void btrfs_backref_panic(struct btrfs_fs_info *fs_info,
+ u64 bytenr, int errno)
+{
+ btrfs_panic(fs_info, errno,
+ "Inconsistency in backref cache found at offset %llu",
+ bytenr);
+}
+
+int btrfs_backref_add_tree_node(struct btrfs_backref_cache *cache,
+ struct btrfs_path *path,
+ struct btrfs_backref_iter *iter,
+ struct btrfs_key *node_key,
+ struct btrfs_backref_node *cur);
+
+int btrfs_backref_finish_upper_links(struct btrfs_backref_cache *cache,
+ struct btrfs_backref_node *start);
+
+void btrfs_backref_error_cleanup(struct btrfs_backref_cache *cache,
+ struct btrfs_backref_node *node);
+
#endif
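
The cache half of the API declared above is meant to be driven in roughly the following order; the function below is a hypothetical sketch based only on these declarations (the in-tree consumer is the relocation backref tree builder). The real code also drains cache->pending_edge to walk up to parent blocks; that loop is omitted here, and the early -ENOMEM return leaks the partial allocations for brevity.

static int cache_one_tree_block(struct btrfs_fs_info *fs_info,
				struct btrfs_backref_cache *cache,
				struct btrfs_key *node_key,
				u64 bytenr, int level)
{
	struct btrfs_backref_iter *iter;
	struct btrfs_path *path;
	struct btrfs_backref_node *node;
	int ret;

	iter = btrfs_backref_iter_alloc(fs_info, GFP_NOFS);
	path = btrfs_alloc_path();
	node = btrfs_backref_alloc_node(cache, bytenr, level);
	if (!iter || !path || !node)
		return -ENOMEM;	/* real code must free what was allocated */

	/* The starting node is the lowest one of this walk */
	node->lowest = 1;

	/* Collect all backrefs of @bytenr and queue its parent blocks */
	ret = btrfs_backref_add_tree_node(cache, path, iter, node_key, node);
	if (ret < 0)
		goto error;

	/* Make the upper links bi-directional and insert into the cache */
	ret = btrfs_backref_finish_upper_links(cache, node);
	if (ret < 0)
		goto error;

	btrfs_backref_iter_free(iter);
	btrfs_free_path(path);
	return 0;
error:
	btrfs_backref_error_cleanup(cache, node);
	btrfs_backref_iter_free(iter);
	btrfs_free_path(path);
	return ret;
}

The cache itself is set up once with btrfs_backref_init_cache() and torn down with btrfs_backref_release_cache(), which asserts that all temporary lists (pending_edge, useless_node, etc.) have been drained.
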
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 696f47103cfc..176e8a292fd1 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -7,7 +7,6 @@
#include "disk-io.h"
#include "free-space-cache.h"
#include "free-space-tree.h"
-#include "disk-io.h"
#include "volumes.h"
#include "transaction.h"
#include "ref-verify.h"
@@ -161,6 +160,8 @@ static int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
struct rb_node *parent = NULL;
struct btrfs_block_group *cache;
+ ASSERT(block_group->length != 0);
+
spin_lock(&info->block_group_cache_lock);
p = &info->block_group_cache_tree.rb_node;
@@ -863,11 +864,34 @@ static void clear_incompat_bg_bits(struct btrfs_fs_info *fs_info, u64 flags)
}
}
+static int remove_block_group_item(struct btrfs_trans_handle *trans,
+ struct btrfs_path *path,
+ struct btrfs_block_group *block_group)
+{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
+ struct btrfs_root *root;
+ struct btrfs_key key;
+ int ret;
+
+ root = fs_info->extent_root;
+ key.objectid = block_group->start;
+ key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
+ key.offset = block_group->length;
+
+ ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+ if (ret > 0)
+ ret = -ENOENT;
+ if (ret < 0)
+ return ret;
+
+ ret = btrfs_del_item(trans, root, path);
+ return ret;
+}
+
int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
u64 group_start, struct extent_map *em)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
- struct btrfs_root *root = fs_info->extent_root;
struct btrfs_path *path;
struct btrfs_block_group *block_group;
struct btrfs_free_cluster *cluster;
@@ -1065,26 +1089,25 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
spin_unlock(&block_group->space_info->lock);
- key.objectid = block_group->start;
- key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
- key.offset = block_group->length;
-
mutex_lock(&fs_info->chunk_mutex);
spin_lock(&block_group->lock);
block_group->removed = 1;
/*
- * At this point trimming can't start on this block group, because we
- * removed the block group from the tree fs_info->block_group_cache_tree
- * so no one can't find it anymore and even if someone already got this
- * block group before we removed it from the rbtree, they have already
- * incremented block_group->trimming - if they didn't, they won't find
- * any free space entries because we already removed them all when we
- * called btrfs_remove_free_space_cache().
+ * At this point trimming or scrub can't start on this block group,
+ * because we removed the block group from the rbtree
+ * fs_info->block_group_cache_tree so no one can find it anymore and
+ * even if someone already got this block group before we removed it
+ * from the rbtree, they have already incremented block_group->frozen -
+ * if they didn't, for the trimming case they won't find any free space
+ * entries because we already removed them all when we called
+ * btrfs_remove_free_space_cache().
*
* And we must not remove the extent map from the fs_info->mapping_tree
* to prevent the same logical address range and physical device space
- * ranges from being reused for a new block group. This is because our
- * fs trim operation (btrfs_trim_fs() / btrfs_ioctl_fitrim()) is
+ * ranges from being reused for a new block group. This is needed to
+ * avoid races with trimming and scrub.
+ *
+ * An fs trim operation (btrfs_trim_fs() / btrfs_ioctl_fitrim()) is
* completely transactionless, so while it is trimming a range the
* currently running transaction might finish and a new one start,
* allowing for new block groups to be created that can reuse the same
@@ -1095,7 +1118,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
* in place until the extents have been discarded completely when
* the transaction commit has completed.
*/
- remove_em = (atomic_read(&block_group->trimming) == 0);
+ remove_em = (atomic_read(&block_group->frozen) == 0);
spin_unlock(&block_group->lock);
mutex_unlock(&fs_info->chunk_mutex);
@@ -1107,16 +1130,10 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
/* Once for the block groups rbtree */
btrfs_put_block_group(block_group);
- ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
- if (ret > 0)
- ret = -EIO;
+ ret = remove_block_group_item(trans, path, block_group);
if (ret < 0)
goto out;
- ret = btrfs_del_item(trans, root, path);
- if (ret)
- goto out;
-
if (remove_em) {
struct extent_map_tree *em_tree;
@@ -1175,7 +1192,7 @@ struct btrfs_trans_handle *btrfs_start_trans_remove_block_group(
free_extent_map(em);
return btrfs_start_transaction_fallback_global_rsv(fs_info->extent_root,
- num_items, 1);
+ num_items);
}
/*
@@ -1284,25 +1301,17 @@ static bool clean_pinned_extents(struct btrfs_trans_handle *trans,
ret = clear_extent_bits(&prev_trans->pinned_extents, start, end,
EXTENT_DIRTY);
if (ret)
- goto err;
+ goto out;
}
ret = clear_extent_bits(&trans->transaction->pinned_extents, start, end,
EXTENT_DIRTY);
- if (ret)
- goto err;
+out:
mutex_unlock(&fs_info->unused_bg_unpin_mutex);
if (prev_trans)
btrfs_put_transaction(prev_trans);
- return true;
-
-err:
- mutex_unlock(&fs_info->unused_bg_unpin_mutex);
- if (prev_trans)
- btrfs_put_transaction(prev_trans);
- btrfs_dec_block_group_ro(bg);
- return false;
+ return ret == 0;
}
/*
@@ -1400,8 +1409,10 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
* We could have pending pinned extents for this block group,
* just delete them, we don't care about them anymore.
*/
- if (!clean_pinned_extents(trans, block_group))
+ if (!clean_pinned_extents(trans, block_group)) {
+ btrfs_dec_block_group_ro(block_group);
goto end_trans;
+ }
/*
* At this point, the block_group is read only and should fail
@@ -1450,7 +1461,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
/* Implicit trim during transaction commit. */
if (trimming)
- btrfs_get_block_group_trimming(block_group);
+ btrfs_freeze_block_group(block_group);
/*
* Btrfs_remove_chunk will abort the transaction if things go
@@ -1460,7 +1471,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
if (ret) {
if (trimming)
- btrfs_put_block_group_trimming(block_group);
+ btrfs_unfreeze_block_group(block_group);
goto end_trans;
}
@@ -1774,7 +1785,7 @@ static void link_block_group(struct btrfs_block_group *cache)
}
static struct btrfs_block_group *btrfs_create_block_group_cache(
- struct btrfs_fs_info *fs_info, u64 start, u64 size)
+ struct btrfs_fs_info *fs_info, u64 start)
{
struct btrfs_block_group *cache;
@@ -1790,7 +1801,6 @@ static struct btrfs_block_group *btrfs_create_block_group_cache(
}
cache->start = start;
- cache->length = size;
cache->fs_info = fs_info;
cache->full_stripe_len = btrfs_full_stripe_len(fs_info, start);
@@ -1809,7 +1819,7 @@ static struct btrfs_block_group *btrfs_create_block_group_cache(
INIT_LIST_HEAD(&cache->dirty_list);
INIT_LIST_HEAD(&cache->io_list);
btrfs_init_free_space_ctl(cache);
- atomic_set(&cache->trimming, 0);
+ atomic_set(&cache->frozen, 0);
mutex_init(&cache->free_space_lock);
btrfs_init_full_stripe_locks_tree(&cache->full_stripe_locks_root);
@@ -1870,25 +1880,44 @@ static int check_chunk_block_group_mappings(struct btrfs_fs_info *fs_info)
return ret;
}
+static int read_block_group_item(struct btrfs_block_group *cache,
+ struct btrfs_path *path,
+ const struct btrfs_key *key)
+{
+ struct extent_buffer *leaf = path->nodes[0];
+ struct btrfs_block_group_item bgi;
+ int slot = path->slots[0];
+
+ cache->length = key->offset;
+
+ read_extent_buffer(leaf, &bgi, btrfs_item_ptr_offset(leaf, slot),
+ sizeof(bgi));
+ cache->used = btrfs_stack_block_group_used(&bgi);
+ cache->flags = btrfs_stack_block_group_flags(&bgi);
+
+ return 0;
+}
+
static int read_one_block_group(struct btrfs_fs_info *info,
struct btrfs_path *path,
const struct btrfs_key *key,
int need_clear)
{
- struct extent_buffer *leaf = path->nodes[0];
struct btrfs_block_group *cache;
struct btrfs_space_info *space_info;
- struct btrfs_block_group_item bgi;
const bool mixed = btrfs_fs_incompat(info, MIXED_GROUPS);
- int slot = path->slots[0];
int ret;
ASSERT(key->type == BTRFS_BLOCK_GROUP_ITEM_KEY);
- cache = btrfs_create_block_group_cache(info, key->objectid, key->offset);
+ cache = btrfs_create_block_group_cache(info, key->objectid);
if (!cache)
return -ENOMEM;
+ ret = read_block_group_item(cache, path, key);
+ if (ret < 0)
+ goto error;
+
if (need_clear) {
/*
* When we mount with old space cache, we need to
@@ -1903,10 +1932,6 @@ static int read_one_block_group(struct btrfs_fs_info *info,
if (btrfs_test_opt(info, SPACE_CACHE))
cache->disk_cache_state = BTRFS_DC_CLEAR;
}
- read_extent_buffer(leaf, &bgi, btrfs_item_ptr_offset(leaf, slot),
- sizeof(bgi));
- cache->used = btrfs_stack_block_group_used(&bgi);
- cache->flags = btrfs_stack_block_group_flags(&bgi);
if (!mixed && ((cache->flags & BTRFS_BLOCK_GROUP_METADATA) &&
(cache->flags & BTRFS_BLOCK_GROUP_DATA))) {
btrfs_err(info,
@@ -1934,15 +1959,15 @@ static int read_one_block_group(struct btrfs_fs_info *info,
* are empty, and we can just add all the space in and be done with it.
* This saves us _a_lot_ of time, particularly in the full case.
*/
- if (key->offset == cache->used) {
+ if (cache->length == cache->used) {
cache->last_byte_to_unpin = (u64)-1;
cache->cached = BTRFS_CACHE_FINISHED;
btrfs_free_excluded_extents(cache);
} else if (cache->used == 0) {
cache->last_byte_to_unpin = (u64)-1;
cache->cached = BTRFS_CACHE_FINISHED;
- add_new_free_space(cache, key->objectid,
- key->objectid + key->offset);
+ add_new_free_space(cache, cache->start,
+ cache->start + cache->length);
btrfs_free_excluded_extents(cache);
}
@@ -1952,7 +1977,7 @@ static int read_one_block_group(struct btrfs_fs_info *info,
goto error;
}
trace_btrfs_add_block_group(info, cache, 0);
- btrfs_update_space_info(info, cache->flags, key->offset,
+ btrfs_update_space_info(info, cache->flags, cache->length,
cache->used, cache->bytes_super, &space_info);
cache->space_info = space_info;
@@ -1991,7 +2016,6 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
- path->reada = READA_FORWARD;
cache_gen = btrfs_super_cache_generation(info->super_copy);
if (btrfs_test_opt(info, SPACE_CACHE) &&
@@ -2046,13 +2070,32 @@ error:
return ret;
}
+static int insert_block_group_item(struct btrfs_trans_handle *trans,
+ struct btrfs_block_group *block_group)
+{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
+ struct btrfs_block_group_item bgi;
+ struct btrfs_root *root;
+ struct btrfs_key key;
+
+ spin_lock(&block_group->lock);
+ btrfs_set_stack_block_group_used(&bgi, block_group->used);
+ btrfs_set_stack_block_group_chunk_objectid(&bgi,
+ BTRFS_FIRST_CHUNK_TREE_OBJECTID);
+ btrfs_set_stack_block_group_flags(&bgi, block_group->flags);
+ key.objectid = block_group->start;
+ key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
+ key.offset = block_group->length;
+ spin_unlock(&block_group->lock);
+
+ root = fs_info->extent_root;
+ return btrfs_insert_item(trans, root, &key, &bgi, sizeof(bgi));
+}
+
void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_block_group *block_group;
- struct btrfs_root *extent_root = fs_info->extent_root;
- struct btrfs_block_group_item item;
- struct btrfs_key key;
int ret = 0;
if (!trans->can_flush_pending_bgs)
@@ -2065,21 +2108,11 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans)
if (ret)
goto next;
- spin_lock(&block_group->lock);
- btrfs_set_stack_block_group_used(&item, block_group->used);
- btrfs_set_stack_block_group_chunk_objectid(&item,
- BTRFS_FIRST_CHUNK_TREE_OBJECTID);
- btrfs_set_stack_block_group_flags(&item, block_group->flags);
- key.objectid = block_group->start;
- key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
- key.offset = block_group->length;
- spin_unlock(&block_group->lock);
-
- ret = btrfs_insert_item(trans, extent_root, &key, &item,
- sizeof(item));
+ ret = insert_block_group_item(trans, block_group);
if (ret)
btrfs_abort_transaction(trans, ret);
- ret = btrfs_finish_chunk_alloc(trans, key.objectid, key.offset);
+ ret = btrfs_finish_chunk_alloc(trans, block_group->start,
+ block_group->length);
if (ret)
btrfs_abort_transaction(trans, ret);
add_block_group_free_space(trans, block_group);
@@ -2100,10 +2133,11 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used,
btrfs_set_log_full_commit(trans);
- cache = btrfs_create_block_group_cache(fs_info, chunk_offset, size);
+ cache = btrfs_create_block_group_cache(fs_info, chunk_offset);
if (!cache)
return -ENOMEM;
+ cache->length = size;
cache->used = bytes_used;
cache->flags = type;
cache->last_byte_to_unpin = (u64)-1;
@@ -2314,13 +2348,13 @@ void btrfs_dec_block_group_ro(struct btrfs_block_group *cache)
spin_unlock(&sinfo->lock);
}
-static int write_one_cache_group(struct btrfs_trans_handle *trans,
- struct btrfs_path *path,
- struct btrfs_block_group *cache)
+static int update_block_group_item(struct btrfs_trans_handle *trans,
+ struct btrfs_path *path,
+ struct btrfs_block_group *cache)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
int ret;
- struct btrfs_root *extent_root = fs_info->extent_root;
+ struct btrfs_root *root = fs_info->extent_root;
unsigned long bi;
struct extent_buffer *leaf;
struct btrfs_block_group_item bgi;
@@ -2330,7 +2364,7 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans,
key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
key.offset = cache->length;
- ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 1);
+ ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
if (ret) {
if (ret > 0)
ret = -ENOENT;
@@ -2642,7 +2676,7 @@ again:
}
}
if (!ret) {
- ret = write_one_cache_group(trans, path, cache);
+ ret = update_block_group_item(trans, path, cache);
/*
* Our block group might still be attached to the list
* of new block groups in the transaction handle of some
@@ -2791,7 +2825,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans)
}
}
if (!ret) {
- ret = write_one_cache_group(trans, path, cache);
+ ret = update_block_group_item(trans, path, cache);
/*
* One of the free space endio workers might have
* created a new block group while updating a free space
@@ -2808,7 +2842,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans)
if (ret == -ENOENT) {
wait_event(cur_trans->writer_wait,
atomic_read(&cur_trans->num_writers) == 1);
- ret = write_one_cache_group(trans, path, cache);
+ ret = update_block_group_item(trans, path, cache);
}
if (ret)
btrfs_abort_transaction(trans, ret);
@@ -3384,3 +3418,44 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
}
return 0;
}
+
+void btrfs_freeze_block_group(struct btrfs_block_group *cache)
+{
+ atomic_inc(&cache->frozen);
+}
+
+void btrfs_unfreeze_block_group(struct btrfs_block_group *block_group)
+{
+ struct btrfs_fs_info *fs_info = block_group->fs_info;
+ struct extent_map_tree *em_tree;
+ struct extent_map *em;
+ bool cleanup;
+
+ spin_lock(&block_group->lock);
+ cleanup = (atomic_dec_and_test(&block_group->frozen) &&
+ block_group->removed);
+ spin_unlock(&block_group->lock);
+
+ if (cleanup) {
+ mutex_lock(&fs_info->chunk_mutex);
+ em_tree = &fs_info->mapping_tree;
+ write_lock(&em_tree->lock);
+ em = lookup_extent_mapping(em_tree, block_group->start,
+ 1);
+ BUG_ON(!em); /* logic error, can't happen */
+ remove_extent_mapping(em_tree, em);
+ write_unlock(&em_tree->lock);
+ mutex_unlock(&fs_info->chunk_mutex);
+
+ /* once for us and once for the tree */
+ free_extent_map(em);
+ free_extent_map(em);
+
+ /*
+ * We may have left one free space entry, and any other tasks that
+ * were trimming this block group may have left one entry each.
+ * Free them if any.
+ */
+ __btrfs_remove_free_space_cache(block_group->free_space_ctl);
+ }
+}
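/*
 * A minimal usage sketch, assuming a caller that trims a block group (the
 * middle helper name is hypothetical, not part of the patch):
 *
 *	btrfs_freeze_block_group(block_group);
 *	ret = trim_the_block_group(block_group);	// hypothetical helper
 *	btrfs_unfreeze_block_group(block_group);
 *
 * The final unfreeze after the block group has been removed performs the
 * deferred extent map removal shown in btrfs_unfreeze_block_group() above.
 */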
diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h
index 107bb557ca8d..b6ee70a039c7 100644
--- a/fs/btrfs/block-group.h
+++ b/fs/btrfs/block-group.h
@@ -129,8 +129,17 @@ struct btrfs_block_group {
/* For read-only block groups */
struct list_head ro_list;
+ /*
+ * When non-zero, it means the block group's logical address and its
+ * device extents cannot be reused for future block group allocations
+ * until the counter goes down to 0. This is to prevent them from being
+ * reused while some task is still using the block group after it was
+ * deleted - we want to make sure they can only be reused for new block
+ * groups after that task is done with the deleted block group.
+ */
+ atomic_t frozen;
+
/* For discard operations */
- atomic_t trimming;
struct list_head discard_list;
int discard_index;
u64 discard_eligible_time;
@@ -283,6 +292,9 @@ static inline int btrfs_block_group_done(struct btrfs_block_group *cache)
cache->cached == BTRFS_CACHE_ERROR;
}
+void btrfs_freeze_block_group(struct btrfs_block_group *cache);
+void btrfs_unfreeze_block_group(struct btrfs_block_group *cache);
+
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
u64 physical, u64 **logical, int *naddrs, int *stripe_len);
diff --git a/fs/btrfs/block-rsv.c b/fs/btrfs/block-rsv.c
index 27efec8f7c5b..7e1549a84fcc 100644
--- a/fs/btrfs/block-rsv.c
+++ b/fs/btrfs/block-rsv.c
@@ -5,6 +5,7 @@
#include "block-rsv.h"
#include "space-info.h"
#include "transaction.h"
+#include "block-group.h"
/*
* HOW DO BLOCK RESERVES WORK
@@ -405,6 +406,8 @@ void btrfs_update_global_block_rsv(struct btrfs_fs_info *fs_info)
else
block_rsv->full = 0;
+ if (block_rsv->size >= sinfo->total_bytes)
+ sinfo->force_alloc = CHUNK_ALLOC_FORCE;
spin_unlock(&block_rsv->lock);
spin_unlock(&sinfo->lock);
}
@@ -455,7 +458,7 @@ static struct btrfs_block_rsv *get_block_rsv(
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_block_rsv *block_rsv = NULL;
- if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
+ if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state) ||
(root == fs_info->csum_root && trans->adding_csums) ||
(root == fs_info->uuid_root))
block_rsv = trans->block_rsv;
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 27a1fefce508..aeff56a0e105 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -7,6 +7,7 @@
#define BTRFS_INODE_H
#include <linux/hash.h>
+#include <linux/refcount.h>
#include "extent_map.h"
#include "extent_io.h"
#include "ordered-data.h"
@@ -27,7 +28,6 @@ enum {
BTRFS_INODE_NEEDS_FULL_SYNC,
BTRFS_INODE_COPY_EVERYTHING,
BTRFS_INODE_IN_DELALLOC_LIST,
- BTRFS_INODE_READDIO_NEED_LOCK,
BTRFS_INODE_HAS_PROPS,
BTRFS_INODE_SNAPSHOT_FLUSH,
};
@@ -293,53 +293,25 @@ static inline int btrfs_inode_in_log(struct btrfs_inode *inode, u64 generation)
return ret;
}
-#define BTRFS_DIO_ORIG_BIO_SUBMITTED 0x1
-
struct btrfs_dio_private {
struct inode *inode;
- unsigned long flags;
u64 logical_offset;
u64 disk_bytenr;
u64 bytes;
- void *private;
-
- /* number of bios pending for this dio */
- atomic_t pending_bios;
- /* IO errors */
- int errors;
-
- /* orig_bio is our btrfs_io_bio */
- struct bio *orig_bio;
+ /*
+ * References to this structure. There is one reference per in-flight
+ * bio plus one while we're still setting up.
+ */
+ refcount_t refs;
/* dio_bio came from fs/direct-io.c */
struct bio *dio_bio;
- /*
- * The original bio may be split to several sub-bios, this is
- * done during endio of sub-bios
- */
- blk_status_t (*subio_endio)(struct inode *, struct btrfs_io_bio *,
- blk_status_t);
+ /* Array of checksums */
+ u8 csums[];
};
-/*
- * Disable DIO read nolock optimization, so new dio readers will be forced
- * to grab i_mutex. It is used to avoid the endless truncate due to
- * nonlocked dio read.
- */
-static inline void btrfs_inode_block_unlocked_dio(struct btrfs_inode *inode)
-{
- set_bit(BTRFS_INODE_READDIO_NEED_LOCK, &inode->runtime_flags);
- smp_mb();
-}
-
-static inline void btrfs_inode_resume_unlocked_dio(struct btrfs_inode *inode)
-{
- smp_mb__before_atomic();
- clear_bit(BTRFS_INODE_READDIO_NEED_LOCK, &inode->runtime_flags);
-}
-
/* Array of bytes with variable length, hexadecimal format 0x1234 */
#define CSUM_FMT "0x%*phN"
#define CSUM_FMT_VALUE(size, bytes) size, bytes
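/*
 * A minimal lifetime sketch for the refcount_t based btrfs_dio_private,
 * assuming a typical submit path (helper names are hypothetical, this is
 * not taken from the patch):
 *
 *	refcount_set(&dip->refs, 1);			// setup reference
 *	while (more_bios_to_submit(dip)) {		// hypothetical
 *		refcount_inc(&dip->refs);		// one per in-flight bio
 *		submit_bio(bio);
 *	}
 *	if (refcount_dec_and_test(&dip->refs))		// drop setup reference
 *		finish_dio_private(dip);		// hypothetical
 *
 * with each bio completion doing the same refcount_dec_and_test().
 */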
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 9ab610cc9114..c6e648603f85 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -194,11 +194,9 @@ static int check_compressed_csum(struct btrfs_inode *inode,
for (i = 0; i < cb->nr_pages; i++) {
page = cb->compressed_pages[i];
- crypto_shash_init(shash);
kaddr = kmap_atomic(page);
- crypto_shash_update(shash, kaddr, PAGE_SIZE);
+ crypto_shash_digest(shash, kaddr, PAGE_SIZE, csum);
kunmap_atomic(kaddr);
- crypto_shash_final(shash, (u8 *)&csum);
if (memcmp(&csum, cb_sum, csum_size)) {
btrfs_print_data_csum_error(inode, disk_start,
@@ -1142,6 +1140,22 @@ static void put_workspace(int type, struct list_head *ws)
}
/*
+ * Adjust @level according to the limits of the compression algorithm or
+ * fall back to the default
+ */
+static unsigned int btrfs_compress_set_level(int type, unsigned level)
+{
+ const struct btrfs_compress_op *ops = btrfs_compress_op[type];
+
+ if (level == 0)
+ level = ops->default_level;
+ else
+ level = min(level, ops->max_level);
+
+ return level;
+}
+
+/*
* Given an address space and start and length, compress the bytes into @pages
* that are allocated on demand.
*
@@ -1748,19 +1762,3 @@ unsigned int btrfs_compress_str2level(unsigned int type, const char *str)
return level;
}
-
-/*
- * Adjust @level according to the limits of the compression algorithm or
- * fallback to default
- */
-unsigned int btrfs_compress_set_level(int type, unsigned level)
-{
- const struct btrfs_compress_op *ops = btrfs_compress_op[type];
-
- if (level == 0)
- level = ops->default_level;
- else
- level = min(level, ops->max_level);
-
- return level;
-}
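/*
 * For reference, the crypto_shash_digest() conversions in this series rely
 * on the shash API equivalence, where one call replaces the three-step
 * sequence as long as the data is a single contiguous buffer:
 *
 *	crypto_shash_init(desc);
 *	crypto_shash_update(desc, data, len);
 *	crypto_shash_final(desc, out);
 *
 *	crypto_shash_digest(desc, data, len, out);	// equivalent
 *
 * Checksums built from multiple buffers still need the init/update/final
 * form.
 */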
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index d253f7aa8ed5..284a3ad31350 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -140,8 +140,6 @@ extern const struct btrfs_compress_op btrfs_zstd_compress;
const char* btrfs_compress_type2str(enum btrfs_compression_type type);
bool btrfs_compress_is_valid_type(const char *str, size_t len);
-unsigned int btrfs_compress_set_level(int type, unsigned level);
-
int btrfs_compress_heuristic(struct inode *inode, u64 start, u64 end);
#endif
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index bfedbbe2311f..3a7648bff42c 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -144,9 +144,10 @@ struct extent_buffer *btrfs_root_node(struct btrfs_root *root)
return eb;
}
-/* cowonly root (everything not a reference counted cow subvolume), just get
- * put onto a simple dirty list. transaction.c walks this to make sure they
- * get properly updated on disk.
+/*
+ * Cow-only roots (non-shareable trees: everything that is not a subvolume
+ * or reloc root) just get put onto a simple dirty list. The transaction
+ * code walks this list to make sure they get properly updated on disk.
*/
static void add_root_to_dirty_list(struct btrfs_root *root)
{
@@ -185,9 +186,9 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
int level;
struct btrfs_disk_key disk_key;
- WARN_ON(test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
+ WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
trans->transid != fs_info->running_transaction->transid);
- WARN_ON(test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
+ WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
trans->transid != root->last_trans);
level = btrfs_header_level(buf);
@@ -826,12 +827,11 @@ int btrfs_block_can_be_shared(struct btrfs_root *root,
struct extent_buffer *buf)
{
/*
- * Tree blocks not in reference counted trees and tree roots
- * are never shared. If a block was allocated after the last
- * snapshot and the block was not allocated by tree relocation,
- * we know the block is not shared.
+ * Tree blocks not in shareable trees and tree roots are never shared.
+ * If a block was allocated after the last snapshot and the block was
+ * not allocated by tree relocation, we know the block is not shared.
*/
- if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
+ if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
buf != root->node && buf != root->commit_root &&
(btrfs_header_generation(buf) <=
btrfs_root_last_snapshot(&root->root_item) ||
@@ -1024,9 +1024,9 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
btrfs_assert_tree_locked(buf);
- WARN_ON(test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
+ WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
trans->transid != fs_info->running_transaction->transid);
- WARN_ON(test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
+ WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
trans->transid != root->last_trans);
level = btrfs_header_level(buf);
@@ -1065,7 +1065,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
return ret;
}
- if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) {
+ if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state)) {
ret = btrfs_reloc_cow_block(trans, root, buf, cow);
if (ret) {
btrfs_abort_transaction(trans, ret);
@@ -1668,15 +1668,8 @@ static noinline int generic_bin_search(struct extent_buffer *eb,
{
int low = 0;
int high = max;
- int mid;
int ret;
- struct btrfs_disk_key *tmp = NULL;
- struct btrfs_disk_key unaligned;
- unsigned long offset;
- char *kaddr = NULL;
- unsigned long map_start = 0;
- unsigned long map_len = 0;
- int err;
+ const int key_size = sizeof(struct btrfs_disk_key);
if (low > high) {
btrfs_err(eb->fs_info,
@@ -1687,32 +1680,26 @@ static noinline int generic_bin_search(struct extent_buffer *eb,
}
while (low < high) {
+ unsigned long oip;
+ unsigned long offset;
+ struct btrfs_disk_key *tmp;
+ struct btrfs_disk_key unaligned;
+ int mid;
+
mid = (low + high) / 2;
offset = p + mid * item_size;
+ oip = offset_in_page(offset);
- if (!kaddr || offset < map_start ||
- (offset + sizeof(struct btrfs_disk_key)) >
- map_start + map_len) {
-
- err = map_private_extent_buffer(eb, offset,
- sizeof(struct btrfs_disk_key),
- &kaddr, &map_start, &map_len);
-
- if (!err) {
- tmp = (struct btrfs_disk_key *)(kaddr + offset -
- map_start);
- } else if (err == 1) {
- read_extent_buffer(eb, &unaligned,
- offset, sizeof(unaligned));
- tmp = &unaligned;
- } else {
- return err;
- }
+ if (oip + key_size <= PAGE_SIZE) {
+ const unsigned long idx = offset >> PAGE_SHIFT;
+ char *kaddr = page_address(eb->pages[idx]);
+ tmp = (struct btrfs_disk_key *)(kaddr + oip);
} else {
- tmp = (struct btrfs_disk_key *)(kaddr + offset -
- map_start);
+ read_extent_buffer(eb, &unaligned, offset, key_size);
+ tmp = &unaligned;
}
+
ret = comp_keys(tmp, key);
if (ret < 0)
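/*
 * A worked example of the fast path above, assuming 4K pages and
 * sizeof(struct btrfs_disk_key) == 17: for offset = 8210, oip = 18 and
 * 18 + 17 <= 4096, so the key is read directly from
 * page_address(eb->pages[offset >> PAGE_SHIFT]) + oip; for offset = 8185,
 * oip = 4089 and 4089 + 17 > 4096, so the key straddles a page boundary
 * and has to be copied out with read_extent_buffer() instead.
 */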
@@ -1733,9 +1720,9 @@ static noinline int generic_bin_search(struct extent_buffer *eb,
* leaves vs nodes
*/
int btrfs_bin_search(struct extent_buffer *eb, const struct btrfs_key *key,
- int level, int *slot)
+ int *slot)
{
- if (level == 0)
+ if (btrfs_header_level(eb) == 0)
return generic_bin_search(eb,
offsetof(struct btrfs_leaf, items),
sizeof(struct btrfs_item),
@@ -2348,16 +2335,15 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
struct btrfs_fs_info *fs_info = root->fs_info;
u64 blocknr;
u64 gen;
- struct extent_buffer *b = *eb_ret;
struct extent_buffer *tmp;
struct btrfs_key first_key;
int ret;
int parent_level;
- blocknr = btrfs_node_blockptr(b, slot);
- gen = btrfs_node_ptr_generation(b, slot);
- parent_level = btrfs_header_level(b);
- btrfs_node_key_to_cpu(b, &first_key, slot);
+ blocknr = btrfs_node_blockptr(*eb_ret, slot);
+ gen = btrfs_node_ptr_generation(*eb_ret, slot);
+ parent_level = btrfs_header_level(*eb_ret);
+ btrfs_node_key_to_cpu(*eb_ret, &first_key, slot);
tmp = find_extent_buffer(fs_info, blocknr);
if (tmp) {
@@ -2501,19 +2487,6 @@ done:
return ret;
}
-static int key_search(struct extent_buffer *b, const struct btrfs_key *key,
- int level, int *prev_cmp, int *slot)
-{
- if (*prev_cmp != 0) {
- *prev_cmp = btrfs_bin_search(b, key, level, slot);
- return *prev_cmp;
- }
-
- *slot = 0;
-
- return 0;
-}
-
int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *path,
u64 iobjectid, u64 ioff, u8 key_type,
struct btrfs_key *found_key)
@@ -2783,9 +2756,23 @@ cow_done:
}
}
- ret = key_search(b, key, level, &prev_cmp, &slot);
- if (ret < 0)
- goto done;
+ /*
+ * If btrfs_bin_search() returns an exact match (prev_cmp == 0),
+ * we can safely assume the target key will always be in slot 0
+ * on lower levels, due to the invariant the btrfs btree provides:
+ * a btrfs_key_ptr entry always points to the lowest key in the
+ * child node. Thus we can skip searching lower levels.
+ */
+ if (prev_cmp == 0) {
+ slot = 0;
+ ret = 0;
+ } else {
+ ret = btrfs_bin_search(b, key, &slot);
+ prev_cmp = ret;
+ if (ret < 0)
+ goto done;
+ }
if (level == 0) {
p->slots[level] = slot;
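/*
 * Illustration of why slot 0 is safe in the exact-match case above: a node
 * entry's key is the lowest key of the child it points to, so once one
 * level matched the search key exactly, every child along that path starts
 * with that same key:
 *
 *	level 1:  [ (10 DIR_ITEM 0) | (20 DIR_ITEM 0) ]    <- exact match
 *	level 0:  [ (10 DIR_ITEM 0) (11 DIR_ITEM 0) ... ]  <- always slot 0
 */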
@@ -2909,7 +2896,6 @@ int btrfs_search_old_slot(struct btrfs_root *root, const struct btrfs_key *key,
int level;
int lowest_unlock = 1;
u8 lowest_level = 0;
- int prev_cmp = -1;
lowest_level = p->lowest_level;
WARN_ON(p->nodes[0] != NULL);
@@ -2942,12 +2928,7 @@ again:
*/
btrfs_unlock_up_safe(p, level + 1);
- /*
- * Since we can unwind ebs we want to do a real search every
- * time.
- */
- prev_cmp = -1;
- ret = key_search(b, key, level, &prev_cmp, &slot);
+ ret = btrfs_bin_search(b, key, &slot);
if (ret < 0)
goto done;
@@ -3507,19 +3488,17 @@ static int leaf_space_used(struct extent_buffer *l, int start, int nr)
{
struct btrfs_item *start_item;
struct btrfs_item *end_item;
- struct btrfs_map_token token;
int data_len;
int nritems = btrfs_header_nritems(l);
int end = min(nritems, start + nr) - 1;
if (!nr)
return 0;
- btrfs_init_map_token(&token, l);
start_item = btrfs_item_nr(start);
end_item = btrfs_item_nr(end);
- data_len = btrfs_token_item_offset(l, start_item, &token) +
- btrfs_token_item_size(l, start_item, &token);
- data_len = data_len - btrfs_token_item_offset(l, end_item, &token);
+ data_len = btrfs_item_offset(l, start_item) +
+ btrfs_item_size(l, start_item);
+ data_len = data_len - btrfs_item_offset(l, end_item);
data_len += sizeof(struct btrfs_item) * nr;
WARN_ON(data_len < 0);
return data_len;
@@ -3650,8 +3629,8 @@ static noinline int __push_leaf_right(struct btrfs_path *path,
push_space = BTRFS_LEAF_DATA_SIZE(fs_info);
for (i = 0; i < right_nritems; i++) {
item = btrfs_item_nr(i);
- push_space -= btrfs_token_item_size(right, item, &token);
- btrfs_set_token_item_offset(right, item, push_space, &token);
+ push_space -= btrfs_token_item_size(&token, item);
+ btrfs_set_token_item_offset(&token, item, push_space);
}
left_nritems -= push_items;
@@ -3859,10 +3838,9 @@ static noinline int __push_leaf_left(struct btrfs_path *path, int data_size,
item = btrfs_item_nr(i);
- ioff = btrfs_token_item_offset(left, item, &token);
- btrfs_set_token_item_offset(left, item,
- ioff - (BTRFS_LEAF_DATA_SIZE(fs_info) - old_left_item_size),
- &token);
+ ioff = btrfs_token_item_offset(&token, item);
+ btrfs_set_token_item_offset(&token, item,
+ ioff - (BTRFS_LEAF_DATA_SIZE(fs_info) - old_left_item_size));
}
btrfs_set_header_nritems(left, old_left_nritems + push_items);
@@ -3892,9 +3870,8 @@ static noinline int __push_leaf_left(struct btrfs_path *path, int data_size,
for (i = 0; i < right_nritems; i++) {
item = btrfs_item_nr(i);
- push_space = push_space - btrfs_token_item_size(right,
- item, &token);
- btrfs_set_token_item_offset(right, item, push_space, &token);
+ push_space = push_space - btrfs_token_item_size(&token, item);
+ btrfs_set_token_item_offset(&token, item, push_space);
}
btrfs_mark_buffer_dirty(left);
@@ -4036,9 +4013,8 @@ static noinline void copy_for_split(struct btrfs_trans_handle *trans,
struct btrfs_item *item = btrfs_item_nr(i);
u32 ioff;
- ioff = btrfs_token_item_offset(right, item, &token);
- btrfs_set_token_item_offset(right, item,
- ioff + rt_data_off, &token);
+ ioff = btrfs_token_item_offset(&token, item);
+ btrfs_set_token_item_offset(&token, item, ioff + rt_data_off);
}
btrfs_set_header_nritems(l, mid);
@@ -4541,9 +4517,8 @@ void btrfs_truncate_item(struct btrfs_path *path, u32 new_size, int from_end)
u32 ioff;
item = btrfs_item_nr(i);
- ioff = btrfs_token_item_offset(leaf, item, &token);
- btrfs_set_token_item_offset(leaf, item,
- ioff + size_diff, &token);
+ ioff = btrfs_token_item_offset(&token, item);
+ btrfs_set_token_item_offset(&token, item, ioff + size_diff);
}
/* shift the data */
@@ -4640,9 +4615,8 @@ void btrfs_extend_item(struct btrfs_path *path, u32 data_size)
u32 ioff;
item = btrfs_item_nr(i);
- ioff = btrfs_token_item_offset(leaf, item, &token);
- btrfs_set_token_item_offset(leaf, item,
- ioff - data_size, &token);
+ ioff = btrfs_token_item_offset(&token, item);
+ btrfs_set_token_item_offset(&token, item, ioff - data_size);
}
/* shift the data */
@@ -4718,9 +4692,9 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
u32 ioff;
item = btrfs_item_nr(i);
- ioff = btrfs_token_item_offset(leaf, item, &token);
- btrfs_set_token_item_offset(leaf, item,
- ioff - total_data, &token);
+ ioff = btrfs_token_item_offset(&token, item);
+ btrfs_set_token_item_offset(&token, item,
+ ioff - total_data);
}
/* shift the items */
memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
@@ -4739,10 +4713,9 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
btrfs_set_item_key(leaf, &disk_key, slot + i);
item = btrfs_item_nr(slot + i);
- btrfs_set_token_item_offset(leaf, item,
- data_end - data_size[i], &token);
+ btrfs_set_token_item_offset(&token, item, data_end - data_size[i]);
data_end -= data_size[i];
- btrfs_set_token_item_size(leaf, item, data_size[i], &token);
+ btrfs_set_token_item_size(&token, item, data_size[i]);
}
btrfs_set_header_nritems(leaf, nritems + nr);
@@ -4930,9 +4903,8 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
u32 ioff;
item = btrfs_item_nr(i);
- ioff = btrfs_token_item_offset(leaf, item, &token);
- btrfs_set_token_item_offset(leaf, item,
- ioff + dsize, &token);
+ ioff = btrfs_token_item_offset(&token, item);
+ btrfs_set_token_item_offset(&token, item, ioff + dsize);
}
memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
@@ -5103,7 +5075,7 @@ again:
while (1) {
nritems = btrfs_header_nritems(cur);
level = btrfs_header_level(cur);
- sret = btrfs_bin_search(cur, min_key, level, &slot);
+ sret = btrfs_bin_search(cur, min_key, &slot);
if (sret < 0) {
ret = sret;
goto out;
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 8aa7b9dac405..161533040978 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -28,6 +28,7 @@
#include <linux/dynamic_debug.h>
#include <linux/refcount.h>
#include <linux/crc32c.h>
+#include <linux/iomap.h>
#include "extent-io-tree.h"
#include "extent_io.h"
#include "extent_map.h"
@@ -582,6 +583,7 @@ struct btrfs_fs_info {
struct btrfs_root *quota_root;
struct btrfs_root *uuid_root;
struct btrfs_root *free_space_root;
+ struct btrfs_root *data_reloc_root;
/* the log root tree is a directory of all the other log roots */
struct btrfs_root *log_root_tree;
@@ -758,7 +760,6 @@ struct btrfs_fs_info {
struct btrfs_workqueue *endio_workers;
struct btrfs_workqueue *endio_meta_workers;
struct btrfs_workqueue *endio_raid56_workers;
- struct btrfs_workqueue *endio_repair_workers;
struct btrfs_workqueue *rmw_workers;
struct btrfs_workqueue *endio_meta_write_workers;
struct btrfs_workqueue *endio_write_workers;
@@ -970,7 +971,28 @@ enum {
* is used to tell us when more checks are required
*/
BTRFS_ROOT_IN_TRANS_SETUP,
- BTRFS_ROOT_REF_COWS,
+
+ /*
+ * Set if tree blocks of this root can be shared by other roots.
+ * Only subvolume trees and their reloc trees have this bit set.
+ * Conflicts with TRACK_DIRTY bit.
+ *
+ * This affects two things:
+ *
+ * - How balance works
+ * For shareable roots, we need to use the reloc tree and do path
+ * replacement for balance, and we need various pre/post hooks for
+ * snapshot creation to handle them.
+ *
+ * For non-shareable trees, we simply do a tree search with COW.
+ *
+ * - How dirty roots are tracked
+ * For shareable roots, btrfs_record_root_in_trans() is needed to
+ * track them, while non-subvolume roots have the TRACK_DIRTY bit
+ * and don't need to be tracked manually.
+ */
+ BTRFS_ROOT_SHAREABLE,
BTRFS_ROOT_TRACK_DIRTY,
BTRFS_ROOT_IN_RADIX,
BTRFS_ROOT_ORPHAN_ITEM_INSERTED,
@@ -1056,7 +1078,7 @@ struct btrfs_root {
struct btrfs_key defrag_progress;
struct btrfs_key defrag_max;
- /* the dirty list is only used by non-reference counted roots */
+ /* The dirty list is only used by non-shareable roots */
struct list_head dirty_list;
struct list_head root_list;
@@ -1146,6 +1168,9 @@ struct btrfs_root {
/* Record pairs of swapped blocks for qgroup */
struct btrfs_qgroup_swapped_blocks swapped_blocks;
+ /* Used only by log trees, when logging csum items */
+ struct extent_io_tree log_csum_range;
+
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
u64 alloc_bytenr;
#endif
@@ -1341,7 +1366,7 @@ do { \
BTRFS_INODE_ROOT_ITEM_INIT)
struct btrfs_map_token {
- const struct extent_buffer *eb;
+ struct extent_buffer *eb;
char *kaddr;
unsigned long offset;
};
@@ -1353,7 +1378,8 @@ static inline void btrfs_init_map_token(struct btrfs_map_token *token,
struct extent_buffer *eb)
{
token->eb = eb;
- token->kaddr = NULL;
+ token->kaddr = page_address(eb->pages[0]);
+ token->offset = 0;
}
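/*
 * A minimal sketch of the intended token usage, consistent with the
 * token-first helpers introduced below (not itself part of the patch):
 *
 *	struct btrfs_map_token token;
 *
 *	btrfs_init_map_token(&token, leaf);
 *	offset = btrfs_token_item_offset(&token, item);	// uses cached kaddr
 *	size = btrfs_token_item_size(&token, item);	// no remapping needed
 */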
/* some macros to generate set/get functions for the struct fields. This
@@ -1377,15 +1403,14 @@ static inline void btrfs_init_map_token(struct btrfs_map_token *token,
sizeof(((type *)0)->member)))
#define DECLARE_BTRFS_SETGET_BITS(bits) \
-u##bits btrfs_get_token_##bits(const struct extent_buffer *eb, \
- const void *ptr, unsigned long off, \
- struct btrfs_map_token *token); \
-void btrfs_set_token_##bits(struct extent_buffer *eb, const void *ptr, \
- unsigned long off, u##bits val, \
- struct btrfs_map_token *token); \
+u##bits btrfs_get_token_##bits(struct btrfs_map_token *token, \
+ const void *ptr, unsigned long off); \
+void btrfs_set_token_##bits(struct btrfs_map_token *token, \
+ const void *ptr, unsigned long off, \
+ u##bits val); \
u##bits btrfs_get_##bits(const struct extent_buffer *eb, \
const void *ptr, unsigned long off); \
-void btrfs_set_##bits(struct extent_buffer *eb, void *ptr, \
+void btrfs_set_##bits(const struct extent_buffer *eb, void *ptr, \
unsigned long off, u##bits val);
DECLARE_BTRFS_SETGET_BITS(8)
@@ -1400,25 +1425,23 @@ static inline u##bits btrfs_##name(const struct extent_buffer *eb, \
BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member); \
return btrfs_get_##bits(eb, s, offsetof(type, member)); \
} \
-static inline void btrfs_set_##name(struct extent_buffer *eb, type *s, \
+static inline void btrfs_set_##name(const struct extent_buffer *eb, type *s, \
u##bits val) \
{ \
BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member); \
btrfs_set_##bits(eb, s, offsetof(type, member), val); \
} \
-static inline u##bits btrfs_token_##name(const struct extent_buffer *eb,\
- const type *s, \
- struct btrfs_map_token *token) \
+static inline u##bits btrfs_token_##name(struct btrfs_map_token *token, \
+ const type *s) \
{ \
BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member); \
- return btrfs_get_token_##bits(eb, s, offsetof(type, member), token); \
+ return btrfs_get_token_##bits(token, s, offsetof(type, member));\
} \
-static inline void btrfs_set_token_##name(struct extent_buffer *eb, \
- type *s, u##bits val, \
- struct btrfs_map_token *token) \
+static inline void btrfs_set_token_##name(struct btrfs_map_token *token,\
+ type *s, u##bits val) \
{ \
BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member); \
- btrfs_set_token_##bits(eb, s, offsetof(type, member), val, token); \
+ btrfs_set_token_##bits(token, s, offsetof(type, member), val); \
}
#define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits) \
@@ -1428,7 +1451,7 @@ static inline u##bits btrfs_##name(const struct extent_buffer *eb) \
u##bits res = le##bits##_to_cpu(p->member); \
return res; \
} \
-static inline void btrfs_set_##name(struct extent_buffer *eb, \
+static inline void btrfs_set_##name(const struct extent_buffer *eb, \
u##bits val) \
{ \
type *p = page_address(eb->pages[0]); \
@@ -1446,7 +1469,7 @@ static inline void btrfs_set_##name(type *s, u##bits val) \
}
-static inline u64 btrfs_device_total_bytes(struct extent_buffer *eb,
+static inline u64 btrfs_device_total_bytes(const struct extent_buffer *eb,
struct btrfs_dev_item *s)
{
BUILD_BUG_ON(sizeof(u64) !=
@@ -1454,7 +1477,7 @@ static inline u64 btrfs_device_total_bytes(struct extent_buffer *eb,
return btrfs_get_64(eb, s, offsetof(struct btrfs_dev_item,
total_bytes));
}
-static inline void btrfs_set_device_total_bytes(struct extent_buffer *eb,
+static inline void btrfs_set_device_total_bytes(const struct extent_buffer *eb,
struct btrfs_dev_item *s,
u64 val)
{
@@ -1558,13 +1581,13 @@ static inline char *btrfs_stripe_dev_uuid_nr(struct btrfs_chunk *c, int nr)
return btrfs_stripe_dev_uuid(btrfs_stripe_nr(c, nr));
}
-static inline u64 btrfs_stripe_offset_nr(struct extent_buffer *eb,
+static inline u64 btrfs_stripe_offset_nr(const struct extent_buffer *eb,
struct btrfs_chunk *c, int nr)
{
return btrfs_stripe_offset(eb, btrfs_stripe_nr(c, nr));
}
-static inline u64 btrfs_stripe_devid_nr(struct extent_buffer *eb,
+static inline u64 btrfs_stripe_devid_nr(const struct extent_buffer *eb,
struct btrfs_chunk *c, int nr)
{
return btrfs_stripe_devid(eb, btrfs_stripe_nr(c, nr));
@@ -1644,31 +1667,21 @@ BTRFS_SETGET_FUNCS(dev_extent_chunk_objectid, struct btrfs_dev_extent,
BTRFS_SETGET_FUNCS(dev_extent_chunk_offset, struct btrfs_dev_extent,
chunk_offset, 64);
BTRFS_SETGET_FUNCS(dev_extent_length, struct btrfs_dev_extent, length, 64);
-
-static inline unsigned long btrfs_dev_extent_chunk_tree_uuid(struct btrfs_dev_extent *dev)
-{
- unsigned long ptr = offsetof(struct btrfs_dev_extent, chunk_tree_uuid);
- return (unsigned long)dev + ptr;
-}
-
BTRFS_SETGET_FUNCS(extent_refs, struct btrfs_extent_item, refs, 64);
BTRFS_SETGET_FUNCS(extent_generation, struct btrfs_extent_item,
generation, 64);
BTRFS_SETGET_FUNCS(extent_flags, struct btrfs_extent_item, flags, 64);
-BTRFS_SETGET_FUNCS(extent_refs_v0, struct btrfs_extent_item_v0, refs, 32);
-
-
BTRFS_SETGET_FUNCS(tree_block_level, struct btrfs_tree_block_info, level, 8);
-static inline void btrfs_tree_block_key(struct extent_buffer *eb,
+static inline void btrfs_tree_block_key(const struct extent_buffer *eb,
struct btrfs_tree_block_info *item,
struct btrfs_disk_key *key)
{
read_eb_member(eb, item, struct btrfs_tree_block_info, key, key);
}
-static inline void btrfs_set_tree_block_key(struct extent_buffer *eb,
+static inline void btrfs_set_tree_block_key(const struct extent_buffer *eb,
struct btrfs_tree_block_info *item,
struct btrfs_disk_key *key)
{
@@ -1706,12 +1719,6 @@ static inline u32 btrfs_extent_inline_ref_size(int type)
return 0;
}
-BTRFS_SETGET_FUNCS(ref_root_v0, struct btrfs_extent_ref_v0, root, 64);
-BTRFS_SETGET_FUNCS(ref_generation_v0, struct btrfs_extent_ref_v0,
- generation, 64);
-BTRFS_SETGET_FUNCS(ref_objectid_v0, struct btrfs_extent_ref_v0, objectid, 64);
-BTRFS_SETGET_FUNCS(ref_count_v0, struct btrfs_extent_ref_v0, count, 32);
-
/* struct btrfs_node */
BTRFS_SETGET_FUNCS(key_blockptr, struct btrfs_key_ptr, blockptr, 64);
BTRFS_SETGET_FUNCS(key_generation, struct btrfs_key_ptr, generation, 64);
@@ -1720,7 +1727,7 @@ BTRFS_SETGET_STACK_FUNCS(stack_key_blockptr, struct btrfs_key_ptr,
BTRFS_SETGET_STACK_FUNCS(stack_key_generation, struct btrfs_key_ptr,
generation, 64);
-static inline u64 btrfs_node_blockptr(struct extent_buffer *eb, int nr)
+static inline u64 btrfs_node_blockptr(const struct extent_buffer *eb, int nr)
{
unsigned long ptr;
ptr = offsetof(struct btrfs_node, ptrs) +
@@ -1728,7 +1735,7 @@ static inline u64 btrfs_node_blockptr(struct extent_buffer *eb, int nr)
return btrfs_key_blockptr(eb, (struct btrfs_key_ptr *)ptr);
}
-static inline void btrfs_set_node_blockptr(struct extent_buffer *eb,
+static inline void btrfs_set_node_blockptr(const struct extent_buffer *eb,
int nr, u64 val)
{
unsigned long ptr;
@@ -1737,7 +1744,7 @@ static inline void btrfs_set_node_blockptr(struct extent_buffer *eb,
btrfs_set_key_blockptr(eb, (struct btrfs_key_ptr *)ptr, val);
}
-static inline u64 btrfs_node_ptr_generation(struct extent_buffer *eb, int nr)
+static inline u64 btrfs_node_ptr_generation(const struct extent_buffer *eb, int nr)
{
unsigned long ptr;
ptr = offsetof(struct btrfs_node, ptrs) +
@@ -1745,7 +1752,7 @@ static inline u64 btrfs_node_ptr_generation(struct extent_buffer *eb, int nr)
return btrfs_key_generation(eb, (struct btrfs_key_ptr *)ptr);
}
-static inline void btrfs_set_node_ptr_generation(struct extent_buffer *eb,
+static inline void btrfs_set_node_ptr_generation(const struct extent_buffer *eb,
int nr, u64 val)
{
unsigned long ptr;
@@ -1763,7 +1770,7 @@ static inline unsigned long btrfs_node_key_ptr_offset(int nr)
void btrfs_node_key(const struct extent_buffer *eb,
struct btrfs_disk_key *disk_key, int nr);
-static inline void btrfs_set_node_key(struct extent_buffer *eb,
+static inline void btrfs_set_node_key(const struct extent_buffer *eb,
struct btrfs_disk_key *disk_key, int nr)
{
unsigned long ptr;
@@ -2498,8 +2505,6 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
struct btrfs_ref *generic_ref);
int btrfs_extent_readonly(struct btrfs_fs_info *fs_info, u64 bytenr);
-void btrfs_get_block_group_trimming(struct btrfs_block_group *cache);
-void btrfs_put_block_group_trimming(struct btrfs_block_group *cache);
void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
enum btrfs_reserve_flush_enum {
@@ -2512,6 +2517,7 @@ enum btrfs_reserve_flush_enum {
BTRFS_RESERVE_FLUSH_LIMIT,
BTRFS_RESERVE_FLUSH_EVICT,
BTRFS_RESERVE_FLUSH_ALL,
+ BTRFS_RESERVE_FLUSH_ALL_STEAL,
};
enum btrfs_flush_state {
@@ -2551,7 +2557,7 @@ void btrfs_wait_for_snapshot_creation(struct btrfs_root *root);
/* ctree.c */
int btrfs_bin_search(struct extent_buffer *eb, const struct btrfs_key *key,
- int level, int *slot);
+ int *slot);
int __pure btrfs_comp_cpu_keys(const struct btrfs_key *k1, const struct btrfs_key *k2);
int btrfs_previous_item(struct btrfs_root *root,
struct btrfs_path *path, u64 min_objectid,
@@ -2896,10 +2902,9 @@ void btrfs_free_inode(struct inode *inode);
int btrfs_drop_inode(struct inode *inode);
int __init btrfs_init_cachep(void);
void __cold btrfs_destroy_cachep(void);
-struct inode *btrfs_iget_path(struct super_block *s, struct btrfs_key *location,
+struct inode *btrfs_iget_path(struct super_block *s, u64 ino,
struct btrfs_root *root, struct btrfs_path *path);
-struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
- struct btrfs_root *root);
+struct inode *btrfs_iget(struct super_block *s, u64 ino, struct btrfs_root *root);
struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
struct page *page, size_t pg_offset,
u64 start, u64 end);
@@ -2929,6 +2934,9 @@ int btrfs_writepage_cow_fixup(struct page *page, u64 start, u64 end);
void btrfs_writepage_endio_finish_ordered(struct page *page, u64 start,
u64 end, int uptodate);
extern const struct dentry_operations btrfs_dentry_operations;
+ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter);
+extern const struct iomap_ops btrfs_dio_iomap_ops;
+extern const struct iomap_dio_ops btrfs_dops;
/* ioctl.c */
long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
@@ -3381,6 +3389,9 @@ void btrfs_reloc_pre_snapshot(struct btrfs_pending_snapshot *pending,
int btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,
struct btrfs_pending_snapshot *pending);
int btrfs_should_cancel_balance(struct btrfs_fs_info *fs_info);
+struct btrfs_root *find_reloc_root(struct btrfs_fs_info *fs_info,
+ u64 bytenr);
+int btrfs_should_ignore_reloc_root(struct btrfs_root *root);
/* scrub.c */
int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index d10c7be10f3b..7c6f0bbb54a5 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -358,16 +358,14 @@ static int btrfs_check_super_csum(struct btrfs_fs_info *fs_info,
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
shash->tfm = fs_info->csum_shash;
- crypto_shash_init(shash);
/*
* The super_block structure does not span the whole
* BTRFS_SUPER_INFO_SIZE range, we expect that the unused space is
* filled with zeros and is included in the checksum.
*/
- crypto_shash_update(shash, raw_disk_sb + BTRFS_CSUM_SIZE,
- BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
- crypto_shash_final(shash, result);
+ crypto_shash_digest(shash, raw_disk_sb + BTRFS_CSUM_SIZE,
+ BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE, result);
if (memcmp(disk_sb->csum, result, btrfs_super_csum_size(disk_sb)))
return 1;
@@ -709,9 +707,7 @@ static void end_workqueue_bio(struct bio *bio)
else
wq = fs_info->endio_write_workers;
} else {
- if (unlikely(end_io_wq->metadata == BTRFS_WQ_ENDIO_DIO_REPAIR))
- wq = fs_info->endio_repair_workers;
- else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56)
+ if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56)
wq = fs_info->endio_raid56_workers;
else if (end_io_wq->metadata)
wq = fs_info->endio_meta_workers;
@@ -980,9 +976,7 @@ static void btree_invalidatepage(struct page *page, unsigned int offset,
btrfs_warn(BTRFS_I(page->mapping->host)->root->fs_info,
"page private not zero on page %llu",
(unsigned long long)page_offset(page));
- ClearPagePrivate(page);
- set_page_private(page, 0);
- put_page(page);
+ detach_page_private(page);
}
}
@@ -1137,9 +1131,12 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
root->log_transid = 0;
root->log_transid_committed = -1;
root->last_log_commit = 0;
- if (!dummy)
+ if (!dummy) {
extent_io_tree_init(fs_info, &root->dirty_log_pages,
IO_TREE_ROOT_DIRTY_LOG_PAGES, NULL);
+ extent_io_tree_init(fs_info, &root->log_csum_range,
+ IO_TREE_LOG_CSUM_RANGE, NULL);
+ }
memset(&root->root_key, 0, sizeof(root->root_key));
memset(&root->root_item, 0, sizeof(root->root_item));
@@ -1277,12 +1274,13 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
root->root_key.offset = BTRFS_TREE_LOG_OBJECTID;
/*
- * DON'T set REF_COWS for log trees
+ * DON'T set SHAREABLE bit for log trees.
*
- * log trees do not get reference counted because they go away
- * before a real commit is actually done. They do store pointers
- * to file data extents, and those reference counts still get
- * updated (along with back refs to the log tree).
+ * Log trees are not exposed to user space and thus can't be snapshotted,
+ * and they go away before a real commit is actually done.
+ *
+ * They do store pointers to file data extents, and those reference
+ * counts still get updated (along with back refs to the log tree).
*/
leaf = btrfs_alloc_tree_block(trans, root, 0, BTRFS_TREE_LOG_OBJECTID,
@@ -1420,8 +1418,9 @@ static int btrfs_init_fs_root(struct btrfs_root *root)
if (ret)
goto fail;
- if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
- set_bit(BTRFS_ROOT_REF_COWS, &root->state);
+ if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID &&
+ root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) {
+ set_bit(BTRFS_ROOT_SHAREABLE, &root->state);
btrfs_check_and_init_root_item(&root->root_item);
}
@@ -1526,6 +1525,7 @@ void btrfs_free_fs_info(struct btrfs_fs_info *fs_info)
btrfs_put_root(fs_info->uuid_root);
btrfs_put_root(fs_info->free_space_root);
btrfs_put_root(fs_info->fs_root);
+ btrfs_put_root(fs_info->data_reloc_root);
btrfs_check_leaked_roots(fs_info);
btrfs_extent_buffer_leak_debug_check(fs_info);
kfree(fs_info->super_copy);
@@ -1535,35 +1535,34 @@ void btrfs_free_fs_info(struct btrfs_fs_info *fs_info)
struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
- struct btrfs_key *location,
- bool check_ref)
+ u64 objectid, bool check_ref)
{
struct btrfs_root *root;
struct btrfs_path *path;
struct btrfs_key key;
int ret;
- if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
+ if (objectid == BTRFS_ROOT_TREE_OBJECTID)
return btrfs_grab_root(fs_info->tree_root);
- if (location->objectid == BTRFS_EXTENT_TREE_OBJECTID)
+ if (objectid == BTRFS_EXTENT_TREE_OBJECTID)
return btrfs_grab_root(fs_info->extent_root);
- if (location->objectid == BTRFS_CHUNK_TREE_OBJECTID)
+ if (objectid == BTRFS_CHUNK_TREE_OBJECTID)
return btrfs_grab_root(fs_info->chunk_root);
- if (location->objectid == BTRFS_DEV_TREE_OBJECTID)
+ if (objectid == BTRFS_DEV_TREE_OBJECTID)
return btrfs_grab_root(fs_info->dev_root);
- if (location->objectid == BTRFS_CSUM_TREE_OBJECTID)
+ if (objectid == BTRFS_CSUM_TREE_OBJECTID)
return btrfs_grab_root(fs_info->csum_root);
- if (location->objectid == BTRFS_QUOTA_TREE_OBJECTID)
+ if (objectid == BTRFS_QUOTA_TREE_OBJECTID)
return btrfs_grab_root(fs_info->quota_root) ?
fs_info->quota_root : ERR_PTR(-ENOENT);
- if (location->objectid == BTRFS_UUID_TREE_OBJECTID)
+ if (objectid == BTRFS_UUID_TREE_OBJECTID)
return btrfs_grab_root(fs_info->uuid_root) ?
fs_info->uuid_root : ERR_PTR(-ENOENT);
- if (location->objectid == BTRFS_FREE_SPACE_TREE_OBJECTID)
+ if (objectid == BTRFS_FREE_SPACE_TREE_OBJECTID)
return btrfs_grab_root(fs_info->free_space_root) ?
fs_info->free_space_root : ERR_PTR(-ENOENT);
again:
- root = btrfs_lookup_fs_root(fs_info, location->objectid);
+ root = btrfs_lookup_fs_root(fs_info, objectid);
if (root) {
if (check_ref && btrfs_root_refs(&root->root_item) == 0) {
btrfs_put_root(root);
@@ -1572,7 +1571,10 @@ again:
return root;
}
- root = btrfs_read_tree_root(fs_info->tree_root, location);
+ key.objectid = objectid;
+ key.type = BTRFS_ROOT_ITEM_KEY;
+ key.offset = (u64)-1;
+ root = btrfs_read_tree_root(fs_info->tree_root, &key);
if (IS_ERR(root))
return root;
@@ -1592,7 +1594,7 @@ again:
}
key.objectid = BTRFS_ORPHAN_OBJECTID;
key.type = BTRFS_ORPHAN_ITEM_KEY;
- key.offset = location->objectid;
+ key.offset = objectid;
ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
btrfs_free_path(path);
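/*
 * The caller conversion implied by the new prototype (the export.c and
 * open_ctree() hunks below follow this pattern):
 *
 *	// before
 *	key.objectid = root_objectid;
 *	key.type = BTRFS_ROOT_ITEM_KEY;
 *	key.offset = (u64)-1;
 *	root = btrfs_get_fs_root(fs_info, &key, true);
 *
 *	// after
 *	root = btrfs_get_fs_root(fs_info, root_objectid, true);
 */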
@@ -1942,7 +1944,6 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info)
btrfs_destroy_workqueue(fs_info->workers);
btrfs_destroy_workqueue(fs_info->endio_workers);
btrfs_destroy_workqueue(fs_info->endio_raid56_workers);
- btrfs_destroy_workqueue(fs_info->endio_repair_workers);
btrfs_destroy_workqueue(fs_info->rmw_workers);
btrfs_destroy_workqueue(fs_info->endio_write_workers);
btrfs_destroy_workqueue(fs_info->endio_freespace_worker);
@@ -1983,6 +1984,7 @@ static void free_root_pointers(struct btrfs_fs_info *info, bool free_chunk_root)
free_root_extent_buffers(info->quota_root);
free_root_extent_buffers(info->uuid_root);
free_root_extent_buffers(info->fs_root);
+ free_root_extent_buffers(info->data_reloc_root);
if (free_chunk_root)
free_root_extent_buffers(info->chunk_root);
free_root_extent_buffers(info->free_space_root);
@@ -1995,6 +1997,7 @@ void btrfs_put_root(struct btrfs_root *root)
if (refcount_dec_and_test(&root->refs)) {
WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree));
+ WARN_ON(test_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state));
if (root->anon_dev)
free_anon_bdev(root->anon_dev);
btrfs_drew_lock_destroy(&root->snapshot_lock);
@@ -2145,8 +2148,6 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info,
fs_info->endio_raid56_workers =
btrfs_alloc_workqueue(fs_info, "endio-raid56", flags,
max_active, 4);
- fs_info->endio_repair_workers =
- btrfs_alloc_workqueue(fs_info, "endio-repair", flags, 1, 0);
fs_info->rmw_workers =
btrfs_alloc_workqueue(fs_info, "rmw", flags, max_active, 2);
fs_info->endio_write_workers =
@@ -2170,7 +2171,6 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info,
fs_info->flush_workers &&
fs_info->endio_workers && fs_info->endio_meta_workers &&
fs_info->endio_meta_write_workers &&
- fs_info->endio_repair_workers &&
fs_info->endio_write_workers && fs_info->endio_raid56_workers &&
fs_info->endio_freespace_worker && fs_info->rmw_workers &&
fs_info->caching_workers && fs_info->readahead_workers &&
@@ -2292,6 +2292,19 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info)
set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
fs_info->csum_root = root;
+ /*
+ * This tree can share blocks with other fs trees during relocation,
+ * so it needs the full setup done by btrfs_get_fs_root().
+ */
+ root = btrfs_get_fs_root(tree_root->fs_info,
+ BTRFS_DATA_RELOC_TREE_OBJECTID, true);
+ if (IS_ERR(root)) {
+ ret = PTR_ERR(root);
+ goto out;
+ }
+ set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
+ fs_info->data_reloc_root = root;
+
location.objectid = BTRFS_QUOTA_TREE_OBJECTID;
root = btrfs_read_tree_root(tree_root, &location);
if (!IS_ERR(root)) {
@@ -2829,7 +2842,6 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
u64 generation;
u64 features;
u16 csum_type;
- struct btrfs_key location;
struct btrfs_super_block *disk_super;
struct btrfs_fs_info *fs_info = btrfs_sb(sb);
struct btrfs_root *tree_root;
@@ -3243,11 +3255,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
}
}
- location.objectid = BTRFS_FS_TREE_OBJECTID;
- location.type = BTRFS_ROOT_ITEM_KEY;
- location.offset = 0;
-
- fs_info->fs_root = btrfs_get_fs_root(fs_info, &location, true);
+ fs_info->fs_root = btrfs_get_fs_root(fs_info, BTRFS_FS_TREE_OBJECTID, true);
if (IS_ERR(fs_info->fs_root)) {
err = PTR_ERR(fs_info->fs_root);
btrfs_warn(fs_info, "failed to read fs tree: %d", err);
@@ -3510,10 +3518,9 @@ static int write_dev_supers(struct btrfs_device *device,
btrfs_set_super_bytenr(sb, bytenr);
- crypto_shash_init(shash);
- crypto_shash_update(shash, (const char *)sb + BTRFS_CSUM_SIZE,
- BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
- crypto_shash_final(shash, sb->csum);
+ crypto_shash_digest(shash, (const char *)sb + BTRFS_CSUM_SIZE,
+ BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE,
+ sb->csum);
page = find_or_create_page(mapping, bytenr >> PAGE_SHIFT,
GFP_NOFS);
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index cd629113f61c..bf43245406c4 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -25,7 +25,6 @@ enum btrfs_wq_endio_type {
BTRFS_WQ_ENDIO_METADATA,
BTRFS_WQ_ENDIO_FREE_SPACE,
BTRFS_WQ_ENDIO_RAID56,
- BTRFS_WQ_ENDIO_DIO_REPAIR,
};
static inline u64 btrfs_sb_offset(int mirror)
@@ -67,8 +66,7 @@ int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info,
void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info);
struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
- struct btrfs_key *key,
- bool check_ref);
+ u64 objectid, bool check_ref);
void btrfs_free_fs_info(struct btrfs_fs_info *fs_info);
int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info);
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index 2bb25d2dc44b..1a8d419d9e1f 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -64,24 +64,15 @@ struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
struct btrfs_fs_info *fs_info = btrfs_sb(sb);
struct btrfs_root *root;
struct inode *inode;
- struct btrfs_key key;
if (objectid < BTRFS_FIRST_FREE_OBJECTID)
return ERR_PTR(-ESTALE);
- key.objectid = root_objectid;
- key.type = BTRFS_ROOT_ITEM_KEY;
- key.offset = (u64)-1;
-
- root = btrfs_get_fs_root(fs_info, &key, true);
+ root = btrfs_get_fs_root(fs_info, root_objectid, true);
if (IS_ERR(root))
return ERR_CAST(root);
- key.objectid = objectid;
- key.type = BTRFS_INODE_ITEM_KEY;
- key.offset = 0;
-
- inode = btrfs_iget(sb, &key, root);
+ inode = btrfs_iget(sb, objectid, root);
btrfs_put_root(root);
if (IS_ERR(inode))
return ERR_CAST(inode);
@@ -200,9 +191,7 @@ struct dentry *btrfs_get_parent(struct dentry *child)
found_key.offset, 0, 0);
}
- key.type = BTRFS_INODE_ITEM_KEY;
- key.offset = 0;
- return d_obtain_alias(btrfs_iget(fs_info->sb, &key, root));
+ return d_obtain_alias(btrfs_iget(fs_info->sb, key.objectid, root));
fail:
btrfs_free_path(path);
return ERR_PTR(ret);
diff --git a/fs/btrfs/extent-io-tree.h b/fs/btrfs/extent-io-tree.h
index b4a7bad3e82e..b6561455b3c4 100644
--- a/fs/btrfs/extent-io-tree.h
+++ b/fs/btrfs/extent-io-tree.h
@@ -44,6 +44,7 @@ enum {
IO_TREE_TRANS_DIRTY_PAGES,
IO_TREE_ROOT_DIRTY_LOG_PAGES,
IO_TREE_INODE_FILE_EXTENT,
+ IO_TREE_LOG_CSUM_RANGE,
IO_TREE_SELFTEST,
};
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 54a64d1e18c6..c0bc35f932bf 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2114,22 +2114,6 @@ static u64 find_middle(struct rb_root *root)
}
#endif
-static inline u64 heads_to_leaves(struct btrfs_fs_info *fs_info, u64 heads)
-{
- u64 num_bytes;
-
- num_bytes = heads * (sizeof(struct btrfs_extent_item) +
- sizeof(struct btrfs_extent_inline_ref));
- if (!btrfs_fs_incompat(fs_info, SKINNY_METADATA))
- num_bytes += heads * sizeof(struct btrfs_tree_block_info);
-
- /*
- * We don't ever fill up leaves all the way so multiply by 2 just to be
- * closer to what we're really going to want to use.
- */
- return div_u64(num_bytes, BTRFS_LEAF_DATA_SIZE(fs_info));
-}
-
/*
* Takes the number of bytes to be csumm'ed and figures out how many leaves it
* would require to store the csums for that many bytes.
@@ -2442,7 +2426,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
nritems = btrfs_header_nritems(buf);
level = btrfs_header_level(buf);
- if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state) && level == 0)
+ if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state) && level == 0)
return 0;
if (full_backref)
@@ -2932,7 +2916,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
&trimmed);
list_del_init(&block_group->bg_list);
- btrfs_put_block_group_trimming(block_group);
+ btrfs_unfreeze_block_group(block_group);
btrfs_put_block_group(block_group);
if (ret) {
@@ -3369,6 +3353,7 @@ static struct btrfs_block_group *btrfs_lock_cluster(
struct btrfs_block_group *block_group,
struct btrfs_free_cluster *cluster,
int delalloc)
+ __acquires(&cluster->refill_lock)
{
struct btrfs_block_group *used_bg = NULL;
@@ -5501,8 +5486,6 @@ out:
*/
if (!for_reloc && !root_dropped)
btrfs_add_dead_root(root);
- if (err && err != -EAGAIN)
- btrfs_handle_fs_error(fs_info, err, NULL);
return err;
}
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 39e45b8a5031..68c96057ad2d 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2333,7 +2333,7 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
return 0;
}
-int btrfs_repair_eb_io_failure(struct extent_buffer *eb, int mirror_num)
+int btrfs_repair_eb_io_failure(const struct extent_buffer *eb, int mirror_num)
{
struct btrfs_fs_info *fs_info = eb->fs_info;
u64 start = eb->start;
@@ -2537,8 +2537,9 @@ int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end,
return 0;
}
-bool btrfs_check_repairable(struct inode *inode, unsigned failed_bio_pages,
- struct io_failure_record *failrec, int failed_mirror)
+static bool btrfs_check_repairable(struct inode *inode, bool needs_validation,
+ struct io_failure_record *failrec,
+ int failed_mirror)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
int num_copies;
@@ -2561,7 +2562,7 @@ bool btrfs_check_repairable(struct inode *inode, unsigned failed_bio_pages,
* a) deliver good data to the caller
* b) correct the bad sectors on disk
*/
- if (failed_bio_pages > 1) {
+ if (needs_validation) {
/*
* to fulfill b), we need to know the exact failing sectors, as
* we don't want to rewrite any more than the failed ones. thus,
@@ -2600,94 +2601,115 @@ bool btrfs_check_repairable(struct inode *inode, unsigned failed_bio_pages,
return true;
}
-
-struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio,
- struct io_failure_record *failrec,
- struct page *page, int pg_offset, int icsum,
- bio_end_io_t *endio_func, void *data)
+static bool btrfs_io_needs_validation(struct inode *inode, struct bio *bio)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- struct bio *bio;
- struct btrfs_io_bio *btrfs_failed_bio;
- struct btrfs_io_bio *btrfs_bio;
+ u64 len = 0;
+ const u32 blocksize = inode->i_sb->s_blocksize;
- bio = btrfs_io_bio_alloc(1);
- bio->bi_end_io = endio_func;
- bio->bi_iter.bi_sector = failrec->logical >> 9;
- bio->bi_iter.bi_size = 0;
- bio->bi_private = data;
+ /*
+ * If bi_status is BLK_STS_OK, then this was a checksum error, not an
+ * I/O error. In this case, we already know exactly which sector was
+ * bad, so we don't need to validate.
+ */
+ if (bio->bi_status == BLK_STS_OK)
+ return false;
- btrfs_failed_bio = btrfs_io_bio(failed_bio);
- if (btrfs_failed_bio->csum) {
- u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
+ /*
+ * We need to validate each sector individually if the failed I/O was
+ * for multiple sectors.
+ *
+ * There are a few possible bios that can end up here:
+ * 1. A buffered read bio, which is not cloned.
+ * 2. A direct I/O read bio, which is cloned.
+ * 3. A (buffered or direct) repair bio, which is not cloned.
+ *
+ * For cloned bios (case 2), we can get the size from
+ * btrfs_io_bio->iter; for non-cloned bios (cases 1 and 3), we can get
+ * it from the bvecs.
+ */
+ if (bio_flagged(bio, BIO_CLONED)) {
+ if (btrfs_io_bio(bio)->iter.bi_size > blocksize)
+ return true;
+ } else {
+ struct bio_vec *bvec;
+ int i;
- btrfs_bio = btrfs_io_bio(bio);
- btrfs_bio->csum = btrfs_bio->csum_inline;
- icsum *= csum_size;
- memcpy(btrfs_bio->csum, btrfs_failed_bio->csum + icsum,
- csum_size);
+ bio_for_each_bvec_all(bvec, bio, i) {
+ len += bvec->bv_len;
+ if (len > blocksize)
+ return true;
+ }
}
-
- bio_add_page(bio, page, failrec->len, pg_offset);
-
- return bio;
+ return false;
}
-/*
- * This is a generic handler for readpage errors. If other copies exist, read
- * those and write back good data to the failed position. Does not investigate
- * in remapping the failed extent elsewhere, hoping the device will be smart
- * enough to do this as needed
- */
-static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
- struct page *page, u64 start, u64 end,
- int failed_mirror)
+blk_status_t btrfs_submit_read_repair(struct inode *inode,
+ struct bio *failed_bio, u64 phy_offset,
+ struct page *page, unsigned int pgoff,
+ u64 start, u64 end, int failed_mirror,
+ submit_bio_hook_t *submit_bio_hook)
{
struct io_failure_record *failrec;
- struct inode *inode = page->mapping->host;
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
- struct bio *bio;
- int read_mode = 0;
+ struct btrfs_io_bio *failed_io_bio = btrfs_io_bio(failed_bio);
+ const int icsum = phy_offset >> inode->i_sb->s_blocksize_bits;
+ bool need_validation;
+ struct bio *repair_bio;
+ struct btrfs_io_bio *repair_io_bio;
blk_status_t status;
int ret;
- unsigned failed_bio_pages = failed_bio->bi_iter.bi_size >> PAGE_SHIFT;
+
+ btrfs_debug(fs_info,
+ "repair read error: read error at %llu", start);
BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
ret = btrfs_get_io_failure_record(inode, start, end, &failrec);
if (ret)
- return ret;
+ return errno_to_blk_status(ret);
+
+ need_validation = btrfs_io_needs_validation(inode, failed_bio);
- if (!btrfs_check_repairable(inode, failed_bio_pages, failrec,
+ if (!btrfs_check_repairable(inode, need_validation, failrec,
failed_mirror)) {
free_io_failure(failure_tree, tree, failrec);
- return -EIO;
+ return BLK_STS_IOERR;
}
- if (failed_bio_pages > 1)
- read_mode |= REQ_FAILFAST_DEV;
+ repair_bio = btrfs_io_bio_alloc(1);
+ repair_io_bio = btrfs_io_bio(repair_bio);
+ repair_bio->bi_opf = REQ_OP_READ;
+ if (need_validation)
+ repair_bio->bi_opf |= REQ_FAILFAST_DEV;
+ repair_bio->bi_end_io = failed_bio->bi_end_io;
+ repair_bio->bi_iter.bi_sector = failrec->logical >> 9;
+ repair_bio->bi_private = failed_bio->bi_private;
+
+ if (failed_io_bio->csum) {
+ const u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
- phy_offset >>= inode->i_sb->s_blocksize_bits;
- bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page,
- start - page_offset(page),
- (int)phy_offset, failed_bio->bi_end_io,
- NULL);
- bio->bi_opf = REQ_OP_READ | read_mode;
+ repair_io_bio->csum = repair_io_bio->csum_inline;
+ memcpy(repair_io_bio->csum,
+ failed_io_bio->csum + csum_size * icsum, csum_size);
+ }
+
+ bio_add_page(repair_bio, page, failrec->len, pgoff);
+ repair_io_bio->logical = failrec->start;
+ repair_io_bio->iter = repair_bio->bi_iter;
btrfs_debug(btrfs_sb(inode->i_sb),
- "Repair Read Error: submitting new read[%#x] to this_mirror=%d, in_validation=%d",
- read_mode, failrec->this_mirror, failrec->in_validation);
+"repair read error: submitting new read to mirror %d, in_validation=%d",
+ failrec->this_mirror, failrec->in_validation);
- status = tree->ops->submit_bio_hook(tree->private_data, bio, failrec->this_mirror,
- failrec->bio_flags);
+ status = submit_bio_hook(inode, repair_bio, failrec->this_mirror,
+ failrec->bio_flags);
if (status) {
free_io_failure(failure_tree, tree, failrec);
- bio_put(bio);
- ret = blk_status_to_errno(status);
+ bio_put(repair_bio);
}
-
- return ret;
+ return status;
}
/* lots and lots of room for performance fixes in the end_bio funcs */
@@ -2859,9 +2881,10 @@ static void end_bio_extent_readpage(struct bio *bio)
* If it can't handle the error it will return -EIO and
* we remain responsible for that page.
*/
- ret = bio_readpage_error(bio, offset, page, start, end,
- mirror);
- if (ret == 0) {
+ if (!btrfs_submit_read_repair(inode, bio, offset, page,
+ start - page_offset(page),
+ start, end, mirror,
+ tree->ops->submit_bio_hook)) {
uptodate = !bio->bi_status;
offset += len;
continue;
@@ -3076,22 +3099,16 @@ static int submit_extent_page(unsigned int opf,
static void attach_extent_buffer_page(struct extent_buffer *eb,
struct page *page)
{
- if (!PagePrivate(page)) {
- SetPagePrivate(page);
- get_page(page);
- set_page_private(page, (unsigned long)eb);
- } else {
+ if (!PagePrivate(page))
+ attach_page_private(page, eb);
+ else
WARN_ON(page->private != (unsigned long)eb);
- }
}
void set_page_extent_mapped(struct page *page)
{
- if (!PagePrivate(page)) {
- SetPagePrivate(page);
- get_page(page);
- set_page_private(page, EXTENT_PAGE_PRIVATE);
- }
+ if (!PagePrivate(page))
+ attach_page_private(page, (void *)EXTENT_PAGE_PRIVATE);
}
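
For readers who have not seen the pagemap helpers used above: attach_page_private() and detach_page_private() fold together the reference counting and PG_private handling that the deleted lines open-coded (the matching detach sequence is removed from btrfs_release_extent_buffer_pages() further down). A minimal sketch of their behaviour, reconstructed from the code this patch deletes; the real helpers live in include/linux/pagemap.h:

static inline void attach_page_private_sketch(struct page *page, void *data)
{
	get_page(page);				/* reference owned by ->private */
	set_page_private(page, (unsigned long)data);
	SetPagePrivate(page);
}

static inline void *detach_page_private_sketch(struct page *page)
{
	void *data = (void *)page_private(page);

	if (!PagePrivate(page))
		return NULL;
	ClearPagePrivate(page);
	set_page_private(page, 0);
	put_page(page);				/* drop the attach reference */

	return data;
}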
static struct extent_map *
@@ -4367,51 +4384,32 @@ int extent_writepages(struct address_space *mapping,
return ret;
}
-int extent_readpages(struct address_space *mapping, struct list_head *pages,
- unsigned nr_pages)
+void extent_readahead(struct readahead_control *rac)
{
struct bio *bio = NULL;
unsigned long bio_flags = 0;
struct page *pagepool[16];
struct extent_map *em_cached = NULL;
- int nr = 0;
u64 prev_em_start = (u64)-1;
+ int nr;
- while (!list_empty(pages)) {
- u64 contig_end = 0;
-
- for (nr = 0; nr < ARRAY_SIZE(pagepool) && !list_empty(pages);) {
- struct page *page = lru_to_page(pages);
-
- prefetchw(&page->flags);
- list_del(&page->lru);
- if (add_to_page_cache_lru(page, mapping, page->index,
- readahead_gfp_mask(mapping))) {
- put_page(page);
- break;
- }
-
- pagepool[nr++] = page;
- contig_end = page_offset(page) + PAGE_SIZE - 1;
- }
-
- if (nr) {
- u64 contig_start = page_offset(pagepool[0]);
+ while ((nr = readahead_page_batch(rac, pagepool))) {
+ u64 contig_start = page_offset(pagepool[0]);
+ u64 contig_end = page_offset(pagepool[nr - 1]) + PAGE_SIZE - 1;
- ASSERT(contig_start + nr * PAGE_SIZE - 1 == contig_end);
+ ASSERT(contig_start + nr * PAGE_SIZE - 1 == contig_end);
- contiguous_readpages(pagepool, nr, contig_start,
- contig_end, &em_cached, &bio, &bio_flags,
- &prev_em_start);
- }
+ contiguous_readpages(pagepool, nr, contig_start, contig_end,
+ &em_cached, &bio, &bio_flags, &prev_em_start);
}
if (em_cached)
free_extent_map(em_cached);
- if (bio)
- return submit_one_bio(bio, 0, bio_flags);
- return 0;
+ if (bio) {
+ if (submit_one_bio(bio, 0, bio_flags))
+ return;
+ }
}
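
The conversion above depends on a property of the readahead_control interface: by the time ->readahead is called, the core has already added the pages to the page cache and locked them, so the old add_to_page_cache_lru() dance is gone and the filesystem only submits the reads. A minimal sketch of that calling shape, assuming the linux/pagemap.h helpers (example_readahead is an illustrative name, not part of the patch):

static void example_readahead(struct readahead_control *rac)
{
	struct page *pagepool[16];
	int nr;

	/* pages are already in the page cache and locked by the caller */
	while ((nr = readahead_page_batch(rac, pagepool))) {
		/* build and submit read bios covering pagepool[0..nr-1] */
	}
}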
/*
@@ -4887,7 +4885,7 @@ static void __free_extent_buffer(struct extent_buffer *eb)
kmem_cache_free(extent_buffer_cache, eb);
}
-int extent_buffer_under_io(struct extent_buffer *eb)
+int extent_buffer_under_io(const struct extent_buffer *eb)
{
return (atomic_read(&eb->io_pages) ||
test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) ||
@@ -4929,10 +4927,7 @@ static void btrfs_release_extent_buffer_pages(struct extent_buffer *eb)
* We need to make sure we haven't been attached
* to a new eb.
*/
- ClearPagePrivate(page);
- set_page_private(page, 0);
- /* One for the page private */
- put_page(page);
+ detach_page_private(page);
}
if (mapped)
@@ -4995,7 +4990,7 @@ __alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start,
return eb;
}
-struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src)
+struct extent_buffer *btrfs_clone_extent_buffer(const struct extent_buffer *src)
{
int i;
struct page *p;
@@ -5401,7 +5396,7 @@ void free_extent_buffer_stale(struct extent_buffer *eb)
release_extent_buffer(eb);
}
-void clear_extent_buffer_dirty(struct extent_buffer *eb)
+void clear_extent_buffer_dirty(const struct extent_buffer *eb)
{
int i;
int num_pages;
@@ -5599,8 +5594,7 @@ void read_extent_buffer(const struct extent_buffer *eb, void *dstv,
struct page *page;
char *kaddr;
char *dst = (char *)dstv;
- size_t start_offset = offset_in_page(eb->start);
- unsigned long i = (start_offset + start) >> PAGE_SHIFT;
+ unsigned long i = start >> PAGE_SHIFT;
if (start + len > eb->len) {
WARN(1, KERN_ERR "btrfs bad mapping eb start %llu len %lu, wanted %lu %lu\n",
@@ -5609,7 +5603,7 @@ void read_extent_buffer(const struct extent_buffer *eb, void *dstv,
return;
}
- offset = offset_in_page(start_offset + start);
+ offset = offset_in_page(start);
while (len > 0) {
page = eb->pages[i];
@@ -5634,14 +5628,13 @@ int read_extent_buffer_to_user(const struct extent_buffer *eb,
struct page *page;
char *kaddr;
char __user *dst = (char __user *)dstv;
- size_t start_offset = offset_in_page(eb->start);
- unsigned long i = (start_offset + start) >> PAGE_SHIFT;
+ unsigned long i = start >> PAGE_SHIFT;
int ret = 0;
WARN_ON(start > eb->len);
WARN_ON(start + len > eb->start + eb->len);
- offset = offset_in_page(start_offset + start);
+ offset = offset_in_page(start);
while (len > 0) {
page = eb->pages[i];
@@ -5662,48 +5655,6 @@ int read_extent_buffer_to_user(const struct extent_buffer *eb,
return ret;
}
-/*
- * return 0 if the item is found within a page.
- * return 1 if the item spans two pages.
- * return -EINVAL otherwise.
- */
-int map_private_extent_buffer(const struct extent_buffer *eb,
- unsigned long start, unsigned long min_len,
- char **map, unsigned long *map_start,
- unsigned long *map_len)
-{
- size_t offset;
- char *kaddr;
- struct page *p;
- size_t start_offset = offset_in_page(eb->start);
- unsigned long i = (start_offset + start) >> PAGE_SHIFT;
- unsigned long end_i = (start_offset + start + min_len - 1) >>
- PAGE_SHIFT;
-
- if (start + min_len > eb->len) {
- WARN(1, KERN_ERR "btrfs bad mapping eb start %llu len %lu, wanted %lu %lu\n",
- eb->start, eb->len, start, min_len);
- return -EINVAL;
- }
-
- if (i != end_i)
- return 1;
-
- if (i == 0) {
- offset = start_offset;
- *map_start = 0;
- } else {
- offset = 0;
- *map_start = ((u64)i << PAGE_SHIFT) - start_offset;
- }
-
- p = eb->pages[i];
- kaddr = page_address(p);
- *map = kaddr + offset;
- *map_len = PAGE_SIZE - offset;
- return 0;
-}
-
int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv,
unsigned long start, unsigned long len)
{
@@ -5712,14 +5663,13 @@ int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv,
struct page *page;
char *kaddr;
char *ptr = (char *)ptrv;
- size_t start_offset = offset_in_page(eb->start);
- unsigned long i = (start_offset + start) >> PAGE_SHIFT;
+ unsigned long i = start >> PAGE_SHIFT;
int ret = 0;
WARN_ON(start > eb->len);
WARN_ON(start + len > eb->start + eb->len);
- offset = offset_in_page(start_offset + start);
+ offset = offset_in_page(start);
while (len > 0) {
page = eb->pages[i];
@@ -5739,7 +5689,7 @@ int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv,
return ret;
}
-void write_extent_buffer_chunk_tree_uuid(struct extent_buffer *eb,
+void write_extent_buffer_chunk_tree_uuid(const struct extent_buffer *eb,
const void *srcv)
{
char *kaddr;
@@ -5750,7 +5700,7 @@ void write_extent_buffer_chunk_tree_uuid(struct extent_buffer *eb,
BTRFS_FSID_SIZE);
}
-void write_extent_buffer_fsid(struct extent_buffer *eb, const void *srcv)
+void write_extent_buffer_fsid(const struct extent_buffer *eb, const void *srcv)
{
char *kaddr;
@@ -5760,7 +5710,7 @@ void write_extent_buffer_fsid(struct extent_buffer *eb, const void *srcv)
BTRFS_FSID_SIZE);
}
-void write_extent_buffer(struct extent_buffer *eb, const void *srcv,
+void write_extent_buffer(const struct extent_buffer *eb, const void *srcv,
unsigned long start, unsigned long len)
{
size_t cur;
@@ -5768,13 +5718,12 @@ void write_extent_buffer(struct extent_buffer *eb, const void *srcv,
struct page *page;
char *kaddr;
char *src = (char *)srcv;
- size_t start_offset = offset_in_page(eb->start);
- unsigned long i = (start_offset + start) >> PAGE_SHIFT;
+ unsigned long i = start >> PAGE_SHIFT;
WARN_ON(start > eb->len);
WARN_ON(start + len > eb->start + eb->len);
- offset = offset_in_page(start_offset + start);
+ offset = offset_in_page(start);
while (len > 0) {
page = eb->pages[i];
@@ -5791,20 +5740,19 @@ void write_extent_buffer(struct extent_buffer *eb, const void *srcv,
}
}
-void memzero_extent_buffer(struct extent_buffer *eb, unsigned long start,
+void memzero_extent_buffer(const struct extent_buffer *eb, unsigned long start,
unsigned long len)
{
size_t cur;
size_t offset;
struct page *page;
char *kaddr;
- size_t start_offset = offset_in_page(eb->start);
- unsigned long i = (start_offset + start) >> PAGE_SHIFT;
+ unsigned long i = start >> PAGE_SHIFT;
WARN_ON(start > eb->len);
WARN_ON(start + len > eb->start + eb->len);
- offset = offset_in_page(start_offset + start);
+ offset = offset_in_page(start);
while (len > 0) {
page = eb->pages[i];
@@ -5820,8 +5768,8 @@ void memzero_extent_buffer(struct extent_buffer *eb, unsigned long start,
}
}
-void copy_extent_buffer_full(struct extent_buffer *dst,
- struct extent_buffer *src)
+void copy_extent_buffer_full(const struct extent_buffer *dst,
+ const struct extent_buffer *src)
{
int i;
int num_pages;
@@ -5834,7 +5782,8 @@ void copy_extent_buffer_full(struct extent_buffer *dst,
page_address(src->pages[i]));
}
-void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
+void copy_extent_buffer(const struct extent_buffer *dst,
+ const struct extent_buffer *src,
unsigned long dst_offset, unsigned long src_offset,
unsigned long len)
{
@@ -5843,12 +5792,11 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
size_t offset;
struct page *page;
char *kaddr;
- size_t start_offset = offset_in_page(dst->start);
- unsigned long i = (start_offset + dst_offset) >> PAGE_SHIFT;
+ unsigned long i = dst_offset >> PAGE_SHIFT;
WARN_ON(src->len != dst_len);
- offset = offset_in_page(start_offset + dst_offset);
+ offset = offset_in_page(dst_offset);
while (len > 0) {
page = dst->pages[i];
@@ -5879,12 +5827,11 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
* This helper hides the ugliness of finding the byte in an extent buffer which
* contains a given bit.
*/
-static inline void eb_bitmap_offset(struct extent_buffer *eb,
+static inline void eb_bitmap_offset(const struct extent_buffer *eb,
unsigned long start, unsigned long nr,
unsigned long *page_index,
size_t *page_offset)
{
- size_t start_offset = offset_in_page(eb->start);
size_t byte_offset = BIT_BYTE(nr);
size_t offset;
@@ -5893,7 +5840,7 @@ static inline void eb_bitmap_offset(struct extent_buffer *eb,
* the bitmap item in the extent buffer + the offset of the byte in the
* bitmap item.
*/
- offset = start_offset + start + byte_offset;
+ offset = start + byte_offset;
*page_index = offset >> PAGE_SHIFT;
*page_offset = offset_in_page(offset);
@@ -5905,7 +5852,7 @@ static inline void eb_bitmap_offset(struct extent_buffer *eb,
* @start: offset of the bitmap item in the extent buffer
* @nr: bit number to test
*/
-int extent_buffer_test_bit(struct extent_buffer *eb, unsigned long start,
+int extent_buffer_test_bit(const struct extent_buffer *eb, unsigned long start,
unsigned long nr)
{
u8 *kaddr;
@@ -5927,7 +5874,7 @@ int extent_buffer_test_bit(struct extent_buffer *eb, unsigned long start,
* @pos: bit number of the first bit
* @len: number of bits to set
*/
-void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start,
+void extent_buffer_bitmap_set(const struct extent_buffer *eb, unsigned long start,
unsigned long pos, unsigned long len)
{
u8 *kaddr;
@@ -5969,8 +5916,9 @@ void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start,
* @pos: bit number of the first bit
* @len: number of bits to clear
*/
-void extent_buffer_bitmap_clear(struct extent_buffer *eb, unsigned long start,
- unsigned long pos, unsigned long len)
+void extent_buffer_bitmap_clear(const struct extent_buffer *eb,
+ unsigned long start, unsigned long pos,
+ unsigned long len)
{
u8 *kaddr;
struct page *page;
@@ -6031,14 +5979,14 @@ static void copy_pages(struct page *dst_page, struct page *src_page,
memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
}
-void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
- unsigned long src_offset, unsigned long len)
+void memcpy_extent_buffer(const struct extent_buffer *dst,
+ unsigned long dst_offset, unsigned long src_offset,
+ unsigned long len)
{
struct btrfs_fs_info *fs_info = dst->fs_info;
size_t cur;
size_t dst_off_in_page;
size_t src_off_in_page;
- size_t start_offset = offset_in_page(dst->start);
unsigned long dst_i;
unsigned long src_i;
@@ -6056,11 +6004,11 @@ void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
}
while (len > 0) {
- dst_off_in_page = offset_in_page(start_offset + dst_offset);
- src_off_in_page = offset_in_page(start_offset + src_offset);
+ dst_off_in_page = offset_in_page(dst_offset);
+ src_off_in_page = offset_in_page(src_offset);
- dst_i = (start_offset + dst_offset) >> PAGE_SHIFT;
- src_i = (start_offset + src_offset) >> PAGE_SHIFT;
+ dst_i = dst_offset >> PAGE_SHIFT;
+ src_i = src_offset >> PAGE_SHIFT;
cur = min(len, (unsigned long)(PAGE_SIZE -
src_off_in_page));
@@ -6076,8 +6024,9 @@ void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
}
}
-void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
- unsigned long src_offset, unsigned long len)
+void memmove_extent_buffer(const struct extent_buffer *dst,
+ unsigned long dst_offset, unsigned long src_offset,
+ unsigned long len)
{
struct btrfs_fs_info *fs_info = dst->fs_info;
size_t cur;
@@ -6085,7 +6034,6 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
size_t src_off_in_page;
unsigned long dst_end = dst_offset + len - 1;
unsigned long src_end = src_offset + len - 1;
- size_t start_offset = offset_in_page(dst->start);
unsigned long dst_i;
unsigned long src_i;
@@ -6106,11 +6054,11 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
return;
}
while (len > 0) {
- dst_i = (start_offset + dst_end) >> PAGE_SHIFT;
- src_i = (start_offset + src_end) >> PAGE_SHIFT;
+ dst_i = dst_end >> PAGE_SHIFT;
+ src_i = src_end >> PAGE_SHIFT;
- dst_off_in_page = offset_in_page(start_offset + dst_end);
- src_off_in_page = offset_in_page(start_offset + src_end);
+ dst_off_in_page = offset_in_page(dst_end);
+ src_off_in_page = offset_in_page(src_end);
cur = min_t(unsigned long, len, src_off_in_page + 1);
cur = min(cur, dst_off_in_page + 1);
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 2ed65bd0760e..602bf3af9fb4 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -66,6 +66,10 @@ struct btrfs_io_bio;
struct io_failure_record;
struct extent_io_tree;
+typedef blk_status_t (submit_bio_hook_t)(struct inode *inode, struct bio *bio,
+ int mirror_num,
+ unsigned long bio_flags);
+
typedef blk_status_t (extent_submit_bio_start_t)(void *private_data,
struct bio *bio, u64 bio_offset);
@@ -74,8 +78,7 @@ struct extent_io_ops {
* The following callbacks must be always defined, the function
* pointer will be called unconditionally.
*/
- blk_status_t (*submit_bio_hook)(struct inode *inode, struct bio *bio,
- int mirror_num, unsigned long bio_flags);
+ submit_bio_hook_t *submit_bio_hook;
int (*readpage_end_io_hook)(struct btrfs_io_bio *io_bio, u64 phy_offset,
struct page *page, u64 start, u64 end,
int mirror);
@@ -198,8 +201,7 @@ int extent_writepages(struct address_space *mapping,
struct writeback_control *wbc);
int btree_write_cache_pages(struct address_space *mapping,
struct writeback_control *wbc);
-int extent_readpages(struct address_space *mapping, struct list_head *pages,
- unsigned nr_pages);
+void extent_readahead(struct readahead_control *rac);
int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len);
void set_page_extent_mapped(struct page *page);
@@ -210,7 +212,7 @@ struct extent_buffer *__alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
u64 start, unsigned long len);
struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
u64 start);
-struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src);
+struct extent_buffer *btrfs_clone_extent_buffer(const struct extent_buffer *src);
struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
u64 start);
void free_extent_buffer(struct extent_buffer *eb);
@@ -228,7 +230,7 @@ static inline int num_extent_pages(const struct extent_buffer *eb)
(eb->start >> PAGE_SHIFT);
}
-static inline int extent_buffer_uptodate(struct extent_buffer *eb)
+static inline int extent_buffer_uptodate(const struct extent_buffer *eb)
{
return test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
}
@@ -241,37 +243,37 @@ void read_extent_buffer(const struct extent_buffer *eb, void *dst,
int read_extent_buffer_to_user(const struct extent_buffer *eb,
void __user *dst, unsigned long start,
unsigned long len);
-void write_extent_buffer_fsid(struct extent_buffer *eb, const void *src);
-void write_extent_buffer_chunk_tree_uuid(struct extent_buffer *eb,
+void write_extent_buffer_fsid(const struct extent_buffer *eb, const void *src);
+void write_extent_buffer_chunk_tree_uuid(const struct extent_buffer *eb,
const void *src);
-void write_extent_buffer(struct extent_buffer *eb, const void *src,
+void write_extent_buffer(const struct extent_buffer *eb, const void *src,
unsigned long start, unsigned long len);
-void copy_extent_buffer_full(struct extent_buffer *dst,
- struct extent_buffer *src);
-void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
+void copy_extent_buffer_full(const struct extent_buffer *dst,
+ const struct extent_buffer *src);
+void copy_extent_buffer(const struct extent_buffer *dst,
+ const struct extent_buffer *src,
unsigned long dst_offset, unsigned long src_offset,
unsigned long len);
-void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
- unsigned long src_offset, unsigned long len);
-void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
- unsigned long src_offset, unsigned long len);
-void memzero_extent_buffer(struct extent_buffer *eb, unsigned long start,
+void memcpy_extent_buffer(const struct extent_buffer *dst,
+ unsigned long dst_offset, unsigned long src_offset,
+ unsigned long len);
+void memmove_extent_buffer(const struct extent_buffer *dst,
+ unsigned long dst_offset, unsigned long src_offset,
+ unsigned long len);
+void memzero_extent_buffer(const struct extent_buffer *eb, unsigned long start,
unsigned long len);
-int extent_buffer_test_bit(struct extent_buffer *eb, unsigned long start,
+int extent_buffer_test_bit(const struct extent_buffer *eb, unsigned long start,
unsigned long pos);
-void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start,
+void extent_buffer_bitmap_set(const struct extent_buffer *eb, unsigned long start,
unsigned long pos, unsigned long len);
-void extent_buffer_bitmap_clear(struct extent_buffer *eb, unsigned long start,
- unsigned long pos, unsigned long len);
-void clear_extent_buffer_dirty(struct extent_buffer *eb);
+void extent_buffer_bitmap_clear(const struct extent_buffer *eb,
+ unsigned long start, unsigned long pos,
+ unsigned long len);
+void clear_extent_buffer_dirty(const struct extent_buffer *eb);
bool set_extent_buffer_dirty(struct extent_buffer *eb);
void set_extent_buffer_uptodate(struct extent_buffer *eb);
void clear_extent_buffer_uptodate(struct extent_buffer *eb);
-int extent_buffer_under_io(struct extent_buffer *eb);
-int map_private_extent_buffer(const struct extent_buffer *eb,
- unsigned long offset, unsigned long min_len,
- char **map, unsigned long *map_start,
- unsigned long *map_len);
+int extent_buffer_under_io(const struct extent_buffer *eb);
void extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end);
void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end);
void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
@@ -290,7 +292,7 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
u64 length, u64 logical, struct page *page,
unsigned int pg_offset, int mirror_num);
void end_extent_writepage(struct page *page, int err, u64 start, u64 end);
-int btrfs_repair_eb_io_failure(struct extent_buffer *eb, int mirror_num);
+int btrfs_repair_eb_io_failure(const struct extent_buffer *eb, int mirror_num);
/*
* When IO fails, either with EIO or csum verification fails, we
@@ -312,12 +314,12 @@ struct io_failure_record {
};
-bool btrfs_check_repairable(struct inode *inode, unsigned failed_bio_pages,
- struct io_failure_record *failrec, int fail_mirror);
-struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio,
- struct io_failure_record *failrec,
- struct page *page, int pg_offset, int icsum,
- bio_end_io_t *endio_func, void *data);
+blk_status_t btrfs_submit_read_repair(struct inode *inode,
+ struct bio *failed_bio, u64 phy_offset,
+ struct page *page, unsigned int pgoff,
+ u64 start, u64 end, int failed_mirror,
+ submit_bio_hook_t *submit_bio_hook);
+
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
bool find_lock_delalloc_range(struct inode *inode,
struct page *locked_page, u64 *start,
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index b618ad5339ba..706a3128e192 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -242,11 +242,13 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
/**
* btrfs_lookup_bio_sums - Look up checksums for a bio.
* @inode: inode that the bio is for.
- * @bio: bio embedded in btrfs_io_bio.
+ * @bio: bio to look up.
* @offset: Unless (u64)-1, look up checksums for this offset in the file.
* If (u64)-1, use the page offsets from the bio instead.
- * @dst: Buffer of size btrfs_super_csum_size() used to return checksum. If
- * NULL, the checksum is returned in btrfs_io_bio(bio)->csum instead.
+ * @dst: Buffer of size nblocks * btrfs_super_csum_size() used to return
+ * checksum (nblocks = bio->bi_iter.bi_size / fs_info->sectorsize). If
+ * NULL, the checksum buffer is allocated and returned in
+ * btrfs_io_bio(bio)->csum instead.
*
* Return: BLK_STS_RESOURCE if allocating memory fails, BLK_STS_OK otherwise.
*/
@@ -256,7 +258,6 @@ blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct bio_vec bvec;
struct bvec_iter iter;
- struct btrfs_io_bio *btrfs_bio = btrfs_io_bio(bio);
struct btrfs_csum_item *item = NULL;
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
struct btrfs_path *path;
@@ -277,6 +278,8 @@ blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
nblocks = bio->bi_iter.bi_size >> inode->i_sb->s_blocksize_bits;
if (!dst) {
+ struct btrfs_io_bio *btrfs_bio = btrfs_io_bio(bio);
+
if (nblocks * csum_size > BTRFS_BIO_INLINE_CSUM_SIZE) {
btrfs_bio->csum = kmalloc_array(nblocks, csum_size,
GFP_NOFS);
@@ -598,13 +601,12 @@ blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
index = 0;
}
- crypto_shash_init(shash);
data = kmap_atomic(bvec.bv_page);
- crypto_shash_update(shash, data + bvec.bv_offset
+ crypto_shash_digest(shash, data + bvec.bv_offset
+ (i * fs_info->sectorsize),
- fs_info->sectorsize);
+ fs_info->sectorsize,
+ sums->sums + index);
kunmap_atomic(data);
- crypto_shash_final(shash, (char *)(sums->sums + index));
index += csum_size;
offset += fs_info->sectorsize;
this_sum_bytes += fs_info->sectorsize;
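
The hashing change above is a pure simplification: for a single contiguous buffer, crypto_shash_digest() is equivalent to the init/update/final sequence it replaces. Sketch only, with hypothetical desc, data, len and out:

	/* before: three calls per checksum block */
	crypto_shash_init(desc);
	crypto_shash_update(desc, data, len);
	crypto_shash_final(desc, out);

	/* after: one call, same result for a single buffer */
	crypto_shash_digest(desc, data, len, out);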
@@ -869,7 +871,7 @@ again:
}
ret = PTR_ERR(item);
if (ret != -EFBIG && ret != -ENOENT)
- goto fail_unlock;
+ goto out;
if (ret == -EFBIG) {
u32 item_size;
@@ -887,10 +889,12 @@ again:
nritems = btrfs_header_nritems(path->nodes[0]);
if (!nritems || (path->slots[0] >= nritems - 1)) {
ret = btrfs_next_leaf(root, path);
- if (ret == 1)
+ if (ret < 0) {
+ goto out;
+ } else if (ret > 0) {
found_next = 1;
- if (ret != 0)
goto insert;
+ }
slot = path->slots[0];
}
btrfs_item_key_to_cpu(path->nodes[0], &found_key, slot);
@@ -905,14 +909,27 @@ again:
}
/*
- * at this point, we know the tree has an item, but it isn't big
- * enough yet to put our csum in. Grow it
+ * At this point, we know the tree has a checksum item that ends at an
+ * offset matching the start of the checksum range we want to insert.
+ * We try to extend that item as much as possible and then add as many
+ * checksums to it as will fit.
+ *
+ * First check if the leaf has enough free space for at least one
+ * checksum. If it has, go directly to the item extension code; otherwise
+ * release the path and do a search for insertion before the extension.
*/
+ if (btrfs_leaf_free_space(leaf) >= csum_size) {
+ btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+ csum_offset = (bytenr - found_key.offset) >>
+ fs_info->sb->s_blocksize_bits;
+ goto extend_csum;
+ }
+
btrfs_release_path(path);
ret = btrfs_search_slot(trans, root, &file_key, path,
csum_size, 1);
if (ret < 0)
- goto fail_unlock;
+ goto out;
if (ret > 0) {
if (path->slots[0] == 0)
@@ -931,19 +948,13 @@ again:
goto insert;
}
+extend_csum:
if (csum_offset == btrfs_item_size_nr(leaf, path->slots[0]) /
csum_size) {
int extend_nr;
u64 tmp;
u32 diff;
- u32 free_space;
-
- if (btrfs_leaf_free_space(leaf) <
- sizeof(struct btrfs_item) + csum_size * 2)
- goto insert;
- free_space = btrfs_leaf_free_space(leaf) -
- sizeof(struct btrfs_item) - csum_size;
tmp = sums->len - total_bytes;
tmp >>= fs_info->sb->s_blocksize_bits;
WARN_ON(tmp < 1);
@@ -954,7 +965,7 @@ again:
MAX_CSUM_ITEMS(fs_info, csum_size) * csum_size);
diff = diff - btrfs_item_size_nr(leaf, path->slots[0]);
- diff = min(free_space, diff);
+ diff = min_t(u32, btrfs_leaf_free_space(leaf), diff);
diff /= csum_size;
diff *= csum_size;
@@ -985,9 +996,9 @@ insert:
ins_size);
path->leave_spinning = 0;
if (ret < 0)
- goto fail_unlock;
+ goto out;
if (WARN_ON(ret != 0))
- goto fail_unlock;
+ goto out;
leaf = path->nodes[0];
csum:
item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item);
@@ -1017,9 +1028,6 @@ found:
out:
btrfs_free_path(path);
return ret;
-
-fail_unlock:
- goto out;
}
void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 719e68ab552c..fde125616687 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -275,26 +275,18 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
{
struct btrfs_root *inode_root;
struct inode *inode;
- struct btrfs_key key;
struct btrfs_ioctl_defrag_range_args range;
int num_defrag;
int ret;
/* get the inode */
- key.objectid = defrag->root;
- key.type = BTRFS_ROOT_ITEM_KEY;
- key.offset = (u64)-1;
-
- inode_root = btrfs_get_fs_root(fs_info, &key, true);
+ inode_root = btrfs_get_fs_root(fs_info, defrag->root, true);
if (IS_ERR(inode_root)) {
ret = PTR_ERR(inode_root);
goto cleanup;
}
- key.objectid = defrag->ino;
- key.type = BTRFS_INODE_ITEM_KEY;
- key.offset = 0;
- inode = btrfs_iget(fs_info->sb, &key, inode_root);
+ inode = btrfs_iget(fs_info->sb, defrag->ino, inode_root);
btrfs_put_root(inode_root);
if (IS_ERR(inode)) {
ret = PTR_ERR(inode);
@@ -775,7 +767,7 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
if (start >= BTRFS_I(inode)->disk_i_size && !replace_extent)
modify_tree = 0;
- update_refs = (test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
+ update_refs = (test_bit(BTRFS_ROOT_SHAREABLE, &root->state) ||
root == fs_info->tree_root);
while (1) {
recow = 0;
@@ -1817,21 +1809,61 @@ again:
return num_written ? num_written : ret;
}
-static ssize_t __btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
+static ssize_t check_direct_IO(struct btrfs_fs_info *fs_info,
+ const struct iov_iter *iter, loff_t offset)
+{
+ const unsigned int blocksize_mask = fs_info->sectorsize - 1;
+
+ if (offset & blocksize_mask)
+ return -EINVAL;
+
+ if (iov_iter_alignment(iter) & blocksize_mask)
+ return -EINVAL;
+
+ return 0;
+}
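
A worked example of the check above, assuming a 4 KiB sectorsize so the mask is 0xfff (the offsets are hypothetical):

	/*
	 * offset 8192, page-aligned iovecs: 8192 & 0xfff == 0 -> stays on the
	 * direct I/O path
	 * offset 8200 (or a misaligned iovec): 8200 & 0xfff == 8 -> -EINVAL,
	 * and the caller falls back to the buffered path
	 */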
+
+static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
- loff_t pos;
- ssize_t written;
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ loff_t pos = iocb->ki_pos;
+ ssize_t written = 0;
ssize_t written_buffered;
loff_t endbyte;
int err;
+ size_t count = 0;
+ bool relock = false;
- written = generic_file_direct_write(iocb, from);
+ if (check_direct_IO(fs_info, from, pos))
+ goto buffered;
+
+ count = iov_iter_count(from);
+ /*
+ * If the write DIO is beyond the EOF, we need update the isize, but it
+ * is protected by i_mutex. So we can not unlock the i_mutex at this
+ * case.
+ */
+ if (pos + count <= inode->i_size) {
+ inode_unlock(inode);
+ relock = true;
+ } else if (iocb->ki_flags & IOCB_NOWAIT) {
+ return -EAGAIN;
+ }
+
+ down_read(&BTRFS_I(inode)->dio_sem);
+ written = iomap_dio_rw(iocb, from, &btrfs_dio_iomap_ops, &btrfs_dops,
+ is_sync_kiocb(iocb));
+ up_read(&BTRFS_I(inode)->dio_sem);
+
+ if (relock)
+ inode_lock(inode);
if (written < 0 || !iov_iter_count(from))
return written;
+buffered:
pos = iocb->ki_pos;
written_buffered = btrfs_buffered_write(iocb, from);
if (written_buffered < 0) {
@@ -1970,7 +2002,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
atomic_inc(&BTRFS_I(inode)->sync_writers);
if (iocb->ki_flags & IOCB_DIRECT) {
- num_written = __btrfs_direct_write(iocb, from);
+ num_written = btrfs_direct_write(iocb, from);
} else {
num_written = btrfs_buffered_write(iocb, from);
if (num_written > 0)
@@ -3484,9 +3516,54 @@ static int btrfs_file_open(struct inode *inode, struct file *filp)
return generic_file_open(inode, filp);
}
+static int check_direct_read(struct btrfs_fs_info *fs_info,
+ const struct iov_iter *iter, loff_t offset)
+{
+ int ret;
+ int i, seg;
+
+ ret = check_direct_IO(fs_info, iter, offset);
+ if (ret < 0)
+ return ret;
+
+ for (seg = 0; seg < iter->nr_segs; seg++)
+ for (i = seg + 1; i < iter->nr_segs; i++)
+ if (iter->iov[seg].iov_base == iter->iov[i].iov_base)
+ return -EINVAL;
+ return 0;
+}
+
+static ssize_t btrfs_direct_read(struct kiocb *iocb, struct iov_iter *to)
+{
+ struct inode *inode = file_inode(iocb->ki_filp);
+ ssize_t ret;
+
+ if (check_direct_read(btrfs_sb(inode->i_sb), to, iocb->ki_pos))
+ return 0;
+
+ inode_lock_shared(inode);
+ ret = iomap_dio_rw(iocb, to, &btrfs_dio_iomap_ops, &btrfs_dops,
+ is_sync_kiocb(iocb));
+ inode_unlock_shared(inode);
+ return ret;
+}
+
+static ssize_t btrfs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
+{
+ ssize_t ret = 0;
+
+ if (iocb->ki_flags & IOCB_DIRECT) {
+ ret = btrfs_direct_read(iocb, to);
+ if (ret < 0)
+ return ret;
+ }
+
+ return generic_file_buffered_read(iocb, to, ret);
+}
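
One non-obvious detail in the new read path: when the direct read is cut short and returns a positive byte count, that count is passed to generic_file_buffered_read() as its already-transferred argument, so the rest of the request is served from the page cache. A hypothetical call pattern:

	/* suppose the iterator asks for 1 MiB and the direct path serves 512 KiB */
	ssize_t done = btrfs_direct_read(iocb, to);	/* e.g. returns 524288 */

	if (done < 0)
		return done;
	/* third argument: bytes already transferred before the buffered read */
	return generic_file_buffered_read(iocb, to, done);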
+
const struct file_operations btrfs_file_operations = {
.llseek = btrfs_file_llseek,
- .read_iter = generic_file_read_iter,
+ .read_iter = btrfs_file_read_iter,
.splice_read = generic_file_splice_read,
.write_iter = btrfs_file_write_iter,
.mmap = btrfs_file_mmap,
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 3613da065a73..55955bd424d7 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -82,7 +82,7 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
* sure NOFS is set to keep us from deadlocking.
*/
nofs_flag = memalloc_nofs_save();
- inode = btrfs_iget_path(fs_info->sb, &location, root, path);
+ inode = btrfs_iget_path(fs_info->sb, location.objectid, root, path);
btrfs_release_path(path);
memalloc_nofs_restore(nofs_flag);
if (IS_ERR(inode))
@@ -1190,13 +1190,10 @@ out:
if (ret) {
invalidate_inode_pages2(inode->i_mapping);
BTRFS_I(inode)->generation = 0;
- if (block_group) {
-#ifdef CONFIG_BTRFS_DEBUG
- btrfs_err(root->fs_info,
- "failed to write free space cache for block group %llu",
- block_group->start);
-#endif
- }
+ if (block_group)
+ btrfs_debug(root->fs_info,
+ "failed to write free space cache for block group %llu error %d",
+ block_group->start, ret);
}
btrfs_update_inode(trans, root, inode);
@@ -1415,11 +1412,9 @@ int btrfs_write_out_cache(struct btrfs_trans_handle *trans,
ret = __btrfs_write_out_cache(fs_info->tree_root, inode, ctl,
block_group, &block_group->io_ctl, trans);
if (ret) {
-#ifdef CONFIG_BTRFS_DEBUG
- btrfs_err(fs_info,
- "failed to write free space cache for block group %llu",
- block_group->start);
-#endif
+ btrfs_debug(fs_info,
+ "failed to write free space cache for block group %llu error %d",
+ block_group->start, ret);
spin_lock(&block_group->lock);
block_group->disk_cache_state = BTRFS_DC_ERROR;
spin_unlock(&block_group->lock);
@@ -3762,46 +3757,6 @@ out:
return ret;
}
-void btrfs_get_block_group_trimming(struct btrfs_block_group *cache)
-{
- atomic_inc(&cache->trimming);
-}
-
-void btrfs_put_block_group_trimming(struct btrfs_block_group *block_group)
-{
- struct btrfs_fs_info *fs_info = block_group->fs_info;
- struct extent_map_tree *em_tree;
- struct extent_map *em;
- bool cleanup;
-
- spin_lock(&block_group->lock);
- cleanup = (atomic_dec_and_test(&block_group->trimming) &&
- block_group->removed);
- spin_unlock(&block_group->lock);
-
- if (cleanup) {
- mutex_lock(&fs_info->chunk_mutex);
- em_tree = &fs_info->mapping_tree;
- write_lock(&em_tree->lock);
- em = lookup_extent_mapping(em_tree, block_group->start,
- 1);
- BUG_ON(!em); /* logic error, can't happen */
- remove_extent_mapping(em_tree, em);
- write_unlock(&em_tree->lock);
- mutex_unlock(&fs_info->chunk_mutex);
-
- /* once for us and once for the tree */
- free_extent_map(em);
- free_extent_map(em);
-
- /*
- * We've left one free space entry and other tasks trimming
- * this block group have left 1 entry each one. Free them.
- */
- __btrfs_remove_free_space_cache(block_group->free_space_ctl);
- }
-}
-
int btrfs_trim_block_group(struct btrfs_block_group *block_group,
u64 *trimmed, u64 start, u64 end, u64 minlen)
{
@@ -3816,7 +3771,7 @@ int btrfs_trim_block_group(struct btrfs_block_group *block_group,
spin_unlock(&block_group->lock);
return 0;
}
- btrfs_get_block_group_trimming(block_group);
+ btrfs_freeze_block_group(block_group);
spin_unlock(&block_group->lock);
ret = trim_no_bitmap(block_group, trimmed, start, end, minlen, false);
@@ -3829,7 +3784,7 @@ int btrfs_trim_block_group(struct btrfs_block_group *block_group,
if (rem)
reset_trimming_bitmap(ctl, offset_to_bitmap(ctl, end));
out:
- btrfs_put_block_group_trimming(block_group);
+ btrfs_unfreeze_block_group(block_group);
return ret;
}
@@ -3846,11 +3801,11 @@ int btrfs_trim_block_group_extents(struct btrfs_block_group *block_group,
spin_unlock(&block_group->lock);
return 0;
}
- btrfs_get_block_group_trimming(block_group);
+ btrfs_freeze_block_group(block_group);
spin_unlock(&block_group->lock);
ret = trim_no_bitmap(block_group, trimmed, start, end, minlen, async);
- btrfs_put_block_group_trimming(block_group);
+ btrfs_unfreeze_block_group(block_group);
return ret;
}
@@ -3868,13 +3823,13 @@ int btrfs_trim_block_group_bitmaps(struct btrfs_block_group *block_group,
spin_unlock(&block_group->lock);
return 0;
}
- btrfs_get_block_group_trimming(block_group);
+ btrfs_freeze_block_group(block_group);
spin_unlock(&block_group->lock);
ret = trim_bitmaps(block_group, trimmed, start, end, minlen, maxlen,
async);
- btrfs_put_block_group_trimming(block_group);
+ btrfs_unfreeze_block_group(block_group);
return ret;
}
@@ -4035,11 +3990,9 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root,
if (release_metadata)
btrfs_delalloc_release_metadata(BTRFS_I(inode),
inode->i_size, true);
-#ifdef CONFIG_BTRFS_DEBUG
- btrfs_err(fs_info,
- "failed to write free ino cache for root %llu",
- root->root_key.objectid);
-#endif
+ btrfs_debug(fs_info,
+ "failed to write free ino cache for root %llu error %d",
+ root->root_key.objectid, ret);
}
return ret;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 320d1062068d..768c8be4c765 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -5,7 +5,6 @@
#include <linux/kernel.h>
#include <linux/bio.h>
-#include <linux/buffer_head.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/pagemap.h>
@@ -49,17 +48,18 @@
#include "qgroup.h"
#include "delalloc-space.h"
#include "block-group.h"
+#include "space-info.h"
struct btrfs_iget_args {
- struct btrfs_key *location;
+ u64 ino;
struct btrfs_root *root;
};
struct btrfs_dio_data {
u64 reserve;
- u64 unsubmitted_oe_range_start;
- u64 unsubmitted_oe_range_end;
- int overwrite;
+ loff_t length;
+ ssize_t submitted;
+ struct extent_changeset *data_reserved;
};
static const struct inode_operations btrfs_dir_inode_operations;
@@ -1142,7 +1142,7 @@ out_unlock:
*/
if (extent_reserved) {
extent_clear_unlock_delalloc(inode, start,
- start + cur_alloc_size,
+ start + cur_alloc_size - 1,
locked_page,
clear_bits,
page_ops);
@@ -1355,6 +1355,66 @@ static noinline int csum_exist_in_range(struct btrfs_fs_info *fs_info,
return 1;
}
+static int fallback_to_cow(struct inode *inode, struct page *locked_page,
+ const u64 start, const u64 end,
+ int *page_started, unsigned long *nr_written)
+{
+ const bool is_space_ino = btrfs_is_free_space_inode(BTRFS_I(inode));
+ const u64 range_bytes = end + 1 - start;
+ struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+ u64 range_start = start;
+ u64 count;
+
+ /*
+ * If EXTENT_NORESERVE is set it means that when the buffered write was
+ * made we did not have enough available data space and therefore did not
+ * reserve data space for it, since we thought we could do NOCOW for the
+ * respective file range (either there is a prealloc extent or the inode
+ * has the NOCOW bit set).
+ *
+ * However, when we need to fall back to COW mode (because for example the
+ * block group for the corresponding extent was turned to RO mode by a
+ * scrub or relocation) we need to do the following:
+ *
+ * 1) We increment the bytes_may_use counter of the data space info.
+ * If COW succeeds, it allocates a new data extent and after doing
+ * that it decrements the space info's bytes_may_use counter and
+ * increments its bytes_reserved counter by the same amount (we do
+ * this at btrfs_add_reserved_bytes()). So we need to increment the
+ * bytes_may_use counter to compensate (when space is reserved at
+ * buffered write time, the bytes_may_use counter is incremented);
+ *
+ * 2) We clear the EXTENT_NORESERVE bit from the range. We do this so
+ * that if the COW path fails for any reason, it decrements (through
+ * extent_clear_unlock_delalloc()) the bytes_may_use counter of the
+ * data space info, which we incremented in the step above.
+ *
+ * If we need to fall back to COW and the inode corresponds to a free
+ * space cache inode, we must also increment bytes_may_use of the data
+ * space_info for the same reason. Space caches always get a prealloc
+ * extent for them, however scrub or balance may have set the block
+ * group that contains that extent to RO mode.
+ */
+ count = count_range_bits(io_tree, &range_start, end, range_bytes,
+ EXTENT_NORESERVE, 0);
+ if (count > 0 || is_space_ino) {
+ const u64 bytes = is_space_ino ? range_bytes : count;
+ struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
+ struct btrfs_space_info *sinfo = fs_info->data_sinfo;
+
+ spin_lock(&sinfo->lock);
+ btrfs_space_info_update_bytes_may_use(fs_info, sinfo, bytes);
+ spin_unlock(&sinfo->lock);
+
+ if (count > 0)
+ clear_extent_bit(io_tree, start, end, EXTENT_NORESERVE,
+ 0, 0, NULL);
+ }
+
+ return cow_file_range(inode, locked_page, start, end, page_started,
+ nr_written, 1);
+}
+
/*
* Called for NOCOW writeback. This checks for snapshots or COW copies
* of the extents that exist in the file, and COWs the file as required.
@@ -1602,9 +1662,9 @@ out_check:
* NOCOW, following one which needs to be COW'ed
*/
if (cow_start != (u64)-1) {
- ret = cow_file_range(inode, locked_page,
- cow_start, found_key.offset - 1,
- page_started, nr_written, 1);
+ ret = fallback_to_cow(inode, locked_page, cow_start,
+ found_key.offset - 1,
+ page_started, nr_written);
if (ret) {
if (nocow)
btrfs_dec_nocow_writers(fs_info,
@@ -1693,8 +1753,8 @@ out_check:
if (cow_start != (u64)-1) {
cur_offset = end;
- ret = cow_file_range(inode, locked_page, cow_start, end,
- page_started, nr_written, 1);
+ ret = fallback_to_cow(inode, locked_page, cow_start, end,
+ page_started, nr_written);
if (ret)
goto error;
}
@@ -2726,10 +2786,9 @@ void btrfs_writepage_endio_finish_ordered(struct page *page, u64 start,
btrfs_queue_work(wq, &ordered_extent->work);
}
-static int __readpage_endio_check(struct inode *inode,
- struct btrfs_io_bio *io_bio,
- int icsum, struct page *page,
- int pgoff, u64 start, size_t len)
+static int check_data_csum(struct inode *inode, struct btrfs_io_bio *io_bio,
+ int icsum, struct page *page, int pgoff, u64 start,
+ size_t len)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
@@ -2743,9 +2802,7 @@ static int __readpage_endio_check(struct inode *inode,
kaddr = kmap_atomic(page);
shash->tfm = fs_info->csum_shash;
- crypto_shash_init(shash);
- crypto_shash_update(shash, kaddr + pgoff, len);
- crypto_shash_final(shash, csum);
+ crypto_shash_digest(shash, kaddr + pgoff, len, csum);
if (memcmp(csum, csum_expected, csum_size))
goto zeroit;
@@ -2790,8 +2847,8 @@ static int btrfs_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
}
phy_offset >>= inode->i_sb->s_blocksize_bits;
- return __readpage_endio_check(inode, io_bio, phy_offset, page, offset,
- start, (size_t)(end - start + 1));
+ return check_data_csum(inode, io_bio, phy_offset, page, offset, start,
+ (size_t)(end - start + 1));
}
/*
@@ -2981,7 +3038,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
found_key.objectid = found_key.offset;
found_key.type = BTRFS_INODE_ITEM_KEY;
found_key.offset = 0;
- inode = btrfs_iget(fs_info->sb, &found_key, root);
+ inode = btrfs_iget(fs_info->sb, last_objectid, root);
ret = PTR_ERR_OR_ZERO(inode);
if (ret && ret != -ENOENT)
goto out;
@@ -3000,18 +3057,16 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
* orphan must not get deleted.
* find_dead_roots already ran before us, so if this
* is a snapshot deletion, we should find the root
- * in the dead_roots list
+ * in the fs_roots radix tree.
*/
- spin_lock(&fs_info->trans_lock);
- list_for_each_entry(dead_root, &fs_info->dead_roots,
- root_list) {
- if (dead_root->root_key.objectid ==
- found_key.objectid) {
- is_dead_root = 1;
- break;
- }
- }
- spin_unlock(&fs_info->trans_lock);
+
+ spin_lock(&fs_info->fs_roots_radix_lock);
+ dead_root = radix_tree_lookup(&fs_info->fs_roots_radix,
+ (unsigned long)found_key.objectid);
+ if (dead_root && btrfs_root_refs(&dead_root->root_item) == 0)
+ is_dead_root = 1;
+ spin_unlock(&fs_info->fs_roots_radix_lock);
+
if (is_dead_root) {
/* prevent this orphan from being found again */
key.offset = found_key.objectid - 1;
@@ -3357,43 +3412,40 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
btrfs_init_map_token(&token, leaf);
- btrfs_set_token_inode_uid(leaf, item, i_uid_read(inode), &token);
- btrfs_set_token_inode_gid(leaf, item, i_gid_read(inode), &token);
- btrfs_set_token_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size,
- &token);
- btrfs_set_token_inode_mode(leaf, item, inode->i_mode, &token);
- btrfs_set_token_inode_nlink(leaf, item, inode->i_nlink, &token);
-
- btrfs_set_token_timespec_sec(leaf, &item->atime,
- inode->i_atime.tv_sec, &token);
- btrfs_set_token_timespec_nsec(leaf, &item->atime,
- inode->i_atime.tv_nsec, &token);
-
- btrfs_set_token_timespec_sec(leaf, &item->mtime,
- inode->i_mtime.tv_sec, &token);
- btrfs_set_token_timespec_nsec(leaf, &item->mtime,
- inode->i_mtime.tv_nsec, &token);
-
- btrfs_set_token_timespec_sec(leaf, &item->ctime,
- inode->i_ctime.tv_sec, &token);
- btrfs_set_token_timespec_nsec(leaf, &item->ctime,
- inode->i_ctime.tv_nsec, &token);
-
- btrfs_set_token_timespec_sec(leaf, &item->otime,
- BTRFS_I(inode)->i_otime.tv_sec, &token);
- btrfs_set_token_timespec_nsec(leaf, &item->otime,
- BTRFS_I(inode)->i_otime.tv_nsec, &token);
-
- btrfs_set_token_inode_nbytes(leaf, item, inode_get_bytes(inode),
- &token);
- btrfs_set_token_inode_generation(leaf, item, BTRFS_I(inode)->generation,
- &token);
- btrfs_set_token_inode_sequence(leaf, item, inode_peek_iversion(inode),
- &token);
- btrfs_set_token_inode_transid(leaf, item, trans->transid, &token);
- btrfs_set_token_inode_rdev(leaf, item, inode->i_rdev, &token);
- btrfs_set_token_inode_flags(leaf, item, BTRFS_I(inode)->flags, &token);
- btrfs_set_token_inode_block_group(leaf, item, 0, &token);
+ btrfs_set_token_inode_uid(&token, item, i_uid_read(inode));
+ btrfs_set_token_inode_gid(&token, item, i_gid_read(inode));
+ btrfs_set_token_inode_size(&token, item, BTRFS_I(inode)->disk_i_size);
+ btrfs_set_token_inode_mode(&token, item, inode->i_mode);
+ btrfs_set_token_inode_nlink(&token, item, inode->i_nlink);
+
+ btrfs_set_token_timespec_sec(&token, &item->atime,
+ inode->i_atime.tv_sec);
+ btrfs_set_token_timespec_nsec(&token, &item->atime,
+ inode->i_atime.tv_nsec);
+
+ btrfs_set_token_timespec_sec(&token, &item->mtime,
+ inode->i_mtime.tv_sec);
+ btrfs_set_token_timespec_nsec(&token, &item->mtime,
+ inode->i_mtime.tv_nsec);
+
+ btrfs_set_token_timespec_sec(&token, &item->ctime,
+ inode->i_ctime.tv_sec);
+ btrfs_set_token_timespec_nsec(&token, &item->ctime,
+ inode->i_ctime.tv_nsec);
+
+ btrfs_set_token_timespec_sec(&token, &item->otime,
+ BTRFS_I(inode)->i_otime.tv_sec);
+ btrfs_set_token_timespec_nsec(&token, &item->otime,
+ BTRFS_I(inode)->i_otime.tv_nsec);
+
+ btrfs_set_token_inode_nbytes(&token, item, inode_get_bytes(inode));
+ btrfs_set_token_inode_generation(&token, item,
+ BTRFS_I(inode)->generation);
+ btrfs_set_token_inode_sequence(&token, item, inode_peek_iversion(inode));
+ btrfs_set_token_inode_transid(&token, item, trans->transid);
+ btrfs_set_token_inode_rdev(&token, item, inode->i_rdev);
+ btrfs_set_token_inode_flags(&token, item, BTRFS_I(inode)->flags);
+ btrfs_set_token_inode_block_group(&token, item, 0);
}
/*
@@ -3618,7 +3670,7 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir)
* 1 for the inode ref
* 1 for the inode
*/
- return btrfs_start_transaction_fallback_global_rsv(root, 5, 5);
+ return btrfs_start_transaction_fallback_global_rsv(root, 5);
}
static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
@@ -4108,11 +4160,12 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY);
/*
- * for non-free space inodes and ref cows, we want to back off from
- * time to time
+ * For non-free space inodes in shareable roots, we want to back
+ * off from time to time. This means all inodes in subvolume roots,
+ * reloc roots, and data reloc roots.
*/
if (!btrfs_is_free_space_inode(BTRFS_I(inode)) &&
- test_bit(BTRFS_ROOT_REF_COWS, &root->state))
+ test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
be_nice = true;
path = btrfs_alloc_path();
@@ -4120,20 +4173,19 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
return -ENOMEM;
path->reada = READA_BACK;
- if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
+ if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
lock_extent_bits(&BTRFS_I(inode)->io_tree, lock_start, (u64)-1,
&cached_state);
- /*
- * We want to drop from the next block forward in case this new size is
- * not block aligned since we will be keeping the last block of the
- * extent just the way it is.
- */
- if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
- root == fs_info->tree_root)
+ /*
+ * We want to drop from the next block forward in case this
+ * new size is not block aligned since we will be keeping the
+ * last block of the extent just the way it is.
+ */
btrfs_drop_extent_cache(BTRFS_I(inode), ALIGN(new_size,
fs_info->sectorsize),
(u64)-1, 0);
+ }
/*
* This function is also used to drop the items in the log tree before
@@ -4241,7 +4293,7 @@ search_again:
extent_num_bytes);
num_dec = (orig_num_bytes -
extent_num_bytes);
- if (test_bit(BTRFS_ROOT_REF_COWS,
+ if (test_bit(BTRFS_ROOT_SHAREABLE,
&root->state) &&
extent_start != 0)
inode_sub_bytes(inode, num_dec);
@@ -4257,7 +4309,7 @@ search_again:
num_dec = btrfs_file_extent_num_bytes(leaf, fi);
if (extent_start != 0) {
found_extent = 1;
- if (test_bit(BTRFS_ROOT_REF_COWS,
+ if (test_bit(BTRFS_ROOT_SHAREABLE,
&root->state))
inode_sub_bytes(inode, num_dec);
}
@@ -4293,7 +4345,7 @@ search_again:
clear_len = fs_info->sectorsize;
}
- if (test_bit(BTRFS_ROOT_REF_COWS, &root->state))
+ if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
inode_sub_bytes(inode, item_end + 1 - new_size);
}
delete:
@@ -4334,8 +4386,7 @@ delete:
should_throttle = false;
if (found_extent &&
- (test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
- root == fs_info->tree_root)) {
+ root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
struct btrfs_ref ref = { 0 };
bytes_deleted += extent_num_bytes;
@@ -4759,10 +4810,7 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
truncate_setsize(inode, newsize);
- /* Disable nonlocked read DIO to avoid the endless truncate */
- btrfs_inode_block_unlocked_dio(BTRFS_I(inode));
inode_dio_wait(inode);
- btrfs_inode_resume_unlocked_dio(BTRFS_I(inode));
ret = btrfs_truncate(inode, newsize == oldsize);
if (ret && inode->i_nlink) {
@@ -4856,8 +4904,8 @@ static void evict_inode_truncate_pages(struct inode *inode)
/*
* Keep looping until we have no more ranges in the io tree.
- * We can have ongoing bios started by readpages (called from readahead)
- * that have their endio callback (extent_io.c:end_bio_extent_readpage)
+ * We can have ongoing bios started by readahead that have
+ * their endio callback (extent_io.c:end_bio_extent_readpage)
* still in progress (unlocked the pages in the bio but did not yet
* unlock the ranges in the io tree). This means some
* ranges can still be locked and eviction started because before
@@ -5154,7 +5202,7 @@ static int fixup_tree_root_location(struct btrfs_fs_info *fs_info,
btrfs_release_path(path);
- new_root = btrfs_get_fs_root(fs_info, location, true);
+ new_root = btrfs_get_fs_root(fs_info, location->objectid, true);
if (IS_ERR(new_root)) {
err = PTR_ERR(new_root);
goto out;
@@ -5232,9 +5280,11 @@ static void inode_tree_del(struct inode *inode)
static int btrfs_init_locked_inode(struct inode *inode, void *p)
{
struct btrfs_iget_args *args = p;
- inode->i_ino = args->location->objectid;
- memcpy(&BTRFS_I(inode)->location, args->location,
- sizeof(*args->location));
+
+ inode->i_ino = args->ino;
+ BTRFS_I(inode)->location.objectid = args->ino;
+ BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
+ BTRFS_I(inode)->location.offset = 0;
BTRFS_I(inode)->root = btrfs_grab_root(args->root);
BUG_ON(args->root && !BTRFS_I(inode)->root);
return 0;
@@ -5243,19 +5293,19 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p)
static int btrfs_find_actor(struct inode *inode, void *opaque)
{
struct btrfs_iget_args *args = opaque;
- return args->location->objectid == BTRFS_I(inode)->location.objectid &&
+
+ return args->ino == BTRFS_I(inode)->location.objectid &&
args->root == BTRFS_I(inode)->root;
}
-static struct inode *btrfs_iget_locked(struct super_block *s,
- struct btrfs_key *location,
+static struct inode *btrfs_iget_locked(struct super_block *s, u64 ino,
struct btrfs_root *root)
{
struct inode *inode;
struct btrfs_iget_args args;
- unsigned long hashval = btrfs_inode_hash(location->objectid, root);
+ unsigned long hashval = btrfs_inode_hash(ino, root);
- args.location = location;
+ args.ino = ino;
args.root = root;
inode = iget5_locked(s, hashval, btrfs_find_actor,
@@ -5265,17 +5315,17 @@ static struct inode *btrfs_iget_locked(struct super_block *s,
}
/*
- * Get an inode object given its location and corresponding root.
+ * Get an inode object given its inode number and corresponding root.
* Path can be preallocated to prevent recursing back to iget through
* allocator. NULL is also valid but may require an additional allocation
* later.
*/
-struct inode *btrfs_iget_path(struct super_block *s, struct btrfs_key *location,
+struct inode *btrfs_iget_path(struct super_block *s, u64 ino,
struct btrfs_root *root, struct btrfs_path *path)
{
struct inode *inode;
- inode = btrfs_iget_locked(s, location, root);
+ inode = btrfs_iget_locked(s, ino, root);
if (!inode)
return ERR_PTR(-ENOMEM);
@@ -5302,10 +5352,9 @@ struct inode *btrfs_iget_path(struct super_block *s, struct btrfs_key *location,
return inode;
}
-struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
- struct btrfs_root *root)
+struct inode *btrfs_iget(struct super_block *s, u64 ino, struct btrfs_root *root)
{
- return btrfs_iget_path(s, location, root, NULL);
+ return btrfs_iget_path(s, ino, root, NULL);
}
static struct inode *new_simple_dir(struct super_block *s,
@@ -5374,7 +5423,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
return ERR_PTR(ret);
if (location.type == BTRFS_INODE_ITEM_KEY) {
- inode = btrfs_iget(dir->i_sb, &location, root);
+ inode = btrfs_iget(dir->i_sb, location.objectid, root);
if (IS_ERR(inode))
return inode;
@@ -5398,7 +5447,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
else
inode = new_simple_dir(dir->i_sb, &location, sub_root);
} else {
- inode = btrfs_iget(dir->i_sb, &location, sub_root);
+ inode = btrfs_iget(dir->i_sb, location.objectid, sub_root);
}
if (root != sub_root)
btrfs_put_root(sub_root);
@@ -5779,7 +5828,8 @@ int btrfs_set_inode_index(struct btrfs_inode *dir, u64 *index)
static int btrfs_insert_inode_locked(struct inode *inode)
{
struct btrfs_iget_args args;
- args.location = &BTRFS_I(inode)->location;
+
+ args.ino = BTRFS_I(inode)->location.objectid;
args.root = BTRFS_I(inode)->root;
return insert_inode_locked4(inode,
@@ -6991,7 +7041,7 @@ out:
}
static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
- struct extent_state **cached_state, int writing)
+ struct extent_state **cached_state, bool writing)
{
struct btrfs_ordered_extent *ordered;
int ret = 0;
@@ -7050,11 +7100,11 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
* for it to complete) and then invalidate the pages for
* this range (through invalidate_inode_pages2_range()),
* but that can lead us to a deadlock with a concurrent
- * call to readpages() (a buffered read or a defrag call
+ * call to readahead (a buffered read or a defrag call
* triggered a readahead) on a page lock due to an
* ordered dio extent we created before but did not have
* yet a corresponding bio submitted (whence it can not
- * complete), which makes readpages() wait for that
+ * complete), which makes readahead wait for that
* ordered extent to complete while holding a lock on
* that page.
*/
@@ -7129,30 +7179,7 @@ static struct extent_map *create_io_em(struct inode *inode, u64 start, u64 len,
}
-static int btrfs_get_blocks_direct_read(struct extent_map *em,
- struct buffer_head *bh_result,
- struct inode *inode,
- u64 start, u64 len)
-{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
-
- if (em->block_start == EXTENT_MAP_HOLE ||
- test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
- return -ENOENT;
-
- len = min(len, em->len - (start - em->start));
-
- bh_result->b_blocknr = (em->block_start + (start - em->start)) >>
- inode->i_blkbits;
- bh_result->b_size = len;
- bh_result->b_bdev = fs_info->fs_devices->latest_bdev;
- set_buffer_mapped(bh_result);
-
- return 0;
-}
-
static int btrfs_get_blocks_direct_write(struct extent_map **map,
- struct buffer_head *bh_result,
struct inode *inode,
struct btrfs_dio_data *dio_data,
u64 start, u64 len)
@@ -7214,7 +7241,6 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map,
}
/* this will cow the extent */
- len = bh_result->b_size;
free_extent_map(em);
*map = em = btrfs_new_extent_direct(inode, start, len);
if (IS_ERR(em)) {
@@ -7225,64 +7251,73 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map,
len = min(len, em->len - (start - em->start));
skip_cow:
- bh_result->b_blocknr = (em->block_start + (start - em->start)) >>
- inode->i_blkbits;
- bh_result->b_size = len;
- bh_result->b_bdev = fs_info->fs_devices->latest_bdev;
- set_buffer_mapped(bh_result);
-
- if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
- set_buffer_new(bh_result);
-
/*
* Need to update the i_size under the extent lock so buffered
* readers will get the updated i_size when we unlock.
*/
- if (!dio_data->overwrite && start + len > i_size_read(inode))
+ if (start + len > i_size_read(inode))
i_size_write(inode, start + len);
- WARN_ON(dio_data->reserve < len);
dio_data->reserve -= len;
- dio_data->unsubmitted_oe_range_end = start + len;
- current->journal_info = dio_data;
out:
return ret;
}
-static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
- struct buffer_head *bh_result, int create)
+static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start,
+ loff_t length, unsigned flags, struct iomap *iomap,
+ struct iomap *srcmap)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct extent_map *em;
struct extent_state *cached_state = NULL;
struct btrfs_dio_data *dio_data = NULL;
- u64 start = iblock << inode->i_blkbits;
u64 lockstart, lockend;
- u64 len = bh_result->b_size;
+ const bool write = !!(flags & IOMAP_WRITE);
int ret = 0;
+ u64 len = length;
+ bool unlock_extents = false;
- if (!create)
+ if (!write)
len = min_t(u64, len, fs_info->sectorsize);
lockstart = start;
lockend = start + len - 1;
- if (current->journal_info) {
- /*
- * Need to pull our outstanding extents and set journal_info to NULL so
- * that anything that needs to check if there's a transaction doesn't get
- * confused.
- */
- dio_data = current->journal_info;
- current->journal_info = NULL;
+ /*
+ * The generic stuff only does filemap_write_and_wait_range, which
+ * isn't enough if we've written compressed pages to this area, so we
+ * need to flush the dirty pages again to make absolutely sure that any
+ * outstanding dirty pages are on disk.
+ */
+ if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
+ &BTRFS_I(inode)->runtime_flags))
+ ret = filemap_fdatawrite_range(inode->i_mapping, start,
+ start + length - 1);
+
+ dio_data = kzalloc(sizeof(*dio_data), GFP_NOFS);
+ if (!dio_data)
+ return -ENOMEM;
+
+ dio_data->length = length;
+ if (write) {
+ dio_data->reserve = round_up(length, fs_info->sectorsize);
+ ret = btrfs_delalloc_reserve_space(inode,
+ &dio_data->data_reserved,
+ start, dio_data->reserve);
+ if (ret) {
+ extent_changeset_free(dio_data->data_reserved);
+ kfree(dio_data);
+ return ret;
+ }
}
+ iomap->private = dio_data;
+
/*
* If this errors out it's because we couldn't invalidate pagecache for
* this range and we need to fallback to buffered.
*/
- if (lock_extent_direct(inode, lockstart, lockend, &cached_state,
- create)) {
+ if (lock_extent_direct(inode, lockstart, lockend, &cached_state, write)) {
ret = -ENOTBLK;
goto err;
}
@@ -7314,35 +7349,47 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
goto unlock_err;
}
- if (create) {
- ret = btrfs_get_blocks_direct_write(&em, bh_result, inode,
- dio_data, start, len);
+ len = min(len, em->len - (start - em->start));
+ if (write) {
+ ret = btrfs_get_blocks_direct_write(&em, inode, dio_data,
+ start, len);
if (ret < 0)
goto unlock_err;
-
- unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart,
- lockend, &cached_state);
+ unlock_extents = true;
+ /* Recalc len in case the new em is smaller than requested */
+ len = min(len, em->len - (start - em->start));
} else {
- ret = btrfs_get_blocks_direct_read(em, bh_result, inode,
- start, len);
- /* Can be negative only if we read from a hole */
- if (ret < 0) {
- ret = 0;
- free_extent_map(em);
- goto unlock_err;
- }
/*
* We need to unlock only the end area that we aren't using.
* The rest is going to be unlocked by the endio routine.
*/
- lockstart = start + bh_result->b_size;
- if (lockstart < lockend) {
- unlock_extent_cached(&BTRFS_I(inode)->io_tree,
- lockstart, lockend, &cached_state);
- } else {
- free_extent_state(cached_state);
- }
+ lockstart = start + len;
+ if (lockstart < lockend)
+ unlock_extents = true;
+ }
+
+ if (unlock_extents)
+ unlock_extent_cached(&BTRFS_I(inode)->io_tree,
+ lockstart, lockend, &cached_state);
+ else
+ free_extent_state(cached_state);
+
+ /*
+ * Translate extent map information to iomap.
+ * We trim the extents (and move the addr) even though iomap code does
+ * that, since we have locked only the parts we are performing I/O in.
+ */
+ if ((em->block_start == EXTENT_MAP_HOLE) ||
+ (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) && !write)) {
+ iomap->addr = IOMAP_NULL_ADDR;
+ iomap->type = IOMAP_HOLE;
+ } else {
+ iomap->addr = em->block_start + (start - em->start);
+ iomap->type = IOMAP_MAPPED;
}
+ iomap->offset = start;
+ iomap->bdev = fs_info->fs_devices->latest_bdev;
+ iomap->length = len;
free_extent_map(em);
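The hole/mapped decision above matters most for reads of preallocated (unwritten) extents: reporting IOMAP_HOLE lets the iomap layer zero the user buffer instead of reading stale on-disk data, while a write over the same range stays IOMAP_MAPPED because btrfs_get_blocks_direct_write() has already prepared the extent. A standalone sketch of the same classification (names are illustrative, not btrfs API):

	#include <stdbool.h>

	enum map_type { MAP_HOLE, MAP_MAPPED };

	/* Holes are always holes; preallocated extents only for reads. */
	static enum map_type classify_extent(bool is_hole, bool is_prealloc,
					     bool write)
	{
		if (is_hole || (is_prealloc && !write))
			return MAP_HOLE;
		return MAP_MAPPED;
	}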
@@ -7352,370 +7399,152 @@ unlock_err:
unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
&cached_state);
err:
- if (dio_data)
- current->journal_info = dio_data;
+ if (dio_data) {
+ btrfs_delalloc_release_space(inode, dio_data->data_reserved,
+ start, dio_data->reserve, true);
+ btrfs_delalloc_release_extents(BTRFS_I(inode), dio_data->reserve);
+ extent_changeset_free(dio_data->data_reserved);
+ kfree(dio_data);
+ }
return ret;
}
-static inline blk_status_t submit_dio_repair_bio(struct inode *inode,
- struct bio *bio,
- int mirror_num)
+static int btrfs_dio_iomap_end(struct inode *inode, loff_t pos, loff_t length,
+ ssize_t written, unsigned flags, struct iomap *iomap)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- blk_status_t ret;
+ int ret = 0;
+ struct btrfs_dio_data *dio_data = iomap->private;
+ size_t submitted = dio_data->submitted;
+ const bool write = !!(flags & IOMAP_WRITE);
- BUG_ON(bio_op(bio) == REQ_OP_WRITE);
+ if (!write && (iomap->type == IOMAP_HOLE)) {
+ /* If reading from a hole, unlock and return */
+ unlock_extent(&BTRFS_I(inode)->io_tree, pos, pos + length - 1);
+ goto out;
+ }
- ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DIO_REPAIR);
- if (ret)
- return ret;
+ if (submitted < length) {
+ pos += submitted;
+ length -= submitted;
+ if (write)
+ __endio_write_update_ordered(inode, pos, length, false);
+ else
+ unlock_extent(&BTRFS_I(inode)->io_tree, pos,
+ pos + length - 1);
+ ret = -ENOTBLK;
+ }
- ret = btrfs_map_bio(fs_info, bio, mirror_num);
+ if (write) {
+ if (dio_data->reserve)
+ btrfs_delalloc_release_space(inode,
+ dio_data->data_reserved, pos,
+ dio_data->reserve, true);
+ btrfs_delalloc_release_extents(BTRFS_I(inode), dio_data->length);
+ extent_changeset_free(dio_data->data_reserved);
+ }
+out:
+ kfree(dio_data);
+ iomap->private = NULL;
return ret;
}
-static int btrfs_check_dio_repairable(struct inode *inode,
- struct bio *failed_bio,
- struct io_failure_record *failrec,
- int failed_mirror)
+static void btrfs_dio_private_put(struct btrfs_dio_private *dip)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- int num_copies;
-
- num_copies = btrfs_num_copies(fs_info, failrec->logical, failrec->len);
- if (num_copies == 1) {
- /*
- * we only have a single copy of the data, so don't bother with
- * all the retry and error correction code that follows. no
- * matter what the error is, it is very likely to persist.
- */
- btrfs_debug(fs_info,
- "Check DIO Repairable: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d",
- num_copies, failrec->this_mirror, failed_mirror);
- return 0;
- }
-
- failrec->failed_mirror = failed_mirror;
- failrec->this_mirror++;
- if (failrec->this_mirror == failed_mirror)
- failrec->this_mirror++;
+ /*
+ * This implies a barrier so that stores to dio_bio->bi_status before
+ * this and loads of dio_bio->bi_status after this are fully ordered.
+ */
+ if (!refcount_dec_and_test(&dip->refs))
+ return;
- if (failrec->this_mirror > num_copies) {
- btrfs_debug(fs_info,
- "Check DIO Repairable: (fail) num_copies=%d, next_mirror %d, failed_mirror %d",
- num_copies, failrec->this_mirror, failed_mirror);
- return 0;
+ if (bio_op(dip->dio_bio) == REQ_OP_WRITE) {
+ __endio_write_update_ordered(dip->inode, dip->logical_offset,
+ dip->bytes,
+ !dip->dio_bio->bi_status);
+ } else {
+ unlock_extent(&BTRFS_I(dip->inode)->io_tree,
+ dip->logical_offset,
+ dip->logical_offset + dip->bytes - 1);
}
- return 1;
+ bio_endio(dip->dio_bio);
+ kfree(dip);
}
-static blk_status_t dio_read_error(struct inode *inode, struct bio *failed_bio,
- struct page *page, unsigned int pgoff,
- u64 start, u64 end, int failed_mirror,
- bio_end_io_t *repair_endio, void *repair_arg)
+static blk_status_t submit_dio_repair_bio(struct inode *inode, struct bio *bio,
+ int mirror_num,
+ unsigned long bio_flags)
{
- struct io_failure_record *failrec;
- struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
- struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
- struct bio *bio;
- int isector;
- unsigned int read_mode = 0;
- int segs;
- int ret;
- blk_status_t status;
- struct bio_vec bvec;
+ struct btrfs_dio_private *dip = bio->bi_private;
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ blk_status_t ret;
- BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
+ BUG_ON(bio_op(bio) == REQ_OP_WRITE);
- ret = btrfs_get_io_failure_record(inode, start, end, &failrec);
+ ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA);
if (ret)
- return errno_to_blk_status(ret);
-
- ret = btrfs_check_dio_repairable(inode, failed_bio, failrec,
- failed_mirror);
- if (!ret) {
- free_io_failure(failure_tree, io_tree, failrec);
- return BLK_STS_IOERR;
- }
-
- segs = bio_segments(failed_bio);
- bio_get_first_bvec(failed_bio, &bvec);
- if (segs > 1 ||
- (bvec.bv_len > btrfs_inode_sectorsize(inode)))
- read_mode |= REQ_FAILFAST_DEV;
-
- isector = start - btrfs_io_bio(failed_bio)->logical;
- isector >>= inode->i_sb->s_blocksize_bits;
- bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page,
- pgoff, isector, repair_endio, repair_arg);
- bio->bi_opf = REQ_OP_READ | read_mode;
-
- btrfs_debug(BTRFS_I(inode)->root->fs_info,
- "repair DIO read error: submitting new dio read[%#x] to this_mirror=%d, in_validation=%d",
- read_mode, failrec->this_mirror, failrec->in_validation);
-
- status = submit_dio_repair_bio(inode, bio, failrec->this_mirror);
- if (status) {
- free_io_failure(failure_tree, io_tree, failrec);
- bio_put(bio);
- }
-
- return status;
-}
-
-struct btrfs_retry_complete {
- struct completion done;
- struct inode *inode;
- u64 start;
- int uptodate;
-};
+ return ret;
-static void btrfs_retry_endio_nocsum(struct bio *bio)
-{
- struct btrfs_retry_complete *done = bio->bi_private;
- struct inode *inode = done->inode;
- struct bio_vec *bvec;
- struct extent_io_tree *io_tree, *failure_tree;
- struct bvec_iter_all iter_all;
-
- if (bio->bi_status)
- goto end;
-
- ASSERT(bio->bi_vcnt == 1);
- io_tree = &BTRFS_I(inode)->io_tree;
- failure_tree = &BTRFS_I(inode)->io_failure_tree;
- ASSERT(bio_first_bvec_all(bio)->bv_len == btrfs_inode_sectorsize(inode));
-
- done->uptodate = 1;
- ASSERT(!bio_flagged(bio, BIO_CLONED));
- bio_for_each_segment_all(bvec, bio, iter_all)
- clean_io_failure(BTRFS_I(inode)->root->fs_info, failure_tree,
- io_tree, done->start, bvec->bv_page,
- btrfs_ino(BTRFS_I(inode)), 0);
-end:
- complete(&done->done);
- bio_put(bio);
+ refcount_inc(&dip->refs);
+ ret = btrfs_map_bio(fs_info, bio, mirror_num);
+ if (ret)
+ refcount_dec(&dip->refs);
+ return ret;
}
-static blk_status_t __btrfs_correct_data_nocsum(struct inode *inode,
- struct btrfs_io_bio *io_bio)
+static blk_status_t btrfs_check_read_dio_bio(struct inode *inode,
+ struct btrfs_io_bio *io_bio,
+ const bool uptodate)
{
- struct btrfs_fs_info *fs_info;
+ struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
+ const u32 sectorsize = fs_info->sectorsize;
+ struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
+ struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+ const bool csum = !(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM);
struct bio_vec bvec;
struct bvec_iter iter;
- struct btrfs_retry_complete done;
- u64 start;
- unsigned int pgoff;
- u32 sectorsize;
- int nr_sectors;
- blk_status_t ret;
+ u64 start = io_bio->logical;
+ int icsum = 0;
blk_status_t err = BLK_STS_OK;
- fs_info = BTRFS_I(inode)->root->fs_info;
- sectorsize = fs_info->sectorsize;
-
- start = io_bio->logical;
- done.inode = inode;
- io_bio->bio.bi_iter = io_bio->iter;
+ __bio_for_each_segment(bvec, &io_bio->bio, iter, io_bio->iter) {
+ unsigned int i, nr_sectors, pgoff;
- bio_for_each_segment(bvec, &io_bio->bio, iter) {
nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec.bv_len);
pgoff = bvec.bv_offset;
-
-next_block_or_try_again:
- done.uptodate = 0;
- done.start = start;
- init_completion(&done.done);
-
- ret = dio_read_error(inode, &io_bio->bio, bvec.bv_page,
- pgoff, start, start + sectorsize - 1,
- io_bio->mirror_num,
- btrfs_retry_endio_nocsum, &done);
- if (ret) {
- err = ret;
- goto next;
- }
-
- wait_for_completion_io(&done.done);
-
- if (!done.uptodate) {
- /* We might have another mirror, so try again */
- goto next_block_or_try_again;
- }
-
-next:
- start += sectorsize;
-
- nr_sectors--;
- if (nr_sectors) {
- pgoff += sectorsize;
+ for (i = 0; i < nr_sectors; i++) {
ASSERT(pgoff < PAGE_SIZE);
- goto next_block_or_try_again;
- }
- }
-
- return err;
-}
-
-static void btrfs_retry_endio(struct bio *bio)
-{
- struct btrfs_retry_complete *done = bio->bi_private;
- struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
- struct extent_io_tree *io_tree, *failure_tree;
- struct inode *inode = done->inode;
- struct bio_vec *bvec;
- int uptodate;
- int ret;
- int i = 0;
- struct bvec_iter_all iter_all;
-
- if (bio->bi_status)
- goto end;
-
- uptodate = 1;
-
- ASSERT(bio->bi_vcnt == 1);
- ASSERT(bio_first_bvec_all(bio)->bv_len == btrfs_inode_sectorsize(done->inode));
-
- io_tree = &BTRFS_I(inode)->io_tree;
- failure_tree = &BTRFS_I(inode)->io_failure_tree;
-
- ASSERT(!bio_flagged(bio, BIO_CLONED));
- bio_for_each_segment_all(bvec, bio, iter_all) {
- ret = __readpage_endio_check(inode, io_bio, i, bvec->bv_page,
- bvec->bv_offset, done->start,
- bvec->bv_len);
- if (!ret)
- clean_io_failure(BTRFS_I(inode)->root->fs_info,
- failure_tree, io_tree, done->start,
- bvec->bv_page,
- btrfs_ino(BTRFS_I(inode)),
- bvec->bv_offset);
- else
- uptodate = 0;
- i++;
- }
-
- done->uptodate = uptodate;
-end:
- complete(&done->done);
- bio_put(bio);
-}
-
-static blk_status_t __btrfs_subio_endio_read(struct inode *inode,
- struct btrfs_io_bio *io_bio, blk_status_t err)
-{
- struct btrfs_fs_info *fs_info;
- struct bio_vec bvec;
- struct bvec_iter iter;
- struct btrfs_retry_complete done;
- u64 start;
- u64 offset = 0;
- u32 sectorsize;
- int nr_sectors;
- unsigned int pgoff;
- int csum_pos;
- bool uptodate = (err == 0);
- int ret;
- blk_status_t status;
-
- fs_info = BTRFS_I(inode)->root->fs_info;
- sectorsize = fs_info->sectorsize;
-
- err = BLK_STS_OK;
- start = io_bio->logical;
- done.inode = inode;
- io_bio->bio.bi_iter = io_bio->iter;
-
- bio_for_each_segment(bvec, &io_bio->bio, iter) {
- nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec.bv_len);
-
- pgoff = bvec.bv_offset;
-next_block:
- if (uptodate) {
- csum_pos = BTRFS_BYTES_TO_BLKS(fs_info, offset);
- ret = __readpage_endio_check(inode, io_bio, csum_pos,
- bvec.bv_page, pgoff, start, sectorsize);
- if (likely(!ret))
- goto next;
- }
-try_again:
- done.uptodate = 0;
- done.start = start;
- init_completion(&done.done);
-
- status = dio_read_error(inode, &io_bio->bio, bvec.bv_page,
- pgoff, start, start + sectorsize - 1,
- io_bio->mirror_num, btrfs_retry_endio,
- &done);
- if (status) {
- err = status;
- goto next;
- }
-
- wait_for_completion_io(&done.done);
-
- if (!done.uptodate) {
- /* We might have another mirror, so try again */
- goto try_again;
- }
-next:
- offset += sectorsize;
- start += sectorsize;
-
- ASSERT(nr_sectors);
-
- nr_sectors--;
- if (nr_sectors) {
+ if (uptodate &&
+ (!csum || !check_data_csum(inode, io_bio, icsum,
+ bvec.bv_page, pgoff,
+ start, sectorsize))) {
+ clean_io_failure(fs_info, failure_tree, io_tree,
+ start, bvec.bv_page,
+ btrfs_ino(BTRFS_I(inode)),
+ pgoff);
+ } else {
+ blk_status_t status;
+
+ status = btrfs_submit_read_repair(inode,
+ &io_bio->bio,
+ start - io_bio->logical,
+ bvec.bv_page, pgoff,
+ start,
+ start + sectorsize - 1,
+ io_bio->mirror_num,
+ submit_dio_repair_bio);
+ if (status)
+ err = status;
+ }
+ start += sectorsize;
+ icsum++;
pgoff += sectorsize;
- ASSERT(pgoff < PAGE_SIZE);
- goto next_block;
}
}
-
return err;
}
-static blk_status_t btrfs_subio_endio_read(struct inode *inode,
- struct btrfs_io_bio *io_bio, blk_status_t err)
-{
- bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
-
- if (skip_csum) {
- if (unlikely(err))
- return __btrfs_correct_data_nocsum(inode, io_bio);
- else
- return BLK_STS_OK;
- } else {
- return __btrfs_subio_endio_read(inode, io_bio, err);
- }
-}
-
-static void btrfs_endio_direct_read(struct bio *bio)
-{
- struct btrfs_dio_private *dip = bio->bi_private;
- struct inode *inode = dip->inode;
- struct bio *dio_bio;
- struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
- blk_status_t err = bio->bi_status;
-
- if (dip->flags & BTRFS_DIO_ORIG_BIO_SUBMITTED)
- err = btrfs_subio_endio_read(inode, io_bio, err);
-
- unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset,
- dip->logical_offset + dip->bytes - 1);
- dio_bio = dip->dio_bio;
-
- kfree(dip);
-
- dio_bio->bi_status = err;
- dio_end_io(dio_bio);
- btrfs_io_bio_free_csum(io_bio);
- bio_put(bio);
-}
-
static void __endio_write_update_ordered(struct inode *inode,
const u64 offset, const u64 bytes,
const bool uptodate)
@@ -7759,21 +7588,6 @@ static void __endio_write_update_ordered(struct inode *inode,
}
}
-static void btrfs_endio_direct_write(struct bio *bio)
-{
- struct btrfs_dio_private *dip = bio->bi_private;
- struct bio *dio_bio = dip->dio_bio;
-
- __endio_write_update_ordered(dip->inode, dip->logical_offset,
- dip->bytes, !bio->bi_status);
-
- kfree(dip);
-
- dio_bio->bi_status = bio->bi_status;
- dio_end_io(dio_bio);
- bio_put(bio);
-}
-
static blk_status_t btrfs_submit_bio_start_direct_io(void *private_data,
struct bio *bio, u64 offset)
{
@@ -7797,64 +7611,16 @@ static void btrfs_end_dio_bio(struct bio *bio)
(unsigned long long)bio->bi_iter.bi_sector,
bio->bi_iter.bi_size, err);
- if (dip->subio_endio)
- err = dip->subio_endio(dip->inode, btrfs_io_bio(bio), err);
-
- if (err) {
- /*
- * We want to perceive the errors flag being set before
- * decrementing the reference count. We don't need a barrier
- * since atomic operations with a return value are fully
- * ordered as per atomic_t.txt
- */
- dip->errors = 1;
+ if (bio_op(bio) == REQ_OP_READ) {
+ err = btrfs_check_read_dio_bio(dip->inode, btrfs_io_bio(bio),
+ !err);
}
- /* if there are more bios still pending for this dio, just exit */
- if (!atomic_dec_and_test(&dip->pending_bios))
- goto out;
+ if (err)
+ dip->dio_bio->bi_status = err;
- if (dip->errors) {
- bio_io_error(dip->orig_bio);
- } else {
- dip->dio_bio->bi_status = BLK_STS_OK;
- bio_endio(dip->orig_bio);
- }
-out:
bio_put(bio);
-}
-
-static inline blk_status_t btrfs_lookup_and_bind_dio_csum(struct inode *inode,
- struct btrfs_dio_private *dip,
- struct bio *bio,
- u64 file_offset)
-{
- struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
- struct btrfs_io_bio *orig_io_bio = btrfs_io_bio(dip->orig_bio);
- u16 csum_size;
- blk_status_t ret;
-
- /*
- * We load all the csum data we need when we submit
- * the first bio to reduce the csum tree search and
- * contention.
- */
- if (dip->logical_offset == file_offset) {
- ret = btrfs_lookup_bio_sums(inode, dip->orig_bio, file_offset,
- NULL);
- if (ret)
- return ret;
- }
-
- if (bio == dip->orig_bio)
- return 0;
-
- file_offset -= dip->logical_offset;
- file_offset >>= inode->i_sb->s_blocksize_bits;
- csum_size = btrfs_super_csum_size(btrfs_sb(inode->i_sb)->super_copy);
- io_bio->csum = orig_io_bio->csum + csum_size * file_offset;
-
- return 0;
+ btrfs_dio_private_put(dip);
}
static inline blk_status_t btrfs_submit_dio_bio(struct bio *bio,
@@ -7892,10 +7658,12 @@ static inline blk_status_t btrfs_submit_dio_bio(struct bio *bio,
if (ret)
goto err;
} else {
- ret = btrfs_lookup_and_bind_dio_csum(inode, dip, bio,
- file_offset);
- if (ret)
- goto err;
+ u64 csum_offset;
+
+ csum_offset = file_offset - dip->logical_offset;
+ csum_offset >>= inode->i_sb->s_blocksize_bits;
+ csum_offset *= btrfs_super_csum_size(fs_info->super_copy);
+ btrfs_io_bio(bio)->csum = dip->csums + csum_offset;
}
map:
ret = btrfs_map_bio(fs_info, bio, 0);
@@ -7903,14 +7671,53 @@ err:
return ret;
}
-static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip)
+/*
+ * If this succeeds, the btrfs_dio_private is responsible for cleaning up locked
+ * or ordered extents whether or not we submit any bios.
+ */
+static struct btrfs_dio_private *btrfs_create_dio_private(struct bio *dio_bio,
+ struct inode *inode,
+ loff_t file_offset)
{
- struct inode *inode = dip->inode;
+ const bool write = (bio_op(dio_bio) == REQ_OP_WRITE);
+ const bool csum = !(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM);
+ size_t dip_size;
+ struct btrfs_dio_private *dip;
+
+ dip_size = sizeof(*dip);
+ if (!write && csum) {
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ const u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
+ size_t nblocks;
+
+ nblocks = dio_bio->bi_iter.bi_size >> inode->i_sb->s_blocksize_bits;
+ dip_size += csum_size * nblocks;
+ }
+
+ dip = kzalloc(dip_size, GFP_NOFS);
+ if (!dip)
+ return NULL;
+
+ dip->inode = inode;
+ dip->logical_offset = file_offset;
+ dip->bytes = dio_bio->bi_iter.bi_size;
+ dip->disk_bytenr = (u64)dio_bio->bi_iter.bi_sector << 9;
+ dip->dio_bio = dio_bio;
+ refcount_set(&dip->refs, 1);
+ return dip;
+}
+
+static blk_qc_t btrfs_submit_direct(struct inode *inode, struct iomap *iomap,
+ struct bio *dio_bio, loff_t file_offset)
+{
+ const bool write = (bio_op(dio_bio) == REQ_OP_WRITE);
+ const bool csum = !(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM);
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ const bool raid56 = (btrfs_data_alloc_profile(fs_info) &
+ BTRFS_BLOCK_GROUP_RAID56_MASK);
+ struct btrfs_dio_private *dip;
struct bio *bio;
- struct bio *orig_bio = dip->orig_bio;
- u64 start_sector = orig_bio->bi_iter.bi_sector;
- u64 file_offset = dip->logical_offset;
+ u64 start_sector;
int async_submit = 0;
u64 submit_len;
int clone_offset = 0;
@@ -7918,330 +7725,108 @@ static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip)
int ret;
blk_status_t status;
struct btrfs_io_geometry geom;
+ struct btrfs_dio_data *dio_data = iomap->private;
- submit_len = orig_bio->bi_iter.bi_size;
- ret = btrfs_get_io_geometry(fs_info, btrfs_op(orig_bio),
- start_sector << 9, submit_len, &geom);
- if (ret)
- return -EIO;
+ dip = btrfs_create_dio_private(dio_bio, inode, file_offset);
+ if (!dip) {
+ if (!write) {
+ unlock_extent(&BTRFS_I(inode)->io_tree, file_offset,
+ file_offset + dio_bio->bi_iter.bi_size - 1);
+ }
+ dio_bio->bi_status = BLK_STS_RESOURCE;
+ bio_endio(dio_bio);
+ return BLK_QC_T_NONE;
+ }
- if (geom.len >= submit_len) {
- bio = orig_bio;
- dip->flags |= BTRFS_DIO_ORIG_BIO_SUBMITTED;
- goto submit;
+ if (!write && csum) {
+ /*
+ * Load the csums up front to reduce csum tree searches and
+ * contention when submitting bios.
+ */
+ status = btrfs_lookup_bio_sums(inode, dio_bio, file_offset,
+ dip->csums);
+ if (status != BLK_STS_OK)
+ goto out_err;
}
- /* async crcs make it difficult to collect full stripe writes. */
- if (btrfs_data_alloc_profile(fs_info) & BTRFS_BLOCK_GROUP_RAID56_MASK)
- async_submit = 0;
- else
- async_submit = 1;
+ start_sector = dio_bio->bi_iter.bi_sector;
+ submit_len = dio_bio->bi_iter.bi_size;
- /* bio split */
- ASSERT(geom.len <= INT_MAX);
- atomic_inc(&dip->pending_bios);
do {
+ ret = btrfs_get_io_geometry(fs_info, btrfs_op(dio_bio),
+ start_sector << 9, submit_len,
+ &geom);
+ if (ret) {
+ status = errno_to_blk_status(ret);
+ goto out_err;
+ }
+ ASSERT(geom.len <= INT_MAX);
+
clone_len = min_t(int, submit_len, geom.len);
/*
* This will never fail as it's passing GPF_NOFS and
* the allocation is backed by btrfs_bioset.
*/
- bio = btrfs_bio_clone_partial(orig_bio, clone_offset,
- clone_len);
+ bio = btrfs_bio_clone_partial(dio_bio, clone_offset, clone_len);
bio->bi_private = dip;
bio->bi_end_io = btrfs_end_dio_bio;
btrfs_io_bio(bio)->logical = file_offset;
ASSERT(submit_len >= clone_len);
submit_len -= clone_len;
- if (submit_len == 0)
- break;
/*
* Increase the count before we submit the bio so we know
* the end IO handler won't happen before we increase the
* count. Otherwise, the dip might get freed before we're
* done setting it up.
+ *
+ * We transfer the initial reference to the last bio, so we
+ * don't need to increment the reference count for the last one.
*/
- atomic_inc(&dip->pending_bios);
+ if (submit_len > 0) {
+ refcount_inc(&dip->refs);
+ /*
+ * If we are submitting more than one bio, submit them
+ * all asynchronously. The exception is RAID 5 or 6, as
+ * asynchronous checksums make it difficult to collect
+ * full stripe writes.
+ */
+ if (!raid56)
+ async_submit = 1;
+ }
status = btrfs_submit_dio_bio(bio, inode, file_offset,
async_submit);
if (status) {
bio_put(bio);
- atomic_dec(&dip->pending_bios);
+ if (submit_len > 0)
+ refcount_dec(&dip->refs);
goto out_err;
}
+ dio_data->submitted += clone_len;
clone_offset += clone_len;
start_sector += clone_len >> 9;
file_offset += clone_len;
-
- ret = btrfs_get_io_geometry(fs_info, btrfs_op(orig_bio),
- start_sector << 9, submit_len, &geom);
- if (ret)
- goto out_err;
} while (submit_len > 0);
+ return BLK_QC_T_NONE;
-submit:
- status = btrfs_submit_dio_bio(bio, inode, file_offset, async_submit);
- if (!status)
- return 0;
-
- bio_put(bio);
out_err:
- dip->errors = 1;
- /*
- * Before atomic variable goto zero, we must make sure dip->errors is
- * perceived to be set. This ordering is ensured by the fact that an
- * atomic operations with a return value are fully ordered as per
- * atomic_t.txt
- */
- if (atomic_dec_and_test(&dip->pending_bios))
- bio_io_error(dip->orig_bio);
-
- /* bio_end_io() will handle error, so we needn't return it */
- return 0;
-}
-
-static void btrfs_submit_direct(struct bio *dio_bio, struct inode *inode,
- loff_t file_offset)
-{
- struct btrfs_dio_private *dip = NULL;
- struct bio *bio = NULL;
- struct btrfs_io_bio *io_bio;
- bool write = (bio_op(dio_bio) == REQ_OP_WRITE);
- int ret = 0;
-
- bio = btrfs_bio_clone(dio_bio);
-
- dip = kzalloc(sizeof(*dip), GFP_NOFS);
- if (!dip) {
- ret = -ENOMEM;
- goto free_ordered;
- }
-
- dip->private = dio_bio->bi_private;
- dip->inode = inode;
- dip->logical_offset = file_offset;
- dip->bytes = dio_bio->bi_iter.bi_size;
- dip->disk_bytenr = (u64)dio_bio->bi_iter.bi_sector << 9;
- bio->bi_private = dip;
- dip->orig_bio = bio;
- dip->dio_bio = dio_bio;
- atomic_set(&dip->pending_bios, 0);
- io_bio = btrfs_io_bio(bio);
- io_bio->logical = file_offset;
-
- if (write) {
- bio->bi_end_io = btrfs_endio_direct_write;
- } else {
- bio->bi_end_io = btrfs_endio_direct_read;
- dip->subio_endio = btrfs_subio_endio_read;
- }
-
- /*
- * Reset the range for unsubmitted ordered extents (to a 0 length range)
- * even if we fail to submit a bio, because in such case we do the
- * corresponding error handling below and it must not be done a second
- * time by btrfs_direct_IO().
- */
- if (write) {
- struct btrfs_dio_data *dio_data = current->journal_info;
-
- dio_data->unsubmitted_oe_range_end = dip->logical_offset +
- dip->bytes;
- dio_data->unsubmitted_oe_range_start =
- dio_data->unsubmitted_oe_range_end;
- }
-
- ret = btrfs_submit_direct_hook(dip);
- if (!ret)
- return;
-
- btrfs_io_bio_free_csum(io_bio);
-
-free_ordered:
- /*
- * If we arrived here it means either we failed to submit the dip
- * or we either failed to clone the dio_bio or failed to allocate the
- * dip. If we cloned the dio_bio and allocated the dip, we can just
- * call bio_endio against our io_bio so that we get proper resource
- * cleanup if we fail to submit the dip, otherwise, we must do the
- * same as btrfs_endio_direct_[write|read] because we can't call these
- * callbacks - they require an allocated dip and a clone of dio_bio.
- */
- if (bio && dip) {
- bio_io_error(bio);
- /*
- * The end io callbacks free our dip, do the final put on bio
- * and all the cleanup and final put for dio_bio (through
- * dio_end_io()).
- */
- dip = NULL;
- bio = NULL;
- } else {
- if (write)
- __endio_write_update_ordered(inode,
- file_offset,
- dio_bio->bi_iter.bi_size,
- false);
- else
- unlock_extent(&BTRFS_I(inode)->io_tree, file_offset,
- file_offset + dio_bio->bi_iter.bi_size - 1);
-
- dio_bio->bi_status = BLK_STS_IOERR;
- /*
- * Releases and cleans up our dio_bio, no need to bio_put()
- * nor bio_endio()/bio_io_error() against dio_bio.
- */
- dio_end_io(dio_bio);
- }
- if (bio)
- bio_put(bio);
- kfree(dip);
-}
-
-static ssize_t check_direct_IO(struct btrfs_fs_info *fs_info,
- const struct iov_iter *iter, loff_t offset)
-{
- int seg;
- int i;
- unsigned int blocksize_mask = fs_info->sectorsize - 1;
- ssize_t retval = -EINVAL;
-
- if (offset & blocksize_mask)
- goto out;
-
- if (iov_iter_alignment(iter) & blocksize_mask)
- goto out;
-
- /* If this is a write we don't need to check anymore */
- if (iov_iter_rw(iter) != READ || !iter_is_iovec(iter))
- return 0;
- /*
- * Check to make sure we don't have duplicate iov_base's in this
- * iovec, if so return EINVAL, otherwise we'll get csum errors
- * when reading back.
- */
- for (seg = 0; seg < iter->nr_segs; seg++) {
- for (i = seg + 1; i < iter->nr_segs; i++) {
- if (iter->iov[seg].iov_base == iter->iov[i].iov_base)
- goto out;
- }
- }
- retval = 0;
-out:
- return retval;
+ dip->dio_bio->bi_status = status;
+ btrfs_dio_private_put(dip);
+ return BLK_QC_T_NONE;
}
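The dip lifetime now follows the usual "transfer the initial reference to the last sub-bio" pattern: the dip is created with one reference, every cloned bio except the last takes an extra one, and whichever completion drops the count to zero runs btrfs_dio_private_put(). A minimal userspace sketch of the same pattern (not btrfs code), using C11 atomics in place of refcount_t:

	#include <stdatomic.h>
	#include <stdio.h>
	#include <stdlib.h>

	struct parent {
		atomic_int refs;	/* starts at 1: the initial reference */
	};

	static void parent_put(struct parent *p)
	{
		/* The last reference runs the completion and frees the parent. */
		if (atomic_fetch_sub(&p->refs, 1) == 1) {
			printf("all sub-I/Os done, completing parent\n");
			free(p);
		}
	}

	int main(void)
	{
		struct parent *p = calloc(1, sizeof(*p));
		int nr_bios = 3, i;

		if (!p)
			return 1;
		atomic_init(&p->refs, 1);
		/* All but the last sub-I/O take an extra reference up front. */
		for (i = 0; i < nr_bios - 1; i++)
			atomic_fetch_add(&p->refs, 1);
		/* Each sub-I/O completion drops one reference. */
		for (i = 0; i < nr_bios; i++)
			parent_put(p);
		return 0;
	}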
-static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
-{
- struct file *file = iocb->ki_filp;
- struct inode *inode = file->f_mapping->host;
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- struct btrfs_dio_data dio_data = { 0 };
- struct extent_changeset *data_reserved = NULL;
- loff_t offset = iocb->ki_pos;
- size_t count = 0;
- int flags = 0;
- bool wakeup = true;
- bool relock = false;
- ssize_t ret;
-
- if (check_direct_IO(fs_info, iter, offset))
- return 0;
-
- inode_dio_begin(inode);
-
- /*
- * The generic stuff only does filemap_write_and_wait_range, which
- * isn't enough if we've written compressed pages to this area, so
- * we need to flush the dirty pages again to make absolutely sure
- * that any outstanding dirty pages are on disk.
- */
- count = iov_iter_count(iter);
- if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
- &BTRFS_I(inode)->runtime_flags))
- filemap_fdatawrite_range(inode->i_mapping, offset,
- offset + count - 1);
-
- if (iov_iter_rw(iter) == WRITE) {
- /*
- * If the write DIO is beyond the EOF, we need update
- * the isize, but it is protected by i_mutex. So we can
- * not unlock the i_mutex at this case.
- */
- if (offset + count <= inode->i_size) {
- dio_data.overwrite = 1;
- inode_unlock(inode);
- relock = true;
- } else if (iocb->ki_flags & IOCB_NOWAIT) {
- ret = -EAGAIN;
- goto out;
- }
- ret = btrfs_delalloc_reserve_space(inode, &data_reserved,
- offset, count);
- if (ret)
- goto out;
-
- /*
- * We need to know how many extents we reserved so that we can
- * do the accounting properly if we go over the number we
- * originally calculated. Abuse current->journal_info for this.
- */
- dio_data.reserve = round_up(count,
- fs_info->sectorsize);
- dio_data.unsubmitted_oe_range_start = (u64)offset;
- dio_data.unsubmitted_oe_range_end = (u64)offset;
- current->journal_info = &dio_data;
- down_read(&BTRFS_I(inode)->dio_sem);
- } else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK,
- &BTRFS_I(inode)->runtime_flags)) {
- inode_dio_end(inode);
- flags = DIO_LOCKING | DIO_SKIP_HOLES;
- wakeup = false;
- }
-
- ret = __blockdev_direct_IO(iocb, inode,
- fs_info->fs_devices->latest_bdev,
- iter, btrfs_get_blocks_direct, NULL,
- btrfs_submit_direct, flags);
- if (iov_iter_rw(iter) == WRITE) {
- up_read(&BTRFS_I(inode)->dio_sem);
- current->journal_info = NULL;
- if (ret < 0 && ret != -EIOCBQUEUED) {
- if (dio_data.reserve)
- btrfs_delalloc_release_space(inode, data_reserved,
- offset, dio_data.reserve, true);
- /*
- * On error we might have left some ordered extents
- * without submitting corresponding bios for them, so
- * cleanup them up to avoid other tasks getting them
- * and waiting for them to complete forever.
- */
- if (dio_data.unsubmitted_oe_range_start <
- dio_data.unsubmitted_oe_range_end)
- __endio_write_update_ordered(inode,
- dio_data.unsubmitted_oe_range_start,
- dio_data.unsubmitted_oe_range_end -
- dio_data.unsubmitted_oe_range_start,
- false);
- } else if (ret >= 0 && (size_t)ret < count)
- btrfs_delalloc_release_space(inode, data_reserved,
- offset, count - (size_t)ret, true);
- btrfs_delalloc_release_extents(BTRFS_I(inode), count);
- }
-out:
- if (wakeup)
- inode_dio_end(inode);
- if (relock)
- inode_lock(inode);
+const struct iomap_ops btrfs_dio_iomap_ops = {
+ .iomap_begin = btrfs_dio_iomap_begin,
+ .iomap_end = btrfs_dio_iomap_end,
+};
- extent_changeset_free(data_reserved);
- return ret;
-}
+const struct iomap_dio_ops btrfs_dops = {
+ .submit_io = btrfs_submit_direct,
+};
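These two structures are the glue into the generic iomap direct I/O machinery: the iomap_ops pair maps extents and handles space reservation, while submit_io hands each dio bio back to btrfs for cloning, checksumming and submission. The actual call site lives in the companion fs/btrfs/file.c change (not part of this hunk); assuming the iomap_dio_rw() signature of this kernel generation, it looks roughly like:

	ret = iomap_dio_rw(iocb, iter, &btrfs_dio_iomap_ops, &btrfs_dops,
			   is_sync_kiocb(iocb));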
#define BTRFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC)
@@ -8293,21 +7878,16 @@ static int btrfs_writepages(struct address_space *mapping,
return extent_writepages(mapping, wbc);
}
-static int
-btrfs_readpages(struct file *file, struct address_space *mapping,
- struct list_head *pages, unsigned nr_pages)
+static void btrfs_readahead(struct readahead_control *rac)
{
- return extent_readpages(mapping, pages, nr_pages);
+ extent_readahead(rac);
}
static int __btrfs_releasepage(struct page *page, gfp_t gfp_flags)
{
int ret = try_release_extent_mapping(page, gfp_flags);
- if (ret == 1) {
- ClearPagePrivate(page);
- set_page_private(page, 0);
- put_page(page);
- }
+ if (ret == 1)
+ detach_page_private(page);
return ret;
}
@@ -8329,14 +7909,8 @@ static int btrfs_migratepage(struct address_space *mapping,
if (ret != MIGRATEPAGE_SUCCESS)
return ret;
- if (page_has_private(page)) {
- ClearPagePrivate(page);
- get_page(newpage);
- set_page_private(newpage, page_private(page));
- set_page_private(page, 0);
- put_page(page);
- SetPagePrivate(newpage);
- }
+ if (page_has_private(page))
+ attach_page_private(newpage, detach_page_private(page));
if (PagePrivate2(page)) {
ClearPagePrivate2(page);
@@ -8458,11 +8032,7 @@ again:
}
ClearPageChecked(page);
- if (PagePrivate(page)) {
- ClearPagePrivate(page);
- set_page_private(page, 0);
- put_page(page);
- }
+ detach_page_private(page);
}
/*
@@ -10553,8 +10123,8 @@ static const struct address_space_operations btrfs_aops = {
.readpage = btrfs_readpage,
.writepage = btrfs_writepage,
.writepages = btrfs_writepages,
- .readpages = btrfs_readpages,
- .direct_IO = btrfs_direct_IO,
+ .readahead = btrfs_readahead,
+ .direct_IO = noop_direct_IO,
.invalidatepage = btrfs_invalidatepage,
.releasepage = btrfs_releasepage,
#ifdef CONFIG_MIGRATION
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 40b729dce91c..168deb8ef68a 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -660,7 +660,7 @@ static noinline int create_subvol(struct inode *dir,
goto fail;
key.offset = (u64)-1;
- new_root = btrfs_get_fs_root(fs_info, &key, true);
+ new_root = btrfs_get_fs_root(fs_info, objectid, true);
if (IS_ERR(new_root)) {
ret = PTR_ERR(new_root);
btrfs_abort_transaction(trans, ret);
@@ -748,9 +748,8 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
struct btrfs_pending_snapshot *pending_snapshot;
struct btrfs_trans_handle *trans;
int ret;
- bool snapshot_force_cow = false;
- if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))
+ if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
return -EINVAL;
if (atomic_read(&root->nr_swapfiles)) {
@@ -771,27 +770,6 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
goto free_pending;
}
- /*
- * Force new buffered writes to reserve space even when NOCOW is
- * possible. This is to avoid later writeback (running dealloc) to
- * fallback to COW mode and unexpectedly fail with ENOSPC.
- */
- btrfs_drew_read_lock(&root->snapshot_lock);
-
- ret = btrfs_start_delalloc_snapshot(root);
- if (ret)
- goto dec_and_free;
-
- /*
- * All previous writes have started writeback in NOCOW mode, so now
- * we force future writes to fallback to COW mode during snapshot
- * creation.
- */
- atomic_inc(&root->snapshot_force_cow);
- snapshot_force_cow = true;
-
- btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1);
-
btrfs_init_block_rsv(&pending_snapshot->block_rsv,
BTRFS_BLOCK_RSV_TEMP);
/*
@@ -806,7 +784,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
&pending_snapshot->block_rsv, 8,
false);
if (ret)
- goto dec_and_free;
+ goto free_pending;
pending_snapshot->dentry = dentry;
pending_snapshot->root = root;
@@ -848,11 +826,6 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
fail:
btrfs_put_root(pending_snapshot->snap);
btrfs_subvolume_release_metadata(fs_info, &pending_snapshot->block_rsv);
-dec_and_free:
- if (snapshot_force_cow)
- atomic_dec(&root->snapshot_force_cow);
- btrfs_drew_read_unlock(&root->snapshot_lock);
-
free_pending:
kfree(pending_snapshot->root_item);
btrfs_free_path(pending_snapshot->path);
@@ -983,6 +956,45 @@ out_unlock:
return error;
}
+static noinline int btrfs_mksnapshot(const struct path *parent,
+ const char *name, int namelen,
+ struct btrfs_root *root,
+ bool readonly,
+ struct btrfs_qgroup_inherit *inherit)
+{
+ int ret;
+ bool snapshot_force_cow = false;
+
+ /*
+ * Force new buffered writes to reserve space even when NOCOW is
+ * possible. This is to avoid later writeback (running delalloc) falling
+ * back to COW mode and unexpectedly failing with ENOSPC.
+ */
+ btrfs_drew_read_lock(&root->snapshot_lock);
+
+ ret = btrfs_start_delalloc_snapshot(root);
+ if (ret)
+ goto out;
+
+ /*
+ * All previous writes have started writeback in NOCOW mode, so now
+ * we force future writes to fall back to COW mode during snapshot
+ * creation.
+ */
+ atomic_inc(&root->snapshot_force_cow);
+ snapshot_force_cow = true;
+
+ btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1);
+
+ ret = btrfs_mksubvol(parent, name, namelen,
+ root, readonly, inherit);
+out:
+ if (snapshot_force_cow)
+ atomic_dec(&root->snapshot_force_cow);
+ btrfs_drew_read_unlock(&root->snapshot_lock);
+ return ret;
+}
+
/*
* When we're defragging a range, we don't want to kick it off again
* if it is really just waiting for delalloc to send it down.
@@ -1762,7 +1774,7 @@ static noinline int __btrfs_ioctl_snap_create(struct file *file,
*/
ret = -EPERM;
} else {
- ret = btrfs_mksubvol(&file->f_path, name, namelen,
+ ret = btrfs_mksnapshot(&file->f_path, name, namelen,
BTRFS_I(src_inode)->root,
readonly, inherit);
}
@@ -2127,10 +2139,7 @@ static noinline int search_ioctl(struct inode *inode,
/* search the root of the inode that was passed */
root = btrfs_grab_root(BTRFS_I(inode)->root);
} else {
- key.objectid = sk->tree_id;
- key.type = BTRFS_ROOT_ITEM_KEY;
- key.offset = (u64)-1;
- root = btrfs_get_fs_root(info, &key, true);
+ root = btrfs_get_fs_root(info, sk->tree_id, true);
if (IS_ERR(root)) {
btrfs_free_path(path);
return PTR_ERR(root);
@@ -2263,10 +2272,7 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info,
ptr = &name[BTRFS_INO_LOOKUP_PATH_MAX - 1];
- key.objectid = tree_id;
- key.type = BTRFS_ROOT_ITEM_KEY;
- key.offset = (u64)-1;
- root = btrfs_get_fs_root(info, &key, true);
+ root = btrfs_get_fs_root(info, tree_id, true);
if (IS_ERR(root)) {
ret = PTR_ERR(root);
root = NULL;
@@ -2359,10 +2365,7 @@ static int btrfs_search_path_in_tree_user(struct inode *inode,
if (dirid != upper_limit.objectid) {
ptr = &args->path[BTRFS_INO_LOOKUP_USER_PATH_MAX - 1];
- key.objectid = treeid;
- key.type = BTRFS_ROOT_ITEM_KEY;
- key.offset = (u64)-1;
- root = btrfs_get_fs_root(fs_info, &key, true);
+ root = btrfs_get_fs_root(fs_info, treeid, true);
if (IS_ERR(root)) {
ret = PTR_ERR(root);
goto out;
@@ -2421,7 +2424,7 @@ static int btrfs_search_path_in_tree_user(struct inode *inode,
goto out_put;
}
- temp_inode = btrfs_iget(sb, &key2, root);
+ temp_inode = btrfs_iget(sb, key2.objectid, root);
if (IS_ERR(temp_inode)) {
ret = PTR_ERR(temp_inode);
goto out_put;
@@ -2608,9 +2611,7 @@ static int btrfs_ioctl_get_subvol_info(struct file *file, void __user *argp)
/* Get root_item of inode's subvolume */
key.objectid = BTRFS_I(inode)->root->root_key.objectid;
- key.type = BTRFS_ROOT_ITEM_KEY;
- key.offset = (u64)-1;
- root = btrfs_get_fs_root(fs_info, &key, true);
+ root = btrfs_get_fs_root(fs_info, key.objectid, true);
if (IS_ERR(root)) {
ret = PTR_ERR(root);
goto out_free;
@@ -3278,7 +3279,6 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
struct btrfs_dir_item *di;
struct btrfs_trans_handle *trans;
struct btrfs_path *path = NULL;
- struct btrfs_key location;
struct btrfs_disk_key disk_key;
u64 objectid = 0;
u64 dir_id;
@@ -3299,11 +3299,7 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
if (!objectid)
objectid = BTRFS_FS_TREE_OBJECTID;
- location.objectid = objectid;
- location.type = BTRFS_ROOT_ITEM_KEY;
- location.offset = (u64)-1;
-
- new_root = btrfs_get_fs_root(fs_info, &location, true);
+ new_root = btrfs_get_fs_root(fs_info, objectid, true);
if (IS_ERR(new_root)) {
ret = PTR_ERR(new_root);
goto out;
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index fb647d8cf527..f75612e18a82 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -410,6 +410,7 @@ void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb)
* The rwlock is held for write upon exit.
*/
void btrfs_tree_lock(struct extent_buffer *eb)
+ __acquires(&eb->lock)
{
u64 start_ns = 0;
diff --git a/fs/btrfs/misc.h b/fs/btrfs/misc.h
index 72bab64ecf60..6461ebc3a1c1 100644
--- a/fs/btrfs/misc.h
+++ b/fs/btrfs/misc.h
@@ -6,6 +6,7 @@
#include <linux/sched.h>
#include <linux/wait.h>
#include <asm/div64.h>
+#include <linux/rbtree.h>
#define in_range(b, first, len) ((b) >= (first) && (b) < (first) + (len))
@@ -58,4 +59,57 @@ static inline bool has_single_bit_set(u64 n)
return is_power_of_two_u64(n);
}
+/*
+ * Simple bytenr-based rb_tree related structures
+ *
+ * Any structure that wants to use bytenr as its single search index should
+ * start with these members.
+ */
+struct rb_simple_node {
+ struct rb_node rb_node;
+ u64 bytenr;
+};
+
+static inline struct rb_node *rb_simple_search(struct rb_root *root, u64 bytenr)
+{
+ struct rb_node *node = root->rb_node;
+ struct rb_simple_node *entry;
+
+ while (node) {
+ entry = rb_entry(node, struct rb_simple_node, rb_node);
+
+ if (bytenr < entry->bytenr)
+ node = node->rb_left;
+ else if (bytenr > entry->bytenr)
+ node = node->rb_right;
+ else
+ return node;
+ }
+ return NULL;
+}
+
+static inline struct rb_node *rb_simple_insert(struct rb_root *root, u64 bytenr,
+ struct rb_node *node)
+{
+ struct rb_node **p = &root->rb_node;
+ struct rb_node *parent = NULL;
+ struct rb_simple_node *entry;
+
+ while (*p) {
+ parent = *p;
+ entry = rb_entry(parent, struct rb_simple_node, rb_node);
+
+ if (bytenr < entry->bytenr)
+ p = &(*p)->rb_left;
+ else if (bytenr > entry->bytenr)
+ p = &(*p)->rb_right;
+ else
+ return parent;
+ }
+
+ rb_link_node(node, parent, p);
+ rb_insert_color(node, root);
+ return NULL;
+}
+
#endif
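rb_simple_search() and rb_simple_insert() only work if the caller's structure starts with the same { rb_node, bytenr } layout, since the helpers cast the rb_node back with rb_entry(). A minimal caller sketch, patterned on find_reloc_root() in the relocation.c hunk below (struct and function names are illustrative):

	struct example_node {
		struct {
			struct rb_node rb_node;
			u64 bytenr;
		};	/* layout-compatible with rb_simple_node */
		void *data;
	};

	static void *example_lookup(struct rb_root *root, u64 bytenr)
	{
		struct rb_node *rb_node;
		struct example_node *node;

		rb_node = rb_simple_search(root, bytenr);
		if (!rb_node)
			return NULL;
		node = rb_entry(rb_node, struct example_node, rb_node);
		return node->data;
	}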
diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c
index ff1ff90e48b1..2dcb1cb21634 100644
--- a/fs/btrfs/props.c
+++ b/fs/btrfs/props.c
@@ -408,19 +408,14 @@ int btrfs_subvol_inherit_props(struct btrfs_trans_handle *trans,
struct btrfs_root *parent_root)
{
struct super_block *sb = root->fs_info->sb;
- struct btrfs_key key;
struct inode *parent_inode, *child_inode;
int ret;
- key.objectid = BTRFS_FIRST_FREE_OBJECTID;
- key.type = BTRFS_INODE_ITEM_KEY;
- key.offset = 0;
-
- parent_inode = btrfs_iget(sb, &key, parent_root);
+ parent_inode = btrfs_iget(sb, BTRFS_FIRST_FREE_OBJECTID, parent_root);
if (IS_ERR(parent_inode))
return PTR_ERR(parent_inode);
- child_inode = btrfs_iget(sb, &key, root);
+ child_inode = btrfs_iget(sb, BTRFS_FIRST_FREE_OBJECTID, root);
if (IS_ERR(child_inode)) {
iput(parent_inode);
return PTR_ERR(child_inode);
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index c3888fb367e7..5bd4089ad0e1 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -2622,6 +2622,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
struct btrfs_root *quota_root;
struct btrfs_qgroup *srcgroup;
struct btrfs_qgroup *dstgroup;
+ bool need_rescan = false;
u32 level_size = 0;
u64 nums;
@@ -2765,6 +2766,13 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
goto unlock;
}
++i_qgroups;
+
+ /*
+ * If we're doing a snapshot, and adding the snapshot to a new
+ * qgroup, the numbers are guaranteed to be incorrect.
+ */
+ if (srcid)
+ need_rescan = true;
}
for (i = 0; i < inherit->num_ref_copies; ++i, i_qgroups += 2) {
@@ -2784,6 +2792,9 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
dst->rfer = src->rfer - level_size;
dst->rfer_cmpr = src->rfer_cmpr - level_size;
+
+ /* Manually tweaking numbers certainly needs a rescan */
+ need_rescan = true;
}
for (i = 0; i < inherit->num_excl_copies; ++i, i_qgroups += 2) {
struct btrfs_qgroup *src;
@@ -2802,6 +2813,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
dst->excl = src->excl + level_size;
dst->excl_cmpr = src->excl_cmpr + level_size;
+ need_rescan = true;
}
unlock:
@@ -2809,6 +2821,8 @@ unlock:
out:
if (!committing)
mutex_unlock(&fs_info->qgroup_ioctl_lock);
+ if (need_rescan)
+ fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
return ret;
}
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 03bc7134e8cb..3bbae80c752f 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -24,6 +24,7 @@
#include "delalloc-space.h"
#include "block-group.h"
#include "backref.h"
+#include "misc.h"
/*
* Relocation overview
@@ -72,100 +73,15 @@
* The entry point of relocation is relocate_block_group() function.
*/
-/*
- * backref_node, mapping_node and tree_block start with this
- */
-struct tree_entry {
- struct rb_node rb_node;
- u64 bytenr;
-};
-
-/*
- * present a tree block in the backref cache
- */
-struct backref_node {
- struct rb_node rb_node;
- u64 bytenr;
-
- u64 new_bytenr;
- /* objectid of tree block owner, can be not uptodate */
- u64 owner;
- /* link to pending, changed or detached list */
- struct list_head list;
- /* list of upper level blocks reference this block */
- struct list_head upper;
- /* list of child blocks in the cache */
- struct list_head lower;
- /* NULL if this node is not tree root */
- struct btrfs_root *root;
- /* extent buffer got by COW the block */
- struct extent_buffer *eb;
- /* level of tree block */
- unsigned int level:8;
- /* is the block in non-reference counted tree */
- unsigned int cowonly:1;
- /* 1 if no child node in the cache */
- unsigned int lowest:1;
- /* is the extent buffer locked */
- unsigned int locked:1;
- /* has the block been processed */
- unsigned int processed:1;
- /* have backrefs of this block been checked */
- unsigned int checked:1;
- /*
- * 1 if corresponding block has been cowed but some upper
- * level block pointers may not point to the new location
- */
- unsigned int pending:1;
- /*
- * 1 if the backref node isn't connected to any other
- * backref node.
- */
- unsigned int detached:1;
-};
-
-/*
- * present a block pointer in the backref cache
- */
-struct backref_edge {
- struct list_head list[2];
- struct backref_node *node[2];
-};
-
-#define LOWER 0
-#define UPPER 1
#define RELOCATION_RESERVED_NODES 256
-
-struct backref_cache {
- /* red black tree of all backref nodes in the cache */
- struct rb_root rb_root;
- /* for passing backref nodes to btrfs_reloc_cow_block */
- struct backref_node *path[BTRFS_MAX_LEVEL];
- /*
- * list of blocks that have been cowed but some block
- * pointers in upper level blocks may not reflect the
- * new location
- */
- struct list_head pending[BTRFS_MAX_LEVEL];
- /* list of backref nodes with no child node */
- struct list_head leaves;
- /* list of blocks that have been cowed in current transaction */
- struct list_head changed;
- /* list of detached backref node. */
- struct list_head detached;
-
- u64 last_trans;
-
- int nr_nodes;
- int nr_edges;
-};
-
/*
* map address of tree root to tree
*/
struct mapping_node {
- struct rb_node rb_node;
- u64 bytenr;
+ struct {
+ struct rb_node rb_node;
+ u64 bytenr;
+ }; /* Use rb_simple_node for search/insert */
void *data;
};
@@ -178,8 +94,10 @@ struct mapping_tree {
* present a tree block to process
*/
struct tree_block {
- struct rb_node rb_node;
- u64 bytenr;
+ struct {
+ struct rb_node rb_node;
+ u64 bytenr;
+ }; /* Use rb_simple_node for search/insert */
struct btrfs_key key;
unsigned int level:8;
unsigned int key_ready:1;
@@ -204,7 +122,7 @@ struct reloc_control {
struct btrfs_block_rsv *block_rsv;
- struct backref_cache backref_cache;
+ struct btrfs_backref_cache backref_cache;
struct file_extent_cluster cluster;
/* tree blocks have been processed */
@@ -235,168 +153,41 @@ struct reloc_control {
#define MOVE_DATA_EXTENTS 0
#define UPDATE_DATA_PTRS 1
-static void remove_backref_node(struct backref_cache *cache,
- struct backref_node *node);
-static void __mark_block_processed(struct reloc_control *rc,
- struct backref_node *node);
-
-static void mapping_tree_init(struct mapping_tree *tree)
-{
- tree->rb_root = RB_ROOT;
- spin_lock_init(&tree->lock);
-}
-
-static void backref_cache_init(struct backref_cache *cache)
-{
- int i;
- cache->rb_root = RB_ROOT;
- for (i = 0; i < BTRFS_MAX_LEVEL; i++)
- INIT_LIST_HEAD(&cache->pending[i]);
- INIT_LIST_HEAD(&cache->changed);
- INIT_LIST_HEAD(&cache->detached);
- INIT_LIST_HEAD(&cache->leaves);
-}
-
-static void backref_cache_cleanup(struct backref_cache *cache)
-{
- struct backref_node *node;
- int i;
-
- while (!list_empty(&cache->detached)) {
- node = list_entry(cache->detached.next,
- struct backref_node, list);
- remove_backref_node(cache, node);
- }
-
- while (!list_empty(&cache->leaves)) {
- node = list_entry(cache->leaves.next,
- struct backref_node, lower);
- remove_backref_node(cache, node);
- }
-
- cache->last_trans = 0;
-
- for (i = 0; i < BTRFS_MAX_LEVEL; i++)
- ASSERT(list_empty(&cache->pending[i]));
- ASSERT(list_empty(&cache->changed));
- ASSERT(list_empty(&cache->detached));
- ASSERT(RB_EMPTY_ROOT(&cache->rb_root));
- ASSERT(!cache->nr_nodes);
- ASSERT(!cache->nr_edges);
-}
-
-static struct backref_node *alloc_backref_node(struct backref_cache *cache)
-{
- struct backref_node *node;
-
- node = kzalloc(sizeof(*node), GFP_NOFS);
- if (node) {
- INIT_LIST_HEAD(&node->list);
- INIT_LIST_HEAD(&node->upper);
- INIT_LIST_HEAD(&node->lower);
- RB_CLEAR_NODE(&node->rb_node);
- cache->nr_nodes++;
- }
- return node;
-}
-
-static void free_backref_node(struct backref_cache *cache,
- struct backref_node *node)
-{
- if (node) {
- cache->nr_nodes--;
- btrfs_put_root(node->root);
- kfree(node);
- }
-}
-
-static struct backref_edge *alloc_backref_edge(struct backref_cache *cache)
-{
- struct backref_edge *edge;
-
- edge = kzalloc(sizeof(*edge), GFP_NOFS);
- if (edge)
- cache->nr_edges++;
- return edge;
-}
-
-static void free_backref_edge(struct backref_cache *cache,
- struct backref_edge *edge)
+static void mark_block_processed(struct reloc_control *rc,
+ struct btrfs_backref_node *node)
{
- if (edge) {
- cache->nr_edges--;
- kfree(edge);
- }
-}
+ u32 blocksize;
-static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr,
- struct rb_node *node)
-{
- struct rb_node **p = &root->rb_node;
- struct rb_node *parent = NULL;
- struct tree_entry *entry;
-
- while (*p) {
- parent = *p;
- entry = rb_entry(parent, struct tree_entry, rb_node);
-
- if (bytenr < entry->bytenr)
- p = &(*p)->rb_left;
- else if (bytenr > entry->bytenr)
- p = &(*p)->rb_right;
- else
- return parent;
+ if (node->level == 0 ||
+ in_range(node->bytenr, rc->block_group->start,
+ rc->block_group->length)) {
+ blocksize = rc->extent_root->fs_info->nodesize;
+ set_extent_bits(&rc->processed_blocks, node->bytenr,
+ node->bytenr + blocksize - 1, EXTENT_DIRTY);
}
-
- rb_link_node(node, parent, p);
- rb_insert_color(node, root);
- return NULL;
+ node->processed = 1;
}
-static struct rb_node *tree_search(struct rb_root *root, u64 bytenr)
-{
- struct rb_node *n = root->rb_node;
- struct tree_entry *entry;
-
- while (n) {
- entry = rb_entry(n, struct tree_entry, rb_node);
- if (bytenr < entry->bytenr)
- n = n->rb_left;
- else if (bytenr > entry->bytenr)
- n = n->rb_right;
- else
- return n;
- }
- return NULL;
-}
-
-static void backref_tree_panic(struct rb_node *rb_node, int errno, u64 bytenr)
+static void mapping_tree_init(struct mapping_tree *tree)
{
-
- struct btrfs_fs_info *fs_info = NULL;
- struct backref_node *bnode = rb_entry(rb_node, struct backref_node,
- rb_node);
- if (bnode->root)
- fs_info = bnode->root->fs_info;
- btrfs_panic(fs_info, errno,
- "Inconsistency in backref cache found at offset %llu",
- bytenr);
+ tree->rb_root = RB_ROOT;
+ spin_lock_init(&tree->lock);
}
/*
* walk up backref nodes until reach node presents tree root
*/
-static struct backref_node *walk_up_backref(struct backref_node *node,
- struct backref_edge *edges[],
- int *index)
+static struct btrfs_backref_node *walk_up_backref(
+ struct btrfs_backref_node *node,
+ struct btrfs_backref_edge *edges[], int *index)
{
- struct backref_edge *edge;
+ struct btrfs_backref_edge *edge;
int idx = *index;
while (!list_empty(&node->upper)) {
edge = list_entry(node->upper.next,
- struct backref_edge, list[LOWER]);
+ struct btrfs_backref_edge, list[LOWER]);
edges[idx++] = edge;
node = edge->node[UPPER];
}
@@ -408,11 +199,11 @@ static struct backref_node *walk_up_backref(struct backref_node *node,
/*
* walk down backref nodes to find start of next reference path
*/
-static struct backref_node *walk_down_backref(struct backref_edge *edges[],
- int *index)
+static struct btrfs_backref_node *walk_down_backref(
+ struct btrfs_backref_edge *edges[], int *index)
{
- struct backref_edge *edge;
- struct backref_node *lower;
+ struct btrfs_backref_edge *edge;
+ struct btrfs_backref_node *lower;
int idx = *index;
while (idx > 0) {
@@ -423,7 +214,7 @@ static struct backref_node *walk_down_backref(struct backref_edge *edges[],
continue;
}
edge = list_entry(edge->list[LOWER].next,
- struct backref_edge, list[LOWER]);
+ struct btrfs_backref_edge, list[LOWER]);
edges[idx - 1] = edge;
*index = idx;
return edge->node[UPPER];
@@ -432,95 +223,24 @@ static struct backref_node *walk_down_backref(struct backref_edge *edges[],
return NULL;
}
-static void unlock_node_buffer(struct backref_node *node)
-{
- if (node->locked) {
- btrfs_tree_unlock(node->eb);
- node->locked = 0;
- }
-}
-
-static void drop_node_buffer(struct backref_node *node)
-{
- if (node->eb) {
- unlock_node_buffer(node);
- free_extent_buffer(node->eb);
- node->eb = NULL;
- }
-}
-
-static void drop_backref_node(struct backref_cache *tree,
- struct backref_node *node)
-{
- BUG_ON(!list_empty(&node->upper));
-
- drop_node_buffer(node);
- list_del(&node->list);
- list_del(&node->lower);
- if (!RB_EMPTY_NODE(&node->rb_node))
- rb_erase(&node->rb_node, &tree->rb_root);
- free_backref_node(tree, node);
-}
-
-/*
- * remove a backref node from the backref cache
- */
-static void remove_backref_node(struct backref_cache *cache,
- struct backref_node *node)
-{
- struct backref_node *upper;
- struct backref_edge *edge;
-
- if (!node)
- return;
-
- BUG_ON(!node->lowest && !node->detached);
- while (!list_empty(&node->upper)) {
- edge = list_entry(node->upper.next, struct backref_edge,
- list[LOWER]);
- upper = edge->node[UPPER];
- list_del(&edge->list[LOWER]);
- list_del(&edge->list[UPPER]);
- free_backref_edge(cache, edge);
-
- if (RB_EMPTY_NODE(&upper->rb_node)) {
- BUG_ON(!list_empty(&node->upper));
- drop_backref_node(cache, node);
- node = upper;
- node->lowest = 1;
- continue;
- }
- /*
- * add the node to leaf node list if no other
- * child block cached.
- */
- if (list_empty(&upper->lower)) {
- list_add_tail(&upper->lower, &cache->leaves);
- upper->lowest = 1;
- }
- }
-
- drop_backref_node(cache, node);
-}
-
-static void update_backref_node(struct backref_cache *cache,
- struct backref_node *node, u64 bytenr)
+static void update_backref_node(struct btrfs_backref_cache *cache,
+ struct btrfs_backref_node *node, u64 bytenr)
{
struct rb_node *rb_node;
rb_erase(&node->rb_node, &cache->rb_root);
node->bytenr = bytenr;
- rb_node = tree_insert(&cache->rb_root, node->bytenr, &node->rb_node);
+ rb_node = rb_simple_insert(&cache->rb_root, node->bytenr, &node->rb_node);
if (rb_node)
- backref_tree_panic(rb_node, -EEXIST, bytenr);
+ btrfs_backref_panic(cache->fs_info, bytenr, -EEXIST);
}
/*
* update backref cache after a transaction commit
*/
static int update_backref_cache(struct btrfs_trans_handle *trans,
- struct backref_cache *cache)
+ struct btrfs_backref_cache *cache)
{
- struct backref_node *node;
+ struct btrfs_backref_node *node;
int level = 0;
if (cache->last_trans == 0) {
@@ -538,13 +258,13 @@ static int update_backref_cache(struct btrfs_trans_handle *trans,
*/
while (!list_empty(&cache->detached)) {
node = list_entry(cache->detached.next,
- struct backref_node, list);
- remove_backref_node(cache, node);
+ struct btrfs_backref_node, list);
+ btrfs_backref_cleanup_node(cache, node);
}
while (!list_empty(&cache->changed)) {
node = list_entry(cache->changed.next,
- struct backref_node, list);
+ struct btrfs_backref_node, list);
list_del_init(&node->list);
BUG_ON(node->pending);
update_backref_node(cache, node, node->new_bytenr);
@@ -585,7 +305,8 @@ static bool reloc_root_is_dead(struct btrfs_root *root)
*
* Reloc tree after swap is considered dead, thus not considered as valid.
* This is enough for most callers, as they don't distinguish dead reloc root
- * from no reloc root. But should_ignore_root() below is a special case.
+ * from no reloc root. But btrfs_should_ignore_reloc_root() below is a
+ * special case.
*/
static bool have_reloc_root(struct btrfs_root *root)
{
@@ -596,11 +317,11 @@ static bool have_reloc_root(struct btrfs_root *root)
return true;
}
-static int should_ignore_root(struct btrfs_root *root)
+int btrfs_should_ignore_reloc_root(struct btrfs_root *root)
{
struct btrfs_root *reloc_root;
- if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))
+ if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
return 0;
/* This root has been merged with its reloc tree, we can ignore it */
@@ -622,18 +343,20 @@ static int should_ignore_root(struct btrfs_root *root)
*/
return 1;
}
+
/*
* find reloc tree by address of tree root
*/
-static struct btrfs_root *find_reloc_root(struct reloc_control *rc,
- u64 bytenr)
+struct btrfs_root *find_reloc_root(struct btrfs_fs_info *fs_info, u64 bytenr)
{
+ struct reloc_control *rc = fs_info->reloc_ctl;
struct rb_node *rb_node;
struct mapping_node *node;
struct btrfs_root *root = NULL;
+ ASSERT(rc);
spin_lock(&rc->reloc_root_tree.lock);
- rb_node = tree_search(&rc->reloc_root_tree.rb_root, bytenr);
+ rb_node = rb_simple_search(&rc->reloc_root_tree.rb_root, bytenr);
if (rb_node) {
node = rb_entry(rb_node, struct mapping_node, rb_node);
root = (struct btrfs_root *)node->data;
@@ -642,594 +365,165 @@ static struct btrfs_root *find_reloc_root(struct reloc_control *rc,
return btrfs_grab_root(root);
}
-static struct btrfs_root *read_fs_root(struct btrfs_fs_info *fs_info,
- u64 root_objectid)
+/*
+ * For useless nodes, do two major cleanups:
+ *
+ * - Clean up the child edges and nodes
+ *   If a child node is also an orphan (has no parent) during cleanup, the
+ *   child node is cleaned up as well.
+ *
+ * - Free up leaves (level 0) while keeping non-leaf nodes detached
+ *   Non-leaf nodes stay in the cache, marked as "detached".
+ *
+ * Return false if @node is not in the @useless_nodes list.
+ * Return true if @node is in the @useless_nodes list.
+ */
+static bool handle_useless_nodes(struct reloc_control *rc,
+ struct btrfs_backref_node *node)
{
- struct btrfs_key key;
+ struct btrfs_backref_cache *cache = &rc->backref_cache;
+ struct list_head *useless_node = &cache->useless_node;
+ bool ret = false;
- key.objectid = root_objectid;
- key.type = BTRFS_ROOT_ITEM_KEY;
- key.offset = (u64)-1;
+ while (!list_empty(useless_node)) {
+ struct btrfs_backref_node *cur;
- return btrfs_get_fs_root(fs_info, &key, false);
-}
+ cur = list_first_entry(useless_node, struct btrfs_backref_node,
+ list);
+ list_del_init(&cur->list);
-static noinline_for_stack
-int find_inline_backref(struct extent_buffer *leaf, int slot,
- unsigned long *ptr, unsigned long *end)
-{
- struct btrfs_key key;
- struct btrfs_extent_item *ei;
- struct btrfs_tree_block_info *bi;
- u32 item_size;
+ /* Only tree root nodes can be added to @useless_nodes */
+ ASSERT(list_empty(&cur->upper));
- btrfs_item_key_to_cpu(leaf, &key, slot);
+ if (cur == node)
+ ret = true;
- item_size = btrfs_item_size_nr(leaf, slot);
- if (item_size < sizeof(*ei)) {
- btrfs_print_v0_err(leaf->fs_info);
- btrfs_handle_fs_error(leaf->fs_info, -EINVAL, NULL);
- return 1;
- }
- ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
- WARN_ON(!(btrfs_extent_flags(leaf, ei) &
- BTRFS_EXTENT_FLAG_TREE_BLOCK));
+ /* The node is the lowest node */
+ if (cur->lowest) {
+ list_del_init(&cur->lower);
+ cur->lowest = 0;
+ }
- if (key.type == BTRFS_EXTENT_ITEM_KEY &&
- item_size <= sizeof(*ei) + sizeof(*bi)) {
- WARN_ON(item_size < sizeof(*ei) + sizeof(*bi));
- return 1;
- }
- if (key.type == BTRFS_METADATA_ITEM_KEY &&
- item_size <= sizeof(*ei)) {
- WARN_ON(item_size < sizeof(*ei));
- return 1;
- }
+ /* Cleanup the lower edges */
+ while (!list_empty(&cur->lower)) {
+ struct btrfs_backref_edge *edge;
+ struct btrfs_backref_node *lower;
- if (key.type == BTRFS_EXTENT_ITEM_KEY) {
- bi = (struct btrfs_tree_block_info *)(ei + 1);
- *ptr = (unsigned long)(bi + 1);
- } else {
- *ptr = (unsigned long)(ei + 1);
+ edge = list_entry(cur->lower.next,
+ struct btrfs_backref_edge, list[UPPER]);
+ list_del(&edge->list[UPPER]);
+ list_del(&edge->list[LOWER]);
+ lower = edge->node[LOWER];
+ btrfs_backref_free_edge(cache, edge);
+
+ /* Child node is also an orphan, queue it for cleanup */
+ if (list_empty(&lower->upper))
+ list_add(&lower->list, useless_node);
+ }
+ /* Mark this block processed for relocation */
+ mark_block_processed(rc, cur);
+
+ /*
+ * Backref nodes for tree leaves are deleted from the cache.
+ * Backref nodes for upper level tree blocks are left in the
+ * cache to avoid unnecessary backref lookup.
+ */
+ if (cur->level > 0) {
+ list_add(&cur->list, &cache->detached);
+ cur->detached = 1;
+ } else {
+ rb_erase(&cur->rb_node, &cache->rb_root);
+ btrfs_backref_free_node(cache, cur);
+ }
}
- *end = (unsigned long)ei + item_size;
- return 0;
+ return ret;
}
/*
- * build backref tree for a given tree block. root of the backref tree
- * corresponds the tree block, leaves of the backref tree correspond
- * roots of b-trees that reference the tree block.
+ * Build a backref tree for a given tree block. The root of the backref tree
+ * corresponds to the tree block, and the leaves of the backref tree correspond
+ * to the roots of b-trees that reference the tree block.
*
- * the basic idea of this function is check backrefs of a given block
- * to find upper level blocks that reference the block, and then check
- * backrefs of these upper level blocks recursively. the recursion stop
- * when tree root is reached or backrefs for the block is cached.
+ * The basic idea of this function is to check backrefs of a given block to
+ * find upper level blocks that reference the block, and then check backrefs of
+ * these upper level blocks recursively. The recursion stops when a tree root
+ * is reached or the backrefs for the block are already cached.
*
- * NOTE: if we find backrefs for a block are cached, we know backrefs
- * for all upper level blocks that directly/indirectly reference the
- * block are also cached.
+ * NOTE: if we find that backrefs for a block are cached, we know backrefs for
+ * all upper level blocks that directly/indirectly reference the block are also
+ * cached.
*/
-static noinline_for_stack
-struct backref_node *build_backref_tree(struct reloc_control *rc,
- struct btrfs_key *node_key,
- int level, u64 bytenr)
+static noinline_for_stack struct btrfs_backref_node *build_backref_tree(
+ struct reloc_control *rc, struct btrfs_key *node_key,
+ int level, u64 bytenr)
{
- struct backref_cache *cache = &rc->backref_cache;
- struct btrfs_path *path1; /* For searching extent root */
- struct btrfs_path *path2; /* For searching parent of TREE_BLOCK_REF */
- struct extent_buffer *eb;
- struct btrfs_root *root;
- struct backref_node *cur;
- struct backref_node *upper;
- struct backref_node *lower;
- struct backref_node *node = NULL;
- struct backref_node *exist = NULL;
- struct backref_edge *edge;
- struct rb_node *rb_node;
- struct btrfs_key key;
- unsigned long end;
- unsigned long ptr;
- LIST_HEAD(list); /* Pending edge list, upper node needs to be checked */
- LIST_HEAD(useless);
- int cowonly;
+ struct btrfs_backref_iter *iter;
+ struct btrfs_backref_cache *cache = &rc->backref_cache;
+ /* For searching parent of TREE_BLOCK_REF */
+ struct btrfs_path *path;
+ struct btrfs_backref_node *cur;
+ struct btrfs_backref_node *node = NULL;
+ struct btrfs_backref_edge *edge;
int ret;
int err = 0;
- bool need_check = true;
- path1 = btrfs_alloc_path();
- path2 = btrfs_alloc_path();
- if (!path1 || !path2) {
+ iter = btrfs_backref_iter_alloc(rc->extent_root->fs_info, GFP_NOFS);
+ if (!iter)
+ return ERR_PTR(-ENOMEM);
+ path = btrfs_alloc_path();
+ if (!path) {
err = -ENOMEM;
goto out;
}
- node = alloc_backref_node(cache);
+ node = btrfs_backref_alloc_node(cache, bytenr, level);
if (!node) {
err = -ENOMEM;
goto out;
}
- node->bytenr = bytenr;
- node->level = level;
node->lowest = 1;
cur = node;
-again:
- end = 0;
- ptr = 0;
- key.objectid = cur->bytenr;
- key.type = BTRFS_METADATA_ITEM_KEY;
- key.offset = (u64)-1;
-
- path1->search_commit_root = 1;
- path1->skip_locking = 1;
- ret = btrfs_search_slot(NULL, rc->extent_root, &key, path1,
- 0, 0);
- if (ret < 0) {
- err = ret;
- goto out;
- }
- ASSERT(ret);
- ASSERT(path1->slots[0]);
-
- path1->slots[0]--;
- WARN_ON(cur->checked);
- if (!list_empty(&cur->upper)) {
- /*
- * the backref was added previously when processing
- * backref of type BTRFS_TREE_BLOCK_REF_KEY
- */
- ASSERT(list_is_singular(&cur->upper));
- edge = list_entry(cur->upper.next, struct backref_edge,
- list[LOWER]);
- ASSERT(list_empty(&edge->list[UPPER]));
- exist = edge->node[UPPER];
- /*
- * add the upper level block to pending list if we need
- * check its backrefs
- */
- if (!exist->checked)
- list_add_tail(&edge->list[UPPER], &list);
- } else {
- exist = NULL;
- }
-
- while (1) {
- cond_resched();
- eb = path1->nodes[0];
-
- if (ptr >= end) {
- if (path1->slots[0] >= btrfs_header_nritems(eb)) {
- ret = btrfs_next_leaf(rc->extent_root, path1);
- if (ret < 0) {
- err = ret;
- goto out;
- }
- if (ret > 0)
- break;
- eb = path1->nodes[0];
- }
-
- btrfs_item_key_to_cpu(eb, &key, path1->slots[0]);
- if (key.objectid != cur->bytenr) {
- WARN_ON(exist);
- break;
- }
-
- if (key.type == BTRFS_EXTENT_ITEM_KEY ||
- key.type == BTRFS_METADATA_ITEM_KEY) {
- ret = find_inline_backref(eb, path1->slots[0],
- &ptr, &end);
- if (ret)
- goto next;
- }
- }
-
- if (ptr < end) {
- /* update key for inline back ref */
- struct btrfs_extent_inline_ref *iref;
- int type;
- iref = (struct btrfs_extent_inline_ref *)ptr;
- type = btrfs_get_extent_inline_ref_type(eb, iref,
- BTRFS_REF_TYPE_BLOCK);
- if (type == BTRFS_REF_TYPE_INVALID) {
- err = -EUCLEAN;
- goto out;
- }
- key.type = type;
- key.offset = btrfs_extent_inline_ref_offset(eb, iref);
-
- WARN_ON(key.type != BTRFS_TREE_BLOCK_REF_KEY &&
- key.type != BTRFS_SHARED_BLOCK_REF_KEY);
- }
-
- /*
- * Parent node found and matches current inline ref, no need to
- * rebuild this node for this inline ref.
- */
- if (exist &&
- ((key.type == BTRFS_TREE_BLOCK_REF_KEY &&
- exist->owner == key.offset) ||
- (key.type == BTRFS_SHARED_BLOCK_REF_KEY &&
- exist->bytenr == key.offset))) {
- exist = NULL;
- goto next;
- }
-
- /* SHARED_BLOCK_REF means key.offset is the parent bytenr */
- if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
- if (key.objectid == key.offset) {
- /*
- * Only root blocks of reloc trees use backref
- * pointing to itself.
- */
- root = find_reloc_root(rc, cur->bytenr);
- ASSERT(root);
- cur->root = root;
- break;
- }
-
- edge = alloc_backref_edge(cache);
- if (!edge) {
- err = -ENOMEM;
- goto out;
- }
- rb_node = tree_search(&cache->rb_root, key.offset);
- if (!rb_node) {
- upper = alloc_backref_node(cache);
- if (!upper) {
- free_backref_edge(cache, edge);
- err = -ENOMEM;
- goto out;
- }
- upper->bytenr = key.offset;
- upper->level = cur->level + 1;
- /*
- * backrefs for the upper level block isn't
- * cached, add the block to pending list
- */
- list_add_tail(&edge->list[UPPER], &list);
- } else {
- upper = rb_entry(rb_node, struct backref_node,
- rb_node);
- ASSERT(upper->checked);
- INIT_LIST_HEAD(&edge->list[UPPER]);
- }
- list_add_tail(&edge->list[LOWER], &cur->upper);
- edge->node[LOWER] = cur;
- edge->node[UPPER] = upper;
-
- goto next;
- } else if (unlikely(key.type == BTRFS_EXTENT_REF_V0_KEY)) {
- err = -EINVAL;
- btrfs_print_v0_err(rc->extent_root->fs_info);
- btrfs_handle_fs_error(rc->extent_root->fs_info, err,
- NULL);
- goto out;
- } else if (key.type != BTRFS_TREE_BLOCK_REF_KEY) {
- goto next;
- }
-
- /*
- * key.type == BTRFS_TREE_BLOCK_REF_KEY, inline ref offset
- * means the root objectid. We need to search the tree to get
- * its parent bytenr.
- */
- root = read_fs_root(rc->extent_root->fs_info, key.offset);
- if (IS_ERR(root)) {
- err = PTR_ERR(root);
- goto out;
- }
-
- if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))
- cur->cowonly = 1;
-
- if (btrfs_root_level(&root->root_item) == cur->level) {
- /* tree root */
- ASSERT(btrfs_root_bytenr(&root->root_item) ==
- cur->bytenr);
- if (should_ignore_root(root)) {
- btrfs_put_root(root);
- list_add(&cur->list, &useless);
- } else {
- cur->root = root;
- }
- break;
- }
-
- level = cur->level + 1;
-
- /* Search the tree to find parent blocks referring the block. */
- path2->search_commit_root = 1;
- path2->skip_locking = 1;
- path2->lowest_level = level;
- ret = btrfs_search_slot(NULL, root, node_key, path2, 0, 0);
- path2->lowest_level = 0;
+ /* Breadth-first search to build backref cache */
+ do {
+ ret = btrfs_backref_add_tree_node(cache, path, iter, node_key,
+ cur);
if (ret < 0) {
- btrfs_put_root(root);
err = ret;
goto out;
}
- if (ret > 0 && path2->slots[level] > 0)
- path2->slots[level]--;
-
- eb = path2->nodes[level];
- if (btrfs_node_blockptr(eb, path2->slots[level]) !=
- cur->bytenr) {
- btrfs_err(root->fs_info,
- "couldn't find block (%llu) (level %d) in tree (%llu) with key (%llu %u %llu)",
- cur->bytenr, level - 1,
- root->root_key.objectid,
- node_key->objectid, node_key->type,
- node_key->offset);
- btrfs_put_root(root);
- err = -ENOENT;
- goto out;
- }
- lower = cur;
- need_check = true;
-
- /* Add all nodes and edges in the path */
- for (; level < BTRFS_MAX_LEVEL; level++) {
- if (!path2->nodes[level]) {
- ASSERT(btrfs_root_bytenr(&root->root_item) ==
- lower->bytenr);
- if (should_ignore_root(root)) {
- btrfs_put_root(root);
- list_add(&lower->list, &useless);
- } else {
- lower->root = root;
- }
- break;
- }
-
- edge = alloc_backref_edge(cache);
- if (!edge) {
- btrfs_put_root(root);
- err = -ENOMEM;
- goto out;
- }
-
- eb = path2->nodes[level];
- rb_node = tree_search(&cache->rb_root, eb->start);
- if (!rb_node) {
- upper = alloc_backref_node(cache);
- if (!upper) {
- btrfs_put_root(root);
- free_backref_edge(cache, edge);
- err = -ENOMEM;
- goto out;
- }
- upper->bytenr = eb->start;
- upper->owner = btrfs_header_owner(eb);
- upper->level = lower->level + 1;
- if (!test_bit(BTRFS_ROOT_REF_COWS,
- &root->state))
- upper->cowonly = 1;
-
- /*
- * if we know the block isn't shared
- * we can void checking its backrefs.
- */
- if (btrfs_block_can_be_shared(root, eb))
- upper->checked = 0;
- else
- upper->checked = 1;
-
- /*
- * add the block to pending list if we
- * need check its backrefs, we only do this once
- * while walking up a tree as we will catch
- * anything else later on.
- */
- if (!upper->checked && need_check) {
- need_check = false;
- list_add_tail(&edge->list[UPPER],
- &list);
- } else {
- if (upper->checked)
- need_check = true;
- INIT_LIST_HEAD(&edge->list[UPPER]);
- }
- } else {
- upper = rb_entry(rb_node, struct backref_node,
- rb_node);
- ASSERT(upper->checked);
- INIT_LIST_HEAD(&edge->list[UPPER]);
- if (!upper->owner)
- upper->owner = btrfs_header_owner(eb);
- }
- list_add_tail(&edge->list[LOWER], &lower->upper);
- edge->node[LOWER] = lower;
- edge->node[UPPER] = upper;
-
- if (rb_node) {
- btrfs_put_root(root);
- break;
- }
- lower = upper;
- upper = NULL;
- }
- btrfs_release_path(path2);
-next:
- if (ptr < end) {
- ptr += btrfs_extent_inline_ref_size(key.type);
- if (ptr >= end) {
- WARN_ON(ptr > end);
- ptr = 0;
- end = 0;
- }
- }
- if (ptr >= end)
- path1->slots[0]++;
- }
- btrfs_release_path(path1);
-
- cur->checked = 1;
- WARN_ON(exist);
-
- /* the pending list isn't empty, take the first block to process */
- if (!list_empty(&list)) {
- edge = list_entry(list.next, struct backref_edge, list[UPPER]);
- list_del_init(&edge->list[UPPER]);
- cur = edge->node[UPPER];
- goto again;
- }
-
- /*
- * everything goes well, connect backref nodes and insert backref nodes
- * into the cache.
- */
- ASSERT(node->checked);
- cowonly = node->cowonly;
- if (!cowonly) {
- rb_node = tree_insert(&cache->rb_root, node->bytenr,
- &node->rb_node);
- if (rb_node)
- backref_tree_panic(rb_node, -EEXIST, node->bytenr);
- list_add_tail(&node->lower, &cache->leaves);
- }
-
- list_for_each_entry(edge, &node->upper, list[LOWER])
- list_add_tail(&edge->list[UPPER], &list);
-
- while (!list_empty(&list)) {
- edge = list_entry(list.next, struct backref_edge, list[UPPER]);
- list_del_init(&edge->list[UPPER]);
- upper = edge->node[UPPER];
- if (upper->detached) {
- list_del(&edge->list[LOWER]);
- lower = edge->node[LOWER];
- free_backref_edge(cache, edge);
- if (list_empty(&lower->upper))
- list_add(&lower->list, &useless);
- continue;
- }
-
- if (!RB_EMPTY_NODE(&upper->rb_node)) {
- if (upper->lowest) {
- list_del_init(&upper->lower);
- upper->lowest = 0;
- }
-
- list_add_tail(&edge->list[UPPER], &upper->lower);
- continue;
- }
-
- if (!upper->checked) {
- /*
- * Still want to blow up for developers since this is a
- * logic bug.
- */
- ASSERT(0);
- err = -EINVAL;
- goto out;
- }
- if (cowonly != upper->cowonly) {
- ASSERT(0);
- err = -EINVAL;
- goto out;
- }
-
- if (!cowonly) {
- rb_node = tree_insert(&cache->rb_root, upper->bytenr,
- &upper->rb_node);
- if (rb_node)
- backref_tree_panic(rb_node, -EEXIST,
- upper->bytenr);
+ edge = list_first_entry_or_null(&cache->pending_edge,
+ struct btrfs_backref_edge, list[UPPER]);
+ /*
+ * The pending list isn't empty, take the first block to
+ * process
+ */
+ if (edge) {
+ list_del_init(&edge->list[UPPER]);
+ cur = edge->node[UPPER];
}
+ } while (edge);
- list_add_tail(&edge->list[UPPER], &upper->lower);
-
- list_for_each_entry(edge, &upper->upper, list[LOWER])
- list_add_tail(&edge->list[UPPER], &list);
+ /* Finish the upper linkage of newly added edges/nodes */
+ ret = btrfs_backref_finish_upper_links(cache, node);
+ if (ret < 0) {
+ err = ret;
+ goto out;
}
- /*
- * process useless backref nodes. backref nodes for tree leaves
- * are deleted from the cache. backref nodes for upper level
- * tree blocks are left in the cache to avoid unnecessary backref
- * lookup.
- */
- while (!list_empty(&useless)) {
- upper = list_entry(useless.next, struct backref_node, list);
- list_del_init(&upper->list);
- ASSERT(list_empty(&upper->upper));
- if (upper == node)
- node = NULL;
- if (upper->lowest) {
- list_del_init(&upper->lower);
- upper->lowest = 0;
- }
- while (!list_empty(&upper->lower)) {
- edge = list_entry(upper->lower.next,
- struct backref_edge, list[UPPER]);
- list_del(&edge->list[UPPER]);
- list_del(&edge->list[LOWER]);
- lower = edge->node[LOWER];
- free_backref_edge(cache, edge);
- if (list_empty(&lower->upper))
- list_add(&lower->list, &useless);
- }
- __mark_block_processed(rc, upper);
- if (upper->level > 0) {
- list_add(&upper->list, &cache->detached);
- upper->detached = 1;
- } else {
- rb_erase(&upper->rb_node, &cache->rb_root);
- free_backref_node(cache, upper);
- }
- }
+ if (handle_useless_nodes(rc, node))
+ node = NULL;
out:
- btrfs_free_path(path1);
- btrfs_free_path(path2);
+ btrfs_backref_iter_free(iter);
+ btrfs_free_path(path);
if (err) {
- while (!list_empty(&useless)) {
- lower = list_entry(useless.next,
- struct backref_node, list);
- list_del_init(&lower->list);
- }
- while (!list_empty(&list)) {
- edge = list_first_entry(&list, struct backref_edge,
- list[UPPER]);
- list_del(&edge->list[UPPER]);
- list_del(&edge->list[LOWER]);
- lower = edge->node[LOWER];
- upper = edge->node[UPPER];
- free_backref_edge(cache, edge);
-
- /*
- * Lower is no longer linked to any upper backref nodes
- * and isn't in the cache, we can free it ourselves.
- */
- if (list_empty(&lower->upper) &&
- RB_EMPTY_NODE(&lower->rb_node))
- list_add(&lower->list, &useless);
-
- if (!RB_EMPTY_NODE(&upper->rb_node))
- continue;
-
- /* Add this guy's upper edges to the list to process */
- list_for_each_entry(edge, &upper->upper, list[LOWER])
- list_add_tail(&edge->list[UPPER], &list);
- if (list_empty(&upper->upper))
- list_add(&upper->list, &useless);
- }
-
- while (!list_empty(&useless)) {
- lower = list_entry(useless.next,
- struct backref_node, list);
- list_del_init(&lower->list);
- if (lower == node)
- node = NULL;
- free_backref_node(cache, lower);
- }
-
- remove_backref_node(cache, node);
+ btrfs_backref_error_cleanup(cache, node);
return ERR_PTR(err);
}
ASSERT(!node || !node->detached);
+ ASSERT(list_empty(&cache->useless_node) &&
+ list_empty(&cache->pending_edge));
return node;
}
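
Since the rewritten build_backref_tree() is spread across the interleaved +/- lines above, a condensed restatement of the new control flow may help. This is a simplified sketch using only the names introduced by this patch (error handling, ASSERTs and the failure cleanup are omitted), not a drop-in replacement:

	iter = btrfs_backref_iter_alloc(rc->extent_root->fs_info, GFP_NOFS);
	path = btrfs_alloc_path();
	node = btrfs_backref_alloc_node(cache, bytenr, level);
	node->lowest = 1;
	cur = node;

	/* Breadth-first search: expand one node, then pop the next pending edge. */
	do {
		ret = btrfs_backref_add_tree_node(cache, path, iter, node_key, cur);
		edge = list_first_entry_or_null(&cache->pending_edge,
						struct btrfs_backref_edge, list[UPPER]);
		if (edge) {
			list_del_init(&edge->list[UPPER]);
			cur = edge->node[UPPER];
		}
	} while (edge);

	/* Link the newly added edges/nodes upwards, then drop useless nodes. */
	ret = btrfs_backref_finish_upper_links(cache, node);
	if (handle_useless_nodes(rc, node))
		node = NULL;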
@@ -1244,19 +538,19 @@ static int clone_backref_node(struct btrfs_trans_handle *trans,
struct btrfs_root *dest)
{
struct btrfs_root *reloc_root = src->reloc_root;
- struct backref_cache *cache = &rc->backref_cache;
- struct backref_node *node = NULL;
- struct backref_node *new_node;
- struct backref_edge *edge;
- struct backref_edge *new_edge;
+ struct btrfs_backref_cache *cache = &rc->backref_cache;
+ struct btrfs_backref_node *node = NULL;
+ struct btrfs_backref_node *new_node;
+ struct btrfs_backref_edge *edge;
+ struct btrfs_backref_edge *new_edge;
struct rb_node *rb_node;
if (cache->last_trans > 0)
update_backref_cache(trans, cache);
- rb_node = tree_search(&cache->rb_root, src->commit_root->start);
+ rb_node = rb_simple_search(&cache->rb_root, src->commit_root->start);
if (rb_node) {
- node = rb_entry(rb_node, struct backref_node, rb_node);
+ node = rb_entry(rb_node, struct btrfs_backref_node, rb_node);
if (node->detached)
node = NULL;
else
@@ -1264,10 +558,10 @@ static int clone_backref_node(struct btrfs_trans_handle *trans,
}
if (!node) {
- rb_node = tree_search(&cache->rb_root,
- reloc_root->commit_root->start);
+ rb_node = rb_simple_search(&cache->rb_root,
+ reloc_root->commit_root->start);
if (rb_node) {
- node = rb_entry(rb_node, struct backref_node,
+ node = rb_entry(rb_node, struct btrfs_backref_node,
rb_node);
BUG_ON(node->detached);
}
@@ -1276,12 +570,11 @@ static int clone_backref_node(struct btrfs_trans_handle *trans,
if (!node)
return 0;
- new_node = alloc_backref_node(cache);
+ new_node = btrfs_backref_alloc_node(cache, dest->node->start,
+ node->level);
if (!new_node)
return -ENOMEM;
- new_node->bytenr = dest->node->start;
- new_node->level = node->level;
new_node->lowest = node->lowest;
new_node->checked = 1;
new_node->root = btrfs_grab_root(dest);
@@ -1289,23 +582,21 @@ static int clone_backref_node(struct btrfs_trans_handle *trans,
if (!node->lowest) {
list_for_each_entry(edge, &node->lower, list[UPPER]) {
- new_edge = alloc_backref_edge(cache);
+ new_edge = btrfs_backref_alloc_edge(cache);
if (!new_edge)
goto fail;
- new_edge->node[UPPER] = new_node;
- new_edge->node[LOWER] = edge->node[LOWER];
- list_add_tail(&new_edge->list[UPPER],
- &new_node->lower);
+ btrfs_backref_link_edge(new_edge, edge->node[LOWER],
+ new_node, LINK_UPPER);
}
} else {
list_add_tail(&new_node->lower, &cache->leaves);
}
- rb_node = tree_insert(&cache->rb_root, new_node->bytenr,
- &new_node->rb_node);
+ rb_node = rb_simple_insert(&cache->rb_root, new_node->bytenr,
+ &new_node->rb_node);
if (rb_node)
- backref_tree_panic(rb_node, -EEXIST, new_node->bytenr);
+ btrfs_backref_panic(trans->fs_info, new_node->bytenr, -EEXIST);
if (!new_node->lowest) {
list_for_each_entry(new_edge, &new_node->lower, list[UPPER]) {
@@ -1317,11 +608,11 @@ static int clone_backref_node(struct btrfs_trans_handle *trans,
fail:
while (!list_empty(&new_node->lower)) {
new_edge = list_entry(new_node->lower.next,
- struct backref_edge, list[UPPER]);
+ struct btrfs_backref_edge, list[UPPER]);
list_del(&new_edge->list[UPPER]);
- free_backref_edge(cache, new_edge);
+ btrfs_backref_free_edge(cache, new_edge);
}
- free_backref_node(cache, new_node);
+ btrfs_backref_free_node(cache, new_node);
return -ENOMEM;
}
@@ -1343,8 +634,8 @@ static int __must_check __add_reloc_root(struct btrfs_root *root)
node->data = root;
spin_lock(&rc->reloc_root_tree.lock);
- rb_node = tree_insert(&rc->reloc_root_tree.rb_root,
- node->bytenr, &node->rb_node);
+ rb_node = rb_simple_insert(&rc->reloc_root_tree.rb_root,
+ node->bytenr, &node->rb_node);
spin_unlock(&rc->reloc_root_tree.lock);
if (rb_node) {
btrfs_panic(fs_info, -EEXIST,
@@ -1370,8 +661,8 @@ static void __del_reloc_root(struct btrfs_root *root)
if (rc && root->node) {
spin_lock(&rc->reloc_root_tree.lock);
- rb_node = tree_search(&rc->reloc_root_tree.rb_root,
- root->commit_root->start);
+ rb_node = rb_simple_search(&rc->reloc_root_tree.rb_root,
+ root->commit_root->start);
if (rb_node) {
node = rb_entry(rb_node, struct mapping_node, rb_node);
rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root);
@@ -1414,8 +705,8 @@ static int __update_reloc_root(struct btrfs_root *root)
struct reloc_control *rc = fs_info->reloc_ctl;
spin_lock(&rc->reloc_root_tree.lock);
- rb_node = tree_search(&rc->reloc_root_tree.rb_root,
- root->commit_root->start);
+ rb_node = rb_simple_search(&rc->reloc_root_tree.rb_root,
+ root->commit_root->start);
if (rb_node) {
node = rb_entry(rb_node, struct mapping_node, rb_node);
rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root);
@@ -1428,11 +719,11 @@ static int __update_reloc_root(struct btrfs_root *root)
spin_lock(&rc->reloc_root_tree.lock);
node->bytenr = root->node->start;
- rb_node = tree_insert(&rc->reloc_root_tree.rb_root,
- node->bytenr, &node->rb_node);
+ rb_node = rb_simple_insert(&rc->reloc_root_tree.rb_root,
+ node->bytenr, &node->rb_node);
spin_unlock(&rc->reloc_root_tree.lock);
if (rb_node)
- backref_tree_panic(rb_node, -EEXIST, node->bytenr);
+ btrfs_backref_panic(fs_info, node->bytenr, -EEXIST);
return 0;
}
@@ -1505,7 +796,7 @@ static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans,
reloc_root = btrfs_read_tree_root(fs_info->tree_root, &root_key);
BUG_ON(IS_ERR(reloc_root));
- set_bit(BTRFS_ROOT_REF_COWS, &reloc_root->state);
+ set_bit(BTRFS_ROOT_SHAREABLE, &reloc_root->state);
reloc_root->last_trans = trans->transid;
return reloc_root;
}
@@ -1679,14 +970,6 @@ again:
return NULL;
}
-static int in_block_group(u64 bytenr, struct btrfs_block_group *block_group)
-{
- if (bytenr >= block_group->start &&
- bytenr < block_group->start + block_group->length)
- return 1;
- return 0;
-}
-
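
The removed in_block_group() helper is replaced by the generic in_range() check. Assuming the usual btrfs macro definition (a half-open interval test defined elsewhere in the btrfs headers), the new call in replace_file_extents() below is equivalent to the deleted code:

	/* Assumed definition, matching the semantics of the removed helper: */
	#define in_range(b, first, len)	((b) >= (first) && (b) < (first) + (len))

	/* So the replacement check reads: */
	if (!in_range(bytenr, rc->block_group->start, rc->block_group->length))
		continue;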
/*
* get new location of data
*/
@@ -1784,7 +1067,8 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
if (bytenr == 0)
continue;
- if (!in_block_group(bytenr, rc->block_group))
+ if (!in_range(bytenr, rc->block_group->start,
+ rc->block_group->length))
continue;
/*
@@ -1940,7 +1224,7 @@ again:
level = btrfs_header_level(parent);
BUG_ON(level < lowest_level);
- ret = btrfs_bin_search(parent, &key, level, &slot);
+ ret = btrfs_bin_search(parent, &key, &slot);
if (ret < 0)
break;
if (ret && slot > 0)
@@ -2560,7 +1844,8 @@ again:
struct btrfs_root, root_list);
list_del_init(&reloc_root->root_list);
- root = read_fs_root(fs_info, reloc_root->root_key.offset);
+ root = btrfs_get_fs_root(fs_info, reloc_root->root_key.offset,
+ false);
BUG_ON(IS_ERR(root));
BUG_ON(root->reloc_root != reloc_root);
@@ -2588,13 +1873,10 @@ again:
static noinline_for_stack
void free_reloc_roots(struct list_head *list)
{
- struct btrfs_root *reloc_root;
+ struct btrfs_root *reloc_root, *tmp;
- while (!list_empty(list)) {
- reloc_root = list_entry(list->next, struct btrfs_root,
- root_list);
+ list_for_each_entry_safe(reloc_root, tmp, list, root_list)
__del_reloc_root(reloc_root);
- }
}
static noinline_for_stack
@@ -2624,12 +1906,11 @@ again:
reloc_root = list_entry(reloc_roots.next,
struct btrfs_root, root_list);
+ root = btrfs_get_fs_root(fs_info, reloc_root->root_key.offset,
+ false);
if (btrfs_root_refs(&reloc_root->root_item) > 0) {
- root = read_fs_root(fs_info,
- reloc_root->root_key.offset);
BUG_ON(IS_ERR(root));
BUG_ON(root->reloc_root != reloc_root);
-
ret = merge_reloc_root(rc, root);
btrfs_put_root(root);
if (ret) {
@@ -2639,6 +1920,16 @@ again:
goto out;
}
} else {
+ if (!IS_ERR(root)) {
+ if (root->reloc_root == reloc_root) {
+ root->reloc_root = NULL;
+ btrfs_put_root(reloc_root);
+ }
+ clear_bit(BTRFS_ROOT_DEAD_RELOC_TREE,
+ &root->state);
+ btrfs_put_root(root);
+ }
+
list_del_init(&reloc_root->root_list);
/* Don't forget to queue this reloc root for cleanup */
list_add_tail(&reloc_root->reloc_dirty_list,
@@ -2653,15 +1944,13 @@ again:
out:
if (ret) {
btrfs_handle_fs_error(fs_info, ret, NULL);
- if (!list_empty(&reloc_roots))
- free_reloc_roots(&reloc_roots);
+ free_reloc_roots(&reloc_roots);
/* new reloc root may be added */
mutex_lock(&fs_info->reloc_mutex);
list_splice_init(&rc->reloc_roots, &reloc_roots);
mutex_unlock(&fs_info->reloc_mutex);
- if (!list_empty(&reloc_roots))
- free_reloc_roots(&reloc_roots);
+ free_reloc_roots(&reloc_roots);
}
/*
@@ -2702,7 +1991,7 @@ static int record_reloc_root_in_trans(struct btrfs_trans_handle *trans,
if (reloc_root->last_trans == trans->transid)
return 0;
- root = read_fs_root(fs_info, reloc_root->root_key.offset);
+ root = btrfs_get_fs_root(fs_info, reloc_root->root_key.offset, false);
BUG_ON(IS_ERR(root));
BUG_ON(root->reloc_root != reloc_root);
ret = btrfs_record_root_in_trans(trans, root);
@@ -2714,10 +2003,10 @@ static int record_reloc_root_in_trans(struct btrfs_trans_handle *trans,
static noinline_for_stack
struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans,
struct reloc_control *rc,
- struct backref_node *node,
- struct backref_edge *edges[])
+ struct btrfs_backref_node *node,
+ struct btrfs_backref_edge *edges[])
{
- struct backref_node *next;
+ struct btrfs_backref_node *next;
struct btrfs_root *root;
int index = 0;
@@ -2727,7 +2016,7 @@ struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans,
next = walk_up_backref(next, edges, &index);
root = next->root;
BUG_ON(!root);
- BUG_ON(!test_bit(BTRFS_ROOT_REF_COWS, &root->state));
+ BUG_ON(!test_bit(BTRFS_ROOT_SHAREABLE, &root->state));
if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
record_reloc_root_in_trans(trans, root);
@@ -2746,7 +2035,7 @@ struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans,
ASSERT(next->root);
list_add_tail(&next->list,
&rc->backref_cache.changed);
- __mark_block_processed(rc, next);
+ mark_block_processed(rc, next);
break;
}
@@ -2771,18 +2060,21 @@ struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans,
}
/*
- * select a tree root for relocation. return NULL if the block
- * is reference counted. we should use do_relocation() in this
- * case. return a tree root pointer if the block isn't reference
- * counted. return -ENOENT if the block is root of reloc tree.
+ * Select a tree root for relocation.
+ *
+ * Return NULL if the block is not shareable. We should use do_relocation() in
+ * this case.
+ *
+ * Return a tree root pointer if the block is shareable.
+ * Return -ENOENT if the block is the root of a reloc tree.
*/
static noinline_for_stack
-struct btrfs_root *select_one_root(struct backref_node *node)
+struct btrfs_root *select_one_root(struct btrfs_backref_node *node)
{
- struct backref_node *next;
+ struct btrfs_backref_node *next;
struct btrfs_root *root;
struct btrfs_root *fs_root = NULL;
- struct backref_edge *edges[BTRFS_MAX_LEVEL - 1];
+ struct btrfs_backref_edge *edges[BTRFS_MAX_LEVEL - 1];
int index = 0;
next = node;
@@ -2792,8 +2084,8 @@ struct btrfs_root *select_one_root(struct backref_node *node)
root = next->root;
BUG_ON(!root);
- /* no other choice for non-references counted tree */
- if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))
+ /* No other choice for non-shareable tree */
+ if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
return root;
if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID)
@@ -2814,12 +2106,12 @@ struct btrfs_root *select_one_root(struct backref_node *node)
static noinline_for_stack
u64 calcu_metadata_size(struct reloc_control *rc,
- struct backref_node *node, int reserve)
+ struct btrfs_backref_node *node, int reserve)
{
struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
- struct backref_node *next = node;
- struct backref_edge *edge;
- struct backref_edge *edges[BTRFS_MAX_LEVEL - 1];
+ struct btrfs_backref_node *next = node;
+ struct btrfs_backref_edge *edge;
+ struct btrfs_backref_edge *edges[BTRFS_MAX_LEVEL - 1];
u64 num_bytes = 0;
int index = 0;
@@ -2837,7 +2129,7 @@ u64 calcu_metadata_size(struct reloc_control *rc,
break;
edge = list_entry(next->upper.next,
- struct backref_edge, list[LOWER]);
+ struct btrfs_backref_edge, list[LOWER]);
edges[index++] = edge;
next = edge->node[UPPER];
}
@@ -2848,7 +2140,7 @@ u64 calcu_metadata_size(struct reloc_control *rc,
static int reserve_metadata_space(struct btrfs_trans_handle *trans,
struct reloc_control *rc,
- struct backref_node *node)
+ struct btrfs_backref_node *node)
{
struct btrfs_root *root = rc->extent_root;
struct btrfs_fs_info *fs_info = root->fs_info;
@@ -2896,14 +2188,14 @@ static int reserve_metadata_space(struct btrfs_trans_handle *trans,
*/
static int do_relocation(struct btrfs_trans_handle *trans,
struct reloc_control *rc,
- struct backref_node *node,
+ struct btrfs_backref_node *node,
struct btrfs_key *key,
struct btrfs_path *path, int lowest)
{
struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
- struct backref_node *upper;
- struct backref_edge *edge;
- struct backref_edge *edges[BTRFS_MAX_LEVEL - 1];
+ struct btrfs_backref_node *upper;
+ struct btrfs_backref_edge *edge;
+ struct btrfs_backref_edge *edges[BTRFS_MAX_LEVEL - 1];
struct btrfs_root *root;
struct extent_buffer *eb;
u32 blocksize;
@@ -2929,8 +2221,7 @@ static int do_relocation(struct btrfs_trans_handle *trans,
if (upper->eb && !upper->locked) {
if (!lowest) {
- ret = btrfs_bin_search(upper->eb, key,
- upper->level, &slot);
+ ret = btrfs_bin_search(upper->eb, key, &slot);
if (ret < 0) {
err = ret;
goto next;
@@ -2940,7 +2231,7 @@ static int do_relocation(struct btrfs_trans_handle *trans,
if (node->eb->start == bytenr)
goto next;
}
- drop_node_buffer(upper);
+ btrfs_backref_drop_node_buffer(upper);
}
if (!upper->eb) {
@@ -2968,8 +2259,7 @@ static int do_relocation(struct btrfs_trans_handle *trans,
slot = path->slots[upper->level];
btrfs_release_path(path);
} else {
- ret = btrfs_bin_search(upper->eb, key, upper->level,
- &slot);
+ ret = btrfs_bin_search(upper->eb, key, &slot);
if (ret < 0) {
err = ret;
goto next;
@@ -3039,15 +2329,15 @@ static int do_relocation(struct btrfs_trans_handle *trans,
}
next:
if (!upper->pending)
- drop_node_buffer(upper);
+ btrfs_backref_drop_node_buffer(upper);
else
- unlock_node_buffer(upper);
+ btrfs_backref_unlock_node_buffer(upper);
if (err)
break;
}
if (!err && node->pending) {
- drop_node_buffer(node);
+ btrfs_backref_drop_node_buffer(node);
list_move_tail(&node->list, &rc->backref_cache.changed);
node->pending = 0;
}
@@ -3059,7 +2349,7 @@ next:
static int link_to_upper(struct btrfs_trans_handle *trans,
struct reloc_control *rc,
- struct backref_node *node,
+ struct btrfs_backref_node *node,
struct btrfs_path *path)
{
struct btrfs_key key;
@@ -3073,15 +2363,15 @@ static int finish_pending_nodes(struct btrfs_trans_handle *trans,
struct btrfs_path *path, int err)
{
LIST_HEAD(list);
- struct backref_cache *cache = &rc->backref_cache;
- struct backref_node *node;
+ struct btrfs_backref_cache *cache = &rc->backref_cache;
+ struct btrfs_backref_node *node;
int level;
int ret;
for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
while (!list_empty(&cache->pending[level])) {
node = list_entry(cache->pending[level].next,
- struct backref_node, list);
+ struct btrfs_backref_node, list);
list_move_tail(&node->list, &list);
BUG_ON(!node->pending);
@@ -3096,35 +2386,16 @@ static int finish_pending_nodes(struct btrfs_trans_handle *trans,
return err;
}
-static void mark_block_processed(struct reloc_control *rc,
- u64 bytenr, u32 blocksize)
-{
- set_extent_bits(&rc->processed_blocks, bytenr, bytenr + blocksize - 1,
- EXTENT_DIRTY);
-}
-
-static void __mark_block_processed(struct reloc_control *rc,
- struct backref_node *node)
-{
- u32 blocksize;
- if (node->level == 0 ||
- in_block_group(node->bytenr, rc->block_group)) {
- blocksize = rc->extent_root->fs_info->nodesize;
- mark_block_processed(rc, node->bytenr, blocksize);
- }
- node->processed = 1;
-}
-
/*
* mark a block and all blocks directly/indirectly reference the block
* as processed.
*/
static void update_processed_blocks(struct reloc_control *rc,
- struct backref_node *node)
+ struct btrfs_backref_node *node)
{
- struct backref_node *next = node;
- struct backref_edge *edge;
- struct backref_edge *edges[BTRFS_MAX_LEVEL - 1];
+ struct btrfs_backref_node *next = node;
+ struct btrfs_backref_edge *edge;
+ struct btrfs_backref_edge *edges[BTRFS_MAX_LEVEL - 1];
int index = 0;
while (next) {
@@ -3133,13 +2404,13 @@ static void update_processed_blocks(struct reloc_control *rc,
if (next->processed)
break;
- __mark_block_processed(rc, next);
+ mark_block_processed(rc, next);
if (list_empty(&next->upper))
break;
edge = list_entry(next->upper.next,
- struct backref_edge, list[LOWER]);
+ struct btrfs_backref_edge, list[LOWER]);
edges[index++] = edge;
next = edge->node[UPPER];
}
@@ -3184,7 +2455,7 @@ static int get_tree_block_key(struct btrfs_fs_info *fs_info,
*/
static int relocate_tree_block(struct btrfs_trans_handle *trans,
struct reloc_control *rc,
- struct backref_node *node,
+ struct btrfs_backref_node *node,
struct btrfs_key *key,
struct btrfs_path *path)
{
@@ -3210,7 +2481,7 @@ static int relocate_tree_block(struct btrfs_trans_handle *trans,
}
if (root) {
- if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) {
+ if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state)) {
BUG_ON(node->new_bytenr);
BUG_ON(!list_empty(&node->list));
btrfs_record_root_in_trans(trans, root);
@@ -3234,7 +2505,7 @@ static int relocate_tree_block(struct btrfs_trans_handle *trans,
}
out:
if (ret || node->level == 0 || node->cowonly)
- remove_backref_node(&rc->backref_cache, node);
+ btrfs_backref_cleanup_node(&rc->backref_cache, node);
return ret;
}
@@ -3246,7 +2517,7 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans,
struct reloc_control *rc, struct rb_root *blocks)
{
struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
- struct backref_node *node;
+ struct btrfs_backref_node *node;
struct btrfs_path *path;
struct tree_block *block;
struct tree_block *next;
@@ -3613,9 +2884,10 @@ static int add_tree_block(struct reloc_control *rc,
block->level = level;
block->key_ready = 0;
- rb_node = tree_insert(blocks, block->bytenr, &block->rb_node);
+ rb_node = rb_simple_insert(blocks, block->bytenr, &block->rb_node);
if (rb_node)
- backref_tree_panic(rb_node, -EEXIST, block->bytenr);
+ btrfs_backref_panic(rc->extent_root->fs_info, block->bytenr,
+ -EEXIST);
return 0;
}
@@ -3636,7 +2908,7 @@ static int __add_tree_block(struct reloc_control *rc,
if (tree_block_processed(bytenr, rc))
return 0;
- if (tree_search(blocks, bytenr))
+ if (rb_simple_search(blocks, bytenr))
return 0;
path = btrfs_alloc_path();
@@ -3698,7 +2970,6 @@ static int delete_block_group_cache(struct btrfs_fs_info *fs_info,
struct inode *inode,
u64 ino)
{
- struct btrfs_key key;
struct btrfs_root *root = fs_info->tree_root;
struct btrfs_trans_handle *trans;
int ret = 0;
@@ -3706,11 +2977,7 @@ static int delete_block_group_cache(struct btrfs_fs_info *fs_info,
if (inode)
goto truncate;
- key.objectid = ino;
- key.type = BTRFS_INODE_ITEM_KEY;
- key.offset = 0;
-
- inode = btrfs_iget(fs_info->sb, &key, root);
+ inode = btrfs_iget(fs_info->sb, ino, root);
if (IS_ERR(inode))
return -ENOENT;
@@ -4122,7 +3389,7 @@ restart:
rc->create_reloc_tree = 0;
set_reloc_control(rc);
- backref_cache_cleanup(&rc->backref_cache);
+ btrfs_backref_release_cache(&rc->backref_cache);
btrfs_block_rsv_release(fs_info, rc->block_rsv, (u64)-1, NULL);
/*
@@ -4198,14 +3465,10 @@ struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
struct inode *inode = NULL;
struct btrfs_trans_handle *trans;
struct btrfs_root *root;
- struct btrfs_key key;
u64 objectid;
int err = 0;
- root = read_fs_root(fs_info, BTRFS_DATA_RELOC_TREE_OBJECTID);
- if (IS_ERR(root))
- return ERR_CAST(root);
-
+ root = btrfs_grab_root(fs_info->data_reloc_root);
trans = btrfs_start_transaction(root, 6);
if (IS_ERR(trans)) {
btrfs_put_root(root);
@@ -4219,10 +3482,7 @@ struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
err = __insert_orphan_inode(trans, root, objectid);
BUG_ON(err);
- key.objectid = objectid;
- key.type = BTRFS_INODE_ITEM_KEY;
- key.offset = 0;
- inode = btrfs_iget(fs_info->sb, &key, root);
+ inode = btrfs_iget(fs_info->sb, objectid, root);
BUG_ON(IS_ERR(inode));
BTRFS_I(inode)->index_cnt = group->start;
@@ -4249,7 +3509,7 @@ static struct reloc_control *alloc_reloc_control(struct btrfs_fs_info *fs_info)
INIT_LIST_HEAD(&rc->reloc_roots);
INIT_LIST_HEAD(&rc->dirty_subvol_roots);
- backref_cache_init(&rc->backref_cache);
+ btrfs_backref_init_cache(fs_info, &rc->backref_cache, 1);
mapping_tree_init(&rc->reloc_root_tree);
extent_io_tree_init(fs_info, &rc->processed_blocks,
IO_TREE_RELOC_BLOCKS, NULL);
@@ -4494,12 +3754,12 @@ int btrfs_recover_relocation(struct btrfs_root *root)
goto out;
}
- set_bit(BTRFS_ROOT_REF_COWS, &reloc_root->state);
+ set_bit(BTRFS_ROOT_SHAREABLE, &reloc_root->state);
list_add(&reloc_root->root_list, &reloc_roots);
if (btrfs_root_refs(&reloc_root->root_item) > 0) {
- fs_root = read_fs_root(fs_info,
- reloc_root->root_key.offset);
+ fs_root = btrfs_get_fs_root(fs_info,
+ reloc_root->root_key.offset, false);
if (IS_ERR(fs_root)) {
ret = PTR_ERR(fs_root);
if (ret != -ENOENT) {
@@ -4555,7 +3815,8 @@ int btrfs_recover_relocation(struct btrfs_root *root)
continue;
}
- fs_root = read_fs_root(fs_info, reloc_root->root_key.offset);
+ fs_root = btrfs_get_fs_root(fs_info, reloc_root->root_key.offset,
+ false);
if (IS_ERR(fs_root)) {
err = PTR_ERR(fs_root);
list_add_tail(&reloc_root->root_list, &reloc_roots);
@@ -4591,20 +3852,16 @@ out_unset:
unset_reloc_control(rc);
free_reloc_control(rc);
out:
- if (!list_empty(&reloc_roots))
- free_reloc_roots(&reloc_roots);
+ free_reloc_roots(&reloc_roots);
btrfs_free_path(path);
if (err == 0) {
/* cleanup orphan inode in data relocation tree */
- fs_root = read_fs_root(fs_info, BTRFS_DATA_RELOC_TREE_OBJECTID);
- if (IS_ERR(fs_root)) {
- err = PTR_ERR(fs_root);
- } else {
- err = btrfs_orphan_cleanup(fs_root);
- btrfs_put_root(fs_root);
- }
+ fs_root = btrfs_grab_root(fs_info->data_reloc_root);
+ ASSERT(fs_root);
+ err = btrfs_orphan_cleanup(fs_root);
+ btrfs_put_root(fs_root);
}
return err;
}
@@ -4666,7 +3923,7 @@ int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct reloc_control *rc;
- struct backref_node *node;
+ struct btrfs_backref_node *node;
int first_cow = 0;
int level;
int ret = 0;
@@ -4691,7 +3948,7 @@ int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
BUG_ON(node->bytenr != buf->start &&
node->new_bytenr != buf->start);
- drop_node_buffer(node);
+ btrfs_backref_drop_node_buffer(node);
atomic_inc(&cow->refs);
node->eb = cow;
node->new_bytenr = cow->start;
@@ -4703,7 +3960,7 @@ int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
}
if (first_cow)
- __mark_block_processed(rc, node);
+ mark_block_processed(rc, node);
if (first_cow && level > 0)
rc->nodes_relocated += buf->len;
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 668f22844017..c89697486366 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -210,7 +210,6 @@ int btrfs_find_orphan_roots(struct btrfs_fs_info *fs_info)
struct extent_buffer *leaf;
struct btrfs_path *path;
struct btrfs_key key;
- struct btrfs_key root_key;
struct btrfs_root *root;
int err = 0;
int ret;
@@ -223,10 +222,9 @@ int btrfs_find_orphan_roots(struct btrfs_fs_info *fs_info)
key.type = BTRFS_ORPHAN_ITEM_KEY;
key.offset = 0;
- root_key.type = BTRFS_ROOT_ITEM_KEY;
- root_key.offset = (u64)-1;
-
while (1) {
+ u64 root_objectid;
+
ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
if (ret < 0) {
err = ret;
@@ -250,10 +248,10 @@ int btrfs_find_orphan_roots(struct btrfs_fs_info *fs_info)
key.type != BTRFS_ORPHAN_ITEM_KEY)
break;
- root_key.objectid = key.offset;
+ root_objectid = key.offset;
key.offset++;
- root = btrfs_get_fs_root(fs_info, &root_key, false);
+ root = btrfs_get_fs_root(fs_info, root_objectid, false);
err = PTR_ERR_OR_ZERO(root);
if (err && err != -ENOENT) {
break;
@@ -270,7 +268,7 @@ int btrfs_find_orphan_roots(struct btrfs_fs_info *fs_info)
break;
}
err = btrfs_del_orphan_item(trans, tree_root,
- root_key.objectid);
+ root_objectid);
btrfs_end_transaction(trans);
if (err) {
btrfs_handle_fs_error(fs_info, err,
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index adaf8ab694d5..016a025e36c7 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -647,13 +647,9 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
struct btrfs_fs_info *fs_info = swarn->dev->fs_info;
struct inode_fs_paths *ipath = NULL;
struct btrfs_root *local_root;
- struct btrfs_key root_key;
struct btrfs_key key;
- root_key.objectid = root;
- root_key.type = BTRFS_ROOT_ITEM_KEY;
- root_key.offset = (u64)-1;
- local_root = btrfs_get_fs_root(fs_info, &root_key, true);
+ local_root = btrfs_get_fs_root(fs_info, root, true);
if (IS_ERR(local_root)) {
ret = PTR_ERR(local_root);
goto err;
@@ -3046,7 +3042,8 @@ out:
static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
struct map_lookup *map,
struct btrfs_device *scrub_dev,
- int num, u64 base, u64 length)
+ int num, u64 base, u64 length,
+ struct btrfs_block_group *cache)
{
struct btrfs_path *path, *ppath;
struct btrfs_fs_info *fs_info = sctx->fs_info;
@@ -3284,6 +3281,20 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
break;
}
+ /*
+ * If our block group was removed in the meantime, just
+ * stop scrubbing since there is no point in continuing.
+ * Continuing would prevent reusing its device extents
+ * for new block groups for a long time.
+ */
+ spin_lock(&cache->lock);
+ if (cache->removed) {
+ spin_unlock(&cache->lock);
+ ret = 0;
+ goto out;
+ }
+ spin_unlock(&cache->lock);
+
extent = btrfs_item_ptr(l, slot,
struct btrfs_extent_item);
flags = btrfs_extent_flags(l, extent);
@@ -3328,13 +3339,14 @@ again:
&extent_dev,
&extent_mirror_num);
- ret = btrfs_lookup_csums_range(csum_root,
- extent_logical,
- extent_logical +
- extent_len - 1,
- &sctx->csum_list, 1);
- if (ret)
- goto out;
+ if (flags & BTRFS_EXTENT_FLAG_DATA) {
+ ret = btrfs_lookup_csums_range(csum_root,
+ extent_logical,
+ extent_logical + extent_len - 1,
+ &sctx->csum_list, 1);
+ if (ret)
+ goto out;
+ }
ret = scrub_extent(sctx, map, extent_logical, extent_len,
extent_physical, extent_dev, flags,
@@ -3457,7 +3469,7 @@ static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx,
if (map->stripes[i].dev->bdev == scrub_dev->bdev &&
map->stripes[i].physical == dev_offset) {
ret = scrub_stripe(sctx, map, scrub_dev, i,
- chunk_offset, length);
+ chunk_offset, length, cache);
if (ret)
goto out;
}
@@ -3555,6 +3567,23 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
goto skip;
/*
+ * Make sure that while we are scrubbing the corresponding block
+ * group doesn't get its logical address and its device extents
+ * reused for another block group, which can possibly be of a
+ * different type and different profile. We do this to prevent
+ * false error detections and crashes due to bogus attempts to
+ * repair extents.
+ */
+ spin_lock(&cache->lock);
+ if (cache->removed) {
+ spin_unlock(&cache->lock);
+ btrfs_put_block_group(cache);
+ goto skip;
+ }
+ btrfs_freeze_block_group(cache);
+ spin_unlock(&cache->lock);
+
+ /*
* we need call btrfs_inc_block_group_ro() with scrubs_paused,
* to avoid deadlock caused by:
* btrfs_inc_block_group_ro()
@@ -3609,6 +3638,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
} else {
btrfs_warn(fs_info,
"failed setting block group ro: %d", ret);
+ btrfs_unfreeze_block_group(cache);
btrfs_put_block_group(cache);
scrub_pause_off(fs_info);
break;
@@ -3695,6 +3725,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
spin_unlock(&cache->lock);
}
+ btrfs_unfreeze_block_group(cache);
btrfs_put_block_group(cache);
if (ret)
break;
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index c5f41bd86765..d9813a5b075a 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -23,6 +23,7 @@
#include "btrfs_inode.h"
#include "transaction.h"
#include "compression.h"
+#include "xattr.h"
/*
* Maximum number of references an extent can have in order for us to attempt to
@@ -4545,6 +4546,10 @@ static int __process_new_xattr(int num, struct btrfs_key *di_key,
struct fs_path *p;
struct posix_acl_xattr_header dummy_acl;
+ /* Capabilities are emitted by finish_inode_if_needed */
+ if (!strncmp(name, XATTR_NAME_CAPS, name_len))
+ return 0;
+
p = fs_path_alloc();
if (!p)
return -ENOMEM;
@@ -4801,17 +4806,12 @@ static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len)
struct inode *inode;
struct page *page;
char *addr;
- struct btrfs_key key;
pgoff_t index = offset >> PAGE_SHIFT;
pgoff_t last_index;
unsigned pg_offset = offset_in_page(offset);
ssize_t ret = 0;
- key.objectid = sctx->cur_ino;
- key.type = BTRFS_INODE_ITEM_KEY;
- key.offset = 0;
-
- inode = btrfs_iget(fs_info->sb, &key, root);
+ inode = btrfs_iget(fs_info->sb, sctx->cur_ino, root);
if (IS_ERR(inode))
return PTR_ERR(inode);
@@ -5107,6 +5107,64 @@ static int send_extent_data(struct send_ctx *sctx,
return 0;
}
+/*
+ * Search for a capability xattr related to sctx->cur_ino. If the capability is
+ * found, call the send_set_xattr() function to emit it.
+ *
+ * Return 0 if there isn't a capability, or when the capability was emitted
+ * successfully, or < 0 if an error occurred.
+ */
+static int send_capabilities(struct send_ctx *sctx)
+{
+ struct fs_path *fspath = NULL;
+ struct btrfs_path *path;
+ struct btrfs_dir_item *di;
+ struct extent_buffer *leaf;
+ unsigned long data_ptr;
+ char *buf = NULL;
+ int buf_len;
+ int ret = 0;
+
+ path = alloc_path_for_send();
+ if (!path)
+ return -ENOMEM;
+
+ di = btrfs_lookup_xattr(NULL, sctx->send_root, path, sctx->cur_ino,
+ XATTR_NAME_CAPS, strlen(XATTR_NAME_CAPS), 0);
+ if (!di) {
+ /* There is no xattr for this inode */
+ goto out;
+ } else if (IS_ERR(di)) {
+ ret = PTR_ERR(di);
+ goto out;
+ }
+
+ leaf = path->nodes[0];
+ buf_len = btrfs_dir_data_len(leaf, di);
+
+ fspath = fs_path_alloc();
+ buf = kmalloc(buf_len, GFP_KERNEL);
+ if (!fspath || !buf) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, fspath);
+ if (ret < 0)
+ goto out;
+
+ data_ptr = (unsigned long)(di + 1) + btrfs_dir_name_len(leaf, di);
+ read_extent_buffer(leaf, buf, data_ptr, buf_len);
+
+ ret = send_set_xattr(sctx, fspath, XATTR_NAME_CAPS,
+ strlen(XATTR_NAME_CAPS), buf, buf_len);
+out:
+ kfree(buf);
+ fs_path_free(fspath);
+ btrfs_free_path(path);
+ return ret;
+}
+
static int clone_range(struct send_ctx *sctx,
struct clone_root *clone_root,
const u64 disk_byte,
@@ -5972,6 +6030,10 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
goto out;
}
+ ret = send_capabilities(sctx);
+ if (ret < 0)
+ goto out;
+
/*
* If other directory inodes depended on our current directory
* inode's move/rename, now do their move/rename operations.
@@ -7021,7 +7083,6 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
struct btrfs_root *send_root = BTRFS_I(file_inode(mnt_file))->root;
struct btrfs_fs_info *fs_info = send_root->fs_info;
struct btrfs_root *clone_root;
- struct btrfs_key key;
struct send_ctx *sctx = NULL;
u32 i;
u64 *clone_sources_tmp = NULL;
@@ -7065,13 +7126,6 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
goto out;
}
- if (!access_ok(arg->clone_sources,
- sizeof(*arg->clone_sources) *
- arg->clone_sources_count)) {
- ret = -EFAULT;
- goto out;
- }
-
if (arg->flags & ~BTRFS_SEND_FLAG_MASK) {
ret = -EINVAL;
goto out;
@@ -7150,11 +7204,8 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
}
for (i = 0; i < arg->clone_sources_count; i++) {
- key.objectid = clone_sources_tmp[i];
- key.type = BTRFS_ROOT_ITEM_KEY;
- key.offset = (u64)-1;
-
- clone_root = btrfs_get_fs_root(fs_info, &key, true);
+ clone_root = btrfs_get_fs_root(fs_info,
+ clone_sources_tmp[i], true);
if (IS_ERR(clone_root)) {
ret = PTR_ERR(clone_root);
goto out;
@@ -7185,11 +7236,8 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
}
if (arg->parent_root) {
- key.objectid = arg->parent_root;
- key.type = BTRFS_ROOT_ITEM_KEY;
- key.offset = (u64)-1;
-
- sctx->parent_root = btrfs_get_fs_root(fs_info, &key, true);
+ sctx->parent_root = btrfs_get_fs_root(fs_info, arg->parent_root,
+ true);
if (IS_ERR(sctx->parent_root)) {
ret = PTR_ERR(sctx->parent_root);
goto out;
diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index ff17a4420358..41ee88633769 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -626,6 +626,7 @@ static int may_commit_transaction(struct btrfs_fs_info *fs_info,
struct reserve_ticket *ticket = NULL;
struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_block_rsv;
struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
+ struct btrfs_block_rsv *trans_rsv = &fs_info->trans_block_rsv;
struct btrfs_trans_handle *trans;
u64 bytes_needed;
u64 reclaim_bytes = 0;
@@ -688,6 +689,11 @@ static int may_commit_transaction(struct btrfs_fs_info *fs_info,
spin_lock(&delayed_refs_rsv->lock);
reclaim_bytes += delayed_refs_rsv->reserved;
spin_unlock(&delayed_refs_rsv->lock);
+
+ spin_lock(&trans_rsv->lock);
+ reclaim_bytes += trans_rsv->reserved;
+ spin_unlock(&trans_rsv->lock);
+
if (reclaim_bytes >= bytes_needed)
goto commit;
bytes_needed -= reclaim_bytes;
@@ -856,6 +862,34 @@ static inline int need_do_async_reclaim(struct btrfs_fs_info *fs_info,
!test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state));
}
+static bool steal_from_global_rsv(struct btrfs_fs_info *fs_info,
+ struct btrfs_space_info *space_info,
+ struct reserve_ticket *ticket)
+{
+ struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
+ u64 min_bytes;
+
+ if (global_rsv->space_info != space_info)
+ return false;
+
+ spin_lock(&global_rsv->lock);
+ min_bytes = div_factor(global_rsv->size, 1);
+ if (global_rsv->reserved < min_bytes + ticket->bytes) {
+ spin_unlock(&global_rsv->lock);
+ return false;
+ }
+ global_rsv->reserved -= ticket->bytes;
+ ticket->bytes = 0;
+ list_del_init(&ticket->list);
+ wake_up(&ticket->wait);
+ space_info->tickets_id++;
+ if (global_rsv->reserved < global_rsv->size)
+ global_rsv->full = 0;
+ spin_unlock(&global_rsv->lock);
+
+ return true;
+}
+
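
A note on the threshold used by steal_from_global_rsv() above: div_factor(size, 1) is the btrfs helper for size * 1 / 10 (its exact definition lives outside this diff and is assumed here), so the global reserve only donates space to a ticket if at least 10% of the reserve's size would still be reserved afterwards:

	/* Assumed helper semantics: div_factor(num, f) == num * f / 10 */
	u64 min_bytes = div_factor(global_rsv->size, 1);	/* 10% of the rsv size */

	/* Refuse to steal if handing out ticket->bytes would drop below that floor. */
	if (global_rsv->reserved < min_bytes + ticket->bytes)
		return false;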
/*
* maybe_fail_all_tickets - we've exhausted our flushing, start failing tickets
* @fs_info - fs_info for this fs
@@ -888,6 +922,10 @@ static bool maybe_fail_all_tickets(struct btrfs_fs_info *fs_info,
ticket = list_first_entry(&space_info->tickets,
struct reserve_ticket, list);
+ if (ticket->steal &&
+ steal_from_global_rsv(fs_info, space_info, ticket))
+ return true;
+
/*
* may_commit_transaction will avoid committing the transaction
* if it doesn't feel like the space reclaimed by the commit
@@ -1104,6 +1142,7 @@ static int handle_reserve_ticket(struct btrfs_fs_info *fs_info,
switch (flush) {
case BTRFS_RESERVE_FLUSH_ALL:
+ case BTRFS_RESERVE_FLUSH_ALL_STEAL:
wait_reserve_ticket(fs_info, space_info, ticket);
break;
case BTRFS_RESERVE_FLUSH_LIMIT:
@@ -1125,11 +1164,17 @@ static int handle_reserve_ticket(struct btrfs_fs_info *fs_info,
ret = ticket->error;
if (ticket->bytes || ticket->error) {
/*
- * Need to delete here for priority tickets. For regular tickets
- * either the async reclaim job deletes the ticket from the list
- * or we delete it ourselves at wait_reserve_ticket().
+ * We were a priority ticket, so we need to delete ourselves
+ * from the list. Because we could have other priority tickets
+ * behind us that require less space, run
+ * btrfs_try_granting_tickets() to see if their reservations can
+ * now be made.
*/
- remove_ticket(space_info, ticket);
+ if (!list_empty(&ticket->list)) {
+ remove_ticket(space_info, ticket);
+ btrfs_try_granting_tickets(fs_info, space_info);
+ }
+
if (!ret)
ret = -ENOSPC;
}
@@ -1145,6 +1190,16 @@ static int handle_reserve_ticket(struct btrfs_fs_info *fs_info,
return ret;
}
+/*
+ * This returns true if this flush state will go through the ordinary flushing
+ * code.
+ */
+static inline bool is_normal_flushing(enum btrfs_reserve_flush_enum flush)
+{
+ return (flush == BTRFS_RESERVE_FLUSH_ALL) ||
+ (flush == BTRFS_RESERVE_FLUSH_ALL_STEAL);
+}
+
/**
* reserve_metadata_bytes - try to reserve bytes from the block_rsv's space
* @root - the root we're allocating for
@@ -1175,8 +1230,17 @@ static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
spin_lock(&space_info->lock);
ret = -ENOSPC;
used = btrfs_space_info_used(space_info, true);
- pending_tickets = !list_empty(&space_info->tickets) ||
- !list_empty(&space_info->priority_tickets);
+
+ /*
+ * We don't want NO_FLUSH allocations to jump everybody, they can
+ * generally handle ENOSPC in a different way, so treat them the same as
+ * normal flushers when it comes to skipping pending tickets.
+ */
+ if (is_normal_flushing(flush) || (flush == BTRFS_RESERVE_NO_FLUSH))
+ pending_tickets = !list_empty(&space_info->tickets) ||
+ !list_empty(&space_info->priority_tickets);
+ else
+ pending_tickets = !list_empty(&space_info->priority_tickets);
/*
* Carry on if we have enough space (short-circuit) OR call
@@ -1198,12 +1262,13 @@ static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
* the list and we will do our own flushing further down.
*/
if (ret && flush != BTRFS_RESERVE_NO_FLUSH) {
- ASSERT(space_info->reclaim_size >= 0);
ticket.bytes = orig_bytes;
ticket.error = 0;
space_info->reclaim_size += ticket.bytes;
init_waitqueue_head(&ticket.wait);
- if (flush == BTRFS_RESERVE_FLUSH_ALL) {
+ ticket.steal = (flush == BTRFS_RESERVE_FLUSH_ALL_STEAL);
+ if (flush == BTRFS_RESERVE_FLUSH_ALL ||
+ flush == BTRFS_RESERVE_FLUSH_ALL_STEAL) {
list_add_tail(&ticket.list, &space_info->tickets);
if (!space_info->flush) {
space_info->flush = 1;
diff --git a/fs/btrfs/space-info.h b/fs/btrfs/space-info.h
index 0a5001ef1481..c3c64019950a 100644
--- a/fs/btrfs/space-info.h
+++ b/fs/btrfs/space-info.h
@@ -78,6 +78,7 @@ struct btrfs_space_info {
struct reserve_ticket {
u64 bytes;
int error;
+ bool steal;
struct list_head list;
wait_queue_head_t wait;
};
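A worked illustration of the steal threshold introduced above, with hypothetical numbers: div_factor(global_rsv->size, 1) keeps 10% of the global block reserve off limits, so with global_rsv->size = 512 MiB (min_bytes = 51.2 MiB) and global_rsv->reserved = 200 MiB, a FLUSH_ALL_STEAL ticket for 140 MiB can be satisfied from the reserve (200 >= 51.2 + 140), leaving 60 MiB reserved, while a ticket for 150 MiB cannot (200 < 51.2 + 150) and falls through to the existing failure handling. The steal path is only attempted from maybe_fail_all_tickets(), i.e. after normal flushing has been exhausted.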
diff --git a/fs/btrfs/struct-funcs.c b/fs/btrfs/struct-funcs.c
index 73f7987143df..079b059818e9 100644
--- a/fs/btrfs/struct-funcs.c
+++ b/fs/btrfs/struct-funcs.c
@@ -17,151 +17,152 @@ static inline void put_unaligned_le8(u8 val, void *p)
*(u8 *)p = val;
}
+static bool check_setget_bounds(const struct extent_buffer *eb,
+ const void *ptr, unsigned off, int size)
+{
+ const unsigned long member_offset = (unsigned long)ptr + off;
+
+ if (member_offset > eb->len) {
+ btrfs_warn(eb->fs_info,
+ "bad eb member start: ptr 0x%lx start %llu member offset %lu size %d",
+ (unsigned long)ptr, eb->start, member_offset, size);
+ return false;
+ }
+ if (member_offset + size > eb->len) {
+ btrfs_warn(eb->fs_info,
+ "bad eb member end: ptr 0x%lx start %llu member offset %lu size %d",
+ (unsigned long)ptr, eb->start, member_offset, size);
+ return false;
+ }
+
+ return true;
+}
+
/*
- * this is some deeply nasty code.
+ * Macro templates that define helpers to read/write extent buffer data of a
+ * given size, that are also used via ctree.h for access to item members by
+ * specialized helpers.
*
- * The end result is that anyone who #includes ctree.h gets a
- * declaration for the btrfs_set_foo functions and btrfs_foo functions,
- * which are wrappers of btrfs_set_token_#bits functions and
- * btrfs_get_token_#bits functions, which are defined in this file.
+ * Generic helpers:
+ * - btrfs_set_8 (for 8/16/32/64)
+ * - btrfs_get_8 (for 8/16/32/64)
*
- * These setget functions do all the extent_buffer related mapping
- * required to efficiently read and write specific fields in the extent
- * buffers. Every pointer to metadata items in btrfs is really just
- * an unsigned long offset into the extent buffer which has been
- * cast to a specific type. This gives us all the gcc type checking.
+ * Generic helpers with a token (cached address of the most recently accessed
+ * page):
+ * - btrfs_set_token_8 (for 8/16/32/64)
+ * - btrfs_get_token_8 (for 8/16/32/64)
*
- * The extent buffer api is used to do the page spanning work required to
- * have a metadata blocksize different from the page size.
+ * The set/get functions handle data spanning two pages transparently, in case
+ * metadata block size is larger than page. Every pointer to metadata items is
+ * an offset into the extent buffer page array, cast to a specific type. This
+ * gives us all the type checking.
*
- * There are 2 variants defined, one with a token pointer and one without.
+ * The extent buffer pages stored in the array pages do not form a contiguous
+ * physical range, but the API functions assume the linear offset to the range
+ * from 0 to metadata node size.
*/
#define DEFINE_BTRFS_SETGET_BITS(bits) \
-u##bits btrfs_get_token_##bits(const struct extent_buffer *eb, \
- const void *ptr, unsigned long off, \
- struct btrfs_map_token *token) \
+u##bits btrfs_get_token_##bits(struct btrfs_map_token *token, \
+ const void *ptr, unsigned long off) \
{ \
- unsigned long part_offset = (unsigned long)ptr; \
- unsigned long offset = part_offset + off; \
- void *p; \
- int err; \
- char *kaddr; \
- unsigned long map_start; \
- unsigned long map_len; \
- int size = sizeof(u##bits); \
- u##bits res; \
+ const unsigned long member_offset = (unsigned long)ptr + off; \
+ const unsigned long idx = member_offset >> PAGE_SHIFT; \
+ const unsigned long oip = offset_in_page(member_offset); \
+ const int size = sizeof(u##bits); \
+ u8 lebytes[sizeof(u##bits)]; \
+ const int part = PAGE_SIZE - oip; \
\
ASSERT(token); \
- ASSERT(token->eb == eb); \
- \
- if (token->kaddr && token->offset <= offset && \
- (token->offset + PAGE_SIZE >= offset + size)) { \
- kaddr = token->kaddr; \
- p = kaddr + part_offset - token->offset; \
- res = get_unaligned_le##bits(p + off); \
- return res; \
+ ASSERT(token->kaddr); \
+ ASSERT(check_setget_bounds(token->eb, ptr, off, size)); \
+ if (token->offset <= member_offset && \
+ member_offset + size <= token->offset + PAGE_SIZE) { \
+ return get_unaligned_le##bits(token->kaddr + oip); \
} \
- err = map_private_extent_buffer(eb, offset, size, \
- &kaddr, &map_start, &map_len); \
- if (err) { \
- __le##bits leres; \
+ token->kaddr = page_address(token->eb->pages[idx]); \
+ token->offset = idx << PAGE_SHIFT; \
+ if (oip + size <= PAGE_SIZE) \
+ return get_unaligned_le##bits(token->kaddr + oip); \
\
- read_extent_buffer(eb, &leres, offset, size); \
- return le##bits##_to_cpu(leres); \
- } \
- p = kaddr + part_offset - map_start; \
- res = get_unaligned_le##bits(p + off); \
- token->kaddr = kaddr; \
- token->offset = map_start; \
- return res; \
+ memcpy(lebytes, token->kaddr + oip, part); \
+ token->kaddr = page_address(token->eb->pages[idx + 1]); \
+ token->offset = (idx + 1) << PAGE_SHIFT; \
+ memcpy(lebytes + part, token->kaddr, size - part); \
+ return get_unaligned_le##bits(lebytes); \
} \
u##bits btrfs_get_##bits(const struct extent_buffer *eb, \
const void *ptr, unsigned long off) \
{ \
- unsigned long part_offset = (unsigned long)ptr; \
- unsigned long offset = part_offset + off; \
- void *p; \
- int err; \
- char *kaddr; \
- unsigned long map_start; \
- unsigned long map_len; \
- int size = sizeof(u##bits); \
- u##bits res; \
+ const unsigned long member_offset = (unsigned long)ptr + off; \
+ const unsigned long oip = offset_in_page(member_offset); \
+ const unsigned long idx = member_offset >> PAGE_SHIFT; \
+ char *kaddr = page_address(eb->pages[idx]); \
+ const int size = sizeof(u##bits); \
+ const int part = PAGE_SIZE - oip; \
+ u8 lebytes[sizeof(u##bits)]; \
\
- err = map_private_extent_buffer(eb, offset, size, \
- &kaddr, &map_start, &map_len); \
- if (err) { \
- __le##bits leres; \
+ ASSERT(check_setget_bounds(eb, ptr, off, size)); \
+ if (oip + size <= PAGE_SIZE) \
+ return get_unaligned_le##bits(kaddr + oip); \
\
- read_extent_buffer(eb, &leres, offset, size); \
- return le##bits##_to_cpu(leres); \
- } \
- p = kaddr + part_offset - map_start; \
- res = get_unaligned_le##bits(p + off); \
- return res; \
+ memcpy(lebytes, kaddr + oip, part); \
+ kaddr = page_address(eb->pages[idx + 1]); \
+ memcpy(lebytes + part, kaddr, size - part); \
+ return get_unaligned_le##bits(lebytes); \
} \
-void btrfs_set_token_##bits(struct extent_buffer *eb, \
+void btrfs_set_token_##bits(struct btrfs_map_token *token, \
const void *ptr, unsigned long off, \
- u##bits val, \
- struct btrfs_map_token *token) \
+ u##bits val) \
{ \
- unsigned long part_offset = (unsigned long)ptr; \
- unsigned long offset = part_offset + off; \
- void *p; \
- int err; \
- char *kaddr; \
- unsigned long map_start; \
- unsigned long map_len; \
- int size = sizeof(u##bits); \
+ const unsigned long member_offset = (unsigned long)ptr + off; \
+ const unsigned long idx = member_offset >> PAGE_SHIFT; \
+ const unsigned long oip = offset_in_page(member_offset); \
+ const int size = sizeof(u##bits); \
+ u8 lebytes[sizeof(u##bits)]; \
+ const int part = PAGE_SIZE - oip; \
\
ASSERT(token); \
- ASSERT(token->eb == eb); \
- \
- if (token->kaddr && token->offset <= offset && \
- (token->offset + PAGE_SIZE >= offset + size)) { \
- kaddr = token->kaddr; \
- p = kaddr + part_offset - token->offset; \
- put_unaligned_le##bits(val, p + off); \
+ ASSERT(token->kaddr); \
+ ASSERT(check_setget_bounds(token->eb, ptr, off, size)); \
+ if (token->offset <= member_offset && \
+ member_offset + size <= token->offset + PAGE_SIZE) { \
+ put_unaligned_le##bits(val, token->kaddr + oip); \
return; \
} \
- err = map_private_extent_buffer(eb, offset, size, \
- &kaddr, &map_start, &map_len); \
- if (err) { \
- __le##bits val2; \
- \
- val2 = cpu_to_le##bits(val); \
- write_extent_buffer(eb, &val2, offset, size); \
+ token->kaddr = page_address(token->eb->pages[idx]); \
+ token->offset = idx << PAGE_SHIFT; \
+ if (oip + size <= PAGE_SIZE) { \
+ put_unaligned_le##bits(val, token->kaddr + oip); \
return; \
} \
- p = kaddr + part_offset - map_start; \
- put_unaligned_le##bits(val, p + off); \
- token->kaddr = kaddr; \
- token->offset = map_start; \
+ put_unaligned_le##bits(val, lebytes); \
+ memcpy(token->kaddr + oip, lebytes, part); \
+ token->kaddr = page_address(token->eb->pages[idx + 1]); \
+ token->offset = (idx + 1) << PAGE_SHIFT; \
+ memcpy(token->kaddr, lebytes + part, size - part); \
} \
-void btrfs_set_##bits(struct extent_buffer *eb, void *ptr, \
+void btrfs_set_##bits(const struct extent_buffer *eb, void *ptr, \
unsigned long off, u##bits val) \
{ \
- unsigned long part_offset = (unsigned long)ptr; \
- unsigned long offset = part_offset + off; \
- void *p; \
- int err; \
- char *kaddr; \
- unsigned long map_start; \
- unsigned long map_len; \
- int size = sizeof(u##bits); \
- \
- err = map_private_extent_buffer(eb, offset, size, \
- &kaddr, &map_start, &map_len); \
- if (err) { \
- __le##bits val2; \
+ const unsigned long member_offset = (unsigned long)ptr + off; \
+ const unsigned long oip = offset_in_page(member_offset); \
+ const unsigned long idx = member_offset >> PAGE_SHIFT; \
+ char *kaddr = page_address(eb->pages[idx]); \
+ const int size = sizeof(u##bits); \
+ const int part = PAGE_SIZE - oip; \
+ u8 lebytes[sizeof(u##bits)]; \
\
- val2 = cpu_to_le##bits(val); \
- write_extent_buffer(eb, &val2, offset, size); \
+ ASSERT(check_setget_bounds(eb, ptr, off, size)); \
+ if (oip + size <= PAGE_SIZE) { \
+ put_unaligned_le##bits(val, kaddr + oip); \
return; \
} \
- p = kaddr + part_offset - map_start; \
- put_unaligned_le##bits(val, p + off); \
+ \
+ put_unaligned_le##bits(val, lebytes); \
+ memcpy(kaddr + oip, lebytes, part); \
+ kaddr = page_address(eb->pages[idx + 1]); \
+ memcpy(kaddr, lebytes + part, size - part); \
}
DEFINE_BTRFS_SETGET_BITS(8)
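The rewritten helpers above boil down to one fast path when a field sits entirely inside a single page and one split path when it straddles into the next page of eb->pages[]. Below is a standalone userspace sketch of the same technique for a little-endian u32; the page array, the names and the fixed 4 KiB page size are assumptions of the sketch, not the kernel API.

#include <stdint.h>
#include <string.h>
#include <stdio.h>

#define PAGE_SZ 4096	/* assumed page size for the sketch */

/* Read a little-endian u32 starting at byte 'member_offset' of a buffer that
 * is only reachable page by page: fast path when the field fits in one page,
 * split copy into a small on-stack buffer when it straddles the boundary. */
static uint32_t get_le32(const uint8_t *const pages[], size_t member_offset)
{
	size_t idx = member_offset / PAGE_SZ;	/* page the field starts in */
	size_t oip = member_offset % PAGE_SZ;	/* offset inside that page */
	uint8_t lebytes[4];

	if (oip + sizeof(lebytes) <= PAGE_SZ) {
		memcpy(lebytes, pages[idx] + oip, sizeof(lebytes));
	} else {
		size_t part = PAGE_SZ - oip;	/* bytes left on the first page */

		memcpy(lebytes, pages[idx] + oip, part);
		memcpy(lebytes + part, pages[idx + 1], sizeof(lebytes) - part);
	}
	return (uint32_t)lebytes[0] | (uint32_t)lebytes[1] << 8 |
	       (uint32_t)lebytes[2] << 16 | (uint32_t)lebytes[3] << 24;
}

int main(void)
{
	static uint8_t p0[PAGE_SZ], p1[PAGE_SZ];
	const uint8_t *pages[] = { p0, p1 };

	/* 0x11223344 stored little-endian across the page boundary */
	p0[PAGE_SZ - 2] = 0x44; p0[PAGE_SZ - 1] = 0x33;
	p1[0] = 0x22; p1[1] = 0x11;
	printf("0x%08x\n", get_le32(pages, PAGE_SZ - 2));
	return 0;
}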
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 7932d8d07cff..bc73fd670702 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -72,23 +72,32 @@ const char * __attribute_const__ btrfs_decode_error(int errno)
char *errstr = "unknown";
switch (errno) {
- case -EIO:
+ case -ENOENT: /* -2 */
+ errstr = "No such entry";
+ break;
+ case -EIO: /* -5 */
errstr = "IO failure";
break;
- case -ENOMEM:
+ case -ENOMEM: /* -12 */
errstr = "Out of memory";
break;
- case -EROFS:
- errstr = "Readonly filesystem";
- break;
- case -EEXIST:
+ case -EEXIST: /* -17 */
errstr = "Object already exists";
break;
- case -ENOSPC:
+ case -ENOSPC: /* -28 */
errstr = "No space left";
break;
- case -ENOENT:
- errstr = "No such entry";
+ case -EROFS: /* -30 */
+ errstr = "Readonly filesystem";
+ break;
+ case -EOPNOTSUPP: /* -95 */
+ errstr = "Operation not supported";
+ break;
+ case -EUCLEAN: /* -117 */
+ errstr = "Filesystem corrupted";
+ break;
+ case -EDQUOT: /* -122 */
+ errstr = "Quota exceeded";
break;
}
@@ -1093,10 +1102,7 @@ char *btrfs_get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info,
dirid = btrfs_root_ref_dirid(path->nodes[0], root_ref);
btrfs_release_path(path);
- key.objectid = subvol_objectid;
- key.type = BTRFS_ROOT_ITEM_KEY;
- key.offset = (u64)-1;
- fs_root = btrfs_get_fs_root(fs_info, &key, true);
+ fs_root = btrfs_get_fs_root(fs_info, subvol_objectid, true);
if (IS_ERR(fs_root)) {
ret = PTR_ERR(fs_root);
fs_root = NULL;
@@ -1211,7 +1217,6 @@ static int btrfs_fill_super(struct super_block *sb,
{
struct inode *inode;
struct btrfs_fs_info *fs_info = btrfs_sb(sb);
- struct btrfs_key key;
int err;
sb->s_maxbytes = MAX_LFS_FILESIZE;
@@ -1239,10 +1244,7 @@ static int btrfs_fill_super(struct super_block *sb,
return err;
}
- key.objectid = BTRFS_FIRST_FREE_OBJECTID;
- key.type = BTRFS_INODE_ITEM_KEY;
- key.offset = 0;
- inode = btrfs_iget(sb, &key, fs_info->fs_root);
+ inode = btrfs_iget(sb, BTRFS_FIRST_FREE_OBJECTID, fs_info->fs_root);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
goto fail_close;
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 2d5498136e5e..b359d4b17658 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -21,6 +21,7 @@
#include "dev-replace.h"
#include "qgroup.h"
#include "block-group.h"
+#include "space-info.h"
#define BTRFS_ROOT_TRANS_TAG 0
@@ -141,7 +142,7 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction)
struct btrfs_block_group,
bg_list);
list_del_init(&cache->bg_list);
- btrfs_put_block_group_trimming(cache);
+ btrfs_unfreeze_block_group(cache);
btrfs_put_block_group(cache);
}
WARN_ON(!list_empty(&transaction->dev_update_list));
@@ -348,10 +349,10 @@ loop:
}
/*
- * this does all the record keeping required to make sure that a reference
- * counted root is properly recorded in a given transaction. This is required
- * to make sure the old root from before we joined the transaction is deleted
- * when the transaction commits
+ * This does all the record keeping required to make sure that a shareable root
+ * is properly recorded in a given transaction. This is required to make sure
+ * the old root from before we joined the transaction is deleted when the
+ * transaction commits.
*/
static int record_root_in_trans(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
@@ -359,7 +360,7 @@ static int record_root_in_trans(struct btrfs_trans_handle *trans,
{
struct btrfs_fs_info *fs_info = root->fs_info;
- if ((test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
+ if ((test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
root->last_trans < trans->transid) || force) {
WARN_ON(root == fs_info->extent_root);
WARN_ON(!force && root->commit_root != root->node);
@@ -438,7 +439,7 @@ int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
{
struct btrfs_fs_info *fs_info = root->fs_info;
- if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))
+ if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
return 0;
/*
@@ -503,7 +504,7 @@ static inline bool need_reserve_reloc_root(struct btrfs_root *root)
struct btrfs_fs_info *fs_info = root->fs_info;
if (!fs_info->reloc_ctl ||
- !test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
+ !test_bit(BTRFS_ROOT_SHAREABLE, &root->state) ||
root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
root->reloc_root)
return false;
@@ -523,6 +524,7 @@ start_transaction(struct btrfs_root *root, unsigned int num_items,
u64 num_bytes = 0;
u64 qgroup_reserved = 0;
bool reloc_reserved = false;
+ bool do_chunk_alloc = false;
int ret;
/* Send isn't supposed to start transactions. */
@@ -563,7 +565,8 @@ start_transaction(struct btrfs_root *root, unsigned int num_items,
* refill that amount for whatever is missing in the reserve.
*/
num_bytes = btrfs_calc_insert_metadata_size(fs_info, num_items);
- if (delayed_refs_rsv->full == 0) {
+ if (flush == BTRFS_RESERVE_FLUSH_ALL &&
+ delayed_refs_rsv->full == 0) {
delayed_refs_bytes = num_bytes;
num_bytes <<= 1;
}
@@ -584,6 +587,9 @@ start_transaction(struct btrfs_root *root, unsigned int num_items,
delayed_refs_bytes);
num_bytes -= delayed_refs_bytes;
}
+
+ if (rsv->space_info->force_alloc)
+ do_chunk_alloc = true;
} else if (num_items == 0 && flush == BTRFS_RESERVE_FLUSH_ALL &&
!delayed_refs_rsv->full) {
/*
@@ -666,6 +672,19 @@ got_it:
current->journal_info = h;
/*
+ * If the space_info is marked ALLOC_FORCE then we'll get upgraded to
+ * ALLOC_FORCE the first run through, and then we won't allocate for
+ * anybody else who races in later. We don't care about the return
+ * value here.
+ */
+ if (do_chunk_alloc && num_bytes) {
+ u64 flags = h->block_rsv->space_info->flags;
+
+ btrfs_chunk_alloc(h, btrfs_get_alloc_profile(fs_info, flags),
+ CHUNK_ALLOC_NO_FORCE);
+ }
+
+ /*
* btrfs_record_root_in_trans() needs to alloc new extents, and may
* call btrfs_join_transaction() while we're also starting a
* transaction.
@@ -699,43 +718,10 @@ struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
struct btrfs_trans_handle *btrfs_start_transaction_fallback_global_rsv(
struct btrfs_root *root,
- unsigned int num_items,
- int min_factor)
+ unsigned int num_items)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
- struct btrfs_trans_handle *trans;
- u64 num_bytes;
- int ret;
-
- /*
- * We have two callers: unlink and block group removal. The
- * former should succeed even if we will temporarily exceed
- * quota and the latter operates on the extent root so
- * qgroup enforcement is ignored anyway.
- */
- trans = start_transaction(root, num_items, TRANS_START,
- BTRFS_RESERVE_FLUSH_ALL, false);
- if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC)
- return trans;
-
- trans = btrfs_start_transaction(root, 0);
- if (IS_ERR(trans))
- return trans;
-
- num_bytes = btrfs_calc_insert_metadata_size(fs_info, num_items);
- ret = btrfs_cond_migrate_bytes(fs_info, &fs_info->trans_block_rsv,
- num_bytes, min_factor);
- if (ret) {
- btrfs_end_transaction(trans);
- return ERR_PTR(ret);
- }
-
- trans->block_rsv = &fs_info->trans_block_rsv;
- trans->bytes_reserved = num_bytes;
- trace_btrfs_space_reservation(fs_info, "transaction",
- trans->transid, num_bytes, 1);
-
- return trans;
+ return start_transaction(root, num_items, TRANS_START,
+ BTRFS_RESERVE_FLUSH_ALL_STEAL, false);
}
struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root)
@@ -1644,7 +1630,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
}
key.offset = (u64)-1;
- pending->snap = btrfs_get_fs_root(fs_info, &key, true);
+ pending->snap = btrfs_get_fs_root(fs_info, objectid, true);
if (IS_ERR(pending->snap)) {
ret = PTR_ERR(pending->snap);
btrfs_abort_transaction(trans, ret);
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 31ae8d273065..bf102e64bfb2 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -193,8 +193,7 @@ struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
unsigned int num_items);
struct btrfs_trans_handle *btrfs_start_transaction_fallback_global_rsv(
struct btrfs_root *root,
- unsigned int num_items,
- int min_factor);
+ unsigned int num_items);
struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root);
struct btrfs_trans_handle *btrfs_join_transaction_spacecache(struct btrfs_root *root);
struct btrfs_trans_handle *btrfs_join_transaction_nostart(struct btrfs_root *root);
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
index a92f8a6dd192..517b44300a05 100644
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -957,10 +957,6 @@ static int check_dev_item(struct extent_buffer *leaf,
return 0;
}
-/* Inode item error output has the same format as dir_item_err() */
-#define inode_item_err(eb, slot, fmt, ...) \
- dir_item_err(eb, slot, fmt, __VA_ARGS__)
-
static int check_inode_item(struct extent_buffer *leaf,
struct btrfs_key *key, int slot)
{
diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c
index 5f9e2dd413af..16c3a6d2586d 100644
--- a/fs/btrfs/tree-defrag.c
+++ b/fs/btrfs/tree-defrag.c
@@ -35,7 +35,7 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
goto out;
}
- if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))
+ if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
goto out;
path = btrfs_alloc_path();
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 02ebdd9edc19..920cee312f4e 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -505,13 +505,8 @@ insert:
*/
if (S_ISREG(btrfs_inode_mode(eb, src_item)) &&
S_ISREG(btrfs_inode_mode(dst_eb, dst_item)) &&
- ino_size != 0) {
- struct btrfs_map_token token;
-
- btrfs_init_map_token(&token, dst_eb);
- btrfs_set_token_inode_size(dst_eb, dst_item,
- ino_size, &token);
- }
+ ino_size != 0)
+ btrfs_set_inode_size(dst_eb, dst_item, ino_size);
goto no_copy;
}
@@ -555,13 +550,9 @@ no_copy:
static noinline struct inode *read_one_inode(struct btrfs_root *root,
u64 objectid)
{
- struct btrfs_key key;
struct inode *inode;
- key.objectid = objectid;
- key.type = BTRFS_INODE_ITEM_KEY;
- key.offset = 0;
- inode = btrfs_iget(root->fs_info->sb, &key, root);
+ inode = btrfs_iget(root->fs_info->sb, objectid, root);
if (IS_ERR(inode))
inode = NULL;
return inode;
@@ -3299,6 +3290,7 @@ static void free_log_tree(struct btrfs_trans_handle *trans,
clear_extent_bits(&log->dirty_log_pages, 0, (u64)-1,
EXTENT_DIRTY | EXTENT_NEW | EXTENT_NEED_WAIT);
+ extent_io_tree_release(&log->log_csum_range);
btrfs_put_root(log);
}
@@ -3816,8 +3808,7 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans,
found_key.offset = 0;
found_key.type = 0;
- ret = btrfs_bin_search(path->nodes[0], &found_key, 0,
- &start_slot);
+ ret = btrfs_bin_search(path->nodes[0], &found_key, &start_slot);
if (ret < 0)
break;
@@ -3853,44 +3844,41 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
* just to say 'this inode exists' and a logging
* to say 'update this inode with these values'
*/
- btrfs_set_token_inode_generation(leaf, item, 0, &token);
- btrfs_set_token_inode_size(leaf, item, logged_isize, &token);
+ btrfs_set_token_inode_generation(&token, item, 0);
+ btrfs_set_token_inode_size(&token, item, logged_isize);
} else {
- btrfs_set_token_inode_generation(leaf, item,
- BTRFS_I(inode)->generation,
- &token);
- btrfs_set_token_inode_size(leaf, item, inode->i_size, &token);
- }
-
- btrfs_set_token_inode_uid(leaf, item, i_uid_read(inode), &token);
- btrfs_set_token_inode_gid(leaf, item, i_gid_read(inode), &token);
- btrfs_set_token_inode_mode(leaf, item, inode->i_mode, &token);
- btrfs_set_token_inode_nlink(leaf, item, inode->i_nlink, &token);
-
- btrfs_set_token_timespec_sec(leaf, &item->atime,
- inode->i_atime.tv_sec, &token);
- btrfs_set_token_timespec_nsec(leaf, &item->atime,
- inode->i_atime.tv_nsec, &token);
-
- btrfs_set_token_timespec_sec(leaf, &item->mtime,
- inode->i_mtime.tv_sec, &token);
- btrfs_set_token_timespec_nsec(leaf, &item->mtime,
- inode->i_mtime.tv_nsec, &token);
-
- btrfs_set_token_timespec_sec(leaf, &item->ctime,
- inode->i_ctime.tv_sec, &token);
- btrfs_set_token_timespec_nsec(leaf, &item->ctime,
- inode->i_ctime.tv_nsec, &token);
-
- btrfs_set_token_inode_nbytes(leaf, item, inode_get_bytes(inode),
- &token);
-
- btrfs_set_token_inode_sequence(leaf, item,
- inode_peek_iversion(inode), &token);
- btrfs_set_token_inode_transid(leaf, item, trans->transid, &token);
- btrfs_set_token_inode_rdev(leaf, item, inode->i_rdev, &token);
- btrfs_set_token_inode_flags(leaf, item, BTRFS_I(inode)->flags, &token);
- btrfs_set_token_inode_block_group(leaf, item, 0, &token);
+ btrfs_set_token_inode_generation(&token, item,
+ BTRFS_I(inode)->generation);
+ btrfs_set_token_inode_size(&token, item, inode->i_size);
+ }
+
+ btrfs_set_token_inode_uid(&token, item, i_uid_read(inode));
+ btrfs_set_token_inode_gid(&token, item, i_gid_read(inode));
+ btrfs_set_token_inode_mode(&token, item, inode->i_mode);
+ btrfs_set_token_inode_nlink(&token, item, inode->i_nlink);
+
+ btrfs_set_token_timespec_sec(&token, &item->atime,
+ inode->i_atime.tv_sec);
+ btrfs_set_token_timespec_nsec(&token, &item->atime,
+ inode->i_atime.tv_nsec);
+
+ btrfs_set_token_timespec_sec(&token, &item->mtime,
+ inode->i_mtime.tv_sec);
+ btrfs_set_token_timespec_nsec(&token, &item->mtime,
+ inode->i_mtime.tv_nsec);
+
+ btrfs_set_token_timespec_sec(&token, &item->ctime,
+ inode->i_ctime.tv_sec);
+ btrfs_set_token_timespec_nsec(&token, &item->ctime,
+ inode->i_ctime.tv_nsec);
+
+ btrfs_set_token_inode_nbytes(&token, item, inode_get_bytes(inode));
+
+ btrfs_set_token_inode_sequence(&token, item, inode_peek_iversion(inode));
+ btrfs_set_token_inode_transid(&token, item, trans->transid);
+ btrfs_set_token_inode_rdev(&token, item, inode->i_rdev);
+ btrfs_set_token_inode_flags(&token, item, BTRFS_I(inode)->flags);
+ btrfs_set_token_inode_block_group(&token, item, 0);
}
static int log_inode_item(struct btrfs_trans_handle *trans,
@@ -3916,9 +3904,21 @@ static int log_csums(struct btrfs_trans_handle *trans,
struct btrfs_root *log_root,
struct btrfs_ordered_sum *sums)
{
+ const u64 lock_end = sums->bytenr + sums->len - 1;
+ struct extent_state *cached_state = NULL;
int ret;
/*
+ * Serialize logging for checksums. This is to avoid racing with the
+ * same checksum being logged by another task that is logging another
+ * file which happens to refer to the same extent as well. Such races
+ * can leave checksum items in the log with overlapping ranges.
+ */
+ ret = lock_extent_bits(&log_root->log_csum_range, sums->bytenr,
+ lock_end, &cached_state);
+ if (ret)
+ return ret;
+ /*
* Due to extent cloning, we might have logged a csum item that covers a
* subrange of a cloned extent, and later we can end up logging a csum
* item for a larger subrange of the same extent or the entire range.
@@ -3928,10 +3928,13 @@ static int log_csums(struct btrfs_trans_handle *trans,
* trim and adjust) any existing csum items in the log for this range.
*/
ret = btrfs_del_csums(trans, log_root, sums->bytenr, sums->len);
- if (ret)
- return ret;
+ if (!ret)
+ ret = btrfs_csum_file_blocks(trans, log_root, sums);
- return btrfs_csum_file_blocks(trans, log_root, sums);
+ unlock_extent_cached(&log_root->log_csum_range, sums->bytenr, lock_end,
+ &cached_state);
+
+ return ret;
}
static noinline int copy_items(struct btrfs_trans_handle *trans,
@@ -4164,43 +4167,35 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
fi = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_file_extent_item);
- btrfs_set_token_file_extent_generation(leaf, fi, trans->transid,
- &token);
+ btrfs_set_token_file_extent_generation(&token, fi, trans->transid);
if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
- btrfs_set_token_file_extent_type(leaf, fi,
- BTRFS_FILE_EXTENT_PREALLOC,
- &token);
+ btrfs_set_token_file_extent_type(&token, fi,
+ BTRFS_FILE_EXTENT_PREALLOC);
else
- btrfs_set_token_file_extent_type(leaf, fi,
- BTRFS_FILE_EXTENT_REG,
- &token);
+ btrfs_set_token_file_extent_type(&token, fi,
+ BTRFS_FILE_EXTENT_REG);
block_len = max(em->block_len, em->orig_block_len);
if (em->compress_type != BTRFS_COMPRESS_NONE) {
- btrfs_set_token_file_extent_disk_bytenr(leaf, fi,
- em->block_start,
- &token);
- btrfs_set_token_file_extent_disk_num_bytes(leaf, fi, block_len,
- &token);
+ btrfs_set_token_file_extent_disk_bytenr(&token, fi,
+ em->block_start);
+ btrfs_set_token_file_extent_disk_num_bytes(&token, fi, block_len);
} else if (em->block_start < EXTENT_MAP_LAST_BYTE) {
- btrfs_set_token_file_extent_disk_bytenr(leaf, fi,
+ btrfs_set_token_file_extent_disk_bytenr(&token, fi,
em->block_start -
- extent_offset, &token);
- btrfs_set_token_file_extent_disk_num_bytes(leaf, fi, block_len,
- &token);
+ extent_offset);
+ btrfs_set_token_file_extent_disk_num_bytes(&token, fi, block_len);
} else {
- btrfs_set_token_file_extent_disk_bytenr(leaf, fi, 0, &token);
- btrfs_set_token_file_extent_disk_num_bytes(leaf, fi, 0,
- &token);
- }
-
- btrfs_set_token_file_extent_offset(leaf, fi, extent_offset, &token);
- btrfs_set_token_file_extent_num_bytes(leaf, fi, em->len, &token);
- btrfs_set_token_file_extent_ram_bytes(leaf, fi, em->ram_bytes, &token);
- btrfs_set_token_file_extent_compression(leaf, fi, em->compress_type,
- &token);
- btrfs_set_token_file_extent_encryption(leaf, fi, 0, &token);
- btrfs_set_token_file_extent_other_encoding(leaf, fi, 0, &token);
+ btrfs_set_token_file_extent_disk_bytenr(&token, fi, 0);
+ btrfs_set_token_file_extent_disk_num_bytes(&token, fi, 0);
+ }
+
+ btrfs_set_token_file_extent_offset(&token, fi, extent_offset);
+ btrfs_set_token_file_extent_num_bytes(&token, fi, em->len);
+ btrfs_set_token_file_extent_ram_bytes(&token, fi, em->ram_bytes);
+ btrfs_set_token_file_extent_compression(&token, fi, em->compress_type);
+ btrfs_set_token_file_extent_encryption(&token, fi, 0);
+ btrfs_set_token_file_extent_other_encoding(&token, fi, 0);
btrfs_mark_buffer_dirty(leaf);
btrfs_release_path(path);
@@ -4336,12 +4331,9 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans,
}
}
}
- if (ins_nr > 0) {
+ if (ins_nr > 0)
ret = copy_items(trans, inode, dst_path, path,
start_slot, ins_nr, 1, 0);
- if (ret > 0)
- ret = 0;
- }
out:
btrfs_release_path(path);
btrfs_free_path(dst_path);
@@ -4835,10 +4827,7 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans,
btrfs_release_path(path);
- key.objectid = ino;
- key.type = BTRFS_INODE_ITEM_KEY;
- key.offset = 0;
- inode = btrfs_iget(fs_info->sb, &key, root);
+ inode = btrfs_iget(fs_info->sb, ino, root);
/*
* If the other inode that had a conflicting dir entry was
* deleted in the current transaction, we need to log its parent
@@ -4847,8 +4836,7 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans,
if (IS_ERR(inode)) {
ret = PTR_ERR(inode);
if (ret == -ENOENT) {
- key.objectid = parent;
- inode = btrfs_iget(fs_info->sb, &key, root);
+ inode = btrfs_iget(fs_info->sb, parent, root);
if (IS_ERR(inode)) {
ret = PTR_ERR(inode);
} else {
@@ -5587,7 +5575,7 @@ process_leaf:
continue;
btrfs_release_path(path);
- di_inode = btrfs_iget(fs_info->sb, &di_key, root);
+ di_inode = btrfs_iget(fs_info->sb, di_key.objectid, root);
if (IS_ERR(di_inode)) {
ret = PTR_ERR(di_inode);
goto next_dir_inode;
@@ -5713,7 +5701,8 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans,
cur_offset = item_size;
}
- dir_inode = btrfs_iget(fs_info->sb, &inode_key, root);
+ dir_inode = btrfs_iget(fs_info->sb, inode_key.objectid,
+ root);
/*
* If the parent inode was deleted, return an error to
* fallback to a transaction commit. This is to prevent
@@ -5780,14 +5769,17 @@ static int log_new_ancestors(struct btrfs_trans_handle *trans,
int slot = path->slots[0];
struct btrfs_key search_key;
struct inode *inode;
+ u64 ino;
int ret = 0;
btrfs_release_path(path);
+ ino = found_key.offset;
+
search_key.objectid = found_key.offset;
search_key.type = BTRFS_INODE_ITEM_KEY;
search_key.offset = 0;
- inode = btrfs_iget(fs_info->sb, &search_key, root);
+ inode = btrfs_iget(fs_info->sb, ino, root);
if (IS_ERR(inode))
return PTR_ERR(inode);
@@ -6132,7 +6124,6 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree)
struct btrfs_trans_handle *trans;
struct btrfs_key key;
struct btrfs_key found_key;
- struct btrfs_key tmp_key;
struct btrfs_root *log;
struct btrfs_fs_info *fs_info = log_root_tree->fs_info;
struct walk_control wc = {
@@ -6194,11 +6185,8 @@ again:
goto error;
}
- tmp_key.objectid = found_key.offset;
- tmp_key.type = BTRFS_ROOT_ITEM_KEY;
- tmp_key.offset = (u64)-1;
-
- wc.replay_dest = btrfs_get_fs_root(fs_info, &tmp_key, true);
+ wc.replay_dest = btrfs_get_fs_root(fs_info, found_key.offset,
+ true);
if (IS_ERR(wc.replay_dest)) {
ret = PTR_ERR(wc.replay_dest);
diff --git a/fs/btrfs/uuid-tree.c b/fs/btrfs/uuid-tree.c
index 76671a6bcb61..28525ad7ff8c 100644
--- a/fs/btrfs/uuid-tree.c
+++ b/fs/btrfs/uuid-tree.c
@@ -257,7 +257,6 @@ out:
static int btrfs_check_uuid_tree_entry(struct btrfs_fs_info *fs_info,
u8 *uuid, u8 type, u64 subvolid)
{
- struct btrfs_key key;
int ret = 0;
struct btrfs_root *subvol_root;
@@ -265,10 +264,7 @@ static int btrfs_check_uuid_tree_entry(struct btrfs_fs_info *fs_info,
type != BTRFS_UUID_KEY_RECEIVED_SUBVOL)
goto out;
- key.objectid = subvolid;
- key.type = BTRFS_ROOT_ITEM_KEY;
- key.offset = (u64)-1;
- subvol_root = btrfs_get_fs_root(fs_info, &key, true);
+ subvol_root = btrfs_get_fs_root(fs_info, subvolid, true);
if (IS_ERR(subvol_root)) {
ret = PTR_ERR(subvol_root);
if (ret == -ENOENT)
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index c1909e5f4506..0d6e785bcb98 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -280,10 +280,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
* ============
*
* uuid_mutex
- * volume_mutex
- * device_list_mutex
- * chunk_mutex
- * balance_mutex
+ * device_list_mutex
+ * chunk_mutex
+ * balance_mutex
*
*
* Exclusive operations, BTRFS_FS_EXCL_OP
@@ -1042,6 +1041,8 @@ again:
&device->dev_state)) {
if (!test_bit(BTRFS_DEV_STATE_REPLACE_TGT,
&device->dev_state) &&
+ !test_bit(BTRFS_DEV_STATE_MISSING,
+ &device->dev_state) &&
(!latest_dev ||
device->generation > latest_dev->generation)) {
latest_dev = device;
@@ -1185,7 +1186,6 @@ static int open_fs_devices(struct btrfs_fs_devices *fs_devices,
{
struct btrfs_device *device;
struct btrfs_device *latest_dev = NULL;
- int ret = 0;
flags |= FMODE_EXCL;
@@ -1198,16 +1198,15 @@ static int open_fs_devices(struct btrfs_fs_devices *fs_devices,
device->generation > latest_dev->generation)
latest_dev = device;
}
- if (fs_devices->open_devices == 0) {
- ret = -EINVAL;
- goto out;
- }
+ if (fs_devices->open_devices == 0)
+ return -EINVAL;
+
fs_devices->opened = 1;
fs_devices->latest_bdev = latest_dev->bdev;
fs_devices->total_rw_bytes = 0;
fs_devices->chunk_alloc_policy = BTRFS_CHUNK_ALLOC_REGULAR;
-out:
- return ret;
+
+ return 0;
}
static int devid_cmp(void *priv, struct list_head *a, struct list_head *b)
@@ -1251,49 +1250,48 @@ void btrfs_release_disk_super(struct btrfs_super_block *super)
put_page(page);
}
-static int btrfs_read_disk_super(struct block_device *bdev, u64 bytenr,
- struct page **page,
- struct btrfs_super_block **disk_super)
+static struct btrfs_super_block *btrfs_read_disk_super(struct block_device *bdev,
+ u64 bytenr)
{
+ struct btrfs_super_block *disk_super;
+ struct page *page;
void *p;
pgoff_t index;
/* make sure our super fits in the device */
if (bytenr + PAGE_SIZE >= i_size_read(bdev->bd_inode))
- return 1;
+ return ERR_PTR(-EINVAL);
/* make sure our super fits in the page */
- if (sizeof(**disk_super) > PAGE_SIZE)
- return 1;
+ if (sizeof(*disk_super) > PAGE_SIZE)
+ return ERR_PTR(-EINVAL);
/* make sure our super doesn't straddle pages on disk */
index = bytenr >> PAGE_SHIFT;
- if ((bytenr + sizeof(**disk_super) - 1) >> PAGE_SHIFT != index)
- return 1;
+ if ((bytenr + sizeof(*disk_super) - 1) >> PAGE_SHIFT != index)
+ return ERR_PTR(-EINVAL);
/* pull in the page with our super */
- *page = read_cache_page_gfp(bdev->bd_inode->i_mapping,
- index, GFP_KERNEL);
+ page = read_cache_page_gfp(bdev->bd_inode->i_mapping, index, GFP_KERNEL);
- if (IS_ERR(*page))
- return 1;
+ if (IS_ERR(page))
+ return ERR_CAST(page);
- p = page_address(*page);
+ p = page_address(page);
/* align our pointer to the offset of the super block */
- *disk_super = p + offset_in_page(bytenr);
+ disk_super = p + offset_in_page(bytenr);
- if (btrfs_super_bytenr(*disk_super) != bytenr ||
- btrfs_super_magic(*disk_super) != BTRFS_MAGIC) {
+ if (btrfs_super_bytenr(disk_super) != bytenr ||
+ btrfs_super_magic(disk_super) != BTRFS_MAGIC) {
btrfs_release_disk_super(p);
- return 1;
+ return ERR_PTR(-EINVAL);
}
- if ((*disk_super)->label[0] &&
- (*disk_super)->label[BTRFS_LABEL_SIZE - 1])
- (*disk_super)->label[BTRFS_LABEL_SIZE - 1] = '\0';
+ if (disk_super->label[0] && disk_super->label[BTRFS_LABEL_SIZE - 1])
+ disk_super->label[BTRFS_LABEL_SIZE - 1] = 0;
- return 0;
+ return disk_super;
}
int btrfs_forget_devices(const char *path)
@@ -1319,7 +1317,6 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, fmode_t flags,
bool new_device_added = false;
struct btrfs_device *device = NULL;
struct block_device *bdev;
- struct page *page;
u64 bytenr;
lockdep_assert_held(&uuid_mutex);
@@ -1337,8 +1334,9 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, fmode_t flags,
if (IS_ERR(bdev))
return ERR_CAST(bdev);
- if (btrfs_read_disk_super(bdev, bytenr, &page, &disk_super)) {
- device = ERR_PTR(-EINVAL);
+ disk_super = btrfs_read_disk_super(bdev, bytenr);
+ if (IS_ERR(disk_super)) {
+ device = ERR_CAST(disk_super);
goto error_bdev_put;
}
@@ -2663,8 +2661,18 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
ret = btrfs_commit_transaction(trans);
}
- /* Update ctime/mtime for libblkid */
+ /*
+ * Now that we have written a new super block to this device, check the
+ * other fs_devices lists in case device_path is still recorded there from
+ * an earlier scan, i.e. it is now an alien to those filesystems.
+ * We can ignore the return value as it typically returns -EINVAL and
+ * only succeeds if the device was an alien.
+ */
+ btrfs_forget_devices(device_path);
+
+ /* Update ctime/mtime for blkid or udev */
update_dev_time(device_path);
+
return ret;
error_sysfs:
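btrfs_read_disk_super() above now returns the super block pointer directly, or an errno encoded with ERR_PTR(), instead of returning 1 and filling two output parameters; the caller propagates failures with ERR_CAST(). A compact userspace sketch of that calling convention follows; the macros are simplified stand-ins, not the linux/err.h implementation.

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_ERRNO	4095
#define ERR_PTR(err)	((void *)(intptr_t)(err))
#define IS_ERR(ptr)	((uintptr_t)(ptr) >= (uintptr_t)-MAX_ERRNO)
#define PTR_ERR(ptr)	((long)(intptr_t)(ptr))

struct disk_super_stub { int dummy; };

/* Return the object on success, or an errno encoded in the pointer. */
static struct disk_super_stub *read_super_stub(int simulate_failure)
{
	static struct disk_super_stub sb;

	if (simulate_failure)
		return ERR_PTR(-EINVAL);
	return &sb;
}

int main(void)
{
	struct disk_super_stub *sb = read_super_stub(1);

	if (IS_ERR(sb)) {
		printf("read failed: %ld\n", PTR_ERR(sb));
		return 1;
	}
	return 0;
}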
diff --git a/fs/buffer.c b/fs/buffer.c
index a60f60396cfa..64fe82ec65ff 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -123,14 +123,6 @@ void __wait_on_buffer(struct buffer_head * bh)
}
EXPORT_SYMBOL(__wait_on_buffer);
-static void
-__clear_page_buffers(struct page *page)
-{
- ClearPagePrivate(page);
- set_page_private(page, 0);
- put_page(page);
-}
-
static void buffer_io_error(struct buffer_head *bh, char *msg)
{
if (!test_bit(BH_Quiet, &bh->b_state))
@@ -906,7 +898,7 @@ link_dev_buffers(struct page *page, struct buffer_head *head)
bh = bh->b_this_page;
} while (bh);
tail->b_this_page = head;
- attach_page_buffers(page, head);
+ attach_page_private(page, head);
}
static sector_t blkdev_max_block(struct block_device *bdev, unsigned int size)
@@ -1154,12 +1146,19 @@ EXPORT_SYMBOL(mark_buffer_dirty);
void mark_buffer_write_io_error(struct buffer_head *bh)
{
+ struct super_block *sb;
+
set_buffer_write_io_error(bh);
/* FIXME: do we need to set this in both places? */
if (bh->b_page && bh->b_page->mapping)
mapping_set_error(bh->b_page->mapping, -EIO);
if (bh->b_assoc_map)
mapping_set_error(bh->b_assoc_map, -EIO);
+ rcu_read_lock();
+ sb = READ_ONCE(bh->b_bdev->bd_super);
+ if (sb)
+ errseq_set(&sb->s_wb_err, -EIO);
+ rcu_read_unlock();
}
EXPORT_SYMBOL(mark_buffer_write_io_error);
@@ -1580,7 +1579,7 @@ void create_empty_buffers(struct page *page,
bh = bh->b_this_page;
} while (bh != head);
}
- attach_page_buffers(page, head);
+ attach_page_private(page, head);
spin_unlock(&page->mapping->private_lock);
}
EXPORT_SYMBOL(create_empty_buffers);
@@ -2567,7 +2566,7 @@ static void attach_nobh_buffers(struct page *page, struct buffer_head *head)
bh->b_this_page = head;
bh = bh->b_this_page;
} while (bh != head);
- attach_page_buffers(page, head);
+ attach_page_private(page, head);
spin_unlock(&page->mapping->private_lock);
}
@@ -3227,7 +3226,7 @@ drop_buffers(struct page *page, struct buffer_head **buffers_to_free)
bh = next;
} while (bh != head);
*buffers_to_free = head;
- __clear_page_buffers(page);
+ detach_page_private(page);
return 1;
failed:
return 0;
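The open-coded attach_page_buffers()/__clear_page_buffers() pairs above are replaced with the attach_page_private()/detach_page_private() helpers. As a rough userspace model (the struct and helpers below are illustrative stand-ins, not the pagemap.h implementation), the pairing ties three things together: the private pointer, the PG_private flag, and a page reference held while data is attached.

#include <stdbool.h>
#include <stddef.h>

struct page_stub {
	int refcount;		/* stands in for the page refcount */
	bool has_private;	/* stands in for PG_private */
	void *private_data;	/* stands in for page->private */
};

static void attach_private(struct page_stub *page, void *data)
{
	page->refcount++;	/* like get_page(): pin the page while data hangs off it */
	page->private_data = data;
	page->has_private = true;
}

static void *detach_private(struct page_stub *page)
{
	void *data;

	if (!page->has_private)
		return NULL;
	data = page->private_data;
	page->has_private = false;
	page->private_data = NULL;
	page->refcount--;	/* like put_page(): drop the pin taken at attach time */
	return data;
}

int main(void)
{
	struct page_stub page = { .refcount = 1 };
	int buffers;

	attach_private(&page, &buffers);	/* refcount 2, private set */
	detach_private(&page);			/* refcount back to 1, private cleared */
	return page.refcount == 1 ? 0 : 1;
}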
diff --git a/fs/cachefiles/Kconfig b/fs/cachefiles/Kconfig
index ae559ed5b3b3..ff9ca55a9ae9 100644
--- a/fs/cachefiles/Kconfig
+++ b/fs/cachefiles/Kconfig
@@ -8,7 +8,7 @@ config CACHEFILES
filesystems - primarily networking filesystems - thus allowing fast
local disk to enhance the speed of slower devices.
- See Documentation/filesystems/caching/cachefiles.txt for more
+ See Documentation/filesystems/caching/cachefiles.rst for more
information.
config CACHEFILES_DEBUG
@@ -36,5 +36,5 @@ config CACHEFILES_HISTOGRAM
bouncing between CPUs. On the other hand, the histogram may be
useful for debugging purposes. Saying 'N' here is recommended.
- See Documentation/filesystems/caching/cachefiles.txt for more
+ See Documentation/filesystems/caching/cachefiles.rst for more
information.
diff --git a/fs/char_dev.c b/fs/char_dev.c
index c5e6eff5a381..ba0ded7842a7 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -483,6 +483,9 @@ int cdev_add(struct cdev *p, dev_t dev, unsigned count)
p->dev = dev;
p->count = count;
+ if (WARN_ON(dev == WHITEOUT_DEV))
+ return -EBUSY;
+
error = kobj_map(cdev_map, dev, count, NULL,
exact_match, exact_lock, p);
if (error)
diff --git a/fs/coda/Kconfig b/fs/coda/Kconfig
index ae6759f9594a..c3477eeafb3f 100644
--- a/fs/coda/Kconfig
+++ b/fs/coda/Kconfig
@@ -15,7 +15,7 @@ config CODA_FS
*client*. You will need user level code as well, both for the
client and server. Servers are currently user level, i.e. they need
no kernel support. Please read
- <file:Documentation/filesystems/coda.txt> and check out the Coda
+ <file:Documentation/filesystems/coda.rst> and check out the Coda
home page <http://www.coda.cs.cmu.edu/>.
To compile the coda client support as a module, choose M here: the
diff --git a/fs/compat_binfmt_elf.c b/fs/compat_binfmt_elf.c
index aaad4ca1217e..e61f3fe8e32a 100644
--- a/fs/compat_binfmt_elf.c
+++ b/fs/compat_binfmt_elf.c
@@ -17,6 +17,8 @@
#include <linux/elfcore-compat.h>
#include <linux/time.h>
+#define ELF_COMPAT 1
+
/*
* Rename the basic ELF layout types to refer to the 32-bit class of files.
*/
@@ -28,18 +30,20 @@
#undef elf_shdr
#undef elf_note
#undef elf_addr_t
+#undef ELF_GNU_PROPERTY_ALIGN
#define elfhdr elf32_hdr
#define elf_phdr elf32_phdr
#define elf_shdr elf32_shdr
#define elf_note elf32_note
#define elf_addr_t Elf32_Addr
+#define ELF_GNU_PROPERTY_ALIGN ELF32_GNU_PROPERTY_ALIGN
/*
* Some data types as stored in coredump.
*/
#define user_long_t compat_long_t
#define user_siginfo_t compat_siginfo_t
-#define copy_siginfo_to_user copy_siginfo_to_user32
+#define copy_siginfo_to_external copy_siginfo_to_external32
/*
* The machine-dependent core note format types are defined in elfcore-compat.h,
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index fd0b5dd68f9e..8bd6a883c94c 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -9,7 +9,7 @@
*
* configfs Copyright (C) 2005 Oracle. All rights reserved.
*
- * Please see Documentation/filesystems/configfs/configfs.txt for more
+ * Please see Documentation/filesystems/configfs.rst for more
* information.
*/
diff --git a/fs/configfs/item.c b/fs/configfs/item.c
index 6e0f1fcb8a5b..704a4356f137 100644
--- a/fs/configfs/item.c
+++ b/fs/configfs/item.c
@@ -9,7 +9,7 @@
*
* configfs Copyright (C) 2005 Oracle. All rights reserved.
*
- * Please see the file Documentation/filesystems/configfs/configfs.txt for
+ * Please see the file Documentation/filesystems/configfs.rst for
* critical information about using the config_item interface.
*/
diff --git a/fs/cramfs/Kconfig b/fs/cramfs/Kconfig
index c8bebb70a971..d98cef0dbb6b 100644
--- a/fs/cramfs/Kconfig
+++ b/fs/cramfs/Kconfig
@@ -9,7 +9,7 @@ config CRAMFS
limited to 256MB file systems (with 16MB files), and doesn't support
16/32 bits uid/gid, hard links and timestamps.
- See <file:Documentation/filesystems/cramfs.txt> and
+ See <file:Documentation/filesystems/cramfs.rst> and
<file:fs/cramfs/README> for further information.
To compile this as a module, choose M here: the module will be called
diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c
index 1ecaac7ee3cb..ed015cb66c7c 100644
--- a/fs/crypto/crypto.c
+++ b/fs/crypto/crypto.c
@@ -54,6 +54,7 @@ struct page *fscrypt_alloc_bounce_page(gfp_t gfp_flags)
/**
* fscrypt_free_bounce_page() - free a ciphertext bounce page
+ * @bounce_page: the bounce page to free, or NULL
*
* Free a bounce page that was allocated by fscrypt_encrypt_pagecache_blocks(),
* or by fscrypt_alloc_bounce_page() directly.
@@ -76,8 +77,12 @@ void fscrypt_generate_iv(union fscrypt_iv *iv, u64 lblk_num,
memset(iv, 0, ci->ci_mode->ivsize);
if (flags & FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64) {
- WARN_ON_ONCE((u32)lblk_num != lblk_num);
+ WARN_ON_ONCE(lblk_num > U32_MAX);
+ WARN_ON_ONCE(ci->ci_inode->i_ino > U32_MAX);
lblk_num |= (u64)ci->ci_inode->i_ino << 32;
+ } else if (flags & FSCRYPT_POLICY_FLAG_IV_INO_LBLK_32) {
+ WARN_ON_ONCE(lblk_num > U32_MAX);
+ lblk_num = (u32)(ci->ci_hashed_ino + lblk_num);
} else if (flags & FSCRYPT_POLICY_FLAG_DIRECT_KEY) {
memcpy(iv->nonce, ci->ci_nonce, FS_KEY_DERIVATION_NONCE_SIZE);
}
@@ -132,7 +137,8 @@ int fscrypt_crypt_block(const struct inode *inode, fscrypt_direction_t rw,
}
/**
- * fscrypt_encrypt_pagecache_blocks() - Encrypt filesystem blocks from a pagecache page
+ * fscrypt_encrypt_pagecache_blocks() - Encrypt filesystem blocks from a
+ * pagecache page
* @page: The locked pagecache page containing the block(s) to encrypt
* @len: Total size of the block(s) to encrypt. Must be a nonzero
* multiple of the filesystem's block size.
@@ -222,7 +228,8 @@ int fscrypt_encrypt_block_inplace(const struct inode *inode, struct page *page,
EXPORT_SYMBOL(fscrypt_encrypt_block_inplace);
/**
- * fscrypt_decrypt_pagecache_blocks() - Decrypt filesystem blocks in a pagecache page
+ * fscrypt_decrypt_pagecache_blocks() - Decrypt filesystem blocks in a
+ * pagecache page
* @page: The locked pagecache page containing the block(s) to decrypt
* @len: Total size of the block(s) to decrypt. Must be a nonzero
* multiple of the filesystem's block size.
@@ -346,6 +353,8 @@ void fscrypt_msg(const struct inode *inode, const char *level,
/**
* fscrypt_init() - Set up for fs encryption.
+ *
+ * Return: 0 on success; -errno on failure
*/
static int __init fscrypt_init(void)
{
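For the new IV_INO_LBLK_32 policy flag handled above, the block number folded into the IV is (u32)(ci_hashed_ino + lblk_num), i.e. a mod-2^32 sum of the SipHash-derived inode hash and the logical block number, in contrast to IV_INO_LBLK_64, which places the inode number in the upper 32 bits. A minimal sketch of just that arithmetic, with the hash value assumed to be computed elsewhere, follows.

#include <stdint.h>
#include <stdio.h>

/* hashed_ino would come from SipHash of the inode number in the real code;
 * here it is just a parameter. The (uint32_t) truncation gives the mod-2^32
 * wraparound that the kernel's (u32) cast performs. */
static uint32_t iv_ino_lblk_32(uint32_t hashed_ino, uint64_t lblk_num)
{
	return (uint32_t)(hashed_ino + lblk_num);
}

int main(void)
{
	printf("0x%08x\n", iv_ino_lblk_32(0xfffffff0u, 0x20));	/* wraps to 0x00000010 */
	return 0;
}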
diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c
index 4c212442a8f7..83ca5f1e7934 100644
--- a/fs/crypto/fname.c
+++ b/fs/crypto/fname.c
@@ -18,7 +18,7 @@
#include <crypto/skcipher.h>
#include "fscrypt_private.h"
-/**
+/*
* struct fscrypt_nokey_name - identifier for directory entry when key is absent
*
* When userspace lists an encrypted directory without access to the key, the
@@ -83,13 +83,8 @@ static int fscrypt_do_sha256(const u8 *data, unsigned int data_len, u8 *result)
tfm = prev_tfm;
}
}
- {
- SHASH_DESC_ON_STACK(desc, tfm);
- desc->tfm = tfm;
-
- return crypto_shash_digest(desc, data, data_len, result);
- }
+ return crypto_shash_tfm_digest(tfm, data, data_len, result);
}
static inline bool fscrypt_is_dot_dotdot(const struct qstr *str)
@@ -105,9 +100,12 @@ static inline bool fscrypt_is_dot_dotdot(const struct qstr *str)
/**
* fscrypt_fname_encrypt() - encrypt a filename
- *
- * The output buffer must be at least as large as the input buffer.
- * Any extra space is filled with NUL padding before encryption.
+ * @inode: inode of the parent directory (for regular filenames)
+ * or of the symlink (for symlink targets)
+ * @iname: the filename to encrypt
+ * @out: (output) the encrypted filename
+ * @olen: size of the encrypted filename. It must be at least @iname->len.
+ * Any extra space is filled with NUL padding before encryption.
*
* Return: 0 on success, -errno on failure
*/
@@ -157,8 +155,11 @@ int fscrypt_fname_encrypt(const struct inode *inode, const struct qstr *iname,
/**
* fname_decrypt() - decrypt a filename
- *
- * The caller must have allocated sufficient memory for the @oname string.
+ * @inode: inode of the parent directory (for regular filenames)
+ * or of the symlink (for symlink targets)
+ * @iname: the encrypted filename to decrypt
+ * @oname: (output) the decrypted filename. The caller must have allocated
+ * enough space for this, e.g. using fscrypt_fname_alloc_buffer().
*
* Return: 0 on success, -errno on failure
*/
@@ -206,7 +207,10 @@ static const char lookup_table[65] =
#define BASE64_CHARS(nbytes) DIV_ROUND_UP((nbytes) * 4, 3)
/**
- * base64_encode() -
+ * base64_encode() - base64-encode some bytes
+ * @src: the bytes to encode
+ * @len: number of bytes to encode
+ * @dst: (output) the base64-encoded string. Not NUL-terminated.
*
* Encodes the input string using characters from the set [A-Za-z0-9+,].
* The encoded string is roughly 4/3 times the size of the input string.
@@ -272,7 +276,12 @@ bool fscrypt_fname_encrypted_size(const struct inode *inode, u32 orig_len,
}
/**
- * fscrypt_fname_alloc_buffer - allocate a buffer for presented filenames
+ * fscrypt_fname_alloc_buffer() - allocate a buffer for presented filenames
+ * @inode: inode of the parent directory (for regular filenames)
+ * or of the symlink (for symlink targets)
+ * @max_encrypted_len: maximum length of encrypted filenames the buffer will be
+ * used to present
+ * @crypto_str: (output) buffer to allocate
*
* Allocate a buffer that is large enough to hold any decrypted or encoded
* filename (null-terminated), for the given maximum encrypted filename length.
@@ -297,9 +306,10 @@ int fscrypt_fname_alloc_buffer(const struct inode *inode,
EXPORT_SYMBOL(fscrypt_fname_alloc_buffer);
/**
- * fscrypt_fname_free_buffer - free the buffer for presented filenames
+ * fscrypt_fname_free_buffer() - free a buffer for presented filenames
+ * @crypto_str: the buffer to free
*
- * Free the buffer allocated by fscrypt_fname_alloc_buffer().
+ * Free a buffer that was allocated by fscrypt_fname_alloc_buffer().
*/
void fscrypt_fname_free_buffer(struct fscrypt_str *crypto_str)
{
@@ -311,10 +321,19 @@ void fscrypt_fname_free_buffer(struct fscrypt_str *crypto_str)
EXPORT_SYMBOL(fscrypt_fname_free_buffer);
/**
- * fscrypt_fname_disk_to_usr() - converts a filename from disk space to user
- * space
- *
- * The caller must have allocated sufficient memory for the @oname string.
+ * fscrypt_fname_disk_to_usr() - convert an encrypted filename to
+ * user-presentable form
+ * @inode: inode of the parent directory (for regular filenames)
+ * or of the symlink (for symlink targets)
+ * @hash: first part of the name's dirhash, if applicable. This only needs to
+ * be provided if the filename is located in an indexed directory whose
+ * encryption key may be unavailable. Not needed for symlink targets.
+ * @minor_hash: second part of the name's dirhash, if applicable
+ * @iname: encrypted filename to convert. May also be "." or "..", which
+ * aren't actually encrypted.
+ * @oname: output buffer for the user-presentable filename. The caller must
+ * have allocated enough space for this, e.g. using
+ * fscrypt_fname_alloc_buffer().
*
* If the key is available, we'll decrypt the disk name. Otherwise, we'll
* encode it for presentation in fscrypt_nokey_name format.
diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h
index dbced2937ec8..eb7fcd2b7fb8 100644
--- a/fs/crypto/fscrypt_private.h
+++ b/fs/crypto/fscrypt_private.h
@@ -43,7 +43,7 @@ struct fscrypt_context_v2 {
u8 nonce[FS_KEY_DERIVATION_NONCE_SIZE];
};
-/**
+/*
* fscrypt_context - the encryption context of an inode
*
* This is the on-disk equivalent of an fscrypt_policy, stored alongside each
@@ -157,7 +157,7 @@ fscrypt_policy_flags(const union fscrypt_policy *policy)
BUG();
}
-/**
+/*
* For encrypted symlinks, the ciphertext length is stored at the beginning
* of the string in little-endian format.
*/
@@ -222,6 +222,9 @@ struct fscrypt_info {
/* This inode's nonce, copied from the fscrypt_context */
u8 ci_nonce[FS_KEY_DERIVATION_NONCE_SIZE];
+
+ /* Hashed inode number. Only set for IV_INO_LBLK_32 */
+ u32 ci_hashed_ino;
};
typedef enum {
@@ -231,15 +234,14 @@ typedef enum {
/* crypto.c */
extern struct kmem_cache *fscrypt_info_cachep;
-extern int fscrypt_initialize(unsigned int cop_flags);
-extern int fscrypt_crypt_block(const struct inode *inode,
- fscrypt_direction_t rw, u64 lblk_num,
- struct page *src_page, struct page *dest_page,
- unsigned int len, unsigned int offs,
- gfp_t gfp_flags);
-extern struct page *fscrypt_alloc_bounce_page(gfp_t gfp_flags);
-
-extern void __printf(3, 4) __cold
+int fscrypt_initialize(unsigned int cop_flags);
+int fscrypt_crypt_block(const struct inode *inode, fscrypt_direction_t rw,
+ u64 lblk_num, struct page *src_page,
+ struct page *dest_page, unsigned int len,
+ unsigned int offs, gfp_t gfp_flags);
+struct page *fscrypt_alloc_bounce_page(gfp_t gfp_flags);
+
+void __printf(3, 4) __cold
fscrypt_msg(const struct inode *inode, const char *level, const char *fmt, ...);
#define fscrypt_warn(inode, fmt, ...) \
@@ -264,12 +266,10 @@ void fscrypt_generate_iv(union fscrypt_iv *iv, u64 lblk_num,
const struct fscrypt_info *ci);
/* fname.c */
-extern int fscrypt_fname_encrypt(const struct inode *inode,
- const struct qstr *iname,
- u8 *out, unsigned int olen);
-extern bool fscrypt_fname_encrypted_size(const struct inode *inode,
- u32 orig_len, u32 max_len,
- u32 *encrypted_len_ret);
+int fscrypt_fname_encrypt(const struct inode *inode, const struct qstr *iname,
+ u8 *out, unsigned int olen);
+bool fscrypt_fname_encrypted_size(const struct inode *inode, u32 orig_len,
+ u32 max_len, u32 *encrypted_len_ret);
extern const struct dentry_operations fscrypt_d_ops;
/* hkdf.c */
@@ -278,8 +278,8 @@ struct fscrypt_hkdf {
struct crypto_shash *hmac_tfm;
};
-extern int fscrypt_init_hkdf(struct fscrypt_hkdf *hkdf, const u8 *master_key,
- unsigned int master_key_size);
+int fscrypt_init_hkdf(struct fscrypt_hkdf *hkdf, const u8 *master_key,
+ unsigned int master_key_size);
/*
* The list of contexts in which fscrypt uses HKDF. These values are used as
@@ -293,12 +293,14 @@ extern int fscrypt_init_hkdf(struct fscrypt_hkdf *hkdf, const u8 *master_key,
#define HKDF_CONTEXT_DIRECT_KEY 3
#define HKDF_CONTEXT_IV_INO_LBLK_64_KEY 4
#define HKDF_CONTEXT_DIRHASH_KEY 5
+#define HKDF_CONTEXT_IV_INO_LBLK_32_KEY 6
+#define HKDF_CONTEXT_INODE_HASH_KEY 7
-extern int fscrypt_hkdf_expand(const struct fscrypt_hkdf *hkdf, u8 context,
- const u8 *info, unsigned int infolen,
- u8 *okm, unsigned int okmlen);
+int fscrypt_hkdf_expand(const struct fscrypt_hkdf *hkdf, u8 context,
+ const u8 *info, unsigned int infolen,
+ u8 *okm, unsigned int okmlen);
-extern void fscrypt_destroy_hkdf(struct fscrypt_hkdf *hkdf);
+void fscrypt_destroy_hkdf(struct fscrypt_hkdf *hkdf);
/* keyring.c */
@@ -389,14 +391,17 @@ struct fscrypt_master_key {
struct list_head mk_decrypted_inodes;
spinlock_t mk_decrypted_inodes_lock;
- /* Crypto API transforms for DIRECT_KEY policies, allocated on-demand */
- struct crypto_skcipher *mk_direct_tfms[__FSCRYPT_MODE_MAX + 1];
-
/*
- * Crypto API transforms for filesystem-layer implementation of
- * IV_INO_LBLK_64 policies, allocated on-demand.
+ * Per-mode encryption keys for the various types of encryption policies
+ * that use them. Allocated and derived on-demand.
*/
- struct crypto_skcipher *mk_iv_ino_lblk_64_tfms[__FSCRYPT_MODE_MAX + 1];
+ struct crypto_skcipher *mk_direct_keys[__FSCRYPT_MODE_MAX + 1];
+ struct crypto_skcipher *mk_iv_ino_lblk_64_keys[__FSCRYPT_MODE_MAX + 1];
+ struct crypto_skcipher *mk_iv_ino_lblk_32_keys[__FSCRYPT_MODE_MAX + 1];
+
+ /* Hash key for inode numbers. Initialized only when needed. */
+ siphash_key_t mk_ino_hash_key;
+ bool mk_ino_hash_key_initialized;
} __randomize_layout;
@@ -436,14 +441,17 @@ static inline int master_key_spec_len(const struct fscrypt_key_specifier *spec)
return 0;
}
-extern struct key *
+struct key *
fscrypt_find_master_key(struct super_block *sb,
const struct fscrypt_key_specifier *mk_spec);
-extern int fscrypt_verify_key_added(struct super_block *sb,
- const u8 identifier[FSCRYPT_KEY_IDENTIFIER_SIZE]);
+int fscrypt_add_test_dummy_key(struct super_block *sb,
+ struct fscrypt_key_specifier *key_spec);
+
+int fscrypt_verify_key_added(struct super_block *sb,
+ const u8 identifier[FSCRYPT_KEY_IDENTIFIER_SIZE]);
-extern int __init fscrypt_init_keyring(void);
+int __init fscrypt_init_keyring(void);
/* keysetup.c */
@@ -457,33 +465,32 @@ struct fscrypt_mode {
extern struct fscrypt_mode fscrypt_modes[];
-extern struct crypto_skcipher *
-fscrypt_allocate_skcipher(struct fscrypt_mode *mode, const u8 *raw_key,
- const struct inode *inode);
+struct crypto_skcipher *fscrypt_allocate_skcipher(struct fscrypt_mode *mode,
+ const u8 *raw_key,
+ const struct inode *inode);
-extern int fscrypt_set_per_file_enc_key(struct fscrypt_info *ci,
- const u8 *raw_key);
+int fscrypt_set_per_file_enc_key(struct fscrypt_info *ci, const u8 *raw_key);
-extern int fscrypt_derive_dirhash_key(struct fscrypt_info *ci,
- const struct fscrypt_master_key *mk);
+int fscrypt_derive_dirhash_key(struct fscrypt_info *ci,
+ const struct fscrypt_master_key *mk);
/* keysetup_v1.c */
-extern void fscrypt_put_direct_key(struct fscrypt_direct_key *dk);
+void fscrypt_put_direct_key(struct fscrypt_direct_key *dk);
+
+int fscrypt_setup_v1_file_key(struct fscrypt_info *ci,
+ const u8 *raw_master_key);
-extern int fscrypt_setup_v1_file_key(struct fscrypt_info *ci,
- const u8 *raw_master_key);
+int fscrypt_setup_v1_file_key_via_subscribed_keyrings(struct fscrypt_info *ci);
-extern int fscrypt_setup_v1_file_key_via_subscribed_keyrings(
- struct fscrypt_info *ci);
/* policy.c */
-extern bool fscrypt_policies_equal(const union fscrypt_policy *policy1,
- const union fscrypt_policy *policy2);
-extern bool fscrypt_supported_policy(const union fscrypt_policy *policy_u,
- const struct inode *inode);
-extern int fscrypt_policy_from_context(union fscrypt_policy *policy_u,
- const union fscrypt_context *ctx_u,
- int ctx_size);
+bool fscrypt_policies_equal(const union fscrypt_policy *policy1,
+ const union fscrypt_policy *policy2);
+bool fscrypt_supported_policy(const union fscrypt_policy *policy_u,
+ const struct inode *inode);
+int fscrypt_policy_from_context(union fscrypt_policy *policy_u,
+ const union fscrypt_context *ctx_u,
+ int ctx_size);
#endif /* _FSCRYPT_PRIVATE_H */
diff --git a/fs/crypto/hkdf.c b/fs/crypto/hkdf.c
index efb95bd19a89..0cba7928446d 100644
--- a/fs/crypto/hkdf.c
+++ b/fs/crypto/hkdf.c
@@ -44,17 +44,13 @@ static int hkdf_extract(struct crypto_shash *hmac_tfm, const u8 *ikm,
unsigned int ikmlen, u8 prk[HKDF_HASHLEN])
{
static const u8 default_salt[HKDF_HASHLEN];
- SHASH_DESC_ON_STACK(desc, hmac_tfm);
int err;
err = crypto_shash_setkey(hmac_tfm, default_salt, HKDF_HASHLEN);
if (err)
return err;
- desc->tfm = hmac_tfm;
- err = crypto_shash_digest(desc, ikm, ikmlen, prk);
- shash_desc_zero(desc);
- return err;
+ return crypto_shash_tfm_digest(hmac_tfm, ikm, ikmlen, prk);
}
/*
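The crypto_shash_tfm_digest() call used above is a one-shot helper; a sketch of what it replaces (the same open-coded sequence just removed from hkdf_extract(), assuming the usual shash API) is:

	static int shash_digest_oneshot(struct crypto_shash *tfm, const u8 *data,
					unsigned int len, u8 *out)
	{
		SHASH_DESC_ON_STACK(desc, tfm);	/* stack descriptor sized for tfm */
		int err;

		desc->tfm = tfm;
		err = crypto_shash_digest(desc, data, len, out);
		shash_desc_zero(desc);		/* wipe the descriptor state */
		return err;
	}

The same simplification is applied to ecryptfs_calculate_md5() further down in this patch.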
diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c
index 5ef861742921..09fb8aa0f2e9 100644
--- a/fs/crypto/hooks.c
+++ b/fs/crypto/hooks.c
@@ -10,7 +10,7 @@
#include "fscrypt_private.h"
/**
- * fscrypt_file_open - prepare to open a possibly-encrypted regular file
+ * fscrypt_file_open() - prepare to open a possibly-encrypted regular file
* @inode: the inode being opened
* @filp: the struct file being set up
*
@@ -262,7 +262,7 @@ err_free_sd:
EXPORT_SYMBOL_GPL(__fscrypt_encrypt_symlink);
/**
- * fscrypt_get_symlink - get the target of an encrypted symlink
+ * fscrypt_get_symlink() - get the target of an encrypted symlink
* @inode: the symlink inode
* @caddr: the on-disk contents of the symlink
* @max_size: size of @caddr buffer
diff --git a/fs/crypto/keyring.c b/fs/crypto/keyring.c
index ab41b25d4fa1..e24eb48bfbe1 100644
--- a/fs/crypto/keyring.c
+++ b/fs/crypto/keyring.c
@@ -20,6 +20,7 @@
#include <crypto/skcipher.h>
#include <linux/key-type.h>
+#include <linux/random.h>
#include <linux/seq_file.h>
#include "fscrypt_private.h"
@@ -44,8 +45,9 @@ static void free_master_key(struct fscrypt_master_key *mk)
wipe_master_key_secret(&mk->mk_secret);
for (i = 0; i <= __FSCRYPT_MODE_MAX; i++) {
- crypto_free_skcipher(mk->mk_direct_tfms[i]);
- crypto_free_skcipher(mk->mk_iv_ino_lblk_64_tfms[i]);
+ crypto_free_skcipher(mk->mk_direct_keys[i]);
+ crypto_free_skcipher(mk->mk_iv_ino_lblk_64_keys[i]);
+ crypto_free_skcipher(mk->mk_iv_ino_lblk_32_keys[i]);
}
key_put(mk->mk_users);
@@ -424,9 +426,9 @@ static int add_existing_master_key(struct fscrypt_master_key *mk,
return 0;
}
-static int add_master_key(struct super_block *sb,
- struct fscrypt_master_key_secret *secret,
- const struct fscrypt_key_specifier *mk_spec)
+static int do_add_master_key(struct super_block *sb,
+ struct fscrypt_master_key_secret *secret,
+ const struct fscrypt_key_specifier *mk_spec)
{
static DEFINE_MUTEX(fscrypt_add_key_mutex);
struct key *key;
@@ -465,6 +467,35 @@ out_unlock:
return err;
}
+static int add_master_key(struct super_block *sb,
+ struct fscrypt_master_key_secret *secret,
+ struct fscrypt_key_specifier *key_spec)
+{
+ int err;
+
+ if (key_spec->type == FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER) {
+ err = fscrypt_init_hkdf(&secret->hkdf, secret->raw,
+ secret->size);
+ if (err)
+ return err;
+
+ /*
+ * Now that the HKDF context is initialized, the raw key is no
+ * longer needed.
+ */
+ memzero_explicit(secret->raw, secret->size);
+
+ /* Calculate the key identifier */
+ err = fscrypt_hkdf_expand(&secret->hkdf,
+ HKDF_CONTEXT_KEY_IDENTIFIER, NULL, 0,
+ key_spec->u.identifier,
+ FSCRYPT_KEY_IDENTIFIER_SIZE);
+ if (err)
+ return err;
+ }
+ return do_add_master_key(sb, secret, key_spec);
+}
+
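For v2 keys the identifier is no longer computed in the ioctl handler but inside add_master_key(), so the test_dummy_encryption path added below can reuse it. Read as HKDF steps, this is a paraphrase of the calls above (the zero salt comes from hkdf_extract() in hkdf.c):

	/*
	 * secret->hkdf <- HKDF-Extract(salt = zeroes, IKM = secret->raw)
	 *                                              [fscrypt_init_hkdf()]
	 * identifier   <- HKDF-Expand(secret->hkdf,
	 *                             context = HKDF_CONTEXT_KEY_IDENTIFIER,
	 *                             info = NULL,
	 *                             outlen = FSCRYPT_KEY_IDENTIFIER_SIZE)
	 *                                              [fscrypt_hkdf_expand()]
	 */

The identifier is therefore a deterministic function of the raw key, which is also why the raw key can be wiped as soon as the HKDF context exists.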
static int fscrypt_provisioning_key_preparse(struct key_preparsed_payload *prep)
{
const struct fscrypt_provisioning_key_payload *payload = prep->data;
@@ -609,6 +640,15 @@ int fscrypt_ioctl_add_key(struct file *filp, void __user *_uarg)
if (memchr_inv(arg.__reserved, 0, sizeof(arg.__reserved)))
return -EINVAL;
+ /*
+ * Only root can add keys that are identified by an arbitrary descriptor
+ * rather than by a cryptographic hash --- since otherwise a malicious
+ * user could add the wrong key.
+ */
+ if (arg.key_spec.type == FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR &&
+ !capable(CAP_SYS_ADMIN))
+ return -EACCES;
+
memset(&secret, 0, sizeof(secret));
if (arg.key_id) {
if (arg.raw_size != 0)
@@ -626,48 +666,17 @@ int fscrypt_ioctl_add_key(struct file *filp, void __user *_uarg)
goto out_wipe_secret;
}
- switch (arg.key_spec.type) {
- case FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR:
- /*
- * Only root can add keys that are identified by an arbitrary
- * descriptor rather than by a cryptographic hash --- since
- * otherwise a malicious user could add the wrong key.
- */
- err = -EACCES;
- if (!capable(CAP_SYS_ADMIN))
- goto out_wipe_secret;
- break;
- case FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER:
- err = fscrypt_init_hkdf(&secret.hkdf, secret.raw, secret.size);
- if (err)
- goto out_wipe_secret;
-
- /*
- * Now that the HKDF context is initialized, the raw key is no
- * longer needed.
- */
- memzero_explicit(secret.raw, secret.size);
-
- /* Calculate the key identifier and return it to userspace. */
- err = fscrypt_hkdf_expand(&secret.hkdf,
- HKDF_CONTEXT_KEY_IDENTIFIER,
- NULL, 0, arg.key_spec.u.identifier,
- FSCRYPT_KEY_IDENTIFIER_SIZE);
- if (err)
- goto out_wipe_secret;
- err = -EFAULT;
- if (copy_to_user(uarg->key_spec.u.identifier,
- arg.key_spec.u.identifier,
- FSCRYPT_KEY_IDENTIFIER_SIZE))
- goto out_wipe_secret;
- break;
- default:
- WARN_ON(1);
- err = -EINVAL;
+ err = add_master_key(sb, &secret, &arg.key_spec);
+ if (err)
goto out_wipe_secret;
- }
- err = add_master_key(sb, &secret, &arg.key_spec);
+ /* Return the key identifier to userspace, if applicable */
+ err = -EFAULT;
+ if (arg.key_spec.type == FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER &&
+ copy_to_user(uarg->key_spec.u.identifier, arg.key_spec.u.identifier,
+ FSCRYPT_KEY_IDENTIFIER_SIZE))
+ goto out_wipe_secret;
+ err = 0;
out_wipe_secret:
wipe_master_key_secret(&secret);
return err;
@@ -675,6 +684,29 @@ out_wipe_secret:
EXPORT_SYMBOL_GPL(fscrypt_ioctl_add_key);
/*
+ * Add the key for '-o test_dummy_encryption' to the filesystem keyring.
+ *
+ * Use a per-boot random key to prevent people from misusing this option.
+ */
+int fscrypt_add_test_dummy_key(struct super_block *sb,
+ struct fscrypt_key_specifier *key_spec)
+{
+ static u8 test_key[FSCRYPT_MAX_KEY_SIZE];
+ struct fscrypt_master_key_secret secret;
+ int err;
+
+ get_random_once(test_key, FSCRYPT_MAX_KEY_SIZE);
+
+ memset(&secret, 0, sizeof(secret));
+ secret.size = FSCRYPT_MAX_KEY_SIZE;
+ memcpy(secret.raw, test_key, FSCRYPT_MAX_KEY_SIZE);
+
+ err = add_master_key(sb, &secret, key_spec);
+ wipe_master_key_secret(&secret);
+ return err;
+}
+
+/*
* Verify that the current user has added a master key with the given identifier
* (returns -ENOKEY if not). This is needed to prevent a user from encrypting
* their files using some other user's key which they don't actually know.
diff --git a/fs/crypto/keysetup.c b/fs/crypto/keysetup.c
index 302375e9f719..1129adfa097d 100644
--- a/fs/crypto/keysetup.c
+++ b/fs/crypto/keysetup.c
@@ -46,6 +46,8 @@ struct fscrypt_mode fscrypt_modes[] = {
},
};
+static DEFINE_MUTEX(fscrypt_mode_key_setup_mutex);
+
static struct fscrypt_mode *
select_encryption_mode(const union fscrypt_policy *policy,
const struct inode *inode)
@@ -130,7 +132,7 @@ static int setup_per_mode_enc_key(struct fscrypt_info *ci,
const struct super_block *sb = inode->i_sb;
struct fscrypt_mode *mode = ci->ci_mode;
const u8 mode_num = mode - fscrypt_modes;
- struct crypto_skcipher *tfm, *prev_tfm;
+ struct crypto_skcipher *tfm;
u8 mode_key[FSCRYPT_MAX_KEY_SIZE];
u8 hkdf_info[sizeof(mode_num) + sizeof(sb->s_uuid)];
unsigned int hkdf_infolen = 0;
@@ -139,10 +141,17 @@ static int setup_per_mode_enc_key(struct fscrypt_info *ci,
if (WARN_ON(mode_num > __FSCRYPT_MODE_MAX))
return -EINVAL;
- /* pairs with cmpxchg() below */
+ /* pairs with smp_store_release() below */
tfm = READ_ONCE(tfms[mode_num]);
- if (likely(tfm != NULL))
- goto done;
+ if (likely(tfm != NULL)) {
+ ci->ci_ctfm = tfm;
+ return 0;
+ }
+
+ mutex_lock(&fscrypt_mode_key_setup_mutex);
+
+ if (tfms[mode_num])
+ goto done_unlock;
BUILD_BUG_ON(sizeof(mode_num) != 1);
BUILD_BUG_ON(sizeof(sb->s_uuid) != 16);
@@ -157,21 +166,21 @@ static int setup_per_mode_enc_key(struct fscrypt_info *ci,
hkdf_context, hkdf_info, hkdf_infolen,
mode_key, mode->keysize);
if (err)
- return err;
+ goto out_unlock;
tfm = fscrypt_allocate_skcipher(mode, mode_key, inode);
memzero_explicit(mode_key, mode->keysize);
- if (IS_ERR(tfm))
- return PTR_ERR(tfm);
-
- /* pairs with READ_ONCE() above */
- prev_tfm = cmpxchg(&tfms[mode_num], NULL, tfm);
- if (prev_tfm != NULL) {
- crypto_free_skcipher(tfm);
- tfm = prev_tfm;
+ if (IS_ERR(tfm)) {
+ err = PTR_ERR(tfm);
+ goto out_unlock;
}
-done:
+ /* pairs with READ_ONCE() above */
+ smp_store_release(&tfms[mode_num], tfm);
+done_unlock:
ci->ci_ctfm = tfm;
- return 0;
+ err = 0;
+out_unlock:
+ mutex_unlock(&fscrypt_mode_key_setup_mutex);
+ return err;
}
int fscrypt_derive_dirhash_key(struct fscrypt_info *ci,
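The per-mode key setup above drops the cmpxchg() publication in favour of a mutex plus a release store: check the slot locklessly, take fscrypt_mode_key_setup_mutex, re-check, and only then derive the key and publish the tfm. A stripped-down sketch of that shape (make_mode_tfm() is a hypothetical stand-in for the HKDF-expand plus fscrypt_allocate_skcipher() steps):

	static struct crypto_skcipher *get_mode_tfm(struct crypto_skcipher **slot)
	{
		struct crypto_skcipher *tfm;

		/* Lockless fast path; pairs with smp_store_release() below. */
		tfm = READ_ONCE(*slot);
		if (tfm)
			return tfm;

		mutex_lock(&fscrypt_mode_key_setup_mutex);
		tfm = *slot;			/* re-check under the lock */
		if (!tfm) {
			tfm = make_mode_tfm();	/* hypothetical helper */
			if (IS_ERR(tfm)) {
				mutex_unlock(&fscrypt_mode_key_setup_mutex);
				return tfm;
			}
			/* Publish only after the tfm is fully keyed. */
			smp_store_release(slot, tfm);
		}
		mutex_unlock(&fscrypt_mode_key_setup_mutex);
		return tfm;
	}

Serialising on the mutex also avoids the wasted work of the old scheme, where two tasks could both derive and allocate the same key and the cmpxchg() loser had to free its tfm again.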
@@ -189,6 +198,43 @@ int fscrypt_derive_dirhash_key(struct fscrypt_info *ci,
return 0;
}
+static int fscrypt_setup_iv_ino_lblk_32_key(struct fscrypt_info *ci,
+ struct fscrypt_master_key *mk)
+{
+ int err;
+
+ err = setup_per_mode_enc_key(ci, mk, mk->mk_iv_ino_lblk_32_keys,
+ HKDF_CONTEXT_IV_INO_LBLK_32_KEY, true);
+ if (err)
+ return err;
+
+ /* pairs with smp_store_release() below */
+ if (!smp_load_acquire(&mk->mk_ino_hash_key_initialized)) {
+
+ mutex_lock(&fscrypt_mode_key_setup_mutex);
+
+ if (mk->mk_ino_hash_key_initialized)
+ goto unlock;
+
+ err = fscrypt_hkdf_expand(&mk->mk_secret.hkdf,
+ HKDF_CONTEXT_INODE_HASH_KEY, NULL, 0,
+ (u8 *)&mk->mk_ino_hash_key,
+ sizeof(mk->mk_ino_hash_key));
+ if (err)
+ goto unlock;
+ /* pairs with smp_load_acquire() above */
+ smp_store_release(&mk->mk_ino_hash_key_initialized, true);
+unlock:
+ mutex_unlock(&fscrypt_mode_key_setup_mutex);
+ if (err)
+ return err;
+ }
+
+ ci->ci_hashed_ino = (u32)siphash_1u64(ci->ci_inode->i_ino,
+ &mk->mk_ino_hash_key);
+ return 0;
+}
+
static int fscrypt_setup_v2_file_key(struct fscrypt_info *ci,
struct fscrypt_master_key *mk)
{
@@ -203,7 +249,7 @@ static int fscrypt_setup_v2_file_key(struct fscrypt_info *ci,
* encryption key. This ensures that the master key is
* consistently used only for HKDF, avoiding key reuse issues.
*/
- err = setup_per_mode_enc_key(ci, mk, mk->mk_direct_tfms,
+ err = setup_per_mode_enc_key(ci, mk, mk->mk_direct_keys,
HKDF_CONTEXT_DIRECT_KEY, false);
} else if (ci->ci_policy.v2.flags &
FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64) {
@@ -211,11 +257,14 @@ static int fscrypt_setup_v2_file_key(struct fscrypt_info *ci,
* IV_INO_LBLK_64: encryption keys are derived from (master_key,
* mode_num, filesystem_uuid), and inode number is included in
* the IVs. This format is optimized for use with inline
- * encryption hardware compliant with the UFS or eMMC standards.
+ * encryption hardware compliant with the UFS standard.
*/
- err = setup_per_mode_enc_key(ci, mk, mk->mk_iv_ino_lblk_64_tfms,
+ err = setup_per_mode_enc_key(ci, mk, mk->mk_iv_ino_lblk_64_keys,
HKDF_CONTEXT_IV_INO_LBLK_64_KEY,
true);
+ } else if (ci->ci_policy.v2.flags &
+ FSCRYPT_POLICY_FLAG_IV_INO_LBLK_32) {
+ err = fscrypt_setup_iv_ino_lblk_32_key(ci, mk);
} else {
u8 derived_key[FSCRYPT_MAX_KEY_SIZE];
@@ -395,21 +444,18 @@ int fscrypt_get_encryption_info(struct inode *inode)
res = inode->i_sb->s_cop->get_context(inode, &ctx, sizeof(ctx));
if (res < 0) {
- if (!fscrypt_dummy_context_enabled(inode) ||
- IS_ENCRYPTED(inode)) {
+ const union fscrypt_context *dummy_ctx =
+ fscrypt_get_dummy_context(inode->i_sb);
+
+ if (IS_ENCRYPTED(inode) || !dummy_ctx) {
fscrypt_warn(inode,
"Error %d getting encryption context",
res);
return res;
}
/* Fake up a context for an unencrypted directory */
- memset(&ctx, 0, sizeof(ctx));
- ctx.version = FSCRYPT_CONTEXT_V1;
- ctx.v1.contents_encryption_mode = FSCRYPT_MODE_AES_256_XTS;
- ctx.v1.filenames_encryption_mode = FSCRYPT_MODE_AES_256_CTS;
- memset(ctx.v1.master_key_descriptor, 0x42,
- FSCRYPT_KEY_DESCRIPTOR_SIZE);
- res = sizeof(ctx.v1);
+ res = fscrypt_context_size(dummy_ctx);
+ memcpy(&ctx, dummy_ctx, res);
}
crypt_info = kmem_cache_zalloc(fscrypt_info_cachep, GFP_NOFS);
@@ -475,7 +521,8 @@ out:
EXPORT_SYMBOL(fscrypt_get_encryption_info);
/**
- * fscrypt_put_encryption_info - free most of an inode's fscrypt data
+ * fscrypt_put_encryption_info() - free most of an inode's fscrypt data
+ * @inode: an inode being evicted
*
* Free the inode's fscrypt_info. Filesystems must call this when the inode is
* being evicted. An RCU grace period need not have elapsed yet.
@@ -488,7 +535,8 @@ void fscrypt_put_encryption_info(struct inode *inode)
EXPORT_SYMBOL(fscrypt_put_encryption_info);
/**
- * fscrypt_free_inode - free an inode's fscrypt data requiring RCU delay
+ * fscrypt_free_inode() - free an inode's fscrypt data requiring RCU delay
+ * @inode: an inode being freed
*
* Free the inode's cached decrypted symlink target, if any. Filesystems must
* call this after an RCU grace period, just before they free the inode.
@@ -503,7 +551,8 @@ void fscrypt_free_inode(struct inode *inode)
EXPORT_SYMBOL(fscrypt_free_inode);
/**
- * fscrypt_drop_inode - check whether the inode's master key has been removed
+ * fscrypt_drop_inode() - check whether the inode's master key has been removed
+ * @inode: an inode being considered for eviction
*
* Filesystems supporting fscrypt must call this from their ->drop_inode()
* method so that encrypted inodes are evicted as soon as they're no longer in
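IV_INO_LBLK_32 has to squeeze both the inode number and the block number into a 32-bit IV, so instead of embedding the inode number directly (as IV_INO_LBLK_64 does) it hashes it with a per-master-key SipHash key derived under HKDF_CONTEXT_INODE_HASH_KEY. A hedged sketch of that reduction (only the siphash_1u64() call and the field names come from the code above; fscrypt_generate_iv(), not shown in this hunk, then folds the result together with the logical block number):

	#include <linux/siphash.h>

	/* Collapse a 64-bit inode number to the 32-bit value used in the IV. */
	static u32 hash_inode_number(u64 ino, const siphash_key_t *ino_hash_key)
	{
		return (u32)siphash_1u64(ino, ino_hash_key);
	}

Because the inode number is hashed rather than used verbatim, supported_iv_ino_lblk_policy() below only bounds the block-number width (32 bits) for this flag and passes INT_MAX for the inode width.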
diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c
index 10ccf945020c..d23ff162c78b 100644
--- a/fs/crypto/policy.c
+++ b/fs/crypto/policy.c
@@ -11,12 +11,15 @@
*/
#include <linux/random.h>
+#include <linux/seq_file.h>
#include <linux/string.h>
#include <linux/mount.h>
#include "fscrypt_private.h"
/**
- * fscrypt_policies_equal - check whether two encryption policies are the same
+ * fscrypt_policies_equal() - check whether two encryption policies are the same
+ * @policy1: the first policy
+ * @policy2: the second policy
*
* Return: %true if equal, else %false
*/
@@ -66,18 +69,14 @@ static bool supported_direct_key_modes(const struct inode *inode,
return true;
}
-static bool supported_iv_ino_lblk_64_policy(
- const struct fscrypt_policy_v2 *policy,
- const struct inode *inode)
+static bool supported_iv_ino_lblk_policy(const struct fscrypt_policy_v2 *policy,
+ const struct inode *inode,
+ const char *type,
+ int max_ino_bits, int max_lblk_bits)
{
struct super_block *sb = inode->i_sb;
int ino_bits = 64, lblk_bits = 64;
- if (policy->flags & FSCRYPT_POLICY_FLAG_DIRECT_KEY) {
- fscrypt_warn(inode,
- "The DIRECT_KEY and IV_INO_LBLK_64 flags are mutually exclusive");
- return false;
- }
/*
* It's unsafe to include inode numbers in the IVs if the filesystem can
* potentially renumber inodes, e.g. via filesystem shrinking.
@@ -85,16 +84,22 @@ static bool supported_iv_ino_lblk_64_policy(
if (!sb->s_cop->has_stable_inodes ||
!sb->s_cop->has_stable_inodes(sb)) {
fscrypt_warn(inode,
- "Can't use IV_INO_LBLK_64 policy on filesystem '%s' because it doesn't have stable inode numbers",
- sb->s_id);
+ "Can't use %s policy on filesystem '%s' because it doesn't have stable inode numbers",
+ type, sb->s_id);
return false;
}
if (sb->s_cop->get_ino_and_lblk_bits)
sb->s_cop->get_ino_and_lblk_bits(sb, &ino_bits, &lblk_bits);
- if (ino_bits > 32 || lblk_bits > 32) {
+ if (ino_bits > max_ino_bits) {
+ fscrypt_warn(inode,
+ "Can't use %s policy on filesystem '%s' because its inode numbers are too long",
+ type, sb->s_id);
+ return false;
+ }
+ if (lblk_bits > max_lblk_bits) {
fscrypt_warn(inode,
- "Can't use IV_INO_LBLK_64 policy on filesystem '%s' because it doesn't use 32-bit inode and block numbers",
- sb->s_id);
+ "Can't use %s policy on filesystem '%s' because its block numbers are too long",
+ type, sb->s_id);
return false;
}
return true;
@@ -137,6 +142,8 @@ static bool fscrypt_supported_v1_policy(const struct fscrypt_policy_v1 *policy,
static bool fscrypt_supported_v2_policy(const struct fscrypt_policy_v2 *policy,
const struct inode *inode)
{
+ int count = 0;
+
if (!fscrypt_valid_enc_modes(policy->contents_encryption_mode,
policy->filenames_encryption_mode)) {
fscrypt_warn(inode,
@@ -152,13 +159,29 @@ static bool fscrypt_supported_v2_policy(const struct fscrypt_policy_v2 *policy,
return false;
}
+ count += !!(policy->flags & FSCRYPT_POLICY_FLAG_DIRECT_KEY);
+ count += !!(policy->flags & FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64);
+ count += !!(policy->flags & FSCRYPT_POLICY_FLAG_IV_INO_LBLK_32);
+ if (count > 1) {
+ fscrypt_warn(inode, "Mutually exclusive encryption flags (0x%02x)",
+ policy->flags);
+ return false;
+ }
+
if ((policy->flags & FSCRYPT_POLICY_FLAG_DIRECT_KEY) &&
!supported_direct_key_modes(inode, policy->contents_encryption_mode,
policy->filenames_encryption_mode))
return false;
if ((policy->flags & FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64) &&
- !supported_iv_ino_lblk_64_policy(policy, inode))
+ !supported_iv_ino_lblk_policy(policy, inode, "IV_INO_LBLK_64",
+ 32, 32))
+ return false;
+
+ if ((policy->flags & FSCRYPT_POLICY_FLAG_IV_INO_LBLK_32) &&
+ /* This uses hashed inode numbers, so ino_bits doesn't matter. */
+ !supported_iv_ino_lblk_policy(policy, inode, "IV_INO_LBLK_32",
+ INT_MAX, 32))
return false;
if (memchr_inv(policy->__reserved, 0, sizeof(policy->__reserved))) {
@@ -170,7 +193,9 @@ static bool fscrypt_supported_v2_policy(const struct fscrypt_policy_v2 *policy,
}
/**
- * fscrypt_supported_policy - check whether an encryption policy is supported
+ * fscrypt_supported_policy() - check whether an encryption policy is supported
+ * @policy_u: the encryption policy
+ * @inode: the inode on which the policy will be used
*
* Given an encryption policy, check whether all its encryption modes and other
* settings are supported by this kernel on the given inode. (But we don't
@@ -192,7 +217,10 @@ bool fscrypt_supported_policy(const union fscrypt_policy *policy_u,
}
/**
- * fscrypt_new_context_from_policy - create a new fscrypt_context from a policy
+ * fscrypt_new_context_from_policy() - create a new fscrypt_context from
+ * an fscrypt_policy
+ * @ctx_u: output context
+ * @policy_u: input policy
*
* Create an fscrypt_context for an inode that is being assigned the given
* encryption policy. A new nonce is randomly generated.
@@ -242,7 +270,11 @@ static int fscrypt_new_context_from_policy(union fscrypt_context *ctx_u,
}
/**
- * fscrypt_policy_from_context - convert an fscrypt_context to an fscrypt_policy
+ * fscrypt_policy_from_context() - convert an fscrypt_context to
+ * an fscrypt_policy
+ * @policy_u: output policy
+ * @ctx_u: input context
+ * @ctx_size: size of input context in bytes
*
* Given an fscrypt_context, build the corresponding fscrypt_policy.
*
@@ -354,6 +386,9 @@ static int set_encryption_policy(struct inode *inode,
policy->v2.master_key_identifier);
if (err)
return err;
+ if (policy->v2.flags & FSCRYPT_POLICY_FLAG_IV_INO_LBLK_32)
+ pr_warn_once("%s (pid %d) is setting an IV_INO_LBLK_32 encryption policy. This should only be used if there are certain hardware limitations.\n",
+ current->comm, current->pid);
break;
default:
WARN_ON(1);
@@ -605,3 +640,127 @@ int fscrypt_inherit_context(struct inode *parent, struct inode *child,
return preload ? fscrypt_get_encryption_info(child): 0;
}
EXPORT_SYMBOL(fscrypt_inherit_context);
+
+/**
+ * fscrypt_set_test_dummy_encryption() - handle '-o test_dummy_encryption'
+ * @sb: the filesystem on which test_dummy_encryption is being specified
+ * @arg: the argument to the test_dummy_encryption option.
+ * If no argument was specified, then @arg->from == NULL.
+ * @dummy_ctx: the filesystem's current dummy context (input/output, see below)
+ *
+ * Handle the test_dummy_encryption mount option by creating a dummy encryption
+ * context, saving it in @dummy_ctx, and adding the corresponding dummy
+ * encryption key to the filesystem. If @dummy_ctx is already set, then
+ * instead validate that it matches @arg. Don't support changing it via
+ * remount, as that is difficult to do safely.
+ *
+ * We use an fscrypt_context rather than an fscrypt_policy because we mustn't
+ * generate a new nonce each time we access a dummy-encrypted
+ * directory, as that would change the way filenames are encrypted.
+ *
+ * Return: 0 on success (dummy context set, or the same context is already set);
+ * -EEXIST if a different dummy context is already set;
+ * or another -errno value.
+ */
+int fscrypt_set_test_dummy_encryption(struct super_block *sb,
+ const substring_t *arg,
+ struct fscrypt_dummy_context *dummy_ctx)
+{
+ const char *argstr = "v2";
+ const char *argstr_to_free = NULL;
+ struct fscrypt_key_specifier key_spec = { 0 };
+ int version;
+ union fscrypt_context *ctx = NULL;
+ int err;
+
+ if (arg->from) {
+ argstr = argstr_to_free = match_strdup(arg);
+ if (!argstr)
+ return -ENOMEM;
+ }
+
+ if (!strcmp(argstr, "v1")) {
+ version = FSCRYPT_CONTEXT_V1;
+ key_spec.type = FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR;
+ memset(key_spec.u.descriptor, 0x42,
+ FSCRYPT_KEY_DESCRIPTOR_SIZE);
+ } else if (!strcmp(argstr, "v2")) {
+ version = FSCRYPT_CONTEXT_V2;
+ key_spec.type = FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER;
+ /* key_spec.u.identifier gets filled in when adding the key */
+ } else {
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (dummy_ctx->ctx) {
+ /*
+ * Note: if we ever make test_dummy_encryption support
+ * specifying other encryption settings, such as the encryption
+ * modes, we'll need to compare those settings here.
+ */
+ if (dummy_ctx->ctx->version == version)
+ err = 0;
+ else
+ err = -EEXIST;
+ goto out;
+ }
+
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ err = fscrypt_add_test_dummy_key(sb, &key_spec);
+ if (err)
+ goto out;
+
+ ctx->version = version;
+ switch (ctx->version) {
+ case FSCRYPT_CONTEXT_V1:
+ ctx->v1.contents_encryption_mode = FSCRYPT_MODE_AES_256_XTS;
+ ctx->v1.filenames_encryption_mode = FSCRYPT_MODE_AES_256_CTS;
+ memcpy(ctx->v1.master_key_descriptor, key_spec.u.descriptor,
+ FSCRYPT_KEY_DESCRIPTOR_SIZE);
+ break;
+ case FSCRYPT_CONTEXT_V2:
+ ctx->v2.contents_encryption_mode = FSCRYPT_MODE_AES_256_XTS;
+ ctx->v2.filenames_encryption_mode = FSCRYPT_MODE_AES_256_CTS;
+ memcpy(ctx->v2.master_key_identifier, key_spec.u.identifier,
+ FSCRYPT_KEY_IDENTIFIER_SIZE);
+ break;
+ default:
+ WARN_ON(1);
+ err = -EINVAL;
+ goto out;
+ }
+ dummy_ctx->ctx = ctx;
+ ctx = NULL;
+ err = 0;
+out:
+ kfree(ctx);
+ kfree(argstr_to_free);
+ return err;
+}
+EXPORT_SYMBOL_GPL(fscrypt_set_test_dummy_encryption);
+
+/**
+ * fscrypt_show_test_dummy_encryption() - show '-o test_dummy_encryption'
+ * @seq: the seq_file to print the option to
+ * @sep: the separator character to use
+ * @sb: the filesystem whose options are being shown
+ *
+ * Show the test_dummy_encryption mount option, if it was specified.
+ * This is mainly used for /proc/mounts.
+ */
+void fscrypt_show_test_dummy_encryption(struct seq_file *seq, char sep,
+ struct super_block *sb)
+{
+ const union fscrypt_context *ctx = fscrypt_get_dummy_context(sb);
+
+ if (!ctx)
+ return;
+ seq_printf(seq, "%ctest_dummy_encryption=v%d", sep, ctx->version);
+}
+EXPORT_SYMBOL_GPL(fscrypt_show_test_dummy_encryption);
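A filesystem wires these helpers up by keeping a struct fscrypt_dummy_context in its per-sb info and calling them from its option parser, ->show_options, and teardown paths; the ext4 hookup appears later in this patch. An illustrative sketch (sbi and args[0] stand for the caller's own structures):

	/* while parsing "test_dummy_encryption[=v1|v2]": */
	err = fscrypt_set_test_dummy_encryption(sb, &args[0],
						&sbi->s_dummy_enc_ctx);

	/* in ->show_options: */
	fscrypt_show_test_dummy_encryption(seq, ',', sb);

	/* in ->put_super (and on a failed mount): */
	fscrypt_free_dummy_context(&sbi->s_dummy_enc_ctx);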
diff --git a/fs/dcache.c b/fs/dcache.c
index 8dd4d8d7bd0b..361ea7ab30ea 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -647,6 +647,10 @@ static inline bool retain_dentry(struct dentry *dentry)
if (dentry->d_op->d_delete(dentry))
return false;
}
+
+ if (unlikely(dentry->d_flags & DCACHE_DONTCACHE))
+ return false;
+
/* retain; LRU fodder */
dentry->d_lockref.count--;
if (unlikely(!(dentry->d_flags & DCACHE_LRU_LIST)))
@@ -656,6 +660,21 @@ static inline bool retain_dentry(struct dentry *dentry)
return true;
}
+void d_mark_dontcache(struct inode *inode)
+{
+ struct dentry *de;
+
+ spin_lock(&inode->i_lock);
+ hlist_for_each_entry(de, &inode->i_dentry, d_u.d_alias) {
+ spin_lock(&de->d_lock);
+ de->d_flags |= DCACHE_DONTCACHE;
+ spin_unlock(&de->d_lock);
+ }
+ inode->i_state |= I_DONTCACHE;
+ spin_unlock(&inode->i_lock);
+}
+EXPORT_SYMBOL(d_mark_dontcache);
+
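d_mark_dontcache() lets a filesystem flag an inode and every dentry aliasing it as do-not-cache in one pass: the dentries are then refused by retain_dentry() above, and I_DONTCACHE makes the inode itself be evicted at its final iput() instead of lingering in the inode cache. An illustrative caller (the function name is hypothetical):

	/* Force this inode and its dentries out of the caches once the last
	 * reference is dropped, e.g. after an on-disk attribute change that
	 * requires the inode to be re-instantiated. */
	static void myfs_forget_inode(struct inode *inode)
	{
		d_mark_dontcache(inode);
	}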
/*
* Finish off a dentry we've decided to kill.
* dentry->d_lock must be held, returns with it unlocked.
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 00b4d15bb811..1543b5af400e 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -386,25 +386,6 @@ static void dio_bio_end_io(struct bio *bio)
spin_unlock_irqrestore(&dio->bio_lock, flags);
}
-/**
- * dio_end_io - handle the end io action for the given bio
- * @bio: The direct io bio thats being completed
- *
- * This is meant to be called by any filesystem that uses their own dio_submit_t
- * so that the DIO specific endio actions are dealt with after the filesystem
- * has done it's completion work.
- */
-void dio_end_io(struct bio *bio)
-{
- struct dio *dio = bio->bi_private;
-
- if (dio->is_async)
- dio_bio_end_aio(bio);
- else
- dio_bio_end_io(bio);
-}
-EXPORT_SYMBOL_GPL(dio_end_io);
-
static inline void
dio_bio_alloc(struct dio *dio, struct dio_submit *sdio,
struct block_device *bdev,
@@ -500,7 +481,7 @@ static struct bio *dio_await_one(struct dio *dio)
spin_unlock_irqrestore(&dio->bio_lock, flags);
if (!(dio->iocb->ki_flags & IOCB_HIPRI) ||
!blk_poll(dio->bio_disk->queue, dio->bio_cookie, true))
- io_schedule();
+ blk_io_schedule();
/* wake up sets us TASK_RUNNING */
spin_lock_irqsave(&dio->bio_lock, flags);
dio->waiter = NULL;
diff --git a/fs/ecryptfs/Kconfig b/fs/ecryptfs/Kconfig
index 522c35d5292b..1bdeaa6d5790 100644
--- a/fs/ecryptfs/Kconfig
+++ b/fs/ecryptfs/Kconfig
@@ -7,7 +7,7 @@ config ECRYPT_FS
select CRYPTO_MD5
help
Encrypted filesystem that operates on the VFS layer. See
- <file:Documentation/filesystems/ecryptfs.txt> to learn more about
+ <file:Documentation/filesystems/ecryptfs.rst> to learn more about
eCryptfs. Userspace components are required and can be
obtained from <http://ecryptfs.sf.net>.
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index 2c449aed1b92..0681540c48d9 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -48,18 +48,6 @@ void ecryptfs_from_hex(char *dst, char *src, int dst_size)
}
}
-static int ecryptfs_hash_digest(struct crypto_shash *tfm,
- char *src, int len, char *dst)
-{
- SHASH_DESC_ON_STACK(desc, tfm);
- int err;
-
- desc->tfm = tfm;
- err = crypto_shash_digest(desc, src, len, dst);
- shash_desc_zero(desc);
- return err;
-}
-
/**
* ecryptfs_calculate_md5 - calculates the md5 of @src
* @dst: Pointer to 16 bytes of allocated memory
@@ -74,11 +62,8 @@ static int ecryptfs_calculate_md5(char *dst,
struct ecryptfs_crypt_stat *crypt_stat,
char *src, int len)
{
- struct crypto_shash *tfm;
- int rc = 0;
+ int rc = crypto_shash_tfm_digest(crypt_stat->hash_tfm, src, len, dst);
- tfm = crypt_stat->hash_tfm;
- rc = ecryptfs_hash_digest(tfm, src, len, dst);
if (rc) {
printk(KERN_ERR
"%s: Error computing crypto hash; rc = [%d]\n",
diff --git a/fs/erofs/data.c b/fs/erofs/data.c
index fc3a8d8064f8..64b56c7df023 100644
--- a/fs/erofs/data.c
+++ b/fs/erofs/data.c
@@ -265,7 +265,7 @@ submit_bio_out:
*/
static int erofs_raw_access_readpage(struct file *file, struct page *page)
{
- erofs_off_t last_block;
+ erofs_off_t uninitialized_var(last_block);
struct bio *bio;
trace_erofs_readpage(page, true);
@@ -280,47 +280,36 @@ static int erofs_raw_access_readpage(struct file *file, struct page *page)
return 0;
}
-static int erofs_raw_access_readpages(struct file *filp,
- struct address_space *mapping,
- struct list_head *pages,
- unsigned int nr_pages)
+static void erofs_raw_access_readahead(struct readahead_control *rac)
{
- erofs_off_t last_block;
+ erofs_off_t uninitialized_var(last_block);
struct bio *bio = NULL;
- gfp_t gfp = readahead_gfp_mask(mapping);
- struct page *page = list_last_entry(pages, struct page, lru);
-
- trace_erofs_readpages(mapping->host, page, nr_pages, true);
+ struct page *page;
- for (; nr_pages; --nr_pages) {
- page = list_entry(pages->prev, struct page, lru);
+ trace_erofs_readpages(rac->mapping->host, readahead_index(rac),
+ readahead_count(rac), true);
+ while ((page = readahead_page(rac))) {
prefetchw(&page->flags);
- list_del(&page->lru);
- if (!add_to_page_cache_lru(page, mapping, page->index, gfp)) {
- bio = erofs_read_raw_page(bio, mapping, page,
- &last_block, nr_pages, true);
+ bio = erofs_read_raw_page(bio, rac->mapping, page, &last_block,
+ readahead_count(rac), true);
- /* all the page errors are ignored when readahead */
- if (IS_ERR(bio)) {
- pr_err("%s, readahead error at page %lu of nid %llu\n",
- __func__, page->index,
- EROFS_I(mapping->host)->nid);
+ /* all the page errors are ignored when readahead */
+ if (IS_ERR(bio)) {
+ pr_err("%s, readahead error at page %lu of nid %llu\n",
+ __func__, page->index,
+ EROFS_I(rac->mapping->host)->nid);
- bio = NULL;
- }
+ bio = NULL;
}
- /* pages could still be locked */
put_page(page);
}
- DBG_BUGON(!list_empty(pages));
/* the rare case (end in gaps) */
if (bio)
submit_bio(bio);
- return 0;
}
static int erofs_get_block(struct inode *inode, sector_t iblock,
@@ -358,7 +347,7 @@ static sector_t erofs_bmap(struct address_space *mapping, sector_t block)
/* for uncompressed (aligned) files and raw access for other files */
const struct address_space_operations erofs_raw_access_aops = {
.readpage = erofs_raw_access_readpage,
- .readpages = erofs_raw_access_readpages,
+ .readahead = erofs_raw_access_readahead,
.bmap = erofs_bmap,
};
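All of the .readpages -> .readahead conversions in this patch follow the same contract: the pages arrive already locked and inserted into the page cache, readahead_page() hands them out one at a time with a reference held, the implementation drops that reference once the page is queued for I/O (the put_page() calls above), and read errors are simply ignored. For filesystems with a plain get_block mapping the whole loop can be delegated, as the exfat and ext2 conversions below do; a minimal sketch (myfs_get_block is illustrative):

	#include <linux/mpage.h>

	static void myfs_readahead(struct readahead_control *rac)
	{
		mpage_readahead(rac, myfs_get_block);
	}

Filesystems that build their own bios, like erofs here, instead iterate with while ((page = readahead_page(rac))) and use readahead_count()/readahead_pos() for the geometry that nr_pages and the first page's index used to provide.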
diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c
index 5d2d81940679..7628816f2453 100644
--- a/fs/erofs/decompressor.c
+++ b/fs/erofs/decompressor.c
@@ -274,7 +274,7 @@ static int z_erofs_decompress_generic(struct z_erofs_decompress_req *rq,
i = 0;
while (1) {
- dst = vm_map_ram(rq->out, nrpages_out, -1, PAGE_KERNEL);
+ dst = vm_map_ram(rq->out, nrpages_out, -1);
/* retry two more times (totally 3 times) */
if (dst || ++i >= 3)
diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c
index 3350ab65d892..7dd4bbe9674f 100644
--- a/fs/erofs/inode.c
+++ b/fs/erofs/inode.c
@@ -311,27 +311,21 @@ int erofs_getattr(const struct path *path, struct kstat *stat,
const struct inode_operations erofs_generic_iops = {
.getattr = erofs_getattr,
-#ifdef CONFIG_EROFS_FS_XATTR
.listxattr = erofs_listxattr,
-#endif
.get_acl = erofs_get_acl,
};
const struct inode_operations erofs_symlink_iops = {
.get_link = page_get_link,
.getattr = erofs_getattr,
-#ifdef CONFIG_EROFS_FS_XATTR
.listxattr = erofs_listxattr,
-#endif
.get_acl = erofs_get_acl,
};
const struct inode_operations erofs_fast_symlink_iops = {
.get_link = simple_get_link,
.getattr = erofs_getattr,
-#ifdef CONFIG_EROFS_FS_XATTR
.listxattr = erofs_listxattr,
-#endif
.get_acl = erofs_get_acl,
};
diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
index 5eead7fdc7a6..1c077b7bb43d 100644
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@@ -46,6 +46,17 @@ typedef u64 erofs_off_t;
/* data type for filesystem-wide blocks number */
typedef u32 erofs_blk_t;
+struct erofs_fs_context {
+#ifdef CONFIG_EROFS_FS_ZIP
+ /* current strategy of how to use managed cache */
+ unsigned char cache_strategy;
+
+ /* threshold for decompression synchronously */
+ unsigned int max_sync_decompress_pages;
+#endif
+ unsigned int mount_opt;
+};
+
struct erofs_sb_info {
#ifdef CONFIG_EROFS_FS_ZIP
/* list for all registered superblocks, mainly for shrinker */
@@ -55,14 +66,8 @@ struct erofs_sb_info {
/* managed XArray arranged in physical block number */
struct xarray managed_pslots;
- /* threshold for decompression synchronously */
- unsigned int max_sync_decompress_pages;
-
unsigned int shrinker_run_no;
- /* current strategy of how to use managed cache */
- unsigned char cache_strategy;
-
/* pseudo inode to manage cached pages */
struct inode *managed_cache;
#endif /* CONFIG_EROFS_FS_ZIP */
@@ -88,7 +93,7 @@ struct erofs_sb_info {
u32 feature_compat;
u32 feature_incompat;
- unsigned int mount_opt;
+ struct erofs_fs_context ctx; /* options */
};
#define EROFS_SB(sb) ((struct erofs_sb_info *)(sb)->s_fs_info)
@@ -98,17 +103,17 @@ struct erofs_sb_info {
#define EROFS_MOUNT_XATTR_USER 0x00000010
#define EROFS_MOUNT_POSIX_ACL 0x00000020
-#define clear_opt(sbi, option) ((sbi)->mount_opt &= ~EROFS_MOUNT_##option)
-#define set_opt(sbi, option) ((sbi)->mount_opt |= EROFS_MOUNT_##option)
-#define test_opt(sbi, option) ((sbi)->mount_opt & EROFS_MOUNT_##option)
+#define clear_opt(ctx, option) ((ctx)->mount_opt &= ~EROFS_MOUNT_##option)
+#define set_opt(ctx, option) ((ctx)->mount_opt |= EROFS_MOUNT_##option)
+#define test_opt(ctx, option) ((ctx)->mount_opt & EROFS_MOUNT_##option)
-#ifdef CONFIG_EROFS_FS_ZIP
enum {
EROFS_ZIP_CACHE_DISABLED,
EROFS_ZIP_CACHE_READAHEAD,
EROFS_ZIP_CACHE_READAROUND
};
+#ifdef CONFIG_EROFS_FS_ZIP
#define EROFS_LOCKED_MAGIC (INT_MIN | 0xE0F510CCL)
/* basic unit of the workstation of a super_block */
diff --git a/fs/erofs/namei.c b/fs/erofs/namei.c
index 3abbecbf73de..52f201e03c62 100644
--- a/fs/erofs/namei.c
+++ b/fs/erofs/namei.c
@@ -244,9 +244,7 @@ static struct dentry *erofs_lookup(struct inode *dir,
const struct inode_operations erofs_dir_iops = {
.lookup = erofs_lookup,
.getattr = erofs_getattr,
-#ifdef CONFIG_EROFS_FS_XATTR
.listxattr = erofs_listxattr,
-#endif
.get_acl = erofs_get_acl,
};
diff --git a/fs/erofs/super.c b/fs/erofs/super.c
index b514c67e5fc2..7a13ffb07c23 100644
--- a/fs/erofs/super.c
+++ b/fs/erofs/super.c
@@ -10,6 +10,8 @@
#include <linux/parser.h>
#include <linux/seq_file.h>
#include <linux/crc32c.h>
+#include <linux/fs_context.h>
+#include <linux/fs_parser.h>
#include "xattr.h"
#define CREATE_TRACE_POINTS
@@ -192,53 +194,18 @@ out:
return ret;
}
-#ifdef CONFIG_EROFS_FS_ZIP
-static int erofs_build_cache_strategy(struct super_block *sb,
- substring_t *args)
-{
- struct erofs_sb_info *sbi = EROFS_SB(sb);
- const char *cs = match_strdup(args);
- int err = 0;
-
- if (!cs) {
- erofs_err(sb, "Not enough memory to store cache strategy");
- return -ENOMEM;
- }
-
- if (!strcmp(cs, "disabled")) {
- sbi->cache_strategy = EROFS_ZIP_CACHE_DISABLED;
- } else if (!strcmp(cs, "readahead")) {
- sbi->cache_strategy = EROFS_ZIP_CACHE_READAHEAD;
- } else if (!strcmp(cs, "readaround")) {
- sbi->cache_strategy = EROFS_ZIP_CACHE_READAROUND;
- } else {
- erofs_err(sb, "Unrecognized cache strategy \"%s\"", cs);
- err = -EINVAL;
- }
- kfree(cs);
- return err;
-}
-#else
-static int erofs_build_cache_strategy(struct super_block *sb,
- substring_t *args)
-{
- erofs_info(sb, "EROFS compression is disabled, so cache strategy is ignored");
- return 0;
-}
-#endif
-
/* set up default EROFS parameters */
-static void erofs_default_options(struct erofs_sb_info *sbi)
+static void erofs_default_options(struct erofs_fs_context *ctx)
{
#ifdef CONFIG_EROFS_FS_ZIP
- sbi->cache_strategy = EROFS_ZIP_CACHE_READAROUND;
- sbi->max_sync_decompress_pages = 3;
+ ctx->cache_strategy = EROFS_ZIP_CACHE_READAROUND;
+ ctx->max_sync_decompress_pages = 3;
#endif
#ifdef CONFIG_EROFS_FS_XATTR
- set_opt(sbi, XATTR_USER);
+ set_opt(ctx, XATTR_USER);
#endif
#ifdef CONFIG_EROFS_FS_POSIX_ACL
- set_opt(sbi, POSIX_ACL);
+ set_opt(ctx, POSIX_ACL);
#endif
}
@@ -251,73 +218,62 @@ enum {
Opt_err
};
-static match_table_t erofs_tokens = {
- {Opt_user_xattr, "user_xattr"},
- {Opt_nouser_xattr, "nouser_xattr"},
- {Opt_acl, "acl"},
- {Opt_noacl, "noacl"},
- {Opt_cache_strategy, "cache_strategy=%s"},
- {Opt_err, NULL}
+static const struct constant_table erofs_param_cache_strategy[] = {
+ {"disabled", EROFS_ZIP_CACHE_DISABLED},
+ {"readahead", EROFS_ZIP_CACHE_READAHEAD},
+ {"readaround", EROFS_ZIP_CACHE_READAROUND},
+ {}
};
-static int erofs_parse_options(struct super_block *sb, char *options)
-{
- substring_t args[MAX_OPT_ARGS];
- char *p;
- int err;
-
- if (!options)
- return 0;
-
- while ((p = strsep(&options, ","))) {
- int token;
+static const struct fs_parameter_spec erofs_fs_parameters[] = {
+ fsparam_flag_no("user_xattr", Opt_user_xattr),
+ fsparam_flag_no("acl", Opt_acl),
+ fsparam_enum("cache_strategy", Opt_cache_strategy,
+ erofs_param_cache_strategy),
+ {}
+};
- if (!*p)
- continue;
+static int erofs_fc_parse_param(struct fs_context *fc,
+ struct fs_parameter *param)
+{
+ struct erofs_fs_context *ctx __maybe_unused = fc->fs_private;
+ struct fs_parse_result result;
+ int opt;
- args[0].to = args[0].from = NULL;
- token = match_token(p, erofs_tokens, args);
+ opt = fs_parse(fc, erofs_fs_parameters, param, &result);
+ if (opt < 0)
+ return opt;
- switch (token) {
+ switch (opt) {
+ case Opt_user_xattr:
#ifdef CONFIG_EROFS_FS_XATTR
- case Opt_user_xattr:
- set_opt(EROFS_SB(sb), XATTR_USER);
- break;
- case Opt_nouser_xattr:
- clear_opt(EROFS_SB(sb), XATTR_USER);
- break;
+ if (result.boolean)
+ set_opt(ctx, XATTR_USER);
+ else
+ clear_opt(ctx, XATTR_USER);
#else
- case Opt_user_xattr:
- erofs_info(sb, "user_xattr options not supported");
- break;
- case Opt_nouser_xattr:
- erofs_info(sb, "nouser_xattr options not supported");
- break;
+ errorfc(fc, "{,no}user_xattr options not supported");
#endif
+ break;
+ case Opt_acl:
#ifdef CONFIG_EROFS_FS_POSIX_ACL
- case Opt_acl:
- set_opt(EROFS_SB(sb), POSIX_ACL);
- break;
- case Opt_noacl:
- clear_opt(EROFS_SB(sb), POSIX_ACL);
- break;
+ if (result.boolean)
+ set_opt(ctx, POSIX_ACL);
+ else
+ clear_opt(ctx, POSIX_ACL);
#else
- case Opt_acl:
- erofs_info(sb, "acl options not supported");
- break;
- case Opt_noacl:
- erofs_info(sb, "noacl options not supported");
- break;
+ errorfc(fc, "{,no}acl options not supported");
#endif
- case Opt_cache_strategy:
- err = erofs_build_cache_strategy(sb, args);
- if (err)
- return err;
- break;
- default:
- erofs_err(sb, "Unrecognized mount option \"%s\" or missing value", p);
- return -EINVAL;
- }
+ break;
+ case Opt_cache_strategy:
+#ifdef CONFIG_EROFS_FS_ZIP
+ ctx->cache_strategy = result.uint_32;
+#else
+ errorfc(fc, "compression not supported, cache_strategy ignored");
+#endif
+ break;
+ default:
+ return -ENOPARAM;
}
return 0;
}
@@ -381,10 +337,11 @@ static int erofs_init_managed_cache(struct super_block *sb)
static int erofs_init_managed_cache(struct super_block *sb) { return 0; }
#endif
-static int erofs_fill_super(struct super_block *sb, void *data, int silent)
+static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
{
struct inode *inode;
struct erofs_sb_info *sbi;
+ struct erofs_fs_context *ctx = fc->fs_private;
int err;
sb->s_magic = EROFS_SUPER_MAGIC;
@@ -408,22 +365,15 @@ static int erofs_fill_super(struct super_block *sb, void *data, int silent)
sb->s_time_gran = 1;
sb->s_op = &erofs_sops;
-
-#ifdef CONFIG_EROFS_FS_XATTR
sb->s_xattr = erofs_xattr_handlers;
-#endif
- /* set erofs default mount options */
- erofs_default_options(sbi);
- err = erofs_parse_options(sb, data);
- if (err)
- return err;
-
- if (test_opt(sbi, POSIX_ACL))
+ if (test_opt(ctx, POSIX_ACL))
sb->s_flags |= SB_POSIXACL;
else
sb->s_flags &= ~SB_POSIXACL;
+ sbi->ctx = *ctx;
+
#ifdef CONFIG_EROFS_FS_ZIP
xa_init(&sbi->managed_pslots);
#endif
@@ -450,15 +400,58 @@ static int erofs_fill_super(struct super_block *sb, void *data, int silent)
if (err)
return err;
- erofs_info(sb, "mounted with opts: %s, root inode @ nid %llu.",
- (char *)data, ROOT_NID(sbi));
+ erofs_info(sb, "mounted with root inode @ nid %llu.", ROOT_NID(sbi));
+ return 0;
+}
+
+static int erofs_fc_get_tree(struct fs_context *fc)
+{
+ return get_tree_bdev(fc, erofs_fc_fill_super);
+}
+
+static int erofs_fc_reconfigure(struct fs_context *fc)
+{
+ struct super_block *sb = fc->root->d_sb;
+ struct erofs_sb_info *sbi = EROFS_SB(sb);
+ struct erofs_fs_context *ctx = fc->fs_private;
+
+ DBG_BUGON(!sb_rdonly(sb));
+
+ if (test_opt(ctx, POSIX_ACL))
+ fc->sb_flags |= SB_POSIXACL;
+ else
+ fc->sb_flags &= ~SB_POSIXACL;
+
+ sbi->ctx = *ctx;
+
+ fc->sb_flags |= SB_RDONLY;
return 0;
}
-static struct dentry *erofs_mount(struct file_system_type *fs_type, int flags,
- const char *dev_name, void *data)
+static void erofs_fc_free(struct fs_context *fc)
{
- return mount_bdev(fs_type, flags, dev_name, data, erofs_fill_super);
+ kfree(fc->fs_private);
+}
+
+static const struct fs_context_operations erofs_context_ops = {
+ .parse_param = erofs_fc_parse_param,
+ .get_tree = erofs_fc_get_tree,
+ .reconfigure = erofs_fc_reconfigure,
+ .free = erofs_fc_free,
+};
+
+static int erofs_init_fs_context(struct fs_context *fc)
+{
+ fc->fs_private = kzalloc(sizeof(struct erofs_fs_context), GFP_KERNEL);
+ if (!fc->fs_private)
+ return -ENOMEM;
+
+ /* set default mount options */
+ erofs_default_options(fc->fs_private);
+
+ fc->ops = &erofs_context_ops;
+
+ return 0;
}
/*
@@ -497,7 +490,7 @@ static void erofs_put_super(struct super_block *sb)
static struct file_system_type erofs_fs_type = {
.owner = THIS_MODULE,
.name = "erofs",
- .mount = erofs_mount,
+ .init_fs_context = erofs_init_fs_context,
.kill_sb = erofs_kill_sb,
.fs_flags = FS_REQUIRES_DEV,
};
@@ -578,61 +571,37 @@ static int erofs_statfs(struct dentry *dentry, struct kstatfs *buf)
static int erofs_show_options(struct seq_file *seq, struct dentry *root)
{
struct erofs_sb_info *sbi __maybe_unused = EROFS_SB(root->d_sb);
+ struct erofs_fs_context *ctx __maybe_unused = &sbi->ctx;
#ifdef CONFIG_EROFS_FS_XATTR
- if (test_opt(sbi, XATTR_USER))
+ if (test_opt(ctx, XATTR_USER))
seq_puts(seq, ",user_xattr");
else
seq_puts(seq, ",nouser_xattr");
#endif
#ifdef CONFIG_EROFS_FS_POSIX_ACL
- if (test_opt(sbi, POSIX_ACL))
+ if (test_opt(ctx, POSIX_ACL))
seq_puts(seq, ",acl");
else
seq_puts(seq, ",noacl");
#endif
#ifdef CONFIG_EROFS_FS_ZIP
- if (sbi->cache_strategy == EROFS_ZIP_CACHE_DISABLED) {
+ if (ctx->cache_strategy == EROFS_ZIP_CACHE_DISABLED)
seq_puts(seq, ",cache_strategy=disabled");
- } else if (sbi->cache_strategy == EROFS_ZIP_CACHE_READAHEAD) {
+ else if (ctx->cache_strategy == EROFS_ZIP_CACHE_READAHEAD)
seq_puts(seq, ",cache_strategy=readahead");
- } else if (sbi->cache_strategy == EROFS_ZIP_CACHE_READAROUND) {
+ else if (ctx->cache_strategy == EROFS_ZIP_CACHE_READAROUND)
seq_puts(seq, ",cache_strategy=readaround");
- }
#endif
return 0;
}
-static int erofs_remount(struct super_block *sb, int *flags, char *data)
-{
- struct erofs_sb_info *sbi = EROFS_SB(sb);
- unsigned int org_mnt_opt = sbi->mount_opt;
- int err;
-
- DBG_BUGON(!sb_rdonly(sb));
- err = erofs_parse_options(sb, data);
- if (err)
- goto out;
-
- if (test_opt(sbi, POSIX_ACL))
- sb->s_flags |= SB_POSIXACL;
- else
- sb->s_flags &= ~SB_POSIXACL;
-
- *flags |= SB_RDONLY;
- return 0;
-out:
- sbi->mount_opt = org_mnt_opt;
- return err;
-}
-
const struct super_operations erofs_sops = {
.put_super = erofs_put_super,
.alloc_inode = erofs_alloc_inode,
.free_inode = erofs_free_inode,
.statfs = erofs_statfs,
.show_options = erofs_show_options,
- .remount_fs = erofs_remount,
};
module_init(erofs_module_init);
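With the switch to the new mount API, erofs no longer parses a flat options string in fill_super; the VFS drives the sequence instead. Roughly, and only as an illustration of callback order (the fs_context_for_mount()/vfs_parse_fs_string() calls below are kernel-internal VFS steps, not code erofs executes, and the device path is made up):

	struct fs_context *fc;

	fc = fs_context_for_mount(&erofs_fs_type, 0);	/* -> erofs_init_fs_context() */
	vfs_parse_fs_string(fc, "source", "/dev/vdb", 8);
	vfs_parse_fs_string(fc, "cache_strategy", "readaround", 10);
							/* -> erofs_fc_parse_param() */
	vfs_get_tree(fc);				/* -> erofs_fc_get_tree()
							 *    -> get_tree_bdev()
							 *       -> erofs_fc_fill_super() */
	put_fs_context(fc);				/* -> erofs_fc_free() */

Remount goes through ->reconfigure rather than the old .remount_fs hook, which is why erofs_remount() is removed above and its SB_POSIXACL/SB_RDONLY handling lives in erofs_fc_reconfigure().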
diff --git a/fs/erofs/xattr.c b/fs/erofs/xattr.c
index b766c3ee5fa8..87e437e7b34f 100644
--- a/fs/erofs/xattr.c
+++ b/fs/erofs/xattr.c
@@ -422,7 +422,7 @@ static int shared_getxattr(struct inode *inode, struct getxattr_iter *it)
static bool erofs_xattr_user_list(struct dentry *dentry)
{
- return test_opt(EROFS_SB(dentry->d_sb), XATTR_USER);
+ return test_opt(&EROFS_SB(dentry->d_sb)->ctx, XATTR_USER);
}
static bool erofs_xattr_trusted_list(struct dentry *dentry)
@@ -469,7 +469,7 @@ static int erofs_xattr_generic_get(const struct xattr_handler *handler,
switch (handler->flags) {
case EROFS_XATTR_INDEX_USER:
- if (!test_opt(sbi, XATTR_USER))
+ if (!test_opt(&sbi->ctx, XATTR_USER))
return -EOPNOTSUPP;
break;
case EROFS_XATTR_INDEX_TRUSTED:
diff --git a/fs/erofs/xattr.h b/fs/erofs/xattr.h
index 50966f1c676e..e4e5093f012c 100644
--- a/fs/erofs/xattr.h
+++ b/fs/erofs/xattr.h
@@ -76,11 +76,8 @@ static inline int erofs_getxattr(struct inode *inode, int index,
return -EOPNOTSUPP;
}
-static inline ssize_t erofs_listxattr(struct dentry *dentry,
- char *buffer, size_t buffer_size)
-{
- return -EOPNOTSUPP;
-}
+#define erofs_listxattr (NULL)
+#define erofs_xattr_handlers (NULL)
#endif /* !CONFIG_EROFS_FS_XATTR */
#ifdef CONFIG_EROFS_FS_POSIX_ACL
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index c4b6c9aa87ec..be50a4d9d273 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -615,7 +615,7 @@ restart_now:
goto err_out;
/* preload all compressed pages (maybe downgrade role if necessary) */
- if (should_alloc_managed_pages(fe, sbi->cache_strategy, map->m_la))
+ if (should_alloc_managed_pages(fe, sbi->ctx.cache_strategy, map->m_la))
cache_strategy = DELAYEDALLOC;
else
cache_strategy = DONTALLOC;
@@ -1302,31 +1302,26 @@ static int z_erofs_readpage(struct file *file, struct page *page)
static bool should_decompress_synchronously(struct erofs_sb_info *sbi,
unsigned int nr)
{
- return nr <= sbi->max_sync_decompress_pages;
+ return nr <= sbi->ctx.max_sync_decompress_pages;
}
-static int z_erofs_readpages(struct file *filp, struct address_space *mapping,
- struct list_head *pages, unsigned int nr_pages)
+static void z_erofs_readahead(struct readahead_control *rac)
{
- struct inode *const inode = mapping->host;
+ struct inode *const inode = rac->mapping->host;
struct erofs_sb_info *const sbi = EROFS_I_SB(inode);
- bool sync = should_decompress_synchronously(sbi, nr_pages);
+ bool sync = should_decompress_synchronously(sbi, readahead_count(rac));
struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode);
- gfp_t gfp = mapping_gfp_constraint(mapping, GFP_KERNEL);
- struct page *head = NULL;
+ struct page *page, *head = NULL;
LIST_HEAD(pagepool);
- trace_erofs_readpages(mapping->host, lru_to_page(pages),
- nr_pages, false);
+ trace_erofs_readpages(inode, readahead_index(rac),
+ readahead_count(rac), false);
- f.headoffset = (erofs_off_t)lru_to_page(pages)->index << PAGE_SHIFT;
-
- for (; nr_pages; --nr_pages) {
- struct page *page = lru_to_page(pages);
+ f.headoffset = readahead_pos(rac);
+ while ((page = readahead_page(rac))) {
prefetchw(&page->flags);
- list_del(&page->lru);
/*
* A pure asynchronous readahead is indicated if
@@ -1335,11 +1330,6 @@ static int z_erofs_readpages(struct file *filp, struct address_space *mapping,
*/
sync &= !(PageReadahead(page) && !head);
- if (add_to_page_cache_lru(page, mapping, page->index, gfp)) {
- list_add(&page->lru, &pagepool);
- continue;
- }
-
set_page_private(page, (unsigned long)head);
head = page;
}
@@ -1368,11 +1358,10 @@ static int z_erofs_readpages(struct file *filp, struct address_space *mapping,
/* clean up the remaining free pages */
put_pages_list(&pagepool);
- return 0;
}
const struct address_space_operations z_erofs_aops = {
.readpage = z_erofs_readpage,
- .readpages = z_erofs_readpages,
+ .readahead = z_erofs_readahead,
};
diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c
index 06887492f54b..785ead346543 100644
--- a/fs/exfat/inode.c
+++ b/fs/exfat/inode.c
@@ -372,10 +372,9 @@ static int exfat_readpage(struct file *file, struct page *page)
return mpage_readpage(page, exfat_get_block);
}
-static int exfat_readpages(struct file *file, struct address_space *mapping,
- struct list_head *pages, unsigned int nr_pages)
+static void exfat_readahead(struct readahead_control *rac)
{
- return mpage_readpages(mapping, pages, nr_pages, exfat_get_block);
+ mpage_readahead(rac, exfat_get_block);
}
static int exfat_writepage(struct page *page, struct writeback_control *wbc)
@@ -502,7 +501,7 @@ int exfat_block_truncate_page(struct inode *inode, loff_t from)
static const struct address_space_operations exfat_aops = {
.readpage = exfat_readpage,
- .readpages = exfat_readpages,
+ .readahead = exfat_readahead,
.writepage = exfat_writepage,
.writepages = exfat_writepages,
.write_begin = exfat_write_begin,
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index c885cf7d724b..2875c0a705b5 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -877,11 +877,9 @@ static int ext2_readpage(struct file *file, struct page *page)
return mpage_readpage(page, ext2_get_block);
}
-static int
-ext2_readpages(struct file *file, struct address_space *mapping,
- struct list_head *pages, unsigned nr_pages)
+static void ext2_readahead(struct readahead_control *rac)
{
- return mpage_readpages(mapping, pages, nr_pages, ext2_get_block);
+ mpage_readahead(rac, ext2_get_block);
}
static int
@@ -967,7 +965,7 @@ ext2_dax_writepages(struct address_space *mapping, struct writeback_control *wbc
const struct address_space_operations ext2_aops = {
.readpage = ext2_readpage,
- .readpages = ext2_readpages,
+ .readahead = ext2_readahead,
.writepage = ext2_writepage,
.write_begin = ext2_write_begin,
.write_end = ext2_write_end,
@@ -981,7 +979,7 @@ const struct address_space_operations ext2_aops = {
const struct address_space_operations ext2_nobh_aops = {
.readpage = ext2_readpage,
- .readpages = ext2_readpages,
+ .readahead = ext2_readahead,
.writepage = ext2_nobh_writepage,
.write_begin = ext2_nobh_write_begin,
.write_end = nobh_write_end,
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index ad2dbf6e4924..15b062efcff1 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1357,11 +1357,9 @@ struct ext4_super_block {
*/
#define EXT4_MF_MNTDIR_SAMPLED 0x0001
#define EXT4_MF_FS_ABORTED 0x0002 /* Fatal error detected */
-#define EXT4_MF_TEST_DUMMY_ENCRYPTION 0x0004
#ifdef CONFIG_FS_ENCRYPTION
-#define DUMMY_ENCRYPTION_ENABLED(sbi) (unlikely((sbi)->s_mount_flags & \
- EXT4_MF_TEST_DUMMY_ENCRYPTION))
+#define DUMMY_ENCRYPTION_ENABLED(sbi) ((sbi)->s_dummy_enc_ctx.ctx != NULL)
#else
#define DUMMY_ENCRYPTION_ENABLED(sbi) (0)
#endif
@@ -1551,6 +1549,9 @@ struct ext4_sb_info {
struct ratelimit_state s_warning_ratelimit_state;
struct ratelimit_state s_msg_ratelimit_state;
+ /* Encryption context for '-o test_dummy_encryption' */
+ struct fscrypt_dummy_context s_dummy_enc_ctx;
+
/*
* Barrier between writepages ops and changing any inode's JOURNAL_DATA
* or EXTENTS flag.
@@ -3316,9 +3317,8 @@ static inline void ext4_set_de_type(struct super_block *sb,
}
/* readpages.c */
-extern int ext4_mpage_readpages(struct address_space *mapping,
- struct list_head *pages, struct page *page,
- unsigned nr_pages, bool is_readahead);
+extern int ext4_mpage_readpages(struct inode *inode,
+ struct readahead_control *rac, struct page *page);
extern int __init ext4_init_post_read_processing(void);
extern void ext4_exit_post_read_processing(void);
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index e10206e7f4bb..35ff9a56db67 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -176,7 +176,7 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
ret = ext4_fsync_journal(inode, datasync, &needs_barrier);
if (needs_barrier) {
- err = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
+ err = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL);
if (!ret)
ret = err;
}
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 4b8c9a9bdf0c..499f08d8522e 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -1440,7 +1440,7 @@ int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
if (ret < 0)
goto err_out;
if (barrier)
- blkdev_issue_flush(sb->s_bdev, GFP_NOFS, NULL);
+ blkdev_issue_flush(sb->s_bdev, GFP_NOFS);
skip_zeroout:
ext4_lock_group(sb, group);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 2a4aae6acdcb..52be85f96159 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3224,23 +3224,20 @@ static int ext4_readpage(struct file *file, struct page *page)
ret = ext4_readpage_inline(inode, page);
if (ret == -EAGAIN)
- return ext4_mpage_readpages(page->mapping, NULL, page, 1,
- false);
+ return ext4_mpage_readpages(inode, NULL, page);
return ret;
}
-static int
-ext4_readpages(struct file *file, struct address_space *mapping,
- struct list_head *pages, unsigned nr_pages)
+static void ext4_readahead(struct readahead_control *rac)
{
- struct inode *inode = mapping->host;
+ struct inode *inode = rac->mapping->host;
- /* If the file has inline data, no need to do readpages. */
+ /* If the file has inline data, no need to do readahead. */
if (ext4_has_inline_data(inode))
- return 0;
+ return;
- return ext4_mpage_readpages(mapping, pages, NULL, nr_pages, true);
+ ext4_mpage_readpages(inode, rac, NULL);
}
static void ext4_invalidatepage(struct page *page, unsigned int offset,
@@ -3605,7 +3602,7 @@ static int ext4_set_page_dirty(struct page *page)
static const struct address_space_operations ext4_aops = {
.readpage = ext4_readpage,
- .readpages = ext4_readpages,
+ .readahead = ext4_readahead,
.writepage = ext4_writepage,
.writepages = ext4_writepages,
.write_begin = ext4_write_begin,
@@ -3622,7 +3619,7 @@ static const struct address_space_operations ext4_aops = {
static const struct address_space_operations ext4_journalled_aops = {
.readpage = ext4_readpage,
- .readpages = ext4_readpages,
+ .readahead = ext4_readahead,
.writepage = ext4_writepage,
.writepages = ext4_writepages,
.write_begin = ext4_write_begin,
@@ -3638,7 +3635,7 @@ static const struct address_space_operations ext4_journalled_aops = {
static const struct address_space_operations ext4_da_aops = {
.readpage = ext4_readpage,
- .readpages = ext4_readpages,
+ .readahead = ext4_readahead,
.writepage = ext4_writepage,
.writepages = ext4_writepages,
.write_begin = ext4_da_write_begin,
diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
index c1769afbf799..5761e9961682 100644
--- a/fs/ext4/readpage.c
+++ b/fs/ext4/readpage.c
@@ -7,8 +7,8 @@
*
* This was originally taken from fs/mpage.c
*
- * The intent is the ext4_mpage_readpages() function here is intended
- * to replace mpage_readpages() in the general case, not just for
+ * The ext4_mpage_readpages() function here is intended to
+ * replace mpage_readahead() in the general case, not just for
* encrypted files. It has some limitations (see below), where it
* will fall back to read_block_full_page(), but these limitations
* should only be hit when page_size != block_size.
@@ -221,14 +221,12 @@ static inline loff_t ext4_readpage_limit(struct inode *inode)
return i_size_read(inode);
}
-int ext4_mpage_readpages(struct address_space *mapping,
- struct list_head *pages, struct page *page,
- unsigned nr_pages, bool is_readahead)
+int ext4_mpage_readpages(struct inode *inode,
+ struct readahead_control *rac, struct page *page)
{
struct bio *bio = NULL;
sector_t last_block_in_bio = 0;
- struct inode *inode = mapping->host;
const unsigned blkbits = inode->i_blkbits;
const unsigned blocks_per_page = PAGE_SIZE >> blkbits;
const unsigned blocksize = 1 << blkbits;
@@ -241,6 +239,7 @@ int ext4_mpage_readpages(struct address_space *mapping,
int length;
unsigned relative_block = 0;
struct ext4_map_blocks map;
+ unsigned int nr_pages = rac ? readahead_count(rac) : 1;
map.m_pblk = 0;
map.m_lblk = 0;
@@ -251,14 +250,9 @@ int ext4_mpage_readpages(struct address_space *mapping,
int fully_mapped = 1;
unsigned first_hole = blocks_per_page;
- if (pages) {
- page = lru_to_page(pages);
-
+ if (rac) {
+ page = readahead_page(rac);
prefetchw(&page->flags);
- list_del(&page->lru);
- if (add_to_page_cache_lru(page, mapping, page->index,
- readahead_gfp_mask(mapping)))
- goto next_page;
}
if (page_has_buffers(page))
@@ -381,7 +375,7 @@ int ext4_mpage_readpages(struct address_space *mapping,
bio->bi_iter.bi_sector = blocks[0] << (blkbits - 9);
bio->bi_end_io = mpage_end_io;
bio_set_op_attrs(bio, REQ_OP_READ,
- is_readahead ? REQ_RAHEAD : 0);
+ rac ? REQ_RAHEAD : 0);
}
length = first_hole << blkbits;
@@ -406,10 +400,9 @@ int ext4_mpage_readpages(struct address_space *mapping,
else
unlock_page(page);
next_page:
- if (pages)
+ if (rac)
put_page(page);
}
- BUG_ON(pages && !list_empty(pages));
if (bio)
submit_bio(bio);
return 0;
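ext4_mpage_readpages() now serves both entry points through a single calling convention: ->readpage passes a locked page and a NULL rac, while ->readahead passes a rac and a NULL page. Stripped of the block-mapping and bio details, the loop above reduces to roughly this (hypothetical names, assuming only the readahead helpers already used in the patch):

int example_mpage_readpages(struct inode *inode,
                            struct readahead_control *rac, struct page *page)
{
        unsigned int nr_pages = rac ? readahead_count(rac) : 1;

        for (; nr_pages; nr_pages--) {
                if (rac) {
                        /* readahead: the page is already in the page cache
                         * and a reference is held for us */
                        page = readahead_page(rac);
                        prefetchw(&page->flags);
                }

                /* ... map blocks and queue read bios for @page ... */

                if (rac)
                        put_page(page); /* drop the readahead_page() reference */
        }
        /* submit any partially built bio, then */
        return 0;
}

The old lru_to_page()/add_to_page_cache_lru() bookkeeping and the closing BUG_ON() disappear because the core readahead code now owns page-cache insertion.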
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index bf5fcb477f66..9824cd8203e8 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1106,6 +1106,7 @@ static void ext4_put_super(struct super_block *sb)
crypto_free_shash(sbi->s_chksum_driver);
kfree(sbi->s_blockgroup_lock);
fs_put_dax(sbi->s_daxdev);
+ fscrypt_free_dummy_context(&sbi->s_dummy_enc_ctx);
#ifdef CONFIG_UNICODE
utf8_unload(sbi->s_encoding);
#endif
@@ -1389,9 +1390,10 @@ retry:
return res;
}
-static bool ext4_dummy_context(struct inode *inode)
+static const union fscrypt_context *
+ext4_get_dummy_context(struct super_block *sb)
{
- return DUMMY_ENCRYPTION_ENABLED(EXT4_SB(inode->i_sb));
+ return EXT4_SB(sb)->s_dummy_enc_ctx.ctx;
}
static bool ext4_has_stable_inodes(struct super_block *sb)
@@ -1410,7 +1412,7 @@ static const struct fscrypt_operations ext4_cryptops = {
.key_prefix = "ext4:",
.get_context = ext4_get_context,
.set_context = ext4_set_context,
- .dummy_context = ext4_dummy_context,
+ .get_dummy_context = ext4_get_dummy_context,
.empty_dir = ext4_empty_dir,
.max_namelen = EXT4_NAME_LEN,
.has_stable_inodes = ext4_has_stable_inodes,
@@ -1605,6 +1607,7 @@ static const match_table_t tokens = {
{Opt_init_itable, "init_itable"},
{Opt_noinit_itable, "noinit_itable"},
{Opt_max_dir_size_kb, "max_dir_size_kb=%u"},
+ {Opt_test_dummy_encryption, "test_dummy_encryption=%s"},
{Opt_test_dummy_encryption, "test_dummy_encryption"},
{Opt_nombcache, "nombcache"},
{Opt_nombcache, "no_mbcache"}, /* for backward compatibility */
@@ -1816,7 +1819,7 @@ static const struct mount_opts {
{Opt_jqfmt_vfsv0, QFMT_VFS_V0, MOPT_QFMT},
{Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT},
{Opt_max_dir_size_kb, 0, MOPT_GTE0},
- {Opt_test_dummy_encryption, 0, MOPT_GTE0},
+ {Opt_test_dummy_encryption, 0, MOPT_STRING},
{Opt_nombcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET},
{Opt_err, 0, 0}
};
@@ -1851,6 +1854,48 @@ static int ext4_sb_read_encoding(const struct ext4_super_block *es,
}
#endif
+static int ext4_set_test_dummy_encryption(struct super_block *sb,
+ const char *opt,
+ const substring_t *arg,
+ bool is_remount)
+{
+#ifdef CONFIG_FS_ENCRYPTION
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ int err;
+
+ /*
+ * This mount option is just for testing, and it's not worthwhile to
+ * implement the extra complexity (e.g. RCU protection) that would be
+ * needed to allow it to be set or changed during remount. We do allow
+ * it to be specified during remount, but only if there is no change.
+ */
+ if (is_remount && !sbi->s_dummy_enc_ctx.ctx) {
+ ext4_msg(sb, KERN_WARNING,
+ "Can't set test_dummy_encryption on remount");
+ return -1;
+ }
+ err = fscrypt_set_test_dummy_encryption(sb, arg, &sbi->s_dummy_enc_ctx);
+ if (err) {
+ if (err == -EEXIST)
+ ext4_msg(sb, KERN_WARNING,
+ "Can't change test_dummy_encryption on remount");
+ else if (err == -EINVAL)
+ ext4_msg(sb, KERN_WARNING,
+ "Value of option \"%s\" is unrecognized", opt);
+ else
+ ext4_msg(sb, KERN_WARNING,
+ "Error processing option \"%s\" [%d]",
+ opt, err);
+ return -1;
+ }
+ ext4_msg(sb, KERN_WARNING, "Test dummy encryption mode enabled");
+#else
+ ext4_msg(sb, KERN_WARNING,
+ "Test dummy encryption mount option ignored");
+#endif
+ return 1;
+}
+
static int handle_mount_opt(struct super_block *sb, char *opt, int token,
substring_t *args, unsigned long *journal_devnum,
unsigned int *journal_ioprio, int is_remount)
@@ -2047,14 +2092,8 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
*journal_ioprio =
IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, arg);
} else if (token == Opt_test_dummy_encryption) {
-#ifdef CONFIG_FS_ENCRYPTION
- sbi->s_mount_flags |= EXT4_MF_TEST_DUMMY_ENCRYPTION;
- ext4_msg(sb, KERN_WARNING,
- "Test dummy encryption mode enabled");
-#else
- ext4_msg(sb, KERN_WARNING,
- "Test dummy encryption mount option ignored");
-#endif
+ return ext4_set_test_dummy_encryption(sb, opt, &args[0],
+ is_remount);
} else if (m->flags & MOPT_DATAJ) {
if (is_remount) {
if (!sbi->s_journal)
@@ -2311,8 +2350,8 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
SEQ_OPTS_PRINT("max_dir_size_kb=%u", sbi->s_max_dir_size_kb);
if (test_opt(sb, DATA_ERR_ABORT))
SEQ_OPTS_PUTS("data_err=abort");
- if (DUMMY_ENCRYPTION_ENABLED(sbi))
- SEQ_OPTS_PUTS("test_dummy_encryption");
+
+ fscrypt_show_test_dummy_encryption(seq, sep, sb);
ext4_show_quota_options(seq, sb);
return 0;
@@ -4780,6 +4819,7 @@ failed_mount:
for (i = 0; i < EXT4_MAXQUOTAS; i++)
kfree(get_qf_name(sb, sbi, i));
#endif
+ fscrypt_free_dummy_context(&sbi->s_dummy_enc_ctx);
ext4_blkdev_remove(sbi);
brelse(bh);
out_fail:
@@ -5256,7 +5296,7 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
needs_barrier = true;
if (needs_barrier) {
int err;
- err = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL);
+ err = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL);
if (!ret)
ret = err;
}
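The dummy-encryption rework replaces the old EXT4_MF_TEST_DUMMY_ENCRYPTION flag with a parsed fscrypt context kept in the sb-info, so the option can now carry a value (test_dummy_encryption=%s) selecting what to fake. A condensed sketch of the consumer side, assuming the fscrypt helpers behave as they are used in this patch:

/* fscrypt now asks for the context itself rather than a yes/no answer */
static const union fscrypt_context *
example_get_dummy_context(struct super_block *sb)
{
        /* stays NULL unless test_dummy_encryption[=%s] was parsed into it
         * by fscrypt_set_test_dummy_encryption() */
        return EXT4_SB(sb)->s_dummy_enc_ctx.ctx;
}

static const struct fscrypt_operations example_cryptops = {
        .get_dummy_context      = example_get_dummy_context,
        /* ... other hooks unchanged ... */
};

The matching producer and teardown calls are the ones visible above: fscrypt_set_test_dummy_encryption() in the option parser, fscrypt_show_test_dummy_encryption() in ->show_options, and fscrypt_free_dummy_context() in both ext4_put_super() and the failed_mount path.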
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index 04bfaf63752c..6c9fc9e21c13 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -293,6 +293,7 @@ EXT4_ATTR_FEATURE(batched_discard);
EXT4_ATTR_FEATURE(meta_bg_resize);
#ifdef CONFIG_FS_ENCRYPTION
EXT4_ATTR_FEATURE(encryption);
+EXT4_ATTR_FEATURE(test_dummy_encryption_v2);
#endif
#ifdef CONFIG_UNICODE
EXT4_ATTR_FEATURE(casefold);
@@ -308,6 +309,7 @@ static struct attribute *ext4_feat_attrs[] = {
ATTR_LIST(meta_bg_resize),
#ifdef CONFIG_FS_ENCRYPTION
ATTR_LIST(encryption),
+ ATTR_LIST(test_dummy_encryption_v2),
#endif
#ifdef CONFIG_UNICODE
ATTR_LIST(casefold),
diff --git a/fs/ext4/verity.c b/fs/ext4/verity.c
index dc5ec724d889..dec1244dd062 100644
--- a/fs/ext4/verity.c
+++ b/fs/ext4/verity.c
@@ -342,37 +342,6 @@ static int ext4_get_verity_descriptor(struct inode *inode, void *buf,
return desc_size;
}
-/*
- * Prefetch some pages from the file's Merkle tree.
- *
- * This is basically a stripped-down version of __do_page_cache_readahead()
- * which works on pages past i_size.
- */
-static void ext4_merkle_tree_readahead(struct address_space *mapping,
- pgoff_t start_index, unsigned long count)
-{
- LIST_HEAD(pages);
- unsigned int nr_pages = 0;
- struct page *page;
- pgoff_t index;
- struct blk_plug plug;
-
- for (index = start_index; index < start_index + count; index++) {
- page = xa_load(&mapping->i_pages, index);
- if (!page || xa_is_value(page)) {
- page = __page_cache_alloc(readahead_gfp_mask(mapping));
- if (!page)
- break;
- page->index = index;
- list_add(&page->lru, &pages);
- nr_pages++;
- }
- }
- blk_start_plug(&plug);
- ext4_mpage_readpages(mapping, &pages, NULL, nr_pages, true);
- blk_finish_plug(&plug);
-}
-
static struct page *ext4_read_merkle_tree_page(struct inode *inode,
pgoff_t index,
unsigned long num_ra_pages)
@@ -386,8 +355,8 @@ static struct page *ext4_read_merkle_tree_page(struct inode *inode,
if (page)
put_page(page);
else if (num_ra_pages > 1)
- ext4_merkle_tree_readahead(inode->i_mapping, index,
- num_ra_pages);
+ page_cache_readahead_unbounded(inode->i_mapping, NULL,
+ index, num_ra_pages, 0);
page = read_mapping_page(inode->i_mapping, index, NULL);
}
return page;
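With ext4_mpage_readpages() no longer taking a hand-built page list, fs-verity cannot feed it Merkle-tree pages directly; it calls page_cache_readahead_unbounded() instead, which, unlike ordinary readahead, is not clamped to i_size and so can reach tree pages stored past EOF. The replacement boils down to:

/* prefetch up to @num_ra_pages tree pages before the blocking read;
 * the NULL is the struct file * (verity has none to pass) */
if (num_ra_pages > 1)
        page_cache_readahead_unbounded(inode->i_mapping, NULL,
                                       index, num_ra_pages, 0);
page = read_mapping_page(inode->i_mapping, index, NULL);

The open-coded allocation/LRU list from the deleted helper is gone because the unbounded readahead call handles page-cache insertion itself.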
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index cdf2f626bea7..03ec97f28235 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -2177,13 +2177,11 @@ out:
* use ->readpage() or do the necessary surgery to decouple ->readpages()
* from read-ahead.
*/
-int f2fs_mpage_readpages(struct address_space *mapping,
- struct list_head *pages, struct page *page,
- unsigned nr_pages, bool is_readahead)
+static int f2fs_mpage_readpages(struct inode *inode,
+ struct readahead_control *rac, struct page *page)
{
struct bio *bio = NULL;
sector_t last_block_in_bio = 0;
- struct inode *inode = mapping->host;
struct f2fs_map_blocks map;
#ifdef CONFIG_F2FS_FS_COMPRESSION
struct compress_ctx cc = {
@@ -2197,6 +2195,7 @@ int f2fs_mpage_readpages(struct address_space *mapping,
.nr_cpages = 0,
};
#endif
+ unsigned nr_pages = rac ? readahead_count(rac) : 1;
unsigned max_nr_pages = nr_pages;
int ret = 0;
@@ -2210,15 +2209,9 @@ int f2fs_mpage_readpages(struct address_space *mapping,
map.m_may_create = false;
for (; nr_pages; nr_pages--) {
- if (pages) {
- page = list_last_entry(pages, struct page, lru);
-
+ if (rac) {
+ page = readahead_page(rac);
prefetchw(&page->flags);
- list_del(&page->lru);
- if (add_to_page_cache_lru(page, mapping,
- page_index(page),
- readahead_gfp_mask(mapping)))
- goto next_page;
}
#ifdef CONFIG_F2FS_FS_COMPRESSION
@@ -2228,7 +2221,7 @@ int f2fs_mpage_readpages(struct address_space *mapping,
ret = f2fs_read_multi_pages(&cc, &bio,
max_nr_pages,
&last_block_in_bio,
- is_readahead, false);
+ rac != NULL, false);
f2fs_destroy_compress_ctx(&cc);
if (ret)
goto set_error_page;
@@ -2251,7 +2244,7 @@ read_single_page:
#endif
ret = f2fs_read_single_page(inode, page, max_nr_pages, &map,
- &bio, &last_block_in_bio, is_readahead);
+ &bio, &last_block_in_bio, rac);
if (ret) {
#ifdef CONFIG_F2FS_FS_COMPRESSION
set_error_page:
@@ -2260,8 +2253,10 @@ set_error_page:
zero_user_segment(page, 0, PAGE_SIZE);
unlock_page(page);
}
+#ifdef CONFIG_F2FS_FS_COMPRESSION
next_page:
- if (pages)
+#endif
+ if (rac)
put_page(page);
#ifdef CONFIG_F2FS_FS_COMPRESSION
@@ -2271,16 +2266,15 @@ next_page:
ret = f2fs_read_multi_pages(&cc, &bio,
max_nr_pages,
&last_block_in_bio,
- is_readahead, false);
+ rac != NULL, false);
f2fs_destroy_compress_ctx(&cc);
}
}
#endif
}
- BUG_ON(pages && !list_empty(pages));
if (bio)
__submit_bio(F2FS_I_SB(inode), bio, DATA);
- return pages ? 0 : ret;
+ return ret;
}
static int f2fs_read_data_page(struct file *file, struct page *page)
@@ -2299,28 +2293,24 @@ static int f2fs_read_data_page(struct file *file, struct page *page)
if (f2fs_has_inline_data(inode))
ret = f2fs_read_inline_data(inode, page);
if (ret == -EAGAIN)
- ret = f2fs_mpage_readpages(page_file_mapping(page),
- NULL, page, 1, false);
+ ret = f2fs_mpage_readpages(inode, NULL, page);
return ret;
}
-static int f2fs_read_data_pages(struct file *file,
- struct address_space *mapping,
- struct list_head *pages, unsigned nr_pages)
+static void f2fs_readahead(struct readahead_control *rac)
{
- struct inode *inode = mapping->host;
- struct page *page = list_last_entry(pages, struct page, lru);
+ struct inode *inode = rac->mapping->host;
- trace_f2fs_readpages(inode, page, nr_pages);
+ trace_f2fs_readpages(inode, readahead_index(rac), readahead_count(rac));
if (!f2fs_is_compress_backend_ready(inode))
- return 0;
+ return;
/* If the file has inline data, skip readpages */
if (f2fs_has_inline_data(inode))
- return 0;
+ return;
- return f2fs_mpage_readpages(mapping, pages, NULL, nr_pages, true);
+ f2fs_mpage_readpages(inode, rac, NULL);
}
int f2fs_encrypt_one_page(struct f2fs_io_info *fio)
@@ -3805,7 +3795,7 @@ static void f2fs_swap_deactivate(struct file *file)
const struct address_space_operations f2fs_dblock_aops = {
.readpage = f2fs_read_data_page,
- .readpages = f2fs_read_data_pages,
+ .readahead = f2fs_readahead,
.writepage = f2fs_write_data_page,
.writepages = f2fs_write_data_pages,
.write_begin = f2fs_write_begin,
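f2fs ends up with the same split as ext4, and the rac pointer also takes over the old is_readahead flag: every internal call that used to take a bool now passes rac (or rac != NULL). The error semantics of the shared helper differ by caller, roughly:

/* ->readpage path: rac == NULL, a single page, the return value matters */
ret = f2fs_mpage_readpages(inode, NULL, page);

/* ->readahead path: rac != NULL; the return value is dropped because
 * ->readahead is void, and anything missed falls back to ->readpage */
f2fs_mpage_readpages(inode, rac, NULL);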
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index ba470d5687fe..5c0149d2f46a 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -138,7 +138,7 @@ struct f2fs_mount_info {
int fsync_mode; /* fsync policy */
int fs_mode; /* fs mode: LFS or ADAPTIVE */
int bggc_mode; /* bggc mode: off, on or sync */
- bool test_dummy_encryption; /* test dummy encryption */
+ struct fscrypt_dummy_context dummy_enc_ctx; /* test dummy encryption */
block_t unusable_cap; /* Amount of space allowed to be
* unusable when disabling checkpoint
*/
@@ -1259,7 +1259,7 @@ enum fsync_mode {
#ifdef CONFIG_FS_ENCRYPTION
#define DUMMY_ENCRYPTION_ENABLED(sbi) \
- (unlikely(F2FS_OPTION(sbi).test_dummy_encryption))
+ (unlikely(F2FS_OPTION(sbi).dummy_enc_ctx.ctx != NULL))
#else
#define DUMMY_ENCRYPTION_ENABLED(sbi) (0)
#endif
@@ -3051,19 +3051,12 @@ static inline void f2fs_set_page_private(struct page *page,
if (PagePrivate(page))
return;
- get_page(page);
- SetPagePrivate(page);
- set_page_private(page, data);
+ attach_page_private(page, (void *)data);
}
static inline void f2fs_clear_page_private(struct page *page)
{
- if (!PagePrivate(page))
- return;
-
- set_page_private(page, 0);
- ClearPagePrivate(page);
- f2fs_put_page(page, 0);
+ detach_page_private(page);
}
/*
@@ -3373,9 +3366,6 @@ int f2fs_reserve_new_block(struct dnode_of_data *dn);
int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index);
int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from);
int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index);
-int f2fs_mpage_readpages(struct address_space *mapping,
- struct list_head *pages, struct page *page,
- unsigned nr_pages, bool is_readahead);
struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index,
int op_flags, bool for_write);
struct page *f2fs_find_data_page(struct inode *inode, pgoff_t index);
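attach_page_private()/detach_page_private() fold the reference-count and PagePrivate bookkeeping that f2fs (and others) used to open-code. Judging from the code removed in this hunk, the two forms are equivalent to roughly:

/* before: open-coded attach */
get_page(page);
SetPagePrivate(page);
set_page_private(page, data);

/* after: one helper performs the three steps above */
attach_page_private(page, (void *)data);

/* detach_page_private() undoes all of it, including the final put_page()
 * that f2fs_clear_page_private() used to issue via f2fs_put_page() */
detach_page_private(page);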
diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c
index 5bc4dcd8fc03..8c4ea5003ef8 100644
--- a/fs/f2fs/hash.c
+++ b/fs/f2fs/hash.c
@@ -12,7 +12,6 @@
#include <linux/types.h>
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
-#include <linux/cryptohash.h>
#include <linux/pagemap.h>
#include <linux/unicode.h>
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index f2dfc21c6abb..8a9955902d84 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -202,6 +202,7 @@ static match_table_t f2fs_tokens = {
{Opt_whint, "whint_mode=%s"},
{Opt_alloc, "alloc_mode=%s"},
{Opt_fsync, "fsync_mode=%s"},
+ {Opt_test_dummy_encryption, "test_dummy_encryption=%s"},
{Opt_test_dummy_encryption, "test_dummy_encryption"},
{Opt_checkpoint_disable, "checkpoint=disable"},
{Opt_checkpoint_disable_cap, "checkpoint=disable:%u"},
@@ -394,7 +395,52 @@ static int f2fs_check_quota_options(struct f2fs_sb_info *sbi)
}
#endif
-static int parse_options(struct super_block *sb, char *options)
+static int f2fs_set_test_dummy_encryption(struct super_block *sb,
+ const char *opt,
+ const substring_t *arg,
+ bool is_remount)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(sb);
+#ifdef CONFIG_FS_ENCRYPTION
+ int err;
+
+ if (!f2fs_sb_has_encrypt(sbi)) {
+ f2fs_err(sbi, "Encrypt feature is off");
+ return -EINVAL;
+ }
+
+ /*
+ * This mount option is just for testing, and it's not worthwhile to
+ * implement the extra complexity (e.g. RCU protection) that would be
+ * needed to allow it to be set or changed during remount. We do allow
+ * it to be specified during remount, but only if there is no change.
+ */
+ if (is_remount && !F2FS_OPTION(sbi).dummy_enc_ctx.ctx) {
+ f2fs_warn(sbi, "Can't set test_dummy_encryption on remount");
+ return -EINVAL;
+ }
+ err = fscrypt_set_test_dummy_encryption(
+ sb, arg, &F2FS_OPTION(sbi).dummy_enc_ctx);
+ if (err) {
+ if (err == -EEXIST)
+ f2fs_warn(sbi,
+ "Can't change test_dummy_encryption on remount");
+ else if (err == -EINVAL)
+ f2fs_warn(sbi, "Value of option \"%s\" is unrecognized",
+ opt);
+ else
+ f2fs_warn(sbi, "Error processing option \"%s\" [%d]",
+ opt, err);
+ return -EINVAL;
+ }
+ f2fs_warn(sbi, "Test dummy encryption mode enabled");
+#else
+ f2fs_warn(sbi, "Test dummy encryption mount option ignored");
+#endif
+ return 0;
+}
+
+static int parse_options(struct super_block *sb, char *options, bool is_remount)
{
struct f2fs_sb_info *sbi = F2FS_SB(sb);
substring_t args[MAX_OPT_ARGS];
@@ -403,9 +449,7 @@ static int parse_options(struct super_block *sb, char *options)
int arg = 0, ext_cnt;
kuid_t uid;
kgid_t gid;
-#ifdef CONFIG_QUOTA
int ret;
-#endif
if (!options)
return 0;
@@ -778,17 +822,10 @@ static int parse_options(struct super_block *sb, char *options)
kvfree(name);
break;
case Opt_test_dummy_encryption:
-#ifdef CONFIG_FS_ENCRYPTION
- if (!f2fs_sb_has_encrypt(sbi)) {
- f2fs_err(sbi, "Encrypt feature is off");
- return -EINVAL;
- }
-
- F2FS_OPTION(sbi).test_dummy_encryption = true;
- f2fs_info(sbi, "Test dummy encryption mode enabled");
-#else
- f2fs_info(sbi, "Test dummy encryption mount option ignored");
-#endif
+ ret = f2fs_set_test_dummy_encryption(sb, p, &args[0],
+ is_remount);
+ if (ret)
+ return ret;
break;
case Opt_checkpoint_disable_cap_perc:
if (args->from && match_int(args, &arg))
@@ -1213,6 +1250,7 @@ static void f2fs_put_super(struct super_block *sb)
for (i = 0; i < MAXQUOTAS; i++)
kvfree(F2FS_OPTION(sbi).s_qf_names[i]);
#endif
+ fscrypt_free_dummy_context(&F2FS_OPTION(sbi).dummy_enc_ctx);
destroy_percpu_info(sbi);
for (i = 0; i < NR_PAGE_TYPE; i++)
kvfree(sbi->write_io[i]);
@@ -1543,10 +1581,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
seq_printf(seq, ",whint_mode=%s", "user-based");
else if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_FS)
seq_printf(seq, ",whint_mode=%s", "fs-based");
-#ifdef CONFIG_FS_ENCRYPTION
- if (F2FS_OPTION(sbi).test_dummy_encryption)
- seq_puts(seq, ",test_dummy_encryption");
-#endif
+
+ fscrypt_show_test_dummy_encryption(seq, ',', sbi->sb);
if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_DEFAULT)
seq_printf(seq, ",alloc_mode=%s", "default");
@@ -1575,7 +1611,6 @@ static void default_options(struct f2fs_sb_info *sbi)
F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF;
F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_DEFAULT;
F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_POSIX;
- F2FS_OPTION(sbi).test_dummy_encryption = false;
F2FS_OPTION(sbi).s_resuid = make_kuid(&init_user_ns, F2FS_DEF_RESUID);
F2FS_OPTION(sbi).s_resgid = make_kgid(&init_user_ns, F2FS_DEF_RESGID);
F2FS_OPTION(sbi).compress_algorithm = COMPRESS_LZ4;
@@ -1734,7 +1769,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
default_options(sbi);
/* parse mount options */
- err = parse_options(sb, data);
+ err = parse_options(sb, data, true);
if (err)
goto restore_opts;
checkpoint_changed =
@@ -2410,9 +2445,10 @@ static int f2fs_set_context(struct inode *inode, const void *ctx, size_t len,
ctx, len, fs_data, XATTR_CREATE);
}
-static bool f2fs_dummy_context(struct inode *inode)
+static const union fscrypt_context *
+f2fs_get_dummy_context(struct super_block *sb)
{
- return DUMMY_ENCRYPTION_ENABLED(F2FS_I_SB(inode));
+ return F2FS_OPTION(F2FS_SB(sb)).dummy_enc_ctx.ctx;
}
static bool f2fs_has_stable_inodes(struct super_block *sb)
@@ -2431,7 +2467,7 @@ static const struct fscrypt_operations f2fs_cryptops = {
.key_prefix = "f2fs:",
.get_context = f2fs_get_context,
.set_context = f2fs_set_context,
- .dummy_context = f2fs_dummy_context,
+ .get_dummy_context = f2fs_get_dummy_context,
.empty_dir = f2fs_empty_dir,
.max_namelen = F2FS_NAME_LEN,
.has_stable_inodes = f2fs_has_stable_inodes,
@@ -3366,7 +3402,7 @@ try_onemore:
goto free_sb_buf;
}
- err = parse_options(sb, options);
+ err = parse_options(sb, options, false);
if (err)
goto free_options;
@@ -3769,6 +3805,7 @@ free_options:
for (i = 0; i < MAXQUOTAS; i++)
kvfree(F2FS_OPTION(sbi).s_qf_names[i]);
#endif
+ fscrypt_free_dummy_context(&F2FS_OPTION(sbi).dummy_enc_ctx);
kvfree(options);
free_sb_buf:
kvfree(raw_super);
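Because the dummy context must not change across a remount, parse_options() grows an is_remount argument that is threaded down to the new handler; the option may be restated with the same value on remount but not introduced or changed there. The two call sites, in sketch form:

/* fill_super: initial mount, the option may be set freely */
err = parse_options(sb, options, false);

/* f2fs_remount: only an unchanged repetition of the option is accepted */
err = parse_options(sb, data, true);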
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index e3bbbef9b4f0..3162f46b3c9b 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -446,6 +446,7 @@ enum feat_id {
FEAT_SB_CHECKSUM,
FEAT_CASEFOLD,
FEAT_COMPRESSION,
+ FEAT_TEST_DUMMY_ENCRYPTION_V2,
};
static ssize_t f2fs_feature_show(struct f2fs_attr *a,
@@ -466,6 +467,7 @@ static ssize_t f2fs_feature_show(struct f2fs_attr *a,
case FEAT_SB_CHECKSUM:
case FEAT_CASEFOLD:
case FEAT_COMPRESSION:
+ case FEAT_TEST_DUMMY_ENCRYPTION_V2:
return sprintf(buf, "supported\n");
}
return 0;
@@ -563,6 +565,7 @@ F2FS_GENERAL_RO_ATTR(avg_vblocks);
#ifdef CONFIG_FS_ENCRYPTION
F2FS_FEATURE_RO_ATTR(encryption, FEAT_CRYPTO);
+F2FS_FEATURE_RO_ATTR(test_dummy_encryption_v2, FEAT_TEST_DUMMY_ENCRYPTION_V2);
#endif
#ifdef CONFIG_BLK_DEV_ZONED
F2FS_FEATURE_RO_ATTR(block_zoned, FEAT_BLKZONED);
@@ -647,6 +650,7 @@ ATTRIBUTE_GROUPS(f2fs);
static struct attribute *f2fs_feat_attrs[] = {
#ifdef CONFIG_FS_ENCRYPTION
ATTR_LIST(encryption),
+ ATTR_LIST(test_dummy_encryption_v2),
#endif
#ifdef CONFIG_BLK_DEV_ZONED
ATTR_LIST(block_zoned),
diff --git a/fs/f2fs/verity.c b/fs/f2fs/verity.c
index d7d430a6f130..865c9fb774fb 100644
--- a/fs/f2fs/verity.c
+++ b/fs/f2fs/verity.c
@@ -222,37 +222,6 @@ static int f2fs_get_verity_descriptor(struct inode *inode, void *buf,
return size;
}
-/*
- * Prefetch some pages from the file's Merkle tree.
- *
- * This is basically a stripped-down version of __do_page_cache_readahead()
- * which works on pages past i_size.
- */
-static void f2fs_merkle_tree_readahead(struct address_space *mapping,
- pgoff_t start_index, unsigned long count)
-{
- LIST_HEAD(pages);
- unsigned int nr_pages = 0;
- struct page *page;
- pgoff_t index;
- struct blk_plug plug;
-
- for (index = start_index; index < start_index + count; index++) {
- page = xa_load(&mapping->i_pages, index);
- if (!page || xa_is_value(page)) {
- page = __page_cache_alloc(readahead_gfp_mask(mapping));
- if (!page)
- break;
- page->index = index;
- list_add(&page->lru, &pages);
- nr_pages++;
- }
- }
- blk_start_plug(&plug);
- f2fs_mpage_readpages(mapping, &pages, NULL, nr_pages, true);
- blk_finish_plug(&plug);
-}
-
static struct page *f2fs_read_merkle_tree_page(struct inode *inode,
pgoff_t index,
unsigned long num_ra_pages)
@@ -266,8 +235,8 @@ static struct page *f2fs_read_merkle_tree_page(struct inode *inode,
if (page)
put_page(page);
else if (num_ra_pages > 1)
- f2fs_merkle_tree_readahead(inode->i_mapping, index,
- num_ra_pages);
+ page_cache_readahead_unbounded(inode->i_mapping, NULL,
+ index, num_ra_pages, 0);
page = read_mapping_page(inode->i_mapping, index, NULL);
}
return page;
diff --git a/fs/fat/Kconfig b/fs/fat/Kconfig
index 718163d0c621..ca31993dcb47 100644
--- a/fs/fat/Kconfig
+++ b/fs/fat/Kconfig
@@ -69,7 +69,7 @@ config VFAT_FS
The VFAT support enlarges your kernel by about 10 KB and it only
works if you said Y to the "DOS FAT fs support" above. Please read
- the file <file:Documentation/filesystems/vfat.txt> for details. If
+ the file <file:Documentation/filesystems/vfat.rst> for details. If
unsure, say Y.
To compile this as a module, choose M here: the module will be called
@@ -82,7 +82,7 @@ config FAT_DEFAULT_CODEPAGE
help
This option should be set to the codepage of your FAT filesystems.
It can be overridden with the "codepage" mount option.
- See <file:Documentation/filesystems/vfat.txt> for more information.
+ See <file:Documentation/filesystems/vfat.rst> for more information.
config FAT_DEFAULT_IOCHARSET
string "Default iocharset for FAT"
@@ -96,7 +96,7 @@ config FAT_DEFAULT_IOCHARSET
Note that "utf8" is not recommended for FAT filesystems.
If unsure, you shouldn't set "utf8" here - select the next option
instead if you would like to use UTF-8 encoded file names by default.
- See <file:Documentation/filesystems/vfat.txt> for more information.
+ See <file:Documentation/filesystems/vfat.rst> for more information.
Enable any character sets you need in File Systems/Native Language
Support.
@@ -114,4 +114,4 @@ config FAT_DEFAULT_UTF8
Say Y if you use UTF-8 encoding for file names, N otherwise.
- See <file:Documentation/filesystems/vfat.txt> for more information.
+ See <file:Documentation/filesystems/vfat.rst> for more information.
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index 054acd9fd033..b4ddf48fa444 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -804,8 +804,6 @@ static long fat_dir_ioctl(struct file *filp, unsigned int cmd,
return fat_generic_ioctl(filp, cmd, arg);
}
- if (!access_ok(d1, sizeof(struct __fat_dirent[2])))
- return -EFAULT;
/*
* Yes, we don't need this put_user() absolutely. However old
* code didn't return the right value. So, app use this value,
@@ -844,8 +842,6 @@ static long fat_compat_dir_ioctl(struct file *filp, unsigned cmd,
return fat_generic_ioctl(filp, cmd, (unsigned long)arg);
}
- if (!access_ok(d1, sizeof(struct compat_dirent[2])))
- return -EFAULT;
/*
* Yes, we don't need this put_user() absolutely. However old
* code didn't return the right value. So, app use this value,
diff --git a/fs/fat/file.c b/fs/fat/file.c
index bdc4503c00a3..42134c58c87e 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -195,7 +195,7 @@ int fat_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
if (err)
return err;
- return blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
+ return blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL);
}
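blkdev_issue_flush() drops its third argument throughout this patch (ext4_sync_fs above, hfsplus below); every caller here was passing NULL for it anyway. The fsync-style tail becomes simply:

if (err)
        return err;
/* two-argument form: device plus allocation mask, no third argument */
return blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL);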
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 71946da84388..e6e68b2274a5 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -210,10 +210,9 @@ static int fat_readpage(struct file *file, struct page *page)
return mpage_readpage(page, fat_get_block);
}
-static int fat_readpages(struct file *file, struct address_space *mapping,
- struct list_head *pages, unsigned nr_pages)
+static void fat_readahead(struct readahead_control *rac)
{
- return mpage_readpages(mapping, pages, nr_pages, fat_get_block);
+ mpage_readahead(rac, fat_get_block);
}
static void fat_write_failed(struct address_space *mapping, loff_t to)
@@ -344,7 +343,7 @@ int fat_block_truncate_page(struct inode *inode, loff_t from)
static const struct address_space_operations fat_aops = {
.readpage = fat_readpage,
- .readpages = fat_readpages,
+ .readahead = fat_readahead,
.writepage = fat_writepage,
.writepages = fat_writepages,
.write_begin = fat_write_begin,
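For filesystems on the generic mpage path the whole conversion is a one-liner: mpage_readpages(mapping, pages, nr_pages, get_block) becomes mpage_readahead(rac, get_block), with the aops entry renamed to match. fat, hpfs and (when not stuffed) gfs2 all follow this pattern in the patch:

static void example_readahead(struct readahead_control *rac)
{
        /* the get_block callback is unchanged; mapping, page list and
         * count all travel inside the rac now */
        mpage_readahead(rac, example_get_block);
}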
diff --git a/fs/file_table.c b/fs/file_table.c
index 3b612535391f..656647f9575a 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -198,6 +198,7 @@ static struct file *alloc_file(const struct path *path, int flags,
file->f_inode = path->dentry->d_inode;
file->f_mapping = path->dentry->d_inode->i_mapping;
file->f_wb_err = filemap_sample_wb_err(file->f_mapping);
+ file->f_sb_err = file_sample_sb_err(file);
if ((file->f_mode & FMODE_READ) &&
likely(fop->read || fop->read_iter))
file->f_mode |= FMODE_CAN_READ;
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 76ac9c7d32ec..a750381d554a 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1070,7 +1070,6 @@ static void bdi_split_work_to_wbs(struct backing_dev_info *bdi,
static unsigned long get_nr_dirty_pages(void)
{
return global_node_page_state(NR_FILE_DIRTY) +
- global_node_page_state(NR_UNSTABLE_NFS) +
get_nr_dirty_inodes();
}
@@ -2320,7 +2319,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
WARN(bdi_cap_writeback_dirty(wb->bdi) &&
!test_bit(WB_registered, &wb->state),
- "bdi-%s not registered\n", wb->bdi->name);
+ "bdi-%s not registered\n", bdi_dev_name(wb->bdi));
inode->dirtied_when = jiffies;
if (dirtytime)
diff --git a/fs/fs_context.c b/fs/fs_context.c
index fc9f6ef93b55..7d5c5dd2b1d5 100644
--- a/fs/fs_context.c
+++ b/fs/fs_context.c
@@ -42,7 +42,6 @@ static const struct constant_table common_set_sb_flag[] = {
{ "dirsync", SB_DIRSYNC },
{ "lazytime", SB_LAZYTIME },
{ "mand", SB_MANDLOCK },
- { "posixacl", SB_POSIXACL },
{ "ro", SB_RDONLY },
{ "sync", SB_SYNCHRONOUS },
{ },
@@ -53,44 +52,15 @@ static const struct constant_table common_clear_sb_flag[] = {
{ "nolazytime", SB_LAZYTIME },
{ "nomand", SB_MANDLOCK },
{ "rw", SB_RDONLY },
- { "silent", SB_SILENT },
{ },
};
-static const char *const forbidden_sb_flag[] = {
- "bind",
- "dev",
- "exec",
- "move",
- "noatime",
- "nodev",
- "nodiratime",
- "noexec",
- "norelatime",
- "nostrictatime",
- "nosuid",
- "private",
- "rec",
- "relatime",
- "remount",
- "shared",
- "slave",
- "strictatime",
- "suid",
- "unbindable",
-};
-
/*
* Check for a common mount option that manipulates s_flags.
*/
static int vfs_parse_sb_flag(struct fs_context *fc, const char *key)
{
unsigned int token;
- unsigned int i;
-
- for (i = 0; i < ARRAY_SIZE(forbidden_sb_flag); i++)
- if (strcmp(key, forbidden_sb_flag[i]) == 0)
- return -EINVAL;
token = lookup_constant(common_set_sb_flag, key, 0);
if (token) {
diff --git a/fs/fscache/Kconfig b/fs/fscache/Kconfig
index 506c5e643f0d..5e796e6c38e5 100644
--- a/fs/fscache/Kconfig
+++ b/fs/fscache/Kconfig
@@ -8,7 +8,7 @@ config FSCACHE
Different sorts of caches can be plugged in, depending on the
resources available.
- See Documentation/filesystems/caching/fscache.txt for more information.
+ See Documentation/filesystems/caching/fscache.rst for more information.
config FSCACHE_STATS
bool "Gather statistical information on local caching"
@@ -25,7 +25,7 @@ config FSCACHE_STATS
between CPUs. On the other hand, the stats are very useful for
debugging purposes. Saying 'Y' here is recommended.
- See Documentation/filesystems/caching/fscache.txt for more information.
+ See Documentation/filesystems/caching/fscache.rst for more information.
config FSCACHE_HISTOGRAM
bool "Gather latency information on local caching"
@@ -42,7 +42,7 @@ config FSCACHE_HISTOGRAM
bouncing between CPUs. On the other hand, the histogram may be
useful for debugging purposes. Saying 'N' here is recommended.
- See Documentation/filesystems/caching/fscache.txt for more information.
+ See Documentation/filesystems/caching/fscache.rst for more information.
config FSCACHE_DEBUG
bool "Debug FS-Cache"
@@ -52,7 +52,7 @@ config FSCACHE_DEBUG
management module. If this is set, the debugging output may be
enabled by setting bits in /sys/modules/fscache/parameter/debug.
- See Documentation/filesystems/caching/fscache.txt for more information.
+ See Documentation/filesystems/caching/fscache.rst for more information.
config FSCACHE_OBJECT_LIST
bool "Maintain global object list for debugging purposes"
diff --git a/fs/fscache/cache.c b/fs/fscache/cache.c
index f78793f3d21e..fcc136361415 100644
--- a/fs/fscache/cache.c
+++ b/fs/fscache/cache.c
@@ -172,7 +172,7 @@ no_preference:
*
* Initialise a record of a cache and fill in the name.
*
- * See Documentation/filesystems/caching/backend-api.txt for a complete
+ * See Documentation/filesystems/caching/backend-api.rst for a complete
* description.
*/
void fscache_init_cache(struct fscache_cache *cache,
@@ -207,7 +207,7 @@ EXPORT_SYMBOL(fscache_init_cache);
*
* Add a cache to the system, making it available for netfs's to use.
*
- * See Documentation/filesystems/caching/backend-api.txt for a complete
+ * See Documentation/filesystems/caching/backend-api.rst for a complete
* description.
*/
int fscache_add_cache(struct fscache_cache *cache,
@@ -307,7 +307,7 @@ EXPORT_SYMBOL(fscache_add_cache);
* Note that an I/O error occurred in a cache and that it should no longer be
* used for anything. This also reports the error into the kernel log.
*
- * See Documentation/filesystems/caching/backend-api.txt for a complete
+ * See Documentation/filesystems/caching/backend-api.rst for a complete
* description.
*/
void fscache_io_error(struct fscache_cache *cache)
@@ -355,7 +355,7 @@ static void fscache_withdraw_all_objects(struct fscache_cache *cache,
* Withdraw a cache from service, unbinding all its cache objects from the
* netfs cookies they're currently representing.
*
- * See Documentation/filesystems/caching/backend-api.txt for a complete
+ * See Documentation/filesystems/caching/backend-api.rst for a complete
* description.
*/
void fscache_withdraw_cache(struct fscache_cache *cache)
diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c
index 0ce39658a620..751bc5b1cddf 100644
--- a/fs/fscache/cookie.c
+++ b/fs/fscache/cookie.c
@@ -4,7 +4,7 @@
* Copyright (C) 2004-2007 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*
- * See Documentation/filesystems/caching/netfs-api.txt for more information on
+ * See Documentation/filesystems/caching/netfs-api.rst for more information on
* the netfs API.
*/
diff --git a/fs/fscache/object.c b/fs/fscache/object.c
index cfeba839a0f2..cb2146e02cd5 100644
--- a/fs/fscache/object.c
+++ b/fs/fscache/object.c
@@ -4,7 +4,7 @@
* Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*
- * See Documentation/filesystems/caching/object.txt for a description of the
+ * See Documentation/filesystems/caching/object.rst for a description of the
* object state machine and the in-kernel representations.
*/
@@ -295,7 +295,7 @@ static void fscache_object_work_func(struct work_struct *work)
*
* Initialise a cache object description to its basic values.
*
- * See Documentation/filesystems/caching/backend-api.txt for a complete
+ * See Documentation/filesystems/caching/backend-api.rst for a complete
* description.
*/
void fscache_object_init(struct fscache_object *object,
diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c
index 1a22a55f75a0..4a5651d4904e 100644
--- a/fs/fscache/operation.c
+++ b/fs/fscache/operation.c
@@ -4,7 +4,7 @@
* Copyright (C) 2008 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*
- * See Documentation/filesystems/caching/operations.txt
+ * See Documentation/filesystems/caching/operations.rst
*/
#define FSCACHE_DEBUG_LEVEL OPERATION
diff --git a/fs/fuse/Kconfig b/fs/fuse/Kconfig
index eb2a585572dc..774b2618018a 100644
--- a/fs/fuse/Kconfig
+++ b/fs/fuse/Kconfig
@@ -12,7 +12,7 @@ config FUSE_FS
although chances are your distribution already has that library
installed if you've installed the "fuse" package itself.
- See <file:Documentation/filesystems/fuse.txt> for more information.
+ See <file:Documentation/filesystems/fuse.rst> for more information.
See <file:Documentation/Changes> for needed library/utility version.
If you want to develop a userspace FS, or if you want to use
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 97eec7522bf2..4907e3d1f56c 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -805,7 +805,7 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
if (cs->len != PAGE_SIZE)
goto out_fallback;
- if (pipe_buf_steal(cs->pipe, buf) != 0)
+ if (!pipe_buf_try_steal(cs->pipe, buf))
goto out_fallback;
newpage = buf->page;
@@ -2081,7 +2081,7 @@ static void end_polls(struct fuse_conn *fc)
* The same effect is usually achievable through killing the filesystem daemon
* and all users of the filesystem. The exception is the combination of an
* asynchronous request and the tricky deadlock (see
- * Documentation/filesystems/fuse.txt).
+ * Documentation/filesystems/fuse.rst).
*
* Aborting requests under I/O goes as follows: 1: Separate out unlocked
* requests, they should be finished off immediately. Locked requests will be
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 9d67b830fb7a..bac51c32d660 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -915,84 +915,40 @@ static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file)
fuse_readpages_end(fc, &ap->args, err);
}
-struct fuse_fill_data {
- struct fuse_io_args *ia;
- struct file *file;
- struct inode *inode;
- unsigned int nr_pages;
- unsigned int max_pages;
-};
-
-static int fuse_readpages_fill(void *_data, struct page *page)
+static void fuse_readahead(struct readahead_control *rac)
{
- struct fuse_fill_data *data = _data;
- struct fuse_io_args *ia = data->ia;
- struct fuse_args_pages *ap = &ia->ap;
- struct inode *inode = data->inode;
+ struct inode *inode = rac->mapping->host;
struct fuse_conn *fc = get_fuse_conn(inode);
+ unsigned int i, max_pages, nr_pages = 0;
- fuse_wait_on_page_writeback(inode, page->index);
-
- if (ap->num_pages &&
- (ap->num_pages == fc->max_pages ||
- (ap->num_pages + 1) * PAGE_SIZE > fc->max_read ||
- ap->pages[ap->num_pages - 1]->index + 1 != page->index)) {
- data->max_pages = min_t(unsigned int, data->nr_pages,
- fc->max_pages);
- fuse_send_readpages(ia, data->file);
- data->ia = ia = fuse_io_alloc(NULL, data->max_pages);
- if (!ia) {
- unlock_page(page);
- return -ENOMEM;
- }
- ap = &ia->ap;
- }
-
- if (WARN_ON(ap->num_pages >= data->max_pages)) {
- unlock_page(page);
- fuse_io_free(ia);
- return -EIO;
- }
-
- get_page(page);
- ap->pages[ap->num_pages] = page;
- ap->descs[ap->num_pages].length = PAGE_SIZE;
- ap->num_pages++;
- data->nr_pages--;
- return 0;
-}
-
-static int fuse_readpages(struct file *file, struct address_space *mapping,
- struct list_head *pages, unsigned nr_pages)
-{
- struct inode *inode = mapping->host;
- struct fuse_conn *fc = get_fuse_conn(inode);
- struct fuse_fill_data data;
- int err;
-
- err = -EIO;
if (is_bad_inode(inode))
- goto out;
+ return;
- data.file = file;
- data.inode = inode;
- data.nr_pages = nr_pages;
- data.max_pages = min_t(unsigned int, nr_pages, fc->max_pages);
-;
- data.ia = fuse_io_alloc(NULL, data.max_pages);
- err = -ENOMEM;
- if (!data.ia)
- goto out;
+ max_pages = min_t(unsigned int, fc->max_pages,
+ fc->max_read / PAGE_SIZE);
- err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data);
- if (!err) {
- if (data.ia->ap.num_pages)
- fuse_send_readpages(data.ia, file);
- else
- fuse_io_free(data.ia);
+ for (;;) {
+ struct fuse_io_args *ia;
+ struct fuse_args_pages *ap;
+
+ nr_pages = readahead_count(rac) - nr_pages;
+ if (nr_pages > max_pages)
+ nr_pages = max_pages;
+ if (nr_pages == 0)
+ break;
+ ia = fuse_io_alloc(NULL, nr_pages);
+ if (!ia)
+ return;
+ ap = &ia->ap;
+ nr_pages = __readahead_batch(rac, ap->pages, nr_pages);
+ for (i = 0; i < nr_pages; i++) {
+ fuse_wait_on_page_writeback(inode,
+ readahead_index(rac) + i);
+ ap->descs[i].length = PAGE_SIZE;
+ }
+ ap->num_pages = nr_pages;
+ fuse_send_readpages(ia, rac->file);
}
-out:
- return err;
}
static ssize_t fuse_cache_read_iter(struct kiocb *iocb, struct iov_iter *to)
@@ -3373,10 +3329,10 @@ static const struct file_operations fuse_file_operations = {
static const struct address_space_operations fuse_file_aops = {
.readpage = fuse_readpage,
+ .readahead = fuse_readahead,
.writepage = fuse_writepage,
.writepages = fuse_writepages,
.launder_page = fuse_launder_page,
- .readpages = fuse_readpages,
.set_page_dirty = __set_page_dirty_nobuffers,
.bmap = fuse_bmap,
.direct_IO = fuse_direct_IO,
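fuse takes a different route from the mpage users: rather than pulling one page at a time, it grabs batches with __readahead_batch(), capped by the connection's max_pages/max_read limits, and turns each batch into one read request. The skeleton of that loop, with the fuse-specific request allocation elided and a fixed stand-in for the per-connection cap:

static void example_readahead(struct readahead_control *rac)
{
        unsigned int max_pages = 32;    /* stand-in for the fc->max_pages / max_read cap */
        unsigned int i, nr_pages = 0;

        for (;;) {
                struct page *pages[32];

                /* remaining window, same accounting as fuse_readahead() above */
                nr_pages = readahead_count(rac) - nr_pages;
                if (nr_pages > max_pages)
                        nr_pages = max_pages;
                if (nr_pages == 0)
                        break;
                /* hand over up to nr_pages page-cache pages for this request */
                nr_pages = __readahead_batch(rac, pages, nr_pages);
                for (i = 0; i < nr_pages; i++) {
                        /* ... wait for writeback, set up a PAGE_SIZE descriptor
                         * for pages[i], as fuse_readahead() does ... */
                }
                /* ... send the request covering these nr_pages pages ... */
        }
}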
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 786c1ce8f030..72c9560f4467 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -577,7 +577,7 @@ int gfs2_internal_read(struct gfs2_inode *ip, char *buf, loff_t *pos,
}
/**
- * gfs2_readpages - Read a bunch of pages at once
+ * gfs2_readahead - Read a bunch of pages at once
* @file: The file to read from
* @mapping: Address space info
* @pages: List of pages to read
@@ -590,31 +590,24 @@ int gfs2_internal_read(struct gfs2_inode *ip, char *buf, loff_t *pos,
* obviously not something we'd want to do on too regular a basis.
* Any I/O we ignore at this time will be done via readpage later.
* 2. We don't handle stuffed files here we let readpage do the honours.
- * 3. mpage_readpages() does most of the heavy lifting in the common case.
+ * 3. mpage_readahead() does most of the heavy lifting in the common case.
* 4. gfs2_block_map() is relied upon to set BH_Boundary in the right places.
*/
-static int gfs2_readpages(struct file *file, struct address_space *mapping,
- struct list_head *pages, unsigned nr_pages)
+static void gfs2_readahead(struct readahead_control *rac)
{
- struct inode *inode = mapping->host;
+ struct inode *inode = rac->mapping->host;
struct gfs2_inode *ip = GFS2_I(inode);
- struct gfs2_sbd *sdp = GFS2_SB(inode);
struct gfs2_holder gh;
- int ret;
gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
- ret = gfs2_glock_nq(&gh);
- if (unlikely(ret))
+ if (gfs2_glock_nq(&gh))
goto out_uninit;
if (!gfs2_is_stuffed(ip))
- ret = mpage_readpages(mapping, pages, nr_pages, gfs2_block_map);
+ mpage_readahead(rac, gfs2_block_map);
gfs2_glock_dq(&gh);
out_uninit:
gfs2_holder_uninit(&gh);
- if (unlikely(gfs2_withdrawn(sdp)))
- ret = -EIO;
- return ret;
}
/**
@@ -833,7 +826,7 @@ static const struct address_space_operations gfs2_aops = {
.writepage = gfs2_writepage,
.writepages = gfs2_writepages,
.readpage = gfs2_readpage,
- .readpages = gfs2_readpages,
+ .readahead = gfs2_readahead,
.bmap = gfs2_bmap,
.invalidatepage = gfs2_invalidatepage,
.releasepage = gfs2_releasepage,
@@ -847,7 +840,7 @@ static const struct address_space_operations gfs2_jdata_aops = {
.writepage = gfs2_jdata_writepage,
.writepages = gfs2_jdata_writepages,
.readpage = gfs2_readpage,
- .readpages = gfs2_readpages,
+ .readahead = gfs2_readahead,
.set_page_dirty = jdata_set_page_dirty,
.bmap = gfs2_bmap,
.invalidatepage = gfs2_invalidatepage,
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index c3f7732415be..c0f2875c946c 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -354,7 +354,7 @@ static __be64 *gfs2_dir_get_hash_table(struct gfs2_inode *ip)
hc = kmalloc(hsize, GFP_NOFS | __GFP_NOWARN);
if (hc == NULL)
- hc = __vmalloc(hsize, GFP_NOFS, PAGE_KERNEL);
+ hc = __vmalloc(hsize, GFP_NOFS);
if (hc == NULL)
return ERR_PTR(-ENOMEM);
@@ -1166,7 +1166,7 @@ static int dir_double_exhash(struct gfs2_inode *dip)
hc2 = kmalloc_array(hsize_bytes, 2, GFP_NOFS | __GFP_NOWARN);
if (hc2 == NULL)
- hc2 = __vmalloc(hsize_bytes * 2, GFP_NOFS, PAGE_KERNEL);
+ hc2 = __vmalloc(hsize_bytes * 2, GFP_NOFS);
if (!hc2)
return -ENOMEM;
@@ -1327,7 +1327,7 @@ static void *gfs2_alloc_sort_buffer(unsigned size)
if (size < KMALLOC_MAX_SIZE)
ptr = kmalloc(size, GFP_NOFS | __GFP_NOWARN);
if (!ptr)
- ptr = __vmalloc(size, GFP_NOFS, PAGE_KERNEL);
+ ptr = __vmalloc(size, GFP_NOFS);
return ptr;
}
@@ -1987,8 +1987,7 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
ht = kzalloc(size, GFP_NOFS | __GFP_NOWARN);
if (ht == NULL)
- ht = __vmalloc(size, GFP_NOFS | __GFP_NOWARN | __GFP_ZERO,
- PAGE_KERNEL);
+ ht = __vmalloc(size, GFP_NOFS | __GFP_NOWARN | __GFP_ZERO);
if (!ht)
return -ENOMEM;
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 8259fef3f986..4b67d47a7e00 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -1365,7 +1365,7 @@ int gfs2_quota_init(struct gfs2_sbd *sdp)
sdp->sd_quota_bitmap = kzalloc(bm_size, GFP_NOFS | __GFP_NOWARN);
if (sdp->sd_quota_bitmap == NULL)
sdp->sd_quota_bitmap = __vmalloc(bm_size, GFP_NOFS |
- __GFP_ZERO, PAGE_KERNEL);
+ __GFP_ZERO);
if (!sdp->sd_quota_bitmap)
return error;
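__vmalloc() also loses an argument in this series: the pgprot_t parameter, which was PAGE_KERNEL at every call site touched here, is gone, leaving just the size and the GFP flags. gfs2's usual try-kmalloc-then-fall-back pattern therefore reads:

/* try the cheap allocation first, without warning on failure */
hc = kmalloc(hsize, GFP_NOFS | __GFP_NOWARN);
if (hc == NULL)
        hc = __vmalloc(hsize, GFP_NOFS);        /* two-argument form, no pgprot */
if (hc == NULL)
        return ERR_PTR(-ENOMEM);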
diff --git a/fs/hfs/Kconfig b/fs/hfs/Kconfig
index 44f6e89bcb75..129926b5142d 100644
--- a/fs/hfs/Kconfig
+++ b/fs/hfs/Kconfig
@@ -6,7 +6,7 @@ config HFS_FS
help
If you say Y here, you will be able to mount Macintosh-formatted
floppy disks and hard drive partitions with full read-write access.
- Please read <file:Documentation/filesystems/hfs.txt> to learn about
+ Please read <file:Documentation/filesystems/hfs.rst> to learn about
the available mount options.
To compile this file system support as a module, choose M here: the
diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c
index 460281b1299e..cdf0edeeb278 100644
--- a/fs/hfs/mdb.c
+++ b/fs/hfs/mdb.c
@@ -32,29 +32,35 @@
static int hfs_get_last_session(struct super_block *sb,
sector_t *start, sector_t *size)
{
- struct cdrom_multisession ms_info;
- struct cdrom_tocentry te;
- int res;
+ struct cdrom_device_info *cdi = disk_to_cdi(sb->s_bdev->bd_disk);
/* default values */
*start = 0;
*size = i_size_read(sb->s_bdev->bd_inode) >> 9;
if (HFS_SB(sb)->session >= 0) {
+ struct cdrom_tocentry te;
+
+ if (!cdi)
+ return -EINVAL;
+
te.cdte_track = HFS_SB(sb)->session;
te.cdte_format = CDROM_LBA;
- res = ioctl_by_bdev(sb->s_bdev, CDROMREADTOCENTRY, (unsigned long)&te);
- if (!res && (te.cdte_ctrl & CDROM_DATA_TRACK) == 4) {
- *start = (sector_t)te.cdte_addr.lba << 2;
- return 0;
+ if (cdrom_read_tocentry(cdi, &te) ||
+ (te.cdte_ctrl & CDROM_DATA_TRACK) != 4) {
+ pr_err("invalid session number or type of track\n");
+ return -EINVAL;
}
- pr_err("invalid session number or type of track\n");
- return -EINVAL;
+
+ *start = (sector_t)te.cdte_addr.lba << 2;
+ } else if (cdi) {
+ struct cdrom_multisession ms_info;
+
+ ms_info.addr_format = CDROM_LBA;
+ if (cdrom_multisession(cdi, &ms_info) == 0 && ms_info.xa_flag)
+ *start = (sector_t)ms_info.addr.lba << 2;
}
- ms_info.addr_format = CDROM_LBA;
- res = ioctl_by_bdev(sb->s_bdev, CDROMMULTISESSION, (unsigned long)&ms_info);
- if (!res && ms_info.xa_flag)
- *start = (sector_t)ms_info.addr.lba << 2;
+
return 0;
}
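hfs here (and hfsplus below) stop issuing CDROM ioctls on the block device and instead call the in-kernel cdrom helpers on the cdrom_device_info obtained from disk_to_cdi(); a NULL cdi now simply means "not a CD-ROM device" rather than a failed ioctl. Condensed, with want_specific_session and session as stand-ins for the HFS_SB(sb)->session check:

struct cdrom_device_info *cdi = disk_to_cdi(sb->s_bdev->bd_disk);

if (want_specific_session) {
        struct cdrom_tocentry te = {
                .cdte_track  = session,
                .cdte_format = CDROM_LBA,
        };

        if (!cdi)
                return -EINVAL; /* a session was requested on a non-CD device */
        if (cdrom_read_tocentry(cdi, &te) ||
            (te.cdte_ctrl & CDROM_DATA_TRACK) != 4)
                return -EINVAL;
        *start = (sector_t)te.cdte_addr.lba << 2;
} else if (cdi) {
        struct cdrom_multisession ms_info = { .addr_format = CDROM_LBA };

        /* best effort: on any failure the defaults computed earlier stand */
        if (cdrom_multisession(cdi, &ms_info) == 0 && ms_info.xa_flag)
                *start = (sector_t)ms_info.addr.lba << 2;
}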
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 94bd83b36644..e3da9e96b835 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -340,7 +340,7 @@ int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end,
}
if (!test_bit(HFSPLUS_SB_NOBARRIER, &sbi->flags))
- blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
+ blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL);
inode_unlock(inode);
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 2b9e5743105e..129dca3f4b78 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -239,7 +239,7 @@ out:
mutex_unlock(&sbi->vh_mutex);
if (!test_bit(HFSPLUS_SB_NOBARRIER, &sbi->flags))
- blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL);
+ blkdev_issue_flush(sb->s_bdev, GFP_KERNEL);
return error;
}
diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c
index 08c1580bdf7a..61eec628805d 100644
--- a/fs/hfsplus/wrapper.c
+++ b/fs/hfsplus/wrapper.c
@@ -127,31 +127,34 @@ static int hfsplus_read_mdb(void *bufptr, struct hfsplus_wd *wd)
static int hfsplus_get_last_session(struct super_block *sb,
sector_t *start, sector_t *size)
{
- struct cdrom_multisession ms_info;
- struct cdrom_tocentry te;
- int res;
+ struct cdrom_device_info *cdi = disk_to_cdi(sb->s_bdev->bd_disk);
/* default values */
*start = 0;
*size = i_size_read(sb->s_bdev->bd_inode) >> 9;
if (HFSPLUS_SB(sb)->session >= 0) {
+ struct cdrom_tocentry te;
+
+ if (!cdi)
+ return -EINVAL;
+
te.cdte_track = HFSPLUS_SB(sb)->session;
te.cdte_format = CDROM_LBA;
- res = ioctl_by_bdev(sb->s_bdev,
- CDROMREADTOCENTRY, (unsigned long)&te);
- if (!res && (te.cdte_ctrl & CDROM_DATA_TRACK) == 4) {
- *start = (sector_t)te.cdte_addr.lba << 2;
- return 0;
+ if (cdrom_read_tocentry(cdi, &te) ||
+ (te.cdte_ctrl & CDROM_DATA_TRACK) != 4) {
+ pr_err("invalid session number or type of track\n");
+ return -EINVAL;
}
- pr_err("invalid session number or type of track\n");
- return -EINVAL;
+ *start = (sector_t)te.cdte_addr.lba << 2;
+ } else if (cdi) {
+ struct cdrom_multisession ms_info;
+
+ ms_info.addr_format = CDROM_LBA;
+ if (cdrom_multisession(cdi, &ms_info) == 0 && ms_info.xa_flag)
+ *start = (sector_t)ms_info.addr.lba << 2;
}
- ms_info.addr_format = CDROM_LBA;
- res = ioctl_by_bdev(sb->s_bdev, CDROMMULTISESSION,
- (unsigned long)&ms_info);
- if (!res && ms_info.xa_flag)
- *start = (sector_t)ms_info.addr.lba << 2;
+
return 0;
}
diff --git a/fs/hpfs/Kconfig b/fs/hpfs/Kconfig
index 56aa0336254a..2b36dc6f0a10 100644
--- a/fs/hpfs/Kconfig
+++ b/fs/hpfs/Kconfig
@@ -9,7 +9,7 @@ config HPFS_FS
write files to an OS/2 HPFS partition on your hard drive. OS/2
floppies however are in regular MSDOS format, so you don't need this
option in order to be able to read them. Read
- <file:Documentation/filesystems/hpfs.txt>.
+ <file:Documentation/filesystems/hpfs.rst>.
To compile this file system support as a module, choose M here: the
module will be called hpfs. If unsure, say N.
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index b36abf9cb345..2de0d3492d15 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -125,10 +125,9 @@ static int hpfs_writepage(struct page *page, struct writeback_control *wbc)
return block_write_full_page(page, hpfs_get_block, wbc);
}
-static int hpfs_readpages(struct file *file, struct address_space *mapping,
- struct list_head *pages, unsigned nr_pages)
+static void hpfs_readahead(struct readahead_control *rac)
{
- return mpage_readpages(mapping, pages, nr_pages, hpfs_get_block);
+ mpage_readahead(rac, hpfs_get_block);
}
static int hpfs_writepages(struct address_space *mapping,
@@ -198,7 +197,7 @@ static int hpfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
const struct address_space_operations hpfs_aops = {
.readpage = hpfs_readpage,
.writepage = hpfs_writepage,
- .readpages = hpfs_readpages,
+ .readahead = hpfs_readahead,
.writepages = hpfs_writepages,
.write_begin = hpfs_write_begin,
.write_end = hpfs_write_end,
diff --git a/fs/inode.c b/fs/inode.c
index cc6e701b7e5d..9fcec07a9d7c 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1606,14 +1606,14 @@ EXPORT_SYMBOL(iput);
* @inode: inode owning the block number being requested
* @block: pointer containing the block to find
*
- * Replaces the value in *block with the block number on the device holding
+ * Replaces the value in ``*block`` with the block number on the device holding
* corresponding to the requested block number in the file.
* That is, asked for block 4 of inode 1 the function will replace the
- * 4 in *block, with disk block relative to the disk start that holds that
+ * 4 in ``*block``, with disk block relative to the disk start that holds that
* block of the file.
*
* Returns -EINVAL in case of error, 0 otherwise. If mapping falls into a
- * hole, returns 0 and *block is also set to 0.
+ * hole, returns 0 and ``*block`` is also set to 0.
*/
int bmap(struct inode *inode, sector_t *block)
{
diff --git a/fs/internal.h b/fs/internal.h
index aa5d45524e87..b89d78f10396 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -126,7 +126,6 @@ extern struct open_how build_open_how(int flags, umode_t mode);
extern int build_open_flags(const struct open_how *how, struct open_flags *op);
long do_sys_ftruncate(unsigned int fd, loff_t length, int small);
-long do_faccessat(int dfd, const char __user *filename, int mode);
int do_fchmodat(int dfd, const char __user *filename, umode_t mode);
int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group,
int flag);
@@ -186,5 +185,5 @@ int sb_init_dio_done_wq(struct super_block *sb);
/*
* fs/stat.c:
*/
-unsigned vfs_stat_set_lookup_flags(unsigned *lookup_flags, int flags);
-int cp_statx(const struct kstat *stat, struct statx __user *buffer);
+int do_statx(int dfd, const char __user *filename, unsigned flags,
+ unsigned int mask, struct statx __user *buffer);
diff --git a/fs/io_uring.c b/fs/io_uring.c
index bb25e3997d41..9d4bd0d3a080 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -142,7 +142,7 @@ struct io_rings {
*/
u32 sq_dropped;
/*
- * Runtime flags
+ * Runtime SQ flags
*
* Written by the kernel, shouldn't be modified by the
* application.
@@ -152,6 +152,13 @@ struct io_rings {
*/
u32 sq_flags;
/*
+ * Runtime CQ flags
+ *
+ * Written by the application, shouldn't be modified by the
+ * kernel.
+ */
+ u32 cq_flags;
+ /*
* Number of completion events lost because the queue was full;
* this should be avoided by the application by making sure
* there are not more requests pending than there is space in
@@ -191,7 +198,7 @@ struct fixed_file_ref_node {
struct list_head node;
struct list_head file_list;
struct fixed_file_data *file_data;
- struct work_struct work;
+ struct llist_node llist;
};
struct fixed_file_data {
@@ -279,8 +286,8 @@ struct io_ring_ctx {
const struct cred *creds;
- /* 0 is for ctx quiesce/reinit/free, 1 is for sqo_thread started */
- struct completion *completions;
+ struct completion ref_comp;
+ struct completion sq_thread_comp;
/* if all else fails... */
struct io_kiocb *fallback_req;
@@ -327,6 +334,9 @@ struct io_ring_ctx {
struct list_head inflight_list;
} ____cacheline_aligned_in_smp;
+ struct delayed_work file_put_work;
+ struct llist_head file_put_llist;
+
struct work_struct exit_work;
};
@@ -384,7 +394,8 @@ struct io_timeout {
struct file *file;
u64 addr;
int flags;
- u32 count;
+ u32 off;
+ u32 target_seq;
};
struct io_rw {
@@ -415,11 +426,7 @@ struct io_sr_msg {
struct io_open {
struct file *file;
int dfd;
- union {
- unsigned mask;
- };
struct filename *filename;
- struct statx __user *buffer;
struct open_how how;
unsigned long nofile;
};
@@ -471,6 +478,15 @@ struct io_provide_buf {
__u16 bid;
};
+struct io_statx {
+ struct file *file;
+ int dfd;
+ unsigned int mask;
+ unsigned int flags;
+ const char __user *filename;
+ struct statx __user *buffer;
+};
+
struct io_async_connect {
struct sockaddr_storage address;
};
@@ -612,11 +628,11 @@ struct io_kiocb {
struct io_epoll epoll;
struct io_splice splice;
struct io_provide_buf pbuf;
+ struct io_statx statx;
};
struct io_async_ctx *io;
int cflags;
- bool needs_fixed_file;
u8 opcode;
u16 buf_index;
@@ -788,7 +804,6 @@ static const struct io_op_def io_op_defs[] = {
.needs_fs = 1,
},
[IORING_OP_CLOSE] = {
- .needs_file = 1,
.file_table = 1,
},
[IORING_OP_FILES_UPDATE] = {
@@ -847,6 +862,11 @@ static const struct io_op_def io_op_defs[] = {
},
[IORING_OP_PROVIDE_BUFFERS] = {},
[IORING_OP_REMOVE_BUFFERS] = {},
+ [IORING_OP_TEE] = {
+ .needs_file = 1,
+ .hash_reg_file = 1,
+ .unbound_nonreg_file = 1,
+ },
};
static void io_wq_submit_work(struct io_wq_work **workptr);
@@ -882,11 +902,18 @@ struct sock *io_uring_get_socket(struct file *file)
}
EXPORT_SYMBOL(io_uring_get_socket);
+static void io_file_put_work(struct work_struct *work);
+
+static inline bool io_async_submit(struct io_ring_ctx *ctx)
+{
+ return ctx->flags & IORING_SETUP_SQPOLL;
+}
+
static void io_ring_ctx_ref_free(struct percpu_ref *ref)
{
struct io_ring_ctx *ctx = container_of(ref, struct io_ring_ctx, refs);
- complete(&ctx->completions[0]);
+ complete(&ctx->ref_comp);
}
static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
@@ -902,10 +929,6 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
if (!ctx->fallback_req)
goto err;
- ctx->completions = kmalloc(2 * sizeof(struct completion), GFP_KERNEL);
- if (!ctx->completions)
- goto err;
-
/*
* Use 5 bits less than the max cq entries, that should give us around
* 32 entries per hash list if totally full and uniformly spread.
@@ -929,8 +952,8 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
init_waitqueue_head(&ctx->sqo_wait);
init_waitqueue_head(&ctx->cq_wait);
INIT_LIST_HEAD(&ctx->cq_overflow_list);
- init_completion(&ctx->completions[0]);
- init_completion(&ctx->completions[1]);
+ init_completion(&ctx->ref_comp);
+ init_completion(&ctx->sq_thread_comp);
idr_init(&ctx->io_buffer_idr);
idr_init(&ctx->personality_idr);
mutex_init(&ctx->uring_lock);
@@ -942,11 +965,12 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
init_waitqueue_head(&ctx->inflight_wait);
spin_lock_init(&ctx->inflight_lock);
INIT_LIST_HEAD(&ctx->inflight_list);
+ INIT_DELAYED_WORK(&ctx->file_put_work, io_file_put_work);
+ init_llist_head(&ctx->file_put_llist);
return ctx;
err:
if (ctx->fallback_req)
kmem_cache_free(req_cachep, ctx->fallback_req);
- kfree(ctx->completions);
kfree(ctx->cancel_hash);
kfree(ctx);
return NULL;
@@ -968,36 +992,6 @@ static inline bool req_need_defer(struct io_kiocb *req)
return false;
}
-static struct io_kiocb *io_get_deferred_req(struct io_ring_ctx *ctx)
-{
- struct io_kiocb *req;
-
- req = list_first_entry_or_null(&ctx->defer_list, struct io_kiocb, list);
- if (req && !req_need_defer(req)) {
- list_del_init(&req->list);
- return req;
- }
-
- return NULL;
-}
-
-static struct io_kiocb *io_get_timeout_req(struct io_ring_ctx *ctx)
-{
- struct io_kiocb *req;
-
- req = list_first_entry_or_null(&ctx->timeout_list, struct io_kiocb, list);
- if (req) {
- if (req->flags & REQ_F_TIMEOUT_NOSEQ)
- return NULL;
- if (!__req_need_defer(req)) {
- list_del_init(&req->list);
- return req;
- }
- }
-
- return NULL;
-}
-
static void __io_commit_cqring(struct io_ring_ctx *ctx)
{
struct io_rings *rings = ctx->rings;
@@ -1113,17 +1107,43 @@ static void io_kill_timeouts(struct io_ring_ctx *ctx)
spin_unlock_irq(&ctx->completion_lock);
}
-static void io_commit_cqring(struct io_ring_ctx *ctx)
+static void __io_queue_deferred(struct io_ring_ctx *ctx)
{
- struct io_kiocb *req;
+ do {
+ struct io_kiocb *req = list_first_entry(&ctx->defer_list,
+ struct io_kiocb, list);
- while ((req = io_get_timeout_req(ctx)) != NULL)
+ if (req_need_defer(req))
+ break;
+ list_del_init(&req->list);
+ io_queue_async_work(req);
+ } while (!list_empty(&ctx->defer_list));
+}
+
+static void io_flush_timeouts(struct io_ring_ctx *ctx)
+{
+ while (!list_empty(&ctx->timeout_list)) {
+ struct io_kiocb *req = list_first_entry(&ctx->timeout_list,
+ struct io_kiocb, list);
+
+ if (req->flags & REQ_F_TIMEOUT_NOSEQ)
+ break;
+ if (req->timeout.target_seq != ctx->cached_cq_tail
+ - atomic_read(&ctx->cq_timeouts))
+ break;
+
+ list_del_init(&req->list);
io_kill_timeout(req);
+ }
+}
+static void io_commit_cqring(struct io_ring_ctx *ctx)
+{
+ io_flush_timeouts(ctx);
__io_commit_cqring(ctx);
- while ((req = io_get_deferred_req(ctx)) != NULL)
- io_queue_async_work(req);
+ if (unlikely(!list_empty(&ctx->defer_list)))
+ __io_queue_deferred(ctx);
}
static struct io_uring_cqe *io_get_cqring(struct io_ring_ctx *ctx)
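io_commit_cqring() is reorganized around the new sequence-based timeouts: rather than popping entries via io_get_timeout_req()/io_get_deferred_req(), it walks the timeout list until it meets an entry whose target_seq has not been reached yet, and only touches the defer list when it is non-empty. In outline, mirroring the code added above:

static void example_commit_cqring(struct io_ring_ctx *ctx)
{
        /* 1) complete timeouts whose target CQ sequence has been reached */
        io_flush_timeouts(ctx);

        /* 2) publish the new CQ tail */
        __io_commit_cqring(ctx);

        /* 3) the common case has nothing deferred, so skip the call entirely */
        if (unlikely(!list_empty(&ctx->defer_list)))
                __io_queue_deferred(ctx);
}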
@@ -1148,6 +1168,8 @@ static inline bool io_should_trigger_evfd(struct io_ring_ctx *ctx)
{
if (!ctx->cq_ev_fd)
return false;
+ if (READ_ONCE(ctx->rings->cq_flags) & IORING_CQ_EVENTFD_DISABLED)
+ return false;
if (!ctx->eventfd_async)
return true;
return io_wq_current_is_worker();
@@ -1984,15 +2006,19 @@ static void io_iopoll_req_issued(struct io_kiocb *req)
wake_up(&ctx->sqo_wait);
}
-static void io_file_put(struct io_submit_state *state)
+static void __io_state_file_put(struct io_submit_state *state)
{
- if (state->file) {
- int diff = state->has_refs - state->used_refs;
+ int diff = state->has_refs - state->used_refs;
- if (diff)
- fput_many(state->file, diff);
- state->file = NULL;
- }
+ if (diff)
+ fput_many(state->file, diff);
+ state->file = NULL;
+}
+
+static inline void io_state_file_put(struct io_submit_state *state)
+{
+ if (state->file)
+ __io_state_file_put(state);
}
/*
@@ -2011,7 +2037,7 @@ static struct file *__io_file_get(struct io_submit_state *state, int fd)
state->ios_left--;
return state->file;
}
- io_file_put(state);
+ __io_state_file_put(state);
}
state->file = fget_many(fd, state->ios_left);
if (!state->file)
@@ -2727,7 +2753,8 @@ out_free:
return ret;
}
-static int io_splice_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+static int __io_splice_prep(struct io_kiocb *req,
+ const struct io_uring_sqe *sqe)
{
struct io_splice* sp = &req->splice;
unsigned int valid_flags = SPLICE_F_FD_IN_FIXED | SPLICE_F_ALL;
@@ -2737,8 +2764,6 @@ static int io_splice_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return 0;
sp->file_in = NULL;
- sp->off_in = READ_ONCE(sqe->splice_off_in);
- sp->off_out = READ_ONCE(sqe->off);
sp->len = READ_ONCE(sqe->len);
sp->flags = READ_ONCE(sqe->splice_flags);
@@ -2757,6 +2782,46 @@ static int io_splice_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return 0;
}
+static int io_tee_prep(struct io_kiocb *req,
+ const struct io_uring_sqe *sqe)
+{
+ if (READ_ONCE(sqe->splice_off_in) || READ_ONCE(sqe->off))
+ return -EINVAL;
+ return __io_splice_prep(req, sqe);
+}
+
+static int io_tee(struct io_kiocb *req, bool force_nonblock)
+{
+ struct io_splice *sp = &req->splice;
+ struct file *in = sp->file_in;
+ struct file *out = sp->file_out;
+ unsigned int flags = sp->flags & ~SPLICE_F_FD_IN_FIXED;
+ long ret = 0;
+
+ if (force_nonblock)
+ return -EAGAIN;
+ if (sp->len)
+ ret = do_tee(in, out, sp->len, flags);
+
+ io_put_file(req, in, (sp->flags & SPLICE_F_FD_IN_FIXED));
+ req->flags &= ~REQ_F_NEED_CLEANUP;
+
+ io_cqring_add_event(req, ret);
+ if (ret != sp->len)
+ req_set_fail_links(req);
+ io_put_req(req);
+ return 0;
+}
+
+static int io_splice_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+ struct io_splice* sp = &req->splice;
+
+ sp->off_in = READ_ONCE(sqe->splice_off_in);
+ sp->off_out = READ_ONCE(sqe->off);
+ return __io_splice_prep(req, sqe);
+}
+
static int io_splice(struct io_kiocb *req, bool force_nonblock)
{
struct io_splice *sp = &req->splice;
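
The hunks above split splice preparation into a shared __io_splice_prep() and add the new IORING_OP_TEE opcode, which reuses the splice plumbing but rejects any offsets. As a rough illustration of the userspace side, here is a hedged sketch of filling a raw SQE for tee; the field mapping mirrors io_tee_prep()/io_splice_prep() above, while the helper name and exact header are illustrative and assume the uapi definitions added by this series.

#include <string.h>
#include <stdint.h>
#include <linux/io_uring.h>	/* struct io_uring_sqe, IORING_OP_TEE */

/* Hypothetical helper: duplicate up to nbytes from pipe_in to pipe_out. */
static void prep_tee_sqe(struct io_uring_sqe *sqe, int pipe_in, int pipe_out,
			 unsigned int nbytes, unsigned int splice_flags)
{
	memset(sqe, 0, sizeof(*sqe));
	sqe->opcode       = IORING_OP_TEE;
	sqe->fd           = pipe_out;		/* output pipe, as for splice */
	sqe->splice_fd_in = pipe_in;		/* input pipe */
	sqe->len          = nbytes;
	sqe->splice_flags = splice_flags;	/* e.g. SPLICE_F_FD_IN_FIXED */
	/* sqe->off and sqe->splice_off_in must stay 0, or io_tee_prep()
	 * above returns -EINVAL: tee never takes offsets. */
}
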
@@ -3305,43 +3370,23 @@ static int io_fadvise(struct io_kiocb *req, bool force_nonblock)
static int io_statx_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
- const char __user *fname;
- unsigned lookup_flags;
- int ret;
-
if (sqe->ioprio || sqe->buf_index)
return -EINVAL;
if (req->flags & REQ_F_FIXED_FILE)
return -EBADF;
- if (req->flags & REQ_F_NEED_CLEANUP)
- return 0;
-
- req->open.dfd = READ_ONCE(sqe->fd);
- req->open.mask = READ_ONCE(sqe->len);
- fname = u64_to_user_ptr(READ_ONCE(sqe->addr));
- req->open.buffer = u64_to_user_ptr(READ_ONCE(sqe->addr2));
- req->open.how.flags = READ_ONCE(sqe->statx_flags);
-
- if (vfs_stat_set_lookup_flags(&lookup_flags, req->open.how.flags))
- return -EINVAL;
- req->open.filename = getname_flags(fname, lookup_flags, NULL);
- if (IS_ERR(req->open.filename)) {
- ret = PTR_ERR(req->open.filename);
- req->open.filename = NULL;
- return ret;
- }
+ req->statx.dfd = READ_ONCE(sqe->fd);
+ req->statx.mask = READ_ONCE(sqe->len);
+ req->statx.filename = u64_to_user_ptr(READ_ONCE(sqe->addr));
+ req->statx.buffer = u64_to_user_ptr(READ_ONCE(sqe->addr2));
+ req->statx.flags = READ_ONCE(sqe->statx_flags);
- req->flags |= REQ_F_NEED_CLEANUP;
return 0;
}
static int io_statx(struct io_kiocb *req, bool force_nonblock)
{
- struct io_open *ctx = &req->open;
- unsigned lookup_flags;
- struct path path;
- struct kstat stat;
+ struct io_statx *ctx = &req->statx;
int ret;
if (force_nonblock) {
@@ -3351,29 +3396,9 @@ static int io_statx(struct io_kiocb *req, bool force_nonblock)
return -EAGAIN;
}
- if (vfs_stat_set_lookup_flags(&lookup_flags, ctx->how.flags))
- return -EINVAL;
-
-retry:
- /* filename_lookup() drops it, keep a reference */
- ctx->filename->refcnt++;
+ ret = do_statx(ctx->dfd, ctx->filename, ctx->flags, ctx->mask,
+ ctx->buffer);
- ret = filename_lookup(ctx->dfd, ctx->filename, lookup_flags, &path,
- NULL);
- if (ret)
- goto err;
-
- ret = vfs_getattr(&path, &stat, ctx->mask, ctx->how.flags);
- path_put(&path);
- if (retry_estale(ret, lookup_flags)) {
- lookup_flags |= LOOKUP_REVAL;
- goto retry;
- }
- if (!ret)
- ret = cp_statx(&stat, ctx->buffer);
-err:
- putname(ctx->filename);
- req->flags &= ~REQ_F_NEED_CLEANUP;
if (ret < 0)
req_set_fail_links(req);
io_cqring_add_event(req, ret);
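
With the rework above, IORING_OP_STATX no longer resolves the filename at prep time; io_statx() hands the user pointers straight to do_statx(), and prep only copies raw SQE fields into the new req->statx. The SQE layout seen from userspace is unchanged. A hedged sketch of that mapping, with the helper name purely illustrative:

#include <string.h>
#include <stdint.h>
#include <linux/io_uring.h>
#include <linux/stat.h>		/* struct statx, STATX_* mask bits */

static void prep_statx_sqe(struct io_uring_sqe *sqe, int dfd, const char *path,
			   int flags, unsigned int mask, struct statx *stx)
{
	memset(sqe, 0, sizeof(*sqe));
	sqe->opcode      = IORING_OP_STATX;
	sqe->fd          = dfd;				/* -> statx.dfd */
	sqe->addr        = (uint64_t)(uintptr_t)path;	/* -> statx.filename */
	sqe->addr2       = (uint64_t)(uintptr_t)stx;	/* -> statx.buffer */
	sqe->len         = mask;			/* -> statx.mask */
	sqe->statx_flags = flags;			/* -> statx.flags */
}
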
@@ -3396,10 +3421,6 @@ static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return -EBADF;
req->close.fd = READ_ONCE(sqe->fd);
- if (req->file->f_op == &io_uring_fops ||
- req->close.fd == req->ctx->ring_fd)
- return -EBADF;
-
return 0;
}
@@ -3432,21 +3453,14 @@ static int io_close(struct io_kiocb *req, bool force_nonblock)
req->close.put_file = NULL;
ret = __close_fd_get_file(req->close.fd, &req->close.put_file);
if (ret < 0)
- return ret;
+ return (ret == -ENOENT) ? -EBADF : ret;
/* if the file has a flush method, be safe and punt to async */
if (req->close.put_file->f_op->flush && force_nonblock) {
- /* submission ref will be dropped, take it for async */
- refcount_inc(&req->refs);
-
+ /* avoid grabbing files - we don't need the files */
+ req->flags |= REQ_F_NO_FILE_TABLE | REQ_F_MUST_PUNT;
req->work.func = io_close_finish;
- /*
- * Do manual async queue here to avoid grabbing files - we don't
- * need the files, and it'll cause io_close_finish() to close
- * the file again and cause a double CQE entry for this request
- */
- io_queue_async_work(req);
- return 0;
+ return -EAGAIN;
}
/*
@@ -4096,27 +4110,6 @@ struct io_poll_table {
int error;
};
-static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt,
- struct wait_queue_head *head)
-{
- if (unlikely(poll->head)) {
- pt->error = -EINVAL;
- return;
- }
-
- pt->error = 0;
- poll->head = head;
- add_wait_queue(head, &poll->wait);
-}
-
-static void io_async_queue_proc(struct file *file, struct wait_queue_head *head,
- struct poll_table_struct *p)
-{
- struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
-
- __io_queue_proc(&pt->req->apoll->poll, pt, head);
-}
-
static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll,
__poll_t mask, task_work_func_t func)
{
@@ -4170,12 +4163,150 @@ static bool io_poll_rewait(struct io_kiocb *req, struct io_poll_iocb *poll)
return false;
}
+static void io_poll_remove_double(struct io_kiocb *req)
+{
+ struct io_poll_iocb *poll = (struct io_poll_iocb *) req->io;
+
+ lockdep_assert_held(&req->ctx->completion_lock);
+
+ if (poll && poll->head) {
+ struct wait_queue_head *head = poll->head;
+
+ spin_lock(&head->lock);
+ list_del_init(&poll->wait.entry);
+ if (poll->wait.private)
+ refcount_dec(&req->refs);
+ poll->head = NULL;
+ spin_unlock(&head->lock);
+ }
+}
+
+static void io_poll_complete(struct io_kiocb *req, __poll_t mask, int error)
+{
+ struct io_ring_ctx *ctx = req->ctx;
+
+ io_poll_remove_double(req);
+ req->poll.done = true;
+ io_cqring_fill_event(req, error ? error : mangle_poll(mask));
+ io_commit_cqring(ctx);
+}
+
+static void io_poll_task_handler(struct io_kiocb *req, struct io_kiocb **nxt)
+{
+ struct io_ring_ctx *ctx = req->ctx;
+
+ if (io_poll_rewait(req, &req->poll)) {
+ spin_unlock_irq(&ctx->completion_lock);
+ return;
+ }
+
+ hash_del(&req->hash_node);
+ io_poll_complete(req, req->result, 0);
+ req->flags |= REQ_F_COMP_LOCKED;
+ io_put_req_find_next(req, nxt);
+ spin_unlock_irq(&ctx->completion_lock);
+
+ io_cqring_ev_posted(ctx);
+}
+
+static void io_poll_task_func(struct callback_head *cb)
+{
+ struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work);
+ struct io_kiocb *nxt = NULL;
+
+ io_poll_task_handler(req, &nxt);
+ if (nxt) {
+ struct io_ring_ctx *ctx = nxt->ctx;
+
+ mutex_lock(&ctx->uring_lock);
+ __io_queue_sqe(nxt, NULL);
+ mutex_unlock(&ctx->uring_lock);
+ }
+}
+
+static int io_poll_double_wake(struct wait_queue_entry *wait, unsigned mode,
+ int sync, void *key)
+{
+ struct io_kiocb *req = wait->private;
+ struct io_poll_iocb *poll = (struct io_poll_iocb *) req->io;
+ __poll_t mask = key_to_poll(key);
+
+ /* for instances that support it check for an event match first: */
+ if (mask && !(mask & poll->events))
+ return 0;
+
+ if (req->poll.head) {
+ bool done;
+
+ spin_lock(&req->poll.head->lock);
+ done = list_empty(&req->poll.wait.entry);
+ if (!done)
+ list_del_init(&req->poll.wait.entry);
+ spin_unlock(&req->poll.head->lock);
+ if (!done)
+ __io_async_wake(req, poll, mask, io_poll_task_func);
+ }
+ refcount_dec(&req->refs);
+ return 1;
+}
+
+static void io_init_poll_iocb(struct io_poll_iocb *poll, __poll_t events,
+ wait_queue_func_t wake_func)
+{
+ poll->head = NULL;
+ poll->done = false;
+ poll->canceled = false;
+ poll->events = events;
+ INIT_LIST_HEAD(&poll->wait.entry);
+ init_waitqueue_func_entry(&poll->wait, wake_func);
+}
+
+static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt,
+ struct wait_queue_head *head)
+{
+ struct io_kiocb *req = pt->req;
+
+ /*
+ * If poll->head is already set, it's because the file being polled
+ * uses multiple waitqueues for poll handling (eg one for read, one
+ * for write). Setup a separate io_poll_iocb if this happens.
+ */
+ if (unlikely(poll->head)) {
+ /* already have a 2nd entry, fail a third attempt */
+ if (req->io) {
+ pt->error = -EINVAL;
+ return;
+ }
+ poll = kmalloc(sizeof(*poll), GFP_ATOMIC);
+ if (!poll) {
+ pt->error = -ENOMEM;
+ return;
+ }
+ io_init_poll_iocb(poll, req->poll.events, io_poll_double_wake);
+ refcount_inc(&req->refs);
+ poll->wait.private = req;
+ req->io = (void *) poll;
+ }
+
+ pt->error = 0;
+ poll->head = head;
+ add_wait_queue(head, &poll->wait);
+}
+
+static void io_async_queue_proc(struct file *file, struct wait_queue_head *head,
+ struct poll_table_struct *p)
+{
+ struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
+
+ __io_queue_proc(&pt->req->apoll->poll, pt, head);
+}
+
static void io_async_task_func(struct callback_head *cb)
{
struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work);
struct async_poll *apoll = req->apoll;
struct io_ring_ctx *ctx = req->ctx;
- bool canceled;
+ bool canceled = false;
trace_io_uring_task_run(req->ctx, req->opcode, req->user_data);
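
The __io_queue_proc()/io_poll_double_wake() additions above exist because vfs_poll() may invoke the queueing callback more than once: a file whose ->poll method registers separate read and write waitqueues gets a second, kmalloc'ed io_poll_iocb stored in req->io. A hedged sketch of such a poll method (all demo_* names are hypothetical, not from this patch) shows where the two calls come from:

struct demo_dev {
	wait_queue_head_t read_wq;
	wait_queue_head_t write_wq;
	bool readable;
	bool writable;
};

static __poll_t demo_poll(struct file *file, poll_table *pt)
{
	struct demo_dev *dev = file->private_data;
	__poll_t mask = 0;

	/* The first poll_wait() lands in req->poll; the second triggers the
	 * kmalloc path in __io_queue_proc() and is woken through
	 * io_poll_double_wake(); a third waitqueue would fail with -EINVAL. */
	poll_wait(file, &dev->read_wq, pt);
	poll_wait(file, &dev->write_wq, pt);

	if (dev->readable)
		mask |= EPOLLIN | EPOLLRDNORM;
	if (dev->writable)
		mask |= EPOLLOUT | EPOLLWRNORM;
	return mask;
}
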
@@ -4184,34 +4315,33 @@ static void io_async_task_func(struct callback_head *cb)
return;
}
- if (hash_hashed(&req->hash_node))
+ /* If req is still hashed, it cannot have been canceled. Don't check. */
+ if (hash_hashed(&req->hash_node)) {
hash_del(&req->hash_node);
-
- canceled = READ_ONCE(apoll->poll.canceled);
- if (canceled) {
- io_cqring_fill_event(req, -ECANCELED);
- io_commit_cqring(ctx);
+ } else {
+ canceled = READ_ONCE(apoll->poll.canceled);
+ if (canceled) {
+ io_cqring_fill_event(req, -ECANCELED);
+ io_commit_cqring(ctx);
+ }
}
spin_unlock_irq(&ctx->completion_lock);
/* restore ->work in case we need to retry again */
memcpy(&req->work, &apoll->work, sizeof(req->work));
+ kfree(apoll);
- if (canceled) {
- kfree(apoll);
+ if (!canceled) {
+ __set_current_state(TASK_RUNNING);
+ mutex_lock(&ctx->uring_lock);
+ __io_queue_sqe(req, NULL);
+ mutex_unlock(&ctx->uring_lock);
+ } else {
io_cqring_ev_posted(ctx);
req_set_fail_links(req);
io_double_put_req(req);
- return;
}
-
- __set_current_state(TASK_RUNNING);
- mutex_lock(&ctx->uring_lock);
- __io_queue_sqe(req, NULL);
- mutex_unlock(&ctx->uring_lock);
-
- kfree(apoll);
}
static int io_async_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
@@ -4245,18 +4375,13 @@ static __poll_t __io_arm_poll_handler(struct io_kiocb *req,
bool cancel = false;
poll->file = req->file;
- poll->head = NULL;
- poll->done = poll->canceled = false;
- poll->events = mask;
+ io_init_poll_iocb(poll, mask, wake_func);
+ poll->wait.private = req;
ipt->pt._key = mask;
ipt->req = req;
ipt->error = -EINVAL;
- INIT_LIST_HEAD(&poll->wait.entry);
- init_waitqueue_func_entry(&poll->wait, wake_func);
- poll->wait.private = req;
-
mask = vfs_poll(req->file, &ipt->pt) & poll->events;
spin_lock_irq(&ctx->completion_lock);
@@ -4287,6 +4412,7 @@ static bool io_arm_poll_handler(struct io_kiocb *req)
struct async_poll *apoll;
struct io_poll_table ipt;
__poll_t mask, ret;
+ bool had_io;
if (!req->file || !file_can_poll(req->file))
return false;
@@ -4301,6 +4427,7 @@ static bool io_arm_poll_handler(struct io_kiocb *req)
req->flags |= REQ_F_POLLED;
memcpy(&apoll->work, &req->work, sizeof(req->work));
+ had_io = req->io != NULL;
get_task_struct(current);
req->task = current;
@@ -4320,7 +4447,9 @@ static bool io_arm_poll_handler(struct io_kiocb *req)
io_async_wake);
if (ret) {
ipt.error = 0;
- apoll->poll.done = true;
+ /* only remove double add if we did it here */
+ if (!had_io)
+ io_poll_remove_double(req);
spin_unlock_irq(&ctx->completion_lock);
memcpy(&req->work, &apoll->work, sizeof(req->work));
kfree(apoll);
@@ -4344,32 +4473,32 @@ static bool __io_poll_remove_one(struct io_kiocb *req,
do_complete = true;
}
spin_unlock(&poll->head->lock);
+ hash_del(&req->hash_node);
return do_complete;
}
static bool io_poll_remove_one(struct io_kiocb *req)
{
- struct async_poll *apoll = NULL;
bool do_complete;
if (req->opcode == IORING_OP_POLL_ADD) {
+ io_poll_remove_double(req);
do_complete = __io_poll_remove_one(req, &req->poll);
} else {
- apoll = req->apoll;
+ struct async_poll *apoll = req->apoll;
+
/* non-poll requests have submit ref still */
- do_complete = __io_poll_remove_one(req, &req->apoll->poll);
- if (do_complete)
+ do_complete = __io_poll_remove_one(req, &apoll->poll);
+ if (do_complete) {
io_put_req(req);
- }
-
- hash_del(&req->hash_node);
-
- if (do_complete && apoll) {
- /*
- * restore ->work because we need to call io_req_work_drop_env.
- */
- memcpy(&req->work, &apoll->work, sizeof(req->work));
- kfree(apoll);
+ /*
+ * restore ->work because we will call
+ * io_req_work_drop_env below when dropping the
+ * final reference.
+ */
+ memcpy(&req->work, &apoll->work, sizeof(req->work));
+ kfree(apoll);
+ }
}
if (do_complete) {
@@ -4454,49 +4583,6 @@ static int io_poll_remove(struct io_kiocb *req)
return 0;
}
-static void io_poll_complete(struct io_kiocb *req, __poll_t mask, int error)
-{
- struct io_ring_ctx *ctx = req->ctx;
-
- req->poll.done = true;
- io_cqring_fill_event(req, error ? error : mangle_poll(mask));
- io_commit_cqring(ctx);
-}
-
-static void io_poll_task_handler(struct io_kiocb *req, struct io_kiocb **nxt)
-{
- struct io_ring_ctx *ctx = req->ctx;
- struct io_poll_iocb *poll = &req->poll;
-
- if (io_poll_rewait(req, poll)) {
- spin_unlock_irq(&ctx->completion_lock);
- return;
- }
-
- hash_del(&req->hash_node);
- io_poll_complete(req, req->result, 0);
- req->flags |= REQ_F_COMP_LOCKED;
- io_put_req_find_next(req, nxt);
- spin_unlock_irq(&ctx->completion_lock);
-
- io_cqring_ev_posted(ctx);
-}
-
-static void io_poll_task_func(struct callback_head *cb)
-{
- struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work);
- struct io_kiocb *nxt = NULL;
-
- io_poll_task_handler(req, &nxt);
- if (nxt) {
- struct io_ring_ctx *ctx = nxt->ctx;
-
- mutex_lock(&ctx->uring_lock);
- __io_queue_sqe(nxt, NULL);
- mutex_unlock(&ctx->uring_lock);
- }
-}
-
static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
void *key)
{
@@ -4576,20 +4662,8 @@ static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer)
* We could be racing with timeout deletion. If the list is empty,
* then timeout lookup already found it and will be handling it.
*/
- if (!list_empty(&req->list)) {
- struct io_kiocb *prev;
-
- /*
- * Adjust the reqs sequence before the current one because it
- * will consume a slot in the cq_ring and the cq_tail
- * pointer will be increased, otherwise other timeout reqs may
- * return in advance without waiting for enough wait_nr.
- */
- prev = req;
- list_for_each_entry_continue_reverse(prev, &ctx->timeout_list, list)
- prev->sequence++;
+ if (!list_empty(&req->list))
list_del_init(&req->list);
- }
io_cqring_fill_event(req, -ETIME);
io_commit_cqring(ctx);
@@ -4669,18 +4743,19 @@ static int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
{
struct io_timeout_data *data;
unsigned flags;
+ u32 off = READ_ONCE(sqe->off);
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
if (sqe->ioprio || sqe->buf_index || sqe->len != 1)
return -EINVAL;
- if (sqe->off && is_timeout_link)
+ if (off && is_timeout_link)
return -EINVAL;
flags = READ_ONCE(sqe->timeout_flags);
if (flags & ~IORING_TIMEOUT_ABS)
return -EINVAL;
- req->timeout.count = READ_ONCE(sqe->off);
+ req->timeout.off = off;
if (!req->io && io_alloc_async_ctx(req))
return -ENOMEM;
@@ -4704,68 +4779,39 @@ static int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
static int io_timeout(struct io_kiocb *req)
{
struct io_ring_ctx *ctx = req->ctx;
- struct io_timeout_data *data;
+ struct io_timeout_data *data = &req->io->timeout;
struct list_head *entry;
- unsigned span = 0;
- u32 count = req->timeout.count;
- u32 seq = req->sequence;
+ u32 tail, off = req->timeout.off;
- data = &req->io->timeout;
+ spin_lock_irq(&ctx->completion_lock);
/*
* sqe->off holds how many events that need to occur for this
* timeout event to be satisfied. If it isn't set, then this is
* a pure timeout request, sequence isn't used.
*/
- if (!count) {
+ if (!off) {
req->flags |= REQ_F_TIMEOUT_NOSEQ;
- spin_lock_irq(&ctx->completion_lock);
entry = ctx->timeout_list.prev;
goto add;
}
- req->sequence = seq + count;
+ tail = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts);
+ req->timeout.target_seq = tail + off;
/*
* Insertion sort, ensuring the first entry in the list is always
* the one we need first.
*/
- spin_lock_irq(&ctx->completion_lock);
list_for_each_prev(entry, &ctx->timeout_list) {
struct io_kiocb *nxt = list_entry(entry, struct io_kiocb, list);
- unsigned nxt_seq;
- long long tmp, tmp_nxt;
- u32 nxt_offset = nxt->timeout.count;
if (nxt->flags & REQ_F_TIMEOUT_NOSEQ)
continue;
-
- /*
- * Since seq + count can overflow, use type long
- * long to store it.
- */
- tmp = (long long)seq + count;
- nxt_seq = nxt->sequence - nxt_offset;
- tmp_nxt = (long long)nxt_seq + nxt_offset;
-
- /*
- * cached_sq_head may overflow, and it will never overflow twice
- * once there is some timeout req still be valid.
- */
- if (seq < nxt_seq)
- tmp += UINT_MAX;
-
- if (tmp > tmp_nxt)
+ /* nxt.seq is behind @tail, otherwise would've been completed */
+ if (off >= nxt->timeout.target_seq - tail)
break;
-
- /*
- * Sequence of reqs after the insert one and itself should
- * be adjusted because each timeout req consumes a slot.
- */
- span++;
- nxt->sequence++;
}
- req->sequence -= span;
add:
list_add(&req->list, entry);
data->timer.function = io_timeout_fn;
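
The rework above drops the old span/prev->sequence shuffling in favour of a single target_seq computed when the timeout is armed; both io_flush_timeouts() and the insertion sort then compare distances with plain u32 arithmetic so wrap-around of the CQ tail is harmless. A hedged standalone demo of that arithmetic (not kernel code; "tail" stands in for cached_cq_tail minus cq_timeouts):

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint32_t tail = UINT32_MAX - 2;		/* CQ tail just before wrap */
	uint32_t off = 5;			/* sqe->off: fire after 5 CQEs */
	uint32_t target_seq = tail + off;	/* wraps to 2, as in io_timeout() */

	tail += 2;				/* two completions later... */
	assert(target_seq != tail);		/* io_flush_timeouts() keeps waiting */

	tail += 3;				/* ...three more reach the target */
	assert(target_seq == tail);		/* timeout is flushed */

	/* Insertion-sort key: the remaining distance is also wrap-safe. */
	assert((uint32_t)(target_seq - tail) == 0);
	return 0;
}
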
@@ -4994,6 +5040,9 @@ static int io_req_defer_prep(struct io_kiocb *req,
case IORING_OP_REMOVE_BUFFERS:
ret = io_remove_buffers_prep(req, sqe);
break;
+ case IORING_OP_TEE:
+ ret = io_tee_prep(req, sqe);
+ break;
default:
printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
req->opcode);
@@ -5064,10 +5113,9 @@ static void io_cleanup_req(struct io_kiocb *req)
break;
case IORING_OP_OPENAT:
case IORING_OP_OPENAT2:
- case IORING_OP_STATX:
- putname(req->open.filename);
break;
case IORING_OP_SPLICE:
+ case IORING_OP_TEE:
io_put_file(req, req->splice.file_in,
(req->splice.flags & SPLICE_F_FD_IN_FIXED));
break;
@@ -5298,6 +5346,14 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
}
ret = io_remove_buffers(req, force_nonblock);
break;
+ case IORING_OP_TEE:
+ if (sqe) {
+ ret = io_tee_prep(req, sqe);
+ if (ret < 0)
+ break;
+ }
+ ret = io_tee(req, force_nonblock);
+ break;
default:
ret = -EINVAL;
break;
@@ -5367,7 +5423,7 @@ static inline struct file *io_file_from_index(struct io_ring_ctx *ctx,
struct fixed_file_table *table;
table = &ctx->file_data->table[index >> IORING_FILE_TABLE_SHIFT];
- return table->files[index & IORING_FILE_TABLE_MASK];;
+ return table->files[index & IORING_FILE_TABLE_MASK];
}
static int io_file_get(struct io_submit_state *state, struct io_kiocb *req,
@@ -5403,7 +5459,7 @@ static int io_req_set_file(struct io_submit_state *state, struct io_kiocb *req,
bool fixed;
fixed = (req->flags & REQ_F_FIXED_FILE) != 0;
- if (unlikely(!fixed && req->needs_fixed_file))
+ if (unlikely(!fixed && io_async_submit(req->ctx)))
return -EBADF;
return io_file_get(state, req, fd, &req->file, fixed);
@@ -5638,7 +5694,7 @@ static inline void io_queue_link_head(struct io_kiocb *req)
}
static int io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
- struct io_submit_state *state, struct io_kiocb **link)
+ struct io_kiocb **link)
{
struct io_ring_ctx *ctx = req->ctx;
int ret;
@@ -5711,7 +5767,7 @@ static int io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
static void io_submit_state_end(struct io_submit_state *state)
{
blk_finish_plug(&state->plug);
- io_file_put(state);
+ io_state_file_put(state);
if (state->free_reqs)
kmem_cache_free_bulk(req_cachep, state->free_reqs, state->reqs);
}
@@ -5782,7 +5838,7 @@ static inline void io_consume_sqe(struct io_ring_ctx *ctx)
static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
const struct io_uring_sqe *sqe,
- struct io_submit_state *state, bool async)
+ struct io_submit_state *state)
{
unsigned int sqe_flags;
int id;
@@ -5803,7 +5859,6 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
refcount_set(&req->refs, 2);
req->task = NULL;
req->result = 0;
- req->needs_fixed_file = async;
INIT_IO_WORK(&req->work, io_wq_submit_work);
if (unlikely(req->opcode >= IORING_OP_LAST))
@@ -5833,9 +5888,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
}
/* same numerical values with corresponding REQ_F_*, safe to copy */
- req->flags |= sqe_flags & (IOSQE_IO_DRAIN | IOSQE_IO_HARDLINK |
- IOSQE_ASYNC | IOSQE_FIXED_FILE |
- IOSQE_BUFFER_SELECT | IOSQE_IO_LINK);
+ req->flags |= sqe_flags;
if (!io_op_defs[req->opcode].needs_file)
return 0;
@@ -5844,7 +5897,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
}
static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
- struct file *ring_file, int ring_fd, bool async)
+ struct file *ring_file, int ring_fd)
{
struct io_submit_state state, *statep = NULL;
struct io_kiocb *link = NULL;
@@ -5888,7 +5941,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
break;
}
- err = io_init_req(ctx, req, sqe, statep, async);
+ err = io_init_req(ctx, req, sqe, statep);
io_consume_sqe(ctx);
/* will complete beyond this point, count as submitted */
submitted++;
@@ -5901,8 +5954,8 @@ fail_req:
}
trace_io_uring_submit_sqe(ctx, req->opcode, req->user_data,
- true, async);
- err = io_submit_sqe(req, sqe, statep, &link);
+ true, io_async_submit(ctx));
+ err = io_submit_sqe(req, sqe, &link);
if (err)
goto fail_req;
}
@@ -5942,7 +5995,7 @@ static int io_sq_thread(void *data)
unsigned long timeout;
int ret = 0;
- complete(&ctx->completions[1]);
+ complete(&ctx->sq_thread_comp);
old_fs = get_fs();
set_fs(USER_DS);
@@ -6041,7 +6094,8 @@ static int io_sq_thread(void *data)
}
mutex_lock(&ctx->uring_lock);
- ret = io_submit_sqes(ctx, to_submit, NULL, -1, true);
+ if (likely(!percpu_ref_is_dying(&ctx->refs)))
+ ret = io_submit_sqes(ctx, to_submit, NULL, -1);
mutex_unlock(&ctx->uring_lock);
timeout = jiffies + ctx->sq_thread_idle;
}
@@ -6189,22 +6243,22 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
struct fixed_file_data *data = ctx->file_data;
struct fixed_file_ref_node *ref_node = NULL;
unsigned nr_tables, i;
- unsigned long flags;
if (!data)
return -ENXIO;
- spin_lock_irqsave(&data->lock, flags);
+ spin_lock(&data->lock);
if (!list_empty(&data->ref_list))
ref_node = list_first_entry(&data->ref_list,
struct fixed_file_ref_node, node);
- spin_unlock_irqrestore(&data->lock, flags);
+ spin_unlock(&data->lock);
if (ref_node)
percpu_ref_kill(&ref_node->refs);
percpu_ref_kill(&data->refs);
/* wait for all refs nodes to complete */
+ flush_delayed_work(&ctx->file_put_work);
wait_for_completion(&data->done);
__io_sqe_files_unregister(ctx);
@@ -6222,7 +6276,7 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
static void io_sq_thread_stop(struct io_ring_ctx *ctx)
{
if (ctx->sqo_thread) {
- wait_for_completion(&ctx->completions[1]);
+ wait_for_completion(&ctx->sq_thread_comp);
/*
* The park is a bit of a work-around, without it we get
* warning spews on shutdown with SQPOLL set and affinity
@@ -6435,40 +6489,63 @@ struct io_file_put {
struct file *file;
};
-static void io_file_put_work(struct work_struct *work)
+static void __io_file_put_work(struct fixed_file_ref_node *ref_node)
{
- struct fixed_file_ref_node *ref_node;
- struct fixed_file_data *file_data;
- struct io_ring_ctx *ctx;
+ struct fixed_file_data *file_data = ref_node->file_data;
+ struct io_ring_ctx *ctx = file_data->ctx;
struct io_file_put *pfile, *tmp;
- unsigned long flags;
-
- ref_node = container_of(work, struct fixed_file_ref_node, work);
- file_data = ref_node->file_data;
- ctx = file_data->ctx;
list_for_each_entry_safe(pfile, tmp, &ref_node->file_list, list) {
- list_del_init(&pfile->list);
+ list_del(&pfile->list);
io_ring_file_put(ctx, pfile->file);
kfree(pfile);
}
- spin_lock_irqsave(&file_data->lock, flags);
- list_del_init(&ref_node->node);
- spin_unlock_irqrestore(&file_data->lock, flags);
+ spin_lock(&file_data->lock);
+ list_del(&ref_node->node);
+ spin_unlock(&file_data->lock);
percpu_ref_exit(&ref_node->refs);
kfree(ref_node);
percpu_ref_put(&file_data->refs);
}
+static void io_file_put_work(struct work_struct *work)
+{
+ struct io_ring_ctx *ctx;
+ struct llist_node *node;
+
+ ctx = container_of(work, struct io_ring_ctx, file_put_work.work);
+ node = llist_del_all(&ctx->file_put_llist);
+
+ while (node) {
+ struct fixed_file_ref_node *ref_node;
+ struct llist_node *next = node->next;
+
+ ref_node = llist_entry(node, struct fixed_file_ref_node, llist);
+ __io_file_put_work(ref_node);
+ node = next;
+ }
+}
+
static void io_file_data_ref_zero(struct percpu_ref *ref)
{
struct fixed_file_ref_node *ref_node;
+ struct io_ring_ctx *ctx;
+ bool first_add;
+ int delay = HZ;
ref_node = container_of(ref, struct fixed_file_ref_node, refs);
+ ctx = ref_node->file_data->ctx;
+
+ if (percpu_ref_is_dying(&ctx->file_data->refs))
+ delay = 0;
- queue_work(system_wq, &ref_node->work);
+ first_add = llist_add(&ref_node->llist, &ctx->file_put_llist);
+ if (!delay)
+ mod_delayed_work(system_wq, &ctx->file_put_work, 0);
+ else if (first_add)
+ queue_delayed_work(system_wq, &ctx->file_put_work, delay);
}
static struct fixed_file_ref_node *alloc_fixed_file_ref_node(
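
The two hunks above replace the per-ref-node work item with a lock-free llist that a single delayed work drains, which is also why the fixed-file ->lock no longer needs the irqsave spinlock variants. A hedged sketch of the generic pattern (demo_* names are hypothetical, not part of the patch):

struct demo_node {
	struct llist_node llist;
	/* payload ... */
};

static LLIST_HEAD(demo_llist);

static void demo_work_fn(struct work_struct *work)
{
	struct llist_node *node = llist_del_all(&demo_llist);

	while (node) {
		struct demo_node *dn = llist_entry(node, struct demo_node, llist);
		struct llist_node *next = node->next;

		/* process dn, then free it */
		kfree(dn);
		node = next;
	}
}
static DECLARE_DELAYED_WORK(demo_work, demo_work_fn);

static void demo_queue(struct demo_node *dn, bool urgent)
{
	/* llist_add() returns true only when the list was empty, so the work
	 * is armed once per batch, exactly as io_file_data_ref_zero() does. */
	bool first = llist_add(&dn->llist, &demo_llist);

	if (urgent)
		mod_delayed_work(system_wq, &demo_work, 0);
	else if (first)
		queue_delayed_work(system_wq, &demo_work, HZ);
}
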
@@ -6487,10 +6564,8 @@ static struct fixed_file_ref_node *alloc_fixed_file_ref_node(
}
INIT_LIST_HEAD(&ref_node->node);
INIT_LIST_HEAD(&ref_node->file_list);
- INIT_WORK(&ref_node->work, io_file_put_work);
ref_node->file_data = ctx->file_data;
return ref_node;
-
}
static void destroy_fixed_file_ref_node(struct fixed_file_ref_node *ref_node)
@@ -6508,7 +6583,6 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
int fd, ret = 0;
unsigned i;
struct fixed_file_ref_node *ref_node;
- unsigned long flags;
if (ctx->file_data)
return -EBUSY;
@@ -6616,9 +6690,9 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
}
ctx->file_data->cur_refs = &ref_node->refs;
- spin_lock_irqsave(&ctx->file_data->lock, flags);
+ spin_lock(&ctx->file_data->lock);
list_add(&ref_node->node, &ctx->file_data->ref_list);
- spin_unlock_irqrestore(&ctx->file_data->lock, flags);
+ spin_unlock(&ctx->file_data->lock);
percpu_ref_get(&ctx->file_data->refs);
return ret;
}
@@ -6694,7 +6768,6 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
__s32 __user *fds;
int fd, i, err;
__u32 done;
- unsigned long flags;
bool needs_switch = false;
if (check_add_overflow(up->offset, nr_args, &done))
@@ -6759,10 +6832,10 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
if (needs_switch) {
percpu_ref_kill(data->cur_refs);
- spin_lock_irqsave(&data->lock, flags);
+ spin_lock(&data->lock);
list_add(&ref_node->node, &data->ref_list);
data->cur_refs = &ref_node->refs;
- spin_unlock_irqrestore(&data->lock, flags);
+ spin_unlock(&data->lock);
percpu_ref_get(&ctx->file_data->refs);
} else
destroy_fixed_file_ref_node(ref_node);
@@ -7250,7 +7323,6 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx)
ring_pages(ctx->sq_entries, ctx->cq_entries));
free_uid(ctx->user);
put_cred(ctx->creds);
- kfree(ctx->completions);
kfree(ctx->cancel_hash);
kmem_cache_free(req_cachep, ctx->fallback_req);
kfree(ctx);
@@ -7302,7 +7374,7 @@ static void io_ring_exit_work(struct work_struct *work)
if (ctx->rings)
io_cqring_overflow_flush(ctx, true);
- wait_for_completion(&ctx->completions[0]);
+ wait_for_completion(&ctx->ref_comp);
io_ring_ctx_free(ctx);
}
@@ -7312,16 +7384,6 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
percpu_ref_kill(&ctx->refs);
mutex_unlock(&ctx->uring_lock);
- /*
- * Wait for sq thread to idle, if we have one. It won't spin on new
- * work after we've killed the ctx ref above. This is important to do
- * before we cancel existing commands, as the thread could otherwise
- * be queueing new work post that. If that's work we need to cancel,
- * it could cause shutdown to hang.
- */
- while (ctx->sqo_thread && !wq_has_sleeper(&ctx->sqo_wait))
- cond_resched();
-
io_kill_timeouts(ctx);
io_poll_remove_all(ctx);
@@ -7390,14 +7452,15 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx,
* all we had, then we're done with this request.
*/
if (refcount_sub_and_test(2, &cancel_req->refs)) {
- io_put_req(cancel_req);
+ io_free_req(cancel_req);
finish_wait(&ctx->inflight_wait, &wait);
continue;
}
+ } else {
+ io_wq_cancel_work(ctx->io_wq, &cancel_req->work);
+ io_put_req(cancel_req);
}
- io_wq_cancel_work(ctx->io_wq, &cancel_req->work);
- io_put_req(cancel_req);
schedule();
finish_wait(&ctx->inflight_wait, &wait);
}
@@ -7530,7 +7593,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
submitted = to_submit;
} else if (to_submit) {
mutex_lock(&ctx->uring_lock);
- submitted = io_submit_sqes(ctx, to_submit, f.file, fd, false);
+ submitted = io_submit_sqes(ctx, to_submit, f.file, fd);
mutex_unlock(&ctx->uring_lock);
if (submitted != to_submit)
@@ -7841,6 +7904,7 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
p->cq_off.ring_entries = offsetof(struct io_rings, cq_ring_entries);
p->cq_off.overflow = offsetof(struct io_rings, cq_overflow);
p->cq_off.cqes = offsetof(struct io_rings, cqes);
+ p->cq_off.flags = offsetof(struct io_rings, cq_flags);
p->features = IORING_FEAT_SINGLE_MMAP | IORING_FEAT_NODROP |
IORING_FEAT_SUBMIT_STABLE | IORING_FEAT_RW_CUR_POS |
@@ -8001,7 +8065,7 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
* after we've killed the percpu ref.
*/
mutex_unlock(&ctx->uring_lock);
- ret = wait_for_completion_interruptible(&ctx->completions[0]);
+ ret = wait_for_completion_interruptible(&ctx->ref_comp);
mutex_lock(&ctx->uring_lock);
if (ret) {
percpu_ref_resurrect(&ctx->refs);
@@ -8078,7 +8142,7 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
/* bring the ctx back to life */
percpu_ref_reinit(&ctx->refs);
out:
- reinit_completion(&ctx->completions[0]);
+ reinit_completion(&ctx->ref_comp);
}
return ret;
}
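
Two smaller io_uring changes in this file tie together: io_should_trigger_evfd() now honours IORING_CQ_EVENTFD_DISABLED in the CQ ring flags, and io_uring_create() exports the flags offset via cq_off.flags so userspace can locate that word in the mmapped ring. A hedged sketch of the userspace side, assuming the uapi additions from this series (helper name illustrative):

#include <linux/io_uring.h>

/* cq_ring is the mmap()ed IORING_OFF_CQ_RING area; only the application
 * writes this flag and the kernel just READ_ONCE()s it, so a plain store
 * is enough for a sketch. */
static void set_cq_eventfd_disabled(void *cq_ring, struct io_uring_params *p,
				    int disabled)
{
	unsigned int *flags = (unsigned int *)((char *)cq_ring + p->cq_off.flags);

	if (disabled)
		*flags |= IORING_CQ_EVENTFD_DISABLED;
	else
		*flags &= ~IORING_CQ_EVENTFD_DISABLED;
}
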
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 89e21961d1ad..a1ed7620fbac 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -59,24 +59,19 @@ iomap_page_create(struct inode *inode, struct page *page)
* migrate_page_move_mapping() assumes that pages with private data have
* their count elevated by 1.
*/
- get_page(page);
- set_page_private(page, (unsigned long)iop);
- SetPagePrivate(page);
+ attach_page_private(page, iop);
return iop;
}
static void
iomap_page_release(struct page *page)
{
- struct iomap_page *iop = to_iomap_page(page);
+ struct iomap_page *iop = detach_page_private(page);
if (!iop)
return;
WARN_ON_ONCE(atomic_read(&iop->read_count));
WARN_ON_ONCE(atomic_read(&iop->write_count));
- ClearPagePrivate(page);
- set_page_private(page, 0);
- put_page(page);
kfree(iop);
}
@@ -214,9 +209,8 @@ iomap_read_end_io(struct bio *bio)
struct iomap_readpage_ctx {
struct page *cur_page;
bool cur_page_in_bio;
- bool is_readahead;
struct bio *bio;
- struct list_head *pages;
+ struct readahead_control *rac;
};
static void
@@ -308,7 +302,7 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
if (ctx->bio)
submit_bio(ctx->bio);
- if (ctx->is_readahead) /* same as readahead_gfp_mask */
+ if (ctx->rac) /* same as readahead_gfp_mask */
gfp |= __GFP_NORETRY | __GFP_NOWARN;
ctx->bio = bio_alloc(gfp, min(BIO_MAX_PAGES, nr_vecs));
/*
@@ -319,7 +313,7 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
if (!ctx->bio)
ctx->bio = bio_alloc(orig_gfp, 1);
ctx->bio->bi_opf = REQ_OP_READ;
- if (ctx->is_readahead)
+ if (ctx->rac)
ctx->bio->bi_opf |= REQ_RAHEAD;
ctx->bio->bi_iter.bi_sector = sector;
bio_set_dev(ctx->bio, iomap->bdev);
@@ -367,7 +361,7 @@ iomap_readpage(struct page *page, const struct iomap_ops *ops)
}
/*
- * Just like mpage_readpages and block_read_full_page we always
+ * Just like mpage_readahead and block_read_full_page we always
* return 0 and just mark the page as PageError on errors. This
* should be cleaned up all through the stack eventually.
*/
@@ -375,36 +369,8 @@ iomap_readpage(struct page *page, const struct iomap_ops *ops)
}
EXPORT_SYMBOL_GPL(iomap_readpage);
-static struct page *
-iomap_next_page(struct inode *inode, struct list_head *pages, loff_t pos,
- loff_t length, loff_t *done)
-{
- while (!list_empty(pages)) {
- struct page *page = lru_to_page(pages);
-
- if (page_offset(page) >= (u64)pos + length)
- break;
-
- list_del(&page->lru);
- if (!add_to_page_cache_lru(page, inode->i_mapping, page->index,
- GFP_NOFS))
- return page;
-
- /*
- * If we already have a page in the page cache at index we are
- * done. Upper layers don't care if it is uptodate after the
- * readpages call itself as every page gets checked again once
- * actually needed.
- */
- *done += PAGE_SIZE;
- put_page(page);
- }
-
- return NULL;
-}
-
static loff_t
-iomap_readpages_actor(struct inode *inode, loff_t pos, loff_t length,
+iomap_readahead_actor(struct inode *inode, loff_t pos, loff_t length,
void *data, struct iomap *iomap, struct iomap *srcmap)
{
struct iomap_readpage_ctx *ctx = data;
@@ -418,10 +384,7 @@ iomap_readpages_actor(struct inode *inode, loff_t pos, loff_t length,
ctx->cur_page = NULL;
}
if (!ctx->cur_page) {
- ctx->cur_page = iomap_next_page(inode, ctx->pages,
- pos, length, &done);
- if (!ctx->cur_page)
- break;
+ ctx->cur_page = readahead_page(ctx->rac);
ctx->cur_page_in_bio = false;
}
ret = iomap_readpage_actor(inode, pos + done, length - done,
@@ -431,32 +394,43 @@ iomap_readpages_actor(struct inode *inode, loff_t pos, loff_t length,
return done;
}
-int
-iomap_readpages(struct address_space *mapping, struct list_head *pages,
- unsigned nr_pages, const struct iomap_ops *ops)
+/**
+ * iomap_readahead - Attempt to read pages from a file.
+ * @rac: Describes the pages to be read.
+ * @ops: The operations vector for the filesystem.
+ *
+ * This function is for filesystems to call to implement their readahead
+ * address_space operation.
+ *
+ * Context: The @ops callbacks may submit I/O (eg to read the addresses of
+ * blocks from disc), and may wait for it. The caller may be trying to
+ * access a different page, and so sleeping excessively should be avoided.
+ * It may allocate memory, but should avoid costly allocations. This
+ * function is called with memalloc_nofs set, so allocations will not cause
+ * the filesystem to be reentered.
+ */
+void iomap_readahead(struct readahead_control *rac, const struct iomap_ops *ops)
{
+ struct inode *inode = rac->mapping->host;
+ loff_t pos = readahead_pos(rac);
+ loff_t length = readahead_length(rac);
struct iomap_readpage_ctx ctx = {
- .pages = pages,
- .is_readahead = true,
+ .rac = rac,
};
- loff_t pos = page_offset(list_entry(pages->prev, struct page, lru));
- loff_t last = page_offset(list_entry(pages->next, struct page, lru));
- loff_t length = last - pos + PAGE_SIZE, ret = 0;
- trace_iomap_readpages(mapping->host, nr_pages);
+ trace_iomap_readahead(inode, readahead_count(rac));
while (length > 0) {
- ret = iomap_apply(mapping->host, pos, length, 0, ops,
- &ctx, iomap_readpages_actor);
+ loff_t ret = iomap_apply(inode, pos, length, 0, ops,
+ &ctx, iomap_readahead_actor);
if (ret <= 0) {
WARN_ON_ONCE(ret == 0);
- goto done;
+ break;
}
pos += ret;
length -= ret;
}
- ret = 0;
-done:
+
if (ctx.bio)
submit_bio(ctx.bio);
if (ctx.cur_page) {
@@ -464,15 +438,8 @@ done:
unlock_page(ctx.cur_page);
put_page(ctx.cur_page);
}
-
- /*
- * Check that we didn't lose a page due to the arcance calling
- * conventions..
- */
- WARN_ON_ONCE(!ret && !list_empty(ctx.pages));
- return ret;
}
-EXPORT_SYMBOL_GPL(iomap_readpages);
+EXPORT_SYMBOL_GPL(iomap_readahead);
/*
* iomap_is_partially_uptodate checks whether blocks within a page are
@@ -554,14 +521,8 @@ iomap_migrate_page(struct address_space *mapping, struct page *newpage,
if (ret != MIGRATEPAGE_SUCCESS)
return ret;
- if (page_has_private(page)) {
- ClearPagePrivate(page);
- get_page(newpage);
- set_page_private(newpage, page_private(page));
- set_page_private(page, 0);
- put_page(page);
- SetPagePrivate(newpage);
- }
+ if (page_has_private(page))
+ attach_page_private(newpage, detach_page_private(page));
if (mode != MIGRATE_SYNC_NO_COPY)
migrate_page_copy(newpage, page);
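
This converts iomap's buffered read side from the list-based ->readpages interface to the readahead_control-based iomap_readahead(), and replaces the open-coded page-private refcounting with attach_page_private()/detach_page_private(). For a consuming filesystem the change is mostly a one-line aops swap; a hedged sketch (demo_* names hypothetical, the shape follows the in-tree xfs wiring):

static int demo_readpage(struct file *file, struct page *page)
{
	/* demo_iomap_ops: the filesystem's iomap_begin/iomap_end pair */
	return iomap_readpage(page, &demo_iomap_ops);
}

static void demo_readahead(struct readahead_control *rac)
{
	iomap_readahead(rac, &demo_iomap_ops);
}

static const struct address_space_operations demo_aops = {
	.readpage	= demo_readpage,
	.readahead	= demo_readahead,	/* replaces the old .readpages entry */
	/* writepage(s), bmap, etc. are unaffected by this conversion */
};
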
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index 20dde5aadcdd..ec7b78e6feca 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -59,7 +59,7 @@ int iomap_dio_iopoll(struct kiocb *kiocb, bool spin)
EXPORT_SYMBOL_GPL(iomap_dio_iopoll);
static void iomap_dio_submit_bio(struct iomap_dio *dio, struct iomap *iomap,
- struct bio *bio)
+ struct bio *bio, loff_t pos)
{
atomic_inc(&dio->ref);
@@ -67,7 +67,12 @@ static void iomap_dio_submit_bio(struct iomap_dio *dio, struct iomap *iomap,
bio_set_polled(bio, dio->iocb);
dio->submit.last_queue = bdev_get_queue(iomap->bdev);
- dio->submit.cookie = submit_bio(bio);
+ if (dio->dops && dio->dops->submit_io)
+ dio->submit.cookie = dio->dops->submit_io(
+ file_inode(dio->iocb->ki_filp),
+ iomap, bio, pos);
+ else
+ dio->submit.cookie = submit_bio(bio);
}
static ssize_t iomap_dio_complete(struct iomap_dio *dio)
@@ -191,7 +196,7 @@ iomap_dio_zero(struct iomap_dio *dio, struct iomap *iomap, loff_t pos,
get_page(page);
__bio_add_page(bio, page, len, 0);
bio_set_op_attrs(bio, REQ_OP_WRITE, flags);
- iomap_dio_submit_bio(dio, iomap, bio);
+ iomap_dio_submit_bio(dio, iomap, bio, pos);
}
static loff_t
@@ -299,11 +304,11 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length,
}
dio->size += n;
- pos += n;
copied += n;
nr_pages = iov_iter_npages(dio->submit.iter, BIO_MAX_PAGES);
- iomap_dio_submit_bio(dio, iomap, bio);
+ iomap_dio_submit_bio(dio, iomap, bio, pos);
+ pos += n;
} while (nr_pages);
/*
@@ -411,8 +416,6 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
struct blk_plug plug;
struct iomap_dio *dio;
- lockdep_assert_held(&inode->i_rwsem);
-
if (!count)
return 0;
@@ -561,7 +564,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
!dio->submit.last_queue ||
!blk_poll(dio->submit.last_queue,
dio->submit.cookie, true))
- io_schedule();
+ blk_io_schedule();
}
__set_current_state(TASK_RUNNING);
}
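
The direct I/O change adds an optional ->submit_io hook to struct iomap_dio_ops so a filesystem can intercept bio submission (btrfs' iomap conversion is the expected user), and the submission path now also carries the file position the bio starts at. A hedged sketch of a minimal implementation; the callback signature here is inferred from the call site above:

static blk_qc_t demo_dio_submit_io(struct inode *inode, struct iomap *iomap,
				   struct bio *bio, loff_t file_offset)
{
	/* a real filesystem might account or re-map the range starting at
	 * file_offset before letting the bio go */
	return submit_bio(bio);
}

static const struct iomap_dio_ops demo_dio_ops = {
	.end_io		= NULL,			/* optional completion callback */
	.submit_io	= demo_dio_submit_io,	/* overrides plain submit_bio() */
};
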
diff --git a/fs/iomap/trace.h b/fs/iomap/trace.h
index 4df19c66f597..5693a39d52fb 100644
--- a/fs/iomap/trace.h
+++ b/fs/iomap/trace.h
@@ -39,7 +39,7 @@ DEFINE_EVENT(iomap_readpage_class, name, \
TP_PROTO(struct inode *inode, int nr_pages), \
TP_ARGS(inode, nr_pages))
DEFINE_READPAGE_EVENT(iomap_readpage);
-DEFINE_READPAGE_EVENT(iomap_readpages);
+DEFINE_READPAGE_EVENT(iomap_readahead);
DECLARE_EVENT_CLASS(iomap_range_class,
TP_PROTO(struct inode *inode, unsigned long off, unsigned int len),
diff --git a/fs/isofs/Kconfig b/fs/isofs/Kconfig
index 5e7419599f50..08ffd37b9bb8 100644
--- a/fs/isofs/Kconfig
+++ b/fs/isofs/Kconfig
@@ -8,7 +8,7 @@ config ISO9660_FS
long Unix filenames and symbolic links are also supported by this
driver. If you have a CD-ROM drive and want to do more with it than
just listen to audio CDs and watch its LEDs, say Y (and read
- <file:Documentation/filesystems/isofs.txt> and the CD-ROM-HOWTO,
+ <file:Documentation/filesystems/isofs.rst> and the CD-ROM-HOWTO,
available from <http://www.tldp.org/docs.html#howto>), thereby
enlarging your kernel by about 27 KB; otherwise say N.
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 62c0462dc89f..d634561f871a 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -544,43 +544,41 @@ static int isofs_show_options(struct seq_file *m, struct dentry *root)
static unsigned int isofs_get_last_session(struct super_block *sb, s32 session)
{
- struct cdrom_multisession ms_info;
- unsigned int vol_desc_start;
- struct block_device *bdev = sb->s_bdev;
- int i;
+ struct cdrom_device_info *cdi = disk_to_cdi(sb->s_bdev->bd_disk);
+ unsigned int vol_desc_start = 0;
- vol_desc_start=0;
- ms_info.addr_format=CDROM_LBA;
if (session > 0) {
- struct cdrom_tocentry Te;
- Te.cdte_track=session;
- Te.cdte_format=CDROM_LBA;
- i = ioctl_by_bdev(bdev, CDROMREADTOCENTRY, (unsigned long) &Te);
- if (!i) {
+ struct cdrom_tocentry te;
+
+ if (!cdi)
+ return 0;
+
+ te.cdte_track = session;
+ te.cdte_format = CDROM_LBA;
+ if (cdrom_read_tocentry(cdi, &te) == 0) {
printk(KERN_DEBUG "ISOFS: Session %d start %d type %d\n",
- session, Te.cdte_addr.lba,
- Te.cdte_ctrl&CDROM_DATA_TRACK);
- if ((Te.cdte_ctrl&CDROM_DATA_TRACK) == 4)
- return Te.cdte_addr.lba;
+ session, te.cdte_addr.lba,
+ te.cdte_ctrl & CDROM_DATA_TRACK);
+ if ((te.cdte_ctrl & CDROM_DATA_TRACK) == 4)
+ return te.cdte_addr.lba;
}
printk(KERN_ERR "ISOFS: Invalid session number or type of track\n");
}
- i = ioctl_by_bdev(bdev, CDROMMULTISESSION, (unsigned long) &ms_info);
- if (session > 0)
- printk(KERN_ERR "ISOFS: Invalid session number\n");
-#if 0
- printk(KERN_DEBUG "isofs.inode: CDROMMULTISESSION: rc=%d\n",i);
- if (i==0) {
- printk(KERN_DEBUG "isofs.inode: XA disk: %s\n",ms_info.xa_flag?"yes":"no");
- printk(KERN_DEBUG "isofs.inode: vol_desc_start = %d\n", ms_info.addr.lba);
- }
-#endif
- if (i==0)
+
+ if (cdi) {
+ struct cdrom_multisession ms_info;
+
+ ms_info.addr_format = CDROM_LBA;
+ if (cdrom_multisession(cdi, &ms_info) == 0) {
#if WE_OBEY_THE_WRITTEN_STANDARDS
- if (ms_info.xa_flag) /* necessary for a valid ms_info.addr */
+ /* necessary for a valid ms_info.addr */
+ if (ms_info.xa_flag)
#endif
- vol_desc_start=ms_info.addr.lba;
+ vol_desc_start = ms_info.addr.lba;
+ }
+ }
+
return vol_desc_start;
}
@@ -1185,10 +1183,9 @@ static int isofs_readpage(struct file *file, struct page *page)
return mpage_readpage(page, isofs_get_block);
}
-static int isofs_readpages(struct file *file, struct address_space *mapping,
- struct list_head *pages, unsigned nr_pages)
+static void isofs_readahead(struct readahead_control *rac)
{
- return mpage_readpages(mapping, pages, nr_pages, isofs_get_block);
+ mpage_readahead(rac, isofs_get_block);
}
static sector_t _isofs_bmap(struct address_space *mapping, sector_t block)
@@ -1198,7 +1195,7 @@ static sector_t _isofs_bmap(struct address_space *mapping, sector_t block)
static const struct address_space_operations isofs_aops = {
.readpage = isofs_readpage,
- .readpages = isofs_readpages,
+ .readahead = isofs_readahead,
.bmap = _isofs_bmap
};
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 96bf33986d03..263f02ad8ebf 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -414,7 +414,7 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
* jbd2_cleanup_journal_tail() doesn't get called all that often.
*/
if (journal->j_flags & JBD2_BARRIER)
- blkdev_issue_flush(journal->j_fs_dev, GFP_NOFS, NULL);
+ blkdev_issue_flush(journal->j_fs_dev, GFP_NOFS);
return __jbd2_update_log_tail(journal, first_tid, blocknr);
}
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index e855d8260433..6d2da8ad0e6f 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -775,7 +775,7 @@ start_journal_io:
if (commit_transaction->t_need_data_flush &&
(journal->j_fs_dev != journal->j_dev) &&
(journal->j_flags & JBD2_BARRIER))
- blkdev_issue_flush(journal->j_fs_dev, GFP_NOFS, NULL);
+ blkdev_issue_flush(journal->j_fs_dev, GFP_NOFS);
/* Done it all: now write the commit record asynchronously. */
if (jbd2_has_feature_async_commit(journal)) {
@@ -882,7 +882,7 @@ start_journal_io:
stats.run.rs_blocks_logged++;
if (jbd2_has_feature_async_commit(journal) &&
journal->j_flags & JBD2_BARRIER) {
- blkdev_issue_flush(journal->j_dev, GFP_NOFS, NULL);
+ blkdev_issue_flush(journal->j_dev, GFP_NOFS);
}
if (err)
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index a4967b27ffb6..2ed278f0dced 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -286,7 +286,7 @@ int jbd2_journal_recover(journal_t *journal)
err = err2;
/* Make sure all replayed data is on permanent storage */
if (journal->j_flags & JBD2_BARRIER) {
- err2 = blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL);
+ err2 = blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL);
if (!err)
err = err2;
}
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 9486afcdac76..6f65bfa9f18d 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -296,10 +296,9 @@ static int jfs_readpage(struct file *file, struct page *page)
return mpage_readpage(page, jfs_get_block);
}
-static int jfs_readpages(struct file *file, struct address_space *mapping,
- struct list_head *pages, unsigned nr_pages)
+static void jfs_readahead(struct readahead_control *rac)
{
- return mpage_readpages(mapping, pages, nr_pages, jfs_get_block);
+ mpage_readahead(rac, jfs_get_block);
}
static void jfs_write_failed(struct address_space *mapping, loff_t to)
@@ -358,7 +357,7 @@ static ssize_t jfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
const struct address_space_operations jfs_aops = {
.readpage = jfs_readpage,
- .readpages = jfs_readpages,
+ .readahead = jfs_readahead,
.writepage = jfs_writepage,
.writepages = jfs_writepages,
.write_begin = jfs_write_begin,
diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c
index 3acc954f7c04..837d42f61464 100644
--- a/fs/jfs/jfs_dtree.c
+++ b/fs/jfs/jfs_dtree.c
@@ -2964,7 +2964,7 @@ struct jfs_dirent {
loff_t position;
int ino;
u16 name_len;
- char name[0];
+ char name[];
};
/*
diff --git a/fs/jfs/jfs_xattr.h b/fs/jfs/jfs_xattr.h
index f0558b3348da..c50167a7bc50 100644
--- a/fs/jfs/jfs_xattr.h
+++ b/fs/jfs/jfs_xattr.h
@@ -17,12 +17,12 @@ struct jfs_ea {
u8 flag; /* Unused? */
u8 namelen; /* Length of name */
__le16 valuelen; /* Length of value */
- char name[0]; /* Attribute name (includes null-terminator) */
+ char name[]; /* Attribute name (includes null-terminator) */
}; /* Value immediately follows name */
struct jfs_ea_list {
__le32 size; /* overall size */
- struct jfs_ea ea[0]; /* Variable length list */
+ struct jfs_ea ea[]; /* Variable length list */
};
/* Macros for defining maxiumum number of bytes supported for EAs */
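
The jfs hunks above (and the matching one in jfs_dtree.c) convert zero-length trailing arrays to C99 flexible array members, which describe the same on-disk layout but let the compiler and bounds-checking tooling understand the trailing data. A hedged standalone illustration, not kernel code:

#include <stdlib.h>
#include <string.h>

struct demo_ea {
	unsigned char namelen;
	char name[];			/* was "char name[0]" in the old GNU style */
};

static struct demo_ea *demo_ea_alloc(const char *name)
{
	size_t len = strlen(name) + 1;		/* keep the NUL, as jfs_ea does */
	struct demo_ea *ea = malloc(sizeof(*ea) + len);

	if (!ea)
		return NULL;
	ea->namelen = (unsigned char)(len - 1);
	memcpy(ea->name, name, len);
	return ea;
}
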
diff --git a/fs/libfs.c b/fs/libfs.c
index 3759fbacf522..4d08edf19c78 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -1113,7 +1113,7 @@ int generic_file_fsync(struct file *file, loff_t start, loff_t end,
err = __generic_file_fsync(file, start, end, datasync);
if (err)
return err;
- return blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
+ return blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL);
}
EXPORT_SYMBOL(generic_file_fsync);
diff --git a/fs/locks.c b/fs/locks.c
index b8a31c1c4fff..1d4f4d5da704 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -61,7 +61,7 @@
*
* Initial implementation of mandatory locks. SunOS turned out to be
* a rotten model, so I implemented the "obvious" semantics.
- * See 'Documentation/filesystems/mandatory-locking.txt' for details.
+ * See 'Documentation/filesystems/mandatory-locking.rst' for details.
* Andy Walker (andy@lysaker.kvaerner.no), April 06, 1996.
*
* Don't allow mandatory locks on mmap()'ed files. Added simple functions to
diff --git a/fs/mount.h b/fs/mount.h
index 711a4093e475..c7abb7b394d8 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -9,7 +9,13 @@ struct mnt_namespace {
atomic_t count;
struct ns_common ns;
struct mount * root;
+ /*
+ * Traversal and modification of .list is protected by either
+ * - taking namespace_sem for write, OR
+ * - taking namespace_sem for read AND taking .ns_lock.
+ */
struct list_head list;
+ spinlock_t ns_lock;
struct user_namespace *user_ns;
struct ucounts *ucounts;
u64 seq; /* Sequence number to prevent loops */
@@ -133,9 +139,7 @@ struct proc_mounts {
struct mnt_namespace *ns;
struct path root;
int (*show)(struct seq_file *, struct vfsmount *);
- void *cached_mount;
- u64 cached_event;
- loff_t cached_index;
+ struct mount cursor;
};
extern const struct seq_operations mounts_op;
@@ -153,3 +157,5 @@ static inline bool is_anon_ns(struct mnt_namespace *ns)
{
return ns->seq == 0;
}
+
+extern void mnt_cursor_del(struct mnt_namespace *ns, struct mount *cursor);
diff --git a/fs/mpage.c b/fs/mpage.c
index ccba3c4c4479..830e6cc2a9e7 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -91,7 +91,7 @@ mpage_alloc(struct block_device *bdev,
}
/*
- * support function for mpage_readpages. The fs supplied get_block might
+ * support function for mpage_readahead. The fs supplied get_block might
* return an up to date buffer. This is used to map that buffer into
* the page, which allows readpage to avoid triggering a duplicate call
* to get_block.
@@ -338,13 +338,8 @@ confused:
}
/**
- * mpage_readpages - populate an address space with some pages & start reads against them
- * @mapping: the address_space
- * @pages: The address of a list_head which contains the target pages. These
- * pages have their ->index populated and are otherwise uninitialised.
- * The page at @pages->prev has the lowest file offset, and reads should be
- * issued in @pages->prev to @pages->next order.
- * @nr_pages: The number of pages at *@pages
+ * mpage_readahead - start reads against pages
+ * @rac: Describes which pages to read.
* @get_block: The filesystem's block mapper function.
*
* This function walks the pages and the blocks within each page, building and
@@ -381,36 +376,25 @@ confused:
*
* This all causes the disk requests to be issued in the correct order.
*/
-int
-mpage_readpages(struct address_space *mapping, struct list_head *pages,
- unsigned nr_pages, get_block_t get_block)
+void mpage_readahead(struct readahead_control *rac, get_block_t get_block)
{
+ struct page *page;
struct mpage_readpage_args args = {
.get_block = get_block,
.is_readahead = true,
};
- unsigned page_idx;
-
- for (page_idx = 0; page_idx < nr_pages; page_idx++) {
- struct page *page = lru_to_page(pages);
+ while ((page = readahead_page(rac))) {
prefetchw(&page->flags);
- list_del(&page->lru);
- if (!add_to_page_cache_lru(page, mapping,
- page->index,
- readahead_gfp_mask(mapping))) {
- args.page = page;
- args.nr_pages = nr_pages - page_idx;
- args.bio = do_mpage_readpage(&args);
- }
+ args.page = page;
+ args.nr_pages = readahead_count(rac);
+ args.bio = do_mpage_readpage(&args);
put_page(page);
}
- BUG_ON(!list_empty(pages));
if (args.bio)
mpage_bio_submit(REQ_OP_READ, REQ_RAHEAD, args.bio);
- return 0;
}
-EXPORT_SYMBOL(mpage_readpages);
+EXPORT_SYMBOL(mpage_readahead);
/*
* This isn't called much at all
@@ -563,7 +547,7 @@ static int __mpage_writepage(struct page *page, struct writeback_control *wbc,
* Page has buffers, but they are all unmapped. The page was
* created by pagein or read over a hole which was handled by
* block_read_full_page(). If this address_space is also
- * using mpage_readpages then this can rarely happen.
+ * using mpage_readahead then this can rarely happen.
*/
goto confused;
}
diff --git a/fs/namei.c b/fs/namei.c
index a320371899cf..d81f73ff1a8b 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3505,12 +3505,14 @@ EXPORT_SYMBOL(user_path_create);
int vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
{
+ bool is_whiteout = S_ISCHR(mode) && dev == WHITEOUT_DEV;
int error = may_create(dir, dentry);
if (error)
return error;
- if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD))
+ if ((S_ISCHR(mode) || S_ISBLK(mode)) && !is_whiteout &&
+ !capable(CAP_MKNOD))
return -EPERM;
if (!dir->i_op->mknod)
@@ -4345,9 +4347,6 @@ static int do_renameat2(int olddfd, const char __user *oldname, int newdfd,
(flags & RENAME_EXCHANGE))
return -EINVAL;
- if ((flags & RENAME_WHITEOUT) && !capable(CAP_MKNOD))
- return -EPERM;
-
if (flags & RENAME_EXCHANGE)
target_flags = 0;
@@ -4483,20 +4482,6 @@ SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newna
return do_renameat2(AT_FDCWD, oldname, AT_FDCWD, newname, 0);
}
-int vfs_whiteout(struct inode *dir, struct dentry *dentry)
-{
- int error = may_create(dir, dentry);
- if (error)
- return error;
-
- if (!dir->i_op->mknod)
- return -EPERM;
-
- return dir->i_op->mknod(dir, dentry,
- S_IFCHR | WHITEOUT_MODE, WHITEOUT_DEV);
-}
-EXPORT_SYMBOL(vfs_whiteout);
-
int readlink_copy(char __user *buffer, int buflen, const char *link)
{
int len = PTR_ERR(link);
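
The namei.c changes fold the old vfs_whiteout() helper into vfs_mknod() and drop the CAP_MKNOD requirement for the whiteout special case, so an otherwise-permitted caller can create one directly. A hedged userspace sketch (not part of the patch); whether a given filesystem accepts the node still depends on its ->mknod implementation and the usual directory permissions:

#include <stdio.h>
#include <sys/stat.h>
#include <sys/sysmacros.h>

int main(void)
{
	/* a whiteout is an S_IFCHR node with device 0:0 (WHITEOUT_DEV) */
	if (mknod("wh.demo", S_IFCHR | 0000, makedev(0, 0)) != 0) {
		perror("mknod");
		return 1;
	}
	return 0;
}
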
diff --git a/fs/namespace.c b/fs/namespace.c
index a28e4db075ed..6d499ab254b7 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -648,6 +648,21 @@ struct vfsmount *lookup_mnt(const struct path *path)
return m;
}
+static inline void lock_ns_list(struct mnt_namespace *ns)
+{
+ spin_lock(&ns->ns_lock);
+}
+
+static inline void unlock_ns_list(struct mnt_namespace *ns)
+{
+ spin_unlock(&ns->ns_lock);
+}
+
+static inline bool mnt_is_cursor(struct mount *mnt)
+{
+ return mnt->mnt.mnt_flags & MNT_CURSOR;
+}
+
/*
* __is_local_mountpoint - Test to see if dentry is a mountpoint in the
* current mount namespace.
@@ -673,11 +688,15 @@ bool __is_local_mountpoint(struct dentry *dentry)
goto out;
down_read(&namespace_sem);
+ lock_ns_list(ns);
list_for_each_entry(mnt, &ns->list, mnt_list) {
+ if (mnt_is_cursor(mnt))
+ continue;
is_covered = (mnt->mnt_mountpoint == dentry);
if (is_covered)
break;
}
+ unlock_ns_list(ns);
up_read(&namespace_sem);
out:
return is_covered;
@@ -1245,46 +1264,71 @@ struct vfsmount *mnt_clone_internal(const struct path *path)
}
#ifdef CONFIG_PROC_FS
+static struct mount *mnt_list_next(struct mnt_namespace *ns,
+ struct list_head *p)
+{
+ struct mount *mnt, *ret = NULL;
+
+ lock_ns_list(ns);
+ list_for_each_continue(p, &ns->list) {
+ mnt = list_entry(p, typeof(*mnt), mnt_list);
+ if (!mnt_is_cursor(mnt)) {
+ ret = mnt;
+ break;
+ }
+ }
+ unlock_ns_list(ns);
+
+ return ret;
+}
+
/* iterator; we want it to have access to namespace_sem, thus here... */
static void *m_start(struct seq_file *m, loff_t *pos)
{
struct proc_mounts *p = m->private;
+ struct list_head *prev;
down_read(&namespace_sem);
- if (p->cached_event == p->ns->event) {
- void *v = p->cached_mount;
- if (*pos == p->cached_index)
- return v;
- if (*pos == p->cached_index + 1) {
- v = seq_list_next(v, &p->ns->list, &p->cached_index);
- return p->cached_mount = v;
- }
+ if (!*pos) {
+ prev = &p->ns->list;
+ } else {
+ prev = &p->cursor.mnt_list;
+
+ /* Read after we'd reached the end? */
+ if (list_empty(prev))
+ return NULL;
}
- p->cached_event = p->ns->event;
- p->cached_mount = seq_list_start(&p->ns->list, *pos);
- p->cached_index = *pos;
- return p->cached_mount;
+ return mnt_list_next(p->ns, prev);
}
static void *m_next(struct seq_file *m, void *v, loff_t *pos)
{
struct proc_mounts *p = m->private;
+ struct mount *mnt = v;
- p->cached_mount = seq_list_next(v, &p->ns->list, pos);
- p->cached_index = *pos;
- return p->cached_mount;
+ ++*pos;
+ return mnt_list_next(p->ns, &mnt->mnt_list);
}
static void m_stop(struct seq_file *m, void *v)
{
+ struct proc_mounts *p = m->private;
+ struct mount *mnt = v;
+
+ lock_ns_list(p->ns);
+ if (mnt)
+ list_move_tail(&p->cursor.mnt_list, &mnt->mnt_list);
+ else
+ list_del_init(&p->cursor.mnt_list);
+ unlock_ns_list(p->ns);
up_read(&namespace_sem);
}
static int m_show(struct seq_file *m, void *v)
{
struct proc_mounts *p = m->private;
- struct mount *r = list_entry(v, struct mount, mnt_list);
+ struct mount *r = v;
return p->show(m, &r->mnt);
}
@@ -1294,6 +1338,15 @@ const struct seq_operations mounts_op = {
.stop = m_stop,
.show = m_show,
};
+
+void mnt_cursor_del(struct mnt_namespace *ns, struct mount *cursor)
+{
+ down_read(&namespace_sem);
+ lock_ns_list(ns);
+ list_del(&cursor->mnt_list);
+ unlock_ns_list(ns);
+ up_read(&namespace_sem);
+}
#endif /* CONFIG_PROC_FS */
/**
@@ -1733,6 +1786,11 @@ static struct mnt_namespace *to_mnt_ns(struct ns_common *ns)
return container_of(ns, struct mnt_namespace, ns);
}
+struct ns_common *from_mnt_ns(struct mnt_namespace *mnt)
+{
+ return &mnt->ns;
+}
+
static bool mnt_ns_loop(struct dentry *dentry)
{
/* Could bind mounting the mount namespace inode cause a
@@ -3202,6 +3260,7 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool a
atomic_set(&new_ns->count, 1);
INIT_LIST_HEAD(&new_ns->list);
init_waitqueue_head(&new_ns->poll);
+ spin_lock_init(&new_ns->ns_lock);
new_ns->user_ns = get_user_ns(user_ns);
new_ns->ucounts = ucounts;
return new_ns;
@@ -3595,7 +3654,7 @@ EXPORT_SYMBOL(path_is_under);
* file system may be mounted on put_old. After all, new_root is a mountpoint.
*
* Also, the current root cannot be on the 'rootfs' (initial ramfs) filesystem.
- * See Documentation/filesystems/ramfs-rootfs-initramfs.txt for alternatives
+ * See Documentation/filesystems/ramfs-rootfs-initramfs.rst for alternatives
* in this situation.
*
* Notes:
@@ -3842,10 +3901,14 @@ static bool mnt_already_visible(struct mnt_namespace *ns,
bool visible = false;
down_read(&namespace_sem);
+ lock_ns_list(ns);
list_for_each_entry(mnt, &ns->list, mnt_list) {
struct mount *child;
int mnt_flags;
+ if (mnt_is_cursor(mnt))
+ continue;
+
if (mnt->mnt.mnt_sb->s_type != sb->s_type)
continue;
@@ -3893,6 +3956,7 @@ static bool mnt_already_visible(struct mnt_namespace *ns,
next: ;
}
found:
+ unlock_ns_list(ns);
up_read(&namespace_sem);
return visible;
}
@@ -3954,16 +4018,18 @@ static void mntns_put(struct ns_common *ns)
put_mnt_ns(to_mnt_ns(ns));
}
-static int mntns_install(struct nsproxy *nsproxy, struct ns_common *ns)
+static int mntns_install(struct nsset *nsset, struct ns_common *ns)
{
- struct fs_struct *fs = current->fs;
+ struct nsproxy *nsproxy = nsset->nsproxy;
+ struct fs_struct *fs = nsset->fs;
struct mnt_namespace *mnt_ns = to_mnt_ns(ns), *old_mnt_ns;
+ struct user_namespace *user_ns = nsset->cred->user_ns;
struct path root;
int err;
if (!ns_capable(mnt_ns->user_ns, CAP_SYS_ADMIN) ||
- !ns_capable(current_user_ns(), CAP_SYS_CHROOT) ||
- !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
+ !ns_capable(user_ns, CAP_SYS_CHROOT) ||
+ !ns_capable(user_ns, CAP_SYS_ADMIN))
return -EPERM;
if (is_anon_ns(mnt_ns))
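The fs/namespace.c hunks above replace the old cached_mount/cached_index resume logic for /proc/mounts with a per-reader "cursor": a dummy mount flagged MNT_CURSOR that sits on ns->list under the new ns_lock and is skipped by every list walker via mnt_is_cursor(). The point is that a reader can stop mid-listing, drop all locks, and later resume from its cursor even if the mounts it already showed were unmounted in between. A minimal sketch of the pattern only (illustrative names, assuming kernel list_head semantics; not the kernel's actual code):

struct item {
        struct list_head node;
        bool is_cursor;                 /* the real code tests MNT_CURSOR */
};

/* Return the first real entry after @pos, skipping other readers' cursors. */
static struct item *item_next(struct list_head *head, struct list_head *pos)
{
        struct item *it;

        list_for_each_continue(pos, head) {
                it = list_entry(pos, struct item, node);
                if (!it->is_cursor)
                        return it;
        }
        return NULL;
}

/* In ->stop(), park the cursor just before the entry that still has to be
 * shown; the next ->start() resumes from &cursor->node instead of from a
 * possibly stale pointer. */
static void park_cursor(struct item *cursor, struct item *resume_at)
{
        if (resume_at)
                list_move_tail(&cursor->node, &resume_at->node);
        else
                list_del_init(&cursor->node);   /* reached the end */
}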
diff --git a/fs/nfs/blocklayout/extent_tree.c b/fs/nfs/blocklayout/extent_tree.c
index 7a57ff2528af..8f7cff7a4293 100644
--- a/fs/nfs/blocklayout/extent_tree.c
+++ b/fs/nfs/blocklayout/extent_tree.c
@@ -582,7 +582,7 @@ retry:
if (!arg->layoutupdate_pages)
return -ENOMEM;
- start_p = __vmalloc(buffer_size, GFP_NOFS, PAGE_KERNEL);
+ start_p = __vmalloc(buffer_size, GFP_NOFS);
if (!start_p) {
kfree(arg->layoutupdate_pages);
return -ENOMEM;
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 1f32a9fbfdaf..6673a77884d9 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -668,7 +668,8 @@ void nfs_super_set_maxbytes(struct super_block *sb, __u64 maxfilesize)
}
/*
- * Record the page as unstable and mark its inode as dirty.
+ * Record the page as unstable (an extra writeback period) and mark its
+ * inode as dirty.
*/
static inline
void nfs_mark_page_unstable(struct page *page, struct nfs_commit_info *cinfo)
@@ -676,8 +677,11 @@ void nfs_mark_page_unstable(struct page *page, struct nfs_commit_info *cinfo)
if (!cinfo->dreq) {
struct inode *inode = page_file_mapping(page)->host;
- inc_node_page_state(page, NR_UNSTABLE_NFS);
- inc_wb_stat(&inode_to_bdi(inode)->wb, WB_RECLAIMABLE);
+ /* This page is really still in write-back - just that the
+ * writeback is happening on the server now.
+ */
+ inc_node_page_state(page, NR_WRITEBACK);
+ inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK);
__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
}
}
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 1e767f779c49..639c34fec04a 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -946,9 +946,9 @@ nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
static void
nfs_clear_page_commit(struct page *page)
{
- dec_node_page_state(page, NR_UNSTABLE_NFS);
+ dec_node_page_state(page, NR_WRITEBACK);
dec_wb_stat(&inode_to_bdi(page_file_mapping(page)->host)->wb,
- WB_RECLAIMABLE);
+ WB_WRITEBACK);
}
/* Called holding the request lock on @req */
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index a8fb18609146..9e40dfecf1b1 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -127,16 +127,8 @@ nfs4_make_rec_clidname(char *dname, const struct xdr_netobj *clname)
goto out;
}
- {
- SHASH_DESC_ON_STACK(desc, tfm);
-
- desc->tfm = tfm;
-
- status = crypto_shash_digest(desc, clname->data, clname->len,
- cksum.data);
- shash_desc_zero(desc);
- }
-
+ status = crypto_shash_tfm_digest(tfm, clname->data, clname->len,
+ cksum.data);
if (status)
goto out;
@@ -1148,7 +1140,6 @@ nfsd4_cld_create_v2(struct nfs4_client *clp)
struct crypto_shash *tfm = cn->cn_tfm;
struct xdr_netobj cksum;
char *principal = NULL;
- SHASH_DESC_ON_STACK(desc, tfm);
/* Don't upcall if it's already stored */
if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
@@ -1170,16 +1161,14 @@ nfsd4_cld_create_v2(struct nfs4_client *clp)
else if (clp->cl_cred.cr_principal)
principal = clp->cl_cred.cr_principal;
if (principal) {
- desc->tfm = tfm;
cksum.len = crypto_shash_digestsize(tfm);
cksum.data = kmalloc(cksum.len, GFP_KERNEL);
if (cksum.data == NULL) {
ret = -ENOMEM;
goto out;
}
- ret = crypto_shash_digest(desc, principal, strlen(principal),
- cksum.data);
- shash_desc_zero(desc);
+ ret = crypto_shash_tfm_digest(tfm, principal, strlen(principal),
+ cksum.data);
if (ret) {
kfree(cksum.data);
goto out;
@@ -1343,7 +1332,6 @@ nfsd4_cld_check_v2(struct nfs4_client *clp)
struct crypto_shash *tfm = cn->cn_tfm;
struct xdr_netobj cksum;
char *principal = NULL;
- SHASH_DESC_ON_STACK(desc, tfm);
/* did we already find that this client is stable? */
if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
@@ -1381,14 +1369,12 @@ found:
principal = clp->cl_cred.cr_principal;
if (principal == NULL)
return -ENOENT;
- desc->tfm = tfm;
cksum.len = crypto_shash_digestsize(tfm);
cksum.data = kmalloc(cksum.len, GFP_KERNEL);
if (cksum.data == NULL)
return -ENOENT;
- status = crypto_shash_digest(desc, principal, strlen(principal),
- cksum.data);
- shash_desc_zero(desc);
+ status = crypto_shash_tfm_digest(tfm, principal,
+ strlen(principal), cksum.data);
if (status) {
kfree(cksum.data);
return -ENOENT;
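The three nfsd hunks above drop the open-coded SHASH_DESC_ON_STACK() plus crypto_shash_digest() sequences in favour of crypto_shash_tfm_digest(), a one-shot helper that sets up and zeroes the descriptor internally. A hedged sketch of the pattern (the wrapper and its arguments are illustrative, not nfsd code):

#include <crypto/hash.h>

/* Digest @len bytes of @data with an already-allocated shash tfm.
 * crypto_shash_tfm_digest() builds the shash_desc itself and zeroes it
 * afterwards, replacing the old three-step descriptor dance. */
static int digest_buf(struct crypto_shash *tfm, const u8 *data,
                      unsigned int len, u8 *out)
{
        return crypto_shash_tfm_digest(tfm, data, len, out);
}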
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 0aa02eb18bd3..c3fbab1753ec 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -979,12 +979,13 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
if (test_bit(RQ_LOCAL, &rqstp->rq_flags))
/*
- * We want less throttling in balance_dirty_pages()
- * and shrink_inactive_list() so that nfs to
+ * We want throttling in balance_dirty_pages()
+ * and shrink_inactive_list() to only consider
+ * the backingdev we are writing to, so that nfs to
* localhost doesn't cause nfsd to lock up due to all
* the client's dirty pages or its congested queue.
*/
- current->flags |= PF_LESS_THROTTLE;
+ current->flags |= PF_LOCAL_THROTTLE;
exp = fhp->fh_export;
use_wgather = (rqstp->rq_vers == 2) && EX_WGATHER(exp);
@@ -1037,7 +1038,7 @@ out_nfserr:
nfserr = nfserrno(host_err);
}
if (test_bit(RQ_LOCAL, &rqstp->rq_flags))
- current_restore_flags(pflags, PF_LESS_THROTTLE);
+ current_restore_flags(pflags, PF_LOCAL_THROTTLE);
return nfserr;
}
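The PF_LESS_THROTTLE flag becomes PF_LOCAL_THROTTLE here: instead of generally relaxing dirty-page throttling, the renamed flag asks balance_dirty_pages() and reclaim to throttle the task only against the backing device it is writing to, which is what loopback NFS needs to avoid locking up on the client's own dirty pages. The save/set/restore shape used above, as a sketch (loopback_write stands in for the RQ_LOCAL test and is not a real variable):

unsigned int pflags = current->flags;   /* snapshot before modifying */

if (loopback_write)                     /* e.g. test_bit(RQ_LOCAL, ...) */
        current->flags |= PF_LOCAL_THROTTLE;

/* ... perform the write, possibly dirtying many pages ... */

if (loopback_write)
        current_restore_flags(pflags, PF_LOCAL_THROTTLE);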
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 671085512e0f..ceeb3b441844 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -145,18 +145,9 @@ static int nilfs_readpage(struct file *file, struct page *page)
return mpage_readpage(page, nilfs_get_block);
}
-/**
- * nilfs_readpages() - implement readpages() method of nilfs_aops {}
- * address_space_operations.
- * @file - file struct of the file to be read
- * @mapping - address_space struct used for reading multiple pages
- * @pages - the pages to be read
- * @nr_pages - number of pages to be read
- */
-static int nilfs_readpages(struct file *file, struct address_space *mapping,
- struct list_head *pages, unsigned int nr_pages)
+static void nilfs_readahead(struct readahead_control *rac)
{
- return mpage_readpages(mapping, pages, nr_pages, nilfs_get_block);
+ mpage_readahead(rac, nilfs_get_block);
}
static int nilfs_writepages(struct address_space *mapping,
@@ -308,7 +299,7 @@ const struct address_space_operations nilfs_aops = {
.readpage = nilfs_readpage,
.writepages = nilfs_writepages,
.set_page_dirty = nilfs_set_page_dirty,
- .readpages = nilfs_readpages,
+ .readahead = nilfs_readahead,
.write_begin = nilfs_write_begin,
.write_end = nilfs_write_end,
/* .releasepage = nilfs_releasepage, */
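This hunk, like the omfs and ocfs2 hunks further down, converts the page-list based ->readpages() address-space method to the newer ->readahead() method, which receives a struct readahead_control describing a batch of pages already added to the page cache. The conversion shape, as a sketch for a filesystem with an mpage-style get_block helper (foo_readpage and foo_get_block are placeholders):

/* New-style readahead: no page list and no return value; pages the
 * filesystem does not read here are simply left for ->readpage(). */
static void foo_readahead(struct readahead_control *rac)
{
        /* readahead_pos(rac) is the byte offset of the first page,
         * readahead_count(rac) the number of pages in the batch. */
        if (readahead_pos(rac) >= i_size_read(rac->mapping->host))
                return;
        mpage_readahead(rac, foo_get_block);
}

static const struct address_space_operations foo_aops = {
        .readpage       = foo_readpage,
        .readahead      = foo_readahead,
        /* ... */
};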
diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h
index 380a543c5b19..b55cdeb4d169 100644
--- a/fs/nilfs2/the_nilfs.h
+++ b/fs/nilfs2/the_nilfs.h
@@ -375,7 +375,7 @@ static inline int nilfs_flush_device(struct the_nilfs *nilfs)
*/
smp_wmb();
- err = blkdev_issue_flush(nilfs->ns_bdev, GFP_KERNEL, NULL);
+ err = blkdev_issue_flush(nilfs->ns_bdev, GFP_KERNEL);
if (err != -EIO)
err = 0;
return err;
diff --git a/fs/notify/inotify/Kconfig b/fs/notify/inotify/Kconfig
index 6736e47d94d8..7715fadd5fff 100644
--- a/fs/notify/inotify/Kconfig
+++ b/fs/notify/inotify/Kconfig
@@ -12,6 +12,6 @@ config INOTIFY_USER
new features including multiple file events, one-shot support, and
unmount notification.
- For more information, see <file:Documentation/filesystems/inotify.txt>
+ For more information, see <file:Documentation/filesystems/inotify.rst>
If unsure, say Y.
diff --git a/fs/nsfs.c b/fs/nsfs.c
index 4f1205725cfe..800c1d0eb0d0 100644
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -229,6 +229,11 @@ int ns_get_name(char *buf, size_t size, struct task_struct *task,
return res;
}
+bool proc_ns_file(const struct file *file)
+{
+ return file->f_op == &ns_file_operations;
+}
+
struct file *proc_ns_fget(int fd)
{
struct file *file;
diff --git a/fs/ntfs/Kconfig b/fs/ntfs/Kconfig
index de9fb5cff226..1667a7e590d8 100644
--- a/fs/ntfs/Kconfig
+++ b/fs/ntfs/Kconfig
@@ -18,7 +18,7 @@ config NTFS_FS
the Linux 2.4 kernel series is separately available as a patch
from the project web site.
- For more information see <file:Documentation/filesystems/ntfs.txt>
+ For more information see <file:Documentation/filesystems/ntfs.rst>
and <http://www.linux-ntfs.org/>.
To compile this file system support as a module, choose M here: the
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index 554b744f41bf..bb0a43860ad2 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -1732,7 +1732,7 @@ void mark_ntfs_record_dirty(struct page *page, const unsigned int ofs) {
bh = bh->b_this_page;
} while (bh);
tail->b_this_page = head;
- attach_page_buffers(page, head);
+ attach_page_private(page, head);
} else
buffers_to_free = bh;
}
diff --git a/fs/ntfs/malloc.h b/fs/ntfs/malloc.h
index 842b0bfc3ac9..7068425735f1 100644
--- a/fs/ntfs/malloc.h
+++ b/fs/ntfs/malloc.h
@@ -34,7 +34,7 @@ static inline void *__ntfs_malloc(unsigned long size, gfp_t gfp_mask)
/* return (void *)__get_free_page(gfp_mask); */
}
if (likely((size >> PAGE_SHIFT) < totalram_pages()))
- return __vmalloc(size, gfp_mask, PAGE_KERNEL);
+ return __vmalloc(size, gfp_mask);
return NULL;
}
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index 3aac5c917afe..fbb9f1bc623d 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -504,7 +504,7 @@ int ntfs_sync_mft_mirror(ntfs_volume *vol, const unsigned long mft_no,
bh = bh->b_this_page;
} while (bh);
tail->b_this_page = head;
- attach_page_buffers(page, head);
+ attach_page_private(page, head);
}
bh = head = page_buffers(page);
BUG_ON(!bh);
diff --git a/fs/ocfs2/Kconfig b/fs/ocfs2/Kconfig
index 46bba20da6b5..1177c33df895 100644
--- a/fs/ocfs2/Kconfig
+++ b/fs/ocfs2/Kconfig
@@ -21,7 +21,7 @@ config OCFS2_FS
OCFS2 mailing lists: http://oss.oracle.com/projects/ocfs2/mailman/
For more information on OCFS2, see the file
- <file:Documentation/filesystems/ocfs2.txt>.
+ <file:Documentation/filesystems/ocfs2.rst>.
config OCFS2_FS_O2CB
tristate "O2CB Kernelspace Clustering"
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 3a67a6518ddf..3bfb4147895a 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -350,14 +350,11 @@ out:
* grow out to a tree. If need be, detecting boundary extents could
* trivially be added in a future version of ocfs2_get_block().
*/
-static int ocfs2_readpages(struct file *filp, struct address_space *mapping,
- struct list_head *pages, unsigned nr_pages)
+static void ocfs2_readahead(struct readahead_control *rac)
{
- int ret, err = -EIO;
- struct inode *inode = mapping->host;
+ int ret;
+ struct inode *inode = rac->mapping->host;
struct ocfs2_inode_info *oi = OCFS2_I(inode);
- loff_t start;
- struct page *last;
/*
* Use the nonblocking flag for the dlm code to avoid page
@@ -365,36 +362,31 @@ static int ocfs2_readpages(struct file *filp, struct address_space *mapping,
*/
ret = ocfs2_inode_lock_full(inode, NULL, 0, OCFS2_LOCK_NONBLOCK);
if (ret)
- return err;
+ return;
- if (down_read_trylock(&oi->ip_alloc_sem) == 0) {
- ocfs2_inode_unlock(inode, 0);
- return err;
- }
+ if (down_read_trylock(&oi->ip_alloc_sem) == 0)
+ goto out_unlock;
/*
* Don't bother with inline-data. There isn't anything
* to read-ahead in that case anyway...
*/
if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL)
- goto out_unlock;
+ goto out_up;
/*
* Check whether a remote node truncated this file - we just
* drop out in that case as it's not worth handling here.
*/
- last = lru_to_page(pages);
- start = (loff_t)last->index << PAGE_SHIFT;
- if (start >= i_size_read(inode))
- goto out_unlock;
+ if (readahead_pos(rac) >= i_size_read(inode))
+ goto out_up;
- err = mpage_readpages(mapping, pages, nr_pages, ocfs2_get_block);
+ mpage_readahead(rac, ocfs2_get_block);
-out_unlock:
+out_up:
up_read(&oi->ip_alloc_sem);
+out_unlock:
ocfs2_inode_unlock(inode, 0);
-
- return err;
}
/* Note: Because we don't support holes, our allocation has
@@ -2474,7 +2466,7 @@ static ssize_t ocfs2_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
const struct address_space_operations ocfs2_aops = {
.readpage = ocfs2_readpage,
- .readpages = ocfs2_readpages,
+ .readahead = ocfs2_readahead,
.writepage = ocfs2_writepage,
.write_begin = ocfs2_write_begin,
.write_end = ocfs2_write_end,
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 55a6512e9fde..f105746063ed 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -2760,6 +2760,7 @@ leave:
* Returns: 1 if dlm->spinlock was dropped/retaken, 0 if never dropped
*/
int dlm_empty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
+ __must_hold(&dlm->spinlock)
{
int ret;
int lock_dropped = 0;
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index 1de77f1a600b..ea868c6f9800 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -227,7 +227,7 @@ static ssize_t dlmfs_file_read(struct file *filp,
loff_t *ppos)
{
int bytes_left;
- ssize_t readlen, got;
+ ssize_t got;
char *lvb_buf;
struct inode *inode = file_inode(filp);
@@ -237,36 +237,31 @@ static ssize_t dlmfs_file_read(struct file *filp,
if (*ppos >= i_size_read(inode))
return 0;
+ /* don't read past the lvb */
+ if (count > i_size_read(inode) - *ppos)
+ count = i_size_read(inode) - *ppos;
+
if (!count)
return 0;
- if (!access_ok(buf, count))
- return -EFAULT;
-
- /* don't read past the lvb */
- if ((count + *ppos) > i_size_read(inode))
- readlen = i_size_read(inode) - *ppos;
- else
- readlen = count;
-
- lvb_buf = kmalloc(readlen, GFP_NOFS);
+ lvb_buf = kmalloc(count, GFP_NOFS);
if (!lvb_buf)
return -ENOMEM;
- got = user_dlm_read_lvb(inode, lvb_buf, readlen);
+ got = user_dlm_read_lvb(inode, lvb_buf, count);
if (got) {
- BUG_ON(got != readlen);
- bytes_left = __copy_to_user(buf, lvb_buf, readlen);
- readlen -= bytes_left;
+ BUG_ON(got != count);
+ bytes_left = copy_to_user(buf, lvb_buf, count);
+ count -= bytes_left;
} else
- readlen = 0;
+ count = 0;
kfree(lvb_buf);
- *ppos = *ppos + readlen;
+ *ppos = *ppos + count;
- mlog(0, "read %zd bytes\n", readlen);
- return readlen;
+ mlog(0, "read %zu bytes\n", count);
+ return count;
}
static ssize_t dlmfs_file_write(struct file *filp,
@@ -291,9 +286,6 @@ static ssize_t dlmfs_file_write(struct file *filp,
if (!count)
return 0;
- if (!access_ok(buf, count))
- return -EFAULT;
-
lvb_buf = kmalloc(count, GFP_NOFS);
if (!lvb_buf)
return -ENOMEM;
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 6cd5e4924e4d..85979e2214b3 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -194,7 +194,7 @@ static int ocfs2_sync_file(struct file *file, loff_t start, loff_t end,
needs_barrier = true;
err = jbd2_complete_transaction(journal, commit_tid);
if (needs_barrier) {
- ret = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
+ ret = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL);
if (!err)
err = ret;
}
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 9150cfa4df7d..ee5d98516212 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -279,6 +279,7 @@ enum ocfs2_mount_options
OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT = 1 << 15, /* Journal Async Commit */
OCFS2_MOUNT_ERRORS_CONT = 1 << 16, /* Return EIO to the calling process on error */
OCFS2_MOUNT_ERRORS_ROFS = 1 << 17, /* Change filesystem to read-only on error */
+ OCFS2_MOUNT_NOCLUSTER = 1 << 18, /* No cluster aware filesystem mount */
};
#define OCFS2_OSB_SOFT_RO 0x0001
@@ -673,7 +674,8 @@ static inline int ocfs2_cluster_o2cb_global_heartbeat(struct ocfs2_super *osb)
static inline int ocfs2_mount_local(struct ocfs2_super *osb)
{
- return (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT);
+ return ((osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT)
+ || (osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER));
}
static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb)
diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c
index 8caeceeaeda7..4da0e4b1e79b 100644
--- a/fs/ocfs2/slot_map.c
+++ b/fs/ocfs2/slot_map.c
@@ -254,14 +254,16 @@ static int __ocfs2_find_empty_slot(struct ocfs2_slot_info *si,
int i, ret = -ENOSPC;
if ((preferred >= 0) && (preferred < si->si_num_slots)) {
- if (!si->si_slots[preferred].sl_valid) {
+ if (!si->si_slots[preferred].sl_valid ||
+ !si->si_slots[preferred].sl_node_num) {
ret = preferred;
goto out;
}
}
for(i = 0; i < si->si_num_slots; i++) {
- if (!si->si_slots[i].sl_valid) {
+ if (!si->si_slots[i].sl_valid ||
+ !si->si_slots[i].sl_node_num) {
ret = i;
break;
}
@@ -456,24 +458,30 @@ int ocfs2_find_slot(struct ocfs2_super *osb)
spin_lock(&osb->osb_lock);
ocfs2_update_slot_info(si);
- /* search for ourselves first and take the slot if it already
- * exists. Perhaps we need to mark this in a variable for our
- * own journal recovery? Possibly not, though we certainly
- * need to warn to the user */
- slot = __ocfs2_node_num_to_slot(si, osb->node_num);
- if (slot < 0) {
- /* if no slot yet, then just take 1st available
- * one. */
- slot = __ocfs2_find_empty_slot(si, osb->preferred_slot);
+ if (ocfs2_mount_local(osb))
+ /* use slot 0 directly in local mode */
+ slot = 0;
+ else {
+ /* search for ourselves first and take the slot if it already
+ * exists. Perhaps we need to mark this in a variable for our
+ * own journal recovery? Possibly not, though we certainly
+ * need to warn to the user */
+ slot = __ocfs2_node_num_to_slot(si, osb->node_num);
if (slot < 0) {
- spin_unlock(&osb->osb_lock);
- mlog(ML_ERROR, "no free slots available!\n");
- status = -EINVAL;
- goto bail;
- }
- } else
- printk(KERN_INFO "ocfs2: Slot %d on device (%s) was already "
- "allocated to this node!\n", slot, osb->dev_str);
+ /* if no slot yet, then just take 1st available
+ * one. */
+ slot = __ocfs2_find_empty_slot(si, osb->preferred_slot);
+ if (slot < 0) {
+ spin_unlock(&osb->osb_lock);
+ mlog(ML_ERROR, "no free slots available!\n");
+ status = -EINVAL;
+ goto bail;
+ }
+ } else
+ printk(KERN_INFO "ocfs2: Slot %d on device (%s) was "
+ "already allocated to this node!\n",
+ slot, osb->dev_str);
+ }
ocfs2_set_slot(si, slot, osb->node_num);
osb->slot_num = slot;
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index ac61eeaf3837..71ea9ce71a6b 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -175,6 +175,7 @@ enum {
Opt_dir_resv_level,
Opt_journal_async_commit,
Opt_err_cont,
+ Opt_nocluster,
Opt_err,
};
@@ -208,6 +209,7 @@ static const match_table_t tokens = {
{Opt_dir_resv_level, "dir_resv_level=%u"},
{Opt_journal_async_commit, "journal_async_commit"},
{Opt_err_cont, "errors=continue"},
+ {Opt_nocluster, "nocluster"},
{Opt_err, NULL}
};
@@ -619,6 +621,13 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
goto out;
}
+ tmp = OCFS2_MOUNT_NOCLUSTER;
+ if ((osb->s_mount_opt & tmp) != (parsed_options.mount_opt & tmp)) {
+ ret = -EINVAL;
+ mlog(ML_ERROR, "Cannot change nocluster option on remount\n");
+ goto out;
+ }
+
tmp = OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL |
OCFS2_MOUNT_HB_NONE;
if ((osb->s_mount_opt & tmp) != (parsed_options.mount_opt & tmp)) {
@@ -859,6 +868,7 @@ static int ocfs2_verify_userspace_stack(struct ocfs2_super *osb,
}
if (ocfs2_userspace_stack(osb) &&
+ !(osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER) &&
strncmp(osb->osb_cluster_stack, mopt->cluster_stack,
OCFS2_STACK_LABEL_LEN)) {
mlog(ML_ERROR,
@@ -1139,6 +1149,11 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
osb->s_mount_opt & OCFS2_MOUNT_DATA_WRITEBACK ? "writeback" :
"ordered");
+ if ((osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER) &&
+ !(osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT))
+ printk(KERN_NOTICE "ocfs2: The shared device (%s) is mounted "
+ "without cluster aware mode.\n", osb->dev_str);
+
atomic_set(&osb->vol_state, VOLUME_MOUNTED);
wake_up(&osb->osb_mount_event);
@@ -1445,6 +1460,9 @@ static int ocfs2_parse_options(struct super_block *sb,
case Opt_journal_async_commit:
mopt->mount_opt |= OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT;
break;
+ case Opt_nocluster:
+ mopt->mount_opt |= OCFS2_MOUNT_NOCLUSTER;
+ break;
default:
mlog(ML_ERROR,
"Unrecognized mount option \"%s\" "
@@ -1556,6 +1574,9 @@ static int ocfs2_show_options(struct seq_file *s, struct dentry *root)
if (opts & OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT)
seq_printf(s, ",journal_async_commit");
+ if (opts & OCFS2_MOUNT_NOCLUSTER)
+ seq_printf(s, ",nocluster");
+
return 0;
}
diff --git a/fs/omfs/file.c b/fs/omfs/file.c
index d640b9388238..d7b5f09d298c 100644
--- a/fs/omfs/file.c
+++ b/fs/omfs/file.c
@@ -289,10 +289,9 @@ static int omfs_readpage(struct file *file, struct page *page)
return block_read_full_page(page, omfs_get_block);
}
-static int omfs_readpages(struct file *file, struct address_space *mapping,
- struct list_head *pages, unsigned nr_pages)
+static void omfs_readahead(struct readahead_control *rac)
{
- return mpage_readpages(mapping, pages, nr_pages, omfs_get_block);
+ mpage_readahead(rac, omfs_get_block);
}
static int omfs_writepage(struct page *page, struct writeback_control *wbc)
@@ -373,7 +372,7 @@ const struct inode_operations omfs_file_inops = {
const struct address_space_operations omfs_aops = {
.readpage = omfs_readpage,
- .readpages = omfs_readpages,
+ .readahead = omfs_readahead,
.writepage = omfs_writepage,
.writepages = omfs_writepages,
.write_begin = omfs_write_begin,
diff --git a/fs/open.c b/fs/open.c
index 719b320ede52..6cd48a61cda3 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -345,21 +345,14 @@ SYSCALL_DEFINE4(fallocate, int, fd, int, mode, loff_t, offset, loff_t, len)
* We do this by temporarily clearing all FS-related capabilities and
* switching the fsuid/fsgid around to the real ones.
*/
-long do_faccessat(int dfd, const char __user *filename, int mode)
+static const struct cred *access_override_creds(void)
{
const struct cred *old_cred;
struct cred *override_cred;
- struct path path;
- struct inode *inode;
- int res;
- unsigned int lookup_flags = LOOKUP_FOLLOW;
-
- if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */
- return -EINVAL;
override_cred = prepare_creds();
if (!override_cred)
- return -ENOMEM;
+ return NULL;
override_cred->fsuid = override_cred->uid;
override_cred->fsgid = override_cred->gid;
@@ -394,6 +387,38 @@ long do_faccessat(int dfd, const char __user *filename, int mode)
override_cred->non_rcu = 1;
old_cred = override_creds(override_cred);
+
+ /* override_cred() gets its own ref */
+ put_cred(override_cred);
+
+ return old_cred;
+}
+
+long do_faccessat(int dfd, const char __user *filename, int mode, int flags)
+{
+ struct path path;
+ struct inode *inode;
+ int res;
+ unsigned int lookup_flags = LOOKUP_FOLLOW;
+ const struct cred *old_cred = NULL;
+
+ if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */
+ return -EINVAL;
+
+ if (flags & ~(AT_EACCESS | AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH))
+ return -EINVAL;
+
+ if (flags & AT_SYMLINK_NOFOLLOW)
+ lookup_flags &= ~LOOKUP_FOLLOW;
+ if (flags & AT_EMPTY_PATH)
+ lookup_flags |= LOOKUP_EMPTY;
+
+ if (!(flags & AT_EACCESS)) {
+ old_cred = access_override_creds();
+ if (!old_cred)
+ return -ENOMEM;
+ }
+
retry:
res = user_path_at(dfd, filename, lookup_flags, &path);
if (res)
@@ -435,19 +460,26 @@ out_path_release:
goto retry;
}
out:
- revert_creds(old_cred);
- put_cred(override_cred);
+ if (old_cred)
+ revert_creds(old_cred);
+
return res;
}
SYSCALL_DEFINE3(faccessat, int, dfd, const char __user *, filename, int, mode)
{
- return do_faccessat(dfd, filename, mode);
+ return do_faccessat(dfd, filename, mode, 0);
+}
+
+SYSCALL_DEFINE4(faccessat2, int, dfd, const char __user *, filename, int, mode,
+ int, flags)
+{
+ return do_faccessat(dfd, filename, mode, flags);
}
SYSCALL_DEFINE2(access, const char __user *, filename, int, mode)
{
- return do_faccessat(AT_FDCWD, filename, mode);
+ return do_faccessat(AT_FDCWD, filename, mode, 0);
}
int ksys_chdir(const char __user *filename)
@@ -743,9 +775,8 @@ static int do_dentry_open(struct file *f,
path_get(&f->f_path);
f->f_inode = inode;
f->f_mapping = inode->i_mapping;
-
- /* Ensure that we skip any errors that predate opening of the file */
f->f_wb_err = filemap_sample_wb_err(f->f_mapping);
+ f->f_sb_err = file_sample_sb_err(f);
if (unlikely(f->f_flags & O_PATH)) {
f->f_mode = FMODE_PATH | FMODE_OPENED;
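The open.c changes add the faccessat2() system call, finally exposing the flags argument that POSIX faccessat() defines and that glibc previously had to emulate in userspace: AT_EACCESS (check against effective rather than real IDs, which also skips the credential override above), AT_SYMLINK_NOFOLLOW and AT_EMPTY_PATH. A userspace sketch, assuming headers new enough to know the syscall (the fallback number 439 is the one used by the shared syscall table):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef __NR_faccessat2
#define __NR_faccessat2 439
#endif

int main(void)
{
        /* Is /etc/shadow readable with our *effective* IDs, without
         * following a trailing symlink?  Plain faccessat() could not
         * pass AT_EACCESS to the kernel at all. */
        long ret = syscall(__NR_faccessat2, AT_FDCWD, "/etc/shadow",
                           R_OK, AT_EACCESS | AT_SYMLINK_NOFOLLOW);

        printf("faccessat2: %s\n", ret == 0 ? "readable" : "denied or error");
        return 0;
}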
diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c
index 12ae630fbed7..48f0547d4850 100644
--- a/fs/orangefs/inode.c
+++ b/fs/orangefs/inode.c
@@ -62,12 +62,7 @@ static int orangefs_writepage_locked(struct page *page,
} else {
ret = 0;
}
- if (wr) {
- kfree(wr);
- set_page_private(page, 0);
- ClearPagePrivate(page);
- put_page(page);
- }
+ kfree(detach_page_private(page));
return ret;
}
@@ -409,9 +404,7 @@ static int orangefs_write_begin(struct file *file,
wr->len = len;
wr->uid = current_fsuid();
wr->gid = current_fsgid();
- SetPagePrivate(page);
- set_page_private(page, (unsigned long)wr);
- get_page(page);
+ attach_page_private(page, wr);
okay:
return 0;
}
@@ -459,18 +452,12 @@ static void orangefs_invalidatepage(struct page *page,
wr = (struct orangefs_write_range *)page_private(page);
if (offset == 0 && length == PAGE_SIZE) {
- kfree((struct orangefs_write_range *)page_private(page));
- set_page_private(page, 0);
- ClearPagePrivate(page);
- put_page(page);
+ kfree(detach_page_private(page));
return;
/* write range entirely within invalidate range (or equal) */
} else if (page_offset(page) + offset <= wr->pos &&
wr->pos + wr->len <= page_offset(page) + offset + length) {
- kfree((struct orangefs_write_range *)page_private(page));
- set_page_private(page, 0);
- ClearPagePrivate(page);
- put_page(page);
+ kfree(detach_page_private(page));
/* XXX is this right? only caller in fs */
cancel_dirty_page(page);
return;
@@ -535,12 +522,7 @@ static int orangefs_releasepage(struct page *page, gfp_t foo)
static void orangefs_freepage(struct page *page)
{
- if (PagePrivate(page)) {
- kfree((struct orangefs_write_range *)page_private(page));
- set_page_private(page, 0);
- ClearPagePrivate(page);
- put_page(page);
- }
+ kfree(detach_page_private(page));
}
static int orangefs_launder_page(struct page *page)
@@ -740,9 +722,7 @@ vm_fault_t orangefs_page_mkwrite(struct vm_fault *vmf)
wr->len = PAGE_SIZE;
wr->uid = current_fsuid();
wr->gid = current_fsgid();
- SetPagePrivate(page);
- set_page_private(page, (unsigned long)wr);
- get_page(page);
+ attach_page_private(page, wr);
okay:
file_update_time(vmf->vma->vm_file);
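The ntfs and orangefs hunks in this area replace open-coded page-private bookkeeping with the attach_page_private()/detach_page_private() helpers, which fold the get_page()/set_page_private()/SetPagePrivate() sequence and its teardown into single calls. Roughly, the helpers behave like the following sketch (paraphrasing their pagemap.h definitions):

/* Attach: take a page reference, stash the pointer, mark PG_private. */
static inline void attach_page_private(struct page *page, void *data)
{
        get_page(page);
        set_page_private(page, (unsigned long)data);
        SetPagePrivate(page);
}

/* Detach: undo the above and return the stashed pointer (or NULL), so
 * callers can simply write kfree(detach_page_private(page)). */
static inline void *detach_page_private(struct page *page)
{
        void *data = (void *)page_private(page);

        if (!PagePrivate(page))
                return NULL;
        ClearPagePrivate(page);
        set_page_private(page, 0);
        put_page(page);

        return data;
}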
diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig
index 714c14c47ca5..dd188c7996b3 100644
--- a/fs/overlayfs/Kconfig
+++ b/fs/overlayfs/Kconfig
@@ -9,7 +9,7 @@ config OVERLAY_FS
'lower' filesystem is either hidden or, in the case of directories,
merged with the 'upper' object.
- For more information see Documentation/filesystems/overlayfs.txt
+ For more information see Documentation/filesystems/overlayfs.rst
config OVERLAY_FS_REDIRECT_DIR
bool "Overlayfs: turn on redirect directory feature by default"
@@ -38,7 +38,7 @@ config OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW
If backward compatibility is not an issue, then it is safe and
recommended to say N here.
- For more information, see Documentation/filesystems/overlayfs.txt
+ For more information, see Documentation/filesystems/overlayfs.rst
If unsure, say Y.
@@ -103,7 +103,7 @@ config OVERLAY_FS_XINO_AUTO
If compatibility with applications that expect 32bit inodes is not an
issue, then it is safe and recommended to say Y here.
- For more information, see Documentation/filesystems/overlayfs.txt
+ For more information, see Documentation/filesystems/overlayfs.rst
If unsure, say N.
diff --git a/fs/pipe.c b/fs/pipe.c
index 16fb72e9abf7..c7c4fb5f345f 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -140,21 +140,20 @@ static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
put_page(page);
}
-static int anon_pipe_buf_steal(struct pipe_inode_info *pipe,
- struct pipe_buffer *buf)
+static bool anon_pipe_buf_try_steal(struct pipe_inode_info *pipe,
+ struct pipe_buffer *buf)
{
struct page *page = buf->page;
- if (page_count(page) == 1) {
- memcg_kmem_uncharge_page(page, 0);
- __SetPageLocked(page);
- return 0;
- }
- return 1;
+ if (page_count(page) != 1)
+ return false;
+ memcg_kmem_uncharge_page(page, 0);
+ __SetPageLocked(page);
+ return true;
}
/**
- * generic_pipe_buf_steal - attempt to take ownership of a &pipe_buffer
+ * generic_pipe_buf_try_steal - attempt to take ownership of a &pipe_buffer
* @pipe: the pipe that the buffer belongs to
* @buf: the buffer to attempt to steal
*
@@ -165,8 +164,8 @@ static int anon_pipe_buf_steal(struct pipe_inode_info *pipe,
* he wishes; the typical use is insertion into a different file
* page cache.
*/
-int generic_pipe_buf_steal(struct pipe_inode_info *pipe,
- struct pipe_buffer *buf)
+bool generic_pipe_buf_try_steal(struct pipe_inode_info *pipe,
+ struct pipe_buffer *buf)
{
struct page *page = buf->page;
@@ -177,12 +176,11 @@ int generic_pipe_buf_steal(struct pipe_inode_info *pipe,
*/
if (page_count(page) == 1) {
lock_page(page);
- return 0;
+ return true;
}
-
- return 1;
+ return false;
}
-EXPORT_SYMBOL(generic_pipe_buf_steal);
+EXPORT_SYMBOL(generic_pipe_buf_try_steal);
/**
* generic_pipe_buf_get - get a reference to a &struct pipe_buffer
@@ -201,22 +199,6 @@ bool generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
EXPORT_SYMBOL(generic_pipe_buf_get);
/**
- * generic_pipe_buf_confirm - verify contents of the pipe buffer
- * @info: the pipe that the buffer belongs to
- * @buf: the buffer to confirm
- *
- * Description:
- * This function does nothing, because the generic pipe code uses
- * pages that are always good when inserted into the pipe.
- */
-int generic_pipe_buf_confirm(struct pipe_inode_info *info,
- struct pipe_buffer *buf)
-{
- return 0;
-}
-EXPORT_SYMBOL(generic_pipe_buf_confirm);
-
-/**
* generic_pipe_buf_release - put a reference to a &struct pipe_buffer
* @pipe: the pipe that the buffer belongs to
* @buf: the buffer to put a reference to
@@ -231,48 +213,12 @@ void generic_pipe_buf_release(struct pipe_inode_info *pipe,
}
EXPORT_SYMBOL(generic_pipe_buf_release);
-/* New data written to a pipe may be appended to a buffer with this type. */
static const struct pipe_buf_operations anon_pipe_buf_ops = {
- .confirm = generic_pipe_buf_confirm,
- .release = anon_pipe_buf_release,
- .steal = anon_pipe_buf_steal,
- .get = generic_pipe_buf_get,
-};
-
-static const struct pipe_buf_operations anon_pipe_buf_nomerge_ops = {
- .confirm = generic_pipe_buf_confirm,
- .release = anon_pipe_buf_release,
- .steal = anon_pipe_buf_steal,
- .get = generic_pipe_buf_get,
-};
-
-static const struct pipe_buf_operations packet_pipe_buf_ops = {
- .confirm = generic_pipe_buf_confirm,
- .release = anon_pipe_buf_release,
- .steal = anon_pipe_buf_steal,
- .get = generic_pipe_buf_get,
+ .release = anon_pipe_buf_release,
+ .try_steal = anon_pipe_buf_try_steal,
+ .get = generic_pipe_buf_get,
};
-/**
- * pipe_buf_mark_unmergeable - mark a &struct pipe_buffer as unmergeable
- * @buf: the buffer to mark
- *
- * Description:
- * This function ensures that no future writes will be merged into the
- * given &struct pipe_buffer. This is necessary when multiple pipe buffers
- * share the same backing page.
- */
-void pipe_buf_mark_unmergeable(struct pipe_buffer *buf)
-{
- if (buf->ops == &anon_pipe_buf_ops)
- buf->ops = &anon_pipe_buf_nomerge_ops;
-}
-
-static bool pipe_buf_can_merge(struct pipe_buffer *buf)
-{
- return buf->ops == &anon_pipe_buf_ops;
-}
-
/* Done while waiting without holding the pipe lock - thus the READ_ONCE() */
static inline bool pipe_readable(const struct pipe_inode_info *pipe)
{
@@ -478,7 +424,8 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
struct pipe_buffer *buf = &pipe->bufs[(head - 1) & mask];
int offset = buf->offset + buf->len;
- if (pipe_buf_can_merge(buf) && offset + chars <= PAGE_SIZE) {
+ if ((buf->flags & PIPE_BUF_FLAG_CAN_MERGE) &&
+ offset + chars <= PAGE_SIZE) {
ret = pipe_buf_confirm(pipe, buf);
if (ret)
goto out;
@@ -541,11 +488,10 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
buf->ops = &anon_pipe_buf_ops;
buf->offset = 0;
buf->len = 0;
- buf->flags = 0;
- if (is_packetized(filp)) {
- buf->ops = &packet_pipe_buf_ops;
+ if (is_packetized(filp))
buf->flags = PIPE_BUF_FLAG_PACKET;
- }
+ else
+ buf->flags = PIPE_BUF_FLAG_CAN_MERGE;
pipe->tmp_page = NULL;
copied = copy_page_from_iter(page, 0, PAGE_SIZE, from);
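The pipe.c rework collapses the three near-identical anonymous buffer ops tables into one: ->confirm is now optional (pipe_buf_confirm() treats a missing hook as success, so generic_pipe_buf_confirm() is deleted), ->steal becomes ->try_steal and returns bool, and whether new writes may be appended to a buffer is tracked per buffer via PIPE_BUF_FLAG_CAN_MERGE rather than by swapping ops tables. After the change, a minimal ops table mirroring the anonymous one above looks like this sketch:

static const struct pipe_buf_operations example_pipe_buf_ops = {
        /* .confirm omitted: a NULL hook means the page is always valid */
        .release        = anon_pipe_buf_release,        /* drop the page ref */
        .try_steal      = anon_pipe_buf_try_steal,      /* bool: ownership taken? */
        .get            = generic_pipe_buf_get,         /* take an extra ref */
};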
diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig
index 27ef84d99f59..971a42f6357d 100644
--- a/fs/proc/Kconfig
+++ b/fs/proc/Kconfig
@@ -23,7 +23,7 @@ config PROC_FS
/proc" or the equivalent line in /etc/fstab does the job.
The /proc file system is explained in the file
- <file:Documentation/filesystems/proc.txt> and on the proc(5) manpage
+ <file:Documentation/filesystems/proc.rst> and on the proc(5) manpage
("man 5 proc").
This option will enlarge your kernel by about 67 KB. Several
@@ -95,7 +95,7 @@ config PROC_CHILDREN
default n
help
Provides a fast way to retrieve first level children pids of a task. See
- <file:Documentation/filesystems/proc.txt> for more information.
+ <file:Documentation/filesystems/proc.rst> for more information.
Say Y if you are running any user-space software which takes benefit from
this interface. For example, rkt is such a piece of software.
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 8c1f1bb1a5ce..ecc63ce01be7 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -103,11 +103,14 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
show_val_kb(m, "SUnreclaim: ", sunreclaim);
seq_printf(m, "KernelStack: %8lu kB\n",
global_zone_page_state(NR_KERNEL_STACK_KB));
+#ifdef CONFIG_SHADOW_CALL_STACK
+ seq_printf(m, "ShadowCallStack:%8lu kB\n",
+ global_zone_page_state(NR_KERNEL_SCS_KB));
+#endif
show_val_kb(m, "PageTables: ",
global_zone_page_state(NR_PAGETABLE));
- show_val_kb(m, "NFS_Unstable: ",
- global_node_page_state(NR_UNSTABLE_NFS));
+ show_val_kb(m, "NFS_Unstable: ", 0);
show_val_kb(m, "Bounce: ",
global_zone_page_state(NR_BOUNCE));
show_val_kb(m, "WritebackTmp: ",
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 8d382d4ec067..6ad407d5efe2 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -546,10 +546,17 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
struct mem_size_stats *mss = walk->private;
struct vm_area_struct *vma = walk->vma;
bool locked = !!(vma->vm_flags & VM_LOCKED);
- struct page *page;
+ struct page *page = NULL;
- /* FOLL_DUMP will return -EFAULT on huge zero page */
- page = follow_trans_huge_pmd(vma, addr, pmd, FOLL_DUMP);
+ if (pmd_present(*pmd)) {
+ /* FOLL_DUMP will return -EFAULT on huge zero page */
+ page = follow_trans_huge_pmd(vma, addr, pmd, FOLL_DUMP);
+ } else if (unlikely(thp_migration_supported() && is_swap_pmd(*pmd))) {
+ swp_entry_t entry = pmd_to_swp_entry(*pmd);
+
+ if (is_migration_entry(entry))
+ page = migration_entry_to_page(entry);
+ }
if (IS_ERR_OR_NULL(page))
return;
if (PageAnon(page))
@@ -578,8 +585,7 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
ptl = pmd_trans_huge_lock(pmd, vma);
if (ptl) {
- if (pmd_present(*pmd))
- smaps_pmd_entry(pmd, addr, walk);
+ smaps_pmd_entry(pmd, addr, walk);
spin_unlock(ptl);
goto out;
}
@@ -622,9 +628,6 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
[ilog2(VM_GROWSDOWN)] = "gd",
[ilog2(VM_PFNMAP)] = "pf",
[ilog2(VM_DENYWRITE)] = "dw",
-#ifdef CONFIG_X86_INTEL_MPX
- [ilog2(VM_MPX)] = "mp",
-#endif
[ilog2(VM_LOCKED)] = "lo",
[ilog2(VM_IO)] = "io",
[ilog2(VM_SEQ_READ)] = "sr",
@@ -638,6 +641,9 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
[ilog2(VM_ARCH_1)] = "ar",
[ilog2(VM_WIPEONFORK)] = "wf",
[ilog2(VM_DONTDUMP)] = "dd",
+#ifdef CONFIG_ARM64_BTI
+ [ilog2(VM_ARM64_BTI)] = "bt",
+#endif
#ifdef CONFIG_MEM_SOFT_DIRTY
[ilog2(VM_SOFTDIRTY)] = "sd",
#endif
diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c
index 273ee82d8aa9..e4d70c0dffe9 100644
--- a/fs/proc_namespace.c
+++ b/fs/proc_namespace.c
@@ -279,7 +279,8 @@ static int mounts_open_common(struct inode *inode, struct file *file,
p->ns = ns;
p->root = root;
p->show = show;
- p->cached_event = ~0ULL;
+ INIT_LIST_HEAD(&p->cursor.mnt_list);
+ p->cursor.mnt.mnt_flags = MNT_CURSOR;
return 0;
@@ -296,6 +297,7 @@ static int mounts_release(struct inode *inode, struct file *file)
struct seq_file *m = file->private_data;
struct proc_mounts *p = m->private;
path_put(&p->root);
+ mnt_cursor_del(p->ns, &p->cursor);
put_mnt_ns(p->ns);
return seq_release_private(inode, file);
}
diff --git a/fs/pstore/Kconfig b/fs/pstore/Kconfig
index 8f0369aad22a..e16a49ebfe54 100644
--- a/fs/pstore/Kconfig
+++ b/fs/pstore/Kconfig
@@ -153,3 +153,112 @@ config PSTORE_RAM
"ramoops.ko".
For more information, see Documentation/admin-guide/ramoops.rst.
+
+config PSTORE_ZONE
+ tristate
+ depends on PSTORE
+ help
+ The common layer for pstore/blk (and pstore/ram in the future)
+ to manage storage in zones.
+
+config PSTORE_BLK
+ tristate "Log panic/oops to a block device"
+ depends on PSTORE
+ depends on BLOCK
+ select PSTORE_ZONE
+ default n
+ help
+ This enables panic and oops messages to be logged to a block device
+ where they can be read back at some later point.
+
+ For more information, see Documentation/admin-guide/pstore-blk.rst
+
+ If unsure, say N.
+
+config PSTORE_BLK_BLKDEV
+ string "block device identifier"
+ depends on PSTORE_BLK
+ default ""
+ help
+ Which block device should be used for pstore/blk.
+
+ It accepts the following variants:
+ 1) <hex_major><hex_minor> device number in hexadecimal representation,
+ with no leading 0x, for example b302.
+ 2) /dev/<disk_name> represents the device name of disk
+ 3) /dev/<disk_name><decimal> represents the device name and number
+ of partition - device number of disk plus the partition number
+ 4) /dev/<disk_name>p<decimal> - same as the above, this form is
+ used when disk name of partitioned disk ends with a digit.
+ 5) PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF representing the
+ unique id of a partition if the partition table provides it.
+ The UUID may be either an EFI/GPT UUID, or refer to an MSDOS
+ partition using the format SSSSSSSS-PP, where SSSSSSSS is a zero-
+ filled hex representation of the 32-bit "NT disk signature", and PP
+ is a zero-filled hex representation of the 1-based partition number.
+ 6) PARTUUID=<UUID>/PARTNROFF=<int> to select a partition in relation
+ to a partition with a known unique id.
+ 7) <major>:<minor> major and minor number of the device separated by
+ a colon.
+
+ NOTE that, both Kconfig and module parameters can configure
+ pstore/blk, but module parameters have priority over Kconfig.
+
+config PSTORE_BLK_KMSG_SIZE
+ int "Size in Kbytes of kmsg dump log to store"
+ depends on PSTORE_BLK
+ default 64
+ help
+ This just sets size of kmsg dump (oops, panic, etc) log for
+ pstore/blk. The size is in KB and must be a multiple of 4.
+
+ NOTE that, both Kconfig and module parameters can configure
+ pstore/blk, but module parameters have priority over Kconfig.
+
+config PSTORE_BLK_MAX_REASON
+ int "Maximum kmsg dump reason to store"
+ depends on PSTORE_BLK
+ default 2
+ help
+ The maximum reason for kmsg dumps to store. The default is
+ 2 (KMSG_DUMP_OOPS), see include/linux/kmsg_dump.h's
+ enum kmsg_dump_reason for more details.
+
+ NOTE that, both Kconfig and module parameters can configure
+ pstore/blk, but module parameters have priority over Kconfig.
+
+config PSTORE_BLK_PMSG_SIZE
+ int "Size in Kbytes of pmsg to store"
+ depends on PSTORE_BLK
+ depends on PSTORE_PMSG
+ default 64
+ help
+ This just sets size of pmsg (pmsg_size) for pstore/blk. The size is
+ in KB and must be a multiple of 4.
+
+ NOTE that, both Kconfig and module parameters can configure
+ pstore/blk, but module parameters have priority over Kconfig.
+
+config PSTORE_BLK_CONSOLE_SIZE
+ int "Size in Kbytes of console log to store"
+ depends on PSTORE_BLK
+ depends on PSTORE_CONSOLE
+ default 64
+ help
+ This just sets size of console log (console_size) to store via
+ pstore/blk. The size is in KB and must be a multiple of 4.
+
+ NOTE that, both Kconfig and module parameters can configure
+ pstore/blk, but module parameters have priority over Kconfig.
+
+config PSTORE_BLK_FTRACE_SIZE
+ int "Size in Kbytes of ftrace log to store"
+ depends on PSTORE_BLK
+ depends on PSTORE_FTRACE
+ default 64
+ help
+ This just sets size of ftrace log (ftrace_size) for pstore/blk. The
+ size is in KB and must be a multiple of 4.
+
+ NOTE that, both Kconfig and module parameters can configure
+ pstore/blk, but module parameters have priority over Kconfig.
diff --git a/fs/pstore/Makefile b/fs/pstore/Makefile
index 967b5891f325..c270467aeece 100644
--- a/fs/pstore/Makefile
+++ b/fs/pstore/Makefile
@@ -12,3 +12,9 @@ pstore-$(CONFIG_PSTORE_PMSG) += pmsg.o
ramoops-objs += ram.o ram_core.o
obj-$(CONFIG_PSTORE_RAM) += ramoops.o
+
+pstore_zone-objs += zone.o
+obj-$(CONFIG_PSTORE_ZONE) += pstore_zone.o
+
+pstore_blk-objs += blk.o
+obj-$(CONFIG_PSTORE_BLK) += pstore_blk.o
diff --git a/fs/pstore/blk.c b/fs/pstore/blk.c
new file mode 100644
index 000000000000..fcd5563dde06
--- /dev/null
+++ b/fs/pstore/blk.c
@@ -0,0 +1,517 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Implements a pstore backend driver that writes to block (or non-block) storage
+ * devices, using the pstore/zone API.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include "../../block/blk.h"
+#include <linux/blkdev.h>
+#include <linux/string.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/platform_device.h>
+#include <linux/pstore_blk.h>
+#include <linux/mount.h>
+#include <linux/uio.h>
+
+static long kmsg_size = CONFIG_PSTORE_BLK_KMSG_SIZE;
+module_param(kmsg_size, long, 0400);
+MODULE_PARM_DESC(kmsg_size, "kmsg dump record size in kbytes");
+
+static int max_reason = CONFIG_PSTORE_BLK_MAX_REASON;
+module_param(max_reason, int, 0400);
+MODULE_PARM_DESC(max_reason,
+ "maximum reason for kmsg dump (default 2: Oops and Panic)");
+
+#if IS_ENABLED(CONFIG_PSTORE_PMSG)
+static long pmsg_size = CONFIG_PSTORE_BLK_PMSG_SIZE;
+#else
+static long pmsg_size = -1;
+#endif
+module_param(pmsg_size, long, 0400);
+MODULE_PARM_DESC(pmsg_size, "pmsg size in kbytes");
+
+#if IS_ENABLED(CONFIG_PSTORE_CONSOLE)
+static long console_size = CONFIG_PSTORE_BLK_CONSOLE_SIZE;
+#else
+static long console_size = -1;
+#endif
+module_param(console_size, long, 0400);
+MODULE_PARM_DESC(console_size, "console size in kbytes");
+
+#if IS_ENABLED(CONFIG_PSTORE_FTRACE)
+static long ftrace_size = CONFIG_PSTORE_BLK_FTRACE_SIZE;
+#else
+static long ftrace_size = -1;
+#endif
+module_param(ftrace_size, long, 0400);
+MODULE_PARM_DESC(ftrace_size, "ftrace size in kbytes");
+
+static bool best_effort;
+module_param(best_effort, bool, 0400);
+MODULE_PARM_DESC(best_effort, "use best effort to write (i.e. do not require storage driver pstore support, default: off)");
+
+/*
+ * blkdev - the block device to use for pstore storage
+ *
+ * Usually, this will be a partition of a block device.
+ *
+ * blkdev accepts the following variants:
+ * 1) <hex_major><hex_minor> device number in hexadecimal representation,
+ * with no leading 0x, for example b302.
+ * 2) /dev/<disk_name> represents the device number of disk
+ * 3) /dev/<disk_name><decimal> represents the device number
+ * of partition - device number of disk plus the partition number
+ * 4) /dev/<disk_name>p<decimal> - same as the above, that form is
+ * used when disk name of partitioned disk ends on a digit.
+ * 5) PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF representing the
+ * unique id of a partition if the partition table provides it.
+ * The UUID may be either an EFI/GPT UUID, or refer to an MSDOS
+ * partition using the format SSSSSSSS-PP, where SSSSSSSS is a zero-
+ * filled hex representation of the 32-bit "NT disk signature", and PP
+ * is a zero-filled hex representation of the 1-based partition number.
+ * 6) PARTUUID=<UUID>/PARTNROFF=<int> to select a partition in relation to
+ * a partition with a known unique id.
+ * 7) <major>:<minor> major and minor number of the device separated by
+ * a colon.
+ */
+static char blkdev[80] = CONFIG_PSTORE_BLK_BLKDEV;
+module_param_string(blkdev, blkdev, 80, 0400);
+MODULE_PARM_DESC(blkdev, "block device for pstore storage");
+
+/*
+ * All globals must only be accessed under the pstore_blk_lock
+ * during the register/unregister functions.
+ */
+static DEFINE_MUTEX(pstore_blk_lock);
+static struct block_device *psblk_bdev;
+static struct pstore_zone_info *pstore_zone_info;
+static pstore_blk_panic_write_op blkdev_panic_write;
+
+struct bdev_info {
+ dev_t devt;
+ sector_t nr_sects;
+ sector_t start_sect;
+};
+
+#define check_size(name, alignsize) ({ \
+ long _##name_ = (name); \
+ _##name_ = _##name_ <= 0 ? 0 : (_##name_ * 1024); \
+ if (_##name_ & ((alignsize) - 1)) { \
+ pr_info(#name " must align to %d\n", \
+ (alignsize)); \
+ _##name_ = ALIGN(name, (alignsize)); \
+ } \
+ _##name_; \
+})
+
+static int __register_pstore_device(struct pstore_device_info *dev)
+{
+ int ret;
+
+ lockdep_assert_held(&pstore_blk_lock);
+
+ if (!dev || !dev->total_size || !dev->read || !dev->write)
+ return -EINVAL;
+
+ /* someone already registered before */
+ if (pstore_zone_info)
+ return -EBUSY;
+
+ pstore_zone_info = kzalloc(sizeof(struct pstore_zone_info), GFP_KERNEL);
+ if (!pstore_zone_info)
+ return -ENOMEM;
+
+ /* zero means no limit on which backends to attempt to store. */
+ if (!dev->flags)
+ dev->flags = UINT_MAX;
+
+#define verify_size(name, alignsize, enabled) { \
+ long _##name_; \
+ if (enabled) \
+ _##name_ = check_size(name, alignsize); \
+ else \
+ _##name_ = 0; \
+ name = _##name_ / 1024; \
+ pstore_zone_info->name = _##name_; \
+ }
+
+ verify_size(kmsg_size, 4096, dev->flags & PSTORE_FLAGS_DMESG);
+ verify_size(pmsg_size, 4096, dev->flags & PSTORE_FLAGS_PMSG);
+ verify_size(console_size, 4096, dev->flags & PSTORE_FLAGS_CONSOLE);
+ verify_size(ftrace_size, 4096, dev->flags & PSTORE_FLAGS_FTRACE);
+#undef verify_size
+
+ pstore_zone_info->total_size = dev->total_size;
+ pstore_zone_info->max_reason = max_reason;
+ pstore_zone_info->read = dev->read;
+ pstore_zone_info->write = dev->write;
+ pstore_zone_info->erase = dev->erase;
+ pstore_zone_info->panic_write = dev->panic_write;
+ pstore_zone_info->name = KBUILD_MODNAME;
+ pstore_zone_info->owner = THIS_MODULE;
+
+ ret = register_pstore_zone(pstore_zone_info);
+ if (ret) {
+ kfree(pstore_zone_info);
+ pstore_zone_info = NULL;
+ }
+ return ret;
+}
+/**
+ * register_pstore_device() - register non-block device to pstore/blk
+ *
+ * @dev: non-block device information
+ *
+ * Return:
+ * * 0 - OK
+ * * Others - something went wrong.
+ */
+int register_pstore_device(struct pstore_device_info *dev)
+{
+ int ret;
+
+ mutex_lock(&pstore_blk_lock);
+ ret = __register_pstore_device(dev);
+ mutex_unlock(&pstore_blk_lock);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(register_pstore_device);
+
+static void __unregister_pstore_device(struct pstore_device_info *dev)
+{
+ lockdep_assert_held(&pstore_blk_lock);
+ if (pstore_zone_info && pstore_zone_info->read == dev->read) {
+ unregister_pstore_zone(pstore_zone_info);
+ kfree(pstore_zone_info);
+ pstore_zone_info = NULL;
+ }
+}
+
+/**
+ * unregister_pstore_device() - unregister non-block device from pstore/blk
+ *
+ * @dev: non-block device information
+ */
+void unregister_pstore_device(struct pstore_device_info *dev)
+{
+ mutex_lock(&pstore_blk_lock);
+ __unregister_pstore_device(dev);
+ mutex_unlock(&pstore_blk_lock);
+}
+EXPORT_SYMBOL_GPL(unregister_pstore_device);
+
+/**
+ * psblk_get_bdev() - open block device
+ *
+ * @holder: Exclusive holder identifier
+ * @info: Information about bdev to fill in
+ *
+ * Return: pointer to block device on success and others on error.
+ *
+ * On success, the returned block_device has reference count of one.
+ */
+static struct block_device *psblk_get_bdev(void *holder,
+ struct bdev_info *info)
+{
+ struct block_device *bdev = ERR_PTR(-ENODEV);
+ fmode_t mode = FMODE_READ | FMODE_WRITE;
+ sector_t nr_sects;
+
+ lockdep_assert_held(&pstore_blk_lock);
+
+ if (pstore_zone_info)
+ return ERR_PTR(-EBUSY);
+
+ if (!blkdev[0])
+ return ERR_PTR(-ENODEV);
+
+ if (holder)
+ mode |= FMODE_EXCL;
+ bdev = blkdev_get_by_path(blkdev, mode, holder);
+ if (IS_ERR(bdev)) {
+ dev_t devt;
+
+ devt = name_to_dev_t(blkdev);
+ if (devt == 0)
+ return ERR_PTR(-ENODEV);
+ bdev = blkdev_get_by_dev(devt, mode, holder);
+ if (IS_ERR(bdev))
+ return bdev;
+ }
+
+ nr_sects = part_nr_sects_read(bdev->bd_part);
+ if (!nr_sects) {
+ pr_err("not enough space for '%s'\n", blkdev);
+ blkdev_put(bdev, mode);
+ return ERR_PTR(-ENOSPC);
+ }
+
+ if (info) {
+ info->devt = bdev->bd_dev;
+ info->nr_sects = nr_sects;
+ info->start_sect = get_start_sect(bdev);
+ }
+
+ return bdev;
+}
+
+static void psblk_put_bdev(struct block_device *bdev, void *holder)
+{
+ fmode_t mode = FMODE_READ | FMODE_WRITE;
+
+ lockdep_assert_held(&pstore_blk_lock);
+
+ if (!bdev)
+ return;
+
+ if (holder)
+ mode |= FMODE_EXCL;
+ blkdev_put(bdev, mode);
+}
+
+static ssize_t psblk_generic_blk_read(char *buf, size_t bytes, loff_t pos)
+{
+ struct block_device *bdev = psblk_bdev;
+ struct file file;
+ struct kiocb kiocb;
+ struct iov_iter iter;
+ struct kvec iov = {.iov_base = buf, .iov_len = bytes};
+
+ if (!bdev)
+ return -ENODEV;
+
+ memset(&file, 0, sizeof(struct file));
+ file.f_mapping = bdev->bd_inode->i_mapping;
+ file.f_flags = O_DSYNC | __O_SYNC | O_NOATIME;
+ file.f_inode = bdev->bd_inode;
+ file_ra_state_init(&file.f_ra, file.f_mapping);
+
+ init_sync_kiocb(&kiocb, &file);
+ kiocb.ki_pos = pos;
+ iov_iter_kvec(&iter, READ, &iov, 1, bytes);
+
+ return generic_file_read_iter(&kiocb, &iter);
+}
+
+static ssize_t psblk_generic_blk_write(const char *buf, size_t bytes,
+ loff_t pos)
+{
+ struct block_device *bdev = psblk_bdev;
+ struct iov_iter iter;
+ struct kiocb kiocb;
+ struct file file;
+ ssize_t ret;
+ struct kvec iov = {.iov_base = (void *)buf, .iov_len = bytes};
+
+ if (!bdev)
+ return -ENODEV;
+
+ /* Console/Ftrace backend may handle buffer until flush dirty zones */
+ if (in_interrupt() || irqs_disabled())
+ return -EBUSY;
+
+ memset(&file, 0, sizeof(struct file));
+ file.f_mapping = bdev->bd_inode->i_mapping;
+ file.f_flags = O_DSYNC | __O_SYNC | O_NOATIME;
+ file.f_inode = bdev->bd_inode;
+
+ init_sync_kiocb(&kiocb, &file);
+ kiocb.ki_pos = pos;
+ iov_iter_kvec(&iter, WRITE, &iov, 1, bytes);
+
+ inode_lock(bdev->bd_inode);
+ ret = generic_write_checks(&kiocb, &iter);
+ if (ret > 0)
+ ret = generic_perform_write(&file, &iter, pos);
+ inode_unlock(bdev->bd_inode);
+
+ if (likely(ret > 0)) {
+ const struct file_operations f_op = {.fsync = blkdev_fsync};
+
+ file.f_op = &f_op;
+ kiocb.ki_pos += ret;
+ ret = generic_write_sync(&kiocb, ret);
+ }
+ return ret;
+}
+
+static ssize_t psblk_blk_panic_write(const char *buf, size_t size,
+ loff_t off)
+{
+ int ret;
+
+ if (!blkdev_panic_write)
+ return -EOPNOTSUPP;
+
+ /* size and off must align to SECTOR_SIZE for block device */
+ ret = blkdev_panic_write(buf, off >> SECTOR_SHIFT,
+ size >> SECTOR_SHIFT);
+ /* try next zone */
+ if (ret == -ENOMSG)
+ return ret;
+ return ret ? -EIO : size;
+}
+
+static int __register_pstore_blk(struct pstore_blk_info *info)
+{
+ char bdev_name[BDEVNAME_SIZE];
+ struct block_device *bdev;
+ struct pstore_device_info dev;
+ struct bdev_info binfo;
+ void *holder = blkdev;
+ int ret = -ENODEV;
+
+ lockdep_assert_held(&pstore_blk_lock);
+
+ /* hold bdev exclusively */
+ memset(&binfo, 0, sizeof(binfo));
+ bdev = psblk_get_bdev(holder, &binfo);
+ if (IS_ERR(bdev)) {
+ pr_err("failed to open '%s'!\n", blkdev);
+ return PTR_ERR(bdev);
+ }
+
+ /* only allow driver matching the @blkdev */
+ if (!binfo.devt || (!best_effort &&
+ MAJOR(binfo.devt) != info->major)) {
+ pr_debug("invalid major %u (expect %u)\n",
+ info->major, MAJOR(binfo.devt));
+ ret = -ENODEV;
+ goto err_put_bdev;
+ }
+
+ /* psblk_bdev must be assigned before register to pstore/blk */
+ psblk_bdev = bdev;
+ blkdev_panic_write = info->panic_write;
+
+ /* Copy back block device details. */
+ info->devt = binfo.devt;
+ info->nr_sects = binfo.nr_sects;
+ info->start_sect = binfo.start_sect;
+
+ memset(&dev, 0, sizeof(dev));
+ dev.total_size = info->nr_sects << SECTOR_SHIFT;
+ dev.flags = info->flags;
+ dev.read = psblk_generic_blk_read;
+ dev.write = psblk_generic_blk_write;
+ dev.erase = NULL;
+ dev.panic_write = info->panic_write ? psblk_blk_panic_write : NULL;
+
+ ret = __register_pstore_device(&dev);
+ if (ret)
+ goto err_put_bdev;
+
+ bdevname(bdev, bdev_name);
+ pr_info("attached %s%s\n", bdev_name,
+ info->panic_write ? "" : " (no dedicated panic_write!)");
+ return 0;
+
+err_put_bdev:
+ psblk_bdev = NULL;
+ blkdev_panic_write = NULL;
+ psblk_put_bdev(bdev, holder);
+ return ret;
+}
+
+/**
+ * register_pstore_blk() - register block device to pstore/blk
+ *
+ * @info: details on the desired block device interface
+ *
+ * Return:
+ * * 0 - OK
+ * * Others - something went wrong.
+ */
+int register_pstore_blk(struct pstore_blk_info *info)
+{
+ int ret;
+
+ mutex_lock(&pstore_blk_lock);
+ ret = __register_pstore_blk(info);
+ mutex_unlock(&pstore_blk_lock);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(register_pstore_blk);
+
+static void __unregister_pstore_blk(unsigned int major)
+{
+ struct pstore_device_info dev = { .read = psblk_generic_blk_read };
+ void *holder = blkdev;
+
+ lockdep_assert_held(&pstore_blk_lock);
+ if (psblk_bdev && MAJOR(psblk_bdev->bd_dev) == major) {
+ __unregister_pstore_device(&dev);
+ psblk_put_bdev(psblk_bdev, holder);
+ blkdev_panic_write = NULL;
+ psblk_bdev = NULL;
+ }
+}
+
+/**
+ * unregister_pstore_blk() - unregister block device from pstore/blk
+ *
+ * @major: the major device number of device
+ */
+void unregister_pstore_blk(unsigned int major)
+{
+ mutex_lock(&pstore_blk_lock);
+ __unregister_pstore_blk(major);
+ mutex_unlock(&pstore_blk_lock);
+}
+EXPORT_SYMBOL_GPL(unregister_pstore_blk);
+
+/* get information of pstore/blk */
+int pstore_blk_get_config(struct pstore_blk_config *info)
+{
+ strncpy(info->device, blkdev, 80);
+ info->max_reason = max_reason;
+ info->kmsg_size = check_size(kmsg_size, 4096);
+ info->pmsg_size = check_size(pmsg_size, 4096);
+ info->ftrace_size = check_size(ftrace_size, 4096);
+ info->console_size = check_size(console_size, 4096);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(pstore_blk_get_config);
+
+static int __init pstore_blk_init(void)
+{
+ struct pstore_blk_info info = { };
+ int ret = 0;
+
+ mutex_lock(&pstore_blk_lock);
+ if (!pstore_zone_info && best_effort && blkdev[0])
+ ret = __register_pstore_blk(&info);
+ mutex_unlock(&pstore_blk_lock);
+
+ return ret;
+}
+late_initcall(pstore_blk_init);
+
+static void __exit pstore_blk_exit(void)
+{
+ mutex_lock(&pstore_blk_lock);
+ if (psblk_bdev)
+ __unregister_pstore_blk(MAJOR(psblk_bdev->bd_dev));
+ else {
+ struct pstore_device_info dev = { };
+
+ if (pstore_zone_info)
+ dev.read = pstore_zone_info->read;
+ __unregister_pstore_device(&dev);
+ }
+ mutex_unlock(&pstore_blk_lock);
+}
+module_exit(pstore_blk_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("WeiXiong Liao <liaoweixiong@allwinnertech.com>");
+MODULE_AUTHOR("Kees Cook <keescook@chromium.org>");
+MODULE_DESCRIPTION("pstore backend for block devices");
diff --git a/fs/pstore/ftrace.c b/fs/pstore/ftrace.c
index bfbfc2698070..5c0450701293 100644
--- a/fs/pstore/ftrace.c
+++ b/fs/pstore/ftrace.c
@@ -16,6 +16,7 @@
#include <linux/debugfs.h>
#include <linux/err.h>
#include <linux/cache.h>
+#include <linux/slab.h>
#include <asm/barrier.h>
#include "internal.h"
@@ -132,3 +133,56 @@ void pstore_unregister_ftrace(void)
debugfs_remove_recursive(pstore_ftrace_dir);
}
+
+ssize_t pstore_ftrace_combine_log(char **dest_log, size_t *dest_log_size,
+ const char *src_log, size_t src_log_size)
+{
+ size_t dest_size, src_size, total, dest_off, src_off;
+ size_t dest_idx = 0, src_idx = 0, merged_idx = 0;
+ void *merged_buf;
+ struct pstore_ftrace_record *drec, *srec, *mrec;
+ size_t record_size = sizeof(struct pstore_ftrace_record);
+
+ dest_off = *dest_log_size % record_size;
+ dest_size = *dest_log_size - dest_off;
+
+ src_off = src_log_size % record_size;
+ src_size = src_log_size - src_off;
+
+ total = dest_size + src_size;
+ merged_buf = kmalloc(total, GFP_KERNEL);
+ if (!merged_buf)
+ return -ENOMEM;
+
+ drec = (struct pstore_ftrace_record *)(*dest_log + dest_off);
+ srec = (struct pstore_ftrace_record *)(src_log + src_off);
+ mrec = (struct pstore_ftrace_record *)(merged_buf);
+
+ while (dest_size > 0 && src_size > 0) {
+ if (pstore_ftrace_read_timestamp(&drec[dest_idx]) <
+ pstore_ftrace_read_timestamp(&srec[src_idx])) {
+ mrec[merged_idx++] = drec[dest_idx++];
+ dest_size -= record_size;
+ } else {
+ mrec[merged_idx++] = srec[src_idx++];
+ src_size -= record_size;
+ }
+ }
+
+ while (dest_size > 0) {
+ mrec[merged_idx++] = drec[dest_idx++];
+ dest_size -= record_size;
+ }
+
+ while (src_size > 0) {
+ mrec[merged_idx++] = srec[src_idx++];
+ src_size -= record_size;
+ }
+
+ kfree(*dest_log);
+ *dest_log = merged_buf;
+ *dest_log_size = total;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(pstore_ftrace_combine_log);
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index d99b5d39aa90..c331efe8de95 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -22,18 +22,21 @@
#include <linux/magic.h>
#include <linux/pstore.h>
#include <linux/slab.h>
-#include <linux/spinlock.h>
#include <linux/uaccess.h>
#include "internal.h"
#define PSTORE_NAMELEN 64
-static DEFINE_SPINLOCK(allpstore_lock);
-static LIST_HEAD(allpstore);
+static DEFINE_MUTEX(records_list_lock);
+static LIST_HEAD(records_list);
+
+static DEFINE_MUTEX(pstore_sb_lock);
+static struct super_block *pstore_sb;
struct pstore_private {
struct list_head list;
+ struct dentry *dentry;
struct pstore_record *record;
size_t total_size;
};
@@ -178,10 +181,22 @@ static int pstore_unlink(struct inode *dir, struct dentry *dentry)
{
struct pstore_private *p = d_inode(dentry)->i_private;
struct pstore_record *record = p->record;
+ int rc = 0;
if (!record->psi->erase)
return -EPERM;
+ /* Make sure we can't race while removing this file. */
+ mutex_lock(&records_list_lock);
+ if (!list_empty(&p->list))
+ list_del_init(&p->list);
+ else
+ rc = -ENOENT;
+ p->dentry = NULL;
+ mutex_unlock(&records_list_lock);
+ if (rc)
+ return rc;
+
mutex_lock(&record->psi->read_mutex);
record->psi->erase(record);
mutex_unlock(&record->psi->read_mutex);
@@ -192,15 +207,9 @@ static int pstore_unlink(struct inode *dir, struct dentry *dentry)
static void pstore_evict_inode(struct inode *inode)
{
struct pstore_private *p = inode->i_private;
- unsigned long flags;
clear_inode(inode);
- if (p) {
- spin_lock_irqsave(&allpstore_lock, flags);
- list_del(&p->list);
- spin_unlock_irqrestore(&allpstore_lock, flags);
- free_pstore_private(p);
- }
+ free_pstore_private(p);
}
static const struct inode_operations pstore_dir_inode_operations = {
@@ -278,11 +287,54 @@ static const struct super_operations pstore_ops = {
.show_options = pstore_show_options,
};
-static struct super_block *pstore_sb;
+static struct dentry *psinfo_lock_root(void)
+{
+ struct dentry *root;
-bool pstore_is_mounted(void)
+ mutex_lock(&pstore_sb_lock);
+ /*
+ * Having no backend is fine -- no records appear.
+ * Not being mounted is fine -- nothing to do.
+ */
+ if (!psinfo || !pstore_sb) {
+ mutex_unlock(&pstore_sb_lock);
+ return NULL;
+ }
+
+ root = pstore_sb->s_root;
+ inode_lock(d_inode(root));
+ mutex_unlock(&pstore_sb_lock);
+
+ return root;
+}
+
+int pstore_put_backend_records(struct pstore_info *psi)
{
- return pstore_sb != NULL;
+ struct pstore_private *pos, *tmp;
+ struct dentry *root;
+ int rc = 0;
+
+ root = psinfo_lock_root();
+ if (!root)
+ return 0;
+
+ mutex_lock(&records_list_lock);
+ list_for_each_entry_safe(pos, tmp, &records_list, list) {
+ if (pos->record->psi == psi) {
+ list_del_init(&pos->list);
+ rc = simple_unlink(d_inode(root), pos->dentry);
+ if (WARN_ON(rc))
+ break;
+ d_drop(pos->dentry);
+ dput(pos->dentry);
+ pos->dentry = NULL;
+ }
+ }
+ mutex_unlock(&records_list_lock);
+
+ inode_unlock(d_inode(root));
+
+ return rc;
}
/*
@@ -297,23 +349,20 @@ int pstore_mkfile(struct dentry *root, struct pstore_record *record)
int rc = 0;
char name[PSTORE_NAMELEN];
struct pstore_private *private, *pos;
- unsigned long flags;
size_t size = record->size + record->ecc_notice_size;
- WARN_ON(!inode_is_locked(d_inode(root)));
+ if (WARN_ON(!inode_is_locked(d_inode(root))))
+ return -EINVAL;
- spin_lock_irqsave(&allpstore_lock, flags);
- list_for_each_entry(pos, &allpstore, list) {
+ rc = -EEXIST;
+ /* Skip records that are already present in the filesystem. */
+ mutex_lock(&records_list_lock);
+ list_for_each_entry(pos, &records_list, list) {
if (pos->record->type == record->type &&
pos->record->id == record->id &&
- pos->record->psi == record->psi) {
- rc = -EEXIST;
- break;
- }
+ pos->record->psi == record->psi)
+ goto fail;
}
- spin_unlock_irqrestore(&allpstore_lock, flags);
- if (rc)
- return rc;
rc = -ENOMEM;
inode = pstore_get_inode(root->d_sb);
@@ -334,6 +383,7 @@ int pstore_mkfile(struct dentry *root, struct pstore_record *record)
if (!dentry)
goto fail_private;
+ private->dentry = dentry;
private->record = record;
inode->i_size = private->total_size = size;
inode->i_private = private;
@@ -343,9 +393,8 @@ int pstore_mkfile(struct dentry *root, struct pstore_record *record)
d_add(dentry, inode);
- spin_lock_irqsave(&allpstore_lock, flags);
- list_add(&private->list, &allpstore);
- spin_unlock_irqrestore(&allpstore_lock, flags);
+ list_add(&private->list, &records_list);
+ mutex_unlock(&records_list_lock);
return 0;
@@ -353,8 +402,8 @@ fail_private:
free_pstore_private(private);
fail_inode:
iput(inode);
-
fail:
+ mutex_unlock(&records_list_lock);
return rc;
}
@@ -366,16 +415,13 @@ fail:
*/
void pstore_get_records(int quiet)
{
- struct pstore_info *psi = psinfo;
struct dentry *root;
- if (!psi || !pstore_sb)
+ root = psinfo_lock_root();
+ if (!root)
return;
- root = pstore_sb->s_root;
-
- inode_lock(d_inode(root));
- pstore_get_backend_records(psi, root, quiet);
+ pstore_get_backend_records(psinfo, root, quiet);
inode_unlock(d_inode(root));
}
@@ -383,8 +429,6 @@ static int pstore_fill_super(struct super_block *sb, void *data, int silent)
{
struct inode *inode;
- pstore_sb = sb;
-
sb->s_maxbytes = MAX_LFS_FILESIZE;
sb->s_blocksize = PAGE_SIZE;
sb->s_blocksize_bits = PAGE_SHIFT;
@@ -405,6 +449,10 @@ static int pstore_fill_super(struct super_block *sb, void *data, int silent)
if (!sb->s_root)
return -ENOMEM;
+ mutex_lock(&pstore_sb_lock);
+ pstore_sb = sb;
+ mutex_unlock(&pstore_sb_lock);
+
pstore_get_records(0);
return 0;
@@ -418,8 +466,17 @@ static struct dentry *pstore_mount(struct file_system_type *fs_type,
static void pstore_kill_sb(struct super_block *sb)
{
+ mutex_lock(&pstore_sb_lock);
+ WARN_ON(pstore_sb != sb);
+
kill_litter_super(sb);
pstore_sb = NULL;
+
+ mutex_lock(&records_list_lock);
+ INIT_LIST_HEAD(&records_list);
+ mutex_unlock(&records_list_lock);
+
+ mutex_unlock(&pstore_sb_lock);
}
static struct file_system_type pstore_fs_type = {
diff --git a/fs/pstore/internal.h b/fs/pstore/internal.h
index 7062ea4bc57c..7fb219042f13 100644
--- a/fs/pstore/internal.h
+++ b/fs/pstore/internal.h
@@ -12,9 +12,18 @@ extern unsigned long kmsg_bytes;
#ifdef CONFIG_PSTORE_FTRACE
extern void pstore_register_ftrace(void);
extern void pstore_unregister_ftrace(void);
+ssize_t pstore_ftrace_combine_log(char **dest_log, size_t *dest_log_size,
+ const char *src_log, size_t src_log_size);
#else
static inline void pstore_register_ftrace(void) {}
static inline void pstore_unregister_ftrace(void) {}
+static inline ssize_t
+pstore_ftrace_combine_log(char **dest_log, size_t *dest_log_size,
+ const char *src_log, size_t src_log_size)
+{
+ *dest_log_size = 0;
+ return 0;
+}
#endif
#ifdef CONFIG_PSTORE_PMSG
@@ -31,9 +40,9 @@ extern void pstore_set_kmsg_bytes(int);
extern void pstore_get_records(int);
extern void pstore_get_backend_records(struct pstore_info *psi,
struct dentry *root, int quiet);
+extern int pstore_put_backend_records(struct pstore_info *psi);
extern int pstore_mkfile(struct dentry *root,
struct pstore_record *record);
-extern bool pstore_is_mounted(void);
extern void pstore_record_init(struct pstore_record *record,
struct pstore_info *psi);
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index 408277ee3cdb..a9e297eefdff 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -44,7 +44,7 @@ static int pstore_update_ms = -1;
module_param_named(update_ms, pstore_update_ms, int, 0600);
MODULE_PARM_DESC(update_ms, "milliseconds before pstore updates its content "
"(default is -1, which means runtime updates are disabled; "
- "enabling this option is not safe, it may lead to further "
+ "enabling this option may not be safe; it may lead to further "
"corruption on Oopses)");
/* Names should be in the same order as the enum pstore_type_id */
@@ -69,19 +69,25 @@ static void pstore_dowork(struct work_struct *);
static DECLARE_WORK(pstore_work, pstore_dowork);
/*
- * pstore_lock just protects "psinfo" during
- * calls to pstore_register()
+ * psinfo_lock protects "psinfo" during calls to
+ * pstore_register(), pstore_unregister(), and
+ * the filesystem mount/unmount routines.
*/
-static DEFINE_SPINLOCK(pstore_lock);
+static DEFINE_MUTEX(psinfo_lock);
struct pstore_info *psinfo;
static char *backend;
+module_param(backend, charp, 0444);
+MODULE_PARM_DESC(backend, "specific backend to use");
+
static char *compress =
#ifdef CONFIG_PSTORE_COMPRESS_DEFAULT
CONFIG_PSTORE_COMPRESS_DEFAULT;
#else
NULL;
#endif
+module_param(compress, charp, 0444);
+MODULE_PARM_DESC(compress, "compression to use");
/* Compression parameters */
static struct crypto_comp *tfm;
@@ -129,24 +135,12 @@ enum pstore_type_id pstore_name_to_type(const char *name)
}
EXPORT_SYMBOL_GPL(pstore_name_to_type);
-static const char *get_reason_str(enum kmsg_dump_reason reason)
+static void pstore_timer_kick(void)
{
- switch (reason) {
- case KMSG_DUMP_PANIC:
- return "Panic";
- case KMSG_DUMP_OOPS:
- return "Oops";
- case KMSG_DUMP_EMERG:
- return "Emergency";
- case KMSG_DUMP_RESTART:
- return "Restart";
- case KMSG_DUMP_HALT:
- return "Halt";
- case KMSG_DUMP_POWEROFF:
- return "Poweroff";
- default:
- return "Unknown";
- }
+ if (pstore_update_ms < 0)
+ return;
+
+ mod_timer(&pstore_timer, jiffies + msecs_to_jiffies(pstore_update_ms));
}
/*
@@ -393,7 +387,7 @@ static void pstore_dump(struct kmsg_dumper *dumper,
unsigned int part = 1;
int ret;
- why = get_reason_str(reason);
+ why = kmsg_dump_reason_str(reason);
if (down_trylock(&psinfo->buf_lock)) {
/* Failed to acquire lock: give up if we cannot wait. */
@@ -459,8 +453,10 @@ static void pstore_dump(struct kmsg_dumper *dumper,
}
ret = psinfo->write(&record);
- if (ret == 0 && reason == KMSG_DUMP_OOPS && pstore_is_mounted())
+ if (ret == 0 && reason == KMSG_DUMP_OOPS) {
pstore_new_entry = 1;
+ pstore_timer_kick();
+ }
total += record.size;
part++;
@@ -503,14 +499,20 @@ static void pstore_console_write(struct console *con, const char *s, unsigned c)
}
static struct console pstore_console = {
- .name = "pstore",
.write = pstore_console_write,
- .flags = CON_PRINTBUFFER | CON_ENABLED | CON_ANYTIME,
.index = -1,
};
static void pstore_register_console(void)
{
+ /* Show which backend is going to get console writes. */
+ strscpy(pstore_console.name, psinfo->name,
+ sizeof(pstore_console.name));
+ /*
+ * Always initialize flags here since prior unregister_console()
+ * calls may have changed settings (specifically CON_ENABLED).
+ */
+ pstore_console.flags = CON_PRINTBUFFER | CON_ENABLED | CON_ANYTIME;
register_console(&pstore_console);
}
@@ -555,8 +557,6 @@ out:
*/
int pstore_register(struct pstore_info *psi)
{
- struct module *owner = psi->owner;
-
if (backend && strcmp(backend, psi->name)) {
pr_warn("ignoring unexpected backend '%s'\n", psi->name);
return -EPERM;
@@ -576,11 +576,11 @@ int pstore_register(struct pstore_info *psi)
return -EINVAL;
}
- spin_lock(&pstore_lock);
+ mutex_lock(&psinfo_lock);
if (psinfo) {
pr_warn("backend '%s' already loaded: ignoring '%s'\n",
psinfo->name, psi->name);
- spin_unlock(&pstore_lock);
+ mutex_unlock(&psinfo_lock);
return -EBUSY;
}
@@ -589,21 +589,16 @@ int pstore_register(struct pstore_info *psi)
psinfo = psi;
mutex_init(&psinfo->read_mutex);
sema_init(&psinfo->buf_lock, 1);
- spin_unlock(&pstore_lock);
-
- if (owner && !try_module_get(owner)) {
- psinfo = NULL;
- return -EINVAL;
- }
if (psi->flags & PSTORE_FLAGS_DMESG)
allocate_buf_for_compression();
- if (pstore_is_mounted())
- pstore_get_records(0);
+ pstore_get_records(0);
- if (psi->flags & PSTORE_FLAGS_DMESG)
+ if (psi->flags & PSTORE_FLAGS_DMESG) {
+ pstore_dumper.max_reason = psinfo->max_reason;
pstore_register_kmsg();
+ }
if (psi->flags & PSTORE_FLAGS_CONSOLE)
pstore_register_console();
if (psi->flags & PSTORE_FLAGS_FTRACE)
@@ -612,33 +607,36 @@ int pstore_register(struct pstore_info *psi)
pstore_register_pmsg();
/* Start watching for new records, if desired. */
- if (pstore_update_ms >= 0) {
- pstore_timer.expires = jiffies +
- msecs_to_jiffies(pstore_update_ms);
- add_timer(&pstore_timer);
- }
+ pstore_timer_kick();
/*
* Update the module parameter backend, so it is visible
* through /sys/module/pstore/parameters/backend
*/
- backend = psi->name;
+ backend = kstrdup(psi->name, GFP_KERNEL);
pr_info("Registered %s as persistent store backend\n", psi->name);
- module_put(owner);
-
+ mutex_unlock(&psinfo_lock);
return 0;
}
EXPORT_SYMBOL_GPL(pstore_register);
void pstore_unregister(struct pstore_info *psi)
{
- /* Stop timer and make sure all work has finished. */
- pstore_update_ms = -1;
- del_timer_sync(&pstore_timer);
- flush_work(&pstore_work);
+ /* It's okay to unregister nothing. */
+ if (!psi)
+ return;
+
+ mutex_lock(&psinfo_lock);
+
+ /* Only one backend can be registered at a time. */
+ if (WARN_ON(psi != psinfo)) {
+ mutex_unlock(&psinfo_lock);
+ return;
+ }
+ /* Unregister all callbacks. */
if (psi->flags & PSTORE_FLAGS_PMSG)
pstore_unregister_pmsg();
if (psi->flags & PSTORE_FLAGS_FTRACE)
@@ -648,10 +646,19 @@ void pstore_unregister(struct pstore_info *psi)
if (psi->flags & PSTORE_FLAGS_DMESG)
pstore_unregister_kmsg();
+ /* Stop timer and make sure all work has finished. */
+ del_timer_sync(&pstore_timer);
+ flush_work(&pstore_work);
+
+ /* Remove all backend records from filesystem tree. */
+ pstore_put_backend_records(psi);
+
free_buf_for_compression();
psinfo = NULL;
+ kfree(backend);
backend = NULL;
+ mutex_unlock(&psinfo_lock);
}
EXPORT_SYMBOL_GPL(pstore_unregister);
@@ -788,9 +795,7 @@ static void pstore_timefunc(struct timer_list *unused)
schedule_work(&pstore_work);
}
- if (pstore_update_ms >= 0)
- mod_timer(&pstore_timer,
- jiffies + msecs_to_jiffies(pstore_update_ms));
+ pstore_timer_kick();
}
static void __init pstore_choose_compression(void)
@@ -835,11 +840,5 @@ static void __exit pstore_exit(void)
}
module_exit(pstore_exit)
-module_param(compress, charp, 0444);
-MODULE_PARM_DESC(compress, "Pstore compression to use");
-
-module_param(backend, charp, 0444);
-MODULE_PARM_DESC(backend, "Pstore backend to use");
-
MODULE_AUTHOR("Tony Luck <tony.luck@intel.com>");
MODULE_LICENSE("GPL");
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 795622190c01..ca6d8a867285 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -21,6 +21,7 @@
#include <linux/pstore_ram.h>
#include <linux/of.h>
#include <linux/of_address.h>
+#include "internal.h"
#define RAMOOPS_KERNMSG_HDR "===="
#define MIN_MEM_SIZE 4096UL
@@ -53,22 +54,27 @@ MODULE_PARM_DESC(mem_size,
"size of reserved RAM used to store oops/panic logs");
static unsigned int mem_type;
-module_param(mem_type, uint, 0600);
+module_param(mem_type, uint, 0400);
MODULE_PARM_DESC(mem_type,
"set to 1 to try to use unbuffered memory (default 0)");
-static int dump_oops = 1;
-module_param(dump_oops, int, 0600);
-MODULE_PARM_DESC(dump_oops,
- "set to 1 to dump oopses, 0 to only dump panics (default 1)");
+static int ramoops_max_reason = -1;
+module_param_named(max_reason, ramoops_max_reason, int, 0400);
+MODULE_PARM_DESC(max_reason,
+ "maximum reason for kmsg dump (default 2: Oops and Panic) ");
static int ramoops_ecc;
-module_param_named(ecc, ramoops_ecc, int, 0600);
+module_param_named(ecc, ramoops_ecc, int, 0400);
MODULE_PARM_DESC(ramoops_ecc,
"if non-zero, the option enables ECC support and specifies "
"ECC buffer size in bytes (1 is a special value, means 16 "
"bytes ECC)");
+static int ramoops_dump_oops = -1;
+module_param_named(dump_oops, ramoops_dump_oops, int, 0400);
+MODULE_PARM_DESC(dump_oops,
+ "(deprecated: use max_reason instead) set to 1 to dump oopses & panics, 0 to only dump panics");
+
struct ramoops_context {
struct persistent_ram_zone **dprzs; /* Oops dump zones */
struct persistent_ram_zone *cprz; /* Console zone */
@@ -81,7 +87,6 @@ struct ramoops_context {
size_t console_size;
size_t ftrace_size;
size_t pmsg_size;
- int dump_oops;
u32 flags;
struct persistent_ram_ecc_info ecc_info;
unsigned int max_dump_cnt;
@@ -168,58 +173,6 @@ static bool prz_ok(struct persistent_ram_zone *prz)
persistent_ram_ecc_string(prz, NULL, 0));
}
-static ssize_t ftrace_log_combine(struct persistent_ram_zone *dest,
- struct persistent_ram_zone *src)
-{
- size_t dest_size, src_size, total, dest_off, src_off;
- size_t dest_idx = 0, src_idx = 0, merged_idx = 0;
- void *merged_buf;
- struct pstore_ftrace_record *drec, *srec, *mrec;
- size_t record_size = sizeof(struct pstore_ftrace_record);
-
- dest_off = dest->old_log_size % record_size;
- dest_size = dest->old_log_size - dest_off;
-
- src_off = src->old_log_size % record_size;
- src_size = src->old_log_size - src_off;
-
- total = dest_size + src_size;
- merged_buf = kmalloc(total, GFP_KERNEL);
- if (!merged_buf)
- return -ENOMEM;
-
- drec = (struct pstore_ftrace_record *)(dest->old_log + dest_off);
- srec = (struct pstore_ftrace_record *)(src->old_log + src_off);
- mrec = (struct pstore_ftrace_record *)(merged_buf);
-
- while (dest_size > 0 && src_size > 0) {
- if (pstore_ftrace_read_timestamp(&drec[dest_idx]) <
- pstore_ftrace_read_timestamp(&srec[src_idx])) {
- mrec[merged_idx++] = drec[dest_idx++];
- dest_size -= record_size;
- } else {
- mrec[merged_idx++] = srec[src_idx++];
- src_size -= record_size;
- }
- }
-
- while (dest_size > 0) {
- mrec[merged_idx++] = drec[dest_idx++];
- dest_size -= record_size;
- }
-
- while (src_size > 0) {
- mrec[merged_idx++] = srec[src_idx++];
- src_size -= record_size;
- }
-
- kfree(dest->old_log);
- dest->old_log = merged_buf;
- dest->old_log_size = total;
-
- return 0;
-}
-
static ssize_t ramoops_pstore_read(struct pstore_record *record)
{
ssize_t size = 0;
@@ -291,7 +244,12 @@ static ssize_t ramoops_pstore_read(struct pstore_record *record)
tmp_prz->corrected_bytes +=
prz_next->corrected_bytes;
tmp_prz->bad_blocks += prz_next->bad_blocks;
- size = ftrace_log_combine(tmp_prz, prz_next);
+
+ size = pstore_ftrace_combine_log(
+ &tmp_prz->old_log,
+ &tmp_prz->old_log_size,
+ prz_next->old_log,
+ prz_next->old_log_size);
if (size)
goto out;
}
@@ -382,16 +340,14 @@ static int notrace ramoops_pstore_write(struct pstore_record *record)
return -EINVAL;
/*
- * Out of the various dmesg dump types, ramoops is currently designed
- * to only store crash logs, rather than storing general kernel logs.
+ * We could filter on record->reason here if we wanted to (which
+ * would duplicate what happened before the "max_reason" setting
+ * was added), but that would defeat the purpose of a system
+ * changing printk.always_kmsg_dump, so instead log everything that
+ * the kmsg dumper sends us, since it should be doing the filtering
+ * based on the combination of printk.always_kmsg_dump and our
+ * requested "max_reason".
*/
- if (record->reason != KMSG_DUMP_OOPS &&
- record->reason != KMSG_DUMP_PANIC)
- return -EINVAL;
-
- /* Skip Oopes when configured to do so. */
- if (record->reason == KMSG_DUMP_OOPS && !cxt->dump_oops)
- return -EINVAL;
/*
* Explicitly only take the first part of any new crash.
@@ -644,19 +600,25 @@ static int ramoops_init_prz(const char *name,
return 0;
}
-static int ramoops_parse_dt_size(struct platform_device *pdev,
- const char *propname, u32 *value)
+/* Read a u32 from a dt property and make sure it's safe for an int. */
+static int ramoops_parse_dt_u32(struct platform_device *pdev,
+ const char *propname,
+ u32 default_value, u32 *value)
{
u32 val32 = 0;
int ret;
ret = of_property_read_u32(pdev->dev.of_node, propname, &val32);
- if (ret < 0 && ret != -EINVAL) {
+ if (ret == -EINVAL) {
+ /* field is missing, use default value. */
+ val32 = default_value;
+ } else if (ret < 0) {
dev_err(&pdev->dev, "failed to parse property %s: %d\n",
propname, ret);
return ret;
}
+ /* Sanity check our results. */
if (val32 > INT_MAX) {
dev_err(&pdev->dev, "%s %u > INT_MAX\n", propname, val32);
return -EOVERFLOW;
@@ -687,23 +649,32 @@ static int ramoops_parse_dt(struct platform_device *pdev,
pdata->mem_size = resource_size(res);
pdata->mem_address = res->start;
pdata->mem_type = of_property_read_bool(of_node, "unbuffered");
- pdata->dump_oops = !of_property_read_bool(of_node, "no-dump-oops");
-
-#define parse_size(name, field) { \
- ret = ramoops_parse_dt_size(pdev, name, &value); \
+ /*
+ * Setting "no-dump-oops" is deprecated and will be ignored if
+ * "max_reason" is also specified.
+ */
+ if (of_property_read_bool(of_node, "no-dump-oops"))
+ pdata->max_reason = KMSG_DUMP_PANIC;
+ else
+ pdata->max_reason = KMSG_DUMP_OOPS;
+
+#define parse_u32(name, field, default_value) { \
+ ret = ramoops_parse_dt_u32(pdev, name, default_value, \
+ &value); \
if (ret < 0) \
return ret; \
field = value; \
}
- parse_size("record-size", pdata->record_size);
- parse_size("console-size", pdata->console_size);
- parse_size("ftrace-size", pdata->ftrace_size);
- parse_size("pmsg-size", pdata->pmsg_size);
- parse_size("ecc-size", pdata->ecc_info.ecc_size);
- parse_size("flags", pdata->flags);
+ parse_u32("record-size", pdata->record_size, 0);
+ parse_u32("console-size", pdata->console_size, 0);
+ parse_u32("ftrace-size", pdata->ftrace_size, 0);
+ parse_u32("pmsg-size", pdata->pmsg_size, 0);
+ parse_u32("ecc-size", pdata->ecc_info.ecc_size, 0);
+ parse_u32("flags", pdata->flags, 0);
+ parse_u32("max-reason", pdata->max_reason, pdata->max_reason);
-#undef parse_size
+#undef parse_u32
/*
* Some old Chromebooks relied on the kernel setting the
@@ -785,7 +756,6 @@ static int ramoops_probe(struct platform_device *pdev)
cxt->console_size = pdata->console_size;
cxt->ftrace_size = pdata->ftrace_size;
cxt->pmsg_size = pdata->pmsg_size;
- cxt->dump_oops = pdata->dump_oops;
cxt->flags = pdata->flags;
cxt->ecc_info = pdata->ecc_info;
@@ -828,8 +798,10 @@ static int ramoops_probe(struct platform_device *pdev)
* the single region size is how to check.
*/
cxt->pstore.flags = 0;
- if (cxt->max_dump_cnt)
+ if (cxt->max_dump_cnt) {
cxt->pstore.flags |= PSTORE_FLAGS_DMESG;
+ cxt->pstore.max_reason = pdata->max_reason;
+ }
if (cxt->console_size)
cxt->pstore.flags |= PSTORE_FLAGS_CONSOLE;
if (cxt->max_ftrace_cnt)
@@ -865,7 +837,7 @@ static int ramoops_probe(struct platform_device *pdev)
mem_size = pdata->mem_size;
mem_address = pdata->mem_address;
record_size = pdata->record_size;
- dump_oops = pdata->dump_oops;
+ ramoops_max_reason = pdata->max_reason;
ramoops_console_size = pdata->console_size;
ramoops_pmsg_size = pdata->pmsg_size;
ramoops_ftrace_size = pdata->ftrace_size;
@@ -948,7 +920,16 @@ static void __init ramoops_register_dummy(void)
pdata.console_size = ramoops_console_size;
pdata.ftrace_size = ramoops_ftrace_size;
pdata.pmsg_size = ramoops_pmsg_size;
- pdata.dump_oops = dump_oops;
+ /* If "max_reason" is set, its value has priority over "dump_oops". */
+ if (ramoops_max_reason >= 0)
+ pdata.max_reason = ramoops_max_reason;
+ /* Otherwise, if "dump_oops" is set, parse it into "max_reason". */
+ else if (ramoops_dump_oops != -1)
+ pdata.max_reason = ramoops_dump_oops ? KMSG_DUMP_OOPS
+ : KMSG_DUMP_PANIC;
+ /* And if neither are explicitly set, use the default. */
+ else
+ pdata.max_reason = KMSG_DUMP_OOPS;
pdata.flags = RAMOOPS_FLAG_FTRACE_PER_CPU;
/*
diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
index c917c191e78c..aa8e0b65ff1a 100644
--- a/fs/pstore/ram_core.c
+++ b/fs/pstore/ram_core.c
@@ -283,7 +283,7 @@ static int notrace persistent_ram_update_user(struct persistent_ram_zone *prz,
const void __user *s, unsigned int start, unsigned int count)
{
struct persistent_ram_buffer *buffer = prz->buffer;
- int ret = unlikely(__copy_from_user(buffer->data + start, s, count)) ?
+ int ret = unlikely(copy_from_user(buffer->data + start, s, count)) ?
-EFAULT : 0;
persistent_ram_update_ecc(prz, start, count);
return ret;
@@ -348,8 +348,6 @@ int notrace persistent_ram_write_user(struct persistent_ram_zone *prz,
int rem, ret = 0, c = count;
size_t start;
- if (unlikely(!access_ok(s, count)))
- return -EFAULT;
if (unlikely(c > prz->buffer_size)) {
s += c - prz->buffer_size;
c = prz->buffer_size;
diff --git a/fs/pstore/zone.c b/fs/pstore/zone.c
new file mode 100644
index 000000000000..819428dfa32f
--- /dev/null
+++ b/fs/pstore/zone.c
@@ -0,0 +1,1465 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Provide a pstore intermediate backend, organized into kernel memory
+ * allocated zones that are then mapped and flushed into a single
+ * contiguous region on a storage backend of some kind (block, mtd, etc).
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/mount.h>
+#include <linux/printk.h>
+#include <linux/fs.h>
+#include <linux/pstore_zone.h>
+#include <linux/kdev_t.h>
+#include <linux/device.h>
+#include <linux/namei.h>
+#include <linux/fcntl.h>
+#include <linux/uio.h>
+#include <linux/writeback.h>
+#include "internal.h"
+
+/**
+ * struct psz_head - header of zone to flush to storage
+ *
+ * @sig: signature to indicate header (PSZ_SIG xor PSZONE-type value)
+ * @datalen: length of data in @data
+ * @start: offset into @data where the stored bytes begin
+ * @data: zone data.
+ */
+struct psz_buffer {
+#define PSZ_SIG (0x43474244) /* DBGC */
+ uint32_t sig;
+ atomic_t datalen;
+ atomic_t start;
+ uint8_t data[];
+};
+
+/**
+ * struct psz_kmsg_header - kmsg dump-specific header to flush to storage
+ *
+ * @magic: magic num for kmsg dump header
+ * @time: kmsg dump trigger time
+ * @compressed: whether the data are compressed
+ * @counter: kmsg dump counter
+ * @reason: the kmsg dump reason (e.g. oops, panic, etc)
+ * @data: pointer to log data
+ *
+ * This is a sub-header for a kmsg dump, trailing after &psz_buffer.
+ */
+struct psz_kmsg_header {
+#define PSTORE_KMSG_HEADER_MAGIC 0x4dfc3ae5 /* Just a random number */
+ uint32_t magic;
+ struct timespec64 time;
+ bool compressed;
+ uint32_t counter;
+ enum kmsg_dump_reason reason;
+ uint8_t data[];
+};
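+
+/*
+ * On-storage layout of a kmsg dump zone (informal sketch, not to scale):
+ *
+ * +------------+-----------------+----------------------------+
+ * | psz_buffer | psz_kmsg_header | kmsg dump text ...         |
+ * +------------+-----------------+----------------------------+
+ * ^ zone->off
+ */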
+
+/**
+ * struct pstore_zone - single stored buffer
+ *
+ * @off: zone offset of storage
+ * @type: front-end type for this zone
+ * @name: front-end name for this zone
+ * @buffer: pointer to data buffer managed by this zone
+ * @oldbuf: pointer to old data buffer
+ * @buffer_size: bytes in @buffer->data
+ * @should_recover: whether this zone should be recovered from storage
+ * @dirty: whether the data in @buffer are dirty
+ *
+ * zone structure in memory.
+ */
+struct pstore_zone {
+ loff_t off;
+ const char *name;
+ enum pstore_type_id type;
+
+ struct psz_buffer *buffer;
+ struct psz_buffer *oldbuf;
+ size_t buffer_size;
+ bool should_recover;
+ atomic_t dirty;
+};
+
+/**
+ * struct psz_context - all about running state of pstore/zone
+ *
+ * @kpszs: kmsg dump storage zones
+ * @ppsz: pmsg storage zone
+ * @cpsz: console storage zone
+ * @fpszs: ftrace storage zones
+ * @kmsg_max_cnt: max count of @kpszs
+ * @kmsg_read_cnt: counter of total read kmsg dumps
+ * @kmsg_write_cnt: counter of total kmsg dump writes
+ * @pmsg_read_cnt: counter of total read pmsg zone
+ * @console_read_cnt: counter of total read console zone
+ * @ftrace_max_cnt: max count of @fpszs
+ * @ftrace_read_cnt: counter of total read ftrace zones
+ * @oops_counter: counter of oops dumps
+ * @panic_counter: counter of panic dumps
+ * @recovered: whether finished recovering data from storage
+ * @on_panic: whether panic is happening
+ * @pstore_zone_info_lock: lock to @pstore_zone_info
+ * @pstore_zone_info: information from backend
+ * @pstore: structure for pstore
+ */
+struct psz_context {
+ struct pstore_zone **kpszs;
+ struct pstore_zone *ppsz;
+ struct pstore_zone *cpsz;
+ struct pstore_zone **fpszs;
+ unsigned int kmsg_max_cnt;
+ unsigned int kmsg_read_cnt;
+ unsigned int kmsg_write_cnt;
+ unsigned int pmsg_read_cnt;
+ unsigned int console_read_cnt;
+ unsigned int ftrace_max_cnt;
+ unsigned int ftrace_read_cnt;
+ /*
+ * These counters should be recalculated during recovery. They record
+ * how many oops/panic dumps have occurred so far, carried across
+ * crashes rather than reset on every boot.
+ */
+ unsigned int oops_counter;
+ unsigned int panic_counter;
+ atomic_t recovered;
+ atomic_t on_panic;
+
+ /*
+ * pstore_zone_info_lock protects this entire structure during calls
+ * to register_pstore_zone()/unregister_pstore_zone().
+ */
+ struct mutex pstore_zone_info_lock;
+ struct pstore_zone_info *pstore_zone_info;
+ struct pstore_info pstore;
+};
+static struct psz_context pstore_zone_cxt;
+
+static void psz_flush_all_dirty_zones(struct work_struct *);
+static DECLARE_DELAYED_WORK(psz_cleaner, psz_flush_all_dirty_zones);
+
+/**
+ * enum psz_flush_mode - flush mode for psz_zone_write()
+ *
+ * @FLUSH_NONE: do not flush to storage, only update the data in memory
+ * @FLUSH_PART: flush only part of the data, including metadata, to storage
+ * @FLUSH_META: flush only the zone metadata to storage
+ * @FLUSH_ALL: flush the entire zone
+ */
+enum psz_flush_mode {
+ FLUSH_NONE = 0,
+ FLUSH_PART,
+ FLUSH_META,
+ FLUSH_ALL,
+};
+
+static inline int buffer_datalen(struct pstore_zone *zone)
+{
+ return atomic_read(&zone->buffer->datalen);
+}
+
+static inline int buffer_start(struct pstore_zone *zone)
+{
+ return atomic_read(&zone->buffer->start);
+}
+
+static inline bool is_on_panic(void)
+{
+ return atomic_read(&pstore_zone_cxt.on_panic);
+}
+
+static ssize_t psz_zone_read_buffer(struct pstore_zone *zone, char *buf,
+ size_t len, unsigned long off)
+{
+ if (!buf || !zone || !zone->buffer)
+ return -EINVAL;
+ if (off > zone->buffer_size)
+ return -EINVAL;
+ len = min_t(size_t, len, zone->buffer_size - off);
+ memcpy(buf, zone->buffer->data + off, len);
+ return len;
+}
+
+static int psz_zone_read_oldbuf(struct pstore_zone *zone, char *buf,
+ size_t len, unsigned long off)
+{
+ if (!buf || !zone || !zone->oldbuf)
+ return -EINVAL;
+ if (off > zone->buffer_size)
+ return -EINVAL;
+ len = min_t(size_t, len, zone->buffer_size - off);
+ memcpy(buf, zone->oldbuf->data + off, len);
+ return 0;
+}
+
+static int psz_zone_write(struct pstore_zone *zone,
+ enum psz_flush_mode flush_mode, const char *buf,
+ size_t len, unsigned long off)
+{
+ struct pstore_zone_info *info = pstore_zone_cxt.pstore_zone_info;
+ ssize_t wcnt = 0;
+ ssize_t (*writeop)(const char *buf, size_t bytes, loff_t pos);
+ size_t wlen;
+
+ if (off > zone->buffer_size)
+ return -EINVAL;
+
+ wlen = min_t(size_t, len, zone->buffer_size - off);
+ if (buf && wlen) {
+ memcpy(zone->buffer->data + off, buf, wlen);
+ atomic_set(&zone->buffer->datalen, wlen + off);
+ }
+
+ /* Avoid damaging old records. */
+ if (!is_on_panic() && !atomic_read(&pstore_zone_cxt.recovered))
+ goto dirty;
+
+ writeop = is_on_panic() ? info->panic_write : info->write;
+ if (!writeop)
+ goto dirty;
+
+ switch (flush_mode) {
+ case FLUSH_NONE:
+ if (unlikely(buf && wlen))
+ goto dirty;
+ return 0;
+ case FLUSH_PART:
+ wcnt = writeop((const char *)zone->buffer->data + off, wlen,
+ zone->off + sizeof(*zone->buffer) + off);
+ if (wcnt != wlen)
+ goto dirty;
+ fallthrough;
+ case FLUSH_META:
+ wlen = sizeof(struct psz_buffer);
+ wcnt = writeop((const char *)zone->buffer, wlen, zone->off);
+ if (wcnt != wlen)
+ goto dirty;
+ break;
+ case FLUSH_ALL:
+ wlen = zone->buffer_size + sizeof(*zone->buffer);
+ wcnt = writeop((const char *)zone->buffer, wlen, zone->off);
+ if (wcnt != wlen)
+ goto dirty;
+ break;
+ }
+
+ return 0;
+dirty:
+ /* no need to mark dirty if going to try next zone */
+ if (wcnt == -ENOMSG)
+ return -ENOMSG;
+ atomic_set(&zone->dirty, true);
+ /* flush dirty zones nicely */
+ if (wcnt == -EBUSY && !is_on_panic())
+ schedule_delayed_work(&psz_cleaner, msecs_to_jiffies(500));
+ return -EBUSY;
+}
+
+static int psz_flush_dirty_zone(struct pstore_zone *zone)
+{
+ int ret;
+
+ if (unlikely(!zone))
+ return -EINVAL;
+
+ if (unlikely(!atomic_read(&pstore_zone_cxt.recovered)))
+ return -EBUSY;
+
+ if (!atomic_xchg(&zone->dirty, false))
+ return 0;
+
+ ret = psz_zone_write(zone, FLUSH_ALL, NULL, 0, 0);
+ if (ret)
+ atomic_set(&zone->dirty, true);
+ return ret;
+}
+
+static int psz_flush_dirty_zones(struct pstore_zone **zones, unsigned int cnt)
+{
+ int i, ret;
+ struct pstore_zone *zone;
+
+ if (!zones)
+ return -EINVAL;
+
+ for (i = 0; i < cnt; i++) {
+ zone = zones[i];
+ if (!zone)
+ return -EINVAL;
+ ret = psz_flush_dirty_zone(zone);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
+static int psz_move_zone(struct pstore_zone *old, struct pstore_zone *new)
+{
+ const char *data = (const char *)old->buffer->data;
+ int ret;
+
+ ret = psz_zone_write(new, FLUSH_ALL, data, buffer_datalen(old), 0);
+ if (ret) {
+ atomic_set(&new->buffer->datalen, 0);
+ atomic_set(&new->dirty, false);
+ return ret;
+ }
+ atomic_set(&old->buffer->datalen, 0);
+ return 0;
+}
+
+static void psz_flush_all_dirty_zones(struct work_struct *work)
+{
+ struct psz_context *cxt = &pstore_zone_cxt;
+ int ret = 0;
+
+ if (cxt->ppsz)
+ ret |= psz_flush_dirty_zone(cxt->ppsz);
+ if (cxt->cpsz)
+ ret |= psz_flush_dirty_zone(cxt->cpsz);
+ if (cxt->kpszs)
+ ret |= psz_flush_dirty_zones(cxt->kpszs, cxt->kmsg_max_cnt);
+ if (cxt->fpszs)
+ ret |= psz_flush_dirty_zones(cxt->fpszs, cxt->ftrace_max_cnt);
+ if (ret && cxt->pstore_zone_info)
+ schedule_delayed_work(&psz_cleaner, msecs_to_jiffies(1000));
+}
+
+static int psz_kmsg_recover_data(struct psz_context *cxt)
+{
+ struct pstore_zone_info *info = cxt->pstore_zone_info;
+ struct pstore_zone *zone = NULL;
+ struct psz_buffer *buf;
+ unsigned long i;
+ ssize_t rcnt;
+
+ if (!info->read)
+ return -EINVAL;
+
+ for (i = 0; i < cxt->kmsg_max_cnt; i++) {
+ zone = cxt->kpszs[i];
+ if (unlikely(!zone))
+ return -EINVAL;
+ if (atomic_read(&zone->dirty)) {
+ unsigned int wcnt = cxt->kmsg_write_cnt;
+ struct pstore_zone *new = cxt->kpszs[wcnt];
+ int ret;
+
+ ret = psz_move_zone(zone, new);
+ if (ret) {
+ pr_err("move zone from %lu to %d failed\n",
+ i, wcnt);
+ return ret;
+ }
+ cxt->kmsg_write_cnt = (wcnt + 1) % cxt->kmsg_max_cnt;
+ }
+ if (!zone->should_recover)
+ continue;
+ buf = zone->buffer;
+ rcnt = info->read((char *)buf, zone->buffer_size + sizeof(*buf),
+ zone->off);
+ if (rcnt != zone->buffer_size + sizeof(*buf))
+ return (int)rcnt < 0 ? (int)rcnt : -EIO;
+ }
+ return 0;
+}
+
+static int psz_kmsg_recover_meta(struct psz_context *cxt)
+{
+ struct pstore_zone_info *info = cxt->pstore_zone_info;
+ struct pstore_zone *zone;
+ size_t rcnt, len;
+ struct psz_buffer *buf;
+ struct psz_kmsg_header *hdr;
+ struct timespec64 time = { };
+ unsigned long i;
+ /*
+ * Recovery may run during a panic, when we cannot allocate memory
+ * with kmalloc(), so use a local array instead.
+ */
+ char buffer_header[sizeof(*buf) + sizeof(*hdr)] = {0};
+
+ if (!info->read)
+ return -EINVAL;
+
+ len = sizeof(*buf) + sizeof(*hdr);
+ buf = (struct psz_buffer *)buffer_header;
+ for (i = 0; i < cxt->kmsg_max_cnt; i++) {
+ zone = cxt->kpszs[i];
+ if (unlikely(!zone))
+ return -EINVAL;
+
+ rcnt = info->read((char *)buf, len, zone->off);
+ if (rcnt == -ENOMSG) {
+ pr_debug("%s with id %lu may be broken, skip\n",
+ zone->name, i);
+ continue;
+ } else if (rcnt != len) {
+ pr_err("read %s with id %lu failed\n", zone->name, i);
+ return (int)rcnt < 0 ? (int)rcnt : -EIO;
+ }
+
+ if (buf->sig != zone->buffer->sig) {
+ pr_debug("no valid data in kmsg dump zone %lu\n", i);
+ continue;
+ }
+
+ if (zone->buffer_size < atomic_read(&buf->datalen)) {
+ pr_info("found overtop zone: %s: id %lu, off %lld, size %zu\n",
+ zone->name, i, zone->off,
+ zone->buffer_size);
+ continue;
+ }
+
+ hdr = (struct psz_kmsg_header *)buf->data;
+ if (hdr->magic != PSTORE_KMSG_HEADER_MAGIC) {
+ pr_info("found invalid zone: %s: id %lu, off %lld, size %zu\n",
+ zone->name, i, zone->off,
+ zone->buffer_size);
+ continue;
+ }
+
+ /*
+ * Find the newest zone; the next one must be the oldest or an unused
+ * zone, because zones are written one by one in a circle.
+ */
+ if (hdr->time.tv_sec >= time.tv_sec) {
+ time.tv_sec = hdr->time.tv_sec;
+ cxt->kmsg_write_cnt = (i + 1) % cxt->kmsg_max_cnt;
+ }
+
+ if (hdr->reason == KMSG_DUMP_OOPS)
+ cxt->oops_counter =
+ max(cxt->oops_counter, hdr->counter);
+ else if (hdr->reason == KMSG_DUMP_PANIC)
+ cxt->panic_counter =
+ max(cxt->panic_counter, hdr->counter);
+
+ if (!atomic_read(&buf->datalen)) {
+ pr_debug("found erased zone: %s: id %lu, off %lld, size %zu, datalen %d\n",
+ zone->name, i, zone->off,
+ zone->buffer_size,
+ atomic_read(&buf->datalen));
+ continue;
+ }
+
+ if (!is_on_panic())
+ zone->should_recover = true;
+ pr_debug("found nice zone: %s: id %lu, off %lld, size %zu, datalen %d\n",
+ zone->name, i, zone->off,
+ zone->buffer_size, atomic_read(&buf->datalen));
+ }
+
+ return 0;
+}
+
+static int psz_kmsg_recover(struct psz_context *cxt)
+{
+ int ret;
+
+ if (!cxt->kpszs)
+ return 0;
+
+ ret = psz_kmsg_recover_meta(cxt);
+ if (ret)
+ goto recover_fail;
+
+ ret = psz_kmsg_recover_data(cxt);
+ if (ret)
+ goto recover_fail;
+
+ return 0;
+recover_fail:
+ pr_debug("psz_recover_kmsg failed\n");
+ return ret;
+}
+
+static int psz_recover_zone(struct psz_context *cxt, struct pstore_zone *zone)
+{
+ struct pstore_zone_info *info = cxt->pstore_zone_info;
+ struct psz_buffer *oldbuf, tmpbuf;
+ int ret = 0;
+ char *buf;
+ ssize_t rcnt, len, start, off;
+
+ if (!zone || zone->oldbuf)
+ return 0;
+
+ if (is_on_panic()) {
+ /* save data as much as possible */
+ psz_flush_dirty_zone(zone);
+ return 0;
+ }
+
+ if (unlikely(!info->read))
+ return -EINVAL;
+
+ len = sizeof(struct psz_buffer);
+ rcnt = info->read((char *)&tmpbuf, len, zone->off);
+ if (rcnt != len) {
+ pr_debug("read zone %s failed\n", zone->name);
+ return (int)rcnt < 0 ? (int)rcnt : -EIO;
+ }
+
+ if (tmpbuf.sig != zone->buffer->sig) {
+ pr_debug("no valid data in zone %s\n", zone->name);
+ return 0;
+ }
+
+ if (zone->buffer_size < atomic_read(&tmpbuf.datalen) ||
+ zone->buffer_size < atomic_read(&tmpbuf.start)) {
+ pr_info("found overtop zone: %s: off %lld, size %zu\n",
+ zone->name, zone->off, zone->buffer_size);
+ /* just keep going */
+ return 0;
+ }
+
+ if (!atomic_read(&tmpbuf.datalen)) {
+ pr_debug("found erased zone: %s: off %lld, size %zu, datalen %d\n",
+ zone->name, zone->off, zone->buffer_size,
+ atomic_read(&tmpbuf.datalen));
+ return 0;
+ }
+
+ pr_debug("found nice zone: %s: off %lld, size %zu, datalen %d\n",
+ zone->name, zone->off, zone->buffer_size,
+ atomic_read(&tmpbuf.datalen));
+
+ len = atomic_read(&tmpbuf.datalen) + sizeof(*oldbuf);
+ oldbuf = kzalloc(len, GFP_KERNEL);
+ if (!oldbuf)
+ return -ENOMEM;
+
+ memcpy(oldbuf, &tmpbuf, sizeof(*oldbuf));
+ buf = (char *)oldbuf + sizeof(*oldbuf);
+ len = atomic_read(&oldbuf->datalen);
+ start = atomic_read(&oldbuf->start);
+ off = zone->off + sizeof(*oldbuf);
+
+ /* get part of data */
+ rcnt = info->read(buf, len - start, off + start);
+ if (rcnt != len - start) {
+ pr_err("read zone %s failed\n", zone->name);
+ ret = (int)rcnt < 0 ? (int)rcnt : -EIO;
+ goto free_oldbuf;
+ }
+
+ /* get the rest of data */
+ rcnt = info->read(buf + len - start, start, off);
+ if (rcnt != start) {
+ pr_err("read zone %s failed\n", zone->name);
+ ret = (int)rcnt < 0 ? (int)rcnt : -EIO;
+ goto free_oldbuf;
+ }
+
+ zone->oldbuf = oldbuf;
+ psz_flush_dirty_zone(zone);
+ return 0;
+
+free_oldbuf:
+ kfree(oldbuf);
+ return ret;
+}
+
+static int psz_recover_zones(struct psz_context *cxt,
+ struct pstore_zone **zones, unsigned int cnt)
+{
+ int ret;
+ unsigned int i;
+ struct pstore_zone *zone;
+
+ if (!zones)
+ return 0;
+
+ for (i = 0; i < cnt; i++) {
+ zone = zones[i];
+ if (unlikely(!zone))
+ continue;
+ ret = psz_recover_zone(cxt, zone);
+ if (ret)
+ goto recover_fail;
+ }
+
+ return 0;
+recover_fail:
+ pr_debug("recover %s[%u] failed\n", zone->name, i);
+ return ret;
+}
+
+/**
+ * psz_recovery() - recover data from storage
+ * @cxt: the context of pstore/zone
+ *
+ * recovery means reading data back from storage after rebooting
+ *
+ * Return: 0 on success, others on failure.
+ */
+static inline int psz_recovery(struct psz_context *cxt)
+{
+ int ret;
+
+ if (atomic_read(&cxt->recovered))
+ return 0;
+
+ ret = psz_kmsg_recover(cxt);
+ if (ret)
+ goto out;
+
+ ret = psz_recover_zone(cxt, cxt->ppsz);
+ if (ret)
+ goto out;
+
+ ret = psz_recover_zone(cxt, cxt->cpsz);
+ if (ret)
+ goto out;
+
+ ret = psz_recover_zones(cxt, cxt->fpszs, cxt->ftrace_max_cnt);
+
+out:
+ if (unlikely(ret))
+ pr_err("recover failed\n");
+ else {
+ pr_debug("recover end!\n");
+ atomic_set(&cxt->recovered, 1);
+ }
+ return ret;
+}
+
+static int psz_pstore_open(struct pstore_info *psi)
+{
+ struct psz_context *cxt = psi->data;
+
+ cxt->kmsg_read_cnt = 0;
+ cxt->pmsg_read_cnt = 0;
+ cxt->console_read_cnt = 0;
+ cxt->ftrace_read_cnt = 0;
+ return 0;
+}
+
+static inline bool psz_old_ok(struct pstore_zone *zone)
+{
+ if (zone && zone->oldbuf && atomic_read(&zone->oldbuf->datalen))
+ return true;
+ return false;
+}
+
+static inline bool psz_ok(struct pstore_zone *zone)
+{
+ if (zone && zone->buffer && buffer_datalen(zone))
+ return true;
+ return false;
+}
+
+static inline int psz_kmsg_erase(struct psz_context *cxt,
+ struct pstore_zone *zone, struct pstore_record *record)
+{
+ struct psz_buffer *buffer = zone->buffer;
+ struct psz_kmsg_header *hdr =
+ (struct psz_kmsg_header *)buffer->data;
+ size_t size;
+
+ if (unlikely(!psz_ok(zone)))
+ return 0;
+
+ /* this zone is already updated, no need to erase */
+ if (record->count != hdr->counter)
+ return 0;
+
+ size = buffer_datalen(zone) + sizeof(*zone->buffer);
+ atomic_set(&zone->buffer->datalen, 0);
+ if (cxt->pstore_zone_info->erase)
+ return cxt->pstore_zone_info->erase(size, zone->off);
+ else
+ return psz_zone_write(zone, FLUSH_META, NULL, 0, 0);
+}
+
+static inline int psz_record_erase(struct psz_context *cxt,
+ struct pstore_zone *zone)
+{
+ if (unlikely(!psz_old_ok(zone)))
+ return 0;
+
+ kfree(zone->oldbuf);
+ zone->oldbuf = NULL;
+ /*
+ * If there are new data in the zone buffer, the old data are already
+ * invalid, so there is no need to flush zeroes (erase) to the block
+ * device.
+ */
+ if (!buffer_datalen(zone))
+ return psz_zone_write(zone, FLUSH_META, NULL, 0, 0);
+ psz_flush_dirty_zone(zone);
+ return 0;
+}
+
+static int psz_pstore_erase(struct pstore_record *record)
+{
+ struct psz_context *cxt = record->psi->data;
+
+ switch (record->type) {
+ case PSTORE_TYPE_DMESG:
+ if (record->id >= cxt->kmsg_max_cnt)
+ return -EINVAL;
+ return psz_kmsg_erase(cxt, cxt->kpszs[record->id], record);
+ case PSTORE_TYPE_PMSG:
+ return psz_record_erase(cxt, cxt->ppsz);
+ case PSTORE_TYPE_CONSOLE:
+ return psz_record_erase(cxt, cxt->cpsz);
+ case PSTORE_TYPE_FTRACE:
+ if (record->id >= cxt->ftrace_max_cnt)
+ return -EINVAL;
+ return psz_record_erase(cxt, cxt->fpszs[record->id]);
+ default: return -EINVAL;
+ }
+}
+
+static void psz_write_kmsg_hdr(struct pstore_zone *zone,
+ struct pstore_record *record)
+{
+ struct psz_context *cxt = record->psi->data;
+ struct psz_buffer *buffer = zone->buffer;
+ struct psz_kmsg_header *hdr =
+ (struct psz_kmsg_header *)buffer->data;
+
+ hdr->magic = PSTORE_KMSG_HEADER_MAGIC;
+ hdr->compressed = record->compressed;
+ hdr->time.tv_sec = record->time.tv_sec;
+ hdr->time.tv_nsec = record->time.tv_nsec;
+ hdr->reason = record->reason;
+ if (hdr->reason == KMSG_DUMP_OOPS)
+ hdr->counter = ++cxt->oops_counter;
+ else if (hdr->reason == KMSG_DUMP_PANIC)
+ hdr->counter = ++cxt->panic_counter;
+ else
+ hdr->counter = 0;
+}
+
+/*
+ * In case a zone is broken, which may happen on MTD devices, try each
+ * zone in turn, starting at cxt->kmsg_write_cnt.
+ */
+static inline int notrace psz_kmsg_write_record(struct psz_context *cxt,
+ struct pstore_record *record)
+{
+ size_t size, hlen;
+ struct pstore_zone *zone;
+ unsigned int i;
+
+ for (i = 0; i < cxt->kmsg_max_cnt; i++) {
+ unsigned int zonenum, len;
+ int ret;
+
+ zonenum = (cxt->kmsg_write_cnt + i) % cxt->kmsg_max_cnt;
+ zone = cxt->kpszs[zonenum];
+ if (unlikely(!zone))
+ return -ENOSPC;
+
+ /* To avoid destroying old data, allocate a new buffer. */
+ len = zone->buffer_size + sizeof(*zone->buffer);
+ zone->oldbuf = zone->buffer;
+ zone->buffer = kzalloc(len, GFP_KERNEL);
+ if (!zone->buffer) {
+ zone->buffer = zone->oldbuf;
+ return -ENOMEM;
+ }
+ zone->buffer->sig = zone->oldbuf->sig;
+
+ pr_debug("write %s to zone id %d\n", zone->name, zonenum);
+ psz_write_kmsg_hdr(zone, record);
+ hlen = sizeof(struct psz_kmsg_header);
+ size = min_t(size_t, record->size, zone->buffer_size - hlen);
+ ret = psz_zone_write(zone, FLUSH_ALL, record->buf, size, hlen);
+ if (likely(!ret || ret != -ENOMSG)) {
+ cxt->kmsg_write_cnt = zonenum + 1;
+ cxt->kmsg_write_cnt %= cxt->kmsg_max_cnt;
+ /* No need to try the next zone; free the old zone buffer. */
+ kfree(zone->oldbuf);
+ zone->oldbuf = NULL;
+ return ret;
+ }
+
+ pr_debug("zone %u may be broken, try next dmesg zone\n",
+ zonenum);
+ kfree(zone->buffer);
+ zone->buffer = zone->oldbuf;
+ zone->oldbuf = NULL;
+ }
+
+ return -EBUSY;
+}
+
+static int notrace psz_kmsg_write(struct psz_context *cxt,
+ struct pstore_record *record)
+{
+ int ret;
+
+ /*
+ * Explicitly only take the first part of any new crash.
+ * If our buffer is larger than kmsg_bytes, this can never happen,
+ * and if our buffer is smaller than kmsg_bytes, we don't want the
+ * report split across multiple records.
+ */
+ if (record->part != 1)
+ return -ENOSPC;
+
+ if (!cxt->kpszs)
+ return -ENOSPC;
+
+ ret = psz_kmsg_write_record(cxt, record);
+ if (!ret && is_on_panic()) {
+ /* Ensure all data are flushed to storage during a panic. */
+ pr_debug("try to flush other dirty zones\n");
+ psz_flush_all_dirty_zones(NULL);
+ }
+
+ /* Always return 0, as the record has been handled in the buffer. */
+ return 0;
+}
+
+static int notrace psz_record_write(struct pstore_zone *zone,
+ struct pstore_record *record)
+{
+ size_t start, rem;
+ bool is_full_data = false;
+ char *buf;
+ int cnt;
+
+ if (!zone || !record)
+ return -ENOSPC;
+
+ if (atomic_read(&zone->buffer->datalen) >= zone->buffer_size)
+ is_full_data = true;
+
+ cnt = record->size;
+ buf = record->buf;
+ if (unlikely(cnt > zone->buffer_size)) {
+ buf += cnt - zone->buffer_size;
+ cnt = zone->buffer_size;
+ }
+
+ start = buffer_start(zone);
+ rem = zone->buffer_size - start;
+ if (unlikely(rem < cnt)) {
+ psz_zone_write(zone, FLUSH_PART, buf, rem, start);
+ buf += rem;
+ cnt -= rem;
+ start = 0;
+ is_full_data = true;
+ }
+
+ atomic_set(&zone->buffer->start, cnt + start);
+ psz_zone_write(zone, FLUSH_PART, buf, cnt, start);
+
+ /*
+ * psz_zone_write() sets datalen to start + cnt. That is fine while
+ * the actual data length is less than the buffer size. Once the data
+ * length exceeds the buffer size, pmsg wraps around to the beginning
+ * of the zone, which would leave buffer->datalen wrong, so reset
+ * datalen to the buffer size in that case.
+ */
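+ /*
+ * Worked example (illustrative only): with buffer_size = 8, start = 6,
+ * and an incoming record of cnt = 4, the first 2 bytes land at offsets
+ * 6-7, the remaining 2 wrap around to offsets 0-1, and datalen is then
+ * pinned at the buffer size (8).
+ */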
+ if (is_full_data) {
+ atomic_set(&zone->buffer->datalen, zone->buffer_size);
+ psz_zone_write(zone, FLUSH_META, NULL, 0, 0);
+ }
+ return 0;
+}
+
+static int notrace psz_pstore_write(struct pstore_record *record)
+{
+ struct psz_context *cxt = record->psi->data;
+
+ if (record->type == PSTORE_TYPE_DMESG &&
+ record->reason == KMSG_DUMP_PANIC)
+ atomic_set(&cxt->on_panic, 1);
+
+ /*
+ * If a panic is in progress, write nothing except panic records. This
+ * avoids the case where panic_write() prints a log message that wakes
+ * up the console backend.
+ */
+ if (is_on_panic() && record->type != PSTORE_TYPE_DMESG)
+ return -EBUSY;
+
+ switch (record->type) {
+ case PSTORE_TYPE_DMESG:
+ return psz_kmsg_write(cxt, record);
+ case PSTORE_TYPE_CONSOLE:
+ return psz_record_write(cxt->cpsz, record);
+ case PSTORE_TYPE_PMSG:
+ return psz_record_write(cxt->ppsz, record);
+ case PSTORE_TYPE_FTRACE: {
+ int zonenum = smp_processor_id();
+
+ if (!cxt->fpszs)
+ return -ENOSPC;
+ return psz_record_write(cxt->fpszs[zonenum], record);
+ }
+ default:
+ return -EINVAL;
+ }
+}
+
+static struct pstore_zone *psz_read_next_zone(struct psz_context *cxt)
+{
+ struct pstore_zone *zone = NULL;
+
+ while (cxt->kmsg_read_cnt < cxt->kmsg_max_cnt) {
+ zone = cxt->kpszs[cxt->kmsg_read_cnt++];
+ if (psz_ok(zone))
+ return zone;
+ }
+
+ if (cxt->ftrace_read_cnt < cxt->ftrace_max_cnt)
+ /*
+ * No need to call psz_old_ok() here; psz_ftrace_read() does it while
+ * combining zones, and it must traverse all zones in case some of
+ * them contain no data.
+ */
+ return cxt->fpszs[cxt->ftrace_read_cnt++];
+
+ if (cxt->pmsg_read_cnt == 0) {
+ cxt->pmsg_read_cnt++;
+ zone = cxt->ppsz;
+ if (psz_old_ok(zone))
+ return zone;
+ }
+
+ if (cxt->console_read_cnt == 0) {
+ cxt->console_read_cnt++;
+ zone = cxt->cpsz;
+ if (psz_old_ok(zone))
+ return zone;
+ }
+
+ return NULL;
+}
+
+static int psz_kmsg_read_hdr(struct pstore_zone *zone,
+ struct pstore_record *record)
+{
+ struct psz_buffer *buffer = zone->buffer;
+ struct psz_kmsg_header *hdr =
+ (struct psz_kmsg_header *)buffer->data;
+
+ if (hdr->magic != PSTORE_KMSG_HEADER_MAGIC)
+ return -EINVAL;
+ record->compressed = hdr->compressed;
+ record->time.tv_sec = hdr->time.tv_sec;
+ record->time.tv_nsec = hdr->time.tv_nsec;
+ record->reason = hdr->reason;
+ record->count = hdr->counter;
+ return 0;
+}
+
+static ssize_t psz_kmsg_read(struct pstore_zone *zone,
+ struct pstore_record *record)
+{
+ ssize_t size, hlen = 0;
+
+ size = buffer_datalen(zone);
+ /* Clear and skip this kmsg dump record if it has no valid header */
+ if (psz_kmsg_read_hdr(zone, record)) {
+ atomic_set(&zone->buffer->datalen, 0);
+ atomic_set(&zone->dirty, 0);
+ return -ENOMSG;
+ }
+ size -= sizeof(struct psz_kmsg_header);
+
+ if (!record->compressed) {
+ char *buf = kasprintf(GFP_KERNEL, "%s: Total %d times\n",
+ kmsg_dump_reason_str(record->reason),
+ record->count);
+ if (!buf)
+ return -ENOMEM;
+ hlen = strlen(buf);
+ record->buf = krealloc(buf, hlen + size, GFP_KERNEL);
+ if (!record->buf) {
+ kfree(buf);
+ return -ENOMEM;
+ }
+ } else {
+ record->buf = kmalloc(size, GFP_KERNEL);
+ if (!record->buf)
+ return -ENOMEM;
+ }
+
+ size = psz_zone_read_buffer(zone, record->buf + hlen, size,
+ sizeof(struct psz_kmsg_header));
+ if (unlikely(size < 0)) {
+ kfree(record->buf);
+ return -ENOMSG;
+ }
+
+ return size + hlen;
+}
+
+/* try to combine all ftrace zones */
+static ssize_t psz_ftrace_read(struct pstore_zone *zone,
+ struct pstore_record *record)
+{
+ struct psz_context *cxt;
+ struct psz_buffer *buf;
+ int ret;
+
+ if (!zone || !record)
+ return -ENOSPC;
+
+ if (!psz_old_ok(zone))
+ goto out;
+
+ buf = (struct psz_buffer *)zone->oldbuf;
+ if (!buf)
+ return -ENOMSG;
+
+ ret = pstore_ftrace_combine_log(&record->buf, &record->size,
+ (char *)buf->data, atomic_read(&buf->datalen));
+ if (unlikely(ret))
+ return ret;
+
+out:
+ cxt = record->psi->data;
+ if (cxt->ftrace_read_cnt < cxt->ftrace_max_cnt)
+ /* then, read next ftrace zone */
+ return -ENOMSG;
+ record->id = 0;
+ return record->size ? record->size : -ENOMSG;
+}
+
+static ssize_t psz_record_read(struct pstore_zone *zone,
+ struct pstore_record *record)
+{
+ size_t len;
+ struct psz_buffer *buf;
+
+ if (!zone || !record)
+ return -ENOSPC;
+
+ buf = (struct psz_buffer *)zone->oldbuf;
+ if (!buf)
+ return -ENOMSG;
+
+ len = atomic_read(&buf->datalen);
+ record->buf = kmalloc(len, GFP_KERNEL);
+ if (!record->buf)
+ return -ENOMEM;
+
+ if (unlikely(psz_zone_read_oldbuf(zone, record->buf, len, 0))) {
+ kfree(record->buf);
+ return -ENOMSG;
+ }
+
+ return len;
+}
+
+static ssize_t psz_pstore_read(struct pstore_record *record)
+{
+ struct psz_context *cxt = record->psi->data;
+ ssize_t (*readop)(struct pstore_zone *zone,
+ struct pstore_record *record);
+ struct pstore_zone *zone;
+ ssize_t ret;
+
+ /* Before reading, we must recover data from storage. */
+ ret = psz_recovery(cxt);
+ if (ret)
+ return ret;
+
+next_zone:
+ zone = psz_read_next_zone(cxt);
+ if (!zone)
+ return 0;
+
+ record->type = zone->type;
+ switch (record->type) {
+ case PSTORE_TYPE_DMESG:
+ readop = psz_kmsg_read;
+ record->id = cxt->kmsg_read_cnt - 1;
+ break;
+ case PSTORE_TYPE_FTRACE:
+ readop = psz_ftrace_read;
+ break;
+ case PSTORE_TYPE_CONSOLE:
+ fallthrough;
+ case PSTORE_TYPE_PMSG:
+ readop = psz_record_read;
+ break;
+ default:
+ goto next_zone;
+ }
+
+ ret = readop(zone, record);
+ if (ret == -ENOMSG)
+ goto next_zone;
+ return ret;
+}
+
+static struct psz_context pstore_zone_cxt = {
+ .pstore_zone_info_lock =
+ __MUTEX_INITIALIZER(pstore_zone_cxt.pstore_zone_info_lock),
+ .recovered = ATOMIC_INIT(0),
+ .on_panic = ATOMIC_INIT(0),
+ .pstore = {
+ .owner = THIS_MODULE,
+ .open = psz_pstore_open,
+ .read = psz_pstore_read,
+ .write = psz_pstore_write,
+ .erase = psz_pstore_erase,
+ },
+};
+
+static void psz_free_zone(struct pstore_zone **pszone)
+{
+ struct pstore_zone *zone = *pszone;
+
+ if (!zone)
+ return;
+
+ kfree(zone->buffer);
+ kfree(zone);
+ *pszone = NULL;
+}
+
+static void psz_free_zones(struct pstore_zone ***pszones, unsigned int *cnt)
+{
+ struct pstore_zone **zones = *pszones;
+
+ if (!zones)
+ return;
+
+ while (*cnt > 0) {
+ (*cnt)--;
+ psz_free_zone(&(zones[*cnt]));
+ }
+ kfree(zones);
+ *pszones = NULL;
+}
+
+static void psz_free_all_zones(struct psz_context *cxt)
+{
+ if (cxt->kpszs)
+ psz_free_zones(&cxt->kpszs, &cxt->kmsg_max_cnt);
+ if (cxt->ppsz)
+ psz_free_zone(&cxt->ppsz);
+ if (cxt->cpsz)
+ psz_free_zone(&cxt->cpsz);
+ if (cxt->fpszs)
+ psz_free_zones(&cxt->fpszs, &cxt->ftrace_max_cnt);
+}
+
+static struct pstore_zone *psz_init_zone(enum pstore_type_id type,
+ loff_t *off, size_t size)
+{
+ struct pstore_zone_info *info = pstore_zone_cxt.pstore_zone_info;
+ struct pstore_zone *zone;
+ const char *name = pstore_type_to_name(type);
+
+ if (!size)
+ return NULL;
+
+ if (*off + size > info->total_size) {
+ pr_err("no room for %s (0x%zx@0x%llx over 0x%lx)\n",
+ name, size, *off, info->total_size);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ zone = kzalloc(sizeof(struct pstore_zone), GFP_KERNEL);
+ if (!zone)
+ return ERR_PTR(-ENOMEM);
+
+ zone->buffer = kmalloc(size, GFP_KERNEL);
+ if (!zone->buffer) {
+ kfree(zone);
+ return ERR_PTR(-ENOMEM);
+ }
+ memset(zone->buffer, 0xFF, size);
+ zone->off = *off;
+ zone->name = name;
+ zone->type = type;
+ zone->buffer_size = size - sizeof(struct psz_buffer);
+ zone->buffer->sig = type ^ PSZ_SIG;
+ zone->oldbuf = NULL;
+ atomic_set(&zone->dirty, 0);
+ atomic_set(&zone->buffer->datalen, 0);
+ atomic_set(&zone->buffer->start, 0);
+
+ *off += size;
+
+ pr_debug("pszone %s: off 0x%llx, %zu header, %zu data\n", zone->name,
+ zone->off, sizeof(*zone->buffer), zone->buffer_size);
+ return zone;
+}
+
+static struct pstore_zone **psz_init_zones(enum pstore_type_id type,
+ loff_t *off, size_t total_size, ssize_t record_size,
+ unsigned int *cnt)
+{
+ struct pstore_zone_info *info = pstore_zone_cxt.pstore_zone_info;
+ struct pstore_zone **zones, *zone;
+ const char *name = pstore_type_to_name(type);
+ int c, i;
+
+ *cnt = 0;
+ if (!total_size || !record_size)
+ return NULL;
+
+ if (*off + total_size > info->total_size) {
+ pr_err("no room for zones %s (0x%zx@0x%llx over 0x%lx)\n",
+ name, total_size, *off, info->total_size);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ c = total_size / record_size;
+ zones = kcalloc(c, sizeof(*zones), GFP_KERNEL);
+ if (!zones) {
+ pr_err("allocate for zones %s failed\n", name);
+ return ERR_PTR(-ENOMEM);
+ }
+ memset(zones, 0, c * sizeof(*zones));
+
+ for (i = 0; i < c; i++) {
+ zone = psz_init_zone(type, off, record_size);
+ if (!zone || IS_ERR(zone)) {
+ pr_err("initialize zones %s failed\n", name);
+ psz_free_zones(&zones, &i);
+ return (void *)zone;
+ }
+ zones[i] = zone;
+ }
+
+ *cnt = c;
+ return zones;
+}
+
+static int psz_alloc_zones(struct psz_context *cxt)
+{
+ struct pstore_zone_info *info = cxt->pstore_zone_info;
+ loff_t off = 0;
+ int err;
+ size_t off_size = 0;
+
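+ /*
+ * Zones are laid out back to back: pmsg first, then console, then the
+ * per-CPU ftrace zones, with the kmsg zones taking whatever space
+ * remains at the end of the region.
+ */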
+ off_size += info->pmsg_size;
+ cxt->ppsz = psz_init_zone(PSTORE_TYPE_PMSG, &off, info->pmsg_size);
+ if (IS_ERR(cxt->ppsz)) {
+ err = PTR_ERR(cxt->ppsz);
+ cxt->ppsz = NULL;
+ goto free_out;
+ }
+
+ off_size += info->console_size;
+ cxt->cpsz = psz_init_zone(PSTORE_TYPE_CONSOLE, &off,
+ info->console_size);
+ if (IS_ERR(cxt->cpsz)) {
+ err = PTR_ERR(cxt->cpsz);
+ cxt->cpsz = NULL;
+ goto free_out;
+ }
+
+ off_size += info->ftrace_size;
+ cxt->fpszs = psz_init_zones(PSTORE_TYPE_FTRACE, &off,
+ info->ftrace_size,
+ info->ftrace_size / nr_cpu_ids,
+ &cxt->ftrace_max_cnt);
+ if (IS_ERR(cxt->fpszs)) {
+ err = PTR_ERR(cxt->fpszs);
+ cxt->fpszs = NULL;
+ goto free_out;
+ }
+
+ cxt->kpszs = psz_init_zones(PSTORE_TYPE_DMESG, &off,
+ info->total_size - off_size,
+ info->kmsg_size, &cxt->kmsg_max_cnt);
+ if (IS_ERR(cxt->kpszs)) {
+ err = PTR_ERR(cxt->kpszs);
+ cxt->kpszs = NULL;
+ goto free_out;
+ }
+
+ return 0;
+free_out:
+ psz_free_all_zones(cxt);
+ return err;
+}
+
+/**
+ * register_pstore_zone() - register a back-end with pstore/zone
+ *
+ * @info: back-end driver information. See &struct pstore_zone_info.
+ *
+ * Only one back-end can be registered at a time.
+ *
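+ * A back-end typically fills in its geometry and callbacks and then
+ * registers itself. A minimal sketch (the sizes and callback names are
+ * illustrative only, not part of any existing driver):
+ *
+ *	static struct pstore_zone_info info = {
+ *		.name		= "myblk",
+ *		.total_size	= SZ_1M,
+ *		.kmsg_size	= SZ_64K,
+ *		.max_reason	= KMSG_DUMP_OOPS,
+ *		.read		= myblk_read,
+ *		.write		= myblk_write,
+ *	};
+ *
+ *	err = register_pstore_zone(&info);
+ *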
+ * Return: 0 on success, others on failure.
+ */
+int register_pstore_zone(struct pstore_zone_info *info)
+{
+ int err = -EINVAL;
+ struct psz_context *cxt = &pstore_zone_cxt;
+
+ if (info->total_size < 4096) {
+ pr_warn("total_size must be >= 4096\n");
+ return -EINVAL;
+ }
+
+ if (!info->kmsg_size && !info->pmsg_size && !info->console_size &&
+ !info->ftrace_size) {
+ pr_warn("at least one record size must be non-zero\n");
+ return -EINVAL;
+ }
+
+ if (!info->name || !info->name[0])
+ return -EINVAL;
+
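+ /*
+ * Each configured size must be either zero or a multiple of the given
+ * alignment (4096 or SECTOR_SIZE, both powers of two, hence the mask
+ * test below).
+ */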
+#define check_size(name, size) { \
+ if (info->name > 0 && info->name < (size)) { \
+ pr_err(#name " must be over %d\n", (size)); \
+ return -EINVAL; \
+ } \
+ if (info->name & (size - 1)) { \
+ pr_err(#name " must be a multiple of %d\n", \
+ (size)); \
+ return -EINVAL; \
+ } \
+ }
+
+ check_size(total_size, 4096);
+ check_size(kmsg_size, SECTOR_SIZE);
+ check_size(pmsg_size, SECTOR_SIZE);
+ check_size(console_size, SECTOR_SIZE);
+ check_size(ftrace_size, SECTOR_SIZE);
+
+#undef check_size
+
+ /*
+ * The @read and @write callbacks are both required: without @read,
+ * mounting pstore may fail; without @write, pstore cannot remove
+ * record files.
+ */
+ if (!info->read || !info->write) {
+ pr_err("no valid general read/write interface\n");
+ return -EINVAL;
+ }
+
+ mutex_lock(&cxt->pstore_zone_info_lock);
+ if (cxt->pstore_zone_info) {
+ pr_warn("'%s' already loaded: ignoring '%s'\n",
+ cxt->pstore_zone_info->name, info->name);
+ mutex_unlock(&cxt->pstore_zone_info_lock);
+ return -EBUSY;
+ }
+ cxt->pstore_zone_info = info;
+
+ pr_debug("register %s with properties:\n", info->name);
+ pr_debug("\ttotal size : %ld Bytes\n", info->total_size);
+ pr_debug("\tkmsg size : %ld Bytes\n", info->kmsg_size);
+ pr_debug("\tpmsg size : %ld Bytes\n", info->pmsg_size);
+ pr_debug("\tconsole size : %ld Bytes\n", info->console_size);
+ pr_debug("\tftrace size : %ld Bytes\n", info->ftrace_size);
+
+ err = psz_alloc_zones(cxt);
+ if (err) {
+ pr_err("alloc zones failed\n");
+ goto fail_out;
+ }
+
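+ /*
+ * The shared pstore buffer holds one kmsg record at a time, so it is
+ * sized to a single kmsg zone minus the per-record header.
+ */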
+ if (info->kmsg_size) {
+ cxt->pstore.bufsize = cxt->kpszs[0]->buffer_size -
+ sizeof(struct psz_kmsg_header);
+ cxt->pstore.buf = kzalloc(cxt->pstore.bufsize, GFP_KERNEL);
+ if (!cxt->pstore.buf) {
+ err = -ENOMEM;
+ goto fail_free;
+ }
+ }
+ cxt->pstore.data = cxt;
+
+ pr_info("registered %s as backend for", info->name);
+ cxt->pstore.max_reason = info->max_reason;
+ cxt->pstore.name = info->name;
+ if (info->kmsg_size) {
+ cxt->pstore.flags |= PSTORE_FLAGS_DMESG;
+ pr_cont(" kmsg(%s",
+ kmsg_dump_reason_str(cxt->pstore.max_reason));
+ if (cxt->pstore_zone_info->panic_write)
+ pr_cont(",panic_write");
+ pr_cont(")");
+ }
+ if (info->pmsg_size) {
+ cxt->pstore.flags |= PSTORE_FLAGS_PMSG;
+ pr_cont(" pmsg");
+ }
+ if (info->console_size) {
+ cxt->pstore.flags |= PSTORE_FLAGS_CONSOLE;
+ pr_cont(" console");
+ }
+ if (info->ftrace_size) {
+ cxt->pstore.flags |= PSTORE_FLAGS_FTRACE;
+ pr_cont(" ftrace");
+ }
+ pr_cont("\n");
+
+ err = pstore_register(&cxt->pstore);
+ if (err) {
+ pr_err("registering with pstore failed\n");
+ goto fail_free;
+ }
+ mutex_unlock(&pstore_zone_cxt.pstore_zone_info_lock);
+
+ return 0;
+
+fail_free:
+ kfree(cxt->pstore.buf);
+ cxt->pstore.buf = NULL;
+ cxt->pstore.bufsize = 0;
+ psz_free_all_zones(cxt);
+fail_out:
+ pstore_zone_cxt.pstore_zone_info = NULL;
+ mutex_unlock(&pstore_zone_cxt.pstore_zone_info_lock);
+ return err;
+}
+EXPORT_SYMBOL_GPL(register_pstore_zone);
+
+/**
+ * unregister_pstore_zone() - unregister from pstore/zone
+ *
+ * @info: back-end driver information. See struct pstore_zone_info.
+ */
+void unregister_pstore_zone(struct pstore_zone_info *info)
+{
+ struct psz_context *cxt = &pstore_zone_cxt;
+
+ mutex_lock(&cxt->pstore_zone_info_lock);
+ if (!cxt->pstore_zone_info) {
+ mutex_unlock(&cxt->pstore_zone_info_lock);
+ return;
+ }
+
+ /* Stop incoming writes from pstore. */
+ pstore_unregister(&cxt->pstore);
+
+ /* Flush any pending writes. */
+ psz_flush_all_dirty_zones(NULL);
+ flush_delayed_work(&psz_cleaner);
+
+ /* Clean up allocations. */
+ kfree(cxt->pstore.buf);
+ cxt->pstore.buf = NULL;
+ cxt->pstore.bufsize = 0;
+ cxt->pstore_zone_info = NULL;
+
+ psz_free_all_zones(cxt);
+
+ /* Clear counters and zone state. */
+ cxt->oops_counter = 0;
+ cxt->panic_counter = 0;
+ atomic_set(&cxt->recovered, 0);
+ atomic_set(&cxt->on_panic, 0);
+
+ mutex_unlock(&cxt->pstore_zone_info_lock);
+}
+EXPORT_SYMBOL_GPL(unregister_pstore_zone);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("WeiXiong Liao <liaoweixiong@allwinnertech.com>");
+MODULE_AUTHOR("Kees Cook <keescook@chromium.org>");
+MODULE_DESCRIPTION("Storage Manager for pstore/blk");
diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c
index 345db56c98fd..755293c8c71a 100644
--- a/fs/qnx6/inode.c
+++ b/fs/qnx6/inode.c
@@ -99,10 +99,9 @@ static int qnx6_readpage(struct file *file, struct page *page)
return mpage_readpage(page, qnx6_get_block);
}
-static int qnx6_readpages(struct file *file, struct address_space *mapping,
- struct list_head *pages, unsigned nr_pages)
+static void qnx6_readahead(struct readahead_control *rac)
{
- return mpage_readpages(mapping, pages, nr_pages, qnx6_get_block);
+ mpage_readahead(rac, qnx6_get_block);
}
/*
@@ -499,7 +498,7 @@ static sector_t qnx6_bmap(struct address_space *mapping, sector_t block)
}
static const struct address_space_operations qnx6_aops = {
.readpage = qnx6_readpage,
- .readpages = qnx6_readpages,
+ .readahead = qnx6_readahead,
.bmap = qnx6_bmap
};
diff --git a/fs/readdir.c b/fs/readdir.c
index de2eceffdee8..a49f07c11cfb 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -157,17 +157,18 @@ static int fillonedir(struct dir_context *ctx, const char *name, int namlen,
}
buf->result++;
dirent = buf->dirent;
- if (!access_ok(dirent,
+ if (!user_write_access_begin(dirent,
(unsigned long)(dirent->d_name + namlen + 1) -
(unsigned long)dirent))
goto efault;
- if ( __put_user(d_ino, &dirent->d_ino) ||
- __put_user(offset, &dirent->d_offset) ||
- __put_user(namlen, &dirent->d_namlen) ||
- __copy_to_user(dirent->d_name, name, namlen) ||
- __put_user(0, dirent->d_name + namlen))
- goto efault;
+ unsafe_put_user(d_ino, &dirent->d_ino, efault_end);
+ unsafe_put_user(offset, &dirent->d_offset, efault_end);
+ unsafe_put_user(namlen, &dirent->d_namlen, efault_end);
+ unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end);
+ user_write_access_end();
return 0;
+efault_end:
+ user_write_access_end();
efault:
buf->result = -EFAULT;
return -EFAULT;
@@ -242,7 +243,7 @@ static int filldir(struct dir_context *ctx, const char *name, int namlen,
return -EINTR;
dirent = buf->current_dir;
prev = (void __user *) dirent - prev_reclen;
- if (!user_access_begin(prev, reclen + prev_reclen))
+ if (!user_write_access_begin(prev, reclen + prev_reclen))
goto efault;
/* This might be 'dirent->d_off', but if so it will get overwritten */
@@ -251,14 +252,14 @@ static int filldir(struct dir_context *ctx, const char *name, int namlen,
unsafe_put_user(reclen, &dirent->d_reclen, efault_end);
unsafe_put_user(d_type, (char __user *) dirent + reclen - 1, efault_end);
unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end);
- user_access_end();
+ user_write_access_end();
buf->current_dir = (void __user *)dirent + reclen;
buf->prev_reclen = reclen;
buf->count -= reclen;
return 0;
efault_end:
- user_access_end();
+ user_write_access_end();
efault:
buf->error = -EFAULT;
return -EFAULT;
@@ -275,9 +276,6 @@ SYSCALL_DEFINE3(getdents, unsigned int, fd,
};
int error;
- if (!access_ok(dirent, count))
- return -EFAULT;
-
f = fdget_pos(fd);
if (!f.file)
return -EBADF;
@@ -327,7 +325,7 @@ static int filldir64(struct dir_context *ctx, const char *name, int namlen,
return -EINTR;
dirent = buf->current_dir;
prev = (void __user *)dirent - prev_reclen;
- if (!user_access_begin(prev, reclen + prev_reclen))
+ if (!user_write_access_begin(prev, reclen + prev_reclen))
goto efault;
/* This might be 'dirent->d_off', but if so it will get overwritten */
@@ -336,7 +334,7 @@ static int filldir64(struct dir_context *ctx, const char *name, int namlen,
unsafe_put_user(reclen, &dirent->d_reclen, efault_end);
unsafe_put_user(d_type, &dirent->d_type, efault_end);
unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end);
- user_access_end();
+ user_write_access_end();
buf->prev_reclen = reclen;
buf->current_dir = (void __user *)dirent + reclen;
@@ -344,7 +342,7 @@ static int filldir64(struct dir_context *ctx, const char *name, int namlen,
return 0;
efault_end:
- user_access_end();
+ user_write_access_end();
efault:
buf->error = -EFAULT;
return -EFAULT;
@@ -361,9 +359,6 @@ int ksys_getdents64(unsigned int fd, struct linux_dirent64 __user *dirent,
};
int error;
- if (!access_ok(dirent, count))
- return -EFAULT;
-
f = fdget_pos(fd);
if (!f.file)
return -EBADF;
@@ -376,7 +371,7 @@ int ksys_getdents64(unsigned int fd, struct linux_dirent64 __user *dirent,
typeof(lastdirent->d_off) d_off = buf.ctx.pos;
lastdirent = (void __user *) buf.current_dir - buf.prev_reclen;
- if (__put_user(d_off, &lastdirent->d_off))
+ if (put_user(d_off, &lastdirent->d_off))
error = -EFAULT;
else
error = count - buf.count;
@@ -424,17 +419,18 @@ static int compat_fillonedir(struct dir_context *ctx, const char *name,
}
buf->result++;
dirent = buf->dirent;
- if (!access_ok(dirent,
+ if (!user_write_access_begin(dirent,
(unsigned long)(dirent->d_name + namlen + 1) -
(unsigned long)dirent))
goto efault;
- if ( __put_user(d_ino, &dirent->d_ino) ||
- __put_user(offset, &dirent->d_offset) ||
- __put_user(namlen, &dirent->d_namlen) ||
- __copy_to_user(dirent->d_name, name, namlen) ||
- __put_user(0, dirent->d_name + namlen))
- goto efault;
+ unsafe_put_user(d_ino, &dirent->d_ino, efault_end);
+ unsafe_put_user(offset, &dirent->d_offset, efault_end);
+ unsafe_put_user(namlen, &dirent->d_namlen, efault_end);
+ unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end);
+ user_write_access_end();
return 0;
+efault_end:
+ user_write_access_end();
efault:
buf->result = -EFAULT;
return -EFAULT;
@@ -471,7 +467,7 @@ struct compat_linux_dirent {
struct compat_getdents_callback {
struct dir_context ctx;
struct compat_linux_dirent __user *current_dir;
- struct compat_linux_dirent __user *previous;
+ int prev_reclen;
int count;
int error;
};
@@ -479,13 +475,17 @@ struct compat_getdents_callback {
static int compat_filldir(struct dir_context *ctx, const char *name, int namlen,
loff_t offset, u64 ino, unsigned int d_type)
{
- struct compat_linux_dirent __user * dirent;
+ struct compat_linux_dirent __user *dirent, *prev;
struct compat_getdents_callback *buf =
container_of(ctx, struct compat_getdents_callback, ctx);
compat_ulong_t d_ino;
int reclen = ALIGN(offsetof(struct compat_linux_dirent, d_name) +
namlen + 2, sizeof(compat_long_t));
+ int prev_reclen;
+ buf->error = verify_dirent_name(name, namlen);
+ if (unlikely(buf->error))
+ return buf->error;
buf->error = -EINVAL; /* only used if we fail.. */
if (reclen > buf->count)
return -EINVAL;
@@ -494,29 +494,27 @@ static int compat_filldir(struct dir_context *ctx, const char *name, int namlen,
buf->error = -EOVERFLOW;
return -EOVERFLOW;
}
- dirent = buf->previous;
- if (dirent) {
- if (signal_pending(current))
- return -EINTR;
- if (__put_user(offset, &dirent->d_off))
- goto efault;
- }
+ prev_reclen = buf->prev_reclen;
+ if (prev_reclen && signal_pending(current))
+ return -EINTR;
dirent = buf->current_dir;
- if (__put_user(d_ino, &dirent->d_ino))
- goto efault;
- if (__put_user(reclen, &dirent->d_reclen))
- goto efault;
- if (copy_to_user(dirent->d_name, name, namlen))
- goto efault;
- if (__put_user(0, dirent->d_name + namlen))
- goto efault;
- if (__put_user(d_type, (char __user *) dirent + reclen - 1))
+ prev = (void __user *) dirent - prev_reclen;
+ if (!user_write_access_begin(prev, reclen + prev_reclen))
goto efault;
- buf->previous = dirent;
- dirent = (void __user *)dirent + reclen;
- buf->current_dir = dirent;
+
+ unsafe_put_user(offset, &prev->d_off, efault_end);
+ unsafe_put_user(d_ino, &dirent->d_ino, efault_end);
+ unsafe_put_user(reclen, &dirent->d_reclen, efault_end);
+ unsafe_put_user(d_type, (char __user *) dirent + reclen - 1, efault_end);
+ unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end);
+ user_write_access_end();
+
+ buf->prev_reclen = reclen;
+ buf->current_dir = (void __user *)dirent + reclen;
buf->count -= reclen;
return 0;
+efault_end:
+ user_write_access_end();
efault:
buf->error = -EFAULT;
return -EFAULT;
@@ -526,7 +524,6 @@ COMPAT_SYSCALL_DEFINE3(getdents, unsigned int, fd,
struct compat_linux_dirent __user *, dirent, unsigned int, count)
{
struct fd f;
- struct compat_linux_dirent __user * lastdirent;
struct compat_getdents_callback buf = {
.ctx.actor = compat_filldir,
.current_dir = dirent,
@@ -534,9 +531,6 @@ COMPAT_SYSCALL_DEFINE3(getdents, unsigned int, fd,
};
int error;
- if (!access_ok(dirent, count))
- return -EFAULT;
-
f = fdget_pos(fd);
if (!f.file)
return -EBADF;
@@ -544,8 +538,10 @@ COMPAT_SYSCALL_DEFINE3(getdents, unsigned int, fd,
error = iterate_dir(f.file, &buf.ctx);
if (error >= 0)
error = buf.error;
- lastdirent = buf.previous;
- if (lastdirent) {
+ if (buf.prev_reclen) {
+ struct compat_linux_dirent __user * lastdirent;
+ lastdirent = (void __user *)buf.current_dir - buf.prev_reclen;
+
if (put_user(buf.ctx.pos, &lastdirent->d_off))
error = -EFAULT;
else
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 84cf8bdbec9c..0b641ae694f1 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -159,7 +159,7 @@ static int reiserfs_sync_file(struct file *filp, loff_t start, loff_t end,
barrier_done = reiserfs_commit_for_inode(inode);
reiserfs_write_unlock(inode->i_sb);
if (barrier_done != 1 && reiserfs_barrier_flush(inode->i_sb))
- blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
+ blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL);
inode_unlock(inode);
if (barrier_done < 0)
return barrier_done;
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 6419e6dacc39..0031070b3692 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1160,11 +1160,9 @@ failure:
return retval;
}
-static int
-reiserfs_readpages(struct file *file, struct address_space *mapping,
- struct list_head *pages, unsigned nr_pages)
+static void reiserfs_readahead(struct readahead_control *rac)
{
- return mpage_readpages(mapping, pages, nr_pages, reiserfs_get_block);
+ mpage_readahead(rac, reiserfs_get_block);
}
/*
@@ -3434,7 +3432,7 @@ out:
const struct address_space_operations reiserfs_address_space_operations = {
.writepage = reiserfs_writepage,
.readpage = reiserfs_readpage,
- .readpages = reiserfs_readpages,
+ .readahead = reiserfs_readahead,
.releasepage = reiserfs_releasepage,
.invalidatepage = reiserfs_invalidatepage,
.write_begin = reiserfs_write_begin,
diff --git a/fs/romfs/Kconfig b/fs/romfs/Kconfig
index ad4c45788896..9737b8e68878 100644
--- a/fs/romfs/Kconfig
+++ b/fs/romfs/Kconfig
@@ -6,7 +6,7 @@ config ROMFS_FS
This is a very small read-only file system mainly intended for
initial ram disks of installation disks, but it could be used for
other read-only media as well. Read
- <file:Documentation/filesystems/romfs.txt> for details.
+ <file:Documentation/filesystems/romfs.rst> for details.
To compile this file system support as a module, choose M here: the
module will be called romfs. Note that the file system of your
diff --git a/fs/splice.c b/fs/splice.c
index 4e53efbd621d..6b3c9a018a8e 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -44,8 +44,8 @@
* addition of remove_mapping(). If success is returned, the caller may
* attempt to reuse this page for another destination.
*/
-static int page_cache_pipe_buf_steal(struct pipe_inode_info *pipe,
- struct pipe_buffer *buf)
+static bool page_cache_pipe_buf_try_steal(struct pipe_inode_info *pipe,
+ struct pipe_buffer *buf)
{
struct page *page = buf->page;
struct address_space *mapping;
@@ -76,7 +76,7 @@ static int page_cache_pipe_buf_steal(struct pipe_inode_info *pipe,
*/
if (remove_mapping(mapping, page)) {
buf->flags |= PIPE_BUF_FLAG_LRU;
- return 0;
+ return true;
}
}
@@ -86,7 +86,7 @@ static int page_cache_pipe_buf_steal(struct pipe_inode_info *pipe,
*/
out_unlock:
unlock_page(page);
- return 1;
+ return false;
}
static void page_cache_pipe_buf_release(struct pipe_inode_info *pipe,
@@ -139,27 +139,26 @@ error:
}
const struct pipe_buf_operations page_cache_pipe_buf_ops = {
- .confirm = page_cache_pipe_buf_confirm,
- .release = page_cache_pipe_buf_release,
- .steal = page_cache_pipe_buf_steal,
- .get = generic_pipe_buf_get,
+ .confirm = page_cache_pipe_buf_confirm,
+ .release = page_cache_pipe_buf_release,
+ .try_steal = page_cache_pipe_buf_try_steal,
+ .get = generic_pipe_buf_get,
};
-static int user_page_pipe_buf_steal(struct pipe_inode_info *pipe,
- struct pipe_buffer *buf)
+static bool user_page_pipe_buf_try_steal(struct pipe_inode_info *pipe,
+ struct pipe_buffer *buf)
{
if (!(buf->flags & PIPE_BUF_FLAG_GIFT))
- return 1;
+ return false;
buf->flags |= PIPE_BUF_FLAG_LRU;
- return generic_pipe_buf_steal(pipe, buf);
+ return generic_pipe_buf_try_steal(pipe, buf);
}
static const struct pipe_buf_operations user_page_pipe_buf_ops = {
- .confirm = generic_pipe_buf_confirm,
- .release = page_cache_pipe_buf_release,
- .steal = user_page_pipe_buf_steal,
- .get = generic_pipe_buf_get,
+ .release = page_cache_pipe_buf_release,
+ .try_steal = user_page_pipe_buf_try_steal,
+ .get = generic_pipe_buf_get,
};
static void wakeup_pipe_readers(struct pipe_inode_info *pipe)
@@ -331,24 +330,15 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
EXPORT_SYMBOL(generic_file_splice_read);
const struct pipe_buf_operations default_pipe_buf_ops = {
- .confirm = generic_pipe_buf_confirm,
- .release = generic_pipe_buf_release,
- .steal = generic_pipe_buf_steal,
- .get = generic_pipe_buf_get,
+ .release = generic_pipe_buf_release,
+ .try_steal = generic_pipe_buf_try_steal,
+ .get = generic_pipe_buf_get,
};
-int generic_pipe_buf_nosteal(struct pipe_inode_info *pipe,
- struct pipe_buffer *buf)
-{
- return 1;
-}
-
/* Pipe buffer operations for a socket and similar. */
const struct pipe_buf_operations nosteal_pipe_buf_ops = {
- .confirm = generic_pipe_buf_confirm,
- .release = generic_pipe_buf_release,
- .steal = generic_pipe_buf_nosteal,
- .get = generic_pipe_buf_get,
+ .release = generic_pipe_buf_release,
+ .get = generic_pipe_buf_get,
};
EXPORT_SYMBOL(nosteal_pipe_buf_ops);
@@ -852,15 +842,9 @@ EXPORT_SYMBOL(generic_splice_sendpage);
static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
loff_t *ppos, size_t len, unsigned int flags)
{
- ssize_t (*splice_write)(struct pipe_inode_info *, struct file *,
- loff_t *, size_t, unsigned int);
-
if (out->f_op->splice_write)
- splice_write = out->f_op->splice_write;
- else
- splice_write = default_file_splice_write;
-
- return splice_write(pipe, out, ppos, len, flags);
+ return out->f_op->splice_write(pipe, out, ppos, len, flags);
+ return default_file_splice_write(pipe, out, ppos, len, flags);
}
/*
@@ -870,8 +854,6 @@ static long do_splice_to(struct file *in, loff_t *ppos,
struct pipe_inode_info *pipe, size_t len,
unsigned int flags)
{
- ssize_t (*splice_read)(struct file *, loff_t *,
- struct pipe_inode_info *, size_t, unsigned int);
int ret;
if (unlikely(!(in->f_mode & FMODE_READ)))
@@ -885,11 +867,8 @@ static long do_splice_to(struct file *in, loff_t *ppos,
len = MAX_RW_COUNT;
if (in->f_op->splice_read)
- splice_read = in->f_op->splice_read;
- else
- splice_read = default_file_splice_read;
-
- return splice_read(in, ppos, pipe, len, flags);
+ return in->f_op->splice_read(in, ppos, pipe, len, flags);
+ return default_file_splice_read(in, ppos, pipe, len, flags);
}
/**
@@ -1626,12 +1605,11 @@ retry:
*obuf = *ibuf;
/*
- * Don't inherit the gift flag, we need to
+ * Don't inherit the gift and merge flags, we need to
* prevent multiple steals of this page.
*/
obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
-
- pipe_buf_mark_unmergeable(obuf);
+ obuf->flags &= ~PIPE_BUF_FLAG_CAN_MERGE;
obuf->len = len;
ibuf->offset += len;
@@ -1719,12 +1697,11 @@ static int link_pipe(struct pipe_inode_info *ipipe,
*obuf = *ibuf;
/*
- * Don't inherit the gift flag, we need to
- * prevent multiple steals of this page.
+ * Don't inherit the gift and merge flags, we need to prevent
+ * multiple steals of this page.
*/
obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
-
- pipe_buf_mark_unmergeable(obuf);
+ obuf->flags &= ~PIPE_BUF_FLAG_CAN_MERGE;
if (obuf->len > len)
obuf->len = len;
@@ -1754,8 +1731,7 @@ static int link_pipe(struct pipe_inode_info *ipipe,
* The 'flags' used are the SPLICE_F_* variants, currently the only
* applicable one is SPLICE_F_NONBLOCK.
*/
-static long do_tee(struct file *in, struct file *out, size_t len,
- unsigned int flags)
+long do_tee(struct file *in, struct file *out, size_t len, unsigned int flags)
{
struct pipe_inode_info *ipipe = get_pipe_info(in);
struct pipe_inode_info *opipe = get_pipe_info(out);
diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index 4f9b9fb59362..64f61330564a 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -13,6 +13,7 @@
* datablocks and metadata blocks.
*/
+#include <linux/blkdev.h>
#include <linux/fs.h>
#include <linux/vfs.h>
#include <linux/slab.h>
@@ -27,44 +28,103 @@
#include "page_actor.h"
/*
- * Read the metadata block length, this is stored in the first two
- * bytes of the metadata block.
+ * Returns the number of bytes copied to the page actor.
*/
-static struct buffer_head *get_block_length(struct super_block *sb,
- u64 *cur_index, int *offset, int *length)
+static int copy_bio_to_actor(struct bio *bio,
+ struct squashfs_page_actor *actor,
+ int offset, int req_length)
+{
+ void *actor_addr = squashfs_first_page(actor);
+ struct bvec_iter_all iter_all = {};
+ struct bio_vec *bvec = bvec_init_iter_all(&iter_all);
+ int copied_bytes = 0;
+ int actor_offset = 0;
+
+ if (WARN_ON_ONCE(!bio_next_segment(bio, &iter_all)))
+ return 0;
+
+ while (copied_bytes < req_length) {
+ int bytes_to_copy = min_t(int, bvec->bv_len - offset,
+ PAGE_SIZE - actor_offset);
+
+ bytes_to_copy = min_t(int, bytes_to_copy,
+ req_length - copied_bytes);
+ memcpy(actor_addr + actor_offset,
+ page_address(bvec->bv_page) + bvec->bv_offset + offset,
+ bytes_to_copy);
+
+ actor_offset += bytes_to_copy;
+ copied_bytes += bytes_to_copy;
+ offset += bytes_to_copy;
+
+ if (actor_offset >= PAGE_SIZE) {
+ actor_addr = squashfs_next_page(actor);
+ if (!actor_addr)
+ break;
+ actor_offset = 0;
+ }
+ if (offset >= bvec->bv_len) {
+ if (!bio_next_segment(bio, &iter_all))
+ break;
+ offset = 0;
+ }
+ }
+ squashfs_finish_page(actor);
+ return copied_bytes;
+}
+
+static int squashfs_bio_read(struct super_block *sb, u64 index, int length,
+ struct bio **biop, int *block_offset)
{
struct squashfs_sb_info *msblk = sb->s_fs_info;
- struct buffer_head *bh;
-
- bh = sb_bread(sb, *cur_index);
- if (bh == NULL)
- return NULL;
-
- if (msblk->devblksize - *offset == 1) {
- *length = (unsigned char) bh->b_data[*offset];
- put_bh(bh);
- bh = sb_bread(sb, ++(*cur_index));
- if (bh == NULL)
- return NULL;
- *length |= (unsigned char) bh->b_data[0] << 8;
- *offset = 1;
- } else {
- *length = (unsigned char) bh->b_data[*offset] |
- (unsigned char) bh->b_data[*offset + 1] << 8;
- *offset += 2;
-
- if (*offset == msblk->devblksize) {
- put_bh(bh);
- bh = sb_bread(sb, ++(*cur_index));
- if (bh == NULL)
- return NULL;
- *offset = 0;
+ const u64 read_start = round_down(index, msblk->devblksize);
+ const sector_t block = read_start >> msblk->devblksize_log2;
+ const u64 read_end = round_up(index + length, msblk->devblksize);
+ const sector_t block_end = read_end >> msblk->devblksize_log2;
+ int offset = read_start - round_down(index, PAGE_SIZE);
+ int total_len = (block_end - block) << msblk->devblksize_log2;
+ const int page_count = DIV_ROUND_UP(total_len + offset, PAGE_SIZE);
+ int error, i;
+ struct bio *bio;
+
+ bio = bio_alloc(GFP_NOIO, page_count);
+ if (!bio)
+ return -ENOMEM;
+
+ bio_set_dev(bio, sb->s_bdev);
+ bio->bi_opf = READ;
+ bio->bi_iter.bi_sector = block * (msblk->devblksize >> SECTOR_SHIFT);
+
+ for (i = 0; i < page_count; ++i) {
+ unsigned int len =
+ min_t(unsigned int, PAGE_SIZE - offset, total_len);
+ struct page *page = alloc_page(GFP_NOIO);
+
+ if (!page) {
+ error = -ENOMEM;
+ goto out_free_bio;
+ }
+ if (!bio_add_page(bio, page, len, offset)) {
+ error = -EIO;
+ goto out_free_bio;
}
+ offset = 0;
+ total_len -= len;
}
- return bh;
-}
+ error = submit_bio_wait(bio);
+ if (error)
+ goto out_free_bio;
+ *biop = bio;
+ *block_offset = index & ((1 << msblk->devblksize_log2) - 1);
+ return 0;
+
+out_free_bio:
+ bio_free_pages(bio);
+ bio_put(bio);
+ return error;
+}
/*
* Read and decompress a metadata block or datablock. Length is non-zero
@@ -76,129 +136,88 @@ static struct buffer_head *get_block_length(struct super_block *sb,
* algorithms).
*/
int squashfs_read_data(struct super_block *sb, u64 index, int length,
- u64 *next_index, struct squashfs_page_actor *output)
+ u64 *next_index, struct squashfs_page_actor *output)
{
struct squashfs_sb_info *msblk = sb->s_fs_info;
- struct buffer_head **bh;
- int offset = index & ((1 << msblk->devblksize_log2) - 1);
- u64 cur_index = index >> msblk->devblksize_log2;
- int bytes, compressed, b = 0, k = 0, avail, i;
-
- bh = kcalloc(((output->length + msblk->devblksize - 1)
- >> msblk->devblksize_log2) + 1, sizeof(*bh), GFP_KERNEL);
- if (bh == NULL)
- return -ENOMEM;
+ struct bio *bio = NULL;
+ int compressed;
+ int res;
+ int offset;
if (length) {
/*
* Datablock.
*/
- bytes = -offset;
compressed = SQUASHFS_COMPRESSED_BLOCK(length);
length = SQUASHFS_COMPRESSED_SIZE_BLOCK(length);
- if (next_index)
- *next_index = index + length;
-
TRACE("Block @ 0x%llx, %scompressed size %d, src size %d\n",
index, compressed ? "" : "un", length, output->length);
-
- if (length < 0 || length > output->length ||
- (index + length) > msblk->bytes_used)
- goto read_failure;
-
- for (b = 0; bytes < length; b++, cur_index++) {
- bh[b] = sb_getblk(sb, cur_index);
- if (bh[b] == NULL)
- goto block_release;
- bytes += msblk->devblksize;
- }
- ll_rw_block(REQ_OP_READ, 0, b, bh);
} else {
/*
* Metadata block.
*/
- if ((index + 2) > msblk->bytes_used)
- goto read_failure;
+ const u8 *data;
+ struct bvec_iter_all iter_all = {};
+ struct bio_vec *bvec = bvec_init_iter_all(&iter_all);
- bh[0] = get_block_length(sb, &cur_index, &offset, &length);
- if (bh[0] == NULL)
- goto read_failure;
- b = 1;
+ if (index + 2 > msblk->bytes_used) {
+ res = -EIO;
+ goto out;
+ }
+ res = squashfs_bio_read(sb, index, 2, &bio, &offset);
+ if (res)
+ goto out;
+
+ if (WARN_ON_ONCE(!bio_next_segment(bio, &iter_all))) {
+ res = -EIO;
+ goto out_free_bio;
+ }
+ /* Extract the length of the metadata block */
+ data = page_address(bvec->bv_page) + bvec->bv_offset;
+ length = data[offset];
+ if (offset <= bvec->bv_len - 1) {
+ length |= data[offset + 1] << 8;
+ } else {
+ if (WARN_ON_ONCE(!bio_next_segment(bio, &iter_all))) {
+ res = -EIO;
+ goto out_free_bio;
+ }
+ data = page_address(bvec->bv_page) + bvec->bv_offset;
+ length |= data[0] << 8;
+ }
+ bio_free_pages(bio);
+ bio_put(bio);
- bytes = msblk->devblksize - offset;
compressed = SQUASHFS_COMPRESSED(length);
length = SQUASHFS_COMPRESSED_SIZE(length);
- if (next_index)
- *next_index = index + length + 2;
+ index += 2;
TRACE("Block @ 0x%llx, %scompressed size %d\n", index,
- compressed ? "" : "un", length);
-
- if (length < 0 || length > output->length ||
- (index + length) > msblk->bytes_used)
- goto block_release;
-
- for (; bytes < length; b++) {
- bh[b] = sb_getblk(sb, ++cur_index);
- if (bh[b] == NULL)
- goto block_release;
- bytes += msblk->devblksize;
- }
- ll_rw_block(REQ_OP_READ, 0, b - 1, bh + 1);
+ compressed ? "" : "un", length);
}
+ if (next_index)
+ *next_index = index + length;
- for (i = 0; i < b; i++) {
- wait_on_buffer(bh[i]);
- if (!buffer_uptodate(bh[i]))
- goto block_release;
- }
+ res = squashfs_bio_read(sb, index, length, &bio, &offset);
+ if (res)
+ goto out;
if (compressed) {
- if (!msblk->stream)
- goto read_failure;
- length = squashfs_decompress(msblk, bh, b, offset, length,
- output);
- if (length < 0)
- goto read_failure;
- } else {
- /*
- * Block is uncompressed.
- */
- int in, pg_offset = 0;
- void *data = squashfs_first_page(output);
-
- for (bytes = length; k < b; k++) {
- in = min(bytes, msblk->devblksize - offset);
- bytes -= in;
- while (in) {
- if (pg_offset == PAGE_SIZE) {
- data = squashfs_next_page(output);
- pg_offset = 0;
- }
- avail = min_t(int, in, PAGE_SIZE -
- pg_offset);
- memcpy(data + pg_offset, bh[k]->b_data + offset,
- avail);
- in -= avail;
- pg_offset += avail;
- offset += avail;
- }
- offset = 0;
- put_bh(bh[k]);
+ if (!msblk->stream) {
+ res = -EIO;
+ goto out_free_bio;
}
- squashfs_finish_page(output);
+ res = squashfs_decompress(msblk, bio, offset, length, output);
+ } else {
+ res = copy_bio_to_actor(bio, output, offset, length);
}
- kfree(bh);
- return length;
-
-block_release:
- for (; k < b; k++)
- put_bh(bh[k]);
+out_free_bio:
+ bio_free_pages(bio);
+ bio_put(bio);
+out:
+ if (res < 0)
+ ERROR("Failed to read block 0x%llx: %d\n", index, res);
-read_failure:
- ERROR("squashfs_read_data failed to read block 0x%llx\n",
- (unsigned long long) index);
- kfree(bh);
- return -EIO;
+ return res;
}
diff --git a/fs/squashfs/decompressor.h b/fs/squashfs/decompressor.h
index ec8617523e56..1b9ccfd0aa51 100644
--- a/fs/squashfs/decompressor.h
+++ b/fs/squashfs/decompressor.h
@@ -10,13 +10,14 @@
* decompressor.h
*/
+#include <linux/bio.h>
+
struct squashfs_decompressor {
void *(*init)(struct squashfs_sb_info *, void *);
void *(*comp_opts)(struct squashfs_sb_info *, void *, int);
void (*free)(void *);
int (*decompress)(struct squashfs_sb_info *, void *,
- struct buffer_head **, int, int, int,
- struct squashfs_page_actor *);
+ struct bio *, int, int, struct squashfs_page_actor *);
int id;
char *name;
int supported;
diff --git a/fs/squashfs/decompressor_multi.c b/fs/squashfs/decompressor_multi.c
index c181dee235bb..db9f12a3ea05 100644
--- a/fs/squashfs/decompressor_multi.c
+++ b/fs/squashfs/decompressor_multi.c
@@ -6,7 +6,7 @@
#include <linux/types.h>
#include <linux/mutex.h>
#include <linux/slab.h>
-#include <linux/buffer_head.h>
+#include <linux/bio.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/cpumask.h>
@@ -180,14 +180,15 @@ wait:
}
-int squashfs_decompress(struct squashfs_sb_info *msblk, struct buffer_head **bh,
- int b, int offset, int length, struct squashfs_page_actor *output)
+int squashfs_decompress(struct squashfs_sb_info *msblk, struct bio *bio,
+ int offset, int length,
+ struct squashfs_page_actor *output)
{
int res;
struct squashfs_stream *stream = msblk->stream;
struct decomp_stream *decomp_stream = get_decomp_stream(msblk, stream);
res = msblk->decompressor->decompress(msblk, decomp_stream->stream,
- bh, b, offset, length, output);
+ bio, offset, length, output);
put_decomp_stream(decomp_stream, stream);
if (res < 0)
ERROR("%s decompression failed, data probably corrupt\n",
diff --git a/fs/squashfs/decompressor_multi_percpu.c b/fs/squashfs/decompressor_multi_percpu.c
index 2a2a2d106440..b881b9283b7f 100644
--- a/fs/squashfs/decompressor_multi_percpu.c
+++ b/fs/squashfs/decompressor_multi_percpu.c
@@ -8,6 +8,7 @@
#include <linux/slab.h>
#include <linux/percpu.h>
#include <linux/buffer_head.h>
+#include <linux/local_lock.h>
#include "squashfs_fs.h"
#include "squashfs_fs_sb.h"
@@ -20,7 +21,8 @@
*/
struct squashfs_stream {
- void *stream;
+ void *stream;
+ local_lock_t lock;
};
void *squashfs_decompressor_create(struct squashfs_sb_info *msblk,
@@ -41,6 +43,7 @@ void *squashfs_decompressor_create(struct squashfs_sb_info *msblk,
err = PTR_ERR(stream->stream);
goto out;
}
+ local_lock_init(&stream->lock);
}
kfree(comp_opts);
@@ -72,15 +75,19 @@ void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk)
}
}
-int squashfs_decompress(struct squashfs_sb_info *msblk, struct buffer_head **bh,
- int b, int offset, int length, struct squashfs_page_actor *output)
+int squashfs_decompress(struct squashfs_sb_info *msblk, struct bio *bio,
+ int offset, int length, struct squashfs_page_actor *output)
{
- struct squashfs_stream __percpu *percpu =
- (struct squashfs_stream __percpu *) msblk->stream;
- struct squashfs_stream *stream = get_cpu_ptr(percpu);
- int res = msblk->decompressor->decompress(msblk, stream->stream, bh, b,
- offset, length, output);
- put_cpu_ptr(stream);
+ struct squashfs_stream *stream;
+ int res;
+
+ local_lock(&msblk->stream->lock);
+ stream = this_cpu_ptr(msblk->stream);
+
+ res = msblk->decompressor->decompress(msblk, stream->stream, bio,
+ offset, length, output);
+
+ local_unlock(&msblk->stream->lock);
if (res < 0)
ERROR("%s decompression failed, data probably corrupt\n",
diff --git a/fs/squashfs/decompressor_single.c b/fs/squashfs/decompressor_single.c
index 550c3e592032..4eb3d083d45e 100644
--- a/fs/squashfs/decompressor_single.c
+++ b/fs/squashfs/decompressor_single.c
@@ -7,7 +7,7 @@
#include <linux/types.h>
#include <linux/mutex.h>
#include <linux/slab.h>
-#include <linux/buffer_head.h>
+#include <linux/bio.h>
#include "squashfs_fs.h"
#include "squashfs_fs_sb.h"
@@ -59,14 +59,15 @@ void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk)
}
}
-int squashfs_decompress(struct squashfs_sb_info *msblk, struct buffer_head **bh,
- int b, int offset, int length, struct squashfs_page_actor *output)
+int squashfs_decompress(struct squashfs_sb_info *msblk, struct bio *bio,
+ int offset, int length,
+ struct squashfs_page_actor *output)
{
int res;
struct squashfs_stream *stream = msblk->stream;
mutex_lock(&stream->mutex);
- res = msblk->decompressor->decompress(msblk, stream->stream, bh, b,
+ res = msblk->decompressor->decompress(msblk, stream->stream, bio,
offset, length, output);
mutex_unlock(&stream->mutex);
diff --git a/fs/squashfs/lz4_wrapper.c b/fs/squashfs/lz4_wrapper.c
index c4e47e0588c7..233d5582fbee 100644
--- a/fs/squashfs/lz4_wrapper.c
+++ b/fs/squashfs/lz4_wrapper.c
@@ -4,7 +4,7 @@
* Phillip Lougher <phillip@squashfs.org.uk>
*/
-#include <linux/buffer_head.h>
+#include <linux/bio.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
@@ -89,20 +89,23 @@ static void lz4_free(void *strm)
static int lz4_uncompress(struct squashfs_sb_info *msblk, void *strm,
- struct buffer_head **bh, int b, int offset, int length,
+ struct bio *bio, int offset, int length,
struct squashfs_page_actor *output)
{
+ struct bvec_iter_all iter_all = {};
+ struct bio_vec *bvec = bvec_init_iter_all(&iter_all);
struct squashfs_lz4 *stream = strm;
void *buff = stream->input, *data;
- int avail, i, bytes = length, res;
+ int bytes = length, res;
- for (i = 0; i < b; i++) {
- avail = min(bytes, msblk->devblksize - offset);
- memcpy(buff, bh[i]->b_data + offset, avail);
+ while (bio_next_segment(bio, &iter_all)) {
+ int avail = min(bytes, ((int)bvec->bv_len) - offset);
+
+ data = page_address(bvec->bv_page) + bvec->bv_offset;
+ memcpy(buff, data + offset, avail);
buff += avail;
bytes -= avail;
offset = 0;
- put_bh(bh[i]);
}
res = LZ4_decompress_safe(stream->input, stream->output,
diff --git a/fs/squashfs/lzo_wrapper.c b/fs/squashfs/lzo_wrapper.c
index aa3c3dafc33d..97bb7d92ddcd 100644
--- a/fs/squashfs/lzo_wrapper.c
+++ b/fs/squashfs/lzo_wrapper.c
@@ -9,7 +9,7 @@
*/
#include <linux/mutex.h>
-#include <linux/buffer_head.h>
+#include <linux/bio.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/lzo.h>
@@ -63,21 +63,24 @@ static void lzo_free(void *strm)
static int lzo_uncompress(struct squashfs_sb_info *msblk, void *strm,
- struct buffer_head **bh, int b, int offset, int length,
+ struct bio *bio, int offset, int length,
struct squashfs_page_actor *output)
{
+ struct bvec_iter_all iter_all = {};
+ struct bio_vec *bvec = bvec_init_iter_all(&iter_all);
struct squashfs_lzo *stream = strm;
void *buff = stream->input, *data;
- int avail, i, bytes = length, res;
+ int bytes = length, res;
size_t out_len = output->length;
- for (i = 0; i < b; i++) {
- avail = min(bytes, msblk->devblksize - offset);
- memcpy(buff, bh[i]->b_data + offset, avail);
+ while (bio_next_segment(bio, &iter_all)) {
+ int avail = min(bytes, ((int)bvec->bv_len) - offset);
+
+ data = page_address(bvec->bv_page) + bvec->bv_offset;
+ memcpy(buff, data + offset, avail);
buff += avail;
bytes -= avail;
offset = 0;
- put_bh(bh[i]);
}
res = lzo1x_decompress_safe(stream->input, (size_t)length,
diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h
index 2797763ed046..9783e01c8100 100644
--- a/fs/squashfs/squashfs.h
+++ b/fs/squashfs/squashfs.h
@@ -40,8 +40,8 @@ extern void *squashfs_decompressor_setup(struct super_block *, unsigned short);
/* decompressor_xxx.c */
extern void *squashfs_decompressor_create(struct squashfs_sb_info *, void *);
extern void squashfs_decompressor_destroy(struct squashfs_sb_info *);
-extern int squashfs_decompress(struct squashfs_sb_info *, struct buffer_head **,
- int, int, int, struct squashfs_page_actor *);
+extern int squashfs_decompress(struct squashfs_sb_info *, struct bio *,
+ int, int, struct squashfs_page_actor *);
extern int squashfs_max_decompressors(void);
/* export.c */
diff --git a/fs/squashfs/xz_wrapper.c b/fs/squashfs/xz_wrapper.c
index 4b2f2051a6dc..e80419aed862 100644
--- a/fs/squashfs/xz_wrapper.c
+++ b/fs/squashfs/xz_wrapper.c
@@ -10,7 +10,7 @@
#include <linux/mutex.h>
-#include <linux/buffer_head.h>
+#include <linux/bio.h>
#include <linux/slab.h>
#include <linux/xz.h>
#include <linux/bitops.h>
@@ -117,11 +117,12 @@ static void squashfs_xz_free(void *strm)
static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm,
- struct buffer_head **bh, int b, int offset, int length,
+ struct bio *bio, int offset, int length,
struct squashfs_page_actor *output)
{
- enum xz_ret xz_err;
- int avail, total = 0, k = 0;
+ struct bvec_iter_all iter_all = {};
+ struct bio_vec *bvec = bvec_init_iter_all(&iter_all);
+ int total = 0, error = 0;
struct squashfs_xz *stream = strm;
xz_dec_reset(stream->state);
@@ -131,11 +132,23 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm,
stream->buf.out_size = PAGE_SIZE;
stream->buf.out = squashfs_first_page(output);
- do {
- if (stream->buf.in_pos == stream->buf.in_size && k < b) {
- avail = min(length, msblk->devblksize - offset);
+ for (;;) {
+ enum xz_ret xz_err;
+
+ if (stream->buf.in_pos == stream->buf.in_size) {
+ const void *data;
+ int avail;
+
+ if (!bio_next_segment(bio, &iter_all)) {
+ /* XZ_STREAM_END must be reached. */
+ error = -EIO;
+ break;
+ }
+
+ avail = min(length, ((int)bvec->bv_len) - offset);
+ data = page_address(bvec->bv_page) + bvec->bv_offset;
length -= avail;
- stream->buf.in = bh[k]->b_data + offset;
+ stream->buf.in = data + offset;
stream->buf.in_size = avail;
stream->buf.in_pos = 0;
offset = 0;
@@ -150,23 +163,17 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm,
}
xz_err = xz_dec_run(stream->state, &stream->buf);
-
- if (stream->buf.in_pos == stream->buf.in_size && k < b)
- put_bh(bh[k++]);
- } while (xz_err == XZ_OK);
+ if (xz_err == XZ_STREAM_END)
+ break;
+ if (xz_err != XZ_OK) {
+ error = -EIO;
+ break;
+ }
+ }
squashfs_finish_page(output);
- if (xz_err != XZ_STREAM_END || k < b)
- goto out;
-
- return total + stream->buf.out_pos;
-
-out:
- for (; k < b; k++)
- put_bh(bh[k]);
-
- return -EIO;
+ return error ? error : total + stream->buf.out_pos;
}
const struct squashfs_decompressor squashfs_xz_comp_ops = {
diff --git a/fs/squashfs/zlib_wrapper.c b/fs/squashfs/zlib_wrapper.c
index f2226afa1625..bcb881ec47f2 100644
--- a/fs/squashfs/zlib_wrapper.c
+++ b/fs/squashfs/zlib_wrapper.c
@@ -10,7 +10,7 @@
#include <linux/mutex.h>
-#include <linux/buffer_head.h>
+#include <linux/bio.h>
#include <linux/slab.h>
#include <linux/zlib.h>
#include <linux/vmalloc.h>
@@ -50,21 +50,35 @@ static void zlib_free(void *strm)
static int zlib_uncompress(struct squashfs_sb_info *msblk, void *strm,
- struct buffer_head **bh, int b, int offset, int length,
+ struct bio *bio, int offset, int length,
struct squashfs_page_actor *output)
{
- int zlib_err, zlib_init = 0, k = 0;
+ struct bvec_iter_all iter_all = {};
+ struct bio_vec *bvec = bvec_init_iter_all(&iter_all);
+ int zlib_init = 0, error = 0;
z_stream *stream = strm;
stream->avail_out = PAGE_SIZE;
stream->next_out = squashfs_first_page(output);
stream->avail_in = 0;
- do {
- if (stream->avail_in == 0 && k < b) {
- int avail = min(length, msblk->devblksize - offset);
+ for (;;) {
+ int zlib_err;
+
+ if (stream->avail_in == 0) {
+ const void *data;
+ int avail;
+
+ if (!bio_next_segment(bio, &iter_all)) {
+ /* Z_STREAM_END must be reached. */
+ error = -EIO;
+ break;
+ }
+
+ avail = min(length, ((int)bvec->bv_len) - offset);
+ data = page_address(bvec->bv_page) + bvec->bv_offset;
length -= avail;
- stream->next_in = bh[k]->b_data + offset;
+ stream->next_in = data + offset;
stream->avail_in = avail;
offset = 0;
}
@@ -78,37 +92,28 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void *strm,
if (!zlib_init) {
zlib_err = zlib_inflateInit(stream);
if (zlib_err != Z_OK) {
- squashfs_finish_page(output);
- goto out;
+ error = -EIO;
+ break;
}
zlib_init = 1;
}
zlib_err = zlib_inflate(stream, Z_SYNC_FLUSH);
-
- if (stream->avail_in == 0 && k < b)
- put_bh(bh[k++]);
- } while (zlib_err == Z_OK);
+ if (zlib_err == Z_STREAM_END)
+ break;
+ if (zlib_err != Z_OK) {
+ error = -EIO;
+ break;
+ }
+ }
squashfs_finish_page(output);
- if (zlib_err != Z_STREAM_END)
- goto out;
-
- zlib_err = zlib_inflateEnd(stream);
- if (zlib_err != Z_OK)
- goto out;
-
- if (k < b)
- goto out;
-
- return stream->total_out;
-
-out:
- for (; k < b; k++)
- put_bh(bh[k]);
+ if (!error)
+ if (zlib_inflateEnd(stream) != Z_OK)
+ error = -EIO;
- return -EIO;
+ return error ? error : stream->total_out;
}
const struct squashfs_decompressor squashfs_zlib_comp_ops = {
diff --git a/fs/squashfs/zstd_wrapper.c b/fs/squashfs/zstd_wrapper.c
index b448c2a1d0ed..b7cb1faa652d 100644
--- a/fs/squashfs/zstd_wrapper.c
+++ b/fs/squashfs/zstd_wrapper.c
@@ -9,7 +9,7 @@
*/
#include <linux/mutex.h>
-#include <linux/buffer_head.h>
+#include <linux/bio.h>
#include <linux/slab.h>
#include <linux/zstd.h>
#include <linux/vmalloc.h>
@@ -59,33 +59,44 @@ static void zstd_free(void *strm)
static int zstd_uncompress(struct squashfs_sb_info *msblk, void *strm,
- struct buffer_head **bh, int b, int offset, int length,
+ struct bio *bio, int offset, int length,
struct squashfs_page_actor *output)
{
struct workspace *wksp = strm;
ZSTD_DStream *stream;
size_t total_out = 0;
- size_t zstd_err;
- int k = 0;
+ int error = 0;
ZSTD_inBuffer in_buf = { NULL, 0, 0 };
ZSTD_outBuffer out_buf = { NULL, 0, 0 };
+ struct bvec_iter_all iter_all = {};
+ struct bio_vec *bvec = bvec_init_iter_all(&iter_all);
stream = ZSTD_initDStream(wksp->window_size, wksp->mem, wksp->mem_size);
if (!stream) {
ERROR("Failed to initialize zstd decompressor\n");
- goto out;
+ return -EIO;
}
out_buf.size = PAGE_SIZE;
out_buf.dst = squashfs_first_page(output);
- do {
- if (in_buf.pos == in_buf.size && k < b) {
- int avail = min(length, msblk->devblksize - offset);
+ for (;;) {
+ size_t zstd_err;
+ if (in_buf.pos == in_buf.size) {
+ const void *data;
+ int avail;
+
+ if (!bio_next_segment(bio, &iter_all)) {
+ error = -EIO;
+ break;
+ }
+
+ avail = min(length, ((int)bvec->bv_len) - offset);
+ data = page_address(bvec->bv_page) + bvec->bv_offset;
length -= avail;
- in_buf.src = bh[k]->b_data + offset;
+ in_buf.src = data + offset;
in_buf.size = avail;
in_buf.pos = 0;
offset = 0;
@@ -97,8 +108,8 @@ static int zstd_uncompress(struct squashfs_sb_info *msblk, void *strm,
/* Shouldn't run out of pages
* before stream is done.
*/
- squashfs_finish_page(output);
- goto out;
+ error = -EIO;
+ break;
}
out_buf.pos = 0;
out_buf.size = PAGE_SIZE;
@@ -107,29 +118,20 @@ static int zstd_uncompress(struct squashfs_sb_info *msblk, void *strm,
total_out -= out_buf.pos;
zstd_err = ZSTD_decompressStream(stream, &out_buf, &in_buf);
total_out += out_buf.pos; /* add the additional data produced */
-
- if (in_buf.pos == in_buf.size && k < b)
- put_bh(bh[k++]);
- } while (zstd_err != 0 && !ZSTD_isError(zstd_err));
-
- squashfs_finish_page(output);
-
- if (ZSTD_isError(zstd_err)) {
- ERROR("zstd decompression error: %d\n",
- (int)ZSTD_getErrorCode(zstd_err));
- goto out;
+ if (zstd_err == 0)
+ break;
+
+ if (ZSTD_isError(zstd_err)) {
+ ERROR("zstd decompression error: %d\n",
+ (int)ZSTD_getErrorCode(zstd_err));
+ error = -EIO;
+ break;
+ }
}
- if (k < b)
- goto out;
-
- return (int)total_out;
-
-out:
- for (; k < b; k++)
- put_bh(bh[k]);
+ squashfs_finish_page(output);
- return -EIO;
+ return error ? error : total_out;
}
const struct squashfs_decompressor squashfs_zstd_comp_ops = {
diff --git a/fs/stat.c b/fs/stat.c
index 030008796479..44f8ad346db4 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -22,6 +22,7 @@
#include <asm/unistd.h>
#include "internal.h"
+#include "mount.h"
/**
* generic_fillattr - Fill in the basic attributes from the inode struct
@@ -70,15 +71,18 @@ int vfs_getattr_nosec(const struct path *path, struct kstat *stat,
memset(stat, 0, sizeof(*stat));
stat->result_mask |= STATX_BASIC_STATS;
- request_mask &= STATX_ALL;
query_flags &= KSTAT_QUERY_FLAGS;
/* allow the fs to override these if it really wants to */
- if (IS_NOATIME(inode))
+ /* SB_NOATIME means filesystem supplies dummy atime value */
+ if (inode->i_sb->s_flags & SB_NOATIME)
stat->result_mask &= ~STATX_ATIME;
if (IS_AUTOMOUNT(inode))
stat->attributes |= STATX_ATTR_AUTOMOUNT;
+ if (IS_DAX(inode))
+ stat->attributes |= STATX_ATTR_DAX;
+
if (inode->i_op->getattr)
return inode->i_op->getattr(path, stat, request_mask,
query_flags);
@@ -152,7 +156,8 @@ int vfs_statx_fd(unsigned int fd, struct kstat *stat,
}
EXPORT_SYMBOL(vfs_statx_fd);
-inline unsigned vfs_stat_set_lookup_flags(unsigned *lookup_flags, int flags)
+static inline unsigned vfs_stat_set_lookup_flags(unsigned *lookup_flags,
+ int flags)
{
if ((flags & ~(AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT |
AT_EMPTY_PATH | KSTAT_QUERY_FLAGS)) != 0)
@@ -199,6 +204,11 @@ retry:
goto out;
error = vfs_getattr(&path, stat, request_mask, flags);
+ stat->mnt_id = real_mount(path.mnt)->mnt_id;
+ stat->result_mask |= STATX_MNT_ID;
+ if (path.mnt->mnt_root == path.dentry)
+ stat->attributes |= STATX_ATTR_MOUNT_ROOT;
+ stat->attributes_mask |= STATX_ATTR_MOUNT_ROOT;
path_put(&path);
if (retry_estale(error, lookup_flags)) {
lookup_flags |= LOOKUP_REVAL;
@@ -533,7 +543,7 @@ SYSCALL_DEFINE4(fstatat64, int, dfd, const char __user *, filename,
}
#endif /* __ARCH_WANT_STAT64 || __ARCH_WANT_COMPAT_STAT64 */
-noinline_for_stack int
+static noinline_for_stack int
cp_statx(const struct kstat *stat, struct statx __user *buffer)
{
struct statx tmp;
@@ -563,10 +573,29 @@ cp_statx(const struct kstat *stat, struct statx __user *buffer)
tmp.stx_rdev_minor = MINOR(stat->rdev);
tmp.stx_dev_major = MAJOR(stat->dev);
tmp.stx_dev_minor = MINOR(stat->dev);
+ tmp.stx_mnt_id = stat->mnt_id;
return copy_to_user(buffer, &tmp, sizeof(tmp)) ? -EFAULT : 0;
}
+int do_statx(int dfd, const char __user *filename, unsigned flags,
+ unsigned int mask, struct statx __user *buffer)
+{
+ struct kstat stat;
+ int error;
+
+ if (mask & STATX__RESERVED)
+ return -EINVAL;
+ if ((flags & AT_STATX_SYNC_TYPE) == AT_STATX_SYNC_TYPE)
+ return -EINVAL;
+
+ error = vfs_statx(dfd, filename, flags, &stat, mask);
+ if (error)
+ return error;
+
+ return cp_statx(&stat, buffer);
+}
+
/**
* sys_statx - System call to get enhanced stats
* @dfd: Base directory to pathwalk from *or* fd to stat.
@@ -583,19 +612,7 @@ SYSCALL_DEFINE5(statx,
unsigned int, mask,
struct statx __user *, buffer)
{
- struct kstat stat;
- int error;
-
- if (mask & STATX__RESERVED)
- return -EINVAL;
- if ((flags & AT_STATX_SYNC_TYPE) == AT_STATX_SYNC_TYPE)
- return -EINVAL;
-
- error = vfs_statx(dfd, filename, flags, &stat, mask);
- if (error)
- return error;
-
- return cp_statx(&stat, buffer);
+ return do_statx(dfd, filename, flags, mask, buffer);
}
#ifdef CONFIG_COMPAT
diff --git a/fs/super.c b/fs/super.c
index a288cd60d2ae..bf3b7685b52a 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1598,12 +1598,10 @@ int super_setup_bdi_name(struct super_block *sb, char *fmt, ...)
int err;
va_list args;
- bdi = bdi_alloc(GFP_KERNEL);
+ bdi = bdi_alloc(NUMA_NO_NODE);
if (!bdi)
return -ENOMEM;
- bdi->name = sb->s_type->name;
-
va_start(args, fmt);
err = bdi_register_va(bdi, fmt, args);
va_end(args);
diff --git a/fs/sync.c b/fs/sync.c
index 4d1ff010bc5a..c6f6f5be5682 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -161,7 +161,7 @@ SYSCALL_DEFINE1(syncfs, int, fd)
{
struct fd f = fdget(fd);
struct super_block *sb;
- int ret;
+ int ret, ret2;
if (!f.file)
return -EBADF;
@@ -171,8 +171,10 @@ SYSCALL_DEFINE1(syncfs, int, fd)
ret = sync_filesystem(sb);
up_read(&sb->s_umount);
+ ret2 = errseq_check_and_advance(&sb->s_wb_err, &f.file->f_sb_err);
+
fdput(f);
- return ret;
+ return ret ? ret : ret2;
}
/**
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index aa85f2874a9f..59dffd5ca517 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -6,7 +6,7 @@
* Copyright (c) 2007 SUSE Linux Products GmbH
* Copyright (c) 2007 Tejun Heo <teheo@suse.de>
*
- * Please see Documentation/filesystems/sysfs.txt for more information.
+ * Please see Documentation/filesystems/sysfs.rst for more information.
*/
#define pr_fmt(fmt) "sysfs: " fmt
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 26bbf960e2a2..f275fcda62fb 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -6,7 +6,7 @@
* Copyright (c) 2007 SUSE Linux Products GmbH
* Copyright (c) 2007 Tejun Heo <teheo@suse.de>
*
- * Please see Documentation/filesystems/sysfs.txt for more information.
+ * Please see Documentation/filesystems/sysfs.rst for more information.
*/
#include <linux/module.h>
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index db81cfbab9d6..e747c135c1d1 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -6,7 +6,7 @@
* Copyright (c) 2007 SUSE Linux Products GmbH
* Copyright (c) 2007 Tejun Heo <teheo@suse.de>
*
- * Please see Documentation/filesystems/sysfs.txt for more information.
+ * Please see Documentation/filesystems/sysfs.rst for more information.
*/
#include <linux/fs.h>
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index c4deecc80f67..5603530a1a52 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -6,7 +6,7 @@
* Copyright (c) 2007 SUSE Linux Products GmbH
* Copyright (c) 2007 Tejun Heo <teheo@suse.de>
*
- * Please see Documentation/filesystems/sysfs.txt for more information.
+ * Please see Documentation/filesystems/sysfs.rst for more information.
*/
#include <linux/fs.h>
diff --git a/fs/sysv/Kconfig b/fs/sysv/Kconfig
index d4edf7d9ae10..b4e23e03fbeb 100644
--- a/fs/sysv/Kconfig
+++ b/fs/sysv/Kconfig
@@ -28,7 +28,7 @@ config SYSV_FS
tar" or preferably "info tar"). Note also that this option has
nothing whatsoever to do with the option "System V IPC". Read about
the System V file system in
- <file:Documentation/filesystems/sysv-fs.txt>.
+ <file:Documentation/filesystems/sysv-fs.rst>.
Saying Y here will enlarge your kernel by about 27 KB.
To compile this as a module, choose M here: the module will be called
diff --git a/fs/ubifs/auth.c b/fs/ubifs/auth.c
index f985a3fbbb36..cc5c0abfd536 100644
--- a/fs/ubifs/auth.c
+++ b/fs/ubifs/auth.c
@@ -31,15 +31,9 @@ int __ubifs_node_calc_hash(const struct ubifs_info *c, const void *node,
u8 *hash)
{
const struct ubifs_ch *ch = node;
- SHASH_DESC_ON_STACK(shash, c->hash_tfm);
- int err;
-
- shash->tfm = c->hash_tfm;
- err = crypto_shash_digest(shash, node, le32_to_cpu(ch->len), hash);
- if (err < 0)
- return err;
- return 0;
+ return crypto_shash_tfm_digest(c->hash_tfm, node, le32_to_cpu(ch->len),
+ hash);
}
/**
@@ -53,15 +47,7 @@ int __ubifs_node_calc_hash(const struct ubifs_info *c, const void *node,
static int ubifs_hash_calc_hmac(const struct ubifs_info *c, const u8 *hash,
u8 *hmac)
{
- SHASH_DESC_ON_STACK(shash, c->hmac_tfm);
- int err;
-
- shash->tfm = c->hmac_tfm;
-
- err = crypto_shash_digest(shash, hash, c->hash_len, hmac);
- if (err < 0)
- return err;
- return 0;
+ return crypto_shash_tfm_digest(c->hmac_tfm, hash, c->hash_len, hmac);
}
/**
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index 0f5a480fe264..31288d8fa2ce 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -815,7 +815,7 @@ void ubifs_dump_leb(const struct ubifs_info *c, int lnum)
pr_err("(pid %d) start dumping LEB %d\n", current->pid, lnum);
- buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
+ buf = __vmalloc(c->leb_size, GFP_NOFS);
if (!buf) {
ubifs_err(c, "cannot allocate memory for dumping LEB %d", lnum);
return;
diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c
index 29826c51883a..22bfda158f7f 100644
--- a/fs/ubifs/lprops.c
+++ b/fs/ubifs/lprops.c
@@ -1095,7 +1095,7 @@ static int scan_check_cb(struct ubifs_info *c,
return LPT_SCAN_CONTINUE;
}
- buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
+ buf = __vmalloc(c->leb_size, GFP_NOFS);
if (!buf)
return -ENOMEM;
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c
index ff5e0411cf2d..d76a19e460cd 100644
--- a/fs/ubifs/lpt_commit.c
+++ b/fs/ubifs/lpt_commit.c
@@ -1596,7 +1596,7 @@ static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum)
if (!dbg_is_chk_lprops(c))
return 0;
- buf = p = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
+ buf = p = __vmalloc(c->leb_size, GFP_NOFS);
if (!buf) {
ubifs_err(c, "cannot allocate memory for ltab checking");
return 0;
@@ -1845,7 +1845,7 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum)
void *buf, *p;
pr_err("(pid %d) start dumping LEB %d\n", current->pid, lnum);
- buf = p = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
+ buf = p = __vmalloc(c->leb_size, GFP_NOFS);
if (!buf) {
ubifs_err(c, "cannot allocate memory to dump LPT");
return;
diff --git a/fs/ubifs/master.c b/fs/ubifs/master.c
index 52a85c01397e..911d0555b9f2 100644
--- a/fs/ubifs/master.c
+++ b/fs/ubifs/master.c
@@ -68,12 +68,9 @@ static int mst_node_check_hash(const struct ubifs_info *c,
u8 calc[UBIFS_MAX_HASH_LEN];
const void *node = mst;
- SHASH_DESC_ON_STACK(shash, c->hash_tfm);
-
- shash->tfm = c->hash_tfm;
-
- crypto_shash_digest(shash, node + sizeof(struct ubifs_ch),
- UBIFS_MST_NODE_SZ - sizeof(struct ubifs_ch), calc);
+ crypto_shash_tfm_digest(c->hash_tfm, node + sizeof(struct ubifs_ch),
+ UBIFS_MST_NODE_SZ - sizeof(struct ubifs_ch),
+ calc);
if (ubifs_check_hash(c, expected, calc))
return -EPERM;
diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c
index 283f9eb48410..2c294085ffed 100644
--- a/fs/ubifs/orphan.c
+++ b/fs/ubifs/orphan.c
@@ -977,7 +977,7 @@ static int dbg_scan_orphans(struct ubifs_info *c, struct check_info *ci)
if (c->no_orphs)
return 0;
- buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
+ buf = __vmalloc(c->leb_size, GFP_NOFS);
if (!buf) {
ubifs_err(c, "cannot allocate memory to check orphans");
return 0;
diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c
index 01fcf7975047..b69ffac7e415 100644
--- a/fs/ubifs/replay.c
+++ b/fs/ubifs/replay.c
@@ -558,7 +558,7 @@ static int is_last_bud(struct ubifs_info *c, struct ubifs_bud *bud)
return data == 0xFFFFFFFF;
}
-/* authenticate_sleb_hash and authenticate_sleb_hmac are split out for stack usage */
+/* authenticate_sleb_hash is split out for stack usage */
static int authenticate_sleb_hash(struct ubifs_info *c, struct shash_desc *log_hash, u8 *hash)
{
SHASH_DESC_ON_STACK(hash_desc, c->hash_tfm);
@@ -569,15 +569,6 @@ static int authenticate_sleb_hash(struct ubifs_info *c, struct shash_desc *log_h
return crypto_shash_final(hash_desc, hash);
}
-static int authenticate_sleb_hmac(struct ubifs_info *c, u8 *hash, u8 *hmac)
-{
- SHASH_DESC_ON_STACK(hmac_desc, c->hmac_tfm);
-
- hmac_desc->tfm = c->hmac_tfm;
-
- return crypto_shash_digest(hmac_desc, hash, c->hash_len, hmac);
-}
-
/**
* authenticate_sleb - authenticate one scan LEB
* @c: UBIFS file-system description object
@@ -618,7 +609,8 @@ static int authenticate_sleb(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
if (err)
goto out;
- err = authenticate_sleb_hmac(c, hash, hmac);
+ err = crypto_shash_tfm_digest(c->hmac_tfm, hash,
+ c->hash_len, hmac);
if (err)
goto out;
diff --git a/fs/udf/Kconfig b/fs/udf/Kconfig
index 6848de581ce1..26e1a49f3ba7 100644
--- a/fs/udf/Kconfig
+++ b/fs/udf/Kconfig
@@ -9,7 +9,7 @@ config UDF_FS
compatible with standard unix file systems, it is also suitable for
removable USB disks. Say Y if you intend to mount DVD discs or CDRW's
written in packet mode, or if you want to use UDF for removable USB
- disks. Please read <file:Documentation/filesystems/udf.txt>.
+ disks. Please read <file:Documentation/filesystems/udf.rst>.
To compile this file system support as a module, choose M here: the
module will be called udf.
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index e875bc5668ee..adaba8e8b326 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -195,10 +195,9 @@ static int udf_readpage(struct file *file, struct page *page)
return mpage_readpage(page, udf_get_block);
}
-static int udf_readpages(struct file *file, struct address_space *mapping,
- struct list_head *pages, unsigned nr_pages)
+static void udf_readahead(struct readahead_control *rac)
{
- return mpage_readpages(mapping, pages, nr_pages, udf_get_block);
+ mpage_readahead(rac, udf_get_block);
}
static int udf_write_begin(struct file *file, struct address_space *mapping,
@@ -234,7 +233,7 @@ static sector_t udf_bmap(struct address_space *mapping, sector_t block)
const struct address_space_operations udf_aops = {
.readpage = udf_readpage,
- .readpages = udf_readpages,
+ .readahead = udf_readahead,
.writepage = udf_writepage,
.writepages = udf_writepages,
.write_begin = udf_write_begin,
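The ->readpages to ->readahead switch hands the filesystem a struct readahead_control instead of a page list. A rough sketch of how an mpage-style implementation consumes it; build_and_submit() below is a hypothetical stand-in for the bio construction that mpage_readahead() actually performs:

/* Sketch only: iterate the readahead window page by page. */
static void example_readahead(struct readahead_control *rac,
			      get_block_t get_block)
{
	struct page *page;

	while ((page = readahead_page(rac)) != NULL) {
		build_and_submit(page, readahead_count(rac), get_block);
		put_page(page);	/* drop the elevated refcount once submitted */
	}
}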
diff --git a/fs/udf/lowlevel.c b/fs/udf/lowlevel.c
index 5c7ec121990d..f1094cdcd6cd 100644
--- a/fs/udf/lowlevel.c
+++ b/fs/udf/lowlevel.c
@@ -27,41 +27,38 @@
unsigned int udf_get_last_session(struct super_block *sb)
{
+ struct cdrom_device_info *cdi = disk_to_cdi(sb->s_bdev->bd_disk);
struct cdrom_multisession ms_info;
- unsigned int vol_desc_start;
- struct block_device *bdev = sb->s_bdev;
- int i;
- vol_desc_start = 0;
- ms_info.addr_format = CDROM_LBA;
- i = ioctl_by_bdev(bdev, CDROMMULTISESSION, (unsigned long)&ms_info);
+ if (!cdi) {
+ udf_debug("CDROMMULTISESSION not supported.\n");
+ return 0;
+ }
- if (i == 0) {
+ ms_info.addr_format = CDROM_LBA;
+ if (cdrom_multisession(cdi, &ms_info) == 0) {
udf_debug("XA disk: %s, vol_desc_start=%d\n",
ms_info.xa_flag ? "yes" : "no", ms_info.addr.lba);
if (ms_info.xa_flag) /* necessary for a valid ms_info.addr */
- vol_desc_start = ms_info.addr.lba;
- } else {
- udf_debug("CDROMMULTISESSION not supported: rc=%d\n", i);
+ return ms_info.addr.lba;
}
- return vol_desc_start;
+ return 0;
}
unsigned long udf_get_last_block(struct super_block *sb)
{
struct block_device *bdev = sb->s_bdev;
+ struct cdrom_device_info *cdi = disk_to_cdi(bdev->bd_disk);
unsigned long lblock = 0;
/*
- * ioctl failed or returned obviously bogus value?
+ * The cdrom layer call failed or returned obviously bogus value?
+ * The cdrom layer call failed or returned an obviously bogus value?
* Try using the device size...
*/
- if (ioctl_by_bdev(bdev, CDROM_LAST_WRITTEN, (unsigned long) &lblock) ||
- lblock == 0)
+ if (!cdi || cdrom_get_last_written(cdi, &lblock) || lblock == 0)
lblock = i_size_read(bdev->bd_inode) >> sb->s_blocksize_bits;
if (lblock)
return lblock - 1;
- else
- return 0;
+ return 0;
}
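The rewrite leans on disk_to_cdi() to find the cdrom_device_info behind the gendisk; a NULL return replaces the old "ioctl not supported" error path for non-CD devices. The helper is presumably along these lines (reconstruction, not part of this patch):

/* Presumed helper from include/linux/cdrom.h. */
static inline struct cdrom_device_info *disk_to_cdi(struct gendisk *disk)
{
#ifdef CONFIG_CDROM
	return disk->cdi;
#else
	return NULL;
#endif
}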
diff --git a/fs/utimes.c b/fs/utimes.c
index 1d17ce98cb80..b7b927502d6e 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -95,13 +95,13 @@ long do_utimes(int dfd, const char __user *filename, struct timespec64 *times,
goto out;
}
- if (flags & ~AT_SYMLINK_NOFOLLOW)
+ if (flags & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH))
goto out;
if (filename == NULL && dfd != AT_FDCWD) {
struct fd f;
- if (flags & AT_SYMLINK_NOFOLLOW)
+ if (flags)
goto out;
f = fdget(dfd);
@@ -117,6 +117,8 @@ long do_utimes(int dfd, const char __user *filename, struct timespec64 *times,
if (!(flags & AT_SYMLINK_NOFOLLOW))
lookup_flags |= LOOKUP_FOLLOW;
+ if (flags & AT_EMPTY_PATH)
+ lookup_flags |= LOOKUP_EMPTY;
retry:
error = user_path_at(dfd, filename, lookup_flags, &path);
if (error)
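With the flag mask relaxed and LOOKUP_EMPTY wired into the lookup, timestamps can be updated through a descriptor alone. A minimal userspace sketch, assuming this patch is applied and the libc wrapper passes the flag through unchanged; "/tmp/example" is only a placeholder:

#define _GNU_SOURCE
#include <fcntl.h>
#include <sys/stat.h>
#include <stdio.h>

int main(void)
{
	int fd = open("/tmp/example", O_PATH);	/* placeholder path */
	struct timespec ts[2] = {
		{ .tv_nsec = UTIME_NOW },	/* atime: set to now */
		{ .tv_nsec = UTIME_OMIT },	/* mtime: leave as is */
	};

	if (fd < 0)
		return 1;
	/* Empty pathname + AT_EMPTY_PATH operates on the fd itself. */
	if (utimensat(fd, "", ts, AT_EMPTY_PATH) < 0) {
		perror("utimensat");
		return 1;
	}
	return 0;
}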
diff --git a/fs/verity/enable.c b/fs/verity/enable.c
index d98bea308fd7..5ab3bbec8108 100644
--- a/fs/verity/enable.c
+++ b/fs/verity/enable.c
@@ -329,6 +329,8 @@ rollback:
/**
* fsverity_ioctl_enable() - enable verity on a file
+ * @filp: file to enable verity on
+ * @uarg: user pointer to fsverity_enable_arg
*
* Enable fs-verity on a file. See the "FS_IOC_ENABLE_VERITY" section of
* Documentation/filesystems/fsverity.rst for the documentation.
diff --git a/fs/verity/fsverity_private.h b/fs/verity/fsverity_private.h
index 74768cf539da..e96d99d5145e 100644
--- a/fs/verity/fsverity_private.h
+++ b/fs/verity/fsverity_private.h
@@ -61,7 +61,7 @@ struct merkle_tree_params {
u64 level_start[FS_VERITY_MAX_LEVELS];
};
-/**
+/*
* fsverity_info - cached verity metadata for an inode
*
* When a verity file is first opened, an instance of this struct is allocated
@@ -134,7 +134,7 @@ void __init fsverity_check_hash_algs(void);
/* init.c */
-extern void __printf(3, 4) __cold
+void __printf(3, 4) __cold
fsverity_msg(const struct inode *inode, const char *level,
const char *fmt, ...);
diff --git a/fs/verity/measure.c b/fs/verity/measure.c
index 05049b68c745..df409a5682ed 100644
--- a/fs/verity/measure.c
+++ b/fs/verity/measure.c
@@ -11,6 +11,8 @@
/**
* fsverity_ioctl_measure() - get a verity file's measurement
+ * @filp: file to get measurement of
+ * @_uarg: user pointer to fsverity_digest
*
* Retrieve the file measurement that the kernel is enforcing for reads from a
* verity file. See the "FS_IOC_MEASURE_VERITY" section of
diff --git a/fs/verity/open.c b/fs/verity/open.c
index c5fe6948e262..d007db0c9304 100644
--- a/fs/verity/open.c
+++ b/fs/verity/open.c
@@ -330,6 +330,7 @@ EXPORT_SYMBOL_GPL(fsverity_prepare_setattr);
/**
* fsverity_cleanup_inode() - free the inode's verity info, if present
+ * @inode: an inode being evicted
*
* Filesystems must call this on inode eviction to free ->i_verity_info.
*/
diff --git a/fs/verity/signature.c b/fs/verity/signature.c
index c8b255232de5..b14ed96387ec 100644
--- a/fs/verity/signature.c
+++ b/fs/verity/signature.c
@@ -28,6 +28,9 @@ static struct key *fsverity_keyring;
/**
* fsverity_verify_signature() - check a verity file's signature
+ * @vi: the file's fsverity_info
+ * @desc: the file's fsverity_descriptor
+ * @desc_size: size of @desc
*
* If the file's fs-verity descriptor includes a signature of the file
* measurement, verify it against the certificates in the fs-verity keyring.
diff --git a/fs/verity/verify.c b/fs/verity/verify.c
index e0cb62da3864..a8b68c6f663d 100644
--- a/fs/verity/verify.c
+++ b/fs/verity/verify.c
@@ -179,6 +179,7 @@ out:
/**
* fsverity_verify_page() - verify a data page
* @page: the page to verify
*
* Verify a page that has just been read from a verity file. The page must be a
* pagecache page that is still locked and not yet uptodate.
@@ -206,6 +207,7 @@ EXPORT_SYMBOL_GPL(fsverity_verify_page);
#ifdef CONFIG_BLOCK
/**
* fsverity_verify_bio() - verify a 'read' bio that has just completed
+ * @bio: the bio to verify
*
* Verify a set of pages that have just been read from a verity file. The pages
* must be pagecache pages that are still locked and not yet uptodate. Pages
@@ -264,6 +266,7 @@ EXPORT_SYMBOL_GPL(fsverity_verify_bio);
/**
* fsverity_enqueue_verify_work() - enqueue work on the fs-verity workqueue
+ * @work: the work to enqueue
*
* Enqueue verification work for asynchronous processing.
*/
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 4f95df476181..04611a1068b4 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -7,8 +7,6 @@
ccflags-y += -I $(srctree)/$(src) # needed for trace events
ccflags-y += -I $(srctree)/$(src)/libxfs
-ccflags-$(CONFIG_XFS_DEBUG) += -g
-
obj-$(CONFIG_XFS_FS) += xfs.o
# this one should be compiled first, as the tracing macros can easily blow up
@@ -101,9 +99,12 @@ xfs-y += xfs_log.o \
xfs_log_cil.o \
xfs_bmap_item.o \
xfs_buf_item.o \
+ xfs_buf_item_recover.o \
+ xfs_dquot_item_recover.o \
xfs_extfree_item.o \
xfs_icreate_item.o \
xfs_inode_item.o \
+ xfs_inode_item_recover.o \
xfs_refcount_item.o \
xfs_rmap_item.o \
xfs_log_recover.o \
diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c
index 1da94237a8cf..f1366475c389 100644
--- a/fs/xfs/kmem.c
+++ b/fs/xfs/kmem.c
@@ -48,7 +48,7 @@ __kmem_vmalloc(size_t size, xfs_km_flags_t flags)
if (flags & KM_NOFS)
nofs_flag = memalloc_nofs_save();
- ptr = __vmalloc(size, lflags, PAGE_KERNEL);
+ ptr = __vmalloc(size, lflags);
if (flags & KM_NOFS)
memalloc_nofs_restore(nofs_flag);
diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h
index 6143117770e9..34cbcfde9228 100644
--- a/fs/xfs/kmem.h
+++ b/fs/xfs/kmem.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (c) 2000-2005 Silicon Graphics, Inc.
* All Rights Reserved.
@@ -19,6 +19,7 @@ typedef unsigned __bitwise xfs_km_flags_t;
#define KM_NOFS ((__force xfs_km_flags_t)0x0004u)
#define KM_MAYFAIL ((__force xfs_km_flags_t)0x0008u)
#define KM_ZERO ((__force xfs_km_flags_t)0x0010u)
+#define KM_NOLOCKDEP ((__force xfs_km_flags_t)0x0020u)
/*
* We use a special process flag to avoid recursive callbacks into
@@ -30,7 +31,7 @@ kmem_flags_convert(xfs_km_flags_t flags)
{
gfp_t lflags;
- BUG_ON(flags & ~(KM_NOFS|KM_MAYFAIL|KM_ZERO));
+ BUG_ON(flags & ~(KM_NOFS | KM_MAYFAIL | KM_ZERO | KM_NOLOCKDEP));
lflags = GFP_KERNEL | __GFP_NOWARN;
if (flags & KM_NOFS)
@@ -49,6 +50,9 @@ kmem_flags_convert(xfs_km_flags_t flags)
if (flags & KM_ZERO)
lflags |= __GFP_ZERO;
+ if (flags & KM_NOLOCKDEP)
+ lflags |= __GFP_NOLOCKDEP;
+
return lflags;
}
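The new flag only affects lockdep bookkeeping. With this hunk applied, a call such as kmem_flags_convert(KM_NOFS | KM_NOLOCKDEP) should come out to roughly the mask below (illustrative, inferred from the surrounding context; not part of the patch):

/* Illustrative only: */
gfp_t lflags = (GFP_KERNEL | __GFP_NOWARN | __GFP_NOLOCKDEP) & ~__GFP_FS;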
diff --git a/fs/xfs/libxfs/xfs_ag_resv.h b/fs/xfs/libxfs/xfs_ag_resv.h
index c0352edc8e41..f3fd0ee9a7f7 100644
--- a/fs/xfs/libxfs/xfs_ag_resv.h
+++ b/fs/xfs/libxfs/xfs_ag_resv.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0+
+/* SPDX-License-Identifier: GPL-2.0+ */
/*
* Copyright (C) 2016 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <darrick.wong@oracle.com>
diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h
index a851bf77f17b..6c22b12176b8 100644
--- a/fs/xfs/libxfs/xfs_alloc.h
+++ b/fs/xfs/libxfs/xfs_alloc.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
* All Rights Reserved.
diff --git a/fs/xfs/libxfs/xfs_alloc_btree.h b/fs/xfs/libxfs/xfs_alloc_btree.h
index 047f09f0be3c..a5b998e950fe 100644
--- a/fs/xfs/libxfs/xfs_alloc_btree.h
+++ b/fs/xfs/libxfs/xfs_alloc_btree.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (c) 2000,2005 Silicon Graphics, Inc.
* All Rights Reserved.
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index e4fe3dca9883..3b1bd6e112f8 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -61,8 +61,8 @@ xfs_inode_hasattr(
struct xfs_inode *ip)
{
if (!XFS_IFORK_Q(ip) ||
- (ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
- ip->i_d.di_anextents == 0))
+ (ip->i_afp->if_format == XFS_DINODE_FMT_EXTENTS &&
+ ip->i_afp->if_nextents == 0))
return 0;
return 1;
}
@@ -84,7 +84,7 @@ xfs_attr_get_ilocked(
if (!xfs_inode_hasattr(args->dp))
return -ENOATTR;
- if (args->dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL)
+ if (args->dp->i_afp->if_format == XFS_DINODE_FMT_LOCAL)
return xfs_attr_shortform_getvalue(args);
if (xfs_bmap_one_block(args->dp, XFS_ATTR_FORK))
return xfs_attr_leaf_get(args);
@@ -212,14 +212,14 @@ xfs_attr_set_args(
* If the attribute list is non-existent or a shortform list,
* upgrade it to a single-leaf-block attribute list.
*/
- if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL ||
- (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
- dp->i_d.di_anextents == 0)) {
+ if (dp->i_afp->if_format == XFS_DINODE_FMT_LOCAL ||
+ (dp->i_afp->if_format == XFS_DINODE_FMT_EXTENTS &&
+ dp->i_afp->if_nextents == 0)) {
/*
* Build initial attribute list (if required).
*/
- if (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS)
+ if (dp->i_afp->if_format == XFS_DINODE_FMT_EXTENTS)
xfs_attr_shortform_create(args);
/*
@@ -272,7 +272,7 @@ xfs_attr_remove_args(
if (!xfs_inode_hasattr(dp)) {
error = -ENOATTR;
- } else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
+ } else if (dp->i_afp->if_format == XFS_DINODE_FMT_LOCAL) {
ASSERT(dp->i_afp->if_flags & XFS_IFINLINE);
error = xfs_attr_shortform_remove(args);
} else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
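The xfs_attr.c hunks above are one instance of a theme that runs through the rest of this series: per-fork format and extent counts move out of the i_d shadow of the on-disk inode and into struct xfs_ifork itself. A hedged illustration of the resulting idiom (the helper name is invented for this note):

/* Hypothetical helper showing the new idiom: fetch the fork once and read
 * if_format / if_nextents directly rather than di_(a)format / di_(a)nextents. */
static inline bool
example_fork_is_local(struct xfs_inode *ip, int whichfork)
{
	struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);

	return ifp && ifp->if_format == XFS_DINODE_FMT_LOCAL;
}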
diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h
index 0d2d05908537..db4717657ca1 100644
--- a/fs/xfs/libxfs/xfs_attr.h
+++ b/fs/xfs/libxfs/xfs_attr.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (c) 2000,2002-2003,2005 Silicon Graphics, Inc.
* All Rights Reserved.
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index 863444e2dda7..2f7e89e4be3e 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -309,14 +309,6 @@ xfs_attr3_leaf_verify(
return fa;
/*
- * In recovery there is a transient state where count == 0 is valid
- * because we may have transitioned an empty shortform attr to a leaf
- * if the attr didn't fit in shortform.
- */
- if (!xfs_log_in_recovery(mp) && ichdr.count == 0)
- return __this_address;
-
- /*
* firstused is the block offset of the first name info structure.
* Make sure it doesn't go off the block or crash into the header.
*/
@@ -331,6 +323,13 @@ xfs_attr3_leaf_verify(
(char *)bp->b_addr + ichdr.firstused)
return __this_address;
+ /*
+ * NOTE: This verifier historically failed empty leaf buffers because
+ * we expect the fork to be in another format. Empty attr fork format
+ * conversions are possible during xattr set, however, and format
+ * conversion is not atomic with the xattr set that triggers it. We
+ * cannot assume leaf blocks are non-empty until that is addressed.
+ */
buf_end = (char *)bp->b_addr + mp->m_attr_geo->blksize;
for (i = 0, ent = entries; i < ichdr.count; ent++, i++) {
fa = xfs_attr3_leaf_verify_entry(mp, buf_end, leaf, &ichdr,
@@ -489,7 +488,7 @@ xfs_attr_copy_value(
}
if (!args->value) {
- args->value = kmem_alloc_large(valuelen, 0);
+ args->value = kmem_alloc_large(valuelen, KM_NOLOCKDEP);
if (!args->value)
return -ENOMEM;
}
@@ -539,7 +538,7 @@ xfs_attr_shortform_bytesfit(
/* rounded down */
offset = (XFS_LITINO(mp) - bytes) >> 3;
- if (dp->i_d.di_format == XFS_DINODE_FMT_DEV) {
+ if (dp->i_df.if_format == XFS_DINODE_FMT_DEV) {
minforkoff = roundup(sizeof(xfs_dev_t), 8) >> 3;
return (offset >= minforkoff) ? minforkoff : 0;
}
@@ -567,7 +566,7 @@ xfs_attr_shortform_bytesfit(
dsize = dp->i_df.if_bytes;
- switch (dp->i_d.di_format) {
+ switch (dp->i_df.if_format) {
case XFS_DINODE_FMT_EXTENTS:
/*
* If there is no attr fork and the data fork is extents,
@@ -636,22 +635,19 @@ xfs_sbversion_add_attr2(xfs_mount_t *mp, xfs_trans_t *tp)
* Create the initial contents of a shortform attribute list.
*/
void
-xfs_attr_shortform_create(xfs_da_args_t *args)
+xfs_attr_shortform_create(
+ struct xfs_da_args *args)
{
- xfs_attr_sf_hdr_t *hdr;
- xfs_inode_t *dp;
- struct xfs_ifork *ifp;
+ struct xfs_inode *dp = args->dp;
+ struct xfs_ifork *ifp = dp->i_afp;
+ struct xfs_attr_sf_hdr *hdr;
trace_xfs_attr_sf_create(args);
- dp = args->dp;
- ASSERT(dp != NULL);
- ifp = dp->i_afp;
- ASSERT(ifp != NULL);
ASSERT(ifp->if_bytes == 0);
- if (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS) {
+ if (ifp->if_format == XFS_DINODE_FMT_EXTENTS) {
ifp->if_flags &= ~XFS_IFEXTENTS; /* just in case */
- dp->i_d.di_aformat = XFS_DINODE_FMT_LOCAL;
+ ifp->if_format = XFS_DINODE_FMT_LOCAL;
ifp->if_flags |= XFS_IFINLINE;
} else {
ASSERT(ifp->if_flags & XFS_IFINLINE);
@@ -719,13 +715,12 @@ xfs_attr_fork_remove(
struct xfs_inode *ip,
struct xfs_trans *tp)
{
- xfs_idestroy_fork(ip, XFS_ATTR_FORK);
- ip->i_d.di_forkoff = 0;
- ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
-
- ASSERT(ip->i_d.di_anextents == 0);
- ASSERT(ip->i_afp == NULL);
+ ASSERT(ip->i_afp->if_nextents == 0);
+ xfs_idestroy_fork(ip->i_afp);
+ kmem_cache_free(xfs_ifork_zone, ip->i_afp);
+ ip->i_afp = NULL;
+ ip->i_d.di_forkoff = 0;
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
}
@@ -775,7 +770,7 @@ xfs_attr_shortform_remove(xfs_da_args_t *args)
totsize -= size;
if (totsize == sizeof(xfs_attr_sf_hdr_t) &&
(mp->m_flags & XFS_MOUNT_ATTR2) &&
- (dp->i_d.di_format != XFS_DINODE_FMT_BTREE) &&
+ (dp->i_df.if_format != XFS_DINODE_FMT_BTREE) &&
!(args->op_flags & XFS_DA_OP_ADDNAME)) {
xfs_attr_fork_remove(dp, args->trans);
} else {
@@ -785,7 +780,7 @@ xfs_attr_shortform_remove(xfs_da_args_t *args)
ASSERT(totsize > sizeof(xfs_attr_sf_hdr_t) ||
(args->op_flags & XFS_DA_OP_ADDNAME) ||
!(mp->m_flags & XFS_MOUNT_ATTR2) ||
- dp->i_d.di_format == XFS_DINODE_FMT_BTREE);
+ dp->i_df.if_format == XFS_DINODE_FMT_BTREE);
xfs_trans_log_inode(args->trans, dp,
XFS_ILOG_CORE | XFS_ILOG_ADATA);
}
@@ -962,7 +957,7 @@ xfs_attr_shortform_allfit(
+ be16_to_cpu(name_loc->valuelen);
}
if ((dp->i_mount->m_flags & XFS_MOUNT_ATTR2) &&
- (dp->i_d.di_format != XFS_DINODE_FMT_BTREE) &&
+ (dp->i_df.if_format != XFS_DINODE_FMT_BTREE) &&
(bytes == sizeof(struct xfs_attr_sf_hdr)))
return -1;
return xfs_attr_shortform_bytesfit(dp, bytes);
@@ -981,7 +976,7 @@ xfs_attr_shortform_verify(
int i;
int64_t size;
- ASSERT(ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL);
+ ASSERT(ip->i_afp->if_format == XFS_DINODE_FMT_LOCAL);
ifp = XFS_IFORK_PTR(ip, XFS_ATTR_FORK);
sfp = (struct xfs_attr_shortform *)ifp->if_u1.if_data;
size = ifp->if_bytes;
@@ -1085,7 +1080,7 @@ xfs_attr3_leaf_to_shortform(
if (forkoff == -1) {
ASSERT(dp->i_mount->m_flags & XFS_MOUNT_ATTR2);
- ASSERT(dp->i_d.di_format != XFS_DINODE_FMT_BTREE);
+ ASSERT(dp->i_df.if_format != XFS_DINODE_FMT_BTREE);
xfs_attr_fork_remove(dp, args->trans);
goto out;
}
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.h b/fs/xfs/libxfs/xfs_attr_leaf.h
index 6dd2d937a42a..5be6be309302 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.h
+++ b/fs/xfs/libxfs/xfs_attr_leaf.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (c) 2000,2002-2003,2005 Silicon Graphics, Inc.
* Copyright (c) 2013 Red Hat, Inc.
diff --git a/fs/xfs/libxfs/xfs_attr_remote.h b/fs/xfs/libxfs/xfs_attr_remote.h
index 6fb4572845ce..e1144f22b005 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.h
+++ b/fs/xfs/libxfs/xfs_attr_remote.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (c) 2013 Red Hat, Inc.
* All Rights Reserved.
diff --git a/fs/xfs/libxfs/xfs_attr_sf.h b/fs/xfs/libxfs/xfs_attr_sf.h
index aafa4fe70624..bb004fb7944a 100644
--- a/fs/xfs/libxfs/xfs_attr_sf.h
+++ b/fs/xfs/libxfs/xfs_attr_sf.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (c) 2000,2002,2005 Silicon Graphics, Inc.
* All Rights Reserved.
diff --git a/fs/xfs/libxfs/xfs_bit.h b/fs/xfs/libxfs/xfs_bit.h
index 99017b8df292..a04f266ae644 100644
--- a/fs/xfs/libxfs/xfs_bit.h
+++ b/fs/xfs/libxfs/xfs_bit.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (c) 2000,2002,2005 Silicon Graphics, Inc.
* All Rights Reserved.
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index fda13cd7add0..667cdd0dfdf4 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -61,10 +61,10 @@ xfs_bmap_compute_maxlevels(
int sz; /* root block size */
/*
- * The maximum number of extents in a file, hence the maximum
- * number of leaf entries, is controlled by the type of di_nextents
- * (a signed 32-bit number, xfs_extnum_t), or by di_anextents
- * (a signed 16-bit number, xfs_aextnum_t).
+ * The maximum number of extents in a file, hence the maximum number of
+ * leaf entries, is controlled by the size of the on-disk extent count,
+ * either a signed 32-bit number for the data fork, or a signed 16-bit
+ * number for the attr fork.
*
* Note that we can no longer assume that if we are in ATTR1 that
* the fork offset of all the inodes will be
@@ -120,10 +120,11 @@ xfs_bmbt_lookup_first(
*/
static inline bool xfs_bmap_needs_btree(struct xfs_inode *ip, int whichfork)
{
+ struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
+
return whichfork != XFS_COW_FORK &&
- XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
- XFS_IFORK_NEXTENTS(ip, whichfork) >
- XFS_IFORK_MAXEXT(ip, whichfork);
+ ifp->if_format == XFS_DINODE_FMT_EXTENTS &&
+ ifp->if_nextents > XFS_IFORK_MAXEXT(ip, whichfork);
}
/*
@@ -131,10 +132,11 @@ static inline bool xfs_bmap_needs_btree(struct xfs_inode *ip, int whichfork)
*/
static inline bool xfs_bmap_wants_extents(struct xfs_inode *ip, int whichfork)
{
+ struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
+
return whichfork != XFS_COW_FORK &&
- XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE &&
- XFS_IFORK_NEXTENTS(ip, whichfork) <=
- XFS_IFORK_MAXEXT(ip, whichfork);
+ ifp->if_format == XFS_DINODE_FMT_BTREE &&
+ ifp->if_nextents <= XFS_IFORK_MAXEXT(ip, whichfork);
}
/*
@@ -213,8 +215,8 @@ xfs_bmap_forkoff_reset(
int whichfork)
{
if (whichfork == XFS_ATTR_FORK &&
- ip->i_d.di_format != XFS_DINODE_FMT_DEV &&
- ip->i_d.di_format != XFS_DINODE_FMT_BTREE) {
+ ip->i_df.if_format != XFS_DINODE_FMT_DEV &&
+ ip->i_df.if_format != XFS_DINODE_FMT_BTREE) {
uint dfl_forkoff = xfs_default_attroffset(ip) >> 3;
if (dfl_forkoff > ip->i_d.di_forkoff)
@@ -315,31 +317,28 @@ xfs_bmap_check_leaf_extents(
xfs_inode_t *ip, /* incore inode pointer */
int whichfork) /* data or attr fork */
{
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
struct xfs_btree_block *block; /* current btree block */
xfs_fsblock_t bno; /* block # of "block" */
xfs_buf_t *bp; /* buffer for "block" */
int error; /* error return value */
xfs_extnum_t i=0, j; /* index into the extents list */
- struct xfs_ifork *ifp; /* fork structure */
int level; /* btree level, for checking */
- xfs_mount_t *mp; /* file system mount structure */
__be64 *pp; /* pointer to block address */
xfs_bmbt_rec_t *ep; /* pointer to current extent */
xfs_bmbt_rec_t last = {0, 0}; /* last extent in prev block */
xfs_bmbt_rec_t *nextp; /* pointer to next extent */
int bp_release = 0;
- if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE) {
+ if (ifp->if_format != XFS_DINODE_FMT_BTREE)
return;
- }
/* skip large extent count inodes */
- if (ip->i_d.di_nextents > 10000)
+ if (ip->i_df.if_nextents > 10000)
return;
bno = NULLFSBLOCK;
- mp = ip->i_mount;
- ifp = XFS_IFORK_PTR(ip, whichfork);
block = ifp->if_broot;
/*
* Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
@@ -604,7 +603,7 @@ xfs_bmap_btree_to_extents(
ASSERT(cur);
ASSERT(whichfork != XFS_COW_FORK);
ASSERT(ifp->if_flags & XFS_IFEXTENTS);
- ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE);
+ ASSERT(ifp->if_format == XFS_DINODE_FMT_BTREE);
ASSERT(be16_to_cpu(rblock->bb_level) == 1);
ASSERT(be16_to_cpu(rblock->bb_numrecs) == 1);
ASSERT(xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0) == 1);
@@ -632,7 +631,7 @@ xfs_bmap_btree_to_extents(
xfs_iroot_realloc(ip, -1, whichfork);
ASSERT(ifp->if_broot == NULL);
ASSERT((ifp->if_flags & XFS_IFBROOT) == 0);
- XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
+ ifp->if_format = XFS_DINODE_FMT_EXTENTS;
*logflagsp |= XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
return 0;
}
@@ -668,7 +667,7 @@ xfs_bmap_extents_to_btree(
mp = ip->i_mount;
ASSERT(whichfork != XFS_COW_FORK);
ifp = XFS_IFORK_PTR(ip, whichfork);
- ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS);
+ ASSERT(ifp->if_format == XFS_DINODE_FMT_EXTENTS);
/*
* Make space in the inode incore. This needs to be undone if we fail
@@ -692,7 +691,7 @@ xfs_bmap_extents_to_btree(
/*
* Convert to a btree with two levels, one record in root.
*/
- XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_BTREE);
+ ifp->if_format = XFS_DINODE_FMT_BTREE;
memset(&args, 0, sizeof(args));
args.tp = tp;
args.mp = mp;
@@ -750,7 +749,7 @@ xfs_bmap_extents_to_btree(
xfs_bmbt_disk_set_all(arp, &rec);
cnt++;
}
- ASSERT(cnt == XFS_IFORK_NEXTENTS(ip, whichfork));
+ ASSERT(cnt == ifp->if_nextents);
xfs_btree_set_numrecs(ablock, cnt);
/*
@@ -778,7 +777,7 @@ out_unreserve_dquot:
xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
out_root_realloc:
xfs_iroot_realloc(ip, -1, whichfork);
- XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
+ ifp->if_format = XFS_DINODE_FMT_EXTENTS;
ASSERT(ifp->if_broot == NULL);
xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
@@ -800,16 +799,16 @@ xfs_bmap_local_to_extents_empty(
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
ASSERT(whichfork != XFS_COW_FORK);
- ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
+ ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL);
ASSERT(ifp->if_bytes == 0);
- ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) == 0);
+ ASSERT(ifp->if_nextents == 0);
xfs_bmap_forkoff_reset(ip, whichfork);
ifp->if_flags &= ~XFS_IFINLINE;
ifp->if_flags |= XFS_IFEXTENTS;
ifp->if_u1.if_root = NULL;
ifp->if_height = 0;
- XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
+ ifp->if_format = XFS_DINODE_FMT_EXTENTS;
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
}
@@ -840,7 +839,7 @@ xfs_bmap_local_to_extents(
*/
ASSERT(!(S_ISREG(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK));
ifp = XFS_IFORK_PTR(ip, whichfork);
- ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
+ ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL);
if (!ifp->if_bytes) {
xfs_bmap_local_to_extents_empty(tp, ip, whichfork);
@@ -907,7 +906,7 @@ xfs_bmap_local_to_extents(
xfs_iext_first(ifp, &icur);
xfs_iext_insert(ip, &icur, &rec, 0);
- XFS_IFORK_NEXT_SET(ip, whichfork, 1);
+ ifp->if_nextents = 1;
ip->i_d.di_nblocks = 1;
xfs_trans_mod_dquot_byino(tp, ip,
XFS_TRANS_DQ_BCOUNT, 1L);
@@ -972,7 +971,8 @@ xfs_bmap_add_attrfork_extents(
xfs_btree_cur_t *cur; /* bmap btree cursor */
int error; /* error return value */
- if (ip->i_d.di_nextents * sizeof(xfs_bmbt_rec_t) <= XFS_IFORK_DSIZE(ip))
+ if (ip->i_df.if_nextents * sizeof(struct xfs_bmbt_rec) <=
+ XFS_IFORK_DSIZE(ip))
return 0;
cur = NULL;
error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0, flags,
@@ -1033,7 +1033,7 @@ xfs_bmap_set_attrforkoff(
int size,
int *version)
{
- switch (ip->i_d.di_format) {
+ switch (ip->i_df.if_format) {
case XFS_DINODE_FMT_DEV:
ip->i_d.di_forkoff = roundup(sizeof(xfs_dev_t), 8) >> 3;
break;
@@ -1091,17 +1091,6 @@ xfs_bmap_add_attrfork(
goto trans_cancel;
if (XFS_IFORK_Q(ip))
goto trans_cancel;
- if (XFS_IS_CORRUPT(mp, ip->i_d.di_anextents != 0)) {
- error = -EFSCORRUPTED;
- goto trans_cancel;
- }
- if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS) {
- /*
- * For inodes coming from pre-6.2 filesystems.
- */
- ASSERT(ip->i_d.di_aformat == 0);
- ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
- }
xfs_trans_ijoin(tp, ip, 0);
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
@@ -1110,9 +1099,10 @@ xfs_bmap_add_attrfork(
goto trans_cancel;
ASSERT(ip->i_afp == NULL);
ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, 0);
+ ip->i_afp->if_format = XFS_DINODE_FMT_EXTENTS;
ip->i_afp->if_flags = XFS_IFEXTENTS;
logflags = 0;
- switch (ip->i_d.di_format) {
+ switch (ip->i_df.if_format) {
case XFS_DINODE_FMT_LOCAL:
error = xfs_bmap_add_attrfork_local(tp, ip, &logflags);
break;
@@ -1183,13 +1173,13 @@ xfs_iread_bmbt_block(
xfs_extnum_t num_recs;
xfs_extnum_t j;
int whichfork = cur->bc_ino.whichfork;
+ struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
block = xfs_btree_get_block(cur, level, &bp);
/* Abort if we find more records than nextents. */
num_recs = xfs_btree_get_numrecs(block);
- if (unlikely(ir->loaded + num_recs >
- XFS_IFORK_NEXTENTS(ip, whichfork))) {
+ if (unlikely(ir->loaded + num_recs > ifp->if_nextents)) {
xfs_warn(ip->i_mount, "corrupt dinode %llu, (btree extents).",
(unsigned long long)ip->i_ino);
xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, block,
@@ -1215,7 +1205,7 @@ xfs_iread_bmbt_block(
xfs_bmap_fork_to_state(whichfork));
trace_xfs_read_extent(ip, &ir->icur,
xfs_bmap_fork_to_state(whichfork), _THIS_IP_);
- xfs_iext_next(XFS_IFORK_PTR(ip, whichfork), &ir->icur);
+ xfs_iext_next(ifp, &ir->icur);
}
return 0;
@@ -1238,9 +1228,7 @@ xfs_iread_extents(
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
- if (XFS_IS_CORRUPT(mp,
- XFS_IFORK_FORMAT(ip, whichfork) !=
- XFS_DINODE_FMT_BTREE)) {
+ if (XFS_IS_CORRUPT(mp, ifp->if_format != XFS_DINODE_FMT_BTREE)) {
error = -EFSCORRUPTED;
goto out;
}
@@ -1254,8 +1242,7 @@ xfs_iread_extents(
if (error)
goto out;
- if (XFS_IS_CORRUPT(mp,
- ir.loaded != XFS_IFORK_NEXTENTS(ip, whichfork))) {
+ if (XFS_IS_CORRUPT(mp, ir.loaded != ifp->if_nextents)) {
error = -EFSCORRUPTED;
goto out;
}
@@ -1289,14 +1276,13 @@ xfs_bmap_first_unused(
xfs_fileoff_t lowest, max;
int error;
- ASSERT(xfs_ifork_has_extents(ip, whichfork) ||
- XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
-
- if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
+ if (ifp->if_format == XFS_DINODE_FMT_LOCAL) {
*first_unused = 0;
return 0;
}
+ ASSERT(xfs_ifork_has_extents(ifp));
+
if (!(ifp->if_flags & XFS_IFEXTENTS)) {
error = xfs_iread_extents(tp, ip, whichfork);
if (error)
@@ -1337,7 +1323,7 @@ xfs_bmap_last_before(
struct xfs_iext_cursor icur;
int error;
- switch (XFS_IFORK_FORMAT(ip, whichfork)) {
+ switch (ifp->if_format) {
case XFS_DINODE_FMT_LOCAL:
*last_block = 0;
return 0;
@@ -1436,16 +1422,17 @@ xfs_bmap_last_offset(
xfs_fileoff_t *last_block,
int whichfork)
{
+ struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
struct xfs_bmbt_irec rec;
int is_empty;
int error;
*last_block = 0;
- if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL)
+ if (ifp->if_format == XFS_DINODE_FMT_LOCAL)
return 0;
- if (XFS_IS_CORRUPT(ip->i_mount, !xfs_ifork_has_extents(ip, whichfork)))
+ if (XFS_IS_CORRUPT(ip->i_mount, !xfs_ifork_has_extents(ifp)))
return -EFSCORRUPTED;
error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, &is_empty);
@@ -1463,23 +1450,22 @@ xfs_bmap_last_offset(
*/
int /* 1=>1 block, 0=>otherwise */
xfs_bmap_one_block(
- xfs_inode_t *ip, /* incore inode */
- int whichfork) /* data or attr fork */
+ struct xfs_inode *ip, /* incore inode */
+ int whichfork) /* data or attr fork */
{
- struct xfs_ifork *ifp; /* inode fork pointer */
- int rval; /* return value */
- xfs_bmbt_irec_t s; /* internal version of extent */
+ struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
+ int rval; /* return value */
+ struct xfs_bmbt_irec s; /* internal version of extent */
struct xfs_iext_cursor icur;
#ifndef DEBUG
if (whichfork == XFS_DATA_FORK)
return XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize;
#endif /* !DEBUG */
- if (XFS_IFORK_NEXTENTS(ip, whichfork) != 1)
+ if (ifp->if_nextents != 1)
return 0;
- if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
+ if (ifp->if_format != XFS_DINODE_FMT_EXTENTS)
return 0;
- ifp = XFS_IFORK_PTR(ip, whichfork);
ASSERT(ifp->if_flags & XFS_IFEXTENTS);
xfs_iext_first(ifp, &icur);
xfs_iext_get_extent(ifp, &icur, &s);
@@ -1501,10 +1487,11 @@ xfs_bmap_add_extent_delay_real(
struct xfs_bmalloca *bma,
int whichfork)
{
+ struct xfs_mount *mp = bma->ip->i_mount;
+ struct xfs_ifork *ifp = XFS_IFORK_PTR(bma->ip, whichfork);
struct xfs_bmbt_irec *new = &bma->got;
int error; /* error return value */
int i; /* temp state */
- struct xfs_ifork *ifp; /* inode fork pointer */
xfs_fileoff_t new_endoff; /* end offset of new entry */
xfs_bmbt_irec_t r[3]; /* neighbor extent entries */
/* left is 0, right is 1, prev is 2 */
@@ -1514,16 +1501,9 @@ xfs_bmap_add_extent_delay_real(
xfs_filblks_t da_old; /* old count del alloc blocks used */
xfs_filblks_t temp=0; /* value for da_new calculations */
int tmp_rval; /* partial logging flags */
- struct xfs_mount *mp;
- xfs_extnum_t *nextents;
struct xfs_bmbt_irec old;
- mp = bma->ip->i_mount;
- ifp = XFS_IFORK_PTR(bma->ip, whichfork);
ASSERT(whichfork != XFS_ATTR_FORK);
- nextents = (whichfork == XFS_COW_FORK ? &bma->ip->i_cnextents :
- &bma->ip->i_d.di_nextents);
-
ASSERT(!isnullstartblock(new->br_startblock));
ASSERT(!bma->cur ||
(bma->cur->bc_ino.flags & XFS_BTCUR_BMBT_WASDEL));
@@ -1614,7 +1594,7 @@ xfs_bmap_add_extent_delay_real(
xfs_iext_remove(bma->ip, &bma->icur, state);
xfs_iext_prev(ifp, &bma->icur);
xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT);
- (*nextents)--;
+ ifp->if_nextents--;
if (bma->cur == NULL)
rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
@@ -1718,8 +1698,8 @@ xfs_bmap_add_extent_delay_real(
PREV.br_startblock = new->br_startblock;
PREV.br_state = new->br_state;
xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
+ ifp->if_nextents++;
- (*nextents)++;
if (bma->cur == NULL)
rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
else {
@@ -1784,7 +1764,8 @@ xfs_bmap_add_extent_delay_real(
* The left neighbor is not contiguous.
*/
xfs_iext_update_extent(bma->ip, state, &bma->icur, new);
- (*nextents)++;
+ ifp->if_nextents++;
+
if (bma->cur == NULL)
rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
else {
@@ -1870,7 +1851,8 @@ xfs_bmap_add_extent_delay_real(
* The right neighbor is not contiguous.
*/
xfs_iext_update_extent(bma->ip, state, &bma->icur, new);
- (*nextents)++;
+ ifp->if_nextents++;
+
if (bma->cur == NULL)
rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
else {
@@ -1955,7 +1937,7 @@ xfs_bmap_add_extent_delay_real(
xfs_iext_next(ifp, &bma->icur);
xfs_iext_insert(bma->ip, &bma->icur, &RIGHT, state);
xfs_iext_insert(bma->ip, &bma->icur, &LEFT, state);
- (*nextents)++;
+ ifp->if_nextents++;
if (bma->cur == NULL)
rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
@@ -2159,8 +2141,7 @@ xfs_bmap_add_extent_unwritten_real(
xfs_iext_remove(ip, icur, state);
xfs_iext_prev(ifp, icur);
xfs_iext_update_extent(ip, state, icur, &LEFT);
- XFS_IFORK_NEXT_SET(ip, whichfork,
- XFS_IFORK_NEXTENTS(ip, whichfork) - 2);
+ ifp->if_nextents -= 2;
if (cur == NULL)
rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
else {
@@ -2212,8 +2193,7 @@ xfs_bmap_add_extent_unwritten_real(
xfs_iext_remove(ip, icur, state);
xfs_iext_prev(ifp, icur);
xfs_iext_update_extent(ip, state, icur, &LEFT);
- XFS_IFORK_NEXT_SET(ip, whichfork,
- XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
+ ifp->if_nextents--;
if (cur == NULL)
rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
else {
@@ -2255,9 +2235,8 @@ xfs_bmap_add_extent_unwritten_real(
xfs_iext_remove(ip, icur, state);
xfs_iext_prev(ifp, icur);
xfs_iext_update_extent(ip, state, icur, &PREV);
+ ifp->if_nextents--;
- XFS_IFORK_NEXT_SET(ip, whichfork,
- XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
if (cur == NULL)
rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
else {
@@ -2364,8 +2343,8 @@ xfs_bmap_add_extent_unwritten_real(
xfs_iext_update_extent(ip, state, icur, &PREV);
xfs_iext_insert(ip, icur, new, state);
- XFS_IFORK_NEXT_SET(ip, whichfork,
- XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
+ ifp->if_nextents++;
+
if (cur == NULL)
rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
else {
@@ -2440,9 +2419,8 @@ xfs_bmap_add_extent_unwritten_real(
xfs_iext_update_extent(ip, state, icur, &PREV);
xfs_iext_next(ifp, icur);
xfs_iext_insert(ip, icur, new, state);
+ ifp->if_nextents++;
- XFS_IFORK_NEXT_SET(ip, whichfork,
- XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
if (cur == NULL)
rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
else {
@@ -2493,9 +2471,8 @@ xfs_bmap_add_extent_unwritten_real(
xfs_iext_next(ifp, icur);
xfs_iext_insert(ip, icur, &r[1], state);
xfs_iext_insert(ip, icur, &r[0], state);
+ ifp->if_nextents += 2;
- XFS_IFORK_NEXT_SET(ip, whichfork,
- XFS_IFORK_NEXTENTS(ip, whichfork) + 2);
if (cur == NULL)
rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
else {
@@ -2810,9 +2787,8 @@ xfs_bmap_add_extent_hole_real(
xfs_iext_remove(ip, icur, state);
xfs_iext_prev(ifp, icur);
xfs_iext_update_extent(ip, state, icur, &left);
+ ifp->if_nextents--;
- XFS_IFORK_NEXT_SET(ip, whichfork,
- XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
if (cur == NULL) {
rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
} else {
@@ -2910,8 +2886,8 @@ xfs_bmap_add_extent_hole_real(
* Insert a new entry.
*/
xfs_iext_insert(ip, icur, new, state);
- XFS_IFORK_NEXT_SET(ip, whichfork,
- XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
+ ifp->if_nextents++;
+
if (cur == NULL) {
rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
} else {
@@ -3891,7 +3867,8 @@ xfs_bmapi_read(
int flags)
{
struct xfs_mount *mp = ip->i_mount;
- struct xfs_ifork *ifp;
+ int whichfork = xfs_bmapi_whichfork(flags);
+ struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
struct xfs_bmbt_irec got;
xfs_fileoff_t obno;
xfs_fileoff_t end;
@@ -3899,48 +3876,23 @@ xfs_bmapi_read(
int error;
bool eof = false;
int n = 0;
- int whichfork = xfs_bmapi_whichfork(flags);
ASSERT(*nmap >= 1);
- ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK|XFS_BMAPI_ENTIRE|
- XFS_BMAPI_COWFORK)));
+ ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK | XFS_BMAPI_ENTIRE)));
ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED|XFS_ILOCK_EXCL));
- if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ip, whichfork)) ||
- XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
+ if (WARN_ON_ONCE(!ifp))
+ return -EFSCORRUPTED;
+
+ if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
+ XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT))
return -EFSCORRUPTED;
- }
if (XFS_FORCED_SHUTDOWN(mp))
return -EIO;
XFS_STATS_INC(mp, xs_blk_mapr);
- ifp = XFS_IFORK_PTR(ip, whichfork);
- if (!ifp) {
- /* No CoW fork? Return a hole. */
- if (whichfork == XFS_COW_FORK) {
- mval->br_startoff = bno;
- mval->br_startblock = HOLESTARTBLOCK;
- mval->br_blockcount = len;
- mval->br_state = XFS_EXT_NORM;
- *nmap = 1;
- return 0;
- }
-
- /*
- * A missing attr ifork implies that the inode says we're in
- * extents or btree format but failed to pass the inode fork
- * verifier while trying to load it. Treat that as a file
- * corruption too.
- */
-#ifdef DEBUG
- xfs_alert(mp, "%s: inode %llu missing fork %d",
- __func__, ip->i_ino, whichfork);
-#endif /* DEBUG */
- return -EFSCORRUPTED;
- }
-
if (!(ifp->if_flags & XFS_IFEXTENTS)) {
error = xfs_iread_extents(NULL, ip, whichfork);
if (error)
@@ -4193,17 +4145,7 @@ xfs_bmapi_allocate(
bma->got.br_blockcount = bma->length;
bma->got.br_state = XFS_EXT_NORM;
- /*
- * In the data fork, a wasdelay extent has been initialized, so
- * shouldn't be flagged as unwritten.
- *
- * For the cow fork, however, we convert delalloc reservations
- * (extents allocated for speculative preallocation) to
- * allocated unwritten extents, and only convert the unwritten
- * extents to real extents when we're about to write the data.
- */
- if ((!bma->wasdel || (bma->flags & XFS_BMAPI_COWFORK)) &&
- (bma->flags & XFS_BMAPI_PREALLOC))
+ if (bma->flags & XFS_BMAPI_PREALLOC)
bma->got.br_state = XFS_EXT_UNWRITTEN;
if (bma->wasdel)
@@ -4317,11 +4259,13 @@ xfs_bmapi_minleft(
struct xfs_inode *ip,
int fork)
{
+ struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, fork);
+
if (tp && tp->t_firstblock != NULLFSBLOCK)
return 0;
- if (XFS_IFORK_FORMAT(ip, fork) != XFS_DINODE_FMT_BTREE)
+ if (ifp->if_format != XFS_DINODE_FMT_BTREE)
return 1;
- return be16_to_cpu(XFS_IFORK_PTR(ip, fork)->if_broot->bb_level) + 1;
+ return be16_to_cpu(ifp->if_broot->bb_level) + 1;
}
/*
@@ -4336,11 +4280,13 @@ xfs_bmapi_finish(
int whichfork,
int error)
{
+ struct xfs_ifork *ifp = XFS_IFORK_PTR(bma->ip, whichfork);
+
if ((bma->logflags & xfs_ilog_fext(whichfork)) &&
- XFS_IFORK_FORMAT(bma->ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
+ ifp->if_format != XFS_DINODE_FMT_EXTENTS)
bma->logflags &= ~xfs_ilog_fext(whichfork);
else if ((bma->logflags & xfs_ilog_fbroot(whichfork)) &&
- XFS_IFORK_FORMAT(bma->ip, whichfork) != XFS_DINODE_FMT_BTREE)
+ ifp->if_format != XFS_DINODE_FMT_BTREE)
bma->logflags &= ~xfs_ilog_fbroot(whichfork);
if (bma->logflags)
@@ -4372,13 +4318,13 @@ xfs_bmapi_write(
.total = total,
};
struct xfs_mount *mp = ip->i_mount;
- struct xfs_ifork *ifp;
+ int whichfork = xfs_bmapi_whichfork(flags);
+ struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
xfs_fileoff_t end; /* end of mapped file region */
bool eof = false; /* after the end of extents */
int error; /* error return */
int n; /* current extent index */
xfs_fileoff_t obno; /* old block number (offset) */
- int whichfork; /* data or attr fork */
#ifdef DEBUG
xfs_fileoff_t orig_bno; /* original block number value */
@@ -4393,13 +4339,12 @@ xfs_bmapi_write(
orig_mval = mval;
orig_nmap = *nmap;
#endif
- whichfork = xfs_bmapi_whichfork(flags);
ASSERT(*nmap >= 1);
ASSERT(*nmap <= XFS_BMAP_MAX_NMAP);
ASSERT(tp != NULL);
ASSERT(len > 0);
- ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL);
+ ASSERT(ifp->if_format != XFS_DINODE_FMT_LOCAL);
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
ASSERT(!(flags & XFS_BMAPI_REMAP));
@@ -4415,7 +4360,7 @@ xfs_bmapi_write(
ASSERT((flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO)) !=
(XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO));
- if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ip, whichfork)) ||
+ if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
return -EFSCORRUPTED;
}
@@ -4423,8 +4368,6 @@ xfs_bmapi_write(
if (XFS_FORCED_SHUTDOWN(mp))
return -EIO;
- ifp = XFS_IFORK_PTR(ip, whichfork);
-
XFS_STATS_INC(mp, xs_blk_mapw);
if (!(ifp->if_flags & XFS_IFEXTENTS)) {
@@ -4534,9 +4477,8 @@ xfs_bmapi_write(
if (error)
goto error0;
- ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE ||
- XFS_IFORK_NEXTENTS(ip, whichfork) >
- XFS_IFORK_MAXEXT(ip, whichfork));
+ ASSERT(ifp->if_format != XFS_DINODE_FMT_BTREE ||
+ ifp->if_nextents > XFS_IFORK_MAXEXT(ip, whichfork));
xfs_bmapi_finish(&bma, whichfork, 0);
xfs_bmap_validate_ret(orig_bno, orig_len, orig_flags, orig_mval,
orig_nmap, *nmap);
@@ -4611,8 +4553,23 @@ xfs_bmapi_convert_delalloc(
bma.offset = bma.got.br_startoff;
bma.length = max_t(xfs_filblks_t, bma.got.br_blockcount, MAXEXTLEN);
bma.minleft = xfs_bmapi_minleft(tp, ip, whichfork);
+
+ /*
+ * When we're converting the delalloc reservations backing dirty pages
+ * in the page cache, we must be careful about how we create the new
+ * extents:
+ *
+ * New CoW fork extents are created unwritten, turned into real extents
+ * when we're about to write the data to disk, and mapped into the data
+ * fork after the write finishes. End of story.
+ *
+ * New data fork extents must be mapped in as unwritten and converted
+ * to real extents after the write succeeds to avoid exposing stale
+ * disk contents if we crash.
+ */
+ bma.flags = XFS_BMAPI_PREALLOC;
if (whichfork == XFS_COW_FORK)
- bma.flags = XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC;
+ bma.flags |= XFS_BMAPI_COWFORK;
if (!xfs_iext_peek_prev_extent(ifp, &bma.icur, &bma.prev))
bma.prev.br_startoff = NULLFILEOFF;
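Spelled out, the comment above means both fork types now request preallocation semantics, so the mapping that comes back stays unwritten until the write completes (see the XFS_BMAPI_PREALLOC handling in xfs_bmapi_allocate() earlier in this file). Roughly:

/* Illustrative summary, not part of the patch:
 *	data fork: bma.flags == XFS_BMAPI_PREALLOC
 *	CoW fork:  bma.flags == XFS_BMAPI_PREALLOC | XFS_BMAPI_COWFORK
 * and xfs_bmapi_allocate() then sets got.br_state = XFS_EXT_UNWRITTEN.
 */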
@@ -4682,7 +4639,7 @@ xfs_bmapi_remap(
ASSERT((flags & (XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC)) !=
(XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC));
- if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ip, whichfork)) ||
+ if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
return -EFSCORRUPTED;
}
@@ -4726,9 +4683,9 @@ xfs_bmapi_remap(
error = xfs_bmap_btree_to_extents(tp, ip, cur, &logflags, whichfork);
error0:
- if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS)
+ if (ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS)
logflags &= ~XFS_ILOG_DEXT;
- else if (ip->i_d.di_format != XFS_DINODE_FMT_BTREE)
+ else if (ip->i_df.if_format != XFS_DINODE_FMT_BTREE)
logflags &= ~XFS_ILOG_DBROOT;
if (logflags)
@@ -5078,9 +5035,8 @@ xfs_bmap_del_extent_real(
* conversion to btree format, since the transaction will be dirty then.
*/
if (tp->t_blk_res == 0 &&
- XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
- XFS_IFORK_NEXTENTS(ip, whichfork) >=
- XFS_IFORK_MAXEXT(ip, whichfork) &&
+ ifp->if_format == XFS_DINODE_FMT_EXTENTS &&
+ ifp->if_nextents >= XFS_IFORK_MAXEXT(ip, whichfork) &&
del->br_startoff > got.br_startoff && del_endoff < got_endoff)
return -ENOSPC;
@@ -5132,8 +5088,8 @@ xfs_bmap_del_extent_real(
*/
xfs_iext_remove(ip, icur, state);
xfs_iext_prev(ifp, icur);
- XFS_IFORK_NEXT_SET(ip, whichfork,
- XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
+ ifp->if_nextents--;
+
flags |= XFS_ILOG_CORE;
if (!cur) {
flags |= xfs_ilog_fext(whichfork);
@@ -5241,8 +5197,8 @@ xfs_bmap_del_extent_real(
}
} else
flags |= xfs_ilog_fext(whichfork);
- XFS_IFORK_NEXT_SET(ip, whichfork,
- XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
+
+ ifp->if_nextents++;
xfs_iext_next(ifp, icur);
xfs_iext_insert(ip, icur, &new, state);
break;
@@ -5322,7 +5278,7 @@ __xfs_bunmapi(
whichfork = xfs_bmapi_whichfork(flags);
ASSERT(whichfork != XFS_COW_FORK);
ifp = XFS_IFORK_PTR(ip, whichfork);
- if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ip, whichfork)))
+ if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)))
return -EFSCORRUPTED;
if (XFS_FORCED_SHUTDOWN(mp))
return -EIO;
@@ -5360,7 +5316,7 @@ __xfs_bunmapi(
logflags = 0;
if (ifp->if_flags & XFS_IFBROOT) {
- ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE);
+ ASSERT(ifp->if_format == XFS_DINODE_FMT_BTREE);
cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
cur->bc_ino.flags = 0;
} else
@@ -5605,10 +5561,10 @@ error0:
* logging the extent records if we've converted to btree format.
*/
if ((logflags & xfs_ilog_fext(whichfork)) &&
- XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
+ ifp->if_format != XFS_DINODE_FMT_EXTENTS)
logflags &= ~xfs_ilog_fext(whichfork);
else if ((logflags & xfs_ilog_fbroot(whichfork)) &&
- XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)
+ ifp->if_format != XFS_DINODE_FMT_BTREE)
logflags &= ~xfs_ilog_fbroot(whichfork);
/*
* Log inode even in the error case, if the transaction
@@ -5690,6 +5646,7 @@ xfs_bmse_merge(
struct xfs_btree_cur *cur,
int *logflags) /* output */
{
+ struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
struct xfs_bmbt_irec new;
xfs_filblks_t blockcount;
int error, i;
@@ -5708,8 +5665,7 @@ xfs_bmse_merge(
* Update the on-disk extent count, the btree if necessary and log the
* inode.
*/
- XFS_IFORK_NEXT_SET(ip, whichfork,
- XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
+ ifp->if_nextents--;
*logflags |= XFS_ILOG_CORE;
if (!cur) {
*logflags |= XFS_ILOG_DEXT;
@@ -5747,7 +5703,7 @@ xfs_bmse_merge(
done:
xfs_iext_remove(ip, icur, 0);
- xfs_iext_prev(XFS_IFORK_PTR(ip, whichfork), icur);
+ xfs_iext_prev(ifp, icur);
xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), icur,
&new);
@@ -5819,7 +5775,7 @@ xfs_bmap_collapse_extents(
int error = 0;
int logflags = 0;
- if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ip, whichfork)) ||
+ if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
return -EFSCORRUPTED;
}
@@ -5936,7 +5892,7 @@ xfs_bmap_insert_extents(
int error = 0;
int logflags = 0;
- if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ip, whichfork)) ||
+ if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
return -EFSCORRUPTED;
}
@@ -6030,18 +5986,18 @@ xfs_bmap_split_extent(
xfs_fileoff_t split_fsb)
{
int whichfork = XFS_DATA_FORK;
+ struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
struct xfs_btree_cur *cur = NULL;
struct xfs_bmbt_irec got;
struct xfs_bmbt_irec new; /* split extent */
struct xfs_mount *mp = ip->i_mount;
- struct xfs_ifork *ifp;
xfs_fsblock_t gotblkcnt; /* new block count for got */
struct xfs_iext_cursor icur;
int error = 0;
int logflags = 0;
int i = 0;
- if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ip, whichfork)) ||
+ if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
return -EFSCORRUPTED;
}
@@ -6049,7 +6005,6 @@ xfs_bmap_split_extent(
if (XFS_FORCED_SHUTDOWN(mp))
return -EIO;
- ifp = XFS_IFORK_PTR(ip, whichfork);
if (!(ifp->if_flags & XFS_IFEXTENTS)) {
/* Read in all the extents */
error = xfs_iread_extents(tp, ip, whichfork);
@@ -6097,8 +6052,7 @@ xfs_bmap_split_extent(
/* Add new extent */
xfs_iext_next(ifp, &icur);
xfs_iext_insert(ip, &icur, &new, 0);
- XFS_IFORK_NEXT_SET(ip, whichfork,
- XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
+ ifp->if_nextents++;
if (cur) {
error = xfs_bmbt_lookup_eq(cur, &new, &i);
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index f3259ad5c22c..6028a3c825ba 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (c) 2000-2006 Silicon Graphics, Inc.
* All Rights Reserved.
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
index 295a59cf8840..d9c63f17d2de 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -636,10 +636,7 @@ xfs_bmbt_change_owner(
ASSERT(tp || buffer_list);
ASSERT(!(tp && buffer_list));
- if (whichfork == XFS_DATA_FORK)
- ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_BTREE);
- else
- ASSERT(ip->i_d.di_aformat == XFS_DINODE_FMT_BTREE);
+ ASSERT(XFS_IFORK_PTR(ip, whichfork)->if_format == XFS_DINODE_FMT_BTREE);
cur = xfs_bmbt_init_cursor(ip->i_mount, tp, ip, whichfork);
if (!cur)
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.h b/fs/xfs/libxfs/xfs_bmap_btree.h
index 29b407d053b4..72bf74c79fb9 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.h
+++ b/fs/xfs/libxfs/xfs_bmap_btree.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (c) 2000,2002-2005 Silicon Graphics, Inc.
* All Rights Reserved.
diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index 8626c5a81aad..10e50cbacacf 100644
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
* All Rights Reserved.
diff --git a/fs/xfs/libxfs/xfs_da_btree.h b/fs/xfs/libxfs/xfs_da_btree.h
index 53e503b6f186..6e25de6621e4 100644
--- a/fs/xfs/libxfs/xfs_da_btree.h
+++ b/fs/xfs/libxfs/xfs_da_btree.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (c) 2000,2002,2005 Silicon Graphics, Inc.
* Copyright (c) 2013 Red Hat, Inc.
diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h
index 08c0a4d98b89..059ac108b1b3 100644
--- a/fs/xfs/libxfs/xfs_da_format.h
+++ b/fs/xfs/libxfs/xfs_da_format.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
* Copyright (c) 2013 Red Hat, Inc.
diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
index 22557527cfdb..d8f586256add 100644
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -178,6 +178,18 @@ static const struct xfs_defer_op_type *defer_op_types[] = {
[XFS_DEFER_OPS_TYPE_AGFL_FREE] = &xfs_agfl_free_defer_type,
};
+static void
+xfs_defer_create_intent(
+ struct xfs_trans *tp,
+ struct xfs_defer_pending *dfp,
+ bool sort)
+{
+ const struct xfs_defer_op_type *ops = defer_op_types[dfp->dfp_type];
+
+ dfp->dfp_intent = ops->create_intent(tp, &dfp->dfp_work,
+ dfp->dfp_count, sort);
+}
+
/*
* For each pending item in the intake list, log its intent item and the
* associated extents, then add the entire intake list to the end of
@@ -187,17 +199,11 @@ STATIC void
xfs_defer_create_intents(
struct xfs_trans *tp)
{
- struct list_head *li;
struct xfs_defer_pending *dfp;
- const struct xfs_defer_op_type *ops;
list_for_each_entry(dfp, &tp->t_dfops, dfp_list) {
- ops = defer_op_types[dfp->dfp_type];
- dfp->dfp_intent = ops->create_intent(tp, dfp->dfp_count);
trace_xfs_defer_create_intent(tp->t_mountp, dfp);
- list_sort(tp->t_mountp, &dfp->dfp_work, ops->diff_items);
- list_for_each(li, &dfp->dfp_work)
- ops->log_item(tp, dfp->dfp_intent, li);
+ xfs_defer_create_intent(tp, dfp, true);
}
}
@@ -234,10 +240,13 @@ xfs_defer_trans_roll(
struct xfs_log_item *lip;
struct xfs_buf *bplist[XFS_DEFER_OPS_NR_BUFS];
struct xfs_inode *iplist[XFS_DEFER_OPS_NR_INODES];
+ unsigned int ordered = 0; /* bitmap */
int bpcount = 0, ipcount = 0;
int i;
int error;
+ BUILD_BUG_ON(NBBY * sizeof(ordered) < XFS_DEFER_OPS_NR_BUFS);
+
list_for_each_entry(lip, &tp->t_items, li_trans) {
switch (lip->li_type) {
case XFS_LI_BUF:
@@ -248,7 +257,10 @@ xfs_defer_trans_roll(
ASSERT(0);
return -EFSCORRUPTED;
}
- xfs_trans_dirty_buf(tp, bli->bli_buf);
+ if (bli->bli_flags & XFS_BLI_ORDERED)
+ ordered |= (1U << bpcount);
+ else
+ xfs_trans_dirty_buf(tp, bli->bli_buf);
bplist[bpcount++] = bli->bli_buf;
}
break;
@@ -289,6 +301,8 @@ xfs_defer_trans_roll(
/* Rejoin the buffers and dirty them so the log moves forward. */
for (i = 0; i < bpcount; i++) {
xfs_trans_bjoin(tp, bplist[i]);
+ if (ordered & (1U << i))
+ xfs_trans_ordered_buf(tp, bplist[i]);
xfs_trans_bhold(tp, bplist[i]);
}
@@ -346,6 +360,53 @@ xfs_defer_cancel_list(
}
/*
+ * Log an intent-done item for the first pending intent, and finish the work
+ * items.
+ */
+static int
+xfs_defer_finish_one(
+ struct xfs_trans *tp,
+ struct xfs_defer_pending *dfp)
+{
+ const struct xfs_defer_op_type *ops = defer_op_types[dfp->dfp_type];
+ struct xfs_btree_cur *state = NULL;
+ struct list_head *li, *n;
+ int error;
+
+ trace_xfs_defer_pending_finish(tp->t_mountp, dfp);
+
+ dfp->dfp_done = ops->create_done(tp, dfp->dfp_intent, dfp->dfp_count);
+ list_for_each_safe(li, n, &dfp->dfp_work) {
+ list_del(li);
+ dfp->dfp_count--;
+ error = ops->finish_item(tp, dfp->dfp_done, li, &state);
+ if (error == -EAGAIN) {
+ /*
+ * Caller wants a fresh transaction; put the work item
+ * back on the list and log a new log intent item to
+ * replace the old one. See "Requesting a Fresh
+ * Transaction while Finishing Deferred Work" above.
+ */
+ list_add(li, &dfp->dfp_work);
+ dfp->dfp_count++;
+ dfp->dfp_done = NULL;
+ xfs_defer_create_intent(tp, dfp, false);
+ }
+
+ if (error)
+ goto out;
+ }
+
+ /* Done with the dfp, free it. */
+ list_del(&dfp->dfp_list);
+ kmem_free(dfp);
+out:
+ if (ops->finish_cleanup)
+ ops->finish_cleanup(tp, state, error);
+ return error;
+}
+
+/*
* Finish all the pending work. This involves logging intent items for
* any work items that wandered in since the last transaction roll (if
* one has even happened), rolling the transaction, and finishing the
@@ -358,11 +419,7 @@ xfs_defer_finish_noroll(
struct xfs_trans **tp)
{
struct xfs_defer_pending *dfp;
- struct list_head *li;
- struct list_head *n;
- void *state;
int error = 0;
- const struct xfs_defer_op_type *ops;
LIST_HEAD(dop_pending);
ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);
@@ -371,87 +428,30 @@ xfs_defer_finish_noroll(
/* Until we run out of pending work to finish... */
while (!list_empty(&dop_pending) || !list_empty(&(*tp)->t_dfops)) {
- /* log intents and pull in intake items */
xfs_defer_create_intents(*tp);
list_splice_tail_init(&(*tp)->t_dfops, &dop_pending);
- /*
- * Roll the transaction.
- */
error = xfs_defer_trans_roll(tp);
if (error)
- goto out;
+ goto out_shutdown;
- /* Log an intent-done item for the first pending item. */
dfp = list_first_entry(&dop_pending, struct xfs_defer_pending,
dfp_list);
- ops = defer_op_types[dfp->dfp_type];
- trace_xfs_defer_pending_finish((*tp)->t_mountp, dfp);
- dfp->dfp_done = ops->create_done(*tp, dfp->dfp_intent,
- dfp->dfp_count);
-
- /* Finish the work items. */
- state = NULL;
- list_for_each_safe(li, n, &dfp->dfp_work) {
- list_del(li);
- dfp->dfp_count--;
- error = ops->finish_item(*tp, li, dfp->dfp_done,
- &state);
- if (error == -EAGAIN) {
- /*
- * Caller wants a fresh transaction;
- * put the work item back on the list
- * and jump out.
- */
- list_add(li, &dfp->dfp_work);
- dfp->dfp_count++;
- break;
- } else if (error) {
- /*
- * Clean up after ourselves and jump out.
- * xfs_defer_cancel will take care of freeing
- * all these lists and stuff.
- */
- if (ops->finish_cleanup)
- ops->finish_cleanup(*tp, state, error);
- goto out;
- }
- }
- if (error == -EAGAIN) {
- /*
- * Caller wants a fresh transaction, so log a
- * new log intent item to replace the old one
- * and roll the transaction. See "Requesting
- * a Fresh Transaction while Finishing
- * Deferred Work" above.
- */
- dfp->dfp_intent = ops->create_intent(*tp,
- dfp->dfp_count);
- dfp->dfp_done = NULL;
- list_for_each(li, &dfp->dfp_work)
- ops->log_item(*tp, dfp->dfp_intent, li);
- } else {
- /* Done with the dfp, free it. */
- list_del(&dfp->dfp_list);
- kmem_free(dfp);
- }
-
- if (ops->finish_cleanup)
- ops->finish_cleanup(*tp, state, error);
- }
-
-out:
- if (error) {
- xfs_defer_trans_abort(*tp, &dop_pending);
- xfs_force_shutdown((*tp)->t_mountp, SHUTDOWN_CORRUPT_INCORE);
- trace_xfs_defer_finish_error(*tp, error);
- xfs_defer_cancel_list((*tp)->t_mountp, &dop_pending);
- xfs_defer_cancel(*tp);
- return error;
+ error = xfs_defer_finish_one(*tp, dfp);
+ if (error && error != -EAGAIN)
+ goto out_shutdown;
}
trace_xfs_defer_finish_done(*tp, _RET_IP_);
return 0;
+
+out_shutdown:
+ xfs_defer_trans_abort(*tp, &dop_pending);
+ xfs_force_shutdown((*tp)->t_mountp, SHUTDOWN_CORRUPT_INCORE);
+ trace_xfs_defer_finish_error(*tp, error);
+ xfs_defer_cancel_list((*tp)->t_mountp, &dop_pending);
+ xfs_defer_cancel(*tp);
+ return error;
}
int
diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h
index 7c28d7608ac6..6b2ca580f2b0 100644
--- a/fs/xfs/libxfs/xfs_defer.h
+++ b/fs/xfs/libxfs/xfs_defer.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0+
+/* SPDX-License-Identifier: GPL-2.0+ */
/*
* Copyright (C) 2016 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <darrick.wong@oracle.com>
@@ -6,6 +6,7 @@
#ifndef __XFS_DEFER_H__
#define __XFS_DEFER_H__
+struct xfs_btree_cur;
struct xfs_defer_op_type;
/*
@@ -28,8 +29,8 @@ enum xfs_defer_ops_type {
struct xfs_defer_pending {
struct list_head dfp_list; /* pending items */
struct list_head dfp_work; /* work items */
- void *dfp_intent; /* log intent item */
- void *dfp_done; /* log done item */
+ struct xfs_log_item *dfp_intent; /* log intent item */
+ struct xfs_log_item *dfp_done; /* log done item */
unsigned int dfp_count; /* # extent items */
enum xfs_defer_ops_type dfp_type;
};
@@ -43,15 +44,16 @@ void xfs_defer_move(struct xfs_trans *dtp, struct xfs_trans *stp);
/* Description of a deferred type. */
struct xfs_defer_op_type {
- void (*abort_intent)(void *);
- void *(*create_done)(struct xfs_trans *, void *, unsigned int);
- int (*finish_item)(struct xfs_trans *, struct list_head *, void *,
- void **);
- void (*finish_cleanup)(struct xfs_trans *, void *, int);
- void (*cancel_item)(struct list_head *);
- int (*diff_items)(void *, struct list_head *, struct list_head *);
- void *(*create_intent)(struct xfs_trans *, uint);
- void (*log_item)(struct xfs_trans *, void *, struct list_head *);
+ struct xfs_log_item *(*create_intent)(struct xfs_trans *tp,
+ struct list_head *items, unsigned int count, bool sort);
+ void (*abort_intent)(struct xfs_log_item *intent);
+ struct xfs_log_item *(*create_done)(struct xfs_trans *tp,
+ struct xfs_log_item *intent, unsigned int count);
+ int (*finish_item)(struct xfs_trans *tp, struct xfs_log_item *done,
+ struct list_head *item, struct xfs_btree_cur **state);
+ void (*finish_cleanup)(struct xfs_trans *tp,
+ struct xfs_btree_cur *state, int error);
+ void (*cancel_item)(struct list_head *item);
unsigned int max_items;
};
diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c
index dd6fcaaea318..612a9c5e41b1 100644
--- a/fs/xfs/libxfs/xfs_dir2.c
+++ b/fs/xfs/libxfs/xfs_dir2.c
@@ -278,7 +278,7 @@ xfs_dir_createname(
if (!inum)
args->op_flags |= XFS_DA_OP_JUSTCHECK;
- if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
+ if (dp->i_df.if_format == XFS_DINODE_FMT_LOCAL) {
rval = xfs_dir2_sf_addname(args);
goto out_free;
}
@@ -373,7 +373,7 @@ xfs_dir_lookup(
args->op_flags |= XFS_DA_OP_CILOOKUP;
lock_mode = xfs_ilock_data_map_shared(dp);
- if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
+ if (dp->i_df.if_format == XFS_DINODE_FMT_LOCAL) {
rval = xfs_dir2_sf_lookup(args);
goto out_check_rval;
}
@@ -443,7 +443,7 @@ xfs_dir_removename(
args->whichfork = XFS_DATA_FORK;
args->trans = tp;
- if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
+ if (dp->i_df.if_format == XFS_DINODE_FMT_LOCAL) {
rval = xfs_dir2_sf_removename(args);
goto out_free;
}
@@ -504,7 +504,7 @@ xfs_dir_replace(
args->whichfork = XFS_DATA_FORK;
args->trans = tp;
- if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
+ if (dp->i_df.if_format == XFS_DINODE_FMT_LOCAL) {
rval = xfs_dir2_sf_replace(args);
goto out_free;
}
diff --git a/fs/xfs/libxfs/xfs_dir2.h b/fs/xfs/libxfs/xfs_dir2.h
index 033777e282f2..e55378640b05 100644
--- a/fs/xfs/libxfs/xfs_dir2.h
+++ b/fs/xfs/libxfs/xfs_dir2.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
* All Rights Reserved.
diff --git a/fs/xfs/libxfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c
index 1dbf2f980a26..5b59d3f7746b 100644
--- a/fs/xfs/libxfs/xfs_dir2_block.c
+++ b/fs/xfs/libxfs/xfs_dir2_block.c
@@ -1104,7 +1104,7 @@ xfs_dir2_sf_to_block(
ASSERT(ifp->if_bytes == dp->i_d.di_size);
ASSERT(ifp->if_u1.if_data != NULL);
ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(oldsfp->i8count));
- ASSERT(dp->i_d.di_nextents == 0);
+ ASSERT(dp->i_df.if_nextents == 0);
/*
* Copy the directory into a temporary buffer.
diff --git a/fs/xfs/libxfs/xfs_dir2_priv.h b/fs/xfs/libxfs/xfs_dir2_priv.h
index 01ee0b926572..44c6a77cba05 100644
--- a/fs/xfs/libxfs/xfs_dir2_priv.h
+++ b/fs/xfs/libxfs/xfs_dir2_priv.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
* All Rights Reserved.
diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c
index 7b7f6fb2ea3b..2463b5d73447 100644
--- a/fs/xfs/libxfs/xfs_dir2_sf.c
+++ b/fs/xfs/libxfs/xfs_dir2_sf.c
@@ -343,7 +343,7 @@ xfs_dir2_block_to_sf(
*/
ASSERT(dp->i_df.if_bytes == 0);
xfs_init_local_fork(dp, XFS_DATA_FORK, sfp, size);
- dp->i_d.di_format = XFS_DINODE_FMT_LOCAL;
+ dp->i_df.if_format = XFS_DINODE_FMT_LOCAL;
dp->i_d.di_size = size;
logflags |= XFS_ILOG_DDATA;
@@ -710,11 +710,11 @@ xfs_dir2_sf_verify(
struct xfs_inode *ip)
{
struct xfs_mount *mp = ip->i_mount;
+ struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
struct xfs_dir2_sf_hdr *sfp;
struct xfs_dir2_sf_entry *sfep;
struct xfs_dir2_sf_entry *next_sfep;
char *endp;
- struct xfs_ifork *ifp;
xfs_ino_t ino;
int i;
int i8count;
@@ -723,9 +723,8 @@ xfs_dir2_sf_verify(
int error;
uint8_t filetype;
- ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_LOCAL);
+ ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL);
- ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
sfp = (struct xfs_dir2_sf_hdr *)ifp->if_u1.if_data;
size = ifp->if_bytes;
@@ -827,9 +826,9 @@ xfs_dir2_sf_create(
* If it's currently a zero-length extent file,
* convert it to local format.
*/
- if (dp->i_d.di_format == XFS_DINODE_FMT_EXTENTS) {
+ if (dp->i_df.if_format == XFS_DINODE_FMT_EXTENTS) {
dp->i_df.if_flags &= ~XFS_IFEXTENTS; /* just in case */
- dp->i_d.di_format = XFS_DINODE_FMT_LOCAL;
+ dp->i_df.if_format = XFS_DINODE_FMT_LOCAL;
xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE);
dp->i_df.if_flags |= XFS_IFINLINE;
}
@@ -1027,7 +1026,7 @@ xfs_dir2_sf_replace_needblock(
int newsize;
struct xfs_dir2_sf_hdr *sfp;
- if (dp->i_d.di_format != XFS_DINODE_FMT_LOCAL)
+ if (dp->i_df.if_format != XFS_DINODE_FMT_LOCAL)
return false;
sfp = (struct xfs_dir2_sf_hdr *)dp->i_df.if_u1.if_data;
diff --git a/fs/xfs/libxfs/xfs_errortag.h b/fs/xfs/libxfs/xfs_errortag.h
index 79e6c4fb1d8a..53b305dea381 100644
--- a/fs/xfs/libxfs/xfs_errortag.h
+++ b/fs/xfs/libxfs/xfs_errortag.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0+
+/* SPDX-License-Identifier: GPL-2.0+ */
/*
* Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
* Copyright (C) 2017 Oracle.
@@ -55,7 +55,8 @@
#define XFS_ERRTAG_FORCE_SCRUB_REPAIR 32
#define XFS_ERRTAG_FORCE_SUMMARY_RECALC 33
#define XFS_ERRTAG_IUNLINK_FALLBACK 34
-#define XFS_ERRTAG_MAX 35
+#define XFS_ERRTAG_BUF_IOERROR 35
+#define XFS_ERRTAG_MAX 36
/*
* Random factors for above tags, 1 means always, 2 means 1/2 time, etc.
@@ -95,5 +96,6 @@
#define XFS_RANDOM_FORCE_SCRUB_REPAIR 1
#define XFS_RANDOM_FORCE_SUMMARY_RECALC 1
#define XFS_RANDOM_IUNLINK_FALLBACK (XFS_RANDOM_DEFAULT/10)
+#define XFS_RANDOM_BUF_IOERROR XFS_RANDOM_DEFAULT
#endif /* __XFS_ERRORTAG_H_ */
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 045556e78ee2..b42a52bfa1e9 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (c) 2000-2005 Silicon Graphics, Inc.
* All Rights Reserved.
@@ -964,13 +964,12 @@ enum xfs_dinode_fmt {
/*
* Inode data & attribute fork sizes, per inode.
*/
-#define XFS_DFORK_Q(dip) ((dip)->di_forkoff != 0)
#define XFS_DFORK_BOFF(dip) ((int)((dip)->di_forkoff << 3))
#define XFS_DFORK_DSIZE(dip,mp) \
- (XFS_DFORK_Q(dip) ? XFS_DFORK_BOFF(dip) : XFS_LITINO(mp))
+ ((dip)->di_forkoff ? XFS_DFORK_BOFF(dip) : XFS_LITINO(mp))
#define XFS_DFORK_ASIZE(dip,mp) \
- (XFS_DFORK_Q(dip) ? XFS_LITINO(mp) - XFS_DFORK_BOFF(dip) : 0)
+ ((dip)->di_forkoff ? XFS_LITINO(mp) - XFS_DFORK_BOFF(dip) : 0)
#define XFS_DFORK_SIZE(dip,mp,w) \
((w) == XFS_DATA_FORK ? \
XFS_DFORK_DSIZE(dip, mp) : \
@@ -1681,7 +1680,7 @@ struct xfs_acl_entry {
struct xfs_acl {
__be32 acl_cnt;
- struct xfs_acl_entry acl_entry[0];
+ struct xfs_acl_entry acl_entry[];
};
/*
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index 245188e4f6d3..84bcffa87753 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: LGPL-2.1
+/* SPDX-License-Identifier: LGPL-2.1 */
/*
* Copyright (c) 1995-2005 Silicon Graphics, Inc.
* All Rights Reserved.
diff --git a/fs/xfs/libxfs/xfs_health.h b/fs/xfs/libxfs/xfs_health.h
index 272005ac8c88..99e796256c5d 100644
--- a/fs/xfs/libxfs/xfs_health.h
+++ b/fs/xfs/libxfs/xfs_health.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0+
+/* SPDX-License-Identifier: GPL-2.0+ */
/*
* Copyright (C) 2019 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <darrick.wong@oracle.com>
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index 39c5a6e24915..6f84ea85fdd8 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -161,8 +161,7 @@ xfs_imap_to_bp(
struct xfs_imap *imap,
struct xfs_dinode **dipp,
struct xfs_buf **bpp,
- uint buf_flags,
- uint iget_flags)
+ uint buf_flags)
{
struct xfs_buf *bp;
int error;
@@ -172,12 +171,7 @@ xfs_imap_to_bp(
(int)imap->im_len, buf_flags, &bp,
&xfs_inode_buf_ops);
if (error) {
- if (error == -EAGAIN) {
- ASSERT(buf_flags & XBF_TRYLOCK);
- return error;
- }
- xfs_warn(mp, "%s: xfs_trans_read_buf() returned error %d.",
- __func__, error);
+ ASSERT(error != -EAGAIN || (buf_flags & XBF_TRYLOCK));
return error;
}
@@ -186,13 +180,36 @@ xfs_imap_to_bp(
return 0;
}
-void
+int
xfs_inode_from_disk(
struct xfs_inode *ip,
struct xfs_dinode *from)
{
struct xfs_icdinode *to = &ip->i_d;
struct inode *inode = VFS_I(ip);
+ int error;
+ xfs_failaddr_t fa;
+
+ ASSERT(ip->i_cowfp == NULL);
+ ASSERT(ip->i_afp == NULL);
+
+ fa = xfs_dinode_verify(ip->i_mount, ip->i_ino, from);
+ if (fa) {
+ xfs_inode_verifier_error(ip, -EFSCORRUPTED, "dinode", from,
+ sizeof(*from), fa);
+ return -EFSCORRUPTED;
+ }
+
+ /*
+ * First get the permanent information that is needed to allocate an
+ * inode. If the inode is unused, mode is zero and we shouldn't mess
+	 * with the uninitialized part of it.
+ */
+ to->di_flushiter = be16_to_cpu(from->di_flushiter);
+ inode->i_generation = be32_to_cpu(from->di_gen);
+ inode->i_mode = be16_to_cpu(from->di_mode);
+ if (!inode->i_mode)
+ return 0;
/*
* Convert v1 inodes immediately to v2 inode format as this is the
@@ -208,10 +225,8 @@ xfs_inode_from_disk(
be16_to_cpu(from->di_projid_lo);
}
- to->di_format = from->di_format;
i_uid_write(inode, be32_to_cpu(from->di_uid));
i_gid_write(inode, be32_to_cpu(from->di_gid));
- to->di_flushiter = be16_to_cpu(from->di_flushiter);
/*
* Time is signed, so need to convert to signed 32 bit before
@@ -225,16 +240,11 @@ xfs_inode_from_disk(
inode->i_mtime.tv_nsec = (int)be32_to_cpu(from->di_mtime.t_nsec);
inode->i_ctime.tv_sec = (int)be32_to_cpu(from->di_ctime.t_sec);
inode->i_ctime.tv_nsec = (int)be32_to_cpu(from->di_ctime.t_nsec);
- inode->i_generation = be32_to_cpu(from->di_gen);
- inode->i_mode = be16_to_cpu(from->di_mode);
to->di_size = be64_to_cpu(from->di_size);
to->di_nblocks = be64_to_cpu(from->di_nblocks);
to->di_extsize = be32_to_cpu(from->di_extsize);
- to->di_nextents = be32_to_cpu(from->di_nextents);
- to->di_anextents = be16_to_cpu(from->di_anextents);
to->di_forkoff = from->di_forkoff;
- to->di_aformat = from->di_aformat;
to->di_dmevmask = be32_to_cpu(from->di_dmevmask);
to->di_dmstate = be16_to_cpu(from->di_dmstate);
to->di_flags = be16_to_cpu(from->di_flags);
@@ -247,6 +257,22 @@ xfs_inode_from_disk(
to->di_flags2 = be64_to_cpu(from->di_flags2);
to->di_cowextsize = be32_to_cpu(from->di_cowextsize);
}
+
+ error = xfs_iformat_data_fork(ip, from);
+ if (error)
+ return error;
+ if (from->di_forkoff) {
+ error = xfs_iformat_attr_fork(ip, from);
+ if (error)
+ goto out_destroy_data_fork;
+ }
+ if (xfs_is_reflink_inode(ip))
+ xfs_ifork_init_cow(ip);
+ return 0;
+
+out_destroy_data_fork:
+ xfs_idestroy_fork(&ip->i_df);
+ return error;
}
void
@@ -261,7 +287,7 @@ xfs_inode_to_disk(
to->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
to->di_onlink = 0;
- to->di_format = from->di_format;
+ to->di_format = xfs_ifork_format(&ip->i_df);
to->di_uid = cpu_to_be32(i_uid_read(inode));
to->di_gid = cpu_to_be32(i_gid_read(inode));
to->di_projid_lo = cpu_to_be16(from->di_projid & 0xffff);
@@ -281,10 +307,10 @@ xfs_inode_to_disk(
to->di_size = cpu_to_be64(from->di_size);
to->di_nblocks = cpu_to_be64(from->di_nblocks);
to->di_extsize = cpu_to_be32(from->di_extsize);
- to->di_nextents = cpu_to_be32(from->di_nextents);
- to->di_anextents = cpu_to_be16(from->di_anextents);
+ to->di_nextents = cpu_to_be32(xfs_ifork_nextents(&ip->i_df));
+ to->di_anextents = cpu_to_be16(xfs_ifork_nextents(ip->i_afp));
to->di_forkoff = from->di_forkoff;
- to->di_aformat = from->di_aformat;
+ to->di_aformat = xfs_ifork_format(ip->i_afp);
to->di_dmevmask = cpu_to_be32(from->di_dmevmask);
to->di_dmstate = cpu_to_be16(from->di_dmstate);
to->di_flags = cpu_to_be16(from->di_flags);
@@ -405,7 +431,7 @@ xfs_dinode_verify_forkoff(
struct xfs_dinode *dip,
struct xfs_mount *mp)
{
- if (!XFS_DFORK_Q(dip))
+ if (!dip->di_forkoff)
return NULL;
switch (dip->di_format) {
@@ -508,7 +534,7 @@ xfs_dinode_verify(
return __this_address;
}
- if (XFS_DFORK_Q(dip)) {
+ if (dip->di_forkoff) {
fa = xfs_dinode_verify_fork(dip, mp, XFS_ATTR_FORK);
if (fa)
return fa;
@@ -585,122 +611,6 @@ xfs_dinode_calc_crc(
}
/*
- * Read the disk inode attributes into the in-core inode structure.
- *
- * For version 5 superblocks, if we are initialising a new inode and we are not
- * utilising the XFS_MOUNT_IKEEP inode cluster mode, we can simple build the new
- * inode core with a random generation number. If we are keeping inodes around,
- * we need to read the inode cluster to get the existing generation number off
- * disk. Further, if we are using version 4 superblocks (i.e. v1/v2 inode
- * format) then log recovery is dependent on the di_flushiter field being
- * initialised from the current on-disk value and hence we must also read the
- * inode off disk.
- */
-int
-xfs_iread(
- xfs_mount_t *mp,
- xfs_trans_t *tp,
- xfs_inode_t *ip,
- uint iget_flags)
-{
- xfs_buf_t *bp;
- xfs_dinode_t *dip;
- xfs_failaddr_t fa;
- int error;
-
- /*
- * Fill in the location information in the in-core inode.
- */
- error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, iget_flags);
- if (error)
- return error;
-
- /* shortcut IO on inode allocation if possible */
- if ((iget_flags & XFS_IGET_CREATE) &&
- xfs_sb_version_has_v3inode(&mp->m_sb) &&
- !(mp->m_flags & XFS_MOUNT_IKEEP)) {
- VFS_I(ip)->i_generation = prandom_u32();
- return 0;
- }
-
- /*
- * Get pointers to the on-disk inode and the buffer containing it.
- */
- error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &bp, 0, iget_flags);
- if (error)
- return error;
-
- /* even unallocated inodes are verified */
- fa = xfs_dinode_verify(mp, ip->i_ino, dip);
- if (fa) {
- xfs_inode_verifier_error(ip, -EFSCORRUPTED, "dinode", dip,
- sizeof(*dip), fa);
- error = -EFSCORRUPTED;
- goto out_brelse;
- }
-
- /*
- * If the on-disk inode is already linked to a directory
- * entry, copy all of the inode into the in-core inode.
- * xfs_iformat_fork() handles copying in the inode format
- * specific information.
- * Otherwise, just get the truly permanent information.
- */
- if (dip->di_mode) {
- xfs_inode_from_disk(ip, dip);
- error = xfs_iformat_fork(ip, dip);
- if (error) {
-#ifdef DEBUG
- xfs_alert(mp, "%s: xfs_iformat() returned error %d",
- __func__, error);
-#endif /* DEBUG */
- goto out_brelse;
- }
- } else {
- /*
- * Partial initialisation of the in-core inode. Just the bits
- * that xfs_ialloc won't overwrite or relies on being correct.
- */
- VFS_I(ip)->i_generation = be32_to_cpu(dip->di_gen);
- ip->i_d.di_flushiter = be16_to_cpu(dip->di_flushiter);
-
- /*
- * Make sure to pull in the mode here as well in
- * case the inode is released without being used.
- * This ensures that xfs_inactive() will see that
- * the inode is already free and not try to mess
- * with the uninitialized part of it.
- */
- VFS_I(ip)->i_mode = 0;
- }
-
- ip->i_delayed_blks = 0;
-
- /*
- * Mark the buffer containing the inode as something to keep
- * around for a while. This helps to keep recently accessed
- * meta-data in-core longer.
- */
- xfs_buf_set_ref(bp, XFS_INO_REF);
-
- /*
- * Use xfs_trans_brelse() to release the buffer containing the on-disk
- * inode, because it was acquired with xfs_trans_read_buf() in
- * xfs_imap_to_bp() above. If tp is NULL, this is just a normal
- * brelse(). If we're within a transaction, then xfs_trans_brelse()
- * will only release the buffer if it is not dirty within the
- * transaction. It will be OK to release the buffer in this case,
- * because inodes on disk are never destroyed and we will be locking the
- * new in-core inode before putting it in the cache where other
- * processes can find it. Thus we don't have to worry about the inode
- * being changed just because we released the buffer.
- */
- out_brelse:
- xfs_trans_brelse(tp, bp);
- return error;
-}
-
-/*
* Validate di_extsize hint.
*
* The rules are documented at xfs_ioctl_setattr_check_extsize().
diff --git a/fs/xfs/libxfs/xfs_inode_buf.h b/fs/xfs/libxfs/xfs_inode_buf.h
index 9b373dcf9e34..865ac493c72a 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.h
+++ b/fs/xfs/libxfs/xfs_inode_buf.h
@@ -16,16 +16,12 @@ struct xfs_dinode;
* format specific structures at the appropriate time.
*/
struct xfs_icdinode {
- int8_t di_format; /* format of di_c data */
uint16_t di_flushiter; /* incremented on flush */
uint32_t di_projid; /* owner's project id */
xfs_fsize_t di_size; /* number of bytes in file */
xfs_rfsblock_t di_nblocks; /* # of direct & btree blocks used */
xfs_extlen_t di_extsize; /* basic/minimum extent size for file */
- xfs_extnum_t di_nextents; /* number of extents in data fork */
- xfs_aextnum_t di_anextents; /* number of extents in attribute fork*/
uint8_t di_forkoff; /* attr fork offs, <<3 for 64b align */
- int8_t di_aformat; /* format of attr fork's data */
uint32_t di_dmevmask; /* DMIG event mask */
uint16_t di_dmstate; /* DMIG state info */
uint16_t di_flags; /* random flags, XFS_DIFLAG_... */
@@ -48,13 +44,11 @@ struct xfs_imap {
int xfs_imap_to_bp(struct xfs_mount *, struct xfs_trans *,
struct xfs_imap *, struct xfs_dinode **,
- struct xfs_buf **, uint, uint);
-int xfs_iread(struct xfs_mount *, struct xfs_trans *,
- struct xfs_inode *, uint);
+ struct xfs_buf **, uint);
void xfs_dinode_calc_crc(struct xfs_mount *, struct xfs_dinode *);
void xfs_inode_to_disk(struct xfs_inode *ip, struct xfs_dinode *to,
xfs_lsn_t lsn);
-void xfs_inode_from_disk(struct xfs_inode *ip, struct xfs_dinode *from);
+int xfs_inode_from_disk(struct xfs_inode *ip, struct xfs_dinode *from);
void xfs_log_dinode_to_disk(struct xfs_log_dinode *from,
struct xfs_dinode *to);
diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
index 518c6f0ec3a6..28b366275ae0 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.c
+++ b/fs/xfs/libxfs/xfs_inode_fork.c
@@ -26,110 +26,6 @@
kmem_zone_t *xfs_ifork_zone;
-STATIC int xfs_iformat_local(xfs_inode_t *, xfs_dinode_t *, int, int);
-STATIC int xfs_iformat_extents(xfs_inode_t *, xfs_dinode_t *, int);
-STATIC int xfs_iformat_btree(xfs_inode_t *, xfs_dinode_t *, int);
-
-/*
- * Copy inode type and data and attr format specific information from the
- * on-disk inode to the in-core inode and fork structures. For fifos, devices,
- * and sockets this means set i_rdev to the proper value. For files,
- * directories, and symlinks this means to bring in the in-line data or extent
- * pointers as well as the attribute fork. For a fork in B-tree format, only
- * the root is immediately brought in-core. The rest will be read in later when
- * first referenced (see xfs_iread_extents()).
- */
-int
-xfs_iformat_fork(
- struct xfs_inode *ip,
- struct xfs_dinode *dip)
-{
- struct inode *inode = VFS_I(ip);
- struct xfs_attr_shortform *atp;
- int size;
- int error = 0;
- xfs_fsize_t di_size;
-
- switch (inode->i_mode & S_IFMT) {
- case S_IFIFO:
- case S_IFCHR:
- case S_IFBLK:
- case S_IFSOCK:
- ip->i_d.di_size = 0;
- inode->i_rdev = xfs_to_linux_dev_t(xfs_dinode_get_rdev(dip));
- break;
-
- case S_IFREG:
- case S_IFLNK:
- case S_IFDIR:
- switch (dip->di_format) {
- case XFS_DINODE_FMT_LOCAL:
- di_size = be64_to_cpu(dip->di_size);
- size = (int)di_size;
- error = xfs_iformat_local(ip, dip, XFS_DATA_FORK, size);
- break;
- case XFS_DINODE_FMT_EXTENTS:
- error = xfs_iformat_extents(ip, dip, XFS_DATA_FORK);
- break;
- case XFS_DINODE_FMT_BTREE:
- error = xfs_iformat_btree(ip, dip, XFS_DATA_FORK);
- break;
- default:
- xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__,
- dip, sizeof(*dip), __this_address);
- return -EFSCORRUPTED;
- }
- break;
-
- default:
- xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, dip,
- sizeof(*dip), __this_address);
- return -EFSCORRUPTED;
- }
- if (error)
- return error;
-
- if (xfs_is_reflink_inode(ip)) {
- ASSERT(ip->i_cowfp == NULL);
- xfs_ifork_init_cow(ip);
- }
-
- if (!XFS_DFORK_Q(dip))
- return 0;
-
- ASSERT(ip->i_afp == NULL);
- ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_NOFS);
-
- switch (dip->di_aformat) {
- case XFS_DINODE_FMT_LOCAL:
- atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip);
- size = be16_to_cpu(atp->hdr.totsize);
-
- error = xfs_iformat_local(ip, dip, XFS_ATTR_FORK, size);
- break;
- case XFS_DINODE_FMT_EXTENTS:
- error = xfs_iformat_extents(ip, dip, XFS_ATTR_FORK);
- break;
- case XFS_DINODE_FMT_BTREE:
- error = xfs_iformat_btree(ip, dip, XFS_ATTR_FORK);
- break;
- default:
- xfs_inode_verifier_error(ip, error, __func__, dip,
- sizeof(*dip), __this_address);
- error = -EFSCORRUPTED;
- break;
- }
- if (error) {
- kmem_cache_free(xfs_ifork_zone, ip->i_afp);
- ip->i_afp = NULL;
- if (ip->i_cowfp)
- kmem_cache_free(xfs_ifork_zone, ip->i_cowfp);
- ip->i_cowfp = NULL;
- xfs_idestroy_fork(ip, XFS_DATA_FORK);
- }
- return error;
-}
-
void
xfs_init_local_fork(
struct xfs_inode *ip,
@@ -292,12 +188,11 @@ xfs_iformat_btree(
* or the number of extents is greater than the number of
* blocks.
*/
- if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <=
- XFS_IFORK_MAXEXT(ip, whichfork) ||
+ if (unlikely(ifp->if_nextents <= XFS_IFORK_MAXEXT(ip, whichfork) ||
nrecs == 0 ||
XFS_BMDR_SPACE_CALC(nrecs) >
XFS_DFORK_SIZE(dip, mp, whichfork) ||
- XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks) ||
+ ifp->if_nextents > ip->i_d.di_nblocks) ||
level == 0 || level > XFS_BTREE_MAXLEVELS) {
xfs_warn(mp, "corrupt inode %Lu (btree).",
(unsigned long long) ip->i_ino);
@@ -325,6 +220,110 @@ xfs_iformat_btree(
return 0;
}
+int
+xfs_iformat_data_fork(
+ struct xfs_inode *ip,
+ struct xfs_dinode *dip)
+{
+ struct inode *inode = VFS_I(ip);
+ int error;
+
+ /*
+ * Initialize the extent count early, as the per-format routines may
+ * depend on it.
+ */
+ ip->i_df.if_format = dip->di_format;
+ ip->i_df.if_nextents = be32_to_cpu(dip->di_nextents);
+
+ switch (inode->i_mode & S_IFMT) {
+ case S_IFIFO:
+ case S_IFCHR:
+ case S_IFBLK:
+ case S_IFSOCK:
+ ip->i_d.di_size = 0;
+ inode->i_rdev = xfs_to_linux_dev_t(xfs_dinode_get_rdev(dip));
+ return 0;
+ case S_IFREG:
+ case S_IFLNK:
+ case S_IFDIR:
+ switch (ip->i_df.if_format) {
+ case XFS_DINODE_FMT_LOCAL:
+ error = xfs_iformat_local(ip, dip, XFS_DATA_FORK,
+ be64_to_cpu(dip->di_size));
+ if (!error)
+ error = xfs_ifork_verify_local_data(ip);
+ return error;
+ case XFS_DINODE_FMT_EXTENTS:
+ return xfs_iformat_extents(ip, dip, XFS_DATA_FORK);
+ case XFS_DINODE_FMT_BTREE:
+ return xfs_iformat_btree(ip, dip, XFS_DATA_FORK);
+ default:
+ xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__,
+ dip, sizeof(*dip), __this_address);
+ return -EFSCORRUPTED;
+ }
+ break;
+ default:
+ xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, dip,
+ sizeof(*dip), __this_address);
+ return -EFSCORRUPTED;
+ }
+}
+
+static uint16_t
+xfs_dfork_attr_shortform_size(
+ struct xfs_dinode *dip)
+{
+ struct xfs_attr_shortform *atp =
+ (struct xfs_attr_shortform *)XFS_DFORK_APTR(dip);
+
+ return be16_to_cpu(atp->hdr.totsize);
+}
+
+int
+xfs_iformat_attr_fork(
+ struct xfs_inode *ip,
+ struct xfs_dinode *dip)
+{
+ int error = 0;
+
+ /*
+ * Initialize the extent count early, as the per-format routines may
+ * depend on it.
+ */
+ ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_NOFS);
+ ip->i_afp->if_format = dip->di_aformat;
+ if (unlikely(ip->i_afp->if_format == 0)) /* pre IRIX 6.2 file system */
+ ip->i_afp->if_format = XFS_DINODE_FMT_EXTENTS;
+ ip->i_afp->if_nextents = be16_to_cpu(dip->di_anextents);
+
+ switch (ip->i_afp->if_format) {
+ case XFS_DINODE_FMT_LOCAL:
+ error = xfs_iformat_local(ip, dip, XFS_ATTR_FORK,
+ xfs_dfork_attr_shortform_size(dip));
+ if (!error)
+ error = xfs_ifork_verify_local_attr(ip);
+ break;
+ case XFS_DINODE_FMT_EXTENTS:
+ error = xfs_iformat_extents(ip, dip, XFS_ATTR_FORK);
+ break;
+ case XFS_DINODE_FMT_BTREE:
+ error = xfs_iformat_btree(ip, dip, XFS_ATTR_FORK);
+ break;
+ default:
+ xfs_inode_verifier_error(ip, error, __func__, dip,
+ sizeof(*dip), __this_address);
+ error = -EFSCORRUPTED;
+ break;
+ }
+
+ if (error) {
+ kmem_cache_free(xfs_ifork_zone, ip->i_afp);
+ ip->i_afp = NULL;
+ }
+ return error;
+}
+
/*
* Reallocate the space for if_broot based on the number of records
* being added or deleted as indicated in rec_diff. Move the records
@@ -504,38 +503,24 @@ xfs_idata_realloc(
void
xfs_idestroy_fork(
- xfs_inode_t *ip,
- int whichfork)
+ struct xfs_ifork *ifp)
{
- struct xfs_ifork *ifp;
-
- ifp = XFS_IFORK_PTR(ip, whichfork);
if (ifp->if_broot != NULL) {
kmem_free(ifp->if_broot);
ifp->if_broot = NULL;
}
/*
- * If the format is local, then we can't have an extents
- * array so just look for an inline data array. If we're
- * not local then we may or may not have an extents list,
- * so check and free it up if we do.
+ * If the format is local, then we can't have an extents array so just
+ * look for an inline data array. If we're not local then we may or may
+ * not have an extents list, so check and free it up if we do.
*/
- if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
- if (ifp->if_u1.if_data != NULL) {
- kmem_free(ifp->if_u1.if_data);
- ifp->if_u1.if_data = NULL;
- }
- } else if ((ifp->if_flags & XFS_IFEXTENTS) && ifp->if_height) {
- xfs_iext_destroy(ifp);
- }
-
- if (whichfork == XFS_ATTR_FORK) {
- kmem_cache_free(xfs_ifork_zone, ip->i_afp);
- ip->i_afp = NULL;
- } else if (whichfork == XFS_COW_FORK) {
- kmem_cache_free(xfs_ifork_zone, ip->i_cowfp);
- ip->i_cowfp = NULL;
+ if (ifp->if_format == XFS_DINODE_FMT_LOCAL) {
+ kmem_free(ifp->if_u1.if_data);
+ ifp->if_u1.if_data = NULL;
+ } else if (ifp->if_flags & XFS_IFEXTENTS) {
+ if (ifp->if_height)
+ xfs_iext_destroy(ifp);
}
}
@@ -592,7 +577,7 @@ void
xfs_iflush_fork(
xfs_inode_t *ip,
xfs_dinode_t *dip,
- xfs_inode_log_item_t *iip,
+ struct xfs_inode_log_item *iip,
int whichfork)
{
char *cp;
@@ -618,7 +603,7 @@ xfs_iflush_fork(
}
cp = XFS_DFORK_PTR(dip, whichfork);
mp = ip->i_mount;
- switch (XFS_IFORK_FORMAT(ip, whichfork)) {
+ switch (ifp->if_format) {
case XFS_DINODE_FMT_LOCAL:
if ((iip->ili_fields & dataflag[whichfork]) &&
(ifp->if_bytes > 0)) {
@@ -633,7 +618,7 @@ xfs_iflush_fork(
!(iip->ili_fields & extflag[whichfork]));
if ((iip->ili_fields & extflag[whichfork]) &&
(ifp->if_bytes > 0)) {
- ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0);
+ ASSERT(ifp->if_nextents > 0);
(void)xfs_iextents_copy(ip, (xfs_bmbt_rec_t *)cp,
whichfork);
}
@@ -691,48 +676,55 @@ xfs_ifork_init_cow(
ip->i_cowfp = kmem_zone_zalloc(xfs_ifork_zone,
KM_NOFS);
ip->i_cowfp->if_flags = XFS_IFEXTENTS;
- ip->i_cformat = XFS_DINODE_FMT_EXTENTS;
- ip->i_cnextents = 0;
+ ip->i_cowfp->if_format = XFS_DINODE_FMT_EXTENTS;
}
-/* Default fork content verifiers. */
-struct xfs_ifork_ops xfs_default_ifork_ops = {
- .verify_attr = xfs_attr_shortform_verify,
- .verify_dir = xfs_dir2_sf_verify,
- .verify_symlink = xfs_symlink_shortform_verify,
-};
-
/* Verify the inline contents of the data fork of an inode. */
-xfs_failaddr_t
-xfs_ifork_verify_data(
- struct xfs_inode *ip,
- struct xfs_ifork_ops *ops)
+int
+xfs_ifork_verify_local_data(
+ struct xfs_inode *ip)
{
- /* Non-local data fork, we're done. */
- if (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL)
- return NULL;
+ xfs_failaddr_t fa = NULL;
- /* Check the inline data fork if there is one. */
switch (VFS_I(ip)->i_mode & S_IFMT) {
case S_IFDIR:
- return ops->verify_dir(ip);
+ fa = xfs_dir2_sf_verify(ip);
+ break;
case S_IFLNK:
- return ops->verify_symlink(ip);
+ fa = xfs_symlink_shortform_verify(ip);
+ break;
default:
- return NULL;
+ break;
}
+
+ if (fa) {
+ xfs_inode_verifier_error(ip, -EFSCORRUPTED, "data fork",
+ ip->i_df.if_u1.if_data, ip->i_df.if_bytes, fa);
+ return -EFSCORRUPTED;
+ }
+
+ return 0;
}
/* Verify the inline contents of the attr fork of an inode. */
-xfs_failaddr_t
-xfs_ifork_verify_attr(
- struct xfs_inode *ip,
- struct xfs_ifork_ops *ops)
+int
+xfs_ifork_verify_local_attr(
+ struct xfs_inode *ip)
{
- /* There has to be an attr fork allocated if aformat is local. */
- if (ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)
- return NULL;
- if (!XFS_IFORK_PTR(ip, XFS_ATTR_FORK))
- return __this_address;
- return ops->verify_attr(ip);
+ struct xfs_ifork *ifp = ip->i_afp;
+ xfs_failaddr_t fa;
+
+ if (!ifp)
+ fa = __this_address;
+ else
+ fa = xfs_attr_shortform_verify(ip);
+
+ if (fa) {
+ xfs_inode_verifier_error(ip, -EFSCORRUPTED, "attr fork",
+ ifp ? ifp->if_u1.if_data : NULL,
+ ifp ? ifp->if_bytes : 0, fa);
+ return -EFSCORRUPTED;
+ }
+
+ return 0;
}
diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h
index 668ee942be22..a4953e95c4f3 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.h
+++ b/fs/xfs/libxfs/xfs_inode_fork.h
@@ -23,6 +23,8 @@ struct xfs_ifork {
} if_u1;
short if_broot_bytes; /* bytes allocated for root */
unsigned char if_flags; /* per-fork flags */
+ int8_t if_format; /* format of this fork */
+ xfs_extnum_t if_nextents; /* # of extents in this fork */
};
/*
@@ -55,43 +57,36 @@ struct xfs_ifork {
((w) == XFS_ATTR_FORK ? \
XFS_IFORK_ASIZE(ip) : \
0))
-#define XFS_IFORK_FORMAT(ip,w) \
- ((w) == XFS_DATA_FORK ? \
- (ip)->i_d.di_format : \
- ((w) == XFS_ATTR_FORK ? \
- (ip)->i_d.di_aformat : \
- (ip)->i_cformat))
-#define XFS_IFORK_FMT_SET(ip,w,n) \
- ((w) == XFS_DATA_FORK ? \
- ((ip)->i_d.di_format = (n)) : \
- ((w) == XFS_ATTR_FORK ? \
- ((ip)->i_d.di_aformat = (n)) : \
- ((ip)->i_cformat = (n))))
-#define XFS_IFORK_NEXTENTS(ip,w) \
- ((w) == XFS_DATA_FORK ? \
- (ip)->i_d.di_nextents : \
- ((w) == XFS_ATTR_FORK ? \
- (ip)->i_d.di_anextents : \
- (ip)->i_cnextents))
-#define XFS_IFORK_NEXT_SET(ip,w,n) \
- ((w) == XFS_DATA_FORK ? \
- ((ip)->i_d.di_nextents = (n)) : \
- ((w) == XFS_ATTR_FORK ? \
- ((ip)->i_d.di_anextents = (n)) : \
- ((ip)->i_cnextents = (n))))
#define XFS_IFORK_MAXEXT(ip, w) \
(XFS_IFORK_SIZE(ip, w) / sizeof(xfs_bmbt_rec_t))
-#define xfs_ifork_has_extents(ip, w) \
- (XFS_IFORK_FORMAT((ip), (w)) == XFS_DINODE_FMT_EXTENTS || \
- XFS_IFORK_FORMAT((ip), (w)) == XFS_DINODE_FMT_BTREE)
+static inline bool xfs_ifork_has_extents(struct xfs_ifork *ifp)
+{
+ return ifp->if_format == XFS_DINODE_FMT_EXTENTS ||
+ ifp->if_format == XFS_DINODE_FMT_BTREE;
+}
+
+static inline xfs_extnum_t xfs_ifork_nextents(struct xfs_ifork *ifp)
+{
+ if (!ifp)
+ return 0;
+ return ifp->if_nextents;
+}
+
+static inline int8_t xfs_ifork_format(struct xfs_ifork *ifp)
+{
+ if (!ifp)
+ return XFS_DINODE_FMT_EXTENTS;
+ return ifp->if_format;
+}
struct xfs_ifork *xfs_iext_state_to_fork(struct xfs_inode *ip, int state);
-int xfs_iformat_fork(struct xfs_inode *, struct xfs_dinode *);
+int xfs_iformat_data_fork(struct xfs_inode *, struct xfs_dinode *);
+int xfs_iformat_attr_fork(struct xfs_inode *, struct xfs_dinode *);
void xfs_iflush_fork(struct xfs_inode *, struct xfs_dinode *,
struct xfs_inode_log_item *, int);
-void xfs_idestroy_fork(struct xfs_inode *, int);
+void xfs_idestroy_fork(struct xfs_ifork *ifp);
void xfs_idata_realloc(struct xfs_inode *ip, int64_t byte_diff,
int whichfork);
void xfs_iroot_realloc(struct xfs_inode *, int, int);
@@ -175,18 +170,7 @@ extern struct kmem_zone *xfs_ifork_zone;
extern void xfs_ifork_init_cow(struct xfs_inode *ip);
-typedef xfs_failaddr_t (*xfs_ifork_verifier_t)(struct xfs_inode *);
-
-struct xfs_ifork_ops {
- xfs_ifork_verifier_t verify_symlink;
- xfs_ifork_verifier_t verify_dir;
- xfs_ifork_verifier_t verify_attr;
-};
-extern struct xfs_ifork_ops xfs_default_ifork_ops;
-
-xfs_failaddr_t xfs_ifork_verify_data(struct xfs_inode *ip,
- struct xfs_ifork_ops *ops);
-xfs_failaddr_t xfs_ifork_verify_attr(struct xfs_inode *ip,
- struct xfs_ifork_ops *ops);
+int xfs_ifork_verify_local_data(struct xfs_inode *ip);
+int xfs_ifork_verify_local_attr(struct xfs_inode *ip);
#endif /* __XFS_INODE_FORK_H__ */
diff --git a/fs/xfs/libxfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h
index 3bf671637a91..641132d0e39d 100644
--- a/fs/xfs/libxfs/xfs_log_recover.h
+++ b/fs/xfs/libxfs/xfs_log_recover.h
@@ -7,6 +7,73 @@
#define __XFS_LOG_RECOVER_H__
/*
+ * Each log item type (XFS_LI_*) gets its own xlog_recover_item_ops to
+ * define how recovery should work for that type of log item.
+ */
+struct xlog_recover_item;
+
+/* Sorting hat for log items as they're read in. */
+enum xlog_recover_reorder {
+ XLOG_REORDER_BUFFER_LIST,
+ XLOG_REORDER_ITEM_LIST,
+ XLOG_REORDER_INODE_BUFFER_LIST,
+ XLOG_REORDER_CANCEL_LIST,
+};
+
+struct xlog_recover_item_ops {
+ uint16_t item_type; /* XFS_LI_* type code. */
+
+ /*
+ * Help sort recovered log items into the order required to replay them
+ * correctly. Log item types that always use XLOG_REORDER_ITEM_LIST do
+ * not have to supply a function here. See the comment preceding
+ * xlog_recover_reorder_trans for more details about what the return
+ * values mean.
+ */
+ enum xlog_recover_reorder (*reorder)(struct xlog_recover_item *item);
+
+ /* Start readahead for pass2, if provided. */
+ void (*ra_pass2)(struct xlog *log, struct xlog_recover_item *item);
+
+ /* Do whatever work we need to do for pass1, if provided. */
+ int (*commit_pass1)(struct xlog *log, struct xlog_recover_item *item);
+
+ /*
+ * This function should do whatever work is needed for pass2 of log
+ * recovery, if provided.
+ *
+ * If the recovered item is an intent item, this function should parse
+ * the recovered item to construct an in-core log intent item and
+	 * insert it into the AIL. The in-core log intent item should have a
+	 * refcount of 1 so that the item is freed either (a) when we commit the
+	 * recovered log item for the intent-done item; (b) when we replay the
+	 * work and log a new intent-done item; or (c) when recovery fails and
+	 * we have to abort.
+ *
+ * If the recovered item is an intent-done item, this function should
+ * parse the recovered item to find the id of the corresponding intent
+ * log item. Next, it should find the in-core log intent item in the
+ * AIL and release it.
+ */
+ int (*commit_pass2)(struct xlog *log, struct list_head *buffer_list,
+ struct xlog_recover_item *item, xfs_lsn_t lsn);
+};
+
+extern const struct xlog_recover_item_ops xlog_icreate_item_ops;
+extern const struct xlog_recover_item_ops xlog_buf_item_ops;
+extern const struct xlog_recover_item_ops xlog_inode_item_ops;
+extern const struct xlog_recover_item_ops xlog_dquot_item_ops;
+extern const struct xlog_recover_item_ops xlog_quotaoff_item_ops;
+extern const struct xlog_recover_item_ops xlog_bui_item_ops;
+extern const struct xlog_recover_item_ops xlog_bud_item_ops;
+extern const struct xlog_recover_item_ops xlog_efi_item_ops;
+extern const struct xlog_recover_item_ops xlog_efd_item_ops;
+extern const struct xlog_recover_item_ops xlog_rui_item_ops;
+extern const struct xlog_recover_item_ops xlog_rud_item_ops;
+extern const struct xlog_recover_item_ops xlog_cui_item_ops;
+extern const struct xlog_recover_item_ops xlog_cud_item_ops;
+
+/*
* Macros, structures, prototypes for internal log manager use.
*/
@@ -22,13 +89,13 @@
/*
* item headers are in ri_buf[0]. Additional buffers follow.
*/
-typedef struct xlog_recover_item {
+struct xlog_recover_item {
struct list_head ri_list;
- int ri_type;
int ri_cnt; /* count of regions found */
int ri_total; /* total regions */
- xfs_log_iovec_t *ri_buf; /* ptr to regions buffer */
-} xlog_recover_item_t;
+ struct xfs_log_iovec *ri_buf; /* ptr to regions buffer */
+ const struct xlog_recover_item_ops *ri_ops;
+};
struct xlog_recover {
struct hlist_node r_list;
@@ -51,4 +118,12 @@ struct xlog_recover {
#define XLOG_RECOVER_PASS1 1
#define XLOG_RECOVER_PASS2 2
+void xlog_buf_readahead(struct xlog *log, xfs_daddr_t blkno, uint len,
+ const struct xfs_buf_ops *ops);
+bool xlog_is_buffer_cancelled(struct xlog *log, xfs_daddr_t blkno, uint len);
+void xlog_recover_iodone(struct xfs_buf *bp);
+
+void xlog_recover_release_intent(struct xlog *log, unsigned short intent_type,
+ uint64_t intent_id);
+
#endif /* __XFS_LOG_RECOVER_H__ */
diff --git a/fs/xfs/libxfs/xfs_quota_defs.h b/fs/xfs/libxfs/xfs_quota_defs.h
index b2113b17e53c..56d9dd787e7b 100644
--- a/fs/xfs/libxfs/xfs_quota_defs.h
+++ b/fs/xfs/libxfs/xfs_quota_defs.h
@@ -100,7 +100,6 @@ typedef uint16_t xfs_qwarncnt_t;
#define XFS_QMOPT_FORCE_RES 0x0000010 /* ignore quota limits */
#define XFS_QMOPT_SBVERSION 0x0000040 /* change superblock version num */
#define XFS_QMOPT_GQUOTA 0x0002000 /* group dquot requested */
-#define XFS_QMOPT_ENOSPC 0x0004000 /* enospc instead of edquot (prj) */
/*
* flags to xfs_trans_mod_dquot to indicate which field needs to be
diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c
index f42c74cb8be5..9498ced947be 100644
--- a/fs/xfs/libxfs/xfs_rtbitmap.c
+++ b/fs/xfs/libxfs/xfs_rtbitmap.c
@@ -66,7 +66,7 @@ xfs_rtbuf_get(
ip = issum ? mp->m_rsumip : mp->m_rbmip;
- error = xfs_bmapi_read(ip, block, 1, &map, &nmap, XFS_DATA_FORK);
+ error = xfs_bmapi_read(ip, block, 1, &map, &nmap, 0);
if (error)
return error;
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index c526c5e5ab76..4df87546bd40 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -243,7 +243,7 @@ xfs_validate_sb_common(
} else if (sbp->sb_qflags & (XFS_PQUOTA_ENFD | XFS_GQUOTA_ENFD |
XFS_PQUOTA_CHKD | XFS_GQUOTA_CHKD)) {
xfs_notice(mp,
-"Superblock earlier than Version 5 has XFS_[PQ]UOTA_{ENFD|CHKD} bits.");
+"Superblock earlier than Version 5 has XFS_{P|G}QUOTA_{ENFD|CHKD} bits.");
return -EFSCORRUPTED;
}
diff --git a/fs/xfs/libxfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c
index 3b8260ca7d1b..594bc447a7dd 100644
--- a/fs/xfs/libxfs/xfs_symlink_remote.c
+++ b/fs/xfs/libxfs/xfs_symlink_remote.c
@@ -204,16 +204,12 @@ xfs_failaddr_t
xfs_symlink_shortform_verify(
struct xfs_inode *ip)
{
- char *sfp;
- char *endp;
- struct xfs_ifork *ifp;
- int size;
-
- ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_LOCAL);
- ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
- sfp = (char *)ifp->if_u1.if_data;
- size = ifp->if_bytes;
- endp = sfp + size;
+ struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+ char *sfp = (char *)ifp->if_u1.if_data;
+ int size = ifp->if_bytes;
+ char *endp = sfp + size;
+
+ ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL);
/*
* Zero length symlinks should never occur in memory as they are
diff --git a/fs/xfs/libxfs/xfs_trans_inode.c b/fs/xfs/libxfs/xfs_trans_inode.c
index 2b8ccb5b975d..b5dfb6654842 100644
--- a/fs/xfs/libxfs/xfs_trans_inode.c
+++ b/fs/xfs/libxfs/xfs_trans_inode.c
@@ -27,7 +27,7 @@ xfs_trans_ijoin(
struct xfs_inode *ip,
uint lock_flags)
{
- xfs_inode_log_item_t *iip;
+ struct xfs_inode_log_item *iip;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
if (ip->i_itemp == NULL)
diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c
index add8598eacd5..7badd6dfe544 100644
--- a/fs/xfs/scrub/bmap.c
+++ b/fs/xfs/scrub/bmap.c
@@ -566,8 +566,9 @@ xchk_bmap_check_rmaps(
struct xfs_scrub *sc,
int whichfork)
{
- loff_t size;
+ struct xfs_ifork *ifp = XFS_IFORK_PTR(sc->ip, whichfork);
xfs_agnumber_t agno;
+ bool zero_size;
int error;
if (!xfs_sb_version_hasrmapbt(&sc->mp->m_sb) ||
@@ -579,6 +580,8 @@ xchk_bmap_check_rmaps(
if (XFS_IS_REALTIME_INODE(sc->ip) && whichfork == XFS_DATA_FORK)
return 0;
+ ASSERT(XFS_IFORK_PTR(sc->ip, whichfork) != NULL);
+
/*
* Only do this for complex maps that are in btree format, or for
* situations where we would seem to have a size but zero extents.
@@ -586,19 +589,14 @@ xchk_bmap_check_rmaps(
* to flag this bmap as corrupt if there are rmaps that need to be
* reattached.
*/
- switch (whichfork) {
- case XFS_DATA_FORK:
- size = i_size_read(VFS_I(sc->ip));
- break;
- case XFS_ATTR_FORK:
- size = XFS_IFORK_Q(sc->ip);
- break;
- default:
- size = 0;
- break;
- }
- if (XFS_IFORK_FORMAT(sc->ip, whichfork) != XFS_DINODE_FMT_BTREE &&
- (size == 0 || XFS_IFORK_NEXTENTS(sc->ip, whichfork) > 0))
+
+ if (whichfork == XFS_DATA_FORK)
+ zero_size = i_size_read(VFS_I(sc->ip)) == 0;
+ else
+ zero_size = false;
+
+ if (ifp->if_format != XFS_DINODE_FMT_BTREE &&
+ (zero_size || ifp->if_nextents > 0))
return 0;
for (agno = 0; agno < sc->mp->m_sb.sb_agcount; agno++) {
@@ -627,12 +625,14 @@ xchk_bmap(
struct xchk_bmap_info info = { NULL };
struct xfs_mount *mp = sc->mp;
struct xfs_inode *ip = sc->ip;
- struct xfs_ifork *ifp;
+ struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
xfs_fileoff_t endoff;
struct xfs_iext_cursor icur;
int error = 0;
- ifp = XFS_IFORK_PTR(ip, whichfork);
+ /* Non-existent forks can be ignored. */
+ if (!ifp)
+ goto out;
info.is_rt = whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip);
info.whichfork = whichfork;
@@ -641,9 +641,6 @@ xchk_bmap(
switch (whichfork) {
case XFS_COW_FORK:
- /* Non-existent CoW forks are ignorable. */
- if (!ifp)
- goto out;
/* No CoW forks on non-reflink inodes/filesystems. */
if (!xfs_is_reflink_inode(ip)) {
xchk_ino_set_corrupt(sc, sc->ip->i_ino);
@@ -651,8 +648,6 @@ xchk_bmap(
}
break;
case XFS_ATTR_FORK:
- if (!ifp)
- goto out_check_rmap;
if (!xfs_sb_version_hasattr(&mp->m_sb) &&
!xfs_sb_version_hasattr2(&mp->m_sb))
xchk_ino_set_corrupt(sc, sc->ip->i_ino);
@@ -663,7 +658,7 @@ xchk_bmap(
}
/* Check the fork values */
- switch (XFS_IFORK_FORMAT(ip, whichfork)) {
+ switch (ifp->if_format) {
case XFS_DINODE_FMT_UUID:
case XFS_DINODE_FMT_DEV:
case XFS_DINODE_FMT_LOCAL:
@@ -717,7 +712,6 @@ xchk_bmap(
goto out;
}
-out_check_rmap:
error = xchk_bmap_check_rmaps(sc, whichfork);
if (!xchk_fblock_xref_process_error(sc, whichfork, 0, &error))
goto out;
diff --git a/fs/xfs/scrub/dabtree.c b/fs/xfs/scrub/dabtree.c
index 9a2e27ac1300..44b15015021f 100644
--- a/fs/xfs/scrub/dabtree.c
+++ b/fs/xfs/scrub/dabtree.c
@@ -468,7 +468,7 @@ xchk_da_btree(
int error;
/* Skip short format data structures; no btree to scan. */
- if (!xfs_ifork_has_extents(sc->ip, whichfork))
+ if (!xfs_ifork_has_extents(XFS_IFORK_PTR(sc->ip, whichfork)))
return 0;
/* Set up initial da state. */
diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c
index fe2a6e030c8a..7c432997edad 100644
--- a/fs/xfs/scrub/dir.c
+++ b/fs/xfs/scrub/dir.c
@@ -635,7 +635,7 @@ xchk_directory_blocks(
{
struct xfs_bmbt_irec got;
struct xfs_da_args args;
- struct xfs_ifork *ifp;
+ struct xfs_ifork *ifp = XFS_IFORK_PTR(sc->ip, XFS_DATA_FORK);
struct xfs_mount *mp = sc->mp;
xfs_fileoff_t leaf_lblk;
xfs_fileoff_t free_lblk;
@@ -647,11 +647,10 @@ xchk_directory_blocks(
int error;
/* Ignore local format directories. */
- if (sc->ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS &&
- sc->ip->i_d.di_format != XFS_DINODE_FMT_BTREE)
+ if (ifp->if_format != XFS_DINODE_FMT_EXTENTS &&
+ ifp->if_format != XFS_DINODE_FMT_BTREE)
return 0;
- ifp = XFS_IFORK_PTR(sc->ip, XFS_DATA_FORK);
lblk = XFS_B_TO_FSB(mp, XFS_DIR2_DATA_OFFSET);
leaf_lblk = XFS_B_TO_FSB(mp, XFS_DIR2_LEAF_OFFSET);
free_lblk = XFS_B_TO_FSB(mp, XFS_DIR2_FREE_OFFSET);
diff --git a/fs/xfs/scrub/ialloc.c b/fs/xfs/scrub/ialloc.c
index 64c217eb06a7..6517d67e8d51 100644
--- a/fs/xfs/scrub/ialloc.c
+++ b/fs/xfs/scrub/ialloc.c
@@ -278,8 +278,7 @@ xchk_iallocbt_check_cluster(
&XFS_RMAP_OINFO_INODES);
/* Grab the inode cluster buffer. */
- error = xfs_imap_to_bp(mp, bs->cur->bc_tp, &imap, &dip, &cluster_bp,
- 0, 0);
+ error = xfs_imap_to_bp(mp, bs->cur->bc_tp, &imap, &dip, &cluster_bp, 0);
if (!xchk_btree_xref_process_error(bs->sc, bs->cur, 0, &error))
return error;
diff --git a/fs/xfs/scrub/parent.c b/fs/xfs/scrub/parent.c
index 5705adc43a75..855aa8bcab64 100644
--- a/fs/xfs/scrub/parent.c
+++ b/fs/xfs/scrub/parent.c
@@ -90,7 +90,7 @@ xchk_parent_count_parent_dentries(
* if there is one.
*/
lock_mode = xfs_ilock_data_map_shared(parent);
- if (parent->i_d.di_nextents > 0)
+ if (parent->i_df.if_nextents > 0)
error = xfs_dir3_data_readahead(parent, 0, 0);
xfs_iunlock(parent, lock_mode);
if (error)
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 9d9cebf18726..b35611882ff9 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -382,7 +382,7 @@ xfs_map_blocks(
*/
retry:
xfs_ilock(ip, XFS_ILOCK_SHARED);
- ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
+ ASSERT(ip->i_df.if_format != XFS_DINODE_FMT_BTREE ||
(ip->i_df.if_flags & XFS_IFEXTENTS));
/*
@@ -621,14 +621,11 @@ xfs_vm_readpage(
return iomap_readpage(page, &xfs_read_iomap_ops);
}
-STATIC int
-xfs_vm_readpages(
- struct file *unused,
- struct address_space *mapping,
- struct list_head *pages,
- unsigned nr_pages)
+STATIC void
+xfs_vm_readahead(
+ struct readahead_control *rac)
{
- return iomap_readpages(mapping, pages, nr_pages, &xfs_read_iomap_ops);
+ iomap_readahead(rac, &xfs_read_iomap_ops);
}
static int
@@ -644,7 +641,7 @@ xfs_iomap_swapfile_activate(
const struct address_space_operations xfs_address_space_operations = {
.readpage = xfs_vm_readpage,
- .readpages = xfs_vm_readpages,
+ .readahead = xfs_vm_readahead,
.writepage = xfs_vm_writepage,
.writepages = xfs_vm_writepages,
.set_page_dirty = iomap_set_page_dirty,
diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c
index c42f90e16b4f..bfad669e6b2f 100644
--- a/fs/xfs/xfs_attr_inactive.c
+++ b/fs/xfs/xfs_attr_inactive.c
@@ -367,7 +367,7 @@ xfs_attr_inactive(
* removal below.
*/
if (xfs_inode_hasattr(dp) &&
- dp->i_d.di_aformat != XFS_DINODE_FMT_LOCAL) {
+ dp->i_afp->if_format != XFS_DINODE_FMT_LOCAL) {
error = xfs_attr3_root_inactive(&trans, dp);
if (error)
goto out_cancel;
@@ -388,8 +388,11 @@ out_cancel:
xfs_trans_cancel(trans);
out_destroy_fork:
/* kill the in-core attr fork before we drop the inode lock */
- if (dp->i_afp)
- xfs_idestroy_fork(dp, XFS_ATTR_FORK);
+ if (dp->i_afp) {
+ xfs_idestroy_fork(dp->i_afp);
+ kmem_cache_free(xfs_ifork_zone, dp->i_afp);
+ dp->i_afp = NULL;
+ }
if (lock_mode)
xfs_iunlock(dp, lock_mode);
return error;
diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c
index 5ff1d929d3b5..e380bd1a9bfc 100644
--- a/fs/xfs/xfs_attr_list.c
+++ b/fs/xfs/xfs_attr_list.c
@@ -512,9 +512,9 @@ xfs_attr_list_ilocked(
*/
if (!xfs_inode_hasattr(dp))
return 0;
- else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL)
+ if (dp->i_afp->if_format == XFS_DINODE_FMT_LOCAL)
return xfs_attr_shortform_list(context);
- else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK))
+ if (xfs_bmap_one_block(dp, XFS_ATTR_FORK))
return xfs_attr_leaf_list(context);
return xfs_attr_node_list(context);
}
diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
index ee6f4229cebc..6736c5ab188f 100644
--- a/fs/xfs/xfs_bmap_item.c
+++ b/fs/xfs/xfs_bmap_item.c
@@ -22,16 +22,20 @@
#include "xfs_bmap_btree.h"
#include "xfs_trans_space.h"
#include "xfs_error.h"
+#include "xfs_log_priv.h"
+#include "xfs_log_recover.h"
kmem_zone_t *xfs_bui_zone;
kmem_zone_t *xfs_bud_zone;
+static const struct xfs_item_ops xfs_bui_item_ops;
+
static inline struct xfs_bui_log_item *BUI_ITEM(struct xfs_log_item *lip)
{
return container_of(lip, struct xfs_bui_log_item, bui_item);
}
-void
+STATIC void
xfs_bui_item_free(
struct xfs_bui_log_item *buip)
{
@@ -45,13 +49,13 @@ xfs_bui_item_free(
* committed vs unpin operations in bulk insert operations. Hence the reference
* count to ensure only the last caller frees the BUI.
*/
-void
+STATIC void
xfs_bui_release(
struct xfs_bui_log_item *buip)
{
ASSERT(atomic_read(&buip->bui_refcount) > 0);
if (atomic_dec_and_test(&buip->bui_refcount)) {
- xfs_trans_ail_remove(&buip->bui_item, SHUTDOWN_LOG_IO_ERROR);
+ xfs_trans_ail_delete(&buip->bui_item, SHUTDOWN_LOG_IO_ERROR);
xfs_bui_item_free(buip);
}
}
@@ -124,17 +128,10 @@ xfs_bui_item_release(
xfs_bui_release(BUI_ITEM(lip));
}
-static const struct xfs_item_ops xfs_bui_item_ops = {
- .iop_size = xfs_bui_item_size,
- .iop_format = xfs_bui_item_format,
- .iop_unpin = xfs_bui_item_unpin,
- .iop_release = xfs_bui_item_release,
-};
-
/*
 * Allocate and initialize a bui item with the given number of extents.
*/
-struct xfs_bui_log_item *
+STATIC struct xfs_bui_log_item *
xfs_bui_init(
struct xfs_mount *mp)
@@ -278,27 +275,6 @@ xfs_bmap_update_diff_items(
return ba->bi_owner->i_ino - bb->bi_owner->i_ino;
}
-/* Get an BUI. */
-STATIC void *
-xfs_bmap_update_create_intent(
- struct xfs_trans *tp,
- unsigned int count)
-{
- struct xfs_bui_log_item *buip;
-
- ASSERT(count == XFS_BUI_MAX_FAST_EXTENTS);
- ASSERT(tp != NULL);
-
- buip = xfs_bui_init(tp->t_mountp);
- ASSERT(buip != NULL);
-
- /*
- * Get a log_item_desc to point at the new item.
- */
- xfs_trans_add_item(tp, &buip->bui_item);
- return buip;
-}
-
/* Set the map extent flags for this mapping. */
static void
xfs_trans_set_bmap_flags(
@@ -326,16 +302,12 @@ xfs_trans_set_bmap_flags(
STATIC void
xfs_bmap_update_log_item(
struct xfs_trans *tp,
- void *intent,
- struct list_head *item)
+ struct xfs_bui_log_item *buip,
+ struct xfs_bmap_intent *bmap)
{
- struct xfs_bui_log_item *buip = intent;
- struct xfs_bmap_intent *bmap;
uint next_extent;
struct xfs_map_extent *map;
- bmap = container_of(item, struct xfs_bmap_intent, bi_list);
-
tp->t_flags |= XFS_TRANS_DIRTY;
set_bit(XFS_LI_DIRTY, &buip->bui_item.li_flags);
@@ -355,23 +327,44 @@ xfs_bmap_update_log_item(
bmap->bi_bmap.br_state);
}
+static struct xfs_log_item *
+xfs_bmap_update_create_intent(
+ struct xfs_trans *tp,
+ struct list_head *items,
+ unsigned int count,
+ bool sort)
+{
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_bui_log_item *buip = xfs_bui_init(mp);
+ struct xfs_bmap_intent *bmap;
+
+ ASSERT(count == XFS_BUI_MAX_FAST_EXTENTS);
+
+ xfs_trans_add_item(tp, &buip->bui_item);
+ if (sort)
+ list_sort(mp, items, xfs_bmap_update_diff_items);
+ list_for_each_entry(bmap, items, bi_list)
+ xfs_bmap_update_log_item(tp, buip, bmap);
+ return &buip->bui_item;
+}
+
 /* Get a BUD so we can process all the deferred bmap updates. */
-STATIC void *
+static struct xfs_log_item *
xfs_bmap_update_create_done(
struct xfs_trans *tp,
- void *intent,
+ struct xfs_log_item *intent,
unsigned int count)
{
- return xfs_trans_get_bud(tp, intent);
+ return &xfs_trans_get_bud(tp, BUI_ITEM(intent))->bud_item;
}
 /* Process a deferred bmap update. */
STATIC int
xfs_bmap_update_finish_item(
struct xfs_trans *tp,
+ struct xfs_log_item *done,
struct list_head *item,
- void *done_item,
- void **state)
+ struct xfs_btree_cur **state)
{
struct xfs_bmap_intent *bmap;
xfs_filblks_t count;
@@ -379,7 +372,7 @@ xfs_bmap_update_finish_item(
bmap = container_of(item, struct xfs_bmap_intent, bi_list);
count = bmap->bi_bmap.br_blockcount;
- error = xfs_trans_log_finish_bmap_update(tp, done_item,
+ error = xfs_trans_log_finish_bmap_update(tp, BUD_ITEM(done),
bmap->bi_type,
bmap->bi_owner, bmap->bi_whichfork,
bmap->bi_bmap.br_startoff,
@@ -398,9 +391,9 @@ xfs_bmap_update_finish_item(
/* Abort all pending BUIs. */
STATIC void
xfs_bmap_update_abort_intent(
- void *intent)
+ struct xfs_log_item *intent)
{
- xfs_bui_release(intent);
+ xfs_bui_release(BUI_ITEM(intent));
}
 /* Cancel a deferred bmap update. */
@@ -416,10 +409,8 @@ xfs_bmap_update_cancel_item(
const struct xfs_defer_op_type xfs_bmap_update_defer_type = {
.max_items = XFS_BUI_MAX_FAST_EXTENTS,
- .diff_items = xfs_bmap_update_diff_items,
.create_intent = xfs_bmap_update_create_intent,
.abort_intent = xfs_bmap_update_abort_intent,
- .log_item = xfs_bmap_update_log_item,
.create_done = xfs_bmap_update_create_done,
.finish_item = xfs_bmap_update_finish_item,
.cancel_item = xfs_bmap_update_cancel_item,
@@ -429,32 +420,30 @@ const struct xfs_defer_op_type xfs_bmap_update_defer_type = {
* Process a bmap update intent item that was recovered from the log.
* We need to update some inode's bmbt.
*/
-int
-xfs_bui_recover(
- struct xfs_trans *parent_tp,
- struct xfs_bui_log_item *buip)
+STATIC int
+xfs_bui_item_recover(
+ struct xfs_log_item *lip,
+ struct xfs_trans *parent_tp)
{
- int error = 0;
- unsigned int bui_type;
+ struct xfs_bmbt_irec irec;
+ struct xfs_bui_log_item *buip = BUI_ITEM(lip);
+ struct xfs_trans *tp;
+ struct xfs_inode *ip = NULL;
+ struct xfs_mount *mp = parent_tp->t_mountp;
struct xfs_map_extent *bmap;
+ struct xfs_bud_log_item *budp;
xfs_fsblock_t startblock_fsb;
xfs_fsblock_t inode_fsb;
xfs_filblks_t count;
- bool op_ok;
- struct xfs_bud_log_item *budp;
+ xfs_exntst_t state;
enum xfs_bmap_intent_type type;
+ bool op_ok;
+ unsigned int bui_type;
int whichfork;
- xfs_exntst_t state;
- struct xfs_trans *tp;
- struct xfs_inode *ip = NULL;
- struct xfs_bmbt_irec irec;
- struct xfs_mount *mp = parent_tp->t_mountp;
-
- ASSERT(!test_bit(XFS_BUI_RECOVERED, &buip->bui_flags));
+ int error = 0;
/* Only one mapping operation per BUI... */
if (buip->bui_format.bui_nextents != XFS_BUI_MAX_FAST_EXTENTS) {
- set_bit(XFS_BUI_RECOVERED, &buip->bui_flags);
xfs_bui_release(buip);
return -EFSCORRUPTED;
}
@@ -488,7 +477,6 @@ xfs_bui_recover(
* This will pull the BUI from the AIL and
* free the memory associated with it.
*/
- set_bit(XFS_BUI_RECOVERED, &buip->bui_flags);
xfs_bui_release(buip);
return -EFSCORRUPTED;
}
@@ -546,7 +534,6 @@ xfs_bui_recover(
xfs_bmap_unmap_extent(tp, ip, &irec);
}
- set_bit(XFS_BUI_RECOVERED, &buip->bui_flags);
xfs_defer_move(parent_tp, tp);
error = xfs_trans_commit(tp);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
@@ -563,3 +550,121 @@ err_inode:
}
return error;
}
+
+STATIC bool
+xfs_bui_item_match(
+ struct xfs_log_item *lip,
+ uint64_t intent_id)
+{
+ return BUI_ITEM(lip)->bui_format.bui_id == intent_id;
+}
+
+static const struct xfs_item_ops xfs_bui_item_ops = {
+ .iop_size = xfs_bui_item_size,
+ .iop_format = xfs_bui_item_format,
+ .iop_unpin = xfs_bui_item_unpin,
+ .iop_release = xfs_bui_item_release,
+ .iop_recover = xfs_bui_item_recover,
+ .iop_match = xfs_bui_item_match,
+};
+
+/*
+ * Copy a BUI format buffer from the given buf into the destination
+ * BUI format structure. The BUI/BUD items were designed not to need any
+ * special alignment handling.
+ */
+static int
+xfs_bui_copy_format(
+ struct xfs_log_iovec *buf,
+ struct xfs_bui_log_format *dst_bui_fmt)
+{
+ struct xfs_bui_log_format *src_bui_fmt;
+ uint len;
+
+ src_bui_fmt = buf->i_addr;
+ len = xfs_bui_log_format_sizeof(src_bui_fmt->bui_nextents);
+
+ if (buf->i_len == len) {
+ memcpy(dst_bui_fmt, src_bui_fmt, len);
+ return 0;
+ }
+ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL);
+ return -EFSCORRUPTED;
+}
+
+/*
+ * This routine is called to create an in-core extent bmap update
+ * item from the bui format structure which was logged on disk.
+ * It allocates an in-core bui, copies the extents from the format
+ * structure into it, and adds the bui to the AIL with the given
+ * LSN.
+ */
+STATIC int
+xlog_recover_bui_commit_pass2(
+ struct xlog *log,
+ struct list_head *buffer_list,
+ struct xlog_recover_item *item,
+ xfs_lsn_t lsn)
+{
+ int error;
+ struct xfs_mount *mp = log->l_mp;
+ struct xfs_bui_log_item *buip;
+ struct xfs_bui_log_format *bui_formatp;
+
+ bui_formatp = item->ri_buf[0].i_addr;
+
+ if (bui_formatp->bui_nextents != XFS_BUI_MAX_FAST_EXTENTS) {
+ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp);
+ return -EFSCORRUPTED;
+ }
+ buip = xfs_bui_init(mp);
+ error = xfs_bui_copy_format(&item->ri_buf[0], &buip->bui_format);
+ if (error) {
+ xfs_bui_item_free(buip);
+ return error;
+ }
+ atomic_set(&buip->bui_next_extent, bui_formatp->bui_nextents);
+ /*
+ * Insert the intent into the AIL directly and drop one reference so
+ * that finishing or canceling the work will drop the other.
+ */
+ xfs_trans_ail_insert(log->l_ailp, &buip->bui_item, lsn);
+ xfs_bui_release(buip);
+ return 0;
+}
+
+const struct xlog_recover_item_ops xlog_bui_item_ops = {
+ .item_type = XFS_LI_BUI,
+ .commit_pass2 = xlog_recover_bui_commit_pass2,
+};
+
+/*
+ * This routine is called when a BUD format structure is found in a committed
+ * transaction in the log. Its purpose is to cancel the corresponding BUI if it
+ * was still in the log. To do this it searches the AIL for the BUI with an id
+ * equal to that in the BUD format structure. If we find it we drop the BUD
+ * reference, which removes the BUI from the AIL and frees it.
+ */
+STATIC int
+xlog_recover_bud_commit_pass2(
+ struct xlog *log,
+ struct list_head *buffer_list,
+ struct xlog_recover_item *item,
+ xfs_lsn_t lsn)
+{
+ struct xfs_bud_log_format *bud_formatp;
+
+ bud_formatp = item->ri_buf[0].i_addr;
+ if (item->ri_buf[0].i_len != sizeof(struct xfs_bud_log_format)) {
+ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp);
+ return -EFSCORRUPTED;
+ }
+
+ xlog_recover_release_intent(log, XFS_LI_BUI, bud_formatp->bud_bui_id);
+ return 0;
+}
+
+const struct xlog_recover_item_ops xlog_bud_item_ops = {
+ .item_type = XFS_LI_BUD,
+ .commit_pass2 = xlog_recover_bud_commit_pass2,
+};
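
The BUD handler above pairs a "done" record with its logged intent: the done item carries the intent's id, and recovery walks the in-memory log items calling each candidate's ->iop_match() until the matching BUI is found and released. The standalone C sketch below models only that matching step; the structure layout and helper name are invented for illustration and are not the kernel API.

#include <stdbool.h>
#include <stdint.h>

/* Simplified stand-in for a logged item awaiting recovery. */
struct log_item {
	int		type;
	struct log_item	*next;
	bool		(*match)(struct log_item *lip, uint64_t id);
	void		(*release)(struct log_item *lip);
};

/*
 * Walk the in-memory item list (the AIL in XFS), find the intent of the
 * given type whose ->match() accepts the id carried by the done record,
 * and release it so the work is not replayed twice.
 */
static void release_intent(struct log_item *head, int type, uint64_t id)
{
	struct log_item *lip;

	for (lip = head; lip; lip = lip->next) {
		if (lip->type == type && lip->match(lip, id)) {
			lip->release(lip);
			return;
		}
	}
}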
diff --git a/fs/xfs/xfs_bmap_item.h b/fs/xfs/xfs_bmap_item.h
index ad479cc73de8..b9be62f8bd52 100644
--- a/fs/xfs/xfs_bmap_item.h
+++ b/fs/xfs/xfs_bmap_item.h
@@ -33,11 +33,6 @@ struct kmem_zone;
#define XFS_BUI_MAX_FAST_EXTENTS 1
/*
- * Define BUI flag bits. Manipulated by set/clear/test_bit operators.
- */
-#define XFS_BUI_RECOVERED 1
-
-/*
* This is the "bmap update intent" log item. It is used to log the fact that
* some reverse mappings need to change. It is used in conjunction with the
* "bmap update done" log item described below.
@@ -49,7 +44,6 @@ struct xfs_bui_log_item {
struct xfs_log_item bui_item;
atomic_t bui_refcount;
atomic_t bui_next_extent;
- unsigned long bui_flags; /* misc flags */
struct xfs_bui_log_format bui_format;
};
@@ -74,9 +68,4 @@ struct xfs_bud_log_item {
extern struct kmem_zone *xfs_bui_zone;
extern struct kmem_zone *xfs_bud_zone;
-struct xfs_bui_log_item *xfs_bui_init(struct xfs_mount *);
-void xfs_bui_item_free(struct xfs_bui_log_item *);
-void xfs_bui_release(struct xfs_bui_log_item *);
-int xfs_bui_recover(struct xfs_trans *parent_tp, struct xfs_bui_log_item *buip);
-
#endif /* __XFS_BMAP_ITEM_H__ */
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 4f800f7fe888..f37f5cc4b19f 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -223,7 +223,7 @@ xfs_bmap_count_blocks(
if (!ifp)
return 0;
- switch (XFS_IFORK_FORMAT(ip, whichfork)) {
+ switch (ifp->if_format) {
case XFS_DINODE_FMT_BTREE:
if (!(ifp->if_flags & XFS_IFEXTENTS)) {
error = xfs_iread_extents(tp, ip, whichfork);
@@ -449,7 +449,7 @@ xfs_getbmap(
break;
}
- switch (XFS_IFORK_FORMAT(ip, whichfork)) {
+ switch (ifp->if_format) {
case XFS_DINODE_FMT_EXTENTS:
case XFS_DINODE_FMT_BTREE:
break;
@@ -1210,17 +1210,26 @@ xfs_swap_extents_check_format(
struct xfs_inode *ip, /* target inode */
struct xfs_inode *tip) /* tmp inode */
{
+ struct xfs_ifork *ifp = &ip->i_df;
+ struct xfs_ifork *tifp = &tip->i_df;
+
+ /* User/group/project quota ids must match if quotas are enforced. */
+ if (XFS_IS_QUOTA_ON(ip->i_mount) &&
+ (!uid_eq(VFS_I(ip)->i_uid, VFS_I(tip)->i_uid) ||
+ !gid_eq(VFS_I(ip)->i_gid, VFS_I(tip)->i_gid) ||
+ ip->i_d.di_projid != tip->i_d.di_projid))
+ return -EINVAL;
/* Should never get a local format */
- if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL ||
- tip->i_d.di_format == XFS_DINODE_FMT_LOCAL)
+ if (ifp->if_format == XFS_DINODE_FMT_LOCAL ||
+ tifp->if_format == XFS_DINODE_FMT_LOCAL)
return -EINVAL;
/*
* if the target inode has fewer extents than the temporary inode, then
* why did userspace call us?
*/
- if (ip->i_d.di_nextents < tip->i_d.di_nextents)
+ if (ifp->if_nextents < tifp->if_nextents)
return -EINVAL;
/*
@@ -1235,20 +1244,18 @@ xfs_swap_extents_check_format(
* form then we will end up with the target inode in the wrong format
* as we already know there are fewer extents in the temp inode.
*/
- if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
- tip->i_d.di_format == XFS_DINODE_FMT_BTREE)
+ if (ifp->if_format == XFS_DINODE_FMT_EXTENTS &&
+ tifp->if_format == XFS_DINODE_FMT_BTREE)
return -EINVAL;
/* Check temp in extent form to max in target */
- if (tip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
- XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) >
- XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK))
+ if (tifp->if_format == XFS_DINODE_FMT_EXTENTS &&
+ tifp->if_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK))
return -EINVAL;
/* Check target in extent form to max in temp */
- if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
- XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) >
- XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK))
+ if (ifp->if_format == XFS_DINODE_FMT_EXTENTS &&
+ ifp->if_nextents > XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK))
return -EINVAL;
/*
@@ -1260,22 +1267,20 @@ xfs_swap_extents_check_format(
* (a common defrag case) which will occur when the temp inode is in
* extent format...
*/
- if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
+ if (tifp->if_format == XFS_DINODE_FMT_BTREE) {
if (XFS_IFORK_Q(ip) &&
- XFS_BMAP_BMDR_SPACE(tip->i_df.if_broot) > XFS_IFORK_BOFF(ip))
+ XFS_BMAP_BMDR_SPACE(tifp->if_broot) > XFS_IFORK_BOFF(ip))
return -EINVAL;
- if (XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) <=
- XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK))
+ if (tifp->if_nextents <= XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK))
return -EINVAL;
}
/* Reciprocal target->temp btree format checks */
- if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
+ if (ifp->if_format == XFS_DINODE_FMT_BTREE) {
if (XFS_IFORK_Q(tip) &&
XFS_BMAP_BMDR_SPACE(ip->i_df.if_broot) > XFS_IFORK_BOFF(tip))
return -EINVAL;
- if (XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) <=
- XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK))
+ if (ifp->if_nextents <= XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK))
return -EINVAL;
}
@@ -1427,15 +1432,15 @@ xfs_swap_extent_forks(
/*
* Count the number of extended attribute blocks
*/
- if ( ((XFS_IFORK_Q(ip) != 0) && (ip->i_d.di_anextents > 0)) &&
- (ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) {
+ if (XFS_IFORK_Q(ip) && ip->i_afp->if_nextents > 0 &&
+ ip->i_afp->if_format != XFS_DINODE_FMT_LOCAL) {
error = xfs_bmap_count_blocks(tp, ip, XFS_ATTR_FORK, &junk,
&aforkblks);
if (error)
return error;
}
- if ( ((XFS_IFORK_Q(tip) != 0) && (tip->i_d.di_anextents > 0)) &&
- (tip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) {
+ if (XFS_IFORK_Q(tip) && tip->i_afp->if_nextents > 0 &&
+ tip->i_afp->if_format != XFS_DINODE_FMT_LOCAL) {
error = xfs_bmap_count_blocks(tp, tip, XFS_ATTR_FORK, &junk,
&taforkblks);
if (error)
@@ -1450,9 +1455,9 @@ xfs_swap_extent_forks(
* bmbt scan as the last step.
*/
if (xfs_sb_version_has_v3inode(&ip->i_mount->m_sb)) {
- if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE)
+ if (ip->i_df.if_format == XFS_DINODE_FMT_BTREE)
(*target_log_flags) |= XFS_ILOG_DOWNER;
- if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE)
+ if (tip->i_df.if_format == XFS_DINODE_FMT_BTREE)
(*src_log_flags) |= XFS_ILOG_DOWNER;
}
@@ -1468,9 +1473,6 @@ xfs_swap_extent_forks(
ip->i_d.di_nblocks = tip->i_d.di_nblocks - taforkblks + aforkblks;
tip->i_d.di_nblocks = tmp + taforkblks - aforkblks;
- swap(ip->i_d.di_nextents, tip->i_d.di_nextents);
- swap(ip->i_d.di_format, tip->i_d.di_format);
-
/*
* The extents in the source inode could still contain speculative
* preallocation beyond EOF (e.g. the file is open but not modified
@@ -1484,7 +1486,7 @@ xfs_swap_extent_forks(
tip->i_delayed_blks = ip->i_delayed_blks;
ip->i_delayed_blks = 0;
- switch (ip->i_d.di_format) {
+ switch (ip->i_df.if_format) {
case XFS_DINODE_FMT_EXTENTS:
(*src_log_flags) |= XFS_ILOG_DEXT;
break;
@@ -1495,7 +1497,7 @@ xfs_swap_extent_forks(
break;
}
- switch (tip->i_d.di_format) {
+ switch (tip->i_df.if_format) {
case XFS_DINODE_FMT_EXTENTS:
(*target_log_flags) |= XFS_ILOG_DEXT;
break;
@@ -1606,7 +1608,7 @@ xfs_swap_extents(
if (xfs_inode_has_cow_data(tip)) {
error = xfs_reflink_cancel_cow_range(tip, 0, NULLFILEOFF, true);
if (error)
- return error;
+ goto out_unlock;
}
/*
@@ -1615,9 +1617,9 @@ xfs_swap_extents(
* performed with log redo items!
*/
if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
- int w = XFS_DATA_FORK;
- uint32_t ipnext = XFS_IFORK_NEXTENTS(ip, w);
- uint32_t tipnext = XFS_IFORK_NEXTENTS(tip, w);
+ int w = XFS_DATA_FORK;
+ uint32_t ipnext = ip->i_df.if_nextents;
+ uint32_t tipnext = tip->i_df.if_nextents;
/*
* Conceptually this shouldn't affect the shape of either bmbt,
@@ -1717,10 +1719,11 @@ xfs_swap_extents(
/* Swap the cow forks. */
if (xfs_sb_version_hasreflink(&mp->m_sb)) {
- ASSERT(ip->i_cformat == XFS_DINODE_FMT_EXTENTS);
- ASSERT(tip->i_cformat == XFS_DINODE_FMT_EXTENTS);
+ ASSERT(!ip->i_cowfp ||
+ ip->i_cowfp->if_format == XFS_DINODE_FMT_EXTENTS);
+ ASSERT(!tip->i_cowfp ||
+ tip->i_cowfp->if_format == XFS_DINODE_FMT_EXTENTS);
- swap(ip->i_cnextents, tip->i_cnextents);
swap(ip->i_cowfp, tip->i_cowfp);
if (ip->i_cowfp && ip->i_cowfp->if_bytes)
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 9ec3eaf1c618..20b748f7e186 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -477,7 +477,7 @@ _xfs_buf_map_pages(
nofs_flag = memalloc_nofs_save();
do {
bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count,
- -1, PAGE_KERNEL);
+ -1);
if (bp->b_addr)
break;
vm_unmap_aliases();
@@ -1197,8 +1197,10 @@ xfs_buf_ioend(
bp->b_ops->verify_read(bp);
}
- if (!bp->b_error)
+ if (!bp->b_error) {
+ bp->b_flags &= ~XBF_WRITE_FAIL;
bp->b_flags |= XBF_DONE;
+ }
if (bp->b_iodone)
(*(bp->b_iodone))(bp);
@@ -1242,10 +1244,26 @@ xfs_buf_ioerror_alert(
struct xfs_buf *bp,
xfs_failaddr_t func)
{
- xfs_alert_ratelimited(bp->b_mount,
-"metadata I/O error in \"%pS\" at daddr 0x%llx len %d error %d",
- func, (uint64_t)XFS_BUF_ADDR(bp), bp->b_length,
- -bp->b_error);
+ xfs_buf_alert_ratelimited(bp, "XFS: metadata IO error",
+ "metadata I/O error in \"%pS\" at daddr 0x%llx len %d error %d",
+ func, (uint64_t)XFS_BUF_ADDR(bp),
+ bp->b_length, -bp->b_error);
+}
+
+/*
+ * To simulate an I/O failure, the buffer must be locked and held with at least
+ * three references. The LRU reference is dropped by the stale call. The buf
+ * item reference is dropped via ioend processing. The third reference is owned
+ * by the caller and is dropped on I/O completion if the buffer is XBF_ASYNC.
+ */
+void
+xfs_buf_ioend_fail(
+ struct xfs_buf *bp)
+{
+ bp->b_flags &= ~XBF_DONE;
+ xfs_buf_stale(bp);
+ xfs_buf_ioerror(bp, -EIO);
+ xfs_buf_ioend(bp);
}
int
@@ -1258,7 +1276,7 @@ xfs_bwrite(
bp->b_flags |= XBF_WRITE;
bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q |
- XBF_WRITE_FAIL | XBF_DONE);
+ XBF_DONE);
error = xfs_buf_submit(bp);
if (error)
@@ -1272,6 +1290,11 @@ xfs_buf_bio_end_io(
{
struct xfs_buf *bp = (struct xfs_buf *)bio->bi_private;
+ if (!bio->bi_status &&
+ (bp->b_flags & XBF_WRITE) && (bp->b_flags & XBF_ASYNC) &&
+ XFS_TEST_ERROR(false, bp->b_mount, XFS_ERRTAG_BUF_IOERROR))
+ bio->bi_status = BLK_STS_IOERR;
+
/*
* don't overwrite existing errors - otherwise we can lose errors on
* buffers that require multiple bios to complete.
@@ -1480,10 +1503,7 @@ __xfs_buf_submit(
/* on shutdown we stale and complete the buffer immediately */
if (XFS_FORCED_SHUTDOWN(bp->b_mount)) {
- xfs_buf_ioerror(bp, -EIO);
- bp->b_flags &= ~XBF_DONE;
- xfs_buf_stale(bp);
- xfs_buf_ioend(bp);
+ xfs_buf_ioend_fail(bp);
return -EIO;
}
@@ -1642,7 +1662,8 @@ xfs_wait_buftarg(
struct xfs_buftarg *btp)
{
LIST_HEAD(dispose);
- int loop = 0;
+ int loop = 0;
+ bool write_fail = false;
/*
* First wait on the buftarg I/O count for all in-flight buffers to be
@@ -1670,17 +1691,29 @@ xfs_wait_buftarg(
bp = list_first_entry(&dispose, struct xfs_buf, b_lru);
list_del_init(&bp->b_lru);
if (bp->b_flags & XBF_WRITE_FAIL) {
- xfs_alert(btp->bt_mount,
+ write_fail = true;
+ xfs_buf_alert_ratelimited(bp,
+ "XFS: Corruption Alert",
"Corruption Alert: Buffer at daddr 0x%llx had permanent write failures!",
(long long)bp->b_bn);
- xfs_alert(btp->bt_mount,
-"Please run xfs_repair to determine the extent of the problem.");
}
xfs_buf_rele(bp);
}
if (loop++ != 0)
delay(100);
}
+
+ /*
+ * If one or more failed buffers were freed, that means dirty metadata
+ * was thrown away. This should only ever happen after I/O completion
+ * handling has elevated I/O error(s) to permanent failures and shut
+ * down the fs.
+ */
+ if (write_fail) {
+ ASSERT(XFS_FORCED_SHUTDOWN(btp->bt_mount));
+ xfs_alert(btp->bt_mount,
+ "Please run xfs_repair to determine the extent of the problem.");
+ }
}
static enum lru_status
@@ -1813,6 +1846,13 @@ xfs_alloc_buftarg(
btp->bt_bdev = bdev;
btp->bt_daxdev = dax_dev;
+ /*
+ * Buffer IO error rate limiting. Limit it to no more than 10 messages
+ * per 30 seconds so as to not spam logs too much on repeated errors.
+ */
+ ratelimit_state_init(&btp->bt_ioerror_rl, 30 * HZ,
+ DEFAULT_RATELIMIT_BURST);
+
if (xfs_setsize_buftarg_early(btp, bdev))
goto error_free;
@@ -1983,7 +2023,7 @@ xfs_buf_delwri_submit_buffers(
* synchronously. Otherwise, drop the buffer from the delwri
* queue and submit async.
*/
- bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_WRITE_FAIL);
+ bp->b_flags &= ~_XBF_DELWRI_Q;
bp->b_flags |= XBF_WRITE;
if (wait_list) {
bp->b_flags &= ~XBF_ASYNC;
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 9a04c53c2488..050c53b739e2 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -91,6 +91,7 @@ typedef struct xfs_buftarg {
struct list_lru bt_lru;
struct percpu_counter bt_io_count;
+ struct ratelimit_state bt_ioerror_rl;
} xfs_buftarg_t;
struct xfs_buf;
@@ -263,6 +264,7 @@ extern void __xfs_buf_ioerror(struct xfs_buf *bp, int error,
xfs_failaddr_t failaddr);
#define xfs_buf_ioerror(bp, err) __xfs_buf_ioerror((bp), (err), __this_address)
extern void xfs_buf_ioerror_alert(struct xfs_buf *bp, xfs_failaddr_t fa);
+void xfs_buf_ioend_fail(struct xfs_buf *);
extern int __xfs_buf_submit(struct xfs_buf *bp, bool);
static inline int xfs_buf_submit(struct xfs_buf *bp)
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 1545657c3ca0..9e75e8d6042e 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -410,7 +410,6 @@ xfs_buf_item_unpin(
{
struct xfs_buf_log_item *bip = BUF_ITEM(lip);
xfs_buf_t *bp = bip->bli_buf;
- struct xfs_ail *ailp = lip->li_ailp;
int stale = bip->bli_flags & XFS_BLI_STALE;
int freed;
@@ -452,10 +451,10 @@ xfs_buf_item_unpin(
}
/*
- * If we get called here because of an IO error, we may
- * or may not have the item on the AIL. xfs_trans_ail_delete()
- * will take care of that situation.
- * xfs_trans_ail_delete() drops the AIL lock.
+ * If we get called here because of an IO error, we may or may
+ * not have the item on the AIL. xfs_trans_ail_delete() will
+ * take care of that situation. xfs_trans_ail_delete() drops
+ * the AIL lock.
*/
if (bip->bli_flags & XFS_BLI_STALE_INODE) {
xfs_buf_do_callbacks(bp);
@@ -463,47 +462,23 @@ xfs_buf_item_unpin(
list_del_init(&bp->b_li_list);
bp->b_iodone = NULL;
} else {
- spin_lock(&ailp->ail_lock);
- xfs_trans_ail_delete(ailp, lip, SHUTDOWN_LOG_IO_ERROR);
+ xfs_trans_ail_delete(lip, SHUTDOWN_LOG_IO_ERROR);
xfs_buf_item_relse(bp);
ASSERT(bp->b_log_item == NULL);
}
xfs_buf_relse(bp);
} else if (freed && remove) {
/*
- * There are currently two references to the buffer - the active
- * LRU reference and the buf log item. What we are about to do
- * here - simulate a failed IO completion - requires 3
- * references.
- *
- * The LRU reference is removed by the xfs_buf_stale() call. The
- * buf item reference is removed by the xfs_buf_iodone()
- * callback that is run by xfs_buf_do_callbacks() during ioend
- * processing (via the bp->b_iodone callback), and then finally
- * the ioend processing will drop the IO reference if the buffer
- * is marked XBF_ASYNC.
- *
- * Hence we need to take an additional reference here so that IO
- * completion processing doesn't free the buffer prematurely.
+ * The buffer must be locked and held by the caller to simulate
+ * an async I/O failure.
*/
xfs_buf_lock(bp);
xfs_buf_hold(bp);
bp->b_flags |= XBF_ASYNC;
- xfs_buf_ioerror(bp, -EIO);
- bp->b_flags &= ~XBF_DONE;
- xfs_buf_stale(bp);
- xfs_buf_ioend(bp);
+ xfs_buf_ioend_fail(bp);
}
}
-/*
- * Buffer IO error rate limiting. Limit it to no more than 10 messages per 30
- * seconds so as to not spam logs too much on repeated detection of the same
- * buffer being bad..
- */
-
-static DEFINE_RATELIMIT_STATE(xfs_buf_write_fail_rl_state, 30 * HZ, 10);
-
STATIC uint
xfs_buf_item_push(
struct xfs_log_item *lip,
@@ -533,11 +508,10 @@ xfs_buf_item_push(
trace_xfs_buf_item_push(bip);
/* has a previous flush failed due to IO errors? */
- if ((bp->b_flags & XBF_WRITE_FAIL) &&
- ___ratelimit(&xfs_buf_write_fail_rl_state, "XFS: Failing async write")) {
- xfs_warn(bp->b_mount,
-"Failing async write on buffer block 0x%llx. Retrying async write.",
- (long long)bp->b_bn);
+ if (bp->b_flags & XBF_WRITE_FAIL) {
+ xfs_buf_alert_ratelimited(bp, "XFS: Failing async write",
+ "Failing async write on buffer block 0x%llx. Retrying async write.",
+ (long long)bp->b_bn);
}
if (!xfs_buf_delwri_queue(bp, buffer_list))
@@ -584,7 +558,7 @@ xfs_buf_item_put(
* state.
*/
if (aborted)
- xfs_trans_ail_remove(lip, SHUTDOWN_LOG_IO_ERROR);
+ xfs_trans_ail_delete(lip, 0);
xfs_buf_item_relse(bip->bli_buf);
return true;
}
@@ -1229,61 +1203,19 @@ xfs_buf_iodone(
struct xfs_buf *bp,
struct xfs_log_item *lip)
{
- struct xfs_ail *ailp = lip->li_ailp;
-
ASSERT(BUF_ITEM(lip)->bli_buf == bp);
xfs_buf_rele(bp);
/*
- * If we are forcibly shutting down, this may well be
- * off the AIL already. That's because we simulate the
- * log-committed callbacks to unpin these buffers. Or we may never
- * have put this item on AIL because of the transaction was
- * aborted forcibly. xfs_trans_ail_delete() takes care of these.
+ * If we are forcibly shutting down, this may well be off the AIL
+ * already. That's because we simulate the log-committed callbacks to
+ * unpin these buffers. Or we may never have put this item on AIL
+ * because the transaction was aborted forcibly.
+ * xfs_trans_ail_delete() takes care of these.
*
* Either way, AIL is useless if we're forcing a shutdown.
*/
- spin_lock(&ailp->ail_lock);
- xfs_trans_ail_delete(ailp, lip, SHUTDOWN_CORRUPT_INCORE);
+ xfs_trans_ail_delete(lip, SHUTDOWN_CORRUPT_INCORE);
xfs_buf_item_free(BUF_ITEM(lip));
}
-
-/*
- * Requeue a failed buffer for writeback.
- *
- * We clear the log item failed state here as well, but we have to be careful
- * about reference counts because the only active reference counts on the buffer
- * may be the failed log items. Hence if we clear the log item failed state
- * before queuing the buffer for IO we can release all active references to
- * the buffer and free it, leading to use after free problems in
- * xfs_buf_delwri_queue. It makes no difference to the buffer or log items which
- * order we process them in - the buffer is locked, and we own the buffer list
- * so nothing on them is going to change while we are performing this action.
- *
- * Hence we can safely queue the buffer for IO before we clear the failed log
- * item state, therefore always having an active reference to the buffer and
- * avoiding the transient zero-reference state that leads to use-after-free.
- *
- * Return true if the buffer was added to the buffer list, false if it was
- * already on the buffer list.
- */
-bool
-xfs_buf_resubmit_failed_buffers(
- struct xfs_buf *bp,
- struct list_head *buffer_list)
-{
- struct xfs_log_item *lip;
- bool ret;
-
- ret = xfs_buf_delwri_queue(bp, buffer_list);
-
- /*
- * XFS_LI_FAILED set/clear is protected by ail_lock, caller of this
- * function already have it acquired
- */
- list_for_each_entry(lip, &bp->b_li_list, li_bio_list)
- xfs_clear_li_failed(lip);
-
- return ret;
-}
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h
index 30114b510332..c9c57e2da932 100644
--- a/fs/xfs/xfs_buf_item.h
+++ b/fs/xfs/xfs_buf_item.h
@@ -59,8 +59,6 @@ void xfs_buf_attach_iodone(struct xfs_buf *,
struct xfs_log_item *);
void xfs_buf_iodone_callbacks(struct xfs_buf *);
void xfs_buf_iodone(struct xfs_buf *, struct xfs_log_item *);
-bool xfs_buf_resubmit_failed_buffers(struct xfs_buf *,
- struct list_head *);
bool xfs_buf_log_check_iovec(struct xfs_log_iovec *iovec);
extern kmem_zone_t *xfs_buf_item_zone;
diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c
new file mode 100644
index 000000000000..04faa7310c4f
--- /dev/null
+++ b/fs/xfs/xfs_buf_item_recover.c
@@ -0,0 +1,984 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_bit.h"
+#include "xfs_mount.h"
+#include "xfs_trans.h"
+#include "xfs_buf_item.h"
+#include "xfs_trans_priv.h"
+#include "xfs_trace.h"
+#include "xfs_log.h"
+#include "xfs_log_priv.h"
+#include "xfs_log_recover.h"
+#include "xfs_error.h"
+#include "xfs_inode.h"
+#include "xfs_dir2.h"
+#include "xfs_quota.h"
+
+/*
+ * This structure is used during recovery to record the buf log items which
+ * have been canceled and should not be replayed.
+ */
+struct xfs_buf_cancel {
+ xfs_daddr_t bc_blkno;
+ uint bc_len;
+ int bc_refcount;
+ struct list_head bc_list;
+};
+
+static struct xfs_buf_cancel *
+xlog_find_buffer_cancelled(
+ struct xlog *log,
+ xfs_daddr_t blkno,
+ uint len)
+{
+ struct list_head *bucket;
+ struct xfs_buf_cancel *bcp;
+
+ if (!log->l_buf_cancel_table)
+ return NULL;
+
+ bucket = XLOG_BUF_CANCEL_BUCKET(log, blkno);
+ list_for_each_entry(bcp, bucket, bc_list) {
+ if (bcp->bc_blkno == blkno && bcp->bc_len == len)
+ return bcp;
+ }
+
+ return NULL;
+}
+
+static bool
+xlog_add_buffer_cancelled(
+ struct xlog *log,
+ xfs_daddr_t blkno,
+ uint len)
+{
+ struct xfs_buf_cancel *bcp;
+
+ /*
+ * If we find an existing cancel record, this indicates that the buffer
+ * was cancelled multiple times. To ensure that during pass 2 we keep
+ * the record in the table until we reach its last occurrence in the
+ * log, a reference count is kept to tell how many times we expect to
+ * see this record during the second pass.
+ */
+ bcp = xlog_find_buffer_cancelled(log, blkno, len);
+ if (bcp) {
+ bcp->bc_refcount++;
+ return false;
+ }
+
+ bcp = kmem_alloc(sizeof(struct xfs_buf_cancel), 0);
+ bcp->bc_blkno = blkno;
+ bcp->bc_len = len;
+ bcp->bc_refcount = 1;
+ list_add_tail(&bcp->bc_list, XLOG_BUF_CANCEL_BUCKET(log, blkno));
+ return true;
+}
+
+/*
+ * Check if there is an entry for blkno, len in the buffer cancel record table.
+ */
+bool
+xlog_is_buffer_cancelled(
+ struct xlog *log,
+ xfs_daddr_t blkno,
+ uint len)
+{
+ return xlog_find_buffer_cancelled(log, blkno, len) != NULL;
+}
+
+/*
+ * Check if there is an entry for blkno, len in the buffer cancel record table,
+ * and decrement the reference count on it if there is one.
+ *
+ * Remove the cancel record once the refcount hits zero, so that if the same
+ * buffer is re-used again after its last cancellation we actually replay the
+ * changes made at that point.
+ */
+static bool
+xlog_put_buffer_cancelled(
+ struct xlog *log,
+ xfs_daddr_t blkno,
+ uint len)
+{
+ struct xfs_buf_cancel *bcp;
+
+ bcp = xlog_find_buffer_cancelled(log, blkno, len);
+ if (!bcp) {
+ ASSERT(0);
+ return false;
+ }
+
+ if (--bcp->bc_refcount == 0) {
+ list_del(&bcp->bc_list);
+ kmem_free(bcp);
+ }
+ return true;
+}
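
As the comments above explain, pass 1 records each cancelled buffer keyed by (blkno, len) together with a reference count, and pass 2 drops one reference per cancel record it meets, deleting the entry once the count reaches zero so later reuse of the blocks is replayed normally. Below is a minimal userspace model of that two-pass bookkeeping; it uses one flat list instead of the kernel's hashed buckets and all names are illustrative.

#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>

struct cancel_rec {
	uint64_t		blkno;
	unsigned int		len;
	int			refcount;
	struct cancel_rec	*next;
};

static struct cancel_rec *cancel_list;

/* Pass 1: called once per XFS_BLF_CANCEL record seen in the log. */
static void cancel_add(uint64_t blkno, unsigned int len)
{
	struct cancel_rec *rec;

	for (rec = cancel_list; rec; rec = rec->next) {
		if (rec->blkno == blkno && rec->len == len) {
			rec->refcount++;	/* cancelled again later in the log */
			return;
		}
	}
	rec = calloc(1, sizeof(*rec));
	rec->blkno = blkno;
	rec->len = len;
	rec->refcount = 1;
	rec->next = cancel_list;
	cancel_list = rec;
}

/* Pass 2: drop one reference per cancel record replayed. */
static bool cancel_put(uint64_t blkno, unsigned int len)
{
	struct cancel_rec **p;

	for (p = &cancel_list; *p; p = &(*p)->next) {
		if ((*p)->blkno == blkno && (*p)->len == len) {
			if (--(*p)->refcount == 0) {
				struct cancel_rec *rec = *p;

				*p = rec->next;
				free(rec);
			}
			return true;
		}
	}
	return false;	/* not in the table: the buffer was not cancelled */
}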
+
+/* log buffer item recovery */
+
+/*
+ * Sort buffer items for log recovery. Most buffer items should end up on the
+ * buffer list and are recovered first, with the following exceptions:
+ *
+ * 1. XFS_BLF_CANCEL buffers must be processed last because some log items
+ *    might depend on the incore cancellation record, and replaying a cancelled
+ * buffer item can remove the incore record.
+ *
+ * 2. XFS_BLF_INODE_BUF buffers are handled after most regular items so that
+ * we replay di_next_unlinked only after flushing the inode 'free' state
+ * to the inode buffer.
+ *
+ * See xlog_recover_reorder_trans for more details.
+ */
+STATIC enum xlog_recover_reorder
+xlog_recover_buf_reorder(
+ struct xlog_recover_item *item)
+{
+ struct xfs_buf_log_format *buf_f = item->ri_buf[0].i_addr;
+
+ if (buf_f->blf_flags & XFS_BLF_CANCEL)
+ return XLOG_REORDER_CANCEL_LIST;
+ if (buf_f->blf_flags & XFS_BLF_INODE_BUF)
+ return XLOG_REORDER_INODE_BUFFER_LIST;
+ return XLOG_REORDER_BUFFER_LIST;
+}
+
+STATIC void
+xlog_recover_buf_ra_pass2(
+ struct xlog *log,
+ struct xlog_recover_item *item)
+{
+ struct xfs_buf_log_format *buf_f = item->ri_buf[0].i_addr;
+
+ xlog_buf_readahead(log, buf_f->blf_blkno, buf_f->blf_len, NULL);
+}
+
+/*
+ * Build up the table of buf cancel records so that we don't replay cancelled
+ * data in the second pass.
+ */
+static int
+xlog_recover_buf_commit_pass1(
+ struct xlog *log,
+ struct xlog_recover_item *item)
+{
+ struct xfs_buf_log_format *bf = item->ri_buf[0].i_addr;
+
+ if (!xfs_buf_log_check_iovec(&item->ri_buf[0])) {
+ xfs_err(log->l_mp, "bad buffer log item size (%d)",
+ item->ri_buf[0].i_len);
+ return -EFSCORRUPTED;
+ }
+
+ if (!(bf->blf_flags & XFS_BLF_CANCEL))
+ trace_xfs_log_recover_buf_not_cancel(log, bf);
+ else if (xlog_add_buffer_cancelled(log, bf->blf_blkno, bf->blf_len))
+ trace_xfs_log_recover_buf_cancel_add(log, bf);
+ else
+ trace_xfs_log_recover_buf_cancel_ref_inc(log, bf);
+ return 0;
+}
+
+/*
+ * Validate the recovered buffer is of the correct type and attach the
+ * appropriate buffer operations to it for writeback. Magic numbers are in a
+ * few places:
+ * the first 16 bits of the buffer (inode buffer, dquot buffer),
+ * the first 32 bits of the buffer (most blocks),
+ * inside a struct xfs_da_blkinfo at the start of the buffer.
+ */
+static void
+xlog_recover_validate_buf_type(
+ struct xfs_mount *mp,
+ struct xfs_buf *bp,
+ struct xfs_buf_log_format *buf_f,
+ xfs_lsn_t current_lsn)
+{
+ struct xfs_da_blkinfo *info = bp->b_addr;
+ uint32_t magic32;
+ uint16_t magic16;
+ uint16_t magicda;
+ char *warnmsg = NULL;
+
+ /*
+ * We can only do post recovery validation on items on CRC enabled
+ * filesystems as we need to know when the buffer was written to be able
+ * to determine if we should have replayed the item. If we replay old
+ * metadata over a newer buffer, then it will enter a temporarily
+ * inconsistent state resulting in verification failures. Hence for now
+ * just avoid the verification stage for non-crc filesystems.
+ */
+ if (!xfs_sb_version_hascrc(&mp->m_sb))
+ return;
+
+ magic32 = be32_to_cpu(*(__be32 *)bp->b_addr);
+ magic16 = be16_to_cpu(*(__be16*)bp->b_addr);
+ magicda = be16_to_cpu(info->magic);
+ switch (xfs_blft_from_flags(buf_f)) {
+ case XFS_BLFT_BTREE_BUF:
+ switch (magic32) {
+ case XFS_ABTB_CRC_MAGIC:
+ case XFS_ABTB_MAGIC:
+ bp->b_ops = &xfs_bnobt_buf_ops;
+ break;
+ case XFS_ABTC_CRC_MAGIC:
+ case XFS_ABTC_MAGIC:
+ bp->b_ops = &xfs_cntbt_buf_ops;
+ break;
+ case XFS_IBT_CRC_MAGIC:
+ case XFS_IBT_MAGIC:
+ bp->b_ops = &xfs_inobt_buf_ops;
+ break;
+ case XFS_FIBT_CRC_MAGIC:
+ case XFS_FIBT_MAGIC:
+ bp->b_ops = &xfs_finobt_buf_ops;
+ break;
+ case XFS_BMAP_CRC_MAGIC:
+ case XFS_BMAP_MAGIC:
+ bp->b_ops = &xfs_bmbt_buf_ops;
+ break;
+ case XFS_RMAP_CRC_MAGIC:
+ bp->b_ops = &xfs_rmapbt_buf_ops;
+ break;
+ case XFS_REFC_CRC_MAGIC:
+ bp->b_ops = &xfs_refcountbt_buf_ops;
+ break;
+ default:
+ warnmsg = "Bad btree block magic!";
+ break;
+ }
+ break;
+ case XFS_BLFT_AGF_BUF:
+ if (magic32 != XFS_AGF_MAGIC) {
+ warnmsg = "Bad AGF block magic!";
+ break;
+ }
+ bp->b_ops = &xfs_agf_buf_ops;
+ break;
+ case XFS_BLFT_AGFL_BUF:
+ if (magic32 != XFS_AGFL_MAGIC) {
+ warnmsg = "Bad AGFL block magic!";
+ break;
+ }
+ bp->b_ops = &xfs_agfl_buf_ops;
+ break;
+ case XFS_BLFT_AGI_BUF:
+ if (magic32 != XFS_AGI_MAGIC) {
+ warnmsg = "Bad AGI block magic!";
+ break;
+ }
+ bp->b_ops = &xfs_agi_buf_ops;
+ break;
+ case XFS_BLFT_UDQUOT_BUF:
+ case XFS_BLFT_PDQUOT_BUF:
+ case XFS_BLFT_GDQUOT_BUF:
+#ifdef CONFIG_XFS_QUOTA
+ if (magic16 != XFS_DQUOT_MAGIC) {
+ warnmsg = "Bad DQUOT block magic!";
+ break;
+ }
+ bp->b_ops = &xfs_dquot_buf_ops;
+#else
+ xfs_alert(mp,
+ "Trying to recover dquots without QUOTA support built in!");
+ ASSERT(0);
+#endif
+ break;
+ case XFS_BLFT_DINO_BUF:
+ if (magic16 != XFS_DINODE_MAGIC) {
+ warnmsg = "Bad INODE block magic!";
+ break;
+ }
+ bp->b_ops = &xfs_inode_buf_ops;
+ break;
+ case XFS_BLFT_SYMLINK_BUF:
+ if (magic32 != XFS_SYMLINK_MAGIC) {
+ warnmsg = "Bad symlink block magic!";
+ break;
+ }
+ bp->b_ops = &xfs_symlink_buf_ops;
+ break;
+ case XFS_BLFT_DIR_BLOCK_BUF:
+ if (magic32 != XFS_DIR2_BLOCK_MAGIC &&
+ magic32 != XFS_DIR3_BLOCK_MAGIC) {
+ warnmsg = "Bad dir block magic!";
+ break;
+ }
+ bp->b_ops = &xfs_dir3_block_buf_ops;
+ break;
+ case XFS_BLFT_DIR_DATA_BUF:
+ if (magic32 != XFS_DIR2_DATA_MAGIC &&
+ magic32 != XFS_DIR3_DATA_MAGIC) {
+ warnmsg = "Bad dir data magic!";
+ break;
+ }
+ bp->b_ops = &xfs_dir3_data_buf_ops;
+ break;
+ case XFS_BLFT_DIR_FREE_BUF:
+ if (magic32 != XFS_DIR2_FREE_MAGIC &&
+ magic32 != XFS_DIR3_FREE_MAGIC) {
+ warnmsg = "Bad dir3 free magic!";
+ break;
+ }
+ bp->b_ops = &xfs_dir3_free_buf_ops;
+ break;
+ case XFS_BLFT_DIR_LEAF1_BUF:
+ if (magicda != XFS_DIR2_LEAF1_MAGIC &&
+ magicda != XFS_DIR3_LEAF1_MAGIC) {
+ warnmsg = "Bad dir leaf1 magic!";
+ break;
+ }
+ bp->b_ops = &xfs_dir3_leaf1_buf_ops;
+ break;
+ case XFS_BLFT_DIR_LEAFN_BUF:
+ if (magicda != XFS_DIR2_LEAFN_MAGIC &&
+ magicda != XFS_DIR3_LEAFN_MAGIC) {
+ warnmsg = "Bad dir leafn magic!";
+ break;
+ }
+ bp->b_ops = &xfs_dir3_leafn_buf_ops;
+ break;
+ case XFS_BLFT_DA_NODE_BUF:
+ if (magicda != XFS_DA_NODE_MAGIC &&
+ magicda != XFS_DA3_NODE_MAGIC) {
+ warnmsg = "Bad da node magic!";
+ break;
+ }
+ bp->b_ops = &xfs_da3_node_buf_ops;
+ break;
+ case XFS_BLFT_ATTR_LEAF_BUF:
+ if (magicda != XFS_ATTR_LEAF_MAGIC &&
+ magicda != XFS_ATTR3_LEAF_MAGIC) {
+ warnmsg = "Bad attr leaf magic!";
+ break;
+ }
+ bp->b_ops = &xfs_attr3_leaf_buf_ops;
+ break;
+ case XFS_BLFT_ATTR_RMT_BUF:
+ if (magic32 != XFS_ATTR3_RMT_MAGIC) {
+ warnmsg = "Bad attr remote magic!";
+ break;
+ }
+ bp->b_ops = &xfs_attr3_rmt_buf_ops;
+ break;
+ case XFS_BLFT_SB_BUF:
+ if (magic32 != XFS_SB_MAGIC) {
+ warnmsg = "Bad SB block magic!";
+ break;
+ }
+ bp->b_ops = &xfs_sb_buf_ops;
+ break;
+#ifdef CONFIG_XFS_RT
+ case XFS_BLFT_RTBITMAP_BUF:
+ case XFS_BLFT_RTSUMMARY_BUF:
+ /* no magic numbers for verification of RT buffers */
+ bp->b_ops = &xfs_rtbuf_ops;
+ break;
+#endif /* CONFIG_XFS_RT */
+ default:
+ xfs_warn(mp, "Unknown buffer type %d!",
+ xfs_blft_from_flags(buf_f));
+ break;
+ }
+
+ /*
+ * Nothing else to do in the case of a NULL current LSN as this means
+ * the buffer is more recent than the change in the log and will be
+ * skipped.
+ */
+ if (current_lsn == NULLCOMMITLSN)
+ return;
+
+ if (warnmsg) {
+ xfs_warn(mp, warnmsg);
+ ASSERT(0);
+ }
+
+ /*
+ * We must update the metadata LSN of the buffer as it is written out to
+ * ensure that older transactions never replay over this one and corrupt
+ * the buffer. This can occur if log recovery is interrupted at some
+ * point after the current transaction completes, at which point a
+ * subsequent mount starts recovery from the beginning.
+ *
+ * Write verifiers update the metadata LSN from log items attached to
+ * the buffer. Therefore, initialize a bli purely to carry the LSN to
+ * the verifier. We'll clean it up in our ->iodone() callback.
+ */
+ if (bp->b_ops) {
+ struct xfs_buf_log_item *bip;
+
+ ASSERT(!bp->b_iodone || bp->b_iodone == xlog_recover_iodone);
+ bp->b_iodone = xlog_recover_iodone;
+ xfs_buf_item_init(bp, mp);
+ bip = bp->b_log_item;
+ bip->bli_item.li_lsn = current_lsn;
+ }
+}
+
+/*
+ * Perform a 'normal' buffer recovery. Each logged region of the
+ * buffer should be copied over the corresponding region in the
+ * given buffer. The bitmap in the buf log format structure indicates
+ * where to place the logged data.
+ */
+STATIC void
+xlog_recover_do_reg_buffer(
+ struct xfs_mount *mp,
+ struct xlog_recover_item *item,
+ struct xfs_buf *bp,
+ struct xfs_buf_log_format *buf_f,
+ xfs_lsn_t current_lsn)
+{
+ int i;
+ int bit;
+ int nbits;
+ xfs_failaddr_t fa;
+ const size_t size_disk_dquot = sizeof(struct xfs_disk_dquot);
+
+ trace_xfs_log_recover_buf_reg_buf(mp->m_log, buf_f);
+
+ bit = 0;
+ i = 1; /* 0 is the buf format structure */
+ while (1) {
+ bit = xfs_next_bit(buf_f->blf_data_map,
+ buf_f->blf_map_size, bit);
+ if (bit == -1)
+ break;
+ nbits = xfs_contig_bits(buf_f->blf_data_map,
+ buf_f->blf_map_size, bit);
+ ASSERT(nbits > 0);
+ ASSERT(item->ri_buf[i].i_addr != NULL);
+ ASSERT(item->ri_buf[i].i_len % XFS_BLF_CHUNK == 0);
+ ASSERT(BBTOB(bp->b_length) >=
+ ((uint)bit << XFS_BLF_SHIFT) + (nbits << XFS_BLF_SHIFT));
+
+ /*
+ * The dirty regions logged in the buffer, even though
+ * contiguous, may span multiple chunks. This is because the
+ * dirty region may span a physical page boundary in a buffer
+ * and hence be split into two separate vectors for writing into
+ * the log. Hence we need to trim nbits back to the length of
+ * the current region being copied out of the log.
+ */
+ if (item->ri_buf[i].i_len < (nbits << XFS_BLF_SHIFT))
+ nbits = item->ri_buf[i].i_len >> XFS_BLF_SHIFT;
+
+ /*
+ * Do a sanity check if this is a dquot buffer. Just checking
+ * the first dquot in the buffer should do. XXX This is
+ * probably a good thing to do for other buf types also.
+ */
+ fa = NULL;
+ if (buf_f->blf_flags &
+ (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) {
+ if (item->ri_buf[i].i_addr == NULL) {
+ xfs_alert(mp,
+ "XFS: NULL dquot in %s.", __func__);
+ goto next;
+ }
+ if (item->ri_buf[i].i_len < size_disk_dquot) {
+ xfs_alert(mp,
+ "XFS: dquot too small (%d) in %s.",
+ item->ri_buf[i].i_len, __func__);
+ goto next;
+ }
+ fa = xfs_dquot_verify(mp, item->ri_buf[i].i_addr,
+ -1, 0);
+ if (fa) {
+ xfs_alert(mp,
+ "dquot corrupt at %pS trying to replay into block 0x%llx",
+ fa, bp->b_bn);
+ goto next;
+ }
+ }
+
+ memcpy(xfs_buf_offset(bp,
+ (uint)bit << XFS_BLF_SHIFT), /* dest */
+ item->ri_buf[i].i_addr, /* source */
+ nbits<<XFS_BLF_SHIFT); /* length */
+ next:
+ i++;
+ bit += nbits;
+ }
+
+ /* Shouldn't be any more regions */
+ ASSERT(i == item->ri_total);
+
+ xlog_recover_validate_buf_type(mp, bp, buf_f, current_lsn);
+}
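
xlog_recover_do_reg_buffer() above walks the dirty-chunk bitmap from the buf log format, finds each contiguous run of logged chunks, and copies that region from the log item into the buffer. The sketch below models the same walk in plain C under the simplifying assumption that the logged data sits in a single flat image at the same offsets as the buffer; the helpers stand in for xfs_next_bit() and xfs_contig_bits().

#include <stddef.h>
#include <string.h>

#define CHUNK	128	/* illustrative chunk size; XFS uses XFS_BLF_CHUNK */

/* Index of the next set bit at or after 'start', or -1 if none. */
static int next_bit(const unsigned char *map, int nbits, int start)
{
	for (int i = start; i < nbits; i++)
		if (map[i / 8] & (1 << (i % 8)))
			return i;
	return -1;
}

/* Length of the run of set bits starting at 'start'. */
static int contig_bits(const unsigned char *map, int nbits, int start)
{
	int n = 0;

	while (start + n < nbits && (map[(start + n) / 8] & (1 << ((start + n) % 8))))
		n++;
	return n;
}

/* Copy every dirty region described by the bitmap into the buffer. */
static void replay_regions(unsigned char *buf, const unsigned char *logged,
			   const unsigned char *map, int map_bits)
{
	int bit = 0;

	while ((bit = next_bit(map, map_bits, bit)) != -1) {
		int nbits = contig_bits(map, map_bits, bit);

		memcpy(buf + (size_t)bit * CHUNK,
		       logged + (size_t)bit * CHUNK,
		       (size_t)nbits * CHUNK);
		bit += nbits;
	}
}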
+
+/*
+ * Perform a dquot buffer recovery.
+ * Simple algorithm: if we have found a QUOTAOFF log item of the same type
+ * (ie. USR or GRP), then just toss this buffer away; don't recover it.
+ * Else, treat it as a regular buffer and do recovery.
+ *
+ * Return false if the buffer was tossed and true if we recovered the buffer to
+ * indicate to the caller if the buffer needs writing.
+ */
+STATIC bool
+xlog_recover_do_dquot_buffer(
+ struct xfs_mount *mp,
+ struct xlog *log,
+ struct xlog_recover_item *item,
+ struct xfs_buf *bp,
+ struct xfs_buf_log_format *buf_f)
+{
+ uint type;
+
+ trace_xfs_log_recover_buf_dquot_buf(log, buf_f);
+
+ /*
+ * Filesystems are required to send in quota flags at mount time.
+ */
+ if (!mp->m_qflags)
+ return false;
+
+ type = 0;
+ if (buf_f->blf_flags & XFS_BLF_UDQUOT_BUF)
+ type |= XFS_DQ_USER;
+ if (buf_f->blf_flags & XFS_BLF_PDQUOT_BUF)
+ type |= XFS_DQ_PROJ;
+ if (buf_f->blf_flags & XFS_BLF_GDQUOT_BUF)
+ type |= XFS_DQ_GROUP;
+ /*
+ * This type of quota was turned off, so ignore this buffer
+ */
+ if (log->l_quotaoffs_flag & type)
+ return false;
+
+ xlog_recover_do_reg_buffer(mp, item, bp, buf_f, NULLCOMMITLSN);
+ return true;
+}
+
+/*
+ * Perform recovery for a buffer full of inodes. In these buffers, the only
+ * data which should be recovered is that which corresponds to the
+ * di_next_unlinked pointers in the on disk inode structures. The rest of the
+ * data for the inodes is always logged through the inodes themselves rather
+ * than the inode buffer and is recovered in xlog_recover_inode_pass2().
+ *
+ * The only time when buffers full of inodes are fully recovered is when the
+ * buffer is full of newly allocated inodes. In this case the buffer will
+ * not be marked as an inode buffer and so will be sent to
+ * xlog_recover_do_reg_buffer() above during recovery.
+ */
+STATIC int
+xlog_recover_do_inode_buffer(
+ struct xfs_mount *mp,
+ struct xlog_recover_item *item,
+ struct xfs_buf *bp,
+ struct xfs_buf_log_format *buf_f)
+{
+ int i;
+ int item_index = 0;
+ int bit = 0;
+ int nbits = 0;
+ int reg_buf_offset = 0;
+ int reg_buf_bytes = 0;
+ int next_unlinked_offset;
+ int inodes_per_buf;
+ xfs_agino_t *logged_nextp;
+ xfs_agino_t *buffer_nextp;
+
+ trace_xfs_log_recover_buf_inode_buf(mp->m_log, buf_f);
+
+ /*
+ * Post recovery validation only works properly on CRC enabled
+ * filesystems.
+ */
+ if (xfs_sb_version_hascrc(&mp->m_sb))
+ bp->b_ops = &xfs_inode_buf_ops;
+
+ inodes_per_buf = BBTOB(bp->b_length) >> mp->m_sb.sb_inodelog;
+ for (i = 0; i < inodes_per_buf; i++) {
+ next_unlinked_offset = (i * mp->m_sb.sb_inodesize) +
+ offsetof(xfs_dinode_t, di_next_unlinked);
+
+ while (next_unlinked_offset >=
+ (reg_buf_offset + reg_buf_bytes)) {
+ /*
+ * The next di_next_unlinked field is beyond
+ * the current logged region. Find the next
+ * logged region that contains or is beyond
+ * the current di_next_unlinked field.
+ */
+ bit += nbits;
+ bit = xfs_next_bit(buf_f->blf_data_map,
+ buf_f->blf_map_size, bit);
+
+ /*
+ * If there are no more logged regions in the
+ * buffer, then we're done.
+ */
+ if (bit == -1)
+ return 0;
+
+ nbits = xfs_contig_bits(buf_f->blf_data_map,
+ buf_f->blf_map_size, bit);
+ ASSERT(nbits > 0);
+ reg_buf_offset = bit << XFS_BLF_SHIFT;
+ reg_buf_bytes = nbits << XFS_BLF_SHIFT;
+ item_index++;
+ }
+
+ /*
+ * If the current logged region starts after the current
+ * di_next_unlinked field, then move on to the next
+ * di_next_unlinked field.
+ */
+ if (next_unlinked_offset < reg_buf_offset)
+ continue;
+
+ ASSERT(item->ri_buf[item_index].i_addr != NULL);
+ ASSERT((item->ri_buf[item_index].i_len % XFS_BLF_CHUNK) == 0);
+ ASSERT((reg_buf_offset + reg_buf_bytes) <= BBTOB(bp->b_length));
+
+ /*
+ * The current logged region contains a copy of the
+ * current di_next_unlinked field. Extract its value
+ * and copy it to the buffer copy.
+ */
+ logged_nextp = item->ri_buf[item_index].i_addr +
+ next_unlinked_offset - reg_buf_offset;
+ if (XFS_IS_CORRUPT(mp, *logged_nextp == 0)) {
+ xfs_alert(mp,
+ "Bad inode buffer log record (ptr = "PTR_FMT", bp = "PTR_FMT"). "
+ "Trying to replay bad (0) inode di_next_unlinked field.",
+ item, bp);
+ return -EFSCORRUPTED;
+ }
+
+ buffer_nextp = xfs_buf_offset(bp, next_unlinked_offset);
+ *buffer_nextp = *logged_nextp;
+
+ /*
+ * If necessary, recalculate the CRC in the on-disk inode. We
+ * have to leave the inode in a consistent state for whoever
+ * reads it next....
+ */
+ xfs_dinode_calc_crc(mp,
+ xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize));
+
+ }
+
+ return 0;
+}
+
+/*
+ * V5 filesystems know the age of the buffer on disk being recovered. We can
+ * have newer objects on disk than we are replaying, and so for these cases we
+ * don't want to replay the current change as that will make the buffer contents
+ * temporarily invalid on disk.
+ *
+ * The magic number might not match the buffer type we are going to recover
+ * (e.g. reallocated blocks), so we ignore the xfs_buf_log_format flags. Hence
+ * extract the LSN of the existing object in the buffer based on its current
+ * magic number. If we don't recognise the magic number in the buffer, then
+ * return an LSN of -1 so that the caller knows it was an unrecognised block and
+ * so can recover the buffer.
+ *
+ * Note: we cannot rely solely on magic number matches to determine that the
+ * buffer has a valid LSN - we also need to verify that it belongs to this
+ * filesystem, so we need to extract the object's UUID and compare it to that
+ * which we read from the superblock. If the UUIDs don't match, then we've got a
+ * stale metadata block from an old filesystem instance that we need to recover
+ * over the top of.
+ */
+static xfs_lsn_t
+xlog_recover_get_buf_lsn(
+ struct xfs_mount *mp,
+ struct xfs_buf *bp)
+{
+ uint32_t magic32;
+ uint16_t magic16;
+ uint16_t magicda;
+ void *blk = bp->b_addr;
+ uuid_t *uuid;
+ xfs_lsn_t lsn = -1;
+
+ /* v4 filesystems always recover immediately */
+ if (!xfs_sb_version_hascrc(&mp->m_sb))
+ goto recover_immediately;
+
+ magic32 = be32_to_cpu(*(__be32 *)blk);
+ switch (magic32) {
+ case XFS_ABTB_CRC_MAGIC:
+ case XFS_ABTC_CRC_MAGIC:
+ case XFS_ABTB_MAGIC:
+ case XFS_ABTC_MAGIC:
+ case XFS_RMAP_CRC_MAGIC:
+ case XFS_REFC_CRC_MAGIC:
+ case XFS_IBT_CRC_MAGIC:
+ case XFS_IBT_MAGIC: {
+ struct xfs_btree_block *btb = blk;
+
+ lsn = be64_to_cpu(btb->bb_u.s.bb_lsn);
+ uuid = &btb->bb_u.s.bb_uuid;
+ break;
+ }
+ case XFS_BMAP_CRC_MAGIC:
+ case XFS_BMAP_MAGIC: {
+ struct xfs_btree_block *btb = blk;
+
+ lsn = be64_to_cpu(btb->bb_u.l.bb_lsn);
+ uuid = &btb->bb_u.l.bb_uuid;
+ break;
+ }
+ case XFS_AGF_MAGIC:
+ lsn = be64_to_cpu(((struct xfs_agf *)blk)->agf_lsn);
+ uuid = &((struct xfs_agf *)blk)->agf_uuid;
+ break;
+ case XFS_AGFL_MAGIC:
+ lsn = be64_to_cpu(((struct xfs_agfl *)blk)->agfl_lsn);
+ uuid = &((struct xfs_agfl *)blk)->agfl_uuid;
+ break;
+ case XFS_AGI_MAGIC:
+ lsn = be64_to_cpu(((struct xfs_agi *)blk)->agi_lsn);
+ uuid = &((struct xfs_agi *)blk)->agi_uuid;
+ break;
+ case XFS_SYMLINK_MAGIC:
+ lsn = be64_to_cpu(((struct xfs_dsymlink_hdr *)blk)->sl_lsn);
+ uuid = &((struct xfs_dsymlink_hdr *)blk)->sl_uuid;
+ break;
+ case XFS_DIR3_BLOCK_MAGIC:
+ case XFS_DIR3_DATA_MAGIC:
+ case XFS_DIR3_FREE_MAGIC:
+ lsn = be64_to_cpu(((struct xfs_dir3_blk_hdr *)blk)->lsn);
+ uuid = &((struct xfs_dir3_blk_hdr *)blk)->uuid;
+ break;
+ case XFS_ATTR3_RMT_MAGIC:
+ /*
+ * Remote attr blocks are written synchronously, rather than
+ * being logged. That means they do not contain a valid LSN
+ * (i.e. transactionally ordered) in them, and hence any time we
+ * see a buffer to replay over the top of a remote attribute
+ * block we should simply do so.
+ */
+ goto recover_immediately;
+ case XFS_SB_MAGIC:
+ /*
+ * superblock uuids are magic. We may or may not have a
+ * sb_meta_uuid on disk, but it will be set in the in-core
+ * superblock. We set the uuid pointer for verification
+ * according to the superblock feature mask to ensure we check
+ * the relevant UUID in the superblock.
+ */
+ lsn = be64_to_cpu(((struct xfs_dsb *)blk)->sb_lsn);
+ if (xfs_sb_version_hasmetauuid(&mp->m_sb))
+ uuid = &((struct xfs_dsb *)blk)->sb_meta_uuid;
+ else
+ uuid = &((struct xfs_dsb *)blk)->sb_uuid;
+ break;
+ default:
+ break;
+ }
+
+ if (lsn != (xfs_lsn_t)-1) {
+ if (!uuid_equal(&mp->m_sb.sb_meta_uuid, uuid))
+ goto recover_immediately;
+ return lsn;
+ }
+
+ magicda = be16_to_cpu(((struct xfs_da_blkinfo *)blk)->magic);
+ switch (magicda) {
+ case XFS_DIR3_LEAF1_MAGIC:
+ case XFS_DIR3_LEAFN_MAGIC:
+ case XFS_DA3_NODE_MAGIC:
+ lsn = be64_to_cpu(((struct xfs_da3_blkinfo *)blk)->lsn);
+ uuid = &((struct xfs_da3_blkinfo *)blk)->uuid;
+ break;
+ default:
+ break;
+ }
+
+ if (lsn != (xfs_lsn_t)-1) {
+ if (!uuid_equal(&mp->m_sb.sb_uuid, uuid))
+ goto recover_immediately;
+ return lsn;
+ }
+
+ /*
+ * We do individual object checks on dquot and inode buffers as they
+ * have their own individual LSN records. Also, we could have a stale
+ * buffer here, so we have to at least recognise these buffer types.
+ *
+ * A noted complexity here is inode unlinked list processing - it logs
+ * the inode directly in the buffer, but we don't know which inodes have
+ * been modified, and there is no global buffer LSN. Hence we need to
+ * recover all inode buffer types immediately. This problem will be
+ * fixed by logical logging of the unlinked list modifications.
+ */
+ magic16 = be16_to_cpu(*(__be16 *)blk);
+ switch (magic16) {
+ case XFS_DQUOT_MAGIC:
+ case XFS_DINODE_MAGIC:
+ goto recover_immediately;
+ default:
+ break;
+ }
+
+ /* unknown buffer contents, recover immediately */
+
+recover_immediately:
+ return (xfs_lsn_t)-1;
+
+}
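
The replay decision above reduces to comparing the LSN stamped in the on-disk object with the LSN of the transaction being recovered; only strictly older buffers are overwritten. An XFS LSN packs the log cycle number in the upper 32 bits and the block number in the lower 32 bits, so comparison is cycle first, then block. The following is a simplified model of that comparison and decision, not the kernel's XFS_LSN_CMP macro.

#include <stdbool.h>
#include <stdint.h>

typedef int64_t lsn_t;

static uint32_t lsn_cycle(lsn_t lsn) { return (uint32_t)(lsn >> 32); }
static uint32_t lsn_block(lsn_t lsn) { return (uint32_t)lsn; }

/* <0 if a is older than b, 0 if equal, >0 if a is newer. */
static int lsn_cmp(lsn_t a, lsn_t b)
{
	if (lsn_cycle(a) != lsn_cycle(b))
		return lsn_cycle(a) < lsn_cycle(b) ? -1 : 1;
	if (lsn_block(a) != lsn_block(b))
		return lsn_block(a) < lsn_block(b) ? -1 : 1;
	return 0;
}

/*
 * Replay only if the buffer's stamped LSN is older than the transaction
 * being recovered; an unrecognised buffer (lsn == -1) is always replayed.
 */
static bool should_replay(lsn_t buf_lsn, lsn_t current_lsn)
{
	if (buf_lsn == (lsn_t)-1)
		return true;
	return lsn_cmp(buf_lsn, current_lsn) < 0;
}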
+
+/*
+ * This routine replays a modification made to a buffer at runtime.
+ * There are actually two types of buffer, regular and inode, which
+ * are handled differently. Inode buffers are handled differently
+ * in that we only recover a specific set of data from them, namely
+ * the inode di_next_unlinked fields. This is because all other inode
+ * data is actually logged via inode records and any data we replay
+ * here which overlaps that may be stale.
+ *
+ * When meta-data buffers are freed at run time we log a buffer item
+ * with the XFS_BLF_CANCEL bit set to indicate that previous copies
+ * of the buffer in the log should not be replayed at recovery time.
+ * This is so that if the blocks covered by the buffer are reused for
+ * file data before we crash we don't end up replaying old, freed
+ * meta-data into a user's file.
+ *
+ * To handle the cancellation of buffer log items, we make two passes
+ * over the log during recovery. During the first we build a table of
+ * those buffers which have been cancelled, and during the second we
+ * only replay those buffers which do not have corresponding cancel
+ * records in the table. See xlog_recover_buf_pass[1,2] above
+ * for more details on the implementation of the table of cancel records.
+ */
+STATIC int
+xlog_recover_buf_commit_pass2(
+ struct xlog *log,
+ struct list_head *buffer_list,
+ struct xlog_recover_item *item,
+ xfs_lsn_t current_lsn)
+{
+ struct xfs_buf_log_format *buf_f = item->ri_buf[0].i_addr;
+ struct xfs_mount *mp = log->l_mp;
+ struct xfs_buf *bp;
+ int error;
+ uint buf_flags;
+ xfs_lsn_t lsn;
+
+ /*
+ * In this pass we only want to recover all the buffers which have
+ * not been cancelled and are not cancellation buffers themselves.
+ */
+ if (buf_f->blf_flags & XFS_BLF_CANCEL) {
+ if (xlog_put_buffer_cancelled(log, buf_f->blf_blkno,
+ buf_f->blf_len))
+ goto cancelled;
+ } else {
+
+ if (xlog_is_buffer_cancelled(log, buf_f->blf_blkno,
+ buf_f->blf_len))
+ goto cancelled;
+ }
+
+ trace_xfs_log_recover_buf_recover(log, buf_f);
+
+ buf_flags = 0;
+ if (buf_f->blf_flags & XFS_BLF_INODE_BUF)
+ buf_flags |= XBF_UNMAPPED;
+
+ error = xfs_buf_read(mp->m_ddev_targp, buf_f->blf_blkno, buf_f->blf_len,
+ buf_flags, &bp, NULL);
+ if (error)
+ return error;
+
+ /*
+ * Recover the buffer only if we get an LSN from it and it's less than
+ * the lsn of the transaction we are replaying.
+ *
+ * Note that we have to be extremely careful of readahead here.
+ * Readahead does not attach verifiers to the buffers, so if we don't
+ * actually do any replay after readahead because the LSN we found in
+ * the buffer is more recent than the current transaction, then we need
+ * to attach the verifier directly. Otherwise future recovery actions
+ * (e.g. EFI and unlinked list recovery) can operate on the buffers
+ * without the verifier attached. This
+ * can lead to blocks on disk having the correct content but a stale
+ * CRC.
+ *
+ * It is safe to assume these clean buffers are currently up to date.
+ * If the buffer is dirtied by a later transaction being replayed, then
+ * the verifier will be reset to match whatever recover turns that
+ * buffer into.
+ */
+ lsn = xlog_recover_get_buf_lsn(mp, bp);
+ if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) {
+ trace_xfs_log_recover_buf_skip(log, buf_f);
+ xlog_recover_validate_buf_type(mp, bp, buf_f, NULLCOMMITLSN);
+ goto out_release;
+ }
+
+ if (buf_f->blf_flags & XFS_BLF_INODE_BUF) {
+ error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f);
+ if (error)
+ goto out_release;
+ } else if (buf_f->blf_flags &
+ (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) {
+ bool dirty;
+
+ dirty = xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f);
+ if (!dirty)
+ goto out_release;
+ } else {
+ xlog_recover_do_reg_buffer(mp, item, bp, buf_f, current_lsn);
+ }
+
+ /*
+ * Perform delayed write on the buffer. Asynchronous writes will be
+ * slower when taking into account all the buffers to be flushed.
+ *
+ * Also make sure that only inode buffers with good sizes stay in
+ * the buffer cache. The kernel moves inodes in buffers of 1 block
+ * or inode_cluster_size bytes, whichever is bigger. The inode
+ * buffers in the log can be a different size if the log was generated
+ * by an older kernel using unclustered inode buffers or a newer kernel
+ * running with a different inode cluster size. Regardless, if
+ * the inode buffer size isn't max(blocksize, inode_cluster_size)
+ * for *our* value of inode_cluster_size, then we need to keep
+ * the buffer out of the buffer cache so that the buffer won't
+ * overlap with future reads of those inodes.
+ */
+ if (XFS_DINODE_MAGIC ==
+ be16_to_cpu(*((__be16 *)xfs_buf_offset(bp, 0))) &&
+ (BBTOB(bp->b_length) != M_IGEO(log->l_mp)->inode_cluster_size)) {
+ xfs_buf_stale(bp);
+ error = xfs_bwrite(bp);
+ } else {
+ ASSERT(bp->b_mount == mp);
+ bp->b_iodone = xlog_recover_iodone;
+ xfs_buf_delwri_queue(bp, buffer_list);
+ }
+
+out_release:
+ xfs_buf_relse(bp);
+ return error;
+cancelled:
+ trace_xfs_log_recover_buf_cancel(log, buf_f);
+ return 0;
+}
+
+const struct xlog_recover_item_ops xlog_buf_item_ops = {
+ .item_type = XFS_LI_BUF,
+ .reorder = xlog_recover_buf_reorder,
+ .ra_pass2 = xlog_recover_buf_ra_pass2,
+ .commit_pass1 = xlog_recover_buf_commit_pass1,
+ .commit_pass2 = xlog_recover_buf_commit_pass2,
+};
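
The ops table above reflects the wider pattern in this series: each log item type registers an xlog_recover_item_ops structure and the generic recovery loop dispatches on the logged item type instead of open-coding a switch. A minimal sketch of that table-driven dispatch follows; the type value and all names are invented stand-ins, not kernel definitions.

#include <stddef.h>

struct recover_item;

struct recover_item_ops {
	int	item_type;
	int	(*commit_pass1)(struct recover_item *item);
	int	(*commit_pass2)(struct recover_item *item);
};

static int buf_commit_pass2(struct recover_item *item)
{
	(void)item;		/* replay the buffer changes here */
	return 0;
}

static const struct recover_item_ops buf_item_ops = {
	.item_type	= 0x123c,	/* stands in for XFS_LI_BUF */
	.commit_pass2	= buf_commit_pass2,
};

/* One entry per item type; a real table registers many more. */
static const struct recover_item_ops *item_ops[] = {
	&buf_item_ops,
};

static const struct recover_item_ops *find_ops(int item_type)
{
	for (size_t i = 0; i < sizeof(item_ops) / sizeof(item_ops[0]); i++)
		if (item_ops[i]->item_type == item_type)
			return item_ops[i];
	return NULL;
}

/* Generic pass 2: look up the ops for the item and let them do the work. */
static int commit_pass2(struct recover_item *item, int item_type)
{
	const struct recover_item_ops *ops = find_ops(item_type);

	if (!ops || !ops->commit_pass2)
		return -1;	/* unknown item type or nothing to do */
	return ops->commit_pass2(item);
}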
diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c
index 871ec22c9aee..66deddd5e296 100644
--- a/fs/xfs/xfs_dir2_readdir.c
+++ b/fs/xfs/xfs_dir2_readdir.c
@@ -524,7 +524,7 @@ xfs_readdir(
args.geo = dp->i_mount->m_dir_geo;
args.trans = tp;
- if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
+ if (dp->i_df.if_format == XFS_DINODE_FMT_LOCAL)
rval = xfs_dir2_sf_getdents(&args, ctx);
else if ((rval = xfs_dir2_isblock(&args, &v)))
;
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index af2c8e5ceea0..d5b7f03e93c8 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -75,7 +75,7 @@ xfs_qm_adjust_dqlimits(
int prealloc = 0;
ASSERT(d->d_id);
- defq = xfs_get_defquota(dq, q);
+ defq = xfs_get_defquota(q, xfs_dquot_type(dq));
if (defq->bsoftlimit && !d->d_blk_softlimit) {
d->d_blk_softlimit = cpu_to_be64(defq->bsoftlimit);
@@ -114,9 +114,14 @@ xfs_qm_adjust_dqlimits(
void
xfs_qm_adjust_dqtimers(
struct xfs_mount *mp,
- struct xfs_disk_dquot *d)
+ struct xfs_dquot *dq)
{
+ struct xfs_quotainfo *qi = mp->m_quotainfo;
+ struct xfs_disk_dquot *d = &dq->q_core;
+ struct xfs_def_quota *defq;
+
ASSERT(d->d_id);
+ defq = xfs_get_defquota(qi, xfs_dquot_type(dq));
#ifdef DEBUG
if (d->d_blk_hardlimit)
@@ -138,7 +143,7 @@ xfs_qm_adjust_dqtimers(
(be64_to_cpu(d->d_bcount) >
be64_to_cpu(d->d_blk_hardlimit)))) {
d->d_btimer = cpu_to_be32(ktime_get_real_seconds() +
- mp->m_quotainfo->qi_btimelimit);
+ defq->btimelimit);
} else {
d->d_bwarns = 0;
}
@@ -161,7 +166,7 @@ xfs_qm_adjust_dqtimers(
(be64_to_cpu(d->d_icount) >
be64_to_cpu(d->d_ino_hardlimit)))) {
d->d_itimer = cpu_to_be32(ktime_get_real_seconds() +
- mp->m_quotainfo->qi_itimelimit);
+ defq->itimelimit);
} else {
d->d_iwarns = 0;
}
@@ -184,7 +189,7 @@ xfs_qm_adjust_dqtimers(
(be64_to_cpu(d->d_rtbcount) >
be64_to_cpu(d->d_rtb_hardlimit)))) {
d->d_rtbtimer = cpu_to_be32(ktime_get_real_seconds() +
- mp->m_quotainfo->qi_rtbtimelimit);
+ defq->rtbtimelimit);
} else {
d->d_rtbwarns = 0;
}
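
The timer updates above implement the usual quota grace-period semantics: the first time usage crosses a limit a timer is armed at now plus the per-type time limit taken from the defaults, and it is cleared again once usage drops back under the limit. A simplified model of that state machine, covering only the soft limit and ignoring warning counts and hard limits, is sketched below.

#include <stdbool.h>
#include <stdint.h>
#include <time.h>

struct quota_limit {
	uint64_t	softlimit;	/* 0 means no soft limit */
	int64_t		timelimit;	/* grace period in seconds */
	time_t		timer;		/* 0 means no timer running */
};

/* Re-evaluate the grace timer after usage changes. */
static void adjust_timer(struct quota_limit *q, uint64_t count)
{
	bool over = q->softlimit && count > q->softlimit;

	if (!q->timer) {
		if (over)
			q->timer = time(NULL) + q->timelimit;
	} else if (!over) {
		q->timer = 0;	/* back under the limit, stop the clock */
	}
}

/* Enforcement: further allocation fails once the grace period expires. */
static bool within_grace(const struct quota_limit *q)
{
	return !q->timer || time(NULL) < q->timer;
}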
@@ -205,16 +210,18 @@ xfs_qm_adjust_dqtimers(
*/
STATIC void
xfs_qm_init_dquot_blk(
- xfs_trans_t *tp,
- xfs_mount_t *mp,
- xfs_dqid_t id,
- uint type,
- xfs_buf_t *bp)
+ struct xfs_trans *tp,
+ struct xfs_mount *mp,
+ xfs_dqid_t id,
+ uint type,
+ struct xfs_buf *bp)
{
struct xfs_quotainfo *q = mp->m_quotainfo;
- xfs_dqblk_t *d;
- xfs_dqid_t curid;
- int i;
+ struct xfs_dqblk *d;
+ xfs_dqid_t curid;
+ unsigned int qflag;
+ unsigned int blftype;
+ int i;
ASSERT(tp);
ASSERT(xfs_buf_islocked(bp));
@@ -238,11 +245,39 @@ xfs_qm_init_dquot_blk(
}
}
- xfs_trans_dquot_buf(tp, bp,
- (type & XFS_DQ_USER ? XFS_BLF_UDQUOT_BUF :
- ((type & XFS_DQ_PROJ) ? XFS_BLF_PDQUOT_BUF :
- XFS_BLF_GDQUOT_BUF)));
- xfs_trans_log_buf(tp, bp, 0, BBTOB(q->qi_dqchunklen) - 1);
+ if (type & XFS_DQ_USER) {
+ qflag = XFS_UQUOTA_CHKD;
+ blftype = XFS_BLF_UDQUOT_BUF;
+ } else if (type & XFS_DQ_PROJ) {
+ qflag = XFS_PQUOTA_CHKD;
+ blftype = XFS_BLF_PDQUOT_BUF;
+ } else {
+ qflag = XFS_GQUOTA_CHKD;
+ blftype = XFS_BLF_GDQUOT_BUF;
+ }
+
+ xfs_trans_dquot_buf(tp, bp, blftype);
+
+ /*
+ * quotacheck uses delayed writes to update all the dquots on disk in an
+ * efficient manner instead of logging the individual dquot changes as
+ * they are made. However if we log the buffer allocated here and crash
+ * after quotacheck while the logged initialisation is still in the
+ * active region of the log, log recovery can replay the dquot buffer
+ * initialisation over the top of the checked dquots and corrupt quota
+ * accounting.
+ *
+ * To avoid this problem, quotacheck cannot log the initialised buffer.
+ * We must still dirty the buffer and write it back before the
+ * allocation transaction clears the log. Therefore, mark the buffer as
+ * ordered instead of logging it directly. This is safe for quotacheck
+ * because it detects and repairs allocated but initialized dquot blocks
+ * in the quota inodes.
+ */
+ if (!(mp->m_qflags & qflag))
+ xfs_trans_ordered_buf(tp, bp);
+ else
+ xfs_trans_log_buf(tp, bp, 0, BBTOB(q->qi_dqchunklen) - 1);
}
/*
@@ -1021,6 +1056,7 @@ xfs_qm_dqflush_done(
struct xfs_dq_logitem *qip = (struct xfs_dq_logitem *)lip;
struct xfs_dquot *dqp = qip->qli_dquot;
struct xfs_ail *ailp = lip->li_ailp;
+ xfs_lsn_t tail_lsn;
/*
* We only want to pull the item from the AIL if its
@@ -1034,10 +1070,11 @@ xfs_qm_dqflush_done(
((lip->li_lsn == qip->qli_flush_lsn) ||
test_bit(XFS_LI_FAILED, &lip->li_flags))) {
- /* xfs_trans_ail_delete() drops the AIL lock. */
spin_lock(&ailp->ail_lock);
if (lip->li_lsn == qip->qli_flush_lsn) {
- xfs_trans_ail_delete(ailp, lip, SHUTDOWN_CORRUPT_INCORE);
+ /* xfs_ail_update_finish() drops the AIL lock */
+ tail_lsn = xfs_ail_delete_one(ailp, lip);
+ xfs_ail_update_finish(ailp, tail_lsn);
} else {
/*
* Clear the failed state since we are about to drop the
@@ -1068,6 +1105,7 @@ xfs_qm_dqflush(
struct xfs_buf **bpp)
{
struct xfs_mount *mp = dqp->q_mount;
+ struct xfs_log_item *lip = &dqp->q_logitem.qli_item;
struct xfs_buf *bp;
struct xfs_dqblk *dqb;
struct xfs_disk_dquot *ddqp;
@@ -1084,31 +1122,15 @@ xfs_qm_dqflush(
xfs_qm_dqunpin_wait(dqp);
/*
- * This may have been unpinned because the filesystem is shutting
- * down forcibly. If that's the case we must not write this dquot
- * to disk, because the log record didn't make it to disk.
- *
- * We also have to remove the log item from the AIL in this case,
- * as we wait for an emptry AIL as part of the unmount process.
- */
- if (XFS_FORCED_SHUTDOWN(mp)) {
- struct xfs_log_item *lip = &dqp->q_logitem.qli_item;
- dqp->dq_flags &= ~XFS_DQ_DIRTY;
-
- xfs_trans_ail_remove(lip, SHUTDOWN_CORRUPT_INCORE);
-
- error = -EIO;
- goto out_unlock;
- }
-
- /*
* Get the buffer containing the on-disk dquot
*/
error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno,
mp->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK,
&bp, &xfs_dquot_buf_ops);
- if (error)
+ if (error == -EAGAIN)
goto out_unlock;
+ if (error)
+ goto out_abort;
/*
* Calculate the location of the dquot inside the buffer.
@@ -1116,17 +1138,15 @@ xfs_qm_dqflush(
dqb = bp->b_addr + dqp->q_bufoffset;
ddqp = &dqb->dd_diskdq;
- /*
- * A simple sanity check in case we got a corrupted dquot.
- */
- fa = xfs_dqblk_verify(mp, dqb, be32_to_cpu(ddqp->d_id), 0);
+ /* sanity check the in-core structure before we flush */
+ fa = xfs_dquot_verify(mp, &dqp->q_core, be32_to_cpu(dqp->q_core.d_id),
+ 0);
if (fa) {
xfs_alert(mp, "corrupt dquot ID 0x%x in memory at %pS",
- be32_to_cpu(ddqp->d_id), fa);
+ be32_to_cpu(dqp->q_core.d_id), fa);
xfs_buf_relse(bp);
- xfs_dqfunlock(dqp);
- xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
- return -EFSCORRUPTED;
+ error = -EFSCORRUPTED;
+ goto out_abort;
}
/* This is the only portion of data that needs to persist */
@@ -1175,6 +1195,10 @@ xfs_qm_dqflush(
*bpp = bp;
return 0;
+out_abort:
+ dqp->dq_flags &= ~XFS_DQ_DIRTY;
+ xfs_trans_ail_delete(lip, 0);
+ xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
out_unlock:
xfs_dqfunlock(dqp);
return error;
diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h
index fe3e46df604b..71e36c85e20b 100644
--- a/fs/xfs/xfs_dquot.h
+++ b/fs/xfs/xfs_dquot.h
@@ -154,7 +154,7 @@ void xfs_qm_dqdestroy(struct xfs_dquot *dqp);
int xfs_qm_dqflush(struct xfs_dquot *dqp, struct xfs_buf **bpp);
void xfs_qm_dqunpin_wait(struct xfs_dquot *dqp);
void xfs_qm_adjust_dqtimers(struct xfs_mount *mp,
- struct xfs_disk_dquot *d);
+ struct xfs_dquot *d);
void xfs_qm_adjust_dqlimits(struct xfs_mount *mp,
struct xfs_dquot *d);
xfs_dqid_t xfs_qm_id_for_quotatype(struct xfs_inode *ip, uint type);
diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c
index baad1748d0d1..349c92d26570 100644
--- a/fs/xfs/xfs_dquot_item.c
+++ b/fs/xfs/xfs_dquot_item.c
@@ -145,21 +145,6 @@ xfs_qm_dquot_logitem_push(
if (atomic_read(&dqp->q_pincount) > 0)
return XFS_ITEM_PINNED;
- /*
- * The buffer containing this item failed to be written back
- * previously. Resubmit the buffer for IO
- */
- if (test_bit(XFS_LI_FAILED, &lip->li_flags)) {
- if (!xfs_buf_trylock(bp))
- return XFS_ITEM_LOCKED;
-
- if (!xfs_buf_resubmit_failed_buffers(bp, buffer_list))
- rval = XFS_ITEM_FLUSHING;
-
- xfs_buf_unlock(bp);
- return rval;
- }
-
if (!xfs_dqlock_nowait(dqp))
return XFS_ITEM_LOCKED;
@@ -358,7 +343,7 @@ xfs_qm_qoff_logitem_relse(
ASSERT(test_bit(XFS_LI_IN_AIL, &lip->li_flags) ||
test_bit(XFS_LI_ABORTED, &lip->li_flags) ||
XFS_FORCED_SHUTDOWN(lip->li_mountp));
- xfs_trans_ail_remove(lip, SHUTDOWN_LOG_IO_ERROR);
+ xfs_trans_ail_delete(lip, 0);
kmem_free(lip->li_lv_shadow);
kmem_free(qoff);
}
diff --git a/fs/xfs/xfs_dquot_item_recover.c b/fs/xfs/xfs_dquot_item_recover.c
new file mode 100644
index 000000000000..3400be4c88f0
--- /dev/null
+++ b/fs/xfs/xfs_dquot_item_recover.c
@@ -0,0 +1,201 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_inode.h"
+#include "xfs_quota.h"
+#include "xfs_trans.h"
+#include "xfs_buf_item.h"
+#include "xfs_trans_priv.h"
+#include "xfs_qm.h"
+#include "xfs_log.h"
+#include "xfs_log_priv.h"
+#include "xfs_log_recover.h"
+
+STATIC void
+xlog_recover_dquot_ra_pass2(
+ struct xlog *log,
+ struct xlog_recover_item *item)
+{
+ struct xfs_mount *mp = log->l_mp;
+ struct xfs_disk_dquot *recddq;
+ struct xfs_dq_logformat *dq_f;
+ uint type;
+
+ if (mp->m_qflags == 0)
+ return;
+
+ recddq = item->ri_buf[1].i_addr;
+ if (recddq == NULL)
+ return;
+ if (item->ri_buf[1].i_len < sizeof(struct xfs_disk_dquot))
+ return;
+
+ type = recddq->d_flags & (XFS_DQ_USER | XFS_DQ_PROJ | XFS_DQ_GROUP);
+ ASSERT(type);
+ if (log->l_quotaoffs_flag & type)
+ return;
+
+ dq_f = item->ri_buf[0].i_addr;
+ ASSERT(dq_f);
+ ASSERT(dq_f->qlf_len == 1);
+
+ xlog_buf_readahead(log, dq_f->qlf_blkno,
+ XFS_FSB_TO_BB(mp, dq_f->qlf_len),
+ &xfs_dquot_buf_ra_ops);
+}
+
+/*
+ * Recover a dquot record
+ */
+STATIC int
+xlog_recover_dquot_commit_pass2(
+ struct xlog *log,
+ struct list_head *buffer_list,
+ struct xlog_recover_item *item,
+ xfs_lsn_t current_lsn)
+{
+ struct xfs_mount *mp = log->l_mp;
+ struct xfs_buf *bp;
+ struct xfs_disk_dquot *ddq, *recddq;
+ struct xfs_dq_logformat *dq_f;
+ xfs_failaddr_t fa;
+ int error;
+ uint type;
+
+ /*
+ * Filesystems are required to send in quota flags at mount time.
+ */
+ if (mp->m_qflags == 0)
+ return 0;
+
+ recddq = item->ri_buf[1].i_addr;
+ if (recddq == NULL) {
+ xfs_alert(log->l_mp, "NULL dquot in %s.", __func__);
+ return -EFSCORRUPTED;
+ }
+ if (item->ri_buf[1].i_len < sizeof(struct xfs_disk_dquot)) {
+ xfs_alert(log->l_mp, "dquot too small (%d) in %s.",
+ item->ri_buf[1].i_len, __func__);
+ return -EFSCORRUPTED;
+ }
+
+ /*
+ * This type of quotas was turned off, so ignore this record.
+ */
+ type = recddq->d_flags & (XFS_DQ_USER | XFS_DQ_PROJ | XFS_DQ_GROUP);
+ ASSERT(type);
+ if (log->l_quotaoffs_flag & type)
+ return 0;
+
+ /*
+ * At this point we know that quota was _not_ turned off.
+ * Since the mount flags are not indicating to us otherwise, this
+ * must mean that quota is on, and the dquot needs to be replayed.
+ * Remember that we may not have fully recovered the superblock yet,
+ * so we can't do the usual trick of looking at the SB quota bits.
+ *
+ * The other possibility, of course, is that the quota subsystem was
+ * removed since the last mount - ENOSYS.
+ */
+ dq_f = item->ri_buf[0].i_addr;
+ ASSERT(dq_f);
+ fa = xfs_dquot_verify(mp, recddq, dq_f->qlf_id, 0);
+ if (fa) {
+ xfs_alert(mp, "corrupt dquot ID 0x%x in log at %pS",
+ dq_f->qlf_id, fa);
+ return -EFSCORRUPTED;
+ }
+ ASSERT(dq_f->qlf_len == 1);
+
+ /*
+ * At this point we are assuming that the dquots have been allocated
+ * and hence the buffer has valid dquots stamped in it. It should,
+ * therefore, pass verifier validation. If the dquot is bad, then we'll
+ * return an error here, so we don't need to specifically check
+ * the dquot in the buffer after the verifier has run.
+ */
+ error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dq_f->qlf_blkno,
+ XFS_FSB_TO_BB(mp, dq_f->qlf_len), 0, &bp,
+ &xfs_dquot_buf_ops);
+ if (error)
+ return error;
+
+ ASSERT(bp);
+ ddq = xfs_buf_offset(bp, dq_f->qlf_boffset);
+
+ /*
+ * If the dquot has an LSN in it, recover the dquot only if it's less
+ * than the lsn of the transaction we are replaying.
+ */
+ if (xfs_sb_version_hascrc(&mp->m_sb)) {
+ struct xfs_dqblk *dqb = (struct xfs_dqblk *)ddq;
+ xfs_lsn_t lsn = be64_to_cpu(dqb->dd_lsn);
+
+ if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) {
+ goto out_release;
+ }
+ }
+
+ memcpy(ddq, recddq, item->ri_buf[1].i_len);
+ if (xfs_sb_version_hascrc(&mp->m_sb)) {
+ xfs_update_cksum((char *)ddq, sizeof(struct xfs_dqblk),
+ XFS_DQUOT_CRC_OFF);
+ }
+
+ ASSERT(dq_f->qlf_size == 2);
+ ASSERT(bp->b_mount == mp);
+ bp->b_iodone = xlog_recover_iodone;
+ xfs_buf_delwri_queue(bp, buffer_list);
+
+out_release:
+ xfs_buf_relse(bp);
+ return 0;
+}
+
+const struct xlog_recover_item_ops xlog_dquot_item_ops = {
+ .item_type = XFS_LI_DQUOT,
+ .ra_pass2 = xlog_recover_dquot_ra_pass2,
+ .commit_pass2 = xlog_recover_dquot_commit_pass2,
+};
+
+/*
+ * Recover QUOTAOFF records. We simply make a note of it in the xlog
+ * structure, so that we know not to do any dquot item or dquot buffer
+ * recovery of that type.
+ */
+STATIC int
+xlog_recover_quotaoff_commit_pass1(
+ struct xlog *log,
+ struct xlog_recover_item *item)
+{
+ struct xfs_qoff_logformat *qoff_f = item->ri_buf[0].i_addr;
+ ASSERT(qoff_f);
+
+ /*
+ * The logitem format's flag tells us if this was user quotaoff,
+ * group/project quotaoff or both.
+ */
+ if (qoff_f->qf_flags & XFS_UQUOTA_ACCT)
+ log->l_quotaoffs_flag |= XFS_DQ_USER;
+ if (qoff_f->qf_flags & XFS_PQUOTA_ACCT)
+ log->l_quotaoffs_flag |= XFS_DQ_PROJ;
+ if (qoff_f->qf_flags & XFS_GQUOTA_ACCT)
+ log->l_quotaoffs_flag |= XFS_DQ_GROUP;
+
+ return 0;
+}
+
+const struct xlog_recover_item_ops xlog_quotaoff_item_ops = {
+ .item_type = XFS_LI_QUOTAOFF,
+ .commit_pass1 = xlog_recover_quotaoff_commit_pass1,
+ /* nothing to commit in pass2 */
+};
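The LSN check in xlog_recover_dquot_commit_pass2() above is what keeps replay idempotent on v5 (CRC-enabled) filesystems: the logged dquot is only copied over the on-disk buffer when the buffer's stamped LSN is older than the transaction being recovered. A minimal user-space sketch of that decision, using a hypothetical lsn_t stand-in for xfs_lsn_t and a plain integer compare in place of XFS_LSN_CMP, might look like this:

#include <stdbool.h>
#include <stdint.h>

/* Hypothetical stand-in for xfs_lsn_t (cycle in the high bits, block in the low). */
typedef int64_t lsn_t;

/* Simplified ordering; the real XFS_LSN_CMP compares cycle, then block. */
static int lsn_cmp(lsn_t a, lsn_t b)
{
	if (a > b)
		return 1;
	if (a < b)
		return -1;
	return 0;
}

/*
 * Decide whether a logged record should be replayed over the on-disk copy.
 * An unset (0) or invalid (-1) on-disk LSN means the buffer predates LSN
 * stamping and is always replayed; otherwise replay only if the on-disk
 * copy is strictly older than the transaction being recovered.
 */
static bool should_replay(lsn_t disk_lsn, lsn_t current_lsn)
{
	if (disk_lsn == 0 || disk_lsn == -1)
		return true;
	return lsn_cmp(disk_lsn, current_lsn) < 0;
}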
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index a21e9cc6516a..7f6e20899473 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -53,6 +53,7 @@ static unsigned int xfs_errortag_random_default[] = {
XFS_RANDOM_FORCE_SCRUB_REPAIR,
XFS_RANDOM_FORCE_SUMMARY_RECALC,
XFS_RANDOM_IUNLINK_FALLBACK,
+ XFS_RANDOM_BUF_IOERROR,
};
struct xfs_errortag_attr {
@@ -162,6 +163,7 @@ XFS_ERRORTAG_ATTR_RW(buf_lru_ref, XFS_ERRTAG_BUF_LRU_REF);
XFS_ERRORTAG_ATTR_RW(force_repair, XFS_ERRTAG_FORCE_SCRUB_REPAIR);
XFS_ERRORTAG_ATTR_RW(bad_summary, XFS_ERRTAG_FORCE_SUMMARY_RECALC);
XFS_ERRORTAG_ATTR_RW(iunlink_fallback, XFS_ERRTAG_IUNLINK_FALLBACK);
+XFS_ERRORTAG_ATTR_RW(buf_ioerror, XFS_ERRTAG_BUF_IOERROR);
static struct attribute *xfs_errortag_attrs[] = {
XFS_ERRORTAG_ATTR_LIST(noerror),
@@ -199,6 +201,7 @@ static struct attribute *xfs_errortag_attrs[] = {
XFS_ERRORTAG_ATTR_LIST(force_repair),
XFS_ERRORTAG_ATTR_LIST(bad_summary),
XFS_ERRORTAG_ATTR_LIST(iunlink_fallback),
+ XFS_ERRORTAG_ATTR_LIST(buf_ioerror),
NULL,
};
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index 6ea847f6e298..b9c333bae0a1 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -22,16 +22,20 @@
#include "xfs_bmap.h"
#include "xfs_trace.h"
#include "xfs_error.h"
+#include "xfs_log_priv.h"
+#include "xfs_log_recover.h"
kmem_zone_t *xfs_efi_zone;
kmem_zone_t *xfs_efd_zone;
+static const struct xfs_item_ops xfs_efi_item_ops;
+
static inline struct xfs_efi_log_item *EFI_ITEM(struct xfs_log_item *lip)
{
return container_of(lip, struct xfs_efi_log_item, efi_item);
}
-void
+STATIC void
xfs_efi_item_free(
struct xfs_efi_log_item *efip)
{
@@ -49,13 +53,13 @@ xfs_efi_item_free(
* committed vs unpin operations in bulk insert operations. Hence the reference
* count to ensure only the last caller frees the EFI.
*/
-void
+STATIC void
xfs_efi_release(
struct xfs_efi_log_item *efip)
{
ASSERT(atomic_read(&efip->efi_refcount) > 0);
if (atomic_dec_and_test(&efip->efi_refcount)) {
- xfs_trans_ail_remove(&efip->efi_item, SHUTDOWN_LOG_IO_ERROR);
+ xfs_trans_ail_delete(&efip->efi_item, SHUTDOWN_LOG_IO_ERROR);
xfs_efi_item_free(efip);
}
}
@@ -139,18 +143,10 @@ xfs_efi_item_release(
xfs_efi_release(EFI_ITEM(lip));
}
-static const struct xfs_item_ops xfs_efi_item_ops = {
- .iop_size = xfs_efi_item_size,
- .iop_format = xfs_efi_item_format,
- .iop_unpin = xfs_efi_item_unpin,
- .iop_release = xfs_efi_item_release,
-};
-
-
/*
* Allocate and initialize an efi item with the given number of extents.
*/
-struct xfs_efi_log_item *
+STATIC struct xfs_efi_log_item *
xfs_efi_init(
struct xfs_mount *mp,
uint nextents)
@@ -161,7 +157,7 @@ xfs_efi_init(
ASSERT(nextents > 0);
if (nextents > XFS_EFI_MAX_FAST_EXTENTS) {
- size = (uint)(sizeof(xfs_efi_log_item_t) +
+ size = (uint)(sizeof(struct xfs_efi_log_item) +
((nextents - 1) * sizeof(xfs_extent_t)));
efip = kmem_zalloc(size, 0);
} else {
@@ -184,7 +180,7 @@ xfs_efi_init(
* one of which will be the native format for this kernel.
* It will handle the conversion of formats if necessary.
*/
-int
+STATIC int
xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt)
{
xfs_efi_log_format_t *src_efi_fmt = buf->i_addr;
@@ -412,41 +408,16 @@ xfs_extent_free_diff_items(
XFS_FSB_TO_AGNO(mp, rb->xefi_startblock);
}
-/* Get an EFI. */
-STATIC void *
-xfs_extent_free_create_intent(
- struct xfs_trans *tp,
- unsigned int count)
-{
- struct xfs_efi_log_item *efip;
-
- ASSERT(tp != NULL);
- ASSERT(count > 0);
-
- efip = xfs_efi_init(tp->t_mountp, count);
- ASSERT(efip != NULL);
-
- /*
- * Get a log_item_desc to point at the new item.
- */
- xfs_trans_add_item(tp, &efip->efi_item);
- return efip;
-}
-
/* Log a free extent to the intent item. */
STATIC void
xfs_extent_free_log_item(
struct xfs_trans *tp,
- void *intent,
- struct list_head *item)
+ struct xfs_efi_log_item *efip,
+ struct xfs_extent_free_item *free)
{
- struct xfs_efi_log_item *efip = intent;
- struct xfs_extent_free_item *free;
uint next_extent;
struct xfs_extent *extp;
- free = container_of(item, struct xfs_extent_free_item, xefi_list);
-
tp->t_flags |= XFS_TRANS_DIRTY;
set_bit(XFS_LI_DIRTY, &efip->efi_item.li_flags);
@@ -462,29 +433,50 @@ xfs_extent_free_log_item(
extp->ext_len = free->xefi_blockcount;
}
+static struct xfs_log_item *
+xfs_extent_free_create_intent(
+ struct xfs_trans *tp,
+ struct list_head *items,
+ unsigned int count,
+ bool sort)
+{
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_efi_log_item *efip = xfs_efi_init(mp, count);
+ struct xfs_extent_free_item *free;
+
+ ASSERT(count > 0);
+
+ xfs_trans_add_item(tp, &efip->efi_item);
+ if (sort)
+ list_sort(mp, items, xfs_extent_free_diff_items);
+ list_for_each_entry(free, items, xefi_list)
+ xfs_extent_free_log_item(tp, efip, free);
+ return &efip->efi_item;
+}
+
/* Get an EFD so we can process all the free extents. */
-STATIC void *
+static struct xfs_log_item *
xfs_extent_free_create_done(
struct xfs_trans *tp,
- void *intent,
+ struct xfs_log_item *intent,
unsigned int count)
{
- return xfs_trans_get_efd(tp, intent, count);
+ return &xfs_trans_get_efd(tp, EFI_ITEM(intent), count)->efd_item;
}
/* Process a free extent. */
STATIC int
xfs_extent_free_finish_item(
struct xfs_trans *tp,
+ struct xfs_log_item *done,
struct list_head *item,
- void *done_item,
- void **state)
+ struct xfs_btree_cur **state)
{
struct xfs_extent_free_item *free;
int error;
free = container_of(item, struct xfs_extent_free_item, xefi_list);
- error = xfs_trans_free_extent(tp, done_item,
+ error = xfs_trans_free_extent(tp, EFD_ITEM(done),
free->xefi_startblock,
free->xefi_blockcount,
&free->xefi_oinfo, free->xefi_skip_discard);
@@ -495,9 +487,9 @@ xfs_extent_free_finish_item(
/* Abort all pending EFIs. */
STATIC void
xfs_extent_free_abort_intent(
- void *intent)
+ struct xfs_log_item *intent)
{
- xfs_efi_release(intent);
+ xfs_efi_release(EFI_ITEM(intent));
}
/* Cancel a free extent. */
@@ -513,10 +505,8 @@ xfs_extent_free_cancel_item(
const struct xfs_defer_op_type xfs_extent_free_defer_type = {
.max_items = XFS_EFI_MAX_FAST_EXTENTS,
- .diff_items = xfs_extent_free_diff_items,
.create_intent = xfs_extent_free_create_intent,
.abort_intent = xfs_extent_free_abort_intent,
- .log_item = xfs_extent_free_log_item,
.create_done = xfs_extent_free_create_done,
.finish_item = xfs_extent_free_finish_item,
.cancel_item = xfs_extent_free_cancel_item,
@@ -529,12 +519,12 @@ const struct xfs_defer_op_type xfs_extent_free_defer_type = {
STATIC int
xfs_agfl_free_finish_item(
struct xfs_trans *tp,
+ struct xfs_log_item *done,
struct list_head *item,
- void *done_item,
- void **state)
+ struct xfs_btree_cur **state)
{
struct xfs_mount *mp = tp->t_mountp;
- struct xfs_efd_log_item *efdp = done_item;
+ struct xfs_efd_log_item *efdp = EFD_ITEM(done);
struct xfs_extent_free_item *free;
struct xfs_extent *extp;
struct xfs_buf *agbp;
@@ -579,10 +569,8 @@ xfs_agfl_free_finish_item(
/* sub-type with special handling for AGFL deferred frees */
const struct xfs_defer_op_type xfs_agfl_free_defer_type = {
.max_items = XFS_EFI_MAX_FAST_EXTENTS,
- .diff_items = xfs_extent_free_diff_items,
.create_intent = xfs_extent_free_create_intent,
.abort_intent = xfs_extent_free_abort_intent,
- .log_item = xfs_extent_free_log_item,
.create_done = xfs_extent_free_create_done,
.finish_item = xfs_agfl_free_finish_item,
.cancel_item = xfs_extent_free_cancel_item,
@@ -592,19 +580,19 @@ const struct xfs_defer_op_type xfs_agfl_free_defer_type = {
* Process an extent free intent item that was recovered from
* the log. We need to free the extents that it describes.
*/
-int
-xfs_efi_recover(
- struct xfs_mount *mp,
- struct xfs_efi_log_item *efip)
+STATIC int
+xfs_efi_item_recover(
+ struct xfs_log_item *lip,
+ struct xfs_trans *parent_tp)
{
- struct xfs_efd_log_item *efdp;
- struct xfs_trans *tp;
- int i;
- int error = 0;
- xfs_extent_t *extp;
- xfs_fsblock_t startblock_fsb;
-
- ASSERT(!test_bit(XFS_EFI_RECOVERED, &efip->efi_flags));
+ struct xfs_efi_log_item *efip = EFI_ITEM(lip);
+ struct xfs_mount *mp = parent_tp->t_mountp;
+ struct xfs_efd_log_item *efdp;
+ struct xfs_trans *tp;
+ struct xfs_extent *extp;
+ xfs_fsblock_t startblock_fsb;
+ int i;
+ int error = 0;
/*
* First check the validity of the extents described by the
@@ -623,7 +611,6 @@ xfs_efi_recover(
* This will pull the EFI from the AIL and
* free the memory associated with it.
*/
- set_bit(XFS_EFI_RECOVERED, &efip->efi_flags);
xfs_efi_release(efip);
return -EFSCORRUPTED;
}
@@ -644,7 +631,6 @@ xfs_efi_recover(
}
- set_bit(XFS_EFI_RECOVERED, &efip->efi_flags);
error = xfs_trans_commit(tp);
return error;
@@ -652,3 +638,93 @@ abort_error:
xfs_trans_cancel(tp);
return error;
}
+
+STATIC bool
+xfs_efi_item_match(
+ struct xfs_log_item *lip,
+ uint64_t intent_id)
+{
+ return EFI_ITEM(lip)->efi_format.efi_id == intent_id;
+}
+
+static const struct xfs_item_ops xfs_efi_item_ops = {
+ .iop_size = xfs_efi_item_size,
+ .iop_format = xfs_efi_item_format,
+ .iop_unpin = xfs_efi_item_unpin,
+ .iop_release = xfs_efi_item_release,
+ .iop_recover = xfs_efi_item_recover,
+ .iop_match = xfs_efi_item_match,
+};
+
+/*
+ * This routine is called to create an in-core extent free intent
+ * item from the efi format structure which was logged on disk.
+ * It allocates an in-core efi, copies the extents from the format
+ * structure into it, and adds the efi to the AIL with the given
+ * LSN.
+ */
+STATIC int
+xlog_recover_efi_commit_pass2(
+ struct xlog *log,
+ struct list_head *buffer_list,
+ struct xlog_recover_item *item,
+ xfs_lsn_t lsn)
+{
+ struct xfs_mount *mp = log->l_mp;
+ struct xfs_efi_log_item *efip;
+ struct xfs_efi_log_format *efi_formatp;
+ int error;
+
+ efi_formatp = item->ri_buf[0].i_addr;
+
+ efip = xfs_efi_init(mp, efi_formatp->efi_nextents);
+ error = xfs_efi_copy_format(&item->ri_buf[0], &efip->efi_format);
+ if (error) {
+ xfs_efi_item_free(efip);
+ return error;
+ }
+ atomic_set(&efip->efi_next_extent, efi_formatp->efi_nextents);
+ /*
+ * Insert the intent into the AIL directly and drop one reference so
+ * that finishing or canceling the work will drop the other.
+ */
+ xfs_trans_ail_insert(log->l_ailp, &efip->efi_item, lsn);
+ xfs_efi_release(efip);
+ return 0;
+}
+
+const struct xlog_recover_item_ops xlog_efi_item_ops = {
+ .item_type = XFS_LI_EFI,
+ .commit_pass2 = xlog_recover_efi_commit_pass2,
+};
+
+/*
+ * This routine is called when an EFD format structure is found in a committed
+ * transaction in the log. Its purpose is to cancel the corresponding EFI if it
+ * was still in the log. To do this it searches the AIL for the EFI with an id
+ * equal to that in the EFD format structure. If we find it we drop the EFD
+ * reference, which removes the EFI from the AIL and frees it.
+ */
+STATIC int
+xlog_recover_efd_commit_pass2(
+ struct xlog *log,
+ struct list_head *buffer_list,
+ struct xlog_recover_item *item,
+ xfs_lsn_t lsn)
+{
+ struct xfs_efd_log_format *efd_formatp;
+
+ efd_formatp = item->ri_buf[0].i_addr;
+ ASSERT((item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_32_t) +
+ ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_32_t)))) ||
+ (item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_64_t) +
+ ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_64_t)))));
+
+ xlog_recover_release_intent(log, XFS_LI_EFI, efd_formatp->efd_efi_id);
+ return 0;
+}
+
+const struct xlog_recover_item_ops xlog_efd_item_ops = {
+ .item_type = XFS_LI_EFD,
+ .commit_pass2 = xlog_recover_efd_commit_pass2,
+};
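xlog_recover_efd_commit_pass2() above only extracts the intent id from the EFD and hands it to xlog_recover_release_intent(), which (elsewhere in this series) walks the AIL and uses the new ->iop_match hook, here xfs_efi_item_match(), to find the EFI to drop. A rough user-space model of that id-based cancellation, with hypothetical list and item types standing in for the AIL and struct xfs_log_item, could look like:

#include <stdint.h>

/* Hypothetical stand-ins for struct xfs_log_item and the AIL list. */
struct log_item {
	struct log_item		*next;
	int			type;		/* e.g. an XFS_LI_EFI-like tag */
	uint64_t		intent_id;	/* id logged in the intent */
	void			(*release)(struct log_item *item);
};

struct item_list {
	struct log_item		*head;
};

/*
 * Find the pending intent of the given type whose id matches the "done"
 * record and release it, unlinking it from the list.  This mirrors the shape
 * of xlog_recover_release_intent(): match by type and id, then drop the
 * recovery reference so the intent item is freed.
 */
static void release_intent(struct item_list *ail, int type, uint64_t intent_id)
{
	struct log_item **pp = &ail->head;

	while (*pp) {
		struct log_item *item = *pp;

		if (item->type == type && item->intent_id == intent_id) {
			*pp = item->next;
			item->release(item);
			return;
		}
		pp = &item->next;
	}
}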
diff --git a/fs/xfs/xfs_extfree_item.h b/fs/xfs/xfs_extfree_item.h
index 16aaab06d4ec..cd2860c875bf 100644
--- a/fs/xfs/xfs_extfree_item.h
+++ b/fs/xfs/xfs_extfree_item.h
@@ -17,11 +17,6 @@ struct kmem_zone;
#define XFS_EFI_MAX_FAST_EXTENTS 16
/*
- * Define EFI flag bits. Manipulated by set/clear/test_bit operators.
- */
-#define XFS_EFI_RECOVERED 1
-
-/*
* This is the "extent free intention" log item. It is used to log the fact
* that some extents need to be free. It is used in conjunction with the
* "extent free done" log item described below.
@@ -50,25 +45,24 @@ struct kmem_zone;
* of commit failure or log I/O errors. Note that the EFD is not inserted in the
* AIL, so at this point both the EFI and EFD are freed.
*/
-typedef struct xfs_efi_log_item {
+struct xfs_efi_log_item {
struct xfs_log_item efi_item;
atomic_t efi_refcount;
atomic_t efi_next_extent;
- unsigned long efi_flags; /* misc flags */
xfs_efi_log_format_t efi_format;
-} xfs_efi_log_item_t;
+};
/*
* This is the "extent free done" log item. It is used to log
* the fact that some extents earlier mentioned in an efi item
* have been freed.
*/
-typedef struct xfs_efd_log_item {
+struct xfs_efd_log_item {
struct xfs_log_item efd_item;
- xfs_efi_log_item_t *efd_efip;
+ struct xfs_efi_log_item *efd_efip;
uint efd_next_extent;
xfs_efd_log_format_t efd_format;
-} xfs_efd_log_item_t;
+};
/*
* Max number of extents in fast allocation path.
@@ -78,13 +72,4 @@ typedef struct xfs_efd_log_item {
extern struct kmem_zone *xfs_efi_zone;
extern struct kmem_zone *xfs_efd_zone;
-xfs_efi_log_item_t *xfs_efi_init(struct xfs_mount *, uint);
-int xfs_efi_copy_format(xfs_log_iovec_t *buf,
- xfs_efi_log_format_t *dst_efi_fmt);
-void xfs_efi_item_free(xfs_efi_log_item_t *);
-void xfs_efi_release(struct xfs_efi_log_item *);
-
-int xfs_efi_recover(struct xfs_mount *mp,
- struct xfs_efi_log_item *efip);
-
#endif /* __XFS_EXTFREE_ITEM_H__ */
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 4b8bdecc3863..403c90309a8f 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1102,7 +1102,7 @@ xfs_dir_open(
* certain to have the next operation be a read there.
*/
mode = xfs_ilock_data_map_shared(ip);
- if (ip->i_d.di_nextents > 0)
+ if (ip->i_df.if_nextents > 0)
error = xfs_dir3_data_readahead(ip, 0, 0);
xfs_iunlock(ip, mode);
return error;
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 3e61d0cc23f8..ef1d5bb88b93 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -504,10 +504,7 @@ xfs_do_force_shutdown(
} else if (logerror) {
xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_LOGERROR,
"Log I/O Error Detected. Shutting down filesystem");
- } else if (flags & SHUTDOWN_DEVICE_REQ) {
- xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_IOERROR,
- "All device paths lost. Shutting down filesystem");
- } else if (!(flags & SHUTDOWN_REMOTE_REQ)) {
+ } else {
xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_IOERROR,
"I/O Error Detected. Shutting down filesystem");
}
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 8bf1d15be3f6..5daef654956c 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -22,6 +22,7 @@
#include "xfs_dquot_item.h"
#include "xfs_dquot.h"
#include "xfs_reflink.h"
+#include "xfs_ialloc.h"
#include <linux/iversion.h>
@@ -62,8 +63,6 @@ xfs_inode_alloc(
memset(&ip->i_imap, 0, sizeof(struct xfs_imap));
ip->i_afp = NULL;
ip->i_cowfp = NULL;
- ip->i_cnextents = 0;
- ip->i_cformat = XFS_DINODE_FMT_EXTENTS;
memset(&ip->i_df, 0, sizeof(ip->i_df));
ip->i_flags = 0;
ip->i_delayed_blks = 0;
@@ -88,15 +87,18 @@ xfs_inode_free_callback(
case S_IFREG:
case S_IFDIR:
case S_IFLNK:
- xfs_idestroy_fork(ip, XFS_DATA_FORK);
+ xfs_idestroy_fork(&ip->i_df);
break;
}
- if (ip->i_afp)
- xfs_idestroy_fork(ip, XFS_ATTR_FORK);
- if (ip->i_cowfp)
- xfs_idestroy_fork(ip, XFS_COW_FORK);
-
+ if (ip->i_afp) {
+ xfs_idestroy_fork(ip->i_afp);
+ kmem_cache_free(xfs_ifork_zone, ip->i_afp);
+ }
+ if (ip->i_cowfp) {
+ xfs_idestroy_fork(ip->i_cowfp);
+ kmem_cache_free(xfs_ifork_zone, ip->i_cowfp);
+ }
if (ip->i_itemp) {
ASSERT(!test_bit(XFS_LI_IN_AIL,
&ip->i_itemp->ili_item.li_flags));
@@ -423,6 +425,7 @@ xfs_iget_cache_hit(
spin_unlock(&ip->i_flags_lock);
rcu_read_unlock();
+ ASSERT(!rwsem_is_locked(&inode->i_rwsem));
error = xfs_reinit_inode(mp, inode);
if (error) {
bool wake;
@@ -456,9 +459,6 @@ xfs_iget_cache_hit(
ip->i_sick = 0;
ip->i_checked = 0;
- ASSERT(!rwsem_is_locked(&inode->i_rwsem));
- init_rwsem(&inode->i_rwsem);
-
spin_unlock(&ip->i_flags_lock);
spin_unlock(&pag->pag_ici_lock);
} else {
@@ -479,7 +479,7 @@ xfs_iget_cache_hit(
xfs_ilock(ip, lock_flags);
if (!(flags & XFS_IGET_INCORE))
- xfs_iflags_clear(ip, XFS_ISTALE | XFS_IDONTCACHE);
+ xfs_iflags_clear(ip, XFS_ISTALE);
XFS_STATS_INC(mp, xs_ig_found);
return 0;
@@ -510,18 +510,42 @@ xfs_iget_cache_miss(
if (!ip)
return -ENOMEM;
- error = xfs_iread(mp, tp, ip, flags);
+ error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, flags);
if (error)
goto out_destroy;
- if (!xfs_inode_verify_forks(ip)) {
- error = -EFSCORRUPTED;
- goto out_destroy;
+ /*
+ * For version 5 superblocks, if we are initialising a new inode and we
+ * are not utilising the XFS_MOUNT_IKEEP inode cluster mode, we can
+ * simply build the new inode core with a random generation number.
+ *
+ * For version 4 (and older) superblocks, log recovery is dependent on
+ * the di_flushiter field being initialised from the current on-disk
+ * value and hence we must also read the inode off disk even when
+ * initializing new inodes.
+ */
+ if (xfs_sb_version_has_v3inode(&mp->m_sb) &&
+ (flags & XFS_IGET_CREATE) && !(mp->m_flags & XFS_MOUNT_IKEEP)) {
+ VFS_I(ip)->i_generation = prandom_u32();
+ } else {
+ struct xfs_dinode *dip;
+ struct xfs_buf *bp;
+
+ error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &bp, 0);
+ if (error)
+ goto out_destroy;
+
+ error = xfs_inode_from_disk(ip, dip);
+ if (!error)
+ xfs_buf_set_ref(bp, XFS_INO_REF);
+ xfs_trans_brelse(tp, bp);
+
+ if (error)
+ goto out_destroy;
}
trace_xfs_iget_miss(ip);
-
/*
* Check the inode free state is valid. This also detects lookup
* racing with unlinks.
@@ -561,7 +585,7 @@ xfs_iget_cache_miss(
*/
iflags = XFS_INEW;
if (flags & XFS_IGET_DONTCACHE)
- iflags |= XFS_IDONTCACHE;
+ d_mark_dontcache(VFS_I(ip));
ip->i_udquot = NULL;
ip->i_gdquot = NULL;
ip->i_pdquot = NULL;
@@ -737,13 +761,18 @@ xfs_icache_inode_is_allocated(
*/
#define XFS_LOOKUP_BATCH 32
-STATIC int
-xfs_inode_ag_walk_grab(
+/*
+ * Decide if the given @ip is eligible to be a part of the inode walk, and
+ * grab it if so. Returns true if it's ready to go or false if we should just
+ * ignore it.
+ */
+STATIC bool
+xfs_inode_walk_ag_grab(
struct xfs_inode *ip,
int flags)
{
struct inode *inode = VFS_I(ip);
- bool newinos = !!(flags & XFS_AGITER_INEW_WAIT);
+ bool newinos = !!(flags & XFS_INODE_WALK_INEW_WAIT);
ASSERT(rcu_read_lock_held());
@@ -768,39 +797,41 @@ xfs_inode_ag_walk_grab(
/* nothing to sync during shutdown */
if (XFS_FORCED_SHUTDOWN(ip->i_mount))
- return -EFSCORRUPTED;
+ return false;
/* If we can't grab the inode, it must be on its way to reclaim. */
if (!igrab(inode))
- return -ENOENT;
+ return false;
/* inode is valid */
- return 0;
+ return true;
out_unlock_noent:
spin_unlock(&ip->i_flags_lock);
- return -ENOENT;
+ return false;
}
+/*
+ * For a given per-AG structure @pag, grab, @execute, and rele all incore
+ * inodes with the given radix tree @tag.
+ */
STATIC int
-xfs_inode_ag_walk(
- struct xfs_mount *mp,
+xfs_inode_walk_ag(
struct xfs_perag *pag,
- int (*execute)(struct xfs_inode *ip, int flags,
- void *args),
- int flags,
+ int iter_flags,
+ int (*execute)(struct xfs_inode *ip, void *args),
void *args,
- int tag,
- int iter_flags)
+ int tag)
{
+ struct xfs_mount *mp = pag->pag_mount;
uint32_t first_index;
int last_error = 0;
int skipped;
- int done;
+ bool done;
int nr_found;
restart:
- done = 0;
+ done = false;
skipped = 0;
first_index = 0;
nr_found = 0;
@@ -811,7 +842,7 @@ restart:
rcu_read_lock();
- if (tag == -1)
+ if (tag == XFS_ICI_NO_TAG)
nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
(void **)batch, first_index,
XFS_LOOKUP_BATCH);
@@ -833,7 +864,7 @@ restart:
for (i = 0; i < nr_found; i++) {
struct xfs_inode *ip = batch[i];
- if (done || xfs_inode_ag_walk_grab(ip, iter_flags))
+ if (done || !xfs_inode_walk_ag_grab(ip, iter_flags))
batch[i] = NULL;
/*
@@ -852,7 +883,7 @@ restart:
continue;
first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
- done = 1;
+ done = true;
}
/* unlock now we've grabbed the inodes. */
@@ -861,10 +892,10 @@ restart:
for (i = 0; i < nr_found; i++) {
if (!batch[i])
continue;
- if ((iter_flags & XFS_AGITER_INEW_WAIT) &&
+ if ((iter_flags & XFS_INODE_WALK_INEW_WAIT) &&
xfs_iflags_test(batch[i], XFS_INEW))
xfs_inew_wait(batch[i]);
- error = execute(batch[i], flags, args);
+ error = execute(batch[i], args);
xfs_irele(batch[i]);
if (error == -EAGAIN) {
skipped++;
@@ -889,6 +920,49 @@ restart:
return last_error;
}
+/* Fetch the next (possibly tagged) per-AG structure. */
+static inline struct xfs_perag *
+xfs_inode_walk_get_perag(
+ struct xfs_mount *mp,
+ xfs_agnumber_t agno,
+ int tag)
+{
+ if (tag == XFS_ICI_NO_TAG)
+ return xfs_perag_get(mp, agno);
+ return xfs_perag_get_tag(mp, agno, tag);
+}
+
+/*
+ * Call the @execute function on all incore inodes matching the radix tree
+ * @tag.
+ */
+int
+xfs_inode_walk(
+ struct xfs_mount *mp,
+ int iter_flags,
+ int (*execute)(struct xfs_inode *ip, void *args),
+ void *args,
+ int tag)
+{
+ struct xfs_perag *pag;
+ int error = 0;
+ int last_error = 0;
+ xfs_agnumber_t ag;
+
+ ag = 0;
+ while ((pag = xfs_inode_walk_get_perag(mp, ag, tag))) {
+ ag = pag->pag_agno + 1;
+ error = xfs_inode_walk_ag(pag, iter_flags, execute, args, tag);
+ xfs_perag_put(pag);
+ if (error) {
+ last_error = error;
+ if (error == -EFSCORRUPTED)
+ break;
+ }
+ }
+ return last_error;
+}
+
/*
* Background scanning to trim post-EOF preallocated space. This is queued
* based on the 'speculative_prealloc_lifetime' tunable (5m by default).
@@ -952,75 +1026,6 @@ xfs_cowblocks_worker(
xfs_queue_cowblocks(mp);
}
-int
-xfs_inode_ag_iterator_flags(
- struct xfs_mount *mp,
- int (*execute)(struct xfs_inode *ip, int flags,
- void *args),
- int flags,
- void *args,
- int iter_flags)
-{
- struct xfs_perag *pag;
- int error = 0;
- int last_error = 0;
- xfs_agnumber_t ag;
-
- ag = 0;
- while ((pag = xfs_perag_get(mp, ag))) {
- ag = pag->pag_agno + 1;
- error = xfs_inode_ag_walk(mp, pag, execute, flags, args, -1,
- iter_flags);
- xfs_perag_put(pag);
- if (error) {
- last_error = error;
- if (error == -EFSCORRUPTED)
- break;
- }
- }
- return last_error;
-}
-
-int
-xfs_inode_ag_iterator(
- struct xfs_mount *mp,
- int (*execute)(struct xfs_inode *ip, int flags,
- void *args),
- int flags,
- void *args)
-{
- return xfs_inode_ag_iterator_flags(mp, execute, flags, args, 0);
-}
-
-int
-xfs_inode_ag_iterator_tag(
- struct xfs_mount *mp,
- int (*execute)(struct xfs_inode *ip, int flags,
- void *args),
- int flags,
- void *args,
- int tag)
-{
- struct xfs_perag *pag;
- int error = 0;
- int last_error = 0;
- xfs_agnumber_t ag;
-
- ag = 0;
- while ((pag = xfs_perag_get_tag(mp, ag, tag))) {
- ag = pag->pag_agno + 1;
- error = xfs_inode_ag_walk(mp, pag, execute, flags, args, tag,
- 0);
- xfs_perag_put(pag);
- if (error) {
- last_error = error;
- if (error == -EFSCORRUPTED)
- break;
- }
- }
- return last_error;
-}
-
/*
* Grab the inode for reclaim exclusively.
* Return 0 if we grabbed it, non-zero otherwise.
@@ -1128,7 +1133,7 @@ restart:
if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
xfs_iunpin_wait(ip);
/* xfs_iflush_abort() drops the flush lock */
- xfs_iflush_abort(ip, false);
+ xfs_iflush_abort(ip);
goto reclaim;
}
if (xfs_ipincount(ip)) {
@@ -1419,59 +1424,90 @@ xfs_reclaim_inodes_count(
return reclaimable;
}
-STATIC int
+STATIC bool
xfs_inode_match_id(
struct xfs_inode *ip,
struct xfs_eofblocks *eofb)
{
if ((eofb->eof_flags & XFS_EOF_FLAGS_UID) &&
!uid_eq(VFS_I(ip)->i_uid, eofb->eof_uid))
- return 0;
+ return false;
if ((eofb->eof_flags & XFS_EOF_FLAGS_GID) &&
!gid_eq(VFS_I(ip)->i_gid, eofb->eof_gid))
- return 0;
+ return false;
if ((eofb->eof_flags & XFS_EOF_FLAGS_PRID) &&
ip->i_d.di_projid != eofb->eof_prid)
- return 0;
+ return false;
- return 1;
+ return true;
}
/*
* A union-based inode filtering algorithm. Process the inode if any of the
* criteria match. This is for global/internal scans only.
*/
-STATIC int
+STATIC bool
xfs_inode_match_id_union(
struct xfs_inode *ip,
struct xfs_eofblocks *eofb)
{
if ((eofb->eof_flags & XFS_EOF_FLAGS_UID) &&
uid_eq(VFS_I(ip)->i_uid, eofb->eof_uid))
- return 1;
+ return true;
if ((eofb->eof_flags & XFS_EOF_FLAGS_GID) &&
gid_eq(VFS_I(ip)->i_gid, eofb->eof_gid))
- return 1;
+ return true;
if ((eofb->eof_flags & XFS_EOF_FLAGS_PRID) &&
ip->i_d.di_projid == eofb->eof_prid)
- return 1;
+ return true;
- return 0;
+ return false;
+}
+
+/*
+ * Is this inode @ip eligible for eof/cow block reclamation, given some
+ * filtering parameters @eofb? The inode is eligible if @eofb is null or
+ * if the predicate functions match.
+ */
+static bool
+xfs_inode_matches_eofb(
+ struct xfs_inode *ip,
+ struct xfs_eofblocks *eofb)
+{
+ bool match;
+
+ if (!eofb)
+ return true;
+
+ if (eofb->eof_flags & XFS_EOF_FLAGS_UNION)
+ match = xfs_inode_match_id_union(ip, eofb);
+ else
+ match = xfs_inode_match_id(ip, eofb);
+ if (!match)
+ return false;
+
+ /* skip the inode if the file size is too small */
+ if ((eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE) &&
+ XFS_ISIZE(ip) < eofb->eof_min_file_size)
+ return false;
+
+ return true;
}
STATIC int
xfs_inode_free_eofblocks(
struct xfs_inode *ip,
- int flags,
void *args)
{
- int ret = 0;
- struct xfs_eofblocks *eofb = args;
- int match;
+ struct xfs_eofblocks *eofb = args;
+ bool wait;
+ int ret;
+
+ wait = eofb && (eofb->eof_flags & XFS_EOF_FLAGS_SYNC);
if (!xfs_can_free_eofblocks(ip, false)) {
/* inode could be preallocated or append-only */
@@ -1484,62 +1520,34 @@ xfs_inode_free_eofblocks(
* If the mapping is dirty the operation can block and wait for some
* time. Unless we are waiting, skip it.
*/
- if (!(flags & SYNC_WAIT) &&
- mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY))
+ if (!wait && mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY))
return 0;
- if (eofb) {
- if (eofb->eof_flags & XFS_EOF_FLAGS_UNION)
- match = xfs_inode_match_id_union(ip, eofb);
- else
- match = xfs_inode_match_id(ip, eofb);
- if (!match)
- return 0;
-
- /* skip the inode if the file size is too small */
- if (eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE &&
- XFS_ISIZE(ip) < eofb->eof_min_file_size)
- return 0;
- }
+ if (!xfs_inode_matches_eofb(ip, eofb))
+ return 0;
/*
* If the caller is waiting, return -EAGAIN to keep the background
* scanner moving and revisit the inode in a subsequent pass.
*/
if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
- if (flags & SYNC_WAIT)
- ret = -EAGAIN;
- return ret;
+ if (wait)
+ return -EAGAIN;
+ return 0;
}
+
ret = xfs_free_eofblocks(ip);
xfs_iunlock(ip, XFS_IOLOCK_EXCL);
return ret;
}
-static int
-__xfs_icache_free_eofblocks(
- struct xfs_mount *mp,
- struct xfs_eofblocks *eofb,
- int (*execute)(struct xfs_inode *ip, int flags,
- void *args),
- int tag)
-{
- int flags = SYNC_TRYLOCK;
-
- if (eofb && (eofb->eof_flags & XFS_EOF_FLAGS_SYNC))
- flags = SYNC_WAIT;
-
- return xfs_inode_ag_iterator_tag(mp, execute, flags,
- eofb, tag);
-}
-
int
xfs_icache_free_eofblocks(
struct xfs_mount *mp,
struct xfs_eofblocks *eofb)
{
- return __xfs_icache_free_eofblocks(mp, eofb, xfs_inode_free_eofblocks,
+ return xfs_inode_walk(mp, 0, xfs_inode_free_eofblocks, eofb,
XFS_ICI_EOFBLOCKS_TAG);
}
@@ -1756,29 +1764,16 @@ xfs_prep_free_cowblocks(
STATIC int
xfs_inode_free_cowblocks(
struct xfs_inode *ip,
- int flags,
void *args)
{
struct xfs_eofblocks *eofb = args;
- int match;
int ret = 0;
if (!xfs_prep_free_cowblocks(ip))
return 0;
- if (eofb) {
- if (eofb->eof_flags & XFS_EOF_FLAGS_UNION)
- match = xfs_inode_match_id_union(ip, eofb);
- else
- match = xfs_inode_match_id(ip, eofb);
- if (!match)
- return 0;
-
- /* skip the inode if the file size is too small */
- if (eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE &&
- XFS_ISIZE(ip) < eofb->eof_min_file_size)
- return 0;
- }
+ if (!xfs_inode_matches_eofb(ip, eofb))
+ return 0;
/* Free the CoW blocks */
xfs_ilock(ip, XFS_IOLOCK_EXCL);
@@ -1802,7 +1797,7 @@ xfs_icache_free_cowblocks(
struct xfs_mount *mp,
struct xfs_eofblocks *eofb)
{
- return __xfs_icache_free_eofblocks(mp, eofb, xfs_inode_free_cowblocks,
+ return xfs_inode_walk(mp, 0, xfs_inode_free_cowblocks, eofb,
XFS_ICI_COWBLOCKS_TAG);
}
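The xfs_inode_matches_eofb() helper added above folds the old open-coded filtering from the eofblocks and cowblocks walkers into one predicate: a NULL filter matches everything, id matching runs in either intersection (default) or union mode, and a minimum-size floor can be applied on top. A small stand-alone sketch of that combination, with hypothetical flag names and a trimmed-down filter struct in place of struct xfs_eofblocks, might look like:

#include <stdbool.h>
#include <stdint.h>

/* Hypothetical flag names modelled on the XFS_EOF_FLAGS_* values. */
#define FILTER_UID		(1u << 0)
#define FILTER_GID		(1u << 1)
#define FILTER_UNION		(1u << 2)
#define FILTER_MINFILESIZE	(1u << 3)

struct filter {
	unsigned int	flags;
	uint32_t	uid;
	uint32_t	gid;
	uint64_t	min_size;
};

struct inode_info {
	uint32_t	uid;
	uint32_t	gid;
	uint64_t	size;
};

/* Intersection mode: every requested id criterion must match. */
static bool match_id(const struct inode_info *ip, const struct filter *f)
{
	if ((f->flags & FILTER_UID) && ip->uid != f->uid)
		return false;
	if ((f->flags & FILTER_GID) && ip->gid != f->gid)
		return false;
	return true;
}

/* Union mode: any requested id criterion may match. */
static bool match_id_union(const struct inode_info *ip, const struct filter *f)
{
	if ((f->flags & FILTER_UID) && ip->uid == f->uid)
		return true;
	if ((f->flags & FILTER_GID) && ip->gid == f->gid)
		return true;
	return false;
}

/* Combined predicate, shaped like xfs_inode_matches_eofb(). */
static bool matches_filter(const struct inode_info *ip, const struct filter *f)
{
	bool match;

	if (!f)
		return true;

	match = (f->flags & FILTER_UNION) ? match_id_union(ip, f)
					  : match_id(ip, f);
	if (!match)
		return false;

	if ((f->flags & FILTER_MINFILESIZE) && ip->size < f->min_size)
		return false;

	return true;
}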
diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h
index 48f1fd2bb6ad..93b54e7d55f0 100644
--- a/fs/xfs/xfs_icache.h
+++ b/fs/xfs/xfs_icache.h
@@ -24,7 +24,7 @@ struct xfs_eofblocks {
* tags for inode radix tree
*/
#define XFS_ICI_NO_TAG (-1) /* special flag for an untagged lookup
- in xfs_inode_ag_iterator */
+ in xfs_inode_walk */
#define XFS_ICI_RECLAIM_TAG 0 /* inode is to be reclaimed */
#define XFS_ICI_EOFBLOCKS_TAG 1 /* inode has blocks beyond EOF */
#define XFS_ICI_COWBLOCKS_TAG 2 /* inode can have cow blocks to gc */
@@ -40,7 +40,7 @@ struct xfs_eofblocks {
/*
* flags for AG inode iterator
*/
-#define XFS_AGITER_INEW_WAIT 0x1 /* wait on new inodes */
+#define XFS_INODE_WALK_INEW_WAIT 0x1 /* wait on new inodes */
int xfs_iget(struct xfs_mount *mp, struct xfs_trans *tp, xfs_ino_t ino,
uint flags, uint lock_flags, xfs_inode_t **ipp);
@@ -71,50 +71,9 @@ int xfs_inode_free_quota_cowblocks(struct xfs_inode *ip);
void xfs_cowblocks_worker(struct work_struct *);
void xfs_queue_cowblocks(struct xfs_mount *);
-int xfs_inode_ag_iterator(struct xfs_mount *mp,
- int (*execute)(struct xfs_inode *ip, int flags, void *args),
- int flags, void *args);
-int xfs_inode_ag_iterator_flags(struct xfs_mount *mp,
- int (*execute)(struct xfs_inode *ip, int flags, void *args),
- int flags, void *args, int iter_flags);
-int xfs_inode_ag_iterator_tag(struct xfs_mount *mp,
- int (*execute)(struct xfs_inode *ip, int flags, void *args),
- int flags, void *args, int tag);
-
-static inline int
-xfs_fs_eofblocks_from_user(
- struct xfs_fs_eofblocks *src,
- struct xfs_eofblocks *dst)
-{
- if (src->eof_version != XFS_EOFBLOCKS_VERSION)
- return -EINVAL;
-
- if (src->eof_flags & ~XFS_EOF_FLAGS_VALID)
- return -EINVAL;
-
- if (memchr_inv(&src->pad32, 0, sizeof(src->pad32)) ||
- memchr_inv(src->pad64, 0, sizeof(src->pad64)))
- return -EINVAL;
-
- dst->eof_flags = src->eof_flags;
- dst->eof_prid = src->eof_prid;
- dst->eof_min_file_size = src->eof_min_file_size;
-
- dst->eof_uid = INVALID_UID;
- if (src->eof_flags & XFS_EOF_FLAGS_UID) {
- dst->eof_uid = make_kuid(current_user_ns(), src->eof_uid);
- if (!uid_valid(dst->eof_uid))
- return -EINVAL;
- }
-
- dst->eof_gid = INVALID_GID;
- if (src->eof_flags & XFS_EOF_FLAGS_GID) {
- dst->eof_gid = make_kgid(current_user_ns(), src->eof_gid);
- if (!gid_valid(dst->eof_gid))
- return -EINVAL;
- }
- return 0;
-}
+int xfs_inode_walk(struct xfs_mount *mp, int iter_flags,
+ int (*execute)(struct xfs_inode *ip, void *args),
+ void *args, int tag);
int xfs_icache_inode_is_allocated(struct xfs_mount *mp, struct xfs_trans *tp,
xfs_ino_t ino, bool *inuse);
diff --git a/fs/xfs/xfs_icreate_item.c b/fs/xfs/xfs_icreate_item.c
index 490fee22b878..287a9e5c7d75 100644
--- a/fs/xfs/xfs_icreate_item.c
+++ b/fs/xfs/xfs_icreate_item.c
@@ -6,11 +6,19 @@
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
+#include "xfs_format.h"
#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_trans_priv.h"
#include "xfs_icreate_item.h"
#include "xfs_log.h"
+#include "xfs_log_priv.h"
+#include "xfs_log_recover.h"
+#include "xfs_ialloc.h"
+#include "xfs_trace.h"
kmem_zone_t *xfs_icreate_zone; /* inode create item zone */
@@ -107,3 +115,147 @@ xfs_icreate_log(
tp->t_flags |= XFS_TRANS_DIRTY;
set_bit(XFS_LI_DIRTY, &icp->ic_item.li_flags);
}
+
+static enum xlog_recover_reorder
+xlog_recover_icreate_reorder(
+ struct xlog_recover_item *item)
+{
+ /*
+ * Inode allocation buffers must be replayed before subsequent inode
+ * items try to modify those buffers. ICREATE items are the logical
+ * equivalent of logging a newly initialized inode buffer, so recover
+ * these at the same time that we recover logged buffers.
+ */
+ return XLOG_REORDER_BUFFER_LIST;
+}
+
+/*
+ * This routine is called when an inode create format structure is found in a
+ * committed transaction in the log. Its purpose is to initialise the inodes
+ * being allocated on disk. This requires us to get inode cluster buffers that
+ * match the range to be initialised, stamped with inode templates and written
+ * by delayed write so that subsequent modifications will hit the cached buffer
+ * and only need writing out at the end of recovery.
+ */
+STATIC int
+xlog_recover_icreate_commit_pass2(
+ struct xlog *log,
+ struct list_head *buffer_list,
+ struct xlog_recover_item *item,
+ xfs_lsn_t lsn)
+{
+ struct xfs_mount *mp = log->l_mp;
+ struct xfs_icreate_log *icl;
+ struct xfs_ino_geometry *igeo = M_IGEO(mp);
+ xfs_agnumber_t agno;
+ xfs_agblock_t agbno;
+ unsigned int count;
+ unsigned int isize;
+ xfs_agblock_t length;
+ int bb_per_cluster;
+ int cancel_count;
+ int nbufs;
+ int i;
+
+ icl = (struct xfs_icreate_log *)item->ri_buf[0].i_addr;
+ if (icl->icl_type != XFS_LI_ICREATE) {
+ xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad type");
+ return -EINVAL;
+ }
+
+ if (icl->icl_size != 1) {
+ xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad icl size");
+ return -EINVAL;
+ }
+
+ agno = be32_to_cpu(icl->icl_ag);
+ if (agno >= mp->m_sb.sb_agcount) {
+ xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad agno");
+ return -EINVAL;
+ }
+ agbno = be32_to_cpu(icl->icl_agbno);
+ if (!agbno || agbno == NULLAGBLOCK || agbno >= mp->m_sb.sb_agblocks) {
+ xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad agbno");
+ return -EINVAL;
+ }
+ isize = be32_to_cpu(icl->icl_isize);
+ if (isize != mp->m_sb.sb_inodesize) {
+ xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad isize");
+ return -EINVAL;
+ }
+ count = be32_to_cpu(icl->icl_count);
+ if (!count) {
+ xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad count");
+ return -EINVAL;
+ }
+ length = be32_to_cpu(icl->icl_length);
+ if (!length || length >= mp->m_sb.sb_agblocks) {
+ xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad length");
+ return -EINVAL;
+ }
+
+ /*
+ * The inode chunk is either full or sparse and we only support
+ * m_ino_geo.ialloc_min_blks sized sparse allocations at this time.
+ */
+ if (length != igeo->ialloc_blks &&
+ length != igeo->ialloc_min_blks) {
+ xfs_warn(log->l_mp,
+ "%s: unsupported chunk length", __FUNCTION__);
+ return -EINVAL;
+ }
+
+ /* verify inode count is consistent with extent length */
+ if ((count >> mp->m_sb.sb_inopblog) != length) {
+ xfs_warn(log->l_mp,
+ "%s: inconsistent inode count and chunk length",
+ __FUNCTION__);
+ return -EINVAL;
+ }
+
+ /*
+ * The icreate transaction can cover multiple cluster buffers and these
+ * buffers could have been freed and reused. Check the individual
+ * buffers for cancellation so we don't overwrite anything written after
+ * a cancellation.
+ */
+ bb_per_cluster = XFS_FSB_TO_BB(mp, igeo->blocks_per_cluster);
+ nbufs = length / igeo->blocks_per_cluster;
+ for (i = 0, cancel_count = 0; i < nbufs; i++) {
+ xfs_daddr_t daddr;
+
+ daddr = XFS_AGB_TO_DADDR(mp, agno,
+ agbno + i * igeo->blocks_per_cluster);
+ if (xlog_is_buffer_cancelled(log, daddr, bb_per_cluster))
+ cancel_count++;
+ }
+
+ /*
+ * We currently only use icreate for a single allocation at a time. This
+ * means we should expect either all or none of the buffers to be
+ * cancelled. Be conservative and skip replay if at least one buffer is
+ * cancelled, but warn the user that something is awry if the buffers
+ * are not consistent.
+ *
+ * XXX: This must be refined to only skip cancelled clusters once we use
+ * icreate for multiple chunk allocations.
+ */
+ ASSERT(!cancel_count || cancel_count == nbufs);
+ if (cancel_count) {
+ if (cancel_count != nbufs)
+ xfs_warn(mp,
+ "WARNING: partial inode chunk cancellation, skipped icreate.");
+ trace_xfs_log_recover_icreate_cancel(log, icl);
+ return 0;
+ }
+
+ trace_xfs_log_recover_icreate_recover(log, icl);
+ return xfs_ialloc_inode_init(mp, NULL, buffer_list, count, agno, agbno,
+ length, be32_to_cpu(icl->icl_gen));
+}
+
+const struct xlog_recover_item_ops xlog_icreate_item_ops = {
+ .item_type = XFS_LI_ICREATE,
+ .reorder = xlog_recover_icreate_reorder,
+ .commit_pass2 = xlog_recover_icreate_commit_pass2,
+};
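The icreate validation above ties the logged inode count to the logged extent length through the superblock geometry: sb_inopblog is log2 of the inodes per filesystem block, so count >> sb_inopblog must equal the chunk length in blocks. A tiny sketch of that consistency check, with the geometry passed in as plain parameters rather than read from a hypothetical superblock, might look like this:

#include <stdbool.h>
#include <stdint.h>

/*
 * Verify that an icreate record's inode count is consistent with its extent
 * length: count >> inopblog is the number of blocks those inodes occupy and
 * must equal the logged length.  inopblog is log2(inodes per block).
 */
static bool icreate_geometry_ok(uint32_t count, uint32_t length,
				unsigned int inopblog)
{
	if (count == 0 || length == 0)
		return false;
	return (count >> inopblog) == length;
}

/* Example: 64 inodes at 8 inodes per block (inopblog = 3) span 8 blocks. */
static bool example(void)
{
	return icreate_geometry_ok(64, 8, 3);	/* true */
}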
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index d1772786af29..64f5f9a440ae 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -112,7 +112,7 @@ xfs_ilock_data_map_shared(
{
uint lock_mode = XFS_ILOCK_SHARED;
- if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE &&
+ if (ip->i_df.if_format == XFS_DINODE_FMT_BTREE &&
(ip->i_df.if_flags & XFS_IFEXTENTS) == 0)
lock_mode = XFS_ILOCK_EXCL;
xfs_ilock(ip, lock_mode);
@@ -125,7 +125,8 @@ xfs_ilock_attr_map_shared(
{
uint lock_mode = XFS_ILOCK_SHARED;
- if (ip->i_d.di_aformat == XFS_DINODE_FMT_BTREE &&
+ if (ip->i_afp &&
+ ip->i_afp->if_format == XFS_DINODE_FMT_BTREE &&
(ip->i_afp->if_flags & XFS_IFEXTENTS) == 0)
lock_mode = XFS_ILOCK_EXCL;
xfs_ilock(ip, lock_mode);
@@ -825,7 +826,7 @@ xfs_ialloc(
inode->i_mode &= ~S_ISGID;
ip->i_d.di_size = 0;
- ip->i_d.di_nextents = 0;
+ ip->i_df.if_nextents = 0;
ASSERT(ip->i_d.di_nblocks == 0);
tv = current_time(inode);
@@ -851,7 +852,7 @@ xfs_ialloc(
case S_IFCHR:
case S_IFBLK:
case S_IFSOCK:
- ip->i_d.di_format = XFS_DINODE_FMT_DEV;
+ ip->i_df.if_format = XFS_DINODE_FMT_DEV;
ip->i_df.if_flags = 0;
flags |= XFS_ILOG_DEV;
break;
@@ -907,7 +908,7 @@ xfs_ialloc(
}
/* FALLTHROUGH */
case S_IFLNK:
- ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS;
+ ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS;
ip->i_df.if_flags = XFS_IFEXTENTS;
ip->i_df.if_bytes = 0;
ip->i_df.if_u1.if_root = NULL;
@@ -915,11 +916,6 @@ xfs_ialloc(
default:
ASSERT(0);
}
- /*
- * Attribute fork settings for new inode.
- */
- ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
- ip->i_d.di_anextents = 0;
/*
* Log the new values stuffed into the inode.
@@ -1686,7 +1682,7 @@ xfs_inactive_truncate(
if (error)
goto error_trans_cancel;
- ASSERT(ip->i_d.di_nextents == 0);
+ ASSERT(ip->i_df.if_nextents == 0);
error = xfs_trans_commit(tp);
if (error)
@@ -1836,7 +1832,7 @@ xfs_inactive(
if (S_ISREG(VFS_I(ip)->i_mode) &&
(ip->i_d.di_size != 0 || XFS_ISIZE(ip) != 0 ||
- ip->i_d.di_nextents > 0 || ip->i_delayed_blks > 0))
+ ip->i_df.if_nextents > 0 || ip->i_delayed_blks > 0))
truncate = 1;
error = xfs_qm_dqattach(ip);
@@ -1862,7 +1858,6 @@ xfs_inactive(
}
ASSERT(!ip->i_afp);
- ASSERT(ip->i_d.di_anextents == 0);
ASSERT(ip->i_d.di_forkoff == 0);
/*
@@ -2172,7 +2167,7 @@ xfs_iunlink_update_inode(
ASSERT(xfs_verify_agino_or_null(mp, agno, next_agino));
- error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &ibp, 0, 0);
+ error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &ibp, 0);
if (error)
return error;
@@ -2302,7 +2297,7 @@ xfs_iunlink_map_ino(
return error;
}
- error = xfs_imap_to_bp(mp, tp, imap, dipp, bpp, 0, 0);
+ error = xfs_imap_to_bp(mp, tp, imap, dipp, bpp, 0);
if (error) {
xfs_warn(mp, "%s: xfs_imap_to_bp returned error %d.",
__func__, error);
@@ -2602,7 +2597,7 @@ xfs_ifree_cluster(
xfs_daddr_t blkno;
xfs_buf_t *bp;
xfs_inode_t *ip;
- xfs_inode_log_item_t *iip;
+ struct xfs_inode_log_item *iip;
struct xfs_log_item *lip;
struct xfs_perag *pag;
struct xfs_ino_geometry *igeo = M_IGEO(mp);
@@ -2662,7 +2657,7 @@ xfs_ifree_cluster(
*/
list_for_each_entry(lip, &bp->b_li_list, li_bio_list) {
if (lip->li_type == XFS_LI_INODE) {
- iip = (xfs_inode_log_item_t *)lip;
+ iip = (struct xfs_inode_log_item *)lip;
ASSERT(iip->ili_logged == 1);
lip->li_cb = xfs_istale_done;
xfs_trans_ail_copy_lsn(mp->m_ail,
@@ -2712,24 +2707,6 @@ xfs_ifree_cluster(
}
/*
- * Free any local-format buffers sitting around before we reset to
- * extents format.
- */
-static inline void
-xfs_ifree_local_data(
- struct xfs_inode *ip,
- int whichfork)
-{
- struct xfs_ifork *ifp;
-
- if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL)
- return;
-
- ifp = XFS_IFORK_PTR(ip, whichfork);
- xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
-}
-
-/*
* This is called to return an inode to the inode free list.
* The inode should already be truncated to 0 length and have
* no pages associated with it. This routine also assumes that
@@ -2749,8 +2726,7 @@ xfs_ifree(
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
ASSERT(VFS_I(ip)->i_nlink == 0);
- ASSERT(ip->i_d.di_nextents == 0);
- ASSERT(ip->i_d.di_anextents == 0);
+ ASSERT(ip->i_df.if_nextents == 0);
ASSERT(ip->i_d.di_size == 0 || !S_ISREG(VFS_I(ip)->i_mode));
ASSERT(ip->i_d.di_nblocks == 0);
@@ -2765,16 +2741,23 @@ xfs_ifree(
if (error)
return error;
- xfs_ifree_local_data(ip, XFS_DATA_FORK);
- xfs_ifree_local_data(ip, XFS_ATTR_FORK);
+ /*
+ * Free any local-format data sitting around before we reset the
+ * data fork to extents format. Note that the attr fork data has
+ * already been freed by xfs_attr_inactive.
+ */
+ if (ip->i_df.if_format == XFS_DINODE_FMT_LOCAL) {
+ kmem_free(ip->i_df.if_u1.if_data);
+ ip->i_df.if_u1.if_data = NULL;
+ ip->i_df.if_bytes = 0;
+ }
VFS_I(ip)->i_mode = 0; /* mark incore inode as free */
ip->i_d.di_flags = 0;
ip->i_d.di_flags2 = 0;
ip->i_d.di_dmevmask = 0;
ip->i_d.di_forkoff = 0; /* mark the attr fork not in use */
- ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS;
- ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
+ ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS;
/* Don't attempt to replay owner changes for a deleted inode */
ip->i_itemp->ili_fields &= ~(XFS_ILOG_AOWNER|XFS_ILOG_DOWNER);
@@ -3496,6 +3479,7 @@ xfs_iflush_cluster(
struct xfs_inode **cilist;
struct xfs_inode *cip;
struct xfs_ino_geometry *igeo = M_IGEO(mp);
+ int error = 0;
int nr_found;
int clcount = 0;
int i;
@@ -3588,11 +3572,10 @@ xfs_iflush_cluster(
* re-check that it's dirty before flushing.
*/
if (!xfs_inode_clean(cip)) {
- int error;
error = xfs_iflush_int(cip, bp);
if (error) {
xfs_iunlock(cip, XFS_ILOCK_SHARED);
- goto cluster_corrupt_out;
+ goto out_free;
}
clcount++;
} else {
@@ -3611,37 +3594,7 @@ out_free:
kmem_free(cilist);
out_put:
xfs_perag_put(pag);
- return 0;
-
-
-cluster_corrupt_out:
- /*
- * Corruption detected in the clustering loop. Invalidate the
- * inode buffer and shut down the filesystem.
- */
- rcu_read_unlock();
-
- /*
- * We'll always have an inode attached to the buffer for completion
- * process by the time we are called from xfs_iflush(). Hence we have
- * always need to do IO completion processing to abort the inodes
- * attached to the buffer. handle them just like the shutdown case in
- * xfs_buf_submit().
- */
- ASSERT(bp->b_iodone);
- bp->b_flags |= XBF_ASYNC;
- bp->b_flags &= ~XBF_DONE;
- xfs_buf_stale(bp);
- xfs_buf_ioerror(bp, -EIO);
- xfs_buf_ioend(bp);
-
- xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
-
- /* abort the corrupt inode, as it was not attached to the buffer */
- xfs_iflush_abort(cip, false);
- kmem_free(cilist);
- xfs_perag_put(pag);
- return -EFSCORRUPTED;
+ return error;
}
/*
@@ -3667,8 +3620,8 @@ xfs_iflush(
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
ASSERT(xfs_isiflocked(ip));
- ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
- ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));
+ ASSERT(ip->i_df.if_format != XFS_DINODE_FMT_BTREE ||
+ ip->i_df.if_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));
*bpp = NULL;
@@ -3688,42 +3641,20 @@ xfs_iflush(
}
/*
- * This may have been unpinned because the filesystem is shutting
- * down forcibly. If that's the case we must not write this inode
- * to disk, because the log record didn't make it to disk.
- *
- * We also have to remove the log item from the AIL in this case,
- * as we wait for an empty AIL as part of the unmount process.
- */
- if (XFS_FORCED_SHUTDOWN(mp)) {
- error = -EIO;
- goto abort_out;
- }
-
- /*
* Get the buffer containing the on-disk inode. We are doing a try-lock
- * operation here, so we may get an EAGAIN error. In that case, we
- * simply want to return with the inode still dirty.
+ * operation here, so we may get an EAGAIN error. In that case, return
+ * leaving the inode dirty.
*
* If we get any other error, we effectively have a corruption situation
- * and we cannot flush the inode, so we treat it the same as failing
- * xfs_iflush_int().
+ * and we cannot flush the inode. Abort the flush and shut down.
*/
- error = xfs_imap_to_bp(mp, NULL, &ip->i_imap, &dip, &bp, XBF_TRYLOCK,
- 0);
+ error = xfs_imap_to_bp(mp, NULL, &ip->i_imap, &dip, &bp, XBF_TRYLOCK);
if (error == -EAGAIN) {
xfs_ifunlock(ip);
return error;
}
if (error)
- goto corrupt_out;
-
- /*
- * First flush out the inode that xfs_iflush was called with.
- */
- error = xfs_iflush_int(ip, bp);
- if (error)
- goto corrupt_out;
+ goto abort;
/*
* If the buffer is pinned then push on the log now so we won't
@@ -3733,61 +3664,32 @@ xfs_iflush(
xfs_log_force(mp, 0);
/*
- * inode clustering: try to gather other inodes into this write
+ * Flush the provided inode then attempt to gather others from the
+ * cluster into the write.
*
- * Note: Any error during clustering will result in the filesystem
- * being shut down and completion callbacks run on the cluster buffer.
- * As we have already flushed and attached this inode to the buffer,
- * it has already been aborted and released by xfs_iflush_cluster() and
- * so we have no further error handling to do here.
+ * Note: Once we attempt to flush an inode, we must run buffer
+ * completion callbacks on any failure. If this fails, simulate an I/O
+ * failure on the buffer and shut down.
*/
- error = xfs_iflush_cluster(ip, bp);
- if (error)
- return error;
+ error = xfs_iflush_int(ip, bp);
+ if (!error)
+ error = xfs_iflush_cluster(ip, bp);
+ if (error) {
+ bp->b_flags |= XBF_ASYNC;
+ xfs_buf_ioend_fail(bp);
+ goto shutdown;
+ }
*bpp = bp;
return 0;
-corrupt_out:
- if (bp)
- xfs_buf_relse(bp);
+abort:
+ xfs_iflush_abort(ip);
+shutdown:
xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
-abort_out:
- /* abort the corrupt inode, as it was not attached to the buffer */
- xfs_iflush_abort(ip, false);
return error;
}
-/*
- * If there are inline format data / attr forks attached to this inode,
- * make sure they're not corrupt.
- */
-bool
-xfs_inode_verify_forks(
- struct xfs_inode *ip)
-{
- struct xfs_ifork *ifp;
- xfs_failaddr_t fa;
-
- fa = xfs_ifork_verify_data(ip, &xfs_default_ifork_ops);
- if (fa) {
- ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
- xfs_inode_verifier_error(ip, -EFSCORRUPTED, "data fork",
- ifp->if_u1.if_data, ifp->if_bytes, fa);
- return false;
- }
-
- fa = xfs_ifork_verify_attr(ip, &xfs_default_ifork_ops);
- if (fa) {
- ifp = XFS_IFORK_PTR(ip, XFS_ATTR_FORK);
- xfs_inode_verifier_error(ip, -EFSCORRUPTED, "attr fork",
- ifp ? ifp->if_u1.if_data : NULL,
- ifp ? ifp->if_bytes : 0, fa);
- return false;
- }
- return true;
-}
-
STATIC int
xfs_iflush_int(
struct xfs_inode *ip,
@@ -3796,61 +3698,68 @@ xfs_iflush_int(
struct xfs_inode_log_item *iip = ip->i_itemp;
struct xfs_dinode *dip;
struct xfs_mount *mp = ip->i_mount;
+ int error;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
ASSERT(xfs_isiflocked(ip));
- ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
- ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));
+ ASSERT(ip->i_df.if_format != XFS_DINODE_FMT_BTREE ||
+ ip->i_df.if_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));
ASSERT(iip != NULL && iip->ili_fields != 0);
- /* set *dip = inode's place in the buffer */
dip = xfs_buf_offset(bp, ip->i_imap.im_boffset);
+ /*
+ * We don't flush the inode if any of the following checks fail, but we
+ * do still update the log item and attach to the backing buffer as if
+ * the flush happened. This is a formality to facilitate predictable
+ * error handling as the caller will shutdown and fail the buffer.
+ */
+ error = -EFSCORRUPTED;
if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC),
mp, XFS_ERRTAG_IFLUSH_1)) {
xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
"%s: Bad inode %Lu magic number 0x%x, ptr "PTR_FMT,
__func__, ip->i_ino, be16_to_cpu(dip->di_magic), dip);
- goto corrupt_out;
+ goto flush_out;
}
if (S_ISREG(VFS_I(ip)->i_mode)) {
if (XFS_TEST_ERROR(
- (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) &&
- (ip->i_d.di_format != XFS_DINODE_FMT_BTREE),
+ ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS &&
+ ip->i_df.if_format != XFS_DINODE_FMT_BTREE,
mp, XFS_ERRTAG_IFLUSH_3)) {
xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
"%s: Bad regular inode %Lu, ptr "PTR_FMT,
__func__, ip->i_ino, ip);
- goto corrupt_out;
+ goto flush_out;
}
} else if (S_ISDIR(VFS_I(ip)->i_mode)) {
if (XFS_TEST_ERROR(
- (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) &&
- (ip->i_d.di_format != XFS_DINODE_FMT_BTREE) &&
- (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL),
+ ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS &&
+ ip->i_df.if_format != XFS_DINODE_FMT_BTREE &&
+ ip->i_df.if_format != XFS_DINODE_FMT_LOCAL,
mp, XFS_ERRTAG_IFLUSH_4)) {
xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
"%s: Bad directory inode %Lu, ptr "PTR_FMT,
__func__, ip->i_ino, ip);
- goto corrupt_out;
+ goto flush_out;
}
}
- if (XFS_TEST_ERROR(ip->i_d.di_nextents + ip->i_d.di_anextents >
+ if (XFS_TEST_ERROR(ip->i_df.if_nextents + xfs_ifork_nextents(ip->i_afp) >
ip->i_d.di_nblocks, mp, XFS_ERRTAG_IFLUSH_5)) {
xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
"%s: detected corrupt incore inode %Lu, "
"total extents = %d, nblocks = %Ld, ptr "PTR_FMT,
__func__, ip->i_ino,
- ip->i_d.di_nextents + ip->i_d.di_anextents,
+ ip->i_df.if_nextents + xfs_ifork_nextents(ip->i_afp),
ip->i_d.di_nblocks, ip);
- goto corrupt_out;
+ goto flush_out;
}
if (XFS_TEST_ERROR(ip->i_d.di_forkoff > mp->m_sb.sb_inodesize,
mp, XFS_ERRTAG_IFLUSH_6)) {
xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
"%s: bad inode %Lu, forkoff 0x%x, ptr "PTR_FMT,
__func__, ip->i_ino, ip->i_d.di_forkoff, ip);
- goto corrupt_out;
+ goto flush_out;
}
/*
@@ -3865,9 +3774,16 @@ xfs_iflush_int(
if (!xfs_sb_version_has_v3inode(&mp->m_sb))
ip->i_d.di_flushiter++;
- /* Check the inline fork data before we write out. */
- if (!xfs_inode_verify_forks(ip))
- goto corrupt_out;
+ /*
+ * If there are inline format data / attr forks attached to this inode,
+ * make sure they are not corrupt.
+ */
+ if (ip->i_df.if_format == XFS_DINODE_FMT_LOCAL &&
+ xfs_ifork_verify_local_data(ip))
+ goto flush_out;
+ if (ip->i_afp && ip->i_afp->if_format == XFS_DINODE_FMT_LOCAL &&
+ xfs_ifork_verify_local_attr(ip))
+ goto flush_out;
/*
* Copy the dirty parts of the inode into the on-disk inode. We always
@@ -3910,6 +3826,8 @@ xfs_iflush_int(
* need the AIL lock, because it is a 64 bit value that cannot be read
* atomically.
*/
+ error = 0;
+flush_out:
iip->ili_last_fields = iip->ili_fields;
iip->ili_fields = 0;
iip->ili_fsync_fields = 0;
@@ -3919,10 +3837,10 @@ xfs_iflush_int(
&iip->ili_item.li_lsn);
/*
- * Attach the function xfs_iflush_done to the inode's
- * buffer. This will remove the inode from the AIL
- * and unlock the inode's flush lock when the inode is
- * completely written to disk.
+ * Attach the inode item callback to the buffer whether the flush
+ * succeeded or not. If not, the caller will shut down and fail I/O
+ * completion on the buffer to remove the inode from the AIL and release
+ * the flush lock.
*/
xfs_buf_attach_iodone(bp, xfs_iflush_done, &iip->ili_item);
@@ -3931,10 +3849,7 @@ xfs_iflush_int(
ASSERT(!list_empty(&bp->b_li_list));
ASSERT(bp->b_iodone != NULL);
- return 0;
-
-corrupt_out:
- return -EFSCORRUPTED;
+ return error;
}
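The change above folds the old corrupt_out exit into a single flush_out label: error defaults to -EFSCORRUPTED, every failed check jumps to flush_out, and the log item / iodone bookkeeping runs whether or not the copy-out happened. A minimal userspace sketch of that pattern, with check_magic, attach_callbacks and the local EFSCORRUPTED value as illustrative stand-ins rather than XFS code:

#include <stdbool.h>

#define EFSCORRUPTED 117	/* local stand-in for the kernel's EFSCORRUPTED */

static bool check_magic(unsigned short magic)
{
	return magic == 0x494e;		/* "IN", the on-disk inode magic */
}

static void attach_callbacks(void)
{
	/* stands in for xfs_buf_attach_iodone(): runs on success and failure */
}

static int flush_sketch(unsigned short magic, long nextents, long nblocks)
{
	int error = -EFSCORRUPTED;	/* assume failure until every check passes */

	if (!check_magic(magic))
		goto flush_out;
	if (nextents > nblocks)
		goto flush_out;

	/* ... copy the in-core inode into the on-disk buffer here ... */
	error = 0;
flush_out:
	attach_callbacks();		/* bookkeeping happens either way */
	return error;
}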
/* Release an inode. */
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index c6a63f6764a6..47d3b391030d 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -57,9 +57,6 @@ typedef struct xfs_inode {
struct xfs_icdinode i_d; /* most of ondisk inode */
- xfs_extnum_t i_cnextents; /* # of extents in cow fork */
- unsigned int i_cformat; /* format of cow fork */
-
/* VFS inode */
struct inode i_vnode; /* embedded VFS inode */
@@ -218,8 +215,7 @@ static inline bool xfs_inode_has_cow_data(struct xfs_inode *ip)
#define XFS_IFLOCK (1 << __XFS_IFLOCK_BIT)
#define __XFS_IPINNED_BIT 8 /* wakeup key for zero pin count */
#define XFS_IPINNED (1 << __XFS_IPINNED_BIT)
-#define XFS_IDONTCACHE (1 << 9) /* don't cache the inode long term */
-#define XFS_IEOFBLOCKS (1 << 10)/* has the preallocblocks tag set */
+#define XFS_IEOFBLOCKS (1 << 9) /* has the preallocblocks tag set */
/*
* If this unlinked inode is in the middle of recovery, don't let drop_inode
* truncate and free the inode. This can happen if we iget the inode during
@@ -467,6 +463,7 @@ int xfs_break_layouts(struct inode *inode, uint *iolock,
/* from xfs_iops.c */
extern void xfs_setup_inode(struct xfs_inode *ip);
extern void xfs_setup_iops(struct xfs_inode *ip);
+extern void xfs_diflags_to_iflags(struct xfs_inode *ip, bool init);
/*
* When setting up a newly allocated inode, we need to call
@@ -497,8 +494,6 @@ extern struct kmem_zone *xfs_inode_zone;
/* The default CoW extent size hint. */
#define XFS_DEFAULT_COWEXTSZ_HINT 32
-bool xfs_inode_verify_forks(struct xfs_inode *ip);
-
int xfs_iunlink_init(struct xfs_perag *pag);
void xfs_iunlink_destroy(struct xfs_perag *pag);
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index f779cca2346f..ba47bf65b772 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -36,10 +36,10 @@ xfs_inode_item_data_fork_size(
{
struct xfs_inode *ip = iip->ili_inode;
- switch (ip->i_d.di_format) {
+ switch (ip->i_df.if_format) {
case XFS_DINODE_FMT_EXTENTS:
if ((iip->ili_fields & XFS_ILOG_DEXT) &&
- ip->i_d.di_nextents > 0 &&
+ ip->i_df.if_nextents > 0 &&
ip->i_df.if_bytes > 0) {
/* worst case, doesn't subtract delalloc extents */
*nbytes += XFS_IFORK_DSIZE(ip);
@@ -77,10 +77,10 @@ xfs_inode_item_attr_fork_size(
{
struct xfs_inode *ip = iip->ili_inode;
- switch (ip->i_d.di_aformat) {
+ switch (ip->i_afp->if_format) {
case XFS_DINODE_FMT_EXTENTS:
if ((iip->ili_fields & XFS_ILOG_AEXT) &&
- ip->i_d.di_anextents > 0 &&
+ ip->i_afp->if_nextents > 0 &&
ip->i_afp->if_bytes > 0) {
/* worst case, doesn't subtract unused space */
*nbytes += XFS_IFORK_ASIZE(ip);
@@ -142,13 +142,13 @@ xfs_inode_item_format_data_fork(
struct xfs_inode *ip = iip->ili_inode;
size_t data_bytes;
- switch (ip->i_d.di_format) {
+ switch (ip->i_df.if_format) {
case XFS_DINODE_FMT_EXTENTS:
iip->ili_fields &=
~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | XFS_ILOG_DEV);
if ((iip->ili_fields & XFS_ILOG_DEXT) &&
- ip->i_d.di_nextents > 0 &&
+ ip->i_df.if_nextents > 0 &&
ip->i_df.if_bytes > 0) {
struct xfs_bmbt_rec *p;
@@ -227,18 +227,18 @@ xfs_inode_item_format_attr_fork(
struct xfs_inode *ip = iip->ili_inode;
size_t data_bytes;
- switch (ip->i_d.di_aformat) {
+ switch (ip->i_afp->if_format) {
case XFS_DINODE_FMT_EXTENTS:
iip->ili_fields &=
~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT);
if ((iip->ili_fields & XFS_ILOG_AEXT) &&
- ip->i_d.di_anextents > 0 &&
+ ip->i_afp->if_nextents > 0 &&
ip->i_afp->if_bytes > 0) {
struct xfs_bmbt_rec *p;
ASSERT(xfs_iext_count(ip->i_afp) ==
- ip->i_d.di_anextents);
+ ip->i_afp->if_nextents);
p = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_EXT);
data_bytes = xfs_iextents_copy(ip, p, XFS_ATTR_FORK);
@@ -305,7 +305,7 @@ xfs_inode_to_log_dinode(
struct inode *inode = VFS_I(ip);
to->di_magic = XFS_DINODE_MAGIC;
- to->di_format = from->di_format;
+ to->di_format = xfs_ifork_format(&ip->i_df);
to->di_uid = i_uid_read(inode);
to->di_gid = i_gid_read(inode);
to->di_projid_lo = from->di_projid & 0xffff;
@@ -326,10 +326,10 @@ xfs_inode_to_log_dinode(
to->di_size = from->di_size;
to->di_nblocks = from->di_nblocks;
to->di_extsize = from->di_extsize;
- to->di_nextents = from->di_nextents;
- to->di_anextents = from->di_anextents;
+ to->di_nextents = xfs_ifork_nextents(&ip->i_df);
+ to->di_anextents = xfs_ifork_nextents(ip->i_afp);
to->di_forkoff = from->di_forkoff;
- to->di_aformat = from->di_aformat;
+ to->di_aformat = xfs_ifork_format(ip->i_afp);
to->di_dmevmask = from->di_dmevmask;
to->di_dmstate = from->di_dmstate;
to->di_flags = from->di_flags;
@@ -497,21 +497,6 @@ xfs_inode_item_push(
if (xfs_ipincount(ip) > 0)
return XFS_ITEM_PINNED;
- /*
- * The buffer containing this item failed to be written back
- * previously. Resubmit the buffer for IO.
- */
- if (test_bit(XFS_LI_FAILED, &lip->li_flags)) {
- if (!xfs_buf_trylock(bp))
- return XFS_ITEM_LOCKED;
-
- if (!xfs_buf_resubmit_failed_buffers(bp, buffer_list))
- rval = XFS_ITEM_FLUSHING;
-
- xfs_buf_unlock(bp);
- return rval;
- }
-
if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED))
return XFS_ITEM_LOCKED;
@@ -777,17 +762,12 @@ xfs_iflush_done(
*/
void
xfs_iflush_abort(
- xfs_inode_t *ip,
- bool stale)
+ struct xfs_inode *ip)
{
- xfs_inode_log_item_t *iip = ip->i_itemp;
+ struct xfs_inode_log_item *iip = ip->i_itemp;
if (iip) {
- if (test_bit(XFS_LI_IN_AIL, &iip->ili_item.li_flags)) {
- xfs_trans_ail_remove(&iip->ili_item,
- stale ? SHUTDOWN_LOG_IO_ERROR :
- SHUTDOWN_CORRUPT_INCORE);
- }
+ xfs_trans_ail_delete(&iip->ili_item, 0);
iip->ili_logged = 0;
/*
* Clear the ili_last_fields bits now that we know that the
@@ -812,7 +792,7 @@ xfs_istale_done(
struct xfs_buf *bp,
struct xfs_log_item *lip)
{
- xfs_iflush_abort(INODE_ITEM(lip)->ili_inode, true);
+ xfs_iflush_abort(INODE_ITEM(lip)->ili_inode);
}
/*
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h
index 07a60e74c39c..60b34bb66e8e 100644
--- a/fs/xfs/xfs_inode_item.h
+++ b/fs/xfs/xfs_inode_item.h
@@ -13,7 +13,7 @@ struct xfs_bmbt_rec;
struct xfs_inode;
struct xfs_mount;
-typedef struct xfs_inode_log_item {
+struct xfs_inode_log_item {
struct xfs_log_item ili_item; /* common portion */
struct xfs_inode *ili_inode; /* inode ptr */
xfs_lsn_t ili_flush_lsn; /* lsn at last flush */
@@ -23,7 +23,7 @@ typedef struct xfs_inode_log_item {
unsigned int ili_last_fields; /* fields when flushed */
unsigned int ili_fields; /* fields to be logged */
unsigned int ili_fsync_fields; /* logged since last fsync */
-} xfs_inode_log_item_t;
+};
static inline int xfs_inode_clean(xfs_inode_t *ip)
{
@@ -34,7 +34,7 @@ extern void xfs_inode_item_init(struct xfs_inode *, struct xfs_mount *);
extern void xfs_inode_item_destroy(struct xfs_inode *);
extern void xfs_iflush_done(struct xfs_buf *, struct xfs_log_item *);
extern void xfs_istale_done(struct xfs_buf *, struct xfs_log_item *);
-extern void xfs_iflush_abort(struct xfs_inode *, bool);
+extern void xfs_iflush_abort(struct xfs_inode *);
extern int xfs_inode_item_format_convert(xfs_log_iovec_t *,
struct xfs_inode_log_format *);
diff --git a/fs/xfs/xfs_inode_item_recover.c b/fs/xfs/xfs_inode_item_recover.c
new file mode 100644
index 000000000000..dc3e26ff16c9
--- /dev/null
+++ b/fs/xfs/xfs_inode_item_recover.c
@@ -0,0 +1,394 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_inode.h"
+#include "xfs_trans.h"
+#include "xfs_inode_item.h"
+#include "xfs_trace.h"
+#include "xfs_trans_priv.h"
+#include "xfs_buf_item.h"
+#include "xfs_log.h"
+#include "xfs_error.h"
+#include "xfs_log_priv.h"
+#include "xfs_log_recover.h"
+#include "xfs_icache.h"
+#include "xfs_bmap_btree.h"
+
+STATIC void
+xlog_recover_inode_ra_pass2(
+ struct xlog *log,
+ struct xlog_recover_item *item)
+{
+ if (item->ri_buf[0].i_len == sizeof(struct xfs_inode_log_format)) {
+ struct xfs_inode_log_format *ilfp = item->ri_buf[0].i_addr;
+
+ xlog_buf_readahead(log, ilfp->ilf_blkno, ilfp->ilf_len,
+ &xfs_inode_buf_ra_ops);
+ } else {
+ struct xfs_inode_log_format_32 *ilfp = item->ri_buf[0].i_addr;
+
+ xlog_buf_readahead(log, ilfp->ilf_blkno, ilfp->ilf_len,
+ &xfs_inode_buf_ra_ops);
+ }
+}
+
+/*
+ * Inode fork owner changes
+ *
+ * If we have been told that we have to reparent the inode fork, it's because an
+ * extent swap operation on a CRC enabled filesystem has been done and we are
+ * replaying it. We need to walk the BMBT of the appropriate fork and change the
+ * owners of it.
+ *
+ * The complexity here is that we don't have an inode context to work with, so
+ * after we've replayed the inode we need to instantiate one. This is where the
+ * fun begins.
+ *
+ * We are in the middle of log recovery, so we can't run transactions. That
+ * means we cannot use cache coherent inode instantiation via xfs_iget(), as
+ * that will result in the corresponding iput() running the inode through
+ * xfs_inactive(). If we've just replayed an inode core that changes the link
+ * count to zero (i.e. it's been unlinked), then xfs_inactive() will run
+ * transactions (bad!).
+ *
+ * So, to avoid this, we instantiate an inode directly from the inode core we've
+ * just recovered. We have the buffer still locked, and all we really need to
+ * instantiate is the inode core and the forks being modified. We can do this
+ * manually, then run the inode btree owner change, and then tear down the
+ * xfs_inode without having to run any transactions at all.
+ *
+ * Also, because we don't have a transaction context available here to
+ * gather the buffers we modify for writeback, we pass the buffer_list to
+ * the operation for it to use instead.
+ */
+
+STATIC int
+xfs_recover_inode_owner_change(
+ struct xfs_mount *mp,
+ struct xfs_dinode *dip,
+ struct xfs_inode_log_format *in_f,
+ struct list_head *buffer_list)
+{
+ struct xfs_inode *ip;
+ int error;
+
+ ASSERT(in_f->ilf_fields & (XFS_ILOG_DOWNER|XFS_ILOG_AOWNER));
+
+ ip = xfs_inode_alloc(mp, in_f->ilf_ino);
+ if (!ip)
+ return -ENOMEM;
+
+ /* instantiate the inode */
+ ASSERT(dip->di_version >= 3);
+
+ error = xfs_inode_from_disk(ip, dip);
+ if (error)
+ goto out_free_ip;
+
+ if (in_f->ilf_fields & XFS_ILOG_DOWNER) {
+ ASSERT(in_f->ilf_fields & XFS_ILOG_DBROOT);
+ error = xfs_bmbt_change_owner(NULL, ip, XFS_DATA_FORK,
+ ip->i_ino, buffer_list);
+ if (error)
+ goto out_free_ip;
+ }
+
+ if (in_f->ilf_fields & XFS_ILOG_AOWNER) {
+ ASSERT(in_f->ilf_fields & XFS_ILOG_ABROOT);
+ error = xfs_bmbt_change_owner(NULL, ip, XFS_ATTR_FORK,
+ ip->i_ino, buffer_list);
+ if (error)
+ goto out_free_ip;
+ }
+
+out_free_ip:
+ xfs_inode_free(ip);
+ return error;
+}
+
+STATIC int
+xlog_recover_inode_commit_pass2(
+ struct xlog *log,
+ struct list_head *buffer_list,
+ struct xlog_recover_item *item,
+ xfs_lsn_t current_lsn)
+{
+ struct xfs_inode_log_format *in_f;
+ struct xfs_mount *mp = log->l_mp;
+ struct xfs_buf *bp;
+ struct xfs_dinode *dip;
+ int len;
+ char *src;
+ char *dest;
+ int error;
+ int attr_index;
+ uint fields;
+ struct xfs_log_dinode *ldip;
+ uint isize;
+ int need_free = 0;
+
+ if (item->ri_buf[0].i_len == sizeof(struct xfs_inode_log_format)) {
+ in_f = item->ri_buf[0].i_addr;
+ } else {
+ in_f = kmem_alloc(sizeof(struct xfs_inode_log_format), 0);
+ need_free = 1;
+ error = xfs_inode_item_format_convert(&item->ri_buf[0], in_f);
+ if (error)
+ goto error;
+ }
+
+ /*
+ * Inode buffers can be freed; look out for that case
+ * and do not replay the inode.
+ */
+ if (xlog_is_buffer_cancelled(log, in_f->ilf_blkno, in_f->ilf_len)) {
+ error = 0;
+ trace_xfs_log_recover_inode_cancel(log, in_f);
+ goto error;
+ }
+ trace_xfs_log_recover_inode_recover(log, in_f);
+
+ error = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len,
+ 0, &bp, &xfs_inode_buf_ops);
+ if (error)
+ goto error;
+ ASSERT(in_f->ilf_fields & XFS_ILOG_CORE);
+ dip = xfs_buf_offset(bp, in_f->ilf_boffset);
+
+ /*
+ * Make sure the place we're flushing out to really looks
+ * like an inode!
+ */
+ if (XFS_IS_CORRUPT(mp, !xfs_verify_magic16(bp, dip->di_magic))) {
+ xfs_alert(mp,
+ "%s: Bad inode magic number, dip = "PTR_FMT", dino bp = "PTR_FMT", ino = %Ld",
+ __func__, dip, bp, in_f->ilf_ino);
+ error = -EFSCORRUPTED;
+ goto out_release;
+ }
+ ldip = item->ri_buf[1].i_addr;
+ if (XFS_IS_CORRUPT(mp, ldip->di_magic != XFS_DINODE_MAGIC)) {
+ xfs_alert(mp,
+ "%s: Bad inode log record, rec ptr "PTR_FMT", ino %Ld",
+ __func__, item, in_f->ilf_ino);
+ error = -EFSCORRUPTED;
+ goto out_release;
+ }
+
+ /*
+ * If the inode has an LSN in it, recover the inode only if it's less
+ * than the lsn of the transaction we are replaying. Note: we still
+ * need to replay an owner change even though the inode is more recent
+ * than the transaction as there is no guarantee that all the btree
+ * blocks are more recent than this transaction, too.
+ */
+ if (dip->di_version >= 3) {
+ xfs_lsn_t lsn = be64_to_cpu(dip->di_lsn);
+
+ if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) {
+ trace_xfs_log_recover_inode_skip(log, in_f);
+ error = 0;
+ goto out_owner_change;
+ }
+ }
+
+ /*
+ * di_flushiter is only valid for v1/2 inodes. All changes for v3 inodes
+ * are transactional and if ordering is necessary we can determine that
+ * more accurately by the LSN field in the V3 inode core. Don't trust
+ * the inode versions, as we might be changing them here - use the
+ * superblock flag to determine whether we need to look at di_flushiter
+ * to skip replay when the on-disk inode is newer than the log one.
+ */
+ if (!xfs_sb_version_has_v3inode(&mp->m_sb) &&
+ ldip->di_flushiter < be16_to_cpu(dip->di_flushiter)) {
+ /*
+ * Deal with the wrap case: DI_MAX_FLUSH is treated
+ * as older than the small wrapped values.
+ */
+ if (be16_to_cpu(dip->di_flushiter) == DI_MAX_FLUSH &&
+ ldip->di_flushiter < (DI_MAX_FLUSH >> 1)) {
+ /* do nothing */
+ } else {
+ trace_xfs_log_recover_inode_skip(log, in_f);
+ error = 0;
+ goto out_release;
+ }
+ }
+
+ /* Take the opportunity to reset the flush iteration count */
+ ldip->di_flushiter = 0;
+
+ if (unlikely(S_ISREG(ldip->di_mode))) {
+ if ((ldip->di_format != XFS_DINODE_FMT_EXTENTS) &&
+ (ldip->di_format != XFS_DINODE_FMT_BTREE)) {
+ XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(3)",
+ XFS_ERRLEVEL_LOW, mp, ldip,
+ sizeof(*ldip));
+ xfs_alert(mp,
+ "%s: Bad regular inode log record, rec ptr "PTR_FMT", "
+ "ino ptr = "PTR_FMT", ino bp = "PTR_FMT", ino %Ld",
+ __func__, item, dip, bp, in_f->ilf_ino);
+ error = -EFSCORRUPTED;
+ goto out_release;
+ }
+ } else if (unlikely(S_ISDIR(ldip->di_mode))) {
+ if ((ldip->di_format != XFS_DINODE_FMT_EXTENTS) &&
+ (ldip->di_format != XFS_DINODE_FMT_BTREE) &&
+ (ldip->di_format != XFS_DINODE_FMT_LOCAL)) {
+ XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(4)",
+ XFS_ERRLEVEL_LOW, mp, ldip,
+ sizeof(*ldip));
+ xfs_alert(mp,
+ "%s: Bad dir inode log record, rec ptr "PTR_FMT", "
+ "ino ptr = "PTR_FMT", ino bp = "PTR_FMT", ino %Ld",
+ __func__, item, dip, bp, in_f->ilf_ino);
+ error = -EFSCORRUPTED;
+ goto out_release;
+ }
+ }
+ if (unlikely(ldip->di_nextents + ldip->di_anextents > ldip->di_nblocks)){
+ XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)",
+ XFS_ERRLEVEL_LOW, mp, ldip,
+ sizeof(*ldip));
+ xfs_alert(mp,
+ "%s: Bad inode log record, rec ptr "PTR_FMT", dino ptr "PTR_FMT", "
+ "dino bp "PTR_FMT", ino %Ld, total extents = %d, nblocks = %Ld",
+ __func__, item, dip, bp, in_f->ilf_ino,
+ ldip->di_nextents + ldip->di_anextents,
+ ldip->di_nblocks);
+ error = -EFSCORRUPTED;
+ goto out_release;
+ }
+ if (unlikely(ldip->di_forkoff > mp->m_sb.sb_inodesize)) {
+ XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(6)",
+ XFS_ERRLEVEL_LOW, mp, ldip,
+ sizeof(*ldip));
+ xfs_alert(mp,
+ "%s: Bad inode log record, rec ptr "PTR_FMT", dino ptr "PTR_FMT", "
+ "dino bp "PTR_FMT", ino %Ld, forkoff 0x%x", __func__,
+ item, dip, bp, in_f->ilf_ino, ldip->di_forkoff);
+ error = -EFSCORRUPTED;
+ goto out_release;
+ }
+ isize = xfs_log_dinode_size(mp);
+ if (unlikely(item->ri_buf[1].i_len > isize)) {
+ XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)",
+ XFS_ERRLEVEL_LOW, mp, ldip,
+ sizeof(*ldip));
+ xfs_alert(mp,
+ "%s: Bad inode log record length %d, rec ptr "PTR_FMT,
+ __func__, item->ri_buf[1].i_len, item);
+ error = -EFSCORRUPTED;
+ goto out_release;
+ }
+
+ /* recover the log dinode into the on-disk inode */
+ xfs_log_dinode_to_disk(ldip, dip);
+
+ fields = in_f->ilf_fields;
+ if (fields & XFS_ILOG_DEV)
+ xfs_dinode_put_rdev(dip, in_f->ilf_u.ilfu_rdev);
+
+ if (in_f->ilf_size == 2)
+ goto out_owner_change;
+ len = item->ri_buf[2].i_len;
+ src = item->ri_buf[2].i_addr;
+ ASSERT(in_f->ilf_size <= 4);
+ ASSERT((in_f->ilf_size == 3) || (fields & XFS_ILOG_AFORK));
+ ASSERT(!(fields & XFS_ILOG_DFORK) ||
+ (len == in_f->ilf_dsize));
+
+ switch (fields & XFS_ILOG_DFORK) {
+ case XFS_ILOG_DDATA:
+ case XFS_ILOG_DEXT:
+ memcpy(XFS_DFORK_DPTR(dip), src, len);
+ break;
+
+ case XFS_ILOG_DBROOT:
+ xfs_bmbt_to_bmdr(mp, (struct xfs_btree_block *)src, len,
+ (struct xfs_bmdr_block *)XFS_DFORK_DPTR(dip),
+ XFS_DFORK_DSIZE(dip, mp));
+ break;
+
+ default:
+ /*
+ * There are no data fork flags set.
+ */
+ ASSERT((fields & XFS_ILOG_DFORK) == 0);
+ break;
+ }
+
+ /*
+ * If we logged any attribute data, recover it. There may or
+ * may not have been any other non-core data logged in this
+ * transaction.
+ */
+ if (in_f->ilf_fields & XFS_ILOG_AFORK) {
+ if (in_f->ilf_fields & XFS_ILOG_DFORK) {
+ attr_index = 3;
+ } else {
+ attr_index = 2;
+ }
+ len = item->ri_buf[attr_index].i_len;
+ src = item->ri_buf[attr_index].i_addr;
+ ASSERT(len == in_f->ilf_asize);
+
+ switch (in_f->ilf_fields & XFS_ILOG_AFORK) {
+ case XFS_ILOG_ADATA:
+ case XFS_ILOG_AEXT:
+ dest = XFS_DFORK_APTR(dip);
+ ASSERT(len <= XFS_DFORK_ASIZE(dip, mp));
+ memcpy(dest, src, len);
+ break;
+
+ case XFS_ILOG_ABROOT:
+ dest = XFS_DFORK_APTR(dip);
+ xfs_bmbt_to_bmdr(mp, (struct xfs_btree_block *)src,
+ len, (struct xfs_bmdr_block *)dest,
+ XFS_DFORK_ASIZE(dip, mp));
+ break;
+
+ default:
+ xfs_warn(log->l_mp, "%s: Invalid flag", __func__);
+ ASSERT(0);
+ error = -EFSCORRUPTED;
+ goto out_release;
+ }
+ }
+
+out_owner_change:
+ /* Recover the swapext owner change unless inode has been deleted */
+ if ((in_f->ilf_fields & (XFS_ILOG_DOWNER|XFS_ILOG_AOWNER)) &&
+ (dip->di_mode != 0))
+ error = xfs_recover_inode_owner_change(mp, dip, in_f,
+ buffer_list);
+ /* re-generate the checksum. */
+ xfs_dinode_calc_crc(log->l_mp, dip);
+
+ ASSERT(bp->b_mount == mp);
+ bp->b_iodone = xlog_recover_iodone;
+ xfs_buf_delwri_queue(bp, buffer_list);
+
+out_release:
+ xfs_buf_relse(bp);
+error:
+ if (need_free)
+ kmem_free(in_f);
+ return error;
+}
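The flushiter skip above only applies to v1/v2 inodes and has to cope with the counter wrapping at DI_MAX_FLUSH. A small standalone sketch of that comparison, assuming DI_MAX_FLUSH is 0xffff; skip_inode_replay is a made-up name, not an XFS function:

#include <stdbool.h>
#include <stdint.h>

#define DI_MAX_FLUSH 0xffff

static bool skip_inode_replay(uint16_t log_flushiter, uint16_t disk_flushiter)
{
	if (log_flushiter >= disk_flushiter)
		return false;			/* log copy is newer: replay it */

	/* wrap case: disk sits at the max, log has wrapped back to small values */
	if (disk_flushiter == DI_MAX_FLUSH && log_flushiter < (DI_MAX_FLUSH >> 1))
		return false;			/* still replay */

	return true;				/* on-disk inode is newer: skip */
}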
+
+const struct xlog_recover_item_ops xlog_inode_item_ops = {
+ .item_type = XFS_LI_INODE,
+ .ra_pass2 = xlog_recover_inode_ra_pass2,
+ .commit_pass2 = xlog_recover_inode_commit_pass2,
+};
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 309958186d33..a40f88cf3ab7 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -1104,26 +1104,17 @@ xfs_fill_fsxattr(
bool attr,
struct fsxattr *fa)
{
+ struct xfs_ifork *ifp = attr ? ip->i_afp : &ip->i_df;
+
simple_fill_fsxattr(fa, xfs_ip2xflags(ip));
fa->fsx_extsize = ip->i_d.di_extsize << ip->i_mount->m_sb.sb_blocklog;
fa->fsx_cowextsize = ip->i_d.di_cowextsize <<
ip->i_mount->m_sb.sb_blocklog;
fa->fsx_projid = ip->i_d.di_projid;
-
- if (attr) {
- if (ip->i_afp) {
- if (ip->i_afp->if_flags & XFS_IFEXTENTS)
- fa->fsx_nextents = xfs_iext_count(ip->i_afp);
- else
- fa->fsx_nextents = ip->i_d.di_anextents;
- } else
- fa->fsx_nextents = 0;
- } else {
- if (ip->i_df.if_flags & XFS_IFEXTENTS)
- fa->fsx_nextents = xfs_iext_count(&ip->i_df);
- else
- fa->fsx_nextents = ip->i_d.di_nextents;
- }
+ if (ifp && (ifp->if_flags & XFS_IFEXTENTS))
+ fa->fsx_nextents = xfs_iext_count(ifp);
+ else
+ fa->fsx_nextents = xfs_ifork_nextents(ifp);
}
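The rewritten xfs_fill_fsxattr() picks the fork once and relies on xfs_ifork_nextents() returning 0 for a missing attribute fork. A simplified sketch of that selection pattern, with the struct layout and IFEXTENTS flag as stand-ins for the XFS definitions:

#include <stdint.h>

#define IFEXTENTS 0x1			/* "extent list is loaded in core" */

struct fork {
	unsigned int	flags;
	uint32_t	nextents;	/* count carried from the on-disk inode */
	uint32_t	loaded_count;	/* count of in-core extent records */
};

static uint32_t fork_extent_count(const struct fork *ifp)
{
	if (!ifp)			/* no attribute fork attached */
		return 0;
	if (ifp->flags & IFEXTENTS)
		return ifp->loaded_count;
	return ifp->nextents;
}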
STATIC int
@@ -1201,37 +1192,6 @@ xfs_flags2diflags2(
return di_flags2;
}
-STATIC void
-xfs_diflags_to_linux(
- struct xfs_inode *ip)
-{
- struct inode *inode = VFS_I(ip);
- unsigned int xflags = xfs_ip2xflags(ip);
-
- if (xflags & FS_XFLAG_IMMUTABLE)
- inode->i_flags |= S_IMMUTABLE;
- else
- inode->i_flags &= ~S_IMMUTABLE;
- if (xflags & FS_XFLAG_APPEND)
- inode->i_flags |= S_APPEND;
- else
- inode->i_flags &= ~S_APPEND;
- if (xflags & FS_XFLAG_SYNC)
- inode->i_flags |= S_SYNC;
- else
- inode->i_flags &= ~S_SYNC;
- if (xflags & FS_XFLAG_NOATIME)
- inode->i_flags |= S_NOATIME;
- else
- inode->i_flags &= ~S_NOATIME;
-#if 0 /* disabled until the flag switching races are sorted out */
- if (xflags & FS_XFLAG_DAX)
- inode->i_flags |= S_DAX;
- else
- inode->i_flags &= ~S_DAX;
-#endif
-}
-
static int
xfs_ioctl_setattr_xflags(
struct xfs_trans *tp,
@@ -1242,7 +1202,7 @@ xfs_ioctl_setattr_xflags(
uint64_t di_flags2;
/* Can't change realtime flag if any extents are allocated. */
- if ((ip->i_d.di_nextents || ip->i_delayed_blks) &&
+ if ((ip->i_df.if_nextents || ip->i_delayed_blks) &&
XFS_IS_REALTIME_INODE(ip) != (fa->fsx_xflags & FS_XFLAG_REALTIME))
return -EINVAL;
@@ -1269,7 +1229,7 @@ xfs_ioctl_setattr_xflags(
ip->i_d.di_flags = xfs_flags2diflags(ip, fa->fsx_xflags);
ip->i_d.di_flags2 = di_flags2;
- xfs_diflags_to_linux(ip);
+ xfs_diflags_to_iflags(ip, false);
xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
XFS_STATS_INC(mp, xs_ig_attrchg);
@@ -1420,7 +1380,7 @@ xfs_ioctl_setattr_check_extsize(
xfs_extlen_t size;
xfs_fsblock_t extsize_fsb;
- if (S_ISREG(VFS_I(ip)->i_mode) && ip->i_d.di_nextents &&
+ if (S_ISREG(VFS_I(ip)->i_mode) && ip->i_df.if_nextents &&
((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) != fa->fsx_extsize))
return -EINVAL;
@@ -1513,7 +1473,6 @@ xfs_ioctl_setattr(
struct fsxattr old_fa;
struct xfs_mount *mp = ip->i_mount;
struct xfs_trans *tp;
- struct xfs_dquot *udqp = NULL;
struct xfs_dquot *pdqp = NULL;
struct xfs_dquot *olddquot = NULL;
int code;
@@ -1536,7 +1495,7 @@ xfs_ioctl_setattr(
if (XFS_IS_QUOTA_ON(mp)) {
code = xfs_qm_vop_dqalloc(ip, VFS_I(ip)->i_uid,
VFS_I(ip)->i_gid, fa->fsx_projid,
- XFS_QMOPT_PQUOTA, &udqp, NULL, &pdqp);
+ XFS_QMOPT_PQUOTA, NULL, NULL, &pdqp);
if (code)
return code;
}
@@ -1560,7 +1519,7 @@ xfs_ioctl_setattr(
if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp) &&
ip->i_d.di_projid != fa->fsx_projid) {
- code = xfs_qm_vop_chown_reserve(tp, ip, udqp, NULL, pdqp,
+ code = xfs_qm_vop_chown_reserve(tp, ip, NULL, NULL, pdqp,
capable(CAP_FOWNER) ? XFS_QMOPT_FORCE_RES : 0);
if (code) /* out of quota */
goto error_trans_cancel;
@@ -1626,7 +1585,6 @@ xfs_ioctl_setattr(
* Release any dquot(s) the inode had kept before chown.
*/
xfs_qm_dqrele(olddquot);
- xfs_qm_dqrele(udqp);
xfs_qm_dqrele(pdqp);
return code;
@@ -1634,7 +1592,6 @@ xfs_ioctl_setattr(
error_trans_cancel:
xfs_trans_cancel(tp);
error_free_dquots:
- xfs_qm_dqrele(udqp);
xfs_qm_dqrele(pdqp);
return code;
}
@@ -2082,6 +2039,41 @@ out:
return error;
}
+static inline int
+xfs_fs_eofblocks_from_user(
+ struct xfs_fs_eofblocks *src,
+ struct xfs_eofblocks *dst)
+{
+ if (src->eof_version != XFS_EOFBLOCKS_VERSION)
+ return -EINVAL;
+
+ if (src->eof_flags & ~XFS_EOF_FLAGS_VALID)
+ return -EINVAL;
+
+ if (memchr_inv(&src->pad32, 0, sizeof(src->pad32)) ||
+ memchr_inv(src->pad64, 0, sizeof(src->pad64)))
+ return -EINVAL;
+
+ dst->eof_flags = src->eof_flags;
+ dst->eof_prid = src->eof_prid;
+ dst->eof_min_file_size = src->eof_min_file_size;
+
+ dst->eof_uid = INVALID_UID;
+ if (src->eof_flags & XFS_EOF_FLAGS_UID) {
+ dst->eof_uid = make_kuid(current_user_ns(), src->eof_uid);
+ if (!uid_valid(dst->eof_uid))
+ return -EINVAL;
+ }
+
+ dst->eof_gid = INVALID_GID;
+ if (src->eof_flags & XFS_EOF_FLAGS_GID) {
+ dst->eof_gid = make_kgid(current_user_ns(), src->eof_gid);
+ if (!gid_valid(dst->eof_gid))
+ return -EINVAL;
+ }
+ return 0;
+}
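xfs_fs_eofblocks_from_user() rejects the request unless every reserved padding byte is zero, which keeps those bytes available for future ABI extensions. A userspace sketch of the same check, with an illustrative struct layout and a hand-rolled all_zero() in place of memchr_inv():

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

struct user_req {			/* illustrative layout, not the XFS ABI */
	uint32_t version;
	uint32_t flags;
	uint32_t pad32;
	uint64_t pad64[12];
};

static bool all_zero(const unsigned char *p, size_t len)
{
	while (len--)
		if (*p++)
			return false;
	return true;
}

static int validate_req(const struct user_req *req, uint32_t cur_version)
{
	if (req->version != cur_version)
		return -1;
	if (!all_zero((const unsigned char *)&req->pad32, sizeof(req->pad32)) ||
	    !all_zero((const unsigned char *)req->pad64, sizeof(req->pad64)))
		return -1;		/* reserved bytes must be zero */
	return 0;
}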
+
/*
* Note: some of the ioctl's return positive numbers as a
* byte count indicating success, such as readlink_by_handle.
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index bb590a267a7f..b9a8c3798e08 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -352,22 +352,10 @@ xfs_quota_calc_throttle(
}
/*
- * If we are doing a write at the end of the file and there are no allocations
- * past this one, then extend the allocation out to the file system's write
- * iosize.
- *
* If we don't have a user specified preallocation size, dynamically increase
* the preallocation size as the size of the file grows. Cap the maximum size
* at a single extent or less if the filesystem is near full. The closer the
- * filesystem is to full, the smaller the maximum prealocation.
- *
- * As an exception we don't do any preallocation at all if the file is smaller
- * than the minimum preallocation and we are using the default dynamic
- * preallocation scheme, as it is likely this is the only write to the file that
- * is going to be done.
- *
- * We clean up any extra space left over when the file is closed in
- * xfs_inactive().
+ * filesystem is to being full, the smaller the maximum preallocation.
*/
STATIC xfs_fsblock_t
xfs_iomap_prealloc_size(
@@ -377,63 +365,70 @@ xfs_iomap_prealloc_size(
loff_t count,
struct xfs_iext_cursor *icur)
{
+ struct xfs_iext_cursor ncur = *icur;
+ struct xfs_bmbt_irec prev, got;
struct xfs_mount *mp = ip->i_mount;
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset);
- struct xfs_bmbt_irec prev;
- int shift = 0;
int64_t freesp;
xfs_fsblock_t qblocks;
- int qshift = 0;
xfs_fsblock_t alloc_blocks = 0;
+ xfs_extlen_t plen;
+ int shift = 0;
+ int qshift = 0;
- if (offset + count <= XFS_ISIZE(ip))
- return 0;
-
- if (!(mp->m_flags & XFS_MOUNT_ALLOCSIZE) &&
- (XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_allocsize_blocks)))
+ /*
+ * As an exception we don't do any preallocation at all if the file is
+ * smaller than the minimum preallocation and we are using the default
+ * dynamic preallocation scheme, as it is likely this is the only write
+ * to the file that is going to be done.
+ */
+ if (XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_allocsize_blocks))
return 0;
/*
- * If an explicit allocsize is set, the file is small, or we
- * are writing behind a hole, then use the minimum prealloc:
+ * Use the minimum preallocation size for small files or if we are
+ * writing right after a hole.
*/
- if ((mp->m_flags & XFS_MOUNT_ALLOCSIZE) ||
- XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_dalign) ||
- !xfs_iext_peek_prev_extent(ifp, icur, &prev) ||
+ if (XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_dalign) ||
+ !xfs_iext_prev_extent(ifp, &ncur, &prev) ||
prev.br_startoff + prev.br_blockcount < offset_fsb)
return mp->m_allocsize_blocks;
/*
- * Determine the initial size of the preallocation. We are beyond the
- * current EOF here, but we need to take into account whether this is
- * a sparse write or an extending write when determining the
- * preallocation size. Hence we need to look up the extent that ends
- * at the current write offset and use the result to determine the
- * preallocation size.
- *
- * If the extent is a hole, then preallocation is essentially disabled.
- * Otherwise we take the size of the preceding data extent as the basis
- * for the preallocation size. If the size of the extent is greater than
- * half the maximum extent length, then use the current offset as the
- * basis. This ensures that for large files the preallocation size
- * always extends to MAXEXTLEN rather than falling short due to things
- * like stripe unit/width alignment of real extents.
+ * Take the size of the preceding data extents as the basis for the
+ * preallocation size. Note that we don't care if the previous extents
+ * are written or not.
*/
- if (prev.br_blockcount <= (MAXEXTLEN >> 1))
- alloc_blocks = prev.br_blockcount << 1;
- else
+ plen = prev.br_blockcount;
+ while (xfs_iext_prev_extent(ifp, &ncur, &got)) {
+ if (plen > MAXEXTLEN / 2 ||
+ isnullstartblock(got.br_startblock) ||
+ got.br_startoff + got.br_blockcount != prev.br_startoff ||
+ got.br_startblock + got.br_blockcount != prev.br_startblock)
+ break;
+ plen += got.br_blockcount;
+ prev = got;
+ }
+
+ /*
+ * If the size of the extents is greater than half the maximum extent
+ * length, then use the current offset as the basis. This ensures that
+ * for large files the preallocation size always extends to MAXEXTLEN
+ * rather than falling short due to things like stripe unit/width
+ * alignment of real extents.
+ */
+ alloc_blocks = plen * 2;
+ if (alloc_blocks > MAXEXTLEN)
alloc_blocks = XFS_B_TO_FSB(mp, offset);
- if (!alloc_blocks)
- goto check_writeio;
qblocks = alloc_blocks;
/*
* MAXEXTLEN is not a power of two value but we round the prealloc down
* to the nearest power of two value after throttling. To prevent the
- * round down from unconditionally reducing the maximum supported prealloc
- * size, we round up first, apply appropriate throttling, round down and
- * cap the value to MAXEXTLEN.
+ * round down from unconditionally reducing the maximum supported
+ * prealloc size, we round up first, apply appropriate throttling,
+ * round down and cap the value to MAXEXTLEN.
*/
alloc_blocks = XFS_FILEOFF_MIN(roundup_pow_of_two(MAXEXTLEN),
alloc_blocks);
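The rounding described in the comment above goes round up, throttle, round down, cap. A simplified sketch of that sequence; roundup_pow2, rounddown_pow2 and throttle_prealloc are local helpers standing in for the kernel's roundup_pow_of_two()/rounddown_pow_of_two() and the surrounding XFS logic:

#include <stdint.h>

static uint64_t roundup_pow2(uint64_t x)
{
	uint64_t p = 1;

	while (p < x)
		p <<= 1;
	return p;
}

static uint64_t rounddown_pow2(uint64_t x)
{
	uint64_t p = 1;

	while ((p << 1) <= x)
		p <<= 1;
	return p;
}

static uint64_t throttle_prealloc(uint64_t alloc_blocks, unsigned int shift,
				  uint64_t maxextlen)
{
	if (alloc_blocks > maxextlen)
		alloc_blocks = roundup_pow2(maxextlen);	/* round up first */
	alloc_blocks >>= shift;				/* apply throttling */
	if (alloc_blocks)
		alloc_blocks = rounddown_pow2(alloc_blocks);
	if (alloc_blocks > maxextlen)
		alloc_blocks = maxextlen;		/* final cap */
	return alloc_blocks;
}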
@@ -494,7 +489,6 @@ xfs_iomap_prealloc_size(
*/
while (alloc_blocks && alloc_blocks >= freesp)
alloc_blocks >>= 4;
-check_writeio:
if (alloc_blocks < mp->m_allocsize_blocks)
alloc_blocks = mp->m_allocsize_blocks;
trace_xfs_iomap_prealloc_size(ip, alloc_blocks, shift,
@@ -563,7 +557,7 @@ xfs_iomap_write_unwritten(
xfs_trans_ijoin(tp, ip, 0);
error = xfs_trans_reserve_quota_nblks(tp, ip, resblks, 0,
- XFS_QMOPT_RES_REGBLKS);
+ XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES);
if (error)
goto error_on_bmapi_transaction;
@@ -856,7 +850,7 @@ xfs_buffered_write_iomap_begin(
xfs_ilock(ip, XFS_ILOCK_EXCL);
- if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ip, XFS_DATA_FORK)) ||
+ if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(&ip->i_df)) ||
XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
error = -EFSCORRUPTED;
goto out_unlock;
@@ -961,9 +955,16 @@ xfs_buffered_write_iomap_begin(
if (error)
goto out_unlock;
- if (eof) {
- prealloc_blocks = xfs_iomap_prealloc_size(ip, allocfork, offset,
- count, &icur);
+ if (eof && offset + count > XFS_ISIZE(ip)) {
+ /*
+ * Determine the initial size of the preallocation.
+ * We clean up any extra preallocation when the file is closed.
+ */
+ if (mp->m_flags & XFS_MOUNT_ALLOCSIZE)
+ prealloc_blocks = mp->m_allocsize_blocks;
+ else
+ prealloc_blocks = xfs_iomap_prealloc_size(ip, allocfork,
+ offset, count, &icur);
if (prealloc_blocks) {
xfs_extlen_t align;
xfs_off_t end_offset;
@@ -1258,12 +1259,12 @@ xfs_xattr_iomap_begin(
lockmode = xfs_ilock_attr_map_shared(ip);
/* if there are no attribute fork or extents, return ENOENT */
- if (!XFS_IFORK_Q(ip) || !ip->i_d.di_anextents) {
+ if (!XFS_IFORK_Q(ip) || !ip->i_afp->if_nextents) {
error = -ENOENT;
goto out_unlock;
}
- ASSERT(ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL);
+ ASSERT(ip->i_afp->if_format != XFS_DINODE_FMT_LOCAL);
error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
&nimaps, XFS_BMAPI_ATTRFORK);
out_unlock:
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index f7a99b3bbcf7..d66528fa3657 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -738,12 +738,7 @@ xfs_setattr_nonsize(
if (error) /* out of quota */
goto out_cancel;
}
- }
- /*
- * Change file ownership. Must be the owner or privileged.
- */
- if (mask & (ATTR_UID|ATTR_GID)) {
/*
* CAP_FSETID overrides the following restrictions:
*
@@ -877,7 +872,7 @@ xfs_setattr_size(
/*
* Short circuit the truncate case for zero length files.
*/
- if (newsize == 0 && oldsize == 0 && ip->i_d.di_nextents == 0) {
+ if (newsize == 0 && oldsize == 0 && ip->i_df.if_nextents == 0) {
if (!(iattr->ia_valid & (ATTR_CTIME|ATTR_MTIME)))
return 0;
@@ -1243,13 +1238,12 @@ xfs_inode_supports_dax(
{
struct xfs_mount *mp = ip->i_mount;
- /* Only supported on non-reflinked files. */
- if (!S_ISREG(VFS_I(ip)->i_mode) || xfs_is_reflink_inode(ip))
+ /* Only supported on regular files. */
+ if (!S_ISREG(VFS_I(ip)->i_mode))
return false;
- /* DAX mount option or DAX iflag must be set. */
- if (!(mp->m_flags & XFS_MOUNT_DAX) &&
- !(ip->i_d.di_flags2 & XFS_DIFLAG2_DAX))
+ /* Only supported on non-reflinked files. */
+ if (xfs_is_reflink_inode(ip))
return false;
/* Block size must match page size */
@@ -1260,26 +1254,51 @@ xfs_inode_supports_dax(
return xfs_inode_buftarg(ip)->bt_daxdev != NULL;
}
-STATIC void
+static bool
+xfs_inode_should_enable_dax(
+ struct xfs_inode *ip)
+{
+ if (!IS_ENABLED(CONFIG_FS_DAX))
+ return false;
+ if (ip->i_mount->m_flags & XFS_MOUNT_DAX_NEVER)
+ return false;
+ if (!xfs_inode_supports_dax(ip))
+ return false;
+ if (ip->i_mount->m_flags & XFS_MOUNT_DAX_ALWAYS)
+ return true;
+ if (ip->i_d.di_flags2 & XFS_DIFLAG2_DAX)
+ return true;
+ return false;
+}
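xfs_inode_should_enable_dax() encodes a strict precedence: a disabled CONFIG_FS_DAX or a dax=never mount wins over everything, then the inode must support DAX at all, then dax=always or the per-inode XFS_DIFLAG2_DAX flag turns it on. The same ordering written as a standalone predicate with plain booleans in place of the XFS flag words:

#include <stdbool.h>

static bool should_enable_dax(bool fs_dax_built_in, bool mount_dax_never,
			      bool inode_supports_dax, bool mount_dax_always,
			      bool inode_dax_flag)
{
	if (!fs_dax_built_in)
		return false;		/* CONFIG_FS_DAX disabled */
	if (mount_dax_never)
		return false;		/* "dax=never" overrides everything */
	if (!inode_supports_dax)
		return false;		/* wrong file type, reflinked, etc. */
	if (mount_dax_always)
		return true;		/* "dax=always" mount option */
	return inode_dax_flag;		/* fall back to the per-inode flag */
}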
+
+void
xfs_diflags_to_iflags(
- struct inode *inode,
- struct xfs_inode *ip)
+ struct xfs_inode *ip,
+ bool init)
{
- uint16_t flags = ip->i_d.di_flags;
-
- inode->i_flags &= ~(S_IMMUTABLE | S_APPEND | S_SYNC |
- S_NOATIME | S_DAX);
-
- if (flags & XFS_DIFLAG_IMMUTABLE)
- inode->i_flags |= S_IMMUTABLE;
- if (flags & XFS_DIFLAG_APPEND)
- inode->i_flags |= S_APPEND;
- if (flags & XFS_DIFLAG_SYNC)
- inode->i_flags |= S_SYNC;
- if (flags & XFS_DIFLAG_NOATIME)
- inode->i_flags |= S_NOATIME;
- if (xfs_inode_supports_dax(ip))
- inode->i_flags |= S_DAX;
+ struct inode *inode = VFS_I(ip);
+ unsigned int xflags = xfs_ip2xflags(ip);
+ unsigned int flags = 0;
+
+ ASSERT(!(IS_DAX(inode) && init));
+
+ if (xflags & FS_XFLAG_IMMUTABLE)
+ flags |= S_IMMUTABLE;
+ if (xflags & FS_XFLAG_APPEND)
+ flags |= S_APPEND;
+ if (xflags & FS_XFLAG_SYNC)
+ flags |= S_SYNC;
+ if (xflags & FS_XFLAG_NOATIME)
+ flags |= S_NOATIME;
+ if (init && xfs_inode_should_enable_dax(ip))
+ flags |= S_DAX;
+
+ /*
+ * S_DAX can only be set during inode initialization and is never set by
+ * the VFS, so we cannot mask off S_DAX in i_flags.
+ */
+ inode->i_flags &= ~(S_IMMUTABLE | S_APPEND | S_SYNC | S_NOATIME);
+ inode->i_flags |= flags;
}
/*
@@ -1305,7 +1324,7 @@ xfs_setup_inode(
inode_fake_hash(inode);
i_size_write(inode, ip->i_d.di_size);
- xfs_diflags_to_iflags(inode, ip);
+ xfs_diflags_to_iflags(ip, true);
if (S_ISDIR(inode->i_mode)) {
/*
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index ff2da28fed90..16ca97a7ff00 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -104,9 +104,9 @@ xfs_bulkstat_one_int(
buf->bs_xflags = xfs_ip2xflags(ip);
buf->bs_extsize_blks = dic->di_extsize;
- buf->bs_extents = dic->di_nextents;
+ buf->bs_extents = xfs_ifork_nextents(&ip->i_df);
xfs_bulkstat_health(ip, buf);
- buf->bs_aextents = dic->di_anextents;
+ buf->bs_aextents = xfs_ifork_nextents(ip->i_afp);
buf->bs_forkoff = XFS_IFORK_BOFF(ip);
buf->bs_version = XFS_BULKSTAT_VERSION_V5;
@@ -115,7 +115,7 @@ xfs_bulkstat_one_int(
buf->bs_cowextsize_blks = dic->di_cowextsize;
}
- switch (dic->di_format) {
+ switch (ip->i_df.if_format) {
case XFS_DINODE_FMT_DEV:
buf->bs_rdev = sysv_encode_dev(inode->i_rdev);
buf->bs_blksize = BLKDEV_IOSIZE;
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 11c3502b07b1..ec015df55b77 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -18,21 +18,13 @@
#include "xfs_log.h"
#include "xfs_log_priv.h"
#include "xfs_log_recover.h"
-#include "xfs_inode_item.h"
-#include "xfs_extfree_item.h"
#include "xfs_trans_priv.h"
#include "xfs_alloc.h"
#include "xfs_ialloc.h"
-#include "xfs_quota.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
-#include "xfs_bmap_btree.h"
#include "xfs_error.h"
-#include "xfs_dir2.h"
-#include "xfs_rmap_item.h"
#include "xfs_buf_item.h"
-#include "xfs_refcount_item.h"
-#include "xfs_bmap_item.h"
#define BLK_AVG(blk1, blk2) ((blk1+blk2) >> 1)
@@ -56,17 +48,6 @@ xlog_do_recovery_pass(
struct xlog *, xfs_daddr_t, xfs_daddr_t, int, xfs_daddr_t *);
/*
- * This structure is used during recovery to record the buf log items which
- * have been canceled and should not be replayed.
- */
-struct xfs_buf_cancel {
- xfs_daddr_t bc_blkno;
- uint bc_len;
- int bc_refcount;
- struct list_head bc_list;
-};
-
-/*
* Sector aligned buffer routines for buffer create/read/write/access
*/
@@ -284,7 +265,7 @@ xlog_header_check_mount(
return 0;
}
-STATIC void
+void
xlog_recover_iodone(
struct xfs_buf *bp)
{
@@ -1779,12 +1760,72 @@ xlog_clear_stale_blocks(
return 0;
}
+/*
+ * Release the recovered intent item in the AIL that matches the given intent
+ * type and intent id.
+ */
+void
+xlog_recover_release_intent(
+ struct xlog *log,
+ unsigned short intent_type,
+ uint64_t intent_id)
+{
+ struct xfs_ail_cursor cur;
+ struct xfs_log_item *lip;
+ struct xfs_ail *ailp = log->l_ailp;
+
+ spin_lock(&ailp->ail_lock);
+ for (lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); lip != NULL;
+ lip = xfs_trans_ail_cursor_next(ailp, &cur)) {
+ if (lip->li_type != intent_type)
+ continue;
+ if (!lip->li_ops->iop_match(lip, intent_id))
+ continue;
+
+ spin_unlock(&ailp->ail_lock);
+ lip->li_ops->iop_release(lip);
+ spin_lock(&ailp->ail_lock);
+ break;
+ }
+
+ xfs_trans_ail_cursor_done(&cur);
+ spin_unlock(&ailp->ail_lock);
+}
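xlog_recover_release_intent() walks the AIL for the one recovered intent whose type and id match and releases it, dropping ail_lock around the ->iop_release() call. A much-simplified userspace sketch of that lookup-and-release shape, using a plain linked list instead of the AIL and made-up field names:

#include <stdint.h>

struct intent {
	struct intent	*next;
	unsigned short	type;
	uint64_t	id;
	void		(*release)(struct intent *);
};

static void release_intent(struct intent *head, unsigned short type, uint64_t id)
{
	struct intent *ip;

	for (ip = head; ip; ip = ip->next) {
		if (ip->type != type || ip->id != id)
			continue;
		ip->release(ip);	/* done with the list lock dropped in XFS */
		break;
	}
}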
+
/******************************************************************************
*
* Log recover routines
*
******************************************************************************
*/
+static const struct xlog_recover_item_ops *xlog_recover_item_ops[] = {
+ &xlog_buf_item_ops,
+ &xlog_inode_item_ops,
+ &xlog_dquot_item_ops,
+ &xlog_quotaoff_item_ops,
+ &xlog_icreate_item_ops,
+ &xlog_efi_item_ops,
+ &xlog_efd_item_ops,
+ &xlog_rui_item_ops,
+ &xlog_rud_item_ops,
+ &xlog_cui_item_ops,
+ &xlog_cud_item_ops,
+ &xlog_bui_item_ops,
+ &xlog_bud_item_ops,
+};
+
+static const struct xlog_recover_item_ops *
+xlog_find_item_ops(
+ struct xlog_recover_item *item)
+{
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(xlog_recover_item_ops); i++)
+ if (ITEM_TYPE(item) == xlog_recover_item_ops[i]->item_type)
+ return xlog_recover_item_ops[i];
+
+ return NULL;
+}
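xlog_find_item_ops() replaces the old per-type switch statements with a table lookup: each log item type contributes an ops structure and recovery dispatches through it. A small sketch of how such a table is populated and searched; the structure fields and the 0x1234 type value are illustrative, not the XFS definitions:

#include <stddef.h>

struct recover_item;

struct recover_item_ops {
	unsigned short	item_type;
	void		(*ra_pass2)(struct recover_item *);
	int		(*commit_pass2)(struct recover_item *);
};

static int noop_commit(struct recover_item *item) { (void)item; return 0; }

static const struct recover_item_ops example_item_ops = {
	.item_type	= 0x1234,	/* made-up type value */
	.commit_pass2	= noop_commit,	/* .ra_pass2 left NULL: optional hook */
};

static const struct recover_item_ops *item_ops_table[] = {
	&example_item_ops,
	/* one entry per log item type, as in xlog_recover_item_ops[] above */
};

static const struct recover_item_ops *find_ops(unsigned short type)
{
	for (size_t i = 0; i < sizeof(item_ops_table) / sizeof(item_ops_table[0]); i++)
		if (item_ops_table[i]->item_type == type)
			return item_ops_table[i];
	return NULL;			/* unknown type: caller treats it as corruption */
}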
/*
* Sort the log items in the transaction.
@@ -1841,54 +1882,23 @@ xlog_recover_reorder_trans(
struct xlog_recover *trans,
int pass)
{
- xlog_recover_item_t *item, *n;
+ struct xlog_recover_item *item, *n;
int error = 0;
LIST_HEAD(sort_list);
LIST_HEAD(cancel_list);
LIST_HEAD(buffer_list);
LIST_HEAD(inode_buffer_list);
- LIST_HEAD(inode_list);
+ LIST_HEAD(item_list);
list_splice_init(&trans->r_itemq, &sort_list);
list_for_each_entry_safe(item, n, &sort_list, ri_list) {
- xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr;
+ enum xlog_recover_reorder fate = XLOG_REORDER_ITEM_LIST;
- switch (ITEM_TYPE(item)) {
- case XFS_LI_ICREATE:
- list_move_tail(&item->ri_list, &buffer_list);
- break;
- case XFS_LI_BUF:
- if (buf_f->blf_flags & XFS_BLF_CANCEL) {
- trace_xfs_log_recover_item_reorder_head(log,
- trans, item, pass);
- list_move(&item->ri_list, &cancel_list);
- break;
- }
- if (buf_f->blf_flags & XFS_BLF_INODE_BUF) {
- list_move(&item->ri_list, &inode_buffer_list);
- break;
- }
- list_move_tail(&item->ri_list, &buffer_list);
- break;
- case XFS_LI_INODE:
- case XFS_LI_DQUOT:
- case XFS_LI_QUOTAOFF:
- case XFS_LI_EFD:
- case XFS_LI_EFI:
- case XFS_LI_RUI:
- case XFS_LI_RUD:
- case XFS_LI_CUI:
- case XFS_LI_CUD:
- case XFS_LI_BUI:
- case XFS_LI_BUD:
- trace_xfs_log_recover_item_reorder_tail(log,
- trans, item, pass);
- list_move_tail(&item->ri_list, &inode_list);
- break;
- default:
+ item->ri_ops = xlog_find_item_ops(item);
+ if (!item->ri_ops) {
xfs_warn(log->l_mp,
- "%s: unrecognized type of log operation",
- __func__);
+ "%s: unrecognized type of log operation (%d)",
+ __func__, ITEM_TYPE(item));
ASSERT(0);
/*
* return the remaining items back to the transaction
@@ -1896,16 +1906,38 @@ xlog_recover_reorder_trans(
*/
if (!list_empty(&sort_list))
list_splice_init(&sort_list, &trans->r_itemq);
- error = -EIO;
- goto out;
+ error = -EFSCORRUPTED;
+ break;
+ }
+
+ if (item->ri_ops->reorder)
+ fate = item->ri_ops->reorder(item);
+
+ switch (fate) {
+ case XLOG_REORDER_BUFFER_LIST:
+ list_move_tail(&item->ri_list, &buffer_list);
+ break;
+ case XLOG_REORDER_CANCEL_LIST:
+ trace_xfs_log_recover_item_reorder_head(log,
+ trans, item, pass);
+ list_move(&item->ri_list, &cancel_list);
+ break;
+ case XLOG_REORDER_INODE_BUFFER_LIST:
+ list_move(&item->ri_list, &inode_buffer_list);
+ break;
+ case XLOG_REORDER_ITEM_LIST:
+ trace_xfs_log_recover_item_reorder_tail(log,
+ trans, item, pass);
+ list_move_tail(&item->ri_list, &item_list);
+ break;
}
}
-out:
+
ASSERT(list_empty(&sort_list));
if (!list_empty(&buffer_list))
list_splice(&buffer_list, &trans->r_itemq);
- if (!list_empty(&inode_list))
- list_splice_tail(&inode_list, &trans->r_itemq);
+ if (!list_empty(&item_list))
+ list_splice_tail(&item_list, &trans->r_itemq);
if (!list_empty(&inode_buffer_list))
list_splice_tail(&inode_buffer_list, &trans->r_itemq);
if (!list_empty(&cancel_list))
@@ -1913,2152 +1945,15 @@ out:
return error;
}
-/*
- * Build up the table of buf cancel records so that we don't replay
- * cancelled data in the second pass. For buffer records that are
- * not cancel records, there is nothing to do here so we just return.
- *
- * If we get a cancel record which is already in the table, this indicates
- * that the buffer was cancelled multiple times. In order to ensure
- * that during pass 2 we keep the record in the table until we reach its
- * last occurrence in the log, we keep a reference count in the cancel
- * record in the table to tell us how many times we expect to see this
- * record during the second pass.
- */
-STATIC int
-xlog_recover_buffer_pass1(
- struct xlog *log,
- struct xlog_recover_item *item)
-{
- xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr;
- struct list_head *bucket;
- struct xfs_buf_cancel *bcp;
-
- if (!xfs_buf_log_check_iovec(&item->ri_buf[0])) {
- xfs_err(log->l_mp, "bad buffer log item size (%d)",
- item->ri_buf[0].i_len);
- return -EFSCORRUPTED;
- }
-
- /*
- * If this isn't a cancel buffer item, then just return.
- */
- if (!(buf_f->blf_flags & XFS_BLF_CANCEL)) {
- trace_xfs_log_recover_buf_not_cancel(log, buf_f);
- return 0;
- }
-
- /*
- * Insert an xfs_buf_cancel record into the hash table of them.
- * If there is already an identical record, bump its reference count.
- */
- bucket = XLOG_BUF_CANCEL_BUCKET(log, buf_f->blf_blkno);
- list_for_each_entry(bcp, bucket, bc_list) {
- if (bcp->bc_blkno == buf_f->blf_blkno &&
- bcp->bc_len == buf_f->blf_len) {
- bcp->bc_refcount++;
- trace_xfs_log_recover_buf_cancel_ref_inc(log, buf_f);
- return 0;
- }
- }
-
- bcp = kmem_alloc(sizeof(struct xfs_buf_cancel), 0);
- bcp->bc_blkno = buf_f->blf_blkno;
- bcp->bc_len = buf_f->blf_len;
- bcp->bc_refcount = 1;
- list_add_tail(&bcp->bc_list, bucket);
-
- trace_xfs_log_recover_buf_cancel_add(log, buf_f);
- return 0;
-}
-
-/*
- * Check to see whether the buffer being recovered has a corresponding
- * entry in the buffer cancel record table. If it is, return the cancel
- * buffer structure to the caller.
- */
-STATIC struct xfs_buf_cancel *
-xlog_peek_buffer_cancelled(
- struct xlog *log,
- xfs_daddr_t blkno,
- uint len,
- unsigned short flags)
-{
- struct list_head *bucket;
- struct xfs_buf_cancel *bcp;
-
- if (!log->l_buf_cancel_table) {
- /* empty table means no cancelled buffers in the log */
- ASSERT(!(flags & XFS_BLF_CANCEL));
- return NULL;
- }
-
- bucket = XLOG_BUF_CANCEL_BUCKET(log, blkno);
- list_for_each_entry(bcp, bucket, bc_list) {
- if (bcp->bc_blkno == blkno && bcp->bc_len == len)
- return bcp;
- }
-
- /*
- * We didn't find a corresponding entry in the table, so return 0 so
- * that the buffer is NOT cancelled.
- */
- ASSERT(!(flags & XFS_BLF_CANCEL));
- return NULL;
-}
-
-/*
- * If the buffer is being cancelled then return 1 so that it will be cancelled,
- * otherwise return 0. If the buffer is actually a buffer cancel item
- * (XFS_BLF_CANCEL is set), then decrement the refcount on the entry in the
- * table and remove it from the table if this is the last reference.
- *
- * We remove the cancel record from the table when we encounter its last
- * occurrence in the log so that if the same buffer is re-used again after its
- * last cancellation we actually replay the changes made at that point.
- */
-STATIC int
-xlog_check_buffer_cancelled(
+void
+xlog_buf_readahead(
struct xlog *log,
xfs_daddr_t blkno,
uint len,
- unsigned short flags)
-{
- struct xfs_buf_cancel *bcp;
-
- bcp = xlog_peek_buffer_cancelled(log, blkno, len, flags);
- if (!bcp)
- return 0;
-
- /*
- * We've go a match, so return 1 so that the recovery of this buffer
- * is cancelled. If this buffer is actually a buffer cancel log
- * item, then decrement the refcount on the one in the table and
- * remove it if this is the last reference.
- */
- if (flags & XFS_BLF_CANCEL) {
- if (--bcp->bc_refcount == 0) {
- list_del(&bcp->bc_list);
- kmem_free(bcp);
- }
- }
- return 1;
-}
-
-/*
- * Perform recovery for a buffer full of inodes. In these buffers, the only
- * data which should be recovered is that which corresponds to the
- * di_next_unlinked pointers in the on disk inode structures. The rest of the
- * data for the inodes is always logged through the inodes themselves rather
- * than the inode buffer and is recovered in xlog_recover_inode_pass2().
- *
- * The only time when buffers full of inodes are fully recovered is when the
- * buffer is full of newly allocated inodes. In this case the buffer will
- * not be marked as an inode buffer and so will be sent to
- * xlog_recover_do_reg_buffer() below during recovery.
- */
-STATIC int
-xlog_recover_do_inode_buffer(
- struct xfs_mount *mp,
- xlog_recover_item_t *item,
- struct xfs_buf *bp,
- xfs_buf_log_format_t *buf_f)
-{
- int i;
- int item_index = 0;
- int bit = 0;
- int nbits = 0;
- int reg_buf_offset = 0;
- int reg_buf_bytes = 0;
- int next_unlinked_offset;
- int inodes_per_buf;
- xfs_agino_t *logged_nextp;
- xfs_agino_t *buffer_nextp;
-
- trace_xfs_log_recover_buf_inode_buf(mp->m_log, buf_f);
-
- /*
- * Post recovery validation only works properly on CRC enabled
- * filesystems.
- */
- if (xfs_sb_version_hascrc(&mp->m_sb))
- bp->b_ops = &xfs_inode_buf_ops;
-
- inodes_per_buf = BBTOB(bp->b_length) >> mp->m_sb.sb_inodelog;
- for (i = 0; i < inodes_per_buf; i++) {
- next_unlinked_offset = (i * mp->m_sb.sb_inodesize) +
- offsetof(xfs_dinode_t, di_next_unlinked);
-
- while (next_unlinked_offset >=
- (reg_buf_offset + reg_buf_bytes)) {
- /*
- * The next di_next_unlinked field is beyond
- * the current logged region. Find the next
- * logged region that contains or is beyond
- * the current di_next_unlinked field.
- */
- bit += nbits;
- bit = xfs_next_bit(buf_f->blf_data_map,
- buf_f->blf_map_size, bit);
-
- /*
- * If there are no more logged regions in the
- * buffer, then we're done.
- */
- if (bit == -1)
- return 0;
-
- nbits = xfs_contig_bits(buf_f->blf_data_map,
- buf_f->blf_map_size, bit);
- ASSERT(nbits > 0);
- reg_buf_offset = bit << XFS_BLF_SHIFT;
- reg_buf_bytes = nbits << XFS_BLF_SHIFT;
- item_index++;
- }
-
- /*
- * If the current logged region starts after the current
- * di_next_unlinked field, then move on to the next
- * di_next_unlinked field.
- */
- if (next_unlinked_offset < reg_buf_offset)
- continue;
-
- ASSERT(item->ri_buf[item_index].i_addr != NULL);
- ASSERT((item->ri_buf[item_index].i_len % XFS_BLF_CHUNK) == 0);
- ASSERT((reg_buf_offset + reg_buf_bytes) <= BBTOB(bp->b_length));
-
- /*
- * The current logged region contains a copy of the
- * current di_next_unlinked field. Extract its value
- * and copy it to the buffer copy.
- */
- logged_nextp = item->ri_buf[item_index].i_addr +
- next_unlinked_offset - reg_buf_offset;
- if (XFS_IS_CORRUPT(mp, *logged_nextp == 0)) {
- xfs_alert(mp,
- "Bad inode buffer log record (ptr = "PTR_FMT", bp = "PTR_FMT"). "
- "Trying to replay bad (0) inode di_next_unlinked field.",
- item, bp);
- return -EFSCORRUPTED;
- }
-
- buffer_nextp = xfs_buf_offset(bp, next_unlinked_offset);
- *buffer_nextp = *logged_nextp;
-
- /*
- * If necessary, recalculate the CRC in the on-disk inode. We
- * have to leave the inode in a consistent state for whoever
- * reads it next....
- */
- xfs_dinode_calc_crc(mp,
- xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize));
-
- }
-
- return 0;
-}
-
-/*
- * V5 filesystems know the age of the buffer on disk being recovered. We can
- * have newer objects on disk than we are replaying, and so for these cases we
- * don't want to replay the current change as that will make the buffer contents
- * temporarily invalid on disk.
- *
- * The magic number might not match the buffer type we are going to recover
- * (e.g. reallocated blocks), so we ignore the xfs_buf_log_format flags. Hence
- * extract the LSN of the existing object in the buffer based on it's current
- * magic number. If we don't recognise the magic number in the buffer, then
- * return a LSN of -1 so that the caller knows it was an unrecognised block and
- * so can recover the buffer.
- *
- * Note: we cannot rely solely on magic number matches to determine that the
- * buffer has a valid LSN - we also need to verify that it belongs to this
- * filesystem, so we need to extract the object's LSN and compare it to that
- * which we read from the superblock. If the UUIDs don't match, then we've got a
- * stale metadata block from an old filesystem instance that we need to recover
- * over the top of.
- */
-static xfs_lsn_t
-xlog_recover_get_buf_lsn(
- struct xfs_mount *mp,
- struct xfs_buf *bp)
-{
- uint32_t magic32;
- uint16_t magic16;
- uint16_t magicda;
- void *blk = bp->b_addr;
- uuid_t *uuid;
- xfs_lsn_t lsn = -1;
-
- /* v4 filesystems always recover immediately */
- if (!xfs_sb_version_hascrc(&mp->m_sb))
- goto recover_immediately;
-
- magic32 = be32_to_cpu(*(__be32 *)blk);
- switch (magic32) {
- case XFS_ABTB_CRC_MAGIC:
- case XFS_ABTC_CRC_MAGIC:
- case XFS_ABTB_MAGIC:
- case XFS_ABTC_MAGIC:
- case XFS_RMAP_CRC_MAGIC:
- case XFS_REFC_CRC_MAGIC:
- case XFS_IBT_CRC_MAGIC:
- case XFS_IBT_MAGIC: {
- struct xfs_btree_block *btb = blk;
-
- lsn = be64_to_cpu(btb->bb_u.s.bb_lsn);
- uuid = &btb->bb_u.s.bb_uuid;
- break;
- }
- case XFS_BMAP_CRC_MAGIC:
- case XFS_BMAP_MAGIC: {
- struct xfs_btree_block *btb = blk;
-
- lsn = be64_to_cpu(btb->bb_u.l.bb_lsn);
- uuid = &btb->bb_u.l.bb_uuid;
- break;
- }
- case XFS_AGF_MAGIC:
- lsn = be64_to_cpu(((struct xfs_agf *)blk)->agf_lsn);
- uuid = &((struct xfs_agf *)blk)->agf_uuid;
- break;
- case XFS_AGFL_MAGIC:
- lsn = be64_to_cpu(((struct xfs_agfl *)blk)->agfl_lsn);
- uuid = &((struct xfs_agfl *)blk)->agfl_uuid;
- break;
- case XFS_AGI_MAGIC:
- lsn = be64_to_cpu(((struct xfs_agi *)blk)->agi_lsn);
- uuid = &((struct xfs_agi *)blk)->agi_uuid;
- break;
- case XFS_SYMLINK_MAGIC:
- lsn = be64_to_cpu(((struct xfs_dsymlink_hdr *)blk)->sl_lsn);
- uuid = &((struct xfs_dsymlink_hdr *)blk)->sl_uuid;
- break;
- case XFS_DIR3_BLOCK_MAGIC:
- case XFS_DIR3_DATA_MAGIC:
- case XFS_DIR3_FREE_MAGIC:
- lsn = be64_to_cpu(((struct xfs_dir3_blk_hdr *)blk)->lsn);
- uuid = &((struct xfs_dir3_blk_hdr *)blk)->uuid;
- break;
- case XFS_ATTR3_RMT_MAGIC:
- /*
- * Remote attr blocks are written synchronously, rather than
- * being logged. That means they do not contain a valid LSN
- * (i.e. transactionally ordered) in them, and hence any time we
- * see a buffer to replay over the top of a remote attribute
- * block we should simply do so.
- */
- goto recover_immediately;
- case XFS_SB_MAGIC:
- /*
- * superblock uuids are magic. We may or may not have a
- * sb_meta_uuid on disk, but it will be set in the in-core
- * superblock. We set the uuid pointer for verification
- * according to the superblock feature mask to ensure we check
- * the relevant UUID in the superblock.
- */
- lsn = be64_to_cpu(((struct xfs_dsb *)blk)->sb_lsn);
- if (xfs_sb_version_hasmetauuid(&mp->m_sb))
- uuid = &((struct xfs_dsb *)blk)->sb_meta_uuid;
- else
- uuid = &((struct xfs_dsb *)blk)->sb_uuid;
- break;
- default:
- break;
- }
-
- if (lsn != (xfs_lsn_t)-1) {
- if (!uuid_equal(&mp->m_sb.sb_meta_uuid, uuid))
- goto recover_immediately;
- return lsn;
- }
-
- magicda = be16_to_cpu(((struct xfs_da_blkinfo *)blk)->magic);
- switch (magicda) {
- case XFS_DIR3_LEAF1_MAGIC:
- case XFS_DIR3_LEAFN_MAGIC:
- case XFS_DA3_NODE_MAGIC:
- lsn = be64_to_cpu(((struct xfs_da3_blkinfo *)blk)->lsn);
- uuid = &((struct xfs_da3_blkinfo *)blk)->uuid;
- break;
- default:
- break;
- }
-
- if (lsn != (xfs_lsn_t)-1) {
- if (!uuid_equal(&mp->m_sb.sb_uuid, uuid))
- goto recover_immediately;
- return lsn;
- }
-
- /*
- * We do individual object checks on dquot and inode buffers as they
- * have their own individual LSN records. Also, we could have a stale
- * buffer here, so we have to at least recognise these buffer types.
- *
- * A notd complexity here is inode unlinked list processing - it logs
- * the inode directly in the buffer, but we don't know which inodes have
- * been modified, and there is no global buffer LSN. Hence we need to
- * recover all inode buffer types immediately. This problem will be
- * fixed by logical logging of the unlinked list modifications.
- */
- magic16 = be16_to_cpu(*(__be16 *)blk);
- switch (magic16) {
- case XFS_DQUOT_MAGIC:
- case XFS_DINODE_MAGIC:
- goto recover_immediately;
- default:
- break;
- }
-
- /* unknown buffer contents, recover immediately */
-
-recover_immediately:
- return (xfs_lsn_t)-1;
-
-}
-
-/*
- * Validate the recovered buffer is of the correct type and attach the
- * appropriate buffer operations to them for writeback. Magic numbers are in a
- * few places:
- * the first 16 bits of the buffer (inode buffer, dquot buffer),
- * the first 32 bits of the buffer (most blocks),
- * inside a struct xfs_da_blkinfo at the start of the buffer.
- */
-static void
-xlog_recover_validate_buf_type(
- struct xfs_mount *mp,
- struct xfs_buf *bp,
- xfs_buf_log_format_t *buf_f,
- xfs_lsn_t current_lsn)
-{
- struct xfs_da_blkinfo *info = bp->b_addr;
- uint32_t magic32;
- uint16_t magic16;
- uint16_t magicda;
- char *warnmsg = NULL;
-
- /*
- * We can only do post recovery validation on items on CRC enabled
- * fielsystems as we need to know when the buffer was written to be able
- * to determine if we should have replayed the item. If we replay old
- * metadata over a newer buffer, then it will enter a temporarily
- * inconsistent state resulting in verification failures. Hence for now
- * just avoid the verification stage for non-crc filesystems
- */
- if (!xfs_sb_version_hascrc(&mp->m_sb))
- return;
-
- magic32 = be32_to_cpu(*(__be32 *)bp->b_addr);
- magic16 = be16_to_cpu(*(__be16*)bp->b_addr);
- magicda = be16_to_cpu(info->magic);
- switch (xfs_blft_from_flags(buf_f)) {
- case XFS_BLFT_BTREE_BUF:
- switch (magic32) {
- case XFS_ABTB_CRC_MAGIC:
- case XFS_ABTB_MAGIC:
- bp->b_ops = &xfs_bnobt_buf_ops;
- break;
- case XFS_ABTC_CRC_MAGIC:
- case XFS_ABTC_MAGIC:
- bp->b_ops = &xfs_cntbt_buf_ops;
- break;
- case XFS_IBT_CRC_MAGIC:
- case XFS_IBT_MAGIC:
- bp->b_ops = &xfs_inobt_buf_ops;
- break;
- case XFS_FIBT_CRC_MAGIC:
- case XFS_FIBT_MAGIC:
- bp->b_ops = &xfs_finobt_buf_ops;
- break;
- case XFS_BMAP_CRC_MAGIC:
- case XFS_BMAP_MAGIC:
- bp->b_ops = &xfs_bmbt_buf_ops;
- break;
- case XFS_RMAP_CRC_MAGIC:
- bp->b_ops = &xfs_rmapbt_buf_ops;
- break;
- case XFS_REFC_CRC_MAGIC:
- bp->b_ops = &xfs_refcountbt_buf_ops;
- break;
- default:
- warnmsg = "Bad btree block magic!";
- break;
- }
- break;
- case XFS_BLFT_AGF_BUF:
- if (magic32 != XFS_AGF_MAGIC) {
- warnmsg = "Bad AGF block magic!";
- break;
- }
- bp->b_ops = &xfs_agf_buf_ops;
- break;
- case XFS_BLFT_AGFL_BUF:
- if (magic32 != XFS_AGFL_MAGIC) {
- warnmsg = "Bad AGFL block magic!";
- break;
- }
- bp->b_ops = &xfs_agfl_buf_ops;
- break;
- case XFS_BLFT_AGI_BUF:
- if (magic32 != XFS_AGI_MAGIC) {
- warnmsg = "Bad AGI block magic!";
- break;
- }
- bp->b_ops = &xfs_agi_buf_ops;
- break;
- case XFS_BLFT_UDQUOT_BUF:
- case XFS_BLFT_PDQUOT_BUF:
- case XFS_BLFT_GDQUOT_BUF:
-#ifdef CONFIG_XFS_QUOTA
- if (magic16 != XFS_DQUOT_MAGIC) {
- warnmsg = "Bad DQUOT block magic!";
- break;
- }
- bp->b_ops = &xfs_dquot_buf_ops;
-#else
- xfs_alert(mp,
- "Trying to recover dquots without QUOTA support built in!");
- ASSERT(0);
-#endif
- break;
- case XFS_BLFT_DINO_BUF:
- if (magic16 != XFS_DINODE_MAGIC) {
- warnmsg = "Bad INODE block magic!";
- break;
- }
- bp->b_ops = &xfs_inode_buf_ops;
- break;
- case XFS_BLFT_SYMLINK_BUF:
- if (magic32 != XFS_SYMLINK_MAGIC) {
- warnmsg = "Bad symlink block magic!";
- break;
- }
- bp->b_ops = &xfs_symlink_buf_ops;
- break;
- case XFS_BLFT_DIR_BLOCK_BUF:
- if (magic32 != XFS_DIR2_BLOCK_MAGIC &&
- magic32 != XFS_DIR3_BLOCK_MAGIC) {
- warnmsg = "Bad dir block magic!";
- break;
- }
- bp->b_ops = &xfs_dir3_block_buf_ops;
- break;
- case XFS_BLFT_DIR_DATA_BUF:
- if (magic32 != XFS_DIR2_DATA_MAGIC &&
- magic32 != XFS_DIR3_DATA_MAGIC) {
- warnmsg = "Bad dir data magic!";
- break;
- }
- bp->b_ops = &xfs_dir3_data_buf_ops;
- break;
- case XFS_BLFT_DIR_FREE_BUF:
- if (magic32 != XFS_DIR2_FREE_MAGIC &&
- magic32 != XFS_DIR3_FREE_MAGIC) {
- warnmsg = "Bad dir3 free magic!";
- break;
- }
- bp->b_ops = &xfs_dir3_free_buf_ops;
- break;
- case XFS_BLFT_DIR_LEAF1_BUF:
- if (magicda != XFS_DIR2_LEAF1_MAGIC &&
- magicda != XFS_DIR3_LEAF1_MAGIC) {
- warnmsg = "Bad dir leaf1 magic!";
- break;
- }
- bp->b_ops = &xfs_dir3_leaf1_buf_ops;
- break;
- case XFS_BLFT_DIR_LEAFN_BUF:
- if (magicda != XFS_DIR2_LEAFN_MAGIC &&
- magicda != XFS_DIR3_LEAFN_MAGIC) {
- warnmsg = "Bad dir leafn magic!";
- break;
- }
- bp->b_ops = &xfs_dir3_leafn_buf_ops;
- break;
- case XFS_BLFT_DA_NODE_BUF:
- if (magicda != XFS_DA_NODE_MAGIC &&
- magicda != XFS_DA3_NODE_MAGIC) {
- warnmsg = "Bad da node magic!";
- break;
- }
- bp->b_ops = &xfs_da3_node_buf_ops;
- break;
- case XFS_BLFT_ATTR_LEAF_BUF:
- if (magicda != XFS_ATTR_LEAF_MAGIC &&
- magicda != XFS_ATTR3_LEAF_MAGIC) {
- warnmsg = "Bad attr leaf magic!";
- break;
- }
- bp->b_ops = &xfs_attr3_leaf_buf_ops;
- break;
- case XFS_BLFT_ATTR_RMT_BUF:
- if (magic32 != XFS_ATTR3_RMT_MAGIC) {
- warnmsg = "Bad attr remote magic!";
- break;
- }
- bp->b_ops = &xfs_attr3_rmt_buf_ops;
- break;
- case XFS_BLFT_SB_BUF:
- if (magic32 != XFS_SB_MAGIC) {
- warnmsg = "Bad SB block magic!";
- break;
- }
- bp->b_ops = &xfs_sb_buf_ops;
- break;
-#ifdef CONFIG_XFS_RT
- case XFS_BLFT_RTBITMAP_BUF:
- case XFS_BLFT_RTSUMMARY_BUF:
- /* no magic numbers for verification of RT buffers */
- bp->b_ops = &xfs_rtbuf_ops;
- break;
-#endif /* CONFIG_XFS_RT */
- default:
- xfs_warn(mp, "Unknown buffer type %d!",
- xfs_blft_from_flags(buf_f));
- break;
- }
-
- /*
- * Nothing else to do in the case of a NULL current LSN as this means
- * the buffer is more recent than the change in the log and will be
- * skipped.
- */
- if (current_lsn == NULLCOMMITLSN)
- return;
-
- if (warnmsg) {
- xfs_warn(mp, warnmsg);
- ASSERT(0);
- }
-
- /*
- * We must update the metadata LSN of the buffer as it is written out to
- * ensure that older transactions never replay over this one and corrupt
- * the buffer. This can occur if log recovery is interrupted at some
- * point after the current transaction completes, at which point a
- * subsequent mount starts recovery from the beginning.
- *
- * Write verifiers update the metadata LSN from log items attached to
- * the buffer. Therefore, initialize a bli purely to carry the LSN to
- * the verifier. We'll clean it up in our ->iodone() callback.
- */
- if (bp->b_ops) {
- struct xfs_buf_log_item *bip;
-
- ASSERT(!bp->b_iodone || bp->b_iodone == xlog_recover_iodone);
- bp->b_iodone = xlog_recover_iodone;
- xfs_buf_item_init(bp, mp);
- bip = bp->b_log_item;
- bip->bli_item.li_lsn = current_lsn;
- }
-}
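The switch above keys the choice of write verifier ops off the buffer type recorded in the buf log format and then cross-checks the magic number actually found on disk. A minimal user-space sketch of that two-level check follows; the ops structures, enum and helper are illustrative stand-ins, not the kernel's xfs_buf_ops tables (the magic constants spell the ASCII strings "XAGF"/"XAGI"):

#include <stdint.h>
#include <stdio.h>

struct buf_ops { const char *name; };

static const struct buf_ops agf_ops = { "agf verifier" };
static const struct buf_ops agi_ops = { "agi verifier" };

enum blf_type { BLFT_AGF_BUF, BLFT_AGI_BUF };

/*
 * Pick verifier ops from the logged buffer type, then cross-check the
 * magic number found in the buffer; a mismatch means the buffer does
 * not contain what the log claims it should.
 */
static const struct buf_ops *pick_ops(enum blf_type type, uint32_t magic32)
{
        switch (type) {
        case BLFT_AGF_BUF:
                return magic32 == 0x58414746 ? &agf_ops : NULL; /* "XAGF" */
        case BLFT_AGI_BUF:
                return magic32 == 0x58414749 ? &agi_ops : NULL; /* "XAGI" */
        }
        return NULL;
}

int main(void)
{
        const struct buf_ops *ops = pick_ops(BLFT_AGF_BUF, 0x58414746);

        printf("%s\n", ops ? ops->name : "bad magic");
        return 0;
}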
-
-/*
- * Perform a 'normal' buffer recovery. Each logged region of the
- * buffer should be copied over the corresponding region in the
- * given buffer. The bitmap in the buf log format structure indicates
- * where to place the logged data.
- */
-STATIC void
-xlog_recover_do_reg_buffer(
- struct xfs_mount *mp,
- xlog_recover_item_t *item,
- struct xfs_buf *bp,
- xfs_buf_log_format_t *buf_f,
- xfs_lsn_t current_lsn)
-{
- int i;
- int bit;
- int nbits;
- xfs_failaddr_t fa;
- const size_t size_disk_dquot = sizeof(struct xfs_disk_dquot);
-
- trace_xfs_log_recover_buf_reg_buf(mp->m_log, buf_f);
-
- bit = 0;
- i = 1; /* 0 is the buf format structure */
- while (1) {
- bit = xfs_next_bit(buf_f->blf_data_map,
- buf_f->blf_map_size, bit);
- if (bit == -1)
- break;
- nbits = xfs_contig_bits(buf_f->blf_data_map,
- buf_f->blf_map_size, bit);
- ASSERT(nbits > 0);
- ASSERT(item->ri_buf[i].i_addr != NULL);
- ASSERT(item->ri_buf[i].i_len % XFS_BLF_CHUNK == 0);
- ASSERT(BBTOB(bp->b_length) >=
- ((uint)bit << XFS_BLF_SHIFT) + (nbits << XFS_BLF_SHIFT));
-
- /*
- * The dirty regions logged in the buffer, even though
- * contiguous, may span multiple chunks. This is because the
- * dirty region may span a physical page boundary in a buffer
- * and hence be split into two separate vectors for writing into
- * the log. Hence we need to trim nbits back to the length of
- * the current region being copied out of the log.
- */
- if (item->ri_buf[i].i_len < (nbits << XFS_BLF_SHIFT))
- nbits = item->ri_buf[i].i_len >> XFS_BLF_SHIFT;
-
- /*
- * Do a sanity check if this is a dquot buffer. Just checking
- * the first dquot in the buffer should do. XXX: This is
- * probably a good thing to do for other buf types also.
- */
- fa = NULL;
- if (buf_f->blf_flags &
- (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) {
- if (item->ri_buf[i].i_addr == NULL) {
- xfs_alert(mp,
- "XFS: NULL dquot in %s.", __func__);
- goto next;
- }
- if (item->ri_buf[i].i_len < size_disk_dquot) {
- xfs_alert(mp,
- "XFS: dquot too small (%d) in %s.",
- item->ri_buf[i].i_len, __func__);
- goto next;
- }
- fa = xfs_dquot_verify(mp, item->ri_buf[i].i_addr,
- -1, 0);
- if (fa) {
- xfs_alert(mp,
- "dquot corrupt at %pS trying to replay into block 0x%llx",
- fa, bp->b_bn);
- goto next;
- }
- }
-
- memcpy(xfs_buf_offset(bp,
- (uint)bit << XFS_BLF_SHIFT), /* dest */
- item->ri_buf[i].i_addr, /* source */
- nbits<<XFS_BLF_SHIFT); /* length */
- next:
- i++;
- bit += nbits;
- }
-
- /* Shouldn't be any more regions */
- ASSERT(i == item->ri_total);
-
- xlog_recover_validate_buf_type(mp, bp, buf_f, current_lsn);
-}
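xlog_recover_do_reg_buffer() above walks the dirty-region bitmap in the buf log format one contiguous run of set bits at a time and copies that many chunks into the buffer. A small self-contained sketch of the same walk, assuming a plain uint32_t bitmap and hypothetical next_set_bit()/contig_bits() helpers standing in for xfs_next_bit()/xfs_contig_bits(), with a made-up 128-byte chunk size:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-ins for xfs_next_bit() and xfs_contig_bits(). */
static int next_set_bit(const uint32_t *map, int nwords, int start)
{
        for (int i = start; i < nwords * 32; i++)
                if (map[i / 32] & (1u << (i % 32)))
                        return i;
        return -1;
}

static int contig_bits(const uint32_t *map, int nwords, int start)
{
        int n = 0;

        while (start + n < nwords * 32 &&
               (map[(start + n) / 32] & (1u << ((start + n) % 32))))
                n++;
        return n;
}

int main(void)
{
        /* Each bit stands for one fixed-size (here 128-byte) chunk. */
        uint32_t map[1] = { 0x0000f00f };       /* two dirty runs */
        int bit = 0;

        while ((bit = next_set_bit(map, 1, bit)) != -1) {
                int nbits = contig_bits(map, 1, bit);

                printf("copy %d chunks starting at offset %d\n",
                       nbits, bit * 128);
                bit += nbits;
        }
        return 0;
}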
-
-/*
- * Perform a dquot buffer recovery.
- * Simple algorithm: if we have found a QUOTAOFF log item of the same type
- * (i.e. USR or GRP), then just toss this buffer away; don't recover it.
- * Else, treat it as a regular buffer and do recovery.
- *
- * Return false if the buffer was tossed and true if we recovered the buffer to
- * indicate to the caller whether the buffer needs writing.
- */
-STATIC bool
-xlog_recover_do_dquot_buffer(
- struct xfs_mount *mp,
- struct xlog *log,
- struct xlog_recover_item *item,
- struct xfs_buf *bp,
- struct xfs_buf_log_format *buf_f)
-{
- uint type;
-
- trace_xfs_log_recover_buf_dquot_buf(log, buf_f);
-
- /*
- * Filesystems are required to send in quota flags at mount time.
- */
- if (!mp->m_qflags)
- return false;
-
- type = 0;
- if (buf_f->blf_flags & XFS_BLF_UDQUOT_BUF)
- type |= XFS_DQ_USER;
- if (buf_f->blf_flags & XFS_BLF_PDQUOT_BUF)
- type |= XFS_DQ_PROJ;
- if (buf_f->blf_flags & XFS_BLF_GDQUOT_BUF)
- type |= XFS_DQ_GROUP;
- /*
- * This type of quota was turned off, so ignore this buffer
- */
- if (log->l_quotaoffs_flag & type)
- return false;
-
- xlog_recover_do_reg_buffer(mp, item, bp, buf_f, NULLCOMMITLSN);
- return true;
-}
-
-/*
- * This routine replays a modification made to a buffer at runtime.
- * There are actually two types of buffer, regular and inode, which
- * are handled differently. Inode buffers are special in that we only
- * recover a specific set of data from them, namely
- * the inode di_next_unlinked fields. This is because all other inode
- * data is actually logged via inode records and any data we replay
- * here which overlaps that may be stale.
- *
- * When meta-data buffers are freed at run time we log a buffer item
- * with the XFS_BLF_CANCEL bit set to indicate that previous copies
- * of the buffer in the log should not be replayed at recovery time.
- * This is so that if the blocks covered by the buffer are reused for
- * file data before we crash we don't end up replaying old, freed
- * meta-data into a user's file.
- *
- * To handle the cancellation of buffer log items, we make two passes
- * over the log during recovery. During the first we build a table of
- * those buffers which have been cancelled, and during the second we
- * only replay those buffers which do not have corresponding cancel
- * records in the table. See xlog_recover_buffer_pass[1,2] above
- * for more details on the implementation of the table of cancel records.
- */
-STATIC int
-xlog_recover_buffer_pass2(
- struct xlog *log,
- struct list_head *buffer_list,
- struct xlog_recover_item *item,
- xfs_lsn_t current_lsn)
-{
- xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr;
- xfs_mount_t *mp = log->l_mp;
- xfs_buf_t *bp;
- int error;
- uint buf_flags;
- xfs_lsn_t lsn;
-
- /*
- * In this pass we only want to recover all the buffers which have
- * not been cancelled and are not cancellation buffers themselves.
- */
- if (xlog_check_buffer_cancelled(log, buf_f->blf_blkno,
- buf_f->blf_len, buf_f->blf_flags)) {
- trace_xfs_log_recover_buf_cancel(log, buf_f);
- return 0;
- }
-
- trace_xfs_log_recover_buf_recover(log, buf_f);
-
- buf_flags = 0;
- if (buf_f->blf_flags & XFS_BLF_INODE_BUF)
- buf_flags |= XBF_UNMAPPED;
-
- error = xfs_buf_read(mp->m_ddev_targp, buf_f->blf_blkno, buf_f->blf_len,
- buf_flags, &bp, NULL);
- if (error)
- return error;
-
- /*
- * Recover the buffer only if we get an LSN from it and it's less than
- * the lsn of the transaction we are replaying.
- *
- * Note that we have to be extremely careful of readahead here.
- * Readahead does not attach verifiers to the buffers, so if we don't
- * actually do any replay after readahead because the LSN found in the
- * buffer is more recent than the current transaction, then we need to
- * attach the verifier directly. Failure to do so means that future
- * recovery actions (e.g. EFI and unlinked list recovery) can operate
- * on the buffers without the verifier attached. This can leave blocks
- * on disk with the correct content but a stale CRC.
- *
- * It is safe to assume these clean buffers are currently up to date.
- * If the buffer is dirtied by a later transaction being replayed, then
- * the verifier will be reset to match whatever recovery turns that
- * buffer into.
- */
- lsn = xlog_recover_get_buf_lsn(mp, bp);
- if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) {
- trace_xfs_log_recover_buf_skip(log, buf_f);
- xlog_recover_validate_buf_type(mp, bp, buf_f, NULLCOMMITLSN);
- goto out_release;
- }
-
- if (buf_f->blf_flags & XFS_BLF_INODE_BUF) {
- error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f);
- if (error)
- goto out_release;
- } else if (buf_f->blf_flags &
- (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) {
- bool dirty;
-
- dirty = xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f);
- if (!dirty)
- goto out_release;
- } else {
- xlog_recover_do_reg_buffer(mp, item, bp, buf_f, current_lsn);
- }
-
- /*
- * Perform delayed write on the buffer. Asynchronous writes will be
- * slower when taking into account all the buffers to be flushed.
- *
- * Also make sure that only inode buffers with good sizes stay in
- * the buffer cache. The kernel moves inodes in buffers of 1 block
- * or inode_cluster_size bytes, whichever is bigger. The inode
- * buffers in the log can be a different size if the log was generated
- * by an older kernel using unclustered inode buffers or a newer kernel
- * running with a different inode cluster size. Regardless, if the
- * inode buffer size isn't max(blocksize, inode_cluster_size)
- * for *our* value of inode_cluster_size, then we need to keep
- * the buffer out of the buffer cache so that the buffer won't
- * overlap with future reads of those inodes.
- */
- if (XFS_DINODE_MAGIC ==
- be16_to_cpu(*((__be16 *)xfs_buf_offset(bp, 0))) &&
- (BBTOB(bp->b_length) != M_IGEO(log->l_mp)->inode_cluster_size)) {
- xfs_buf_stale(bp);
- error = xfs_bwrite(bp);
- } else {
- ASSERT(bp->b_mount == mp);
- bp->b_iodone = xlog_recover_iodone;
- xfs_buf_delwri_queue(bp, buffer_list);
- }
-
-out_release:
- xfs_buf_relse(bp);
- return error;
-}
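The skip test in xlog_recover_buffer_pass2() hinges on LSN ordering: if the LSN stamped in the buffer is at or beyond the LSN of the transaction being replayed, the buffer already contains that change and replaying would roll it backwards. A toy sketch of the decision, treating LSNs as plain 64-bit integers rather than the cycle/block pair that XFS_LSN_CMP() actually compares:

#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

/*
 * Decide whether a logged change must be replayed into a buffer.
 * Simplified: 0 and -1 mean "no valid LSN stamped in the buffer",
 * so replay to be safe.
 */
static bool must_replay(int64_t buf_lsn, int64_t trans_lsn)
{
        if (buf_lsn == 0 || buf_lsn == -1)
                return true;            /* unknown vintage: replay */
        return buf_lsn < trans_lsn;     /* older than the change: replay */
}

int main(void)
{
        printf("%d\n", must_replay(100, 200));  /* 1: buffer is older */
        printf("%d\n", must_replay(200, 200));  /* 0: already contains it */
        printf("%d\n", must_replay(-1, 200));   /* 1: no LSN, replay */
        return 0;
}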
-
-/*
- * Inode fork owner changes
- *
- * If we have been told that we have to reparent the inode fork, it's because an
- * extent swap operation on a CRC enabled filesystem has been done and we are
- * replaying it. We need to walk the BMBT of the appropriate fork and change the
- * owners of it.
- *
- * The complexity here is that we don't have an inode context to work with, so
- * after we've replayed the inode we need to instantiate one. This is where the
- * fun begins.
- *
- * We are in the middle of log recovery, so we can't run transactions. That
- * means we cannot use cache coherent inode instantiation via xfs_iget(), as
- * that will result in the corresponding iput() running the inode through
- * xfs_inactive(). If we've just replayed an inode core that changes the link
- * count to zero (i.e. it's been unlinked), then xfs_inactive() will run
- * transactions (bad!).
- *
- * So, to avoid this, we instantiate an inode directly from the inode core we've
- * just recovered. We have the buffer still locked, and all we really need to
- * instantiate is the inode core and the forks being modified. We can do this
- * manually, then run the inode btree owner change, and then tear down the
- * xfs_inode without having to run any transactions at all.
- *
- * Also, because we don't have a transaction context available here and need to
- * gather all the buffers we modify for writeback, we pass the buffer_list
- * to the operation instead.
- */
-
-STATIC int
-xfs_recover_inode_owner_change(
- struct xfs_mount *mp,
- struct xfs_dinode *dip,
- struct xfs_inode_log_format *in_f,
- struct list_head *buffer_list)
+ const struct xfs_buf_ops *ops)
{
- struct xfs_inode *ip;
- int error;
-
- ASSERT(in_f->ilf_fields & (XFS_ILOG_DOWNER|XFS_ILOG_AOWNER));
-
- ip = xfs_inode_alloc(mp, in_f->ilf_ino);
- if (!ip)
- return -ENOMEM;
-
- /* instantiate the inode */
- ASSERT(dip->di_version >= 3);
- xfs_inode_from_disk(ip, dip);
-
- error = xfs_iformat_fork(ip, dip);
- if (error)
- goto out_free_ip;
-
- if (!xfs_inode_verify_forks(ip)) {
- error = -EFSCORRUPTED;
- goto out_free_ip;
- }
-
- if (in_f->ilf_fields & XFS_ILOG_DOWNER) {
- ASSERT(in_f->ilf_fields & XFS_ILOG_DBROOT);
- error = xfs_bmbt_change_owner(NULL, ip, XFS_DATA_FORK,
- ip->i_ino, buffer_list);
- if (error)
- goto out_free_ip;
- }
-
- if (in_f->ilf_fields & XFS_ILOG_AOWNER) {
- ASSERT(in_f->ilf_fields & XFS_ILOG_ABROOT);
- error = xfs_bmbt_change_owner(NULL, ip, XFS_ATTR_FORK,
- ip->i_ino, buffer_list);
- if (error)
- goto out_free_ip;
- }
-
-out_free_ip:
- xfs_inode_free(ip);
- return error;
-}
-
-STATIC int
-xlog_recover_inode_pass2(
- struct xlog *log,
- struct list_head *buffer_list,
- struct xlog_recover_item *item,
- xfs_lsn_t current_lsn)
-{
- struct xfs_inode_log_format *in_f;
- xfs_mount_t *mp = log->l_mp;
- xfs_buf_t *bp;
- xfs_dinode_t *dip;
- int len;
- char *src;
- char *dest;
- int error;
- int attr_index;
- uint fields;
- struct xfs_log_dinode *ldip;
- uint isize;
- int need_free = 0;
-
- if (item->ri_buf[0].i_len == sizeof(struct xfs_inode_log_format)) {
- in_f = item->ri_buf[0].i_addr;
- } else {
- in_f = kmem_alloc(sizeof(struct xfs_inode_log_format), 0);
- need_free = 1;
- error = xfs_inode_item_format_convert(&item->ri_buf[0], in_f);
- if (error)
- goto error;
- }
-
- /*
- * Inode buffers can be freed; look out for that
- * and do not replay the inode.
- */
- if (xlog_check_buffer_cancelled(log, in_f->ilf_blkno,
- in_f->ilf_len, 0)) {
- error = 0;
- trace_xfs_log_recover_inode_cancel(log, in_f);
- goto error;
- }
- trace_xfs_log_recover_inode_recover(log, in_f);
-
- error = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len,
- 0, &bp, &xfs_inode_buf_ops);
- if (error)
- goto error;
- ASSERT(in_f->ilf_fields & XFS_ILOG_CORE);
- dip = xfs_buf_offset(bp, in_f->ilf_boffset);
-
- /*
- * Make sure the place we're flushing out to really looks
- * like an inode!
- */
- if (XFS_IS_CORRUPT(mp, !xfs_verify_magic16(bp, dip->di_magic))) {
- xfs_alert(mp,
- "%s: Bad inode magic number, dip = "PTR_FMT", dino bp = "PTR_FMT", ino = %Ld",
- __func__, dip, bp, in_f->ilf_ino);
- error = -EFSCORRUPTED;
- goto out_release;
- }
- ldip = item->ri_buf[1].i_addr;
- if (XFS_IS_CORRUPT(mp, ldip->di_magic != XFS_DINODE_MAGIC)) {
- xfs_alert(mp,
- "%s: Bad inode log record, rec ptr "PTR_FMT", ino %Ld",
- __func__, item, in_f->ilf_ino);
- error = -EFSCORRUPTED;
- goto out_release;
- }
-
- /*
- * If the inode has an LSN in it, recover the inode only if it's less
- * than the lsn of the transaction we are replaying. Note: we still
- * need to replay an owner change even though the inode is more recent
- * than the transaction, as there is no guarantee that all the btree
- * blocks are also more recent than this transaction.
- */
- if (dip->di_version >= 3) {
- xfs_lsn_t lsn = be64_to_cpu(dip->di_lsn);
-
- if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) {
- trace_xfs_log_recover_inode_skip(log, in_f);
- error = 0;
- goto out_owner_change;
- }
- }
-
- /*
- * di_flushiter is only valid for v1/2 inodes. All changes for v3 inodes
- * are transactional and if ordering is necessary we can determine that
- * more accurately by the LSN field in the V3 inode core. Don't trust
- * the inode versions as we might be changing them here - use the
- * superblock flag to determine whether we need to look at di_flushiter
- * to skip replay when the on disk inode is newer than the log one.
- */
- if (!xfs_sb_version_has_v3inode(&mp->m_sb) &&
- ldip->di_flushiter < be16_to_cpu(dip->di_flushiter)) {
- /*
- * Deal with the wrap case, DI_MAX_FLUSH is less
- * than smaller numbers
- */
- if (be16_to_cpu(dip->di_flushiter) == DI_MAX_FLUSH &&
- ldip->di_flushiter < (DI_MAX_FLUSH >> 1)) {
- /* do nothing */
- } else {
- trace_xfs_log_recover_inode_skip(log, in_f);
- error = 0;
- goto out_release;
- }
- }
-
- /* Take the opportunity to reset the flush iteration count */
- ldip->di_flushiter = 0;
-
- if (unlikely(S_ISREG(ldip->di_mode))) {
- if ((ldip->di_format != XFS_DINODE_FMT_EXTENTS) &&
- (ldip->di_format != XFS_DINODE_FMT_BTREE)) {
- XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(3)",
- XFS_ERRLEVEL_LOW, mp, ldip,
- sizeof(*ldip));
- xfs_alert(mp,
- "%s: Bad regular inode log record, rec ptr "PTR_FMT", "
- "ino ptr = "PTR_FMT", ino bp = "PTR_FMT", ino %Ld",
- __func__, item, dip, bp, in_f->ilf_ino);
- error = -EFSCORRUPTED;
- goto out_release;
- }
- } else if (unlikely(S_ISDIR(ldip->di_mode))) {
- if ((ldip->di_format != XFS_DINODE_FMT_EXTENTS) &&
- (ldip->di_format != XFS_DINODE_FMT_BTREE) &&
- (ldip->di_format != XFS_DINODE_FMT_LOCAL)) {
- XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(4)",
- XFS_ERRLEVEL_LOW, mp, ldip,
- sizeof(*ldip));
- xfs_alert(mp,
- "%s: Bad dir inode log record, rec ptr "PTR_FMT", "
- "ino ptr = "PTR_FMT", ino bp = "PTR_FMT", ino %Ld",
- __func__, item, dip, bp, in_f->ilf_ino);
- error = -EFSCORRUPTED;
- goto out_release;
- }
- }
- if (unlikely(ldip->di_nextents + ldip->di_anextents > ldip->di_nblocks)){
- XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)",
- XFS_ERRLEVEL_LOW, mp, ldip,
- sizeof(*ldip));
- xfs_alert(mp,
- "%s: Bad inode log record, rec ptr "PTR_FMT", dino ptr "PTR_FMT", "
- "dino bp "PTR_FMT", ino %Ld, total extents = %d, nblocks = %Ld",
- __func__, item, dip, bp, in_f->ilf_ino,
- ldip->di_nextents + ldip->di_anextents,
- ldip->di_nblocks);
- error = -EFSCORRUPTED;
- goto out_release;
- }
- if (unlikely(ldip->di_forkoff > mp->m_sb.sb_inodesize)) {
- XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(6)",
- XFS_ERRLEVEL_LOW, mp, ldip,
- sizeof(*ldip));
- xfs_alert(mp,
- "%s: Bad inode log record, rec ptr "PTR_FMT", dino ptr "PTR_FMT", "
- "dino bp "PTR_FMT", ino %Ld, forkoff 0x%x", __func__,
- item, dip, bp, in_f->ilf_ino, ldip->di_forkoff);
- error = -EFSCORRUPTED;
- goto out_release;
- }
- isize = xfs_log_dinode_size(mp);
- if (unlikely(item->ri_buf[1].i_len > isize)) {
- XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)",
- XFS_ERRLEVEL_LOW, mp, ldip,
- sizeof(*ldip));
- xfs_alert(mp,
- "%s: Bad inode log record length %d, rec ptr "PTR_FMT,
- __func__, item->ri_buf[1].i_len, item);
- error = -EFSCORRUPTED;
- goto out_release;
- }
-
-	/* recover the log dinode into the on disk inode */
- xfs_log_dinode_to_disk(ldip, dip);
-
- fields = in_f->ilf_fields;
- if (fields & XFS_ILOG_DEV)
- xfs_dinode_put_rdev(dip, in_f->ilf_u.ilfu_rdev);
-
- if (in_f->ilf_size == 2)
- goto out_owner_change;
- len = item->ri_buf[2].i_len;
- src = item->ri_buf[2].i_addr;
- ASSERT(in_f->ilf_size <= 4);
- ASSERT((in_f->ilf_size == 3) || (fields & XFS_ILOG_AFORK));
- ASSERT(!(fields & XFS_ILOG_DFORK) ||
- (len == in_f->ilf_dsize));
-
- switch (fields & XFS_ILOG_DFORK) {
- case XFS_ILOG_DDATA:
- case XFS_ILOG_DEXT:
- memcpy(XFS_DFORK_DPTR(dip), src, len);
- break;
-
- case XFS_ILOG_DBROOT:
- xfs_bmbt_to_bmdr(mp, (struct xfs_btree_block *)src, len,
- (xfs_bmdr_block_t *)XFS_DFORK_DPTR(dip),
- XFS_DFORK_DSIZE(dip, mp));
- break;
-
- default:
- /*
- * There are no data fork flags set.
- */
- ASSERT((fields & XFS_ILOG_DFORK) == 0);
- break;
- }
-
- /*
- * If we logged any attribute data, recover it. There may or
- * may not have been any other non-core data logged in this
- * transaction.
- */
- if (in_f->ilf_fields & XFS_ILOG_AFORK) {
- if (in_f->ilf_fields & XFS_ILOG_DFORK) {
- attr_index = 3;
- } else {
- attr_index = 2;
- }
- len = item->ri_buf[attr_index].i_len;
- src = item->ri_buf[attr_index].i_addr;
- ASSERT(len == in_f->ilf_asize);
-
- switch (in_f->ilf_fields & XFS_ILOG_AFORK) {
- case XFS_ILOG_ADATA:
- case XFS_ILOG_AEXT:
- dest = XFS_DFORK_APTR(dip);
- ASSERT(len <= XFS_DFORK_ASIZE(dip, mp));
- memcpy(dest, src, len);
- break;
-
- case XFS_ILOG_ABROOT:
- dest = XFS_DFORK_APTR(dip);
- xfs_bmbt_to_bmdr(mp, (struct xfs_btree_block *)src,
- len, (xfs_bmdr_block_t*)dest,
- XFS_DFORK_ASIZE(dip, mp));
- break;
-
- default:
- xfs_warn(log->l_mp, "%s: Invalid flag", __func__);
- ASSERT(0);
- error = -EFSCORRUPTED;
- goto out_release;
- }
- }
-
-out_owner_change:
- /* Recover the swapext owner change unless inode has been deleted */
- if ((in_f->ilf_fields & (XFS_ILOG_DOWNER|XFS_ILOG_AOWNER)) &&
- (dip->di_mode != 0))
- error = xfs_recover_inode_owner_change(mp, dip, in_f,
- buffer_list);
- /* re-generate the checksum. */
- xfs_dinode_calc_crc(log->l_mp, dip);
-
- ASSERT(bp->b_mount == mp);
- bp->b_iodone = xlog_recover_iodone;
- xfs_buf_delwri_queue(bp, buffer_list);
-
-out_release:
- xfs_buf_relse(bp);
-error:
- if (need_free)
- kmem_free(in_f);
- return error;
-}
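The di_flushiter check in xlog_recover_inode_pass2() has to tolerate the counter wrapping at DI_MAX_FLUSH: an on-disk value at the maximum paired with a very small logged value means the logged copy has already wrapped and is actually newer. A tiny sketch of that wrap-aware staleness test, using a hypothetical 16-bit counter:

#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

#define MAX_FLUSH 0xffff        /* counter wraps back to 0 after this */

/* Return true when the logged copy is stale relative to the on-disk one. */
static bool log_copy_is_stale(uint16_t log_iter, uint16_t disk_iter)
{
        if (log_iter >= disk_iter)
                return false;
        /* Wrap case: disk at MAX, log just restarted near zero => not stale. */
        if (disk_iter == MAX_FLUSH && log_iter < (MAX_FLUSH >> 1))
                return false;
        return true;
}

int main(void)
{
        printf("%d\n", log_copy_is_stale(5, 10));        /* 1: skip replay */
        printf("%d\n", log_copy_is_stale(3, MAX_FLUSH)); /* 0: wrapped, replay */
        return 0;
}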
-
-/*
- * Recover QUOTAOFF records. We simply make a note of it in the xlog
- * structure, so that we know not to do any dquot item or dquot buffer
- * recovery of that type.
- */
-STATIC int
-xlog_recover_quotaoff_pass1(
- struct xlog *log,
- struct xlog_recover_item *item)
-{
- xfs_qoff_logformat_t *qoff_f = item->ri_buf[0].i_addr;
- ASSERT(qoff_f);
-
- /*
- * The logitem format's flag tells us if this was user quotaoff,
- * group/project quotaoff or both.
- */
- if (qoff_f->qf_flags & XFS_UQUOTA_ACCT)
- log->l_quotaoffs_flag |= XFS_DQ_USER;
- if (qoff_f->qf_flags & XFS_PQUOTA_ACCT)
- log->l_quotaoffs_flag |= XFS_DQ_PROJ;
- if (qoff_f->qf_flags & XFS_GQUOTA_ACCT)
- log->l_quotaoffs_flag |= XFS_DQ_GROUP;
-
- return 0;
-}
-
-/*
- * Recover a dquot record
- */
-STATIC int
-xlog_recover_dquot_pass2(
- struct xlog *log,
- struct list_head *buffer_list,
- struct xlog_recover_item *item,
- xfs_lsn_t current_lsn)
-{
- xfs_mount_t *mp = log->l_mp;
- xfs_buf_t *bp;
- struct xfs_disk_dquot *ddq, *recddq;
- xfs_failaddr_t fa;
- int error;
- xfs_dq_logformat_t *dq_f;
- uint type;
-
-
- /*
- * Filesystems are required to send in quota flags at mount time.
- */
- if (mp->m_qflags == 0)
- return 0;
-
- recddq = item->ri_buf[1].i_addr;
- if (recddq == NULL) {
- xfs_alert(log->l_mp, "NULL dquot in %s.", __func__);
- return -EFSCORRUPTED;
- }
- if (item->ri_buf[1].i_len < sizeof(struct xfs_disk_dquot)) {
- xfs_alert(log->l_mp, "dquot too small (%d) in %s.",
- item->ri_buf[1].i_len, __func__);
- return -EFSCORRUPTED;
- }
-
- /*
- * This type of quota was turned off, so ignore this record.
- */
- type = recddq->d_flags & (XFS_DQ_USER | XFS_DQ_PROJ | XFS_DQ_GROUP);
- ASSERT(type);
- if (log->l_quotaoffs_flag & type)
- return 0;
-
- /*
- * At this point we know that quota was _not_ turned off.
- * Since the mount flags are not indicating to us otherwise, this
- * must mean that quota is on, and the dquot needs to be replayed.
- * Remember that we may not have fully recovered the superblock yet,
- * so we can't do the usual trick of looking at the SB quota bits.
- *
- * The other possibility, of course, is that the quota subsystem was
- * removed since the last mount - ENOSYS.
- */
- dq_f = item->ri_buf[0].i_addr;
- ASSERT(dq_f);
- fa = xfs_dquot_verify(mp, recddq, dq_f->qlf_id, 0);
- if (fa) {
- xfs_alert(mp, "corrupt dquot ID 0x%x in log at %pS",
- dq_f->qlf_id, fa);
- return -EFSCORRUPTED;
- }
- ASSERT(dq_f->qlf_len == 1);
-
- /*
- * At this point we are assuming that the dquots have been allocated
- * and hence the buffer has valid dquots stamped in it. It should,
- * therefore, pass verifier validation. If the dquot is bad, then we'll
- * return an error here, so we don't need to specifically check
- * the dquot in the buffer after the verifier has run.
- */
- error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dq_f->qlf_blkno,
- XFS_FSB_TO_BB(mp, dq_f->qlf_len), 0, &bp,
- &xfs_dquot_buf_ops);
- if (error)
- return error;
-
- ASSERT(bp);
- ddq = xfs_buf_offset(bp, dq_f->qlf_boffset);
-
- /*
- * If the dquot has an LSN in it, recover the dquot only if it's less
- * than the lsn of the transaction we are replaying.
- */
- if (xfs_sb_version_hascrc(&mp->m_sb)) {
- struct xfs_dqblk *dqb = (struct xfs_dqblk *)ddq;
- xfs_lsn_t lsn = be64_to_cpu(dqb->dd_lsn);
-
- if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) {
- goto out_release;
- }
- }
-
- memcpy(ddq, recddq, item->ri_buf[1].i_len);
- if (xfs_sb_version_hascrc(&mp->m_sb)) {
- xfs_update_cksum((char *)ddq, sizeof(struct xfs_dqblk),
- XFS_DQUOT_CRC_OFF);
- }
-
- ASSERT(dq_f->qlf_size == 2);
- ASSERT(bp->b_mount == mp);
- bp->b_iodone = xlog_recover_iodone;
- xfs_buf_delwri_queue(bp, buffer_list);
-
-out_release:
- xfs_buf_relse(bp);
- return 0;
-}
-
-/*
- * This routine is called to create an in-core extent free intent
- * item from the efi format structure which was logged on disk.
- * It allocates an in-core efi, copies the extents from the format
- * structure into it, and adds the efi to the AIL with the given
- * LSN.
- */
-STATIC int
-xlog_recover_efi_pass2(
- struct xlog *log,
- struct xlog_recover_item *item,
- xfs_lsn_t lsn)
-{
- int error;
- struct xfs_mount *mp = log->l_mp;
- struct xfs_efi_log_item *efip;
- struct xfs_efi_log_format *efi_formatp;
-
- efi_formatp = item->ri_buf[0].i_addr;
-
- efip = xfs_efi_init(mp, efi_formatp->efi_nextents);
- error = xfs_efi_copy_format(&item->ri_buf[0], &efip->efi_format);
- if (error) {
- xfs_efi_item_free(efip);
- return error;
- }
- atomic_set(&efip->efi_next_extent, efi_formatp->efi_nextents);
-
- spin_lock(&log->l_ailp->ail_lock);
- /*
- * The EFI has two references. One for the EFD and one for EFI to ensure
- * it makes it into the AIL. Insert the EFI into the AIL directly and
- * drop the EFI reference. Note that xfs_trans_ail_update() drops the
- * AIL lock.
- */
- xfs_trans_ail_update(log->l_ailp, &efip->efi_item, lsn);
- xfs_efi_release(efip);
- return 0;
-}
-
-
-/*
- * This routine is called when an EFD format structure is found in a committed
- * transaction in the log. Its purpose is to cancel the corresponding EFI if it
- * was still in the log. To do this it searches the AIL for the EFI with an id
- * equal to that in the EFD format structure. If we find it we drop the EFD
- * reference, which removes the EFI from the AIL and frees it.
- */
-STATIC int
-xlog_recover_efd_pass2(
- struct xlog *log,
- struct xlog_recover_item *item)
-{
- xfs_efd_log_format_t *efd_formatp;
- xfs_efi_log_item_t *efip = NULL;
- struct xfs_log_item *lip;
- uint64_t efi_id;
- struct xfs_ail_cursor cur;
- struct xfs_ail *ailp = log->l_ailp;
-
- efd_formatp = item->ri_buf[0].i_addr;
- ASSERT((item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_32_t) +
- ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_32_t)))) ||
- (item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_64_t) +
- ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_64_t)))));
- efi_id = efd_formatp->efd_efi_id;
-
- /*
- * Search for the EFI with the id in the EFD format structure in the
- * AIL.
- */
- spin_lock(&ailp->ail_lock);
- lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
- while (lip != NULL) {
- if (lip->li_type == XFS_LI_EFI) {
- efip = (xfs_efi_log_item_t *)lip;
- if (efip->efi_format.efi_id == efi_id) {
- /*
- * Drop the EFD reference to the EFI. This
- * removes the EFI from the AIL and frees it.
- */
- spin_unlock(&ailp->ail_lock);
- xfs_efi_release(efip);
- spin_lock(&ailp->ail_lock);
- break;
- }
- }
- lip = xfs_trans_ail_cursor_next(ailp, &cur);
- }
-
- xfs_trans_ail_cursor_done(&cur);
- spin_unlock(&ailp->ail_lock);
-
- return 0;
-}
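The EFI/EFD pair above (and the RUI/RUD, CUI/CUD and BUI/BUD pairs that follow) all implement the same pattern: a "done" record cancels the pending intent whose id matches. A simplified sketch of that id-matching cancellation over a plain array, standing in for the AIL walk and cursor machinery:

#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

struct intent {
        uint64_t id;
        bool     pending;
};

/* Cancel the pending intent whose id matches the "done" record. */
static void cancel_intent(struct intent *intents, int n, uint64_t done_id)
{
        for (int i = 0; i < n; i++) {
                if (intents[i].pending && intents[i].id == done_id) {
                        intents[i].pending = false;
                        return;
                }
        }
        /* No match: the intent was never logged or already completed. */
}

int main(void)
{
        struct intent intents[] = {
                { .id = 7, .pending = true },
                { .id = 9, .pending = true },
        };

        cancel_intent(intents, 2, 9);
        for (int i = 0; i < 2; i++)
                printf("intent %llu pending=%d\n",
                       (unsigned long long)intents[i].id, intents[i].pending);
        return 0;
}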
-
-/*
- * This routine is called to create an in-core extent rmap update
- * item from the rui format structure which was logged on disk.
- * It allocates an in-core rui, copies the extents from the format
- * structure into it, and adds the rui to the AIL with the given
- * LSN.
- */
-STATIC int
-xlog_recover_rui_pass2(
- struct xlog *log,
- struct xlog_recover_item *item,
- xfs_lsn_t lsn)
-{
- int error;
- struct xfs_mount *mp = log->l_mp;
- struct xfs_rui_log_item *ruip;
- struct xfs_rui_log_format *rui_formatp;
-
- rui_formatp = item->ri_buf[0].i_addr;
-
- ruip = xfs_rui_init(mp, rui_formatp->rui_nextents);
- error = xfs_rui_copy_format(&item->ri_buf[0], &ruip->rui_format);
- if (error) {
- xfs_rui_item_free(ruip);
- return error;
- }
- atomic_set(&ruip->rui_next_extent, rui_formatp->rui_nextents);
-
- spin_lock(&log->l_ailp->ail_lock);
- /*
- * The RUI has two references. One for the RUD and one for RUI to ensure
- * it makes it into the AIL. Insert the RUI into the AIL directly and
- * drop the RUI reference. Note that xfs_trans_ail_update() drops the
- * AIL lock.
- */
- xfs_trans_ail_update(log->l_ailp, &ruip->rui_item, lsn);
- xfs_rui_release(ruip);
- return 0;
-}
-
-
-/*
- * This routine is called when an RUD format structure is found in a committed
- * transaction in the log. Its purpose is to cancel the corresponding RUI if it
- * was still in the log. To do this it searches the AIL for the RUI with an id
- * equal to that in the RUD format structure. If we find it we drop the RUD
- * reference, which removes the RUI from the AIL and frees it.
- */
-STATIC int
-xlog_recover_rud_pass2(
- struct xlog *log,
- struct xlog_recover_item *item)
-{
- struct xfs_rud_log_format *rud_formatp;
- struct xfs_rui_log_item *ruip = NULL;
- struct xfs_log_item *lip;
- uint64_t rui_id;
- struct xfs_ail_cursor cur;
- struct xfs_ail *ailp = log->l_ailp;
-
- rud_formatp = item->ri_buf[0].i_addr;
- ASSERT(item->ri_buf[0].i_len == sizeof(struct xfs_rud_log_format));
- rui_id = rud_formatp->rud_rui_id;
-
- /*
- * Search for the RUI with the id in the RUD format structure in the
- * AIL.
- */
- spin_lock(&ailp->ail_lock);
- lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
- while (lip != NULL) {
- if (lip->li_type == XFS_LI_RUI) {
- ruip = (struct xfs_rui_log_item *)lip;
- if (ruip->rui_format.rui_id == rui_id) {
- /*
- * Drop the RUD reference to the RUI. This
- * removes the RUI from the AIL and frees it.
- */
- spin_unlock(&ailp->ail_lock);
- xfs_rui_release(ruip);
- spin_lock(&ailp->ail_lock);
- break;
- }
- }
- lip = xfs_trans_ail_cursor_next(ailp, &cur);
- }
-
- xfs_trans_ail_cursor_done(&cur);
- spin_unlock(&ailp->ail_lock);
-
- return 0;
-}
-
-/*
- * Copy a CUI format buffer from the given buf into the destination
- * CUI format structure. The CUI/CUD items were designed not to need any
- * special alignment handling.
- */
-static int
-xfs_cui_copy_format(
- struct xfs_log_iovec *buf,
- struct xfs_cui_log_format *dst_cui_fmt)
-{
- struct xfs_cui_log_format *src_cui_fmt;
- uint len;
-
- src_cui_fmt = buf->i_addr;
- len = xfs_cui_log_format_sizeof(src_cui_fmt->cui_nextents);
-
- if (buf->i_len == len) {
- memcpy(dst_cui_fmt, src_cui_fmt, len);
- return 0;
- }
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL);
- return -EFSCORRUPTED;
-}
-
-/*
- * This routine is called to create an in-core extent refcount update
- * item from the cui format structure which was logged on disk.
- * It allocates an in-core cui, copies the extents from the format
- * structure into it, and adds the cui to the AIL with the given
- * LSN.
- */
-STATIC int
-xlog_recover_cui_pass2(
- struct xlog *log,
- struct xlog_recover_item *item,
- xfs_lsn_t lsn)
-{
- int error;
- struct xfs_mount *mp = log->l_mp;
- struct xfs_cui_log_item *cuip;
- struct xfs_cui_log_format *cui_formatp;
-
- cui_formatp = item->ri_buf[0].i_addr;
-
- cuip = xfs_cui_init(mp, cui_formatp->cui_nextents);
- error = xfs_cui_copy_format(&item->ri_buf[0], &cuip->cui_format);
- if (error) {
- xfs_cui_item_free(cuip);
- return error;
- }
- atomic_set(&cuip->cui_next_extent, cui_formatp->cui_nextents);
-
- spin_lock(&log->l_ailp->ail_lock);
- /*
- * The CUI has two references. One for the CUD and one for CUI to ensure
- * it makes it into the AIL. Insert the CUI into the AIL directly and
- * drop the CUI reference. Note that xfs_trans_ail_update() drops the
- * AIL lock.
- */
- xfs_trans_ail_update(log->l_ailp, &cuip->cui_item, lsn);
- xfs_cui_release(cuip);
- return 0;
-}
-
-
-/*
- * This routine is called when a CUD format structure is found in a committed
- * transaction in the log. Its purpose is to cancel the corresponding CUI if it
- * was still in the log. To do this it searches the AIL for the CUI with an id
- * equal to that in the CUD format structure. If we find it we drop the CUD
- * reference, which removes the CUI from the AIL and frees it.
- */
-STATIC int
-xlog_recover_cud_pass2(
- struct xlog *log,
- struct xlog_recover_item *item)
-{
- struct xfs_cud_log_format *cud_formatp;
- struct xfs_cui_log_item *cuip = NULL;
- struct xfs_log_item *lip;
- uint64_t cui_id;
- struct xfs_ail_cursor cur;
- struct xfs_ail *ailp = log->l_ailp;
-
- cud_formatp = item->ri_buf[0].i_addr;
- if (item->ri_buf[0].i_len != sizeof(struct xfs_cud_log_format)) {
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp);
- return -EFSCORRUPTED;
- }
- cui_id = cud_formatp->cud_cui_id;
-
- /*
- * Search for the CUI with the id in the CUD format structure in the
- * AIL.
- */
- spin_lock(&ailp->ail_lock);
- lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
- while (lip != NULL) {
- if (lip->li_type == XFS_LI_CUI) {
- cuip = (struct xfs_cui_log_item *)lip;
- if (cuip->cui_format.cui_id == cui_id) {
- /*
- * Drop the CUD reference to the CUI. This
- * removes the CUI from the AIL and frees it.
- */
- spin_unlock(&ailp->ail_lock);
- xfs_cui_release(cuip);
- spin_lock(&ailp->ail_lock);
- break;
- }
- }
- lip = xfs_trans_ail_cursor_next(ailp, &cur);
- }
-
- xfs_trans_ail_cursor_done(&cur);
- spin_unlock(&ailp->ail_lock);
-
- return 0;
-}
-
-/*
- * Copy a BUI format buffer from the given buf into the destination
- * BUI format structure. The BUI/BUD items were designed not to need any
- * special alignment handling.
- */
-static int
-xfs_bui_copy_format(
- struct xfs_log_iovec *buf,
- struct xfs_bui_log_format *dst_bui_fmt)
-{
- struct xfs_bui_log_format *src_bui_fmt;
- uint len;
-
- src_bui_fmt = buf->i_addr;
- len = xfs_bui_log_format_sizeof(src_bui_fmt->bui_nextents);
-
- if (buf->i_len == len) {
- memcpy(dst_bui_fmt, src_bui_fmt, len);
- return 0;
- }
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL);
- return -EFSCORRUPTED;
-}
-
-/*
- * This routine is called to create an in-core extent bmap update
- * item from the bui format structure which was logged on disk.
- * It allocates an in-core bui, copies the extents from the format
- * structure into it, and adds the bui to the AIL with the given
- * LSN.
- */
-STATIC int
-xlog_recover_bui_pass2(
- struct xlog *log,
- struct xlog_recover_item *item,
- xfs_lsn_t lsn)
-{
- int error;
- struct xfs_mount *mp = log->l_mp;
- struct xfs_bui_log_item *buip;
- struct xfs_bui_log_format *bui_formatp;
-
- bui_formatp = item->ri_buf[0].i_addr;
-
- if (bui_formatp->bui_nextents != XFS_BUI_MAX_FAST_EXTENTS) {
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp);
- return -EFSCORRUPTED;
- }
- buip = xfs_bui_init(mp);
- error = xfs_bui_copy_format(&item->ri_buf[0], &buip->bui_format);
- if (error) {
- xfs_bui_item_free(buip);
- return error;
- }
- atomic_set(&buip->bui_next_extent, bui_formatp->bui_nextents);
-
- spin_lock(&log->l_ailp->ail_lock);
- /*
- * The BUI has two references. One for the BUD and one for BUI to ensure
- * it makes it into the AIL. Insert the BUI into the AIL directly and
- * drop the BUI reference. Note that xfs_trans_ail_update() drops the
- * AIL lock.
- */
- xfs_trans_ail_update(log->l_ailp, &buip->bui_item, lsn);
- xfs_bui_release(buip);
- return 0;
-}
-
-
-/*
- * This routine is called when a BUD format structure is found in a committed
- * transaction in the log. Its purpose is to cancel the corresponding BUI if it
- * was still in the log. To do this it searches the AIL for the BUI with an id
- * equal to that in the BUD format structure. If we find it we drop the BUD
- * reference, which removes the BUI from the AIL and frees it.
- */
-STATIC int
-xlog_recover_bud_pass2(
- struct xlog *log,
- struct xlog_recover_item *item)
-{
- struct xfs_bud_log_format *bud_formatp;
- struct xfs_bui_log_item *buip = NULL;
- struct xfs_log_item *lip;
- uint64_t bui_id;
- struct xfs_ail_cursor cur;
- struct xfs_ail *ailp = log->l_ailp;
-
- bud_formatp = item->ri_buf[0].i_addr;
- if (item->ri_buf[0].i_len != sizeof(struct xfs_bud_log_format)) {
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp);
- return -EFSCORRUPTED;
- }
- bui_id = bud_formatp->bud_bui_id;
-
- /*
- * Search for the BUI with the id in the BUD format structure in the
- * AIL.
- */
- spin_lock(&ailp->ail_lock);
- lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
- while (lip != NULL) {
- if (lip->li_type == XFS_LI_BUI) {
- buip = (struct xfs_bui_log_item *)lip;
- if (buip->bui_format.bui_id == bui_id) {
- /*
- * Drop the BUD reference to the BUI. This
- * removes the BUI from the AIL and frees it.
- */
- spin_unlock(&ailp->ail_lock);
- xfs_bui_release(buip);
- spin_lock(&ailp->ail_lock);
- break;
- }
- }
- lip = xfs_trans_ail_cursor_next(ailp, &cur);
- }
-
- xfs_trans_ail_cursor_done(&cur);
- spin_unlock(&ailp->ail_lock);
-
- return 0;
-}
-
-/*
- * This routine is called when an inode create format structure is found in a
- * committed transaction in the log. Its purpose is to initialise the inodes
- * being allocated on disk. This requires us to get inode cluster buffers that
- * match the range to be initialised, stamped with inode templates and written
- * by delayed write so that subsequent modifications will hit the cached buffer
- * and only need writing out at the end of recovery.
- */
-STATIC int
-xlog_recover_do_icreate_pass2(
- struct xlog *log,
- struct list_head *buffer_list,
- xlog_recover_item_t *item)
-{
- struct xfs_mount *mp = log->l_mp;
- struct xfs_icreate_log *icl;
- struct xfs_ino_geometry *igeo = M_IGEO(mp);
- xfs_agnumber_t agno;
- xfs_agblock_t agbno;
- unsigned int count;
- unsigned int isize;
- xfs_agblock_t length;
- int bb_per_cluster;
- int cancel_count;
- int nbufs;
- int i;
-
- icl = (struct xfs_icreate_log *)item->ri_buf[0].i_addr;
- if (icl->icl_type != XFS_LI_ICREATE) {
- xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad type");
- return -EINVAL;
- }
-
- if (icl->icl_size != 1) {
- xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad icl size");
- return -EINVAL;
- }
-
- agno = be32_to_cpu(icl->icl_ag);
- if (agno >= mp->m_sb.sb_agcount) {
- xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad agno");
- return -EINVAL;
- }
- agbno = be32_to_cpu(icl->icl_agbno);
- if (!agbno || agbno == NULLAGBLOCK || agbno >= mp->m_sb.sb_agblocks) {
- xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad agbno");
- return -EINVAL;
- }
- isize = be32_to_cpu(icl->icl_isize);
- if (isize != mp->m_sb.sb_inodesize) {
- xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad isize");
- return -EINVAL;
- }
- count = be32_to_cpu(icl->icl_count);
- if (!count) {
- xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad count");
- return -EINVAL;
- }
- length = be32_to_cpu(icl->icl_length);
- if (!length || length >= mp->m_sb.sb_agblocks) {
- xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad length");
- return -EINVAL;
- }
-
- /*
- * The inode chunk is either full or sparse and we only support
- * m_ino_geo.ialloc_min_blks sized sparse allocations at this time.
- */
- if (length != igeo->ialloc_blks &&
- length != igeo->ialloc_min_blks) {
- xfs_warn(log->l_mp,
- "%s: unsupported chunk length", __FUNCTION__);
- return -EINVAL;
- }
-
- /* verify inode count is consistent with extent length */
- if ((count >> mp->m_sb.sb_inopblog) != length) {
- xfs_warn(log->l_mp,
- "%s: inconsistent inode count and chunk length",
- __FUNCTION__);
- return -EINVAL;
- }
-
- /*
- * The icreate transaction can cover multiple cluster buffers and these
- * buffers could have been freed and reused. Check the individual
- * buffers for cancellation so we don't overwrite anything written after
- * a cancellation.
- */
- bb_per_cluster = XFS_FSB_TO_BB(mp, igeo->blocks_per_cluster);
- nbufs = length / igeo->blocks_per_cluster;
- for (i = 0, cancel_count = 0; i < nbufs; i++) {
- xfs_daddr_t daddr;
-
- daddr = XFS_AGB_TO_DADDR(mp, agno,
- agbno + i * igeo->blocks_per_cluster);
- if (xlog_check_buffer_cancelled(log, daddr, bb_per_cluster, 0))
- cancel_count++;
- }
-
- /*
- * We currently only use icreate for a single allocation at a time. This
- * means we should expect either all or none of the buffers to be
- * cancelled. Be conservative and skip replay if at least one buffer is
- * cancelled, but warn the user that something is awry if the buffers
- * are not consistent.
- *
- * XXX: This must be refined to only skip cancelled clusters once we use
- * icreate for multiple chunk allocations.
- */
- ASSERT(!cancel_count || cancel_count == nbufs);
- if (cancel_count) {
- if (cancel_count != nbufs)
- xfs_warn(mp,
- "WARNING: partial inode chunk cancellation, skipped icreate.");
- trace_xfs_log_recover_icreate_cancel(log, icl);
- return 0;
- }
-
- trace_xfs_log_recover_icreate_recover(log, icl);
- return xfs_ialloc_inode_init(mp, NULL, buffer_list, count, agno, agbno,
- length, be32_to_cpu(icl->icl_gen));
-}
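The count/length consistency check in xlog_recover_do_icreate_pass2() is plain shift arithmetic: sb_inopblog is log2 of the inodes per block, so count >> sb_inopblog must equal the chunk length in filesystem blocks. A quick worked example, assuming 512-byte inodes in 4096-byte blocks (8 inodes per block, so inopblog = 3):

#include <stdio.h>

int main(void)
{
        unsigned int inopblog = 3;      /* log2(8 inodes per 4k block) */
        unsigned int count = 64;        /* inodes in the logged chunk */
        unsigned int length = 8;        /* chunk length in blocks */

        /* 64 inodes / 8 per block = 8 blocks, matching the logged length. */
        printf("consistent: %d\n", (count >> inopblog) == length);
        return 0;
}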
-
-STATIC void
-xlog_recover_buffer_ra_pass2(
- struct xlog *log,
- struct xlog_recover_item *item)
-{
- struct xfs_buf_log_format *buf_f = item->ri_buf[0].i_addr;
- struct xfs_mount *mp = log->l_mp;
-
- if (xlog_peek_buffer_cancelled(log, buf_f->blf_blkno,
- buf_f->blf_len, buf_f->blf_flags)) {
- return;
- }
-
- xfs_buf_readahead(mp->m_ddev_targp, buf_f->blf_blkno,
- buf_f->blf_len, NULL);
-}
-
-STATIC void
-xlog_recover_inode_ra_pass2(
- struct xlog *log,
- struct xlog_recover_item *item)
-{
- struct xfs_inode_log_format ilf_buf;
- struct xfs_inode_log_format *ilfp;
- struct xfs_mount *mp = log->l_mp;
- int error;
-
- if (item->ri_buf[0].i_len == sizeof(struct xfs_inode_log_format)) {
- ilfp = item->ri_buf[0].i_addr;
- } else {
- ilfp = &ilf_buf;
- memset(ilfp, 0, sizeof(*ilfp));
- error = xfs_inode_item_format_convert(&item->ri_buf[0], ilfp);
- if (error)
- return;
- }
-
- if (xlog_peek_buffer_cancelled(log, ilfp->ilf_blkno, ilfp->ilf_len, 0))
- return;
-
- xfs_buf_readahead(mp->m_ddev_targp, ilfp->ilf_blkno,
- ilfp->ilf_len, &xfs_inode_buf_ra_ops);
-}
-
-STATIC void
-xlog_recover_dquot_ra_pass2(
- struct xlog *log,
- struct xlog_recover_item *item)
-{
- struct xfs_mount *mp = log->l_mp;
- struct xfs_disk_dquot *recddq;
- struct xfs_dq_logformat *dq_f;
- uint type;
- int len;
-
-
- if (mp->m_qflags == 0)
- return;
-
- recddq = item->ri_buf[1].i_addr;
- if (recddq == NULL)
- return;
- if (item->ri_buf[1].i_len < sizeof(struct xfs_disk_dquot))
- return;
-
- type = recddq->d_flags & (XFS_DQ_USER | XFS_DQ_PROJ | XFS_DQ_GROUP);
- ASSERT(type);
- if (log->l_quotaoffs_flag & type)
- return;
-
- dq_f = item->ri_buf[0].i_addr;
- ASSERT(dq_f);
- ASSERT(dq_f->qlf_len == 1);
-
- len = XFS_FSB_TO_BB(mp, dq_f->qlf_len);
- if (xlog_peek_buffer_cancelled(log, dq_f->qlf_blkno, len, 0))
- return;
-
- xfs_buf_readahead(mp->m_ddev_targp, dq_f->qlf_blkno, len,
- &xfs_dquot_buf_ra_ops);
-}
-
-STATIC void
-xlog_recover_ra_pass2(
- struct xlog *log,
- struct xlog_recover_item *item)
-{
- switch (ITEM_TYPE(item)) {
- case XFS_LI_BUF:
- xlog_recover_buffer_ra_pass2(log, item);
- break;
- case XFS_LI_INODE:
- xlog_recover_inode_ra_pass2(log, item);
- break;
- case XFS_LI_DQUOT:
- xlog_recover_dquot_ra_pass2(log, item);
- break;
- case XFS_LI_EFI:
- case XFS_LI_EFD:
- case XFS_LI_QUOTAOFF:
- case XFS_LI_RUI:
- case XFS_LI_RUD:
- case XFS_LI_CUI:
- case XFS_LI_CUD:
- case XFS_LI_BUI:
- case XFS_LI_BUD:
- default:
- break;
- }
-}
-
-STATIC int
-xlog_recover_commit_pass1(
- struct xlog *log,
- struct xlog_recover *trans,
- struct xlog_recover_item *item)
-{
- trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS1);
-
- switch (ITEM_TYPE(item)) {
- case XFS_LI_BUF:
- return xlog_recover_buffer_pass1(log, item);
- case XFS_LI_QUOTAOFF:
- return xlog_recover_quotaoff_pass1(log, item);
- case XFS_LI_INODE:
- case XFS_LI_EFI:
- case XFS_LI_EFD:
- case XFS_LI_DQUOT:
- case XFS_LI_ICREATE:
- case XFS_LI_RUI:
- case XFS_LI_RUD:
- case XFS_LI_CUI:
- case XFS_LI_CUD:
- case XFS_LI_BUI:
- case XFS_LI_BUD:
- /* nothing to do in pass 1 */
- return 0;
- default:
- xfs_warn(log->l_mp, "%s: invalid item type (%d)",
- __func__, ITEM_TYPE(item));
- ASSERT(0);
- return -EFSCORRUPTED;
- }
-}
-
-STATIC int
-xlog_recover_commit_pass2(
- struct xlog *log,
- struct xlog_recover *trans,
- struct list_head *buffer_list,
- struct xlog_recover_item *item)
-{
- trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS2);
-
- switch (ITEM_TYPE(item)) {
- case XFS_LI_BUF:
- return xlog_recover_buffer_pass2(log, buffer_list, item,
- trans->r_lsn);
- case XFS_LI_INODE:
- return xlog_recover_inode_pass2(log, buffer_list, item,
- trans->r_lsn);
- case XFS_LI_EFI:
- return xlog_recover_efi_pass2(log, item, trans->r_lsn);
- case XFS_LI_EFD:
- return xlog_recover_efd_pass2(log, item);
- case XFS_LI_RUI:
- return xlog_recover_rui_pass2(log, item, trans->r_lsn);
- case XFS_LI_RUD:
- return xlog_recover_rud_pass2(log, item);
- case XFS_LI_CUI:
- return xlog_recover_cui_pass2(log, item, trans->r_lsn);
- case XFS_LI_CUD:
- return xlog_recover_cud_pass2(log, item);
- case XFS_LI_BUI:
- return xlog_recover_bui_pass2(log, item, trans->r_lsn);
- case XFS_LI_BUD:
- return xlog_recover_bud_pass2(log, item);
- case XFS_LI_DQUOT:
- return xlog_recover_dquot_pass2(log, buffer_list, item,
- trans->r_lsn);
- case XFS_LI_ICREATE:
- return xlog_recover_do_icreate_pass2(log, buffer_list, item);
- case XFS_LI_QUOTAOFF:
- /* nothing to do in pass2 */
- return 0;
- default:
- xfs_warn(log->l_mp, "%s: invalid item type (%d)",
- __func__, ITEM_TYPE(item));
- ASSERT(0);
- return -EFSCORRUPTED;
- }
+ if (!xlog_is_buffer_cancelled(log, blkno, len))
+ xfs_buf_readahead(log->l_mp->m_ddev_targp, blkno, len, ops);
}
STATIC int
@@ -4072,8 +1967,12 @@ xlog_recover_items_pass2(
int error = 0;
list_for_each_entry(item, item_list, ri_list) {
- error = xlog_recover_commit_pass2(log, trans,
- buffer_list, item);
+ trace_xfs_log_recover_item_recover(log, trans, item,
+ XLOG_RECOVER_PASS2);
+
+ if (item->ri_ops->commit_pass2)
+ error = item->ri_ops->commit_pass2(log, buffer_list,
+ item, trans->r_lsn);
if (error)
return error;
}
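The hunk above swaps the per-type switch for a per-item ops vtable: each log item type supplies its own commit_pass2 hook, and types with nothing to do in that pass simply leave it NULL. A tiny sketch of that dispatch style, using hypothetical item and ops types unrelated to the real struct xlog_recover_item layout:

#include <stdio.h>

struct item;

struct item_ops {
        /* Optional hook; a type with nothing to do leaves it NULL. */
        int (*commit_pass2)(const struct item *item);
};

struct item {
        const char              *name;
        const struct item_ops   *ops;
};

static int buf_commit_pass2(const struct item *item)
{
        printf("replaying %s\n", item->name);
        return 0;
}

static const struct item_ops buf_item_ops = { .commit_pass2 = buf_commit_pass2 };
static const struct item_ops quotaoff_item_ops = { .commit_pass2 = NULL };

int main(void)
{
        struct item items[] = {
                { .name = "buffer",   .ops = &buf_item_ops },
                { .name = "quotaoff", .ops = &quotaoff_item_ops },
        };
        int error = 0;

        for (int i = 0; i < 2 && !error; i++)
                if (items[i].ops->commit_pass2)
                        error = items[i].ops->commit_pass2(&items[i]);
        return error;
}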
@@ -4110,12 +2009,16 @@ xlog_recover_commit_trans(
return error;
list_for_each_entry_safe(item, next, &trans->r_itemq, ri_list) {
+ trace_xfs_log_recover_item_recover(log, trans, item, pass);
+
switch (pass) {
case XLOG_RECOVER_PASS1:
- error = xlog_recover_commit_pass1(log, trans, item);
+ if (item->ri_ops->commit_pass1)
+ error = item->ri_ops->commit_pass1(log, item);
break;
case XLOG_RECOVER_PASS2:
- xlog_recover_ra_pass2(log, item);
+ if (item->ri_ops->ra_pass2)
+ item->ri_ops->ra_pass2(log, item);
list_move_tail(&item->ri_list, &ra_list);
items_queued++;
if (items_queued >= XLOG_RECOVER_COMMIT_QUEUE_MAX) {
@@ -4152,9 +2055,9 @@ STATIC void
xlog_recover_add_item(
struct list_head *head)
{
- xlog_recover_item_t *item;
+ struct xlog_recover_item *item;
- item = kmem_zalloc(sizeof(xlog_recover_item_t), 0);
+ item = kmem_zalloc(sizeof(struct xlog_recover_item), 0);
INIT_LIST_HEAD(&item->ri_list);
list_add_tail(&item->ri_list, head);
}
@@ -4166,7 +2069,7 @@ xlog_recover_add_to_cont_trans(
char *dp,
int len)
{
- xlog_recover_item_t *item;
+ struct xlog_recover_item *item;
char *ptr, *old_ptr;
int old_len;
@@ -4189,7 +2092,8 @@ xlog_recover_add_to_cont_trans(
}
/* take the tail entry */
- item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list);
+ item = list_entry(trans->r_itemq.prev, struct xlog_recover_item,
+ ri_list);
old_ptr = item->ri_buf[item->ri_cnt-1].i_addr;
old_len = item->ri_buf[item->ri_cnt-1].i_len;
@@ -4223,7 +2127,7 @@ xlog_recover_add_to_trans(
int len)
{
struct xfs_inode_log_format *in_f; /* any will do */
- xlog_recover_item_t *item;
+ struct xlog_recover_item *item;
char *ptr;
if (!len)
@@ -4259,13 +2163,14 @@ xlog_recover_add_to_trans(
in_f = (struct xfs_inode_log_format *)ptr;
/* take the tail entry */
- item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list);
+ item = list_entry(trans->r_itemq.prev, struct xlog_recover_item,
+ ri_list);
if (item->ri_total != 0 &&
item->ri_total == item->ri_cnt) {
/* tail item is in use, get a new one */
xlog_recover_add_item(&trans->r_itemq);
item = list_entry(trans->r_itemq.prev,
- xlog_recover_item_t, ri_list);
+ struct xlog_recover_item, ri_list);
}
if (item->ri_total == 0) { /* first region to be added */
@@ -4311,7 +2216,7 @@ STATIC void
xlog_recover_free_trans(
struct xlog_recover *trans)
{
- xlog_recover_item_t *item, *n;
+ struct xlog_recover_item *item, *n;
int i;
hlist_del_init(&trans->r_list);
@@ -4563,180 +2468,6 @@ xlog_recover_process_data(
return 0;
}
-/* Recover the EFI if necessary. */
-STATIC int
-xlog_recover_process_efi(
- struct xfs_mount *mp,
- struct xfs_ail *ailp,
- struct xfs_log_item *lip)
-{
- struct xfs_efi_log_item *efip;
- int error;
-
- /*
- * Skip EFIs that we've already processed.
- */
- efip = container_of(lip, struct xfs_efi_log_item, efi_item);
- if (test_bit(XFS_EFI_RECOVERED, &efip->efi_flags))
- return 0;
-
- spin_unlock(&ailp->ail_lock);
- error = xfs_efi_recover(mp, efip);
- spin_lock(&ailp->ail_lock);
-
- return error;
-}
-
-/* Release the EFI since we're cancelling everything. */
-STATIC void
-xlog_recover_cancel_efi(
- struct xfs_mount *mp,
- struct xfs_ail *ailp,
- struct xfs_log_item *lip)
-{
- struct xfs_efi_log_item *efip;
-
- efip = container_of(lip, struct xfs_efi_log_item, efi_item);
-
- spin_unlock(&ailp->ail_lock);
- xfs_efi_release(efip);
- spin_lock(&ailp->ail_lock);
-}
-
-/* Recover the RUI if necessary. */
-STATIC int
-xlog_recover_process_rui(
- struct xfs_mount *mp,
- struct xfs_ail *ailp,
- struct xfs_log_item *lip)
-{
- struct xfs_rui_log_item *ruip;
- int error;
-
- /*
- * Skip RUIs that we've already processed.
- */
- ruip = container_of(lip, struct xfs_rui_log_item, rui_item);
- if (test_bit(XFS_RUI_RECOVERED, &ruip->rui_flags))
- return 0;
-
- spin_unlock(&ailp->ail_lock);
- error = xfs_rui_recover(mp, ruip);
- spin_lock(&ailp->ail_lock);
-
- return error;
-}
-
-/* Release the RUI since we're cancelling everything. */
-STATIC void
-xlog_recover_cancel_rui(
- struct xfs_mount *mp,
- struct xfs_ail *ailp,
- struct xfs_log_item *lip)
-{
- struct xfs_rui_log_item *ruip;
-
- ruip = container_of(lip, struct xfs_rui_log_item, rui_item);
-
- spin_unlock(&ailp->ail_lock);
- xfs_rui_release(ruip);
- spin_lock(&ailp->ail_lock);
-}
-
-/* Recover the CUI if necessary. */
-STATIC int
-xlog_recover_process_cui(
- struct xfs_trans *parent_tp,
- struct xfs_ail *ailp,
- struct xfs_log_item *lip)
-{
- struct xfs_cui_log_item *cuip;
- int error;
-
- /*
- * Skip CUIs that we've already processed.
- */
- cuip = container_of(lip, struct xfs_cui_log_item, cui_item);
- if (test_bit(XFS_CUI_RECOVERED, &cuip->cui_flags))
- return 0;
-
- spin_unlock(&ailp->ail_lock);
- error = xfs_cui_recover(parent_tp, cuip);
- spin_lock(&ailp->ail_lock);
-
- return error;
-}
-
-/* Release the CUI since we're cancelling everything. */
-STATIC void
-xlog_recover_cancel_cui(
- struct xfs_mount *mp,
- struct xfs_ail *ailp,
- struct xfs_log_item *lip)
-{
- struct xfs_cui_log_item *cuip;
-
- cuip = container_of(lip, struct xfs_cui_log_item, cui_item);
-
- spin_unlock(&ailp->ail_lock);
- xfs_cui_release(cuip);
- spin_lock(&ailp->ail_lock);
-}
-
-/* Recover the BUI if necessary. */
-STATIC int
-xlog_recover_process_bui(
- struct xfs_trans *parent_tp,
- struct xfs_ail *ailp,
- struct xfs_log_item *lip)
-{
- struct xfs_bui_log_item *buip;
- int error;
-
- /*
- * Skip BUIs that we've already processed.
- */
- buip = container_of(lip, struct xfs_bui_log_item, bui_item);
- if (test_bit(XFS_BUI_RECOVERED, &buip->bui_flags))
- return 0;
-
- spin_unlock(&ailp->ail_lock);
- error = xfs_bui_recover(parent_tp, buip);
- spin_lock(&ailp->ail_lock);
-
- return error;
-}
-
-/* Release the BUI since we're cancelling everything. */
-STATIC void
-xlog_recover_cancel_bui(
- struct xfs_mount *mp,
- struct xfs_ail *ailp,
- struct xfs_log_item *lip)
-{
- struct xfs_bui_log_item *buip;
-
- buip = container_of(lip, struct xfs_bui_log_item, bui_item);
-
- spin_unlock(&ailp->ail_lock);
- xfs_bui_release(buip);
- spin_lock(&ailp->ail_lock);
-}
-
-/* Is this log item a deferred action intent? */
-static inline bool xlog_item_is_intent(struct xfs_log_item *lip)
-{
- switch (lip->li_type) {
- case XFS_LI_EFI:
- case XFS_LI_RUI:
- case XFS_LI_CUI:
- case XFS_LI_BUI:
- return true;
- default:
- return false;
- }
-}
-
/* Take all the collected deferred ops and finish them in order. */
static int
xlog_finish_defer_ops(
@@ -4771,6 +2502,13 @@ xlog_finish_defer_ops(
return xfs_trans_commit(tp);
}
+/* Is this log item a deferred action intent? */
+static inline bool xlog_item_is_intent(struct xfs_log_item *lip)
+{
+ return lip->li_ops->iop_recover != NULL &&
+ lip->li_ops->iop_match != NULL;
+}
+
/*
* When this is called, all of the log intent items which did not have
* corresponding log done items should be in the AIL. What we do now
@@ -4841,23 +2579,14 @@ xlog_recover_process_intents(
/*
* NOTE: If your intent processing routine can create more
- * deferred ops, you /must/ attach them to the dfops in this
- * routine or else those subsequent intents will get
+ * deferred ops, you /must/ attach them to the transaction in
+ * this routine or else those subsequent intents will get
* replayed in the wrong order!
*/
- switch (lip->li_type) {
- case XFS_LI_EFI:
- error = xlog_recover_process_efi(log->l_mp, ailp, lip);
- break;
- case XFS_LI_RUI:
- error = xlog_recover_process_rui(log->l_mp, ailp, lip);
- break;
- case XFS_LI_CUI:
- error = xlog_recover_process_cui(parent_tp, ailp, lip);
- break;
- case XFS_LI_BUI:
- error = xlog_recover_process_bui(parent_tp, ailp, lip);
- break;
+ if (!test_and_set_bit(XFS_LI_RECOVERED, &lip->li_flags)) {
+ spin_unlock(&ailp->ail_lock);
+ error = lip->li_ops->iop_recover(lip, parent_tp);
+ spin_lock(&ailp->ail_lock);
}
if (error)
goto out;
@@ -4901,21 +2630,9 @@ xlog_recover_cancel_intents(
break;
}
- switch (lip->li_type) {
- case XFS_LI_EFI:
- xlog_recover_cancel_efi(log->l_mp, ailp, lip);
- break;
- case XFS_LI_RUI:
- xlog_recover_cancel_rui(log->l_mp, ailp, lip);
- break;
- case XFS_LI_CUI:
- xlog_recover_cancel_cui(log->l_mp, ailp, lip);
- break;
- case XFS_LI_BUI:
- xlog_recover_cancel_bui(log->l_mp, ailp, lip);
- break;
- }
-
+ spin_unlock(&ailp->ail_lock);
+ lip->li_ops->iop_release(lip);
+ spin_lock(&ailp->ail_lock);
lip = xfs_trans_ail_cursor_next(ailp, &cur);
}
@@ -4987,7 +2704,7 @@ xlog_recover_process_one_iunlink(
/*
* Get the on disk inode to find the next inode in the bucket.
*/
- error = xfs_imap_to_bp(mp, NULL, &ip->i_imap, &dip, &ibp, 0, 0);
+ error = xfs_imap_to_bp(mp, NULL, &ip->i_imap, &dip, &ibp, 0);
if (error)
goto fail_iput;
diff --git a/fs/xfs/xfs_message.c b/fs/xfs/xfs_message.c
index e0f9d3b6abe9..bc66d95c8d4c 100644
--- a/fs/xfs/xfs_message.c
+++ b/fs/xfs/xfs_message.c
@@ -117,3 +117,25 @@ xfs_hex_dump(const void *p, int length)
{
print_hex_dump(KERN_ALERT, "", DUMP_PREFIX_OFFSET, 16, 1, p, length, 1);
}
+
+void
+xfs_buf_alert_ratelimited(
+ struct xfs_buf *bp,
+ const char *rlmsg,
+ const char *fmt,
+ ...)
+{
+ struct xfs_mount *mp = bp->b_mount;
+ struct va_format vaf;
+ va_list args;
+
+ /* use the more aggressive per-target rate limit for buffers */
+ if (!___ratelimit(&bp->b_target->bt_ioerror_rl, rlmsg))
+ return;
+
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ __xfs_printk(KERN_ALERT, mp, &vaf);
+ va_end(args);
+}
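
xfs_buf_alert_ratelimited() above gates buffer alerts on ___ratelimit() with the per-target bt_ioerror_rl state, so every buffer backed by the same device shares one limit. A rough user-space model of that shared interval/burst limiter follows; the window length, burst count and names are invented for illustration.

/* Shared per-target rate limiter, loosely modelling struct ratelimit_state */
#include <stdio.h>
#include <time.h>

struct ratelimit_state {
	time_t begin;
	int    interval;	/* seconds per window */
	int    burst;		/* messages allowed per window */
	int    printed;
};

static int ratelimit_ok(struct ratelimit_state *rs)
{
	time_t now = time(NULL);

	if (now - rs->begin >= rs->interval) {	/* start a new window */
		rs->begin = now;
		rs->printed = 0;
	}
	if (rs->printed >= rs->burst)
		return 0;
	rs->printed++;
	return 1;
}

/* One state per "buffer target": all buffers on that device share it,
 * like bt_ioerror_rl in the patch. */
static struct ratelimit_state target_rl = { 0, 30, 10, 0 };

static void buf_alert_ratelimited(const char *msg)
{
	if (!ratelimit_ok(&target_rl))
		return;
	fprintf(stderr, "ALERT: %s\n", msg);
}

int main(void)
{
	for (int i = 0; i < 20; i++)
		buf_alert_ratelimited("metadata I/O error");
	return 0;	/* only the first 10 alerts in the window are printed */
}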
diff --git a/fs/xfs/xfs_message.h b/fs/xfs/xfs_message.h
index 0b05e10995a0..4d9bd6bb63ca 100644
--- a/fs/xfs/xfs_message.h
+++ b/fs/xfs/xfs_message.h
@@ -31,15 +31,27 @@ void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...)
}
#endif
-#define xfs_printk_ratelimited(func, dev, fmt, ...) \
+#define xfs_printk_ratelimited(func, dev, fmt, ...)	\
do { \
static DEFINE_RATELIMIT_STATE(_rs, \
DEFAULT_RATELIMIT_INTERVAL, \
DEFAULT_RATELIMIT_BURST); \
if (__ratelimit(&_rs)) \
- func(dev, fmt, ##__VA_ARGS__); \
+ func(dev, fmt, ##__VA_ARGS__); \
} while (0)
+#define xfs_printk_once(func, dev, fmt, ...) \
+({ \
+ static bool __section(.data.once) __print_once; \
+ bool __ret_print_once = !__print_once; \
+ \
+ if (!__print_once) { \
+ __print_once = true; \
+ func(dev, fmt, ##__VA_ARGS__); \
+ } \
+ unlikely(__ret_print_once); \
+})
+
#define xfs_emerg_ratelimited(dev, fmt, ...) \
xfs_printk_ratelimited(xfs_emerg, dev, fmt, ##__VA_ARGS__)
#define xfs_alert_ratelimited(dev, fmt, ...) \
@@ -57,9 +69,17 @@ do { \
#define xfs_debug_ratelimited(dev, fmt, ...) \
xfs_printk_ratelimited(xfs_debug, dev, fmt, ##__VA_ARGS__)
+#define xfs_warn_once(dev, fmt, ...) \
+ xfs_printk_once(xfs_warn, dev, fmt, ##__VA_ARGS__)
+#define xfs_notice_once(dev, fmt, ...) \
+ xfs_printk_once(xfs_notice, dev, fmt, ##__VA_ARGS__)
+
void assfail(struct xfs_mount *mp, char *expr, char *f, int l);
void asswarn(struct xfs_mount *mp, char *expr, char *f, int l);
extern void xfs_hex_dump(const void *p, int length);
+void xfs_buf_alert_ratelimited(struct xfs_buf *bp, const char *rlmsg,
+ const char *fmt, ...);
+
#endif /* __XFS_MESSAGE_H */
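
The new xfs_printk_once() wrapper latches a static flag placed in .data.once, so the wrapped message fires at most once per call site. A user-space model of the same latch is below; it relies on the GNU statement-expression and ##__VA_ARGS__ extensions, just as the kernel macro does, and the names are made up.

/* Print-once latch; build with gcc or clang (GNU C extensions). */
#include <stdio.h>
#include <stdbool.h>

#define warn_once(fmt, ...)					\
({								\
	static bool __printed;					\
	bool __ret = !__printed;				\
								\
	if (!__printed) {					\
		__printed = true;				\
		fprintf(stderr, fmt "\n", ##__VA_ARGS__);	\
	}							\
	__ret;							\
})

int main(void)
{
	for (int i = 0; i < 3; i++)
		warn_once("reserve blocks depleted (iteration %d)", i);
	return 0;	/* prints only for iteration 0 */
}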
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index c5513e5a226a..d5dcf9869860 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1190,39 +1190,6 @@ xfs_log_sbcount(xfs_mount_t *mp)
}
/*
- * Deltas for the inode count are +/-64, hence we use a large batch size
- * of 128 so we don't need to take the counter lock on every update.
- */
-#define XFS_ICOUNT_BATCH 128
-int
-xfs_mod_icount(
- struct xfs_mount *mp,
- int64_t delta)
-{
- percpu_counter_add_batch(&mp->m_icount, delta, XFS_ICOUNT_BATCH);
- if (__percpu_counter_compare(&mp->m_icount, 0, XFS_ICOUNT_BATCH) < 0) {
- ASSERT(0);
- percpu_counter_add(&mp->m_icount, -delta);
- return -EINVAL;
- }
- return 0;
-}
-
-int
-xfs_mod_ifree(
- struct xfs_mount *mp,
- int64_t delta)
-{
- percpu_counter_add(&mp->m_ifree, delta);
- if (percpu_counter_compare(&mp->m_ifree, 0) < 0) {
- ASSERT(0);
- percpu_counter_add(&mp->m_ifree, -delta);
- return -EINVAL;
- }
- return 0;
-}
-
-/*
* Deltas for the block count can vary from 1 to very large, but lock contention
* only occurs on frequent small block count updates such as in the delayed
* allocation path for buffered writes (page a time updates). Hence we set
@@ -1300,10 +1267,9 @@ xfs_mod_fdblocks(
spin_unlock(&mp->m_sb_lock);
return 0;
}
- printk_once(KERN_WARNING
- "Filesystem \"%s\": reserve blocks depleted! "
- "Consider increasing reserve pool size.",
- mp->m_super->s_id);
+ xfs_warn_once(mp,
+"Reserve blocks depleted! Consider increasing reserve pool size.");
+
fdblocks_enospc:
spin_unlock(&mp->m_sb_lock);
return -ENOSPC;
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index b2e4598fdf7d..3725d25ad97e 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -55,61 +55,25 @@ struct xfs_error_cfg {
long retry_timeout; /* in jiffies, -1 = infinite */
};
+/*
+ * The struct xfs_mount layout is optimised to separate read-mostly variables
+ * from variables that are frequently modified. We put the read-mostly variables
+ * first, then place all the other variables at the end.
+ *
+ * Typically, read-mostly variables are those that are set at mount time and
+ * never changed again, or only change rarely as a result of things like sysfs
+ * knobs being tweaked.
+ */
typedef struct xfs_mount {
+ struct xfs_sb m_sb; /* copy of fs superblock */
struct super_block *m_super;
-
- /*
- * Bitsets of per-fs metadata that have been checked and/or are sick.
- * Callers must hold m_sb_lock to access these two fields.
- */
- uint8_t m_fs_checked;
- uint8_t m_fs_sick;
- /*
- * Bitsets of rt metadata that have been checked and/or are sick.
- * Callers must hold m_sb_lock to access this field.
- */
- uint8_t m_rt_checked;
- uint8_t m_rt_sick;
-
struct xfs_ail *m_ail; /* fs active log item list */
-
- struct xfs_sb m_sb; /* copy of fs superblock */
- spinlock_t m_sb_lock; /* sb counter lock */
- struct percpu_counter m_icount; /* allocated inodes counter */
- struct percpu_counter m_ifree; /* free inodes counter */
- struct percpu_counter m_fdblocks; /* free block counter */
- /*
- * Count of data device blocks reserved for delayed allocations,
- * including indlen blocks. Does not include allocated CoW staging
- * extents or anything related to the rt device.
- */
- struct percpu_counter m_delalloc_blks;
-
struct xfs_buf *m_sb_bp; /* buffer for superblock */
char *m_rtname; /* realtime device name */
char *m_logname; /* external log device name */
- int m_bsize; /* fs logical block size */
- xfs_agnumber_t m_agfrotor; /* last ag where space found */
- xfs_agnumber_t m_agirotor; /* last ag dir inode alloced */
- spinlock_t m_agirotor_lock;/* .. and lock protecting it */
- xfs_agnumber_t m_maxagi; /* highest inode alloc group */
- uint m_allocsize_log;/* min write size log bytes */
- uint m_allocsize_blocks; /* min write size blocks */
struct xfs_da_geometry *m_dir_geo; /* directory block geometry */
struct xfs_da_geometry *m_attr_geo; /* attribute block geometry */
struct xlog *m_log; /* log specific stuff */
- struct xfs_ino_geometry m_ino_geo; /* inode geometry */
- int m_logbufs; /* number of log buffers */
- int m_logbsize; /* size of each log buffer */
- uint m_rsumlevels; /* rt summary levels */
- uint m_rsumsize; /* size of rt summary, bytes */
- /*
- * Optional cache of rt summary level per bitmap block with the
- * invariant that m_rsum_cache[bbno] <= the minimum i for which
- * rsum[i][bbno] != 0. Reads and writes are serialized by the rsumip
- * inode lock.
- */
- uint8_t *m_rsum_cache;
struct xfs_inode *m_rbmip; /* pointer to bitmap inode */
struct xfs_inode *m_rsumip; /* pointer to summary inode */
struct xfs_inode *m_rootip; /* pointer to root directory */
@@ -117,9 +81,26 @@ typedef struct xfs_mount {
xfs_buftarg_t *m_ddev_targp; /* saves taking the address */
xfs_buftarg_t *m_logdev_targp;/* ptr to log device */
xfs_buftarg_t *m_rtdev_targp; /* ptr to rt device */
+ /*
+ * Optional cache of rt summary level per bitmap block with the
+ * invariant that m_rsum_cache[bbno] <= the minimum i for which
+ * rsum[i][bbno] != 0. Reads and writes are serialized by the rsumip
+ * inode lock.
+ */
+ uint8_t *m_rsum_cache;
+ struct xfs_mru_cache *m_filestream; /* per-mount filestream data */
+ struct workqueue_struct *m_buf_workqueue;
+ struct workqueue_struct *m_unwritten_workqueue;
+ struct workqueue_struct *m_cil_workqueue;
+ struct workqueue_struct *m_reclaim_workqueue;
+ struct workqueue_struct *m_eofblocks_workqueue;
+ struct workqueue_struct *m_sync_workqueue;
+
+ int m_bsize; /* fs logical block size */
uint8_t m_blkbit_log; /* blocklog + NBBY */
uint8_t m_blkbb_log; /* blocklog - BBSHIFT */
uint8_t m_agno_log; /* log #ag's */
+ uint8_t m_sectbb_log; /* sectlog - BBSHIFT */
uint m_blockmask; /* sb_blocksize-1 */
uint m_blockwsize; /* sb_blocksize in words */
uint m_blockwmask; /* blockwsize-1 */
@@ -138,47 +119,82 @@ typedef struct xfs_mount {
xfs_extlen_t m_ag_prealloc_blocks; /* reserved ag blocks */
uint m_alloc_set_aside; /* space we can't use */
uint m_ag_max_usable; /* max space per AG */
- struct radix_tree_root m_perag_tree; /* per-ag accounting info */
- spinlock_t m_perag_lock; /* lock for m_perag_tree */
- struct mutex m_growlock; /* growfs mutex */
+ int m_dalign; /* stripe unit */
+ int m_swidth; /* stripe width */
+ xfs_agnumber_t m_maxagi; /* highest inode alloc group */
+ uint m_allocsize_log;/* min write size log bytes */
+ uint m_allocsize_blocks; /* min write size blocks */
+ int m_logbufs; /* number of log buffers */
+ int m_logbsize; /* size of each log buffer */
+ uint m_rsumlevels; /* rt summary levels */
+ uint m_rsumsize; /* size of rt summary, bytes */
int m_fixedfsid[2]; /* unchanged for life of FS */
- uint64_t m_flags; /* global mount flags */
- bool m_finobt_nores; /* no per-AG finobt resv. */
uint m_qflags; /* quota status flags */
+ uint64_t m_flags; /* global mount flags */
+ int64_t m_low_space[XFS_LOWSP_MAX];
+ struct xfs_ino_geometry m_ino_geo; /* inode geometry */
struct xfs_trans_resv m_resv; /* precomputed res values */
+ /* low free space thresholds */
+ bool m_always_cow;
+ bool m_fail_unmount;
+ bool m_finobt_nores; /* no per-AG finobt resv. */
+ bool m_update_sb; /* sb needs update in mount */
+
+ /*
+ * Bitsets of per-fs metadata that have been checked and/or are sick.
+ * Callers must hold m_sb_lock to access these two fields.
+ */
+ uint8_t m_fs_checked;
+ uint8_t m_fs_sick;
+ /*
+ * Bitsets of rt metadata that have been checked and/or are sick.
+ * Callers must hold m_sb_lock to access this field.
+ */
+ uint8_t m_rt_checked;
+ uint8_t m_rt_sick;
+
+ /*
+ * End of read-mostly variables. Frequently written variables and locks
+ * should be placed below this comment from now on. The first variable
+ * here is marked as cacheline aligned so it is separated from
+ * the read-mostly variables.
+ */
+
+ spinlock_t ____cacheline_aligned m_sb_lock; /* sb counter lock */
+ struct percpu_counter m_icount; /* allocated inodes counter */
+ struct percpu_counter m_ifree; /* free inodes counter */
+ struct percpu_counter m_fdblocks; /* free block counter */
+ /*
+ * Count of data device blocks reserved for delayed allocations,
+ * including indlen blocks. Does not include allocated CoW staging
+ * extents or anything related to the rt device.
+ */
+ struct percpu_counter m_delalloc_blks;
+
+ struct radix_tree_root m_perag_tree; /* per-ag accounting info */
+ spinlock_t m_perag_lock; /* lock for m_perag_tree */
uint64_t m_resblks; /* total reserved blocks */
uint64_t m_resblks_avail;/* available reserved blocks */
uint64_t m_resblks_save; /* reserved blks @ remount,ro */
- int m_dalign; /* stripe unit */
- int m_swidth; /* stripe width */
- uint8_t m_sectbb_log; /* sectlog - BBSHIFT */
- atomic_t m_active_trans; /* number trans frozen */
- struct xfs_mru_cache *m_filestream; /* per-mount filestream data */
struct delayed_work m_reclaim_work; /* background inode reclaim */
struct delayed_work m_eofblocks_work; /* background eof blocks
trimming */
struct delayed_work m_cowblocks_work; /* background cow blocks
trimming */
- bool m_update_sb; /* sb needs update in mount */
- int64_t m_low_space[XFS_LOWSP_MAX];
- /* low free space thresholds */
struct xfs_kobj m_kobj;
struct xfs_kobj m_error_kobj;
struct xfs_kobj m_error_meta_kobj;
struct xfs_error_cfg m_error_cfg[XFS_ERR_CLASS_MAX][XFS_ERR_ERRNO_MAX];
struct xstats m_stats; /* per-fs stats */
+ xfs_agnumber_t m_agfrotor; /* last ag where space found */
+ xfs_agnumber_t m_agirotor; /* last ag dir inode alloced */
+ spinlock_t m_agirotor_lock;/* .. and lock protecting it */
/*
* Workqueue item so that we can coalesce multiple inode flush attempts
* into a single flush.
*/
struct work_struct m_flush_inodes_work;
- struct workqueue_struct *m_buf_workqueue;
- struct workqueue_struct *m_unwritten_workqueue;
- struct workqueue_struct *m_cil_workqueue;
- struct workqueue_struct *m_reclaim_workqueue;
- struct workqueue_struct *m_eofblocks_workqueue;
- struct workqueue_struct *m_sync_workqueue;
/*
 * Generation of the filesystem layout. This is incremented by each
@@ -190,9 +206,8 @@ typedef struct xfs_mount {
* to various other kinds of pain inflicted on the pNFS server.
*/
uint32_t m_generation;
+ struct mutex m_growlock; /* growfs mutex */
- bool m_always_cow;
- bool m_fail_unmount;
#ifdef DEBUG
/*
* Frequency with which errors are injected. Replaces xfs_etest; the
@@ -237,8 +252,8 @@ typedef struct xfs_mount {
#define XFS_MOUNT_FILESTREAMS (1ULL << 24) /* enable the filestreams
allocator */
#define XFS_MOUNT_NOATTR2 (1ULL << 25) /* disable use of attr2 format */
-
-#define XFS_MOUNT_DAX (1ULL << 62) /* TEST ONLY! */
+#define XFS_MOUNT_DAX_ALWAYS (1ULL << 26)
+#define XFS_MOUNT_DAX_NEVER (1ULL << 27)
/*
* Max and min values for mount-option defined I/O
@@ -259,8 +274,6 @@ void xfs_do_force_shutdown(struct xfs_mount *mp, int flags, char *fname,
#define SHUTDOWN_LOG_IO_ERROR 0x0002 /* write attempt to the log failed */
#define SHUTDOWN_FORCE_UMOUNT 0x0004 /* shutdown from a forced unmount */
#define SHUTDOWN_CORRUPT_INCORE 0x0008 /* corrupt in-memory data structures */
-#define SHUTDOWN_REMOTE_REQ 0x0010 /* shutdown came from remote cell */
-#define SHUTDOWN_DEVICE_REQ 0x0020 /* failed all paths to the device */
/*
* Flags for xfs_mountfs
@@ -394,8 +407,6 @@ extern int xfs_initialize_perag(xfs_mount_t *mp, xfs_agnumber_t agcount,
xfs_agnumber_t *maxagi);
extern void xfs_unmountfs(xfs_mount_t *);
-extern int xfs_mod_icount(struct xfs_mount *mp, int64_t delta);
-extern int xfs_mod_ifree(struct xfs_mount *mp, int64_t delta);
extern int xfs_mod_fdblocks(struct xfs_mount *mp, int64_t delta,
bool reserved);
extern int xfs_mod_frextents(struct xfs_mount *mp, int64_t delta);
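
The xfs_mount reordering above groups read-mostly fields first and pushes the frequently written counters behind a ____cacheline_aligned boundary, so hot updates do not dirty the cache lines holding mount-time constants. A minimal user-space illustration of that layout idea follows, using the GCC aligned attribute in place of the kernel annotation; the field names are placeholders.

/* Read-mostly fields first, hot counters on their own cache line. */
#include <stddef.h>
#include <stdio.h>

struct demo_mount {
	/* read-mostly: set at mount time, rarely changed afterwards */
	int		block_size;
	int		log_buffers;
	unsigned long	flags;

	/* frequently written: start on their own 64-byte line so updates do
	 * not bounce the line holding the read-mostly fields above */
	long		free_blocks __attribute__((aligned(64)));
	long		free_inodes;
};

int main(void)
{
	printf("flags offset:       %zu\n", offsetof(struct demo_mount, flags));
	printf("free_blocks offset: %zu\n", offsetof(struct demo_mount, free_blocks));
	return 0;	/* free_blocks lands on a 64-byte boundary */
}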
diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c
index bb3008d390aa..b101feb2aab4 100644
--- a/fs/xfs/xfs_pnfs.c
+++ b/fs/xfs/xfs_pnfs.c
@@ -58,9 +58,8 @@ xfs_fs_get_uuid(
{
struct xfs_mount *mp = XFS_M(sb);
- printk_once(KERN_NOTICE
-"XFS (%s): using experimental pNFS feature, use at your own risk!\n",
- mp->m_super->s_id);
+ xfs_notice_once(mp,
+"Using experimental pNFS feature, use at your own risk!");
if (*len < sizeof(uuid_t))
return -EINVAL;
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index c225691fad15..d6cd83317344 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -558,7 +558,7 @@ xfs_qm_set_defquota(
return;
ddqp = &dqp->q_core;
- defq = xfs_get_defquota(dqp, qinf);
+ defq = xfs_get_defquota(qinf, xfs_dquot_type(dqp));
/*
* Timers and warnings have been already set, let's just set the
@@ -577,19 +577,22 @@ xfs_qm_set_defquota(
static void
xfs_qm_init_timelimits(
struct xfs_mount *mp,
- struct xfs_quotainfo *qinf)
+ uint type)
{
+ struct xfs_quotainfo *qinf = mp->m_quotainfo;
+ struct xfs_def_quota *defq;
struct xfs_disk_dquot *ddqp;
struct xfs_dquot *dqp;
- uint type;
int error;
- qinf->qi_btimelimit = XFS_QM_BTIMELIMIT;
- qinf->qi_itimelimit = XFS_QM_ITIMELIMIT;
- qinf->qi_rtbtimelimit = XFS_QM_RTBTIMELIMIT;
- qinf->qi_bwarnlimit = XFS_QM_BWARNLIMIT;
- qinf->qi_iwarnlimit = XFS_QM_IWARNLIMIT;
- qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT;
+ defq = xfs_get_defquota(qinf, type);
+
+ defq->btimelimit = XFS_QM_BTIMELIMIT;
+ defq->itimelimit = XFS_QM_ITIMELIMIT;
+ defq->rtbtimelimit = XFS_QM_RTBTIMELIMIT;
+ defq->bwarnlimit = XFS_QM_BWARNLIMIT;
+ defq->iwarnlimit = XFS_QM_IWARNLIMIT;
+ defq->rtbwarnlimit = XFS_QM_RTBWARNLIMIT;
/*
* We try to get the limits from the superuser's limits fields.
@@ -597,39 +600,30 @@ xfs_qm_init_timelimits(
*
* Since we may not have done a quotacheck by this point, just read
* the dquot without attaching it to any hashtables or lists.
- *
- * Timers and warnings are globally set by the first timer found in
- * user/group/proj quota types, otherwise a default value is used.
- * This should be split into different fields per quota type.
*/
- if (XFS_IS_UQUOTA_RUNNING(mp))
- type = XFS_DQ_USER;
- else if (XFS_IS_GQUOTA_RUNNING(mp))
- type = XFS_DQ_GROUP;
- else
- type = XFS_DQ_PROJ;
error = xfs_qm_dqget_uncached(mp, 0, type, &dqp);
if (error)
return;
ddqp = &dqp->q_core;
+
/*
* The warnings and timers set the grace period given to
* a user or group before he or she can not perform any
* more writing. If it is zero, a default is used.
*/
if (ddqp->d_btimer)
- qinf->qi_btimelimit = be32_to_cpu(ddqp->d_btimer);
+ defq->btimelimit = be32_to_cpu(ddqp->d_btimer);
if (ddqp->d_itimer)
- qinf->qi_itimelimit = be32_to_cpu(ddqp->d_itimer);
+ defq->itimelimit = be32_to_cpu(ddqp->d_itimer);
if (ddqp->d_rtbtimer)
- qinf->qi_rtbtimelimit = be32_to_cpu(ddqp->d_rtbtimer);
+ defq->rtbtimelimit = be32_to_cpu(ddqp->d_rtbtimer);
if (ddqp->d_bwarns)
- qinf->qi_bwarnlimit = be16_to_cpu(ddqp->d_bwarns);
+ defq->bwarnlimit = be16_to_cpu(ddqp->d_bwarns);
if (ddqp->d_iwarns)
- qinf->qi_iwarnlimit = be16_to_cpu(ddqp->d_iwarns);
+ defq->iwarnlimit = be16_to_cpu(ddqp->d_iwarns);
if (ddqp->d_rtbwarns)
- qinf->qi_rtbwarnlimit = be16_to_cpu(ddqp->d_rtbwarns);
+ defq->rtbwarnlimit = be16_to_cpu(ddqp->d_rtbwarns);
xfs_qm_dqdestroy(dqp);
}
@@ -675,7 +669,9 @@ xfs_qm_init_quotainfo(
mp->m_qflags |= (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_CHKD);
- xfs_qm_init_timelimits(mp, qinf);
+ xfs_qm_init_timelimits(mp, XFS_DQ_USER);
+ xfs_qm_init_timelimits(mp, XFS_DQ_GROUP);
+ xfs_qm_init_timelimits(mp, XFS_DQ_PROJ);
if (XFS_IS_UQUOTA_RUNNING(mp))
xfs_qm_set_defquota(mp, XFS_DQ_USER, qinf);
@@ -780,7 +776,8 @@ xfs_qm_qino_alloc(
}
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_create,
- XFS_QM_QINOCREATE_SPACE_RES(mp), 0, 0, &tp);
+ need_alloc ? XFS_QM_QINOCREATE_SPACE_RES(mp) : 0,
+ 0, 0, &tp);
if (error)
return error;
@@ -1116,7 +1113,7 @@ xfs_qm_quotacheck_dqadjust(
*/
if (dqp->q_core.d_id) {
xfs_qm_adjust_dqlimits(mp, dqp);
- xfs_qm_adjust_dqtimers(mp, &dqp->q_core);
+ xfs_qm_adjust_dqtimers(mp, dqp);
}
dqp->dq_flags |= XFS_DQ_DIRTY;
@@ -1730,8 +1727,7 @@ xfs_qm_vop_dqalloc(
pq = xfs_qm_dqhold(ip->i_pdquot);
}
}
- if (uq)
- trace_xfs_dquot_dqalloc(ip);
+ trace_xfs_dquot_dqalloc(ip);
xfs_iunlock(ip, lockflags);
if (O_udqpp)
@@ -1808,7 +1804,7 @@ xfs_qm_vop_chown_reserve(
{
struct xfs_mount *mp = ip->i_mount;
uint64_t delblks;
- unsigned int blkflags, prjflags = 0;
+ unsigned int blkflags;
struct xfs_dquot *udq_unres = NULL;
struct xfs_dquot *gdq_unres = NULL;
struct xfs_dquot *pdq_unres = NULL;
@@ -1849,7 +1845,6 @@ xfs_qm_vop_chown_reserve(
if (XFS_IS_PQUOTA_ON(ip->i_mount) && pdqp &&
ip->i_d.di_projid != be32_to_cpu(pdqp->q_core.d_id)) {
- prjflags = XFS_QMOPT_ENOSPC;
pdq_delblks = pdqp;
if (delblks) {
ASSERT(ip->i_pdquot);
@@ -1859,8 +1854,7 @@ xfs_qm_vop_chown_reserve(
error = xfs_trans_reserve_quota_bydquots(tp, ip->i_mount,
udq_delblks, gdq_delblks, pdq_delblks,
- ip->i_d.di_nblocks, 1,
- flags | blkflags | prjflags);
+ ip->i_d.di_nblocks, 1, flags | blkflags);
if (error)
return error;
@@ -1878,8 +1872,7 @@ xfs_qm_vop_chown_reserve(
ASSERT(udq_unres || gdq_unres || pdq_unres);
error = xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount,
udq_delblks, gdq_delblks, pdq_delblks,
- (xfs_qcnt_t)delblks, 0,
- flags | blkflags | prjflags);
+ (xfs_qcnt_t)delblks, 0, flags | blkflags);
if (error)
return error;
xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount,
@@ -1932,7 +1925,6 @@ xfs_qm_vop_create_dqattach(
return;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
- ASSERT(XFS_IS_QUOTA_RUNNING(mp));
if (udqp && XFS_IS_UQUOTA_ON(mp)) {
ASSERT(ip->i_udquot == NULL);
diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h
index 4e57edca8bce..7b0e771fcbce 100644
--- a/fs/xfs/xfs_qm.h
+++ b/fs/xfs/xfs_qm.h
@@ -41,13 +41,20 @@ extern struct kmem_zone *xfs_qm_dqtrxzone;
*/
#define XFS_DQUOT_CLUSTER_SIZE_FSB (xfs_filblks_t)1
+/* Defaults for each quota type: time limits, warn limits, usage limits */
struct xfs_def_quota {
- xfs_qcnt_t bhardlimit; /* default data blk hard limit */
- xfs_qcnt_t bsoftlimit; /* default data blk soft limit */
- xfs_qcnt_t ihardlimit; /* default inode count hard limit */
- xfs_qcnt_t isoftlimit; /* default inode count soft limit */
- xfs_qcnt_t rtbhardlimit; /* default realtime blk hard limit */
- xfs_qcnt_t rtbsoftlimit; /* default realtime blk soft limit */
+ time64_t btimelimit; /* limit for blks timer */
+ time64_t itimelimit; /* limit for inodes timer */
+ time64_t rtbtimelimit; /* limit for rt blks timer */
+ xfs_qwarncnt_t bwarnlimit; /* limit for blks warnings */
+ xfs_qwarncnt_t iwarnlimit; /* limit for inodes warnings */
+ xfs_qwarncnt_t rtbwarnlimit; /* limit for rt blks warnings */
+ xfs_qcnt_t bhardlimit; /* default data blk hard limit */
+ xfs_qcnt_t bsoftlimit; /* default data blk soft limit */
+ xfs_qcnt_t ihardlimit; /* default inode count hard limit */
+ xfs_qcnt_t isoftlimit; /* default inode count soft limit */
+ xfs_qcnt_t rtbhardlimit; /* default realtime blk hard limit */
+ xfs_qcnt_t rtbsoftlimit; /* default realtime blk soft limit */
};
/*
@@ -55,28 +62,22 @@ struct xfs_def_quota {
* The mount structure keeps a pointer to this.
*/
struct xfs_quotainfo {
- struct radix_tree_root qi_uquota_tree;
- struct radix_tree_root qi_gquota_tree;
- struct radix_tree_root qi_pquota_tree;
- struct mutex qi_tree_lock;
+ struct radix_tree_root qi_uquota_tree;
+ struct radix_tree_root qi_gquota_tree;
+ struct radix_tree_root qi_pquota_tree;
+ struct mutex qi_tree_lock;
struct xfs_inode *qi_uquotaip; /* user quota inode */
struct xfs_inode *qi_gquotaip; /* group quota inode */
struct xfs_inode *qi_pquotaip; /* project quota inode */
- struct list_lru qi_lru;
- int qi_dquots;
- time64_t qi_btimelimit; /* limit for blks timer */
- time64_t qi_itimelimit; /* limit for inodes timer */
- time64_t qi_rtbtimelimit;/* limit for rt blks timer */
- xfs_qwarncnt_t qi_bwarnlimit; /* limit for blks warnings */
- xfs_qwarncnt_t qi_iwarnlimit; /* limit for inodes warnings */
- xfs_qwarncnt_t qi_rtbwarnlimit;/* limit for rt blks warnings */
- struct mutex qi_quotaofflock;/* to serialize quotaoff */
- xfs_filblks_t qi_dqchunklen; /* # BBs in a chunk of dqs */
- uint qi_dqperchunk; /* # ondisk dqs in above chunk */
+ struct list_lru qi_lru;
+ int qi_dquots;
+ struct mutex qi_quotaofflock;/* to serialize quotaoff */
+ xfs_filblks_t qi_dqchunklen; /* # BBs in a chunk of dqs */
+ uint qi_dqperchunk; /* # ondisk dq in above chunk */
struct xfs_def_quota qi_usr_default;
struct xfs_def_quota qi_grp_default;
struct xfs_def_quota qi_prj_default;
- struct shrinker qi_shrinker;
+ struct shrinker qi_shrinker;
};
static inline struct radix_tree_root *
@@ -113,6 +114,17 @@ xfs_quota_inode(xfs_mount_t *mp, uint dq_flags)
return NULL;
}
+static inline int
+xfs_dquot_type(struct xfs_dquot *dqp)
+{
+ if (XFS_QM_ISUDQ(dqp))
+ return XFS_DQ_USER;
+ if (XFS_QM_ISGDQ(dqp))
+ return XFS_DQ_GROUP;
+ ASSERT(XFS_QM_ISPDQ(dqp));
+ return XFS_DQ_PROJ;
+}
+
extern void xfs_trans_mod_dquot(struct xfs_trans *tp, struct xfs_dquot *dqp,
uint field, int64_t delta);
extern void xfs_trans_dqjoin(struct xfs_trans *, struct xfs_dquot *);
@@ -164,19 +176,19 @@ extern int xfs_qm_scall_quotaon(struct xfs_mount *, uint);
extern int xfs_qm_scall_quotaoff(struct xfs_mount *, uint);
static inline struct xfs_def_quota *
-xfs_get_defquota(struct xfs_dquot *dqp, struct xfs_quotainfo *qi)
+xfs_get_defquota(struct xfs_quotainfo *qi, int type)
{
- struct xfs_def_quota *defq;
-
- if (XFS_QM_ISUDQ(dqp))
- defq = &qi->qi_usr_default;
- else if (XFS_QM_ISGDQ(dqp))
- defq = &qi->qi_grp_default;
- else {
- ASSERT(XFS_QM_ISPDQ(dqp));
- defq = &qi->qi_prj_default;
+ switch (type) {
+ case XFS_DQ_USER:
+ return &qi->qi_usr_default;
+ case XFS_DQ_GROUP:
+ return &qi->qi_grp_default;
+ case XFS_DQ_PROJ:
+ return &qi->qi_prj_default;
+ default:
+ ASSERT(0);
+ return NULL;
}
- return defq;
}
#endif	/* __XFS_QM_H__ */
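
With xfs_get_defquota() now keyed by quota type, and xfs_dquot_type() deriving that type from a dquot, each of user, group and project quota keeps its own default limits. A small sketch of that per-type lookup follows; the field names and values are placeholders, not the kernel's.

/* Per-type default quota lookup. */
#include <assert.h>
#include <stdio.h>

enum dq_type { DQ_USER, DQ_GROUP, DQ_PROJ };

struct def_quota {
	long btimelimit;	/* grace period for block quota, seconds */
	long itimelimit;	/* grace period for inode quota, seconds */
};

struct quotainfo {
	struct def_quota usr_default;
	struct def_quota grp_default;
	struct def_quota prj_default;
};

static struct def_quota *get_defquota(struct quotainfo *qi, enum dq_type type)
{
	switch (type) {
	case DQ_USER:	return &qi->usr_default;
	case DQ_GROUP:	return &qi->grp_default;
	case DQ_PROJ:	return &qi->prj_default;
	}
	assert(0);
	return NULL;
}

int main(void)
{
	struct quotainfo qi = {
		.usr_default = {  7 * 86400, 7 * 86400 },
		.grp_default = { 14 * 86400, 7 * 86400 },
		.prj_default = { 30 * 86400, 7 * 86400 },
	};

	printf("group block grace: %ld s\n", get_defquota(&qi, DQ_GROUP)->btimelimit);
	return 0;
}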
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index 5d5ac65aa1cc..7effd7a28136 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -302,7 +302,7 @@ xfs_qm_scall_trunc_qfile(
goto out_unlock;
}
- ASSERT(ip->i_d.di_nextents == 0);
+ ASSERT(ip->i_df.if_nextents == 0);
xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
error = xfs_trans_commit(tp);
@@ -357,11 +357,11 @@ xfs_qm_scall_quotaon(
int error;
uint qf;
- flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD);
/*
- * Switching on quota accounting must be done at mount time.
+ * Switching on quota accounting must be done at mount time,
+ * only consider quota enforcement stuff here.
*/
- flags &= ~(XFS_ALL_QUOTA_ACCT);
+ flags &= XFS_ALL_QUOTA_ENFD;
if (flags == 0) {
xfs_debug(mp, "%s: zero flags, m_qflags=%x",
@@ -479,7 +479,7 @@ xfs_qm_scall_setqlim(
goto out_unlock;
}
- defq = xfs_get_defquota(dqp, q);
+ defq = xfs_get_defquota(q, xfs_dquot_type(dqp));
xfs_dqunlock(dqp);
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_qm_setqlim, 0, 0, 0, &tp);
@@ -555,32 +555,40 @@ xfs_qm_scall_setqlim(
ddq->d_rtbwarns = cpu_to_be16(newlim->d_rt_spc_warns);
if (id == 0) {
- /*
- * Timelimits for the super user set the relative time
- * the other users can be over quota for this file system.
- * If it is zero a default is used. Ditto for the default
- * soft and hard limit values (already done, above), and
- * for warnings.
- */
- if (newlim->d_fieldmask & QC_SPC_TIMER) {
- q->qi_btimelimit = newlim->d_spc_timer;
- ddq->d_btimer = cpu_to_be32(newlim->d_spc_timer);
- }
- if (newlim->d_fieldmask & QC_INO_TIMER) {
- q->qi_itimelimit = newlim->d_ino_timer;
- ddq->d_itimer = cpu_to_be32(newlim->d_ino_timer);
- }
- if (newlim->d_fieldmask & QC_RT_SPC_TIMER) {
- q->qi_rtbtimelimit = newlim->d_rt_spc_timer;
- ddq->d_rtbtimer = cpu_to_be32(newlim->d_rt_spc_timer);
- }
if (newlim->d_fieldmask & QC_SPC_WARNS)
- q->qi_bwarnlimit = newlim->d_spc_warns;
+ defq->bwarnlimit = newlim->d_spc_warns;
if (newlim->d_fieldmask & QC_INO_WARNS)
- q->qi_iwarnlimit = newlim->d_ino_warns;
+ defq->iwarnlimit = newlim->d_ino_warns;
if (newlim->d_fieldmask & QC_RT_SPC_WARNS)
- q->qi_rtbwarnlimit = newlim->d_rt_spc_warns;
- } else {
+ defq->rtbwarnlimit = newlim->d_rt_spc_warns;
+ }
+
+ /*
+ * Timelimits for the super user set the relative time the other users
+ * can be over quota for this file system. If it is zero a default is
+ * used. Ditto for the default soft and hard limit values (already
+ * done, above), and for warnings.
+ *
+ * For other IDs, userspace can bump out the grace period if over
+ * the soft limit.
+ */
+ if (newlim->d_fieldmask & QC_SPC_TIMER)
+ ddq->d_btimer = cpu_to_be32(newlim->d_spc_timer);
+ if (newlim->d_fieldmask & QC_INO_TIMER)
+ ddq->d_itimer = cpu_to_be32(newlim->d_ino_timer);
+ if (newlim->d_fieldmask & QC_RT_SPC_TIMER)
+ ddq->d_rtbtimer = cpu_to_be32(newlim->d_rt_spc_timer);
+
+ if (id == 0) {
+ if (newlim->d_fieldmask & QC_SPC_TIMER)
+ defq->btimelimit = newlim->d_spc_timer;
+ if (newlim->d_fieldmask & QC_INO_TIMER)
+ defq->itimelimit = newlim->d_ino_timer;
+ if (newlim->d_fieldmask & QC_RT_SPC_TIMER)
+ defq->rtbtimelimit = newlim->d_rt_spc_timer;
+ }
+
+ if (id != 0) {
/*
* If the user is now over quota, start the timelimit.
* The user will not be 'warned'.
@@ -588,7 +596,7 @@ xfs_qm_scall_setqlim(
* is on or off. We don't really want to bother with iterating
* over all ondisk dquots and turning the timers on/off.
*/
- xfs_qm_adjust_dqtimers(mp, ddq);
+ xfs_qm_adjust_dqtimers(mp, dqp);
}
dqp->dq_flags |= XFS_DQ_DIRTY;
xfs_trans_log_dquot(tp, dqp);
@@ -729,9 +737,10 @@ xfs_qm_scall_getquota_next(
STATIC int
xfs_dqrele_inode(
struct xfs_inode *ip,
- int flags,
void *args)
{
+ uint *flags = args;
+
/* skip quota inodes */
if (ip == ip->i_mount->m_quotainfo->qi_uquotaip ||
ip == ip->i_mount->m_quotainfo->qi_gquotaip ||
@@ -743,15 +752,15 @@ xfs_dqrele_inode(
}
xfs_ilock(ip, XFS_ILOCK_EXCL);
- if ((flags & XFS_UQUOTA_ACCT) && ip->i_udquot) {
+ if ((*flags & XFS_UQUOTA_ACCT) && ip->i_udquot) {
xfs_qm_dqrele(ip->i_udquot);
ip->i_udquot = NULL;
}
- if ((flags & XFS_GQUOTA_ACCT) && ip->i_gdquot) {
+ if ((*flags & XFS_GQUOTA_ACCT) && ip->i_gdquot) {
xfs_qm_dqrele(ip->i_gdquot);
ip->i_gdquot = NULL;
}
- if ((flags & XFS_PQUOTA_ACCT) && ip->i_pdquot) {
+ if ((*flags & XFS_PQUOTA_ACCT) && ip->i_pdquot) {
xfs_qm_dqrele(ip->i_pdquot);
ip->i_pdquot = NULL;
}
@@ -768,10 +777,10 @@ xfs_dqrele_inode(
*/
void
xfs_qm_dqrele_all_inodes(
- struct xfs_mount *mp,
- uint flags)
+ struct xfs_mount *mp,
+ uint flags)
{
ASSERT(mp->m_quotainfo);
- xfs_inode_ag_iterator_flags(mp, xfs_dqrele_inode, flags, NULL,
- XFS_AGITER_INEW_WAIT);
+ xfs_inode_walk(mp, XFS_INODE_WALK_INEW_WAIT, xfs_dqrele_inode,
+ &flags, XFS_ICI_NO_TAG);
}
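
xfs_dqrele_inode() now matches the generic xfs_inode_walk() callback signature: the walker passes a single opaque void *args through, and the callback casts it back to the caller's flags word. A user-space sketch of that convention follows; the walker, flag bit and names are all illustrative.

/* Generic walker with an opaque args pointer. */
#include <stdio.h>

struct inode { int id; };

typedef int (*walk_fn)(struct inode *ip, void *args);

static int walk_all(struct inode *inodes, int count, walk_fn fn, void *args)
{
	for (int i = 0; i < count; i++) {
		int error = fn(&inodes[i], args);

		if (error)
			return error;
	}
	return 0;
}

#define WALK_VERBOSE	0x1

static int release_quota(struct inode *ip, void *args)
{
	unsigned int *flags = args;	/* recover the caller's flags */

	if (*flags & WALK_VERBOSE)
		printf("releasing dquots on inode %d\n", ip->id);
	return 0;
}

int main(void)
{
	struct inode inodes[] = { {1}, {2}, {3} };
	unsigned int flags = WALK_VERBOSE;

	return walk_all(inodes, 3, release_quota, &flags);
}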
diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c
index 38669e827206..bf809b77a316 100644
--- a/fs/xfs/xfs_quotaops.c
+++ b/fs/xfs/xfs_quotaops.c
@@ -21,10 +21,10 @@ xfs_qm_fill_state(
struct qc_type_state *tstate,
struct xfs_mount *mp,
struct xfs_inode *ip,
- xfs_ino_t ino)
+ xfs_ino_t ino,
+ struct xfs_def_quota *defq)
{
- struct xfs_quotainfo *q = mp->m_quotainfo;
- bool tempqip = false;
+ bool tempqip = false;
tstate->ino = ino;
if (!ip && ino == NULLFSINO)
@@ -36,13 +36,13 @@ xfs_qm_fill_state(
}
tstate->flags |= QCI_SYSFILE;
tstate->blocks = ip->i_d.di_nblocks;
- tstate->nextents = ip->i_d.di_nextents;
- tstate->spc_timelimit = (u32)q->qi_btimelimit;
- tstate->ino_timelimit = (u32)q->qi_itimelimit;
- tstate->rt_spc_timelimit = (u32)q->qi_rtbtimelimit;
- tstate->spc_warnlimit = q->qi_bwarnlimit;
- tstate->ino_warnlimit = q->qi_iwarnlimit;
- tstate->rt_spc_warnlimit = q->qi_rtbwarnlimit;
+ tstate->nextents = ip->i_df.if_nextents;
+ tstate->spc_timelimit = (u32)defq->btimelimit;
+ tstate->ino_timelimit = (u32)defq->itimelimit;
+ tstate->rt_spc_timelimit = (u32)defq->rtbtimelimit;
+ tstate->spc_warnlimit = defq->bwarnlimit;
+ tstate->ino_warnlimit = defq->iwarnlimit;
+ tstate->rt_spc_warnlimit = defq->rtbwarnlimit;
if (tempqip)
xfs_irele(ip);
}
@@ -77,11 +77,11 @@ xfs_fs_get_quota_state(
state->s_state[PRJQUOTA].flags |= QCI_LIMITS_ENFORCED;
xfs_qm_fill_state(&state->s_state[USRQUOTA], mp, q->qi_uquotaip,
- mp->m_sb.sb_uquotino);
+ mp->m_sb.sb_uquotino, &q->qi_usr_default);
xfs_qm_fill_state(&state->s_state[GRPQUOTA], mp, q->qi_gquotaip,
- mp->m_sb.sb_gquotino);
+ mp->m_sb.sb_gquotino, &q->qi_grp_default);
xfs_qm_fill_state(&state->s_state[PRJQUOTA], mp, q->qi_pquotaip,
- mp->m_sb.sb_pquotino);
+ mp->m_sb.sb_pquotino, &q->qi_prj_default);
return 0;
}
@@ -109,8 +109,8 @@ xfs_fs_set_info(
int type,
struct qc_info *info)
{
- struct xfs_mount *mp = XFS_M(sb);
- struct qc_dqblk newlim;
+ struct xfs_mount *mp = XFS_M(sb);
+ struct qc_dqblk newlim;
if (sb_rdonly(sb))
return -EROFS;
diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c
index 8eeed73928cd..c81639891e29 100644
--- a/fs/xfs/xfs_refcount_item.c
+++ b/fs/xfs/xfs_refcount_item.c
@@ -18,16 +18,20 @@
#include "xfs_log.h"
#include "xfs_refcount.h"
#include "xfs_error.h"
+#include "xfs_log_priv.h"
+#include "xfs_log_recover.h"
kmem_zone_t *xfs_cui_zone;
kmem_zone_t *xfs_cud_zone;
+static const struct xfs_item_ops xfs_cui_item_ops;
+
static inline struct xfs_cui_log_item *CUI_ITEM(struct xfs_log_item *lip)
{
return container_of(lip, struct xfs_cui_log_item, cui_item);
}
-void
+STATIC void
xfs_cui_item_free(
struct xfs_cui_log_item *cuip)
{
@@ -44,13 +48,13 @@ xfs_cui_item_free(
* committed vs unpin operations in bulk insert operations. Hence the reference
* count to ensure only the last caller frees the CUI.
*/
-void
+STATIC void
xfs_cui_release(
struct xfs_cui_log_item *cuip)
{
ASSERT(atomic_read(&cuip->cui_refcount) > 0);
if (atomic_dec_and_test(&cuip->cui_refcount)) {
- xfs_trans_ail_remove(&cuip->cui_item, SHUTDOWN_LOG_IO_ERROR);
+ xfs_trans_ail_delete(&cuip->cui_item, SHUTDOWN_LOG_IO_ERROR);
xfs_cui_item_free(cuip);
}
}
@@ -123,17 +127,10 @@ xfs_cui_item_release(
xfs_cui_release(CUI_ITEM(lip));
}
-static const struct xfs_item_ops xfs_cui_item_ops = {
- .iop_size = xfs_cui_item_size,
- .iop_format = xfs_cui_item_format,
- .iop_unpin = xfs_cui_item_unpin,
- .iop_release = xfs_cui_item_release,
-};
-
/*
 * Allocate and initialize a cui item with the given number of extents.
*/
-struct xfs_cui_log_item *
+STATIC struct xfs_cui_log_item *
xfs_cui_init(
struct xfs_mount *mp,
uint nextents)
@@ -284,27 +281,6 @@ xfs_refcount_update_diff_items(
XFS_FSB_TO_AGNO(mp, rb->ri_startblock);
}
-/* Get an CUI. */
-STATIC void *
-xfs_refcount_update_create_intent(
- struct xfs_trans *tp,
- unsigned int count)
-{
- struct xfs_cui_log_item *cuip;
-
- ASSERT(tp != NULL);
- ASSERT(count > 0);
-
- cuip = xfs_cui_init(tp->t_mountp, count);
- ASSERT(cuip != NULL);
-
- /*
- * Get a log_item_desc to point at the new item.
- */
- xfs_trans_add_item(tp, &cuip->cui_item);
- return cuip;
-}
-
/* Set the phys extent flags for this reverse mapping. */
static void
xfs_trans_set_refcount_flags(
@@ -328,16 +304,12 @@ xfs_trans_set_refcount_flags(
STATIC void
xfs_refcount_update_log_item(
struct xfs_trans *tp,
- void *intent,
- struct list_head *item)
+ struct xfs_cui_log_item *cuip,
+ struct xfs_refcount_intent *refc)
{
- struct xfs_cui_log_item *cuip = intent;
- struct xfs_refcount_intent *refc;
uint next_extent;
struct xfs_phys_extent *ext;
- refc = container_of(item, struct xfs_refcount_intent, ri_list);
-
tp->t_flags |= XFS_TRANS_DIRTY;
set_bit(XFS_LI_DIRTY, &cuip->cui_item.li_flags);
@@ -354,23 +326,44 @@ xfs_refcount_update_log_item(
xfs_trans_set_refcount_flags(ext, refc->ri_type);
}
+static struct xfs_log_item *
+xfs_refcount_update_create_intent(
+ struct xfs_trans *tp,
+ struct list_head *items,
+ unsigned int count,
+ bool sort)
+{
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_cui_log_item *cuip = xfs_cui_init(mp, count);
+ struct xfs_refcount_intent *refc;
+
+ ASSERT(count > 0);
+
+ xfs_trans_add_item(tp, &cuip->cui_item);
+ if (sort)
+ list_sort(mp, items, xfs_refcount_update_diff_items);
+ list_for_each_entry(refc, items, ri_list)
+ xfs_refcount_update_log_item(tp, cuip, refc);
+ return &cuip->cui_item;
+}
+
 /* Get a CUD so we can process all the deferred refcount updates. */
-STATIC void *
+static struct xfs_log_item *
xfs_refcount_update_create_done(
struct xfs_trans *tp,
- void *intent,
+ struct xfs_log_item *intent,
unsigned int count)
{
- return xfs_trans_get_cud(tp, intent);
+ return &xfs_trans_get_cud(tp, CUI_ITEM(intent))->cud_item;
}
/* Process a deferred refcount update. */
STATIC int
xfs_refcount_update_finish_item(
struct xfs_trans *tp,
+ struct xfs_log_item *done,
struct list_head *item,
- void *done_item,
- void **state)
+ struct xfs_btree_cur **state)
{
struct xfs_refcount_intent *refc;
xfs_fsblock_t new_fsb;
@@ -378,12 +371,10 @@ xfs_refcount_update_finish_item(
int error;
refc = container_of(item, struct xfs_refcount_intent, ri_list);
- error = xfs_trans_log_finish_refcount_update(tp, done_item,
- refc->ri_type,
- refc->ri_startblock,
- refc->ri_blockcount,
- &new_fsb, &new_aglen,
- (struct xfs_btree_cur **)state);
+ error = xfs_trans_log_finish_refcount_update(tp, CUD_ITEM(done),
+ refc->ri_type, refc->ri_startblock, refc->ri_blockcount,
+ &new_fsb, &new_aglen, state);
+
/* Did we run out of reservation? Requeue what we didn't finish. */
if (!error && new_aglen > 0) {
ASSERT(refc->ri_type == XFS_REFCOUNT_INCREASE ||
@@ -396,24 +387,12 @@ xfs_refcount_update_finish_item(
return error;
}
-/* Clean up after processing deferred refcounts. */
-STATIC void
-xfs_refcount_update_finish_cleanup(
- struct xfs_trans *tp,
- void *state,
- int error)
-{
- struct xfs_btree_cur *rcur = state;
-
- xfs_refcount_finish_one_cleanup(tp, rcur, error);
-}
-
/* Abort all pending CUIs. */
STATIC void
xfs_refcount_update_abort_intent(
- void *intent)
+ struct xfs_log_item *intent)
{
- xfs_cui_release(intent);
+ xfs_cui_release(CUI_ITEM(intent));
}
/* Cancel a deferred refcount update. */
@@ -429,13 +408,11 @@ xfs_refcount_update_cancel_item(
const struct xfs_defer_op_type xfs_refcount_update_defer_type = {
.max_items = XFS_CUI_MAX_FAST_EXTENTS,
- .diff_items = xfs_refcount_update_diff_items,
.create_intent = xfs_refcount_update_create_intent,
.abort_intent = xfs_refcount_update_abort_intent,
- .log_item = xfs_refcount_update_log_item,
.create_done = xfs_refcount_update_create_done,
.finish_item = xfs_refcount_update_finish_item,
- .finish_cleanup = xfs_refcount_update_finish_cleanup,
+ .finish_cleanup = xfs_refcount_finish_one_cleanup,
.cancel_item = xfs_refcount_update_cancel_item,
};
@@ -443,28 +420,27 @@ const struct xfs_defer_op_type xfs_refcount_update_defer_type = {
* Process a refcount update intent item that was recovered from the log.
* We need to update the refcountbt.
*/
-int
-xfs_cui_recover(
- struct xfs_trans *parent_tp,
- struct xfs_cui_log_item *cuip)
+STATIC int
+xfs_cui_item_recover(
+ struct xfs_log_item *lip,
+ struct xfs_trans *parent_tp)
{
- int i;
- int error = 0;
- unsigned int refc_type;
+ struct xfs_bmbt_irec irec;
+ struct xfs_cui_log_item *cuip = CUI_ITEM(lip);
struct xfs_phys_extent *refc;
- xfs_fsblock_t startblock_fsb;
- bool op_ok;
struct xfs_cud_log_item *cudp;
struct xfs_trans *tp;
struct xfs_btree_cur *rcur = NULL;
- enum xfs_refcount_intent_type type;
+ struct xfs_mount *mp = parent_tp->t_mountp;
+ xfs_fsblock_t startblock_fsb;
xfs_fsblock_t new_fsb;
xfs_extlen_t new_len;
- struct xfs_bmbt_irec irec;
+ unsigned int refc_type;
+ bool op_ok;
bool requeue_only = false;
- struct xfs_mount *mp = parent_tp->t_mountp;
-
- ASSERT(!test_bit(XFS_CUI_RECOVERED, &cuip->cui_flags));
+ enum xfs_refcount_intent_type type;
+ int i;
+ int error = 0;
/*
* First check the validity of the extents described by the
@@ -495,7 +471,6 @@ xfs_cui_recover(
* This will pull the CUI from the AIL and
* free the memory associated with it.
*/
- set_bit(XFS_CUI_RECOVERED, &cuip->cui_flags);
xfs_cui_release(cuip);
return -EFSCORRUPTED;
}
@@ -579,7 +554,6 @@ xfs_cui_recover(
}
xfs_refcount_finish_one_cleanup(tp, rcur, error);
- set_bit(XFS_CUI_RECOVERED, &cuip->cui_flags);
xfs_defer_move(parent_tp, tp);
error = xfs_trans_commit(tp);
return error;
@@ -590,3 +564,117 @@ abort_error:
xfs_trans_cancel(tp);
return error;
}
+
+STATIC bool
+xfs_cui_item_match(
+ struct xfs_log_item *lip,
+ uint64_t intent_id)
+{
+ return CUI_ITEM(lip)->cui_format.cui_id == intent_id;
+}
+
+static const struct xfs_item_ops xfs_cui_item_ops = {
+ .iop_size = xfs_cui_item_size,
+ .iop_format = xfs_cui_item_format,
+ .iop_unpin = xfs_cui_item_unpin,
+ .iop_release = xfs_cui_item_release,
+ .iop_recover = xfs_cui_item_recover,
+ .iop_match = xfs_cui_item_match,
+};
+
+/*
+ * Copy a CUI format buffer from the given buf into the destination
+ * CUI format structure. The CUI/CUD items were designed not to need any
+ * special alignment handling.
+ */
+static int
+xfs_cui_copy_format(
+ struct xfs_log_iovec *buf,
+ struct xfs_cui_log_format *dst_cui_fmt)
+{
+ struct xfs_cui_log_format *src_cui_fmt;
+ uint len;
+
+ src_cui_fmt = buf->i_addr;
+ len = xfs_cui_log_format_sizeof(src_cui_fmt->cui_nextents);
+
+ if (buf->i_len == len) {
+ memcpy(dst_cui_fmt, src_cui_fmt, len);
+ return 0;
+ }
+ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL);
+ return -EFSCORRUPTED;
+}
+
+/*
+ * This routine is called to create an in-core extent refcount update
+ * item from the cui format structure which was logged on disk.
+ * It allocates an in-core cui, copies the extents from the format
+ * structure into it, and adds the cui to the AIL with the given
+ * LSN.
+ */
+STATIC int
+xlog_recover_cui_commit_pass2(
+ struct xlog *log,
+ struct list_head *buffer_list,
+ struct xlog_recover_item *item,
+ xfs_lsn_t lsn)
+{
+ int error;
+ struct xfs_mount *mp = log->l_mp;
+ struct xfs_cui_log_item *cuip;
+ struct xfs_cui_log_format *cui_formatp;
+
+ cui_formatp = item->ri_buf[0].i_addr;
+
+ cuip = xfs_cui_init(mp, cui_formatp->cui_nextents);
+ error = xfs_cui_copy_format(&item->ri_buf[0], &cuip->cui_format);
+ if (error) {
+ xfs_cui_item_free(cuip);
+ return error;
+ }
+ atomic_set(&cuip->cui_next_extent, cui_formatp->cui_nextents);
+ /*
+ * Insert the intent into the AIL directly and drop one reference so
+ * that finishing or canceling the work will drop the other.
+ */
+ xfs_trans_ail_insert(log->l_ailp, &cuip->cui_item, lsn);
+ xfs_cui_release(cuip);
+ return 0;
+}
+
+const struct xlog_recover_item_ops xlog_cui_item_ops = {
+ .item_type = XFS_LI_CUI,
+ .commit_pass2 = xlog_recover_cui_commit_pass2,
+};
+
+/*
+ * This routine is called when a CUD format structure is found in a committed
+ * transaction in the log. Its purpose is to cancel the corresponding CUI if it
+ * was still in the log. To do this it searches the AIL for the CUI with an id
+ * equal to that in the CUD format structure. If we find it we drop the CUD
+ * reference, which removes the CUI from the AIL and frees it.
+ */
+STATIC int
+xlog_recover_cud_commit_pass2(
+ struct xlog *log,
+ struct list_head *buffer_list,
+ struct xlog_recover_item *item,
+ xfs_lsn_t lsn)
+{
+ struct xfs_cud_log_format *cud_formatp;
+
+ cud_formatp = item->ri_buf[0].i_addr;
+ if (item->ri_buf[0].i_len != sizeof(struct xfs_cud_log_format)) {
+ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp);
+ return -EFSCORRUPTED;
+ }
+
+ xlog_recover_release_intent(log, XFS_LI_CUI, cud_formatp->cud_cui_id);
+ return 0;
+}
+
+const struct xlog_recover_item_ops xlog_cud_item_ops = {
+ .item_type = XFS_LI_CUD,
+ .commit_pass2 = xlog_recover_cud_commit_pass2,
+};
diff --git a/fs/xfs/xfs_refcount_item.h b/fs/xfs/xfs_refcount_item.h
index e47530f30489..f4f2e836540b 100644
--- a/fs/xfs/xfs_refcount_item.h
+++ b/fs/xfs/xfs_refcount_item.h
@@ -33,11 +33,6 @@ struct kmem_zone;
#define XFS_CUI_MAX_FAST_EXTENTS 16
/*
- * Define CUI flag bits. Manipulated by set/clear/test_bit operators.
- */
-#define XFS_CUI_RECOVERED 1
-
-/*
* This is the "refcount update intent" log item. It is used to log
* the fact that some reverse mappings need to change. It is used in
* conjunction with the "refcount update done" log item described
@@ -51,7 +46,6 @@ struct xfs_cui_log_item {
struct xfs_log_item cui_item;
atomic_t cui_refcount;
atomic_t cui_next_extent;
- unsigned long cui_flags; /* misc flags */
struct xfs_cui_log_format cui_format;
};
@@ -77,9 +71,4 @@ struct xfs_cud_log_item {
extern struct kmem_zone *xfs_cui_zone;
extern struct kmem_zone *xfs_cud_zone;
-struct xfs_cui_log_item *xfs_cui_init(struct xfs_mount *, uint);
-void xfs_cui_item_free(struct xfs_cui_log_item *);
-void xfs_cui_release(struct xfs_cui_log_item *);
-int xfs_cui_recover(struct xfs_trans *parent_tp, struct xfs_cui_log_item *cuip);
-
#endif /* __XFS_REFCOUNT_ITEM_H__ */
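
In the reworked defer-ops interface used by the refcount and rmap code above, ->create_intent now receives the whole intent list plus a sort flag and returns the intent as a generic struct xfs_log_item, folding the old ->diff_items and ->log_item hooks into one call. A toy C model of that consolidated contract follows; every name here is invented.

/* Consolidated create_intent: sort, log each item, return the intent. */
#include <stdio.h>
#include <stdlib.h>

struct work { int key; };

struct log_item { int nlogged; };

static int cmp_work(const void *a, const void *b)
{
	return ((const struct work *)a)->key - ((const struct work *)b)->key;
}

static struct log_item *create_intent(struct work *items, int count, int sort)
{
	struct log_item *intent = calloc(1, sizeof(*intent));

	if (sort)	/* used to be a separate ->diff_items hook */
		qsort(items, count, sizeof(*items), cmp_work);
	for (int i = 0; i < count; i++)	/* used to be a separate ->log_item hook */
		intent->nlogged++;
	return intent;
}

int main(void)
{
	struct work items[] = { {3}, {1}, {2} };
	struct log_item *intent = create_intent(items, 3, 1);

	printf("logged %d items, first key %d\n", intent->nlogged, items[0].key);
	free(intent);
	return 0;
}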
diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c
index 4911b68f95dd..a86599db20a6 100644
--- a/fs/xfs/xfs_rmap_item.c
+++ b/fs/xfs/xfs_rmap_item.c
@@ -18,16 +18,20 @@
#include "xfs_log.h"
#include "xfs_rmap.h"
#include "xfs_error.h"
+#include "xfs_log_priv.h"
+#include "xfs_log_recover.h"
kmem_zone_t *xfs_rui_zone;
kmem_zone_t *xfs_rud_zone;
+static const struct xfs_item_ops xfs_rui_item_ops;
+
static inline struct xfs_rui_log_item *RUI_ITEM(struct xfs_log_item *lip)
{
return container_of(lip, struct xfs_rui_log_item, rui_item);
}
-void
+STATIC void
xfs_rui_item_free(
struct xfs_rui_log_item *ruip)
{
@@ -44,13 +48,13 @@ xfs_rui_item_free(
* committed vs unpin operations in bulk insert operations. Hence the reference
* count to ensure only the last caller frees the RUI.
*/
-void
+STATIC void
xfs_rui_release(
struct xfs_rui_log_item *ruip)
{
ASSERT(atomic_read(&ruip->rui_refcount) > 0);
if (atomic_dec_and_test(&ruip->rui_refcount)) {
- xfs_trans_ail_remove(&ruip->rui_item, SHUTDOWN_LOG_IO_ERROR);
+ xfs_trans_ail_delete(&ruip->rui_item, SHUTDOWN_LOG_IO_ERROR);
xfs_rui_item_free(ruip);
}
}
@@ -122,17 +126,10 @@ xfs_rui_item_release(
xfs_rui_release(RUI_ITEM(lip));
}
-static const struct xfs_item_ops xfs_rui_item_ops = {
- .iop_size = xfs_rui_item_size,
- .iop_format = xfs_rui_item_format,
- .iop_unpin = xfs_rui_item_unpin,
- .iop_release = xfs_rui_item_release,
-};
-
/*
* Allocate and initialize an rui item with the given number of extents.
*/
-struct xfs_rui_log_item *
+STATIC struct xfs_rui_log_item *
xfs_rui_init(
struct xfs_mount *mp,
uint nextents)
@@ -160,7 +157,7 @@ xfs_rui_init(
* RUI format structure. The RUI/RUD items were designed not to need any
* special alignment handling.
*/
-int
+STATIC int
xfs_rui_copy_format(
struct xfs_log_iovec *buf,
struct xfs_rui_log_format *dst_rui_fmt)
@@ -352,41 +349,16 @@ xfs_rmap_update_diff_items(
XFS_FSB_TO_AGNO(mp, rb->ri_bmap.br_startblock);
}
-/* Get an RUI. */
-STATIC void *
-xfs_rmap_update_create_intent(
- struct xfs_trans *tp,
- unsigned int count)
-{
- struct xfs_rui_log_item *ruip;
-
- ASSERT(tp != NULL);
- ASSERT(count > 0);
-
- ruip = xfs_rui_init(tp->t_mountp, count);
- ASSERT(ruip != NULL);
-
- /*
- * Get a log_item_desc to point at the new item.
- */
- xfs_trans_add_item(tp, &ruip->rui_item);
- return ruip;
-}
-
/* Log rmap updates in the intent item. */
STATIC void
xfs_rmap_update_log_item(
struct xfs_trans *tp,
- void *intent,
- struct list_head *item)
+ struct xfs_rui_log_item *ruip,
+ struct xfs_rmap_intent *rmap)
{
- struct xfs_rui_log_item *ruip = intent;
- struct xfs_rmap_intent *rmap;
uint next_extent;
struct xfs_map_extent *map;
- rmap = container_of(item, struct xfs_rmap_intent, ri_list);
-
tp->t_flags |= XFS_TRANS_DIRTY;
set_bit(XFS_LI_DIRTY, &ruip->rui_item.li_flags);
@@ -406,58 +378,64 @@ xfs_rmap_update_log_item(
rmap->ri_bmap.br_state);
}
+static struct xfs_log_item *
+xfs_rmap_update_create_intent(
+ struct xfs_trans *tp,
+ struct list_head *items,
+ unsigned int count,
+ bool sort)
+{
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_rui_log_item *ruip = xfs_rui_init(mp, count);
+ struct xfs_rmap_intent *rmap;
+
+ ASSERT(count > 0);
+
+ xfs_trans_add_item(tp, &ruip->rui_item);
+ if (sort)
+ list_sort(mp, items, xfs_rmap_update_diff_items);
+ list_for_each_entry(rmap, items, ri_list)
+ xfs_rmap_update_log_item(tp, ruip, rmap);
+ return &ruip->rui_item;
+}
+
/* Get an RUD so we can process all the deferred rmap updates. */
-STATIC void *
+static struct xfs_log_item *
xfs_rmap_update_create_done(
struct xfs_trans *tp,
- void *intent,
+ struct xfs_log_item *intent,
unsigned int count)
{
- return xfs_trans_get_rud(tp, intent);
+ return &xfs_trans_get_rud(tp, RUI_ITEM(intent))->rud_item;
}
/* Process a deferred rmap update. */
STATIC int
xfs_rmap_update_finish_item(
struct xfs_trans *tp,
+ struct xfs_log_item *done,
struct list_head *item,
- void *done_item,
- void **state)
+ struct xfs_btree_cur **state)
{
struct xfs_rmap_intent *rmap;
int error;
rmap = container_of(item, struct xfs_rmap_intent, ri_list);
- error = xfs_trans_log_finish_rmap_update(tp, done_item,
- rmap->ri_type,
- rmap->ri_owner, rmap->ri_whichfork,
- rmap->ri_bmap.br_startoff,
- rmap->ri_bmap.br_startblock,
- rmap->ri_bmap.br_blockcount,
- rmap->ri_bmap.br_state,
- (struct xfs_btree_cur **)state);
+ error = xfs_trans_log_finish_rmap_update(tp, RUD_ITEM(done),
+ rmap->ri_type, rmap->ri_owner, rmap->ri_whichfork,
+ rmap->ri_bmap.br_startoff, rmap->ri_bmap.br_startblock,
+ rmap->ri_bmap.br_blockcount, rmap->ri_bmap.br_state,
+ state);
kmem_free(rmap);
return error;
}
-/* Clean up after processing deferred rmaps. */
-STATIC void
-xfs_rmap_update_finish_cleanup(
- struct xfs_trans *tp,
- void *state,
- int error)
-{
- struct xfs_btree_cur *rcur = state;
-
- xfs_rmap_finish_one_cleanup(tp, rcur, error);
-}
-
/* Abort all pending RUIs. */
STATIC void
xfs_rmap_update_abort_intent(
- void *intent)
+ struct xfs_log_item *intent)
{
- xfs_rui_release(intent);
+ xfs_rui_release(RUI_ITEM(intent));
}
/* Cancel a deferred rmap update. */
@@ -473,13 +451,11 @@ xfs_rmap_update_cancel_item(
const struct xfs_defer_op_type xfs_rmap_update_defer_type = {
.max_items = XFS_RUI_MAX_FAST_EXTENTS,
- .diff_items = xfs_rmap_update_diff_items,
.create_intent = xfs_rmap_update_create_intent,
.abort_intent = xfs_rmap_update_abort_intent,
- .log_item = xfs_rmap_update_log_item,
.create_done = xfs_rmap_update_create_done,
.finish_item = xfs_rmap_update_finish_item,
- .finish_cleanup = xfs_rmap_update_finish_cleanup,
+ .finish_cleanup = xfs_rmap_finish_one_cleanup,
.cancel_item = xfs_rmap_update_cancel_item,
};
@@ -487,24 +463,24 @@ const struct xfs_defer_op_type xfs_rmap_update_defer_type = {
* Process an rmap update intent item that was recovered from the log.
* We need to update the rmapbt.
*/
-int
-xfs_rui_recover(
- struct xfs_mount *mp,
- struct xfs_rui_log_item *ruip)
+STATIC int
+xfs_rui_item_recover(
+ struct xfs_log_item *lip,
+ struct xfs_trans *parent_tp)
{
- int i;
- int error = 0;
+ struct xfs_rui_log_item *ruip = RUI_ITEM(lip);
struct xfs_map_extent *rmap;
- xfs_fsblock_t startblock_fsb;
- bool op_ok;
struct xfs_rud_log_item *rudp;
- enum xfs_rmap_intent_type type;
- int whichfork;
- xfs_exntst_t state;
struct xfs_trans *tp;
struct xfs_btree_cur *rcur = NULL;
-
- ASSERT(!test_bit(XFS_RUI_RECOVERED, &ruip->rui_flags));
+ struct xfs_mount *mp = parent_tp->t_mountp;
+ xfs_fsblock_t startblock_fsb;
+ enum xfs_rmap_intent_type type;
+ xfs_exntst_t state;
+ bool op_ok;
+ int i;
+ int whichfork;
+ int error = 0;
/*
* First check the validity of the extents described by the
@@ -539,7 +515,6 @@ xfs_rui_recover(
* This will pull the RUI from the AIL and
* free the memory associated with it.
*/
- set_bit(XFS_RUI_RECOVERED, &ruip->rui_flags);
xfs_rui_release(ruip);
return -EFSCORRUPTED;
}
@@ -597,7 +572,6 @@ xfs_rui_recover(
}
xfs_rmap_finish_one_cleanup(tp, rcur, error);
- set_bit(XFS_RUI_RECOVERED, &ruip->rui_flags);
error = xfs_trans_commit(tp);
return error;
@@ -606,3 +580,90 @@ abort_error:
xfs_trans_cancel(tp);
return error;
}
+
+STATIC bool
+xfs_rui_item_match(
+ struct xfs_log_item *lip,
+ uint64_t intent_id)
+{
+ return RUI_ITEM(lip)->rui_format.rui_id == intent_id;
+}
+
+static const struct xfs_item_ops xfs_rui_item_ops = {
+ .iop_size = xfs_rui_item_size,
+ .iop_format = xfs_rui_item_format,
+ .iop_unpin = xfs_rui_item_unpin,
+ .iop_release = xfs_rui_item_release,
+ .iop_recover = xfs_rui_item_recover,
+ .iop_match = xfs_rui_item_match,
+};
+
+/*
+ * This routine is called to create an in-core extent rmap update
+ * item from the rui format structure which was logged on disk.
+ * It allocates an in-core rui, copies the extents from the format
+ * structure into it, and adds the rui to the AIL with the given
+ * LSN.
+ */
+STATIC int
+xlog_recover_rui_commit_pass2(
+ struct xlog *log,
+ struct list_head *buffer_list,
+ struct xlog_recover_item *item,
+ xfs_lsn_t lsn)
+{
+ int error;
+ struct xfs_mount *mp = log->l_mp;
+ struct xfs_rui_log_item *ruip;
+ struct xfs_rui_log_format *rui_formatp;
+
+ rui_formatp = item->ri_buf[0].i_addr;
+
+ ruip = xfs_rui_init(mp, rui_formatp->rui_nextents);
+ error = xfs_rui_copy_format(&item->ri_buf[0], &ruip->rui_format);
+ if (error) {
+ xfs_rui_item_free(ruip);
+ return error;
+ }
+ atomic_set(&ruip->rui_next_extent, rui_formatp->rui_nextents);
+ /*
+ * Insert the intent into the AIL directly and drop one reference so
+ * that finishing or canceling the work will drop the other.
+ */
+ xfs_trans_ail_insert(log->l_ailp, &ruip->rui_item, lsn);
+ xfs_rui_release(ruip);
+ return 0;
+}
+
+const struct xlog_recover_item_ops xlog_rui_item_ops = {
+ .item_type = XFS_LI_RUI,
+ .commit_pass2 = xlog_recover_rui_commit_pass2,
+};
+
+/*
+ * This routine is called when an RUD format structure is found in a committed
+ * transaction in the log. Its purpose is to cancel the corresponding RUI if it
+ * was still in the log. To do this it searches the AIL for the RUI with an id
+ * equal to that in the RUD format structure. If we find it we drop the RUD
+ * reference, which removes the RUI from the AIL and frees it.
+ */
+STATIC int
+xlog_recover_rud_commit_pass2(
+ struct xlog *log,
+ struct list_head *buffer_list,
+ struct xlog_recover_item *item,
+ xfs_lsn_t lsn)
+{
+ struct xfs_rud_log_format *rud_formatp;
+
+ rud_formatp = item->ri_buf[0].i_addr;
+ ASSERT(item->ri_buf[0].i_len == sizeof(struct xfs_rud_log_format));
+
+ xlog_recover_release_intent(log, XFS_LI_RUI, rud_formatp->rud_rui_id);
+ return 0;
+}
+
+const struct xlog_recover_item_ops xlog_rud_item_ops = {
+ .item_type = XFS_LI_RUD,
+ .commit_pass2 = xlog_recover_rud_commit_pass2,
+};
diff --git a/fs/xfs/xfs_rmap_item.h b/fs/xfs/xfs_rmap_item.h
index 8708e4a5aa5c..31e6cdfff71f 100644
--- a/fs/xfs/xfs_rmap_item.h
+++ b/fs/xfs/xfs_rmap_item.h
@@ -36,11 +36,6 @@ struct kmem_zone;
#define XFS_RUI_MAX_FAST_EXTENTS 16
/*
- * Define RUI flag bits. Manipulated by set/clear/test_bit operators.
- */
-#define XFS_RUI_RECOVERED 1
-
-/*
* This is the "rmap update intent" log item. It is used to log the fact that
* some reverse mappings need to change. It is used in conjunction with the
* "rmap update done" log item described below.
@@ -52,7 +47,6 @@ struct xfs_rui_log_item {
struct xfs_log_item rui_item;
atomic_t rui_refcount;
atomic_t rui_next_extent;
- unsigned long rui_flags; /* misc flags */
struct xfs_rui_log_format rui_format;
};
@@ -77,11 +71,4 @@ struct xfs_rud_log_item {
extern struct kmem_zone *xfs_rui_zone;
extern struct kmem_zone *xfs_rud_zone;
-struct xfs_rui_log_item *xfs_rui_init(struct xfs_mount *, uint);
-int xfs_rui_copy_format(struct xfs_log_iovec *buf,
- struct xfs_rui_log_format *dst_rui_fmt);
-void xfs_rui_item_free(struct xfs_rui_log_item *);
-void xfs_rui_release(struct xfs_rui_log_item *);
-int xfs_rui_recover(struct xfs_mount *mp, struct xfs_rui_log_item *ruip);
-
#endif /* __XFS_RMAP_ITEM_H__ */
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 424bb9a2d532..379cbff438bc 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -47,6 +47,39 @@ static struct kset *xfs_kset; /* top-level xfs sysfs dir */
static struct xfs_kobj xfs_dbg_kobj; /* global debug sysfs attrs */
#endif
+enum xfs_dax_mode {
+ XFS_DAX_INODE = 0,
+ XFS_DAX_ALWAYS = 1,
+ XFS_DAX_NEVER = 2,
+};
+
+static void
+xfs_mount_set_dax_mode(
+ struct xfs_mount *mp,
+ enum xfs_dax_mode mode)
+{
+ switch (mode) {
+ case XFS_DAX_INODE:
+ mp->m_flags &= ~(XFS_MOUNT_DAX_ALWAYS | XFS_MOUNT_DAX_NEVER);
+ break;
+ case XFS_DAX_ALWAYS:
+ mp->m_flags |= XFS_MOUNT_DAX_ALWAYS;
+ mp->m_flags &= ~XFS_MOUNT_DAX_NEVER;
+ break;
+ case XFS_DAX_NEVER:
+ mp->m_flags |= XFS_MOUNT_DAX_NEVER;
+ mp->m_flags &= ~XFS_MOUNT_DAX_ALWAYS;
+ break;
+ }
+}
+
+static const struct constant_table dax_param_enums[] = {
+ {"inode", XFS_DAX_INODE },
+ {"always", XFS_DAX_ALWAYS },
+ {"never", XFS_DAX_NEVER },
+ {}
+};
+
/*
* Table driven mount option parser.
*/
@@ -59,7 +92,7 @@ enum {
Opt_filestreams, Opt_quota, Opt_noquota, Opt_usrquota, Opt_grpquota,
Opt_prjquota, Opt_uquota, Opt_gquota, Opt_pquota,
Opt_uqnoenforce, Opt_gqnoenforce, Opt_pqnoenforce, Opt_qnoenforce,
- Opt_discard, Opt_nodiscard, Opt_dax,
+ Opt_discard, Opt_nodiscard, Opt_dax, Opt_dax_enum,
};
static const struct fs_parameter_spec xfs_fs_parameters[] = {
@@ -103,6 +136,7 @@ static const struct fs_parameter_spec xfs_fs_parameters[] = {
fsparam_flag("discard", Opt_discard),
fsparam_flag("nodiscard", Opt_nodiscard),
fsparam_flag("dax", Opt_dax),
+ fsparam_enum("dax", Opt_dax_enum, dax_param_enums),
{}
};
@@ -129,7 +163,8 @@ xfs_fs_show_options(
{ XFS_MOUNT_GRPID, ",grpid" },
{ XFS_MOUNT_DISCARD, ",discard" },
{ XFS_MOUNT_LARGEIO, ",largeio" },
- { XFS_MOUNT_DAX, ",dax" },
+ { XFS_MOUNT_DAX_ALWAYS, ",dax=always" },
+ { XFS_MOUNT_DAX_NEVER, ",dax=never" },
{ 0, NULL }
};
struct xfs_mount *mp = XFS_M(root->d_sb);
@@ -305,7 +340,7 @@ void
xfs_blkdev_issue_flush(
xfs_buftarg_t *buftarg)
{
- blkdev_issue_flush(buftarg->bt_bdev, GFP_NOFS, NULL);
+ blkdev_issue_flush(buftarg->bt_bdev, GFP_NOFS);
}
STATIC void
@@ -702,7 +737,7 @@ xfs_fs_drop_inode(
return 0;
}
- return generic_drop_inode(inode) || (ip->i_flags & XFS_IDONTCACHE);
+ return generic_drop_inode(inode);
}
static void
@@ -772,7 +807,8 @@ xfs_fs_statfs(
statp->f_blocks = sbp->sb_dblocks - lsize;
spin_unlock(&mp->m_sb_lock);
- statp->f_bfree = fdblocks - mp->m_alloc_set_aside;
+ /* make sure statp->f_bfree does not underflow */
+ statp->f_bfree = max_t(int64_t, fdblocks - mp->m_alloc_set_aside, 0);
statp->f_bavail = statp->f_bfree;
fakeinos = XFS_FSB_TO_INO(mp, statp->f_bfree);
@@ -838,8 +874,10 @@ xfs_restore_resvblks(struct xfs_mount *mp)
* there is no log replay required to write the inodes to disk - this is the
* primary difference between a sync and a quiesce.
*
- * Note: xfs_log_quiesce() stops background log work - the callers must ensure
- * it is started again when appropriate.
+ * We cancel log work early here to ensure all transactions the log worker may
+ * run have finished before we clean up and log the superblock and write an
+ * unmount record. The unfreeze process is responsible for restarting the log
+ * worker correctly.
*/
void
xfs_quiesce_attr(
@@ -847,9 +885,7 @@ xfs_quiesce_attr(
{
int error = 0;
- /* wait for all modifications to complete */
- while (atomic_read(&mp->m_active_trans) > 0)
- delay(100);
+ cancel_delayed_work_sync(&mp->m_log->l_work);
/* force the log to unpin objects from the now complete transactions */
xfs_log_force(mp, XFS_LOG_SYNC);
@@ -863,12 +899,6 @@ xfs_quiesce_attr(
if (error)
xfs_warn(mp, "xfs_attr_quiesce: failed to log sb changes. "
"Frozen image may not be consistent.");
- /*
- * Just warn here till VFS can correctly support
- * read-only remount without racing.
- */
- WARN_ON(atomic_read(&mp->m_active_trans) != 0);
-
xfs_log_quiesce(mp);
}
@@ -1261,7 +1291,10 @@ xfs_fc_parse_param(
return 0;
#ifdef CONFIG_FS_DAX
case Opt_dax:
- mp->m_flags |= XFS_MOUNT_DAX;
+ xfs_mount_set_dax_mode(mp, XFS_DAX_ALWAYS);
+ return 0;
+ case Opt_dax_enum:
+ xfs_mount_set_dax_mode(mp, result.uint_32);
return 0;
#endif
default:
@@ -1454,7 +1487,7 @@ xfs_fc_fill_super(
if (XFS_SB_VERSION_NUM(&mp->m_sb) == XFS_SB_VERSION_5)
sb->s_flags |= SB_I_VERSION;
- if (mp->m_flags & XFS_MOUNT_DAX) {
+ if (mp->m_flags & XFS_MOUNT_DAX_ALWAYS) {
bool rtdev_is_dax = false, datadev_is_dax;
xfs_warn(mp,
@@ -1468,7 +1501,7 @@ xfs_fc_fill_super(
if (!rtdev_is_dax && !datadev_is_dax) {
xfs_alert(mp,
"DAX unsupported by block device. Turning off DAX.");
- mp->m_flags &= ~XFS_MOUNT_DAX;
+ xfs_mount_set_dax_mode(mp, XFS_DAX_NEVER);
}
if (xfs_sb_version_hasreflink(&mp->m_sb)) {
xfs_alert(mp,
@@ -1754,7 +1787,6 @@ static int xfs_init_fs_context(
INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC);
spin_lock_init(&mp->m_perag_lock);
mutex_init(&mp->m_growlock);
- atomic_set(&mp->m_active_trans, 0);
INIT_WORK(&mp->m_flush_inodes_work, xfs_flush_inodes_worker);
INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
INIT_DELAYED_WORK(&mp->m_eofblocks_work, xfs_eofblocks_worker);
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index 13fb4b919648..8e88a7ca387e 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -243,8 +243,7 @@ xfs_symlink(
*/
xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp);
- if (resblks)
- resblks -= XFS_IALLOC_SPACE_RES(mp);
+ resblks -= XFS_IALLOC_SPACE_RES(mp);
/*
* If the symlink will fit into the inode, write it inline.
*/
@@ -252,7 +251,7 @@ xfs_symlink(
xfs_init_local_fork(ip, XFS_DATA_FORK, target_path, pathlen);
ip->i_d.di_size = pathlen;
- ip->i_d.di_format = XFS_DINODE_FMT_LOCAL;
+ ip->i_df.if_format = XFS_DINODE_FMT_LOCAL;
xfs_trans_log_inode(tp, ip, XFS_ILOG_DDATA | XFS_ILOG_CORE);
} else {
int offset;
@@ -265,8 +264,7 @@ xfs_symlink(
if (error)
goto out_trans_cancel;
- if (resblks)
- resblks -= fs_blocks;
+ resblks -= fs_blocks;
ip->i_d.di_size = pathlen;
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
@@ -386,7 +384,7 @@ xfs_inactive_symlink_rmt(
* either 1 or 2 extents and that we can
* free them all in one bunmapi call.
*/
- ASSERT(ip->i_d.di_nextents > 0 && ip->i_d.di_nextents <= 2);
+ ASSERT(ip->i_df.if_nextents > 0 && ip->i_df.if_nextents <= 2);
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
if (error)
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index a4323a63438d..460136628a79 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -1897,8 +1897,8 @@ DECLARE_EVENT_CLASS(xfs_swap_extent_class,
__entry->dev = VFS_I(ip)->i_sb->s_dev;
__entry->which = which;
__entry->ino = ip->i_ino;
- __entry->format = ip->i_d.di_format;
- __entry->nex = ip->i_d.di_nextents;
+ __entry->format = ip->i_df.if_format;
+ __entry->nex = ip->i_df.if_nextents;
__entry->broot_size = ip->i_df.if_broot_bytes;
__entry->fork_off = XFS_IFORK_BOFF(ip);
),
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 28b983ff8b11..3c94e5ff4316 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -68,7 +68,6 @@ xfs_trans_free(
xfs_extent_busy_clear(tp->t_mountp, &tp->t_busy, false);
trace_xfs_trans_free(tp, _RET_IP_);
- atomic_dec(&tp->t_mountp->m_active_trans);
if (!(tp->t_flags & XFS_TRANS_NO_WRITECOUNT))
sb_end_intwrite(tp->t_mountp->m_super);
xfs_trans_free_dqinfo(tp);
@@ -125,8 +124,6 @@ xfs_trans_dup(
xfs_defer_move(ntp, tp);
xfs_trans_dup_dqinfo(tp, ntp);
-
- atomic_inc(&tp->t_mountp->m_active_trans);
return ntp;
}
@@ -275,7 +272,6 @@ xfs_trans_alloc(
*/
WARN_ON(resp->tr_logres > 0 &&
mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE);
- atomic_inc(&mp->m_active_trans);
tp->t_magic = XFS_TRANS_HEADER_MAGIC;
tp->t_flags = flags;
@@ -299,20 +295,19 @@ xfs_trans_alloc(
/*
* Create an empty transaction with no reservation. This is a defensive
- * mechanism for routines that query metadata without actually modifying
- * them -- if the metadata being queried is somehow cross-linked (think a
- * btree block pointer that points higher in the tree), we risk deadlock.
- * However, blocks grabbed as part of a transaction can be re-grabbed.
- * The verifiers will notice the corrupt block and the operation will fail
- * back to userspace without deadlocking.
+ * mechanism for routines that query metadata without actually modifying them --
+ * if the metadata being queried is somehow cross-linked (think a btree block
+ * pointer that points higher in the tree), we risk deadlock. However, blocks
+ * grabbed as part of a transaction can be re-grabbed. The verifiers will
+ * notice the corrupt block and the operation will fail back to userspace
+ * without deadlocking.
*
- * Note the zero-length reservation; this transaction MUST be cancelled
- * without any dirty data.
+ * Note the zero-length reservation; this transaction MUST be cancelled without
+ * any dirty data.
*
- * Callers should obtain freeze protection to avoid two conflicts with fs
- * freezing: (1) having active transactions trip the m_active_trans ASSERTs;
- * and (2) grabbing buffers at the same time that freeze is trying to drain
- * the buffer LRU list.
+ * Callers should obtain freeze protection to avoid a conflict with fs freezing
+ * where we can be grabbing buffers at the same time that freeze is trying to
+ * drain the buffer LRU list.
*/
int
xfs_trans_alloc_empty(
@@ -534,57 +529,9 @@ xfs_trans_apply_sb_deltas(
sizeof(sbp->sb_frextents) - 1);
}
-STATIC int
-xfs_sb_mod8(
- uint8_t *field,
- int8_t delta)
-{
- int8_t counter = *field;
-
- counter += delta;
- if (counter < 0) {
- ASSERT(0);
- return -EINVAL;
- }
- *field = counter;
- return 0;
-}
-
-STATIC int
-xfs_sb_mod32(
- uint32_t *field,
- int32_t delta)
-{
- int32_t counter = *field;
-
- counter += delta;
- if (counter < 0) {
- ASSERT(0);
- return -EINVAL;
- }
- *field = counter;
- return 0;
-}
-
-STATIC int
-xfs_sb_mod64(
- uint64_t *field,
- int64_t delta)
-{
- int64_t counter = *field;
-
- counter += delta;
- if (counter < 0) {
- ASSERT(0);
- return -EINVAL;
- }
- *field = counter;
- return 0;
-}
-
/*
- * xfs_trans_unreserve_and_mod_sb() is called to release unused reservations
- * and apply superblock counter changes to the in-core superblock. The
+ * xfs_trans_unreserve_and_mod_sb() is called to release unused reservations and
+ * apply superblock counter changes to the in-core superblock. The
* t_res_fdblocks_delta and t_res_frextents_delta fields are explicitly NOT
* applied to the in-core superblock. The idea is that that has already been
* done.
@@ -593,7 +540,12 @@ xfs_sb_mod64(
* used block counts are not updated in the on disk superblock. In this case,
* XFS_TRANS_SB_DIRTY will not be set when the transaction is updated but we
* still need to update the incore superblock with the changes.
+ *
+ * Deltas for the inode count are +/-64, hence we use a large batch size of 128
+ * so we don't need to take the counter lock on every update.
*/
+#define XFS_ICOUNT_BATCH 128
+
void
xfs_trans_unreserve_and_mod_sb(
struct xfs_trans *tp)
@@ -629,20 +581,21 @@ xfs_trans_unreserve_and_mod_sb(
/* apply the per-cpu counters */
if (blkdelta) {
error = xfs_mod_fdblocks(mp, blkdelta, rsvd);
- if (error)
- goto out;
+ ASSERT(!error);
}
if (idelta) {
- error = xfs_mod_icount(mp, idelta);
- if (error)
- goto out_undo_fdblocks;
+ percpu_counter_add_batch(&mp->m_icount, idelta,
+ XFS_ICOUNT_BATCH);
+ if (idelta < 0)
+ ASSERT(__percpu_counter_compare(&mp->m_icount, 0,
+ XFS_ICOUNT_BATCH) >= 0);
}
if (ifreedelta) {
- error = xfs_mod_ifree(mp, ifreedelta);
- if (error)
- goto out_undo_icount;
+ percpu_counter_add(&mp->m_ifree, ifreedelta);
+ if (ifreedelta < 0)
+ ASSERT(percpu_counter_compare(&mp->m_ifree, 0) >= 0);
}
if (rtxdelta == 0 && !(tp->t_flags & XFS_TRANS_SB_DIRTY))
@@ -650,95 +603,23 @@ xfs_trans_unreserve_and_mod_sb(
/* apply remaining deltas */
spin_lock(&mp->m_sb_lock);
- if (rtxdelta) {
- error = xfs_sb_mod64(&mp->m_sb.sb_frextents, rtxdelta);
- if (error)
- goto out_undo_ifree;
- }
-
- if (tp->t_dblocks_delta != 0) {
- error = xfs_sb_mod64(&mp->m_sb.sb_dblocks, tp->t_dblocks_delta);
- if (error)
- goto out_undo_frextents;
- }
- if (tp->t_agcount_delta != 0) {
- error = xfs_sb_mod32(&mp->m_sb.sb_agcount, tp->t_agcount_delta);
- if (error)
- goto out_undo_dblocks;
- }
- if (tp->t_imaxpct_delta != 0) {
- error = xfs_sb_mod8(&mp->m_sb.sb_imax_pct, tp->t_imaxpct_delta);
- if (error)
- goto out_undo_agcount;
- }
- if (tp->t_rextsize_delta != 0) {
- error = xfs_sb_mod32(&mp->m_sb.sb_rextsize,
- tp->t_rextsize_delta);
- if (error)
- goto out_undo_imaxpct;
- }
- if (tp->t_rbmblocks_delta != 0) {
- error = xfs_sb_mod32(&mp->m_sb.sb_rbmblocks,
- tp->t_rbmblocks_delta);
- if (error)
- goto out_undo_rextsize;
- }
- if (tp->t_rblocks_delta != 0) {
- error = xfs_sb_mod64(&mp->m_sb.sb_rblocks, tp->t_rblocks_delta);
- if (error)
- goto out_undo_rbmblocks;
- }
- if (tp->t_rextents_delta != 0) {
- error = xfs_sb_mod64(&mp->m_sb.sb_rextents,
- tp->t_rextents_delta);
- if (error)
- goto out_undo_rblocks;
- }
- if (tp->t_rextslog_delta != 0) {
- error = xfs_sb_mod8(&mp->m_sb.sb_rextslog,
- tp->t_rextslog_delta);
- if (error)
- goto out_undo_rextents;
- }
+ mp->m_sb.sb_frextents += rtxdelta;
+ mp->m_sb.sb_dblocks += tp->t_dblocks_delta;
+ mp->m_sb.sb_agcount += tp->t_agcount_delta;
+ mp->m_sb.sb_imax_pct += tp->t_imaxpct_delta;
+ mp->m_sb.sb_rextsize += tp->t_rextsize_delta;
+ mp->m_sb.sb_rbmblocks += tp->t_rbmblocks_delta;
+ mp->m_sb.sb_rblocks += tp->t_rblocks_delta;
+ mp->m_sb.sb_rextents += tp->t_rextents_delta;
+ mp->m_sb.sb_rextslog += tp->t_rextslog_delta;
spin_unlock(&mp->m_sb_lock);
- return;
-out_undo_rextents:
- if (tp->t_rextents_delta)
- xfs_sb_mod64(&mp->m_sb.sb_rextents, -tp->t_rextents_delta);
-out_undo_rblocks:
- if (tp->t_rblocks_delta)
- xfs_sb_mod64(&mp->m_sb.sb_rblocks, -tp->t_rblocks_delta);
-out_undo_rbmblocks:
- if (tp->t_rbmblocks_delta)
- xfs_sb_mod32(&mp->m_sb.sb_rbmblocks, -tp->t_rbmblocks_delta);
-out_undo_rextsize:
- if (tp->t_rextsize_delta)
- xfs_sb_mod32(&mp->m_sb.sb_rextsize, -tp->t_rextsize_delta);
-out_undo_imaxpct:
- if (tp->t_rextsize_delta)
- xfs_sb_mod8(&mp->m_sb.sb_imax_pct, -tp->t_imaxpct_delta);
-out_undo_agcount:
- if (tp->t_agcount_delta)
- xfs_sb_mod32(&mp->m_sb.sb_agcount, -tp->t_agcount_delta);
-out_undo_dblocks:
- if (tp->t_dblocks_delta)
- xfs_sb_mod64(&mp->m_sb.sb_dblocks, -tp->t_dblocks_delta);
-out_undo_frextents:
- if (rtxdelta)
- xfs_sb_mod64(&mp->m_sb.sb_frextents, -rtxdelta);
-out_undo_ifree:
- spin_unlock(&mp->m_sb_lock);
- if (ifreedelta)
- xfs_mod_ifree(mp, -ifreedelta);
-out_undo_icount:
- if (idelta)
- xfs_mod_icount(mp, -idelta);
-out_undo_fdblocks:
- if (blkdelta)
- xfs_mod_fdblocks(mp, -blkdelta, rsvd);
-out:
- ASSERT(error == 0);
+ /*
+ * Debug checks outside of the spinlock so they don't lock up the
+ * machine if they fail.
+ */
+ ASSERT(mp->m_sb.sb_imax_pct >= 0);
+ ASSERT(mp->m_sb.sb_rextslog >= 0);
return;
}
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 752c7fef9de7..8308bf6d7e40 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -59,12 +59,14 @@ struct xfs_log_item {
#define XFS_LI_ABORTED 1
#define XFS_LI_FAILED 2
#define XFS_LI_DIRTY 3 /* log item dirty in transaction */
+#define XFS_LI_RECOVERED 4 /* log intent item has been recovered */
#define XFS_LI_FLAGS \
{ (1 << XFS_LI_IN_AIL), "IN_AIL" }, \
{ (1 << XFS_LI_ABORTED), "ABORTED" }, \
{ (1 << XFS_LI_FAILED), "FAILED" }, \
- { (1 << XFS_LI_DIRTY), "DIRTY" }
+ { (1 << XFS_LI_DIRTY), "DIRTY" }, \
+ { (1 << XFS_LI_RECOVERED), "RECOVERED" }
struct xfs_item_ops {
unsigned flags;
@@ -77,6 +79,8 @@ struct xfs_item_ops {
void (*iop_release)(struct xfs_log_item *);
xfs_lsn_t (*iop_committed)(struct xfs_log_item *, xfs_lsn_t);
void (*iop_error)(struct xfs_log_item *, xfs_buf_t *);
+ int (*iop_recover)(struct xfs_log_item *lip, struct xfs_trans *tp);
+ bool (*iop_match)(struct xfs_log_item *item, uint64_t id);
};
/*
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 564253550b75..ac5019361a13 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -345,6 +345,45 @@ xfs_ail_delete(
xfs_trans_ail_cursor_clear(ailp, lip);
}
+/*
+ * Requeue a failed buffer for writeback.
+ *
+ * We clear the log item failed state here as well, but we have to be careful
+ * about reference counts because the only active references to the buffer
+ * may be those held by the failed log items. Hence if we clear the log item failed state
+ * before queuing the buffer for IO we can release all active references to
+ * the buffer and free it, leading to use after free problems in
+ * xfs_buf_delwri_queue. It makes no difference to the buffer or log items which
+ * order we process them in - the buffer is locked, and we own the buffer list
+ * so nothing on them is going to change while we are performing this action.
+ *
+ * Hence we can safely queue the buffer for IO before we clear the failed log
+ * item state, therefore always having an active reference to the buffer and
+ * avoiding the transient zero-reference state that leads to use-after-free.
+ */
+static inline int
+xfsaild_resubmit_item(
+ struct xfs_log_item *lip,
+ struct list_head *buffer_list)
+{
+ struct xfs_buf *bp = lip->li_buf;
+
+ if (!xfs_buf_trylock(bp))
+ return XFS_ITEM_LOCKED;
+
+ if (!xfs_buf_delwri_queue(bp, buffer_list)) {
+ xfs_buf_unlock(bp);
+ return XFS_ITEM_FLUSHING;
+ }
+
+ /* protected by ail_lock */
+ list_for_each_entry(lip, &bp->b_li_list, li_bio_list)
+ xfs_clear_li_failed(lip);
+
+ xfs_buf_unlock(bp);
+ return XFS_ITEM_SUCCESS;
+}
+
static inline uint
xfsaild_push_item(
struct xfs_ail *ailp,
@@ -365,6 +404,8 @@ xfsaild_push_item(
*/
if (!lip->li_ops->iop_push)
return XFS_ITEM_PINNED;
+ if (test_bit(XFS_LI_FAILED, &lip->li_flags))
+ return xfsaild_resubmit_item(lip, &ailp->ail_buf_list);
return lip->li_ops->iop_push(lip, &ailp->ail_buf_list);
}
@@ -774,6 +815,17 @@ xfs_trans_ail_update_bulk(
xfs_ail_update_finish(ailp, tail_lsn);
}
+/* Insert a log item into the AIL. */
+void
+xfs_trans_ail_insert(
+ struct xfs_ail *ailp,
+ struct xfs_log_item *lip,
+ xfs_lsn_t lsn)
+{
+ spin_lock(&ailp->ail_lock);
+ xfs_trans_ail_update_bulk(ailp, NULL, &lip, 1, lsn);
+}
+
/*
* Delete one log item from the AIL.
*
@@ -800,39 +852,19 @@ xfs_ail_delete_one(
return 0;
}
-/**
- * Remove a log items from the AIL
- *
- * @xfs_trans_ail_delete_bulk takes an array of log items that all need to
- * removed from the AIL. The caller is already holding the AIL lock, and done
- * all the checks necessary to ensure the items passed in via @log_items are
- * ready for deletion. This includes checking that the items are in the AIL.
- *
- * For each log item to be removed, unlink it from the AIL, clear the IN_AIL
- * flag from the item and reset the item's lsn to 0. If we remove the first
- * item in the AIL, update the log tail to match the new minimum LSN in the
- * AIL.
- *
- * This function will not drop the AIL lock until all items are removed from
- * the AIL to minimise the amount of lock traffic on the AIL. This does not
- * greatly increase the AIL hold time, but does significantly reduce the amount
- * of traffic on the lock, especially during IO completion.
- *
- * This function must be called with the AIL lock held. The lock is dropped
- * before returning.
- */
void
xfs_trans_ail_delete(
- struct xfs_ail *ailp,
struct xfs_log_item *lip,
int shutdown_type)
{
+ struct xfs_ail *ailp = lip->li_ailp;
struct xfs_mount *mp = ailp->ail_mount;
xfs_lsn_t tail_lsn;
+ spin_lock(&ailp->ail_lock);
if (!test_bit(XFS_LI_IN_AIL, &lip->li_flags)) {
spin_unlock(&ailp->ail_lock);
- if (!XFS_FORCED_SHUTDOWN(mp)) {
+ if (shutdown_type && !XFS_FORCED_SHUTDOWN(mp)) {
xfs_alert_tag(mp, XFS_PTAG_AILDELETE,
"%s: attempting to delete a log item that is not in the AIL",
__func__);
@@ -841,6 +873,7 @@ xfs_trans_ail_delete(
return;
}
+ /* xfs_ail_update_finish() drops the AIL lock */
tail_lsn = xfs_ail_delete_one(ailp, lip);
xfs_ail_update_finish(ailp, tail_lsn);
}
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c
index d1b9869bc5fa..c0f73b82c055 100644
--- a/fs/xfs/xfs_trans_dquot.c
+++ b/fs/xfs/xfs_trans_dquot.c
@@ -388,7 +388,7 @@ xfs_trans_apply_dquot_deltas(
*/
if (d->d_id) {
xfs_qm_adjust_dqlimits(tp->t_mountp, dqp);
- xfs_qm_adjust_dqtimers(tp->t_mountp, d);
+ xfs_qm_adjust_dqtimers(tp->t_mountp, dqp);
}
dqp->dq_flags |= XFS_DQ_DIRTY;
@@ -591,7 +591,7 @@ xfs_trans_dqresv(
xfs_dqlock(dqp);
- defq = xfs_get_defquota(dqp, q);
+ defq = xfs_get_defquota(q, xfs_dquot_type(dqp));
if (flags & XFS_TRANS_DQ_RES_BLKS) {
hardlimit = be64_to_cpu(dqp->q_core.d_blk_hardlimit);
@@ -602,7 +602,7 @@ xfs_trans_dqresv(
softlimit = defq->bsoftlimit;
timer = be32_to_cpu(dqp->q_core.d_btimer);
warns = be16_to_cpu(dqp->q_core.d_bwarns);
- warnlimit = dqp->q_mount->m_quotainfo->qi_bwarnlimit;
+ warnlimit = defq->bwarnlimit;
resbcountp = &dqp->q_res_bcount;
} else {
ASSERT(flags & XFS_TRANS_DQ_RES_RTBLKS);
@@ -614,7 +614,7 @@ xfs_trans_dqresv(
softlimit = defq->rtbsoftlimit;
timer = be32_to_cpu(dqp->q_core.d_rtbtimer);
warns = be16_to_cpu(dqp->q_core.d_rtbwarns);
- warnlimit = dqp->q_mount->m_quotainfo->qi_rtbwarnlimit;
+ warnlimit = defq->rtbwarnlimit;
resbcountp = &dqp->q_res_rtbcount;
}
@@ -650,7 +650,7 @@ xfs_trans_dqresv(
total_count = be64_to_cpu(dqp->q_core.d_icount) + ninos;
timer = be32_to_cpu(dqp->q_core.d_itimer);
warns = be16_to_cpu(dqp->q_core.d_iwarns);
- warnlimit = dqp->q_mount->m_quotainfo->qi_iwarnlimit;
+ warnlimit = defq->iwarnlimit;
hardlimit = be64_to_cpu(dqp->q_core.d_ino_hardlimit);
if (!hardlimit)
hardlimit = defq->ihardlimit;
@@ -711,7 +711,7 @@ xfs_trans_dqresv(
error_return:
xfs_dqunlock(dqp);
- if (flags & XFS_QMOPT_ENOSPC)
+ if (XFS_QM_ISPDQ(dqp))
return -ENOSPC;
return -EDQUOT;
}
@@ -751,8 +751,7 @@ xfs_trans_reserve_quota_bydquots(
ASSERT(flags & XFS_QMOPT_RESBLK_MASK);
if (udqp) {
- error = xfs_trans_dqresv(tp, mp, udqp, nblks, ninos,
- (flags & ~XFS_QMOPT_ENOSPC));
+ error = xfs_trans_dqresv(tp, mp, udqp, nblks, ninos, flags);
if (error)
return error;
}
@@ -803,16 +802,12 @@ xfs_trans_reserve_quota_nblks(
if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
return 0;
- if (XFS_IS_PQUOTA_ON(mp))
- flags |= XFS_QMOPT_ENOSPC;
ASSERT(!xfs_is_quota_inode(&mp->m_sb, ip->i_ino));
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
- ASSERT((flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) ==
- XFS_TRANS_DQ_RES_RTBLKS ||
- (flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) ==
- XFS_TRANS_DQ_RES_BLKS);
+ ASSERT((flags & ~(XFS_QMOPT_FORCE_RES)) == XFS_TRANS_DQ_RES_RTBLKS ||
+ (flags & ~(XFS_QMOPT_FORCE_RES)) == XFS_TRANS_DQ_RES_BLKS);
/*
* Reserve nblks against these dquots, with trans as the mediator.
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
index 35655eac01a6..3004aeac9110 100644
--- a/fs/xfs/xfs_trans_priv.h
+++ b/fs/xfs/xfs_trans_priv.h
@@ -91,26 +91,13 @@ xfs_trans_ail_update(
xfs_trans_ail_update_bulk(ailp, NULL, &lip, 1, lsn);
}
+void xfs_trans_ail_insert(struct xfs_ail *ailp, struct xfs_log_item *lip,
+ xfs_lsn_t lsn);
+
xfs_lsn_t xfs_ail_delete_one(struct xfs_ail *ailp, struct xfs_log_item *lip);
void xfs_ail_update_finish(struct xfs_ail *ailp, xfs_lsn_t old_lsn)
__releases(ailp->ail_lock);
-void xfs_trans_ail_delete(struct xfs_ail *ailp, struct xfs_log_item *lip,
- int shutdown_type);
-
-static inline void
-xfs_trans_ail_remove(
- struct xfs_log_item *lip,
- int shutdown_type)
-{
- struct xfs_ail *ailp = lip->li_ailp;
-
- spin_lock(&ailp->ail_lock);
- /* xfs_trans_ail_delete() drops the AIL lock */
- if (test_bit(XFS_LI_IN_AIL, &lip->li_flags))
- xfs_trans_ail_delete(ailp, lip, shutdown_type);
- else
- spin_unlock(&ailp->ail_lock);
-}
+void xfs_trans_ail_delete(struct xfs_log_item *lip, int shutdown_type);
void xfs_ail_push(struct xfs_ail *, xfs_lsn_t);
void xfs_ail_push_all(struct xfs_ail *);
diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c
index fc5d7276026e..bca48b308c02 100644
--- a/fs/xfs/xfs_xattr.c
+++ b/fs/xfs/xfs_xattr.c
@@ -12,7 +12,6 @@
#include "xfs_inode.h"
#include "xfs_attr.h"
#include "xfs_acl.h"
-#include "xfs_da_format.h"
#include "xfs_da_btree.h"
#include <linux/posix_acl_xattr.h>
diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
index 3ce9829a6936..d79b821ed1c7 100644
--- a/fs/zonefs/super.c
+++ b/fs/zonefs/super.c
@@ -20,6 +20,7 @@
#include <linux/mman.h>
#include <linux/sched/mm.h>
#include <linux/crc32.h>
+#include <linux/task_io_accounting_ops.h>
#include "zonefs.h"
@@ -78,10 +79,9 @@ static int zonefs_readpage(struct file *unused, struct page *page)
return iomap_readpage(page, &zonefs_iomap_ops);
}
-static int zonefs_readpages(struct file *unused, struct address_space *mapping,
- struct list_head *pages, unsigned int nr_pages)
+static void zonefs_readahead(struct readahead_control *rac)
{
- return iomap_readpages(mapping, pages, nr_pages, &zonefs_iomap_ops);
+ iomap_readahead(rac, &zonefs_iomap_ops);
}
/*
@@ -128,7 +128,7 @@ static int zonefs_writepages(struct address_space *mapping,
static const struct address_space_operations zonefs_file_aops = {
.readpage = zonefs_readpage,
- .readpages = zonefs_readpages,
+ .readahead = zonefs_readahead,
.writepage = zonefs_writepage,
.writepages = zonefs_writepages,
.set_page_dirty = iomap_set_page_dirty,
@@ -478,7 +478,7 @@ static int zonefs_file_fsync(struct file *file, loff_t start, loff_t end,
if (ZONEFS_I(inode)->i_ztype == ZONEFS_ZTYPE_CNV)
ret = file_write_and_wait_range(file, start, end);
if (!ret)
- ret = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
+ ret = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL);
if (ret)
zonefs_io_error(inode, true);
@@ -596,6 +596,61 @@ static const struct iomap_dio_ops zonefs_write_dio_ops = {
.end_io = zonefs_file_write_dio_end_io,
};
+static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from)
+{
+ struct inode *inode = file_inode(iocb->ki_filp);
+ struct zonefs_inode_info *zi = ZONEFS_I(inode);
+ struct block_device *bdev = inode->i_sb->s_bdev;
+ unsigned int max;
+ struct bio *bio;
+ ssize_t size;
+ int nr_pages;
+ ssize_t ret;
+
+ nr_pages = iov_iter_npages(from, BIO_MAX_PAGES);
+ if (!nr_pages)
+ return 0;
+
+ max = queue_max_zone_append_sectors(bdev_get_queue(bdev));
+ max = ALIGN_DOWN(max << SECTOR_SHIFT, inode->i_sb->s_blocksize);
+ iov_iter_truncate(from, max);
+
+ bio = bio_alloc_bioset(GFP_NOFS, nr_pages, &fs_bio_set);
+ if (!bio)
+ return -ENOMEM;
+
+ bio_set_dev(bio, bdev);
+ bio->bi_iter.bi_sector = zi->i_zsector;
+ bio->bi_write_hint = iocb->ki_hint;
+ bio->bi_ioprio = iocb->ki_ioprio;
+ bio->bi_opf = REQ_OP_ZONE_APPEND | REQ_SYNC | REQ_IDLE;
+ if (iocb->ki_flags & IOCB_DSYNC)
+ bio->bi_opf |= REQ_FUA;
+
+ ret = bio_iov_iter_get_pages(bio, from);
+ if (unlikely(ret)) {
+ bio_io_error(bio);
+ return ret;
+ }
+ size = bio->bi_iter.bi_size;
+ task_io_account_write(ret);
+
+ if (iocb->ki_flags & IOCB_HIPRI)
+ bio_set_polled(bio, iocb);
+
+ ret = submit_bio_wait(bio);
+
+ bio_put(bio);
+
+ zonefs_file_write_dio_end_io(iocb, size, ret, 0);
+ if (ret >= 0) {
+ iocb->ki_pos += size;
+ return size;
+ }
+
+ return ret;
+}
+
/*
* Handle direct writes. For sequential zone files, this is the only possible
* write path. For these files, check that the user is issuing writes
@@ -611,6 +666,8 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
struct inode *inode = file_inode(iocb->ki_filp);
struct zonefs_inode_info *zi = ZONEFS_I(inode);
struct super_block *sb = inode->i_sb;
+ bool sync = is_sync_kiocb(iocb);
+ bool append = false;
size_t count;
ssize_t ret;
@@ -619,7 +676,7 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
* as this can cause write reordering (e.g. the first aio gets EAGAIN
* on the inode lock but the second goes through but is now unaligned).
*/
- if (zi->i_ztype == ZONEFS_ZTYPE_SEQ && !is_sync_kiocb(iocb) &&
+ if (zi->i_ztype == ZONEFS_ZTYPE_SEQ && !sync &&
(iocb->ki_flags & IOCB_NOWAIT))
return -EOPNOTSUPP;
@@ -643,16 +700,22 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
}
/* Enforce sequential writes (append only) in sequential zones */
- mutex_lock(&zi->i_truncate_mutex);
- if (zi->i_ztype == ZONEFS_ZTYPE_SEQ && iocb->ki_pos != zi->i_wpoffset) {
+ if (zi->i_ztype == ZONEFS_ZTYPE_SEQ) {
+ mutex_lock(&zi->i_truncate_mutex);
+ if (iocb->ki_pos != zi->i_wpoffset) {
+ mutex_unlock(&zi->i_truncate_mutex);
+ ret = -EINVAL;
+ goto inode_unlock;
+ }
mutex_unlock(&zi->i_truncate_mutex);
- ret = -EINVAL;
- goto inode_unlock;
+ append = sync;
}
- mutex_unlock(&zi->i_truncate_mutex);
- ret = iomap_dio_rw(iocb, from, &zonefs_iomap_ops,
- &zonefs_write_dio_ops, is_sync_kiocb(iocb));
+ if (append)
+ ret = zonefs_file_dio_append(iocb, from);
+ else
+ ret = iomap_dio_rw(iocb, from, &zonefs_iomap_ops,
+ &zonefs_write_dio_ops, sync);
if (zi->i_ztype == ZONEFS_ZTYPE_SEQ &&
(ret > 0 || ret == -EIOCBQUEUED)) {
if (ret > 0)
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index a92bea7184a8..5afb6ceb284f 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -44,6 +44,7 @@ acpi_status acpi_execute_simple_method(acpi_handle handle, char *method,
u64 arg);
acpi_status acpi_evaluate_ej0(acpi_handle handle);
acpi_status acpi_evaluate_lck(acpi_handle handle, int lock);
+acpi_status acpi_evaluate_reg(acpi_handle handle, u8 space_id, u32 function);
bool acpi_ata_match(acpi_handle handle);
bool acpi_bay_match(acpi_handle handle);
bool acpi_dock_match(acpi_handle handle);
diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index 49b519f36b69..1dc8d262035b 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -12,7 +12,7 @@
/* Current ACPICA subsystem version in YYYYMMDD format */
-#define ACPI_CA_VERSION 0x20200326
+#define ACPI_CA_VERSION 0x20200430
#include <acpi/acconfig.h>
#include <acpi/actypes.h>
diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h
index e3f1cddb4ac8..517a5231cc1b 100644
--- a/include/acpi/ghes.h
+++ b/include/acpi/ghes.h
@@ -33,6 +33,9 @@ struct ghes_estatus_node {
struct llist_node llnode;
struct acpi_hest_generic *generic;
struct ghes *ghes;
+
+ int task_work_cpu;
+ struct callback_head task_work;
};
struct ghes_estatus_cache {
diff --git a/include/asm-generic/5level-fixup.h b/include/asm-generic/5level-fixup.h
index 4c74b1c1d13b..58046ddc08d0 100644
--- a/include/asm-generic/5level-fixup.h
+++ b/include/asm-generic/5level-fixup.h
@@ -17,8 +17,9 @@
((unlikely(pgd_none(*(p4d))) && __pud_alloc(mm, p4d, address)) ? \
NULL : pud_offset(p4d, address))
-#define p4d_alloc(mm, pgd, address) (pgd)
-#define p4d_offset(pgd, start) (pgd)
+#define p4d_alloc(mm, pgd, address) (pgd)
+#define p4d_alloc_track(mm, pgd, address, mask) (pgd)
+#define p4d_offset(pgd, start) (pgd)
#ifndef __ASSEMBLY__
static inline int p4d_none(p4d_t p4d)
diff --git a/include/asm-generic/checksum.h b/include/asm-generic/checksum.h
index 34785c0f57b0..5a80f8e54300 100644
--- a/include/asm-generic/checksum.h
+++ b/include/asm-generic/checksum.h
@@ -25,15 +25,6 @@ extern __wsum csum_partial(const void *buff, int len, __wsum sum);
*/
extern __wsum csum_partial_copy(const void *src, void *dst, int len, __wsum sum);
-/*
- * the same as csum_partial_copy, but copies from user space.
- *
- * here even more important to align src and dst on a 32-bit (or even
- * better 64-bit) boundary
- */
-extern __wsum csum_partial_copy_from_user(const void __user *src, void *dst,
- int len, __wsum sum, int *csum_err);
-
#ifndef csum_partial_copy_nocheck
#define csum_partial_copy_nocheck(src, dst, len, sum) \
csum_partial_copy((src), (dst), (len), (sum))
diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h
new file mode 100644
index 000000000000..e73a11850055
--- /dev/null
+++ b/include/asm-generic/hyperv-tlfs.h
@@ -0,0 +1,497 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * This file contains definitions from Hyper-V Hypervisor Top-Level Functional
+ * Specification (TLFS):
+ * https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/reference/tlfs
+ */
+
+#ifndef _ASM_GENERIC_HYPERV_TLFS_H
+#define _ASM_GENERIC_HYPERV_TLFS_H
+
+#include <linux/types.h>
+#include <linux/bits.h>
+#include <linux/time64.h>
+
+/*
+ * While not explicitly listed in the TLFS, Hyper-V always runs with a page size
+ * of 4096. These definitions are used when communicating with Hyper-V using
+ * guest physical pages and guest physical page addresses, since the guest page
+ * size may not be 4096 on all architectures.
+ */
+#define HV_HYP_PAGE_SHIFT 12
+#define HV_HYP_PAGE_SIZE BIT(HV_HYP_PAGE_SHIFT)
+#define HV_HYP_PAGE_MASK (~(HV_HYP_PAGE_SIZE - 1))
+
+/*
+ * Hyper-V provides two categories of flags relevant to guest VMs. The
+ * "Features" category indicates specific functionality that is available
+ * to guests on this particular instance of Hyper-V. The "Features"
+ * are presented in four groups, each of which is 32 bits. The group A
+ * and B definitions are common across architectures and are listed here.
+ * However, not all flags are relevant on all architectures.
+ *
+ * Groups C and D vary across architectures and are listed in the
+ * architecture specific portion of hyperv-tlfs.h. Some of these flags exist
+ * on multiple architectures, but the bit positions are different so they
+ * cannot appear in the generic portion of hyperv-tlfs.h.
+ *
+ * The "Enlightenments" category provides recommendations on whether to use
+ * specific enlightenments that are available. The Enlightenments are a single
+ * group of 32 bits, but they vary across architectures and are listed in
+ * the architecture specific portion of hyperv-tlfs.h.
+ */
+
+/*
+ * Group A Features.
+ */
+
+/* VP Runtime register available */
+#define HV_MSR_VP_RUNTIME_AVAILABLE BIT(0)
+/* Partition Reference Counter available */
+#define HV_MSR_TIME_REF_COUNT_AVAILABLE BIT(1)
+/* Basic SynIC register available */
+#define HV_MSR_SYNIC_AVAILABLE BIT(2)
+/* Synthetic Timer registers available */
+#define HV_MSR_SYNTIMER_AVAILABLE BIT(3)
+/* Virtual APIC assist and VP assist page registers available */
+#define HV_MSR_APIC_ACCESS_AVAILABLE BIT(4)
+/* Hypercall and Guest OS ID registers available */
+#define HV_MSR_HYPERCALL_AVAILABLE BIT(5)
+/* Access virtual processor index register available */
+#define HV_MSR_VP_INDEX_AVAILABLE BIT(6)
+/* Virtual system reset register available */
+#define HV_MSR_RESET_AVAILABLE BIT(7)
+/* Access statistics page registers available */
+#define HV_MSR_STAT_PAGES_AVAILABLE BIT(8)
+/* Partition reference TSC register is available */
+#define HV_MSR_REFERENCE_TSC_AVAILABLE BIT(9)
+/* Partition Guest IDLE register is available */
+#define HV_MSR_GUEST_IDLE_AVAILABLE BIT(10)
+/* Partition local APIC and TSC frequency registers available */
+#define HV_ACCESS_FREQUENCY_MSRS BIT(11)
+/* AccessReenlightenmentControls privilege */
+#define HV_ACCESS_REENLIGHTENMENT BIT(13)
+/* AccessTscInvariantControls privilege */
+#define HV_ACCESS_TSC_INVARIANT BIT(15)
+
+/*
+ * Group B features.
+ */
+#define HV_CREATE_PARTITIONS BIT(0)
+#define HV_ACCESS_PARTITION_ID BIT(1)
+#define HV_ACCESS_MEMORY_POOL BIT(2)
+#define HV_ADJUST_MESSAGE_BUFFERS BIT(3)
+#define HV_POST_MESSAGES BIT(4)
+#define HV_SIGNAL_EVENTS BIT(5)
+#define HV_CREATE_PORT BIT(6)
+#define HV_CONNECT_PORT BIT(7)
+#define HV_ACCESS_STATS BIT(8)
+#define HV_DEBUGGING BIT(11)
+#define HV_CPU_POWER_MANAGEMENT BIT(12)
+
+
+/*
+ * TSC page layout.
+ */
+struct ms_hyperv_tsc_page {
+ volatile u32 tsc_sequence;
+ u32 reserved1;
+ volatile u64 tsc_scale;
+ volatile s64 tsc_offset;
+} __packed;
+
+/*
+ * The guest OS needs to register the guest ID with the hypervisor.
+ * The guest ID is a 64 bit entity and the structure of this ID is
+ * specified in the Hyper-V specification:
+ *
+ * msdn.microsoft.com/en-us/library/windows/hardware/ff542653%28v=vs.85%29.aspx
+ *
+ * While the current guideline does not specify how Linux guest ID(s)
+ * need to be generated, our plan is to publish the guidelines for
+ * Linux and other guest operating systems that currently are hosted
+ * on Hyper-V. The implementation here conforms to these as yet
+ * unpublished guidelines.
+ *
+ *
+ * Bit(s)
+ * 63 - Indicates if the OS is Open Source or not; 1 is Open Source
+ * 62:56 - OS Type; Linux is 0x100
+ * 55:48 - Distro specific identification
+ * 47:16 - Linux kernel version number
+ * 15:0 - Distro specific identification
+ *
+ *
+ */
+
+#define HV_LINUX_VENDOR_ID 0x8100
+
+/*
+ * Crash notification flags.
+ */
+#define HV_CRASH_CTL_CRASH_NOTIFY_MSG BIT_ULL(62)
+#define HV_CRASH_CTL_CRASH_NOTIFY BIT_ULL(63)
+
+/* Declare the various hypercall operations. */
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE 0x0002
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST 0x0003
+#define HVCALL_NOTIFY_LONG_SPIN_WAIT 0x0008
+#define HVCALL_SEND_IPI 0x000b
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX 0x0013
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX 0x0014
+#define HVCALL_SEND_IPI_EX 0x0015
+#define HVCALL_GET_VP_REGISTERS 0x0050
+#define HVCALL_SET_VP_REGISTERS 0x0051
+#define HVCALL_POST_MESSAGE 0x005c
+#define HVCALL_SIGNAL_EVENT 0x005d
+#define HVCALL_POST_DEBUG_DATA 0x0069
+#define HVCALL_RETRIEVE_DEBUG_DATA 0x006a
+#define HVCALL_RESET_DEBUG_SESSION 0x006b
+#define HVCALL_RETARGET_INTERRUPT 0x007e
+#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af
+#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0
+
+#define HV_FLUSH_ALL_PROCESSORS BIT(0)
+#define HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES BIT(1)
+#define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY BIT(2)
+#define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT BIT(3)
+
+enum HV_GENERIC_SET_FORMAT {
+ HV_GENERIC_SET_SPARSE_4K,
+ HV_GENERIC_SET_ALL,
+};
+
+#define HV_PARTITION_ID_SELF ((u64)-1)
+#define HV_VP_INDEX_SELF ((u32)-2)
+
+#define HV_HYPERCALL_RESULT_MASK GENMASK_ULL(15, 0)
+#define HV_HYPERCALL_FAST_BIT BIT(16)
+#define HV_HYPERCALL_VARHEAD_OFFSET 17
+#define HV_HYPERCALL_REP_COMP_OFFSET 32
+#define HV_HYPERCALL_REP_COMP_1 BIT_ULL(32)
+#define HV_HYPERCALL_REP_COMP_MASK GENMASK_ULL(43, 32)
+#define HV_HYPERCALL_REP_START_OFFSET 48
+#define HV_HYPERCALL_REP_START_MASK GENMASK_ULL(59, 48)
+
+/* hypercall status code */
+#define HV_STATUS_SUCCESS 0
+#define HV_STATUS_INVALID_HYPERCALL_CODE 2
+#define HV_STATUS_INVALID_HYPERCALL_INPUT 3
+#define HV_STATUS_INVALID_ALIGNMENT 4
+#define HV_STATUS_INVALID_PARAMETER 5
+#define HV_STATUS_OPERATION_DENIED 8
+#define HV_STATUS_INSUFFICIENT_MEMORY 11
+#define HV_STATUS_INVALID_PORT_ID 17
+#define HV_STATUS_INVALID_CONNECTION_ID 18
+#define HV_STATUS_INSUFFICIENT_BUFFERS 19
+
+/*
+ * The Hyper-V TimeRefCount register and the TSC
+ * page provide a guest VM clock with 100ns tick rate
+ */
+#define HV_CLOCK_HZ (NSEC_PER_SEC/100)
+
+/* Define the number of synthetic interrupt sources. */
+#define HV_SYNIC_SINT_COUNT (16)
+/* Define the expected SynIC version. */
+#define HV_SYNIC_VERSION_1 (0x1)
+/* Valid SynIC vectors are 16-255. */
+#define HV_SYNIC_FIRST_VALID_VECTOR (16)
+
+#define HV_SYNIC_CONTROL_ENABLE (1ULL << 0)
+#define HV_SYNIC_SIMP_ENABLE (1ULL << 0)
+#define HV_SYNIC_SIEFP_ENABLE (1ULL << 0)
+#define HV_SYNIC_SINT_MASKED (1ULL << 16)
+#define HV_SYNIC_SINT_AUTO_EOI (1ULL << 17)
+#define HV_SYNIC_SINT_VECTOR_MASK (0xFF)
+
+#define HV_SYNIC_STIMER_COUNT (4)
+
+/* Define synthetic interrupt controller message constants. */
+#define HV_MESSAGE_SIZE (256)
+#define HV_MESSAGE_PAYLOAD_BYTE_COUNT (240)
+#define HV_MESSAGE_PAYLOAD_QWORD_COUNT (30)
+
+/* Define synthetic interrupt controller message flags. */
+union hv_message_flags {
+ __u8 asu8;
+ struct {
+ __u8 msg_pending:1;
+ __u8 reserved:7;
+ } __packed;
+};
+
+/* Define port identifier type. */
+union hv_port_id {
+ __u32 asu32;
+ struct {
+ __u32 id:24;
+ __u32 reserved:8;
+ } __packed u;
+};
+
+/* Define synthetic interrupt controller message header. */
+struct hv_message_header {
+ __u32 message_type;
+ __u8 payload_size;
+ union hv_message_flags message_flags;
+ __u8 reserved[2];
+ union {
+ __u64 sender;
+ union hv_port_id port;
+ };
+} __packed;
+
+/* Define synthetic interrupt controller message format. */
+struct hv_message {
+ struct hv_message_header header;
+ union {
+ __u64 payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT];
+ } u;
+} __packed;
+
+/* Define the synthetic interrupt message page layout. */
+struct hv_message_page {
+ struct hv_message sint_message[HV_SYNIC_SINT_COUNT];
+} __packed;
+
+/* Define timer message payload structure. */
+struct hv_timer_message_payload {
+ __u32 timer_index;
+ __u32 reserved;
+ __u64 expiration_time; /* When the timer expired */
+ __u64 delivery_time; /* When the message was delivered */
+} __packed;
+
+
+/* Define synthetic interrupt controller flag constants. */
+#define HV_EVENT_FLAGS_COUNT (256 * 8)
+#define HV_EVENT_FLAGS_LONG_COUNT (256 / sizeof(unsigned long))
+
+/*
+ * Synthetic timer configuration.
+ */
+union hv_stimer_config {
+ u64 as_uint64;
+ struct {
+ u64 enable:1;
+ u64 periodic:1;
+ u64 lazy:1;
+ u64 auto_enable:1;
+ u64 apic_vector:8;
+ u64 direct_mode:1;
+ u64 reserved_z0:3;
+ u64 sintx:4;
+ u64 reserved_z1:44;
+ } __packed;
+};
+
+
+/* Define the synthetic interrupt controller event flags format. */
+union hv_synic_event_flags {
+ unsigned long flags[HV_EVENT_FLAGS_LONG_COUNT];
+};
+
+/* Define SynIC control register. */
+union hv_synic_scontrol {
+ u64 as_uint64;
+ struct {
+ u64 enable:1;
+ u64 reserved:63;
+ } __packed;
+};
+
+/* Define synthetic interrupt source. */
+union hv_synic_sint {
+ u64 as_uint64;
+ struct {
+ u64 vector:8;
+ u64 reserved1:8;
+ u64 masked:1;
+ u64 auto_eoi:1;
+ u64 polling:1;
+ u64 reserved2:45;
+ } __packed;
+};
+
+/* Define the format of the SIMP register */
+union hv_synic_simp {
+ u64 as_uint64;
+ struct {
+ u64 simp_enabled:1;
+ u64 preserved:11;
+ u64 base_simp_gpa:52;
+ } __packed;
+};
+
+/* Define the format of the SIEFP register */
+union hv_synic_siefp {
+ u64 as_uint64;
+ struct {
+ u64 siefp_enabled:1;
+ u64 preserved:11;
+ u64 base_siefp_gpa:52;
+ } __packed;
+};
+
+struct hv_vpset {
+ u64 format;
+ u64 valid_bank_mask;
+ u64 bank_contents[];
+} __packed;
+
+/* HvCallSendSyntheticClusterIpi hypercall */
+struct hv_send_ipi {
+ u32 vector;
+ u32 reserved;
+ u64 cpu_mask;
+} __packed;
+
+/* HvCallSendSyntheticClusterIpiEx hypercall */
+struct hv_send_ipi_ex {
+ u32 vector;
+ u32 reserved;
+ struct hv_vpset vp_set;
+} __packed;
+
+/* HvFlushGuestPhysicalAddressSpace hypercalls */
+struct hv_guest_mapping_flush {
+ u64 address_space;
+ u64 flags;
+} __packed;
+
+/*
+ * HV_MAX_FLUSH_PAGES = "additional_pages" + 1. It's limited
+ * by the bitwidth of "additional_pages" in union hv_gpa_page_range.
+ */
+#define HV_MAX_FLUSH_PAGES (2048)
+
+/* HvFlushGuestPhysicalAddressList hypercall */
+union hv_gpa_page_range {
+ u64 address_space;
+ struct {
+ u64 additional_pages:11;
+ u64 largepage:1;
+ u64 basepfn:52;
+ } page;
+};
+
+/*
+ * All input flush parameters must fit in a single page. The maximum flush
+ * count equals the number of union hv_gpa_page_range entries that can be
+ * populated in the input parameter page.
+ */
+#define HV_MAX_FLUSH_REP_COUNT ((HV_HYP_PAGE_SIZE - 2 * sizeof(u64)) / \
+ sizeof(union hv_gpa_page_range))
+
+struct hv_guest_mapping_flush_list {
+ u64 address_space;
+ u64 flags;
+ union hv_gpa_page_range gpa_list[HV_MAX_FLUSH_REP_COUNT];
+};
+
+/* HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls */
+struct hv_tlb_flush {
+ u64 address_space;
+ u64 flags;
+ u64 processor_mask;
+ u64 gva_list[];
+} __packed;
+
+/* HvFlushVirtualAddressSpaceEx, HvFlushVirtualAddressListEx hypercalls */
+struct hv_tlb_flush_ex {
+ u64 address_space;
+ u64 flags;
+ struct hv_vpset hv_vp_set;
+ u64 gva_list[];
+} __packed;
+
+/* HvRetargetDeviceInterrupt hypercall */
+union hv_msi_entry {
+ u64 as_uint64;
+ struct {
+ u32 address;
+ u32 data;
+ } __packed;
+};
+
+struct hv_interrupt_entry {
+ u32 source; /* 1 for MSI(-X) */
+ u32 reserved1;
+ union hv_msi_entry msi_entry;
+} __packed;
+
+/*
+ * flags for hv_device_interrupt_target.flags
+ */
+#define HV_DEVICE_INTERRUPT_TARGET_MULTICAST 1
+#define HV_DEVICE_INTERRUPT_TARGET_PROCESSOR_SET 2
+
+struct hv_device_interrupt_target {
+ u32 vector;
+ u32 flags;
+ union {
+ u64 vp_mask;
+ struct hv_vpset vp_set;
+ };
+} __packed;
+
+struct hv_retarget_device_interrupt {
+ u64 partition_id; /* use "self" */
+ u64 device_id;
+ struct hv_interrupt_entry int_entry;
+ u64 reserved2;
+ struct hv_device_interrupt_target int_target;
+} __packed __aligned(8);
+
+
+/* HvGetVpRegisters hypercall input with variable size reg name list */
+struct hv_get_vp_registers_input {
+ struct {
+ u64 partitionid;
+ u32 vpindex;
+ u8 inputvtl;
+ u8 padding[3];
+ } header;
+ struct input {
+ u32 name0;
+ u32 name1;
+ } element[];
+} __packed;
+
+
+/* HvGetVpRegisters returns an array of these output elements */
+struct hv_get_vp_registers_output {
+ union {
+ struct {
+ u32 a;
+ u32 b;
+ u32 c;
+ u32 d;
+ } as32 __packed;
+ struct {
+ u64 low;
+ u64 high;
+ } as64 __packed;
+ };
+};
+
+/* HvSetVpRegisters hypercall with variable size reg name/value list */
+struct hv_set_vp_registers_input {
+ struct {
+ u64 partitionid;
+ u32 vpindex;
+ u8 inputvtl;
+ u8 padding[3];
+ } header;
+ struct {
+ u32 name;
+ u32 padding1;
+ u64 padding2;
+ u64 valuelow;
+ u64 valuehigh;
+ } element[];
+} __packed;
+
+#endif
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 329b8c8ca703..7056a25479d6 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -188,6 +188,23 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
}
#endif
+
+/*
+ * If two threads concurrently fault on the same page, the thread that won
+ * the race updates the PTE and its local TLB/cache. The other thread gives
+ * up, simply does nothing, and continues; on architectures where software
+ * can update the TLB, the local TLB can be updated here to avoid taking the
+ * next page fault. This function updates the TLB only and touches nothing
+ * else; that is the difference from update_mmu_cache().
+ */
+#ifndef __HAVE_ARCH_UPDATE_MMU_TLB
+static inline void update_mmu_tlb(struct vm_area_struct *vma,
+ unsigned long address, pte_t *ptep)
+{
+}
+#define __HAVE_ARCH_UPDATE_MMU_TLB
+#endif
+
/*
* Some architectures may be able to avoid expensive synchronization
* primitives when modifications are made to PTE's which are already
@@ -227,6 +244,22 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres
}
#endif
+/*
+ * On some architectures the hardware does not set the page access bit when a
+ * memory page is accessed; it is the responsibility of software to set it.
+ * Tracking the access bit in software incurs an extra page fault penalty, so
+ * as an optimization the bit can be set during the page fault flow on these
+ * architectures. To differentiate it from the pte_mkyoung macro, this macro
+ * is used on platforms where software maintains the page access bit.
+ */
+#ifndef pte_sw_mkyoung
+static inline pte_t pte_sw_mkyoung(pte_t pte)
+{
+ return pte;
+}
+#define pte_sw_mkyoung pte_sw_mkyoung
+#endif
+
#ifndef pte_savedwrite
#define pte_savedwrite pte_write
#endif
@@ -491,6 +524,10 @@ static inline int arch_unmap_one(struct mm_struct *mm,
#define flush_tlb_fix_spurious_fault(vma, address) flush_tlb_page(vma, address)
#endif
+#ifndef pgprot_nx
+#define pgprot_nx(prot) (prot)
+#endif
+
#ifndef pgprot_noncached
#define pgprot_noncached(prot) (prot)
#endif
@@ -1209,6 +1246,29 @@ static inline bool arch_has_pfn_modify_check(void)
# define PAGE_KERNEL_EXEC PAGE_KERNEL
#endif
+/*
+ * Page Table Modification bits for pgtbl_mod_mask.
+ *
+ * These are used by the p?d_alloc_track*() set of functions and in the generic
+ * vmalloc/ioremap code to track at which page-table levels entries have been
+ * modified. Based on that the code can better decide when vmalloc and ioremap
+ * mapping changes need to be synchronized to other page-tables in the system.
+ */
+#define __PGTBL_PGD_MODIFIED 0
+#define __PGTBL_P4D_MODIFIED 1
+#define __PGTBL_PUD_MODIFIED 2
+#define __PGTBL_PMD_MODIFIED 3
+#define __PGTBL_PTE_MODIFIED 4
+
+#define PGTBL_PGD_MODIFIED BIT(__PGTBL_PGD_MODIFIED)
+#define PGTBL_P4D_MODIFIED BIT(__PGTBL_P4D_MODIFIED)
+#define PGTBL_PUD_MODIFIED BIT(__PGTBL_PUD_MODIFIED)
+#define PGTBL_PMD_MODIFIED BIT(__PGTBL_PMD_MODIFIED)
+#define PGTBL_PTE_MODIFIED BIT(__PGTBL_PTE_MODIFIED)
+
+/* Page-Table Modification Mask */
+typedef unsigned int pgtbl_mod_mask;
+
#endif /* !__ASSEMBLY__ */
#ifndef io_remap_pfn_range
diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h
index d1779d442aa5..66397ed10acb 100644
--- a/include/asm-generic/sections.h
+++ b/include/asm-generic/sections.h
@@ -53,6 +53,9 @@ extern char __ctors_start[], __ctors_end[];
/* Start and end of .opd section - used for function descriptors. */
extern char __start_opd[], __end_opd[];
+/* Start and end of instrumentation protected text section */
+extern char __noinstr_text_start[], __noinstr_text_end[];
+
extern __visible const void __nosave_begin, __nosave_end;
/* Function descriptor handling (if any). Override in asm/sections.h */
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 71e387a5fe90..db600ef218d7 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -541,6 +541,15 @@
__end_rodata = .;
/*
+ * Non-instrumentable text section
+ */
+#define NOINSTR_TEXT \
+ ALIGN_FUNCTION(); \
+ __noinstr_text_start = .; \
+ *(.noinstr.text) \
+ __noinstr_text_end = .;
+
+/*
* .text section. Map to function alignment to avoid address changes
* during second ld run in second ld pass when generating System.map
*
@@ -551,6 +560,7 @@
#define TEXT_TEXT \
ALIGN_FUNCTION(); \
*(.text.hot TEXT_MAIN .text.fixup .text.unlikely) \
+ NOINSTR_TEXT \
*(.text..refcount) \
*(.ref.text) \
MEM_KEEP(init.text*) \
diff --git a/include/crypto/acompress.h b/include/crypto/acompress.h
index d873f999b334..2b4d2b06ccbd 100644
--- a/include/crypto/acompress.h
+++ b/include/crypto/acompress.h
@@ -157,7 +157,7 @@ static inline int crypto_has_acomp(const char *alg_name, u32 type, u32 mask)
{
type &= ~CRYPTO_ALG_TYPE_MASK;
type |= CRYPTO_ALG_TYPE_ACOMPRESS;
- mask |= CRYPTO_ALG_TYPE_MASK;
+ mask |= CRYPTO_ALG_TYPE_ACOMPRESS_MASK;
return crypto_has_alg(alg_name, type, mask);
}
diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h
index e115f9215ed5..00a9cf98debe 100644
--- a/include/crypto/algapi.h
+++ b/include/crypto/algapi.h
@@ -125,6 +125,8 @@ int crypto_inst_setname(struct crypto_instance *inst, const char *name,
void crypto_init_queue(struct crypto_queue *queue, unsigned int max_qlen);
int crypto_enqueue_request(struct crypto_queue *queue,
struct crypto_async_request *request);
+void crypto_enqueue_request_head(struct crypto_queue *queue,
+ struct crypto_async_request *request);
struct crypto_async_request *crypto_dequeue_request(struct crypto_queue *queue);
static inline unsigned int crypto_queue_len(struct crypto_queue *queue)
{
diff --git a/include/crypto/drbg.h b/include/crypto/drbg.h
index 8c9af21efce1..c4165126937e 100644
--- a/include/crypto/drbg.h
+++ b/include/crypto/drbg.h
@@ -184,11 +184,7 @@ static inline size_t drbg_max_addtl(struct drbg_state *drbg)
static inline size_t drbg_max_requests(struct drbg_state *drbg)
{
/* SP800-90A requires 2**48 maximum requests before reseeding */
-#if (__BITS_PER_LONG == 32)
- return SIZE_MAX;
-#else
- return (1UL<<48);
-#endif
+ return (1<<20);
}
/*
diff --git a/include/crypto/engine.h b/include/crypto/engine.h
index e29cd67f93c7..3f06e40d063a 100644
--- a/include/crypto/engine.h
+++ b/include/crypto/engine.h
@@ -24,7 +24,9 @@
* @idling: the engine is entering idle state
* @busy: request pump is busy
* @running: the engine is on working
- * @cur_req_prepared: current request is prepared
+ * @retry_support: indication that the hardware allows re-execution
+ * of a failed backlog request; such a request is put back on the queue
+ * of the crypto-engine, in head position, to keep order
* @list: link with the global crypto engine list
* @queue_lock: spinlock to syncronise access to request queue
* @queue: the crypto queue of the engine
@@ -35,6 +37,8 @@
* @unprepare_crypt_hardware: there are currently no more requests on the
* queue so the subsystem notifies the driver that it may relax the
* hardware by issuing this call
+ * @do_batch_requests: execute a batch of requests. Depends on multiple
+ * requests support.
* @kworker: kthread worker struct for request pump
* @pump_requests: work struct for scheduling work to the request pump
* @priv_data: the engine private data
@@ -45,7 +49,8 @@ struct crypto_engine {
bool idling;
bool busy;
bool running;
- bool cur_req_prepared;
+
+ bool retry_support;
struct list_head list;
spinlock_t queue_lock;
@@ -56,6 +61,8 @@ struct crypto_engine {
int (*prepare_crypt_hardware)(struct crypto_engine *engine);
int (*unprepare_crypt_hardware)(struct crypto_engine *engine);
+ int (*do_batch_requests)(struct crypto_engine *engine);
+
struct kthread_worker *kworker;
struct kthread_work pump_requests;
@@ -102,6 +109,10 @@ void crypto_finalize_skcipher_request(struct crypto_engine *engine,
int crypto_engine_start(struct crypto_engine *engine);
int crypto_engine_stop(struct crypto_engine *engine);
struct crypto_engine *crypto_engine_alloc_init(struct device *dev, bool rt);
+struct crypto_engine *crypto_engine_alloc_init_and_set(struct device *dev,
+ bool retry_support,
+ int (*cbk_do_batch)(struct crypto_engine *engine),
+ bool rt, int qlen);
int crypto_engine_exit(struct crypto_engine *engine);
#endif /* _CRYPTO_ENGINE_H */
diff --git a/include/crypto/hash.h b/include/crypto/hash.h
index cee446c59497..4829d2367eda 100644
--- a/include/crypto/hash.h
+++ b/include/crypto/hash.h
@@ -856,6 +856,25 @@ int crypto_shash_digest(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out);
/**
+ * crypto_shash_tfm_digest() - calculate message digest for buffer
+ * @tfm: hash transformation object
+ * @data: see crypto_shash_update()
+ * @len: see crypto_shash_update()
+ * @out: see crypto_shash_final()
+ *
+ * This is a simplified version of crypto_shash_digest() for users who don't
+ * want to allocate their own hash descriptor (shash_desc). Instead,
+ * crypto_shash_tfm_digest() takes a hash transformation object (crypto_shash)
+ * directly, and it allocates a hash descriptor on the stack internally.
+ * Note that this stack allocation may be fairly large.
+ *
+ * Context: Any context.
+ * Return: 0 on success; < 0 if an error occurred.
+ */
+int crypto_shash_tfm_digest(struct crypto_shash *tfm, const u8 *data,
+ unsigned int len, u8 *out);
+
+/**
* crypto_shash_export() - extract operational state for message digest
* @desc: reference to the operational state handle whose state is exported
* @out: output buffer of sufficient size that can hold the hash state
diff --git a/include/crypto/sha.h b/include/crypto/sha.h
index 5c2132c71900..10753ff71d46 100644
--- a/include/crypto/sha.h
+++ b/include/crypto/sha.h
@@ -114,6 +114,16 @@ extern int crypto_sha512_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *hash);
/*
+ * An implementation of SHA-1's compression function. Don't use in new code!
+ * You shouldn't be using SHA-1, and even if you *have* to use SHA-1, this isn't
+ * the correct way to hash something with SHA-1 (use crypto_shash instead).
+ */
+#define SHA1_DIGEST_WORDS (SHA1_DIGEST_SIZE / 4)
+#define SHA1_WORKSPACE_WORDS 16
+void sha1_init(__u32 *buf);
+void sha1_transform(__u32 *digest, const char *data, __u32 *W);
+
+/*
* Stand-alone implementation of the SHA256 algorithm. It is designed to
* have as little dependencies as possible so it can be used in the
* kexec_file purgatory. In other cases you should generally use the
@@ -123,7 +133,7 @@ extern int crypto_sha512_finup(struct shash_desc *desc, const u8 *data,
* For details see lib/crypto/sha256.c
*/
-static inline int sha256_init(struct sha256_state *sctx)
+static inline void sha256_init(struct sha256_state *sctx)
{
sctx->state[0] = SHA256_H0;
sctx->state[1] = SHA256_H1;
@@ -134,14 +144,11 @@ static inline int sha256_init(struct sha256_state *sctx)
sctx->state[6] = SHA256_H6;
sctx->state[7] = SHA256_H7;
sctx->count = 0;
-
- return 0;
}
-extern int sha256_update(struct sha256_state *sctx, const u8 *input,
- unsigned int length);
-extern int sha256_final(struct sha256_state *sctx, u8 *hash);
+void sha256_update(struct sha256_state *sctx, const u8 *data, unsigned int len);
+void sha256_final(struct sha256_state *sctx, u8 *out);
-static inline int sha224_init(struct sha256_state *sctx)
+static inline void sha224_init(struct sha256_state *sctx)
{
sctx->state[0] = SHA224_H0;
sctx->state[1] = SHA224_H1;
@@ -152,11 +159,8 @@ static inline int sha224_init(struct sha256_state *sctx)
sctx->state[6] = SHA224_H6;
sctx->state[7] = SHA224_H7;
sctx->count = 0;
-
- return 0;
}
-extern int sha224_update(struct sha256_state *sctx, const u8 *input,
- unsigned int length);
-extern int sha224_final(struct sha256_state *sctx, u8 *hash);
+void sha224_update(struct sha256_state *sctx, const u8 *data, unsigned int len);
+void sha224_final(struct sha256_state *sctx, u8 *out);
#endif
diff --git a/include/crypto/sha256_base.h b/include/crypto/sha256_base.h
index cea60cff80bd..6ded110783ae 100644
--- a/include/crypto/sha256_base.h
+++ b/include/crypto/sha256_base.h
@@ -22,14 +22,16 @@ static inline int sha224_base_init(struct shash_desc *desc)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
- return sha224_init(sctx);
+ sha224_init(sctx);
+ return 0;
}
static inline int sha256_base_init(struct shash_desc *desc)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
- return sha256_init(sctx);
+ sha256_init(sctx);
+ return 0;
}
static inline int sha256_base_do_update(struct shash_desc *desc,
diff --git a/include/drm/drm_client.h b/include/drm/drm_client.h
index 3ed5dee899fd..eb259c2547af 100644
--- a/include/drm/drm_client.h
+++ b/include/drm/drm_client.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 or MIT */
#ifndef _DRM_CLIENT_H_
#define _DRM_CLIENT_H_
@@ -188,6 +188,6 @@ int drm_client_modeset_dpms(struct drm_client_dev *client, int mode);
drm_for_each_connector_iter(connector, iter) \
if (connector->connector_type != DRM_MODE_CONNECTOR_WRITEBACK)
-int drm_client_debugfs_init(struct drm_minor *minor);
+void drm_client_debugfs_init(struct drm_minor *minor);
#endif
diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h
index 19ae6bb5c85b..fd543d1db9b2 100644
--- a/include/drm/drm_connector.h
+++ b/include/drm/drm_connector.h
@@ -1617,9 +1617,9 @@ struct drm_tile_group {
};
struct drm_tile_group *drm_mode_create_tile_group(struct drm_device *dev,
- char topology[8]);
+ const char topology[8]);
struct drm_tile_group *drm_mode_get_tile_group(struct drm_device *dev,
- char topology[8]);
+ const char topology[8]);
void drm_mode_put_tile_group(struct drm_device *dev,
struct drm_tile_group *tg);
diff --git a/include/drm/drm_debugfs.h b/include/drm/drm_debugfs.h
index 7501e323d383..2188dc83957f 100644
--- a/include/drm/drm_debugfs.h
+++ b/include/drm/drm_debugfs.h
@@ -80,18 +80,16 @@ struct drm_info_node {
};
#if defined(CONFIG_DEBUG_FS)
-int drm_debugfs_create_files(const struct drm_info_list *files,
- int count, struct dentry *root,
- struct drm_minor *minor);
+void drm_debugfs_create_files(const struct drm_info_list *files,
+ int count, struct dentry *root,
+ struct drm_minor *minor);
int drm_debugfs_remove_files(const struct drm_info_list *files,
int count, struct drm_minor *minor);
#else
-static inline int drm_debugfs_create_files(const struct drm_info_list *files,
- int count, struct dentry *root,
- struct drm_minor *minor)
-{
- return 0;
-}
+static inline void drm_debugfs_create_files(const struct drm_info_list *files,
+ int count, struct dentry *root,
+ struct drm_minor *minor)
+{}
static inline int drm_debugfs_remove_files(const struct drm_info_list *files,
int count, struct drm_minor *minor)
diff --git a/include/drm/drm_device.h b/include/drm/drm_device.h
index bb60a949f416..a55874db9dd4 100644
--- a/include/drm/drm_device.h
+++ b/include/drm/drm_device.h
@@ -67,15 +67,33 @@ struct drm_device {
/** @dev: Device structure of bus-device */
struct device *dev;
+ /**
+ * @managed:
+ *
+ * Managed resources linked to the lifetime of this &drm_device as
+ * tracked by @ref.
+ */
+ struct {
+ /** @managed.resources: managed resources list */
+ struct list_head resources;
+ /** @managed.final_kfree: pointer for final kfree() call */
+ void *final_kfree;
+ /** @managed.lock: protects @managed.resources */
+ spinlock_t lock;
+ } managed;
+
/** @driver: DRM driver managing the device */
struct drm_driver *driver;
/**
* @dev_private:
*
- * DRM driver private data. Instead of using this pointer it is
- * recommended that drivers use drm_dev_init() and embed struct
- * &drm_device in their larger per-device structure.
+ * DRM driver private data. This is deprecated and should be left set to
+ * NULL.
+ *
+ * Instead of using this pointer it is recommended that drivers use
+ * drm_dev_init() and embed struct &drm_device in their larger
+ * per-device structure.
*/
void *dev_private;
diff --git a/include/drm/drm_displayid.h b/include/drm/drm_displayid.h
index 9d3b745c3107..27bdd273fc4e 100644
--- a/include/drm/drm_displayid.h
+++ b/include/drm/drm_displayid.h
@@ -97,7 +97,7 @@ struct displayid_detailed_timing_block {
(idx) + sizeof(struct displayid_block) <= (length) && \
(idx) + sizeof(struct displayid_block) + (block)->num_bytes <= (length) && \
(block)->num_bytes > 0; \
- (idx) += (block)->num_bytes + sizeof(struct displayid_block), \
+ (idx) += sizeof(struct displayid_block) + (block)->num_bytes, \
(block) = (struct displayid_block *)&(displayid)[idx])
#endif
diff --git a/include/drm/drm_dp_helper.h b/include/drm/drm_dp_helper.h
index c6119e4c169a..2035ac44afde 100644
--- a/include/drm/drm_dp_helper.h
+++ b/include/drm/drm_dp_helper.h
@@ -292,7 +292,7 @@
#define DP_DSC_PEAK_THROUGHPUT 0x06B
# define DP_DSC_THROUGHPUT_MODE_0_MASK (0xf << 0)
# define DP_DSC_THROUGHPUT_MODE_0_SHIFT 0
-# define DP_DSC_THROUGHPUT_MODE_0_UPSUPPORTED 0
+# define DP_DSC_THROUGHPUT_MODE_0_UNSUPPORTED 0
# define DP_DSC_THROUGHPUT_MODE_0_340 (1 << 0)
# define DP_DSC_THROUGHPUT_MODE_0_400 (2 << 0)
# define DP_DSC_THROUGHPUT_MODE_0_450 (3 << 0)
@@ -310,7 +310,7 @@
# define DP_DSC_THROUGHPUT_MODE_0_170 (15 << 0) /* 1.4a */
# define DP_DSC_THROUGHPUT_MODE_1_MASK (0xf << 4)
# define DP_DSC_THROUGHPUT_MODE_1_SHIFT 4
-# define DP_DSC_THROUGHPUT_MODE_1_UPSUPPORTED 0
+# define DP_DSC_THROUGHPUT_MODE_1_UNSUPPORTED 0
# define DP_DSC_THROUGHPUT_MODE_1_340 (1 << 4)
# define DP_DSC_THROUGHPUT_MODE_1_400 (2 << 4)
# define DP_DSC_THROUGHPUT_MODE_1_450 (3 << 4)
@@ -701,7 +701,16 @@
# define DP_TEST_CRC_SUPPORTED (1 << 5)
# define DP_TEST_COUNT_MASK 0xf
-#define DP_TEST_PHY_PATTERN 0x248
+#define DP_PHY_TEST_PATTERN 0x248
+# define DP_PHY_TEST_PATTERN_SEL_MASK 0x7
+# define DP_PHY_TEST_PATTERN_NONE 0x0
+# define DP_PHY_TEST_PATTERN_D10_2 0x1
+# define DP_PHY_TEST_PATTERN_ERROR_COUNT 0x2
+# define DP_PHY_TEST_PATTERN_PRBS7 0x3
+# define DP_PHY_TEST_PATTERN_80BIT_CUSTOM 0x4
+# define DP_PHY_TEST_PATTERN_CP2520 0x5
+
+#define DP_TEST_HBR2_SCRAMBLER_RESET 0x24A
#define DP_TEST_80BIT_CUSTOM_PATTERN_7_0 0x250
#define DP_TEST_80BIT_CUSTOM_PATTERN_15_8 0x251
#define DP_TEST_80BIT_CUSTOM_PATTERN_23_16 0x252
@@ -1209,6 +1218,139 @@ struct dp_sdp {
#define EDP_VSC_PSR_UPDATE_RFB (1<<1)
#define EDP_VSC_PSR_CRC_VALUES_VALID (1<<2)
+/**
+ * enum dp_pixelformat - drm DP Pixel encoding formats
+ *
+ * This enum is used to indicate DP VSC SDP Pixel encoding formats.
+ * It is based on DP 1.4 spec [Table 2-117: VSC SDP Payload for DB16 through
+ * DB18]
+ *
+ * @DP_PIXELFORMAT_RGB: RGB pixel encoding format
+ * @DP_PIXELFORMAT_YUV444: YCbCr 4:4:4 pixel encoding format
+ * @DP_PIXELFORMAT_YUV422: YCbCr 4:2:2 pixel encoding format
+ * @DP_PIXELFORMAT_YUV420: YCbCr 4:2:0 pixel encoding format
+ * @DP_PIXELFORMAT_Y_ONLY: Y Only pixel encoding format
+ * @DP_PIXELFORMAT_RAW: RAW pixel encoding format
+ * @DP_PIXELFORMAT_RESERVED: Reserved pixel encoding format
+ */
+enum dp_pixelformat {
+ DP_PIXELFORMAT_RGB = 0,
+ DP_PIXELFORMAT_YUV444 = 0x1,
+ DP_PIXELFORMAT_YUV422 = 0x2,
+ DP_PIXELFORMAT_YUV420 = 0x3,
+ DP_PIXELFORMAT_Y_ONLY = 0x4,
+ DP_PIXELFORMAT_RAW = 0x5,
+ DP_PIXELFORMAT_RESERVED = 0x6,
+};
+
+/**
+ * enum dp_colorimetry - drm DP Colorimetry formats
+ *
+ * This enum is used to indicate DP VSC SDP Colorimetry formats.
+ * It is based on DP 1.4 spec [Table 2-117: VSC SDP Payload for DB16 through
+ * DB18], and the enum member names follow the DRM_MODE_COLORIMETRY definitions.
+ *
+ * @DP_COLORIMETRY_DEFAULT: sRGB (IEC 61966-2-1) or
+ * ITU-R BT.601 colorimetry format
+ * @DP_COLORIMETRY_RGB_WIDE_FIXED: RGB wide gamut fixed point colorimetry format
+ * @DP_COLORIMETRY_BT709_YCC: ITU-R BT.709 colorimetry format
+ * @DP_COLORIMETRY_RGB_WIDE_FLOAT: RGB wide gamut floating point
+ * (scRGB (IEC 61966-2-2)) colorimetry format
+ * @DP_COLORIMETRY_XVYCC_601: xvYCC601 colorimetry format
+ * @DP_COLORIMETRY_OPRGB: OpRGB colorimetry format
+ * @DP_COLORIMETRY_XVYCC_709: xvYCC709 colorimetry format
+ * @DP_COLORIMETRY_DCI_P3_RGB: DCI-P3 (SMPTE RP 431-2) colorimetry format
+ * @DP_COLORIMETRY_SYCC_601: sYCC601 colorimetry format
+ * @DP_COLORIMETRY_RGB_CUSTOM: RGB Custom Color Profile colorimetry format
+ * @DP_COLORIMETRY_OPYCC_601: opYCC601 colorimetry format
+ * @DP_COLORIMETRY_BT2020_RGB: ITU-R BT.2020 R' G' B' colorimetry format
+ * @DP_COLORIMETRY_BT2020_CYCC: ITU-R BT.2020 Y'c C'bc C'rc colorimetry format
+ * @DP_COLORIMETRY_BT2020_YCC: ITU-R BT.2020 Y' C'b C'r colorimetry format
+ */
+enum dp_colorimetry {
+ DP_COLORIMETRY_DEFAULT = 0,
+ DP_COLORIMETRY_RGB_WIDE_FIXED = 0x1,
+ DP_COLORIMETRY_BT709_YCC = 0x1,
+ DP_COLORIMETRY_RGB_WIDE_FLOAT = 0x2,
+ DP_COLORIMETRY_XVYCC_601 = 0x2,
+ DP_COLORIMETRY_OPRGB = 0x3,
+ DP_COLORIMETRY_XVYCC_709 = 0x3,
+ DP_COLORIMETRY_DCI_P3_RGB = 0x4,
+ DP_COLORIMETRY_SYCC_601 = 0x4,
+ DP_COLORIMETRY_RGB_CUSTOM = 0x5,
+ DP_COLORIMETRY_OPYCC_601 = 0x5,
+ DP_COLORIMETRY_BT2020_RGB = 0x6,
+ DP_COLORIMETRY_BT2020_CYCC = 0x6,
+ DP_COLORIMETRY_BT2020_YCC = 0x7,
+};
+
+/**
+ * enum dp_dynamic_range - drm DP Dynamic Range
+ *
+ * This enum is used to indicate DP VSC SDP Dynamic Range.
+ * It is based on DP 1.4 spec [Table 2-117: VSC SDP Payload for DB16 through
+ * DB18]
+ *
+ * @DP_DYNAMIC_RANGE_VESA: VESA range
+ * @DP_DYNAMIC_RANGE_CTA: CTA range
+ */
+enum dp_dynamic_range {
+ DP_DYNAMIC_RANGE_VESA = 0,
+ DP_DYNAMIC_RANGE_CTA = 1,
+};
+
+/**
+ * enum dp_content_type - drm DP Content Type
+ *
+ * This enum is used to indicate DP VSC SDP Content Types.
+ * It is based on DP 1.4 spec [Table 2-117: VSC SDP Payload for DB16 through
+ * DB18]
+ * CTA-861-G defines content types and expected processing by a sink device.
+ *
+ * @DP_CONTENT_TYPE_NOT_DEFINED: Not defined type
+ * @DP_CONTENT_TYPE_GRAPHICS: Graphics type
+ * @DP_CONTENT_TYPE_PHOTO: Photo type
+ * @DP_CONTENT_TYPE_VIDEO: Video type
+ * @DP_CONTENT_TYPE_GAME: Game type
+ */
+enum dp_content_type {
+ DP_CONTENT_TYPE_NOT_DEFINED = 0x00,
+ DP_CONTENT_TYPE_GRAPHICS = 0x01,
+ DP_CONTENT_TYPE_PHOTO = 0x02,
+ DP_CONTENT_TYPE_VIDEO = 0x03,
+ DP_CONTENT_TYPE_GAME = 0x04,
+};
+
+/**
+ * struct drm_dp_vsc_sdp - drm DP VSC SDP
+ *
+ * This structure represents a DP VSC SDP in drm.
+ * It is based on DP 1.4 spec [Table 2-116: VSC SDP Header Bytes] and
+ * [Table 2-117: VSC SDP Payload for DB16 through DB18]
+ *
+ * @sdp_type: secondary-data packet type
+ * @revision: revision number
+ * @length: number of valid data bytes
+ * @pixelformat: pixel encoding format
+ * @colorimetry: colorimetry format
+ * @bpc: bits per color
+ * @dynamic_range: dynamic range information
+ * @content_type: content type, as defined by CTA-861-G
+ */
+struct drm_dp_vsc_sdp {
+ unsigned char sdp_type;
+ unsigned char revision;
+ unsigned char length;
+ enum dp_pixelformat pixelformat;
+ enum dp_colorimetry colorimetry;
+ int bpc;
+ enum dp_dynamic_range dynamic_range;
+ enum dp_content_type content_type;
+};
+
+void drm_dp_vsc_sdp_log(const char *level, struct device *dev,
+ const struct drm_dp_vsc_sdp *vsc);
+
int drm_dp_psr_setup_time(const u8 psr_cap[EDP_PSR_RECEIVER_CAP_SIZE]);
static inline int
@@ -1548,6 +1690,13 @@ enum drm_dp_quirk {
* capabilities advertised.
*/
DP_QUIRK_FORCE_DPCD_BACKLIGHT,
+ /**
+ * @DP_DPCD_QUIRK_CAN_DO_MAX_LINK_RATE_3_24_GBPS:
+ *
+ * The device supports a link rate of 3.24 Gbps (multiplier 0xc) despite
+ * the DP_MAX_LINK_RATE register reporting a lower max multiplier.
+ */
+ DP_DPCD_QUIRK_CAN_DO_MAX_LINK_RATE_3_24_GBPS,
};
/**
@@ -1598,4 +1747,26 @@ static inline void drm_dp_cec_unset_edid(struct drm_dp_aux *aux)
#endif
+/**
+ * struct drm_dp_phy_test_params - DP Phy Compliance parameters
+ * @link_rate: Requested Link rate from DPCD 0x219
+ * @num_lanes: Number of lanes requested by the sink through DPCD 0x220
+ * @phy_pattern: DP Phy test pattern from DPCD 0x248
+ * @hbr2_reset: DP HBR2_COMPLIANCE_SCRAMBLER_RESET from DPCD 0x24A and 0x24B
+ * @custom80: DP Test_80BIT_CUSTOM_PATTERN from DPCDs 0x250 through 0x259
+ * @enhanced_frame_cap: flag for enhanced frame capability.
+ */
+struct drm_dp_phy_test_params {
+ int link_rate;
+ u8 num_lanes;
+ u8 phy_pattern;
+ u8 hbr2_reset[2];
+ u8 custom80[10];
+ bool enhanced_frame_cap;
+};
+
+int drm_dp_get_phy_test_pattern(struct drm_dp_aux *aux,
+ struct drm_dp_phy_test_params *data);
+int drm_dp_set_phy_test_pattern(struct drm_dp_aux *aux,
+ struct drm_dp_phy_test_params *data, u8 dp_rev);
#endif /* _DRM_DP_HELPER_H_ */
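As a rough illustration of the new VSC SDP types above, a driver could describe an 8 bpc RGB stream with VESA dynamic range and log it. All values and the function name below are illustrative only:

static void foo_log_vsc_sdp(struct device *dev)
{
        struct drm_dp_vsc_sdp vsc = {
                .sdp_type = DP_SDP_VSC,
                .revision = 0x5,
                .length = 0x13,
                .pixelformat = DP_PIXELFORMAT_RGB,
                .colorimetry = DP_COLORIMETRY_DEFAULT,
                .bpc = 8,
                .dynamic_range = DP_DYNAMIC_RANGE_VESA,
                .content_type = DP_CONTENT_TYPE_NOT_DEFINED,
        };

        drm_dp_vsc_sdp_log(KERN_DEBUG, dev, &vsc);
}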
diff --git a/include/drm/drm_dp_mst_helper.h b/include/drm/drm_dp_mst_helper.h
index 3cde42b333c3..9e1ffcd7cb68 100644
--- a/include/drm/drm_dp_mst_helper.h
+++ b/include/drm/drm_dp_mst_helper.h
@@ -157,19 +157,45 @@ struct drm_dp_mst_port {
*/
bool has_audio;
+ /**
+ * @fec_capable: bool indicating if FEC can be supported up to that
+ * point in the MST topology.
+ */
bool fec_capable;
};
+/* sideband msg header - not bit struct */
+struct drm_dp_sideband_msg_hdr {
+ u8 lct;
+ u8 lcr;
+ u8 rad[8];
+ bool broadcast;
+ bool path_msg;
+ u8 msg_len;
+ bool somt;
+ bool eomt;
+ bool seqno;
+};
+
+struct drm_dp_sideband_msg_rx {
+ u8 chunk[48];
+ u8 msg[256];
+ u8 curchunk_len;
+ u8 curchunk_idx; /* chunk we are parsing now */
+ u8 curchunk_hdrlen;
+ u8 curlen; /* total length of the msg */
+ bool have_somt;
+ bool have_eomt;
+ struct drm_dp_sideband_msg_hdr initial_hdr;
+};
+
/**
* struct drm_dp_mst_branch - MST branch device.
* @rad: Relative Address to talk to this branch device.
* @lct: Link count total to talk to this branch device.
* @num_ports: number of ports on the branch.
- * @msg_slots: one bit per transmitted msg slot.
* @port_parent: pointer to the port parent, NULL if toplevel.
* @mgr: topology manager for this branch device.
- * @tx_slots: transmission slots for this device.
- * @last_seqno: last sequence number used to talk to this.
* @link_address_sent: if a link address message has been sent to this device yet.
* @guid: guid for DP 1.2 branch device. port under this branch can be
* identified by port #.
@@ -210,7 +236,6 @@ struct drm_dp_mst_branch {
u8 lct;
int num_ports;
- int msg_slots;
/**
* @ports: the list of ports on this branch device. This should be
* considered protected for reading by &drm_dp_mst_topology_mgr.lock.
@@ -223,13 +248,9 @@ struct drm_dp_mst_branch {
*/
struct list_head ports;
- /* list of tx ops queue for this port */
struct drm_dp_mst_port *port_parent;
struct drm_dp_mst_topology_mgr *mgr;
- /* slots are protected by mstb->mgr->qlock */
- struct drm_dp_sideband_msg_tx *tx_slots[2];
- int last_seqno;
bool link_address_sent;
/* global unique identifier to identify branch devices */
@@ -237,19 +258,6 @@ struct drm_dp_mst_branch {
};
-/* sideband msg header - not bit struct */
-struct drm_dp_sideband_msg_hdr {
- u8 lct;
- u8 lcr;
- u8 rad[8];
- bool broadcast;
- bool path_msg;
- u8 msg_len;
- bool somt;
- bool eomt;
- bool seqno;
-};
-
struct drm_dp_nak_reply {
u8 guid[16];
u8 reason;
@@ -306,18 +314,6 @@ struct drm_dp_remote_i2c_write_ack_reply {
};
-struct drm_dp_sideband_msg_rx {
- u8 chunk[48];
- u8 msg[256];
- u8 curchunk_len;
- u8 curchunk_idx; /* chunk we are parsing now */
- u8 curchunk_hdrlen;
- u8 curlen; /* total length of the msg */
- bool have_somt;
- bool have_eomt;
- struct drm_dp_sideband_msg_hdr initial_hdr;
-};
-
#define DRM_DP_MAX_SDP_STREAMS 16
struct drm_dp_allocate_payload {
u8 port_number;
@@ -479,8 +475,6 @@ struct drm_dp_mst_topology_mgr;
struct drm_dp_mst_topology_cbs {
/* create a connector for a port */
struct drm_connector *(*add_connector)(struct drm_dp_mst_topology_mgr *mgr, struct drm_dp_mst_port *port, const char *path);
- void (*destroy_connector)(struct drm_dp_mst_topology_mgr *mgr,
- struct drm_connector *connector);
};
#define DP_MAX_PAYLOAD (sizeof(unsigned long) * 8)
@@ -556,15 +550,17 @@ struct drm_dp_mst_topology_mgr {
int conn_base_id;
/**
- * @down_rep_recv: Message receiver state for down replies.
- */
- struct drm_dp_sideband_msg_rx down_rep_recv;
- /**
* @up_req_recv: Message receiver state for up requests.
*/
struct drm_dp_sideband_msg_rx up_req_recv;
/**
+ * @down_rep_recv: Message receiver state for replies to down
+ * requests.
+ */
+ struct drm_dp_sideband_msg_rx down_rep_recv;
+
+ /**
* @lock: protects @mst_state, @mst_primary, @dpcd, and
* @payload_id_table_cleared.
*/
@@ -590,11 +586,6 @@ struct drm_dp_mst_topology_mgr {
bool payload_id_table_cleared : 1;
/**
- * @is_waiting_for_dwn_reply: whether we're waiting for a down reply.
- */
- bool is_waiting_for_dwn_reply : 1;
-
- /**
* @mst_primary: Pointer to the primary/first branch device.
*/
struct drm_dp_mst_branch *mst_primary;
@@ -618,13 +609,12 @@ struct drm_dp_mst_topology_mgr {
const struct drm_private_state_funcs *funcs;
/**
- * @qlock: protects @tx_msg_downq, the &drm_dp_mst_branch.txslost and
- * &drm_dp_sideband_msg_tx.state once they are queued
+ * @qlock: protects @tx_msg_downq and &drm_dp_sideband_msg_tx.state
*/
struct mutex qlock;
/**
- * @tx_msg_downq: List of pending down replies.
+ * @tx_msg_downq: List of pending down requests.
*/
struct list_head tx_msg_downq;
@@ -734,8 +724,6 @@ drm_dp_mst_detect_port(struct drm_connector *connector,
struct drm_dp_mst_topology_mgr *mgr,
struct drm_dp_mst_port *port);
-bool drm_dp_mst_port_has_audio(struct drm_dp_mst_topology_mgr *mgr,
- struct drm_dp_mst_port *port);
struct edid *drm_dp_mst_get_edid(struct drm_connector *connector, struct drm_dp_mst_topology_mgr *mgr, struct drm_dp_mst_port *port);
diff --git a/include/drm/drm_drv.h b/include/drm/drm_drv.h
index 97109df5beac..6d457652f199 100644
--- a/include/drm/drm_drv.h
+++ b/include/drm/drm_drv.h
@@ -262,9 +262,11 @@ struct drm_driver {
* @release:
*
* Optional callback for destroying device data after the final
- * reference is released, i.e. the device is being destroyed. Drivers
- * using this callback are responsible for calling drm_dev_fini()
- * to finalize the device and then freeing the struct themselves.
+ * reference is released, i.e. the device is being destroyed.
+ *
+ * This is deprecated, clean up all memory allocations associated with a
+ * &drm_device using drmm_add_action(), drmm_kmalloc() and related
+ * managed resources functions.
*/
void (*release) (struct drm_device *);
@@ -323,7 +325,7 @@ struct drm_driver {
*
* Allows drivers to create driver-specific debugfs files.
*/
- int (*debugfs_init)(struct drm_minor *minor);
+ void (*debugfs_init)(struct drm_minor *minor);
/**
* @gem_free_object: deconstructor for drm_gem_objects
@@ -620,7 +622,39 @@ int drm_dev_init(struct drm_device *dev,
int devm_drm_dev_init(struct device *parent,
struct drm_device *dev,
struct drm_driver *driver);
-void drm_dev_fini(struct drm_device *dev);
+
+void *__devm_drm_dev_alloc(struct device *parent, struct drm_driver *driver,
+ size_t size, size_t offset);
+
+/**
+ * devm_drm_dev_alloc - Resource managed allocation of a &drm_device instance
+ * @parent: Parent device object
+ * @driver: DRM driver
+ * @type: the type of the struct which contains struct &drm_device
+ * @member: the name of the &drm_device within @type.
+ *
+ * This allocates and initializes a new DRM device. No device registration is done.
+ * Call drm_dev_register() to advertise the device to user space and register it
+ * with other core subsystems. This should be done last in the device
+ * initialization sequence to make sure userspace can't access an inconsistent
+ * state.
+ *
+ * The initial ref-count of the object is 1. Use drm_dev_get() and
+ * drm_dev_put() to take and drop further ref-counts.
+ *
+ * It is recommended that drivers embed &struct drm_device into their own device
+ * structure.
+ *
+ * Note that this manages the lifetime of the resulting &drm_device
+ * automatically using devres. The DRM device initialized with this function is
+ * automatically put on driver detach using drm_dev_put().
+ *
+ * RETURNS:
+ * Pointer to new DRM device, or ERR_PTR on failure.
+ */
+#define devm_drm_dev_alloc(parent, driver, type, member) \
+ ((type *) __devm_drm_dev_alloc(parent, driver, sizeof(type), \
+ offsetof(type, member)))
struct drm_device *drm_dev_alloc(struct drm_driver *driver,
struct device *parent);
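A typical probe path using devm_drm_dev_alloc() might look like the sketch below; struct foo_device, foo_drm_driver and foo_probe() are hypothetical names used only for illustration:

struct foo_device {
        struct drm_device drm;
        /* driver-private state follows */
};

static int foo_probe(struct platform_device *pdev)
{
        struct foo_device *foo;

        foo = devm_drm_dev_alloc(&pdev->dev, &foo_drm_driver,
                                 struct foo_device, drm);
        if (IS_ERR(foo))
                return PTR_ERR(foo);

        /* hardware setup would go here */

        return drm_dev_register(&foo->drm, 0);
}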
diff --git a/include/drm/drm_encoder.h b/include/drm/drm_encoder.h
index 4370e039c015..a60f5f1555ac 100644
--- a/include/drm/drm_encoder.h
+++ b/include/drm/drm_encoder.h
@@ -142,7 +142,7 @@ struct drm_encoder {
* the bits for all &drm_crtc objects this encoder can be connected to
* before calling drm_dev_register().
*
- * In reality almost every driver gets this wrong.
+ * You will get a WARN if you get this wrong in the driver.
*
* Note that since CRTC objects can't be hotplugged the assigned indices
* are stable and hence known before registering all objects.
@@ -159,7 +159,11 @@ struct drm_encoder {
* encoders can be used in a cloned configuration, they both should have
* each another bits set.
*
- * In reality almost every driver gets this wrong.
+ * As an exception to the above rule if the driver doesn't implement
+ * any cloning it can leave @possible_clones set to 0. The core will
+ * automagically fix this up by setting the bit for the encoder itself.
+ *
+ * You will get a WARN if you get this wrong in the driver.
*
* Note that since encoder objects can't be hotplugged the assigned indices
* are stable and hence known before registering all objects.
diff --git a/include/drm/drm_fb_helper.h b/include/drm/drm_fb_helper.h
index 208dbf87afa3..306aa3a60be9 100644
--- a/include/drm/drm_fb_helper.h
+++ b/include/drm/drm_fb_helper.h
@@ -269,7 +269,8 @@ int drm_fb_helper_debug_leave(struct fb_info *info);
void drm_fb_helper_lastclose(struct drm_device *dev);
void drm_fb_helper_output_poll_changed(struct drm_device *dev);
-int drm_fbdev_generic_setup(struct drm_device *dev, unsigned int preferred_bpp);
+void drm_fbdev_generic_setup(struct drm_device *dev,
+ unsigned int preferred_bpp);
#else
static inline void drm_fb_helper_prepare(struct drm_device *dev,
struct drm_fb_helper *helper,
@@ -443,10 +444,9 @@ static inline void drm_fb_helper_output_poll_changed(struct drm_device *dev)
{
}
-static inline int
+static inline void
drm_fbdev_generic_setup(struct drm_device *dev, unsigned int preferred_bpp)
{
- return 0;
}
#endif
diff --git a/include/drm/drm_file.h b/include/drm/drm_file.h
index 5aaf1c4593a9..716990bace10 100644
--- a/include/drm/drm_file.h
+++ b/include/drm/drm_file.h
@@ -202,6 +202,17 @@ struct drm_file {
bool writeback_connectors;
/**
+ * @was_master:
+ *
+ * This client has, or had, master capability. Protected by struct
+ * &drm_device.master_mutex.
+ *
+ * This is used to ensure that CAP_SYS_ADMIN is not enforced if the
+ * client is or was a master in the past.
+ */
+ bool was_master;
+
+ /**
* @is_master:
*
* This client is the creator of @master. Protected by struct
diff --git a/include/drm/drm_framebuffer.h b/include/drm/drm_framebuffer.h
index c0e0256e3e98..be658ebbec72 100644
--- a/include/drm/drm_framebuffer.h
+++ b/include/drm/drm_framebuffer.h
@@ -297,4 +297,42 @@ int drm_framebuffer_plane_width(int width,
int drm_framebuffer_plane_height(int height,
const struct drm_framebuffer *fb, int plane);
+/**
+ * struct drm_afbc_framebuffer - a special afbc frame buffer object
+ *
+ * A derived class of struct drm_framebuffer, dedicated to afbc use cases.
+ */
+struct drm_afbc_framebuffer {
+ /**
+ * @base: base framebuffer structure.
+ */
+ struct drm_framebuffer base;
+ /**
+ * @block_width: width of a single afbc block
+ */
+ u32 block_width;
+ /**
+ * @block_height: height of a single afbc block
+ */
+ u32 block_height;
+ /**
+ * @aligned_width: aligned frame buffer width
+ */
+ u32 aligned_width;
+ /**
+ * @aligned_height: aligned frame buffer height
+ */
+ u32 aligned_height;
+ /**
+ * @offset: offset of the first afbc header
+ */
+ u32 offset;
+ /**
+ * @afbc_size: minimum size of afbc buffer
+ */
+ u32 afbc_size;
+};
+
+#define fb_to_afbc_fb(x) container_of(x, struct drm_afbc_framebuffer, base)
+
#endif
diff --git a/include/drm/drm_gem_framebuffer_helper.h b/include/drm/drm_gem_framebuffer_helper.h
index d9f13fd25b0a..6b013154911d 100644
--- a/include/drm/drm_gem_framebuffer_helper.h
+++ b/include/drm/drm_gem_framebuffer_helper.h
@@ -1,6 +1,7 @@
#ifndef __DRM_GEM_FB_HELPER_H__
#define __DRM_GEM_FB_HELPER_H__
+struct drm_afbc_framebuffer;
struct drm_device;
struct drm_fb_helper_surface_size;
struct drm_file;
@@ -12,12 +13,19 @@ struct drm_plane;
struct drm_plane_state;
struct drm_simple_display_pipe;
+#define AFBC_VENDOR_AND_TYPE_MASK GENMASK_ULL(63, 52)
+
struct drm_gem_object *drm_gem_fb_get_obj(struct drm_framebuffer *fb,
unsigned int plane);
void drm_gem_fb_destroy(struct drm_framebuffer *fb);
int drm_gem_fb_create_handle(struct drm_framebuffer *fb, struct drm_file *file,
unsigned int *handle);
+int drm_gem_fb_init_with_funcs(struct drm_device *dev,
+ struct drm_framebuffer *fb,
+ struct drm_file *file,
+ const struct drm_mode_fb_cmd2 *mode_cmd,
+ const struct drm_framebuffer_funcs *funcs);
struct drm_framebuffer *
drm_gem_fb_create_with_funcs(struct drm_device *dev, struct drm_file *file,
const struct drm_mode_fb_cmd2 *mode_cmd,
@@ -29,6 +37,13 @@ struct drm_framebuffer *
drm_gem_fb_create_with_dirty(struct drm_device *dev, struct drm_file *file,
const struct drm_mode_fb_cmd2 *mode_cmd);
+#define drm_is_afbc(modifier) \
+ (((modifier) & AFBC_VENDOR_AND_TYPE_MASK) == DRM_FORMAT_MOD_ARM_AFBC(0))
+
+int drm_gem_fb_afbc_init(struct drm_device *dev,
+ const struct drm_mode_fb_cmd2 *mode_cmd,
+ struct drm_afbc_framebuffer *afbc_fb);
+
int drm_gem_fb_prepare_fb(struct drm_plane *plane,
struct drm_plane_state *state);
int drm_gem_fb_simple_display_pipe_prepare_fb(struct drm_simple_display_pipe *pipe,
diff --git a/include/drm/drm_gem_vram_helper.h b/include/drm/drm_gem_vram_helper.h
index 0f6e47213d8d..b63bcd1b996d 100644
--- a/include/drm/drm_gem_vram_helper.h
+++ b/include/drm/drm_gem_vram_helper.h
@@ -196,7 +196,7 @@ static inline struct drm_vram_mm *drm_vram_mm_of_bdev(
return container_of(bdev, struct drm_vram_mm, bdev);
}
-int drm_vram_mm_debugfs_init(struct drm_minor *minor);
+void drm_vram_mm_debugfs_init(struct drm_minor *minor);
/*
* Helpers for integration with struct drm_device
diff --git a/include/drm/drm_legacy.h b/include/drm/drm_legacy.h
index aed382c17b26..852d7451eeb1 100644
--- a/include/drm/drm_legacy.h
+++ b/include/drm/drm_legacy.h
@@ -194,11 +194,26 @@ void drm_legacy_idlelock_release(struct drm_lock_data *lock);
#ifdef CONFIG_PCI
+struct drm_dma_handle *drm_pci_alloc(struct drm_device *dev, size_t size,
+ size_t align);
+void drm_pci_free(struct drm_device *dev, struct drm_dma_handle *dmah);
+
int drm_legacy_pci_init(struct drm_driver *driver, struct pci_driver *pdriver);
void drm_legacy_pci_exit(struct drm_driver *driver, struct pci_driver *pdriver);
#else
+static inline struct drm_dma_handle *drm_pci_alloc(struct drm_device *dev,
+ size_t size, size_t align)
+{
+ return NULL;
+}
+
+static inline void drm_pci_free(struct drm_device *dev,
+ struct drm_dma_handle *dmah)
+{
+}
+
static inline int drm_legacy_pci_init(struct drm_driver *driver,
struct pci_driver *pdriver)
{
diff --git a/include/drm/drm_managed.h b/include/drm/drm_managed.h
new file mode 100644
index 000000000000..ca4114633bf9
--- /dev/null
+++ b/include/drm/drm_managed.h
@@ -0,0 +1,109 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#ifndef _DRM_MANAGED_H_
+#define _DRM_MANAGED_H_
+
+#include <linux/gfp.h>
+#include <linux/overflow.h>
+#include <linux/types.h>
+
+struct drm_device;
+
+typedef void (*drmres_release_t)(struct drm_device *dev, void *res);
+
+/**
+ * drmm_add_action - add a managed release action to a &drm_device
+ * @dev: DRM device
+ * @action: function which should be called when @dev is released
+ * @data: opaque pointer, passed to @action
+ *
+ * This function adds the release @action with optional parameter @data to the
+ * list of cleanup actions for @dev. The cleanup actions will be run in reverse
+ * order in the final drm_dev_put() call for @dev.
+ */
+#define drmm_add_action(dev, action, data) \
+ __drmm_add_action(dev, action, data, #action)
+
+int __must_check __drmm_add_action(struct drm_device *dev,
+ drmres_release_t action,
+ void *data, const char *name);
+
+/**
+ * drmm_add_action_or_reset - add a managed release action to a &drm_device
+ * @dev: DRM device
+ * @action: function which should be called when @dev is released
+ * @data: opaque pointer, passed to @action
+ *
+ * Similar to drmm_add_action(), with the only difference that on failure
+ * @action is called directly to perform any cleanup work necessary.
+ */
+#define drmm_add_action_or_reset(dev, action, data) \
+ __drmm_add_action_or_reset(dev, action, data, #action)
+
+int __must_check __drmm_add_action_or_reset(struct drm_device *dev,
+ drmres_release_t action,
+ void *data, const char *name);
+
+void drmm_add_final_kfree(struct drm_device *dev, void *container);
+
+void *drmm_kmalloc(struct drm_device *dev, size_t size, gfp_t gfp) __malloc;
+
+/**
+ * drmm_kzalloc - &drm_device managed kzalloc()
+ * @dev: DRM device
+ * @size: size of the memory allocation
+ * @gfp: GFP allocation flags
+ *
+ * This is a &drm_device managed version of kzalloc(). The allocated memory is
+ * automatically freed on the final drm_dev_put(). Memory can also be freed
+ * before the final drm_dev_put() by calling drmm_kfree().
+ */
+static inline void *drmm_kzalloc(struct drm_device *dev, size_t size, gfp_t gfp)
+{
+ return drmm_kmalloc(dev, size, gfp | __GFP_ZERO);
+}
+
+/**
+ * drmm_kmalloc_array - &drm_device managed kmalloc_array()
+ * @dev: DRM device
+ * @n: number of array elements to allocate
+ * @size: size of array member
+ * @flags: GFP allocation flags
+ *
+ * This is a &drm_device managed version of kmalloc_array(). The allocated
+ * memory is automatically freed on the final drm_dev_put() and works exactly
+ * like a memory allocation obtained by drmm_kmalloc().
+ */
+static inline void *drmm_kmalloc_array(struct drm_device *dev,
+ size_t n, size_t size, gfp_t flags)
+{
+ size_t bytes;
+
+ if (unlikely(check_mul_overflow(n, size, &bytes)))
+ return NULL;
+
+ return drmm_kmalloc(dev, bytes, flags);
+}
+
+/**
+ * drmm_kcalloc - &drm_device managed kcalloc()
+ * @dev: DRM device
+ * @n: number of array elements to allocate
+ * @size: size of array member
+ * @flags: GFP allocation flags
+ *
+ * This is a &drm_device managed version of kcalloc(). The allocated memory is
+ * automatically freed on the final drm_dev_put() and works exactly like a
+ * memory allocation obtained by drmm_kmalloc().
+ */
+static inline void *drmm_kcalloc(struct drm_device *dev,
+ size_t n, size_t size, gfp_t flags)
+{
+ return drmm_kmalloc_array(dev, n, size, flags | __GFP_ZERO);
+}
+
+char *drmm_kstrdup(struct drm_device *dev, const char *s, gfp_t gfp);
+
+void drmm_kfree(struct drm_device *dev, void *data);
+
+#endif
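A hedged usage sketch for the managed helpers above: allocate driver state with drmm_kzalloc() and register a custom release action. foo_hw, foo_hw_fini() and foo_init() are illustrative names, not part of this patch:

static void foo_release_hw(struct drm_device *dev, void *data)
{
        foo_hw_fini(data);      /* hypothetical teardown, runs during the final drm_dev_put() */
}

static int foo_init(struct drm_device *dev)
{
        struct foo_hw *hw;

        hw = drmm_kzalloc(dev, sizeof(*hw), GFP_KERNEL);
        if (!hw)
                return -ENOMEM;

        /* on failure, the action runs immediately for cleanup */
        return drmm_add_action_or_reset(dev, foo_release_hw, hw);
}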
diff --git a/include/drm/drm_mipi_dbi.h b/include/drm/drm_mipi_dbi.h
index 33f325f5af2b..4d0e49c0ed2c 100644
--- a/include/drm/drm_mipi_dbi.h
+++ b/include/drm/drm_mipi_dbi.h
@@ -152,7 +152,6 @@ int mipi_dbi_dev_init_with_formats(struct mipi_dbi_dev *dbidev,
int mipi_dbi_dev_init(struct mipi_dbi_dev *dbidev,
const struct drm_simple_display_pipe_funcs *funcs,
const struct drm_display_mode *mode, unsigned int rotation);
-void mipi_dbi_release(struct drm_device *drm);
void mipi_dbi_pipe_update(struct drm_simple_display_pipe *pipe,
struct drm_plane_state *old_state);
void mipi_dbi_enable_flush(struct mipi_dbi_dev *dbidev,
@@ -170,7 +169,8 @@ int mipi_dbi_spi_transfer(struct spi_device *spi, u32 speed_hz,
int mipi_dbi_command_read(struct mipi_dbi *dbi, u8 cmd, u8 *val);
int mipi_dbi_command_buf(struct mipi_dbi *dbi, u8 cmd, u8 *data, size_t len);
-int mipi_dbi_command_stackbuf(struct mipi_dbi *dbi, u8 cmd, u8 *data, size_t len);
+int mipi_dbi_command_stackbuf(struct mipi_dbi *dbi, u8 cmd, const u8 *data,
+ size_t len);
int mipi_dbi_buf_copy(void *dst, struct drm_framebuffer *fb,
struct drm_rect *clip, bool swap);
/**
@@ -187,12 +187,12 @@ int mipi_dbi_buf_copy(void *dst, struct drm_framebuffer *fb,
*/
#define mipi_dbi_command(dbi, cmd, seq...) \
({ \
- u8 d[] = { seq }; \
+ const u8 d[] = { seq }; \
mipi_dbi_command_stackbuf(dbi, cmd, d, ARRAY_SIZE(d)); \
})
#ifdef CONFIG_DEBUG_FS
-int mipi_dbi_debugfs_init(struct drm_minor *minor);
+void mipi_dbi_debugfs_init(struct drm_minor *minor);
#else
#define mipi_dbi_debugfs_init NULL
#endif
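Since the stack-buffer helper now takes const data, command sequences can be issued exactly as before. A short sketch using standard MIPI DCS opcodes; the panel enable helper is illustrative and panel-specific setup is omitted:

static void foo_panel_enable(struct mipi_dbi *dbi)
{
        mipi_dbi_command(dbi, MIPI_DCS_EXIT_SLEEP_MODE);
        mipi_dbi_command(dbi, MIPI_DCS_SET_PIXEL_FORMAT, MIPI_DCS_PIXEL_FMT_16BIT);
        mipi_dbi_command(dbi, MIPI_DCS_SET_DISPLAY_ON);
}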
diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h
index ee8b0e80ca90..a01bc6fac83c 100644
--- a/include/drm/drm_mm.h
+++ b/include/drm/drm_mm.h
@@ -168,6 +168,7 @@ struct drm_mm_node {
struct rb_node rb_hole_addr;
u64 __subtree_last;
u64 hole_size;
+ u64 subtree_max_hole;
unsigned long flags;
#define DRM_MM_NODE_ALLOCATED_BIT 0
#define DRM_MM_NODE_SCANNED_BIT 1
diff --git a/include/drm/drm_mode_config.h b/include/drm/drm_mode_config.h
index 3bcbe30339f0..6c3ef49b46b3 100644
--- a/include/drm/drm_mode_config.h
+++ b/include/drm/drm_mode_config.h
@@ -929,7 +929,23 @@ struct drm_mode_config {
const struct drm_mode_config_helper_funcs *helper_private;
};
-void drm_mode_config_init(struct drm_device *dev);
+int __must_check drmm_mode_config_init(struct drm_device *dev);
+
+/**
+ * drm_mode_config_init - DRM mode_configuration structure initialization
+ * @dev: DRM device
+ *
+ * This is the unmanaged version of drmm_mode_config_init() for drivers which
+ * still explicitly call drm_mode_config_cleanup().
+ *
+ * FIXME: This function is deprecated and drivers should be converted over to
+ * drmm_mode_config_init().
+ */
+static inline int drm_mode_config_init(struct drm_device *dev)
+{
+ return drmm_mode_config_init(dev);
+}
+
void drm_mode_config_reset(struct drm_device *dev);
void drm_mode_config_cleanup(struct drm_device *dev);
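Drivers converting to the managed variant only need to check the new return value. A minimal sketch with illustrative mode limits and function name:

static int foo_modeset_init(struct drm_device *dev)
{
        int ret;

        ret = drmm_mode_config_init(dev);
        if (ret)
                return ret;

        dev->mode_config.min_width = 0;
        dev->mode_config.min_height = 0;
        dev->mode_config.max_width = 4096;
        dev->mode_config.max_height = 4096;

        return 0;
}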
diff --git a/include/drm/drm_modes.h b/include/drm/drm_modes.h
index 320f8112a0f8..303ee5fbbdd8 100644
--- a/include/drm/drm_modes.h
+++ b/include/drm/drm_modes.h
@@ -391,16 +391,6 @@ struct drm_display_mode {
int vrefresh;
/**
- * @hsync:
- *
- * Horizontal refresh rate, for debug output in human readable form. Not
- * used in a functional way.
- *
- * This value is in kHz.
- */
- int hsync;
-
- /**
* @picture_aspect_ratio:
*
* Field for setting the HDMI picture aspect ratio of a mode.
@@ -493,7 +483,6 @@ int of_get_drm_display_mode(struct device_node *np,
int index);
void drm_mode_set_name(struct drm_display_mode *mode);
-int drm_mode_hsync(const struct drm_display_mode *mode);
int drm_mode_vrefresh(const struct drm_display_mode *mode);
void drm_mode_get_hv_timing(const struct drm_display_mode *mode,
int *hdisplay, int *vdisplay);
diff --git a/include/drm/drm_modeset_helper_vtables.h b/include/drm/drm_modeset_helper_vtables.h
index 7c20b1c8b6a7..421a30f08463 100644
--- a/include/drm/drm_modeset_helper_vtables.h
+++ b/include/drm/drm_modeset_helper_vtables.h
@@ -1075,8 +1075,35 @@ struct drm_connector_helper_funcs {
void (*atomic_commit)(struct drm_connector *connector,
struct drm_connector_state *state);
+ /**
+ * @prepare_writeback_job:
+ *
+ * As writeback jobs contain a framebuffer, drivers may need to
+ * prepare and clean them up the same way they can prepare and
+ * clean up framebuffers for planes. This optional connector operation
+ * is used to support the preparation of writeback jobs. The job
+ * prepare operation is called from drm_atomic_helper_prepare_planes()
+ * for struct &drm_writeback_connector connectors only.
+ *
+ * This operation is optional.
+ *
+ * This callback is used by the atomic modeset helpers.
+ */
int (*prepare_writeback_job)(struct drm_writeback_connector *connector,
struct drm_writeback_job *job);
+ /**
+ * @cleanup_writeback_job:
+ *
+ * This optional connector operation is used to support the
+ * cleanup of writeback jobs. The job cleanup operation is called
+ * from the existing drm_writeback_cleanup_job() function, invoked
+ * both when destroying the job as part of an aborted commit, or when
+ * the job completes.
+ *
+ * This operation is optional.
+ *
+ * This callback is used by the atomic modeset helpers.
+ */
void (*cleanup_writeback_job)(struct drm_writeback_connector *connector,
struct drm_writeback_job *job);
};
diff --git a/include/drm/drm_pci.h b/include/drm/drm_pci.h
deleted file mode 100644
index 3941b0255ecf..000000000000
--- a/include/drm/drm_pci.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Internal Header for the Direct Rendering Manager
- *
- * Copyright 1999 Precision Insight, Inc., Cedar Park, Texas.
- * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California.
- * Copyright (c) 2009-2010, Code Aurora Forum.
- * All rights reserved.
- *
- * Author: Rickard E. (Rik) Faith <faith@valinux.com>
- * Author: Gareth Hughes <gareth@valinux.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef _DRM_PCI_H_
-#define _DRM_PCI_H_
-
-#include <linux/pci.h>
-
-struct drm_dma_handle;
-struct drm_device;
-struct drm_driver;
-struct drm_master;
-
-#ifdef CONFIG_PCI
-
-struct drm_dma_handle *drm_pci_alloc(struct drm_device *dev, size_t size,
- size_t align);
-void drm_pci_free(struct drm_device *dev, struct drm_dma_handle * dmah);
-
-#else
-
-static inline struct drm_dma_handle *drm_pci_alloc(struct drm_device *dev,
- size_t size, size_t align)
-{
- return NULL;
-}
-
-static inline void drm_pci_free(struct drm_device *dev,
- struct drm_dma_handle *dmah)
-{
-}
-
-#endif
-
-#endif /* _DRM_PCI_H_ */
diff --git a/include/drm/drm_print.h b/include/drm/drm_print.h
index ca7cee8e728a..1c9417430d08 100644
--- a/include/drm/drm_print.h
+++ b/include/drm/drm_print.h
@@ -313,6 +313,10 @@ enum drm_debug_category {
* @DRM_UT_DP: Used in the DP code.
*/
DRM_UT_DP = 0x100,
+ /**
+ * @DRM_UT_DRMRES: Used in the drm managed resources code.
+ */
+ DRM_UT_DRMRES = 0x200,
};
static inline bool drm_debug_enabled(enum drm_debug_category category)
@@ -442,6 +446,8 @@ void drm_dev_dbg(const struct device *dev, enum drm_debug_category category,
drm_dev_dbg((drm)->dev, DRM_UT_LEASE, fmt, ##__VA_ARGS__)
#define drm_dbg_dp(drm, fmt, ...) \
drm_dev_dbg((drm)->dev, DRM_UT_DP, fmt, ##__VA_ARGS__)
+#define drm_dbg_drmres(drm, fmt, ...) \
+ drm_dev_dbg((drm)->dev, DRM_UT_DRMRES, fmt, ##__VA_ARGS__)
/*
diff --git a/include/drm/drm_writeback.h b/include/drm/drm_writeback.h
index 777c14c847f0..9697d2714d2a 100644
--- a/include/drm/drm_writeback.h
+++ b/include/drm/drm_writeback.h
@@ -15,7 +15,13 @@
#include <drm/drm_encoder.h>
#include <linux/workqueue.h>
+/**
+ * struct drm_writeback_connector - DRM writeback connector
+ */
struct drm_writeback_connector {
+ /**
+ * @base: base drm_connector object
+ */
struct drm_connector base;
/**
@@ -78,6 +84,9 @@ struct drm_writeback_connector {
char timeline_name[32];
};
+/**
+ * struct drm_writeback_job - DRM writeback job
+ */
struct drm_writeback_job {
/**
* @connector:
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index 26b04ff62676..a21b3b92135a 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -56,6 +56,7 @@ enum drm_sched_priority {
* Jobs from this entity can be scheduled on any scheduler
* on this list.
* @num_sched_list: number of drm_gpu_schedulers in the sched_list.
+ * @priority: priority of the entity
* @rq_lock: lock to modify the runqueue to which this entity belongs.
* @job_queue: the list of jobs of this entity.
* @fence_seq: a linearly increasing seqno incremented with each
diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h
index 1d2c12219f44..662d8351c87a 100644
--- a/include/drm/i915_pciids.h
+++ b/include/drm/i915_pciids.h
@@ -593,12 +593,16 @@
/* TGL */
#define INTEL_TGL_12_IDS(info) \
- INTEL_VGA_DEVICE(0x9A49, info), \
INTEL_VGA_DEVICE(0x9A40, info), \
+ INTEL_VGA_DEVICE(0x9A49, info), \
INTEL_VGA_DEVICE(0x9A59, info), \
INTEL_VGA_DEVICE(0x9A60, info), \
INTEL_VGA_DEVICE(0x9A68, info), \
INTEL_VGA_DEVICE(0x9A70, info), \
- INTEL_VGA_DEVICE(0x9A78, info)
+ INTEL_VGA_DEVICE(0x9A78, info), \
+ INTEL_VGA_DEVICE(0x9AC0, info), \
+ INTEL_VGA_DEVICE(0x9AC9, info), \
+ INTEL_VGA_DEVICE(0x9AD9, info), \
+ INTEL_VGA_DEVICE(0x9AF8, info)
#endif /* _I915_PCIIDS_H */
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index c9e0fd09f4b2..54a527aa79cc 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -390,7 +390,6 @@ struct ttm_bo_driver {
/**
* struct ttm_bo_global - Buffer object driver global data.
*
- * @mem_glob: Pointer to a struct ttm_mem_global object for accounting.
* @dummy_read_page: Pointer to a dummy page used for mapping requests
* of unpopulated pages.
* @shrink: A shrink callback object used for buffer object swap.
diff --git a/include/drm/ttm/ttm_debug.h b/include/drm/ttm/ttm_debug.h
deleted file mode 100644
index b5e460fa5086..000000000000
--- a/include/drm/ttm/ttm_debug.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/**************************************************************************
- *
- * Copyright (c) 2017 Advanced Micro Devices, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-/*
- * Authors: Tom St Denis <tom.stdenis@amd.com>
- */
-extern void ttm_trace_dma_map(struct device *dev, struct ttm_dma_tt *tt);
-extern void ttm_trace_dma_unmap(struct device *dev, struct ttm_dma_tt *tt);
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 69f4164d6477..a8d8fdcd3723 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -274,6 +274,8 @@ struct vgic_v2_cpu_if {
u32 vgic_vmcr;
u32 vgic_apr;
u32 vgic_lr[VGIC_V2_MAX_LRS];
+
+ unsigned int used_lrs;
};
struct vgic_v3_cpu_if {
@@ -291,6 +293,8 @@ struct vgic_v3_cpu_if {
* linking the Linux IRQ subsystem and the ITS together.
*/
struct its_vpe its_vpe;
+
+ unsigned int used_lrs;
};
struct vgic_cpu {
@@ -300,7 +304,6 @@ struct vgic_cpu {
struct vgic_v3_cpu_if vgic_v3;
};
- unsigned int used_lrs;
struct vgic_irq private_irqs[VGIC_NR_PRIVATE_IRQS];
raw_spinlock_t ap_list_lock; /* Protects the ap_list */
diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h
index 59494df0f55b..56d6a5c6e353 100644
--- a/include/linux/arm-smccc.h
+++ b/include/linux/arm-smccc.h
@@ -5,12 +5,15 @@
#ifndef __LINUX_ARM_SMCCC_H
#define __LINUX_ARM_SMCCC_H
+#include <linux/init.h>
#include <uapi/linux/const.h>
/*
* This file provides common defines for ARM SMC Calling Convention as
* specified in
- * http://infocenter.arm.com/help/topic/com.arm.doc.den0028a/index.html
+ * https://developer.arm.com/docs/den0028/latest
+ *
+ * This code is up-to-date with version DEN 0028 C
*/
#define ARM_SMCCC_STD_CALL _AC(0,U)
@@ -56,6 +59,7 @@
#define ARM_SMCCC_VERSION_1_0 0x10000
#define ARM_SMCCC_VERSION_1_1 0x10001
+#define ARM_SMCCC_VERSION_1_2 0x10002
#define ARM_SMCCC_VERSION_FUNC_ID \
ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \
@@ -98,6 +102,19 @@ enum arm_smccc_conduit {
enum arm_smccc_conduit arm_smccc_1_1_get_conduit(void);
/**
+ * arm_smccc_get_version()
+ *
+ * Returns the version to be used for SMCCCv1.1 or later.
+ *
+ * When SMCCCv1.1 or above is not present, returns SMCCCv1.0, but this
+ * does not imply the presence of firmware or a valid conduit. Caller
+ * handling SMCCCv1.0 must determine the conduit by other means.
+ */
+u32 arm_smccc_get_version(void);
+
+void __init arm_smccc_version_init(u32 version, enum arm_smccc_conduit conduit);
+
+/**
* struct arm_smccc_res - Result from SMC/HVC call
* @a0-a3 result values from registers 0 to 3
*/
@@ -314,10 +331,14 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1,
*/
#define arm_smccc_1_1_hvc(...) __arm_smccc_1_1(SMCCC_HVC_INST, __VA_ARGS__)
-/* Return codes defined in ARM DEN 0070A */
+/*
+ * Return codes defined in ARM DEN 0070A
+ * ARM DEN 0070A is now merged/consolidated into ARM DEN 0028 C
+ */
#define SMCCC_RET_SUCCESS 0
#define SMCCC_RET_NOT_SUPPORTED -1
#define SMCCC_RET_NOT_REQUIRED -2
+#define SMCCC_RET_INVALID_PARAMETER -3
/*
* Like arm_smccc_1_1* but always returns SMCCC_RET_NOT_SUPPORTED.
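A caller gating a feature on SMCCC v1.2 could combine the conduit and version queries; this is a hedged sketch and foo_smccc_v12_available() is not part of this patch:

static bool foo_smccc_v12_available(void)
{
        return arm_smccc_1_1_get_conduit() != SMCCC_CONDUIT_NONE &&
               arm_smccc_get_version() >= ARM_SMCCC_VERSION_1_2;
}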
diff --git a/include/linux/audit.h b/include/linux/audit.h
index f9ceae57ca8d..3fcd9ee49734 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -19,7 +19,7 @@
struct audit_sig_info {
uid_t uid;
pid_t pid;
- char ctx[0];
+ char ctx[];
};
struct audit_buffer;
@@ -94,6 +94,12 @@ struct audit_ntp_data {
struct audit_ntp_data {};
#endif
+enum audit_nfcfgop {
+ AUDIT_XT_OP_REGISTER,
+ AUDIT_XT_OP_REPLACE,
+ AUDIT_XT_OP_UNREGISTER,
+};
+
extern int is_audit_feature_set(int which);
extern int __init audit_register_class(int class, unsigned *list);
@@ -379,6 +385,8 @@ extern void __audit_log_kern_module(char *name);
extern void __audit_fanotify(unsigned int response);
extern void __audit_tk_injoffset(struct timespec64 offset);
extern void __audit_ntp_log(const struct audit_ntp_data *ad);
+extern void __audit_log_nfcfg(const char *name, u8 af, unsigned int nentries,
+ enum audit_nfcfgop op);
static inline void audit_ipc_obj(struct kern_ipc_perm *ipcp)
{
@@ -514,6 +522,14 @@ static inline void audit_ntp_log(const struct audit_ntp_data *ad)
__audit_ntp_log(ad);
}
+static inline void audit_log_nfcfg(const char *name, u8 af,
+ unsigned int nentries,
+ enum audit_nfcfgop op)
+{
+ if (audit_enabled)
+ __audit_log_nfcfg(name, af, nentries, op);
+}
+
extern int audit_n_rules;
extern int audit_signals;
#else /* CONFIG_AUDITSYSCALL */
@@ -646,6 +662,12 @@ static inline void audit_ntp_log(const struct audit_ntp_data *ad)
static inline void audit_ptrace(struct task_struct *t)
{ }
+
+static inline void audit_log_nfcfg(const char *name, u8 af,
+ unsigned int nentries,
+ enum audit_nfcfgop op)
+{ }
+
#define audit_n_rules 0
#define audit_signals 0
#endif /* CONFIG_AUDITSYSCALL */
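Netfilter call sites are expected to wrap table updates with the new helper; the real call sites live in the corresponding x_tables/ebtables patches. A hedged sketch with illustrative parameters:

static void foo_audit_table_register(const char *table_name, u8 af,
                                     unsigned int nentries)
{
        /* no-op unless auditing is enabled */
        audit_log_nfcfg(table_name, af, nentries, AUDIT_XT_OP_REGISTER);
}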
diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index 7367150f962a..90a7e844a098 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -193,8 +193,6 @@ struct backing_dev_info {
congested_fn *congested_fn; /* Function pointer if device is md/dm */
void *congested_data; /* Pointer to aux data for congested func */
- const char *name;
-
struct kref refcnt; /* Reference counter for the structure */
unsigned int capabilities; /* Device capabilities */
unsigned int min_ratio;
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index c9ad5c3b7b4b..6b3504bf7a42 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -33,14 +33,10 @@ int bdi_register(struct backing_dev_info *bdi, const char *fmt, ...);
__printf(2, 0)
int bdi_register_va(struct backing_dev_info *bdi, const char *fmt,
va_list args);
-int bdi_register_owner(struct backing_dev_info *bdi, struct device *owner);
+void bdi_set_owner(struct backing_dev_info *bdi, struct device *owner);
void bdi_unregister(struct backing_dev_info *bdi);
-struct backing_dev_info *bdi_alloc_node(gfp_t gfp_mask, int node_id);
-static inline struct backing_dev_info *bdi_alloc(gfp_t gfp_mask)
-{
- return bdi_alloc_node(gfp_mask, NUMA_NO_NODE);
-}
+struct backing_dev_info *bdi_alloc(int node_id);
void wb_start_background_writeback(struct bdi_writeback *wb);
void wb_workfn(struct work_struct *work);
diff --git a/include/linux/bio.h b/include/linux/bio.h
index a0ee494a6329..91676d4b2dfe 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -70,7 +70,7 @@ static inline bool bio_has_data(struct bio *bio)
return false;
}
-static inline bool bio_no_advance_iter(struct bio *bio)
+static inline bool bio_no_advance_iter(const struct bio *bio)
{
return bio_op(bio) == REQ_OP_DISCARD ||
bio_op(bio) == REQ_OP_SECURE_ERASE ||
@@ -138,8 +138,8 @@ static inline bool bio_next_segment(const struct bio *bio,
#define bio_for_each_segment_all(bvl, bio, iter) \
for (bvl = bvec_init_iter_all(&iter); bio_next_segment((bio), &iter); )
-static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
- unsigned bytes)
+static inline void bio_advance_iter(const struct bio *bio,
+ struct bvec_iter *iter, unsigned int bytes)
{
iter->bi_sector += bytes >> 9;
@@ -169,6 +169,14 @@ static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
#define bio_for_each_bvec(bvl, bio, iter) \
__bio_for_each_bvec(bvl, bio, iter, (bio)->bi_iter)
+/*
+ * Iterate over all multi-page bvecs. Drivers shouldn't use this version for the
+ * same reasons as bio_for_each_segment_all().
+ */
+#define bio_for_each_bvec_all(bvl, bio, i) \
+ for (i = 0, bvl = bio_first_bvec_all(bio); \
+ i < (bio)->bi_vcnt; i++, bvl++)
+
#define bio_iter_last(bvec, iter) ((iter).bi_size == (bvec).bv_len)
static inline unsigned bio_segments(struct bio *bio)
@@ -417,6 +425,7 @@ static inline void bio_io_error(struct bio *bio)
static inline void bio_wouldblock_error(struct bio *bio)
{
+ bio_set_flag(bio, BIO_QUIET);
bio->bi_status = BLK_STS_AGAIN;
bio_endio(bio);
}
@@ -444,12 +453,6 @@ void bio_release_pages(struct bio *bio, bool mark_dirty);
extern void bio_set_pages_dirty(struct bio *bio);
extern void bio_check_pages_dirty(struct bio *bio);
-void generic_start_io_acct(struct request_queue *q, int op,
- unsigned long sectors, struct hd_struct *part);
-void generic_end_io_acct(struct request_queue *q, int op,
- struct hd_struct *part,
- unsigned long start_time);
-
extern void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter,
struct bio *src, struct bvec_iter *src_iter);
extern void bio_copy_data(struct bio *dst, struct bio *src);
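The new iterator walks the bio's bvec table directly (multi-page bvecs, not 512-byte segments). A hedged sketch summing the payload of a freshly built bio; the helper name is illustrative:

static unsigned int foo_bio_payload_bytes(struct bio *bio)
{
        struct bio_vec *bvec;
        unsigned int bytes = 0;
        int i;

        bio_for_each_bvec_all(bvec, bio, i)
                bytes += bvec->bv_len;

        return bytes;
}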
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 35f8ffe92b70..a57ebe2f00ab 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -607,12 +607,14 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q,
u64_stats_update_begin(&bis->sync);
/*
- * If the bio is flagged with BIO_QUEUE_ENTERED it means this
- * is a split bio and we would have already accounted for the
- * size of the bio.
+ * If the bio is flagged with BIO_CGROUP_ACCT it means this is a
+ * split bio and we would have already accounted for the size of
+ * the bio.
*/
- if (!bio_flagged(bio, BIO_QUEUE_ENTERED))
+ if (!bio_flagged(bio, BIO_CGROUP_ACCT)) {
+ bio_set_flag(bio, BIO_CGROUP_ACCT);
bis->cur.bytes[rwd] += bio->bi_iter.bi_size;
+ }
bis->cur.ios[rwd]++;
u64_stats_update_end(&bis->sync);
@@ -629,6 +631,8 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q,
static inline void blkcg_use_delay(struct blkcg_gq *blkg)
{
+ if (WARN_ON_ONCE(atomic_read(&blkg->use_delay) < 0))
+ return;
if (atomic_add_return(1, &blkg->use_delay) == 1)
atomic_inc(&blkg->blkcg->css.cgroup->congestion_count);
}
@@ -637,6 +641,8 @@ static inline int blkcg_unuse_delay(struct blkcg_gq *blkg)
{
int old = atomic_read(&blkg->use_delay);
+ if (WARN_ON_ONCE(old < 0))
+ return 0;
if (old == 0)
return 0;
@@ -661,20 +667,39 @@ static inline int blkcg_unuse_delay(struct blkcg_gq *blkg)
return 1;
}
+/**
+ * blkcg_set_delay - Enable allocator delay mechanism with the specified delay amount
+ * @blkg: target blkg
+ * @delay: delay duration in nsecs
+ *
+ * When enabled with this function, the delay is not decayed and must be
+ * explicitly cleared with blkcg_clear_delay(). Must not be mixed with
+ * blkcg_[un]use_delay() and blkcg_add_delay() usages.
+ */
+static inline void blkcg_set_delay(struct blkcg_gq *blkg, u64 delay)
+{
+ int old = atomic_read(&blkg->use_delay);
+
+ /* We only want 1 person setting the congestion count for this blkg. */
+ if (!old && atomic_cmpxchg(&blkg->use_delay, old, -1) == old)
+ atomic_inc(&blkg->blkcg->css.cgroup->congestion_count);
+
+ atomic64_set(&blkg->delay_nsec, delay);
+}
+
+/**
+ * blkcg_clear_delay - Disable allocator delay mechanism
+ * @blkg: target blkg
+ *
+ * Disable use_delay mechanism. See blkcg_set_delay().
+ */
static inline void blkcg_clear_delay(struct blkcg_gq *blkg)
{
int old = atomic_read(&blkg->use_delay);
- if (!old)
- return;
+
/* We only want 1 person clearing the congestion count for this blkg. */
- while (old) {
- int cur = atomic_cmpxchg(&blkg->use_delay, old, 0);
- if (cur == old) {
- atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
- break;
- }
- old = cur;
- }
+ if (old && atomic_cmpxchg(&blkg->use_delay, old, 0) == old)
+ atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
}
void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta);
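blkcg_set_delay()/blkcg_clear_delay() form a non-decaying pair, distinct from blkcg_[un]use_delay(). A hedged sketch of a controller arming a fixed 100 ms delay while some threshold is exceeded; the wrapper is illustrative:

static void foo_update_delay(struct blkcg_gq *blkg, bool over_threshold)
{
        if (over_threshold)
                blkcg_set_delay(blkg, 100 * NSEC_PER_MSEC);
        else
                blkcg_clear_delay(blkg);
}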
diff --git a/include/linux/blk-crypto.h b/include/linux/blk-crypto.h
new file mode 100644
index 000000000000..e82342907f2b
--- /dev/null
+++ b/include/linux/blk-crypto.h
@@ -0,0 +1,123 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2019 Google LLC
+ */
+
+#ifndef __LINUX_BLK_CRYPTO_H
+#define __LINUX_BLK_CRYPTO_H
+
+#include <linux/types.h>
+
+enum blk_crypto_mode_num {
+ BLK_ENCRYPTION_MODE_INVALID,
+ BLK_ENCRYPTION_MODE_AES_256_XTS,
+ BLK_ENCRYPTION_MODE_AES_128_CBC_ESSIV,
+ BLK_ENCRYPTION_MODE_ADIANTUM,
+ BLK_ENCRYPTION_MODE_MAX,
+};
+
+#define BLK_CRYPTO_MAX_KEY_SIZE 64
+/**
+ * struct blk_crypto_config - an inline encryption key's crypto configuration
+ * @crypto_mode: encryption algorithm this key is for
+ * @data_unit_size: the data unit size for all encryption/decryptions with this
+ * key. This is the size in bytes of each individual plaintext and
+ * ciphertext. This is always a power of 2. It might be e.g. the
+ * filesystem block size or the disk sector size.
+ * @dun_bytes: the maximum number of bytes of DUN used when using this key
+ */
+struct blk_crypto_config {
+ enum blk_crypto_mode_num crypto_mode;
+ unsigned int data_unit_size;
+ unsigned int dun_bytes;
+};
+
+/**
+ * struct blk_crypto_key - an inline encryption key
+ * @crypto_cfg: the crypto configuration (like crypto_mode, key size) for this
+ * key
+ * @data_unit_size_bits: log2 of data_unit_size
+ * @size: size of this key in bytes (determined by @crypto_cfg.crypto_mode)
+ * @raw: the raw bytes of this key. Only the first @size bytes are used.
+ *
+ * A blk_crypto_key is immutable once created, and many bios can reference it at
+ * the same time. It must not be freed until all bios using it have completed
+ * and it has been evicted from all devices on which it may have been used.
+ */
+struct blk_crypto_key {
+ struct blk_crypto_config crypto_cfg;
+ unsigned int data_unit_size_bits;
+ unsigned int size;
+ u8 raw[BLK_CRYPTO_MAX_KEY_SIZE];
+};
+
+#define BLK_CRYPTO_MAX_IV_SIZE 32
+#define BLK_CRYPTO_DUN_ARRAY_SIZE (BLK_CRYPTO_MAX_IV_SIZE / sizeof(u64))
+
+/**
+ * struct bio_crypt_ctx - an inline encryption context
+ * @bc_key: the key, algorithm, and data unit size to use
+ * @bc_dun: the data unit number (starting IV) to use
+ *
+ * A bio_crypt_ctx specifies that the contents of the bio will be encrypted (for
+ * write requests) or decrypted (for read requests) inline by the storage device
+ * or controller, or by the crypto API fallback.
+ */
+struct bio_crypt_ctx {
+ const struct blk_crypto_key *bc_key;
+ u64 bc_dun[BLK_CRYPTO_DUN_ARRAY_SIZE];
+};
+
+#include <linux/blk_types.h>
+#include <linux/blkdev.h>
+
+struct request;
+struct request_queue;
+
+#ifdef CONFIG_BLK_INLINE_ENCRYPTION
+
+static inline bool bio_has_crypt_ctx(struct bio *bio)
+{
+ return bio->bi_crypt_context;
+}
+
+void bio_crypt_set_ctx(struct bio *bio, const struct blk_crypto_key *key,
+ const u64 dun[BLK_CRYPTO_DUN_ARRAY_SIZE],
+ gfp_t gfp_mask);
+
+bool bio_crypt_dun_is_contiguous(const struct bio_crypt_ctx *bc,
+ unsigned int bytes,
+ const u64 next_dun[BLK_CRYPTO_DUN_ARRAY_SIZE]);
+
+int blk_crypto_init_key(struct blk_crypto_key *blk_key, const u8 *raw_key,
+ enum blk_crypto_mode_num crypto_mode,
+ unsigned int dun_bytes,
+ unsigned int data_unit_size);
+
+int blk_crypto_start_using_key(const struct blk_crypto_key *key,
+ struct request_queue *q);
+
+int blk_crypto_evict_key(struct request_queue *q,
+ const struct blk_crypto_key *key);
+
+bool blk_crypto_config_supported(struct request_queue *q,
+ const struct blk_crypto_config *cfg);
+
+#else /* CONFIG_BLK_INLINE_ENCRYPTION */
+
+static inline bool bio_has_crypt_ctx(struct bio *bio)
+{
+ return false;
+}
+
+#endif /* CONFIG_BLK_INLINE_ENCRYPTION */
+
+void __bio_crypt_clone(struct bio *dst, struct bio *src, gfp_t gfp_mask);
+static inline void bio_crypt_clone(struct bio *dst, struct bio *src,
+ gfp_t gfp_mask)
+{
+ if (bio_has_crypt_ctx(src))
+ __bio_crypt_clone(dst, src, gfp_mask);
+}
+
+#endif /* __LINUX_BLK_CRYPTO_H */
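End to end, an upper layer programs a key once and attaches a crypto context per bio. A hedged sketch with illustrative choices (AES-256-XTS, 4K data units, 8-byte DUN) and hypothetical function names:

static int foo_start_crypt(struct request_queue *q,
                           struct blk_crypto_key *key, const u8 *raw_key)
{
        int err;

        err = blk_crypto_init_key(key, raw_key,
                                  BLK_ENCRYPTION_MODE_AES_256_XTS,
                                  8, 4096);
        if (err)
                return err;

        return blk_crypto_start_using_key(key, q);
}

static void foo_crypt_bio(struct bio *bio, const struct blk_crypto_key *key,
                          u64 block_index)
{
        u64 dun[BLK_CRYPTO_DUN_ARRAY_SIZE] = { block_index };

        bio_crypt_set_ctx(bio, key, dun, GFP_NOIO);
}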
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index b45148ba3291..d6fcae17da5a 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -140,6 +140,8 @@ struct blk_mq_hw_ctx {
*/
atomic_t nr_active;
+ /** @cpuhp_online: List to store requests when a CPU goes offline */
+ struct hlist_node cpuhp_online;
/** @cpuhp_dead: List to store request if some CPU die. */
struct hlist_node cpuhp_dead;
/** @kobj: Kernel object for sysfs. */
@@ -391,6 +393,11 @@ struct blk_mq_ops {
enum {
BLK_MQ_F_SHOULD_MERGE = 1 << 0,
BLK_MQ_F_TAG_SHARED = 1 << 1,
+ /*
+ * Set when this device requires an underlying blk-mq device for
+ * completing IO.
+ */
+ BLK_MQ_F_STACKING = 1 << 2,
BLK_MQ_F_BLOCKING = 1 << 5,
BLK_MQ_F_NO_SCHED = 1 << 6,
BLK_MQ_F_ALLOC_POLICY_START_BIT = 8,
@@ -400,6 +407,9 @@ enum {
BLK_MQ_S_TAG_ACTIVE = 1,
BLK_MQ_S_SCHED_RESTART = 2,
+ /* hw queue is inactive after all its CPUs become offline */
+ BLK_MQ_S_INACTIVE = 3,
+
BLK_MQ_MAX_DEPTH = 10240,
BLK_MQ_CPU_WORK_BATCH = 8,
@@ -494,6 +504,7 @@ void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list);
void blk_mq_kick_requeue_list(struct request_queue *q);
void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs);
bool blk_mq_complete_request(struct request *rq);
+void blk_mq_force_complete_rq(struct request *rq);
bool blk_mq_bio_list_merge(struct request_queue *q, struct list_head *list,
struct bio *bio, unsigned int nr_segs);
bool blk_mq_queue_stopped(struct request_queue *q);
@@ -508,6 +519,7 @@ void blk_mq_unquiesce_queue(struct request_queue *q);
void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs);
void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
void blk_mq_run_hw_queues(struct request_queue *q, bool async);
+void blk_mq_delay_run_hw_queues(struct request_queue *q, unsigned long msecs);
void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
busy_tag_iter_fn *fn, void *priv);
void blk_mq_tagset_wait_completed_request(struct blk_mq_tag_set *tagset);
@@ -577,4 +589,6 @@ static inline void blk_mq_cleanup_rq(struct request *rq)
rq->q->mq_ops->cleanup_rq(rq);
}
+blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio);
+
#endif
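
For context on BLK_MQ_F_STACKING: a stacking blk-mq driver that completes IO through an underlying device would set the flag in its tag set, so the hctx CPU-hotplug handling added in this series does not wait for it to drain. A hedged sketch with illustrative field values (not from this patch):

/*
 * Illustrative sketch only: queue depth and other fields are placeholders.
 */
static int example_init_tag_set(struct blk_mq_tag_set *set,
				const struct blk_mq_ops *ops)
{
	memset(set, 0, sizeof(*set));
	set->ops = ops;
	set->nr_hw_queues = 1;
	set->queue_depth = 128;
	set->numa_node = NUMA_NO_NODE;
	set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_STACKING;

	return blk_mq_alloc_tag_set(set);
}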
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 31eb92876be7..ccb895f911b1 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -18,6 +18,7 @@ struct block_device;
struct io_context;
struct cgroup_subsys_state;
typedef void (bio_end_io_t) (struct bio *);
+struct bio_crypt_ctx;
/*
* Block error status values. See block/blk-core:blk_errors for the details.
@@ -63,6 +64,18 @@ typedef u8 __bitwise blk_status_t;
*/
#define BLK_STS_DEV_RESOURCE ((__force blk_status_t)13)
+/*
+ * BLK_STS_ZONE_RESOURCE is returned from the driver to the block layer if zone
+ * related resources are unavailable, but the driver can guarantee the queue
+ * will be rerun in the future once the resources become available again.
+ *
+ * This is different from BLK_STS_DEV_RESOURCE in that it explicitly references
+ * a zone-specific resource, and IO to a different zone on the same device can
+ * still be served. An example is a write-locked zone: writes to it must wait,
+ * while a read to the same zone can still be served.
+ */
+#define BLK_STS_ZONE_RESOURCE ((__force blk_status_t)14)
+
/**
* blk_path_error - returns true if error may be path related
* @error: status the request was completed with
@@ -173,6 +186,11 @@ struct bio {
u64 bi_iocost_cost;
#endif
#endif
+
+#ifdef CONFIG_BLK_INLINE_ENCRYPTION
+ struct bio_crypt_ctx *bi_crypt_context;
+#endif
+
union {
#if defined(CONFIG_BLK_DEV_INTEGRITY)
struct bio_integrity_payload *bi_integrity; /* data integrity */
@@ -220,7 +238,7 @@ enum {
* throttling rules. Don't do it again. */
BIO_TRACE_COMPLETION, /* bio_endio() should trace the final completion
* of this bio. */
- BIO_QUEUE_ENTERED, /* can use blk_queue_enter_live() */
+ BIO_CGROUP_ACCT, /* has been accounted to a cgroup */
BIO_TRACKED, /* set if bio goes through the rq_qos path */
BIO_FLAG_LAST
};
@@ -296,6 +314,8 @@ enum req_opf {
REQ_OP_ZONE_CLOSE = 11,
/* Transition a zone to full */
REQ_OP_ZONE_FINISH = 12,
+ /* write data at the current zone write pointer */
+ REQ_OP_ZONE_APPEND = 13,
/* SCSI passthrough using struct scsi_request */
REQ_OP_SCSI_IN = 32,
@@ -323,7 +343,6 @@ enum req_flag_bits {
__REQ_RAHEAD, /* read ahead, can fail anytime */
__REQ_BACKGROUND, /* background IO */
__REQ_NOWAIT, /* Don't wait if request will block */
- __REQ_NOWAIT_INLINE, /* Return would-block error inline */
/*
* When a shared kthread needs to issue a bio for a cgroup, doing
* so synchronously can lead to priority inversions as the kthread
@@ -358,7 +377,6 @@ enum req_flag_bits {
#define REQ_RAHEAD (1ULL << __REQ_RAHEAD)
#define REQ_BACKGROUND (1ULL << __REQ_BACKGROUND)
#define REQ_NOWAIT (1ULL << __REQ_NOWAIT)
-#define REQ_NOWAIT_INLINE (1ULL << __REQ_NOWAIT_INLINE)
#define REQ_CGROUP_PUNT (1ULL << __REQ_CGROUP_PUNT)
#define REQ_NOUNMAP (1ULL << __REQ_NOUNMAP)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 32868fbedc9e..8fd900998b4e 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -43,6 +43,7 @@ struct pr_ops;
struct rq_qos;
struct blk_queue_stats;
struct blk_stat_callback;
+struct blk_keyslot_manager;
#define BLKDEV_MIN_RQ 4
#define BLKDEV_MAX_RQ 128 /* Default maximum */
@@ -82,8 +83,6 @@ typedef __u32 __bitwise req_flags_t;
/* set for "ide_preempt" requests and also for requests for which the SCSI
"quiesce" state must be ignored. */
#define RQF_PREEMPT ((__force req_flags_t)(1 << 8))
-/* contains copies of user pages */
-#define RQF_COPY_USER ((__force req_flags_t)(1 << 9))
/* vaguely specified driver internal error. Ignored by the block layer */
#define RQF_FAILED ((__force req_flags_t)(1 << 10))
/* don't warn about errors */
@@ -223,11 +222,14 @@ struct request {
unsigned short nr_integrity_segments;
#endif
+#ifdef CONFIG_BLK_INLINE_ENCRYPTION
+ struct bio_crypt_ctx *crypt_ctx;
+ struct blk_ksm_keyslot *crypt_keyslot;
+#endif
+
unsigned short write_hint;
unsigned short ioprio;
- unsigned int extra_len; /* length of alignment and padding */
-
enum mq_rq_state state;
refcount_t ref;
@@ -290,7 +292,6 @@ struct blk_queue_ctx;
typedef blk_qc_t (make_request_fn) (struct request_queue *q, struct bio *bio);
struct bio_vec;
-typedef int (dma_drain_needed_fn)(struct request *);
enum blk_eh_timer_return {
BLK_EH_DONE, /* drivers has completed the command */
@@ -336,6 +337,7 @@ struct queue_limits {
unsigned int max_hw_discard_sectors;
unsigned int max_write_same_sectors;
unsigned int max_write_zeroes_sectors;
+ unsigned int max_zone_append_sectors;
unsigned int discard_granularity;
unsigned int discard_alignment;
@@ -361,7 +363,8 @@ unsigned int blkdev_nr_zones(struct gendisk *disk);
extern int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op,
sector_t sectors, sector_t nr_sectors,
gfp_t gfp_mask);
-extern int blk_revalidate_disk_zones(struct gendisk *disk);
+int blk_revalidate_disk_zones(struct gendisk *disk,
+ void (*update_driver_data)(struct gendisk *disk));
extern int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
unsigned int cmd, unsigned long arg);
@@ -399,7 +402,6 @@ struct request_queue {
struct rq_qos *rq_qos;
make_request_fn *make_request_fn;
- dma_drain_needed_fn *dma_drain_needed;
const struct blk_mq_ops *mq_ops;
@@ -469,11 +471,14 @@ struct request_queue {
*/
unsigned long nr_requests; /* Max # of requests */
- unsigned int dma_drain_size;
- void *dma_drain_buffer;
unsigned int dma_pad_mask;
unsigned int dma_alignment;
+#ifdef CONFIG_BLK_INLINE_ENCRYPTION
+ /* Inline crypto capabilities */
+ struct blk_keyslot_manager *ksm;
+#endif
+
unsigned int rq_timeout;
int poll_nsec;
@@ -729,6 +734,16 @@ static inline unsigned int blk_queue_nr_zones(struct request_queue *q)
{
return 0;
}
+static inline bool blk_queue_zone_is_seq(struct request_queue *q,
+ sector_t sector)
+{
+ return false;
+}
+static inline unsigned int blk_queue_zone_no(struct request_queue *q,
+ sector_t sector)
+{
+ return 0;
+}
#endif /* CONFIG_BLK_DEV_ZONED */
static inline bool rq_is_sync(struct request *rq)
@@ -747,6 +762,9 @@ static inline bool rq_mergeable(struct request *rq)
if (req_op(rq) == REQ_OP_WRITE_ZEROES)
return false;
+ if (req_op(rq) == REQ_OP_ZONE_APPEND)
+ return false;
+
if (rq->cmd_flags & REQ_NOMERGE_FLAGS)
return false;
if (rq->rq_flags & RQF_NOMERGE_FLAGS)
@@ -1081,6 +1099,8 @@ extern void blk_queue_max_write_same_sectors(struct request_queue *q,
extern void blk_queue_max_write_zeroes_sectors(struct request_queue *q,
unsigned int max_write_same_sectors);
extern void blk_queue_logical_block_size(struct request_queue *, unsigned int);
+extern void blk_queue_max_zone_append_sectors(struct request_queue *q,
+ unsigned int max_zone_append_sectors);
extern void blk_queue_physical_block_size(struct request_queue *, unsigned int);
extern void blk_queue_alignment_offset(struct request_queue *q,
unsigned int alignment);
@@ -1099,9 +1119,6 @@ extern void disk_stack_limits(struct gendisk *disk, struct block_device *bdev,
sector_t offset);
extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b);
extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int);
-extern int blk_queue_dma_drain(struct request_queue *q,
- dma_drain_needed_fn *dma_drain_needed,
- void *buf, unsigned int size);
extern void blk_queue_segment_boundary(struct request_queue *, unsigned long);
extern void blk_queue_virt_boundary(struct request_queue *, unsigned long);
extern void blk_queue_dma_alignment(struct request_queue *, int);
@@ -1138,7 +1155,15 @@ static inline unsigned short blk_rq_nr_discard_segments(struct request *rq)
return max_t(unsigned short, rq->nr_phys_segments, 1);
}
-extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *);
+int __blk_rq_map_sg(struct request_queue *q, struct request *rq,
+ struct scatterlist *sglist, struct scatterlist **last_sg);
+static inline int blk_rq_map_sg(struct request_queue *q, struct request *rq,
+ struct scatterlist *sglist)
+{
+ struct scatterlist *last_sg = NULL;
+
+ return __blk_rq_map_sg(q, rq, sglist, &last_sg);
+}
extern void blk_dump_rq_flags(struct request *, char *);
extern long nr_blockdev_pages(void);
@@ -1206,7 +1231,9 @@ static inline bool blk_needs_flush_plug(struct task_struct *tsk)
!list_empty(&plug->cb_list));
}
-extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *);
+extern void blk_io_schedule(void);
+
+int blkdev_issue_flush(struct block_device *, gfp_t);
extern int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask, struct page *page);
@@ -1293,6 +1320,11 @@ static inline unsigned int queue_max_segment_size(const struct request_queue *q)
return q->limits.max_segment_size;
}
+static inline unsigned int queue_max_zone_append_sectors(const struct request_queue *q)
+{
+ return q->limits.max_zone_append_sectors;
+}
+
static inline unsigned queue_logical_block_size(const struct request_queue *q)
{
int retval = 512;
@@ -1551,6 +1583,12 @@ struct blk_integrity *bdev_get_integrity(struct block_device *bdev)
return blk_get_integrity(bdev->bd_disk);
}
+static inline bool
+blk_integrity_queue_supports_integrity(struct request_queue *q)
+{
+ return q->integrity.profile;
+}
+
static inline bool blk_integrity_rq(struct request *rq)
{
return rq->cmd_flags & REQ_INTEGRITY;
@@ -1631,6 +1669,11 @@ static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk)
{
return NULL;
}
+static inline bool
+blk_integrity_queue_supports_integrity(struct request_queue *q)
+{
+ return false;
+}
static inline int blk_integrity_compare(struct gendisk *a, struct gendisk *b)
{
return 0;
@@ -1682,6 +1725,25 @@ static inline struct bio_vec *rq_integrity_vec(struct request *rq)
#endif /* CONFIG_BLK_DEV_INTEGRITY */
+#ifdef CONFIG_BLK_INLINE_ENCRYPTION
+
+bool blk_ksm_register(struct blk_keyslot_manager *ksm, struct request_queue *q);
+
+void blk_ksm_unregister(struct request_queue *q);
+
+#else /* CONFIG_BLK_INLINE_ENCRYPTION */
+
+static inline bool blk_ksm_register(struct blk_keyslot_manager *ksm,
+ struct request_queue *q)
+{
+ return true;
+}
+
+static inline void blk_ksm_unregister(struct request_queue *q) { }
+
+#endif /* CONFIG_BLK_INLINE_ENCRYPTION */
+
+
struct block_device_operations {
int (*open) (struct block_device *, fmode_t);
void (*release) (struct gendisk *, fmode_t);
@@ -1719,6 +1781,7 @@ extern int bdev_write_page(struct block_device *, sector_t, struct page *,
#ifdef CONFIG_BLK_DEV_ZONED
bool blk_req_needs_zone_write_lock(struct request *rq);
+bool blk_req_zone_write_trylock(struct request *rq);
void __blk_req_zone_write_lock(struct request *rq);
void __blk_req_zone_write_unlock(struct request *rq);
@@ -1809,8 +1872,7 @@ static inline bool blk_needs_flush_plug(struct task_struct *tsk)
return false;
}
-static inline int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
- sector_t *error_sector)
+static inline int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask)
{
return 0;
}
@@ -1830,4 +1892,32 @@ static inline void blk_wake_io_task(struct task_struct *waiter)
wake_up_process(waiter);
}
+#ifdef CONFIG_BLOCK
+unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors,
+ unsigned int op);
+void disk_end_io_acct(struct gendisk *disk, unsigned int op,
+ unsigned long start_time);
+
+/**
+ * bio_start_io_acct - start I/O accounting for bio-based drivers
+ * @bio: bio to start accounting for
+ *
+ * Returns the start time that should be passed back to bio_end_io_acct().
+ */
+static inline unsigned long bio_start_io_acct(struct bio *bio)
+{
+ return disk_start_io_acct(bio->bi_disk, bio_sectors(bio), bio_op(bio));
+}
+
+/**
+ * bio_end_io_acct - end I/O accounting for bio-based drivers
+ * @bio: bio to end accounting for
+ * @start: start time returned by bio_start_io_acct()
+ */
+static inline void bio_end_io_acct(struct bio *bio, unsigned long start_time)
+{
+ return disk_end_io_acct(bio->bi_disk, bio_op(bio), start_time);
+}
+#endif /* CONFIG_BLOCK */
+
#endif
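
The new accounting helpers are meant to bracket the per-bio work in a bio-based driver. A rough sketch of the expected usage in a make_request function, with example_handle_bio() as a placeholder (not from this patch):

static void example_handle_bio(struct bio *bio)
{
	/* placeholder: a real driver would transfer the bio's data here */
}

static blk_qc_t example_make_request(struct request_queue *q, struct bio *bio)
{
	unsigned long start = bio_start_io_acct(bio);

	example_handle_bio(bio);

	bio_end_io_acct(bio, start);
	bio_endio(bio);
	return BLK_QC_T_NONE;
}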
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 15b765a181b8..22fb11e2d2e0 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -272,14 +272,6 @@ void buffer_init(void);
* inline definitions
*/
-static inline void attach_page_buffers(struct page *page,
- struct buffer_head *head)
-{
- get_page(page);
- SetPagePrivate(page);
- set_page_private(page, (unsigned long)head);
-}
-
static inline void get_bh(struct buffer_head *bh)
{
atomic_inc(&bh->b_count);
diff --git a/include/linux/bvec.h b/include/linux/bvec.h
index a81c13ac1972..ac0c7299d5b8 100644
--- a/include/linux/bvec.h
+++ b/include/linux/bvec.h
@@ -12,8 +12,17 @@
#include <linux/errno.h>
#include <linux/mm.h>
-/*
- * was unsigned short, but we might as well be ready for > 64kB I/O pages
+/**
+ * struct bio_vec - a contiguous range of physical memory addresses
+ * @bv_page: First page associated with the address range.
+ * @bv_len: Number of bytes in the address range.
+ * @bv_offset: Start of the address range relative to the start of @bv_page.
+ *
+ * The following holds for a bvec if n * PAGE_SIZE < bv_offset + bv_len:
+ *
+ * nth_page(@bv_page, n) == @bv_page + n
+ *
+ * This holds because page_is_mergeable() checks the above property.
*/
struct bio_vec {
struct page *bv_page;
diff --git a/include/linux/cdrom.h b/include/linux/cdrom.h
index 528271c60018..8543fa59da72 100644
--- a/include/linux/cdrom.h
+++ b/include/linux/cdrom.h
@@ -94,6 +94,11 @@ struct cdrom_device_ops {
struct packet_command *);
};
+int cdrom_multisession(struct cdrom_device_info *cdi,
+ struct cdrom_multisession *info);
+int cdrom_read_tocentry(struct cdrom_device_info *cdi,
+ struct cdrom_tocentry *entry);
+
/* the general block_device operations structure: */
extern int cdrom_open(struct cdrom_device_info *cdi, struct block_device *bdev,
fmode_t mode);
@@ -104,7 +109,7 @@ extern unsigned int cdrom_check_events(struct cdrom_device_info *cdi,
unsigned int clearing);
extern int cdrom_media_changed(struct cdrom_device_info *);
-extern int register_cdrom(struct cdrom_device_info *cdi);
+extern int register_cdrom(struct gendisk *disk, struct cdrom_device_info *cdi);
extern void unregister_cdrom(struct cdrom_device_info *cdi);
typedef struct {
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 0480ba4db592..e90100c0de72 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -402,8 +402,15 @@ long compat_get_bitmap(unsigned long *mask, const compat_ulong_t __user *umask,
unsigned long bitmap_size);
long compat_put_bitmap(compat_ulong_t __user *umask, unsigned long *mask,
unsigned long bitmap_size);
-int copy_siginfo_from_user32(kernel_siginfo_t *to, const struct compat_siginfo __user *from);
-int copy_siginfo_to_user32(struct compat_siginfo __user *to, const kernel_siginfo_t *from);
+void copy_siginfo_to_external32(struct compat_siginfo *to,
+ const struct kernel_siginfo *from);
+int copy_siginfo_from_user32(kernel_siginfo_t *to,
+ const struct compat_siginfo __user *from);
+int __copy_siginfo_to_user32(struct compat_siginfo __user *to,
+ const kernel_siginfo_t *from);
+#ifndef copy_siginfo_to_user32
+#define copy_siginfo_to_user32 __copy_siginfo_to_user32
+#endif
int get_compat_sigevent(struct sigevent *event,
const struct compat_sigevent __user *u_event);
diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
index 333a6695a918..790c0c6b8552 100644
--- a/include/linux/compiler-clang.h
+++ b/include/linux/compiler-clang.h
@@ -42,3 +42,7 @@
* compilers, like ICC.
*/
#define barrier() __asm__ __volatile__("" : : : "memory")
+
+#if __has_feature(shadow_call_stack)
+# define __noscs __attribute__((__no_sanitize__("shadow-call-stack")))
+#endif
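
__noscs opts a function out of Clang's shadow-call-stack instrumentation. A hypothetical sketch of where such an annotation might be applied, for example code that runs before the shadow call stack has been installed for a CPU; the function name and body are illustrative only:

static void __noscs example_early_secondary_start(void)
{
	/* body intentionally empty in this sketch */
}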
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 448c91bf543b..6325d64e3c3b 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -120,12 +120,65 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
/* Annotate a C jump table to allow objtool to follow the code flow */
#define __annotate_jump_table __section(.rodata..c_jump_table)
+#ifdef CONFIG_DEBUG_ENTRY
+/* Begin/end of an instrumentation safe region */
+#define instrumentation_begin() ({ \
+ asm volatile("%c0:\n\t" \
+ ".pushsection .discard.instr_begin\n\t" \
+ ".long %c0b - .\n\t" \
+ ".popsection\n\t" : : "i" (__COUNTER__)); \
+})
+
+/*
+ * Because instrumentation_{begin,end}() can nest, objtool validation considers
+ * _begin() a +1 and _end() a -1 and computes a sum over the instructions.
+ * When the value is greater than 0, we consider instrumentation allowed.
+ *
+ * There is a problem with code like:
+ *
+ * noinstr void foo()
+ * {
+ * instrumentation_begin();
+ * ...
+ * if (cond) {
+ * instrumentation_begin();
+ * ...
+ * instrumentation_end();
+ * }
+ * bar();
+ * instrumentation_end();
+ * }
+ *
+ * If instrumentation_end() were an empty label, like all the other
+ * annotations, the inner _end(), which sits at the end of the conditional
+ * block, would land on the instruction after the block.
+ *
+ * If we then consider the sum along the !cond path, we see that the call to
+ * bar() happens with a 0 value, even though we meant it to happen with a
+ * positive value.
+ *
+ * To avoid this, make _end() a NOP instruction; this ensures it stays
+ * part of the conditional block and does not escape it.
+ */
+#define instrumentation_end() ({ \
+ asm volatile("%c0: nop\n\t" \
+ ".pushsection .discard.instr_end\n\t" \
+ ".long %c0b - .\n\t" \
+ ".popsection\n\t" : : "i" (__COUNTER__)); \
+})
+#endif /* CONFIG_DEBUG_ENTRY */
+
#else
#define annotate_reachable()
#define annotate_unreachable()
#define __annotate_jump_table
#endif
+#ifndef instrumentation_begin
+#define instrumentation_begin() do { } while(0)
+#define instrumentation_end() do { } while(0)
+#endif
+
#ifndef ASM_UNREACHABLE
# define ASM_UNREACHABLE
#endif
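
The nesting rules described above translate into the usage pattern sketched below for a noinstr function (noinstr itself is added to compiler_types.h later in this series); do_traced_work() is a placeholder, not a real kernel helper:

static void do_traced_work(void)
{
}

noinstr void example_entry(int cond)
{
	/* no instrumentation is allowed here */
	instrumentation_begin();
	do_traced_work();
	if (cond) {
		instrumentation_begin();
		do_traced_work();
		instrumentation_end();	/* emits a NOP, stays inside the block */
	}
	instrumentation_end();
	/* and none here either */
}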
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index e970f97a7fcb..6fcf73200b67 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -118,6 +118,10 @@ struct ftrace_likely_data {
#define notrace __attribute__((__no_instrument_function__))
#endif
+/* Section for code which can't be instrumented at all */
+#define noinstr \
+ noinline notrace __attribute((__section__(".noinstr.text")))
+
/*
* it doesn't make sense on ARM (currently the only user of __naked)
* to trace naked functions because then mcount is called without
@@ -193,6 +197,10 @@ struct ftrace_likely_data {
# define randomized_struct_fields_end
#endif
+#ifndef __noscs
+# define __noscs
+#endif
+
#ifndef asm_volatile_goto
#define asm_volatile_goto(x...) asm goto(x)
#endif
diff --git a/include/linux/configfs.h b/include/linux/configfs.h
index fa9490a8874c..2e8c69b43c64 100644
--- a/include/linux/configfs.h
+++ b/include/linux/configfs.h
@@ -13,7 +13,7 @@
*
* configfs Copyright (C) 2005 Oracle. All rights reserved.
*
- * Please read Documentation/filesystems/configfs/configfs.txt before using
+ * Please read Documentation/filesystems/configfs.rst before using
* the configfs interface, ESPECIALLY the parts about reference counts and
* item destructors.
*/
diff --git a/include/linux/console.h b/include/linux/console.h
index 7a140f4e5d0c..75dd20650fbe 100644
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -134,7 +134,7 @@ static inline int con_debug_leave(void)
*/
#define CON_PRINTBUFFER (1)
-#define CON_CONSDEV (2) /* Last on the command line */
+#define CON_CONSDEV (2) /* Preferred console, /dev/console */
#define CON_ENABLED (4)
#define CON_BOOT (8)
#define CON_ANYTIME (16) /* Safe to call when cpu is offline */
diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h
index 8150f5ac176c..8cac62ee6add 100644
--- a/include/linux/context_tracking.h
+++ b/include/linux/context_tracking.h
@@ -101,12 +101,14 @@ static inline void context_tracking_init(void) { }
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
/* must be called with irqs disabled */
-static inline void guest_enter_irqoff(void)
+static __always_inline void guest_enter_irqoff(void)
{
+ instrumentation_begin();
if (vtime_accounting_enabled_this_cpu())
vtime_guest_enter(current);
else
current->flags |= PF_VCPU;
+ instrumentation_end();
if (context_tracking_enabled())
__context_tracking_enter(CONTEXT_GUEST);
@@ -118,39 +120,48 @@ static inline void guest_enter_irqoff(void)
* one time slice). Lets treat guest mode as quiescent state, just like
* we do with user-mode execution.
*/
- if (!context_tracking_enabled_this_cpu())
+ if (!context_tracking_enabled_this_cpu()) {
+ instrumentation_begin();
rcu_virt_note_context_switch(smp_processor_id());
+ instrumentation_end();
+ }
}
-static inline void guest_exit_irqoff(void)
+static __always_inline void guest_exit_irqoff(void)
{
if (context_tracking_enabled())
__context_tracking_exit(CONTEXT_GUEST);
+ instrumentation_begin();
if (vtime_accounting_enabled_this_cpu())
vtime_guest_exit(current);
else
current->flags &= ~PF_VCPU;
+ instrumentation_end();
}
#else
-static inline void guest_enter_irqoff(void)
+static __always_inline void guest_enter_irqoff(void)
{
/*
* This is running in ioctl context so its safe
* to assume that it's the stime pending cputime
* to flush.
*/
+ instrumentation_begin();
vtime_account_kernel(current);
current->flags |= PF_VCPU;
rcu_virt_note_context_switch(smp_processor_id());
+ instrumentation_end();
}
-static inline void guest_exit_irqoff(void)
+static __always_inline void guest_exit_irqoff(void)
{
+ instrumentation_begin();
/* Flush the guest cputime we spent on the guest */
vtime_account_kernel(current);
current->flags &= ~PF_VCPU;
+ instrumentation_end();
}
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */
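
Both variants are now __always_inline and wrap only their instrumentable parts, so a hypervisor's vcpu run loop can call them from noinstr context. A hedged sketch of the expected call pattern, with example_run_guest() standing in for the architecture-specific world switch:

static void example_run_guest(void)
{
	/* placeholder for the architecture-specific guest entry/exit */
}

static void example_vcpu_run(void)
{
	local_irq_disable();

	guest_enter_irqoff();
	example_run_guest();
	guest_exit_irqoff();

	local_irq_enable();
}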
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index beaed2dc269e..52692587f7fe 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -144,18 +144,8 @@ static inline void get_online_cpus(void) { cpus_read_lock(); }
static inline void put_online_cpus(void) { cpus_read_unlock(); }
#ifdef CONFIG_PM_SLEEP_SMP
-int __freeze_secondary_cpus(int primary, bool suspend);
-static inline int freeze_secondary_cpus(int primary)
-{
- return __freeze_secondary_cpus(primary, true);
-}
-
-static inline int disable_nonboot_cpus(void)
-{
- return __freeze_secondary_cpus(0, false);
-}
-
-void enable_nonboot_cpus(void);
+extern int freeze_secondary_cpus(int primary);
+extern void thaw_secondary_cpus(void);
static inline int suspend_disable_secondary_cpus(void)
{
@@ -168,12 +158,11 @@ static inline int suspend_disable_secondary_cpus(void)
}
static inline void suspend_enable_secondary_cpus(void)
{
- return enable_nonboot_cpus();
+ return thaw_secondary_cpus();
}
#else /* !CONFIG_PM_SLEEP_SMP */
-static inline int disable_nonboot_cpus(void) { return 0; }
-static inline void enable_nonboot_cpus(void) {}
+static inline void thaw_secondary_cpus(void) {}
static inline int suspend_disable_secondary_cpus(void) { return 0; }
static inline void suspend_enable_secondary_cpus(void) { }
#endif /* !CONFIG_PM_SLEEP_SMP */
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index f7240251a949..67d5950bd878 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -330,7 +330,7 @@ struct cpufreq_driver {
*
* get_intermediate should return a stable intermediate frequency
* platform wants to switch to and target_intermediate() should set CPU
- * to to that frequency, before jumping to the frequency corresponding
+ * to that frequency, before jumping to the frequency corresponding
* to 'index'. Core will take care of sending notifications and driver
* doesn't have to handle them in target_intermediate() or
* target_index().
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 77d70b633531..24b3a77810b6 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -152,6 +152,7 @@ enum cpuhp_state {
CPUHP_AP_SMPBOOT_THREADS,
CPUHP_AP_X86_VDSO_VMA_ONLINE,
CPUHP_AP_IRQ_AFFINITY_ONLINE,
+ CPUHP_AP_BLK_MQ_ONLINE,
CPUHP_AP_ARM_MVEBU_SYNC_CLOCKS,
CPUHP_AP_X86_INTEL_EPB_ONLINE,
CPUHP_AP_PERF_ONLINE,
diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h
index 4664fc1871de..bc156285d097 100644
--- a/include/linux/crash_dump.h
+++ b/include/linux/crash_dump.h
@@ -97,8 +97,6 @@ extern void unregister_oldmem_pfn_is_ram(void);
static inline bool is_kdump_kernel(void) { return 0; }
#endif /* CONFIG_CRASH_DUMP */
-extern unsigned long saved_max_pfn;
-
/* Device Dump information to be filled by drivers */
struct vmcoredd_data {
char dump_name[VMCOREDD_MAX_NAME_BYTES]; /* Unique name of the dump */
diff --git a/include/linux/cryptohash.h b/include/linux/cryptohash.h
deleted file mode 100644
index f6ba4c3e60d7..000000000000
--- a/include/linux/cryptohash.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __CRYPTOHASH_H
-#define __CRYPTOHASH_H
-
-#include <uapi/linux/types.h>
-
-#define SHA_DIGEST_WORDS 5
-#define SHA_MESSAGE_BYTES (512 /*bits*/ / 8)
-#define SHA_WORKSPACE_WORDS 16
-
-void sha_init(__u32 *buf);
-void sha_transform(__u32 *digest, const char *data, __u32 *W);
-
-#endif
diff --git a/include/linux/dasd_mod.h b/include/linux/dasd_mod.h
new file mode 100644
index 000000000000..d39abad2ff6e
--- /dev/null
+++ b/include/linux/dasd_mod.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef DASD_MOD_H
+#define DASD_MOD_H
+
+#include <asm/dasd.h>
+
+extern int dasd_biodasdinfo(struct gendisk *disk, dasd_information2_t *info);
+
+#endif
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index c1488cc84fd9..a81f0c3cf352 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -177,6 +177,8 @@ struct dentry_operations {
#define DCACHE_REFERENCED 0x00000040 /* Recently used, don't discard. */
+#define DCACHE_DONTCACHE 0x00000080 /* Purge from memory on final dput() */
+
#define DCACHE_CANT_MOUNT 0x00000100
#define DCACHE_GENOCIDE 0x00000200
#define DCACHE_SHRINK_LIST 0x00000400
diff --git a/include/linux/device.h b/include/linux/device.h
index ac8e37cd716a..15460a5ac024 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -884,10 +884,6 @@ extern bool device_is_bound(struct device *dev);
/*
* Easy functions for dynamically creating devices on the fly
*/
-extern __printf(5, 0)
-struct device *device_create_vargs(struct class *cls, struct device *parent,
- dev_t devt, void *drvdata,
- const char *fmt, va_list vargs);
extern __printf(5, 6)
struct device *device_create(struct class *cls, struct device *parent,
dev_t devt, void *drvdata,
diff --git a/include/linux/device_cgroup.h b/include/linux/device_cgroup.h
index 9a72214496e5..d02f32b7514e 100644
--- a/include/linux/device_cgroup.h
+++ b/include/linux/device_cgroup.h
@@ -44,6 +44,9 @@ static inline int devcgroup_inode_mknod(int mode, dev_t dev)
if (!S_ISBLK(mode) && !S_ISCHR(mode))
return 0;
+ if (S_ISCHR(mode) && dev == WHITEOUT_DEV)
+ return 0;
+
if (S_ISBLK(mode))
type = DEVCG_DEV_BLOCK;
else
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index 57bcef6f988a..ab0c156abee6 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -334,6 +334,14 @@ struct dma_buf {
*/
struct dma_buf_attach_ops {
/**
+ * @allow_peer2peer:
+ *
+ * If this is set to true the importer must be able to handle peer
+ * resources without struct pages.
+ */
+ bool allow_peer2peer;
+
+ /**
* @move_notify: [optional] notification that the DMA-buf is moving
*
* If this callback is provided the framework can avoid pinning the
@@ -361,6 +369,7 @@ struct dma_buf_attach_ops {
* @node: list of dma_buf_attachment, protected by dma_resv lock of the dmabuf.
* @sgt: cached mapping.
* @dir: direction of cached mapping.
+ * @peer2peer: true if the importer can handle peer resources without pages.
* @priv: exporter specific attachment data.
* @importer_ops: importer operations for this attachment, if provided
* dma_buf_map/unmap_attachment() must be called with the dma_resv lock held.
@@ -381,6 +390,7 @@ struct dma_buf_attachment {
struct list_head node;
struct sg_table *sgt;
enum dma_data_direction dir;
+ bool peer2peer;
const struct dma_buf_attach_ops *importer_ops;
void *importer_priv;
void *priv;
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 9430d01c0c3d..2c6495f72f79 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -39,6 +39,7 @@
#define EFI_WRITE_PROTECTED ( 8 | (1UL << (BITS_PER_LONG-1)))
#define EFI_OUT_OF_RESOURCES ( 9 | (1UL << (BITS_PER_LONG-1)))
#define EFI_NOT_FOUND (14 | (1UL << (BITS_PER_LONG-1)))
+#define EFI_TIMEOUT (18 | (1UL << (BITS_PER_LONG-1)))
#define EFI_ABORTED (21 | (1UL << (BITS_PER_LONG-1)))
#define EFI_SECURITY_VIOLATION (26 | (1UL << (BITS_PER_LONG-1)))
@@ -379,8 +380,8 @@ typedef union {
typedef struct {
efi_guid_t guid;
- const char *name;
unsigned long *ptr;
+ const char name[16];
} efi_config_table_type_t;
#define EFI_SYSTEM_TABLE_SIGNATURE ((u64)0x5453595320494249ULL)
@@ -426,6 +427,7 @@ typedef struct {
u32 tables;
} efi_system_table_32_t;
+typedef union efi_simple_text_input_protocol efi_simple_text_input_protocol_t;
typedef union efi_simple_text_output_protocol efi_simple_text_output_protocol_t;
typedef union {
@@ -434,7 +436,7 @@ typedef union {
unsigned long fw_vendor; /* physical addr of CHAR16 vendor string */
u32 fw_revision;
unsigned long con_in_handle;
- unsigned long con_in;
+ efi_simple_text_input_protocol_t *con_in;
unsigned long con_out_handle;
efi_simple_text_output_protocol_t *con_out;
unsigned long stderr_handle;
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 901bda352dcb..bacc40a0bdf3 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -39,7 +39,7 @@ struct elevator_mq_ops {
void (*request_merged)(struct request_queue *, struct request *, enum elv_merge);
void (*requests_merged)(struct request_queue *, struct request *, struct request *);
void (*limit_depth)(unsigned int, struct blk_mq_alloc_data *);
- void (*prepare_request)(struct request *, struct bio *bio);
+ void (*prepare_request)(struct request *);
void (*finish_request)(struct request *);
void (*insert_requests)(struct blk_mq_hw_ctx *, struct list_head *, bool);
struct request *(*dispatch_request)(struct blk_mq_hw_ctx *);
diff --git a/include/linux/elf.h b/include/linux/elf.h
index e3649b3e970e..5d5b0321da0b 100644
--- a/include/linux/elf.h
+++ b/include/linux/elf.h
@@ -2,6 +2,7 @@
#ifndef _LINUX_ELF_H
#define _LINUX_ELF_H
+#include <linux/types.h>
#include <asm/elf.h>
#include <uapi/linux/elf.h>
@@ -21,6 +22,9 @@
SET_PERSONALITY(ex)
#endif
+#define ELF32_GNU_PROPERTY_ALIGN 4
+#define ELF64_GNU_PROPERTY_ALIGN 8
+
#if ELF_CLASS == ELFCLASS32
extern Elf32_Dyn _DYNAMIC [];
@@ -31,6 +35,7 @@ extern Elf32_Dyn _DYNAMIC [];
#define elf_addr_t Elf32_Off
#define Elf_Half Elf32_Half
#define Elf_Word Elf32_Word
+#define ELF_GNU_PROPERTY_ALIGN ELF32_GNU_PROPERTY_ALIGN
#else
@@ -42,6 +47,7 @@ extern Elf64_Dyn _DYNAMIC [];
#define elf_addr_t Elf64_Off
#define Elf_Half Elf64_Half
#define Elf_Word Elf64_Word
+#define ELF_GNU_PROPERTY_ALIGN ELF64_GNU_PROPERTY_ALIGN
#endif
@@ -56,4 +62,41 @@ static inline int elf_coredump_extra_notes_write(struct coredump_params *cprm) {
extern int elf_coredump_extra_notes_size(void);
extern int elf_coredump_extra_notes_write(struct coredump_params *cprm);
#endif
+
+/*
+ * NT_GNU_PROPERTY_TYPE_0 header:
+ * Keep this internal until/unless there is an agreed UAPI definition.
+ * pr_type values (GNU_PROPERTY_*) are public and defined in the UAPI header.
+ */
+struct gnu_property {
+ u32 pr_type;
+ u32 pr_datasz;
+};
+
+struct arch_elf_state;
+
+#ifndef CONFIG_ARCH_USE_GNU_PROPERTY
+static inline int arch_parse_elf_property(u32 type, const void *data,
+ size_t datasz, bool compat,
+ struct arch_elf_state *arch)
+{
+ return 0;
+}
+#else
+extern int arch_parse_elf_property(u32 type, const void *data, size_t datasz,
+ bool compat, struct arch_elf_state *arch);
+#endif
+
+#ifdef CONFIG_ARCH_HAVE_ELF_PROT
+int arch_elf_adjust_prot(int prot, const struct arch_elf_state *state,
+ bool has_interp, bool is_interp);
+#else
+static inline int arch_elf_adjust_prot(int prot,
+ const struct arch_elf_state *state,
+ bool has_interp, bool is_interp)
+{
+ return prot;
+}
+#endif
+
#endif /* _LINUX_ELF_H */
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 73d06a39e2d6..259377723603 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -16,11 +16,11 @@
#include <linux/workqueue.h>
#include <linux/sched.h>
#include <linux/capability.h>
-#include <linux/cryptohash.h>
#include <linux/set_memory.h>
#include <linux/kallsyms.h>
#include <linux/if_vlan.h>
#include <linux/vmalloc.h>
+#include <crypto/sha.h>
#include <net/sch_generic.h>
@@ -744,7 +744,7 @@ static inline u32 bpf_prog_insn_size(const struct bpf_prog *prog)
static inline u32 bpf_prog_tag_scratch_size(const struct bpf_prog *prog)
{
return round_up(bpf_prog_insn_size(prog) +
- sizeof(__be64) + 1, SHA_MESSAGE_BYTES);
+ sizeof(__be64) + 1, SHA1_BLOCK_SIZE);
}
static inline unsigned int bpf_prog_size(unsigned int proglen)
diff --git a/include/linux/frame.h b/include/linux/frame.h
index 02d3ca2d9598..303cda600e56 100644
--- a/include/linux/frame.h
+++ b/include/linux/frame.h
@@ -15,9 +15,20 @@
static void __used __section(.discard.func_stack_frame_non_standard) \
*__func_stack_frame_non_standard_##func = func
+/*
+ * This macro indicates that the following intra-function call is valid.
+ * Any non-annotated intra-function call will cause objtool to issue a warning.
+ */
+#define ANNOTATE_INTRA_FUNCTION_CALL \
+ 999: \
+ .pushsection .discard.intra_function_calls; \
+ .long 999b; \
+ .popsection;
+
#else /* !CONFIG_STACK_VALIDATION */
#define STACK_FRAME_NON_STANDARD(func)
+#define ANNOTATE_INTRA_FUNCTION_CALL
#endif /* CONFIG_STACK_VALIDATION */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 5ee9e583bde2..9780ac31387c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -292,6 +292,7 @@ enum positive_aop_returns {
struct page;
struct address_space;
struct writeback_control;
+struct readahead_control;
/*
* Write life time hint values.
@@ -375,6 +376,7 @@ struct address_space_operations {
*/
int (*readpages)(struct file *filp, struct address_space *mapping,
struct list_head *pages, unsigned nr_pages);
+ void (*readahead)(struct readahead_control *);
int (*write_begin)(struct file *, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
@@ -976,6 +978,7 @@ struct file {
#endif /* #ifdef CONFIG_EPOLL */
struct address_space *f_mapping;
errseq_t f_wb_err;
+ errseq_t f_sb_err; /* for syncfs */
} __randomize_layout
__attribute__((aligned(4))); /* lest something weird decides that 2 is OK */
@@ -1520,6 +1523,9 @@ struct super_block {
/* Being remounted read-only */
int s_readonly_remount;
+ /* per-sb errseq_t for reporting writeback errors via syncfs */
+ errseq_t s_wb_err;
+
/* AIO completions deferred from interrupt context */
struct workqueue_struct *s_dio_done_wq;
struct hlist_head s_pins;
@@ -1721,7 +1727,11 @@ extern int vfs_link(struct dentry *, struct inode *, struct dentry *, struct ino
extern int vfs_rmdir(struct inode *, struct dentry *);
extern int vfs_unlink(struct inode *, struct dentry *, struct inode **);
extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **, unsigned int);
-extern int vfs_whiteout(struct inode *, struct dentry *);
+
+static inline int vfs_whiteout(struct inode *dir, struct dentry *dentry)
+{
+ return vfs_mknod(dir, dentry, S_IFCHR | WHITEOUT_MODE, WHITEOUT_DEV);
+}
extern struct dentry *vfs_tmpfile(struct dentry *dentry, umode_t mode,
int open_flag);
@@ -2156,6 +2166,8 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src,
*
* I_CREATING New object's inode in the middle of setting up.
*
+ * I_DONTCACHE Evict inode as soon as it is not used anymore.
+ *
* Q: What is the difference between I_WILL_FREE and I_FREEING?
*/
#define I_DIRTY_SYNC (1 << 0)
@@ -2178,6 +2190,7 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src,
#define I_WB_SWITCH (1 << 13)
#define I_OVL_INUSE (1 << 14)
#define I_CREATING (1 << 15)
+#define I_DONTCACHE (1 << 16)
#define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC)
#define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES)
@@ -2581,7 +2594,6 @@ extern struct kmem_cache *names_cachep;
#ifdef CONFIG_BLOCK
extern int register_blkdev(unsigned int, const char *);
extern void unregister_blkdev(unsigned int, const char *);
-extern void bdev_unhash_inode(dev_t dev);
extern struct block_device *bdget(dev_t);
extern struct block_device *bdgrab(struct block_device *bdev);
extern void bd_set_size(struct block_device *, loff_t size);
@@ -2637,7 +2649,6 @@ extern int sync_filesystem(struct super_block *);
extern const struct file_operations def_blk_fops;
extern const struct file_operations def_chr_fops;
#ifdef CONFIG_BLOCK
-extern int ioctl_by_bdev(struct block_device *, unsigned, unsigned long);
extern int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long);
extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long);
extern int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder);
@@ -2723,7 +2734,6 @@ extern bool is_bad_inode(struct inode *);
extern int revalidate_disk(struct gendisk *);
extern int check_disk_change(struct block_device *);
extern int __invalidate_device(struct block_device *, bool);
-extern int invalidate_partition(struct gendisk *, int);
#endif
unsigned long invalidate_mapping_pages(struct address_space *mapping,
pgoff_t start, pgoff_t end);
@@ -2827,6 +2837,18 @@ static inline errseq_t filemap_sample_wb_err(struct address_space *mapping)
return errseq_sample(&mapping->wb_err);
}
+/**
+ * file_sample_sb_err - sample the current errseq_t to test for later errors
+ * @file: file to be sampled
+ *
+ * Grab the most current superblock-level errseq_t value for the given
+ * struct file.
+ */
+static inline errseq_t file_sample_sb_err(struct file *file)
+{
+ return errseq_sample(&file->f_path.dentry->d_sb->s_wb_err);
+}
+
static inline int filemap_nr_thps(struct address_space *mapping)
{
#ifdef CONFIG_READ_ONLY_THP_FOR_FS
@@ -3049,8 +3071,10 @@ extern int inode_needs_sync(struct inode *inode);
extern int generic_delete_inode(struct inode *inode);
static inline int generic_drop_inode(struct inode *inode)
{
- return !inode->i_nlink || inode_unhashed(inode);
+ return !inode->i_nlink || inode_unhashed(inode) ||
+ (inode->i_state & I_DONTCACHE);
}
+extern void d_mark_dontcache(struct inode *inode);
extern struct inode *ilookup5_nowait(struct super_block *sb,
unsigned long hashval, int (*test)(struct inode *, void *),
@@ -3124,6 +3148,8 @@ extern int generic_file_rw_checks(struct file *file_in, struct file *file_out);
extern int generic_copy_file_checks(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out,
size_t *count, unsigned int flags);
+extern ssize_t generic_file_buffered_read(struct kiocb *iocb,
+ struct iov_iter *to, ssize_t already_read);
extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *);
extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *);
extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *);
@@ -3185,8 +3211,6 @@ enum {
DIO_SKIP_HOLES = 0x02,
};
-void dio_end_io(struct bio *bio);
-
ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
struct block_device *bdev, struct iov_iter *iter,
get_block_t get_block,
@@ -3394,11 +3418,6 @@ extern void setattr_copy(struct inode *inode, const struct iattr *attr);
extern int file_update_time(struct file *file);
-static inline bool io_is_direct(struct file *filp)
-{
- return (filp->f_flags & O_DIRECT) || IS_DAX(filp->f_mapping->host);
-}
-
static inline bool vma_is_dax(const struct vm_area_struct *vma)
{
return vma->vm_file && IS_DAX(vma->vm_file->f_mapping->host);
@@ -3423,7 +3442,7 @@ static inline int iocb_flags(struct file *file)
int res = 0;
if (file->f_flags & O_APPEND)
res |= IOCB_APPEND;
- if (io_is_direct(file))
+ if (file->f_flags & O_DIRECT)
res |= IOCB_DIRECT;
if ((file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host))
res |= IOCB_DSYNC;
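
For reference, the per-superblock errseq_t added above would be consumed roughly as follows: f_sb_err is sampled with file_sample_sb_err() when the file is opened, and a later syncfs() reports any intervening writeback error once. A sketch assuming errseq_check_and_advance() from <linux/errseq.h>; it is not part of this patch:

static int example_syncfs_check_err(struct file *file)
{
	struct super_block *sb = file->f_path.dentry->d_sb;

	/* Returns a negative errno recorded since the sample, at most once. */
	return errseq_check_and_advance(&sb->s_wb_err, &file->f_sb_err);
}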
diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h
index e6c3e4c61dad..5f24fcbfbfb4 100644
--- a/include/linux/fs_context.h
+++ b/include/linux/fs_context.h
@@ -85,7 +85,7 @@ struct p_log {
* Superblock creation fills in ->root whereas reconfiguration begins with this
* already set.
*
- * See Documentation/filesystems/mount_api.txt
+ * See Documentation/filesystems/mount_api.rst
*/
struct fs_context {
const struct fs_context_operations *ops;
diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index d5ba431b5d63..ce0b5fbf239d 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -6,7 +6,7 @@
*
* NOTE!!! See:
*
- * Documentation/filesystems/caching/backend-api.txt
+ * Documentation/filesystems/caching/backend-api.rst
*
* for a description of the cache backend interface declared here.
*/
@@ -454,7 +454,7 @@ static inline void fscache_object_lookup_error(struct fscache_object *object)
* Set the maximum size an object is permitted to reach, implying the highest
* byte that may be written. Intended to be called by the attr_changed() op.
*
- * See Documentation/filesystems/caching/backend-api.txt for a complete
+ * See Documentation/filesystems/caching/backend-api.rst for a complete
* description.
*/
static inline
diff --git a/include/linux/fscache.h b/include/linux/fscache.h
index ad044c0cb1f3..a1c928fe98e7 100644
--- a/include/linux/fscache.h
+++ b/include/linux/fscache.h
@@ -6,7 +6,7 @@
*
* NOTE!!! See:
*
- * Documentation/filesystems/caching/netfs-api.txt
+ * Documentation/filesystems/caching/netfs-api.rst
*
* for a description of the network filesystem interface declared here.
*/
@@ -233,7 +233,7 @@ extern void __fscache_enable_cookie(struct fscache_cookie *, const void *, loff_
*
* Register a filesystem as desiring caching services if they're available.
*
- * See Documentation/filesystems/caching/netfs-api.txt for a complete
+ * See Documentation/filesystems/caching/netfs-api.rst for a complete
* description.
*/
static inline
@@ -253,7 +253,7 @@ int fscache_register_netfs(struct fscache_netfs *netfs)
* Indicate that a filesystem no longer desires caching services for the
* moment.
*
- * See Documentation/filesystems/caching/netfs-api.txt for a complete
+ * See Documentation/filesystems/caching/netfs-api.rst for a complete
* description.
*/
static inline
@@ -270,7 +270,7 @@ void fscache_unregister_netfs(struct fscache_netfs *netfs)
* Acquire a specific cache referral tag that can be used to select a specific
* cache in which to cache an index.
*
- * See Documentation/filesystems/caching/netfs-api.txt for a complete
+ * See Documentation/filesystems/caching/netfs-api.rst for a complete
* description.
*/
static inline
@@ -288,7 +288,7 @@ struct fscache_cache_tag *fscache_lookup_cache_tag(const char *name)
*
* Release a reference to a cache referral tag previously looked up.
*
- * See Documentation/filesystems/caching/netfs-api.txt for a complete
+ * See Documentation/filesystems/caching/netfs-api.rst for a complete
* description.
*/
static inline
@@ -315,7 +315,7 @@ void fscache_release_cache_tag(struct fscache_cache_tag *tag)
* that can be used to locate files. This is done by requesting a cookie for
* each index in the path to the file.
*
- * See Documentation/filesystems/caching/netfs-api.txt for a complete
+ * See Documentation/filesystems/caching/netfs-api.rst for a complete
* description.
*/
static inline
@@ -351,7 +351,7 @@ struct fscache_cookie *fscache_acquire_cookie(
* provided to update the auxiliary data in the cache before the object is
* disconnected.
*
- * See Documentation/filesystems/caching/netfs-api.txt for a complete
+ * See Documentation/filesystems/caching/netfs-api.rst for a complete
* description.
*/
static inline
@@ -394,7 +394,7 @@ int fscache_check_consistency(struct fscache_cookie *cookie,
* cookie. The auxiliary data on the cookie will be updated first if @aux_data
* is set.
*
- * See Documentation/filesystems/caching/netfs-api.txt for a complete
+ * See Documentation/filesystems/caching/netfs-api.rst for a complete
* description.
*/
static inline
@@ -410,7 +410,7 @@ void fscache_update_cookie(struct fscache_cookie *cookie, const void *aux_data)
*
* Permit data-storage cache objects to be pinned in the cache.
*
- * See Documentation/filesystems/caching/netfs-api.txt for a complete
+ * See Documentation/filesystems/caching/netfs-api.rst for a complete
* description.
*/
static inline
@@ -425,7 +425,7 @@ int fscache_pin_cookie(struct fscache_cookie *cookie)
*
* Permit data-storage cache objects to be unpinned from the cache.
*
- * See Documentation/filesystems/caching/netfs-api.txt for a complete
+ * See Documentation/filesystems/caching/netfs-api.rst for a complete
* description.
*/
static inline
@@ -441,7 +441,7 @@ void fscache_unpin_cookie(struct fscache_cookie *cookie)
* changed. This includes the data size. These attributes will be obtained
* through the get_attr() cookie definition op.
*
- * See Documentation/filesystems/caching/netfs-api.txt for a complete
+ * See Documentation/filesystems/caching/netfs-api.rst for a complete
* description.
*/
static inline
@@ -463,7 +463,7 @@ int fscache_attr_changed(struct fscache_cookie *cookie)
*
* This can be called with spinlocks held.
*
- * See Documentation/filesystems/caching/netfs-api.txt for a complete
+ * See Documentation/filesystems/caching/netfs-api.rst for a complete
* description.
*/
static inline
@@ -479,7 +479,7 @@ void fscache_invalidate(struct fscache_cookie *cookie)
*
* Wait for the invalidation of an object to complete.
*
- * See Documentation/filesystems/caching/netfs-api.txt for a complete
+ * See Documentation/filesystems/caching/netfs-api.rst for a complete
* description.
*/
static inline
@@ -498,7 +498,7 @@ void fscache_wait_on_invalidate(struct fscache_cookie *cookie)
* cookie so that a write to that object within the space can always be
* honoured.
*
- * See Documentation/filesystems/caching/netfs-api.txt for a complete
+ * See Documentation/filesystems/caching/netfs-api.rst for a complete
* description.
*/
static inline
@@ -533,7 +533,7 @@ int fscache_reserve_space(struct fscache_cookie *cookie, loff_t size)
* Else, if the page is unbacked, -ENODATA is returned and a block may have
* been allocated in the cache.
*
- * See Documentation/filesystems/caching/netfs-api.txt for a complete
+ * See Documentation/filesystems/caching/netfs-api.rst for a complete
* description.
*/
static inline
@@ -582,7 +582,7 @@ int fscache_read_or_alloc_page(struct fscache_cookie *cookie,
* regard to different pages, the return values are prioritised in that order.
* Any pages submitted for reading are removed from the pages list.
*
- * See Documentation/filesystems/caching/netfs-api.txt for a complete
+ * See Documentation/filesystems/caching/netfs-api.rst for a complete
* description.
*/
static inline
@@ -617,7 +617,7 @@ int fscache_read_or_alloc_pages(struct fscache_cookie *cookie,
* Else, a block will be allocated if one wasn't already, and 0 will be
* returned
*
- * See Documentation/filesystems/caching/netfs-api.txt for a complete
+ * See Documentation/filesystems/caching/netfs-api.rst for a complete
* description.
*/
static inline
@@ -667,7 +667,7 @@ void fscache_readpages_cancel(struct fscache_cookie *cookie,
* be cleared at the completion of the write to indicate the success or failure
* of the operation. Note that the completion may happen before the return.
*
- * See Documentation/filesystems/caching/netfs-api.txt for a complete
+ * See Documentation/filesystems/caching/netfs-api.rst for a complete
* description.
*/
static inline
@@ -693,7 +693,7 @@ int fscache_write_page(struct fscache_cookie *cookie,
* Note that this cannot cancel any outstanding I/O operations between this
* page and the cache.
*
- * See Documentation/filesystems/caching/netfs-api.txt for a complete
+ * See Documentation/filesystems/caching/netfs-api.rst for a complete
* description.
*/
static inline
@@ -711,7 +711,7 @@ void fscache_uncache_page(struct fscache_cookie *cookie,
*
* Ask the cache if a page is being written to the cache.
*
- * See Documentation/filesystems/caching/netfs-api.txt for a complete
+ * See Documentation/filesystems/caching/netfs-api.rst for a complete
* description.
*/
static inline
@@ -731,7 +731,7 @@ bool fscache_check_page_write(struct fscache_cookie *cookie,
* Ask the cache to wake us up when a page is no longer being written to the
* cache.
*
- * See Documentation/filesystems/caching/netfs-api.txt for a complete
+ * See Documentation/filesystems/caching/netfs-api.rst for a complete
* description.
*/
static inline
diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h
index e3c2d2a15525..2862ca5fea33 100644
--- a/include/linux/fscrypt.h
+++ b/include/linux/fscrypt.h
@@ -15,12 +15,15 @@
#include <linux/fs.h>
#include <linux/mm.h>
+#include <linux/parser.h>
#include <linux/slab.h>
#include <uapi/linux/fscrypt.h>
#define FS_CRYPTO_BLOCK_SIZE 16
+union fscrypt_context;
struct fscrypt_info;
+struct seq_file;
struct fscrypt_str {
unsigned char *name;
@@ -56,10 +59,12 @@ struct fscrypt_name {
struct fscrypt_operations {
unsigned int flags;
const char *key_prefix;
- int (*get_context)(struct inode *, void *, size_t);
- int (*set_context)(struct inode *, const void *, size_t, void *);
- bool (*dummy_context)(struct inode *);
- bool (*empty_dir)(struct inode *);
+ int (*get_context)(struct inode *inode, void *ctx, size_t len);
+ int (*set_context)(struct inode *inode, const void *ctx, size_t len,
+ void *fs_data);
+ const union fscrypt_context *(*get_dummy_context)(
+ struct super_block *sb);
+ bool (*empty_dir)(struct inode *inode);
unsigned int max_namelen;
bool (*has_stable_inodes)(struct super_block *sb);
void (*get_ino_and_lblk_bits)(struct super_block *sb,
@@ -75,6 +80,7 @@ static inline bool fscrypt_has_encryption_key(const struct inode *inode)
/**
* fscrypt_needs_contents_encryption() - check whether an inode needs
* contents encryption
+ * @inode: the inode to check
*
* Return: %true iff the inode is an encrypted regular file and the kernel was
* built with fscrypt support.
@@ -87,10 +93,12 @@ static inline bool fscrypt_needs_contents_encryption(const struct inode *inode)
return IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode);
}
-static inline bool fscrypt_dummy_context_enabled(struct inode *inode)
+static inline const union fscrypt_context *
+fscrypt_get_dummy_context(struct super_block *sb)
{
- return inode->i_sb->s_cop->dummy_context &&
- inode->i_sb->s_cop->dummy_context(inode);
+ if (!sb->s_cop->get_dummy_context)
+ return NULL;
+ return sb->s_cop->get_dummy_context(sb);
}
/*
@@ -106,22 +114,21 @@ static inline void fscrypt_handle_d_move(struct dentry *dentry)
}
/* crypto.c */
-extern void fscrypt_enqueue_decrypt_work(struct work_struct *);
-
-extern struct page *fscrypt_encrypt_pagecache_blocks(struct page *page,
- unsigned int len,
- unsigned int offs,
- gfp_t gfp_flags);
-extern int fscrypt_encrypt_block_inplace(const struct inode *inode,
- struct page *page, unsigned int len,
- unsigned int offs, u64 lblk_num,
- gfp_t gfp_flags);
-
-extern int fscrypt_decrypt_pagecache_blocks(struct page *page, unsigned int len,
- unsigned int offs);
-extern int fscrypt_decrypt_block_inplace(const struct inode *inode,
- struct page *page, unsigned int len,
- unsigned int offs, u64 lblk_num);
+void fscrypt_enqueue_decrypt_work(struct work_struct *);
+
+struct page *fscrypt_encrypt_pagecache_blocks(struct page *page,
+ unsigned int len,
+ unsigned int offs,
+ gfp_t gfp_flags);
+int fscrypt_encrypt_block_inplace(const struct inode *inode, struct page *page,
+ unsigned int len, unsigned int offs,
+ u64 lblk_num, gfp_t gfp_flags);
+
+int fscrypt_decrypt_pagecache_blocks(struct page *page, unsigned int len,
+ unsigned int offs);
+int fscrypt_decrypt_block_inplace(const struct inode *inode, struct page *page,
+ unsigned int len, unsigned int offs,
+ u64 lblk_num);
static inline bool fscrypt_is_bounce_page(struct page *page)
{
@@ -133,78 +140,90 @@ static inline struct page *fscrypt_pagecache_page(struct page *bounce_page)
return (struct page *)page_private(bounce_page);
}
-extern void fscrypt_free_bounce_page(struct page *bounce_page);
+void fscrypt_free_bounce_page(struct page *bounce_page);
/* policy.c */
-extern int fscrypt_ioctl_set_policy(struct file *, const void __user *);
-extern int fscrypt_ioctl_get_policy(struct file *, void __user *);
-extern int fscrypt_ioctl_get_policy_ex(struct file *, void __user *);
-extern int fscrypt_ioctl_get_nonce(struct file *filp, void __user *arg);
-extern int fscrypt_has_permitted_context(struct inode *, struct inode *);
-extern int fscrypt_inherit_context(struct inode *, struct inode *,
- void *, bool);
+int fscrypt_ioctl_set_policy(struct file *filp, const void __user *arg);
+int fscrypt_ioctl_get_policy(struct file *filp, void __user *arg);
+int fscrypt_ioctl_get_policy_ex(struct file *filp, void __user *arg);
+int fscrypt_ioctl_get_nonce(struct file *filp, void __user *arg);
+int fscrypt_has_permitted_context(struct inode *parent, struct inode *child);
+int fscrypt_inherit_context(struct inode *parent, struct inode *child,
+ void *fs_data, bool preload);
+
+struct fscrypt_dummy_context {
+ const union fscrypt_context *ctx;
+};
+
+int fscrypt_set_test_dummy_encryption(struct super_block *sb,
+ const substring_t *arg,
+ struct fscrypt_dummy_context *dummy_ctx);
+void fscrypt_show_test_dummy_encryption(struct seq_file *seq, char sep,
+ struct super_block *sb);
+static inline void
+fscrypt_free_dummy_context(struct fscrypt_dummy_context *dummy_ctx)
+{
+ kfree(dummy_ctx->ctx);
+ dummy_ctx->ctx = NULL;
+}
+
/* keyring.c */
-extern void fscrypt_sb_free(struct super_block *sb);
-extern int fscrypt_ioctl_add_key(struct file *filp, void __user *arg);
-extern int fscrypt_ioctl_remove_key(struct file *filp, void __user *arg);
-extern int fscrypt_ioctl_remove_key_all_users(struct file *filp,
- void __user *arg);
-extern int fscrypt_ioctl_get_key_status(struct file *filp, void __user *arg);
+void fscrypt_sb_free(struct super_block *sb);
+int fscrypt_ioctl_add_key(struct file *filp, void __user *arg);
+int fscrypt_ioctl_remove_key(struct file *filp, void __user *arg);
+int fscrypt_ioctl_remove_key_all_users(struct file *filp, void __user *arg);
+int fscrypt_ioctl_get_key_status(struct file *filp, void __user *arg);
/* keysetup.c */
-extern int fscrypt_get_encryption_info(struct inode *);
-extern void fscrypt_put_encryption_info(struct inode *);
-extern void fscrypt_free_inode(struct inode *);
-extern int fscrypt_drop_inode(struct inode *inode);
+int fscrypt_get_encryption_info(struct inode *inode);
+void fscrypt_put_encryption_info(struct inode *inode);
+void fscrypt_free_inode(struct inode *inode);
+int fscrypt_drop_inode(struct inode *inode);
/* fname.c */
-extern int fscrypt_setup_filename(struct inode *, const struct qstr *,
- int lookup, struct fscrypt_name *);
+int fscrypt_setup_filename(struct inode *inode, const struct qstr *iname,
+ int lookup, struct fscrypt_name *fname);
static inline void fscrypt_free_filename(struct fscrypt_name *fname)
{
kfree(fname->crypto_buf.name);
}
-extern int fscrypt_fname_alloc_buffer(const struct inode *, u32,
- struct fscrypt_str *);
-extern void fscrypt_fname_free_buffer(struct fscrypt_str *);
-extern int fscrypt_fname_disk_to_usr(const struct inode *inode,
- u32 hash, u32 minor_hash,
- const struct fscrypt_str *iname,
- struct fscrypt_str *oname);
-extern bool fscrypt_match_name(const struct fscrypt_name *fname,
- const u8 *de_name, u32 de_name_len);
-extern u64 fscrypt_fname_siphash(const struct inode *dir,
- const struct qstr *name);
+int fscrypt_fname_alloc_buffer(const struct inode *inode, u32 max_encrypted_len,
+ struct fscrypt_str *crypto_str);
+void fscrypt_fname_free_buffer(struct fscrypt_str *crypto_str);
+int fscrypt_fname_disk_to_usr(const struct inode *inode,
+ u32 hash, u32 minor_hash,
+ const struct fscrypt_str *iname,
+ struct fscrypt_str *oname);
+bool fscrypt_match_name(const struct fscrypt_name *fname,
+ const u8 *de_name, u32 de_name_len);
+u64 fscrypt_fname_siphash(const struct inode *dir, const struct qstr *name);
/* bio.c */
-extern void fscrypt_decrypt_bio(struct bio *);
-extern int fscrypt_zeroout_range(const struct inode *, pgoff_t, sector_t,
- unsigned int);
+void fscrypt_decrypt_bio(struct bio *bio);
+int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk,
+ sector_t pblk, unsigned int len);
/* hooks.c */
-extern int fscrypt_file_open(struct inode *inode, struct file *filp);
-extern int __fscrypt_prepare_link(struct inode *inode, struct inode *dir,
- struct dentry *dentry);
-extern int __fscrypt_prepare_rename(struct inode *old_dir,
- struct dentry *old_dentry,
- struct inode *new_dir,
- struct dentry *new_dentry,
- unsigned int flags);
-extern int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry,
- struct fscrypt_name *fname);
-extern int fscrypt_prepare_setflags(struct inode *inode,
- unsigned int oldflags, unsigned int flags);
-extern int __fscrypt_prepare_symlink(struct inode *dir, unsigned int len,
- unsigned int max_len,
- struct fscrypt_str *disk_link);
-extern int __fscrypt_encrypt_symlink(struct inode *inode, const char *target,
- unsigned int len,
- struct fscrypt_str *disk_link);
-extern const char *fscrypt_get_symlink(struct inode *inode, const void *caddr,
- unsigned int max_size,
- struct delayed_call *done);
+int fscrypt_file_open(struct inode *inode, struct file *filp);
+int __fscrypt_prepare_link(struct inode *inode, struct inode *dir,
+ struct dentry *dentry);
+int __fscrypt_prepare_rename(struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry,
+ unsigned int flags);
+int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry,
+ struct fscrypt_name *fname);
+int fscrypt_prepare_setflags(struct inode *inode,
+ unsigned int oldflags, unsigned int flags);
+int __fscrypt_prepare_symlink(struct inode *dir, unsigned int len,
+ unsigned int max_len,
+ struct fscrypt_str *disk_link);
+int __fscrypt_encrypt_symlink(struct inode *inode, const char *target,
+ unsigned int len, struct fscrypt_str *disk_link);
+const char *fscrypt_get_symlink(struct inode *inode, const void *caddr,
+ unsigned int max_size,
+ struct delayed_call *done);
static inline void fscrypt_set_ops(struct super_block *sb,
const struct fscrypt_operations *s_cop)
{
@@ -222,9 +241,10 @@ static inline bool fscrypt_needs_contents_encryption(const struct inode *inode)
return false;
}
-static inline bool fscrypt_dummy_context_enabled(struct inode *inode)
+static inline const union fscrypt_context *
+fscrypt_get_dummy_context(struct super_block *sb)
{
- return false;
+ return NULL;
}
static inline void fscrypt_handle_d_move(struct dentry *dentry)
@@ -319,6 +339,20 @@ static inline int fscrypt_inherit_context(struct inode *parent,
return -EOPNOTSUPP;
}
+struct fscrypt_dummy_context {
+};
+
+static inline void fscrypt_show_test_dummy_encryption(struct seq_file *seq,
+ char sep,
+ struct super_block *sb)
+{
+}
+
+static inline void
+fscrypt_free_dummy_context(struct fscrypt_dummy_context *dummy_ctx)
+{
+}
+
/* keyring.c */
static inline void fscrypt_sb_free(struct super_block *sb)
{
@@ -504,7 +538,7 @@ static inline void fscrypt_set_ops(struct super_block *sb,
#endif /* !CONFIG_FS_ENCRYPTION */
/**
- * fscrypt_require_key - require an inode's encryption key
+ * fscrypt_require_key() - require an inode's encryption key
* @inode: the inode we need the key for
*
* If the inode is encrypted, set up its encryption key if not already done.
@@ -530,7 +564,8 @@ static inline int fscrypt_require_key(struct inode *inode)
}
/**
- * fscrypt_prepare_link - prepare to link an inode into a possibly-encrypted directory
+ * fscrypt_prepare_link() - prepare to link an inode into a possibly-encrypted
+ * directory
* @old_dentry: an existing dentry for the inode being linked
* @dir: the target directory
* @dentry: negative dentry for the target filename
@@ -557,7 +592,8 @@ static inline int fscrypt_prepare_link(struct dentry *old_dentry,
}
/**
- * fscrypt_prepare_rename - prepare for a rename between possibly-encrypted directories
+ * fscrypt_prepare_rename() - prepare for a rename between possibly-encrypted
+ * directories
* @old_dir: source directory
* @old_dentry: dentry for source file
* @new_dir: target directory
@@ -590,7 +626,8 @@ static inline int fscrypt_prepare_rename(struct inode *old_dir,
}
/**
- * fscrypt_prepare_lookup - prepare to lookup a name in a possibly-encrypted directory
+ * fscrypt_prepare_lookup() - prepare to lookup a name in a possibly-encrypted
+ * directory
* @dir: directory being searched
* @dentry: filename being looked up
* @fname: (output) the name to use to search the on-disk directory
@@ -623,7 +660,8 @@ static inline int fscrypt_prepare_lookup(struct inode *dir,
}
/**
- * fscrypt_prepare_setattr - prepare to change a possibly-encrypted inode's attributes
+ * fscrypt_prepare_setattr() - prepare to change a possibly-encrypted inode's
+ * attributes
* @dentry: dentry through which the inode is being changed
* @attr: attributes to change
*
@@ -648,7 +686,7 @@ static inline int fscrypt_prepare_setattr(struct dentry *dentry,
}
/**
- * fscrypt_prepare_symlink - prepare to create a possibly-encrypted symlink
+ * fscrypt_prepare_symlink() - prepare to create a possibly-encrypted symlink
* @dir: directory in which the symlink is being created
* @target: plaintext symlink target
* @len: length of @target excluding null terminator
@@ -676,7 +714,7 @@ static inline int fscrypt_prepare_symlink(struct inode *dir,
unsigned int max_len,
struct fscrypt_str *disk_link)
{
- if (IS_ENCRYPTED(dir) || fscrypt_dummy_context_enabled(dir))
+ if (IS_ENCRYPTED(dir) || fscrypt_get_dummy_context(dir->i_sb) != NULL)
return __fscrypt_prepare_symlink(dir, len, max_len, disk_link);
disk_link->name = (unsigned char *)target;
@@ -687,7 +725,7 @@ static inline int fscrypt_prepare_symlink(struct inode *dir,
}
/**
- * fscrypt_encrypt_symlink - encrypt the symlink target if needed
+ * fscrypt_encrypt_symlink() - encrypt the symlink target if needed
* @inode: symlink inode
* @target: plaintext symlink target
* @len: length of @target excluding null terminator
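A minimal usage sketch (not part of the patch) of the new dummy-context helpers declared above, for a hypothetical filesystem "myfs"; only the fscrypt_*() calls are taken from the prototypes in this diff:

static int myfs_parse_test_dummy_encryption(struct super_block *sb,
					    substring_t *arg,
					    struct fscrypt_dummy_context *ctx)
{
	/* Allocates ctx->ctx from the option value; returns 0 or -errno. */
	return fscrypt_set_test_dummy_encryption(sb, arg, ctx);
}

static int myfs_show_options(struct seq_file *seq, struct dentry *root)
{
	/* Emits ",test_dummy_encryption=..." only when the mode is enabled. */
	fscrypt_show_test_dummy_encryption(seq, ',', root->d_sb);
	return 0;
}

static void myfs_release_dummy_context(struct fscrypt_dummy_context *ctx)
{
	/* kfree()s the cached context and clears the pointer. */
	fscrypt_free_dummy_context(ctx);
}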
diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h
index ecc604e61d61..78201a6d35f6 100644
--- a/include/linux/fsverity.h
+++ b/include/linux/fsverity.h
@@ -121,23 +121,23 @@ static inline struct fsverity_info *fsverity_get_info(const struct inode *inode)
/* enable.c */
-extern int fsverity_ioctl_enable(struct file *filp, const void __user *arg);
+int fsverity_ioctl_enable(struct file *filp, const void __user *arg);
/* measure.c */
-extern int fsverity_ioctl_measure(struct file *filp, void __user *arg);
+int fsverity_ioctl_measure(struct file *filp, void __user *arg);
/* open.c */
-extern int fsverity_file_open(struct inode *inode, struct file *filp);
-extern int fsverity_prepare_setattr(struct dentry *dentry, struct iattr *attr);
-extern void fsverity_cleanup_inode(struct inode *inode);
+int fsverity_file_open(struct inode *inode, struct file *filp);
+int fsverity_prepare_setattr(struct dentry *dentry, struct iattr *attr);
+void fsverity_cleanup_inode(struct inode *inode);
/* verify.c */
-extern bool fsverity_verify_page(struct page *page);
-extern void fsverity_verify_bio(struct bio *bio);
-extern void fsverity_enqueue_verify_work(struct work_struct *work);
+bool fsverity_verify_page(struct page *page);
+void fsverity_verify_bio(struct bio *bio);
+void fsverity_enqueue_verify_work(struct work_struct *work);
#else /* !CONFIG_FS_VERITY */
@@ -200,6 +200,7 @@ static inline void fsverity_enqueue_verify_work(struct work_struct *work)
/**
* fsverity_active() - do reads from the inode need to go through fs-verity?
+ * @inode: inode to check
*
* This checks whether ->i_verity_info has been set.
*
@@ -207,6 +208,8 @@ static inline void fsverity_enqueue_verify_work(struct work_struct *work)
* be verified or not. Don't use IS_VERITY() for this purpose; it's subject to
* a race condition where the file is being read concurrently with
* FS_IOC_ENABLE_VERITY completing. (S_VERITY is set before ->i_verity_info.)
+ *
+ * Return: true if reads need to go through fs-verity, otherwise false
*/
static inline bool fsverity_active(const struct inode *inode)
{
diff --git a/include/linux/ftrace_irq.h b/include/linux/ftrace_irq.h
index ccda97dc7f8b..0abd9a1d2852 100644
--- a/include/linux/ftrace_irq.h
+++ b/include/linux/ftrace_irq.h
@@ -2,15 +2,6 @@
#ifndef _LINUX_FTRACE_IRQ_H
#define _LINUX_FTRACE_IRQ_H
-
-#ifdef CONFIG_FTRACE_NMI_ENTER
-extern void arch_ftrace_nmi_enter(void);
-extern void arch_ftrace_nmi_exit(void);
-#else
-static inline void arch_ftrace_nmi_enter(void) { }
-static inline void arch_ftrace_nmi_exit(void) { }
-#endif
-
#ifdef CONFIG_HWLAT_TRACER
extern bool trace_hwlat_callback_enabled;
extern void trace_hwlat_callback(bool enter);
@@ -22,12 +13,10 @@ static inline void ftrace_nmi_enter(void)
if (trace_hwlat_callback_enabled)
trace_hwlat_callback(true);
#endif
- arch_ftrace_nmi_enter();
}
static inline void ftrace_nmi_exit(void)
{
- arch_ftrace_nmi_exit();
#ifdef CONFIG_HWLAT_TRACER
if (trace_hwlat_callback_enabled)
trace_hwlat_callback(false);
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 9b3fffdf4011..392aad5e29a2 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -39,15 +39,6 @@ extern struct class block_class;
#include <linux/fs.h>
#include <linux/workqueue.h>
-struct disk_stats {
- u64 nsecs[NR_STAT_GROUPS];
- unsigned long sectors[NR_STAT_GROUPS];
- unsigned long ios[NR_STAT_GROUPS];
- unsigned long merges[NR_STAT_GROUPS];
- unsigned long io_ticks;
- local_t in_flight[2];
-};
-
#define PARTITION_META_INFO_VOLNAMELTH 64
/*
* Enough for the string representation of any kind of UUID plus NULL.
@@ -68,7 +59,13 @@ struct hd_struct {
* can be non-atomic on 32bit machines with 64bit sector_t.
*/
sector_t nr_sects;
+#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
seqcount_t nr_sects_seq;
+#endif
+ unsigned long stamp;
+ struct disk_stats __percpu *dkstats;
+ struct percpu_ref ref;
+
sector_t alignment_offset;
unsigned int discard_alignment;
struct device __dev;
@@ -78,13 +75,6 @@ struct hd_struct {
#ifdef CONFIG_FAIL_MAKE_REQUEST
int make_it_fail;
#endif
- unsigned long stamp;
-#ifdef CONFIG_SMP
- struct disk_stats __percpu *dkstats;
-#else
- struct disk_stats dkstats;
-#endif
- struct percpu_ref ref;
struct rcu_work rcu_work;
};
@@ -169,8 +159,6 @@ struct disk_part_tbl {
struct disk_events;
struct badblocks;
-#if defined(CONFIG_BLK_DEV_INTEGRITY)
-
struct blk_integrity {
const struct blk_integrity_profile *profile;
unsigned char flags;
@@ -179,8 +167,6 @@ struct blk_integrity {
unsigned char tag_size;
};
-#endif /* CONFIG_BLK_DEV_INTEGRITY */
-
struct gendisk {
/* major, first_minor and minors are input parameters only,
* don't use directly. Use disk_devt() and disk_max_parts().
@@ -217,11 +203,20 @@ struct gendisk {
#ifdef CONFIG_BLK_DEV_INTEGRITY
struct kobject integrity_kobj;
#endif /* CONFIG_BLK_DEV_INTEGRITY */
+#if IS_ENABLED(CONFIG_CDROM)
+ struct cdrom_device_info *cdi;
+#endif
int node_id;
struct badblocks *bb;
struct lockdep_map lockdep_map;
};
+#if IS_REACHABLE(CONFIG_CDROM)
+#define disk_to_cdi(disk) ((disk)->cdi)
+#else
+#define disk_to_cdi(disk) NULL
+#endif
+
static inline struct gendisk *part_to_disk(struct hd_struct *part)
{
if (likely(part)) {
@@ -265,6 +260,13 @@ static inline void disk_put_part(struct hd_struct *part)
put_device(part_to_dev(part));
}
+static inline void hd_sects_seq_init(struct hd_struct *p)
+{
+#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+ seqcount_init(&p->nr_sects_seq);
+#endif
+}
+
/*
* Smarter partition iterator without context limits.
*/
@@ -339,7 +341,7 @@ extern dev_t blk_lookup_devt(const char *name, int partno);
int bdev_disk_changed(struct block_device *bdev, bool invalidate);
int blk_add_partitions(struct gendisk *disk, struct block_device *bdev);
-int blk_drop_partitions(struct gendisk *disk, struct block_device *bdev);
+int blk_drop_partitions(struct block_device *bdev);
extern void printk_all_partitions(void);
extern struct gendisk *__alloc_disk_node(int minors, int node_id);
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index 7c8b82f69288..e07cf853aa16 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -2,31 +2,28 @@
#ifndef LINUX_HARDIRQ_H
#define LINUX_HARDIRQ_H
+#include <linux/context_tracking_state.h>
#include <linux/preempt.h>
#include <linux/lockdep.h>
#include <linux/ftrace_irq.h>
#include <linux/vtime.h>
#include <asm/hardirq.h>
-
extern void synchronize_irq(unsigned int irq);
extern bool synchronize_hardirq(unsigned int irq);
-#if defined(CONFIG_TINY_RCU)
-
-static inline void rcu_nmi_enter(void)
-{
-}
+#ifdef CONFIG_NO_HZ_FULL
+void __rcu_irq_enter_check_tick(void);
+#else
+static inline void __rcu_irq_enter_check_tick(void) { }
+#endif
-static inline void rcu_nmi_exit(void)
+static __always_inline void rcu_irq_enter_check_tick(void)
{
+ if (context_tracking_enabled())
+ __rcu_irq_enter_check_tick();
}
-#else
-extern void rcu_nmi_enter(void);
-extern void rcu_nmi_exit(void);
-#endif
-
/*
* It is safe to do non-atomic ops on ->hardirq_context,
* because NMI handlers may not preempt and the ops are
@@ -65,14 +62,34 @@ extern void irq_exit(void);
#define arch_nmi_exit() do { } while (0)
#endif
+#ifdef CONFIG_TINY_RCU
+static inline void rcu_nmi_enter(void) { }
+static inline void rcu_nmi_exit(void) { }
+#else
+extern void rcu_nmi_enter(void);
+extern void rcu_nmi_exit(void);
+#endif
+
+/*
+ * NMI vs Tracing
+ * --------------
+ *
+ * We must not land in a tracer until (or after) we've changed preempt_count
+ * such that in_nmi() becomes true. To that effect all NMI C entry points must
+ * be marked 'notrace' and call nmi_enter() as soon as possible.
+ */
+
+/*
+ * nmi_enter() can nest up to 15 times; see NMI_BITS.
+ */
#define nmi_enter() \
do { \
arch_nmi_enter(); \
printk_nmi_enter(); \
lockdep_off(); \
ftrace_nmi_enter(); \
- BUG_ON(in_nmi()); \
- preempt_count_add(NMI_OFFSET + HARDIRQ_OFFSET); \
+ BUG_ON(in_nmi() == NMI_MASK); \
+ __preempt_count_add(NMI_OFFSET + HARDIRQ_OFFSET); \
rcu_nmi_enter(); \
lockdep_hardirq_enter(); \
} while (0)
@@ -82,7 +99,7 @@ extern void irq_exit(void);
lockdep_hardirq_exit(); \
rcu_nmi_exit(); \
BUG_ON(!in_nmi()); \
- preempt_count_sub(NMI_OFFSET + HARDIRQ_OFFSET); \
+ __preempt_count_sub(NMI_OFFSET + HARDIRQ_OFFSET); \
ftrace_nmi_exit(); \
lockdep_on(); \
printk_nmi_exit(); \
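A hedged illustration of the calling convention the new comments spell out: an architecture's NMI C entry point is marked notrace and brackets its work with nmi_enter()/nmi_exit(); the handler name and body below are hypothetical.

notrace void demo_arch_nmi_handler(struct pt_regs *regs)
{
	nmi_enter();	/* raises NMI_OFFSET before any traceable code runs */

	/* ... architecture-specific NMI handling would go here ... */

	nmi_exit();	/* drops the count; nesting up to 15 deep is allowed */
}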
diff --git a/include/linux/hdmi.h b/include/linux/hdmi.h
index 9613d796cfb1..50c31f1a0a2d 100644
--- a/include/linux/hdmi.h
+++ b/include/linux/hdmi.h
@@ -219,6 +219,8 @@ ssize_t hdmi_drm_infoframe_pack(struct hdmi_drm_infoframe *frame, void *buffer,
ssize_t hdmi_drm_infoframe_pack_only(const struct hdmi_drm_infoframe *frame,
void *buffer, size_t size);
int hdmi_drm_infoframe_check(struct hdmi_drm_infoframe *frame);
+int hdmi_drm_infoframe_unpack_only(struct hdmi_drm_infoframe *frame,
+ const void *buffer, size_t size);
enum hdmi_spd_sdi {
HDMI_SPD_SDI_UNKNOWN,
diff --git a/include/linux/hmm.h b/include/linux/hmm.h
index 7475051100c7..e912b9dc4633 100644
--- a/include/linux/hmm.h
+++ b/include/linux/hmm.h
@@ -19,51 +19,47 @@
#include <linux/mmu_notifier.h>
/*
- * hmm_pfn_flag_e - HMM flag enums
+ * On output:
+ * 0 - The page is faultable and a future call with
+ * HMM_PFN_REQ_FAULT could succeed.
+ * HMM_PFN_VALID - the pfn field points to a valid PFN. This PFN is at
+ * least readable. If dev_private_owner is !NULL then this could
+ * point at a DEVICE_PRIVATE page.
+ * HMM_PFN_WRITE - if the page memory can be written to (requires HMM_PFN_VALID)
+ * HMM_PFN_ERROR - accessing the pfn is impossible and the device should
+ * fail, i.e. poisoned memory, special pages, no vma, etc.
*
- * Flags:
- * HMM_PFN_VALID: pfn is valid. It has, at least, read permission.
- * HMM_PFN_WRITE: CPU page table has write permission set
- *
- * The driver provides a flags array for mapping page protections to device
- * PTE bits. If the driver valid bit for an entry is bit 3,
- * i.e., (entry & (1 << 3)), then the driver must provide
- * an array in hmm_range.flags with hmm_range.flags[HMM_PFN_VALID] == 1 << 3.
- * Same logic apply to all flags. This is the same idea as vm_page_prot in vma
- * except that this is per device driver rather than per architecture.
+ * On input:
+ * 0 - Return the current state of the page, do not fault it.
+ * HMM_PFN_REQ_FAULT - The output must have HMM_PFN_VALID or hmm_range_fault()
+ * will fail
+ * HMM_PFN_REQ_WRITE - The output must have HMM_PFN_WRITE or hmm_range_fault()
+ * will fail. Must be combined with HMM_PFN_REQ_FAULT.
*/
-enum hmm_pfn_flag_e {
- HMM_PFN_VALID = 0,
- HMM_PFN_WRITE,
- HMM_PFN_FLAG_MAX
+enum hmm_pfn_flags {
+ /* Output flags */
+ HMM_PFN_VALID = 1UL << (BITS_PER_LONG - 1),
+ HMM_PFN_WRITE = 1UL << (BITS_PER_LONG - 2),
+ HMM_PFN_ERROR = 1UL << (BITS_PER_LONG - 3),
+
+ /* Input flags */
+ HMM_PFN_REQ_FAULT = HMM_PFN_VALID,
+ HMM_PFN_REQ_WRITE = HMM_PFN_WRITE,
+
+ HMM_PFN_FLAGS = HMM_PFN_VALID | HMM_PFN_WRITE | HMM_PFN_ERROR,
};
/*
- * hmm_pfn_value_e - HMM pfn special value
- *
- * Flags:
- * HMM_PFN_ERROR: corresponding CPU page table entry points to poisoned memory
- * HMM_PFN_NONE: corresponding CPU page table entry is pte_none()
- * HMM_PFN_SPECIAL: corresponding CPU page table entry is special; i.e., the
- * result of vmf_insert_pfn() or vm_insert_page(). Therefore, it should not
- * be mirrored by a device, because the entry will never have HMM_PFN_VALID
- * set and the pfn value is undefined.
+ * hmm_pfn_to_page() - return struct page pointed to by a device entry
*
- * Driver provides values for none entry, error entry, and special entry.
- * Driver can alias (i.e., use same value) error and special, but
- * it should not alias none with error or special.
- *
- * HMM pfn value returned by hmm_vma_get_pfns() or hmm_vma_fault() will be:
- * hmm_range.values[HMM_PFN_ERROR] if CPU page table entry is poisonous,
- * hmm_range.values[HMM_PFN_NONE] if there is no CPU page table entry,
- * hmm_range.values[HMM_PFN_SPECIAL] if CPU page table entry is a special one
+ * This must be called under the caller's 'user_lock' after a successful
+ * mmu_interval_read_begin(). The caller must have tested for HMM_PFN_VALID
+ * already.
*/
-enum hmm_pfn_value_e {
- HMM_PFN_ERROR,
- HMM_PFN_NONE,
- HMM_PFN_SPECIAL,
- HMM_PFN_VALUE_MAX
-};
+static inline struct page *hmm_pfn_to_page(unsigned long hmm_pfn)
+{
+ return pfn_to_page(hmm_pfn & ~HMM_PFN_FLAGS);
+}
/*
* struct hmm_range - track invalidation lock on virtual address range
@@ -72,12 +68,9 @@ enum hmm_pfn_value_e {
* @notifier_seq: result of mmu_interval_read_begin()
* @start: range virtual start address (inclusive)
* @end: range virtual end address (exclusive)
- * @pfns: array of pfns (big enough for the range)
- * @flags: pfn flags to match device driver page table
- * @values: pfn value for some special case (none, special, error, ...)
+ * @hmm_pfns: array of pfns (big enough for the range)
* @default_flags: default flags for the range (write, read, ... see hmm doc)
* @pfn_flags_mask: allows to mask pfn flags so that only default_flags matter
- * @pfn_shift: pfn shift value (should be <= PAGE_SHIFT)
* @dev_private_owner: owner of device private pages
*/
struct hmm_range {
@@ -85,42 +78,16 @@ struct hmm_range {
unsigned long notifier_seq;
unsigned long start;
unsigned long end;
- uint64_t *pfns;
- const uint64_t *flags;
- const uint64_t *values;
- uint64_t default_flags;
- uint64_t pfn_flags_mask;
- uint8_t pfn_shift;
+ unsigned long *hmm_pfns;
+ unsigned long default_flags;
+ unsigned long pfn_flags_mask;
void *dev_private_owner;
};
/*
- * hmm_device_entry_to_page() - return struct page pointed to by a device entry
- * @range: range use to decode device entry value
- * @entry: device entry value to get corresponding struct page from
- * Return: struct page pointer if entry is a valid, NULL otherwise
- *
- * If the device entry is valid (ie valid flag set) then return the struct page
- * matching the entry value. Otherwise return NULL.
- */
-static inline struct page *hmm_device_entry_to_page(const struct hmm_range *range,
- uint64_t entry)
-{
- if (entry == range->values[HMM_PFN_NONE])
- return NULL;
- if (entry == range->values[HMM_PFN_ERROR])
- return NULL;
- if (entry == range->values[HMM_PFN_SPECIAL])
- return NULL;
- if (!(entry & range->flags[HMM_PFN_VALID]))
- return NULL;
- return pfn_to_page(entry >> range->pfn_shift);
-}
-
-/*
* Please see Documentation/vm/hmm.rst for how to use the range API.
*/
-long hmm_range_fault(struct hmm_range *range);
+int hmm_range_fault(struct hmm_range *range);
/*
* HMM_RANGE_DEFAULT_TIMEOUT - default timeout (ms) when waiting for a range
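A hedged sketch of how a driver might fault a single page with the reworked API above (retry handling and the mmap lock are omitted for brevity); the function name is hypothetical, the fields and flags come from this diff:

static struct page *demo_fault_one_page(struct mmu_interval_notifier *notifier,
					unsigned long addr)
{
	unsigned long hmm_pfn;
	struct hmm_range range = {
		.notifier	= notifier,
		.start		= addr,
		.end		= addr + PAGE_SIZE,
		.hmm_pfns	= &hmm_pfn,
		.default_flags	= HMM_PFN_REQ_FAULT | HMM_PFN_REQ_WRITE,
	};
	int ret;

	range.notifier_seq = mmu_interval_read_begin(notifier);
	ret = hmm_range_fault(&range);	/* now returns 0 or -errno */
	if (ret)
		return ERR_PTR(ret);

	/* Output flags and the PFN share one unsigned long. */
	return hmm_pfn_to_page(hmm_pfn);
}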
diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h
index 5e609f25878c..363d4a814aa1 100644
--- a/include/linux/hwmon.h
+++ b/include/linux/hwmon.h
@@ -436,6 +436,9 @@ devm_hwmon_device_register_with_info(struct device *dev,
void hwmon_device_unregister(struct device *dev);
void devm_hwmon_device_unregister(struct device *dev);
+int hwmon_notify_event(struct device *dev, enum hwmon_sensor_types type,
+ u32 attr, int channel);
+
/**
* hwmon_is_bad_char - Is the char invalid in a hwmon name
* @ch: the char to be considered
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 692c89ccf5df..40df3103e890 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -117,7 +117,7 @@ struct hv_ring_buffer {
* Ring data starts here + RingDataStartOffset
* !!! DO NOT place any fields below this !!!
*/
- u8 buffer[0];
+ u8 buffer[];
} __packed;
struct hv_ring_buffer_info {
@@ -313,7 +313,7 @@ struct vmadd_remove_transfer_page_set {
struct gpa_range {
u32 byte_count;
u32 byte_offset;
- u64 pfn_array[0];
+ u64 pfn_array[];
};
/*
@@ -425,7 +425,7 @@ enum vmbus_channel_message_type {
CHANNELMSG_19 = 19,
CHANNELMSG_20 = 20,
CHANNELMSG_TL_CONNECT_REQUEST = 21,
- CHANNELMSG_22 = 22,
+ CHANNELMSG_MODIFYCHANNEL = 22,
CHANNELMSG_TL_CONNECT_RESULT = 23,
CHANNELMSG_COUNT
};
@@ -563,7 +563,7 @@ struct vmbus_channel_gpadl_header {
u32 gpadl;
u16 range_buflen;
u16 rangecount;
- struct gpa_range range[0];
+ struct gpa_range range[];
} __packed;
/* This is the followup packet that contains more PFNs. */
@@ -571,7 +571,7 @@ struct vmbus_channel_gpadl_body {
struct vmbus_channel_message_header header;
u32 msgnumber;
u32 gpadl;
- u64 pfn[0];
+ u64 pfn[];
} __packed;
struct vmbus_channel_gpadl_created {
@@ -620,6 +620,13 @@ struct vmbus_channel_tl_connect_request {
guid_t host_service_id;
} __packed;
+/* Modify Channel parameters, cf. vmbus_send_modifychannel() */
+struct vmbus_channel_modifychannel {
+ struct vmbus_channel_message_header header;
+ u32 child_relid;
+ u32 target_vp;
+} __packed;
+
struct vmbus_channel_version_response {
struct vmbus_channel_message_header header;
u8 version_supported;
@@ -672,7 +679,7 @@ struct vmbus_channel_msginfo {
* The channel message that goes out on the "wire".
* It will contain at minimum the VMBUS_CHANNEL_MESSAGE_HEADER header
*/
- unsigned char msg[0];
+ unsigned char msg[];
};
struct vmbus_close_msg {
@@ -689,11 +696,6 @@ union hv_connection_id {
} u;
};
-enum hv_numa_policy {
- HV_BALANCED = 0,
- HV_LOCALIZED,
-};
-
enum vmbus_device_type {
HV_IDE = 0,
HV_SCSI,
@@ -771,6 +773,15 @@ struct vmbus_channel {
void (*onchannel_callback)(void *context);
void *channel_callback_context;
+ void (*change_target_cpu_callback)(struct vmbus_channel *channel,
+ u32 old, u32 new);
+
+ /*
+ * Synchronize channel scheduling and channel removal; see the inline
+ * comments in vmbus_chan_sched() and vmbus_reset_channel_cb().
+ */
+ spinlock_t sched_lock;
+
/*
* A channel can be marked for one of three modes of reading:
 * BATCHED - callback called from tasklet and should read
@@ -802,10 +813,6 @@ struct vmbus_channel {
u32 target_vp;
/* The corresponding CPUID in the guest */
u32 target_cpu;
- /*
- * State to manage the CPU affiliation of channels.
- */
- struct cpumask alloced_cpus_in_node;
int numa_node;
/*
* Support for sub-channels. For high performance devices,
@@ -854,11 +861,6 @@ struct vmbus_channel {
* Support per-channel state for use by vmbus drivers.
*/
void *per_channel_state;
- /*
- * To support per-cpu lookup mapping of relid to channel,
- * link up channels based on their CPU affinity.
- */
- struct list_head percpu_list;
/*
* Defer freeing channel until after all cpu's have
@@ -897,19 +899,14 @@ struct vmbus_channel {
*/
bool low_latency;
+ bool probe_done;
+
/*
- * NUMA distribution policy:
- * We support two policies:
- * 1) Balanced: Here all performance critical channels are
- * distributed evenly amongst all the NUMA nodes.
- * This policy will be the default policy.
- * 2) Localized: All channels of a given instance of a
- * performance critical service will be assigned CPUs
- * within a selected NUMA node.
+ * Cache the device ID here for easy access; this is useful, in
+ * particular, in situations where the channel's device_obj has
+ * not been allocated/initialized yet.
*/
- enum hv_numa_policy affinity_policy;
-
- bool probe_done;
+ u16 device_id;
/*
* We must offload the handling of the primary/sub channels
@@ -964,12 +961,6 @@ static inline bool is_sub_channel(const struct vmbus_channel *c)
return c->offermsg.offer.sub_channel_index != 0;
}
-static inline void set_channel_affinity_state(struct vmbus_channel *c,
- enum hv_numa_policy policy)
-{
- c->affinity_policy = policy;
-}
-
static inline void set_channel_read_mode(struct vmbus_channel *c,
enum hv_callback_mode mode)
{
@@ -1017,7 +1008,7 @@ static inline void clear_low_latency_mode(struct vmbus_channel *c)
c->low_latency = false;
}
-void vmbus_onmessage(void *context);
+void vmbus_onmessage(struct vmbus_channel_message_header *hdr);
int vmbus_request_offers(void);
@@ -1531,6 +1522,7 @@ extern __u32 vmbus_proto_version;
int vmbus_send_tl_connect_request(const guid_t *shv_guest_servie_id,
const guid_t *shv_host_servie_id);
+int vmbus_send_modifychannel(u32 child_relid, u32 target_vp);
void vmbus_set_event(struct vmbus_channel *channel);
/* Get the start of the ring buffer. */
diff --git a/include/linux/idr.h b/include/linux/idr.h
index ac6e946b6767..3ade03e5c7af 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -171,7 +171,7 @@ static inline bool idr_is_empty(const struct idr *idr)
*/
static inline void idr_preload_end(void)
{
- preempt_enable();
+ local_unlock(&radix_tree_preloads.lock);
}
/**
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 8b09463dae0d..a5c219c29b10 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -155,8 +155,7 @@ loff_t iomap_apply(struct inode *inode, loff_t pos, loff_t length,
ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from,
const struct iomap_ops *ops);
int iomap_readpage(struct page *page, const struct iomap_ops *ops);
-int iomap_readpages(struct address_space *mapping, struct list_head *pages,
- unsigned nr_pages, const struct iomap_ops *ops);
+void iomap_readahead(struct readahead_control *, const struct iomap_ops *ops);
int iomap_set_page_dirty(struct page *page);
int iomap_is_partially_uptodate(struct page *page, unsigned long from,
unsigned long count);
@@ -252,6 +251,8 @@ int iomap_writepages(struct address_space *mapping,
struct iomap_dio_ops {
int (*end_io)(struct kiocb *iocb, ssize_t size, int error,
unsigned flags);
+ blk_qc_t (*submit_io)(struct inode *inode, struct iomap *iomap,
+ struct bio *bio, loff_t file_offset);
};
ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
diff --git a/include/linux/irq_sim.h b/include/linux/irq_sim.h
index 4500d453a63e..ab831e5ae748 100644
--- a/include/linux/irq_sim.h
+++ b/include/linux/irq_sim.h
@@ -1,41 +1,26 @@
/* SPDX-License-Identifier: GPL-2.0+ */
/*
* Copyright (C) 2017-2018 Bartosz Golaszewski <brgl@bgdev.pl>
+ * Copyright (C) 2020 Bartosz Golaszewski <bgolaszewski@baylibre.com>
*/
#ifndef _LINUX_IRQ_SIM_H
#define _LINUX_IRQ_SIM_H
-#include <linux/irq_work.h>
#include <linux/device.h>
+#include <linux/fwnode.h>
+#include <linux/irqdomain.h>
/*
* Provides a framework for allocating simulated interrupts which can be
* requested like normal irqs and enqueued from process context.
*/
-struct irq_sim_work_ctx {
- struct irq_work work;
- unsigned long *pending;
-};
-
-struct irq_sim_irq_ctx {
- int irqnum;
- bool enabled;
-};
-
-struct irq_sim {
- struct irq_sim_work_ctx work_ctx;
- int irq_base;
- unsigned int irq_count;
- struct irq_sim_irq_ctx *irqs;
-};
-
-int irq_sim_init(struct irq_sim *sim, unsigned int num_irqs);
-int devm_irq_sim_init(struct device *dev, struct irq_sim *sim,
- unsigned int num_irqs);
-void irq_sim_fini(struct irq_sim *sim);
-void irq_sim_fire(struct irq_sim *sim, unsigned int offset);
-int irq_sim_irqnum(struct irq_sim *sim, unsigned int offset);
+struct irq_domain *irq_domain_create_sim(struct fwnode_handle *fwnode,
+ unsigned int num_irqs);
+struct irq_domain *devm_irq_domain_create_sim(struct device *dev,
+ struct fwnode_handle *fwnode,
+ unsigned int num_irqs);
+void irq_domain_remove_sim(struct irq_domain *domain);
#endif /* _LINUX_IRQ_SIM_H */
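A hedged sketch of the reworked simulator API; the domain helpers are the ones declared above, while firing the line via irq_set_irqchip_state() is an assumption of this example:

static int demo_irq_sim_probe(struct device *dev, struct fwnode_handle *fwnode)
{
	struct irq_domain *domain;
	int irq;

	domain = devm_irq_domain_create_sim(dev, fwnode, 8);
	if (IS_ERR(domain))
		return PTR_ERR(domain);

	/* Map hwirq 0; it can then be requested like any other interrupt. */
	irq = irq_create_mapping(domain, 0);
	if (!irq)
		return -EINVAL;

	/* Assumption: enqueueing an interrupt means marking it pending. */
	return irq_set_irqchip_state(irq, IRQCHIP_STATE_PENDING, true);
}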
diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h
index 3b752e80c017..2735da5f839e 100644
--- a/include/linux/irq_work.h
+++ b/include/linux/irq_work.h
@@ -13,6 +13,8 @@
* busy NULL, 2 -> {free, claimed} : callback in progress, can be claimed
*/
+/* flags share CSD_FLAG_ space */
+
#define IRQ_WORK_PENDING BIT(0)
#define IRQ_WORK_BUSY BIT(1)
@@ -23,9 +25,12 @@
#define IRQ_WORK_CLAIMED (IRQ_WORK_PENDING | IRQ_WORK_BUSY)
+/*
+ * structure shares layout with call_single_data_t.
+ */
struct irq_work {
- atomic_t flags;
struct llist_node llnode;
+ atomic_t flags;
void (*func)(struct irq_work *);
};
@@ -53,9 +58,11 @@ void irq_work_sync(struct irq_work *work);
void irq_work_run(void);
bool irq_work_needs_cpu(void);
+void irq_work_single(void *arg);
#else
static inline bool irq_work_needs_cpu(void) { return false; }
static inline void irq_work_run(void) { }
+static inline void irq_work_single(void *arg) { }
#endif
#endif /* _LINUX_IRQ_WORK_H */
diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 8d062e86d954..b37350c4fe37 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -450,6 +450,7 @@ extern void irq_domain_set_info(struct irq_domain *domain, unsigned int virq,
irq_hw_number_t hwirq, struct irq_chip *chip,
void *chip_data, irq_flow_handler_t handler,
void *handler_data, const char *handler_name);
+extern void irq_domain_reset_irq_data(struct irq_data *irq_data);
#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
extern struct irq_domain *irq_domain_create_hierarchy(struct irq_domain *parent,
unsigned int flags, unsigned int size,
@@ -491,7 +492,6 @@ extern int irq_domain_set_hwirq_and_chip(struct irq_domain *domain,
irq_hw_number_t hwirq,
struct irq_chip *chip,
void *chip_data);
-extern void irq_domain_reset_irq_data(struct irq_data *irq_data);
extern void irq_domain_free_irqs_common(struct irq_domain *domain,
unsigned int virq,
unsigned int nr_irqs);
diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
index 61a9ced3aa50..d7f7e436c3af 100644
--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -19,16 +19,20 @@
#ifdef CONFIG_PROVE_LOCKING
extern void lockdep_softirqs_on(unsigned long ip);
extern void lockdep_softirqs_off(unsigned long ip);
+ extern void lockdep_hardirqs_on_prepare(unsigned long ip);
extern void lockdep_hardirqs_on(unsigned long ip);
extern void lockdep_hardirqs_off(unsigned long ip);
#else
static inline void lockdep_softirqs_on(unsigned long ip) { }
static inline void lockdep_softirqs_off(unsigned long ip) { }
+ static inline void lockdep_hardirqs_on_prepare(unsigned long ip) { }
static inline void lockdep_hardirqs_on(unsigned long ip) { }
static inline void lockdep_hardirqs_off(unsigned long ip) { }
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
+ extern void trace_hardirqs_on_prepare(void);
+ extern void trace_hardirqs_off_prepare(void);
extern void trace_hardirqs_on(void);
extern void trace_hardirqs_off(void);
# define lockdep_hardirq_context(p) ((p)->hardirq_context)
@@ -96,6 +100,8 @@ do { \
} while (0)
#else
+# define trace_hardirqs_on_prepare() do { } while (0)
+# define trace_hardirqs_off_prepare() do { } while (0)
# define trace_hardirqs_on() do { } while (0)
# define trace_hardirqs_off() do { } while (0)
# define lockdep_hardirq_context(p) 0
diff --git a/include/linux/kdb.h b/include/linux/kdb.h
index 24cd447659e0..0125a677b67f 100644
--- a/include/linux/kdb.h
+++ b/include/linux/kdb.h
@@ -125,7 +125,7 @@ extern const char *kdb_diemsg;
#define KDB_FLAG_NO_I8042 (1 << 7) /* No i8042 chip is available, do
* not use keyboard */
-extern int kdb_flags; /* Global flags, see kdb_state for per cpu state */
+extern unsigned int kdb_flags; /* Global flags, see kdb_state for per cpu state */
extern void kdb_save_flags(void);
extern void kdb_restore_flags(void);
diff --git a/include/linux/keyslot-manager.h b/include/linux/keyslot-manager.h
new file mode 100644
index 000000000000..18f3f5346843
--- /dev/null
+++ b/include/linux/keyslot-manager.h
@@ -0,0 +1,106 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2019 Google LLC
+ */
+
+#ifndef __LINUX_KEYSLOT_MANAGER_H
+#define __LINUX_KEYSLOT_MANAGER_H
+
+#include <linux/bio.h>
+#include <linux/blk-crypto.h>
+
+struct blk_keyslot_manager;
+
+/**
+ * struct blk_ksm_ll_ops - functions to manage keyslots in hardware
+ * @keyslot_program: Program the specified key into the specified slot in the
+ * inline encryption hardware.
+ * @keyslot_evict: Evict key from the specified keyslot in the hardware.
+ * The key is provided so that e.g. dm layers can evict
+ * keys from the devices that they map over.
+ * Returns 0 on success, -errno otherwise.
+ *
+ * This structure should be provided by storage device drivers when they set up
+ * a keyslot manager - this structure holds the function ptrs that the keyslot
+ * manager will use to manipulate keyslots in the hardware.
+ */
+struct blk_ksm_ll_ops {
+ int (*keyslot_program)(struct blk_keyslot_manager *ksm,
+ const struct blk_crypto_key *key,
+ unsigned int slot);
+ int (*keyslot_evict)(struct blk_keyslot_manager *ksm,
+ const struct blk_crypto_key *key,
+ unsigned int slot);
+};
+
+struct blk_keyslot_manager {
+ /*
+ * The struct blk_ksm_ll_ops that this keyslot manager will use
+ * to perform operations like programming and evicting keys on the
+ * device
+ */
+ struct blk_ksm_ll_ops ksm_ll_ops;
+
+ /*
+ * The maximum number of bytes supported for specifying the data unit
+ * number.
+ */
+ unsigned int max_dun_bytes_supported;
+
+ /*
+ * Array of size BLK_ENCRYPTION_MODE_MAX of bitmasks that represents
+ * whether a crypto mode and data unit size are supported. The i'th
+ * bit of crypto_modes_supported[crypto_mode] is set iff a data unit
+ * size of (1 << i) is supported. We only support data unit sizes
+ * that are powers of 2.
+ */
+ unsigned int crypto_modes_supported[BLK_ENCRYPTION_MODE_MAX];
+
+ /* Device for runtime power management (NULL if none) */
+ struct device *dev;
+
+ /* Here onwards are *private* fields for internal keyslot manager use */
+
+ unsigned int num_slots;
+
+ /* Protects programming and evicting keys from the device */
+ struct rw_semaphore lock;
+
+ /* List of idle slots, with least recently used slot at front */
+ wait_queue_head_t idle_slots_wait_queue;
+ struct list_head idle_slots;
+ spinlock_t idle_slots_lock;
+
+ /*
+ * Hash table which maps struct blk_crypto_key * to keyslots, so that we
+ * can find a key's keyslot in O(1) time rather than O(num_slots).
+ * Protected by 'lock'.
+ */
+ struct hlist_head *slot_hashtable;
+ unsigned int log_slot_ht_size;
+
+ /* Per-keyslot data */
+ struct blk_ksm_keyslot *slots;
+};
+
+int blk_ksm_init(struct blk_keyslot_manager *ksm, unsigned int num_slots);
+
+blk_status_t blk_ksm_get_slot_for_key(struct blk_keyslot_manager *ksm,
+ const struct blk_crypto_key *key,
+ struct blk_ksm_keyslot **slot_ptr);
+
+unsigned int blk_ksm_get_slot_idx(struct blk_ksm_keyslot *slot);
+
+void blk_ksm_put_slot(struct blk_ksm_keyslot *slot);
+
+bool blk_ksm_crypto_cfg_supported(struct blk_keyslot_manager *ksm,
+ const struct blk_crypto_config *cfg);
+
+int blk_ksm_evict_key(struct blk_keyslot_manager *ksm,
+ const struct blk_crypto_key *key);
+
+void blk_ksm_reprogram_all_keys(struct blk_keyslot_manager *ksm);
+
+void blk_ksm_destroy(struct blk_keyslot_manager *ksm);
+
+#endif /* __LINUX_KEYSLOT_MANAGER_H */
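A hedged sketch of a storage driver registering its inline-encryption hardware with the new keyslot manager; the demo_* hooks are hypothetical stand-ins for real hardware programming, while the structure fields and helpers match the header above:

static int demo_keyslot_program(struct blk_keyslot_manager *ksm,
				const struct blk_crypto_key *key,
				unsigned int slot)
{
	/* A real driver would write the key into hardware slot 'slot'. */
	return 0;
}

static int demo_keyslot_evict(struct blk_keyslot_manager *ksm,
			      const struct blk_crypto_key *key,
			      unsigned int slot)
{
	/* A real driver would clear the hardware keyslot here. */
	return 0;
}

static int demo_ksm_setup(struct blk_keyslot_manager *ksm, struct device *dev)
{
	int err;

	err = blk_ksm_init(ksm, 32);		/* 32 hardware keyslots */
	if (err)
		return err;

	ksm->ksm_ll_ops.keyslot_program = demo_keyslot_program;
	ksm->ksm_ll_ops.keyslot_evict = demo_keyslot_evict;
	ksm->max_dun_bytes_supported = 8;
	/* Bit 12 set => 4096-byte data units supported for AES-256-XTS. */
	ksm->crypto_modes_supported[BLK_ENCRYPTION_MODE_AES_256_XTS] = 4096;
	ksm->dev = dev;
	return 0;
}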
diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h
index b072aeb1fd78..c62d76478adc 100644
--- a/include/linux/kgdb.h
+++ b/include/linux/kgdb.h
@@ -269,6 +269,9 @@ struct kgdb_arch {
* @write_char: Pointer to a function that will write one char.
* @flush: Pointer to a function that will flush any pending writes.
* @init: Pointer to a function that will initialize the device.
+ * @deinit: Pointer to a function that will deinit the device. Implies that
+ * this I/O driver is temporary and expects to be replaced. Called when
+ * an I/O driver is replaced or explicitly unregistered.
* @pre_exception: Pointer to a function that will do any prep work for
* the I/O driver.
* @post_exception: Pointer to a function that will do any cleanup work
@@ -282,6 +285,7 @@ struct kgdb_io {
void (*write_char) (u8);
void (*flush) (void);
int (*init) (void);
+ void (*deinit) (void);
void (*pre_exception) (void);
void (*post_exception) (void);
int is_console;
@@ -298,7 +302,7 @@ extern bool kgdb_nmi_poll_knock(void);
#else
static inline int kgdb_register_nmi_console(void) { return 0; }
static inline int kgdb_unregister_nmi_console(void) { return 0; }
-static inline bool kgdb_nmi_poll_knock(void) { return 1; }
+static inline bool kgdb_nmi_poll_knock(void) { return true; }
#endif
extern int kgdb_register_io_module(struct kgdb_io *local_kgdb_io_ops);
@@ -323,7 +327,7 @@ extern void gdbstub_exit(int status);
extern int kgdb_single_step;
extern atomic_t kgdb_active;
#define in_dbg_master() \
- (raw_smp_processor_id() == atomic_read(&kgdb_active))
+ (irqs_disabled() && (smp_processor_id() == atomic_read(&kgdb_active)))
extern bool dbg_is_early;
extern void __init dbg_late_init(void);
extern void kgdb_panic(const char *msg);
diff --git a/include/linux/kmsg_dump.h b/include/linux/kmsg_dump.h
index 2e7a1e032c71..3378bcbe585e 100644
--- a/include/linux/kmsg_dump.h
+++ b/include/linux/kmsg_dump.h
@@ -25,9 +25,8 @@ enum kmsg_dump_reason {
KMSG_DUMP_PANIC,
KMSG_DUMP_OOPS,
KMSG_DUMP_EMERG,
- KMSG_DUMP_RESTART,
- KMSG_DUMP_HALT,
- KMSG_DUMP_POWEROFF,
+ KMSG_DUMP_SHUTDOWN,
+ KMSG_DUMP_MAX
};
/**
@@ -71,6 +70,8 @@ void kmsg_dump_rewind(struct kmsg_dumper *dumper);
int kmsg_dump_register(struct kmsg_dumper *dumper);
int kmsg_dump_unregister(struct kmsg_dumper *dumper);
+
+const char *kmsg_dump_reason_str(enum kmsg_dump_reason reason);
#else
static inline void kmsg_dump(enum kmsg_dump_reason reason)
{
@@ -112,6 +113,11 @@ static inline int kmsg_dump_unregister(struct kmsg_dumper *dumper)
{
return -EINVAL;
}
+
+static inline const char *kmsg_dump_reason_str(enum kmsg_dump_reason reason)
+{
+ return "Disabled";
+}
#endif
#endif /* _LINUX_KMSG_DUMP_H */
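A hedged sketch of a dumper consuming the new kmsg_dump_reason_str() helper; the dumper itself and its registration are illustrative only:

static void demo_dump(struct kmsg_dumper *dumper, enum kmsg_dump_reason reason)
{
	pr_info("demo: kernel log dump requested, reason: %s\n",
		kmsg_dump_reason_str(reason));
}

static struct kmsg_dumper demo_dumper = {
	.dump = demo_dump,
};

static int __init demo_dumper_init(void)
{
	return kmsg_dump_register(&demo_dumper);
}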
diff --git a/include/linux/kobject.h b/include/linux/kobject.h
index e2ca0a292e21..fc8d83e91379 100644
--- a/include/linux/kobject.h
+++ b/include/linux/kobject.h
@@ -7,7 +7,7 @@
* Copyright (c) 2006-2008 Greg Kroah-Hartman <greg@kroah.com>
* Copyright (c) 2006-2008 Novell Inc.
*
- * Please read Documentation/kobject.txt before using the kobject
+ * Please read Documentation/core-api/kobject.rst before using the kobject
* interface, ESPECIALLY the parts about reference counts and object
* destructors.
*/
diff --git a/include/linux/kobject_ns.h b/include/linux/kobject_ns.h
index 069aa2ebef90..2b5b64256cf4 100644
--- a/include/linux/kobject_ns.h
+++ b/include/linux/kobject_ns.h
@@ -8,7 +8,7 @@
*
* Split from kobject.h by David Howells (dhowells@redhat.com)
*
- * Please read Documentation/kobject.txt before using the kobject
+ * Please read Documentation/core-api/kobject.rst before using the kobject
* interface, ESPECIALLY the parts about reference counts and object
* destructors.
*/
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 131cc1527d68..f43b59b1294c 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -23,7 +23,7 @@
#include <linux/irqflags.h>
#include <linux/context_tracking.h>
#include <linux/irqbypass.h>
-#include <linux/swait.h>
+#include <linux/rcuwait.h>
#include <linux/refcount.h>
#include <linux/nospec.h>
#include <asm/signal.h>
@@ -277,7 +277,7 @@ struct kvm_vcpu {
struct mutex mutex;
struct kvm_run *run;
- struct swait_queue_head wq;
+ struct rcuwait wait;
struct pid __rcu *pid;
int sigset_active;
sigset_t sigset;
@@ -503,6 +503,7 @@ struct kvm {
struct srcu_struct srcu;
struct srcu_struct irq_srcu;
pid_t userspace_pid;
+ unsigned int max_halt_poll_ns;
};
#define kvm_err(fmt, ...) \
@@ -733,6 +734,9 @@ int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len);
int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
void *data, unsigned long len);
+int kvm_read_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
+ void *data, unsigned int offset,
+ unsigned long len);
int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data,
int offset, int len);
int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data,
@@ -869,7 +873,7 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state);
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
struct kvm_guest_debug *dbg);
-int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run);
+int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu);
int kvm_arch_init(void *opaque);
void kvm_arch_exit(void);
@@ -959,12 +963,12 @@ static inline bool kvm_arch_has_assigned_device(struct kvm *kvm)
}
#endif
-static inline struct swait_queue_head *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu)
+static inline struct rcuwait *kvm_arch_vcpu_get_wait(struct kvm_vcpu *vcpu)
{
#ifdef __KVM_HAVE_ARCH_WQP
- return vcpu->arch.wqp;
+ return vcpu->arch.waitp;
#else
- return &vcpu->wq;
+ return &vcpu->wait;
#endif
}
@@ -1133,6 +1137,11 @@ struct kvm_stats_debugfs_item {
#define KVM_DBGFS_GET_MODE(dbgfs_item) \
((dbgfs_item)->mode ? (dbgfs_item)->mode : 0644)
+#define VM_STAT(n, x, ...) \
+ { n, offsetof(struct kvm, stat.x), KVM_STAT_VM, ## __VA_ARGS__ }
+#define VCPU_STAT(n, x, ...) \
+ { n, offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU, ## __VA_ARGS__ }
+
extern struct kvm_stats_debugfs_item debugfs_entries[];
extern struct dentry *kvm_debugfs_dir;
@@ -1355,6 +1364,12 @@ static inline void kvm_vcpu_set_dy_eligible(struct kvm_vcpu *vcpu, bool val)
}
#endif /* CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */
+static inline bool kvm_is_visible_memslot(struct kvm_memory_slot *memslot)
+{
+ return (memslot && memslot->id < KVM_USER_MEM_SLOTS &&
+ !(memslot->flags & KVM_MEMSLOT_INVALID));
+}
+
struct kvm_vcpu *kvm_get_running_vcpu(void);
struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void);
diff --git a/include/linux/libata.h b/include/linux/libata.h
index cffa4714bfa8..af832852e620 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -1092,6 +1092,7 @@ extern int ata_scsi_ioctl(struct scsi_device *dev, unsigned int cmd,
#define ATA_SCSI_COMPAT_IOCTL /* empty */
#endif
extern int ata_scsi_queuecmd(struct Scsi_Host *h, struct scsi_cmnd *cmd);
+bool ata_scsi_dma_need_drain(struct request *rq);
extern int ata_sas_scsi_ioctl(struct ata_port *ap, struct scsi_device *dev,
unsigned int cmd, void __user *arg);
extern bool ata_link_online(struct ata_link *link);
@@ -1387,6 +1388,7 @@ extern struct device_attribute *ata_common_sdev_attrs[];
.ioctl = ata_scsi_ioctl, \
ATA_SCSI_COMPAT_IOCTL \
.queuecommand = ata_scsi_queuecmd, \
+ .dma_need_drain = ata_scsi_dma_need_drain, \
.can_queue = ATA_DEF_QUEUE, \
.tag_alloc_policy = BLK_TAG_ALLOC_RR, \
.this_id = ATA_SHT_THIS_ID, \
diff --git a/include/linux/linear_range.h b/include/linux/linear_range.h
new file mode 100644
index 000000000000..17b5943727d5
--- /dev/null
+++ b/include/linux/linear_range.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2020 ROHM Semiconductors */
+
+#ifndef LINEAR_RANGE_H
+#define LINEAR_RANGE_H
+
+#include <linux/types.h>
+
+/**
+ * struct linear_range - table of selector - value pairs
+ *
+ * Define a lookup table for a range of values. Intended to help when looking
+ * for a register value matching a certain physical measure (like voltage).
+ * Usable when an increment of one in the register always results in a constant
+ * increment of the physical measure (like voltage).
+ *
+ * @min: Lowest value in range
+ * @min_sel: Lowest selector for range
+ * @max_sel: Highest selector for range
+ * @step: Value step size
+ */
+struct linear_range {
+ unsigned int min;
+ unsigned int min_sel;
+ unsigned int max_sel;
+ unsigned int step;
+};
+
+unsigned int linear_range_values_in_range(const struct linear_range *r);
+unsigned int linear_range_values_in_range_array(const struct linear_range *r,
+ int ranges);
+unsigned int linear_range_get_max_value(const struct linear_range *r);
+
+int linear_range_get_value(const struct linear_range *r, unsigned int selector,
+ unsigned int *val);
+int linear_range_get_value_array(const struct linear_range *r, int ranges,
+ unsigned int selector, unsigned int *val);
+int linear_range_get_selector_low(const struct linear_range *r,
+ unsigned int val, unsigned int *selector,
+ bool *found);
+int linear_range_get_selector_high(const struct linear_range *r,
+ unsigned int val, unsigned int *selector,
+ bool *found);
+int linear_range_get_selector_low_array(const struct linear_range *r,
+ int ranges, unsigned int val,
+ unsigned int *selector, bool *found);
+
+#endif
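A hedged sketch of the lookup helper in use, e.g. a regulator range of 0.8 V to 1.6 V in 50 mV steps; the numbers and names are illustrative, while the structure and call match the header above:

static const struct linear_range demo_buck_range = {
	.min	 = 800000,	/* 0.8 V, in microvolts */
	.min_sel = 0,
	.max_sel = 16,		/* 0.8 V + 16 * 50 mV = 1.6 V */
	.step	 = 50000,
};

static int demo_selector_to_uV(unsigned int selector, unsigned int *uV)
{
	/* Returns a negative errno when the selector is outside the range. */
	return linear_range_get_value(&demo_buck_range, selector, uV);
}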
diff --git a/include/linux/linkage.h b/include/linux/linkage.h
index 9280209d1f62..d796ec20d114 100644
--- a/include/linux/linkage.h
+++ b/include/linux/linkage.h
@@ -105,7 +105,7 @@
/* === DEPRECATED annotations === */
-#ifndef CONFIG_X86
+#ifndef CONFIG_ARCH_USE_SYM_ANNOTATIONS
#ifndef GLOBAL
/* deprecated, use SYM_DATA*, SYM_ENTRY, or similar */
#define GLOBAL(name) \
@@ -118,10 +118,10 @@
#define ENTRY(name) \
SYM_FUNC_START(name)
#endif
-#endif /* CONFIG_X86 */
+#endif /* CONFIG_ARCH_USE_SYM_ANNOTATIONS */
#endif /* LINKER_SCRIPT */
-#ifndef CONFIG_X86
+#ifndef CONFIG_ARCH_USE_SYM_ANNOTATIONS
#ifndef WEAK
/* deprecated, use SYM_FUNC_START_WEAK* */
#define WEAK(name) \
@@ -143,7 +143,7 @@
#define ENDPROC(name) \
SYM_FUNC_END(name)
#endif
-#endif /* CONFIG_X86 */
+#endif /* CONFIG_ARCH_USE_SYM_ANNOTATIONS */
/* === generic annotations === */
diff --git a/include/linux/local_lock.h b/include/linux/local_lock.h
new file mode 100644
index 000000000000..e55010fa7329
--- /dev/null
+++ b/include/linux/local_lock.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_LOCAL_LOCK_H
+#define _LINUX_LOCAL_LOCK_H
+
+#include <linux/local_lock_internal.h>
+
+/**
+ * local_lock_init - Runtime initialize a lock instance
+ */
+#define local_lock_init(lock) __local_lock_init(lock)
+
+/**
+ * local_lock - Acquire a per CPU local lock
+ * @lock: The lock variable
+ */
+#define local_lock(lock) __local_lock(lock)
+
+/**
+ * local_lock_irq - Acquire a per CPU local lock and disable interrupts
+ * @lock: The lock variable
+ */
+#define local_lock_irq(lock) __local_lock_irq(lock)
+
+/**
+ * local_lock_irqsave - Acquire a per CPU local lock, save and disable
+ * interrupts
+ * @lock: The lock variable
+ * @flags: Storage for interrupt flags
+ */
+#define local_lock_irqsave(lock, flags) \
+ __local_lock_irqsave(lock, flags)
+
+/**
+ * local_unlock - Release a per CPU local lock
+ * @lock: The lock variable
+ */
+#define local_unlock(lock) __local_unlock(lock)
+
+/**
+ * local_unlock_irq - Release a per CPU local lock and enable interrupts
+ * @lock: The lock variable
+ */
+#define local_unlock_irq(lock) __local_unlock_irq(lock)
+
+/**
+ * local_unlock_irqrestore - Release a per CPU local lock and restore
+ * interrupt flags
+ * @lock: The lock variable
+ * @flags: Interrupt flags to restore
+ */
+#define local_unlock_irqrestore(lock, flags) \
+ __local_unlock_irqrestore(lock, flags)
+
+#endif
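A hedged sketch of the intended usage pattern: a named per-CPU lock that replaces an open-coded local_irq_save() and documents what it protects; the demo structure is hypothetical, the macros are the ones defined above:

struct demo_pcpu {
	local_lock_t	lock;
	unsigned long	count;
};

static DEFINE_PER_CPU(struct demo_pcpu, demo_pcpu) = {
	.lock = INIT_LOCAL_LOCK(lock),
};

static void demo_count_event(void)
{
	unsigned long flags;

	local_lock_irqsave(&demo_pcpu.lock, flags);
	this_cpu_inc(demo_pcpu.count);
	local_unlock_irqrestore(&demo_pcpu.lock, flags);
}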
diff --git a/include/linux/local_lock_internal.h b/include/linux/local_lock_internal.h
new file mode 100644
index 000000000000..4a8795b21d77
--- /dev/null
+++ b/include/linux/local_lock_internal.h
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_LOCAL_LOCK_H
+# error "Do not include directly, include linux/local_lock.h"
+#endif
+
+#include <linux/percpu-defs.h>
+#include <linux/lockdep.h>
+
+typedef struct {
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ struct lockdep_map dep_map;
+ struct task_struct *owner;
+#endif
+} local_lock_t;
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+# define LL_DEP_MAP_INIT(lockname) \
+ .dep_map = { \
+ .name = #lockname, \
+ .wait_type_inner = LD_WAIT_CONFIG, \
+ }
+#else
+# define LL_DEP_MAP_INIT(lockname)
+#endif
+
+#define INIT_LOCAL_LOCK(lockname) { LL_DEP_MAP_INIT(lockname) }
+
+#define __local_lock_init(lock) \
+do { \
+ static struct lock_class_key __key; \
+ \
+ debug_check_no_locks_freed((void *)lock, sizeof(*lock));\
+ lockdep_init_map_wait(&(lock)->dep_map, #lock, &__key, 0, LD_WAIT_CONFIG);\
+} while (0)
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+static inline void local_lock_acquire(local_lock_t *l)
+{
+ lock_map_acquire(&l->dep_map);
+ DEBUG_LOCKS_WARN_ON(l->owner);
+ l->owner = current;
+}
+
+static inline void local_lock_release(local_lock_t *l)
+{
+ DEBUG_LOCKS_WARN_ON(l->owner != current);
+ l->owner = NULL;
+ lock_map_release(&l->dep_map);
+}
+
+#else /* CONFIG_DEBUG_LOCK_ALLOC */
+static inline void local_lock_acquire(local_lock_t *l) { }
+static inline void local_lock_release(local_lock_t *l) { }
+#endif /* !CONFIG_DEBUG_LOCK_ALLOC */
+
+#define __local_lock(lock) \
+ do { \
+ preempt_disable(); \
+ local_lock_acquire(this_cpu_ptr(lock)); \
+ } while (0)
+
+#define __local_lock_irq(lock) \
+ do { \
+ local_irq_disable(); \
+ local_lock_acquire(this_cpu_ptr(lock)); \
+ } while (0)
+
+#define __local_lock_irqsave(lock, flags) \
+ do { \
+ local_irq_save(flags); \
+ local_lock_acquire(this_cpu_ptr(lock)); \
+ } while (0)
+
+#define __local_unlock(lock) \
+ do { \
+ local_lock_release(this_cpu_ptr(lock)); \
+ preempt_enable(); \
+ } while (0)
+
+#define __local_unlock_irq(lock) \
+ do { \
+ local_lock_release(this_cpu_ptr(lock)); \
+ local_irq_enable(); \
+ } while (0)
+
+#define __local_unlock_irqrestore(lock, flags) \
+ do { \
+ local_lock_release(this_cpu_ptr(lock)); \
+ local_irq_restore(flags); \
+ } while (0)
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 206774ac6946..8fce5c98a4b0 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -308,8 +308,27 @@ extern void lockdep_set_selftest_task(struct task_struct *task);
extern void lockdep_init_task(struct task_struct *task);
-extern void lockdep_off(void);
-extern void lockdep_on(void);
+/*
+ * Split the recursion counter in two to readily detect 'off' vs recursion.
+ */
+#define LOCKDEP_RECURSION_BITS 16
+#define LOCKDEP_OFF (1U << LOCKDEP_RECURSION_BITS)
+#define LOCKDEP_RECURSION_MASK (LOCKDEP_OFF - 1)
+
+/*
+ * lockdep_{off,on}() are macros to avoid tracing and kprobes; not inlines due
+ * to header dependencies.
+ */
+
+#define lockdep_off() \
+do { \
+ current->lockdep_recursion += LOCKDEP_OFF; \
+} while (0)
+
+#define lockdep_on() \
+do { \
+ current->lockdep_recursion -= LOCKDEP_OFF; \
+} while (0)
extern void lockdep_register_key(struct lock_class_key *key);
extern void lockdep_unregister_key(struct lock_class_key *key);
diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index 988ca0df7824..44d5422c18e4 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -77,7 +77,7 @@
* state. This is called immediately after commit_creds().
*
* Security hooks for mount using fs_context.
- * [See also Documentation/filesystems/mount_api.txt]
+ * [See also Documentation/filesystems/mount_api.rst]
*
* @fs_context_dup:
* Allocate and attach a security structure to sc->security. This pointer
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 977edd3b7bd8..bfe9533bb67e 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -45,6 +45,7 @@ enum memcg_memory_event {
MEMCG_MAX,
MEMCG_OOM,
MEMCG_OOM_KILL,
+ MEMCG_SWAP_HIGH,
MEMCG_SWAP_MAX,
MEMCG_SWAP_FAIL,
MEMCG_NR_MEMORY_EVENTS,
@@ -215,9 +216,6 @@ struct mem_cgroup {
struct page_counter kmem;
struct page_counter tcpmem;
- /* Upper bound of normal memory consumption range */
- unsigned long high;
-
/* Range enforcement for interrupt charges */
struct work_struct high_work;
diff --git a/include/linux/memstick.h b/include/linux/memstick.h
index 216a713bef7f..da4c65f9435f 100644
--- a/include/linux/memstick.h
+++ b/include/linux/memstick.h
@@ -288,7 +288,7 @@ struct memstick_host {
int (*set_param)(struct memstick_host *host,
enum memstick_param param,
int value);
- unsigned long private[0] ____cacheline_aligned;
+ unsigned long private[] ____cacheline_aligned;
};
struct memstick_driver {
diff --git a/include/linux/mfd/gsc.h b/include/linux/mfd/gsc.h
new file mode 100644
index 000000000000..6bd639c285b4
--- /dev/null
+++ b/include/linux/mfd/gsc.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2020 Gateworks Corporation
+ */
+#ifndef __LINUX_MFD_GSC_H_
+#define __LINUX_MFD_GSC_H_
+
+#include <linux/regmap.h>
+
+/* Device Addresses */
+#define GSC_MISC 0x20
+#define GSC_UPDATE 0x21
+#define GSC_GPIO 0x23
+#define GSC_HWMON 0x29
+#define GSC_EEPROM0 0x50
+#define GSC_EEPROM1 0x51
+#define GSC_EEPROM2 0x52
+#define GSC_EEPROM3 0x53
+#define GSC_RTC 0x68
+
+/* Register offsets */
+enum {
+ GSC_CTRL_0 = 0x00,
+ GSC_CTRL_1 = 0x01,
+ GSC_TIME = 0x02,
+ GSC_TIME_ADD = 0x06,
+ GSC_IRQ_STATUS = 0x0A,
+ GSC_IRQ_ENABLE = 0x0B,
+ GSC_FW_CRC = 0x0C,
+ GSC_FW_VER = 0x0E,
+ GSC_WP = 0x0F,
+};
+
+/* Bit definitions */
+#define GSC_CTRL_0_PB_HARD_RESET 0
+#define GSC_CTRL_0_PB_CLEAR_SECURE_KEY 1
+#define GSC_CTRL_0_PB_SOFT_POWER_DOWN 2
+#define GSC_CTRL_0_PB_BOOT_ALTERNATE 3
+#define GSC_CTRL_0_PERFORM_CRC 4
+#define GSC_CTRL_0_TAMPER_DETECT 5
+#define GSC_CTRL_0_SWITCH_HOLD 6
+
+#define GSC_CTRL_1_SLEEP_ENABLE 0
+#define GSC_CTRL_1_SLEEP_ACTIVATE 1
+#define GSC_CTRL_1_SLEEP_ADD 2
+#define GSC_CTRL_1_SLEEP_NOWAKEPB 3
+#define GSC_CTRL_1_WDT_TIME 4
+#define GSC_CTRL_1_WDT_ENABLE 5
+#define GSC_CTRL_1_SWITCH_BOOT_ENABLE 6
+#define GSC_CTRL_1_SWITCH_BOOT_CLEAR 7
+
+#define GSC_IRQ_PB 0
+#define GSC_IRQ_KEY_ERASED 1
+#define GSC_IRQ_EEPROM_WP 2
+#define GSC_IRQ_RESV 3
+#define GSC_IRQ_GPIO 4
+#define GSC_IRQ_TAMPER 5
+#define GSC_IRQ_WDT_TIMEOUT 6
+#define GSC_IRQ_SWITCH_HOLD 7
+
+int gsc_read(void *context, unsigned int reg, unsigned int *val);
+int gsc_write(void *context, unsigned int reg, unsigned int val);
+
+struct gsc_dev {
+ struct device *dev;
+
+ struct i2c_client *i2c; /* 0x20: interrupt controller, WDT */
+ struct i2c_client *i2c_hwmon; /* 0x29: hwmon, fan controller */
+
+ struct regmap *regmap;
+
+ unsigned int fwver;
+ unsigned short fwcrc;
+};
+
+#endif /* __LINUX_MFD_GSC_H_ */
diff --git a/include/linux/mfd/intel_pmc_bxt.h b/include/linux/mfd/intel_pmc_bxt.h
new file mode 100644
index 000000000000..f51a43d25ffd
--- /dev/null
+++ b/include/linux/mfd/intel_pmc_bxt.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef MFD_INTEL_PMC_BXT_H
+#define MFD_INTEL_PMC_BXT_H
+
+/* GCR reg offsets from GCR base */
+#define PMC_GCR_PMC_CFG_REG 0x08
+#define PMC_GCR_TELEM_DEEP_S0IX_REG 0x78
+#define PMC_GCR_TELEM_SHLW_S0IX_REG 0x80
+
+/* PMC_CFG_REG bit masks */
+#define PMC_CFG_NO_REBOOT_EN BIT(4)
+
+/**
+ * struct intel_pmc_dev - Intel PMC device structure
+ * @dev: Pointer to the parent PMC device
+ * @scu: Pointer to the SCU IPC device data structure
+ * @gcr_mem_base: Virtual base address of GCR (Global Configuration Registers)
+ * @gcr_lock: Lock used to serialize access to GCR registers
+ * @telem_base: Pointer to telemetry SSRAM base resource or %NULL if not
+ * available
+ */
+struct intel_pmc_dev {
+ struct device *dev;
+ struct intel_scu_ipc_dev *scu;
+ void __iomem *gcr_mem_base;
+ spinlock_t gcr_lock;
+ struct resource *telem_base;
+};
+
+#if IS_ENABLED(CONFIG_MFD_INTEL_PMC_BXT)
+int intel_pmc_gcr_read64(struct intel_pmc_dev *pmc, u32 offset, u64 *data);
+int intel_pmc_gcr_update(struct intel_pmc_dev *pmc, u32 offset, u32 mask, u32 val);
+int intel_pmc_s0ix_counter_read(struct intel_pmc_dev *pmc, u64 *data);
+#else
+static inline int intel_pmc_gcr_read64(struct intel_pmc_dev *pmc, u32 offset,
+ u64 *data)
+{
+ return -ENOTSUPP;
+}
+
+static inline int intel_pmc_gcr_update(struct intel_pmc_dev *pmc, u32 offset,
+ u32 mask, u32 val)
+{
+ return -ENOTSUPP;
+}
+
+static inline int intel_pmc_s0ix_counter_read(struct intel_pmc_dev *pmc, u64 *data)
+{
+ return -ENOTSUPP;
+}
+#endif
+
+#endif /* MFD_INTEL_PMC_BXT_H */
diff --git a/include/linux/mfd/intel_soc_pmic.h b/include/linux/mfd/intel_soc_pmic.h
index bfecd6bd4990..6a88e34cb955 100644
--- a/include/linux/mfd/intel_soc_pmic.h
+++ b/include/linux/mfd/intel_soc_pmic.h
@@ -13,6 +13,20 @@
#include <linux/regmap.h>
+/**
+ * struct intel_soc_pmic - Intel SoC PMIC data
+ * @irq: Master interrupt number of the parent PMIC device
+ * @regmap: Pointer to the parent PMIC device regmap structure
+ * @irq_chip_data: IRQ chip data for the PMIC itself
+ * @irq_chip_data_pwrbtn: Chained IRQ chip data for the Power Button
+ * @irq_chip_data_tmu: Chained IRQ chip data for the Time Management Unit
+ * @irq_chip_data_bcu: Chained IRQ chip data for the Burst Control Unit
+ * @irq_chip_data_adc: Chained IRQ chip data for the General Purpose ADC
+ * @irq_chip_data_chgr: Chained IRQ chip data for the External Charger
+ * @irq_chip_data_crit: Chained IRQ chip data for the Critical Event Handler
+ * @dev: Pointer to the parent PMIC device
+ * @scu: Pointer to the SCU IPC device data structure
+ */
struct intel_soc_pmic {
int irq;
struct regmap *regmap;
@@ -24,6 +38,7 @@ struct intel_soc_pmic {
struct regmap_irq_chip_data *irq_chip_data_chgr;
struct regmap_irq_chip_data *irq_chip_data_crit;
struct device *dev;
+ struct intel_scu_ipc_dev *scu;
};
int intel_soc_pmic_exec_mipi_pmic_seq_element(u16 i2c_address, u32 reg_address,
diff --git a/include/linux/mfd/max8998.h b/include/linux/mfd/max8998.h
index 061af220dcd3..79c020bd0c70 100644
--- a/include/linux/mfd/max8998.h
+++ b/include/linux/mfd/max8998.h
@@ -39,6 +39,7 @@ enum {
MAX8998_ENVICHG,
MAX8998_ESAFEOUT1,
MAX8998_ESAFEOUT2,
+ MAX8998_CHARGER,
};
/**
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7ad23d28c87d..59adb47efc55 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -325,17 +325,13 @@ extern unsigned int kobjsize(const void *objp);
#elif defined(CONFIG_SPARC64)
# define VM_SPARC_ADI VM_ARCH_1 /* Uses ADI tag for access control */
# define VM_ARCH_CLEAR VM_SPARC_ADI
+#elif defined(CONFIG_ARM64)
+# define VM_ARM64_BTI VM_ARCH_1 /* BTI guarded page, a.k.a. GP bit */
+# define VM_ARCH_CLEAR VM_ARM64_BTI
#elif !defined(CONFIG_MMU)
# define VM_MAPPED_COPY VM_ARCH_1 /* T if mapped copy of data (nommu mmap) */
#endif
-#if defined(CONFIG_X86_INTEL_MPX)
-/* MPX specific bounds table or bounds directory */
-# define VM_MPX VM_HIGH_ARCH_4
-#else
-# define VM_MPX VM_NONE
-#endif
-
#ifndef VM_GROWSUP
# define VM_GROWSUP VM_NONE
#endif
@@ -1230,7 +1226,7 @@ void unpin_user_pages(struct page **pages, unsigned long npages);
 * used to track the pincount (instead of using the GUP_PIN_COUNTING_BIAS
* scheme).
*
- * For more information, please see Documentation/vm/pin_user_pages.rst.
+ * For more information, please see Documentation/core-api/pin_user_pages.rst.
*
* @page: pointer to page to be queried.
* @Return: True, if it is likely that the page has been "dma-pinned".
@@ -1713,6 +1709,8 @@ long get_user_pages_locked(unsigned long start, unsigned long nr_pages,
unsigned int gup_flags, struct page **pages, int *locked);
long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
struct page **pages, unsigned int gup_flags);
+long pin_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
+ struct page **pages, unsigned int gup_flags);
int get_user_pages_fast(unsigned long start, int nr_pages,
unsigned int gup_flags, struct page **pages);
@@ -2089,13 +2087,54 @@ static inline pud_t *pud_alloc(struct mm_struct *mm, p4d_t *p4d,
return (unlikely(p4d_none(*p4d)) && __pud_alloc(mm, p4d, address)) ?
NULL : pud_offset(p4d, address);
}
+
+static inline p4d_t *p4d_alloc_track(struct mm_struct *mm, pgd_t *pgd,
+ unsigned long address,
+ pgtbl_mod_mask *mod_mask)
+
+{
+ if (unlikely(pgd_none(*pgd))) {
+ if (__p4d_alloc(mm, pgd, address))
+ return NULL;
+ *mod_mask |= PGTBL_PGD_MODIFIED;
+ }
+
+ return p4d_offset(pgd, address);
+}
+
#endif /* !__ARCH_HAS_5LEVEL_HACK */
+static inline pud_t *pud_alloc_track(struct mm_struct *mm, p4d_t *p4d,
+ unsigned long address,
+ pgtbl_mod_mask *mod_mask)
+{
+ if (unlikely(p4d_none(*p4d))) {
+ if (__pud_alloc(mm, p4d, address))
+ return NULL;
+ *mod_mask |= PGTBL_P4D_MODIFIED;
+ }
+
+ return pud_offset(p4d, address);
+}
+
static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
{
return (unlikely(pud_none(*pud)) && __pmd_alloc(mm, pud, address))?
NULL: pmd_offset(pud, address);
}
+
+static inline pmd_t *pmd_alloc_track(struct mm_struct *mm, pud_t *pud,
+ unsigned long address,
+ pgtbl_mod_mask *mod_mask)
+{
+ if (unlikely(pud_none(*pud))) {
+ if (__pmd_alloc(mm, pud, address))
+ return NULL;
+ *mod_mask |= PGTBL_PUD_MODIFIED;
+ }
+
+ return pmd_offset(pud, address);
+}
#endif /* CONFIG_MMU */
#if USE_SPLIT_PTE_PTLOCKS
@@ -2211,6 +2250,11 @@ static inline void pgtable_pte_page_dtor(struct page *page)
((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd))? \
NULL: pte_offset_kernel(pmd, address))
+#define pte_alloc_kernel_track(pmd, address, mask) \
+ ((unlikely(pmd_none(*(pmd))) && \
+ (__pte_alloc_kernel(pmd) || ({*(mask)|=PGTBL_PMD_MODIFIED;0;})))?\
+ NULL: pte_offset_kernel(pmd, address))
+
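The *_alloc_track() helpers added here behave like the plain allocators but also record which page-table levels were newly populated in a pgtbl_mod_mask, so a caller can later synchronize only the levels that actually changed. A hedged sketch of a kernel-address walk built on them; the function itself and the ARCH_PAGE_TABLE_SYNC_MASK / arch_sync_kernel_mappings() consumer are illustrative assumptions rather than part of this hunk.

/* Hypothetical walk that populates one PTE for a kernel address and
 * remembers which table levels it had to allocate along the way. */
static int example_populate_kernel_pte(unsigned long addr, pte_t entry)
{
	pgtbl_mod_mask mask = 0;
	pgd_t *pgd = pgd_offset_k(addr);
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	p4d = p4d_alloc_track(&init_mm, pgd, addr, &mask);
	if (!p4d)
		return -ENOMEM;
	pud = pud_alloc_track(&init_mm, p4d, addr, &mask);
	if (!pud)
		return -ENOMEM;
	pmd = pmd_alloc_track(&init_mm, pud, addr, &mask);
	if (!pmd)
		return -ENOMEM;
	pte = pte_alloc_kernel_track(pmd, addr, &mask);
	if (!pte)
		return -ENOMEM;

	set_pte_at(&init_mm, addr, pte, entry);

	/* Only synchronize upper-level tables if one was actually added
	 * (illustrative; the real consumer would be the arch sync hook). */
	if (mask & ARCH_PAGE_TABLE_SYNC_MASK)
		arch_sync_kernel_mappings(addr, addr + PAGE_SIZE);
	return 0;
}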
#if USE_SPLIT_PMD_PTLOCKS
static struct page *pmd_to_page(pmd_t *pmd)
@@ -2612,25 +2656,6 @@ extern vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf);
int __must_check write_one_page(struct page *page);
void task_dirty_inc(struct task_struct *tsk);
-/* readahead.c */
-#define VM_READAHEAD_PAGES (SZ_128K / PAGE_SIZE)
-
-int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
- pgoff_t offset, unsigned long nr_to_read);
-
-void page_cache_sync_readahead(struct address_space *mapping,
- struct file_ra_state *ra,
- struct file *filp,
- pgoff_t offset,
- unsigned long size);
-
-void page_cache_async_readahead(struct address_space *mapping,
- struct file_ra_state *ra,
- struct file *filp,
- struct page *pg,
- pgoff_t offset,
- unsigned long size);
-
extern unsigned long stack_guard_gap;
/* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */
extern int expand_stack(struct vm_area_struct *vma, unsigned long address);
@@ -2845,7 +2870,7 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
* releasing pages: get_user_pages*() pages must be released via put_page(),
* while pin_user_pages*() pages must be released via unpin_user_page().
*
- * Please see Documentation/vm/pin_user_pages.rst for more information.
+ * Please see Documentation/core-api/pin_user_pages.rst for more information.
*/
static inline int vm_fault_to_errno(vm_fault_t vm_fault, int foll_flags)
@@ -3023,6 +3048,7 @@ enum mf_flags {
};
extern int memory_failure(unsigned long pfn, int flags);
extern void memory_failure_queue(unsigned long pfn, int flags);
+extern void memory_failure_queue_kick(int cpu);
extern int unpoison_memory(unsigned long pfn);
extern int get_hwpoison_page(struct page *page);
#define put_hwpoison_page(page) put_page(page)
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 4aba6c0c2ba8..ef6d3aface8a 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -240,7 +240,11 @@ static inline atomic_t *compound_pincount_ptr(struct page *page)
#define PAGE_FRAG_CACHE_MAX_ORDER get_order(PAGE_FRAG_CACHE_MAX_SIZE)
#define page_private(page) ((page)->private)
-#define set_page_private(page, v) ((page)->private = (v))
+
+static inline void set_page_private(struct page *page, unsigned long private)
+{
+ page->private = private;
+}
struct page_frag_cache {
void * va;
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index cf3780a6ccc4..7d46411ffaa2 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -48,6 +48,7 @@ struct mmc_ext_csd {
u8 sec_feature_support;
u8 rel_sectors;
u8 rel_param;
+ bool enhanced_rpmb_supported;
u8 part_config;
u8 cache_ctrl;
u8 rst_n_function;
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index c318fb5b6a94..7149bab555d7 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -92,6 +92,9 @@ struct mmc_host_ops {
int err);
void (*pre_req)(struct mmc_host *host, struct mmc_request *req);
void (*request)(struct mmc_host *host, struct mmc_request *req);
+ /* Submit one request to host in atomic context. */
+ int (*request_atomic)(struct mmc_host *host,
+ struct mmc_request *req);
/*
* Avoid calling the next three functions too often or in a "fast
@@ -318,7 +321,6 @@ struct mmc_host {
#define MMC_CAP_AGGRESSIVE_PM (1 << 7) /* Suspend (e)MMC/SD at idle */
#define MMC_CAP_NONREMOVABLE (1 << 8) /* Nonremovable e.g. eMMC */
#define MMC_CAP_WAIT_WHILE_BUSY (1 << 9) /* Waits while card is busy */
-#define MMC_CAP_ERASE (1 << 10) /* Allow erase/trim commands */
#define MMC_CAP_3_3V_DDR (1 << 11) /* Host supports eMMC DDR 3.3V */
#define MMC_CAP_1_8V_DDR (1 << 12) /* Host supports eMMC DDR 1.8V */
#define MMC_CAP_1_2V_DDR (1 << 13) /* Host supports eMMC DDR 1.2V */
diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h
index 4b85ef05a906..d9a65c6a8816 100644
--- a/include/linux/mmc/mmc.h
+++ b/include/linux/mmc/mmc.h
@@ -325,6 +325,7 @@ static inline bool mmc_ready_for_data(u32 status)
*/
#define EXT_CSD_WR_REL_PARAM_EN (1<<2)
+#define EXT_CSD_WR_REL_PARAM_EN_RPMB_REL_WR (1<<4)
#define EXT_CSD_BOOT_WP_B_PWR_WP_DIS (0x40)
#define EXT_CSD_BOOT_WP_B_PERM_WP_DIS (0x10)
diff --git a/include/linux/mmc/sdio_ids.h b/include/linux/mmc/sdio_ids.h
index 2e9a6e4634eb..15ed8ce9d394 100644
--- a/include/linux/mmc/sdio_ids.h
+++ b/include/linux/mmc/sdio_ids.h
@@ -24,59 +24,101 @@
/*
* Vendors and devices. Sort key: vendor first, device next.
*/
+
+#define SDIO_VENDOR_ID_STE 0x0020
+#define SDIO_DEVICE_ID_STE_CW1200 0x2280
+
+#define SDIO_VENDOR_ID_INTEL 0x0089
+#define SDIO_DEVICE_ID_INTEL_IWMC3200WIMAX 0x1402
+#define SDIO_DEVICE_ID_INTEL_IWMC3200WIFI 0x1403
+#define SDIO_DEVICE_ID_INTEL_IWMC3200TOP 0x1404
+#define SDIO_DEVICE_ID_INTEL_IWMC3200GPS 0x1405
+#define SDIO_DEVICE_ID_INTEL_IWMC3200BT 0x1406
+#define SDIO_DEVICE_ID_INTEL_IWMC3200WIMAX_2G5 0x1407
+
+#define SDIO_VENDOR_ID_CGUYS 0x0092
+#define SDIO_DEVICE_ID_CGUYS_EW_CG1102GC 0x0004
+
+#define SDIO_VENDOR_ID_TI 0x0097
+#define SDIO_DEVICE_ID_TI_WL1271 0x4076
+
+#define SDIO_VENDOR_ID_ATHEROS 0x0271
+#define SDIO_DEVICE_ID_ATHEROS_AR6003_00 0x0300
+#define SDIO_DEVICE_ID_ATHEROS_AR6003_01 0x0301
+#define SDIO_DEVICE_ID_ATHEROS_AR6004_00 0x0400
+#define SDIO_DEVICE_ID_ATHEROS_AR6004_01 0x0401
+#define SDIO_DEVICE_ID_ATHEROS_AR6004_02 0x0402
+#define SDIO_DEVICE_ID_ATHEROS_AR6004_18 0x0418
+#define SDIO_DEVICE_ID_ATHEROS_AR6004_19 0x0419
+#define SDIO_DEVICE_ID_ATHEROS_AR6005 0x050A
+#define SDIO_DEVICE_ID_ATHEROS_QCA9377 0x0701
+
#define SDIO_VENDOR_ID_BROADCOM 0x02d0
-#define SDIO_DEVICE_ID_BROADCOM_43143 0xa887
+#define SDIO_DEVICE_ID_BROADCOM_NINTENDO_WII 0x044b
#define SDIO_DEVICE_ID_BROADCOM_43241 0x4324
#define SDIO_DEVICE_ID_BROADCOM_4329 0x4329
#define SDIO_DEVICE_ID_BROADCOM_4330 0x4330
#define SDIO_DEVICE_ID_BROADCOM_4334 0x4334
-#define SDIO_DEVICE_ID_BROADCOM_43340 0xa94c
-#define SDIO_DEVICE_ID_BROADCOM_43341 0xa94d
#define SDIO_DEVICE_ID_BROADCOM_4335_4339 0x4335
#define SDIO_DEVICE_ID_BROADCOM_4339 0x4339
-#define SDIO_DEVICE_ID_BROADCOM_43362 0xa962
-#define SDIO_DEVICE_ID_BROADCOM_43364 0xa9a4
-#define SDIO_DEVICE_ID_BROADCOM_43430 0xa9a6
#define SDIO_DEVICE_ID_BROADCOM_4345 0x4345
-#define SDIO_DEVICE_ID_BROADCOM_43455 0xa9bf
#define SDIO_DEVICE_ID_BROADCOM_4354 0x4354
+#define SDIO_DEVICE_ID_BROADCOM_CYPRESS_89359 0x4355
#define SDIO_DEVICE_ID_BROADCOM_4356 0x4356
#define SDIO_DEVICE_ID_BROADCOM_4359 0x4359
-#define SDIO_DEVICE_ID_CYPRESS_4373 0x4373
-#define SDIO_DEVICE_ID_CYPRESS_43012 43012
-#define SDIO_DEVICE_ID_CYPRESS_89359 0x4355
-
-#define SDIO_VENDOR_ID_INTEL 0x0089
-#define SDIO_DEVICE_ID_INTEL_IWMC3200WIMAX 0x1402
-#define SDIO_DEVICE_ID_INTEL_IWMC3200WIFI 0x1403
-#define SDIO_DEVICE_ID_INTEL_IWMC3200TOP 0x1404
-#define SDIO_DEVICE_ID_INTEL_IWMC3200GPS 0x1405
-#define SDIO_DEVICE_ID_INTEL_IWMC3200BT 0x1406
-#define SDIO_DEVICE_ID_INTEL_IWMC3200WIMAX_2G5 0x1407
+#define SDIO_DEVICE_ID_BROADCOM_CYPRESS_4373 0x4373
+#define SDIO_DEVICE_ID_BROADCOM_CYPRESS_43012 0xa804
+#define SDIO_DEVICE_ID_BROADCOM_43143 0xa887
+#define SDIO_DEVICE_ID_BROADCOM_43340 0xa94c
+#define SDIO_DEVICE_ID_BROADCOM_43341 0xa94d
+#define SDIO_DEVICE_ID_BROADCOM_43362 0xa962
+#define SDIO_DEVICE_ID_BROADCOM_43364 0xa9a4
+#define SDIO_DEVICE_ID_BROADCOM_43430 0xa9a6
+#define SDIO_DEVICE_ID_BROADCOM_43455 0xa9bf
#define SDIO_VENDOR_ID_MARVELL 0x02df
#define SDIO_DEVICE_ID_MARVELL_LIBERTAS 0x9103
-#define SDIO_DEVICE_ID_MARVELL_8688WLAN 0x9104
-#define SDIO_DEVICE_ID_MARVELL_8688BT 0x9105
+#define SDIO_DEVICE_ID_MARVELL_8688_WLAN 0x9104
+#define SDIO_DEVICE_ID_MARVELL_8688_BT 0x9105
+#define SDIO_DEVICE_ID_MARVELL_8786_WLAN 0x9116
+#define SDIO_DEVICE_ID_MARVELL_8787_WLAN 0x9119
+#define SDIO_DEVICE_ID_MARVELL_8787_BT 0x911a
+#define SDIO_DEVICE_ID_MARVELL_8787_BT_AMP 0x911b
#define SDIO_DEVICE_ID_MARVELL_8797_F0 0x9128
-#define SDIO_DEVICE_ID_MARVELL_8887WLAN 0x9134
+#define SDIO_DEVICE_ID_MARVELL_8797_WLAN 0x9129
+#define SDIO_DEVICE_ID_MARVELL_8797_BT 0x912a
+#define SDIO_DEVICE_ID_MARVELL_8897_WLAN 0x912d
+#define SDIO_DEVICE_ID_MARVELL_8897_BT 0x912e
+#define SDIO_DEVICE_ID_MARVELL_8887_F0 0x9134
+#define SDIO_DEVICE_ID_MARVELL_8887_WLAN 0x9135
+#define SDIO_DEVICE_ID_MARVELL_8887_BT 0x9136
+#define SDIO_DEVICE_ID_MARVELL_8801_WLAN 0x9139
+#define SDIO_DEVICE_ID_MARVELL_8997_F0 0x9140
+#define SDIO_DEVICE_ID_MARVELL_8997_WLAN 0x9141
+#define SDIO_DEVICE_ID_MARVELL_8997_BT 0x9142
+#define SDIO_DEVICE_ID_MARVELL_8977_WLAN 0x9145
+#define SDIO_DEVICE_ID_MARVELL_8977_BT 0x9146
+#define SDIO_DEVICE_ID_MARVELL_8987_WLAN 0x9149
+#define SDIO_DEVICE_ID_MARVELL_8987_BT 0x914a
#define SDIO_VENDOR_ID_MEDIATEK 0x037a
+#define SDIO_DEVICE_ID_MEDIATEK_MT7663 0x7663
+#define SDIO_DEVICE_ID_MEDIATEK_MT7668 0x7668
#define SDIO_VENDOR_ID_SIANO 0x039a
#define SDIO_DEVICE_ID_SIANO_NOVA_B0 0x0201
#define SDIO_DEVICE_ID_SIANO_NICE 0x0202
#define SDIO_DEVICE_ID_SIANO_VEGA_A0 0x0300
#define SDIO_DEVICE_ID_SIANO_VENICE 0x0301
+#define SDIO_DEVICE_ID_SIANO_MING 0x0302
+#define SDIO_DEVICE_ID_SIANO_PELE 0x0500
+#define SDIO_DEVICE_ID_SIANO_RIO 0x0600
+#define SDIO_DEVICE_ID_SIANO_DENVER_2160 0x0700
+#define SDIO_DEVICE_ID_SIANO_DENVER_1530 0x0800
#define SDIO_DEVICE_ID_SIANO_NOVA_A0 0x1100
#define SDIO_DEVICE_ID_SIANO_STELLAR 0x5347
-#define SDIO_VENDOR_ID_TI 0x0097
-#define SDIO_DEVICE_ID_TI_WL1271 0x4076
#define SDIO_VENDOR_ID_TI_WL1251 0x104c
#define SDIO_DEVICE_ID_TI_WL1251 0x9066
-#define SDIO_VENDOR_ID_STE 0x0020
-#define SDIO_DEVICE_ID_STE_CW1200 0x2280
-
#endif /* LINUX_MMC_SDIO_IDS_H */
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 93cf20f41e26..f5b6ccf41141 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -156,6 +156,9 @@ enum zone_stat_item {
NR_MLOCK, /* mlock()ed pages found and moved off LRU */
NR_PAGETABLE, /* used for pagetables */
NR_KERNEL_STACK_KB, /* measured in KiB */
+#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
+ NR_KERNEL_SCS_KB, /* measured in KiB */
+#endif
/* Second 128 byte cacheline */
NR_BOUNCE,
#if IS_ENABLED(CONFIG_ZSMALLOC)
@@ -193,7 +196,6 @@ enum node_stat_item {
NR_FILE_THPS,
NR_FILE_PMDMAPPED,
NR_ANON_THPS,
- NR_UNSTABLE_NFS, /* NFS unstable pages */
NR_VMSCAN_WRITE,
NR_VMSCAN_IMMEDIATE, /* Prioritise for reclaim when writeback ends */
NR_DIRTIED, /* page dirtyings since bootup */
diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h
index 35942084cd40..8f882f5881e8 100644
--- a/include/linux/mnt_namespace.h
+++ b/include/linux/mnt_namespace.h
@@ -6,10 +6,12 @@
struct mnt_namespace;
struct fs_struct;
struct user_namespace;
+struct ns_common;
extern struct mnt_namespace *copy_mnt_ns(unsigned long, struct mnt_namespace *,
struct user_namespace *, struct fs_struct *);
extern void put_mnt_ns(struct mnt_namespace *ns);
+extern struct ns_common *from_mnt_ns(struct mnt_namespace *);
extern const struct file_operations proc_mounts_operations;
extern const struct file_operations proc_mountinfo_operations;
diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index 4c2ddd0941a7..580e554fb6dc 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -434,7 +434,7 @@ struct virtio_device_id {
* For Hyper-V devices we use the device guid as the id.
*/
struct hv_vmbus_device_id {
- uuid_le guid;
+ guid_t guid;
kernel_ulong_t driver_data; /* Data private to the driver */
};
@@ -663,6 +663,7 @@ struct x86_cpu_id {
__u16 vendor;
__u16 family;
__u16 model;
+ __u16 steppings;
__u16 feature; /* bit index */
kernel_ulong_t driver_data;
};
@@ -671,6 +672,7 @@ struct x86_cpu_id {
#define X86_VENDOR_ANY 0xffff
#define X86_FAMILY_ANY 0
#define X86_MODEL_ANY 0
+#define X86_STEPPING_ANY 0
#define X86_FEATURE_ANY 0 /* Same as FPU, you can't test for that */
/*
diff --git a/include/linux/module.h b/include/linux/module.h
index 1ad393e62bef..d849d06e4d44 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -458,6 +458,8 @@ struct module {
void __percpu *percpu;
unsigned int percpu_size;
#endif
+ void *noinstr_text_start;
+ unsigned int noinstr_text_size;
#ifdef CONFIG_TRACEPOINTS
unsigned int num_tracepoints;
@@ -489,6 +491,12 @@ struct module {
unsigned int num_ftrace_callsites;
unsigned long *ftrace_callsites;
#endif
+#ifdef CONFIG_KPROBES
+ void *kprobes_text_start;
+ unsigned int kprobes_text_size;
+ unsigned long *kprobe_blacklist;
+ unsigned int num_kprobe_blacklist;
+#endif
#ifdef CONFIG_LIVEPATCH
bool klp; /* Is this a livepatch module? */
diff --git a/include/linux/moduleloader.h b/include/linux/moduleloader.h
index ca92aea8a6bd..4fa67a8b2265 100644
--- a/include/linux/moduleloader.h
+++ b/include/linux/moduleloader.h
@@ -29,6 +29,11 @@ void *module_alloc(unsigned long size);
/* Free memory returned from module_alloc. */
void module_memfree(void *module_region);
+/* Determines if the section name is an init section (that is only used during
+ * module loading).
+ */
+bool module_init_section(const char *name);
+
/* Determines if the section name is an exit section (that is only used during
* module unloading)
*/
diff --git a/include/linux/mount.h b/include/linux/mount.h
index bf8cc4108b8f..7edac8c7a9c1 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -50,7 +50,8 @@ struct fs_context;
#define MNT_ATIME_MASK (MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME )
#define MNT_INTERNAL_FLAGS (MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | \
- MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED)
+ MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED | \
+ MNT_CURSOR)
#define MNT_INTERNAL 0x4000
@@ -64,6 +65,7 @@ struct fs_context;
#define MNT_SYNC_UMOUNT 0x2000000
#define MNT_MARKED 0x4000000
#define MNT_UMOUNT 0x8000000
+#define MNT_CURSOR 0x10000000
struct vfsmount {
struct dentry *mnt_root; /* root of the mounted tree */
diff --git a/include/linux/mpage.h b/include/linux/mpage.h
index 001f1fcf9836..f4f5e90a6844 100644
--- a/include/linux/mpage.h
+++ b/include/linux/mpage.h
@@ -13,9 +13,9 @@
#ifdef CONFIG_BLOCK
struct writeback_control;
+struct readahead_control;
-int mpage_readpages(struct address_space *mapping, struct list_head *pages,
- unsigned nr_pages, get_block_t get_block);
+void mpage_readahead(struct readahead_control *, get_block_t get_block);
int mpage_readpage(struct page *page, get_block_t get_block);
int mpage_writepages(struct address_space *mapping,
struct writeback_control *wbc, get_block_t get_block);
diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h
index 074f395b9ad2..cdb171efc7cb 100644
--- a/include/linux/nsproxy.h
+++ b/include/linux/nsproxy.h
@@ -42,6 +42,30 @@ struct nsproxy {
extern struct nsproxy init_nsproxy;
/*
+ * A structure to encompass all bits needed to install
+ * a partial or complete new set of namespaces.
+ *
+ * If a new user namespace is requested, cred will
+ * point to a modifiable set of credentials. If a pointer
+ * to a modifiable set is needed, nsset_cred() must be
+ * used and tested.
+ */
+struct nsset {
+ unsigned flags;
+ struct nsproxy *nsproxy;
+ struct fs_struct *fs;
+ const struct cred *cred;
+};
+
+static inline struct cred *nsset_cred(struct nsset *set)
+{
+ if (set->flags & CLONE_NEWUSER)
+ return (struct cred *)set->cred;
+
+ return NULL;
+}
+
+/*
* the namespaces access rules are:
*
* 1. only current task is allowed to change tsk->nsproxy pointer or
diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h
index 10f81629b9ce..41e7795a3ee4 100644
--- a/include/linux/nvme-fc-driver.h
+++ b/include/linux/nvme-fc-driver.h
@@ -10,47 +10,26 @@
/*
- * ********************** LLDD FC-NVME Host API ********************
+ * ********************** FC-NVME LS API ********************
*
- * For FC LLDD's that are the NVME Host role.
+ * Data structures used by both FC-NVME hosts and FC-NVME
+ * targets to perform FC-NVME LS requests or transmit
+ * responses.
*
- * ******************************************************************
+ * ***********************************************************
*/
-
-
/**
- * struct nvme_fc_port_info - port-specific ids and FC connection-specific
- * data element used during NVME Host role
- * registrations
- *
- * Static fields describing the port being registered:
- * @node_name: FC WWNN for the port
- * @port_name: FC WWPN for the port
- * @port_role: What NVME roles are supported (see FC_PORT_ROLE_xxx)
- * @dev_loss_tmo: maximum delay for reconnects to an association on
- * this device. Used only on a remoteport.
+ * struct nvmefc_ls_req - Request structure passed from the transport
+ * to the LLDD to perform a NVME-FC LS request and obtain
+ * a response.
+ * Used by nvme-fc transport (host) to send LS's such as
+ * Create Association, Create Connection and Disconnect
+ * Association.
+ * Used by the nvmet-fc transport (controller) to send
+ * LS's such as Disconnect Association.
*
- * Initialization values for dynamic port fields:
- * @port_id: FC N_Port_ID currently assigned the port. Upper 8 bits must
- * be set to 0.
- */
-struct nvme_fc_port_info {
- u64 node_name;
- u64 port_name;
- u32 port_role;
- u32 port_id;
- u32 dev_loss_tmo;
-};
-
-
-/**
- * struct nvmefc_ls_req - Request structure passed from NVME-FC transport
- * to LLDD in order to perform a NVME FC-4 LS
- * request and obtain a response.
- *
- * Values set by the NVME-FC layer prior to calling the LLDD ls_req
- * entrypoint.
+ * Values set by the requestor prior to calling the LLDD ls_req entrypoint:
* @rqstaddr: pointer to request buffer
* @rqstdma: PCI DMA address of request buffer
* @rqstlen: Length, in bytes, of request buffer
@@ -63,8 +42,8 @@ struct nvme_fc_port_info {
* @private: pointer to memory allocated alongside the ls request structure
* that is specifically for the LLDD to use while processing the
* request. The length of the buffer corresponds to the
- * lsrqst_priv_sz value specified in the nvme_fc_port_template
- * supplied by the LLDD.
+ * lsrqst_priv_sz value specified in the xxx_template supplied
+ * by the LLDD.
* @done: The callback routine the LLDD is to invoke upon completion of
* the LS request. req argument is the pointer to the original LS
* request structure. Status argument must be 0 upon success, a
@@ -86,6 +65,101 @@ struct nvmefc_ls_req {
} __aligned(sizeof(u64)); /* alignment for other things alloc'd with */
+/**
+ * struct nvmefc_ls_rsp - Structure passed from the transport to the LLDD
+ * to request the transmit of the NVME-FC LS response to a
+ * NVME-FC LS request. The structure originates in the LLDD
+ * and is given to the transport via the xxx_rcv_ls_req()
+ * transport routine. As such, the structure represents the
+ * FC exchange context for the NVME-FC LS request that was
+ * received and which the response is to be sent for.
+ * Used by the LLDD to pass to the nvmet-fc transport (controller)
+ * received LS's such as Create Association, Create Connection
+ * and Disconnect Association.
+ * Used by the LLDD to pass to the nvme-fc transport (host)
+ * received LS's such as Disconnect Association or Disconnect
+ * Connection.
+ *
+ * The structure is allocated by the LLDD whenever a LS Request is received
+ * from the FC link. The address of the structure is passed to the nvmet-fc
+ * or nvme-fc layer via the xxx_rcv_ls_req() transport routines.
+ *
+ * The address of the structure is to be passed back to the LLDD
+ * when the response is to be transmitted. The LLDD will use the address to
+ * map back to the LLDD exchange structure which maintains information such
+ * as the remote N_Port that sent the LS as well as any FC exchange context.
+ * Upon completion of the LS response transmit, the LLDD will pass the
+ * address of the structure back to the transport LS rsp done() routine,
+ * allowing the transport to release dma resources. Upon completion of
+ * the done() routine, no further access to the structure will be made by
+ * the transport and the LLDD can de-allocate the structure.
+ *
+ * Field initialization:
+ * At the time of the xxx_rcv_ls_req() call, there is no content that
+ * is valid in the structure.
+ *
+ * When the structure is used for the LLDD->xmt_ls_rsp() call, the
+ * transport layer will fully set the fields in order to specify the
+ * response payload buffer and its length as well as the done routine
+ * to be called upon completion of the transmit. The transport layer
+ * will also set a private pointer for its own use in the done routine.
+ *
+ * Values set by the transport layer prior to calling the LLDD xmt_ls_rsp
+ * entrypoint:
+ * @rspbuf: pointer to the LS response buffer
+ * @rspdma: PCI DMA address of the LS response buffer
+ * @rsplen: Length, in bytes, of the LS response buffer
+ * @done: The callback routine the LLDD is to invoke upon completion of
+ * transmitting the LS response. req argument is the pointer to
+ * the original ls request.
+ * @nvme_fc_private: pointer to an internal transport-specific structure
+ * used as part of the transport done() processing. The LLDD is
+ * not to access this pointer.
+ */
+struct nvmefc_ls_rsp {
+ void *rspbuf;
+ dma_addr_t rspdma;
+ u16 rsplen;
+
+ void (*done)(struct nvmefc_ls_rsp *rsp);
+ void *nvme_fc_private; /* LLDD is not to access !! */
+};
+
+
+
+/*
+ * ********************** LLDD FC-NVME Host API ********************
+ *
+ * For FC LLDD's that are the NVME Host role.
+ *
+ * ******************************************************************
+ */
+
+
+/**
+ * struct nvme_fc_port_info - port-specific ids and FC connection-specific
+ * data element used during NVME Host role
+ * registrations
+ *
+ * Static fields describing the port being registered:
+ * @node_name: FC WWNN for the port
+ * @port_name: FC WWPN for the port
+ * @port_role: What NVME roles are supported (see FC_PORT_ROLE_xxx)
+ * @dev_loss_tmo: maximum delay for reconnects to an association on
+ * this device. Used only on a remoteport.
+ *
+ * Initialization values for dynamic port fields:
+ * @port_id: FC N_Port_ID currently assigned the port. Upper 8 bits must
+ * be set to 0.
+ */
+struct nvme_fc_port_info {
+ u64 node_name;
+ u64 port_name;
+ u32 port_role;
+ u32 port_id;
+ u32 dev_loss_tmo;
+};
+
enum nvmefc_fcp_datadir {
NVMEFC_FCP_NODATA, /* payload_length and sg_cnt will be zero */
NVMEFC_FCP_WRITE,
@@ -337,6 +411,21 @@ struct nvme_fc_remote_port {
* indicating an FC transport Aborted status.
* Entrypoint is Mandatory.
*
+ * @xmt_ls_rsp: Called to transmit the response to a FC-NVME FC-4 LS service.
+ * The nvmefc_ls_rsp structure is the same LLDD-supplied exchange
+ * structure specified in the nvme_fc_rcv_ls_req() call made when
+ * the LS request was received. The structure will fully describe
+ * the buffers for the response payload and the dma address of the
+ * payload. The LLDD is to transmit the response (or return a
+ * non-zero errno status), and upon completion of the transmit, call
+ * the "done" routine specified in the nvmefc_ls_rsp structure
+ * (argument to done is the address of the nvmefc_ls_rsp structure
+ * itself). Upon the completion of the done routine, the LLDD shall
+ * consider the LS handling complete and the nvmefc_ls_rsp structure
+ * may be freed/released.
+ * Entrypoint is mandatory if the LLDD calls the nvme_fc_rcv_ls_req()
+ * entrypoint.
+ *
* @max_hw_queues: indicates the maximum number of hw queues the LLDD
* supports for cpu affinitization.
* Value is Mandatory. Must be at least 1.
@@ -371,7 +460,7 @@ struct nvme_fc_remote_port {
* @lsrqst_priv_sz: The LLDD sets this field to the amount of additional
* memory that it would like fc nvme layer to allocate on the LLDD's
* behalf whenever a ls request structure is allocated. The additional
- * memory area solely for the of the LLDD and its location is
+ * memory area is solely for use by the LLDD and its location is
* specified by the ls_request->private pointer.
* Value is Mandatory. Allowed to be zero.
*
@@ -405,6 +494,9 @@ struct nvme_fc_port_template {
struct nvme_fc_remote_port *,
void *hw_queue_handle,
struct nvmefc_fcp_req *);
+ int (*xmt_ls_rsp)(struct nvme_fc_local_port *localport,
+ struct nvme_fc_remote_port *rport,
+ struct nvmefc_ls_rsp *ls_rsp);
u32 max_hw_queues;
u16 max_sgl_segments;
@@ -441,6 +533,34 @@ void nvme_fc_rescan_remoteport(struct nvme_fc_remote_port *remoteport);
int nvme_fc_set_remoteport_devloss(struct nvme_fc_remote_port *remoteport,
u32 dev_loss_tmo);
+/*
+ * Routine called to pass a NVME-FC LS request, received by the lldd,
+ * to the nvme-fc transport.
+ *
+ * If the return value is zero: the LS was successfully accepted by the
+ * transport.
+ * If the return value is non-zero: the transport has not accepted the
+ * LS. The lldd should ABTS-LS the LS.
+ *
+ * Note: if the LLDD receives an ABTS for the LS prior to the transport
+ * calling the ops->xmt_ls_rsp() routine to transmit a response, the LLDD
+ * shall mark the LS as aborted, and when the xmt_ls_rsp() is called: the
+ * response shall not be transmitted and the struct nvmefc_ls_rsp done
+ * routine shall be called. The LLDD may transmit the ABTS response as
+ * soon as the LS was marked or can delay until the xmt_ls_rsp() call is
+ * made.
+ * Note: if an RCV LS was successfully posted to the transport and the
+ * remoteport is then unregistered before xmt_ls_rsp() was called for
+ * the lsrsp structure, the transport will still call xmt_ls_rsp()
+ * afterward to cleanup the outstanding lsrsp structure. The LLDD should
+ * noop the transmission of the rsp and call the lsrsp->done() routine
+ * to allow the lsrsp structure to be released.
+ */
+int nvme_fc_rcv_ls_req(struct nvme_fc_remote_port *remoteport,
+ struct nvmefc_ls_rsp *lsrsp,
+ void *lsreqbuf, u32 lsreqbuf_len);
+
+
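A hedged sketch of the host-side receive path just described: the LLDD hands a received LS to the transport with nvme_fc_rcv_ls_req() and ABTS's the exchange if the transport refuses it. The lldd_ls_exchange container and the abort helper are illustrative assumptions, not part of this patch.

/* Hypothetical LLDD receive path for an FC-NVME LS (host role). */
struct lldd_ls_exchange {
	struct nvmefc_ls_rsp lsrsp;	/* handed to the transport */
	/* ... LLDD exchange/DMA context ... */
};

static void lldd_handle_ls_frame(struct nvme_fc_remote_port *rport,
				 struct lldd_ls_exchange *xchg,
				 void *lsreqbuf, u32 lsreqbuf_len)
{
	int ret;

	ret = nvme_fc_rcv_ls_req(rport, &xchg->lsrsp, lsreqbuf, lsreqbuf_len);
	if (ret)
		/* Transport did not accept the LS: ABTS it (LLDD-specific). */
		lldd_abts_ls_exchange(xchg);
	/* Otherwise the transport will later call ops->xmt_ls_rsp() with
	 * &xchg->lsrsp to have the response transmitted. */
}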
/*
* *************** LLDD FC-NVME Target/Subsystem API ***************
@@ -470,55 +590,6 @@ struct nvmet_fc_port_info {
};
-/**
- * struct nvmefc_tgt_ls_req - Structure used between LLDD and NVMET-FC
- * layer to represent the exchange context for
- * a FC-NVME Link Service (LS).
- *
- * The structure is allocated by the LLDD whenever a LS Request is received
- * from the FC link. The address of the structure is passed to the nvmet-fc
- * layer via the nvmet_fc_rcv_ls_req() call. The address of the structure
- * will be passed back to the LLDD when the response is to be transmit.
- * The LLDD is to use the address to map back to the LLDD exchange structure
- * which maintains information such as the targetport the LS was received
- * on, the remote FC NVME initiator that sent the LS, and any FC exchange
- * context. Upon completion of the LS response transmit, the address of the
- * structure will be passed back to the LS rsp done() routine, allowing the
- * nvmet-fc layer to release dma resources. Upon completion of the done()
- * routine, no further access will be made by the nvmet-fc layer and the
- * LLDD can de-allocate the structure.
- *
- * Field initialization:
- * At the time of the nvmet_fc_rcv_ls_req() call, there is no content that
- * is valid in the structure.
- *
- * When the structure is used for the LLDD->xmt_ls_rsp() call, the nvmet-fc
- * layer will fully set the fields in order to specify the response
- * payload buffer and its length as well as the done routine to be called
- * upon compeletion of the transmit. The nvmet-fc layer will also set a
- * private pointer for its own use in the done routine.
- *
- * Values set by the NVMET-FC layer prior to calling the LLDD xmt_ls_rsp
- * entrypoint.
- * @rspbuf: pointer to the LS response buffer
- * @rspdma: PCI DMA address of the LS response buffer
- * @rsplen: Length, in bytes, of the LS response buffer
- * @done: The callback routine the LLDD is to invoke upon completion of
- * transmitting the LS response. req argument is the pointer to
- * the original ls request.
- * @nvmet_fc_private: pointer to an internal NVMET-FC layer structure used
- * as part of the NVMET-FC processing. The LLDD is not to access
- * this pointer.
- */
-struct nvmefc_tgt_ls_req {
- void *rspbuf;
- dma_addr_t rspdma;
- u16 rsplen;
-
- void (*done)(struct nvmefc_tgt_ls_req *req);
- void *nvmet_fc_private; /* LLDD is not to access !! */
-};
-
/* Operations that NVME-FC layer may request the LLDD to perform for FCP */
enum {
NVMET_FCOP_READDATA = 1, /* xmt data to initiator */
@@ -693,17 +764,19 @@ struct nvmet_fc_target_port {
* Entrypoint is Mandatory.
*
* @xmt_ls_rsp: Called to transmit the response to a FC-NVME FC-4 LS service.
- * The nvmefc_tgt_ls_req structure is the same LLDD-supplied exchange
+ * The nvmefc_ls_rsp structure is the same LLDD-supplied exchange
* structure specified in the nvmet_fc_rcv_ls_req() call made when
- * the LS request was received. The structure will fully describe
+ * the LS request was received. The structure will fully describe
* the buffers for the response payload and the dma address of the
- * payload. The LLDD is to transmit the response (or return a non-zero
- * errno status), and upon completion of the transmit, call the
- * "done" routine specified in the nvmefc_tgt_ls_req structure
- * (argument to done is the ls reqwuest structure itself).
- * After calling the done routine, the LLDD shall consider the
- * LS handling complete and the nvmefc_tgt_ls_req structure may
- * be freed/released.
+ * payload. The LLDD is to transmit the response (or return a
+ * non-zero errno status), and upon completion of the transmit, call
+ * the "done" routine specified in the nvmefc_ls_rsp structure
+ * (argument to done is the address of the nvmefc_ls_rsp structure
+ * itself). Upon the completion of the done() routine, the LLDD shall
+ * consider the LS handling complete and the nvmefc_ls_rsp structure
+ * may be freed/released.
+ * The transport will always call the xmt_ls_rsp() routine for any
+ * LS received.
* Entrypoint is Mandatory.
*
* @fcp_op: Called to perform a data transfer or transmit a response.
@@ -798,6 +871,39 @@ struct nvmet_fc_target_port {
* should cause the initiator to rescan the discovery controller
* on the targetport.
*
+ * @ls_req: Called to issue a FC-NVME FC-4 LS service request.
+ * The nvme_fc_ls_req structure will fully describe the buffers for
+ * the request payload and where to place the response payload.
+ * The targetport that is to issue the LS request is identified by
+ * the targetport argument. The remote port that is to receive the
+ * LS request is identified by the hosthandle argument. The nvmet-fc
+ * transport is only allowed to issue FC-NVME LS's on behalf of an
+ * association that was created prior by a Create Association LS.
+ * The hosthandle will originate from the LLDD in the struct
+ * nvmefc_ls_rsp structure for the Create Association LS that
+ * was delivered to the transport. The transport will save the
+ * hosthandle as an attribute of the association. If the LLDD
+ * loses connectivity with the remote port, it must call the
+ * nvmet_fc_invalidate_host() routine to remove any references to
+ * the remote port in the transport.
+ * The LLDD is to allocate an exchange, issue the LS request, obtain
+ * the LS response, and call the "done" routine specified in the
+ * request structure (argument to done is the ls request structure
+ * itself).
+ * Entrypoint is Optional - but highly recommended.
+ *
+ * @ls_abort: called to request the LLDD to abort the indicated ls request.
+ * The call may return before the abort has completed. After aborting
+ * the request, the LLDD must still call the ls request done routine
+ * indicating an FC transport Aborted status.
+ * Entrypoint is Mandatory if the ls_req entry point is specified.
+ *
+ * @host_release: called to inform the LLDD that the request to invalidate
+ * the host port indicated by the hosthandle has been fully completed.
+ * No associations exist with the host port and there will be no
+ * further references to hosthandle.
+ * Entrypoint is Mandatory if the lldd calls nvmet_fc_invalidate_host().
+ *
* @max_hw_queues: indicates the maximum number of hw queues the LLDD
* supports for cpu affinitization.
* Value is Mandatory. Must be at least 1.
@@ -826,11 +932,19 @@ struct nvmet_fc_target_port {
 * area solely for the use of the LLDD and its location is specified by
* the targetport->private pointer.
* Value is Mandatory. Allowed to be zero.
+ *
+ * @lsrqst_priv_sz: The LLDD sets this field to the amount of additional
+ * memory that it would like nvmet-fc layer to allocate on the LLDD's
+ * behalf whenever a ls request structure is allocated. The additional
+ * memory area is solely for use by the LLDD and its location is
+ * specified by the ls_request->private pointer.
+ * Value is Mandatory. Allowed to be zero.
+ *
*/
struct nvmet_fc_target_template {
void (*targetport_delete)(struct nvmet_fc_target_port *tgtport);
int (*xmt_ls_rsp)(struct nvmet_fc_target_port *tgtport,
- struct nvmefc_tgt_ls_req *tls_req);
+ struct nvmefc_ls_rsp *ls_rsp);
int (*fcp_op)(struct nvmet_fc_target_port *tgtport,
struct nvmefc_tgt_fcp_req *fcpreq);
void (*fcp_abort)(struct nvmet_fc_target_port *tgtport,
@@ -840,6 +954,11 @@ struct nvmet_fc_target_template {
void (*defer_rcv)(struct nvmet_fc_target_port *tgtport,
struct nvmefc_tgt_fcp_req *fcpreq);
void (*discovery_event)(struct nvmet_fc_target_port *tgtport);
+ int (*ls_req)(struct nvmet_fc_target_port *targetport,
+ void *hosthandle, struct nvmefc_ls_req *lsreq);
+ void (*ls_abort)(struct nvmet_fc_target_port *targetport,
+ void *hosthandle, struct nvmefc_ls_req *lsreq);
+ void (*host_release)(void *hosthandle);
u32 max_hw_queues;
u16 max_sgl_segments;
@@ -848,7 +967,9 @@ struct nvmet_fc_target_template {
u32 target_features;
+ /* sizes of additional private data for data structures */
u32 target_priv_sz;
+ u32 lsrqst_priv_sz;
};
@@ -859,10 +980,61 @@ int nvmet_fc_register_targetport(struct nvmet_fc_port_info *portinfo,
int nvmet_fc_unregister_targetport(struct nvmet_fc_target_port *tgtport);
+/*
+ * Routine called to pass a NVME-FC LS request, received by the lldd,
+ * to the nvmet-fc transport.
+ *
+ * If the return value is zero: the LS was successfully accepted by the
+ * transport.
+ * If the return value is non-zero: the transport has not accepted the
+ * LS. The lldd should ABTS-LS the LS.
+ *
+ * Note: if the LLDD receives an ABTS for the LS prior to the transport
+ * calling the ops->xmt_ls_rsp() routine to transmit a response, the LLDD
+ * shall mark the LS as aborted, and when the xmt_ls_rsp() is called: the
+ * response shall not be transmitted and the struct nvmefc_ls_rsp done
+ * routine shall be called. The LLDD may transmit the ABTS response as
+ * soon as the LS was marked or can delay until the xmt_ls_rsp() call is
+ * made.
+ * Note: if an RCV LS was successfully posted to the transport and the
+ * targetport is then unregistered before xmt_ls_rsp() was called for
+ * the lsrsp structure, the transport will still call xmt_ls_rsp()
+ * afterward to cleanup the outstanding lsrsp structure. The LLDD should
+ * noop the transmission of the rsp and call the lsrsp->done() routine
+ * to allow the lsrsp structure to be released.
+ */
int nvmet_fc_rcv_ls_req(struct nvmet_fc_target_port *tgtport,
- struct nvmefc_tgt_ls_req *lsreq,
+ void *hosthandle,
+ struct nvmefc_ls_rsp *rsp,
void *lsreqbuf, u32 lsreqbuf_len);
+/*
+ * Routine called by the LLDD whenever it has a logout or loss of
+ * connectivity to a NVME-FC host port which there had been active
+ * NVMe controllers for. The host port is indicated by the
+ * hosthandle. The hosthandle is given to the nvmet-fc transport
+ * when a NVME LS was received, typically to create a new association.
+ * The nvmet-fc transport will cache the hostport value with the
+ * association for use in LS requests for the association.
+ * When the LLDD calls this routine, the nvmet-fc transport will
+ * immediately terminate all associations that were created with
+ * the hosthandle host port.
+ * The LLDD, after calling this routine and having control returned,
+ * must assume the transport may subsequently utilize hosthandle as
+ * part of sending LS's to terminate the association. The LLDD
+ * should reject the LS's if they are attempted.
+ * Once the last association has terminated for the hosthandle host
+ * port, the nvmet-fc transport will call the ops->host_release()
+ * callback. As of the callback, the nvmet-fc transport will no
+ * longer reference hosthandle.
+ */
+void nvmet_fc_invalidate_host(struct nvmet_fc_target_port *tgtport,
+ void *hosthandle);
+
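A hedged sketch of the target-side connectivity-loss handling described above: on losing a host port the LLDD invalidates the hosthandle, and its ->host_release() callback later drops whatever reference the LLDD holds. The lldd_host_port mapping and its refcount helper are illustrative assumptions.

/* Hypothetical target-side LLDD handling of host port loss. */
static void lldd_host_port_gone(struct nvmet_fc_target_port *tgtport,
				void *hosthandle)
{
	/* Transport tears down all associations created via this host port
	 * and calls ops->host_release() once nothing references it. */
	nvmet_fc_invalidate_host(tgtport, hosthandle);
}

/* ops->host_release(): no further transport references to hosthandle. */
static void lldd_host_release(void *hosthandle)
{
	struct lldd_host_port *hp = hosthandle;	/* LLDD-private mapping */

	lldd_put_host_port(hp);			/* illustrative refcount drop */
}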
+/*
+ * If nvmet_fc_rcv_fcp_req returns non-zero, the transport has not accepted
+ * the FCP cmd. The lldd should ABTS-LS the cmd.
+ */
int nvmet_fc_rcv_fcp_req(struct nvmet_fc_target_port *tgtport,
struct nvmefc_tgt_fcp_req *fcpreq,
void *cmdiubuf, u32 cmdiubuf_len);
diff --git a/include/linux/nvme-fc.h b/include/linux/nvme-fc.h
index e8c30b39bb27..51fe44e0328b 100644
--- a/include/linux/nvme-fc.h
+++ b/include/linux/nvme-fc.h
@@ -4,8 +4,8 @@
*/
/*
- * This file contains definitions relative to FC-NVME-2 r1.06
- * (T11-2019-00210-v001).
+ * This file contains definitions relative to FC-NVME-2 r1.08
+ * (T11-2019-00210-v004).
*/
#ifndef _NVME_FC_H
@@ -81,7 +81,8 @@ struct nvme_fc_ersp_iu {
};
-#define FCNVME_NVME_SR_OPCODE 0x01
+#define FCNVME_NVME_SR_OPCODE 0x01
+#define FCNVME_NVME_SR_RSP_OPCODE 0x02
struct nvme_fc_nvme_sr_iu {
__u8 fc_id;
@@ -94,7 +95,7 @@ struct nvme_fc_nvme_sr_iu {
enum {
FCNVME_SRSTAT_ACC = 0x0,
- FCNVME_SRSTAT_INV_FCID = 0x1,
+ /* reserved 0x1 */
/* reserved 0x2 */
FCNVME_SRSTAT_LOGICAL_ERR = 0x3,
FCNVME_SRSTAT_INV_QUALIF = 0x4,
@@ -397,7 +398,7 @@ struct fcnvme_ls_disconnect_conn_rqst {
struct fcnvme_ls_rqst_w0 w0;
__be32 desc_list_len;
struct fcnvme_lsdesc_assoc_id associd;
- struct fcnvme_lsdesc_disconn_cmd connectid;
+ struct fcnvme_lsdesc_conn_id connectid;
};
struct fcnvme_ls_disconnect_conn_acc {
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 3d5189f46cb1..5ce51ab4c50e 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -38,6 +38,8 @@ enum {
NVMF_ADDR_FAMILY_IP6 = 2, /* IP6 */
NVMF_ADDR_FAMILY_IB = 3, /* InfiniBand */
NVMF_ADDR_FAMILY_FC = 4, /* Fibre Channel */
+ NVMF_ADDR_FAMILY_LOOP = 254, /* Reserved for host usage */
+ NVMF_ADDR_FAMILY_MAX,
};
/* Transport Type codes for Discovery Log Page entry TRTYPE field */
@@ -299,6 +301,8 @@ struct nvme_id_ctrl {
};
enum {
+ NVME_CTRL_CMIC_MULTI_CTRL = 1 << 1,
+ NVME_CTRL_CMIC_ANA = 1 << 3,
NVME_CTRL_ONCS_COMPARE = 1 << 0,
NVME_CTRL_ONCS_WRITE_UNCORRECTABLE = 1 << 1,
NVME_CTRL_ONCS_DSM = 1 << 2,
@@ -394,8 +398,12 @@ enum {
enum {
NVME_NS_FEAT_THIN = 1 << 0,
+ NVME_NS_FEAT_ATOMICS = 1 << 1,
+ NVME_NS_FEAT_IO_OPT = 1 << 4,
+ NVME_NS_ATTR_RO = 1 << 0,
NVME_NS_FLBAS_LBA_MASK = 0xf,
NVME_NS_FLBAS_META_EXT = 0x10,
+ NVME_NS_NMIC_SHARED = 1 << 0,
NVME_LBAF_RP_BEST = 0,
NVME_LBAF_RP_BETTER = 1,
NVME_LBAF_RP_GOOD = 2,
@@ -412,6 +420,12 @@ enum {
NVME_NS_DPS_PI_TYPE3 = 3,
};
+/* Identify Namespace Metadata Capabilities (MC): */
+enum {
+ NVME_MC_EXTENDED_LBA = (1 << 0),
+ NVME_MC_METADATA_PTR = (1 << 1),
+};
+
struct nvme_ns_id_desc {
__u8 nidt;
__u8 nidl;
@@ -1177,7 +1191,7 @@ struct nvmf_disc_rsp_page_hdr {
__le64 numrec;
__le16 recfmt;
__u8 resv14[1006];
- struct nvmf_disc_rsp_page_entry entries[0];
+ struct nvmf_disc_rsp_page_entry entries[];
};
enum {
diff --git a/include/linux/padata.h b/include/linux/padata.h
index a0d8b41850b2..693cae9bfe66 100644
--- a/include/linux/padata.h
+++ b/include/linux/padata.h
@@ -139,7 +139,8 @@ struct padata_shell {
/**
* struct padata_instance - The overall control structure.
*
- * @node: Used by CPU hotplug.
+ * @cpu_online_node: Linkage for CPU online callback.
+ * @cpu_dead_node: Linkage for CPU offline callback.
* @parallel_wq: The workqueue used for parallel work.
* @serial_wq: The workqueue used for serial work.
* @pslist: List of padata_shell objects attached to this instance.
@@ -150,7 +151,8 @@ struct padata_shell {
* @flags: padata flags.
*/
struct padata_instance {
- struct hlist_node node;
+ struct hlist_node cpu_online_node;
+ struct hlist_node cpu_dead_node;
struct workqueue_struct *parallel_wq;
struct workqueue_struct *serial_wq;
struct list_head pslist;
diff --git a/include/linux/page_counter.h b/include/linux/page_counter.h
index bab7e57f659b..85bd413e784e 100644
--- a/include/linux/page_counter.h
+++ b/include/linux/page_counter.h
@@ -10,6 +10,7 @@ struct page_counter {
atomic_long_t usage;
unsigned long min;
unsigned long low;
+ unsigned long high;
unsigned long max;
struct page_counter *parent;
@@ -55,6 +56,13 @@ bool page_counter_try_charge(struct page_counter *counter,
void page_counter_uncharge(struct page_counter *counter, unsigned long nr_pages);
void page_counter_set_min(struct page_counter *counter, unsigned long nr_pages);
void page_counter_set_low(struct page_counter *counter, unsigned long nr_pages);
+
+static inline void page_counter_set_high(struct page_counter *counter,
+ unsigned long nr_pages)
+{
+ WRITE_ONCE(counter->high, nr_pages);
+}
+
int page_counter_set_max(struct page_counter *counter, unsigned long nr_pages);
int page_counter_memparse(const char *buf, const char *max,
unsigned long *nr_pages);
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index a8f7bd8ea1c6..8e085713150c 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -51,7 +51,10 @@ static inline void mapping_set_error(struct address_space *mapping, int error)
return;
/* Record in wb_err for checkers using errseq_t based tracking */
- filemap_set_wb_err(mapping, error);
+ __filemap_set_wb_err(mapping, error);
+
+ /* Record it in superblock */
+ errseq_set(&mapping->host->i_sb->s_wb_err, error);
/* Record it in flags for now, for legacy callers */
if (error == -ENOSPC)
@@ -205,6 +208,43 @@ static inline int page_cache_add_speculative(struct page *page, int count)
return __page_cache_add_speculative(page, count);
}
+/**
+ * attach_page_private - Attach private data to a page.
+ * @page: Page to attach data to.
+ * @data: Data to attach to page.
+ *
+ * Attaching private data to a page increments the page's reference count.
+ * The data must be detached before the page will be freed.
+ */
+static inline void attach_page_private(struct page *page, void *data)
+{
+ get_page(page);
+ set_page_private(page, (unsigned long)data);
+ SetPagePrivate(page);
+}
+
+/**
+ * detach_page_private - Detach private data from a page.
+ * @page: Page to detach data from.
+ *
+ * Removes the data that was previously attached to the page and decrements
+ * the refcount on the page.
+ *
+ * Return: Data that was attached to the page.
+ */
+static inline void *detach_page_private(struct page *page)
+{
+ void *data = (void *)page_private(page);
+
+ if (!PagePrivate(page))
+ return NULL;
+ ClearPagePrivate(page);
+ set_page_private(page, 0);
+ put_page(page);
+
+ return data;
+}
+
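The pair above centralizes the get_page()/SetPagePrivate() and ClearPagePrivate()/put_page() sequence that filesystems previously open-coded. A hedged sketch of a filesystem using it to stash a per-page tracking structure; fs_page_info and the callers are illustrative, not from this patch.

/* Hypothetical per-page filesystem state managed via page private data. */
struct fs_page_info {
	unsigned long	flags;
	/* ... */
};

static int fs_begin_io(struct page *page)
{
	struct fs_page_info *info = kzalloc(sizeof(*info), GFP_NOFS);

	if (!info)
		return -ENOMEM;
	attach_page_private(page, info);	/* takes a page reference */
	return 0;
}

static void fs_end_io(struct page *page)
{
	struct fs_page_info *info = detach_page_private(page);

	if (info)				/* NULL if nothing was attached */
		kfree(info);			/* page reference already dropped */
}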
#ifdef CONFIG_NUMA
extern struct page *__page_cache_alloc(gfp_t gfp);
#else
@@ -615,6 +655,17 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask);
void delete_from_page_cache_batch(struct address_space *mapping,
struct pagevec *pvec);
+#define VM_READAHEAD_PAGES (SZ_128K / PAGE_SIZE)
+
+void page_cache_sync_readahead(struct address_space *, struct file_ra_state *,
+ struct file *, pgoff_t index, unsigned long req_count);
+void page_cache_async_readahead(struct address_space *, struct file_ra_state *,
+ struct file *, struct page *, pgoff_t index,
+ unsigned long req_count);
+void page_cache_readahead_unbounded(struct address_space *, struct file *,
+ pgoff_t index, unsigned long nr_to_read,
+ unsigned long lookahead_count);
+
/*
* Like add_to_page_cache_locked, but used to add newly allocated pages:
* the page is new, so we can just run __SetPageLocked() against it.
@@ -631,6 +682,146 @@ static inline int add_to_page_cache(struct page *page,
return error;
}
+/**
+ * struct readahead_control - Describes a readahead request.
+ *
+ * A readahead request is for consecutive pages. Filesystems which
+ * implement the ->readahead method should call readahead_page() or
+ * readahead_page_batch() in a loop and attempt to start I/O against
+ * each page in the request.
+ *
+ * Most of the fields in this struct are private and should be accessed
+ * by the functions below.
+ *
+ * @file: The file, used primarily by network filesystems for authentication.
+ * May be NULL if invoked internally by the filesystem.
+ * @mapping: Readahead this filesystem object.
+ */
+struct readahead_control {
+ struct file *file;
+ struct address_space *mapping;
+/* private: use the readahead_* accessors instead */
+ pgoff_t _index;
+ unsigned int _nr_pages;
+ unsigned int _batch_count;
+};
+
+/**
+ * readahead_page - Get the next page to read.
+ * @rac: The current readahead request.
+ *
+ * Context: The page is locked and has an elevated refcount. The caller
+ * should decrease the refcount once the page has been submitted for I/O
+ * and unlock the page once all I/O to that page has completed.
+ * Return: A pointer to the next page, or %NULL if we are done.
+ */
+static inline struct page *readahead_page(struct readahead_control *rac)
+{
+ struct page *page;
+
+ BUG_ON(rac->_batch_count > rac->_nr_pages);
+ rac->_nr_pages -= rac->_batch_count;
+ rac->_index += rac->_batch_count;
+
+ if (!rac->_nr_pages) {
+ rac->_batch_count = 0;
+ return NULL;
+ }
+
+ page = xa_load(&rac->mapping->i_pages, rac->_index);
+ VM_BUG_ON_PAGE(!PageLocked(page), page);
+ rac->_batch_count = hpage_nr_pages(page);
+
+ return page;
+}
+
+static inline unsigned int __readahead_batch(struct readahead_control *rac,
+ struct page **array, unsigned int array_sz)
+{
+ unsigned int i = 0;
+ XA_STATE(xas, &rac->mapping->i_pages, 0);
+ struct page *page;
+
+ BUG_ON(rac->_batch_count > rac->_nr_pages);
+ rac->_nr_pages -= rac->_batch_count;
+ rac->_index += rac->_batch_count;
+ rac->_batch_count = 0;
+
+ xas_set(&xas, rac->_index);
+ rcu_read_lock();
+ xas_for_each(&xas, page, rac->_index + rac->_nr_pages - 1) {
+ VM_BUG_ON_PAGE(!PageLocked(page), page);
+ VM_BUG_ON_PAGE(PageTail(page), page);
+ array[i++] = page;
+ rac->_batch_count += hpage_nr_pages(page);
+
+ /*
+ * The page cache isn't using multi-index entries yet,
+ * so the xas cursor needs to be manually moved to the
+ * next index. This can be removed once the page cache
+ * is converted.
+ */
+ if (PageHead(page))
+ xas_set(&xas, rac->_index + rac->_batch_count);
+
+ if (i == array_sz)
+ break;
+ }
+ rcu_read_unlock();
+
+ return i;
+}
+
+/**
+ * readahead_page_batch - Get a batch of pages to read.
+ * @rac: The current readahead request.
+ * @array: An array of pointers to struct page.
+ *
+ * Context: The pages are locked and have an elevated refcount. The caller
+ * should decrease the refcount once the page has been submitted for I/O
+ * and unlock the page once all I/O to that page has completed.
+ * Return: The number of pages placed in the array. 0 indicates the request
+ * is complete.
+ */
+#define readahead_page_batch(rac, array) \
+ __readahead_batch(rac, array, ARRAY_SIZE(array))
+
+/**
+ * readahead_pos - The byte offset into the file of this readahead request.
+ * @rac: The readahead request.
+ */
+static inline loff_t readahead_pos(struct readahead_control *rac)
+{
+ return (loff_t)rac->_index * PAGE_SIZE;
+}
+
+/**
+ * readahead_length - The number of bytes in this readahead request.
+ * @rac: The readahead request.
+ */
+static inline loff_t readahead_length(struct readahead_control *rac)
+{
+ return (loff_t)rac->_nr_pages * PAGE_SIZE;
+}
+
+/**
+ * readahead_index - The index of the first page in this readahead request.
+ * @rac: The readahead request.
+ */
+static inline pgoff_t readahead_index(struct readahead_control *rac)
+{
+ return rac->_index;
+}
+
+/**
+ * readahead_count - The number of pages in this readahead request.
+ * @rac: The readahead request.
+ */
+static inline unsigned int readahead_count(struct readahead_control *rac)
+{
+ return rac->_nr_pages;
+}
+
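Putting the readahead_control pieces together, a filesystem's ->readahead() address_space operation loops over readahead_page() (or readahead_page_batch()) and starts I/O on each page. A hedged sketch of that loop shape; fs_read_one_page() is an illustrative stand-in for whatever submits the read and unlocks the page on completion.

/* Hypothetical ->readahead() implementation built on the helpers above. */
static void fs_readahead(struct readahead_control *rac)
{
	struct page *page;

	pr_debug("readahead %u pages at index %lu (offset %lld, %lld bytes)\n",
		 readahead_count(rac), readahead_index(rac),
		 readahead_pos(rac), readahead_length(rac));

	while ((page = readahead_page(rac))) {
		/* Page is locked with an elevated refcount; drop the ref once
		 * I/O is submitted, unlock when the I/O completes. */
		fs_read_one_page(page);
		put_page(page);
	}
}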
static inline unsigned long dir_pages(struct inode *inode)
{
return (unsigned long)(inode->i_size + PAGE_SIZE - 1) >>
diff --git a/include/linux/parser.h b/include/linux/parser.h
index 12fc3482f5fc..89e2b23fb888 100644
--- a/include/linux/parser.h
+++ b/include/linux/parser.h
@@ -7,7 +7,8 @@
* but could potentially be used anywhere else that simple option=arg
* parsing is required.
*/
-
+#ifndef _LINUX_PARSER_H
+#define _LINUX_PARSER_H
/* associates an integer enumerator with a pattern string. */
struct match_token {
@@ -34,3 +35,5 @@ int match_hex(substring_t *, int *result);
bool match_wildcard(const char *pattern, const char *str);
size_t match_strlcpy(char *, const substring_t *, size_t);
char *match_strdup(const substring_t *);
+
+#endif /* _LINUX_PARSER_H */
diff --git a/include/linux/part_stat.h b/include/linux/part_stat.h
index ece607607a86..24125778ef3e 100644
--- a/include/linux/part_stat.h
+++ b/include/linux/part_stat.h
@@ -4,21 +4,25 @@
#include <linux/genhd.h>
+struct disk_stats {
+ u64 nsecs[NR_STAT_GROUPS];
+ unsigned long sectors[NR_STAT_GROUPS];
+ unsigned long ios[NR_STAT_GROUPS];
+ unsigned long merges[NR_STAT_GROUPS];
+ unsigned long io_ticks;
+ local_t in_flight[2];
+};
+
/*
* Macros to operate on percpu disk statistics:
*
- * {disk|part|all}_stat_{add|sub|inc|dec}() modify the stat counters
- * and should be called between disk_stat_lock() and
- * disk_stat_unlock().
+ * {disk|part|all}_stat_{add|sub|inc|dec}() modify the stat counters and should
+ * be called between disk_stat_lock() and disk_stat_unlock().
*
* part_stat_read() can be called at any time.
- *
- * part_stat_{add|set_all}() and {init|free}_part_stats are for
- * internal use only.
*/
-#ifdef CONFIG_SMP
-#define part_stat_lock() ({ rcu_read_lock(); get_cpu(); })
-#define part_stat_unlock() do { put_cpu(); rcu_read_unlock(); } while (0)
+#define part_stat_lock() preempt_disable()
+#define part_stat_unlock() preempt_enable()
#define part_stat_get_cpu(part, field, cpu) \
(per_cpu_ptr((part)->dkstats, (cpu))->field)
@@ -44,50 +48,13 @@ static inline void part_stat_set_all(struct hd_struct *part, int value)
sizeof(struct disk_stats));
}
-static inline int init_part_stats(struct hd_struct *part)
-{
- part->dkstats = alloc_percpu(struct disk_stats);
- if (!part->dkstats)
- return 0;
- return 1;
-}
-
-static inline void free_part_stats(struct hd_struct *part)
-{
- free_percpu(part->dkstats);
-}
-
-#else /* !CONFIG_SMP */
-#define part_stat_lock() ({ rcu_read_lock(); 0; })
-#define part_stat_unlock() rcu_read_unlock()
-
-#define part_stat_get(part, field) ((part)->dkstats.field)
-#define part_stat_get_cpu(part, field, cpu) part_stat_get(part, field)
-#define part_stat_read(part, field) part_stat_get(part, field)
-
-static inline void part_stat_set_all(struct hd_struct *part, int value)
-{
- memset(&part->dkstats, value, sizeof(struct disk_stats));
-}
-
-static inline int init_part_stats(struct hd_struct *part)
-{
- return 1;
-}
-
-static inline void free_part_stats(struct hd_struct *part)
-{
-}
-
-#endif /* CONFIG_SMP */
-
#define part_stat_read_accum(part, field) \
(part_stat_read(part, field[STAT_READ]) + \
part_stat_read(part, field[STAT_WRITE]) + \
part_stat_read(part, field[STAT_DISCARD]))
#define __part_stat_add(part, field, addnd) \
- (part_stat_get(part, field) += (addnd))
+ __this_cpu_add((part)->dkstats->field, addnd)
#define part_stat_add(part, field, addnd) do { \
__part_stat_add((part), field, addnd); \
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 347ea379622a..b4bb32082342 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -61,7 +61,7 @@ struct perf_guest_info_callbacks {
struct perf_callchain_entry {
__u64 nr;
- __u64 ip[0]; /* /proc/sys/kernel/perf_event_max_stack */
+ __u64 ip[]; /* /proc/sys/kernel/perf_event_max_stack */
};
struct perf_callchain_entry_ctx {
@@ -113,7 +113,7 @@ struct perf_raw_record {
struct perf_branch_stack {
__u64 nr;
__u64 hw_idx;
- struct perf_branch_entry entries[0];
+ struct perf_branch_entry entries[];
};
struct task_struct;
@@ -1302,7 +1302,7 @@ static inline int perf_is_paranoid(void)
static inline int perf_allow_kernel(struct perf_event_attr *attr)
{
- if (sysctl_perf_event_paranoid > 1 && !capable(CAP_SYS_ADMIN))
+ if (sysctl_perf_event_paranoid > 1 && !perfmon_capable())
return -EACCES;
return security_perf_event_open(attr, PERF_SECURITY_KERNEL);
@@ -1310,7 +1310,7 @@ static inline int perf_allow_kernel(struct perf_event_attr *attr)
static inline int perf_allow_cpu(struct perf_event_attr *attr)
{
- if (sysctl_perf_event_paranoid > 0 && !capable(CAP_SYS_ADMIN))
+ if (sysctl_perf_event_paranoid > 0 && !perfmon_capable())
return -EACCES;
return security_perf_event_open(attr, PERF_SECURITY_CPU);
@@ -1318,7 +1318,7 @@ static inline int perf_allow_cpu(struct perf_event_attr *attr)
static inline int perf_allow_tracepoint(struct perf_event_attr *attr)
{
- if (sysctl_perf_event_paranoid > -1 && !capable(CAP_SYS_ADMIN))
+ if (sysctl_perf_event_paranoid > -1 && !perfmon_capable())
return -EPERM;
return security_perf_event_open(attr, PERF_SECURITY_TRACEPOINT);
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index ae58fad7f1e0..0c31b9461262 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -8,6 +8,7 @@
#define PIPE_BUF_FLAG_ATOMIC 0x02 /* was atomically mapped */
#define PIPE_BUF_FLAG_GIFT 0x04 /* page is a gift */
#define PIPE_BUF_FLAG_PACKET 0x08 /* read() as a packet */
+#define PIPE_BUF_FLAG_CAN_MERGE 0x10 /* can merge buffers */
/**
* struct pipe_buffer - a linux kernel pipe buffer
@@ -69,11 +70,11 @@ struct pipe_inode_info {
* Note on the nesting of these functions:
*
* ->confirm()
- * ->steal()
+ * ->try_steal()
*
- * That is, ->steal() must be called on a confirmed buffer.
- * See below for the meaning of each operation. Also see kerneldoc
- * in fs/pipe.c for the pipe and generic variants of these hooks.
+ * That is, ->try_steal() must be called on a confirmed buffer. See below for
+ * the meaning of each operation. Also see the kerneldoc in fs/pipe.c for the
+ * pipe and generic variants of these hooks.
*/
struct pipe_buf_operations {
/*
@@ -81,7 +82,7 @@ struct pipe_buf_operations {
* and that the contents are good. If the pages in the pipe belong
* to a file system, we may need to wait for IO completion in this
* hook. Returns 0 for good, or a negative error value in case of
- * error.
+ * error. If not present, all pages are considered good.
*/
int (*confirm)(struct pipe_inode_info *, struct pipe_buffer *);
@@ -93,13 +94,13 @@ struct pipe_buf_operations {
/*
* Attempt to take ownership of the pipe buffer and its contents.
- * ->steal() returns 0 for success, in which case the contents
- * of the pipe (the buf->page) is locked and now completely owned
- * by the caller. The page may then be transferred to a different
- * mapping, the most often used case is insertion into different
- * file address space cache.
+ * ->try_steal() returns %true for success, in which case the contents
+ * of the pipe (the buf->page) is locked and now completely owned by the
+ * caller. The page may then be transferred to a different mapping, the
+ * most often used case is insertion into different file address space
+ * cache.
*/
- int (*steal)(struct pipe_inode_info *, struct pipe_buffer *);
+ bool (*try_steal)(struct pipe_inode_info *, struct pipe_buffer *);
/*
* Get a reference to the pipe buffer.
@@ -194,18 +195,22 @@ static inline void pipe_buf_release(struct pipe_inode_info *pipe,
static inline int pipe_buf_confirm(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
+ if (!buf->ops->confirm)
+ return 0;
return buf->ops->confirm(pipe, buf);
}
/**
- * pipe_buf_steal - attempt to take ownership of a pipe_buffer
+ * pipe_buf_try_steal - attempt to take ownership of a pipe_buffer
* @pipe: the pipe that the buffer belongs to
* @buf: the buffer to attempt to steal
*/
-static inline int pipe_buf_steal(struct pipe_inode_info *pipe,
- struct pipe_buffer *buf)
+static inline bool pipe_buf_try_steal(struct pipe_inode_info *pipe,
+ struct pipe_buffer *buf)
{
- return buf->ops->steal(pipe, buf);
+ if (!buf->ops->try_steal)
+ return false;
+ return buf->ops->try_steal(pipe, buf);
}
/* Differs from PIPE_BUF in that PIPE_SIZE is the length of the actual
@@ -229,11 +234,8 @@ void free_pipe_info(struct pipe_inode_info *);
/* Generic pipe buffer ops functions */
bool generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *);
-int generic_pipe_buf_confirm(struct pipe_inode_info *, struct pipe_buffer *);
-int generic_pipe_buf_steal(struct pipe_inode_info *, struct pipe_buffer *);
-int generic_pipe_buf_nosteal(struct pipe_inode_info *, struct pipe_buffer *);
+bool generic_pipe_buf_try_steal(struct pipe_inode_info *, struct pipe_buffer *);
void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *);
-void pipe_buf_mark_unmergeable(struct pipe_buffer *buf);
extern const struct pipe_buf_operations nosteal_pipe_buf_ops;
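
Purely for illustration (not in this patch), an ops table after the conversion can rely on the new defaults: with no ->confirm() the buffer is always considered good, and with no ->try_steal() stealing simply fails. demo_pipe_buf_ops is a hypothetical name.

static const struct pipe_buf_operations demo_pipe_buf_ops = {
	/* no .confirm: pipe_buf_confirm() now returns 0 automatically */
	.release	= generic_pipe_buf_release,
	.try_steal	= generic_pipe_buf_try_steal,
	.get		= generic_pipe_buf_get,
};
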
diff --git a/include/linux/platform_data/ad5761.h b/include/linux/platform_data/ad5761.h
index 02bef5177ff5..69e261e2ca14 100644
--- a/include/linux/platform_data/ad5761.h
+++ b/include/linux/platform_data/ad5761.h
@@ -3,7 +3,7 @@
* AD5721, AD5721R, AD5761, AD5761R, Voltage Output Digital to Analog Converter
*
* Copyright 2016 Qtechnology A/S
- * 2016 Ricardo Ribalda <ricardo.ribalda@gmail.com>
+ * 2016 Ricardo Ribalda <ribalda@kernel.org>
*/
#ifndef __LINUX_PLATFORM_DATA_AD5761_H__
#define __LINUX_PLATFORM_DATA_AD5761_H__
diff --git a/include/linux/platform_data/gsc_hwmon.h b/include/linux/platform_data/gsc_hwmon.h
new file mode 100644
index 000000000000..ec1611aff863
--- /dev/null
+++ b/include/linux/platform_data/gsc_hwmon.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _GSC_HWMON_H
+#define _GSC_HWMON_H
+
+enum gsc_hwmon_mode {
+ mode_temperature,
+ mode_voltage,
+ mode_voltage_raw,
+ mode_max,
+};
+
+/**
+ * struct gsc_hwmon_channel - configuration parameters
+ * @reg: I2C register offset
+ * @mode: channel mode
+ * @name: channel name
+ * @mvoffset: voltage offset
+ * @vdiv: voltage divider array (2 resistor values in milli-ohms)
+ */
+struct gsc_hwmon_channel {
+ unsigned int reg;
+ unsigned int mode;
+ const char *name;
+ unsigned int mvoffset;
+ unsigned int vdiv[2];
+};
+
+/**
+ * struct gsc_hwmon_platform_data - platform data for gsc_hwmon driver
+ * @channels: pointer to array of gsc_hwmon_channel structures
+ * describing channels
+ * @nchannels: number of elements in @channels array
+ * @vreference: voltage reference (mV)
+ * @resolution: ADC bit resolution
+ * @fan_base: register base for FAN controller
+ */
+struct gsc_hwmon_platform_data {
+ const struct gsc_hwmon_channel *channels;
+ int nchannels;
+ unsigned int resolution;
+ unsigned int vreference;
+ unsigned int fan_base;
+};
+#endif
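
To make the new platform data concrete, a hypothetical board file might describe two channels like this; the register offsets, names and calibration values are made up.

static const struct gsc_hwmon_channel demo_channels[] = {
	{ .reg = 0x00, .mode = mode_temperature, .name = "cpu_temp" },
	{ .reg = 0x02, .mode = mode_voltage, .name = "vdd_3p3",
	  .mvoffset = 0, .vdiv = { 10000, 10000 } },
};

static struct gsc_hwmon_platform_data demo_pdata = {
	.channels	= demo_channels,
	.nchannels	= ARRAY_SIZE(demo_channels),
	.resolution	= 12,
	.vreference	= 2500,
	.fan_base	= 0x2c,
};
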
diff --git a/include/linux/platform_data/itco_wdt.h b/include/linux/platform_data/itco_wdt.h
index 2ccdce6a4e27..45d860cac2b0 100644
--- a/include/linux/platform_data/itco_wdt.h
+++ b/include/linux/platform_data/itco_wdt.h
@@ -12,13 +12,16 @@
#define ICH_RES_MEM_OFF 2
#define ICH_RES_MEM_GCS_PMC 0
+/**
+ * struct itco_wdt_platform_data - iTCO_wdt platform data
+ * @name: Name of the platform
+ * @version: iTCO version
+ * @no_reboot_use_pmc: Use PMC BXT API to set and clear NO_REBOOT bit
+ */
struct itco_wdt_platform_data {
char name[32];
unsigned int version;
- /* private data to be passed to update_no_reboot_bit API */
- void *no_reboot_priv;
- /* pointer for platform specific no reboot update function */
- int (*update_no_reboot_bit)(void *priv, bool set);
+ bool no_reboot_use_pmc;
};
#endif /* _ITCO_WDT_H_ */
diff --git a/include/linux/platform_data/mmc-esdhc-imx.h b/include/linux/platform_data/mmc-esdhc-imx.h
index 0434f68eda86..cba1184b364c 100644
--- a/include/linux/platform_data/mmc-esdhc-imx.h
+++ b/include/linux/platform_data/mmc-esdhc-imx.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * Copyright 2010 Wolfram Sang <w.sang@pengutronix.de>
+ * Copyright 2010 Wolfram Sang <kernel@pengutronix.de>
*/
#ifndef __ASM_ARCH_IMX_ESDHC_H
diff --git a/include/linux/platform_data/mmc-esdhc-mcf.h b/include/linux/platform_data/mmc-esdhc-mcf.h
new file mode 100644
index 000000000000..85cb786a62fe
--- /dev/null
+++ b/include/linux/platform_data/mmc-esdhc-mcf.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __LINUX_PLATFORM_DATA_MCF_ESDHC_H__
+#define __LINUX_PLATFORM_DATA_MCF_ESDHC_H__
+
+enum cd_types {
+ ESDHC_CD_NONE, /* no CD, neither controller nor gpio */
+ ESDHC_CD_CONTROLLER, /* mmc controller internal CD */
+ ESDHC_CD_PERMANENT, /* no CD, card permanently wired to host */
+};
+
+struct mcf_esdhc_platform_data {
+ int max_bus_width;
+ int cd_type;
+};
+
+#endif /* __LINUX_PLATFORM_DATA_MCF_ESDHC_H__ */
diff --git a/include/linux/platform_data/x86/asus-wmi.h b/include/linux/platform_data/x86/asus-wmi.h
index d39fc658c320..897b8332a39f 100644
--- a/include/linux/platform_data/x86/asus-wmi.h
+++ b/include/linux/platform_data/x86/asus-wmi.h
@@ -85,6 +85,9 @@
/* Maximum charging percentage */
#define ASUS_WMI_DEVID_RSOC 0x00120057
+/* Keyboard dock */
+#define ASUS_WMI_DEVID_KBD_DOCK 0x00120063
+
/* DSTS masks */
#define ASUS_WMI_DSTS_STATUS_BIT 0x00000001
#define ASUS_WMI_DSTS_UNKNOWN_BIT 0x00000002
diff --git a/include/linux/pm.h b/include/linux/pm.h
index e057d1fa2469..121c104a4090 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -544,31 +544,17 @@ struct pm_subsys_data {
* These flags can be set by device drivers at the probe time. They need not be
* cleared by the drivers as the driver core will take care of that.
*
- * NEVER_SKIP: Do not skip all system suspend/resume callbacks for the device.
- * SMART_PREPARE: Check the return value of the driver's ->prepare callback.
- * SMART_SUSPEND: No need to resume the device from runtime suspend.
- * LEAVE_SUSPENDED: Avoid resuming the device during system resume if possible.
- *
- * Setting SMART_PREPARE instructs bus types and PM domains which may want
- * system suspend/resume callbacks to be skipped for the device to return 0 from
- * their ->prepare callbacks if the driver's ->prepare callback returns 0 (in
- * other words, the system suspend/resume callbacks can only be skipped for the
- * device if its driver doesn't object against that). This flag has no effect
- * if NEVER_SKIP is set.
- *
- * Setting SMART_SUSPEND instructs bus types and PM domains which may want to
- * runtime resume the device upfront during system suspend that doing so is not
- * necessary from the driver's perspective. It also may cause them to skip
- * invocations of the ->suspend_late and ->suspend_noirq callbacks provided by
- * the driver if they decide to leave the device in runtime suspend.
- *
- * Setting LEAVE_SUSPENDED informs the PM core and middle-layer code that the
- * driver prefers the device to be left in suspend after system resume.
+ * NO_DIRECT_COMPLETE: Do not apply direct-complete optimization to the device.
+ * SMART_PREPARE: Take the driver ->prepare callback return value into account.
+ * SMART_SUSPEND: Avoid resuming the device from runtime suspend.
+ * MAY_SKIP_RESUME: Allow driver "noirq" and "early" callbacks to be skipped.
+ *
+ * See Documentation/driver-api/pm/devices.rst for details.
*/
-#define DPM_FLAG_NEVER_SKIP BIT(0)
+#define DPM_FLAG_NO_DIRECT_COMPLETE BIT(0)
#define DPM_FLAG_SMART_PREPARE BIT(1)
#define DPM_FLAG_SMART_SUSPEND BIT(2)
-#define DPM_FLAG_LEAVE_SUSPENDED BIT(3)
+#define DPM_FLAG_MAY_SKIP_RESUME BIT(3)
struct dev_pm_info {
pm_message_t power_state;
@@ -758,8 +744,8 @@ extern int pm_generic_poweroff_late(struct device *dev);
extern int pm_generic_poweroff(struct device *dev);
extern void pm_generic_complete(struct device *dev);
-extern bool dev_pm_may_skip_resume(struct device *dev);
-extern bool dev_pm_smart_suspend_and_suspended(struct device *dev);
+extern bool dev_pm_skip_resume(struct device *dev);
+extern bool dev_pm_skip_suspend(struct device *dev);
#else /* !CONFIG_PM_SLEEP */
diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index 3bdcbce8141a..3dbc207bff53 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -102,9 +102,9 @@ static inline bool pm_runtime_enabled(struct device *dev)
return !dev->power.disable_depth;
}
-static inline bool pm_runtime_callbacks_present(struct device *dev)
+static inline bool pm_runtime_has_no_callbacks(struct device *dev)
{
- return !dev->power.no_callbacks;
+ return dev->power.no_callbacks;
}
static inline void pm_runtime_mark_last_busy(struct device *dev)
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index bc3f1aecaa19..7d9c1c0e149c 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -26,13 +26,13 @@
* PREEMPT_MASK: 0x000000ff
* SOFTIRQ_MASK: 0x0000ff00
* HARDIRQ_MASK: 0x000f0000
- * NMI_MASK: 0x00100000
+ * NMI_MASK: 0x00f00000
* PREEMPT_NEED_RESCHED: 0x80000000
*/
#define PREEMPT_BITS 8
#define SOFTIRQ_BITS 8
#define HARDIRQ_BITS 4
-#define NMI_BITS 1
+#define NMI_BITS 4
#define PREEMPT_SHIFT 0
#define SOFTIRQ_SHIFT (PREEMPT_SHIFT + PREEMPT_BITS)
diff --git a/include/linux/printk.h b/include/linux/printk.h
index fcde0772ec98..3cc2f178bf06 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -279,39 +279,116 @@ static inline void printk_safe_flush_on_panic(void)
extern int kptr_restrict;
+/**
+ * pr_fmt - used by the pr_*() macros to generate the printk format string
+ * @fmt: format string passed from a pr_*() macro
+ *
+ * This macro can be used to generate a unified format string for pr_*()
+ * macros. A common use is to prefix all pr_*() messages in a file with a common
+ * string. For example, defining this at the top of a source file:
+ *
+ * #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+ *
+ * would prefix all pr_info, pr_emerg... messages in the file with the module
+ * name.
+ */
#ifndef pr_fmt
#define pr_fmt(fmt) fmt
#endif
-/*
- * These can be used to print at the various log levels.
- * All of these will print unconditionally, although note that pr_debug()
- * and other debug macros are compiled out unless either DEBUG is defined
- * or CONFIG_DYNAMIC_DEBUG is set.
+/**
+ * pr_emerg - Print an emergency-level message
+ * @fmt: format string
+ * @...: arguments for the format string
+ *
+ * This macro expands to a printk with KERN_EMERG loglevel. It uses pr_fmt() to
+ * generate the format string.
*/
#define pr_emerg(fmt, ...) \
printk(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__)
+/**
+ * pr_alert - Print an alert-level message
+ * @fmt: format string
+ * @...: arguments for the format string
+ *
+ * This macro expands to a printk with KERN_ALERT loglevel. It uses pr_fmt() to
+ * generate the format string.
+ */
#define pr_alert(fmt, ...) \
printk(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__)
+/**
+ * pr_crit - Print a critical-level message
+ * @fmt: format string
+ * @...: arguments for the format string
+ *
+ * This macro expands to a printk with KERN_CRIT loglevel. It uses pr_fmt() to
+ * generate the format string.
+ */
#define pr_crit(fmt, ...) \
printk(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__)
+/**
+ * pr_err - Print an error-level message
+ * @fmt: format string
+ * @...: arguments for the format string
+ *
+ * This macro expands to a printk with KERN_ERR loglevel. It uses pr_fmt() to
+ * generate the format string.
+ */
#define pr_err(fmt, ...) \
printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__)
+/**
+ * pr_warn - Print a warning-level message
+ * @fmt: format string
+ * @...: arguments for the format string
+ *
+ * This macro expands to a printk with KERN_WARNING loglevel. It uses pr_fmt()
+ * to generate the format string.
+ */
#define pr_warn(fmt, ...) \
printk(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__)
+/**
+ * pr_notice - Print a notice-level message
+ * @fmt: format string
+ * @...: arguments for the format string
+ *
+ * This macro expands to a printk with KERN_NOTICE loglevel. It uses pr_fmt() to
+ * generate the format string.
+ */
#define pr_notice(fmt, ...) \
printk(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__)
+/**
+ * pr_info - Print an info-level message
+ * @fmt: format string
+ * @...: arguments for the format string
+ *
+ * This macro expands to a printk with KERN_INFO loglevel. It uses pr_fmt() to
+ * generate the format string.
+ */
#define pr_info(fmt, ...) \
printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
-/*
- * Like KERN_CONT, pr_cont() should only be used when continuing
- * a line with no newline ('\n') enclosed. Otherwise it defaults
- * back to KERN_DEFAULT.
+
+/**
+ * pr_cont - Continue a previous log message on the same line.
+ * @fmt: format string
+ * @...: arguments for the format string
+ *
+ * This macro expands to a printk with KERN_CONT loglevel. It should only be
+ * used when continuing a log message with no newline ('\n') enclosed. Otherwise
+ * it defaults back to KERN_DEFAULT loglevel.
*/
#define pr_cont(fmt, ...) \
printk(KERN_CONT fmt, ##__VA_ARGS__)
-/* pr_devel() should produce zero code unless DEBUG is defined */
+/**
+ * pr_devel - Print a debug-level message conditionally
+ * @fmt: format string
+ * @...: arguments for the format string
+ *
+ * This macro expands to a printk with KERN_DEBUG loglevel if DEBUG is
+ * defined. Otherwise it does nothing.
+ *
+ * It uses pr_fmt() to generate the format string.
+ */
#ifdef DEBUG
#define pr_devel(fmt, ...) \
printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
@@ -325,8 +402,19 @@ extern int kptr_restrict;
#if defined(CONFIG_DYNAMIC_DEBUG)
#include <linux/dynamic_debug.h>
-/* dynamic_pr_debug() uses pr_fmt() internally so we don't need it here */
-#define pr_debug(fmt, ...) \
+/**
+ * pr_debug - Print a debug-level message conditionally
+ * @fmt: format string
+ * @...: arguments for the format string
+ *
+ * This macro expands to dynamic_pr_debug() if CONFIG_DYNAMIC_DEBUG is
+ * set. Otherwise, if DEBUG is defined, it's equivalent to a printk with
+ * KERN_DEBUG loglevel. If DEBUG is not defined it does nothing.
+ *
+ * It uses pr_fmt() to generate the format string (dynamic_pr_debug() uses
+ * pr_fmt() internally).
+ */
+#define pr_debug(fmt, ...) \
dynamic_pr_debug(fmt, ##__VA_ARGS__)
#elif defined(DEBUG)
#define pr_debug(fmt, ...) \
@@ -384,8 +472,7 @@ extern int kptr_restrict;
printk_once(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__)
#define pr_info_once(fmt, ...) \
printk_once(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
-#define pr_cont_once(fmt, ...) \
- printk_once(KERN_CONT pr_fmt(fmt), ##__VA_ARGS__)
+/* no pr_cont_once, don't do that... */
#if defined(DEBUG)
#define pr_devel_once(fmt, ...) \
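
A short sketch of the convention the new pr_fmt() kernel-doc describes (hypothetical module, not part of the patch): define pr_fmt() before any includes so every pr_*() call in the file is prefixed with the module name.

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/printk.h>

static int __init demo_init(void)
{
	pr_info("probing\n");		/* logs "demo: probing" */
	pr_debug("extra details\n");	/* only with DEBUG or dynamic debug */
	return 0;
}
module_init(demo_init);
MODULE_LICENSE("GPL");
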
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 03953c59807d..ad4ff711fc02 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -182,4 +182,6 @@ static inline struct pid_namespace *proc_pid_ns(const struct inode *inode)
return inode->i_sb->s_fs_info;
}
+bool proc_ns_file(const struct file *file);
+
#endif /* _LINUX_PROC_FS_H */
diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h
index 6abe85c34681..75807ecef880 100644
--- a/include/linux/proc_ns.h
+++ b/include/linux/proc_ns.h
@@ -8,7 +8,7 @@
#include <linux/ns_common.h>
struct pid_namespace;
-struct nsproxy;
+struct nsset;
struct path;
struct task_struct;
struct inode;
@@ -19,7 +19,7 @@ struct proc_ns_operations {
int type;
struct ns_common *(*get)(struct task_struct *task);
void (*put)(struct ns_common *ns);
- int (*install)(struct nsproxy *nsproxy, struct ns_common *ns);
+ int (*install)(struct nsset *nsset, struct ns_common *ns);
struct user_namespace *(*owner)(struct ns_common *ns);
struct ns_common *(*get_parent)(struct ns_common *ns);
} __randomize_layout;
diff --git a/include/linux/property.h b/include/linux/property.h
index d86de017c689..c7b5f3db36aa 100644
--- a/include/linux/property.h
+++ b/include/linux/property.h
@@ -440,6 +440,9 @@ software_node_find_by_name(const struct software_node *parent,
int software_node_register_nodes(const struct software_node *nodes);
void software_node_unregister_nodes(const struct software_node *nodes);
+int software_node_register_node_group(const struct software_node **node_group);
+void software_node_unregister_node_group(const struct software_node **node_group);
+
int software_node_register(const struct software_node *node);
int software_node_notify(struct device *dev, unsigned long action);
diff --git a/include/linux/psci.h b/include/linux/psci.h
index a67712b73b6c..14ad9b9ebcd6 100644
--- a/include/linux/psci.h
+++ b/include/linux/psci.h
@@ -21,11 +21,6 @@ bool psci_power_state_is_valid(u32 state);
int psci_set_osi_mode(void);
bool psci_has_osi_support(void);
-enum smccc_version {
- SMCCC_VERSION_1_0,
- SMCCC_VERSION_1_1,
-};
-
struct psci_operations {
u32 (*get_version)(void);
int (*cpu_suspend)(u32 state, unsigned long entry_point);
@@ -35,8 +30,6 @@ struct psci_operations {
int (*affinity_info)(unsigned long target_affinity,
unsigned long lowest_affinity_level);
int (*migrate_info_type)(void);
- enum arm_smccc_conduit conduit;
- enum smccc_version smccc_version;
};
extern struct psci_operations psci_ops;
diff --git a/include/linux/psp-sev.h b/include/linux/psp-sev.h
index 5167bf2bfc75..7fbc8679145c 100644
--- a/include/linux/psp-sev.h
+++ b/include/linux/psp-sev.h
@@ -100,6 +100,8 @@ struct sev_data_init {
u32 tmr_len; /* In */
} __packed;
+#define SEV_INIT_FLAGS_SEV_ES 0x01
+
/**
* struct sev_data_pek_csr - PEK_CSR command parameters
*
diff --git a/include/linux/pstore.h b/include/linux/pstore.h
index e779441e6d26..eb93a54cff31 100644
--- a/include/linux/pstore.h
+++ b/include/linux/pstore.h
@@ -96,6 +96,12 @@ struct pstore_record {
*
* @read_mutex: serializes @open, @read, @close, and @erase callbacks
* @flags: bitfield of frontends the backend can accept writes for
+ * @max_reason: Used when PSTORE_FLAGS_DMESG is set. Contains the
+ * kmsg_dump_reason enum value. KMSG_DUMP_UNDEF means
+ * "use existing kmsg_dump() filtering, based on the
+ * printk.always_kmsg_dump boot param" (which is either
+ * KMSG_DUMP_OOPS when false, or KMSG_DUMP_MAX when
+ * true); see printk.always_kmsg_dump for more details.
* @data: backend-private pointer passed back during callbacks
*
* Callbacks:
@@ -170,7 +176,7 @@ struct pstore_record {
*/
struct pstore_info {
struct module *owner;
- char *name;
+ const char *name;
struct semaphore buf_lock;
char *buf;
@@ -179,6 +185,7 @@ struct pstore_info {
struct mutex read_mutex;
int flags;
+ int max_reason;
void *data;
int (*open)(struct pstore_info *psi);
diff --git a/include/linux/pstore_blk.h b/include/linux/pstore_blk.h
new file mode 100644
index 000000000000..61e914522b01
--- /dev/null
+++ b/include/linux/pstore_blk.h
@@ -0,0 +1,118 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __PSTORE_BLK_H_
+#define __PSTORE_BLK_H_
+
+#include <linux/types.h>
+#include <linux/pstore.h>
+#include <linux/pstore_zone.h>
+
+/**
+ * typedef pstore_blk_panic_write_op - panic write operation to block device
+ *
+ * @buf: the data to write
+ * @start_sect: starting sector on the block device
+ * @sects: number of sectors in @buf
+ *
+ * Return: Zero on success. Any other value except -ENOMSG means an error;
+ * -ENOMSG means to try the next zone.
+ *
+ * Panic writes to the block device must be aligned to SECTOR_SIZE.
+ */
+typedef int (*pstore_blk_panic_write_op)(const char *buf, sector_t start_sect,
+ sector_t sects);
+
+/**
+ * struct pstore_blk_info - pstore/blk registration details
+ *
+ * @major: Which major device number to support with pstore/blk
+ * @flags: The supported PSTORE_FLAGS_* from linux/pstore.h.
+ * @panic_write:The write operation used only in the panic path.
+ * This can be NULL, but providing it is recommended to avoid
+ * losing crash data if the kernel's IO path or work queues are
+ * broken during a panic.
+ * @devt: The dev_t that pstore/blk has attached to.
+ * @nr_sects: Number of sectors on @devt.
+ * @start_sect: Starting sector on @devt.
+ */
+struct pstore_blk_info {
+ unsigned int major;
+ unsigned int flags;
+ pstore_blk_panic_write_op panic_write;
+
+ /* Filled in by pstore/blk after registration. */
+ dev_t devt;
+ sector_t nr_sects;
+ sector_t start_sect;
+};
+
+int register_pstore_blk(struct pstore_blk_info *info);
+void unregister_pstore_blk(unsigned int major);
+
+/**
+ * struct pstore_device_info - back-end pstore/blk driver structure.
+ *
+ * @total_size: The total size in bytes pstore/blk can use. It must be greater
+ * than 4096 and a multiple of 4096.
+ * @flags: Bitmask of PSTORE_FLAGS_* macros defined in linux/pstore.h,
+ * describing which frontends this device supports. Zero means
+ * all frontends, for compatibility.
+ * @read: The general read operation. Both of the function parameters
+ * @size and @offset are relative to the block device (not the
+ * whole disk).
+ * On success, the number of bytes read should be returned; any
+ * other value means an error.
+ * @write: The same as @read, but with two special error values:
+ * -EBUSY means try to write again later.
+ * -ENOMSG means to try the next zone.
+ * @erase: The general erase operation, for devices that need a dedicated
+ * erase step. Both of the function parameters @size and @offset
+ * are relative to the storage.
+ * Return 0 on success and a negative value on failure.
+ * @panic_write:The write operation used only in the panic path. It's optional
+ * if you do not care about panic logs. The parameters are
+ * relative to the storage.
+ * On success, the number of bytes written should be returned; any other
+ * value except -ENOMSG means an error. -ENOMSG means to try the next zone.
+ */
+struct pstore_device_info {
+ unsigned long total_size;
+ unsigned int flags;
+ pstore_zone_read_op read;
+ pstore_zone_write_op write;
+ pstore_zone_erase_op erase;
+ pstore_zone_write_op panic_write;
+};
+
+int register_pstore_device(struct pstore_device_info *dev);
+void unregister_pstore_device(struct pstore_device_info *dev);
+
+/**
+ * struct pstore_blk_config - the pstore_blk backend configuration
+ *
+ * @device: Name of the desired block device
+ * @max_reason: Maximum kmsg dump reason to store to block device
+ * @kmsg_size: Total size for kmsg dumps
+ * @pmsg_size: Total size of the pmsg storage area
+ * @console_size: Total size of the console storage area
+ * @ftrace_size: Total size for ftrace logging data (for all CPUs)
+ */
+struct pstore_blk_config {
+ char device[80];
+ enum kmsg_dump_reason max_reason;
+ unsigned long kmsg_size;
+ unsigned long pmsg_size;
+ unsigned long console_size;
+ unsigned long ftrace_size;
+};
+
+/**
+ * pstore_blk_get_config - get a copy of the pstore_blk backend configuration
+ *
+ * @info: The struct pstore_blk_config to be filled in
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int pstore_blk_get_config(struct pstore_blk_config *info);
+
+#endif
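
Roughly how a storage driver might hand a region to pstore/blk with the new API; demo_read()/demo_write(), the sizes and the probe wiring are hypothetical, and real callbacks would actually move data and handle errors.

static ssize_t demo_read(char *buf, size_t bytes, loff_t pos)
{
	/* copy @bytes from the backing storage at @pos into @buf */
	return bytes;
}

static ssize_t demo_write(const char *buf, size_t bytes, loff_t pos)
{
	/* persist @bytes from @buf at @pos */
	return bytes;
}

static struct pstore_device_info demo_dev = {
	.total_size	= SZ_1M,		/* > 4096 and a multiple of 4096 */
	.flags		= PSTORE_FLAGS_DMESG,
	.read		= demo_read,
	.write		= demo_write,
};

/* typically called from the driver's probe path */
static int demo_register(void)
{
	return register_pstore_device(&demo_dev);
}
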
diff --git a/include/linux/pstore_ram.h b/include/linux/pstore_ram.h
index 9cb9b9067298..9f16afec7290 100644
--- a/include/linux/pstore_ram.h
+++ b/include/linux/pstore_ram.h
@@ -133,7 +133,7 @@ struct ramoops_platform_data {
unsigned long console_size;
unsigned long ftrace_size;
unsigned long pmsg_size;
- int dump_oops;
+ int max_reason;
u32 flags;
struct persistent_ram_ecc_info ecc_info;
};
diff --git a/include/linux/pstore_zone.h b/include/linux/pstore_zone.h
new file mode 100644
index 000000000000..1e35eaa33e5e
--- /dev/null
+++ b/include/linux/pstore_zone.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __PSTORE_ZONE_H_
+#define __PSTORE_ZONE_H_
+
+#include <linux/types.h>
+
+typedef ssize_t (*pstore_zone_read_op)(char *, size_t, loff_t);
+typedef ssize_t (*pstore_zone_write_op)(const char *, size_t, loff_t);
+typedef ssize_t (*pstore_zone_erase_op)(size_t, loff_t);
+/**
+ * struct pstore_zone_info - pstore/zone back-end driver structure
+ *
+ * @owner: Module which is responsible for this back-end driver.
+ * @name: Name of the back-end driver.
+ * @total_size: The total size in bytes pstore/zone can use. It must be greater
+ * than 4096 and a multiple of 4096.
+ * @kmsg_size: The size of the oops/panic zone. Zero means disabled; otherwise
+ * it must be a multiple of SECTOR_SIZE (512 bytes).
+ * @max_reason: Maximum kmsg dump reason to store.
+ * @pmsg_size: The size of the pmsg zone, with the same constraints as @kmsg_size.
+ * @console_size:The size of the console zone, with the same constraints as @kmsg_size.
+ * @ftrace_size:The size of the ftrace zone, with the same constraints as @kmsg_size.
+ * @read: The general read operation. Both of the function parameters
+ * @size and @offset are relative to the storage.
+ * On success, the number of bytes read should be returned; any
+ * other value means an error.
+ * @write: The same as @read, but with two special error values:
+ * -EBUSY means try to write again later.
+ * -ENOMSG means to try the next zone.
+ * @erase: The general erase operation, for devices that need a dedicated
+ * erase step. Both @size and @offset are relative to the storage.
+ * Return 0 on success and a negative value on failure.
+ * @panic_write:The write operation used only in the panic path. It's optional
+ * if you do not care about panic logs. The parameters are
+ * relative to the storage.
+ * On success, the number of bytes written should be returned; any other
+ * value except -ENOMSG means an error. -ENOMSG means to try the
+ * next zone.
+ */
+struct pstore_zone_info {
+ struct module *owner;
+ const char *name;
+
+ unsigned long total_size;
+ unsigned long kmsg_size;
+ int max_reason;
+ unsigned long pmsg_size;
+ unsigned long console_size;
+ unsigned long ftrace_size;
+ pstore_zone_read_op read;
+ pstore_zone_write_op write;
+ pstore_zone_erase_op erase;
+ pstore_zone_write_op panic_write;
+};
+
+extern int register_pstore_zone(struct pstore_zone_info *info);
+extern void unregister_pstore_zone(struct pstore_zone_info *info);
+
+#endif
diff --git a/include/linux/ptdump.h b/include/linux/ptdump.h
index a67065c403c3..2a3a95586425 100644
--- a/include/linux/ptdump.h
+++ b/include/linux/ptdump.h
@@ -13,7 +13,8 @@ struct ptdump_range {
struct ptdump_state {
/* level is 0:PGD to 4:PTE, or -1 if unknown */
void (*note_page)(struct ptdump_state *st, unsigned long addr,
- int level, unsigned long val);
+ int level, u64 val);
+ void (*effective_prot)(struct ptdump_state *st, int level, u64 val);
const struct ptdump_range *range;
};
diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 63e62372443a..c2a9f7c90727 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -16,11 +16,20 @@
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/xarray.h>
+#include <linux/local_lock.h>
/* Keep unconverted code working */
#define radix_tree_root xarray
#define radix_tree_node xa_node
+struct radix_tree_preload {
+ local_lock_t lock;
+ unsigned nr;
+ /* nodes->parent points to next preallocated node */
+ struct radix_tree_node *nodes;
+};
+DECLARE_PER_CPU(struct radix_tree_preload, radix_tree_preloads);
+
/*
* The bottom two bits of the slot determine how the remaining bits in the
* slot are interpreted:
@@ -245,7 +254,7 @@ int radix_tree_tagged(const struct radix_tree_root *, unsigned int tag);
static inline void radix_tree_preload_end(void)
{
- preempt_enable();
+ local_unlock(&radix_tree_preloads.lock);
}
void __rcu **idr_get_free(struct radix_tree_root *root,
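
For context, this is the classic preload pattern that the local_lock conversion above keeps working on PREEMPT_RT; the spinlock name and GFP flags are illustrative.

static DEFINE_SPINLOCK(demo_lock);	/* hypothetical lock protecting the tree */

static int demo_insert(struct radix_tree_root *root, unsigned long index,
		       void *item)
{
	int err;

	err = radix_tree_preload(GFP_KERNEL);
	if (err)
		return err;

	spin_lock(&demo_lock);
	err = radix_tree_insert(root, index, item);
	spin_unlock(&demo_lock);

	radix_tree_preload_end();	/* now local_unlock() rather than preempt_enable() */
	return err;
}
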
diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h
index 1fd61a9af45c..d7db17996322 100644
--- a/include/linux/rbtree.h
+++ b/include/linux/rbtree.h
@@ -11,7 +11,7 @@
I know it's not the cleaner way, but in C (not in C++) to get
performances and genericity...
- See Documentation/rbtree.txt for documentation and samples.
+ See Documentation/core-api/rbtree.rst for documentation and samples.
*/
#ifndef _LINUX_RBTREE_H
diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h
index 724b0d036b57..d1c53e9d8c75 100644
--- a/include/linux/rbtree_augmented.h
+++ b/include/linux/rbtree_augmented.h
@@ -21,7 +21,7 @@
* rb_insert_augmented() and rb_erase_augmented() are intended to be public.
* The rest are implementation details you are not expected to depend on.
*
- * See Documentation/rbtree.txt for documentation and samples.
+ * See Documentation/core-api/rbtree.rst for documentation and samples.
*/
struct rb_augment_callbacks {
diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index 8214cdc715f2..7375bb3da140 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -371,7 +371,7 @@ static inline void list_splice_tail_init_rcu(struct list_head *list,
* @pos: the type * to use as a loop cursor.
* @head: the head for your list.
* @member: the name of the list_head within the struct.
- * @cond...: optional lockdep expression if called from non-RCU protection.
+ * @cond: optional lockdep expression if called from non-RCU protection.
*
* This list-traversal primitive may safely run concurrently with
* the _rcu list-mutation primitives such as list_add_rcu()
@@ -646,7 +646,7 @@ static inline void hlist_add_behind_rcu(struct hlist_node *n,
* @pos: the type * to use as a loop cursor.
* @head: the head for your list.
* @member: the name of the hlist_node within the struct.
- * @cond...: optional lockdep expression if called from non-RCU protection.
+ * @cond: optional lockdep expression if called from non-RCU protection.
*
* This list-traversal primitive may safely run concurrently with
* the _rcu list-mutation primitives such as hlist_add_head_rcu()
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 2678a37c3169..659cbfa7581a 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -37,6 +37,7 @@
/* Exported common interfaces */
void call_rcu(struct rcu_head *head, rcu_callback_t func);
void rcu_barrier_tasks(void);
+void rcu_barrier_tasks_rude(void);
void synchronize_rcu(void);
#ifdef CONFIG_PREEMPT_RCU
@@ -129,25 +130,57 @@ static inline void rcu_init_nohz(void) { }
* Note a quasi-voluntary context switch for RCU-tasks's benefit.
* This is a macro rather than an inline function to avoid #include hell.
*/
-#ifdef CONFIG_TASKS_RCU
-#define rcu_tasks_qs(t) \
- do { \
- if (READ_ONCE((t)->rcu_tasks_holdout)) \
- WRITE_ONCE((t)->rcu_tasks_holdout, false); \
+#ifdef CONFIG_TASKS_RCU_GENERIC
+
+# ifdef CONFIG_TASKS_RCU
+# define rcu_tasks_classic_qs(t, preempt) \
+ do { \
+ if (!(preempt) && READ_ONCE((t)->rcu_tasks_holdout)) \
+ WRITE_ONCE((t)->rcu_tasks_holdout, false); \
} while (0)
-#define rcu_note_voluntary_context_switch(t) rcu_tasks_qs(t)
void call_rcu_tasks(struct rcu_head *head, rcu_callback_t func);
void synchronize_rcu_tasks(void);
+# else
+# define rcu_tasks_classic_qs(t, preempt) do { } while (0)
+# define call_rcu_tasks call_rcu
+# define synchronize_rcu_tasks synchronize_rcu
+# endif
+
+# ifdef CONFIG_TASKS_RCU_TRACE
+# define rcu_tasks_trace_qs(t) \
+ do { \
+ if (!likely(READ_ONCE((t)->trc_reader_checked)) && \
+ !unlikely(READ_ONCE((t)->trc_reader_nesting))) { \
+ smp_store_release(&(t)->trc_reader_checked, true); \
+ smp_mb(); /* Readers partitioned by store. */ \
+ } \
+ } while (0)
+# else
+# define rcu_tasks_trace_qs(t) do { } while (0)
+# endif
+
+#define rcu_tasks_qs(t, preempt) \
+do { \
+ rcu_tasks_classic_qs((t), (preempt)); \
+ rcu_tasks_trace_qs((t)); \
+} while (0)
+
+# ifdef CONFIG_TASKS_RUDE_RCU
+void call_rcu_tasks_rude(struct rcu_head *head, rcu_callback_t func);
+void synchronize_rcu_tasks_rude(void);
+# endif
+
+#define rcu_note_voluntary_context_switch(t) rcu_tasks_qs(t, false)
void exit_tasks_rcu_start(void);
void exit_tasks_rcu_finish(void);
-#else /* #ifdef CONFIG_TASKS_RCU */
-#define rcu_tasks_qs(t) do { } while (0)
+#else /* #ifdef CONFIG_TASKS_RCU_GENERIC */
+#define rcu_tasks_qs(t, preempt) do { } while (0)
#define rcu_note_voluntary_context_switch(t) do { } while (0)
#define call_rcu_tasks call_rcu
#define synchronize_rcu_tasks synchronize_rcu
static inline void exit_tasks_rcu_start(void) { }
static inline void exit_tasks_rcu_finish(void) { }
-#endif /* #else #ifdef CONFIG_TASKS_RCU */
+#endif /* #else #ifdef CONFIG_TASKS_RCU_GENERIC */
/**
* cond_resched_tasks_rcu_qs - Report potential quiescent states to RCU
@@ -158,7 +191,7 @@ static inline void exit_tasks_rcu_finish(void) { }
*/
#define cond_resched_tasks_rcu_qs() \
do { \
- rcu_tasks_qs(current); \
+ rcu_tasks_qs(current, false); \
cond_resched(); \
} while (0)
diff --git a/include/linux/rcupdate_trace.h b/include/linux/rcupdate_trace.h
new file mode 100644
index 000000000000..4c25a41f8b27
--- /dev/null
+++ b/include/linux/rcupdate_trace.h
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Read-Copy Update mechanism for mutual exclusion, adapted for tracing.
+ *
+ * Copyright (C) 2020 Paul E. McKenney.
+ */
+
+#ifndef __LINUX_RCUPDATE_TRACE_H
+#define __LINUX_RCUPDATE_TRACE_H
+
+#include <linux/sched.h>
+#include <linux/rcupdate.h>
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+
+extern struct lockdep_map rcu_trace_lock_map;
+
+static inline int rcu_read_lock_trace_held(void)
+{
+ return lock_is_held(&rcu_trace_lock_map);
+}
+
+#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+
+static inline int rcu_read_lock_trace_held(void)
+{
+ return 1;
+}
+
+#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+
+#ifdef CONFIG_TASKS_TRACE_RCU
+
+void rcu_read_unlock_trace_special(struct task_struct *t, int nesting);
+
+/**
+ * rcu_read_lock_trace - mark beginning of RCU-trace read-side critical section
+ *
+ * When synchronize_rcu_tasks_trace() is invoked by one task, that task
+ * is guaranteed to block until all other tasks exit their read-side
+ * critical sections. Similarly, if call_rcu_tasks_trace() is invoked on one
+ * task while other tasks are within RCU read-side critical sections,
+ * invocation of the corresponding RCU callback is deferred until after
+ * all the other tasks exit their critical sections.
+ *
+ * For more details, please see the documentation for rcu_read_lock().
+ */
+static inline void rcu_read_lock_trace(void)
+{
+ struct task_struct *t = current;
+
+ WRITE_ONCE(t->trc_reader_nesting, READ_ONCE(t->trc_reader_nesting) + 1);
+ if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB) &&
+ t->trc_reader_special.b.need_mb)
+ smp_mb(); // Pairs with update-side barriers
+ rcu_lock_acquire(&rcu_trace_lock_map);
+}
+
+/**
+ * rcu_read_unlock_trace - mark end of RCU-trace read-side critical section
+ *
+ * Pairs with a preceding call to rcu_read_lock_trace(), and nesting is
+ * allowed. Invoking rcu_read_unlock_trace() when there is no matching
+ * rcu_read_lock_trace() is verboten, and will result in lockdep complaints.
+ *
+ * For more details, please see the documentation for rcu_read_unlock().
+ */
+static inline void rcu_read_unlock_trace(void)
+{
+ int nesting;
+ struct task_struct *t = current;
+
+ rcu_lock_release(&rcu_trace_lock_map);
+ nesting = READ_ONCE(t->trc_reader_nesting) - 1;
+ if (likely(!READ_ONCE(t->trc_reader_special.s)) || nesting) {
+ WRITE_ONCE(t->trc_reader_nesting, nesting);
+ return; // We assume shallow reader nesting.
+ }
+ rcu_read_unlock_trace_special(t, nesting);
+}
+
+void call_rcu_tasks_trace(struct rcu_head *rhp, rcu_callback_t func);
+void synchronize_rcu_tasks_trace(void);
+void rcu_barrier_tasks_trace(void);
+
+#endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
+
+#endif /* __LINUX_RCUPDATE_TRACE_H */
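
A minimal reader/updater sketch using the new flavour; demo_obj, demo_ptr and the helpers are hypothetical, and a real user (such as BPF) pairs this with its own publication and lifetime rules.

#include <linux/slab.h>

struct demo_obj {
	int val;
};

static struct demo_obj *demo_ptr;	/* published pointer, hypothetical */

static int demo_reader(void)
{
	struct demo_obj *p;
	int ret = -1;

	rcu_read_lock_trace();
	p = READ_ONCE(demo_ptr);
	if (p)
		ret = p->val;
	rcu_read_unlock_trace();
	return ret;
}

static void demo_retire(struct demo_obj *old)
{
	WRITE_ONCE(demo_ptr, NULL);
	synchronize_rcu_tasks_trace();	/* wait for all trace readers to finish */
	kfree(old);
}
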
diff --git a/include/linux/rcupdate_wait.h b/include/linux/rcupdate_wait.h
index c0578ba23c1a..699b938358bf 100644
--- a/include/linux/rcupdate_wait.h
+++ b/include/linux/rcupdate_wait.h
@@ -31,4 +31,23 @@ do { \
#define wait_rcu_gp(...) _wait_rcu_gp(false, __VA_ARGS__)
+/**
+ * synchronize_rcu_mult - Wait concurrently for multiple grace periods
+ * @...: List of call_rcu() functions for different grace periods to wait on
+ *
+ * This macro waits concurrently for multiple types of RCU grace periods.
+ * For example, synchronize_rcu_mult(call_rcu, call_rcu_tasks) would wait
+ * on concurrent RCU and RCU-tasks grace periods. Waiting on a given SRCU
+ * domain requires you to write a wrapper function for that SRCU domain's
+ * call_srcu() function, with this wrapper supplying the pointer to the
+ * corresponding srcu_struct.
+ *
+ * The first argument tells Tiny RCU's _wait_rcu_gp() not to bother
+ * waiting for RCU, because under Tiny RCU any context from which
+ * synchronize_rcu_mult() can be called already constitutes a full
+ * grace period.
+ */
+#define synchronize_rcu_mult(...) \
+ _wait_rcu_gp(IS_ENABLED(CONFIG_TINY_RCU), __VA_ARGS__)
+
#endif /* _LINUX_SCHED_RCUPDATE_WAIT_H */
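
As the new kernel-doc above notes, waiting on an SRCU domain requires a small wrapper supplying the srcu_struct. A hypothetical sketch (demo_srcu and the helpers are made-up names):

#include <linux/srcu.h>

DEFINE_STATIC_SRCU(demo_srcu);		/* hypothetical SRCU domain */

static void call_demo_srcu(struct rcu_head *head, rcu_callback_t func)
{
	call_srcu(&demo_srcu, head, func);
}

static void demo_wait(void)
{
	/* wait concurrently for normal RCU, RCU-tasks and demo-SRCU grace periods */
	synchronize_rcu_mult(call_rcu, call_rcu_tasks, call_demo_srcu);
}
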
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 045c28b71f4f..8512caeb7682 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -49,7 +49,7 @@ static inline void rcu_softirq_qs(void)
#define rcu_note_context_switch(preempt) \
do { \
rcu_qs(); \
- rcu_tasks_qs(current); \
+ rcu_tasks_qs(current, (preempt)); \
} while (0)
static inline int rcu_needs_cpu(u64 basemono, u64 *nextevt)
@@ -71,6 +71,8 @@ static inline void rcu_irq_enter(void) { }
static inline void rcu_irq_exit_irqson(void) { }
static inline void rcu_irq_enter_irqson(void) { }
static inline void rcu_irq_exit(void) { }
+static inline void rcu_irq_exit_preempt(void) { }
+static inline void rcu_irq_exit_check_preempt(void) { }
static inline void exit_rcu(void) { }
static inline bool rcu_preempt_need_deferred_qs(struct task_struct *t)
{
@@ -85,8 +87,10 @@ static inline void rcu_scheduler_starting(void) { }
static inline void rcu_end_inkernel_boot(void) { }
static inline bool rcu_inkernel_boot_has_ended(void) { return true; }
static inline bool rcu_is_watching(void) { return true; }
+static inline bool __rcu_is_watching(void) { return true; }
static inline void rcu_momentary_dyntick_idle(void) { }
static inline void kfree_rcu_scheduler_running(void) { }
+static inline bool rcu_gp_might_be_stalled(void) { return false; }
/* Avoid RCU read-side critical sections leaking across. */
static inline void rcu_all_qs(void) { barrier(); }
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 45f3f66bb04d..d5cc9d675987 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -39,6 +39,7 @@ void rcu_barrier(void);
bool rcu_eqs_special_set(int cpu);
void rcu_momentary_dyntick_idle(void);
void kfree_rcu_scheduler_running(void);
+bool rcu_gp_might_be_stalled(void);
unsigned long get_state_synchronize_rcu(void);
void cond_synchronize_rcu(unsigned long oldstate);
@@ -46,9 +47,16 @@ void rcu_idle_enter(void);
void rcu_idle_exit(void);
void rcu_irq_enter(void);
void rcu_irq_exit(void);
+void rcu_irq_exit_preempt(void);
void rcu_irq_enter_irqson(void);
void rcu_irq_exit_irqson(void);
+#ifdef CONFIG_PROVE_RCU
+void rcu_irq_exit_check_preempt(void);
+#else
+static inline void rcu_irq_exit_check_preempt(void) { }
+#endif
+
void exit_rcu(void);
void rcu_scheduler_starting(void);
@@ -56,6 +64,7 @@ extern int rcu_scheduler_active __read_mostly;
void rcu_end_inkernel_boot(void);
bool rcu_inkernel_boot_has_ended(void);
bool rcu_is_watching(void);
+bool __rcu_is_watching(void);
#ifndef CONFIG_PREEMPTION
void rcu_all_qs(void);
#endif
diff --git a/include/linux/rcuwait.h b/include/linux/rcuwait.h
index 2ffe1ee6d482..61c56cca95c4 100644
--- a/include/linux/rcuwait.h
+++ b/include/linux/rcuwait.h
@@ -25,16 +25,38 @@ static inline void rcuwait_init(struct rcuwait *w)
w->task = NULL;
}
-extern void rcuwait_wake_up(struct rcuwait *w);
+/*
+ * Note: this provides no serialization and, just as with waitqueues,
+ * requires care when deciding whether or not the wait is active.
+ */
+static inline int rcuwait_active(struct rcuwait *w)
+{
+ return !!rcu_access_pointer(w->task);
+}
+
+extern int rcuwait_wake_up(struct rcuwait *w);
/*
* The caller is responsible for locking around rcuwait_wait_event(),
- * such that writes to @task are properly serialized.
+ * and [prepare_to/finish]_rcuwait() such that writes to @task are
+ * properly serialized.
*/
+
+static inline void prepare_to_rcuwait(struct rcuwait *w)
+{
+ rcu_assign_pointer(w->task, current);
+}
+
+static inline void finish_rcuwait(struct rcuwait *w)
+{
+ rcu_assign_pointer(w->task, NULL);
+ __set_current_state(TASK_RUNNING);
+}
+
#define rcuwait_wait_event(w, condition, state) \
({ \
int __ret = 0; \
- rcu_assign_pointer((w)->task, current); \
+ prepare_to_rcuwait(w); \
for (;;) { \
/* \
* Implicit barrier (A) pairs with (B) in \
@@ -51,9 +73,7 @@ extern void rcuwait_wake_up(struct rcuwait *w);
\
schedule(); \
} \
- \
- WRITE_ONCE((w)->task, NULL); \
- __set_current_state(TASK_RUNNING); \
+ finish_rcuwait(w); \
__ret; \
})
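
A hypothetical single-waiter sketch of the rcuwait API touched above; the names are illustrative, and the flag plus rcuwait would normally live in a structure initialized with rcuwait_init().

static struct rcuwait demo_wait;	/* rcuwait_init() in the owner's setup path */
static bool demo_done;

static void demo_wait_for_done(void)
{
	rcuwait_wait_event(&demo_wait, READ_ONCE(demo_done),
			   TASK_UNINTERRUPTIBLE);
}

static void demo_signal_done(void)
{
	WRITE_ONCE(demo_done, true);
	rcuwait_wake_up(&demo_wait);	/* now returns whether a task was woken */
}
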
diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index ddf0baff195d..cb666b9c6b6a 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -17,10 +17,12 @@
#include <linux/err.h>
#include <linux/bug.h>
#include <linux/lockdep.h>
+#include <linux/iopoll.h>
struct module;
struct clk;
struct device;
+struct device_node;
struct i2c_client;
struct i3c_device;
struct irq_domain;
@@ -71,6 +73,13 @@ struct reg_sequence {
unsigned int delay_us;
};
+#define REG_SEQ(_reg, _def, _delay_us) { \
+ .reg = _reg, \
+ .def = _def, \
+ .delay_us = _delay_us, \
+ }
+#define REG_SEQ0(_reg, _def) REG_SEQ(_reg, _def, 0)
+
#define regmap_update_bits(map, reg, mask, val) \
regmap_update_bits_base(map, reg, mask, val, NULL, false, false)
#define regmap_update_bits_async(map, reg, mask, val)\
@@ -122,26 +131,10 @@ struct reg_sequence {
*/
#define regmap_read_poll_timeout(map, addr, val, cond, sleep_us, timeout_us) \
({ \
- u64 __timeout_us = (timeout_us); \
- unsigned long __sleep_us = (sleep_us); \
- ktime_t __timeout = ktime_add_us(ktime_get(), __timeout_us); \
- int __ret; \
- might_sleep_if(__sleep_us); \
- for (;;) { \
- __ret = regmap_read((map), (addr), &(val)); \
- if (__ret) \
- break; \
- if (cond) \
- break; \
- if ((__timeout_us) && \
- ktime_compare(ktime_get(), __timeout) > 0) { \
- __ret = regmap_read((map), (addr), &(val)); \
- break; \
- } \
- if (__sleep_us) \
- usleep_range((__sleep_us >> 2) + 1, __sleep_us); \
- } \
- __ret ?: ((cond) ? 0 : -ETIMEDOUT); \
+ int __ret, __tmp; \
+ __tmp = read_poll_timeout(regmap_read, __ret, __ret || (cond), \
+ sleep_us, timeout_us, false, (map), (addr), &(val)); \
+ __ret ?: __tmp; \
})
/**
@@ -209,25 +202,10 @@ struct reg_sequence {
*/
#define regmap_field_read_poll_timeout(field, val, cond, sleep_us, timeout_us) \
({ \
- u64 __timeout_us = (timeout_us); \
- unsigned long __sleep_us = (sleep_us); \
- ktime_t timeout = ktime_add_us(ktime_get(), __timeout_us); \
- int pollret; \
- might_sleep_if(__sleep_us); \
- for (;;) { \
- pollret = regmap_field_read((field), &(val)); \
- if (pollret) \
- break; \
- if (cond) \
- break; \
- if (__timeout_us && ktime_compare(ktime_get(), timeout) > 0) { \
- pollret = regmap_field_read((field), &(val)); \
- break; \
- } \
- if (__sleep_us) \
- usleep_range((__sleep_us >> 2) + 1, __sleep_us); \
- } \
- pollret ?: ((cond) ? 0 : -ETIMEDOUT); \
+ int __ret, __tmp; \
+ __tmp = read_poll_timeout(regmap_field_read, __ret, __ret || (cond), \
+ sleep_us, timeout_us, false, (field), &(val)); \
+ __ret ?: __tmp; \
})
#ifdef CONFIG_REGMAP
@@ -1149,6 +1127,14 @@ struct reg_field {
.msb = _msb, \
}
+#define REG_FIELD_ID(_reg, _lsb, _msb, _size, _offset) { \
+ .reg = _reg, \
+ .lsb = _lsb, \
+ .msb = _msb, \
+ .id_size = _size, \
+ .id_offset = _offset, \
+ }
+
struct regmap_field *regmap_field_alloc(struct regmap *regmap,
struct reg_field reg_field);
void regmap_field_free(struct regmap_field *field);
@@ -1325,12 +1311,21 @@ struct regmap_irq_chip_data;
int regmap_add_irq_chip(struct regmap *map, int irq, int irq_flags,
int irq_base, const struct regmap_irq_chip *chip,
struct regmap_irq_chip_data **data);
+int regmap_add_irq_chip_np(struct device_node *np, struct regmap *map, int irq,
+ int irq_flags, int irq_base,
+ const struct regmap_irq_chip *chip,
+ struct regmap_irq_chip_data **data);
void regmap_del_irq_chip(int irq, struct regmap_irq_chip_data *data);
int devm_regmap_add_irq_chip(struct device *dev, struct regmap *map, int irq,
int irq_flags, int irq_base,
const struct regmap_irq_chip *chip,
struct regmap_irq_chip_data **data);
+int devm_regmap_add_irq_chip_np(struct device *dev, struct device_node *np,
+ struct regmap *map, int irq, int irq_flags,
+ int irq_base,
+ const struct regmap_irq_chip *chip,
+ struct regmap_irq_chip_data **data);
void devm_regmap_del_irq_chip(struct device *dev, int irq,
struct regmap_irq_chip_data *data);
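
A hypothetical caller of the (now read_poll_timeout()-based) helper; REG_STATUS and STATUS_READY are made-up register definitions.

#define REG_STATUS	0x04		/* hypothetical status register */
#define STATUS_READY	BIT(0)		/* hypothetical ready bit */

static int demo_wait_ready(struct device *dev, struct regmap *map)
{
	unsigned int status;
	int ret;

	/* poll roughly every 1 ms, give up after 100 ms */
	ret = regmap_read_poll_timeout(map, REG_STATUS, status,
				       status & STATUS_READY,
				       1000, 100 * 1000);
	if (ret)
		dev_err(dev, "device not ready: %d\n", ret);
	return ret;
}
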
diff --git a/include/linux/regulator/coupler.h b/include/linux/regulator/coupler.h
index 0212d6255e4e..5f86824bd117 100644
--- a/include/linux/regulator/coupler.h
+++ b/include/linux/regulator/coupler.h
@@ -62,6 +62,8 @@ int regulator_get_voltage_rdev(struct regulator_dev *rdev);
int regulator_set_voltage_rdev(struct regulator_dev *rdev,
int min_uV, int max_uV,
suspend_state_t state);
+int regulator_do_balance_voltage(struct regulator_dev *rdev,
+ suspend_state_t state, bool skip_coupled);
#else
static inline int regulator_coupler_register(struct regulator_coupler *coupler)
{
@@ -92,6 +94,12 @@ static inline int regulator_set_voltage_rdev(struct regulator_dev *rdev,
{
return -EINVAL;
}
+static inline int regulator_do_balance_voltage(struct regulator_dev *rdev,
+ suspend_state_t state,
+ bool skip_coupled)
+{
+ return -EINVAL;
+}
#endif
#endif
diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index 29d920516e0b..7eb9fea8e482 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h
@@ -13,6 +13,7 @@
#define __LINUX_REGULATOR_DRIVER_H_
#include <linux/device.h>
+#include <linux/linear_range.h>
#include <linux/notifier.h>
#include <linux/regulator/consumer.h>
#include <linux/ww_mutex.h>
@@ -39,31 +40,13 @@ enum regulator_status {
REGULATOR_STATUS_UNDEFINED,
};
-/**
- * struct regulator_linear_range - specify linear voltage ranges
- *
- * Specify a range of voltages for regulator_map_linear_range() and
- * regulator_list_linear_range().
- *
- * @min_uV: Lowest voltage in range
- * @min_sel: Lowest selector for range
- * @max_sel: Highest selector for range
- * @uV_step: Step size
- */
-struct regulator_linear_range {
- unsigned int min_uV;
- unsigned int min_sel;
- unsigned int max_sel;
- unsigned int uV_step;
-};
-
-/* Initialize struct regulator_linear_range */
+/* Initialize struct linear_range for regulators */
#define REGULATOR_LINEAR_RANGE(_min_uV, _min_sel, _max_sel, _step_uV) \
{ \
- .min_uV = _min_uV, \
+ .min = _min_uV, \
.min_sel = _min_sel, \
.max_sel = _max_sel, \
- .uV_step = _step_uV, \
+ .step = _step_uV, \
}
/**
@@ -348,7 +331,7 @@ struct regulator_desc {
unsigned int ramp_delay;
int min_dropout_uV;
- const struct regulator_linear_range *linear_ranges;
+ const struct linear_range *linear_ranges;
const unsigned int *linear_range_selectors;
int n_linear_ranges;
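
Illustrative only: a driver's voltage table after the switch to the generic struct linear_range; the voltages and selector ranges below are made up.

static const struct linear_range demo_buck_ranges[] = {
	/* 0.80 V .. 1.175 V in 25 mV steps, selectors 0x00..0x0f */
	REGULATOR_LINEAR_RANGE(800000, 0x00, 0x0f, 25000),
	/* 1.20 V .. 1.95 V in 50 mV steps, selectors 0x10..0x1f */
	REGULATOR_LINEAR_RANGE(1200000, 0x10, 0x1f, 50000),
};

/* referenced from the driver's regulator_desc:
 *	.linear_ranges   = demo_buck_ranges,
 *	.n_linear_ranges = ARRAY_SIZE(demo_buck_ranges),
 */
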
diff --git a/include/linux/relay.h b/include/linux/relay.h
index c759f96e39c1..e13a333e7c37 100644
--- a/include/linux/relay.h
+++ b/include/linux/relay.h
@@ -141,7 +141,7 @@ struct rchan_callbacks
* cause relay_open() to create a single global buffer rather
* than the default set of per-cpu buffers.
*
- * See Documentation/filesystems/relay.txt for more info.
+ * See Documentation/filesystems/relay.rst for more info.
*/
struct dentry *(*create_buf_file)(const char *filename,
struct dentry *parent,
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4418f5cb8324..57a5ce9f33c5 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -613,7 +613,7 @@ union rcu_special {
u8 blocked;
u8 need_qs;
u8 exp_hint; /* Hint for performance. */
- u8 deferred_qs;
+ u8 need_mb; /* Readers need smp_mb(). */
} b; /* Bits. */
u32 s; /* Set of bits. */
};
@@ -654,6 +654,7 @@ struct task_struct {
#ifdef CONFIG_SMP
struct llist_node wake_entry;
+ unsigned int wake_entry_type;
int on_cpu;
#ifdef CONFIG_THREAD_INFO_IN_TASK
/* Current CPU: */
@@ -724,6 +725,14 @@ struct task_struct {
struct list_head rcu_tasks_holdout_list;
#endif /* #ifdef CONFIG_TASKS_RCU */
+#ifdef CONFIG_TASKS_TRACE_RCU
+ int trc_reader_nesting;
+ int trc_ipi_to_cpu;
+ union rcu_special trc_reader_special;
+ bool trc_reader_checked;
+ struct list_head trc_holdout_list;
+#endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
+
struct sched_info sched_info;
struct list_head tasks;
@@ -983,6 +992,7 @@ struct task_struct {
unsigned int hardirq_disable_event;
int hardirqs_enabled;
int hardirq_context;
+ u64 hardirq_chain_key;
unsigned long softirq_disable_ip;
unsigned long softirq_enable_ip;
unsigned int softirq_disable_event;
@@ -1289,6 +1299,12 @@ struct task_struct {
unsigned long prev_lowest_stack;
#endif
+#ifdef CONFIG_X86_MCE
+ u64 mce_addr;
+ u64 mce_status;
+ struct callback_head mce_kill_me;
+#endif
+
/*
* New fields for task_struct should be added above here, so that
* they are included in the randomized portion of task_struct.
@@ -1481,7 +1497,8 @@ extern struct pid *cad_pid;
#define PF_KSWAPD 0x00020000 /* I am kswapd */
#define PF_MEMALLOC_NOFS 0x00040000 /* All allocation requests will inherit GFP_NOFS */
#define PF_MEMALLOC_NOIO 0x00080000 /* All allocation requests will inherit GFP_NOIO */
-#define PF_LESS_THROTTLE 0x00100000 /* Throttle me less: I clean memory */
+#define PF_LOCAL_THROTTLE 0x00100000 /* Throttle writes only against the bdi I write to,
+ * I am cleaning dirty pages from some other bdi. */
#define PF_KTHREAD 0x00200000 /* I am a kernel thread */
#define PF_RANDOMIZE 0x00400000 /* Randomize virtual address space */
#define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */
@@ -1715,7 +1732,15 @@ extern char *__get_task_comm(char *to, size_t len, struct task_struct *tsk);
})
#ifdef CONFIG_SMP
-void scheduler_ipi(void);
+static __always_inline void scheduler_ipi(void)
+{
+ /*
+ * Fold TIF_NEED_RESCHED into the preempt_count; anybody setting
+ * TIF_NEED_RESCHED remotely (for the first time) will also send
+ * this IPI.
+ */
+ preempt_fold_need_resched();
+}
extern unsigned long wait_task_inactive(struct task_struct *, long match_state);
#else
static inline void scheduler_ipi(void) { }
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index c49257a3b510..a132d875d351 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -49,6 +49,8 @@ static inline void mmdrop(struct mm_struct *mm)
__mmdrop(mm);
}
+void mmdrop(struct mm_struct *mm);
+
/*
* This has to be called after a get_task_mm()/mmget_not_zero()
* followed by taking the mmap_sem for writing before modifying the
diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index 95253ad792b0..fb11091129b3 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -11,21 +11,20 @@
*/
#ifdef CONFIG_SMP
-#define SD_LOAD_BALANCE 0x0001 /* Do load balancing on this domain. */
-#define SD_BALANCE_NEWIDLE 0x0002 /* Balance when about to become idle */
-#define SD_BALANCE_EXEC 0x0004 /* Balance on exec */
-#define SD_BALANCE_FORK 0x0008 /* Balance on fork, clone */
-#define SD_BALANCE_WAKE 0x0010 /* Balance on wakeup */
-#define SD_WAKE_AFFINE 0x0020 /* Wake task to waking CPU */
-#define SD_ASYM_CPUCAPACITY 0x0040 /* Domain members have different CPU capacities */
-#define SD_SHARE_CPUCAPACITY 0x0080 /* Domain members share CPU capacity */
-#define SD_SHARE_POWERDOMAIN 0x0100 /* Domain members share power domain */
-#define SD_SHARE_PKG_RESOURCES 0x0200 /* Domain members share CPU pkg resources */
-#define SD_SERIALIZE 0x0400 /* Only a single load balancing instance */
-#define SD_ASYM_PACKING 0x0800 /* Place busy groups earlier in the domain */
-#define SD_PREFER_SIBLING 0x1000 /* Prefer to place tasks in a sibling domain */
-#define SD_OVERLAP 0x2000 /* sched_domains of this level overlap */
-#define SD_NUMA 0x4000 /* cross-node balancing */
+#define SD_BALANCE_NEWIDLE 0x0001 /* Balance when about to become idle */
+#define SD_BALANCE_EXEC 0x0002 /* Balance on exec */
+#define SD_BALANCE_FORK 0x0004 /* Balance on fork, clone */
+#define SD_BALANCE_WAKE 0x0008 /* Balance on wakeup */
+#define SD_WAKE_AFFINE 0x0010 /* Wake task to waking CPU */
+#define SD_ASYM_CPUCAPACITY 0x0020 /* Domain members have different CPU capacities */
+#define SD_SHARE_CPUCAPACITY 0x0040 /* Domain members share CPU capacity */
+#define SD_SHARE_POWERDOMAIN 0x0080 /* Domain members share power domain */
+#define SD_SHARE_PKG_RESOURCES 0x0100 /* Domain members share CPU pkg resources */
+#define SD_SERIALIZE 0x0200 /* Only a single load balancing instance */
+#define SD_ASYM_PACKING 0x0400 /* Place busy groups earlier in the domain */
+#define SD_PREFER_SIBLING 0x0800 /* Prefer to place tasks in a sibling domain */
+#define SD_OVERLAP 0x1000 /* sched_domains of this level overlap */
+#define SD_NUMA 0x2000 /* cross-node balancing */
#ifdef CONFIG_SCHED_SMT
static inline int cpu_smt_flags(void)
diff --git a/include/linux/scs.h b/include/linux/scs.h
new file mode 100644
index 000000000000..6dec390cf154
--- /dev/null
+++ b/include/linux/scs.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Shadow Call Stack support.
+ *
+ * Copyright (C) 2019 Google LLC
+ */
+
+#ifndef _LINUX_SCS_H
+#define _LINUX_SCS_H
+
+#include <linux/gfp.h>
+#include <linux/poison.h>
+#include <linux/sched.h>
+#include <linux/sizes.h>
+
+#ifdef CONFIG_SHADOW_CALL_STACK
+
+/*
+ * In testing, 1 KiB shadow stack size (i.e. 128 stack frames on a 64-bit
+ * architecture) provided ~40% safety margin on stack usage while keeping
+ * memory allocation overhead reasonable.
+ */
+#define SCS_SIZE SZ_1K
+#define GFP_SCS (GFP_KERNEL | __GFP_ZERO)
+
+/* An illegal pointer value to mark the end of the shadow stack. */
+#define SCS_END_MAGIC (0x5f6UL + POISON_POINTER_DELTA)
+
+/* Allocate a static per-CPU shadow stack */
+#define DEFINE_SCS(name) \
+ DEFINE_PER_CPU(unsigned long [SCS_SIZE/sizeof(long)], name) \
+
+#define task_scs(tsk) (task_thread_info(tsk)->scs_base)
+#define task_scs_sp(tsk) (task_thread_info(tsk)->scs_sp)
+
+void scs_init(void);
+int scs_prepare(struct task_struct *tsk, int node);
+void scs_release(struct task_struct *tsk);
+
+static inline void scs_task_reset(struct task_struct *tsk)
+{
+ /*
+ * Reset the shadow stack to the base address in case the task
+ * is reused.
+ */
+ task_scs_sp(tsk) = task_scs(tsk);
+}
+
+static inline unsigned long *__scs_magic(void *s)
+{
+ return (unsigned long *)(s + SCS_SIZE) - 1;
+}
+
+static inline bool task_scs_end_corrupted(struct task_struct *tsk)
+{
+ unsigned long *magic = __scs_magic(task_scs(tsk));
+ unsigned long sz = task_scs_sp(tsk) - task_scs(tsk);
+
+ return sz >= SCS_SIZE - 1 || READ_ONCE_NOCHECK(*magic) != SCS_END_MAGIC;
+}
+
+#else /* CONFIG_SHADOW_CALL_STACK */
+
+static inline void scs_init(void) {}
+static inline void scs_task_reset(struct task_struct *tsk) {}
+static inline int scs_prepare(struct task_struct *tsk, int node) { return 0; }
+static inline void scs_release(struct task_struct *tsk) {}
+static inline bool task_scs_end_corrupted(struct task_struct *tsk) { return false; }
+
+#endif /* CONFIG_SHADOW_CALL_STACK */
+
+#endif /* _LINUX_SCS_H */
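
The header above is the entire shadow call stack API. As a rough, hedged sketch (the hook names below are invented; the real call sites live in kernel/fork.c and the arm64 code), a task-lifecycle integration would look roughly like this:

#include <linux/sched.h>
#include <linux/scs.h>

static int example_prepare_task(struct task_struct *tsk, int node)
{
	/* Allocates a zeroed 1 KiB stack and writes SCS_END_MAGIC at its top. */
	return scs_prepare(tsk, node);
}

static void example_release_task(struct task_struct *tsk)
{
	/* Cheap overflow check before the stack is returned to the allocator. */
	WARN_ON_ONCE(task_scs_end_corrupted(tsk));
	scs_release(tsk);
}

scs_task_reset() is only needed when a task struct is reused (e.g. idle tasks across CPU hotplug); it re-points the shadow stack pointer at the base.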
diff --git a/include/linux/signal.h b/include/linux/signal.h
index 05bacd2ab135..6bb1a3f0258c 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -24,6 +24,14 @@ static inline void clear_siginfo(kernel_siginfo_t *info)
#define SI_EXPANSION_SIZE (sizeof(struct siginfo) - sizeof(struct kernel_siginfo))
+static inline void copy_siginfo_to_external(siginfo_t *to,
+ const kernel_siginfo_t *from)
+{
+ memcpy(to, from, sizeof(*from));
+ memset(((char *)to) + sizeof(struct kernel_siginfo), 0,
+ SI_EXPANSION_SIZE);
+}
+
int copy_siginfo_to_user(siginfo_t __user *to, const kernel_siginfo_t *from);
int copy_siginfo_from_user(kernel_siginfo_t *to, const siginfo_t __user *from);
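
copy_siginfo_to_external() packages a kernel_siginfo_t into the full userspace siginfo layout and zeroes the expansion area so no kernel stack bytes leak. A hedged sketch of a caller (illustrative name, not an existing function):

#include <linux/signal.h>

static void example_export_siginfo(siginfo_t *dst, const kernel_siginfo_t *src)
{
	/* Replaces the open-coded pattern:
	 *	memcpy(dst, src, sizeof(*src));
	 *	memset((char *)dst + sizeof(*src), 0, SI_EXPANSION_SIZE);
	 */
	copy_siginfo_to_external(dst, src);
}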
diff --git a/include/linux/smp.h b/include/linux/smp.h
index cbc9162689d0..7ee202ad21a6 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -16,17 +16,39 @@
typedef void (*smp_call_func_t)(void *info);
typedef bool (*smp_cond_func_t)(int cpu, void *info);
+
+enum {
+ CSD_FLAG_LOCK = 0x01,
+
+ /* IRQ_WORK_flags */
+
+ CSD_TYPE_ASYNC = 0x00,
+ CSD_TYPE_SYNC = 0x10,
+ CSD_TYPE_IRQ_WORK = 0x20,
+ CSD_TYPE_TTWU = 0x30,
+ CSD_FLAG_TYPE_MASK = 0xF0,
+};
+
+/*
+ * structure shares (partial) layout with struct irq_work
+ */
struct __call_single_data {
struct llist_node llist;
+ unsigned int flags;
smp_call_func_t func;
void *info;
- unsigned int flags;
};
/* Use __aligned() to avoid to use 2 cache lines for 1 csd */
typedef struct __call_single_data call_single_data_t
__aligned(sizeof(struct __call_single_data));
+/*
+ * Enqueue a llist_node on the call_single_queue; be very careful, read
+ * flush_smp_call_function_queue() in detail.
+ */
+extern void __smp_call_single_queue(int cpu, struct llist_node *node);
+
/* total number of cpus in this system (may exceed NR_CPUS) */
extern unsigned int total_cpus;
@@ -227,8 +249,8 @@ static inline int get_boot_cpu_id(void)
*/
extern void arch_disable_smp_support(void);
-extern void arch_enable_nonboot_cpus_begin(void);
-extern void arch_enable_nonboot_cpus_end(void);
+extern void arch_thaw_secondary_cpus_begin(void);
+extern void arch_thaw_secondary_cpus_end(void);
void smp_setup_processor_id(void);
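
With flags folded into struct __call_single_data, bits 4-7 now encode what kind of entry sits on the call_single_queue, so one llist can carry function calls, irq_work items and remote wakeups. A hedged, simplified sketch of how a consumer classifies entries (not the real flush_smp_call_function_queue()):

#include <linux/smp.h>

static void example_classify_csd(struct __call_single_data *csd)
{
	switch (csd->flags & CSD_FLAG_TYPE_MASK) {
	case CSD_TYPE_SYNC:
	case CSD_TYPE_ASYNC:
		csd->func(csd->info);	/* ordinary cross-CPU function call */
		break;
	case CSD_TYPE_IRQ_WORK:
		/* entry really has struct irq_work layout (shared prefix) */
		break;
	case CSD_TYPE_TTWU:
		/* remote wakeup request queued by the scheduler */
		break;
	}
}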
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 38286de779e3..aac57b5b7c21 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -394,6 +394,7 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv)
* for example doing DMA mapping. Called from threaded
* context.
* @transfer_one: transfer a single spi_transfer.
+ *
* - return 0 if the transfer is finished,
* - return 1 if the transfer is still in progress. When
* the driver is finished with this transfer it must
diff --git a/include/linux/splice.h b/include/linux/splice.h
index ebbbfea48aa0..5c47013f708e 100644
--- a/include/linux/splice.h
+++ b/include/linux/splice.h
@@ -82,6 +82,9 @@ extern long do_splice(struct file *in, loff_t __user *off_in,
struct file *out, loff_t __user *off_out,
size_t len, unsigned int flags);
+extern long do_tee(struct file *in, struct file *out, size_t len,
+ unsigned int flags);
+
/*
* for dynamic pipe sizing
*/
diff --git a/include/linux/stat.h b/include/linux/stat.h
index 528c4baad091..56614af83d4a 100644
--- a/include/linux/stat.h
+++ b/include/linux/stat.h
@@ -47,6 +47,7 @@ struct kstat {
struct timespec64 ctime;
struct timespec64 btime; /* File creation time */
u64 blocks;
+ u64 mnt_id;
};
#endif
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 4fcc6fd0cbd6..b960098acfb0 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -466,6 +466,12 @@ static inline bool system_entering_hibernation(void) { return false; }
static inline bool hibernation_available(void) { return false; }
#endif /* CONFIG_HIBERNATION */
+#ifdef CONFIG_HIBERNATION_SNAPSHOT_DEV
+int is_hibernate_resume_dev(const struct inode *);
+#else
+static inline int is_hibernate_resume_dev(const struct inode *i) { return 0; }
+#endif
+
/* Hibernation and suspend events */
#define PM_HIBERNATION_PREPARE 0x0001 /* Going to hibernate */
#define PM_POST_HIBERNATION 0x0002 /* Hibernation finished */
diff --git a/include/linux/swait.h b/include/linux/swait.h
index 73e06e9986d4..6a8c22b8c2a5 100644
--- a/include/linux/swait.h
+++ b/include/linux/swait.h
@@ -9,23 +9,10 @@
#include <asm/current.h>
/*
- * BROKEN wait-queues.
- *
- * These "simple" wait-queues are broken garbage, and should never be
- * used. The comments below claim that they are "similar" to regular
- * wait-queues, but the semantics are actually completely different, and
- * every single user we have ever had has been buggy (or pointless).
- *
- * A "swake_up_one()" only wakes up _one_ waiter, which is not at all what
- * "wake_up()" does, and has led to problems. In other cases, it has
- * been fine, because there's only ever one waiter (kvm), but in that
- * case gthe whole "simple" wait-queue is just pointless to begin with,
- * since there is no "queue". Use "wake_up_process()" with a direct
- * pointer instead.
- *
- * While these are very similar to regular wait queues (wait.h) the most
- * important difference is that the simple waitqueue allows for deterministic
- * behaviour -- IOW it has strictly bounded IRQ and lock hold times.
+ * Simple waitqueues are semantically very different to regular wait queues
+ * (wait.h). The most important difference is that the simple waitqueue allows
+ * for deterministic behaviour -- IOW it has strictly bounded IRQ and lock hold
+ * times.
*
* Mainly, this is accomplished by two things. Firstly not allowing swake_up_all
* from IRQ disabled, and dropping the lock upon every wakeup, giving a higher
@@ -39,7 +26,7 @@
* sleeper state.
*
* - the !exclusive mode; because that leads to O(n) wakeups, everything is
- * exclusive.
+ * exclusive. As such swake_up_one will only ever wake _one_ waiter.
*
* - custom wake callback functions; because you cannot give any guarantees
* about random code. This also allows swait to be used in RT, such that
diff --git a/include/linux/swap.h b/include/linux/swap.h
index e1bbf7a16b27..e92176fc8824 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -183,12 +183,17 @@ enum {
#define SWAP_CLUSTER_MAX 32UL
#define COMPACT_CLUSTER_MAX SWAP_CLUSTER_MAX
-#define SWAP_MAP_MAX 0x3e /* Max duplication count, in first swap_map */
-#define SWAP_MAP_BAD 0x3f /* Note pageblock is bad, in first swap_map */
+/* Bit flag in swap_map */
#define SWAP_HAS_CACHE 0x40 /* Flag page is cached, in first swap_map */
-#define SWAP_CONT_MAX 0x7f /* Max count, in each swap_map continuation */
-#define COUNT_CONTINUED 0x80 /* See swap_map continuation for full count */
-#define SWAP_MAP_SHMEM 0xbf /* Owned by shmem/tmpfs, in first swap_map */
+#define COUNT_CONTINUED 0x80 /* Flag swap_map continuation for full count */
+
+/* Special value in first swap_map */
+#define SWAP_MAP_MAX 0x3e /* Max count */
+#define SWAP_MAP_BAD 0x3f /* Note page is bad */
+#define SWAP_MAP_SHMEM 0xbf /* Owned by shmem/tmpfs */
+
+/* Special value in each swap_map continuation */
+#define SWAP_CONT_MAX 0x7f /* Max count */
/*
* We use this to track usage of a cluster. A cluster is a block of swap disk
@@ -247,6 +252,7 @@ struct swap_info_struct {
unsigned int inuse_pages; /* number of those currently in use */
unsigned int cluster_next; /* likely index for next allocation */
unsigned int cluster_nr; /* countdown to next cluster search */
+ unsigned int __percpu *cluster_next_cpu; /* percpu index for next allocation */
struct percpu_cluster __percpu *percpu_cluster; /* per cpu's swap location */
struct rb_root swap_extent_root;/* root of the swap extent rbtree */
struct block_device *bdev; /* swap device or bdev of swap file */
@@ -337,6 +343,7 @@ extern void activate_page(struct page *);
extern void mark_page_accessed(struct page *);
extern void lru_add_drain(void);
extern void lru_add_drain_cpu(int cpu);
+extern void lru_add_drain_cpu_zone(struct zone *zone);
extern void lru_add_drain_all(void);
extern void rotate_reclaimable_page(struct page *page);
extern void deactivate_file_page(struct page *page);
@@ -408,7 +415,6 @@ extern unsigned long total_swapcache_pages(void);
extern void show_swap_cache_info(void);
extern int add_to_swap(struct page *page);
extern int add_to_swap_cache(struct page *, swp_entry_t, gfp_t);
-extern int __add_to_swap_cache(struct page *page, swp_entry_t entry);
extern void __delete_from_swap_cache(struct page *, swp_entry_t entry);
extern void delete_from_swap_cache(struct page *);
extern void free_page_and_swap_cache(struct page *);
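
The regrouped constants make the swap_map encoding easier to read: a first-level byte is a small reference count plus two flag bits, with a few reserved special values. A hedged illustration (not mm/swapfile.c code):

#include <linux/swap.h>

static void example_decode_swap_map(unsigned char ent)
{
	bool cached, continued;
	unsigned char count;

	if (ent == SWAP_MAP_BAD || ent == SWAP_MAP_SHMEM)
		return;				/* special entries carry no plain count */

	cached    = ent & SWAP_HAS_CACHE;	/* page also sits in the swap cache */
	continued = ent & COUNT_CONTINUED;	/* rest of the count is in a continuation page */
	count     = ent & ~(SWAP_HAS_CACHE | COUNT_CONTINUED);

	/* e.g. ent == 0x43 -> cached == true, count == 3 */
	(void)cached; (void)continued; (void)count;
}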
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 1815065d52f3..7c354c2955f5 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -428,6 +428,8 @@ asmlinkage long sys_ftruncate64(unsigned int fd, loff_t length);
#endif
asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len);
asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode);
+asmlinkage long sys_faccessat2(int dfd, const char __user *filename, int mode,
+ int flags);
asmlinkage long sys_chdir(const char __user *filename);
asmlinkage long sys_fchdir(unsigned int fd);
asmlinkage long sys_chroot(const char __user *filename);
@@ -1333,11 +1335,11 @@ static inline int ksys_chmod(const char __user *filename, umode_t mode)
return do_fchmodat(AT_FDCWD, filename, mode);
}
-extern long do_faccessat(int dfd, const char __user *filename, int mode);
+long do_faccessat(int dfd, const char __user *filename, int mode, int flags);
static inline long ksys_access(const char __user *filename, int mode)
{
- return do_faccessat(AT_FDCWD, filename, mode);
+ return do_faccessat(AT_FDCWD, filename, mode, 0);
}
extern int do_fchownat(int dfd, const char __user *filename, uid_t user,
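
sys_faccessat2() finally lets userspace pass AT_* flags to access checks instead of emulating AT_EACCESS in libc. A hedged userspace sketch that invokes it by syscall number, since a libc wrapper may not exist yet:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef __NR_faccessat2
#define __NR_faccessat2 439
#endif
#ifndef AT_EACCESS
#define AT_EACCESS 0x200
#endif

int main(void)
{
	/* check readability using the effective IDs, which access(2) cannot do */
	long ret = syscall(__NR_faccessat2, AT_FDCWD, "/etc/shadow", R_OK, AT_EACCESS);

	printf("faccessat2 -> %ld (%s)\n", ret, ret ? "denied/unsupported" : "ok");
	return 0;
}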
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 80bb865b3a33..86067dbe7745 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -7,7 +7,7 @@
* Copyright (c) 2007 SUSE Linux Products GmbH
* Copyright (c) 2007 Tejun Heo <teheo@suse.de>
*
- * Please see Documentation/filesystems/sysfs.txt for more information.
+ * Please see Documentation/filesystems/sysfs.rst for more information.
*/
#ifndef _SYSFS_H_
diff --git a/include/linux/tboot.h b/include/linux/tboot.h
index 5424bc6feac8..c7e424766360 100644
--- a/include/linux/tboot.h
+++ b/include/linux/tboot.h
@@ -121,13 +121,7 @@ struct tboot {
#define TBOOT_UUID {0xff, 0x8d, 0x3c, 0x66, 0xb3, 0xe8, 0x82, 0x4b, 0xbf,\
0xaa, 0x19, 0xea, 0x4d, 0x5, 0x7a, 0x8}
-extern struct tboot *tboot;
-
-static inline int tboot_enabled(void)
-{
- return tboot != NULL;
-}
-
+bool tboot_enabled(void);
extern void tboot_probe(void);
extern void tboot_shutdown(u32 shutdown_type);
extern struct acpi_table_header *tboot_get_dmar_table(
diff --git a/include/linux/torture.h b/include/linux/torture.h
index 6241f59e2d6f..629b66e6c161 100644
--- a/include/linux/torture.h
+++ b/include/linux/torture.h
@@ -89,7 +89,7 @@ void _torture_stop_kthread(char *m, struct task_struct **tp);
#ifdef CONFIG_PREEMPTION
#define torture_preempt_schedule() preempt_schedule()
#else
-#define torture_preempt_schedule()
+#define torture_preempt_schedule() do { } while (0)
#endif
#endif /* __LINUX_TORTURE_H */
diff --git a/include/linux/tpm_eventlog.h b/include/linux/tpm_eventlog.h
index c253461b1c4e..4f8c90c93c29 100644
--- a/include/linux/tpm_eventlog.h
+++ b/include/linux/tpm_eventlog.h
@@ -97,7 +97,7 @@ struct tcg_pcr_event {
u32 event_type;
u8 digest[20];
u32 event_size;
- u8 event[0];
+ u8 event[];
} __packed;
struct tcg_event_field {
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index 67f016010aad..9861c89f93be 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -378,6 +378,14 @@ extern long strnlen_unsafe_user(const void __user *unsafe_addr, long count);
static inline unsigned long user_access_save(void) { return 0UL; }
static inline void user_access_restore(unsigned long flags) { }
#endif
+#ifndef user_write_access_begin
+#define user_write_access_begin user_access_begin
+#define user_write_access_end user_access_end
+#endif
+#ifndef user_read_access_begin
+#define user_read_access_begin user_access_begin
+#define user_read_access_end user_access_end
+#endif
#ifdef CONFIG_HARDENED_USERCOPY
void usercopy_warn(const char *name, const char *detail, bool to_user,
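
The new defines let code that only reads or only writes userspace say so, while architectures without separate read/write windows silently fall back to the combined user_access_begin()/user_access_end() pair. A minimal hedged sketch of the intended call pattern (illustrative function, not an existing one):

#include <linux/uaccess.h>

static int example_put_user_word(unsigned int __user *uptr, unsigned int val)
{
	if (!user_write_access_begin(uptr, sizeof(*uptr)))
		return -EFAULT;
	unsafe_put_user(val, uptr, Efault);
	user_write_access_end();
	return 0;

Efault:
	user_write_access_end();
	return -EFAULT;
}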
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index a95d3cc74d79..48bb681e6c2a 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -88,8 +88,7 @@ struct vmap_area {
* Highlevel APIs for driver use
*/
extern void vm_unmap_ram(const void *mem, unsigned int count);
-extern void *vm_map_ram(struct page **pages, unsigned int count,
- int node, pgprot_t prot);
+extern void *vm_map_ram(struct page **pages, unsigned int count, int node);
extern void vm_unmap_aliases(void);
#ifdef CONFIG_MMU
@@ -107,26 +106,16 @@ extern void *vzalloc(unsigned long size);
extern void *vmalloc_user(unsigned long size);
extern void *vmalloc_node(unsigned long size, int node);
extern void *vzalloc_node(unsigned long size, int node);
-extern void *vmalloc_user_node_flags(unsigned long size, int node, gfp_t flags);
extern void *vmalloc_exec(unsigned long size);
extern void *vmalloc_32(unsigned long size);
extern void *vmalloc_32_user(unsigned long size);
-extern void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot);
+extern void *__vmalloc(unsigned long size, gfp_t gfp_mask);
extern void *__vmalloc_node_range(unsigned long size, unsigned long align,
unsigned long start, unsigned long end, gfp_t gfp_mask,
pgprot_t prot, unsigned long vm_flags, int node,
const void *caller);
-#ifndef CONFIG_MMU
-extern void *__vmalloc_node_flags(unsigned long size, int node, gfp_t flags);
-static inline void *__vmalloc_node_flags_caller(unsigned long size, int node,
- gfp_t flags, void *caller)
-{
- return __vmalloc_node_flags(size, node, flags);
-}
-#else
-extern void *__vmalloc_node_flags_caller(unsigned long size,
- int node, gfp_t flags, void *caller);
-#endif
+void *__vmalloc_node(unsigned long size, unsigned long align, gfp_t gfp_mask,
+ int node, const void *caller);
extern void vfree(const void *addr);
extern void vfree_atomic(const void *addr);
@@ -141,8 +130,22 @@ extern int remap_vmalloc_range_partial(struct vm_area_struct *vma,
extern int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
unsigned long pgoff);
-void vmalloc_sync_mappings(void);
-void vmalloc_sync_unmappings(void);
+
+/*
+ * Architectures can set this mask to a combination of PGTBL_P?D_MODIFIED values
+ * and let generic vmalloc and ioremap code know when arch_sync_kernel_mappings()
+ * needs to be called.
+ */
+#ifndef ARCH_PAGE_TABLE_SYNC_MASK
+#define ARCH_PAGE_TABLE_SYNC_MASK 0
+#endif
+
+/*
+ * There is no default implementation for arch_sync_kernel_mappings(). It is
+ * relied upon the compiler to optimize calls out if ARCH_PAGE_TABLE_SYNC_MASK
+ * is 0.
+ */
+void arch_sync_kernel_mappings(unsigned long start, unsigned long end);
/*
* Lowlevel-APIs (not for driver use!)
@@ -161,8 +164,6 @@ static inline size_t get_vm_area_size(const struct vm_struct *area)
extern struct vm_struct *get_vm_area(unsigned long size, unsigned long flags);
extern struct vm_struct *get_vm_area_caller(unsigned long size,
unsigned long flags, const void *caller);
-extern struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags,
- unsigned long start, unsigned long end);
extern struct vm_struct *__get_vm_area_caller(unsigned long size,
unsigned long flags,
unsigned long start, unsigned long end,
@@ -170,11 +171,11 @@ extern struct vm_struct *__get_vm_area_caller(unsigned long size,
extern struct vm_struct *remove_vm_area(const void *addr);
extern struct vm_struct *find_vm_area(const void *addr);
-extern int map_vm_area(struct vm_struct *area, pgprot_t prot,
- struct page **pages);
#ifdef CONFIG_MMU
extern int map_kernel_range_noflush(unsigned long start, unsigned long size,
pgprot_t prot, struct page **pages);
+int map_kernel_range(unsigned long start, unsigned long size, pgprot_t prot,
+ struct page **pages);
extern void unmap_kernel_range_noflush(unsigned long addr, unsigned long size);
extern void unmap_kernel_range(unsigned long addr, unsigned long size);
static inline void set_vm_flush_reset_perms(void *addr)
@@ -191,14 +192,12 @@ map_kernel_range_noflush(unsigned long start, unsigned long size,
{
return size >> PAGE_SHIFT;
}
+#define map_kernel_range map_kernel_range_noflush
static inline void
unmap_kernel_range_noflush(unsigned long addr, unsigned long size)
{
}
-static inline void
-unmap_kernel_range(unsigned long addr, unsigned long size)
-{
-}
+#define unmap_kernel_range unmap_kernel_range_noflush
static inline void set_vm_flush_reset_perms(void *addr)
{
}
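
After this cleanup callers no longer pass a pgprot_t, and node- or caller-aware allocations funnel through the single __vmalloc_node() helper. A hedged before/after sketch (illustrative functions only):

#include <linux/vmalloc.h>

static void *example_alloc(size_t size)
{
	/* was: __vmalloc(size, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL) */
	return __vmalloc(size, GFP_KERNEL | __GFP_ZERO);
}

static void *example_alloc_on_node(size_t size, int node)
{
	/* was: __vmalloc_node_flags_caller(size, node, GFP_KERNEL, caller);
	 * align of 1 is what the plain vmalloc() path uses */
	return __vmalloc_node(size, 1, GFP_KERNEL, node,
			      __builtin_return_address(0));
}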
diff --git a/include/linux/wait.h b/include/linux/wait.h
index feeb6be5cad6..898c890fc153 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -1149,4 +1149,6 @@ int autoremove_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, i
(wait)->flags = 0; \
} while (0)
+bool try_invoke_on_locked_down_task(struct task_struct *p, bool (*func)(struct task_struct *t, void *arg), void *arg);
+
#endif /* _LINUX_WAIT_H */
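
try_invoke_on_locked_down_task() runs a short, non-sleeping callback against a task while holding the locks that pin the task in a definite state, and returns false if the task could not be locked down. A hedged usage sketch with made-up names:

#include <linux/sched.h>
#include <linux/wait.h>

static bool example_read_state(struct task_struct *t, void *arg)
{
	*(long *)arg = t->state;	/* task cannot change state under us here */
	return true;			/* report that the callback ran */
}

static long example_sample_state(struct task_struct *p)
{
	long state = -1;

	if (!try_invoke_on_locked_down_task(p, example_read_state, &state))
		return -EAGAIN;		/* task was running or otherwise unstable */
	return state;
}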
diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h
index 417d9f37077a..1464ce6ffa31 100644
--- a/include/linux/watchdog.h
+++ b/include/linux/watchdog.h
@@ -37,15 +37,15 @@ struct watchdog_governor;
*
* The watchdog_ops structure contains a list of low-level operations
* that control a watchdog device. It also contains the module that owns
- * these operations. The start and stop function are mandatory, all other
+ * these operations. The start function is mandatory, all other
* functions are optional.
*/
struct watchdog_ops {
struct module *owner;
/* mandatory operations */
int (*start)(struct watchdog_device *);
- int (*stop)(struct watchdog_device *);
/* optional operations */
+ int (*stop)(struct watchdog_device *);
int (*ping)(struct watchdog_device *);
unsigned int (*status)(struct watchdog_device *);
int (*set_timeout)(struct watchdog_device *, unsigned int);
diff --git a/include/linux/zsmalloc.h b/include/linux/zsmalloc.h
index 2219cce81ca4..0fdbf653b173 100644
--- a/include/linux/zsmalloc.h
+++ b/include/linux/zsmalloc.h
@@ -20,7 +20,7 @@
* zsmalloc mapping modes
*
* NOTE: These only make a difference when a mapped object spans pages.
- * They also have no effect when PGTABLE_MAPPING is selected.
+ * They also have no effect when ZSMALLOC_PGTABLE_MAPPING is selected.
*/
enum zs_mapmode {
ZS_MM_RW, /* normal read-write mapping */
diff --git a/include/media/cec-notifier.h b/include/media/cec-notifier.h
index 38956969fd12..b1c839734124 100644
--- a/include/media/cec-notifier.h
+++ b/include/media/cec-notifier.h
@@ -2,7 +2,7 @@
/*
* cec-notifier.h - notify CEC drivers of physical address changes
*
- * Copyright 2016 Russell King <rmk+kernel@arm.linux.org.uk>
+ * Copyright 2016 Russell King.
* Copyright 2016-2017 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
*/
diff --git a/include/net/checksum.h b/include/net/checksum.h
index 97bf4885a962..46754ba9d7b7 100644
--- a/include/net/checksum.h
+++ b/include/net/checksum.h
@@ -26,13 +26,9 @@ static inline
__wsum csum_and_copy_from_user (const void __user *src, void *dst,
int len, __wsum sum, int *err_ptr)
{
- if (access_ok(src, len))
- return csum_partial_copy_from_user(src, dst, len, sum, err_ptr);
-
- if (len)
+ if (copy_from_user(dst, src, len))
*err_ptr = -EFAULT;
-
- return sum;
+ return csum_partial(dst, len, sum);
}
#endif
@@ -42,10 +38,8 @@ static __inline__ __wsum csum_and_copy_to_user
{
sum = csum_partial(src, len, sum);
- if (access_ok(dst, len)) {
- if (copy_to_user(dst, src, len) == 0)
- return sum;
- }
+ if (copy_to_user(dst, src, len) == 0)
+ return sum;
if (len)
*err_ptr = -EFAULT;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index bca761ffa25f..4de9485f73d9 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -23,7 +23,6 @@
#include <linux/cache.h>
#include <linux/percpu.h>
#include <linux/skbuff.h>
-#include <linux/cryptohash.h>
#include <linux/kref.h>
#include <linux/ktime.h>
diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h
index 80ac89e47b47..f93c0b800790 100644
--- a/include/scsi/scsi_cmnd.h
+++ b/include/scsi/scsi_cmnd.h
@@ -142,6 +142,7 @@ struct scsi_cmnd {
unsigned long state; /* Command completion state */
unsigned char tag; /* SCSI-II queued command tag */
+ unsigned int extra_len; /* length of alignment and padding */
};
/*
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index c3cba2aaf934..bc5909033d13 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -229,6 +229,9 @@ struct scsi_device {
struct scsi_device_handler *handler;
void *handler_data;
+ size_t dma_drain_len;
+ void *dma_drain_buf;
+
unsigned char access_state;
struct mutex state_mutex;
enum scsi_device_state sdev_state;
diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index 822e8cda8d9b..46ef8cccc982 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -271,6 +271,13 @@ struct scsi_host_template {
int (* map_queues)(struct Scsi_Host *shost);
/*
+ * Check if scatterlists need to be padded for DMA draining.
+ *
+ * Status: OPTIONAL
+ */
+ bool (* dma_need_drain)(struct request *rq);
+
+ /*
* This function determines the BIOS parameters for a given
* harddisk. These tend to be numbers that are made up by
* the host adapter. Parameters:
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index bcbc763b8814..360b0f9d2220 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -89,6 +89,7 @@ TRACE_DEFINE_ENUM(COMMIT_TRANS);
{ IO_TREE_TRANS_DIRTY_PAGES, "TRANS_DIRTY_PAGES" }, \
{ IO_TREE_ROOT_DIRTY_LOG_PAGES, "ROOT_DIRTY_LOG_PAGES" }, \
{ IO_TREE_INODE_FILE_EXTENT, "INODE_FILE_EXTENT" }, \
+ { IO_TREE_LOG_CSUM_RANGE, "LOG_CSUM_RANGE" }, \
{ IO_TREE_SELFTEST, "SELFTEST" })
#define BTRFS_GROUP_FLAGS \
diff --git a/include/trace/events/erofs.h b/include/trace/events/erofs.h
index 27f5caa6299a..bf9806fd1306 100644
--- a/include/trace/events/erofs.h
+++ b/include/trace/events/erofs.h
@@ -113,10 +113,10 @@ TRACE_EVENT(erofs_readpage,
TRACE_EVENT(erofs_readpages,
- TP_PROTO(struct inode *inode, struct page *page, unsigned int nrpage,
+ TP_PROTO(struct inode *inode, pgoff_t start, unsigned int nrpage,
bool raw),
- TP_ARGS(inode, page, nrpage, raw),
+ TP_ARGS(inode, start, nrpage, raw),
TP_STRUCT__entry(
__field(dev_t, dev )
@@ -129,7 +129,7 @@ TRACE_EVENT(erofs_readpages,
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->nid = EROFS_I(inode)->nid;
- __entry->start = page->index;
+ __entry->start = start;
__entry->nrpage = nrpage;
__entry->raw = raw;
),
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index d97adfc327f0..24c2557c37f0 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -1376,9 +1376,9 @@ TRACE_EVENT(f2fs_writepages,
TRACE_EVENT(f2fs_readpages,
- TP_PROTO(struct inode *inode, struct page *page, unsigned int nrpage),
+ TP_PROTO(struct inode *inode, pgoff_t start, unsigned int nrpage),
- TP_ARGS(inode, page, nrpage),
+ TP_ARGS(inode, start, nrpage),
TP_STRUCT__entry(
__field(dev_t, dev)
@@ -1390,7 +1390,7 @@ TRACE_EVENT(f2fs_readpages,
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
- __entry->start = page->index;
+ __entry->start = start;
__entry->nrpage = nrpage;
),
diff --git a/include/trace/events/regulator.h b/include/trace/events/regulator.h
index b70583c32c08..72b3ba93b0a5 100644
--- a/include/trace/events/regulator.h
+++ b/include/trace/events/regulator.h
@@ -70,6 +70,38 @@ DEFINE_EVENT(regulator_basic, regulator_disable_complete,
);
+DEFINE_EVENT(regulator_basic, regulator_bypass_enable,
+
+ TP_PROTO(const char *name),
+
+ TP_ARGS(name)
+
+);
+
+DEFINE_EVENT(regulator_basic, regulator_bypass_enable_complete,
+
+ TP_PROTO(const char *name),
+
+ TP_ARGS(name)
+
+);
+
+DEFINE_EVENT(regulator_basic, regulator_bypass_disable,
+
+ TP_PROTO(const char *name),
+
+ TP_ARGS(name)
+
+);
+
+DEFINE_EVENT(regulator_basic, regulator_bypass_disable_complete,
+
+ TP_PROTO(const char *name),
+
+ TP_ARGS(name)
+
+);
+
/*
* Events that take a range of numerical values, mostly for voltages
* and so on.
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
index 85a33bea76f1..10f5d1fa7347 100644
--- a/include/trace/events/writeback.h
+++ b/include/trace/events/writeback.h
@@ -541,7 +541,6 @@ TRACE_EVENT(global_dirty_state,
TP_STRUCT__entry(
__field(unsigned long, nr_dirty)
__field(unsigned long, nr_writeback)
- __field(unsigned long, nr_unstable)
__field(unsigned long, background_thresh)
__field(unsigned long, dirty_thresh)
__field(unsigned long, dirty_limit)
@@ -552,7 +551,6 @@ TRACE_EVENT(global_dirty_state,
TP_fast_assign(
__entry->nr_dirty = global_node_page_state(NR_FILE_DIRTY);
__entry->nr_writeback = global_node_page_state(NR_WRITEBACK);
- __entry->nr_unstable = global_node_page_state(NR_UNSTABLE_NFS);
__entry->nr_dirtied = global_node_page_state(NR_DIRTIED);
__entry->nr_written = global_node_page_state(NR_WRITTEN);
__entry->background_thresh = background_thresh;
@@ -560,12 +558,11 @@ TRACE_EVENT(global_dirty_state,
__entry->dirty_limit = global_wb_domain.dirty_limit;
),
- TP_printk("dirty=%lu writeback=%lu unstable=%lu "
+ TP_printk("dirty=%lu writeback=%lu "
"bg_thresh=%lu thresh=%lu limit=%lu "
"dirtied=%lu written=%lu",
__entry->nr_dirty,
__entry->nr_writeback,
- __entry->nr_unstable,
__entry->background_thresh,
__entry->dirty_thresh,
__entry->dirty_limit,
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 3a3201e4618e..f4a01305d9a6 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -855,9 +855,11 @@ __SYSCALL(__NR_clone3, sys_clone3)
__SYSCALL(__NR_openat2, sys_openat2)
#define __NR_pidfd_getfd 438
__SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd)
+#define __NR_faccessat2 439
+__SYSCALL(__NR_faccessat2, sys_faccessat2)
#undef __NR_syscalls
-#define __NR_syscalls 439
+#define __NR_syscalls 440
/*
* 32 bit systems traditionally used different
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index d28b4ce744d5..4e873dcbe68f 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -133,6 +133,11 @@ extern "C" {
* releasing the memory
*/
#define AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE (1 << 9)
+/* Flag that BO will be encrypted and that the TMZ bit should be
+ * set in the PTEs when mapping this buffer via GPUVM or
+ * accessing it with various hw blocks
+ */
+#define AMDGPU_GEM_CREATE_ENCRYPTED (1 << 10)
struct drm_amdgpu_gem_create_in {
/** the requested memory size */
@@ -559,7 +564,7 @@ struct drm_amdgpu_cs_in {
/** Handle of resource list associated with CS */
__u32 bo_list_handle;
__u32 num_chunks;
- __u32 _pad;
+ __u32 flags;
/** this points to __u64 * which point to cs chunks */
__u64 chunks;
};
@@ -593,6 +598,14 @@ union drm_amdgpu_cs {
*/
#define AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID (1 << 4)
+/* Flag the IB as secure (TMZ)
+ */
+#define AMDGPU_IB_FLAGS_SECURE (1 << 5)
+
+/* Tell KMD to flush and invalidate caches
+ */
+#define AMDGPU_IB_FLAG_EMIT_MEM_SYNC (1 << 6)
+
struct drm_amdgpu_cs_chunk_ib {
__u32 _pad;
/** AMDGPU_IB_FLAG_* */
diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h
index 8bc0b31597d8..490143500a50 100644
--- a/include/uapi/drm/drm_fourcc.h
+++ b/include/uapi/drm/drm_fourcc.h
@@ -354,9 +354,12 @@ extern "C" {
* a platform-dependent stride. On top of that the memory can apply
* platform-depending swizzling of some higher address bits into bit6.
*
- * This format is highly platforms specific and not useful for cross-driver
- * sharing. It exists since on a given platform it does uniquely identify the
- * layout in a simple way for i915-specific userspace.
+ * Note that this layout is only accurate on intel gen 8+ or valleyview chipsets.
+ * On earlier platforms this is highly platform specific and not useful for
+ * cross-driver sharing. It exists since on a given platform it does uniquely
+ * identify the layout in a simple way for i915-specific userspace, which
+ * facilitated conversion of userspace to modifiers. Additionally the exact
+ * format on some really old platforms is not known.
*/
#define I915_FORMAT_MOD_X_TILED fourcc_mod_code(INTEL, 1)
@@ -369,9 +372,12 @@ extern "C" {
* memory can apply platform-depending swizzling of some higher address bits
* into bit6.
*
- * This format is highly platforms specific and not useful for cross-driver
- * sharing. It exists since on a given platform it does uniquely identify the
- * layout in a simple way for i915-specific userspace.
+ * Note that this layout is only accurate on intel gen 8+ or valleyview chipsets.
+ * On earlier platforms this is highly platform specific and not useful for
+ * cross-driver sharing. It exists since on a given platform it does uniquely
+ * identify the layout in a simple way for i915-specific userspace, which
+ * facilitated conversion of userspace to modifiers. Additionally the exact
+ * format on some really old platforms is not known.
*/
#define I915_FORMAT_MOD_Y_TILED fourcc_mod_code(INTEL, 2)
@@ -521,7 +527,113 @@ extern "C" {
#define DRM_FORMAT_MOD_NVIDIA_TEGRA_TILED fourcc_mod_code(NVIDIA, 1)
/*
- * 16Bx2 Block Linear layout, used by desktop GPUs, and Tegra K1 and later
+ * Generalized Block Linear layout, used by desktop GPUs starting with NV50/G80,
+ * and Tegra GPUs starting with Tegra K1.
+ *
+ * Pixels are arranged in Groups of Bytes (GOBs). GOB size and layout varies
+ * based on the architecture generation. GOBs themselves are then arranged in
+ * 3D blocks, with the block dimensions (in terms of GOBs) always being a power
+ * of two, and hence expressible as their log2 equivalent (E.g., "2" represents
+ * a block depth or height of "4").
+ *
+ * Chapter 20 "Pixel Memory Formats" of the Tegra X1 TRM describes this format
+ * in full detail.
+ *
+ * Macro
+ * Bits Param Description
+ * ---- ----- -----------------------------------------------------------------
+ *
+ * 3:0 h log2(height) of each block, in GOBs. Placed here for
+ * compatibility with the existing
+ * DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK()-based modifiers.
+ *
+ * 4:4 - Must be 1, to indicate block-linear layout. Necessary for
+ * compatibility with the existing
+ * DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK()-based modifiers.
+ *
+ * 8:5 - Reserved (To support 3D-surfaces with variable log2(depth) block
+ * size). Must be zero.
+ *
+ * Note there is no log2(width) parameter. Some portions of the
+ * hardware support a block width of two gobs, but it is impractical
+ * to use due to lack of support elsewhere, and has no known
+ * benefits.
+ *
+ * 11:9 - Reserved (To support 2D-array textures with variable array stride
+ * in blocks, specified via log2(tile width in blocks)). Must be
+ * zero.
+ *
+ * 19:12 k Page Kind. This value directly maps to a field in the page
+ * tables of all GPUs >= NV50. It affects the exact layout of bits
+ * in memory and can be derived from the tuple
+ *
+ * (format, GPU model, compression type, samples per pixel)
+ *
+ * Where compression type is defined below. If GPU model were
+ * implied by the format modifier, format, or memory buffer, page
+ * kind would not need to be included in the modifier itself, but
+ * since the modifier should define the layout of the associated
+ * memory buffer independent from any device or other context, it
+ * must be included here.
+ *
+ * 21:20 g GOB Height and Page Kind Generation. The height of a GOB changed
+ * starting with Fermi GPUs. Additionally, the mapping between page
+ * kind and bit layout has changed at various points.
+ *
+ * 0 = Gob Height 8, Fermi - Volta, Tegra K1+ Page Kind mapping
+ * 1 = Gob Height 4, G80 - GT2XX Page Kind mapping
+ * 2 = Gob Height 8, Turing+ Page Kind mapping
+ * 3 = Reserved for future use.
+ *
+ * 22:22 s Sector layout. On Tegra GPUs prior to Xavier, there is a further
+ * bit remapping step that occurs at an even lower level than the
+ * page kind and block linear swizzles. This causes the layout of
+ * surfaces mapped in those SOC's GPUs to be incompatible with the
+ * equivalent mapping on other GPUs in the same system.
+ *
+ * 0 = Tegra K1 - Tegra Parker/TX2 Layout.
+ * 1 = Desktop GPU and Tegra Xavier+ Layout
+ *
+ * 25:23 c Lossless Framebuffer Compression type.
+ *
+ * 0 = none
+ * 1 = ROP/3D, layout 1, exact compression format implied by Page
+ * Kind field
+ * 2 = ROP/3D, layout 2, exact compression format implied by Page
+ * Kind field
+ * 3 = CDE horizontal
+ * 4 = CDE vertical
+ * 5 = Reserved for future use
+ * 6 = Reserved for future use
+ * 7 = Reserved for future use
+ *
+ * 55:25 - Reserved for future use. Must be zero.
+ */
+#define DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(c, s, g, k, h) \
+ fourcc_mod_code(NVIDIA, (0x10 | \
+ ((h) & 0xf) | \
+ (((k) & 0xff) << 12) | \
+ (((g) & 0x3) << 20) | \
+ (((s) & 0x1) << 22) | \
+ (((c) & 0x7) << 23)))
+
+/* To grandfather in prior block linear format modifiers to the above layout,
+ * the page kind "0", which corresponds to "pitch/linear" and hence is unusable
+ * with block-linear layouts, is remapped within drivers to the value 0xfe,
+ * which corresponds to the "generic" kind used for simple single-sample
+ * uncompressed color formats on Fermi - Volta GPUs.
+ */
+static inline __u64
+drm_fourcc_canonicalize_nvidia_format_mod(__u64 modifier)
+{
+ if (!(modifier & 0x10) || (modifier & (0xff << 12)))
+ return modifier;
+ else
+ return modifier | (0xfe << 12);
+}
+
+/*
+ * 16Bx2 Block Linear layout, used by Tegra K1 and later
*
* Pixels are arranged in 64x8 Groups Of Bytes (GOBs). GOBs are then stacked
* vertically by a power of 2 (1 to 32 GOBs) to form a block.
@@ -542,20 +654,20 @@ extern "C" {
* in full detail.
*/
#define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(v) \
- fourcc_mod_code(NVIDIA, 0x10 | ((v) & 0xf))
+ DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 0, 0, 0, (v))
#define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_ONE_GOB \
- fourcc_mod_code(NVIDIA, 0x10)
+ DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(0)
#define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_TWO_GOB \
- fourcc_mod_code(NVIDIA, 0x11)
+ DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(1)
#define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_FOUR_GOB \
- fourcc_mod_code(NVIDIA, 0x12)
+ DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(2)
#define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_EIGHT_GOB \
- fourcc_mod_code(NVIDIA, 0x13)
+ DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(3)
#define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_SIXTEEN_GOB \
- fourcc_mod_code(NVIDIA, 0x14)
+ DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(4)
#define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_THIRTYTWO_GOB \
- fourcc_mod_code(NVIDIA, 0x15)
+ DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(5)
/*
* Some Broadcom modifiers take parameters, for example the number of
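
A hedged worked example of the bit packing described above, built against a libdrm new enough to ship these macros (compile with `pkg-config --cflags libdrm`); the parameter values are purely illustrative:

#include <stdint.h>
#include <stdio.h>
#include <drm_fourcc.h>

int main(void)
{
	/* c=0 (uncompressed), s=1 (desktop/Xavier+ sector layout),
	 * g=2 (Turing+ page-kind mapping), k=0xfe (generic kind),
	 * h=4 (blocks are 2^4 = 16 GOBs tall) */
	uint64_t mod = DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 2, 0xfe, 4);

	/* vendor 0x03 in the top byte, 0x6fe014 in the low bits */
	printf("%#llx\n", (unsigned long long)mod);	/* 0x3000000006fe014 */

	/* legacy 16Bx2 modifiers encode page kind 0; canonicalization
	 * remaps that to the generic kind 0xfe */
	printf("%#llx\n", (unsigned long long)
	       drm_fourcc_canonicalize_nvidia_format_mod(
			DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_SIXTEEN_GOB));
			/* 0x30000000000fe014 */
	return 0;
}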
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 2813e579b480..14b67cd6b54b 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1969,6 +1969,30 @@ enum drm_i915_perf_property_id {
*/
DRM_I915_PERF_PROP_HOLD_PREEMPTION,
+ /**
+ * Specifying this pins all contexts to the specified SSEU power
+ * configuration for the duration of the recording.
+ *
+ * This parameter's value is a pointer to a struct
+ * drm_i915_gem_context_param_sseu.
+ *
+ * This property is available in perf revision 4.
+ */
+ DRM_I915_PERF_PROP_GLOBAL_SSEU,
+
+ /**
+ * This optional parameter specifies the timer interval in nanoseconds
+ * at which the i915 driver will check the OA buffer for available data.
+ * Minimum allowed value is 100 microseconds. A default value is used by
+ * the driver if this parameter is not specified. Note that larger timer
+ * values will reduce cpu consumption during OA perf captures. However,
+ * excessively large values would potentially result in OA buffer
+ * overwrites as captures reach end of the OA buffer.
+ *
+ * This property is available in perf revision 5.
+ */
+ DRM_I915_PERF_PROP_POLL_OA_PERIOD,
+
DRM_I915_PERF_PROP_MAX /* non-ABI */
};
diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index a534d71e689a..9b6a973f4cc3 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -117,6 +117,7 @@
#define AUDIT_TIME_INJOFFSET 1332 /* Timekeeping offset injected */
#define AUDIT_TIME_ADJNTPVAL 1333 /* NTP value adjustment */
#define AUDIT_BPF 1334 /* BPF subsystem */
+#define AUDIT_EVENT_LISTENER 1335 /* Task joined multicast read socket */
#define AUDIT_AVC 1400 /* SE Linux avc denial or grant */
#define AUDIT_SELINUX_ERR 1401 /* Internal SE Linux Errors */
diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h
index 8e322e2c7e78..a3f3975df0de 100644
--- a/include/uapi/linux/btrfs_tree.h
+++ b/include/uapi/linux/btrfs_tree.h
@@ -519,15 +519,6 @@ struct btrfs_extent_inline_ref {
__le64 offset;
} __attribute__ ((__packed__));
-/* old style backrefs item */
-struct btrfs_extent_ref_v0 {
- __le64 root;
- __le64 generation;
- __le64 objectid;
- __le32 count;
-} __attribute__ ((__packed__));
-
-
/* dev extents record free space on individual devices. The owner
* field points back to the chunk allocation mapping tree that allocated
* the extent. The chunk tree uuid field is a way to double check the owner
diff --git a/include/uapi/linux/capability.h b/include/uapi/linux/capability.h
index c7372180a0a9..48ff0757ae5e 100644
--- a/include/uapi/linux/capability.h
+++ b/include/uapi/linux/capability.h
@@ -333,6 +333,8 @@ struct vfs_ns_cap_data {
#define CAP_AUDIT_CONTROL 30
+/* Set or remove capabilities on files */
+
#define CAP_SETFCAP 31
/* Override MAC access.
diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h
index 34c02e4290fe..c6dd0215482e 100644
--- a/include/uapi/linux/elf.h
+++ b/include/uapi/linux/elf.h
@@ -36,6 +36,7 @@ typedef __s64 Elf64_Sxword;
#define PT_LOPROC 0x70000000
#define PT_HIPROC 0x7fffffff
#define PT_GNU_EH_FRAME 0x6474e550
+#define PT_GNU_PROPERTY 0x6474e553
#define PT_GNU_STACK (PT_LOOS + 0x474e551)
@@ -367,6 +368,7 @@ typedef struct elf64_shdr {
* Notes used in ET_CORE. Architectures export some of the arch register sets
* using the corresponding note types via the PTRACE_GETREGSET and
* PTRACE_SETREGSET requests.
+ * The note name for all these is "LINUX".
*/
#define NT_PRSTATUS 1
#define NT_PRFPREG 2
@@ -429,6 +431,9 @@ typedef struct elf64_shdr {
#define NT_MIPS_FP_MODE 0x801 /* MIPS floating-point mode */
#define NT_MIPS_MSA 0x802 /* MIPS SIMD registers */
+/* Note types with note name "GNU" */
+#define NT_GNU_PROPERTY_TYPE_0 5
+
/* Note header in a PT_NOTE section */
typedef struct elf32_note {
Elf32_Word n_namesz; /* Name size */
@@ -443,4 +448,10 @@ typedef struct elf64_note {
Elf64_Word n_type; /* Content type */
} Elf64_Nhdr;
+/* .note.gnu.property types for EM_AARCH64: */
+#define GNU_PROPERTY_AARCH64_FEATURE_1_AND 0xc0000000
+
+/* Bits for GNU_PROPERTY_AARCH64_FEATURE_1_BTI */
+#define GNU_PROPERTY_AARCH64_FEATURE_1_BTI (1U << 0)
+
#endif /* _UAPI_LINUX_ELF_H */
diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h
index fc9051f2eeac..4dda5e4244a7 100644
--- a/include/uapi/linux/ethtool_netlink.h
+++ b/include/uapi/linux/ethtool_netlink.h
@@ -2,7 +2,7 @@
/*
* include/uapi/linux/ethtool_netlink.h - netlink interface for ethtool
*
- * See Documentation/networking/ethtool-netlink.txt in kernel source tree for
+ * See Documentation/networking/ethtool-netlink.rst in kernel source tree for
 * documentation of the interface.
*/
diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h
index ca88b7bce553..2f86b2ad6d7e 100644
--- a/include/uapi/linux/fcntl.h
+++ b/include/uapi/linux/fcntl.h
@@ -84,10 +84,20 @@
#define DN_ATTRIB 0x00000020 /* File changed attributes */
#define DN_MULTISHOT 0x80000000 /* Don't remove notifier */
+/*
+ * The constants AT_REMOVEDIR and AT_EACCESS have the same value. AT_EACCESS is
+ * meaningful only to faccessat, while AT_REMOVEDIR is meaningful only to
+ * unlinkat. The two functions do completely different things and therefore,
+ * the flags can be allowed to overlap. For example, passing AT_REMOVEDIR to
+ * faccessat would be undefined behavior and thus treating it equivalent to
+ * AT_EACCESS is valid undefined behavior.
+ */
#define AT_FDCWD -100 /* Special value used to indicate
openat should use the current
working directory. */
#define AT_SYMLINK_NOFOLLOW 0x100 /* Do not follow symbolic links. */
+#define AT_EACCESS 0x200 /* Test access permitted for
+ effective IDs, not real IDs. */
#define AT_REMOVEDIR 0x200 /* Remove directory instead of
unlinking file. */
#define AT_SYMLINK_FOLLOW 0x400 /* Follow symbolic links. */
diff --git a/include/uapi/linux/fd.h b/include/uapi/linux/fd.h
index 90fb94712c41..8b80c63b971c 100644
--- a/include/uapi/linux/fd.h
+++ b/include/uapi/linux/fd.h
@@ -172,7 +172,10 @@ struct floppy_drive_params {
* used in succession to try to read the disk. If the FDC cannot lock onto
* the disk, the next format is tried. This uses the variable 'probing'.
*/
- short autodetect[8]; /* autodetected formats */
+
+#define FD_AUTODETECT_SIZE 8
+
+ short autodetect[FD_AUTODETECT_SIZE]; /* autodetected formats */
int checkfreq; /* how often should the drive be checked for disk
* changes */
@@ -357,10 +360,25 @@ struct floppy_raw_cmd {
int buffer_length; /* length of allocated buffer */
unsigned char rate;
+
+#define FD_RAW_CMD_SIZE 16
+#define FD_RAW_REPLY_SIZE 16
+#define FD_RAW_CMD_FULLSIZE (FD_RAW_CMD_SIZE + 1 + FD_RAW_REPLY_SIZE)
+
+ /* The command may take up the space initially intended for the reply
+ * and the reply count. Needed for long 82078 commands such as RESTORE,
+ * which takes 17 command bytes.
+ */
+
unsigned char cmd_count;
- unsigned char cmd[16];
- unsigned char reply_count;
- unsigned char reply[16];
+ union {
+ struct {
+ unsigned char cmd[FD_RAW_CMD_SIZE];
+ unsigned char reply_count;
+ unsigned char reply[FD_RAW_REPLY_SIZE];
+ };
+ unsigned char fullcmd[FD_RAW_CMD_FULLSIZE];
+ };
int track;
int resultcode;
diff --git a/include/uapi/linux/fdreg.h b/include/uapi/linux/fdreg.h
index 1318881954e1..10d33632939d 100644
--- a/include/uapi/linux/fdreg.h
+++ b/include/uapi/linux/fdreg.h
@@ -7,13 +7,23 @@
* Handbook", Sanches and Canton.
*/
-/* Fd controller regs. S&C, about page 340 */
-#define FD_STATUS 4
-#define FD_DATA 5
+/* 82077's auxiliary status registers A & B (R) */
+#define FD_SRA 0
+#define FD_SRB 1
/* Digital Output Register */
#define FD_DOR 2
+/* 82077's tape drive register (R/W) */
+#define FD_TDR 3
+
+/* 82077's data rate select register (W) */
+#define FD_DSR 4
+
+/* Fd controller regs. S&C, about page 340 */
+#define FD_STATUS 4
+#define FD_DATA 5
+
/* Digital Input Register (read) */
#define FD_DIR 7
diff --git a/include/uapi/linux/firewire-cdev.h b/include/uapi/linux/firewire-cdev.h
index 1acd2b179aef..7e5b5c10a49c 100644
--- a/include/uapi/linux/firewire-cdev.h
+++ b/include/uapi/linux/firewire-cdev.h
@@ -308,7 +308,7 @@ struct fw_cdev_event_iso_interrupt_mc {
/**
* struct fw_cdev_event_iso_resource - Iso resources were allocated or freed
* @closure: See &fw_cdev_event_common;
- * set by %FW_CDEV_IOC_(DE)ALLOCATE_ISO_RESOURCE(_ONCE) ioctl
+ * set by ``FW_CDEV_IOC_(DE)ALLOCATE_ISO_RESOURCE(_ONCE)`` ioctl
* @type: %FW_CDEV_EVENT_ISO_RESOURCE_ALLOCATED or
* %FW_CDEV_EVENT_ISO_RESOURCE_DEALLOCATED
* @handle: Reference by which an allocated resource can be deallocated
diff --git a/include/uapi/linux/fscrypt.h b/include/uapi/linux/fscrypt.h
index a10e3cdc2839..7875709ccfeb 100644
--- a/include/uapi/linux/fscrypt.h
+++ b/include/uapi/linux/fscrypt.h
@@ -19,7 +19,8 @@
#define FSCRYPT_POLICY_FLAGS_PAD_MASK 0x03
#define FSCRYPT_POLICY_FLAG_DIRECT_KEY 0x04
#define FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64 0x08
-#define FSCRYPT_POLICY_FLAGS_VALID 0x0F
+#define FSCRYPT_POLICY_FLAG_IV_INO_LBLK_32 0x10
+#define FSCRYPT_POLICY_FLAGS_VALID 0x1F
/* Encryption algorithms */
#define FSCRYPT_MODE_AES_256_XTS 1
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index e48d746b8e2a..92c22699a5a7 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -129,6 +129,7 @@ enum {
IORING_OP_SPLICE,
IORING_OP_PROVIDE_BUFFERS,
IORING_OP_REMOVE_BUFFERS,
+ IORING_OP_TEE,
/* this goes last, obviously */
IORING_OP_LAST,
@@ -204,10 +205,19 @@ struct io_cqring_offsets {
__u32 ring_entries;
__u32 overflow;
__u32 cqes;
- __u64 resv[2];
+ __u32 flags;
+ __u32 resv1;
+ __u64 resv2;
};
/*
+ * cq_ring->flags
+ */
+
+/* disable eventfd notifications */
+#define IORING_CQ_EVENTFD_DISABLED (1U << 0)
+
+/*
* io_uring_enter(2) flags
*/
#define IORING_ENTER_GETEVENTS (1U << 0)
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index 20917c59f39c..b6be62356d34 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -251,7 +251,7 @@ struct kfd_memory_exception_failure {
__u32 imprecise; /* Can't determine the exact fault address */
};
-/* memory exception data*/
+/* memory exception data */
struct kfd_hsa_memory_exception_data {
struct kfd_memory_exception_failure failure;
__u64 va;
@@ -410,6 +410,20 @@ struct kfd_ioctl_unmap_memory_from_gpu_args {
__u32 n_success; /* to/from KFD */
};
+/* Allocate GWS for specific queue
+ *
+ * @queue_id: queue's id that GWS is allocated for
+ * @num_gws: how many GWS to allocate
+ * @first_gws: index of the first GWS allocated.
+ * Only contiguous GWS allocation is supported.
+ */
+struct kfd_ioctl_alloc_queue_gws_args {
+ __u32 queue_id; /* to KFD */
+ __u32 num_gws; /* to KFD */
+ __u32 first_gws; /* from KFD */
+ __u32 pad;
+};
+
struct kfd_ioctl_get_dmabuf_info_args {
__u64 size; /* from KFD */
__u64 metadata_ptr; /* to KFD */
@@ -529,7 +543,10 @@ enum kfd_mmio_remap {
#define AMDKFD_IOC_IMPORT_DMABUF \
AMDKFD_IOWR(0x1D, struct kfd_ioctl_import_dmabuf_args)
+#define AMDKFD_IOC_ALLOC_QUEUE_GWS \
+ AMDKFD_IOWR(0x1E, struct kfd_ioctl_alloc_queue_gws_args)
+
#define AMDKFD_COMMAND_START 0x01
-#define AMDKFD_COMMAND_END 0x1E
+#define AMDKFD_COMMAND_END 0x1F
#endif
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 428c7dde6b4b..4fdf30316582 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -116,7 +116,7 @@ struct kvm_irq_level {
* ACPI gsi notion of irq.
* For IA-64 (APIC model) IOAPIC0: irq 0-23; IOAPIC1: irq 24-47..
* For X86 (standard AT mode) PIC0/1: irq 0-15. IOAPIC0: 0-23..
- * For ARM: See Documentation/virt/kvm/api.txt
+ * For ARM: See Documentation/virt/kvm/api.rst
*/
union {
__u32 irq;
@@ -188,10 +188,13 @@ struct kvm_s390_cmma_log {
struct kvm_hyperv_exit {
#define KVM_EXIT_HYPERV_SYNIC 1
#define KVM_EXIT_HYPERV_HCALL 2
+#define KVM_EXIT_HYPERV_SYNDBG 3
__u32 type;
+ __u32 pad1;
union {
struct {
__u32 msr;
+ __u32 pad2;
__u64 control;
__u64 evt_page;
__u64 msg_page;
@@ -201,6 +204,15 @@ struct kvm_hyperv_exit {
__u64 result;
__u64 params[2];
} hcall;
+ struct {
+ __u32 msr;
+ __u32 pad2;
+ __u64 control;
+ __u64 status;
+ __u64 send_page;
+ __u64 recv_page;
+ __u64 pending_page;
+ } syndbg;
} u;
};
@@ -1017,6 +1029,8 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_S390_VCPU_RESETS 179
#define KVM_CAP_S390_PROTECTED 180
#define KVM_CAP_PPC_SECURE_GUEST 181
+#define KVM_CAP_HALT_POLL 182
+#define KVM_CAP_ASYNC_PF_INT 183
#ifdef KVM_CAP_IRQ_ROUTING
@@ -1107,7 +1121,7 @@ struct kvm_xen_hvm_config {
*
* KVM_IRQFD_FLAG_RESAMPLE indicates resamplefd is valid and specifies
* the irqfd to operate in resampling mode for level triggered interrupt
- * emulation. See Documentation/virt/kvm/api.txt.
+ * emulation. See Documentation/virt/kvm/api.rst.
*/
#define KVM_IRQFD_FLAG_RESAMPLE (1 << 1)
diff --git a/include/uapi/linux/loop.h b/include/uapi/linux/loop.h
index 080a8df134ef..24a1c45bd1ae 100644
--- a/include/uapi/linux/loop.h
+++ b/include/uapi/linux/loop.h
@@ -25,6 +25,16 @@ enum {
LO_FLAGS_DIRECT_IO = 16,
};
+/* LO_FLAGS that can be set using LOOP_SET_STATUS(64) */
+#define LOOP_SET_STATUS_SETTABLE_FLAGS (LO_FLAGS_AUTOCLEAR | LO_FLAGS_PARTSCAN)
+
+/* LO_FLAGS that can be cleared using LOOP_SET_STATUS(64) */
+#define LOOP_SET_STATUS_CLEARABLE_FLAGS (LO_FLAGS_AUTOCLEAR)
+
+/* LO_FLAGS that can be set using LOOP_CONFIGURE */
+#define LOOP_CONFIGURE_SETTABLE_FLAGS (LO_FLAGS_READ_ONLY | LO_FLAGS_AUTOCLEAR \
+ | LO_FLAGS_PARTSCAN | LO_FLAGS_DIRECT_IO)
+
#include <asm/posix_types.h> /* for __kernel_old_dev_t */
#include <linux/types.h> /* for __u64 */
@@ -37,7 +47,7 @@ struct loop_info {
int lo_offset;
int lo_encrypt_type;
int lo_encrypt_key_size; /* ioctl w/o */
- int lo_flags; /* ioctl r/o */
+ int lo_flags;
char lo_name[LO_NAME_SIZE];
unsigned char lo_encrypt_key[LO_KEY_SIZE]; /* ioctl w/o */
unsigned long lo_init[2];
@@ -53,13 +63,29 @@ struct loop_info64 {
__u32 lo_number; /* ioctl r/o */
__u32 lo_encrypt_type;
__u32 lo_encrypt_key_size; /* ioctl w/o */
- __u32 lo_flags; /* ioctl r/o */
+ __u32 lo_flags;
__u8 lo_file_name[LO_NAME_SIZE];
__u8 lo_crypt_name[LO_NAME_SIZE];
__u8 lo_encrypt_key[LO_KEY_SIZE]; /* ioctl w/o */
__u64 lo_init[2];
};
+/**
+ * struct loop_config - Complete configuration for a loop device.
+ * @fd: fd of the file to be used as a backing file for the loop device.
+ * @block_size: block size to use; ignored if 0.
+ * @info: struct loop_info64 to configure the loop device with.
+ *
+ * This structure is used with the LOOP_CONFIGURE ioctl, and can be used to
+ * atomically setup and configure all loop device parameters at once.
+ */
+struct loop_config {
+ __u32 fd;
+ __u32 block_size;
+ struct loop_info64 info;
+ __u64 __reserved[8];
+};
+
/*
* Loop filter types
*/
@@ -90,6 +116,7 @@ struct loop_info64 {
#define LOOP_SET_CAPACITY 0x4C07
#define LOOP_SET_DIRECT_IO 0x4C08
#define LOOP_SET_BLOCK_SIZE 0x4C09
+#define LOOP_CONFIGURE 0x4C0A
/* /dev/loop-control interface */
#define LOOP_CTL_ADD 0x4C80
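
LOOP_CONFIGURE collapses the old LOOP_SET_FD + LOOP_SET_STATUS64 (+ LOOP_SET_BLOCK_SIZE) dance into one atomic ioctl. A hedged userspace sketch, assuming a <linux/loop.h> that already carries struct loop_config; error handling and fd cleanup are abbreviated:

#include <fcntl.h>
#include <linux/loop.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>

static int example_configure_loop(const char *loopdev, const char *backing)
{
	int lfd = open(loopdev, O_RDWR);
	int bfd = open(backing, O_RDWR);
	struct loop_config cfg;

	if (lfd < 0 || bfd < 0)
		return -1;

	memset(&cfg, 0, sizeof(cfg));
	cfg.fd = bfd;
	cfg.block_size = 4096;		/* 0 keeps the default block size */
	cfg.info.lo_flags = LO_FLAGS_AUTOCLEAR | LO_FLAGS_DIRECT_IO;

	return ioctl(lfd, LOOP_CONFIGURE, &cfg);
}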
diff --git a/include/uapi/linux/mmc/ioctl.h b/include/uapi/linux/mmc/ioctl.h
index 00c08120f3ba..27a39847d55c 100644
--- a/include/uapi/linux/mmc/ioctl.h
+++ b/include/uapi/linux/mmc/ioctl.h
@@ -3,6 +3,7 @@
#define LINUX_MMC_IOCTL_H
#include <linux/types.h>
+#include <linux/major.h>
struct mmc_ioc_cmd {
/*
diff --git a/include/uapi/linux/psp-sev.h b/include/uapi/linux/psp-sev.h
index 0549a5c622bf..91b4c63d5cbf 100644
--- a/include/uapi/linux/psp-sev.h
+++ b/include/uapi/linux/psp-sev.h
@@ -83,6 +83,8 @@ struct sev_user_data_status {
__u32 guest_count; /* Out */
} __packed;
+#define SEV_STATUS_FLAGS_CONFIG_ES 0x0100
+
/**
* struct sev_user_data_pek_csr - PEK_CSR command parameters
*
diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h
index ad80a5c885d5..82cc58fe9368 100644
--- a/include/uapi/linux/stat.h
+++ b/include/uapi/linux/stat.h
@@ -123,7 +123,10 @@ struct statx {
__u32 stx_dev_major; /* ID of device containing file [uncond] */
__u32 stx_dev_minor;
/* 0x90 */
- __u64 __spare2[14]; /* Spare space for future expansion */
+ __u64 stx_mnt_id;
+ __u64 __spare2;
+ /* 0xa0 */
+ __u64 __spare3[12]; /* Spare space for future expansion */
/* 0x100 */
};
@@ -148,9 +151,19 @@ struct statx {
#define STATX_BLOCKS 0x00000400U /* Want/got stx_blocks */
#define STATX_BASIC_STATS 0x000007ffU /* The stuff in the normal stat struct */
#define STATX_BTIME 0x00000800U /* Want/got stx_btime */
-#define STATX_ALL 0x00000fffU /* All currently supported flags */
+#define STATX_MNT_ID 0x00001000U /* Got stx_mnt_id */
+
#define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */
+#ifndef __KERNEL__
+/*
+ * This is deprecated, and shall remain the same value in the future. To avoid
+ * confusion please use the equivalent (STATX_BASIC_STATS | STATX_BTIME)
+ * instead.
+ */
+#define STATX_ALL 0x00000fffU
+#endif
+
/*
* Attributes to be found in stx_attributes and masked in stx_attributes_mask.
*
@@ -168,7 +181,9 @@ struct statx {
#define STATX_ATTR_NODUMP 0x00000040 /* [I] File is not to be dumped */
#define STATX_ATTR_ENCRYPTED 0x00000800 /* [I] File requires key to decrypt in fs */
#define STATX_ATTR_AUTOMOUNT 0x00001000 /* Dir: Automount trigger */
+#define STATX_ATTR_MOUNT_ROOT 0x00002000 /* Root of a mount */
#define STATX_ATTR_VERITY 0x00100000 /* [I] Verity protected file */
+#define STATX_ATTR_DAX 0x00002000 /* [I] File is DAX */
#endif /* _UAPI_LINUX_STAT_H */
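
statx() callers can now recover the mount ID directly instead of parsing /proc/self/mountinfo. A hedged sketch that assumes glibc and kernel headers new enough to expose statx(), STATX_MNT_ID and stx_mnt_id; older kernels simply leave the bit clear in stx_mask:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/stat.h>

int main(void)
{
	struct statx stx;

	if (statx(AT_FDCWD, "/etc/passwd", 0,
		  STATX_BASIC_STATS | STATX_MNT_ID, &stx))
		return 1;

	if (stx.stx_mask & STATX_MNT_ID)	/* kernel actually filled it in */
		printf("mount id: %llu\n", (unsigned long long)stx.stx_mnt_id);
	else
		printf("no mount id reported\n");
	return 0;
}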
diff --git a/include/uapi/rdma/rdma_user_ioctl_cmds.h b/include/uapi/rdma/rdma_user_ioctl_cmds.h
index 7b1ec806f8f9..38ab7accb7be 100644
--- a/include/uapi/rdma/rdma_user_ioctl_cmds.h
+++ b/include/uapi/rdma/rdma_user_ioctl_cmds.h
@@ -36,7 +36,7 @@
#include <linux/types.h>
#include <linux/ioctl.h>
-/* Documentation/ioctl/ioctl-number.rst */
+/* Documentation/userspace-api/ioctl/ioctl-number.rst */
#define RDMA_IOCTL_MAGIC 0x1b
#define RDMA_VERBS_IOCTL \
_IOWR(RDMA_IOCTL_MAGIC, 1, struct ib_uverbs_ioctl_hdr)
diff --git a/init/Kconfig b/init/Kconfig
index 74a5ac65644f..5c7bafe8bb0b 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -23,6 +23,9 @@ config LD_VERSION
config CC_IS_CLANG
def_bool $(success,$(CC) --version | head -n 1 | grep -q clang)
+config LD_IS_LLD
+ def_bool $(success,$(LD) -v | head -n 1 | grep -q LLD)
+
config CLANG_VERSION
int
default $(shell,$(srctree)/scripts/clang-version.sh $(CC))
diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c
index dab8b1151b56..d72beda824aa 100644
--- a/init/do_mounts_initrd.c
+++ b/init/do_mounts_initrd.c
@@ -28,7 +28,7 @@ static int __init no_initrd(char *str)
__setup("noinitrd", no_initrd);
-static int __init early_initrd(char *p)
+static int __init early_initrdmem(char *p)
{
phys_addr_t start;
unsigned long size;
@@ -43,6 +43,17 @@ static int __init early_initrd(char *p)
}
return 0;
}
+early_param("initrdmem", early_initrdmem);
+
+/*
+ * This is here as the initrd keyword has been in use since 11/2018
+ * on ARM, PowerPC, and MIPS.
+ * It should not be; it is reserved for bootloaders.
+ */
+static int __init early_initrd(char *p)
+{
+ return early_initrdmem(p);
+}
early_param("initrd", early_initrd);
static int init_linuxrc(struct subprocess_info *info, struct cred *new)
diff --git a/init/init_task.c b/init/init_task.c
index bd403ed3e418..15303d58d9db 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -11,6 +11,7 @@
#include <linux/mm.h>
#include <linux/audit.h>
#include <linux/numa.h>
+#include <linux/scs.h>
#include <asm/pgtable.h>
#include <linux/uaccess.h>
@@ -50,6 +51,13 @@ static struct sighand_struct init_sighand = {
.signalfd_wqh = __WAIT_QUEUE_HEAD_INITIALIZER(init_sighand.signalfd_wqh),
};
+#ifdef CONFIG_SHADOW_CALL_STACK
+unsigned long init_shadow_call_stack[SCS_SIZE / sizeof(long)]
+ __init_task_data = {
+ [(SCS_SIZE / sizeof(long)) - 1] = SCS_END_MAGIC
+};
+#endif
+
/*
* Set up the first task table, touch at your own risk!. Base=0,
* limit=0x1fffff (=2MB)
@@ -141,6 +149,11 @@ struct task_struct init_task
.rcu_tasks_holdout_list = LIST_HEAD_INIT(init_task.rcu_tasks_holdout_list),
.rcu_tasks_idle_cpu = -1,
#endif
+#ifdef CONFIG_TASKS_TRACE_RCU
+ .trc_reader_nesting = 0,
+ .trc_reader_special.s = 0,
+ .trc_holdout_list = LIST_HEAD_INIT(init_task.trc_holdout_list),
+#endif
#ifdef CONFIG_CPUSETS
.mems_allowed_seq = SEQCNT_ZERO(init_task.mems_allowed_seq),
#endif
diff --git a/ipc/namespace.c b/ipc/namespace.c
index b3ca1476ca51..fdc3b5f3f53a 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -177,15 +177,14 @@ static void ipcns_put(struct ns_common *ns)
return put_ipc_ns(to_ipc_ns(ns));
}
-static int ipcns_install(struct nsproxy *nsproxy, struct ns_common *new)
+static int ipcns_install(struct nsset *nsset, struct ns_common *new)
{
+ struct nsproxy *nsproxy = nsset->nsproxy;
struct ipc_namespace *ns = to_ipc_ns(new);
if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN) ||
- !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
+ !ns_capable(nsset->cred->user_ns, CAP_SYS_ADMIN))
return -EPERM;
- /* Ditch state from the old ipc namespace */
- exit_sem(current);
put_ipc_ns(nsproxy->ipc_ns);
nsproxy->ipc_ns = get_ipc_ns(ns);
return 0;
diff --git a/kernel/Makefile b/kernel/Makefile
index 4cb4130ced32..c332eb9d4841 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -103,6 +103,7 @@ obj-$(CONFIG_TRACEPOINTS) += trace/
obj-$(CONFIG_IRQ_WORK) += irq_work.o
obj-$(CONFIG_CPU_PM) += cpu_pm.o
obj-$(CONFIG_BPF) += bpf/
+obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o
obj-$(CONFIG_PERF_EVENTS) += events/
diff --git a/kernel/audit.c b/kernel/audit.c
index 87f31bf1f0a0..8c201f414226 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -880,7 +880,7 @@ main_queue:
return 0;
}
-int audit_send_list(void *_dest)
+int audit_send_list_thread(void *_dest)
{
struct audit_netlink_list *dest = _dest;
struct sk_buff *skb;
@@ -924,19 +924,30 @@ out_kfree_skb:
return NULL;
}
+static void audit_free_reply(struct audit_reply *reply)
+{
+ if (!reply)
+ return;
+
+ if (reply->skb)
+ kfree_skb(reply->skb);
+ if (reply->net)
+ put_net(reply->net);
+ kfree(reply);
+}
+
static int audit_send_reply_thread(void *arg)
{
struct audit_reply *reply = (struct audit_reply *)arg;
- struct sock *sk = audit_get_sk(reply->net);
audit_ctl_lock();
audit_ctl_unlock();
/* Ignore failure. It'll only happen if the sender goes away,
because our timeout is set to infinite. */
- netlink_unicast(sk, reply->skb, reply->portid, 0);
- put_net(reply->net);
- kfree(reply);
+ netlink_unicast(audit_get_sk(reply->net), reply->skb, reply->portid, 0);
+ reply->skb = NULL;
+ audit_free_reply(reply);
return 0;
}
@@ -950,35 +961,32 @@ static int audit_send_reply_thread(void *arg)
* @payload: payload data
* @size: payload size
*
- * Allocates an skb, builds the netlink message, and sends it to the port id.
- * No failure notifications.
+ * Allocates a skb, builds the netlink message, and sends it to the port id.
*/
static void audit_send_reply(struct sk_buff *request_skb, int seq, int type, int done,
int multi, const void *payload, int size)
{
- struct net *net = sock_net(NETLINK_CB(request_skb).sk);
- struct sk_buff *skb;
struct task_struct *tsk;
- struct audit_reply *reply = kmalloc(sizeof(struct audit_reply),
- GFP_KERNEL);
+ struct audit_reply *reply;
+ reply = kzalloc(sizeof(*reply), GFP_KERNEL);
if (!reply)
return;
- skb = audit_make_reply(seq, type, done, multi, payload, size);
- if (!skb)
- goto out;
-
- reply->net = get_net(net);
+ reply->skb = audit_make_reply(seq, type, done, multi, payload, size);
+ if (!reply->skb)
+ goto err;
+ reply->net = get_net(sock_net(NETLINK_CB(request_skb).sk));
reply->portid = NETLINK_CB(request_skb).portid;
- reply->skb = skb;
tsk = kthread_run(audit_send_reply_thread, reply, "audit_send_reply");
- if (!IS_ERR(tsk))
- return;
- kfree_skb(skb);
-out:
- kfree(reply);
+ if (IS_ERR(tsk))
+ goto err;
+
+ return;
+
+err:
+ audit_free_reply(reply);
}
/*
@@ -1525,20 +1533,60 @@ static void audit_receive(struct sk_buff *skb)
audit_ctl_unlock();
}
+/* Log information about who is connecting to the audit multicast socket */
+static void audit_log_multicast(int group, const char *op, int err)
+{
+ const struct cred *cred;
+ struct tty_struct *tty;
+ char comm[sizeof(current->comm)];
+ struct audit_buffer *ab;
+
+ if (!audit_enabled)
+ return;
+
+ ab = audit_log_start(audit_context(), GFP_KERNEL, AUDIT_EVENT_LISTENER);
+ if (!ab)
+ return;
+
+ cred = current_cred();
+ tty = audit_get_tty();
+ audit_log_format(ab, "pid=%u uid=%u auid=%u tty=%s ses=%u",
+ task_pid_nr(current),
+ from_kuid(&init_user_ns, cred->uid),
+ from_kuid(&init_user_ns, audit_get_loginuid(current)),
+ tty ? tty_name(tty) : "(none)",
+ audit_get_sessionid(current));
+ audit_put_tty(tty);
+ audit_log_task_context(ab); /* subj= */
+ audit_log_format(ab, " comm=");
+ audit_log_untrustedstring(ab, get_task_comm(comm, current));
+ audit_log_d_path_exe(ab, current->mm); /* exe= */
+ audit_log_format(ab, " nl-mcgrp=%d op=%s res=%d", group, op, !err);
+ audit_log_end(ab);
+}
+
/* Run custom bind function on netlink socket group connect or bind requests. */
-static int audit_bind(struct net *net, int group)
+static int audit_multicast_bind(struct net *net, int group)
{
+ int err = 0;
+
if (!capable(CAP_AUDIT_READ))
- return -EPERM;
+ err = -EPERM;
+ audit_log_multicast(group, "connect", err);
+ return err;
+}
- return 0;
+static void audit_multicast_unbind(struct net *net, int group)
+{
+ audit_log_multicast(group, "disconnect", 0);
}
static int __net_init audit_net_init(struct net *net)
{
struct netlink_kernel_cfg cfg = {
.input = audit_receive,
- .bind = audit_bind,
+ .bind = audit_multicast_bind,
+ .unbind = audit_multicast_unbind,
.flags = NL_CFG_F_NONROOT_RECV,
.groups = AUDIT_NLGRP_MAX,
};
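For reference, the multicast bind that now emits AUDIT_EVENT_LISTENER is an ordinary netlink group subscription from userspace; a minimal sketch (error handling trimmed):

        #include <sys/socket.h>
        #include <linux/netlink.h>
        #include <linux/audit.h>
        #include <unistd.h>

        static int open_audit_log_reader(void)
        {
                struct sockaddr_nl addr = {
                        .nl_family = AF_NETLINK,
                        /* group numbers map to bits; this one needs CAP_AUDIT_READ */
                        .nl_groups = 1U << (AUDIT_NLGRP_READLOG - 1),
                };
                int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_AUDIT);

                if (fd < 0)
                        return -1;
                if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
                        close(fd);
                        return -1;
                }
                return fd;      /* read() now delivers multicast audit records */
        }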
diff --git a/kernel/audit.h b/kernel/audit.h
index 2eed4d231624..f0233dc40b17 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -229,7 +229,7 @@ struct audit_netlink_list {
struct sk_buff_head q;
};
-int audit_send_list(void *_dest);
+int audit_send_list_thread(void *_dest);
extern int selinux_audit_rule_update(void);
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 026e34da4ace..a10e2997aa6c 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -1161,11 +1161,8 @@ int audit_rule_change(int type, int seq, void *data, size_t datasz)
*/
int audit_list_rules_send(struct sk_buff *request_skb, int seq)
{
- u32 portid = NETLINK_CB(request_skb).portid;
- struct net *net = sock_net(NETLINK_CB(request_skb).sk);
struct task_struct *tsk;
struct audit_netlink_list *dest;
- int err = 0;
/* We can't just spew out the rules here because we might fill
* the available socket buffer space and deadlock waiting for
@@ -1173,25 +1170,26 @@ int audit_list_rules_send(struct sk_buff *request_skb, int seq)
* happen if we're actually running in the context of auditctl
* trying to _send_ the stuff */
- dest = kmalloc(sizeof(struct audit_netlink_list), GFP_KERNEL);
+ dest = kmalloc(sizeof(*dest), GFP_KERNEL);
if (!dest)
return -ENOMEM;
- dest->net = get_net(net);
- dest->portid = portid;
+ dest->net = get_net(sock_net(NETLINK_CB(request_skb).sk));
+ dest->portid = NETLINK_CB(request_skb).portid;
skb_queue_head_init(&dest->q);
mutex_lock(&audit_filter_mutex);
audit_list_rules(seq, &dest->q);
mutex_unlock(&audit_filter_mutex);
- tsk = kthread_run(audit_send_list, dest, "audit_send_list");
+ tsk = kthread_run(audit_send_list_thread, dest, "audit_send_list");
if (IS_ERR(tsk)) {
skb_queue_purge(&dest->q);
+ put_net(dest->net);
kfree(dest);
- err = PTR_ERR(tsk);
+ return PTR_ERR(tsk);
}
- return err;
+ return 0;
}
int audit_comparator(u32 left, u32 op, u32 right)
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 814406a35db1..468a23390457 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -130,6 +130,17 @@ struct audit_tree_refs {
struct audit_chunk *c[31];
};
+struct audit_nfcfgop_tab {
+ enum audit_nfcfgop op;
+ const char *s;
+};
+
+static const struct audit_nfcfgop_tab audit_nfcfgs[] = {
+ { AUDIT_XT_OP_REGISTER, "register" },
+ { AUDIT_XT_OP_REPLACE, "replace" },
+ { AUDIT_XT_OP_UNREGISTER, "unregister" },
+};
+
static int audit_match_perm(struct audit_context *ctx, int mask)
{
unsigned n;
@@ -2542,6 +2553,26 @@ void __audit_ntp_log(const struct audit_ntp_data *ad)
audit_log_ntp_val(ad, "adjust", AUDIT_NTP_ADJUST);
}
+void __audit_log_nfcfg(const char *name, u8 af, unsigned int nentries,
+ enum audit_nfcfgop op)
+{
+ struct audit_buffer *ab;
+ char comm[sizeof(current->comm)];
+
+ ab = audit_log_start(audit_context(), GFP_KERNEL, AUDIT_NETFILTER_CFG);
+ if (!ab)
+ return;
+ audit_log_format(ab, "table=%s family=%u entries=%u op=%s",
+ name, af, nentries, audit_nfcfgs[op].s);
+
+ audit_log_format(ab, " pid=%u", task_pid_nr(current));
+ audit_log_task_context(ab); /* subj= */
+ audit_log_format(ab, " comm=");
+ audit_log_untrustedstring(ab, get_task_comm(comm, current));
+ audit_log_end(ab);
+}
+EXPORT_SYMBOL_GPL(__audit_log_nfcfg);
+
static void audit_log_task(struct audit_buffer *ab)
{
kuid_t auid, uid;
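A table-registration path would then emit the new record along these lines (sketch only; the table/private variable names are hypothetical, while the exported helper and the op values come from the hunks above):

        /* after a table has been successfully registered or replaced */
        if (audit_enabled)
                __audit_log_nfcfg(table->name, table->af, private->number,
                                  AUDIT_XT_OP_REGISTER);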
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index af52ca658c73..9df4cc9a2907 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -82,7 +82,7 @@ struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flag
struct bpf_prog *fp;
size = round_up(size, PAGE_SIZE);
- fp = __vmalloc(size, gfp_flags, PAGE_KERNEL);
+ fp = __vmalloc(size, gfp_flags);
if (fp == NULL)
return NULL;
@@ -232,7 +232,7 @@ struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
if (ret)
return NULL;
- fp = __vmalloc(size, gfp_flags, PAGE_KERNEL);
+ fp = __vmalloc(size, gfp_flags);
if (fp == NULL) {
__bpf_prog_uncharge(fp_old->aux->user, delta);
} else {
@@ -262,10 +262,10 @@ void __bpf_prog_free(struct bpf_prog *fp)
int bpf_prog_calc_tag(struct bpf_prog *fp)
{
- const u32 bits_offset = SHA_MESSAGE_BYTES - sizeof(__be64);
+ const u32 bits_offset = SHA1_BLOCK_SIZE - sizeof(__be64);
u32 raw_size = bpf_prog_tag_scratch_size(fp);
- u32 digest[SHA_DIGEST_WORDS];
- u32 ws[SHA_WORKSPACE_WORDS];
+ u32 digest[SHA1_DIGEST_WORDS];
+ u32 ws[SHA1_WORKSPACE_WORDS];
u32 i, bsize, psize, blocks;
struct bpf_insn *dst;
bool was_ld_map;
@@ -277,7 +277,7 @@ int bpf_prog_calc_tag(struct bpf_prog *fp)
if (!raw)
return -ENOMEM;
- sha_init(digest);
+ sha1_init(digest);
memset(ws, 0, sizeof(ws));
/* We need to take out the map fd for the digest calculation
@@ -308,8 +308,8 @@ int bpf_prog_calc_tag(struct bpf_prog *fp)
memset(&raw[psize], 0, raw_size - psize);
raw[psize++] = 0x80;
- bsize = round_up(psize, SHA_MESSAGE_BYTES);
- blocks = bsize / SHA_MESSAGE_BYTES;
+ bsize = round_up(psize, SHA1_BLOCK_SIZE);
+ blocks = bsize / SHA1_BLOCK_SIZE;
todo = raw;
if (bsize - psize >= sizeof(__be64)) {
bits = (__be64 *)(todo + bsize - sizeof(__be64));
@@ -320,12 +320,12 @@ int bpf_prog_calc_tag(struct bpf_prog *fp)
*bits = cpu_to_be64((psize - 1) << 3);
while (blocks--) {
- sha_transform(digest, todo, ws);
- todo += SHA_MESSAGE_BYTES;
+ sha1_transform(digest, todo, ws);
+ todo += SHA1_BLOCK_SIZE;
}
result = (__force __be32 *)digest;
- for (i = 0; i < SHA_DIGEST_WORDS; i++)
+ for (i = 0; i < SHA1_DIGEST_WORDS; i++)
result[i] = cpu_to_be32(digest[i]);
memcpy(fp->tag, result, sizeof(fp->tag));
@@ -1089,7 +1089,7 @@ static struct bpf_prog *bpf_prog_clone_create(struct bpf_prog *fp_other,
gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags;
struct bpf_prog *fp;
- fp = __vmalloc(fp_other->pages * PAGE_SIZE, gfp_flags, PAGE_KERNEL);
+ fp = __vmalloc(fp_other->pages * PAGE_SIZE, gfp_flags);
if (fp != NULL) {
/* aux->prog still points to the fp_other one, so
* when promoting the clone to the real program,
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index e14a842d7e0d..4d530b1d5683 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -25,6 +25,7 @@
#include <linux/nospec.h>
#include <linux/audit.h>
#include <uapi/linux/btf.h>
+#include <asm/pgtable.h>
#include <linux/bpf_lsm.h>
#include <linux/poll.h>
#include <linux/bpf-netns.h>
@@ -287,27 +288,29 @@ static void *__bpf_map_area_alloc(u64 size, int numa_node, bool mmapable)
* __GFP_RETRY_MAYFAIL to avoid such situations.
*/
- const gfp_t flags = __GFP_NOWARN | __GFP_ZERO;
+ const gfp_t gfp = __GFP_NOWARN | __GFP_ZERO;
+ unsigned int flags = 0;
+ unsigned long align = 1;
void *area;
if (size >= SIZE_MAX)
return NULL;
/* kmalloc()'ed memory can't be mmap()'ed */
- if (!mmapable && size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
- area = kmalloc_node(size, GFP_USER | __GFP_NORETRY | flags,
+ if (mmapable) {
+ BUG_ON(!PAGE_ALIGNED(size));
+ align = SHMLBA;
+ flags = VM_USERMAP;
+ } else if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
+ area = kmalloc_node(size, gfp | GFP_USER | __GFP_NORETRY,
numa_node);
if (area != NULL)
return area;
}
- if (mmapable) {
- BUG_ON(!PAGE_ALIGNED(size));
- return vmalloc_user_node_flags(size, numa_node, GFP_KERNEL |
- __GFP_RETRY_MAYFAIL | flags);
- }
- return __vmalloc_node_flags_caller(size, numa_node,
- GFP_KERNEL | __GFP_RETRY_MAYFAIL |
- flags, __builtin_return_address(0));
+
+ return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END,
+ gfp | GFP_KERNEL | __GFP_RETRY_MAYFAIL, PAGE_KERNEL,
+ flags, numa_node, __builtin_return_address(0));
}
void *bpf_map_area_alloc(u64 size, int numa_node)
diff --git a/kernel/cgroup/namespace.c b/kernel/cgroup/namespace.c
index b05f1dd58a62..812a61afd538 100644
--- a/kernel/cgroup/namespace.c
+++ b/kernel/cgroup/namespace.c
@@ -95,11 +95,12 @@ static inline struct cgroup_namespace *to_cg_ns(struct ns_common *ns)
return container_of(ns, struct cgroup_namespace, ns);
}
-static int cgroupns_install(struct nsproxy *nsproxy, struct ns_common *ns)
+static int cgroupns_install(struct nsset *nsset, struct ns_common *ns)
{
+ struct nsproxy *nsproxy = nsset->nsproxy;
struct cgroup_namespace *cgroup_ns = to_cg_ns(ns);
- if (!ns_capable(current_user_ns(), CAP_SYS_ADMIN) ||
+ if (!ns_capable(nsset->cred->user_ns, CAP_SYS_ADMIN) ||
!ns_capable(cgroup_ns->user_ns, CAP_SYS_ADMIN))
return -EPERM;
diff --git a/kernel/compat.c b/kernel/compat.c
index 843dd17e6078..b8d2800bb4b7 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -199,7 +199,7 @@ long compat_get_bitmap(unsigned long *mask, const compat_ulong_t __user *umask,
bitmap_size = ALIGN(bitmap_size, BITS_PER_COMPAT_LONG);
nr_compat_longs = BITS_TO_COMPAT_LONGS(bitmap_size);
- if (!user_access_begin(umask, bitmap_size / 8))
+ if (!user_read_access_begin(umask, bitmap_size / 8))
return -EFAULT;
while (nr_compat_longs > 1) {
@@ -211,11 +211,11 @@ long compat_get_bitmap(unsigned long *mask, const compat_ulong_t __user *umask,
}
if (nr_compat_longs)
unsafe_get_user(*mask, umask++, Efault);
- user_access_end();
+ user_read_access_end();
return 0;
Efault:
- user_access_end();
+ user_read_access_end();
return -EFAULT;
}
@@ -228,7 +228,7 @@ long compat_put_bitmap(compat_ulong_t __user *umask, unsigned long *mask,
bitmap_size = ALIGN(bitmap_size, BITS_PER_COMPAT_LONG);
nr_compat_longs = BITS_TO_COMPAT_LONGS(bitmap_size);
- if (!user_access_begin(umask, bitmap_size / 8))
+ if (!user_write_access_begin(umask, bitmap_size / 8))
return -EFAULT;
while (nr_compat_longs > 1) {
@@ -239,10 +239,10 @@ long compat_put_bitmap(compat_ulong_t __user *umask, unsigned long *mask,
}
if (nr_compat_longs)
unsafe_put_user((compat_ulong_t)*mask, umask++, Efault);
- user_access_end();
+ user_write_access_end();
return 0;
Efault:
- user_access_end();
+ user_write_access_end();
return -EFAULT;
}
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 2371292f30b0..6ff2578ecf17 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -3,6 +3,7 @@
*
* This code is licenced under the GPL.
*/
+#include <linux/sched/mm.h>
#include <linux/proc_fs.h>
#include <linux/smp.h>
#include <linux/init.h>
@@ -432,7 +433,7 @@ static inline bool cpu_smt_allowed(unsigned int cpu)
/*
* On x86 it's required to boot all logical CPUs at least once so
* that the init code can get a chance to set CR4.MCE on each
- * CPU. Otherwise, a broadacasted MCE observing CR4.MCE=0b on any
+ * CPU. Otherwise, a broadcasted MCE observing CR4.MCE=0b on any
* core will shutdown the machine.
*/
return !cpumask_test_cpu(cpu, &cpus_booted_once_mask);
@@ -564,6 +565,21 @@ static int bringup_cpu(unsigned int cpu)
return bringup_wait_for_ap(cpu);
}
+static int finish_cpu(unsigned int cpu)
+{
+ struct task_struct *idle = idle_thread_get(cpu);
+ struct mm_struct *mm = idle->active_mm;
+
+ /*
+ * idle_task_exit() will have switched to &init_mm, now
+ * clean up any remaining active_mm state.
+ */
+ if (mm != &init_mm)
+ idle->active_mm = &init_mm;
+ mmdrop(mm);
+ return 0;
+}
+
/*
* Hotplug state machine related functions
*/
@@ -1327,7 +1343,7 @@ void bringup_nonboot_cpus(unsigned int setup_max_cpus)
#ifdef CONFIG_PM_SLEEP_SMP
static cpumask_var_t frozen_cpus;
-int __freeze_secondary_cpus(int primary, bool suspend)
+int freeze_secondary_cpus(int primary)
{
int cpu, error = 0;
@@ -1352,7 +1368,7 @@ int __freeze_secondary_cpus(int primary, bool suspend)
if (cpu == primary)
continue;
- if (suspend && pm_wakeup_pending()) {
+ if (pm_wakeup_pending()) {
pr_info("Wakeup pending. Abort CPU freeze\n");
error = -EBUSY;
break;
@@ -1376,8 +1392,8 @@ int __freeze_secondary_cpus(int primary, bool suspend)
/*
* Make sure the CPUs won't be enabled by someone else. We need to do
- * this even in case of failure as all disable_nonboot_cpus() users are
- * supposed to do enable_nonboot_cpus() on the failure path.
+ * this even in case of failure as all freeze_secondary_cpus() users are
+ * supposed to do thaw_secondary_cpus() on the failure path.
*/
cpu_hotplug_disabled++;
@@ -1385,15 +1401,15 @@ int __freeze_secondary_cpus(int primary, bool suspend)
return error;
}
-void __weak arch_enable_nonboot_cpus_begin(void)
+void __weak arch_thaw_secondary_cpus_begin(void)
{
}
-void __weak arch_enable_nonboot_cpus_end(void)
+void __weak arch_thaw_secondary_cpus_end(void)
{
}
-void enable_nonboot_cpus(void)
+void thaw_secondary_cpus(void)
{
int cpu, error;
@@ -1405,7 +1421,7 @@ void enable_nonboot_cpus(void)
pr_info("Enabling non-boot CPUs ...\n");
- arch_enable_nonboot_cpus_begin();
+ arch_thaw_secondary_cpus_begin();
for_each_cpu(cpu, frozen_cpus) {
trace_suspend_resume(TPS("CPU_ON"), cpu, true);
@@ -1418,7 +1434,7 @@ void enable_nonboot_cpus(void)
pr_warn("Error taking CPU%d up: %d\n", cpu, error);
}
- arch_enable_nonboot_cpus_end();
+ arch_thaw_secondary_cpus_end();
cpumask_clear(frozen_cpus);
out:
@@ -1549,7 +1565,7 @@ static struct cpuhp_step cpuhp_hp_states[] = {
[CPUHP_BRINGUP_CPU] = {
.name = "cpu:bringup",
.startup.single = bringup_cpu,
- .teardown.single = NULL,
+ .teardown.single = finish_cpu,
.cant_stop = true,
},
/* Final state before CPU kills itself */
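After the rename, callers pair the helpers directly; a schematic sketch of the calling pattern (not code from this patch):

        int error;

        error = freeze_secondary_cpus(0);       /* keep only the boot CPU online */
        if (error)
                return error;

        /* ... work that must run on a single CPU ... */

        thaw_secondary_cpus();                  /* also required on failure paths */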
diff --git a/kernel/crash_dump.c b/kernel/crash_dump.c
index 9c23ae074b40..92da32275af5 100644
--- a/kernel/crash_dump.c
+++ b/kernel/crash_dump.c
@@ -6,12 +6,6 @@
#include <linux/export.h>
/*
- * If we have booted due to a crash, max_pfn will be a very low value. We need
- * to know the amount of memory that the previous kernel used.
- */
-unsigned long saved_max_pfn;
-
-/*
* stores the physical address of elf header of crash image
*
* Note: elfcorehdr_addr is not just limited to vmcore. It is also used by
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index 2b7c9b67931d..ef94e906f05a 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -67,9 +67,7 @@ static int kgdb_break_asap;
struct debuggerinfo_struct kgdb_info[NR_CPUS];
-/**
- * kgdb_connected - Is a host GDB connected to us?
- */
+/* kgdb_connected - Is a host GDB connected to us? */
int kgdb_connected;
EXPORT_SYMBOL_GPL(kgdb_connected);
@@ -532,6 +530,7 @@ static int kgdb_reenter_check(struct kgdb_state *ks)
if (exception_level > 1) {
dump_stack();
+ kgdb_io_module_registered = false;
panic("Recursive entry to debugger");
}
@@ -668,6 +667,8 @@ return_normal:
if (kgdb_skipexception(ks->ex_vector, ks->linux_regs))
goto kgdb_restore;
+ atomic_inc(&ignore_console_lock_warning);
+
/* Call the I/O driver's pre_exception routine */
if (dbg_io_ops->pre_exception)
dbg_io_ops->pre_exception();
@@ -740,6 +741,8 @@ cpu_master_loop:
if (dbg_io_ops->post_exception)
dbg_io_ops->post_exception();
+ atomic_dec(&ignore_console_lock_warning);
+
if (!kgdb_single_step) {
raw_spin_unlock(&dbg_slave_lock);
/* Wait till all the CPUs have quit from the debugger. */
@@ -946,6 +949,14 @@ void kgdb_panic(const char *msg)
kgdb_breakpoint();
}
+static void kgdb_initial_breakpoint(void)
+{
+ kgdb_break_asap = 0;
+
+ pr_crit("Waiting for connection from remote gdb...\n");
+ kgdb_breakpoint();
+}
+
void __weak kgdb_arch_late(void)
{
}
@@ -956,6 +967,9 @@ void __init dbg_late_init(void)
if (kgdb_io_module_registered)
kgdb_arch_late();
kdb_init(KDB_INIT_FULL);
+
+ if (kgdb_io_module_registered && kgdb_break_asap)
+ kgdb_initial_breakpoint();
}
static int
@@ -1051,14 +1065,6 @@ void kgdb_schedule_breakpoint(void)
}
EXPORT_SYMBOL_GPL(kgdb_schedule_breakpoint);
-static void kgdb_initial_breakpoint(void)
-{
- kgdb_break_asap = 0;
-
- pr_crit("Waiting for connection from remote gdb...\n");
- kgdb_breakpoint();
-}
-
/**
* kgdb_register_io_module - register KGDB IO module
* @new_dbg_io_ops: the io ops vector
@@ -1067,15 +1073,22 @@ static void kgdb_initial_breakpoint(void)
*/
int kgdb_register_io_module(struct kgdb_io *new_dbg_io_ops)
{
+ struct kgdb_io *old_dbg_io_ops;
int err;
spin_lock(&kgdb_registration_lock);
- if (dbg_io_ops) {
- spin_unlock(&kgdb_registration_lock);
+ old_dbg_io_ops = dbg_io_ops;
+ if (old_dbg_io_ops) {
+ if (!old_dbg_io_ops->deinit) {
+ spin_unlock(&kgdb_registration_lock);
- pr_err("Another I/O driver is already registered with KGDB\n");
- return -EBUSY;
+ pr_err("KGDB I/O driver %s can't replace %s.\n",
+ new_dbg_io_ops->name, old_dbg_io_ops->name);
+ return -EBUSY;
+ }
+ pr_info("Replacing I/O driver %s with %s\n",
+ old_dbg_io_ops->name, new_dbg_io_ops->name);
}
if (new_dbg_io_ops->init) {
@@ -1090,12 +1103,18 @@ int kgdb_register_io_module(struct kgdb_io *new_dbg_io_ops)
spin_unlock(&kgdb_registration_lock);
+ if (old_dbg_io_ops) {
+ old_dbg_io_ops->deinit();
+ return 0;
+ }
+
pr_info("Registered I/O driver %s\n", new_dbg_io_ops->name);
/* Arm KGDB now. */
kgdb_register_callbacks();
- if (kgdb_break_asap)
+ if (kgdb_break_asap &&
+ (!dbg_is_early || IS_ENABLED(CONFIG_ARCH_HAS_EARLY_DEBUG)))
kgdb_initial_breakpoint();
return 0;
@@ -1125,6 +1144,9 @@ void kgdb_unregister_io_module(struct kgdb_io *old_dbg_io_ops)
spin_unlock(&kgdb_registration_lock);
+ if (old_dbg_io_ops->deinit)
+ old_dbg_io_ops->deinit();
+
pr_info("Unregistered I/O driver %s, debugger disabled\n",
old_dbg_io_ops->name);
}
@@ -1165,7 +1187,8 @@ static int __init opt_kgdb_wait(char *str)
kgdb_break_asap = 1;
kdb_init(KDB_INIT_EARLY);
- if (kgdb_io_module_registered)
+ if (kgdb_io_module_registered &&
+ IS_ENABLED(CONFIG_ARCH_HAS_EARLY_DEBUG))
kgdb_initial_breakpoint();
return 0;
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index 515379cbf209..ec190569f690 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -62,7 +62,7 @@ int kdb_grep_trailing;
/*
* Kernel debugger state flags
*/
-int kdb_flags;
+unsigned int kdb_flags;
/*
* kdb_lock protects updates to kdb_initial_cpu. Used to
@@ -418,8 +418,7 @@ int kdb_set(int argc, const char **argv)
argv[2]);
return 0;
}
- kdb_flags = (kdb_flags &
- ~(KDB_DEBUG_FLAG_MASK << KDB_DEBUG_FLAG_SHIFT))
+ kdb_flags = (kdb_flags & ~KDB_DEBUG(MASK))
| (debugflags << KDB_DEBUG_FLAG_SHIFT);
return 0;
@@ -1108,7 +1107,8 @@ static int handle_ctrl_cmd(char *cmd)
switch (*cmd) {
case CTRL_P:
if (cmdptr != cmd_tail)
- cmdptr = (cmdptr-1) % KDB_CMD_HISTORY_COUNT;
+ cmdptr = (cmdptr + KDB_CMD_HISTORY_COUNT - 1) %
+ KDB_CMD_HISTORY_COUNT;
strscpy(cmd_cur, cmd_hist[cmdptr], CMD_BUFLEN);
return 1;
case CTRL_N:
@@ -2081,7 +2081,8 @@ static int kdb_env(int argc, const char **argv)
}
if (KDB_DEBUG(MASK))
- kdb_printf("KDBFLAGS=0x%x\n", kdb_flags);
+ kdb_printf("KDBDEBUG=0x%x\n",
+ (kdb_flags & KDB_DEBUG(MASK)) >> KDB_DEBUG_FLAG_SHIFT);
return 0;
}
diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c
index d14cbc83986a..914ff5a58dd5 100644
--- a/kernel/dma/remap.c
+++ b/kernel/dma/remap.c
@@ -20,23 +20,6 @@ struct page **dma_common_find_pages(void *cpu_addr)
return area->pages;
}
-static struct vm_struct *__dma_common_pages_remap(struct page **pages,
- size_t size, pgprot_t prot, const void *caller)
-{
- struct vm_struct *area;
-
- area = get_vm_area_caller(size, VM_DMA_COHERENT, caller);
- if (!area)
- return NULL;
-
- if (map_vm_area(area, prot, pages)) {
- vunmap(area->addr);
- return NULL;
- }
-
- return area;
-}
-
/*
* Remaps an array of PAGE_SIZE pages into another vm_area.
* Cannot be used in non-sleeping contexts
@@ -44,15 +27,12 @@ static struct vm_struct *__dma_common_pages_remap(struct page **pages,
void *dma_common_pages_remap(struct page **pages, size_t size,
pgprot_t prot, const void *caller)
{
- struct vm_struct *area;
+ void *vaddr;
- area = __dma_common_pages_remap(pages, size, prot, caller);
- if (!area)
- return NULL;
-
- area->pages = pages;
-
- return area->addr;
+ vaddr = vmap(pages, size >> PAGE_SHIFT, VM_DMA_COHERENT, prot);
+ if (vaddr)
+ find_vm_area(vaddr)->pages = pages;
+ return vaddr;
}
/*
@@ -62,24 +42,20 @@ void *dma_common_pages_remap(struct page **pages, size_t size,
void *dma_common_contiguous_remap(struct page *page, size_t size,
pgprot_t prot, const void *caller)
{
- int i;
+ int count = size >> PAGE_SHIFT;
struct page **pages;
- struct vm_struct *area;
+ void *vaddr;
+ int i;
- pages = kmalloc(sizeof(struct page *) << get_order(size), GFP_KERNEL);
+ pages = kmalloc_array(count, sizeof(struct page *), GFP_KERNEL);
if (!pages)
return NULL;
-
- for (i = 0; i < (size >> PAGE_SHIFT); i++)
+ for (i = 0; i < count; i++)
pages[i] = nth_page(page, i);
-
- area = __dma_common_pages_remap(pages, size, prot, caller);
-
+ vaddr = vmap(pages, count, VM_DMA_COHERENT, prot);
kfree(pages);
- if (!area)
- return NULL;
- return area->addr;
+ return vaddr;
}
/*
diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c
index bdb1533ada81..334d48b16c36 100644
--- a/kernel/events/callchain.c
+++ b/kernel/events/callchain.c
@@ -16,7 +16,7 @@
struct callchain_cpus_entries {
struct rcu_head rcu_head;
- struct perf_callchain_entry *cpu_entries[0];
+ struct perf_callchain_entry *cpu_entries[];
};
int sysctl_perf_event_max_stack __read_mostly = PERF_MAX_STACK_DEPTH;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 468139611e06..2e330f330303 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -95,11 +95,11 @@ static void remote_function(void *data)
* @info: the function call argument
*
* Calls the function @func when the task is currently running. This might
- * be on the current CPU, which just calls the function directly
+ * be on the current CPU, which just calls the function directly. This will
+ * retry due to any failures in smp_call_function_single(), such as if the
+ * task_cpu() goes offline concurrently.
*
- * returns: @func return value, or
- * -ESRCH - when the process isn't running
- * -EAGAIN - when the process moved away
+ * returns @func return value or -ESRCH when the process isn't running
*/
static int
task_function_call(struct task_struct *p, remote_function_f func, void *info)
@@ -112,11 +112,16 @@ task_function_call(struct task_struct *p, remote_function_f func, void *info)
};
int ret;
- do {
- ret = smp_call_function_single(task_cpu(p), remote_function, &data, 1);
- if (!ret)
- ret = data.ret;
- } while (ret == -EAGAIN);
+ for (;;) {
+ ret = smp_call_function_single(task_cpu(p), remote_function,
+ &data, 1);
+ ret = !ret ? data.ret : -EAGAIN;
+
+ if (ret != -EAGAIN)
+ break;
+
+ cond_resched();
+ }
return ret;
}
@@ -9402,7 +9407,7 @@ static int perf_kprobe_event_init(struct perf_event *event)
if (event->attr.type != perf_kprobe.type)
return -ENOENT;
- if (!capable(CAP_SYS_ADMIN))
+ if (!perfmon_capable())
return -EACCES;
/*
@@ -9462,7 +9467,7 @@ static int perf_uprobe_event_init(struct perf_event *event)
if (event->attr.type != perf_uprobe.type)
return -ENOENT;
- if (!capable(CAP_SYS_ADMIN))
+ if (!perfmon_capable())
return -EACCES;
/*
@@ -11509,7 +11514,7 @@ SYSCALL_DEFINE5(perf_event_open,
}
if (attr.namespaces) {
- if (!capable(CAP_SYS_ADMIN))
+ if (!perfmon_capable())
return -EACCES;
}
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index f16f66b6b655..fcbf5616a441 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -55,7 +55,7 @@ struct perf_buffer {
void *aux_priv;
struct perf_event_mmap_page *user_page;
- void *data_pages[0];
+ void *data_pages[];
};
extern void rb_free(struct perf_buffer *rb);
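Allocation sites are unaffected by the flexible-array conversion; the usual sizing pattern uses struct_size() from <linux/overflow.h> (illustrative, not a call site from this patch; nr_pages is assumed to be supplied by the caller):

        struct perf_buffer *rb;

        rb = kzalloc(struct_size(rb, data_pages, nr_pages), GFP_KERNEL);
        if (!rb)
                return NULL;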
diff --git a/kernel/exit.c b/kernel/exit.c
index ce2a75bc0ade..c300253a7b8e 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -228,8 +228,9 @@ repeat:
goto repeat;
}
-void rcuwait_wake_up(struct rcuwait *w)
+int rcuwait_wake_up(struct rcuwait *w)
{
+ int ret = 0;
struct task_struct *task;
rcu_read_lock();
@@ -237,7 +238,7 @@ void rcuwait_wake_up(struct rcuwait *w)
/*
* Order condition vs @task, such that everything prior to the load
* of @task is visible. This is the condition as to why the user called
- * rcuwait_trywake() in the first place. Pairs with set_current_state()
+ * rcuwait_wake() in the first place. Pairs with set_current_state()
* barrier (A) in rcuwait_wait_event().
*
* WAIT WAKE
@@ -249,8 +250,10 @@ void rcuwait_wake_up(struct rcuwait *w)
task = rcu_dereference(w->task);
if (task)
- wake_up_process(task);
+ ret = wake_up_process(task);
rcu_read_unlock();
+
+ return ret;
}
EXPORT_SYMBOL_GPL(rcuwait_wake_up);
@@ -708,8 +711,12 @@ void __noreturn do_exit(long code)
struct task_struct *tsk = current;
int group_dead;
- profile_task_exit(tsk);
- kcov_task_exit(tsk);
+ /*
+ * We can get here from a kernel oops, sometimes with preemption off.
+ * Start by checking for critical errors.
+ * Then fix up important state like USER_DS and preemption.
+ * Then do everything else.
+ */
WARN_ON(blk_needs_flush_plug(tsk));
@@ -727,6 +734,16 @@ void __noreturn do_exit(long code)
*/
set_fs(USER_DS);
+ if (unlikely(in_atomic())) {
+ pr_info("note: %s[%d] exited with preempt_count %d\n",
+ current->comm, task_pid_nr(current),
+ preempt_count());
+ preempt_count_set(PREEMPT_ENABLED);
+ }
+
+ profile_task_exit(tsk);
+ kcov_task_exit(tsk);
+
ptrace_event(PTRACE_EVENT_EXIT, code);
validate_creds_for_do_exit(tsk);
@@ -744,13 +761,6 @@ void __noreturn do_exit(long code)
exit_signals(tsk); /* sets PF_EXITING */
- if (unlikely(in_atomic())) {
- pr_info("note: %s[%d] exited with preempt_count %d\n",
- current->comm, task_pid_nr(current),
- preempt_count());
- preempt_count_set(PREEMPT_ENABLED);
- }
-
/* sync mm's RSS info before statistics gathering */
if (tsk->mm)
sync_mm_rss(tsk->mm);
@@ -1558,7 +1568,7 @@ SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
if (!infop)
return err;
- if (!user_access_begin(infop, sizeof(*infop)))
+ if (!user_write_access_begin(infop, sizeof(*infop)))
return -EFAULT;
unsafe_put_user(signo, &infop->si_signo, Efault);
@@ -1567,10 +1577,10 @@ SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
unsafe_put_user(info.pid, &infop->si_pid, Efault);
unsafe_put_user(info.uid, &infop->si_uid, Efault);
unsafe_put_user(info.status, &infop->si_status, Efault);
- user_access_end();
+ user_write_access_end();
return err;
Efault:
- user_access_end();
+ user_write_access_end();
return -EFAULT;
}
@@ -1685,7 +1695,7 @@ COMPAT_SYSCALL_DEFINE5(waitid,
if (!infop)
return err;
- if (!user_access_begin(infop, sizeof(*infop)))
+ if (!user_write_access_begin(infop, sizeof(*infop)))
return -EFAULT;
unsafe_put_user(signo, &infop->si_signo, Efault);
@@ -1694,10 +1704,10 @@ COMPAT_SYSCALL_DEFINE5(waitid,
unsafe_put_user(info.pid, &infop->si_pid, Efault);
unsafe_put_user(info.uid, &infop->si_uid, Efault);
unsafe_put_user(info.status, &infop->si_status, Efault);
- user_access_end();
+ user_write_access_end();
return err;
Efault:
- user_access_end();
+ user_write_access_end();
return -EFAULT;
}
#endif
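The int return lets the waker see whether a sleeper was actually present; the pairing the comment describes looks roughly like this (sketch; ctx is a hypothetical structure holding a struct rcuwait plus a flag, and rcuwait_wait_event() takes the task state as its third argument in this kernel):

        /* waiter side: sleeps until the flag is set */
        rcuwait_wait_event(&ctx->wait, READ_ONCE(ctx->done),
                           TASK_UNINTERRUPTIBLE);

        /* waker side: the new return value says whether anyone was woken */
        WRITE_ONCE(ctx->done, true);
        if (!rcuwait_wake_up(&ctx->wait))
                pr_debug("no waiter was sleeping yet\n");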
diff --git a/kernel/fork.c b/kernel/fork.c
index 48ed22774efa..be98e94cb3cc 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -94,6 +94,7 @@
#include <linux/thread_info.h>
#include <linux/stackleak.h>
#include <linux/kasan.h>
+#include <linux/scs.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -456,6 +457,8 @@ void put_task_stack(struct task_struct *tsk)
void free_task(struct task_struct *tsk)
{
+ scs_release(tsk);
+
#ifndef CONFIG_THREAD_INFO_IN_TASK
/*
* The task is finally done with both the stack and thread_info,
@@ -840,6 +843,8 @@ void __init fork_init(void)
NULL, free_vm_stack_cache);
#endif
+ scs_init();
+
lockdep_init_task(&init_task);
uprobes_init();
}
@@ -899,6 +904,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
if (err)
goto free_stack;
+ err = scs_prepare(tsk, node);
+ if (err)
+ goto free_stack;
+
#ifdef CONFIG_SECCOMP
/*
* We must handle setting up seccomp filters once we're under
@@ -1683,6 +1692,11 @@ static inline void rcu_copy_process(struct task_struct *p)
INIT_LIST_HEAD(&p->rcu_tasks_holdout_list);
p->rcu_tasks_idle_cpu = -1;
#endif /* #ifdef CONFIG_TASKS_RCU */
+#ifdef CONFIG_TASKS_TRACE_RCU
+ p->trc_reader_nesting = 0;
+ p->trc_reader_special.s = 0;
+ INIT_LIST_HEAD(&p->trc_holdout_list);
+#endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
}
struct pid *pidfd_pid(const struct file *file)
diff --git a/kernel/futex.c b/kernel/futex.c
index b59532862bc0..b4b9f960b610 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -486,10 +486,13 @@ static u64 get_inode_sequence_number(struct inode *inode)
* The key words are stored in @key on success.
*
* For shared mappings (when @fshared), the key is:
+ *
* ( inode->i_sequence, page->index, offset_within_page )
+ *
* [ also see get_inode_sequence_number() ]
*
* For private mappings (or when !@fshared), the key is:
+ *
* ( current->mm, address, 0 )
*
* This allows (cross process, where applicable) identification of the futex
diff --git a/kernel/groups.c b/kernel/groups.c
index daae2f2dc6d4..6ee6691f6839 100644
--- a/kernel/groups.c
+++ b/kernel/groups.c
@@ -20,7 +20,7 @@ struct group_info *groups_alloc(int gidsetsize)
len = sizeof(struct group_info) + sizeof(kgid_t) * gidsetsize;
gi = kmalloc(len, GFP_KERNEL_ACCOUNT|__GFP_NOWARN|__GFP_NORETRY);
if (!gi)
- gi = __vmalloc(len, GFP_KERNEL_ACCOUNT, PAGE_KERNEL);
+ gi = __vmalloc(len, GFP_KERNEL_ACCOUNT);
if (!gi)
return NULL;
diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig
index 20d501af4f2e..d63c324895ea 100644
--- a/kernel/irq/Kconfig
+++ b/kernel/irq/Kconfig
@@ -72,6 +72,7 @@ config IRQ_DOMAIN
config IRQ_SIM
bool
select IRQ_WORK
+ select IRQ_DOMAIN
# Support for hierarchical irq domains
config IRQ_DOMAIN_HIERARCHY
diff --git a/kernel/irq/irq_sim.c b/kernel/irq/irq_sim.c
index b992f88c5613..48006608baf0 100644
--- a/kernel/irq/irq_sim.c
+++ b/kernel/irq/irq_sim.c
@@ -1,14 +1,31 @@
// SPDX-License-Identifier: GPL-2.0+
/*
* Copyright (C) 2017-2018 Bartosz Golaszewski <brgl@bgdev.pl>
+ * Copyright (C) 2020 Bartosz Golaszewski <bgolaszewski@baylibre.com>
*/
-#include <linux/slab.h>
-#include <linux/irq_sim.h>
#include <linux/irq.h>
+#include <linux/irq_sim.h>
+#include <linux/irq_work.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+
+struct irq_sim_work_ctx {
+ struct irq_work work;
+ int irq_base;
+ unsigned int irq_count;
+ unsigned long *pending;
+ struct irq_domain *domain;
+};
+
+struct irq_sim_irq_ctx {
+ int irqnum;
+ bool enabled;
+ struct irq_sim_work_ctx *work_ctx;
+};
struct irq_sim_devres {
- struct irq_sim *sim;
+ struct irq_domain *domain;
};
static void irq_sim_irqmask(struct irq_data *data)
@@ -36,159 +53,205 @@ static int irq_sim_set_type(struct irq_data *data, unsigned int type)
return 0;
}
+static int irq_sim_get_irqchip_state(struct irq_data *data,
+ enum irqchip_irq_state which, bool *state)
+{
+ struct irq_sim_irq_ctx *irq_ctx = irq_data_get_irq_chip_data(data);
+ irq_hw_number_t hwirq = irqd_to_hwirq(data);
+
+ switch (which) {
+ case IRQCHIP_STATE_PENDING:
+ if (irq_ctx->enabled)
+ *state = test_bit(hwirq, irq_ctx->work_ctx->pending);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int irq_sim_set_irqchip_state(struct irq_data *data,
+ enum irqchip_irq_state which, bool state)
+{
+ struct irq_sim_irq_ctx *irq_ctx = irq_data_get_irq_chip_data(data);
+ irq_hw_number_t hwirq = irqd_to_hwirq(data);
+
+ switch (which) {
+ case IRQCHIP_STATE_PENDING:
+ if (irq_ctx->enabled) {
+ assign_bit(hwirq, irq_ctx->work_ctx->pending, state);
+ if (state)
+ irq_work_queue(&irq_ctx->work_ctx->work);
+ }
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static struct irq_chip irq_sim_irqchip = {
- .name = "irq_sim",
- .irq_mask = irq_sim_irqmask,
- .irq_unmask = irq_sim_irqunmask,
- .irq_set_type = irq_sim_set_type,
+ .name = "irq_sim",
+ .irq_mask = irq_sim_irqmask,
+ .irq_unmask = irq_sim_irqunmask,
+ .irq_set_type = irq_sim_set_type,
+ .irq_get_irqchip_state = irq_sim_get_irqchip_state,
+ .irq_set_irqchip_state = irq_sim_set_irqchip_state,
};
static void irq_sim_handle_irq(struct irq_work *work)
{
struct irq_sim_work_ctx *work_ctx;
unsigned int offset = 0;
- struct irq_sim *sim;
int irqnum;
work_ctx = container_of(work, struct irq_sim_work_ctx, work);
- sim = container_of(work_ctx, struct irq_sim, work_ctx);
- while (!bitmap_empty(work_ctx->pending, sim->irq_count)) {
+ while (!bitmap_empty(work_ctx->pending, work_ctx->irq_count)) {
offset = find_next_bit(work_ctx->pending,
- sim->irq_count, offset);
+ work_ctx->irq_count, offset);
clear_bit(offset, work_ctx->pending);
- irqnum = irq_sim_irqnum(sim, offset);
+ irqnum = irq_find_mapping(work_ctx->domain, offset);
handle_simple_irq(irq_to_desc(irqnum));
}
}
+static int irq_sim_domain_map(struct irq_domain *domain,
+ unsigned int virq, irq_hw_number_t hw)
+{
+ struct irq_sim_work_ctx *work_ctx = domain->host_data;
+ struct irq_sim_irq_ctx *irq_ctx;
+
+ irq_ctx = kzalloc(sizeof(*irq_ctx), GFP_KERNEL);
+ if (!irq_ctx)
+ return -ENOMEM;
+
+ irq_set_chip(virq, &irq_sim_irqchip);
+ irq_set_chip_data(virq, irq_ctx);
+ irq_set_handler(virq, handle_simple_irq);
+ irq_modify_status(virq, IRQ_NOREQUEST | IRQ_NOAUTOEN, IRQ_NOPROBE);
+ irq_ctx->work_ctx = work_ctx;
+
+ return 0;
+}
+
+static void irq_sim_domain_unmap(struct irq_domain *domain, unsigned int virq)
+{
+ struct irq_sim_irq_ctx *irq_ctx;
+ struct irq_data *irqd;
+
+ irqd = irq_domain_get_irq_data(domain, virq);
+ irq_ctx = irq_data_get_irq_chip_data(irqd);
+
+ irq_set_handler(virq, NULL);
+ irq_domain_reset_irq_data(irqd);
+ kfree(irq_ctx);
+}
+
+static const struct irq_domain_ops irq_sim_domain_ops = {
+ .map = irq_sim_domain_map,
+ .unmap = irq_sim_domain_unmap,
+};
+
/**
- * irq_sim_init - Initialize the interrupt simulator: allocate a range of
- * dummy interrupts.
+ * irq_domain_create_sim - Create a new interrupt simulator irq_domain and
+ * allocate a range of dummy interrupts.
*
- * @sim: The interrupt simulator object to initialize.
- * @num_irqs: Number of interrupts to allocate
+ * @fwnode: struct fwnode_handle to be associated with this domain.
+ * @num_irqs: Number of interrupts to allocate.
*
- * On success: return the base of the allocated interrupt range.
- * On failure: a negative errno.
+ * On success: return a new irq_domain object.
+ * On failure: a negative errno wrapped with ERR_PTR().
*/
-int irq_sim_init(struct irq_sim *sim, unsigned int num_irqs)
+struct irq_domain *irq_domain_create_sim(struct fwnode_handle *fwnode,
+ unsigned int num_irqs)
{
- int i;
+ struct irq_sim_work_ctx *work_ctx;
- sim->irqs = kmalloc_array(num_irqs, sizeof(*sim->irqs), GFP_KERNEL);
- if (!sim->irqs)
- return -ENOMEM;
+ work_ctx = kmalloc(sizeof(*work_ctx), GFP_KERNEL);
+ if (!work_ctx)
+ goto err_out;
- sim->irq_base = irq_alloc_descs(-1, 0, num_irqs, 0);
- if (sim->irq_base < 0) {
- kfree(sim->irqs);
- return sim->irq_base;
- }
+ work_ctx->pending = bitmap_zalloc(num_irqs, GFP_KERNEL);
+ if (!work_ctx->pending)
+ goto err_free_work_ctx;
- sim->work_ctx.pending = bitmap_zalloc(num_irqs, GFP_KERNEL);
- if (!sim->work_ctx.pending) {
- kfree(sim->irqs);
- irq_free_descs(sim->irq_base, num_irqs);
- return -ENOMEM;
- }
+ work_ctx->domain = irq_domain_create_linear(fwnode, num_irqs,
+ &irq_sim_domain_ops,
+ work_ctx);
+ if (!work_ctx->domain)
+ goto err_free_bitmap;
- for (i = 0; i < num_irqs; i++) {
- sim->irqs[i].irqnum = sim->irq_base + i;
- sim->irqs[i].enabled = false;
- irq_set_chip(sim->irq_base + i, &irq_sim_irqchip);
- irq_set_chip_data(sim->irq_base + i, &sim->irqs[i]);
- irq_set_handler(sim->irq_base + i, &handle_simple_irq);
- irq_modify_status(sim->irq_base + i,
- IRQ_NOREQUEST | IRQ_NOAUTOEN, IRQ_NOPROBE);
- }
+ work_ctx->irq_count = num_irqs;
+ init_irq_work(&work_ctx->work, irq_sim_handle_irq);
- init_irq_work(&sim->work_ctx.work, irq_sim_handle_irq);
- sim->irq_count = num_irqs;
+ return work_ctx->domain;
- return sim->irq_base;
+err_free_bitmap:
+ bitmap_free(work_ctx->pending);
+err_free_work_ctx:
+ kfree(work_ctx);
+err_out:
+ return ERR_PTR(-ENOMEM);
}
-EXPORT_SYMBOL_GPL(irq_sim_init);
+EXPORT_SYMBOL_GPL(irq_domain_create_sim);
/**
- * irq_sim_fini - Deinitialize the interrupt simulator: free the interrupt
- * descriptors and allocated memory.
+ * irq_domain_remove_sim - Deinitialize the interrupt simulator domain: free
+ * the interrupt descriptors and allocated memory.
*
- * @sim: The interrupt simulator to tear down.
+ * @domain: The interrupt simulator domain to tear down.
*/
-void irq_sim_fini(struct irq_sim *sim)
+void irq_domain_remove_sim(struct irq_domain *domain)
{
- irq_work_sync(&sim->work_ctx.work);
- bitmap_free(sim->work_ctx.pending);
- irq_free_descs(sim->irq_base, sim->irq_count);
- kfree(sim->irqs);
+ struct irq_sim_work_ctx *work_ctx = domain->host_data;
+
+ irq_work_sync(&work_ctx->work);
+ bitmap_free(work_ctx->pending);
+ kfree(work_ctx);
+
+ irq_domain_remove(domain);
}
-EXPORT_SYMBOL_GPL(irq_sim_fini);
+EXPORT_SYMBOL_GPL(irq_domain_remove_sim);
-static void devm_irq_sim_release(struct device *dev, void *res)
+static void devm_irq_domain_release_sim(struct device *dev, void *res)
{
struct irq_sim_devres *this = res;
- irq_sim_fini(this->sim);
+ irq_domain_remove_sim(this->domain);
}
/**
- * irq_sim_init - Initialize the interrupt simulator for a managed device.
+ * devm_irq_domain_create_sim - Create a new interrupt simulator for
+ * a managed device.
*
* @dev: Device to initialize the simulator object for.
- * @sim: The interrupt simulator object to initialize.
+ * @fwnode: struct fwnode_handle to be associated with this domain.
* @num_irqs: Number of interrupts to allocate
*
- * On success: return the base of the allocated interrupt range.
- * On failure: a negative errno.
+ * On success: return a new irq_domain object.
+ * On failure: a negative errno wrapped with ERR_PTR().
*/
-int devm_irq_sim_init(struct device *dev, struct irq_sim *sim,
- unsigned int num_irqs)
+struct irq_domain *devm_irq_domain_create_sim(struct device *dev,
+ struct fwnode_handle *fwnode,
+ unsigned int num_irqs)
{
struct irq_sim_devres *dr;
- int rv;
- dr = devres_alloc(devm_irq_sim_release, sizeof(*dr), GFP_KERNEL);
+ dr = devres_alloc(devm_irq_domain_release_sim,
+ sizeof(*dr), GFP_KERNEL);
if (!dr)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
- rv = irq_sim_init(sim, num_irqs);
- if (rv < 0) {
+ dr->domain = irq_domain_create_sim(fwnode, num_irqs);
+ if (IS_ERR(dr->domain)) {
devres_free(dr);
- return rv;
+ return dr->domain;
}
- dr->sim = sim;
devres_add(dev, dr);
-
- return rv;
-}
-EXPORT_SYMBOL_GPL(devm_irq_sim_init);
-
-/**
- * irq_sim_fire - Enqueue an interrupt.
- *
- * @sim: The interrupt simulator object.
- * @offset: Offset of the simulated interrupt which should be fired.
- */
-void irq_sim_fire(struct irq_sim *sim, unsigned int offset)
-{
- if (sim->irqs[offset].enabled) {
- set_bit(offset, sim->work_ctx.pending);
- irq_work_queue(&sim->work_ctx.work);
- }
-}
-EXPORT_SYMBOL_GPL(irq_sim_fire);
-
-/**
- * irq_sim_irqnum - Get the allocated number of a dummy interrupt.
- *
- * @sim: The interrupt simulator object.
- * @offset: Offset of the simulated interrupt for which to retrieve
- * the number.
- */
-int irq_sim_irqnum(struct irq_sim *sim, unsigned int offset)
-{
- return sim->irqs[offset].irqnum;
+ return dr->domain;
}
-EXPORT_SYMBOL_GPL(irq_sim_irqnum);
+EXPORT_SYMBOL_GPL(devm_irq_domain_create_sim);
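With the domain-based API, a consumer creates the simulator domain, maps the lines it needs, and fires them through the irqchip state helper that replaces irq_sim_fire(); an illustrative driver-side sketch (dev and fwnode are assumed to exist in the caller):

        struct irq_domain *domain;
        unsigned int virq;

        domain = devm_irq_domain_create_sim(dev, fwnode, 8);
        if (IS_ERR(domain))
                return PTR_ERR(domain);

        /* hwirqs are mapped on demand; map line 3 and make it fire */
        virq = irq_create_mapping(domain, 3);
        irq_set_irqchip_state(virq, IRQCHIP_STATE_PENDING, true);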
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 35b8d97c3a1d..a4c2c915511d 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -132,14 +132,13 @@ struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size,
const struct irq_domain_ops *ops,
void *host_data)
{
- struct device_node *of_node = to_of_node(fwnode);
struct irqchip_fwid *fwid;
struct irq_domain *domain;
static atomic_t unknown_domains;
domain = kzalloc_node(sizeof(*domain) + (sizeof(unsigned int) * size),
- GFP_KERNEL, of_node_to_nid(of_node));
+ GFP_KERNEL, of_node_to_nid(to_of_node(fwnode)));
if (!domain)
return NULL;
@@ -162,30 +161,16 @@ struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size,
domain->name = fwid->name;
break;
}
-#ifdef CONFIG_ACPI
- } else if (is_acpi_device_node(fwnode)) {
- struct acpi_buffer buf = {
- .length = ACPI_ALLOCATE_BUFFER,
- };
- acpi_handle handle;
-
- handle = acpi_device_handle(to_acpi_device_node(fwnode));
- if (acpi_get_name(handle, ACPI_FULL_PATHNAME, &buf) == AE_OK) {
- domain->name = buf.pointer;
- domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED;
- }
-
- domain->fwnode = fwnode;
-#endif
- } else if (of_node) {
+ } else if (is_of_node(fwnode) || is_acpi_device_node(fwnode) ||
+ is_software_node(fwnode)) {
char *name;
/*
- * DT paths contain '/', which debugfs is legitimately
+ * fwnode paths contain '/', which debugfs is legitimately
* unhappy about. Replace them with ':', which does
* the trick and is not as offensive as '\'...
*/
- name = kasprintf(GFP_KERNEL, "%pOF", of_node);
+ name = kasprintf(GFP_KERNEL, "%pfw", fwnode);
if (!name) {
kfree(domain);
return NULL;
@@ -210,7 +195,7 @@ struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size,
domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED;
}
- of_node_get(of_node);
+ fwnode_handle_get(fwnode);
/* Fill structure */
INIT_RADIX_TREE(&domain->revmap_tree, GFP_KERNEL);
@@ -259,7 +244,7 @@ void irq_domain_remove(struct irq_domain *domain)
pr_debug("Removed domain %s\n", domain->name);
- of_node_put(irq_domain_get_of_node(domain));
+ fwnode_handle_put(domain->fwnode);
if (domain->flags & IRQ_DOMAIN_NAME_ALLOCATED)
kfree(domain->name);
kfree(domain);
@@ -1047,6 +1032,18 @@ int irq_domain_alloc_descs(int virq, unsigned int cnt, irq_hw_number_t hwirq,
return virq;
}
+/**
+ * irq_domain_reset_irq_data - Clear hwirq, chip and chip_data in @irq_data
+ * @irq_data: The pointer to irq_data
+ */
+void irq_domain_reset_irq_data(struct irq_data *irq_data)
+{
+ irq_data->hwirq = 0;
+ irq_data->chip = &no_irq_chip;
+ irq_data->chip_data = NULL;
+}
+EXPORT_SYMBOL_GPL(irq_domain_reset_irq_data);
+
#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
/**
* irq_domain_create_hierarchy - Add a irqdomain into the hierarchy
@@ -1248,18 +1245,6 @@ void irq_domain_set_info(struct irq_domain *domain, unsigned int virq,
EXPORT_SYMBOL(irq_domain_set_info);
/**
- * irq_domain_reset_irq_data - Clear hwirq, chip and chip_data in @irq_data
- * @irq_data: The pointer to irq_data
- */
-void irq_domain_reset_irq_data(struct irq_data *irq_data)
-{
- irq_data->hwirq = 0;
- irq_data->chip = &no_irq_chip;
- irq_data->chip_data = NULL;
-}
-EXPORT_SYMBOL_GPL(irq_domain_reset_irq_data);
-
-/**
* irq_domain_free_irqs_common - Clear irq_data and free the parent
* @domain: Interrupt domain to match
* @virq: IRQ number to start with
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 453a8a0f4804..761911168438 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -2619,6 +2619,8 @@ int __irq_get_irqchip_state(struct irq_data *data, enum irqchip_irq_state which,
do {
chip = irq_data_get_irq_chip(data);
+ if (WARN_ON_ONCE(!chip))
+ return -ENODEV;
if (chip->irq_get_irqchip_state)
break;
#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
@@ -2696,6 +2698,8 @@ int irq_set_irqchip_state(unsigned int irq, enum irqchip_irq_state which,
do {
chip = irq_data_get_irq_chip(data);
+ if (WARN_ON_ONCE(!chip))
+ return -ENODEV;
if (chip->irq_set_irqchip_state)
break;
#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
diff --git a/kernel/irq_work.c b/kernel/irq_work.c
index 48b5d1b6af4d..eca83965b631 100644
--- a/kernel/irq_work.c
+++ b/kernel/irq_work.c
@@ -31,7 +31,7 @@ static bool irq_work_claim(struct irq_work *work)
{
int oflags;
- oflags = atomic_fetch_or(IRQ_WORK_CLAIMED, &work->flags);
+ oflags = atomic_fetch_or(IRQ_WORK_CLAIMED | CSD_TYPE_IRQ_WORK, &work->flags);
/*
* If the work is already pending, no need to raise the IPI.
* The pairing atomic_fetch_andnot() in irq_work_run() makes sure
@@ -102,8 +102,7 @@ bool irq_work_queue_on(struct irq_work *work, int cpu)
if (cpu != smp_processor_id()) {
/* Arch remote IPI send/receive backend aren't NMI safe */
WARN_ON_ONCE(in_nmi());
- if (llist_add(&work->llnode, &per_cpu(raised_list, cpu)))
- arch_send_call_function_single_ipi(cpu);
+ __smp_call_single_queue(cpu, &work->llnode);
} else {
__irq_work_queue_local(work);
}
@@ -131,6 +130,31 @@ bool irq_work_needs_cpu(void)
return true;
}
+void irq_work_single(void *arg)
+{
+ struct irq_work *work = arg;
+ int flags;
+
+ /*
+ * Clear the PENDING bit, after this point the @work
+ * can be re-used.
+ * Make it immediately visible so that other CPUs trying
+ * to claim that work don't rely on us to handle their data
+ * while we are in the middle of the func.
+ */
+ flags = atomic_fetch_andnot(IRQ_WORK_PENDING, &work->flags);
+
+ lockdep_irq_work_enter(work);
+ work->func(work);
+ lockdep_irq_work_exit(work);
+ /*
+ * Clear the BUSY bit and return to the free state if
+ * no-one else claimed it meanwhile.
+ */
+ flags &= ~IRQ_WORK_PENDING;
+ (void)atomic_cmpxchg(&work->flags, flags, flags & ~IRQ_WORK_BUSY);
+}
+
static void irq_work_run_list(struct llist_head *list)
{
struct irq_work *work, *tmp;
@@ -142,27 +166,8 @@ static void irq_work_run_list(struct llist_head *list)
return;
llnode = llist_del_all(list);
- llist_for_each_entry_safe(work, tmp, llnode, llnode) {
- int flags;
- /*
- * Clear the PENDING bit, after this point the @work
- * can be re-used.
- * Make it immediately visible so that other CPUs trying
- * to claim that work don't rely on us to handle their data
- * while we are in the middle of the func.
- */
- flags = atomic_fetch_andnot(IRQ_WORK_PENDING, &work->flags);
-
- lockdep_irq_work_enter(work);
- work->func(work);
- lockdep_irq_work_exit(work);
- /*
- * Clear the BUSY bit and return to the free state if
- * no-one else claimed it meanwhile.
- */
- flags &= ~IRQ_WORK_PENDING;
- (void)atomic_cmpxchg(&work->flags, flags, flags & ~IRQ_WORK_BUSY);
- }
+ llist_for_each_entry_safe(work, tmp, llnode, llnode)
+ irq_work_single(work);
}
/*
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index ffbe03a45c16..0fbdee78266b 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -2179,6 +2179,24 @@ int kprobe_add_area_blacklist(unsigned long start, unsigned long end)
return 0;
}
+/* Remove all symbols in given area from kprobe blacklist */
+static void kprobe_remove_area_blacklist(unsigned long start, unsigned long end)
+{
+ struct kprobe_blacklist_entry *ent, *n;
+
+ list_for_each_entry_safe(ent, n, &kprobe_blacklist, list) {
+ if (ent->start_addr < start || ent->start_addr >= end)
+ continue;
+ list_del(&ent->list);
+ kfree(ent);
+ }
+}
+
+static void kprobe_remove_ksym_blacklist(unsigned long entry)
+{
+ kprobe_remove_area_blacklist(entry, entry + 1);
+}
+
int __init __weak arch_populate_kprobe_blacklist(void)
{
return 0;
@@ -2211,10 +2229,62 @@ static int __init populate_kprobe_blacklist(unsigned long *start,
/* Symbols in __kprobes_text are blacklisted */
ret = kprobe_add_area_blacklist((unsigned long)__kprobes_text_start,
(unsigned long)__kprobes_text_end);
+ if (ret)
+ return ret;
+
+ /* Symbols in noinstr section are blacklisted */
+ ret = kprobe_add_area_blacklist((unsigned long)__noinstr_text_start,
+ (unsigned long)__noinstr_text_end);
return ret ? : arch_populate_kprobe_blacklist();
}
+static void add_module_kprobe_blacklist(struct module *mod)
+{
+ unsigned long start, end;
+ int i;
+
+ if (mod->kprobe_blacklist) {
+ for (i = 0; i < mod->num_kprobe_blacklist; i++)
+ kprobe_add_ksym_blacklist(mod->kprobe_blacklist[i]);
+ }
+
+ start = (unsigned long)mod->kprobes_text_start;
+ if (start) {
+ end = start + mod->kprobes_text_size;
+ kprobe_add_area_blacklist(start, end);
+ }
+
+ start = (unsigned long)mod->noinstr_text_start;
+ if (start) {
+ end = start + mod->noinstr_text_size;
+ kprobe_add_area_blacklist(start, end);
+ }
+}
+
+static void remove_module_kprobe_blacklist(struct module *mod)
+{
+ unsigned long start, end;
+ int i;
+
+ if (mod->kprobe_blacklist) {
+ for (i = 0; i < mod->num_kprobe_blacklist; i++)
+ kprobe_remove_ksym_blacklist(mod->kprobe_blacklist[i]);
+ }
+
+ start = (unsigned long)mod->kprobes_text_start;
+ if (start) {
+ end = start + mod->kprobes_text_size;
+ kprobe_remove_area_blacklist(start, end);
+ }
+
+ start = (unsigned long)mod->noinstr_text_start;
+ if (start) {
+ end = start + mod->noinstr_text_size;
+ kprobe_remove_area_blacklist(start, end);
+ }
+}
+
/* Module notifier call back, checking kprobes on the module */
static int kprobes_module_callback(struct notifier_block *nb,
unsigned long val, void *data)
@@ -2225,6 +2295,11 @@ static int kprobes_module_callback(struct notifier_block *nb,
unsigned int i;
int checkcore = (val == MODULE_STATE_GOING);
+ if (val == MODULE_STATE_COMING) {
+ mutex_lock(&kprobe_mutex);
+ add_module_kprobe_blacklist(mod);
+ mutex_unlock(&kprobe_mutex);
+ }
if (val != MODULE_STATE_GOING && val != MODULE_STATE_LIVE)
return NOTIFY_DONE;
@@ -2255,6 +2330,8 @@ static int kprobes_module_callback(struct notifier_block *nb,
kill_kprobe(p);
}
}
+ if (val == MODULE_STATE_GOING)
+ remove_module_kprobe_blacklist(mod);
mutex_unlock(&kprobe_mutex);
return NOTIFY_DONE;
}
@@ -2420,6 +2497,7 @@ static const struct file_operations debugfs_kprobes_operations = {
/* kprobes/blacklist -- shows which functions can not be probed */
static void *kprobe_blacklist_seq_start(struct seq_file *m, loff_t *pos)
{
+ mutex_lock(&kprobe_mutex);
return seq_list_start(&kprobe_blacklist, *pos);
}
@@ -2446,10 +2524,15 @@ static int kprobe_blacklist_seq_show(struct seq_file *m, void *v)
return 0;
}
+static void kprobe_blacklist_seq_stop(struct seq_file *f, void *v)
+{
+ mutex_unlock(&kprobe_mutex);
+}
+
static const struct seq_operations kprobe_blacklist_seq_ops = {
.start = kprobe_blacklist_seq_start,
.next = kprobe_blacklist_seq_next,
- .stop = kprobe_seq_stop, /* Reuse void function */
+ .stop = kprobe_blacklist_seq_stop,
.show = kprobe_blacklist_seq_show,
};
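mod->kprobe_blacklist is populated from NOKPROBE_SYMBOL() annotations inside a module, so the COMING/GOING handling above covers, for example (illustrative module snippet):

        /* keep this handler out of kprobes' reach */
        static int my_trap_handler(struct pt_regs *regs)
        {
                /* ... must never be probed ... */
                return 0;
        }
        NOKPROBE_SYMBOL(my_trap_handler);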
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index ac10db66cc63..4c057dd8e93b 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -393,25 +393,6 @@ void lockdep_init_task(struct task_struct *task)
task->lockdep_recursion = 0;
}
-/*
- * Split the recrursion counter in two to readily detect 'off' vs recursion.
- */
-#define LOCKDEP_RECURSION_BITS 16
-#define LOCKDEP_OFF (1U << LOCKDEP_RECURSION_BITS)
-#define LOCKDEP_RECURSION_MASK (LOCKDEP_OFF - 1)
-
-void lockdep_off(void)
-{
- current->lockdep_recursion += LOCKDEP_OFF;
-}
-EXPORT_SYMBOL(lockdep_off);
-
-void lockdep_on(void)
-{
- current->lockdep_recursion -= LOCKDEP_OFF;
-}
-EXPORT_SYMBOL(lockdep_on);
-
static inline void lockdep_recursion_finish(void)
{
if (WARN_ON_ONCE(--current->lockdep_recursion))
@@ -489,7 +470,7 @@ struct lock_trace {
struct hlist_node hash_entry;
u32 hash;
u32 nr_entries;
- unsigned long entries[0] __aligned(sizeof(unsigned long));
+ unsigned long entries[] __aligned(sizeof(unsigned long));
};
#define LOCK_TRACE_SIZE_IN_LONGS \
(sizeof(struct lock_trace) / sizeof(unsigned long))
@@ -3635,13 +3616,10 @@ mark_held_locks(struct task_struct *curr, enum lock_usage_bit base_bit)
/*
* Hardirqs will be enabled:
*/
-static void __trace_hardirqs_on_caller(unsigned long ip)
+static void __trace_hardirqs_on_caller(void)
{
struct task_struct *curr = current;
- /* we'll do an OFF -> ON transition: */
- curr->hardirqs_enabled = 1;
-
/*
* We are going to turn hardirqs on, so set the
* usage bit for all held locks:
@@ -3654,15 +3632,19 @@ static void __trace_hardirqs_on_caller(unsigned long ip)
* this bit from being set before)
*/
if (curr->softirqs_enabled)
- if (!mark_held_locks(curr, LOCK_ENABLED_SOFTIRQ))
- return;
-
- curr->hardirq_enable_ip = ip;
- curr->hardirq_enable_event = ++curr->irq_events;
- debug_atomic_inc(hardirqs_on_events);
+ mark_held_locks(curr, LOCK_ENABLED_SOFTIRQ);
}
-void lockdep_hardirqs_on(unsigned long ip)
+/**
+ * lockdep_hardirqs_on_prepare - Prepare for enabling interrupts
+ * @ip: Caller address
+ *
+ * Invoked before a possible transition to RCU idle from exit to user or
+ * guest mode. This ensures that all RCU operations are done before RCU
+ * stops watching. After the RCU transition lockdep_hardirqs_on() has to be
+ * invoked to set the final state.
+ */
+void lockdep_hardirqs_on_prepare(unsigned long ip)
{
if (unlikely(!debug_locks || current->lockdep_recursion))
return;
@@ -3698,20 +3680,62 @@ void lockdep_hardirqs_on(unsigned long ip)
if (DEBUG_LOCKS_WARN_ON(current->hardirq_context))
return;
+ current->hardirq_chain_key = current->curr_chain_key;
+
current->lockdep_recursion++;
- __trace_hardirqs_on_caller(ip);
+ __trace_hardirqs_on_caller();
lockdep_recursion_finish();
}
-NOKPROBE_SYMBOL(lockdep_hardirqs_on);
+EXPORT_SYMBOL_GPL(lockdep_hardirqs_on_prepare);
+
+void noinstr lockdep_hardirqs_on(unsigned long ip)
+{
+ struct task_struct *curr = current;
+
+ if (unlikely(!debug_locks || curr->lockdep_recursion))
+ return;
+
+ if (curr->hardirqs_enabled) {
+ /*
+ * Neither irq nor preemption are disabled here
+ * so this is racy by nature but losing one hit
+ * in a stat is not a big deal.
+ */
+ __debug_atomic_inc(redundant_hardirqs_on);
+ return;
+ }
+
+ /*
+ * We're enabling irqs and according to our state above irqs weren't
+ * already enabled, yet we find the hardware thinks they are in fact
+ * enabled... someone messed up their IRQ state tracing.
+ */
+ if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
+ return;
+
+ /*
+ * Ensure the lock stack remained unchanged between
+ * lockdep_hardirqs_on_prepare() and lockdep_hardirqs_on().
+ */
+ DEBUG_LOCKS_WARN_ON(current->hardirq_chain_key !=
+ current->curr_chain_key);
+
+ /* we'll do an OFF -> ON transition: */
+ curr->hardirqs_enabled = 1;
+ curr->hardirq_enable_ip = ip;
+ curr->hardirq_enable_event = ++curr->irq_events;
+ debug_atomic_inc(hardirqs_on_events);
+}
+EXPORT_SYMBOL_GPL(lockdep_hardirqs_on);
/*
* Hardirqs were disabled:
*/
-void lockdep_hardirqs_off(unsigned long ip)
+void noinstr lockdep_hardirqs_off(unsigned long ip)
{
struct task_struct *curr = current;
- if (unlikely(!debug_locks || current->lockdep_recursion))
+ if (unlikely(!debug_locks || curr->lockdep_recursion))
return;
/*
@@ -3729,10 +3753,11 @@ void lockdep_hardirqs_off(unsigned long ip)
curr->hardirq_disable_ip = ip;
curr->hardirq_disable_event = ++curr->irq_events;
debug_atomic_inc(hardirqs_off_events);
- } else
+ } else {
debug_atomic_inc(redundant_hardirqs_off);
+ }
}
-NOKPROBE_SYMBOL(lockdep_hardirqs_off);
+EXPORT_SYMBOL_GPL(lockdep_hardirqs_off);
/*
* Softirqs will be enabled:
@@ -4408,8 +4433,8 @@ static void print_unlock_imbalance_bug(struct task_struct *curr,
dump_stack();
}
-static int match_held_lock(const struct held_lock *hlock,
- const struct lockdep_map *lock)
+static noinstr int match_held_lock(const struct held_lock *hlock,
+ const struct lockdep_map *lock)
{
if (hlock->instance == lock)
return 1;
@@ -4696,7 +4721,7 @@ __lock_release(struct lockdep_map *lock, unsigned long ip)
return 0;
}
-static nokprobe_inline
+static __always_inline
int __lock_is_held(const struct lockdep_map *lock, int read)
{
struct task_struct *curr = current;
@@ -4956,7 +4981,7 @@ void lock_release(struct lockdep_map *lock, unsigned long ip)
}
EXPORT_SYMBOL_GPL(lock_release);
-int lock_is_held_type(const struct lockdep_map *lock, int read)
+noinstr int lock_is_held_type(const struct lockdep_map *lock, int read)
{
unsigned long flags;
int ret = 0;
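The split documented above implies a two-step calling convention on the interrupt-enable path. A hedged sketch of the intended ordering (the real callers live in the arch/entry code; the surrounding steps are illustrative only):

static void example_irq_enable_path(void)
{
	/* Step 1: while RCU is still watching, do the instrumentable
	 * lockdep work (mark held locks, snapshot the chain key). */
	lockdep_hardirqs_on_prepare(_THIS_IP_);

	/* ... possible transition to RCU idle / return-to-user setup ... */

	/* Step 2: noinstr-safe final state flip; it also checks that the
	 * lock stack did not change since the prepare step. */
	lockdep_hardirqs_on(_THIS_IP_);

	raw_local_irq_enable();		/* actually enable interrupts */
}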
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index c9f090d64f00..cfdd5b93264d 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -141,7 +141,6 @@ static void fixup_rt_mutex_waiters(struct rt_mutex *lock)
* set up.
*/
#ifndef CONFIG_DEBUG_RT_MUTEXES
-# define rt_mutex_cmpxchg_relaxed(l,c,n) (cmpxchg_relaxed(&l->owner, c, n) == c)
# define rt_mutex_cmpxchg_acquire(l,c,n) (cmpxchg_acquire(&l->owner, c, n) == c)
# define rt_mutex_cmpxchg_release(l,c,n) (cmpxchg_release(&l->owner, c, n) == c)
@@ -202,7 +201,6 @@ static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock,
}
#else
-# define rt_mutex_cmpxchg_relaxed(l,c,n) (0)
# define rt_mutex_cmpxchg_acquire(l,c,n) (0)
# define rt_mutex_cmpxchg_release(l,c,n) (0)
diff --git a/kernel/module.c b/kernel/module.c
index 8833e848b73c..be5413903d20 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2403,7 +2403,7 @@ static void layout_sections(struct module *mod, struct load_info *info)
if ((s->sh_flags & masks[m][0]) != masks[m][0]
|| (s->sh_flags & masks[m][1])
|| s->sh_entsize != ~0UL
- || strstarts(sname, ".init"))
+ || module_init_section(sname))
continue;
s->sh_entsize = get_offset(mod, &mod->core_layout.size, s, i);
pr_debug("\t%s\n", sname);
@@ -2436,7 +2436,7 @@ static void layout_sections(struct module *mod, struct load_info *info)
if ((s->sh_flags & masks[m][0]) != masks[m][0]
|| (s->sh_flags & masks[m][1])
|| s->sh_entsize != ~0UL
- || !strstarts(sname, ".init"))
+ || !module_init_section(sname))
continue;
s->sh_entsize = (get_offset(mod, &mod->init_layout.size, s, i)
| INIT_OFFSET_MASK);
@@ -2771,6 +2771,11 @@ void * __weak module_alloc(unsigned long size)
return vmalloc_exec(size);
}
+bool __weak module_init_section(const char *name)
+{
+ return strstarts(name, ".init");
+}
+
bool __weak module_exit_section(const char *name)
{
return strstarts(name, ".exit");
@@ -2949,8 +2954,7 @@ static int copy_module_from_user(const void __user *umod, unsigned long len,
return err;
/* Suck in entire file: we'll want most of it. */
- info->hdr = __vmalloc(info->len,
- GFP_KERNEL | __GFP_NOWARN, PAGE_KERNEL);
+ info->hdr = __vmalloc(info->len, GFP_KERNEL | __GFP_NOWARN);
if (!info->hdr)
return -ENOMEM;
@@ -3153,6 +3157,9 @@ static int find_module_sections(struct module *mod, struct load_info *info)
}
#endif
+ mod->noinstr_text_start = section_objs(info, ".noinstr.text", 1,
+ &mod->noinstr_text_size);
+
#ifdef CONFIG_TRACEPOINTS
mod->tracepoints_ptrs = section_objs(info, "__tracepoints_ptrs",
sizeof(*mod->tracepoints_ptrs),
@@ -3197,6 +3204,13 @@ static int find_module_sections(struct module *mod, struct load_info *info)
sizeof(*mod->ei_funcs),
&mod->num_ei_funcs);
#endif
+#ifdef CONFIG_KPROBES
+ mod->kprobes_text_start = section_objs(info, ".kprobes.text", 1,
+ &mod->kprobes_text_size);
+ mod->kprobe_blacklist = section_objs(info, "_kprobe_blacklist",
+ sizeof(unsigned long),
+ &mod->num_kprobe_blacklist);
+#endif
mod->extable = section_objs(info, "__ex_table",
sizeof(*mod->extable), &mod->num_exentries);
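module_init_section() being __weak mirrors module_exit_section() and lets an architecture widen what layout_sections() treats as init. A purely hypothetical override, with an invented section name, might look like:

bool module_init_section(const char *name)
{
	/* Hypothetical: also place an arch-private PLT-like section in the
	 * init layout so it is freed together with .init text. */
	return strstarts(name, ".init") ||
	       strstarts(name, ".myarch.init.plt");
}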
diff --git a/kernel/notifier.c b/kernel/notifier.c
index 5989bbb93039..84c987dfbe03 100644
--- a/kernel/notifier.c
+++ b/kernel/notifier.c
@@ -519,7 +519,6 @@ NOKPROBE_SYMBOL(notify_die);
int register_die_notifier(struct notifier_block *nb)
{
- vmalloc_sync_mappings();
return atomic_notifier_chain_register(&die_chain, nb);
}
EXPORT_SYMBOL_GPL(register_die_notifier);
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index ed9882108cd2..b03df67621d0 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -19,6 +19,8 @@
#include <net/net_namespace.h>
#include <linux/ipc_namespace.h>
#include <linux/time_namespace.h>
+#include <linux/fs_struct.h>
+#include <linux/proc_fs.h>
#include <linux/proc_ns.h>
#include <linux/file.h>
#include <linux/syscalls.h>
@@ -257,37 +259,296 @@ void exit_task_namespaces(struct task_struct *p)
switch_task_namespaces(p, NULL);
}
-SYSCALL_DEFINE2(setns, int, fd, int, nstype)
+static int check_setns_flags(unsigned long flags)
{
- struct task_struct *tsk = current;
- struct nsproxy *new_nsproxy;
- struct file *file;
- struct ns_common *ns;
- int err;
+ if (!flags || (flags & ~(CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
+ CLONE_NEWNET | CLONE_NEWUSER | CLONE_NEWPID |
+ CLONE_NEWCGROUP)))
+ return -EINVAL;
- file = proc_ns_fget(fd);
- if (IS_ERR(file))
- return PTR_ERR(file);
+#ifndef CONFIG_USER_NS
+ if (flags & CLONE_NEWUSER)
+ return -EINVAL;
+#endif
+#ifndef CONFIG_PID_NS
+ if (flags & CLONE_NEWPID)
+ return -EINVAL;
+#endif
+#ifndef CONFIG_UTS_NS
+ if (flags & CLONE_NEWUTS)
+ return -EINVAL;
+#endif
+#ifndef CONFIG_IPC_NS
+ if (flags & CLONE_NEWIPC)
+ return -EINVAL;
+#endif
+#ifndef CONFIG_CGROUPS
+ if (flags & CLONE_NEWCGROUP)
+ return -EINVAL;
+#endif
+#ifndef CONFIG_NET_NS
+ if (flags & CLONE_NEWNET)
+ return -EINVAL;
+#endif
- err = -EINVAL;
- ns = get_proc_ns(file_inode(file));
- if (nstype && (ns->ops->type != nstype))
- goto out;
+ return 0;
+}
+
+static void put_nsset(struct nsset *nsset)
+{
+ unsigned flags = nsset->flags;
+
+ if (flags & CLONE_NEWUSER)
+ put_cred(nsset_cred(nsset));
+ /*
+ * We only created a temporary copy if we attached to more than just
+ * the mount namespace.
+ */
+ if (nsset->fs && (flags & CLONE_NEWNS) && (flags & ~CLONE_NEWNS))
+ free_fs_struct(nsset->fs);
+ if (nsset->nsproxy)
+ free_nsproxy(nsset->nsproxy);
+}
+
+static int prepare_nsset(unsigned flags, struct nsset *nsset)
+{
+ struct task_struct *me = current;
+
+ nsset->nsproxy = create_new_namespaces(0, me, current_user_ns(), me->fs);
+ if (IS_ERR(nsset->nsproxy))
+ return PTR_ERR(nsset->nsproxy);
- new_nsproxy = create_new_namespaces(0, tsk, current_user_ns(), tsk->fs);
- if (IS_ERR(new_nsproxy)) {
- err = PTR_ERR(new_nsproxy);
+ if (flags & CLONE_NEWUSER)
+ nsset->cred = prepare_creds();
+ else
+ nsset->cred = current_cred();
+ if (!nsset->cred)
goto out;
+
+ /* Only create a temporary copy of fs_struct if we really need to. */
+ if (flags == CLONE_NEWNS) {
+ nsset->fs = me->fs;
+ } else if (flags & CLONE_NEWNS) {
+ nsset->fs = copy_fs_struct(me->fs);
+ if (!nsset->fs)
+ goto out;
}
- err = ns->ops->install(new_nsproxy, ns);
- if (err) {
- free_nsproxy(new_nsproxy);
- goto out;
+ nsset->flags = flags;
+ return 0;
+
+out:
+ put_nsset(nsset);
+ return -ENOMEM;
+}
+
+static inline int validate_ns(struct nsset *nsset, struct ns_common *ns)
+{
+ return ns->ops->install(nsset, ns);
+}
+
+/*
+ * This is the inverse operation to unshare().
+ * Ordering is equivalent to the standard ordering used everywhere else
+ * during unshare and process creation. The switch to the new set of
+ * namespaces occurs at the point of no return after installation of
+ * all requested namespaces was successful in commit_nsset().
+ */
+static int validate_nsset(struct nsset *nsset, struct pid *pid)
+{
+ int ret = 0;
+ unsigned flags = nsset->flags;
+ struct user_namespace *user_ns = NULL;
+ struct pid_namespace *pid_ns = NULL;
+ struct nsproxy *nsp;
+ struct task_struct *tsk;
+
+ /* Take a "snapshot" of the target task's namespaces. */
+ rcu_read_lock();
+ tsk = pid_task(pid, PIDTYPE_PID);
+ if (!tsk) {
+ rcu_read_unlock();
+ return -ESRCH;
+ }
+
+ if (!ptrace_may_access(tsk, PTRACE_MODE_READ_REALCREDS)) {
+ rcu_read_unlock();
+ return -EPERM;
+ }
+
+ task_lock(tsk);
+ nsp = tsk->nsproxy;
+ if (nsp)
+ get_nsproxy(nsp);
+ task_unlock(tsk);
+ if (!nsp) {
+ rcu_read_unlock();
+ return -ESRCH;
+ }
+
+#ifdef CONFIG_PID_NS
+ if (flags & CLONE_NEWPID) {
+ pid_ns = task_active_pid_ns(tsk);
+ if (unlikely(!pid_ns)) {
+ rcu_read_unlock();
+ ret = -ESRCH;
+ goto out;
+ }
+ get_pid_ns(pid_ns);
+ }
+#endif
+
+#ifdef CONFIG_USER_NS
+ if (flags & CLONE_NEWUSER)
+ user_ns = get_user_ns(__task_cred(tsk)->user_ns);
+#endif
+ rcu_read_unlock();
+
+ /*
+ * Install requested namespaces. The caller will have
+ * verified earlier that the requested namespaces are
+ * supported on this kernel. We don't report errors here
+ * if a namespace is requested that isn't supported.
+ */
+#ifdef CONFIG_USER_NS
+ if (flags & CLONE_NEWUSER) {
+ ret = validate_ns(nsset, &user_ns->ns);
+ if (ret)
+ goto out;
+ }
+#endif
+
+ if (flags & CLONE_NEWNS) {
+ ret = validate_ns(nsset, from_mnt_ns(nsp->mnt_ns));
+ if (ret)
+ goto out;
+ }
+
+#ifdef CONFIG_UTS_NS
+ if (flags & CLONE_NEWUTS) {
+ ret = validate_ns(nsset, &nsp->uts_ns->ns);
+ if (ret)
+ goto out;
+ }
+#endif
+
+#ifdef CONFIG_IPC_NS
+ if (flags & CLONE_NEWIPC) {
+ ret = validate_ns(nsset, &nsp->ipc_ns->ns);
+ if (ret)
+ goto out;
+ }
+#endif
+
+#ifdef CONFIG_PID_NS
+ if (flags & CLONE_NEWPID) {
+ ret = validate_ns(nsset, &pid_ns->ns);
+ if (ret)
+ goto out;
+ }
+#endif
+
+#ifdef CONFIG_CGROUPS
+ if (flags & CLONE_NEWCGROUP) {
+ ret = validate_ns(nsset, &nsp->cgroup_ns->ns);
+ if (ret)
+ goto out;
+ }
+#endif
+
+#ifdef CONFIG_NET_NS
+ if (flags & CLONE_NEWNET) {
+ ret = validate_ns(nsset, &nsp->net_ns->ns);
+ if (ret)
+ goto out;
+ }
+#endif
+
+out:
+ if (pid_ns)
+ put_pid_ns(pid_ns);
+ if (nsp)
+ put_nsproxy(nsp);
+ put_user_ns(user_ns);
+
+ return ret;
+}
+
+/*
+ * This is the point of no return. There are just a few namespaces
+ * that do some actual work here and it's sufficiently minimal that
+ * a separate ns_common operation seems unnecessary for now.
+ * Unshare is doing the same thing. If we end up needing to do
+ * more in a given namespace, or a helper here is ultimately not
+ * exported anymore, a simple commit handler for each namespace
+ * should be added to ns_common.
+ */
+static void commit_nsset(struct nsset *nsset)
+{
+ unsigned flags = nsset->flags;
+ struct task_struct *me = current;
+
+#ifdef CONFIG_USER_NS
+ if (flags & CLONE_NEWUSER) {
+ /* transfer ownership */
+ commit_creds(nsset_cred(nsset));
+ nsset->cred = NULL;
+ }
+#endif
+
+ /* We only need to commit if we have used a temporary fs_struct. */
+ if ((flags & CLONE_NEWNS) && (flags & ~CLONE_NEWNS)) {
+ set_fs_root(me->fs, &nsset->fs->root);
+ set_fs_pwd(me->fs, &nsset->fs->pwd);
}
- switch_task_namespaces(tsk, new_nsproxy);
- perf_event_namespaces(tsk);
+#ifdef CONFIG_IPC_NS
+ if (flags & CLONE_NEWIPC)
+ exit_sem(me);
+#endif
+
+ /* transfer ownership */
+ switch_task_namespaces(me, nsset->nsproxy);
+ nsset->nsproxy = NULL;
+}
+
+SYSCALL_DEFINE2(setns, int, fd, int, flags)
+{
+ struct file *file;
+ struct ns_common *ns = NULL;
+ struct nsset nsset = {};
+ int err = 0;
+
+ file = fget(fd);
+ if (!file)
+ return -EBADF;
+
+ if (proc_ns_file(file)) {
+ ns = get_proc_ns(file_inode(file));
+ if (flags && (ns->ops->type != flags))
+ err = -EINVAL;
+ flags = ns->ops->type;
+ } else if (!IS_ERR(pidfd_pid(file))) {
+ err = check_setns_flags(flags);
+ } else {
+ err = -EBADF;
+ }
+ if (err)
+ goto out;
+
+ err = prepare_nsset(flags, &nsset);
+ if (err)
+ goto out;
+
+ if (proc_ns_file(file))
+ err = validate_ns(&nsset, ns);
+ else
+ err = validate_nsset(&nsset, file->private_data);
+ if (!err) {
+ commit_nsset(&nsset);
+ perf_event_namespaces(current);
+ }
+ put_nsset(&nsset);
out:
fput(file);
return err;
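With the rework above, setns() also accepts a pidfd and an OR of CLONE_NEW* flags, attaching to all requested namespaces atomically (commit_nsset() is the point of no return). A hedged userspace sketch, assuming headers that define SYS_pidfd_open, with a placeholder PID and trimmed error handling:

#define _GNU_SOURCE
#include <sched.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	pid_t target = 1234;	/* placeholder PID of the target task */
	int pidfd = syscall(SYS_pidfd_open, target, 0);

	if (pidfd < 0)
		return 1;
	/* Either every requested namespace is joined, or none is. */
	if (setns(pidfd, CLONE_NEWNS | CLONE_NEWNET | CLONE_NEWUTS) < 0)
		return 1;
	close(pidfd);
	return 0;
}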
diff --git a/kernel/padata.c b/kernel/padata.c
index a6afa12fb75e..aae789896616 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -703,7 +703,7 @@ static int padata_cpu_online(unsigned int cpu, struct hlist_node *node)
struct padata_instance *pinst;
int ret;
- pinst = hlist_entry_safe(node, struct padata_instance, node);
+ pinst = hlist_entry_safe(node, struct padata_instance, cpu_online_node);
if (!pinst_has_cpu(pinst, cpu))
return 0;
@@ -718,7 +718,7 @@ static int padata_cpu_dead(unsigned int cpu, struct hlist_node *node)
struct padata_instance *pinst;
int ret;
- pinst = hlist_entry_safe(node, struct padata_instance, node);
+ pinst = hlist_entry_safe(node, struct padata_instance, cpu_dead_node);
if (!pinst_has_cpu(pinst, cpu))
return 0;
@@ -734,8 +734,9 @@ static enum cpuhp_state hp_online;
static void __padata_free(struct padata_instance *pinst)
{
#ifdef CONFIG_HOTPLUG_CPU
- cpuhp_state_remove_instance_nocalls(CPUHP_PADATA_DEAD, &pinst->node);
- cpuhp_state_remove_instance_nocalls(hp_online, &pinst->node);
+ cpuhp_state_remove_instance_nocalls(CPUHP_PADATA_DEAD,
+ &pinst->cpu_dead_node);
+ cpuhp_state_remove_instance_nocalls(hp_online, &pinst->cpu_online_node);
#endif
WARN_ON(!list_empty(&pinst->pslist));
@@ -939,9 +940,10 @@ static struct padata_instance *padata_alloc(const char *name,
mutex_init(&pinst->lock);
#ifdef CONFIG_HOTPLUG_CPU
- cpuhp_state_add_instance_nocalls_cpuslocked(hp_online, &pinst->node);
+ cpuhp_state_add_instance_nocalls_cpuslocked(hp_online,
+ &pinst->cpu_online_node);
cpuhp_state_add_instance_nocalls_cpuslocked(CPUHP_PADATA_DEAD,
- &pinst->node);
+ &pinst->cpu_dead_node);
#endif
put_online_cpus();
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 3ccaba5f15c0..0e5ac162c3a8 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -378,13 +378,14 @@ static void pidns_put(struct ns_common *ns)
put_pid_ns(to_pid_ns(ns));
}
-static int pidns_install(struct nsproxy *nsproxy, struct ns_common *ns)
+static int pidns_install(struct nsset *nsset, struct ns_common *ns)
{
+ struct nsproxy *nsproxy = nsset->nsproxy;
struct pid_namespace *active = task_active_pid_ns(current);
struct pid_namespace *ancestor, *new = to_pid_ns(ns);
if (!ns_capable(new->user_ns, CAP_SYS_ADMIN) ||
- !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
+ !ns_capable(nsset->cred->user_ns, CAP_SYS_ADMIN))
return -EPERM;
/*
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index c208566c844b..4d0e6e815a2b 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -80,6 +80,18 @@ config HIBERNATION
For more information take a look at <file:Documentation/power/swsusp.rst>.
+config HIBERNATION_SNAPSHOT_DEV
+ bool "Userspace snapshot device"
+ depends on HIBERNATION
+ default y
+ ---help---
+ Device used by the uswsusp tools.
+
+ Say N if no snapshotting from userspace is needed; this also
+ reduces the attack surface of the kernel.
+
+ If in doubt, say Y.
+
config PM_STD_PARTITION
string "Default resume partition"
depends on HIBERNATION
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index e7e47d9be1e5..5899260a8bef 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -10,7 +10,8 @@ obj-$(CONFIG_VT_CONSOLE_SLEEP) += console.o
obj-$(CONFIG_FREEZER) += process.o
obj-$(CONFIG_SUSPEND) += suspend.o
obj-$(CONFIG_PM_TEST_SUSPEND) += suspend_test.o
-obj-$(CONFIG_HIBERNATION) += hibernate.o snapshot.o swap.o user.o
+obj-$(CONFIG_HIBERNATION) += hibernate.o snapshot.o swap.o
+obj-$(CONFIG_HIBERNATION_SNAPSHOT_DEV) += user.o
obj-$(CONFIG_PM_AUTOSLEEP) += autosleep.o
obj-$(CONFIG_PM_WAKELOCKS) += wakelock.o
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 30bd28d1d418..02ec716a4927 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -67,6 +67,18 @@ bool freezer_test_done;
static const struct platform_hibernation_ops *hibernation_ops;
+static atomic_t hibernate_atomic = ATOMIC_INIT(1);
+
+bool hibernate_acquire(void)
+{
+ return atomic_add_unless(&hibernate_atomic, -1, 0);
+}
+
+void hibernate_release(void)
+{
+ atomic_inc(&hibernate_atomic);
+}
+
bool hibernation_available(void)
{
return nohibernate == 0 && !security_locked_down(LOCKDOWN_HIBERNATION);
@@ -704,7 +716,7 @@ int hibernate(void)
lock_system_sleep();
/* The snapshot device should not be opened while we're running */
- if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {
+ if (!hibernate_acquire()) {
error = -EBUSY;
goto Unlock;
}
@@ -775,7 +787,7 @@ int hibernate(void)
Exit:
__pm_notifier_call_chain(PM_POST_HIBERNATION, nr_calls, NULL);
pm_restore_console();
- atomic_inc(&snapshot_device_available);
+ hibernate_release();
Unlock:
unlock_system_sleep();
pr_info("hibernation exit\n");
@@ -880,7 +892,7 @@ static int software_resume(void)
goto Unlock;
/* The snapshot device should not be opened while we're running */
- if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {
+ if (!hibernate_acquire()) {
error = -EBUSY;
swsusp_close(FMODE_READ);
goto Unlock;
@@ -911,7 +923,7 @@ static int software_resume(void)
__pm_notifier_call_chain(PM_POST_RESTORE, nr_calls, NULL);
pm_restore_console();
pr_info("resume failed (%d)\n", error);
- atomic_inc(&snapshot_device_available);
+ hibernate_release();
/* For success case, the suspend path will release the lock */
Unlock:
mutex_unlock(&system_transition_mutex);
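hibernate_acquire()/hibernate_release() wrap the old snapshot_device_available counter as a one-shot trylock: atomic_add_unless(&hibernate_atomic, -1, 0) succeeds only while the count is non-zero, so exactly one path (hibernate(), resume, or the snapshot device) can hold it at a time. A small sketch of the pattern:

static int demo_exclusive_hibernate_work(void)
{
	if (!hibernate_acquire())	/* count already 0: someone owns it */
		return -EBUSY;

	/* ... exclusive hibernate / snapshot-device work ... */

	hibernate_release();		/* count back to 1, next caller may win */
	return 0;
}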
diff --git a/kernel/power/power.h b/kernel/power/power.h
index 7cdc64dc2373..ba2094db6294 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -154,8 +154,8 @@ extern int snapshot_write_next(struct snapshot_handle *handle);
extern void snapshot_write_finalize(struct snapshot_handle *handle);
extern int snapshot_image_loaded(struct snapshot_handle *handle);
-/* If unset, the snapshot device cannot be open. */
-extern atomic_t snapshot_device_available;
+extern bool hibernate_acquire(void);
+extern void hibernate_release(void);
extern sector_t alloc_swapdev_block(int swap);
extern void free_all_swap_pages(int swap);
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 7959449765d9..d5eedc2baa2a 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -35,9 +35,13 @@ static struct snapshot_data {
bool ready;
bool platform_support;
bool free_bitmaps;
+ struct inode *bd_inode;
} snapshot_state;
-atomic_t snapshot_device_available = ATOMIC_INIT(1);
+int is_hibernate_resume_dev(const struct inode *bd_inode)
+{
+ return hibernation_available() && snapshot_state.bd_inode == bd_inode;
+}
static int snapshot_open(struct inode *inode, struct file *filp)
{
@@ -49,13 +53,13 @@ static int snapshot_open(struct inode *inode, struct file *filp)
lock_system_sleep();
- if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {
+ if (!hibernate_acquire()) {
error = -EBUSY;
goto Unlock;
}
if ((filp->f_flags & O_ACCMODE) == O_RDWR) {
- atomic_inc(&snapshot_device_available);
+ hibernate_release();
error = -ENOSYS;
goto Unlock;
}
@@ -92,11 +96,12 @@ static int snapshot_open(struct inode *inode, struct file *filp)
__pm_notifier_call_chain(PM_POST_RESTORE, nr_calls, NULL);
}
if (error)
- atomic_inc(&snapshot_device_available);
+ hibernate_release();
data->frozen = false;
data->ready = false;
data->platform_support = false;
+ data->bd_inode = NULL;
Unlock:
unlock_system_sleep();
@@ -112,6 +117,7 @@ static int snapshot_release(struct inode *inode, struct file *filp)
swsusp_free();
data = filp->private_data;
+ data->bd_inode = NULL;
free_all_swap_pages(data->swap);
if (data->frozen) {
pm_restore_gfp_mask();
@@ -122,7 +128,7 @@ static int snapshot_release(struct inode *inode, struct file *filp)
}
pm_notifier_call_chain(data->mode == O_RDONLY ?
PM_POST_HIBERNATION : PM_POST_RESTORE);
- atomic_inc(&snapshot_device_available);
+ hibernate_release();
unlock_system_sleep();
@@ -204,6 +210,7 @@ struct compat_resume_swap_area {
static int snapshot_set_swap_area(struct snapshot_data *data,
void __user *argp)
{
+ struct block_device *bdev;
sector_t offset;
dev_t swdev;
@@ -234,9 +241,12 @@ static int snapshot_set_swap_area(struct snapshot_data *data,
data->swap = -1;
return -EINVAL;
}
- data->swap = swap_type_of(swdev, offset, NULL);
+ data->swap = swap_type_of(swdev, offset, &bdev);
if (data->swap < 0)
return -ENODEV;
+
+ data->bd_inode = bdev->bd_inode;
+ bdput(bdev);
return 0;
}
diff --git a/kernel/printk/console_cmdline.h b/kernel/printk/console_cmdline.h
index 11f19c466af5..3ca74ad391d6 100644
--- a/kernel/printk/console_cmdline.h
+++ b/kernel/printk/console_cmdline.h
@@ -6,6 +6,7 @@ struct console_cmdline
{
char name[16]; /* Name of the driver */
int index; /* Minor dev. to use */
+ bool user_specified; /* Specified by command line vs. platform */
char *options; /* Options for the driver */
#ifdef CONFIG_A11Y_BRAILLE_CONSOLE
char *brl_options; /* Options for braille driver */
diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h
index b2b0f526f249..660f9a6bf73a 100644
--- a/kernel/printk/internal.h
+++ b/kernel/printk/internal.h
@@ -6,9 +6,11 @@
#ifdef CONFIG_PRINTK
-#define PRINTK_SAFE_CONTEXT_MASK 0x3fffffff
-#define PRINTK_NMI_DIRECT_CONTEXT_MASK 0x40000000
-#define PRINTK_NMI_CONTEXT_MASK 0x80000000
+#define PRINTK_SAFE_CONTEXT_MASK 0x007ffffff
+#define PRINTK_NMI_DIRECT_CONTEXT_MASK 0x008000000
+#define PRINTK_NMI_CONTEXT_MASK 0xff0000000
+
+#define PRINTK_NMI_CONTEXT_OFFSET 0x010000000
extern raw_spinlock_t logbuf_lock;
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 471f649b5868..3132d6f860a8 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -280,6 +280,7 @@ static struct console *exclusive_console;
static struct console_cmdline console_cmdline[MAX_CMDLINECONSOLES];
static int preferred_console = -1;
+static bool has_preferred_console;
int console_set_on_cmdline;
EXPORT_SYMBOL(console_set_on_cmdline);
@@ -974,6 +975,16 @@ static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence)
user->idx = log_next_idx;
user->seq = log_next_seq;
break;
+ case SEEK_CUR:
+ /*
+ * It isn't supported due to the record nature of this
+ * interface: _SET, _DATA and _END point to very specific
+ * record positions, while _CUR would be more useful in case
+ * of a byte-based log. Because of that, return the default
+ * errno value for an invalid seek operation.
+ */
+ ret = -ESPIPE;
+ break;
default:
ret = -EINVAL;
}
@@ -2140,7 +2151,7 @@ asmlinkage __visible void early_printk(const char *fmt, ...)
#endif
static int __add_preferred_console(char *name, int idx, char *options,
- char *brl_options)
+ char *brl_options, bool user_specified)
{
struct console_cmdline *c;
int i;
@@ -2155,6 +2166,8 @@ static int __add_preferred_console(char *name, int idx, char *options,
if (strcmp(c->name, name) == 0 && c->index == idx) {
if (!brl_options)
preferred_console = i;
+ if (user_specified)
+ c->user_specified = true;
return 0;
}
}
@@ -2164,6 +2177,7 @@ static int __add_preferred_console(char *name, int idx, char *options,
preferred_console = i;
strlcpy(c->name, name, sizeof(c->name));
c->options = options;
+ c->user_specified = user_specified;
braille_set_options(c, brl_options);
c->index = idx;
@@ -2190,6 +2204,9 @@ static int __init console_setup(char *str)
char *s, *options, *brl_options = NULL;
int idx;
+ if (str[0] == 0)
+ return 1;
+
if (_braille_console_setup(&str, &brl_options))
return 1;
@@ -2218,7 +2235,7 @@ static int __init console_setup(char *str)
idx = simple_strtoul(s, NULL, 10);
*s = 0;
- __add_preferred_console(buf, idx, options, brl_options);
+ __add_preferred_console(buf, idx, options, brl_options, true);
console_set_on_cmdline = 1;
return 1;
}
@@ -2239,7 +2256,7 @@ __setup("console=", console_setup);
*/
int add_preferred_console(char *name, int idx, char *options)
{
- return __add_preferred_console(name, idx, options, NULL);
+ return __add_preferred_console(name, idx, options, NULL, false);
}
bool console_suspend_enabled = true;
@@ -2438,9 +2455,9 @@ again:
printk_safe_enter_irqsave(flags);
raw_spin_lock(&logbuf_lock);
if (console_seq < log_first_seq) {
- len = sprintf(text,
- "** %llu printk messages dropped **\n",
- log_first_seq - console_seq);
+ len = snprintf(text, sizeof(text),
+ "** %llu printk messages dropped **\n",
+ log_first_seq - console_seq);
/* messages are gone, move to first one */
console_seq = log_first_seq;
@@ -2652,6 +2669,63 @@ static int __init keep_bootcon_setup(char *str)
early_param("keep_bootcon", keep_bootcon_setup);
/*
+ * This is called by register_console() to try to match
+ * the newly registered console with any of the ones selected
+ * by either the command line or add_preferred_console() and
+ * setup/enable it.
+ *
+ * Care needs to be taken with consoles that are statically
+ * enabled, such as netconsole.
+ */
+static int try_enable_new_console(struct console *newcon, bool user_specified)
+{
+ struct console_cmdline *c;
+ int i;
+
+ for (i = 0, c = console_cmdline;
+ i < MAX_CMDLINECONSOLES && c->name[0];
+ i++, c++) {
+ if (c->user_specified != user_specified)
+ continue;
+ if (!newcon->match ||
+ newcon->match(newcon, c->name, c->index, c->options) != 0) {
+ /* default matching */
+ BUILD_BUG_ON(sizeof(c->name) != sizeof(newcon->name));
+ if (strcmp(c->name, newcon->name) != 0)
+ continue;
+ if (newcon->index >= 0 &&
+ newcon->index != c->index)
+ continue;
+ if (newcon->index < 0)
+ newcon->index = c->index;
+
+ if (_braille_register_console(newcon, c))
+ return 0;
+
+ if (newcon->setup &&
+ newcon->setup(newcon, c->options) != 0)
+ return -EIO;
+ }
+ newcon->flags |= CON_ENABLED;
+ if (i == preferred_console) {
+ newcon->flags |= CON_CONSDEV;
+ has_preferred_console = true;
+ }
+ return 0;
+ }
+
+ /*
+ * Some consoles, such as pstore and netconsole, can be enabled even
+ * without matching. Accept the pre-enabled consoles only when match()
+ * and setup() had a chance to be called.
+ */
+ if (newcon->flags & CON_ENABLED && c->user_specified == user_specified)
+ return 0;
+
+ return -ENOENT;
+}
+
+/*
* The console driver calls this routine during kernel initialization
* to register the console printing procedure with printk() and to
* print any messages that were printed by the kernel before the
@@ -2672,11 +2746,9 @@ early_param("keep_bootcon", keep_bootcon_setup);
*/
void register_console(struct console *newcon)
{
- int i;
unsigned long flags;
struct console *bcon = NULL;
- struct console_cmdline *c;
- static bool has_preferred;
+ int err;
for_each_console(bcon) {
if (WARN(bcon == newcon, "console '%s%d' already registered\n",
@@ -2701,15 +2773,15 @@ void register_console(struct console *newcon)
if (console_drivers && console_drivers->flags & CON_BOOT)
bcon = console_drivers;
- if (!has_preferred || bcon || !console_drivers)
- has_preferred = preferred_console >= 0;
+ if (!has_preferred_console || bcon || !console_drivers)
+ has_preferred_console = preferred_console >= 0;
/*
* See if we want to use this console driver. If we
* didn't select a console we take the first one
* that registers here.
*/
- if (!has_preferred) {
+ if (!has_preferred_console) {
if (newcon->index < 0)
newcon->index = 0;
if (newcon->setup == NULL ||
@@ -2717,47 +2789,20 @@ void register_console(struct console *newcon)
newcon->flags |= CON_ENABLED;
if (newcon->device) {
newcon->flags |= CON_CONSDEV;
- has_preferred = true;
+ has_preferred_console = true;
}
}
}
- /*
- * See if this console matches one we selected on
- * the command line.
- */
- for (i = 0, c = console_cmdline;
- i < MAX_CMDLINECONSOLES && c->name[0];
- i++, c++) {
- if (!newcon->match ||
- newcon->match(newcon, c->name, c->index, c->options) != 0) {
- /* default matching */
- BUILD_BUG_ON(sizeof(c->name) != sizeof(newcon->name));
- if (strcmp(c->name, newcon->name) != 0)
- continue;
- if (newcon->index >= 0 &&
- newcon->index != c->index)
- continue;
- if (newcon->index < 0)
- newcon->index = c->index;
-
- if (_braille_register_console(newcon, c))
- return;
-
- if (newcon->setup &&
- newcon->setup(newcon, c->options) != 0)
- break;
- }
+ /* See if this console matches one we selected on the command line */
+ err = try_enable_new_console(newcon, true);
- newcon->flags |= CON_ENABLED;
- if (i == preferred_console) {
- newcon->flags |= CON_CONSDEV;
- has_preferred = true;
- }
- break;
- }
+ /* If not, try to match against the platform default(s) */
+ if (err == -ENOENT)
+ err = try_enable_new_console(newcon, false);
- if (!(newcon->flags & CON_ENABLED))
+ /* printk() messages are not printed to the Braille console. */
+ if (err || newcon->flags & CON_BRL)
return;
/*
@@ -2779,6 +2824,8 @@ void register_console(struct console *newcon)
console_drivers = newcon;
if (newcon->next)
newcon->next->flags &= ~CON_CONSDEV;
+ /* Ensure this flag is always set for the head of the list */
+ newcon->flags |= CON_CONSDEV;
} else {
newcon->next = console_drivers->next;
console_drivers->next = newcon;
@@ -3144,6 +3191,23 @@ EXPORT_SYMBOL_GPL(kmsg_dump_unregister);
static bool always_kmsg_dump;
module_param_named(always_kmsg_dump, always_kmsg_dump, bool, S_IRUGO | S_IWUSR);
+const char *kmsg_dump_reason_str(enum kmsg_dump_reason reason)
+{
+ switch (reason) {
+ case KMSG_DUMP_PANIC:
+ return "Panic";
+ case KMSG_DUMP_OOPS:
+ return "Oops";
+ case KMSG_DUMP_EMERG:
+ return "Emergency";
+ case KMSG_DUMP_SHUTDOWN:
+ return "Shutdown";
+ default:
+ return "Unknown";
+ }
+}
+EXPORT_SYMBOL_GPL(kmsg_dump_reason_str);
+
/**
* kmsg_dump - dump kernel log to kernel message dumpers.
* @reason: the reason (oops, panic etc) for dumping
@@ -3157,12 +3221,19 @@ void kmsg_dump(enum kmsg_dump_reason reason)
struct kmsg_dumper *dumper;
unsigned long flags;
- if ((reason > KMSG_DUMP_OOPS) && !always_kmsg_dump)
- return;
-
rcu_read_lock();
list_for_each_entry_rcu(dumper, &dump_list, list) {
- if (dumper->max_reason && reason > dumper->max_reason)
+ enum kmsg_dump_reason max_reason = dumper->max_reason;
+
+ /*
+ * If client has not provided a specific max_reason, default
+ * to KMSG_DUMP_OOPS, unless always_kmsg_dump was set.
+ */
+ if (max_reason == KMSG_DUMP_UNDEF) {
+ max_reason = always_kmsg_dump ? KMSG_DUMP_MAX :
+ KMSG_DUMP_OOPS;
+ }
+ if (reason > max_reason)
continue;
/* initialize iterator with data about the stored records */
@@ -3360,7 +3431,7 @@ out:
EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer);
/**
- * kmsg_dump_rewind_nolock - reset the interator (unlocked version)
+ * kmsg_dump_rewind_nolock - reset the iterator (unlocked version)
* @dumper: registered kmsg dumper
*
* Reset the dumper's iterator so that kmsg_dump_get_line() and
@@ -3378,7 +3449,7 @@ void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper)
}
/**
- * kmsg_dump_rewind - reset the interator
+ * kmsg_dump_rewind - reset the iterator
* @dumper: registered kmsg dumper
*
* Reset the dumper's iterator so that kmsg_dump_get_line() and
diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c
index d9a659a686f3..4242403316bb 100644
--- a/kernel/printk/printk_safe.c
+++ b/kernel/printk/printk_safe.c
@@ -10,6 +10,7 @@
#include <linux/cpumask.h>
#include <linux/irq_work.h>
#include <linux/printk.h>
+#include <linux/kprobes.h>
#include "internal.h"
@@ -293,14 +294,14 @@ static __printf(1, 0) int vprintk_nmi(const char *fmt, va_list args)
return printk_safe_log_store(s, fmt, args);
}
-void notrace printk_nmi_enter(void)
+void noinstr printk_nmi_enter(void)
{
- this_cpu_or(printk_context, PRINTK_NMI_CONTEXT_MASK);
+ this_cpu_add(printk_context, PRINTK_NMI_CONTEXT_OFFSET);
}
-void notrace printk_nmi_exit(void)
+void noinstr printk_nmi_exit(void)
{
- this_cpu_and(printk_context, ~PRINTK_NMI_CONTEXT_MASK);
+ this_cpu_sub(printk_context, PRINTK_NMI_CONTEXT_OFFSET);
}
/*
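Switching from an or/and of PRINTK_NMI_CONTEXT_MASK to add/sub of PRINTK_NMI_CONTEXT_OFFSET turns the NMI flag into a counter, so a nested NMI-like exception no longer clears the context of its outer level on exit. Illustrative nesting trace (the calls are the real API; the nesting scenario is illustrative):

/*
 * printk_nmi_enter();     printk_context += OFFSET   (outer NMI)
 *   printk_nmi_enter();   printk_context += OFFSET   (nested exception in NMI)
 *   printk_nmi_exit();    printk_context -= OFFSET   (still counted as NMI)
 * printk_nmi_exit();      printk_context -= OFFSET   (back to previous context)
 */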
diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig
index 1cc940fef17c..0ebe15a84985 100644
--- a/kernel/rcu/Kconfig
+++ b/kernel/rcu/Kconfig
@@ -70,13 +70,37 @@ config TREE_SRCU
help
This option selects the full-fledged version of SRCU.
+config TASKS_RCU_GENERIC
+ def_bool TASKS_RCU || TASKS_RUDE_RCU || TASKS_TRACE_RCU
+ select SRCU
+ help
+ This option enables generic infrastructure code supporting
+ task-based RCU implementations. Not for manual selection.
+
config TASKS_RCU
def_bool PREEMPTION
- select SRCU
help
This option enables a task-based RCU implementation that uses
only voluntary context switch (not preemption!), idle, and
- user-mode execution as quiescent states.
+ user-mode execution as quiescent states. Not for manual selection.
+
+config TASKS_RUDE_RCU
+ def_bool 0
+ help
+ This option enables a task-based RCU implementation that uses
+ only context switch (including preemption) and user-mode
+ execution as quiescent states. It forces IPIs and context
+ switches on all online CPUs, including idle ones, so use
+ with caution.
+
+config TASKS_TRACE_RCU
+ def_bool 0
+ help
+ This option enables a task-based RCU implementation that uses
+ explicit rcu_read_lock_trace() read-side markers, and allows
+ these readers to appear in the idle loop as well as on the CPU
+ hotplug code paths. It can force IPIs on online CPUs, including
+ idle ones, so use with caution.
config RCU_STALL_COMMON
def_bool TREE_RCU
@@ -210,4 +234,22 @@ config RCU_NOCB_CPU
Say Y here if you want to help to debug reduced OS jitter.
Say N here if you are unsure.
+config TASKS_TRACE_RCU_READ_MB
+ bool "Tasks Trace RCU readers use memory barriers in user and idle"
+ depends on RCU_EXPERT
+ default PREEMPT_RT || NR_CPUS < 8
+ help
+ Use this option to further reduce the number of IPIs sent
+ to CPUs executing in userspace or idle during tasks trace
+ RCU grace periods. Given that a reasonable setting of
+ the rcupdate.rcu_task_ipi_delay kernel boot parameter
+ eliminates such IPIs for many workloads, proper setting
+ of this Kconfig option is important mostly for aggressive
+ real-time installations and for battery-powered devices,
+ hence the default chosen above.
+
+ Say Y here if you hate IPIs.
+ Say N here if you hate read-side memory barriers.
+ Take the default if you are unsure.
+
endmenu # "RCU Subsystem"
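For orientation, the reader-side API of the new Tasks Trace flavor lives in include/linux/rcupdate_trace.h; its readers may be preempted and may appear in the idle loop. A hedged reader sketch with an invented shared object (type and names are illustrative, not from this patch):

#include <linux/rcupdate.h>
#include <linux/rcupdate_trace.h>

struct demo_obj { int val; };
static struct demo_obj __rcu *demo_ptr;	/* illustrative shared pointer */

static int demo_trace_reader(void)
{
	struct demo_obj *p;
	int v = 0;

	rcu_read_lock_trace();
	p = rcu_dereference_raw(demo_ptr);	/* protected by the trace reader */
	if (p)
		v = p->val;
	rcu_read_unlock_trace();
	return v;
}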
diff --git a/kernel/rcu/Kconfig.debug b/kernel/rcu/Kconfig.debug
index 4aa02eee8f6c..452feae8de20 100644
--- a/kernel/rcu/Kconfig.debug
+++ b/kernel/rcu/Kconfig.debug
@@ -29,6 +29,8 @@ config RCU_PERF_TEST
select TORTURE_TEST
select SRCU
select TASKS_RCU
+ select TASKS_RUDE_RCU
+ select TASKS_TRACE_RCU
default n
help
This option provides a kernel module that runs performance
@@ -46,6 +48,8 @@ config RCU_TORTURE_TEST
select TORTURE_TEST
select SRCU
select TASKS_RCU
+ select TASKS_RUDE_RCU
+ select TASKS_TRACE_RCU
default n
help
This option provides a kernel module that runs torture tests
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 00ddc92c5774..cf66a3ccd757 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -431,6 +431,7 @@ bool rcu_gp_is_expedited(void); /* Internal RCU use. */
void rcu_expedite_gp(void);
void rcu_unexpedite_gp(void);
void rcupdate_announce_bootup_oddness(void);
+void show_rcu_tasks_gp_kthreads(void);
void rcu_request_urgent_qs_task(struct task_struct *t);
#endif /* #else #ifdef CONFIG_TINY_RCU */
@@ -441,6 +442,8 @@ void rcu_request_urgent_qs_task(struct task_struct *t);
enum rcutorture_type {
RCU_FLAVOR,
RCU_TASKS_FLAVOR,
+ RCU_TASKS_RUDE_FLAVOR,
+ RCU_TASKS_TRACING_FLAVOR,
RCU_TRIVIAL_FLAVOR,
SRCU_FLAVOR,
INVALID_RCU_FLAVOR
@@ -454,6 +457,7 @@ void do_trace_rcu_torture_read(const char *rcutorturename,
unsigned long secs,
unsigned long c_old,
unsigned long c);
+void rcu_gp_set_torture_wait(int duration);
#else
static inline void rcutorture_get_gp_data(enum rcutorture_type test_type,
int *flags, unsigned long *gp_seq)
@@ -471,6 +475,7 @@ void do_trace_rcu_torture_read(const char *rcutorturename,
#define do_trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \
do { } while (0)
#endif
+static inline void rcu_gp_set_torture_wait(int duration) { }
#endif
#if IS_ENABLED(CONFIG_RCU_TORTURE_TEST) || IS_MODULE(CONFIG_RCU_TORTURE_TEST)
@@ -498,6 +503,7 @@ void srcutorture_get_gp_data(enum rcutorture_type test_type,
#endif
#ifdef CONFIG_TINY_RCU
+static inline bool rcu_dynticks_zero_in_eqs(int cpu, int *vp) { return false; }
static inline unsigned long rcu_get_gp_seq(void) { return 0; }
static inline unsigned long rcu_exp_batches_completed(void) { return 0; }
static inline unsigned long
@@ -507,6 +513,7 @@ static inline void show_rcu_gp_kthreads(void) { }
static inline int rcu_get_gp_kthreads_prio(void) { return 0; }
static inline void rcu_fwd_progress_check(unsigned long j) { }
#else /* #ifdef CONFIG_TINY_RCU */
+bool rcu_dynticks_zero_in_eqs(int cpu, int *vp);
unsigned long rcu_get_gp_seq(void);
unsigned long rcu_exp_batches_completed(void);
unsigned long srcu_batches_completed(struct srcu_struct *sp);
diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuperf.c
index a4a8d097d84d..16dd1e6b7c09 100644
--- a/kernel/rcu/rcuperf.c
+++ b/kernel/rcu/rcuperf.c
@@ -88,6 +88,7 @@ torture_param(bool, shutdown, RCUPERF_SHUTDOWN,
torture_param(int, verbose, 1, "Enable verbose debugging printk()s");
torture_param(int, writer_holdoff, 0, "Holdoff (us) between GPs, zero to disable");
torture_param(int, kfree_rcu_test, 0, "Do we run a kfree_rcu() perf test?");
+torture_param(int, kfree_mult, 1, "Multiple of kfree_obj size to allocate.");
static char *perf_type = "rcu";
module_param(perf_type, charp, 0444);
@@ -635,7 +636,7 @@ kfree_perf_thread(void *arg)
}
for (i = 0; i < kfree_alloc_num; i++) {
- alloc_ptr = kmalloc(sizeof(struct kfree_obj), GFP_KERNEL);
+ alloc_ptr = kmalloc(kfree_mult * sizeof(struct kfree_obj), GFP_KERNEL);
if (!alloc_ptr)
return -ENOMEM;
@@ -722,6 +723,8 @@ kfree_perf_init(void)
schedule_timeout_uninterruptible(1);
}
+ pr_alert("kfree object size=%lu\n", kfree_mult * sizeof(struct kfree_obj));
+
kfree_reader_tasks = kcalloc(kfree_nrealthreads, sizeof(kfree_reader_tasks[0]),
GFP_KERNEL);
if (kfree_reader_tasks == NULL) {
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 5453bd557f43..efb792e13fca 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -20,7 +20,7 @@
#include <linux/err.h>
#include <linux/spinlock.h>
#include <linux/smp.h>
-#include <linux/rcupdate.h>
+#include <linux/rcupdate_wait.h>
#include <linux/interrupt.h>
#include <linux/sched/signal.h>
#include <uapi/linux/sched/types.h>
@@ -45,12 +45,25 @@
#include <linux/sched/sysctl.h>
#include <linux/oom.h>
#include <linux/tick.h>
+#include <linux/rcupdate_trace.h>
#include "rcu.h"
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Paul E. McKenney <paulmck@linux.ibm.com> and Josh Triplett <josh@joshtriplett.org>");
+#ifndef data_race
+#define data_race(expr) \
+ ({ \
+ expr; \
+ })
+#endif
+#ifndef ASSERT_EXCLUSIVE_WRITER
+#define ASSERT_EXCLUSIVE_WRITER(var) do { } while (0)
+#endif
+#ifndef ASSERT_EXCLUSIVE_ACCESS
+#define ASSERT_EXCLUSIVE_ACCESS(var) do { } while (0)
+#endif
/* Bits for ->extendables field, extendables param, and related definitions. */
#define RCUTORTURE_RDR_SHIFT 8 /* Put SRCU index in upper bits. */
@@ -102,6 +115,9 @@ torture_param(int, stall_cpu, 0, "Stall duration (s), zero to disable.");
torture_param(int, stall_cpu_holdoff, 10,
"Time to wait before starting stall (s).");
torture_param(int, stall_cpu_irqsoff, 0, "Disable interrupts while stalling.");
+torture_param(int, stall_cpu_block, 0, "Sleep while stalling.");
+torture_param(int, stall_gp_kthread, 0,
+ "Grace-period kthread stall duration (s).");
torture_param(int, stat_interval, 60,
"Number of seconds between stats printk()s");
torture_param(int, stutter, 5, "Number of seconds to run/halt test");
@@ -665,6 +681,11 @@ static void rcu_tasks_torture_deferred_free(struct rcu_torture *p)
call_rcu_tasks(&p->rtort_rcu, rcu_torture_cb);
}
+static void synchronize_rcu_mult_test(void)
+{
+ synchronize_rcu_mult(call_rcu_tasks, call_rcu);
+}
+
static struct rcu_torture_ops tasks_ops = {
.ttype = RCU_TASKS_FLAVOR,
.init = rcu_sync_torture_init,
@@ -674,7 +695,7 @@ static struct rcu_torture_ops tasks_ops = {
.get_gp_seq = rcu_no_completed,
.deferred_free = rcu_tasks_torture_deferred_free,
.sync = synchronize_rcu_tasks,
- .exp_sync = synchronize_rcu_tasks,
+ .exp_sync = synchronize_rcu_mult_test,
.call = call_rcu_tasks,
.cb_barrier = rcu_barrier_tasks,
.fqs = NULL,
@@ -725,6 +746,72 @@ static struct rcu_torture_ops trivial_ops = {
.name = "trivial"
};
+/*
+ * Definitions for rude RCU-tasks torture testing.
+ */
+
+static void rcu_tasks_rude_torture_deferred_free(struct rcu_torture *p)
+{
+ call_rcu_tasks_rude(&p->rtort_rcu, rcu_torture_cb);
+}
+
+static struct rcu_torture_ops tasks_rude_ops = {
+ .ttype = RCU_TASKS_RUDE_FLAVOR,
+ .init = rcu_sync_torture_init,
+ .readlock = rcu_torture_read_lock_trivial,
+ .read_delay = rcu_read_delay, /* just reuse rcu's version. */
+ .readunlock = rcu_torture_read_unlock_trivial,
+ .get_gp_seq = rcu_no_completed,
+ .deferred_free = rcu_tasks_rude_torture_deferred_free,
+ .sync = synchronize_rcu_tasks_rude,
+ .exp_sync = synchronize_rcu_tasks_rude,
+ .call = call_rcu_tasks_rude,
+ .cb_barrier = rcu_barrier_tasks_rude,
+ .fqs = NULL,
+ .stats = NULL,
+ .irq_capable = 1,
+ .name = "tasks-rude"
+};
+
+/*
+ * Definitions for tracing RCU-tasks torture testing.
+ */
+
+static int tasks_tracing_torture_read_lock(void)
+{
+ rcu_read_lock_trace();
+ return 0;
+}
+
+static void tasks_tracing_torture_read_unlock(int idx)
+{
+ rcu_read_unlock_trace();
+}
+
+static void rcu_tasks_tracing_torture_deferred_free(struct rcu_torture *p)
+{
+ call_rcu_tasks_trace(&p->rtort_rcu, rcu_torture_cb);
+}
+
+static struct rcu_torture_ops tasks_tracing_ops = {
+ .ttype = RCU_TASKS_TRACING_FLAVOR,
+ .init = rcu_sync_torture_init,
+ .readlock = tasks_tracing_torture_read_lock,
+ .read_delay = srcu_read_delay, /* just reuse srcu's version. */
+ .readunlock = tasks_tracing_torture_read_unlock,
+ .get_gp_seq = rcu_no_completed,
+ .deferred_free = rcu_tasks_tracing_torture_deferred_free,
+ .sync = synchronize_rcu_tasks_trace,
+ .exp_sync = synchronize_rcu_tasks_trace,
+ .call = call_rcu_tasks_trace,
+ .cb_barrier = rcu_barrier_tasks_trace,
+ .fqs = NULL,
+ .stats = NULL,
+ .irq_capable = 1,
+ .slow_gps = 1,
+ .name = "tasks-tracing"
+};
+
static unsigned long rcutorture_seq_diff(unsigned long new, unsigned long old)
{
if (!cur_ops->gp_diff)
@@ -734,7 +821,7 @@ static unsigned long rcutorture_seq_diff(unsigned long new, unsigned long old)
static bool __maybe_unused torturing_tasks(void)
{
- return cur_ops == &tasks_ops;
+ return cur_ops == &tasks_ops || cur_ops == &tasks_rude_ops;
}
/*
@@ -833,7 +920,7 @@ static int rcu_torture_boost(void *arg)
/* Wait for the next test interval. */
oldstarttime = boost_starttime;
- while (ULONG_CMP_LT(jiffies, oldstarttime)) {
+ while (time_before(jiffies, oldstarttime)) {
schedule_timeout_interruptible(oldstarttime - jiffies);
stutter_wait("rcu_torture_boost");
if (torture_must_stop())
@@ -843,7 +930,7 @@ static int rcu_torture_boost(void *arg)
/* Do one boost-test interval. */
endtime = oldstarttime + test_boost_duration * HZ;
call_rcu_time = jiffies;
- while (ULONG_CMP_LT(jiffies, endtime)) {
+ while (time_before(jiffies, endtime)) {
/* If we don't have a callback in flight, post one. */
if (!smp_load_acquire(&rbi.inflight)) {
/* RCU core before ->inflight = 1. */
@@ -914,7 +1001,7 @@ rcu_torture_fqs(void *arg)
VERBOSE_TOROUT_STRING("rcu_torture_fqs task started");
do {
fqs_resume_time = jiffies + fqs_stutter * HZ;
- while (ULONG_CMP_LT(jiffies, fqs_resume_time) &&
+ while (time_before(jiffies, fqs_resume_time) &&
!kthread_should_stop()) {
schedule_timeout_interruptible(1);
}
@@ -1147,6 +1234,7 @@ static void rcutorture_one_extend(int *readstate, int newstate,
struct torture_random_state *trsp,
struct rt_read_seg *rtrsp)
{
+ unsigned long flags;
int idxnew = -1;
int idxold = *readstate;
int statesnew = ~*readstate & newstate;
@@ -1181,8 +1269,15 @@ static void rcutorture_one_extend(int *readstate, int newstate,
rcu_read_unlock_bh();
if (statesold & RCUTORTURE_RDR_SCHED)
rcu_read_unlock_sched();
- if (statesold & RCUTORTURE_RDR_RCU)
+ if (statesold & RCUTORTURE_RDR_RCU) {
+ bool lockit = !statesnew && !(torture_random(trsp) & 0xffff);
+
+ if (lockit)
+ raw_spin_lock_irqsave(&current->pi_lock, flags);
cur_ops->readunlock(idxold >> RCUTORTURE_RDR_SHIFT);
+ if (lockit)
+ raw_spin_unlock_irqrestore(&current->pi_lock, flags);
+ }
/* Delay if neither beginning nor end and there was a change. */
if ((statesnew || statesold) && *readstate && newstate)
@@ -1283,6 +1378,7 @@ static bool rcu_torture_one_read(struct torture_random_state *trsp)
rcu_read_lock_bh_held() ||
rcu_read_lock_sched_held() ||
srcu_read_lock_held(srcu_ctlp) ||
+ rcu_read_lock_trace_held() ||
torturing_tasks());
if (p == NULL) {
/* Wait for rcu_torture_writer to get underway */
@@ -1444,9 +1540,9 @@ rcu_torture_stats_print(void)
atomic_long_read(&n_rcu_torture_timers));
torture_onoff_stats();
pr_cont("barrier: %ld/%ld:%ld\n",
- n_barrier_successes,
- n_barrier_attempts,
- n_rcu_torture_barrier_error);
+ data_race(n_barrier_successes),
+ data_race(n_barrier_attempts),
+ data_race(n_rcu_torture_barrier_error));
pr_alert("%s%s ", torture_type, TORTURE_FLAG);
if (atomic_read(&n_rcu_torture_mberror) ||
@@ -1536,6 +1632,7 @@ rcu_torture_print_module_parms(struct rcu_torture_ops *cur_ops, const char *tag)
"test_boost=%d/%d test_boost_interval=%d "
"test_boost_duration=%d shutdown_secs=%d "
"stall_cpu=%d stall_cpu_holdoff=%d stall_cpu_irqsoff=%d "
+ "stall_cpu_block=%d "
"n_barrier_cbs=%d "
"onoff_interval=%d onoff_holdoff=%d\n",
torture_type, tag, nrealreaders, nfakewriters,
@@ -1544,6 +1641,7 @@ rcu_torture_print_module_parms(struct rcu_torture_ops *cur_ops, const char *tag)
test_boost, cur_ops->can_boost,
test_boost_interval, test_boost_duration, shutdown_secs,
stall_cpu, stall_cpu_holdoff, stall_cpu_irqsoff,
+ stall_cpu_block,
n_barrier_cbs,
onoff_interval, onoff_holdoff);
}
@@ -1599,6 +1697,7 @@ static int rcutorture_booster_init(unsigned int cpu)
*/
static int rcu_torture_stall(void *args)
{
+ int idx;
unsigned long stop_at;
VERBOSE_TOROUT_STRING("rcu_torture_stall task started");
@@ -1607,26 +1706,37 @@ static int rcu_torture_stall(void *args)
schedule_timeout_interruptible(stall_cpu_holdoff * HZ);
VERBOSE_TOROUT_STRING("rcu_torture_stall end holdoff");
}
- if (!kthread_should_stop()) {
+ if (!kthread_should_stop() && stall_gp_kthread > 0) {
+ VERBOSE_TOROUT_STRING("rcu_torture_stall begin GP stall");
+ rcu_gp_set_torture_wait(stall_gp_kthread * HZ);
+ for (idx = 0; idx < stall_gp_kthread + 2; idx++) {
+ if (kthread_should_stop())
+ break;
+ schedule_timeout_uninterruptible(HZ);
+ }
+ }
+ if (!kthread_should_stop() && stall_cpu > 0) {
+ VERBOSE_TOROUT_STRING("rcu_torture_stall begin CPU stall");
stop_at = ktime_get_seconds() + stall_cpu;
/* RCU CPU stall is expected behavior in following code. */
- rcu_read_lock();
+ idx = cur_ops->readlock();
if (stall_cpu_irqsoff)
local_irq_disable();
- else
+ else if (!stall_cpu_block)
preempt_disable();
pr_alert("rcu_torture_stall start on CPU %d.\n",
- smp_processor_id());
+ raw_smp_processor_id());
while (ULONG_CMP_LT((unsigned long)ktime_get_seconds(),
stop_at))
- continue; /* Induce RCU CPU stall warning. */
+ if (stall_cpu_block)
+ schedule_timeout_uninterruptible(HZ);
if (stall_cpu_irqsoff)
local_irq_enable();
- else
+ else if (!stall_cpu_block)
preempt_enable();
- rcu_read_unlock();
- pr_alert("rcu_torture_stall end.\n");
+ cur_ops->readunlock(idx);
}
+ pr_alert("rcu_torture_stall end.\n");
torture_shutdown_absorb("rcu_torture_stall");
while (!kthread_should_stop())
schedule_timeout_interruptible(10 * HZ);
@@ -1636,7 +1746,7 @@ static int rcu_torture_stall(void *args)
/* Spawn CPU-stall kthread, if stall_cpu specified. */
static int __init rcu_torture_stall_init(void)
{
- if (stall_cpu <= 0)
+ if (stall_cpu <= 0 && stall_gp_kthread <= 0)
return 0;
return torture_create_kthread(rcu_torture_stall, NULL, stall_task);
}
@@ -1692,8 +1802,8 @@ struct rcu_fwd {
unsigned long rcu_launder_gp_seq_start;
};
-struct rcu_fwd *rcu_fwds;
-bool rcu_fwd_emergency_stop;
+static struct rcu_fwd *rcu_fwds;
+static bool rcu_fwd_emergency_stop;
static void rcu_torture_fwd_cb_hist(struct rcu_fwd *rfp)
{
@@ -2400,7 +2510,8 @@ rcu_torture_init(void)
int firsterr = 0;
static struct rcu_torture_ops *torture_ops[] = {
&rcu_ops, &rcu_busted_ops, &srcu_ops, &srcud_ops,
- &busted_srcud_ops, &tasks_ops, &trivial_ops,
+ &busted_srcud_ops, &tasks_ops, &tasks_rude_ops,
+ &tasks_tracing_ops, &trivial_ops,
};
if (!torture_init_begin(torture_type, verbose))
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 0c71505f0e19..6d3ef700fb0e 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -29,6 +29,19 @@
#include "rcu.h"
#include "rcu_segcblist.h"
+#ifndef data_race
+#define data_race(expr) \
+ ({ \
+ expr; \
+ })
+#endif
+#ifndef ASSERT_EXCLUSIVE_WRITER
+#define ASSERT_EXCLUSIVE_WRITER(var) do { } while (0)
+#endif
+#ifndef ASSERT_EXCLUSIVE_ACCESS
+#define ASSERT_EXCLUSIVE_ACCESS(var) do { } while (0)
+#endif
+
/* Holdoff in nanoseconds for auto-expediting. */
#define DEFAULT_SRCU_EXP_HOLDOFF (25 * 1000)
static ulong exp_holdoff = DEFAULT_SRCU_EXP_HOLDOFF;
@@ -1268,8 +1281,8 @@ void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf)
struct srcu_data *sdp;
sdp = per_cpu_ptr(ssp->sda, cpu);
- u0 = sdp->srcu_unlock_count[!idx];
- u1 = sdp->srcu_unlock_count[idx];
+ u0 = data_race(sdp->srcu_unlock_count[!idx]);
+ u1 = data_race(sdp->srcu_unlock_count[idx]);
/*
* Make sure that a lock is always counted if the corresponding
@@ -1277,8 +1290,8 @@ void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf)
*/
smp_rmb();
- l0 = sdp->srcu_lock_count[!idx];
- l1 = sdp->srcu_lock_count[idx];
+ l0 = data_race(sdp->srcu_lock_count[!idx]);
+ l1 = data_race(sdp->srcu_lock_count[idx]);
c0 = l0 - u0;
c1 = l1 - u1;
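data_race() (including the #ifndef fallbacks added earlier in this file) only tells KCSAN that a racy plain access is intentional; it adds no ordering or atomicity. A minimal sketch of the typical statistics-only use, with invented names:

#include <linux/compiler.h>
#include <linux/printk.h>

static unsigned long demo_hits;		/* updated locklessly, stats only */

static void demo_report(void)
{
	/* A torn or stale value is acceptable here, so annotate the read
	 * rather than adding synchronization. */
	pr_info("approximate hits: %lu\n", data_race(demo_hits));
}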
diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
new file mode 100644
index 000000000000..ce23f6cc5043
--- /dev/null
+++ b/kernel/rcu/tasks.h
@@ -0,0 +1,1193 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Task-based RCU implementations.
+ *
+ * Copyright (C) 2020 Paul E. McKenney
+ */
+
+#ifdef CONFIG_TASKS_RCU_GENERIC
+
+////////////////////////////////////////////////////////////////////////
+//
+// Generic data structures.
+
+struct rcu_tasks;
+typedef void (*rcu_tasks_gp_func_t)(struct rcu_tasks *rtp);
+typedef void (*pregp_func_t)(void);
+typedef void (*pertask_func_t)(struct task_struct *t, struct list_head *hop);
+typedef void (*postscan_func_t)(struct list_head *hop);
+typedef void (*holdouts_func_t)(struct list_head *hop, bool ndrpt, bool *frptp);
+typedef void (*postgp_func_t)(struct rcu_tasks *rtp);
+
+/**
+ * Definition for a Tasks-RCU-like mechanism.
+ * @cbs_head: Head of callback list.
+ * @cbs_tail: Tail pointer for callback list.
+ * @cbs_wq: Wait queue allowing new callback to get kthread's attention.
+ * @cbs_lock: Lock protecting callback list.
+ * @kthread_ptr: This flavor's grace-period/callback-invocation kthread.
+ * @gp_func: This flavor's grace-period-wait function.
+ * @gp_state: Grace period's most recent state transition (debugging).
+ * @gp_jiffies: Time of last @gp_state transition.
+ * @gp_start: Most recent grace-period start in jiffies.
+ * @n_gps: Number of grace periods completed since boot.
+ * @n_ipis: Number of IPIs sent to encourage grace periods to end.
+ * @n_ipis_fails: Number of IPI-send failures.
+ * @pregp_func: This flavor's pre-grace-period function (optional).
+ * @pertask_func: This flavor's per-task scan function (optional).
+ * @postscan_func: This flavor's post-task scan function (optional).
+ * @holdout_func: This flavor's holdout-list scan function (optional).
+ * @postgp_func: This flavor's post-grace-period function (optional).
+ * @call_func: This flavor's call_rcu()-equivalent function.
+ * @name: This flavor's textual name.
+ * @kname: This flavor's kthread name.
+ */
+struct rcu_tasks {
+ struct rcu_head *cbs_head;
+ struct rcu_head **cbs_tail;
+ struct wait_queue_head cbs_wq;
+ raw_spinlock_t cbs_lock;
+ int gp_state;
+ unsigned long gp_jiffies;
+ unsigned long gp_start;
+ unsigned long n_gps;
+ unsigned long n_ipis;
+ unsigned long n_ipis_fails;
+ struct task_struct *kthread_ptr;
+ rcu_tasks_gp_func_t gp_func;
+ pregp_func_t pregp_func;
+ pertask_func_t pertask_func;
+ postscan_func_t postscan_func;
+ holdouts_func_t holdouts_func;
+ postgp_func_t postgp_func;
+ call_rcu_func_t call_func;
+ char *name;
+ char *kname;
+};
+
+#define DEFINE_RCU_TASKS(rt_name, gp, call, n) \
+static struct rcu_tasks rt_name = \
+{ \
+ .cbs_tail = &rt_name.cbs_head, \
+ .cbs_wq = __WAIT_QUEUE_HEAD_INITIALIZER(rt_name.cbs_wq), \
+ .cbs_lock = __RAW_SPIN_LOCK_UNLOCKED(rt_name.cbs_lock), \
+ .gp_func = gp, \
+ .call_func = call, \
+ .name = n, \
+ .kname = #rt_name, \
+}
+
+/* Track exiting tasks in order to allow them to be waited for. */
+DEFINE_STATIC_SRCU(tasks_rcu_exit_srcu);
+
+/* Avoid IPIing CPUs early in the grace period. */
+#define RCU_TASK_IPI_DELAY (HZ / 2)
+static int rcu_task_ipi_delay __read_mostly = RCU_TASK_IPI_DELAY;
+module_param(rcu_task_ipi_delay, int, 0644);
+
+/* Control stall timeouts. Disable with <= 0, otherwise jiffies till stall. */
+#define RCU_TASK_STALL_TIMEOUT (HZ * 60 * 10)
+static int rcu_task_stall_timeout __read_mostly = RCU_TASK_STALL_TIMEOUT;
+module_param(rcu_task_stall_timeout, int, 0644);
+
+/* RCU tasks grace-period state for debugging. */
+#define RTGS_INIT 0
+#define RTGS_WAIT_WAIT_CBS 1
+#define RTGS_WAIT_GP 2
+#define RTGS_PRE_WAIT_GP 3
+#define RTGS_SCAN_TASKLIST 4
+#define RTGS_POST_SCAN_TASKLIST 5
+#define RTGS_WAIT_SCAN_HOLDOUTS 6
+#define RTGS_SCAN_HOLDOUTS 7
+#define RTGS_POST_GP 8
+#define RTGS_WAIT_READERS 9
+#define RTGS_INVOKE_CBS 10
+#define RTGS_WAIT_CBS 11
+static const char * const rcu_tasks_gp_state_names[] = {
+ "RTGS_INIT",
+ "RTGS_WAIT_WAIT_CBS",
+ "RTGS_WAIT_GP",
+ "RTGS_PRE_WAIT_GP",
+ "RTGS_SCAN_TASKLIST",
+ "RTGS_POST_SCAN_TASKLIST",
+ "RTGS_WAIT_SCAN_HOLDOUTS",
+ "RTGS_SCAN_HOLDOUTS",
+ "RTGS_POST_GP",
+ "RTGS_WAIT_READERS",
+ "RTGS_INVOKE_CBS",
+ "RTGS_WAIT_CBS",
+};
+
+////////////////////////////////////////////////////////////////////////
+//
+// Generic code.
+
+/* Record grace-period phase and time. */
+static void set_tasks_gp_state(struct rcu_tasks *rtp, int newstate)
+{
+ rtp->gp_state = newstate;
+ rtp->gp_jiffies = jiffies;
+}
+
+/* Return state name. */
+static const char *tasks_gp_state_getname(struct rcu_tasks *rtp)
+{
+ int i = data_race(rtp->gp_state); // Let KCSAN detect update races
+ int j = READ_ONCE(i); // Prevent the compiler from reading twice
+
+ if (j >= ARRAY_SIZE(rcu_tasks_gp_state_names))
+ return "???";
+ return rcu_tasks_gp_state_names[j];
+}
+
+// Enqueue a callback for the specified flavor of Tasks RCU.
+static void call_rcu_tasks_generic(struct rcu_head *rhp, rcu_callback_t func,
+ struct rcu_tasks *rtp)
+{
+ unsigned long flags;
+ bool needwake;
+
+ rhp->next = NULL;
+ rhp->func = func;
+ raw_spin_lock_irqsave(&rtp->cbs_lock, flags);
+ needwake = !rtp->cbs_head;
+ WRITE_ONCE(*rtp->cbs_tail, rhp);
+ rtp->cbs_tail = &rhp->next;
+ raw_spin_unlock_irqrestore(&rtp->cbs_lock, flags);
+ /* We can't create the thread unless interrupts are enabled. */
+ if (needwake && READ_ONCE(rtp->kthread_ptr))
+ wake_up(&rtp->cbs_wq);
+}
+
+// Wait for a grace period for the specified flavor of Tasks RCU.
+static void synchronize_rcu_tasks_generic(struct rcu_tasks *rtp)
+{
+ /* Complain if the scheduler has not started. */
+ RCU_LOCKDEP_WARN(rcu_scheduler_active == RCU_SCHEDULER_INACTIVE,
+ "synchronize_rcu_tasks called too soon");
+
+ /* Wait for the grace period. */
+ wait_rcu_gp(rtp->call_func);
+}
+
+/* RCU-tasks kthread that detects grace periods and invokes callbacks. */
+static int __noreturn rcu_tasks_kthread(void *arg)
+{
+ unsigned long flags;
+ struct rcu_head *list;
+ struct rcu_head *next;
+ struct rcu_tasks *rtp = arg;
+
+ /* Run on housekeeping CPUs by default. Sysadm can move if desired. */
+ housekeeping_affine(current, HK_FLAG_RCU);
+ WRITE_ONCE(rtp->kthread_ptr, current); // Let GPs start!
+
+ /*
+ * Each pass through the following loop makes one check for
+ * newly arrived callbacks, and, if there are some, waits for
+ * one RCU-tasks grace period and then invokes the callbacks.
+ * This loop is terminated by the system going down. ;-)
+ */
+ for (;;) {
+
+ /* Pick up any new callbacks. */
+ raw_spin_lock_irqsave(&rtp->cbs_lock, flags);
+ smp_mb__after_spinlock(); // Order updates vs. GP.
+ list = rtp->cbs_head;
+ rtp->cbs_head = NULL;
+ rtp->cbs_tail = &rtp->cbs_head;
+ raw_spin_unlock_irqrestore(&rtp->cbs_lock, flags);
+
+ /* If there were none, wait a bit and start over. */
+ if (!list) {
+ wait_event_interruptible(rtp->cbs_wq,
+ READ_ONCE(rtp->cbs_head));
+ if (!rtp->cbs_head) {
+ WARN_ON(signal_pending(current));
+ set_tasks_gp_state(rtp, RTGS_WAIT_WAIT_CBS);
+ schedule_timeout_interruptible(HZ/10);
+ }
+ continue;
+ }
+
+ // Wait for one grace period.
+ set_tasks_gp_state(rtp, RTGS_WAIT_GP);
+ rtp->gp_start = jiffies;
+ rtp->gp_func(rtp);
+ rtp->n_gps++;
+
+ /* Invoke the callbacks. */
+ set_tasks_gp_state(rtp, RTGS_INVOKE_CBS);
+ while (list) {
+ next = list->next;
+ local_bh_disable();
+ list->func(list);
+ local_bh_enable();
+ list = next;
+ cond_resched();
+ }
+ /* Paranoid sleep to keep this from entering a tight loop */
+ schedule_timeout_uninterruptible(HZ/10);
+
+ set_tasks_gp_state(rtp, RTGS_WAIT_CBS);
+ }
+}
+
+/* Spawn RCU-tasks grace-period kthread, e.g., at core_initcall() time. */
+static void __init rcu_spawn_tasks_kthread_generic(struct rcu_tasks *rtp)
+{
+ struct task_struct *t;
+
+ t = kthread_run(rcu_tasks_kthread, rtp, "%s_kthread", rtp->kname);
+ if (WARN_ONCE(IS_ERR(t), "%s: Could not start %s grace-period kthread, OOM is now expected behavior\n", __func__, rtp->name))
+ return;
+ smp_mb(); /* Ensure others see full kthread. */
+}
+
+#ifndef CONFIG_TINY_RCU
+
+/*
+ * Print any non-default Tasks RCU settings.
+ */
+static void __init rcu_tasks_bootup_oddness(void)
+{
+#if defined(CONFIG_TASKS_RCU) || defined(CONFIG_TASKS_TRACE_RCU)
+ if (rcu_task_stall_timeout != RCU_TASK_STALL_TIMEOUT)
+ pr_info("\tTasks-RCU CPU stall warnings timeout set to %d (rcu_task_stall_timeout).\n", rcu_task_stall_timeout);
+#endif /* #if defined(CONFIG_TASKS_RCU) || defined(CONFIG_TASKS_TRACE_RCU) */
+#ifdef CONFIG_TASKS_RCU
+ pr_info("\tTrampoline variant of Tasks RCU enabled.\n");
+#endif /* #ifdef CONFIG_TASKS_RCU */
+#ifdef CONFIG_TASKS_RUDE_RCU
+ pr_info("\tRude variant of Tasks RCU enabled.\n");
+#endif /* #ifdef CONFIG_TASKS_RUDE_RCU */
+#ifdef CONFIG_TASKS_TRACE_RCU
+ pr_info("\tTracing variant of Tasks RCU enabled.\n");
+#endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
+}
+
+#endif /* #ifndef CONFIG_TINY_RCU */
+
+/* Dump out rcutorture-relevant state common to all RCU-tasks flavors. */
+static void show_rcu_tasks_generic_gp_kthread(struct rcu_tasks *rtp, char *s)
+{
+ pr_info("%s: %s(%d) since %lu g:%lu i:%lu/%lu %c%c %s\n",
+ rtp->kname,
+ tasks_gp_state_getname(rtp), data_race(rtp->gp_state),
+ jiffies - data_race(rtp->gp_jiffies),
+ data_race(rtp->n_gps),
+ data_race(rtp->n_ipis_fails), data_race(rtp->n_ipis),
+ ".k"[!!data_race(rtp->kthread_ptr)],
+ ".C"[!!data_race(rtp->cbs_head)],
+ s);
+}
+
+static void exit_tasks_rcu_finish_trace(struct task_struct *t);
+
+#if defined(CONFIG_TASKS_RCU) || defined(CONFIG_TASKS_TRACE_RCU)
+
+////////////////////////////////////////////////////////////////////////
+//
+// Shared code between task-list-scanning variants of Tasks RCU.
+
+/* Wait for one RCU-tasks grace period. */
+static void rcu_tasks_wait_gp(struct rcu_tasks *rtp)
+{
+ struct task_struct *g, *t;
+ unsigned long lastreport;
+ LIST_HEAD(holdouts);
+ int fract;
+
+ set_tasks_gp_state(rtp, RTGS_PRE_WAIT_GP);
+ rtp->pregp_func();
+
+ /*
+ * There were callbacks, so we need to wait for an RCU-tasks
+ * grace period. Start off by scanning the task list for tasks
+ * that are not already voluntarily blocked. Mark these tasks
+ * and make a list of them in holdouts.
+ */
+ set_tasks_gp_state(rtp, RTGS_SCAN_TASKLIST);
+ rcu_read_lock();
+ for_each_process_thread(g, t)
+ rtp->pertask_func(t, &holdouts);
+ rcu_read_unlock();
+
+ set_tasks_gp_state(rtp, RTGS_POST_SCAN_TASKLIST);
+ rtp->postscan_func(&holdouts);
+
+ /*
+ * Each pass through the following loop scans the list of holdout
+ * tasks, removing any that are no longer holdouts. When the list
+ * is empty, we are done.
+ */
+ lastreport = jiffies;
+
+ /* Start off with HZ/10 wait and slowly back off to 1 HZ wait. */
+ fract = 10;
+
+ for (;;) {
+ bool firstreport;
+ bool needreport;
+ int rtst;
+
+ if (list_empty(&holdouts))
+ break;
+
+ /* Slowly back off waiting for holdouts */
+ set_tasks_gp_state(rtp, RTGS_WAIT_SCAN_HOLDOUTS);
+ schedule_timeout_interruptible(HZ/fract);
+
+ if (fract > 1)
+ fract--;
+
+ rtst = READ_ONCE(rcu_task_stall_timeout);
+ needreport = rtst > 0 && time_after(jiffies, lastreport + rtst);
+ if (needreport)
+ lastreport = jiffies;
+ firstreport = true;
+ WARN_ON(signal_pending(current));
+ set_tasks_gp_state(rtp, RTGS_SCAN_HOLDOUTS);
+ rtp->holdouts_func(&holdouts, needreport, &firstreport);
+ }
+
+ set_tasks_gp_state(rtp, RTGS_POST_GP);
+ rtp->postgp_func(rtp);
+}
+
+#endif /* #if defined(CONFIG_TASKS_RCU) || defined(CONFIG_TASKS_TRACE_RCU) */
+
+#ifdef CONFIG_TASKS_RCU
+
+////////////////////////////////////////////////////////////////////////
+//
+// Simple variant of RCU whose quiescent states are voluntary context
+// switch, cond_resched_rcu_qs(), user-space execution, and idle.
+// As such, grace periods can take one good long time. There are no
+// read-side primitives similar to rcu_read_lock() and rcu_read_unlock()
+// because this implementation is intended to get the system into a safe
+// state for some of the manipulations involved in tracing and the like.
+// Finally, this implementation does not support high call_rcu_tasks()
+// rates from multiple CPUs. If this is required, per-CPU callback lists
+// will be needed.
+
+/* Pre-grace-period preparation. */
+static void rcu_tasks_pregp_step(void)
+{
+ /*
+ * Wait for all pre-existing t->on_rq and t->nvcsw transitions
+ * to complete. Invoking synchronize_rcu() suffices because all
+ * these transitions occur with interrupts disabled. Without this
+ * synchronize_rcu(), a read-side critical section that started
+ * before the grace period might be incorrectly seen as having
+ * started after the grace period.
+ *
+ * This synchronize_rcu() also dispenses with the need for a
+ * memory barrier on the first store to t->rcu_tasks_holdout,
+ * as it forces the store to happen after the beginning of the
+ * grace period.
+ */
+ synchronize_rcu();
+}
+
+/* Per-task initial processing. */
+static void rcu_tasks_pertask(struct task_struct *t, struct list_head *hop)
+{
+ if (t != current && READ_ONCE(t->on_rq) && !is_idle_task(t)) {
+ get_task_struct(t);
+ t->rcu_tasks_nvcsw = READ_ONCE(t->nvcsw);
+ WRITE_ONCE(t->rcu_tasks_holdout, true);
+ list_add(&t->rcu_tasks_holdout_list, hop);
+ }
+}
+
+/* Processing between scanning the task list and draining the holdout list. */
+static void rcu_tasks_postscan(struct list_head *hop)
+{
+ /*
+ * Wait for tasks that are in the process of exiting. This
+ * does only part of the job, ensuring that all tasks that were
+ * previously exiting reach the point where they have disabled
+ * preemption, allowing the later synchronize_rcu() to finish
+ * the job.
+ */
+ synchronize_srcu(&tasks_rcu_exit_srcu);
+}
+
+/* See if tasks are still holding out, complain if so. */
+static void check_holdout_task(struct task_struct *t,
+ bool needreport, bool *firstreport)
+{
+ int cpu;
+
+ if (!READ_ONCE(t->rcu_tasks_holdout) ||
+ t->rcu_tasks_nvcsw != READ_ONCE(t->nvcsw) ||
+ !READ_ONCE(t->on_rq) ||
+ (IS_ENABLED(CONFIG_NO_HZ_FULL) &&
+ !is_idle_task(t) && t->rcu_tasks_idle_cpu >= 0)) {
+ WRITE_ONCE(t->rcu_tasks_holdout, false);
+ list_del_init(&t->rcu_tasks_holdout_list);
+ put_task_struct(t);
+ return;
+ }
+ rcu_request_urgent_qs_task(t);
+ if (!needreport)
+ return;
+ if (*firstreport) {
+ pr_err("INFO: rcu_tasks detected stalls on tasks:\n");
+ *firstreport = false;
+ }
+ cpu = task_cpu(t);
+ pr_alert("%p: %c%c nvcsw: %lu/%lu holdout: %d idle_cpu: %d/%d\n",
+ t, ".I"[is_idle_task(t)],
+ "N."[cpu < 0 || !tick_nohz_full_cpu(cpu)],
+ t->rcu_tasks_nvcsw, t->nvcsw, t->rcu_tasks_holdout,
+ t->rcu_tasks_idle_cpu, cpu);
+ sched_show_task(t);
+}
+
+/* Scan the holdout lists for tasks no longer holding out. */
+static void check_all_holdout_tasks(struct list_head *hop,
+ bool needreport, bool *firstreport)
+{
+ struct task_struct *t, *t1;
+
+ list_for_each_entry_safe(t, t1, hop, rcu_tasks_holdout_list) {
+ check_holdout_task(t, needreport, firstreport);
+ cond_resched();
+ }
+}
+
+/* Finish off the Tasks-RCU grace period. */
+static void rcu_tasks_postgp(struct rcu_tasks *rtp)
+{
+ /*
+ * Because ->on_rq and ->nvcsw are not guaranteed to have full
+ * memory barriers prior to them in the schedule() path, memory
+ * reordering on other CPUs could cause their RCU-tasks read-side
+ * critical sections to extend past the end of the grace period.
+ * However, because these ->nvcsw updates are carried out with
+ * interrupts disabled, we can use synchronize_rcu() to force the
+ * needed ordering on all such CPUs.
+ *
+ * This synchronize_rcu() also confines all ->rcu_tasks_holdout
+ * accesses to be within the grace period, avoiding the need for
+ * memory barriers for ->rcu_tasks_holdout accesses.
+ *
+ * In addition, this synchronize_rcu() waits for exiting tasks
+ * to complete their final preempt_disable() region of execution,
+ * cleaning up after the synchronize_srcu() above.
+ */
+ synchronize_rcu();
+}
+
+void call_rcu_tasks(struct rcu_head *rhp, rcu_callback_t func);
+DEFINE_RCU_TASKS(rcu_tasks, rcu_tasks_wait_gp, call_rcu_tasks, "RCU Tasks");
+
+/**
+ * call_rcu_tasks() - Queue a callback for invocation after a task-based grace period
+ * @rhp: structure to be used for queueing the RCU updates.
+ * @func: actual callback function to be invoked after the grace period
+ *
+ * The callback function will be invoked some time after a full grace
+ * period elapses, in other words after all currently executing RCU
+ * read-side critical sections have completed. call_rcu_tasks() assumes
+ * that the read-side critical sections end at a voluntary context
+ * switch (not a preemption!), cond_resched_rcu_qs(), entry into idle,
+ * or transition to usermode execution. As such, there are no read-side
+ * primitives analogous to rcu_read_lock() and rcu_read_unlock() because
+ * this primitive is intended to determine that all tasks have passed
+ * through a safe state, not so much for data-structure synchronization.
+ *
+ * See the description of call_rcu() for more detailed information on
+ * memory ordering guarantees.
+ */
+void call_rcu_tasks(struct rcu_head *rhp, rcu_callback_t func)
+{
+ call_rcu_tasks_generic(rhp, func, &rcu_tasks);
+}
+EXPORT_SYMBOL_GPL(call_rcu_tasks);
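+
+/*
+ * Illustrative usage sketch (hypothetical names, not part of this file):
+ * a tracer replacing a trampoline might defer freeing the old descriptor
+ * until all tasks have passed through a quiescent state:
+ *
+ *	struct foo_tramp {
+ *		struct rcu_head rh;
+ *		void *insns;
+ *	};
+ *
+ *	static void foo_free_tramp(struct rcu_head *rhp)
+ *	{
+ *		kfree(container_of(rhp, struct foo_tramp, rh));
+ *	}
+ *
+ *	// After unhooking, so that no new tasks can enter the trampoline:
+ *	call_rcu_tasks(&tp->rh, foo_free_tramp);
+ */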
+
+/**
+ * synchronize_rcu_tasks - wait until an rcu-tasks grace period has elapsed.
+ *
+ * Control will return to the caller some time after a full rcu-tasks
+ * grace period has elapsed, in other words after all currently
+ * executing rcu-tasks read-side critical sections have completed. These
+ * read-side critical sections are delimited by calls to schedule(),
+ * cond_resched_tasks_rcu_qs(), idle execution, userspace execution, calls
+ * to synchronize_rcu_tasks(), and (in theory, anyway) cond_resched().
+ *
+ * This is a very specialized primitive, intended only for a few uses in
+ * tracing and other situations requiring manipulation of function
+ * preambles and profiling hooks. The synchronize_rcu_tasks() function
+ * is not (yet) intended for heavy use from multiple CPUs.
+ *
+ * See the description of synchronize_rcu() for more detailed information
+ * on memory ordering guarantees.
+ */
+void synchronize_rcu_tasks(void)
+{
+ synchronize_rcu_tasks_generic(&rcu_tasks);
+}
+EXPORT_SYMBOL_GPL(synchronize_rcu_tasks);
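+
+/*
+ * Illustrative synchronous sketch (hypothetical helpers): when the caller
+ * may block, the callback-free form of the above pattern is simply:
+ *
+ *	foo_unhook_tramp(tp);		// No new tasks can enter.
+ *	synchronize_rcu_tasks();	// Wait out tasks already inside.
+ *	kfree(tp);			// Now safe to free.
+ */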
+
+/**
+ * rcu_barrier_tasks - Wait for in-flight call_rcu_tasks() callbacks.
+ *
+ * Although the current implementation is guaranteed to wait, it is not
+ * obligated to, for example, if there are no pending callbacks.
+ */
+void rcu_barrier_tasks(void)
+{
+ /* There is only one callback queue, so this is easy. ;-) */
+ synchronize_rcu_tasks();
+}
+EXPORT_SYMBOL_GPL(rcu_barrier_tasks);
+
+static int __init rcu_spawn_tasks_kthread(void)
+{
+ rcu_tasks.pregp_func = rcu_tasks_pregp_step;
+ rcu_tasks.pertask_func = rcu_tasks_pertask;
+ rcu_tasks.postscan_func = rcu_tasks_postscan;
+ rcu_tasks.holdouts_func = check_all_holdout_tasks;
+ rcu_tasks.postgp_func = rcu_tasks_postgp;
+ rcu_spawn_tasks_kthread_generic(&rcu_tasks);
+ return 0;
+}
+core_initcall(rcu_spawn_tasks_kthread);
+
+static void show_rcu_tasks_classic_gp_kthread(void)
+{
+ show_rcu_tasks_generic_gp_kthread(&rcu_tasks, "");
+}
+
+/* Do the srcu_read_lock() for the above synchronize_srcu(). */
+void exit_tasks_rcu_start(void) __acquires(&tasks_rcu_exit_srcu)
+{
+ preempt_disable();
+ current->rcu_tasks_idx = __srcu_read_lock(&tasks_rcu_exit_srcu);
+ preempt_enable();
+}
+
+/* Do the srcu_read_unlock() for the above synchronize_srcu(). */
+void exit_tasks_rcu_finish(void) __releases(&tasks_rcu_exit_srcu)
+{
+ struct task_struct *t = current;
+
+ preempt_disable();
+ __srcu_read_unlock(&tasks_rcu_exit_srcu, t->rcu_tasks_idx);
+ preempt_enable();
+ exit_tasks_rcu_finish_trace(t);
+}
+
+#else /* #ifdef CONFIG_TASKS_RCU */
+static void show_rcu_tasks_classic_gp_kthread(void) { }
+void exit_tasks_rcu_start(void) { }
+void exit_tasks_rcu_finish(void) { exit_tasks_rcu_finish_trace(current); }
+#endif /* #else #ifdef CONFIG_TASKS_RCU */
+
+#ifdef CONFIG_TASKS_RUDE_RCU
+
+////////////////////////////////////////////////////////////////////////
+//
+// "Rude" variant of Tasks RCU, inspired by Steve Rostedt's trick of
+// passing an empty function to schedule_on_each_cpu(). This approach
+// provides an asynchronous call_rcu_tasks_rude() API and batching
+// of concurrent calls to the synchronous synchronize_rcu_rude() API.
+// This sends IPIs far and wide and induces otherwise unnecessary context
+// switches on all online CPUs, whether idle or not.
+
+// Empty function to allow workqueues to force a context switch.
+static void rcu_tasks_be_rude(struct work_struct *work)
+{
+}
+
+// Wait for one rude RCU-tasks grace period.
+static void rcu_tasks_rude_wait_gp(struct rcu_tasks *rtp)
+{
+ rtp->n_ipis += cpumask_weight(cpu_online_mask);
+ schedule_on_each_cpu(rcu_tasks_be_rude);
+}
+
+void call_rcu_tasks_rude(struct rcu_head *rhp, rcu_callback_t func);
+DEFINE_RCU_TASKS(rcu_tasks_rude, rcu_tasks_rude_wait_gp, call_rcu_tasks_rude,
+ "RCU Tasks Rude");
+
+/**
+ * call_rcu_tasks_rude() - Queue a callback for a rude task-based grace period
+ * @rhp: structure to be used for queueing the RCU updates.
+ * @func: actual callback function to be invoked after the grace period
+ *
+ * The callback function will be invoked some time after a full grace
+ * period elapses, in other words after all currently executing RCU
+ * read-side critical sections have completed. call_rcu_tasks_rude()
+ * assumes that the read-side critical sections end at context switch,
+ * cond_resched_rcu_qs(), or transition to usermode execution. As such,
+ * there are no read-side primitives analogous to rcu_read_lock() and
+ * rcu_read_unlock() because this primitive is intended to determine
+ * that all tasks have passed through a safe state, not so much for
+ * data-structure synchronization.
+ *
+ * See the description of call_rcu() for more detailed information on
+ * memory ordering guarantees.
+ */
+void call_rcu_tasks_rude(struct rcu_head *rhp, rcu_callback_t func)
+{
+ call_rcu_tasks_generic(rhp, func, &rcu_tasks_rude);
+}
+EXPORT_SYMBOL_GPL(call_rcu_tasks_rude);
+
+/**
+ * synchronize_rcu_tasks_rude - wait for a rude rcu-tasks grace period
+ *
+ * Control will return to the caller some time after a rude rcu-tasks
+ * grace period has elapsed, in other words after all currently
+ * executing rcu-tasks read-side critical sections have completed. These
+ * read-side critical sections are delimited by calls to schedule(),
+ * cond_resched_tasks_rcu_qs(), userspace execution, and (in theory,
+ * anyway) cond_resched().
+ *
+ * This is a very specialized primitive, intended only for a few uses in
+ * tracing and other situations requiring manipulation of function preambles
+ * and profiling hooks. The synchronize_rcu_tasks_rude() function is not
+ * (yet) intended for heavy use from multiple CPUs.
+ *
+ * See the description of synchronize_rcu() for more detailed information
+ * on memory ordering guarantees.
+ */
+void synchronize_rcu_tasks_rude(void)
+{
+ synchronize_rcu_tasks_generic(&rcu_tasks_rude);
+}
+EXPORT_SYMBOL_GPL(synchronize_rcu_tasks_rude);
+
+/**
+ * rcu_barrier_tasks_rude - Wait for in-flight call_rcu_tasks_rude() callbacks.
+ *
+ * Although the current implementation is guaranteed to wait, it is not
+ * obligated to, for example, if there are no pending callbacks.
+ */
+void rcu_barrier_tasks_rude(void)
+{
+ /* There is only one callback queue, so this is easy. ;-) */
+ synchronize_rcu_tasks_rude();
+}
+EXPORT_SYMBOL_GPL(rcu_barrier_tasks_rude);
+
+static int __init rcu_spawn_tasks_rude_kthread(void)
+{
+ rcu_spawn_tasks_kthread_generic(&rcu_tasks_rude);
+ return 0;
+}
+core_initcall(rcu_spawn_tasks_rude_kthread);
+
+static void show_rcu_tasks_rude_gp_kthread(void)
+{
+ show_rcu_tasks_generic_gp_kthread(&rcu_tasks_rude, "");
+}
+
+#else /* #ifdef CONFIG_TASKS_RUDE_RCU */
+static void show_rcu_tasks_rude_gp_kthread(void) {}
+#endif /* #else #ifdef CONFIG_TASKS_RUDE_RCU */
+
+////////////////////////////////////////////////////////////////////////
+//
+// Tracing variant of Tasks RCU. This variant is designed to be used
+// to protect tracing hooks, including those of BPF. This variant
+// therefore:
+//
+// 1. Has explicit read-side markers to allow finite grace periods
+// in the face of in-kernel loops for PREEMPT=n builds.
+//
+// 2. Protects code in the idle loop, exception entry/exit, and
+// CPU-hotplug code paths, similar to the capabilities of SRCU.
+//
+// 3. Avoids expensive read-side instruction, having overhead similar
+// to that of Preemptible RCU.
+//
+// There are of course downsides. The grace-period code can send IPIs to
+// CPUs, even when those CPUs are in the idle loop or in nohz_full userspace.
+// It is necessary to scan the full tasklist, much as for Tasks RCU. There
+// is a single callback queue guarded by a single lock, again, much as for
+// Tasks RCU. If needed, these downsides can be at least partially remedied.
+//
+// Perhaps most important, this variant of RCU does not affect the vanilla
+// flavors, rcu_preempt and rcu_sched. The fact that RCU Tasks Trace
+// readers can operate from idle, offline, and exception entry/exit in no
+// way allows rcu_preempt and rcu_sched readers to also do so.
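+//
+// Illustrative read-side sketch (hypothetical "hook" layout): readers of a
+// structure protected by this flavor bracket the access with
+// rcu_read_lock_trace()/rcu_read_unlock_trace() from rcupdate_trace.h:
+//
+//	rcu_read_lock_trace();
+//	prog = rcu_dereference_check(hook->prog, rcu_read_lock_trace_held());
+//	if (prog)
+//		prog->func(ctx);
+//	rcu_read_unlock_trace();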
+
+// The lockdep state must be outside of #ifdef to be useful.
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+static struct lock_class_key rcu_lock_trace_key;
+struct lockdep_map rcu_trace_lock_map =
+ STATIC_LOCKDEP_MAP_INIT("rcu_read_lock_trace", &rcu_lock_trace_key);
+EXPORT_SYMBOL_GPL(rcu_trace_lock_map);
+#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+
+#ifdef CONFIG_TASKS_TRACE_RCU
+
+atomic_t trc_n_readers_need_end; // Number of waited-for readers.
+DECLARE_WAIT_QUEUE_HEAD(trc_wait); // List of holdout tasks.
+
+// Record outstanding IPIs to each CPU. No point in sending two...
+static DEFINE_PER_CPU(bool, trc_ipi_to_cpu);
+
+// The number of detections of task quiescent state relying on
+// heavyweight readers executing explicit memory barriers.
+unsigned long n_heavy_reader_attempts;
+unsigned long n_heavy_reader_updates;
+unsigned long n_heavy_reader_ofl_updates;
+
+void call_rcu_tasks_trace(struct rcu_head *rhp, rcu_callback_t func);
+DEFINE_RCU_TASKS(rcu_tasks_trace, rcu_tasks_wait_gp, call_rcu_tasks_trace,
+ "RCU Tasks Trace");
+
+/*
+ * This irq_work handler allows rcu_read_unlock_trace() to be invoked
+ * while the scheduler locks are held.
+ */
+static void rcu_read_unlock_iw(struct irq_work *iwp)
+{
+ wake_up(&trc_wait);
+}
+static DEFINE_IRQ_WORK(rcu_tasks_trace_iw, rcu_read_unlock_iw);
+
+/* If we are the last reader, wake up the grace-period kthread. */
+void rcu_read_unlock_trace_special(struct task_struct *t, int nesting)
+{
+ int nq = t->trc_reader_special.b.need_qs;
+
+ if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB) &&
+ t->trc_reader_special.b.need_mb)
+ smp_mb(); // Pairs with update-side barriers.
+ // Update .need_qs before ->trc_reader_nesting for irq/NMI handlers.
+ if (nq)
+ WRITE_ONCE(t->trc_reader_special.b.need_qs, false);
+ WRITE_ONCE(t->trc_reader_nesting, nesting);
+ if (nq && atomic_dec_and_test(&trc_n_readers_need_end))
+ irq_work_queue(&rcu_tasks_trace_iw);
+}
+EXPORT_SYMBOL_GPL(rcu_read_unlock_trace_special);
+
+/* Add a task to the holdout list, if it is not already on the list. */
+static void trc_add_holdout(struct task_struct *t, struct list_head *bhp)
+{
+ if (list_empty(&t->trc_holdout_list)) {
+ get_task_struct(t);
+ list_add(&t->trc_holdout_list, bhp);
+ }
+}
+
+/* Remove a task from the holdout list, if it is in fact present. */
+static void trc_del_holdout(struct task_struct *t)
+{
+ if (!list_empty(&t->trc_holdout_list)) {
+ list_del_init(&t->trc_holdout_list);
+ put_task_struct(t);
+ }
+}
+
+/* IPI handler to check task state. */
+static void trc_read_check_handler(void *t_in)
+{
+ struct task_struct *t = current;
+ struct task_struct *texp = t_in;
+
+ // If the task is no longer running on this CPU, leave.
+ if (unlikely(texp != t)) {
+ if (WARN_ON_ONCE(atomic_dec_and_test(&trc_n_readers_need_end)))
+ wake_up(&trc_wait);
+ goto reset_ipi; // Already on holdout list, so will check later.
+ }
+
+ // If the task is not in a read-side critical section, and
+ // if this is the last reader, awaken the grace-period kthread.
+ if (likely(!t->trc_reader_nesting)) {
+ if (WARN_ON_ONCE(atomic_dec_and_test(&trc_n_readers_need_end)))
+ wake_up(&trc_wait);
+ // Mark as checked after decrement to avoid false
+ // positives on the above WARN_ON_ONCE().
+ WRITE_ONCE(t->trc_reader_checked, true);
+ goto reset_ipi;
+ }
+ WRITE_ONCE(t->trc_reader_checked, true);
+
+ // Get here if the task is in a read-side critical section. Set
+ // its state so that it will awaken the grace-period kthread upon
+ // exit from that critical section.
+ WARN_ON_ONCE(t->trc_reader_special.b.need_qs);
+ WRITE_ONCE(t->trc_reader_special.b.need_qs, true);
+
+reset_ipi:
+ // Allow future IPIs to be sent on CPU and for task.
+ // Also order this IPI handler against any later manipulations of
+ // the intended task.
+ smp_store_release(&per_cpu(trc_ipi_to_cpu, smp_processor_id()), false); // ^^^
+ smp_store_release(&texp->trc_ipi_to_cpu, -1); // ^^^
+}
+
+/* Callback function for scheduler to check locked-down task. */
+static bool trc_inspect_reader(struct task_struct *t, void *arg)
+{
+ int cpu = task_cpu(t);
+ bool in_qs = false;
+ bool ofl = cpu_is_offline(cpu);
+
+ if (task_curr(t)) {
+ WARN_ON_ONCE(ofl && !is_idle_task(t));
+
+ // If no chance of heavyweight readers, do it the hard way.
+ if (!ofl && !IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB))
+ return false;
+
+ // If heavyweight readers are enabled on the remote task,
+ // we can inspect its state even though it is currently running.
+ // However, we cannot safely change its state.
+ n_heavy_reader_attempts++;
+ if (!ofl && // Check for "running" idle tasks on offline CPUs.
+ !rcu_dynticks_zero_in_eqs(cpu, &t->trc_reader_nesting))
+ return false; // No quiescent state, do it the hard way.
+ n_heavy_reader_updates++;
+ if (ofl)
+ n_heavy_reader_ofl_updates++;
+ in_qs = true;
+ } else {
+ in_qs = likely(!t->trc_reader_nesting);
+ }
+
+ // Mark as checked. Because this is called from the grace-period
+ // kthread, also remove the task from the holdout list.
+ t->trc_reader_checked = true;
+ trc_del_holdout(t);
+
+ if (in_qs)
+ return true; // Already in quiescent state, done!!!
+
+ // The task is in a read-side critical section, so set up its
+ // state so that it will awaken the grace-period kthread upon exit
+ // from that critical section.
+ atomic_inc(&trc_n_readers_need_end); // One more to wait on.
+ WARN_ON_ONCE(t->trc_reader_special.b.need_qs);
+ WRITE_ONCE(t->trc_reader_special.b.need_qs, true);
+ return true;
+}
+
+/* Attempt to extract the state for the specified task. */
+static void trc_wait_for_one_reader(struct task_struct *t,
+ struct list_head *bhp)
+{
+ int cpu;
+
+ // If a previous IPI is still in flight, let it complete.
+ if (smp_load_acquire(&t->trc_ipi_to_cpu) != -1) // Order IPI
+ return;
+
+ // The current task had better be in a quiescent state.
+ if (t == current) {
+ t->trc_reader_checked = true;
+ trc_del_holdout(t);
+ WARN_ON_ONCE(t->trc_reader_nesting);
+ return;
+ }
+
+ // Attempt to nail down the task for inspection.
+ get_task_struct(t);
+ if (try_invoke_on_locked_down_task(t, trc_inspect_reader, NULL)) {
+ put_task_struct(t);
+ return;
+ }
+ put_task_struct(t);
+
+ // If currently running, send an IPI, either way, add to list.
+ trc_add_holdout(t, bhp);
+ if (task_curr(t) && time_after(jiffies, rcu_tasks_trace.gp_start + rcu_task_ipi_delay)) {
+ // The task is currently running, so try IPIing it.
+ cpu = task_cpu(t);
+
+ // If there is already an IPI outstanding, let it happen.
+ if (per_cpu(trc_ipi_to_cpu, cpu) || t->trc_ipi_to_cpu >= 0)
+ return;
+
+ atomic_inc(&trc_n_readers_need_end);
+ per_cpu(trc_ipi_to_cpu, cpu) = true;
+ t->trc_ipi_to_cpu = cpu;
+ rcu_tasks_trace.n_ipis++;
+ if (smp_call_function_single(cpu,
+ trc_read_check_handler, t, 0)) {
+ // Just in case there is some other reason for
+ // failure than the target CPU being offline.
+ rcu_tasks_trace.n_ipis_fails++;
+ per_cpu(trc_ipi_to_cpu, cpu) = false;
+ t->trc_ipi_to_cpu = -1; // Allow a later scan to retry the IPI.
+ if (atomic_dec_and_test(&trc_n_readers_need_end)) {
+ WARN_ON_ONCE(1);
+ wake_up(&trc_wait);
+ }
+ }
+ }
+}
+
+/* Initialize for a new RCU-tasks-trace grace period. */
+static void rcu_tasks_trace_pregp_step(void)
+{
+ int cpu;
+
+ // Allow for fast-acting IPIs.
+ atomic_set(&trc_n_readers_need_end, 1);
+
+ // There shouldn't be any old IPIs, but...
+ for_each_possible_cpu(cpu)
+ WARN_ON_ONCE(per_cpu(trc_ipi_to_cpu, cpu));
+
+ // Disable CPU hotplug across the tasklist scan.
+ // This also waits for all readers in CPU-hotplug code paths.
+ cpus_read_lock();
+}
+
+/* Do first-round processing for the specified task. */
+static void rcu_tasks_trace_pertask(struct task_struct *t,
+ struct list_head *hop)
+{
+ WRITE_ONCE(t->trc_reader_special.b.need_qs, false);
+ WRITE_ONCE(t->trc_reader_checked, false);
+ t->trc_ipi_to_cpu = -1;
+ trc_wait_for_one_reader(t, hop);
+}
+
+/*
+ * Do intermediate processing between task and holdout scans and
+ * pick up the idle tasks.
+ */
+static void rcu_tasks_trace_postscan(struct list_head *hop)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu)
+ rcu_tasks_trace_pertask(idle_task(cpu), hop);
+
+ // Re-enable CPU hotplug now that the tasklist scan has completed.
+ cpus_read_unlock();
+
+ // Wait for late-stage exiting tasks to finish exiting.
+ // These might have passed the call to exit_tasks_rcu_finish().
+ synchronize_rcu();
+ // Any tasks that exit after this point will set ->trc_reader_checked.
+}
+
+/* Show the state of a task stalling the current RCU tasks trace GP. */
+static void show_stalled_task_trace(struct task_struct *t, bool *firstreport)
+{
+ int cpu;
+
+ if (*firstreport) {
+ pr_err("INFO: rcu_tasks_trace detected stalls on tasks:\n");
+ *firstreport = false;
+ }
+ // FIXME: This should attempt to use try_invoke_on_nonrunning_task().
+ cpu = task_cpu(t);
+ pr_alert("P%d: %c%c%c nesting: %d%c cpu: %d\n",
+ t->pid,
+ ".I"[READ_ONCE(t->trc_ipi_to_cpu) > 0],
+ ".i"[is_idle_task(t)],
+ ".N"[cpu > 0 && tick_nohz_full_cpu(cpu)],
+ t->trc_reader_nesting,
+ " N"[!!t->trc_reader_special.b.need_qs],
+ cpu);
+ sched_show_task(t);
+}
+
+/* List stalled IPIs for RCU tasks trace. */
+static void show_stalled_ipi_trace(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu)
+ if (per_cpu(trc_ipi_to_cpu, cpu))
+ pr_alert("\tIPI outstanding to CPU %d\n", cpu);
+}
+
+/* Do one scan of the holdout list. */
+static void check_all_holdout_tasks_trace(struct list_head *hop,
+ bool needreport, bool *firstreport)
+{
+ struct task_struct *g, *t;
+
+ // Disable CPU hotplug across the holdout list scan.
+ cpus_read_lock();
+
+ list_for_each_entry_safe(t, g, hop, trc_holdout_list) {
+ // If safe and needed, try to check the current task.
+ if (READ_ONCE(t->trc_ipi_to_cpu) == -1 &&
+ !READ_ONCE(t->trc_reader_checked))
+ trc_wait_for_one_reader(t, hop);
+
+ // If check succeeded, remove this task from the list.
+ if (READ_ONCE(t->trc_reader_checked))
+ trc_del_holdout(t);
+ else if (needreport)
+ show_stalled_task_trace(t, firstreport);
+ }
+
+ // Re-enable CPU hotplug now that the holdout list scan has completed.
+ cpus_read_unlock();
+
+ if (needreport) {
+ if (*firstreport)
+ pr_err("INFO: rcu_tasks_trace detected stalls? (Late IPI?)\n");
+ show_stalled_ipi_trace();
+ }
+}
+
+/* Wait for grace period to complete and provide ordering. */
+static void rcu_tasks_trace_postgp(struct rcu_tasks *rtp)
+{
+ bool firstreport;
+ struct task_struct *g, *t;
+ LIST_HEAD(holdouts);
+ long ret;
+
+ // Remove the safety count.
+ smp_mb__before_atomic(); // Order vs. earlier atomics
+ atomic_dec(&trc_n_readers_need_end);
+ smp_mb__after_atomic(); // Order vs. later atomics
+
+ // Wait for readers.
+ set_tasks_gp_state(rtp, RTGS_WAIT_READERS);
+ for (;;) {
+ ret = wait_event_idle_exclusive_timeout(
+ trc_wait,
+ atomic_read(&trc_n_readers_need_end) == 0,
+ READ_ONCE(rcu_task_stall_timeout));
+ if (ret)
+ break; // Count reached zero.
+ // Stall warning time, so make a list of the offenders.
+ for_each_process_thread(g, t)
+ if (READ_ONCE(t->trc_reader_special.b.need_qs))
+ trc_add_holdout(t, &holdouts);
+ firstreport = true;
+ list_for_each_entry_safe(t, g, &holdouts, trc_holdout_list)
+ if (READ_ONCE(t->trc_reader_special.b.need_qs)) {
+ show_stalled_task_trace(t, &firstreport);
+ trc_del_holdout(t);
+ }
+ if (firstreport)
+ pr_err("INFO: rcu_tasks_trace detected stalls? (Counter/taskslist mismatch?)\n");
+ show_stalled_ipi_trace();
+ pr_err("\t%d holdouts\n", atomic_read(&trc_n_readers_need_end));
+ }
+ smp_mb(); // Caller's code must be ordered after wakeup.
+ // Pairs with pretty much every ordering primitive.
+}
+
+/* Report any needed quiescent state for this exiting task. */
+static void exit_tasks_rcu_finish_trace(struct task_struct *t)
+{
+ WRITE_ONCE(t->trc_reader_checked, true);
+ WARN_ON_ONCE(t->trc_reader_nesting);
+ WRITE_ONCE(t->trc_reader_nesting, 0);
+ if (WARN_ON_ONCE(READ_ONCE(t->trc_reader_special.b.need_qs)))
+ rcu_read_unlock_trace_special(t, 0);
+}
+
+/**
+ * call_rcu_tasks_trace() - Queue a callback trace task-based grace period
+ * @rhp: structure to be used for queueing the RCU updates.
+ * @func: actual callback function to be invoked after the grace period
+ *
+ * The callback function will be invoked some time after a full grace
+ * period elapses, in other words after all currently executing RCU
+ * read-side critical sections have completed. call_rcu_tasks_trace()
+ * assumes that the read-side critical sections end at context switch,
+ * cond_resched_rcu_qs(), or transition to usermode execution. As such,
+ * there are no read-side primitives analogous to rcu_read_lock() and
+ * rcu_read_unlock() because this primitive is intended to determine
+ * that all tasks have passed through a safe state, not so much for
+ * data-structure synchronization.
+ *
+ * See the description of call_rcu() for more detailed information on
+ * memory ordering guarantees.
+ */
+void call_rcu_tasks_trace(struct rcu_head *rhp, rcu_callback_t func)
+{
+ call_rcu_tasks_generic(rhp, func, &rcu_tasks_trace);
+}
+EXPORT_SYMBOL_GPL(call_rcu_tasks_trace);
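+
+/*
+ * Illustrative update-side sketch (hypothetical "hook" and foo_free_prog()):
+ * publish a new program and defer freeing the old one until all trace
+ * readers have finished:
+ *
+ *	old = rcu_replace_pointer(hook->prog, new_prog,
+ *				  lockdep_is_held(&hook->mutex));
+ *	if (old)
+ *		call_rcu_tasks_trace(&old->rh, foo_free_prog);
+ */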
+
+/**
+ * synchronize_rcu_tasks_trace - wait for a trace rcu-tasks grace period
+ *
+ * Control will return to the caller some time after a trace rcu-tasks
+ * grace period has elapsed, in other words after all currently
+ * executing rcu-tasks read-side critical sections have completed. These
+ * read-side critical sections are delimited by calls to schedule(),
+ * cond_resched_tasks_rcu_qs(), userspace execution, and (in theory,
+ * anyway) cond_resched().
+ *
+ * This is a very specialized primitive, intended only for a few uses in
+ * tracing and other situations requiring manipulation of function preambles
+ * and profiling hooks. The synchronize_rcu_tasks_trace() function is not
+ * (yet) intended for heavy use from multiple CPUs.
+ *
+ * See the description of synchronize_rcu() for more detailed information
+ * on memory ordering guarantees.
+ */
+void synchronize_rcu_tasks_trace(void)
+{
+ RCU_LOCKDEP_WARN(lock_is_held(&rcu_trace_lock_map), "Illegal synchronize_rcu_tasks_trace() in RCU Tasks Trace read-side critical section");
+ synchronize_rcu_tasks_generic(&rcu_tasks_trace);
+}
+EXPORT_SYMBOL_GPL(synchronize_rcu_tasks_trace);
+
+/**
+ * rcu_barrier_tasks_trace - Wait for in-flight call_rcu_tasks_trace() callbacks.
+ *
+ * Although the current implementation is guaranteed to wait, it is not
+ * obligated to, for example, if there are no pending callbacks.
+ */
+void rcu_barrier_tasks_trace(void)
+{
+ /* There is only one callback queue, so this is easy. ;-) */
+ synchronize_rcu_tasks_trace();
+}
+EXPORT_SYMBOL_GPL(rcu_barrier_tasks_trace);
+
+static int __init rcu_spawn_tasks_trace_kthread(void)
+{
+ rcu_tasks_trace.pregp_func = rcu_tasks_trace_pregp_step;
+ rcu_tasks_trace.pertask_func = rcu_tasks_trace_pertask;
+ rcu_tasks_trace.postscan_func = rcu_tasks_trace_postscan;
+ rcu_tasks_trace.holdouts_func = check_all_holdout_tasks_trace;
+ rcu_tasks_trace.postgp_func = rcu_tasks_trace_postgp;
+ rcu_spawn_tasks_kthread_generic(&rcu_tasks_trace);
+ return 0;
+}
+core_initcall(rcu_spawn_tasks_trace_kthread);
+
+static void show_rcu_tasks_trace_gp_kthread(void)
+{
+ char buf[64];
+
+ sprintf(buf, "N%d h:%lu/%lu/%lu", atomic_read(&trc_n_readers_need_end),
+ data_race(n_heavy_reader_ofl_updates),
+ data_race(n_heavy_reader_updates),
+ data_race(n_heavy_reader_attempts));
+ show_rcu_tasks_generic_gp_kthread(&rcu_tasks_trace, buf);
+}
+
+#else /* #ifdef CONFIG_TASKS_TRACE_RCU */
+static void exit_tasks_rcu_finish_trace(struct task_struct *t) { }
+static inline void show_rcu_tasks_trace_gp_kthread(void) {}
+#endif /* #else #ifdef CONFIG_TASKS_TRACE_RCU */
+
+void show_rcu_tasks_gp_kthreads(void)
+{
+ show_rcu_tasks_classic_gp_kthread();
+ show_rcu_tasks_rude_gp_kthread();
+ show_rcu_tasks_trace_gp_kthread();
+}
+
+#else /* #ifdef CONFIG_TASKS_RCU_GENERIC */
+static inline void rcu_tasks_bootup_oddness(void) {}
+void show_rcu_tasks_gp_kthreads(void) {}
+#endif /* #else #ifdef CONFIG_TASKS_RCU_GENERIC */
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index d9a49cd6065a..c716eadc7617 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -67,6 +67,19 @@
#endif
#define MODULE_PARAM_PREFIX "rcutree."
+#ifndef data_race
+#define data_race(expr) \
+ ({ \
+ expr; \
+ })
+#endif
+#ifndef ASSERT_EXCLUSIVE_WRITER
+#define ASSERT_EXCLUSIVE_WRITER(var) do { } while (0)
+#endif
+#ifndef ASSERT_EXCLUSIVE_ACCESS
+#define ASSERT_EXCLUSIVE_ACCESS(var) do { } while (0)
+#endif
+
/* Data structures. */
/*
@@ -75,9 +88,6 @@
*/
#define RCU_DYNTICK_CTRL_MASK 0x1
#define RCU_DYNTICK_CTRL_CTR (RCU_DYNTICK_CTRL_MASK + 1)
-#ifndef rcu_eqs_special_exit
-#define rcu_eqs_special_exit() do { } while (0)
-#endif
static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, rcu_data) = {
.dynticks_nesting = 1,
@@ -100,7 +110,7 @@ static struct rcu_state rcu_state = {
static bool dump_tree;
module_param(dump_tree, bool, 0444);
/* By default, use RCU_SOFTIRQ instead of rcuc kthreads. */
-static bool use_softirq = 1;
+static bool use_softirq = true;
module_param(use_softirq, bool, 0444);
/* Control rcu_node-tree auto-balancing at boot time. */
static bool rcu_fanout_exact;
@@ -225,9 +235,11 @@ void rcu_softirq_qs(void)
/*
* Record entry into an extended quiescent state. This is only to be
- * called when not already in an extended quiescent state.
+ * called when not already in an extended quiescent state, that is,
+ * RCU is watching prior to the call to this function and is no longer
+ * watching upon return.
*/
-static void rcu_dynticks_eqs_enter(void)
+static noinstr void rcu_dynticks_eqs_enter(void)
{
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
int seq;
@@ -237,8 +249,9 @@ static void rcu_dynticks_eqs_enter(void)
* critical sections, and we also must force ordering with the
* next idle sojourn.
*/
+ rcu_dynticks_task_trace_enter(); // Before ->dynticks update!
seq = atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdp->dynticks);
- /* Better be in an extended quiescent state! */
+ // RCU is no longer watching. Better be in extended quiescent state!
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
(seq & RCU_DYNTICK_CTRL_CTR));
/* Better not have special action (TLB flush) pending! */
@@ -248,9 +261,10 @@ static void rcu_dynticks_eqs_enter(void)
/*
* Record exit from an extended quiescent state. This is only to be
- * called from an extended quiescent state.
+ * called from an extended quiescent state, that is, RCU is not watching
+ * prior to the call to this function and is watching upon return.
*/
-static void rcu_dynticks_eqs_exit(void)
+static noinstr void rcu_dynticks_eqs_exit(void)
{
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
int seq;
@@ -261,13 +275,13 @@ static void rcu_dynticks_eqs_exit(void)
* critical section.
*/
seq = atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdp->dynticks);
+ // RCU is now watching. Better not be in an extended quiescent state!
+ rcu_dynticks_task_trace_exit(); // After ->dynticks update!
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
!(seq & RCU_DYNTICK_CTRL_CTR));
if (seq & RCU_DYNTICK_CTRL_MASK) {
atomic_andnot(RCU_DYNTICK_CTRL_MASK, &rdp->dynticks);
smp_mb__after_atomic(); /* _exit after clearing mask. */
- /* Prefer duplicate flushes to losing a flush. */
- rcu_eqs_special_exit();
}
}
@@ -295,7 +309,7 @@ static void rcu_dynticks_eqs_online(void)
*
* No ordering, as we are sampling CPU-local information.
*/
-static bool rcu_dynticks_curr_cpu_in_eqs(void)
+static __always_inline bool rcu_dynticks_curr_cpu_in_eqs(void)
{
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
@@ -333,6 +347,28 @@ static bool rcu_dynticks_in_eqs_since(struct rcu_data *rdp, int snap)
}
/*
+ * Return true if the referenced integer is zero while the specified
+ * CPU remains within a single extended quiescent state.
+ */
+bool rcu_dynticks_zero_in_eqs(int cpu, int *vp)
+{
+ struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
+ int snap;
+
+ // If not quiescent, force back to earlier extended quiescent state.
+ snap = atomic_read(&rdp->dynticks) & ~(RCU_DYNTICK_CTRL_MASK |
+ RCU_DYNTICK_CTRL_CTR);
+
+ smp_rmb(); // Order ->dynticks and *vp reads.
+ if (READ_ONCE(*vp))
+ return false; // Non-zero, so report failure;
+ smp_rmb(); // Order *vp read and ->dynticks re-read.
+
+ // If still in the same extended quiescent state, we are good!
+ return snap == (atomic_read(&rdp->dynticks) & ~RCU_DYNTICK_CTRL_MASK);
+}
+
+/*
* Set the special (bottom) bit of the specified CPU so that it
* will take special action (such as flushing its TLB) on the
* next exit from an extended quiescent state. Returns true if
@@ -382,16 +418,23 @@ void rcu_momentary_dyntick_idle(void)
EXPORT_SYMBOL_GPL(rcu_momentary_dyntick_idle);
/**
- * rcu_is_cpu_rrupt_from_idle - see if interrupted from idle
+ * rcu_is_cpu_rrupt_from_idle - see if 'interrupted' from idle
*
* If the current CPU is idle and running at a first-level (not nested)
- * interrupt from idle, return true. The caller must have at least
- * disabled preemption.
+ * interrupt, or directly, from idle, return true.
+ *
+ * The caller must have at least disabled IRQs.
*/
static int rcu_is_cpu_rrupt_from_idle(void)
{
- /* Called only from within the scheduling-clock interrupt */
- lockdep_assert_in_irq();
+ long nesting;
+
+ /*
+ * Usually called from the tick; but also used from smp_call_function()
+ * for expedited grace periods. This latter can result in running from
+ * the idle task, instead of an actual IPI.
+ */
+ lockdep_assert_irqs_disabled();
/* Check for counter underflows */
RCU_LOCKDEP_WARN(__this_cpu_read(rcu_data.dynticks_nesting) < 0,
@@ -400,9 +443,15 @@ static int rcu_is_cpu_rrupt_from_idle(void)
"RCU dynticks_nmi_nesting counter underflow/zero!");
/* Are we at first interrupt nesting level? */
- if (__this_cpu_read(rcu_data.dynticks_nmi_nesting) != 1)
+ nesting = __this_cpu_read(rcu_data.dynticks_nmi_nesting);
+ if (nesting > 1)
return false;
+ /*
+ * If we're not in an interrupt, we must be in the idle task!
+ */
+ WARN_ON_ONCE(!nesting && !is_idle_task(current));
+
/* Does CPU appear to be idle from an RCU standpoint? */
return __this_cpu_read(rcu_data.dynticks_nesting) == 0;
}
@@ -562,7 +611,7 @@ EXPORT_SYMBOL_GPL(rcutorture_get_gp_data);
* the possibility of usermode upcalls having messed up our count
* of interrupt nesting level during the prior busy period.
*/
-static void rcu_eqs_enter(bool user)
+static noinstr void rcu_eqs_enter(bool user)
{
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
@@ -571,19 +620,24 @@ static void rcu_eqs_enter(bool user)
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
rdp->dynticks_nesting == 0);
if (rdp->dynticks_nesting != 1) {
+ // RCU will still be watching, so just do accounting and leave.
rdp->dynticks_nesting--;
return;
}
lockdep_assert_irqs_disabled();
+ instrumentation_begin();
trace_rcu_dyntick(TPS("Start"), rdp->dynticks_nesting, 0, atomic_read(&rdp->dynticks));
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
rdp = this_cpu_ptr(&rcu_data);
do_nocb_deferred_wakeup(rdp);
rcu_prepare_for_idle();
rcu_preempt_deferred_qs(current);
+ instrumentation_end();
WRITE_ONCE(rdp->dynticks_nesting, 0); /* Avoid irq-access tearing. */
+ // RCU is watching here ...
rcu_dynticks_eqs_enter();
+ // ... but is no longer watching here.
rcu_dynticks_task_enter();
}
@@ -616,23 +670,25 @@ void rcu_idle_enter(void)
* If you add or remove a call to rcu_user_enter(), be sure to test with
* CONFIG_RCU_EQS_DEBUG=y.
*/
-void rcu_user_enter(void)
+noinstr void rcu_user_enter(void)
{
lockdep_assert_irqs_disabled();
rcu_eqs_enter(true);
}
#endif /* CONFIG_NO_HZ_FULL */
-/*
+/**
+ * rcu_nmi_exit - inform RCU of exit from NMI context
+ *
* If we are returning from the outermost NMI handler that interrupted an
* RCU-idle period, update rdp->dynticks and rdp->dynticks_nmi_nesting
* to let the RCU grace-period handling know that the CPU is back to
* being RCU-idle.
*
- * If you add or remove a call to rcu_nmi_exit_common(), be sure to test
+ * If you add or remove a call to rcu_nmi_exit(), be sure to test
* with CONFIG_RCU_EQS_DEBUG=y.
*/
-static __always_inline void rcu_nmi_exit_common(bool irq)
+noinstr void rcu_nmi_exit(void)
{
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
@@ -649,38 +705,33 @@ static __always_inline void rcu_nmi_exit_common(bool irq)
* leave it in non-RCU-idle state.
*/
if (rdp->dynticks_nmi_nesting != 1) {
+ instrumentation_begin();
trace_rcu_dyntick(TPS("--="), rdp->dynticks_nmi_nesting, rdp->dynticks_nmi_nesting - 2,
atomic_read(&rdp->dynticks));
WRITE_ONCE(rdp->dynticks_nmi_nesting, /* No store tearing. */
rdp->dynticks_nmi_nesting - 2);
+ instrumentation_end();
return;
}
+ instrumentation_begin();
/* This NMI interrupted an RCU-idle CPU, restore RCU-idleness. */
trace_rcu_dyntick(TPS("Startirq"), rdp->dynticks_nmi_nesting, 0, atomic_read(&rdp->dynticks));
WRITE_ONCE(rdp->dynticks_nmi_nesting, 0); /* Avoid store tearing. */
- if (irq)
+ if (!in_nmi())
rcu_prepare_for_idle();
+ instrumentation_end();
+ // RCU is watching here ...
rcu_dynticks_eqs_enter();
+ // ... but is no longer watching here.
- if (irq)
+ if (!in_nmi())
rcu_dynticks_task_enter();
}
/**
- * rcu_nmi_exit - inform RCU of exit from NMI context
- *
- * If you add or remove a call to rcu_nmi_exit(), be sure to test
- * with CONFIG_RCU_EQS_DEBUG=y.
- */
-void rcu_nmi_exit(void)
-{
- rcu_nmi_exit_common(false);
-}
-
-/**
* rcu_irq_exit - inform RCU that current CPU is exiting irq towards idle
*
* Exit from an interrupt handler, which might possibly result in entering
@@ -699,12 +750,52 @@ void rcu_nmi_exit(void)
* If you add or remove a call to rcu_irq_exit(), be sure to test with
* CONFIG_RCU_EQS_DEBUG=y.
*/
-void rcu_irq_exit(void)
+void noinstr rcu_irq_exit(void)
+{
+ lockdep_assert_irqs_disabled();
+ rcu_nmi_exit();
+}
+
+/**
+ * rcu_irq_exit_preempt - Inform RCU that current CPU is exiting irq
+ * towards in-kernel preemption
+ *
+ * Same as rcu_irq_exit() but has a sanity check that scheduling is safe
+ * from an RCU point of view. Invoked on return from interrupt before kernel
+ * preemption.
+ */
+void rcu_irq_exit_preempt(void)
{
lockdep_assert_irqs_disabled();
- rcu_nmi_exit_common(true);
+ rcu_nmi_exit();
+
+ RCU_LOCKDEP_WARN(__this_cpu_read(rcu_data.dynticks_nesting) <= 0,
+ "RCU dynticks_nesting counter underflow/zero!");
+ RCU_LOCKDEP_WARN(__this_cpu_read(rcu_data.dynticks_nmi_nesting) !=
+ DYNTICK_IRQ_NONIDLE,
+ "Bad RCU dynticks_nmi_nesting counter\n");
+ RCU_LOCKDEP_WARN(rcu_dynticks_curr_cpu_in_eqs(),
+ "RCU in extended quiescent state!");
}
+#ifdef CONFIG_PROVE_RCU
+/**
+ * rcu_irq_exit_check_preempt - Validate that scheduling is possible
+ */
+void rcu_irq_exit_check_preempt(void)
+{
+ lockdep_assert_irqs_disabled();
+
+ RCU_LOCKDEP_WARN(__this_cpu_read(rcu_data.dynticks_nesting) <= 0,
+ "RCU dynticks_nesting counter underflow/zero!");
+ RCU_LOCKDEP_WARN(__this_cpu_read(rcu_data.dynticks_nmi_nesting) !=
+ DYNTICK_IRQ_NONIDLE,
+ "Bad RCU dynticks_nmi_nesting counter\n");
+ RCU_LOCKDEP_WARN(rcu_dynticks_curr_cpu_in_eqs(),
+ "RCU in extended quiescent state!");
+}
+#endif /* #ifdef CONFIG_PROVE_RCU */
+
/*
* Wrapper for rcu_irq_exit() where interrupts are enabled.
*
@@ -728,7 +819,7 @@ void rcu_irq_exit_irqson(void)
* allow for the possibility of usermode upcalls messing up our count of
* interrupt nesting level during the busy period that is just now starting.
*/
-static void rcu_eqs_exit(bool user)
+static void noinstr rcu_eqs_exit(bool user)
{
struct rcu_data *rdp;
long oldval;
@@ -738,17 +829,22 @@ static void rcu_eqs_exit(bool user)
oldval = rdp->dynticks_nesting;
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && oldval < 0);
if (oldval) {
+ // RCU was already watching, so just do accounting and leave.
rdp->dynticks_nesting++;
return;
}
rcu_dynticks_task_exit();
+ // RCU is not watching here ...
rcu_dynticks_eqs_exit();
+ // ... but is watching here.
+ instrumentation_begin();
rcu_cleanup_after_idle();
trace_rcu_dyntick(TPS("End"), rdp->dynticks_nesting, 1, atomic_read(&rdp->dynticks));
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
WRITE_ONCE(rdp->dynticks_nesting, 1);
WARN_ON_ONCE(rdp->dynticks_nmi_nesting);
WRITE_ONCE(rdp->dynticks_nmi_nesting, DYNTICK_IRQ_NONIDLE);
+ instrumentation_end();
}
/**
@@ -779,14 +875,75 @@ void rcu_idle_exit(void)
* If you add or remove a call to rcu_user_exit(), be sure to test with
* CONFIG_RCU_EQS_DEBUG=y.
*/
-void rcu_user_exit(void)
+void noinstr rcu_user_exit(void)
{
rcu_eqs_exit(1);
}
+
+/**
+ * __rcu_irq_enter_check_tick - Enable scheduler tick on CPU if RCU needs it.
+ *
+ * The scheduler tick is not normally enabled when CPUs enter the kernel
+ * from nohz_full userspace execution. After all, nohz_full userspace
+ * execution is an RCU quiescent state and the time executing in the kernel
+ * is quite short. Except of course when it isn't. And it is not hard to
+ * cause a large system to spend tens of seconds or even minutes looping
+ * in the kernel, which can cause a number of problems, including RCU CPU
+ * stall warnings.
+ *
+ * Therefore, if a nohz_full CPU fails to report a quiescent state
+ * in a timely manner, the RCU grace-period kthread sets that CPU's
+ * ->rcu_urgent_qs flag with the expectation that the next interrupt or
+ * exception will invoke this function, which will turn on the scheduler
+ * tick, which will enable RCU to detect that CPU's quiescent states,
+ * for example, due to cond_resched() calls in CONFIG_PREEMPT=n kernels.
+ * The tick will be disabled once a quiescent state is reported for
+ * this CPU.
+ *
+ * Of course, in carefully tuned systems, there might never be an
+ * interrupt or exception. In that case, the RCU grace-period kthread
+ * will eventually cause one to happen. However, in less carefully
+ * controlled environments, this function allows RCU to get what it
+ * needs without creating otherwise useless interruptions.
+ */
+void __rcu_irq_enter_check_tick(void)
+{
+ struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
+
+ // Enabling the tick is unsafe in NMI handlers.
+ if (WARN_ON_ONCE(in_nmi()))
+ return;
+
+ RCU_LOCKDEP_WARN(rcu_dynticks_curr_cpu_in_eqs(),
+ "Illegal rcu_irq_enter_check_tick() from extended quiescent state");
+
+ if (!tick_nohz_full_cpu(rdp->cpu) ||
+ !READ_ONCE(rdp->rcu_urgent_qs) ||
+ READ_ONCE(rdp->rcu_forced_tick)) {
+ // RCU doesn't need nohz_full help from this CPU, or it is
+ // already getting that help.
+ return;
+ }
+
+ // We get here only when not in an extended quiescent state and
+ // from interrupts (as opposed to NMIs). Therefore, (1) RCU is
+ // already watching and (2) The fact that we are in an interrupt
+ // handler and that the rcu_node lock is an irq-disabled lock
+ // prevents self-deadlock. So we can safely recheck under the lock.
+ // Note that the nohz_full state currently cannot change.
+ raw_spin_lock_rcu_node(rdp->mynode);
+ if (rdp->rcu_urgent_qs && !rdp->rcu_forced_tick) {
+ // A nohz_full CPU is in the kernel and RCU needs a
+ // quiescent state. Turn on the tick!
+ WRITE_ONCE(rdp->rcu_forced_tick, true);
+ tick_dep_set_cpu(rdp->cpu, TICK_DEP_BIT_RCU);
+ }
+ raw_spin_unlock_rcu_node(rdp->mynode);
+}
#endif /* CONFIG_NO_HZ_FULL */
/**
- * rcu_nmi_enter_common - inform RCU of entry to NMI context
+ * rcu_nmi_enter - inform RCU of entry to NMI context
* @irq: Is this call from rcu_irq_enter?
*
* If the CPU was idle from RCU's viewpoint, update rdp->dynticks and
@@ -795,10 +952,10 @@ void rcu_user_exit(void)
* long as the nesting level does not overflow an int. (You will probably
* run out of stack space first.)
*
- * If you add or remove a call to rcu_nmi_enter_common(), be sure to test
+ * If you add or remove a call to rcu_nmi_enter(), be sure to test
* with CONFIG_RCU_EQS_DEBUG=y.
*/
-static __always_inline void rcu_nmi_enter_common(bool irq)
+noinstr void rcu_nmi_enter(void)
{
long incby = 2;
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
@@ -816,45 +973,33 @@ static __always_inline void rcu_nmi_enter_common(bool irq)
*/
if (rcu_dynticks_curr_cpu_in_eqs()) {
- if (irq)
+ if (!in_nmi())
rcu_dynticks_task_exit();
+ // RCU is not watching here ...
rcu_dynticks_eqs_exit();
+ // ... but is watching here.
- if (irq)
+ if (!in_nmi())
rcu_cleanup_after_idle();
incby = 1;
- } else if (irq && tick_nohz_full_cpu(rdp->cpu) &&
- rdp->dynticks_nmi_nesting == DYNTICK_IRQ_NONIDLE &&
- READ_ONCE(rdp->rcu_urgent_qs) &&
- !READ_ONCE(rdp->rcu_forced_tick)) {
- raw_spin_lock_rcu_node(rdp->mynode);
- // Recheck under lock.
- if (rdp->rcu_urgent_qs && !rdp->rcu_forced_tick) {
- WRITE_ONCE(rdp->rcu_forced_tick, true);
- tick_dep_set_cpu(rdp->cpu, TICK_DEP_BIT_RCU);
- }
- raw_spin_unlock_rcu_node(rdp->mynode);
+ } else if (!in_nmi()) {
+ instrumentation_begin();
+ rcu_irq_enter_check_tick();
+ instrumentation_end();
}
+ instrumentation_begin();
trace_rcu_dyntick(incby == 1 ? TPS("Endirq") : TPS("++="),
rdp->dynticks_nmi_nesting,
rdp->dynticks_nmi_nesting + incby, atomic_read(&rdp->dynticks));
+ instrumentation_end();
WRITE_ONCE(rdp->dynticks_nmi_nesting, /* Prevent store tearing. */
rdp->dynticks_nmi_nesting + incby);
barrier();
}
/**
- * rcu_nmi_enter - inform RCU of entry to NMI context
- */
-void rcu_nmi_enter(void)
-{
- rcu_nmi_enter_common(false);
-}
-NOKPROBE_SYMBOL(rcu_nmi_enter);
-
-/**
* rcu_irq_enter - inform RCU that current CPU is entering irq away from idle
*
* Enter an interrupt handler, which might possibly result in exiting
@@ -876,10 +1021,10 @@ NOKPROBE_SYMBOL(rcu_nmi_enter);
* If you add or remove a call to rcu_irq_enter(), be sure to test with
* CONFIG_RCU_EQS_DEBUG=y.
*/
-void rcu_irq_enter(void)
+noinstr void rcu_irq_enter(void)
{
lockdep_assert_irqs_disabled();
- rcu_nmi_enter_common(true);
+ rcu_nmi_enter();
}
/*
@@ -913,6 +1058,11 @@ static void rcu_disable_urgency_upon_qs(struct rcu_data *rdp)
}
}
+noinstr bool __rcu_is_watching(void)
+{
+ return !rcu_dynticks_curr_cpu_in_eqs();
+}
+
/**
* rcu_is_watching - see if RCU thinks that the current CPU is not idle
*
@@ -921,7 +1071,7 @@ static void rcu_disable_urgency_upon_qs(struct rcu_data *rdp)
* if the current CPU is not in its idle loop or is in an interrupt or
* NMI handler, return true.
*/
-bool notrace rcu_is_watching(void)
+bool rcu_is_watching(void)
{
bool ret;
@@ -973,12 +1123,12 @@ bool rcu_lockdep_current_cpu_online(void)
if (in_nmi() || !rcu_scheduler_fully_active)
return true;
- preempt_disable();
+ preempt_disable_notrace();
rdp = this_cpu_ptr(&rcu_data);
rnp = rdp->mynode;
if (rdp->grpmask & rcu_rnp_online_cpus(rnp))
ret = true;
- preempt_enable();
+ preempt_enable_notrace();
return ret;
}
EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online);
@@ -1217,7 +1367,7 @@ static bool rcu_start_this_gp(struct rcu_node *rnp_start, struct rcu_data *rdp,
trace_rcu_this_gp(rnp, rdp, gp_seq_req, TPS("NoGPkthread"));
goto unlock_out;
}
- trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq, TPS("newreq"));
+ trace_rcu_grace_period(rcu_state.name, data_race(rcu_state.gp_seq), TPS("newreq"));
ret = true; /* Caller must wake GP kthread. */
unlock_out:
/* Push furthest requested GP to leaf node and rcu_data structure. */
@@ -1473,6 +1623,31 @@ static void rcu_gp_slow(int delay)
schedule_timeout_uninterruptible(delay);
}
+static unsigned long sleep_duration;
+
+/* Allow rcutorture to stall the grace-period kthread. */
+void rcu_gp_set_torture_wait(int duration)
+{
+ if (IS_ENABLED(CONFIG_RCU_TORTURE_TEST) && duration > 0)
+ WRITE_ONCE(sleep_duration, duration);
+}
+EXPORT_SYMBOL_GPL(rcu_gp_set_torture_wait);
+
+/* Actually implement the aforementioned wait. */
+static void rcu_gp_torture_wait(void)
+{
+ unsigned long duration;
+
+ if (!IS_ENABLED(CONFIG_RCU_TORTURE_TEST))
+ return;
+ duration = xchg(&sleep_duration, 0UL);
+ if (duration > 0) {
+ pr_alert("%s: Waiting %lu jiffies\n", __func__, duration);
+ schedule_timeout_uninterruptible(duration);
+ pr_alert("%s: Wait complete\n", __func__);
+ }
+}
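+
+/*
+ * Illustrative sketch (hypothetical test code): a torture test could make
+ * the grace-period kthread's next wait linger for a quarter second via:
+ *
+ *	rcu_gp_set_torture_wait(HZ / 4);
+ */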
+
/*
* Initialize a new grace period. Return false if no grace period required.
*/
@@ -1506,6 +1681,7 @@ static bool rcu_gp_init(void)
record_gp_stall_check_time();
/* Record GP times before starting GP, hence rcu_seq_start(). */
rcu_seq_start(&rcu_state.gp_seq);
+ ASSERT_EXCLUSIVE_WRITER(rcu_state.gp_seq);
trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq, TPS("start"));
raw_spin_unlock_irq_rcu_node(rnp);
@@ -1611,12 +1787,16 @@ static bool rcu_gp_fqs_check_wake(int *gfp)
{
struct rcu_node *rnp = rcu_get_root();
- /* Someone like call_rcu() requested a force-quiescent-state scan. */
+ // If under overload conditions, force an immediate FQS scan.
+ if (*gfp & RCU_GP_FLAG_OVLD)
+ return true;
+
+ // Someone like call_rcu() requested a force-quiescent-state scan.
*gfp = READ_ONCE(rcu_state.gp_flags);
if (*gfp & RCU_GP_FLAG_FQS)
return true;
- /* The current grace period has completed. */
+ // The current grace period has completed.
if (!READ_ONCE(rnp->qsmask) && !rcu_preempt_blocked_readers_cgp(rnp))
return true;
@@ -1654,13 +1834,15 @@ static void rcu_gp_fqs(bool first_time)
static void rcu_gp_fqs_loop(void)
{
bool first_gp_fqs;
- int gf;
+ int gf = 0;
unsigned long j;
int ret;
struct rcu_node *rnp = rcu_get_root();
first_gp_fqs = true;
j = READ_ONCE(jiffies_till_first_fqs);
+ if (rcu_state.cbovld)
+ gf = RCU_GP_FLAG_OVLD;
ret = 0;
for (;;) {
if (!ret) {
@@ -1673,6 +1855,7 @@ static void rcu_gp_fqs_loop(void)
rcu_state.gp_state = RCU_GP_WAIT_FQS;
ret = swait_event_idle_timeout_exclusive(
rcu_state.gp_wq, rcu_gp_fqs_check_wake(&gf), j);
+ rcu_gp_torture_wait();
rcu_state.gp_state = RCU_GP_DOING_FQS;
/* Locking provides needed memory barriers. */
/* If grace period done, leave loop. */
@@ -1680,12 +1863,16 @@ static void rcu_gp_fqs_loop(void)
!rcu_preempt_blocked_readers_cgp(rnp))
break;
/* If time for quiescent-state forcing, do it. */
- if (ULONG_CMP_GE(jiffies, rcu_state.jiffies_force_qs) ||
+ if (!time_after(rcu_state.jiffies_force_qs, jiffies) ||
(gf & RCU_GP_FLAG_FQS)) {
trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq,
TPS("fqsstart"));
rcu_gp_fqs(first_gp_fqs);
- first_gp_fqs = false;
+ gf = 0;
+ if (first_gp_fqs) {
+ first_gp_fqs = false;
+ gf = rcu_state.cbovld ? RCU_GP_FLAG_OVLD : 0;
+ }
trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq,
TPS("fqsend"));
cond_resched_tasks_rcu_qs();
@@ -1705,6 +1892,7 @@ static void rcu_gp_fqs_loop(void)
j = 1;
else
j = rcu_state.jiffies_force_qs - j;
+ gf = 0;
}
}
}
@@ -1781,6 +1969,7 @@ static void rcu_gp_cleanup(void)
/* Declare grace period done, trace first to use old GP number. */
trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq, TPS("end"));
rcu_seq_end(&rcu_state.gp_seq);
+ ASSERT_EXCLUSIVE_WRITER(rcu_state.gp_seq);
rcu_state.gp_state = RCU_GP_IDLE;
/* Check for GP requests since above loop. */
rdp = this_cpu_ptr(&rcu_data);
@@ -1821,6 +2010,7 @@ static int __noreturn rcu_gp_kthread(void *unused)
swait_event_idle_exclusive(rcu_state.gp_wq,
READ_ONCE(rcu_state.gp_flags) &
RCU_GP_FLAG_INIT);
+ rcu_gp_torture_wait();
rcu_state.gp_state = RCU_GP_DONE_GPS;
/* Locking provides needed memory barrier. */
if (rcu_gp_init())
@@ -2811,6 +3001,8 @@ struct kfree_rcu_cpu {
struct delayed_work monitor_work;
bool monitor_todo;
bool initialized;
+ // Number of objects for which GP not started
+ int count;
};
static DEFINE_PER_CPU(struct kfree_rcu_cpu, krc);
@@ -2924,6 +3116,8 @@ static inline bool queue_kfree_rcu_work(struct kfree_rcu_cpu *krcp)
krcp->head = NULL;
}
+ WRITE_ONCE(krcp->count, 0);
+
/*
* One work is per one batch, so there are two "free channels",
* "bhead_free" and "head_free" the batch can handle. It can be
@@ -3060,6 +3254,8 @@ void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
krcp->head = head;
}
+ WRITE_ONCE(krcp->count, krcp->count + 1);
+
// Set timer to drain after KFREE_DRAIN_JIFFIES.
if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING &&
!krcp->monitor_todo) {
@@ -3074,6 +3270,56 @@ unlock_return:
}
EXPORT_SYMBOL_GPL(kfree_call_rcu);
+static unsigned long
+kfree_rcu_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
+{
+ int cpu;
+ unsigned long count = 0;
+
+ /* Snapshot count of all CPUs */
+ for_each_online_cpu(cpu) {
+ struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
+
+ count += READ_ONCE(krcp->count);
+ }
+
+ return count;
+}
+
+static unsigned long
+kfree_rcu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
+{
+ int cpu, freed = 0;
+ unsigned long flags;
+
+ for_each_online_cpu(cpu) {
+ int count;
+ struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
+
+ count = krcp->count;
+ spin_lock_irqsave(&krcp->lock, flags);
+ if (krcp->monitor_todo)
+ kfree_rcu_drain_unlock(krcp, flags);
+ else
+ spin_unlock_irqrestore(&krcp->lock, flags);
+
+ sc->nr_to_scan -= count;
+ freed += count;
+
+ if (sc->nr_to_scan <= 0)
+ break;
+ }
+
+ return freed;
+}
+
+static struct shrinker kfree_rcu_shrinker = {
+ .count_objects = kfree_rcu_shrink_count,
+ .scan_objects = kfree_rcu_shrink_scan,
+ .batch = 0,
+ .seeks = DEFAULT_SEEKS,
+};
+
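/*
 * Illustrative sketch, not part of this patch: objects freed with kfree_rcu()
 * are what the shrinker above counts and drains. The structure and function
 * names are hypothetical.
 */
struct example_node {
	int data;
	struct rcu_head rh;
};

static void example_free(struct example_node *p)
{
	kfree_rcu(p, rh);	/* Queued on this CPU's krcp, bumping ->count. */
}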
void __init kfree_rcu_scheduler_running(void)
{
int cpu;
@@ -3599,6 +3845,7 @@ void rcu_cpu_starting(unsigned int cpu)
nbits = bitmap_weight(&oldmask, BITS_PER_LONG);
/* Allow lockless access for expedited grace periods. */
smp_store_release(&rcu_state.ncpus, rcu_state.ncpus + nbits); /* ^^^ */
+ ASSERT_EXCLUSIVE_WRITER(rcu_state.ncpus);
rcu_gpnum_ovf(rnp, rdp); /* Offline-induced counter wrap? */
rdp->rcu_onl_gp_seq = READ_ONCE(rcu_state.gp_seq);
rdp->rcu_onl_gp_flags = READ_ONCE(rcu_state.gp_flags);
@@ -3994,6 +4241,8 @@ static void __init kfree_rcu_batch_init(void)
INIT_DELAYED_WORK(&krcp->monitor_work, kfree_rcu_monitor);
krcp->initialized = true;
}
+ if (register_shrinker(&kfree_rcu_shrinker))
+ pr_err("Failed to register kfree_rcu() shrinker!\n");
}
void __init rcu_init(void)
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 9dc2ec021da5..43991a40b084 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -359,6 +359,7 @@ struct rcu_state {
/* Values for rcu_state structure's gp_flags field. */
#define RCU_GP_FLAG_INIT 0x1 /* Need grace-period initialization. */
#define RCU_GP_FLAG_FQS 0x2 /* Need grace-period quiescent-state forcing. */
+#define RCU_GP_FLAG_OVLD 0x4 /* Experiencing callback overload. */
/* Values for rcu_state structure's gp_state field. */
#define RCU_GP_IDLE 0 /* Initial state and no GP in progress. */
@@ -454,6 +455,8 @@ static void rcu_bind_gp_kthread(void);
static bool rcu_nohz_full_cpu(void);
static void rcu_dynticks_task_enter(void);
static void rcu_dynticks_task_exit(void);
+static void rcu_dynticks_task_trace_enter(void);
+static void rcu_dynticks_task_trace_exit(void);
/* Forward declarations for tree_stall.h */
static void record_gp_stall_check_time(void);
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index 1a617b9dffb0..72952edad1e4 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -150,7 +150,7 @@ static void __maybe_unused sync_exp_reset_tree(void)
static bool sync_rcu_exp_done(struct rcu_node *rnp)
{
raw_lockdep_assert_held_rcu_node(rnp);
- return rnp->exp_tasks == NULL &&
+ return READ_ONCE(rnp->exp_tasks) == NULL &&
READ_ONCE(rnp->expmask) == 0;
}
@@ -373,7 +373,7 @@ static void sync_rcu_exp_select_node_cpus(struct work_struct *wp)
* until such time as the ->expmask bits are cleared.
*/
if (rcu_preempt_has_tasks(rnp))
- rnp->exp_tasks = rnp->blkd_tasks.next;
+ WRITE_ONCE(rnp->exp_tasks, rnp->blkd_tasks.next);
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
/* IPI the remaining CPUs for expedited quiescent state. */
@@ -542,8 +542,8 @@ static void synchronize_rcu_expedited_wait(void)
}
pr_cont(" } %lu jiffies s: %lu root: %#lx/%c\n",
jiffies - jiffies_start, rcu_state.expedited_sequence,
- READ_ONCE(rnp_root->expmask),
- ".T"[!!rnp_root->exp_tasks]);
+ data_race(rnp_root->expmask),
+ ".T"[!!data_race(rnp_root->exp_tasks)]);
if (ndetected) {
pr_err("blocking rcu_node structures:");
rcu_for_each_node_breadth_first(rnp) {
@@ -553,8 +553,8 @@ static void synchronize_rcu_expedited_wait(void)
continue;
pr_cont(" l=%u:%d-%d:%#lx/%c",
rnp->level, rnp->grplo, rnp->grphi,
- READ_ONCE(rnp->expmask),
- ".T"[!!rnp->exp_tasks]);
+ data_race(rnp->expmask),
+ ".T"[!!data_race(rnp->exp_tasks)]);
}
pr_cont("\n");
}
@@ -639,6 +639,7 @@ static void wait_rcu_exp_gp(struct work_struct *wp)
*/
static void rcu_exp_handler(void *unused)
{
+ int depth = rcu_preempt_depth();
unsigned long flags;
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
struct rcu_node *rnp = rdp->mynode;
@@ -649,7 +650,7 @@ static void rcu_exp_handler(void *unused)
* critical section. If also enabled or idle, immediately
* report the quiescent state, otherwise defer.
*/
- if (!rcu_preempt_depth()) {
+ if (!depth) {
if (!(preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK)) ||
rcu_dynticks_curr_cpu_in_eqs()) {
rcu_report_exp_rdp(rdp);
@@ -673,7 +674,7 @@ static void rcu_exp_handler(void *unused)
* can have caused this quiescent state to already have been
* reported, so we really do need to check ->expmask.
*/
- if (rcu_preempt_depth() > 0) {
+ if (depth > 0) {
raw_spin_lock_irqsave_rcu_node(rnp, flags);
if (rnp->expmask & rdp->grpmask) {
rdp->exp_deferred_qs = true;
@@ -683,30 +684,8 @@ static void rcu_exp_handler(void *unused)
return;
}
- /*
- * The final and least likely case is where the interrupted
- * code was just about to or just finished exiting the RCU-preempt
- * read-side critical section, and no, we can't tell which.
- * So either way, set ->deferred_qs to flag later code that
- * a quiescent state is required.
- *
- * If the CPU is fully enabled (or if some buggy RCU-preempt
- * read-side critical section is being used from idle), just
- * invoke rcu_preempt_deferred_qs() to immediately report the
- * quiescent state. We cannot use rcu_read_unlock_special()
- * because we are in an interrupt handler, which will cause that
- * function to take an early exit without doing anything.
- *
- * Otherwise, force a context switch after the CPU enables everything.
- */
- rdp->exp_deferred_qs = true;
- if (!(preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK)) ||
- WARN_ON_ONCE(rcu_dynticks_curr_cpu_in_eqs())) {
- rcu_preempt_deferred_qs(t);
- } else {
- set_tsk_need_resched(t);
- set_preempt_need_resched();
- }
+ // Finally, negative nesting depth should not happen.
+ WARN_ON_ONCE(1);
}
/* PREEMPTION=y, so no PREEMPTION=n expedited grace period to clean up after. */
@@ -721,17 +700,20 @@ static void sync_sched_exp_online_cleanup(int cpu)
*/
static int rcu_print_task_exp_stall(struct rcu_node *rnp)
{
- struct task_struct *t;
+ unsigned long flags;
int ndetected = 0;
+ struct task_struct *t;
- if (!rnp->exp_tasks)
+ if (!READ_ONCE(rnp->exp_tasks))
return 0;
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
t = list_entry(rnp->exp_tasks->prev,
struct task_struct, rcu_node_entry);
list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {
pr_cont(" P%d", t->pid);
ndetected++;
}
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
return ndetected;
}
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 097635c41135..352223664ebd 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -226,7 +226,7 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
WARN_ON_ONCE(rnp->completedqs == rnp->gp_seq);
}
if (!rnp->exp_tasks && (blkd_state & RCU_EXP_BLKD))
- rnp->exp_tasks = &t->rcu_node_entry;
+ WRITE_ONCE(rnp->exp_tasks, &t->rcu_node_entry);
WARN_ON_ONCE(!(blkd_state & RCU_GP_BLKD) !=
!(rnp->qsmask & rdp->grpmask));
WARN_ON_ONCE(!(blkd_state & RCU_EXP_BLKD) !=
@@ -331,6 +331,7 @@ void rcu_note_context_switch(bool preempt)
rcu_qs();
if (rdp->exp_deferred_qs)
rcu_report_exp_rdp(rdp);
+ rcu_tasks_qs(current, preempt);
trace_rcu_utilization(TPS("End context switch"));
}
EXPORT_SYMBOL_GPL(rcu_note_context_switch);
@@ -345,9 +346,7 @@ static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
return READ_ONCE(rnp->gp_tasks) != NULL;
}
-/* Bias and limit values for ->rcu_read_lock_nesting. */
-#define RCU_NEST_BIAS INT_MAX
-#define RCU_NEST_NMAX (-INT_MAX / 2)
+/* limit value for ->rcu_read_lock_nesting. */
#define RCU_NEST_PMAX (INT_MAX / 2)
static void rcu_preempt_read_enter(void)
@@ -355,9 +354,9 @@ static void rcu_preempt_read_enter(void)
current->rcu_read_lock_nesting++;
}
-static void rcu_preempt_read_exit(void)
+static int rcu_preempt_read_exit(void)
{
- current->rcu_read_lock_nesting--;
+ return --current->rcu_read_lock_nesting;
}
static void rcu_preempt_depth_set(int val)
@@ -390,21 +389,15 @@ void __rcu_read_unlock(void)
{
struct task_struct *t = current;
- if (rcu_preempt_depth() != 1) {
- rcu_preempt_read_exit();
- } else {
+ if (rcu_preempt_read_exit() == 0) {
barrier(); /* critical section before exit code. */
- rcu_preempt_depth_set(-RCU_NEST_BIAS);
- barrier(); /* assign before ->rcu_read_unlock_special load */
if (unlikely(READ_ONCE(t->rcu_read_unlock_special.s)))
rcu_read_unlock_special(t);
- barrier(); /* ->rcu_read_unlock_special load before assign */
- rcu_preempt_depth_set(0);
}
if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
int rrln = rcu_preempt_depth();
- WARN_ON_ONCE(rrln < 0 && rrln > RCU_NEST_NMAX);
+ WARN_ON_ONCE(rrln < 0 || rrln > RCU_NEST_PMAX);
}
}
EXPORT_SYMBOL_GPL(__rcu_read_unlock);
@@ -500,12 +493,12 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
if (&t->rcu_node_entry == rnp->gp_tasks)
WRITE_ONCE(rnp->gp_tasks, np);
if (&t->rcu_node_entry == rnp->exp_tasks)
- rnp->exp_tasks = np;
+ WRITE_ONCE(rnp->exp_tasks, np);
if (IS_ENABLED(CONFIG_RCU_BOOST)) {
/* Snapshot ->boost_mtx ownership w/rnp->lock held. */
drop_boost_mutex = rt_mutex_owner(&rnp->boost_mtx) == t;
if (&t->rcu_node_entry == rnp->boost_tasks)
- rnp->boost_tasks = np;
+ WRITE_ONCE(rnp->boost_tasks, np);
}
/*
@@ -556,7 +549,7 @@ static bool rcu_preempt_need_deferred_qs(struct task_struct *t)
{
return (__this_cpu_read(rcu_data.exp_deferred_qs) ||
READ_ONCE(t->rcu_read_unlock_special.s)) &&
- rcu_preempt_depth() <= 0;
+ rcu_preempt_depth() == 0;
}
/*
@@ -569,16 +562,11 @@ static bool rcu_preempt_need_deferred_qs(struct task_struct *t)
static void rcu_preempt_deferred_qs(struct task_struct *t)
{
unsigned long flags;
- bool couldrecurse = rcu_preempt_depth() >= 0;
if (!rcu_preempt_need_deferred_qs(t))
return;
- if (couldrecurse)
- rcu_preempt_depth_set(rcu_preempt_depth() - RCU_NEST_BIAS);
local_irq_save(flags);
rcu_preempt_deferred_qs_irqrestore(t, flags);
- if (couldrecurse)
- rcu_preempt_depth_set(rcu_preempt_depth() + RCU_NEST_BIAS);
}
/*
@@ -615,19 +603,18 @@ static void rcu_read_unlock_special(struct task_struct *t)
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
struct rcu_node *rnp = rdp->mynode;
- exp = (t->rcu_blocked_node && t->rcu_blocked_node->exp_tasks) ||
- (rdp->grpmask & READ_ONCE(rnp->expmask)) ||
- tick_nohz_full_cpu(rdp->cpu);
+ exp = (t->rcu_blocked_node &&
+ READ_ONCE(t->rcu_blocked_node->exp_tasks)) ||
+ (rdp->grpmask & READ_ONCE(rnp->expmask));
// Need to defer quiescent state until everything is enabled.
- if (irqs_were_disabled && use_softirq &&
- (in_interrupt() ||
- (exp && !t->rcu_read_unlock_special.b.deferred_qs))) {
- // Using softirq, safe to awaken, and we get
- // no help from enabling irqs, unlike bh/preempt.
+ if (use_softirq && (in_irq() || (exp && !irqs_were_disabled))) {
+ // Using softirq, safe to awaken, and either the
+ // wakeup is free or there is an expedited GP.
raise_softirq_irqoff(RCU_SOFTIRQ);
} else {
// Enabling BH or preempt does reschedule, so...
- // Also if no expediting or NO_HZ_FULL, slow is OK.
+ // Also if no expediting, slow is OK.
+ // Plus nohz_full CPUs eventually get tick enabled.
set_tsk_need_resched(current);
set_preempt_need_resched();
if (IS_ENABLED(CONFIG_IRQ_WORK) && irqs_were_disabled &&
@@ -640,7 +627,6 @@ static void rcu_read_unlock_special(struct task_struct *t)
irq_work_queue_on(&rdp->defer_qs_iw, rdp->cpu);
}
}
- t->rcu_read_unlock_special.b.deferred_qs = true;
local_irq_restore(flags);
return;
}
@@ -699,7 +685,7 @@ static void rcu_flavor_sched_clock_irq(int user)
} else if (rcu_preempt_need_deferred_qs(t)) {
rcu_preempt_deferred_qs(t); /* Report deferred QS. */
return;
- } else if (!rcu_preempt_depth()) {
+ } else if (!WARN_ON_ONCE(rcu_preempt_depth())) {
rcu_qs(); /* Report immediate QS. */
return;
}
@@ -760,8 +746,8 @@ dump_blkd_tasks(struct rcu_node *rnp, int ncheck)
pr_info("%s: %d:%d ->qsmask %#lx ->qsmaskinit %#lx ->qsmaskinitnext %#lx\n",
__func__, rnp1->grplo, rnp1->grphi, rnp1->qsmask, rnp1->qsmaskinit, rnp1->qsmaskinitnext);
pr_info("%s: ->gp_tasks %p ->boost_tasks %p ->exp_tasks %p\n",
- __func__, READ_ONCE(rnp->gp_tasks), rnp->boost_tasks,
- rnp->exp_tasks);
+ __func__, READ_ONCE(rnp->gp_tasks), data_race(rnp->boost_tasks),
+ READ_ONCE(rnp->exp_tasks));
pr_info("%s: ->blkd_tasks", __func__);
i = 0;
list_for_each(lhp, &rnp->blkd_tasks) {
@@ -854,8 +840,7 @@ void rcu_note_context_switch(bool preempt)
this_cpu_write(rcu_data.rcu_urgent_qs, false);
if (unlikely(raw_cpu_read(rcu_data.rcu_need_heavy_qs)))
rcu_momentary_dyntick_idle();
- if (!preempt)
- rcu_tasks_qs(current);
+ rcu_tasks_qs(current, preempt);
out:
trace_rcu_utilization(TPS("End context switch"));
}
@@ -1036,7 +1021,8 @@ static int rcu_boost_kthread(void *arg)
for (;;) {
WRITE_ONCE(rnp->boost_kthread_status, RCU_KTHREAD_WAITING);
trace_rcu_utilization(TPS("End boost kthread@rcu_wait"));
- rcu_wait(rnp->boost_tasks || rnp->exp_tasks);
+ rcu_wait(READ_ONCE(rnp->boost_tasks) ||
+ READ_ONCE(rnp->exp_tasks));
trace_rcu_utilization(TPS("Start boost kthread@rcu_wait"));
WRITE_ONCE(rnp->boost_kthread_status, RCU_KTHREAD_RUNNING);
more2boost = rcu_boost(rnp);
@@ -1079,9 +1065,9 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
(rnp->gp_tasks != NULL &&
rnp->boost_tasks == NULL &&
rnp->qsmask == 0 &&
- (ULONG_CMP_GE(jiffies, rnp->boost_time) || rcu_state.cbovld))) {
+ (!time_after(rnp->boost_time, jiffies) || rcu_state.cbovld))) {
if (rnp->exp_tasks == NULL)
- rnp->boost_tasks = rnp->gp_tasks;
+ WRITE_ONCE(rnp->boost_tasks, rnp->gp_tasks);
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
rcu_wake_cond(rnp->boost_kthread_task,
READ_ONCE(rnp->boost_kthread_status));
@@ -2536,7 +2522,7 @@ static bool rcu_nohz_full_cpu(void)
#ifdef CONFIG_NO_HZ_FULL
if (tick_nohz_full_cpu(smp_processor_id()) &&
(!rcu_gp_in_progress() ||
- ULONG_CMP_LT(jiffies, READ_ONCE(rcu_state.gp_start) + HZ)))
+ time_before(jiffies, READ_ONCE(rcu_state.gp_start) + HZ)))
return true;
#endif /* #ifdef CONFIG_NO_HZ_FULL */
return false;
@@ -2553,7 +2539,7 @@ static void rcu_bind_gp_kthread(void)
}
/* Record the current task on dyntick-idle entry. */
-static void rcu_dynticks_task_enter(void)
+static void noinstr rcu_dynticks_task_enter(void)
{
#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
WRITE_ONCE(current->rcu_tasks_idle_cpu, smp_processor_id());
@@ -2561,9 +2547,27 @@ static void rcu_dynticks_task_enter(void)
}
/* Record no current task on dyntick-idle exit. */
-static void rcu_dynticks_task_exit(void)
+static void noinstr rcu_dynticks_task_exit(void)
{
#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
WRITE_ONCE(current->rcu_tasks_idle_cpu, -1);
#endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */
}
+
+/* Turn on heavyweight RCU tasks trace readers on idle/user entry. */
+static void rcu_dynticks_task_trace_enter(void)
+{
+#ifdef CONFIG_TASKS_RCU_TRACE
+ if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB))
+ current->trc_reader_special.b.need_mb = true;
+#endif /* #ifdef CONFIG_TASKS_RCU_TRACE */
+}
+
+/* Turn off heavyweight RCU tasks trace readers on idle/user exit. */
+static void rcu_dynticks_task_trace_exit(void)
+{
+#ifdef CONFIG_TASKS_RCU_TRACE
+ if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB))
+ current->trc_reader_special.b.need_mb = false;
+#endif /* #ifdef CONFIG_TASKS_RCU_TRACE */
+}
diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h
index 119ed6afd20f..ae76bd329582 100644
--- a/kernel/rcu/tree_stall.h
+++ b/kernel/rcu/tree_stall.h
@@ -15,10 +15,12 @@
int sysctl_panic_on_rcu_stall __read_mostly;
#ifdef CONFIG_PROVE_RCU
-#define RCU_STALL_DELAY_DELTA (5 * HZ)
+#define RCU_STALL_DELAY_DELTA (5 * HZ)
#else
-#define RCU_STALL_DELAY_DELTA 0
+#define RCU_STALL_DELAY_DELTA 0
#endif
+#define RCU_STALL_MIGHT_DIV 8
+#define RCU_STALL_MIGHT_MIN (2 * HZ)
/* Limit-check stall timeouts specified at boottime and runtime. */
int rcu_jiffies_till_stall_check(void)
@@ -40,6 +42,36 @@ int rcu_jiffies_till_stall_check(void)
}
EXPORT_SYMBOL_GPL(rcu_jiffies_till_stall_check);
+/**
+ * rcu_gp_might_be_stalled - Is it likely that the grace period is stalled?
+ *
+ * Returns @true if the current grace period is sufficiently old that
+ * it is reasonable to assume that it might be stalled. This can be
+ * useful when deciding whether to allocate memory to enable RCU-mediated
+ * freeing on the one hand or just invoke synchronize_rcu() on the other.
+ * The latter is preferable when the grace period is stalled.
+ *
+ * Note that sampling of the .gp_start and .gp_seq fields must be done
+ * carefully to avoid false positives at the beginnings and ends of
+ * grace periods.
+ */
+bool rcu_gp_might_be_stalled(void)
+{
+ unsigned long d = rcu_jiffies_till_stall_check() / RCU_STALL_MIGHT_DIV;
+ unsigned long j = jiffies;
+
+ if (d < RCU_STALL_MIGHT_MIN)
+ d = RCU_STALL_MIGHT_MIN;
+ smp_mb(); // jiffies before .gp_seq to avoid false positives.
+ if (!rcu_gp_in_progress())
+ return false;
+ // Long delays at this point avoid false positives, but a delay
+ // of ULONG_MAX/4 jiffies voids your no-false-positive warranty.
+ smp_mb(); // .gp_seq before second .gp_start
+ // And ditto here.
+ return !time_before(j, READ_ONCE(rcu_state.gp_start) + d);
+}
+
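/*
 * Illustrative sketch, not part of this patch: a caller choosing between
 * asynchronous and synchronous freeing might consult rcu_gp_might_be_stalled().
 * The example_obj type and helper names are hypothetical.
 */
struct example_obj {
	int data;
	struct rcu_head rh;
};

static void example_free_cb(struct rcu_head *rhp)
{
	kfree(container_of(rhp, struct example_obj, rh));
}

static void example_defer_free(struct example_obj *p)
{
	if (rcu_gp_might_be_stalled()) {
		synchronize_rcu();	/* GP looks stalled; avoid piling up memory. */
		kfree(p);
	} else {
		call_rcu(&p->rh, example_free_cb);
	}
}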
/* Don't do RCU CPU stall warnings during long sysrq printouts. */
void rcu_sysrq_start(void)
{
@@ -104,8 +136,8 @@ static void record_gp_stall_check_time(void)
WRITE_ONCE(rcu_state.gp_start, j);
j1 = rcu_jiffies_till_stall_check();
- /* Record ->gp_start before ->jiffies_stall. */
- smp_store_release(&rcu_state.jiffies_stall, j + j1); /* ^^^ */
+ smp_mb(); // ->gp_start before ->jiffies_stall and caller's ->gp_seq.
+ WRITE_ONCE(rcu_state.jiffies_stall, j + j1);
rcu_state.jiffies_resched = j + j1 / 2;
rcu_state.n_force_qs_gpstart = READ_ONCE(rcu_state.n_force_qs);
}
@@ -192,14 +224,40 @@ static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp)
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
}
+// Communicate task state back to the RCU CPU stall warning request.
+struct rcu_stall_chk_rdr {
+ int nesting;
+ union rcu_special rs;
+ bool on_blkd_list;
+};
+
+/*
+ * Report out the state of a not-running task that is stalling the
+ * current RCU grace period.
+ */
+static bool check_slow_task(struct task_struct *t, void *arg)
+{
+ struct rcu_node *rnp;
+ struct rcu_stall_chk_rdr *rscrp = arg;
+
+ if (task_curr(t))
+ return false; // It is running, so decline to inspect it.
+ rscrp->nesting = t->rcu_read_lock_nesting;
+ rscrp->rs = t->rcu_read_unlock_special;
+ rnp = t->rcu_blocked_node;
+ rscrp->on_blkd_list = !list_empty(&t->rcu_node_entry);
+ return true;
+}
+
/*
* Scan the current list of tasks blocked within RCU read-side critical
* sections, printing out the tid of each.
*/
static int rcu_print_task_stall(struct rcu_node *rnp)
{
- struct task_struct *t;
int ndetected = 0;
+ struct rcu_stall_chk_rdr rscr;
+ struct task_struct *t;
if (!rcu_preempt_blocked_readers_cgp(rnp))
return 0;
@@ -208,7 +266,15 @@ static int rcu_print_task_stall(struct rcu_node *rnp)
t = list_entry(rnp->gp_tasks->prev,
struct task_struct, rcu_node_entry);
list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {
- pr_cont(" P%d", t->pid);
+ if (!try_invoke_on_locked_down_task(t, check_slow_task, &rscr))
+ pr_cont(" P%d", t->pid);
+ else
+ pr_cont(" P%d/%d:%c%c%c%c",
+ t->pid, rscr.nesting,
+ ".b"[rscr.rs.b.blocked],
+ ".q"[rscr.rs.b.need_qs],
+ ".e"[rscr.rs.b.exp_hint],
+ ".l"[rscr.on_blkd_list]);
ndetected++;
}
pr_cont("\n");
@@ -299,6 +365,16 @@ static const char *gp_state_getname(short gs)
return gp_state_names[gs];
}
+/* Is the RCU grace-period kthread being starved of CPU time? */
+static bool rcu_is_gp_kthread_starving(unsigned long *jp)
+{
+ unsigned long j = jiffies - READ_ONCE(rcu_state.gp_activity);
+
+ if (jp)
+ *jp = j;
+ return j > 2 * HZ;
+}
+
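/*
 * Illustrative sketch, not part of this patch: a stall-warning helper can ask
 * both whether the grace-period kthread is starving and for how long. The
 * function name is hypothetical.
 */
static void example_report_starvation(void)
{
	unsigned long j;

	if (rcu_is_gp_kthread_starving(&j))
		pr_info("GP kthread has not run for %lu jiffies\n", j);
}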
/*
* Print out diagnostic information for the specified stalled CPU.
*
@@ -313,6 +389,7 @@ static const char *gp_state_getname(short gs)
static void print_cpu_stall_info(int cpu)
{
unsigned long delta;
+ bool falsepositive;
char fast_no_hz[72];
struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
char *ticks_title;
@@ -333,7 +410,9 @@ static void print_cpu_stall_info(int cpu)
}
print_cpu_stall_fast_no_hz(fast_no_hz, cpu);
delta = rcu_seq_ctr(rdp->mynode->gp_seq - rdp->rcu_iw_gp_seq);
- pr_err("\t%d-%c%c%c%c: (%lu %s) idle=%03x/%ld/%#lx softirq=%u/%u fqs=%ld %s\n",
+ falsepositive = rcu_is_gp_kthread_starving(NULL) &&
+ rcu_dynticks_in_eqs(rcu_dynticks_snap(rdp));
+ pr_err("\t%d-%c%c%c%c: (%lu %s) idle=%03x/%ld/%#lx softirq=%u/%u fqs=%ld %s%s\n",
cpu,
"O."[!!cpu_online(cpu)],
"o."[!!(rdp->grpmask & rdp->mynode->qsmaskinit)],
@@ -345,8 +424,9 @@ static void print_cpu_stall_info(int cpu)
rcu_dynticks_snap(rdp) & 0xfff,
rdp->dynticks_nesting, rdp->dynticks_nmi_nesting,
rdp->softirq_snap, kstat_softirqs_cpu(RCU_SOFTIRQ, cpu),
- READ_ONCE(rcu_state.n_force_qs) - rcu_state.n_force_qs_gpstart,
- fast_no_hz);
+ data_race(rcu_state.n_force_qs) - rcu_state.n_force_qs_gpstart,
+ fast_no_hz,
+ falsepositive ? " (false positive?)" : "");
}
/* Complain about starvation of grace-period kthread. */
@@ -355,15 +435,15 @@ static void rcu_check_gp_kthread_starvation(void)
struct task_struct *gpk = rcu_state.gp_kthread;
unsigned long j;
- j = jiffies - READ_ONCE(rcu_state.gp_activity);
- if (j > 2 * HZ) {
+ if (rcu_is_gp_kthread_starving(&j)) {
pr_err("%s kthread starved for %ld jiffies! g%ld f%#x %s(%d) ->state=%#lx ->cpu=%d\n",
rcu_state.name, j,
(long)rcu_seq_current(&rcu_state.gp_seq),
- READ_ONCE(rcu_state.gp_flags),
+ data_race(rcu_state.gp_flags),
gp_state_getname(rcu_state.gp_state), rcu_state.gp_state,
gpk ? gpk->state : ~0, gpk ? task_cpu(gpk) : -1);
if (gpk) {
+ pr_err("\tUnless %s kthread gets sufficient CPU time, OOM is now expected behavior.\n", rcu_state.name);
pr_err("RCU grace-period kthread stack dump:\n");
sched_show_task(gpk);
wake_up_process(gpk);
@@ -371,7 +451,7 @@ static void rcu_check_gp_kthread_starvation(void)
}
}
-static void print_other_cpu_stall(unsigned long gp_seq)
+static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps)
{
int cpu;
unsigned long flags;
@@ -408,7 +488,7 @@ static void print_other_cpu_stall(unsigned long gp_seq)
for_each_possible_cpu(cpu)
totqlen += rcu_get_n_cbs_cpu(cpu);
pr_cont("\t(detected by %d, t=%ld jiffies, g=%ld, q=%lu)\n",
- smp_processor_id(), (long)(jiffies - rcu_state.gp_start),
+ smp_processor_id(), (long)(jiffies - gps),
(long)rcu_seq_current(&rcu_state.gp_seq), totqlen);
if (ndetected) {
rcu_dump_cpu_stacks();
@@ -421,13 +501,11 @@ static void print_other_cpu_stall(unsigned long gp_seq)
pr_err("INFO: Stall ended before state dump start\n");
} else {
j = jiffies;
- gpa = READ_ONCE(rcu_state.gp_activity);
+ gpa = data_race(rcu_state.gp_activity);
pr_err("All QSes seen, last %s kthread activity %ld (%ld-%ld), jiffies_till_next_fqs=%ld, root ->qsmask %#lx\n",
rcu_state.name, j - gpa, j, gpa,
- READ_ONCE(jiffies_till_next_fqs),
+ data_race(jiffies_till_next_fqs),
rcu_get_root()->qsmask);
- /* In this case, the current CPU might be at fault. */
- sched_show_task(current);
}
}
/* Rewrite if needed in case of slow consoles. */
@@ -442,7 +520,7 @@ static void print_other_cpu_stall(unsigned long gp_seq)
rcu_force_quiescent_state(); /* Kick them all. */
}
-static void print_cpu_stall(void)
+static void print_cpu_stall(unsigned long gps)
{
int cpu;
unsigned long flags;
@@ -467,7 +545,7 @@ static void print_cpu_stall(void)
for_each_possible_cpu(cpu)
totqlen += rcu_get_n_cbs_cpu(cpu);
pr_cont("\t(t=%lu jiffies g=%ld q=%lu)\n",
- jiffies - rcu_state.gp_start,
+ jiffies - gps,
(long)rcu_seq_current(&rcu_state.gp_seq), totqlen);
rcu_check_gp_kthread_starvation();
@@ -546,7 +624,7 @@ static void check_cpu_stall(struct rcu_data *rdp)
cmpxchg(&rcu_state.jiffies_stall, js, jn) == js) {
/* We haven't checked in, so go dump stack. */
- print_cpu_stall();
+ print_cpu_stall(gps);
if (rcu_cpu_stall_ftrace_dump)
rcu_ftrace_dump(DUMP_ALL);
@@ -555,7 +633,7 @@ static void check_cpu_stall(struct rcu_data *rdp)
cmpxchg(&rcu_state.jiffies_stall, js, jn) == js) {
/* They had a few time units to dump stack, so complain. */
- print_other_cpu_stall(gs2);
+ print_other_cpu_stall(gs2, gps);
if (rcu_cpu_stall_ftrace_dump)
rcu_ftrace_dump(DUMP_ALL);
}
@@ -581,23 +659,23 @@ void show_rcu_gp_kthreads(void)
struct task_struct *t = READ_ONCE(rcu_state.gp_kthread);
j = jiffies;
- ja = j - READ_ONCE(rcu_state.gp_activity);
- jr = j - READ_ONCE(rcu_state.gp_req_activity);
- jw = j - READ_ONCE(rcu_state.gp_wake_time);
+ ja = j - data_race(rcu_state.gp_activity);
+ jr = j - data_race(rcu_state.gp_req_activity);
+ jw = j - data_race(rcu_state.gp_wake_time);
pr_info("%s: wait state: %s(%d) ->state: %#lx delta ->gp_activity %lu ->gp_req_activity %lu ->gp_wake_time %lu ->gp_wake_seq %ld ->gp_seq %ld ->gp_seq_needed %ld ->gp_flags %#x\n",
rcu_state.name, gp_state_getname(rcu_state.gp_state),
rcu_state.gp_state, t ? t->state : 0x1ffffL,
- ja, jr, jw, (long)READ_ONCE(rcu_state.gp_wake_seq),
- (long)READ_ONCE(rcu_state.gp_seq),
- (long)READ_ONCE(rcu_get_root()->gp_seq_needed),
- READ_ONCE(rcu_state.gp_flags));
+ ja, jr, jw, (long)data_race(rcu_state.gp_wake_seq),
+ (long)data_race(rcu_state.gp_seq),
+ (long)data_race(rcu_get_root()->gp_seq_needed),
+ data_race(rcu_state.gp_flags));
rcu_for_each_node_breadth_first(rnp) {
if (ULONG_CMP_GE(READ_ONCE(rcu_state.gp_seq),
READ_ONCE(rnp->gp_seq_needed)))
continue;
pr_info("\trcu_node %d:%d ->gp_seq %ld ->gp_seq_needed %ld\n",
- rnp->grplo, rnp->grphi, (long)READ_ONCE(rnp->gp_seq),
- (long)READ_ONCE(rnp->gp_seq_needed));
+ rnp->grplo, rnp->grphi, (long)data_race(rnp->gp_seq),
+ (long)data_race(rnp->gp_seq_needed));
if (!rcu_is_leaf_node(rnp))
continue;
for_each_leaf_node_possible_cpu(rnp, cpu) {
@@ -607,7 +685,7 @@ void show_rcu_gp_kthreads(void)
READ_ONCE(rdp->gp_seq_needed)))
continue;
pr_info("\tcpu %d ->gp_seq_needed %ld\n",
- cpu, (long)READ_ONCE(rdp->gp_seq_needed));
+ cpu, (long)data_race(rdp->gp_seq_needed));
}
}
for_each_possible_cpu(cpu) {
@@ -615,7 +693,7 @@ void show_rcu_gp_kthreads(void)
if (rcu_segcblist_is_offloaded(&rdp->cblist))
show_rcu_nocb_state(rdp);
}
- /* sched_show_task(rcu_state.gp_kthread); */
+ show_rcu_tasks_gp_kthreads();
}
EXPORT_SYMBOL_GPL(show_rcu_gp_kthreads);
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 28a8bdc5072f..84843adfd939 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -41,6 +41,7 @@
#include <linux/sched/isolation.h>
#include <linux/kprobes.h>
#include <linux/slab.h>
+#include <linux/irq_work.h>
#define CREATE_TRACE_POINTS
@@ -51,6 +52,19 @@
#endif
#define MODULE_PARAM_PREFIX "rcupdate."
+#ifndef data_race
+#define data_race(expr) \
+ ({ \
+ expr; \
+ })
+#endif
+#ifndef ASSERT_EXCLUSIVE_WRITER
+#define ASSERT_EXCLUSIVE_WRITER(var) do { } while (0)
+#endif
+#ifndef ASSERT_EXCLUSIVE_ACCESS
+#define ASSERT_EXCLUSIVE_ACCESS(var) do { } while (0)
+#endif
+
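/*
 * Illustrative sketch, not part of this patch: with the fallback above,
 * data_race() simply evaluates its argument, so a diagnostic read of a shared
 * variable compiles the same whether or not KCSAN supplies the real macro.
 * The variable and function names are hypothetical.
 */
static int example_shared_counter;

static int example_peek(void)
{
	return data_race(example_shared_counter);	/* Read marked benign for KCSAN. */
}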
#ifndef CONFIG_TINY_RCU
module_param(rcu_expedited, int, 0);
module_param(rcu_normal, int, 0);
@@ -63,12 +77,12 @@ module_param(rcu_normal_after_boot, int, 0);
* rcu_read_lock_held_common() - might we be in RCU-sched read-side critical section?
* @ret: Best guess answer if lockdep cannot be relied on
*
- * Returns true if lockdep must be ignored, in which case *ret contains
+ * Returns true if lockdep must be ignored, in which case ``*ret`` contains
* the best guess described below. Otherwise returns false, in which
- * case *ret tells the caller nothing and the caller should instead
+ * case ``*ret`` tells the caller nothing and the caller should instead
* consult lockdep.
*
- * If CONFIG_DEBUG_LOCK_ALLOC is selected, set *ret to nonzero iff in an
+ * If CONFIG_DEBUG_LOCK_ALLOC is selected, set ``*ret`` to nonzero iff in an
* RCU-sched read-side critical section. In absence of
* CONFIG_DEBUG_LOCK_ALLOC, this assumes we are in an RCU-sched read-side
* critical section unless it can prove otherwise. Note that disabling
@@ -82,7 +96,7 @@ module_param(rcu_normal_after_boot, int, 0);
*
* Note that if the CPU is in the idle loop from an RCU point of view (ie:
* that we are in the section between rcu_idle_enter() and rcu_idle_exit())
- * then rcu_read_lock_held() sets *ret to false even if the CPU did an
+ * then rcu_read_lock_held() sets ``*ret`` to false even if the CPU did an
* rcu_read_lock(). The reason for this is that RCU ignores CPUs that are
* in such a section, considering these as in extended quiescent state,
* so such a CPU is effectively never in an RCU read-side critical section
@@ -98,15 +112,15 @@ module_param(rcu_normal_after_boot, int, 0);
static bool rcu_read_lock_held_common(bool *ret)
{
if (!debug_lockdep_rcu_enabled()) {
- *ret = 1;
+ *ret = true;
return true;
}
if (!rcu_is_watching()) {
- *ret = 0;
+ *ret = false;
return true;
}
if (!rcu_lockdep_current_cpu_online()) {
- *ret = 0;
+ *ret = false;
return true;
}
return false;
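/*
 * Illustrative sketch, not part of this patch: the rcu_read_lock*_held()
 * family is built on rcu_read_lock_held_common() along these lines, shown
 * here with a hypothetical wrapper name.
 */
static int example_lock_held(void)
{
	bool ret;

	if (rcu_read_lock_held_common(&ret))
		return ret;	/* Lockdep unusable; return the best-guess answer. */
	return lock_is_held(&rcu_lock_map);
}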
@@ -270,13 +284,12 @@ struct lockdep_map rcu_callback_map =
STATIC_LOCKDEP_MAP_INIT("rcu_callback", &rcu_callback_key);
EXPORT_SYMBOL_GPL(rcu_callback_map);
-int notrace debug_lockdep_rcu_enabled(void)
+noinstr int notrace debug_lockdep_rcu_enabled(void)
{
return rcu_scheduler_active != RCU_SCHEDULER_INACTIVE && debug_locks &&
current->lockdep_recursion == 0;
}
EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled);
-NOKPROBE_SYMBOL(debug_lockdep_rcu_enabled);
/**
* rcu_read_lock_held() - might we be in RCU read-side critical section?
@@ -501,370 +514,6 @@ int rcu_cpu_stall_suppress_at_boot __read_mostly; // !0 = suppress boot stalls.
EXPORT_SYMBOL_GPL(rcu_cpu_stall_suppress_at_boot);
module_param(rcu_cpu_stall_suppress_at_boot, int, 0444);
-#ifdef CONFIG_TASKS_RCU
-
-/*
- * Simple variant of RCU whose quiescent states are voluntary context
- * switch, cond_resched_rcu_qs(), user-space execution, and idle.
- * As such, grace periods can take one good long time. There are no
- * read-side primitives similar to rcu_read_lock() and rcu_read_unlock()
- * because this implementation is intended to get the system into a safe
- * state for some of the manipulations involved in tracing and the like.
- * Finally, this implementation does not support high call_rcu_tasks()
- * rates from multiple CPUs. If this is required, per-CPU callback lists
- * will be needed.
- */
-
-/* Global list of callbacks and associated lock. */
-static struct rcu_head *rcu_tasks_cbs_head;
-static struct rcu_head **rcu_tasks_cbs_tail = &rcu_tasks_cbs_head;
-static DECLARE_WAIT_QUEUE_HEAD(rcu_tasks_cbs_wq);
-static DEFINE_RAW_SPINLOCK(rcu_tasks_cbs_lock);
-
-/* Track exiting tasks in order to allow them to be waited for. */
-DEFINE_STATIC_SRCU(tasks_rcu_exit_srcu);
-
-/* Control stall timeouts. Disable with <= 0, otherwise jiffies till stall. */
-#define RCU_TASK_STALL_TIMEOUT (HZ * 60 * 10)
-static int rcu_task_stall_timeout __read_mostly = RCU_TASK_STALL_TIMEOUT;
-module_param(rcu_task_stall_timeout, int, 0644);
-
-static struct task_struct *rcu_tasks_kthread_ptr;
-
-/**
- * call_rcu_tasks() - Queue an RCU for invocation task-based grace period
- * @rhp: structure to be used for queueing the RCU updates.
- * @func: actual callback function to be invoked after the grace period
- *
- * The callback function will be invoked some time after a full grace
- * period elapses, in other words after all currently executing RCU
- * read-side critical sections have completed. call_rcu_tasks() assumes
- * that the read-side critical sections end at a voluntary context
- * switch (not a preemption!), cond_resched_rcu_qs(), entry into idle,
- * or transition to usermode execution. As such, there are no read-side
- * primitives analogous to rcu_read_lock() and rcu_read_unlock() because
- * this primitive is intended to determine that all tasks have passed
- * through a safe state, not so much for data-strcuture synchronization.
- *
- * See the description of call_rcu() for more detailed information on
- * memory ordering guarantees.
- */
-void call_rcu_tasks(struct rcu_head *rhp, rcu_callback_t func)
-{
- unsigned long flags;
- bool needwake;
-
- rhp->next = NULL;
- rhp->func = func;
- raw_spin_lock_irqsave(&rcu_tasks_cbs_lock, flags);
- needwake = !rcu_tasks_cbs_head;
- WRITE_ONCE(*rcu_tasks_cbs_tail, rhp);
- rcu_tasks_cbs_tail = &rhp->next;
- raw_spin_unlock_irqrestore(&rcu_tasks_cbs_lock, flags);
- /* We can't create the thread unless interrupts are enabled. */
- if (needwake && READ_ONCE(rcu_tasks_kthread_ptr))
- wake_up(&rcu_tasks_cbs_wq);
-}
-EXPORT_SYMBOL_GPL(call_rcu_tasks);
-
-/**
- * synchronize_rcu_tasks - wait until an rcu-tasks grace period has elapsed.
- *
- * Control will return to the caller some time after a full rcu-tasks
- * grace period has elapsed, in other words after all currently
- * executing rcu-tasks read-side critical sections have elapsed. These
- * read-side critical sections are delimited by calls to schedule(),
- * cond_resched_tasks_rcu_qs(), idle execution, userspace execution, calls
- * to synchronize_rcu_tasks(), and (in theory, anyway) cond_resched().
- *
- * This is a very specialized primitive, intended only for a few uses in
- * tracing and other situations requiring manipulation of function
- * preambles and profiling hooks. The synchronize_rcu_tasks() function
- * is not (yet) intended for heavy use from multiple CPUs.
- *
- * Note that this guarantee implies further memory-ordering guarantees.
- * On systems with more than one CPU, when synchronize_rcu_tasks() returns,
- * each CPU is guaranteed to have executed a full memory barrier since the
- * end of its last RCU-tasks read-side critical section whose beginning
- * preceded the call to synchronize_rcu_tasks(). In addition, each CPU
- * having an RCU-tasks read-side critical section that extends beyond
- * the return from synchronize_rcu_tasks() is guaranteed to have executed
- * a full memory barrier after the beginning of synchronize_rcu_tasks()
- * and before the beginning of that RCU-tasks read-side critical section.
- * Note that these guarantees include CPUs that are offline, idle, or
- * executing in user mode, as well as CPUs that are executing in the kernel.
- *
- * Furthermore, if CPU A invoked synchronize_rcu_tasks(), which returned
- * to its caller on CPU B, then both CPU A and CPU B are guaranteed
- * to have executed a full memory barrier during the execution of
- * synchronize_rcu_tasks() -- even if CPU A and CPU B are the same CPU
- * (but again only if the system has more than one CPU).
- */
-void synchronize_rcu_tasks(void)
-{
- /* Complain if the scheduler has not started. */
- RCU_LOCKDEP_WARN(rcu_scheduler_active == RCU_SCHEDULER_INACTIVE,
- "synchronize_rcu_tasks called too soon");
-
- /* Wait for the grace period. */
- wait_rcu_gp(call_rcu_tasks);
-}
-EXPORT_SYMBOL_GPL(synchronize_rcu_tasks);
-
-/**
- * rcu_barrier_tasks - Wait for in-flight call_rcu_tasks() callbacks.
- *
- * Although the current implementation is guaranteed to wait, it is not
- * obligated to, for example, if there are no pending callbacks.
- */
-void rcu_barrier_tasks(void)
-{
- /* There is only one callback queue, so this is easy. ;-) */
- synchronize_rcu_tasks();
-}
-EXPORT_SYMBOL_GPL(rcu_barrier_tasks);
-
-/* See if tasks are still holding out, complain if so. */
-static void check_holdout_task(struct task_struct *t,
- bool needreport, bool *firstreport)
-{
- int cpu;
-
- if (!READ_ONCE(t->rcu_tasks_holdout) ||
- t->rcu_tasks_nvcsw != READ_ONCE(t->nvcsw) ||
- !READ_ONCE(t->on_rq) ||
- (IS_ENABLED(CONFIG_NO_HZ_FULL) &&
- !is_idle_task(t) && t->rcu_tasks_idle_cpu >= 0)) {
- WRITE_ONCE(t->rcu_tasks_holdout, false);
- list_del_init(&t->rcu_tasks_holdout_list);
- put_task_struct(t);
- return;
- }
- rcu_request_urgent_qs_task(t);
- if (!needreport)
- return;
- if (*firstreport) {
- pr_err("INFO: rcu_tasks detected stalls on tasks:\n");
- *firstreport = false;
- }
- cpu = task_cpu(t);
- pr_alert("%p: %c%c nvcsw: %lu/%lu holdout: %d idle_cpu: %d/%d\n",
- t, ".I"[is_idle_task(t)],
- "N."[cpu < 0 || !tick_nohz_full_cpu(cpu)],
- t->rcu_tasks_nvcsw, t->nvcsw, t->rcu_tasks_holdout,
- t->rcu_tasks_idle_cpu, cpu);
- sched_show_task(t);
-}
-
-/* RCU-tasks kthread that detects grace periods and invokes callbacks. */
-static int __noreturn rcu_tasks_kthread(void *arg)
-{
- unsigned long flags;
- struct task_struct *g, *t;
- unsigned long lastreport;
- struct rcu_head *list;
- struct rcu_head *next;
- LIST_HEAD(rcu_tasks_holdouts);
- int fract;
-
- /* Run on housekeeping CPUs by default. Sysadm can move if desired. */
- housekeeping_affine(current, HK_FLAG_RCU);
-
- /*
- * Each pass through the following loop makes one check for
- * newly arrived callbacks, and, if there are some, waits for
- * one RCU-tasks grace period and then invokes the callbacks.
- * This loop is terminated by the system going down. ;-)
- */
- for (;;) {
-
- /* Pick up any new callbacks. */
- raw_spin_lock_irqsave(&rcu_tasks_cbs_lock, flags);
- list = rcu_tasks_cbs_head;
- rcu_tasks_cbs_head = NULL;
- rcu_tasks_cbs_tail = &rcu_tasks_cbs_head;
- raw_spin_unlock_irqrestore(&rcu_tasks_cbs_lock, flags);
-
- /* If there were none, wait a bit and start over. */
- if (!list) {
- wait_event_interruptible(rcu_tasks_cbs_wq,
- READ_ONCE(rcu_tasks_cbs_head));
- if (!rcu_tasks_cbs_head) {
- WARN_ON(signal_pending(current));
- schedule_timeout_interruptible(HZ/10);
- }
- continue;
- }
-
- /*
- * Wait for all pre-existing t->on_rq and t->nvcsw
- * transitions to complete. Invoking synchronize_rcu()
- * suffices because all these transitions occur with
- * interrupts disabled. Without this synchronize_rcu(),
- * a read-side critical section that started before the
- * grace period might be incorrectly seen as having started
- * after the grace period.
- *
- * This synchronize_rcu() also dispenses with the
- * need for a memory barrier on the first store to
- * ->rcu_tasks_holdout, as it forces the store to happen
- * after the beginning of the grace period.
- */
- synchronize_rcu();
-
- /*
- * There were callbacks, so we need to wait for an
- * RCU-tasks grace period. Start off by scanning
- * the task list for tasks that are not already
- * voluntarily blocked. Mark these tasks and make
- * a list of them in rcu_tasks_holdouts.
- */
- rcu_read_lock();
- for_each_process_thread(g, t) {
- if (t != current && READ_ONCE(t->on_rq) &&
- !is_idle_task(t)) {
- get_task_struct(t);
- t->rcu_tasks_nvcsw = READ_ONCE(t->nvcsw);
- WRITE_ONCE(t->rcu_tasks_holdout, true);
- list_add(&t->rcu_tasks_holdout_list,
- &rcu_tasks_holdouts);
- }
- }
- rcu_read_unlock();
-
- /*
- * Wait for tasks that are in the process of exiting.
- * This does only part of the job, ensuring that all
- * tasks that were previously exiting reach the point
- * where they have disabled preemption, allowing the
- * later synchronize_rcu() to finish the job.
- */
- synchronize_srcu(&tasks_rcu_exit_srcu);
-
- /*
- * Each pass through the following loop scans the list
- * of holdout tasks, removing any that are no longer
- * holdouts. When the list is empty, we are done.
- */
- lastreport = jiffies;
-
- /* Start off with HZ/10 wait and slowly back off to 1 HZ wait*/
- fract = 10;
-
- for (;;) {
- bool firstreport;
- bool needreport;
- int rtst;
- struct task_struct *t1;
-
- if (list_empty(&rcu_tasks_holdouts))
- break;
-
- /* Slowly back off waiting for holdouts */
- schedule_timeout_interruptible(HZ/fract);
-
- if (fract > 1)
- fract--;
-
- rtst = READ_ONCE(rcu_task_stall_timeout);
- needreport = rtst > 0 &&
- time_after(jiffies, lastreport + rtst);
- if (needreport)
- lastreport = jiffies;
- firstreport = true;
- WARN_ON(signal_pending(current));
- list_for_each_entry_safe(t, t1, &rcu_tasks_holdouts,
- rcu_tasks_holdout_list) {
- check_holdout_task(t, needreport, &firstreport);
- cond_resched();
- }
- }
-
- /*
- * Because ->on_rq and ->nvcsw are not guaranteed
- * to have a full memory barriers prior to them in the
- * schedule() path, memory reordering on other CPUs could
- * cause their RCU-tasks read-side critical sections to
- * extend past the end of the grace period. However,
- * because these ->nvcsw updates are carried out with
- * interrupts disabled, we can use synchronize_rcu()
- * to force the needed ordering on all such CPUs.
- *
- * This synchronize_rcu() also confines all
- * ->rcu_tasks_holdout accesses to be within the grace
- * period, avoiding the need for memory barriers for
- * ->rcu_tasks_holdout accesses.
- *
- * In addition, this synchronize_rcu() waits for exiting
- * tasks to complete their final preempt_disable() region
- * of execution, cleaning up after the synchronize_srcu()
- * above.
- */
- synchronize_rcu();
-
- /* Invoke the callbacks. */
- while (list) {
- next = list->next;
- local_bh_disable();
- list->func(list);
- local_bh_enable();
- list = next;
- cond_resched();
- }
- /* Paranoid sleep to keep this from entering a tight loop */
- schedule_timeout_uninterruptible(HZ/10);
- }
-}
-
-/* Spawn rcu_tasks_kthread() at core_initcall() time. */
-static int __init rcu_spawn_tasks_kthread(void)
-{
- struct task_struct *t;
-
- t = kthread_run(rcu_tasks_kthread, NULL, "rcu_tasks_kthread");
- if (WARN_ONCE(IS_ERR(t), "%s: Could not start Tasks-RCU grace-period kthread, OOM is now expected behavior\n", __func__))
- return 0;
- smp_mb(); /* Ensure others see full kthread. */
- WRITE_ONCE(rcu_tasks_kthread_ptr, t);
- return 0;
-}
-core_initcall(rcu_spawn_tasks_kthread);
-
-/* Do the srcu_read_lock() for the above synchronize_srcu(). */
-void exit_tasks_rcu_start(void) __acquires(&tasks_rcu_exit_srcu)
-{
- preempt_disable();
- current->rcu_tasks_idx = __srcu_read_lock(&tasks_rcu_exit_srcu);
- preempt_enable();
-}
-
-/* Do the srcu_read_unlock() for the above synchronize_srcu(). */
-void exit_tasks_rcu_finish(void) __releases(&tasks_rcu_exit_srcu)
-{
- preempt_disable();
- __srcu_read_unlock(&tasks_rcu_exit_srcu, current->rcu_tasks_idx);
- preempt_enable();
-}
-
-#endif /* #ifdef CONFIG_TASKS_RCU */
-
-#ifndef CONFIG_TINY_RCU
-
-/*
- * Print any non-default Tasks RCU settings.
- */
-static void __init rcu_tasks_bootup_oddness(void)
-{
-#ifdef CONFIG_TASKS_RCU
- if (rcu_task_stall_timeout != RCU_TASK_STALL_TIMEOUT)
- pr_info("\tTasks-RCU CPU stall warnings timeout set to %d (rcu_task_stall_timeout).\n", rcu_task_stall_timeout);
- else
- pr_info("\tTasks RCU enabled.\n");
-#endif /* #ifdef CONFIG_TASKS_RCU */
-}
-
-#endif /* #ifndef CONFIG_TINY_RCU */
-
#ifdef CONFIG_PROVE_RCU
/*
@@ -935,6 +584,8 @@ late_initcall(rcu_verify_early_boot_tests);
void rcu_early_boot_tests(void) {}
#endif /* CONFIG_PROVE_RCU */
+#include "tasks.h"
+
#ifndef CONFIG_TINY_RCU
/*
diff --git a/kernel/reboot.c b/kernel/reboot.c
index c4d472b7f1b4..491f1347bf43 100644
--- a/kernel/reboot.c
+++ b/kernel/reboot.c
@@ -250,7 +250,7 @@ void kernel_restart(char *cmd)
pr_emerg("Restarting system\n");
else
pr_emerg("Restarting system with command '%s'\n", cmd);
- kmsg_dump(KMSG_DUMP_RESTART);
+ kmsg_dump(KMSG_DUMP_SHUTDOWN);
machine_restart(cmd);
}
EXPORT_SYMBOL_GPL(kernel_restart);
@@ -274,7 +274,7 @@ void kernel_halt(void)
migrate_to_reboot_cpu();
syscore_shutdown();
pr_emerg("System halted\n");
- kmsg_dump(KMSG_DUMP_HALT);
+ kmsg_dump(KMSG_DUMP_SHUTDOWN);
machine_halt();
}
EXPORT_SYMBOL_GPL(kernel_halt);
@@ -292,7 +292,7 @@ void kernel_power_off(void)
migrate_to_reboot_cpu();
syscore_shutdown();
pr_emerg("Power down\n");
- kmsg_dump(KMSG_DUMP_POWEROFF);
+ kmsg_dump(KMSG_DUMP_SHUTDOWN);
machine_power_off();
}
EXPORT_SYMBOL_GPL(kernel_power_off);
diff --git a/kernel/relay.c b/kernel/relay.c
index ade14fb7ce2e..90c7a002436d 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -1,7 +1,7 @@
/*
* Public API and common code for kernel->userspace relay file support.
*
- * See Documentation/filesystems/relay.txt for an overview.
+ * See Documentation/filesystems/relay.rst for an overview.
*
* Copyright (C) 2002-2005 - Tom Zanussi (zanussi@us.ibm.com), IBM Corp
* Copyright (C) 1999-2005 - Karim Yaghmour (karim@opersys.com)
@@ -1177,10 +1177,9 @@ static void relay_pipe_buf_release(struct pipe_inode_info *pipe,
}
static const struct pipe_buf_operations relay_pipe_buf_ops = {
- .confirm = generic_pipe_buf_confirm,
- .release = relay_pipe_buf_release,
- .steal = generic_pipe_buf_steal,
- .get = generic_pipe_buf_get,
+ .release = relay_pipe_buf_release,
+ .try_steal = generic_pipe_buf_try_steal,
+ .get = generic_pipe_buf_get,
};
static void relay_page_release(struct splice_pipe_desc *spd, unsigned int i)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 3e89a042a48f..8298b2c240ce 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -11,6 +11,7 @@
#include <linux/nospec.h>
#include <linux/kcov.h>
+#include <linux/scs.h>
#include <asm/switch_to.h>
#include <asm/tlb.h>
@@ -20,6 +21,7 @@
#include "../smpboot.h"
#include "pelt.h"
+#include "smp.h"
#define CREATE_TRACE_POINTS
#include <trace/events/sched.h>
@@ -219,6 +221,13 @@ void update_rq_clock(struct rq *rq)
update_rq_clock_task(rq, delta);
}
+static inline void
+rq_csd_init(struct rq *rq, call_single_data_t *csd, smp_call_func_t func)
+{
+ csd->flags = 0;
+ csd->func = func;
+ csd->info = rq;
+}
#ifdef CONFIG_SCHED_HRTICK
/*
@@ -314,16 +323,14 @@ void hrtick_start(struct rq *rq, u64 delay)
hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay),
HRTIMER_MODE_REL_PINNED_HARD);
}
+
#endif /* CONFIG_SMP */
static void hrtick_rq_init(struct rq *rq)
{
#ifdef CONFIG_SMP
- rq->hrtick_csd.flags = 0;
- rq->hrtick_csd.func = __hrtick_start;
- rq->hrtick_csd.info = rq;
+ rq_csd_init(rq, &rq->hrtick_csd, __hrtick_start);
#endif
-
hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
rq->hrtick_timer.function = hrtick;
}
@@ -632,29 +639,23 @@ void wake_up_nohz_cpu(int cpu)
wake_up_idle_cpu(cpu);
}
-static inline bool got_nohz_idle_kick(void)
+static void nohz_csd_func(void *info)
{
- int cpu = smp_processor_id();
-
- if (!(atomic_read(nohz_flags(cpu)) & NOHZ_KICK_MASK))
- return false;
-
- if (idle_cpu(cpu) && !need_resched())
- return true;
+ struct rq *rq = info;
+ int cpu = cpu_of(rq);
+ unsigned int flags;
/*
- * We can't run Idle Load Balance on this CPU for this time so we
- * cancel it and clear NOHZ_BALANCE_KICK
+ * Release the rq::nohz_csd.
*/
- atomic_andnot(NOHZ_KICK_MASK, nohz_flags(cpu));
- return false;
-}
-
-#else /* CONFIG_NO_HZ_COMMON */
+ flags = atomic_fetch_andnot(NOHZ_KICK_MASK, nohz_flags(cpu));
+ WARN_ON(!(flags & NOHZ_KICK_MASK));
-static inline bool got_nohz_idle_kick(void)
-{
- return false;
+ rq->idle_balance = idle_cpu(cpu);
+ if (rq->idle_balance && !need_resched()) {
+ rq->nohz_idle_balance = flags;
+ raise_softirq_irqoff(SCHED_SOFTIRQ);
+ }
}
#endif /* CONFIG_NO_HZ_COMMON */
@@ -1538,7 +1539,7 @@ static int migration_cpu_stop(void *data)
* __migrate_task() such that we will not miss enforcing cpus_ptr
* during wakeups, see set_cpus_allowed_ptr()'s TASK_WAKING test.
*/
- sched_ttwu_pending();
+ flush_smp_call_function_from_idle();
raw_spin_lock(&p->pi_lock);
rq_lock(rq, &rf);
@@ -2272,16 +2273,23 @@ static int ttwu_remote(struct task_struct *p, int wake_flags)
}
#ifdef CONFIG_SMP
-void sched_ttwu_pending(void)
+void sched_ttwu_pending(void *arg)
{
+ struct llist_node *llist = arg;
struct rq *rq = this_rq();
- struct llist_node *llist = llist_del_all(&rq->wake_list);
struct task_struct *p, *t;
struct rq_flags rf;
if (!llist)
return;
+ /*
+ * rq::ttwu_pending is a racy indication of outstanding wakeups.
+ * Races mean that false negatives are possible, since they are
+ * shorter lived than false positives would be.
+ */
+ WRITE_ONCE(rq->ttwu_pending, 0);
+
rq_lock_irqsave(rq, &rf);
update_rq_clock(rq);
@@ -2291,56 +2299,30 @@ void sched_ttwu_pending(void)
rq_unlock_irqrestore(rq, &rf);
}
-void scheduler_ipi(void)
+void send_call_function_single_ipi(int cpu)
{
- /*
- * Fold TIF_NEED_RESCHED into the preempt_count; anybody setting
- * TIF_NEED_RESCHED remotely (for the first time) will also send
- * this IPI.
- */
- preempt_fold_need_resched();
-
- if (llist_empty(&this_rq()->wake_list) && !got_nohz_idle_kick())
- return;
-
- /*
- * Not all reschedule IPI handlers call irq_enter/irq_exit, since
- * traditionally all their work was done from the interrupt return
- * path. Now that we actually do some work, we need to make sure
- * we do call them.
- *
- * Some archs already do call them, luckily irq_enter/exit nest
- * properly.
- *
- * Arguably we should visit all archs and update all handlers,
- * however a fair share of IPIs are still resched only so this would
- * somewhat pessimize the simple resched case.
- */
- irq_enter();
- sched_ttwu_pending();
+ struct rq *rq = cpu_rq(cpu);
- /*
- * Check if someone kicked us for doing the nohz idle load balance.
- */
- if (unlikely(got_nohz_idle_kick())) {
- this_rq()->idle_balance = 1;
- raise_softirq_irqoff(SCHED_SOFTIRQ);
- }
- irq_exit();
+ if (!set_nr_if_polling(rq->idle))
+ arch_send_call_function_single_ipi(cpu);
+ else
+ trace_sched_wake_idle_without_ipi(cpu);
}
-static void ttwu_queue_remote(struct task_struct *p, int cpu, int wake_flags)
+/*
+ * Queue a task on the target CPUs wake_list and wake the CPU via IPI if
+ * necessary. The wakee CPU on receipt of the IPI will queue the task
+ * via sched_ttwu_wakeup() for activation so the wakee incurs the cost
+ * of the wakeup instead of the waker.
+ */
+static void __ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags)
{
struct rq *rq = cpu_rq(cpu);
p->sched_remote_wakeup = !!(wake_flags & WF_MIGRATED);
- if (llist_add(&p->wake_entry, &cpu_rq(cpu)->wake_list)) {
- if (!set_nr_if_polling(rq->idle))
- smp_send_reschedule(cpu);
- else
- trace_sched_wake_idle_without_ipi(cpu);
- }
+ WRITE_ONCE(rq->ttwu_pending, 1);
+ __smp_call_single_queue(cpu, &p->wake_entry);
}
void wake_up_if_idle(int cpu)
@@ -2371,6 +2353,38 @@ bool cpus_share_cache(int this_cpu, int that_cpu)
{
return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu);
}
+
+static inline bool ttwu_queue_cond(int cpu, int wake_flags)
+{
+ /*
+ * If the CPU does not share cache, then queue the task on the
+ * remote rq's wakelist to avoid accessing remote data.
+ */
+ if (!cpus_share_cache(smp_processor_id(), cpu))
+ return true;
+
+ /*
+ * If the task is descheduling and is the only running task on the
+ * CPU then use the wakelist to offload the task activation to
+ * the soon-to-be-idle CPU as the current CPU is likely busy.
+ * nr_running is checked to avoid unnecessary task stacking.
+ */
+ if ((wake_flags & WF_ON_RQ) && cpu_rq(cpu)->nr_running <= 1)
+ return true;
+
+ return false;
+}
+
+static bool ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags)
+{
+ if (sched_feat(TTWU_QUEUE) && ttwu_queue_cond(cpu, wake_flags)) {
+ sched_clock_cpu(cpu); /* Sync clocks across CPUs */
+ __ttwu_queue_wakelist(p, cpu, wake_flags);
+ return true;
+ }
+
+ return false;
+}
#endif /* CONFIG_SMP */
static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags)
@@ -2379,11 +2393,8 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags)
struct rq_flags rf;
#if defined(CONFIG_SMP)
- if (sched_feat(TTWU_QUEUE) && !cpus_share_cache(smp_processor_id(), cpu)) {
- sched_clock_cpu(cpu); /* Sync clocks across CPUs */
- ttwu_queue_remote(p, cpu, wake_flags);
+ if (ttwu_queue_wakelist(p, cpu, wake_flags))
return;
- }
#endif
rq_lock(rq, &rf);
@@ -2560,12 +2571,22 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
*
* Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in
* __schedule(). See the comment for smp_mb__after_spinlock().
+ *
+ * A similar smp_rmb() lives in try_invoke_on_locked_down_task().
*/
smp_rmb();
if (p->on_rq && ttwu_remote(p, wake_flags))
goto unlock;
+ if (p->in_iowait) {
+ delayacct_blkio_end(p);
+ atomic_dec(&task_rq(p)->nr_iowait);
+ }
+
#ifdef CONFIG_SMP
+ p->sched_contributes_to_load = !!task_contributes_to_load(p);
+ p->state = TASK_WAKING;
+
/*
* Ensure we load p->on_cpu _after_ p->on_rq, otherwise it would be
* possible to, falsely, observe p->on_cpu == 0.
@@ -2589,6 +2610,16 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
/*
* If the owning (remote) CPU is still in the middle of schedule() with
+ * this task as prev, consider queueing p on the remote CPU's wake_list
+ * which potentially sends an IPI instead of spinning on p->on_cpu to
+ * let the waker make forward progress. This is safe because IRQs are
+ * disabled and the IPI will deliver after on_cpu is cleared.
+ */
+ if (READ_ONCE(p->on_cpu) && ttwu_queue_wakelist(p, cpu, wake_flags | WF_ON_RQ))
+ goto unlock;
+
+ /*
+ * If the owning (remote) CPU is still in the middle of schedule() with
* this task as prev, wait until its done referencing the task.
*
* Pairs with the smp_store_release() in finish_task().
@@ -2598,28 +2629,12 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
*/
smp_cond_load_acquire(&p->on_cpu, !VAL);
- p->sched_contributes_to_load = !!task_contributes_to_load(p);
- p->state = TASK_WAKING;
-
- if (p->in_iowait) {
- delayacct_blkio_end(p);
- atomic_dec(&task_rq(p)->nr_iowait);
- }
-
cpu = select_task_rq(p, p->wake_cpu, SD_BALANCE_WAKE, wake_flags);
if (task_cpu(p) != cpu) {
wake_flags |= WF_MIGRATED;
psi_ttwu_dequeue(p);
set_task_cpu(p, cpu);
}
-
-#else /* CONFIG_SMP */
-
- if (p->in_iowait) {
- delayacct_blkio_end(p);
- atomic_dec(&task_rq(p)->nr_iowait);
- }
-
#endif /* CONFIG_SMP */
ttwu_queue(p, cpu, wake_flags);
@@ -2634,6 +2649,52 @@ out:
}
/**
+ * try_invoke_on_locked_down_task - Invoke a function on task in fixed state
+ * @p: Process for which the function is to be invoked.
+ * @func: Function to invoke.
+ * @arg: Argument to function.
+ *
+ * If the specified task can be quickly locked into a definite state
+ * (either sleeping or on a given runqueue), arrange to keep it in that
+ * state while invoking @func(@arg). This function can use ->on_rq and
+ * task_curr() to work out what the state is, if required. Given that
+ * @func can be invoked with a runqueue lock held, it had better be quite
+ * lightweight.
+ *
+ * Returns:
+ * @false if the task slipped out from under the locks.
+ * @true if the task was locked onto a runqueue or is sleeping.
+ * However, @func can override this by returning @false.
+ */
+bool try_invoke_on_locked_down_task(struct task_struct *p, bool (*func)(struct task_struct *t, void *arg), void *arg)
+{
+ bool ret = false;
+ struct rq_flags rf;
+ struct rq *rq;
+
+ lockdep_assert_irqs_enabled();
+ raw_spin_lock_irq(&p->pi_lock);
+ if (p->on_rq) {
+ rq = __task_rq_lock(p, &rf);
+ if (task_rq(p) == rq)
+ ret = func(p, arg);
+ rq_unlock(rq, &rf);
+ } else {
+ switch (p->state) {
+ case TASK_RUNNING:
+ case TASK_WAKING:
+ break;
+ default:
+ smp_rmb(); // See smp_rmb() comment in try_to_wake_up().
+ if (!p->on_rq)
+ ret = func(p, arg);
+ }
+ }
+ raw_spin_unlock_irq(&p->pi_lock);
+ return ret;
+}
+
+/**
* wake_up_process - Wake up a specific process
* @p: The process to be woken up.
*
@@ -2701,6 +2762,9 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
p->capture_control = NULL;
#endif
init_numa_balancing(clone_flags, p);
+#ifdef CONFIG_SMP
+ p->wake_entry_type = CSD_TYPE_TTWU;
+#endif
}
DEFINE_STATIC_KEY_FALSE(sched_numa_balancing);
@@ -3876,6 +3940,9 @@ static inline void schedule_debug(struct task_struct *prev, bool preempt)
#ifdef CONFIG_SCHED_STACK_END_CHECK
if (task_stack_end_corrupted(prev))
panic("corrupted stack end detected inside scheduler\n");
+
+ if (task_scs_end_corrupted(prev))
+ panic("corrupted shadow stack detected inside scheduler\n");
#endif
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
@@ -3898,6 +3965,28 @@ static inline void schedule_debug(struct task_struct *prev, bool preempt)
schedstat_inc(this_rq()->sched_count);
}
+static void put_prev_task_balance(struct rq *rq, struct task_struct *prev,
+ struct rq_flags *rf)
+{
+#ifdef CONFIG_SMP
+ const struct sched_class *class;
+ /*
+ * We must do the balancing pass before put_prev_task(), such
+ * that when we release the rq->lock the task is in the same
+ * state as before we took rq->lock.
+ *
+ * We can terminate the balance pass as soon as we know there is
+ * a runnable task of @class priority or higher.
+ */
+ for_class_range(class, prev->sched_class, &idle_sched_class) {
+ if (class->balance(rq, prev, rf))
+ break;
+ }
+#endif
+
+ put_prev_task(rq, prev);
+}
+
/*
* Pick up the highest-prio task:
*/
@@ -3931,22 +4020,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
}
restart:
-#ifdef CONFIG_SMP
- /*
- * We must do the balancing pass before put_next_task(), such
- * that when we release the rq->lock the task is in the same
- * state as before we took rq->lock.
- *
- * We can terminate the balance pass as soon as we know there is
- * a runnable task of @class priority or higher.
- */
- for_class_range(class, prev->sched_class, &idle_sched_class) {
- if (class->balance(rq, prev, rf))
- break;
- }
-#endif
-
- put_prev_task(rq, prev);
+ put_prev_task_balance(rq, prev, rf);
for_each_class(class) {
p = class->pick_next_task(rq);
@@ -4636,7 +4710,7 @@ int idle_cpu(int cpu)
return 0;
#ifdef CONFIG_SMP
- if (!llist_empty(&rq->wake_list))
+ if (rq->ttwu_pending)
return 0;
#endif
@@ -6039,6 +6113,7 @@ void init_idle(struct task_struct *idle, int cpu)
idle->se.exec_start = sched_clock();
idle->flags |= PF_IDLE;
+ scs_task_reset(idle);
kasan_unpoison_task_stack(idle);
#ifdef CONFIG_SMP
@@ -6189,13 +6264,14 @@ void idle_task_exit(void)
struct mm_struct *mm = current->active_mm;
BUG_ON(cpu_online(smp_processor_id()));
+ BUG_ON(current != this_rq()->idle);
if (mm != &init_mm) {
switch_mm(mm, &init_mm, current);
- current->active_mm = &init_mm;
finish_arch_post_lock_switch();
}
- mmdrop(mm);
+
+ /* finish_cpu(), as run on the BP, will clean up the active_mm state */
}
/*
@@ -6485,7 +6561,6 @@ int sched_cpu_dying(unsigned int cpu)
struct rq_flags rf;
/* Handle pending wakeups and then migrate everything off */
- sched_ttwu_pending();
sched_tick_stop(cpu);
rq_lock_irqsave(rq, &rf);
@@ -6588,6 +6663,8 @@ void __init sched_init(void)
root_task_group.cfs_rq = (struct cfs_rq **)ptr;
ptr += nr_cpu_ids * sizeof(void **);
+ root_task_group.shares = ROOT_TASK_GROUP_LOAD;
+ init_cfs_bandwidth(&root_task_group.cfs_bandwidth);
#endif /* CONFIG_FAIR_GROUP_SCHED */
#ifdef CONFIG_RT_GROUP_SCHED
root_task_group.rt_se = (struct sched_rt_entity **)ptr;
@@ -6640,7 +6717,6 @@ void __init sched_init(void)
init_rt_rq(&rq->rt);
init_dl_rq(&rq->dl);
#ifdef CONFIG_FAIR_GROUP_SCHED
- root_task_group.shares = ROOT_TASK_GROUP_LOAD;
INIT_LIST_HEAD(&rq->leaf_cfs_rq_list);
rq->tmp_alone_branch = &rq->leaf_cfs_rq_list;
/*
@@ -6662,7 +6738,6 @@ void __init sched_init(void)
* We achieve this by letting root_task_group's tasks sit
* directly in rq->cfs (i.e root_task_group->se[] = NULL).
*/
- init_cfs_bandwidth(&root_task_group.cfs_bandwidth);
init_tg_cfs_entry(&root_task_group, &rq->cfs, NULL, i, NULL);
#endif /* CONFIG_FAIR_GROUP_SCHED */
@@ -6690,6 +6765,8 @@ void __init sched_init(void)
#ifdef CONFIG_NO_HZ_COMMON
rq->last_blocked_load_update_tick = jiffies;
atomic_set(&rq->nohz_flags, 0);
+
+ rq_csd_init(rq, &rq->nohz_csd, nohz_csd_func);
#endif
#endif /* CONFIG_SMP */
hrtick_rq_init(rq);
@@ -7384,6 +7461,8 @@ static DEFINE_MUTEX(cfs_constraints_mutex);
const u64 max_cfs_quota_period = 1 * NSEC_PER_SEC; /* 1s */
static const u64 min_cfs_quota_period = 1 * NSEC_PER_MSEC; /* 1ms */
+/* More than 203 days if BW_SHIFT equals 20. */
+static const u64 max_cfs_runtime = MAX_BW * NSEC_PER_USEC;
static int __cfs_schedulable(struct task_group *tg, u64 period, u64 runtime);
@@ -7412,6 +7491,12 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota)
return -EINVAL;
/*
+ * Bound quota to defend against overflow during the bandwidth shift.
+ */
+ if (quota != RUNTIME_INF && quota > max_cfs_runtime)
+ return -EINVAL;
+
+ /*
* Prevent race between setting of cfs_rq->runtime_enabled and
* unthrottle_offline_cfs_rqs().
*/
diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c
index 9fbb10383434..941c28cf9738 100644
--- a/kernel/sched/cpuacct.c
+++ b/kernel/sched/cpuacct.c
@@ -5,6 +5,7 @@
* Based on the work by Paul Menage (menage@google.com) and Balbir Singh
* (balbir@in.ibm.com).
*/
+#include <asm/irq_regs.h>
#include "sched.h"
/* Time spent by the tasks of the CPU accounting group executing in ... */
@@ -339,7 +340,7 @@ void cpuacct_charge(struct task_struct *tsk, u64 cputime)
{
struct cpuacct *ca;
int index = CPUACCT_STAT_SYSTEM;
- struct pt_regs *regs = task_pt_regs(tsk);
+ struct pt_regs *regs = get_irq_regs() ? : task_pt_regs(tsk);
if (regs && user_mode(regs))
index = CPUACCT_STAT_USER;
@@ -347,7 +348,7 @@ void cpuacct_charge(struct task_struct *tsk, u64 cputime)
rcu_read_lock();
for (ca = task_ca(tsk); ca; ca = parent_ca(ca))
- this_cpu_ptr(ca->cpuusage)->usages[index] += cputime;
+ __this_cpu_add(ca->cpuusage->usages[index], cputime);
rcu_read_unlock();
}
@@ -363,7 +364,7 @@ void cpuacct_account_field(struct task_struct *tsk, int index, u64 val)
rcu_read_lock();
for (ca = task_ca(tsk); ca != &root_cpuacct; ca = parent_ca(ca))
- this_cpu_ptr(ca->cpustat)->cpustat[index] += val;
+ __this_cpu_add(ca->cpustat->cpustat[index], val);
rcu_read_unlock();
}
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 239970b991c0..36c54265bb2b 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -258,7 +258,7 @@ sd_alloc_ctl_domain_table(struct sched_domain *sd)
set_table_entry(&table[2], "busy_factor", &sd->busy_factor, sizeof(int), 0644, proc_dointvec_minmax);
set_table_entry(&table[3], "imbalance_pct", &sd->imbalance_pct, sizeof(int), 0644, proc_dointvec_minmax);
set_table_entry(&table[4], "cache_nice_tries", &sd->cache_nice_tries, sizeof(int), 0644, proc_dointvec_minmax);
- set_table_entry(&table[5], "flags", &sd->flags, sizeof(int), 0644, proc_dointvec_minmax);
+ set_table_entry(&table[5], "flags", &sd->flags, sizeof(int), 0444, proc_dointvec_minmax);
set_table_entry(&table[6], "max_newidle_lb_cost", &sd->max_newidle_lb_cost, sizeof(long), 0644, proc_doulongvec_minmax);
set_table_entry(&table[7], "name", sd->name, CORENAME_MAX_SIZE, 0444, proc_dostring);
/* &table[8] is terminator */
@@ -437,7 +437,7 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
else
SEQ_printf(m, " %c", task_state_to_char(p));
- SEQ_printf(m, "%15s %5d %9Ld.%06ld %9Ld %5d ",
+ SEQ_printf(m, " %15s %5d %9Ld.%06ld %9Ld %5d ",
p->comm, task_pid_nr(p),
SPLIT_NS(p->se.vruntime),
(long long)(p->nvcsw + p->nivcsw),
@@ -464,10 +464,10 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu)
SEQ_printf(m, "\n");
SEQ_printf(m, "runnable tasks:\n");
- SEQ_printf(m, " S task PID tree-key switches prio"
+ SEQ_printf(m, " S task PID tree-key switches prio"
" wait-time sum-exec sum-sleep\n");
SEQ_printf(m, "-------------------------------------------------------"
- "----------------------------------------------------\n");
+ "------------------------------------------------------\n");
rcu_read_lock();
for_each_process_thread(g, p) {
@@ -638,7 +638,6 @@ do { \
P(nr_running);
P(nr_switches);
- P(nr_load_updates);
P(nr_uninterruptible);
PN(next_balance);
SEQ_printf(m, " .%-30s: %ld\n", "curr->pid", (long)(task_pid_nr(rq->curr)));
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 52c82b2c94dc..35f4cc024dcf 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -191,7 +191,7 @@ static void update_sysctl(void)
#undef SET_SYSCTL
}
-void sched_init_granularity(void)
+void __init sched_init_granularity(void)
{
update_sysctl();
}
@@ -1093,7 +1093,7 @@ struct numa_group {
* more by CPU use than by memory faults.
*/
unsigned long *faults_cpu;
- unsigned long faults[0];
+ unsigned long faults[];
};
/*
@@ -3440,52 +3440,46 @@ static inline void
update_tg_cfs_util(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq)
{
long delta = gcfs_rq->avg.util_avg - se->avg.util_avg;
+ /*
+ * cfs_rq->avg.period_contrib can be used for both cfs_rq and se.
+ * See ___update_load_avg() for details.
+ */
+ u32 divider = LOAD_AVG_MAX - 1024 + cfs_rq->avg.period_contrib;
/* Nothing to update */
if (!delta)
return;
- /*
- * The relation between sum and avg is:
- *
- * LOAD_AVG_MAX - 1024 + sa->period_contrib
- *
- * however, the PELT windows are not aligned between grq and gse.
- */
-
/* Set new sched_entity's utilization */
se->avg.util_avg = gcfs_rq->avg.util_avg;
- se->avg.util_sum = se->avg.util_avg * LOAD_AVG_MAX;
+ se->avg.util_sum = se->avg.util_avg * divider;
/* Update parent cfs_rq utilization */
add_positive(&cfs_rq->avg.util_avg, delta);
- cfs_rq->avg.util_sum = cfs_rq->avg.util_avg * LOAD_AVG_MAX;
+ cfs_rq->avg.util_sum = cfs_rq->avg.util_avg * divider;
}
static inline void
update_tg_cfs_runnable(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq)
{
long delta = gcfs_rq->avg.runnable_avg - se->avg.runnable_avg;
+ /*
+ * cfs_rq->avg.period_contrib can be used for both cfs_rq and se.
+ * See ___update_load_avg() for details.
+ */
+ u32 divider = LOAD_AVG_MAX - 1024 + cfs_rq->avg.period_contrib;
/* Nothing to update */
if (!delta)
return;
- /*
- * The relation between sum and avg is:
- *
- * LOAD_AVG_MAX - 1024 + sa->period_contrib
- *
- * however, the PELT windows are not aligned between grq and gse.
- */
-
/* Set new sched_entity's runnable */
se->avg.runnable_avg = gcfs_rq->avg.runnable_avg;
- se->avg.runnable_sum = se->avg.runnable_avg * LOAD_AVG_MAX;
+ se->avg.runnable_sum = se->avg.runnable_avg * divider;
/* Update parent cfs_rq runnable */
add_positive(&cfs_rq->avg.runnable_avg, delta);
- cfs_rq->avg.runnable_sum = cfs_rq->avg.runnable_avg * LOAD_AVG_MAX;
+ cfs_rq->avg.runnable_sum = cfs_rq->avg.runnable_avg * divider;
}
static inline void
@@ -3495,19 +3489,26 @@ update_tg_cfs_load(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq
unsigned long load_avg;
u64 load_sum = 0;
s64 delta_sum;
+ u32 divider;
if (!runnable_sum)
return;
gcfs_rq->prop_runnable_sum = 0;
+ /*
+ * cfs_rq->avg.period_contrib can be used for both cfs_rq and se.
+ * See ___update_load_avg() for details.
+ */
+ divider = LOAD_AVG_MAX - 1024 + cfs_rq->avg.period_contrib;
+
if (runnable_sum >= 0) {
/*
* Add runnable; clip at LOAD_AVG_MAX. Reflects that until
* the CPU is saturated running == runnable.
*/
runnable_sum += se->avg.load_sum;
- runnable_sum = min(runnable_sum, (long)LOAD_AVG_MAX);
+ runnable_sum = min_t(long, runnable_sum, divider);
} else {
/*
* Estimate the new unweighted runnable_sum of the gcfs_rq by
@@ -3532,7 +3533,7 @@ update_tg_cfs_load(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq
runnable_sum = max(runnable_sum, running_sum);
load_sum = (s64)se_weight(se) * runnable_sum;
- load_avg = div_s64(load_sum, LOAD_AVG_MAX);
+ load_avg = div_s64(load_sum, divider);
delta_sum = load_sum - (s64)se_weight(se) * se->avg.load_sum;
delta_avg = load_avg - se->avg.load_avg;
@@ -3696,6 +3697,10 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
*/
static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
+ /*
+ * cfs_rq->avg.period_contrib can be used for both cfs_rq and se.
+ * See ___update_load_avg() for details.
+ */
u32 divider = LOAD_AVG_MAX - 1024 + cfs_rq->avg.period_contrib;
/*
@@ -3872,6 +3877,8 @@ static inline unsigned long cfs_rq_load_avg(struct cfs_rq *cfs_rq)
return cfs_rq->avg.load_avg;
}
+static int newidle_balance(struct rq *this_rq, struct rq_flags *rf);
+
static inline unsigned long task_util(struct task_struct *p)
{
return READ_ONCE(p->se.avg.util_avg);
@@ -4053,7 +4060,7 @@ attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) {}
static inline void
detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) {}
-static inline int idle_balance(struct rq *rq, struct rq_flags *rf)
+static inline int newidle_balance(struct rq *rq, struct rq_flags *rf)
{
return 0;
}
@@ -4587,16 +4594,16 @@ static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg)
}
/* returns 0 on failure to allocate runtime */
-static int assign_cfs_rq_runtime(struct cfs_rq *cfs_rq)
+static int __assign_cfs_rq_runtime(struct cfs_bandwidth *cfs_b,
+ struct cfs_rq *cfs_rq, u64 target_runtime)
{
- struct task_group *tg = cfs_rq->tg;
- struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg);
- u64 amount = 0, min_amount;
+ u64 min_amount, amount = 0;
+
+ lockdep_assert_held(&cfs_b->lock);
/* note: this is a positive sum as runtime_remaining <= 0 */
- min_amount = sched_cfs_bandwidth_slice() - cfs_rq->runtime_remaining;
+ min_amount = target_runtime - cfs_rq->runtime_remaining;
- raw_spin_lock(&cfs_b->lock);
if (cfs_b->quota == RUNTIME_INF)
amount = min_amount;
else {
@@ -4608,13 +4615,25 @@ static int assign_cfs_rq_runtime(struct cfs_rq *cfs_rq)
cfs_b->idle = 0;
}
}
- raw_spin_unlock(&cfs_b->lock);
cfs_rq->runtime_remaining += amount;
return cfs_rq->runtime_remaining > 0;
}
+/* returns 0 on failure to allocate runtime */
+static int assign_cfs_rq_runtime(struct cfs_rq *cfs_rq)
+{
+ struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
+ int ret;
+
+ raw_spin_lock(&cfs_b->lock);
+ ret = __assign_cfs_rq_runtime(cfs_b, cfs_rq, sched_cfs_bandwidth_slice());
+ raw_spin_unlock(&cfs_b->lock);
+
+ return ret;
+}
+
static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec)
{
/* dock delta_exec before expiring quota (as it could span periods) */
@@ -4703,13 +4722,33 @@ static int tg_throttle_down(struct task_group *tg, void *data)
return 0;
}
-static void throttle_cfs_rq(struct cfs_rq *cfs_rq)
+static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
{
struct rq *rq = rq_of(cfs_rq);
struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
struct sched_entity *se;
long task_delta, idle_task_delta, dequeue = 1;
- bool empty;
+
+ raw_spin_lock(&cfs_b->lock);
+ /* This will start the period timer if necessary */
+ if (__assign_cfs_rq_runtime(cfs_b, cfs_rq, 1)) {
+ /*
+ * We have raced with bandwidth becoming available, and if we
+ * actually throttled the timer might not unthrottle us for an
+ * entire period. We additionally needed to make sure that any
+ * subsequent check_cfs_rq_runtime calls agree not to throttle
+ * us, as we may commit to do cfs put_prev+pick_next, so we ask
+ * for 1ns of runtime rather than just check cfs_b.
+ */
+ dequeue = 0;
+ } else {
+ list_add_tail_rcu(&cfs_rq->throttled_list,
+ &cfs_b->throttled_cfs_rq);
+ }
+ raw_spin_unlock(&cfs_b->lock);
+
+ if (!dequeue)
+ return false; /* Throttle no longer required. */
se = cfs_rq->tg->se[cpu_of(rq_of(cfs_rq))];
@@ -4743,29 +4782,13 @@ static void throttle_cfs_rq(struct cfs_rq *cfs_rq)
if (!se)
sub_nr_running(rq, task_delta);
- cfs_rq->throttled = 1;
- cfs_rq->throttled_clock = rq_clock(rq);
- raw_spin_lock(&cfs_b->lock);
- empty = list_empty(&cfs_b->throttled_cfs_rq);
-
/*
- * Add to the _head_ of the list, so that an already-started
- * distribute_cfs_runtime will not see us. If disribute_cfs_runtime is
- * not running add to the tail so that later runqueues don't get starved.
+ * Note: distribution will already see us throttled via the
+ * throttled-list. rq->lock protects completion.
*/
- if (cfs_b->distribute_running)
- list_add_rcu(&cfs_rq->throttled_list, &cfs_b->throttled_cfs_rq);
- else
- list_add_tail_rcu(&cfs_rq->throttled_list, &cfs_b->throttled_cfs_rq);
-
- /*
- * If we're the first throttled task, make sure the bandwidth
- * timer is running.
- */
- if (empty)
- start_cfs_bandwidth(cfs_b);
-
- raw_spin_unlock(&cfs_b->lock);
+ cfs_rq->throttled = 1;
+ cfs_rq->throttled_clock = rq_clock(rq);
+ return true;
}
void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
@@ -4932,14 +4955,12 @@ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun, u
/*
* This check is repeated as we release cfs_b->lock while we unthrottle.
*/
- while (throttled && cfs_b->runtime > 0 && !cfs_b->distribute_running) {
- cfs_b->distribute_running = 1;
+ while (throttled && cfs_b->runtime > 0) {
raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
/* we can't nest cfs_b->lock while distributing bandwidth */
distribute_cfs_runtime(cfs_b);
raw_spin_lock_irqsave(&cfs_b->lock, flags);
- cfs_b->distribute_running = 0;
throttled = !list_empty(&cfs_b->throttled_cfs_rq);
}
@@ -5053,10 +5074,6 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b)
/* confirm we're still not at a refresh boundary */
raw_spin_lock_irqsave(&cfs_b->lock, flags);
cfs_b->slack_started = false;
- if (cfs_b->distribute_running) {
- raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
- return;
- }
if (runtime_refresh_within(cfs_b, min_bandwidth_expiration)) {
raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
@@ -5066,9 +5083,6 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b)
if (cfs_b->quota != RUNTIME_INF && cfs_b->runtime > slice)
runtime = cfs_b->runtime;
- if (runtime)
- cfs_b->distribute_running = 1;
-
raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
if (!runtime)
@@ -5077,7 +5091,6 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b)
distribute_cfs_runtime(cfs_b);
raw_spin_lock_irqsave(&cfs_b->lock, flags);
- cfs_b->distribute_running = 0;
raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
}
@@ -5138,8 +5151,7 @@ static bool check_cfs_rq_runtime(struct cfs_rq *cfs_rq)
if (cfs_rq_throttled(cfs_rq))
return true;
- throttle_cfs_rq(cfs_rq);
- return true;
+ return throttle_cfs_rq(cfs_rq);
}
static enum hrtimer_restart sched_cfs_slack_timer(struct hrtimer *timer)
@@ -5169,6 +5181,8 @@ static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
if (!overrun)
break;
+ idle = do_sched_cfs_period_timer(cfs_b, overrun, flags);
+
if (++count > 3) {
u64 new, old = ktime_to_ns(cfs_b->period);
@@ -5198,8 +5212,6 @@ static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
/* reset count so we don't come right back in here */
count = 0;
}
-
- idle = do_sched_cfs_period_timer(cfs_b, overrun, flags);
}
if (idle)
cfs_b->period_active = 0;
@@ -5220,7 +5232,6 @@ void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
cfs_b->period_timer.function = sched_cfs_period_timer;
hrtimer_init(&cfs_b->slack_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
cfs_b->slack_timer.function = sched_cfs_slack_timer;
- cfs_b->distribute_running = 0;
cfs_b->slack_started = false;
}
@@ -5505,28 +5516,27 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
list_add_leaf_cfs_rq(cfs_rq);
}
-enqueue_throttle:
- if (!se) {
- add_nr_running(rq, 1);
- /*
- * Since new tasks are assigned an initial util_avg equal to
- * half of the spare capacity of their CPU, tiny tasks have the
- * ability to cross the overutilized threshold, which will
- * result in the load balancer ruining all the task placement
- * done by EAS. As a way to mitigate that effect, do not account
- * for the first enqueue operation of new tasks during the
- * overutilized flag detection.
- *
- * A better way of solving this problem would be to wait for
- * the PELT signals of tasks to converge before taking them
- * into account, but that is not straightforward to implement,
- * and the following generally works well enough in practice.
- */
- if (flags & ENQUEUE_WAKEUP)
- update_overutilized_status(rq);
+ /* At this point se is NULL and we are at root level */
+ add_nr_running(rq, 1);
- }
+ /*
+ * Since new tasks are assigned an initial util_avg equal to
+ * half of the spare capacity of their CPU, tiny tasks have the
+ * ability to cross the overutilized threshold, which will
+ * result in the load balancer ruining all the task placement
+ * done by EAS. As a way to mitigate that effect, do not account
+ * for the first enqueue operation of new tasks during the
+ * overutilized flag detection.
+ *
+ * A better way of solving this problem would be to wait for
+ * the PELT signals of tasks to converge before taking them
+ * into account, but that is not straightforward to implement,
+ * and the following generally works well enough in practice.
+ */
+ if (flags & ENQUEUE_WAKEUP)
+ update_overutilized_status(rq);
+enqueue_throttle:
if (cfs_bandwidth_used()) {
/*
* When bandwidth control is enabled; the cfs_rq_throttled()
@@ -5736,7 +5746,7 @@ static int wake_wide(struct task_struct *p)
{
unsigned int master = current->wakee_flips;
unsigned int slave = p->wakee_flips;
- int factor = this_cpu_read(sd_llc_size);
+ int factor = __this_cpu_read(sd_llc_size);
if (master < slave)
swap(master, slave);
@@ -5845,8 +5855,7 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p,
}
static struct sched_group *
-find_idlest_group(struct sched_domain *sd, struct task_struct *p,
- int this_cpu, int sd_flag);
+find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu);
/*
* find_idlest_group_cpu - find the idlest CPU among the CPUs in the group.
@@ -5929,7 +5938,7 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p
continue;
}
- group = find_idlest_group(sd, p, cpu, sd_flag);
+ group = find_idlest_group(sd, p, cpu);
if (!group) {
sd = sd->child;
continue;
@@ -6670,9 +6679,6 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
rcu_read_lock();
for_each_domain(cpu, tmp) {
- if (!(tmp->flags & SD_LOAD_BALANCE))
- break;
-
/*
* If both 'cpu' and 'prev_cpu' are part of this domain,
* cpu is a valid SD_WAKE_AFFINE target.
@@ -8583,7 +8589,7 @@ static int idle_cpu_without(int cpu, struct task_struct *p)
*/
#ifdef CONFIG_SMP
- if (!llist_empty(&rq->wake_list))
+ if (rq->ttwu_pending)
return 0;
#endif
@@ -8701,8 +8707,7 @@ static bool update_pick_idlest(struct sched_group *idlest,
* Assumes p is allowed on at least one CPU in sd.
*/
static struct sched_group *
-find_idlest_group(struct sched_domain *sd, struct task_struct *p,
- int this_cpu, int sd_flag)
+find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
{
struct sched_group *idlest = NULL, *local = NULL, *group = sd->groups;
struct sg_lb_stats local_sgs, tmp_sgs;
@@ -9433,7 +9438,7 @@ static int active_load_balance_cpu_stop(void *data);
static int should_we_balance(struct lb_env *env)
{
struct sched_group *sg = env->sd->groups;
- int cpu, balance_cpu = -1;
+ int cpu;
/*
* Ensure the balancing environment is consistent; can happen
@@ -9454,18 +9459,12 @@ static int should_we_balance(struct lb_env *env)
if (!idle_cpu(cpu))
continue;
- balance_cpu = cpu;
- break;
+ /* Are we the first idle CPU? */
+ return cpu == env->dst_cpu;
}
- if (balance_cpu == -1)
- balance_cpu = group_balance_cpu(sg);
-
- /*
- * First idle CPU or the first CPU(busiest) in this sched group
- * is eligible for doing load balancing at this and above domains.
- */
- return balance_cpu == env->dst_cpu;
+ /* Are we the first CPU of this group ? */
+ return group_balance_cpu(sg) == env->dst_cpu;
}
/*
@@ -9818,9 +9817,8 @@ static int active_load_balance_cpu_stop(void *data)
/* Search for an sd spanning us and the target CPU. */
rcu_read_lock();
for_each_domain(target_cpu, sd) {
- if ((sd->flags & SD_LOAD_BALANCE) &&
- cpumask_test_cpu(busiest_cpu, sched_domain_span(sd)))
- break;
+ if (cpumask_test_cpu(busiest_cpu, sched_domain_span(sd)))
+ break;
}
if (likely(sd)) {
@@ -9909,9 +9907,6 @@ static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle)
}
max_cost += sd->max_newidle_lb_cost;
- if (!(sd->flags & SD_LOAD_BALANCE))
- continue;
-
/*
* Stop the load balance at this level. There is another
* CPU in our sched group which is doing load balancing more
@@ -10028,17 +10023,20 @@ static void kick_ilb(unsigned int flags)
if (ilb_cpu >= nr_cpu_ids)
return;
+ /*
+ * Access to rq::nohz_csd is serialized by NOHZ_KICK_MASK; he who sets
+ * the first flag owns it; cleared by nohz_csd_func().
+ */
flags = atomic_fetch_or(flags, nohz_flags(ilb_cpu));
if (flags & NOHZ_KICK_MASK)
return;
/*
- * Use smp_send_reschedule() instead of resched_cpu().
- * This way we generate a sched IPI on the target CPU which
+ * This way we generate an IPI on the target CPU which
* is idle. And the softirq performing nohz idle load balance
* will be run before returning from the IPI.
*/
- smp_send_reschedule(ilb_cpu);
+ smp_call_function_single_async(ilb_cpu, &cpu_rq(ilb_cpu)->nohz_csd);
}
/*
@@ -10376,20 +10374,14 @@ abort:
*/
static bool nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle)
{
- int this_cpu = this_rq->cpu;
- unsigned int flags;
+ unsigned int flags = this_rq->nohz_idle_balance;
- if (!(atomic_read(nohz_flags(this_cpu)) & NOHZ_KICK_MASK))
+ if (!flags)
return false;
- if (idle != CPU_IDLE) {
- atomic_andnot(NOHZ_KICK_MASK, nohz_flags(this_cpu));
- return false;
- }
+ this_rq->nohz_idle_balance = 0;
- /* could be _relaxed() */
- flags = atomic_fetch_andnot(NOHZ_KICK_MASK, nohz_flags(this_cpu));
- if (!(flags & NOHZ_KICK_MASK))
+ if (idle != CPU_IDLE)
return false;
_nohz_idle_balance(this_rq, flags, idle);
@@ -10449,7 +10441,7 @@ static inline void nohz_newidle_balance(struct rq *this_rq) { }
* 0 - failed, no new tasks
* > 0 - success, new (fair) tasks present
*/
-int newidle_balance(struct rq *this_rq, struct rq_flags *rf)
+static int newidle_balance(struct rq *this_rq, struct rq_flags *rf)
{
unsigned long next_balance = jiffies + HZ;
int this_cpu = this_rq->cpu;
@@ -10500,9 +10492,6 @@ int newidle_balance(struct rq *this_rq, struct rq_flags *rf)
int continue_balancing = 1;
u64 t0, domain_cost;
- if (!(sd->flags & SD_LOAD_BALANCE))
- continue;
-
if (this_rq->avg_idle < curr_cost + sd->max_newidle_lb_cost) {
update_next_balance(sd, &next_balance);
break;
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index b743bf38f08f..05deb81bb3e3 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -289,7 +289,11 @@ static void do_idle(void)
*/
smp_mb__after_atomic();
- sched_ttwu_pending();
+ /*
+ * RCU relies on this call to be done outside of an RCU read-side
+ * critical section.
+ */
+ flush_smp_call_function_from_idle();
schedule_idle();
if (unlikely(klp_patch_pending(current)))
diff --git a/kernel/sched/pelt.c b/kernel/sched/pelt.c
index b647d04d9c8b..b4b1ff96642f 100644
--- a/kernel/sched/pelt.c
+++ b/kernel/sched/pelt.c
@@ -237,6 +237,30 @@ ___update_load_sum(u64 now, struct sched_avg *sa,
return 1;
}
+/*
+ * When syncing *_avg with *_sum, we must take into account the current
+ * position in the PELT segment otherwise the remaining part of the segment
+ * will be considered as idle time whereas it's not yet elapsed and this will
+ * generate unwanted oscillation in the range [1002..1024[.
+ *
+ * The max value of *_sum varies with the position in the time segment and is
+ * equal to:
+ *
+ * LOAD_AVG_MAX*y + sa->period_contrib
+ *
+ * which can be simplified into:
+ *
+ * LOAD_AVG_MAX - 1024 + sa->period_contrib
+ *
+ * because LOAD_AVG_MAX*y == LOAD_AVG_MAX-1024
+ *
+ * The same care must be taken when a sched entity is added, updated or
+ * removed from a cfs_rq and we need to update sched_avg. Scheduler entities
+ * and the cfs rq, to which they are attached, have the same position in the
+ * time segment because they use the same clock. This means that we can use
+ * the period_contrib of cfs_rq when updating the sched_avg of a sched_entity
+ * if it's more convenient.
+ */
static __always_inline void
___update_load_avg(struct sched_avg *sa, unsigned long load)
{
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 45da29de3ecc..f395ddb75f38 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -9,6 +9,8 @@
int sched_rr_timeslice = RR_TIMESLICE;
int sysctl_sched_rr_timeslice = (MSEC_PER_SEC / HZ) * RR_TIMESLICE;
+/* More than 4 hours if BW_SHIFT equals 20. */
+static const u64 max_rt_runtime = MAX_BW;
static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun);
@@ -2585,6 +2587,12 @@ static int tg_set_rt_bandwidth(struct task_group *tg,
if (rt_period == 0)
return -EINVAL;
+ /*
+ * Bound quota to defend against overflow during the bandwidth shift.
+ */
+ if (rt_runtime != RUNTIME_INF && rt_runtime > max_rt_runtime)
+ return -EINVAL;
+
mutex_lock(&rt_constraints_mutex);
err = __rt_schedulable(tg, rt_period, rt_runtime);
if (err)
@@ -2702,7 +2710,9 @@ static int sched_rt_global_validate(void)
return -EINVAL;
if ((sysctl_sched_rt_runtime != RUNTIME_INF) &&
- (sysctl_sched_rt_runtime > sysctl_sched_rt_period))
+ ((sysctl_sched_rt_runtime > sysctl_sched_rt_period) ||
+ ((u64)sysctl_sched_rt_runtime *
+ NSEC_PER_USEC > max_rt_runtime)))
return -EINVAL;
return 0;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index db3a57675ccf..1d4e94c1e5fe 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -349,7 +349,6 @@ struct cfs_bandwidth {
u8 idle;
u8 period_active;
- u8 distribute_running;
u8 slack_started;
struct hrtimer period_timer;
struct hrtimer slack_timer;
@@ -890,12 +889,15 @@ struct rq {
#ifdef CONFIG_SMP
unsigned long last_blocked_load_update_tick;
unsigned int has_blocked_load;
+ call_single_data_t nohz_csd;
#endif /* CONFIG_SMP */
unsigned int nohz_tick_stopped;
- atomic_t nohz_flags;
+ atomic_t nohz_flags;
#endif /* CONFIG_NO_HZ_COMMON */
- unsigned long nr_load_updates;
+#ifdef CONFIG_SMP
+ unsigned int ttwu_pending;
+#endif
u64 nr_switches;
#ifdef CONFIG_UCLAMP_TASK
@@ -951,6 +953,7 @@ struct rq {
struct callback_head *balance_callback;
+ unsigned char nohz_idle_balance;
unsigned char idle_balance;
unsigned long misfit_task_load;
@@ -979,7 +982,7 @@ struct rq {
/* This is used to determine avg_idle's max value */
u64 max_idle_balance_cost;
-#endif
+#endif /* CONFIG_SMP */
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
u64 prev_irq_time;
@@ -1020,10 +1023,6 @@ struct rq {
unsigned int ttwu_local;
#endif
-#ifdef CONFIG_SMP
- struct llist_head wake_list;
-#endif
-
#ifdef CONFIG_CPU_IDLE
/* Must be inspected within a rcu lock section */
struct cpuidle_state *idle_state;
@@ -1367,8 +1366,6 @@ queue_balance_callback(struct rq *rq,
rq->balance_callback = head;
}
-extern void sched_ttwu_pending(void);
-
#define rcu_dereference_check_sched_domain(p) \
rcu_dereference_check((p), \
lockdep_is_held(&sched_domains_mutex))
@@ -1461,7 +1458,7 @@ struct sched_group {
* by attaching extra space to the end of the structure,
* depending on how many CPUs the kernel has booted up with)
*/
- unsigned long cpumask[0];
+ unsigned long cpumask[];
};
static inline struct cpumask *sched_group_span(struct sched_group *sg)
@@ -1504,15 +1501,11 @@ static inline void unregister_sched_domain_sysctl(void)
}
#endif
-extern int newidle_balance(struct rq *this_rq, struct rq_flags *rf);
-
-#else
-
-static inline void sched_ttwu_pending(void) { }
+extern void flush_smp_call_function_from_idle(void);
-static inline int newidle_balance(struct rq *this_rq, struct rq_flags *rf) { return 0; }
-
-#endif /* CONFIG_SMP */
+#else /* !CONFIG_SMP: */
+static inline void flush_smp_call_function_from_idle(void) { }
+#endif
#include "stats.h"
#include "autogroup.h"
@@ -1688,7 +1681,8 @@ static inline int task_on_rq_migrating(struct task_struct *p)
*/
#define WF_SYNC 0x01 /* Waker goes to sleep after wakeup */
#define WF_FORK 0x02 /* Child wakeup after fork */
-#define WF_MIGRATED 0x4 /* Internal use, task got migrated */
+#define WF_MIGRATED 0x04 /* Internal use, task got migrated */
+#define WF_ON_RQ 0x08 /* Wakee is on_rq */
/*
* To aid in avoiding the subversion of "niceness" due to uneven distribution
@@ -1918,6 +1912,8 @@ extern void init_dl_inactive_task_timer(struct sched_dl_entity *dl_se);
#define BW_SHIFT 20
#define BW_UNIT (1 << BW_SHIFT)
#define RATIO_SHIFT 8
+#define MAX_BW_BITS (64 - BW_SHIFT)
+#define MAX_BW ((1ULL << MAX_BW_BITS) - 1)
unsigned long to_ratio(u64 period, u64 runtime);
extern void init_entity_runnable_average(struct sched_entity *se);
diff --git a/kernel/sched/smp.h b/kernel/sched/smp.h
new file mode 100644
index 000000000000..9620e323162c
--- /dev/null
+++ b/kernel/sched/smp.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Scheduler internal SMP callback types and methods between the scheduler
+ * and other internal parts of the core kernel:
+ */
+
+extern void sched_ttwu_pending(void *arg);
+
+extern void send_call_function_single_ipi(int cpu);
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index fa64b2ee9fe6..ba81187bb7af 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -33,14 +33,6 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
cpumask_clear(groupmask);
printk(KERN_DEBUG "%*s domain-%d: ", level, "", level);
-
- if (!(sd->flags & SD_LOAD_BALANCE)) {
- printk("does not load-balance\n");
- if (sd->parent)
- printk(KERN_ERR "ERROR: !SD_LOAD_BALANCE domain has parent");
- return -1;
- }
-
printk(KERN_CONT "span=%*pbl level=%s\n",
cpumask_pr_args(sched_domain_span(sd)), sd->name);
@@ -151,8 +143,7 @@ static int sd_degenerate(struct sched_domain *sd)
return 1;
/* Following flags need at least 2 groups */
- if (sd->flags & (SD_LOAD_BALANCE |
- SD_BALANCE_NEWIDLE |
+ if (sd->flags & (SD_BALANCE_NEWIDLE |
SD_BALANCE_FORK |
SD_BALANCE_EXEC |
SD_SHARE_CPUCAPACITY |
@@ -183,15 +174,14 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
/* Flags needing groups don't count if only 1 group in parent */
if (parent->groups == parent->groups->next) {
- pflags &= ~(SD_LOAD_BALANCE |
- SD_BALANCE_NEWIDLE |
- SD_BALANCE_FORK |
- SD_BALANCE_EXEC |
- SD_ASYM_CPUCAPACITY |
- SD_SHARE_CPUCAPACITY |
- SD_SHARE_PKG_RESOURCES |
- SD_PREFER_SIBLING |
- SD_SHARE_POWERDOMAIN);
+ pflags &= ~(SD_BALANCE_NEWIDLE |
+ SD_BALANCE_FORK |
+ SD_BALANCE_EXEC |
+ SD_ASYM_CPUCAPACITY |
+ SD_SHARE_CPUCAPACITY |
+ SD_SHARE_PKG_RESOURCES |
+ SD_PREFER_SIBLING |
+ SD_SHARE_POWERDOMAIN);
if (nr_node_ids == 1)
pflags &= ~SD_SERIALIZE;
}
@@ -1351,8 +1341,7 @@ sd_init(struct sched_domain_topology_level *tl,
.cache_nice_tries = 0,
- .flags = 1*SD_LOAD_BALANCE
- | 1*SD_BALANCE_NEWIDLE
+ .flags = 1*SD_BALANCE_NEWIDLE
| 1*SD_BALANCE_EXEC
| 1*SD_BALANCE_FORK
| 0*SD_BALANCE_WAKE
diff --git a/kernel/scs.c b/kernel/scs.c
new file mode 100644
index 000000000000..222a7a9ad543
--- /dev/null
+++ b/kernel/scs.c
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Shadow Call Stack support.
+ *
+ * Copyright (C) 2019 Google LLC
+ */
+
+#include <linux/kasan.h>
+#include <linux/mm.h>
+#include <linux/scs.h>
+#include <linux/slab.h>
+#include <linux/vmstat.h>
+
+static struct kmem_cache *scs_cache;
+
+static void __scs_account(void *s, int account)
+{
+ struct page *scs_page = virt_to_page(s);
+
+ mod_zone_page_state(page_zone(scs_page), NR_KERNEL_SCS_KB,
+ account * (SCS_SIZE / SZ_1K));
+}
+
+static void *scs_alloc(int node)
+{
+ void *s = kmem_cache_alloc_node(scs_cache, GFP_SCS, node);
+
+ if (!s)
+ return NULL;
+
+ *__scs_magic(s) = SCS_END_MAGIC;
+
+ /*
+ * Poison the allocation to catch unintentional accesses to
+ * the shadow stack when KASAN is enabled.
+ */
+ kasan_poison_object_data(scs_cache, s);
+ __scs_account(s, 1);
+ return s;
+}
+
+static void scs_free(void *s)
+{
+ __scs_account(s, -1);
+ kasan_unpoison_object_data(scs_cache, s);
+ kmem_cache_free(scs_cache, s);
+}
+
+void __init scs_init(void)
+{
+ scs_cache = kmem_cache_create("scs_cache", SCS_SIZE, 0, 0, NULL);
+}
+
+int scs_prepare(struct task_struct *tsk, int node)
+{
+ void *s = scs_alloc(node);
+
+ if (!s)
+ return -ENOMEM;
+
+ task_scs(tsk) = task_scs_sp(tsk) = s;
+ return 0;
+}
+
+static void scs_check_usage(struct task_struct *tsk)
+{
+ static unsigned long highest;
+
+ unsigned long *p, prev, curr = highest, used = 0;
+
+ if (!IS_ENABLED(CONFIG_DEBUG_STACK_USAGE))
+ return;
+
+ for (p = task_scs(tsk); p < __scs_magic(tsk); ++p) {
+ if (!READ_ONCE_NOCHECK(*p))
+ break;
+ used++;
+ }
+
+ while (used > curr) {
+ prev = cmpxchg_relaxed(&highest, curr, used);
+
+ if (prev == curr) {
+ pr_info("%s (%d): highest shadow stack usage: %lu bytes\n",
+ tsk->comm, task_pid_nr(tsk), used);
+ break;
+ }
+
+ curr = prev;
+ }
+}
+
+void scs_release(struct task_struct *tsk)
+{
+ void *s = task_scs(tsk);
+
+ if (!s)
+ return;
+
+ WARN(task_scs_end_corrupted(tsk),
+ "corrupted shadow stack detected when freeing task\n");
+ scs_check_usage(tsk);
+ scs_free(s);
+}
diff --git a/kernel/signal.c b/kernel/signal.c
index 284fc1600063..5ca48cc5da76 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -3235,94 +3235,94 @@ int copy_siginfo_from_user(kernel_siginfo_t *to, const siginfo_t __user *from)
}
#ifdef CONFIG_COMPAT
-int copy_siginfo_to_user32(struct compat_siginfo __user *to,
- const struct kernel_siginfo *from)
-#if defined(CONFIG_X86_X32_ABI) || defined(CONFIG_IA32_EMULATION)
-{
- return __copy_siginfo_to_user32(to, from, in_x32_syscall());
-}
-int __copy_siginfo_to_user32(struct compat_siginfo __user *to,
- const struct kernel_siginfo *from, bool x32_ABI)
-#endif
+/**
+ * copy_siginfo_to_external32 - copy a kernel siginfo into a compat user siginfo
+ * @to: compat siginfo destination
+ * @from: kernel siginfo source
+ *
+ * Note: This function does not work properly for the SIGCHLD on x32, but
+ * fortunately it doesn't have to. The only valid callers for this function are
+ * copy_siginfo_to_user32, which is overridden for x32, and the coredump code.
+ * The latter does not care because SIGCHLD will never cause a coredump.
+ */
+void copy_siginfo_to_external32(struct compat_siginfo *to,
+ const struct kernel_siginfo *from)
{
- struct compat_siginfo new;
- memset(&new, 0, sizeof(new));
+ memset(to, 0, sizeof(*to));
- new.si_signo = from->si_signo;
- new.si_errno = from->si_errno;
- new.si_code = from->si_code;
+ to->si_signo = from->si_signo;
+ to->si_errno = from->si_errno;
+ to->si_code = from->si_code;
switch(siginfo_layout(from->si_signo, from->si_code)) {
case SIL_KILL:
- new.si_pid = from->si_pid;
- new.si_uid = from->si_uid;
+ to->si_pid = from->si_pid;
+ to->si_uid = from->si_uid;
break;
case SIL_TIMER:
- new.si_tid = from->si_tid;
- new.si_overrun = from->si_overrun;
- new.si_int = from->si_int;
+ to->si_tid = from->si_tid;
+ to->si_overrun = from->si_overrun;
+ to->si_int = from->si_int;
break;
case SIL_POLL:
- new.si_band = from->si_band;
- new.si_fd = from->si_fd;
+ to->si_band = from->si_band;
+ to->si_fd = from->si_fd;
break;
case SIL_FAULT:
- new.si_addr = ptr_to_compat(from->si_addr);
+ to->si_addr = ptr_to_compat(from->si_addr);
#ifdef __ARCH_SI_TRAPNO
- new.si_trapno = from->si_trapno;
+ to->si_trapno = from->si_trapno;
#endif
break;
case SIL_FAULT_MCEERR:
- new.si_addr = ptr_to_compat(from->si_addr);
+ to->si_addr = ptr_to_compat(from->si_addr);
#ifdef __ARCH_SI_TRAPNO
- new.si_trapno = from->si_trapno;
+ to->si_trapno = from->si_trapno;
#endif
- new.si_addr_lsb = from->si_addr_lsb;
+ to->si_addr_lsb = from->si_addr_lsb;
break;
case SIL_FAULT_BNDERR:
- new.si_addr = ptr_to_compat(from->si_addr);
+ to->si_addr = ptr_to_compat(from->si_addr);
#ifdef __ARCH_SI_TRAPNO
- new.si_trapno = from->si_trapno;
+ to->si_trapno = from->si_trapno;
#endif
- new.si_lower = ptr_to_compat(from->si_lower);
- new.si_upper = ptr_to_compat(from->si_upper);
+ to->si_lower = ptr_to_compat(from->si_lower);
+ to->si_upper = ptr_to_compat(from->si_upper);
break;
case SIL_FAULT_PKUERR:
- new.si_addr = ptr_to_compat(from->si_addr);
+ to->si_addr = ptr_to_compat(from->si_addr);
#ifdef __ARCH_SI_TRAPNO
- new.si_trapno = from->si_trapno;
+ to->si_trapno = from->si_trapno;
#endif
- new.si_pkey = from->si_pkey;
+ to->si_pkey = from->si_pkey;
break;
case SIL_CHLD:
- new.si_pid = from->si_pid;
- new.si_uid = from->si_uid;
- new.si_status = from->si_status;
-#ifdef CONFIG_X86_X32_ABI
- if (x32_ABI) {
- new._sifields._sigchld_x32._utime = from->si_utime;
- new._sifields._sigchld_x32._stime = from->si_stime;
- } else
-#endif
- {
- new.si_utime = from->si_utime;
- new.si_stime = from->si_stime;
- }
+ to->si_pid = from->si_pid;
+ to->si_uid = from->si_uid;
+ to->si_status = from->si_status;
+ to->si_utime = from->si_utime;
+ to->si_stime = from->si_stime;
break;
case SIL_RT:
- new.si_pid = from->si_pid;
- new.si_uid = from->si_uid;
- new.si_int = from->si_int;
+ to->si_pid = from->si_pid;
+ to->si_uid = from->si_uid;
+ to->si_int = from->si_int;
break;
case SIL_SYS:
- new.si_call_addr = ptr_to_compat(from->si_call_addr);
- new.si_syscall = from->si_syscall;
- new.si_arch = from->si_arch;
+ to->si_call_addr = ptr_to_compat(from->si_call_addr);
+ to->si_syscall = from->si_syscall;
+ to->si_arch = from->si_arch;
break;
}
+}
+int __copy_siginfo_to_user32(struct compat_siginfo __user *to,
+ const struct kernel_siginfo *from)
+{
+ struct compat_siginfo new;
+
+ copy_siginfo_to_external32(&new, from);
if (copy_to_user(to, &new, sizeof(struct compat_siginfo)))
return -EFAULT;
-
return 0;
}
diff --git a/kernel/smp.c b/kernel/smp.c
index 786092aabdcd..472c2b274c65 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -22,11 +22,9 @@
#include <linux/hypervisor.h>
#include "smpboot.h"
+#include "sched/smp.h"
-enum {
- CSD_FLAG_LOCK = 0x01,
- CSD_FLAG_SYNCHRONOUS = 0x02,
-};
+#define CSD_TYPE(_csd) ((_csd)->flags & CSD_FLAG_TYPE_MASK)
struct call_function_data {
call_single_data_t __percpu *csd;
@@ -84,6 +82,7 @@ int smpcfd_dying_cpu(unsigned int cpu)
* still pending.
*/
flush_smp_call_function_queue(false);
+ irq_work_run();
return 0;
}
@@ -134,15 +133,33 @@ static __always_inline void csd_unlock(call_single_data_t *csd)
static DEFINE_PER_CPU_SHARED_ALIGNED(call_single_data_t, csd_data);
+void __smp_call_single_queue(int cpu, struct llist_node *node)
+{
+ /*
+ * The list addition should be visible before sending the IPI
+ * handler locks the list to pull the entry off it because of
+ * normal cache coherency rules implied by spinlocks.
+ *
+ * If IPIs can go out of order to the cache coherency protocol
+ * in an architecture, sufficient synchronisation should be added
+ * to arch code to make it appear to obey cache coherency WRT
+ * locking and barrier primitives. Generic code isn't really
+ * equipped to do the right thing...
+ */
+ if (llist_add(node, &per_cpu(call_single_queue, cpu)))
+ send_call_function_single_ipi(cpu);
+}
+
/*
* Insert a previously allocated call_single_data_t element
* for execution on the given CPU. data must already have
* ->func, ->info, and ->flags set.
*/
-static int generic_exec_single(int cpu, call_single_data_t *csd,
- smp_call_func_t func, void *info)
+static int generic_exec_single(int cpu, call_single_data_t *csd)
{
if (cpu == smp_processor_id()) {
+ smp_call_func_t func = csd->func;
+ void *info = csd->info;
unsigned long flags;
/*
@@ -156,28 +173,12 @@ static int generic_exec_single(int cpu, call_single_data_t *csd,
return 0;
}
-
if ((unsigned)cpu >= nr_cpu_ids || !cpu_online(cpu)) {
csd_unlock(csd);
return -ENXIO;
}
- csd->func = func;
- csd->info = info;
-
- /*
- * The list addition should be visible before sending the IPI
- * handler locks the list to pull the entry off it because of
- * normal cache coherency rules implied by spinlocks.
- *
- * If IPIs can go out of order to the cache coherency protocol
- * in an architecture, sufficient synchronisation should be added
- * to arch code to make it appear to obey cache coherency WRT
- * locking and barrier primitives. Generic code isn't really
- * equipped to do the right thing...
- */
- if (llist_add(&csd->llist, &per_cpu(call_single_queue, cpu)))
- arch_send_call_function_single_ipi(cpu);
+ __smp_call_single_queue(cpu, &csd->llist);
return 0;
}
@@ -209,9 +210,9 @@ void generic_smp_call_function_single_interrupt(void)
*/
static void flush_smp_call_function_queue(bool warn_cpu_offline)
{
- struct llist_head *head;
- struct llist_node *entry;
call_single_data_t *csd, *csd_next;
+ struct llist_node *entry, *prev;
+ struct llist_head *head;
static bool warned;
lockdep_assert_irqs_disabled();
@@ -230,32 +231,99 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline)
* We don't have to use the _safe() variant here
* because we are not invoking the IPI handlers yet.
*/
- llist_for_each_entry(csd, entry, llist)
- pr_warn("IPI callback %pS sent to offline CPU\n",
- csd->func);
+ llist_for_each_entry(csd, entry, llist) {
+ switch (CSD_TYPE(csd)) {
+ case CSD_TYPE_ASYNC:
+ case CSD_TYPE_SYNC:
+ case CSD_TYPE_IRQ_WORK:
+ pr_warn("IPI callback %pS sent to offline CPU\n",
+ csd->func);
+ break;
+
+ case CSD_TYPE_TTWU:
+ pr_warn("IPI task-wakeup sent to offline CPU\n");
+ break;
+
+ default:
+ pr_warn("IPI callback, unknown type %d, sent to offline CPU\n",
+ CSD_TYPE(csd));
+ break;
+ }
+ }
}
+ /*
+ * First; run all SYNC callbacks, people are waiting for us.
+ */
+ prev = NULL;
llist_for_each_entry_safe(csd, csd_next, entry, llist) {
- smp_call_func_t func = csd->func;
- void *info = csd->info;
-
/* Do we wait until *after* callback? */
- if (csd->flags & CSD_FLAG_SYNCHRONOUS) {
+ if (CSD_TYPE(csd) == CSD_TYPE_SYNC) {
+ smp_call_func_t func = csd->func;
+ void *info = csd->info;
+
+ if (prev) {
+ prev->next = &csd_next->llist;
+ } else {
+ entry = &csd_next->llist;
+ }
+
func(info);
csd_unlock(csd);
} else {
- csd_unlock(csd);
- func(info);
+ prev = &csd->llist;
}
}
+ if (!entry)
+ return;
+
/*
- * Handle irq works queued remotely by irq_work_queue_on().
- * Smp functions above are typically synchronous so they
- * better run first since some other CPUs may be busy waiting
- * for them.
+ * Second; run all !SYNC callbacks.
*/
- irq_work_run();
+ prev = NULL;
+ llist_for_each_entry_safe(csd, csd_next, entry, llist) {
+ int type = CSD_TYPE(csd);
+
+ if (type != CSD_TYPE_TTWU) {
+ if (prev) {
+ prev->next = &csd_next->llist;
+ } else {
+ entry = &csd_next->llist;
+ }
+
+ if (type == CSD_TYPE_ASYNC) {
+ smp_call_func_t func = csd->func;
+ void *info = csd->info;
+
+ csd_unlock(csd);
+ func(info);
+ } else if (type == CSD_TYPE_IRQ_WORK) {
+ irq_work_single(csd);
+ }
+
+ } else {
+ prev = &csd->llist;
+ }
+ }
+
+ /*
+ * Third; only CSD_TYPE_TTWU is left, issue those.
+ */
+ if (entry)
+ sched_ttwu_pending(entry);
+}
+
+void flush_smp_call_function_from_idle(void)
+{
+ unsigned long flags;
+
+ if (llist_empty(this_cpu_ptr(&call_single_queue)))
+ return;
+
+ local_irq_save(flags);
+ flush_smp_call_function_queue(true);
+ local_irq_restore(flags);
}
/*
@@ -271,7 +339,7 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
{
call_single_data_t *csd;
call_single_data_t csd_stack = {
- .flags = CSD_FLAG_LOCK | CSD_FLAG_SYNCHRONOUS,
+ .flags = CSD_FLAG_LOCK | CSD_TYPE_SYNC,
};
int this_cpu;
int err;
@@ -305,7 +373,10 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
csd_lock(csd);
}
- err = generic_exec_single(cpu, csd, func, info);
+ csd->func = func;
+ csd->info = info;
+
+ err = generic_exec_single(cpu, csd);
if (wait)
csd_lock_wait(csd);
@@ -351,7 +422,7 @@ int smp_call_function_single_async(int cpu, call_single_data_t *csd)
csd->flags = CSD_FLAG_LOCK;
smp_wmb();
- err = generic_exec_single(cpu, csd, csd->func, csd->info);
+ err = generic_exec_single(cpu, csd);
out:
preempt_enable();
@@ -466,7 +537,7 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
csd_lock(csd);
if (wait)
- csd->flags |= CSD_FLAG_SYNCHRONOUS;
+ csd->flags |= CSD_TYPE_SYNC;
csd->func = func;
csd->info = info;
if (llist_add(&csd->llist, &per_cpu(call_single_queue, cpu)))
@@ -598,6 +669,24 @@ void __init smp_init(void)
{
int num_nodes, num_cpus;
+ /*
+ * Ensure struct irq_work layout matches so that
+ * flush_smp_call_function_queue() can do horrible things.
+ */
+ BUILD_BUG_ON(offsetof(struct irq_work, llnode) !=
+ offsetof(struct __call_single_data, llist));
+ BUILD_BUG_ON(offsetof(struct irq_work, func) !=
+ offsetof(struct __call_single_data, func));
+ BUILD_BUG_ON(offsetof(struct irq_work, flags) !=
+ offsetof(struct __call_single_data, flags));
+
+ /*
+ * Assert the CSD_TYPE_TTWU layout is similar enough
+ * for task_struct to be on the @call_single_queue.
+ */
+ BUILD_BUG_ON(offsetof(struct task_struct, wake_entry_type) - offsetof(struct task_struct, wake_entry) !=
+ offsetof(struct __call_single_data, flags) - offsetof(struct __call_single_data, llist));
+
idle_threads_init();
cpuhp_threads_init();
@@ -620,7 +709,7 @@ void __init smp_init(void)
* early_boot_irqs_disabled is set. Use local_irq_save/restore() instead
* of local_irq_disable/enable().
*/
-void on_each_cpu(void (*func) (void *info), void *info, int wait)
+void on_each_cpu(smp_call_func_t func, void *info, int wait)
{
unsigned long flags;
diff --git a/kernel/sys.c b/kernel/sys.c
index d325f3ab624a..891667a49bb7 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2262,7 +2262,7 @@ int __weak arch_prctl_spec_ctrl_set(struct task_struct *t, unsigned long which,
return -EINVAL;
}
-#define PR_IO_FLUSHER (PF_MEMALLOC_NOIO | PF_LESS_THROTTLE)
+#define PR_IO_FLUSHER (PF_MEMALLOC_NOIO | PF_LOCAL_THROTTLE)
SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
unsigned long, arg4, unsigned long, arg5)
@@ -2634,6 +2634,7 @@ struct compat_sysinfo {
COMPAT_SYSCALL_DEFINE1(sysinfo, struct compat_sysinfo __user *, info)
{
struct sysinfo s;
+ struct compat_sysinfo s_32;
do_sysinfo(&s);
@@ -2658,23 +2659,23 @@ COMPAT_SYSCALL_DEFINE1(sysinfo, struct compat_sysinfo __user *, info)
s.freehigh >>= bitcount;
}
- if (!access_ok(info, sizeof(struct compat_sysinfo)) ||
- __put_user(s.uptime, &info->uptime) ||
- __put_user(s.loads[0], &info->loads[0]) ||
- __put_user(s.loads[1], &info->loads[1]) ||
- __put_user(s.loads[2], &info->loads[2]) ||
- __put_user(s.totalram, &info->totalram) ||
- __put_user(s.freeram, &info->freeram) ||
- __put_user(s.sharedram, &info->sharedram) ||
- __put_user(s.bufferram, &info->bufferram) ||
- __put_user(s.totalswap, &info->totalswap) ||
- __put_user(s.freeswap, &info->freeswap) ||
- __put_user(s.procs, &info->procs) ||
- __put_user(s.totalhigh, &info->totalhigh) ||
- __put_user(s.freehigh, &info->freehigh) ||
- __put_user(s.mem_unit, &info->mem_unit))
+ memset(&s_32, 0, sizeof(s_32));
+ s_32.uptime = s.uptime;
+ s_32.loads[0] = s.loads[0];
+ s_32.loads[1] = s.loads[1];
+ s_32.loads[2] = s.loads[2];
+ s_32.totalram = s.totalram;
+ s_32.freeram = s.freeram;
+ s_32.sharedram = s.sharedram;
+ s_32.bufferram = s.bufferram;
+ s_32.totalswap = s.totalswap;
+ s_32.freeswap = s.freeswap;
+ s_32.procs = s.procs;
+ s_32.totalhigh = s.totalhigh;
+ s_32.freehigh = s.freehigh;
+ s_32.mem_unit = s.mem_unit;
+ if (copy_to_user(info, &s_32, sizeof(s_32)))
return -EFAULT;
-
return 0;
}
#endif /* CONFIG_COMPAT */
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 17c7633d90fc..7efe0d499c1d 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2333,30 +2333,32 @@ static struct ctl_table kern_table[] = {
.proc_handler = proc_dointvec,
},
#endif
-#if defined(CONFIG_X86)
+
+#if (defined(CONFIG_X86_32) || defined(CONFIG_PARISC)) && \
+ defined(CONFIG_DEBUG_STACKOVERFLOW)
{
- .procname = "panic_on_unrecovered_nmi",
- .data = &panic_on_unrecovered_nmi,
+ .procname = "panic_on_stackoverflow",
+ .data = &sysctl_panic_on_stackoverflow,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
+#endif
+#if defined(CONFIG_X86)
{
- .procname = "panic_on_io_nmi",
- .data = &panic_on_io_nmi,
+ .procname = "panic_on_unrecovered_nmi",
+ .data = &panic_on_unrecovered_nmi,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
-#ifdef CONFIG_DEBUG_STACKOVERFLOW
{
- .procname = "panic_on_stackoverflow",
- .data = &sysctl_panic_on_stackoverflow,
+ .procname = "panic_on_io_nmi",
+ .data = &panic_on_io_nmi,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
-#endif
{
.procname = "bootloader_type",
.data = &bootloader_type,
diff --git a/kernel/time/namespace.c b/kernel/time/namespace.c
index 53bce347cd50..5d9fc22d836a 100644
--- a/kernel/time/namespace.c
+++ b/kernel/time/namespace.c
@@ -280,8 +280,9 @@ static void timens_put(struct ns_common *ns)
put_time_ns(to_time_ns(ns));
}
-static int timens_install(struct nsproxy *nsproxy, struct ns_common *new)
+static int timens_install(struct nsset *nsset, struct ns_common *new)
{
+ struct nsproxy *nsproxy = nsset->nsproxy;
struct time_namespace *ns = to_time_ns(new);
int err;
@@ -289,7 +290,7 @@ static int timens_install(struct nsproxy *nsproxy, struct ns_common *new)
return -EUSERS;
if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN) ||
- !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
+ !ns_capable(nsset->cred->user_ns, CAP_SYS_ADMIN))
return -EPERM;
timens_set_vvar_page(current, ns);
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 743647005f64..24876faac753 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -10,11 +10,6 @@ config USER_STACKTRACE_SUPPORT
config NOP_TRACER
bool
-config HAVE_FTRACE_NMI_ENTER
- bool
- help
- See Documentation/trace/ftrace-design.rst
-
config HAVE_FUNCTION_TRACER
bool
help
@@ -72,11 +67,6 @@ config RING_BUFFER
select TRACE_CLOCK
select IRQ_WORK
-config FTRACE_NMI_ENTER
- bool
- depends on HAVE_FTRACE_NMI_ENTER
- default y
-
config EVENT_TRACING
select CONTEXT_SWITCH_TRACER
select GLOB
@@ -158,6 +148,7 @@ config FUNCTION_TRACER
select CONTEXT_SWITCH_TRACER
select GLOB
select TASKS_RCU if PREEMPTION
+ select TASKS_RUDE_RCU
help
Enable the kernel to trace every kernel function. This is done
by using a compiler feature to insert a small, 5-byte No-Operation
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index ca39dc3230cb..ea47f2084087 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -170,10 +170,10 @@ void __trace_note_message(struct blk_trace *bt, struct blkcg *blkcg,
if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP))
blkcg = NULL;
#ifdef CONFIG_BLK_CGROUP
- trace_note(bt, 0, BLK_TN_MESSAGE, buf, n,
+ trace_note(bt, current->pid, BLK_TN_MESSAGE, buf, n,
blkcg ? cgroup_id(blkcg->css.cgroup) : 1);
#else
- trace_note(bt, 0, BLK_TN_MESSAGE, buf, n, 0);
+ trace_note(bt, current->pid, BLK_TN_MESSAGE, buf, n, 0);
#endif
local_irq_restore(flags);
}
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index c41186417d93..3744372a24e2 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1729,7 +1729,7 @@ int perf_event_query_prog_array(struct perf_event *event, void __user *info)
u32 *ids, prog_cnt, ids_len;
int ret;
- if (!capable(CAP_SYS_ADMIN))
+ if (!perfmon_capable())
return -EPERM;
if (event->attr.type != PERF_TYPE_TRACEPOINT)
return -EINVAL;
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index bd030b1b9514..b5765aeea698 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -160,17 +160,6 @@ static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip,
op->saved_func(ip, parent_ip, op, regs);
}
-static void ftrace_sync(struct work_struct *work)
-{
- /*
- * This function is just a stub to implement a hard force
- * of synchronize_rcu(). This requires synchronizing
- * tasks even in userspace and idle.
- *
- * Yes, function tracing is rude.
- */
-}
-
static void ftrace_sync_ipi(void *data)
{
/* Probably not needed, but do it anyway */
@@ -256,7 +245,7 @@ static void update_ftrace_function(void)
* Make sure all CPUs see this. Yes this is slow, but static
* tracing is slow and nasty to have enabled.
*/
- schedule_on_each_cpu(ftrace_sync);
+ synchronize_rcu_tasks_rude();
/* Now all cpus are using the list ops. */
function_trace_op = set_function_trace_op;
/* Make sure the function_trace_op is visible on all CPUs */
@@ -2932,7 +2921,7 @@ int ftrace_shutdown(struct ftrace_ops *ops, int command)
* infrastructure to do the synchronization, thus we must do it
* ourselves.
*/
- schedule_on_each_cpu(ftrace_sync);
+ synchronize_rcu_tasks_rude();
/*
* When the kernel is preemptive, tasks can be preempted
@@ -5888,7 +5877,7 @@ ftrace_graph_release(struct inode *inode, struct file *file)
* infrastructure to do the synchronization, thus we must do it
* ourselves.
*/
- schedule_on_each_cpu(ftrace_sync);
+ synchronize_rcu_tasks_rude();
free_ftrace_hash(old_hash);
}
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index db0ad96dfd50..3ab27022c20f 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -6305,13 +6305,6 @@ static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
__free_page(spd->pages[idx]);
}
-static const struct pipe_buf_operations tracing_pipe_buf_ops = {
- .confirm = generic_pipe_buf_confirm,
- .release = generic_pipe_buf_release,
- .steal = generic_pipe_buf_steal,
- .get = generic_pipe_buf_get,
-};
-
static size_t
tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
{
@@ -6373,7 +6366,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
.partial = partial_def,
.nr_pages = 0, /* This gets updated below. */
.nr_pages_max = PIPE_DEF_BUFFERS,
- .ops = &tracing_pipe_buf_ops,
+ .ops = &default_pipe_buf_ops,
.spd_release = tracing_spd_release_pipe,
};
ssize_t ret;
@@ -7582,9 +7575,7 @@ static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
/* Pipe buffer operations for a buffer. */
static const struct pipe_buf_operations buffer_pipe_buf_ops = {
- .confirm = generic_pipe_buf_confirm,
.release = buffer_pipe_buf_release,
- .steal = generic_pipe_buf_nosteal,
.get = buffer_pipe_buf_get,
};
@@ -8527,18 +8518,6 @@ static int allocate_trace_buffers(struct trace_array *tr, int size)
allocate_snapshot = false;
#endif
- /*
- * Because of some magic with the way alloc_percpu() works on
- * x86_64, we need to synchronize the pgd of all the tables,
- * otherwise the trace events that happen in x86_64 page fault
- * handlers can't cope with accessing the chance that a
- * alloc_percpu()'d memory might be touched in the page fault trace
- * event. Oh, and we need to audit all other alloc_percpu() and vmalloc()
- * calls in tracing, because something might get triggered within a
- * page fault trace event!
- */
- vmalloc_sync_mappings();
-
return 0;
}
diff --git a/kernel/trace/trace_preemptirq.c b/kernel/trace/trace_preemptirq.c
index 4d8e99fdbbbe..fb0691b8a88d 100644
--- a/kernel/trace/trace_preemptirq.c
+++ b/kernel/trace/trace_preemptirq.c
@@ -19,6 +19,24 @@
/* Per-cpu variable to prevent redundant calls when IRQs already off */
static DEFINE_PER_CPU(int, tracing_irq_cpu);
+/*
+ * Like trace_hardirqs_on() but without the lockdep invocation. This is
+ * used in the low level entry code where the ordering vs. RCU is important
+ * and lockdep uses a staged approach which splits the lockdep hardirq
+ * tracking into an RCU on and an RCU off section.
+ */
+void trace_hardirqs_on_prepare(void)
+{
+ if (this_cpu_read(tracing_irq_cpu)) {
+ if (!in_nmi())
+ trace_irq_enable(CALLER_ADDR0, CALLER_ADDR1);
+ tracer_hardirqs_on(CALLER_ADDR0, CALLER_ADDR1);
+ this_cpu_write(tracing_irq_cpu, 0);
+ }
+}
+EXPORT_SYMBOL(trace_hardirqs_on_prepare);
+NOKPROBE_SYMBOL(trace_hardirqs_on_prepare);
+
void trace_hardirqs_on(void)
{
if (this_cpu_read(tracing_irq_cpu)) {
@@ -28,11 +46,31 @@ void trace_hardirqs_on(void)
this_cpu_write(tracing_irq_cpu, 0);
}
+ lockdep_hardirqs_on_prepare(CALLER_ADDR0);
lockdep_hardirqs_on(CALLER_ADDR0);
}
EXPORT_SYMBOL(trace_hardirqs_on);
NOKPROBE_SYMBOL(trace_hardirqs_on);
+/*
+ * Like trace_hardirqs_off() but without the lockdep invocation. This is
+ * used in the low level entry code where the ordering vs. RCU is important
+ * and lockdep uses a staged approach which splits the lockdep hardirq
+ * tracking into an RCU on and an RCU off section.
+ */
+void trace_hardirqs_off_prepare(void)
+{
+ if (!this_cpu_read(tracing_irq_cpu)) {
+ this_cpu_write(tracing_irq_cpu, 1);
+ tracer_hardirqs_off(CALLER_ADDR0, CALLER_ADDR1);
+ if (!in_nmi())
+ trace_irq_disable(CALLER_ADDR0, CALLER_ADDR1);
+ }
+
+}
+EXPORT_SYMBOL(trace_hardirqs_off_prepare);
+NOKPROBE_SYMBOL(trace_hardirqs_off_prepare);
+
void trace_hardirqs_off(void)
{
if (!this_cpu_read(tracing_irq_cpu)) {
@@ -56,6 +94,7 @@ __visible void trace_hardirqs_on_caller(unsigned long caller_addr)
this_cpu_write(tracing_irq_cpu, 0);
}
+ lockdep_hardirqs_on_prepare(CALLER_ADDR0);
lockdep_hardirqs_on(CALLER_ADDR0);
}
EXPORT_SYMBOL(trace_hardirqs_on_caller);
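The two _prepare() helpers added above exist so that low-level entry code can emit the tracer notification and the final lockdep state change at different points relative to RCU. A minimal sketch of the intended ordering on an exit-to-user path (illustrative only, not taken from any particular architecture's entry code):

static __always_inline void example_exit_to_user(void)
{
	/* Tracer half first, while RCU is still watching this CPU. */
	trace_hardirqs_on_prepare();
	lockdep_hardirqs_on_prepare(CALLER_ADDR0);

	/* ... arch-specific, non-instrumentable exit work runs here ... */

	/* Flip the lockdep hardirq state last, just before returning. */
	lockdep_hardirqs_on(CALLER_ADDR0);
}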
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 8eadadc478f9..87804e0371fe 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -1253,7 +1253,7 @@ static void userns_put(struct ns_common *ns)
put_user_ns(to_user_ns(ns));
}
-static int userns_install(struct nsproxy *nsproxy, struct ns_common *ns)
+static int userns_install(struct nsset *nsset, struct ns_common *ns)
{
struct user_namespace *user_ns = to_user_ns(ns);
struct cred *cred;
@@ -1274,14 +1274,14 @@ static int userns_install(struct nsproxy *nsproxy, struct ns_common *ns)
if (!ns_capable(user_ns, CAP_SYS_ADMIN))
return -EPERM;
- cred = prepare_creds();
+ cred = nsset_cred(nsset);
if (!cred)
- return -ENOMEM;
+ return -EINVAL;
put_user_ns(cred->user_ns);
set_cred_user_ns(cred, get_user_ns(user_ns));
- return commit_creds(cred);
+ return 0;
}
struct ns_common *ns_get_owner(struct ns_common *ns)
diff --git a/kernel/utsname.c b/kernel/utsname.c
index f0e491193009..e488d0e2ab45 100644
--- a/kernel/utsname.c
+++ b/kernel/utsname.c
@@ -140,12 +140,13 @@ static void utsns_put(struct ns_common *ns)
put_uts_ns(to_uts_ns(ns));
}
-static int utsns_install(struct nsproxy *nsproxy, struct ns_common *new)
+static int utsns_install(struct nsset *nsset, struct ns_common *new)
{
+ struct nsproxy *nsproxy = nsset->nsproxy;
struct uts_namespace *ns = to_uts_ns(new);
if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN) ||
- !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
+ !ns_capable(nsset->cred->user_ns, CAP_SYS_ADMIN))
return -EPERM;
get_uts_ns(ns);
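The time, user and UTS namespace conversions above all follow the same shape: the install callback now receives the whole nsset, takes the nsproxy from it, and checks capabilities against the credentials staged in the nsset instead of current_user_ns(). A sketch of that pattern for a hypothetical namespace type (the foo_* names are placeholders, not real kernel symbols):

static int fooens_install(struct nsset *nsset, struct ns_common *new)
{
	struct nsproxy *nsproxy = nsset->nsproxy;
	struct foo_namespace *ns = to_foo_ns(new);

	/* Check the creds staged in the nsset, not the current task's. */
	if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN) ||
	    !ns_capable(nsset->cred->user_ns, CAP_SYS_ADMIN))
		return -EPERM;

	get_foo_ns(ns);
	put_foo_ns(nsproxy->foo_ns);
	nsproxy->foo_ns = ns;
	return 0;	/* the caller owning the nsset commits the creds */
}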
diff --git a/lib/Kconfig b/lib/Kconfig
index 5d53f9609c25..df3f3da95990 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -19,6 +19,9 @@ config RAID6_PQ_BENCHMARK
Benchmark all available RAID6 PQ functions on init and choose the
fastest one.
+config LINEAR_RANGES
+ tristate
+
config PACKING
bool "Generic bitfield packing and unpacking"
default n
@@ -80,6 +83,9 @@ config ARCH_USE_CMPXCHG_LOCKREF
config ARCH_HAS_FAST_MULTIPLIER
bool
+config ARCH_USE_SYM_ANNOTATIONS
+ bool
+
config INDIRECT_PIO
bool "Access I/O in non-MMIO mode"
depends on ARM64
@@ -427,7 +433,7 @@ config INTERVAL_TREE
See:
- Documentation/rbtree.txt
+ Documentation/core-api/rbtree.rst
for more information.
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 21d9c5f6e7ec..0217ed126f77 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -369,6 +369,11 @@ config STACK_VALIDATION
For more information, see
tools/objtool/Documentation/stack-validation.txt.
+config VMLINUX_VALIDATION
+ bool
+ depends on STACK_VALIDATION && DEBUG_ENTRY && !PARAVIRT
+ default y
+
config DEBUG_FORCE_WEAK_PER_CPU
bool "Force weak per-cpu definitions"
depends on DEBUG_KERNEL
@@ -1510,7 +1515,7 @@ config PROVIDE_OHCI1394_DMA_INIT
This code (~1k) is freed after boot. By then, the firewire stack
in charge of the OHCI-1394 controllers should be used instead.
- See Documentation/debugging-via-ohci1394.txt for more information.
+ See Documentation/core-api/debugging-via-ohci1394.rst for more information.
source "samples/Kconfig"
@@ -2092,6 +2097,18 @@ config LIST_KUNIT_TEST
If unsure, say N.
+config LINEAR_RANGES_TEST
+ tristate "KUnit test for linear_ranges"
+ depends on KUNIT
+ select LINEAR_RANGES
+ help
+ This builds the linear_ranges unit test, which runs on boot.
+	  Tests the correctness of the linear_ranges logic.
+ For more information on KUnit and unit tests in general please refer
+ to the KUnit documentation in Documentation/dev-tools/kunit/.
+
+ If unsure, say N.
+
config TEST_UDELAY
tristate "udelay test driver"
help
@@ -2201,6 +2218,19 @@ config TEST_MEMINIT
If unsure, say N.
+config TEST_HMM
+ tristate "Test HMM (Heterogeneous Memory Management)"
+ depends on TRANSPARENT_HUGEPAGE
+ depends on DEVICE_PRIVATE
+ select HMM_MIRROR
+ select MMU_NOTIFIER
+ help
+ This is a pseudo device driver solely for testing HMM.
+ Say M here if you want to build the HMM test module.
+	  Doing so will allow you to run tools/testing/selftests/vm/hmm-tests.
+
+ If unsure, say N.
+
endif # RUNTIME_TESTING_MENU
config MEMTEST
diff --git a/lib/Kconfig.kgdb b/lib/Kconfig.kgdb
index 933680b59e2d..ffa7a76de086 100644
--- a/lib/Kconfig.kgdb
+++ b/lib/Kconfig.kgdb
@@ -124,4 +124,22 @@ config KDB_CONTINUE_CATASTROPHIC
CONFIG_KDB_CONTINUE_CATASTROPHIC == 2. KDB forces a reboot.
If you are not sure, say 0.
+config ARCH_HAS_EARLY_DEBUG
+ bool
+ default n
+ help
+ If an architecture can definitely handle entering the debugger
+	  when early_param's are parsed then it should select this config.
+ Otherwise, if "kgdbwait" is passed on the kernel command line it
+ won't actually be processed until dbg_late_init() just after the
+ call to kgdb_arch_late() is made.
+
+ NOTE: Even if this isn't selected by an architecture we will
+ still try to register kgdb to handle breakpoints and crashes
+ when early_param's are parsed, we just won't act on the
+ "kgdbwait" parameter until dbg_late_init(). If you get a
+ crash and try to drop into kgdb somewhere between these two
+ places you might or might not end up being able to use kgdb
+ depending on exactly how far along the architecture has initted.
+
endif # KGDB
diff --git a/lib/Kconfig.ubsan b/lib/Kconfig.ubsan
index 929211039bac..27bcc2568c95 100644
--- a/lib/Kconfig.ubsan
+++ b/lib/Kconfig.ubsan
@@ -63,7 +63,7 @@ config UBSAN_SANITIZE_ALL
config UBSAN_ALIGNMENT
bool "Enable checks for pointers alignment"
default !HAVE_EFFICIENT_UNALIGNED_ACCESS
- depends on !X86 || !COMPILE_TEST
+ depends on !UBSAN_TRAP
help
This option enables the check of unaligned memory accesses.
Enabling this option on architectures that support unaligned
diff --git a/lib/Makefile b/lib/Makefile
index 685aee60de1d..5adf8949a757 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -92,6 +92,7 @@ obj-$(CONFIG_TEST_STACKINIT) += test_stackinit.o
obj-$(CONFIG_TEST_BLACKHOLE_DEV) += test_blackhole_dev.o
obj-$(CONFIG_TEST_MEMINIT) += test_meminit.o
obj-$(CONFIG_TEST_LOCKUP) += test_lockup.o
+obj-$(CONFIG_TEST_HMM) += test_hmm.o
obj-$(CONFIG_TEST_LIVEPATCH) += livepatch/
@@ -125,6 +126,7 @@ obj-$(CONFIG_DEBUG_LIST) += list_debug.o
obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o
obj-$(CONFIG_BITREVERSE) += bitrev.o
+obj-$(CONFIG_LINEAR_RANGES) += linear_ranges.o
obj-$(CONFIG_PACKING) += packing.o
obj-$(CONFIG_CRC_CCITT) += crc-ccitt.o
obj-$(CONFIG_CRC16) += crc16.o
@@ -309,3 +311,4 @@ obj-$(CONFIG_OBJAGG) += objagg.o
# KUnit tests
obj-$(CONFIG_LIST_KUNIT_TEST) += list-test.o
+obj-$(CONFIG_LINEAR_RANGES_TEST) += test_linear_ranges.o
diff --git a/lib/bitmap.c b/lib/bitmap.c
index 89260aa342d6..21a7640c5eed 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -182,21 +182,22 @@ EXPORT_SYMBOL(__bitmap_shift_left);
*
* In pictures, example for a big-endian 32-bit architecture:
*
- * @src:
- * 31 63
- * | |
- * 10000000 11000001 11110010 00010101 10000000 11000001 01110010 00010101
- * | | | |
- * 16 14 0 32
- *
- * if @cut is 3, and @first is 14, bits 14-16 in @src are cut and @dst is:
- *
- * 31 63
- * | |
- * 10110000 00011000 00110010 00010101 00010000 00011000 00101110 01000010
- * | | |
- * 14 (bit 17 0 32
- * from @src)
+ * The @src bitmap is::
+ *
+ * 31 63
+ * | |
+ * 10000000 11000001 11110010 00010101 10000000 11000001 01110010 00010101
+ * | | | |
+ * 16 14 0 32
+ *
+ * if @cut is 3, and @first is 14, bits 14-16 in @src are cut and @dst is::
+ *
+ * 31 63
+ * | |
+ * 10110000 00011000 00110010 00010101 00010000 00011000 00101110 01000010
+ * | | |
+ * 14 (bit 17 0 32
+ * from @src)
*
* Note that @dst and @src might overlap partially or entirely.
*
diff --git a/lib/checksum.c b/lib/checksum.c
index de032ad96f4a..7ac65a0000ff 100644
--- a/lib/checksum.c
+++ b/lib/checksum.c
@@ -146,26 +146,6 @@ __sum16 ip_compute_csum(const void *buff, int len)
EXPORT_SYMBOL(ip_compute_csum);
/*
- * copy from fs while checksumming, otherwise like csum_partial
- */
-__wsum
-csum_partial_copy_from_user(const void __user *src, void *dst, int len,
- __wsum sum, int *csum_err)
-{
- int missing;
-
- missing = __copy_from_user(dst, src, len);
- if (missing) {
- memset(dst + len - missing, 0, missing);
- *csum_err = -EFAULT;
- } else
- *csum_err = 0;
-
- return csum_partial(dst, len, sum);
-}
-EXPORT_SYMBOL(csum_partial_copy_from_user);
-
-/*
* copy from ds while checksumming, otherwise like csum_partial
*/
__wsum
diff --git a/lib/crypto/chacha.c b/lib/crypto/chacha.c
index 65ead6b0c7e0..4ccbec442469 100644
--- a/lib/crypto/chacha.c
+++ b/lib/crypto/chacha.c
@@ -10,7 +10,6 @@
#include <linux/export.h>
#include <linux/bitops.h>
#include <linux/string.h>
-#include <linux/cryptohash.h>
#include <asm/unaligned.h>
#include <crypto/chacha.h>
diff --git a/lib/crypto/sha256.c b/lib/crypto/sha256.c
index 66cb04b0cf4e..2e621697c5c3 100644
--- a/lib/crypto/sha256.c
+++ b/lib/crypto/sha256.c
@@ -206,7 +206,7 @@ static void sha256_transform(u32 *state, const u8 *input)
memzero_explicit(W, 64 * sizeof(u32));
}
-int sha256_update(struct sha256_state *sctx, const u8 *data, unsigned int len)
+void sha256_update(struct sha256_state *sctx, const u8 *data, unsigned int len)
{
unsigned int partial, done;
const u8 *src;
@@ -232,18 +232,16 @@ int sha256_update(struct sha256_state *sctx, const u8 *data, unsigned int len)
partial = 0;
}
memcpy(sctx->buf + partial, src, len - done);
-
- return 0;
}
EXPORT_SYMBOL(sha256_update);
-int sha224_update(struct sha256_state *sctx, const u8 *data, unsigned int len)
+void sha224_update(struct sha256_state *sctx, const u8 *data, unsigned int len)
{
- return sha256_update(sctx, data, len);
+ sha256_update(sctx, data, len);
}
EXPORT_SYMBOL(sha224_update);
-static int __sha256_final(struct sha256_state *sctx, u8 *out, int digest_words)
+static void __sha256_final(struct sha256_state *sctx, u8 *out, int digest_words)
{
__be32 *dst = (__be32 *)out;
__be64 bits;
@@ -268,19 +266,17 @@ static int __sha256_final(struct sha256_state *sctx, u8 *out, int digest_words)
/* Zeroize sensitive information. */
memset(sctx, 0, sizeof(*sctx));
-
- return 0;
}
-int sha256_final(struct sha256_state *sctx, u8 *out)
+void sha256_final(struct sha256_state *sctx, u8 *out)
{
- return __sha256_final(sctx, out, 8);
+ __sha256_final(sctx, out, 8);
}
EXPORT_SYMBOL(sha256_final);
-int sha224_final(struct sha256_state *sctx, u8 *out)
+void sha224_final(struct sha256_state *sctx, u8 *out)
{
- return __sha256_final(sctx, out, 7);
+ __sha256_final(sctx, out, 7);
}
EXPORT_SYMBOL(sha224_final);
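Since sha256_update() and sha256_final() no longer return a value, callers have nothing left to check. A minimal sketch of the resulting call sequence, assuming the sha256_init() helper declared alongside these functions in <crypto/sha.h>:

#include <crypto/sha.h>

static void example_sha256(const u8 *data, unsigned int len,
			   u8 out[SHA256_DIGEST_SIZE])
{
	struct sha256_state sctx;

	sha256_init(&sctx);
	sha256_update(&sctx, data, len);	/* void now, no error path */
	sha256_final(&sctx, out);		/* also zeroizes sctx */
}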
diff --git a/lib/debug_locks.c b/lib/debug_locks.c
index a75ee30b77cb..06d3135bd184 100644
--- a/lib/debug_locks.c
+++ b/lib/debug_locks.c
@@ -36,7 +36,7 @@ EXPORT_SYMBOL_GPL(debug_locks_silent);
/*
* Generic 'turn off all lock debugging' function:
*/
-int debug_locks_off(void)
+noinstr int debug_locks_off(void)
{
if (debug_locks && __debug_locks_off()) {
if (!debug_locks_silent) {
diff --git a/lib/ioremap.c b/lib/ioremap.c
index 3f0e18543de8..ad485f08173b 100644
--- a/lib/ioremap.c
+++ b/lib/ioremap.c
@@ -61,13 +61,14 @@ static inline int ioremap_pmd_enabled(void) { return 0; }
#endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */
static int ioremap_pte_range(pmd_t *pmd, unsigned long addr,
- unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
+ unsigned long end, phys_addr_t phys_addr, pgprot_t prot,
+ pgtbl_mod_mask *mask)
{
pte_t *pte;
u64 pfn;
pfn = phys_addr >> PAGE_SHIFT;
- pte = pte_alloc_kernel(pmd, addr);
+ pte = pte_alloc_kernel_track(pmd, addr, mask);
if (!pte)
return -ENOMEM;
do {
@@ -75,6 +76,7 @@ static int ioremap_pte_range(pmd_t *pmd, unsigned long addr,
set_pte_at(&init_mm, addr, pte, pfn_pte(pfn, prot));
pfn++;
} while (pte++, addr += PAGE_SIZE, addr != end);
+ *mask |= PGTBL_PTE_MODIFIED;
return 0;
}
@@ -101,21 +103,24 @@ static int ioremap_try_huge_pmd(pmd_t *pmd, unsigned long addr,
}
static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr,
- unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
+ unsigned long end, phys_addr_t phys_addr, pgprot_t prot,
+ pgtbl_mod_mask *mask)
{
pmd_t *pmd;
unsigned long next;
- pmd = pmd_alloc(&init_mm, pud, addr);
+ pmd = pmd_alloc_track(&init_mm, pud, addr, mask);
if (!pmd)
return -ENOMEM;
do {
next = pmd_addr_end(addr, end);
- if (ioremap_try_huge_pmd(pmd, addr, next, phys_addr, prot))
+ if (ioremap_try_huge_pmd(pmd, addr, next, phys_addr, prot)) {
+ *mask |= PGTBL_PMD_MODIFIED;
continue;
+ }
- if (ioremap_pte_range(pmd, addr, next, phys_addr, prot))
+ if (ioremap_pte_range(pmd, addr, next, phys_addr, prot, mask))
return -ENOMEM;
} while (pmd++, phys_addr += (next - addr), addr = next, addr != end);
return 0;
@@ -144,21 +149,24 @@ static int ioremap_try_huge_pud(pud_t *pud, unsigned long addr,
}
static inline int ioremap_pud_range(p4d_t *p4d, unsigned long addr,
- unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
+ unsigned long end, phys_addr_t phys_addr, pgprot_t prot,
+ pgtbl_mod_mask *mask)
{
pud_t *pud;
unsigned long next;
- pud = pud_alloc(&init_mm, p4d, addr);
+ pud = pud_alloc_track(&init_mm, p4d, addr, mask);
if (!pud)
return -ENOMEM;
do {
next = pud_addr_end(addr, end);
- if (ioremap_try_huge_pud(pud, addr, next, phys_addr, prot))
+ if (ioremap_try_huge_pud(pud, addr, next, phys_addr, prot)) {
+ *mask |= PGTBL_PUD_MODIFIED;
continue;
+ }
- if (ioremap_pmd_range(pud, addr, next, phys_addr, prot))
+ if (ioremap_pmd_range(pud, addr, next, phys_addr, prot, mask))
return -ENOMEM;
} while (pud++, phys_addr += (next - addr), addr = next, addr != end);
return 0;
@@ -187,21 +195,24 @@ static int ioremap_try_huge_p4d(p4d_t *p4d, unsigned long addr,
}
static inline int ioremap_p4d_range(pgd_t *pgd, unsigned long addr,
- unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
+ unsigned long end, phys_addr_t phys_addr, pgprot_t prot,
+ pgtbl_mod_mask *mask)
{
p4d_t *p4d;
unsigned long next;
- p4d = p4d_alloc(&init_mm, pgd, addr);
+ p4d = p4d_alloc_track(&init_mm, pgd, addr, mask);
if (!p4d)
return -ENOMEM;
do {
next = p4d_addr_end(addr, end);
- if (ioremap_try_huge_p4d(p4d, addr, next, phys_addr, prot))
+ if (ioremap_try_huge_p4d(p4d, addr, next, phys_addr, prot)) {
+ *mask |= PGTBL_P4D_MODIFIED;
continue;
+ }
- if (ioremap_pud_range(p4d, addr, next, phys_addr, prot))
+ if (ioremap_pud_range(p4d, addr, next, phys_addr, prot, mask))
return -ENOMEM;
} while (p4d++, phys_addr += (next - addr), addr = next, addr != end);
return 0;
@@ -214,6 +225,7 @@ int ioremap_page_range(unsigned long addr,
unsigned long start;
unsigned long next;
int err;
+ pgtbl_mod_mask mask = 0;
might_sleep();
BUG_ON(addr >= end);
@@ -222,13 +234,17 @@ int ioremap_page_range(unsigned long addr,
pgd = pgd_offset_k(addr);
do {
next = pgd_addr_end(addr, end);
- err = ioremap_p4d_range(pgd, addr, next, phys_addr, prot);
+ err = ioremap_p4d_range(pgd, addr, next, phys_addr, prot,
+ &mask);
if (err)
break;
} while (pgd++, phys_addr += (next - addr), addr = next, addr != end);
flush_cache_vmap(start, end);
+ if (mask & ARCH_PAGE_TABLE_SYNC_MASK)
+ arch_sync_kernel_mappings(start, end);
+
return err;
}
diff --git a/lib/kobject.c b/lib/kobject.c
index 83198cb37d8d..65fa7bf70c57 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -6,7 +6,7 @@
* Copyright (c) 2006-2007 Greg Kroah-Hartman <greg@kroah.com>
* Copyright (c) 2006-2007 Novell Inc.
*
- * Please see the file Documentation/kobject.txt for critical information
+ * Please see the file Documentation/core-api/kobject.rst for critical information
* about using the kobject interface.
*/
@@ -670,7 +670,7 @@ static void kobject_cleanup(struct kobject *kobj)
kobject_name(kobj), kobj, __func__, kobj->parent);
if (t && !t->release)
- pr_debug("kobject: '%s' (%p): does not have a release() function, it is broken and must be fixed. See Documentation/kobject.txt.\n",
+ pr_debug("kobject: '%s' (%p): does not have a release() function, it is broken and must be fixed. See Documentation/core-api/kobject.rst.\n",
kobject_name(kobj), kobj);
/* send "remove" if the caller did not do it but sent "add" */
diff --git a/lib/linear_ranges.c b/lib/linear_ranges.c
new file mode 100644
index 000000000000..9495ef3572b7
--- /dev/null
+++ b/lib/linear_ranges.c
@@ -0,0 +1,245 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * helpers to map values in a linear range to range index
+ *
+ * Original idea borrowed from regulator framework
+ *
+ * It might be useful if we could also support inversely proportional ranges?
+ * Copyright 2020 ROHM Semiconductors
+ */
+
+#include <linux/errno.h>
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/linear_range.h>
+#include <linux/module.h>
+
+/**
+ * linear_range_values_in_range - return the amount of values in a range
+ * @r: pointer to linear range where values are counted
+ *
+ * Compute the amount of values in range pointed by @r. Note, values can
+ * be all equal - range with selectors 0,...,2 with step 0 still contains
+ * 3 values even though they are all equal.
+ *
+ * Return: the amount of values in range pointed by @r
+ */
+unsigned int linear_range_values_in_range(const struct linear_range *r)
+{
+ if (!r)
+ return 0;
+ return r->max_sel - r->min_sel + 1;
+}
+EXPORT_SYMBOL_GPL(linear_range_values_in_range);
+
+/**
+ * linear_range_values_in_range_array - return the amount of values in ranges
+ * @r: pointer to array of linear ranges where values are counted
+ * @ranges: amount of ranges we include in computation.
+ *
+ * Compute the amount of values in ranges pointed by @r. Note, values can
+ * be all equal - range with selectors 0,...,2 with step 0 still contains
+ * 3 values even though they are all equal.
+ *
+ * Return: the amount of values in first @ranges ranges pointed by @r
+ */
+unsigned int linear_range_values_in_range_array(const struct linear_range *r,
+ int ranges)
+{
+ int i, values_in_range = 0;
+
+ for (i = 0; i < ranges; i++) {
+ int values;
+
+ values = linear_range_values_in_range(&r[i]);
+ if (!values)
+ return values;
+
+ values_in_range += values;
+ }
+ return values_in_range;
+}
+EXPORT_SYMBOL_GPL(linear_range_values_in_range_array);
+
+/**
+ * linear_range_get_max_value - return the largest value in a range
+ * @r: pointer to linear range where value is looked from
+ *
+ * Return: the largest value in the given range
+ */
+unsigned int linear_range_get_max_value(const struct linear_range *r)
+{
+ return r->min + (r->max_sel - r->min_sel) * r->step;
+}
+EXPORT_SYMBOL_GPL(linear_range_get_max_value);
+
+/**
+ * linear_range_get_value - fetch a value from given range
+ * @r: pointer to linear range where value is looked from
+ * @selector: selector for which the value is searched
+ * @val: address where found value is updated
+ *
+ * Search given ranges for value which matches given selector.
+ *
+ * Return: 0 on success, -EINVAL if the given selector is not found in the
+ * range.
+ */
+int linear_range_get_value(const struct linear_range *r, unsigned int selector,
+ unsigned int *val)
+{
+ if (r->min_sel > selector || r->max_sel < selector)
+ return -EINVAL;
+
+ *val = r->min + (selector - r->min_sel) * r->step;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(linear_range_get_value);
+
+/**
+ * linear_range_get_value_array - fetch a value from array of ranges
+ * @r: pointer to array of linear ranges where value is looked from
+ * @ranges: amount of ranges in an array
+ * @selector: selector for which the value is searched
+ * @val: address where found value is updated
+ *
+ * Search through an array of ranges for value which matches given selector.
+ *
+ * Return: 0 on success, -EINVAL if the given selector is not found in any of
+ * the ranges.
+ */
+int linear_range_get_value_array(const struct linear_range *r, int ranges,
+ unsigned int selector, unsigned int *val)
+{
+ int i;
+
+ for (i = 0; i < ranges; i++)
+ if (r[i].min_sel <= selector && r[i].max_sel >= selector)
+ return linear_range_get_value(&r[i], selector, val);
+
+ return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(linear_range_get_value_array);
+
+/**
+ * linear_range_get_selector_low - return linear range selector for value
+ * @r: pointer to linear range where selector is looked from
+ * @val: value for which the selector is searched
+ * @selector: address where found selector value is updated
+ * @found: flag to indicate that given value was in the range
+ *
+ * Return the selector for which the range value is the closest match for the
+ * given input value. A value matches if it is equal to or smaller than the
+ * given value. If the given value is in the range, then @found is set true.
+ *
+ * Return: 0 on success, -EINVAL if the range is invalid or does not contain
+ * a value smaller than or equal to the given value
+ */
+int linear_range_get_selector_low(const struct linear_range *r,
+ unsigned int val, unsigned int *selector,
+ bool *found)
+{
+ *found = false;
+
+ if (r->min > val)
+ return -EINVAL;
+
+ if (linear_range_get_max_value(r) < val) {
+ *selector = r->max_sel;
+ return 0;
+ }
+
+ *found = true;
+
+ if (r->step == 0)
+ *selector = r->min_sel;
+ else
+ *selector = (val - r->min) / r->step + r->min_sel;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(linear_range_get_selector_low);
+
+/**
+ * linear_range_get_selector_low_array - return linear range selector for value
+ * @r: pointer to array of linear ranges where selector is looked from
+ * @ranges: amount of ranges to scan from array
+ * @val: value for which the selector is searched
+ * @selector: address where found selector value is updated
+ * @found: flag to indicate that given value was in the range
+ *
+ * Scan an array of ranges for the selector whose range value matches the given
+ * input value. A value matches if it is equal to or smaller than the given
+ * value. If the given value is found to be in a range, scanning is stopped and
+ * @found is set true. If a range containing values smaller than the given
+ * value is found but the range maximum is still smaller than the given value,
+ * then the range's biggest selector is written to @selector, scanning of the
+ * ranges continues and @found is set to false.
+ *
+ * Return: 0 on success, -EINVAL if the range array is invalid or does not
+ * contain a range with a value smaller than or equal to the given value
+ */
+int linear_range_get_selector_low_array(const struct linear_range *r,
+ int ranges, unsigned int val,
+ unsigned int *selector, bool *found)
+{
+ int i;
+ int ret = -EINVAL;
+
+ for (i = 0; i < ranges; i++) {
+ int tmpret;
+
+ tmpret = linear_range_get_selector_low(&r[i], val, selector,
+ found);
+ if (!tmpret)
+ ret = 0;
+
+ if (*found)
+ break;
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(linear_range_get_selector_low_array);
+
+/**
+ * linear_range_get_selector_high - return linear range selector for value
+ * @r: pointer to linear range where selector is looked from
+ * @val: value for which the selector is searched
+ * @selector: address where found selector value is updated
+ * @found: flag to indicate that given value was in the range
+ *
+ * Return the selector for which the range value is the closest match for the
+ * given input value. A value matches if it is equal to or higher than the
+ * given value. If the given value is in the range, then @found is set true.
+ *
+ * Return: 0 on success, -EINVAL if the range is invalid or does not contain
+ * a value greater than or equal to the given value
+ */
+int linear_range_get_selector_high(const struct linear_range *r,
+ unsigned int val, unsigned int *selector,
+ bool *found)
+{
+ *found = false;
+
+ if (linear_range_get_max_value(r) < val)
+ return -EINVAL;
+
+ if (r->min > val) {
+ *selector = r->min_sel;
+ return 0;
+ }
+
+ *found = true;
+
+ if (r->step == 0)
+ *selector = r->max_sel;
+ else
+ *selector = DIV_ROUND_UP(val - r->min, r->step) + r->min_sel;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(linear_range_get_selector_high);
+
+MODULE_DESCRIPTION("linear-ranges helper");
+MODULE_LICENSE("GPL");
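A minimal usage sketch of the helpers above; the range contents are made up (a regulator-style selector table), only the API shape is the point:

#include <linux/linear_range.h>

/* Selectors 0..15 map linearly to 1000, 1050, ..., 1750. */
static const struct linear_range example_range = {
	.min = 1000,
	.min_sel = 0,
	.max_sel = 15,
	.step = 50,
};

static int example_lookup(void)
{
	unsigned int val, sel;
	bool found;
	int ret;

	ret = linear_range_get_value(&example_range, 3, &val);
	if (ret)
		return ret;		/* val == 1150 */

	ret = linear_range_get_selector_high(&example_range, 1120, &sel, &found);
	if (ret)
		return ret;		/* sel == 3, found == true */

	return 0;
}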
diff --git a/lib/mpi/longlong.h b/lib/mpi/longlong.h
index 891e1c3549c4..afbd99987cf8 100644
--- a/lib/mpi/longlong.h
+++ b/lib/mpi/longlong.h
@@ -653,7 +653,7 @@ do { \
************** MIPS/64 **************
***************************************/
#if (defined(__mips) && __mips >= 3) && W_TYPE_SIZE == 64
-#if defined(__mips_isa_rev) && __mips_isa_rev >= 6
+#if defined(__mips_isa_rev) && __mips_isa_rev >= 6 && defined(CONFIG_CC_IS_GCC)
/*
* GCC ends up emitting a __multi3 intrinsic call for MIPS64r6 with the plain C
* code below, so we special case MIPS64r6 until the compiler can do better.
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 2ee6ae3b0ade..34e406fe561f 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -20,6 +20,7 @@
#include <linux/kernel.h>
#include <linux/kmemleak.h>
#include <linux/percpu.h>
+#include <linux/local_lock.h>
#include <linux/preempt.h> /* in_interrupt() */
#include <linux/radix-tree.h>
#include <linux/rcupdate.h>
@@ -27,7 +28,6 @@
#include <linux/string.h>
#include <linux/xarray.h>
-
/*
* Radix tree node cache.
*/
@@ -58,12 +58,10 @@ struct kmem_cache *radix_tree_node_cachep;
/*
* Per-cpu pool of preloaded nodes
*/
-struct radix_tree_preload {
- unsigned nr;
- /* nodes->parent points to next preallocated node */
- struct radix_tree_node *nodes;
+DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = {
+ .lock = INIT_LOCAL_LOCK(lock),
};
-static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, };
+EXPORT_PER_CPU_SYMBOL_GPL(radix_tree_preloads);
static inline struct radix_tree_node *entry_to_node(void *ptr)
{
@@ -332,14 +330,14 @@ static __must_check int __radix_tree_preload(gfp_t gfp_mask, unsigned nr)
*/
gfp_mask &= ~__GFP_ACCOUNT;
- preempt_disable();
+ local_lock(&radix_tree_preloads.lock);
rtp = this_cpu_ptr(&radix_tree_preloads);
while (rtp->nr < nr) {
- preempt_enable();
+ local_unlock(&radix_tree_preloads.lock);
node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
if (node == NULL)
goto out;
- preempt_disable();
+ local_lock(&radix_tree_preloads.lock);
rtp = this_cpu_ptr(&radix_tree_preloads);
if (rtp->nr < nr) {
node->parent = rtp->nodes;
@@ -381,7 +379,7 @@ int radix_tree_maybe_preload(gfp_t gfp_mask)
if (gfpflags_allow_blocking(gfp_mask))
return __radix_tree_preload(gfp_mask, RADIX_TREE_PRELOAD_SIZE);
/* Preloading doesn't help anything with this gfp mask, skip it */
- preempt_disable();
+ local_lock(&radix_tree_preloads.lock);
return 0;
}
EXPORT_SYMBOL(radix_tree_maybe_preload);
@@ -1470,7 +1468,7 @@ EXPORT_SYMBOL(radix_tree_tagged);
void idr_preload(gfp_t gfp_mask)
{
if (__radix_tree_preload(gfp_mask, IDR_PRELOAD_SIZE))
- preempt_disable();
+ local_lock(&radix_tree_preloads.lock);
}
EXPORT_SYMBOL(idr_preload);
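The radix-tree change above is an instance of the generic local_lock conversion: the per-CPU data gains a local_lock_t member and every preempt_disable()/preempt_enable() pair around it becomes local_lock()/local_unlock(). A stand-alone sketch with made-up names:

#include <linux/local_lock.h>
#include <linux/percpu.h>

struct foo_pcpu {
	local_lock_t lock;
	unsigned int nr;
};

static DEFINE_PER_CPU(struct foo_pcpu, foo_pcpu) = {
	.lock = INIT_LOCAL_LOCK(lock),
};

static void foo_account(void)
{
	local_lock(&foo_pcpu.lock);	/* was preempt_disable() */
	this_cpu_inc(foo_pcpu.nr);
	local_unlock(&foo_pcpu.lock);	/* was preempt_enable() */
}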
diff --git a/lib/sha1.c b/lib/sha1.c
index 1d96d2c02b82..49257a915bb6 100644
--- a/lib/sha1.c
+++ b/lib/sha1.c
@@ -9,7 +9,7 @@
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/bitops.h>
-#include <linux/cryptohash.h>
+#include <crypto/sha.h>
#include <asm/unaligned.h>
/*
@@ -64,22 +64,24 @@
#define T_60_79(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, (B^C^D) , 0xca62c1d6, A, B, C, D, E )
/**
- * sha_transform - single block SHA1 transform
+ * sha1_transform - single block SHA1 transform (deprecated)
*
* @digest: 160 bit digest to update
* @data: 512 bits of data to hash
* @array: 16 words of workspace (see note)
*
- * This function generates a SHA1 digest for a single 512-bit block.
- * Be warned, it does not handle padding and message digest, do not
- * confuse it with the full FIPS 180-1 digest algorithm for variable
- * length messages.
+ * This function executes SHA-1's internal compression function. It updates the
+ * 160-bit internal state (@digest) with a single 512-bit data block (@data).
+ *
+ * Don't use this function. SHA-1 is no longer considered secure. And even if
+ * you do have to use SHA-1, this isn't the correct way to hash something with
+ * SHA-1 as this doesn't handle padding and finalization.
*
* Note: If the hash is security sensitive, the caller should be sure
* to clear the workspace. This is left to the caller to avoid
* unnecessary clears between chained hashing operations.
*/
-void sha_transform(__u32 *digest, const char *data, __u32 *array)
+void sha1_transform(__u32 *digest, const char *data, __u32 *array)
{
__u32 A, B, C, D, E;
@@ -185,13 +187,13 @@ void sha_transform(__u32 *digest, const char *data, __u32 *array)
digest[3] += D;
digest[4] += E;
}
-EXPORT_SYMBOL(sha_transform);
+EXPORT_SYMBOL(sha1_transform);
/**
- * sha_init - initialize the vectors for a SHA1 digest
+ * sha1_init - initialize the vectors for a SHA1 digest
* @buf: vector to initialize
*/
-void sha_init(__u32 *buf)
+void sha1_init(__u32 *buf)
{
buf[0] = 0x67452301;
buf[1] = 0xefcdab89;
@@ -199,4 +201,4 @@ void sha_init(__u32 *buf)
buf[3] = 0x10325476;
buf[4] = 0xc3d2e1f0;
}
-EXPORT_SYMBOL(sha_init);
+EXPORT_SYMBOL(sha1_init);
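A minimal caller-side sketch after the rename; it pushes one 64-byte block through the helpers and clears the workspace as the kernel-doc above asks (the 16-word workspace size mirrors the old SHA_WORKSPACE_WORDS value):

#include <crypto/sha.h>
#include <linux/string.h>

static void example_sha1_block(const char block[SHA1_BLOCK_SIZE],
			       __u32 digest[5])
{
	__u32 ws[16];	/* 16 words of workspace, see the note above */

	sha1_init(digest);
	sha1_transform(digest, block, ws);
	memzero_explicit(ws, sizeof(ws));
}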
diff --git a/lib/strncpy_from_user.c b/lib/strncpy_from_user.c
index 706020b06617..b90ec550183a 100644
--- a/lib/strncpy_from_user.c
+++ b/lib/strncpy_from_user.c
@@ -116,9 +116,9 @@ long strncpy_from_user(char *dst, const char __user *src, long count)
kasan_check_write(dst, count);
check_object_size(dst, count, false);
- if (user_access_begin(src, max)) {
+ if (user_read_access_begin(src, max)) {
retval = do_strncpy_from_user(dst, src, count, max);
- user_access_end();
+ user_read_access_end();
return retval;
}
}
diff --git a/lib/strnlen_user.c b/lib/strnlen_user.c
index 41670d4a5816..1616710b8a82 100644
--- a/lib/strnlen_user.c
+++ b/lib/strnlen_user.c
@@ -109,9 +109,9 @@ long strnlen_user(const char __user *str, long count)
if (max > count)
max = count;
- if (user_access_begin(str, max)) {
+ if (user_read_access_begin(str, max)) {
retval = do_strnlen_user(str, count, max);
- user_access_end();
+ user_read_access_end();
return retval;
}
}
diff --git a/lib/test_hmm.c b/lib/test_hmm.c
new file mode 100644
index 000000000000..5c1858e325ba
--- /dev/null
+++ b/lib/test_hmm.c
@@ -0,0 +1,1164 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This is a module to test the HMM (Heterogeneous Memory Management)
+ * mirror and zone device private memory migration APIs of the kernel.
+ * Userspace programs can register with the driver to mirror their own address
+ * space and can use the device to read/write any valid virtual address.
+ */
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/cdev.h>
+#include <linux/device.h>
+#include <linux/mutex.h>
+#include <linux/rwsem.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/highmem.h>
+#include <linux/delay.h>
+#include <linux/pagemap.h>
+#include <linux/hmm.h>
+#include <linux/vmalloc.h>
+#include <linux/swap.h>
+#include <linux/swapops.h>
+#include <linux/sched/mm.h>
+#include <linux/platform_device.h>
+
+#include "test_hmm_uapi.h"
+
+#define DMIRROR_NDEVICES 2
+#define DMIRROR_RANGE_FAULT_TIMEOUT 1000
+#define DEVMEM_CHUNK_SIZE (256 * 1024 * 1024U)
+#define DEVMEM_CHUNKS_RESERVE 16
+
+static const struct dev_pagemap_ops dmirror_devmem_ops;
+static const struct mmu_interval_notifier_ops dmirror_min_ops;
+static dev_t dmirror_dev;
+static struct page *dmirror_zero_page;
+
+struct dmirror_device;
+
+struct dmirror_bounce {
+ void *ptr;
+ unsigned long size;
+ unsigned long addr;
+ unsigned long cpages;
+};
+
+#define DPT_XA_TAG_WRITE 3UL
+
+/*
+ * Data structure to track address ranges and register for mmu interval
+ * notifier updates.
+ */
+struct dmirror_interval {
+ struct mmu_interval_notifier notifier;
+ struct dmirror *dmirror;
+};
+
+/*
+ * Data attached to the open device file.
+ * Note that it might be shared after a fork().
+ */
+struct dmirror {
+ struct dmirror_device *mdevice;
+ struct xarray pt;
+ struct mmu_interval_notifier notifier;
+ struct mutex mutex;
+};
+
+/*
+ * ZONE_DEVICE pages for migration and simulating device memory.
+ */
+struct dmirror_chunk {
+ struct dev_pagemap pagemap;
+ struct dmirror_device *mdevice;
+};
+
+/*
+ * Per device data.
+ */
+struct dmirror_device {
+ struct cdev cdevice;
+ struct hmm_devmem *devmem;
+
+ unsigned int devmem_capacity;
+ unsigned int devmem_count;
+ struct dmirror_chunk **devmem_chunks;
+ struct mutex devmem_lock; /* protects the above */
+
+ unsigned long calloc;
+ unsigned long cfree;
+ struct page *free_pages;
+ spinlock_t lock; /* protects the above */
+};
+
+static struct dmirror_device dmirror_devices[DMIRROR_NDEVICES];
+
+static int dmirror_bounce_init(struct dmirror_bounce *bounce,
+ unsigned long addr,
+ unsigned long size)
+{
+ bounce->addr = addr;
+ bounce->size = size;
+ bounce->cpages = 0;
+ bounce->ptr = vmalloc(size);
+ if (!bounce->ptr)
+ return -ENOMEM;
+ return 0;
+}
+
+static void dmirror_bounce_fini(struct dmirror_bounce *bounce)
+{
+ vfree(bounce->ptr);
+}
+
+static int dmirror_fops_open(struct inode *inode, struct file *filp)
+{
+ struct cdev *cdev = inode->i_cdev;
+ struct dmirror *dmirror;
+ int ret;
+
+ /* Mirror this process address space */
+ dmirror = kzalloc(sizeof(*dmirror), GFP_KERNEL);
+ if (dmirror == NULL)
+ return -ENOMEM;
+
+ dmirror->mdevice = container_of(cdev, struct dmirror_device, cdevice);
+ mutex_init(&dmirror->mutex);
+ xa_init(&dmirror->pt);
+
+ ret = mmu_interval_notifier_insert(&dmirror->notifier, current->mm,
+ 0, ULONG_MAX & PAGE_MASK, &dmirror_min_ops);
+ if (ret) {
+ kfree(dmirror);
+ return ret;
+ }
+
+ filp->private_data = dmirror;
+ return 0;
+}
+
+static int dmirror_fops_release(struct inode *inode, struct file *filp)
+{
+ struct dmirror *dmirror = filp->private_data;
+
+ mmu_interval_notifier_remove(&dmirror->notifier);
+ xa_destroy(&dmirror->pt);
+ kfree(dmirror);
+ return 0;
+}
+
+static struct dmirror_device *dmirror_page_to_device(struct page *page)
+
+{
+ return container_of(page->pgmap, struct dmirror_chunk,
+ pagemap)->mdevice;
+}
+
+static int dmirror_do_fault(struct dmirror *dmirror, struct hmm_range *range)
+{
+ unsigned long *pfns = range->hmm_pfns;
+ unsigned long pfn;
+
+ for (pfn = (range->start >> PAGE_SHIFT);
+ pfn < (range->end >> PAGE_SHIFT);
+ pfn++, pfns++) {
+ struct page *page;
+ void *entry;
+
+ /*
+ * Since we asked for hmm_range_fault() to populate pages,
+ * it shouldn't return an error entry on success.
+ */
+ WARN_ON(*pfns & HMM_PFN_ERROR);
+ WARN_ON(!(*pfns & HMM_PFN_VALID));
+
+ page = hmm_pfn_to_page(*pfns);
+ WARN_ON(!page);
+
+ entry = page;
+ if (*pfns & HMM_PFN_WRITE)
+ entry = xa_tag_pointer(entry, DPT_XA_TAG_WRITE);
+ else if (WARN_ON(range->default_flags & HMM_PFN_WRITE))
+ return -EFAULT;
+ entry = xa_store(&dmirror->pt, pfn, entry, GFP_ATOMIC);
+ if (xa_is_err(entry))
+ return xa_err(entry);
+ }
+
+ return 0;
+}
+
+static void dmirror_do_update(struct dmirror *dmirror, unsigned long start,
+ unsigned long end)
+{
+ unsigned long pfn;
+ void *entry;
+
+ /*
+ * The XArray doesn't hold references to pages since it relies on
+ * the mmu notifier to clear page pointers when they become stale.
+ * Therefore, it is OK to just clear the entry.
+ */
+ xa_for_each_range(&dmirror->pt, pfn, entry, start >> PAGE_SHIFT,
+ end >> PAGE_SHIFT)
+ xa_erase(&dmirror->pt, pfn);
+}
+
+static bool dmirror_interval_invalidate(struct mmu_interval_notifier *mni,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq)
+{
+ struct dmirror *dmirror = container_of(mni, struct dmirror, notifier);
+
+ if (mmu_notifier_range_blockable(range))
+ mutex_lock(&dmirror->mutex);
+ else if (!mutex_trylock(&dmirror->mutex))
+ return false;
+
+ mmu_interval_set_seq(mni, cur_seq);
+ dmirror_do_update(dmirror, range->start, range->end);
+
+ mutex_unlock(&dmirror->mutex);
+ return true;
+}
+
+static const struct mmu_interval_notifier_ops dmirror_min_ops = {
+ .invalidate = dmirror_interval_invalidate,
+};
+
+static int dmirror_range_fault(struct dmirror *dmirror,
+ struct hmm_range *range)
+{
+ struct mm_struct *mm = dmirror->notifier.mm;
+ unsigned long timeout =
+ jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
+ int ret;
+
+ while (true) {
+ if (time_after(jiffies, timeout)) {
+ ret = -EBUSY;
+ goto out;
+ }
+
+ range->notifier_seq = mmu_interval_read_begin(range->notifier);
+ down_read(&mm->mmap_sem);
+ ret = hmm_range_fault(range);
+ up_read(&mm->mmap_sem);
+ if (ret) {
+ if (ret == -EBUSY)
+ continue;
+ goto out;
+ }
+
+ mutex_lock(&dmirror->mutex);
+ if (mmu_interval_read_retry(range->notifier,
+ range->notifier_seq)) {
+ mutex_unlock(&dmirror->mutex);
+ continue;
+ }
+ break;
+ }
+
+ ret = dmirror_do_fault(dmirror, range);
+
+ mutex_unlock(&dmirror->mutex);
+out:
+ return ret;
+}
+
+static int dmirror_fault(struct dmirror *dmirror, unsigned long start,
+ unsigned long end, bool write)
+{
+ struct mm_struct *mm = dmirror->notifier.mm;
+ unsigned long addr;
+ unsigned long pfns[64];
+ struct hmm_range range = {
+ .notifier = &dmirror->notifier,
+ .hmm_pfns = pfns,
+ .pfn_flags_mask = 0,
+ .default_flags =
+ HMM_PFN_REQ_FAULT | (write ? HMM_PFN_REQ_WRITE : 0),
+ .dev_private_owner = dmirror->mdevice,
+ };
+ int ret = 0;
+
+ /* Since the mm is for the mirrored process, get a reference first. */
+ if (!mmget_not_zero(mm))
+ return 0;
+
+ for (addr = start; addr < end; addr = range.end) {
+ range.start = addr;
+ range.end = min(addr + (ARRAY_SIZE(pfns) << PAGE_SHIFT), end);
+
+ ret = dmirror_range_fault(dmirror, &range);
+ if (ret)
+ break;
+ }
+
+ mmput(mm);
+ return ret;
+}
+
+static int dmirror_do_read(struct dmirror *dmirror, unsigned long start,
+ unsigned long end, struct dmirror_bounce *bounce)
+{
+ unsigned long pfn;
+ void *ptr;
+
+ ptr = bounce->ptr + ((start - bounce->addr) & PAGE_MASK);
+
+ for (pfn = start >> PAGE_SHIFT; pfn < (end >> PAGE_SHIFT); pfn++) {
+ void *entry;
+ struct page *page;
+ void *tmp;
+
+ entry = xa_load(&dmirror->pt, pfn);
+ page = xa_untag_pointer(entry);
+ if (!page)
+ return -ENOENT;
+
+ tmp = kmap(page);
+ memcpy(ptr, tmp, PAGE_SIZE);
+ kunmap(page);
+
+ ptr += PAGE_SIZE;
+ bounce->cpages++;
+ }
+
+ return 0;
+}
+
+static int dmirror_read(struct dmirror *dmirror, struct hmm_dmirror_cmd *cmd)
+{
+ struct dmirror_bounce bounce;
+ unsigned long start, end;
+ unsigned long size = cmd->npages << PAGE_SHIFT;
+ int ret;
+
+ start = cmd->addr;
+ end = start + size;
+ if (end < start)
+ return -EINVAL;
+
+ ret = dmirror_bounce_init(&bounce, start, size);
+ if (ret)
+ return ret;
+
+ while (1) {
+ mutex_lock(&dmirror->mutex);
+ ret = dmirror_do_read(dmirror, start, end, &bounce);
+ mutex_unlock(&dmirror->mutex);
+ if (ret != -ENOENT)
+ break;
+
+ start = cmd->addr + (bounce.cpages << PAGE_SHIFT);
+ ret = dmirror_fault(dmirror, start, end, false);
+ if (ret)
+ break;
+ cmd->faults++;
+ }
+
+ if (ret == 0) {
+ if (copy_to_user(u64_to_user_ptr(cmd->ptr), bounce.ptr,
+ bounce.size))
+ ret = -EFAULT;
+ }
+ cmd->cpages = bounce.cpages;
+ dmirror_bounce_fini(&bounce);
+ return ret;
+}
+
+static int dmirror_do_write(struct dmirror *dmirror, unsigned long start,
+ unsigned long end, struct dmirror_bounce *bounce)
+{
+ unsigned long pfn;
+ void *ptr;
+
+ ptr = bounce->ptr + ((start - bounce->addr) & PAGE_MASK);
+
+ for (pfn = start >> PAGE_SHIFT; pfn < (end >> PAGE_SHIFT); pfn++) {
+ void *entry;
+ struct page *page;
+ void *tmp;
+
+ entry = xa_load(&dmirror->pt, pfn);
+ page = xa_untag_pointer(entry);
+ if (!page || xa_pointer_tag(entry) != DPT_XA_TAG_WRITE)
+ return -ENOENT;
+
+ tmp = kmap(page);
+ memcpy(tmp, ptr, PAGE_SIZE);
+ kunmap(page);
+
+ ptr += PAGE_SIZE;
+ bounce->cpages++;
+ }
+
+ return 0;
+}
+
+static int dmirror_write(struct dmirror *dmirror, struct hmm_dmirror_cmd *cmd)
+{
+ struct dmirror_bounce bounce;
+ unsigned long start, end;
+ unsigned long size = cmd->npages << PAGE_SHIFT;
+ int ret;
+
+ start = cmd->addr;
+ end = start + size;
+ if (end < start)
+ return -EINVAL;
+
+ ret = dmirror_bounce_init(&bounce, start, size);
+ if (ret)
+ return ret;
+ if (copy_from_user(bounce.ptr, u64_to_user_ptr(cmd->ptr),
+ bounce.size)) {
+ ret = -EFAULT;
+ goto fini;
+ }
+
+ while (1) {
+ mutex_lock(&dmirror->mutex);
+ ret = dmirror_do_write(dmirror, start, end, &bounce);
+ mutex_unlock(&dmirror->mutex);
+ if (ret != -ENOENT)
+ break;
+
+ start = cmd->addr + (bounce.cpages << PAGE_SHIFT);
+ ret = dmirror_fault(dmirror, start, end, true);
+ if (ret)
+ break;
+ cmd->faults++;
+ }
+
+fini:
+ cmd->cpages = bounce.cpages;
+ dmirror_bounce_fini(&bounce);
+ return ret;
+}
+
+static bool dmirror_allocate_chunk(struct dmirror_device *mdevice,
+ struct page **ppage)
+{
+ struct dmirror_chunk *devmem;
+ struct resource *res;
+ unsigned long pfn;
+ unsigned long pfn_first;
+ unsigned long pfn_last;
+ void *ptr;
+
+ mutex_lock(&mdevice->devmem_lock);
+
+ if (mdevice->devmem_count == mdevice->devmem_capacity) {
+ struct dmirror_chunk **new_chunks;
+ unsigned int new_capacity;
+
+ new_capacity = mdevice->devmem_capacity +
+ DEVMEM_CHUNKS_RESERVE;
+ new_chunks = krealloc(mdevice->devmem_chunks,
+ sizeof(new_chunks[0]) * new_capacity,
+ GFP_KERNEL);
+ if (!new_chunks)
+ goto err;
+ mdevice->devmem_capacity = new_capacity;
+ mdevice->devmem_chunks = new_chunks;
+ }
+
+ res = request_free_mem_region(&iomem_resource, DEVMEM_CHUNK_SIZE,
+ "hmm_dmirror");
+ if (IS_ERR(res))
+ goto err;
+
+ devmem = kzalloc(sizeof(*devmem), GFP_KERNEL);
+ if (!devmem)
+ goto err_release;
+
+ devmem->pagemap.type = MEMORY_DEVICE_PRIVATE;
+ devmem->pagemap.res = *res;
+ devmem->pagemap.ops = &dmirror_devmem_ops;
+ devmem->pagemap.owner = mdevice;
+
+ ptr = memremap_pages(&devmem->pagemap, numa_node_id());
+ if (IS_ERR(ptr))
+ goto err_free;
+
+ devmem->mdevice = mdevice;
+ pfn_first = devmem->pagemap.res.start >> PAGE_SHIFT;
+ pfn_last = pfn_first +
+ (resource_size(&devmem->pagemap.res) >> PAGE_SHIFT);
+ mdevice->devmem_chunks[mdevice->devmem_count++] = devmem;
+
+ mutex_unlock(&mdevice->devmem_lock);
+
+ pr_info("added new %u MB chunk (total %u chunks, %u MB) PFNs [0x%lx 0x%lx)\n",
+ DEVMEM_CHUNK_SIZE / (1024 * 1024),
+ mdevice->devmem_count,
+ mdevice->devmem_count * (DEVMEM_CHUNK_SIZE / (1024 * 1024)),
+ pfn_first, pfn_last);
+
+ spin_lock(&mdevice->lock);
+ for (pfn = pfn_first; pfn < pfn_last; pfn++) {
+ struct page *page = pfn_to_page(pfn);
+
+ page->zone_device_data = mdevice->free_pages;
+ mdevice->free_pages = page;
+ }
+ if (ppage) {
+ *ppage = mdevice->free_pages;
+ mdevice->free_pages = (*ppage)->zone_device_data;
+ mdevice->calloc++;
+ }
+ spin_unlock(&mdevice->lock);
+
+ return true;
+
+err_free:
+ kfree(devmem);
+err_release:
+ release_mem_region(devmem->pagemap.res.start,
+ resource_size(&devmem->pagemap.res));
+err:
+ mutex_unlock(&mdevice->devmem_lock);
+ return false;
+}
+
+static struct page *dmirror_devmem_alloc_page(struct dmirror_device *mdevice)
+{
+ struct page *dpage = NULL;
+ struct page *rpage;
+
+ /*
+ * This is a fake device so we alloc real system memory to store
+ * our device memory.
+ */
+ rpage = alloc_page(GFP_HIGHUSER);
+ if (!rpage)
+ return NULL;
+
+ spin_lock(&mdevice->lock);
+
+ if (mdevice->free_pages) {
+ dpage = mdevice->free_pages;
+ mdevice->free_pages = dpage->zone_device_data;
+ mdevice->calloc++;
+ spin_unlock(&mdevice->lock);
+ } else {
+ spin_unlock(&mdevice->lock);
+ if (!dmirror_allocate_chunk(mdevice, &dpage))
+ goto error;
+ }
+
+ dpage->zone_device_data = rpage;
+ get_page(dpage);
+ lock_page(dpage);
+ return dpage;
+
+error:
+ __free_page(rpage);
+ return NULL;
+}
+
+static void dmirror_migrate_alloc_and_copy(struct migrate_vma *args,
+ struct dmirror *dmirror)
+{
+ struct dmirror_device *mdevice = dmirror->mdevice;
+ const unsigned long *src = args->src;
+ unsigned long *dst = args->dst;
+ unsigned long addr;
+
+ for (addr = args->start; addr < args->end; addr += PAGE_SIZE,
+ src++, dst++) {
+ struct page *spage;
+ struct page *dpage;
+ struct page *rpage;
+
+ if (!(*src & MIGRATE_PFN_MIGRATE))
+ continue;
+
+ /*
+ * Note that spage might be NULL which is OK since it is an
+ * unallocated pte_none() or read-only zero page.
+ */
+ spage = migrate_pfn_to_page(*src);
+
+ /*
+ * Don't migrate device private pages from our own driver or
+ * others. For our own we would do a device private memory copy
+ * not a migration and for others, we would need to fault the
+ * other device's page into system memory first.
+ */
+ if (spage && is_zone_device_page(spage))
+ continue;
+
+ dpage = dmirror_devmem_alloc_page(mdevice);
+ if (!dpage)
+ continue;
+
+ rpage = dpage->zone_device_data;
+ if (spage)
+ copy_highpage(rpage, spage);
+ else
+ clear_highpage(rpage);
+
+ /*
+ * Normally, a device would use the page->zone_device_data to
+ * point to the mirror but here we use it to hold the page for
+ * the simulated device memory and that page holds the pointer
+ * to the mirror.
+ */
+ rpage->zone_device_data = dmirror;
+
+ *dst = migrate_pfn(page_to_pfn(dpage)) |
+ MIGRATE_PFN_LOCKED;
+ if ((*src & MIGRATE_PFN_WRITE) ||
+ (!spage && args->vma->vm_flags & VM_WRITE))
+ *dst |= MIGRATE_PFN_WRITE;
+ }
+}
+
+static int dmirror_migrate_finalize_and_map(struct migrate_vma *args,
+ struct dmirror *dmirror)
+{
+ unsigned long start = args->start;
+ unsigned long end = args->end;
+ const unsigned long *src = args->src;
+ const unsigned long *dst = args->dst;
+ unsigned long pfn;
+
+ /* Map the migrated pages into the device's page tables. */
+ mutex_lock(&dmirror->mutex);
+
+ for (pfn = start >> PAGE_SHIFT; pfn < (end >> PAGE_SHIFT); pfn++,
+ src++, dst++) {
+ struct page *dpage;
+ void *entry;
+
+ if (!(*src & MIGRATE_PFN_MIGRATE))
+ continue;
+
+ dpage = migrate_pfn_to_page(*dst);
+ if (!dpage)
+ continue;
+
+ /*
+ * Store the page that holds the data so the page table
+ * doesn't have to deal with ZONE_DEVICE private pages.
+ */
+ entry = dpage->zone_device_data;
+ if (*dst & MIGRATE_PFN_WRITE)
+ entry = xa_tag_pointer(entry, DPT_XA_TAG_WRITE);
+ entry = xa_store(&dmirror->pt, pfn, entry, GFP_ATOMIC);
+ if (xa_is_err(entry)) {
+ mutex_unlock(&dmirror->mutex);
+ return xa_err(entry);
+ }
+ }
+
+ mutex_unlock(&dmirror->mutex);
+ return 0;
+}
+
+static int dmirror_migrate(struct dmirror *dmirror,
+ struct hmm_dmirror_cmd *cmd)
+{
+ unsigned long start, end, addr;
+ unsigned long size = cmd->npages << PAGE_SHIFT;
+ struct mm_struct *mm = dmirror->notifier.mm;
+ struct vm_area_struct *vma;
+ unsigned long src_pfns[64];
+ unsigned long dst_pfns[64];
+ struct dmirror_bounce bounce;
+ struct migrate_vma args;
+ unsigned long next;
+ int ret;
+
+ start = cmd->addr;
+ end = start + size;
+ if (end < start)
+ return -EINVAL;
+
+ /* Since the mm is for the mirrored process, get a reference first. */
+ if (!mmget_not_zero(mm))
+ return -EINVAL;
+
+ down_read(&mm->mmap_sem);
+ for (addr = start; addr < end; addr = next) {
+ vma = find_vma(mm, addr);
+ if (!vma || addr < vma->vm_start ||
+ !(vma->vm_flags & VM_READ)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ next = min(end, addr + (ARRAY_SIZE(src_pfns) << PAGE_SHIFT));
+ if (next > vma->vm_end)
+ next = vma->vm_end;
+
+ args.vma = vma;
+ args.src = src_pfns;
+ args.dst = dst_pfns;
+ args.start = addr;
+ args.end = next;
+ args.src_owner = NULL;
+ ret = migrate_vma_setup(&args);
+ if (ret)
+ goto out;
+
+ dmirror_migrate_alloc_and_copy(&args, dmirror);
+ migrate_vma_pages(&args);
+ dmirror_migrate_finalize_and_map(&args, dmirror);
+ migrate_vma_finalize(&args);
+ }
+ up_read(&mm->mmap_sem);
+ mmput(mm);
+
+ /* Return the migrated data for verification. */
+ ret = dmirror_bounce_init(&bounce, start, size);
+ if (ret)
+ return ret;
+ mutex_lock(&dmirror->mutex);
+ ret = dmirror_do_read(dmirror, start, end, &bounce);
+ mutex_unlock(&dmirror->mutex);
+ if (ret == 0) {
+ if (copy_to_user(u64_to_user_ptr(cmd->ptr), bounce.ptr,
+ bounce.size))
+ ret = -EFAULT;
+ }
+ cmd->cpages = bounce.cpages;
+ dmirror_bounce_fini(&bounce);
+ return ret;
+
+out:
+ up_read(&mm->mmap_sem);
+ mmput(mm);
+ return ret;
+}
+
+static void dmirror_mkentry(struct dmirror *dmirror, struct hmm_range *range,
+ unsigned char *perm, unsigned long entry)
+{
+ struct page *page;
+
+ if (entry & HMM_PFN_ERROR) {
+ *perm = HMM_DMIRROR_PROT_ERROR;
+ return;
+ }
+ if (!(entry & HMM_PFN_VALID)) {
+ *perm = HMM_DMIRROR_PROT_NONE;
+ return;
+ }
+
+ page = hmm_pfn_to_page(entry);
+ if (is_device_private_page(page)) {
+ /* Is the page migrated to this device or some other? */
+ if (dmirror->mdevice == dmirror_page_to_device(page))
+ *perm = HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL;
+ else
+ *perm = HMM_DMIRROR_PROT_DEV_PRIVATE_REMOTE;
+ } else if (is_zero_pfn(page_to_pfn(page)))
+ *perm = HMM_DMIRROR_PROT_ZERO;
+ else
+ *perm = HMM_DMIRROR_PROT_NONE;
+ if (entry & HMM_PFN_WRITE)
+ *perm |= HMM_DMIRROR_PROT_WRITE;
+ else
+ *perm |= HMM_DMIRROR_PROT_READ;
+}
+
+static bool dmirror_snapshot_invalidate(struct mmu_interval_notifier *mni,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq)
+{
+ struct dmirror_interval *dmi =
+ container_of(mni, struct dmirror_interval, notifier);
+ struct dmirror *dmirror = dmi->dmirror;
+
+ if (mmu_notifier_range_blockable(range))
+ mutex_lock(&dmirror->mutex);
+ else if (!mutex_trylock(&dmirror->mutex))
+ return false;
+
+ /*
+ * Snapshots only need to set the sequence number since any
+ * invalidation in the interval invalidates the whole snapshot.
+ */
+ mmu_interval_set_seq(mni, cur_seq);
+
+ mutex_unlock(&dmirror->mutex);
+ return true;
+}
+
+static const struct mmu_interval_notifier_ops dmirror_mrn_ops = {
+ .invalidate = dmirror_snapshot_invalidate,
+};
+
+static int dmirror_range_snapshot(struct dmirror *dmirror,
+ struct hmm_range *range,
+ unsigned char *perm)
+{
+ struct mm_struct *mm = dmirror->notifier.mm;
+ struct dmirror_interval notifier;
+ unsigned long timeout =
+ jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
+ unsigned long i;
+ unsigned long n;
+ int ret = 0;
+
+ notifier.dmirror = dmirror;
+ range->notifier = &notifier.notifier;
+
+ ret = mmu_interval_notifier_insert(range->notifier, mm,
+ range->start, range->end - range->start,
+ &dmirror_mrn_ops);
+ if (ret)
+ return ret;
+
+ while (true) {
+ if (time_after(jiffies, timeout)) {
+ ret = -EBUSY;
+ goto out;
+ }
+
+ range->notifier_seq = mmu_interval_read_begin(range->notifier);
+
+ down_read(&mm->mmap_sem);
+ ret = hmm_range_fault(range);
+ up_read(&mm->mmap_sem);
+ if (ret) {
+ if (ret == -EBUSY)
+ continue;
+ goto out;
+ }
+
+ mutex_lock(&dmirror->mutex);
+ if (mmu_interval_read_retry(range->notifier,
+ range->notifier_seq)) {
+ mutex_unlock(&dmirror->mutex);
+ continue;
+ }
+ break;
+ }
+
+ n = (range->end - range->start) >> PAGE_SHIFT;
+ for (i = 0; i < n; i++)
+ dmirror_mkentry(dmirror, range, perm + i, range->hmm_pfns[i]);
+
+ mutex_unlock(&dmirror->mutex);
+out:
+ mmu_interval_notifier_remove(range->notifier);
+ return ret;
+}
+
+static int dmirror_snapshot(struct dmirror *dmirror,
+ struct hmm_dmirror_cmd *cmd)
+{
+ struct mm_struct *mm = dmirror->notifier.mm;
+ unsigned long start, end;
+ unsigned long size = cmd->npages << PAGE_SHIFT;
+ unsigned long addr;
+ unsigned long next;
+ unsigned long pfns[64];
+ unsigned char perm[64];
+ char __user *uptr;
+ struct hmm_range range = {
+ .hmm_pfns = pfns,
+ .dev_private_owner = dmirror->mdevice,
+ };
+ int ret = 0;
+
+ start = cmd->addr;
+ end = start + size;
+ if (end < start)
+ return -EINVAL;
+
+ /* Since the mm is for the mirrored process, get a reference first. */
+ if (!mmget_not_zero(mm))
+ return -EINVAL;
+
+ /*
+ * Register a temporary notifier to detect invalidations even if it
+ * overlaps with other mmu_interval_notifiers.
+ */
+ uptr = u64_to_user_ptr(cmd->ptr);
+ for (addr = start; addr < end; addr = next) {
+ unsigned long n;
+
+ next = min(addr + (ARRAY_SIZE(pfns) << PAGE_SHIFT), end);
+ range.start = addr;
+ range.end = next;
+
+ ret = dmirror_range_snapshot(dmirror, &range, perm);
+ if (ret)
+ break;
+
+ n = (range.end - range.start) >> PAGE_SHIFT;
+ if (copy_to_user(uptr, perm, n)) {
+ ret = -EFAULT;
+ break;
+ }
+
+ cmd->cpages += n;
+ uptr += n;
+ }
+ mmput(mm);
+
+ return ret;
+}
+
+static long dmirror_fops_unlocked_ioctl(struct file *filp,
+ unsigned int command,
+ unsigned long arg)
+{
+ void __user *uarg = (void __user *)arg;
+ struct hmm_dmirror_cmd cmd;
+ struct dmirror *dmirror;
+ int ret;
+
+ dmirror = filp->private_data;
+ if (!dmirror)
+ return -EINVAL;
+
+ if (copy_from_user(&cmd, uarg, sizeof(cmd)))
+ return -EFAULT;
+
+ if (cmd.addr & ~PAGE_MASK)
+ return -EINVAL;
+ if (cmd.addr >= (cmd.addr + (cmd.npages << PAGE_SHIFT)))
+ return -EINVAL;
+
+ cmd.cpages = 0;
+ cmd.faults = 0;
+
+ switch (command) {
+ case HMM_DMIRROR_READ:
+ ret = dmirror_read(dmirror, &cmd);
+ break;
+
+ case HMM_DMIRROR_WRITE:
+ ret = dmirror_write(dmirror, &cmd);
+ break;
+
+ case HMM_DMIRROR_MIGRATE:
+ ret = dmirror_migrate(dmirror, &cmd);
+ break;
+
+ case HMM_DMIRROR_SNAPSHOT:
+ ret = dmirror_snapshot(dmirror, &cmd);
+ break;
+
+ default:
+ return -EINVAL;
+ }
+ if (ret)
+ return ret;
+
+ if (copy_to_user(uarg, &cmd, sizeof(cmd)))
+ return -EFAULT;
+
+ return 0;
+}
+
+static const struct file_operations dmirror_fops = {
+ .open = dmirror_fops_open,
+ .release = dmirror_fops_release,
+ .unlocked_ioctl = dmirror_fops_unlocked_ioctl,
+ .llseek = default_llseek,
+ .owner = THIS_MODULE,
+};
+
+static void dmirror_devmem_free(struct page *page)
+{
+ struct page *rpage = page->zone_device_data;
+ struct dmirror_device *mdevice;
+
+ if (rpage)
+ __free_page(rpage);
+
+ mdevice = dmirror_page_to_device(page);
+
+ spin_lock(&mdevice->lock);
+ mdevice->cfree++;
+ page->zone_device_data = mdevice->free_pages;
+ mdevice->free_pages = page;
+ spin_unlock(&mdevice->lock);
+}
+
+static vm_fault_t dmirror_devmem_fault_alloc_and_copy(struct migrate_vma *args,
+ struct dmirror_device *mdevice)
+{
+ const unsigned long *src = args->src;
+ unsigned long *dst = args->dst;
+ unsigned long start = args->start;
+ unsigned long end = args->end;
+ unsigned long addr;
+
+ for (addr = start; addr < end; addr += PAGE_SIZE,
+ src++, dst++) {
+ struct page *dpage, *spage;
+
+ spage = migrate_pfn_to_page(*src);
+ if (!spage || !(*src & MIGRATE_PFN_MIGRATE))
+ continue;
+ spage = spage->zone_device_data;
+
+ dpage = alloc_page_vma(GFP_HIGHUSER_MOVABLE, args->vma, addr);
+ if (!dpage)
+ continue;
+
+ lock_page(dpage);
+ copy_highpage(dpage, spage);
+ *dst = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;
+ if (*src & MIGRATE_PFN_WRITE)
+ *dst |= MIGRATE_PFN_WRITE;
+ }
+ return 0;
+}
+
+static void dmirror_devmem_fault_finalize_and_map(struct migrate_vma *args,
+ struct dmirror *dmirror)
+{
+ /* Invalidate the device's page table mapping. */
+ mutex_lock(&dmirror->mutex);
+ dmirror_do_update(dmirror, args->start, args->end);
+ mutex_unlock(&dmirror->mutex);
+}
+
+static vm_fault_t dmirror_devmem_fault(struct vm_fault *vmf)
+{
+ struct migrate_vma args;
+ unsigned long src_pfns;
+ unsigned long dst_pfns;
+ struct page *rpage;
+ struct dmirror *dmirror;
+ vm_fault_t ret;
+
+ /*
+ * Normally, a device would use the page->zone_device_data to point to
+ * the mirror but here we use it to hold the page for the simulated
+ * device memory and that page holds the pointer to the mirror.
+ */
+ rpage = vmf->page->zone_device_data;
+ dmirror = rpage->zone_device_data;
+
+ /* FIXME demonstrate how we can adjust migrate range */
+ args.vma = vmf->vma;
+ args.start = vmf->address;
+ args.end = args.start + PAGE_SIZE;
+ args.src = &src_pfns;
+ args.dst = &dst_pfns;
+ args.src_owner = dmirror->mdevice;
+
+ if (migrate_vma_setup(&args))
+ return VM_FAULT_SIGBUS;
+
+ ret = dmirror_devmem_fault_alloc_and_copy(&args, dmirror->mdevice);
+ if (ret)
+ return ret;
+ migrate_vma_pages(&args);
+ dmirror_devmem_fault_finalize_and_map(&args, dmirror);
+ migrate_vma_finalize(&args);
+ return 0;
+}
+
+static const struct dev_pagemap_ops dmirror_devmem_ops = {
+ .page_free = dmirror_devmem_free,
+ .migrate_to_ram = dmirror_devmem_fault,
+};
+
+static int dmirror_device_init(struct dmirror_device *mdevice, int id)
+{
+ dev_t dev;
+ int ret;
+
+ dev = MKDEV(MAJOR(dmirror_dev), id);
+ mutex_init(&mdevice->devmem_lock);
+ spin_lock_init(&mdevice->lock);
+
+ cdev_init(&mdevice->cdevice, &dmirror_fops);
+ mdevice->cdevice.owner = THIS_MODULE;
+ ret = cdev_add(&mdevice->cdevice, dev, 1);
+ if (ret)
+ return ret;
+
+ /* Build a list of free ZONE_DEVICE private struct pages */
+ dmirror_allocate_chunk(mdevice, NULL);
+
+ return 0;
+}
+
+static void dmirror_device_remove(struct dmirror_device *mdevice)
+{
+ unsigned int i;
+
+ if (mdevice->devmem_chunks) {
+ for (i = 0; i < mdevice->devmem_count; i++) {
+ struct dmirror_chunk *devmem =
+ mdevice->devmem_chunks[i];
+
+ memunmap_pages(&devmem->pagemap);
+ release_mem_region(devmem->pagemap.res.start,
+ resource_size(&devmem->pagemap.res));
+ kfree(devmem);
+ }
+ kfree(mdevice->devmem_chunks);
+ }
+
+ cdev_del(&mdevice->cdevice);
+}
+
+static int __init hmm_dmirror_init(void)
+{
+ int ret;
+ int id;
+
+ ret = alloc_chrdev_region(&dmirror_dev, 0, DMIRROR_NDEVICES,
+ "HMM_DMIRROR");
+ if (ret)
+ goto err_unreg;
+
+ for (id = 0; id < DMIRROR_NDEVICES; id++) {
+ ret = dmirror_device_init(dmirror_devices + id, id);
+ if (ret)
+ goto err_chrdev;
+ }
+
+ /*
+ * Allocate a zero page to simulate a reserved page of device private
+	 * memory which is always zero. The kernel's zero_pfn page isn't used,
+	 * just to keep the code here simpler (i.e., we need a struct page for it).
+ */
+ dmirror_zero_page = alloc_page(GFP_HIGHUSER | __GFP_ZERO);
+ if (!dmirror_zero_page) {
+ ret = -ENOMEM;
+ goto err_chrdev;
+ }
+
+ pr_info("HMM test module loaded. This is only for testing HMM.\n");
+ return 0;
+
+err_chrdev:
+ while (--id >= 0)
+ dmirror_device_remove(dmirror_devices + id);
+ unregister_chrdev_region(dmirror_dev, DMIRROR_NDEVICES);
+err_unreg:
+ return ret;
+}
+
+static void __exit hmm_dmirror_exit(void)
+{
+ int id;
+
+ if (dmirror_zero_page)
+ __free_page(dmirror_zero_page);
+ for (id = 0; id < DMIRROR_NDEVICES; id++)
+ dmirror_device_remove(dmirror_devices + id);
+ unregister_chrdev_region(dmirror_dev, DMIRROR_NDEVICES);
+}
+
+module_init(hmm_dmirror_init);
+module_exit(hmm_dmirror_exit);
+MODULE_LICENSE("GPL");
diff --git a/lib/test_hmm_uapi.h b/lib/test_hmm_uapi.h
new file mode 100644
index 000000000000..67b3b2e6ff5d
--- /dev/null
+++ b/lib/test_hmm_uapi.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * This is a module to test the HMM (Heterogeneous Memory Management) API
+ * of the kernel. It allows a userspace program to expose its entire address
+ * space through the HMM test module device file.
+ */
+#ifndef _LIB_TEST_HMM_UAPI_H
+#define _LIB_TEST_HMM_UAPI_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+/*
+ * Structure to pass to the HMM test driver to mimic a device accessing
+ * system memory and ZONE_DEVICE private memory through device page tables.
+ *
+ * @addr: (in) user address the device will read/write
+ * @ptr: (in) user address where device data is copied to/from
+ * @npages: (in) number of pages to read/write
+ * @cpages: (out) number of pages copied
+ * @faults: (out) number of device page faults seen
+ */
+struct hmm_dmirror_cmd {
+ __u64 addr;
+ __u64 ptr;
+ __u64 npages;
+ __u64 cpages;
+ __u64 faults;
+};
+
+/* Expose the address space of the calling process through hmm device file */
+#define HMM_DMIRROR_READ _IOWR('H', 0x00, struct hmm_dmirror_cmd)
+#define HMM_DMIRROR_WRITE _IOWR('H', 0x01, struct hmm_dmirror_cmd)
+#define HMM_DMIRROR_MIGRATE _IOWR('H', 0x02, struct hmm_dmirror_cmd)
+#define HMM_DMIRROR_SNAPSHOT _IOWR('H', 0x03, struct hmm_dmirror_cmd)
+
+/*
+ * Values returned in hmm_dmirror_cmd.ptr for HMM_DMIRROR_SNAPSHOT.
+ * HMM_DMIRROR_PROT_ERROR: no valid mirror PTE for this page
+ * HMM_DMIRROR_PROT_NONE: unpopulated PTE or PTE with no access
+ * HMM_DMIRROR_PROT_READ: read-only PTE
+ * HMM_DMIRROR_PROT_WRITE: read/write PTE
+ * HMM_DMIRROR_PROT_ZERO: special read-only zero page
+ * HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL: Migrated device private page on the
+ *					device on which the ioctl() is made
+ * HMM_DMIRROR_PROT_DEV_PRIVATE_REMOTE: Migrated device private page on some
+ * other device
+ */
+enum {
+ HMM_DMIRROR_PROT_ERROR = 0xFF,
+ HMM_DMIRROR_PROT_NONE = 0x00,
+ HMM_DMIRROR_PROT_READ = 0x01,
+ HMM_DMIRROR_PROT_WRITE = 0x02,
+ HMM_DMIRROR_PROT_ZERO = 0x10,
+ HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL = 0x20,
+ HMM_DMIRROR_PROT_DEV_PRIVATE_REMOTE = 0x30,
+};
+
+#endif /* _LIB_TEST_HMM_UAPI_H */
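
As a rough illustration of the interface defined above, a userspace test could drive the snapshot ioctl like this. This is only a hedged sketch: the /dev/hmm_dmirror0 node name and the 4096-byte page size are assumptions (the header only defines the command structure and ioctl numbers), and error handling is minimal.

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include "test_hmm_uapi.h"

int main(void)
{
	struct hmm_dmirror_cmd cmd;
	unsigned char perm[1];
	char *buf;
	int fd;

	/* Device node name is an assumption, not part of the uapi header. */
	fd = open("/dev/hmm_dmirror0", O_RDWR);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* One page of anonymous memory whose mirrored state we snapshot. */
	buf = aligned_alloc(4096, 4096);
	if (!buf)
		return 1;
	memset(buf, 0, 4096);

	memset(&cmd, 0, sizeof(cmd));
	cmd.addr = (unsigned long)buf;	/* must be page aligned */
	cmd.ptr = (unsigned long)perm;	/* one HMM_DMIRROR_PROT_* byte per page */
	cmd.npages = 1;

	if (ioctl(fd, HMM_DMIRROR_SNAPSHOT, &cmd) < 0)
		perror("HMM_DMIRROR_SNAPSHOT");
	else
		printf("perm[0]=0x%02x cpages=%llu\n", perm[0],
		       (unsigned long long)cmd.cpages);

	free(buf);
	close(fd);
	return 0;
}

HMM_DMIRROR_READ and HMM_DMIRROR_WRITE take the same structure, with cmd.ptr pointing at an npages-sized buffer that the simulated device copies from or to.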
diff --git a/lib/test_linear_ranges.c b/lib/test_linear_ranges.c
new file mode 100644
index 000000000000..676e0b8abcdd
--- /dev/null
+++ b/lib/test_linear_ranges.c
@@ -0,0 +1,228 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KUnit test for the linear_ranges helper.
+ *
+ * Copyright (C) 2020, ROHM Semiconductors.
+ * Author: Matti Vaittinen <matti.vaittien@fi.rohmeurope.com>
+ */
+#include <kunit/test.h>
+
+#include <linux/linear_range.h>
+
+/* First things first. I deeply dislike unit-tests. I have seen all the hell
+ * breaking loose when people who think the unit tests are "the silver bullet"
+ * to kill bugs get to decide how a company should implement testing strategy...
+ *
+ * Believe me, it may get _really_ ridiculous. It is tempting to think that
+ * walking through all the possible execution branches will nail down 100% of
+ * bugs. This may lead to ideas about demands to get a certain % of "test
+ * coverage" - measured as line coverage. And that is one of the worst things
+ * you can do.
+ *
+ * Ask people to provide line coverage and they do. I've seen clever tools
+ * which generate test cases to test the existing functions - and by default
+ * these tools expect code to be correct and just generate checks which are
+ * passing when run against the current code-base. Run this generator and you'll
+ * get tests that do not test that the code is correct but just verify that
+ * nothing changes. The problem is that testing working code is pointless. And
+ * if it is not working, your test must not assume it is working. You won't
+ * catch any bugs with such tests. What you can do is generate a huge amount of
+ * tests, especially if you are asked to provide 100% line-coverage x_x. So what
+ * do these tests - which are not finding any bugs now - do?
+ *
+ * They add inertia to every future development. I think it was Terry Pratchett
+ * who wrote about someone having the same impact as thick syrup has on a
+ * chronometer. An excessive amount of unit-tests has this effect on
+ * development. If you do actually find _any_ bug in the code in such an
+ * environment and try fixing it... ...chances are you also need to fix the
+ * test cases. On a sunny day you fix one test. But I've done refactoring which
+ * resulted in 500+ broken tests (which had really zero value other than proving
+ * to managers that we do do "quality")...
+ *
+ * That being said - there are situations where UTs can be handy. If you have
+ * algorithms which take some input and should produce output - then you can
+ * implement a few, carefully selected, simple UT-cases which test this. I've
+ * previously used this for example for netlink and device-tree data parsing
+ * functions. Feed some example data to the functions and verify that the
+ * output is as expected. I am not covering all the cases but I can see that
+ * the main logic works.
+ *
+ * Here we also do some minor testing. I don't want to go through all branches
+ * or test more or less obvious things - but I want to see that the main logic
+ * is working. And I definitely don't want to add 500+ test cases that break
+ * when some simple fix is done x_x. So - let's only add a few, well-selected
+ * tests which ensure that as much of the logic as possible is good.
+ */
+
+/*
+ * Test Range 1:
+ * selectors: 2 3 4 5 6
+ * values (5): 10 20 30 40 50
+ *
+ * Test Range 2:
+ * selectors: 7 8 9 10
+ * values (4): 100 150 200 250
+ */
+
+#define RANGE1_MIN 10
+#define RANGE1_MIN_SEL 2
+#define RANGE1_STEP 10
+
+/* 2, 3, 4, 5, 6 */
+static const unsigned int range1_sels[] = { RANGE1_MIN_SEL, RANGE1_MIN_SEL + 1,
+ RANGE1_MIN_SEL + 2,
+ RANGE1_MIN_SEL + 3,
+ RANGE1_MIN_SEL + 4 };
+/* 10, 20, 30, 40, 50 */
+static const unsigned int range1_vals[] = { RANGE1_MIN, RANGE1_MIN +
+ RANGE1_STEP,
+ RANGE1_MIN + RANGE1_STEP * 2,
+ RANGE1_MIN + RANGE1_STEP * 3,
+ RANGE1_MIN + RANGE1_STEP * 4 };
+
+#define RANGE2_MIN 100
+#define RANGE2_MIN_SEL 7
+#define RANGE2_STEP 50
+
+/* 7, 8, 9, 10 */
+static const unsigned int range2_sels[] = { RANGE2_MIN_SEL, RANGE2_MIN_SEL + 1,
+ RANGE2_MIN_SEL + 2,
+ RANGE2_MIN_SEL + 3 };
+/* 100, 150, 200, 250 */
+static const unsigned int range2_vals[] = { RANGE2_MIN, RANGE2_MIN +
+ RANGE2_STEP,
+ RANGE2_MIN + RANGE2_STEP * 2,
+ RANGE2_MIN + RANGE2_STEP * 3 };
+
+#define RANGE1_NUM_VALS (ARRAY_SIZE(range1_vals))
+#define RANGE2_NUM_VALS (ARRAY_SIZE(range2_vals))
+#define RANGE_NUM_VALS (RANGE1_NUM_VALS + RANGE2_NUM_VALS)
+
+#define RANGE1_MAX_SEL (RANGE1_MIN_SEL + RANGE1_NUM_VALS - 1)
+#define RANGE1_MAX_VAL (range1_vals[RANGE1_NUM_VALS - 1])
+
+#define RANGE2_MAX_SEL (RANGE2_MIN_SEL + RANGE2_NUM_VALS - 1)
+#define RANGE2_MAX_VAL (range2_vals[RANGE2_NUM_VALS - 1])
+
+#define SMALLEST_SEL RANGE1_MIN_SEL
+#define SMALLEST_VAL RANGE1_MIN
+
+static struct linear_range testr[] = {
+ {
+ .min = RANGE1_MIN,
+ .min_sel = RANGE1_MIN_SEL,
+ .max_sel = RANGE1_MAX_SEL,
+ .step = RANGE1_STEP,
+ }, {
+ .min = RANGE2_MIN,
+ .min_sel = RANGE2_MIN_SEL,
+ .max_sel = RANGE2_MAX_SEL,
+ .step = RANGE2_STEP
+ },
+};
+
+static void range_test_get_value(struct kunit *test)
+{
+ int ret, i;
+ unsigned int sel, val;
+
+ for (i = 0; i < RANGE1_NUM_VALS; i++) {
+ sel = range1_sels[i];
+ ret = linear_range_get_value_array(&testr[0], 2, sel, &val);
+ KUNIT_EXPECT_EQ(test, 0, ret);
+ KUNIT_EXPECT_EQ(test, val, range1_vals[i]);
+ }
+ for (i = 0; i < RANGE2_NUM_VALS; i++) {
+ sel = range2_sels[i];
+ ret = linear_range_get_value_array(&testr[0], 2, sel, &val);
+ KUNIT_EXPECT_EQ(test, 0, ret);
+ KUNIT_EXPECT_EQ(test, val, range2_vals[i]);
+ }
+ ret = linear_range_get_value_array(&testr[0], 2, sel + 1, &val);
+ KUNIT_EXPECT_NE(test, 0, ret);
+}
+
+static void range_test_get_selector_high(struct kunit *test)
+{
+ int ret, i;
+ unsigned int sel;
+ bool found;
+
+ for (i = 0; i < RANGE1_NUM_VALS; i++) {
+ ret = linear_range_get_selector_high(&testr[0], range1_vals[i],
+ &sel, &found);
+ KUNIT_EXPECT_EQ(test, 0, ret);
+ KUNIT_EXPECT_EQ(test, sel, range1_sels[i]);
+ KUNIT_EXPECT_TRUE(test, found);
+ }
+
+ ret = linear_range_get_selector_high(&testr[0], RANGE1_MAX_VAL + 1,
+ &sel, &found);
+ KUNIT_EXPECT_LE(test, ret, 0);
+
+ ret = linear_range_get_selector_high(&testr[0], RANGE1_MIN - 1,
+ &sel, &found);
+ KUNIT_EXPECT_EQ(test, 0, ret);
+ KUNIT_EXPECT_FALSE(test, found);
+ KUNIT_EXPECT_EQ(test, sel, range1_sels[0]);
+}
+
+static void range_test_get_value_amount(struct kunit *test)
+{
+ int ret;
+
+ ret = linear_range_values_in_range_array(&testr[0], 2);
+ KUNIT_EXPECT_EQ(test, (int)RANGE_NUM_VALS, ret);
+}
+
+static void range_test_get_selector_low(struct kunit *test)
+{
+ int i, ret;
+ unsigned int sel;
+ bool found;
+
+ for (i = 0; i < RANGE1_NUM_VALS; i++) {
+ ret = linear_range_get_selector_low_array(&testr[0], 2,
+ range1_vals[i], &sel,
+ &found);
+ KUNIT_EXPECT_EQ(test, 0, ret);
+ KUNIT_EXPECT_EQ(test, sel, range1_sels[i]);
+ KUNIT_EXPECT_TRUE(test, found);
+ }
+ for (i = 0; i < RANGE2_NUM_VALS; i++) {
+ ret = linear_range_get_selector_low_array(&testr[0], 2,
+ range2_vals[i], &sel,
+ &found);
+ KUNIT_EXPECT_EQ(test, 0, ret);
+ KUNIT_EXPECT_EQ(test, sel, range2_sels[i]);
+ KUNIT_EXPECT_TRUE(test, found);
+ }
+
+ /*
+	 * Seek a value greater than the range max => get_selector_*_low should
+	 * return 0 - but set found to false as the value is not in range
+ */
+ ret = linear_range_get_selector_low_array(&testr[0], 2,
+ range2_vals[RANGE2_NUM_VALS - 1] + 1,
+ &sel, &found);
+
+ KUNIT_EXPECT_EQ(test, 0, ret);
+ KUNIT_EXPECT_EQ(test, sel, range2_sels[RANGE2_NUM_VALS - 1]);
+ KUNIT_EXPECT_FALSE(test, found);
+}
+
+static struct kunit_case range_test_cases[] = {
+ KUNIT_CASE(range_test_get_value_amount),
+ KUNIT_CASE(range_test_get_selector_high),
+ KUNIT_CASE(range_test_get_selector_low),
+ KUNIT_CASE(range_test_get_value),
+ {},
+};
+
+static struct kunit_suite range_test_module = {
+ .name = "linear-ranges-test",
+ .test_cases = range_test_cases,
+};
+
+kunit_test_suites(&range_test_module);
+
+MODULE_LICENSE("GPL");
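
For orientation, the selector/value mapping these cases exercise is value = min + (selector - min_sel) * step within each range. A hedged kernel-side sketch using the same testr[] table (illustrative only; it restates what the KUnit cases above verify):

	unsigned int val, sel;
	bool found;
	int ret;

	/* Range 1: selector 4 -> 10 + (4 - 2) * 10 = 30 */
	ret = linear_range_get_value_array(&testr[0], ARRAY_SIZE(testr), 4, &val);
	if (!ret)
		pr_info("selector 4 maps to %u\n", val);	/* 30 */

	/* Range 2: smallest selector whose value is >= 120 is 8 (value 150) */
	ret = linear_range_get_selector_high(&testr[1], 120, &sel, &found);
	if (!ret && found)
		pr_info("value 120 rounds up to selector %u\n", sel);	/* 8 */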
diff --git a/lib/test_printf.c b/lib/test_printf.c
index 6b1622f4d7c2..7d60f24240a4 100644
--- a/lib/test_printf.c
+++ b/lib/test_printf.c
@@ -494,7 +494,7 @@ struct_va_format(void)
}
static void __init
-struct_rtc_time(void)
+time_and_date(void)
{
/* 1543210543 */
const struct rtc_time tm = {
@@ -505,14 +505,21 @@ struct_rtc_time(void)
.tm_mon = 10,
.tm_year = 118,
};
+ /* 2019-01-04T15:32:23 */
+ time64_t t = 1546615943;
- test("(%ptR?)", "%pt", &tm);
+ test("(%pt?)", "%pt", &tm);
test("2018-11-26T05:35:43", "%ptR", &tm);
test("0118-10-26T05:35:43", "%ptRr", &tm);
test("05:35:43|2018-11-26", "%ptRt|%ptRd", &tm, &tm);
test("05:35:43|0118-10-26", "%ptRtr|%ptRdr", &tm, &tm);
test("05:35:43|2018-11-26", "%ptRttr|%ptRdtr", &tm, &tm);
test("05:35:43 tr|2018-11-26 tr", "%ptRt tr|%ptRd tr", &tm, &tm);
+
+ test("2019-01-04T15:32:23", "%ptT", &t);
+ test("0119-00-04T15:32:23", "%ptTr", &t);
+ test("15:32:23|2019-01-04", "%ptTt|%ptTd", &t, &t);
+ test("15:32:23|0119-00-04", "%ptTtr|%ptTdr", &t, &t);
}
static void __init
@@ -678,7 +685,7 @@ test_pointer(void)
uuid();
dentry();
struct_va_format();
- struct_rtc_time();
+ time_and_date();
struct_clk();
bitmap();
netdev_features();
diff --git a/lib/test_vmalloc.c b/lib/test_vmalloc.c
index 8bbefcaddfe8..ddc9685702b1 100644
--- a/lib/test_vmalloc.c
+++ b/lib/test_vmalloc.c
@@ -91,12 +91,8 @@ static int random_size_align_alloc_test(void)
*/
size = ((rnd % 10) + 1) * PAGE_SIZE;
- ptr = __vmalloc_node_range(size, align,
- VMALLOC_START, VMALLOC_END,
- GFP_KERNEL | __GFP_ZERO,
- PAGE_KERNEL,
- 0, 0, __builtin_return_address(0));
-
+ ptr = __vmalloc_node(size, align, GFP_KERNEL | __GFP_ZERO, 0,
+ __builtin_return_address(0));
if (!ptr)
return -1;
@@ -118,12 +114,8 @@ static int align_shift_alloc_test(void)
for (i = 0; i < BITS_PER_LONG; i++) {
align = ((unsigned long) 1) << i;
- ptr = __vmalloc_node_range(PAGE_SIZE, align,
- VMALLOC_START, VMALLOC_END,
- GFP_KERNEL | __GFP_ZERO,
- PAGE_KERNEL,
- 0, 0, __builtin_return_address(0));
-
+ ptr = __vmalloc_node(PAGE_SIZE, align, GFP_KERNEL|__GFP_ZERO, 0,
+ __builtin_return_address(0));
if (!ptr)
return -1;
@@ -139,13 +131,9 @@ static int fix_align_alloc_test(void)
int i;
for (i = 0; i < test_loop_count; i++) {
- ptr = __vmalloc_node_range(5 * PAGE_SIZE,
- THREAD_ALIGN << 1,
- VMALLOC_START, VMALLOC_END,
- GFP_KERNEL | __GFP_ZERO,
- PAGE_KERNEL,
- 0, 0, __builtin_return_address(0));
-
+ ptr = __vmalloc_node(5 * PAGE_SIZE, THREAD_ALIGN << 1,
+ GFP_KERNEL | __GFP_ZERO, 0,
+ __builtin_return_address(0));
if (!ptr)
return -1;
diff --git a/lib/usercopy.c b/lib/usercopy.c
index cbb4d9ec00f2..ca2a697a2061 100644
--- a/lib/usercopy.c
+++ b/lib/usercopy.c
@@ -58,7 +58,7 @@ int check_zeroed_user(const void __user *from, size_t size)
from -= align;
size += align;
- if (!user_access_begin(from, size))
+ if (!user_read_access_begin(from, size))
return -EFAULT;
unsafe_get_user(val, (unsigned long __user *) from, err_fault);
@@ -79,10 +79,10 @@ int check_zeroed_user(const void __user *from, size_t size)
val &= aligned_byte_mask(size);
done:
- user_access_end();
+ user_read_access_end();
return (val == 0);
err_fault:
- user_access_end();
+ user_read_access_end();
return -EFAULT;
}
EXPORT_SYMBOL(check_zeroed_user);
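
The usual caller pattern for check_zeroed_user() is extensible-struct validation; a hedged sketch (uattr, usize and ksize are illustrative names, not taken from this file): when userspace passes a struct larger than the kernel's version, the unknown tail must be all zeroes.

	if (usize > ksize) {
		int ret = check_zeroed_user(uattr + ksize, usize - ksize);

		if (ret < 0)		/* -EFAULT and friends */
			return ret;
		if (ret == 0)		/* unknown tail contains non-zero bytes */
			return -E2BIG;
	}
	/* the first ksize bytes can now be copied and interpreted safely */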
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 7c47ad52ce2f..259e55895933 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -34,6 +34,7 @@
#include <linux/dcache.h>
#include <linux/cred.h>
#include <linux/rtc.h>
+#include <linux/time.h>
#include <linux/uuid.h>
#include <linux/of.h>
#include <net/addrconf.h>
@@ -58,7 +59,7 @@
* @endp: A pointer to the end of the parsed string will be placed here
* @base: The number base to use
*
- * This function is obsolete. Please use kstrtoull instead.
+ * This function has caveats. Please use kstrtoull instead.
*/
unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base)
{
@@ -83,7 +84,7 @@ EXPORT_SYMBOL(simple_strtoull);
* @endp: A pointer to the end of the parsed string will be placed here
* @base: The number base to use
*
- * This function is obsolete. Please use kstrtoul instead.
+ * This function has caveats. Please use kstrtoul instead.
*/
unsigned long simple_strtoul(const char *cp, char **endp, unsigned int base)
{
@@ -97,7 +98,7 @@ EXPORT_SYMBOL(simple_strtoul);
* @endp: A pointer to the end of the parsed string will be placed here
* @base: The number base to use
*
- * This function is obsolete. Please use kstrtol instead.
+ * This function has caveats. Please use kstrtol instead.
*/
long simple_strtol(const char *cp, char **endp, unsigned int base)
{
@@ -114,7 +115,7 @@ EXPORT_SYMBOL(simple_strtol);
* @endp: A pointer to the end of the parsed string will be placed here
* @base: The number base to use
*
- * This function is obsolete. Please use kstrtoll instead.
+ * This function has caveats. Please use kstrtoll instead.
*/
long long simple_strtoll(const char *cp, char **endp, unsigned int base)
{
@@ -1827,14 +1828,39 @@ char *rtc_str(char *buf, char *end, const struct rtc_time *tm,
}
static noinline_for_stack
+char *time64_str(char *buf, char *end, const time64_t time,
+ struct printf_spec spec, const char *fmt)
+{
+ struct rtc_time rtc_time;
+ struct tm tm;
+
+ time64_to_tm(time, 0, &tm);
+
+ rtc_time.tm_sec = tm.tm_sec;
+ rtc_time.tm_min = tm.tm_min;
+ rtc_time.tm_hour = tm.tm_hour;
+ rtc_time.tm_mday = tm.tm_mday;
+ rtc_time.tm_mon = tm.tm_mon;
+ rtc_time.tm_year = tm.tm_year;
+ rtc_time.tm_wday = tm.tm_wday;
+ rtc_time.tm_yday = tm.tm_yday;
+
+ rtc_time.tm_isdst = 0;
+
+ return rtc_str(buf, end, &rtc_time, spec, fmt);
+}
+
+static noinline_for_stack
char *time_and_date(char *buf, char *end, void *ptr, struct printf_spec spec,
const char *fmt)
{
switch (fmt[1]) {
case 'R':
return rtc_str(buf, end, (const struct rtc_time *)ptr, spec, fmt);
+ case 'T':
+ return time64_str(buf, end, *(const time64_t *)ptr, spec, fmt);
default:
- return error_string(buf, end, "(%ptR?)", spec);
+ return error_string(buf, end, "(%pt?)", spec);
}
}
@@ -2150,8 +2176,9 @@ char *fwnode_string(char *buf, char *end, struct fwnode_handle *fwnode,
* - 'd[234]' For a dentry name (optionally 2-4 last components)
* - 'D[234]' Same as 'd' but for a struct file
* - 'g' For block_device name (gendisk + partition number)
- * - 't[R][dt][r]' For time and date as represented:
+ * - 't[RT][dt][r]' For time and date as represented by:
* R struct rtc_time
+ * T time64_t
* - 'C' For a clock, it prints the name (Common Clock Framework) or address
* (legacy clock framework) of the clock
* - 'Cn' For a clock, it prints the name (Common Clock Framework) or address
diff --git a/mm/Kconfig b/mm/Kconfig
index c1acc34c1c35..5c0362bd8d56 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -705,9 +705,9 @@ config ZSMALLOC
returned by an alloc(). This handle must be mapped in order to
access the allocated space.
-config PGTABLE_MAPPING
+config ZSMALLOC_PGTABLE_MAPPING
bool "Use page table mapping to access object in zsmalloc"
- depends on ZSMALLOC
+ depends on ZSMALLOC=y
help
By default, zsmalloc uses a copy-based object mapping method to
access allocations that span two pages. However, if a particular
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index efc5b83acd2d..d382272bcc31 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -15,7 +15,6 @@
#include <trace/events/writeback.h>
struct backing_dev_info noop_backing_dev_info = {
- .name = "noop",
.capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
};
EXPORT_SYMBOL_GPL(noop_backing_dev_info);
@@ -865,12 +864,11 @@ static int bdi_init(struct backing_dev_info *bdi)
return ret;
}
-struct backing_dev_info *bdi_alloc_node(gfp_t gfp_mask, int node_id)
+struct backing_dev_info *bdi_alloc(int node_id)
{
struct backing_dev_info *bdi;
- bdi = kmalloc_node(sizeof(struct backing_dev_info),
- gfp_mask | __GFP_ZERO, node_id);
+ bdi = kzalloc_node(sizeof(*bdi), GFP_KERNEL, node_id);
if (!bdi)
return NULL;
@@ -880,7 +878,7 @@ struct backing_dev_info *bdi_alloc_node(gfp_t gfp_mask, int node_id)
}
return bdi;
}
-EXPORT_SYMBOL(bdi_alloc_node);
+EXPORT_SYMBOL(bdi_alloc);
static struct rb_node **bdi_lookup_rb_node(u64 id, struct rb_node **parentp)
{
@@ -964,7 +962,6 @@ int bdi_register_va(struct backing_dev_info *bdi, const char *fmt, va_list args)
trace_writeback_bdi_register(bdi);
return 0;
}
-EXPORT_SYMBOL(bdi_register_va);
int bdi_register(struct backing_dev_info *bdi, const char *fmt, ...)
{
@@ -978,20 +975,12 @@ int bdi_register(struct backing_dev_info *bdi, const char *fmt, ...)
}
EXPORT_SYMBOL(bdi_register);
-int bdi_register_owner(struct backing_dev_info *bdi, struct device *owner)
+void bdi_set_owner(struct backing_dev_info *bdi, struct device *owner)
{
- int rc;
-
- rc = bdi_register(bdi, "%u:%u", MAJOR(owner->devt), MINOR(owner->devt));
- if (rc)
- return rc;
- /* Leaking owner reference... */
- WARN_ON(bdi->owner);
+ WARN_ON_ONCE(bdi->owner);
bdi->owner = owner;
get_device(owner);
- return 0;
}
-EXPORT_SYMBOL(bdi_register_owner);
/*
* Remove bdi from bdi_list, and ensure that it is no longer visible
diff --git a/mm/compaction.c b/mm/compaction.c
index d8cfb7b99a83..99b73e31afd7 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -2243,15 +2243,11 @@ check_drain:
* would succeed.
*/
if (cc->order > 0 && last_migrated_pfn) {
- int cpu;
unsigned long current_block_start =
block_start_pfn(cc->migrate_pfn, cc->order);
if (last_migrated_pfn < current_block_start) {
- cpu = get_cpu();
- lru_add_drain_cpu(cpu);
- drain_local_pages(cc->zone);
- put_cpu();
+ lru_add_drain_cpu_zone(cc->zone);
/* No more flushing until we migrate again */
last_migrated_pfn = 0;
}
diff --git a/mm/debug.c b/mm/debug.c
index 2189357f0987..f2ede2df585a 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -110,13 +110,57 @@ void __dump_page(struct page *page, const char *reason)
else if (PageAnon(page))
type = "anon ";
else if (mapping) {
- if (mapping->host && mapping->host->i_dentry.first) {
- struct dentry *dentry;
- dentry = container_of(mapping->host->i_dentry.first, struct dentry, d_u.d_alias);
- pr_warn("%ps name:\"%pd\"\n", mapping->a_ops, dentry);
- } else
- pr_warn("%ps\n", mapping->a_ops);
+ const struct inode *host;
+ const struct address_space_operations *a_ops;
+ const struct hlist_node *dentry_first;
+ const struct dentry *dentry_ptr;
+ struct dentry dentry;
+
+ /*
+	 * mapping can be an invalid pointer and we don't want to crash
+ * accessing it, so probe everything depending on it carefully
+ */
+ if (probe_kernel_read_strict(&host, &mapping->host,
+ sizeof(struct inode *)) ||
+ probe_kernel_read_strict(&a_ops, &mapping->a_ops,
+ sizeof(struct address_space_operations *))) {
+ pr_warn("failed to read mapping->host or a_ops, mapping not a valid kernel address?\n");
+ goto out_mapping;
+ }
+
+ if (!host) {
+ pr_warn("mapping->a_ops:%ps\n", a_ops);
+ goto out_mapping;
+ }
+
+ if (probe_kernel_read_strict(&dentry_first,
+ &host->i_dentry.first, sizeof(struct hlist_node *))) {
+ pr_warn("mapping->a_ops:%ps with invalid mapping->host inode address %px\n",
+ a_ops, host);
+ goto out_mapping;
+ }
+
+ if (!dentry_first) {
+ pr_warn("mapping->a_ops:%ps\n", a_ops);
+ goto out_mapping;
+ }
+
+ dentry_ptr = container_of(dentry_first, struct dentry, d_u.d_alias);
+ if (probe_kernel_read_strict(&dentry, dentry_ptr,
+ sizeof(struct dentry))) {
+ pr_warn("mapping->aops:%ps with invalid mapping->host->i_dentry.first %px\n",
+ a_ops, dentry_ptr);
+ } else {
+ /*
+ * if dentry is corrupted, the %pd handler may still
+ * crash, but it's unlikely that we reach here with a
+ * corrupted struct page
+ */
+ pr_warn("mapping->aops:%ps dentry name:\"%pd\"\n",
+ a_ops, &dentry);
+ }
}
+out_mapping:
BUILD_BUG_ON(ARRAY_SIZE(pageflag_names) != __NR_PAGEFLAGS + 1);
pr_warn("%sflags: %#lx(%pGp)%s\n", type, page->flags, &page->flags,
diff --git a/mm/fadvise.c b/mm/fadvise.c
index 4f17c83db575..0e66f2aaeea3 100644
--- a/mm/fadvise.c
+++ b/mm/fadvise.c
@@ -22,6 +22,8 @@
#include <asm/unistd.h>
+#include "internal.h"
+
/*
* POSIX_FADV_WILLNEED could set PG_Referenced, and POSIX_FADV_NOREUSE could
* deactivate the pages and clear PG_Referenced.
@@ -102,10 +104,6 @@ int generic_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
if (!nrpages)
nrpages = ~0UL;
- /*
- * Ignore return value because fadvise() shall return
- * success even if filesystem can't retrieve a hint,
- */
force_page_cache_readahead(mapping, file, start_index, nrpages);
break;
case POSIX_FADV_NOREUSE:
diff --git a/mm/filemap.c b/mm/filemap.c
index 23a051a7ef0f..3430280df607 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1991,7 +1991,7 @@ static void shrink_readahead_size_eio(struct file_ra_state *ra)
* * total number of bytes copied, including those the were already @written
* * negative error code if nothing was copied
*/
-static ssize_t generic_file_buffered_read(struct kiocb *iocb,
+ssize_t generic_file_buffered_read(struct kiocb *iocb,
struct iov_iter *iter, ssize_t written)
{
struct file *filp = iocb->ki_filp;
@@ -2243,6 +2243,7 @@ out:
file_accessed(filp);
return written ? written : error;
}
+EXPORT_SYMBOL_GPL(generic_file_buffered_read);
/**
* generic_file_read_iter - generic filesystem read routine
@@ -2566,7 +2567,6 @@ page_not_uptodate:
if (!error || error == AOP_TRUNCATED_PAGE)
goto retry_find;
- /* Things didn't work out. Return zero to tell the mm layer so. */
shrink_readahead_size_eio(ra);
return VM_FAULT_SIGBUS;
diff --git a/mm/gup.c b/mm/gup.c
index 87a6a59fe667..3edf740a3897 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -382,13 +382,22 @@ static int follow_pfn_pte(struct vm_area_struct *vma, unsigned long address,
}
/*
- * FOLL_FORCE can write to even unwritable pte's, but only
- * after we've gone through a COW cycle and they are dirty.
+ * FOLL_FORCE or a forced COW break can write even to unwritable pte's,
+ * but only after we've gone through a COW cycle and they are dirty.
*/
static inline bool can_follow_write_pte(pte_t pte, unsigned int flags)
{
- return pte_write(pte) ||
- ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pte_dirty(pte));
+ return pte_write(pte) || ((flags & FOLL_COW) && pte_dirty(pte));
+}
+
+/*
+ * A (separate) COW fault might break the page the other way and
+ * get_user_pages() would return the page from what is now the wrong
+ * VM. So we need to force a COW break at GUP time even for reads.
+ */
+static inline bool should_force_cow_break(struct vm_area_struct *vma, unsigned int flags)
+{
+ return is_cow_mapping(vma->vm_flags) && (flags & (FOLL_GET | FOLL_PIN));
}
static struct page *follow_page_pte(struct vm_area_struct *vma,
@@ -1066,9 +1075,11 @@ static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
goto out;
}
if (is_vm_hugetlb_page(vma)) {
+ if (should_force_cow_break(vma, foll_flags))
+ foll_flags |= FOLL_WRITE;
i = follow_hugetlb_page(mm, vma, pages, vmas,
&start, &nr_pages, i,
- gup_flags, locked);
+ foll_flags, locked);
if (locked && *locked == 0) {
/*
* We've got a VM_FAULT_RETRY
@@ -1082,6 +1093,10 @@ static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
continue;
}
}
+
+ if (should_force_cow_break(vma, foll_flags))
+ foll_flags |= FOLL_WRITE;
+
retry:
/*
* If we have a pending SIGKILL, don't keep faulting pages and
@@ -1168,7 +1183,7 @@ static bool vma_permits_fault(struct vm_area_struct *vma,
return true;
}
-/*
+/**
* fixup_user_fault() - manually resolve a user page fault
* @tsk: the task_struct to use for page fault accounting, or
* NULL if faults are not to be recorded.
@@ -1176,7 +1191,8 @@ static bool vma_permits_fault(struct vm_area_struct *vma,
* @address: user address
* @fault_flags:flags to pass down to handle_mm_fault()
* @unlocked: did we unlock the mmap_sem while retrying, maybe NULL if caller
- * does not allow retry
+ * does not allow retry. If NULL, the caller must guarantee
+ * that fault_flags does not contain FAULT_FLAG_ALLOW_RETRY.
*
* This is meant to be called in the specific scenario where for locking reasons
* we try to access user memory in atomic context (within a pagefault_disable()
@@ -1839,7 +1855,7 @@ static long __get_user_pages_remote(struct task_struct *tsk,
gup_flags | FOLL_TOUCH | FOLL_REMOTE);
}
-/*
+/**
* get_user_pages_remote() - pin user pages in memory
* @tsk: the task_struct to use for page fault accounting, or
* NULL if faults are not to be recorded.
@@ -1870,13 +1886,13 @@ static long __get_user_pages_remote(struct task_struct *tsk,
*
* Must be called with mmap_sem held for read or write.
*
- * get_user_pages walks a process's page tables and takes a reference to
- * each struct page that each user address corresponds to at a given
+ * get_user_pages_remote walks a process's page tables and takes a reference
+ * to each struct page that each user address corresponds to at a given
* instant. That is, it takes the page that would be accessed if a user
* thread accesses the given user virtual address at that instant.
*
* This does not guarantee that the page exists in the user mappings when
- * get_user_pages returns, and there may even be a completely different
+ * get_user_pages_remote returns, and there may even be a completely different
* page there in some cases (eg. if mmapped pagecache has been invalidated
* and subsequently re faulted). However it does guarantee that the page
* won't be freed completely. And mostly callers simply care that the page
@@ -1888,17 +1904,17 @@ static long __get_user_pages_remote(struct task_struct *tsk,
* is written to, set_page_dirty (or set_page_dirty_lock, as appropriate) must
* be called after the page is finished with, and before put_page is called.
*
- * get_user_pages is typically used for fewer-copy IO operations, to get a
- * handle on the memory by some means other than accesses via the user virtual
- * addresses. The pages may be submitted for DMA to devices or accessed via
- * their kernel linear mapping (via the kmap APIs). Care should be taken to
- * use the correct cache flushing APIs.
+ * get_user_pages_remote is typically used for fewer-copy IO operations,
+ * to get a handle on the memory by some means other than accesses
+ * via the user virtual addresses. The pages may be submitted for
+ * DMA to devices or accessed via their kernel linear mapping (via the
+ * kmap APIs). Care should be taken to use the correct cache flushing APIs.
*
* See also get_user_pages_fast, for performance critical applications.
*
- * get_user_pages should be phased out in favor of
+ * get_user_pages_remote should be phased out in favor of
* get_user_pages_locked|unlocked or get_user_pages_fast. Nothing
- * should use get_user_pages because it cannot pass
+ * should use get_user_pages_remote because it cannot pass
* FAULT_FLAG_ALLOW_RETRY to handle_mm_fault.
*/
long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm,
@@ -1937,7 +1953,17 @@ static long __get_user_pages_remote(struct task_struct *tsk,
}
#endif /* !CONFIG_MMU */
-/*
+/**
+ * get_user_pages() - pin user pages in memory
+ * @start: starting user address
+ * @nr_pages: number of pages from start to pin
+ * @gup_flags: flags modifying lookup behaviour
+ * @pages: array that receives pointers to the pages pinned.
+ * Should be at least nr_pages long. Or NULL, if caller
+ * only intends to ensure the pages are faulted in.
+ * @vmas: array of pointers to vmas corresponding to each page.
+ * Or NULL if the caller does not require them.
+ *
* This is the same as get_user_pages_remote(), just with a
* less-flexible calling convention where we assume that the task
* and mm being operated on are the current task's and don't allow
@@ -1960,11 +1986,7 @@ long get_user_pages(unsigned long start, unsigned long nr_pages,
}
EXPORT_SYMBOL(get_user_pages);
-/*
- * We can leverage the VM_FAULT_RETRY functionality in the page fault
- * paths better by using either get_user_pages_locked() or
- * get_user_pages_unlocked().
- *
+/**
* get_user_pages_locked() is suitable to replace the form:
*
* down_read(&mm->mmap_sem);
@@ -1980,6 +2002,21 @@ EXPORT_SYMBOL(get_user_pages);
* get_user_pages_locked(tsk, mm, ..., pages, &locked);
* if (locked)
* up_read(&mm->mmap_sem);
+ *
+ * @start: starting user address
+ * @nr_pages: number of pages from start to pin
+ * @gup_flags: flags modifying lookup behaviour
+ * @pages: array that receives pointers to the pages pinned.
+ * Should be at least nr_pages long. Or NULL, if caller
+ * only intends to ensure the pages are faulted in.
+ * @locked: pointer to lock flag indicating whether lock is held and
+ * subsequently whether VM_FAULT_RETRY functionality can be
+ * utilised. Lock must initially be held.
+ *
+ * We can leverage the VM_FAULT_RETRY functionality in the page fault
+ * paths better by using either get_user_pages_locked() or
+ * get_user_pages_unlocked().
+ *
*/
long get_user_pages_locked(unsigned long start, unsigned long nr_pages,
unsigned int gup_flags, struct page **pages,
@@ -2674,6 +2711,10 @@ static bool gup_fast_permitted(unsigned long start, unsigned long end)
*
* If the architecture does not support this function, simply return with no
* pages pinned.
+ *
+ * Careful, careful! COW breaking can go either way, so a non-write
+ * access can get ambiguous page results. If you call this function without
+ * 'write' set, you'd better be sure that you're ok with that ambiguity.
*/
int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
struct page **pages)
@@ -2709,6 +2750,12 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
*
* We do not adopt an rcu_read_lock(.) here as we also want to
* block IPIs that come from THPs splitting.
+ *
+ * NOTE! We allow read-only gup_fast() here, but you'd better be
+ * careful about possible COW pages. You'll get _a_ COW page, but
+ * not necessarily the one you intended to get depending on what
+ * COW event happens after this. COW may break the page copy in a
+ * random direction.
*/
if (IS_ENABLED(CONFIG_HAVE_FAST_GUP) &&
@@ -2766,10 +2813,17 @@ static int internal_get_user_pages_fast(unsigned long start, int nr_pages,
if (unlikely(!access_ok((void __user *)start, len)))
return -EFAULT;
+ /*
+ * The FAST_GUP case requires FOLL_WRITE even for pure reads,
+ * because get_user_pages() may need to cause an early COW in
+ * order to avoid confusing the normal COW routines. So only
+ * targets that are already writable are safe to do by just
+ * looking at the page tables.
+ */
if (IS_ENABLED(CONFIG_HAVE_FAST_GUP) &&
gup_fast_permitted(start, end)) {
local_irq_disable();
- gup_pgd_range(addr, end, gup_flags, pages, &nr_pinned);
+ gup_pgd_range(addr, end, gup_flags | FOLL_WRITE, pages, &nr_pinned);
local_irq_enable();
ret = nr_pinned;
}
@@ -2845,9 +2899,9 @@ EXPORT_SYMBOL_GPL(get_user_pages_fast);
* the arguments here are identical.
*
* FOLL_PIN means that the pages must be released via unpin_user_page(). Please
- * see Documentation/vm/pin_user_pages.rst for further details.
+ * see Documentation/core-api/pin_user_pages.rst for further details.
*
- * This is intended for Case 1 (DIO) in Documentation/vm/pin_user_pages.rst. It
+ * This is intended for Case 1 (DIO) in Documentation/core-api/pin_user_pages.rst. It
* is NOT intended for Case 2 (RDMA: long-term pins).
*/
int pin_user_pages_fast(unsigned long start, int nr_pages,
@@ -2885,9 +2939,9 @@ EXPORT_SYMBOL_GPL(pin_user_pages_fast);
* the arguments here are identical.
*
* FOLL_PIN means that the pages must be released via unpin_user_page(). Please
- * see Documentation/vm/pin_user_pages.rst for details.
+ * see Documentation/core-api/pin_user_pages.rst for details.
*
- * This is intended for Case 1 (DIO) in Documentation/vm/pin_user_pages.rst. It
+ * This is intended for Case 1 (DIO) in Documentation/core-api/pin_user_pages.rst. It
* is NOT intended for Case 2 (RDMA: long-term pins).
*/
long pin_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm,
@@ -2921,9 +2975,9 @@ EXPORT_SYMBOL(pin_user_pages_remote);
* FOLL_PIN is set.
*
* FOLL_PIN means that the pages must be released via unpin_user_page(). Please
- * see Documentation/vm/pin_user_pages.rst for details.
+ * see Documentation/core-api/pin_user_pages.rst for details.
*
- * This is intended for Case 1 (DIO) in Documentation/vm/pin_user_pages.rst. It
+ * This is intended for Case 1 (DIO) in Documentation/core-api/pin_user_pages.rst. It
* is NOT intended for Case 2 (RDMA: long-term pins).
*/
long pin_user_pages(unsigned long start, unsigned long nr_pages,
@@ -2939,3 +2993,20 @@ long pin_user_pages(unsigned long start, unsigned long nr_pages,
pages, vmas, gup_flags);
}
EXPORT_SYMBOL(pin_user_pages);
+
+/*
+ * pin_user_pages_unlocked() is the FOLL_PIN variant of
+ * get_user_pages_unlocked(). Behavior is the same, except that this one sets
+ * FOLL_PIN and rejects FOLL_GET.
+ */
+long pin_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
+ struct page **pages, unsigned int gup_flags)
+{
+ /* FOLL_GET and FOLL_PIN are mutually exclusive. */
+ if (WARN_ON_ONCE(gup_flags & FOLL_GET))
+ return -EINVAL;
+
+ gup_flags |= FOLL_PIN;
+ return get_user_pages_unlocked(start, nr_pages, pages, gup_flags);
+}
+EXPORT_SYMBOL(pin_user_pages_unlocked);
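
A hedged driver-side sketch of the new helper paired with unpin_user_page() (user_addr and the page count are illustrative):

	struct page *pages[16];
	long i, pinned;

	pinned = pin_user_pages_unlocked(user_addr, ARRAY_SIZE(pages), pages,
					 FOLL_WRITE);
	if (pinned <= 0)
		return pinned ? pinned : -EFAULT;

	/* ... DMA or kmap access to the pinned pages ... */

	for (i = 0; i < pinned; i++)
		unpin_user_page(pages[i]);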
diff --git a/mm/hmm.c b/mm/hmm.c
index 280585833adf..41673a6d8d46 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -37,28 +37,13 @@ enum {
HMM_NEED_ALL_BITS = HMM_NEED_FAULT | HMM_NEED_WRITE_FAULT,
};
-/*
- * hmm_device_entry_from_pfn() - create a valid device entry value from pfn
- * @range: range use to encode HMM pfn value
- * @pfn: pfn value for which to create the device entry
- * Return: valid device entry for the pfn
- */
-static uint64_t hmm_device_entry_from_pfn(const struct hmm_range *range,
- unsigned long pfn)
-{
- return (pfn << range->pfn_shift) | range->flags[HMM_PFN_VALID];
-}
-
static int hmm_pfns_fill(unsigned long addr, unsigned long end,
- struct hmm_range *range, enum hmm_pfn_value_e value)
+ struct hmm_range *range, unsigned long cpu_flags)
{
- uint64_t *pfns = range->pfns;
- unsigned long i;
+ unsigned long i = (addr - range->start) >> PAGE_SHIFT;
- i = (addr - range->start) >> PAGE_SHIFT;
for (; addr < end; addr += PAGE_SIZE, i++)
- pfns[i] = range->values[value];
-
+ range->hmm_pfns[i] = cpu_flags;
return 0;
}
@@ -96,7 +81,8 @@ static int hmm_vma_fault(unsigned long addr, unsigned long end,
}
static unsigned int hmm_pte_need_fault(const struct hmm_vma_walk *hmm_vma_walk,
- uint64_t pfns, uint64_t cpu_flags)
+ unsigned long pfn_req_flags,
+ unsigned long cpu_flags)
{
struct hmm_range *range = hmm_vma_walk->range;
@@ -110,27 +96,28 @@ static unsigned int hmm_pte_need_fault(const struct hmm_vma_walk *hmm_vma_walk,
* waste to have the user pre-fill the pfn arrays with a default
* flags value.
*/
- pfns = (pfns & range->pfn_flags_mask) | range->default_flags;
+ pfn_req_flags &= range->pfn_flags_mask;
+ pfn_req_flags |= range->default_flags;
 	/* We aren't asked to do anything ... */
- if (!(pfns & range->flags[HMM_PFN_VALID]))
+ if (!(pfn_req_flags & HMM_PFN_REQ_FAULT))
return 0;
/* Need to write fault ? */
- if ((pfns & range->flags[HMM_PFN_WRITE]) &&
- !(cpu_flags & range->flags[HMM_PFN_WRITE]))
+ if ((pfn_req_flags & HMM_PFN_REQ_WRITE) &&
+ !(cpu_flags & HMM_PFN_WRITE))
return HMM_NEED_FAULT | HMM_NEED_WRITE_FAULT;
/* If CPU page table is not valid then we need to fault */
- if (!(cpu_flags & range->flags[HMM_PFN_VALID]))
+ if (!(cpu_flags & HMM_PFN_VALID))
return HMM_NEED_FAULT;
return 0;
}
static unsigned int
hmm_range_need_fault(const struct hmm_vma_walk *hmm_vma_walk,
- const uint64_t *pfns, unsigned long npages,
- uint64_t cpu_flags)
+ const unsigned long hmm_pfns[], unsigned long npages,
+ unsigned long cpu_flags)
{
struct hmm_range *range = hmm_vma_walk->range;
unsigned int required_fault = 0;
@@ -142,12 +129,12 @@ hmm_range_need_fault(const struct hmm_vma_walk *hmm_vma_walk,
* hmm_pte_need_fault() will always return 0.
*/
if (!((range->default_flags | range->pfn_flags_mask) &
- range->flags[HMM_PFN_VALID]))
+ HMM_PFN_REQ_FAULT))
return 0;
for (i = 0; i < npages; ++i) {
- required_fault |=
- hmm_pte_need_fault(hmm_vma_walk, pfns[i], cpu_flags);
+ required_fault |= hmm_pte_need_fault(hmm_vma_walk, hmm_pfns[i],
+ cpu_flags);
if (required_fault == HMM_NEED_ALL_BITS)
return required_fault;
}
@@ -161,12 +148,13 @@ static int hmm_vma_walk_hole(unsigned long addr, unsigned long end,
struct hmm_range *range = hmm_vma_walk->range;
unsigned int required_fault;
unsigned long i, npages;
- uint64_t *pfns;
+ unsigned long *hmm_pfns;
i = (addr - range->start) >> PAGE_SHIFT;
npages = (end - addr) >> PAGE_SHIFT;
- pfns = &range->pfns[i];
- required_fault = hmm_range_need_fault(hmm_vma_walk, pfns, npages, 0);
+ hmm_pfns = &range->hmm_pfns[i];
+ required_fault =
+ hmm_range_need_fault(hmm_vma_walk, hmm_pfns, npages, 0);
if (!walk->vma) {
if (required_fault)
return -EFAULT;
@@ -174,46 +162,44 @@ static int hmm_vma_walk_hole(unsigned long addr, unsigned long end,
}
if (required_fault)
return hmm_vma_fault(addr, end, required_fault, walk);
- hmm_vma_walk->last = addr;
- return hmm_pfns_fill(addr, end, range, HMM_PFN_NONE);
+ return hmm_pfns_fill(addr, end, range, 0);
}
-static inline uint64_t pmd_to_hmm_pfn_flags(struct hmm_range *range, pmd_t pmd)
+static inline unsigned long pmd_to_hmm_pfn_flags(struct hmm_range *range,
+ pmd_t pmd)
{
if (pmd_protnone(pmd))
return 0;
- return pmd_write(pmd) ? range->flags[HMM_PFN_VALID] |
- range->flags[HMM_PFN_WRITE] :
- range->flags[HMM_PFN_VALID];
+ return pmd_write(pmd) ? (HMM_PFN_VALID | HMM_PFN_WRITE) : HMM_PFN_VALID;
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static int hmm_vma_handle_pmd(struct mm_walk *walk, unsigned long addr,
- unsigned long end, uint64_t *pfns, pmd_t pmd)
+ unsigned long end, unsigned long hmm_pfns[],
+ pmd_t pmd)
{
struct hmm_vma_walk *hmm_vma_walk = walk->private;
struct hmm_range *range = hmm_vma_walk->range;
unsigned long pfn, npages, i;
unsigned int required_fault;
- uint64_t cpu_flags;
+ unsigned long cpu_flags;
npages = (end - addr) >> PAGE_SHIFT;
cpu_flags = pmd_to_hmm_pfn_flags(range, pmd);
required_fault =
- hmm_range_need_fault(hmm_vma_walk, pfns, npages, cpu_flags);
+ hmm_range_need_fault(hmm_vma_walk, hmm_pfns, npages, cpu_flags);
if (required_fault)
return hmm_vma_fault(addr, end, required_fault, walk);
pfn = pmd_pfn(pmd) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
for (i = 0; addr < end; addr += PAGE_SIZE, i++, pfn++)
- pfns[i] = hmm_device_entry_from_pfn(range, pfn) | cpu_flags;
- hmm_vma_walk->last = end;
+ hmm_pfns[i] = pfn | cpu_flags;
return 0;
}
#else /* CONFIG_TRANSPARENT_HUGEPAGE */
/* stub to allow the code below to compile */
int hmm_vma_handle_pmd(struct mm_walk *walk, unsigned long addr,
- unsigned long end, uint64_t *pfns, pmd_t pmd);
+ unsigned long end, unsigned long hmm_pfns[], pmd_t pmd);
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
static inline bool hmm_is_device_private_entry(struct hmm_range *range,
@@ -224,31 +210,31 @@ static inline bool hmm_is_device_private_entry(struct hmm_range *range,
range->dev_private_owner;
}
-static inline uint64_t pte_to_hmm_pfn_flags(struct hmm_range *range, pte_t pte)
+static inline unsigned long pte_to_hmm_pfn_flags(struct hmm_range *range,
+ pte_t pte)
{
if (pte_none(pte) || !pte_present(pte) || pte_protnone(pte))
return 0;
- return pte_write(pte) ? range->flags[HMM_PFN_VALID] |
- range->flags[HMM_PFN_WRITE] :
- range->flags[HMM_PFN_VALID];
+ return pte_write(pte) ? (HMM_PFN_VALID | HMM_PFN_WRITE) : HMM_PFN_VALID;
}
static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
unsigned long end, pmd_t *pmdp, pte_t *ptep,
- uint64_t *pfn)
+ unsigned long *hmm_pfn)
{
struct hmm_vma_walk *hmm_vma_walk = walk->private;
struct hmm_range *range = hmm_vma_walk->range;
unsigned int required_fault;
- uint64_t cpu_flags;
+ unsigned long cpu_flags;
pte_t pte = *ptep;
- uint64_t orig_pfn = *pfn;
+ uint64_t pfn_req_flags = *hmm_pfn;
if (pte_none(pte)) {
- required_fault = hmm_pte_need_fault(hmm_vma_walk, orig_pfn, 0);
+ required_fault =
+ hmm_pte_need_fault(hmm_vma_walk, pfn_req_flags, 0);
if (required_fault)
goto fault;
- *pfn = range->values[HMM_PFN_NONE];
+ *hmm_pfn = 0;
return 0;
}
@@ -260,17 +246,18 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
* the PFN even if not present.
*/
if (hmm_is_device_private_entry(range, entry)) {
- *pfn = hmm_device_entry_from_pfn(range,
- device_private_entry_to_pfn(entry));
- *pfn |= range->flags[HMM_PFN_VALID];
+ cpu_flags = HMM_PFN_VALID;
if (is_write_device_private_entry(entry))
- *pfn |= range->flags[HMM_PFN_WRITE];
+ cpu_flags |= HMM_PFN_WRITE;
+ *hmm_pfn = device_private_entry_to_pfn(entry) |
+ cpu_flags;
return 0;
}
- required_fault = hmm_pte_need_fault(hmm_vma_walk, orig_pfn, 0);
+ required_fault =
+ hmm_pte_need_fault(hmm_vma_walk, pfn_req_flags, 0);
if (!required_fault) {
- *pfn = range->values[HMM_PFN_NONE];
+ *hmm_pfn = 0;
return 0;
}
@@ -290,7 +277,8 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
}
cpu_flags = pte_to_hmm_pfn_flags(range, pte);
- required_fault = hmm_pte_need_fault(hmm_vma_walk, orig_pfn, cpu_flags);
+ required_fault =
+ hmm_pte_need_fault(hmm_vma_walk, pfn_req_flags, cpu_flags);
if (required_fault)
goto fault;
@@ -299,15 +287,15 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
* fall through and treat it like a normal page.
*/
if (pte_special(pte) && !is_zero_pfn(pte_pfn(pte))) {
- if (hmm_pte_need_fault(hmm_vma_walk, orig_pfn, 0)) {
+ if (hmm_pte_need_fault(hmm_vma_walk, pfn_req_flags, 0)) {
pte_unmap(ptep);
return -EFAULT;
}
- *pfn = range->values[HMM_PFN_SPECIAL];
+ *hmm_pfn = HMM_PFN_ERROR;
return 0;
}
- *pfn = hmm_device_entry_from_pfn(range, pte_pfn(pte)) | cpu_flags;
+ *hmm_pfn = pte_pfn(pte) | cpu_flags;
return 0;
fault:
@@ -323,7 +311,8 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
{
struct hmm_vma_walk *hmm_vma_walk = walk->private;
struct hmm_range *range = hmm_vma_walk->range;
- uint64_t *pfns = &range->pfns[(start - range->start) >> PAGE_SHIFT];
+ unsigned long *hmm_pfns =
+ &range->hmm_pfns[(start - range->start) >> PAGE_SHIFT];
unsigned long npages = (end - start) >> PAGE_SHIFT;
unsigned long addr = start;
pte_t *ptep;
@@ -335,16 +324,16 @@ again:
return hmm_vma_walk_hole(start, end, -1, walk);
if (thp_migration_supported() && is_pmd_migration_entry(pmd)) {
- if (hmm_range_need_fault(hmm_vma_walk, pfns, npages, 0)) {
+ if (hmm_range_need_fault(hmm_vma_walk, hmm_pfns, npages, 0)) {
hmm_vma_walk->last = addr;
pmd_migration_entry_wait(walk->mm, pmdp);
return -EBUSY;
}
- return hmm_pfns_fill(start, end, range, HMM_PFN_NONE);
+ return hmm_pfns_fill(start, end, range, 0);
}
if (!pmd_present(pmd)) {
- if (hmm_range_need_fault(hmm_vma_walk, pfns, npages, 0))
+ if (hmm_range_need_fault(hmm_vma_walk, hmm_pfns, npages, 0))
return -EFAULT;
return hmm_pfns_fill(start, end, range, HMM_PFN_ERROR);
}
@@ -364,7 +353,7 @@ again:
if (!pmd_devmap(pmd) && !pmd_trans_huge(pmd))
goto again;
- return hmm_vma_handle_pmd(walk, addr, end, pfns, pmd);
+ return hmm_vma_handle_pmd(walk, addr, end, hmm_pfns, pmd);
}
/*
@@ -374,37 +363,33 @@ again:
* recover.
*/
if (pmd_bad(pmd)) {
- if (hmm_range_need_fault(hmm_vma_walk, pfns, npages, 0))
+ if (hmm_range_need_fault(hmm_vma_walk, hmm_pfns, npages, 0))
return -EFAULT;
return hmm_pfns_fill(start, end, range, HMM_PFN_ERROR);
}
ptep = pte_offset_map(pmdp, addr);
- for (; addr < end; addr += PAGE_SIZE, ptep++, pfns++) {
+ for (; addr < end; addr += PAGE_SIZE, ptep++, hmm_pfns++) {
int r;
- r = hmm_vma_handle_pte(walk, addr, end, pmdp, ptep, pfns);
+ r = hmm_vma_handle_pte(walk, addr, end, pmdp, ptep, hmm_pfns);
if (r) {
/* hmm_vma_handle_pte() did pte_unmap() */
- hmm_vma_walk->last = addr;
return r;
}
}
pte_unmap(ptep - 1);
-
- hmm_vma_walk->last = addr;
return 0;
}
#if defined(CONFIG_ARCH_HAS_PTE_DEVMAP) && \
defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
-static inline uint64_t pud_to_hmm_pfn_flags(struct hmm_range *range, pud_t pud)
+static inline unsigned long pud_to_hmm_pfn_flags(struct hmm_range *range,
+ pud_t pud)
{
if (!pud_present(pud))
return 0;
- return pud_write(pud) ? range->flags[HMM_PFN_VALID] |
- range->flags[HMM_PFN_WRITE] :
- range->flags[HMM_PFN_VALID];
+ return pud_write(pud) ? (HMM_PFN_VALID | HMM_PFN_WRITE) : HMM_PFN_VALID;
}
static int hmm_vma_walk_pud(pud_t *pudp, unsigned long start, unsigned long end,
@@ -432,7 +417,8 @@ static int hmm_vma_walk_pud(pud_t *pudp, unsigned long start, unsigned long end,
if (pud_huge(pud) && pud_devmap(pud)) {
unsigned long i, npages, pfn;
unsigned int required_fault;
- uint64_t *pfns, cpu_flags;
+ unsigned long *hmm_pfns;
+ unsigned long cpu_flags;
if (!pud_present(pud)) {
spin_unlock(ptl);
@@ -441,10 +427,10 @@ static int hmm_vma_walk_pud(pud_t *pudp, unsigned long start, unsigned long end,
i = (addr - range->start) >> PAGE_SHIFT;
npages = (end - addr) >> PAGE_SHIFT;
- pfns = &range->pfns[i];
+ hmm_pfns = &range->hmm_pfns[i];
cpu_flags = pud_to_hmm_pfn_flags(range, pud);
- required_fault = hmm_range_need_fault(hmm_vma_walk, pfns,
+ required_fault = hmm_range_need_fault(hmm_vma_walk, hmm_pfns,
npages, cpu_flags);
if (required_fault) {
spin_unlock(ptl);
@@ -453,9 +439,7 @@ static int hmm_vma_walk_pud(pud_t *pudp, unsigned long start, unsigned long end,
pfn = pud_pfn(pud) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
for (i = 0; i < npages; ++i, ++pfn)
- pfns[i] = hmm_device_entry_from_pfn(range, pfn) |
- cpu_flags;
- hmm_vma_walk->last = end;
+ hmm_pfns[i] = pfn | cpu_flags;
goto out_unlock;
}
@@ -479,8 +463,9 @@ static int hmm_vma_walk_hugetlb_entry(pte_t *pte, unsigned long hmask,
struct hmm_vma_walk *hmm_vma_walk = walk->private;
struct hmm_range *range = hmm_vma_walk->range;
struct vm_area_struct *vma = walk->vma;
- uint64_t orig_pfn, cpu_flags;
unsigned int required_fault;
+ unsigned long pfn_req_flags;
+ unsigned long cpu_flags;
spinlock_t *ptl;
pte_t entry;
@@ -488,9 +473,10 @@ static int hmm_vma_walk_hugetlb_entry(pte_t *pte, unsigned long hmask,
entry = huge_ptep_get(pte);
i = (start - range->start) >> PAGE_SHIFT;
- orig_pfn = range->pfns[i];
+ pfn_req_flags = range->hmm_pfns[i];
cpu_flags = pte_to_hmm_pfn_flags(range, entry);
- required_fault = hmm_pte_need_fault(hmm_vma_walk, orig_pfn, cpu_flags);
+ required_fault =
+ hmm_pte_need_fault(hmm_vma_walk, pfn_req_flags, cpu_flags);
if (required_fault) {
spin_unlock(ptl);
return hmm_vma_fault(addr, end, required_fault, walk);
@@ -498,9 +484,8 @@ static int hmm_vma_walk_hugetlb_entry(pte_t *pte, unsigned long hmask,
pfn = pte_pfn(entry) + ((start & ~hmask) >> PAGE_SHIFT);
for (; addr < end; addr += PAGE_SIZE, i++, pfn++)
- range->pfns[i] = hmm_device_entry_from_pfn(range, pfn) |
- cpu_flags;
- hmm_vma_walk->last = end;
+ range->hmm_pfns[i] = pfn | cpu_flags;
+
spin_unlock(ptl);
return 0;
}
@@ -531,13 +516,12 @@ static int hmm_vma_walk_test(unsigned long start, unsigned long end,
* failure.
*/
if (hmm_range_need_fault(hmm_vma_walk,
- range->pfns +
+ range->hmm_pfns +
((start - range->start) >> PAGE_SHIFT),
(end - start) >> PAGE_SHIFT, 0))
return -EFAULT;
hmm_pfns_fill(start, end, range, HMM_PFN_ERROR);
- hmm_vma_walk->last = end;
/* Skip this vma and continue processing the next vma. */
return 1;
@@ -555,9 +539,7 @@ static const struct mm_walk_ops hmm_walk_ops = {
* hmm_range_fault - try to fault some address in a virtual address range
* @range: argument structure
*
- * Return: the number of valid pages in range->pfns[] (from range start
- * address), which may be zero. On error one of the following status codes
- * can be returned:
+ * Returns 0 on success or one of the following error codes:
*
* -EINVAL: Invalid arguments or mm or virtual address is in an invalid vma
* (e.g., device file vma).
@@ -572,7 +554,7 @@ static const struct mm_walk_ops hmm_walk_ops = {
* This is similar to get_user_pages(), except that it can read the page tables
* without mutating them (ie causing faults).
*/
-long hmm_range_fault(struct hmm_range *range)
+int hmm_range_fault(struct hmm_range *range)
{
struct hmm_vma_walk hmm_vma_walk = {
.range = range,
@@ -590,10 +572,13 @@ long hmm_range_fault(struct hmm_range *range)
return -EBUSY;
ret = walk_page_range(mm, hmm_vma_walk.last, range->end,
&hmm_walk_ops, &hmm_vma_walk);
+ /*
+ * When -EBUSY is returned the loop restarts with
+ * hmm_vma_walk.last set to an address that has not been stored
+ * in pfns. All entries < last in the pfn array are set to their
+ * output, and all >= are still at their input values.
+ */
} while (ret == -EBUSY);
-
- if (ret)
- return ret;
- return (hmm_vma_walk.last - range->start) >> PAGE_SHIFT;
+ return ret;
}
EXPORT_SYMBOL(hmm_range_fault);
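
With the 0-on-success contract above, a caller walks the dense hmm_pfns[] output roughly as follows (hedged sketch; map_one_page() is a placeholder for programming the device page table, and hmm_pfn_to_page() is assumed from the reworked hmm.h):

	unsigned long i, npages = (range.end - range.start) >> PAGE_SHIFT;

	if (hmm_range_fault(&range))
		goto err;	/* 0 means every hmm_pfns[] entry was filled in */

	for (i = 0; i < npages; i++) {
		if (!(range.hmm_pfns[i] & HMM_PFN_VALID))
			continue;
		map_one_page(hmm_pfn_to_page(range.hmm_pfns[i]),
			     range.hmm_pfns[i] & HMM_PFN_WRITE);
	}

As in dmirror_range_snapshot() earlier in this patch, such a walk has to sit inside an mmu_interval_read_begin()/mmu_interval_read_retry() loop so that a concurrent invalidation restarts it.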
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 6ecd1045113b..11fe0b4dbe67 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1515,13 +1515,12 @@ out_unlock:
}
/*
- * FOLL_FORCE can write to even unwritable pmd's, but only
- * after we've gone through a COW cycle and they are dirty.
+ * FOLL_FORCE or a forced COW break can write even to unwritable pmd's,
+ * but only after we've gone through a COW cycle and they are dirty.
*/
static inline bool can_follow_write_pmd(pmd_t pmd, unsigned int flags)
{
- return pmd_write(pmd) ||
- ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pmd_dirty(pmd));
+ return pmd_write(pmd) || ((flags & FOLL_COW) && pmd_dirty(pmd));
}
struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
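
Note: the can_follow_write_pmd() hunk drops the FOLL_FORCE test, so a walk that has already gone through a COW break (FOLL_COW) may write to a dirty but hardware-unwritable pmd, as the updated comment describes. A tiny illustrative predicate with made-up flag and pmd bits (not the kernel's values):

#include <stdbool.h>
#include <stdio.h>

#define FOLL_COW   0x1  /* stand-in flag: a COW break already happened */
#define PMD_WRITE  0x2  /* stand-in pmd bit: hardware-writable */
#define PMD_DIRTY  0x4  /* stand-in pmd bit: written to since the break */

static bool can_follow_write(unsigned int pmd_bits, unsigned int flags)
{
        return (pmd_bits & PMD_WRITE) ||
               ((flags & FOLL_COW) && (pmd_bits & PMD_DIRTY));
}

int main(void)
{
        printf("%d\n", can_follow_write(PMD_DIRTY, FOLL_COW)); /* 1 */
        printf("%d\n", can_follow_write(PMD_DIRTY, 0));        /* 0 */
        return 0;
}
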
diff --git a/mm/internal.h b/mm/internal.h
index b5634e78f01d..f762a34b0c57 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -49,18 +49,20 @@ void unmap_page_range(struct mmu_gather *tlb,
unsigned long addr, unsigned long end,
struct zap_details *details);
-extern unsigned int __do_page_cache_readahead(struct address_space *mapping,
- struct file *filp, pgoff_t offset, unsigned long nr_to_read,
+void force_page_cache_readahead(struct address_space *, struct file *,
+ pgoff_t index, unsigned long nr_to_read);
+void __do_page_cache_readahead(struct address_space *, struct file *,
+ pgoff_t index, unsigned long nr_to_read,
unsigned long lookahead_size);
/*
* Submit IO for the read-ahead request in file_ra_state.
*/
-static inline unsigned long ra_submit(struct file_ra_state *ra,
+static inline void ra_submit(struct file_ra_state *ra,
struct address_space *mapping, struct file *filp)
{
- return __do_page_cache_readahead(mapping, filp,
- ra->start, ra->size, ra->async_size);
+ __do_page_cache_readahead(mapping, filp,
+ ra->start, ra->size, ra->async_size);
}
/**
diff --git a/mm/kasan/Makefile b/mm/kasan/Makefile
index de3121848ddf..d532c2587731 100644
--- a/mm/kasan/Makefile
+++ b/mm/kasan/Makefile
@@ -15,14 +15,19 @@ CFLAGS_REMOVE_tags_report.o = $(CC_FLAGS_FTRACE)
# Function splitter causes unnecessary splits in __asan_load1/__asan_store1
# see: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63533
-CFLAGS_common.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector) -DDISABLE_BRANCH_PROFILING
-CFLAGS_generic.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector) -DDISABLE_BRANCH_PROFILING
-CFLAGS_generic_report.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector) -DDISABLE_BRANCH_PROFILING
-CFLAGS_init.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector) -DDISABLE_BRANCH_PROFILING
-CFLAGS_quarantine.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector) -DDISABLE_BRANCH_PROFILING
-CFLAGS_report.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector) -DDISABLE_BRANCH_PROFILING
-CFLAGS_tags.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector) -DDISABLE_BRANCH_PROFILING
-CFLAGS_tags_report.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector) -DDISABLE_BRANCH_PROFILING
+CC_FLAGS_KASAN_RUNTIME := $(call cc-option, -fno-conserve-stack)
+CC_FLAGS_KASAN_RUNTIME += $(call cc-option, -fno-stack-protector)
+# Disable branch tracing to avoid recursion.
+CC_FLAGS_KASAN_RUNTIME += -DDISABLE_BRANCH_PROFILING
+
+CFLAGS_common.o := $(CC_FLAGS_KASAN_RUNTIME)
+CFLAGS_generic.o := $(CC_FLAGS_KASAN_RUNTIME)
+CFLAGS_generic_report.o := $(CC_FLAGS_KASAN_RUNTIME)
+CFLAGS_init.o := $(CC_FLAGS_KASAN_RUNTIME)
+CFLAGS_quarantine.o := $(CC_FLAGS_KASAN_RUNTIME)
+CFLAGS_report.o := $(CC_FLAGS_KASAN_RUNTIME)
+CFLAGS_tags.o := $(CC_FLAGS_KASAN_RUNTIME)
+CFLAGS_tags_report.o := $(CC_FLAGS_KASAN_RUNTIME)
obj-$(CONFIG_KASAN) := common.o init.o report.o
obj-$(CONFIG_KASAN_GENERIC) += generic.o generic_report.o quarantine.o
diff --git a/mm/kasan/common.c b/mm/kasan/common.c
index 2906358e42f0..757d4074fe28 100644
--- a/mm/kasan/common.c
+++ b/mm/kasan/common.c
@@ -33,7 +33,6 @@
#include <linux/types.h>
#include <linux/vmalloc.h>
#include <linux/bug.h>
-#include <linux/uaccess.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
@@ -613,24 +612,6 @@ void kasan_free_shadow(const struct vm_struct *vm)
}
#endif
-extern void __kasan_report(unsigned long addr, size_t size, bool is_write, unsigned long ip);
-extern bool report_enabled(void);
-
-bool kasan_report(unsigned long addr, size_t size, bool is_write, unsigned long ip)
-{
- unsigned long flags = user_access_save();
- bool ret = false;
-
- if (likely(report_enabled())) {
- __kasan_report(addr, size, is_write, ip);
- ret = true;
- }
-
- user_access_restore(flags);
-
- return ret;
-}
-
#ifdef CONFIG_MEMORY_HOTPLUG
static bool shadow_mapped(unsigned long addr)
{
diff --git a/mm/kasan/report.c b/mm/kasan/report.c
index 80f23c9da6b0..51ec45407a0b 100644
--- a/mm/kasan/report.c
+++ b/mm/kasan/report.c
@@ -29,6 +29,7 @@
#include <linux/kasan.h>
#include <linux/module.h>
#include <linux/sched/task_stack.h>
+#include <linux/uaccess.h>
#include <asm/sections.h>
@@ -454,7 +455,7 @@ static void print_shadow_for_address(const void *addr)
}
}
-bool report_enabled(void)
+static bool report_enabled(void)
{
if (current->kasan_depth)
return false;
@@ -479,7 +480,8 @@ void kasan_report_invalid_free(void *object, unsigned long ip)
end_report(&flags);
}
-void __kasan_report(unsigned long addr, size_t size, bool is_write, unsigned long ip)
+static void __kasan_report(unsigned long addr, size_t size, bool is_write,
+ unsigned long ip)
{
struct kasan_access_info info;
void *tagged_addr;
@@ -518,6 +520,22 @@ void __kasan_report(unsigned long addr, size_t size, bool is_write, unsigned lon
end_report(&flags);
}
+bool kasan_report(unsigned long addr, size_t size, bool is_write,
+ unsigned long ip)
+{
+ unsigned long flags = user_access_save();
+ bool ret = false;
+
+ if (likely(report_enabled())) {
+ __kasan_report(addr, size, is_write, ip);
+ ret = true;
+ }
+
+ user_access_restore(flags);
+
+ return ret;
+}
+
#ifdef CONFIG_KASAN_INLINE
/*
* With CONFIG_KASAN_INLINE, accesses to bogus pointers (outside the high
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index a3b97f103966..f973a025569b 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1314,7 +1314,7 @@ static unsigned long mem_cgroup_margin(struct mem_cgroup *memcg)
if (do_memsw_account()) {
count = page_counter_read(&memcg->memsw);
limit = READ_ONCE(memcg->memsw.max);
- if (count <= limit)
+ if (count < limit)
margin = min(margin, limit - count);
else
margin = 0;
@@ -1451,6 +1451,8 @@ static char *memory_stat_format(struct mem_cgroup *memcg)
memcg_page_state(memcg, WORKINGSET_REFAULT));
seq_buf_printf(&s, "workingset_activate %lu\n",
memcg_page_state(memcg, WORKINGSET_ACTIVATE));
+ seq_buf_printf(&s, "workingset_restore %lu\n",
+ memcg_page_state(memcg, WORKINGSET_RESTORE));
seq_buf_printf(&s, "workingset_nodereclaim %lu\n",
memcg_page_state(memcg, WORKINGSET_NODERECLAIM));
@@ -2250,7 +2252,8 @@ static void reclaim_high(struct mem_cgroup *memcg,
gfp_t gfp_mask)
{
do {
- if (page_counter_read(&memcg->memory) <= READ_ONCE(memcg->high))
+ if (page_counter_read(&memcg->memory) <=
+ READ_ONCE(memcg->memory.high))
continue;
memcg_memory_event(memcg, MEMCG_HIGH);
try_to_free_mem_cgroup_pages(memcg, nr_pages, gfp_mask, true);
@@ -2319,41 +2322,64 @@ static void high_work_func(struct work_struct *work)
#define MEMCG_DELAY_PRECISION_SHIFT 20
#define MEMCG_DELAY_SCALING_SHIFT 14
-/*
- * Get the number of jiffies that we should penalise a mischievous cgroup which
- * is exceeding its memory.high by checking both it and its ancestors.
- */
-static unsigned long calculate_high_delay(struct mem_cgroup *memcg,
- unsigned int nr_pages)
+static u64 calculate_overage(unsigned long usage, unsigned long high)
{
- unsigned long penalty_jiffies;
- u64 max_overage = 0;
+ u64 overage;
- do {
- unsigned long usage, high;
- u64 overage;
+ if (usage <= high)
+ return 0;
- usage = page_counter_read(&memcg->memory);
- high = READ_ONCE(memcg->high);
+ /*
+ * Prevent division by 0 in overage calculation by acting as if
+ * it was a threshold of 1 page
+ */
+ high = max(high, 1UL);
- if (usage <= high)
- continue;
+ overage = usage - high;
+ overage <<= MEMCG_DELAY_PRECISION_SHIFT;
+ return div64_u64(overage, high);
+}
- /*
- * Prevent division by 0 in overage calculation by acting as if
- * it was a threshold of 1 page
- */
- high = max(high, 1UL);
+static u64 mem_find_max_overage(struct mem_cgroup *memcg)
+{
+ u64 overage, max_overage = 0;
+
+ do {
+ overage = calculate_overage(page_counter_read(&memcg->memory),
+ READ_ONCE(memcg->memory.high));
+ max_overage = max(overage, max_overage);
+ } while ((memcg = parent_mem_cgroup(memcg)) &&
+ !mem_cgroup_is_root(memcg));
- overage = usage - high;
- overage <<= MEMCG_DELAY_PRECISION_SHIFT;
- overage = div64_u64(overage, high);
+ return max_overage;
+}
- if (overage > max_overage)
- max_overage = overage;
+static u64 swap_find_max_overage(struct mem_cgroup *memcg)
+{
+ u64 overage, max_overage = 0;
+
+ do {
+ overage = calculate_overage(page_counter_read(&memcg->swap),
+ READ_ONCE(memcg->swap.high));
+ if (overage)
+ memcg_memory_event(memcg, MEMCG_SWAP_HIGH);
+ max_overage = max(overage, max_overage);
} while ((memcg = parent_mem_cgroup(memcg)) &&
!mem_cgroup_is_root(memcg));
+ return max_overage;
+}
+
+/*
+ * Get the number of jiffies that we should penalise a mischievous cgroup which
+ * is exceeding its memory.high by checking both it and its ancestors.
+ */
+static unsigned long calculate_high_delay(struct mem_cgroup *memcg,
+ unsigned int nr_pages,
+ u64 max_overage)
+{
+ unsigned long penalty_jiffies;
+
if (!max_overage)
return 0;
@@ -2377,14 +2403,7 @@ static unsigned long calculate_high_delay(struct mem_cgroup *memcg,
* MEMCG_CHARGE_BATCH pages is nominal, so work out how much smaller or
* larger the current charge batch is than that.
*/
- penalty_jiffies = penalty_jiffies * nr_pages / MEMCG_CHARGE_BATCH;
-
- /*
- * Clamp the max delay per usermode return so as to still keep the
- * application moving forwards and also permit diagnostics, albeit
- * extremely slowly.
- */
- return min(penalty_jiffies, MEMCG_MAX_HIGH_DELAY_JIFFIES);
+ return penalty_jiffies * nr_pages / MEMCG_CHARGE_BATCH;
}
/*
@@ -2409,7 +2428,18 @@ void mem_cgroup_handle_over_high(void)
* memory.high is breached and reclaim is unable to keep up. Throttle
* allocators proactively to slow down excessive growth.
*/
- penalty_jiffies = calculate_high_delay(memcg, nr_pages);
+ penalty_jiffies = calculate_high_delay(memcg, nr_pages,
+ mem_find_max_overage(memcg));
+
+ penalty_jiffies += calculate_high_delay(memcg, nr_pages,
+ swap_find_max_overage(memcg));
+
+ /*
+ * Clamp the max delay per usermode return so as to still keep the
+ * application moving forwards and also permit diagnostics, albeit
+ * extremely slowly.
+ */
+ penalty_jiffies = min(penalty_jiffies, MEMCG_MAX_HIGH_DELAY_JIFFIES);
/*
* Don't sleep if the amount of jiffies this memcg owes us is so low
@@ -2594,12 +2624,32 @@ done_restock:
* reclaim, the cost of mismatch is negligible.
*/
do {
- if (page_counter_read(&memcg->memory) > READ_ONCE(memcg->high)) {
- /* Don't bother a random interrupted task */
- if (in_interrupt()) {
+ bool mem_high, swap_high;
+
+ mem_high = page_counter_read(&memcg->memory) >
+ READ_ONCE(memcg->memory.high);
+ swap_high = page_counter_read(&memcg->swap) >
+ READ_ONCE(memcg->swap.high);
+
+ /* Don't bother a random interrupted task */
+ if (in_interrupt()) {
+ if (mem_high) {
schedule_work(&memcg->high_work);
break;
}
+ continue;
+ }
+
+ if (mem_high || swap_high) {
+ /*
+ * The allocating tasks in this cgroup will need to do
+ * reclaim or be throttled to prevent further growth
+ * of the memory or swap footprints.
+ *
+ * Target some best-effort fairness between the tasks,
+ * and distribute reclaim work and delay penalties
+ * based on how much each task is actually allocating.
+ */
current->memcg_nr_pages_over_high += batch;
set_notify_resume(current);
break;
@@ -2802,7 +2852,12 @@ static void memcg_schedule_kmem_cache_create(struct mem_cgroup *memcg,
static inline bool memcg_kmem_bypass(void)
{
- if (in_interrupt() || !current->mm || (current->flags & PF_KTHREAD))
+ if (in_interrupt())
+ return true;
+
+ /* Allow remote memcg charging in kthread contexts. */
+ if ((!current->mm || (current->flags & PF_KTHREAD)) &&
+ !current->active_memcg)
return true;
return false;
}
@@ -4330,7 +4385,6 @@ void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages,
*pdirty = memcg_exact_page_state(memcg, NR_FILE_DIRTY);
- /* this should eventually include NR_UNSTABLE_NFS */
*pwriteback = memcg_exact_page_state(memcg, NR_WRITEBACK);
*pfilepages = memcg_exact_page_state(memcg, NR_INACTIVE_FILE) +
memcg_exact_page_state(memcg, NR_ACTIVE_FILE);
@@ -4338,7 +4392,7 @@ void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages,
while ((parent = parent_mem_cgroup(memcg))) {
unsigned long ceiling = min(READ_ONCE(memcg->memory.max),
- READ_ONCE(memcg->high));
+ READ_ONCE(memcg->memory.high));
unsigned long used = page_counter_read(&memcg->memory);
*pheadroom = min(*pheadroom, ceiling - min(ceiling, used));
@@ -5063,8 +5117,9 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
if (IS_ERR(memcg))
return ERR_CAST(memcg);
- WRITE_ONCE(memcg->high, PAGE_COUNTER_MAX);
+ page_counter_set_high(&memcg->memory, PAGE_COUNTER_MAX);
memcg->soft_limit = PAGE_COUNTER_MAX;
+ page_counter_set_high(&memcg->swap, PAGE_COUNTER_MAX);
if (parent) {
memcg->swappiness = mem_cgroup_swappiness(parent);
memcg->oom_kill_disable = parent->oom_kill_disable;
@@ -5216,8 +5271,9 @@ static void mem_cgroup_css_reset(struct cgroup_subsys_state *css)
page_counter_set_max(&memcg->tcpmem, PAGE_COUNTER_MAX);
page_counter_set_min(&memcg->memory, 0);
page_counter_set_low(&memcg->memory, 0);
- WRITE_ONCE(memcg->high, PAGE_COUNTER_MAX);
+ page_counter_set_high(&memcg->memory, PAGE_COUNTER_MAX);
memcg->soft_limit = PAGE_COUNTER_MAX;
+ page_counter_set_high(&memcg->swap, PAGE_COUNTER_MAX);
memcg_wb_domain_size_changed(memcg);
}
@@ -6015,7 +6071,8 @@ static ssize_t memory_low_write(struct kernfs_open_file *of,
static int memory_high_show(struct seq_file *m, void *v)
{
- return seq_puts_memcg_tunable(m, READ_ONCE(mem_cgroup_from_seq(m)->high));
+ return seq_puts_memcg_tunable(m,
+ READ_ONCE(mem_cgroup_from_seq(m)->memory.high));
}
static ssize_t memory_high_write(struct kernfs_open_file *of,
@@ -6032,7 +6089,7 @@ static ssize_t memory_high_write(struct kernfs_open_file *of,
if (err)
return err;
- WRITE_ONCE(memcg->high, high);
+ page_counter_set_high(&memcg->memory, high);
for (;;) {
unsigned long nr_pages = page_counter_read(&memcg->memory);
@@ -6227,7 +6284,6 @@ static struct cftype memory_files[] = {
},
{
.name = "stat",
- .flags = CFTYPE_NOT_ON_ROOT,
.seq_show = memory_stat_show,
},
{
@@ -7131,10 +7187,13 @@ bool mem_cgroup_swap_full(struct page *page)
if (!memcg)
return false;
- for (; memcg != root_mem_cgroup; memcg = parent_mem_cgroup(memcg))
- if (page_counter_read(&memcg->swap) * 2 >=
- READ_ONCE(memcg->swap.max))
+ for (; memcg != root_mem_cgroup; memcg = parent_mem_cgroup(memcg)) {
+ unsigned long usage = page_counter_read(&memcg->swap);
+
+ if (usage * 2 >= READ_ONCE(memcg->swap.high) ||
+ usage * 2 >= READ_ONCE(memcg->swap.max))
return true;
+ }
return false;
}
@@ -7164,6 +7223,29 @@ static u64 swap_current_read(struct cgroup_subsys_state *css,
return (u64)page_counter_read(&memcg->swap) * PAGE_SIZE;
}
+static int swap_high_show(struct seq_file *m, void *v)
+{
+ return seq_puts_memcg_tunable(m,
+ READ_ONCE(mem_cgroup_from_seq(m)->swap.high));
+}
+
+static ssize_t swap_high_write(struct kernfs_open_file *of,
+ char *buf, size_t nbytes, loff_t off)
+{
+ struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
+ unsigned long high;
+ int err;
+
+ buf = strstrip(buf);
+ err = page_counter_memparse(buf, "max", &high);
+ if (err)
+ return err;
+
+ page_counter_set_high(&memcg->swap, high);
+
+ return nbytes;
+}
+
static int swap_max_show(struct seq_file *m, void *v)
{
return seq_puts_memcg_tunable(m,
@@ -7191,6 +7273,8 @@ static int swap_events_show(struct seq_file *m, void *v)
{
struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
+ seq_printf(m, "high %lu\n",
+ atomic_long_read(&memcg->memory_events[MEMCG_SWAP_HIGH]));
seq_printf(m, "max %lu\n",
atomic_long_read(&memcg->memory_events[MEMCG_SWAP_MAX]));
seq_printf(m, "fail %lu\n",
@@ -7206,6 +7290,12 @@ static struct cftype swap_files[] = {
.read_u64 = swap_current_read,
},
{
+ .name = "swap.high",
+ .flags = CFTYPE_NOT_ON_ROOT,
+ .seq_show = swap_high_show,
+ .write = swap_high_write,
+ },
+ {
.name = "swap.max",
.flags = CFTYPE_NOT_ON_ROOT,
.seq_show = swap_max_show,
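
Note: the memcontrol.c changes factor the memory.high overage computation into calculate_overage() and reuse it for the new swap.high limit, with the clamp to MEMCG_MAX_HIGH_DELAY_JIFFIES applied once after both penalties are summed. A self-contained userspace sketch of the same fixed-point overage arithmetic follows; the 20-bit shift mirrors MEMCG_DELAY_PRECISION_SHIFT from the hunk, everything else is illustrative:

#include <stdint.h>
#include <stdio.h>

#define DELAY_PRECISION_SHIFT 20        /* mirrors MEMCG_DELAY_PRECISION_SHIFT */

/* Overage as a 20-bit fixed-point fraction of the high limit. */
static uint64_t calculate_overage(unsigned long usage, unsigned long high)
{
        uint64_t overage;

        if (usage <= high)
                return 0;

        /* Avoid dividing by zero when the limit is 0 pages. */
        if (high == 0)
                high = 1;

        overage = (uint64_t)(usage - high) << DELAY_PRECISION_SHIFT;
        return overage / high;
}

int main(void)
{
        /* 120 pages used against a 100-page high limit: ~20% overage. */
        uint64_t o = calculate_overage(120, 100);

        printf("overage = %llu (%.2f%% of high)\n",
               (unsigned long long)o,
               100.0 * o / (1UL << DELAY_PRECISION_SHIFT));
        return 0;
}
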
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index a96364be8ab4..ababa368cb68 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -210,14 +210,17 @@ static int kill_proc(struct to_kill *tk, unsigned long pfn, int flags)
{
struct task_struct *t = tk->tsk;
short addr_lsb = tk->size_shift;
- int ret;
+ int ret = 0;
- pr_err("Memory failure: %#lx: Sending SIGBUS to %s:%d due to hardware memory corruption\n",
- pfn, t->comm, t->pid);
+ if ((t->mm == current->mm) || !(flags & MF_ACTION_REQUIRED))
+ pr_err("Memory failure: %#lx: Sending SIGBUS to %s:%d due to hardware memory corruption\n",
+ pfn, t->comm, t->pid);
- if ((flags & MF_ACTION_REQUIRED) && t->mm == current->mm) {
- ret = force_sig_mceerr(BUS_MCEERR_AR, (void __user *)tk->addr,
- addr_lsb);
+ if (flags & MF_ACTION_REQUIRED) {
+ if (t->mm == current->mm)
+ ret = force_sig_mceerr(BUS_MCEERR_AR,
+ (void __user *)tk->addr, addr_lsb);
+ /* send no signal to non-current processes */
} else {
/*
* Don't use force here, it's convenient if the signal
@@ -1493,7 +1496,7 @@ static void memory_failure_work_func(struct work_struct *work)
unsigned long proc_flags;
int gotten;
- mf_cpu = this_cpu_ptr(&memory_failure_cpu);
+ mf_cpu = container_of(work, struct memory_failure_cpu, work);
for (;;) {
spin_lock_irqsave(&mf_cpu->lock, proc_flags);
gotten = kfifo_get(&mf_cpu->fifo, &entry);
@@ -1507,6 +1510,19 @@ static void memory_failure_work_func(struct work_struct *work)
}
}
+/*
+ * Process memory_failure work queued on the specified CPU.
+ * Used to avoid return-to-userspace racing with the memory_failure workqueue.
+ */
+void memory_failure_queue_kick(int cpu)
+{
+ struct memory_failure_cpu *mf_cpu;
+
+ mf_cpu = &per_cpu(memory_failure_cpu, cpu);
+ cancel_work_sync(&mf_cpu->work);
+ memory_failure_work_func(&mf_cpu->work);
+}
+
static int __init memory_failure_init(void)
{
struct memory_failure_cpu *mf_cpu;
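
Note: memory_failure_work_func() now recovers its per-CPU state from the work_struct via container_of() rather than this_cpu_ptr(), which is what lets memory_failure_queue_kick() run the handler for another CPU's queue. A small userspace sketch of the container_of() idiom, with invented types:

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct work { int pending; };

/* Hypothetical per-CPU state embedding its work item. */
struct failure_cpu {
        int cpu;
        struct work work;
};

static void work_func(struct work *w)
{
        /* Recover the enclosing structure from the embedded member. */
        struct failure_cpu *fc = container_of(w, struct failure_cpu, work);

        printf("running work for cpu %d\n", fc->cpu);
}

int main(void)
{
        struct failure_cpu fc = { .cpu = 3 };

        work_func(&fc.work);
        return 0;
}
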
diff --git a/mm/memory.c b/mm/memory.c
index f703fe8c8346..a0e21e2c571e 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -802,8 +802,6 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
get_page(page);
page_dup_rmap(page, false);
rss[mm_counter(page)]++;
- } else if (pte_devmap(pte)) {
- page = pte_page(pte);
}
out_set_pte:
@@ -2436,10 +2434,9 @@ static inline bool cow_user_page(struct page *dst, struct page *src,
if (!likely(pte_same(*vmf->pte, vmf->orig_pte))) {
/*
* Other thread has already handled the fault
- * and we don't need to do anything. If it's
- * not the case, the fault will be triggered
- * again on the same address.
+ * and update local tlb only
*/
+ update_mmu_tlb(vma, addr, vmf->pte);
ret = false;
goto pte_unlock;
}
@@ -2463,7 +2460,8 @@ static inline bool cow_user_page(struct page *dst, struct page *src,
vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, &vmf->ptl);
locked = true;
if (!likely(pte_same(*vmf->pte, vmf->orig_pte))) {
- /* The PTE changed under us. Retry page fault. */
+ /* The PTE changed under us, update local tlb */
+ update_mmu_tlb(vma, addr, vmf->pte);
ret = false;
goto pte_unlock;
}
@@ -2704,6 +2702,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
}
flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte));
entry = mk_pte(new_page, vma->vm_page_prot);
+ entry = pte_sw_mkyoung(entry);
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
/*
* Clear the pte entry and flush it first, before updating the
@@ -2752,6 +2751,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
new_page = old_page;
page_copied = 1;
} else {
+ update_mmu_tlb(vma, vmf->address, vmf->pte);
mem_cgroup_cancel_charge(new_page, memcg, false);
}
@@ -2812,6 +2812,7 @@ vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf)
* pte_offset_map_lock.
*/
if (!pte_same(*vmf->pte, vmf->orig_pte)) {
+ update_mmu_tlb(vmf->vma, vmf->address, vmf->pte);
pte_unmap_unlock(vmf->pte, vmf->ptl);
return VM_FAULT_NOPAGE;
}
@@ -2936,6 +2937,7 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf)
vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
vmf->address, &vmf->ptl);
if (!pte_same(*vmf->pte, vmf->orig_pte)) {
+ update_mmu_tlb(vma, vmf->address, vmf->pte);
unlock_page(vmf->page);
pte_unmap_unlock(vmf->pte, vmf->ptl);
put_page(vmf->page);
@@ -3341,8 +3343,10 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
vma->vm_page_prot));
vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
vmf->address, &vmf->ptl);
- if (!pte_none(*vmf->pte))
+ if (!pte_none(*vmf->pte)) {
+ update_mmu_tlb(vma, vmf->address, vmf->pte);
goto unlock;
+ }
ret = check_stable_address_space(vma->vm_mm);
if (ret)
goto unlock;
@@ -3373,13 +3377,16 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
__SetPageUptodate(page);
entry = mk_pte(page, vma->vm_page_prot);
+ entry = pte_sw_mkyoung(entry);
if (vma->vm_flags & VM_WRITE)
entry = pte_mkwrite(pte_mkdirty(entry));
vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address,
&vmf->ptl);
- if (!pte_none(*vmf->pte))
+ if (!pte_none(*vmf->pte)) {
+ update_mmu_cache(vma, vmf->address, vmf->pte);
goto release;
+ }
ret = check_stable_address_space(vma->vm_mm);
if (ret)
@@ -3646,11 +3653,14 @@ vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg,
}
/* Re-check under ptl */
- if (unlikely(!pte_none(*vmf->pte)))
+ if (unlikely(!pte_none(*vmf->pte))) {
+ update_mmu_tlb(vma, vmf->address, vmf->pte);
return VM_FAULT_NOPAGE;
+ }
flush_icache_page(vma, page);
entry = mk_pte(page, vma->vm_page_prot);
+ entry = pte_sw_mkyoung(entry);
if (write)
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
/* copy-on-write page */
@@ -4224,8 +4234,10 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
vmf->ptl = pte_lockptr(vmf->vma->vm_mm, vmf->pmd);
spin_lock(vmf->ptl);
entry = vmf->orig_pte;
- if (unlikely(!pte_same(*vmf->pte, entry)))
+ if (unlikely(!pte_same(*vmf->pte, entry))) {
+ update_mmu_tlb(vmf->vma, vmf->address, vmf->pte);
goto unlock;
+ }
if (vmf->flags & FAULT_FLAG_WRITE) {
if (!pte_write(entry))
return do_wp_page(vmf);
diff --git a/mm/migrate.c b/mm/migrate.c
index 7160c1556f79..846af96b84a5 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -797,11 +797,7 @@ recheck_buffers:
if (rc != MIGRATEPAGE_SUCCESS)
goto unlock_buffers;
- ClearPagePrivate(page);
- set_page_private(newpage, page_private(page));
- set_page_private(page, 0);
- put_page(page);
- get_page(newpage);
+ attach_page_private(newpage, detach_page_private(page));
bh = head;
do {
@@ -810,8 +806,6 @@ recheck_buffers:
} while (bh != head);
- SetPagePrivate(newpage);
-
if (mode != MIGRATE_SYNC_NO_COPY)
migrate_page_copy(newpage, page);
else
@@ -1032,7 +1026,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
* to the LRU. Later, when the IO completes the pages are
* marked uptodate and unlocked. However, the queueing
* could be merging multiple pages for one bio (e.g.
- * mpage_readpages). If an allocation happens for the
+ * mpage_readahead). If an allocation happens for the
* second or third page, the process can end up locking
* the same page twice and deadlocking. Rather than
* trying to be clever about what pages can be locked,
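
Note: the buffer-migration hunk collapses the open-coded transfer of page->private (clear flag, copy private, drop and take references, set flag) into a single attach_page_private(newpage, detach_page_private(page)) call. A toy userspace model of what such a helper pair keeps balanced; the struct and helpers are invented for illustration, not the kernel API:

#include <stdbool.h>
#include <stdio.h>

/* Toy page: a refcount, a "has private data" flag, and the data itself. */
struct page {
        int refcount;
        bool has_private;
        void *private;
};

/* Take a reference, stash the data, mark the page as owning it. */
static void attach_private(struct page *p, void *data)
{
        p->refcount++;
        p->private = data;
        p->has_private = true;
}

/* Undo all three steps and hand the data back to the caller. */
static void *detach_private(struct page *p)
{
        void *data;

        if (!p->has_private)
                return NULL;
        data = p->private;
        p->has_private = false;
        p->private = NULL;
        p->refcount--;
        return data;
}

int main(void)
{
        struct page oldp = { .refcount = 1 }, newp = { .refcount = 1 };
        int buffers = 42;

        attach_private(&oldp, &buffers);
        /* Migration: move the private data from the old page to the new one. */
        attach_private(&newp, detach_private(&oldp));
        printf("old ref %d, new ref %d, new private %d\n",
               oldp.refcount, newp.refcount, *(int *)newp.private);
        return 0;
}
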
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 7da6991d9435..435e5f794b3b 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -67,26 +67,30 @@ void __init mminit_verify_pageflags_layout(void)
unsigned long or_mask, add_mask;
shift = 8 * sizeof(unsigned long);
- width = shift - SECTIONS_WIDTH - NODES_WIDTH - ZONES_WIDTH - LAST_CPUPID_SHIFT;
+ width = shift - SECTIONS_WIDTH - NODES_WIDTH - ZONES_WIDTH
+ - LAST_CPUPID_SHIFT - KASAN_TAG_WIDTH;
mminit_dprintk(MMINIT_TRACE, "pageflags_layout_widths",
- "Section %d Node %d Zone %d Lastcpupid %d Flags %d\n",
+ "Section %d Node %d Zone %d Lastcpupid %d Kasantag %d Flags %d\n",
SECTIONS_WIDTH,
NODES_WIDTH,
ZONES_WIDTH,
LAST_CPUPID_WIDTH,
+ KASAN_TAG_WIDTH,
NR_PAGEFLAGS);
mminit_dprintk(MMINIT_TRACE, "pageflags_layout_shifts",
- "Section %d Node %d Zone %d Lastcpupid %d\n",
+ "Section %d Node %d Zone %d Lastcpupid %d Kasantag %d\n",
SECTIONS_SHIFT,
NODES_SHIFT,
ZONES_SHIFT,
- LAST_CPUPID_SHIFT);
+ LAST_CPUPID_SHIFT,
+ KASAN_TAG_WIDTH);
mminit_dprintk(MMINIT_TRACE, "pageflags_layout_pgshifts",
- "Section %lu Node %lu Zone %lu Lastcpupid %lu\n",
+ "Section %lu Node %lu Zone %lu Lastcpupid %lu Kasantag %lu\n",
(unsigned long)SECTIONS_PGSHIFT,
(unsigned long)NODES_PGSHIFT,
(unsigned long)ZONES_PGSHIFT,
- (unsigned long)LAST_CPUPID_PGSHIFT);
+ (unsigned long)LAST_CPUPID_PGSHIFT,
+ (unsigned long)KASAN_TAG_PGSHIFT);
mminit_dprintk(MMINIT_TRACE, "pageflags_layout_nodezoneid",
"Node/Zone ID: %lu -> %lu\n",
(unsigned long)(ZONEID_PGOFF + ZONEID_SHIFT),
diff --git a/mm/nommu.c b/mm/nommu.c
index 318df4e236c9..dfae55f41901 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -140,7 +140,7 @@ void vfree(const void *addr)
}
EXPORT_SYMBOL(vfree);
-void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
+void *__vmalloc(unsigned long size, gfp_t gfp_mask)
{
/*
* You can't specify __GFP_HIGHMEM with kmalloc() since kmalloc()
@@ -150,16 +150,25 @@ void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
}
EXPORT_SYMBOL(__vmalloc);
-void *__vmalloc_node_flags(unsigned long size, int node, gfp_t flags)
+void *__vmalloc_node_range(unsigned long size, unsigned long align,
+ unsigned long start, unsigned long end, gfp_t gfp_mask,
+ pgprot_t prot, unsigned long vm_flags, int node,
+ const void *caller)
{
- return __vmalloc(size, flags, PAGE_KERNEL);
+ return __vmalloc(size, gfp_mask);
+}
+
+void *__vmalloc_node(unsigned long size, unsigned long align, gfp_t gfp_mask,
+ int node, const void *caller)
+{
+ return __vmalloc(size, gfp_mask);
}
static void *__vmalloc_user_flags(unsigned long size, gfp_t flags)
{
void *ret;
- ret = __vmalloc(size, flags, PAGE_KERNEL);
+ ret = __vmalloc(size, flags);
if (ret) {
struct vm_area_struct *vma;
@@ -179,12 +188,6 @@ void *vmalloc_user(unsigned long size)
}
EXPORT_SYMBOL(vmalloc_user);
-void *vmalloc_user_node_flags(unsigned long size, int node, gfp_t flags)
-{
- return __vmalloc_user_flags(size, flags | __GFP_ZERO);
-}
-EXPORT_SYMBOL(vmalloc_user_node_flags);
-
struct page *vmalloc_to_page(const void *addr)
{
return virt_to_page(addr);
@@ -230,7 +233,7 @@ long vwrite(char *buf, char *addr, unsigned long count)
*/
void *vmalloc(unsigned long size)
{
- return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL);
+ return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM);
}
EXPORT_SYMBOL(vmalloc);
@@ -248,8 +251,7 @@ EXPORT_SYMBOL(vmalloc);
*/
void *vzalloc(unsigned long size)
{
- return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
- PAGE_KERNEL);
+ return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO);
}
EXPORT_SYMBOL(vzalloc);
@@ -302,7 +304,7 @@ EXPORT_SYMBOL(vzalloc_node);
void *vmalloc_exec(unsigned long size)
{
- return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC);
+ return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM);
}
/**
@@ -314,7 +316,7 @@ void *vmalloc_exec(unsigned long size)
*/
void *vmalloc_32(unsigned long size)
{
- return __vmalloc(size, GFP_KERNEL, PAGE_KERNEL);
+ return __vmalloc(size, GFP_KERNEL);
}
EXPORT_SYMBOL(vmalloc_32);
@@ -351,7 +353,7 @@ void vunmap(const void *addr)
}
EXPORT_SYMBOL(vunmap);
-void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t prot)
+void *vm_map_ram(struct page **pages, unsigned int count, int node)
{
BUG();
return NULL;
@@ -369,18 +371,6 @@ void vm_unmap_aliases(void)
}
EXPORT_SYMBOL_GPL(vm_unmap_aliases);
-/*
- * Implement a stub for vmalloc_sync_[un]mapping() if the architecture
- * chose not to have one.
- */
-void __weak vmalloc_sync_mappings(void)
-{
-}
-
-void __weak vmalloc_sync_unmappings(void)
-{
-}
-
struct vm_struct *alloc_vm_area(size_t size, pte_t **ptes)
{
BUG();
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index d3ee4c4dafac..d79ed1f88c7a 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -387,8 +387,7 @@ static unsigned long global_dirtyable_memory(void)
* Calculate @dtc->thresh and ->bg_thresh considering
* vm_dirty_{bytes|ratio} and dirty_background_{bytes|ratio}. The caller
* must ensure that @dtc->avail is set before calling this function. The
- * dirty limits will be lifted by 1/4 for PF_LESS_THROTTLE (ie. nfsd) and
- * real-time tasks.
+ * dirty limits will be lifted by 1/4 for real-time tasks.
*/
static void domain_dirty_limits(struct dirty_throttle_control *dtc)
{
@@ -436,7 +435,7 @@ static void domain_dirty_limits(struct dirty_throttle_control *dtc)
if (bg_thresh >= thresh)
bg_thresh = thresh / 2;
tsk = current;
- if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk)) {
+ if (rt_task(tsk)) {
bg_thresh += bg_thresh / 4 + global_wb_domain.dirty_limit / 32;
thresh += thresh / 4 + global_wb_domain.dirty_limit / 32;
}
@@ -486,7 +485,7 @@ static unsigned long node_dirty_limit(struct pglist_data *pgdat)
else
dirty = vm_dirty_ratio * node_memory / 100;
- if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk))
+ if (rt_task(tsk))
dirty += dirty / 4;
return dirty;
@@ -505,7 +504,6 @@ bool node_dirty_ok(struct pglist_data *pgdat)
unsigned long nr_pages = 0;
nr_pages += node_page_state(pgdat, NR_FILE_DIRTY);
- nr_pages += node_page_state(pgdat, NR_UNSTABLE_NFS);
nr_pages += node_page_state(pgdat, NR_WRITEBACK);
return nr_pages <= limit;
@@ -755,7 +753,7 @@ static void mdtc_calc_avail(struct dirty_throttle_control *mdtc,
* bounded by the bdi->min_ratio and/or bdi->max_ratio parameters, if set.
*
* Return: @wb's dirty limit in pages. The term "dirty" in the context of
- * dirty balancing includes all PG_dirty, PG_writeback and NFS unstable pages.
+ * dirty balancing includes all PG_dirty and PG_writeback pages.
*/
static unsigned long __wb_calc_thresh(struct dirty_throttle_control *dtc)
{
@@ -1563,7 +1561,7 @@ static void balance_dirty_pages(struct bdi_writeback *wb,
struct dirty_throttle_control * const mdtc = mdtc_valid(&mdtc_stor) ?
&mdtc_stor : NULL;
struct dirty_throttle_control *sdtc;
- unsigned long nr_reclaimable; /* = file_dirty + unstable_nfs */
+ unsigned long nr_reclaimable; /* = file_dirty */
long period;
long pause;
long max_pause;
@@ -1583,14 +1581,7 @@ static void balance_dirty_pages(struct bdi_writeback *wb,
unsigned long m_thresh = 0;
unsigned long m_bg_thresh = 0;
- /*
- * Unstable writes are a feature of certain networked
- * filesystems (i.e. NFS) in which data may have been
- * written to the server's write cache, but has not yet
- * been flushed to permanent storage.
- */
- nr_reclaimable = global_node_page_state(NR_FILE_DIRTY) +
- global_node_page_state(NR_UNSTABLE_NFS);
+ nr_reclaimable = global_node_page_state(NR_FILE_DIRTY);
gdtc->avail = global_dirtyable_memory();
gdtc->dirty = nr_reclaimable + global_node_page_state(NR_WRITEBACK);
@@ -1649,8 +1640,12 @@ static void balance_dirty_pages(struct bdi_writeback *wb,
if (dirty <= dirty_freerun_ceiling(thresh, bg_thresh) &&
(!mdtc ||
m_dirty <= dirty_freerun_ceiling(m_thresh, m_bg_thresh))) {
- unsigned long intv = dirty_poll_interval(dirty, thresh);
- unsigned long m_intv = ULONG_MAX;
+ unsigned long intv;
+ unsigned long m_intv;
+
+free_running:
+ intv = dirty_poll_interval(dirty, thresh);
+ m_intv = ULONG_MAX;
current->dirty_paused_when = now;
current->nr_dirtied = 0;
@@ -1669,9 +1664,20 @@ static void balance_dirty_pages(struct bdi_writeback *wb,
* Calculate global domain's pos_ratio and select the
* global dtc by default.
*/
- if (!strictlimit)
+ if (!strictlimit) {
wb_dirty_limits(gdtc);
+ if ((current->flags & PF_LOCAL_THROTTLE) &&
+ gdtc->wb_dirty <
+ dirty_freerun_ceiling(gdtc->wb_thresh,
+ gdtc->wb_bg_thresh))
+ /*
+ * LOCAL_THROTTLE tasks must not be throttled
+ * when below the per-wb freerun ceiling.
+ */
+ goto free_running;
+ }
+
dirty_exceeded = (gdtc->wb_dirty > gdtc->wb_thresh) &&
((gdtc->dirty > gdtc->thresh) || strictlimit);
@@ -1685,9 +1691,20 @@ static void balance_dirty_pages(struct bdi_writeback *wb,
* both global and memcg domains. Choose the one
* w/ lower pos_ratio.
*/
- if (!strictlimit)
+ if (!strictlimit) {
wb_dirty_limits(mdtc);
+ if ((current->flags & PF_LOCAL_THROTTLE) &&
+ mdtc->wb_dirty <
+ dirty_freerun_ceiling(mdtc->wb_thresh,
+ mdtc->wb_bg_thresh))
+ /*
+ * LOCAL_THROTTLE tasks must not be
+ * throttled when below the per-wb
+ * freerun ceiling.
+ */
+ goto free_running;
+ }
dirty_exceeded |= (mdtc->wb_dirty > mdtc->wb_thresh) &&
((mdtc->dirty > mdtc->thresh) || strictlimit);
@@ -1934,8 +1951,7 @@ bool wb_over_bg_thresh(struct bdi_writeback *wb)
* as we're trying to decide whether to put more under writeback.
*/
gdtc->avail = global_dirtyable_memory();
- gdtc->dirty = global_node_page_state(NR_FILE_DIRTY) +
- global_node_page_state(NR_UNSTABLE_NFS);
+ gdtc->dirty = global_node_page_state(NR_FILE_DIRTY);
domain_dirty_limits(gdtc);
if (gdtc->dirty > gdtc->bg_thresh)
@@ -2160,7 +2176,6 @@ int write_cache_pages(struct address_space *mapping,
int error;
struct pagevec pvec;
int nr_pages;
- pgoff_t uninitialized_var(writeback_index);
pgoff_t index;
pgoff_t end; /* Inclusive */
pgoff_t done_index;
@@ -2169,8 +2184,7 @@ int write_cache_pages(struct address_space *mapping,
pagevec_init(&pvec);
if (wbc->range_cyclic) {
- writeback_index = mapping->writeback_index; /* prev offset */
- index = writeback_index;
+ index = mapping->writeback_index; /* prev offset */
end = -1;
} else {
index = wbc->range_start >> PAGE_SHIFT;
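
Note: the balance_dirty_pages() hunks let a task flagged PF_LOCAL_THROTTLE (used by these patches in place of PF_LESS_THROTTLE) jump back to the free-running path whenever its per-writeback counters sit below the free-run ceiling. In current kernels that ceiling is simply the midpoint of the background and hard thresholds; a minimal sketch of the check, with made-up numbers:

#include <stdbool.h>
#include <stdio.h>

/* Midpoint of the background and hard dirty thresholds. */
static unsigned long dirty_freerun_ceiling(unsigned long thresh,
                                           unsigned long bg_thresh)
{
        return (thresh + bg_thresh) / 2;
}

static bool below_freerun(unsigned long dirty, unsigned long thresh,
                          unsigned long bg_thresh)
{
        return dirty <= dirty_freerun_ceiling(thresh, bg_thresh);
}

int main(void)
{
        unsigned long thresh = 2000, bg_thresh = 1000;

        printf("ceiling = %lu\n", dirty_freerun_ceiling(thresh, bg_thresh));
        printf("1200 dirty pages below ceiling? %s\n",
               below_freerun(1200, thresh, bg_thresh) ? "yes" : "no");
        return 0;
}
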
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index b7e1b76284f9..843728e60f22 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5319,7 +5319,7 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
printk("active_anon:%lu inactive_anon:%lu isolated_anon:%lu\n"
" active_file:%lu inactive_file:%lu isolated_file:%lu\n"
- " unevictable:%lu dirty:%lu writeback:%lu unstable:%lu\n"
+ " unevictable:%lu dirty:%lu writeback:%lu\n"
" slab_reclaimable:%lu slab_unreclaimable:%lu\n"
" mapped:%lu shmem:%lu pagetables:%lu bounce:%lu\n"
" free:%lu free_pcp:%lu free_cma:%lu\n",
@@ -5332,7 +5332,6 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
global_node_page_state(NR_UNEVICTABLE),
global_node_page_state(NR_FILE_DIRTY),
global_node_page_state(NR_WRITEBACK),
- global_node_page_state(NR_UNSTABLE_NFS),
global_node_page_state(NR_SLAB_RECLAIMABLE),
global_node_page_state(NR_SLAB_UNRECLAIMABLE),
global_node_page_state(NR_FILE_MAPPED),
@@ -5365,7 +5364,6 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
" anon_thp: %lukB"
#endif
" writeback_tmp:%lukB"
- " unstable:%lukB"
" all_unreclaimable? %s"
"\n",
pgdat->node_id,
@@ -5387,7 +5385,6 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
K(node_page_state(pgdat, NR_ANON_THPS) * HPAGE_PMD_NR),
#endif
K(node_page_state(pgdat, NR_WRITEBACK_TEMP)),
- K(node_page_state(pgdat, NR_UNSTABLE_NFS)),
pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES ?
"yes" : "no");
}
@@ -5420,6 +5417,9 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
" managed:%lukB"
" mlocked:%lukB"
" kernel_stack:%lukB"
+#ifdef CONFIG_SHADOW_CALL_STACK
+ " shadow_call_stack:%lukB"
+#endif
" pagetables:%lukB"
" bounce:%lukB"
" free_pcp:%lukB"
@@ -5442,6 +5442,9 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
K(zone_managed_pages(zone)),
K(zone_page_state(zone, NR_MLOCK)),
zone_page_state(zone, NR_KERNEL_STACK_KB),
+#ifdef CONFIG_SHADOW_CALL_STACK
+ zone_page_state(zone, NR_KERNEL_SCS_KB),
+#endif
K(zone_page_state(zone, NR_PAGETABLE)),
K(zone_page_state(zone, NR_BOUNCE)),
K(free_pcp),
@@ -8225,7 +8228,7 @@ void *__init alloc_large_system_hash(const char *tablename,
table = memblock_alloc_raw(size,
SMP_CACHE_BYTES);
} else if (get_order(size) >= MAX_ORDER || hashdist) {
- table = __vmalloc(size, gfp_flags, PAGE_KERNEL);
+ table = __vmalloc(size, gfp_flags);
virt = true;
} else {
/*
diff --git a/mm/percpu.c b/mm/percpu.c
index 7da7d7737dab..696367b18222 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -482,7 +482,7 @@ static void *pcpu_mem_zalloc(size_t size, gfp_t gfp)
if (size <= PAGE_SIZE)
return kzalloc(size, gfp);
else
- return __vmalloc(size, gfp | __GFP_ZERO, PAGE_KERNEL);
+ return __vmalloc(size, gfp | __GFP_ZERO);
}
/**
diff --git a/mm/ptdump.c b/mm/ptdump.c
index 26208d0d03b7..f4ce916f5602 100644
--- a/mm/ptdump.c
+++ b/mm/ptdump.c
@@ -36,6 +36,9 @@ static int ptdump_pgd_entry(pgd_t *pgd, unsigned long addr,
return note_kasan_page_table(walk, addr);
#endif
+ if (st->effective_prot)
+ st->effective_prot(st, 0, pgd_val(val));
+
if (pgd_leaf(val))
st->note_page(st, addr, 0, pgd_val(val));
@@ -53,6 +56,9 @@ static int ptdump_p4d_entry(p4d_t *p4d, unsigned long addr,
return note_kasan_page_table(walk, addr);
#endif
+ if (st->effective_prot)
+ st->effective_prot(st, 1, p4d_val(val));
+
if (p4d_leaf(val))
st->note_page(st, addr, 1, p4d_val(val));
@@ -70,6 +76,9 @@ static int ptdump_pud_entry(pud_t *pud, unsigned long addr,
return note_kasan_page_table(walk, addr);
#endif
+ if (st->effective_prot)
+ st->effective_prot(st, 2, pud_val(val));
+
if (pud_leaf(val))
st->note_page(st, addr, 2, pud_val(val));
@@ -87,6 +96,8 @@ static int ptdump_pmd_entry(pmd_t *pmd, unsigned long addr,
return note_kasan_page_table(walk, addr);
#endif
+ if (st->effective_prot)
+ st->effective_prot(st, 3, pmd_val(val));
if (pmd_leaf(val))
st->note_page(st, addr, 3, pmd_val(val));
@@ -97,8 +108,12 @@ static int ptdump_pte_entry(pte_t *pte, unsigned long addr,
unsigned long next, struct mm_walk *walk)
{
struct ptdump_state *st = walk->private;
+ pte_t val = READ_ONCE(*pte);
+
+ if (st->effective_prot)
+ st->effective_prot(st, 4, pte_val(val));
- st->note_page(st, addr, 4, pte_val(READ_ONCE(*pte)));
+ st->note_page(st, addr, 4, pte_val(val));
return 0;
}
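
Note: ptdump_*_entry() now invokes an optional st->effective_prot() callback at every page-table level before st->note_page(). A compact userspace analogue of the optional-hook pattern; the struct layout and the AND-based combining rule are invented for the example:

#include <stdio.h>

struct dump_state {
        /* Optional hook; may be left NULL by users that don't need it. */
        void (*effective_prot)(struct dump_state *st, int level,
                               unsigned long val);
        void (*note_page)(struct dump_state *st, int level, unsigned long val);
        unsigned long accumulated;
};

static void accumulate_prot(struct dump_state *st, int level, unsigned long val)
{
        (void)level;
        st->accumulated &= val; /* e.g. intersect permission bits */
}

static void print_page(struct dump_state *st, int level, unsigned long val)
{
        printf("level %d: value %#lx (effective %#lx)\n",
               level, val, st->accumulated);
}

static void visit_level(struct dump_state *st, int level, unsigned long val)
{
        if (st->effective_prot)         /* only call the hook if provided */
                st->effective_prot(st, level, val);
        st->note_page(st, level, val);
}

int main(void)
{
        struct dump_state st = {
                .effective_prot = accumulate_prot,
                .note_page = print_page,
                .accumulated = ~0UL,
        };

        for (int level = 0; level <= 4; level++)
                visit_level(&st, level, 0x7UL >> level);
        return 0;
}
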
diff --git a/mm/readahead.c b/mm/readahead.c
index 2fe72cd29b47..3c9a8dd7c56c 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -22,6 +22,7 @@
#include <linux/mm_inline.h>
#include <linux/blk-cgroup.h>
#include <linux/fadvise.h>
+#include <linux/sched/mm.h>
#include "internal.h"
@@ -113,94 +114,126 @@ int read_cache_pages(struct address_space *mapping, struct list_head *pages,
EXPORT_SYMBOL(read_cache_pages);
-static int read_pages(struct address_space *mapping, struct file *filp,
- struct list_head *pages, unsigned int nr_pages, gfp_t gfp)
+static void read_pages(struct readahead_control *rac, struct list_head *pages,
+ bool skip_page)
{
+ const struct address_space_operations *aops = rac->mapping->a_ops;
+ struct page *page;
struct blk_plug plug;
- unsigned page_idx;
- int ret;
+
+ if (!readahead_count(rac))
+ goto out;
blk_start_plug(&plug);
- if (mapping->a_ops->readpages) {
- ret = mapping->a_ops->readpages(filp, mapping, pages, nr_pages);
+ if (aops->readahead) {
+ aops->readahead(rac);
+ /* Clean up the remaining pages */
+ while ((page = readahead_page(rac))) {
+ unlock_page(page);
+ put_page(page);
+ }
+ } else if (aops->readpages) {
+ aops->readpages(rac->file, rac->mapping, pages,
+ readahead_count(rac));
/* Clean up the remaining pages */
put_pages_list(pages);
- goto out;
- }
-
- for (page_idx = 0; page_idx < nr_pages; page_idx++) {
- struct page *page = lru_to_page(pages);
- list_del(&page->lru);
- if (!add_to_page_cache_lru(page, mapping, page->index, gfp))
- mapping->a_ops->readpage(filp, page);
- put_page(page);
+ rac->_index += rac->_nr_pages;
+ rac->_nr_pages = 0;
+ } else {
+ while ((page = readahead_page(rac))) {
+ aops->readpage(rac->file, page);
+ put_page(page);
+ }
}
- ret = 0;
-out:
blk_finish_plug(&plug);
- return ret;
+ BUG_ON(!list_empty(pages));
+ BUG_ON(readahead_count(rac));
+
+out:
+ if (skip_page)
+ rac->_index++;
}
-/*
- * __do_page_cache_readahead() actually reads a chunk of disk. It allocates
- * the pages first, then submits them for I/O. This avoids the very bad
- * behaviour which would occur if page allocations are causing VM writeback.
- * We really don't want to intermingle reads and writes like that.
+/**
+ * page_cache_readahead_unbounded - Start unchecked readahead.
+ * @mapping: File address space.
+ * @file: This instance of the open file; used for authentication.
+ * @index: First page index to read.
+ * @nr_to_read: The number of pages to read.
+ * @lookahead_size: Where to start the next readahead.
*
- * Returns the number of pages requested, or the maximum amount of I/O allowed.
+ * This function is for filesystems to call when they want to start
+ * readahead beyond a file's stated i_size. This is almost certainly
+ * not the function you want to call. Use page_cache_async_readahead()
+ * or page_cache_sync_readahead() instead.
+ *
+ * Context: File is referenced by caller. Mutexes may be held by caller.
+ * May sleep, but will not reenter filesystem to reclaim memory.
*/
-unsigned int __do_page_cache_readahead(struct address_space *mapping,
- struct file *filp, pgoff_t offset, unsigned long nr_to_read,
+void page_cache_readahead_unbounded(struct address_space *mapping,
+ struct file *file, pgoff_t index, unsigned long nr_to_read,
unsigned long lookahead_size)
{
- struct inode *inode = mapping->host;
- struct page *page;
- unsigned long end_index; /* The last page we want to read */
LIST_HEAD(page_pool);
- int page_idx;
- unsigned int nr_pages = 0;
- loff_t isize = i_size_read(inode);
gfp_t gfp_mask = readahead_gfp_mask(mapping);
+ struct readahead_control rac = {
+ .mapping = mapping,
+ .file = file,
+ ._index = index,
+ };
+ unsigned long i;
- if (isize == 0)
- goto out;
-
- end_index = ((isize - 1) >> PAGE_SHIFT);
+ /*
+ * Partway through the readahead operation, we will have added
+ * locked pages to the page cache, but will not yet have submitted
+ * them for I/O. Adding another page may need to allocate memory,
+ * which can trigger memory reclaim. Telling the VM we're in
+ * the middle of a filesystem operation will cause it to not
+ * touch file-backed pages, preventing a deadlock. Most (all?)
+ * filesystems already specify __GFP_NOFS in their mapping's
+ * gfp_mask, but let's be explicit here.
+ */
+ unsigned int nofs = memalloc_nofs_save();
/*
* Preallocate as many pages as we will need.
*/
- for (page_idx = 0; page_idx < nr_to_read; page_idx++) {
- pgoff_t page_offset = offset + page_idx;
+ for (i = 0; i < nr_to_read; i++) {
+ struct page *page = xa_load(&mapping->i_pages, index + i);
- if (page_offset > end_index)
- break;
+ BUG_ON(index + i != rac._index + rac._nr_pages);
- page = xa_load(&mapping->i_pages, page_offset);
if (page && !xa_is_value(page)) {
/*
- * Page already present? Kick off the current batch of
- * contiguous pages before continuing with the next
- * batch.
+ * Page already present? Kick off the current batch
+ * of contiguous pages before continuing with the
+ * next batch. This page may be the one we would
+ * have intended to mark as Readahead, but we don't
+ * have a stable reference to this page, and it's
+ * not worth getting one just for that.
*/
- if (nr_pages)
- read_pages(mapping, filp, &page_pool, nr_pages,
- gfp_mask);
- nr_pages = 0;
+ read_pages(&rac, &page_pool, true);
continue;
}
page = __page_cache_alloc(gfp_mask);
if (!page)
break;
- page->index = page_offset;
- list_add(&page->lru, &page_pool);
- if (page_idx == nr_to_read - lookahead_size)
+ if (mapping->a_ops->readpages) {
+ page->index = index + i;
+ list_add(&page->lru, &page_pool);
+ } else if (add_to_page_cache_lru(page, mapping, index + i,
+ gfp_mask) < 0) {
+ put_page(page);
+ read_pages(&rac, &page_pool, true);
+ continue;
+ }
+ if (i == nr_to_read - lookahead_size)
SetPageReadahead(page);
- nr_pages++;
+ rac._nr_pages++;
}
/*
@@ -208,26 +241,53 @@ unsigned int __do_page_cache_readahead(struct address_space *mapping,
* uptodate then the caller will launch readpage again, and
* will then handle the error.
*/
- if (nr_pages)
- read_pages(mapping, filp, &page_pool, nr_pages, gfp_mask);
- BUG_ON(!list_empty(&page_pool));
-out:
- return nr_pages;
+ read_pages(&rac, &page_pool, false);
+ memalloc_nofs_restore(nofs);
+}
+EXPORT_SYMBOL_GPL(page_cache_readahead_unbounded);
+
+/*
+ * __do_page_cache_readahead() actually reads a chunk of disk. It allocates
+ * the pages first, then submits them for I/O. This avoids the very bad
+ * behaviour which would occur if page allocations are causing VM writeback.
+ * We really don't want to intermingle reads and writes like that.
+ */
+void __do_page_cache_readahead(struct address_space *mapping,
+ struct file *file, pgoff_t index, unsigned long nr_to_read,
+ unsigned long lookahead_size)
+{
+ struct inode *inode = mapping->host;
+ loff_t isize = i_size_read(inode);
+ pgoff_t end_index; /* The last page we want to read */
+
+ if (isize == 0)
+ return;
+
+ end_index = (isize - 1) >> PAGE_SHIFT;
+ if (index > end_index)
+ return;
+ /* Don't read past the page containing the last byte of the file */
+ if (nr_to_read > end_index - index)
+ nr_to_read = end_index - index + 1;
+
+ page_cache_readahead_unbounded(mapping, file, index, nr_to_read,
+ lookahead_size);
}
/*
* Chunk the readahead into 2 megabyte units, so that we don't pin too much
* memory at once.
*/
-int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
- pgoff_t offset, unsigned long nr_to_read)
+void force_page_cache_readahead(struct address_space *mapping,
+ struct file *filp, pgoff_t index, unsigned long nr_to_read)
{
struct backing_dev_info *bdi = inode_to_bdi(mapping->host);
struct file_ra_state *ra = &filp->f_ra;
unsigned long max_pages;
- if (unlikely(!mapping->a_ops->readpage && !mapping->a_ops->readpages))
- return -EINVAL;
+ if (unlikely(!mapping->a_ops->readpage && !mapping->a_ops->readpages &&
+ !mapping->a_ops->readahead))
+ return;
/*
* If the request exceeds the readahead window, allow the read to
@@ -240,12 +300,11 @@ int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
if (this_chunk > nr_to_read)
this_chunk = nr_to_read;
- __do_page_cache_readahead(mapping, filp, offset, this_chunk, 0);
+ __do_page_cache_readahead(mapping, filp, index, this_chunk, 0);
- offset += this_chunk;
+ index += this_chunk;
nr_to_read -= this_chunk;
}
- return 0;
}
/*
@@ -324,21 +383,21 @@ static unsigned long get_next_ra_size(struct file_ra_state *ra,
*/
/*
- * Count contiguously cached pages from @offset-1 to @offset-@max,
+ * Count contiguously cached pages from @index-1 to @index-@max,
* this count is a conservative estimation of
* - length of the sequential read sequence, or
* - thrashing threshold in memory tight systems
*/
static pgoff_t count_history_pages(struct address_space *mapping,
- pgoff_t offset, unsigned long max)
+ pgoff_t index, unsigned long max)
{
pgoff_t head;
rcu_read_lock();
- head = page_cache_prev_miss(mapping, offset - 1, max);
+ head = page_cache_prev_miss(mapping, index - 1, max);
rcu_read_unlock();
- return offset - 1 - head;
+ return index - 1 - head;
}
/*
@@ -346,13 +405,13 @@ static pgoff_t count_history_pages(struct address_space *mapping,
*/
static int try_context_readahead(struct address_space *mapping,
struct file_ra_state *ra,
- pgoff_t offset,
+ pgoff_t index,
unsigned long req_size,
unsigned long max)
{
pgoff_t size;
- size = count_history_pages(mapping, offset, max);
+ size = count_history_pages(mapping, index, max);
/*
* not enough history pages:
@@ -365,10 +424,10 @@ static int try_context_readahead(struct address_space *mapping,
* starts from beginning of file:
* it is a strong indication of long-run stream (or whole-file-read)
*/
- if (size >= offset)
+ if (size >= index)
size *= 2;
- ra->start = offset;
+ ra->start = index;
ra->size = min(size + req_size, max);
ra->async_size = 1;
@@ -378,16 +437,15 @@ static int try_context_readahead(struct address_space *mapping,
/*
* A minimal readahead algorithm for trivial sequential/random reads.
*/
-static unsigned long
-ondemand_readahead(struct address_space *mapping,
- struct file_ra_state *ra, struct file *filp,
- bool hit_readahead_marker, pgoff_t offset,
- unsigned long req_size)
+static void ondemand_readahead(struct address_space *mapping,
+ struct file_ra_state *ra, struct file *filp,
+ bool hit_readahead_marker, pgoff_t index,
+ unsigned long req_size)
{
struct backing_dev_info *bdi = inode_to_bdi(mapping->host);
unsigned long max_pages = ra->ra_pages;
unsigned long add_pages;
- pgoff_t prev_offset;
+ pgoff_t prev_index;
/*
* If the request exceeds the readahead window, allow the read to
@@ -399,15 +457,15 @@ ondemand_readahead(struct address_space *mapping,
/*
* start of file
*/
- if (!offset)
+ if (!index)
goto initial_readahead;
/*
- * It's the expected callback offset, assume sequential access.
+ * It's the expected callback index, assume sequential access.
* Ramp up sizes, and push forward the readahead window.
*/
- if ((offset == (ra->start + ra->size - ra->async_size) ||
- offset == (ra->start + ra->size))) {
+ if ((index == (ra->start + ra->size - ra->async_size) ||
+ index == (ra->start + ra->size))) {
ra->start += ra->size;
ra->size = get_next_ra_size(ra, max_pages);
ra->async_size = ra->size;
@@ -424,14 +482,14 @@ ondemand_readahead(struct address_space *mapping,
pgoff_t start;
rcu_read_lock();
- start = page_cache_next_miss(mapping, offset + 1, max_pages);
+ start = page_cache_next_miss(mapping, index + 1, max_pages);
rcu_read_unlock();
- if (!start || start - offset > max_pages)
- return 0;
+ if (!start || start - index > max_pages)
+ return;
ra->start = start;
- ra->size = start - offset; /* old async_size */
+ ra->size = start - index; /* old async_size */
ra->size += req_size;
ra->size = get_next_ra_size(ra, max_pages);
ra->async_size = ra->size;
@@ -446,28 +504,29 @@ ondemand_readahead(struct address_space *mapping,
/*
* sequential cache miss
- * trivial case: (offset - prev_offset) == 1
- * unaligned reads: (offset - prev_offset) == 0
+ * trivial case: (index - prev_index) == 1
+ * unaligned reads: (index - prev_index) == 0
*/
- prev_offset = (unsigned long long)ra->prev_pos >> PAGE_SHIFT;
- if (offset - prev_offset <= 1UL)
+ prev_index = (unsigned long long)ra->prev_pos >> PAGE_SHIFT;
+ if (index - prev_index <= 1UL)
goto initial_readahead;
/*
* Query the page cache and look for the traces(cached history pages)
* that a sequential stream would leave behind.
*/
- if (try_context_readahead(mapping, ra, offset, req_size, max_pages))
+ if (try_context_readahead(mapping, ra, index, req_size, max_pages))
goto readit;
/*
* standalone, small random read
* Read as is, and do not pollute the readahead state.
*/
- return __do_page_cache_readahead(mapping, filp, offset, req_size, 0);
+ __do_page_cache_readahead(mapping, filp, index, req_size, 0);
+ return;
initial_readahead:
- ra->start = offset;
+ ra->start = index;
ra->size = get_init_ra_size(req_size, max_pages);
ra->async_size = ra->size > req_size ? ra->size - req_size : ra->size;
@@ -478,7 +537,7 @@ readit:
* the resulted next readahead window into the current one.
* Take care of maximum IO pages as above.
*/
- if (offset == ra->start && ra->size == ra->async_size) {
+ if (index == ra->start && ra->size == ra->async_size) {
add_pages = get_next_ra_size(ra, max_pages);
if (ra->size + add_pages <= max_pages) {
ra->async_size = add_pages;
@@ -489,7 +548,7 @@ readit:
}
}
- return ra_submit(ra, mapping, filp);
+ ra_submit(ra, mapping, filp);
}
/**
@@ -497,9 +556,8 @@ readit:
* @mapping: address_space which holds the pagecache and I/O vectors
* @ra: file_ra_state which holds the readahead state
* @filp: passed on to ->readpage() and ->readpages()
- * @offset: start offset into @mapping, in pagecache page-sized units
- * @req_size: hint: total size of the read which the caller is performing in
- * pagecache pages
+ * @index: Index of first page to be read.
+ * @req_count: Total number of pages being read by the caller.
*
* page_cache_sync_readahead() should be called when a cache miss happened:
* it will submit the read. The readahead logic may decide to piggyback more
@@ -508,7 +566,7 @@ readit:
*/
void page_cache_sync_readahead(struct address_space *mapping,
struct file_ra_state *ra, struct file *filp,
- pgoff_t offset, unsigned long req_size)
+ pgoff_t index, unsigned long req_count)
{
/* no read-ahead */
if (!ra->ra_pages)
@@ -519,12 +577,12 @@ void page_cache_sync_readahead(struct address_space *mapping,
/* be dumb */
if (filp && (filp->f_mode & FMODE_RANDOM)) {
- force_page_cache_readahead(mapping, filp, offset, req_size);
+ force_page_cache_readahead(mapping, filp, index, req_count);
return;
}
/* do read-ahead */
- ondemand_readahead(mapping, ra, filp, false, offset, req_size);
+ ondemand_readahead(mapping, ra, filp, false, index, req_count);
}
EXPORT_SYMBOL_GPL(page_cache_sync_readahead);
@@ -533,21 +591,20 @@ EXPORT_SYMBOL_GPL(page_cache_sync_readahead);
* @mapping: address_space which holds the pagecache and I/O vectors
* @ra: file_ra_state which holds the readahead state
* @filp: passed on to ->readpage() and ->readpages()
- * @page: the page at @offset which has the PG_readahead flag set
- * @offset: start offset into @mapping, in pagecache page-sized units
- * @req_size: hint: total size of the read which the caller is performing in
- * pagecache pages
+ * @page: The page at @index which triggered the readahead call.
+ * @index: Index of first page to be read.
+ * @req_count: Total number of pages being read by the caller.
*
* page_cache_async_readahead() should be called when a page is used which
- * has the PG_readahead flag; this is a marker to suggest that the application
+ * is marked as PageReadahead; this is a marker to suggest that the application
* has used up enough of the readahead window that we should start pulling in
* more pages.
*/
void
page_cache_async_readahead(struct address_space *mapping,
struct file_ra_state *ra, struct file *filp,
- struct page *page, pgoff_t offset,
- unsigned long req_size)
+ struct page *page, pgoff_t index,
+ unsigned long req_count)
{
/* no read-ahead */
if (!ra->ra_pages)
@@ -571,7 +628,7 @@ page_cache_async_readahead(struct address_space *mapping,
return;
/* do read-ahead */
- ondemand_readahead(mapping, ra, filp, true, offset, req_size);
+ ondemand_readahead(mapping, ra, filp, true, index, req_count);
}
EXPORT_SYMBOL_GPL(page_cache_async_readahead);
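
Note: the readahead rewrite threads a struct readahead_control (mapping, file, _index, _nr_pages) through the code so that read_pages() and the new ->readahead() method can consume a batch via readahead_count() and readahead_page(). A stripped-down userspace sketch of that batch-iteration contract; the toy struct and readahead_next() helper are illustrative, not the kernel definitions:

#include <stdio.h>

/* Toy batch descriptor: first index plus number of pages in the batch. */
struct readahead_control {
        unsigned long _index;
        unsigned long _nr_pages;
};

static unsigned long readahead_count(const struct readahead_control *rac)
{
        return rac->_nr_pages;
}

/*
 * Hand back the next index in the batch and shrink the batch, the way
 * readahead_page() consumes pages one at a time.
 */
static long readahead_next(struct readahead_control *rac)
{
        unsigned long index;

        if (!rac->_nr_pages)
                return -1;
        index = rac->_index;
        rac->_index++;
        rac->_nr_pages--;
        return (long)index;
}

int main(void)
{
        struct readahead_control rac = { ._index = 100, ._nr_pages = 4 };
        long index;

        printf("submitting %lu pages\n", readahead_count(&rac));
        while ((index = readahead_next(&rac)) >= 0)
                printf("read page %ld\n", index);
        return 0;
}
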
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 23c7500eea7d..9e72ba224175 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -1303,7 +1303,8 @@ void __init create_kmalloc_caches(slab_flags_t flags)
kmalloc_caches[KMALLOC_DMA][i] = create_kmalloc_cache(
kmalloc_info[i].name[KMALLOC_DMA],
kmalloc_info[i].size,
- SLAB_CACHE_DMA | flags, 0, 0);
+ SLAB_CACHE_DMA | flags, 0,
+ kmalloc_info[i].size);
}
}
#endif
diff --git a/mm/slob.c b/mm/slob.c
index fa53e9f73893..ac2aecfbc7a8 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -524,6 +524,7 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfp, unsigned long caller)
{
return __do_kmalloc_node(size, gfp, NUMA_NO_NODE, caller);
}
+EXPORT_SYMBOL(__kmalloc_track_caller);
#ifdef CONFIG_NUMA
void *__kmalloc_node_track_caller(size_t size, gfp_t gfp,
@@ -531,6 +532,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfp,
{
return __do_kmalloc_node(size, gfp, node, caller);
}
+EXPORT_SYMBOL(__kmalloc_node_track_caller);
#endif
void kfree(const void *block)
diff --git a/mm/slub.c b/mm/slub.c
index b762450fc9f0..336be3224092 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -679,6 +679,20 @@ static void slab_fix(struct kmem_cache *s, char *fmt, ...)
va_end(args);
}
+static bool freelist_corrupted(struct kmem_cache *s, struct page *page,
+ void *freelist, void *nextfree)
+{
+ if ((s->flags & SLAB_CONSISTENCY_CHECKS) &&
+ !check_valid_pointer(s, page, nextfree)) {
+ object_err(s, page, freelist, "Freechain corrupt");
+ freelist = NULL;
+ slab_fix(s, "Isolate corrupted freechain");
+ return true;
+ }
+
+ return false;
+}
+
static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
{
unsigned int off; /* Offset of last byte */
@@ -1410,6 +1424,11 @@ static inline void inc_slabs_node(struct kmem_cache *s, int node,
static inline void dec_slabs_node(struct kmem_cache *s, int node,
int objects) {}
+static bool freelist_corrupted(struct kmem_cache *s, struct page *page,
+ void *freelist, void *nextfree)
+{
+ return false;
+}
#endif /* CONFIG_SLUB_DEBUG */
/*
@@ -2093,6 +2112,14 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page,
void *prior;
unsigned long counters;
+ /*
+ * If 'nextfree' is invalid, it is possible that the object at
+ * 'freelist' is already corrupted. So isolate all objects
+ * starting at 'freelist'.
+ */
+ if (freelist_corrupted(s, page, freelist, nextfree))
+ break;
+
do {
prior = page->freelist;
counters = page->counters;
@@ -3739,12 +3766,14 @@ error:
}
static void list_slab_objects(struct kmem_cache *s, struct page *page,
- const char *text)
+ const char *text, unsigned long *map)
{
#ifdef CONFIG_SLUB_DEBUG
void *addr = page_address(page);
void *p;
- unsigned long *map;
+
+ if (!map)
+ return;
slab_err(s, page, text, s->name);
slab_lock(page);
@@ -3757,8 +3786,6 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page,
print_tracking(s, p);
}
}
- put_map(map);
-
slab_unlock(page);
#endif
}
@@ -3772,6 +3799,11 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
{
LIST_HEAD(discard);
struct page *page, *h;
+ unsigned long *map = NULL;
+
+#ifdef CONFIG_SLUB_DEBUG
+ map = bitmap_alloc(oo_objects(s->max), GFP_KERNEL);
+#endif
BUG_ON(irqs_disabled());
spin_lock_irq(&n->list_lock);
@@ -3781,11 +3813,16 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
list_add(&page->slab_list, &discard);
} else {
list_slab_objects(s, page,
- "Objects remaining in %s on __kmem_cache_shutdown()");
+ "Objects remaining in %s on __kmem_cache_shutdown()",
+ map);
}
}
spin_unlock_irq(&n->list_lock);
+#ifdef CONFIG_SLUB_DEBUG
+ bitmap_free(map);
+#endif
+
list_for_each_entry_safe(page, h, &discard, slab_list)
discard_slab(s, page);
}
@@ -4408,6 +4445,7 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
return ret;
}
+EXPORT_SYMBOL(__kmalloc_track_caller);
#ifdef CONFIG_NUMA
void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
@@ -4438,6 +4476,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
return ret;
}
+EXPORT_SYMBOL(__kmalloc_node_track_caller);
#endif
#ifdef CONFIG_SYSFS
@@ -5654,7 +5693,8 @@ static void memcg_propagate_slab_attrs(struct kmem_cache *s)
*/
if (buffer)
buf = buffer;
- else if (root_cache->max_attr_size < ARRAY_SIZE(mbuf))
+ else if (root_cache->max_attr_size < ARRAY_SIZE(mbuf) &&
+ !IS_ENABLED(CONFIG_SLUB_STATS))
buf = mbuf;
else {
buffer = (char *) get_zeroed_page(GFP_KERNEL);
@@ -5688,19 +5728,6 @@ static struct kobj_type slab_ktype = {
.release = kmem_cache_release,
};
-static int uevent_filter(struct kset *kset, struct kobject *kobj)
-{
- struct kobj_type *ktype = get_ktype(kobj);
-
- if (ktype == &slab_ktype)
- return 1;
- return 0;
-}
-
-static const struct kset_uevent_ops slab_uevent_ops = {
- .filter = uevent_filter,
-};
-
static struct kset *slab_kset;
static inline struct kset *cache_kset(struct kmem_cache *s)
@@ -5768,7 +5795,6 @@ static void sysfs_slab_remove_workfn(struct work_struct *work)
#ifdef CONFIG_MEMCG
kset_unregister(s->memcg_kset);
#endif
- kobject_uevent(&s->kobj, KOBJ_REMOVE);
out:
kobject_put(&s->kobj);
}
@@ -5826,7 +5852,6 @@ static int sysfs_slab_add(struct kmem_cache *s)
}
#endif
- kobject_uevent(&s->kobj, KOBJ_ADD);
if (!unmergeable) {
/* Setup first alias */
sysfs_slab_alias(s, s->name);
@@ -5907,7 +5932,7 @@ static int __init slab_sysfs_init(void)
mutex_lock(&slab_mutex);
- slab_kset = kset_create_and_add("slab", &slab_uevent_ops, kernel_kobj);
+ slab_kset = kset_create_and_add("slab", NULL, kernel_kobj);
if (!slab_kset) {
mutex_unlock(&slab_mutex);
pr_err("Cannot register slab subsystem.\n");
diff --git a/mm/swap.c b/mm/swap.c
index bf9a79fed62d..0ac463d44cff 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -35,6 +35,7 @@
#include <linux/uio.h>
#include <linux/hugetlb.h>
#include <linux/page_idle.h>
+#include <linux/local_lock.h>
#include "internal.h"
@@ -44,14 +45,32 @@
/* How many pages do we try to swap or page in/out together? */
int page_cluster;
-static DEFINE_PER_CPU(struct pagevec, lru_add_pvec);
-static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs);
-static DEFINE_PER_CPU(struct pagevec, lru_deactivate_file_pvecs);
-static DEFINE_PER_CPU(struct pagevec, lru_deactivate_pvecs);
-static DEFINE_PER_CPU(struct pagevec, lru_lazyfree_pvecs);
+/* Protecting only lru_rotate.pvec which requires disabling interrupts */
+struct lru_rotate {
+ local_lock_t lock;
+ struct pagevec pvec;
+};
+static DEFINE_PER_CPU(struct lru_rotate, lru_rotate) = {
+ .lock = INIT_LOCAL_LOCK(lock),
+};
+
+/*
+ * The following pagevecs are grouped together because they are protected
+ * by disabling preemption (and interrupts remain enabled).
+ */
+struct lru_pvecs {
+ local_lock_t lock;
+ struct pagevec lru_add;
+ struct pagevec lru_deactivate_file;
+ struct pagevec lru_deactivate;
+ struct pagevec lru_lazyfree;
#ifdef CONFIG_SMP
-static DEFINE_PER_CPU(struct pagevec, activate_page_pvecs);
+ struct pagevec activate_page;
#endif
+};
+static DEFINE_PER_CPU(struct lru_pvecs, lru_pvecs) = {
+ .lock = INIT_LOCAL_LOCK(lock),
+};
/*
* This path almost never happens for VM activity - pages are normally
@@ -254,11 +273,11 @@ void rotate_reclaimable_page(struct page *page)
unsigned long flags;
get_page(page);
- local_irq_save(flags);
- pvec = this_cpu_ptr(&lru_rotate_pvecs);
+ local_lock_irqsave(&lru_rotate.lock, flags);
+ pvec = this_cpu_ptr(&lru_rotate.pvec);
if (!pagevec_add(pvec, page) || PageCompound(page))
pagevec_move_tail(pvec);
- local_irq_restore(flags);
+ local_unlock_irqrestore(&lru_rotate.lock, flags);
}
}
@@ -293,7 +312,7 @@ static void __activate_page(struct page *page, struct lruvec *lruvec,
#ifdef CONFIG_SMP
static void activate_page_drain(int cpu)
{
- struct pagevec *pvec = &per_cpu(activate_page_pvecs, cpu);
+ struct pagevec *pvec = &per_cpu(lru_pvecs.activate_page, cpu);
if (pagevec_count(pvec))
pagevec_lru_move_fn(pvec, __activate_page, NULL);
@@ -301,19 +320,21 @@ static void activate_page_drain(int cpu)
static bool need_activate_page_drain(int cpu)
{
- return pagevec_count(&per_cpu(activate_page_pvecs, cpu)) != 0;
+ return pagevec_count(&per_cpu(lru_pvecs.activate_page, cpu)) != 0;
}
void activate_page(struct page *page)
{
page = compound_head(page);
if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
- struct pagevec *pvec = &get_cpu_var(activate_page_pvecs);
+ struct pagevec *pvec;
+ local_lock(&lru_pvecs.lock);
+ pvec = this_cpu_ptr(&lru_pvecs.activate_page);
get_page(page);
if (!pagevec_add(pvec, page) || PageCompound(page))
pagevec_lru_move_fn(pvec, __activate_page, NULL);
- put_cpu_var(activate_page_pvecs);
+ local_unlock(&lru_pvecs.lock);
}
}
@@ -335,9 +356,12 @@ void activate_page(struct page *page)
static void __lru_cache_activate_page(struct page *page)
{
- struct pagevec *pvec = &get_cpu_var(lru_add_pvec);
+ struct pagevec *pvec;
int i;
+ local_lock(&lru_pvecs.lock);
+ pvec = this_cpu_ptr(&lru_pvecs.lru_add);
+
/*
* Search backwards on the optimistic assumption that the page being
* activated has just been added to this pagevec. Note that only
@@ -357,7 +381,7 @@ static void __lru_cache_activate_page(struct page *page)
}
}
- put_cpu_var(lru_add_pvec);
+ local_unlock(&lru_pvecs.lock);
}
/*
@@ -385,7 +409,7 @@ void mark_page_accessed(struct page *page)
} else if (!PageActive(page)) {
/*
* If the page is on the LRU, queue it for activation via
- * activate_page_pvecs. Otherwise, assume the page is on a
+ * lru_pvecs.activate_page. Otherwise, assume the page is on a
* pagevec, mark it active and it'll be moved to the active
* LRU on the next drain.
*/
@@ -404,12 +428,14 @@ EXPORT_SYMBOL(mark_page_accessed);
static void __lru_cache_add(struct page *page)
{
- struct pagevec *pvec = &get_cpu_var(lru_add_pvec);
+ struct pagevec *pvec;
+ local_lock(&lru_pvecs.lock);
+ pvec = this_cpu_ptr(&lru_pvecs.lru_add);
get_page(page);
if (!pagevec_add(pvec, page) || PageCompound(page))
__pagevec_lru_add(pvec);
- put_cpu_var(lru_add_pvec);
+ local_unlock(&lru_pvecs.lock);
}
/**
@@ -593,30 +619,30 @@ static void lru_lazyfree_fn(struct page *page, struct lruvec *lruvec,
*/
void lru_add_drain_cpu(int cpu)
{
- struct pagevec *pvec = &per_cpu(lru_add_pvec, cpu);
+ struct pagevec *pvec = &per_cpu(lru_pvecs.lru_add, cpu);
if (pagevec_count(pvec))
__pagevec_lru_add(pvec);
- pvec = &per_cpu(lru_rotate_pvecs, cpu);
+ pvec = &per_cpu(lru_rotate.pvec, cpu);
if (pagevec_count(pvec)) {
unsigned long flags;
/* No harm done if a racing interrupt already did this */
- local_irq_save(flags);
+ local_lock_irqsave(&lru_rotate.lock, flags);
pagevec_move_tail(pvec);
- local_irq_restore(flags);
+ local_unlock_irqrestore(&lru_rotate.lock, flags);
}
- pvec = &per_cpu(lru_deactivate_file_pvecs, cpu);
+ pvec = &per_cpu(lru_pvecs.lru_deactivate_file, cpu);
if (pagevec_count(pvec))
pagevec_lru_move_fn(pvec, lru_deactivate_file_fn, NULL);
- pvec = &per_cpu(lru_deactivate_pvecs, cpu);
+ pvec = &per_cpu(lru_pvecs.lru_deactivate, cpu);
if (pagevec_count(pvec))
pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
- pvec = &per_cpu(lru_lazyfree_pvecs, cpu);
+ pvec = &per_cpu(lru_pvecs.lru_lazyfree, cpu);
if (pagevec_count(pvec))
pagevec_lru_move_fn(pvec, lru_lazyfree_fn, NULL);
@@ -641,11 +667,14 @@ void deactivate_file_page(struct page *page)
return;
if (likely(get_page_unless_zero(page))) {
- struct pagevec *pvec = &get_cpu_var(lru_deactivate_file_pvecs);
+ struct pagevec *pvec;
+
+ local_lock(&lru_pvecs.lock);
+ pvec = this_cpu_ptr(&lru_pvecs.lru_deactivate_file);
if (!pagevec_add(pvec, page) || PageCompound(page))
pagevec_lru_move_fn(pvec, lru_deactivate_file_fn, NULL);
- put_cpu_var(lru_deactivate_file_pvecs);
+ local_unlock(&lru_pvecs.lock);
}
}
@@ -660,12 +689,14 @@ void deactivate_file_page(struct page *page)
void deactivate_page(struct page *page)
{
if (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) {
- struct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs);
+ struct pagevec *pvec;
+ local_lock(&lru_pvecs.lock);
+ pvec = this_cpu_ptr(&lru_pvecs.lru_deactivate);
get_page(page);
if (!pagevec_add(pvec, page) || PageCompound(page))
pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
- put_cpu_var(lru_deactivate_pvecs);
+ local_unlock(&lru_pvecs.lock);
}
}
@@ -680,19 +711,30 @@ void mark_page_lazyfree(struct page *page)
{
if (PageLRU(page) && PageAnon(page) && PageSwapBacked(page) &&
!PageSwapCache(page) && !PageUnevictable(page)) {
- struct pagevec *pvec = &get_cpu_var(lru_lazyfree_pvecs);
+ struct pagevec *pvec;
+ local_lock(&lru_pvecs.lock);
+ pvec = this_cpu_ptr(&lru_pvecs.lru_lazyfree);
get_page(page);
if (!pagevec_add(pvec, page) || PageCompound(page))
pagevec_lru_move_fn(pvec, lru_lazyfree_fn, NULL);
- put_cpu_var(lru_lazyfree_pvecs);
+ local_unlock(&lru_pvecs.lock);
}
}
void lru_add_drain(void)
{
- lru_add_drain_cpu(get_cpu());
- put_cpu();
+ local_lock(&lru_pvecs.lock);
+ lru_add_drain_cpu(smp_processor_id());
+ local_unlock(&lru_pvecs.lock);
+}
+
+void lru_add_drain_cpu_zone(struct zone *zone)
+{
+ local_lock(&lru_pvecs.lock);
+ lru_add_drain_cpu(smp_processor_id());
+ drain_local_pages(zone);
+ local_unlock(&lru_pvecs.lock);
}
#ifdef CONFIG_SMP
@@ -743,11 +785,11 @@ void lru_add_drain_all(void)
for_each_online_cpu(cpu) {
struct work_struct *work = &per_cpu(lru_add_drain_work, cpu);
- if (pagevec_count(&per_cpu(lru_add_pvec, cpu)) ||
- pagevec_count(&per_cpu(lru_rotate_pvecs, cpu)) ||
- pagevec_count(&per_cpu(lru_deactivate_file_pvecs, cpu)) ||
- pagevec_count(&per_cpu(lru_deactivate_pvecs, cpu)) ||
- pagevec_count(&per_cpu(lru_lazyfree_pvecs, cpu)) ||
+ if (pagevec_count(&per_cpu(lru_pvecs.lru_add, cpu)) ||
+ pagevec_count(&per_cpu(lru_rotate.pvec, cpu)) ||
+ pagevec_count(&per_cpu(lru_pvecs.lru_deactivate_file, cpu)) ||
+ pagevec_count(&per_cpu(lru_pvecs.lru_deactivate, cpu)) ||
+ pagevec_count(&per_cpu(lru_pvecs.lru_lazyfree, cpu)) ||
need_activate_page_drain(cpu)) {
INIT_WORK(work, lru_add_drain_per_cpu);
queue_work_on(cpu, mm_percpu_wq, work);
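A note on the pattern in the mm/swap.c hunks above: local_lock_t turns the protection that was previously implicit in get_cpu_var()/local_irq_save() into a named, lockdep-visible object, which is what lets PREEMPT_RT substitute a per-CPU spinlock. A minimal sketch of the idiom, using made-up names (my_stats, my_account) rather than anything from this patch:

	#include <linux/local_lock.h>
	#include <linux/percpu.h>

	struct my_stats {
		local_lock_t lock;	/* protects the per-CPU fields below */
		unsigned long count;
	};

	static DEFINE_PER_CPU(struct my_stats, my_stats) = {
		.lock = INIT_LOCAL_LOCK(lock),
	};

	static void my_account(void)
	{
		struct my_stats *s;

		/* replaces get_cpu_var(): preempt-disable on !RT, per-CPU lock on RT */
		local_lock(&my_stats.lock);
		s = this_cpu_ptr(&my_stats);
		s->count++;
		local_unlock(&my_stats.lock);
	}

For contexts that previously used local_irq_save(), local_lock_irqsave(&my_stats.lock, flags) is the drop-in analogue, as the rotate_reclaimable_page() hunk shows.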
diff --git a/mm/swap_state.c b/mm/swap_state.c
index ebed37bbf7a3..8238954ae781 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -509,10 +509,11 @@ static unsigned long swapin_nr_pages(unsigned long offset)
return 1;
hits = atomic_xchg(&swapin_readahead_hits, 0);
- pages = __swapin_nr_pages(prev_offset, offset, hits, max_pages,
+ pages = __swapin_nr_pages(READ_ONCE(prev_offset), offset, hits,
+ max_pages,
atomic_read(&last_readahead_pages));
if (!hits)
- prev_offset = offset;
+ WRITE_ONCE(prev_offset, offset);
atomic_set(&last_readahead_pages, pages);
return pages;
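The swap_state.c hunk above is the standard annotation for a deliberately lockless shared variable: READ_ONCE()/WRITE_ONCE() keep the compiler from tearing, fusing or refetching the access and mark the data race as intentional for KCSAN. A generic sketch with illustrative names (not from the kernel source):

	#include <linux/compiler.h>

	static unsigned long last_offset;	/* shared hint, updated without a lock */

	static unsigned long read_last_offset(void)
	{
		return READ_ONCE(last_offset);	/* single, untorn load */
	}

	static void update_last_offset(unsigned long offset)
	{
		WRITE_ONCE(last_offset, offset);	/* single, untorn store */
	}

The value may still be stale by the time it is used; that is acceptable here because prev_offset is only a readahead heuristic.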
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 5871a2aa86a5..63ac67208453 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -601,7 +601,6 @@ static bool scan_swap_map_try_ssd_cluster(struct swap_info_struct *si,
{
struct percpu_cluster *cluster;
struct swap_cluster_info *ci;
- bool found_free;
unsigned long tmp, max;
new_cluster:
@@ -614,17 +613,17 @@ new_cluster:
} else if (!cluster_list_empty(&si->discard_clusters)) {
/*
* we don't have free cluster but have some clusters in
- * discarding, do discard now and reclaim them
+ * discarding, do discard now and reclaim them, then
+ * reread cluster_next_cpu since we dropped si->lock
*/
swap_do_scheduled_discard(si);
- *scan_base = *offset = si->cluster_next;
+ *scan_base = this_cpu_read(*si->cluster_next_cpu);
+ *offset = *scan_base;
goto new_cluster;
} else
return false;
}
- found_free = false;
-
/*
* Other CPUs can use our cluster if they can't find a free cluster,
* check if there is still free entry in the cluster
@@ -632,27 +631,23 @@ new_cluster:
tmp = cluster->next;
max = min_t(unsigned long, si->max,
(cluster_next(&cluster->index) + 1) * SWAPFILE_CLUSTER);
- if (tmp >= max) {
- cluster_set_null(&cluster->index);
- goto new_cluster;
- }
- ci = lock_cluster(si, tmp);
- while (tmp < max) {
- if (!si->swap_map[tmp]) {
- found_free = true;
- break;
+ if (tmp < max) {
+ ci = lock_cluster(si, tmp);
+ while (tmp < max) {
+ if (!si->swap_map[tmp])
+ break;
+ tmp++;
}
- tmp++;
+ unlock_cluster(ci);
}
- unlock_cluster(ci);
- if (!found_free) {
+ if (tmp >= max) {
cluster_set_null(&cluster->index);
goto new_cluster;
}
cluster->next = tmp + 1;
*offset = tmp;
*scan_base = tmp;
- return found_free;
+ return true;
}
static void __del_from_avail_list(struct swap_info_struct *p)
@@ -729,6 +724,34 @@ static void swap_range_free(struct swap_info_struct *si, unsigned long offset,
}
}
+static void set_cluster_next(struct swap_info_struct *si, unsigned long next)
+{
+ unsigned long prev;
+
+ if (!(si->flags & SWP_SOLIDSTATE)) {
+ si->cluster_next = next;
+ return;
+ }
+
+ prev = this_cpu_read(*si->cluster_next_cpu);
+ /*
+ * When crossing a swap-address-space-sized, aligned chunk, choose
+ * another chunk at random to reduce lock contention on the swap
+ * address space, if possible.
+ */
+ if ((prev >> SWAP_ADDRESS_SPACE_SHIFT) !=
+ (next >> SWAP_ADDRESS_SPACE_SHIFT)) {
+ /* No free swap slots available */
+ if (si->highest_bit <= si->lowest_bit)
+ return;
+ next = si->lowest_bit +
+ prandom_u32_max(si->highest_bit - si->lowest_bit + 1);
+ next = ALIGN_DOWN(next, SWAP_ADDRESS_SPACE_PAGES);
+ next = max_t(unsigned int, next, si->lowest_bit);
+ }
+ this_cpu_write(*si->cluster_next_cpu, next);
+}
+
static int scan_swap_map_slots(struct swap_info_struct *si,
unsigned char usage, int nr,
swp_entry_t slots[])
@@ -739,9 +762,7 @@ static int scan_swap_map_slots(struct swap_info_struct *si,
unsigned long last_in_cluster = 0;
int latency_ration = LATENCY_LIMIT;
int n_ret = 0;
-
- if (nr > SWAP_BATCH)
- nr = SWAP_BATCH;
+ bool scanned_many = false;
/*
* We try to cluster swap pages by allocating them sequentially
@@ -755,17 +776,22 @@ static int scan_swap_map_slots(struct swap_info_struct *si,
*/
si->flags += SWP_SCANNING;
- scan_base = offset = si->cluster_next;
+ /*
+ * Use percpu scan base for SSD to reduce lock contention on
+ * cluster and swap cache. For HDD, sequential access is more
+ * important.
+ */
+ if (si->flags & SWP_SOLIDSTATE)
+ scan_base = this_cpu_read(*si->cluster_next_cpu);
+ else
+ scan_base = si->cluster_next;
+ offset = scan_base;
/* SSD algorithm */
if (si->cluster_info) {
- if (scan_swap_map_try_ssd_cluster(si, &offset, &scan_base))
- goto checks;
- else
+ if (!scan_swap_map_try_ssd_cluster(si, &offset, &scan_base))
goto scan;
- }
-
- if (unlikely(!si->cluster_nr--)) {
+ } else if (unlikely(!si->cluster_nr--)) {
if (si->pages - si->inuse_pages < SWAPFILE_CLUSTER) {
si->cluster_nr = SWAPFILE_CLUSTER - 1;
goto checks;
@@ -848,7 +874,6 @@ checks:
unlock_cluster(ci);
swap_range_alloc(si, offset, 1);
- si->cluster_next = offset + 1;
slots[n_ret++] = swp_entry(si->type, offset);
/* got enough slots or reach max slots? */
@@ -871,19 +896,33 @@ checks:
if (si->cluster_info) {
if (scan_swap_map_try_ssd_cluster(si, &offset, &scan_base))
goto checks;
- else
- goto done;
- }
- /* non-ssd case */
- ++offset;
-
- /* non-ssd case, still more slots in cluster? */
- if (si->cluster_nr && !si->swap_map[offset]) {
+ } else if (si->cluster_nr && !si->swap_map[++offset]) {
+ /* non-ssd case, still more slots in cluster? */
--si->cluster_nr;
goto checks;
}
+ /*
+ * Even if there are no free clusters available (fragmented),
+ * try to scan a little more quickly with lock held unless we
+ * have scanned too many slots already.
+ */
+ if (!scanned_many) {
+ unsigned long scan_limit;
+
+ if (offset < scan_base)
+ scan_limit = scan_base;
+ else
+ scan_limit = si->highest_bit;
+ for (; offset <= scan_limit && --latency_ration > 0;
+ offset++) {
+ if (!si->swap_map[offset])
+ goto checks;
+ }
+ }
+
done:
+ set_cluster_next(si, offset + 1);
si->flags -= SWP_SCANNING;
return n_ret;
@@ -901,6 +940,7 @@ scan:
if (unlikely(--latency_ration < 0)) {
cond_resched();
latency_ration = LATENCY_LIMIT;
+ scanned_many = true;
}
}
offset = si->lowest_bit;
@@ -916,6 +956,7 @@ scan:
if (unlikely(--latency_ration < 0)) {
cond_resched();
latency_ration = LATENCY_LIMIT;
+ scanned_many = true;
}
offset++;
}
@@ -1004,11 +1045,7 @@ int get_swap_pages(int n_goal, swp_entry_t swp_entries[], int entry_size)
if (avail_pgs <= 0)
goto noswap;
- if (n_goal > SWAP_BATCH)
- n_goal = SWAP_BATCH;
-
- if (n_goal > avail_pgs)
- n_goal = avail_pgs;
+ n_goal = min3((long)n_goal, (long)SWAP_BATCH, avail_pgs);
atomic_long_sub(n_goal * size, &nr_swap_pages);
@@ -1275,13 +1312,14 @@ unlock_out:
}
static unsigned char __swap_entry_free(struct swap_info_struct *p,
- swp_entry_t entry, unsigned char usage)
+ swp_entry_t entry)
{
struct swap_cluster_info *ci;
unsigned long offset = swp_offset(entry);
+ unsigned char usage;
ci = lock_cluster_or_swap_info(p, offset);
- usage = __swap_entry_free_locked(p, offset, usage);
+ usage = __swap_entry_free_locked(p, offset, 1);
unlock_cluster_or_swap_info(p, ci);
if (!usage)
free_swap_slot(entry);
@@ -1316,7 +1354,7 @@ void swap_free(swp_entry_t entry)
p = _swap_info_get(entry);
if (p)
- __swap_entry_free(p, entry, 1);
+ __swap_entry_free(p, entry);
}
/*
@@ -1739,7 +1777,7 @@ int free_swap_and_cache(swp_entry_t entry)
p = _swap_info_get(entry);
if (p) {
- count = __swap_entry_free(p, entry, 1);
+ count = __swap_entry_free(p, entry);
if (count == SWAP_HAS_CACHE &&
!swap_page_trans_huge_swapped(p, entry))
__try_to_reclaim_swap(p, swp_offset(entry),
@@ -1937,10 +1975,14 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
pte_unmap(pte);
swap_map = &si->swap_map[offset];
- vmf.vma = vma;
- vmf.address = addr;
- vmf.pmd = pmd;
- page = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE, &vmf);
+ page = lookup_swap_cache(entry, vma, addr);
+ if (!page) {
+ vmf.vma = vma;
+ vmf.address = addr;
+ vmf.pmd = pmd;
+ page = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE,
+ &vmf);
+ }
if (!page) {
if (*swap_map == 0 || *swap_map == SWAP_MAP_BAD)
goto try_next;
@@ -2650,6 +2692,8 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
mutex_unlock(&swapon_mutex);
free_percpu(p->percpu_cluster);
p->percpu_cluster = NULL;
+ free_percpu(p->cluster_next_cpu);
+ p->cluster_next_cpu = NULL;
vfree(swap_map);
kvfree(cluster_info);
kvfree(frontswap_map);
@@ -2757,20 +2801,24 @@ static int swap_show(struct seq_file *swap, void *v)
struct swap_info_struct *si = v;
struct file *file;
int len;
+ unsigned int bytes, inuse;
if (si == SEQ_START_TOKEN) {
- seq_puts(swap,"Filename\t\t\t\tType\t\tSize\tUsed\tPriority\n");
+ seq_puts(swap,"Filename\t\t\t\tType\t\tSize\t\tUsed\t\tPriority\n");
return 0;
}
+ bytes = si->pages << (PAGE_SHIFT - 10);
+ inuse = si->inuse_pages << (PAGE_SHIFT - 10);
+
file = si->swap_file;
len = seq_file_path(swap, file, " \t\n\\");
- seq_printf(swap, "%*s%s\t%u\t%u\t%d\n",
+ seq_printf(swap, "%*s%s\t%u\t%s%u\t%s%d\n",
len < 40 ? 40 - len : 1, " ",
S_ISBLK(file_inode(file)->i_mode) ?
"partition" : "file\t",
- si->pages << (PAGE_SHIFT - 10),
- si->inuse_pages << (PAGE_SHIFT - 10),
+ bytes, bytes < 10000000 ? "\t" : "",
+ inuse, inuse < 10000000 ? "\t" : "",
si->prio);
return 0;
}
@@ -3202,11 +3250,19 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
unsigned long ci, nr_cluster;
p->flags |= SWP_SOLIDSTATE;
+ p->cluster_next_cpu = alloc_percpu(unsigned int);
+ if (!p->cluster_next_cpu) {
+ error = -ENOMEM;
+ goto bad_swap_unlock_inode;
+ }
/*
* select a random position to start with to help wear leveling
* SSD
*/
- p->cluster_next = 1 + (prandom_u32() % p->highest_bit);
+ for_each_possible_cpu(cpu) {
+ per_cpu(*p->cluster_next_cpu, cpu) =
+ 1 + prandom_u32_max(p->highest_bit);
+ }
nr_cluster = DIV_ROUND_UP(maxpages, SWAPFILE_CLUSTER);
cluster_info = kvcalloc(nr_cluster, sizeof(*cluster_info),
@@ -3322,6 +3378,8 @@ bad_swap_unlock_inode:
bad_swap:
free_percpu(p->percpu_cluster);
p->percpu_cluster = NULL;
+ free_percpu(p->cluster_next_cpu);
+ p->cluster_next_cpu = NULL;
if (inode && S_ISBLK(inode->i_mode) && p->bdev) {
set_blocksize(p->bdev, p->old_block_size);
blkdev_put(p->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
@@ -3654,7 +3712,7 @@ static bool swap_count_continued(struct swap_info_struct *si,
spin_lock(&si->cont_lock);
offset &= ~PAGE_MASK;
- page = list_entry(head->lru.next, struct page, lru);
+ page = list_next_entry(head, lru);
map = kmap_atomic(page) + offset;
if (count == SWAP_MAP_MAX) /* initial increment from swap_map */
@@ -3666,13 +3724,13 @@ static bool swap_count_continued(struct swap_info_struct *si,
*/
while (*map == (SWAP_CONT_MAX | COUNT_CONTINUED)) {
kunmap_atomic(map);
- page = list_entry(page->lru.next, struct page, lru);
+ page = list_next_entry(page, lru);
BUG_ON(page == head);
map = kmap_atomic(page) + offset;
}
if (*map == SWAP_CONT_MAX) {
kunmap_atomic(map);
- page = list_entry(page->lru.next, struct page, lru);
+ page = list_next_entry(page, lru);
if (page == head) {
ret = false; /* add count continuation */
goto out;
@@ -3682,12 +3740,10 @@ init_map: *map = 0; /* we didn't zero the page */
}
*map += 1;
kunmap_atomic(map);
- page = list_entry(page->lru.prev, struct page, lru);
- while (page != head) {
+ while ((page = list_prev_entry(page, lru)) != head) {
map = kmap_atomic(page) + offset;
*map = COUNT_CONTINUED;
kunmap_atomic(map);
- page = list_entry(page->lru.prev, struct page, lru);
}
ret = true; /* incremented */
@@ -3698,7 +3754,7 @@ init_map: *map = 0; /* we didn't zero the page */
BUG_ON(count != COUNT_CONTINUED);
while (*map == COUNT_CONTINUED) {
kunmap_atomic(map);
- page = list_entry(page->lru.next, struct page, lru);
+ page = list_next_entry(page, lru);
BUG_ON(page == head);
map = kmap_atomic(page) + offset;
}
@@ -3707,13 +3763,11 @@ init_map: *map = 0; /* we didn't zero the page */
if (*map == 0)
count = 0;
kunmap_atomic(map);
- page = list_entry(page->lru.prev, struct page, lru);
- while (page != head) {
+ while ((page = list_prev_entry(page, lru)) != head) {
map = kmap_atomic(page) + offset;
*map = SWAP_CONT_MAX | count;
count = COUNT_CONTINUED;
kunmap_atomic(map);
- page = list_entry(page->lru.prev, struct page, lru);
}
ret = count == COUNT_CONTINUED;
}
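Two small idioms in the swapfile changes are worth calling out. First, 1 + prandom_u32_max(p->highest_bit) replaces 1 + (prandom_u32() % p->highest_bit); prandom_u32_max(ep_ro) returns a value uniformly distributed in [0, ep_ro) and avoids the modulo bias of the open-coded form. Second, the per-CPU cluster_next_cpu hint is seeded independently on every CPU. A hedged sketch of that combination, with hypothetical names:

	#include <linux/cpumask.h>
	#include <linux/percpu.h>
	#include <linux/random.h>

	static unsigned int __percpu *next_slot_hint;

	static int seed_slot_hints(unsigned int highest_bit)
	{
		int cpu;

		next_slot_hint = alloc_percpu(unsigned int);
		if (!next_slot_hint)
			return -ENOMEM;

		for_each_possible_cpu(cpu)
			/* uniform in [1, highest_bit], no modulo bias */
			per_cpu(*next_slot_hint, cpu) = 1 + prandom_u32_max(highest_bit);

		return 0;
	}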
diff --git a/mm/util.c b/mm/util.c
index 8defc8ec141f..41b47d8cae09 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -580,7 +580,7 @@ void *kvmalloc_node(size_t size, gfp_t flags, int node)
if (ret || size <= PAGE_SIZE)
return ret;
- return __vmalloc_node_flags_caller(size, node, flags,
+ return __vmalloc_node(size, 1, flags, node,
__builtin_return_address(0));
}
EXPORT_SYMBOL(kvmalloc_node);
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 9a8227afa073..1e94497b7388 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -69,7 +69,8 @@ static void free_work(struct work_struct *w)
/*** Page table manipulation functions ***/
-static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
+static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
+ pgtbl_mod_mask *mask)
{
pte_t *pte;
@@ -78,73 +79,118 @@ static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
pte_t ptent = ptep_get_and_clear(&init_mm, addr, pte);
WARN_ON(!pte_none(ptent) && !pte_present(ptent));
} while (pte++, addr += PAGE_SIZE, addr != end);
+ *mask |= PGTBL_PTE_MODIFIED;
}
-static void vunmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end)
+static void vunmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
+ pgtbl_mod_mask *mask)
{
pmd_t *pmd;
unsigned long next;
+ int cleared;
pmd = pmd_offset(pud, addr);
do {
next = pmd_addr_end(addr, end);
- if (pmd_clear_huge(pmd))
+
+ cleared = pmd_clear_huge(pmd);
+ if (cleared || pmd_bad(*pmd))
+ *mask |= PGTBL_PMD_MODIFIED;
+
+ if (cleared)
continue;
if (pmd_none_or_clear_bad(pmd))
continue;
- vunmap_pte_range(pmd, addr, next);
+ vunmap_pte_range(pmd, addr, next, mask);
} while (pmd++, addr = next, addr != end);
}
-static void vunmap_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end)
+static void vunmap_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end,
+ pgtbl_mod_mask *mask)
{
pud_t *pud;
unsigned long next;
+ int cleared;
pud = pud_offset(p4d, addr);
do {
next = pud_addr_end(addr, end);
- if (pud_clear_huge(pud))
+
+ cleared = pud_clear_huge(pud);
+ if (cleared || pud_bad(*pud))
+ *mask |= PGTBL_PUD_MODIFIED;
+
+ if (cleared)
continue;
if (pud_none_or_clear_bad(pud))
continue;
- vunmap_pmd_range(pud, addr, next);
+ vunmap_pmd_range(pud, addr, next, mask);
} while (pud++, addr = next, addr != end);
}
-static void vunmap_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end)
+static void vunmap_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end,
+ pgtbl_mod_mask *mask)
{
p4d_t *p4d;
unsigned long next;
+ int cleared;
p4d = p4d_offset(pgd, addr);
do {
next = p4d_addr_end(addr, end);
- if (p4d_clear_huge(p4d))
+
+ cleared = p4d_clear_huge(p4d);
+ if (cleared || p4d_bad(*p4d))
+ *mask |= PGTBL_P4D_MODIFIED;
+
+ if (cleared)
continue;
if (p4d_none_or_clear_bad(p4d))
continue;
- vunmap_pud_range(p4d, addr, next);
+ vunmap_pud_range(p4d, addr, next, mask);
} while (p4d++, addr = next, addr != end);
}
-static void vunmap_page_range(unsigned long addr, unsigned long end)
+/**
+ * unmap_kernel_range_noflush - unmap kernel VM area
+ * @start: start of the VM area to unmap
+ * @size: size of the VM area to unmap
+ *
+ * Unmap PFN_UP(@size) pages at @start. The VM area specified by @start and
+ * @size should have been allocated using get_vm_area() and its friends.
+ *
+ * NOTE:
+ * This function does NOT do any cache flushing. The caller is responsible
+ * for calling flush_cache_vunmap() on to-be-unmapped areas before calling this
+ * function and flush_tlb_kernel_range() after.
+ */
+void unmap_kernel_range_noflush(unsigned long start, unsigned long size)
{
- pgd_t *pgd;
+ unsigned long end = start + size;
unsigned long next;
+ pgd_t *pgd;
+ unsigned long addr = start;
+ pgtbl_mod_mask mask = 0;
BUG_ON(addr >= end);
+ start = addr;
pgd = pgd_offset_k(addr);
do {
next = pgd_addr_end(addr, end);
+ if (pgd_bad(*pgd))
+ mask |= PGTBL_PGD_MODIFIED;
if (pgd_none_or_clear_bad(pgd))
continue;
- vunmap_p4d_range(pgd, addr, next);
+ vunmap_p4d_range(pgd, addr, next, &mask);
} while (pgd++, addr = next, addr != end);
+
+ if (mask & ARCH_PAGE_TABLE_SYNC_MASK)
+ arch_sync_kernel_mappings(start, end);
}
static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
- unsigned long end, pgprot_t prot, struct page **pages, int *nr)
+ unsigned long end, pgprot_t prot, struct page **pages, int *nr,
+ pgtbl_mod_mask *mask)
{
pte_t *pte;
@@ -153,7 +199,7 @@ static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
* callers keep track of where we're up to.
*/
- pte = pte_alloc_kernel(pmd, addr);
+ pte = pte_alloc_kernel_track(pmd, addr, mask);
if (!pte)
return -ENOMEM;
do {
@@ -166,94 +212,117 @@ static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
set_pte_at(&init_mm, addr, pte, mk_pte(page, prot));
(*nr)++;
} while (pte++, addr += PAGE_SIZE, addr != end);
+ *mask |= PGTBL_PTE_MODIFIED;
return 0;
}
static int vmap_pmd_range(pud_t *pud, unsigned long addr,
- unsigned long end, pgprot_t prot, struct page **pages, int *nr)
+ unsigned long end, pgprot_t prot, struct page **pages, int *nr,
+ pgtbl_mod_mask *mask)
{
pmd_t *pmd;
unsigned long next;
- pmd = pmd_alloc(&init_mm, pud, addr);
+ pmd = pmd_alloc_track(&init_mm, pud, addr, mask);
if (!pmd)
return -ENOMEM;
do {
next = pmd_addr_end(addr, end);
- if (vmap_pte_range(pmd, addr, next, prot, pages, nr))
+ if (vmap_pte_range(pmd, addr, next, prot, pages, nr, mask))
return -ENOMEM;
} while (pmd++, addr = next, addr != end);
return 0;
}
static int vmap_pud_range(p4d_t *p4d, unsigned long addr,
- unsigned long end, pgprot_t prot, struct page **pages, int *nr)
+ unsigned long end, pgprot_t prot, struct page **pages, int *nr,
+ pgtbl_mod_mask *mask)
{
pud_t *pud;
unsigned long next;
- pud = pud_alloc(&init_mm, p4d, addr);
+ pud = pud_alloc_track(&init_mm, p4d, addr, mask);
if (!pud)
return -ENOMEM;
do {
next = pud_addr_end(addr, end);
- if (vmap_pmd_range(pud, addr, next, prot, pages, nr))
+ if (vmap_pmd_range(pud, addr, next, prot, pages, nr, mask))
return -ENOMEM;
} while (pud++, addr = next, addr != end);
return 0;
}
static int vmap_p4d_range(pgd_t *pgd, unsigned long addr,
- unsigned long end, pgprot_t prot, struct page **pages, int *nr)
+ unsigned long end, pgprot_t prot, struct page **pages, int *nr,
+ pgtbl_mod_mask *mask)
{
p4d_t *p4d;
unsigned long next;
- p4d = p4d_alloc(&init_mm, pgd, addr);
+ p4d = p4d_alloc_track(&init_mm, pgd, addr, mask);
if (!p4d)
return -ENOMEM;
do {
next = p4d_addr_end(addr, end);
- if (vmap_pud_range(p4d, addr, next, prot, pages, nr))
+ if (vmap_pud_range(p4d, addr, next, prot, pages, nr, mask))
return -ENOMEM;
} while (p4d++, addr = next, addr != end);
return 0;
}
-/*
- * Set up page tables in kva (addr, end). The ptes shall have prot "prot", and
- * will have pfns corresponding to the "pages" array.
+/**
+ * map_kernel_range_noflush - map kernel VM area with the specified pages
+ * @addr: start of the VM area to map
+ * @size: size of the VM area to map
+ * @prot: page protection flags to use
+ * @pages: pages to map
*
- * Ie. pte at addr+N*PAGE_SIZE shall point to pfn corresponding to pages[N]
+ * Map PFN_UP(@size) pages at @addr. The VM area specified by @addr and @size
+ * should have been allocated using get_vm_area() and its friends.
+ *
+ * NOTE:
+ * This function does NOT do any cache flushing. The caller is responsible for
+ * calling flush_cache_vmap() on to-be-mapped areas before calling this
+ * function.
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
*/
-static int vmap_page_range_noflush(unsigned long start, unsigned long end,
- pgprot_t prot, struct page **pages)
+int map_kernel_range_noflush(unsigned long addr, unsigned long size,
+ pgprot_t prot, struct page **pages)
{
- pgd_t *pgd;
+ unsigned long start = addr;
+ unsigned long end = addr + size;
unsigned long next;
- unsigned long addr = start;
+ pgd_t *pgd;
int err = 0;
int nr = 0;
+ pgtbl_mod_mask mask = 0;
BUG_ON(addr >= end);
pgd = pgd_offset_k(addr);
do {
next = pgd_addr_end(addr, end);
- err = vmap_p4d_range(pgd, addr, next, prot, pages, &nr);
+ if (pgd_bad(*pgd))
+ mask |= PGTBL_PGD_MODIFIED;
+ err = vmap_p4d_range(pgd, addr, next, prot, pages, &nr, &mask);
if (err)
return err;
} while (pgd++, addr = next, addr != end);
- return nr;
+ if (mask & ARCH_PAGE_TABLE_SYNC_MASK)
+ arch_sync_kernel_mappings(start, end);
+
+ return 0;
}
-static int vmap_page_range(unsigned long start, unsigned long end,
- pgprot_t prot, struct page **pages)
+int map_kernel_range(unsigned long start, unsigned long size, pgprot_t prot,
+ struct page **pages)
{
int ret;
- ret = vmap_page_range_noflush(start, end, prot, pages);
- flush_cache_vmap(start, end);
+ ret = map_kernel_range_noflush(start, size, prot, pages);
+ flush_cache_vmap(start, start + size);
return ret;
}
@@ -1223,14 +1292,6 @@ int unregister_vmap_purge_notifier(struct notifier_block *nb)
EXPORT_SYMBOL_GPL(unregister_vmap_purge_notifier);
/*
- * Clear the pagetable entries of a given vmap_area
- */
-static void unmap_vmap_area(struct vmap_area *va)
-{
- vunmap_page_range(va->va_start, va->va_end);
-}
-
-/*
* lazy_max_pages is the maximum amount of virtual address space we gather up
* before attempting to purge with a TLB flush.
*
@@ -1293,12 +1354,6 @@ static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end)
return false;
/*
- * First make sure the mappings are removed from all page-tables
- * before they are freed.
- */
- vmalloc_sync_unmappings();
-
- /*
* TODO: to calculate a flush range without looping.
* The list can be up to lazy_max_pages() elements.
*/
@@ -1391,7 +1446,7 @@ static void free_vmap_area_noflush(struct vmap_area *va)
static void free_unmap_vmap_area(struct vmap_area *va)
{
flush_cache_vunmap(va->va_start, va->va_end);
- unmap_vmap_area(va);
+ unmap_kernel_range_noflush(va->va_start, va->va_end - va->va_start);
if (debug_pagealloc_enabled_static())
flush_tlb_kernel_range(va->va_start, va->va_end);
@@ -1665,7 +1720,7 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
return vaddr;
}
-static void vb_free(const void *addr, unsigned long size)
+static void vb_free(unsigned long addr, unsigned long size)
{
unsigned long offset;
unsigned long vb_idx;
@@ -1675,24 +1730,22 @@ static void vb_free(const void *addr, unsigned long size)
BUG_ON(offset_in_page(size));
BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
- flush_cache_vunmap((unsigned long)addr, (unsigned long)addr + size);
+ flush_cache_vunmap(addr, addr + size);
order = get_order(size);
- offset = (unsigned long)addr & (VMAP_BLOCK_SIZE - 1);
- offset >>= PAGE_SHIFT;
+ offset = (addr & (VMAP_BLOCK_SIZE - 1)) >> PAGE_SHIFT;
- vb_idx = addr_to_vb_idx((unsigned long)addr);
+ vb_idx = addr_to_vb_idx(addr);
rcu_read_lock();
vb = radix_tree_lookup(&vmap_block_tree, vb_idx);
rcu_read_unlock();
BUG_ON(!vb);
- vunmap_page_range((unsigned long)addr, (unsigned long)addr + size);
+ unmap_kernel_range_noflush(addr, size);
if (debug_pagealloc_enabled_static())
- flush_tlb_kernel_range((unsigned long)addr,
- (unsigned long)addr + size);
+ flush_tlb_kernel_range(addr, addr + size);
spin_lock(&vb->lock);
@@ -1792,7 +1845,7 @@ void vm_unmap_ram(const void *mem, unsigned int count)
if (likely(count <= VMAP_MAX_ALLOC)) {
debug_check_no_locks_freed(mem, size);
- vb_free(mem, size);
+ vb_free(addr, size);
return;
}
@@ -1819,7 +1872,7 @@ EXPORT_SYMBOL(vm_unmap_ram);
*
* Returns: a pointer to the address that has been mapped, or %NULL on failure
*/
-void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t prot)
+void *vm_map_ram(struct page **pages, unsigned int count, int node)
{
unsigned long size = (unsigned long)count << PAGE_SHIFT;
unsigned long addr;
@@ -1843,7 +1896,7 @@ void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t pro
kasan_unpoison_vmalloc(mem, size);
- if (vmap_page_range(addr, addr + size, prot, pages) < 0) {
+ if (map_kernel_range(addr, size, PAGE_KERNEL, pages) < 0) {
vm_unmap_ram(mem, count);
return NULL;
}
@@ -1988,51 +2041,6 @@ void __init vmalloc_init(void)
}
/**
- * map_kernel_range_noflush - map kernel VM area with the specified pages
- * @addr: start of the VM area to map
- * @size: size of the VM area to map
- * @prot: page protection flags to use
- * @pages: pages to map
- *
- * Map PFN_UP(@size) pages at @addr. The VM area @addr and @size
- * specify should have been allocated using get_vm_area() and its
- * friends.
- *
- * NOTE:
- * This function does NOT do any cache flushing. The caller is
- * responsible for calling flush_cache_vmap() on to-be-mapped areas
- * before calling this function.
- *
- * RETURNS:
- * The number of pages mapped on success, -errno on failure.
- */
-int map_kernel_range_noflush(unsigned long addr, unsigned long size,
- pgprot_t prot, struct page **pages)
-{
- return vmap_page_range_noflush(addr, addr + size, prot, pages);
-}
-
-/**
- * unmap_kernel_range_noflush - unmap kernel VM area
- * @addr: start of the VM area to unmap
- * @size: size of the VM area to unmap
- *
- * Unmap PFN_UP(@size) pages at @addr. The VM area @addr and @size
- * specify should have been allocated using get_vm_area() and its
- * friends.
- *
- * NOTE:
- * This function does NOT do any cache flushing. The caller is
- * responsible for calling flush_cache_vunmap() on to-be-mapped areas
- * before calling this function and flush_tlb_kernel_range() after.
- */
-void unmap_kernel_range_noflush(unsigned long addr, unsigned long size)
-{
- vunmap_page_range(addr, addr + size);
-}
-EXPORT_SYMBOL_GPL(unmap_kernel_range_noflush);
-
-/**
* unmap_kernel_range - unmap kernel VM area and flush cache and TLB
* @addr: start of the VM area to unmap
* @size: size of the VM area to unmap
@@ -2045,22 +2053,9 @@ void unmap_kernel_range(unsigned long addr, unsigned long size)
unsigned long end = addr + size;
flush_cache_vunmap(addr, end);
- vunmap_page_range(addr, end);
+ unmap_kernel_range_noflush(addr, size);
flush_tlb_kernel_range(addr, end);
}
-EXPORT_SYMBOL_GPL(unmap_kernel_range);
-
-int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page **pages)
-{
- unsigned long addr = (unsigned long)area->addr;
- unsigned long end = addr + get_vm_area_size(area);
- int err;
-
- err = vmap_page_range(addr, end, prot, pages);
-
- return err > 0 ? 0 : err;
-}
-EXPORT_SYMBOL_GPL(map_vm_area);
static inline void setup_vmalloc_vm_locked(struct vm_struct *vm,
struct vmap_area *va, unsigned long flags, const void *caller)
@@ -2128,14 +2123,6 @@ static struct vm_struct *__get_vm_area_node(unsigned long size,
return area;
}
-struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags,
- unsigned long start, unsigned long end)
-{
- return __get_vm_area_node(size, 1, flags, start, end, NUMA_NO_NODE,
- GFP_KERNEL, __builtin_return_address(0));
-}
-EXPORT_SYMBOL_GPL(__get_vm_area);
-
struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags,
unsigned long start, unsigned long end,
const void *caller)
@@ -2441,7 +2428,8 @@ void *vmap(struct page **pages, unsigned int count,
if (!area)
return NULL;
- if (map_vm_area(area, prot, pages)) {
+ if (map_kernel_range((unsigned long)area->addr, size, pgprot_nx(prot),
+ pages) < 0) {
vunmap(area->addr);
return NULL;
}
@@ -2450,9 +2438,6 @@ void *vmap(struct page **pages, unsigned int count,
}
EXPORT_SYMBOL(vmap);
-static void *__vmalloc_node(unsigned long size, unsigned long align,
- gfp_t gfp_mask, pgprot_t prot,
- int node, const void *caller);
static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
pgprot_t prot, int node)
{
@@ -2470,7 +2455,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
/* Please note that the recursion is strictly bounded. */
if (array_size > PAGE_SIZE) {
pages = __vmalloc_node(array_size, 1, nested_gfp|highmem_mask,
- PAGE_KERNEL, node, area->caller);
+ node, area->caller);
} else {
pages = kmalloc_node(array_size, nested_gfp, node);
}
@@ -2504,8 +2489,10 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
}
atomic_long_add(area->nr_pages, &nr_vmalloc_pages);
- if (map_vm_area(area, prot, pages))
+ if (map_kernel_range((unsigned long)area->addr, get_vm_area_size(area),
+ prot, pages) < 0)
goto fail;
+
return area->addr;
fail:
@@ -2573,27 +2560,16 @@ fail:
return NULL;
}
-/*
- * This is only for performance analysis of vmalloc and stress purpose.
- * It is required by vmalloc test module, therefore do not use it other
- * than that.
- */
-#ifdef CONFIG_TEST_VMALLOC_MODULE
-EXPORT_SYMBOL_GPL(__vmalloc_node_range);
-#endif
-
/**
* __vmalloc_node - allocate virtually contiguous memory
* @size: allocation size
* @align: desired alignment
* @gfp_mask: flags for the page level allocator
- * @prot: protection mask for the allocated pages
* @node: node to use for allocation or NUMA_NO_NODE
* @caller: caller's return address
*
- * Allocate enough pages to cover @size from the page level
- * allocator with @gfp_mask flags. Map them into contiguous
- * kernel virtual space, using a pagetable protection of @prot.
+ * Allocate enough pages to cover @size from the page level allocator with
+ * @gfp_mask flags. Map them into contiguous kernel virtual space.
*
* Reclaim modifiers in @gfp_mask - __GFP_NORETRY, __GFP_RETRY_MAYFAIL
* and __GFP_NOFAIL are not supported
@@ -2603,35 +2579,28 @@ EXPORT_SYMBOL_GPL(__vmalloc_node_range);
*
* Return: pointer to the allocated memory or %NULL on error
*/
-static void *__vmalloc_node(unsigned long size, unsigned long align,
- gfp_t gfp_mask, pgprot_t prot,
- int node, const void *caller)
+void *__vmalloc_node(unsigned long size, unsigned long align,
+ gfp_t gfp_mask, int node, const void *caller)
{
return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END,
- gfp_mask, prot, 0, node, caller);
+ gfp_mask, PAGE_KERNEL, 0, node, caller);
}
+/*
+ * This is only for performance analysis of vmalloc and stress purpose.
+ * It is required by vmalloc test module, therefore do not use it other
+ * than that.
+ */
+#ifdef CONFIG_TEST_VMALLOC_MODULE
+EXPORT_SYMBOL_GPL(__vmalloc_node);
+#endif
-void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
+void *__vmalloc(unsigned long size, gfp_t gfp_mask)
{
- return __vmalloc_node(size, 1, gfp_mask, prot, NUMA_NO_NODE,
+ return __vmalloc_node(size, 1, gfp_mask, NUMA_NO_NODE,
__builtin_return_address(0));
}
EXPORT_SYMBOL(__vmalloc);
-static inline void *__vmalloc_node_flags(unsigned long size,
- int node, gfp_t flags)
-{
- return __vmalloc_node(size, 1, flags, PAGE_KERNEL,
- node, __builtin_return_address(0));
-}
-
-
-void *__vmalloc_node_flags_caller(unsigned long size, int node, gfp_t flags,
- void *caller)
-{
- return __vmalloc_node(size, 1, flags, PAGE_KERNEL, node, caller);
-}
-
/**
* vmalloc - allocate virtually contiguous memory
* @size: allocation size
@@ -2646,8 +2615,8 @@ void *__vmalloc_node_flags_caller(unsigned long size, int node, gfp_t flags,
*/
void *vmalloc(unsigned long size)
{
- return __vmalloc_node_flags(size, NUMA_NO_NODE,
- GFP_KERNEL);
+ return __vmalloc_node(size, 1, GFP_KERNEL, NUMA_NO_NODE,
+ __builtin_return_address(0));
}
EXPORT_SYMBOL(vmalloc);
@@ -2666,8 +2635,8 @@ EXPORT_SYMBOL(vmalloc);
*/
void *vzalloc(unsigned long size)
{
- return __vmalloc_node_flags(size, NUMA_NO_NODE,
- GFP_KERNEL | __GFP_ZERO);
+ return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_ZERO, NUMA_NO_NODE,
+ __builtin_return_address(0));
}
EXPORT_SYMBOL(vzalloc);
@@ -2704,8 +2673,8 @@ EXPORT_SYMBOL(vmalloc_user);
*/
void *vmalloc_node(unsigned long size, int node)
{
- return __vmalloc_node(size, 1, GFP_KERNEL, PAGE_KERNEL,
- node, __builtin_return_address(0));
+ return __vmalloc_node(size, 1, GFP_KERNEL, node,
+ __builtin_return_address(0));
}
EXPORT_SYMBOL(vmalloc_node);
@@ -2718,39 +2687,16 @@ EXPORT_SYMBOL(vmalloc_node);
* allocator and map them into contiguous kernel virtual space.
* The memory allocated is set to zero.
*
- * For tight control over page level allocator and protection flags
- * use __vmalloc_node() instead.
- *
* Return: pointer to the allocated memory or %NULL on error
*/
void *vzalloc_node(unsigned long size, int node)
{
- return __vmalloc_node_flags(size, node,
- GFP_KERNEL | __GFP_ZERO);
+ return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_ZERO, node,
+ __builtin_return_address(0));
}
EXPORT_SYMBOL(vzalloc_node);
/**
- * vmalloc_user_node_flags - allocate memory for userspace on a specific node
- * @size: allocation size
- * @node: numa node
- * @flags: flags for the page level allocator
- *
- * The resulting memory area is zeroed so it can be mapped to userspace
- * without leaking data.
- *
- * Return: pointer to the allocated memory or %NULL on error
- */
-void *vmalloc_user_node_flags(unsigned long size, int node, gfp_t flags)
-{
- return __vmalloc_node_range(size, SHMLBA, VMALLOC_START, VMALLOC_END,
- flags | __GFP_ZERO, PAGE_KERNEL,
- VM_USERMAP, node,
- __builtin_return_address(0));
-}
-EXPORT_SYMBOL(vmalloc_user_node_flags);
-
-/**
* vmalloc_exec - allocate virtually contiguous, executable memory
* @size: allocation size
*
@@ -2793,8 +2739,8 @@ void *vmalloc_exec(unsigned long size)
*/
void *vmalloc_32(unsigned long size)
{
- return __vmalloc_node(size, 1, GFP_VMALLOC32, PAGE_KERNEL,
- NUMA_NO_NODE, __builtin_return_address(0));
+ return __vmalloc_node(size, 1, GFP_VMALLOC32, NUMA_NO_NODE,
+ __builtin_return_address(0));
}
EXPORT_SYMBOL(vmalloc_32);
@@ -3137,21 +3083,6 @@ int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
}
EXPORT_SYMBOL(remap_vmalloc_range);
-/*
- * Implement stubs for vmalloc_sync_[un]mappings () if the architecture chose
- * not to have one.
- *
- * The purpose of this function is to make sure the vmalloc area
- * mappings are identical in all page-tables in the system.
- */
-void __weak vmalloc_sync_mappings(void)
-{
-}
-
-void __weak vmalloc_sync_unmappings(void)
-{
-}
-
static int f(pte_t *pte, unsigned long addr, void *data)
{
pte_t ***p = data;
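The net effect of the vmalloc rework above on ordinary callers is that __vmalloc() and __vmalloc_node() no longer take a pgprot_t and always map with PAGE_KERNEL; the ebtables hunk further down is a typical conversion. A hedged before/after sketch:

	/* before this series: protection passed explicitly */
	newinfo = __vmalloc(size, GFP_KERNEL_ACCOUNT, PAGE_KERNEL);

	/* after: PAGE_KERNEL is implied */
	newinfo = __vmalloc(size, GFP_KERNEL_ACCOUNT);

	/* callers that genuinely need a different protection use the full helper */
	p = __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END,
				 GFP_KERNEL, PAGE_KERNEL_RO, 0, NUMA_NO_NODE,
				 __builtin_return_address(0));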
diff --git a/mm/vmscan.c b/mm/vmscan.c
index a37c87b5aee2..b2f5deb3603c 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1878,13 +1878,13 @@ static unsigned noinline_for_stack move_pages_to_lru(struct lruvec *lruvec,
/*
* If a kernel thread (such as nfsd for loop-back mounts) services
- * a backing device by writing to the page cache it sets PF_LESS_THROTTLE.
+ * a backing device by writing to the page cache it sets PF_LOCAL_THROTTLE.
* In that case we should only throttle if the backing device it is
* writing to is congested. In other cases it is safe to throttle.
*/
static int current_may_throttle(void)
{
- return !(current->flags & PF_LESS_THROTTLE) ||
+ return !(current->flags & PF_LOCAL_THROTTLE) ||
current->backing_dev_info == NULL ||
bdi_write_congested(current->backing_dev_info);
}
diff --git a/mm/vmstat.c b/mm/vmstat.c
index c03a8c914922..03987490ea96 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1108,7 +1108,7 @@ int fragmentation_index(struct zone *zone, unsigned int order)
TEXT_FOR_HIGHMEM(xx) xx "_movable",
const char * const vmstat_text[] = {
- /* enum zone_stat_item countes */
+ /* enum zone_stat_item counters */
"nr_free_pages",
"nr_zone_inactive_anon",
"nr_zone_active_anon",
@@ -1119,6 +1119,9 @@ const char * const vmstat_text[] = {
"nr_mlock",
"nr_page_table_pages",
"nr_kernel_stack",
+#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
+ "nr_shadow_call_stack",
+#endif
"nr_bounce",
#if IS_ENABLED(CONFIG_ZSMALLOC)
"nr_zspages",
@@ -1162,7 +1165,6 @@ const char * const vmstat_text[] = {
"nr_file_hugepages",
"nr_file_pmdmapped",
"nr_anon_transparent_hugepages",
- "nr_unstable",
"nr_vmscan_write",
"nr_vmscan_immediate_reclaim",
"nr_dirtied",
@@ -1723,6 +1725,14 @@ static int vmstat_show(struct seq_file *m, void *arg)
seq_puts(m, vmstat_text[off]);
seq_put_decimal_ull(m, " ", *l);
seq_putc(m, '\n');
+
+ if (off == NR_VMSTAT_ITEMS - 1) {
+ /*
+ * We've come to the end - add any deprecated counters to avoid
+ * breaking userspace which might depend on them being present.
+ */
+ seq_puts(m, "nr_unstable 0\n");
+ }
return 0;
}
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 2f836a2b993f..f6dc0673e62c 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -293,7 +293,7 @@ struct zspage {
};
struct mapping_area {
-#ifdef CONFIG_PGTABLE_MAPPING
+#ifdef CONFIG_ZSMALLOC_PGTABLE_MAPPING
struct vm_struct *vm; /* vm area for mapping object that span pages */
#else
char *vm_buf; /* copy buffer for objects that span pages */
@@ -1113,7 +1113,7 @@ static struct zspage *find_get_zspage(struct size_class *class)
return zspage;
}
-#ifdef CONFIG_PGTABLE_MAPPING
+#ifdef CONFIG_ZSMALLOC_PGTABLE_MAPPING
static inline int __zs_cpu_up(struct mapping_area *area)
{
/*
@@ -1138,7 +1138,9 @@ static inline void __zs_cpu_down(struct mapping_area *area)
static inline void *__zs_map_object(struct mapping_area *area,
struct page *pages[2], int off, int size)
{
- BUG_ON(map_vm_area(area->vm, PAGE_KERNEL, pages));
+ unsigned long addr = (unsigned long)area->vm->addr;
+
+ BUG_ON(map_kernel_range(addr, PAGE_SIZE * 2, PAGE_KERNEL, pages) < 0);
area->vm_addr = area->vm->addr;
return area->vm_addr + off;
}
@@ -1151,7 +1153,7 @@ static inline void __zs_unmap_object(struct mapping_area *area,
unmap_kernel_range(addr, PAGE_SIZE * 2);
}
-#else /* CONFIG_PGTABLE_MAPPING */
+#else /* CONFIG_ZSMALLOC_PGTABLE_MAPPING */
static inline int __zs_cpu_up(struct mapping_area *area)
{
@@ -1233,7 +1235,7 @@ out:
pagefault_enable();
}
-#endif /* CONFIG_PGTABLE_MAPPING */
+#endif /* CONFIG_ZSMALLOC_PGTABLE_MAPPING */
static int zs_cpu_prepare(unsigned int cpu)
{
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index 6fd9ddb2d85c..c2c5ab05fa7e 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -170,7 +170,6 @@ static int aes_cmac(struct crypto_shash *tfm, const u8 k[16], const u8 *m,
size_t len, u8 mac[16])
{
uint8_t tmp[16], mac_msb[16], msg_msb[CMAC_MSG_MAX];
- SHASH_DESC_ON_STACK(desc, tfm);
int err;
if (len > CMAC_MSG_MAX)
@@ -181,8 +180,6 @@ static int aes_cmac(struct crypto_shash *tfm, const u8 k[16], const u8 *m,
return -EINVAL;
}
- desc->tfm = tfm;
-
/* Swap key and message from LSB to MSB */
swap_buf(k, tmp, 16);
swap_buf(m, msg_msb, len);
@@ -196,8 +193,7 @@ static int aes_cmac(struct crypto_shash *tfm, const u8 k[16], const u8 *m,
return err;
}
- err = crypto_shash_digest(desc, msg_msb, len, mac_msb);
- shash_desc_zero(desc);
+ err = crypto_shash_tfm_digest(tfm, msg_msb, len, mac_msb);
if (err) {
BT_ERR("Hash computation error %d", err);
return err;
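The bluetooth hunk here and the sctp hunks below make the same substitution: a one-shot digest that used to need a shash_desc on the stack now goes through the crypto_shash_tfm_digest() helper. A minimal sketch of the conversion (tfm is assumed to be an already-allocated, already-keyed shash):

	#include <crypto/hash.h>

	static int one_shot_digest(struct crypto_shash *tfm,
				   const u8 *msg, unsigned int len, u8 *out)
	{
		/*
		 * Old pattern:
		 *	SHASH_DESC_ON_STACK(desc, tfm);
		 *	desc->tfm = tfm;
		 *	err = crypto_shash_digest(desc, msg, len, out);
		 *	shash_desc_zero(desc);
		 */
		return crypto_shash_tfm_digest(tfm, msg, len, out);
	}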
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 78db58c7aec2..c83ffe912163 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1046,14 +1046,8 @@ static int do_replace_finish(struct net *net, struct ebt_replace *repl,
vfree(table);
vfree(counterstmp);
-#ifdef CONFIG_AUDIT
- if (audit_enabled) {
- audit_log(audit_context(), GFP_KERNEL,
- AUDIT_NETFILTER_CFG,
- "table=%s family=%u entries=%u",
- repl->name, AF_BRIDGE, repl->nentries);
- }
-#endif
+ audit_log_nfcfg(repl->name, AF_BRIDGE, repl->nentries,
+ AUDIT_XT_OP_REPLACE);
return ret;
free_unlock:
@@ -1095,16 +1089,14 @@ static int do_replace(struct net *net, const void __user *user,
tmp.name[sizeof(tmp.name) - 1] = 0;
countersize = COUNTER_OFFSET(tmp.nentries) * nr_cpu_ids;
- newinfo = __vmalloc(sizeof(*newinfo) + countersize, GFP_KERNEL_ACCOUNT,
- PAGE_KERNEL);
+ newinfo = __vmalloc(sizeof(*newinfo) + countersize, GFP_KERNEL_ACCOUNT);
if (!newinfo)
return -ENOMEM;
if (countersize)
memset(newinfo->counters, 0, countersize);
- newinfo->entries = __vmalloc(tmp.entries_size, GFP_KERNEL_ACCOUNT,
- PAGE_KERNEL);
+ newinfo->entries = __vmalloc(tmp.entries_size, GFP_KERNEL_ACCOUNT);
if (!newinfo->entries) {
ret = -ENOMEM;
goto free_newinfo;
@@ -1130,6 +1122,8 @@ static void __ebt_unregister_table(struct net *net, struct ebt_table *table)
mutex_lock(&ebt_mutex);
list_del(&table->list);
mutex_unlock(&ebt_mutex);
+ audit_log_nfcfg(table->name, AF_BRIDGE, table->private->nentries,
+ AUDIT_XT_OP_UNREGISTER);
EBT_ENTRY_ITERATE(table->private->entries, table->private->entries_size,
ebt_cleanup_entry, net, NULL);
if (table->private->nentries)
@@ -1223,6 +1217,8 @@ int ebt_register_table(struct net *net, const struct ebt_table *input_table,
*res = NULL;
}
+ audit_log_nfcfg(repl->name, AF_BRIDGE, repl->nentries,
+ AUDIT_XT_OP_REGISTER);
return ret;
free_unlock:
mutex_unlock(&ebt_mutex);
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index a0e97f6c1072..66f22e8aa529 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -190,8 +190,7 @@ EXPORT_SYMBOL(ceph_compare_options);
* kvmalloc() doesn't fall back to the vmalloc allocator unless flags are
* compatible with (a superset of) GFP_KERNEL. This is because while the
* actual pages are allocated with the specified flags, the page table pages
- * are always allocated with GFP_KERNEL. map_vm_area() doesn't even take
- * flags because GFP_KERNEL is hard-coded in {p4d,pud,pmd,pte}_alloc().
+ * are always allocated with GFP_KERNEL.
*
* ceph_kvmalloc() may be called with GFP_KERNEL, GFP_NOFS or GFP_NOIO.
*/
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 190ca66a383b..dcd61aca343e 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -1353,12 +1353,13 @@ static void netns_put(struct ns_common *ns)
put_net(to_net_ns(ns));
}
-static int netns_install(struct nsproxy *nsproxy, struct ns_common *ns)
+static int netns_install(struct nsset *nsset, struct ns_common *ns)
{
+ struct nsproxy *nsproxy = nsset->nsproxy;
struct net *net = to_net_ns(ns);
if (!ns_capable(net->user_ns, CAP_SYS_ADMIN) ||
- !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
+ !ns_capable(nsset->cred->user_ns, CAP_SYS_ADMIN))
return -EPERM;
put_net(nsproxy->net_ns);
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index 7b6b1d2c3d10..b5bc680d4755 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -5,7 +5,6 @@
#include <linux/kernel.h>
#include <linux/init.h>
-#include <linux/cryptohash.h>
#include <linux/module.h>
#include <linux/cache.h>
#include <linux/random.h>
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 09cfbf5dd7ce..840bfdb3d7bd 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3181,11 +3181,11 @@ static int ipv6_generate_stable_address(struct in6_addr *address,
const struct inet6_dev *idev)
{
static DEFINE_SPINLOCK(lock);
- static __u32 digest[SHA_DIGEST_WORDS];
- static __u32 workspace[SHA_WORKSPACE_WORDS];
+ static __u32 digest[SHA1_DIGEST_WORDS];
+ static __u32 workspace[SHA1_WORKSPACE_WORDS];
static union {
- char __data[SHA_MESSAGE_BYTES];
+ char __data[SHA1_BLOCK_SIZE];
struct {
struct in6_addr secret;
__be32 prefix[2];
@@ -3210,7 +3210,7 @@ static int ipv6_generate_stable_address(struct in6_addr *address,
retry:
spin_lock_bh(&lock);
- sha_init(digest);
+ sha1_init(digest);
memset(&data, 0, sizeof(data));
memset(workspace, 0, sizeof(workspace));
memcpy(data.hwaddr, idev->dev->perm_addr, idev->dev->addr_len);
@@ -3219,7 +3219,7 @@ retry:
data.secret = secret;
data.dad_count = dad_count;
- sha_transform(digest, data.__data, workspace);
+ sha1_transform(digest, data.__data, workspace);
temp = *address;
temp.s6_addr32[2] = (__force __be32)digest[0];
diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c
index ffcfcd2b128f..85dddfe3a2c6 100644
--- a/net/ipv6/seg6_hmac.c
+++ b/net/ipv6/seg6_hmac.c
@@ -34,7 +34,6 @@
#include <net/addrconf.h>
#include <net/xfrm.h>
-#include <linux/cryptohash.h>
#include <crypto/hash.h>
#include <crypto/sha.h>
#include <net/seg6.h>
diff --git a/net/mptcp/crypto.c b/net/mptcp/crypto.c
index 0f5a414a9366..3d980713a9e2 100644
--- a/net/mptcp/crypto.c
+++ b/net/mptcp/crypto.c
@@ -59,7 +59,7 @@ void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac)
put_unaligned_be64(key2, key2be);
/* Generate key xored with ipad */
- memset(input, 0x36, SHA_MESSAGE_BYTES);
+ memset(input, 0x36, SHA256_BLOCK_SIZE);
for (i = 0; i < 8; i++)
input[i] ^= key1be[i];
for (i = 0; i < 8; i++)
@@ -76,7 +76,7 @@ void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac)
sha256_final(&state, &input[SHA256_BLOCK_SIZE]);
/* Prepare second part of hmac */
- memset(input, 0x5C, SHA_MESSAGE_BYTES);
+ memset(input, 0x5C, SHA256_BLOCK_SIZE);
for (i = 0; i < 8; i++)
input[i] ^= key1be[i];
for (i = 0; i < 8; i++)
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index cd2b034eef59..99a468be4a59 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1408,15 +1408,9 @@ xt_replace_table(struct xt_table *table,
}
}
-#ifdef CONFIG_AUDIT
- if (audit_enabled) {
- audit_log(audit_context(), GFP_KERNEL,
- AUDIT_NETFILTER_CFG,
- "table=%s family=%u entries=%u",
- table->name, table->af, private->number);
- }
-#endif
-
+ audit_log_nfcfg(table->name, table->af, private->number,
+ !private->number ? AUDIT_XT_OP_REGISTER :
+ AUDIT_XT_OP_REPLACE);
return private;
}
EXPORT_SYMBOL_GPL(xt_replace_table);
@@ -1478,6 +1472,8 @@ void *xt_unregister_table(struct xt_table *table)
private = table->private;
list_del(&table->list);
mutex_unlock(&xt[table->af].mutex);
+ audit_log_nfcfg(table->name, table->af, private->number,
+ AUDIT_XT_OP_UNREGISTER);
kfree(table);
return private;
diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index 4278764d82b8..83e97e8892e0 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -741,14 +741,8 @@ void sctp_auth_calculate_hmac(const struct sctp_association *asoc,
if (crypto_shash_setkey(tfm, &asoc_key->data[0], asoc_key->len))
goto free;
- {
- SHASH_DESC_ON_STACK(desc, tfm);
-
- desc->tfm = tfm;
- crypto_shash_digest(desc, (u8 *)auth,
- end - (unsigned char *)auth, digest);
- shash_desc_zero(desc);
- }
+ crypto_shash_tfm_digest(tfm, (u8 *)auth, end - (unsigned char *)auth,
+ digest);
free:
if (free_key)
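
This conversion (like the sm_make_chunk.c and security/keys/encrypted-keys changes further down) switches to the new crypto_shash_tfm_digest() helper, which folds the SHASH_DESC_ON_STACK / crypto_shash_digest() / shash_desc_zero() sequence into a single call. A minimal sketch of the one-shot pattern, with a hypothetical algorithm choice and buffers:

#include <crypto/hash.h>
#include <linux/err.h>

/* sketch only: hash "len" bytes at "buf" into "out" without managing a
 * shash_desc by hand; "sha256" is an arbitrary example algorithm */
static int example_oneshot_digest(const u8 *buf, unsigned int len, u8 *out)
{
	struct crypto_shash *tfm;
	int err;

	tfm = crypto_alloc_shash("sha256", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_shash_tfm_digest(tfm, buf, len, out);
	crypto_free_shash(tfm);
	return err;
}
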
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index f7cb0b7faec2..47910470e532 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1670,17 +1670,14 @@ static struct sctp_cookie_param *sctp_pack_cookie(
ntohs(init_chunk->chunk_hdr->length), raw_addrs, addrs_len);
if (sctp_sk(ep->base.sk)->hmac) {
- SHASH_DESC_ON_STACK(desc, sctp_sk(ep->base.sk)->hmac);
+ struct crypto_shash *tfm = sctp_sk(ep->base.sk)->hmac;
int err;
/* Sign the message. */
- desc->tfm = sctp_sk(ep->base.sk)->hmac;
-
- err = crypto_shash_setkey(desc->tfm, ep->secret_key,
+ err = crypto_shash_setkey(tfm, ep->secret_key,
sizeof(ep->secret_key)) ?:
- crypto_shash_digest(desc, (u8 *)&cookie->c, bodysize,
- cookie->signature);
- shash_desc_zero(desc);
+ crypto_shash_tfm_digest(tfm, (u8 *)&cookie->c, bodysize,
+ cookie->signature);
if (err)
goto free_cookie;
}
@@ -1741,17 +1738,13 @@ struct sctp_association *sctp_unpack_cookie(
/* Check the signature. */
{
- SHASH_DESC_ON_STACK(desc, sctp_sk(ep->base.sk)->hmac);
+ struct crypto_shash *tfm = sctp_sk(ep->base.sk)->hmac;
int err;
- desc->tfm = sctp_sk(ep->base.sk)->hmac;
-
- err = crypto_shash_setkey(desc->tfm, ep->secret_key,
+ err = crypto_shash_setkey(tfm, ep->secret_key,
sizeof(ep->secret_key)) ?:
- crypto_shash_digest(desc, (u8 *)bear_cookie, bodysize,
- digest);
- shash_desc_zero(desc);
-
+ crypto_shash_tfm_digest(tfm, (u8 *)bear_cookie, bodysize,
+ digest);
if (err) {
*error = -SCTP_IERROR_NOMEM;
goto fail;
diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c
index 39d7b34d06d2..fcfac59f8b72 100644
--- a/net/smc/smc_rx.c
+++ b/net/smc/smc_rx.c
@@ -129,16 +129,8 @@ out:
sock_put(sk);
}
-static int smc_rx_pipe_buf_nosteal(struct pipe_inode_info *pipe,
- struct pipe_buffer *buf)
-{
- return 1;
-}
-
static const struct pipe_buf_operations smc_pipe_ops = {
- .confirm = generic_pipe_buf_confirm,
.release = smc_rx_pipe_buf_release,
- .steal = smc_rx_pipe_buf_nosteal,
.get = generic_pipe_buf_get
};
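
The smc_rx change relies on ->confirm and ->steal having become optional in struct pipe_buf_operations, so trivial implementations (generic_pipe_buf_confirm, the local "nosteal" stub) can simply be left out. A minimal sketch of such a reduced ops table, with a hypothetical release callback:

#include <linux/pipe_fs_i.h>

static void example_pipe_buf_release(struct pipe_inode_info *pipe,
				     struct pipe_buffer *buf)
{
	/* hypothetical: drop whatever reference this buffer pinned */
}

/* sketch only: a buffer that is always up to date and never stealable
 * now declares just the callbacks it actually needs */
static const struct pipe_buf_operations example_pipe_ops = {
	.release = example_pipe_buf_release,
	.get	 = generic_pipe_buf_get,
};
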
diff --git a/samples/Kconfig b/samples/Kconfig
index 9d236c346de5..205076cf234e 100644
--- a/samples/Kconfig
+++ b/samples/Kconfig
@@ -171,7 +171,7 @@ config SAMPLE_VFIO_MDEV_MBOCHS
config SAMPLE_ANDROID_BINDERFS
bool "Build Android binderfs example"
- depends on CONFIG_ANDROID_BINDERFS
+ depends on ANDROID_BINDERFS
help
Builds a sample program to illustrate the use of the Android binderfs
filesystem.
diff --git a/samples/binderfs/Makefile b/samples/binderfs/Makefile
index ea4c93d36256..a3ac5476338a 100644
--- a/samples/binderfs/Makefile
+++ b/samples/binderfs/Makefile
@@ -1,2 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
-obj-$(CONFIG_SAMPLE_ANDROID_BINDERFS) += binderfs_example.o
+ifndef CROSS_COMPILE
+ifdef CONFIG_SAMPLE_ANDROID_BINDERFS
+hostprogs := binderfs_example
+endif
+endif
diff --git a/samples/ftrace/ftrace-direct-modify.c b/samples/ftrace/ftrace-direct-modify.c
index e04229d21475..c13a5bc5095b 100644
--- a/samples/ftrace/ftrace-direct-modify.c
+++ b/samples/ftrace/ftrace-direct-modify.c
@@ -20,18 +20,22 @@ static unsigned long my_ip = (unsigned long)schedule;
asm (
" .pushsection .text, \"ax\", @progbits\n"
+" .type my_tramp1, @function\n"
" my_tramp1:"
" pushq %rbp\n"
" movq %rsp, %rbp\n"
" call my_direct_func1\n"
" leave\n"
+" .size my_tramp1, .-my_tramp1\n"
" ret\n"
+" .type my_tramp2, @function\n"
" my_tramp2:"
" pushq %rbp\n"
" movq %rsp, %rbp\n"
" call my_direct_func2\n"
" leave\n"
" ret\n"
+" .size my_tramp2, .-my_tramp2\n"
" .popsection\n"
);
diff --git a/samples/ftrace/ftrace-direct-too.c b/samples/ftrace/ftrace-direct-too.c
index 27efa5f6ff52..d5c5022be664 100644
--- a/samples/ftrace/ftrace-direct-too.c
+++ b/samples/ftrace/ftrace-direct-too.c
@@ -15,6 +15,7 @@ extern void my_tramp(void *);
asm (
" .pushsection .text, \"ax\", @progbits\n"
+" .type my_tramp, @function\n"
" my_tramp:"
" pushq %rbp\n"
" movq %rsp, %rbp\n"
@@ -27,6 +28,7 @@ asm (
" popq %rdi\n"
" leave\n"
" ret\n"
+" .size my_tramp, .-my_tramp\n"
" .popsection\n"
);
diff --git a/samples/ftrace/ftrace-direct.c b/samples/ftrace/ftrace-direct.c
index a2e3063bd306..63ca06d42c80 100644
--- a/samples/ftrace/ftrace-direct.c
+++ b/samples/ftrace/ftrace-direct.c
@@ -13,6 +13,7 @@ extern void my_tramp(void *);
asm (
" .pushsection .text, \"ax\", @progbits\n"
+" .type my_tramp, @function\n"
" my_tramp:"
" pushq %rbp\n"
" movq %rsp, %rbp\n"
@@ -21,6 +22,7 @@ asm (
" popq %rdi\n"
" leave\n"
" ret\n"
+" .size my_tramp, .-my_tramp\n"
" .popsection\n"
);
diff --git a/samples/kprobes/kprobe_example.c b/samples/kprobes/kprobe_example.c
index d693c23a85e8..501911d1b327 100644
--- a/samples/kprobes/kprobe_example.c
+++ b/samples/kprobes/kprobe_example.c
@@ -25,7 +25,7 @@ static struct kprobe kp = {
};
/* kprobe pre_handler: called just before the probed instruction is executed */
-static int handler_pre(struct kprobe *p, struct pt_regs *regs)
+static int __kprobes handler_pre(struct kprobe *p, struct pt_regs *regs)
{
#ifdef CONFIG_X86
pr_info("<%s> pre_handler: p->addr = 0x%p, ip = %lx, flags = 0x%lx\n",
@@ -54,7 +54,7 @@ static int handler_pre(struct kprobe *p, struct pt_regs *regs)
}
/* kprobe post_handler: called after the probed instruction is executed */
-static void handler_post(struct kprobe *p, struct pt_regs *regs,
+static void __kprobes handler_post(struct kprobe *p, struct pt_regs *regs,
unsigned long flags)
{
#ifdef CONFIG_X86
@@ -90,6 +90,8 @@ static int handler_fault(struct kprobe *p, struct pt_regs *regs, int trapnr)
/* Return 0 because we don't handle the fault. */
return 0;
}
+/* NOKPROBE_SYMBOL() is also available */
+NOKPROBE_SYMBOL(handler_fault);
static int __init kprobe_init(void)
{
diff --git a/samples/kprobes/kretprobe_example.c b/samples/kprobes/kretprobe_example.c
index 186315ca88b3..013e8e6ebae9 100644
--- a/samples/kprobes/kretprobe_example.c
+++ b/samples/kprobes/kretprobe_example.c
@@ -48,6 +48,7 @@ static int entry_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
data->entry_stamp = ktime_get();
return 0;
}
+NOKPROBE_SYMBOL(entry_handler);
/*
* Return-probe handler: Log the return value and duration. Duration may turn
@@ -67,6 +68,7 @@ static int ret_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
func_name, retval, (long long)delta);
return 0;
}
+NOKPROBE_SYMBOL(ret_handler);
static struct kretprobe my_kretprobe = {
.handler = ret_handler,
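
Both sample updates mark the probe handlers themselves as non-probeable, either with the __kprobes annotation or with NOKPROBE_SYMBOL(), so that placing another probe on a handler cannot recurse into the kprobes machinery. A minimal sketch with a hypothetical handler:

#include <linux/kprobes.h>

/* sketch only: a pre-handler excluded from probing via NOKPROBE_SYMBOL();
 * tagging it __kprobes (placing it in .kprobes.text) is the other option */
static int example_pre_handler(struct kprobe *p, struct pt_regs *regs)
{
	pr_info("probe hit at %p\n", p->addr);
	return 0;
}
NOKPROBE_SYMBOL(example_pre_handler);
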
diff --git a/samples/vfs/test-statx.c b/samples/vfs/test-statx.c
index a3d68159fb51..76c577ea4fd8 100644
--- a/samples/vfs/test-statx.c
+++ b/samples/vfs/test-statx.c
@@ -216,7 +216,7 @@ int main(int argc, char **argv)
struct statx stx;
int ret, raw = 0, atflag = AT_SYMLINK_NOFOLLOW;
- unsigned int mask = STATX_ALL;
+ unsigned int mask = STATX_BASIC_STATS | STATX_BTIME;
for (argv++; *argv; argv++) {
if (strcmp(*argv, "-F") == 0) {
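
The test-statx change drops STATX_ALL, which is discouraged for new code; callers should request exactly the field groups they intend to read and then check stx_mask for what the filesystem actually supplied. A minimal userspace sketch (hypothetical path, assumes a libc that provides the statx() wrapper):

#define _GNU_SOURCE
#include <fcntl.h>
#include <sys/stat.h>
#include <stdio.h>

int main(void)
{
	struct statx stx;

	/* ask only for the basic fields plus the birth time */
	if (statx(AT_FDCWD, "/etc/os-release", AT_SYMLINK_NOFOLLOW,
		  STATX_BASIC_STATS | STATX_BTIME, &stx) != 0) {
		perror("statx");
		return 1;
	}
	printf("size=%llu, btime reported=%d\n",
	       (unsigned long long)stx.stx_size,
	       !!(stx.stx_mask & STATX_BTIME));
	return 0;
}
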
diff --git a/scripts/kernel-doc b/scripts/kernel-doc
index f746ca8fa403..f68d76dd97ba 100755
--- a/scripts/kernel-doc
+++ b/scripts/kernel-doc
@@ -213,7 +213,9 @@ my $type_constant = '\b``([^\`]+)``\b';
my $type_constant2 = '\%([-_\w]+)';
my $type_func = '(\w+)\(\)';
my $type_param = '\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)';
+my $type_param_ref = '([\!]?)\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)';
my $type_fp_param = '\@(\w+)\(\)'; # Special RST handling for func ptr params
+my $type_fp_param2 = '\@(\w+->\S+)\(\)'; # Special RST handling for structs with func ptr params
my $type_env = '(\$\w+)';
my $type_enum = '\&(enum\s*([_\w]+))';
my $type_struct = '\&(struct\s*([_\w]+))';
@@ -236,6 +238,7 @@ my @highlights_man = (
[$type_typedef, "\\\\fI\$1\\\\fP"],
[$type_union, "\\\\fI\$1\\\\fP"],
[$type_param, "\\\\fI\$1\\\\fP"],
+ [$type_param_ref, "\\\\fI\$1\$2\\\\fP"],
[$type_member, "\\\\fI\$1\$2\$3\\\\fP"],
[$type_fallback, "\\\\fI\$1\\\\fP"]
);
@@ -249,6 +252,7 @@ my @highlights_rst = (
[$type_member_func, "\\:c\\:type\\:`\$1\$2\$3\\\\(\\\\) <\$1>`"],
[$type_member, "\\:c\\:type\\:`\$1\$2\$3 <\$1>`"],
[$type_fp_param, "**\$1\\\\(\\\\)**"],
+ [$type_fp_param2, "**\$1\\\\(\\\\)**"],
[$type_func, "\$1()"],
[$type_enum, "\\:c\\:type\\:`\$1 <\$2>`"],
[$type_struct, "\\:c\\:type\\:`\$1 <\$2>`"],
@@ -256,7 +260,7 @@ my @highlights_rst = (
[$type_union, "\\:c\\:type\\:`\$1 <\$2>`"],
# in rst this can refer to any type
[$type_fallback, "\\:c\\:type\\:`\$1`"],
- [$type_param, "**\$1**"]
+ [$type_param_ref, "**\$1\$2**"]
);
my $blankline_rst = "\n";
@@ -327,13 +331,14 @@ my $lineprefix="";
# Parser states
use constant {
- STATE_NORMAL => 0, # normal code
- STATE_NAME => 1, # looking for function name
- STATE_BODY_MAYBE => 2, # body - or maybe more description
- STATE_BODY => 3, # the body of the comment
- STATE_PROTO => 4, # scanning prototype
- STATE_DOCBLOCK => 5, # documentation block
- STATE_INLINE => 6, # gathering documentation outside main block
+ STATE_NORMAL => 0, # normal code
+ STATE_NAME => 1, # looking for function name
+ STATE_BODY_MAYBE => 2, # body - or maybe more description
+ STATE_BODY => 3, # the body of the comment
+ STATE_BODY_WITH_BLANK_LINE => 4, # the body, which has a blank line
+ STATE_PROTO => 5, # scanning prototype
+ STATE_DOCBLOCK => 6, # documentation block
+ STATE_INLINE => 7, # gathering doc outside main block
};
my $state;
my $in_doc_sect;
@@ -1953,6 +1958,12 @@ sub process_body($$) {
}
}
+ if ($state == STATE_BODY_WITH_BLANK_LINE && /^\s*\*\s?\S/) {
+ dump_section($file, $section, $contents);
+ $section = $section_default;
+ $contents = "";
+ }
+
if (/$doc_sect/i) { # case insensitive for supported section names
$newsection = $1;
$newcontents = $2;
@@ -2006,18 +2017,21 @@ sub process_body($$) {
$state = STATE_PROTO;
$brcount = 0;
} elsif (/$doc_content/) {
- # miguel-style comment kludge, look for blank lines after
- # @parameter line to signify start of description
if ($1 eq "") {
- if ($section =~ m/^@/ || $section eq $section_context) {
+ if ($section eq $section_context) {
dump_section($file, $section, $contents);
$section = $section_default;
$contents = "";
$new_start_line = $.;
+ $state = STATE_BODY;
} else {
+ if ($section ne $section_default) {
+ $state = STATE_BODY_WITH_BLANK_LINE;
+ } else {
+ $state = STATE_BODY;
+ }
$contents .= "\n";
}
- $state = STATE_BODY;
} elsif ($state == STATE_BODY_MAYBE) {
# Continued declaration purpose
chomp($declaration_purpose);
@@ -2169,7 +2183,8 @@ sub process_file($) {
process_normal();
} elsif ($state == STATE_NAME) {
process_name($file, $_);
- } elsif ($state == STATE_BODY || $state == STATE_BODY_MAYBE) {
+ } elsif ($state == STATE_BODY || $state == STATE_BODY_MAYBE ||
+ $state == STATE_BODY_WITH_BLANK_LINE) {
process_body($file, $_);
} elsif ($state == STATE_INLINE) { # scanning for inline parameters
process_inline($file, $_);
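
The kernel-doc changes add a parser state meant to let an indented continuation after a blank line remain part of the preceding parameter description, preserve the "!" when a negated parameter such as !@enabled is referenced, and render function pointers reached through a parameter, e.g. @ops->show(), like other function references. A hedged sketch of a comment exercising the new markup (all names hypothetical):

/**
 * example_register() - hypothetical function, shown only for the markup
 * @ops:     callback table; @ops->show() is now linked like a function call
 *
 *           an indented continuation after a blank line stays part of the
 *           @ops description
 * @enabled: text may refer to !@enabled and the negation is preserved
 *
 * Regular description text starts here, after the parameter block.
 */
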
diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh
index d09ab4afbda4..3adef49250af 100755
--- a/scripts/link-vmlinux.sh
+++ b/scripts/link-vmlinux.sh
@@ -55,6 +55,29 @@ modpost_link()
${LD} ${KBUILD_LDFLAGS} -r -o ${1} ${objects}
}
+objtool_link()
+{
+ local objtoolopt;
+
+ if [ -n "${CONFIG_VMLINUX_VALIDATION}" ]; then
+ objtoolopt="check"
+ if [ -z "${CONFIG_FRAME_POINTER}" ]; then
+ objtoolopt="${objtoolopt} --no-fp"
+ fi
+ if [ -n "${CONFIG_GCOV_KERNEL}" ]; then
+ objtoolopt="${objtoolopt} --no-unreachable"
+ fi
+ if [ -n "${CONFIG_RETPOLINE}" ]; then
+ objtoolopt="${objtoolopt} --retpoline"
+ fi
+ if [ -n "${CONFIG_X86_SMAP}" ]; then
+ objtoolopt="${objtoolopt} --uaccess"
+ fi
+ info OBJTOOL ${1}
+ tools/objtool/objtool ${objtoolopt} ${1}
+ fi
+}
+
# Link of vmlinux
# ${1} - output file
# ${2}, ${3}, ... - optional extra .o files
@@ -251,6 +274,7 @@ ${MAKE} -f "${srctree}/scripts/Makefile.build" obj=init need-builtin=1
#link vmlinux.o
info LD vmlinux.o
modpost_link vmlinux.o
+objtool_link vmlinux.o
# modpost vmlinux.o to check for section mismatches
${MAKE} -f "${srctree}/scripts/Makefile.modpost" MODPOST_VMLINUX=1
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 7f7d4ee7b652..9a98af90e625 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -948,7 +948,7 @@ static void check_section(const char *modname, struct elf_info *elf,
#define DATA_SECTIONS ".data", ".data.rel"
#define TEXT_SECTIONS ".text", ".text.unlikely", ".sched.text", \
- ".kprobes.text", ".cpuidle.text"
+ ".kprobes.text", ".cpuidle.text", ".noinstr.text"
#define OTHER_TEXT_SECTIONS ".ref.text", ".head.text", ".spinlock.text", \
".fixup", ".entry.text", ".exception.text", ".text.*", \
".coldtext"
diff --git a/scripts/sphinx-pre-install b/scripts/sphinx-pre-install
index fa3fb05cd54b..c680c3efb176 100755
--- a/scripts/sphinx-pre-install
+++ b/scripts/sphinx-pre-install
@@ -2,7 +2,7 @@
# SPDX-License-Identifier: GPL-2.0-or-later
use strict;
-# Copyright (c) 2017-2019 Mauro Carvalho Chehab <mchehab@kernel.org>
+# Copyright (c) 2017-2020 Mauro Carvalho Chehab <mchehab@kernel.org>
#
my $prefix = "./";
@@ -22,10 +22,16 @@ my $need = 0;
my $optional = 0;
my $need_symlink = 0;
my $need_sphinx = 0;
+my $need_venv = 0;
+my $need_virtualenv = 0;
my $rec_sphinx_upgrade = 0;
my $install = "";
my $virtenv_dir = "";
+my $python_cmd = "";
my $min_version;
+my $cur_version;
+my $rec_version = "1.7.9"; # PDF won't build here
+my $min_pdf_version = "2.4.4"; # Min version where pdf builds
#
# Command line arguments
@@ -142,12 +148,30 @@ sub findprog($)
}
}
+sub find_python_no_venv()
+{
+ my $prog = shift;
+
+ my $cur_dir = qx(pwd);
+ $cur_dir =~ s/\s+$//;
+
+ foreach my $dir (split(/:/, $ENV{PATH})) {
+ next if ($dir =~ m,($cur_dir)/sphinx,);
+ return "$dir/python3" if(-x "$dir/python3");
+ }
+ foreach my $dir (split(/:/, $ENV{PATH})) {
+ next if ($dir =~ m,($cur_dir)/sphinx,);
+ return "$dir/python" if(-x "$dir/python");
+ }
+ return "python";
+}
+
sub check_program($$)
{
my $prog = shift;
my $is_optional = shift;
- return if findprog($prog);
+ return $prog if findprog($prog);
add_package($prog, $is_optional);
}
@@ -168,9 +192,9 @@ sub check_python_module($$)
my $prog = shift;
my $is_optional = shift;
- my $err = system("python3 -c 'import $prog' 2>/dev/null /dev/null");
- return if ($err == 0);
- my $err = system("python -c 'import $prog' 2>/dev/null /dev/null");
+ return if (!$python_cmd);
+
+ my $err = system("$python_cmd -c 'import $prog' 2>/dev/null /dev/null");
return if ($err == 0);
add_package($prog, $is_optional);
@@ -225,23 +249,33 @@ sub get_sphinx_fname()
return $fname;
}
- if ($virtualenv) {
- my $prog = findprog("virtualenv-3");
- $prog = findprog("virtualenv-3.5") if (!$prog);
+ return "";
+}
- check_program("virtualenv", 0) if (!$prog);
- $need_sphinx = 1;
- } else {
- add_package("python-sphinx", 0);
- }
+sub get_sphinx_version($)
+{
+ my $cmd = shift;
+ my $ver;
- return "";
+ open IN, "$cmd --version 2>&1 |";
+ while (<IN>) {
+ if (m/^\s*sphinx-build\s+([\d\.]+)(\+\/[\da-f]+)?$/) {
+ $ver=$1;
+ last;
+ }
+ # Sphinx 1.2.x uses a different format
+ if (m/^\s*Sphinx.*\s+([\d\.]+)$/) {
+ $ver=$1;
+ last;
+ }
+ }
+ close IN;
+ return $ver;
}
sub check_sphinx()
{
- my $rec_version;
- my $cur_version;
+ my $default_version;
open IN, $conf or die "Can't open $conf";
while (<IN>) {
@@ -257,45 +291,39 @@ sub check_sphinx()
open IN, $requirement_file or die "Can't open $requirement_file";
while (<IN>) {
if (m/^\s*Sphinx\s*==\s*([\d\.]+)$/) {
- $rec_version=$1;
+ $default_version=$1;
last;
}
}
close IN;
- die "Can't get recommended sphinx version from $requirement_file" if (!$min_version);
+ die "Can't get default sphinx version from $requirement_file" if (!$default_version);
- $virtenv_dir = $virtenv_prefix . $rec_version;
+ $virtenv_dir = $virtenv_prefix . $default_version;
my $sphinx = get_sphinx_fname();
- return if ($sphinx eq "");
-
- open IN, "$sphinx --version 2>&1 |" or die "$sphinx returned an error";
- while (<IN>) {
- if (m/^\s*sphinx-build\s+([\d\.]+)(\+\/[\da-f]+)?$/) {
- $cur_version=$1;
- last;
- }
- # Sphinx 1.2.x uses a different format
- if (m/^\s*Sphinx.*\s+([\d\.]+)$/) {
- $cur_version=$1;
- last;
- }
+ if ($sphinx eq "") {
+ $need_sphinx = 1;
+ return;
}
- close IN;
+
+ $cur_version = get_sphinx_version($sphinx);
+ die ("$sphinx returned an error") if (!$cur_version);
die "$sphinx didn't return its version" if (!$cur_version);
if ($cur_version lt $min_version) {
printf "ERROR: Sphinx version is %s. It should be >= %s (recommended >= %s)\n",
- $cur_version, $min_version, $rec_version;;
+ $cur_version, $min_version, $default_version;
$need_sphinx = 1;
return;
}
if ($cur_version lt $rec_version) {
- printf "Sphinx version %s\n", $cur_version;
- print "Warning: It is recommended at least Sphinx version $rec_version.\n";
+ $rec_sphinx_upgrade = 1;
+ return;
+ }
+ if ($cur_version lt $min_pdf_version) {
$rec_sphinx_upgrade = 1;
return;
}
@@ -336,6 +364,7 @@ sub give_debian_hints()
my %map = (
"python-sphinx" => "python3-sphinx",
"sphinx_rtd_theme" => "python3-sphinx-rtd-theme",
+ "ensurepip" => "python3-venv",
"virtualenv" => "virtualenv",
"dot" => "graphviz",
"convert" => "imagemagick",
@@ -349,7 +378,8 @@ sub give_debian_hints()
"fonts-dejavu", 2);
check_missing_file(["/usr/share/fonts/noto-cjk/NotoSansCJK-Regular.ttc",
- "/usr/share/fonts/opentype/noto/NotoSerifCJK-Regular.ttc"],
+ "/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc",
+ "/usr/share/fonts/opentype/noto/NotoSerifCJK-Regular.ttc"],
"fonts-noto-cjk", 2);
}
@@ -446,9 +476,11 @@ sub give_opensuse_hints()
"convert" => "ImageMagick",
"Pod::Usage" => "perl-Pod-Usage",
"xelatex" => "texlive-xetex-bin",
- "rsvg-convert" => "rsvg-view",
);
+ # On Tumbleweed, this package is also named rsvg-convert
+ $map{"rsvg-convert"} = "rsvg-view" if (!($system_release =~ /Tumbleweed/));
+
my @suse_tex_pkgs = (
"texlive-babel-english",
"texlive-caption",
@@ -491,7 +523,7 @@ sub give_mageia_hints()
"convert" => "ImageMagick",
"Pod::Usage" => "perl-Pod-Usage",
"xelatex" => "texlive",
- "rsvg-convert" => "librsvg2-tools",
+ "rsvg-convert" => "librsvg2",
);
my @tex_pkgs = (
@@ -500,16 +532,29 @@ sub give_mageia_hints()
$map{"latexmk"} = "texlive-collection-basic";
+ my $packager_cmd;
+ my $noto_sans;
+ if ($system_release =~ /OpenMandriva/) {
+ $packager_cmd = "dnf install";
+ $noto_sans = "noto-sans-cjk-fonts";
+ @tex_pkgs = ( "texlive-collection-fontsextra" );
+ } else {
+ $packager_cmd = "urpmi";
+ $noto_sans = "google-noto-sans-cjk-ttc-fonts";
+ }
+
+
if ($pdf) {
- check_missing_file(["/usr/share/fonts/google-noto-cjk/NotoSansCJK-Regular.ttc"],
- "google-noto-sans-cjk-ttc-fonts", 2);
+ check_missing_file(["/usr/share/fonts/google-noto-cjk/NotoSansCJK-Regular.ttc",
+ "/usr/share/fonts/TTF/NotoSans-Regular.ttf"],
+ $noto_sans, 2);
}
check_rpm_missing(\@tex_pkgs, 2) if ($pdf);
check_missing(\%map);
return if (!$need && !$optional);
- printf("You should run:\n\n\tsudo urpmi $install\n");
+ printf("You should run:\n\n\tsudo $packager_cmd $install\n");
}
sub give_arch_linux_hints()
@@ -557,7 +602,8 @@ sub give_gentoo_hints()
"media-fonts/dejavu", 2) if ($pdf);
if ($pdf) {
- check_missing_file(["/usr/share/fonts/noto-cjk/NotoSansCJKsc-Regular.otf"],
+ check_missing_file(["/usr/share/fonts/noto-cjk/NotoSansCJKsc-Regular.otf",
+ "/usr/share/fonts/noto-cjk/NotoSerifCJK-Regular.ttc"],
"media-fonts/noto-cjk", 2);
}
@@ -572,10 +618,10 @@ sub give_gentoo_hints()
my $portage_imagemagick = "/etc/portage/package.use/imagemagick";
my $portage_cairo = "/etc/portage/package.use/graphviz";
- if (qx(cat $portage_imagemagick) ne "$imagemagick\n") {
+ if (qx(grep imagemagick $portage_imagemagick 2>/dev/null) eq "") {
printf("\tsudo su -c 'echo \"$imagemagick\" > $portage_imagemagick'\n")
}
- if (qx(cat $portage_cairo) ne "$cairo\n") {
+ if (qx(grep graphviz $portage_cairo 2>/dev/null) eq "") {
printf("\tsudo su -c 'echo \"$cairo\" > $portage_cairo'\n");
}
@@ -622,6 +668,10 @@ sub check_distros()
give_mageia_hints;
return;
}
+ if ($system_release =~ /OpenMandriva/) {
+ give_mageia_hints;
+ return;
+ }
if ($system_release =~ /Arch Linux/) {
give_arch_linux_hints;
return;
@@ -651,22 +701,58 @@ sub check_distros()
sub deactivate_help()
{
- printf "\tIf you want to exit the virtualenv, you can use:\n";
+ printf "\nIf you want to exit the virtualenv, you can use:\n";
printf "\tdeactivate\n";
}
sub check_needs()
{
- # Check for needed programs/tools
+ # Check if Sphinx is already accessible from current environment
check_sphinx();
if ($system_release) {
- print "Detected OS: $system_release.\n\n";
+ print "Detected OS: $system_release.\n";
} else {
- print "Unknown OS\n\n";
+ print "Unknown OS\n";
+ }
+ printf "Sphinx version: %s\n\n", $cur_version if ($cur_version);
+
+ # Check python command line, trying first python3
+ $python_cmd = findprog("python3");
+ $python_cmd = check_program("python", 0) if (!$python_cmd);
+
+ # Check the type of virtual env, depending on Python version
+ if ($python_cmd) {
+ if ($virtualenv) {
+ my $tmp = qx($python_cmd --version 2>&1);
+ if ($tmp =~ m/(\d+\.)(\d+\.)/) {
+ if ($1 >= 3 && $2 >= 3) {
+ $need_venv = 1; # python 3.3 or upper
+ } else {
+ $need_virtualenv = 1;
+ }
+ if ($1 < 3) {
+ # Complain if it finds python2 (or worse)
+ printf "Warning: python$1 support is deprecated. Use it with caution!\n";
+ }
+ } else {
+ die "Warning: couldn't identify $python_cmd version!";
+ }
+ } else {
+ add_package("python-sphinx", 0);
+ }
}
- print "To upgrade Sphinx, use:\n\n" if ($rec_sphinx_upgrade);
+ # Set virtualenv command line, if python < 3.3
+ my $virtualenv_cmd;
+ if ($need_virtualenv) {
+ $virtualenv_cmd = findprog("virtualenv-3");
+ $virtualenv_cmd = findprog("virtualenv-3.5") if (!$virtualenv_cmd);
+ if (!$virtualenv_cmd) {
+ check_program("virtualenv", 0);
+ $virtualenv_cmd = "virtualenv";
+ }
+ }
# Check for needed programs/tools
check_perl_module("Pod::Usage", 0);
@@ -681,46 +767,81 @@ sub check_needs()
check_program("rsvg-convert", 2) if ($pdf);
check_program("latexmk", 2) if ($pdf);
+ if ($need_sphinx || $rec_sphinx_upgrade) {
+ check_python_module("ensurepip", 0) if ($need_venv);
+ }
+
+ # Do distro-specific checks and output distro-install commands
check_distros();
+ if (!$python_cmd) {
+ if ($need == 1) {
+ die "Can't build as $need mandatory dependency is missing";
+ } elsif ($need) {
+ die "Can't build as $need mandatory dependencies are missing";
+ }
+ }
+
+ # Check if sphinx-build is called sphinx-build-3
if ($need_symlink) {
printf "\tsudo ln -sf %s /usr/bin/sphinx-build\n\n",
which("sphinx-build-3");
}
+
+ # NOTE: if the system has a too old Sphinx version installed,
+ # it will recommend installing a newer version using virtualenv
+
if ($need_sphinx || $rec_sphinx_upgrade) {
my $min_activate = "$ENV{'PWD'}/${virtenv_prefix}${min_version}/bin/activate";
my @activates = glob "$ENV{'PWD'}/${virtenv_prefix}*/bin/activate";
+ if ($cur_version lt $rec_version) {
+ print "Warning: It is recommended at least Sphinx version $rec_version.\n";
+ print " If you want pdf, you need at least $min_pdf_version.\n";
+ }
+ if ($cur_version lt $min_pdf_version) {
+ print "Note: It is recommended at least Sphinx version $min_pdf_version if you need PDF support.\n";
+ }
@activates = sort {$b cmp $a} @activates;
-
- if ($need_sphinx && scalar @activates > 0 && $activates[0] ge $min_activate) {
- printf "\nNeed to activate a compatible Sphinx version on virtualenv with:\n";
- printf "\t. $activates[0]\n";
- deactivate_help();
- exit (1);
- } else {
- my $rec_activate = "$virtenv_dir/bin/activate";
- my $virtualenv = findprog("virtualenv-3");
- my $rec_python3 = "";
- $virtualenv = findprog("virtualenv-3.5") if (!$virtualenv);
- $virtualenv = findprog("virtualenv") if (!$virtualenv);
- $virtualenv = "virtualenv" if (!$virtualenv);
-
- my $rel = "";
- if (index($system_release, "Ubuntu") != -1) {
- $rel = $1 if ($system_release =~ /Ubuntu\s+(\d+)[.]/);
- if ($rel && $rel >= 16) {
- $rec_python3 = " -p python3";
- }
+ my ($activate, $ver);
+ foreach my $f (@activates) {
+ next if ($f lt $min_activate);
+
+ my $sphinx_cmd = $f;
+ $sphinx_cmd =~ s/activate/sphinx-build/;
+ next if (! -f $sphinx_cmd);
+
+ $ver = get_sphinx_version($sphinx_cmd);
+ if ($need_sphinx && ($ver ge $min_version)) {
+ $activate = $f;
+ last;
+ } elsif ($ver gt $cur_version) {
+ $activate = $f;
+ last;
}
- if (index($system_release, "Debian") != -1) {
- $rel = $1 if ($system_release =~ /Debian\s+(\d+)/);
- if ($rel && $rel >= 7) {
- $rec_python3 = " -p python3";
- }
+ }
+ if ($activate ne "") {
+ if ($need_sphinx) {
+ printf "\nNeed to activate Sphinx (version $ver) on virtualenv with:\n";
+ printf "\t. $activate\n";
+ deactivate_help();
+ exit (1);
+ } else {
+ printf "\nYou may also use a newer Sphinx (version $ver) with:\n";
+ printf "\tdeactivate && . $activate\n";
}
+ } else {
+ my $rec_activate = "$virtenv_dir/bin/activate";
+
+ print "To upgrade Sphinx, use:\n\n" if ($rec_sphinx_upgrade);
+
+ $python_cmd = find_python_no_venv();
- printf "\t$virtualenv$rec_python3 $virtenv_dir\n";
+ if ($need_venv) {
+ printf "\t$python_cmd -m venv $virtenv_dir\n";
+ } else {
+ printf "\t$virtualenv_cmd $virtenv_dir\n";
+ }
printf "\t. $rec_activate\n";
printf "\tpip install -r $requirement_file\n";
deactivate_help();
@@ -780,6 +901,24 @@ $system_release = catcheck("/etc/system-release") if !$system_release;
$system_release = catcheck("/etc/redhat-release") if !$system_release;
$system_release = catcheck("/etc/lsb-release") if !$system_release;
$system_release = catcheck("/etc/gentoo-release") if !$system_release;
+
+# This seems more common than LSB these days
+if (!$system_release) {
+ my %os_var;
+ if (open IN, "cat /etc/os-release|") {
+ while (<IN>) {
+ if (m/^([\w\d\_]+)=\"?([^\"]*)\"?\n/) {
+ $os_var{$1}=$2;
+ }
+ }
+ $system_release = $os_var{"NAME"};
+ if (defined($os_var{"VERSION_ID"})) {
+ $system_release .= " " . $os_var{"VERSION_ID"} if (defined($os_var{"VERSION_ID"}));
+ } else {
+ $system_release .= " " . $os_var{"VERSION"};
+ }
+ }
+}
$system_release = catcheck("/etc/issue") if !$system_release;
$system_release =~ s/\s+$//;
diff --git a/security/keys/encrypted-keys/encrypted.c b/security/keys/encrypted-keys/encrypted.c
index f6797ba44bf7..14cf81d1a30b 100644
--- a/security/keys/encrypted-keys/encrypted.c
+++ b/security/keys/encrypted-keys/encrypted.c
@@ -323,19 +323,6 @@ error:
return ukey;
}
-static int calc_hash(struct crypto_shash *tfm, u8 *digest,
- const u8 *buf, unsigned int buflen)
-{
- SHASH_DESC_ON_STACK(desc, tfm);
- int err;
-
- desc->tfm = tfm;
-
- err = crypto_shash_digest(desc, buf, buflen, digest);
- shash_desc_zero(desc);
- return err;
-}
-
static int calc_hmac(u8 *digest, const u8 *key, unsigned int keylen,
const u8 *buf, unsigned int buflen)
{
@@ -351,7 +338,7 @@ static int calc_hmac(u8 *digest, const u8 *key, unsigned int keylen,
err = crypto_shash_setkey(tfm, key, keylen);
if (!err)
- err = calc_hash(tfm, digest, buf, buflen);
+ err = crypto_shash_tfm_digest(tfm, buf, buflen, digest);
crypto_free_shash(tfm);
return err;
}
@@ -381,7 +368,8 @@ static int get_derived_key(u8 *derived_key, enum derived_key_type key_type,
memcpy(derived_buf + strlen(derived_buf) + 1, master_key,
master_keylen);
- ret = calc_hash(hash_tfm, derived_key, derived_buf, derived_buf_len);
+ ret = crypto_shash_tfm_digest(hash_tfm, derived_buf, derived_buf_len,
+ derived_key);
kzfree(derived_buf);
return ret;
}
diff --git a/security/lockdown/lockdown.c b/security/lockdown/lockdown.c
index 5a952617a0eb..87cbdc64d272 100644
--- a/security/lockdown/lockdown.c
+++ b/security/lockdown/lockdown.c
@@ -150,7 +150,7 @@ static int __init lockdown_secfs_init(void)
{
struct dentry *dentry;
- dentry = securityfs_create_file("lockdown", 0600, NULL, NULL,
+ dentry = securityfs_create_file("lockdown", 0644, NULL, NULL,
&lockdown_ops);
return PTR_ERR_OR_ZERO(dentry);
}
diff --git a/security/selinux/Makefile b/security/selinux/Makefile
index 0c77ede1cc11..4d8e0e8adf0b 100644
--- a/security/selinux/Makefile
+++ b/security/selinux/Makefile
@@ -8,7 +8,7 @@ obj-$(CONFIG_SECURITY_SELINUX) := selinux.o
selinux-y := avc.o hooks.o selinuxfs.o netlink.o nlmsgtab.o netif.o \
netnode.o netport.o status.o \
ss/ebitmap.o ss/hashtab.o ss/symtab.o ss/sidtab.o ss/avtab.o \
- ss/policydb.o ss/services.o ss/conditional.o ss/mls.o
+ ss/policydb.o ss/services.o ss/conditional.o ss/mls.o ss/context.o
selinux-$(CONFIG_SECURITY_NETWORK_XFRM) += xfrm.o
diff --git a/security/selinux/include/netlabel.h b/security/selinux/include/netlabel.h
index d30d8d7cdc9c..0c58f62dc6ab 100644
--- a/security/selinux/include/netlabel.h
+++ b/security/selinux/include/netlabel.h
@@ -98,12 +98,6 @@ static inline int selinux_netlbl_skbuff_setsid(struct sk_buff *skb,
return 0;
}
-static inline int selinux_netlbl_conn_setsid(struct sock *sk,
- struct sockaddr *addr)
-{
- return 0;
-}
-
static inline int selinux_netlbl_sctp_assoc_request(struct sctp_endpoint *ep,
struct sk_buff *skb)
{
diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h
index d6036c018cf2..b0e02cfe3ce1 100644
--- a/security/selinux/include/security.h
+++ b/security/selinux/include/security.h
@@ -41,10 +41,11 @@
#define POLICYDB_VERSION_XPERMS_IOCTL 30
#define POLICYDB_VERSION_INFINIBAND 31
#define POLICYDB_VERSION_GLBLUB 32
+#define POLICYDB_VERSION_COMP_FTRANS 33 /* compressed filename transitions */
/* Range of policy versions we understand*/
#define POLICYDB_VERSION_MIN POLICYDB_VERSION_BASE
-#define POLICYDB_VERSION_MAX POLICYDB_VERSION_GLBLUB
+#define POLICYDB_VERSION_MAX POLICYDB_VERSION_COMP_FTRANS
/* Mask for just the mount related flags */
#define SE_MNTMASK 0x0f
diff --git a/security/selinux/ss/context.c b/security/selinux/ss/context.c
new file mode 100644
index 000000000000..38bc0aa524a6
--- /dev/null
+++ b/security/selinux/ss/context.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Implementations of the security context functions.
+ *
+ * Author: Ondrej Mosnacek <omosnacek@gmail.com>
+ * Copyright (C) 2020 Red Hat, Inc.
+ */
+
+#include <linux/jhash.h>
+
+#include "context.h"
+#include "mls.h"
+
+u32 context_compute_hash(const struct context *c)
+{
+ u32 hash = 0;
+
+ /*
+ * If a context is invalid, it will always be represented by a
+ * context struct with only the len & str set (and vice versa)
+ * under a given policy. Since context structs from different
+ * policies should never meet, it is safe to hash valid and
+ * invalid contexts differently. The context_cmp() function
+ * already operates under the same assumption.
+ */
+ if (c->len)
+ return full_name_hash(NULL, c->str, c->len);
+
+ hash = jhash_3words(c->user, c->role, c->type, hash);
+ hash = mls_range_hash(&c->range, hash);
+ return hash;
+}
diff --git a/security/selinux/ss/context.h b/security/selinux/ss/context.h
index 3ba044fe02ed..62990aa1ec9e 100644
--- a/security/selinux/ss/context.h
+++ b/security/selinux/ss/context.h
@@ -31,7 +31,6 @@ struct context {
u32 len; /* length of string in bytes */
struct mls_range range;
char *str; /* string representation if context cannot be mapped. */
- u32 hash; /* a hash of the string representation */
};
static inline void mls_context_init(struct context *c)
@@ -169,13 +168,12 @@ static inline int context_cpy(struct context *dst, struct context *src)
kfree(dst->str);
return rc;
}
- dst->hash = src->hash;
return 0;
}
static inline void context_destroy(struct context *c)
{
- c->user = c->role = c->type = c->hash = 0;
+ c->user = c->role = c->type = 0;
kfree(c->str);
c->str = NULL;
c->len = 0;
@@ -184,8 +182,6 @@ static inline void context_destroy(struct context *c)
static inline int context_cmp(struct context *c1, struct context *c2)
{
- if (c1->hash && c2->hash && (c1->hash != c2->hash))
- return 0;
if (c1->len && c2->len)
return (c1->len == c2->len && !strcmp(c1->str, c2->str));
if (c1->len || c2->len)
@@ -196,10 +192,7 @@ static inline int context_cmp(struct context *c1, struct context *c2)
mls_context_cmp(c1, c2));
}
-static inline unsigned int context_compute_hash(const char *s)
-{
- return full_name_hash(NULL, s, strlen(s));
-}
+u32 context_compute_hash(const struct context *c);
#endif /* _SS_CONTEXT_H_ */
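
With the cached hash member gone from struct context, the hash is computed on demand from the structured fields (user, role, type and the MLS range) by the new context_compute_hash(), so there is no stored value to keep in sync with the string form. A minimal sketch of a hypothetical caller:

/* sketch only: bucket a context into a power-of-two table using the
 * on-demand hash; "nslots" is a hypothetical table size */
static u32 example_context_bucket(const struct context *c, u32 nslots)
{
	return context_compute_hash(c) & (nslots - 1);
}
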
diff --git a/security/selinux/ss/ebitmap.c b/security/selinux/ss/ebitmap.c
index c8c3663111e2..14bedc95c6dc 100644
--- a/security/selinux/ss/ebitmap.c
+++ b/security/selinux/ss/ebitmap.c
@@ -19,6 +19,7 @@
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/errno.h>
+#include <linux/jhash.h>
#include <net/netlabel.h>
#include "ebitmap.h"
#include "policydb.h"
@@ -542,6 +543,19 @@ int ebitmap_write(struct ebitmap *e, void *fp)
return 0;
}
+u32 ebitmap_hash(const struct ebitmap *e, u32 hash)
+{
+ struct ebitmap_node *node;
+
+ /* need to change hash even if ebitmap is empty */
+ hash = jhash_1word(e->highbit, hash);
+ for (node = e->node; node; node = node->next) {
+ hash = jhash_1word(node->startbit, hash);
+ hash = jhash(node->maps, sizeof(node->maps), hash);
+ }
+ return hash;
+}
+
void __init ebitmap_cache_init(void)
{
ebitmap_node_cachep = kmem_cache_create("ebitmap_node",
diff --git a/security/selinux/ss/ebitmap.h b/security/selinux/ss/ebitmap.h
index 9a23b81b8832..9eb2d0af2805 100644
--- a/security/selinux/ss/ebitmap.h
+++ b/security/selinux/ss/ebitmap.h
@@ -131,6 +131,7 @@ int ebitmap_set_bit(struct ebitmap *e, unsigned long bit, int value);
void ebitmap_destroy(struct ebitmap *e);
int ebitmap_read(struct ebitmap *e, void *fp);
int ebitmap_write(struct ebitmap *e, void *fp);
+u32 ebitmap_hash(const struct ebitmap *e, u32 hash);
#ifdef CONFIG_NETLABEL
int ebitmap_netlbl_export(struct ebitmap *ebmap,
diff --git a/security/selinux/ss/hashtab.c b/security/selinux/ss/hashtab.c
index 883f19d32c28..5ee868116d70 100644
--- a/security/selinux/ss/hashtab.c
+++ b/security/selinux/ss/hashtab.c
@@ -29,34 +29,21 @@ static u32 hashtab_compute_size(u32 nel)
return nel == 0 ? 0 : roundup_pow_of_two(nel);
}
-struct hashtab *hashtab_create(u32 (*hash_value)(struct hashtab *h, const void *key),
- int (*keycmp)(struct hashtab *h, const void *key1, const void *key2),
- u32 nel_hint)
+int hashtab_init(struct hashtab *h,
+ u32 (*hash_value)(struct hashtab *h, const void *key),
+ int (*keycmp)(struct hashtab *h, const void *key1,
+ const void *key2),
+ u32 nel_hint)
{
- struct hashtab *p;
- u32 i, size = hashtab_compute_size(nel_hint);
-
- p = kzalloc(sizeof(*p), GFP_KERNEL);
- if (!p)
- return p;
-
- p->size = size;
- p->nel = 0;
- p->hash_value = hash_value;
- p->keycmp = keycmp;
- if (!size)
- return p;
-
- p->htable = kmalloc_array(size, sizeof(*p->htable), GFP_KERNEL);
- if (!p->htable) {
- kfree(p);
- return NULL;
- }
-
- for (i = 0; i < size; i++)
- p->htable[i] = NULL;
+ h->size = hashtab_compute_size(nel_hint);
+ h->nel = 0;
+ h->hash_value = hash_value;
+ h->keycmp = keycmp;
+ if (!h->size)
+ return 0;
- return p;
+ h->htable = kcalloc(h->size, sizeof(*h->htable), GFP_KERNEL);
+ return h->htable ? 0 : -ENOMEM;
}
int hashtab_insert(struct hashtab *h, void *key, void *datum)
@@ -66,7 +53,7 @@ int hashtab_insert(struct hashtab *h, void *key, void *datum)
cond_resched();
- if (!h || !h->size || h->nel == HASHTAB_MAX_NODES)
+ if (!h->size || h->nel == HASHTAB_MAX_NODES)
return -EINVAL;
hvalue = h->hash_value(h, key);
@@ -102,7 +89,7 @@ void *hashtab_search(struct hashtab *h, const void *key)
u32 hvalue;
struct hashtab_node *cur;
- if (!h || !h->size)
+ if (!h->size)
return NULL;
hvalue = h->hash_value(h, key);
@@ -121,9 +108,6 @@ void hashtab_destroy(struct hashtab *h)
u32 i;
struct hashtab_node *cur, *temp;
- if (!h)
- return;
-
for (i = 0; i < h->size; i++) {
cur = h->htable[i];
while (cur) {
@@ -136,8 +120,6 @@ void hashtab_destroy(struct hashtab *h)
kfree(h->htable);
h->htable = NULL;
-
- kfree(h);
}
int hashtab_map(struct hashtab *h,
@@ -148,9 +130,6 @@ int hashtab_map(struct hashtab *h,
int ret;
struct hashtab_node *cur;
- if (!h)
- return 0;
-
for (i = 0; i < h->size; i++) {
cur = h->htable[i];
while (cur) {
diff --git a/security/selinux/ss/hashtab.h b/security/selinux/ss/hashtab.h
index dde54d9ff01c..31c11511fe10 100644
--- a/security/selinux/ss/hashtab.h
+++ b/security/selinux/ss/hashtab.h
@@ -35,14 +35,15 @@ struct hashtab_info {
};
/*
- * Creates a new hash table with the specified characteristics.
+ * Initializes a new hash table with the specified characteristics.
*
- * Returns NULL if insufficent space is available or
- * the new hash table otherwise.
+ * Returns -ENOMEM if insufficient space is available or 0 otherwise.
*/
-struct hashtab *hashtab_create(u32 (*hash_value)(struct hashtab *h, const void *key),
- int (*keycmp)(struct hashtab *h, const void *key1, const void *key2),
- u32 nel_hint);
+int hashtab_init(struct hashtab *h,
+ u32 (*hash_value)(struct hashtab *h, const void *key),
+ int (*keycmp)(struct hashtab *h, const void *key1,
+ const void *key2),
+ u32 nel_hint);
/*
* Inserts the specified (key, datum) pair into the specified hash table.
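
The hashtab rework replaces hashtab_create() with hashtab_init() on a caller-embedded struct hashtab: a zero-sized table is now valid (searches miss, inserts return -EINVAL), and hashtab_destroy() frees only the bucket array rather than the table itself. A minimal sketch of the new shape, with a hypothetical owner struct and callbacks:

#include <linux/stringhash.h>
#include <linux/string.h>
#include "hashtab.h"

/* sketch only: the table is embedded in its owner and set up in place */
struct example_dir {
	struct hashtab table;	/* no separate allocation any more */
};

static u32 example_hash(struct hashtab *h, const void *key)
{
	const char *name = key;

	/* only called once h->size is non-zero */
	return full_name_hash(NULL, name, strlen(name)) & (h->size - 1);
}

static int example_cmp(struct hashtab *h, const void *k1, const void *k2)
{
	return strcmp(k1, k2);
}

static int example_dir_init(struct example_dir *d, u32 nel_hint)
{
	return hashtab_init(&d->table, example_hash, example_cmp, nel_hint);
}
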
diff --git a/security/selinux/ss/mls.c b/security/selinux/ss/mls.c
index ec5e3d1da9ac..cd8734f25b39 100644
--- a/security/selinux/ss/mls.c
+++ b/security/selinux/ss/mls.c
@@ -165,7 +165,7 @@ int mls_level_isvalid(struct policydb *p, struct mls_level *l)
if (!l->sens || l->sens > p->p_levels.nprim)
return 0;
- levdatum = hashtab_search(p->p_levels.table,
+ levdatum = hashtab_search(&p->p_levels.table,
sym_name(p, SYM_LEVELS, l->sens - 1));
if (!levdatum)
return 0;
@@ -293,7 +293,7 @@ int mls_context_to_sid(struct policydb *pol,
*(next_cat++) = '\0';
/* Parse sensitivity. */
- levdatum = hashtab_search(pol->p_levels.table, sensitivity);
+ levdatum = hashtab_search(&pol->p_levels.table, sensitivity);
if (!levdatum)
return -EINVAL;
context->range.level[l].sens = levdatum->level->sens;
@@ -312,7 +312,7 @@ int mls_context_to_sid(struct policydb *pol,
*rngptr++ = '\0';
}
- catdatum = hashtab_search(pol->p_cats.table, cur_cat);
+ catdatum = hashtab_search(&pol->p_cats.table, cur_cat);
if (!catdatum)
return -EINVAL;
@@ -325,7 +325,7 @@ int mls_context_to_sid(struct policydb *pol,
if (rngptr == NULL)
continue;
- rngdatum = hashtab_search(pol->p_cats.table, rngptr);
+ rngdatum = hashtab_search(&pol->p_cats.table, rngptr);
if (!rngdatum)
return -EINVAL;
@@ -458,7 +458,7 @@ int mls_convert_context(struct policydb *oldp,
return 0;
for (l = 0; l < 2; l++) {
- levdatum = hashtab_search(newp->p_levels.table,
+ levdatum = hashtab_search(&newp->p_levels.table,
sym_name(oldp, SYM_LEVELS,
oldc->range.level[l].sens - 1));
@@ -470,7 +470,7 @@ int mls_convert_context(struct policydb *oldp,
node, i) {
int rc;
- catdatum = hashtab_search(newp->p_cats.table,
+ catdatum = hashtab_search(&newp->p_cats.table,
sym_name(oldp, SYM_CATS, i));
if (!catdatum)
return -EINVAL;
@@ -506,7 +506,7 @@ int mls_compute_sid(struct policydb *p,
rtr.source_type = scontext->type;
rtr.target_type = tcontext->type;
rtr.target_class = tclass;
- r = hashtab_search(p->range_tr, &rtr);
+ r = hashtab_search(&p->range_tr, &rtr);
if (r)
return mls_range_set(newcontext, r);
@@ -536,7 +536,7 @@ int mls_compute_sid(struct policydb *p,
/* Fallthrough */
case AVTAB_CHANGE:
- if ((tclass == p->process_class) || (sock == true))
+ if ((tclass == p->process_class) || sock)
/* Use the process MLS attributes. */
return mls_context_cpy(newcontext, scontext);
else
diff --git a/security/selinux/ss/mls.h b/security/selinux/ss/mls.h
index 7954b1e60b64..15cacde0ff61 100644
--- a/security/selinux/ss/mls.h
+++ b/security/selinux/ss/mls.h
@@ -22,7 +22,10 @@
#ifndef _SS_MLS_H_
#define _SS_MLS_H_
+#include <linux/jhash.h>
+
#include "context.h"
+#include "ebitmap.h"
#include "policydb.h"
int mls_compute_context_len(struct policydb *p, struct context *context);
@@ -101,5 +104,13 @@ static inline int mls_import_netlbl_cat(struct policydb *p,
}
#endif
+static inline u32 mls_range_hash(const struct mls_range *r, u32 hash)
+{
+ hash = jhash_2words(r->level[0].sens, r->level[1].sens, hash);
+ hash = ebitmap_hash(&r->level[0].cat, hash);
+ hash = ebitmap_hash(&r->level[1].cat, hash);
+ return hash;
+}
+
#endif /* _SS_MLS_H */
diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c
index c21b922e5ebe..98f343005d6b 100644
--- a/security/selinux/ss/policydb.c
+++ b/security/selinux/ss/policydb.c
@@ -154,6 +154,11 @@ static struct policydb_compat_info policydb_compat[] = {
.sym_num = SYM_NUM,
.ocon_num = OCON_NUM,
},
+ {
+ .version = POLICYDB_VERSION_COMP_FTRANS,
+ .sym_num = SYM_NUM,
+ .ocon_num = OCON_NUM,
+ },
};
static struct policydb_compat_info *policydb_lookup_compat(int version)
@@ -190,8 +195,8 @@ static int common_destroy(void *key, void *datum, void *p)
kfree(key);
if (datum) {
comdatum = datum;
- hashtab_map(comdatum->permissions.table, perm_destroy, NULL);
- hashtab_destroy(comdatum->permissions.table);
+ hashtab_map(&comdatum->permissions.table, perm_destroy, NULL);
+ hashtab_destroy(&comdatum->permissions.table);
}
kfree(datum);
return 0;
@@ -219,8 +224,8 @@ static int cls_destroy(void *key, void *datum, void *p)
kfree(key);
if (datum) {
cladatum = datum;
- hashtab_map(cladatum->permissions.table, perm_destroy, NULL);
- hashtab_destroy(cladatum->permissions.table);
+ hashtab_map(&cladatum->permissions.table, perm_destroy, NULL);
+ hashtab_destroy(&cladatum->permissions.table);
constraint = cladatum->constraints;
while (constraint) {
e = constraint->expr;
@@ -352,6 +357,13 @@ static int range_tr_destroy(void *key, void *datum, void *p)
return 0;
}
+static int role_tr_destroy(void *key, void *datum, void *p)
+{
+ kfree(key);
+ kfree(datum);
+ return 0;
+}
+
static void ocontext_destroy(struct ocontext *c, int i)
{
if (!c)
@@ -388,7 +400,7 @@ static int roles_init(struct policydb *p)
if (!key)
goto out;
- rc = hashtab_insert(p->p_roles.table, key, role);
+ rc = hashtab_insert(&p->p_roles.table, key, role);
if (rc)
goto out;
@@ -458,26 +470,43 @@ static int rangetr_cmp(struct hashtab *h, const void *k1, const void *k2)
return v;
}
+static u32 role_trans_hash(struct hashtab *h, const void *k)
+{
+ const struct role_trans_key *key = k;
+
+ return (key->role + (key->type << 3) + (key->tclass << 5)) &
+ (h->size - 1);
+}
+
+static int role_trans_cmp(struct hashtab *h, const void *k1, const void *k2)
+{
+ const struct role_trans_key *key1 = k1, *key2 = k2;
+ int v;
+
+ v = key1->role - key2->role;
+ if (v)
+ return v;
+
+ v = key1->type - key2->type;
+ if (v)
+ return v;
+
+ return key1->tclass - key2->tclass;
+}
+
/*
* Initialize a policy database structure.
*/
-static int policydb_init(struct policydb *p)
+static void policydb_init(struct policydb *p)
{
memset(p, 0, sizeof(*p));
avtab_init(&p->te_avtab);
cond_policydb_init(p);
- p->filename_trans = hashtab_create(filenametr_hash, filenametr_cmp,
- (1 << 11));
- if (!p->filename_trans)
- return -ENOMEM;
-
ebitmap_init(&p->filename_trans_ttypes);
ebitmap_init(&p->policycaps);
ebitmap_init(&p->permissive_map);
-
- return 0;
}
/*
@@ -639,7 +668,7 @@ static void symtab_hash_eval(struct symtab *s)
int i;
for (i = 0; i < SYM_NUM; i++)
- hash_eval(s[i].table, symtab_name[i]);
+ hash_eval(&s[i].table, symtab_name[i]);
}
#else
@@ -710,7 +739,7 @@ static int policydb_index(struct policydb *p)
if (!p->sym_val_to_name[i])
return -ENOMEM;
- rc = hashtab_map(p->symtab[i].table, index_f[i], p);
+ rc = hashtab_map(&p->symtab[i].table, index_f[i], p);
if (rc)
goto out;
}
@@ -728,12 +757,11 @@ void policydb_destroy(struct policydb *p)
struct genfs *g, *gtmp;
int i;
struct role_allow *ra, *lra = NULL;
- struct role_trans *tr, *ltr = NULL;
for (i = 0; i < SYM_NUM; i++) {
cond_resched();
- hashtab_map(p->symtab[i].table, destroy_f[i], NULL);
- hashtab_destroy(p->symtab[i].table);
+ hashtab_map(&p->symtab[i].table, destroy_f[i], NULL);
+ hashtab_destroy(&p->symtab[i].table);
}
for (i = 0; i < SYM_NUM; i++)
@@ -775,12 +803,8 @@ void policydb_destroy(struct policydb *p)
cond_policydb_destroy(p);
- for (tr = p->role_tr; tr; tr = tr->next) {
- cond_resched();
- kfree(ltr);
- ltr = tr;
- }
- kfree(ltr);
+ hashtab_map(&p->role_tr, role_tr_destroy, NULL);
+ hashtab_destroy(&p->role_tr);
for (ra = p->role_allow; ra; ra = ra->next) {
cond_resched();
@@ -789,11 +813,11 @@ void policydb_destroy(struct policydb *p)
}
kfree(lra);
- hashtab_map(p->filename_trans, filenametr_destroy, NULL);
- hashtab_destroy(p->filename_trans);
+ hashtab_map(&p->filename_trans, filenametr_destroy, NULL);
+ hashtab_destroy(&p->filename_trans);
- hashtab_map(p->range_tr, range_tr_destroy, NULL);
- hashtab_destroy(p->range_tr);
+ hashtab_map(&p->range_tr, range_tr_destroy, NULL);
+ hashtab_destroy(&p->range_tr);
if (p->type_attr_map_array) {
for (i = 0; i < p->p_types.nprim; i++)
@@ -836,11 +860,6 @@ int policydb_load_isids(struct policydb *p, struct sidtab *s)
if (!name)
continue;
- rc = context_add_hash(p, &c->context[0]);
- if (rc) {
- sidtab_destroy(s);
- goto out;
- }
rc = sidtab_set_initial(s, sid, &c->context[0]);
if (rc) {
pr_err("SELinux: unable to load initial SID %s.\n",
@@ -1109,7 +1128,7 @@ static int common_read(struct policydb *p, struct hashtab *h, void *fp)
goto bad;
for (i = 0; i < nel; i++) {
- rc = perm_read(p, comdatum->permissions.table, fp);
+ rc = perm_read(p, &comdatum->permissions.table, fp);
if (rc)
goto bad;
}
@@ -1281,7 +1300,8 @@ static int class_read(struct policydb *p, struct hashtab *h, void *fp)
goto bad;
rc = -EINVAL;
- cladatum->comdatum = hashtab_search(p->p_commons.table, cladatum->comkey);
+ cladatum->comdatum = hashtab_search(&p->p_commons.table,
+ cladatum->comkey);
if (!cladatum->comdatum) {
pr_err("SELinux: unknown common %s\n",
cladatum->comkey);
@@ -1289,7 +1309,7 @@ static int class_read(struct policydb *p, struct hashtab *h, void *fp)
}
}
for (i = 0; i < nel; i++) {
- rc = perm_read(p, cladatum->permissions.table, fp);
+ rc = perm_read(p, &cladatum->permissions.table, fp);
if (rc)
goto bad;
}
@@ -1712,18 +1732,15 @@ static int policydb_bounds_sanity_check(struct policydb *p)
if (p->policyvers < POLICYDB_VERSION_BOUNDARY)
return 0;
- rc = hashtab_map(p->p_users.table,
- user_bounds_sanity_check, p);
+ rc = hashtab_map(&p->p_users.table, user_bounds_sanity_check, p);
if (rc)
return rc;
- rc = hashtab_map(p->p_roles.table,
- role_bounds_sanity_check, p);
+ rc = hashtab_map(&p->p_roles.table, role_bounds_sanity_check, p);
if (rc)
return rc;
- rc = hashtab_map(p->p_types.table,
- type_bounds_sanity_check, p);
+ rc = hashtab_map(&p->p_types.table, type_bounds_sanity_check, p);
if (rc)
return rc;
@@ -1734,7 +1751,7 @@ u16 string_to_security_class(struct policydb *p, const char *name)
{
struct class_datum *cladatum;
- cladatum = hashtab_search(p->p_classes.table, name);
+ cladatum = hashtab_search(&p->p_classes.table, name);
if (!cladatum)
return 0;
@@ -1753,11 +1770,9 @@ u32 string_to_av_perm(struct policydb *p, u16 tclass, const char *name)
cladatum = p->class_val_to_struct[tclass-1];
comdatum = cladatum->comdatum;
if (comdatum)
- perdatum = hashtab_search(comdatum->permissions.table,
- name);
+ perdatum = hashtab_search(&comdatum->permissions.table, name);
if (!perdatum)
- perdatum = hashtab_search(cladatum->permissions.table,
- name);
+ perdatum = hashtab_search(&cladatum->permissions.table, name);
if (!perdatum)
return 0;
@@ -1781,9 +1796,9 @@ static int range_read(struct policydb *p, void *fp)
nel = le32_to_cpu(buf[0]);
- p->range_tr = hashtab_create(rangetr_hash, rangetr_cmp, nel);
- if (!p->range_tr)
- return -ENOMEM;
+ rc = hashtab_init(&p->range_tr, rangetr_hash, rangetr_cmp, nel);
+ if (rc)
+ return rc;
for (i = 0; i < nel; i++) {
rc = -ENOMEM;
@@ -1826,14 +1841,14 @@ static int range_read(struct policydb *p, void *fp)
goto out;
}
- rc = hashtab_insert(p->range_tr, rt, r);
+ rc = hashtab_insert(&p->range_tr, rt, r);
if (rc)
goto out;
rt = NULL;
r = NULL;
}
- hash_eval(p->range_tr, "rangetr");
+ hash_eval(&p->range_tr, "rangetr");
rc = 0;
out:
kfree(rt);
@@ -1841,7 +1856,7 @@ out:
return rc;
}
-static int filename_trans_read_one(struct policydb *p, void *fp)
+static int filename_trans_read_helper_compat(struct policydb *p, void *fp)
{
struct filename_trans_key key, *ft = NULL;
struct filename_trans_datum *last, *datum = NULL;
@@ -1873,7 +1888,7 @@ static int filename_trans_read_one(struct policydb *p, void *fp)
otype = le32_to_cpu(buf[3]);
last = NULL;
- datum = hashtab_search(p->filename_trans, &key);
+ datum = hashtab_search(&p->filename_trans, &key);
while (datum) {
if (unlikely(ebitmap_get_bit(&datum->stypes, stype - 1))) {
/* conflicting/duplicate rules are ignored */
@@ -1903,7 +1918,7 @@ static int filename_trans_read_one(struct policydb *p, void *fp)
if (!ft)
goto out;
- rc = hashtab_insert(p->filename_trans, ft, datum);
+ rc = hashtab_insert(&p->filename_trans, ft, datum);
if (rc)
goto out;
name = NULL;
@@ -1924,6 +1939,94 @@ out:
return rc;
}
+static int filename_trans_read_helper(struct policydb *p, void *fp)
+{
+ struct filename_trans_key *ft = NULL;
+ struct filename_trans_datum **dst, *datum, *first = NULL;
+ char *name = NULL;
+ u32 len, ttype, tclass, ndatum, i;
+ __le32 buf[3];
+ int rc;
+
+ /* length of the path component string */
+ rc = next_entry(buf, fp, sizeof(u32));
+ if (rc)
+ return rc;
+ len = le32_to_cpu(buf[0]);
+
+ /* path component string */
+ rc = str_read(&name, GFP_KERNEL, fp, len);
+ if (rc)
+ return rc;
+
+ rc = next_entry(buf, fp, sizeof(u32) * 3);
+ if (rc)
+ goto out;
+
+ ttype = le32_to_cpu(buf[0]);
+ tclass = le32_to_cpu(buf[1]);
+
+ ndatum = le32_to_cpu(buf[2]);
+ if (ndatum == 0) {
+ pr_err("SELinux: Filename transition key with no datum\n");
+ rc = -ENOENT;
+ goto out;
+ }
+
+ dst = &first;
+ for (i = 0; i < ndatum; i++) {
+ rc = -ENOMEM;
+ datum = kmalloc(sizeof(*datum), GFP_KERNEL);
+ if (!datum)
+ goto out;
+
+ *dst = datum;
+
+ /* ebitmap_read() will at least init the bitmap */
+ rc = ebitmap_read(&datum->stypes, fp);
+ if (rc)
+ goto out;
+
+ rc = next_entry(buf, fp, sizeof(u32));
+ if (rc)
+ goto out;
+
+ datum->otype = le32_to_cpu(buf[0]);
+ datum->next = NULL;
+
+ dst = &datum->next;
+ }
+
+ rc = -ENOMEM;
+ ft = kmalloc(sizeof(*ft), GFP_KERNEL);
+ if (!ft)
+ goto out;
+
+ ft->ttype = ttype;
+ ft->tclass = tclass;
+ ft->name = name;
+
+ rc = hashtab_insert(&p->filename_trans, ft, first);
+ if (rc == -EEXIST)
+ pr_err("SELinux: Duplicate filename transition key\n");
+ if (rc)
+ goto out;
+
+ return ebitmap_set_bit(&p->filename_trans_ttypes, ttype, 1);
+
+out:
+ kfree(ft);
+ kfree(name);
+ while (first) {
+ datum = first;
+ first = first->next;
+
+ ebitmap_destroy(&datum->stypes);
+ kfree(datum);
+ }
+ return rc;
+}
+
static int filename_trans_read(struct policydb *p, void *fp)
{
u32 nel;
@@ -1938,14 +2041,32 @@ static int filename_trans_read(struct policydb *p, void *fp)
return rc;
nel = le32_to_cpu(buf[0]);
- p->filename_trans_count = nel;
+ if (p->policyvers < POLICYDB_VERSION_COMP_FTRANS) {
+ p->compat_filename_trans_count = nel;
- for (i = 0; i < nel; i++) {
- rc = filename_trans_read_one(p, fp);
+ rc = hashtab_init(&p->filename_trans, filenametr_hash,
+ filenametr_cmp, (1 << 11));
if (rc)
return rc;
+
+ for (i = 0; i < nel; i++) {
+ rc = filename_trans_read_helper_compat(p, fp);
+ if (rc)
+ return rc;
+ }
+ } else {
+ rc = hashtab_init(&p->filename_trans, filenametr_hash,
+ filenametr_cmp, nel);
+ if (rc)
+ return rc;
+
+ for (i = 0; i < nel; i++) {
+ rc = filename_trans_read_helper(p, fp);
+ if (rc)
+ return rc;
+ }
}
- hash_eval(p->filename_trans, "filenametr");
+ hash_eval(&p->filename_trans, "filenametr");
return 0;
}
@@ -2251,7 +2372,8 @@ out:
int policydb_read(struct policydb *p, void *fp)
{
struct role_allow *ra, *lra;
- struct role_trans *tr, *ltr;
+ struct role_trans_key *rtk = NULL;
+ struct role_trans_datum *rtd = NULL;
int i, j, rc;
__le32 buf[4];
u32 len, nprim, nel;
@@ -2259,9 +2381,7 @@ int policydb_read(struct policydb *p, void *fp)
char *policydb_str;
struct policydb_compat_info *info;
- rc = policydb_init(p);
- if (rc)
- return rc;
+ policydb_init(p);
/* Read the magic number and string length. */
rc = next_entry(buf, fp, sizeof(u32) * 2);
@@ -2389,7 +2509,7 @@ int policydb_read(struct policydb *p, void *fp)
}
for (j = 0; j < nel; j++) {
- rc = read_f[i](p, p->symtab[i].table, fp);
+ rc = read_f[i](p, &p->symtab[i].table, fp);
if (rc)
goto bad;
}
@@ -2416,39 +2536,50 @@ int policydb_read(struct policydb *p, void *fp)
if (rc)
goto bad;
nel = le32_to_cpu(buf[0]);
- ltr = NULL;
+
+ rc = hashtab_init(&p->role_tr, role_trans_hash, role_trans_cmp, nel);
+ if (rc)
+ goto bad;
for (i = 0; i < nel; i++) {
rc = -ENOMEM;
- tr = kzalloc(sizeof(*tr), GFP_KERNEL);
- if (!tr)
+ rtk = kmalloc(sizeof(*rtk), GFP_KERNEL);
+ if (!rtk)
goto bad;
- if (ltr)
- ltr->next = tr;
- else
- p->role_tr = tr;
+
+ rc = -ENOMEM;
+ rtd = kmalloc(sizeof(*rtd), GFP_KERNEL);
+ if (!rtd)
+ goto bad;
+
rc = next_entry(buf, fp, sizeof(u32)*3);
if (rc)
goto bad;
rc = -EINVAL;
- tr->role = le32_to_cpu(buf[0]);
- tr->type = le32_to_cpu(buf[1]);
- tr->new_role = le32_to_cpu(buf[2]);
+ rtk->role = le32_to_cpu(buf[0]);
+ rtk->type = le32_to_cpu(buf[1]);
+ rtd->new_role = le32_to_cpu(buf[2]);
if (p->policyvers >= POLICYDB_VERSION_ROLETRANS) {
rc = next_entry(buf, fp, sizeof(u32));
if (rc)
goto bad;
- tr->tclass = le32_to_cpu(buf[0]);
+ rtk->tclass = le32_to_cpu(buf[0]);
} else
- tr->tclass = p->process_class;
+ rtk->tclass = p->process_class;
rc = -EINVAL;
- if (!policydb_role_isvalid(p, tr->role) ||
- !policydb_type_isvalid(p, tr->type) ||
- !policydb_class_isvalid(p, tr->tclass) ||
- !policydb_role_isvalid(p, tr->new_role))
+ if (!policydb_role_isvalid(p, rtk->role) ||
+ !policydb_type_isvalid(p, rtk->type) ||
+ !policydb_class_isvalid(p, rtk->tclass) ||
+ !policydb_role_isvalid(p, rtd->new_role))
goto bad;
- ltr = tr;
+
+ rc = hashtab_insert(&p->role_tr, rtk, rtd);
+ if (rc)
+ goto bad;
+
+ rtk = NULL;
+ rtd = NULL;
}
rc = next_entry(buf, fp, sizeof(u32));
@@ -2504,6 +2635,7 @@ int policydb_read(struct policydb *p, void *fp)
if (rc)
goto bad;
+ rc = -ENOMEM;
p->type_attr_map_array = kvcalloc(p->p_types.nprim,
sizeof(*p->type_attr_map_array),
GFP_KERNEL);
@@ -2536,6 +2668,8 @@ int policydb_read(struct policydb *p, void *fp)
out:
return rc;
bad:
+ kfree(rtk);
+ kfree(rtd);
policydb_destroy(p);
goto out;
}
@@ -2653,39 +2787,45 @@ static int cat_write(void *vkey, void *datum, void *ptr)
return 0;
}
-static int role_trans_write(struct policydb *p, void *fp)
+static int role_trans_write_one(void *key, void *datum, void *ptr)
{
- struct role_trans *r = p->role_tr;
- struct role_trans *tr;
+ struct role_trans_key *rtk = key;
+ struct role_trans_datum *rtd = datum;
+ struct policy_data *pd = ptr;
+ void *fp = pd->fp;
+ struct policydb *p = pd->p;
__le32 buf[3];
- size_t nel;
int rc;
- nel = 0;
- for (tr = r; tr; tr = tr->next)
- nel++;
- buf[0] = cpu_to_le32(nel);
- rc = put_entry(buf, sizeof(u32), 1, fp);
+ buf[0] = cpu_to_le32(rtk->role);
+ buf[1] = cpu_to_le32(rtk->type);
+ buf[2] = cpu_to_le32(rtd->new_role);
+ rc = put_entry(buf, sizeof(u32), 3, fp);
if (rc)
return rc;
- for (tr = r; tr; tr = tr->next) {
- buf[0] = cpu_to_le32(tr->role);
- buf[1] = cpu_to_le32(tr->type);
- buf[2] = cpu_to_le32(tr->new_role);
- rc = put_entry(buf, sizeof(u32), 3, fp);
+ if (p->policyvers >= POLICYDB_VERSION_ROLETRANS) {
+ buf[0] = cpu_to_le32(rtk->tclass);
+ rc = put_entry(buf, sizeof(u32), 1, fp);
if (rc)
return rc;
- if (p->policyvers >= POLICYDB_VERSION_ROLETRANS) {
- buf[0] = cpu_to_le32(tr->tclass);
- rc = put_entry(buf, sizeof(u32), 1, fp);
- if (rc)
- return rc;
- }
}
-
return 0;
}
+static int role_trans_write(struct policydb *p, void *fp)
+{
+ struct policy_data pd = { .p = p, .fp = fp };
+ __le32 buf[1];
+ int rc;
+
+ buf[0] = cpu_to_le32(p->role_tr.nel);
+ rc = put_entry(buf, sizeof(u32), 1, fp);
+ if (rc)
+ return rc;
+
+ return hashtab_map(&p->role_tr, role_trans_write_one, &pd);
+}
+
static int role_allow_write(struct role_allow *r, void *fp)
{
struct role_allow *ra;
@@ -2777,7 +2917,7 @@ static int common_write(void *vkey, void *datum, void *ptr)
buf[0] = cpu_to_le32(len);
buf[1] = cpu_to_le32(comdatum->value);
buf[2] = cpu_to_le32(comdatum->permissions.nprim);
- buf[3] = cpu_to_le32(comdatum->permissions.table->nel);
+ buf[3] = cpu_to_le32(comdatum->permissions.table.nel);
rc = put_entry(buf, sizeof(u32), 4, fp);
if (rc)
return rc;
@@ -2786,7 +2926,7 @@ static int common_write(void *vkey, void *datum, void *ptr)
if (rc)
return rc;
- rc = hashtab_map(comdatum->permissions.table, perm_write, fp);
+ rc = hashtab_map(&comdatum->permissions.table, perm_write, fp);
if (rc)
return rc;
@@ -2885,10 +3025,7 @@ static int class_write(void *vkey, void *datum, void *ptr)
buf[1] = cpu_to_le32(len2);
buf[2] = cpu_to_le32(cladatum->value);
buf[3] = cpu_to_le32(cladatum->permissions.nprim);
- if (cladatum->permissions.table)
- buf[4] = cpu_to_le32(cladatum->permissions.table->nel);
- else
- buf[4] = 0;
+ buf[4] = cpu_to_le32(cladatum->permissions.table.nel);
buf[5] = cpu_to_le32(ncons);
rc = put_entry(buf, sizeof(u32), 6, fp);
if (rc)
@@ -2904,7 +3041,7 @@ static int class_write(void *vkey, void *datum, void *ptr)
return rc;
}
- rc = hashtab_map(cladatum->permissions.table, perm_write, fp);
+ rc = hashtab_map(&cladatum->permissions.table, perm_write, fp);
if (rc)
return rc;
@@ -3262,14 +3399,6 @@ static int genfs_write(struct policydb *p, void *fp)
return 0;
}
-static int hashtab_cnt(void *key, void *data, void *ptr)
-{
- int *cnt = ptr;
- *cnt = *cnt + 1;
-
- return 0;
-}
-
static int range_write_helper(void *key, void *data, void *ptr)
{
__le32 buf[2];
@@ -3301,32 +3430,26 @@ static int range_write_helper(void *key, void *data, void *ptr)
static int range_write(struct policydb *p, void *fp)
{
__le32 buf[1];
- int rc, nel;
+ int rc;
struct policy_data pd;
pd.p = p;
pd.fp = fp;
- /* count the number of entries in the hashtab */
- nel = 0;
- rc = hashtab_map(p->range_tr, hashtab_cnt, &nel);
- if (rc)
- return rc;
-
- buf[0] = cpu_to_le32(nel);
+ buf[0] = cpu_to_le32(p->range_tr.nel);
rc = put_entry(buf, sizeof(u32), 1, fp);
if (rc)
return rc;
/* actually write all of the entries */
- rc = hashtab_map(p->range_tr, range_write_helper, &pd);
+ rc = hashtab_map(&p->range_tr, range_write_helper, &pd);
if (rc)
return rc;
return 0;
}
-static int filename_write_helper(void *key, void *data, void *ptr)
+static int filename_write_helper_compat(void *key, void *data, void *ptr)
{
struct filename_trans_key *ft = key;
struct filename_trans_datum *datum = data;
@@ -3363,26 +3486,82 @@ static int filename_write_helper(void *key, void *data, void *ptr)
return 0;
}
-static int filename_trans_write(struct policydb *p, void *fp)
+static int filename_write_helper(void *key, void *data, void *ptr)
{
- __le32 buf[1];
+ struct filename_trans_key *ft = key;
+ struct filename_trans_datum *datum;
+ void *fp = ptr;
+ __le32 buf[3];
int rc;
+ u32 ndatum, len = strlen(ft->name);
- if (p->policyvers < POLICYDB_VERSION_FILENAME_TRANS)
- return 0;
-
- buf[0] = cpu_to_le32(p->filename_trans_count);
+ buf[0] = cpu_to_le32(len);
rc = put_entry(buf, sizeof(u32), 1, fp);
if (rc)
return rc;
- rc = hashtab_map(p->filename_trans, filename_write_helper, fp);
+ rc = put_entry(ft->name, sizeof(char), len, fp);
if (rc)
return rc;
+ ndatum = 0;
+ datum = data;
+ do {
+ ndatum++;
+ datum = datum->next;
+ } while (unlikely(datum));
+
+ buf[0] = cpu_to_le32(ft->ttype);
+ buf[1] = cpu_to_le32(ft->tclass);
+ buf[2] = cpu_to_le32(ndatum);
+ rc = put_entry(buf, sizeof(u32), 3, fp);
+ if (rc)
+ return rc;
+
+ datum = data;
+ do {
+ rc = ebitmap_write(&datum->stypes, fp);
+ if (rc)
+ return rc;
+
+ buf[0] = cpu_to_le32(datum->otype);
+ rc = put_entry(buf, sizeof(u32), 1, fp);
+ if (rc)
+ return rc;
+
+ datum = datum->next;
+ } while (unlikely(datum));
+
return 0;
}
+static int filename_trans_write(struct policydb *p, void *fp)
+{
+ __le32 buf[1];
+ int rc;
+
+ if (p->policyvers < POLICYDB_VERSION_FILENAME_TRANS)
+ return 0;
+
+ if (p->policyvers < POLICYDB_VERSION_COMP_FTRANS) {
+ buf[0] = cpu_to_le32(p->compat_filename_trans_count);
+ rc = put_entry(buf, sizeof(u32), 1, fp);
+ if (rc)
+ return rc;
+
+ rc = hashtab_map(&p->filename_trans,
+ filename_write_helper_compat, fp);
+ } else {
+ buf[0] = cpu_to_le32(p->filename_trans.nel);
+ rc = put_entry(buf, sizeof(u32), 1, fp);
+ if (rc)
+ return rc;
+
+ rc = hashtab_map(&p->filename_trans, filename_write_helper, fp);
+ }
+ return rc;
+}
+
/*
* Write the configuration data in a policy database
* structure to a policy database binary representation
@@ -3467,12 +3646,12 @@ int policydb_write(struct policydb *p, void *fp)
pd.p = p;
buf[0] = cpu_to_le32(p->symtab[i].nprim);
- buf[1] = cpu_to_le32(p->symtab[i].table->nel);
+ buf[1] = cpu_to_le32(p->symtab[i].table.nel);
rc = put_entry(buf, sizeof(u32), 2, fp);
if (rc)
return rc;
- rc = hashtab_map(p->symtab[i].table, write_f[i], &pd);
+ rc = hashtab_map(&p->symtab[i].table, write_f[i], &pd);
if (rc)
return rc;
}
diff --git a/security/selinux/ss/policydb.h b/security/selinux/ss/policydb.h
index 72e2932fb12d..9591c9587cb6 100644
--- a/security/selinux/ss/policydb.h
+++ b/security/selinux/ss/policydb.h
@@ -81,12 +81,14 @@ struct role_datum {
struct ebitmap types; /* set of authorized types for role */
};
-struct role_trans {
+struct role_trans_key {
u32 role; /* current role */
u32 type; /* program executable type, or new object type */
u32 tclass; /* process class, or new object class */
+};
+
+struct role_trans_datum {
u32 new_role; /* new role */
- struct role_trans *next;
};
struct filename_trans_key {
@@ -261,14 +263,15 @@ struct policydb {
struct avtab te_avtab;
/* role transitions */
- struct role_trans *role_tr;
+ struct hashtab role_tr;
/* file transitions with the last path component */
/* quickly exclude lookups when parent ttype has no rules */
struct ebitmap filename_trans_ttypes;
/* actual set of filename_trans rules */
- struct hashtab *filename_trans;
- u32 filename_trans_count;
+ struct hashtab filename_trans;
+ /* only used if policyvers < POLICYDB_VERSION_COMP_FTRANS */
+ u32 compat_filename_trans_count;
/* bools indexed by (value - 1) */
struct cond_bool_datum **bool_val_to_struct;
@@ -291,7 +294,7 @@ struct policydb {
struct genfs *genfs;
/* range transitions table (range_trans_key -> mls_range) */
- struct hashtab *range_tr;
+ struct hashtab range_tr;
/* type -> attribute reverse mapping */
struct ebitmap *type_attr_map_array;
diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
index 8ad34fd031d1..313919bd42f8 100644
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c
@@ -482,11 +482,11 @@ static void security_dump_masked_av(struct policydb *policydb,
/* init permission_names */
if (common_dat &&
- hashtab_map(common_dat->permissions.table,
+ hashtab_map(&common_dat->permissions.table,
dump_masked_av_helper, permission_names) < 0)
goto out;
- if (hashtab_map(tclass_dat->permissions.table,
+ if (hashtab_map(&tclass_dat->permissions.table,
dump_masked_av_helper, permission_names) < 0)
goto out;
@@ -1441,7 +1441,7 @@ static int string_to_context_struct(struct policydb *pol,
*p++ = 0;
- usrdatum = hashtab_search(pol->p_users.table, scontextp);
+ usrdatum = hashtab_search(&pol->p_users.table, scontextp);
if (!usrdatum)
goto out;
@@ -1457,7 +1457,7 @@ static int string_to_context_struct(struct policydb *pol,
*p++ = 0;
- role = hashtab_search(pol->p_roles.table, scontextp);
+ role = hashtab_search(&pol->p_roles.table, scontextp);
if (!role)
goto out;
ctx->role = role->value;
@@ -1469,7 +1469,7 @@ static int string_to_context_struct(struct policydb *pol,
oldc = *p;
*p++ = 0;
- typdatum = hashtab_search(pol->p_types.table, scontextp);
+ typdatum = hashtab_search(&pol->p_types.table, scontextp);
if (!typdatum || typdatum->attribute)
goto out;
@@ -1490,42 +1490,6 @@ out:
return rc;
}
-int context_add_hash(struct policydb *policydb,
- struct context *context)
-{
- int rc;
- char *str;
- int len;
-
- if (context->str) {
- context->hash = context_compute_hash(context->str);
- } else {
- rc = context_struct_to_string(policydb, context,
- &str, &len);
- if (rc)
- return rc;
- context->hash = context_compute_hash(str);
- kfree(str);
- }
- return 0;
-}
-
-static int context_struct_to_sid(struct selinux_state *state,
- struct context *context, u32 *sid)
-{
- int rc;
- struct sidtab *sidtab = state->ss->sidtab;
- struct policydb *policydb = &state->ss->policydb;
-
- if (!context->hash) {
- rc = context_add_hash(policydb, context);
- if (rc)
- return rc;
- }
-
- return sidtab_context_to_sid(sidtab, context, sid);
-}
-
static int security_context_to_sid_core(struct selinux_state *state,
const char *scontext, u32 scontext_len,
u32 *sid, u32 def_sid, gfp_t gfp_flags,
@@ -1580,7 +1544,7 @@ static int security_context_to_sid_core(struct selinux_state *state,
str = NULL;
} else if (rc)
goto out_unlock;
- rc = context_struct_to_sid(state, &context, sid);
+ rc = sidtab_context_to_sid(sidtab, &context, sid);
context_destroy(&context);
out_unlock:
read_unlock(&state->ss->policy_rwlock);
@@ -1707,7 +1671,7 @@ static void filename_compute_type(struct policydb *policydb,
ft.tclass = tclass;
ft.name = objname;
- datum = hashtab_search(policydb->filename_trans, &ft);
+ datum = hashtab_search(&policydb->filename_trans, &ft);
while (datum) {
if (ebitmap_get_bit(&datum->stypes, stype - 1)) {
newcontext->type = datum->otype;
@@ -1731,7 +1695,6 @@ static int security_compute_sid(struct selinux_state *state,
struct class_datum *cladatum = NULL;
struct context *scontext, *tcontext, newcontext;
struct sidtab_entry *sentry, *tentry;
- struct role_trans *roletr = NULL;
struct avtab_key avkey;
struct avtab_datum *avdatum;
struct avtab_node *node;
@@ -1812,7 +1775,7 @@ static int security_compute_sid(struct selinux_state *state,
} else if (cladatum && cladatum->default_role == DEFAULT_TARGET) {
newcontext.role = tcontext->role;
} else {
- if ((tclass == policydb->process_class) || (sock == true))
+ if ((tclass == policydb->process_class) || sock)
newcontext.role = scontext->role;
else
newcontext.role = OBJECT_R_VAL;
@@ -1824,7 +1787,7 @@ static int security_compute_sid(struct selinux_state *state,
} else if (cladatum && cladatum->default_type == DEFAULT_TARGET) {
newcontext.type = tcontext->type;
} else {
- if ((tclass == policydb->process_class) || (sock == true)) {
+ if ((tclass == policydb->process_class) || sock) {
/* Use the type of process. */
newcontext.type = scontext->type;
} else {
@@ -1864,16 +1827,16 @@ static int security_compute_sid(struct selinux_state *state,
/* Check for class-specific changes. */
if (specified & AVTAB_TRANSITION) {
/* Look for a role transition rule. */
- for (roletr = policydb->role_tr; roletr;
- roletr = roletr->next) {
- if ((roletr->role == scontext->role) &&
- (roletr->type == tcontext->type) &&
- (roletr->tclass == tclass)) {
- /* Use the role transition rule. */
- newcontext.role = roletr->new_role;
- break;
- }
- }
+ struct role_trans_datum *rtd;
+ struct role_trans_key rtk = {
+ .role = scontext->role,
+ .type = tcontext->type,
+ .tclass = tclass,
+ };
+
+ rtd = hashtab_search(&policydb->role_tr, &rtk);
+ if (rtd)
+ newcontext.role = rtd->new_role;
}
/* Set the MLS attributes.
@@ -1891,7 +1854,7 @@ static int security_compute_sid(struct selinux_state *state,
goto out_unlock;
}
/* Obtain the sid for the context. */
- rc = context_struct_to_sid(state, &newcontext, out_sid);
+ rc = sidtab_context_to_sid(sidtab, &newcontext, out_sid);
out_unlock:
read_unlock(&state->ss->policy_rwlock);
context_destroy(&newcontext);
@@ -2043,7 +2006,6 @@ static int convert_context(struct context *oldc, struct context *newc, void *p)
context_init(newc);
newc->str = s;
newc->len = oldc->len;
- newc->hash = oldc->hash;
return 0;
}
kfree(s);
@@ -2062,7 +2024,7 @@ static int convert_context(struct context *oldc, struct context *newc, void *p)
/* Convert the user. */
rc = -EINVAL;
- usrdatum = hashtab_search(args->newp->p_users.table,
+ usrdatum = hashtab_search(&args->newp->p_users.table,
sym_name(args->oldp,
SYM_USERS, oldc->user - 1));
if (!usrdatum)
@@ -2071,7 +2033,7 @@ static int convert_context(struct context *oldc, struct context *newc, void *p)
/* Convert the role. */
rc = -EINVAL;
- role = hashtab_search(args->newp->p_roles.table,
+ role = hashtab_search(&args->newp->p_roles.table,
sym_name(args->oldp, SYM_ROLES, oldc->role - 1));
if (!role)
goto bad;
@@ -2079,7 +2041,7 @@ static int convert_context(struct context *oldc, struct context *newc, void *p)
/* Convert the type. */
rc = -EINVAL;
- typdatum = hashtab_search(args->newp->p_types.table,
+ typdatum = hashtab_search(&args->newp->p_types.table,
sym_name(args->oldp,
SYM_TYPES, oldc->type - 1));
if (!typdatum)
@@ -2120,10 +2082,6 @@ static int convert_context(struct context *oldc, struct context *newc, void *p)
goto bad;
}
- rc = context_add_hash(args->newp, newc);
- if (rc)
- goto bad;
-
return 0;
bad:
/* Map old representation to string and save it. */
@@ -2133,7 +2091,6 @@ bad:
context_destroy(newc);
newc->str = s;
newc->len = len;
- newc->hash = context_compute_hash(s);
pr_info("SELinux: Context %s became invalid (unmapped).\n",
newc->str);
return 0;
@@ -2350,12 +2307,14 @@ int security_port_sid(struct selinux_state *state,
u8 protocol, u16 port, u32 *out_sid)
{
struct policydb *policydb;
+ struct sidtab *sidtab;
struct ocontext *c;
int rc = 0;
read_lock(&state->ss->policy_rwlock);
policydb = &state->ss->policydb;
+ sidtab = state->ss->sidtab;
c = policydb->ocontexts[OCON_PORT];
while (c) {
@@ -2368,7 +2327,7 @@ int security_port_sid(struct selinux_state *state,
if (c) {
if (!c->sid[0]) {
- rc = context_struct_to_sid(state, &c->context[0],
+ rc = sidtab_context_to_sid(sidtab, &c->context[0],
&c->sid[0]);
if (rc)
goto out;
@@ -2393,12 +2352,14 @@ int security_ib_pkey_sid(struct selinux_state *state,
u64 subnet_prefix, u16 pkey_num, u32 *out_sid)
{
struct policydb *policydb;
+ struct sidtab *sidtab;
struct ocontext *c;
int rc = 0;
read_lock(&state->ss->policy_rwlock);
policydb = &state->ss->policydb;
+ sidtab = state->ss->sidtab;
c = policydb->ocontexts[OCON_IBPKEY];
while (c) {
@@ -2412,7 +2373,7 @@ int security_ib_pkey_sid(struct selinux_state *state,
if (c) {
if (!c->sid[0]) {
- rc = context_struct_to_sid(state,
+ rc = sidtab_context_to_sid(sidtab,
&c->context[0],
&c->sid[0]);
if (rc)
@@ -2437,12 +2398,14 @@ int security_ib_endport_sid(struct selinux_state *state,
const char *dev_name, u8 port_num, u32 *out_sid)
{
struct policydb *policydb;
+ struct sidtab *sidtab;
struct ocontext *c;
int rc = 0;
read_lock(&state->ss->policy_rwlock);
policydb = &state->ss->policydb;
+ sidtab = state->ss->sidtab;
c = policydb->ocontexts[OCON_IBENDPORT];
while (c) {
@@ -2457,7 +2420,7 @@ int security_ib_endport_sid(struct selinux_state *state,
if (c) {
if (!c->sid[0]) {
- rc = context_struct_to_sid(state, &c->context[0],
+ rc = sidtab_context_to_sid(sidtab, &c->context[0],
&c->sid[0]);
if (rc)
goto out;
@@ -2480,12 +2443,14 @@ int security_netif_sid(struct selinux_state *state,
char *name, u32 *if_sid)
{
struct policydb *policydb;
+ struct sidtab *sidtab;
int rc = 0;
struct ocontext *c;
read_lock(&state->ss->policy_rwlock);
policydb = &state->ss->policydb;
+ sidtab = state->ss->sidtab;
c = policydb->ocontexts[OCON_NETIF];
while (c) {
@@ -2496,11 +2461,11 @@ int security_netif_sid(struct selinux_state *state,
if (c) {
if (!c->sid[0] || !c->sid[1]) {
- rc = context_struct_to_sid(state, &c->context[0],
+ rc = sidtab_context_to_sid(sidtab, &c->context[0],
&c->sid[0]);
if (rc)
goto out;
- rc = context_struct_to_sid(state, &c->context[1],
+ rc = sidtab_context_to_sid(sidtab, &c->context[1],
&c->sid[1]);
if (rc)
goto out;
@@ -2541,12 +2506,14 @@ int security_node_sid(struct selinux_state *state,
u32 *out_sid)
{
struct policydb *policydb;
+ struct sidtab *sidtab;
int rc;
struct ocontext *c;
read_lock(&state->ss->policy_rwlock);
policydb = &state->ss->policydb;
+ sidtab = state->ss->sidtab;
switch (domain) {
case AF_INET: {
@@ -2588,7 +2555,7 @@ int security_node_sid(struct selinux_state *state,
if (c) {
if (!c->sid[0]) {
- rc = context_struct_to_sid(state,
+ rc = sidtab_context_to_sid(sidtab,
&c->context[0],
&c->sid[0]);
if (rc)
@@ -2656,7 +2623,7 @@ int security_get_user_sids(struct selinux_state *state,
goto out_unlock;
rc = -EINVAL;
- user = hashtab_search(policydb->p_users.table, username);
+ user = hashtab_search(&policydb->p_users.table, username);
if (!user)
goto out_unlock;
@@ -2672,17 +2639,12 @@ int security_get_user_sids(struct selinux_state *state,
usercon.role = i + 1;
ebitmap_for_each_positive_bit(&role->types, tnode, j) {
usercon.type = j + 1;
- /*
- * The same context struct is reused here so the hash
- * must be reset.
- */
- usercon.hash = 0;
if (mls_setup_user_range(policydb, fromcon, user,
&usercon))
continue;
- rc = context_struct_to_sid(state, &usercon, &sid);
+ rc = sidtab_context_to_sid(sidtab, &usercon, &sid);
if (rc)
goto out_unlock;
if (mynel < maxnel) {
@@ -2753,6 +2715,7 @@ static inline int __security_genfs_sid(struct selinux_state *state,
u32 *sid)
{
struct policydb *policydb = &state->ss->policydb;
+ struct sidtab *sidtab = state->ss->sidtab;
int len;
u16 sclass;
struct genfs *genfs;
@@ -2787,7 +2750,7 @@ static inline int __security_genfs_sid(struct selinux_state *state,
goto out;
if (!c->sid[0]) {
- rc = context_struct_to_sid(state, &c->context[0], &c->sid[0]);
+ rc = sidtab_context_to_sid(sidtab, &c->context[0], &c->sid[0]);
if (rc)
goto out;
}
@@ -2829,6 +2792,7 @@ int security_genfs_sid(struct selinux_state *state,
int security_fs_use(struct selinux_state *state, struct super_block *sb)
{
struct policydb *policydb;
+ struct sidtab *sidtab;
int rc = 0;
struct ocontext *c;
struct superblock_security_struct *sbsec = sb->s_security;
@@ -2837,6 +2801,7 @@ int security_fs_use(struct selinux_state *state, struct super_block *sb)
read_lock(&state->ss->policy_rwlock);
policydb = &state->ss->policydb;
+ sidtab = state->ss->sidtab;
c = policydb->ocontexts[OCON_FSUSE];
while (c) {
@@ -2848,7 +2813,7 @@ int security_fs_use(struct selinux_state *state, struct super_block *sb)
if (c) {
sbsec->behavior = c->v.behavior;
if (!c->sid[0]) {
- rc = context_struct_to_sid(state, &c->context[0],
+ rc = sidtab_context_to_sid(sidtab, &c->context[0],
&c->sid[0]);
if (rc)
goto out;
@@ -3010,7 +2975,7 @@ static int security_preserve_bools(struct selinux_state *state,
if (rc)
goto out;
for (i = 0; i < nbools; i++) {
- booldatum = hashtab_search(policydb->p_bools.table, bnames[i]);
+ booldatum = hashtab_search(&policydb->p_bools.table, bnames[i]);
if (booldatum)
booldatum->state = bvalues[i];
}
@@ -3096,7 +3061,7 @@ int security_sid_mls_copy(struct selinux_state *state,
goto out_unlock;
}
}
- rc = context_struct_to_sid(state, &newcon, new_sid);
+ rc = sidtab_context_to_sid(sidtab, &newcon, new_sid);
out_unlock:
read_unlock(&state->ss->policy_rwlock);
context_destroy(&newcon);
@@ -3224,8 +3189,8 @@ int security_get_classes(struct selinux_state *state,
if (!*classes)
goto out;
- rc = hashtab_map(policydb->p_classes.table, get_classes_callback,
- *classes);
+ rc = hashtab_map(&policydb->p_classes.table, get_classes_callback,
+ *classes);
if (rc) {
int i;
for (i = 0; i < *nclasses; i++)
@@ -3261,7 +3226,7 @@ int security_get_permissions(struct selinux_state *state,
read_lock(&state->ss->policy_rwlock);
rc = -EINVAL;
- match = hashtab_search(policydb->p_classes.table, class);
+ match = hashtab_search(&policydb->p_classes.table, class);
if (!match) {
pr_err("SELinux: %s: unrecognized class %s\n",
__func__, class);
@@ -3275,14 +3240,14 @@ int security_get_permissions(struct selinux_state *state,
goto out;
if (match->comdatum) {
- rc = hashtab_map(match->comdatum->permissions.table,
- get_permissions_callback, *perms);
+ rc = hashtab_map(&match->comdatum->permissions.table,
+ get_permissions_callback, *perms);
if (rc)
goto err;
}
- rc = hashtab_map(match->permissions.table, get_permissions_callback,
- *perms);
+ rc = hashtab_map(&match->permissions.table, get_permissions_callback,
+ *perms);
if (rc)
goto err;
@@ -3400,7 +3365,7 @@ int selinux_audit_rule_init(u32 field, u32 op, char *rulestr, void **vrule)
case AUDIT_SUBJ_USER:
case AUDIT_OBJ_USER:
rc = -EINVAL;
- userdatum = hashtab_search(policydb->p_users.table, rulestr);
+ userdatum = hashtab_search(&policydb->p_users.table, rulestr);
if (!userdatum)
goto out;
tmprule->au_ctxt.user = userdatum->value;
@@ -3408,7 +3373,7 @@ int selinux_audit_rule_init(u32 field, u32 op, char *rulestr, void **vrule)
case AUDIT_SUBJ_ROLE:
case AUDIT_OBJ_ROLE:
rc = -EINVAL;
- roledatum = hashtab_search(policydb->p_roles.table, rulestr);
+ roledatum = hashtab_search(&policydb->p_roles.table, rulestr);
if (!roledatum)
goto out;
tmprule->au_ctxt.role = roledatum->value;
@@ -3416,7 +3381,7 @@ int selinux_audit_rule_init(u32 field, u32 op, char *rulestr, void **vrule)
case AUDIT_SUBJ_TYPE:
case AUDIT_OBJ_TYPE:
rc = -EINVAL;
- typedatum = hashtab_search(policydb->p_types.table, rulestr);
+ typedatum = hashtab_search(&policydb->p_types.table, rulestr);
if (!typedatum)
goto out;
tmprule->au_ctxt.type = typedatum->value;
@@ -3689,7 +3654,7 @@ int security_netlbl_secattr_to_sid(struct selinux_state *state,
if (!mls_context_isvalid(policydb, &ctx_new))
goto out_free;
- rc = context_struct_to_sid(state, &ctx_new, sid);
+ rc = sidtab_context_to_sid(sidtab, &ctx_new, sid);
if (rc)
goto out_free;
diff --git a/security/selinux/ss/services.h b/security/selinux/ss/services.h
index e9bddf33e53d..a06f3d835216 100644
--- a/security/selinux/ss/services.h
+++ b/security/selinux/ss/services.h
@@ -8,7 +8,6 @@
#define _SS_SERVICES_H_
#include "policydb.h"
-#include "context.h"
/* Mapping for a single class */
struct selinux_mapping {
@@ -37,6 +36,4 @@ void services_compute_xperms_drivers(struct extended_perms *xperms,
void services_compute_xperms_decision(struct extended_perms_decision *xpermd,
struct avtab_node *node);
-int context_add_hash(struct policydb *policydb, struct context *context);
-
#endif /* _SS_SERVICES_H_ */
diff --git a/security/selinux/ss/sidtab.c b/security/selinux/ss/sidtab.c
index f511ffccb131..eb6d27b5aeb4 100644
--- a/security/selinux/ss/sidtab.c
+++ b/security/selinux/ss/sidtab.c
@@ -54,14 +54,15 @@ int sidtab_init(struct sidtab *s)
return 0;
}
-static u32 context_to_sid(struct sidtab *s, struct context *context)
+static u32 context_to_sid(struct sidtab *s, struct context *context, u32 hash)
{
struct sidtab_entry *entry;
u32 sid = 0;
rcu_read_lock();
- hash_for_each_possible_rcu(s->context_to_sid, entry, list,
- context->hash) {
+ hash_for_each_possible_rcu(s->context_to_sid, entry, list, hash) {
+ if (entry->hash != hash)
+ continue;
if (context_cmp(&entry->context, context)) {
sid = entry->sid;
break;
@@ -74,6 +75,7 @@ static u32 context_to_sid(struct sidtab *s, struct context *context)
int sidtab_set_initial(struct sidtab *s, u32 sid, struct context *context)
{
struct sidtab_isid_entry *isid;
+ u32 hash;
int rc;
if (sid == 0 || sid > SECINITSID_NUM)
@@ -90,15 +92,18 @@ int sidtab_set_initial(struct sidtab *s, u32 sid, struct context *context)
#endif
isid->set = 1;
+ hash = context_compute_hash(context);
+
/*
* Multiple initial sids may map to the same context. Check that this
* context is not already represented in the context_to_sid hashtable
* to avoid duplicate entries and long linked lists upon hash
* collision.
*/
- if (!context_to_sid(s, context)) {
+ if (!context_to_sid(s, context, hash)) {
isid->entry.sid = sid;
- hash_add(s->context_to_sid, &isid->entry.list, context->hash);
+ isid->entry.hash = hash;
+ hash_add(s->context_to_sid, &isid->entry.list, hash);
}
return 0;
@@ -259,12 +264,12 @@ int sidtab_context_to_sid(struct sidtab *s, struct context *context,
u32 *sid)
{
unsigned long flags;
- u32 count;
+ u32 count, hash = context_compute_hash(context);
struct sidtab_convert_params *convert;
struct sidtab_entry *dst, *dst_convert;
int rc;
- *sid = context_to_sid(s, context);
+ *sid = context_to_sid(s, context, hash);
if (*sid)
return 0;
@@ -272,12 +277,11 @@ int sidtab_context_to_sid(struct sidtab *s, struct context *context,
spin_lock_irqsave(&s->lock, flags);
rc = 0;
- *sid = context_to_sid(s, context);
+ *sid = context_to_sid(s, context, hash);
if (*sid)
goto out_unlock;
- /* read entries only after reading count */
- count = smp_load_acquire(&s->count);
+ count = s->count;
convert = s->convert;
/* bail out if we already reached max entries */
@@ -292,6 +296,7 @@ int sidtab_context_to_sid(struct sidtab *s, struct context *context,
goto out_unlock;
dst->sid = index_to_sid(count);
+ dst->hash = hash;
rc = context_cpy(&dst->context, context);
if (rc)
@@ -316,10 +321,11 @@ int sidtab_context_to_sid(struct sidtab *s, struct context *context,
goto out_unlock;
}
dst_convert->sid = index_to_sid(count);
+ dst_convert->hash = context_compute_hash(&dst_convert->context);
convert->target->count = count + 1;
hash_add_rcu(convert->target->context_to_sid,
- &dst_convert->list, dst_convert->context.hash);
+ &dst_convert->list, dst_convert->hash);
}
if (context->len)
@@ -330,7 +336,7 @@ int sidtab_context_to_sid(struct sidtab *s, struct context *context,
/* write entries before updating count */
smp_store_release(&s->count, count + 1);
- hash_add_rcu(s->context_to_sid, &dst->list, dst->context.hash);
+ hash_add_rcu(s->context_to_sid, &dst->list, dst->hash);
rc = 0;
out_unlock:
@@ -346,10 +352,9 @@ static void sidtab_convert_hashtable(struct sidtab *s, u32 count)
for (i = 0; i < count; i++) {
entry = sidtab_do_lookup(s, i, 0);
entry->sid = index_to_sid(i);
+ entry->hash = context_compute_hash(&entry->context);
- hash_add_rcu(s->context_to_sid, &entry->list,
- entry->context.hash);
-
+ hash_add_rcu(s->context_to_sid, &entry->list, entry->hash);
}
}
diff --git a/security/selinux/ss/sidtab.h b/security/selinux/ss/sidtab.h
index 3311d9f236c0..f2a84560b8b3 100644
--- a/security/selinux/ss/sidtab.h
+++ b/security/selinux/ss/sidtab.h
@@ -19,6 +19,7 @@
struct sidtab_entry {
u32 sid;
+ u32 hash;
struct context context;
#if CONFIG_SECURITY_SELINUX_SID2STR_CACHE_SIZE > 0
struct sidtab_str_cache __rcu *cache;
diff --git a/security/selinux/ss/symtab.c b/security/selinux/ss/symtab.c
index dc2ce94165d3..92d7a948070e 100644
--- a/security/selinux/ss/symtab.c
+++ b/security/selinux/ss/symtab.c
@@ -35,10 +35,7 @@ static int symcmp(struct hashtab *h, const void *key1, const void *key2)
int symtab_init(struct symtab *s, unsigned int size)
{
- s->table = hashtab_create(symhash, symcmp, size);
- if (!s->table)
- return -ENOMEM;
s->nprim = 0;
- return 0;
+ return hashtab_init(&s->table, symhash, symcmp, size);
}
diff --git a/security/selinux/ss/symtab.h b/security/selinux/ss/symtab.h
index d75fcafe7281..f145301b9d9f 100644
--- a/security/selinux/ss/symtab.h
+++ b/security/selinux/ss/symtab.h
@@ -13,7 +13,7 @@
#include "hashtab.h"
struct symtab {
- struct hashtab *table; /* hash table (keyed on a string) */
+ struct hashtab table; /* hash table (keyed on a string) */
u32 nprim; /* number of primary names in table */
};
diff --git a/security/tomoyo/common.c b/security/tomoyo/common.c
index 1b467381986f..c16b8c1b03e7 100644
--- a/security/tomoyo/common.c
+++ b/security/tomoyo/common.c
@@ -1025,7 +1025,7 @@ static bool tomoyo_select_domain(struct tomoyo_io_buffer *head,
if (domain)
head->r.domain = &domain->list;
else
- head->r.eof = 1;
+ head->r.eof = true;
tomoyo_io_printf(head, "# select %s\n", data);
if (domain && domain->is_deleted)
tomoyo_io_printf(head, "# This is a deleted domain.\n");
@@ -2662,8 +2662,6 @@ ssize_t tomoyo_write_control(struct tomoyo_io_buffer *head,
if (!head->write)
return -EINVAL;
- if (!access_ok(buffer, buffer_len))
- return -EFAULT;
if (mutex_lock_interruptible(&head->io_sem))
return -EINTR;
head->read_user_buf_avail = 0;
diff --git a/sound/core/memalloc.c b/sound/core/memalloc.c
index a83553fbedf0..bea46ed157a6 100644
--- a/sound/core/memalloc.c
+++ b/sound/core/memalloc.c
@@ -143,7 +143,7 @@ int snd_dma_alloc_pages(int type, struct device *device, size_t size,
break;
case SNDRV_DMA_TYPE_VMALLOC:
gfp = snd_mem_get_gfp_flags(device, GFP_KERNEL | __GFP_HIGHMEM);
- dmab->area = __vmalloc(size, gfp, PAGE_KERNEL);
+ dmab->area = __vmalloc(size, gfp);
dmab->addr = 0;
break;
#ifdef CONFIG_HAS_DMA
diff --git a/sound/core/pcm_memory.c b/sound/core/pcm_memory.c
index fcab37ea6641..860935e3aea4 100644
--- a/sound/core/pcm_memory.c
+++ b/sound/core/pcm_memory.c
@@ -460,7 +460,7 @@ int _snd_pcm_lib_alloc_vmalloc_buffer(struct snd_pcm_substream *substream,
return 0; /* already large enough */
vfree(runtime->dma_area);
}
- runtime->dma_area = __vmalloc(size, gfp_flags, PAGE_KERNEL);
+ runtime->dma_area = __vmalloc(size, gfp_flags);
if (!runtime->dma_area)
return -ENOMEM;
runtime->dma_bytes = size;
diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index aef860256278..47838f57a647 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -3093,7 +3093,8 @@ static int snd_pcm_xferi_frames_ioctl(struct snd_pcm_substream *substream,
result = snd_pcm_lib_write(substream, xferi.buf, xferi.frames);
else
result = snd_pcm_lib_read(substream, xferi.buf, xferi.frames);
- __put_user(result, &_xferi->result);
+ if (put_user(result, &_xferi->result))
+ return -EFAULT;
return result < 0 ? result : 0;
}
@@ -3122,7 +3123,8 @@ static int snd_pcm_xfern_frames_ioctl(struct snd_pcm_substream *substream,
else
result = snd_pcm_lib_readv(substream, bufs, xfern.frames);
kfree(bufs);
- __put_user(result, &_xfern->result);
+ if (put_user(result, &_xfern->result))
+ return -EFAULT;
return result < 0 ? result : 0;
}
@@ -3137,7 +3139,8 @@ static int snd_pcm_rewind_ioctl(struct snd_pcm_substream *substream,
if (put_user(0, _frames))
return -EFAULT;
result = snd_pcm_rewind(substream, frames);
- __put_user(result, _frames);
+ if (put_user(result, _frames))
+ return -EFAULT;
return result < 0 ? result : 0;
}
@@ -3152,7 +3155,8 @@ static int snd_pcm_forward_ioctl(struct snd_pcm_substream *substream,
if (put_user(0, _frames))
return -EFAULT;
result = snd_pcm_forward(substream, frames);
- __put_user(result, _frames);
+ if (put_user(result, _frames))
+ return -EFAULT;
return result < 0 ? result : 0;
}
diff --git a/sound/soc/txx9/txx9aclc-ac97.c b/sound/soc/txx9/txx9aclc-ac97.c
index b1d9615f2375..d9e348444bd0 100644
--- a/sound/soc/txx9/txx9aclc-ac97.c
+++ b/sound/soc/txx9/txx9aclc-ac97.c
@@ -14,6 +14,7 @@
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/gfp.h>
+#include <asm/mach-tx39xx/ioremap.h> /* for TXX9_DIRECTMAP_BASE */
#include <sound/core.h>
#include <sound/pcm.h>
#include <sound/soc.h>
diff --git a/tools/arch/x86/include/asm/orc_types.h b/tools/arch/x86/include/asm/orc_types.h
index 6e060907c163..d25534940bde 100644
--- a/tools/arch/x86/include/asm/orc_types.h
+++ b/tools/arch/x86/include/asm/orc_types.h
@@ -58,8 +58,7 @@
#define ORC_TYPE_CALL 0
#define ORC_TYPE_REGS 1
#define ORC_TYPE_REGS_IRET 2
-#define UNWIND_HINT_TYPE_SAVE 3
-#define UNWIND_HINT_TYPE_RESTORE 4
+#define UNWIND_HINT_TYPE_RET_OFFSET 3
#ifndef __ASSEMBLY__
/*
diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
index 3f3f780c8c65..43e24903812c 100644
--- a/tools/arch/x86/include/uapi/asm/kvm.h
+++ b/tools/arch/x86/include/uapi/asm/kvm.h
@@ -400,6 +400,7 @@ struct kvm_sync_regs {
struct kvm_vmx_nested_state_data {
__u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
__u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
+ __u64 preemption_timer_deadline;
};
struct kvm_vmx_nested_state_hdr {
diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature
index 3e0c019ef297..3abd4316cd4f 100644
--- a/tools/build/Makefile.feature
+++ b/tools/build/Makefile.feature
@@ -98,7 +98,8 @@ FEATURE_TESTS_EXTRA := \
llvm \
llvm-version \
clang \
- libbpf
+ libbpf \
+ libpfm4
FEATURE_TESTS ?= $(FEATURE_TESTS_BASIC)
diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index 92012381393a..84f845b9627d 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -69,7 +69,8 @@ FILES= \
test-libaio.bin \
test-libzstd.bin \
test-clang-bpf-global-var.bin \
- test-file-handle.bin
+ test-file-handle.bin \
+ test-libpfm4.bin
FILES := $(addprefix $(OUTPUT),$(FILES))
@@ -331,6 +332,9 @@ $(OUTPUT)test-clang-bpf-global-var.bin:
$(OUTPUT)test-file-handle.bin:
$(BUILD)
+$(OUTPUT)test-libpfm4.bin:
+ $(BUILD) -lpfm
+
###############################
clean:
diff --git a/tools/build/feature/test-libopencsd.c b/tools/build/feature/test-libopencsd.c
index 2b0e02c38870..1547bc2c0950 100644
--- a/tools/build/feature/test-libopencsd.c
+++ b/tools/build/feature/test-libopencsd.c
@@ -4,9 +4,9 @@
/*
* Check OpenCSD library version is sufficient to provide required features
*/
-#define OCSD_MIN_VER ((0 << 16) | (11 << 8) | (0))
+#define OCSD_MIN_VER ((0 << 16) | (14 << 8) | (0))
#if !defined(OCSD_VER_NUM) || (OCSD_VER_NUM < OCSD_MIN_VER)
-#error "OpenCSD >= 0.11.0 is required"
+#error "OpenCSD >= 0.14.0 is required"
#endif
int main(void)
diff --git a/tools/build/feature/test-libpfm4.c b/tools/build/feature/test-libpfm4.c
new file mode 100644
index 000000000000..af49b259459e
--- /dev/null
+++ b/tools/build/feature/test-libpfm4.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <sys/types.h>
+#include <perfmon/pfmlib.h>
+
+int main(void)
+{
+ pfm_initialize();
+ return 0;
+}
diff --git a/tools/cgroup/iocost_monitor.py b/tools/cgroup/iocost_monitor.py
index 9d8e9613008a..3c21de88af9e 100644
--- a/tools/cgroup/iocost_monitor.py
+++ b/tools/cgroup/iocost_monitor.py
@@ -28,7 +28,8 @@ parser.add_argument('devname', metavar='DEV',
parser.add_argument('--cgroup', action='append', metavar='REGEX',
help='Regex for target cgroups, ')
parser.add_argument('--interval', '-i', metavar='SECONDS', type=float, default=1,
- help='Monitoring interval in seconds')
+ help='Monitoring interval in seconds (0 exits immediately '
+ 'after checking requirements)')
parser.add_argument('--json', action='store_true',
help='Output in json')
args = parser.parse_args()
@@ -112,14 +113,14 @@ class IocStat:
def dict(self, now):
return { 'device' : devname,
- 'timestamp' : str(now),
- 'enabled' : str(int(self.enabled)),
- 'running' : str(int(self.running)),
- 'period_ms' : str(self.period_ms),
- 'period_at' : str(self.period_at),
- 'period_vtime_at' : str(self.vperiod_at),
- 'busy_level' : str(self.busy_level),
- 'vrate_pct' : str(self.vrate_pct), }
+ 'timestamp' : now,
+ 'enabled' : self.enabled,
+ 'running' : self.running,
+ 'period_ms' : self.period_ms,
+ 'period_at' : self.period_at,
+ 'period_vtime_at' : self.vperiod_at,
+ 'busy_level' : self.busy_level,
+ 'vrate_pct' : self.vrate_pct, }
def table_preamble_str(self):
state = ('RUN' if self.running else 'IDLE') if self.enabled else 'OFF'
@@ -179,19 +180,19 @@ class IocgStat:
def dict(self, now, path):
out = { 'cgroup' : path,
- 'timestamp' : str(now),
- 'is_active' : str(int(self.is_active)),
- 'weight' : str(self.weight),
- 'weight_active' : str(self.active),
- 'weight_inuse' : str(self.inuse),
- 'hweight_active_pct' : str(self.hwa_pct),
- 'hweight_inuse_pct' : str(self.hwi_pct),
- 'inflight_pct' : str(self.inflight_pct),
- 'debt_ms' : str(self.debt_ms),
- 'use_delay' : str(self.use_delay),
- 'delay_ms' : str(self.delay_ms),
- 'usage_pct' : str(self.usage),
- 'address' : str(hex(self.address)) }
+ 'timestamp' : now,
+ 'is_active' : self.is_active,
+ 'weight' : self.weight,
+ 'weight_active' : self.active,
+ 'weight_inuse' : self.inuse,
+ 'hweight_active_pct' : self.hwa_pct,
+ 'hweight_inuse_pct' : self.hwi_pct,
+ 'inflight_pct' : self.inflight_pct,
+ 'debt_ms' : self.debt_ms,
+ 'use_delay' : self.use_delay,
+ 'delay_ms' : self.delay_ms,
+ 'usage_pct' : self.usage,
+ 'address' : self.address }
for i in range(len(self.usages)):
out[f'usage_pct_{i}'] = str(self.usages[i])
return out
@@ -248,6 +249,9 @@ for i, ptr in radix_tree_for_each(blkcg_root.blkg_tree.address_of_()):
if ioc is None:
err(f'Could not find ioc for {devname}');
+if interval == 0:
+ sys.exit(0)
+
# Keep printing
while True:
now = time.time()
diff --git a/tools/include/linux/rbtree.h b/tools/include/linux/rbtree.h
index e03b1ea23e0e..30dd21f976c3 100644
--- a/tools/include/linux/rbtree.h
+++ b/tools/include/linux/rbtree.h
@@ -11,7 +11,7 @@
I know it's not the cleaner way, but in C (not in C++) to get
performances and genericity...
- See Documentation/rbtree.txt for documentation and samples.
+ See Documentation/core-api/rbtree.rst for documentation and samples.
*/
#ifndef __TOOLS_LINUX_PERF_RBTREE_H
diff --git a/tools/include/linux/rbtree_augmented.h b/tools/include/linux/rbtree_augmented.h
index 381aa948610d..570bb9794421 100644
--- a/tools/include/linux/rbtree_augmented.h
+++ b/tools/include/linux/rbtree_augmented.h
@@ -23,7 +23,7 @@
* rb_insert_augmented() and rb_erase_augmented() are intended to be public.
* The rest are implementation details you are not expected to depend on.
*
- * See Documentation/rbtree.txt for documentation and samples.
+ * See Documentation/core-api/rbtree.rst for documentation and samples.
*/
struct rb_augment_callbacks {
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 428c7dde6b4b..fdd632c833b4 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -116,7 +116,7 @@ struct kvm_irq_level {
* ACPI gsi notion of irq.
* For IA-64 (APIC model) IOAPIC0: irq 0-23; IOAPIC1: irq 24-47..
* For X86 (standard AT mode) PIC0/1: irq 0-15. IOAPIC0: 0-23..
- * For ARM: See Documentation/virt/kvm/api.txt
+ * For ARM: See Documentation/virt/kvm/api.rst
*/
union {
__u32 irq;
@@ -1107,7 +1107,7 @@ struct kvm_xen_hvm_config {
*
* KVM_IRQFD_FLAG_RESAMPLE indicates resamplefd is valid and specifies
* the irqfd to operate in resampling mode for level triggered interrupt
- * emulation. See Documentation/virt/kvm/api.txt.
+ * emulation. See Documentation/virt/kvm/api.rst.
*/
#define KVM_IRQFD_FLAG_RESAMPLE (1 << 1)
diff --git a/tools/include/uapi/linux/stat.h b/tools/include/uapi/linux/stat.h
index ad80a5c885d5..d1192783139a 100644
--- a/tools/include/uapi/linux/stat.h
+++ b/tools/include/uapi/linux/stat.h
@@ -148,9 +148,18 @@ struct statx {
#define STATX_BLOCKS 0x00000400U /* Want/got stx_blocks */
#define STATX_BASIC_STATS 0x000007ffU /* The stuff in the normal stat struct */
#define STATX_BTIME 0x00000800U /* Want/got stx_btime */
-#define STATX_ALL 0x00000fffU /* All currently supported flags */
+
#define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */
+#ifndef __KERNEL__
+/*
+ * This is deprecated, and shall remain the same value in the future. To avoid
+ * confusion please use the equivalent (STATX_BASIC_STATS | STATX_BTIME)
+ * instead.
+ */
+#define STATX_ALL 0x00000fffU
+#endif
+
/*
* Attributes to be found in stx_attributes and masked in stx_attributes_mask.
*
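The hunk above deprecates STATX_ALL in the UAPI header; new callers are expected to request (STATX_BASIC_STATS | STATX_BTIME) explicitly. A minimal userspace sketch of that usage (not part of this patch; it assumes the glibc statx(2) wrapper and an illustrative path):

#define _GNU_SOURCE
#include <fcntl.h>      /* AT_FDCWD */
#include <stdio.h>
#include <sys/stat.h>   /* statx(), struct statx, STATX_* */

int main(void)
{
	struct statx stx;

	/* Request the basic stats plus the birth time explicitly,
	 * rather than the deprecated STATX_ALL mask. */
	if (statx(AT_FDCWD, "/etc/hostname", 0,
		  STATX_BASIC_STATS | STATX_BTIME, &stx) != 0) {
		perror("statx");
		return 1;
	}

	/* stx_mask reports which of the requested fields were filled in. */
	if (stx.stx_mask & STATX_BTIME)
		printf("btime: %lld\n", (long long)stx.stx_btime.tv_sec);

	return 0;
}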
diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat
index e83fc8e868f4..d199a3694be8 100755
--- a/tools/kvm/kvm_stat/kvm_stat
+++ b/tools/kvm/kvm_stat/kvm_stat
@@ -32,6 +32,7 @@ import resource
import struct
import re
import subprocess
+import signal
from collections import defaultdict, namedtuple
from functools import reduce
from datetime import datetime
@@ -228,6 +229,8 @@ IOCTL_NUMBERS = {
'RESET': 0x00002403,
}
+signal_received = False
+
ENCODING = locale.getpreferredencoding(False)
TRACE_FILTER = re.compile(r'^[^\(]*$')
@@ -1500,8 +1503,7 @@ class StdFormat(object):
def get_banner(self):
return self._banner
- @staticmethod
- def get_statline(keys, s):
+ def get_statline(self, keys, s):
res = ''
for key in keys:
res += ' %9d' % s[key].delta
@@ -1517,27 +1519,71 @@ class CSVFormat(object):
def get_banner(self):
return self._banner
- @staticmethod
- def get_statline(keys, s):
+ def get_statline(self, keys, s):
return reduce(lambda res, key: "{},{!s}".format(res, s[key].delta),
keys, '')
def log(stats, opts, frmt, keys):
"""Prints statistics as reiterating key block, multiple value blocks."""
+ global signal_received
line = 0
banner_repeat = 20
+ f = None
+
+ def do_banner(opts):
+ nonlocal f
+ if opts.log_to_file:
+ if not f:
+ try:
+ f = open(opts.log_to_file, 'a')
+ except (IOError, OSError):
+ sys.exit("Error: Could not open file: %s" %
+ opts.log_to_file)
+ if isinstance(frmt, CSVFormat) and f.tell() != 0:
+ return
+ print(frmt.get_banner(), file=f or sys.stdout)
+
+ def do_statline(opts, values):
+ statline = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + \
+ frmt.get_statline(keys, values)
+ print(statline, file=f or sys.stdout)
+
+ do_banner(opts)
+ banner_printed = True
while True:
try:
time.sleep(opts.set_delay)
- if line % banner_repeat == 0:
- print(frmt.get_banner())
- print(datetime.now().strftime("%Y-%m-%d %H:%M:%S") +
- frmt.get_statline(keys, stats.get()))
- line += 1
+ if signal_received:
+ banner_printed = True
+ line = 0
+ f.close()
+ do_banner(opts)
+ signal_received = False
+ if (line % banner_repeat == 0 and not banner_printed and
+ not (opts.log_to_file and isinstance(frmt, CSVFormat))):
+ do_banner(opts)
+ banner_printed = True
+ values = stats.get()
+ if (not opts.skip_zero_records or
+ any(values[k].delta != 0 for k in keys)):
+ do_statline(opts, values)
+ line += 1
+ banner_printed = False
except KeyboardInterrupt:
break
+ if opts.log_to_file:
+ f.close()
+
+
+def handle_signal(sig, frame):
+ global signal_received
+
+ signal_received = True
+
+ return
+
def is_delay_valid(delay):
"""Verify delay is in valid value range."""
@@ -1610,7 +1656,7 @@ Press any other key to refresh statistics immediately.
argparser.add_argument('-c', '--csv',
action='store_true',
default=False,
- help='log in csv format - requires option -l/--log',
+ help='log in csv format - requires option -l/-L',
)
argparser.add_argument('-d', '--debugfs',
action='store_true',
@@ -1638,6 +1684,11 @@ Press any other key to refresh statistics immediately.
default=False,
help='run in logging mode (like vmstat)',
)
+ argparser.add_argument('-L', '--log-to-file',
+ type=str,
+ metavar='FILE',
+ help="like '--log', but logging to a file"
+ )
argparser.add_argument('-p', '--pid',
type=int,
default=0,
@@ -1655,9 +1706,16 @@ Press any other key to refresh statistics immediately.
default=False,
help='retrieve statistics from tracepoints',
)
+ argparser.add_argument('-z', '--skip-zero-records',
+ action='store_true',
+ default=False,
+ help='omit records with all zeros in logging mode',
+ )
options = argparser.parse_args()
- if options.csv and not options.log:
+ if options.csv and not (options.log or options.log_to_file):
sys.exit('Error: Option -c/--csv requires -l/--log')
+ if options.skip_zero_records and not (options.log or options.log_to_file):
+ sys.exit('Error: Option -z/--skip-zero-records requires -l/-L')
try:
# verify that we were passed a valid regex up front
re.compile(options.fields)
@@ -1737,7 +1795,9 @@ def main():
sys.stdout.write(' ' + '\n '.join(sorted(set(event_list))) + '\n')
sys.exit(0)
- if options.log:
+ if options.log or options.log_to_file:
+ if options.log_to_file:
+ signal.signal(signal.SIGHUP, handle_signal)
keys = sorted(stats.get().keys())
if options.csv:
frmt = CSVFormat(keys)
diff --git a/tools/kvm/kvm_stat/kvm_stat.service b/tools/kvm/kvm_stat/kvm_stat.service
new file mode 100644
index 000000000000..71aabaffe779
--- /dev/null
+++ b/tools/kvm/kvm_stat/kvm_stat.service
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+[Unit]
+Description=Service that logs KVM kernel module trace events
+Before=qemu-kvm.service
+
+[Service]
+Type=simple
+ExecStart=/usr/bin/kvm_stat -dtcz -s 10 -L /var/log/kvm_stat.csv
+ExecReload=/bin/kill -HUP $MAINPID
+Restart=always
+SyslogIdentifier=kvm_stat
+SyslogLevel=debug
+
+[Install]
+WantedBy=multi-user.target
diff --git a/tools/kvm/kvm_stat/kvm_stat.txt b/tools/kvm/kvm_stat/kvm_stat.txt
index a97ded2aedad..feaf46451e83 100644
--- a/tools/kvm/kvm_stat/kvm_stat.txt
+++ b/tools/kvm/kvm_stat/kvm_stat.txt
@@ -65,8 +65,10 @@ OPTIONS
run in batch mode for one second
-c::
---csv=<file>::
- log in csv format - requires option -l/--log
+--csv::
+ log in csv format. Requires option -l/--log or -L/--log-to-file.
+ When used with option -L/--log-to-file, the header is only ever
+ written to the start of the file to preserve the format.
-d::
--debugfs::
@@ -92,6 +94,11 @@ OPTIONS
--log::
run in logging mode (like vmstat)
+
+-L<file>::
+--log-to-file=<file>::
+ like -l/--log, but logging to a file. Appends to existing files.
+
-p<pid>::
--pid=<pid>::
limit statistics to one virtual machine (pid)
@@ -104,6 +111,10 @@ OPTIONS
--tracepoints::
retrieve statistics from tracepoints
+-z::
+--skip-zero-records::
+ omit records with all zeros in logging mode
+
SEE ALSO
--------
'perf'(1), 'trace-cmd'(1)
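Putting the new options together, a logging-mode invocation equivalent to the ExecStart line of the kvm_stat.service unit added above might look like (path and interval are illustrative):

	kvm_stat -dtcz -s 10 -L /var/log/kvm_stat.csv

i.e. statistics from debugfs and tracepoints, logged in CSV format every 10 seconds, with all-zero records skipped, appended to a file that is reopened on SIGHUP.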
diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c
index 027b18f7ed8c..82f53d81a7a7 100644
--- a/tools/lib/api/fs/fs.c
+++ b/tools/lib/api/fs/fs.c
@@ -90,6 +90,7 @@ struct fs {
const char * const *mounts;
char path[PATH_MAX];
bool found;
+ bool checked;
long magic;
};
@@ -111,31 +112,37 @@ static struct fs fs__entries[] = {
.name = "sysfs",
.mounts = sysfs__fs_known_mountpoints,
.magic = SYSFS_MAGIC,
+ .checked = false,
},
[FS__PROCFS] = {
.name = "proc",
.mounts = procfs__known_mountpoints,
.magic = PROC_SUPER_MAGIC,
+ .checked = false,
},
[FS__DEBUGFS] = {
.name = "debugfs",
.mounts = debugfs__known_mountpoints,
.magic = DEBUGFS_MAGIC,
+ .checked = false,
},
[FS__TRACEFS] = {
.name = "tracefs",
.mounts = tracefs__known_mountpoints,
.magic = TRACEFS_MAGIC,
+ .checked = false,
},
[FS__HUGETLBFS] = {
.name = "hugetlbfs",
.mounts = hugetlbfs__known_mountpoints,
.magic = HUGETLBFS_MAGIC,
+ .checked = false,
},
[FS__BPF_FS] = {
.name = "bpf",
.mounts = bpf_fs__known_mountpoints,
.magic = BPF_FS_MAGIC,
+ .checked = false,
},
};
@@ -158,6 +165,7 @@ static bool fs__read_mounts(struct fs *fs)
}
fclose(fp);
+ fs->checked = true;
return fs->found = found;
}
@@ -220,6 +228,7 @@ static bool fs__env_override(struct fs *fs)
return false;
fs->found = true;
+ fs->checked = true;
strncpy(fs->path, override_path, sizeof(fs->path) - 1);
fs->path[sizeof(fs->path) - 1] = '\0';
return true;
@@ -246,6 +255,14 @@ static const char *fs__mountpoint(int idx)
if (fs->found)
return (const char *)fs->path;
+ /* the mount point was already checked for this filesystem
+ * but did not exist, so return NULL to avoid scanning again.
+ * This makes the found and not found paths cost equivalent
+ * in case of multiple calls.
+ */
+ if (fs->checked)
+ return NULL;
+
return fs__get_mountpoint(fs);
}
diff --git a/tools/lib/api/fs/fs.h b/tools/lib/api/fs/fs.h
index 936edb95e1f3..aa222ca30311 100644
--- a/tools/lib/api/fs/fs.h
+++ b/tools/lib/api/fs/fs.h
@@ -18,6 +18,18 @@
const char *name##__mount(void); \
bool name##__configured(void); \
+/*
+ * The xxxx__mountpoint() entry points find the first matching mount point for
+ * each of the filesystems listed below, where xxxx is the filesystem type.
+ *
+ * The interface is as follows:
+ *
+ * - If a mount point is found on first call, it is cached and used for all
+ * subsequent calls.
+ *
+ * - If a mount point is not found, NULL is returned on the first call and on
+ * all subsequent calls.
+ */
FS(sysfs)
FS(procfs)
FS(debugfs)
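A hedged sketch of how a caller consumes the interface documented in the new comment (the sysfs__mountpoint() declaration comes from the FS() macro above; the include path is the one used from perf):

#include <stdio.h>
#include <api/fs/fs.h>

int report_sysfs_mount(void)
{
	/* The first call scans the known mount points (or honours the
	 * environment override); the result (found path or NULL) is
	 * cached and reused for every later call. */
	const char *path = sysfs__mountpoint();

	if (!path) {
		fprintf(stderr, "sysfs is not mounted\n");
		return -1;
	}

	printf("sysfs mounted at %s\n", path);
	return 0;
}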
diff --git a/tools/lib/api/io.h b/tools/lib/api/io.h
new file mode 100644
index 000000000000..777c20f6b604
--- /dev/null
+++ b/tools/lib/api/io.h
@@ -0,0 +1,115 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Lightweight buffered reading library.
+ *
+ * Copyright 2019 Google LLC.
+ */
+#ifndef __API_IO__
+#define __API_IO__
+
+#include <stdlib.h>
+#include <unistd.h>
+
+struct io {
+ /* File descriptor being read. */
+ int fd;
+ /* Size of the read buffer. */
+ unsigned int buf_len;
+ /* Pointer to storage for buffering read. */
+ char *buf;
+ /* End of the storage. */
+ char *end;
+ /* Currently accessed data pointer. */
+ char *data;
+ /* Set true when the end of file is reached or a read error occurs. */
+ bool eof;
+};
+
+static inline void io__init(struct io *io, int fd,
+ char *buf, unsigned int buf_len)
+{
+ io->fd = fd;
+ io->buf_len = buf_len;
+ io->buf = buf;
+ io->end = buf;
+ io->data = buf;
+ io->eof = false;
+}
+
+/* Reads one character from the "io" file with similar semantics to fgetc. */
+static inline int io__get_char(struct io *io)
+{
+ char *ptr = io->data;
+
+ if (io->eof)
+ return -1;
+
+ if (ptr == io->end) {
+ ssize_t n = read(io->fd, io->buf, io->buf_len);
+
+ if (n <= 0) {
+ io->eof = true;
+ return -1;
+ }
+ ptr = &io->buf[0];
+ io->end = &io->buf[n];
+ }
+ io->data = ptr + 1;
+ return *ptr;
+}
+
+/* Read a hexadecimal value with no 0x prefix into the out argument hex. If the
+ * first character isn't hexadecimal, returns -2; on io->eof, returns -1;
+ * otherwise returns the character after the hexadecimal value, which may be -1
+ * for eof. If the read value is larger than a u64, the high-order bits are
+ * dropped.
+ */
+static inline int io__get_hex(struct io *io, __u64 *hex)
+{
+ bool first_read = true;
+
+ *hex = 0;
+ while (true) {
+ int ch = io__get_char(io);
+
+ if (ch < 0)
+ return ch;
+ if (ch >= '0' && ch <= '9')
+ *hex = (*hex << 4) | (ch - '0');
+ else if (ch >= 'a' && ch <= 'f')
+ *hex = (*hex << 4) | (ch - 'a' + 10);
+ else if (ch >= 'A' && ch <= 'F')
+ *hex = (*hex << 4) | (ch - 'A' + 10);
+ else if (first_read)
+ return -2;
+ else
+ return ch;
+ first_read = false;
+ }
+}
+
+/* Read a positive decimal value into the out argument dec. If the first
+ * character isn't a decimal digit, returns -2; on io->eof, returns -1;
+ * otherwise returns the character after the decimal value, which may be -1 for
+ * eof. If the read value is larger than a u64, the high-order bits are
+ * dropped.
+ */
+static inline int io__get_dec(struct io *io, __u64 *dec)
+{
+ bool first_read = true;
+
+ *dec = 0;
+ while (true) {
+ int ch = io__get_char(io);
+
+ if (ch < 0)
+ return ch;
+ if (ch >= '0' && ch <= '9')
+ *dec = (*dec * 10) + ch - '0';
+ else if (first_read)
+ return -2;
+ else
+ return ch;
+ first_read = false;
+ }
+}
+
+#endif /* __API_IO__ */
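A self-contained sketch of driving the new reader (mirroring the way the kallsyms rewrite below uses it; the line-counting task and buffer size are illustrative only):

#include <fcntl.h>
#include <stdio.h>
#include <stdbool.h>       /* io.h relies on bool being visible */
#include <unistd.h>
#include "api/io.h"

/* Count newline-terminated lines in a file with the buffered reader. */
static int count_lines(const char *filename, unsigned long *lines)
{
	char buf[BUFSIZ];
	struct io io;
	int ch, fd = open(filename, O_RDONLY);

	if (fd < 0)
		return -1;

	io__init(&io, fd, buf, sizeof(buf));
	*lines = 0;
	while ((ch = io__get_char(&io)) >= 0) {
		if (ch == '\n')
			(*lines)++;
	}

	close(fd);
	return 0;
}

io__get_hex() and io__get_dec() follow the same pattern: keep pulling values until the returned "next character" is -1 and io.eof is set.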
diff --git a/tools/lib/perf/cpumap.c b/tools/lib/perf/cpumap.c
index f93f4e703e4c..ca0215047c32 100644
--- a/tools/lib/perf/cpumap.c
+++ b/tools/lib/perf/cpumap.c
@@ -247,7 +247,7 @@ out:
int perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx)
{
- if (idx < cpus->nr)
+ if (cpus && idx < cpus->nr)
return cpus->map[idx];
return -1;
diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c
index 5b9f2ca50591..6a875a0f01bb 100644
--- a/tools/lib/perf/evlist.c
+++ b/tools/lib/perf/evlist.c
@@ -11,10 +11,8 @@
#include <internal/mmap.h>
#include <internal/cpumap.h>
#include <internal/threadmap.h>
-#include <internal/xyarray.h>
#include <internal/lib.h>
#include <linux/zalloc.h>
-#include <sys/ioctl.h>
#include <stdlib.h>
#include <errno.h>
#include <unistd.h>
@@ -125,8 +123,10 @@ static void perf_evlist__purge(struct perf_evlist *evlist)
void perf_evlist__exit(struct perf_evlist *evlist)
{
perf_cpu_map__put(evlist->cpus);
+ perf_cpu_map__put(evlist->all_cpus);
perf_thread_map__put(evlist->threads);
evlist->cpus = NULL;
+ evlist->all_cpus = NULL;
evlist->threads = NULL;
fdarray__exit(&evlist->pollfd);
}
diff --git a/tools/lib/subcmd/parse-options.h b/tools/lib/subcmd/parse-options.h
index af9def589863..d2414144eb8c 100644
--- a/tools/lib/subcmd/parse-options.h
+++ b/tools/lib/subcmd/parse-options.h
@@ -151,6 +151,8 @@ struct option {
{ .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), .argh = "time", .help = (h), .callback = parse_opt_approxidate_cb }
#define OPT_CALLBACK(s, l, v, a, h, f) \
{ .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), .argh = (a), .help = (h), .callback = (f) }
+#define OPT_CALLBACK_SET(s, l, v, os, a, h, f) \
+ { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), .argh = (a), .help = (h), .callback = (f), .set = check_vtype(os, bool *)}
#define OPT_CALLBACK_NOOPT(s, l, v, a, h, f) \
{ .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), .argh = (a), .help = (h), .callback = (f), .flags = PARSE_OPT_NOARG }
#define OPT_CALLBACK_DEFAULT(s, l, v, a, h, f, d) \
diff --git a/tools/lib/symbol/kallsyms.c b/tools/lib/symbol/kallsyms.c
index 1a7a9f877095..e335ac2b9e19 100644
--- a/tools/lib/symbol/kallsyms.c
+++ b/tools/lib/symbol/kallsyms.c
@@ -1,7 +1,9 @@
// SPDX-License-Identifier: GPL-2.0
#include "symbol/kallsyms.h"
+#include "api/io.h"
#include <stdio.h>
-#include <stdlib.h>
+#include <sys/stat.h>
+#include <fcntl.h>
u8 kallsyms2elf_type(char type)
{
@@ -15,74 +17,62 @@ bool kallsyms__is_function(char symbol_type)
return symbol_type == 'T' || symbol_type == 'W';
}
-/*
- * While we find nice hex chars, build a long_val.
- * Return number of chars processed.
- */
-int hex2u64(const char *ptr, u64 *long_val)
+static void read_to_eol(struct io *io)
{
- char *p;
+ int ch;
- *long_val = strtoull(ptr, &p, 16);
-
- return p - ptr;
+ for (;;) {
+ ch = io__get_char(io);
+ if (ch < 0 || ch == '\n')
+ return;
+ }
}
int kallsyms__parse(const char *filename, void *arg,
int (*process_symbol)(void *arg, const char *name,
char type, u64 start))
{
- char *line = NULL;
- size_t n;
- int err = -1;
- FILE *file = fopen(filename, "r");
-
- if (file == NULL)
- goto out_failure;
-
- err = 0;
+ struct io io;
+ char bf[BUFSIZ];
+ int err;
- while (!feof(file)) {
- u64 start;
- int line_len, len;
- char symbol_type;
- char *symbol_name;
+ io.fd = open(filename, O_RDONLY, 0);
- line_len = getline(&line, &n, file);
- if (line_len < 0 || !line)
- break;
+ if (io.fd < 0)
+ return -1;
- line[--line_len] = '\0'; /* \n */
+ io__init(&io, io.fd, bf, sizeof(bf));
- len = hex2u64(line, &start);
+ err = 0;
+ while (!io.eof) {
+ __u64 start;
+ int ch;
+ size_t i;
+ char symbol_type;
+ char symbol_name[KSYM_NAME_LEN + 1];
- /* Skip the line if we failed to parse the address. */
- if (!len)
+ if (io__get_hex(&io, &start) != ' ') {
+ read_to_eol(&io);
continue;
-
- len++;
- if (len + 2 >= line_len)
+ }
+ symbol_type = io__get_char(&io);
+ if (io__get_char(&io) != ' ') {
+ read_to_eol(&io);
continue;
-
- symbol_type = line[len];
- len += 2;
- symbol_name = line + len;
- len = line_len - len;
-
- if (len >= KSYM_NAME_LEN) {
- err = -1;
- break;
}
+ for (i = 0; i < sizeof(symbol_name); i++) {
+ ch = io__get_char(&io);
+ if (ch < 0 || ch == '\n')
+ break;
+ symbol_name[i] = ch;
+ }
+ symbol_name[i] = '\0';
err = process_symbol(arg, symbol_name, symbol_type, start);
if (err)
break;
}
- free(line);
- fclose(file);
+ close(io.fd);
return err;
-
-out_failure:
- return -1;
}
diff --git a/tools/lib/symbol/kallsyms.h b/tools/lib/symbol/kallsyms.h
index bd988f7b18d4..72ab9870454b 100644
--- a/tools/lib/symbol/kallsyms.h
+++ b/tools/lib/symbol/kallsyms.h
@@ -18,8 +18,6 @@ static inline u8 kallsyms2elf_binding(char type)
return isupper(type) ? STB_GLOBAL : STB_LOCAL;
}
-int hex2u64(const char *ptr, u64 *long_val);
-
u8 kallsyms2elf_type(char type);
bool kallsyms__is_function(char symbol_type);
diff --git a/tools/lib/traceevent/kbuffer-parse.c b/tools/lib/traceevent/kbuffer-parse.c
index b887e7437d67..27f3b07fdae8 100644
--- a/tools/lib/traceevent/kbuffer-parse.c
+++ b/tools/lib/traceevent/kbuffer-parse.c
@@ -438,7 +438,7 @@ void *kbuffer_translate_data(int swap, void *data, unsigned int *size)
case KBUFFER_TYPE_TIME_EXTEND:
case KBUFFER_TYPE_TIME_STAMP:
return NULL;
- };
+ }
*size = length;
diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c
index 20eed719542e..c271aeeb227d 100644
--- a/tools/lib/traceevent/parse-filter.c
+++ b/tools/lib/traceevent/parse-filter.c
@@ -1958,7 +1958,8 @@ static char *op_to_str(struct tep_event_filter *filter, struct tep_filter_arg *a
default:
break;
}
- asprintf(&str, val ? "TRUE" : "FALSE");
+ if (asprintf(&str, val ? "TRUE" : "FALSE") < 0)
+ str = NULL;
break;
}
}
@@ -1976,7 +1977,8 @@ static char *op_to_str(struct tep_event_filter *filter, struct tep_filter_arg *a
break;
}
- asprintf(&str, "(%s) %s (%s)", left, op, right);
+ if (asprintf(&str, "(%s) %s (%s)", left, op, right) < 0)
+ str = NULL;
break;
case TEP_FILTER_OP_NOT:
@@ -1992,10 +1994,12 @@ static char *op_to_str(struct tep_event_filter *filter, struct tep_filter_arg *a
right_val = 0;
if (right_val >= 0) {
/* just return the opposite */
- asprintf(&str, right_val ? "FALSE" : "TRUE");
+ if (asprintf(&str, right_val ? "FALSE" : "TRUE") < 0)
+ str = NULL;
break;
}
- asprintf(&str, "%s(%s)", op, right);
+ if (asprintf(&str, "%s(%s)", op, right) < 0)
+ str = NULL;
break;
default:
@@ -2011,7 +2015,8 @@ static char *val_to_str(struct tep_event_filter *filter, struct tep_filter_arg *
{
char *str = NULL;
- asprintf(&str, "%lld", arg->value.val);
+ if (asprintf(&str, "%lld", arg->value.val) < 0)
+ str = NULL;
return str;
}
@@ -2069,7 +2074,8 @@ static char *exp_to_str(struct tep_event_filter *filter, struct tep_filter_arg *
break;
}
- asprintf(&str, "%s %s %s", lstr, op, rstr);
+ if (asprintf(&str, "%s %s %s", lstr, op, rstr) < 0)
+ str = NULL;
out:
free(lstr);
free(rstr);
@@ -2113,7 +2119,8 @@ static char *num_to_str(struct tep_event_filter *filter, struct tep_filter_arg *
if (!op)
op = "<=";
- asprintf(&str, "%s %s %s", lstr, op, rstr);
+ if (asprintf(&str, "%s %s %s", lstr, op, rstr) < 0)
+ str = NULL;
break;
default:
@@ -2148,8 +2155,9 @@ static char *str_to_str(struct tep_event_filter *filter, struct tep_filter_arg *
if (!op)
op = "!~";
- asprintf(&str, "%s %s \"%s\"",
- arg->str.field->name, op, arg->str.val);
+ if (asprintf(&str, "%s %s \"%s\"",
+ arg->str.field->name, op, arg->str.val) < 0)
+ str = NULL;
break;
default:
@@ -2165,7 +2173,8 @@ static char *arg_to_str(struct tep_event_filter *filter, struct tep_filter_arg *
switch (arg->type) {
case TEP_FILTER_ARG_BOOLEAN:
- asprintf(&str, arg->boolean.value ? "TRUE" : "FALSE");
+ if (asprintf(&str, arg->boolean.value ? "TRUE" : "FALSE") < 0)
+ str = NULL;
return str;
case TEP_FILTER_ARG_OP:
diff --git a/tools/objtool/Build b/tools/objtool/Build
index 66f44f5cd2a6..b7222d5cc7bc 100644
--- a/tools/objtool/Build
+++ b/tools/objtool/Build
@@ -1,11 +1,16 @@
objtool-y += arch/$(SRCARCH)/
+
+objtool-y += weak.o
+
+objtool-$(SUBCMD_CHECK) += check.o
+objtool-$(SUBCMD_CHECK) += special.o
+objtool-$(SUBCMD_ORC) += check.o
+objtool-$(SUBCMD_ORC) += orc_gen.o
+objtool-$(SUBCMD_ORC) += orc_dump.o
+
objtool-y += builtin-check.o
objtool-y += builtin-orc.o
-objtool-y += check.o
-objtool-y += orc_gen.o
-objtool-y += orc_dump.o
objtool-y += elf.o
-objtool-y += special.o
objtool-y += objtool.o
objtool-y += libstring.o
diff --git a/tools/objtool/Documentation/stack-validation.txt b/tools/objtool/Documentation/stack-validation.txt
index de094670050b..0542e46c7552 100644
--- a/tools/objtool/Documentation/stack-validation.txt
+++ b/tools/objtool/Documentation/stack-validation.txt
@@ -289,6 +289,47 @@ they mean, and suggestions for how to fix them.
might be corrupt due to a gcc bug. For more details, see:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70646
+9. file.o: warning: objtool: funcA() call to funcB() with UACCESS enabled
+
+ This means that an unexpected call to a non-whitelisted function exists
+ outside of arch-specific guards.
+ X86: SMAP (stac/clac): __uaccess_begin()/__uaccess_end()
+ ARM: PAN: uaccess_enable()/uaccess_disable()
+
+ These functions should be called to denote a minimal critical section around
+ access to __user variables. See also: https://lwn.net/Articles/517475/
+
+ The warning is intended to prevent a call to funcB() from eventually
+ calling schedule(), which could leak the AC flags state and fail to
+ restore it correctly.
+
+ It also helps verify that there are no unexpected calls to funcB() which may
+ access user-space pages while the protections against doing so are disabled.
+
+ To fix, either:
+ 1) remove explicit calls to funcB() from funcA().
+ 2) add the correct guards before and after calls to low-level functions like
+ __get_user_size()/__put_user_size() (see the sketch below).
+ 3) add funcB to the uaccess_safe_builtin whitelist in tools/objtool/check.c, if
+ funcB obviously does not call schedule() and is marked notrace (since
+ function tracing inserts additional calls, which is not obvious from the
+ sources).
+
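+ A minimal sketch of the expected pattern, assuming the generic
+ user_access_begin()/unsafe_get_user()/user_access_end() helpers; the
+ variable names and the error label are illustrative only:
+
+	if (!user_access_begin(uptr, sizeof(*uptr)))
+		return -EFAULT;
+	unsafe_get_user(val, uptr, Efault);	/* no calls to out-of-line code here */
+	user_access_end();
+	return 0;
+ Efault:
+	user_access_end();
+	return -EFAULT;
+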
+10. file.o: warning: func()+0x5c: alternative modifies stack
+
+ This means that an alternative includes instructions that modify the
+ stack. The problem is that there is only one ORC unwind table, which means
+ that the ORC unwind entries must be valid for each of the alternatives.
+ The easiest way to enforce this is to ensure alternatives do not contain
+ any ORC entries, which in turn implies the above constraint.
+
+11. file.o: warning: unannotated intra-function call
+
+ This warning means that a direct call is made to a destination which
+ is not at the beginning of a function. If this is a legitimate call, you
+ can silence the warning by putting the ANNOTATE_INTRA_FUNCTION_CALL
+ directive right before the call.
+
If the error doesn't seem to make sense, it could be a bug in objtool.
Feel free to ask the objtool maintainer for help.
diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile
index f591c4d1b6fe..7770edcda3a0 100644
--- a/tools/objtool/Makefile
+++ b/tools/objtool/Makefile
@@ -35,7 +35,8 @@ all: $(OBJTOOL)
INCLUDES := -I$(srctree)/tools/include \
-I$(srctree)/tools/arch/$(HOSTARCH)/include/uapi \
- -I$(srctree)/tools/arch/$(SRCARCH)/include
+ -I$(srctree)/tools/arch/$(SRCARCH)/include \
+ -I$(srctree)/tools/objtool/arch/$(SRCARCH)/include
WARNINGS := $(EXTRA_WARNINGS) -Wno-switch-default -Wno-switch-enum -Wno-packed
CFLAGS := -Werror $(WARNINGS) $(KBUILD_HOSTCFLAGS) -g $(INCLUDES) $(LIBELF_FLAGS)
LDFLAGS += $(LIBELF_LIBS) $(LIBSUBCMD) $(KBUILD_HOSTLDFLAGS)
@@ -45,14 +46,24 @@ elfshdr := $(shell echo '$(pound)include <libelf.h>' | $(CC) $(CFLAGS) -x c -E -
CFLAGS += $(if $(elfshdr),,-DLIBELF_USE_DEPRECATED)
AWK = awk
+
+SUBCMD_CHECK := n
+SUBCMD_ORC := n
+
+ifeq ($(SRCARCH),x86)
+ SUBCMD_CHECK := y
+ SUBCMD_ORC := y
+endif
+
+export SUBCMD_CHECK SUBCMD_ORC
export srctree OUTPUT CFLAGS SRCARCH AWK
include $(srctree)/tools/build/Makefile.include
$(OBJTOOL_IN): fixdep FORCE
+ @$(CONFIG_SHELL) ./sync-check.sh
@$(MAKE) $(build)=objtool
$(OBJTOOL): $(LIBSUBCMD) $(OBJTOOL_IN)
- @$(CONFIG_SHELL) ./sync-check.sh
$(QUIET_LINK)$(CC) $(OBJTOOL_IN) $(LDFLAGS) -o $@
diff --git a/tools/objtool/arch.h b/tools/objtool/arch.h
index ced3765c4f44..eda15a5a285e 100644
--- a/tools/objtool/arch.h
+++ b/tools/objtool/arch.h
@@ -8,9 +8,11 @@
#include <stdbool.h>
#include <linux/list.h>
-#include "elf.h"
+#include "objtool.h"
#include "cfi.h"
+#include <asm/orc_types.h>
+
enum insn_type {
INSN_JUMP_CONDITIONAL,
INSN_JUMP_UNCONDITIONAL,
@@ -20,7 +22,6 @@ enum insn_type {
INSN_CALL_DYNAMIC,
INSN_RETURN,
INSN_CONTEXT_SWITCH,
- INSN_STACK,
INSN_BUG,
INSN_NOP,
INSN_STAC,
@@ -64,15 +65,23 @@ struct op_src {
struct stack_op {
struct op_dest dest;
struct op_src src;
+ struct list_head list;
};
-void arch_initial_func_cfi_state(struct cfi_state *state);
+struct instruction;
+
+void arch_initial_func_cfi_state(struct cfi_init_state *state);
-int arch_decode_instruction(struct elf *elf, struct section *sec,
+int arch_decode_instruction(const struct elf *elf, const struct section *sec,
unsigned long offset, unsigned int maxlen,
unsigned int *len, enum insn_type *type,
- unsigned long *immediate, struct stack_op *op);
+ unsigned long *immediate,
+ struct list_head *ops_list);
bool arch_callee_saved_reg(unsigned char reg);
+unsigned long arch_jump_destination(struct instruction *insn);
+
+unsigned long arch_dest_rela_offset(int addend);
+
#endif /* _ARCH_H */
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index a62e032863a8..4b504fc90bbb 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -11,6 +11,7 @@
#include "../../../arch/x86/lib/inat.c"
#include "../../../arch/x86/lib/insn.c"
+#include "../../check.h"
#include "../../elf.h"
#include "../../arch.h"
#include "../../warn.h"
@@ -26,7 +27,7 @@ static unsigned char op_to_cfi_reg[][2] = {
{CFI_DI, CFI_R15},
};
-static int is_x86_64(struct elf *elf)
+static int is_x86_64(const struct elf *elf)
{
switch (elf->ehdr.e_machine) {
case EM_X86_64:
@@ -66,16 +67,34 @@ bool arch_callee_saved_reg(unsigned char reg)
}
}
-int arch_decode_instruction(struct elf *elf, struct section *sec,
+unsigned long arch_dest_rela_offset(int addend)
+{
+ return addend + 4;
+}
+
+unsigned long arch_jump_destination(struct instruction *insn)
+{
+ return insn->offset + insn->len + insn->immediate;
+}
+
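+/*
+ * ADD_OP(): allocate a zeroed stack_op, queue it on ops_list, and run the
+ * block that follows the macro exactly once with 'op' pointing at the new
+ * entry; the else-for construct clears 'op' after that single pass.
+ */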
+#define ADD_OP(op) \
+ if (!(op = calloc(1, sizeof(*op)))) \
+ return -1; \
+ else for (list_add_tail(&op->list, ops_list); op; op = NULL)
+
+int arch_decode_instruction(const struct elf *elf, const struct section *sec,
unsigned long offset, unsigned int maxlen,
unsigned int *len, enum insn_type *type,
- unsigned long *immediate, struct stack_op *op)
+ unsigned long *immediate,
+ struct list_head *ops_list)
{
struct insn insn;
int x86_64, sign;
unsigned char op1, op2, rex = 0, rex_b = 0, rex_r = 0, rex_w = 0,
rex_x = 0, modrm = 0, modrm_mod = 0, modrm_rm = 0,
modrm_reg = 0, sib = 0;
+ struct stack_op *op = NULL;
+ struct symbol *sym;
x86_64 = is_x86_64(elf);
if (x86_64 == -1)
@@ -85,7 +104,7 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
insn_get_length(&insn);
if (!insn_complete(&insn)) {
- WARN_FUNC("can't decode instruction", sec, offset);
+ WARN("can't decode instruction at %s:0x%lx", sec->name, offset);
return -1;
}
@@ -123,40 +142,44 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
if (rex_w && !rex_b && modrm_mod == 3 && modrm_rm == 4) {
/* add/sub reg, %rsp */
- *type = INSN_STACK;
- op->src.type = OP_SRC_ADD;
- op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
- op->dest.type = OP_DEST_REG;
- op->dest.reg = CFI_SP;
+ ADD_OP(op) {
+ op->src.type = OP_SRC_ADD;
+ op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = CFI_SP;
+ }
}
break;
case 0x50 ... 0x57:
/* push reg */
- *type = INSN_STACK;
- op->src.type = OP_SRC_REG;
- op->src.reg = op_to_cfi_reg[op1 & 0x7][rex_b];
- op->dest.type = OP_DEST_PUSH;
+ ADD_OP(op) {
+ op->src.type = OP_SRC_REG;
+ op->src.reg = op_to_cfi_reg[op1 & 0x7][rex_b];
+ op->dest.type = OP_DEST_PUSH;
+ }
break;
case 0x58 ... 0x5f:
/* pop reg */
- *type = INSN_STACK;
- op->src.type = OP_SRC_POP;
- op->dest.type = OP_DEST_REG;
- op->dest.reg = op_to_cfi_reg[op1 & 0x7][rex_b];
+ ADD_OP(op) {
+ op->src.type = OP_SRC_POP;
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = op_to_cfi_reg[op1 & 0x7][rex_b];
+ }
break;
case 0x68:
case 0x6a:
/* push immediate */
- *type = INSN_STACK;
- op->src.type = OP_SRC_CONST;
- op->dest.type = OP_DEST_PUSH;
+ ADD_OP(op) {
+ op->src.type = OP_SRC_CONST;
+ op->dest.type = OP_DEST_PUSH;
+ }
break;
case 0x70 ... 0x7f:
@@ -170,12 +193,13 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
if (modrm == 0xe4) {
/* and imm, %rsp */
- *type = INSN_STACK;
- op->src.type = OP_SRC_AND;
- op->src.reg = CFI_SP;
- op->src.offset = insn.immediate.value;
- op->dest.type = OP_DEST_REG;
- op->dest.reg = CFI_SP;
+ ADD_OP(op) {
+ op->src.type = OP_SRC_AND;
+ op->src.reg = CFI_SP;
+ op->src.offset = insn.immediate.value;
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = CFI_SP;
+ }
break;
}
@@ -187,34 +211,37 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
break;
/* add/sub imm, %rsp */
- *type = INSN_STACK;
- op->src.type = OP_SRC_ADD;
- op->src.reg = CFI_SP;
- op->src.offset = insn.immediate.value * sign;
- op->dest.type = OP_DEST_REG;
- op->dest.reg = CFI_SP;
+ ADD_OP(op) {
+ op->src.type = OP_SRC_ADD;
+ op->src.reg = CFI_SP;
+ op->src.offset = insn.immediate.value * sign;
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = CFI_SP;
+ }
break;
case 0x89:
if (rex_w && !rex_r && modrm_mod == 3 && modrm_reg == 4) {
/* mov %rsp, reg */
- *type = INSN_STACK;
- op->src.type = OP_SRC_REG;
- op->src.reg = CFI_SP;
- op->dest.type = OP_DEST_REG;
- op->dest.reg = op_to_cfi_reg[modrm_rm][rex_b];
+ ADD_OP(op) {
+ op->src.type = OP_SRC_REG;
+ op->src.reg = CFI_SP;
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = op_to_cfi_reg[modrm_rm][rex_b];
+ }
break;
}
if (rex_w && !rex_b && modrm_mod == 3 && modrm_rm == 4) {
/* mov reg, %rsp */
- *type = INSN_STACK;
- op->src.type = OP_SRC_REG;
- op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
- op->dest.type = OP_DEST_REG;
- op->dest.reg = CFI_SP;
+ ADD_OP(op) {
+ op->src.type = OP_SRC_REG;
+ op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = CFI_SP;
+ }
break;
}
@@ -224,22 +251,24 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
(modrm_mod == 1 || modrm_mod == 2) && modrm_rm == 5) {
/* mov reg, disp(%rbp) */
- *type = INSN_STACK;
- op->src.type = OP_SRC_REG;
- op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
- op->dest.type = OP_DEST_REG_INDIRECT;
- op->dest.reg = CFI_BP;
- op->dest.offset = insn.displacement.value;
+ ADD_OP(op) {
+ op->src.type = OP_SRC_REG;
+ op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
+ op->dest.type = OP_DEST_REG_INDIRECT;
+ op->dest.reg = CFI_BP;
+ op->dest.offset = insn.displacement.value;
+ }
} else if (rex_w && !rex_b && modrm_rm == 4 && sib == 0x24) {
/* mov reg, disp(%rsp) */
- *type = INSN_STACK;
- op->src.type = OP_SRC_REG;
- op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
- op->dest.type = OP_DEST_REG_INDIRECT;
- op->dest.reg = CFI_SP;
- op->dest.offset = insn.displacement.value;
+ ADD_OP(op) {
+ op->src.type = OP_SRC_REG;
+ op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
+ op->dest.type = OP_DEST_REG_INDIRECT;
+ op->dest.reg = CFI_SP;
+ op->dest.offset = insn.displacement.value;
+ }
}
break;
@@ -248,23 +277,25 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
if (rex_w && !rex_b && modrm_mod == 1 && modrm_rm == 5) {
/* mov disp(%rbp), reg */
- *type = INSN_STACK;
- op->src.type = OP_SRC_REG_INDIRECT;
- op->src.reg = CFI_BP;
- op->src.offset = insn.displacement.value;
- op->dest.type = OP_DEST_REG;
- op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r];
+ ADD_OP(op) {
+ op->src.type = OP_SRC_REG_INDIRECT;
+ op->src.reg = CFI_BP;
+ op->src.offset = insn.displacement.value;
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r];
+ }
} else if (rex_w && !rex_b && sib == 0x24 &&
modrm_mod != 3 && modrm_rm == 4) {
/* mov disp(%rsp), reg */
- *type = INSN_STACK;
- op->src.type = OP_SRC_REG_INDIRECT;
- op->src.reg = CFI_SP;
- op->src.offset = insn.displacement.value;
- op->dest.type = OP_DEST_REG;
- op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r];
+ ADD_OP(op) {
+ op->src.type = OP_SRC_REG_INDIRECT;
+ op->src.reg = CFI_SP;
+ op->src.offset = insn.displacement.value;
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r];
+ }
}
break;
@@ -272,28 +303,30 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
case 0x8d:
if (sib == 0x24 && rex_w && !rex_b && !rex_x) {
- *type = INSN_STACK;
- if (!insn.displacement.value) {
- /* lea (%rsp), reg */
- op->src.type = OP_SRC_REG;
- } else {
- /* lea disp(%rsp), reg */
- op->src.type = OP_SRC_ADD;
- op->src.offset = insn.displacement.value;
+ ADD_OP(op) {
+ if (!insn.displacement.value) {
+ /* lea (%rsp), reg */
+ op->src.type = OP_SRC_REG;
+ } else {
+ /* lea disp(%rsp), reg */
+ op->src.type = OP_SRC_ADD;
+ op->src.offset = insn.displacement.value;
+ }
+ op->src.reg = CFI_SP;
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r];
}
- op->src.reg = CFI_SP;
- op->dest.type = OP_DEST_REG;
- op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r];
} else if (rex == 0x48 && modrm == 0x65) {
/* lea disp(%rbp), %rsp */
- *type = INSN_STACK;
- op->src.type = OP_SRC_ADD;
- op->src.reg = CFI_BP;
- op->src.offset = insn.displacement.value;
- op->dest.type = OP_DEST_REG;
- op->dest.reg = CFI_SP;
+ ADD_OP(op) {
+ op->src.type = OP_SRC_ADD;
+ op->src.reg = CFI_BP;
+ op->src.offset = insn.displacement.value;
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = CFI_SP;
+ }
} else if (rex == 0x49 && modrm == 0x62 &&
insn.displacement.value == -8) {
@@ -304,12 +337,13 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
* Restoring rsp back to its original value after a
* stack realignment.
*/
- *type = INSN_STACK;
- op->src.type = OP_SRC_ADD;
- op->src.reg = CFI_R10;
- op->src.offset = -8;
- op->dest.type = OP_DEST_REG;
- op->dest.reg = CFI_SP;
+ ADD_OP(op) {
+ op->src.type = OP_SRC_ADD;
+ op->src.reg = CFI_R10;
+ op->src.offset = -8;
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = CFI_SP;
+ }
} else if (rex == 0x49 && modrm == 0x65 &&
insn.displacement.value == -16) {
@@ -320,21 +354,23 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
* Restoring rsp back to its original value after a
* stack realignment.
*/
- *type = INSN_STACK;
- op->src.type = OP_SRC_ADD;
- op->src.reg = CFI_R13;
- op->src.offset = -16;
- op->dest.type = OP_DEST_REG;
- op->dest.reg = CFI_SP;
+ ADD_OP(op) {
+ op->src.type = OP_SRC_ADD;
+ op->src.reg = CFI_R13;
+ op->src.offset = -16;
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = CFI_SP;
+ }
}
break;
case 0x8f:
/* pop to mem */
- *type = INSN_STACK;
- op->src.type = OP_SRC_POP;
- op->dest.type = OP_DEST_MEM;
+ ADD_OP(op) {
+ op->src.type = OP_SRC_POP;
+ op->dest.type = OP_DEST_MEM;
+ }
break;
case 0x90:
@@ -343,16 +379,18 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
case 0x9c:
/* pushf */
- *type = INSN_STACK;
- op->src.type = OP_SRC_CONST;
- op->dest.type = OP_DEST_PUSHF;
+ ADD_OP(op) {
+ op->src.type = OP_SRC_CONST;
+ op->dest.type = OP_DEST_PUSHF;
+ }
break;
case 0x9d:
/* popf */
- *type = INSN_STACK;
- op->src.type = OP_SRC_POPF;
- op->dest.type = OP_DEST_MEM;
+ ADD_OP(op) {
+ op->src.type = OP_SRC_POPF;
+ op->dest.type = OP_DEST_MEM;
+ }
break;
case 0x0f:
@@ -387,16 +425,18 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
} else if (op2 == 0xa0 || op2 == 0xa8) {
/* push fs/gs */
- *type = INSN_STACK;
- op->src.type = OP_SRC_CONST;
- op->dest.type = OP_DEST_PUSH;
+ ADD_OP(op) {
+ op->src.type = OP_SRC_CONST;
+ op->dest.type = OP_DEST_PUSH;
+ }
} else if (op2 == 0xa1 || op2 == 0xa9) {
/* pop fs/gs */
- *type = INSN_STACK;
- op->src.type = OP_SRC_POP;
- op->dest.type = OP_DEST_MEM;
+ ADD_OP(op) {
+ op->src.type = OP_SRC_POP;
+ op->dest.type = OP_DEST_MEM;
+ }
}
break;
@@ -409,8 +449,8 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
* mov bp, sp
* pop bp
*/
- *type = INSN_STACK;
- op->dest.type = OP_DEST_LEAVE;
+ ADD_OP(op)
+ op->dest.type = OP_DEST_LEAVE;
break;
@@ -429,14 +469,41 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
*type = INSN_RETURN;
break;
+ case 0xcf: /* iret */
+ /*
+ * Handle sync_core(), which has an IRET to self.
+ * All other IRETs are in STT_NONE entry code.
+ */
+ sym = find_symbol_containing(sec, offset);
+ if (sym && sym->type == STT_FUNC) {
+ ADD_OP(op) {
+ /* add $40, %rsp */
+ op->src.type = OP_SRC_ADD;
+ op->src.reg = CFI_SP;
+ op->src.offset = 5*8;
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = CFI_SP;
+ }
+ break;
+ }
+
+ /* fallthrough */
+
case 0xca: /* retf */
case 0xcb: /* retf */
- case 0xcf: /* iret */
*type = INSN_CONTEXT_SWITCH;
break;
case 0xe8:
*type = INSN_CALL;
+ /*
+ * For the impact on the stack, a CALL behaves like
+ * a PUSH of an immediate value (the return address).
+ */
+ ADD_OP(op) {
+ op->src.type = OP_SRC_CONST;
+ op->dest.type = OP_DEST_PUSH;
+ }
break;
case 0xfc:
@@ -464,9 +531,10 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
else if (modrm_reg == 6) {
/* push from mem */
- *type = INSN_STACK;
- op->src.type = OP_SRC_CONST;
- op->dest.type = OP_DEST_PUSH;
+ ADD_OP(op) {
+ op->src.type = OP_SRC_CONST;
+ op->dest.type = OP_DEST_PUSH;
+ }
}
break;
@@ -480,7 +548,7 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
return 0;
}
-void arch_initial_func_cfi_state(struct cfi_state *state)
+void arch_initial_func_cfi_state(struct cfi_init_state *state)
{
int i;
diff --git a/tools/objtool/arch/x86/include/cfi_regs.h b/tools/objtool/arch/x86/include/cfi_regs.h
new file mode 100644
index 000000000000..79bc517efba8
--- /dev/null
+++ b/tools/objtool/arch/x86/include/cfi_regs.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#ifndef _OBJTOOL_CFI_REGS_H
+#define _OBJTOOL_CFI_REGS_H
+
+#define CFI_AX 0
+#define CFI_DX 1
+#define CFI_CX 2
+#define CFI_BX 3
+#define CFI_SI 4
+#define CFI_DI 5
+#define CFI_BP 6
+#define CFI_SP 7
+#define CFI_R8 8
+#define CFI_R9 9
+#define CFI_R10 10
+#define CFI_R11 11
+#define CFI_R12 12
+#define CFI_R13 13
+#define CFI_R14 14
+#define CFI_R15 15
+#define CFI_RA 16
+#define CFI_NUM_REGS 17
+
+#endif /* _OBJTOOL_CFI_REGS_H */
diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c
index 10fbe75ab43d..7a44174967b5 100644
--- a/tools/objtool/builtin-check.c
+++ b/tools/objtool/builtin-check.c
@@ -14,10 +14,11 @@
*/
#include <subcmd/parse-options.h>
+#include <string.h>
#include "builtin.h"
-#include "check.h"
+#include "objtool.h"
-bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats;
+bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, validate_dup, vmlinux;
static const char * const check_usage[] = {
"objtool check [<options>] file.o",
@@ -32,12 +33,14 @@ const struct option check_options[] = {
OPT_BOOLEAN('b', "backtrace", &backtrace, "unwind on error"),
OPT_BOOLEAN('a', "uaccess", &uaccess, "enable uaccess checking"),
OPT_BOOLEAN('s', "stats", &stats, "print statistics"),
+ OPT_BOOLEAN('d', "duplicate", &validate_dup, "duplicate validation for vmlinux.o"),
+ OPT_BOOLEAN('l', "vmlinux", &vmlinux, "vmlinux.o validation"),
OPT_END(),
};
int cmd_check(int argc, const char **argv)
{
- const char *objname;
+ const char *objname, *s;
argc = parse_options(argc, argv, check_options, check_usage, 0);
@@ -46,5 +49,9 @@ int cmd_check(int argc, const char **argv)
objname = argv[0];
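+
+	/* An object file named vmlinux.o implies --vmlinux validation. */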
+ s = strstr(objname, "vmlinux.o");
+ if (s && !s[9])
+ vmlinux = true;
+
return check(objname, false);
}
diff --git a/tools/objtool/builtin-orc.c b/tools/objtool/builtin-orc.c
index 5f7cc6157edd..b1dfe2007962 100644
--- a/tools/objtool/builtin-orc.c
+++ b/tools/objtool/builtin-orc.c
@@ -14,8 +14,7 @@
#include <string.h>
#include "builtin.h"
-#include "check.h"
-
+#include "objtool.h"
static const char *orc_usage[] = {
"objtool orc generate [<options>] file.o",
diff --git a/tools/objtool/builtin.h b/tools/objtool/builtin.h
index 0b907902ee79..85c979caa367 100644
--- a/tools/objtool/builtin.h
+++ b/tools/objtool/builtin.h
@@ -8,7 +8,7 @@
#include <subcmd/parse-options.h>
extern const struct option check_options[];
-extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats;
+extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, validate_dup, vmlinux;
extern int cmd_check(int argc, const char **argv);
extern int cmd_orc(int argc, const char **argv);
diff --git a/tools/objtool/cfi.h b/tools/objtool/cfi.h
index 4427bf8ed686..c7c59c6a44ee 100644
--- a/tools/objtool/cfi.h
+++ b/tools/objtool/cfi.h
@@ -6,38 +6,33 @@
#ifndef _OBJTOOL_CFI_H
#define _OBJTOOL_CFI_H
+#include "cfi_regs.h"
+
#define CFI_UNDEFINED -1
#define CFI_CFA -2
#define CFI_SP_INDIRECT -3
#define CFI_BP_INDIRECT -4
-#define CFI_AX 0
-#define CFI_DX 1
-#define CFI_CX 2
-#define CFI_BX 3
-#define CFI_SI 4
-#define CFI_DI 5
-#define CFI_BP 6
-#define CFI_SP 7
-#define CFI_R8 8
-#define CFI_R9 9
-#define CFI_R10 10
-#define CFI_R11 11
-#define CFI_R12 12
-#define CFI_R13 13
-#define CFI_R14 14
-#define CFI_R15 15
-#define CFI_RA 16
-#define CFI_NUM_REGS 17
-
struct cfi_reg {
int base;
int offset;
};
-struct cfi_state {
+struct cfi_init_state {
+ struct cfi_reg regs[CFI_NUM_REGS];
struct cfi_reg cfa;
+};
+
+struct cfi_state {
struct cfi_reg regs[CFI_NUM_REGS];
+ struct cfi_reg vals[CFI_NUM_REGS];
+ struct cfi_reg cfa;
+ int stack_size;
+ int drap_reg, drap_offset;
+ unsigned char type;
+ bool bp_scratch;
+ bool drap;
+ bool end;
};
#endif /* _OBJTOOL_CFI_H */
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 3c6da70e6084..63d65a702900 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -7,10 +7,10 @@
#include <stdlib.h>
#include "builtin.h"
+#include "cfi.h"
+#include "arch.h"
#include "check.h"
-#include "elf.h"
#include "special.h"
-#include "arch.h"
#include "warn.h"
#include <linux/hashtable.h>
@@ -27,16 +27,17 @@ struct alternative {
};
const char *objname;
-struct cfi_state initial_func_cfi;
+struct cfi_init_state initial_func_cfi;
struct instruction *find_insn(struct objtool_file *file,
struct section *sec, unsigned long offset)
{
struct instruction *insn;
- hash_for_each_possible(file->insn_hash, insn, hash, offset)
+ hash_for_each_possible(file->insn_hash, insn, hash, sec_offset_hash(sec, offset)) {
if (insn->sec == sec && insn->offset == offset)
return insn;
+ }
return NULL;
}
@@ -226,18 +227,31 @@ static bool dead_end_function(struct objtool_file *file, struct symbol *func)
return __dead_end_function(file, func, 0);
}
-static void clear_insn_state(struct insn_state *state)
+static void init_cfi_state(struct cfi_state *cfi)
{
int i;
- memset(state, 0, sizeof(*state));
- state->cfa.base = CFI_UNDEFINED;
for (i = 0; i < CFI_NUM_REGS; i++) {
- state->regs[i].base = CFI_UNDEFINED;
- state->vals[i].base = CFI_UNDEFINED;
+ cfi->regs[i].base = CFI_UNDEFINED;
+ cfi->vals[i].base = CFI_UNDEFINED;
}
- state->drap_reg = CFI_UNDEFINED;
- state->drap_offset = -1;
+ cfi->cfa.base = CFI_UNDEFINED;
+ cfi->drap_reg = CFI_UNDEFINED;
+ cfi->drap_offset = -1;
+}
+
+static void init_insn_state(struct insn_state *state, struct section *sec)
+{
+ memset(state, 0, sizeof(*state));
+ init_cfi_state(&state->cfi);
+
+ /*
+ * We need the full vmlinux for noinstr validation; otherwise we cannot
+ * correctly determine insn->call_dest->sec (external symbols do not
+ * have a section).
+ */
+ if (vmlinux && sec)
+ state->noinstr = sec->noinstr;
}
/*
@@ -263,6 +277,10 @@ static int decode_instructions(struct objtool_file *file)
strncmp(sec->name, ".discard.", 9))
sec->text = true;
+ if (!strcmp(sec->name, ".noinstr.text") ||
+ !strcmp(sec->name, ".entry.text"))
+ sec->noinstr = true;
+
for (offset = 0; offset < sec->len; offset += insn->len) {
insn = malloc(sizeof(*insn));
if (!insn) {
@@ -271,7 +289,8 @@ static int decode_instructions(struct objtool_file *file)
}
memset(insn, 0, sizeof(*insn));
INIT_LIST_HEAD(&insn->alts);
- clear_insn_state(&insn->state);
+ INIT_LIST_HEAD(&insn->stack_ops);
+ init_cfi_state(&insn->cfi);
insn->sec = sec;
insn->offset = offset;
@@ -280,11 +299,11 @@ static int decode_instructions(struct objtool_file *file)
sec->len - offset,
&insn->len, &insn->type,
&insn->immediate,
- &insn->stack_op);
+ &insn->stack_ops);
if (ret)
goto err;
- hash_add(file->insn_hash, &insn->hash, insn->offset);
+ hash_add(file->insn_hash, &insn->hash, sec_offset_hash(sec, insn->offset));
list_add_tail(&insn->list, &file->insn_list);
nr_insns++;
}
@@ -314,6 +333,19 @@ err:
return ret;
}
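+
+/*
+ * Find the last instruction in a section, searching backwards at most ten
+ * bytes from the end of the section.
+ */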
+static struct instruction *find_last_insn(struct objtool_file *file,
+ struct section *sec)
+{
+ struct instruction *insn = NULL;
+ unsigned int offset;
+ unsigned int end = (sec->len > 10) ? sec->len - 10 : 0;
+
+ for (offset = sec->len - 1; offset >= end && !insn; offset--)
+ insn = find_insn(file, sec, offset);
+
+ return insn;
+}
+
/*
* Mark "ud2" instructions and manually annotated dead ends.
*/
@@ -322,7 +354,6 @@ static int add_dead_ends(struct objtool_file *file)
struct section *sec;
struct rela *rela;
struct instruction *insn;
- bool found;
/*
* By default, "ud2" is a dead end unless otherwise annotated, because
@@ -348,15 +379,8 @@ static int add_dead_ends(struct objtool_file *file)
if (insn)
insn = list_prev_entry(insn, list);
else if (rela->addend == rela->sym->sec->len) {
- found = false;
- list_for_each_entry_reverse(insn, &file->insn_list, list) {
- if (insn->sec == rela->sym->sec) {
- found = true;
- break;
- }
- }
-
- if (!found) {
+ insn = find_last_insn(file, rela->sym->sec);
+ if (!insn) {
WARN("can't find unreachable insn at %s+0x%x",
rela->sym->sec->name, rela->addend);
return -1;
@@ -390,15 +414,8 @@ reachable:
if (insn)
insn = list_prev_entry(insn, list);
else if (rela->addend == rela->sym->sec->len) {
- found = false;
- list_for_each_entry_reverse(insn, &file->insn_list, list) {
- if (insn->sec == rela->sym->sec) {
- found = true;
- break;
- }
- }
-
- if (!found) {
+ insn = find_last_insn(file, rela->sym->sec);
+ if (!insn) {
WARN("can't find reachable insn at %s+0x%x",
rela->sym->sec->name, rela->addend);
return -1;
@@ -490,6 +507,7 @@ static const char *uaccess_safe_builtin[] = {
"__asan_report_store16_noabort",
/* KCOV */
"write_comp_data",
+ "check_kcov_mode",
"__sanitizer_cov_trace_pc",
"__sanitizer_cov_trace_const_cmp1",
"__sanitizer_cov_trace_const_cmp2",
@@ -585,13 +603,14 @@ static int add_jump_destinations(struct objtool_file *file)
insn->offset, insn->len);
if (!rela) {
dest_sec = insn->sec;
- dest_off = insn->offset + insn->len + insn->immediate;
+ dest_off = arch_jump_destination(insn);
} else if (rela->sym->type == STT_SECTION) {
dest_sec = rela->sym->sec;
- dest_off = rela->addend + 4;
+ dest_off = arch_dest_rela_offset(rela->addend);
} else if (rela->sym->sec->idx) {
dest_sec = rela->sym->sec;
- dest_off = rela->sym->sym.st_value + rela->addend + 4;
+ dest_off = rela->sym->sym.st_value +
+ arch_dest_rela_offset(rela->addend);
} else if (strstr(rela->sym->name, "_indirect_thunk_")) {
/*
* Retpoline jumps are really dynamic jumps in
@@ -665,6 +684,16 @@ static int add_jump_destinations(struct objtool_file *file)
return 0;
}
+static void remove_insn_ops(struct instruction *insn)
+{
+ struct stack_op *op, *tmp;
+
+ list_for_each_entry_safe(op, tmp, &insn->stack_ops, list) {
+ list_del(&op->list);
+ free(op);
+ }
+}
+
/*
* Find the destination instructions for all calls.
*/
@@ -681,7 +710,7 @@ static int add_call_destinations(struct objtool_file *file)
rela = find_rela_by_dest_range(file->elf, insn->sec,
insn->offset, insn->len);
if (!rela) {
- dest_off = insn->offset + insn->len + insn->immediate;
+ dest_off = arch_jump_destination(insn);
insn->call_dest = find_func_by_offset(insn->sec, dest_off);
if (!insn->call_dest)
insn->call_dest = find_symbol_by_offset(insn->sec, dest_off);
@@ -690,10 +719,7 @@ static int add_call_destinations(struct objtool_file *file)
continue;
if (!insn->call_dest) {
- WARN_FUNC("unsupported intra-function call",
- insn->sec, insn->offset);
- if (retpoline)
- WARN("If this is a retpoline, please patch it in with alternatives and annotate it with ANNOTATE_NOSPEC_ALTERNATIVE.");
+ WARN_FUNC("unannotated intra-function call", insn->sec, insn->offset);
return -1;
}
@@ -704,17 +730,27 @@ static int add_call_destinations(struct objtool_file *file)
}
} else if (rela->sym->type == STT_SECTION) {
+ dest_off = arch_dest_rela_offset(rela->addend);
insn->call_dest = find_func_by_offset(rela->sym->sec,
- rela->addend+4);
+ dest_off);
if (!insn->call_dest) {
- WARN_FUNC("can't find call dest symbol at %s+0x%x",
+ WARN_FUNC("can't find call dest symbol at %s+0x%lx",
insn->sec, insn->offset,
rela->sym->sec->name,
- rela->addend + 4);
+ dest_off);
return -1;
}
} else
insn->call_dest = rela->sym;
+
+ /*
+ * Whatever stack impact regular CALLs have should be undone
+ * by the RETURN of the called function.
+ *
+ * Annotated intra-function calls retain the stack_ops but
+ * are converted to JUMP, see read_intra_function_calls().
+ */
+ remove_insn_ops(insn);
}
return 0;
@@ -742,7 +778,9 @@ static int handle_group_alt(struct objtool_file *file,
struct instruction *orig_insn,
struct instruction **new_insn)
{
+ static unsigned int alt_group_next_index = 1;
struct instruction *last_orig_insn, *last_new_insn, *insn, *fake_jump = NULL;
+ unsigned int alt_group = alt_group_next_index++;
unsigned long dest_off;
last_orig_insn = NULL;
@@ -751,7 +789,7 @@ static int handle_group_alt(struct objtool_file *file,
if (insn->offset >= special_alt->orig_off + special_alt->orig_len)
break;
- insn->alt_group = true;
+ insn->alt_group = alt_group;
last_orig_insn = insn;
}
@@ -763,7 +801,8 @@ static int handle_group_alt(struct objtool_file *file,
}
memset(fake_jump, 0, sizeof(*fake_jump));
INIT_LIST_HEAD(&fake_jump->alts);
- clear_insn_state(&fake_jump->state);
+ INIT_LIST_HEAD(&fake_jump->stack_ops);
+ init_cfi_state(&fake_jump->cfi);
fake_jump->sec = special_alt->new_sec;
fake_jump->offset = FAKE_JUMP_OFFSET;
@@ -784,6 +823,7 @@ static int handle_group_alt(struct objtool_file *file,
}
last_new_insn = NULL;
+ alt_group = alt_group_next_index++;
insn = *new_insn;
sec_for_each_insn_from(file, insn) {
if (insn->offset >= special_alt->new_off + special_alt->new_len)
@@ -793,6 +833,7 @@ static int handle_group_alt(struct objtool_file *file,
insn->ignore = orig_insn->ignore_alts;
insn->func = orig_insn->func;
+ insn->alt_group = alt_group;
/*
* Since alternative replacement code is copy/pasted by the
@@ -821,7 +862,7 @@ static int handle_group_alt(struct objtool_file *file,
if (!insn->immediate)
continue;
- dest_off = insn->offset + insn->len + insn->immediate;
+ dest_off = arch_jump_destination(insn);
if (dest_off == special_alt->new_off + special_alt->new_len) {
if (!fake_jump) {
WARN("%s: alternative jump to end of section",
@@ -916,6 +957,12 @@ static int add_special_section_alts(struct objtool_file *file)
}
if (special_alt->group) {
+ if (!special_alt->orig_len) {
+ WARN_FUNC("empty alternative entry",
+ orig_insn->sec, orig_insn->offset);
+ continue;
+ }
+
ret = handle_group_alt(file, special_alt, orig_insn,
&new_insn);
if (ret)
@@ -1253,15 +1300,10 @@ static int read_unwind_hints(struct objtool_file *file)
return -1;
}
- cfa = &insn->state.cfa;
-
- if (hint->type == UNWIND_HINT_TYPE_SAVE) {
- insn->save = true;
- continue;
+ cfa = &insn->cfi.cfa;
- } else if (hint->type == UNWIND_HINT_TYPE_RESTORE) {
- insn->restore = true;
- insn->hint = true;
+ if (hint->type == UNWIND_HINT_TYPE_RET_OFFSET) {
+ insn->ret_offset = hint->sp_offset;
continue;
}
@@ -1299,8 +1341,8 @@ static int read_unwind_hints(struct objtool_file *file)
}
cfa->offset = hint->sp_offset;
- insn->state.type = hint->type;
- insn->state.end = hint->end;
+ insn->cfi.type = hint->type;
+ insn->cfi.end = hint->end;
}
return 0;
@@ -1341,6 +1383,104 @@ static int read_retpoline_hints(struct objtool_file *file)
return 0;
}
+static int read_instr_hints(struct objtool_file *file)
+{
+ struct section *sec;
+ struct instruction *insn;
+ struct rela *rela;
+
+ sec = find_section_by_name(file->elf, ".rela.discard.instr_end");
+ if (!sec)
+ return 0;
+
+ list_for_each_entry(rela, &sec->rela_list, list) {
+ if (rela->sym->type != STT_SECTION) {
+ WARN("unexpected relocation symbol type in %s", sec->name);
+ return -1;
+ }
+
+ insn = find_insn(file, rela->sym->sec, rela->addend);
+ if (!insn) {
+ WARN("bad .discard.instr_end entry");
+ return -1;
+ }
+
+ insn->instr--;
+ }
+
+ sec = find_section_by_name(file->elf, ".rela.discard.instr_begin");
+ if (!sec)
+ return 0;
+
+ list_for_each_entry(rela, &sec->rela_list, list) {
+ if (rela->sym->type != STT_SECTION) {
+ WARN("unexpected relocation symbol type in %s", sec->name);
+ return -1;
+ }
+
+ insn = find_insn(file, rela->sym->sec, rela->addend);
+ if (!insn) {
+ WARN("bad .discard.instr_begin entry");
+ return -1;
+ }
+
+ insn->instr++;
+ }
+
+ return 0;
+}
+
+static int read_intra_function_calls(struct objtool_file *file)
+{
+ struct instruction *insn;
+ struct section *sec;
+ struct rela *rela;
+
+ sec = find_section_by_name(file->elf, ".rela.discard.intra_function_calls");
+ if (!sec)
+ return 0;
+
+ list_for_each_entry(rela, &sec->rela_list, list) {
+ unsigned long dest_off;
+
+ if (rela->sym->type != STT_SECTION) {
+ WARN("unexpected relocation symbol type in %s",
+ sec->name);
+ return -1;
+ }
+
+ insn = find_insn(file, rela->sym->sec, rela->addend);
+ if (!insn) {
+ WARN("bad .discard.intra_function_call entry");
+ return -1;
+ }
+
+ if (insn->type != INSN_CALL) {
+ WARN_FUNC("intra_function_call not a direct call",
+ insn->sec, insn->offset);
+ return -1;
+ }
+
+ /*
+ * Treat intra-function CALLs as JMPs, but with a stack_op.
+ * See add_call_destinations(), which strips stack_ops from
+ * normal CALLs.
+ */
+ insn->type = INSN_JUMP_UNCONDITIONAL;
+
+ dest_off = insn->offset + insn->len + insn->immediate;
+ insn->jump_dest = find_insn(file, insn->sec, dest_off);
+ if (!insn->jump_dest) {
+ WARN_FUNC("can't find call dest at %s+0x%lx",
+ insn->sec, insn->offset,
+ insn->sec->name, dest_off);
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
static void mark_rodata(struct objtool_file *file)
{
struct section *sec;
@@ -1357,8 +1497,8 @@ static void mark_rodata(struct objtool_file *file)
* .rodata.str1.* sections are ignored; they don't contain jump tables.
*/
for_each_sec(file, sec) {
- if ((!strncmp(sec->name, ".rodata", 7) && !strstr(sec->name, ".str1.")) ||
- !strcmp(sec->name, C_JUMP_TABLE_SECTION)) {
+ if (!strncmp(sec->name, ".rodata", 7) &&
+ !strstr(sec->name, ".str1.")) {
sec->rodata = true;
found = true;
}
@@ -1396,6 +1536,10 @@ static int decode_sections(struct objtool_file *file)
if (ret)
return ret;
+ ret = read_intra_function_calls(file);
+ if (ret)
+ return ret;
+
ret = add_call_destinations(file);
if (ret)
return ret;
@@ -1412,12 +1556,16 @@ static int decode_sections(struct objtool_file *file)
if (ret)
return ret;
+ ret = read_instr_hints(file);
+ if (ret)
+ return ret;
+
return 0;
}
static bool is_fentry_call(struct instruction *insn)
{
- if (insn->type == INSN_CALL &&
+ if (insn->type == INSN_CALL && insn->call_dest &&
insn->call_dest->type == STT_NOTYPE &&
!strcmp(insn->call_dest->name, "__fentry__"))
return true;
@@ -1425,40 +1573,57 @@ static bool is_fentry_call(struct instruction *insn)
return false;
}
-static bool has_modified_stack_frame(struct insn_state *state)
+static bool has_modified_stack_frame(struct instruction *insn, struct insn_state *state)
{
+ u8 ret_offset = insn->ret_offset;
+ struct cfi_state *cfi = &state->cfi;
int i;
- if (state->cfa.base != initial_func_cfi.cfa.base ||
- state->cfa.offset != initial_func_cfi.cfa.offset ||
- state->stack_size != initial_func_cfi.cfa.offset ||
- state->drap)
+ if (cfi->cfa.base != initial_func_cfi.cfa.base || cfi->drap)
+ return true;
+
+ if (cfi->cfa.offset != initial_func_cfi.cfa.offset + ret_offset)
+ return true;
+
+ if (cfi->stack_size != initial_func_cfi.cfa.offset + ret_offset)
return true;
- for (i = 0; i < CFI_NUM_REGS; i++)
- if (state->regs[i].base != initial_func_cfi.regs[i].base ||
- state->regs[i].offset != initial_func_cfi.regs[i].offset)
+ /*
+ * If there is a ret offset hint then don't check registers
+ * because a callee-saved register might have been pushed on
+ * the stack.
+ */
+ if (ret_offset)
+ return false;
+
+ for (i = 0; i < CFI_NUM_REGS; i++) {
+ if (cfi->regs[i].base != initial_func_cfi.regs[i].base ||
+ cfi->regs[i].offset != initial_func_cfi.regs[i].offset)
return true;
+ }
return false;
}
static bool has_valid_stack_frame(struct insn_state *state)
{
- if (state->cfa.base == CFI_BP && state->regs[CFI_BP].base == CFI_CFA &&
- state->regs[CFI_BP].offset == -16)
+ struct cfi_state *cfi = &state->cfi;
+
+ if (cfi->cfa.base == CFI_BP && cfi->regs[CFI_BP].base == CFI_CFA &&
+ cfi->regs[CFI_BP].offset == -16)
return true;
- if (state->drap && state->regs[CFI_BP].base == CFI_BP)
+ if (cfi->drap && cfi->regs[CFI_BP].base == CFI_BP)
return true;
return false;
}
-static int update_insn_state_regs(struct instruction *insn, struct insn_state *state)
+static int update_cfi_state_regs(struct instruction *insn,
+ struct cfi_state *cfi,
+ struct stack_op *op)
{
- struct cfi_reg *cfa = &state->cfa;
- struct stack_op *op = &insn->stack_op;
+ struct cfi_reg *cfa = &cfi->cfa;
if (cfa->base != CFI_SP && cfa->base != CFI_SP_INDIRECT)
return 0;
@@ -1479,20 +1644,19 @@ static int update_insn_state_regs(struct instruction *insn, struct insn_state *s
return 0;
}
-static void save_reg(struct insn_state *state, unsigned char reg, int base,
- int offset)
+static void save_reg(struct cfi_state *cfi, unsigned char reg, int base, int offset)
{
if (arch_callee_saved_reg(reg) &&
- state->regs[reg].base == CFI_UNDEFINED) {
- state->regs[reg].base = base;
- state->regs[reg].offset = offset;
+ cfi->regs[reg].base == CFI_UNDEFINED) {
+ cfi->regs[reg].base = base;
+ cfi->regs[reg].offset = offset;
}
}
-static void restore_reg(struct insn_state *state, unsigned char reg)
+static void restore_reg(struct cfi_state *cfi, unsigned char reg)
{
- state->regs[reg].base = CFI_UNDEFINED;
- state->regs[reg].offset = 0;
+ cfi->regs[reg].base = initial_func_cfi.regs[reg].base;
+ cfi->regs[reg].offset = initial_func_cfi.regs[reg].offset;
}
/*
@@ -1548,11 +1712,11 @@ static void restore_reg(struct insn_state *state, unsigned char reg)
* 41 5d pop %r13
* c3 retq
*/
-static int update_insn_state(struct instruction *insn, struct insn_state *state)
+static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi,
+ struct stack_op *op)
{
- struct stack_op *op = &insn->stack_op;
- struct cfi_reg *cfa = &state->cfa;
- struct cfi_reg *regs = state->regs;
+ struct cfi_reg *cfa = &cfi->cfa;
+ struct cfi_reg *regs = cfi->regs;
/* stack operations don't make sense with an undefined CFA */
if (cfa->base == CFI_UNDEFINED) {
@@ -1563,8 +1727,8 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state)
return 0;
}
- if (state->type == ORC_TYPE_REGS || state->type == ORC_TYPE_REGS_IRET)
- return update_insn_state_regs(insn, state);
+ if (cfi->type == ORC_TYPE_REGS || cfi->type == ORC_TYPE_REGS_IRET)
+ return update_cfi_state_regs(insn, cfi, op);
switch (op->dest.type) {
@@ -1579,16 +1743,16 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state)
/* mov %rsp, %rbp */
cfa->base = op->dest.reg;
- state->bp_scratch = false;
+ cfi->bp_scratch = false;
}
else if (op->src.reg == CFI_SP &&
- op->dest.reg == CFI_BP && state->drap) {
+ op->dest.reg == CFI_BP && cfi->drap) {
/* drap: mov %rsp, %rbp */
regs[CFI_BP].base = CFI_BP;
- regs[CFI_BP].offset = -state->stack_size;
- state->bp_scratch = false;
+ regs[CFI_BP].offset = -cfi->stack_size;
+ cfi->bp_scratch = false;
}
else if (op->src.reg == CFI_SP && cfa->base == CFI_SP) {
@@ -1603,8 +1767,8 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state)
* ...
* mov %rax, %rsp
*/
- state->vals[op->dest.reg].base = CFI_CFA;
- state->vals[op->dest.reg].offset = -state->stack_size;
+ cfi->vals[op->dest.reg].base = CFI_CFA;
+ cfi->vals[op->dest.reg].offset = -cfi->stack_size;
}
else if (op->src.reg == CFI_BP && op->dest.reg == CFI_SP &&
@@ -1615,14 +1779,14 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state)
*
* Restore the original stack pointer (Clang).
*/
- state->stack_size = -state->regs[CFI_BP].offset;
+ cfi->stack_size = -cfi->regs[CFI_BP].offset;
}
else if (op->dest.reg == cfa->base) {
/* mov %reg, %rsp */
if (cfa->base == CFI_SP &&
- state->vals[op->src.reg].base == CFI_CFA) {
+ cfi->vals[op->src.reg].base == CFI_CFA) {
/*
* This is needed for the rare case
@@ -1632,8 +1796,8 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state)
* ...
* mov %rcx, %rsp
*/
- cfa->offset = -state->vals[op->src.reg].offset;
- state->stack_size = cfa->offset;
+ cfa->offset = -cfi->vals[op->src.reg].offset;
+ cfi->stack_size = cfa->offset;
} else {
cfa->base = CFI_UNDEFINED;
@@ -1647,7 +1811,7 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state)
if (op->dest.reg == CFI_SP && op->src.reg == CFI_SP) {
/* add imm, %rsp */
- state->stack_size -= op->src.offset;
+ cfi->stack_size -= op->src.offset;
if (cfa->base == CFI_SP)
cfa->offset -= op->src.offset;
break;
@@ -1656,14 +1820,14 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state)
if (op->dest.reg == CFI_SP && op->src.reg == CFI_BP) {
/* lea disp(%rbp), %rsp */
- state->stack_size = -(op->src.offset + regs[CFI_BP].offset);
+ cfi->stack_size = -(op->src.offset + regs[CFI_BP].offset);
break;
}
if (op->src.reg == CFI_SP && cfa->base == CFI_SP) {
/* drap: lea disp(%rsp), %drap */
- state->drap_reg = op->dest.reg;
+ cfi->drap_reg = op->dest.reg;
/*
* lea disp(%rsp), %reg
@@ -1675,25 +1839,25 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state)
* ...
* mov %rcx, %rsp
*/
- state->vals[op->dest.reg].base = CFI_CFA;
- state->vals[op->dest.reg].offset = \
- -state->stack_size + op->src.offset;
+ cfi->vals[op->dest.reg].base = CFI_CFA;
+ cfi->vals[op->dest.reg].offset = \
+ -cfi->stack_size + op->src.offset;
break;
}
- if (state->drap && op->dest.reg == CFI_SP &&
- op->src.reg == state->drap_reg) {
+ if (cfi->drap && op->dest.reg == CFI_SP &&
+ op->src.reg == cfi->drap_reg) {
/* drap: lea disp(%drap), %rsp */
cfa->base = CFI_SP;
- cfa->offset = state->stack_size = -op->src.offset;
- state->drap_reg = CFI_UNDEFINED;
- state->drap = false;
+ cfa->offset = cfi->stack_size = -op->src.offset;
+ cfi->drap_reg = CFI_UNDEFINED;
+ cfi->drap = false;
break;
}
- if (op->dest.reg == state->cfa.base) {
+ if (op->dest.reg == cfi->cfa.base) {
WARN_FUNC("unsupported stack register modification",
insn->sec, insn->offset);
return -1;
@@ -1703,18 +1867,18 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state)
case OP_SRC_AND:
if (op->dest.reg != CFI_SP ||
- (state->drap_reg != CFI_UNDEFINED && cfa->base != CFI_SP) ||
- (state->drap_reg == CFI_UNDEFINED && cfa->base != CFI_BP)) {
+ (cfi->drap_reg != CFI_UNDEFINED && cfa->base != CFI_SP) ||
+ (cfi->drap_reg == CFI_UNDEFINED && cfa->base != CFI_BP)) {
WARN_FUNC("unsupported stack pointer realignment",
insn->sec, insn->offset);
return -1;
}
- if (state->drap_reg != CFI_UNDEFINED) {
+ if (cfi->drap_reg != CFI_UNDEFINED) {
/* drap: and imm, %rsp */
- cfa->base = state->drap_reg;
- cfa->offset = state->stack_size = 0;
- state->drap = true;
+ cfa->base = cfi->drap_reg;
+ cfa->offset = cfi->stack_size = 0;
+ cfi->drap = true;
}
/*
@@ -1726,57 +1890,55 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state)
case OP_SRC_POP:
case OP_SRC_POPF:
- if (!state->drap && op->dest.type == OP_DEST_REG &&
- op->dest.reg == cfa->base) {
+ if (!cfi->drap && op->dest.reg == cfa->base) {
/* pop %rbp */
cfa->base = CFI_SP;
}
- if (state->drap && cfa->base == CFI_BP_INDIRECT &&
- op->dest.type == OP_DEST_REG &&
- op->dest.reg == state->drap_reg &&
- state->drap_offset == -state->stack_size) {
+ if (cfi->drap && cfa->base == CFI_BP_INDIRECT &&
+ op->dest.reg == cfi->drap_reg &&
+ cfi->drap_offset == -cfi->stack_size) {
/* drap: pop %drap */
- cfa->base = state->drap_reg;
+ cfa->base = cfi->drap_reg;
cfa->offset = 0;
- state->drap_offset = -1;
+ cfi->drap_offset = -1;
- } else if (regs[op->dest.reg].offset == -state->stack_size) {
+ } else if (regs[op->dest.reg].offset == -cfi->stack_size) {
/* pop %reg */
- restore_reg(state, op->dest.reg);
+ restore_reg(cfi, op->dest.reg);
}
- state->stack_size -= 8;
+ cfi->stack_size -= 8;
if (cfa->base == CFI_SP)
cfa->offset -= 8;
break;
case OP_SRC_REG_INDIRECT:
- if (state->drap && op->src.reg == CFI_BP &&
- op->src.offset == state->drap_offset) {
+ if (cfi->drap && op->src.reg == CFI_BP &&
+ op->src.offset == cfi->drap_offset) {
/* drap: mov disp(%rbp), %drap */
- cfa->base = state->drap_reg;
+ cfa->base = cfi->drap_reg;
cfa->offset = 0;
- state->drap_offset = -1;
+ cfi->drap_offset = -1;
}
- if (state->drap && op->src.reg == CFI_BP &&
+ if (cfi->drap && op->src.reg == CFI_BP &&
op->src.offset == regs[op->dest.reg].offset) {
/* drap: mov disp(%rbp), %reg */
- restore_reg(state, op->dest.reg);
+ restore_reg(cfi, op->dest.reg);
} else if (op->src.reg == cfa->base &&
op->src.offset == regs[op->dest.reg].offset + cfa->offset) {
/* mov disp(%rbp), %reg */
/* mov disp(%rsp), %reg */
- restore_reg(state, op->dest.reg);
+ restore_reg(cfi, op->dest.reg);
}
break;
@@ -1791,78 +1953,78 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state)
case OP_DEST_PUSH:
case OP_DEST_PUSHF:
- state->stack_size += 8;
+ cfi->stack_size += 8;
if (cfa->base == CFI_SP)
cfa->offset += 8;
if (op->src.type != OP_SRC_REG)
break;
- if (state->drap) {
- if (op->src.reg == cfa->base && op->src.reg == state->drap_reg) {
+ if (cfi->drap) {
+ if (op->src.reg == cfa->base && op->src.reg == cfi->drap_reg) {
/* drap: push %drap */
cfa->base = CFI_BP_INDIRECT;
- cfa->offset = -state->stack_size;
+ cfa->offset = -cfi->stack_size;
/* save drap so we know when to restore it */
- state->drap_offset = -state->stack_size;
+ cfi->drap_offset = -cfi->stack_size;
- } else if (op->src.reg == CFI_BP && cfa->base == state->drap_reg) {
+ } else if (op->src.reg == CFI_BP && cfa->base == cfi->drap_reg) {
/* drap: push %rbp */
- state->stack_size = 0;
+ cfi->stack_size = 0;
} else if (regs[op->src.reg].base == CFI_UNDEFINED) {
/* drap: push %reg */
- save_reg(state, op->src.reg, CFI_BP, -state->stack_size);
+ save_reg(cfi, op->src.reg, CFI_BP, -cfi->stack_size);
}
} else {
/* push %reg */
- save_reg(state, op->src.reg, CFI_CFA, -state->stack_size);
+ save_reg(cfi, op->src.reg, CFI_CFA, -cfi->stack_size);
}
/* detect when asm code uses rbp as a scratch register */
if (!no_fp && insn->func && op->src.reg == CFI_BP &&
cfa->base != CFI_BP)
- state->bp_scratch = true;
+ cfi->bp_scratch = true;
break;
case OP_DEST_REG_INDIRECT:
- if (state->drap) {
- if (op->src.reg == cfa->base && op->src.reg == state->drap_reg) {
+ if (cfi->drap) {
+ if (op->src.reg == cfa->base && op->src.reg == cfi->drap_reg) {
/* drap: mov %drap, disp(%rbp) */
cfa->base = CFI_BP_INDIRECT;
cfa->offset = op->dest.offset;
/* save drap offset so we know when to restore it */
- state->drap_offset = op->dest.offset;
+ cfi->drap_offset = op->dest.offset;
}
else if (regs[op->src.reg].base == CFI_UNDEFINED) {
/* drap: mov reg, disp(%rbp) */
- save_reg(state, op->src.reg, CFI_BP, op->dest.offset);
+ save_reg(cfi, op->src.reg, CFI_BP, op->dest.offset);
}
} else if (op->dest.reg == cfa->base) {
/* mov reg, disp(%rbp) */
/* mov reg, disp(%rsp) */
- save_reg(state, op->src.reg, CFI_CFA,
- op->dest.offset - state->cfa.offset);
+ save_reg(cfi, op->src.reg, CFI_CFA,
+ op->dest.offset - cfi->cfa.offset);
}
break;
case OP_DEST_LEAVE:
- if ((!state->drap && cfa->base != CFI_BP) ||
- (state->drap && cfa->base != state->drap_reg)) {
+ if ((!cfi->drap && cfa->base != CFI_BP) ||
+ (cfi->drap && cfa->base != cfi->drap_reg)) {
WARN_FUNC("leave instruction with modified stack frame",
insn->sec, insn->offset);
return -1;
@@ -1870,10 +2032,10 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state)
/* leave (mov %rbp, %rsp; pop %rbp) */
- state->stack_size = -state->regs[CFI_BP].offset - 8;
- restore_reg(state, CFI_BP);
+ cfi->stack_size = -cfi->regs[CFI_BP].offset - 8;
+ restore_reg(cfi, CFI_BP);
- if (!state->drap) {
+ if (!cfi->drap) {
cfa->base = CFI_SP;
cfa->offset -= 8;
}
@@ -1888,7 +2050,7 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state)
}
/* pop mem */
- state->stack_size -= 8;
+ cfi->stack_size -= 8;
if (cfa->base == CFI_SP)
cfa->offset -= 8;
@@ -1903,41 +2065,86 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state)
return 0;
}
-static bool insn_state_match(struct instruction *insn, struct insn_state *state)
+static int handle_insn_ops(struct instruction *insn, struct insn_state *state)
+{
+ struct stack_op *op;
+
+ list_for_each_entry(op, &insn->stack_ops, list) {
+ struct cfi_state old_cfi = state->cfi;
+ int res;
+
+ res = update_cfi_state(insn, &state->cfi, op);
+ if (res)
+ return res;
+
+ if (insn->alt_group && memcmp(&state->cfi, &old_cfi, sizeof(struct cfi_state))) {
+ WARN_FUNC("alternative modifies stack", insn->sec, insn->offset);
+ return -1;
+ }
+
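+ /*
+ * The AC (uaccess) state travels with the flags: track it across
+ * PUSHF/POPF pairs using uaccess_stack as a small bit-stack (the
+ * initial 1 acts as a sentinel marking the bottom of the stack).
+ */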
+ if (op->dest.type == OP_DEST_PUSHF) {
+ if (!state->uaccess_stack) {
+ state->uaccess_stack = 1;
+ } else if (state->uaccess_stack >> 31) {
+ WARN_FUNC("PUSHF stack exhausted",
+ insn->sec, insn->offset);
+ return 1;
+ }
+ state->uaccess_stack <<= 1;
+ state->uaccess_stack |= state->uaccess;
+ }
+
+ if (op->src.type == OP_SRC_POPF) {
+ if (state->uaccess_stack) {
+ state->uaccess = state->uaccess_stack & 1;
+ state->uaccess_stack >>= 1;
+ if (state->uaccess_stack == 1)
+ state->uaccess_stack = 0;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static bool insn_cfi_match(struct instruction *insn, struct cfi_state *cfi2)
{
- struct insn_state *state1 = &insn->state, *state2 = state;
+ struct cfi_state *cfi1 = &insn->cfi;
int i;
- if (memcmp(&state1->cfa, &state2->cfa, sizeof(state1->cfa))) {
+ if (memcmp(&cfi1->cfa, &cfi2->cfa, sizeof(cfi1->cfa))) {
+
WARN_FUNC("stack state mismatch: cfa1=%d%+d cfa2=%d%+d",
insn->sec, insn->offset,
- state1->cfa.base, state1->cfa.offset,
- state2->cfa.base, state2->cfa.offset);
+ cfi1->cfa.base, cfi1->cfa.offset,
+ cfi2->cfa.base, cfi2->cfa.offset);
- } else if (memcmp(&state1->regs, &state2->regs, sizeof(state1->regs))) {
+ } else if (memcmp(&cfi1->regs, &cfi2->regs, sizeof(cfi1->regs))) {
for (i = 0; i < CFI_NUM_REGS; i++) {
- if (!memcmp(&state1->regs[i], &state2->regs[i],
+ if (!memcmp(&cfi1->regs[i], &cfi2->regs[i],
sizeof(struct cfi_reg)))
continue;
WARN_FUNC("stack state mismatch: reg1[%d]=%d%+d reg2[%d]=%d%+d",
insn->sec, insn->offset,
- i, state1->regs[i].base, state1->regs[i].offset,
- i, state2->regs[i].base, state2->regs[i].offset);
+ i, cfi1->regs[i].base, cfi1->regs[i].offset,
+ i, cfi2->regs[i].base, cfi2->regs[i].offset);
break;
}
- } else if (state1->type != state2->type) {
+ } else if (cfi1->type != cfi2->type) {
+
WARN_FUNC("stack state mismatch: type1=%d type2=%d",
- insn->sec, insn->offset, state1->type, state2->type);
+ insn->sec, insn->offset, cfi1->type, cfi2->type);
+
+ } else if (cfi1->drap != cfi2->drap ||
+ (cfi1->drap && cfi1->drap_reg != cfi2->drap_reg) ||
+ (cfi1->drap && cfi1->drap_offset != cfi2->drap_offset)) {
- } else if (state1->drap != state2->drap ||
- (state1->drap && state1->drap_reg != state2->drap_reg) ||
- (state1->drap && state1->drap_offset != state2->drap_offset)) {
WARN_FUNC("stack state mismatch: drap1=%d(%d,%d) drap2=%d(%d,%d)",
insn->sec, insn->offset,
- state1->drap, state1->drap_reg, state1->drap_offset,
- state2->drap, state2->drap_reg, state2->drap_offset);
+ cfi1->drap, cfi1->drap_reg, cfi1->drap_offset,
+ cfi2->drap, cfi2->drap_reg, cfi2->drap_offset);
} else
return true;
@@ -1963,6 +2170,13 @@ static inline const char *call_dest_name(struct instruction *insn)
static int validate_call(struct instruction *insn, struct insn_state *state)
{
+ if (state->noinstr && state->instr <= 0 &&
+ (!insn->call_dest || !insn->call_dest->sec->noinstr)) {
+ WARN_FUNC("call to %s() leaves .noinstr.text section",
+ insn->sec, insn->offset, call_dest_name(insn));
+ return 1;
+ }
+
if (state->uaccess && !func_uaccess_safe(insn->call_dest)) {
WARN_FUNC("call to %s() with UACCESS enabled",
insn->sec, insn->offset, call_dest_name(insn));
@@ -1980,7 +2194,7 @@ static int validate_call(struct instruction *insn, struct insn_state *state)
static int validate_sibling_call(struct instruction *insn, struct insn_state *state)
{
- if (has_modified_stack_frame(state)) {
+ if (has_modified_stack_frame(insn, state)) {
WARN_FUNC("sibling call from callable instruction with modified stack frame",
insn->sec, insn->offset);
return 1;
@@ -1991,6 +2205,12 @@ static int validate_sibling_call(struct instruction *insn, struct insn_state *st
static int validate_return(struct symbol *func, struct instruction *insn, struct insn_state *state)
{
+ if (state->noinstr && state->instr > 0) {
+ WARN_FUNC("return with instrumentation enabled",
+ insn->sec, insn->offset);
+ return 1;
+ }
+
if (state->uaccess && !func_uaccess_safe(func)) {
WARN_FUNC("return with UACCESS enabled",
insn->sec, insn->offset);
@@ -2009,13 +2229,13 @@ static int validate_return(struct symbol *func, struct instruction *insn, struct
return 1;
}
- if (func && has_modified_stack_frame(state)) {
+ if (func && has_modified_stack_frame(insn, state)) {
WARN_FUNC("return with modified stack frame",
insn->sec, insn->offset);
return 1;
}
- if (state->bp_scratch) {
+ if (state->cfi.bp_scratch) {
WARN_FUNC("BP used as a scratch register",
insn->sec, insn->offset);
return 1;
@@ -2025,29 +2245,46 @@ static int validate_return(struct symbol *func, struct instruction *insn, struct
}
/*
+ * Alternatives should not contain any ORC entries, which in turn means they
+ * should not contain any CFI ops, which implies all instructions should have
+ * the same CFI state.
+ *
+ * It is possible to construct alternatives that have unreachable holes that go
+ * unreported (because they're NOPs); such holes would result in CFI_UNDEFINED
+ * states, which then result in ORC entries, which we just said we didn't want.
+ *
+ * Avoid them by copying the CFI entry of the first instruction into the whole
+ * alternative.
+ */
+static void fill_alternative_cfi(struct objtool_file *file, struct instruction *insn)
+{
+ struct instruction *first_insn = insn;
+ int alt_group = insn->alt_group;
+
+ sec_for_each_insn_continue(file, insn) {
+ if (insn->alt_group != alt_group)
+ break;
+ insn->cfi = first_insn->cfi;
+ }
+}
+
+/*
* Follow the branch starting at the given instruction, and recursively follow
* any other branches (jumps). Meanwhile, track the frame pointer state at
* each instruction and validate all the rules described in
* tools/objtool/Documentation/stack-validation.txt.
*/
static int validate_branch(struct objtool_file *file, struct symbol *func,
- struct instruction *first, struct insn_state state)
+ struct instruction *insn, struct insn_state state)
{
struct alternative *alt;
- struct instruction *insn, *next_insn;
+ struct instruction *next_insn;
struct section *sec;
u8 visited;
int ret;
- insn = first;
sec = insn->sec;
- if (insn->alt_group && list_empty(&insn->alts)) {
- WARN_FUNC("don't know how to handle branch to middle of alternative instruction group",
- sec, insn->offset);
- return 1;
- }
-
while (1) {
next_insn = next_insn_same_sec(file, insn);
@@ -2065,59 +2302,24 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
visited = 1 << state.uaccess;
if (insn->visited) {
- if (!insn->hint && !insn_state_match(insn, &state))
+ if (!insn->hint && !insn_cfi_match(insn, &state.cfi))
return 1;
if (insn->visited & visited)
return 0;
}
- if (insn->hint) {
- if (insn->restore) {
- struct instruction *save_insn, *i;
-
- i = insn;
- save_insn = NULL;
- sym_for_each_insn_continue_reverse(file, func, i) {
- if (i->save) {
- save_insn = i;
- break;
- }
- }
-
- if (!save_insn) {
- WARN_FUNC("no corresponding CFI save for CFI restore",
- sec, insn->offset);
- return 1;
- }
-
- if (!save_insn->visited) {
- /*
- * Oops, no state to copy yet.
- * Hopefully we can reach this
- * instruction from another branch
- * after the save insn has been
- * visited.
- */
- if (insn == first)
- return 0;
-
- WARN_FUNC("objtool isn't smart enough to handle this CFI save/restore combo",
- sec, insn->offset);
- return 1;
- }
-
- insn->state = save_insn->state;
- }
-
- state = insn->state;
+ if (state.noinstr)
+ state.instr += insn->instr;
- } else
- insn->state = state;
+ if (insn->hint)
+ state.cfi = insn->cfi;
+ else
+ insn->cfi = state.cfi;
insn->visited |= visited;
- if (!insn->ignore_alts) {
+ if (!insn->ignore_alts && !list_empty(&insn->alts)) {
bool skip_orig = false;
list_for_each_entry(alt, &insn->alts, list) {
@@ -2132,10 +2334,16 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
}
}
+ if (insn->alt_group)
+ fill_alternative_cfi(file, insn);
+
if (skip_orig)
return 0;
}
+ if (handle_insn_ops(insn, &state))
+ return 1;
+
switch (insn->type) {
case INSN_RETURN:
@@ -2202,32 +2410,6 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
}
return 0;
- case INSN_STACK:
- if (update_insn_state(insn, &state))
- return 1;
-
- if (insn->stack_op.dest.type == OP_DEST_PUSHF) {
- if (!state.uaccess_stack) {
- state.uaccess_stack = 1;
- } else if (state.uaccess_stack >> 31) {
- WARN_FUNC("PUSHF stack exhausted", sec, insn->offset);
- return 1;
- }
- state.uaccess_stack <<= 1;
- state.uaccess_stack |= state.uaccess;
- }
-
- if (insn->stack_op.src.type == OP_SRC_POPF) {
- if (state.uaccess_stack) {
- state.uaccess = state.uaccess_stack & 1;
- state.uaccess_stack >>= 1;
- if (state.uaccess_stack == 1)
- state.uaccess_stack = 0;
- }
- }
-
- break;
-
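(For reference, the PUSHF/POPF bookkeeping removed here does not disappear: stack ops are now processed through the handle_insn_ops() call added earlier in validate_branch(). Below is a standalone sketch, not part of the patch, of the technique itself: uaccess_stack acts as a shift-register stack whose bottom is marked by a sentinel bit, so PUSHF saves the current uaccess flag and POPF restores it.)

    /* Standalone sketch of the uaccess_stack shift-register technique. */
    #include <stdbool.h>
    #include <stdio.h>

    static unsigned int uaccess_stack;
    static bool uaccess;

    static void pushf(void)
    {
            if (!uaccess_stack)
                    uaccess_stack = 1;              /* sentinel marking the stack bottom */
            uaccess_stack = (uaccess_stack << 1) | uaccess;
    }

    static void popf(void)
    {
            if (uaccess_stack) {
                    uaccess = uaccess_stack & 1;
                    uaccess_stack >>= 1;
                    if (uaccess_stack == 1)         /* only the sentinel is left */
                            uaccess_stack = 0;
            }
    }

    int main(void)
    {
            uaccess = true;
            pushf();                                /* saves uaccess == 1 */
            uaccess = false;
            popf();                                 /* restores uaccess == 1 */
            printf("uaccess after popf: %d\n", uaccess);
            return 0;
    }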
case INSN_STAC:
if (state.uaccess) {
WARN_FUNC("recursive UACCESS enable", sec, insn->offset);
@@ -2273,7 +2455,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
return 0;
if (!next_insn) {
- if (state.cfa.base == CFI_UNDEFINED)
+ if (state.cfi.cfa.base == CFI_UNDEFINED)
return 0;
WARN("%s: unexpected end of section", sec->name);
return 1;
@@ -2285,24 +2467,34 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
return 0;
}
-static int validate_unwind_hints(struct objtool_file *file)
+static int validate_unwind_hints(struct objtool_file *file, struct section *sec)
{
struct instruction *insn;
- int ret, warnings = 0;
struct insn_state state;
+ int ret, warnings = 0;
if (!file->hints)
return 0;
- clear_insn_state(&state);
+ init_insn_state(&state, sec);
- for_each_insn(file, insn) {
+ if (sec) {
+ insn = find_insn(file, sec, 0);
+ if (!insn)
+ return 0;
+ } else {
+ insn = list_first_entry(&file->insn_list, typeof(*insn), list);
+ }
+
+ while (&insn->list != &file->insn_list && (!sec || insn->sec == sec)) {
if (insn->hint && !insn->visited) {
ret = validate_branch(file, insn->func, insn, state);
if (ret && backtrace)
BT_FUNC("<=== (hint)", insn);
warnings += ret;
}
+
+ insn = list_next_entry(insn, list);
}
return warnings;
@@ -2417,43 +2609,69 @@ static bool ignore_unreachable_insn(struct instruction *insn)
return false;
}
-static int validate_section(struct objtool_file *file, struct section *sec)
+static int validate_symbol(struct objtool_file *file, struct section *sec,
+ struct symbol *sym, struct insn_state *state)
{
- struct symbol *func;
struct instruction *insn;
- struct insn_state state;
- int ret, warnings = 0;
+ int ret;
+
+ if (!sym->len) {
+ WARN("%s() is missing an ELF size annotation", sym->name);
+ return 1;
+ }
+
+ if (sym->pfunc != sym || sym->alias != sym)
+ return 0;
- clear_insn_state(&state);
+ insn = find_insn(file, sec, sym->offset);
+ if (!insn || insn->ignore || insn->visited)
+ return 0;
+
+ state->uaccess = sym->uaccess_safe;
+
+ ret = validate_branch(file, insn->func, insn, *state);
+ if (ret && backtrace)
+ BT_FUNC("<=== (sym)", insn);
+ return ret;
+}
- state.cfa = initial_func_cfi.cfa;
- memcpy(&state.regs, &initial_func_cfi.regs,
- CFI_NUM_REGS * sizeof(struct cfi_reg));
- state.stack_size = initial_func_cfi.cfa.offset;
+static int validate_section(struct objtool_file *file, struct section *sec)
+{
+ struct insn_state state;
+ struct symbol *func;
+ int warnings = 0;
list_for_each_entry(func, &sec->symbol_list, list) {
if (func->type != STT_FUNC)
continue;
- if (!func->len) {
- WARN("%s() is missing an ELF size annotation",
- func->name);
- warnings++;
- }
+ init_insn_state(&state, sec);
+ state.cfi.cfa = initial_func_cfi.cfa;
+ memcpy(&state.cfi.regs, &initial_func_cfi.regs,
+ CFI_NUM_REGS * sizeof(struct cfi_reg));
+ state.cfi.stack_size = initial_func_cfi.cfa.offset;
- if (func->pfunc != func || func->alias != func)
- continue;
+ warnings += validate_symbol(file, sec, func, &state);
+ }
- insn = find_insn(file, sec, func->offset);
- if (!insn || insn->ignore || insn->visited)
- continue;
+ return warnings;
+}
- state.uaccess = func->uaccess_safe;
+static int validate_vmlinux_functions(struct objtool_file *file)
+{
+ struct section *sec;
+ int warnings = 0;
- ret = validate_branch(file, func, insn, state);
- if (ret && backtrace)
- BT_FUNC("<=== (func)", insn);
- warnings += ret;
+ sec = find_section_by_name(file->elf, ".noinstr.text");
+ if (sec) {
+ warnings += validate_section(file, sec);
+ warnings += validate_unwind_hints(file, sec);
+ }
+
+ sec = find_section_by_name(file->elf, ".entry.text");
+ if (sec) {
+ warnings += validate_section(file, sec);
+ warnings += validate_unwind_hints(file, sec);
}
return warnings;
@@ -2464,8 +2682,12 @@ static int validate_functions(struct objtool_file *file)
struct section *sec;
int warnings = 0;
- for_each_sec(file, sec)
+ for_each_sec(file, sec) {
+ if (!(sec->sh.sh_flags & SHF_EXECINSTR))
+ continue;
+
warnings += validate_section(file, sec);
+ }
return warnings;
}
@@ -2496,7 +2718,7 @@ int check(const char *_objname, bool orc)
objname = _objname;
- file.elf = elf_read(objname, orc ? O_RDWR : O_RDONLY);
+ file.elf = elf_open_read(objname, orc ? O_RDWR : O_RDONLY);
if (!file.elf)
return 1;
@@ -2516,6 +2738,15 @@ int check(const char *_objname, bool orc)
if (list_empty(&file.insn_list))
goto out;
+ if (vmlinux && !validate_dup) {
+ ret = validate_vmlinux_functions(&file);
+ if (ret < 0)
+ goto out;
+
+ warnings += ret;
+ goto out;
+ }
+
if (retpoline) {
ret = validate_retpoline(&file);
if (ret < 0)
@@ -2528,7 +2759,7 @@ int check(const char *_objname, bool orc)
goto out;
warnings += ret;
- ret = validate_unwind_hints(&file);
+ ret = validate_unwind_hints(&file, NULL);
if (ret < 0)
goto out;
warnings += ret;
diff --git a/tools/objtool/check.h b/tools/objtool/check.h
index f0ce8ffe7135..906b5210f7ca 100644
--- a/tools/objtool/check.h
+++ b/tools/objtool/check.h
@@ -7,22 +7,16 @@
#define _CHECK_H
#include <stdbool.h>
-#include "elf.h"
#include "cfi.h"
#include "arch.h"
-#include "orc.h"
-#include <linux/hashtable.h>
struct insn_state {
- struct cfi_reg cfa;
- struct cfi_reg regs[CFI_NUM_REGS];
- int stack_size;
- unsigned char type;
- bool bp_scratch;
- bool drap, end, uaccess, df;
+ struct cfi_state cfi;
unsigned int uaccess_stack;
- int drap_reg, drap_offset;
- struct cfi_reg vals[CFI_NUM_REGS];
+ bool uaccess;
+ bool df;
+ bool noinstr;
+ s8 instr;
};
struct instruction {
@@ -33,29 +27,24 @@ struct instruction {
unsigned int len;
enum insn_type type;
unsigned long immediate;
- bool alt_group, dead_end, ignore, hint, save, restore, ignore_alts;
+ bool dead_end, ignore, ignore_alts;
+ bool hint;
bool retpoline_safe;
+ s8 instr;
u8 visited;
+ u8 ret_offset;
+ int alt_group;
struct symbol *call_dest;
struct instruction *jump_dest;
struct instruction *first_jump_src;
struct rela *jump_table;
struct list_head alts;
struct symbol *func;
- struct stack_op stack_op;
- struct insn_state state;
+ struct list_head stack_ops;
+ struct cfi_state cfi;
struct orc_entry orc;
};
-struct objtool_file {
- struct elf *elf;
- struct list_head insn_list;
- DECLARE_HASHTABLE(insn_hash, 20);
- bool ignore_unreachables, c_file, hints, rodata;
-};
-
-int check(const char *objname, bool orc);
-
struct instruction *find_insn(struct objtool_file *file,
struct section *sec, unsigned long offset);
diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index c4857fa3f1d1..84225679f96d 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -27,6 +27,22 @@ static inline u32 str_hash(const char *str)
return jhash(str, strlen(str), 0);
}
+static inline int elf_hash_bits(void)
+{
+ return vmlinux ? ELF_HASH_BITS : 16;
+}
+
+#define elf_hash_add(hashtable, node, key) \
+ hlist_add_head(node, &hashtable[hash_min(key, elf_hash_bits())])
+
+static void elf_hash_init(struct hlist_head *table)
+{
+ __hash_init(table, 1U << elf_hash_bits());
+}
+
+#define elf_hash_for_each_possible(name, obj, member, key) \
+ hlist_for_each_entry(obj, &name[hash_min(key, elf_hash_bits())], member)
+
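(A minimal standalone sketch, not from the patch, of what elf_hash_bits() buys: the tables are declared at their maximum ELF_HASH_BITS size, but keys are folded into fewer bits unless objtool is processing vmlinux. The hash_min() below is a stand-in for the kernel macro, written as the usual multiplicative hash; the vmlinux flag mimics objtool's command-line option.)

    #include <stdbool.h>
    #include <stdio.h>

    #define ELF_HASH_BITS 20

    static bool vmlinux;                    /* set from the command line in objtool */

    static inline int elf_hash_bits(void)
    {
            return vmlinux ? ELF_HASH_BITS : 16;
    }

    /* stand-in for the kernel's hash_min(): fold a 32-bit key into 'bits' bits */
    static inline unsigned int hash_min(unsigned int key, int bits)
    {
            return (key * 0x61C88647u) >> (32 - bits);
    }

    int main(void)
    {
            unsigned int key = 0xdeadbeef;

            printf("16-bit bucket: %u\n", hash_min(key, elf_hash_bits()));
            vmlinux = true;
            printf("20-bit bucket: %u\n", hash_min(key, elf_hash_bits()));
            return 0;
    }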
static void rb_add(struct rb_root *tree, struct rb_node *node,
int (*cmp)(struct rb_node *, const struct rb_node *))
{
@@ -45,7 +61,7 @@ static void rb_add(struct rb_root *tree, struct rb_node *node,
rb_insert_color(node, tree);
}
-static struct rb_node *rb_find_first(struct rb_root *tree, const void *key,
+static struct rb_node *rb_find_first(const struct rb_root *tree, const void *key,
int (*cmp)(const void *key, const struct rb_node *))
{
struct rb_node *node = tree->rb_node;
@@ -111,11 +127,11 @@ static int symbol_by_offset(const void *key, const struct rb_node *node)
return 0;
}
-struct section *find_section_by_name(struct elf *elf, const char *name)
+struct section *find_section_by_name(const struct elf *elf, const char *name)
{
struct section *sec;
- hash_for_each_possible(elf->section_name_hash, sec, name_hash, str_hash(name))
+ elf_hash_for_each_possible(elf->section_name_hash, sec, name_hash, str_hash(name))
if (!strcmp(sec->name, name))
return sec;
@@ -127,7 +143,7 @@ static struct section *find_section_by_index(struct elf *elf,
{
struct section *sec;
- hash_for_each_possible(elf->section_hash, sec, hash, idx)
+ elf_hash_for_each_possible(elf->section_hash, sec, hash, idx)
if (sec->idx == idx)
return sec;
@@ -138,7 +154,7 @@ static struct symbol *find_symbol_by_index(struct elf *elf, unsigned int idx)
{
struct symbol *sym;
- hash_for_each_possible(elf->symbol_hash, sym, hash, idx)
+ elf_hash_for_each_possible(elf->symbol_hash, sym, hash, idx)
if (sym->idx == idx)
return sym;
@@ -173,7 +189,7 @@ struct symbol *find_func_by_offset(struct section *sec, unsigned long offset)
return NULL;
}
-struct symbol *find_symbol_containing(struct section *sec, unsigned long offset)
+struct symbol *find_symbol_containing(const struct section *sec, unsigned long offset)
{
struct rb_node *node;
@@ -201,18 +217,18 @@ struct symbol *find_func_containing(struct section *sec, unsigned long offset)
return NULL;
}
-struct symbol *find_symbol_by_name(struct elf *elf, const char *name)
+struct symbol *find_symbol_by_name(const struct elf *elf, const char *name)
{
struct symbol *sym;
- hash_for_each_possible(elf->symbol_name_hash, sym, name_hash, str_hash(name))
+ elf_hash_for_each_possible(elf->symbol_name_hash, sym, name_hash, str_hash(name))
if (!strcmp(sym->name, name))
return sym;
return NULL;
}
-struct rela *find_rela_by_dest_range(struct elf *elf, struct section *sec,
+struct rela *find_rela_by_dest_range(const struct elf *elf, struct section *sec,
unsigned long offset, unsigned int len)
{
struct rela *rela, *r = NULL;
@@ -224,7 +240,7 @@ struct rela *find_rela_by_dest_range(struct elf *elf, struct section *sec,
sec = sec->rela;
for_offset_range(o, offset, offset + len) {
- hash_for_each_possible(elf->rela_hash, rela, hash,
+ elf_hash_for_each_possible(elf->rela_hash, rela, hash,
sec_offset_hash(sec, o)) {
if (rela->sec != sec)
continue;
@@ -241,7 +257,7 @@ struct rela *find_rela_by_dest_range(struct elf *elf, struct section *sec,
return NULL;
}
-struct rela *find_rela_by_dest(struct elf *elf, struct section *sec, unsigned long offset)
+struct rela *find_rela_by_dest(const struct elf *elf, struct section *sec, unsigned long offset)
{
return find_rela_by_dest_range(elf, sec, offset, 1);
}
@@ -309,8 +325,8 @@ static int read_sections(struct elf *elf)
sec->len = sec->sh.sh_size;
list_add_tail(&sec->list, &elf->sections);
- hash_add(elf->section_hash, &sec->hash, sec->idx);
- hash_add(elf->section_name_hash, &sec->name_hash, str_hash(sec->name));
+ elf_hash_add(elf->section_hash, &sec->hash, sec->idx);
+ elf_hash_add(elf->section_name_hash, &sec->name_hash, str_hash(sec->name));
}
if (stats)
@@ -327,12 +343,14 @@ static int read_sections(struct elf *elf)
static int read_symbols(struct elf *elf)
{
- struct section *symtab, *sec;
+ struct section *symtab, *symtab_shndx, *sec;
struct symbol *sym, *pfunc;
struct list_head *entry;
struct rb_node *pnode;
int symbols_nr, i;
char *coldstr;
+ Elf_Data *shndx_data = NULL;
+ Elf32_Word shndx;
symtab = find_section_by_name(elf, ".symtab");
if (!symtab) {
@@ -340,6 +358,10 @@ static int read_symbols(struct elf *elf)
return -1;
}
+ symtab_shndx = find_section_by_name(elf, ".symtab_shndx");
+ if (symtab_shndx)
+ shndx_data = symtab_shndx->data;
+
symbols_nr = symtab->sh.sh_size / symtab->sh.sh_entsize;
for (i = 0; i < symbols_nr; i++) {
@@ -353,8 +375,9 @@ static int read_symbols(struct elf *elf)
sym->idx = i;
- if (!gelf_getsym(symtab->data, i, &sym->sym)) {
- WARN_ELF("gelf_getsym");
+ if (!gelf_getsymshndx(symtab->data, shndx_data, i, &sym->sym,
+ &shndx)) {
+ WARN_ELF("gelf_getsymshndx");
goto err;
}
@@ -368,10 +391,13 @@ static int read_symbols(struct elf *elf)
sym->type = GELF_ST_TYPE(sym->sym.st_info);
sym->bind = GELF_ST_BIND(sym->sym.st_info);
- if (sym->sym.st_shndx > SHN_UNDEF &&
- sym->sym.st_shndx < SHN_LORESERVE) {
- sym->sec = find_section_by_index(elf,
- sym->sym.st_shndx);
+ if ((sym->sym.st_shndx > SHN_UNDEF &&
+ sym->sym.st_shndx < SHN_LORESERVE) ||
+ (shndx_data && sym->sym.st_shndx == SHN_XINDEX)) {
+ if (sym->sym.st_shndx != SHN_XINDEX)
+ shndx = sym->sym.st_shndx;
+
+ sym->sec = find_section_by_index(elf, shndx);
if (!sym->sec) {
WARN("couldn't find section for symbol %s",
sym->name);
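(A self-contained libelf sketch, not part of the patch, of why gelf_getsymshndx() is needed: when a symbol's section index does not fit in the 16-bit st_shndx field, st_shndx holds SHN_XINDEX and the real index lives in the SHT_SYMTAB_SHNDX table, which gelf_getsymshndx() returns through its last argument. Build with -lelf.)

    #include <err.h>
    #include <fcntl.h>
    #include <gelf.h>
    #include <stdio.h>

    int main(int argc, char **argv)
    {
            Elf_Scn *scn = NULL;
            Elf_Data *symdata = NULL, *shndxdata = NULL;
            GElf_Shdr shdr;
            size_t nsyms = 0, i;
            Elf *elf;
            int fd;

            if (argc != 2)
                    errx(1, "usage: %s <object-file>", argv[0]);
            if (elf_version(EV_CURRENT) == EV_NONE)
                    errx(1, "libelf too old");
            if ((fd = open(argv[1], O_RDONLY)) < 0)
                    err(1, "%s", argv[1]);
            if (!(elf = elf_begin(fd, ELF_C_READ, NULL)))
                    errx(1, "%s", elf_errmsg(-1));

            /* find .symtab and, if present, the extended section index table */
            while ((scn = elf_nextscn(elf, scn))) {
                    if (!gelf_getshdr(scn, &shdr))
                            continue;
                    if (shdr.sh_type == SHT_SYMTAB) {
                            symdata = elf_getdata(scn, NULL);
                            nsyms = shdr.sh_size / shdr.sh_entsize;
                    } else if (shdr.sh_type == SHT_SYMTAB_SHNDX) {
                            shndxdata = elf_getdata(scn, NULL);
                    }
            }

            for (i = 0; i < nsyms; i++) {
                    GElf_Sym sym;
                    Elf32_Word xshndx = 0;

                    if (!gelf_getsymshndx(symdata, shndxdata, i, &sym, &xshndx))
                            continue;
                    if (sym.st_shndx == SHN_XINDEX)
                            printf("symbol %zu: extended section index %u\n", i, xshndx);
            }

            elf_end(elf);
            return 0;
    }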
@@ -394,8 +420,8 @@ static int read_symbols(struct elf *elf)
else
entry = &sym->sec->symbol_list;
list_add(&sym->list, entry);
- hash_add(elf->symbol_hash, &sym->hash, sym->idx);
- hash_add(elf->symbol_name_hash, &sym->name_hash, str_hash(sym->name));
+ elf_hash_add(elf->symbol_hash, &sym->hash, sym->idx);
+ elf_hash_add(elf->symbol_name_hash, &sym->name_hash, str_hash(sym->name));
}
if (stats)
@@ -456,6 +482,14 @@ err:
return -1;
}
+void elf_add_rela(struct elf *elf, struct rela *rela)
+{
+ struct section *sec = rela->sec;
+
+ list_add_tail(&rela->list, &sec->rela_list);
+ elf_hash_add(elf->rela_hash, &rela->hash, rela_hash(rela));
+}
+
static int read_relas(struct elf *elf)
{
struct section *sec;
@@ -503,8 +537,7 @@ static int read_relas(struct elf *elf)
return -1;
}
- list_add_tail(&rela->list, &sec->rela_list);
- hash_add(elf->rela_hash, &rela->hash, rela_hash(rela));
+ elf_add_rela(elf, rela);
nr_rela++;
}
max_rela = max(max_rela, nr_rela);
@@ -519,7 +552,7 @@ static int read_relas(struct elf *elf)
return 0;
}
-struct elf *elf_read(const char *name, int flags)
+struct elf *elf_open_read(const char *name, int flags)
{
struct elf *elf;
Elf_Cmd cmd;
@@ -531,15 +564,16 @@ struct elf *elf_read(const char *name, int flags)
perror("malloc");
return NULL;
}
- memset(elf, 0, sizeof(*elf));
+ memset(elf, 0, offsetof(struct elf, sections));
- hash_init(elf->symbol_hash);
- hash_init(elf->symbol_name_hash);
- hash_init(elf->section_hash);
- hash_init(elf->section_name_hash);
- hash_init(elf->rela_hash);
INIT_LIST_HEAD(&elf->sections);
+ elf_hash_init(elf->symbol_hash);
+ elf_hash_init(elf->symbol_name_hash);
+ elf_hash_init(elf->section_hash);
+ elf_hash_init(elf->section_name_hash);
+ elf_hash_init(elf->rela_hash);
+
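(An illustrative sketch, using a simplified layout rather than the real struct elf, of the offsetof()-bounded memset used above: only the scalar head of the structure is zeroed, while the list head and the large hash tables are handled by their own initializers right after.)

    #include <stddef.h>
    #include <stdio.h>
    #include <string.h>

    struct elf_like {
            int fd;
            char *name;
            struct { void *next, *prev; } sections;    /* INIT_LIST_HEAD'ed separately */
            void *big_table[1 << 16];                  /* elf_hash_init'ed separately */
    };

    int main(void)
    {
            static struct elf_like e;                  /* static: the table is large */

            memset(&e, 0, offsetof(struct elf_like, sections));
            printf("zeroed %zu of %zu bytes\n",
                   offsetof(struct elf_like, sections), sizeof(e));
            return 0;
    }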
elf->fd = open(name, flags);
if (elf->fd == -1) {
fprintf(stderr, "objtool: Can't open '%s': %s\n",
@@ -676,8 +710,8 @@ struct section *elf_create_section(struct elf *elf, const char *name,
shstrtab->changed = true;
list_add_tail(&sec->list, &elf->sections);
- hash_add(elf->section_hash, &sec->hash, sec->idx);
- hash_add(elf->section_name_hash, &sec->name_hash, str_hash(sec->name));
+ elf_hash_add(elf->section_hash, &sec->hash, sec->idx);
+ elf_hash_add(elf->section_name_hash, &sec->name_hash, str_hash(sec->name));
return sec;
}
@@ -745,7 +779,7 @@ int elf_rebuild_rela_section(struct section *sec)
return 0;
}
-int elf_write(struct elf *elf)
+int elf_write(const struct elf *elf)
{
struct section *sec;
Elf_Scn *s;
diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h
index 12e01ac190ec..f4fe1d6ea392 100644
--- a/tools/objtool/elf.h
+++ b/tools/objtool/elf.h
@@ -39,7 +39,7 @@ struct section {
char *name;
int idx;
unsigned int len;
- bool changed, text, rodata;
+ bool changed, text, rodata, noinstr;
};
struct symbol {
@@ -70,17 +70,19 @@ struct rela {
bool jump_table_start;
};
+#define ELF_HASH_BITS 20
+
struct elf {
Elf *elf;
GElf_Ehdr ehdr;
int fd;
char *name;
struct list_head sections;
- DECLARE_HASHTABLE(symbol_hash, 20);
- DECLARE_HASHTABLE(symbol_name_hash, 20);
- DECLARE_HASHTABLE(section_hash, 16);
- DECLARE_HASHTABLE(section_name_hash, 16);
- DECLARE_HASHTABLE(rela_hash, 20);
+ DECLARE_HASHTABLE(symbol_hash, ELF_HASH_BITS);
+ DECLARE_HASHTABLE(symbol_name_hash, ELF_HASH_BITS);
+ DECLARE_HASHTABLE(section_hash, ELF_HASH_BITS);
+ DECLARE_HASHTABLE(section_name_hash, ELF_HASH_BITS);
+ DECLARE_HASHTABLE(rela_hash, ELF_HASH_BITS);
};
#define OFFSET_STRIDE_BITS 4
@@ -112,22 +114,23 @@ static inline u32 rela_hash(struct rela *rela)
return sec_offset_hash(rela->sec, rela->offset);
}
-struct elf *elf_read(const char *name, int flags);
-struct section *find_section_by_name(struct elf *elf, const char *name);
+struct elf *elf_open_read(const char *name, int flags);
+struct section *elf_create_section(struct elf *elf, const char *name, size_t entsize, int nr);
+struct section *elf_create_rela_section(struct elf *elf, struct section *base);
+void elf_add_rela(struct elf *elf, struct rela *rela);
+int elf_write(const struct elf *elf);
+void elf_close(struct elf *elf);
+
+struct section *find_section_by_name(const struct elf *elf, const char *name);
struct symbol *find_func_by_offset(struct section *sec, unsigned long offset);
struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset);
-struct symbol *find_symbol_by_name(struct elf *elf, const char *name);
-struct symbol *find_symbol_containing(struct section *sec, unsigned long offset);
-struct rela *find_rela_by_dest(struct elf *elf, struct section *sec, unsigned long offset);
-struct rela *find_rela_by_dest_range(struct elf *elf, struct section *sec,
+struct symbol *find_symbol_by_name(const struct elf *elf, const char *name);
+struct symbol *find_symbol_containing(const struct section *sec, unsigned long offset);
+struct rela *find_rela_by_dest(const struct elf *elf, struct section *sec, unsigned long offset);
+struct rela *find_rela_by_dest_range(const struct elf *elf, struct section *sec,
unsigned long offset, unsigned int len);
struct symbol *find_func_containing(struct section *sec, unsigned long offset);
-struct section *elf_create_section(struct elf *elf, const char *name, size_t
- entsize, int nr);
-struct section *elf_create_rela_section(struct elf *elf, struct section *base);
int elf_rebuild_rela_section(struct section *sec);
-int elf_write(struct elf *elf);
-void elf_close(struct elf *elf);
#define for_each_sec(file, sec) \
list_for_each_entry(sec, &file->elf->sections, list)
diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c
index 0b3528f05053..58fdda510653 100644
--- a/tools/objtool/objtool.c
+++ b/tools/objtool/objtool.c
@@ -58,7 +58,9 @@ static void cmd_usage(void)
printf("\n");
- exit(129);
+ if (!help)
+ exit(129);
+ exit(0);
}
static void handle_options(int *argc, const char ***argv)
diff --git a/tools/objtool/objtool.h b/tools/objtool/objtool.h
new file mode 100644
index 000000000000..528028a66816
--- /dev/null
+++ b/tools/objtool/objtool.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2020 Matt Helsley <mhelsley@vmware.com>
+ */
+
+#ifndef _OBJTOOL_H
+#define _OBJTOOL_H
+
+#include <stdbool.h>
+#include <linux/list.h>
+#include <linux/hashtable.h>
+
+#include "elf.h"
+
+struct objtool_file {
+ struct elf *elf;
+ struct list_head insn_list;
+ DECLARE_HASHTABLE(insn_hash, 20);
+ bool ignore_unreachables, c_file, hints, rodata;
+};
+
+int check(const char *objname, bool orc);
+int orc_dump(const char *objname);
+int create_orc(struct objtool_file *file);
+int create_orc_sections(struct objtool_file *file);
+
+#endif /* _OBJTOOL_H */
diff --git a/tools/objtool/orc.h b/tools/objtool/orc.h
deleted file mode 100644
index ee2832221e62..000000000000
--- a/tools/objtool/orc.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com>
- */
-
-#ifndef _ORC_H
-#define _ORC_H
-
-#include <asm/orc_types.h>
-
-struct objtool_file;
-
-int create_orc(struct objtool_file *file);
-int create_orc_sections(struct objtool_file *file);
-
-int orc_dump(const char *objname);
-
-#endif /* _ORC_H */
diff --git a/tools/objtool/orc_dump.c b/tools/objtool/orc_dump.c
index ba4cbb1cdd63..fca46e006fc2 100644
--- a/tools/objtool/orc_dump.c
+++ b/tools/objtool/orc_dump.c
@@ -4,7 +4,8 @@
*/
#include <unistd.h>
-#include "orc.h"
+#include <asm/orc_types.h>
+#include "objtool.h"
#include "warn.h"
static const char *reg_name(unsigned int reg)
diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c
index 4c0dabd28000..c9549988121a 100644
--- a/tools/objtool/orc_gen.c
+++ b/tools/objtool/orc_gen.c
@@ -6,7 +6,6 @@
#include <stdlib.h>
#include <string.h>
-#include "orc.h"
#include "check.h"
#include "warn.h"
@@ -16,10 +15,10 @@ int create_orc(struct objtool_file *file)
for_each_insn(file, insn) {
struct orc_entry *orc = &insn->orc;
- struct cfi_reg *cfa = &insn->state.cfa;
- struct cfi_reg *bp = &insn->state.regs[CFI_BP];
+ struct cfi_reg *cfa = &insn->cfi.cfa;
+ struct cfi_reg *bp = &insn->cfi.regs[CFI_BP];
- orc->end = insn->state.end;
+ orc->end = insn->cfi.end;
if (cfa->base == CFI_UNDEFINED) {
orc->sp_reg = ORC_REG_UNDEFINED;
@@ -75,7 +74,7 @@ int create_orc(struct objtool_file *file)
orc->sp_offset = cfa->offset;
orc->bp_offset = bp->offset;
- orc->type = insn->state.type;
+ orc->type = insn->cfi.type;
}
return 0;
@@ -130,8 +129,7 @@ static int create_orc_entry(struct elf *elf, struct section *u_sec, struct secti
rela->offset = idx * sizeof(int);
rela->sec = ip_relasec;
- list_add_tail(&rela->list, &ip_relasec->rela_list);
- hash_add(elf->rela_hash, &rela->hash, rela_hash(rela));
+ elf_add_rela(elf, rela);
return 0;
}
diff --git a/tools/objtool/weak.c b/tools/objtool/weak.c
new file mode 100644
index 000000000000..942ea5e8ac36
--- /dev/null
+++ b/tools/objtool/weak.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2020 Matt Helsley <mhelsley@vmware.com>
+ * Weak definitions necessary to compile objtool without
+ * some subcommands (e.g. check, orc).
+ */
+
+#include <stdbool.h>
+#include <errno.h>
+#include "objtool.h"
+
+#define __weak __attribute__((weak))
+
+#define UNSUPPORTED(name) \
+({ \
+ fprintf(stderr, "error: objtool: " name " not implemented\n"); \
+ return ENOSYS; \
+})
+
+const char __weak *objname;
+
+int __weak check(const char *_objname, bool orc)
+{
+ UNSUPPORTED("check subcommand");
+}
+
+int __weak orc_dump(const char *_objname)
+{
+ UNSUPPORTED("orc");
+}
+
+int __weak create_orc(struct objtool_file *file)
+{
+ UNSUPPORTED("orc");
+}
+
+int __weak create_orc_sections(struct objtool_file *file)
+{
+ UNSUPPORTED("orc");
+}
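(A standalone sketch, with a simplified one-argument signature rather than the real objtool prototype, of how these weak stubs behave: if the object providing the real subcommand is linked in, its non-weak definition wins; otherwise the weak stub is used and reports ENOSYS.)

    #include <errno.h>
    #include <stdio.h>

    #define __weak __attribute__((weak))

    int __weak check(const char *objname)
    {
            fprintf(stderr, "error: objtool: check subcommand not implemented\n");
            return ENOSYS;
    }

    /* A strong definition elsewhere (e.g. in check.o) silently overrides the stub:
     * int check(const char *objname) { ... real implementation ... }
     */

    int main(void)
    {
            return check("vmlinux.o");
    }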
diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile
index 31824d5269cc..6e54979c2124 100644
--- a/tools/perf/Documentation/Makefile
+++ b/tools/perf/Documentation/Makefile
@@ -48,7 +48,7 @@ man5dir=$(mandir)/man5
man7dir=$(mandir)/man7
ASCIIDOC=asciidoc
-ASCIIDOC_EXTRA = --unsafe -f asciidoc.conf
+ASCIIDOC_EXTRA += --unsafe -f asciidoc.conf
ASCIIDOC_HTML = xhtml11
MANPAGE_XSL = manpage-normal.xsl
XMLTO_EXTRA =
@@ -59,7 +59,7 @@ HTML_REF = origin/html
ifdef USE_ASCIIDOCTOR
ASCIIDOC = asciidoctor
-ASCIIDOC_EXTRA = -a compat-mode
+ASCIIDOC_EXTRA += -a compat-mode
ASCIIDOC_EXTRA += -I. -rasciidoctor-extensions
ASCIIDOC_EXTRA += -a mansource="perf" -a manmanual="perf Manual"
ASCIIDOC_HTML = xhtml5
diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt
index 82ff7dad40c2..271484754fee 100644
--- a/tools/perf/Documentation/itrace.txt
+++ b/tools/perf/Documentation/itrace.txt
@@ -10,7 +10,9 @@
e synthesize error events
d create a debug log
g synthesize a call chain (use with i or x)
+ G synthesize a call chain on existing event records
l synthesize last branch entries (use with i or x)
+ L synthesize last branch entries on existing event records
s skip initial number of events
The default is all events i.e. the same as --itrace=ibxwpe,
@@ -31,6 +33,10 @@
Also the number of last branch entries (default 64, max. 1024) for
instructions or transactions events can be specified.
+ Similar to options g and l, size may also be specified for options G and L.
+ On x86, note that G and L work poorly when data has been recorded with
+ large PEBS. Refer to the linkperf:perf-intel-pt[1] man page for details.
+
It is also possible to skip events generated (instructions, branches, transactions,
ptwrite, power) at the beginning. This is useful to ignore initialization code.
diff --git a/tools/perf/Documentation/perf-bench.txt b/tools/perf/Documentation/perf-bench.txt
index 0921a3c67381..bad16512c48d 100644
--- a/tools/perf/Documentation/perf-bench.txt
+++ b/tools/perf/Documentation/perf-bench.txt
@@ -61,6 +61,9 @@ SUBSYSTEM
'epoll'::
Eventpoll (epoll) stressing benchmarks.
+'internals'::
+ Benchmark internal perf functionality.
+
'all'::
All benchmark subsystems.
@@ -214,6 +217,11 @@ Suite for evaluating concurrent epoll_wait calls.
*ctl*::
Suite for evaluating multiple epoll_ctl calls.
+SUITES FOR 'internals'
+~~~~~~~~~~~~~~~~~~~~~~
+*synthesize*::
+Suite for evaluating perf's event synthesis performance.
+
SEE ALSO
--------
linkperf:perf[1]
diff --git a/tools/perf/Documentation/perf-c2c.txt b/tools/perf/Documentation/perf-c2c.txt
index e6150f21267d..2133eb320cb0 100644
--- a/tools/perf/Documentation/perf-c2c.txt
+++ b/tools/perf/Documentation/perf-c2c.txt
@@ -111,6 +111,17 @@ REPORT OPTIONS
--display::
Switch to HITM type (rmt, lcl) to display and sort on. Total HITMs as default.
+--stitch-lbr::
+ Show callgraph with stitched LBRs, which may have more complete
+ callgraph. The perf.data file must have been obtained using
+ perf c2c record --call-graph lbr.
+ Disabled by default. In common cases with call stack overflows,
+ it can recreate better call stacks than the default lbr call stack
+ output. But this approach is not foolproof. There can be cases
+ where it creates incorrect call stacks from incorrect matches.
+ A known limitation is exception handling such as
+ setjmp/longjmp, where calls and returns will not match.
+
C2C RECORD
----------
The perf c2c record command setup options related to HITM cacheline analysis
diff --git a/tools/perf/Documentation/perf-intel-pt.txt b/tools/perf/Documentation/perf-intel-pt.txt
index 456fdcbf26ac..eb8b7d42591a 100644
--- a/tools/perf/Documentation/perf-intel-pt.txt
+++ b/tools/perf/Documentation/perf-intel-pt.txt
@@ -69,22 +69,22 @@ And profiled with 'perf report' e.g.
To also trace kernel space presents a problem, namely kernel self-modifying
code. A fairly good kernel image is available in /proc/kcore but to get an
accurate image a copy of /proc/kcore needs to be made under the same conditions
-as the data capture. A script perf-with-kcore can do that, but beware that the
-script makes use of 'sudo' to copy /proc/kcore. If you have perf installed
-locally from the source tree you can do:
+as the data capture. 'perf record' can make a copy of /proc/kcore if the option
+--kcore is used, but access to /proc/kcore is restricted e.g.
- ~/libexec/perf-core/perf-with-kcore record pt_ls -e intel_pt// -- ls
+ sudo perf record -o pt_ls --kcore -e intel_pt// -- ls
-which will create a directory named 'pt_ls' and put the perf.data file and
-copies of /proc/kcore, /proc/kallsyms and /proc/modules into it. Then to use
-'perf report' becomes:
+which will create a directory named 'pt_ls' and put the perf.data file (named
+simply 'data') and copies of /proc/kcore, /proc/kallsyms and /proc/modules into
+it. The other tools understand the directory format, so to use 'perf report'
+becomes:
- ~/libexec/perf-core/perf-with-kcore report pt_ls
+ sudo perf report -i pt_ls
Because samples are synthesized after-the-fact, the sampling period can be
selected for reporting. e.g. sample every microsecond
- ~/libexec/perf-core/perf-with-kcore report pt_ls --itrace=i1usge
+ sudo perf report pt_ls --itrace=i1usge
See the sections below for more information about the --itrace option.
@@ -821,7 +821,9 @@ The letters are:
e synthesize tracing error events
d create a debug log
g synthesize a call chain (use with i or x)
+ G synthesize a call chain on existing event records
l synthesize last branch entries (use with i or x)
+ L synthesize last branch entries on existing event records
s skip initial number of events
"Instructions" events look like they were recorded by "perf record -e
@@ -912,6 +914,39 @@ transactions events can be specified. e.g.
Note that last branch entries are cleared for each sample, so there is no overlap
from one sample to the next.
+The G and L options are designed in particular for sample mode, and work much
+like g and l but add call chain and branch stack to the other selected events
+instead of synthesized events. For example, to record branch-misses events for
+'ls' and then add a call chain derived from the Intel PT trace:
+
+ perf record --aux-sample -e '{intel_pt//u,branch-misses:u}' -- ls
+ perf report --itrace=Ge
+
+Although in fact G is a default for perf report, so that is the same as just:
+
+ perf report
+
+One caveat with the G and L options is that they work poorly with "Large PEBS".
+Large PEBS means PEBS records will be accumulated by hardware and then written
+into the event buffer in one go. That reduces interrupts, but can give very
+late timestamps. Because the Intel PT trace is synchronized by timestamps,
+the PEBS events do not match the trace. Currently, Large PEBS is used only in
+certain circumstances:
+ - hardware supports it
+ - PEBS is used
+ - event period is specified, instead of frequency
+ - the sample type is limited to the following flags:
+ PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_ADDR |
+ PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_STREAM_ID |
+ PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER |
+ PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR |
+ PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER |
+ PERF_SAMPLE_PERIOD (and sometimes) | PERF_SAMPLE_TIME
+Because Intel PT sample mode uses a different sample type from the list above,
+Large PEBS is not used with Intel PT sample mode. To avoid Large PEBS in other
+cases, avoid specifying the event period i.e. avoid the 'perf record' -c option,
+--count option, or 'period' config term.
+
To disable trace decoding entirely, use the option --no-itrace.
It is also possible to skip events generated (instructions, branches, transactions)
diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt
index 6345db33c533..376a50b3452d 100644
--- a/tools/perf/Documentation/perf-list.txt
+++ b/tools/perf/Documentation/perf-list.txt
@@ -115,6 +115,11 @@ raw encoding of 0x1A8 can be used:
perf stat -e r1a8 -a sleep 1
perf record -e r1a8 ...
+It's also possible to use pmu syntax:
+
+ perf record -e r1a8 -a sleep 1
+ perf record -e cpu/r1a8/ ...
+
You should refer to the processor specific documentation for getting these
details. Some of them are referenced in the SEE ALSO section below.
@@ -258,6 +263,9 @@ Normally all events in an event group sample, but with :S only
the first event (the leader) samples, and it only reads the values of the
other events in the group.
+However, in the case of AUX area events (e.g. Intel PT or CoreSight), the AUX
+area event must be the leader, so it is the second event that samples, not the first.
+
OPTIONS
-------
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index b3f3b3f1c161..561ef55743e2 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -556,6 +556,19 @@ overhead. You can still switch them on with:
--switch-output --no-no-buildid --no-no-buildid-cache
+--switch-output-event::
+Events that will cause the switch of the perf.data file, auto-selecting
+--switch-output=signal, the results are similar as internally the side band
+thread will also send a SIGUSR2 to the main one.
+
+Uses the same syntax as --event; it will just not be recorded, serving only to
+switch the perf.data file as soon as the --switch-output event is processed by
+a separate sideband thread.
+
+This sideband thread is also used for other purposes, like processing the
+PERF_RECORD_BPF_EVENT records as they happen, asking the kernel for extra BPF
+information, etc.
+
--switch-max-files=N::
When rotating perf.data with --switch-output, only keep N files.
@@ -596,6 +609,10 @@ Make a copy of /proc/kcore and place it into a directory with the perf data file
Limit the sample data max size, <size> is expected to be a number with
appended unit character - B/K/M/G
+--num-thread-synthesize::
+ The number of threads to run when synthesizing events for existing processes.
+ By default, the number of threads equals 1.
+
SEE ALSO
--------
linkperf:perf-stat[1], linkperf:perf-list[1], linkperf:perf-intel-pt[1]
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index f569b9ea4002..d068103690cc 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -488,6 +488,17 @@ include::itrace.txt[]
This option extends the perf report to show reference callgraphs,
which collected by reference event, in no callgraph event.
+--stitch-lbr::
+ Show callgraph with stitched LBRs, which may have more complete
+ callgraph. The perf.data file must have been obtained using
+ perf record --call-graph lbr.
+ Disabled by default. In common cases with call stack overflows,
+ it can recreate better call stacks than the default lbr call stack
+ output. But this approach is not foolproof. There can be cases
+ where it creates incorrect call stacks from incorrect matches.
+ A known limitation is exception handling such as
+ setjmp/longjmp, where calls and returns will not match.
+
--socket-filter::
Only report the samples on the processor socket that match with this filter
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 963487e82edc..372dfd110e6d 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -440,6 +440,17 @@ include::itrace.txt[]
--show-on-off-events::
Show the --switch-on/off events too.
+--stitch-lbr::
+ Show callgraph with stitched LBRs, which may have more complete
+ callgraph. The perf.data file must have been obtained using
+ perf record --call-graph lbr.
+ Disabled by default. In common cases with call stack overflows,
+ it can recreate better call stacks than the default lbr call stack
+ output. But this approach is not foolproof. There can be cases
+ where it creates incorrect call stacks from incorrect matches.
+ A known limitation is exception handling such as
+ setjmp/longjmp, where calls and returns will not match.
+
SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-script-perl[1],
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 4d56586b2fb9..3fb5028aef08 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -176,6 +176,8 @@ Print count deltas every N milliseconds (minimum: 1ms)
The overhead percentage could be high in some cases, for instance with small, sub 100ms intervals. Use with caution.
example: 'perf stat -I 1000 -e cycles -a sleep 5'
+If the metric exists, it is calculated from the counts generated in this interval and printed after '#'.
+
--interval-count times::
Print count deltas for fixed number of times.
This option should be used together with "-I" option.
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index 487737a725e9..20227dabc208 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -319,6 +319,15 @@ Default is to monitor all CPUS.
go straight to the histogram browser, just like 'perf top' with no events
explicitely specified does.
+--stitch-lbr::
+ Show callgraph with stitched LBRs, which may have more complete
+ callgraph. The option must be used with --call-graph lbr recording.
+ Disabled by default. In common cases with call stack overflows,
+ it can recreate better call stacks than the default lbr call stack
+ output. But this approach is not foolproof. There can be cases
+ where it creates incorrect call stacks from incorrect matches.
+ A known limitation is exception handling such as
+ setjmp/longjmp, where calls and returns will not match.
INTERACTIVE PROMPTING KEYS
--------------------------
diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt
index b0152e1095c5..b6472e463284 100644
--- a/tools/perf/Documentation/perf.data-file-format.txt
+++ b/tools/perf/Documentation/perf.data-file-format.txt
@@ -373,6 +373,22 @@ struct {
Indicates that trace contains records of PERF_RECORD_COMPRESSED type
that have perf_events records in compressed form.
+ HEADER_CPU_PMU_CAPS = 28,
+
+ A list of cpu PMU capabilities. The format of data is as below.
+
+struct {
+ u32 nr_cpu_pmu_caps;
+ {
+ char name[];
+ char value[];
+ } [nr_cpu_pmu_caps]
+};
+
+
+Example:
+ cpu pmu capabilities: branches=32, max_precise=3, pmu_name=icelake
+
other bits are reserved and should ignored for now
HEADER_FEAT_BITS = 256,
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index d15a311408f1..94a495594e99 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -188,7 +188,7 @@ AWK = awk
# non-config cases
config := 1
-NON_CONFIG_TARGETS := clean python-clean TAGS tags cscope help install-doc install-man install-html install-info install-pdf doc man html info pdf
+NON_CONFIG_TARGETS := clean python-clean TAGS tags cscope help
ifdef MAKECMDGOALS
ifeq ($(filter-out $(NON_CONFIG_TARGETS),$(MAKECMDGOALS)),)
@@ -832,7 +832,7 @@ INSTALL_DOC_TARGETS += quick-install-doc quick-install-man quick-install-html
# 'make doc' should call 'make -C Documentation all'
$(DOC_TARGETS):
- $(Q)$(MAKE) -C $(DOC_DIR) O=$(OUTPUT) $(@:doc=all)
+ $(Q)$(MAKE) -C $(DOC_DIR) O=$(OUTPUT) $(@:doc=all) ASCIIDOC_EXTRA=$(ASCIIDOC_EXTRA)
TAG_FOLDERS= . ../lib ../include
TAG_FILES= ../../include/uapi/linux/perf_event.h
@@ -959,7 +959,7 @@ install-python_ext:
# 'make install-doc' should call 'make -C Documentation install'
$(INSTALL_DOC_TARGETS):
- $(Q)$(MAKE) -C $(DOC_DIR) O=$(OUTPUT) $(@:-doc=)
+ $(Q)$(MAKE) -C $(DOC_DIR) O=$(OUTPUT) $(@:-doc=) ASCIIDOC_EXTRA=$(ASCIIDOC_EXTRA)
### Cleaning rules
diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c
index 941f814820b8..97aa02c4491d 100644
--- a/tools/perf/arch/arm/util/cs-etm.c
+++ b/tools/perf/arch/arm/util/cs-etm.c
@@ -23,6 +23,7 @@
#include "../../util/event.h"
#include "../../util/evlist.h"
#include "../../util/evsel.h"
+#include "../../util/perf_api_probe.h"
#include "../../util/evsel_config.h"
#include "../../util/pmu.h"
#include "../../util/cs-etm.h"
@@ -232,7 +233,7 @@ static int cs_etm_set_sink_attr(struct perf_pmu *pmu,
ret = perf_pmu__scan_file(pmu, path, "%x", &hash);
if (ret != 1) {
pr_err("failed to set sink \"%s\" on event %s with %d (%s)\n",
- sink, perf_evsel__name(evsel), errno,
+ sink, evsel__name(evsel), errno,
str_error_r(errno, msg, sizeof(msg)));
return ret;
}
@@ -401,7 +402,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
* when a context switch happened.
*/
if (!perf_cpu_map__empty(cpus)) {
- perf_evsel__set_sample_bit(cs_etm_evsel, CPU);
+ evsel__set_sample_bit(cs_etm_evsel, CPU);
err = cs_etm_set_option(itr, cs_etm_evsel,
ETM_OPT_CTXTID | ETM_OPT_TS);
@@ -425,7 +426,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
/* In per-cpu case, always need the time of mmap events etc */
if (!perf_cpu_map__empty(cpus))
- perf_evsel__set_sample_bit(tracking_evsel, TIME);
+ evsel__set_sample_bit(tracking_evsel, TIME);
}
out:
diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c
index 27653be24447..e3593063b3d1 100644
--- a/tools/perf/arch/arm64/util/arm-spe.c
+++ b/tools/perf/arch/arm64/util/arm-spe.c
@@ -120,9 +120,9 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
*/
perf_evlist__to_front(evlist, arm_spe_evsel);
- perf_evsel__set_sample_bit(arm_spe_evsel, CPU);
- perf_evsel__set_sample_bit(arm_spe_evsel, TIME);
- perf_evsel__set_sample_bit(arm_spe_evsel, TID);
+ evsel__set_sample_bit(arm_spe_evsel, CPU);
+ evsel__set_sample_bit(arm_spe_evsel, TIME);
+ evsel__set_sample_bit(arm_spe_evsel, TID);
/* Add dummy event to keep tracking */
err = parse_events(evlist, "dummy:u", NULL);
@@ -134,9 +134,9 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
tracking_evsel->core.attr.freq = 0;
tracking_evsel->core.attr.sample_period = 1;
- perf_evsel__set_sample_bit(tracking_evsel, TIME);
- perf_evsel__set_sample_bit(tracking_evsel, CPU);
- perf_evsel__reset_sample_bit(tracking_evsel, BRANCH_STACK);
+ evsel__set_sample_bit(tracking_evsel, TIME);
+ evsel__set_sample_bit(tracking_evsel, CPU);
+ evsel__reset_sample_bit(tracking_evsel, BRANCH_STACK);
return 0;
}
diff --git a/tools/perf/arch/powerpc/util/header.c b/tools/perf/arch/powerpc/util/header.c
index 3b4cdfc5efd6..d4870074f14c 100644
--- a/tools/perf/arch/powerpc/util/header.c
+++ b/tools/perf/arch/powerpc/util/header.c
@@ -7,6 +7,8 @@
#include <string.h>
#include <linux/stringify.h>
#include "header.h"
+#include "metricgroup.h"
+#include <api/fs/fs.h>
#define mfspr(rn) ({unsigned long rval; \
asm volatile("mfspr %0," __stringify(rn) \
@@ -44,3 +46,9 @@ get_cpuid_str(struct perf_pmu *pmu __maybe_unused)
return bufp;
}
+
+int arch_get_runtimeparam(void)
+{
+ int count;
+ return sysfs__read_int("/devices/hv_24x7/interface/sockets", &count) < 0 ? 1 : count;
+}
diff --git a/tools/perf/arch/powerpc/util/kvm-stat.c b/tools/perf/arch/powerpc/util/kvm-stat.c
index 16807269317c..eed9e5a42935 100644
--- a/tools/perf/arch/powerpc/util/kvm-stat.c
+++ b/tools/perf/arch/powerpc/util/kvm-stat.c
@@ -39,7 +39,7 @@ static void hcall_event_get_key(struct evsel *evsel,
struct event_key *key)
{
key->info = 0;
- key->key = perf_evsel__intval(evsel, sample, "req");
+ key->key = evsel__intval(evsel, sample, "req");
}
static const char *get_hcall_exit_reason(u64 exit_code)
diff --git a/tools/perf/arch/s390/util/kvm-stat.c b/tools/perf/arch/s390/util/kvm-stat.c
index 0fd4e9f49ed0..34da89ced29a 100644
--- a/tools/perf/arch/s390/util/kvm-stat.c
+++ b/tools/perf/arch/s390/util/kvm-stat.c
@@ -30,7 +30,7 @@ static void event_icpt_insn_get_key(struct evsel *evsel,
{
unsigned long insn;
- insn = perf_evsel__intval(evsel, sample, "instruction");
+ insn = evsel__intval(evsel, sample, "instruction");
key->key = icpt_insn_decoder(insn);
key->exit_reasons = sie_icpt_insn_codes;
}
@@ -39,7 +39,7 @@ static void event_sigp_get_key(struct evsel *evsel,
struct perf_sample *sample,
struct event_key *key)
{
- key->key = perf_evsel__intval(evsel, sample, "order_code");
+ key->key = evsel__intval(evsel, sample, "order_code");
key->exit_reasons = sie_sigp_order_codes;
}
@@ -47,7 +47,7 @@ static void event_diag_get_key(struct evsel *evsel,
struct perf_sample *sample,
struct event_key *key)
{
- key->key = perf_evsel__intval(evsel, sample, "code");
+ key->key = evsel__intval(evsel, sample, "code");
key->exit_reasons = sie_diagnose_codes;
}
@@ -55,7 +55,7 @@ static void event_icpt_prog_get_key(struct evsel *evsel,
struct perf_sample *sample,
struct event_key *key)
{
- key->key = perf_evsel__intval(evsel, sample, "code");
+ key->key = evsel__intval(evsel, sample, "code");
key->exit_reasons = sie_icpt_prog_codes;
}
diff --git a/tools/perf/arch/x86/tests/perf-time-to-tsc.c b/tools/perf/arch/x86/tests/perf-time-to-tsc.c
index 909ead08a6f6..026d32ed078e 100644
--- a/tools/perf/arch/x86/tests/perf-time-to-tsc.c
+++ b/tools/perf/arch/x86/tests/perf-time-to-tsc.c
@@ -130,13 +130,11 @@ int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe
goto next_event;
if (strcmp(event->comm.comm, comm1) == 0) {
- CHECK__(perf_evsel__parse_sample(evsel, event,
- &sample));
+ CHECK__(evsel__parse_sample(evsel, event, &sample));
comm1_time = sample.time;
}
if (strcmp(event->comm.comm, comm2) == 0) {
- CHECK__(perf_evsel__parse_sample(evsel, event,
- &sample));
+ CHECK__(evsel__parse_sample(evsel, event, &sample));
comm2_time = sample.time;
}
next_event:
diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c
index 09f93800bffd..0dc09b5809c1 100644
--- a/tools/perf/arch/x86/util/intel-bts.c
+++ b/tools/perf/arch/x86/util/intel-bts.c
@@ -224,7 +224,7 @@ static int intel_bts_recording_options(struct auxtrace_record *itr,
* AUX event.
*/
if (!perf_cpu_map__empty(cpus))
- perf_evsel__set_sample_bit(intel_bts_evsel, CPU);
+ evsel__set_sample_bit(intel_bts_evsel, CPU);
}
/* Add dummy event to keep tracking */
diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c
index 1643aed8c4c8..3f7c20cc7b79 100644
--- a/tools/perf/arch/x86/util/intel-pt.c
+++ b/tools/perf/arch/x86/util/intel-pt.c
@@ -25,6 +25,7 @@
#include "../../../util/pmu.h"
#include "../../../util/debug.h"
#include "../../../util/auxtrace.h"
+#include "../../../util/perf_api_probe.h"
#include "../../../util/record.h"
#include "../../../util/target.h"
#include "../../../util/tsc.h"
@@ -420,8 +421,8 @@ static int intel_pt_track_switches(struct evlist *evlist)
evsel = evlist__last(evlist);
- perf_evsel__set_sample_bit(evsel, CPU);
- perf_evsel__set_sample_bit(evsel, TIME);
+ evsel__set_sample_bit(evsel, CPU);
+ evsel__set_sample_bit(evsel, TIME);
evsel->core.system_wide = true;
evsel->no_aux_samples = true;
@@ -801,10 +802,10 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
switch_evsel->no_aux_samples = true;
switch_evsel->immediate = true;
- perf_evsel__set_sample_bit(switch_evsel, TID);
- perf_evsel__set_sample_bit(switch_evsel, TIME);
- perf_evsel__set_sample_bit(switch_evsel, CPU);
- perf_evsel__reset_sample_bit(switch_evsel, BRANCH_STACK);
+ evsel__set_sample_bit(switch_evsel, TID);
+ evsel__set_sample_bit(switch_evsel, TIME);
+ evsel__set_sample_bit(switch_evsel, CPU);
+ evsel__reset_sample_bit(switch_evsel, BRANCH_STACK);
opts->record_switch_events = false;
ptr->have_sched_switch = 3;
@@ -838,7 +839,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
* AUX event.
*/
if (!perf_cpu_map__empty(cpus))
- perf_evsel__set_sample_bit(intel_pt_evsel, CPU);
+ evsel__set_sample_bit(intel_pt_evsel, CPU);
}
/* Add dummy event to keep tracking */
@@ -862,11 +863,11 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
/* In per-cpu case, always need the time of mmap events etc */
if (!perf_cpu_map__empty(cpus)) {
- perf_evsel__set_sample_bit(tracking_evsel, TIME);
+ evsel__set_sample_bit(tracking_evsel, TIME);
/* And the CPU for switch events */
- perf_evsel__set_sample_bit(tracking_evsel, CPU);
+ evsel__set_sample_bit(tracking_evsel, CPU);
}
- perf_evsel__reset_sample_bit(tracking_evsel, BRANCH_STACK);
+ evsel__reset_sample_bit(tracking_evsel, BRANCH_STACK);
}
/*
diff --git a/tools/perf/arch/x86/util/kvm-stat.c b/tools/perf/arch/x86/util/kvm-stat.c
index c0775c39227f..072920475b65 100644
--- a/tools/perf/arch/x86/util/kvm-stat.c
+++ b/tools/perf/arch/x86/util/kvm-stat.c
@@ -31,8 +31,8 @@ const char *kvm_exit_trace = "kvm:kvm_exit";
static void mmio_event_get_key(struct evsel *evsel, struct perf_sample *sample,
struct event_key *key)
{
- key->key = perf_evsel__intval(evsel, sample, "gpa");
- key->info = perf_evsel__intval(evsel, sample, "type");
+ key->key = evsel__intval(evsel, sample, "gpa");
+ key->info = evsel__intval(evsel, sample, "type");
}
#define KVM_TRACE_MMIO_READ_UNSATISFIED 0
@@ -48,7 +48,7 @@ static bool mmio_event_begin(struct evsel *evsel,
/* MMIO write begin event in kernel. */
if (!strcmp(evsel->name, "kvm:kvm_mmio") &&
- perf_evsel__intval(evsel, sample, "type") == KVM_TRACE_MMIO_WRITE) {
+ evsel__intval(evsel, sample, "type") == KVM_TRACE_MMIO_WRITE) {
mmio_event_get_key(evsel, sample, key);
return true;
}
@@ -65,7 +65,7 @@ static bool mmio_event_end(struct evsel *evsel, struct perf_sample *sample,
/* MMIO read end event in kernel.*/
if (!strcmp(evsel->name, "kvm:kvm_mmio") &&
- perf_evsel__intval(evsel, sample, "type") == KVM_TRACE_MMIO_READ) {
+ evsel__intval(evsel, sample, "type") == KVM_TRACE_MMIO_READ) {
mmio_event_get_key(evsel, sample, key);
return true;
}
@@ -94,8 +94,8 @@ static void ioport_event_get_key(struct evsel *evsel,
struct perf_sample *sample,
struct event_key *key)
{
- key->key = perf_evsel__intval(evsel, sample, "port");
- key->info = perf_evsel__intval(evsel, sample, "rw");
+ key->key = evsel__intval(evsel, sample, "port");
+ key->info = evsel__intval(evsel, sample, "rw");
}
static bool ioport_event_begin(struct evsel *evsel,
diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build
index e4e321b6f883..768e408757a0 100644
--- a/tools/perf/bench/Build
+++ b/tools/perf/bench/Build
@@ -6,9 +6,10 @@ perf-y += futex-wake.o
perf-y += futex-wake-parallel.o
perf-y += futex-requeue.o
perf-y += futex-lock-pi.o
-
perf-y += epoll-wait.o
perf-y += epoll-ctl.o
+perf-y += synthesize.o
+perf-y += kallsyms-parse.o
perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-lib.o
perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
index 4aa6de1aa67d..61cae4966cae 100644
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -41,9 +41,10 @@ int bench_futex_wake_parallel(int argc, const char **argv);
int bench_futex_requeue(int argc, const char **argv);
/* pi futexes */
int bench_futex_lock_pi(int argc, const char **argv);
-
int bench_epoll_wait(int argc, const char **argv);
int bench_epoll_ctl(int argc, const char **argv);
+int bench_synthesize(int argc, const char **argv);
+int bench_kallsyms_parse(int argc, const char **argv);
#define BENCH_FORMAT_DEFAULT_STR "default"
#define BENCH_FORMAT_DEFAULT 0
diff --git a/tools/perf/bench/epoll-wait.c b/tools/perf/bench/epoll-wait.c
index f938c585d512..cf797362675b 100644
--- a/tools/perf/bench/epoll-wait.c
+++ b/tools/perf/bench/epoll-wait.c
@@ -519,7 +519,8 @@ int bench_epoll_wait(int argc, const char **argv)
qsort(worker, nthreads, sizeof(struct worker), cmpworker);
for (i = 0; i < nthreads; i++) {
- unsigned long t = worker[i].ops / bench__runtime.tv_sec;
+ unsigned long t = bench__runtime.tv_sec > 0 ?
+ worker[i].ops / bench__runtime.tv_sec : 0;
update_stats(&throughput_stats, t);
diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c
index 65eebe06c04d..915bf3da7ce2 100644
--- a/tools/perf/bench/futex-hash.c
+++ b/tools/perf/bench/futex-hash.c
@@ -205,7 +205,8 @@ int bench_futex_hash(int argc, const char **argv)
pthread_mutex_destroy(&thread_lock);
for (i = 0; i < nthreads; i++) {
- unsigned long t = worker[i].ops / bench__runtime.tv_sec;
+ unsigned long t = bench__runtime.tv_sec > 0 ?
+ worker[i].ops / bench__runtime.tv_sec : 0;
update_stats(&throughput_stats, t);
if (!silent) {
if (nfutexes == 1)
diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c
index 89fd8f325f38..bb25d8beb3b8 100644
--- a/tools/perf/bench/futex-lock-pi.c
+++ b/tools/perf/bench/futex-lock-pi.c
@@ -211,7 +211,8 @@ int bench_futex_lock_pi(int argc, const char **argv)
pthread_mutex_destroy(&thread_lock);
for (i = 0; i < nthreads; i++) {
- unsigned long t = worker[i].ops / bench__runtime.tv_sec;
+ unsigned long t = bench__runtime.tv_sec > 0 ?
+ worker[i].ops / bench__runtime.tv_sec : 0;
update_stats(&throughput_stats, t);
if (!silent)
diff --git a/tools/perf/bench/kallsyms-parse.c b/tools/perf/bench/kallsyms-parse.c
new file mode 100644
index 000000000000..2b0d0f980ae9
--- /dev/null
+++ b/tools/perf/bench/kallsyms-parse.c
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Benchmark of /proc/kallsyms parsing.
+ *
+ * Copyright 2020 Google LLC.
+ */
+#include <stdlib.h>
+#include "bench.h"
+#include "../util/stat.h"
+#include <linux/time64.h>
+#include <subcmd/parse-options.h>
+#include <symbol/kallsyms.h>
+
+static unsigned int iterations = 100;
+
+static const struct option options[] = {
+ OPT_UINTEGER('i', "iterations", &iterations,
+ "Number of iterations used to compute average"),
+ OPT_END()
+};
+
+static const char *const bench_usage[] = {
+ "perf bench internals kallsyms-parse <options>",
+ NULL
+};
+
+static int bench_process_symbol(void *arg __maybe_unused,
+ const char *name __maybe_unused,
+ char type __maybe_unused,
+ u64 start __maybe_unused)
+{
+ return 0;
+}
+
+static int do_kallsyms_parse(void)
+{
+ struct timeval start, end, diff;
+ u64 runtime_us;
+ unsigned int i;
+ double time_average, time_stddev;
+ int err;
+ struct stats time_stats;
+
+ init_stats(&time_stats);
+
+ for (i = 0; i < iterations; i++) {
+ gettimeofday(&start, NULL);
+ err = kallsyms__parse("/proc/kallsyms", NULL,
+ bench_process_symbol);
+ if (err)
+ return err;
+
+ gettimeofday(&end, NULL);
+ timersub(&end, &start, &diff);
+ runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
+ update_stats(&time_stats, runtime_us);
+ }
+
+ time_average = avg_stats(&time_stats) / USEC_PER_MSEC;
+ time_stddev = stddev_stats(&time_stats) / USEC_PER_MSEC;
+ printf(" Average kallsyms__parse took: %.3f ms (+- %.3f ms)\n",
+ time_average, time_stddev);
+ return 0;
+}
+
+int bench_kallsyms_parse(int argc, const char **argv)
+{
+ argc = parse_options(argc, argv, options, bench_usage, 0);
+ if (argc) {
+ usage_with_options(bench_usage, options);
+ exit(EXIT_FAILURE);
+ }
+
+ return do_kallsyms_parse();
+}
diff --git a/tools/perf/bench/synthesize.c b/tools/perf/bench/synthesize.c
new file mode 100644
index 000000000000..8d624aea1c5e
--- /dev/null
+++ b/tools/perf/bench/synthesize.c
@@ -0,0 +1,262 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Benchmark synthesis of perf events such as at the start of a 'perf
+ * record'. Synthesis is done on the current process and the 'dummy' event
+ * handlers are invoked that support dump_trace but otherwise do nothing.
+ *
+ * Copyright 2019 Google LLC.
+ */
+#include <stdio.h>
+#include "bench.h"
+#include "../util/debug.h"
+#include "../util/session.h"
+#include "../util/stat.h"
+#include "../util/synthetic-events.h"
+#include "../util/target.h"
+#include "../util/thread_map.h"
+#include "../util/tool.h"
+#include "../util/util.h"
+#include <linux/atomic.h>
+#include <linux/err.h>
+#include <linux/time64.h>
+#include <subcmd/parse-options.h>
+
+static unsigned int min_threads = 1;
+static unsigned int max_threads = UINT_MAX;
+static unsigned int single_iterations = 10000;
+static unsigned int multi_iterations = 10;
+static bool run_st;
+static bool run_mt;
+
+static const struct option options[] = {
+ OPT_BOOLEAN('s', "st", &run_st, "Run single threaded benchmark"),
+ OPT_BOOLEAN('t', "mt", &run_mt, "Run multi-threaded benchmark"),
+ OPT_UINTEGER('m', "min-threads", &min_threads,
+ "Minimum number of threads in multithreaded bench"),
+ OPT_UINTEGER('M', "max-threads", &max_threads,
+ "Maximum number of threads in multithreaded bench"),
+ OPT_UINTEGER('i', "single-iterations", &single_iterations,
+ "Number of iterations used to compute single-threaded average"),
+ OPT_UINTEGER('I', "multi-iterations", &multi_iterations,
+ "Number of iterations used to compute multi-threaded average"),
+ OPT_END()
+};
+
+static const char *const bench_usage[] = {
+ "perf bench internals synthesize <options>",
+ NULL
+};
+
+static atomic_t event_count;
+
+static int process_synthesized_event(struct perf_tool *tool __maybe_unused,
+ union perf_event *event __maybe_unused,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ atomic_inc(&event_count);
+ return 0;
+}
+
+static int do_run_single_threaded(struct perf_session *session,
+ struct perf_thread_map *threads,
+ struct target *target, bool data_mmap)
+{
+ const unsigned int nr_threads_synthesize = 1;
+ struct timeval start, end, diff;
+ u64 runtime_us;
+ unsigned int i;
+ double time_average, time_stddev, event_average, event_stddev;
+ int err;
+ struct stats time_stats, event_stats;
+
+ init_stats(&time_stats);
+ init_stats(&event_stats);
+
+ for (i = 0; i < single_iterations; i++) {
+ atomic_set(&event_count, 0);
+ gettimeofday(&start, NULL);
+ err = __machine__synthesize_threads(&session->machines.host,
+ NULL,
+ target, threads,
+ process_synthesized_event,
+ data_mmap,
+ nr_threads_synthesize);
+ if (err)
+ return err;
+
+ gettimeofday(&end, NULL);
+ timersub(&end, &start, &diff);
+ runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
+ update_stats(&time_stats, runtime_us);
+ update_stats(&event_stats, atomic_read(&event_count));
+ }
+
+ time_average = avg_stats(&time_stats);
+ time_stddev = stddev_stats(&time_stats);
+ printf(" Average %ssynthesis took: %.3f usec (+- %.3f usec)\n",
+ data_mmap ? "data " : "", time_average, time_stddev);
+
+ event_average = avg_stats(&event_stats);
+ event_stddev = stddev_stats(&event_stats);
+ printf(" Average num. events: %.3f (+- %.3f)\n",
+ event_average, event_stddev);
+
+ printf(" Average time per event %.3f usec\n",
+ time_average / event_average);
+ return 0;
+}
+
+static int run_single_threaded(void)
+{
+ struct perf_session *session;
+ struct target target = {
+ .pid = "self",
+ };
+ struct perf_thread_map *threads;
+ int err;
+
+ perf_set_singlethreaded();
+ session = perf_session__new(NULL, false, NULL);
+ if (IS_ERR(session)) {
+ pr_err("Session creation failed.\n");
+ return PTR_ERR(session);
+ }
+ threads = thread_map__new_by_pid(getpid());
+ if (!threads) {
+ pr_err("Thread map creation failed.\n");
+ err = -ENOMEM;
+ goto err_out;
+ }
+
+ puts(
+"Computing performance of single threaded perf event synthesis by\n"
+"synthesizing events on the perf process itself:");
+
+ err = do_run_single_threaded(session, threads, &target, false);
+ if (err)
+ goto err_out;
+
+ err = do_run_single_threaded(session, threads, &target, true);
+
+err_out:
+ if (threads)
+ perf_thread_map__put(threads);
+
+ perf_session__delete(session);
+ return err;
+}
+
+static int do_run_multi_threaded(struct target *target,
+ unsigned int nr_threads_synthesize)
+{
+ struct timeval start, end, diff;
+ u64 runtime_us;
+ unsigned int i;
+ double time_average, time_stddev, event_average, event_stddev;
+ int err;
+ struct stats time_stats, event_stats;
+ struct perf_session *session;
+
+ init_stats(&time_stats);
+ init_stats(&event_stats);
+ for (i = 0; i < multi_iterations; i++) {
+ session = perf_session__new(NULL, false, NULL);
+ if (!session)
+ return -ENOMEM;
+
+ atomic_set(&event_count, 0);
+ gettimeofday(&start, NULL);
+ err = __machine__synthesize_threads(&session->machines.host,
+ NULL,
+ target, NULL,
+ process_synthesized_event,
+ false,
+ nr_threads_synthesize);
+ if (err) {
+ perf_session__delete(session);
+ return err;
+ }
+
+ gettimeofday(&end, NULL);
+ timersub(&end, &start, &diff);
+ runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
+ update_stats(&time_stats, runtime_us);
+ update_stats(&event_stats, atomic_read(&event_count));
+ perf_session__delete(session);
+ }
+
+ time_average = avg_stats(&time_stats);
+ time_stddev = stddev_stats(&time_stats);
+ printf(" Average synthesis took: %.3f usec (+- %.3f usec)\n",
+ time_average, time_stddev);
+
+ event_average = avg_stats(&event_stats);
+ event_stddev = stddev_stats(&event_stats);
+ printf(" Average num. events: %.3f (+- %.3f)\n",
+ event_average, event_stddev);
+
+ printf(" Average time per event %.3f usec\n",
+ time_average / event_average);
+ return 0;
+}
+
+static int run_multi_threaded(void)
+{
+ struct target target = {
+ .cpu_list = "0"
+ };
+ unsigned int nr_threads_synthesize;
+ int err;
+
+ if (max_threads == UINT_MAX)
+ max_threads = sysconf(_SC_NPROCESSORS_ONLN);
+
+ puts(
+"Computing performance of multi threaded perf event synthesis by\n"
+"synthesizing events on CPU 0:");
+
+ for (nr_threads_synthesize = min_threads;
+ nr_threads_synthesize <= max_threads;
+ nr_threads_synthesize++) {
+ if (nr_threads_synthesize == 1)
+ perf_set_singlethreaded();
+ else
+ perf_set_multithreaded();
+
+ printf(" Number of synthesis threads: %u\n",
+ nr_threads_synthesize);
+
+ err = do_run_multi_threaded(&target, nr_threads_synthesize);
+ if (err)
+ return err;
+ }
+ perf_set_singlethreaded();
+ return 0;
+}
+
+int bench_synthesize(int argc, const char **argv)
+{
+ int err = 0;
+
+ argc = parse_options(argc, argv, options, bench_usage, 0);
+ if (argc) {
+ usage_with_options(bench_usage, options);
+ exit(EXIT_FAILURE);
+ }
+
+ /*
+ * If neither single threaded nor multi-threaded is specified, default
+ * to running just single threaded.
+ */
+ if (!run_st && !run_mt)
+ run_st = true;
+
+ if (run_st)
+ err = run_single_threaded();
+
+ if (!err && run_mt)
+ err = run_multi_threaded();
+
+ return err;
+}
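
Once built, the new suite can be invoked as, e.g., 'perf bench internals synthesize -t -m 2 -M 8', matching the options declared above. The multi-threaded variant relies on every synthesis thread funnelling its records through process_synthesized_event(), which only bumps a shared atomic counter; the sketch below reproduces that counting scheme with plain pthreads and C11 atomics (worker() and process_event() are illustrative stand-ins, not perf code). Build with: cc -O2 sketch.c -pthread

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define NR_THREADS		4
#define EVENTS_PER_THREAD	100000

static atomic_long event_count;

/* Stand-in for process_synthesized_event(): just count the event. */
static int process_event(void)
{
	atomic_fetch_add(&event_count, 1);
	return 0;
}

static void *worker(void *arg)
{
	int i;

	(void)arg;
	for (i = 0; i < EVENTS_PER_THREAD; i++)
		process_event();
	return NULL;
}

int main(void)
{
	pthread_t tids[NR_THREADS];
	int i;

	atomic_store(&event_count, 0);
	for (i = 0; i < NR_THREADS; i++)
		pthread_create(&tids[i], NULL, worker, NULL);
	for (i = 0; i < NR_THREADS; i++)
		pthread_join(tids[i], NULL);

	printf("Total events: %ld\n", atomic_load(&event_count));
	return 0;
}
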
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 6c0a0412502e..d3e5a84f87a2 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -212,11 +212,9 @@ static bool has_annotation(struct perf_annotate *ann)
return ui__has_annotation() || ann->use_stdio2;
}
-static int perf_evsel__add_sample(struct evsel *evsel,
- struct perf_sample *sample,
- struct addr_location *al,
- struct perf_annotate *ann,
- struct machine *machine)
+static int evsel__add_sample(struct evsel *evsel, struct perf_sample *sample,
+ struct addr_location *al, struct perf_annotate *ann,
+ struct machine *machine)
{
struct hists *hists = evsel__hists(evsel);
struct hist_entry *he;
@@ -278,7 +276,7 @@ static int process_sample_event(struct perf_tool *tool,
goto out_put;
if (!al.filtered &&
- perf_evsel__add_sample(evsel, sample, &al, ann, machine)) {
+ evsel__add_sample(evsel, sample, &al, ann, machine)) {
pr_warning("problem incrementing symbol count, "
"skipping event\n");
ret = -1;
@@ -433,11 +431,10 @@ static int __cmd_annotate(struct perf_annotate *ann)
total_nr_samples += nr_samples;
hists__collapse_resort(hists, NULL);
/* Don't sort callchain */
- perf_evsel__reset_sample_bit(pos, CALLCHAIN);
+ evsel__reset_sample_bit(pos, CALLCHAIN);
perf_evsel__output_resort(pos, NULL);
- if (symbol_conf.event_group &&
- !perf_evsel__is_group_leader(pos))
+ if (symbol_conf.event_group && !evsel__is_group_leader(pos))
continue;
hists__find_annotations(hists, pos, ann);
diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
index c06fe21c8613..083273209c88 100644
--- a/tools/perf/builtin-bench.c
+++ b/tools/perf/builtin-bench.c
@@ -76,6 +76,12 @@ static struct bench epoll_benchmarks[] = {
};
#endif // HAVE_EVENTFD
+static struct bench internals_benchmarks[] = {
+ { "synthesize", "Benchmark perf event synthesis", bench_synthesize },
+ { "kallsyms-parse", "Benchmark kallsyms parsing", bench_kallsyms_parse },
+ { NULL, NULL, NULL }
+};
+
struct collection {
const char *name;
const char *summary;
@@ -92,6 +98,7 @@ static struct collection collections[] = {
#ifdef HAVE_EVENTFD
{"epoll", "Epoll stressing benchmarks", epoll_benchmarks },
#endif
+ { "internals", "Perf-internals benchmarks", internals_benchmarks },
{ "all", "All benchmarks", NULL },
{ NULL, NULL, NULL }
};
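
perf bench discovers its suites through NULL-terminated arrays of struct bench grouped into collections, so the hunk above is all that is needed to expose 'perf bench internals synthesize' and 'perf bench internals kallsyms-parse'. Below is a simplified, self-contained illustration of that dispatch-by-name pattern; struct bench here mirrors only the fields visible in the initializer, the real definition lives in tools/perf/bench/bench.h.

#include <stdio.h>
#include <string.h>

struct bench {
	const char *name;
	const char *summary;
	int (*fn)(int argc, const char **argv);
};

static int bench_synthesize(int argc, const char **argv)
{
	(void)argc; (void)argv;
	return puts("running synthesize") < 0;
}

static int bench_kallsyms_parse(int argc, const char **argv)
{
	(void)argc; (void)argv;
	return puts("running kallsyms-parse") < 0;
}

static struct bench internals_benchmarks[] = {
	{ "synthesize",     "Benchmark perf event synthesis", bench_synthesize     },
	{ "kallsyms-parse", "Benchmark kallsyms parsing",     bench_kallsyms_parse },
	{ NULL, NULL, NULL }
};

static int run_named(const char *name, int argc, const char **argv)
{
	struct bench *b;

	for (b = internals_benchmarks; b->name; b++) {
		if (!strcmp(b->name, name))
			return b->fn(argc, argv);
	}
	fprintf(stderr, "unknown benchmark: %s\n", name);
	return -1;
}

int main(int argc, char *argv[])
{
	const char *name = argc > 1 ? argv[1] : "synthesize";

	return run_named(name, argc - 1, (const char **)(argv + 1));
}
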
diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 246ac0b4d54f..1baf4cae086f 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -95,6 +95,7 @@ struct perf_c2c {
bool use_stdio;
bool stats_only;
bool symbol_full;
+ bool stitch_lbr;
/* HITM shared clines stats */
struct c2c_stats hitm_stats;
@@ -273,6 +274,9 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
return -1;
}
+ if (c2c.stitch_lbr)
+ al.thread->lbr_stitch_enable = true;
+
ret = sample__resolve_callchain(sample, &callchain_cursor, NULL,
evsel, &al, sysctl_perf_event_max_stack);
if (ret)
@@ -1705,7 +1709,7 @@ static struct c2c_dimension *get_dimension(const char *name)
if (!strcmp(dim->name, name))
return dim;
- };
+ }
return NULL;
}
@@ -1921,7 +1925,7 @@ static bool he__display(struct hist_entry *he, struct c2c_stats *stats)
FILTER_HITM(tot_hitm);
default:
break;
- };
+ }
#undef FILTER_HITM
@@ -2255,8 +2259,7 @@ static void print_c2c_info(FILE *out, struct perf_session *session)
fprintf(out, "=================================================\n");
evlist__for_each_entry(evlist, evsel) {
- fprintf(out, "%-36s: %s\n", first ? " Events" : "",
- perf_evsel__name(evsel));
+ fprintf(out, "%-36s: %s\n", first ? " Events" : "", evsel__name(evsel));
first = false;
}
fprintf(out, " Cachelines sort on : %s HITMs\n",
@@ -2601,6 +2604,12 @@ static int setup_callchain(struct evlist *evlist)
}
}
+ if (c2c.stitch_lbr && (mode != CALLCHAIN_LBR)) {
+ ui__warning("Can't find LBR callchain. Switch off --stitch-lbr.\n"
+ "Please apply --call-graph lbr when recording.\n");
+ c2c.stitch_lbr = false;
+ }
+
callchain_param.record_mode = mode;
callchain_param.min_percent = 0;
return 0;
@@ -2752,6 +2761,8 @@ static int perf_c2c__report(int argc, const char **argv)
OPT_STRING('c', "coalesce", &coalesce, "coalesce fields",
"coalesce fields: pid,tid,iaddr,dso"),
OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"),
+ OPT_BOOLEAN(0, "stitch-lbr", &c2c.stitch_lbr,
+ "Enable LBR callgraph stitching approach"),
OPT_PARENT(c2c_options),
OPT_END()
};
@@ -2947,7 +2958,7 @@ static int perf_c2c__record(int argc, const char **argv)
rec_argv[i++] = "-e";
rec_argv[i++] = perf_mem_events__name(j);
- };
+ }
if (all_user)
rec_argv[i++] = "--all-user";
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index c94a002f295e..f8c9bdd8269a 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -467,7 +467,7 @@ static struct evsel *evsel_match(struct evsel *evsel,
struct evsel *e;
evlist__for_each_entry(evlist, e) {
- if (perf_evsel__match2(evsel, e))
+ if (evsel__match2(evsel, e))
return e;
}
@@ -981,7 +981,7 @@ static void data_process(void)
if (!quiet) {
fprintf(stdout, "%s# Event '%s'\n#\n", first ? "" : "\n",
- perf_evsel__name(evsel_base));
+ evsel__name(evsel_base));
}
first = false;
@@ -990,7 +990,7 @@ static void data_process(void)
data__fprintf();
/* Don't sort callchain for perf diff */
- perf_evsel__reset_sample_bit(evsel_base, CALLCHAIN);
+ evsel__reset_sample_bit(evsel_base, CALLCHAIN);
hists__process(hists_base);
}
@@ -1562,7 +1562,7 @@ hpp__entry_pair(struct hist_entry *he, struct hist_entry *pair,
default:
BUG_ON(1);
- };
+ }
}
static void
diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c
index d5adc417a4ca..55eda54240fb 100644
--- a/tools/perf/builtin-ftrace.c
+++ b/tools/perf/builtin-ftrace.c
@@ -284,10 +284,11 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv)
.events = POLLIN,
};
- if (!perf_cap__capable(CAP_SYS_ADMIN)) {
+ if (!(perf_cap__capable(CAP_PERFMON) ||
+ perf_cap__capable(CAP_SYS_ADMIN))) {
pr_err("ftrace only works for %s!\n",
#ifdef HAVE_LIBCAP_SUPPORT
- "users with the SYS_ADMIN capability"
+ "users with the CAP_PERFMON or CAP_SYS_ADMIN capability"
#else
"root"
#endif
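
With this change the capability gate accepts either CAP_PERFMON (added in Linux 5.8) or the broader CAP_SYS_ADMIN. Outside of perf's perf_cap__capable() helper, the same effective-capability test can be written directly against libcap; the sketch below assumes libcap is installed and falls back to CAP_PERFMON's numeric value on older headers. Build with: cc -O2 sketch.c -lcap

#include <stdbool.h>
#include <stdio.h>
#include <sys/capability.h>

#ifndef CAP_PERFMON
#define CAP_PERFMON 38		/* not yet in older <linux/capability.h> */
#endif

static bool capable(cap_value_t cap)
{
	cap_flag_value_t val = CAP_CLEAR;
	cap_t caps = cap_get_proc();
	bool ret = false;

	if (!caps)
		return false;
	if (!cap_get_flag(caps, cap, CAP_EFFECTIVE, &val))
		ret = (val == CAP_SET);
	cap_free(caps);
	return ret;
}

int main(void)
{
	if (capable(CAP_PERFMON) || capable(CAP_SYS_ADMIN))
		printf("ftrace would be allowed\n");
	else
		printf("need CAP_PERFMON or CAP_SYS_ADMIN (or root)\n");
	return 0;
}
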
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 7e124a7b8bfd..53932db97a79 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -536,7 +536,7 @@ static int perf_inject__sched_stat(struct perf_tool *tool,
union perf_event *event_sw;
struct perf_sample sample_sw;
struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
- u32 pid = perf_evsel__intval(evsel, sample, "pid");
+ u32 pid = evsel__intval(evsel, sample, "pid");
list_for_each_entry(ent, &inject->samples, node) {
if (pid == ent->tid)
@@ -546,7 +546,7 @@ static int perf_inject__sched_stat(struct perf_tool *tool,
return 0;
found:
event_sw = &ent->event[0];
- perf_evsel__parse_sample(evsel, event_sw, &sample_sw);
+ evsel__parse_sample(evsel, event_sw, &sample_sw);
sample_sw.period = sample->period;
sample_sw.time = sample->time;
@@ -561,11 +561,10 @@ static void sig_handler(int sig __maybe_unused)
session_done = 1;
}
-static int perf_evsel__check_stype(struct evsel *evsel,
- u64 sample_type, const char *sample_msg)
+static int evsel__check_stype(struct evsel *evsel, u64 sample_type, const char *sample_msg)
{
struct perf_event_attr *attr = &evsel->core.attr;
- const char *name = perf_evsel__name(evsel);
+ const char *name = evsel__name(evsel);
if (!(attr->sample_type & sample_type)) {
pr_err("Samples for %s event do not have %s attribute set.",
@@ -622,10 +621,10 @@ static int __cmd_inject(struct perf_inject *inject)
struct evsel *evsel;
evlist__for_each_entry(session->evlist, evsel) {
- const char *name = perf_evsel__name(evsel);
+ const char *name = evsel__name(evsel);
if (!strcmp(name, "sched:sched_switch")) {
- if (perf_evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID"))
+ if (evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID"))
return -EINVAL;
evsel->handler = perf_inject__sched_switch;
@@ -684,14 +683,14 @@ static int __cmd_inject(struct perf_inject *inject)
perf_header__clear_feat(&session->header,
HEADER_AUXTRACE);
- if (inject->itrace_synth_opts.last_branch)
+ if (inject->itrace_synth_opts.last_branch ||
+ inject->itrace_synth_opts.add_last_branch)
perf_header__set_feat(&session->header,
HEADER_BRANCH_STACK);
evsel = perf_evlist__id2evsel_strict(session->evlist,
inject->aux_id);
if (evsel) {
- pr_debug("Deleting %s\n",
- perf_evsel__name(evsel));
+ pr_debug("Deleting %s\n", evsel__name(evsel));
evlist__remove(session->evlist, evsel);
evsel__delete(evsel);
}
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 003c85f5f56c..38a5ab683ebc 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -169,13 +169,12 @@ static int insert_caller_stat(unsigned long call_site,
return 0;
}
-static int perf_evsel__process_alloc_event(struct evsel *evsel,
- struct perf_sample *sample)
+static int evsel__process_alloc_event(struct evsel *evsel, struct perf_sample *sample)
{
- unsigned long ptr = perf_evsel__intval(evsel, sample, "ptr"),
- call_site = perf_evsel__intval(evsel, sample, "call_site");
- int bytes_req = perf_evsel__intval(evsel, sample, "bytes_req"),
- bytes_alloc = perf_evsel__intval(evsel, sample, "bytes_alloc");
+ unsigned long ptr = evsel__intval(evsel, sample, "ptr"),
+ call_site = evsel__intval(evsel, sample, "call_site");
+ int bytes_req = evsel__intval(evsel, sample, "bytes_req"),
+ bytes_alloc = evsel__intval(evsel, sample, "bytes_alloc");
if (insert_alloc_stat(call_site, ptr, bytes_req, bytes_alloc, sample->cpu) ||
insert_caller_stat(call_site, bytes_req, bytes_alloc))
@@ -188,14 +187,13 @@ static int perf_evsel__process_alloc_event(struct evsel *evsel,
return 0;
}
-static int perf_evsel__process_alloc_node_event(struct evsel *evsel,
- struct perf_sample *sample)
+static int evsel__process_alloc_node_event(struct evsel *evsel, struct perf_sample *sample)
{
- int ret = perf_evsel__process_alloc_event(evsel, sample);
+ int ret = evsel__process_alloc_event(evsel, sample);
if (!ret) {
int node1 = cpu__get_node(sample->cpu),
- node2 = perf_evsel__intval(evsel, sample, "node");
+ node2 = evsel__intval(evsel, sample, "node");
if (node1 != node2)
nr_cross_allocs++;
@@ -232,10 +230,9 @@ static struct alloc_stat *search_alloc_stat(unsigned long ptr,
return NULL;
}
-static int perf_evsel__process_free_event(struct evsel *evsel,
- struct perf_sample *sample)
+static int evsel__process_free_event(struct evsel *evsel, struct perf_sample *sample)
{
- unsigned long ptr = perf_evsel__intval(evsel, sample, "ptr");
+ unsigned long ptr = evsel__intval(evsel, sample, "ptr");
struct alloc_stat *s_alloc, *s_caller;
s_alloc = search_alloc_stat(ptr, 0, &root_alloc_stat, ptr_cmp);
@@ -784,13 +781,12 @@ static int parse_gfp_flags(struct evsel *evsel, struct perf_sample *sample,
return 0;
}
-static int perf_evsel__process_page_alloc_event(struct evsel *evsel,
- struct perf_sample *sample)
+static int evsel__process_page_alloc_event(struct evsel *evsel, struct perf_sample *sample)
{
u64 page;
- unsigned int order = perf_evsel__intval(evsel, sample, "order");
- unsigned int gfp_flags = perf_evsel__intval(evsel, sample, "gfp_flags");
- unsigned int migrate_type = perf_evsel__intval(evsel, sample,
+ unsigned int order = evsel__intval(evsel, sample, "order");
+ unsigned int gfp_flags = evsel__intval(evsel, sample, "gfp_flags");
+ unsigned int migrate_type = evsel__intval(evsel, sample,
"migratetype");
u64 bytes = kmem_page_size << order;
u64 callsite;
@@ -802,9 +798,9 @@ static int perf_evsel__process_page_alloc_event(struct evsel *evsel,
};
if (use_pfn)
- page = perf_evsel__intval(evsel, sample, "pfn");
+ page = evsel__intval(evsel, sample, "pfn");
else
- page = perf_evsel__intval(evsel, sample, "page");
+ page = evsel__intval(evsel, sample, "page");
nr_page_allocs++;
total_page_alloc_bytes += bytes;
@@ -857,11 +853,10 @@ static int perf_evsel__process_page_alloc_event(struct evsel *evsel,
return 0;
}
-static int perf_evsel__process_page_free_event(struct evsel *evsel,
- struct perf_sample *sample)
+static int evsel__process_page_free_event(struct evsel *evsel, struct perf_sample *sample)
{
u64 page;
- unsigned int order = perf_evsel__intval(evsel, sample, "order");
+ unsigned int order = evsel__intval(evsel, sample, "order");
u64 bytes = kmem_page_size << order;
struct page_stat *pstat;
struct page_stat this = {
@@ -869,9 +864,9 @@ static int perf_evsel__process_page_free_event(struct evsel *evsel,
};
if (use_pfn)
- page = perf_evsel__intval(evsel, sample, "pfn");
+ page = evsel__intval(evsel, sample, "pfn");
else
- page = perf_evsel__intval(evsel, sample, "page");
+ page = evsel__intval(evsel, sample, "page");
nr_page_frees++;
total_page_free_bytes += bytes;
@@ -1371,15 +1366,15 @@ static int __cmd_kmem(struct perf_session *session)
struct evsel *evsel;
const struct evsel_str_handler kmem_tracepoints[] = {
/* slab allocator */
- { "kmem:kmalloc", perf_evsel__process_alloc_event, },
- { "kmem:kmem_cache_alloc", perf_evsel__process_alloc_event, },
- { "kmem:kmalloc_node", perf_evsel__process_alloc_node_event, },
- { "kmem:kmem_cache_alloc_node", perf_evsel__process_alloc_node_event, },
- { "kmem:kfree", perf_evsel__process_free_event, },
- { "kmem:kmem_cache_free", perf_evsel__process_free_event, },
+ { "kmem:kmalloc", evsel__process_alloc_event, },
+ { "kmem:kmem_cache_alloc", evsel__process_alloc_event, },
+ { "kmem:kmalloc_node", evsel__process_alloc_node_event, },
+ { "kmem:kmem_cache_alloc_node", evsel__process_alloc_node_event, },
+ { "kmem:kfree", evsel__process_free_event, },
+ { "kmem:kmem_cache_free", evsel__process_free_event, },
/* page allocator */
- { "kmem:mm_page_alloc", perf_evsel__process_page_alloc_event, },
- { "kmem:mm_page_free", perf_evsel__process_page_free_event, },
+ { "kmem:mm_page_alloc", evsel__process_page_alloc_event, },
+ { "kmem:mm_page_free", evsel__process_page_free_event, },
};
if (!perf_session__has_traces(session, "kmem record"))
@@ -1391,8 +1386,8 @@ static int __cmd_kmem(struct perf_session *session)
}
evlist__for_each_entry(session->evlist, evsel) {
- if (!strcmp(perf_evsel__name(evsel), "kmem:mm_page_alloc") &&
- perf_evsel__field(evsel, "pfn")) {
+ if (!strcmp(evsel__name(evsel), "kmem:mm_page_alloc") &&
+ evsel__field(evsel, "pfn")) {
use_pfn = true;
break;
}
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index 577af4f3297a..95a77058023e 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -69,7 +69,7 @@ void exit_event_get_key(struct evsel *evsel,
struct event_key *key)
{
key->info = 0;
- key->key = perf_evsel__intval(evsel, sample, kvm_exit_reason);
+ key->key = evsel__intval(evsel, sample, kvm_exit_reason);
}
bool kvm_exit_event(struct evsel *evsel)
@@ -416,8 +416,7 @@ struct vcpu_event_record *per_vcpu_record(struct thread *thread,
return NULL;
}
- vcpu_record->vcpu_id = perf_evsel__intval(evsel, sample,
- vcpu_id_str);
+ vcpu_record->vcpu_id = evsel__intval(evsel, sample, vcpu_id_str);
thread__set_priv(thread, vcpu_record);
}
@@ -1033,16 +1032,16 @@ static int kvm_live_open_events(struct perf_kvm_stat *kvm)
struct perf_event_attr *attr = &pos->core.attr;
/* make sure these *are* set */
- perf_evsel__set_sample_bit(pos, TID);
- perf_evsel__set_sample_bit(pos, TIME);
- perf_evsel__set_sample_bit(pos, CPU);
- perf_evsel__set_sample_bit(pos, RAW);
+ evsel__set_sample_bit(pos, TID);
+ evsel__set_sample_bit(pos, TIME);
+ evsel__set_sample_bit(pos, CPU);
+ evsel__set_sample_bit(pos, RAW);
/* make sure these are *not*; want as small a sample as possible */
- perf_evsel__reset_sample_bit(pos, PERIOD);
- perf_evsel__reset_sample_bit(pos, IP);
- perf_evsel__reset_sample_bit(pos, CALLCHAIN);
- perf_evsel__reset_sample_bit(pos, ADDR);
- perf_evsel__reset_sample_bit(pos, READ);
+ evsel__reset_sample_bit(pos, PERIOD);
+ evsel__reset_sample_bit(pos, IP);
+ evsel__reset_sample_bit(pos, CALLCHAIN);
+ evsel__reset_sample_bit(pos, ADDR);
+ evsel__reset_sample_bit(pos, READ);
attr->mmap = 0;
attr->comm = 0;
attr->task = 0;
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index 474dfd59d7eb..f0a1dbacb46c 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -48,7 +48,7 @@ struct lock_stat {
struct rb_node rb; /* used for sorting */
/*
- * FIXME: perf_evsel__intval() returns u64,
+ * FIXME: evsel__intval() returns u64,
 * so the address of lockdep_map should be treated as 64 bit.
 * Is there a better solution?
*/
@@ -404,9 +404,9 @@ static int report_lock_acquire_event(struct evsel *evsel,
struct lock_stat *ls;
struct thread_stat *ts;
struct lock_seq_stat *seq;
- const char *name = perf_evsel__strval(evsel, sample, "name");
- u64 tmp = perf_evsel__intval(evsel, sample, "lockdep_addr");
- int flag = perf_evsel__intval(evsel, sample, "flag");
+ const char *name = evsel__strval(evsel, sample, "name");
+ u64 tmp = evsel__intval(evsel, sample, "lockdep_addr");
+ int flag = evsel__intval(evsel, sample, "flag");
memcpy(&addr, &tmp, sizeof(void *));
@@ -477,8 +477,8 @@ static int report_lock_acquired_event(struct evsel *evsel,
struct thread_stat *ts;
struct lock_seq_stat *seq;
u64 contended_term;
- const char *name = perf_evsel__strval(evsel, sample, "name");
- u64 tmp = perf_evsel__intval(evsel, sample, "lockdep_addr");
+ const char *name = evsel__strval(evsel, sample, "name");
+ u64 tmp = evsel__intval(evsel, sample, "lockdep_addr");
memcpy(&addr, &tmp, sizeof(void *));
@@ -539,8 +539,8 @@ static int report_lock_contended_event(struct evsel *evsel,
struct lock_stat *ls;
struct thread_stat *ts;
struct lock_seq_stat *seq;
- const char *name = perf_evsel__strval(evsel, sample, "name");
- u64 tmp = perf_evsel__intval(evsel, sample, "lockdep_addr");
+ const char *name = evsel__strval(evsel, sample, "name");
+ u64 tmp = evsel__intval(evsel, sample, "lockdep_addr");
memcpy(&addr, &tmp, sizeof(void *));
@@ -594,8 +594,8 @@ static int report_lock_release_event(struct evsel *evsel,
struct lock_stat *ls;
struct thread_stat *ts;
struct lock_seq_stat *seq;
- const char *name = perf_evsel__strval(evsel, sample, "name");
- u64 tmp = perf_evsel__intval(evsel, sample, "lockdep_addr");
+ const char *name = evsel__strval(evsel, sample, "name");
+ u64 tmp = evsel__intval(evsel, sample, "lockdep_addr");
memcpy(&addr, &tmp, sizeof(void *));
@@ -657,32 +657,28 @@ static struct trace_lock_handler report_lock_ops = {
static struct trace_lock_handler *trace_handler;
-static int perf_evsel__process_lock_acquire(struct evsel *evsel,
- struct perf_sample *sample)
+static int evsel__process_lock_acquire(struct evsel *evsel, struct perf_sample *sample)
{
if (trace_handler->acquire_event)
return trace_handler->acquire_event(evsel, sample);
return 0;
}
-static int perf_evsel__process_lock_acquired(struct evsel *evsel,
- struct perf_sample *sample)
+static int evsel__process_lock_acquired(struct evsel *evsel, struct perf_sample *sample)
{
if (trace_handler->acquired_event)
return trace_handler->acquired_event(evsel, sample);
return 0;
}
-static int perf_evsel__process_lock_contended(struct evsel *evsel,
- struct perf_sample *sample)
+static int evsel__process_lock_contended(struct evsel *evsel, struct perf_sample *sample)
{
if (trace_handler->contended_event)
return trace_handler->contended_event(evsel, sample);
return 0;
}
-static int perf_evsel__process_lock_release(struct evsel *evsel,
- struct perf_sample *sample)
+static int evsel__process_lock_release(struct evsel *evsel, struct perf_sample *sample)
{
if (trace_handler->release_event)
return trace_handler->release_event(evsel, sample);
@@ -775,7 +771,7 @@ static void dump_threads(void)
pr_info("%10d: %s\n", st->tid, thread__comm_str(t));
node = rb_next(node);
thread__put(t);
- };
+ }
}
static void dump_map(void)
@@ -849,10 +845,10 @@ static void sort_result(void)
}
static const struct evsel_str_handler lock_tracepoints[] = {
- { "lock:lock_acquire", perf_evsel__process_lock_acquire, }, /* CONFIG_LOCKDEP */
- { "lock:lock_acquired", perf_evsel__process_lock_acquired, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
- { "lock:lock_contended", perf_evsel__process_lock_contended, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
- { "lock:lock_release", perf_evsel__process_lock_release, }, /* CONFIG_LOCKDEP */
+ { "lock:lock_acquire", evsel__process_lock_acquire, }, /* CONFIG_LOCKDEP */
+ { "lock:lock_acquired", evsel__process_lock_acquired, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
+ { "lock:lock_contended", evsel__process_lock_contended, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
+ { "lock:lock_release", evsel__process_lock_release, }, /* CONFIG_LOCKDEP */
};
static bool force;
diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c
index a13f5817d6fc..68a7eb84561a 100644
--- a/tools/perf/builtin-mem.c
+++ b/tools/perf/builtin-mem.c
@@ -123,7 +123,7 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
rec_argv[i++] = "-e";
rec_argv[i++] = perf_mem_events__name(j);
- };
+ }
if (all_user)
rec_argv[i++] = "--all-user";
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 1ab349abe904..e4efdbf1a81e 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -34,6 +34,7 @@
#include "util/tsc.h"
#include "util/parse-branch-options.h"
#include "util/parse-regs-options.h"
+#include "util/perf_api_probe.h"
#include "util/llvm-utils.h"
#include "util/bpf-loader.h"
#include "util/trigger.h"
@@ -43,6 +44,7 @@
#include "util/time-utils.h"
#include "util/units.h"
#include "util/bpf-event.h"
+#include "util/util.h"
#include "asm/bug.h"
#include "perf.h"
@@ -50,6 +52,7 @@
#include <inttypes.h>
#include <locale.h>
#include <poll.h>
+#include <pthread.h>
#include <unistd.h>
#include <sched.h>
#include <signal.h>
@@ -84,7 +87,10 @@ struct record {
struct auxtrace_record *itr;
struct evlist *evlist;
struct perf_session *session;
+ struct evlist *sb_evlist;
+ pthread_t thread_id;
int realtime_prio;
+ bool switch_output_event_set;
bool no_buildid;
bool no_buildid_set;
bool no_buildid_cache;
@@ -503,6 +509,20 @@ static int process_synthesized_event(struct perf_tool *tool,
return record__write(rec, NULL, event, event->header.size);
}
+static int process_locked_synthesized_event(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ static pthread_mutex_t synth_lock = PTHREAD_MUTEX_INITIALIZER;
+ int ret;
+
+ pthread_mutex_lock(&synth_lock);
+ ret = process_synthesized_event(tool, event, sample, machine);
+ pthread_mutex_unlock(&synth_lock);
+ return ret;
+}
+
static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size)
{
struct record *rec = to;
@@ -825,7 +845,7 @@ static int record__open(struct record *rec)
evlist__for_each_entry(evlist, pos) {
try_again:
if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) {
- if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
+ if (evsel__fallback(pos, errno, msg, sizeof(msg))) {
if (verbose > 0)
ui__warning("%s\n", msg);
goto try_again;
@@ -837,8 +857,7 @@ try_again:
goto try_again;
}
rc = -errno;
- perf_evsel__open_strerror(pos, &opts->target,
- errno, msg, sizeof(msg));
+ evsel__open_strerror(pos, &opts->target, errno, msg, sizeof(msg));
ui__error("%s\n", msg);
goto out;
}
@@ -859,7 +878,7 @@ try_again:
if (perf_evlist__apply_filters(evlist, &pos)) {
pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
- pos->filter, perf_evsel__name(pos), errno,
+ pos->filter, evsel__name(pos), errno,
str_error_r(errno, msg, sizeof(msg)));
rc = -1;
goto out;
@@ -1288,6 +1307,7 @@ static int record__synthesize(struct record *rec, bool tail)
struct perf_tool *tool = &rec->tool;
int fd = perf_data__fd(data);
int err = 0;
+ event_op f = process_synthesized_event;
if (rec->opts.tail_synthesize != tail)
return 0;
@@ -1402,13 +1422,67 @@ static int record__synthesize(struct record *rec, bool tail)
if (err < 0)
pr_warning("Couldn't synthesize cgroup events.\n");
+ if (rec->opts.nr_threads_synthesize > 1) {
+ perf_set_multithreaded();
+ f = process_locked_synthesized_event;
+ }
+
err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->core.threads,
- process_synthesized_event, opts->sample_address,
- 1);
+ f, opts->sample_address,
+ rec->opts.nr_threads_synthesize);
+
+ if (rec->opts.nr_threads_synthesize > 1)
+ perf_set_singlethreaded();
+
out:
return err;
}
+static int record__process_signal_event(union perf_event *event __maybe_unused, void *data)
+{
+ struct record *rec = data;
+ pthread_kill(rec->thread_id, SIGUSR2);
+ return 0;
+}
+
+static int record__setup_sb_evlist(struct record *rec)
+{
+ struct record_opts *opts = &rec->opts;
+
+ if (rec->sb_evlist != NULL) {
+ /*
+ * We get here if --switch-output-event populated the
+ * sb_evlist, so associate a callback that will send a SIGUSR2
+ * to the main thread.
+ */
+ evlist__set_cb(rec->sb_evlist, record__process_signal_event, rec);
+ rec->thread_id = pthread_self();
+ }
+
+ if (!opts->no_bpf_event) {
+ if (rec->sb_evlist == NULL) {
+ rec->sb_evlist = evlist__new();
+
+ if (rec->sb_evlist == NULL) {
+ pr_err("Couldn't create side band evlist.\n.");
+ return -1;
+ }
+ }
+
+ if (evlist__add_bpf_sb_event(rec->sb_evlist, &rec->session->header.env)) {
+ pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n.");
+ return -1;
+ }
+ }
+
+ if (perf_evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) {
+ pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
+ opts->no_bpf_event = true;
+ }
+
+ return 0;
+}
+
static int __cmd_record(struct record *rec, int argc, const char **argv)
{
int err;
@@ -1420,7 +1494,6 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
struct perf_data *data = &rec->data;
struct perf_session *session;
bool disabled = false, draining = false;
- struct evlist *sb_evlist = NULL;
int fd;
float ratio = 0;
@@ -1546,21 +1619,17 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
goto out_child;
}
+ err = -1;
if (!rec->no_buildid
&& !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
pr_err("Couldn't generate buildids. "
"Use --no-buildid to profile anyway.\n");
- err = -1;
goto out_child;
}
- if (!opts->no_bpf_event)
- bpf_event__add_sb_event(&sb_evlist, &session->header.env);
-
- if (perf_evlist__start_sb_thread(sb_evlist, &rec->opts.target)) {
- pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
- opts->no_bpf_event = true;
- }
+ err = record__setup_sb_evlist(rec);
+ if (err)
+ goto out_child;
err = record__synthesize(rec, false);
if (err < 0)
@@ -1831,7 +1900,7 @@ out_delete_session:
perf_session__delete(session);
if (!opts->no_bpf_event)
- perf_evlist__stop_sb_thread(sb_evlist);
+ perf_evlist__stop_sb_thread(rec->sb_evlist);
return status;
}
@@ -2142,10 +2211,19 @@ static int switch_output_setup(struct record *rec)
};
unsigned long val;
+ /*
+ * If we're using --switch-output-event, then we imply
+ * --switch-output=signal, as we'll send a SIGUSR2 from the side band
+ * thread to its parent.
+ */
+ if (rec->switch_output_event_set)
+ goto do_signal;
+
if (!s->set)
return 0;
if (!strcmp(s->str, "signal")) {
+do_signal:
s->signal = true;
pr_debug("switch-output with SIGUSR2 signal\n");
goto enabled;
@@ -2232,6 +2310,7 @@ static struct record record = {
.default_per_cpu = true,
},
.mmap_flush = MMAP_FLUSH_DEFAULT,
+ .nr_threads_synthesize = 1,
},
.tool = {
.sample = process_sample_event,
@@ -2402,6 +2481,9 @@ static struct option __record_options[] = {
&record.switch_output.set, "signal or size[BKMG] or time[smhd]",
"Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold",
"signal"),
+ OPT_CALLBACK_SET(0, "switch-output-event", &record.sb_evlist, &record.switch_output_event_set, "switch output event",
+ "switch output event selector. use 'perf list' to list available events",
+ parse_events_option_new_evlist),
OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files,
"Limit number of switch output generated files"),
OPT_BOOLEAN(0, "dry-run", &dry_run,
@@ -2421,6 +2503,9 @@ static struct option __record_options[] = {
#endif
OPT_CALLBACK(0, "max-size", &record.output_max_size,
"size", "Limit the maximum size of the output file", parse_output_max_size),
+ OPT_UINTEGER(0, "num-thread-synthesize",
+ &record.opts.nr_threads_synthesize,
+ "number of threads to run for event synthesis"),
OPT_END()
};
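
The --switch-output-event plumbing works by giving the side-band evlist a callback, record__process_signal_event(), that pthread_kill()s the main record thread with SIGUSR2, so the pre-existing --switch-output=signal path performs the actual file rotation. Below is a minimal sketch of that thread-to-thread signalling: side_band() stands in for perf's side-band thread, and sigwait() replaces perf's asynchronous signal handler purely to keep the example short. Build with: cc -O2 sketch.c -pthread

#include <pthread.h>
#include <signal.h>
#include <stdio.h>
#include <unistd.h>

static pthread_t main_thread;

/* Stand-in for the side-band thread: on a matching event, poke the parent. */
static void *side_band(void *arg)
{
	(void)arg;
	sleep(1);			/* pretend a switch-output event arrived */
	pthread_kill(main_thread, SIGUSR2);
	return NULL;
}

int main(void)
{
	sigset_t set;
	pthread_t tid;
	int sig;

	sigemptyset(&set);
	sigaddset(&set, SIGUSR2);
	pthread_sigmask(SIG_BLOCK, &set, NULL);	/* mask inherited by the worker */

	main_thread = pthread_self();
	pthread_create(&tid, NULL, side_band, NULL);

	sigwait(&set, &sig);	/* perf record would rotate its output file here */
	printf("got SIGUSR2, switching output\n");

	pthread_join(tid, NULL);
	return 0;
}
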
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 26d8fc27e427..ba63390246c2 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -84,6 +84,7 @@ struct report {
bool header_only;
bool nonany_branch_mode;
bool group_set;
+ bool stitch_lbr;
int max_stack;
struct perf_read_values show_threads_values;
struct annotation_options annotation_opts;
@@ -267,6 +268,9 @@ static int process_sample_event(struct perf_tool *tool,
return -1;
}
+ if (rep->stitch_lbr)
+ al.thread->lbr_stitch_enable = true;
+
if (symbol_conf.hide_unresolved && al.sym == NULL)
goto out_put;
@@ -317,7 +321,7 @@ static int process_read_event(struct perf_tool *tool,
struct report *rep = container_of(tool, struct report, tool);
if (rep->show_threads) {
- const char *name = perf_evsel__name(evsel);
+ const char *name = evsel__name(evsel);
int err = perf_read_values_add_value(&rep->show_threads_values,
event->read.pid, event->read.tid,
evsel->idx,
@@ -339,12 +343,14 @@ static int report__setup_sample_type(struct report *rep)
bool is_pipe = perf_data__is_pipe(session->data);
if (session->itrace_synth_opts->callchain ||
+ session->itrace_synth_opts->add_callchain ||
(!is_pipe &&
perf_header__has_feat(&session->header, HEADER_AUXTRACE) &&
!session->itrace_synth_opts->set))
sample_type |= PERF_SAMPLE_CALLCHAIN;
- if (session->itrace_synth_opts->last_branch)
+ if (session->itrace_synth_opts->last_branch ||
+ session->itrace_synth_opts->add_last_branch)
sample_type |= PERF_SAMPLE_BRANCH_STACK;
if (!is_pipe && !(sample_type & PERF_SAMPLE_CALLCHAIN)) {
@@ -407,6 +413,12 @@ static int report__setup_sample_type(struct report *rep)
callchain_param.record_mode = CALLCHAIN_FP;
}
+ if (rep->stitch_lbr && (callchain_param.record_mode != CALLCHAIN_LBR)) {
+ ui__warning("Can't find LBR callchain. Switch off --stitch-lbr.\n"
+ "Please apply --call-graph lbr when recording.\n");
+ rep->stitch_lbr = false;
+ }
+
/* ??? handle more cases than just ANY? */
if (!(perf_evlist__combined_branch_type(session->evlist) &
PERF_SAMPLE_BRANCH_ANY))
@@ -447,10 +459,10 @@ static size_t hists__fprintf_nr_sample_events(struct hists *hists, struct report
nr_events = hists->stats.total_non_filtered_period;
}
- if (perf_evsel__is_group_event(evsel)) {
+ if (evsel__is_group_event(evsel)) {
struct evsel *pos;
- perf_evsel__group_desc(evsel, buf, size);
+ evsel__group_desc(evsel, buf, size);
evname = buf;
for_each_group_member(pos, evsel) {
@@ -525,10 +537,9 @@ static int perf_evlist__tty_browse_hists(struct evlist *evlist,
evlist__for_each_entry(evlist, pos) {
struct hists *hists = evsel__hists(pos);
- const char *evname = perf_evsel__name(pos);
+ const char *evname = evsel__name(pos);
- if (symbol_conf.event_group &&
- !perf_evsel__is_group_leader(pos))
+ if (symbol_conf.event_group && !evsel__is_group_leader(pos))
continue;
hists__fprintf_nr_sample_events(hists, rep, evname, stdout);
@@ -670,8 +681,7 @@ static int report__collapse_hists(struct report *rep)
break;
/* Non-group events are considered as leader */
- if (symbol_conf.event_group &&
- !perf_evsel__is_group_leader(pos)) {
+ if (symbol_conf.event_group && !evsel__is_group_leader(pos)) {
struct hists *leader_hists = evsel__hists(pos->leader);
hists__match(leader_hists, hists);
@@ -1257,6 +1267,8 @@ int cmd_report(int argc, const char **argv)
"Show full source file name path for source lines"),
OPT_BOOLEAN(0, "show-ref-call-graph", &symbol_conf.show_ref_callgraph,
"Show callgraph from reference event"),
+ OPT_BOOLEAN(0, "stitch-lbr", &report.stitch_lbr,
+ "Enable LBR callgraph stitching approach"),
OPT_INTEGER(0, "socket-filter", &report.socket_filter,
"only show processor socket that match with this filter"),
OPT_BOOLEAN(0, "raw-trace", &symbol_conf.raw_trace,
@@ -1332,7 +1344,7 @@ int cmd_report(int argc, const char **argv)
if (symbol_conf.cumulate_callchain && !callchain_param.order_set)
callchain_param.order = ORDER_CALLER;
- if (itrace_synth_opts.callchain &&
+ if ((itrace_synth_opts.callchain || itrace_synth_opts.add_callchain) &&
(int)itrace_synth_opts.callchain_sz > report.max_stack)
report.max_stack = itrace_synth_opts.callchain_sz;
@@ -1380,7 +1392,7 @@ repeat:
goto error;
}
- if (itrace_synth_opts.last_branch)
+ if (itrace_synth_opts.last_branch || itrace_synth_opts.add_last_branch)
has_br_stack = true;
if (has_br_stack && branch_call_mode)
@@ -1400,7 +1412,7 @@ repeat:
}
if (branch_call_mode) {
callchain_param.key = CCKEY_ADDRESS;
- callchain_param.branch_callstack = 1;
+ callchain_param.branch_callstack = true;
symbol_conf.use_callchain = true;
callchain_register_param(&callchain_param);
if (sort_order == NULL)
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 82fcc2c15fe4..459e4229945e 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -811,8 +811,8 @@ replay_wakeup_event(struct perf_sched *sched,
struct evsel *evsel, struct perf_sample *sample,
struct machine *machine __maybe_unused)
{
- const char *comm = perf_evsel__strval(evsel, sample, "comm");
- const u32 pid = perf_evsel__intval(evsel, sample, "pid");
+ const char *comm = evsel__strval(evsel, sample, "comm");
+ const u32 pid = evsel__intval(evsel, sample, "pid");
struct task_desc *waker, *wakee;
if (verbose > 0) {
@@ -833,11 +833,11 @@ static int replay_switch_event(struct perf_sched *sched,
struct perf_sample *sample,
struct machine *machine __maybe_unused)
{
- const char *prev_comm = perf_evsel__strval(evsel, sample, "prev_comm"),
- *next_comm = perf_evsel__strval(evsel, sample, "next_comm");
- const u32 prev_pid = perf_evsel__intval(evsel, sample, "prev_pid"),
- next_pid = perf_evsel__intval(evsel, sample, "next_pid");
- const u64 prev_state = perf_evsel__intval(evsel, sample, "prev_state");
+ const char *prev_comm = evsel__strval(evsel, sample, "prev_comm"),
+ *next_comm = evsel__strval(evsel, sample, "next_comm");
+ const u32 prev_pid = evsel__intval(evsel, sample, "prev_pid"),
+ next_pid = evsel__intval(evsel, sample, "next_pid");
+ const u64 prev_state = evsel__intval(evsel, sample, "prev_state");
struct task_desc *prev, __maybe_unused *next;
u64 timestamp0, timestamp = sample->time;
int cpu = sample->cpu;
@@ -1106,9 +1106,9 @@ static int latency_switch_event(struct perf_sched *sched,
struct perf_sample *sample,
struct machine *machine)
{
- const u32 prev_pid = perf_evsel__intval(evsel, sample, "prev_pid"),
- next_pid = perf_evsel__intval(evsel, sample, "next_pid");
- const u64 prev_state = perf_evsel__intval(evsel, sample, "prev_state");
+ const u32 prev_pid = evsel__intval(evsel, sample, "prev_pid"),
+ next_pid = evsel__intval(evsel, sample, "next_pid");
+ const u64 prev_state = evsel__intval(evsel, sample, "prev_state");
struct work_atoms *out_events, *in_events;
struct thread *sched_out, *sched_in;
u64 timestamp0, timestamp = sample->time;
@@ -1176,8 +1176,8 @@ static int latency_runtime_event(struct perf_sched *sched,
struct perf_sample *sample,
struct machine *machine)
{
- const u32 pid = perf_evsel__intval(evsel, sample, "pid");
- const u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
+ const u32 pid = evsel__intval(evsel, sample, "pid");
+ const u64 runtime = evsel__intval(evsel, sample, "runtime");
struct thread *thread = machine__findnew_thread(machine, -1, pid);
struct work_atoms *atoms = thread_atoms_search(&sched->atom_root, thread, &sched->cmp_pid);
u64 timestamp = sample->time;
@@ -1211,7 +1211,7 @@ static int latency_wakeup_event(struct perf_sched *sched,
struct perf_sample *sample,
struct machine *machine)
{
- const u32 pid = perf_evsel__intval(evsel, sample, "pid");
+ const u32 pid = evsel__intval(evsel, sample, "pid");
struct work_atoms *atoms;
struct work_atom *atom;
struct thread *wakee;
@@ -1272,7 +1272,7 @@ static int latency_migrate_task_event(struct perf_sched *sched,
struct perf_sample *sample,
struct machine *machine)
{
- const u32 pid = perf_evsel__intval(evsel, sample, "pid");
+ const u32 pid = evsel__intval(evsel, sample, "pid");
u64 timestamp = sample->time;
struct work_atoms *atoms;
struct work_atom *atom;
@@ -1526,7 +1526,7 @@ map__findnew_thread(struct perf_sched *sched, struct machine *machine, pid_t pid
static int map_switch_event(struct perf_sched *sched, struct evsel *evsel,
struct perf_sample *sample, struct machine *machine)
{
- const u32 next_pid = perf_evsel__intval(evsel, sample, "next_pid");
+ const u32 next_pid = evsel__intval(evsel, sample, "next_pid");
struct thread *sched_in;
struct thread_runtime *tr;
int new_shortname;
@@ -1670,8 +1670,8 @@ static int process_sched_switch_event(struct perf_tool *tool,
{
struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
int this_cpu = sample->cpu, err = 0;
- u32 prev_pid = perf_evsel__intval(evsel, sample, "prev_pid"),
- next_pid = perf_evsel__intval(evsel, sample, "next_pid");
+ u32 prev_pid = evsel__intval(evsel, sample, "prev_pid"),
+ next_pid = evsel__intval(evsel, sample, "next_pid");
if (sched->curr_pid[this_cpu] != (u32)-1) {
/*
@@ -1848,7 +1848,7 @@ static inline void print_sched_time(unsigned long long nsecs, int width)
* returns runtime data for event, allocating memory for it the
* first time it is used.
*/
-static struct evsel_runtime *perf_evsel__get_runtime(struct evsel *evsel)
+static struct evsel_runtime *evsel__get_runtime(struct evsel *evsel)
{
struct evsel_runtime *r = evsel->priv;
@@ -1863,10 +1863,9 @@ static struct evsel_runtime *perf_evsel__get_runtime(struct evsel *evsel)
/*
* save last time event was seen per cpu
*/
-static void perf_evsel__save_time(struct evsel *evsel,
- u64 timestamp, u32 cpu)
+static void evsel__save_time(struct evsel *evsel, u64 timestamp, u32 cpu)
{
- struct evsel_runtime *r = perf_evsel__get_runtime(evsel);
+ struct evsel_runtime *r = evsel__get_runtime(evsel);
if (r == NULL)
return;
@@ -1890,9 +1889,9 @@ static void perf_evsel__save_time(struct evsel *evsel,
}
/* returns last time this event was seen on the given cpu */
-static u64 perf_evsel__get_time(struct evsel *evsel, u32 cpu)
+static u64 evsel__get_time(struct evsel *evsel, u32 cpu)
{
- struct evsel_runtime *r = perf_evsel__get_runtime(evsel);
+ struct evsel_runtime *r = evsel__get_runtime(evsel);
if ((r == NULL) || (r->last_time == NULL) || (cpu >= r->ncpu))
return 0;
@@ -2004,8 +2003,8 @@ static void timehist_print_sample(struct perf_sched *sched,
u64 t, int state)
{
struct thread_runtime *tr = thread__priv(thread);
- const char *next_comm = perf_evsel__strval(evsel, sample, "next_comm");
- const u32 next_pid = perf_evsel__intval(evsel, sample, "next_pid");
+ const char *next_comm = evsel__strval(evsel, sample, "next_comm");
+ const u32 next_pid = evsel__intval(evsel, sample, "next_pid");
u32 max_cpus = sched->max_cpu + 1;
char tstr[64];
char nstr[30];
@@ -2136,8 +2135,8 @@ static bool is_idle_sample(struct perf_sample *sample,
struct evsel *evsel)
{
/* pid 0 == swapper == idle task */
- if (strcmp(perf_evsel__name(evsel), "sched:sched_switch") == 0)
- return perf_evsel__intval(evsel, sample, "prev_pid") == 0;
+ if (strcmp(evsel__name(evsel), "sched:sched_switch") == 0)
+ return evsel__intval(evsel, sample, "prev_pid") == 0;
return sample->pid == 0;
}
@@ -2334,7 +2333,7 @@ static struct thread *timehist_get_thread(struct perf_sched *sched,
itr->last_thread = thread;
/* copy task callchain when entering to idle */
- if (perf_evsel__intval(evsel, sample, "next_pid") == 0)
+ if (evsel__intval(evsel, sample, "next_pid") == 0)
save_idle_callchain(sched, itr, sample);
}
}
@@ -2355,10 +2354,10 @@ static bool timehist_skip_sample(struct perf_sched *sched,
}
if (sched->idle_hist) {
- if (strcmp(perf_evsel__name(evsel), "sched:sched_switch"))
+ if (strcmp(evsel__name(evsel), "sched:sched_switch"))
rc = true;
- else if (perf_evsel__intval(evsel, sample, "prev_pid") != 0 &&
- perf_evsel__intval(evsel, sample, "next_pid") != 0)
+ else if (evsel__intval(evsel, sample, "prev_pid") != 0 &&
+ evsel__intval(evsel, sample, "next_pid") != 0)
rc = true;
}
@@ -2409,7 +2408,7 @@ static int timehist_sched_wakeup_event(struct perf_tool *tool,
struct thread *thread;
struct thread_runtime *tr = NULL;
/* want pid of awakened task not pid in sample */
- const u32 pid = perf_evsel__intval(evsel, sample, "pid");
+ const u32 pid = evsel__intval(evsel, sample, "pid");
thread = machine__findnew_thread(machine, 0, pid);
if (thread == NULL)
@@ -2445,8 +2444,8 @@ static void timehist_print_migration_event(struct perf_sched *sched,
return;
max_cpus = sched->max_cpu + 1;
- ocpu = perf_evsel__intval(evsel, sample, "orig_cpu");
- dcpu = perf_evsel__intval(evsel, sample, "dest_cpu");
+ ocpu = evsel__intval(evsel, sample, "orig_cpu");
+ dcpu = evsel__intval(evsel, sample, "dest_cpu");
thread = machine__findnew_thread(machine, sample->pid, sample->tid);
if (thread == NULL)
@@ -2493,7 +2492,7 @@ static int timehist_migrate_task_event(struct perf_tool *tool,
struct thread *thread;
struct thread_runtime *tr = NULL;
/* want pid of migrated task not pid in sample */
- const u32 pid = perf_evsel__intval(evsel, sample, "pid");
+ const u32 pid = evsel__intval(evsel, sample, "pid");
thread = machine__findnew_thread(machine, 0, pid);
if (thread == NULL)
@@ -2524,8 +2523,7 @@ static int timehist_sched_change_event(struct perf_tool *tool,
struct thread_runtime *tr = NULL;
u64 tprev, t = sample->time;
int rc = 0;
- int state = perf_evsel__intval(evsel, sample, "prev_state");
-
+ int state = evsel__intval(evsel, sample, "prev_state");
if (machine__resolve(machine, &al, sample) < 0) {
pr_err("problem processing %d event. skipping it\n",
@@ -2549,7 +2547,7 @@ static int timehist_sched_change_event(struct perf_tool *tool,
goto out;
}
- tprev = perf_evsel__get_time(evsel, sample->cpu);
+ tprev = evsel__get_time(evsel, sample->cpu);
/*
* If start time given:
@@ -2632,7 +2630,7 @@ out:
tr->ready_to_run = 0;
}
- perf_evsel__save_time(evsel, sample->time, sample->cpu);
+ evsel__save_time(evsel, sample->time, sample->cpu);
return rc;
}
@@ -2942,7 +2940,7 @@ static int timehist_check_attr(struct perf_sched *sched,
struct evsel_runtime *er;
list_for_each_entry(evsel, &evlist->core.entries, core.node) {
- er = perf_evsel__get_runtime(evsel);
+ er = evsel__get_runtime(evsel);
if (er == NULL) {
pr_err("Failed to allocate memory for evsel runtime data\n");
return -1;
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 1f57a7ecdf3d..56d7bcd12671 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -273,7 +273,7 @@ static struct evsel_script *perf_evsel_script__new(struct evsel *evsel,
struct evsel_script *es = zalloc(sizeof(*es));
if (es != NULL) {
- if (asprintf(&es->filename, "%s.%s.dump", data->file.path, perf_evsel__name(evsel)) < 0)
+ if (asprintf(&es->filename, "%s.%s.dump", data->file.path, evsel__name(evsel)) < 0)
goto out_free;
es->fp = fopen(es->filename, "w");
if (es->fp == NULL)
@@ -351,10 +351,8 @@ static const char *output_field2str(enum perf_output_field field)
#define PRINT_FIELD(x) (output[output_type(attr->type)].fields & PERF_OUTPUT_##x)
-static int perf_evsel__do_check_stype(struct evsel *evsel,
- u64 sample_type, const char *sample_msg,
- enum perf_output_field field,
- bool allow_user_set)
+static int evsel__do_check_stype(struct evsel *evsel, u64 sample_type, const char *sample_msg,
+ enum perf_output_field field, bool allow_user_set)
{
struct perf_event_attr *attr = &evsel->core.attr;
int type = output_type(attr->type);
@@ -366,7 +364,7 @@ static int perf_evsel__do_check_stype(struct evsel *evsel,
if (output[type].user_set_fields & field) {
if (allow_user_set)
return 0;
- evname = perf_evsel__name(evsel);
+ evname = evsel__name(evsel);
pr_err("Samples for '%s' event do not have %s attribute set. "
"Cannot print '%s' field.\n",
evname, sample_msg, output_field2str(field));
@@ -375,7 +373,7 @@ static int perf_evsel__do_check_stype(struct evsel *evsel,
/* user did not ask for it explicitly so remove from the default list */
output[type].fields &= ~field;
- evname = perf_evsel__name(evsel);
+ evname = evsel__name(evsel);
pr_debug("Samples for '%s' event do not have %s attribute set. "
"Skipping '%s' field.\n",
evname, sample_msg, output_field2str(field));
@@ -383,16 +381,13 @@ static int perf_evsel__do_check_stype(struct evsel *evsel,
return 0;
}
-static int perf_evsel__check_stype(struct evsel *evsel,
- u64 sample_type, const char *sample_msg,
- enum perf_output_field field)
+static int evsel__check_stype(struct evsel *evsel, u64 sample_type, const char *sample_msg,
+ enum perf_output_field field)
{
- return perf_evsel__do_check_stype(evsel, sample_type, sample_msg, field,
- false);
+ return evsel__do_check_stype(evsel, sample_type, sample_msg, field, false);
}
-static int perf_evsel__check_attr(struct evsel *evsel,
- struct perf_session *session)
+static int perf_evsel__check_attr(struct evsel *evsel, struct perf_session *session)
{
struct perf_event_attr *attr = &evsel->core.attr;
bool allow_user_set;
@@ -404,32 +399,28 @@ static int perf_evsel__check_attr(struct evsel *evsel,
HEADER_AUXTRACE);
if (PRINT_FIELD(TRACE) &&
- !perf_session__has_traces(session, "record -R"))
+ !perf_session__has_traces(session, "record -R"))
return -EINVAL;
if (PRINT_FIELD(IP)) {
- if (perf_evsel__check_stype(evsel, PERF_SAMPLE_IP, "IP",
- PERF_OUTPUT_IP))
+ if (evsel__check_stype(evsel, PERF_SAMPLE_IP, "IP", PERF_OUTPUT_IP))
return -EINVAL;
}
if (PRINT_FIELD(ADDR) &&
- perf_evsel__do_check_stype(evsel, PERF_SAMPLE_ADDR, "ADDR",
- PERF_OUTPUT_ADDR, allow_user_set))
+ evsel__do_check_stype(evsel, PERF_SAMPLE_ADDR, "ADDR", PERF_OUTPUT_ADDR, allow_user_set))
return -EINVAL;
if (PRINT_FIELD(DATA_SRC) &&
- perf_evsel__check_stype(evsel, PERF_SAMPLE_DATA_SRC, "DATA_SRC",
- PERF_OUTPUT_DATA_SRC))
+ evsel__check_stype(evsel, PERF_SAMPLE_DATA_SRC, "DATA_SRC", PERF_OUTPUT_DATA_SRC))
return -EINVAL;
if (PRINT_FIELD(WEIGHT) &&
- perf_evsel__check_stype(evsel, PERF_SAMPLE_WEIGHT, "WEIGHT",
- PERF_OUTPUT_WEIGHT))
+ evsel__check_stype(evsel, PERF_SAMPLE_WEIGHT, "WEIGHT", PERF_OUTPUT_WEIGHT))
return -EINVAL;
if (PRINT_FIELD(SYM) &&
- !(evsel->core.attr.sample_type & (PERF_SAMPLE_IP|PERF_SAMPLE_ADDR))) {
+ !(evsel->core.attr.sample_type & (PERF_SAMPLE_IP|PERF_SAMPLE_ADDR))) {
pr_err("Display of symbols requested but neither sample IP nor "
"sample address\navailable. Hence, no addresses to convert "
"to symbols.\n");
@@ -441,7 +432,7 @@ static int perf_evsel__check_attr(struct evsel *evsel,
return -EINVAL;
}
if (PRINT_FIELD(DSO) &&
- !(evsel->core.attr.sample_type & (PERF_SAMPLE_IP|PERF_SAMPLE_ADDR))) {
+ !(evsel->core.attr.sample_type & (PERF_SAMPLE_IP|PERF_SAMPLE_ADDR))) {
pr_err("Display of DSO requested but no address to convert.\n");
return -EINVAL;
}
@@ -458,33 +449,27 @@ static int perf_evsel__check_attr(struct evsel *evsel,
return -EINVAL;
}
if ((PRINT_FIELD(PID) || PRINT_FIELD(TID)) &&
- perf_evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID",
- PERF_OUTPUT_TID|PERF_OUTPUT_PID))
+ evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID", PERF_OUTPUT_TID|PERF_OUTPUT_PID))
return -EINVAL;
if (PRINT_FIELD(TIME) &&
- perf_evsel__check_stype(evsel, PERF_SAMPLE_TIME, "TIME",
- PERF_OUTPUT_TIME))
+ evsel__check_stype(evsel, PERF_SAMPLE_TIME, "TIME", PERF_OUTPUT_TIME))
return -EINVAL;
if (PRINT_FIELD(CPU) &&
- perf_evsel__do_check_stype(evsel, PERF_SAMPLE_CPU, "CPU",
- PERF_OUTPUT_CPU, allow_user_set))
+ evsel__do_check_stype(evsel, PERF_SAMPLE_CPU, "CPU", PERF_OUTPUT_CPU, allow_user_set))
return -EINVAL;
if (PRINT_FIELD(IREGS) &&
- perf_evsel__check_stype(evsel, PERF_SAMPLE_REGS_INTR, "IREGS",
- PERF_OUTPUT_IREGS))
+ evsel__check_stype(evsel, PERF_SAMPLE_REGS_INTR, "IREGS", PERF_OUTPUT_IREGS))
return -EINVAL;
if (PRINT_FIELD(UREGS) &&
- perf_evsel__check_stype(evsel, PERF_SAMPLE_REGS_USER, "UREGS",
- PERF_OUTPUT_UREGS))
+ evsel__check_stype(evsel, PERF_SAMPLE_REGS_USER, "UREGS", PERF_OUTPUT_UREGS))
return -EINVAL;
if (PRINT_FIELD(PHYS_ADDR) &&
- perf_evsel__check_stype(evsel, PERF_SAMPLE_PHYS_ADDR, "PHYS_ADDR",
- PERF_OUTPUT_PHYS_ADDR))
+ evsel__check_stype(evsel, PERF_SAMPLE_PHYS_ADDR, "PHYS_ADDR", PERF_OUTPUT_PHYS_ADDR))
return -EINVAL;
return 0;
@@ -604,8 +589,6 @@ static int perf_sample__fprintf_regs(struct regs_dump *regs, uint64_t mask,
printed += fprintf(fp, "%5s:0x%"PRIx64" ", perf_reg_name(r), val);
}
- fprintf(fp, "\n");
-
return printed;
}
@@ -1697,6 +1680,7 @@ struct perf_script {
bool show_cgroup_events;
bool allocated;
bool per_event_dump;
+ bool stitch_lbr;
struct evswitch evswitch;
struct perf_cpu_map *cpus;
struct perf_thread_map *threads;
@@ -1713,7 +1697,7 @@ static int perf_evlist__max_name_len(struct evlist *evlist)
int max = 0;
evlist__for_each_entry(evlist, evsel) {
- int len = strlen(perf_evsel__name(evsel));
+ int len = strlen(evsel__name(evsel));
max = MAX(len, max);
}
@@ -1887,7 +1871,7 @@ static void process_event(struct perf_script *script,
fprintf(fp, "%10" PRIu64 " ", sample->period);
if (PRINT_FIELD(EVNAME)) {
- const char *evname = perf_evsel__name(evsel);
+ const char *evname = evsel__name(evsel);
if (!script->name_width)
script->name_width = perf_evlist__max_name_len(script->session->evlist);
@@ -1923,6 +1907,9 @@ static void process_event(struct perf_script *script,
if (PRINT_FIELD(IP)) {
struct callchain_cursor *cursor = NULL;
+ if (script->stitch_lbr)
+ al->thread->lbr_stitch_enable = true;
+
if (symbol_conf.use_callchain && sample->callchain &&
thread__resolve_callchain(al->thread, &callchain_cursor, evsel,
sample, NULL, NULL, scripting_max_stack) == 0)
@@ -1946,7 +1933,7 @@ static void process_event(struct perf_script *script,
else if (PRINT_FIELD(BRSTACKOFF))
perf_sample__fprintf_brstackoff(sample, thread, attr, fp);
- if (perf_evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT))
+ if (evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT))
perf_sample__fprintf_bpf_output(sample, fp);
perf_sample__fprintf_insn(sample, attr, thread, machine, fp);
@@ -1975,7 +1962,7 @@ static struct scripting_ops *scripting_ops;
static void __process_stat(struct evsel *counter, u64 tstamp)
{
int nthreads = perf_thread_map__nr(counter->core.threads);
- int ncpus = perf_evsel__nr_cpus(counter);
+ int ncpus = evsel__nr_cpus(counter);
int cpu, thread;
static int header_printed;
@@ -2001,7 +1988,7 @@ static void __process_stat(struct evsel *counter, u64 tstamp)
counts->ena,
counts->run,
tstamp,
- perf_evsel__name(counter));
+ evsel__name(counter));
}
}
}
@@ -2040,7 +2027,7 @@ static int cleanup_scripting(void)
static bool filter_cpu(struct perf_sample *sample)
{
- if (cpu_list)
+ if (cpu_list && sample->cpu != (u32)-1)
return !test_bit(sample->cpu, cpu_bitmap);
return false;
}
@@ -2138,41 +2125,59 @@ static int process_attr(struct perf_tool *tool, union perf_event *event,
return err;
}
-static int process_comm_event(struct perf_tool *tool,
- union perf_event *event,
- struct perf_sample *sample,
- struct machine *machine)
+static int print_event_with_time(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine,
+ pid_t pid, pid_t tid, u64 timestamp)
{
- struct thread *thread;
struct perf_script *script = container_of(tool, struct perf_script, tool);
struct perf_session *session = script->session;
struct evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
- int ret = -1;
+ struct thread *thread = NULL;
- thread = machine__findnew_thread(machine, event->comm.pid, event->comm.tid);
- if (thread == NULL) {
- pr_debug("problem processing COMM event, skipping it.\n");
- return -1;
+ if (evsel && !evsel->core.attr.sample_id_all) {
+ sample->cpu = 0;
+ sample->time = timestamp;
+ sample->pid = pid;
+ sample->tid = tid;
}
- if (perf_event__process_comm(tool, event, sample, machine) < 0)
- goto out;
+ if (filter_cpu(sample))
+ return 0;
- if (!evsel->core.attr.sample_id_all) {
- sample->cpu = 0;
- sample->time = 0;
- sample->tid = event->comm.tid;
- sample->pid = event->comm.pid;
- }
- if (!filter_cpu(sample)) {
+ if (tid != -1)
+ thread = machine__findnew_thread(machine, pid, tid);
+
+ if (thread && evsel) {
perf_sample__fprintf_start(sample, thread, evsel,
- PERF_RECORD_COMM, stdout);
- perf_event__fprintf(event, stdout);
+ event->header.type, stdout);
}
- ret = 0;
-out:
+
+ perf_event__fprintf(event, stdout);
+
thread__put(thread);
- return ret;
+
+ return 0;
+}
+
+static int print_event(struct perf_tool *tool, union perf_event *event,
+ struct perf_sample *sample, struct machine *machine,
+ pid_t pid, pid_t tid)
+{
+ return print_event_with_time(tool, event, sample, machine, pid, tid, 0);
+}
+
+static int process_comm_event(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ if (perf_event__process_comm(tool, event, sample, machine) < 0)
+ return -1;
+
+ return print_event(tool, event, sample, machine, event->comm.pid,
+ event->comm.tid);
}
static int process_namespaces_event(struct perf_tool *tool,
@@ -2180,37 +2185,11 @@ static int process_namespaces_event(struct perf_tool *tool,
struct perf_sample *sample,
struct machine *machine)
{
- struct thread *thread;
- struct perf_script *script = container_of(tool, struct perf_script, tool);
- struct perf_session *session = script->session;
- struct evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
- int ret = -1;
-
- thread = machine__findnew_thread(machine, event->namespaces.pid,
- event->namespaces.tid);
- if (thread == NULL) {
- pr_debug("problem processing NAMESPACES event, skipping it.\n");
- return -1;
- }
-
if (perf_event__process_namespaces(tool, event, sample, machine) < 0)
- goto out;
+ return -1;
- if (!evsel->core.attr.sample_id_all) {
- sample->cpu = 0;
- sample->time = 0;
- sample->tid = event->namespaces.tid;
- sample->pid = event->namespaces.pid;
- }
- if (!filter_cpu(sample)) {
- perf_sample__fprintf_start(sample, thread, evsel,
- PERF_RECORD_NAMESPACES, stdout);
- perf_event__fprintf(event, stdout);
- }
- ret = 0;
-out:
- thread__put(thread);
- return ret;
+ return print_event(tool, event, sample, machine, event->namespaces.pid,
+ event->namespaces.tid);
}
static int process_cgroup_event(struct perf_tool *tool,
@@ -2218,34 +2197,11 @@ static int process_cgroup_event(struct perf_tool *tool,
struct perf_sample *sample,
struct machine *machine)
{
- struct thread *thread;
- struct perf_script *script = container_of(tool, struct perf_script, tool);
- struct perf_session *session = script->session;
- struct evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
- int ret = -1;
-
- thread = machine__findnew_thread(machine, sample->pid, sample->tid);
- if (thread == NULL) {
- pr_debug("problem processing CGROUP event, skipping it.\n");
- return -1;
- }
-
if (perf_event__process_cgroup(tool, event, sample, machine) < 0)
- goto out;
+ return -1;
- if (!evsel->core.attr.sample_id_all) {
- sample->cpu = 0;
- sample->time = 0;
- }
- if (!filter_cpu(sample)) {
- perf_sample__fprintf_start(sample, thread, evsel,
- PERF_RECORD_CGROUP, stdout);
- perf_event__fprintf(event, stdout);
- }
- ret = 0;
-out:
- thread__put(thread);
- return ret;
+ return print_event(tool, event, sample, machine, sample->pid,
+ sample->tid);
}
static int process_fork_event(struct perf_tool *tool,
@@ -2253,69 +2209,24 @@ static int process_fork_event(struct perf_tool *tool,
struct perf_sample *sample,
struct machine *machine)
{
- struct thread *thread;
- struct perf_script *script = container_of(tool, struct perf_script, tool);
- struct perf_session *session = script->session;
- struct evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
-
if (perf_event__process_fork(tool, event, sample, machine) < 0)
return -1;
- thread = machine__findnew_thread(machine, event->fork.pid, event->fork.tid);
- if (thread == NULL) {
- pr_debug("problem processing FORK event, skipping it.\n");
- return -1;
- }
-
- if (!evsel->core.attr.sample_id_all) {
- sample->cpu = 0;
- sample->time = event->fork.time;
- sample->tid = event->fork.tid;
- sample->pid = event->fork.pid;
- }
- if (!filter_cpu(sample)) {
- perf_sample__fprintf_start(sample, thread, evsel,
- PERF_RECORD_FORK, stdout);
- perf_event__fprintf(event, stdout);
- }
- thread__put(thread);
-
- return 0;
+ return print_event_with_time(tool, event, sample, machine,
+ event->fork.pid, event->fork.tid,
+ event->fork.time);
}
static int process_exit_event(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
{
- int err = 0;
- struct thread *thread;
- struct perf_script *script = container_of(tool, struct perf_script, tool);
- struct perf_session *session = script->session;
- struct evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
-
- thread = machine__findnew_thread(machine, event->fork.pid, event->fork.tid);
- if (thread == NULL) {
- pr_debug("problem processing EXIT event, skipping it.\n");
+ /* Print before 'exit' deletes anything */
+ if (print_event_with_time(tool, event, sample, machine, event->fork.pid,
+ event->fork.tid, event->fork.time))
return -1;
- }
-
- if (!evsel->core.attr.sample_id_all) {
- sample->cpu = 0;
- sample->time = 0;
- sample->tid = event->fork.tid;
- sample->pid = event->fork.pid;
- }
- if (!filter_cpu(sample)) {
- perf_sample__fprintf_start(sample, thread, evsel,
- PERF_RECORD_EXIT, stdout);
- perf_event__fprintf(event, stdout);
- }
-
- if (perf_event__process_exit(tool, event, sample, machine) < 0)
- err = -1;
- thread__put(thread);
- return err;
+ return perf_event__process_exit(tool, event, sample, machine);
}
static int process_mmap_event(struct perf_tool *tool,
@@ -2323,33 +2234,11 @@ static int process_mmap_event(struct perf_tool *tool,
struct perf_sample *sample,
struct machine *machine)
{
- struct thread *thread;
- struct perf_script *script = container_of(tool, struct perf_script, tool);
- struct perf_session *session = script->session;
- struct evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
-
if (perf_event__process_mmap(tool, event, sample, machine) < 0)
return -1;
- thread = machine__findnew_thread(machine, event->mmap.pid, event->mmap.tid);
- if (thread == NULL) {
- pr_debug("problem processing MMAP event, skipping it.\n");
- return -1;
- }
-
- if (!evsel->core.attr.sample_id_all) {
- sample->cpu = 0;
- sample->time = 0;
- sample->tid = event->mmap.tid;
- sample->pid = event->mmap.pid;
- }
- if (!filter_cpu(sample)) {
- perf_sample__fprintf_start(sample, thread, evsel,
- PERF_RECORD_MMAP, stdout);
- perf_event__fprintf(event, stdout);
- }
- thread__put(thread);
- return 0;
+ return print_event(tool, event, sample, machine, event->mmap.pid,
+ event->mmap.tid);
}
static int process_mmap2_event(struct perf_tool *tool,
@@ -2357,33 +2246,11 @@ static int process_mmap2_event(struct perf_tool *tool,
struct perf_sample *sample,
struct machine *machine)
{
- struct thread *thread;
- struct perf_script *script = container_of(tool, struct perf_script, tool);
- struct perf_session *session = script->session;
- struct evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
-
if (perf_event__process_mmap2(tool, event, sample, machine) < 0)
return -1;
- thread = machine__findnew_thread(machine, event->mmap2.pid, event->mmap2.tid);
- if (thread == NULL) {
- pr_debug("problem processing MMAP2 event, skipping it.\n");
- return -1;
- }
-
- if (!evsel->core.attr.sample_id_all) {
- sample->cpu = 0;
- sample->time = 0;
- sample->tid = event->mmap2.tid;
- sample->pid = event->mmap2.pid;
- }
- if (!filter_cpu(sample)) {
- perf_sample__fprintf_start(sample, thread, evsel,
- PERF_RECORD_MMAP2, stdout);
- perf_event__fprintf(event, stdout);
- }
- thread__put(thread);
- return 0;
+ return print_event(tool, event, sample, machine, event->mmap2.pid,
+ event->mmap2.tid);
}
static int process_switch_event(struct perf_tool *tool,
@@ -2391,10 +2258,7 @@ static int process_switch_event(struct perf_tool *tool,
struct perf_sample *sample,
struct machine *machine)
{
- struct thread *thread;
struct perf_script *script = container_of(tool, struct perf_script, tool);
- struct perf_session *session = script->session;
- struct evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
if (perf_event__process_switch(tool, event, sample, machine) < 0)
return -1;
@@ -2405,20 +2269,8 @@ static int process_switch_event(struct perf_tool *tool,
if (!script->show_switch_events)
return 0;
- thread = machine__findnew_thread(machine, sample->pid,
- sample->tid);
- if (thread == NULL) {
- pr_debug("problem processing SWITCH event, skipping it.\n");
- return -1;
- }
-
- if (!filter_cpu(sample)) {
- perf_sample__fprintf_start(sample, thread, evsel,
- PERF_RECORD_SWITCH, stdout);
- perf_event__fprintf(event, stdout);
- }
- thread__put(thread);
- return 0;
+ return print_event(tool, event, sample, machine, sample->pid,
+ sample->tid);
}
static int
@@ -2427,23 +2279,8 @@ process_lost_event(struct perf_tool *tool,
struct perf_sample *sample,
struct machine *machine)
{
- struct perf_script *script = container_of(tool, struct perf_script, tool);
- struct perf_session *session = script->session;
- struct evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
- struct thread *thread;
-
- thread = machine__findnew_thread(machine, sample->pid,
- sample->tid);
- if (thread == NULL)
- return -1;
-
- if (!filter_cpu(sample)) {
- perf_sample__fprintf_start(sample, thread, evsel,
- PERF_RECORD_LOST, stdout);
- perf_event__fprintf(event, stdout);
- }
- thread__put(thread);
- return 0;
+ return print_event(tool, event, sample, machine, sample->pid,
+ sample->tid);
}
static int
@@ -2462,33 +2299,11 @@ process_bpf_events(struct perf_tool *tool __maybe_unused,
struct perf_sample *sample,
struct machine *machine)
{
- struct thread *thread;
- struct perf_script *script = container_of(tool, struct perf_script, tool);
- struct perf_session *session = script->session;
- struct evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
-
if (machine__process_ksymbol(machine, event, sample) < 0)
return -1;
- if (!evsel->core.attr.sample_id_all) {
- perf_event__fprintf(event, stdout);
- return 0;
- }
-
- thread = machine__findnew_thread(machine, sample->pid, sample->tid);
- if (thread == NULL) {
- pr_debug("problem processing MMAP event, skipping it.\n");
- return -1;
- }
-
- if (!filter_cpu(sample)) {
- perf_sample__fprintf_start(sample, thread, evsel,
- event->header.type, stdout);
- perf_event__fprintf(event, stdout);
- }
-
- thread__put(thread);
- return 0;
+ return print_event(tool, event, sample, machine, sample->pid,
+ sample->tid);
}
static void sig_handler(int sig __maybe_unused)
@@ -3145,7 +2960,7 @@ static int check_ev_match(char *dir_name, char *scriptname,
match = 0;
evlist__for_each_entry(session->evlist, pos) {
- if (!strcmp(perf_evsel__name(pos), evname)) {
+ if (!strcmp(evsel__name(pos), evname)) {
match = 1;
break;
}
@@ -3342,6 +3157,12 @@ static void script__setup_sample_type(struct perf_script *script)
else
callchain_param.record_mode = CALLCHAIN_FP;
}
+
+ if (script->stitch_lbr && (callchain_param.record_mode != CALLCHAIN_LBR)) {
+ pr_warning("Can't find LBR callchain. Switch off --stitch-lbr.\n"
+ "Please apply --call-graph lbr when recording.\n");
+ script->stitch_lbr = false;
+ }
}
static int process_stat_round_event(struct perf_session *session,
@@ -3653,6 +3474,8 @@ int cmd_script(int argc, const char **argv)
"file", "file saving guest os /proc/kallsyms"),
OPT_STRING(0, "guestmodules", &symbol_conf.default_guest_modules,
"file", "file saving guest os /proc/modules"),
+ OPT_BOOLEAN('\0', "stitch-lbr", &script.stitch_lbr,
+ "Enable LBR callgraph stitching approach"),
OPTS_EVSWITCH(&script.evswitch),
OPT_END()
};
@@ -3709,7 +3532,7 @@ int cmd_script(int argc, const char **argv)
return -1;
}
- if (itrace_synth_opts.callchain &&
+ if ((itrace_synth_opts.callchain || itrace_synth_opts.add_callchain) &&
itrace_synth_opts.callchain_sz > scripting_max_stack)
scripting_max_stack = itrace_synth_opts.callchain_sz;
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 9207b6c45475..e0c1ad23c768 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -238,9 +238,8 @@ static int write_stat_round_event(u64 tm, u64 type)
#define SID(e, x, y) xyarray__entry(e->core.sample_id, x, y)
-static int
-perf_evsel__write_stat_event(struct evsel *counter, u32 cpu, u32 thread,
- struct perf_counts_values *count)
+static int evsel__write_stat_event(struct evsel *counter, u32 cpu, u32 thread,
+ struct perf_counts_values *count)
{
struct perf_sample_id *sid = SID(counter, cpu, thread);
@@ -259,7 +258,7 @@ static int read_single_counter(struct evsel *counter, int cpu,
count->val = val;
return 0;
}
- return perf_evsel__read_counter(counter, cpu, thread);
+ return evsel__read_counter(counter, cpu, thread);
}
/*
@@ -284,7 +283,7 @@ static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu)
/*
* The leader's group read loads data into its group members
- * (via perf_evsel__read_counter()) and sets their count->loaded.
+ * (via evsel__read_counter()) and sets their count->loaded.
*/
if (!perf_counts__is_loaded(counter->counts, cpu, thread) &&
read_single_counter(counter, cpu, thread, rs)) {
@@ -297,7 +296,7 @@ static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu)
perf_counts__set_loaded(counter->counts, cpu, thread, false);
if (STAT_RECORD) {
- if (perf_evsel__write_stat_event(counter, cpu, thread, count)) {
+ if (evsel__write_stat_event(counter, cpu, thread, count)) {
pr_err("failed to write stat event\n");
return -1;
}
@@ -306,7 +305,7 @@ static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu)
if (verbose > 1) {
fprintf(stat_config.output,
"%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
- perf_evsel__name(counter),
+ evsel__name(counter),
cpu,
count->val, count->ena, count->run);
}
@@ -359,6 +358,7 @@ static void process_interval(void)
clock_gettime(CLOCK_MONOTONIC, &ts);
diff_timespec(&rs, &ts, &ref_time);
+ perf_stat__reset_shadow_per_stat(&rt_stat);
read_counters(&rs);
if (STAT_RECORD) {
@@ -409,7 +409,7 @@ static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *inf
workload_exec_errno = info->si_value.sival_int;
}
-static bool perf_evsel__should_store_id(struct evsel *counter)
+static bool evsel__should_store_id(struct evsel *counter)
{
return STAT_RECORD || counter->core.attr.read_format & PERF_FORMAT_ID;
}
@@ -454,7 +454,7 @@ static enum counter_recovery stat_handle_error(struct evsel *counter)
errno == ENXIO) {
if (verbose > 0)
ui__warning("%s event is not supported by the kernel.\n",
- perf_evsel__name(counter));
+ evsel__name(counter));
counter->supported = false;
/*
* errored is a sticky flag that means one of the counter's
@@ -465,7 +465,7 @@ static enum counter_recovery stat_handle_error(struct evsel *counter)
if ((counter->leader != counter) ||
!(counter->leader->core.nr_members > 1))
return COUNTER_SKIP;
- } else if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) {
+ } else if (evsel__fallback(counter, errno, msg, sizeof(msg))) {
if (verbose > 0)
ui__warning("%s\n", msg);
return COUNTER_RETRY;
@@ -483,8 +483,7 @@ static enum counter_recovery stat_handle_error(struct evsel *counter)
}
}
- perf_evsel__open_strerror(counter, &target,
- errno, msg, sizeof(msg));
+ evsel__open_strerror(counter, &target, errno, msg, sizeof(msg));
ui__error("%s\n", msg);
if (child_pid != -1)
@@ -604,7 +603,7 @@ try_again:
if (!counter->reset_group)
continue;
try_again_reset:
- pr_debug2("reopening weak %s\n", perf_evsel__name(counter));
+ pr_debug2("reopening weak %s\n", evsel__name(counter));
if (create_perf_stat_counter(counter, &stat_config, &target,
counter->cpu_iter - 1) < 0) {
@@ -635,14 +634,14 @@ try_again_reset:
if (l > stat_config.unit_width)
stat_config.unit_width = l;
- if (perf_evsel__should_store_id(counter) &&
- perf_evsel__store_ids(counter, evsel_list))
+ if (evsel__should_store_id(counter) &&
+ evsel__store_ids(counter, evsel_list))
return -1;
}
if (perf_evlist__apply_filters(evsel_list, &counter)) {
pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
- counter->filter, perf_evsel__name(counter), errno,
+ counter->filter, evsel__name(counter), errno,
str_error_r(errno, msg, sizeof(msg)));
return -1;
}
diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c
index 9e84fae9b096..c76f84b174c4 100644
--- a/tools/perf/builtin-timechart.c
+++ b/tools/perf/builtin-timechart.c
@@ -579,8 +579,8 @@ process_sample_cpu_idle(struct timechart *tchart __maybe_unused,
struct perf_sample *sample,
const char *backtrace __maybe_unused)
{
- u32 state = perf_evsel__intval(evsel, sample, "state");
- u32 cpu_id = perf_evsel__intval(evsel, sample, "cpu_id");
+ u32 state = evsel__intval(evsel, sample, "state");
+ u32 cpu_id = evsel__intval(evsel, sample, "cpu_id");
if (state == (u32)PWR_EVENT_EXIT)
c_state_end(tchart, cpu_id, sample->time);
@@ -595,8 +595,8 @@ process_sample_cpu_frequency(struct timechart *tchart,
struct perf_sample *sample,
const char *backtrace __maybe_unused)
{
- u32 state = perf_evsel__intval(evsel, sample, "state");
- u32 cpu_id = perf_evsel__intval(evsel, sample, "cpu_id");
+ u32 state = evsel__intval(evsel, sample, "state");
+ u32 cpu_id = evsel__intval(evsel, sample, "cpu_id");
p_state_change(tchart, cpu_id, sample->time, state);
return 0;
@@ -608,9 +608,9 @@ process_sample_sched_wakeup(struct timechart *tchart,
struct perf_sample *sample,
const char *backtrace)
{
- u8 flags = perf_evsel__intval(evsel, sample, "common_flags");
- int waker = perf_evsel__intval(evsel, sample, "common_pid");
- int wakee = perf_evsel__intval(evsel, sample, "pid");
+ u8 flags = evsel__intval(evsel, sample, "common_flags");
+ int waker = evsel__intval(evsel, sample, "common_pid");
+ int wakee = evsel__intval(evsel, sample, "pid");
sched_wakeup(tchart, sample->cpu, sample->time, waker, wakee, flags, backtrace);
return 0;
@@ -622,9 +622,9 @@ process_sample_sched_switch(struct timechart *tchart,
struct perf_sample *sample,
const char *backtrace)
{
- int prev_pid = perf_evsel__intval(evsel, sample, "prev_pid");
- int next_pid = perf_evsel__intval(evsel, sample, "next_pid");
- u64 prev_state = perf_evsel__intval(evsel, sample, "prev_state");
+ int prev_pid = evsel__intval(evsel, sample, "prev_pid");
+ int next_pid = evsel__intval(evsel, sample, "next_pid");
+ u64 prev_state = evsel__intval(evsel, sample, "prev_state");
sched_switch(tchart, sample->cpu, sample->time, prev_pid, next_pid,
prev_state, backtrace);
@@ -638,8 +638,8 @@ process_sample_power_start(struct timechart *tchart __maybe_unused,
struct perf_sample *sample,
const char *backtrace __maybe_unused)
{
- u64 cpu_id = perf_evsel__intval(evsel, sample, "cpu_id");
- u64 value = perf_evsel__intval(evsel, sample, "value");
+ u64 cpu_id = evsel__intval(evsel, sample, "cpu_id");
+ u64 value = evsel__intval(evsel, sample, "value");
c_state_start(cpu_id, sample->time, value);
return 0;
@@ -661,8 +661,8 @@ process_sample_power_frequency(struct timechart *tchart,
struct perf_sample *sample,
const char *backtrace __maybe_unused)
{
- u64 cpu_id = perf_evsel__intval(evsel, sample, "cpu_id");
- u64 value = perf_evsel__intval(evsel, sample, "value");
+ u64 cpu_id = evsel__intval(evsel, sample, "cpu_id");
+ u64 value = evsel__intval(evsel, sample, "value");
p_state_change(tchart, cpu_id, sample->time, value);
return 0;
@@ -843,7 +843,7 @@ process_enter_read(struct timechart *tchart,
struct evsel *evsel,
struct perf_sample *sample)
{
- long fd = perf_evsel__intval(evsel, sample, "fd");
+ long fd = evsel__intval(evsel, sample, "fd");
return pid_begin_io_sample(tchart, sample->tid, IOTYPE_READ,
sample->time, fd);
}
@@ -853,7 +853,7 @@ process_exit_read(struct timechart *tchart,
struct evsel *evsel,
struct perf_sample *sample)
{
- long ret = perf_evsel__intval(evsel, sample, "ret");
+ long ret = evsel__intval(evsel, sample, "ret");
return pid_end_io_sample(tchart, sample->tid, IOTYPE_READ,
sample->time, ret);
}
@@ -863,7 +863,7 @@ process_enter_write(struct timechart *tchart,
struct evsel *evsel,
struct perf_sample *sample)
{
- long fd = perf_evsel__intval(evsel, sample, "fd");
+ long fd = evsel__intval(evsel, sample, "fd");
return pid_begin_io_sample(tchart, sample->tid, IOTYPE_WRITE,
sample->time, fd);
}
@@ -873,7 +873,7 @@ process_exit_write(struct timechart *tchart,
struct evsel *evsel,
struct perf_sample *sample)
{
- long ret = perf_evsel__intval(evsel, sample, "ret");
+ long ret = evsel__intval(evsel, sample, "ret");
return pid_end_io_sample(tchart, sample->tid, IOTYPE_WRITE,
sample->time, ret);
}
@@ -883,7 +883,7 @@ process_enter_sync(struct timechart *tchart,
struct evsel *evsel,
struct perf_sample *sample)
{
- long fd = perf_evsel__intval(evsel, sample, "fd");
+ long fd = evsel__intval(evsel, sample, "fd");
return pid_begin_io_sample(tchart, sample->tid, IOTYPE_SYNC,
sample->time, fd);
}
@@ -893,7 +893,7 @@ process_exit_sync(struct timechart *tchart,
struct evsel *evsel,
struct perf_sample *sample)
{
- long ret = perf_evsel__intval(evsel, sample, "ret");
+ long ret = evsel__intval(evsel, sample, "ret");
return pid_end_io_sample(tchart, sample->tid, IOTYPE_SYNC,
sample->time, ret);
}
@@ -903,7 +903,7 @@ process_enter_tx(struct timechart *tchart,
struct evsel *evsel,
struct perf_sample *sample)
{
- long fd = perf_evsel__intval(evsel, sample, "fd");
+ long fd = evsel__intval(evsel, sample, "fd");
return pid_begin_io_sample(tchart, sample->tid, IOTYPE_TX,
sample->time, fd);
}
@@ -913,7 +913,7 @@ process_exit_tx(struct timechart *tchart,
struct evsel *evsel,
struct perf_sample *sample)
{
- long ret = perf_evsel__intval(evsel, sample, "ret");
+ long ret = evsel__intval(evsel, sample, "ret");
return pid_end_io_sample(tchart, sample->tid, IOTYPE_TX,
sample->time, ret);
}
@@ -923,7 +923,7 @@ process_enter_rx(struct timechart *tchart,
struct evsel *evsel,
struct perf_sample *sample)
{
- long fd = perf_evsel__intval(evsel, sample, "fd");
+ long fd = evsel__intval(evsel, sample, "fd");
return pid_begin_io_sample(tchart, sample->tid, IOTYPE_RX,
sample->time, fd);
}
@@ -933,7 +933,7 @@ process_exit_rx(struct timechart *tchart,
struct evsel *evsel,
struct perf_sample *sample)
{
- long ret = perf_evsel__intval(evsel, sample, "ret");
+ long ret = evsel__intval(evsel, sample, "ret");
return pid_end_io_sample(tchart, sample->tid, IOTYPE_RX,
sample->time, ret);
}
@@ -943,7 +943,7 @@ process_enter_poll(struct timechart *tchart,
struct evsel *evsel,
struct perf_sample *sample)
{
- long fd = perf_evsel__intval(evsel, sample, "fd");
+ long fd = evsel__intval(evsel, sample, "fd");
return pid_begin_io_sample(tchart, sample->tid, IOTYPE_POLL,
sample->time, fd);
}
@@ -953,7 +953,7 @@ process_exit_poll(struct timechart *tchart,
struct evsel *evsel,
struct perf_sample *sample)
{
- long ret = perf_evsel__intval(evsel, sample, "ret");
+ long ret = evsel__intval(evsel, sample, "ret");
return pid_end_io_sample(tchart, sample->tid, IOTYPE_POLL,
sample->time, ret);
}
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 289cf83e658a..372c38254654 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -33,6 +33,7 @@
#include "util/map.h"
#include "util/mmap.h"
#include "util/session.h"
+#include "util/thread.h"
#include "util/symbol.h"
#include "util/synthetic-events.h"
#include "util/top.h"
@@ -254,7 +255,7 @@ static void perf_top__show_details(struct perf_top *top)
if (notes->src == NULL)
goto out_unlock;
- printf("Showing %s for %s\n", perf_evsel__name(top->sym_evsel), symbol->name);
+ printf("Showing %s for %s\n", evsel__name(top->sym_evsel), symbol->name);
printf(" Events Pcnt (>=%d%%)\n", top->annotation_opts.min_pcnt);
more = symbol__annotate_printf(&he->ms, top->sym_evsel, &top->annotation_opts);
@@ -297,8 +298,7 @@ static void perf_top__resort_hists(struct perf_top *t)
hists__collapse_resort(hists, NULL);
/* Non-group events are considered as leader */
- if (symbol_conf.event_group &&
- !perf_evsel__is_group_leader(pos)) {
+ if (symbol_conf.event_group && !evsel__is_group_leader(pos)) {
struct hists *leader_hists = evsel__hists(pos->leader);
hists__match(leader_hists, hists);
@@ -441,7 +441,7 @@ static void perf_top__print_mapped_keys(struct perf_top *top)
fprintf(stdout, "\t[e] display entries (lines). \t(%d)\n", top->print_entries);
if (top->evlist->core.nr_entries > 1)
- fprintf(stdout, "\t[E] active event counter. \t(%s)\n", perf_evsel__name(top->sym_evsel));
+ fprintf(stdout, "\t[E] active event counter. \t(%s)\n", evsel__name(top->sym_evsel));
fprintf(stdout, "\t[f] profile display filter (count). \t(%d)\n", top->count_filter);
@@ -528,13 +528,13 @@ static bool perf_top__handle_keypress(struct perf_top *top, int c)
fprintf(stderr, "\nAvailable events:");
evlist__for_each_entry(top->evlist, top->sym_evsel)
- fprintf(stderr, "\n\t%d %s", top->sym_evsel->idx, perf_evsel__name(top->sym_evsel));
+ fprintf(stderr, "\n\t%d %s", top->sym_evsel->idx, evsel__name(top->sym_evsel));
prompt_integer(&counter, "Enter details event counter");
if (counter >= top->evlist->core.nr_entries) {
top->sym_evsel = evlist__first(top->evlist);
- fprintf(stderr, "Sorry, no such event, using %s.\n", perf_evsel__name(top->sym_evsel));
+ fprintf(stderr, "Sorry, no such event, using %s.\n", evsel__name(top->sym_evsel));
sleep(1);
break;
}
@@ -775,6 +775,9 @@ static void perf_event__process_sample(struct perf_tool *tool,
if (machine__resolve(machine, &al, sample) < 0)
return;
+ if (top->stitch_lbr)
+ al.thread->lbr_stitch_enable = true;
+
if (!machine->kptr_restrict_warned &&
symbol_conf.kptr_restrict &&
al.cpumode == PERF_RECORD_MISC_KERNEL) {
@@ -1042,14 +1045,13 @@ try_again:
perf_top_overwrite_fallback(top, counter))
goto try_again;
- if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) {
+ if (evsel__fallback(counter, errno, msg, sizeof(msg))) {
if (verbose > 0)
ui__warning("%s\n", msg);
goto try_again;
}
- perf_evsel__open_strerror(counter, &opts->target,
- errno, msg, sizeof(msg));
+ evsel__open_strerror(counter, &opts->target, errno, msg, sizeof(msg));
ui__error("%s\n", msg);
goto out_err;
}
@@ -1571,10 +1573,11 @@ int cmd_top(int argc, const char **argv)
"Sort the output by the event at the index n in group. "
"If n is invalid, sort by the first event. "
"WARNING: should be used on grouped events."),
+ OPT_BOOLEAN(0, "stitch-lbr", &top.stitch_lbr,
+ "Enable LBR callgraph stitching approach"),
OPTS_EVSWITCH(&top.evswitch),
OPT_END()
};
- struct evlist *sb_evlist = NULL;
const char * const top_usage[] = {
"perf top [<options>]",
NULL
@@ -1640,6 +1643,11 @@ int cmd_top(int argc, const char **argv)
}
}
+ if (top.stitch_lbr && !(callchain_param.record_mode == CALLCHAIN_LBR)) {
+ pr_err("Error: --stitch-lbr must be used with --call-graph lbr\n");
+ goto out_delete_evlist;
+ }
+
if (opts->branch_stack && callchain_param.enabled)
symbol_conf.show_branchflag_count = true;
@@ -1732,10 +1740,21 @@ int cmd_top(int argc, const char **argv)
goto out_delete_evlist;
}
- if (!top.record_opts.no_bpf_event)
- bpf_event__add_sb_event(&sb_evlist, &perf_env);
+ if (!top.record_opts.no_bpf_event) {
+ top.sb_evlist = evlist__new();
+
+ if (top.sb_evlist == NULL) {
+ pr_err("Couldn't create side band evlist.\n.");
+ goto out_delete_evlist;
+ }
+
+ if (evlist__add_bpf_sb_event(top.sb_evlist, &perf_env)) {
+ pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n.");
+ goto out_delete_evlist;
+ }
+ }
- if (perf_evlist__start_sb_thread(sb_evlist, target)) {
+ if (perf_evlist__start_sb_thread(top.sb_evlist, target)) {
pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
opts->no_bpf_event = true;
}
@@ -1743,7 +1762,7 @@ int cmd_top(int argc, const char **argv)
status = __cmd_top(&top);
if (!opts->no_bpf_event)
- perf_evlist__stop_sb_thread(sb_evlist);
+ perf_evlist__stop_sb_thread(top.sb_evlist);
out_delete_evlist:
evlist__delete(top.evlist);
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 01d542007c8b..a46efb907bd4 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -366,11 +366,9 @@ out_delete:
return NULL;
}
-static int perf_evsel__init_tp_uint_field(struct evsel *evsel,
- struct tp_field *field,
- const char *name)
+static int evsel__init_tp_uint_field(struct evsel *evsel, struct tp_field *field, const char *name)
{
- struct tep_format_field *format_field = perf_evsel__field(evsel, name);
+ struct tep_format_field *format_field = evsel__field(evsel, name);
if (format_field == NULL)
return -1;
@@ -380,13 +378,11 @@ static int perf_evsel__init_tp_uint_field(struct evsel *evsel,
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
({ struct syscall_tp *sc = __evsel__syscall_tp(evsel);\
- perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
+ evsel__init_tp_uint_field(evsel, &sc->name, #name); })
-static int perf_evsel__init_tp_ptr_field(struct evsel *evsel,
- struct tp_field *field,
- const char *name)
+static int evsel__init_tp_ptr_field(struct evsel *evsel, struct tp_field *field, const char *name)
{
- struct tep_format_field *format_field = perf_evsel__field(evsel, name);
+ struct tep_format_field *format_field = evsel__field(evsel, name);
if (format_field == NULL)
return -1;
@@ -396,7 +392,7 @@ static int perf_evsel__init_tp_ptr_field(struct evsel *evsel,
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
({ struct syscall_tp *sc = __evsel__syscall_tp(evsel);\
- perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
+ evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
static void evsel__delete_priv(struct evsel *evsel)
{
@@ -404,13 +400,13 @@ static void evsel__delete_priv(struct evsel *evsel)
evsel__delete(evsel);
}
-static int perf_evsel__init_syscall_tp(struct evsel *evsel)
+static int evsel__init_syscall_tp(struct evsel *evsel)
{
struct syscall_tp *sc = evsel__syscall_tp(evsel);
if (sc != NULL) {
- if (perf_evsel__init_tp_uint_field(evsel, &sc->id, "__syscall_nr") &&
- perf_evsel__init_tp_uint_field(evsel, &sc->id, "nr"))
+ if (evsel__init_tp_uint_field(evsel, &sc->id, "__syscall_nr") &&
+ evsel__init_tp_uint_field(evsel, &sc->id, "nr"))
return -ENOENT;
return 0;
}
@@ -418,14 +414,14 @@ static int perf_evsel__init_syscall_tp(struct evsel *evsel)
return -ENOMEM;
}
-static int perf_evsel__init_augmented_syscall_tp(struct evsel *evsel, struct evsel *tp)
+static int evsel__init_augmented_syscall_tp(struct evsel *evsel, struct evsel *tp)
{
struct syscall_tp *sc = evsel__syscall_tp(evsel);
if (sc != NULL) {
- struct tep_format_field *syscall_id = perf_evsel__field(tp, "id");
+ struct tep_format_field *syscall_id = evsel__field(tp, "id");
if (syscall_id == NULL)
- syscall_id = perf_evsel__field(tp, "__syscall_nr");
+ syscall_id = evsel__field(tp, "__syscall_nr");
if (syscall_id == NULL ||
__tp_field__init_uint(&sc->id, syscall_id->size, syscall_id->offset, evsel->needs_swap))
return -EINVAL;
@@ -436,21 +432,21 @@ static int perf_evsel__init_augmented_syscall_tp(struct evsel *evsel, struct evs
return -ENOMEM;
}
-static int perf_evsel__init_augmented_syscall_tp_args(struct evsel *evsel)
+static int evsel__init_augmented_syscall_tp_args(struct evsel *evsel)
{
struct syscall_tp *sc = __evsel__syscall_tp(evsel);
return __tp_field__init_ptr(&sc->args, sc->id.offset + sizeof(u64));
}
-static int perf_evsel__init_augmented_syscall_tp_ret(struct evsel *evsel)
+static int evsel__init_augmented_syscall_tp_ret(struct evsel *evsel)
{
struct syscall_tp *sc = __evsel__syscall_tp(evsel);
return __tp_field__init_uint(&sc->ret, sizeof(u64), sc->id.offset + sizeof(u64), evsel->needs_swap);
}
-static int perf_evsel__init_raw_syscall_tp(struct evsel *evsel, void *handler)
+static int evsel__init_raw_syscall_tp(struct evsel *evsel, void *handler)
{
if (evsel__syscall_tp(evsel) != NULL) {
if (perf_evsel__init_sc_tp_uint_field(evsel, id))
@@ -474,7 +470,7 @@ static struct evsel *perf_evsel__raw_syscall_newtp(const char *direction, void *
if (IS_ERR(evsel))
return NULL;
- if (perf_evsel__init_raw_syscall_tp(evsel, handler))
+ if (evsel__init_raw_syscall_tp(evsel, handler))
goto out_delete;
return evsel;
@@ -1801,7 +1797,7 @@ static int trace__read_syscall_info(struct trace *trace, int id)
return syscall__set_arg_fmts(sc);
}
-static int perf_evsel__init_tp_arg_scnprintf(struct evsel *evsel)
+static int evsel__init_tp_arg_scnprintf(struct evsel *evsel)
{
struct syscall_arg_fmt *fmt = evsel__syscall_arg_fmt(evsel);
@@ -2074,7 +2070,7 @@ static struct syscall *trace__syscall_info(struct trace *trace,
if (verbose > 1) {
static u64 n;
fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
- id, perf_evsel__name(evsel), ++n);
+ id, evsel__name(evsel), ++n);
}
return NULL;
}
@@ -2206,7 +2202,7 @@ static int trace__fprintf_sample(struct trace *trace, struct evsel *evsel,
double ts = (double)sample->time / NSEC_PER_MSEC;
printed += fprintf(trace->output, "%22s %10.3f %s %d/%d [%d]\n",
- perf_evsel__name(evsel), ts,
+ evsel__name(evsel), ts,
thread__comm_str(thread),
sample->pid, sample->tid, sample->cpu);
}
@@ -2382,7 +2378,7 @@ static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sam
static const char *errno_to_name(struct evsel *evsel, int err)
{
- struct perf_env *env = perf_evsel__env(evsel);
+ struct perf_env *env = evsel__env(evsel);
const char *arch_name = perf_env__arch(env);
return arch_syscalls__strerrno(arch_name, err);
@@ -2513,7 +2509,7 @@ errno_print: {
if (callchain_ret > 0)
trace__fprintf_callchain(trace, sample);
else if (callchain_ret < 0)
- pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
+ pr_err("Problem processing %s callchain, skipping...\n", evsel__name(evsel));
out:
ttrace->entry_pending = false;
err = 0;
@@ -2531,7 +2527,7 @@ static int trace__vfs_getname(struct trace *trace, struct evsel *evsel,
size_t filename_len, entry_str_len, to_move;
ssize_t remaining_space;
char *pos;
- const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");
+ const char *filename = evsel__rawptr(evsel, sample, "pathname");
if (!thread)
goto out;
@@ -2587,7 +2583,7 @@ static int trace__sched_stat_runtime(struct trace *trace, struct evsel *evsel,
union perf_event *event __maybe_unused,
struct perf_sample *sample)
{
- u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
+ u64 runtime = evsel__intval(evsel, sample, "runtime");
double runtime_ms = (double)runtime / NSEC_PER_MSEC;
struct thread *thread = machine__findnew_thread(trace->host,
sample->pid,
@@ -2606,10 +2602,10 @@ out_put:
out_dump:
fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
evsel->name,
- perf_evsel__strval(evsel, sample, "comm"),
- (pid_t)perf_evsel__intval(evsel, sample, "pid"),
+ evsel__strval(evsel, sample, "comm"),
+ (pid_t)evsel__intval(evsel, sample, "pid"),
runtime,
- perf_evsel__intval(evsel, sample, "vruntime"));
+ evsel__intval(evsel, sample, "vruntime"));
goto out_put;
}
@@ -2774,7 +2770,7 @@ static int trace__event_handler(struct trace *trace, struct evsel *evsel,
fprintf(trace->output, "%s(", evsel->name);
- if (perf_evsel__is_bpf_output(evsel)) {
+ if (evsel__is_bpf_output(evsel)) {
bpf_output__fprintf(trace, sample);
} else if (evsel->tp_format) {
if (strncmp(evsel->tp_format->name, "sys_enter_", 10) ||
@@ -2795,7 +2791,7 @@ newline:
if (callchain_ret > 0)
trace__fprintf_callchain(trace, sample);
else if (callchain_ret < 0)
- pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
+ pr_err("Problem processing %s callchain, skipping...\n", evsel__name(evsel));
++trace->nr_events_printed;
@@ -2890,7 +2886,7 @@ static int trace__pgfault(struct trace *trace,
if (callchain_ret > 0)
trace__fprintf_callchain(trace, sample);
else if (callchain_ret < 0)
- pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
+ pr_err("Problem processing %s callchain, skipping...\n", evsel__name(evsel));
++trace->nr_events_printed;
out:
@@ -3032,10 +3028,10 @@ static bool evlist__add_vfs_getname(struct evlist *evlist)
}
evlist__for_each_entry_safe(evlist, evsel, tmp) {
- if (!strstarts(perf_evsel__name(evsel), "probe:vfs_getname"))
+ if (!strstarts(evsel__name(evsel), "probe:vfs_getname"))
continue;
- if (perf_evsel__field(evsel, "pathname")) {
+ if (evsel__field(evsel, "pathname")) {
evsel->handler = trace__vfs_getname;
found = true;
continue;
@@ -3093,7 +3089,7 @@ static void trace__handle_event(struct trace *trace, union perf_event *event, st
if (evsel->core.attr.type == PERF_TYPE_TRACEPOINT &&
sample->raw_data == NULL) {
fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
- perf_evsel__name(evsel), sample->tid,
+ evsel__name(evsel), sample->tid,
sample->cpu, sample->raw_size);
} else {
tracepoint_handler handler = evsel->handler;
@@ -3124,8 +3120,8 @@ static int trace__add_syscall_newtp(struct trace *trace)
if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
goto out_delete_sys_exit;
- perf_evsel__config_callchain(sys_enter, &trace->opts, &callchain_param);
- perf_evsel__config_callchain(sys_exit, &trace->opts, &callchain_param);
+ evsel__config_callchain(sys_enter, &trace->opts, &callchain_param);
+ evsel__config_callchain(sys_exit, &trace->opts, &callchain_param);
evlist__add(evlist, sys_enter);
evlist__add(evlist, sys_exit);
@@ -3164,10 +3160,9 @@ static int trace__set_ev_qualifier_tp_filter(struct trace *trace)
if (filter == NULL)
goto out_enomem;
- if (!perf_evsel__append_tp_filter(trace->syscalls.events.sys_enter,
- filter)) {
+ if (!evsel__append_tp_filter(trace->syscalls.events.sys_enter, filter)) {
sys_exit = trace->syscalls.events.sys_exit;
- err = perf_evsel__append_tp_filter(sys_exit, filter);
+ err = evsel__append_tp_filter(sys_exit, filter);
}
free(filter);
@@ -3695,7 +3690,7 @@ static int ordered_events__deliver_event(struct ordered_events *oe,
return __trace__deliver_event(trace, event->event);
}
-static struct syscall_arg_fmt *perf_evsel__syscall_arg_fmt(struct evsel *evsel, char *arg)
+static struct syscall_arg_fmt *evsel__find_syscall_arg_fmt_by_name(struct evsel *evsel, char *arg)
{
struct tep_format_field *field;
struct syscall_arg_fmt *fmt = __evsel__syscall_arg_fmt(evsel);
@@ -3750,7 +3745,7 @@ static int trace__expand_filter(struct trace *trace __maybe_unused, struct evsel
scnprintf(arg, sizeof(arg), "%.*s", left_size, left);
- fmt = perf_evsel__syscall_arg_fmt(evsel, arg);
+ fmt = evsel__find_syscall_arg_fmt_by_name(evsel, arg);
if (fmt == NULL) {
pr_err("\"%s\" not found in \"%s\", can't set filter \"%s\"\n",
arg, evsel->name, evsel->filter);
@@ -3801,7 +3796,7 @@ static int trace__expand_filter(struct trace *trace __maybe_unused, struct evsel
if (new_filter != evsel->filter) {
pr_debug("New filter for %s: %s\n", evsel->name, new_filter);
- perf_evsel__set_filter(evsel, new_filter);
+ evsel__set_filter(evsel, new_filter);
free(new_filter);
}
@@ -3849,7 +3844,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
if (pgfault_maj == NULL)
goto out_error_mem;
- perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
+ evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
evlist__add(evlist, pgfault_maj);
}
@@ -3857,7 +3852,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
if (pgfault_min == NULL)
goto out_error_mem;
- perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
+ evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
evlist__add(evlist, pgfault_min);
}
@@ -4108,7 +4103,7 @@ out_error:
out_error_apply_filters:
fprintf(trace->output,
"Failed to set filter \"%s\" on event %s with %d (%s)\n",
- evsel->filter, perf_evsel__name(evsel), errno,
+ evsel->filter, evsel__name(evsel), errno,
str_error_r(errno, errbuf, sizeof(errbuf)));
goto out_delete_evlist;
}
@@ -4179,7 +4174,7 @@ static int trace__replay(struct trace *trace)
"syscalls:sys_enter");
if (evsel &&
- (perf_evsel__init_raw_syscall_tp(evsel, trace__sys_enter) < 0 ||
+ (evsel__init_raw_syscall_tp(evsel, trace__sys_enter) < 0 ||
perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
pr_err("Error during initialize raw_syscalls:sys_enter event\n");
goto out;
@@ -4191,7 +4186,7 @@ static int trace__replay(struct trace *trace)
evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
"syscalls:sys_exit");
if (evsel &&
- (perf_evsel__init_raw_syscall_tp(evsel, trace__sys_exit) < 0 ||
+ (evsel__init_raw_syscall_tp(evsel, trace__sys_exit) < 0 ||
perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
pr_err("Error during initialize raw_syscalls:sys_exit event\n");
goto out;
@@ -4471,11 +4466,11 @@ static int evlist__set_syscall_tp_fields(struct evlist *evlist)
continue;
if (strcmp(evsel->tp_format->system, "syscalls")) {
- perf_evsel__init_tp_arg_scnprintf(evsel);
+ evsel__init_tp_arg_scnprintf(evsel);
continue;
}
- if (perf_evsel__init_syscall_tp(evsel))
+ if (evsel__init_syscall_tp(evsel))
return -1;
if (!strncmp(evsel->tp_format->name, "sys_enter_", 10)) {
@@ -4989,7 +4984,7 @@ int cmd_trace(int argc, const char **argv)
*/
if (trace.syscalls.events.augmented) {
evlist__for_each_entry(trace.evlist, evsel) {
- bool raw_syscalls_sys_exit = strcmp(perf_evsel__name(evsel), "raw_syscalls:sys_exit") == 0;
+ bool raw_syscalls_sys_exit = strcmp(evsel__name(evsel), "raw_syscalls:sys_exit") == 0;
if (raw_syscalls_sys_exit) {
trace.raw_augmented_syscalls = true;
@@ -4997,10 +4992,10 @@ int cmd_trace(int argc, const char **argv)
}
if (trace.syscalls.events.augmented->priv == NULL &&
- strstr(perf_evsel__name(evsel), "syscalls:sys_enter")) {
+ strstr(evsel__name(evsel), "syscalls:sys_enter")) {
struct evsel *augmented = trace.syscalls.events.augmented;
- if (perf_evsel__init_augmented_syscall_tp(augmented, evsel) ||
- perf_evsel__init_augmented_syscall_tp_args(augmented))
+ if (evsel__init_augmented_syscall_tp(augmented, evsel) ||
+ evsel__init_augmented_syscall_tp_args(augmented))
goto out;
/*
* Augmented is __augmented_syscalls__ BPF_OUTPUT event
@@ -5014,16 +5009,16 @@ int cmd_trace(int argc, const char **argv)
* as not to filter it, then we'll handle it just like we would
* for the BPF_OUTPUT one:
*/
- if (perf_evsel__init_augmented_syscall_tp(evsel, evsel) ||
- perf_evsel__init_augmented_syscall_tp_args(evsel))
+ if (evsel__init_augmented_syscall_tp(evsel, evsel) ||
+ evsel__init_augmented_syscall_tp_args(evsel))
goto out;
evsel->handler = trace__sys_enter;
}
- if (strstarts(perf_evsel__name(evsel), "syscalls:sys_exit_")) {
+ if (strstarts(evsel__name(evsel), "syscalls:sys_exit_")) {
struct syscall_tp *sc;
init_augmented_syscall_tp:
- if (perf_evsel__init_augmented_syscall_tp(evsel, evsel))
+ if (evsel__init_augmented_syscall_tp(evsel, evsel))
goto out;
sc = __evsel__syscall_tp(evsel);
/*
@@ -5047,7 +5042,7 @@ init_augmented_syscall_tp:
*/
if (trace.raw_augmented_syscalls)
trace.raw_augmented_syscalls_args_size = (6 + 1) * sizeof(long) + sc->id.offset;
- perf_evsel__init_augmented_syscall_tp_ret(evsel);
+ evsel__init_augmented_syscall_tp_ret(evsel);
evsel->handler = trace__sys_exit;
}
}
diff --git a/tools/perf/design.txt b/tools/perf/design.txt
index 0453ba26cdbd..a42fab308ff6 100644
--- a/tools/perf/design.txt
+++ b/tools/perf/design.txt
@@ -258,7 +258,8 @@ gets schedule to. Per task counters can be created by any user, for
their own tasks.
A 'pid == -1' and 'cpu == x' counter is a per CPU counter that counts
-all events on CPU-x. Per CPU counters need CAP_SYS_ADMIN privilege.
+all events on CPU-x. Per CPU counters need CAP_PERFMON or CAP_SYS_ADMIN
+privilege.
The 'flags' parameter is currently unused and must be zero.
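As an aside (not part of this patch), the privilege rule above is easy to see from the syscall itself: a counter opened with pid == -1 and cpu == x counts every task on that CPU, and without CAP_PERFMON or CAP_SYS_ADMIN (or a sufficiently permissive perf_event_paranoid setting) the open is expected to fail. The sketch below is illustrative only; it goes through syscall() directly because glibc provides no perf_event_open() wrapper.

	#include <linux/perf_event.h>
	#include <sys/syscall.h>
	#include <string.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		struct perf_event_attr attr;
		int fd;

		memset(&attr, 0, sizeof(attr));
		attr.size = sizeof(attr);
		attr.type = PERF_TYPE_HARDWARE;
		attr.config = PERF_COUNT_HW_CPU_CYCLES;

		/* pid == -1, cpu == 0: count all tasks on CPU 0; flags must be zero */
		fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0);
		if (fd < 0)
			perror("perf_event_open");	/* EACCES/EPERM without CAP_PERFMON */
		else
			close(fd);
		return 0;
	}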
diff --git a/tools/perf/pmu-events/arch/powerpc/power9/nest_metrics.json b/tools/perf/pmu-events/arch/powerpc/power9/nest_metrics.json
new file mode 100644
index 000000000000..c121e526442a
--- /dev/null
+++ b/tools/perf/pmu-events/arch/powerpc/power9/nest_metrics.json
@@ -0,0 +1,19 @@
+[
+ {
+ "MetricExpr": "(hv_24x7@PM_MCS01_128B_RD_DISP_PORT01\\,chip\\=?@ + hv_24x7@PM_MCS01_128B_RD_DISP_PORT23\\,chip\\=?@ + hv_24x7@PM_MCS23_128B_RD_DISP_PORT01\\,chip\\=?@ + hv_24x7@PM_MCS23_128B_RD_DISP_PORT23\\,chip\\=?@)",
+ "MetricName": "Memory_RD_BW_Chip",
+ "MetricGroup": "Memory_BW",
+ "ScaleUnit": "1.6e-2MB"
+ },
+ {
+ "MetricExpr": "(hv_24x7@PM_MCS01_128B_WR_DISP_PORT01\\,chip\\=?@ + hv_24x7@PM_MCS01_128B_WR_DISP_PORT23\\,chip\\=?@ + hv_24x7@PM_MCS23_128B_WR_DISP_PORT01\\,chip\\=?@ + hv_24x7@PM_MCS23_128B_WR_DISP_PORT23\\,chip\\=?@ )",
+ "MetricName": "Memory_WR_BW_Chip",
+ "MetricGroup": "Memory_BW",
+ "ScaleUnit": "1.6e-2MB"
+ },
+ {
+ "MetricExpr": "(hv_24x7@PM_PB_CYC\\,chip\\=?@ )",
+ "MetricName": "PowerBUS_Frequency",
+ "ScaleUnit": "2.5e-7GHz"
+ }
+]
diff --git a/tools/perf/pmu-events/pmu-events.h b/tools/perf/pmu-events/pmu-events.h
index 53e76d5d5b37..c8f306b572f4 100644
--- a/tools/perf/pmu-events/pmu-events.h
+++ b/tools/perf/pmu-events/pmu-events.h
@@ -26,7 +26,7 @@ struct pmu_event {
* Map a CPU to its table of PMU events. The CPU is identified by the
* cpuid field, which is an arch-specific identifier for the CPU.
* The identifier specified in tools/perf/pmu-events/arch/xxx/mapfile
- * must match the get_cpustr() in tools/perf/arch/xxx/util/header.c)
+ * must match the get_cpuid_str() in tools/perf/arch/xxx/util/header.c)
*
* The cpuid can contain any character other than the comma.
*/
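For orientation only: the mapping the comment above describes boils down to comparing the runtime string returned by get_cpuid_str() against the cpuid field of each generated table and taking the match. The sketch below uses hypothetical struct and function names (cpu_map_entry, find_event_table) rather than the actual pmu-events API, and a plain strcmp() understates the real matching, since on some architectures the cpuid field is a pattern rather than a literal string.

	#include <string.h>
	#include <stddef.h>

	struct pmu_event;	/* as declared in pmu-events.h */

	/* Hypothetical mapping entry and lookup, for illustration only. */
	struct cpu_map_entry {
		const char *cpuid;		/* must match get_cpuid_str() output */
		const struct pmu_event *table;
	};

	static const struct pmu_event *
	find_event_table(const struct cpu_map_entry *map, const char *cpuid)
	{
		for (; map->cpuid; map++) {
			if (!strcmp(map->cpuid, cpuid))
				return map->table;
		}
		return NULL;	/* unknown CPU: no JSON-derived events */
	}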
diff --git a/tools/perf/scripts/python/bin/flamegraph-record b/tools/perf/scripts/python/bin/flamegraph-record
new file mode 100755
index 000000000000..7df5a19c0163
--- /dev/null
+++ b/tools/perf/scripts/python/bin/flamegraph-record
@@ -0,0 +1,2 @@
+#!/bin/bash
+perf record -g "$@"
diff --git a/tools/perf/scripts/python/bin/flamegraph-report b/tools/perf/scripts/python/bin/flamegraph-report
new file mode 100755
index 000000000000..53c5dc90c87e
--- /dev/null
+++ b/tools/perf/scripts/python/bin/flamegraph-report
@@ -0,0 +1,3 @@
+#!/bin/bash
+# description: create flame graphs
+perf script -s "$PERF_EXEC_PATH"/scripts/python/flamegraph.py -- "$@"
diff --git a/tools/perf/scripts/python/flamegraph.py b/tools/perf/scripts/python/flamegraph.py
new file mode 100755
index 000000000000..61f3be9add6b
--- /dev/null
+++ b/tools/perf/scripts/python/flamegraph.py
@@ -0,0 +1,124 @@
+# flamegraph.py - create flame graphs from perf samples
+# SPDX-License-Identifier: GPL-2.0
+#
+# Usage:
+#
+# perf record -a -g -F 99 sleep 60
+# perf script report flamegraph
+#
+# Combined:
+#
+# perf script flamegraph -a -F 99 sleep 60
+#
+# Written by Andreas Gerstmayr <agerstmayr@redhat.com>
+# Flame Graphs invented by Brendan Gregg <bgregg@netflix.com>
+# Works in tandem with d3-flame-graph by Martin Spier <mspier@netflix.com>
+
+from __future__ import print_function
+import sys
+import os
+import argparse
+import json
+
+
+class Node:
+ def __init__(self, name, libtype=""):
+ self.name = name
+ self.libtype = libtype
+ self.value = 0
+ self.children = []
+
+ def toJSON(self):
+ return {
+ "n": self.name,
+ "l": self.libtype,
+ "v": self.value,
+ "c": self.children
+ }
+
+
+class FlameGraphCLI:
+ def __init__(self, args):
+ self.args = args
+ self.stack = Node("root")
+
+ if self.args.format == "html" and \
+ not os.path.isfile(self.args.template):
+ print("Flame Graph template {} does not exist. Please install "
+ "the js-d3-flame-graph (RPM) or libjs-d3-flame-graph (deb) "
+ "package, specify an existing flame graph template "
+ "(--template PATH) or another output format "
+ "(--format FORMAT).".format(self.args.template),
+ file=sys.stderr)
+ sys.exit(1)
+
+ def find_or_create_node(self, node, name, dso):
+ libtype = "kernel" if dso == "[kernel.kallsyms]" else ""
+ if name is None:
+ name = "[unknown]"
+
+ for child in node.children:
+ if child.name == name and child.libtype == libtype:
+ return child
+
+ child = Node(name, libtype)
+ node.children.append(child)
+ return child
+
+ def process_event(self, event):
+ node = self.find_or_create_node(self.stack, event["comm"], None)
+ if "callchain" in event:
+ for entry in reversed(event['callchain']):
+ node = self.find_or_create_node(
+ node, entry.get("sym", {}).get("name"), event.get("dso"))
+ else:
+ node = self.find_or_create_node(
+ node, event.get("symbol"), event.get("dso"))
+ node.value += 1
+
+ def trace_end(self):
+ json_str = json.dumps(self.stack, default=lambda x: x.toJSON())
+
+ if self.args.format == "html":
+ try:
+ with open(self.args.template) as f:
+ output_str = f.read().replace("/** @flamegraph_json **/",
+ json_str)
+ except IOError as e:
+ print("Error reading template file: {}".format(e), file=sys.stderr)
+ sys.exit(1)
+ output_fn = self.args.output or "flamegraph.html"
+ else:
+ output_str = json_str
+ output_fn = self.args.output or "stacks.json"
+
+ if output_fn == "-":
+ sys.stdout.write(output_str)
+ else:
+ print("dumping data to {}".format(output_fn))
+ try:
+ with open(output_fn, "w") as out:
+ out.write(output_str)
+ except IOError as e:
+ print("Error writing output file: {}".format(e), file=sys.stderr)
+ sys.exit(1)
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser(description="Create flame graphs.")
+ parser.add_argument("-f", "--format",
+ default="html", choices=["json", "html"],
+ help="output file format")
+ parser.add_argument("-o", "--output",
+ help="output file name")
+ parser.add_argument("--template",
+ default="/usr/share/d3-flame-graph/d3-flamegraph-base.html",
+ help="path to flamegraph HTML template")
+ parser.add_argument("-i", "--input",
+ help=argparse.SUPPRESS)
+
+ args = parser.parse_args()
+ cli = FlameGraphCLI(args)
+
+ process_event = cli.process_event
+ trace_end = cli.trace_end
diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build
index b3d1bf13ca07..c75557aeef0e 100644
--- a/tools/perf/tests/Build
+++ b/tools/perf/tests/Build
@@ -56,6 +56,7 @@ perf-y += mem2node.o
perf-y += maps.o
perf-y += time-utils-test.o
perf-y += genelf.o
+perf-y += api-io.o
$(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build
$(call rule_mkdir)
diff --git a/tools/perf/tests/api-io.c b/tools/perf/tests/api-io.c
new file mode 100644
index 000000000000..2ada86ad6084
--- /dev/null
+++ b/tools/perf/tests/api-io.c
@@ -0,0 +1,304 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "debug.h"
+#include "tests.h"
+#include <api/io.h>
+#include <linux/kernel.h>
+
+#define TEMPL "/tmp/perf-test-XXXXXX"
+
+#define EXPECT_EQUAL(val, expected) \
+do { \
+ if (val != expected) { \
+ pr_debug("%s:%d: %d != %d\n", \
+ __FILE__, __LINE__, val, expected); \
+ ret = -1; \
+ } \
+} while (0)
+
+#define EXPECT_EQUAL64(val, expected) \
+do { \
+ if (val != expected) { \
+ pr_debug("%s:%d: %lld != %lld\n", \
+ __FILE__, __LINE__, val, expected); \
+ ret = -1; \
+ } \
+} while (0)
+
+static int make_test_file(char path[PATH_MAX], const char *contents)
+{
+ ssize_t contents_len = strlen(contents);
+ int fd;
+
+ strcpy(path, TEMPL);
+ fd = mkstemp(path);
+ if (fd < 0) {
+ pr_debug("mkstemp failed");
+ return -1;
+ }
+ if (write(fd, contents, contents_len) < contents_len) {
+ pr_debug("short write");
+ close(fd);
+ unlink(path);
+ return -1;
+ }
+ close(fd);
+ return 0;
+}
+
+static int setup_test(char path[PATH_MAX], const char *contents,
+ size_t buf_size, struct io *io)
+{
+ if (make_test_file(path, contents))
+ return -1;
+
+ io->fd = open(path, O_RDONLY);
+ if (io->fd < 0) {
+ pr_debug("Failed to open '%s'\n", path);
+ unlink(path);
+ return -1;
+ }
+ io->buf = malloc(buf_size);
+ if (io->buf == NULL) {
+ pr_debug("Failed to allocate memory");
+ close(io->fd);
+ unlink(path);
+ return -1;
+ }
+ io__init(io, io->fd, io->buf, buf_size);
+ return 0;
+}
+
+static void cleanup_test(char path[PATH_MAX], struct io *io)
+{
+ free(io->buf);
+ close(io->fd);
+ unlink(path);
+}
+
+static int do_test_get_char(const char *test_string, size_t buf_size)
+{
+ char path[PATH_MAX];
+ struct io io;
+ int ch, ret = 0;
+ size_t i;
+
+ if (setup_test(path, test_string, buf_size, &io))
+ return -1;
+
+ for (i = 0; i < strlen(test_string); i++) {
+ ch = io__get_char(&io);
+
+ EXPECT_EQUAL(ch, test_string[i]);
+ EXPECT_EQUAL(io.eof, false);
+ }
+ ch = io__get_char(&io);
+ EXPECT_EQUAL(ch, -1);
+ EXPECT_EQUAL(io.eof, true);
+
+ cleanup_test(path, &io);
+ return ret;
+}
+
+static int test_get_char(void)
+{
+ int i, ret = 0;
+ size_t j;
+
+ static const char *const test_strings[] = {
+ "12345678abcdef90",
+ "a\nb\nc\nd\n",
+ "\a\b\t\v\f\r",
+ };
+ for (i = 0; i <= 10; i++) {
+ for (j = 0; j < ARRAY_SIZE(test_strings); j++) {
+ if (do_test_get_char(test_strings[j], 1 << i))
+ ret = -1;
+ }
+ }
+ return ret;
+}
+
+static int do_test_get_hex(const char *test_string,
+ __u64 val1, int ch1,
+ __u64 val2, int ch2,
+ __u64 val3, int ch3,
+ bool end_eof)
+{
+ char path[PATH_MAX];
+ struct io io;
+ int ch, ret = 0;
+ __u64 hex;
+
+ if (setup_test(path, test_string, 4, &io))
+ return -1;
+
+ ch = io__get_hex(&io, &hex);
+ EXPECT_EQUAL64(hex, val1);
+ EXPECT_EQUAL(ch, ch1);
+
+ ch = io__get_hex(&io, &hex);
+ EXPECT_EQUAL64(hex, val2);
+ EXPECT_EQUAL(ch, ch2);
+
+ ch = io__get_hex(&io, &hex);
+ EXPECT_EQUAL64(hex, val3);
+ EXPECT_EQUAL(ch, ch3);
+
+ EXPECT_EQUAL(io.eof, end_eof);
+
+ cleanup_test(path, &io);
+ return ret;
+}
+
+static int test_get_hex(void)
+{
+ int ret = 0;
+
+ if (do_test_get_hex("12345678abcdef90",
+ 0x12345678abcdef90, -1,
+ 0, -1,
+ 0, -1,
+ true))
+ ret = -1;
+
+ if (do_test_get_hex("1\n2\n3\n",
+ 1, '\n',
+ 2, '\n',
+ 3, '\n',
+ false))
+ ret = -1;
+
+ if (do_test_get_hex("12345678ABCDEF90;a;b",
+ 0x12345678abcdef90, ';',
+ 0xa, ';',
+ 0xb, -1,
+ true))
+ ret = -1;
+
+ if (do_test_get_hex("0x1x2x",
+ 0, 'x',
+ 1, 'x',
+ 2, 'x',
+ false))
+ ret = -1;
+
+ if (do_test_get_hex("x1x",
+ 0, -2,
+ 1, 'x',
+ 0, -1,
+ true))
+ ret = -1;
+
+ if (do_test_get_hex("10000000000000000000000000000abcdefgh99i",
+ 0xabcdef, 'g',
+ 0, -2,
+ 0x99, 'i',
+ false))
+ ret = -1;
+
+ return ret;
+}
+
+static int do_test_get_dec(const char *test_string,
+ __u64 val1, int ch1,
+ __u64 val2, int ch2,
+ __u64 val3, int ch3,
+ bool end_eof)
+{
+ char path[PATH_MAX];
+ struct io io;
+ int ch, ret = 0;
+ __u64 dec;
+
+ if (setup_test(path, test_string, 4, &io))
+ return -1;
+
+ ch = io__get_dec(&io, &dec);
+ EXPECT_EQUAL64(dec, val1);
+ EXPECT_EQUAL(ch, ch1);
+
+ ch = io__get_dec(&io, &dec);
+ EXPECT_EQUAL64(dec, val2);
+ EXPECT_EQUAL(ch, ch2);
+
+ ch = io__get_dec(&io, &dec);
+ EXPECT_EQUAL64(dec, val3);
+ EXPECT_EQUAL(ch, ch3);
+
+ EXPECT_EQUAL(io.eof, end_eof);
+
+ cleanup_test(path, &io);
+ return ret;
+}
+
+static int test_get_dec(void)
+{
+ int ret = 0;
+
+ if (do_test_get_dec("12345678abcdef90",
+ 12345678, 'a',
+ 0, -2,
+ 0, -2,
+ false))
+ ret = -1;
+
+ if (do_test_get_dec("1\n2\n3\n",
+ 1, '\n',
+ 2, '\n',
+ 3, '\n',
+ false))
+ ret = -1;
+
+ if (do_test_get_dec("12345678;1;2",
+ 12345678, ';',
+ 1, ';',
+ 2, -1,
+ true))
+ ret = -1;
+
+ if (do_test_get_dec("0x1x2x",
+ 0, 'x',
+ 1, 'x',
+ 2, 'x',
+ false))
+ ret = -1;
+
+ if (do_test_get_dec("x1x",
+ 0, -2,
+ 1, 'x',
+ 0, -1,
+ true))
+ ret = -1;
+
+ if (do_test_get_dec("10000000000000000000000000000000000000000000000000000000000123456789ab99c",
+ 123456789, 'a',
+ 0, -2,
+ 99, 'c',
+ false))
+ ret = -1;
+
+ return ret;
+}
+
+int test__api_io(struct test *test __maybe_unused,
+ int subtest __maybe_unused)
+{
+ int ret = 0;
+
+ if (test_get_char())
+ ret = TEST_FAIL;
+ if (test_get_hex())
+ ret = TEST_FAIL;
+ if (test_get_dec())
+ ret = TEST_FAIL;
+ return ret;
+}
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index b6322eb0f423..3471ec52ea11 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -310,6 +310,10 @@ static struct test generic_tests[] = {
.func = test__jit_write_elf,
},
{
+ .desc = "Test api io",
+ .func = test__api_io,
+ },
+ {
.desc = "maps__merge_in",
.func = test__maps__merge_in,
},
diff --git a/tools/perf/tests/event-times.c b/tools/perf/tests/event-times.c
index 1e8a9f5c356d..db68894a6f40 100644
--- a/tools/perf/tests/event-times.c
+++ b/tools/perf/tests/event-times.c
@@ -72,7 +72,7 @@ static int attach__current_disabled(struct evlist *evlist)
evsel->core.attr.disabled = 1;
- err = perf_evsel__open_per_thread(evsel, threads);
+ err = evsel__open_per_thread(evsel, threads);
if (err) {
pr_debug("Failed to open event cpu-clock:u\n");
return err;
@@ -96,7 +96,7 @@ static int attach__current_enabled(struct evlist *evlist)
return -1;
}
- err = perf_evsel__open_per_thread(evsel, threads);
+ err = evsel__open_per_thread(evsel, threads);
perf_thread_map__put(threads);
return err == 0 ? TEST_OK : TEST_FAIL;
@@ -125,7 +125,7 @@ static int attach__cpu_disabled(struct evlist *evlist)
evsel->core.attr.disabled = 1;
- err = perf_evsel__open_per_cpu(evsel, cpus, -1);
+ err = evsel__open_per_cpu(evsel, cpus, -1);
if (err) {
if (err == -EACCES)
return TEST_SKIP;
@@ -152,7 +152,7 @@ static int attach__cpu_enabled(struct evlist *evlist)
return -1;
}
- err = perf_evsel__open_per_cpu(evsel, cpus, -1);
+ err = evsel__open_per_cpu(evsel, cpus, -1);
if (err == -EACCES)
return TEST_SKIP;
diff --git a/tools/perf/tests/event_update.c b/tools/perf/tests/event_update.c
index c727379cf20e..bdcf032f8516 100644
--- a/tools/perf/tests/event_update.c
+++ b/tools/perf/tests/event_update.c
@@ -109,7 +109,7 @@ int test__event_update(struct test *test __maybe_unused, int subtest __maybe_unu
TEST_ASSERT_VAL("failed to synthesize attr update scale",
!perf_event__synthesize_event_update_scale(NULL, evsel, process_event_scale));
- tmp.name = perf_evsel__name(evsel);
+ tmp.name = evsel__name(evsel);
TEST_ASSERT_VAL("failed to synthesize attr update name",
!perf_event__synthesize_event_update_name(&tmp.tool, evsel, process_event_name));
diff --git a/tools/perf/tests/evsel-roundtrip-name.c b/tools/perf/tests/evsel-roundtrip-name.c
index 956205bf9326..61ecd8e33a01 100644
--- a/tools/perf/tests/evsel-roundtrip-name.c
+++ b/tools/perf/tests/evsel-roundtrip-name.c
@@ -20,12 +20,11 @@ static int perf_evsel__roundtrip_cache_name_test(void)
for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) {
for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) {
/* skip invalid cache type */
- if (!perf_evsel__is_cache_op_valid(type, op))
+ if (!evsel__is_cache_op_valid(type, op))
continue;
for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) {
- __perf_evsel__hw_cache_type_op_res_name(type, op, i,
- name, sizeof(name));
+ __evsel__hw_cache_type_op_res_name(type, op, i, name, sizeof(name));
err = parse_events(evlist, name, NULL);
if (err)
ret = err;
@@ -39,23 +38,22 @@ static int perf_evsel__roundtrip_cache_name_test(void)
for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) {
for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) {
/* skip invalid cache type */
- if (!perf_evsel__is_cache_op_valid(type, op))
+ if (!evsel__is_cache_op_valid(type, op))
continue;
for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) {
- __perf_evsel__hw_cache_type_op_res_name(type, op, i,
- name, sizeof(name));
+ __evsel__hw_cache_type_op_res_name(type, op, i, name, sizeof(name));
if (evsel->idx != idx)
continue;
++idx;
- if (strcmp(perf_evsel__name(evsel), name)) {
- pr_debug("%s != %s\n", perf_evsel__name(evsel), name);
+ if (strcmp(evsel__name(evsel), name)) {
+ pr_debug("%s != %s\n", evsel__name(evsel), name);
ret = -1;
}
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
}
}
}
@@ -84,9 +82,9 @@ static int __perf_evsel__name_array_test(const char *names[], int nr_names)
err = 0;
evlist__for_each_entry(evlist, evsel) {
- if (strcmp(perf_evsel__name(evsel), names[evsel->idx])) {
+ if (strcmp(evsel__name(evsel), names[evsel->idx])) {
--err;
- pr_debug("%s != %s\n", perf_evsel__name(evsel), names[evsel->idx]);
+ pr_debug("%s != %s\n", evsel__name(evsel), names[evsel->idx]);
}
}
diff --git a/tools/perf/tests/evsel-tp-sched.c b/tools/perf/tests/evsel-tp-sched.c
index 261e6eaaee99..ce8aa32bc3ee 100644
--- a/tools/perf/tests/evsel-tp-sched.c
+++ b/tools/perf/tests/evsel-tp-sched.c
@@ -8,7 +8,7 @@
static int perf_evsel__test_field(struct evsel *evsel, const char *name,
int size, bool should_be_signed)
{
- struct tep_format_field *field = perf_evsel__field(evsel, name);
+ struct tep_format_field *field = evsel__field(evsel, name);
int is_signed;
int ret = 0;
diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c
index 28313e59d6f6..f9e8e5628836 100644
--- a/tools/perf/tests/expr.c
+++ b/tools/perf/tests/expr.c
@@ -6,11 +6,11 @@
#include <string.h>
#include <linux/zalloc.h>
-static int test(struct parse_ctx *ctx, const char *e, double val2)
+static int test(struct expr_parse_ctx *ctx, const char *e, double val2)
{
double val;
- if (expr__parse(&val, ctx, e))
+ if (expr__parse(&val, ctx, e, 1))
TEST_ASSERT_VAL("parse test failed", 0);
TEST_ASSERT_VAL("unexpected value", val == val2);
return 0;
@@ -22,7 +22,7 @@ int test__expr(struct test *t __maybe_unused, int subtest __maybe_unused)
const char **other;
double val;
int i, ret;
- struct parse_ctx ctx;
+ struct expr_parse_ctx ctx;
int num_other;
expr__ctx_init(&ctx);
@@ -44,21 +44,29 @@ int test__expr(struct test *t __maybe_unused, int subtest __maybe_unused)
return ret;
p = "FOO/0";
- ret = expr__parse(&val, &ctx, p);
+ ret = expr__parse(&val, &ctx, p, 1);
TEST_ASSERT_VAL("division by zero", ret == -1);
p = "BAR/";
- ret = expr__parse(&val, &ctx, p);
+ ret = expr__parse(&val, &ctx, p, 1);
TEST_ASSERT_VAL("missing operand", ret == -1);
TEST_ASSERT_VAL("find other",
- expr__find_other("FOO + BAR + BAZ + BOZO", "FOO", &other, &num_other) == 0);
+ expr__find_other("FOO + BAR + BAZ + BOZO", "FOO", &other, &num_other, 1) == 0);
TEST_ASSERT_VAL("find other", num_other == 3);
TEST_ASSERT_VAL("find other", !strcmp(other[0], "BAR"));
TEST_ASSERT_VAL("find other", !strcmp(other[1], "BAZ"));
TEST_ASSERT_VAL("find other", !strcmp(other[2], "BOZO"));
TEST_ASSERT_VAL("find other", other[3] == NULL);
+ TEST_ASSERT_VAL("find other",
+ expr__find_other("EVENT1\\,param\\=?@ + EVENT2\\,param\\=?@", NULL,
+ &other, &num_other, 3) == 0);
+ TEST_ASSERT_VAL("find other", num_other == 2);
+ TEST_ASSERT_VAL("find other", !strcmp(other[0], "EVENT1,param=3/"));
+ TEST_ASSERT_VAL("find other", !strcmp(other[1], "EVENT2,param=3/"));
+ TEST_ASSERT_VAL("find other", other[2] == NULL);
+
for (i = 0; i < num_other; i++)
zfree(&other[i]);
free((void *)other);
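For context, the new trailing argument to expr__parse() and expr__find_other() is a runtime value substituted for the escaped '?' placeholder, as the added expectations show. A minimal sketch using only the calls above (header path and identifier name are assumptions; the caller frees the returned array, as in the test):

    #include <stdio.h>
    #include <stdlib.h>
    #include <linux/zalloc.h>
    #include "util/expr.h"

    static void show_runtime_substitution(void)
    {
            const char **ids;
            int nr_ids, i;

            /* Per the expectations above: backslash escapes are stripped,
             * '?' becomes the runtime value 3, a trailing '@' becomes '/'. */
            if (expr__find_other("EVENT\\,param\\=?@ + 1", NULL, &ids, &nr_ids, 3))
                    return;

            for (i = 0; i < nr_ids; i++) {
                    printf("%s\n", ids[i]);  /* "EVENT,param=3/" */
                    zfree(&ids[i]);
            }
            free((void *)ids);
    }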
diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c
index 6367c8f6ca22..7a542f1c1c78 100644
--- a/tools/perf/tests/hists_cumulate.c
+++ b/tools/perf/tests/hists_cumulate.c
@@ -280,7 +280,7 @@ static int test1(struct evsel *evsel, struct machine *machine)
symbol_conf.use_callchain = false;
symbol_conf.cumulate_callchain = false;
- perf_evsel__reset_sample_bit(evsel, CALLCHAIN);
+ evsel__reset_sample_bit(evsel, CALLCHAIN);
setup_sorting(NULL);
callchain_register_param(&callchain_param);
@@ -427,7 +427,7 @@ static int test2(struct evsel *evsel, struct machine *machine)
symbol_conf.use_callchain = true;
symbol_conf.cumulate_callchain = false;
- perf_evsel__set_sample_bit(evsel, CALLCHAIN);
+ evsel__set_sample_bit(evsel, CALLCHAIN);
setup_sorting(NULL);
callchain_register_param(&callchain_param);
@@ -485,7 +485,7 @@ static int test3(struct evsel *evsel, struct machine *machine)
symbol_conf.use_callchain = false;
symbol_conf.cumulate_callchain = true;
- perf_evsel__reset_sample_bit(evsel, CALLCHAIN);
+ evsel__reset_sample_bit(evsel, CALLCHAIN);
setup_sorting(NULL);
callchain_register_param(&callchain_param);
@@ -669,7 +669,7 @@ static int test4(struct evsel *evsel, struct machine *machine)
symbol_conf.use_callchain = true;
symbol_conf.cumulate_callchain = true;
- perf_evsel__set_sample_bit(evsel, CALLCHAIN);
+ evsel__set_sample_bit(evsel, CALLCHAIN);
setup_sorting(NULL);
diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c
index 5f4c0dbb4715..d4b8eb6e337a 100644
--- a/tools/perf/tests/mmap-basic.c
+++ b/tools/perf/tests/mmap-basic.c
@@ -86,7 +86,7 @@ int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unuse
}
evsels[i]->core.attr.wakeup_events = 1;
- perf_evsel__set_sample_id(evsels[i], false);
+ evsel__set_sample_id(evsels[i], false);
evlist__add(evlist, evsels[i]);
@@ -150,7 +150,7 @@ out_init:
if (nr_events[evsel->idx] != expected_nr_events[evsel->idx]) {
pr_debug("expected %d %s events, got %d\n",
expected_nr_events[evsel->idx],
- perf_evsel__name(evsel), nr_events[evsel->idx]);
+ evsel__name(evsel), nr_events[evsel->idx]);
err = -1;
goto out_delete_evlist;
}
diff --git a/tools/perf/tests/openat-syscall-all-cpus.c b/tools/perf/tests/openat-syscall-all-cpus.c
index 93c176523e38..900934be22d2 100644
--- a/tools/perf/tests/openat-syscall-all-cpus.c
+++ b/tools/perf/tests/openat-syscall-all-cpus.c
@@ -103,15 +103,15 @@ int test__openat_syscall_event_on_all_cpus(struct test *test __maybe_unused, int
if (cpus->map[cpu] >= CPU_SETSIZE)
continue;
- if (perf_evsel__read_on_cpu(evsel, cpu, 0) < 0) {
- pr_debug("perf_evsel__read_on_cpu\n");
+ if (evsel__read_on_cpu(evsel, cpu, 0) < 0) {
+ pr_debug("evsel__read_on_cpu\n");
err = -1;
break;
}
expected = nr_openat_calls + cpu;
if (perf_counts(evsel->counts, cpu, 0)->val != expected) {
- pr_debug("perf_evsel__read_on_cpu: expected to intercept %d calls on cpu %d, got %" PRIu64 "\n",
+ pr_debug("evsel__read_on_cpu: expected to intercept %d calls on cpu %d, got %" PRIu64 "\n",
expected, cpus->map[cpu], perf_counts(evsel->counts, cpu, 0)->val);
err = -1;
}
diff --git a/tools/perf/tests/openat-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c
index c6b2d7aab608..1dc2897d2df9 100644
--- a/tools/perf/tests/openat-syscall-tp-fields.c
+++ b/tools/perf/tests/openat-syscall-tp-fields.c
@@ -60,7 +60,7 @@ int test__syscall_openat_tp_fields(struct test *test __maybe_unused, int subtest
goto out_delete_evlist;
}
- perf_evsel__config(evsel, &opts, NULL);
+ evsel__config(evsel, &opts, NULL);
perf_thread_map__set_pid(evlist->core.threads, 0, getpid());
@@ -108,13 +108,13 @@ int test__syscall_openat_tp_fields(struct test *test __maybe_unused, int subtest
continue;
}
- err = perf_evsel__parse_sample(evsel, event, &sample);
+ err = evsel__parse_sample(evsel, event, &sample);
if (err) {
pr_debug("Can't parse sample, err = %d\n", err);
goto out_delete_evlist;
}
- tp_flags = perf_evsel__intval(evsel, &sample, "flags");
+ tp_flags = evsel__intval(evsel, &sample, "flags");
if (flags != tp_flags) {
pr_debug("%s: Expected flags=%#x, got %#x\n",
diff --git a/tools/perf/tests/openat-syscall.c b/tools/perf/tests/openat-syscall.c
index 5ebffae18605..db5d8bb8cd06 100644
--- a/tools/perf/tests/openat-syscall.c
+++ b/tools/perf/tests/openat-syscall.c
@@ -34,7 +34,7 @@ int test__openat_syscall_event(struct test *test __maybe_unused, int subtest __m
goto out_thread_map_delete;
}
- if (perf_evsel__open_per_thread(evsel, threads) < 0) {
+ if (evsel__open_per_thread(evsel, threads) < 0) {
pr_debug("failed to open counter: %s, "
"tweak /proc/sys/kernel/perf_event_paranoid?\n",
str_error_r(errno, sbuf, sizeof(sbuf)));
@@ -46,13 +46,13 @@ int test__openat_syscall_event(struct test *test __maybe_unused, int subtest __m
close(fd);
}
- if (perf_evsel__read_on_cpu(evsel, 0, 0) < 0) {
- pr_debug("perf_evsel__read_on_cpu\n");
+ if (evsel__read_on_cpu(evsel, 0, 0) < 0) {
+ pr_debug("evsel__read_on_cpu\n");
goto out_close_fd;
}
if (perf_counts(evsel->counts, 0, 0)->val != nr_openat_calls) {
- pr_debug("perf_evsel__read_on_cpu: expected to intercept %d calls, got %" PRIu64 "\n",
+ pr_debug("evsel__read_on_cpu: expected to intercept %d calls, got %" PRIu64 "\n",
nr_openat_calls, perf_counts(evsel->counts, 0, 0)->val);
goto out_close_fd;
}
diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index 091c3aeccc27..895188b63f96 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -371,7 +371,7 @@ static int test__checkevent_breakpoint_modifier(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
TEST_ASSERT_VAL("wrong name",
- !strcmp(perf_evsel__name(evsel), "mem:0:u"));
+ !strcmp(evsel__name(evsel), "mem:0:u"));
return test__checkevent_breakpoint(evlist);
}
@@ -385,7 +385,7 @@ static int test__checkevent_breakpoint_x_modifier(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
TEST_ASSERT_VAL("wrong name",
- !strcmp(perf_evsel__name(evsel), "mem:0:x:k"));
+ !strcmp(evsel__name(evsel), "mem:0:x:k"));
return test__checkevent_breakpoint_x(evlist);
}
@@ -399,7 +399,7 @@ static int test__checkevent_breakpoint_r_modifier(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip);
TEST_ASSERT_VAL("wrong name",
- !strcmp(perf_evsel__name(evsel), "mem:0:r:hp"));
+ !strcmp(evsel__name(evsel), "mem:0:r:hp"));
return test__checkevent_breakpoint_r(evlist);
}
@@ -413,7 +413,7 @@ static int test__checkevent_breakpoint_w_modifier(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip);
TEST_ASSERT_VAL("wrong name",
- !strcmp(perf_evsel__name(evsel), "mem:0:w:up"));
+ !strcmp(evsel__name(evsel), "mem:0:w:up"));
return test__checkevent_breakpoint_w(evlist);
}
@@ -427,7 +427,7 @@ static int test__checkevent_breakpoint_rw_modifier(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip);
TEST_ASSERT_VAL("wrong name",
- !strcmp(perf_evsel__name(evsel), "mem:0:rw:kp"));
+ !strcmp(evsel__name(evsel), "mem:0:rw:kp"));
return test__checkevent_breakpoint_rw(evlist);
}
@@ -468,7 +468,7 @@ static int test__checkevent_list(struct evlist *evlist)
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
/* syscalls:sys_enter_openat:k */
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_TRACEPOINT == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong sample_type",
PERF_TP_SAMPLE_TYPE == evsel->core.attr.sample_type);
@@ -479,7 +479,7 @@ static int test__checkevent_list(struct evlist *evlist)
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
/* 1:1:hp */
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", 1 == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config", 1 == evsel->core.attr.config);
TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user);
@@ -498,15 +498,15 @@ static int test__checkevent_pmu_name(struct evlist *evlist)
TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config", 1 == evsel->core.attr.config);
- TEST_ASSERT_VAL("wrong name", !strcmp(perf_evsel__name(evsel), "krava"));
+ TEST_ASSERT_VAL("wrong name", !strcmp(evsel__name(evsel), "krava"));
/* cpu/config=2/u" */
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config", 2 == evsel->core.attr.config);
TEST_ASSERT_VAL("wrong name",
- !strcmp(perf_evsel__name(evsel), "cpu/config=2/u"));
+ !strcmp(evsel__name(evsel), "cpu/config=2/u"));
return 0;
}
@@ -529,7 +529,7 @@ static int test__checkevent_pmu_partial_time_callgraph(struct evlist *evlist)
TEST_ASSERT_VAL("wrong time", !(PERF_SAMPLE_TIME & evsel->core.attr.sample_type));
/* cpu/config=2,call-graph=no,time=0,period=2000/ */
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config", 2 == evsel->core.attr.config);
/*
@@ -577,7 +577,7 @@ static int test__checkevent_pmu_events_mix(struct evlist *evlist)
TEST_ASSERT_VAL("wrong pinned", !evsel->core.attr.pinned);
/* cpu/pmu-event/u*/
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong exclude_user",
@@ -652,13 +652,13 @@ static int test__group1(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2);
- TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0);
TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
/* cycles:upp */
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config",
PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config);
@@ -670,7 +670,7 @@ static int test__group1(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip == 2);
TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
- TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1);
TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
return 0;
@@ -694,13 +694,13 @@ static int test__group2(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2);
- TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0);
TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
/* cache-references + :u modifier */
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config",
PERF_COUNT_HW_CACHE_REFERENCES == evsel->core.attr.config);
@@ -711,11 +711,11 @@ static int test__group2(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
- TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1);
TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
/* cycles:k */
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config",
PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config);
@@ -725,7 +725,7 @@ static int test__group2(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
return 0;
@@ -750,15 +750,15 @@ static int test__group3(struct evlist *evlist __maybe_unused)
TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest);
TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
TEST_ASSERT_VAL("wrong group name",
!strcmp(leader->group_name, "group1"));
TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2);
- TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0);
TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
/* group1 cycles:kppp */
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config",
PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config);
@@ -771,11 +771,11 @@ static int test__group3(struct evlist *evlist __maybe_unused)
TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip == 3);
TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
- TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1);
TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
/* group2 cycles + G modifier */
- evsel = leader = perf_evsel__next(evsel);
+ evsel = leader = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config",
PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config);
@@ -785,15 +785,15 @@ static int test__group3(struct evlist *evlist __maybe_unused)
TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
TEST_ASSERT_VAL("wrong group name",
!strcmp(leader->group_name, "group2"));
TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2);
- TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0);
TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
/* group2 1:3 + G modifier */
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", 1 == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config", 3 == evsel->core.attr.config);
TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
@@ -803,11 +803,11 @@ static int test__group3(struct evlist *evlist __maybe_unused)
TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
- TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1);
TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
/* instructions:u */
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config",
PERF_COUNT_HW_INSTRUCTIONS == evsel->core.attr.config);
@@ -817,7 +817,7 @@ static int test__group3(struct evlist *evlist __maybe_unused)
TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
return 0;
@@ -843,13 +843,13 @@ static int test__group4(struct evlist *evlist __maybe_unused)
TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip == 1);
TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
- TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2);
- TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0);
TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
/* instructions:kp + p */
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config",
PERF_COUNT_HW_INSTRUCTIONS == evsel->core.attr.config);
@@ -861,7 +861,7 @@ static int test__group4(struct evlist *evlist __maybe_unused)
TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip == 2);
TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
- TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1);
TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
return 0;
@@ -886,13 +886,13 @@ static int test__group5(struct evlist *evlist __maybe_unused)
TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
- TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2);
- TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0);
TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
/* instructions + G */
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config",
PERF_COUNT_HW_INSTRUCTIONS == evsel->core.attr.config);
@@ -903,11 +903,11 @@ static int test__group5(struct evlist *evlist __maybe_unused)
TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
- TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1);
TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
/* cycles:G */
- evsel = leader = perf_evsel__next(evsel);
+ evsel = leader = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config",
PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config);
@@ -918,13 +918,13 @@ static int test__group5(struct evlist *evlist __maybe_unused)
TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
- TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2);
- TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0);
TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
/* instructions:G */
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config",
PERF_COUNT_HW_INSTRUCTIONS == evsel->core.attr.config);
@@ -935,10 +935,10 @@ static int test__group5(struct evlist *evlist __maybe_unused)
TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
- TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1);
/* cycles */
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config",
PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config);
@@ -948,7 +948,7 @@ static int test__group5(struct evlist *evlist __maybe_unused)
TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest);
TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
return 0;
}
@@ -972,12 +972,12 @@ static int test__group_gh1(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
- TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2);
- TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0);
/* cache-misses:G + :H group modifier */
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config",
PERF_COUNT_HW_CACHE_MISSES == evsel->core.attr.config);
@@ -988,7 +988,7 @@ static int test__group_gh1(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
- TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1);
return 0;
}
@@ -1012,12 +1012,12 @@ static int test__group_gh2(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
- TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2);
- TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0);
/* cache-misses:H + :G group modifier */
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config",
PERF_COUNT_HW_CACHE_MISSES == evsel->core.attr.config);
@@ -1028,7 +1028,7 @@ static int test__group_gh2(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
- TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1);
return 0;
}
@@ -1052,12 +1052,12 @@ static int test__group_gh3(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
- TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2);
- TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0);
/* cache-misses:H + :u group modifier */
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config",
PERF_COUNT_HW_CACHE_MISSES == evsel->core.attr.config);
@@ -1068,7 +1068,7 @@ static int test__group_gh3(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
- TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1);
return 0;
}
@@ -1092,12 +1092,12 @@ static int test__group_gh4(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
- TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2);
- TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0);
/* cache-misses:H + :uG group modifier */
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config",
PERF_COUNT_HW_CACHE_MISSES == evsel->core.attr.config);
@@ -1108,7 +1108,7 @@ static int test__group_gh4(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
- TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1);
return 0;
}
@@ -1135,7 +1135,7 @@ static int test__leader_sample1(struct evlist *evlist)
TEST_ASSERT_VAL("wrong sample_read", evsel->sample_read);
/* cache-misses - not sampling */
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config",
PERF_COUNT_HW_CACHE_MISSES == evsel->core.attr.config);
@@ -1149,7 +1149,7 @@ static int test__leader_sample1(struct evlist *evlist)
TEST_ASSERT_VAL("wrong sample_read", evsel->sample_read);
/* branch-misses - not sampling */
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config",
PERF_COUNT_HW_BRANCH_MISSES == evsel->core.attr.config);
@@ -1188,7 +1188,7 @@ static int test__leader_sample2(struct evlist *evlist __maybe_unused)
TEST_ASSERT_VAL("wrong sample_read", evsel->sample_read);
/* branch-misses - not sampling */
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config",
PERF_COUNT_HW_BRANCH_MISSES == evsel->core.attr.config);
@@ -1234,14 +1234,14 @@ static int test__pinned_group(struct evlist *evlist)
TEST_ASSERT_VAL("wrong pinned", evsel->core.attr.pinned);
/* cache-misses - can not be pinned, but will go on with the leader */
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config",
PERF_COUNT_HW_CACHE_MISSES == evsel->core.attr.config);
TEST_ASSERT_VAL("wrong pinned", !evsel->core.attr.pinned);
/* branch-misses - ditto */
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong config",
PERF_COUNT_HW_BRANCH_MISSES == evsel->core.attr.config);
TEST_ASSERT_VAL("wrong pinned", !evsel->core.attr.pinned);
@@ -1356,6 +1356,16 @@ static int test__checkevent_complex_name(struct evlist *evlist)
return 0;
}
+static int test__checkevent_raw_pmu(struct evlist *evlist)
+{
+ struct evsel *evsel = evlist__first(evlist);
+
+ TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_SOFTWARE == evsel->core.attr.type);
+ TEST_ASSERT_VAL("wrong config", 0x1a == evsel->core.attr.config);
+ return 0;
+}
+
static int test__sym_event_slash(struct evlist *evlist)
{
struct evsel *evsel = evlist__first(evlist);
@@ -1750,7 +1760,12 @@ static struct evlist_test test__events_pmu[] = {
.name = "cpu/name='COMPLEX_CYCLES_NAME:orig=cycles,desc=chip-clock-ticks',period=0x1,event=0x2/ukp",
.check = test__checkevent_complex_name,
.id = 3,
- }
+ },
+ {
+ .name = "software/r1a/",
+ .check = test__checkevent_raw_pmu,
+ .id = 4,
+ },
};
struct terms_test {
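The new entry above exercises the raw-on-named-PMU syntax: "software/r1a/" is parsed as raw config 0x1a on the software PMU. A hedged sketch of driving the same parse programmatically, mirroring calls used elsewhere in this test file and assuming its existing includes:

    static void parse_raw_pmu_example(void)
    {
            struct evlist *evlist = evlist__new();
            struct evsel *evsel;

            if (evlist == NULL)
                    return;

            /* "r1a" inside the named PMU term is taken as raw config 0x1a. */
            if (parse_events(evlist, "software/r1a/", NULL) == 0) {
                    evsel = evlist__first(evlist);
                    /* Per test__checkevent_raw_pmu(): PERF_TYPE_SOFTWARE, config 0x1a. */
                    fprintf(stderr, "type %u config %#llx\n",
                            evsel->core.attr.type,
                            (unsigned long long)evsel->core.attr.config);
            }
            evlist__delete(evlist);
    }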
diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c
index 2195fc205e72..83adfd846ccd 100644
--- a/tools/perf/tests/perf-record.c
+++ b/tools/perf/tests/perf-record.c
@@ -106,9 +106,9 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus
* Config the evsels, setting attr->comm on the first one, etc.
*/
evsel = evlist__first(evlist);
- perf_evsel__set_sample_bit(evsel, CPU);
- perf_evsel__set_sample_bit(evsel, TID);
- perf_evsel__set_sample_bit(evsel, TIME);
+ evsel__set_sample_bit(evsel, CPU);
+ evsel__set_sample_bit(evsel, TID);
+ evsel__set_sample_bit(evsel, TIME);
perf_evlist__config(evlist, &opts, NULL);
err = sched__get_first_possible_cpu(evlist->workload.pid, &cpu_mask);
diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c
index 61865699c3f4..a0bdaf390ac8 100644
--- a/tools/perf/tests/sample-parsing.c
+++ b/tools/perf/tests/sample-parsing.c
@@ -296,12 +296,12 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format)
goto out_free;
}
- evsel.sample_size = __perf_evsel__sample_size(sample_type);
+ evsel.sample_size = __evsel__sample_size(sample_type);
- err = perf_evsel__parse_sample(&evsel, event, &sample_out);
+ err = evsel__parse_sample(&evsel, event, &sample_out);
if (err) {
pr_debug("%s failed for sample_type %#"PRIx64", error %d\n",
- "perf_evsel__parse_sample", sample_type, err);
+ "evsel__parse_sample", sample_type, err);
goto out_free;
}
diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c
index fcb0d03dba4e..db5e1f70053a 100644
--- a/tools/perf/tests/switch-tracking.c
+++ b/tools/perf/tests/switch-tracking.c
@@ -135,8 +135,8 @@ static int process_sample_event(struct evlist *evlist,
evsel = perf_evlist__id2evsel(evlist, sample.id);
if (evsel == switch_tracking->switch_evsel) {
- next_tid = perf_evsel__intval(evsel, &sample, "next_pid");
- prev_tid = perf_evsel__intval(evsel, &sample, "prev_pid");
+ next_tid = evsel__intval(evsel, &sample, "next_pid");
+ prev_tid = evsel__intval(evsel, &sample, "prev_pid");
cpu = sample.cpu;
pr_debug3("sched_switch: cpu: %d prev_tid %d next_tid %d\n",
cpu, prev_tid, next_tid);
@@ -394,8 +394,8 @@ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_
switch_evsel = evlist__last(evlist);
- perf_evsel__set_sample_bit(switch_evsel, CPU);
- perf_evsel__set_sample_bit(switch_evsel, TIME);
+ evsel__set_sample_bit(switch_evsel, CPU);
+ evsel__set_sample_bit(switch_evsel, TIME);
switch_evsel->core.system_wide = true;
switch_evsel->no_aux_samples = true;
@@ -412,8 +412,8 @@ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_
goto out_err;
}
- perf_evsel__set_sample_bit(cycles_evsel, CPU);
- perf_evsel__set_sample_bit(cycles_evsel, TIME);
+ evsel__set_sample_bit(cycles_evsel, CPU);
+ evsel__set_sample_bit(cycles_evsel, TIME);
/* Fourth event */
err = parse_events(evlist, "dummy:u", NULL);
@@ -429,7 +429,7 @@ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_
tracking_evsel->core.attr.freq = 0;
tracking_evsel->core.attr.sample_period = 1;
- perf_evsel__set_sample_bit(tracking_evsel, TIME);
+ evsel__set_sample_bit(tracking_evsel, TIME);
/* Config events */
perf_evlist__config(evlist, &opts, NULL);
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index 61a1ab032080..d6d4ac34eeb7 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -112,6 +112,7 @@ int test__mem2node(struct test *t, int subtest);
int test__maps__merge_in(struct test *t, int subtest);
int test__time_utils(struct test *t, int subtest);
int test__jit_write_elf(struct test *test, int subtest);
+int test__api_io(struct test *test, int subtest);
bool test__bp_signal_is_supported(void);
bool test__bp_account_is_supported(void);
diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c
index 4a800499d7c3..22daf2bdf5fa 100644
--- a/tools/perf/tests/topology.c
+++ b/tools/perf/tests/topology.c
@@ -33,10 +33,8 @@ static int session_write_header(char *path)
{
struct perf_session *session;
struct perf_data data = {
- .file = {
- .path = path,
- },
- .mode = PERF_DATA_MODE_WRITE,
+ .path = path,
+ .mode = PERF_DATA_MODE_WRITE,
};
session = perf_session__new(&data, false, NULL);
@@ -63,10 +61,8 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
{
struct perf_session *session;
struct perf_data data = {
- .file = {
- .path = path,
- },
- .mode = PERF_DATA_MODE_READ,
+ .path = path,
+ .mode = PERF_DATA_MODE_READ,
};
int i;
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 487e54ef56a9..f98a118dfc49 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -3416,7 +3416,7 @@ static void perf_evsel_menu__write(struct ui_browser *browser,
struct hists *hists = evsel__hists(evsel);
bool current_entry = ui_browser__is_current_entry(browser, row);
unsigned long nr_events = hists->stats.nr_events[PERF_RECORD_SAMPLE];
- const char *ev_name = perf_evsel__name(evsel);
+ const char *ev_name = evsel__name(evsel);
char bf[256], unit;
const char *warn = " ";
size_t printed;
@@ -3424,10 +3424,10 @@ static void perf_evsel_menu__write(struct ui_browser *browser,
ui_browser__set_color(browser, current_entry ? HE_COLORSET_SELECTED :
HE_COLORSET_NORMAL);
- if (perf_evsel__is_group_event(evsel)) {
+ if (evsel__is_group_event(evsel)) {
struct evsel *pos;
- ev_name = perf_evsel__group_name(evsel);
+ ev_name = evsel__group_name(evsel);
for_each_group_member(pos, evsel) {
struct hists *pos_hists = evsel__hists(pos);
@@ -3512,13 +3512,13 @@ browse_hists:
if (pos->core.node.next == &evlist->core.entries)
pos = evlist__first(evlist);
else
- pos = perf_evsel__next(pos);
+ pos = evsel__next(pos);
goto browse_hists;
case K_UNTAB:
if (pos->core.node.prev == &evlist->core.entries)
pos = evlist__last(evlist);
else
- pos = perf_evsel__prev(pos);
+ pos = evsel__prev(pos);
goto browse_hists;
case K_SWITCH_INPUT_DATA:
case K_RELOAD:
@@ -3554,7 +3554,7 @@ static bool filter_group_entries(struct ui_browser *browser __maybe_unused,
{
struct evsel *evsel = list_entry(entry, struct evsel, core.node);
- if (symbol_conf.event_group && !perf_evsel__is_group_leader(evsel))
+ if (symbol_conf.event_group && !evsel__is_group_leader(evsel))
return true;
return false;
@@ -3587,7 +3587,7 @@ static int __perf_evlist__tui_browse_hists(struct evlist *evlist,
ui_helpline__push("Press ESC to exit");
evlist__for_each_entry(evlist, pos) {
- const char *ev_name = perf_evsel__name(pos);
+ const char *ev_name = evsel__name(pos);
size_t line_len = strlen(ev_name) + 7;
if (menu.b.width < line_len)
@@ -3622,7 +3622,7 @@ single_entry:
nr_entries = 0;
evlist__for_each_entry(evlist, pos) {
- if (perf_evsel__is_group_leader(pos))
+ if (evsel__is_group_leader(pos))
nr_entries++;
}
@@ -3640,7 +3640,7 @@ static int block_hists_browser__title(struct hist_browser *browser, char *bf,
size_t size)
{
struct hists *hists = evsel__hists(browser->block_evsel);
- const char *evname = perf_evsel__name(browser->block_evsel);
+ const char *evname = evsel__name(browser->block_evsel);
unsigned long nr_samples = hists->stats.nr_events[PERF_RECORD_SAMPLE];
int ret;
diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c
index 35f9641bf670..a7dff77f2018 100644
--- a/tools/perf/ui/gtk/annotate.c
+++ b/tools/perf/ui/gtk/annotate.c
@@ -130,7 +130,7 @@ static int perf_gtk__annotate_symbol(GtkWidget *window, struct map_symbol *ms,
gtk_list_store_append(store, &iter);
- if (perf_evsel__is_group_event(evsel)) {
+ if (evsel__is_group_event(evsel)) {
for (i = 0; i < evsel->core.nr_members; i++) {
ret += perf_gtk__get_percent(s + ret,
sizeof(s) - ret,
diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c
index ed1a97b2c4b0..53ef71a1b15d 100644
--- a/tools/perf/ui/gtk/hists.c
+++ b/tools/perf/ui/gtk/hists.c
@@ -635,18 +635,18 @@ int perf_evlist__gtk_browse_hists(struct evlist *evlist,
evlist__for_each_entry(evlist, pos) {
struct hists *hists = evsel__hists(pos);
- const char *evname = perf_evsel__name(pos);
+ const char *evname = evsel__name(pos);
GtkWidget *scrolled_window;
GtkWidget *tab_label;
char buf[512];
size_t size = sizeof(buf);
if (symbol_conf.event_group) {
- if (!perf_evsel__is_group_leader(pos))
+ if (!evsel__is_group_leader(pos))
continue;
if (pos->core.nr_members > 1) {
- perf_evsel__group_desc(pos, buf, size);
+ evsel__group_desc(pos, buf, size);
evname = buf;
}
}
diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c
index 025f4c7f96bf..c1f24d004852 100644
--- a/tools/perf/ui/hist.c
+++ b/tools/perf/ui/hist.c
@@ -43,12 +43,12 @@ static int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he,
} else
ret = hpp__call_print_fn(hpp, print_fn, fmt, len, get_field(he));
- if (perf_evsel__is_group_event(evsel)) {
+ if (evsel__is_group_event(evsel)) {
int prev_idx, idx_delta;
struct hist_entry *pair;
int nr_members = evsel->core.nr_members;
- prev_idx = perf_evsel__group_idx(evsel);
+ prev_idx = evsel__group_idx(evsel);
list_for_each_entry(pair, &he->pairs.head, pairs.node) {
u64 period = get_field(pair);
@@ -58,7 +58,7 @@ static int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he,
continue;
evsel = hists_to_evsel(pair->hists);
- idx_delta = perf_evsel__group_idx(evsel) - prev_idx - 1;
+ idx_delta = evsel__group_idx(evsel) - prev_idx - 1;
while (idx_delta--) {
/*
@@ -82,7 +82,7 @@ static int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he,
len, period);
}
- prev_idx = perf_evsel__group_idx(evsel);
+ prev_idx = evsel__group_idx(evsel);
}
idx_delta = nr_members - prev_idx - 1;
@@ -164,12 +164,12 @@ static int hist_entry__new_pair(struct hist_entry *a, struct hist_entry *b,
list_for_each_entry(pair, &a->pairs.head, pairs.node) {
struct evsel *evsel = hists_to_evsel(pair->hists);
- fa[perf_evsel__group_idx(evsel)] = get_field(pair);
+ fa[evsel__group_idx(evsel)] = get_field(pair);
}
list_for_each_entry(pair, &b->pairs.head, pairs.node) {
struct evsel *evsel = hists_to_evsel(pair->hists);
- fb[perf_evsel__group_idx(evsel)] = get_field(pair);
+ fb[evsel__group_idx(evsel)] = get_field(pair);
}
*fields_a = fa;
@@ -190,7 +190,7 @@ static int __hpp__group_sort_idx(struct hist_entry *a, struct hist_entry *b,
int cmp, nr_members, ret, i;
cmp = field_cmp(get_field(a), get_field(b));
- if (!perf_evsel__is_group_event(evsel))
+ if (!evsel__is_group_event(evsel))
return cmp;
nr_members = evsel->core.nr_members;
@@ -240,7 +240,7 @@ static int __hpp__sort(struct hist_entry *a, struct hist_entry *b,
return ret;
evsel = hists_to_evsel(a->hists);
- if (!perf_evsel__is_group_event(evsel))
+ if (!evsel__is_group_event(evsel))
return ret;
nr_members = evsel->core.nr_members;
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index c0cf8dff694e..ca07a162d602 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -10,6 +10,7 @@ perf-y += db-export.o
perf-y += env.o
perf-y += event.o
perf-y += evlist.o
+perf-y += sideband_evlist.o
perf-y += evsel.o
perf-y += evsel_fprintf.o
perf-y += perf_event_attr_fprintf.o
@@ -88,6 +89,7 @@ perf-y += counts.o
perf-y += stat.o
perf-y += stat-shadow.o
perf-y += stat-display.o
+perf-y += perf_api_probe.o
perf-y += record.o
perf-y += srcline.o
perf-y += srccode.o
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 9760d58e979a..d828c2d2edee 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -1191,7 +1191,7 @@ static struct disasm_line *disasm_line__new(struct annotate_args *args)
struct disasm_line *dl = NULL;
int nr = 1;
- if (perf_evsel__is_group_event(args->evsel))
+ if (evsel__is_group_event(args->evsel))
nr = args->evsel->core.nr_members;
dl = zalloc(disasm_line_size(nr));
@@ -1437,7 +1437,7 @@ annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start
if (queue)
return -1;
- if (perf_evsel__is_group_event(evsel))
+ if (evsel__is_group_event(evsel))
width *= evsel->core.nr_members;
if (!*al->line)
@@ -2156,7 +2156,7 @@ int symbol__annotate(struct map_symbol *ms, struct evsel *evsel,
.evsel = evsel,
.options = options,
};
- struct perf_env *env = perf_evsel__env(evsel);
+ struct perf_env *env = evsel__env(evsel);
const char *arch_name = perf_env__arch(env);
struct arch *arch;
int err;
@@ -2344,7 +2344,7 @@ int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel,
struct dso *dso = map->dso;
char *filename;
const char *d_filename;
- const char *evsel_name = perf_evsel__name(evsel);
+ const char *evsel_name = evsel__name(evsel);
struct annotation *notes = symbol__annotation(sym);
struct sym_hist *h = annotation__histogram(notes, evsel->idx);
struct annotation_line *pos, *queue = NULL;
@@ -2368,9 +2368,9 @@ int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel,
len = symbol__size(sym);
- if (perf_evsel__is_group_event(evsel)) {
+ if (evsel__is_group_event(evsel)) {
width *= evsel->core.nr_members;
- perf_evsel__group_desc(evsel, buf, sizeof(buf));
+ evsel__group_desc(evsel, buf, sizeof(buf));
evsel_name = buf;
}
@@ -2505,7 +2505,7 @@ static int symbol__annotate_fprintf2(struct symbol *sym, FILE *fp,
int map_symbol__annotation_dump(struct map_symbol *ms, struct evsel *evsel,
struct annotation_options *opts)
{
- const char *ev_name = perf_evsel__name(evsel);
+ const char *ev_name = evsel__name(evsel);
char buf[1024];
char *filename;
int err = -1;
@@ -2518,8 +2518,8 @@ int map_symbol__annotation_dump(struct map_symbol *ms, struct evsel *evsel,
if (fp == NULL)
goto out_free_filename;
- if (perf_evsel__is_group_event(evsel)) {
- perf_evsel__group_desc(evsel, buf, sizeof(buf));
+ if (evsel__is_group_event(evsel)) {
+ evsel__group_desc(evsel, buf, sizeof(buf));
ev_name = buf;
}
@@ -3064,7 +3064,7 @@ int symbol__annotate2(struct map_symbol *ms, struct evsel *evsel,
if (notes->offsets == NULL)
return ENOMEM;
- if (perf_evsel__is_group_event(evsel))
+ if (evsel__is_group_event(evsel))
nr_pcnt = evsel->core.nr_members;
err = symbol__annotate(ms, evsel, options, parch);
diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
index 53be12b23ff4..875a0dd540e5 100644
--- a/tools/perf/util/arm-spe.c
+++ b/tools/perf/util/arm-spe.c
@@ -176,6 +176,14 @@ static void arm_spe_free(struct perf_session *session)
free(spe);
}
+static bool arm_spe_evsel_is_auxtrace(struct perf_session *session,
+ struct evsel *evsel)
+{
+ struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, auxtrace);
+
+ return evsel->core.attr.type == spe->pmu_type;
+}
+
static const char * const arm_spe_info_fmts[] = {
[ARM_SPE_PMU_TYPE] = " PMU Type %"PRId64"\n",
};
@@ -218,6 +226,7 @@ int arm_spe_process_auxtrace_info(union perf_event *event,
spe->auxtrace.flush_events = arm_spe_flush;
spe->auxtrace.free_events = arm_spe_free_events;
spe->auxtrace.free = arm_spe_free;
+ spe->auxtrace.evsel_is_auxtrace = arm_spe_evsel_is_auxtrace;
session->auxtrace = &spe->auxtrace;
arm_spe_print_info(&auxtrace_info->priv[0]);
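The same hook can be filled in by any other auxtrace backend; a sketch for a hypothetical backend named "foo" (names invented, the callback signature comes from the struct auxtrace addition below):

    static bool foo_evsel_is_auxtrace(struct perf_session *session,
                                      struct evsel *evsel)
    {
            /* 'struct foo' stands in for the backend's private state, which
             * embeds its struct auxtrace and records its PMU type. */
            struct foo *foo = container_of(session->auxtrace, struct foo, auxtrace);

            return evsel->core.attr.type == foo->pmu_type;
    }

    /* ...and while setting up the backend's callbacks: */
    foo->auxtrace.evsel_is_auxtrace = foo_evsel_is_auxtrace;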
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index 3571ce72ca28..749487a41cc7 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -33,6 +33,7 @@
#include "evsel.h"
#include "evsel_config.h"
#include "symbol.h"
+#include "util/perf_api_probe.h"
#include "util/synthetic-events.h"
#include "thread_map.h"
#include "asm/bug.h"
@@ -58,25 +59,6 @@
#include "symbol/kallsyms.h"
#include <internal/lib.h>
-static struct perf_pmu *perf_evsel__find_pmu(struct evsel *evsel)
-{
- struct perf_pmu *pmu = NULL;
-
- while ((pmu = perf_pmu__scan(pmu)) != NULL) {
- if (pmu->type == evsel->core.attr.type)
- break;
- }
-
- return pmu;
-}
-
-static bool perf_evsel__is_aux_event(struct evsel *evsel)
-{
- struct perf_pmu *pmu = perf_evsel__find_pmu(evsel);
-
- return pmu && pmu->auxtrace;
-}
-
/*
* Make a group from 'leader' to 'last', requiring that the events were not
* already grouped to a different leader.
@@ -88,7 +70,7 @@ static int perf_evlist__regroup(struct evlist *evlist,
struct evsel *evsel;
bool grp;
- if (!perf_evsel__is_group_leader(leader))
+ if (!evsel__is_group_leader(leader))
return -EINVAL;
grp = false;
@@ -703,8 +685,8 @@ static int auxtrace_validate_aux_sample_size(struct evlist *evlist,
evlist__for_each_entry(evlist, evsel) {
sz = evsel->core.attr.aux_sample_size;
- if (perf_evsel__is_group_leader(evsel)) {
- has_aux_leader = perf_evsel__is_aux_event(evsel);
+ if (evsel__is_group_leader(evsel)) {
+ has_aux_leader = evsel__is_aux_event(evsel);
if (sz) {
if (has_aux_leader)
pr_err("Cannot add AUX area sampling to an AUX area event\n");
@@ -723,10 +705,10 @@ static int auxtrace_validate_aux_sample_size(struct evlist *evlist,
pr_err("Cannot add AUX area sampling because group leader is not an AUX area event\n");
return -EINVAL;
}
- perf_evsel__set_sample_bit(evsel, AUX);
+ evsel__set_sample_bit(evsel, AUX);
opts->auxtrace_sample_mode = true;
} else {
- perf_evsel__reset_sample_bit(evsel, AUX);
+ evsel__reset_sample_bit(evsel, AUX);
}
}
@@ -777,8 +759,8 @@ int auxtrace_parse_sample_options(struct auxtrace_record *itr,
/* Set aux_sample_size based on --aux-sample option */
evlist__for_each_entry(evlist, evsel) {
- if (perf_evsel__is_group_leader(evsel)) {
- has_aux_leader = perf_evsel__is_aux_event(evsel);
+ if (evsel__is_group_leader(evsel)) {
+ has_aux_leader = evsel__is_aux_event(evsel);
} else if (has_aux_leader) {
evsel->core.attr.aux_sample_size = sz;
}
@@ -787,7 +769,7 @@ no_opt:
aux_evsel = NULL;
/* Override with aux_sample_size from config term */
evlist__for_each_entry(evlist, evsel) {
- if (perf_evsel__is_aux_event(evsel))
+ if (evsel__is_aux_event(evsel))
aux_evsel = evsel;
term = perf_evsel__get_config_term(evsel, AUX_SAMPLE_SIZE);
if (term) {
@@ -1234,29 +1216,79 @@ out_free:
return err;
}
+static void unleader_evsel(struct evlist *evlist, struct evsel *leader)
+{
+ struct evsel *new_leader = NULL;
+ struct evsel *evsel;
+
+ /* Find new leader for the group */
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->leader != leader || evsel == leader)
+ continue;
+ if (!new_leader)
+ new_leader = evsel;
+ evsel->leader = new_leader;
+ }
+
+ /* Update group information */
+ if (new_leader) {
+ zfree(&new_leader->group_name);
+ new_leader->group_name = leader->group_name;
+ leader->group_name = NULL;
+
+ new_leader->core.nr_members = leader->core.nr_members - 1;
+ leader->core.nr_members = 1;
+ }
+}
+
+static void unleader_auxtrace(struct perf_session *session)
+{
+ struct evsel *evsel;
+
+ evlist__for_each_entry(session->evlist, evsel) {
+ if (auxtrace__evsel_is_auxtrace(session, evsel) &&
+ evsel__is_group_leader(evsel)) {
+ unleader_evsel(session->evlist, evsel);
+ }
+ }
+}
+
int perf_event__process_auxtrace_info(struct perf_session *session,
union perf_event *event)
{
enum auxtrace_type type = event->auxtrace_info.type;
+ int err;
if (dump_trace)
fprintf(stdout, " type: %u\n", type);
switch (type) {
case PERF_AUXTRACE_INTEL_PT:
- return intel_pt_process_auxtrace_info(event, session);
+ err = intel_pt_process_auxtrace_info(event, session);
+ break;
case PERF_AUXTRACE_INTEL_BTS:
- return intel_bts_process_auxtrace_info(event, session);
+ err = intel_bts_process_auxtrace_info(event, session);
+ break;
case PERF_AUXTRACE_ARM_SPE:
- return arm_spe_process_auxtrace_info(event, session);
+ err = arm_spe_process_auxtrace_info(event, session);
+ break;
case PERF_AUXTRACE_CS_ETM:
- return cs_etm__process_auxtrace_info(event, session);
+ err = cs_etm__process_auxtrace_info(event, session);
+ break;
case PERF_AUXTRACE_S390_CPUMSF:
- return s390_cpumsf_process_auxtrace_info(event, session);
+ err = s390_cpumsf_process_auxtrace_info(event, session);
+ break;
case PERF_AUXTRACE_UNKNOWN:
default:
return -EINVAL;
}
+
+ if (err)
+ return err;
+
+ unleader_auxtrace(session);
+
+ return 0;
}
s64 perf_event__process_auxtrace(struct perf_session *session,
@@ -1412,8 +1444,12 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
synth_opts->branches = true;
synth_opts->returns = true;
break;
+ case 'G':
case 'g':
- synth_opts->callchain = true;
+ if (p[-1] == 'G')
+ synth_opts->add_callchain = true;
+ else
+ synth_opts->callchain = true;
synth_opts->callchain_sz =
PERF_ITRACE_DEFAULT_CALLCHAIN_SZ;
while (*p == ' ' || *p == ',')
@@ -1428,8 +1464,12 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
synth_opts->callchain_sz = val;
}
break;
+ case 'L':
case 'l':
- synth_opts->last_branch = true;
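+ /* 'l': add branch context to synthesized events; 'L': add branch context to existing event records */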
+ if (p[-1] == 'L')
+ synth_opts->add_last_branch = true;
+ else
+ synth_opts->last_branch = true;
synth_opts->last_branch_sz =
PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ;
while (*p == ' ' || *p == ',')
@@ -2482,7 +2522,7 @@ static int parse_addr_filter(struct evsel *evsel, const char *filter,
goto out_exit;
}
- if (perf_evsel__append_addr_filter(evsel, new_filter)) {
+ if (evsel__append_addr_filter(evsel, new_filter)) {
err = -ENOMEM;
goto out_exit;
}
@@ -2500,9 +2540,9 @@ out_exit:
return err;
}
-static int perf_evsel__nr_addr_filter(struct evsel *evsel)
+static int evsel__nr_addr_filter(struct evsel *evsel)
{
- struct perf_pmu *pmu = perf_evsel__find_pmu(evsel);
+ struct perf_pmu *pmu = evsel__find_pmu(evsel);
int nr_addr_filters = 0;
if (!pmu)
@@ -2521,7 +2561,7 @@ int auxtrace_parse_filters(struct evlist *evlist)
evlist__for_each_entry(evlist, evsel) {
filter = evsel->filter;
- max_nr = perf_evsel__nr_addr_filter(evsel);
+ max_nr = evsel__nr_addr_filter(evsel);
if (!filter || !max_nr)
continue;
evsel->filter = NULL;
@@ -2577,3 +2617,12 @@ void auxtrace__free(struct perf_session *session)
return session->auxtrace->free(session);
}
+
+bool auxtrace__evsel_is_auxtrace(struct perf_session *session,
+ struct evsel *evsel)
+{
+ if (!session->auxtrace || !session->auxtrace->evsel_is_auxtrace)
+ return false;
+
+ return session->auxtrace->evsel_is_auxtrace(session, evsel);
+}
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index e58ef160b599..0220a2e86c16 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -21,6 +21,7 @@
union perf_event;
struct perf_session;
struct evlist;
+struct evsel;
struct perf_tool;
struct mmap;
struct perf_sample;
@@ -73,8 +74,10 @@ enum itrace_period_type {
* @calls: limit branch samples to calls (can be combined with @returns)
* @returns: limit branch samples to returns (can be combined with @calls)
* @callchain: add callchain to 'instructions' events
+ * @add_callchain: add callchain to existing event records
* @thread_stack: feed branches to the thread_stack
* @last_branch: add branch context to 'instruction' events
+ * @add_last_branch: add branch context to existing event records
* @callchain_sz: maximum callchain size
* @last_branch_sz: branch context size
* @period: 'instructions' events period
@@ -100,8 +103,10 @@ struct itrace_synth_opts {
bool calls;
bool returns;
bool callchain;
+ bool add_callchain;
bool thread_stack;
bool last_branch;
+ bool add_last_branch;
unsigned int callchain_sz;
unsigned int last_branch_sz;
unsigned long long period;
@@ -166,6 +171,8 @@ struct auxtrace {
struct perf_tool *tool);
void (*free_events)(struct perf_session *session);
void (*free)(struct perf_session *session);
+ bool (*evsel_is_auxtrace)(struct perf_session *session,
+ struct evsel *evsel);
};
/**
@@ -584,6 +591,8 @@ void auxtrace__dump_auxtrace_sample(struct perf_session *session,
int auxtrace__flush_events(struct perf_session *session, struct perf_tool *tool);
void auxtrace__free_events(struct perf_session *session);
void auxtrace__free(struct perf_session *session);
+bool auxtrace__evsel_is_auxtrace(struct perf_session *session,
+ struct evsel *evsel);
#define ITRACE_HELP \
" i: synthesize instructions events\n" \
@@ -750,6 +759,13 @@ void auxtrace_index__free(struct list_head *head __maybe_unused)
}
static inline
+bool auxtrace__evsel_is_auxtrace(struct perf_session *session __maybe_unused,
+ struct evsel *evsel __maybe_unused)
+{
+ return false;
+}
+
+static inline
int auxtrace_parse_filters(struct evlist *evlist __maybe_unused)
{
return 0;
diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c
index 0cd41a862952..3742511a08d1 100644
--- a/tools/perf/util/bpf-event.c
+++ b/tools/perf/util/bpf-event.c
@@ -509,8 +509,7 @@ static int bpf_event__sb_cb(union perf_event *event, void *data)
return 0;
}
-int bpf_event__add_sb_event(struct evlist **evlist,
- struct perf_env *env)
+int evlist__add_bpf_sb_event(struct evlist *evlist, struct perf_env *env)
{
struct perf_event_attr attr = {
.type = PERF_TYPE_SOFTWARE,
diff --git a/tools/perf/util/bpf-event.h b/tools/perf/util/bpf-event.h
index 81fdc88e6c1a..68f315c3df5b 100644
--- a/tools/perf/util/bpf-event.h
+++ b/tools/perf/util/bpf-event.h
@@ -33,8 +33,7 @@ struct btf_node {
#ifdef HAVE_LIBBPF_SUPPORT
int machine__process_bpf(struct machine *machine, union perf_event *event,
struct perf_sample *sample);
-int bpf_event__add_sb_event(struct evlist **evlist,
- struct perf_env *env);
+int evlist__add_bpf_sb_event(struct evlist *evlist, struct perf_env *env);
void bpf_event__print_bpf_prog_info(struct bpf_prog_info *info,
struct perf_env *env,
FILE *fp);
@@ -46,8 +45,8 @@ static inline int machine__process_bpf(struct machine *machine __maybe_unused,
return 0;
}
-static inline int bpf_event__add_sb_event(struct evlist **evlist __maybe_unused,
- struct perf_env *env __maybe_unused)
+static inline int evlist__add_bpf_sb_event(struct evlist *evlist __maybe_unused,
+ struct perf_env *env __maybe_unused)
{
return 0;
}
diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index 10c187b8b8ea..83bfb8768235 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -1430,7 +1430,7 @@ apply_config_evsel_for_key(const char *name, int map_fd, void *pkey,
return -BPF_LOADER_ERRNO__OBJCONF_MAP_EVTINH;
}
- if (perf_evsel__is_bpf_output(evsel))
+ if (evsel__is_bpf_output(evsel))
check_pass = true;
if (attr->type == PERF_TYPE_RAW)
check_pass = true;
diff --git a/tools/perf/util/branch.h b/tools/perf/util/branch.h
index 154a05cd03af..4d3f02fa223d 100644
--- a/tools/perf/util/branch.h
+++ b/tools/perf/util/branch.h
@@ -15,13 +15,18 @@
#include "event.h"
struct branch_flags {
- u64 mispred:1;
- u64 predicted:1;
- u64 in_tx:1;
- u64 abort:1;
- u64 cycles:16;
- u64 type:4;
- u64 reserved:40;
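+ /* Flags are accessible both as a whole word (value) and as individual bit-fields */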
+ union {
+ u64 value;
+ struct {
+ u64 mispred:1;
+ u64 predicted:1;
+ u64 in_tx:1;
+ u64 abort:1;
+ u64 cycles:16;
+ u64 type:4;
+ u64 reserved:40;
+ };
+ };
};
struct branch_info {
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 706bb7bbe1e1..8f668ee29f25 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -143,6 +143,9 @@ struct callchain_cursor_node {
u64 ip;
struct map_symbol ms;
const char *srcline;
+ /* Indicate valid cursor node for LBR stitch */
+ bool valid;
+
bool branch;
struct branch_flags branch_flags;
u64 branch_from;
@@ -151,6 +154,11 @@ struct callchain_cursor_node {
struct callchain_cursor_node *next;
};
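+/* A saved callchain cursor node on a list, used when stitching LBR call stacks */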
+struct stitch_list {
+ struct list_head node;
+ struct callchain_cursor_node cursor;
+};
+
struct callchain_cursor {
u64 nr;
struct callchain_cursor_node *first;
diff --git a/tools/perf/util/cap.h b/tools/perf/util/cap.h
index 051dc590ceee..ae52878c0b2e 100644
--- a/tools/perf/util/cap.h
+++ b/tools/perf/util/cap.h
@@ -29,4 +29,8 @@ static inline bool perf_cap__capable(int cap __maybe_unused)
#define CAP_SYSLOG 34
#endif
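+/* Define CAP_PERFMON for systems whose headers do not provide it yet */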
+#ifndef CAP_PERFMON
+#define CAP_PERFMON 38
+#endif
+
#endif /* __PERF_CAP_H */
diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c
index b73fb7823048..050dea9f1e88 100644
--- a/tools/perf/util/cgroup.c
+++ b/tools/perf/util/cgroup.c
@@ -107,7 +107,8 @@ found:
static void cgroup__delete(struct cgroup *cgroup)
{
- close(cgroup->fd);
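+ /* Only close fds that were actually opened (fd may be -1) */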
+ if (cgroup->fd >= 0)
+ close(cgroup->fd);
zfree(&cgroup->name);
free(cgroup);
}
diff --git a/tools/perf/util/cloexec.c b/tools/perf/util/cloexec.c
index a12872f2856a..6b3988a7aba8 100644
--- a/tools/perf/util/cloexec.c
+++ b/tools/perf/util/cloexec.c
@@ -28,7 +28,7 @@ int __weak sched_getcpu(void)
static int perf_flag_probe(void)
{
- /* use 'safest' configuration as used in perf_evsel__fallback() */
+ /* use 'safest' configuration as used in evsel__fallback() */
struct perf_event_attr attr = {
.type = PERF_TYPE_SOFTWARE,
.config = PERF_COUNT_SW_CPU_CLOCK,
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
index cd92a99eb89d..cd007cc9c283 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
@@ -564,6 +564,8 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer(
resp = cs_etm_decoder__set_tid(etmq, packet_queue,
elem, trace_chan_id);
break;
+ /* Unused packet types */
+ case OCSD_GEN_TRC_ELEM_I_RANGE_NOPATH:
case OCSD_GEN_TRC_ELEM_ADDR_NACC:
case OCSD_GEN_TRC_ELEM_CYCLE_COUNT:
case OCSD_GEN_TRC_ELEM_ADDR_UNKNOWN:
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index 62d2f9b9ce1b..c283223fb31f 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -94,6 +94,9 @@ struct cs_etm_queue {
struct cs_etm_traceid_queue **traceid_queues;
};
+/* RB tree for quick conversion between traceID and metadata pointers */
+static struct intlist *traceid_list;
+
static int cs_etm__update_queues(struct cs_etm_auxtrace *etm);
static int cs_etm__process_queues(struct cs_etm_auxtrace *etm);
static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
@@ -631,6 +634,16 @@ static void cs_etm__free(struct perf_session *session)
zfree(&aux);
}
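+/* An evsel is a CS ETM AUX area event if its attr.type matches the cs_etm PMU type */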
+static bool cs_etm__evsel_is_auxtrace(struct perf_session *session,
+ struct evsel *evsel)
+{
+ struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
+ struct cs_etm_auxtrace,
+ auxtrace);
+
+ return evsel->core.attr.type == aux->pmu_type;
+}
+
static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address)
{
struct machine *machine;
@@ -2618,6 +2631,7 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
etm->auxtrace.flush_events = cs_etm__flush_events;
etm->auxtrace.free_events = cs_etm__free_events;
etm->auxtrace.free = cs_etm__free;
+ etm->auxtrace.evsel_is_auxtrace = cs_etm__evsel_is_auxtrace;
session->auxtrace = &etm->auxtrace;
etm->unknown_thread = thread__new(999999999, 999999999);
diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h
index 650ecc2a6349..4ad925d6d799 100644
--- a/tools/perf/util/cs-etm.h
+++ b/tools/perf/util/cs-etm.h
@@ -114,9 +114,6 @@ enum cs_etm_isa {
CS_ETM_ISA_T32,
};
-/* RB tree for quick conversion between traceID and metadata pointers */
-struct intlist *traceid_list;
-
struct cs_etm_queue;
struct cs_etm_packet {
diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
index dbc772bfb04e..5f36fc6a5578 100644
--- a/tools/perf/util/data-convert-bt.c
+++ b/tools/perf/util/data-convert-bt.c
@@ -835,7 +835,7 @@ static int process_sample_event(struct perf_tool *tool,
return -1;
}
- if (perf_evsel__is_bpf_output(evsel)) {
+ if (evsel__is_bpf_output(evsel)) {
ret = add_bpf_output_values(event_class, event, sample);
if (ret)
return -1;
@@ -1155,7 +1155,7 @@ static int add_event(struct ctf_writer *cw, struct evsel *evsel)
{
struct bt_ctf_event_class *event_class;
struct evsel_priv *priv;
- const char *name = perf_evsel__name(evsel);
+ const char *name = evsel__name(evsel);
int ret;
pr("Adding event '%s' (type %d)\n", name, evsel->core.attr.type);
@@ -1174,7 +1174,7 @@ static int add_event(struct ctf_writer *cw, struct evsel *evsel)
goto err;
}
- if (perf_evsel__is_bpf_output(evsel)) {
+ if (evsel__is_bpf_output(evsel)) {
ret = add_bpf_output_types(cw, event_class);
if (ret)
goto err;
diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
index 7632075a8792..1ab2682d5d2b 100644
--- a/tools/perf/util/env.h
+++ b/tools/perf/util/env.h
@@ -48,6 +48,7 @@ struct perf_env {
char *cpuid;
unsigned long long total_mem;
unsigned int msr_pmu_type;
+ unsigned int max_branches;
int nr_cmdline;
int nr_sibling_cores;
@@ -57,12 +58,14 @@ struct perf_env {
int nr_memory_nodes;
int nr_pmu_mappings;
int nr_groups;
+ int nr_cpu_pmu_caps;
char *cmdline;
const char **cmdline_argv;
char *sibling_cores;
char *sibling_dies;
char *sibling_threads;
char *pmu_mappings;
+ char *cpu_pmu_caps;
struct cpu_topology_map *cpu;
struct cpu_cache_level *caches;
int caches_cnt;
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index dc0e11214ae1..f581550a3015 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -626,7 +626,7 @@ int machine__resolve(struct machine *machine, struct addr_location *al,
ret = strlist__has_entry(symbol_conf.sym_list,
al->sym->name);
}
- if (!(ret && al->sym)) {
+ if (!ret && al->sym) {
snprintf(al_addr_str, sz, "0x%"PRIx64,
al->map->unmap_ip(al->map, al->sym->start));
ret = strlist__has_entry(symbol_conf.sym_list,
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 1548237b6558..0a0b760d6948 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -23,6 +23,7 @@
#include "asm/bug.h"
#include "bpf-event.h"
#include "util/string2.h"
+#include "util/perf_api_probe.h"
#include <signal.h>
#include <unistd.h>
#include <sched.h>
@@ -118,7 +119,7 @@ static void perf_evlist__update_id_pos(struct evlist *evlist)
struct evsel *evsel;
evlist__for_each_entry(evlist, evsel)
- perf_evsel__calc_id_pos(evsel);
+ evsel__calc_id_pos(evsel);
perf_evlist__set_id_pos(evlist);
}
@@ -390,14 +391,14 @@ void evlist__disable(struct evlist *evlist)
evlist__for_each_entry(evlist, pos) {
if (evsel__cpu_iter_skip(pos, cpu))
continue;
- if (pos->disabled || !perf_evsel__is_group_leader(pos) || !pos->core.fd)
+ if (pos->disabled || !evsel__is_group_leader(pos) || !pos->core.fd)
continue;
evsel__disable_cpu(pos, pos->cpu_iter - 1);
}
}
affinity__cleanup(&affinity);
evlist__for_each_entry(evlist, pos) {
- if (!perf_evsel__is_group_leader(pos) || !pos->core.fd)
+ if (!evsel__is_group_leader(pos) || !pos->core.fd)
continue;
pos->disabled = true;
}
@@ -420,14 +421,14 @@ void evlist__enable(struct evlist *evlist)
evlist__for_each_entry(evlist, pos) {
if (evsel__cpu_iter_skip(pos, cpu))
continue;
- if (!perf_evsel__is_group_leader(pos) || !pos->core.fd)
+ if (!evsel__is_group_leader(pos) || !pos->core.fd)
continue;
evsel__enable_cpu(pos, pos->cpu_iter - 1);
}
}
affinity__cleanup(&affinity);
evlist__for_each_entry(evlist, pos) {
- if (!perf_evsel__is_group_leader(pos) || !pos->core.fd)
+ if (!evsel__is_group_leader(pos) || !pos->core.fd)
continue;
pos->disabled = false;
}
@@ -947,7 +948,7 @@ void __perf_evlist__set_sample_bit(struct evlist *evlist,
struct evsel *evsel;
evlist__for_each_entry(evlist, evsel)
- __perf_evsel__set_sample_bit(evsel, bit);
+ __evsel__set_sample_bit(evsel, bit);
}
void __perf_evlist__reset_sample_bit(struct evlist *evlist,
@@ -956,7 +957,7 @@ void __perf_evlist__reset_sample_bit(struct evlist *evlist,
struct evsel *evsel;
evlist__for_each_entry(evlist, evsel)
- __perf_evsel__reset_sample_bit(evsel, bit);
+ __evsel__reset_sample_bit(evsel, bit);
}
int perf_evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel)
@@ -994,7 +995,7 @@ int perf_evlist__set_tp_filter(struct evlist *evlist, const char *filter)
if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT)
continue;
- err = perf_evsel__set_filter(evsel, filter);
+ err = evsel__set_filter(evsel, filter);
if (err)
break;
}
@@ -1014,7 +1015,7 @@ int perf_evlist__append_tp_filter(struct evlist *evlist, const char *filter)
if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT)
continue;
- err = perf_evsel__append_tp_filter(evsel, filter);
+ err = evsel__append_tp_filter(evsel, filter);
if (err)
break;
}
@@ -1131,8 +1132,10 @@ bool perf_evlist__valid_read_format(struct evlist *evlist)
u64 sample_type = first->core.attr.sample_type;
evlist__for_each_entry(evlist, pos) {
- if (read_format != pos->core.attr.read_format)
- return false;
+ if (read_format != pos->core.attr.read_format) {
+ pr_debug("Read format differs %#" PRIx64 " vs %#" PRIx64 "\n",
+ read_format, (u64)pos->core.attr.read_format);
+ }
}
/* PERF_SAMPLE_READ implies PERF_FORMAT_ID. */
@@ -1436,7 +1439,7 @@ int perf_evlist__parse_sample(struct evlist *evlist, union perf_event *event,
if (!evsel)
return -EFAULT;
- return perf_evsel__parse_sample(evsel, event, sample);
+ return evsel__parse_sample(evsel, event, sample);
}
int perf_evlist__parse_sample_timestamp(struct evlist *evlist,
@@ -1447,7 +1450,7 @@ int perf_evlist__parse_sample_timestamp(struct evlist *evlist,
if (!evsel)
return -EFAULT;
- return perf_evsel__parse_sample_timestamp(evsel, event, timestamp);
+ return evsel__parse_sample_timestamp(evsel, event, timestamp);
}
int perf_evlist__strerror_open(struct evlist *evlist,
@@ -1701,133 +1704,3 @@ struct evsel *perf_evlist__reset_weak_group(struct evlist *evsel_list,
}
return leader;
}
-
-int perf_evlist__add_sb_event(struct evlist **evlist,
- struct perf_event_attr *attr,
- perf_evsel__sb_cb_t cb,
- void *data)
-{
- struct evsel *evsel;
- bool new_evlist = (*evlist) == NULL;
-
- if (*evlist == NULL)
- *evlist = evlist__new();
- if (*evlist == NULL)
- return -1;
-
- if (!attr->sample_id_all) {
- pr_warning("enabling sample_id_all for all side band events\n");
- attr->sample_id_all = 1;
- }
-
- evsel = perf_evsel__new_idx(attr, (*evlist)->core.nr_entries);
- if (!evsel)
- goto out_err;
-
- evsel->side_band.cb = cb;
- evsel->side_band.data = data;
- evlist__add(*evlist, evsel);
- return 0;
-
-out_err:
- if (new_evlist) {
- evlist__delete(*evlist);
- *evlist = NULL;
- }
- return -1;
-}
-
-static void *perf_evlist__poll_thread(void *arg)
-{
- struct evlist *evlist = arg;
- bool draining = false;
- int i, done = 0;
- /*
- * In order to read symbols from other namespaces perf needs to call
- * setns(2). This isn't permitted if the struct_fs has multiple users.
- * unshare(2) the fs so that we may continue to setns into namespaces
- * that we're observing when, for instance, reading the build-ids at
- * the end of a 'perf record' session.
- */
- unshare(CLONE_FS);
-
- while (!done) {
- bool got_data = false;
-
- if (evlist->thread.done)
- draining = true;
-
- if (!draining)
- evlist__poll(evlist, 1000);
-
- for (i = 0; i < evlist->core.nr_mmaps; i++) {
- struct mmap *map = &evlist->mmap[i];
- union perf_event *event;
-
- if (perf_mmap__read_init(&map->core))
- continue;
- while ((event = perf_mmap__read_event(&map->core)) != NULL) {
- struct evsel *evsel = perf_evlist__event2evsel(evlist, event);
-
- if (evsel && evsel->side_band.cb)
- evsel->side_band.cb(event, evsel->side_band.data);
- else
- pr_warning("cannot locate proper evsel for the side band event\n");
-
- perf_mmap__consume(&map->core);
- got_data = true;
- }
- perf_mmap__read_done(&map->core);
- }
-
- if (draining && !got_data)
- break;
- }
- return NULL;
-}
-
-int perf_evlist__start_sb_thread(struct evlist *evlist,
- struct target *target)
-{
- struct evsel *counter;
-
- if (!evlist)
- return 0;
-
- if (perf_evlist__create_maps(evlist, target))
- goto out_delete_evlist;
-
- evlist__for_each_entry(evlist, counter) {
- if (evsel__open(counter, evlist->core.cpus,
- evlist->core.threads) < 0)
- goto out_delete_evlist;
- }
-
- if (evlist__mmap(evlist, UINT_MAX))
- goto out_delete_evlist;
-
- evlist__for_each_entry(evlist, counter) {
- if (evsel__enable(counter))
- goto out_delete_evlist;
- }
-
- evlist->thread.done = 0;
- if (pthread_create(&evlist->thread.th, NULL, perf_evlist__poll_thread, evlist))
- goto out_delete_evlist;
-
- return 0;
-
-out_delete_evlist:
- evlist__delete(evlist);
- evlist = NULL;
- return -1;
-}
-
-void perf_evlist__stop_sb_thread(struct evlist *evlist)
-{
- if (!evlist)
- return;
- evlist->thread.done = 1;
- pthread_join(evlist->thread.th, NULL);
- evlist__delete(evlist);
-}
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index f5bd5c386df1..b6f325dfb4d2 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -107,10 +107,11 @@ int __perf_evlist__add_default_attrs(struct evlist *evlist,
int perf_evlist__add_dummy(struct evlist *evlist);
-int perf_evlist__add_sb_event(struct evlist **evlist,
+int perf_evlist__add_sb_event(struct evlist *evlist,
struct perf_event_attr *attr,
- perf_evsel__sb_cb_t cb,
+ evsel__sb_cb_t cb,
void *data);
+void evlist__set_cb(struct evlist *evlist, evsel__sb_cb_t cb, void *data);
int perf_evlist__start_sb_thread(struct evlist *evlist,
struct target *target);
void perf_evlist__stop_sb_thread(struct evlist *evlist);
@@ -173,10 +174,6 @@ void evlist__close(struct evlist *evlist);
struct callchain_param;
void perf_evlist__set_id_pos(struct evlist *evlist);
-bool perf_can_sample_identifier(void);
-bool perf_can_record_switch_events(void);
-bool perf_can_record_cpu_wide(void);
-bool perf_can_aux_sample(void);
void perf_evlist__config(struct evlist *evlist, struct record_opts *opts,
struct callchain_param *callchain);
int record_opts__config(struct record_opts *opts);
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index eb880efbce16..f3e60c45d59a 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -102,7 +102,7 @@ set_methods:
#define FD(e, x, y) (*(int *)xyarray__entry(e->core.fd, x, y))
-int __perf_evsel__sample_size(u64 sample_type)
+int __evsel__sample_size(u64 sample_type)
{
u64 mask = sample_type & PERF_SAMPLE_MASK;
int size = 0;
@@ -178,53 +178,53 @@ static int __perf_evsel__calc_is_pos(u64 sample_type)
return idx;
}
-void perf_evsel__calc_id_pos(struct evsel *evsel)
+void evsel__calc_id_pos(struct evsel *evsel)
{
evsel->id_pos = __perf_evsel__calc_id_pos(evsel->core.attr.sample_type);
evsel->is_pos = __perf_evsel__calc_is_pos(evsel->core.attr.sample_type);
}
-void __perf_evsel__set_sample_bit(struct evsel *evsel,
+void __evsel__set_sample_bit(struct evsel *evsel,
enum perf_event_sample_format bit)
{
if (!(evsel->core.attr.sample_type & bit)) {
evsel->core.attr.sample_type |= bit;
evsel->sample_size += sizeof(u64);
- perf_evsel__calc_id_pos(evsel);
+ evsel__calc_id_pos(evsel);
}
}
-void __perf_evsel__reset_sample_bit(struct evsel *evsel,
+void __evsel__reset_sample_bit(struct evsel *evsel,
enum perf_event_sample_format bit)
{
if (evsel->core.attr.sample_type & bit) {
evsel->core.attr.sample_type &= ~bit;
evsel->sample_size -= sizeof(u64);
- perf_evsel__calc_id_pos(evsel);
+ evsel__calc_id_pos(evsel);
}
}
-void perf_evsel__set_sample_id(struct evsel *evsel,
+void evsel__set_sample_id(struct evsel *evsel,
bool can_sample_identifier)
{
if (can_sample_identifier) {
- perf_evsel__reset_sample_bit(evsel, ID);
- perf_evsel__set_sample_bit(evsel, IDENTIFIER);
+ evsel__reset_sample_bit(evsel, ID);
+ evsel__set_sample_bit(evsel, IDENTIFIER);
} else {
- perf_evsel__set_sample_bit(evsel, ID);
+ evsel__set_sample_bit(evsel, ID);
}
evsel->core.attr.read_format |= PERF_FORMAT_ID;
}
/**
- * perf_evsel__is_function_event - Return whether given evsel is a function
+ * evsel__is_function_event - Return whether given evsel is a function
* trace event
*
* @evsel - evsel selector to be tested
*
* Return %true if event is function trace event
*/
-bool perf_evsel__is_function_event(struct evsel *evsel)
+bool evsel__is_function_event(struct evsel *evsel)
{
#define FUNCTION_EVENT "ftrace:function"
@@ -249,8 +249,8 @@ void evsel__init(struct evsel *evsel,
evsel->bpf_fd = -1;
INIT_LIST_HEAD(&evsel->config_terms);
perf_evsel__object.init(evsel);
- evsel->sample_size = __perf_evsel__sample_size(attr->sample_type);
- perf_evsel__calc_id_pos(evsel);
+ evsel->sample_size = __evsel__sample_size(attr->sample_type);
+ evsel__calc_id_pos(evsel);
evsel->cmdline_group_boundary = false;
evsel->metric_expr = NULL;
evsel->metric_name = NULL;
@@ -267,13 +267,13 @@ struct evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx)
return NULL;
evsel__init(evsel, attr, idx);
- if (perf_evsel__is_bpf_output(evsel)) {
+ if (evsel__is_bpf_output(evsel)) {
evsel->core.attr.sample_type |= (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME |
PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD),
evsel->core.attr.sample_period = 1;
}
- if (perf_evsel__is_clock(evsel)) {
+ if (evsel__is_clock(evsel)) {
/*
* The evsel->unit points to static alias->unit
* so it's ok to use static string in here.
@@ -385,7 +385,7 @@ const char *perf_evsel__hw_names[PERF_COUNT_HW_MAX] = {
"ref-cycles",
};
-static const char *__perf_evsel__hw_name(u64 config)
+static const char *__evsel__hw_name(u64 config)
{
if (config < PERF_COUNT_HW_MAX && perf_evsel__hw_names[config])
return perf_evsel__hw_names[config];
@@ -429,9 +429,9 @@ static int perf_evsel__add_modifiers(struct evsel *evsel, char *bf, size_t size)
return r;
}
-static int perf_evsel__hw_name(struct evsel *evsel, char *bf, size_t size)
+static int evsel__hw_name(struct evsel *evsel, char *bf, size_t size)
{
- int r = scnprintf(bf, size, "%s", __perf_evsel__hw_name(evsel->core.attr.config));
+ int r = scnprintf(bf, size, "%s", __evsel__hw_name(evsel->core.attr.config));
return r + perf_evsel__add_modifiers(evsel, bf + r, size - r);
}
@@ -448,20 +448,20 @@ const char *perf_evsel__sw_names[PERF_COUNT_SW_MAX] = {
"dummy",
};
-static const char *__perf_evsel__sw_name(u64 config)
+static const char *__evsel__sw_name(u64 config)
{
if (config < PERF_COUNT_SW_MAX && perf_evsel__sw_names[config])
return perf_evsel__sw_names[config];
return "unknown-software";
}
-static int perf_evsel__sw_name(struct evsel *evsel, char *bf, size_t size)
+static int evsel__sw_name(struct evsel *evsel, char *bf, size_t size)
{
- int r = scnprintf(bf, size, "%s", __perf_evsel__sw_name(evsel->core.attr.config));
+ int r = scnprintf(bf, size, "%s", __evsel__sw_name(evsel->core.attr.config));
return r + perf_evsel__add_modifiers(evsel, bf + r, size - r);
}
-static int __perf_evsel__bp_name(char *bf, size_t size, u64 addr, u64 type)
+static int __evsel__bp_name(char *bf, size_t size, u64 addr, u64 type)
{
int r;
@@ -479,10 +479,10 @@ static int __perf_evsel__bp_name(char *bf, size_t size, u64 addr, u64 type)
return r;
}
-static int perf_evsel__bp_name(struct evsel *evsel, char *bf, size_t size)
+static int evsel__bp_name(struct evsel *evsel, char *bf, size_t size)
{
struct perf_event_attr *attr = &evsel->core.attr;
- int r = __perf_evsel__bp_name(bf, size, attr->bp_addr, attr->bp_type);
+ int r = __evsel__bp_name(bf, size, attr->bp_addr, attr->bp_type);
return r + perf_evsel__add_modifiers(evsel, bf + r, size - r);
}
@@ -531,7 +531,7 @@ static unsigned long perf_evsel__hw_cache_stat[C(MAX)] = {
[C(NODE)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
};
-bool perf_evsel__is_cache_op_valid(u8 type, u8 op)
+bool evsel__is_cache_op_valid(u8 type, u8 op)
{
if (perf_evsel__hw_cache_stat[type] & COP(op))
return true; /* valid */
@@ -539,8 +539,7 @@ bool perf_evsel__is_cache_op_valid(u8 type, u8 op)
return false; /* invalid */
}
-int __perf_evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result,
- char *bf, size_t size)
+int __evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result, char *bf, size_t size)
{
if (result) {
return scnprintf(bf, size, "%s-%s-%s", perf_evsel__hw_cache[type][0],
@@ -552,7 +551,7 @@ int __perf_evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result,
perf_evsel__hw_cache_op[op][1]);
}
-static int __perf_evsel__hw_cache_name(u64 config, char *bf, size_t size)
+static int __evsel__hw_cache_name(u64 config, char *bf, size_t size)
{
u8 op, result, type = (config >> 0) & 0xff;
const char *err = "unknown-ext-hardware-cache-type";
@@ -571,33 +570,33 @@ static int __perf_evsel__hw_cache_name(u64 config, char *bf, size_t size)
goto out_err;
err = "invalid-cache";
- if (!perf_evsel__is_cache_op_valid(type, op))
+ if (!evsel__is_cache_op_valid(type, op))
goto out_err;
- return __perf_evsel__hw_cache_type_op_res_name(type, op, result, bf, size);
+ return __evsel__hw_cache_type_op_res_name(type, op, result, bf, size);
out_err:
return scnprintf(bf, size, "%s", err);
}
-static int perf_evsel__hw_cache_name(struct evsel *evsel, char *bf, size_t size)
+static int evsel__hw_cache_name(struct evsel *evsel, char *bf, size_t size)
{
- int ret = __perf_evsel__hw_cache_name(evsel->core.attr.config, bf, size);
+ int ret = __evsel__hw_cache_name(evsel->core.attr.config, bf, size);
return ret + perf_evsel__add_modifiers(evsel, bf + ret, size - ret);
}
-static int perf_evsel__raw_name(struct evsel *evsel, char *bf, size_t size)
+static int evsel__raw_name(struct evsel *evsel, char *bf, size_t size)
{
int ret = scnprintf(bf, size, "raw 0x%" PRIx64, evsel->core.attr.config);
return ret + perf_evsel__add_modifiers(evsel, bf + ret, size - ret);
}
-static int perf_evsel__tool_name(char *bf, size_t size)
+static int evsel__tool_name(char *bf, size_t size)
{
int ret = scnprintf(bf, size, "duration_time");
return ret;
}
-const char *perf_evsel__name(struct evsel *evsel)
+const char *evsel__name(struct evsel *evsel)
{
char bf[128];
@@ -609,22 +608,22 @@ const char *perf_evsel__name(struct evsel *evsel)
switch (evsel->core.attr.type) {
case PERF_TYPE_RAW:
- perf_evsel__raw_name(evsel, bf, sizeof(bf));
+ evsel__raw_name(evsel, bf, sizeof(bf));
break;
case PERF_TYPE_HARDWARE:
- perf_evsel__hw_name(evsel, bf, sizeof(bf));
+ evsel__hw_name(evsel, bf, sizeof(bf));
break;
case PERF_TYPE_HW_CACHE:
- perf_evsel__hw_cache_name(evsel, bf, sizeof(bf));
+ evsel__hw_cache_name(evsel, bf, sizeof(bf));
break;
case PERF_TYPE_SOFTWARE:
if (evsel->tool_event)
- perf_evsel__tool_name(bf, sizeof(bf));
+ evsel__tool_name(bf, sizeof(bf));
else
- perf_evsel__sw_name(evsel, bf, sizeof(bf));
+ evsel__sw_name(evsel, bf, sizeof(bf));
break;
case PERF_TYPE_TRACEPOINT:
@@ -632,7 +631,7 @@ const char *perf_evsel__name(struct evsel *evsel)
break;
case PERF_TYPE_BREAKPOINT:
- perf_evsel__bp_name(evsel, bf, sizeof(bf));
+ evsel__bp_name(evsel, bf, sizeof(bf));
break;
default:
@@ -649,7 +648,7 @@ out_unknown:
return "unknown";
}
-const char *perf_evsel__group_name(struct evsel *evsel)
+const char *evsel__group_name(struct evsel *evsel)
{
return evsel->group_name ?: "anon group";
}
@@ -664,21 +663,19 @@ const char *perf_evsel__group_name(struct evsel *evsel)
* For record -e 'cycles,instructions' and report --group
* 'cycles:u, instructions:u'
*/
-int perf_evsel__group_desc(struct evsel *evsel, char *buf, size_t size)
+int evsel__group_desc(struct evsel *evsel, char *buf, size_t size)
{
int ret = 0;
struct evsel *pos;
- const char *group_name = perf_evsel__group_name(evsel);
+ const char *group_name = evsel__group_name(evsel);
if (!evsel->forced_leader)
ret = scnprintf(buf, size, "%s { ", group_name);
- ret += scnprintf(buf + ret, size - ret, "%s",
- perf_evsel__name(evsel));
+ ret += scnprintf(buf + ret, size - ret, "%s", evsel__name(evsel));
for_each_group_member(pos, evsel)
- ret += scnprintf(buf + ret, size - ret, ", %s",
- perf_evsel__name(pos));
+ ret += scnprintf(buf + ret, size - ret, ", %s", evsel__name(pos));
if (!evsel->forced_leader)
ret += scnprintf(buf + ret, size - ret, " }");
@@ -686,14 +683,13 @@ int perf_evsel__group_desc(struct evsel *evsel, char *buf, size_t size)
return ret;
}
-static void __perf_evsel__config_callchain(struct evsel *evsel,
- struct record_opts *opts,
- struct callchain_param *param)
+static void __evsel__config_callchain(struct evsel *evsel, struct record_opts *opts,
+ struct callchain_param *param)
{
- bool function = perf_evsel__is_function_event(evsel);
+ bool function = evsel__is_function_event(evsel);
struct perf_event_attr *attr = &evsel->core.attr;
- perf_evsel__set_sample_bit(evsel, CALLCHAIN);
+ evsel__set_sample_bit(evsel, CALLCHAIN);
attr->sample_max_stack = param->max_stack;
@@ -708,7 +704,7 @@ static void __perf_evsel__config_callchain(struct evsel *evsel,
"to get user callchain information. "
"Falling back to framepointers.\n");
} else {
- perf_evsel__set_sample_bit(evsel, BRANCH_STACK);
+ evsel__set_sample_bit(evsel, BRANCH_STACK);
attr->branch_sample_type = PERF_SAMPLE_BRANCH_USER |
PERF_SAMPLE_BRANCH_CALL_STACK |
PERF_SAMPLE_BRANCH_NO_CYCLES |
@@ -722,8 +718,8 @@ static void __perf_evsel__config_callchain(struct evsel *evsel,
if (param->record_mode == CALLCHAIN_DWARF) {
if (!function) {
- perf_evsel__set_sample_bit(evsel, REGS_USER);
- perf_evsel__set_sample_bit(evsel, STACK_USER);
+ evsel__set_sample_bit(evsel, REGS_USER);
+ evsel__set_sample_bit(evsel, STACK_USER);
if (opts->sample_user_regs && DWARF_MINIMAL_REGS != PERF_REGS_MASK) {
attr->sample_regs_user |= DWARF_MINIMAL_REGS;
pr_warning("WARNING: The use of --call-graph=dwarf may require all the user registers, "
@@ -746,12 +742,11 @@ static void __perf_evsel__config_callchain(struct evsel *evsel,
}
}
-void perf_evsel__config_callchain(struct evsel *evsel,
- struct record_opts *opts,
- struct callchain_param *param)
+void evsel__config_callchain(struct evsel *evsel, struct record_opts *opts,
+ struct callchain_param *param)
{
if (param->enabled)
- return __perf_evsel__config_callchain(evsel, opts, param);
+ return __evsel__config_callchain(evsel, opts, param);
}
static void
@@ -760,16 +755,16 @@ perf_evsel__reset_callgraph(struct evsel *evsel,
{
struct perf_event_attr *attr = &evsel->core.attr;
- perf_evsel__reset_sample_bit(evsel, CALLCHAIN);
+ evsel__reset_sample_bit(evsel, CALLCHAIN);
if (param->record_mode == CALLCHAIN_LBR) {
- perf_evsel__reset_sample_bit(evsel, BRANCH_STACK);
+ evsel__reset_sample_bit(evsel, BRANCH_STACK);
attr->branch_sample_type &= ~(PERF_SAMPLE_BRANCH_USER |
PERF_SAMPLE_BRANCH_CALL_STACK |
PERF_SAMPLE_BRANCH_HW_INDEX);
}
if (param->record_mode == CALLCHAIN_DWARF) {
- perf_evsel__reset_sample_bit(evsel, REGS_USER);
- perf_evsel__reset_sample_bit(evsel, STACK_USER);
+ evsel__reset_sample_bit(evsel, REGS_USER);
+ evsel__reset_sample_bit(evsel, STACK_USER);
}
}
@@ -793,32 +788,32 @@ static void apply_config_terms(struct evsel *evsel,
if (!(term->weak && opts->user_interval != ULLONG_MAX)) {
attr->sample_period = term->val.period;
attr->freq = 0;
- perf_evsel__reset_sample_bit(evsel, PERIOD);
+ evsel__reset_sample_bit(evsel, PERIOD);
}
break;
case PERF_EVSEL__CONFIG_TERM_FREQ:
if (!(term->weak && opts->user_freq != UINT_MAX)) {
attr->sample_freq = term->val.freq;
attr->freq = 1;
- perf_evsel__set_sample_bit(evsel, PERIOD);
+ evsel__set_sample_bit(evsel, PERIOD);
}
break;
case PERF_EVSEL__CONFIG_TERM_TIME:
if (term->val.time)
- perf_evsel__set_sample_bit(evsel, TIME);
+ evsel__set_sample_bit(evsel, TIME);
else
- perf_evsel__reset_sample_bit(evsel, TIME);
+ evsel__reset_sample_bit(evsel, TIME);
break;
case PERF_EVSEL__CONFIG_TERM_CALLGRAPH:
callgraph_buf = term->val.str;
break;
case PERF_EVSEL__CONFIG_TERM_BRANCH:
if (term->val.str && strcmp(term->val.str, "no")) {
- perf_evsel__set_sample_bit(evsel, BRANCH_STACK);
+ evsel__set_sample_bit(evsel, BRANCH_STACK);
parse_branch_str(term->val.str,
&attr->branch_sample_type);
} else
- perf_evsel__reset_sample_bit(evsel, BRANCH_STACK);
+ evsel__reset_sample_bit(evsel, BRANCH_STACK);
break;
case PERF_EVSEL__CONFIG_TERM_STACK_USER:
dump_size = term->val.stack_user;
@@ -832,7 +827,7 @@ static void apply_config_terms(struct evsel *evsel,
case PERF_EVSEL__CONFIG_TERM_INHERIT:
/*
* attr->inherit should have already been set by
- * perf_evsel__config. If user explicitly set
+ * evsel__config. If user explicitly set
* inherit using config terms, override global
* opt->no_inherit setting.
*/
@@ -897,11 +892,11 @@ static void apply_config_terms(struct evsel *evsel,
/* set perf-event callgraph */
if (param.enabled) {
if (sample_address) {
- perf_evsel__set_sample_bit(evsel, ADDR);
- perf_evsel__set_sample_bit(evsel, DATA_SRC);
+ evsel__set_sample_bit(evsel, ADDR);
+ evsel__set_sample_bit(evsel, DATA_SRC);
evsel->core.attr.mmap_data = track;
}
- perf_evsel__config_callchain(evsel, opts, &param);
+ evsel__config_callchain(evsel, opts, &param);
}
}
}
@@ -953,8 +948,8 @@ struct perf_evsel_config_term *__perf_evsel__get_config_term(struct evsel *evsel
* enable/disable events specifically, as there's no
* initial traced exec call.
*/
-void perf_evsel__config(struct evsel *evsel, struct record_opts *opts,
- struct callchain_param *callchain)
+void evsel__config(struct evsel *evsel, struct record_opts *opts,
+ struct callchain_param *callchain)
{
struct evsel *leader = evsel->leader;
struct perf_event_attr *attr = &evsel->core.attr;
@@ -965,17 +960,17 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts,
attr->inherit = !opts->no_inherit;
attr->write_backward = opts->overwrite ? 1 : 0;
- perf_evsel__set_sample_bit(evsel, IP);
- perf_evsel__set_sample_bit(evsel, TID);
+ evsel__set_sample_bit(evsel, IP);
+ evsel__set_sample_bit(evsel, TID);
if (evsel->sample_read) {
- perf_evsel__set_sample_bit(evsel, READ);
+ evsel__set_sample_bit(evsel, READ);
/*
* We need ID even in the case of a single event, because
* PERF_SAMPLE_READ processes ID specific data.
*/
- perf_evsel__set_sample_id(evsel, false);
+ evsel__set_sample_id(evsel, false);
/*
* Apply group format only if we belong to group
@@ -994,7 +989,7 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts,
if (!attr->sample_period || (opts->user_freq != UINT_MAX ||
opts->user_interval != ULLONG_MAX)) {
if (opts->freq) {
- perf_evsel__set_sample_bit(evsel, PERIOD);
+ evsel__set_sample_bit(evsel, PERIOD);
attr->freq = 1;
attr->sample_freq = opts->freq;
} else {
@@ -1002,25 +997,6 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts,
}
}
- /*
- * Disable sampling for all group members other
- * than leader in case leader 'leads' the sampling.
- */
- if ((leader != evsel) && leader->sample_read) {
- attr->freq = 0;
- attr->sample_freq = 0;
- attr->sample_period = 0;
- attr->write_backward = 0;
-
- /*
- * We don't get sample for slave events, we make them
- * when delivering group leader sample. Set the slave
- * event to follow the master sample_type to ease up
- * report.
- */
- attr->sample_type = leader->core.attr.sample_type;
- }
-
if (opts->no_samples)
attr->sample_freq = 0;
@@ -1033,7 +1009,7 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts,
}
if (opts->sample_address) {
- perf_evsel__set_sample_bit(evsel, ADDR);
+ evsel__set_sample_bit(evsel, ADDR);
attr->mmap_data = track;
}
@@ -1042,24 +1018,24 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts,
* event, due to issues with page faults while tracing page
* fault handler and its overall tricky nature.
*/
- if (perf_evsel__is_function_event(evsel))
+ if (evsel__is_function_event(evsel))
evsel->core.attr.exclude_callchain_user = 1;
if (callchain && callchain->enabled && !evsel->no_aux_samples)
- perf_evsel__config_callchain(evsel, opts, callchain);
+ evsel__config_callchain(evsel, opts, callchain);
if (opts->sample_intr_regs) {
attr->sample_regs_intr = opts->sample_intr_regs;
- perf_evsel__set_sample_bit(evsel, REGS_INTR);
+ evsel__set_sample_bit(evsel, REGS_INTR);
}
if (opts->sample_user_regs) {
attr->sample_regs_user |= opts->sample_user_regs;
- perf_evsel__set_sample_bit(evsel, REGS_USER);
+ evsel__set_sample_bit(evsel, REGS_USER);
}
if (target__has_cpu(&opts->target) || opts->sample_cpu)
- perf_evsel__set_sample_bit(evsel, CPU);
+ evsel__set_sample_bit(evsel, CPU);
/*
* When the user explicitly disabled time don't force it here.
@@ -1068,31 +1044,31 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts,
(!perf_missing_features.sample_id_all &&
(!opts->no_inherit || target__has_cpu(&opts->target) || per_cpu ||
opts->sample_time_set)))
- perf_evsel__set_sample_bit(evsel, TIME);
+ evsel__set_sample_bit(evsel, TIME);
if (opts->raw_samples && !evsel->no_aux_samples) {
- perf_evsel__set_sample_bit(evsel, TIME);
- perf_evsel__set_sample_bit(evsel, RAW);
- perf_evsel__set_sample_bit(evsel, CPU);
+ evsel__set_sample_bit(evsel, TIME);
+ evsel__set_sample_bit(evsel, RAW);
+ evsel__set_sample_bit(evsel, CPU);
}
if (opts->sample_address)
- perf_evsel__set_sample_bit(evsel, DATA_SRC);
+ evsel__set_sample_bit(evsel, DATA_SRC);
if (opts->sample_phys_addr)
- perf_evsel__set_sample_bit(evsel, PHYS_ADDR);
+ evsel__set_sample_bit(evsel, PHYS_ADDR);
if (opts->no_buffering) {
attr->watermark = 0;
attr->wakeup_events = 1;
}
if (opts->branch_stack && !evsel->no_aux_samples) {
- perf_evsel__set_sample_bit(evsel, BRANCH_STACK);
+ evsel__set_sample_bit(evsel, BRANCH_STACK);
attr->branch_sample_type = opts->branch_stack;
}
if (opts->sample_weight)
- perf_evsel__set_sample_bit(evsel, WEIGHT);
+ evsel__set_sample_bit(evsel, WEIGHT);
attr->task = track;
attr->mmap = track;
@@ -1106,14 +1082,14 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts,
if (opts->record_cgroup) {
attr->cgroup = track && !perf_missing_features.cgroup;
- perf_evsel__set_sample_bit(evsel, CGROUP);
+ evsel__set_sample_bit(evsel, CGROUP);
}
if (opts->record_switch_events)
attr->context_switch = track;
if (opts->sample_transaction)
- perf_evsel__set_sample_bit(evsel, TRANSACTION);
+ evsel__set_sample_bit(evsel, TRANSACTION);
if (opts->running_time) {
evsel->core.attr.read_format |=
@@ -1127,15 +1103,15 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts,
* Disabling only independent events or group leaders,
* keeping group members enabled.
*/
- if (perf_evsel__is_group_leader(evsel))
+ if (evsel__is_group_leader(evsel))
attr->disabled = 1;
/*
* Setting enable_on_exec for independent events and
* group leaders when the traced workload is executed by perf.
*/
- if (target__none(&opts->target) && perf_evsel__is_group_leader(evsel) &&
- !opts->initial_delay)
+ if (target__none(&opts->target) && evsel__is_group_leader(evsel) &&
+ !opts->initial_delay)
attr->enable_on_exec = 1;
if (evsel->immediate) {
@@ -1176,9 +1152,9 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts,
/* The --period option takes the precedence. */
if (opts->period_set) {
if (opts->period)
- perf_evsel__set_sample_bit(evsel, PERIOD);
+ evsel__set_sample_bit(evsel, PERIOD);
else
- perf_evsel__reset_sample_bit(evsel, PERIOD);
+ evsel__reset_sample_bit(evsel, PERIOD);
}
/*
@@ -1187,10 +1163,10 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts,
* if BRANCH_STACK bit is set.
*/
if (opts->initial_delay && is_dummy_event(evsel))
- perf_evsel__reset_sample_bit(evsel, BRANCH_STACK);
+ evsel__reset_sample_bit(evsel, BRANCH_STACK);
}
-int perf_evsel__set_filter(struct evsel *evsel, const char *filter)
+int evsel__set_filter(struct evsel *evsel, const char *filter)
{
char *new_filter = strdup(filter);
@@ -1203,13 +1179,12 @@ int perf_evsel__set_filter(struct evsel *evsel, const char *filter)
return -1;
}
-static int perf_evsel__append_filter(struct evsel *evsel,
- const char *fmt, const char *filter)
+static int evsel__append_filter(struct evsel *evsel, const char *fmt, const char *filter)
{
char *new_filter;
if (evsel->filter == NULL)
- return perf_evsel__set_filter(evsel, filter);
+ return evsel__set_filter(evsel, filter);
if (asprintf(&new_filter, fmt, evsel->filter, filter) > 0) {
free(evsel->filter);
@@ -1220,14 +1195,14 @@ static int perf_evsel__append_filter(struct evsel *evsel,
return -1;
}
-int perf_evsel__append_tp_filter(struct evsel *evsel, const char *filter)
+int evsel__append_tp_filter(struct evsel *evsel, const char *filter)
{
- return perf_evsel__append_filter(evsel, "(%s) && (%s)", filter);
+ return evsel__append_filter(evsel, "(%s) && (%s)", filter);
}
-int perf_evsel__append_addr_filter(struct evsel *evsel, const char *filter)
+int evsel__append_addr_filter(struct evsel *evsel, const char *filter)
{
- return perf_evsel__append_filter(evsel, "%s,%s", filter);
+ return evsel__append_filter(evsel, "%s,%s", filter);
}
/* Caller has to clear disabled after going through all CPUs. */
@@ -1278,7 +1253,7 @@ static void perf_evsel__free_config_terms(struct evsel *evsel)
}
}
-void perf_evsel__exit(struct evsel *evsel)
+void evsel__exit(struct evsel *evsel)
{
assert(list_empty(&evsel->core.node));
assert(evsel->evlist == NULL);
@@ -1298,12 +1273,12 @@ void perf_evsel__exit(struct evsel *evsel)
void evsel__delete(struct evsel *evsel)
{
- perf_evsel__exit(evsel);
+ evsel__exit(evsel);
free(evsel);
}
-void perf_evsel__compute_deltas(struct evsel *evsel, int cpu, int thread,
- struct perf_counts_values *count)
+void evsel__compute_deltas(struct evsel *evsel, int cpu, int thread,
+ struct perf_counts_values *count)
{
struct perf_counts_values tmp;
@@ -1342,8 +1317,7 @@ void perf_counts_values__scale(struct perf_counts_values *count,
*pscaled = scaled;
}
-static int
-perf_evsel__read_one(struct evsel *evsel, int cpu, int thread)
+static int evsel__read_one(struct evsel *evsel, int cpu, int thread)
{
struct perf_counts_values *count = perf_counts(evsel->counts, cpu, thread);
@@ -1403,8 +1377,7 @@ perf_evsel__process_group_data(struct evsel *leader,
return 0;
}
-static int
-perf_evsel__read_group(struct evsel *leader, int cpu, int thread)
+static int evsel__read_group(struct evsel *leader, int cpu, int thread)
{
struct perf_stat_evsel *ps = leader->stats;
u64 read_format = leader->core.attr.read_format;
@@ -1414,7 +1387,7 @@ perf_evsel__read_group(struct evsel *leader, int cpu, int thread)
if (!(read_format & PERF_FORMAT_ID))
return -EINVAL;
- if (!perf_evsel__is_group_leader(leader))
+ if (!evsel__is_group_leader(leader))
return -EINVAL;
if (!data) {
@@ -1434,18 +1407,17 @@ perf_evsel__read_group(struct evsel *leader, int cpu, int thread)
return perf_evsel__process_group_data(leader, cpu, thread, data);
}
-int perf_evsel__read_counter(struct evsel *evsel, int cpu, int thread)
+int evsel__read_counter(struct evsel *evsel, int cpu, int thread)
{
u64 read_format = evsel->core.attr.read_format;
if (read_format & PERF_FORMAT_GROUP)
- return perf_evsel__read_group(evsel, cpu, thread);
- else
- return perf_evsel__read_one(evsel, cpu, thread);
+ return evsel__read_group(evsel, cpu, thread);
+
+ return evsel__read_one(evsel, cpu, thread);
}
-int __perf_evsel__read_on_cpu(struct evsel *evsel,
- int cpu, int thread, bool scale)
+int __evsel__read_on_cpu(struct evsel *evsel, int cpu, int thread, bool scale)
{
struct perf_counts_values count;
size_t nv = scale ? 3 : 1;
@@ -1459,7 +1431,7 @@ int __perf_evsel__read_on_cpu(struct evsel *evsel,
if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) <= 0)
return -errno;
- perf_evsel__compute_deltas(evsel, cpu, thread, &count);
+ evsel__compute_deltas(evsel, cpu, thread, &count);
perf_counts_values__scale(&count, scale, NULL);
*perf_counts(evsel->counts, cpu, thread) = count;
return 0;
@@ -1470,7 +1442,7 @@ static int get_group_fd(struct evsel *evsel, int cpu, int thread)
struct evsel *leader = evsel->leader;
int fd;
- if (perf_evsel__is_group_leader(evsel))
+ if (evsel__is_group_leader(evsel))
return -1;
/*
@@ -1749,8 +1721,7 @@ retry_open:
/*
* If we succeeded but had to kill clockid, fail and
- * have perf_evsel__open_strerror() print us a nice
- * error.
+ * have evsel__open_strerror() print us a nice error.
*/
if (perf_missing_features.clockid ||
perf_missing_features.clockid_wrong) {
@@ -1854,7 +1825,7 @@ try_fallback:
} else if (!perf_missing_features.group_read &&
evsel->core.attr.inherit &&
(evsel->core.attr.read_format & PERF_FORMAT_GROUP) &&
- perf_evsel__is_group_leader(evsel)) {
+ evsel__is_group_leader(evsel)) {
perf_missing_features.group_read = true;
pr_debug2_peo("switching off group read\n");
goto fallback_missing_features;
@@ -1888,9 +1859,7 @@ void evsel__close(struct evsel *evsel)
perf_evsel__free_id(&evsel->core);
}
-int perf_evsel__open_per_cpu(struct evsel *evsel,
- struct perf_cpu_map *cpus,
- int cpu)
+int evsel__open_per_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int cpu)
{
if (cpu == -1)
return evsel__open_cpu(evsel, cpus, NULL, 0,
@@ -1899,8 +1868,7 @@ int perf_evsel__open_per_cpu(struct evsel *evsel,
return evsel__open_cpu(evsel, cpus, NULL, cpu, cpu + 1);
}
-int perf_evsel__open_per_thread(struct evsel *evsel,
- struct perf_thread_map *threads)
+int evsel__open_per_thread(struct evsel *evsel, struct perf_thread_map *threads)
{
return evsel__open(evsel, NULL, threads);
}
@@ -1995,8 +1963,8 @@ perf_event__check_size(union perf_event *event, unsigned int sample_size)
return 0;
}
-int perf_evsel__parse_sample(struct evsel *evsel, union perf_event *event,
- struct perf_sample *data)
+int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
+ struct perf_sample *data)
{
u64 type = evsel->core.attr.sample_type;
bool swapped = evsel->needs_swap;
@@ -2136,7 +2104,7 @@ int perf_evsel__parse_sample(struct evsel *evsel, union perf_event *event,
}
}
- if (evsel__has_callchain(evsel)) {
+ if (type & PERF_SAMPLE_CALLCHAIN) {
const u64 max_callchain_nr = UINT64_MAX / sizeof(u64);
OVERFLOW_CHECK_u64(array);
@@ -2190,7 +2158,7 @@ int perf_evsel__parse_sample(struct evsel *evsel, union perf_event *event,
return -EFAULT;
sz = data->branch_stack->nr * sizeof(struct branch_entry);
- if (perf_evsel__has_branch_hw_idx(evsel))
+ if (evsel__has_branch_hw_idx(evsel))
sz += sizeof(u64);
else
data->no_hw_idx = true;
@@ -2298,9 +2266,8 @@ int perf_evsel__parse_sample(struct evsel *evsel, union perf_event *event,
return 0;
}
-int perf_evsel__parse_sample_timestamp(struct evsel *evsel,
- union perf_event *event,
- u64 *timestamp)
+int evsel__parse_sample_timestamp(struct evsel *evsel, union perf_event *event,
+ u64 *timestamp)
{
u64 type = evsel->core.attr.sample_type;
const __u64 *array;
@@ -2342,15 +2309,14 @@ int perf_evsel__parse_sample_timestamp(struct evsel *evsel,
return 0;
}
-struct tep_format_field *perf_evsel__field(struct evsel *evsel, const char *name)
+struct tep_format_field *evsel__field(struct evsel *evsel, const char *name)
{
return tep_find_field(evsel->tp_format, name);
}
-void *perf_evsel__rawptr(struct evsel *evsel, struct perf_sample *sample,
- const char *name)
+void *evsel__rawptr(struct evsel *evsel, struct perf_sample *sample, const char *name)
{
- struct tep_format_field *field = perf_evsel__field(evsel, name);
+ struct tep_format_field *field = evsel__field(evsel, name);
int offset;
if (!field)
@@ -2405,10 +2371,9 @@ u64 format_field__intval(struct tep_format_field *field, struct perf_sample *sam
return 0;
}
-u64 perf_evsel__intval(struct evsel *evsel, struct perf_sample *sample,
- const char *name)
+u64 evsel__intval(struct evsel *evsel, struct perf_sample *sample, const char *name)
{
- struct tep_format_field *field = perf_evsel__field(evsel, name);
+ struct tep_format_field *field = evsel__field(evsel, name);
if (!field)
return 0;
@@ -2416,8 +2381,7 @@ u64 perf_evsel__intval(struct evsel *evsel, struct perf_sample *sample,
return field ? format_field__intval(field, sample, evsel->needs_swap) : 0;
}
-bool perf_evsel__fallback(struct evsel *evsel, int err,
- char *msg, size_t msgsize)
+bool evsel__fallback(struct evsel *evsel, int err, char *msg, size_t msgsize)
{
int paranoid;
@@ -2442,10 +2406,14 @@ bool perf_evsel__fallback(struct evsel *evsel, int err,
return true;
} else if (err == EACCES && !evsel->core.attr.exclude_kernel &&
(paranoid = perf_event_paranoid()) > 1) {
- const char *name = perf_evsel__name(evsel);
+ const char *name = evsel__name(evsel);
char *new_name;
const char *sep = ":";
+ /* If event has exclude user then don't exclude kernel. */
+ if (evsel->core.attr.exclude_user)
+ return false;
+
/* Is there already the separator in the name. */
if (strchr(name, '/') ||
strchr(name, ':'))
@@ -2505,8 +2473,8 @@ static bool find_process(const char *name)
return ret ? false : true;
}
-int perf_evsel__open_strerror(struct evsel *evsel, struct target *target,
- int err, char *msg, size_t size)
+int evsel__open_strerror(struct evsel *evsel, struct target *target,
+ int err, char *msg, size_t size)
{
char sbuf[STRERR_BUFSIZE];
int printed = 0;
@@ -2516,28 +2484,26 @@ int perf_evsel__open_strerror(struct evsel *evsel, struct target *target,
case EACCES:
if (err == EPERM)
printed = scnprintf(msg, size,
- "No permission to enable %s event.\n\n",
- perf_evsel__name(evsel));
+ "No permission to enable %s event.\n\n", evsel__name(evsel));
return scnprintf(msg + printed, size - printed,
"You may not have permission to collect %sstats.\n\n"
"Consider tweaking /proc/sys/kernel/perf_event_paranoid,\n"
"which controls use of the performance events system by\n"
- "unprivileged users (without CAP_SYS_ADMIN).\n\n"
+ "unprivileged users (without CAP_PERFMON or CAP_SYS_ADMIN).\n\n"
"The current value is %d:\n\n"
" -1: Allow use of (almost) all events by all users\n"
" Ignore mlock limit after perf_event_mlock_kb without CAP_IPC_LOCK\n"
- ">= 0: Disallow ftrace function tracepoint by users without CAP_SYS_ADMIN\n"
- " Disallow raw tracepoint access by users without CAP_SYS_ADMIN\n"
- ">= 1: Disallow CPU event access by users without CAP_SYS_ADMIN\n"
- ">= 2: Disallow kernel profiling by users without CAP_SYS_ADMIN\n\n"
+ ">= 0: Disallow ftrace function tracepoint by users without CAP_PERFMON or CAP_SYS_ADMIN\n"
+ " Disallow raw tracepoint access by users without CAP_SYS_PERFMON or CAP_SYS_ADMIN\n"
+ ">= 1: Disallow CPU event access by users without CAP_PERFMON or CAP_SYS_ADMIN\n"
+ ">= 2: Disallow kernel profiling by users without CAP_PERFMON or CAP_SYS_ADMIN\n\n"
"To make this setting permanent, edit /etc/sysctl.conf too, e.g.:\n\n"
" kernel.perf_event_paranoid = -1\n" ,
target->system_wide ? "system-wide " : "",
perf_event_paranoid());
case ENOENT:
- return scnprintf(msg, size, "The %s event is not supported.",
- perf_evsel__name(evsel));
+ return scnprintf(msg, size, "The %s event is not supported.", evsel__name(evsel));
case EMFILE:
return scnprintf(msg, size, "%s",
"Too many events are opened.\n"
@@ -2561,7 +2527,7 @@ int perf_evsel__open_strerror(struct evsel *evsel, struct target *target,
if (evsel->core.attr.sample_period != 0)
return scnprintf(msg, size,
"%s: PMU Hardware doesn't support sampling/overflow-interrupts. Try 'perf stat'",
- perf_evsel__name(evsel));
+ evsel__name(evsel));
if (evsel->core.attr.precise_ip)
return scnprintf(msg, size, "%s",
"\'precise\' request may not be supported. Try removing 'p' modifier.");
@@ -2594,11 +2560,10 @@ int perf_evsel__open_strerror(struct evsel *evsel, struct target *target,
return scnprintf(msg, size,
"The sys_perf_event_open() syscall returned with %d (%s) for event (%s).\n"
"/bin/dmesg | grep -i perf may provide additional information.\n",
- err, str_error_r(err, sbuf, sizeof(sbuf)),
- perf_evsel__name(evsel));
+ err, str_error_r(err, sbuf, sizeof(sbuf)), evsel__name(evsel));
}
-struct perf_env *perf_evsel__env(struct evsel *evsel)
+struct perf_env *evsel__env(struct evsel *evsel)
{
if (evsel && evsel->evlist)
return evsel->evlist->env;
@@ -2623,7 +2588,7 @@ static int store_evsel_ids(struct evsel *evsel, struct evlist *evlist)
return 0;
}
-int perf_evsel__store_ids(struct evsel *evsel, struct evlist *evlist)
+int evsel__store_ids(struct evsel *evsel, struct evlist *evlist)
{
struct perf_cpu_map *cpus = evsel->core.cpus;
struct perf_thread_map *threads = evsel->core.threads;
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 53187c501ee8..351c0aaf2a11 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -18,7 +18,7 @@ struct perf_counts;
struct perf_stat_evsel;
union perf_event;
-typedef int (perf_evsel__sb_cb_t)(union perf_event *event, void *data);
+typedef int (evsel__sb_cb_t)(union perf_event *event, void *data);
enum perf_tool_event {
PERF_TOOL_NONE = 0,
@@ -101,9 +101,17 @@ struct evsel {
int cpu_iter;
const char *pmu_name;
struct {
- perf_evsel__sb_cb_t *cb;
- void *data;
+ evsel__sb_cb_t *cb;
+ void *data;
} side_band;
+ /*
+ * For reporting purposes, an evsel sample can have a callchain
+ * synthesized from AUX area data. Keep track of synthesized sample
+ * types here. Note, the recorded sample_type cannot be changed because
+ * it is needed to continue to parse events.
+ * See also evsel__has_callchain().
+ */
+ __u64 synth_sample_type;
};
struct perf_missing_features {
@@ -135,7 +143,7 @@ static inline struct perf_cpu_map *evsel__cpus(struct evsel *evsel)
return perf_evsel__cpus(&evsel->core);
}
-static inline int perf_evsel__nr_cpus(struct evsel *evsel)
+static inline int evsel__nr_cpus(struct evsel *evsel)
{
return evsel__cpus(evsel)->nr;
}
@@ -143,13 +151,16 @@ static inline int perf_evsel__nr_cpus(struct evsel *evsel)
void perf_counts_values__scale(struct perf_counts_values *count,
bool scale, s8 *pscaled);
-void perf_evsel__compute_deltas(struct evsel *evsel, int cpu, int thread,
- struct perf_counts_values *count);
+void evsel__compute_deltas(struct evsel *evsel, int cpu, int thread,
+ struct perf_counts_values *count);
int perf_evsel__object_config(size_t object_size,
int (*init)(struct evsel *evsel),
void (*fini)(struct evsel *evsel));
+struct perf_pmu *evsel__find_pmu(struct evsel *evsel);
+bool evsel__is_aux_event(struct evsel *evsel);
+
struct evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx);
static inline struct evsel *evsel__new(struct perf_event_attr *attr)
@@ -172,22 +183,20 @@ struct evsel *perf_evsel__new_cycles(bool precise);
struct tep_event *event_format__new(const char *sys, const char *name);
void evsel__init(struct evsel *evsel, struct perf_event_attr *attr, int idx);
-void perf_evsel__exit(struct evsel *evsel);
+void evsel__exit(struct evsel *evsel);
void evsel__delete(struct evsel *evsel);
struct callchain_param;
-void perf_evsel__config(struct evsel *evsel,
- struct record_opts *opts,
- struct callchain_param *callchain);
-void perf_evsel__config_callchain(struct evsel *evsel,
- struct record_opts *opts,
- struct callchain_param *callchain);
+void evsel__config(struct evsel *evsel, struct record_opts *opts,
+ struct callchain_param *callchain);
+void evsel__config_callchain(struct evsel *evsel, struct record_opts *opts,
+ struct callchain_param *callchain);
-int __perf_evsel__sample_size(u64 sample_type);
-void perf_evsel__calc_id_pos(struct evsel *evsel);
+int __evsel__sample_size(u64 sample_type);
+void evsel__calc_id_pos(struct evsel *evsel);
-bool perf_evsel__is_cache_op_valid(u8 type, u8 op);
+bool evsel__is_cache_op_valid(u8 type, u8 op);
#define PERF_EVSEL__MAX_ALIASES 8
@@ -199,177 +208,153 @@ extern const char *perf_evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX]
[PERF_EVSEL__MAX_ALIASES];
extern const char *perf_evsel__hw_names[PERF_COUNT_HW_MAX];
extern const char *perf_evsel__sw_names[PERF_COUNT_SW_MAX];
-int __perf_evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result,
- char *bf, size_t size);
-const char *perf_evsel__name(struct evsel *evsel);
+int __evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result, char *bf, size_t size);
+const char *evsel__name(struct evsel *evsel);
-const char *perf_evsel__group_name(struct evsel *evsel);
-int perf_evsel__group_desc(struct evsel *evsel, char *buf, size_t size);
+const char *evsel__group_name(struct evsel *evsel);
+int evsel__group_desc(struct evsel *evsel, char *buf, size_t size);
-void __perf_evsel__set_sample_bit(struct evsel *evsel,
- enum perf_event_sample_format bit);
-void __perf_evsel__reset_sample_bit(struct evsel *evsel,
- enum perf_event_sample_format bit);
+void __evsel__set_sample_bit(struct evsel *evsel, enum perf_event_sample_format bit);
+void __evsel__reset_sample_bit(struct evsel *evsel, enum perf_event_sample_format bit);
-#define perf_evsel__set_sample_bit(evsel, bit) \
- __perf_evsel__set_sample_bit(evsel, PERF_SAMPLE_##bit)
+#define evsel__set_sample_bit(evsel, bit) \
+ __evsel__set_sample_bit(evsel, PERF_SAMPLE_##bit)
-#define perf_evsel__reset_sample_bit(evsel, bit) \
- __perf_evsel__reset_sample_bit(evsel, PERF_SAMPLE_##bit)
+#define evsel__reset_sample_bit(evsel, bit) \
+ __evsel__reset_sample_bit(evsel, PERF_SAMPLE_##bit)
-void perf_evsel__set_sample_id(struct evsel *evsel,
- bool use_sample_identifier);
+void evsel__set_sample_id(struct evsel *evsel, bool use_sample_identifier);
-int perf_evsel__set_filter(struct evsel *evsel, const char *filter);
-int perf_evsel__append_tp_filter(struct evsel *evsel, const char *filter);
-int perf_evsel__append_addr_filter(struct evsel *evsel,
- const char *filter);
+int evsel__set_filter(struct evsel *evsel, const char *filter);
+int evsel__append_tp_filter(struct evsel *evsel, const char *filter);
+int evsel__append_addr_filter(struct evsel *evsel, const char *filter);
int evsel__enable_cpu(struct evsel *evsel, int cpu);
int evsel__enable(struct evsel *evsel);
int evsel__disable(struct evsel *evsel);
int evsel__disable_cpu(struct evsel *evsel, int cpu);
-int perf_evsel__open_per_cpu(struct evsel *evsel,
- struct perf_cpu_map *cpus,
- int cpu);
-int perf_evsel__open_per_thread(struct evsel *evsel,
- struct perf_thread_map *threads);
+int evsel__open_per_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int cpu);
+int evsel__open_per_thread(struct evsel *evsel, struct perf_thread_map *threads);
int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus,
struct perf_thread_map *threads);
void evsel__close(struct evsel *evsel);
struct perf_sample;
-void *perf_evsel__rawptr(struct evsel *evsel, struct perf_sample *sample,
- const char *name);
-u64 perf_evsel__intval(struct evsel *evsel, struct perf_sample *sample,
- const char *name);
+void *evsel__rawptr(struct evsel *evsel, struct perf_sample *sample, const char *name);
+u64 evsel__intval(struct evsel *evsel, struct perf_sample *sample, const char *name);
-static inline char *perf_evsel__strval(struct evsel *evsel,
- struct perf_sample *sample,
- const char *name)
+static inline char *evsel__strval(struct evsel *evsel, struct perf_sample *sample, const char *name)
{
- return perf_evsel__rawptr(evsel, sample, name);
+ return evsel__rawptr(evsel, sample, name);
}
struct tep_format_field;
u64 format_field__intval(struct tep_format_field *field, struct perf_sample *sample, bool needs_swap);
-struct tep_format_field *perf_evsel__field(struct evsel *evsel, const char *name);
+struct tep_format_field *evsel__field(struct evsel *evsel, const char *name);
-#define perf_evsel__match(evsel, t, c) \
+#define evsel__match(evsel, t, c) \
(evsel->core.attr.type == PERF_TYPE_##t && \
evsel->core.attr.config == PERF_COUNT_##c)
-static inline bool perf_evsel__match2(struct evsel *e1,
- struct evsel *e2)
+static inline bool evsel__match2(struct evsel *e1, struct evsel *e2)
{
return (e1->core.attr.type == e2->core.attr.type) &&
(e1->core.attr.config == e2->core.attr.config);
}
-#define perf_evsel__cmp(a, b) \
- ((a) && \
- (b) && \
- (a)->core.attr.type == (b)->core.attr.type && \
- (a)->core.attr.config == (b)->core.attr.config)
-
-int perf_evsel__read_counter(struct evsel *evsel, int cpu, int thread);
+int evsel__read_counter(struct evsel *evsel, int cpu, int thread);
-int __perf_evsel__read_on_cpu(struct evsel *evsel,
- int cpu, int thread, bool scale);
+int __evsel__read_on_cpu(struct evsel *evsel, int cpu, int thread, bool scale);
/**
- * perf_evsel__read_on_cpu - Read out the results on a CPU and thread
+ * evsel__read_on_cpu - Read out the results on a CPU and thread
*
* @evsel - event selector to read value
* @cpu - CPU of interest
* @thread - thread of interest
*/
-static inline int perf_evsel__read_on_cpu(struct evsel *evsel,
- int cpu, int thread)
+static inline int evsel__read_on_cpu(struct evsel *evsel, int cpu, int thread)
{
- return __perf_evsel__read_on_cpu(evsel, cpu, thread, false);
+ return __evsel__read_on_cpu(evsel, cpu, thread, false);
}
/**
- * perf_evsel__read_on_cpu_scaled - Read out the results on a CPU and thread, scaled
+ * evsel__read_on_cpu_scaled - Read out the results on a CPU and thread, scaled
*
* @evsel - event selector to read value
* @cpu - CPU of interest
* @thread - thread of interest
*/
-static inline int perf_evsel__read_on_cpu_scaled(struct evsel *evsel,
- int cpu, int thread)
+static inline int evsel__read_on_cpu_scaled(struct evsel *evsel, int cpu, int thread)
{
- return __perf_evsel__read_on_cpu(evsel, cpu, thread, true);
+ return __evsel__read_on_cpu(evsel, cpu, thread, true);
}
-int perf_evsel__parse_sample(struct evsel *evsel, union perf_event *event,
- struct perf_sample *sample);
+int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
+ struct perf_sample *sample);
-int perf_evsel__parse_sample_timestamp(struct evsel *evsel,
- union perf_event *event,
- u64 *timestamp);
+int evsel__parse_sample_timestamp(struct evsel *evsel, union perf_event *event,
+ u64 *timestamp);
-static inline struct evsel *perf_evsel__next(struct evsel *evsel)
+static inline struct evsel *evsel__next(struct evsel *evsel)
{
return list_entry(evsel->core.node.next, struct evsel, core.node);
}
-static inline struct evsel *perf_evsel__prev(struct evsel *evsel)
+static inline struct evsel *evsel__prev(struct evsel *evsel)
{
return list_entry(evsel->core.node.prev, struct evsel, core.node);
}
/**
- * perf_evsel__is_group_leader - Return whether given evsel is a leader event
+ * evsel__is_group_leader - Return whether given evsel is a leader event
*
* @evsel - evsel selector to be tested
*
* Return %true if @evsel is a group leader or a stand-alone event
*/
-static inline bool perf_evsel__is_group_leader(const struct evsel *evsel)
+static inline bool evsel__is_group_leader(const struct evsel *evsel)
{
return evsel->leader == evsel;
}
/**
- * perf_evsel__is_group_event - Return whether given evsel is a group event
+ * evsel__is_group_event - Return whether given evsel is a group event
*
* @evsel - evsel selector to be tested
*
 * Return %true iff event group view is enabled and @evsel is an actual group
* leader which has other members in the group
*/
-static inline bool perf_evsel__is_group_event(struct evsel *evsel)
+static inline bool evsel__is_group_event(struct evsel *evsel)
{
if (!symbol_conf.event_group)
return false;
- return perf_evsel__is_group_leader(evsel) && evsel->core.nr_members > 1;
+ return evsel__is_group_leader(evsel) && evsel->core.nr_members > 1;
}
-bool perf_evsel__is_function_event(struct evsel *evsel);
+bool evsel__is_function_event(struct evsel *evsel);
-static inline bool perf_evsel__is_bpf_output(struct evsel *evsel)
+static inline bool evsel__is_bpf_output(struct evsel *evsel)
{
- return perf_evsel__match(evsel, SOFTWARE, SW_BPF_OUTPUT);
+ return evsel__match(evsel, SOFTWARE, SW_BPF_OUTPUT);
}
-static inline bool perf_evsel__is_clock(struct evsel *evsel)
+static inline bool evsel__is_clock(struct evsel *evsel)
{
- return perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) ||
- perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK);
+ return evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) ||
+ evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK);
}
-bool perf_evsel__fallback(struct evsel *evsel, int err,
- char *msg, size_t msgsize);
-int perf_evsel__open_strerror(struct evsel *evsel, struct target *target,
- int err, char *msg, size_t size);
+bool evsel__fallback(struct evsel *evsel, int err, char *msg, size_t msgsize);
+int evsel__open_strerror(struct evsel *evsel, struct target *target,
+ int err, char *msg, size_t size);
-static inline int perf_evsel__group_idx(struct evsel *evsel)
+static inline int evsel__group_idx(struct evsel *evsel)
{
return evsel->idx - evsel->leader->idx;
}
@@ -386,22 +371,37 @@ for ((_evsel) = _leader; \
(_evsel) && (_evsel)->leader == (_leader); \
(_evsel) = list_entry((_evsel)->core.node.next, struct evsel, core.node))
-static inline bool perf_evsel__has_branch_callstack(const struct evsel *evsel)
+static inline bool evsel__has_branch_callstack(const struct evsel *evsel)
{
return evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK;
}
-static inline bool perf_evsel__has_branch_hw_idx(const struct evsel *evsel)
+static inline bool evsel__has_branch_hw_idx(const struct evsel *evsel)
{
return evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX;
}
static inline bool evsel__has_callchain(const struct evsel *evsel)
{
- return (evsel->core.attr.sample_type & PERF_SAMPLE_CALLCHAIN) != 0;
+ /*
+ * For reporting purposes, an evsel sample can have a recorded callchain
+ * or a callchain synthesized from AUX area data.
+ */
+ return evsel->core.attr.sample_type & PERF_SAMPLE_CALLCHAIN ||
+ evsel->synth_sample_type & PERF_SAMPLE_CALLCHAIN;
+}
+
+static inline bool evsel__has_br_stack(const struct evsel *evsel)
+{
+ /*
+ * For reporting purposes, an evsel sample can have a recorded branch
+ * stack or a branch stack synthesized from AUX area data.
+ */
+ return evsel->core.attr.sample_type & PERF_SAMPLE_BRANCH_STACK ||
+ evsel->synth_sample_type & PERF_SAMPLE_BRANCH_STACK;
}
-struct perf_env *perf_evsel__env(struct evsel *evsel);
+struct perf_env *evsel__env(struct evsel *evsel);
-int perf_evsel__store_ids(struct evsel *evsel, struct evlist *evlist);
+int evsel__store_ids(struct evsel *evsel, struct evlist *evlist);
#endif /* __PERF_EVSEL_H */
diff --git a/tools/perf/util/evsel_config.h b/tools/perf/util/evsel_config.h
index e026ab67b008..f8938916577c 100644
--- a/tools/perf/util/evsel_config.h
+++ b/tools/perf/util/evsel_config.h
@@ -7,7 +7,7 @@
/*
* The 'struct perf_evsel_config_term' is used to pass event
- * specific configuration data to perf_evsel__config routine.
+ * specific configuration data to evsel__config routine.
* It is allocated within event parsing and attached to
* perf_evsel::config_terms list head.
*/
diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c
index 3b4842840db0..99aed708bd5a 100644
--- a/tools/perf/util/evsel_fprintf.c
+++ b/tools/perf/util/evsel_fprintf.c
@@ -44,22 +44,22 @@ int perf_evsel__fprintf(struct evsel *evsel,
if (details->event_group) {
struct evsel *pos;
- if (!perf_evsel__is_group_leader(evsel))
+ if (!evsel__is_group_leader(evsel))
return 0;
if (evsel->core.nr_members > 1)
printed += fprintf(fp, "%s{", evsel->group_name ?: "");
- printed += fprintf(fp, "%s", perf_evsel__name(evsel));
+ printed += fprintf(fp, "%s", evsel__name(evsel));
for_each_group_member(pos, evsel)
- printed += fprintf(fp, ",%s", perf_evsel__name(pos));
+ printed += fprintf(fp, ",%s", evsel__name(pos));
if (evsel->core.nr_members > 1)
printed += fprintf(fp, "}");
goto out;
}
- printed += fprintf(fp, "%s", perf_evsel__name(evsel));
+ printed += fprintf(fp, "%s", evsel__name(evsel));
if (details->verbose) {
printed += perf_event_attr__fprintf(fp, &evsel->core.attr,
diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c
index fd192ddf93c1..aa631e37ad1e 100644
--- a/tools/perf/util/expr.c
+++ b/tools/perf/util/expr.c
@@ -3,7 +3,6 @@
#include <assert.h>
#include "expr.h"
#include "expr-bison.h"
-#define YY_EXTRA_TYPE int
#include "expr-flex.h"
#ifdef PARSER_DEBUG
@@ -11,7 +10,7 @@ extern int expr_debug;
#endif
/* Caller must make sure id is allocated */
-void expr__add_id(struct parse_ctx *ctx, const char *name, double val)
+void expr__add_id(struct expr_parse_ctx *ctx, const char *name, double val)
{
int idx;
@@ -21,20 +20,24 @@ void expr__add_id(struct parse_ctx *ctx, const char *name, double val)
ctx->ids[idx].val = val;
}
-void expr__ctx_init(struct parse_ctx *ctx)
+void expr__ctx_init(struct expr_parse_ctx *ctx)
{
ctx->num_ids = 0;
}
static int
-__expr__parse(double *val, struct parse_ctx *ctx, const char *expr,
- int start)
+__expr__parse(double *val, struct expr_parse_ctx *ctx, const char *expr,
+ int start, int runtime)
{
+ struct expr_scanner_ctx scanner_ctx = {
+ .start_token = start,
+ .runtime = runtime,
+ };
YY_BUFFER_STATE buffer;
void *scanner;
int ret;
- ret = expr_lex_init_extra(start, &scanner);
+ ret = expr_lex_init_extra(&scanner_ctx, &scanner);
if (ret)
return ret;
@@ -52,9 +55,9 @@ __expr__parse(double *val, struct parse_ctx *ctx, const char *expr,
return ret;
}
-int expr__parse(double *final_val, struct parse_ctx *ctx, const char *expr)
+int expr__parse(double *final_val, struct expr_parse_ctx *ctx, const char *expr, int runtime)
{
- return __expr__parse(final_val, ctx, expr, EXPR_PARSE) ? -1 : 0;
+ return __expr__parse(final_val, ctx, expr, EXPR_PARSE, runtime) ? -1 : 0;
}
static bool
@@ -72,13 +75,13 @@ already_seen(const char *val, const char *one, const char **other,
}
int expr__find_other(const char *expr, const char *one, const char ***other,
- int *num_other)
+ int *num_other, int runtime)
{
int err, i = 0, j = 0;
- struct parse_ctx ctx;
+ struct expr_parse_ctx ctx;
expr__ctx_init(&ctx);
- err = __expr__parse(NULL, &ctx, expr, EXPR_OTHER);
+ err = __expr__parse(NULL, &ctx, expr, EXPR_OTHER, runtime);
if (err)
return -1;
diff --git a/tools/perf/util/expr.h b/tools/perf/util/expr.h
index 9377538f4097..87d627bb699b 100644
--- a/tools/perf/util/expr.h
+++ b/tools/perf/util/expr.h
@@ -5,20 +5,25 @@
#define EXPR_MAX_OTHER 20
#define MAX_PARSE_ID EXPR_MAX_OTHER
-struct parse_id {
+struct expr_parse_id {
const char *name;
double val;
};
-struct parse_ctx {
+struct expr_parse_ctx {
int num_ids;
- struct parse_id ids[MAX_PARSE_ID];
+ struct expr_parse_id ids[MAX_PARSE_ID];
};
-void expr__ctx_init(struct parse_ctx *ctx);
-void expr__add_id(struct parse_ctx *ctx, const char *id, double val);
-int expr__parse(double *final_val, struct parse_ctx *ctx, const char *expr);
+struct expr_scanner_ctx {
+ int start_token;
+ int runtime;
+};
+
+void expr__ctx_init(struct expr_parse_ctx *ctx);
+void expr__add_id(struct expr_parse_ctx *ctx, const char *id, double val);
+int expr__parse(double *final_val, struct expr_parse_ctx *ctx, const char *expr, int runtime);
int expr__find_other(const char *expr, const char *one, const char ***other,
- int *num_other);
+ int *num_other, int runtime);
#endif
diff --git a/tools/perf/util/expr.l b/tools/perf/util/expr.l
index eaad29243c23..74b9b59b1aa5 100644
--- a/tools/perf/util/expr.l
+++ b/tools/perf/util/expr.l
@@ -35,7 +35,7 @@ static int value(yyscan_t scanner, int base)
* Allow @ instead of / to be able to specify pmu/event/ without
* conflicts with normal division.
*/
-static char *normalize(char *str)
+static char *normalize(char *str, int runtime)
{
char *ret = str;
char *dst = str;
@@ -45,6 +45,19 @@ static char *normalize(char *str)
*dst++ = '/';
else if (*str == '\\')
*dst++ = *++str;
+ else if (*str == '?') {
+ char *paramval;
+ int i = 0;
+ int size = asprintf(&paramval, "%d", runtime);
+
+ if (size < 0)
+ *dst++ = '0';
+ else {
+ while (i < size)
+ *dst++ = paramval[i++];
+ free(paramval);
+ }
+ }
else
*dst++ = *str;
str++;
@@ -54,16 +67,16 @@ static char *normalize(char *str)
return ret;
}
-static int str(yyscan_t scanner, int token)
+static int str(yyscan_t scanner, int token, int runtime)
{
YYSTYPE *yylval = expr_get_lval(scanner);
char *text = expr_get_text(scanner);
- yylval->str = normalize(strdup(text));
+ yylval->str = normalize(strdup(text), runtime);
if (!yylval->str)
return EXPR_ERROR;
- yylval->str = normalize(yylval->str);
+ yylval->str = normalize(yylval->str, runtime);
return token;
}
%}
@@ -72,17 +85,17 @@ number [0-9]+
sch [-,=]
spec \\{sch}
-sym [0-9a-zA-Z_\.:@]+
-symbol {spec}*{sym}*{spec}*{sym}*
+sym [0-9a-zA-Z_\.:@?]+
+symbol {spec}*{sym}*{spec}*{sym}*{spec}*{sym}
%%
- {
- int start_token;
+ struct expr_scanner_ctx *sctx = expr_get_extra(yyscanner);
- start_token = expr_get_extra(yyscanner);
+ {
+ int start_token = sctx->start_token;
- if (start_token) {
- expr_set_extra(NULL, yyscanner);
+ if (sctx->start_token) {
+ sctx->start_token = 0;
return start_token;
}
}
@@ -93,7 +106,7 @@ if { return IF; }
else { return ELSE; }
#smt_on { return SMT_ON; }
{number} { return value(yyscanner, 10); }
-{symbol} { return str(yyscanner, ID); }
+{symbol} { return str(yyscanner, ID, sctx->runtime); }
"|" { return '|'; }
"^" { return '^'; }
"&" { return '&'; }
diff --git a/tools/perf/util/expr.y b/tools/perf/util/expr.y
index 4720cbe79357..cd17486c1c5d 100644
--- a/tools/perf/util/expr.y
+++ b/tools/perf/util/expr.y
@@ -15,7 +15,7 @@
%define api.pure full
%parse-param { double *final_val }
-%parse-param { struct parse_ctx *ctx }
+%parse-param { struct expr_parse_ctx *ctx }
%parse-param {void *scanner}
%lex-param {void* scanner}
@@ -39,14 +39,14 @@
%{
static void expr_error(double *final_val __maybe_unused,
- struct parse_ctx *ctx __maybe_unused,
+ struct expr_parse_ctx *ctx __maybe_unused,
void *scanner,
const char *s)
{
pr_debug("%s\n", s);
}
-static int lookup_id(struct parse_ctx *ctx, char *id, double *val)
+static int lookup_id(struct expr_parse_ctx *ctx, char *id, double *val)
{
int i;
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index acbd046bf95c..0ce47283a8a1 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -525,7 +525,7 @@ static int write_event_desc(struct feat_fd *ff,
/*
* write event string as passed on cmdline
*/
- ret = do_write_string(ff, perf_evsel__name(evsel));
+ ret = do_write_string(ff, evsel__name(evsel));
if (ret < 0)
return ret;
/*
@@ -783,8 +783,7 @@ static int write_group_desc(struct feat_fd *ff,
return ret;
evlist__for_each_entry(evlist, evsel) {
- if (perf_evsel__is_group_leader(evsel) &&
- evsel->core.nr_members > 1) {
+ if (evsel__is_group_leader(evsel) && evsel->core.nr_members > 1) {
const char *name = evsel->group_name ?: "{anon_group}";
u32 leader_idx = evsel->idx;
u32 nr_members = evsel->core.nr_members;
@@ -1395,6 +1394,38 @@ static int write_compressed(struct feat_fd *ff __maybe_unused,
return do_write(ff, &(ff->ph->env.comp_mmap_len), sizeof(ff->ph->env.comp_mmap_len));
}
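+/*
+ * Write the number of CPU PMU capabilities, followed by each capability as a
+ * name/value string pair.
+ */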
+static int write_cpu_pmu_caps(struct feat_fd *ff,
+ struct evlist *evlist __maybe_unused)
+{
+ struct perf_pmu *cpu_pmu = perf_pmu__find("cpu");
+ struct perf_pmu_caps *caps = NULL;
+ int nr_caps;
+ int ret;
+
+ if (!cpu_pmu)
+ return -ENOENT;
+
+ nr_caps = perf_pmu__caps_parse(cpu_pmu);
+ if (nr_caps < 0)
+ return nr_caps;
+
+ ret = do_write(ff, &nr_caps, sizeof(nr_caps));
+ if (ret < 0)
+ return ret;
+
+ list_for_each_entry(caps, &cpu_pmu->caps, list) {
+ ret = do_write_string(ff, caps->name);
+ if (ret < 0)
+ return ret;
+
+ ret = do_write_string(ff, caps->value);
+ if (ret < 0)
+ return ret;
+ }
+
+ return ret;
+}
+
static void print_hostname(struct feat_fd *ff, FILE *fp)
{
fprintf(fp, "# hostname : %s\n", ff->ph->env.hostname);
@@ -1809,6 +1840,27 @@ static void print_compressed(struct feat_fd *ff, FILE *fp)
ff->ph->env.comp_level, ff->ph->env.comp_ratio);
}
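+/* Print the saved CPU PMU capabilities as a "# cpu pmu capabilities:" header line. */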
+static void print_cpu_pmu_caps(struct feat_fd *ff, FILE *fp)
+{
+ const char *delimiter = "# cpu pmu capabilities: ";
+ u32 nr_caps = ff->ph->env.nr_cpu_pmu_caps;
+ char *str;
+
+ if (!nr_caps) {
+ fprintf(fp, "# cpu pmu capabilities: not available\n");
+ return;
+ }
+
+ str = ff->ph->env.cpu_pmu_caps;
+ while (nr_caps--) {
+ fprintf(fp, "%s%s", delimiter, str);
+ delimiter = ", ";
+ str += strlen(str) + 1;
+ }
+
+ fprintf(fp, "\n");
+}
+
static void print_pmu_mappings(struct feat_fd *ff, FILE *fp)
{
const char *delimiter = "# pmu mappings: ";
@@ -1854,14 +1906,12 @@ static void print_group_desc(struct feat_fd *ff, FILE *fp)
session = container_of(ff->ph, struct perf_session, header);
evlist__for_each_entry(session->evlist, evsel) {
- if (perf_evsel__is_group_leader(evsel) &&
- evsel->core.nr_members > 1) {
- fprintf(fp, "# group: %s{%s", evsel->group_name ?: "",
- perf_evsel__name(evsel));
+ if (evsel__is_group_leader(evsel) && evsel->core.nr_members > 1) {
+ fprintf(fp, "# group: %s{%s", evsel->group_name ?: "", evsel__name(evsel));
nr = evsel->core.nr_members - 1;
} else if (nr) {
- fprintf(fp, ",%s", perf_evsel__name(evsel));
+ fprintf(fp, ",%s", evsel__name(evsel));
if (--nr == 0)
fprintf(fp, "}\n");
@@ -2846,6 +2896,60 @@ static int process_compressed(struct feat_fd *ff,
return 0;
}
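+/*
+ * Read the capability count and name/value pairs back into a buffer of
+ * NUL-terminated "name=value" strings, recording the "branches" capability
+ * in env.max_branches.
+ */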
+static int process_cpu_pmu_caps(struct feat_fd *ff,
+ void *data __maybe_unused)
+{
+ char *name, *value;
+ struct strbuf sb;
+ u32 nr_caps;
+
+ if (do_read_u32(ff, &nr_caps))
+ return -1;
+
+ if (!nr_caps) {
+ pr_debug("cpu pmu capabilities not available\n");
+ return 0;
+ }
+
+ ff->ph->env.nr_cpu_pmu_caps = nr_caps;
+
+ if (strbuf_init(&sb, 128) < 0)
+ return -1;
+
+ while (nr_caps--) {
+ name = do_read_string(ff);
+ if (!name)
+ goto error;
+
+ value = do_read_string(ff);
+ if (!value)
+ goto free_name;
+
+ if (strbuf_addf(&sb, "%s=%s", name, value) < 0)
+ goto free_value;
+
+		/* include a NUL character at the end of each name=value pair */
+ if (strbuf_add(&sb, "", 1) < 0)
+ goto free_value;
+
+ if (!strcmp(name, "branches"))
+ ff->ph->env.max_branches = atoi(value);
+
+ free(value);
+ free(name);
+ }
+ ff->ph->env.cpu_pmu_caps = strbuf_detach(&sb, NULL);
+ return 0;
+
+free_value:
+ free(value);
+free_name:
+ free(name);
+error:
+ strbuf_release(&sb);
+ return -1;
+}
+
#define FEAT_OPR(n, func, __full_only) \
[HEADER_##n] = { \
.name = __stringify(n), \
@@ -2903,6 +3007,7 @@ const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE] = {
FEAT_OPR(BPF_PROG_INFO, bpf_prog_info, false),
FEAT_OPR(BPF_BTF, bpf_btf, false),
FEAT_OPR(COMPRESSED, compressed, false),
+ FEAT_OPR(CPU_PMU_CAPS, cpu_pmu_caps, false),
};
struct header_print_data {
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 840f95cee349..650bd1c7a99b 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -43,6 +43,7 @@ enum {
HEADER_BPF_PROG_INFO,
HEADER_BPF_BTF,
HEADER_COMPRESSED,
+ HEADER_CPU_PMU_CAPS,
HEADER_LAST_FEATURE,
HEADER_FEAT_BITS = 256,
};
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 283a69ff6a3d..12b65d00cf65 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -1070,6 +1070,20 @@ iter_next_cumulative_entry(struct hist_entry_iter *iter,
return fill_callchain_info(al, node, iter->hide_unresolved);
}
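+/*
+ * Cheap symbol-only inequality check, used below to skip the full
+ * hist_entry__cmp() in the common case where the symbols differ.
+ */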
+static bool
+hist_entry__fast__sym_diff(struct hist_entry *left,
+ struct hist_entry *right)
+{
+ struct symbol *sym_l = left->ms.sym;
+ struct symbol *sym_r = right->ms.sym;
+
+ if (!sym_l && !sym_r)
+ return left->ip != right->ip;
+
+ return !!_sort__sym_cmp(sym_l, sym_r);
+}
+
+
static int
iter_add_next_cumulative_entry(struct hist_entry_iter *iter,
struct addr_location *al)
@@ -1096,6 +1110,7 @@ iter_add_next_cumulative_entry(struct hist_entry_iter *iter,
};
int i;
struct callchain_cursor cursor;
+ bool fast = hists__has(he_tmp.hists, sym);
callchain_cursor_snapshot(&cursor, &callchain_cursor);
@@ -1106,6 +1121,14 @@ iter_add_next_cumulative_entry(struct hist_entry_iter *iter,
* It's possible that it has cycles or recursive calls.
*/
for (i = 0; i < iter->curr; i++) {
+ /*
+			 * In most cases, there are no duplicate entries in the callchain.
+ * The symbols are usually different. Do a quick check for
+ * symbols first.
+ */
+ if (fast && hist_entry__fast__sym_diff(he_cache[i], &he_tmp))
+ continue;
+
if (hist_entry__cmp(he_cache[i], &he_tmp) == 0) {
/* to avoid calling callback function */
iter->he = NULL;
@@ -2637,7 +2660,7 @@ size_t perf_evlist__fprintf_nr_events(struct evlist *evlist, FILE *fp)
size_t ret = 0;
evlist__for_each_entry(evlist, pos) {
- ret += fprintf(fp, "%s stats:\n", perf_evsel__name(pos));
+ ret += fprintf(fp, "%s stats:\n", evsel__name(pos));
ret += events_stats__fprintf(&evsel__hists(pos)->stats, fp);
}
@@ -2661,7 +2684,7 @@ int __hists__scnprintf_title(struct hists *hists, char *bf, size_t size, bool sh
unsigned long nr_samples = hists->stats.nr_events[PERF_RECORD_SAMPLE];
u64 nr_events = hists->stats.total_period;
struct evsel *evsel = hists_to_evsel(hists);
- const char *ev_name = perf_evsel__name(evsel);
+ const char *ev_name = evsel__name(evsel);
char buf[512], sample_freq_str[64] = "";
size_t buflen = sizeof(buf);
char ref[30] = " show reference callgraph, ";
@@ -2672,10 +2695,10 @@ int __hists__scnprintf_title(struct hists *hists, char *bf, size_t size, bool sh
nr_events = hists->stats.total_non_filtered_period;
}
- if (perf_evsel__is_group_event(evsel)) {
+ if (evsel__is_group_event(evsel)) {
struct evsel *pos;
- perf_evsel__group_desc(evsel, buf, buflen);
+ evsel__group_desc(evsel, buf, buflen);
ev_name = buf;
for_each_group_member(pos, evsel) {
diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c
index 34cb380d19a3..af1e78d76228 100644
--- a/tools/perf/util/intel-bts.c
+++ b/tools/perf/util/intel-bts.c
@@ -432,7 +432,7 @@ static int intel_bts_process_buffer(struct intel_bts_queue *btsq,
le64_to_cpu(branch->from),
le64_to_cpu(branch->to),
btsq->intel_pt_insn.length,
- buffer->buffer_nr + 1);
+ buffer->buffer_nr + 1, true, 0, 0);
if (filter && !(filter & btsq->sample_flags))
continue;
err = intel_bts_synth_branch_sample(btsq, branch);
@@ -728,6 +728,15 @@ static void intel_bts_free(struct perf_session *session)
free(bts);
}
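+/* An evsel is a BTS AUX area event if its attr type matches the BTS PMU type. */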
+static bool intel_bts_evsel_is_auxtrace(struct perf_session *session,
+ struct evsel *evsel)
+{
+ struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
+ auxtrace);
+
+ return evsel->core.attr.type == bts->pmu_type;
+}
+
struct intel_bts_synth {
struct perf_tool dummy_tool;
struct perf_session *session;
@@ -816,10 +825,10 @@ static int intel_bts_synth_events(struct intel_bts *bts,
bts->branches_id = id;
/*
* We only use sample types from PERF_SAMPLE_MASK so we can use
- * __perf_evsel__sample_size() here.
+ * __evsel__sample_size() here.
*/
bts->branches_event_size = sizeof(struct perf_record_sample) +
- __perf_evsel__sample_size(attr.sample_type);
+ __evsel__sample_size(attr.sample_type);
}
return 0;
@@ -883,6 +892,7 @@ int intel_bts_process_auxtrace_info(union perf_event *event,
bts->auxtrace.flush_events = intel_bts_flush;
bts->auxtrace.free_events = intel_bts_free_events;
bts->auxtrace.free = intel_bts_free;
+ bts->auxtrace.evsel_is_auxtrace = intel_bts_evsel_is_auxtrace;
session->auxtrace = &bts->auxtrace;
intel_bts_print_info(&auxtrace_info->priv[0], INTEL_BTS_PMU_TYPE,
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
index 0ccf10a0bf44..4ce109993e74 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
@@ -552,7 +552,7 @@ static int intel_pt_do_get_packet(const unsigned char *buf, size_t len,
break;
default:
break;
- };
+ }
if (!(byte & BIT(0))) {
if (byte == 0)
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 23c8289c2472..f17b1e769ae4 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -33,6 +33,7 @@
#include "tsc.h"
#include "intel-pt.h"
#include "config.h"
+#include "util/perf_api_probe.h"
#include "util/synthetic-events.h"
#include "time-utils.h"
@@ -68,6 +69,10 @@ struct intel_pt {
bool est_tsc;
bool sync_switch;
bool mispred_all;
+ bool use_thread_stack;
+ bool callstack;
+ unsigned int br_stack_sz;
+ unsigned int br_stack_sz_plus;
int have_sched_switch;
u32 pmu_type;
u64 kernel_start;
@@ -124,6 +129,9 @@ struct intel_pt {
struct range *time_ranges;
unsigned int range_cnt;
+
+ struct ip_callchain *chain;
+ struct branch_stack *br_stack;
};
enum switch_state {
@@ -143,8 +151,6 @@ struct intel_pt_queue {
const struct intel_pt_state *state;
struct ip_callchain *chain;
struct branch_stack *last_branch;
- struct branch_stack *last_branch_rb;
- size_t last_branch_pos;
union perf_event *event_buf;
bool on_heap;
bool stop;
@@ -868,6 +874,83 @@ static u64 intel_pt_ns_to_ticks(const struct intel_pt *pt, u64 ns)
pt->tc.time_mult;
}
+static struct ip_callchain *intel_pt_alloc_chain(struct intel_pt *pt)
+{
+ size_t sz = sizeof(struct ip_callchain);
+
+ /* Add 1 to callchain_sz for callchain context */
+ sz += (pt->synth_opts.callchain_sz + 1) * sizeof(u64);
+ return zalloc(sz);
+}
+
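+/*
+ * Mark evsels that did not record a callchain as having one synthesized from
+ * AUX area data, and allocate the shared callchain buffer.
+ */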
+static int intel_pt_callchain_init(struct intel_pt *pt)
+{
+ struct evsel *evsel;
+
+ evlist__for_each_entry(pt->session->evlist, evsel) {
+ if (!(evsel->core.attr.sample_type & PERF_SAMPLE_CALLCHAIN))
+ evsel->synth_sample_type |= PERF_SAMPLE_CALLCHAIN;
+ }
+
+ pt->chain = intel_pt_alloc_chain(pt);
+ if (!pt->chain)
+ return -ENOMEM;
+
+ return 0;
+}
+
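+/* Synthesize a late callchain from the thread stack and attach it to the sample. */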
+static void intel_pt_add_callchain(struct intel_pt *pt,
+ struct perf_sample *sample)
+{
+ struct thread *thread = machine__findnew_thread(pt->machine,
+ sample->pid,
+ sample->tid);
+
+ thread_stack__sample_late(thread, sample->cpu, pt->chain,
+ pt->synth_opts.callchain_sz + 1, sample->ip,
+ pt->kernel_start);
+
+ sample->callchain = pt->chain;
+}
+
+static struct branch_stack *intel_pt_alloc_br_stack(struct intel_pt *pt)
+{
+ size_t sz = sizeof(struct branch_stack);
+
+ sz += pt->br_stack_sz * sizeof(struct branch_entry);
+ return zalloc(sz);
+}
+
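+/*
+ * Mark evsels that did not record a branch stack as having one synthesized from
+ * AUX area data, and allocate the shared branch stack buffer.
+ */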
+static int intel_pt_br_stack_init(struct intel_pt *pt)
+{
+ struct evsel *evsel;
+
+ evlist__for_each_entry(pt->session->evlist, evsel) {
+ if (!(evsel->core.attr.sample_type & PERF_SAMPLE_BRANCH_STACK))
+ evsel->synth_sample_type |= PERF_SAMPLE_BRANCH_STACK;
+ }
+
+ pt->br_stack = intel_pt_alloc_br_stack(pt);
+ if (!pt->br_stack)
+ return -ENOMEM;
+
+ return 0;
+}
+
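+/* Synthesize a late branch stack from the thread stack and attach it to the sample. */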
+static void intel_pt_add_br_stack(struct intel_pt *pt,
+ struct perf_sample *sample)
+{
+ struct thread *thread = machine__findnew_thread(pt->machine,
+ sample->pid,
+ sample->tid);
+
+ thread_stack__br_sample_late(thread, sample->cpu, pt->br_stack,
+ pt->br_stack_sz, sample->ip,
+ pt->kernel_start);
+
+ sample->branch_stack = pt->br_stack;
+}
+
static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
unsigned int queue_nr)
{
@@ -880,26 +963,15 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
return NULL;
if (pt->synth_opts.callchain) {
- size_t sz = sizeof(struct ip_callchain);
-
- /* Add 1 to callchain_sz for callchain context */
- sz += (pt->synth_opts.callchain_sz + 1) * sizeof(u64);
- ptq->chain = zalloc(sz);
+ ptq->chain = intel_pt_alloc_chain(pt);
if (!ptq->chain)
goto out_free;
}
if (pt->synth_opts.last_branch) {
- size_t sz = sizeof(struct branch_stack);
-
- sz += pt->synth_opts.last_branch_sz *
- sizeof(struct branch_entry);
- ptq->last_branch = zalloc(sz);
+ ptq->last_branch = intel_pt_alloc_br_stack(pt);
if (!ptq->last_branch)
goto out_free;
- ptq->last_branch_rb = zalloc(sz);
- if (!ptq->last_branch_rb)
- goto out_free;
}
ptq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
@@ -968,7 +1040,6 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
out_free:
zfree(&ptq->event_buf);
zfree(&ptq->last_branch);
- zfree(&ptq->last_branch_rb);
zfree(&ptq->chain);
free(ptq);
return NULL;
@@ -984,7 +1055,6 @@ static void intel_pt_free_queue(void *priv)
intel_pt_decoder_free(ptq->decoder);
zfree(&ptq->event_buf);
zfree(&ptq->last_branch);
- zfree(&ptq->last_branch_rb);
zfree(&ptq->chain);
free(ptq);
}
@@ -1152,58 +1222,6 @@ static int intel_pt_setup_queues(struct intel_pt *pt)
return 0;
}
-static inline void intel_pt_copy_last_branch_rb(struct intel_pt_queue *ptq)
-{
- struct branch_stack *bs_src = ptq->last_branch_rb;
- struct branch_stack *bs_dst = ptq->last_branch;
- size_t nr = 0;
-
- bs_dst->nr = bs_src->nr;
-
- if (!bs_src->nr)
- return;
-
- nr = ptq->pt->synth_opts.last_branch_sz - ptq->last_branch_pos;
- memcpy(&bs_dst->entries[0],
- &bs_src->entries[ptq->last_branch_pos],
- sizeof(struct branch_entry) * nr);
-
- if (bs_src->nr >= ptq->pt->synth_opts.last_branch_sz) {
- memcpy(&bs_dst->entries[nr],
- &bs_src->entries[0],
- sizeof(struct branch_entry) * ptq->last_branch_pos);
- }
-}
-
-static inline void intel_pt_reset_last_branch_rb(struct intel_pt_queue *ptq)
-{
- ptq->last_branch_pos = 0;
- ptq->last_branch_rb->nr = 0;
-}
-
-static void intel_pt_update_last_branch_rb(struct intel_pt_queue *ptq)
-{
- const struct intel_pt_state *state = ptq->state;
- struct branch_stack *bs = ptq->last_branch_rb;
- struct branch_entry *be;
-
- if (!ptq->last_branch_pos)
- ptq->last_branch_pos = ptq->pt->synth_opts.last_branch_sz;
-
- ptq->last_branch_pos -= 1;
-
- be = &bs->entries[ptq->last_branch_pos];
- be->from = state->from_ip;
- be->to = state->to_ip;
- be->flags.abort = !!(state->flags & INTEL_PT_ABORT_TX);
- be->flags.in_tx = !!(state->flags & INTEL_PT_IN_TX);
- /* No support for mispredict */
- be->flags.mispred = ptq->pt->mispred_all;
-
- if (bs->nr < ptq->pt->synth_opts.last_branch_sz)
- bs->nr += 1;
-}
-
static inline bool intel_pt_skip_event(struct intel_pt *pt)
{
return pt->synth_opts.initial_skip &&
@@ -1271,9 +1289,9 @@ static inline int intel_pt_opt_inject(struct intel_pt *pt,
return intel_pt_inject_event(event, sample, type);
}
-static int intel_pt_deliver_synth_b_event(struct intel_pt *pt,
- union perf_event *event,
- struct perf_sample *sample, u64 type)
+static int intel_pt_deliver_synth_event(struct intel_pt *pt,
+ union perf_event *event,
+ struct perf_sample *sample, u64 type)
{
int ret;
@@ -1333,8 +1351,8 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
ptq->last_br_cyc_cnt = ptq->ipc_cyc_cnt;
}
- return intel_pt_deliver_synth_b_event(pt, event, &sample,
- pt->branches_sample_type);
+ return intel_pt_deliver_synth_event(pt, event, &sample,
+ pt->branches_sample_type);
}
static void intel_pt_prep_sample(struct intel_pt *pt,
@@ -1352,27 +1370,12 @@ static void intel_pt_prep_sample(struct intel_pt *pt,
}
if (pt->synth_opts.last_branch) {
- intel_pt_copy_last_branch_rb(ptq);
+ thread_stack__br_sample(ptq->thread, ptq->cpu, ptq->last_branch,
+ pt->br_stack_sz);
sample->branch_stack = ptq->last_branch;
}
}
-static inline int intel_pt_deliver_synth_event(struct intel_pt *pt,
- struct intel_pt_queue *ptq,
- union perf_event *event,
- struct perf_sample *sample,
- u64 type)
-{
- int ret;
-
- ret = intel_pt_deliver_synth_b_event(pt, event, sample, type);
-
- if (pt->synth_opts.last_branch)
- intel_pt_reset_last_branch_rb(ptq);
-
- return ret;
-}
-
static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
{
struct intel_pt *pt = ptq->pt;
@@ -1397,7 +1400,7 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
ptq->last_insn_cnt = ptq->state->tot_insn_cnt;
- return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
+ return intel_pt_deliver_synth_event(pt, event, &sample,
pt->instructions_sample_type);
}
@@ -1415,7 +1418,7 @@ static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
sample.id = ptq->pt->transactions_id;
sample.stream_id = ptq->pt->transactions_id;
- return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
+ return intel_pt_deliver_synth_event(pt, event, &sample,
pt->transactions_sample_type);
}
@@ -1456,7 +1459,7 @@ static int intel_pt_synth_ptwrite_sample(struct intel_pt_queue *ptq)
sample.raw_size = perf_synth__raw_size(raw);
sample.raw_data = perf_synth__raw_data(&raw);
- return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
+ return intel_pt_deliver_synth_event(pt, event, &sample,
pt->ptwrites_sample_type);
}
@@ -1486,7 +1489,7 @@ static int intel_pt_synth_cbr_sample(struct intel_pt_queue *ptq)
sample.raw_size = perf_synth__raw_size(raw);
sample.raw_data = perf_synth__raw_data(&raw);
- return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
+ return intel_pt_deliver_synth_event(pt, event, &sample,
pt->pwr_events_sample_type);
}
@@ -1511,7 +1514,7 @@ static int intel_pt_synth_mwait_sample(struct intel_pt_queue *ptq)
sample.raw_size = perf_synth__raw_size(raw);
sample.raw_data = perf_synth__raw_data(&raw);
- return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
+ return intel_pt_deliver_synth_event(pt, event, &sample,
pt->pwr_events_sample_type);
}
@@ -1536,7 +1539,7 @@ static int intel_pt_synth_pwre_sample(struct intel_pt_queue *ptq)
sample.raw_size = perf_synth__raw_size(raw);
sample.raw_data = perf_synth__raw_data(&raw);
- return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
+ return intel_pt_deliver_synth_event(pt, event, &sample,
pt->pwr_events_sample_type);
}
@@ -1561,7 +1564,7 @@ static int intel_pt_synth_exstop_sample(struct intel_pt_queue *ptq)
sample.raw_size = perf_synth__raw_size(raw);
sample.raw_data = perf_synth__raw_data(&raw);
- return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
+ return intel_pt_deliver_synth_event(pt, event, &sample,
pt->pwr_events_sample_type);
}
@@ -1586,7 +1589,7 @@ static int intel_pt_synth_pwrx_sample(struct intel_pt_queue *ptq)
sample.raw_size = perf_synth__raw_size(raw);
sample.raw_data = perf_synth__raw_data(&raw);
- return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
+ return intel_pt_deliver_synth_event(pt, event, &sample,
pt->pwr_events_sample_type);
}
@@ -1680,15 +1683,14 @@ static u64 intel_pt_lbr_flags(u64 info)
union {
struct branch_flags flags;
u64 result;
- } u = {
- .flags = {
- .mispred = !!(info & LBR_INFO_MISPRED),
- .predicted = !(info & LBR_INFO_MISPRED),
- .in_tx = !!(info & LBR_INFO_IN_TX),
- .abort = !!(info & LBR_INFO_ABORT),
- .cycles = info & LBR_INFO_CYCLES,
- }
- };
+ } u;
+
+ u.result = 0;
+ u.flags.mispred = !!(info & LBR_INFO_MISPRED);
+ u.flags.predicted = !(info & LBR_INFO_MISPRED);
+ u.flags.in_tx = !!(info & LBR_INFO_IN_TX);
+ u.flags.abort = !!(info & LBR_INFO_ABORT);
+ u.flags.cycles = info & LBR_INFO_CYCLES;
return u.result;
}
@@ -1807,7 +1809,9 @@ static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq)
intel_pt_add_lbrs(&br.br_stack, items);
sample.branch_stack = &br.br_stack;
} else if (pt->synth_opts.last_branch) {
- intel_pt_copy_last_branch_rb(ptq);
+ thread_stack__br_sample(ptq->thread, ptq->cpu,
+ ptq->last_branch,
+ pt->br_stack_sz);
sample.branch_stack = ptq->last_branch;
} else {
br.br_stack.nr = 0;
@@ -1842,7 +1846,7 @@ static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq)
sample.transaction = txn;
}
- return intel_pt_deliver_synth_event(pt, ptq, event, &sample, sample_type);
+ return intel_pt_deliver_synth_event(pt, event, &sample, sample_type);
}
static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu,
@@ -1992,12 +1996,15 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
if (!(state->type & INTEL_PT_BRANCH))
return 0;
- if (pt->synth_opts.callchain || pt->synth_opts.thread_stack)
- thread_stack__event(ptq->thread, ptq->cpu, ptq->flags, state->from_ip,
- state->to_ip, ptq->insn_len,
- state->trace_nr);
- else
+ if (pt->use_thread_stack) {
+ thread_stack__event(ptq->thread, ptq->cpu, ptq->flags,
+ state->from_ip, state->to_ip, ptq->insn_len,
+ state->trace_nr, pt->callstack,
+ pt->br_stack_sz_plus,
+ pt->mispred_all);
+ } else {
thread_stack__set_trace_nr(ptq->thread, ptq->cpu, state->trace_nr);
+ }
if (pt->sample_branches) {
err = intel_pt_synth_branch_sample(ptq);
@@ -2005,9 +2012,6 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
return err;
}
- if (pt->synth_opts.last_branch)
- intel_pt_update_last_branch_rb(ptq);
-
if (!ptq->sync_switch)
return 0;
@@ -2484,7 +2488,7 @@ static int intel_pt_process_switch(struct intel_pt *pt,
if (evsel != pt->switch_evsel)
return 0;
- tid = perf_evsel__intval(evsel, sample, "next_pid");
+ tid = evsel__intval(evsel, sample, "next_pid");
cpu = sample->cpu;
intel_pt_log("sched_switch: cpu %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
@@ -2639,6 +2643,13 @@ static int intel_pt_process_event(struct perf_session *session,
if (err)
return err;
+ if (event->header.type == PERF_RECORD_SAMPLE) {
+ if (pt->synth_opts.add_callchain && !sample->callchain)
+ intel_pt_add_callchain(pt, sample);
+ if (pt->synth_opts.add_last_branch && !sample->branch_stack)
+ intel_pt_add_br_stack(pt, sample);
+ }
+
if (event->header.type == PERF_RECORD_AUX &&
(event->aux.flags & PERF_AUX_FLAG_TRUNCATED) &&
pt->synth_opts.errors) {
@@ -2710,11 +2721,21 @@ static void intel_pt_free(struct perf_session *session)
session->auxtrace = NULL;
thread__put(pt->unknown_thread);
addr_filters__exit(&pt->filts);
+ zfree(&pt->chain);
zfree(&pt->filter);
zfree(&pt->time_ranges);
free(pt);
}
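+/* An evsel is an Intel PT AUX area event if its attr type matches the PT PMU type. */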
+static bool intel_pt_evsel_is_auxtrace(struct perf_session *session,
+ struct evsel *evsel)
+{
+ struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
+ auxtrace);
+
+ return evsel->core.attr.type == pt->pmu_type;
+}
+
static int intel_pt_process_auxtrace_event(struct perf_session *session,
union perf_event *event,
struct perf_tool *tool __maybe_unused)
@@ -3016,7 +3037,7 @@ static struct evsel *intel_pt_find_sched_switch(struct evlist *evlist)
struct evsel *evsel;
evlist__for_each_entry_reverse(evlist, evsel) {
- const char *name = perf_evsel__name(evsel);
+ const char *name = evsel__name(evsel);
if (!strcmp(name, "sched:sched_switch"))
return evsel;
@@ -3310,6 +3331,7 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
pt->auxtrace.flush_events = intel_pt_flush;
pt->auxtrace.free_events = intel_pt_free_events;
pt->auxtrace.free = intel_pt_free;
+ pt->auxtrace.evsel_is_auxtrace = intel_pt_evsel_is_auxtrace;
session->auxtrace = &pt->auxtrace;
if (dump_trace)
@@ -3338,6 +3360,7 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
!session->itrace_synth_opts->inject) {
pt->synth_opts.branches = false;
pt->synth_opts.callchain = true;
+ pt->synth_opts.add_callchain = true;
}
pt->synth_opts.thread_stack =
session->itrace_synth_opts->thread_stack;
@@ -3370,14 +3393,54 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
pt->branches_filter |= PERF_IP_FLAG_RETURN |
PERF_IP_FLAG_TRACE_BEGIN;
- if (pt->synth_opts.callchain && !symbol_conf.use_callchain) {
+ if ((pt->synth_opts.callchain || pt->synth_opts.add_callchain) &&
+ !symbol_conf.use_callchain) {
symbol_conf.use_callchain = true;
if (callchain_register_param(&callchain_param) < 0) {
symbol_conf.use_callchain = false;
pt->synth_opts.callchain = false;
+ pt->synth_opts.add_callchain = false;
}
}
+ if (pt->synth_opts.add_callchain) {
+ err = intel_pt_callchain_init(pt);
+ if (err)
+ goto err_delete_thread;
+ }
+
+ if (pt->synth_opts.last_branch || pt->synth_opts.add_last_branch) {
+ pt->br_stack_sz = pt->synth_opts.last_branch_sz;
+ pt->br_stack_sz_plus = pt->br_stack_sz;
+ }
+
+ if (pt->synth_opts.add_last_branch) {
+ err = intel_pt_br_stack_init(pt);
+ if (err)
+ goto err_delete_thread;
+ /*
+ * Additional branch stack size to cater for tracing from the
+ * actual sample ip to where the sample time is recorded.
+ * Measured at about 200 branches, but generously set to 1024.
+ * If kernel space is not being traced, then add just 1 for the
+ * branch to kernel space.
+ */
+ if (intel_pt_tracing_kernel(pt))
+ pt->br_stack_sz_plus += 1024;
+ else
+ pt->br_stack_sz_plus += 1;
+ }
+
+ pt->use_thread_stack = pt->synth_opts.callchain ||
+ pt->synth_opts.add_callchain ||
+ pt->synth_opts.thread_stack ||
+ pt->synth_opts.last_branch ||
+ pt->synth_opts.add_last_branch;
+
+ pt->callstack = pt->synth_opts.callchain ||
+ pt->synth_opts.add_callchain ||
+ pt->synth_opts.thread_stack;
+
err = intel_pt_synth_events(pt, session);
if (err)
goto err_delete_thread;
@@ -3400,6 +3463,7 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
return 0;
err_delete_thread:
+ zfree(&pt->chain);
thread__zput(pt->unknown_thread);
err_free_queues:
intel_pt_log_disable();
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 09845eae9c03..8ed2135893bb 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -2190,6 +2190,303 @@ static int remove_loops(struct branch_entry *l, int nr,
return nr;
}
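+/*
+ * Add the kernel part of the sample's call chain (chain->ips[0..end]) to the
+ * cursor, in callee or caller order as requested.
+ */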
+static int lbr_callchain_add_kernel_ip(struct thread *thread,
+ struct callchain_cursor *cursor,
+ struct perf_sample *sample,
+ struct symbol **parent,
+ struct addr_location *root_al,
+ u64 branch_from,
+ bool callee, int end)
+{
+ struct ip_callchain *chain = sample->callchain;
+ u8 cpumode = PERF_RECORD_MISC_USER;
+ int err, i;
+
+ if (callee) {
+ for (i = 0; i < end + 1; i++) {
+ err = add_callchain_ip(thread, cursor, parent,
+ root_al, &cpumode, chain->ips[i],
+ false, NULL, NULL, branch_from);
+ if (err)
+ return err;
+ }
+ return 0;
+ }
+
+ for (i = end; i >= 0; i--) {
+ err = add_callchain_ip(thread, cursor, parent,
+ root_al, &cpumode, chain->ips[i],
+ false, NULL, NULL, branch_from);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
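+/*
+ * Remember the callchain cursor node appended for LBR entry @idx, so that it
+ * can be reused when stitching LBRs from the next sample.
+ */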
+static void save_lbr_cursor_node(struct thread *thread,
+ struct callchain_cursor *cursor,
+ int idx)
+{
+ struct lbr_stitch *lbr_stitch = thread->lbr_stitch;
+
+ if (!lbr_stitch)
+ return;
+
+ if (cursor->pos == cursor->nr) {
+ lbr_stitch->prev_lbr_cursor[idx].valid = false;
+ return;
+ }
+
+ if (!cursor->curr)
+ cursor->curr = cursor->first;
+ else
+ cursor->curr = cursor->curr->next;
+ memcpy(&lbr_stitch->prev_lbr_cursor[idx], cursor->curr,
+ sizeof(struct callchain_cursor_node));
+
+ lbr_stitch->prev_lbr_cursor[idx].valid = true;
+ cursor->pos++;
+}
+
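+/*
+ * Add the LBR entries (the first entry's "to" plus each entry's "from") to the
+ * cursor in the requested order, saving cursor nodes for later stitching.
+ */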
+static int lbr_callchain_add_lbr_ip(struct thread *thread,
+ struct callchain_cursor *cursor,
+ struct perf_sample *sample,
+ struct symbol **parent,
+ struct addr_location *root_al,
+ u64 *branch_from,
+ bool callee)
+{
+ struct branch_stack *lbr_stack = sample->branch_stack;
+ struct branch_entry *entries = perf_sample__branch_entries(sample);
+ u8 cpumode = PERF_RECORD_MISC_USER;
+ int lbr_nr = lbr_stack->nr;
+ struct branch_flags *flags;
+ int err, i;
+ u64 ip;
+
+ /*
+	 * curr and pos are not used while writing the session. They are cleared
+	 * in callchain_cursor_commit() when the writing session is closed.
+	 * Use curr and pos here to track the current cursor node.
+ */
+ if (thread->lbr_stitch) {
+ cursor->curr = NULL;
+ cursor->pos = cursor->nr;
+ if (cursor->nr) {
+ cursor->curr = cursor->first;
+ for (i = 0; i < (int)(cursor->nr - 1); i++)
+ cursor->curr = cursor->curr->next;
+ }
+ }
+
+ if (callee) {
+ /* Add LBR ip from first entries.to */
+ ip = entries[0].to;
+ flags = &entries[0].flags;
+ *branch_from = entries[0].from;
+ err = add_callchain_ip(thread, cursor, parent,
+ root_al, &cpumode, ip,
+ true, flags, NULL,
+ *branch_from);
+ if (err)
+ return err;
+
+ /*
+		 * The number of cursor nodes has increased, so advance the current
+		 * cursor node. There is no need to save the cursor node for entry 0:
+		 * it's impossible to stitch the whole LBR stack of the previous
+		 * sample.
+ */
+ if (thread->lbr_stitch && (cursor->pos != cursor->nr)) {
+ if (!cursor->curr)
+ cursor->curr = cursor->first;
+ else
+ cursor->curr = cursor->curr->next;
+ cursor->pos++;
+ }
+
+ /* Add LBR ip from entries.from one by one. */
+ for (i = 0; i < lbr_nr; i++) {
+ ip = entries[i].from;
+ flags = &entries[i].flags;
+ err = add_callchain_ip(thread, cursor, parent,
+ root_al, &cpumode, ip,
+ true, flags, NULL,
+ *branch_from);
+ if (err)
+ return err;
+ save_lbr_cursor_node(thread, cursor, i);
+ }
+ return 0;
+ }
+
+ /* Add LBR ip from entries.from one by one. */
+ for (i = lbr_nr - 1; i >= 0; i--) {
+ ip = entries[i].from;
+ flags = &entries[i].flags;
+ err = add_callchain_ip(thread, cursor, parent,
+ root_al, &cpumode, ip,
+ true, flags, NULL,
+ *branch_from);
+ if (err)
+ return err;
+ save_lbr_cursor_node(thread, cursor, i);
+ }
+
+ /* Add LBR ip from first entries.to */
+ ip = entries[0].to;
+ flags = &entries[0].flags;
+ *branch_from = entries[0].from;
+ err = add_callchain_ip(thread, cursor, parent,
+ root_al, &cpumode, ip,
+ true, flags, NULL,
+ *branch_from);
+ if (err)
+ return err;
+
+ return 0;
+}
+
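+/* Append the previously saved (stitched) LBR cursor nodes to the callchain cursor. */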
+static int lbr_callchain_add_stitched_lbr_ip(struct thread *thread,
+ struct callchain_cursor *cursor)
+{
+ struct lbr_stitch *lbr_stitch = thread->lbr_stitch;
+ struct callchain_cursor_node *cnode;
+ struct stitch_list *stitch_node;
+ int err;
+
+ list_for_each_entry(stitch_node, &lbr_stitch->lists, node) {
+ cnode = &stitch_node->cursor;
+
+ err = callchain_cursor_append(cursor, cnode->ip,
+ &cnode->ms,
+ cnode->branch,
+ &cnode->branch_flags,
+ cnode->nr_loop_iter,
+ cnode->iter_cycles,
+ cnode->branch_from,
+ cnode->srcline);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
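+/* Reuse a stitch node from the free list if one is available, otherwise allocate one. */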
+static struct stitch_list *get_stitch_node(struct thread *thread)
+{
+ struct lbr_stitch *lbr_stitch = thread->lbr_stitch;
+ struct stitch_list *stitch_node;
+
+ if (!list_empty(&lbr_stitch->free_lists)) {
+ stitch_node = list_first_entry(&lbr_stitch->free_lists,
+ struct stitch_list, node);
+ list_del(&stitch_node->node);
+
+ return stitch_node;
+ }
+
+ return malloc(sizeof(struct stitch_list));
+}
+
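+/*
+ * Check whether the previous sample's LBR stack overlaps the base of the
+ * current one and, if so, queue the saved non-overlapping previous entries on
+ * lbr_stitch->lists so they can be stitched onto the current callchain.
+ */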
+static bool has_stitched_lbr(struct thread *thread,
+ struct perf_sample *cur,
+ struct perf_sample *prev,
+ unsigned int max_lbr,
+ bool callee)
+{
+ struct branch_stack *cur_stack = cur->branch_stack;
+ struct branch_entry *cur_entries = perf_sample__branch_entries(cur);
+ struct branch_stack *prev_stack = prev->branch_stack;
+ struct branch_entry *prev_entries = perf_sample__branch_entries(prev);
+ struct lbr_stitch *lbr_stitch = thread->lbr_stitch;
+ int i, j, nr_identical_branches = 0;
+ struct stitch_list *stitch_node;
+ u64 cur_base, distance;
+
+ if (!cur_stack || !prev_stack)
+ return false;
+
+ /* Find the physical index of the base-of-stack for current sample. */
+ cur_base = max_lbr - cur_stack->nr + cur_stack->hw_idx + 1;
+
+ distance = (prev_stack->hw_idx > cur_base) ? (prev_stack->hw_idx - cur_base) :
+ (max_lbr + prev_stack->hw_idx - cur_base);
+ /* Previous sample has shorter stack. Nothing can be stitched. */
+ if (distance + 1 > prev_stack->nr)
+ return false;
+
+ /*
+ * Check if there are identical LBRs between two samples.
+	 * Identical LBRs must have the same from, to and flags values. Also,
+ * they have to be saved in the same LBR registers (same physical
+ * index).
+ *
+	 * Start from the base-of-stack of the current sample.
+ */
+ for (i = distance, j = cur_stack->nr - 1; (i >= 0) && (j >= 0); i--, j--) {
+ if ((prev_entries[i].from != cur_entries[j].from) ||
+ (prev_entries[i].to != cur_entries[j].to) ||
+ (prev_entries[i].flags.value != cur_entries[j].flags.value))
+ break;
+ nr_identical_branches++;
+ }
+
+ if (!nr_identical_branches)
+ return false;
+
+ /*
+ * Save the LBRs between the base-of-stack of previous sample
+ * and the base-of-stack of current sample into lbr_stitch->lists.
+ * These LBRs will be stitched later.
+ */
+ for (i = prev_stack->nr - 1; i > (int)distance; i--) {
+
+ if (!lbr_stitch->prev_lbr_cursor[i].valid)
+ continue;
+
+ stitch_node = get_stitch_node(thread);
+ if (!stitch_node)
+ return false;
+
+ memcpy(&stitch_node->cursor, &lbr_stitch->prev_lbr_cursor[i],
+ sizeof(struct callchain_cursor_node));
+
+ if (callee)
+ list_add(&stitch_node->node, &lbr_stitch->lists);
+ else
+ list_add_tail(&stitch_node->node, &lbr_stitch->lists);
+ }
+
+ return true;
+}
+
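+/* Lazily allocate the per-thread LBR stitching state; disable stitching on failure. */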
+static bool alloc_lbr_stitch(struct thread *thread, unsigned int max_lbr)
+{
+ if (thread->lbr_stitch)
+ return true;
+
+ thread->lbr_stitch = zalloc(sizeof(*thread->lbr_stitch));
+ if (!thread->lbr_stitch)
+ goto err;
+
+ thread->lbr_stitch->prev_lbr_cursor = calloc(max_lbr + 1, sizeof(struct callchain_cursor_node));
+ if (!thread->lbr_stitch->prev_lbr_cursor)
+ goto free_lbr_stitch;
+
+ INIT_LIST_HEAD(&thread->lbr_stitch->lists);
+ INIT_LIST_HEAD(&thread->lbr_stitch->free_lists);
+
+ return true;
+
+free_lbr_stitch:
+ zfree(&thread->lbr_stitch);
+err:
+ pr_warning("Failed to allocate space for stitched LBRs. Disable LBR stitch\n");
+ thread->lbr_stitch_enable = false;
+ return false;
+}
+
/*
 * Resolve LBR callstack chain sample
* Return:
@@ -2202,12 +2499,16 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
struct perf_sample *sample,
struct symbol **parent,
struct addr_location *root_al,
- int max_stack)
+ int max_stack,
+ unsigned int max_lbr)
{
+ bool callee = (callchain_param.order == ORDER_CALLEE);
struct ip_callchain *chain = sample->callchain;
int chain_nr = min(max_stack, (int)chain->nr), i;
- u8 cpumode = PERF_RECORD_MISC_USER;
- u64 ip, branch_from = 0;
+ struct lbr_stitch *lbr_stitch;
+ bool stitched_lbr = false;
+ u64 branch_from = 0;
+ int err;
for (i = 0; i < chain_nr; i++) {
if (chain->ips[i] == PERF_CONTEXT_USER)
@@ -2215,71 +2516,65 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
}
/* LBR only affects the user callchain */
- if (i != chain_nr) {
- struct branch_stack *lbr_stack = sample->branch_stack;
- struct branch_entry *entries = perf_sample__branch_entries(sample);
- int lbr_nr = lbr_stack->nr, j, k;
- bool branch;
- struct branch_flags *flags;
- /*
- * LBR callstack can only get user call chain.
- * The mix_chain_nr is kernel call chain
- * number plus LBR user call chain number.
- * i is kernel call chain number,
- * 1 is PERF_CONTEXT_USER,
- * lbr_nr + 1 is the user call chain number.
- * For details, please refer to the comments
- * in callchain__printf
- */
- int mix_chain_nr = i + 1 + lbr_nr + 1;
+ if (i == chain_nr)
+ return 0;
- for (j = 0; j < mix_chain_nr; j++) {
- int err;
- branch = false;
- flags = NULL;
+ if (thread->lbr_stitch_enable && !sample->no_hw_idx &&
+ (max_lbr > 0) && alloc_lbr_stitch(thread, max_lbr)) {
+ lbr_stitch = thread->lbr_stitch;
- if (callchain_param.order == ORDER_CALLEE) {
- if (j < i + 1)
- ip = chain->ips[j];
- else if (j > i + 1) {
- k = j - i - 2;
- ip = entries[k].from;
- branch = true;
- flags = &entries[k].flags;
- } else {
- ip = entries[0].to;
- branch = true;
- flags = &entries[0].flags;
- branch_from = entries[0].from;
- }
- } else {
- if (j < lbr_nr) {
- k = lbr_nr - j - 1;
- ip = entries[k].from;
- branch = true;
- flags = &entries[k].flags;
- }
- else if (j > lbr_nr)
- ip = chain->ips[i + 1 - (j - lbr_nr)];
- else {
- ip = entries[0].to;
- branch = true;
- flags = &entries[0].flags;
- branch_from = entries[0].from;
- }
- }
+ stitched_lbr = has_stitched_lbr(thread, sample,
+ &lbr_stitch->prev_sample,
+ max_lbr, callee);
- err = add_callchain_ip(thread, cursor, parent,
- root_al, &cpumode, ip,
- branch, flags, NULL,
- branch_from);
+ if (!stitched_lbr && !list_empty(&lbr_stitch->lists)) {
+ list_replace_init(&lbr_stitch->lists,
+ &lbr_stitch->free_lists);
+ }
+ memcpy(&lbr_stitch->prev_sample, sample, sizeof(*sample));
+ }
+
+ if (callee) {
+ /* Add kernel ip */
+ err = lbr_callchain_add_kernel_ip(thread, cursor, sample,
+ parent, root_al, branch_from,
+ true, i);
+ if (err)
+ goto error;
+
+ err = lbr_callchain_add_lbr_ip(thread, cursor, sample, parent,
+ root_al, &branch_from, true);
+ if (err)
+ goto error;
+
+ if (stitched_lbr) {
+ err = lbr_callchain_add_stitched_lbr_ip(thread, cursor);
if (err)
- return (err < 0) ? err : 0;
+ goto error;
}
- return 1;
+
+ } else {
+ if (stitched_lbr) {
+ err = lbr_callchain_add_stitched_lbr_ip(thread, cursor);
+ if (err)
+ goto error;
+ }
+ err = lbr_callchain_add_lbr_ip(thread, cursor, sample, parent,
+ root_al, &branch_from, false);
+ if (err)
+ goto error;
+
+ /* Add kernel ip */
+ err = lbr_callchain_add_kernel_ip(thread, cursor, sample,
+ parent, root_al, branch_from,
+ false, i);
+ if (err)
+ goto error;
}
+ return 1;
- return 0;
+error:
+ return (err < 0) ? err : 0;
}
static int find_prev_cpumode(struct ip_callchain *chain, struct thread *thread,
@@ -2323,9 +2618,12 @@ static int thread__resolve_callchain_sample(struct thread *thread,
if (chain)
chain_nr = chain->nr;
- if (perf_evsel__has_branch_callstack(evsel)) {
+ if (evsel__has_branch_callstack(evsel)) {
+ struct perf_env *env = evsel__env(evsel);
+
err = resolve_lbr_callchain_sample(thread, cursor, sample, parent,
- root_al, max_stack);
+ root_al, max_stack,
+ !env ? 0 : env->max_branches);
if (err)
return (err < 0) ? err : 0;
}
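The stitching path above keeps per-thread state in thread->lbr_stitch. A rough sketch of that state, inferred only from the accesses visible in this hunk (the authoritative definition lives in thread.h and may carry additional fields):

struct lbr_stitch {
	struct list_head	lists;		/* stitched LBR cursor entries        */
	struct list_head	free_lists;	/* entries recycled on a stitch break */
	struct perf_sample	prev_sample;	/* copy of the previous LBR sample    */
};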
diff --git a/tools/perf/util/mem2node.c b/tools/perf/util/mem2node.c
index 797d86a1ab09..c84f5841c7ab 100644
--- a/tools/perf/util/mem2node.c
+++ b/tools/perf/util/mem2node.c
@@ -1,5 +1,6 @@
#include <errno.h>
#include <inttypes.h>
+#include <asm/bug.h>
#include <linux/bitmap.h>
#include <linux/kernel.h>
#include <linux/zalloc.h>
@@ -95,7 +96,7 @@ int mem2node__init(struct mem2node *map, struct perf_env *env)
/* Cut unused entries, due to merging. */
tmp_entries = realloc(entries, sizeof(*entries) * j);
- if (tmp_entries)
+ if (tmp_entries || WARN_ON_ONCE(j == 0))
entries = tmp_entries;
for (i = 0; i < j; i++) {
diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index 926449a7cdbf..b071df373f8b 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -90,6 +90,7 @@ struct egroup {
const char *metric_name;
const char *metric_expr;
const char *metric_unit;
+ int runtime;
};
static struct evsel *find_evsel_group(struct evlist *perf_evlist,
@@ -202,6 +203,7 @@ static int metricgroup__setup_events(struct list_head *groups,
expr->metric_name = eg->metric_name;
expr->metric_unit = eg->metric_unit;
expr->metric_events = metric_events;
+ expr->runtime = eg->runtime;
list_add(&expr->nd, &me->head);
}
@@ -485,6 +487,45 @@ static bool metricgroup__has_constraint(struct pmu_event *pe)
return false;
}
+int __weak arch_get_runtimeparam(void)
+{
+ return 1;
+}
+
+static int __metricgroup__add_metric(struct strbuf *events,
+ struct list_head *group_list, struct pmu_event *pe, int runtime)
+{
+
+ const char **ids;
+ int idnum;
+ struct egroup *eg;
+
+ if (expr__find_other(pe->metric_expr, NULL, &ids, &idnum, runtime) < 0)
+ return -EINVAL;
+
+ if (events->len > 0)
+ strbuf_addf(events, ",");
+
+ if (metricgroup__has_constraint(pe))
+ metricgroup__add_metric_non_group(events, ids, idnum);
+ else
+ metricgroup__add_metric_weak_group(events, ids, idnum);
+
+ eg = malloc(sizeof(*eg));
+ if (!eg)
+ return -ENOMEM;
+
+ eg->ids = ids;
+ eg->idnum = idnum;
+ eg->metric_name = pe->metric_name;
+ eg->metric_expr = pe->metric_expr;
+ eg->metric_unit = pe->unit;
+ eg->runtime = runtime;
+ list_add_tail(&eg->nd, group_list);
+
+ return 0;
+}
+
static int metricgroup__add_metric(const char *metric, struct strbuf *events,
struct list_head *group_list)
{
@@ -504,35 +545,26 @@ static int metricgroup__add_metric(const char *metric, struct strbuf *events,
continue;
if (match_metric(pe->metric_group, metric) ||
match_metric(pe->metric_name, metric)) {
- const char **ids;
- int idnum;
- struct egroup *eg;
pr_debug("metric expr %s for %s\n", pe->metric_expr, pe->metric_name);
- if (expr__find_other(pe->metric_expr,
- NULL, &ids, &idnum) < 0)
- continue;
- if (events->len > 0)
- strbuf_addf(events, ",");
+ if (!strstr(pe->metric_expr, "?")) {
+ ret = __metricgroup__add_metric(events, group_list, pe, 1);
+ } else {
+ int j, count;
- if (metricgroup__has_constraint(pe))
- metricgroup__add_metric_non_group(events, ids, idnum);
- else
- metricgroup__add_metric_weak_group(events, ids, idnum);
+ count = arch_get_runtimeparam();
- eg = malloc(sizeof(struct egroup));
- if (!eg) {
- ret = -ENOMEM;
- break;
+ /*
+ * This loop creates one event group per runtime
+ * parameter value and adds each of them to group_list.
+ */
+
+ for (j = 0; j < count; j++)
+ ret = __metricgroup__add_metric(events, group_list, pe, j);
}
- eg->ids = ids;
- eg->idnum = idnum;
- eg->metric_name = pe->metric_name;
- eg->metric_expr = pe->metric_expr;
- eg->metric_unit = pe->unit;
- list_add_tail(&eg->nd, group_list);
- ret = 0;
+ if (ret == -ENOMEM)
+ break;
}
}
return ret;
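arch_get_runtimeparam() above is a __weak default that returns 1; metric expressions containing '?' are instantiated once per value in [0, count). A minimal sketch of what an architecture override could look like, assuming the parameter count is exposed through a sysfs file (the path below is illustrative, not taken from this patch):

#include <api/fs/fs.h>

int arch_get_runtimeparam(void)
{
	int count;

	/* e.g. one metric instance per socket/chip reported by the PMU driver */
	if (sysfs__read_int("devices/hv_24x7/interface/sockets", &count) < 0)
		return 1;	/* keep the single-instance default on error */

	return count;
}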
diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h
index 475c7f912864..6b09eb30b4ec 100644
--- a/tools/perf/util/metricgroup.h
+++ b/tools/perf/util/metricgroup.h
@@ -22,6 +22,7 @@ struct metric_expr {
const char *metric_name;
const char *metric_unit;
struct evsel **metric_events;
+ int runtime;
};
struct metric_event *metricgroup__lookup(struct rblist *metric_events,
@@ -34,4 +35,5 @@ int metricgroup__parse_groups(const struct option *opt,
void metricgroup__print(bool metrics, bool groups, char *filter,
bool raw, bool details);
bool metricgroup__has_metric(const char *metric);
+int arch_get_runtimeparam(void);
#endif
diff --git a/tools/perf/util/ordered-events.c b/tools/perf/util/ordered-events.c
index 359db2b1fcef..48c8f609441b 100644
--- a/tools/perf/util/ordered-events.c
+++ b/tools/perf/util/ordered-events.c
@@ -314,7 +314,7 @@ static int __ordered_events__flush(struct ordered_events *oe, enum oe_flush how,
case OE_FLUSH__NONE:
default:
break;
- };
+ }
pr_oe_time(oe->next_flush, "next_flush - ordered_events__flush PRE %s, nr_events %u\n",
str[how], oe->nr_events);
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 10107747b361..b7a0518d607d 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -461,7 +461,7 @@ int parse_events_add_cache(struct list_head *list, int *idx,
cache_op = parse_aliases(str, perf_evsel__hw_cache_op,
PERF_COUNT_HW_CACHE_OP_MAX);
if (cache_op >= 0) {
- if (!perf_evsel__is_cache_op_valid(cache_type, cache_op))
+ if (!evsel__is_cache_op_valid(cache_type, cache_op))
return -EINVAL;
continue;
}
@@ -1482,6 +1482,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
list_for_each_entry_safe(pos, tmp, &config_terms, list) {
list_del_init(&pos->list);
+ zfree(&pos->val.str);
free(pos);
}
return -EINVAL;
@@ -1870,7 +1871,7 @@ int parse_events__modifier_event(struct list_head *list, char *str, bool add)
evsel->precise_max = mod.precise_max;
evsel->weak_group = mod.weak;
- if (perf_evsel__is_group_leader(evsel))
+ if (evsel__is_group_leader(evsel))
evsel->core.attr.pinned = mod.pinned;
}
@@ -2190,6 +2191,29 @@ int parse_events_option(const struct option *opt, const char *str,
return ret;
}
+int parse_events_option_new_evlist(const struct option *opt, const char *str, int unset)
+{
+ struct evlist **evlistp = opt->value;
+ int ret;
+
+ if (*evlistp == NULL) {
+ *evlistp = evlist__new();
+
+ if (*evlistp == NULL) {
+ fprintf(stderr, "Not enough memory to create evlist\n");
+ return -1;
+ }
+ }
+
+ ret = parse_events_option(opt, str, unset);
+ if (ret) {
+ evlist__delete(*evlistp);
+ *evlistp = NULL;
+ }
+
+ return ret;
+}
+
static int
foreach_evsel_in_last_glob(struct evlist *evlist,
int (*func)(struct evsel *evsel,
@@ -2237,7 +2261,7 @@ static int set_filter(struct evsel *evsel, const void *arg)
}
if (evsel->core.attr.type == PERF_TYPE_TRACEPOINT) {
- if (perf_evsel__append_tp_filter(evsel, str) < 0) {
+ if (evsel__append_tp_filter(evsel, str) < 0) {
fprintf(stderr,
"not enough memory to hold filter string\n");
return -1;
@@ -2262,7 +2286,7 @@ static int set_filter(struct evsel *evsel, const void *arg)
return -1;
}
- if (perf_evsel__append_addr_filter(evsel, str) < 0) {
+ if (evsel__append_addr_filter(evsel, str) < 0) {
fprintf(stderr,
"not enough memory to hold filter string\n");
return -1;
@@ -2293,7 +2317,7 @@ static int add_exclude_perf_filter(struct evsel *evsel,
snprintf(new_filter, sizeof(new_filter), "common_pid != %d", getpid());
- if (perf_evsel__append_tp_filter(evsel, new_filter) < 0) {
+ if (evsel__append_tp_filter(evsel, new_filter) < 0) {
fprintf(stderr,
"not enough memory to hold filter string\n");
return -1;
@@ -2603,12 +2627,11 @@ restart:
for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) {
for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) {
/* skip invalid cache type */
- if (!perf_evsel__is_cache_op_valid(type, op))
+ if (!evsel__is_cache_op_valid(type, op))
continue;
for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) {
- __perf_evsel__hw_cache_type_op_res_name(type, op, i,
- name, sizeof(name));
+ __evsel__hw_cache_type_op_res_name(type, op, i, name, sizeof(name));
if (event_glob != NULL && !strglobmatch(name, event_glob))
continue;
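parse_events_option_new_evlist() wraps parse_events_option() but allocates the target evlist on first use and deletes it again if parsing fails. A sketch of how a builtin could hook it up through libsubcmd (the option name and help text are illustrative):

#include <subcmd/parse-options.h>
#include "util/evlist.h"
#include "util/parse-events.h"

static struct evlist *sb_evlist;	/* created lazily by the callback */

static const struct option example_options[] = {
	OPT_CALLBACK(0, "switch-output-event", &sb_evlist, "switch output event",
		     "switch output event selector. use 'perf list' to list available events",
		     parse_events_option_new_evlist),
	OPT_END()
};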
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 27596cbd0ba0..6ead9661238c 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -31,6 +31,7 @@ bool have_tracepoints(struct list_head *evlist);
const char *event_type(int type);
int parse_events_option(const struct option *opt, const char *str, int unset);
+int parse_events_option_new_evlist(const struct option *opt, const char *str, int unset);
int parse_events(struct evlist *evlist, const char *str,
struct parse_events_error *error);
int parse_events_terms(struct list_head *terms, const char *str);
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index baa48f28d57d..c589fc42f058 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -286,6 +286,7 @@ no-overwrite { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOOVERWRITE); }
percore { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_PERCORE); }
aux-output { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT); }
aux-sample-size { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE); }
+r{num_raw_hex} { return raw(yyscanner); }
, { return ','; }
"/" { BEGIN(INITIAL); return '/'; }
{name_minus} { return str(yyscanner, PE_NAME); }
diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
index 94f8bcd83582..c4ca932d092d 100644
--- a/tools/perf/util/parse-events.y
+++ b/tools/perf/util/parse-events.y
@@ -44,7 +44,7 @@ static void free_list_evsel(struct list_head* list_evsel)
list_for_each_entry_safe(evsel, tmp, list_evsel, core.node) {
list_del_init(&evsel->core.node);
- perf_evsel__delete(evsel);
+ evsel__delete(evsel);
}
free(list_evsel);
}
@@ -326,6 +326,7 @@ PE_NAME opt_pmu_config
}
parse_events_terms__delete($2);
parse_events_terms__delete(orig_terms);
+ free(pattern);
free($1);
$$ = list;
#undef CLEANUP_YYABORT
@@ -706,6 +707,15 @@ event_term
}
event_term:
+PE_RAW
+{
+ struct parse_events_term *term;
+
+ ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_CONFIG,
+ NULL, $1, false, &@1, NULL));
+ $$ = term;
+}
+|
PE_NAME '=' PE_NAME
{
struct parse_events_term *term;
diff --git a/tools/perf/util/perf_api_probe.c b/tools/perf/util/perf_api_probe.c
new file mode 100644
index 000000000000..1337965673d7
--- /dev/null
+++ b/tools/perf/util/perf_api_probe.c
@@ -0,0 +1,164 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include "perf-sys.h"
+#include "util/cloexec.h"
+#include "util/evlist.h"
+#include "util/evsel.h"
+#include "util/parse-events.h"
+#include "util/perf_api_probe.h"
+#include <perf/cpumap.h>
+#include <errno.h>
+
+typedef void (*setup_probe_fn_t)(struct evsel *evsel);
+
+static int perf_do_probe_api(setup_probe_fn_t fn, int cpu, const char *str)
+{
+ struct evlist *evlist;
+ struct evsel *evsel;
+ unsigned long flags = perf_event_open_cloexec_flag();
+ int err = -EAGAIN, fd;
+ static pid_t pid = -1;
+
+ evlist = evlist__new();
+ if (!evlist)
+ return -ENOMEM;
+
+ if (parse_events(evlist, str, NULL))
+ goto out_delete;
+
+ evsel = evlist__first(evlist);
+
+ while (1) {
+ fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1, flags);
+ if (fd < 0) {
+ if (pid == -1 && errno == EACCES) {
+ pid = 0;
+ continue;
+ }
+ goto out_delete;
+ }
+ break;
+ }
+ close(fd);
+
+ fn(evsel);
+
+ fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1, flags);
+ if (fd < 0) {
+ if (errno == EINVAL)
+ err = -EINVAL;
+ goto out_delete;
+ }
+ close(fd);
+ err = 0;
+
+out_delete:
+ evlist__delete(evlist);
+ return err;
+}
+
+static bool perf_probe_api(setup_probe_fn_t fn)
+{
+ const char *try[] = {"cycles:u", "instructions:u", "cpu-clock:u", NULL};
+ struct perf_cpu_map *cpus;
+ int cpu, ret, i = 0;
+
+ cpus = perf_cpu_map__new(NULL);
+ if (!cpus)
+ return false;
+ cpu = cpus->map[0];
+ perf_cpu_map__put(cpus);
+
+ do {
+ ret = perf_do_probe_api(fn, cpu, try[i++]);
+ if (!ret)
+ return true;
+ } while (ret == -EAGAIN && try[i]);
+
+ return false;
+}
+
+static void perf_probe_sample_identifier(struct evsel *evsel)
+{
+ evsel->core.attr.sample_type |= PERF_SAMPLE_IDENTIFIER;
+}
+
+static void perf_probe_comm_exec(struct evsel *evsel)
+{
+ evsel->core.attr.comm_exec = 1;
+}
+
+static void perf_probe_context_switch(struct evsel *evsel)
+{
+ evsel->core.attr.context_switch = 1;
+}
+
+bool perf_can_sample_identifier(void)
+{
+ return perf_probe_api(perf_probe_sample_identifier);
+}
+
+bool perf_can_comm_exec(void)
+{
+ return perf_probe_api(perf_probe_comm_exec);
+}
+
+bool perf_can_record_switch_events(void)
+{
+ return perf_probe_api(perf_probe_context_switch);
+}
+
+bool perf_can_record_cpu_wide(void)
+{
+ struct perf_event_attr attr = {
+ .type = PERF_TYPE_SOFTWARE,
+ .config = PERF_COUNT_SW_CPU_CLOCK,
+ .exclude_kernel = 1,
+ };
+ struct perf_cpu_map *cpus;
+ int cpu, fd;
+
+ cpus = perf_cpu_map__new(NULL);
+ if (!cpus)
+ return false;
+ cpu = cpus->map[0];
+ perf_cpu_map__put(cpus);
+
+ fd = sys_perf_event_open(&attr, -1, cpu, -1, 0);
+ if (fd < 0)
+ return false;
+ close(fd);
+
+ return true;
+}
+
+/*
+ * Architectures are expected to know if AUX area sampling is supported by the
+ * hardware. Here we check for kernel support.
+ */
+bool perf_can_aux_sample(void)
+{
+ struct perf_event_attr attr = {
+ .size = sizeof(struct perf_event_attr),
+ .exclude_kernel = 1,
+ /*
+ * Non-zero value causes the kernel to calculate the effective
+ * attribute size up to that byte.
+ */
+ .aux_sample_size = 1,
+ };
+ int fd;
+
+ fd = sys_perf_event_open(&attr, -1, 0, -1, 0);
+ /*
+ * If the kernel attribute is big enough to contain aux_sample_size
+ * then we assume that it is supported. We are relying on the kernel to
+ * validate the attribute size before anything else that could be wrong.
+ */
+ if (fd < 0 && errno == E2BIG)
+ return false;
+ if (fd >= 0)
+ close(fd);
+
+ return true;
+}
diff --git a/tools/perf/util/perf_api_probe.h b/tools/perf/util/perf_api_probe.h
new file mode 100644
index 000000000000..706c3c6426e2
--- /dev/null
+++ b/tools/perf/util/perf_api_probe.h
@@ -0,0 +1,14 @@
+
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_API_PROBE_H
+#define __PERF_API_PROBE_H
+
+#include <stdbool.h>
+
+bool perf_can_aux_sample(void);
+bool perf_can_comm_exec(void);
+bool perf_can_record_cpu_wide(void);
+bool perf_can_record_switch_events(void);
+bool perf_can_sample_identifier(void);
+
+#endif // __PERF_API_PROBE_H
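The probing helpers split out into perf_api_probe.[ch] are meant to be feature-tested once and then used to adjust evsel configuration. A small usage sketch (the function name is illustrative):

#include "util/evlist.h"
#include "util/evsel.h"
#include "util/perf_api_probe.h"

static void configure_sample_ids(struct evlist *evlist)
{
	bool use_identifier = perf_can_sample_identifier();
	struct evsel *evsel;

	/* pick PERF_SAMPLE_IDENTIFIER only when the running kernel supports it */
	evlist__for_each_entry(evlist, evsel)
		evsel__set_sample_id(evsel, use_identifier);
}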
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index ef6a63f3d386..92bd7fafcce6 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -18,6 +18,7 @@
#include <regex.h>
#include <perf/cpumap.h>
#include "debug.h"
+#include "evsel.h"
#include "pmu.h"
#include "parse-events.h"
#include "header.h"
@@ -849,6 +850,7 @@ static struct perf_pmu *pmu_lookup(const char *name)
INIT_LIST_HEAD(&pmu->format);
INIT_LIST_HEAD(&pmu->aliases);
+ INIT_LIST_HEAD(&pmu->caps);
list_splice(&format, &pmu->format);
list_splice(&aliases, &pmu->aliases);
list_add_tail(&pmu->list, &pmus);
@@ -869,6 +871,17 @@ static struct perf_pmu *pmu_find(const char *name)
return NULL;
}
+struct perf_pmu *perf_pmu__find_by_type(unsigned int type)
+{
+ struct perf_pmu *pmu;
+
+ list_for_each_entry(pmu, &pmus, list)
+ if (pmu->type == type)
+ return pmu;
+
+ return NULL;
+}
+
struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu)
{
/*
@@ -884,6 +897,25 @@ struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu)
return NULL;
}
+struct perf_pmu *evsel__find_pmu(struct evsel *evsel)
+{
+ struct perf_pmu *pmu = NULL;
+
+ while ((pmu = perf_pmu__scan(pmu)) != NULL) {
+ if (pmu->type == evsel->core.attr.type)
+ break;
+ }
+
+ return pmu;
+}
+
+bool evsel__is_aux_event(struct evsel *evsel)
+{
+ struct perf_pmu *pmu = evsel__find_pmu(evsel);
+
+ return pmu && pmu->auxtrace;
+}
+
struct perf_pmu *perf_pmu__find(const char *name)
{
struct perf_pmu *pmu;
@@ -1574,3 +1606,84 @@ int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt,
va_end(args);
return ret;
}
+
+static int perf_pmu__new_caps(struct list_head *list, char *name, char *value)
+{
+ struct perf_pmu_caps *caps = zalloc(sizeof(*caps));
+
+ if (!caps)
+ return -ENOMEM;
+
+ caps->name = strdup(name);
+ if (!caps->name)
+ goto free_caps;
+ caps->value = strndup(value, strlen(value) - 1);
+ if (!caps->value)
+ goto free_name;
+ list_add_tail(&caps->list, list);
+ return 0;
+
+free_name:
+ zfree(&caps->name);
+free_caps:
+ free(caps);
+
+ return -ENOMEM;
+}
+
+/*
+ * Read and parse the given PMU's capabilities, which should be located at
+ * /sys/bus/event_source/devices/<dev>/caps as sysfs group attributes.
+ * Return the number of capabilities parsed.
+ */
+int perf_pmu__caps_parse(struct perf_pmu *pmu)
+{
+ struct stat st;
+ char caps_path[PATH_MAX];
+ const char *sysfs = sysfs__mountpoint();
+ DIR *caps_dir;
+ struct dirent *evt_ent;
+ int nr_caps = 0;
+
+ if (!sysfs)
+ return -1;
+
+ snprintf(caps_path, PATH_MAX,
+ "%s" EVENT_SOURCE_DEVICE_PATH "%s/caps", sysfs, pmu->name);
+
+ if (stat(caps_path, &st) < 0)
+ return 0; /* no error if caps does not exist */
+
+ caps_dir = opendir(caps_path);
+ if (!caps_dir)
+ return -EINVAL;
+
+ while ((evt_ent = readdir(caps_dir)) != NULL) {
+ char path[PATH_MAX + NAME_MAX + 1];
+ char *name = evt_ent->d_name;
+ char value[128];
+ FILE *file;
+
+ if (!strcmp(name, ".") || !strcmp(name, ".."))
+ continue;
+
+ snprintf(path, sizeof(path), "%s/%s", caps_path, name);
+
+ file = fopen(path, "r");
+ if (!file)
+ continue;
+
+ if (!fgets(value, sizeof(value), file) ||
+ (perf_pmu__new_caps(&pmu->caps, name, value) < 0)) {
+ fclose(file);
+ continue;
+ }
+
+ nr_caps++;
+ fclose(file);
+ }
+
+ closedir(caps_dir);
+
+ return nr_caps;
+}
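After perf_pmu__caps_parse() the capabilities hang off pmu->caps as simple name/value pairs, so consumers (for example header/feature writers) can walk them as in this sketch (the printing is illustrative):

#include <linux/list.h>
#include <stdio.h>
#include "pmu.h"

static void print_pmu_caps(struct perf_pmu *pmu)
{
	struct perf_pmu_caps *caps;

	if (perf_pmu__caps_parse(pmu) <= 0)
		return;		/* no caps directory or nothing parsed */

	list_for_each_entry(caps, &pmu->caps, list)
		printf("%s/caps/%s = %s\n", pmu->name, caps->name, caps->value);
}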
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index 5fb3f16828df..cb6fbec50313 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -22,6 +22,12 @@ enum {
struct perf_event_attr;
+struct perf_pmu_caps {
+ char *name;
+ char *value;
+ struct list_head list;
+};
+
struct perf_pmu {
char *name;
__u32 type;
@@ -33,6 +39,7 @@ struct perf_pmu {
struct perf_cpu_map *cpus;
struct list_head format; /* HEAD struct perf_pmu_format -> list */
struct list_head aliases; /* HEAD struct perf_pmu_alias -> list */
+ struct list_head caps; /* HEAD struct perf_pmu_caps -> list */
struct list_head list; /* ELEM */
};
@@ -65,6 +72,7 @@ struct perf_pmu_alias {
};
struct perf_pmu *perf_pmu__find(const char *name);
+struct perf_pmu *perf_pmu__find_by_type(unsigned int type);
int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr,
struct list_head *head_terms,
struct parse_events_error *error);
@@ -107,4 +115,6 @@ bool pmu_uncore_alias_match(const char *pmu_name, const char *name);
int perf_pmu__convert_scale(const char *scale, char **end, double *sval);
+int perf_pmu__caps_parse(struct perf_pmu *pmu);
+
#endif /* __PMU_H */
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index 83212c65848b..75a9b1d62bba 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -801,7 +801,7 @@ static int pyrf_evsel__init(struct pyrf_evsel *pevsel,
static void pyrf_evsel__delete(struct pyrf_evsel *pevsel)
{
- perf_evsel__exit(&pevsel->evsel);
+ evsel__exit(&pevsel->evsel);
Py_TYPE(pevsel)->tp_free((PyObject*)pevsel);
}
@@ -1044,7 +1044,7 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist,
pevent->evsel = evsel;
- err = perf_evsel__parse_sample(evsel, event, &pevent->sample);
+ err = evsel__parse_sample(evsel, event, &pevent->sample);
/* Consume the event only after we parsed it out. */
perf_mmap__consume(&md->core);
diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c
index 7def66168503..a4cc11592f6b 100644
--- a/tools/perf/util/record.c
+++ b/tools/perf/util/record.c
@@ -10,161 +10,64 @@
#include <subcmd/parse-options.h>
#include <perf/cpumap.h>
#include "cloexec.h"
+#include "util/perf_api_probe.h"
#include "record.h"
#include "../perf-sys.h"
-typedef void (*setup_probe_fn_t)(struct evsel *evsel);
-
-static int perf_do_probe_api(setup_probe_fn_t fn, int cpu, const char *str)
+/*
+ * evsel__config_leader_sampling() uses special rules for leader sampling.
+ * However, if the leader is an AUX area event, then assume the event to sample
+ * is the next event.
+ */
+static struct evsel *evsel__read_sampler(struct evsel *evsel, struct evlist *evlist)
{
- struct evlist *evlist;
- struct evsel *evsel;
- unsigned long flags = perf_event_open_cloexec_flag();
- int err = -EAGAIN, fd;
- static pid_t pid = -1;
-
- evlist = evlist__new();
- if (!evlist)
- return -ENOMEM;
-
- if (parse_events(evlist, str, NULL))
- goto out_delete;
-
- evsel = evlist__first(evlist);
+ struct evsel *leader = evsel->leader;
- while (1) {
- fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1, flags);
- if (fd < 0) {
- if (pid == -1 && errno == EACCES) {
- pid = 0;
- continue;
- }
- goto out_delete;
+ if (evsel__is_aux_event(leader)) {
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->leader == leader && evsel != evsel->leader)
+ return evsel;
}
- break;
- }
- close(fd);
-
- fn(evsel);
-
- fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1, flags);
- if (fd < 0) {
- if (errno == EINVAL)
- err = -EINVAL;
- goto out_delete;
}
- close(fd);
- err = 0;
-
-out_delete:
- evlist__delete(evlist);
- return err;
-}
-
-static bool perf_probe_api(setup_probe_fn_t fn)
-{
- const char *try[] = {"cycles:u", "instructions:u", "cpu-clock:u", NULL};
- struct perf_cpu_map *cpus;
- int cpu, ret, i = 0;
-
- cpus = perf_cpu_map__new(NULL);
- if (!cpus)
- return false;
- cpu = cpus->map[0];
- perf_cpu_map__put(cpus);
-
- do {
- ret = perf_do_probe_api(fn, cpu, try[i++]);
- if (!ret)
- return true;
- } while (ret == -EAGAIN && try[i]);
- return false;
+ return leader;
}
-static void perf_probe_sample_identifier(struct evsel *evsel)
+static void evsel__config_leader_sampling(struct evsel *evsel, struct evlist *evlist)
{
- evsel->core.attr.sample_type |= PERF_SAMPLE_IDENTIFIER;
-}
-
-static void perf_probe_comm_exec(struct evsel *evsel)
-{
- evsel->core.attr.comm_exec = 1;
-}
-
-static void perf_probe_context_switch(struct evsel *evsel)
-{
- evsel->core.attr.context_switch = 1;
-}
-
-bool perf_can_sample_identifier(void)
-{
- return perf_probe_api(perf_probe_sample_identifier);
-}
+ struct perf_event_attr *attr = &evsel->core.attr;
+ struct evsel *leader = evsel->leader;
+ struct evsel *read_sampler;
-static bool perf_can_comm_exec(void)
-{
- return perf_probe_api(perf_probe_comm_exec);
-}
+ if (!leader->sample_read)
+ return;
-bool perf_can_record_switch_events(void)
-{
- return perf_probe_api(perf_probe_context_switch);
-}
+ read_sampler = evsel__read_sampler(evsel, evlist);
-bool perf_can_record_cpu_wide(void)
-{
- struct perf_event_attr attr = {
- .type = PERF_TYPE_SOFTWARE,
- .config = PERF_COUNT_SW_CPU_CLOCK,
- .exclude_kernel = 1,
- };
- struct perf_cpu_map *cpus;
- int cpu, fd;
-
- cpus = perf_cpu_map__new(NULL);
- if (!cpus)
- return false;
- cpu = cpus->map[0];
- perf_cpu_map__put(cpus);
+ if (evsel == read_sampler)
+ return;
- fd = sys_perf_event_open(&attr, -1, cpu, -1, 0);
- if (fd < 0)
- return false;
- close(fd);
-
- return true;
-}
-
-/*
- * Architectures are expected to know if AUX area sampling is supported by the
- * hardware. Here we check for kernel support.
- */
-bool perf_can_aux_sample(void)
-{
- struct perf_event_attr attr = {
- .size = sizeof(struct perf_event_attr),
- .exclude_kernel = 1,
- /*
- * Non-zero value causes the kernel to calculate the effective
- * attribute size up to that byte.
- */
- .aux_sample_size = 1,
- };
- int fd;
-
- fd = sys_perf_event_open(&attr, -1, 0, -1, 0);
/*
- * If the kernel attribute is big enough to contain aux_sample_size
- * then we assume that it is supported. We are relying on the kernel to
- * validate the attribute size before anything else that could be wrong.
+ * Disable sampling for all group members other than the leader in
+ * case the leader 'leads' the sampling, except when the leader is an
+ * AUX area event, in which case the 2nd event in the group is the one
+ * that 'leads' the sampling.
*/
- if (fd < 0 && errno == E2BIG)
- return false;
- if (fd >= 0)
- close(fd);
+ attr->freq = 0;
+ attr->sample_freq = 0;
+ attr->sample_period = 0;
+ attr->write_backward = 0;
- return true;
+ /*
+ * We don't get a sample for slave events; we synthesize them when
+ * delivering the group leader sample. Set the slave event to follow
+ * the master sample_type to ease reporting.
+ * An AUX area event also has sample_type requirements, so also include
+ * the sample type bits from the leader's sample_type to cover that
+ * case.
+ */
+ attr->sample_type = read_sampler->core.attr.sample_type |
+ leader->core.attr.sample_type;
}
void perf_evlist__config(struct evlist *evlist, struct record_opts *opts,
@@ -188,11 +91,15 @@ void perf_evlist__config(struct evlist *evlist, struct record_opts *opts,
use_comm_exec = perf_can_comm_exec();
evlist__for_each_entry(evlist, evsel) {
- perf_evsel__config(evsel, opts, callchain);
+ evsel__config(evsel, opts, callchain);
if (evsel->tracking && use_comm_exec)
evsel->core.attr.comm_exec = 1;
}
+ /* Configure leader sampling here now that the sample type is known */
+ evlist__for_each_entry(evlist, evsel)
+ evsel__config_leader_sampling(evsel, evlist);
+
if (opts->full_auxtrace) {
/*
* Need to be able to synthesize and parse selected events with
@@ -215,7 +122,7 @@ void perf_evlist__config(struct evlist *evlist, struct record_opts *opts,
if (sample_id) {
evlist__for_each_entry(evlist, evsel)
- perf_evsel__set_sample_id(evsel, use_sample_identifier);
+ evsel__set_sample_id(evsel, use_sample_identifier);
}
perf_evlist__set_id_pos(evlist);
diff --git a/tools/perf/util/record.h b/tools/perf/util/record.h
index 24316458be20..923565c3b155 100644
--- a/tools/perf/util/record.h
+++ b/tools/perf/util/record.h
@@ -68,6 +68,7 @@ struct record_opts {
int affinity;
int mmap_flush;
unsigned int comp_level;
+ unsigned int nr_threads_synthesize;
};
extern const char * const *record_usage;
diff --git a/tools/perf/util/s390-cpumcf-kernel.h b/tools/perf/util/s390-cpumcf-kernel.h
index d4356030b504..f55ca07f3ca1 100644
--- a/tools/perf/util/s390-cpumcf-kernel.h
+++ b/tools/perf/util/s390-cpumcf-kernel.h
@@ -11,6 +11,7 @@
#define S390_CPUMCF_DIAG_DEF 0xfeef /* Counter diagnostic entry ID */
#define PERF_EVENT_CPUM_CF_DIAG 0xBC000 /* Event: Counter sets */
+#define PERF_EVENT_CPUM_SF_DIAG 0xBD000 /* Event: Combined-sampling */
struct cf_ctrset_entry { /* CPU-M CF counter set entry (8 byte) */
unsigned int def:16; /* 0-15 Data Entry Format */
diff --git a/tools/perf/util/s390-cpumsf.c b/tools/perf/util/s390-cpumsf.c
index 6785cd87aa4d..f8861998e5bd 100644
--- a/tools/perf/util/s390-cpumsf.c
+++ b/tools/perf/util/s390-cpumsf.c
@@ -1047,6 +1047,14 @@ static void s390_cpumsf_free(struct perf_session *session)
free(sf);
}
+static bool
+s390_cpumsf_evsel_is_auxtrace(struct perf_session *session __maybe_unused,
+ struct evsel *evsel)
+{
+ return evsel->core.attr.type == PERF_TYPE_RAW &&
+ evsel->core.attr.config == PERF_EVENT_CPUM_SF_DIAG;
+}
+
static int s390_cpumsf_get_type(const char *cpuid)
{
int ret, family = 0;
@@ -1071,7 +1079,8 @@ static bool check_auxtrace_itrace(struct itrace_synth_opts *itops)
itops->pwr_events || itops->errors ||
itops->dont_decode || itops->calls || itops->returns ||
itops->callchain || itops->thread_stack ||
- itops->last_branch;
+ itops->last_branch || itops->add_callchain ||
+ itops->add_last_branch;
if (!ison)
return true;
pr_err("Unsupported --itrace options specified\n");
@@ -1142,6 +1151,7 @@ int s390_cpumsf_process_auxtrace_info(union perf_event *event,
sf->auxtrace.flush_events = s390_cpumsf_flush;
sf->auxtrace.free_events = s390_cpumsf_free_events;
sf->auxtrace.free = s390_cpumsf_free;
+ sf->auxtrace.evsel_is_auxtrace = s390_cpumsf_evsel_is_auxtrace;
session->auxtrace = &sf->auxtrace;
if (dump_trace)
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index 2c372cf5495e..739516fdf6e3 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -741,7 +741,7 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample,
if (!dict_sample)
Py_FatalError("couldn't create Python dictionary");
- pydict_set_item_string_decref(dict, "ev_name", _PyUnicode_FromString(perf_evsel__name(evsel)));
+ pydict_set_item_string_decref(dict, "ev_name", _PyUnicode_FromString(evsel__name(evsel)));
pydict_set_item_string_decref(dict, "attr", _PyBytes_FromStringAndSize((const char *)&evsel->core.attr, sizeof(evsel->core.attr)));
pydict_set_item_string_decref(dict_sample, "pid",
@@ -968,7 +968,7 @@ static int python_export_evsel(struct db_export *dbe, struct evsel *evsel)
t = tuple_new(2);
tuple_set_u64(t, 0, evsel->db_id);
- tuple_set_string(t, 1, perf_evsel__name(evsel));
+ tuple_set_string(t, 1, evsel__name(evsel));
call_object(tables->evsel_handler, t, "evsel_table");
@@ -1349,7 +1349,7 @@ static void get_handler_name(char *str, size_t size,
{
char *p = str;
- scnprintf(str, size, "stat__%s", perf_evsel__name(evsel));
+ scnprintf(str, size, "stat__%s", evsel__name(evsel));
while ((p = strchr(p, ':'))) {
*p = '_';
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 0b0bfe5bef17..c11d89e0ee55 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1059,7 +1059,7 @@ static void callchain__printf(struct evsel *evsel,
unsigned int i;
struct ip_callchain *callchain = sample->callchain;
- if (perf_evsel__has_branch_callstack(evsel))
+ if (evsel__has_branch_callstack(evsel))
callchain__lbr_callstack_printf(sample);
printf("... FP chain: nr:%" PRIu64 "\n", callchain->nr);
@@ -1243,8 +1243,8 @@ static void dump_sample(struct evsel *evsel, union perf_event *event,
if (evsel__has_callchain(evsel))
callchain__printf(evsel, sample);
- if (sample_type & PERF_SAMPLE_BRANCH_STACK)
- branch_stack__printf(sample, perf_evsel__has_branch_callstack(evsel));
+ if (evsel__has_br_stack(evsel))
+ branch_stack__printf(sample, evsel__has_branch_callstack(evsel));
if (sample_type & PERF_SAMPLE_REGS_USER)
regs_user__printf(sample);
@@ -1280,8 +1280,7 @@ static void dump_read(struct evsel *evsel, union perf_event *event)
return;
printf(": %d %d %s %" PRI_lu64 "\n", event->read.pid, event->read.tid,
- perf_evsel__name(evsel),
- event->read.value);
+ evsel__name(evsel), event->read.value);
if (!evsel)
return;
diff --git a/tools/perf/util/sideband_evlist.c b/tools/perf/util/sideband_evlist.c
new file mode 100644
index 000000000000..1580a3cbec2d
--- /dev/null
+++ b/tools/perf/util/sideband_evlist.c
@@ -0,0 +1,148 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include "util/debug.h"
+#include "util/evlist.h"
+#include "util/evsel.h"
+#include "util/mmap.h"
+#include "util/perf_api_probe.h"
+#include <perf/mmap.h>
+#include <linux/perf_event.h>
+#include <limits.h>
+#include <pthread.h>
+#include <sched.h>
+#include <stdbool.h>
+
+int perf_evlist__add_sb_event(struct evlist *evlist, struct perf_event_attr *attr,
+ evsel__sb_cb_t cb, void *data)
+{
+ struct evsel *evsel;
+
+ if (!attr->sample_id_all) {
+ pr_warning("enabling sample_id_all for all side band events\n");
+ attr->sample_id_all = 1;
+ }
+
+ evsel = perf_evsel__new_idx(attr, evlist->core.nr_entries);
+ if (!evsel)
+ return -1;
+
+ evsel->side_band.cb = cb;
+ evsel->side_band.data = data;
+ evlist__add(evlist, evsel);
+ return 0;
+}
+
+static void *perf_evlist__poll_thread(void *arg)
+{
+ struct evlist *evlist = arg;
+ bool draining = false;
+ int i, done = 0;
+ /*
+ * In order to read symbols from other namespaces, perf needs to call
+ * setns(2). This isn't permitted if the fs_struct has multiple users.
+ * unshare(2) the fs so that we may continue to setns into namespaces
+ * that we're observing when, for instance, reading the build-ids at
+ * the end of a 'perf record' session.
+ */
+ unshare(CLONE_FS);
+
+ while (!done) {
+ bool got_data = false;
+
+ if (evlist->thread.done)
+ draining = true;
+
+ if (!draining)
+ evlist__poll(evlist, 1000);
+
+ for (i = 0; i < evlist->core.nr_mmaps; i++) {
+ struct mmap *map = &evlist->mmap[i];
+ union perf_event *event;
+
+ if (perf_mmap__read_init(&map->core))
+ continue;
+ while ((event = perf_mmap__read_event(&map->core)) != NULL) {
+ struct evsel *evsel = perf_evlist__event2evsel(evlist, event);
+
+ if (evsel && evsel->side_band.cb)
+ evsel->side_band.cb(event, evsel->side_band.data);
+ else
+ pr_warning("cannot locate proper evsel for the side band event\n");
+
+ perf_mmap__consume(&map->core);
+ got_data = true;
+ }
+ perf_mmap__read_done(&map->core);
+ }
+
+ if (draining && !got_data)
+ break;
+ }
+ return NULL;
+}
+
+void evlist__set_cb(struct evlist *evlist, evsel__sb_cb_t cb, void *data)
+{
+ struct evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel) {
+ evsel->core.attr.sample_id_all = 1;
+ evsel->core.attr.watermark = 1;
+ evsel->core.attr.wakeup_watermark = 1;
+ evsel->side_band.cb = cb;
+ evsel->side_band.data = data;
+ }
+}
+
+int perf_evlist__start_sb_thread(struct evlist *evlist, struct target *target)
+{
+ struct evsel *counter;
+
+ if (!evlist)
+ return 0;
+
+ if (perf_evlist__create_maps(evlist, target))
+ goto out_delete_evlist;
+
+ if (evlist->core.nr_entries > 1) {
+ bool can_sample_identifier = perf_can_sample_identifier();
+
+ evlist__for_each_entry(evlist, counter)
+ evsel__set_sample_id(counter, can_sample_identifier);
+
+ perf_evlist__set_id_pos(evlist);
+ }
+
+ evlist__for_each_entry(evlist, counter) {
+ if (evsel__open(counter, evlist->core.cpus, evlist->core.threads) < 0)
+ goto out_delete_evlist;
+ }
+
+ if (evlist__mmap(evlist, UINT_MAX))
+ goto out_delete_evlist;
+
+ evlist__for_each_entry(evlist, counter) {
+ if (evsel__enable(counter))
+ goto out_delete_evlist;
+ }
+
+ evlist->thread.done = 0;
+ if (pthread_create(&evlist->thread.th, NULL, perf_evlist__poll_thread, evlist))
+ goto out_delete_evlist;
+
+ return 0;
+
+out_delete_evlist:
+ evlist__delete(evlist);
+ evlist = NULL;
+ return -1;
+}
+
+void perf_evlist__stop_sb_thread(struct evlist *evlist)
+{
+ if (!evlist)
+ return;
+ evlist->thread.done = 1;
+ pthread_join(evlist->thread.th, NULL);
+ evlist__delete(evlist);
+}
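The new sideband_evlist.c gathers the side-band machinery in one place: add an event with a callback, start the poll thread, and stop it at session teardown. A sketch of the intended flow (the attribute choice and callback body are illustrative assumptions):

#include <linux/compiler.h>
#include <linux/perf_event.h>
#include "util/evlist.h"
#include "util/event.h"
#include "util/target.h"

static void sb_event_cb(union perf_event *event __maybe_unused, void *data __maybe_unused)
{
	/* consume side-band PERF_RECORD_* events here */
}

static int run_with_sideband(struct target *target)
{
	struct evlist *sb_evlist = evlist__new();
	struct perf_event_attr attr = {
		.type		= PERF_TYPE_SOFTWARE,
		.config		= PERF_COUNT_SW_DUMMY,
		.sample_id_all	= 1,
	};

	if (!sb_evlist)
		return -1;

	if (perf_evlist__add_sb_event(sb_evlist, &attr, sb_event_cb, NULL)) {
		evlist__delete(sb_evlist);
		return -1;
	}

	/* on failure this deletes the evlist itself (see above) */
	if (perf_evlist__start_sb_thread(sb_evlist, target))
		return -1;

	/* ... run the main session ... */

	perf_evlist__stop_sb_thread(sb_evlist);
	return 0;
}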
diff --git a/tools/perf/util/smt.c b/tools/perf/util/smt.c
index 3b791ef2cd50..20bacd5972ad 100644
--- a/tools/perf/util/smt.c
+++ b/tools/perf/util/smt.c
@@ -15,6 +15,9 @@ int smt_on(void)
if (cached)
return cached_result;
+ if (sysfs__read_int("devices/system/cpu/smt/active", &cached_result) >= 0)
+ goto done;
+
ncpu = sysconf(_SC_NPROCESSORS_CONF);
for (cpu = 0; cpu < ncpu; cpu++) {
unsigned long long siblings;
@@ -24,13 +27,13 @@ int smt_on(void)
snprintf(fn, sizeof fn,
"devices/system/cpu/cpu%d/topology/core_cpus", cpu);
- if (access(fn, F_OK) == -1) {
+ if (sysfs__read_str(fn, &str, &strlen) < 0) {
snprintf(fn, sizeof fn,
"devices/system/cpu/cpu%d/topology/thread_siblings",
cpu);
+ if (sysfs__read_str(fn, &str, &strlen) < 0)
+ continue;
}
- if (sysfs__read_str(fn, &str, &strlen) < 0)
- continue;
/* Entry is hex, but does not have 0x, so need custom parser */
siblings = strtoull(str, NULL, 16);
free(str);
@@ -42,6 +45,7 @@ int smt_on(void)
}
if (!cached) {
cached_result = 0;
+done:
cached = true;
}
return cached_result;
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index f14cc728c358..c1f8879f92cc 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -237,7 +237,7 @@ static int64_t _sort__addr_cmp(u64 left_ip, u64 right_ip)
return (int64_t)(right_ip - left_ip);
}
-static int64_t _sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r)
+int64_t _sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r)
{
if (!sym_l || !sym_r)
return cmp_null(sym_l, sym_r);
@@ -300,8 +300,14 @@ static int _hist_entry__sym_snprintf(struct map_symbol *ms,
if (verbose > 0) {
char o = map ? dso__symtab_origin(map->dso) : '!';
+ u64 rip = ip;
+
+ if (map && map->dso && map->dso->kernel
+ && map->dso->adjust_symbols)
+ rip = map->unmap_ip(map, ip);
+
ret += repsep_snprintf(bf, size, "%-#*llx %c ",
- BITS_PER_LONG / 4 + 2, ip, o);
+ BITS_PER_LONG / 4 + 2, rip, o);
}
ret += repsep_snprintf(bf + ret, size - ret, "[%c] ", level);
@@ -2354,7 +2360,7 @@ static struct evsel *find_evsel(struct evlist *evlist, char *event_name)
evsel = evlist__first(evlist);
while (--nr > 0)
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
return evsel;
}
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index cfa6ac6f7d06..66d39c4cfe2b 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -311,5 +311,7 @@ int64_t
sort__daddr_cmp(struct hist_entry *left, struct hist_entry *right);
int64_t
sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right);
+int64_t
+_sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r);
char *hist_entry__srcline(struct hist_entry *he);
#endif /* __PERF_SORT_H */
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index 9e757d18d713..3c6976f7574c 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -237,8 +237,6 @@ static bool valid_only_metric(const char *unit)
if (!unit)
return false;
if (strstr(unit, "/sec") ||
- strstr(unit, "hz") ||
- strstr(unit, "Hz") ||
strstr(unit, "CPUs utilized"))
return false;
return true;
@@ -248,7 +246,7 @@ static const char *fixunit(char *buf, struct evsel *evsel,
const char *unit)
{
if (!strncmp(unit, "of all", 6)) {
- snprintf(buf, 1024, "%s %s", perf_evsel__name(evsel),
+ snprintf(buf, 1024, "%s %s", evsel__name(evsel),
unit);
return buf;
}
@@ -335,7 +333,7 @@ static int first_shadow_cpu(struct perf_stat_config *config,
if (config->aggr_mode == AGGR_GLOBAL)
return 0;
- for (i = 0; i < perf_evsel__nr_cpus(evsel); i++) {
+ for (i = 0; i < evsel__nr_cpus(evsel); i++) {
int cpu2 = evsel__cpus(evsel)->map[i];
if (config->aggr_get_id(config, evlist->core.cpus, cpu2) == id)
@@ -369,7 +367,7 @@ static void abs_printout(struct perf_stat_config *config,
config->csv_output ? 0 : config->unit_width,
evsel->unit, config->csv_sep);
- fprintf(output, "%-*s", config->csv_output ? 0 : 25, perf_evsel__name(evsel));
+ fprintf(output, "%-*s", config->csv_output ? 0 : 25, evsel__name(evsel));
print_cgroup(config, evsel);
}
@@ -463,8 +461,7 @@ static void printout(struct perf_stat_config *config, int id, int nr,
counter->unit, config->csv_sep);
fprintf(config->output, "%*s",
- config->csv_output ? 0 : -25,
- perf_evsel__name(counter));
+ config->csv_output ? 0 : -25, evsel__name(counter));
print_cgroup(config, counter);
@@ -510,7 +507,7 @@ static void aggr_update_shadow(struct perf_stat_config *config,
id = config->aggr_map->map[s];
evlist__for_each_entry(evlist, counter) {
val = 0;
- for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
+ for (cpu = 0; cpu < evsel__nr_cpus(counter); cpu++) {
s2 = config->aggr_get_id(config, evlist->core.cpus, cpu);
if (s2 != id)
continue;
@@ -561,11 +558,11 @@ static void collect_all_aliases(struct perf_stat_config *config, struct evsel *c
alias = list_prepare_entry(counter, &(evlist->core.entries), core.node);
list_for_each_entry_continue (alias, &evlist->core.entries, core.node) {
- if (strcmp(perf_evsel__name(alias), perf_evsel__name(counter)) ||
+ if (strcmp(evsel__name(alias), evsel__name(counter)) ||
alias->scale != counter->scale ||
alias->cgrp != counter->cgrp ||
strcmp(alias->unit, counter->unit) ||
- perf_evsel__is_clock(alias) != perf_evsel__is_clock(counter) ||
+ evsel__is_clock(alias) != evsel__is_clock(counter) ||
!strcmp(alias->pmu_name, counter->pmu_name))
break;
alias->merged_stat = true;
@@ -601,7 +598,7 @@ static void aggr_cb(struct perf_stat_config *config,
struct aggr_data *ad = data;
int cpu, s2;
- for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
+ for (cpu = 0; cpu < evsel__nr_cpus(counter); cpu++) {
struct perf_counts_values *counts;
s2 = config->aggr_get_id(config, evsel__cpus(counter), cpu);
@@ -849,7 +846,7 @@ static void print_counter(struct perf_stat_config *config,
double uval;
int cpu;
- for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
+ for (cpu = 0; cpu < evsel__nr_cpus(counter); cpu++) {
struct aggr_data ad = { .cpu = cpu };
if (!collect_data(config, counter, counter_cb, &ad))
@@ -1150,7 +1147,7 @@ static void print_percore_thread(struct perf_stat_config *config,
int s, s2, id;
bool first = true;
- for (int i = 0; i < perf_evsel__nr_cpus(counter); i++) {
+ for (int i = 0; i < evsel__nr_cpus(counter); i++) {
s2 = config->aggr_get_id(config, evsel__cpus(counter), i);
for (s = 0; s < config->aggr_map->nr; s++) {
id = config->aggr_map->map[s];
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index 03ecb8cd0eec..129b8c5f2538 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -216,9 +216,9 @@ void perf_stat__update_shadow_stats(struct evsel *counter, u64 count,
count *= counter->scale;
- if (perf_evsel__is_clock(counter))
+ if (evsel__is_clock(counter))
update_runtime_stat(st, STAT_NSECS, 0, cpu, count_ns);
- else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
+ else if (evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
update_runtime_stat(st, STAT_CYCLES, ctx, cpu, count);
else if (perf_stat_evsel__is(counter, CYCLES_IN_TX))
update_runtime_stat(st, STAT_CYCLES_IN_TX, ctx, cpu, count);
@@ -241,25 +241,25 @@ void perf_stat__update_shadow_stats(struct evsel *counter, u64 count,
else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES))
update_runtime_stat(st, STAT_TOPDOWN_RECOVERY_BUBBLES,
ctx, cpu, count);
- else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
+ else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT,
ctx, cpu, count);
- else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
+ else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
update_runtime_stat(st, STAT_STALLED_CYCLES_BACK,
ctx, cpu, count);
- else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
+ else if (evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
update_runtime_stat(st, STAT_BRANCHES, ctx, cpu, count);
- else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
+ else if (evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
update_runtime_stat(st, STAT_CACHEREFS, ctx, cpu, count);
- else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
+ else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
update_runtime_stat(st, STAT_L1_DCACHE, ctx, cpu, count);
- else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
+ else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
update_runtime_stat(st, STAT_L1_ICACHE, ctx, cpu, count);
- else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
+ else if (evsel__match(counter, HW_CACHE, HW_CACHE_LL))
update_runtime_stat(st, STAT_LL_CACHE, ctx, cpu, count);
- else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
+ else if (evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
update_runtime_stat(st, STAT_DTLB_CACHE, ctx, cpu, count);
- else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
+ else if (evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
update_runtime_stat(st, STAT_ITLB_CACHE, ctx, cpu, count);
else if (perf_stat_evsel__is(counter, SMI_NUM))
update_runtime_stat(st, STAT_SMI_NUM, ctx, cpu, count);
@@ -336,7 +336,7 @@ void perf_stat__collect_metric_expr(struct evlist *evsel_list)
metric_events = counter->metric_events;
if (!metric_events) {
if (expr__find_other(counter->metric_expr, counter->name,
- &metric_names, &num_metric_names) < 0)
+ &metric_names, &num_metric_names, 1) < 0)
continue;
metric_events = calloc(sizeof(struct evsel *),
@@ -723,13 +723,14 @@ static void generic_metric(struct perf_stat_config *config,
char *name,
const char *metric_name,
const char *metric_unit,
+ int runtime,
double avg,
int cpu,
struct perf_stat_output_ctx *out,
struct runtime_stat *st)
{
print_metric_t print_metric = out->print_metric;
- struct parse_ctx pctx;
+ struct expr_parse_ctx pctx;
double ratio, scale;
int i;
void *ctxp = out->ctx;
@@ -777,7 +778,7 @@ static void generic_metric(struct perf_stat_config *config,
}
if (!metric_events[i]) {
- if (expr__parse(&ratio, &pctx, metric_expr) == 0) {
+ if (expr__parse(&ratio, &pctx, metric_expr, runtime) == 0) {
char *unit;
char metric_bf[64];
@@ -786,9 +787,13 @@ static void generic_metric(struct perf_stat_config *config,
&unit, &scale) >= 0) {
ratio *= scale;
}
-
- scnprintf(metric_bf, sizeof(metric_bf),
+ if (strstr(metric_expr, "?"))
+ scnprintf(metric_bf, sizeof(metric_bf),
+ "%s %s_%d", unit, metric_name, runtime);
+ else
+ scnprintf(metric_bf, sizeof(metric_bf),
"%s %s", unit, metric_name);
+
print_metric(config, ctxp, NULL, "%8.1f",
metric_bf, ratio);
} else {
@@ -828,7 +833,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
struct metric_event *me;
int num = 1;
- if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
+ if (evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
if (total) {
@@ -853,7 +858,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
"stalled cycles per insn",
ratio);
}
- } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) {
+ } else if (evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) {
if (runtime_stat_n(st, STAT_BRANCHES, ctx, cpu) != 0)
print_branch_misses(config, cpu, evsel, avg, out, st);
else
@@ -908,7 +913,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
print_ll_cache_misses(config, cpu, evsel, avg, out, st);
else
print_metric(config, ctxp, NULL, NULL, "of all LL-cache hits", 0);
- } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) {
+ } else if (evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) {
total = runtime_stat_avg(st, STAT_CACHEREFS, ctx, cpu);
if (total)
@@ -919,11 +924,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
"of all cache refs", ratio);
else
print_metric(config, ctxp, NULL, NULL, "of all cache refs", 0);
- } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
+ } else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
print_stalled_cycles_frontend(config, cpu, evsel, avg, out, st);
- } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
+ } else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
print_stalled_cycles_backend(config, cpu, evsel, avg, out, st);
- } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
+ } else if (evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
total = runtime_stat_avg(st, STAT_NSECS, 0, cpu);
if (total) {
@@ -974,7 +979,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
ratio = total / avg;
print_metric(config, ctxp, NULL, "%8.0f", "cycles / elision", ratio);
- } else if (perf_evsel__is_clock(evsel)) {
+ } else if (evsel__is_clock(evsel)) {
if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0)
print_metric(config, ctxp, NULL, "%8.3f", "CPUs utilized",
avg / (ratio * evsel->scale));
@@ -1022,7 +1027,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
print_metric(config, ctxp, NULL, NULL, name, 0);
} else if (evsel->metric_expr) {
generic_metric(config, evsel->metric_expr, evsel->metric_events, evsel->name,
- evsel->metric_name, NULL, avg, cpu, out, st);
+ evsel->metric_name, NULL, 1, avg, cpu, out, st);
} else if (runtime_stat_n(st, STAT_NSECS, 0, cpu) != 0) {
char unit = 'M';
char unit_buf[10];
@@ -1051,7 +1056,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
out->new_line(config, ctxp);
generic_metric(config, mexp->metric_expr, mexp->metric_events,
evsel->name, mexp->metric_name,
- mexp->metric_unit, avg, cpu, out, st);
+ mexp->metric_unit, mexp->runtime, avg, cpu, out, st);
}
}
if (num == 0)
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 5f26137b8d60..774468341851 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -108,7 +108,7 @@ static void perf_stat_evsel_id_init(struct evsel *evsel)
/* ps->id is 0 hence PERF_STAT_EVSEL_ID__NONE by default */
for (i = 0; i < PERF_STAT_EVSEL_ID__MAX; i++) {
- if (!strcmp(perf_evsel__name(evsel), id_str[i])) {
+ if (!strcmp(evsel__name(evsel), id_str[i])) {
ps->id = i;
break;
}
@@ -173,7 +173,7 @@ static void perf_evsel__reset_prev_raw_counts(struct evsel *evsel)
static int perf_evsel__alloc_stats(struct evsel *evsel, bool alloc_raw)
{
- int ncpus = perf_evsel__nr_cpus(evsel);
+ int ncpus = evsel__nr_cpus(evsel);
int nthreads = perf_thread_map__nr(evsel->core.threads);
if (perf_evsel__alloc_stat_priv(evsel) < 0 ||
@@ -302,7 +302,7 @@ process_counter_values(struct perf_stat_config *config, struct evsel *evsel,
case AGGR_NODE:
case AGGR_NONE:
if (!evsel->snapshot)
- perf_evsel__compute_deltas(evsel, cpu, thread, count);
+ evsel__compute_deltas(evsel, cpu, thread, count);
perf_counts_values__scale(count, config->scale, NULL);
if ((config->aggr_mode == AGGR_NONE) && (!evsel->percore)) {
perf_stat__update_shadow_stats(evsel, count->val,
@@ -334,7 +334,7 @@ static int process_counter_maps(struct perf_stat_config *config,
struct evsel *counter)
{
int nthreads = perf_thread_map__nr(counter->core.threads);
- int ncpus = perf_evsel__nr_cpus(counter);
+ int ncpus = evsel__nr_cpus(counter);
int cpu, thread;
if (counter->core.system_wide)
@@ -368,8 +368,10 @@ int perf_stat_process_counter(struct perf_stat_config *config,
* interval mode, otherwise overall avg running
* averages will be shown for each interval.
*/
- if (config->interval)
- init_stats(ps->res_stats);
+ if (config->interval) {
+ for (i = 0; i < 3; i++)
+ init_stats(&ps->res_stats[i]);
+ }
if (counter->per_pkg)
zero_per_pkg(counter);
@@ -382,7 +384,7 @@ int perf_stat_process_counter(struct perf_stat_config *config,
return 0;
if (!counter->snapshot)
- perf_evsel__compute_deltas(counter, -1, -1, aggr);
+ evsel__compute_deltas(counter, -1, -1, aggr);
perf_counts_values__scale(aggr, config->scale, &counter->counts->scaled);
for (i = 0; i < 3; i++)
@@ -390,7 +392,7 @@ int perf_stat_process_counter(struct perf_stat_config *config,
if (verbose > 0) {
fprintf(config->output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
- perf_evsel__name(counter), count[0], count[1], count[2]);
+ evsel__name(counter), count[0], count[1], count[2]);
}
/*
@@ -507,7 +509,7 @@ int create_perf_stat_counter(struct evsel *evsel,
* either manually by us or by kernel via enable_on_exec
* set later.
*/
- if (perf_evsel__is_group_leader(evsel)) {
+ if (evsel__is_group_leader(evsel)) {
attr->disabled = 1;
/*
@@ -519,7 +521,7 @@ int create_perf_stat_counter(struct evsel *evsel,
}
if (target__has_cpu(target) && !target__has_per_thread(target))
- return perf_evsel__open_per_cpu(evsel, evsel__cpus(evsel), cpu);
+ return evsel__open_per_cpu(evsel, evsel__cpus(evsel), cpu);
- return perf_evsel__open_per_thread(evsel, evsel->core.threads);
+ return evsel__open_per_thread(evsel, evsel->core.threads);
}
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 8f4300492dc7..381da6b39f89 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -566,6 +566,20 @@ void dso__sort_by_name(struct dso *dso)
return symbols__sort_by_name(&dso->symbol_names, &dso->symbols);
}
+/*
+ * Parse leading hex characters into *long_val.
+ * Return the number of characters consumed.
+ */
+static int hex2u64(const char *ptr, u64 *long_val)
+{
+ char *p;
+
+ *long_val = strtoull(ptr, &p, 16);
+
+ return p - ptr;
+}
+
+
int modules__parse(const char *filename, void *arg,
int (*process_module)(void *arg, const char *name,
u64 start, u64 size))
diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c
index a661b122d9d8..89b390623b63 100644
--- a/tools/perf/util/synthetic-events.c
+++ b/tools/perf/util/synthetic-events.c
@@ -37,6 +37,7 @@
#include <string.h>
#include <uapi/linux/mman.h> /* To get things like MAP_HUGETLB even on older libc headers */
#include <api/fs/fs.h>
+#include <api/io.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
@@ -71,7 +72,6 @@ int perf_tool__process_synth_event(struct perf_tool *tool,
static int perf_event__get_comm_ids(pid_t pid, char *comm, size_t len,
pid_t *tgid, pid_t *ppid)
{
- char filename[PATH_MAX];
char bf[4096];
int fd;
size_t size = 0;
@@ -81,11 +81,11 @@ static int perf_event__get_comm_ids(pid_t pid, char *comm, size_t len,
*tgid = -1;
*ppid = -1;
- snprintf(filename, sizeof(filename), "/proc/%d/status", pid);
+ snprintf(bf, sizeof(bf), "/proc/%d/status", pid);
- fd = open(filename, O_RDONLY);
+ fd = open(bf, O_RDONLY);
if (fd < 0) {
- pr_debug("couldn't open %s\n", filename);
+ pr_debug("couldn't open %s\n", bf);
return -1;
}
@@ -274,6 +274,79 @@ static int perf_event__synthesize_fork(struct perf_tool *tool,
return 0;
}
+static bool read_proc_maps_line(struct io *io, __u64 *start, __u64 *end,
+ u32 *prot, u32 *flags, __u64 *offset,
+ u32 *maj, u32 *min,
+ __u64 *inode,
+ ssize_t pathname_size, char *pathname)
+{
+ __u64 temp;
+ int ch;
+ char *start_pathname = pathname;
+
+ if (io__get_hex(io, start) != '-')
+ return false;
+ if (io__get_hex(io, end) != ' ')
+ return false;
+
+ /* map protection and flags bits */
+ *prot = 0;
+ ch = io__get_char(io);
+ if (ch == 'r')
+ *prot |= PROT_READ;
+ else if (ch != '-')
+ return false;
+ ch = io__get_char(io);
+ if (ch == 'w')
+ *prot |= PROT_WRITE;
+ else if (ch != '-')
+ return false;
+ ch = io__get_char(io);
+ if (ch == 'x')
+ *prot |= PROT_EXEC;
+ else if (ch != '-')
+ return false;
+ ch = io__get_char(io);
+ if (ch == 's')
+ *flags = MAP_SHARED;
+ else if (ch == 'p')
+ *flags = MAP_PRIVATE;
+ else
+ return false;
+ if (io__get_char(io) != ' ')
+ return false;
+
+ if (io__get_hex(io, offset) != ' ')
+ return false;
+
+ if (io__get_hex(io, &temp) != ':')
+ return false;
+ *maj = temp;
+ if (io__get_hex(io, &temp) != ' ')
+ return false;
+ *min = temp;
+
+ ch = io__get_dec(io, inode);
+ if (ch != ' ') {
+ *pathname = '\0';
+ return ch == '\n';
+ }
+ do {
+ ch = io__get_char(io);
+ } while (ch == ' ');
+ while (true) {
+ if (ch < 0)
+ return false;
+ if (ch == '\0' || ch == '\n' ||
+ (pathname + 1 - start_pathname) >= pathname_size) {
+ *pathname = '\0';
+ return true;
+ }
+ *pathname++ = ch;
+ ch = io__get_char(io);
+ }
+}
+
int perf_event__synthesize_mmap_events(struct perf_tool *tool,
union perf_event *event,
pid_t pid, pid_t tgid,
@@ -281,9 +354,9 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
struct machine *machine,
bool mmap_data)
{
- char filename[PATH_MAX];
- FILE *fp;
unsigned long long t;
+ char bf[BUFSIZ];
+ struct io io;
bool truncation = false;
unsigned long long timeout = proc_map_timeout * 1000000ULL;
int rc = 0;
@@ -293,59 +366,52 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
if (machine__is_default_guest(machine))
return 0;
- snprintf(filename, sizeof(filename), "%s/proc/%d/task/%d/maps",
- machine->root_dir, pid, pid);
+ snprintf(bf, sizeof(bf), "%s/proc/%d/task/%d/maps",
+ machine->root_dir, pid, pid);
- fp = fopen(filename, "r");
- if (fp == NULL) {
+ io.fd = open(bf, O_RDONLY, 0);
+ if (io.fd < 0) {
/*
* We raced with a task exiting - just return:
*/
- pr_debug("couldn't open %s\n", filename);
+ pr_debug("couldn't open %s\n", bf);
return -1;
}
+ io__init(&io, io.fd, bf, sizeof(bf));
event->header.type = PERF_RECORD_MMAP2;
t = rdclock();
- while (1) {
- char bf[BUFSIZ];
- char prot[5];
- char execname[PATH_MAX];
- char anonstr[] = "//anon";
- unsigned int ino;
+ while (!io.eof) {
+ static const char anonstr[] = "//anon";
size_t size;
- ssize_t n;
- if (fgets(bf, sizeof(bf), fp) == NULL)
- break;
+ /* ensure null termination since stack will be reused. */
+ event->mmap2.filename[0] = '\0';
+
+ /* 00400000-0040c000 r-xp 00000000 fd:01 41038 /bin/cat */
+ if (!read_proc_maps_line(&io,
+ &event->mmap2.start,
+ &event->mmap2.len,
+ &event->mmap2.prot,
+ &event->mmap2.flags,
+ &event->mmap2.pgoff,
+ &event->mmap2.maj,
+ &event->mmap2.min,
+ &event->mmap2.ino,
+ sizeof(event->mmap2.filename),
+ event->mmap2.filename))
+ continue;
if ((rdclock() - t) > timeout) {
- pr_warning("Reading %s time out. "
+ pr_warning("Reading %s/proc/%d/task/%d/maps time out. "
"You may want to increase "
"the time limit by --proc-map-timeout\n",
- filename);
+ machine->root_dir, pid, pid);
truncation = true;
goto out;
}
- /* ensure null termination since stack will be reused. */
- strcpy(execname, "");
-
- /* 00400000-0040c000 r-xp 00000000 fd:01 41038 /bin/cat */
- n = sscanf(bf, "%"PRI_lx64"-%"PRI_lx64" %s %"PRI_lx64" %x:%x %u %[^\n]\n",
- &event->mmap2.start, &event->mmap2.len, prot,
- &event->mmap2.pgoff, &event->mmap2.maj,
- &event->mmap2.min,
- &ino, execname);
-
- /*
- * Anon maps don't have the execname.
- */
- if (n < 7)
- continue;
-
- event->mmap2.ino = (u64)ino;
event->mmap2.ino_generation = 0;
/*
@@ -356,23 +422,8 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
else
event->header.misc = PERF_RECORD_MISC_GUEST_USER;
- /* map protection and flags bits */
- event->mmap2.prot = 0;
- event->mmap2.flags = 0;
- if (prot[0] == 'r')
- event->mmap2.prot |= PROT_READ;
- if (prot[1] == 'w')
- event->mmap2.prot |= PROT_WRITE;
- if (prot[2] == 'x')
- event->mmap2.prot |= PROT_EXEC;
-
- if (prot[3] == 's')
- event->mmap2.flags |= MAP_SHARED;
- else
- event->mmap2.flags |= MAP_PRIVATE;
-
- if (prot[2] != 'x') {
- if (!mmap_data || prot[0] != 'r')
+ if ((event->mmap2.prot & PROT_EXEC) == 0) {
+ if (!mmap_data || (event->mmap2.prot & PROT_READ) == 0)
continue;
event->header.misc |= PERF_RECORD_MISC_MMAP_DATA;
@@ -382,17 +433,17 @@ out:
if (truncation)
event->header.misc |= PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT;
- if (!strcmp(execname, ""))
- strcpy(execname, anonstr);
+ if (!strcmp(event->mmap2.filename, ""))
+ strcpy(event->mmap2.filename, anonstr);
if (hugetlbfs_mnt_len &&
- !strncmp(execname, hugetlbfs_mnt, hugetlbfs_mnt_len)) {
- strcpy(execname, anonstr);
+ !strncmp(event->mmap2.filename, hugetlbfs_mnt,
+ hugetlbfs_mnt_len)) {
+ strcpy(event->mmap2.filename, anonstr);
event->mmap2.flags |= MAP_HUGETLB;
}
- size = strlen(execname) + 1;
- memcpy(event->mmap2.filename, execname, size);
+ size = strlen(event->mmap2.filename) + 1;
size = PERF_ALIGN(size, sizeof(u64));
event->mmap2.len -= event->mmap.start;
event->mmap2.header.size = (sizeof(event->mmap2) -
@@ -411,7 +462,7 @@ out:
break;
}
- fclose(fp);
+ close(io.fd);
return rc;
}
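The open()/io__init()/io.eof loop above relies on the buffered reader from tools/lib/api/io.h. Assuming that helper API behaves as its names suggest (io__init() attaches a buffer to an fd, io__get_char() returns the next byte or a negative value at EOF), a condensed, self-contained usage sketch is:

        #include <stdio.h>
        #include <fcntl.h>
        #include <unistd.h>
        #include <api/io.h>     /* assumed include path for the io helpers */

        /* Sketch: count the lines of a maps-style file with the buffered reader. */
        static int count_lines(const char *path)
        {
                char buf[BUFSIZ];
                struct io io;
                int lines = 0, ch;

                io.fd = open(path, O_RDONLY);
                if (io.fd < 0)
                        return -1;
                io__init(&io, io.fd, buf, sizeof(buf));

                while ((ch = io__get_char(&io)) >= 0) {
                        if (ch == '\n')
                                lines++;
                }
                close(io.fd);
                return lines;
        }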
@@ -1130,7 +1181,7 @@ void cpu_map_data__synthesize(struct perf_record_cpu_map_data *data, struct perf
synthesize_mask((struct perf_record_record_cpu_map *)data->data, map, max);
default:
break;
- };
+ }
}
static struct perf_record_cpu_map *cpu_map_event__new(struct perf_cpu_map *map)
diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c
index 0885967d5bc3..1b992bbba4e8 100644
--- a/tools/perf/util/thread-stack.c
+++ b/tools/perf/util/thread-stack.c
@@ -80,6 +80,10 @@ struct thread_stack_entry {
* @comm: current comm
* @arr_sz: size of array if this is the first element of an array
* @rstate: used to detect retpolines
+ * @br_stack_rb: branch stack (ring buffer)
+ * @br_stack_sz: maximum branch stack size
+ * @br_stack_pos: current position in @br_stack_rb
+ * @mispred_all: mark all branches as mispredicted
*/
struct thread_stack {
struct thread_stack_entry *stack;
@@ -95,6 +99,10 @@ struct thread_stack {
struct comm *comm;
unsigned int arr_sz;
enum retpoline_state_t rstate;
+ struct branch_stack *br_stack_rb;
+ unsigned int br_stack_sz;
+ unsigned int br_stack_pos;
+ bool mispred_all;
};
/*
@@ -126,13 +134,26 @@ static int thread_stack__grow(struct thread_stack *ts)
}
static int thread_stack__init(struct thread_stack *ts, struct thread *thread,
- struct call_return_processor *crp)
+ struct call_return_processor *crp,
+ bool callstack, unsigned int br_stack_sz)
{
int err;
- err = thread_stack__grow(ts);
- if (err)
- return err;
+ if (callstack) {
+ err = thread_stack__grow(ts);
+ if (err)
+ return err;
+ }
+
+ if (br_stack_sz) {
+ size_t sz = sizeof(struct branch_stack);
+
+ sz += br_stack_sz * sizeof(struct branch_entry);
+ ts->br_stack_rb = zalloc(sz);
+ if (!ts->br_stack_rb)
+ return -ENOMEM;
+ ts->br_stack_sz = br_stack_sz;
+ }
if (thread->maps && thread->maps->machine) {
struct machine *machine = thread->maps->machine;
@@ -150,7 +171,9 @@ static int thread_stack__init(struct thread_stack *ts, struct thread *thread,
}
static struct thread_stack *thread_stack__new(struct thread *thread, int cpu,
- struct call_return_processor *crp)
+ struct call_return_processor *crp,
+ bool callstack,
+ unsigned int br_stack_sz)
{
struct thread_stack *ts = thread->ts, *new_ts;
unsigned int old_sz = ts ? ts->arr_sz : 0;
@@ -176,7 +199,7 @@ static struct thread_stack *thread_stack__new(struct thread *thread, int cpu,
ts += cpu;
if (!ts->stack &&
- thread_stack__init(ts, thread, crp))
+ thread_stack__init(ts, thread, crp, callstack, br_stack_sz))
return NULL;
return ts;
@@ -319,6 +342,9 @@ static int __thread_stack__flush(struct thread *thread, struct thread_stack *ts)
if (!crp) {
ts->cnt = 0;
+ ts->br_stack_pos = 0;
+ if (ts->br_stack_rb)
+ ts->br_stack_rb->nr = 0;
return 0;
}
@@ -353,8 +379,33 @@ int thread_stack__flush(struct thread *thread)
return err;
}
+static void thread_stack__update_br_stack(struct thread_stack *ts, u32 flags,
+ u64 from_ip, u64 to_ip)
+{
+ struct branch_stack *bs = ts->br_stack_rb;
+ struct branch_entry *be;
+
+ if (!ts->br_stack_pos)
+ ts->br_stack_pos = ts->br_stack_sz;
+
+ ts->br_stack_pos -= 1;
+
+ be = &bs->entries[ts->br_stack_pos];
+ be->from = from_ip;
+ be->to = to_ip;
+ be->flags.value = 0;
+ be->flags.abort = !!(flags & PERF_IP_FLAG_TX_ABORT);
+ be->flags.in_tx = !!(flags & PERF_IP_FLAG_IN_TX);
+ /* No support for mispredict */
+ be->flags.mispred = ts->mispred_all;
+
+ if (bs->nr < ts->br_stack_sz)
+ bs->nr += 1;
+}
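thread_stack__update_br_stack() fills the ring newest-entry-first. A hypothetical standalone model of the same indexing (toy types, not the perf structures) may make the wrap-around easier to follow:

        /* Toy model: the write position steps backwards and wraps, while the
         * entry count saturates at the ring size (sz must not exceed 8 here).
         */
        struct ring {
                unsigned int sz, pos, nr;
                unsigned long long from[8], to[8];
        };

        static void ring_push(struct ring *r, unsigned long long from,
                              unsigned long long to)
        {
                if (!r->pos)
                        r->pos = r->sz;         /* wrap before stepping back */
                r->pos -= 1;

                r->from[r->pos] = from;
                r->to[r->pos] = to;

                if (r->nr < r->sz)
                        r->nr += 1;
        }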
+
int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip,
- u64 to_ip, u16 insn_len, u64 trace_nr)
+ u64 to_ip, u16 insn_len, u64 trace_nr, bool callstack,
+ unsigned int br_stack_sz, bool mispred_all)
{
struct thread_stack *ts = thread__stack(thread, cpu);
@@ -362,12 +413,13 @@ int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip,
return -EINVAL;
if (!ts) {
- ts = thread_stack__new(thread, cpu, NULL);
+ ts = thread_stack__new(thread, cpu, NULL, callstack, br_stack_sz);
if (!ts) {
pr_warning("Out of memory: no thread stack\n");
return -ENOMEM;
}
ts->trace_nr = trace_nr;
+ ts->mispred_all = mispred_all;
}
/*
@@ -381,8 +433,14 @@ int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip,
ts->trace_nr = trace_nr;
}
- /* Stop here if thread_stack__process() is in use */
- if (ts->crp)
+ if (br_stack_sz)
+ thread_stack__update_br_stack(ts, flags, from_ip, to_ip);
+
+ /*
+ * Stop here if thread_stack__process() is in use, or not recording call
+ * stack.
+ */
+ if (ts->crp || !callstack)
return 0;
if (flags & PERF_IP_FLAG_CALL) {
@@ -430,6 +488,7 @@ static void __thread_stack__free(struct thread *thread, struct thread_stack *ts)
{
__thread_stack__flush(thread, ts);
zfree(&ts->stack);
+ zfree(&ts->br_stack_rb);
}
static void thread_stack__reset(struct thread *thread, struct thread_stack *ts)
@@ -497,6 +556,199 @@ void thread_stack__sample(struct thread *thread, int cpu,
chain->nr = i;
}
+/*
+ * Hardware sample records, created some time after the event occurred, need to
+ * have subsequent addresses removed from the call chain.
+ */
+void thread_stack__sample_late(struct thread *thread, int cpu,
+ struct ip_callchain *chain, size_t sz,
+ u64 sample_ip, u64 kernel_start)
+{
+ struct thread_stack *ts = thread__stack(thread, cpu);
+ u64 sample_context = callchain_context(sample_ip, kernel_start);
+ u64 last_context, context, ip;
+ size_t nr = 0, j;
+
+ if (sz < 2) {
+ chain->nr = 0;
+ return;
+ }
+
+ if (!ts)
+ goto out;
+
+ /*
+ * When tracing kernel space, kernel addresses occur at the top of the
+ * call chain after the event occurred but before tracing stopped.
+ * Skip them.
+ */
+ for (j = 1; j <= ts->cnt; j++) {
+ ip = ts->stack[ts->cnt - j].ret_addr;
+ context = callchain_context(ip, kernel_start);
+ if (context == PERF_CONTEXT_USER ||
+ (context == sample_context && ip == sample_ip))
+ break;
+ }
+
+ last_context = sample_ip; /* Use sample_ip as an invalid context */
+
+ for (; nr < sz && j <= ts->cnt; nr++, j++) {
+ ip = ts->stack[ts->cnt - j].ret_addr;
+ context = callchain_context(ip, kernel_start);
+ if (context != last_context) {
+ if (nr >= sz - 1)
+ break;
+ chain->ips[nr++] = context;
+ last_context = context;
+ }
+ chain->ips[nr] = ip;
+ }
+out:
+ if (nr) {
+ chain->nr = nr;
+ } else {
+ chain->ips[0] = sample_context;
+ chain->ips[1] = sample_ip;
+ chain->nr = 2;
+ }
+}
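The loop above interleaves context markers with return addresses, emitting a marker only when the context changes. callchain_context() is assumed here to be the usual "ip below kernel_start means user space" check from util/callchain.h; a sketch of that assumption plus the resulting chain layout:

        /* Assumed helper (util/callchain.h): classify an address by context. */
        static inline u64 callchain_context(u64 ip, u64 kernel_start)
        {
                return ip < kernel_start ? PERF_CONTEXT_USER : PERF_CONTEXT_KERNEL;
        }

        /*
         * Resulting layout (addresses hypothetical):
         *   ips[0] = PERF_CONTEXT_KERNEL
         *   ips[1] = 0xffffffff81123456      kernel return addresses ...
         *   ips[2] = PERF_CONTEXT_USER
         *   ips[3] = 0x0000000000401a20      ... then user return addresses
         */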
+
+void thread_stack__br_sample(struct thread *thread, int cpu,
+ struct branch_stack *dst, unsigned int sz)
+{
+ struct thread_stack *ts = thread__stack(thread, cpu);
+ const size_t bsz = sizeof(struct branch_entry);
+ struct branch_stack *src;
+ struct branch_entry *be;
+ unsigned int nr;
+
+ dst->nr = 0;
+
+ if (!ts)
+ return;
+
+ src = ts->br_stack_rb;
+ if (!src->nr)
+ return;
+
+ dst->nr = min((unsigned int)src->nr, sz);
+
+ be = &dst->entries[0];
+ nr = min(ts->br_stack_sz - ts->br_stack_pos, (unsigned int)dst->nr);
+ memcpy(be, &src->entries[ts->br_stack_pos], bsz * nr);
+
+ if (src->nr >= ts->br_stack_sz) {
+ sz -= nr;
+ be = &dst->entries[nr];
+ nr = min(ts->br_stack_pos, sz);
+ memcpy(be, &src->entries[0], bsz * ts->br_stack_pos);
+ }
+}
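thread_stack__br_sample() copies the ring out in two halves: from the current position to the end of the array, then, if the ring has wrapped, from the start. A standalone sketch of the same idea (illustrative names, with both halves clamped to the space remaining in the destination):

        #include <string.h>

        struct br_ring {
                unsigned int sz, pos, nr;
                unsigned long long entries[8];
        };

        static unsigned int ring_copy(const struct br_ring *src,
                                      unsigned long long *dst, unsigned int dst_sz)
        {
                unsigned int total = src->nr < dst_sz ? src->nr : dst_sz;
                unsigned int first = src->sz - src->pos;        /* pos .. end */
                unsigned int n = first < total ? first : total;

                memcpy(dst, &src->entries[src->pos], n * sizeof(*dst));
                if (total > n)                                  /* wrapped part */
                        memcpy(dst + n, &src->entries[0],
                               (total - n) * sizeof(*dst));
                return total;
        }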
+
+/* Start of user space branch entries */
+static bool us_start(struct branch_entry *be, u64 kernel_start, bool *start)
+{
+ if (!*start)
+ *start = be->to && be->to < kernel_start;
+
+ return *start;
+}
+
+/*
+ * Start of branch entries after the ip fell in between 2 branches, or user
+ * space branch entries.
+ */
+static bool ks_start(struct branch_entry *be, u64 sample_ip, u64 kernel_start,
+ bool *start, struct branch_entry *nb)
+{
+ if (!*start) {
+ *start = (nb && sample_ip >= be->to && sample_ip <= nb->from) ||
+ be->from < kernel_start ||
+ (be->to && be->to < kernel_start);
+ }
+
+ return *start;
+}
+
+/*
+ * Hardware sample records, created some time after the event occurred, need to
+ * have subsequent addresses removed from the branch stack.
+ */
+void thread_stack__br_sample_late(struct thread *thread, int cpu,
+ struct branch_stack *dst, unsigned int sz,
+ u64 ip, u64 kernel_start)
+{
+ struct thread_stack *ts = thread__stack(thread, cpu);
+ struct branch_entry *d, *s, *spos, *ssz;
+ struct branch_stack *src;
+ unsigned int nr = 0;
+ bool start = false;
+
+ dst->nr = 0;
+
+ if (!ts)
+ return;
+
+ src = ts->br_stack_rb;
+ if (!src->nr)
+ return;
+
+ spos = &src->entries[ts->br_stack_pos];
+ ssz = &src->entries[ts->br_stack_sz];
+
+ d = &dst->entries[0];
+ s = spos;
+
+ if (ip < kernel_start) {
+ /*
+ * User space sample: start copying branch entries when the
+ * branch is in user space.
+ */
+ for (s = spos; s < ssz && nr < sz; s++) {
+ if (us_start(s, kernel_start, &start)) {
+ *d++ = *s;
+ nr += 1;
+ }
+ }
+
+ if (src->nr >= ts->br_stack_sz) {
+ for (s = &src->entries[0]; s < spos && nr < sz; s++) {
+ if (us_start(s, kernel_start, &start)) {
+ *d++ = *s;
+ nr += 1;
+ }
+ }
+ }
+ } else {
+ struct branch_entry *nb = NULL;
+
+ /*
+ * Kernel space sample: start copying branch entries when the ip
+ * falls in between 2 branches (or the branch is in user space
+ * because then the start must have been missed).
+ */
+ for (s = spos; s < ssz && nr < sz; s++) {
+ if (ks_start(s, ip, kernel_start, &start, nb)) {
+ *d++ = *s;
+ nr += 1;
+ }
+ nb = s;
+ }
+
+ if (src->nr >= ts->br_stack_sz) {
+ for (s = &src->entries[0]; s < spos && nr < sz; s++) {
+ if (ks_start(s, ip, kernel_start, &start, nb)) {
+ *d++ = *s;
+ nr += 1;
+ }
+ nb = s;
+ }
+ }
+ }
+
+ dst->nr = nr;
+}
+
struct call_return_processor *
call_return_processor__new(int (*process)(struct call_return *cr, u64 *parent_db_id, void *data),
void *data)
@@ -864,7 +1116,7 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
}
if (!ts) {
- ts = thread_stack__new(thread, sample->cpu, crp);
+ ts = thread_stack__new(thread, sample->cpu, crp, true, 0);
if (!ts)
return -ENOMEM;
ts->comm = comm;
diff --git a/tools/perf/util/thread-stack.h b/tools/perf/util/thread-stack.h
index e1ec5a58f1b2..3bc47a42af8e 100644
--- a/tools/perf/util/thread-stack.h
+++ b/tools/perf/util/thread-stack.h
@@ -81,10 +81,19 @@ struct call_return_processor {
};
int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip,
- u64 to_ip, u16 insn_len, u64 trace_nr);
+ u64 to_ip, u16 insn_len, u64 trace_nr, bool callstack,
+ unsigned int br_stack_sz, bool mispred_all);
void thread_stack__set_trace_nr(struct thread *thread, int cpu, u64 trace_nr);
void thread_stack__sample(struct thread *thread, int cpu, struct ip_callchain *chain,
size_t sz, u64 ip, u64 kernel_start);
+void thread_stack__sample_late(struct thread *thread, int cpu,
+ struct ip_callchain *chain, size_t sz, u64 ip,
+ u64 kernel_start);
+void thread_stack__br_sample(struct thread *thread, int cpu,
+ struct branch_stack *dst, unsigned int sz);
+void thread_stack__br_sample_late(struct thread *thread, int cpu,
+ struct branch_stack *dst, unsigned int sz,
+ u64 sample_ip, u64 kernel_start);
int thread_stack__flush(struct thread *thread);
void thread_stack__free(struct thread *thread);
size_t thread_stack__depth(struct thread *thread, int cpu);
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 28b719388028..665e5c0618ed 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -47,6 +47,7 @@ struct thread *thread__new(pid_t pid, pid_t tid)
thread->tid = tid;
thread->ppid = -1;
thread->cpu = -1;
+ thread->lbr_stitch_enable = false;
INIT_LIST_HEAD(&thread->namespaces_list);
INIT_LIST_HEAD(&thread->comm_list);
init_rwsem(&thread->namespaces_lock);
@@ -110,6 +111,7 @@ void thread__delete(struct thread *thread)
exit_rwsem(&thread->namespaces_lock);
exit_rwsem(&thread->comm_lock);
+ thread__free_stitch_list(thread);
free(thread);
}
@@ -452,3 +454,25 @@ int thread__memcpy(struct thread *thread, struct machine *machine,
return dso__data_read_offset(al.map->dso, machine, offset, buf, len);
}
+
+void thread__free_stitch_list(struct thread *thread)
+{
+ struct lbr_stitch *lbr_stitch = thread->lbr_stitch;
+ struct stitch_list *pos, *tmp;
+
+ if (!lbr_stitch)
+ return;
+
+ list_for_each_entry_safe(pos, tmp, &lbr_stitch->lists, node) {
+ list_del_init(&pos->node);
+ free(pos);
+ }
+
+ list_for_each_entry_safe(pos, tmp, &lbr_stitch->free_lists, node) {
+ list_del_init(&pos->node);
+ free(pos);
+ }
+
+ zfree(&lbr_stitch->prev_lbr_cursor);
+ zfree(&thread->lbr_stitch);
+}
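thread__free_stitch_list() uses the _safe list iterator because each node is unlinked and freed inside the loop body. A generic sketch of that pattern, assuming the list helpers from tools/include/linux/list.h:

        #include <stdlib.h>
        #include <linux/list.h>

        struct item {
                struct list_head node;
        };

        /* Free every node on the list; _safe iteration caches the next pointer
         * before the current node is freed.
         */
        static void free_all(struct list_head *head)
        {
                struct item *pos, *tmp;

                list_for_each_entry_safe(pos, tmp, head, node) {
                        list_del_init(&pos->node);
                        free(pos);
                }
        }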
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index 20b96b5d1f15..b066fb30d203 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -13,6 +13,8 @@
#include <strlist.h>
#include <intlist.h>
#include "rwsem.h"
+#include "event.h"
+#include "callchain.h"
struct addr_location;
struct map;
@@ -20,6 +22,13 @@ struct perf_record_namespaces;
struct thread_stack;
struct unwind_libunwind_ops;
+struct lbr_stitch {
+ struct list_head lists;
+ struct list_head free_lists;
+ struct perf_sample prev_sample;
+ struct callchain_cursor_node *prev_lbr_cursor;
+};
+
struct thread {
union {
struct rb_node rb_node;
@@ -46,6 +55,10 @@ struct thread {
struct srccode_state srccode_state;
bool filter;
int filter_entry_depth;
+
+ /* LBR call stack stitch */
+ bool lbr_stitch_enable;
+ struct lbr_stitch *lbr_stitch;
};
struct machine;
@@ -142,4 +155,6 @@ static inline bool thread__is_filtered(struct thread *thread)
return false;
}
+void thread__free_stitch_list(struct thread *thread);
+
#endif /* __PERF_THREAD_H */
diff --git a/tools/perf/util/top.c b/tools/perf/util/top.c
index 3dce2de9d005..27945eeb0cb5 100644
--- a/tools/perf/util/top.c
+++ b/tools/perf/util/top.c
@@ -77,7 +77,7 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size)
opts->freq ? "Hz" : "");
}
- ret += SNPRINTF(bf + ret, size - ret, "%s", perf_evsel__name(top->sym_evsel));
+ ret += SNPRINTF(bf + ret, size - ret, "%s", evsel__name(top->sym_evsel));
ret += SNPRINTF(bf + ret, size - ret, "], ");
diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h
index f117d4f4821e..ff8391208ecd 100644
--- a/tools/perf/util/top.h
+++ b/tools/perf/util/top.h
@@ -18,7 +18,7 @@ struct perf_session;
struct perf_top {
struct perf_tool tool;
- struct evlist *evlist;
+ struct evlist *evlist, *sb_evlist;
struct record_opts record_opts;
struct annotation_options annotation_opts;
struct evswitch evswitch;
@@ -36,6 +36,7 @@ struct perf_top {
bool use_tui, use_stdio;
bool vmlinux_warned;
bool dump_symtab;
+ bool stitch_lbr;
struct hist_entry *sym_filter_entry;
struct evsel *sym_evsel;
struct perf_session *session;
diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c
index 8593d3c200c6..f507dff713c9 100644
--- a/tools/perf/util/trace-event-read.c
+++ b/tools/perf/util/trace-event-read.c
@@ -75,7 +75,7 @@ static void skip(int size)
r = size > BUFSIZ ? BUFSIZ : size;
do_read(buf, r);
size -= r;
- };
+ }
}
static unsigned int read4(struct tep_handle *pevent)
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index d707c9624dd9..37a9492edb3e 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -290,6 +290,7 @@ int perf_event_paranoid(void)
bool perf_event_paranoid_check(int max_level)
{
return perf_cap__capable(CAP_SYS_ADMIN) ||
+ perf_cap__capable(CAP_PERFMON) ||
perf_event_paranoid() <= max_level;
}
diff --git a/tools/power/cpupower/utils/cpupower-info.c b/tools/power/cpupower/utils/cpupower-info.c
index d3755ea70d4d..0ba61a2c4d81 100644
--- a/tools/power/cpupower/utils/cpupower-info.c
+++ b/tools/power/cpupower/utils/cpupower-info.c
@@ -62,7 +62,7 @@ int cmd_info(int argc, char **argv)
default:
print_wrong_arg_exit();
}
- };
+ }
if (!params.params)
params.params = 0x7;
diff --git a/tools/power/cpupower/utils/cpupower-set.c b/tools/power/cpupower/utils/cpupower-set.c
index 3cca6f715dd9..052044d7e012 100644
--- a/tools/power/cpupower/utils/cpupower-set.c
+++ b/tools/power/cpupower/utils/cpupower-set.c
@@ -72,7 +72,7 @@ int cmd_set(int argc, char **argv)
default:
print_wrong_arg_exit();
}
- };
+ }
if (!params.params)
print_wrong_arg_exit();
diff --git a/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c b/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c
index 20f46348271b..5edd35bd9ee9 100644
--- a/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c
+++ b/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c
@@ -117,7 +117,7 @@ static int amd_fam14h_get_pci_info(struct cstate *state,
break;
default:
return -1;
- };
+ }
return 0;
}
diff --git a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c
index a65f7d011513..8b42c2f0a5b0 100644
--- a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c
+++ b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c
@@ -53,7 +53,7 @@ static int cpuidle_start(void)
dprint("CPU %d - State: %d - Val: %llu\n",
cpu, state, previous_count[cpu][state]);
}
- };
+ }
return 0;
}
@@ -72,7 +72,7 @@ static int cpuidle_stop(void)
dprint("CPU %d - State: %d - Val: %llu\n",
cpu, state, previous_count[cpu][state]);
}
- };
+ }
return 0;
}
@@ -172,7 +172,7 @@ static struct cpuidle_monitor *cpuidle_register(void)
cpuidle_cstates[num].id = num;
cpuidle_cstates[num].get_count_percent =
cpuidle_get_count_percent;
- };
+ }
/* Free this at program termination */
previous_count = malloc(sizeof(long long *) * cpu_count);
diff --git a/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c b/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c
index 97ad3233a521..55e55b6b42f9 100644
--- a/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c
+++ b/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c
@@ -79,7 +79,7 @@ static int hsw_ext_get_count(enum intel_hsw_ext_id id, unsigned long long *val,
break;
default:
return -1;
- };
+ }
if (read_msr(cpu, msr, val))
return -1;
return 0;
diff --git a/tools/power/cpupower/utils/idle_monitor/nhm_idle.c b/tools/power/cpupower/utils/idle_monitor/nhm_idle.c
index 114271165182..16eaf006f61f 100644
--- a/tools/power/cpupower/utils/idle_monitor/nhm_idle.c
+++ b/tools/power/cpupower/utils/idle_monitor/nhm_idle.c
@@ -91,7 +91,7 @@ static int nhm_get_count(enum intel_nhm_id id, unsigned long long *val,
break;
default:
return -1;
- };
+ }
if (read_msr(cpu, msr, val))
return -1;
diff --git a/tools/power/cpupower/utils/idle_monitor/snb_idle.c b/tools/power/cpupower/utils/idle_monitor/snb_idle.c
index df8b223cc096..811d63ab17a7 100644
--- a/tools/power/cpupower/utils/idle_monitor/snb_idle.c
+++ b/tools/power/cpupower/utils/idle_monitor/snb_idle.c
@@ -77,7 +77,7 @@ static int snb_get_count(enum intel_snb_id id, unsigned long long *val,
break;
default:
return -1;
- };
+ }
if (read_msr(cpu, msr, val))
return -1;
return 0;
diff --git a/tools/power/pm-graph/config/custom-timeline-functions.cfg b/tools/power/pm-graph/config/custom-timeline-functions.cfg
index 4f80ad7d7275..962e5768681c 100644
--- a/tools/power/pm-graph/config/custom-timeline-functions.cfg
+++ b/tools/power/pm-graph/config/custom-timeline-functions.cfg
@@ -125,7 +125,7 @@ acpi_suspend_begin:
suspend_console:
acpi_pm_prepare:
syscore_suspend:
-arch_enable_nonboot_cpus_end:
+arch_thaw_secondary_cpus_end:
syscore_resume:
acpi_pm_finish:
resume_console:
diff --git a/tools/power/pm-graph/sleepgraph.py b/tools/power/pm-graph/sleepgraph.py
index 9b0404d10768..602e64b68ba7 100755
--- a/tools/power/pm-graph/sleepgraph.py
+++ b/tools/power/pm-graph/sleepgraph.py
@@ -198,7 +198,7 @@ class SystemValues:
'suspend_console': {},
'acpi_pm_prepare': {},
'syscore_suspend': {},
- 'arch_enable_nonboot_cpus_end': {},
+ 'arch_thaw_secondary_cpus_end': {},
'syscore_resume': {},
'acpi_pm_finish': {},
'resume_console': {},
diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c
index b73763489410..9f68f51ca652 100644
--- a/tools/power/x86/intel-speed-select/isst-config.c
+++ b/tools/power/x86/intel-speed-select/isst-config.c
@@ -15,7 +15,7 @@ struct process_cmd_struct {
int arg;
};
-static const char *version_str = "v1.3";
+static const char *version_str = "v1.4";
static const int supported_api_ver = 1;
static struct isst_if_platform_info isst_platform_info;
static char *progname;
@@ -25,7 +25,7 @@ static FILE *outf;
static int cpu_model;
static int cpu_stepping;
-#define MAX_CPUS_IN_ONE_REQ 64
+#define MAX_CPUS_IN_ONE_REQ 256
static short max_target_cpus;
static unsigned short target_cpus[MAX_CPUS_IN_ONE_REQ];
@@ -653,7 +653,7 @@ void set_cpu_mask_from_punit_coremask(int cpu, unsigned long long core_mask,
pkg_id = get_physical_package_id(cpu);
for (i = 0; i < 64; ++i) {
- if (core_mask & BIT(i)) {
+ if (core_mask & BIT_ULL(i)) {
int j;
for (j = 0; j < topo_max_cpus; ++j) {
@@ -1169,6 +1169,7 @@ static void dump_clx_n_config_for_cpu(int cpu, void *arg1, void *arg2,
ctdp_level = &clx_n_pkg_dev.ctdp_level[0];
pbf_info = &ctdp_level->pbf_info;
+ clx_n_pkg_dev.processed = 1;
isst_ctdp_display_information(cpu, outf, tdp_level, &clx_n_pkg_dev);
free_cpu_set(ctdp_level->core_cpumask);
free_cpu_set(pbf_info->core_cpumask);
@@ -1631,6 +1632,8 @@ static int set_pbf_core_power(int cpu)
static void set_pbf_for_cpu(int cpu, void *arg1, void *arg2, void *arg3,
void *arg4)
{
+ struct isst_pkg_ctdp_level_info ctdp_level;
+ struct isst_pkg_ctdp pkg_dev;
int ret;
int status = *(int *)arg4;
@@ -1646,6 +1649,24 @@ static void set_pbf_for_cpu(int cpu, void *arg1, void *arg2, void *arg3,
goto disp_result;
}
+ ret = isst_get_ctdp_levels(cpu, &pkg_dev);
+ if (ret) {
+ isst_display_error_info_message(1, "Failed to get number of levels", 0, 0);
+ goto disp_result;
+ }
+
+ ret = isst_get_ctdp_control(cpu, pkg_dev.current_level, &ctdp_level);
+ if (ret) {
+ isst_display_error_info_message(1, "Failed to get current level", 0, 0);
+ goto disp_result;
+ }
+
+ if (!ctdp_level.pbf_support) {
+ isst_display_error_info_message(1, "base-freq feature is not present at this level", 1, pkg_dev.current_level);
+ ret = -1;
+ goto disp_result;
+ }
+
if (auto_mode && status) {
ret = set_pbf_core_power(cpu);
if (ret)
@@ -1772,10 +1793,30 @@ static void dump_fact_config(int arg)
static void set_fact_for_cpu(int cpu, void *arg1, void *arg2, void *arg3,
void *arg4)
{
+ struct isst_pkg_ctdp_level_info ctdp_level;
+ struct isst_pkg_ctdp pkg_dev;
int ret;
int status = *(int *)arg4;
- if (auto_mode && status) {
+ ret = isst_get_ctdp_levels(cpu, &pkg_dev);
+ if (ret) {
+ isst_display_error_info_message(1, "Failed to get number of levels", 0, 0);
+ goto disp_results;
+ }
+
+ ret = isst_get_ctdp_control(cpu, pkg_dev.current_level, &ctdp_level);
+ if (ret) {
+ isst_display_error_info_message(1, "Failed to get current level", 0, 0);
+ goto disp_results;
+ }
+
+ if (!ctdp_level.fact_support) {
+ isst_display_error_info_message(1, "turbo-freq feature is not present at this level", 1, pkg_dev.current_level);
+ ret = -1;
+ goto disp_results;
+ }
+
+ if (status) {
ret = isst_pm_qos_config(cpu, 1, 1);
if (ret)
goto disp_results;
diff --git a/tools/power/x86/intel-speed-select/isst-core.c b/tools/power/x86/intel-speed-select/isst-core.c
index 67c9b1139631..a7f4337c5777 100644
--- a/tools/power/x86/intel-speed-select/isst-core.c
+++ b/tools/power/x86/intel-speed-select/isst-core.c
@@ -912,16 +912,16 @@ int isst_pm_qos_config(int cpu, int enable_clos, int priority_type)
return ret;
if (ctdp_level.fact_enabled) {
- debug_printf("Turbo-freq feature must be disabled first\n");
+ isst_display_error_info_message(1, "Ignoring request, turbo-freq feature is still enabled", 0, 0);
return -EINVAL;
}
ret = isst_write_pm_config(cpu, 0);
if (ret)
- isst_display_error_info_message(0, "WRITE_PM_CONFIG command failed, ignoring error\n", 0, 0);
+ isst_display_error_info_message(0, "WRITE_PM_CONFIG command failed, ignoring error", 0, 0);
} else {
ret = isst_write_pm_config(cpu, 1);
if (ret)
- isst_display_error_info_message(0, "WRITE_PM_CONFIG command failed, ignoring error\n", 0, 0);
+ isst_display_error_info_message(0, "WRITE_PM_CONFIG command failed, ignoring error", 0, 0);
}
ret = isst_send_mbox_command(cpu, CONFIG_CLOS, CLOS_PM_QOS_CONFIG, 0, 0,
diff --git a/tools/power/x86/intel-speed-select/isst-display.c b/tools/power/x86/intel-speed-select/isst-display.c
index 51dbaa5f02ec..e105fece47b6 100644
--- a/tools/power/x86/intel-speed-select/isst-display.c
+++ b/tools/power/x86/intel-speed-select/isst-display.c
@@ -316,21 +316,31 @@ void isst_ctdp_display_core_info(int cpu, FILE *outf, char *prefix,
{
char header[256];
char value[256];
+ int level = 1;
+
+ if (out_format_is_json()) {
+ snprintf(header, sizeof(header), "package-%d:die-%d:cpu-%d",
+ get_physical_package_id(cpu), get_physical_die_id(cpu),
+ cpu);
+ format_and_print(outf, level++, header, NULL);
+ } else {
+ snprintf(header, sizeof(header), "package-%d",
+ get_physical_package_id(cpu));
+ format_and_print(outf, level++, header, NULL);
+ snprintf(header, sizeof(header), "die-%d",
+ get_physical_die_id(cpu));
+ format_and_print(outf, level++, header, NULL);
+ snprintf(header, sizeof(header), "cpu-%d", cpu);
+ format_and_print(outf, level++, header, NULL);
+ }
- snprintf(header, sizeof(header), "package-%d",
- get_physical_package_id(cpu));
- format_and_print(outf, 1, header, NULL);
- snprintf(header, sizeof(header), "die-%d", get_physical_die_id(cpu));
- format_and_print(outf, 2, header, NULL);
- snprintf(header, sizeof(header), "cpu-%d", cpu);
- format_and_print(outf, 3, header, NULL);
if (str0 && !val)
snprintf(value, sizeof(value), "%s", str0);
else if (str1 && val)
snprintf(value, sizeof(value), "%s", str1);
else
snprintf(value, sizeof(value), "%u", val);
- format_and_print(outf, 4, prefix, value);
+ format_and_print(outf, level, prefix, value);
format_and_print(outf, 1, NULL, NULL);
}
@@ -470,7 +480,7 @@ void isst_ctdp_display_information(int cpu, FILE *outf, int tdp_level,
_isst_pbf_display_information(cpu, outf,
tdp_level,
&ctdp_level->pbf_info,
- level + 1);
+ level + 2);
continue;
}
diff --git a/tools/power/x86/intel-speed-select/isst.h b/tools/power/x86/intel-speed-select/isst.h
index 2e1afd856a78..094ba4589a9c 100644
--- a/tools/power/x86/intel-speed-select/isst.h
+++ b/tools/power/x86/intel-speed-select/isst.h
@@ -29,6 +29,7 @@
#include <sys/ioctl.h>
#define BIT(x) (1 << (x))
+#define BIT_ULL(nr) (1ULL << (nr))
#define GENMASK(h, l) (((~0UL) << (l)) & (~0UL >> (sizeof(long) * 8 - 1 - (h))))
#define GENMASK_ULL(h, l) \
(((~0ULL) << (l)) & (~0ULL >> (sizeof(long long) * 8 - 1 - (h))))
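The new BIT_ULL() pairs with the set_cpu_mask_from_punit_coremask() change earlier in this patch: core masks can have bits set above bit 31, where the int-typed BIT() overflows. A small standalone illustration (not part of the tool) of why the 64-bit variant matters:

        #include <stdio.h>

        #define BIT(x)          (1 << (x))
        #define BIT_ULL(nr)     (1ULL << (nr))

        int main(void)
        {
                unsigned long long core_mask = 1ULL << 40;

                /* BIT(40) would shift a 32-bit int past its width (undefined
                 * behaviour); BIT_ULL(40) tests the intended bit.
                 */
                printf("%d\n", !!(core_mask & BIT_ULL(40)));    /* prints 1 */
                return 0;
        }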
diff --git a/tools/spi/Makefile b/tools/spi/Makefile
index 2249a1546cc1..ada881afb489 100644
--- a/tools/spi/Makefile
+++ b/tools/spi/Makefile
@@ -52,7 +52,9 @@ $(OUTPUT)spidev_fdx: $(SPIDEV_FDX_IN)
clean:
rm -f $(ALL_PROGRAMS)
rm -rf $(OUTPUT)include/
- find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.d' -delete
+ find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete
+ find $(if $(OUTPUT),$(OUTPUT),.) -name '\.*.o.d' -delete
+ find $(if $(OUTPUT),$(OUTPUT),.) -name '\.*.o.cmd' -delete
install: $(ALL_PROGRAMS)
install -d -m 755 $(DESTDIR)$(bindir); \
diff --git a/tools/spi/spidev_test.c b/tools/spi/spidev_test.c
index 27967dd90f8f..eec23fa693bd 100644
--- a/tools/spi/spidev_test.c
+++ b/tools/spi/spidev_test.c
@@ -128,18 +128,22 @@ static void transfer(int fd, uint8_t const *tx, uint8_t const *rx, size_t len)
.bits_per_word = bits,
};
- if (mode & SPI_TX_QUAD)
+ if (mode & SPI_TX_OCTAL)
+ tr.tx_nbits = 8;
+ else if (mode & SPI_TX_QUAD)
tr.tx_nbits = 4;
else if (mode & SPI_TX_DUAL)
tr.tx_nbits = 2;
- if (mode & SPI_RX_QUAD)
+ if (mode & SPI_RX_OCTAL)
+ tr.rx_nbits = 8;
+ else if (mode & SPI_RX_QUAD)
tr.rx_nbits = 4;
else if (mode & SPI_RX_DUAL)
tr.rx_nbits = 2;
if (!(mode & SPI_LOOP)) {
- if (mode & (SPI_TX_QUAD | SPI_TX_DUAL))
+ if (mode & (SPI_TX_OCTAL | SPI_TX_QUAD | SPI_TX_DUAL))
tr.rx_buf = 0;
- else if (mode & (SPI_RX_QUAD | SPI_RX_DUAL))
+ else if (mode & (SPI_RX_OCTAL | SPI_RX_QUAD | SPI_RX_DUAL))
tr.tx_buf = 0;
}
@@ -187,6 +191,7 @@ static void print_usage(const char *prog)
" -R --ready slave pulls low to pause\n"
" -2 --dual dual transfer\n"
" -4 --quad quad transfer\n"
+ " -8 --octal octal transfer\n"
" -S --size transfer size\n"
" -I --iter iterations\n");
exit(1);
@@ -213,13 +218,14 @@ static void parse_opts(int argc, char *argv[])
{ "dual", 0, 0, '2' },
{ "verbose", 0, 0, 'v' },
{ "quad", 0, 0, '4' },
+ { "octal", 0, 0, '8' },
{ "size", 1, 0, 'S' },
{ "iter", 1, 0, 'I' },
{ NULL, 0, 0, 0 },
};
int c;
- c = getopt_long(argc, argv, "D:s:d:b:i:o:lHOLC3NR24p:vS:I:",
+ c = getopt_long(argc, argv, "D:s:d:b:i:o:lHOLC3NR248p:vS:I:",
lopts, NULL);
if (c == -1)
@@ -280,6 +286,9 @@ static void parse_opts(int argc, char *argv[])
case '4':
mode |= SPI_TX_QUAD;
break;
+ case '8':
+ mode |= SPI_TX_OCTAL;
+ break;
case 'S':
transfer_size = atoi(optarg);
break;
@@ -295,6 +304,8 @@ static void parse_opts(int argc, char *argv[])
mode |= SPI_RX_DUAL;
if (mode & SPI_TX_QUAD)
mode |= SPI_RX_QUAD;
+ if (mode & SPI_TX_OCTAL)
+ mode |= SPI_RX_OCTAL;
}
}
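Outside of spidev_test, a user program would request the new octal mode through the 32-bit mode ioctls, since SPI_TX_OCTAL/SPI_RX_OCTAL sit above the legacy 8-bit mode word. A hedged sketch, assuming the controller actually advertises octal support:

        #include <stdint.h>
        #include <sys/ioctl.h>
        #include <linux/spi/spidev.h>

        /* Sketch: enable octal (8-line) transfers on an already-open spidev fd. */
        static int enable_octal(int fd)
        {
                uint32_t mode;

                if (ioctl(fd, SPI_IOC_RD_MODE32, &mode) < 0)
                        return -1;
                mode |= SPI_TX_OCTAL | SPI_RX_OCTAL;
                return ioctl(fd, SPI_IOC_WR_MODE32, &mode);
        }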
diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index a9b2b48947ff..f159718f90c0 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -7,10 +7,10 @@
/x86_64/hyperv_cpuid
/x86_64/mmio_warning_test
/x86_64/platform_info_test
-/x86_64/set_memory_region_test
/x86_64/set_sregs_test
/x86_64/smm_test
/x86_64/state_test
+/x86_64/vmx_preemption_timer_test
/x86_64/svm_vmcall_test
/x86_64/sync_regs_test
/x86_64/vmx_close_while_nested_test
@@ -22,4 +22,5 @@
/demand_paging_test
/dirty_log_test
/kvm_create_max_vcpus
+/set_memory_region_test
/steal_time
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 42f4f49f2a48..b4ff112e5c7e 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -43,10 +43,10 @@ TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test
TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid
TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test
TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test
-TEST_GEN_PROGS_x86_64 += x86_64/set_memory_region_test
TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test
TEST_GEN_PROGS_x86_64 += x86_64/smm_test
TEST_GEN_PROGS_x86_64 += x86_64/state_test
+TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test
TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test
TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test
@@ -59,12 +59,14 @@ TEST_GEN_PROGS_x86_64 += clear_dirty_log_test
TEST_GEN_PROGS_x86_64 += demand_paging_test
TEST_GEN_PROGS_x86_64 += dirty_log_test
TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus
+TEST_GEN_PROGS_x86_64 += set_memory_region_test
TEST_GEN_PROGS_x86_64 += steal_time
TEST_GEN_PROGS_aarch64 += clear_dirty_log_test
TEST_GEN_PROGS_aarch64 += demand_paging_test
TEST_GEN_PROGS_aarch64 += dirty_log_test
TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus
+TEST_GEN_PROGS_aarch64 += set_memory_region_test
TEST_GEN_PROGS_aarch64 += steal_time
TEST_GEN_PROGS_s390x = s390x/memop
@@ -73,6 +75,7 @@ TEST_GEN_PROGS_s390x += s390x/sync_regs_test
TEST_GEN_PROGS_s390x += demand_paging_test
TEST_GEN_PROGS_s390x += dirty_log_test
TEST_GEN_PROGS_s390x += kvm_create_max_vcpus
+TEST_GEN_PROGS_s390x += set_memory_region_test
TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M))
LIBKVM += $(LIBKVM_$(UNAME_M))
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index 92e184a422ee..919e161dd289 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -10,6 +10,7 @@
#include "test_util.h"
#include "asm/kvm.h"
+#include "linux/list.h"
#include "linux/kvm.h"
#include <sys/ioctl.h>
@@ -113,6 +114,7 @@ int _vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl,
void vm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa);
+void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot);
void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid);
vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
uint32_t data_memslot, uint32_t pgd_memslot);
@@ -256,6 +258,7 @@ bool vm_is_unrestricted_guest(struct kvm_vm *vm);
unsigned int vm_get_page_size(struct kvm_vm *vm);
unsigned int vm_get_page_shift(struct kvm_vm *vm);
unsigned int vm_get_max_gfn(struct kvm_vm *vm);
+int vm_get_fd(struct kvm_vm *vm);
unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size);
unsigned int vm_num_host_pages(enum vm_guest_mode mode, unsigned int num_guest_pages);
@@ -311,13 +314,30 @@ void ucall_uninit(struct kvm_vm *vm);
void ucall(uint64_t cmd, int nargs, ...);
uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc);
+#define GUEST_SYNC_ARGS(stage, arg1, arg2, arg3, arg4) \
+ ucall(UCALL_SYNC, 6, "hello", stage, arg1, arg2, arg3, arg4)
#define GUEST_SYNC(stage) ucall(UCALL_SYNC, 2, "hello", stage)
#define GUEST_DONE() ucall(UCALL_DONE, 0)
-#define GUEST_ASSERT(_condition) do { \
- if (!(_condition)) \
- ucall(UCALL_ABORT, 2, \
- "Failed guest assert: " \
- #_condition, __LINE__); \
+#define __GUEST_ASSERT(_condition, _nargs, _args...) do { \
+ if (!(_condition)) \
+ ucall(UCALL_ABORT, 2 + _nargs, \
+ "Failed guest assert: " \
+ #_condition, __LINE__, _args); \
} while (0)
+#define GUEST_ASSERT(_condition) \
+ __GUEST_ASSERT((_condition), 0, 0)
+
+#define GUEST_ASSERT_1(_condition, arg1) \
+ __GUEST_ASSERT((_condition), 1, (arg1))
+
+#define GUEST_ASSERT_2(_condition, arg1, arg2) \
+ __GUEST_ASSERT((_condition), 2, (arg1), (arg2))
+
+#define GUEST_ASSERT_3(_condition, arg1, arg2, arg3) \
+ __GUEST_ASSERT((_condition), 3, (arg1), (arg2), (arg3))
+
+#define GUEST_ASSERT_4(_condition, arg1, arg2, arg3, arg4) \
+ __GUEST_ASSERT((_condition), 4, (arg1), (arg2), (arg3), (arg4))
+
#endif /* SELFTEST_KVM_UTIL_H */
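The variadic GUEST_ASSERT_n() macros let a guest pass values back through the abort ucall so the host can print them. A hypothetical guest-side usage sketch (GUEST_GPA is a made-up constant, not part of this patch; READ_ONCE and the GUEST_* macros come from linux/compiler.h and kvm_util.h in the selftest tree):

        #define GUEST_GPA 0xc0000000ul          /* hypothetical guest address */

        static void guest_code(void)
        {
                uint64_t val = READ_ONCE(*(uint64_t *)GUEST_GPA);

                /* On failure the host sees UCALL_ABORT with val and GUEST_GPA
                 * as extra arguments it can report.
                 */
                GUEST_ASSERT_2(val == 1 || val == 2, val, GUEST_GPA);
                GUEST_SYNC(1);
                GUEST_DONE();
        }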
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 7428513a4c68..82b7fe16a824 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -79,13 +79,16 @@ static inline uint64_t get_desc64_base(const struct desc64 *desc)
static inline uint64_t rdtsc(void)
{
uint32_t eax, edx;
-
+ uint64_t tsc_val;
/*
* The lfence is to wait (on Intel CPUs) until all previous
- * instructions have been executed.
+ * instructions have been executed. If software requires RDTSC to be
+ * executed prior to execution of any subsequent instruction, it can
+ * execute LFENCE immediately after RDTSC
*/
- __asm__ __volatile__("lfence; rdtsc" : "=a"(eax), "=d"(edx));
- return ((uint64_t)edx) << 32 | eax;
+ __asm__ __volatile__("lfence; rdtsc; lfence" : "=a"(eax), "=d"(edx));
+ tsc_val = ((uint64_t)edx) << 32 | eax;
+ return tsc_val;
}
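With LFENCE on both sides, RDTSC is ordered against the surrounding instructions, so two reads can bracket a measured region. A small usage sketch (illustrative, not part of the selftest library):

        /* Sketch: cycles spent in fn(), relying on the serialized rdtsc() above. */
        static inline uint64_t cycles_of(void (*fn)(void))
        {
                uint64_t start = rdtsc();

                fn();
                return rdtsc() - start;
        }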
static inline uint64_t rdtscp(uint32_t *aux)
diff --git a/tools/testing/selftests/kvm/include/x86_64/svm_util.h b/tools/testing/selftests/kvm/include/x86_64/svm_util.h
index cd037917fece..674151d24fcf 100644
--- a/tools/testing/selftests/kvm/include/x86_64/svm_util.h
+++ b/tools/testing/selftests/kvm/include/x86_64/svm_util.h
@@ -35,4 +35,14 @@ void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_r
void run_guest(struct vmcb *vmcb, uint64_t vmcb_gpa);
void nested_svm_check_supported(void);
+static inline bool cpu_has_svm(void)
+{
+ u32 eax = 0x80000001, ecx;
+
+ asm("cpuid" :
+ "=a" (eax), "=c" (ecx) : "0" (eax) : "ebx", "edx");
+
+ return ecx & CPUID_SVM;
+}
+
#endif /* SELFTEST_KVM_SVM_UTILS_H */
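cpu_has_svm() lets nested-SVM tests skip cleanly on hardware without SVM. A sketch of how a test might use it, assuming svm_util.h is on the include path and using the conventional KSFT_SKIP exit code of 4:

        #include <stdio.h>
        #include <stdlib.h>
        #include "svm_util.h"

        int main(void)
        {
                if (!cpu_has_svm()) {
                        fprintf(stderr, "CPUID.80000001H:ECX.SVM not set, skipping\n");
                        return 4;       /* KSFT_SKIP */
                }
                printf("SVM is supported by this CPU\n");
                return 0;
        }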
diff --git a/tools/testing/selftests/kvm/include/x86_64/vmx.h b/tools/testing/selftests/kvm/include/x86_64/vmx.h
index 3d27069b9ed9..ccff3e6e2704 100644
--- a/tools/testing/selftests/kvm/include/x86_64/vmx.h
+++ b/tools/testing/selftests/kvm/include/x86_64/vmx.h
@@ -575,6 +575,33 @@ struct vmx_pages {
void *eptp;
};
+union vmx_basic {
+ u64 val;
+ struct {
+ u32 revision;
+ u32 size:13,
+ reserved1:3,
+ width:1,
+ dual:1,
+ type:4,
+ insouts:1,
+ ctrl:1,
+ vm_entry_exception_ctrl:1,
+ reserved2:7;
+ };
+};
+
+union vmx_ctrl_msr {
+ u64 val;
+ struct {
+ u32 set, clr;
+ };
+};
+
+union vmx_basic basic;
+union vmx_ctrl_msr ctrl_pin_rev;
+union vmx_ctrl_msr ctrl_exit_rev;
+
struct vmx_pages *vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva);
bool prepare_for_vmx_operation(struct vmx_pages *vmx);
void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp);
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 9622431069bc..c9cede5c7d0d 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -161,6 +161,9 @@ struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
vm = calloc(1, sizeof(*vm));
TEST_ASSERT(vm != NULL, "Insufficient Memory");
+ INIT_LIST_HEAD(&vm->vcpus);
+ INIT_LIST_HEAD(&vm->userspace_mem_regions);
+
vm->mode = mode;
vm->type = 0;
@@ -258,8 +261,7 @@ void kvm_vm_restart(struct kvm_vm *vmp, int perm)
if (vmp->has_irqchip)
vm_create_irqchip(vmp);
- for (region = vmp->userspace_mem_region_head; region;
- region = region->next) {
+ list_for_each_entry(region, &vmp->userspace_mem_regions, list) {
int ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
" rc: %i errno: %i\n"
@@ -319,8 +321,7 @@ userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end)
{
struct userspace_mem_region *region;
- for (region = vm->userspace_mem_region_head; region;
- region = region->next) {
+ list_for_each_entry(region, &vm->userspace_mem_regions, list) {
uint64_t existing_start = region->region.guest_phys_addr;
uint64_t existing_end = region->region.guest_phys_addr
+ region->region.memory_size - 1;
@@ -378,11 +379,11 @@ kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
*/
struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid)
{
- struct vcpu *vcpup;
+ struct vcpu *vcpu;
- for (vcpup = vm->vcpu_head; vcpup; vcpup = vcpup->next) {
- if (vcpup->id == vcpuid)
- return vcpup;
+ list_for_each_entry(vcpu, &vm->vcpus, list) {
+ if (vcpu->id == vcpuid)
+ return vcpu;
}
return NULL;
@@ -392,18 +393,16 @@ struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid)
* VM VCPU Remove
*
* Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU ID
+ * vcpu - VCPU to remove
*
* Output Args: None
*
* Return: None, TEST_ASSERT failures for all error conditions
*
- * Within the VM specified by vm, removes the VCPU given by vcpuid.
+ * Removes a vCPU from a VM and frees its resources.
*/
-static void vm_vcpu_rm(struct kvm_vm *vm, uint32_t vcpuid)
+static void vm_vcpu_rm(struct vcpu *vcpu)
{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
int ret;
ret = munmap(vcpu->state, sizeof(*vcpu->state));
@@ -413,21 +412,17 @@ static void vm_vcpu_rm(struct kvm_vm *vm, uint32_t vcpuid)
TEST_ASSERT(ret == 0, "Close of VCPU fd failed, rc: %i "
"errno: %i", ret, errno);
- if (vcpu->next)
- vcpu->next->prev = vcpu->prev;
- if (vcpu->prev)
- vcpu->prev->next = vcpu->next;
- else
- vm->vcpu_head = vcpu->next;
+ list_del(&vcpu->list);
free(vcpu);
}
void kvm_vm_release(struct kvm_vm *vmp)
{
+ struct vcpu *vcpu, *tmp;
int ret;
- while (vmp->vcpu_head)
- vm_vcpu_rm(vmp, vmp->vcpu_head->id);
+ list_for_each_entry_safe(vcpu, tmp, &vmp->vcpus, list)
+ vm_vcpu_rm(vcpu);
ret = close(vmp->fd);
TEST_ASSERT(ret == 0, "Close of vm fd failed,\n"
@@ -438,35 +433,38 @@ void kvm_vm_release(struct kvm_vm *vmp)
" vmp->kvm_fd: %i rc: %i errno: %i", vmp->kvm_fd, ret, errno);
}
+static void __vm_mem_region_delete(struct kvm_vm *vm,
+ struct userspace_mem_region *region)
+{
+ int ret;
+
+ list_del(&region->list);
+
+ region->region.memory_size = 0;
+ ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
+ TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed, "
+ "rc: %i errno: %i", ret, errno);
+
+ sparsebit_free(&region->unused_phy_pages);
+ ret = munmap(region->mmap_start, region->mmap_size);
+ TEST_ASSERT(ret == 0, "munmap failed, rc: %i errno: %i", ret, errno);
+
+ free(region);
+}
+
/*
* Destroys and frees the VM pointed to by vmp.
*/
void kvm_vm_free(struct kvm_vm *vmp)
{
- int ret;
+ struct userspace_mem_region *region, *tmp;
if (vmp == NULL)
return;
/* Free userspace_mem_regions. */
- while (vmp->userspace_mem_region_head) {
- struct userspace_mem_region *region
- = vmp->userspace_mem_region_head;
-
- region->region.memory_size = 0;
- ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION,
- &region->region);
- TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed, "
- "rc: %i errno: %i", ret, errno);
-
- vmp->userspace_mem_region_head = region->next;
- sparsebit_free(&region->unused_phy_pages);
- ret = munmap(region->mmap_start, region->mmap_size);
- TEST_ASSERT(ret == 0, "munmap failed, rc: %i errno: %i",
- ret, errno);
-
- free(region);
- }
+ list_for_each_entry_safe(region, tmp, &vmp->userspace_mem_regions, list)
+ __vm_mem_region_delete(vmp, region);
/* Free sparsebit arrays. */
sparsebit_free(&vmp->vpages_valid);
@@ -612,12 +610,10 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
(uint64_t) region->region.memory_size);
/* Confirm no region with the requested slot already exists. */
- for (region = vm->userspace_mem_region_head; region;
- region = region->next) {
- if (region->region.slot == slot)
- break;
- }
- if (region != NULL)
+ list_for_each_entry(region, &vm->userspace_mem_regions, list) {
+ if (region->region.slot != slot)
+ continue;
+
TEST_FAIL("A mem region with the requested slot "
"already exists.\n"
" requested slot: %u paddr: 0x%lx npages: 0x%lx\n"
@@ -626,6 +622,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
region->region.slot,
(uint64_t) region->region.guest_phys_addr,
(uint64_t) region->region.memory_size);
+ }
/* Allocate and initialize new mem region structure. */
region = calloc(1, sizeof(*region));
@@ -686,10 +683,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
guest_paddr, (uint64_t) region->region.memory_size);
/* Add to linked-list of memory regions. */
- if (vm->userspace_mem_region_head)
- vm->userspace_mem_region_head->prev = region;
- region->next = vm->userspace_mem_region_head;
- vm->userspace_mem_region_head = region;
+ list_add(&region->list, &vm->userspace_mem_regions);
}
/*
@@ -712,20 +706,17 @@ memslot2region(struct kvm_vm *vm, uint32_t memslot)
{
struct userspace_mem_region *region;
- for (region = vm->userspace_mem_region_head; region;
- region = region->next) {
+ list_for_each_entry(region, &vm->userspace_mem_regions, list) {
if (region->region.slot == memslot)
- break;
- }
- if (region == NULL) {
- fprintf(stderr, "No mem region with the requested slot found,\n"
- " requested slot: %u\n", memslot);
- fputs("---- vm dump ----\n", stderr);
- vm_dump(stderr, vm, 2);
- TEST_FAIL("Mem region not found");
+ return region;
}
- return region;
+ fprintf(stderr, "No mem region with the requested slot found,\n"
+ " requested slot: %u\n", memslot);
+ fputs("---- vm dump ----\n", stderr);
+ vm_dump(stderr, vm, 2);
+ TEST_FAIL("Mem region not found");
+ return NULL;
}
/*
@@ -789,6 +780,24 @@ void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa)
}
/*
+ * VM Memory Region Delete
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * slot - Slot of the memory region to delete
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Delete a memory region.
+ */
+void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot)
+{
+ __vm_mem_region_delete(vm, memslot2region(vm, slot));
+}
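A sketch of the intended usage, mirroring set_memory_region_test.c added later in this patch: delete the slot out from under a running vCPU, then recreate it (vm and the MEM_REGION_* constants are assumed from that test):

        /* Drop the slot while the guest may be touching it ... */
        vm_mem_region_delete(vm, MEM_REGION_SLOT);

        /* ... then bring it back at the same GPA and slot number. */
        vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS_THP,
                                    MEM_REGION_GPA, MEM_REGION_SLOT,
                                    MEM_REGION_SIZE / getpagesize(), 0);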
+
+/*
* VCPU mmap Size
*
* Input Args: None
@@ -863,10 +872,7 @@ void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid)
"vcpu id: %u errno: %i", vcpuid, errno);
/* Add to linked-list of VCPUs. */
- if (vm->vcpu_head)
- vm->vcpu_head->prev = vcpu;
- vcpu->next = vm->vcpu_head;
- vm->vcpu_head = vcpu;
+ list_add(&vcpu->list, &vm->vcpus);
}
/*
@@ -1059,8 +1065,8 @@ void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa)
{
struct userspace_mem_region *region;
- for (region = vm->userspace_mem_region_head; region;
- region = region->next) {
+
+ list_for_each_entry(region, &vm->userspace_mem_regions, list) {
if ((gpa >= region->region.guest_phys_addr)
&& (gpa <= (region->region.guest_phys_addr
+ region->region.memory_size - 1)))
@@ -1092,8 +1098,8 @@ void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa)
vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva)
{
struct userspace_mem_region *region;
- for (region = vm->userspace_mem_region_head; region;
- region = region->next) {
+
+ list_for_each_entry(region, &vm->userspace_mem_regions, list) {
if ((hva >= region->host_mem)
&& (hva <= (region->host_mem
+ region->region.memory_size - 1)))
@@ -1529,8 +1535,7 @@ void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
fprintf(stream, "%*sfd: %i\n", indent, "", vm->fd);
fprintf(stream, "%*spage_size: 0x%x\n", indent, "", vm->page_size);
fprintf(stream, "%*sMem Regions:\n", indent, "");
- for (region = vm->userspace_mem_region_head; region;
- region = region->next) {
+ list_for_each_entry(region, &vm->userspace_mem_regions, list) {
fprintf(stream, "%*sguest_phys: 0x%lx size: 0x%lx "
"host_virt: %p\n", indent + 2, "",
(uint64_t) region->region.guest_phys_addr,
@@ -1549,7 +1554,7 @@ void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
virt_dump(stream, vm, indent + 4);
}
fprintf(stream, "%*sVCPUs:\n", indent, "");
- for (vcpu = vm->vcpu_head; vcpu; vcpu = vcpu->next)
+ list_for_each_entry(vcpu, &vm->vcpus, list)
vcpu_dump(stream, vm, vcpu->id, indent + 2);
}
@@ -1743,6 +1748,11 @@ unsigned int vm_get_max_gfn(struct kvm_vm *vm)
return vm->max_gfn;
}
+int vm_get_fd(struct kvm_vm *vm)
+{
+ return vm->fd;
+}
+
static unsigned int vm_calc_num_pages(unsigned int num_pages,
unsigned int page_shift,
unsigned int new_page_shift,
diff --git a/tools/testing/selftests/kvm/lib/kvm_util_internal.h b/tools/testing/selftests/kvm/lib/kvm_util_internal.h
index ca56a0133127..2ef446520748 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util_internal.h
+++ b/tools/testing/selftests/kvm/lib/kvm_util_internal.h
@@ -13,7 +13,6 @@
#define KVM_DEV_PATH "/dev/kvm"
struct userspace_mem_region {
- struct userspace_mem_region *next, *prev;
struct kvm_userspace_memory_region region;
struct sparsebit *unused_phy_pages;
int fd;
@@ -21,10 +20,11 @@ struct userspace_mem_region {
void *host_mem;
void *mmap_start;
size_t mmap_size;
+ struct list_head list;
};
struct vcpu {
- struct vcpu *next, *prev;
+ struct list_head list;
uint32_t id;
int fd;
struct kvm_run *state;
@@ -41,8 +41,8 @@ struct kvm_vm {
unsigned int pa_bits;
unsigned int va_bits;
uint64_t max_gfn;
- struct vcpu *vcpu_head;
- struct userspace_mem_region *userspace_mem_region_head;
+ struct list_head vcpus;
+ struct list_head userspace_mem_regions;
struct sparsebit *vpages_valid;
struct sparsebit *vpages_mapped;
bool has_irqchip;
diff --git a/tools/testing/selftests/kvm/lib/s390x/processor.c b/tools/testing/selftests/kvm/lib/s390x/processor.c
index 8d94961bd046..a88c5d665725 100644
--- a/tools/testing/selftests/kvm/lib/s390x/processor.c
+++ b/tools/testing/selftests/kvm/lib/s390x/processor.c
@@ -233,7 +233,10 @@ void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent)
{
- struct vcpu *vcpu = vm->vcpu_head;
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+
+ if (!vcpu)
+ return;
fprintf(stream, "%*spstate: psw: 0x%.16llx:0x%.16llx\n",
indent, "", vcpu->state->psw_mask, vcpu->state->psw_addr);
diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c
new file mode 100644
index 000000000000..b3ece55a2da6
--- /dev/null
+++ b/tools/testing/selftests/kvm/set_memory_region_test.c
@@ -0,0 +1,408 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <pthread.h>
+#include <sched.h>
+#include <semaphore.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+
+#include <linux/compiler.h>
+
+#include <test_util.h>
+#include <kvm_util.h>
+#include <processor.h>
+
+#define VCPU_ID 0
+
+/*
+ * s390x needs at least 1MB alignment, and the x86_64 MOVE/DELETE tests need a
+ * 2MB sized and aligned region so that the initial region corresponds to
+ * exactly one large page.
+ */
+#define MEM_REGION_SIZE 0x200000
+
+#ifdef __x86_64__
+/*
+ * Somewhat arbitrary location and slot, intended to not overlap anything.
+ */
+#define MEM_REGION_GPA 0xc0000000
+#define MEM_REGION_SLOT 10
+
+static const uint64_t MMIO_VAL = 0xbeefull;
+
+extern const uint64_t final_rip_start;
+extern const uint64_t final_rip_end;
+
+static sem_t vcpu_ready;
+
+static inline uint64_t guest_spin_on_val(uint64_t spin_val)
+{
+ uint64_t val;
+
+ do {
+ val = READ_ONCE(*((uint64_t *)MEM_REGION_GPA));
+ } while (val == spin_val);
+
+ GUEST_SYNC(0);
+ return val;
+}
+
+static void *vcpu_worker(void *data)
+{
+ struct kvm_vm *vm = data;
+ struct kvm_run *run;
+ struct ucall uc;
+ uint64_t cmd;
+
+ /*
+ * Loop until the guest is done. Re-enter the guest on all MMIO exits,
+ * which will occur if the guest attempts to access a memslot after it
+ * has been deleted or while it is being moved.
+ */
+ run = vcpu_state(vm, VCPU_ID);
+
+ while (1) {
+ vcpu_run(vm, VCPU_ID);
+
+ if (run->exit_reason == KVM_EXIT_IO) {
+ cmd = get_ucall(vm, VCPU_ID, &uc);
+ if (cmd != UCALL_SYNC)
+ break;
+
+ sem_post(&vcpu_ready);
+ continue;
+ }
+
+ if (run->exit_reason != KVM_EXIT_MMIO)
+ break;
+
+ TEST_ASSERT(!run->mmio.is_write, "Unexpected exit mmio write");
+ TEST_ASSERT(run->mmio.len == 8,
+ "Unexpected exit mmio size = %u", run->mmio.len);
+
+ TEST_ASSERT(run->mmio.phys_addr == MEM_REGION_GPA,
+ "Unexpected exit mmio address = 0x%llx",
+ run->mmio.phys_addr);
+ memcpy(run->mmio.data, &MMIO_VAL, 8);
+ }
+
+ if (run->exit_reason == KVM_EXIT_IO && cmd == UCALL_ABORT)
+ TEST_FAIL("%s at %s:%ld, val = %lu", (const char *)uc.args[0],
+ __FILE__, uc.args[1], uc.args[2]);
+
+ return NULL;
+}
+
+static void wait_for_vcpu(void)
+{
+ struct timespec ts;
+
+ TEST_ASSERT(!clock_gettime(CLOCK_REALTIME, &ts),
+ "clock_gettime() failed: %d\n", errno);
+
+ ts.tv_sec += 2;
+ TEST_ASSERT(!sem_timedwait(&vcpu_ready, &ts),
+ "sem_timedwait() failed: %d\n", errno);
+
+ /* Wait for the vCPU thread to reenter the guest. */
+ usleep(100000);
+}
+
+static struct kvm_vm *spawn_vm(pthread_t *vcpu_thread, void *guest_code)
+{
+ struct kvm_vm *vm;
+ uint64_t *hva;
+ uint64_t gpa;
+
+ vm = vm_create_default(VCPU_ID, 0, guest_code);
+
+ vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS_THP,
+ MEM_REGION_GPA, MEM_REGION_SLOT,
+ MEM_REGION_SIZE / getpagesize(), 0);
+
+ /*
+ * Allocate and map two pages so that the GPA accessed by guest_code()
+ * stays valid across the memslot move.
+ */
+ gpa = vm_phy_pages_alloc(vm, 2, MEM_REGION_GPA, MEM_REGION_SLOT);
+ TEST_ASSERT(gpa == MEM_REGION_GPA, "Failed vm_phy_pages_alloc\n");
+
+ virt_map(vm, MEM_REGION_GPA, MEM_REGION_GPA, 2, 0);
+
+ /* Ditto for the host mapping so that both pages can be zeroed. */
+ hva = addr_gpa2hva(vm, MEM_REGION_GPA);
+ memset(hva, 0, 2 * 4096);
+
+ pthread_create(vcpu_thread, NULL, vcpu_worker, vm);
+
+ /* Ensure the guest thread is spun up. */
+ wait_for_vcpu();
+
+ return vm;
+}
+
+
+static void guest_code_move_memory_region(void)
+{
+ uint64_t val;
+
+ GUEST_SYNC(0);
+
+ /*
+ * Spin until the memory region is moved to a misaligned address. This
+ * may or may not trigger MMIO, as the window where the memslot is
+ * invalid is quite small.
+ */
+ val = guest_spin_on_val(0);
+ GUEST_ASSERT_1(val == 1 || val == MMIO_VAL, val);
+
+ /* Spin until the memory region is realigned. */
+ val = guest_spin_on_val(MMIO_VAL);
+ GUEST_ASSERT_1(val == 1, val);
+
+ GUEST_DONE();
+}
+
+static void test_move_memory_region(void)
+{
+ pthread_t vcpu_thread;
+ struct kvm_vm *vm;
+ uint64_t *hva;
+
+ vm = spawn_vm(&vcpu_thread, guest_code_move_memory_region);
+
+ hva = addr_gpa2hva(vm, MEM_REGION_GPA);
+
+ /*
+ * Shift the region's base GPA. The guest should not see "2" as the
+ * hva->gpa translation is misaligned, i.e. the guest is accessing a
+ * different host pfn.
+ */
+ vm_mem_region_move(vm, MEM_REGION_SLOT, MEM_REGION_GPA - 4096);
+ WRITE_ONCE(*hva, 2);
+
+ /*
+ * The guest _might_ see an invalid memslot and trigger MMIO, but it's
+ * a tiny window. Spin and defer the sync until the memslot is
+ * restored and guest behavior is once again deterministic.
+ */
+ usleep(100000);
+
+ /*
+ * Note, value in memory needs to be changed *before* restoring the
+ * memslot, else the guest could race the update and see "2".
+ */
+ WRITE_ONCE(*hva, 1);
+
+ /* Restore the original base, the guest should see "1". */
+ vm_mem_region_move(vm, MEM_REGION_SLOT, MEM_REGION_GPA);
+ wait_for_vcpu();
+ /* Deferred sync from when the memslot was misaligned (above). */
+ wait_for_vcpu();
+
+ pthread_join(vcpu_thread, NULL);
+
+ kvm_vm_free(vm);
+}
+
+static void guest_code_delete_memory_region(void)
+{
+ uint64_t val;
+
+ GUEST_SYNC(0);
+
+ /* Spin until the memory region is deleted. */
+ val = guest_spin_on_val(0);
+ GUEST_ASSERT_1(val == MMIO_VAL, val);
+
+ /* Spin until the memory region is recreated. */
+ val = guest_spin_on_val(MMIO_VAL);
+ GUEST_ASSERT_1(val == 0, val);
+
+ /* Spin until the memory region is deleted. */
+ val = guest_spin_on_val(0);
+ GUEST_ASSERT_1(val == MMIO_VAL, val);
+
+ asm("1:\n\t"
+ ".pushsection .rodata\n\t"
+ ".global final_rip_start\n\t"
+ "final_rip_start: .quad 1b\n\t"
+ ".popsection");
+
+ /* Spin indefinitely (until the code memslot is deleted). */
+ guest_spin_on_val(MMIO_VAL);
+
+ asm("1:\n\t"
+ ".pushsection .rodata\n\t"
+ ".global final_rip_end\n\t"
+ "final_rip_end: .quad 1b\n\t"
+ ".popsection");
+
+ GUEST_ASSERT_1(0, 0);
+}
+
+static void test_delete_memory_region(void)
+{
+ pthread_t vcpu_thread;
+ struct kvm_regs regs;
+ struct kvm_run *run;
+ struct kvm_vm *vm;
+
+ vm = spawn_vm(&vcpu_thread, guest_code_delete_memory_region);
+
+ /* Delete the memory region, the guest should not die. */
+ vm_mem_region_delete(vm, MEM_REGION_SLOT);
+ wait_for_vcpu();
+
+ /* Recreate the memory region. The guest should see "0". */
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS_THP,
+ MEM_REGION_GPA, MEM_REGION_SLOT,
+ MEM_REGION_SIZE / getpagesize(), 0);
+ wait_for_vcpu();
+
+ /* Delete the region again so that there's only one memslot left. */
+ vm_mem_region_delete(vm, MEM_REGION_SLOT);
+ wait_for_vcpu();
+
+ /*
+ * Delete the primary memslot. This should cause an emulation error or
+ * shutdown due to the page tables getting nuked.
+ */
+ vm_mem_region_delete(vm, 0);
+
+ pthread_join(vcpu_thread, NULL);
+
+ run = vcpu_state(vm, VCPU_ID);
+
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_SHUTDOWN ||
+ run->exit_reason == KVM_EXIT_INTERNAL_ERROR,
+ "Unexpected exit reason = %d", run->exit_reason);
+
+ vcpu_regs_get(vm, VCPU_ID, &regs);
+
+ /*
+ * On AMD, after KVM_EXIT_SHUTDOWN the VMCB has been reinitialized already,
+ * so the instruction pointer would point to the reset vector.
+ */
+ if (run->exit_reason == KVM_EXIT_INTERNAL_ERROR)
+ TEST_ASSERT(regs.rip >= final_rip_start &&
+ regs.rip < final_rip_end,
+ "Bad rip, expected 0x%lx - 0x%lx, got 0x%llx\n",
+ final_rip_start, final_rip_end, regs.rip);
+
+ kvm_vm_free(vm);
+}
+
+static void test_zero_memory_regions(void)
+{
+ struct kvm_run *run;
+ struct kvm_vm *vm;
+
+ pr_info("Testing KVM_RUN with zero added memory regions\n");
+
+ vm = vm_create(VM_MODE_DEFAULT, 0, O_RDWR);
+ vm_vcpu_add(vm, VCPU_ID);
+
+ TEST_ASSERT(!ioctl(vm_get_fd(vm), KVM_SET_NR_MMU_PAGES, 64),
+ "KVM_SET_NR_MMU_PAGES failed, errno = %d\n", errno);
+ vcpu_run(vm, VCPU_ID);
+
+ run = vcpu_state(vm, VCPU_ID);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_INTERNAL_ERROR,
+ "Unexpected exit_reason = %u\n", run->exit_reason);
+
+ kvm_vm_free(vm);
+}
+#endif /* __x86_64__ */
+
+/*
+ * Test that memory slots can be added up to KVM_CAP_NR_MEMSLOTS, and that
+ * any attempt to add further slots fails.
+ */
+static void test_add_max_memory_regions(void)
+{
+ int ret;
+ struct kvm_vm *vm;
+ uint32_t max_mem_slots;
+ uint32_t slot;
+ uint64_t guest_addr = 0x0;
+ uint64_t mem_reg_npages;
+ void *mem;
+
+ max_mem_slots = kvm_check_cap(KVM_CAP_NR_MEMSLOTS);
+ TEST_ASSERT(max_mem_slots > 0,
+ "KVM_CAP_NR_MEMSLOTS should be greater than 0");
+ pr_info("Allowed number of memory slots: %i\n", max_mem_slots);
+
+ vm = vm_create(VM_MODE_DEFAULT, 0, O_RDWR);
+
+ mem_reg_npages = vm_calc_num_guest_pages(VM_MODE_DEFAULT, MEM_REGION_SIZE);
+
+ /* Check that memory slots can be added up to the maximum allowed */
+ pr_info("Adding slots 0..%i, each memory region with %dK size\n",
+ (max_mem_slots - 1), MEM_REGION_SIZE >> 10);
+ for (slot = 0; slot < max_mem_slots; slot++) {
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+ guest_addr, slot, mem_reg_npages,
+ 0);
+ guest_addr += MEM_REGION_SIZE;
+ }
+
+ /* Check that adding one more memory slot beyond the limit fails */
+ mem = mmap(NULL, MEM_REGION_SIZE, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ TEST_ASSERT(mem != MAP_FAILED, "Failed to mmap() host");
+
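+ /*
+ * The compound literal's fields are, in order: slot, flags,
+ * guest_phys_addr, memory_size and userspace_addr.
+ */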
+ ret = ioctl(vm_get_fd(vm), KVM_SET_USER_MEMORY_REGION,
+ &(struct kvm_userspace_memory_region) {slot, 0, guest_addr,
+ MEM_REGION_SIZE, (uint64_t) mem});
+ TEST_ASSERT(ret == -1 && errno == EINVAL,
+ "Adding one more memory slot should fail with EINVAL");
+
+ munmap(mem, MEM_REGION_SIZE);
+ kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+#ifdef __x86_64__
+ int i, loops;
+#endif
+
+ /* Tell stdout not to buffer its content */
+ setbuf(stdout, NULL);
+
+#ifdef __x86_64__
+ /*
+ * FIXME: the zero-memslot test fails on aarch64 and s390x because
+ * KVM_RUN fails with ENOEXEC or EFAULT.
+ */
+ test_zero_memory_regions();
+#endif
+
+ test_add_max_memory_regions();
+
+#ifdef __x86_64__
+ if (argc > 1)
+ loops = atoi(argv[1]);
+ else
+ loops = 10;
+
+ pr_info("Testing MOVE of in-use region, %d loops\n", loops);
+ for (i = 0; i < loops; i++)
+ test_move_memory_region();
+
+ pr_info("Testing DELETE of in-use region, %d loops\n", loops);
+ for (i = 0; i < loops; i++)
+ test_delete_memory_region();
+#endif
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
index 83323f3d7ca0..4a7967cca281 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
@@ -26,18 +26,18 @@ static void guest_code(void)
{
}
-static int smt_possible(void)
+static bool smt_possible(void)
{
char buf[16];
FILE *f;
- bool res = 1;
+ bool res = true;
f = fopen("/sys/devices/system/cpu/smt/control", "r");
if (f) {
if (fread(buf, sizeof(*buf), sizeof(buf), f) > 0) {
if (!strncmp(buf, "forceoff", 8) ||
!strncmp(buf, "notsupported", 12))
- res = 0;
+ res = false;
}
fclose(f);
}
@@ -46,29 +46,31 @@ static int smt_possible(void)
}
static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries,
- int evmcs_enabled)
+ bool evmcs_enabled)
{
int i;
+ int nent = 9;
+ u32 test_val;
- if (!evmcs_enabled)
- TEST_ASSERT(hv_cpuid_entries->nent == 6,
- "KVM_GET_SUPPORTED_HV_CPUID should return 6 entries"
- " when Enlightened VMCS is disabled (returned %d)",
- hv_cpuid_entries->nent);
- else
- TEST_ASSERT(hv_cpuid_entries->nent == 7,
- "KVM_GET_SUPPORTED_HV_CPUID should return 7 entries"
- " when Enlightened VMCS is enabled (returned %d)",
- hv_cpuid_entries->nent);
+ if (evmcs_enabled)
+ nent += 1; /* 0x4000000A */
+
+ TEST_ASSERT(hv_cpuid_entries->nent == nent,
+ "KVM_GET_SUPPORTED_HV_CPUID should return %d entries"
+ " with evmcs=%d (returned %d)",
+ nent, evmcs_enabled, hv_cpuid_entries->nent);
for (i = 0; i < hv_cpuid_entries->nent; i++) {
struct kvm_cpuid_entry2 *entry = &hv_cpuid_entries->entries[i];
TEST_ASSERT((entry->function >= 0x40000000) &&
- (entry->function <= 0x4000000A),
+ (entry->function <= 0x40000082),
"function %x is our of supported range",
entry->function);
+ TEST_ASSERT(evmcs_enabled || (entry->function != 0x4000000A),
+ "0x4000000A leaf should not be reported");
+
TEST_ASSERT(entry->index == 0,
".index field should be zero");
@@ -78,12 +80,23 @@ static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries,
TEST_ASSERT(!entry->padding[0] && !entry->padding[1] &&
!entry->padding[2], "padding should be zero");
- if (entry->function == 0x40000004) {
- int nononarchcs = !!(entry->eax & (1UL << 18));
+ switch (entry->function) {
+ case 0x40000000:
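+ /* EAX of leaf 0x40000000 reports the maximum supported Hyper-V CPUID leaf. */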
+ test_val = 0x40000082;
- TEST_ASSERT(nononarchcs == !smt_possible(),
+ TEST_ASSERT(entry->eax == test_val,
+ "Wrong max leaf report in 0x40000000.EAX: %x"
+ " (evmcs=%d)",
+ entry->eax, evmcs_enabled
+ );
+ break;
+ case 0x40000004:
+ test_val = entry->eax & (1UL << 18);
+
+ TEST_ASSERT(!!test_val == !smt_possible(),
"NoNonArchitecturalCoreSharing bit"
" doesn't reflect SMT setting");
+ break;
}
/*
@@ -133,8 +146,9 @@ struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(struct kvm_vm *vm)
int main(int argc, char *argv[])
{
struct kvm_vm *vm;
- int rv;
+ int rv, stage;
struct kvm_cpuid2 *hv_cpuid_entries;
+ bool evmcs_enabled;
/* Tell stdout not to buffer its content */
setbuf(stdout, NULL);
@@ -145,36 +159,31 @@ int main(int argc, char *argv[])
exit(KSFT_SKIP);
}
- /* Create VM */
- vm = vm_create_default(VCPU_ID, 0, guest_code);
-
- test_hv_cpuid_e2big(vm);
-
- hv_cpuid_entries = kvm_get_supported_hv_cpuid(vm);
- if (!hv_cpuid_entries)
- return 1;
-
- test_hv_cpuid(hv_cpuid_entries, 0);
-
- free(hv_cpuid_entries);
+ for (stage = 0; stage < 3; stage++) {
+ evmcs_enabled = false;
+
+ vm = vm_create_default(VCPU_ID, 0, guest_code);
+ switch (stage) {
+ case 0:
+ test_hv_cpuid_e2big(vm);
+ continue;
+ case 1:
+ break;
+ case 2:
+ if (!kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) {
+ print_skip("Enlightened VMCS is unsupported");
+ continue;
+ }
+ vcpu_enable_evmcs(vm, VCPU_ID);
+ evmcs_enabled = true;
+ break;
+ }
- if (!kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) {
- print_skip("Enlightened VMCS is unsupported");
- goto vm_free;
+ hv_cpuid_entries = kvm_get_supported_hv_cpuid(vm);
+ test_hv_cpuid(hv_cpuid_entries, evmcs_enabled);
+ free(hv_cpuid_entries);
+ kvm_vm_free(vm);
}
- vcpu_enable_evmcs(vm, VCPU_ID);
-
- hv_cpuid_entries = kvm_get_supported_hv_cpuid(vm);
- if (!hv_cpuid_entries)
- return 1;
-
- test_hv_cpuid(hv_cpuid_entries, 1);
-
- free(hv_cpuid_entries);
-
-vm_free:
- kvm_vm_free(vm);
-
return 0;
}
diff --git a/tools/testing/selftests/kvm/x86_64/set_memory_region_test.c b/tools/testing/selftests/kvm/x86_64/set_memory_region_test.c
deleted file mode 100644
index c6691cff4e19..000000000000
--- a/tools/testing/selftests/kvm/x86_64/set_memory_region_test.c
+++ /dev/null
@@ -1,141 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#define _GNU_SOURCE /* for program_invocation_short_name */
-#include <fcntl.h>
-#include <pthread.h>
-#include <sched.h>
-#include <signal.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include <linux/compiler.h>
-
-#include <test_util.h>
-#include <kvm_util.h>
-#include <processor.h>
-
-#define VCPU_ID 0
-
-/*
- * Somewhat arbitrary location and slot, intended to not overlap anything. The
- * location and size are specifically 2mb sized/aligned so that the initial
- * region corresponds to exactly one large page.
- */
-#define MEM_REGION_GPA 0xc0000000
-#define MEM_REGION_SIZE 0x200000
-#define MEM_REGION_SLOT 10
-
-static void guest_code(void)
-{
- uint64_t val;
-
- do {
- val = READ_ONCE(*((uint64_t *)MEM_REGION_GPA));
- } while (!val);
-
- if (val != 1)
- ucall(UCALL_ABORT, 1, val);
-
- GUEST_DONE();
-}
-
-static void *vcpu_worker(void *data)
-{
- struct kvm_vm *vm = data;
- struct kvm_run *run;
- struct ucall uc;
- uint64_t cmd;
-
- /*
- * Loop until the guest is done. Re-enter the guest on all MMIO exits,
- * which will occur if the guest attempts to access a memslot while it
- * is being moved.
- */
- run = vcpu_state(vm, VCPU_ID);
- do {
- vcpu_run(vm, VCPU_ID);
- } while (run->exit_reason == KVM_EXIT_MMIO);
-
- TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
- "Unexpected exit reason = %d", run->exit_reason);
-
- cmd = get_ucall(vm, VCPU_ID, &uc);
- TEST_ASSERT(cmd == UCALL_DONE, "Unexpected val in guest = %lu", uc.args[0]);
- return NULL;
-}
-
-static void test_move_memory_region(void)
-{
- pthread_t vcpu_thread;
- struct kvm_vm *vm;
- uint64_t *hva;
- uint64_t gpa;
-
- vm = vm_create_default(VCPU_ID, 0, guest_code);
-
- vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
-
- vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS_THP,
- MEM_REGION_GPA, MEM_REGION_SLOT,
- MEM_REGION_SIZE / getpagesize(), 0);
-
- /*
- * Allocate and map two pages so that the GPA accessed by guest_code()
- * stays valid across the memslot move.
- */
- gpa = vm_phy_pages_alloc(vm, 2, MEM_REGION_GPA, MEM_REGION_SLOT);
- TEST_ASSERT(gpa == MEM_REGION_GPA, "Failed vm_phy_pages_alloc\n");
-
- virt_map(vm, MEM_REGION_GPA, MEM_REGION_GPA, 2, 0);
-
- /* Ditto for the host mapping so that both pages can be zeroed. */
- hva = addr_gpa2hva(vm, MEM_REGION_GPA);
- memset(hva, 0, 2 * 4096);
-
- pthread_create(&vcpu_thread, NULL, vcpu_worker, vm);
-
- /* Ensure the guest thread is spun up. */
- usleep(100000);
-
- /*
- * Shift the region's base GPA. The guest should not see "2" as the
- * hva->gpa translation is misaligned, i.e. the guest is accessing a
- * different host pfn.
- */
- vm_mem_region_move(vm, MEM_REGION_SLOT, MEM_REGION_GPA - 4096);
- WRITE_ONCE(*hva, 2);
-
- usleep(100000);
-
- /*
- * Note, value in memory needs to be changed *before* restoring the
- * memslot, else the guest could race the update and see "2".
- */
- WRITE_ONCE(*hva, 1);
-
- /* Restore the original base, the guest should see "1". */
- vm_mem_region_move(vm, MEM_REGION_SLOT, MEM_REGION_GPA);
-
- pthread_join(vcpu_thread, NULL);
-
- kvm_vm_free(vm);
-}
-
-int main(int argc, char *argv[])
-{
- int i, loops;
-
- /* Tell stdout not to buffer its content */
- setbuf(stdout, NULL);
-
- if (argc > 1)
- loops = atoi(argv[1]);
- else
- loops = 10;
-
- for (i = 0; i < loops; i++)
- test_move_memory_region();
-
- return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86_64/smm_test.c
index 8230b6bc6b8f..6f8f478b3ceb 100644
--- a/tools/testing/selftests/kvm/x86_64/smm_test.c
+++ b/tools/testing/selftests/kvm/x86_64/smm_test.c
@@ -17,6 +17,7 @@
#include "kvm_util.h"
#include "vmx.h"
+#include "svm_util.h"
#define VCPU_ID 1
@@ -58,7 +59,7 @@ void self_smi(void)
APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_SMI);
}
-void guest_code(struct vmx_pages *vmx_pages)
+void guest_code(void *arg)
{
uint64_t apicbase = rdmsr(MSR_IA32_APICBASE);
@@ -72,8 +73,11 @@ void guest_code(struct vmx_pages *vmx_pages)
sync_with_host(4);
- if (vmx_pages) {
- GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+ if (arg) {
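+ /* 'arg' is a struct svm_test_data * on AMD, a struct vmx_pages * on Intel. */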
+ if (cpu_has_svm())
+ generic_svm_setup(arg, NULL, NULL);
+ else
+ GUEST_ASSERT(prepare_for_vmx_operation(arg));
sync_with_host(5);
@@ -87,7 +91,7 @@ void guest_code(struct vmx_pages *vmx_pages)
int main(int argc, char *argv[])
{
- vm_vaddr_t vmx_pages_gva = 0;
+ vm_vaddr_t nested_gva = 0;
struct kvm_regs regs;
struct kvm_vm *vm;
@@ -114,8 +118,11 @@ int main(int argc, char *argv[])
vcpu_set_msr(vm, VCPU_ID, MSR_IA32_SMBASE, SMRAM_GPA);
if (kvm_check_cap(KVM_CAP_NESTED_STATE)) {
- vcpu_alloc_vmx(vm, &vmx_pages_gva);
- vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
+ if (kvm_get_supported_cpuid_entry(0x80000001)->ecx & CPUID_SVM)
+ vcpu_alloc_svm(vm, &nested_gva);
+ else
+ vcpu_alloc_vmx(vm, &nested_gva);
+ vcpu_args_set(vm, VCPU_ID, 1, nested_gva);
} else {
pr_info("will skip SMM test with VMX enabled\n");
vcpu_args_set(vm, VCPU_ID, 1, 0);
diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86_64/state_test.c
index 5b1a016edf55..d43b6f99b66c 100644
--- a/tools/testing/selftests/kvm/x86_64/state_test.c
+++ b/tools/testing/selftests/kvm/x86_64/state_test.c
@@ -18,14 +18,46 @@
#include "kvm_util.h"
#include "processor.h"
#include "vmx.h"
+#include "svm_util.h"
#define VCPU_ID 5
+#define L2_GUEST_STACK_SIZE 256
-void l2_guest_code(void)
+void svm_l2_guest_code(void)
{
+ GUEST_SYNC(4);
+ /* Exit to L1 */
+ vmcall();
GUEST_SYNC(6);
+ /* Done, exit to L1 and never come back. */
+ vmcall();
+}
- /* Exit to L1 */
+static void svm_l1_guest_code(struct svm_test_data *svm)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ struct vmcb *vmcb = svm->vmcb;
+
+ GUEST_ASSERT(svm->vmcb_gpa);
+ /* Prepare for L2 execution. */
+ generic_svm_setup(svm, svm_l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ GUEST_SYNC(3);
+ run_guest(vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+ GUEST_SYNC(5);
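+ /* Skip over the 3-byte VMMCALL that caused the exit. */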
+ vmcb->save.rip += 3;
+ run_guest(vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+ GUEST_SYNC(7);
+}
+
+void vmx_l2_guest_code(void)
+{
+ GUEST_SYNC(6);
+
+ /* Exit to L1 */
vmcall();
/* L1 has now set up a shadow VMCS for us. */
@@ -42,10 +74,9 @@ void l2_guest_code(void)
vmcall();
}
-void l1_guest_code(struct vmx_pages *vmx_pages)
+static void vmx_l1_guest_code(struct vmx_pages *vmx_pages)
{
-#define L2_GUEST_STACK_SIZE 64
- unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
GUEST_ASSERT(vmx_pages->vmcs_gpa);
GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
@@ -56,7 +87,7 @@ void l1_guest_code(struct vmx_pages *vmx_pages)
GUEST_SYNC(4);
GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
- prepare_vmcs(vmx_pages, l2_guest_code,
+ prepare_vmcs(vmx_pages, vmx_l2_guest_code,
&l2_guest_stack[L2_GUEST_STACK_SIZE]);
GUEST_SYNC(5);
@@ -106,20 +137,24 @@ void l1_guest_code(struct vmx_pages *vmx_pages)
GUEST_ASSERT(vmresume());
}
-void guest_code(struct vmx_pages *vmx_pages)
+static void __attribute__((__flatten__)) guest_code(void *arg)
{
GUEST_SYNC(1);
GUEST_SYNC(2);
- if (vmx_pages)
- l1_guest_code(vmx_pages);
+ if (arg) {
+ if (cpu_has_svm())
+ svm_l1_guest_code(arg);
+ else
+ vmx_l1_guest_code(arg);
+ }
GUEST_DONE();
}
int main(int argc, char *argv[])
{
- vm_vaddr_t vmx_pages_gva = 0;
+ vm_vaddr_t nested_gva = 0;
struct kvm_regs regs1, regs2;
struct kvm_vm *vm;
@@ -136,8 +171,11 @@ int main(int argc, char *argv[])
vcpu_regs_get(vm, VCPU_ID, &regs1);
if (kvm_check_cap(KVM_CAP_NESTED_STATE)) {
- vcpu_alloc_vmx(vm, &vmx_pages_gva);
- vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
+ if (kvm_get_supported_cpuid_entry(0x80000001)->ecx & CPUID_SVM)
+ vcpu_alloc_svm(vm, &nested_gva);
+ else
+ vcpu_alloc_vmx(vm, &nested_gva);
+ vcpu_args_set(vm, VCPU_ID, 1, nested_gva);
} else {
pr_info("will skip nested state checks\n");
vcpu_args_set(vm, VCPU_ID, 1, 0);
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c b/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
new file mode 100644
index 000000000000..cc72b6188ca7
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
@@ -0,0 +1,255 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * VMX-preemption timer test
+ *
+ * Copyright (C) 2020, Google, LLC.
+ *
+ * Test to ensure that VM-Enter after migration doesn't
+ * incorrectly restart the timer with the full timer
+ * value instead of the partially decayed timer value.
+ *
+ */
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#define VCPU_ID 5
+#define PREEMPTION_TIMER_VALUE 100000000ull
+#define PREEMPTION_TIMER_VALUE_THRESHOLD1 80000000ull
+
+u32 vmx_pt_rate;
+bool l2_save_restore_done;
+static u64 l2_vmx_pt_start;
+volatile u64 l2_vmx_pt_finish;
+
+void l2_guest_code(void)
+{
+ u64 vmx_pt_delta;
+
+ vmcall();
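+ /* Round the TSC down to the preemption-timer granularity (2^vmx_pt_rate). */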
+ l2_vmx_pt_start = (rdtsc() >> vmx_pt_rate) << vmx_pt_rate;
+
+ /*
+ * Wait until the 1st threshold has passed
+ */
+ do {
+ l2_vmx_pt_finish = rdtsc();
+ vmx_pt_delta = (l2_vmx_pt_finish - l2_vmx_pt_start) >>
+ vmx_pt_rate;
+ } while (vmx_pt_delta < PREEMPTION_TIMER_VALUE_THRESHOLD1);
+
+ /*
+ * Force L2 through Save and Restore cycle
+ */
+ GUEST_SYNC(1);
+
+ l2_save_restore_done = 1;
+
+ /*
+ * Now wait for the preemption timer to fire and
+ * exit to L1
+ */
+ while ((l2_vmx_pt_finish = rdtsc()))
+ ;
+}
+
+void l1_guest_code(struct vmx_pages *vmx_pages)
+{
+#define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ u64 l1_vmx_pt_start;
+ u64 l1_vmx_pt_finish;
+ u64 l1_tsc_deadline, l2_tsc_deadline;
+
+ GUEST_ASSERT(vmx_pages->vmcs_gpa);
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+ GUEST_ASSERT(load_vmcs(vmx_pages));
+ GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
+
+ prepare_vmcs(vmx_pages, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ /*
+ * Check for Preemption timer support
+ */
+ basic.val = rdmsr(MSR_IA32_VMX_BASIC);
+ ctrl_pin_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PINBASED_CTLS
+ : MSR_IA32_VMX_PINBASED_CTLS);
+ ctrl_exit_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_EXIT_CTLS
+ : MSR_IA32_VMX_EXIT_CTLS);
+
+ if (!(ctrl_pin_rev.clr & PIN_BASED_VMX_PREEMPTION_TIMER) ||
+ !(ctrl_exit_rev.clr & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER))
+ return;
+
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+ vmwrite(GUEST_RIP, vmreadz(GUEST_RIP) + vmreadz(VM_EXIT_INSTRUCTION_LEN));
+
+ /*
+ * Turn on PIN control and resume the guest
+ */
+ GUEST_ASSERT(!vmwrite(PIN_BASED_VM_EXEC_CONTROL,
+ vmreadz(PIN_BASED_VM_EXEC_CONTROL) |
+ PIN_BASED_VMX_PREEMPTION_TIMER));
+
+ GUEST_ASSERT(!vmwrite(VMX_PREEMPTION_TIMER_VALUE,
+ PREEMPTION_TIMER_VALUE));
+
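+ /* Bits 4:0 of IA32_VMX_MISC give the TSC-to-preemption-timer rate shift. */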
+ vmx_pt_rate = rdmsr(MSR_IA32_VMX_MISC) & 0x1F;
+
+ l2_save_restore_done = 0;
+
+ l1_vmx_pt_start = (rdtsc() >> vmx_pt_rate) << vmx_pt_rate;
+
+ GUEST_ASSERT(!vmresume());
+
+ l1_vmx_pt_finish = rdtsc();
+
+ /*
+ * Ensure exit from L2 happens after L2 goes through
+ * save and restore
+ */
+ GUEST_ASSERT(l2_save_restore_done);
+
+ /*
+ * Ensure the exit from L2 is due to preemption timer expiry
+ */
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_PREEMPTION_TIMER);
+
+ l1_tsc_deadline = l1_vmx_pt_start +
+ (PREEMPTION_TIMER_VALUE << vmx_pt_rate);
+
+ l2_tsc_deadline = l2_vmx_pt_start +
+ (PREEMPTION_TIMER_VALUE << vmx_pt_rate);
+
+ /*
+ * Sync with the host and pass the l1|l2 pt_expiry_finish times and
+ * tsc deadlines so that the host can verify they are as expected.
+ */
+ GUEST_SYNC_ARGS(2, l1_vmx_pt_finish, l1_tsc_deadline,
+ l2_vmx_pt_finish, l2_tsc_deadline);
+}
+
+void guest_code(struct vmx_pages *vmx_pages)
+{
+ if (vmx_pages)
+ l1_guest_code(vmx_pages);
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ vm_vaddr_t vmx_pages_gva = 0;
+
+ struct kvm_regs regs1, regs2;
+ struct kvm_vm *vm;
+ struct kvm_run *run;
+ struct kvm_x86_state *state;
+ struct ucall uc;
+ int stage;
+
+ /*
+ * AMD currently does not implement any VMX features, so for now we
+ * just exit early.
+ */
+ nested_vmx_check_supported();
+
+ /* Create VM */
+ vm = vm_create_default(VCPU_ID, 0, guest_code);
+ vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+ run = vcpu_state(vm, VCPU_ID);
+
+ vcpu_regs_get(vm, VCPU_ID, &regs1);
+
+ if (kvm_check_cap(KVM_CAP_NESTED_STATE)) {
+ vcpu_alloc_vmx(vm, &vmx_pages_gva);
+ vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
+ } else {
+ pr_info("will skip vmx preemption timer checks\n");
+ goto done;
+ }
+
+ for (stage = 1;; stage++) {
+ _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Stage %d: unexpected exit reason: %u (%s),\n",
+ stage, run->exit_reason,
+ exit_reason_str(run->exit_reason));
+
+ switch (get_ucall(vm, VCPU_ID, &uc)) {
+ case UCALL_ABORT:
+ TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
+ __FILE__, uc.args[1]);
+ /* NOT REACHED */
+ case UCALL_SYNC:
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+
+ /* UCALL_SYNC is handled here. */
+ TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
+ uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx",
+ stage, (ulong)uc.args[1]);
+ /*
+ * If this is stage 2, verify that the VMX preemption timer expiry
+ * is as expected.
+ * From L1's perspective verify Preemption timer hasn't
+ * expired too early.
+ * From L2's perspective verify Preemption timer hasn't
+ * expired too late.
+ */
+ if (stage == 2) {
+
+ pr_info("Stage %d: L1 PT expiry TSC (%lu) , L1 TSC deadline (%lu)\n",
+ stage, uc.args[2], uc.args[3]);
+
+ pr_info("Stage %d: L2 PT expiry TSC (%lu) , L2 TSC deadline (%lu)\n",
+ stage, uc.args[4], uc.args[5]);
+
+ TEST_ASSERT(uc.args[2] >= uc.args[3],
+ "Stage %d: L1 PT expiry TSC (%lu) < L1 TSC deadline (%lu)",
+ stage, uc.args[2], uc.args[3]);
+
+ TEST_ASSERT(uc.args[4] < uc.args[5],
+ "Stage %d: L2 PT expiry TSC (%lu) > L2 TSC deadline (%lu)",
+ stage, uc.args[4], uc.args[5]);
+ }
+
+ state = vcpu_save_state(vm, VCPU_ID);
+ memset(&regs1, 0, sizeof(regs1));
+ vcpu_regs_get(vm, VCPU_ID, &regs1);
+
+ kvm_vm_release(vm);
+
+ /* Restore state in a new VM. */
+ kvm_vm_restart(vm, O_RDWR);
+ vm_vcpu_add(vm, VCPU_ID);
+ vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+ vcpu_load_state(vm, VCPU_ID, state);
+ run = vcpu_state(vm, VCPU_ID);
+ free(state);
+
+ memset(&regs2, 0, sizeof(regs2));
+ vcpu_regs_get(vm, VCPU_ID, &regs2);
+ TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
+ "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
+ (ulong) regs2.rdi, (ulong) regs2.rsi);
+ }
+
+done:
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/pidfd/.gitignore b/tools/testing/selftests/pidfd/.gitignore
index 2d4db5afb142..973198a3ec3d 100644
--- a/tools/testing/selftests/pidfd/.gitignore
+++ b/tools/testing/selftests/pidfd/.gitignore
@@ -5,3 +5,4 @@ pidfd_test
pidfd_wait
pidfd_fdinfo_test
pidfd_getfd_test
+pidfd_setns_test
diff --git a/tools/testing/selftests/pidfd/Makefile b/tools/testing/selftests/pidfd/Makefile
index 75a545861375..f4a2f28f926b 100644
--- a/tools/testing/selftests/pidfd/Makefile
+++ b/tools/testing/selftests/pidfd/Makefile
@@ -1,7 +1,8 @@
# SPDX-License-Identifier: GPL-2.0-only
CFLAGS += -g -I../../../../usr/include/ -pthread
-TEST_GEN_PROGS := pidfd_test pidfd_fdinfo_test pidfd_open_test pidfd_poll_test pidfd_wait pidfd_getfd_test
+TEST_GEN_PROGS := pidfd_test pidfd_fdinfo_test pidfd_open_test \
+ pidfd_poll_test pidfd_wait pidfd_getfd_test pidfd_setns_test
include ../lib.mk
diff --git a/tools/testing/selftests/pidfd/config b/tools/testing/selftests/pidfd/config
new file mode 100644
index 000000000000..bb11de90c0c9
--- /dev/null
+++ b/tools/testing/selftests/pidfd/config
@@ -0,0 +1,6 @@
+CONFIG_UTS_NS=y
+CONFIG_IPC_NS=y
+CONFIG_USER_NS=y
+CONFIG_PID_NS=y
+CONFIG_NET_NS=y
+CONFIG_CGROUPS=y
diff --git a/tools/testing/selftests/pidfd/pidfd_setns_test.c b/tools/testing/selftests/pidfd/pidfd_setns_test.c
new file mode 100644
index 000000000000..133ec5b6cda8
--- /dev/null
+++ b/tools/testing/selftests/pidfd/pidfd_setns_test.c
@@ -0,0 +1,473 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <linux/types.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <syscall.h>
+#include <sys/prctl.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <linux/kcmp.h>
+
+#include "pidfd.h"
+#include "../clone3/clone3_selftests.h"
+#include "../kselftest.h"
+#include "../kselftest_harness.h"
+
+enum {
+ PIDFD_NS_USER,
+ PIDFD_NS_MNT,
+ PIDFD_NS_PID,
+ PIDFD_NS_UTS,
+ PIDFD_NS_IPC,
+ PIDFD_NS_NET,
+ PIDFD_NS_CGROUP,
+ PIDFD_NS_PIDCLD,
+ PIDFD_NS_MAX
+};
+
+const struct ns_info {
+ const char *name;
+ int flag;
+} ns_info[] = {
+ [PIDFD_NS_USER] = { "user", CLONE_NEWUSER, },
+ [PIDFD_NS_MNT] = { "mnt", CLONE_NEWNS, },
+ [PIDFD_NS_PID] = { "pid", CLONE_NEWPID, },
+ [PIDFD_NS_UTS] = { "uts", CLONE_NEWUTS, },
+ [PIDFD_NS_IPC] = { "ipc", CLONE_NEWIPC, },
+ [PIDFD_NS_NET] = { "net", CLONE_NEWNET, },
+ [PIDFD_NS_CGROUP] = { "cgroup", CLONE_NEWCGROUP, },
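+ /* pid_for_children has no CLONE flag; a zero flag means it is only opened, never setns()'d. */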
+ [PIDFD_NS_PIDCLD] = { "pid_for_children", 0, },
+};
+
+FIXTURE(current_nsset)
+{
+ pid_t pid;
+ int pidfd;
+ int nsfds[PIDFD_NS_MAX];
+
+ pid_t child_pid_exited;
+ int child_pidfd_exited;
+
+ pid_t child_pid1;
+ int child_pidfd1;
+ int child_nsfds1[PIDFD_NS_MAX];
+
+ pid_t child_pid2;
+ int child_pidfd2;
+ int child_nsfds2[PIDFD_NS_MAX];
+};
+
+static int sys_waitid(int which, pid_t pid, int options)
+{
+ return syscall(__NR_waitid, which, pid, NULL, options, NULL);
+}
+
+pid_t create_child(int *pidfd, unsigned flags)
+{
+ struct clone_args args = {
+ .flags = CLONE_PIDFD | flags,
+ .exit_signal = SIGCHLD,
+ .pidfd = ptr_to_u64(pidfd),
+ };
+
+ return sys_clone3(&args, sizeof(struct clone_args));
+}
+
+FIXTURE_SETUP(current_nsset)
+{
+ int i, proc_fd, ret;
+
+ for (i = 0; i < PIDFD_NS_MAX; i++) {
+ self->nsfds[i] = -EBADF;
+ self->child_nsfds1[i] = -EBADF;
+ self->child_nsfds2[i] = -EBADF;
+ }
+
+ proc_fd = open("/proc/self/ns", O_DIRECTORY | O_CLOEXEC);
+ ASSERT_GE(proc_fd, 0) {
+ TH_LOG("%m - Failed to open /proc/self/ns");
+ }
+
+ self->pid = getpid();
+ for (i = 0; i < PIDFD_NS_MAX; i++) {
+ const struct ns_info *info = &ns_info[i];
+ self->nsfds[i] = openat(proc_fd, info->name, O_RDONLY | O_CLOEXEC);
+ if (self->nsfds[i] < 0) {
+ EXPECT_EQ(errno, ENOENT) {
+ TH_LOG("%m - Failed to open %s namespace for process %d",
+ info->name, self->pid);
+ }
+ }
+ }
+
+ self->pidfd = sys_pidfd_open(self->pid, 0);
+ EXPECT_GT(self->pidfd, 0) {
+ TH_LOG("%m - Failed to open pidfd for process %d", self->pid);
+ }
+
+ /* Create task that exits right away. */
+ self->child_pid_exited = create_child(&self->child_pidfd_exited,
+ CLONE_NEWUSER | CLONE_NEWNET);
+ EXPECT_GT(self->child_pid_exited, 0);
+
+ if (self->child_pid_exited == 0)
+ _exit(EXIT_SUCCESS);
+
+ ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, WEXITED | WNOWAIT), 0);
+
+ self->pidfd = sys_pidfd_open(self->pid, 0);
+ EXPECT_GE(self->pidfd, 0) {
+ TH_LOG("%m - Failed to open pidfd for process %d", self->pid);
+ }
+
+ /* Create tasks that will be stopped. */
+ self->child_pid1 = create_child(&self->child_pidfd1,
+ CLONE_NEWUSER | CLONE_NEWNS |
+ CLONE_NEWCGROUP | CLONE_NEWIPC |
+ CLONE_NEWUTS | CLONE_NEWPID |
+ CLONE_NEWNET);
+ EXPECT_GE(self->child_pid1, 0);
+
+ if (self->child_pid1 == 0) {
+ pause();
+ _exit(EXIT_SUCCESS);
+ }
+
+ self->child_pid2 = create_child(&self->child_pidfd2,
+ CLONE_NEWUSER | CLONE_NEWNS |
+ CLONE_NEWCGROUP | CLONE_NEWIPC |
+ CLONE_NEWUTS | CLONE_NEWPID |
+ CLONE_NEWNET);
+ EXPECT_GE(self->child_pid2, 0);
+
+ if (self->child_pid2 == 0) {
+ pause();
+ _exit(EXIT_SUCCESS);
+ }
+
+ for (i = 0; i < PIDFD_NS_MAX; i++) {
+ char p[100];
+
+ const struct ns_info *info = &ns_info[i];
+
+ self->nsfds[i] = openat(proc_fd, info->name, O_RDONLY | O_CLOEXEC);
+ if (self->nsfds[i] < 0) {
+ EXPECT_EQ(errno, ENOENT) {
+ TH_LOG("%m - Failed to open %s namespace for process %d",
+ info->name, self->pid);
+ }
+ }
+
+ ret = snprintf(p, sizeof(p), "/proc/%d/ns/%s",
+ self->child_pid1, info->name);
+ EXPECT_GT(ret, 0);
+ EXPECT_LT(ret, sizeof(p));
+
+ self->child_nsfds1[i] = open(p, O_RDONLY | O_CLOEXEC);
+ if (self->child_nsfds1[i] < 0) {
+ EXPECT_EQ(errno, ENOENT) {
+ TH_LOG("%m - Failed to open %s namespace for process %d",
+ info->name, self->child_pid1);
+ }
+ }
+
+ ret = snprintf(p, sizeof(p), "/proc/%d/ns/%s",
+ self->child_pid2, info->name);
+ EXPECT_GT(ret, 0);
+ EXPECT_LT(ret, sizeof(p));
+
+ self->child_nsfds2[i] = open(p, O_RDONLY | O_CLOEXEC);
+ if (self->child_nsfds2[i] < 0) {
+ EXPECT_EQ(errno, ENOENT) {
+ TH_LOG("%m - Failed to open %s namespace for process %d",
+ info->name, self->child_pid2);
+ }
+ }
+ }
+
+ close(proc_fd);
+}
+
+FIXTURE_TEARDOWN(current_nsset)
+{
+ int i;
+
+ ASSERT_EQ(sys_pidfd_send_signal(self->child_pidfd1,
+ SIGKILL, NULL, 0), 0);
+ ASSERT_EQ(sys_pidfd_send_signal(self->child_pidfd2,
+ SIGKILL, NULL, 0), 0);
+
+ for (i = 0; i < PIDFD_NS_MAX; i++) {
+ if (self->nsfds[i] >= 0)
+ close(self->nsfds[i]);
+ if (self->child_nsfds1[i] >= 0)
+ close(self->child_nsfds1[i]);
+ if (self->child_nsfds2[i] >= 0)
+ close(self->child_nsfds2[i]);
+ }
+
+ if (self->child_pidfd1 >= 0)
+ EXPECT_EQ(0, close(self->child_pidfd1));
+ if (self->child_pidfd2 >= 0)
+ EXPECT_EQ(0, close(self->child_pidfd2));
+ ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, WEXITED), 0);
+ ASSERT_EQ(sys_waitid(P_PID, self->child_pid1, WEXITED), 0);
+ ASSERT_EQ(sys_waitid(P_PID, self->child_pid2, WEXITED), 0);
+}
+
+static int preserve_ns(const int pid, const char *ns)
+{
+ int ret;
+ char path[50];
+
+ ret = snprintf(path, sizeof(path), "/proc/%d/ns/%s", pid, ns);
+ if (ret < 0 || (size_t)ret >= sizeof(path))
+ return -EIO;
+
+ return open(path, O_RDONLY | O_CLOEXEC);
+}
+
+static int in_same_namespace(int ns_fd1, pid_t pid2, const char *ns)
+{
+ int ns_fd2 = -EBADF;
+ int ret = -1;
+ struct stat ns_st1, ns_st2;
+
+ ret = fstat(ns_fd1, &ns_st1);
+ if (ret < 0)
+ return -1;
+
+ ns_fd2 = preserve_ns(pid2, ns);
+ if (ns_fd2 < 0)
+ return -1;
+
+ ret = fstat(ns_fd2, &ns_st2);
+ close(ns_fd2);
+ if (ret < 0)
+ return -1;
+
+ /* processes are in the same namespace */
+ if ((ns_st1.st_dev == ns_st2.st_dev) &&
+ (ns_st1.st_ino == ns_st2.st_ino))
+ return 1;
+
+ /* processes are in different namespaces */
+ return 0;
+}
+
+/* Test that we can't pass garbage to the kernel. */
+TEST_F(current_nsset, invalid_flags)
+{
+ ASSERT_NE(setns(self->pidfd, 0), 0);
+ EXPECT_EQ(errno, EINVAL);
+
+ ASSERT_NE(setns(self->pidfd, -1), 0);
+ EXPECT_EQ(errno, EINVAL);
+
+ ASSERT_NE(setns(self->pidfd, CLONE_VM), 0);
+ EXPECT_EQ(errno, EINVAL);
+
+ ASSERT_NE(setns(self->pidfd, CLONE_NEWUSER | CLONE_VM), 0);
+ EXPECT_EQ(errno, EINVAL);
+}
+
+/* Test that we can't attach to a task that has already exited. */
+TEST_F(current_nsset, pidfd_exited_child)
+{
+ int i;
+ pid_t pid;
+
+ ASSERT_NE(setns(self->child_pidfd_exited, CLONE_NEWUSER | CLONE_NEWNET),
+ 0);
+ EXPECT_EQ(errno, ESRCH);
+
+ pid = getpid();
+ for (i = 0; i < PIDFD_NS_MAX; i++) {
+ const struct ns_info *info = &ns_info[i];
+ /* Verify that we haven't changed any namespaces. */
+ if (self->nsfds[i] >= 0)
+ ASSERT_EQ(in_same_namespace(self->nsfds[i], pid, info->name), 1);
+ }
+}
+
+TEST_F(current_nsset, pidfd_incremental_setns)
+{
+ int i;
+ pid_t pid;
+
+ pid = getpid();
+ for (i = 0; i < PIDFD_NS_MAX; i++) {
+ const struct ns_info *info = &ns_info[i];
+ int nsfd;
+
+ if (self->child_nsfds1[i] < 0)
+ continue;
+
+ if (info->flag) {
+ ASSERT_EQ(setns(self->child_pidfd1, info->flag), 0) {
+ TH_LOG("%m - Failed to setns to %s namespace of %d via pidfd %d",
+ info->name, self->child_pid1,
+ self->child_pidfd1);
+ }
+ }
+
+ /* Verify that we have changed to the correct namespaces. */
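+ /*
+ * setns() to a pid namespace only affects children created
+ * afterwards, so our own pid namespace should be unchanged.
+ */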
+ if (info->flag == CLONE_NEWPID)
+ nsfd = self->nsfds[i];
+ else
+ nsfd = self->child_nsfds1[i];
+ ASSERT_EQ(in_same_namespace(nsfd, pid, info->name), 1) {
+ TH_LOG("setns failed to place us correctly into %s namespace of %d via pidfd %d",
+ info->name, self->child_pid1,
+ self->child_pidfd1);
+ }
+ TH_LOG("Managed to correctly setns to %s namespace of %d via pidfd %d",
+ info->name, self->child_pid1, self->child_pidfd1);
+ }
+}
+
+TEST_F(current_nsset, nsfd_incremental_setns)
+{
+ int i;
+ pid_t pid;
+
+ pid = getpid();
+ for (i = 0; i < PIDFD_NS_MAX; i++) {
+ const struct ns_info *info = &ns_info[i];
+ int nsfd;
+
+ if (self->child_nsfds1[i] < 0)
+ continue;
+
+ if (info->flag) {
+ ASSERT_EQ(setns(self->child_nsfds1[i], info->flag), 0) {
+ TH_LOG("%m - Failed to setns to %s namespace of %d via nsfd %d",
+ info->name, self->child_pid1,
+ self->child_nsfds1[i]);
+ }
+ }
+
+ /* Verify that we have changed to the correct namespaces. */
+ if (info->flag == CLONE_NEWPID)
+ nsfd = self->nsfds[i];
+ else
+ nsfd = self->child_nsfds1[i];
+ ASSERT_EQ(in_same_namespace(nsfd, pid, info->name), 1) {
+ TH_LOG("setns failed to place us correctly into %s namespace of %d via nsfd %d",
+ info->name, self->child_pid1,
+ self->child_nsfds1[i]);
+ }
+ TH_LOG("Managed to correctly setns to %s namespace of %d via nsfd %d",
+ info->name, self->child_pid1, self->child_nsfds1[i]);
+ }
+}
+
+TEST_F(current_nsset, pidfd_one_shot_setns)
+{
+ unsigned flags = 0;
+ int i;
+ pid_t pid;
+
+ for (i = 0; i < PIDFD_NS_MAX; i++) {
+ const struct ns_info *info = &ns_info[i];
+
+ if (self->child_nsfds1[i] < 0)
+ continue;
+
+ flags |= info->flag;
+ TH_LOG("Adding %s namespace of %d to list of namespaces to attach to",
+ info->name, self->child_pid1);
+ }
+
+ ASSERT_EQ(setns(self->child_pidfd1, flags), 0) {
+ TH_LOG("%m - Failed to setns to namespaces of %d",
+ self->child_pid1);
+ }
+
+ pid = getpid();
+ for (i = 0; i < PIDFD_NS_MAX; i++) {
+ const struct ns_info *info = &ns_info[i];
+ int nsfd;
+
+ if (self->child_nsfds1[i] < 0)
+ continue;
+
+ /* Verify that we have changed to the correct namespaces. */
+ if (info->flag == CLONE_NEWPID)
+ nsfd = self->nsfds[i];
+ else
+ nsfd = self->child_nsfds1[i];
+ ASSERT_EQ(in_same_namespace(nsfd, pid, info->name), 1) {
+ TH_LOG("setns failed to place us correctly into %s namespace of %d",
+ info->name, self->child_pid1);
+ }
+ TH_LOG("Managed to correctly setns to %s namespace of %d",
+ info->name, self->child_pid1);
+ }
+}
+
+TEST_F(current_nsset, no_foul_play)
+{
+ unsigned flags = 0;
+ int i;
+
+ for (i = 0; i < PIDFD_NS_MAX; i++) {
+ const struct ns_info *info = &ns_info[i];
+
+ if (self->child_nsfds1[i] < 0)
+ continue;
+
+ flags |= info->flag;
+ if (info->flag) /* No use logging pid_for_children. */
+ TH_LOG("Adding %s namespace of %d to list of namespaces to attach to",
+ info->name, self->child_pid1);
+ }
+
+ ASSERT_EQ(setns(self->child_pidfd1, flags), 0) {
+ TH_LOG("%m - Failed to setns to namespaces of %d vid pidfd %d",
+ self->child_pid1, self->child_pidfd1);
+ }
+
+ /*
+ * Can't setns to a user namespace outside of our hierarchy since we
+ * don't have caps in there and didn't create it. That means that under
+ * no circumstances should we be able to setns to any of the other
+ * ones since they aren't owned by our user namespace.
+ */
+ for (i = 0; i < PIDFD_NS_MAX; i++) {
+ const struct ns_info *info = &ns_info[i];
+
+ if (self->child_nsfds2[i] < 0 || !info->flag)
+ continue;
+
+ ASSERT_NE(setns(self->child_pidfd2, info->flag), 0) {
+ TH_LOG("Managed to setns to %s namespace of %d via pidfd %d",
+ info->name, self->child_pid2,
+ self->child_pidfd2);
+ }
+ TH_LOG("%m - Correctly failed to setns to %s namespace of %d via pidfd %d",
+ info->name, self->child_pid2,
+ self->child_pidfd2);
+
+ ASSERT_NE(setns(self->child_nsfds2[i], info->flag), 0) {
+ TH_LOG("Managed to setns to %s namespace of %d via nsfd %d",
+ info->name, self->child_pid2,
+ self->child_nsfds2[i]);
+ }
+ TH_LOG("%m - Correctly failed to setns to %s namespace of %d via nsfd %d",
+ info->name, self->child_pid2,
+ self->child_nsfds2[i]);
+ }
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/pstore/pstore_tests b/tools/testing/selftests/pstore/pstore_tests
index 1cef54458aff..2aa9a3852a84 100755
--- a/tools/testing/selftests/pstore/pstore_tests
+++ b/tools/testing/selftests/pstore/pstore_tests
@@ -10,7 +10,7 @@
. ./common_tests
prlog -n "Checking pstore console is registered ... "
-dmesg | grep -q "console \[pstore"
+dmesg | grep -Eq "console \[(pstore|${backend})"
show_result $?
prlog -n "Checking /dev/pmsg0 exists ... "
diff --git a/tools/testing/selftests/rcutorture/bin/kcsan-collapse.sh b/tools/testing/selftests/rcutorture/bin/kcsan-collapse.sh
new file mode 100755
index 000000000000..e5cc6b2f195e
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kcsan-collapse.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+#
+# If this was a KCSAN run, collapse the reports in the various console.log
+# files onto pairs of functions.
+#
+# Usage: kcsan-collapse.sh resultsdir
+#
+# Copyright (C) 2020 Facebook, Inc.
+#
+# Authors: Paul E. McKenney <paulmck@kernel.org>
+
+if test -z "$TORTURE_KCONFIG_KCSAN_ARG"
+then
+ exit 0
+fi
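+# Strip timestamps, then count and rank duplicate KCSAN reports by frequency.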
+cat $1/*/console.log |
+ grep "BUG: KCSAN: " |
+ sed -e 's/^\[[^]]*] //' |
+ sort |
+ uniq -c |
+ sort -k1nr > $1/kcsan.sum
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
index 9d9a41625dd9..1706cd4466b4 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
@@ -41,7 +41,21 @@ else
title="$title ($ngpsps/s)"
fi
echo $title $stopstate $fwdprog
- nclosecalls=`grep --binary-files=text 'torture: Reader Batch' $i/console.log | tail -1 | awk '{for (i=NF-8;i<=NF;i++) sum+=$i; } END {print sum}'`
+ nclosecalls=`grep --binary-files=text 'torture: Reader Batch' $i/console.log | tail -1 | \
+ awk -v sum=0 '
+ {
+ for (i = 0; i <= NF; i++) {
+ sum += $i;
+ if ($i ~ /Batch:/) {
+ sum = 0;
+ i = i + 2;
+ }
+ }
+ }
+
+ END {
+ print sum
+ }'`
if test -z "$nclosecalls"
then
exit 0
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
index 0326f4a5ff9c..736f04749b90 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
@@ -70,6 +70,15 @@ do
fi
fi
done
+ if test -f "$rd/kcsan.sum"
+ then
+ if test -s "$rd/kcsan.sum"
+ then
+ echo KCSAN summary in $rd/kcsan.sum
+ else
+ echo Clean KCSAN run in $rd
+ fi
+ fi
done
EDITOR=echo kvm-find-errors.sh "${@: -1}" > $T 2>&1
ret=$?
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
index e0352304b98b..6ff611c630d1 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
@@ -44,30 +44,32 @@ then
fi
echo ' ---' `date`: Starting build
echo ' ---' Kconfig fragment at: $config_template >> $resdir/log
-touch $resdir/ConfigFragment.input $resdir/ConfigFragment
-if test -r "$config_dir/CFcommon"
-then
- echo " --- $config_dir/CFcommon" >> $resdir/ConfigFragment.input
- cat < $config_dir/CFcommon >> $resdir/ConfigFragment.input
- config_override.sh $config_dir/CFcommon $config_template > $T/Kc1
- grep '#CHECK#' $config_dir/CFcommon >> $resdir/ConfigFragment
-else
- cp $config_template $T/Kc1
-fi
-echo " --- $config_template" >> $resdir/ConfigFragment.input
-cat $config_template >> $resdir/ConfigFragment.input
-grep '#CHECK#' $config_template >> $resdir/ConfigFragment
-if test -n "$TORTURE_KCONFIG_ARG"
-then
- echo $TORTURE_KCONFIG_ARG | tr -s " " "\012" > $T/cmdline
- echo " --- --kconfig argument" >> $resdir/ConfigFragment.input
- cat $T/cmdline >> $resdir/ConfigFragment.input
- config_override.sh $T/Kc1 $T/cmdline > $T/Kc2
- # Note that "#CHECK#" is not permitted on commandline.
-else
- cp $T/Kc1 $T/Kc2
-fi
-cat $T/Kc2 >> $resdir/ConfigFragment
+touch $resdir/ConfigFragment.input
+
+# Combine additional Kconfig options into an existing set such that
+# newer options win. The first argument is the Kconfig source ID, the
+# second is the to-be-updated file within $T, and the third and final
+# argument is the list of additional Kconfig options. Note that a
+# $2.tmp file is created when doing the update.
+config_override_param () {
+ if test -n "$3"
+ then
+ echo $3 | sed -e 's/^ *//' -e 's/ *$//' | tr -s " " "\012" > $T/Kconfig_args
+ echo " --- $1" >> $resdir/ConfigFragment.input
+ cat $T/Kconfig_args >> $resdir/ConfigFragment.input
+ config_override.sh $T/$2 $T/Kconfig_args > $T/$2.tmp
+ mv $T/$2.tmp $T/$2
+ # Note that "#CHECK#" is not permitted on commandline.
+ fi
+}
+
+echo > $T/KcList
+config_override_param "$config_dir/CFcommon" KcList "`cat $config_dir/CFcommon 2> /dev/null`"
+config_override_param "$config_template" KcList "`cat $config_template 2> /dev/null`"
+config_override_param "--kasan options" KcList "$TORTURE_KCONFIG_KASAN_ARG"
+config_override_param "--kcsan options" KcList "$TORTURE_KCONFIG_KCSAN_ARG"
+config_override_param "--kconfig argument" KcList "$TORTURE_KCONFIG_ARG"
+cp $T/KcList $resdir/ConfigFragment
base_resdir=`echo $resdir | sed -e 's/\.[0-9]\+$//'`
if test "$base_resdir" != "$resdir" -a -f $base_resdir/bzImage -a -f $base_resdir/vmlinux
@@ -80,7 +82,7 @@ then
ln -s $base_resdir/.config $resdir # for kvm-recheck.sh
# Arch-independent indicator
touch $resdir/builtkernel
-elif kvm-build.sh $T/Kc2 $resdir
+elif kvm-build.sh $T/KcList $resdir
then
# Had to build a kernel for this test.
QEMU="`identify_qemu vmlinux`"
diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
index 2315e2ec12d6..c279cf9cb010 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -31,6 +31,8 @@ TORTURE_DEFCONFIG=defconfig
TORTURE_BOOT_IMAGE=""
TORTURE_INITRD="$KVM/initrd"; export TORTURE_INITRD
TORTURE_KCONFIG_ARG=""
+TORTURE_KCONFIG_KASAN_ARG=""
+TORTURE_KCONFIG_KCSAN_ARG=""
TORTURE_KMAKE_ARG=""
TORTURE_QEMU_MEM=512
TORTURE_SHUTDOWN_GRACE=180
@@ -133,6 +135,12 @@ do
TORTURE_KCONFIG_ARG="$2"
shift
;;
+ --kasan)
+ TORTURE_KCONFIG_KASAN_ARG="CONFIG_DEBUG_INFO=y CONFIG_KASAN=y"; export TORTURE_KCONFIG_KASAN_ARG
+ ;;
+ --kcsan)
+ TORTURE_KCONFIG_KCSAN_ARG="CONFIG_DEBUG_INFO=y CONFIG_KCSAN=y CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC=n CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY=n CONFIG_KCSAN_REPORT_ONCE_IN_MS=100000 CONFIG_KCSAN_VERBOSE=y CONFIG_KCSAN_INTERRUPT_WATCHER=y"; export TORTURE_KCONFIG_KCSAN_ARG
+ ;;
--kmake-arg)
checkarg --kmake-arg "(kernel make arguments)" $# "$2" '.*' '^error$'
TORTURE_KMAKE_ARG="$2"
@@ -310,6 +318,8 @@ TORTURE_BUILDONLY="$TORTURE_BUILDONLY"; export TORTURE_BUILDONLY
TORTURE_DEFCONFIG="$TORTURE_DEFCONFIG"; export TORTURE_DEFCONFIG
TORTURE_INITRD="$TORTURE_INITRD"; export TORTURE_INITRD
TORTURE_KCONFIG_ARG="$TORTURE_KCONFIG_ARG"; export TORTURE_KCONFIG_ARG
+TORTURE_KCONFIG_KASAN_ARG="$TORTURE_KCONFIG_KASAN_ARG"; export TORTURE_KCONFIG_KASAN_ARG
+TORTURE_KCONFIG_KCSAN_ARG="$TORTURE_KCONFIG_KCSAN_ARG"; export TORTURE_KCONFIG_KCSAN_ARG
TORTURE_KMAKE_ARG="$TORTURE_KMAKE_ARG"; export TORTURE_KMAKE_ARG
TORTURE_QEMU_CMD="$TORTURE_QEMU_CMD"; export TORTURE_QEMU_CMD
TORTURE_QEMU_INTERACTIVE="$TORTURE_QEMU_INTERACTIVE"; export TORTURE_QEMU_INTERACTIVE
@@ -464,6 +474,7 @@ echo
echo
echo " --- `date` Test summary:"
echo Results directory: $resdir/$ds
+kcsan-collapse.sh $resdir/$ds
kvm-recheck.sh $resdir/$ds
___EOF___
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/CFLIST b/tools/testing/selftests/rcutorture/configs/rcu/CFLIST
index c3c1fb5a9e1f..f2b20db9e296 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/CFLIST
+++ b/tools/testing/selftests/rcutorture/configs/rcu/CFLIST
@@ -14,3 +14,6 @@ TINY02
TASKS01
TASKS02
TASKS03
+RUDE01
+TRACE01
+TRACE02
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/RUDE01 b/tools/testing/selftests/rcutorture/configs/rcu/RUDE01
new file mode 100644
index 000000000000..bafe94cbd739
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/RUDE01
@@ -0,0 +1,10 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=2
+CONFIG_HOTPLUG_CPU=y
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
+CONFIG_DEBUG_LOCK_ALLOC=y
+CONFIG_PROVE_LOCKING=y
+#CHECK#CONFIG_PROVE_RCU=y
+CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/RUDE01.boot b/tools/testing/selftests/rcutorture/configs/rcu/RUDE01.boot
new file mode 100644
index 000000000000..9363708c9075
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/RUDE01.boot
@@ -0,0 +1 @@
+rcutorture.torture_type=tasks-rude
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRACE01 b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01
new file mode 100644
index 000000000000..12e7661b86f5
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01
@@ -0,0 +1,11 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=4
+CONFIG_HOTPLUG_CPU=y
+CONFIG_PREEMPT_NONE=y
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=n
+CONFIG_DEBUG_LOCK_ALLOC=y
+CONFIG_PROVE_LOCKING=y
+#CHECK#CONFIG_PROVE_RCU=y
+CONFIG_TASKS_TRACE_RCU_READ_MB=y
+CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRACE01.boot b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01.boot
new file mode 100644
index 000000000000..9675ad632dcc
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01.boot
@@ -0,0 +1 @@
+rcutorture.torture_type=tasks-tracing
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRACE02 b/tools/testing/selftests/rcutorture/configs/rcu/TRACE02
new file mode 100644
index 000000000000..b69ed6673c41
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TRACE02
@@ -0,0 +1,11 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=4
+CONFIG_HOTPLUG_CPU=y
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_PROVE_LOCKING=n
+#CHECK#CONFIG_PROVE_RCU=n
+CONFIG_TASKS_TRACE_RCU_READ_MB=n
+CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRACE02.boot b/tools/testing/selftests/rcutorture/configs/rcu/TRACE02.boot
new file mode 100644
index 000000000000..9675ad632dcc
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TRACE02.boot
@@ -0,0 +1 @@
+rcutorture.torture_type=tasks-tracing
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE10 b/tools/testing/selftests/rcutorture/configs/rcu/TREE10
index 2debe7891aeb..7311f84a5876 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE10
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE10
@@ -1,5 +1,5 @@
CONFIG_SMP=y
-CONFIG_NR_CPUS=100
+CONFIG_NR_CPUS=56
CONFIG_PREEMPT_NONE=y
CONFIG_PREEMPT_VOLUNTARY=n
CONFIG_PREEMPT=n
diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore
index ca17fe0c3280..4f1831e62ea5 100644
--- a/tools/testing/selftests/vm/.gitignore
+++ b/tools/testing/selftests/vm/.gitignore
@@ -17,3 +17,4 @@ gup_benchmark
va_128TBswitch
map_fixed_noreplace
write_to_hugetlbfs
+hmm-tests
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index 6998877f707e..c6eb5305a0f6 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -7,6 +7,7 @@ CFLAGS = -Wall -I ../../../../usr/include $(EXTRA_CFLAGS)
LDLIBS = -lrt
TEST_GEN_FILES = compaction_test
TEST_GEN_FILES += gup_benchmark
+TEST_GEN_FILES += hmm-tests
TEST_GEN_FILES += hugepage-mmap
TEST_GEN_FILES += hugepage-shm
TEST_GEN_FILES += map_hugetlb
@@ -33,6 +34,8 @@ TEST_FILES := test_vmalloc.sh
KSFT_KHDR_INSTALL := 1
include ../lib.mk
+$(OUTPUT)/hmm-tests: LDLIBS += -lhugetlbfs -lpthread
+
$(OUTPUT)/userfaultfd: LDLIBS += -lpthread
$(OUTPUT)/mlock-random-test: LDLIBS += -lcap
diff --git a/tools/testing/selftests/vm/config b/tools/testing/selftests/vm/config
index 93b90a9b1eeb..3ba674b64fa9 100644
--- a/tools/testing/selftests/vm/config
+++ b/tools/testing/selftests/vm/config
@@ -1,3 +1,5 @@
CONFIG_SYSVIPC=y
CONFIG_USERFAULTFD=y
CONFIG_TEST_VMALLOC=m
+CONFIG_DEVICE_PRIVATE=y
+CONFIG_TEST_HMM=m
diff --git a/tools/testing/selftests/vm/hmm-tests.c b/tools/testing/selftests/vm/hmm-tests.c
new file mode 100644
index 000000000000..79db22604019
--- /dev/null
+++ b/tools/testing/selftests/vm/hmm-tests.c
@@ -0,0 +1,1359 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * HMM stands for Heterogeneous Memory Management; it is a helper layer inside
+ * the Linux kernel that helps device drivers mirror a process address space in
+ * the device. This allows the device to use the same address space, which
+ * makes communication and data exchange a lot easier.
+ *
+ * This framework's sole purpose is to exercise various code paths inside
+ * the kernel to make sure that HMM performs as expected and to flush out any
+ * bugs.
+ */
+
+#include "../kselftest_harness.h"
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <strings.h>
+#include <time.h>
+#include <pthread.h>
+#include <hugetlbfs.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <sys/ioctl.h>
+
+/*
+ * This is a private UAPI to the kernel test module so it isn't exported
+ * in the usual include/uapi/... directory.
+ */
+#include "../../../../lib/test_hmm_uapi.h"
+
+struct hmm_buffer {
+ void *ptr;
+ void *mirror;
+ unsigned long size;
+ int fd;
+ uint64_t cpages;
+ uint64_t faults;
+};
+
+#define TWOMEG (1 << 21)
+#define HMM_BUFFER_SIZE (1024 << 12)
+#define HMM_PATH_MAX 64
+#define NTIMES 256
+
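+/* Round x up to the next multiple of a (a must be a power of two). */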
+#define ALIGN(x, a) (((x) + (a - 1)) & (~((a) - 1)))
+
+FIXTURE(hmm)
+{
+ int fd;
+ unsigned int page_size;
+ unsigned int page_shift;
+};
+
+FIXTURE(hmm2)
+{
+ int fd0;
+ int fd1;
+ unsigned int page_size;
+ unsigned int page_shift;
+};
+
+static int hmm_open(int unit)
+{
+ char pathname[HMM_PATH_MAX];
+ int fd;
+
+ snprintf(pathname, sizeof(pathname), "/dev/hmm_dmirror%d", unit);
+ fd = open(pathname, O_RDWR, 0);
+ if (fd < 0)
+ fprintf(stderr, "could not open hmm dmirror driver (%s)\n",
+ pathname);
+ return fd;
+}
+
+FIXTURE_SETUP(hmm)
+{
+ self->page_size = sysconf(_SC_PAGE_SIZE);
+ self->page_shift = ffs(self->page_size) - 1;
+
+ self->fd = hmm_open(0);
+ ASSERT_GE(self->fd, 0);
+}
+
+FIXTURE_SETUP(hmm2)
+{
+ self->page_size = sysconf(_SC_PAGE_SIZE);
+ self->page_shift = ffs(self->page_size) - 1;
+
+ self->fd0 = hmm_open(0);
+ ASSERT_GE(self->fd0, 0);
+ self->fd1 = hmm_open(1);
+ ASSERT_GE(self->fd1, 0);
+}
+
+FIXTURE_TEARDOWN(hmm)
+{
+ int ret = close(self->fd);
+
+ ASSERT_EQ(ret, 0);
+ self->fd = -1;
+}
+
+FIXTURE_TEARDOWN(hmm2)
+{
+ int ret = close(self->fd0);
+
+ ASSERT_EQ(ret, 0);
+ self->fd0 = -1;
+
+ ret = close(self->fd1);
+ ASSERT_EQ(ret, 0);
+ self->fd1 = -1;
+}
+
+static int hmm_dmirror_cmd(int fd,
+ unsigned long request,
+ struct hmm_buffer *buffer,
+ unsigned long npages)
+{
+ struct hmm_dmirror_cmd cmd;
+ int ret;
+
+ /* Simulate a device reading system memory. */
+ cmd.addr = (__u64)buffer->ptr;
+ cmd.ptr = (__u64)buffer->mirror;
+ cmd.npages = npages;
+
+ for (;;) {
+ ret = ioctl(fd, request, &cmd);
+ if (ret == 0)
+ break;
+ if (errno == EINTR)
+ continue;
+ return -errno;
+ }
+ buffer->cpages = cmd.cpages;
+ buffer->faults = cmd.faults;
+
+ return 0;
+}
+
+static void hmm_buffer_free(struct hmm_buffer *buffer)
+{
+ if (buffer == NULL)
+ return;
+
+ if (buffer->ptr)
+ munmap(buffer->ptr, buffer->size);
+ free(buffer->mirror);
+ free(buffer);
+}
+
+/*
+ * Create a temporary file that will be deleted on close.
+ */
+static int hmm_create_file(unsigned long size)
+{
+ char path[HMM_PATH_MAX];
+ int fd;
+
+ strcpy(path, "/tmp");
+ fd = open(path, O_TMPFILE | O_EXCL | O_RDWR, 0600);
+ if (fd >= 0) {
+ int r;
+
+ do {
+ r = ftruncate(fd, size);
+ } while (r == -1 && errno == EINTR);
+ if (!r)
+ return fd;
+ close(fd);
+ }
+ return -1;
+}
+
+/*
+ * Return a random unsigned number.
+ */
+static unsigned int hmm_random(void)
+{
+ static int fd = -1;
+ unsigned int r;
+
+ if (fd < 0) {
+ fd = open("/dev/urandom", O_RDONLY);
+ if (fd < 0) {
+ fprintf(stderr, "%s:%d failed to open /dev/urandom\n",
+ __FILE__, __LINE__);
+ return ~0U;
+ }
+ }
+ read(fd, &r, sizeof(r));
+ return r;
+}
+
+static void hmm_nanosleep(unsigned int n)
+{
+ struct timespec t;
+
+ t.tv_sec = 0;
+ t.tv_nsec = n;
+ nanosleep(&t, NULL);
+}
+
+/*
+ * Simple NULL test of device open/close.
+ */
+TEST_F(hmm, open_close)
+{
+}
+
+/*
+ * Read private anonymous memory.
+ */
+TEST_F(hmm, anon_read)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ int ret;
+ int val;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /*
+ * Initialize buffer in system memory but leave the first two pages
+ * zero (pte_none and pfn_zero).
+ */
+ i = 2 * self->page_size / sizeof(*ptr);
+ for (ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Set buffer permission to read-only. */
+ ret = mprotect(buffer->ptr, size, PROT_READ);
+ ASSERT_EQ(ret, 0);
+
+ /* Populate the CPU page table with a special zero page. */
+ val = *(int *)(buffer->ptr + self->page_size);
+ ASSERT_EQ(val, 0);
+
+ /* Simulate a device reading system memory. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ ASSERT_EQ(buffer->faults, 1);
+
+ /* Check what the device read. */
+ ptr = buffer->mirror;
+ for (i = 0; i < 2 * self->page_size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], 0);
+ for (; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Read private anonymous memory which has been protected with
+ * mprotect() PROT_NONE.
+ */
+TEST_F(hmm, anon_read_prot)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ int ret;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Initialize mirror buffer so we can verify it isn't written. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ptr[i] = -i;
+
+ /* Protect buffer from reading. */
+ ret = mprotect(buffer->ptr, size, PROT_NONE);
+ ASSERT_EQ(ret, 0);
+
+ /* Simulate a device reading system memory. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer, npages);
+ ASSERT_EQ(ret, -EFAULT);
+
+ /* Allow CPU to read the buffer so we can check it. */
+ ret = mprotect(buffer->ptr, size, PROT_READ);
+ ASSERT_EQ(ret, 0);
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], -i);
+
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Write private anonymous memory.
+ */
+TEST_F(hmm, anon_write)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ int ret;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize data that the device will write to buffer->ptr. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Simulate a device writing system memory. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ ASSERT_EQ(buffer->faults, 1);
+
+ /* Check what the device wrote. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Write private anonymous memory which has been protected with
+ * mprotect() PROT_READ.
+ */
+TEST_F(hmm, anon_write_prot)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ int ret;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Simulate a device reading a zero page of memory. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer, 1);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, 1);
+ ASSERT_EQ(buffer->faults, 1);
+
+ /* Initialize data that the device will write to buffer->ptr. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Simulate a device writing system memory. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages);
+ ASSERT_EQ(ret, -EPERM);
+
+ /* Check what the device wrote. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], 0);
+
+ /* Now allow writing and see that the zero page is replaced. */
+ ret = mprotect(buffer->ptr, size, PROT_WRITE | PROT_READ);
+ ASSERT_EQ(ret, 0);
+
+ /* Simulate a device writing system memory. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ ASSERT_EQ(buffer->faults, 1);
+
+ /* Check what the device wrote. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Check that a device writing an anonymous private mapping
+ * will copy-on-write if a child process inherits the mapping.
+ */
+TEST_F(hmm, anon_write_child)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ pid_t pid;
+ int child_fd;
+ int ret;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize buffer->ptr so we can tell if it is written. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Initialize data that the device will write to buffer->ptr. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ptr[i] = -i;
+
+ pid = fork();
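+	/* A failed fork() fails the test via the assertion below. */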
+ if (pid == -1)
+ ASSERT_EQ(pid, 0);
+ if (pid != 0) {
+ waitpid(pid, &ret, 0);
+ ASSERT_EQ(WIFEXITED(ret), 1);
+
+ /* Check that the parent's buffer did not change. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+ return;
+ }
+
+ /* Check that we see the parent's values. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], -i);
+
+ /* The child process needs its own mirror to its own mm. */
+ child_fd = hmm_open(0);
+ ASSERT_GE(child_fd, 0);
+
+ /* Simulate a device writing system memory. */
+ ret = hmm_dmirror_cmd(child_fd, HMM_DMIRROR_WRITE, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ ASSERT_EQ(buffer->faults, 1);
+
+ /* Check what the device wrote. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], -i);
+
+ close(child_fd);
+ exit(0);
+}
+
+/*
+ * Check that a device writing an anonymous shared mapping
+ * will not copy-on-write if a child process inherits the mapping.
+ */
+TEST_F(hmm, anon_write_child_shared)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ pid_t pid;
+ int child_fd;
+ int ret;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize buffer->ptr so we can tell if it is written. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Initialize data that the device will write to buffer->ptr. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ptr[i] = -i;
+
+ pid = fork();
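+	/* A failed fork() fails the test via the assertion below. */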
+ if (pid == -1)
+ ASSERT_EQ(pid, 0);
+ if (pid != 0) {
+ waitpid(pid, &ret, 0);
+ ASSERT_EQ(WIFEXITED(ret), 1);
+
+ /* Check that the parent's buffer did change. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], -i);
+ return;
+ }
+
+ /* Check that we see the parent's values. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], -i);
+
+ /* The child process needs its own mirror to its own mm. */
+ child_fd = hmm_open(0);
+ ASSERT_GE(child_fd, 0);
+
+ /* Simulate a device writing system memory. */
+ ret = hmm_dmirror_cmd(child_fd, HMM_DMIRROR_WRITE, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ ASSERT_EQ(buffer->faults, 1);
+
+ /* Check what the device wrote. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], -i);
+
+ close(child_fd);
+ exit(0);
+}
+
+/*
+ * Write private anonymous huge page.
+ */
+TEST_F(hmm, anon_write_huge)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ void *old_ptr;
+ void *map;
+ int *ptr;
+ int ret;
+
+ size = 2 * TWOMEG;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
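+	/* Map 2 * TWOMEG so a naturally aligned TWOMEG region can be picked for MADV_HUGEPAGE below. */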
+ size = TWOMEG;
+ npages = size >> self->page_shift;
+ map = (void *)ALIGN((uintptr_t)buffer->ptr, size);
+ ret = madvise(map, size, MADV_HUGEPAGE);
+ ASSERT_EQ(ret, 0);
+ old_ptr = buffer->ptr;
+ buffer->ptr = map;
+
+ /* Initialize data that the device will write to buffer->ptr. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Simulate a device writing system memory. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ ASSERT_EQ(buffer->faults, 1);
+
+ /* Check what the device wrote. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ buffer->ptr = old_ptr;
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Write a hugetlbfs page.
+ */
+TEST_F(hmm, anon_write_hugetlbfs)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ int ret;
+ long pagesizes[4];
+ int n, idx;
+
+ /* Skip test if we can't allocate a hugetlbfs page. */
+
+ n = gethugepagesizes(pagesizes, 4);
+ if (n <= 0)
+ return;
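+	/* Pick the smallest supported huge page size. */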
+ for (idx = 0; --n > 0; ) {
+ if (pagesizes[n] < pagesizes[idx])
+ idx = n;
+ }
+ size = ALIGN(TWOMEG, pagesizes[idx]);
+ npages = size >> self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->ptr = get_hugepage_region(size, GHR_STRICT);
+ if (buffer->ptr == NULL) {
+ free(buffer);
+ return;
+ }
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ /* Initialize data that the device will write to buffer->ptr. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Simulate a device writing system memory. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ ASSERT_EQ(buffer->faults, 1);
+
+ /* Check what the device wrote. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ free_hugepage_region(buffer->ptr);
+ buffer->ptr = NULL;
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Read mmap'ed file memory.
+ */
+TEST_F(hmm, file_read)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ int ret;
+ int fd;
+ ssize_t len;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ fd = hmm_create_file(size);
+ ASSERT_GE(fd, 0);
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = fd;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ /* Write initial contents of the file. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+ len = pwrite(fd, buffer->mirror, size, 0);
+ ASSERT_EQ(len, size);
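+	/* Clear the mirror so the later check shows the device actually read the file. */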
+ memset(buffer->mirror, 0, size);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ,
+ MAP_SHARED,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Simulate a device reading system memory. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ ASSERT_EQ(buffer->faults, 1);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Write mmap'ed file memory.
+ */
+TEST_F(hmm, file_write)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ int ret;
+ int fd;
+ ssize_t len;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ fd = hmm_create_file(size);
+ ASSERT_GE(fd, 0);
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = fd;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize data that the device will write to buffer->ptr. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Simulate a device writing system memory. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ ASSERT_EQ(buffer->faults, 1);
+
+ /* Check what the device wrote. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ /* Check that the device also wrote the file. */
+ len = pread(fd, buffer->mirror, size, 0);
+ ASSERT_EQ(len, size);
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Migrate anonymous memory to device private memory.
+ */
+TEST_F(hmm, migrate)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ int ret;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Migrate memory to device. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Migrate anonymous memory to device private memory and fault it back to system
+ * memory.
+ */
+TEST_F(hmm, migrate_fault)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ int ret;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Migrate memory to device. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ /* Fault pages back to system memory and check them. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Try to migrate various memory types to device private memory.
+ */
+TEST_F(hmm2, migrate_mixed)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ int *ptr;
+ unsigned char *p;
+ int ret;
+ int val;
+
+ npages = 6;
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ /* Reserve a range of addresses. */
+ buffer->ptr = mmap(NULL, size,
+ PROT_NONE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+ p = buffer->ptr;
+
+ /* Migrating a protected area should be an error. */
+ ret = hmm_dmirror_cmd(self->fd1, HMM_DMIRROR_MIGRATE, buffer, npages);
+ ASSERT_EQ(ret, -EINVAL);
+
+ /* Punch a hole after the first page address. */
+ ret = munmap(buffer->ptr + self->page_size, self->page_size);
+ ASSERT_EQ(ret, 0);
+
+ /* We expect an error if the vma doesn't cover the range. */
+ ret = hmm_dmirror_cmd(self->fd1, HMM_DMIRROR_MIGRATE, buffer, 3);
+ ASSERT_EQ(ret, -EINVAL);
+
+ /* Page 2 will be a read-only zero page. */
+ ret = mprotect(buffer->ptr + 2 * self->page_size, self->page_size,
+ PROT_READ);
+ ASSERT_EQ(ret, 0);
+ ptr = (int *)(buffer->ptr + 2 * self->page_size);
+ val = *ptr + 3;
+ ASSERT_EQ(val, 3);
+
+ /* Page 3 will be read-only. */
+ ret = mprotect(buffer->ptr + 3 * self->page_size, self->page_size,
+ PROT_READ | PROT_WRITE);
+ ASSERT_EQ(ret, 0);
+ ptr = (int *)(buffer->ptr + 3 * self->page_size);
+ *ptr = val;
+ ret = mprotect(buffer->ptr + 3 * self->page_size, self->page_size,
+ PROT_READ);
+ ASSERT_EQ(ret, 0);
+
+	/* Pages 4-5 will be read-write. */
+ ret = mprotect(buffer->ptr + 4 * self->page_size, 2 * self->page_size,
+ PROT_READ | PROT_WRITE);
+ ASSERT_EQ(ret, 0);
+ ptr = (int *)(buffer->ptr + 4 * self->page_size);
+ *ptr = val;
+ ptr = (int *)(buffer->ptr + 5 * self->page_size);
+ *ptr = val;
+
+ /* Now try to migrate pages 2-5 to device 1. */
+ buffer->ptr = p + 2 * self->page_size;
+ ret = hmm_dmirror_cmd(self->fd1, HMM_DMIRROR_MIGRATE, buffer, 4);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, 4);
+
+ /* Page 5 won't be migrated to device 0 because it's on device 1. */
+ buffer->ptr = p + 5 * self->page_size;
+ ret = hmm_dmirror_cmd(self->fd0, HMM_DMIRROR_MIGRATE, buffer, 1);
+ ASSERT_EQ(ret, -ENOENT);
+	buffer->ptr = p;
+
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Migrate anonymous memory to device private memory and fault it back to system
+ * memory multiple times.
+ */
+TEST_F(hmm, migrate_multiple)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ unsigned long c;
+ int *ptr;
+ int ret;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ for (c = 0; c < NTIMES; c++) {
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Migrate memory to device. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer,
+ npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ /* Fault pages back to system memory and check them. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ hmm_buffer_free(buffer);
+ }
+}
+
+/*
+ * Read anonymous memory multiple times.
+ */
+TEST_F(hmm, anon_read_multiple)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ unsigned long c;
+ int *ptr;
+ int ret;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ for (c = 0; c < NTIMES; c++) {
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i + c;
+
+ /* Simulate a device reading system memory. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer,
+ npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ ASSERT_EQ(buffer->faults, 1);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i + c);
+
+ hmm_buffer_free(buffer);
+ }
+}
+
+void *unmap_buffer(void *p)
+{
+ struct hmm_buffer *buffer = p;
+
+ /* Delay for a bit and then unmap buffer while it is being read. */
+ hmm_nanosleep(hmm_random() % 32000);
+ munmap(buffer->ptr + buffer->size / 2, buffer->size / 2);
+ buffer->ptr = NULL;
+
+ return NULL;
+}
+
+/*
+ * Try reading anonymous memory while it is being unmapped.
+ */
+TEST_F(hmm, anon_teardown)
+{
+ unsigned long npages;
+ unsigned long size;
+ unsigned long c;
+ void *ret;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ for (c = 0; c < NTIMES; ++c) {
+ pthread_t thread;
+ struct hmm_buffer *buffer;
+ unsigned long i;
+ int *ptr;
+ int rc;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i + c;
+
+ rc = pthread_create(&thread, NULL, unmap_buffer, buffer);
+ ASSERT_EQ(rc, 0);
+
+ /* Simulate a device reading system memory. */
+ rc = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer,
+ npages);
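+		/* The read may race with the concurrent unmap; only verify on success. */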
+ if (rc == 0) {
+ ASSERT_EQ(buffer->cpages, npages);
+ ASSERT_EQ(buffer->faults, 1);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror;
+ i < size / sizeof(*ptr);
+ ++i)
+ ASSERT_EQ(ptr[i], i + c);
+ }
+
+ pthread_join(thread, &ret);
+ hmm_buffer_free(buffer);
+ }
+}
+
+/*
+ * Test memory snapshot without faulting in pages accessed by the device.
+ */
+TEST_F(hmm2, snapshot)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ int *ptr;
+ unsigned char *p;
+ unsigned char *m;
+ int ret;
+ int val;
+
+ npages = 7;
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(npages);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ /* Reserve a range of addresses. */
+ buffer->ptr = mmap(NULL, size,
+ PROT_NONE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+ p = buffer->ptr;
+
+ /* Punch a hole after the first page address. */
+ ret = munmap(buffer->ptr + self->page_size, self->page_size);
+ ASSERT_EQ(ret, 0);
+
+	/* Page 2 will be a read-only zero page. */
+ ret = mprotect(buffer->ptr + 2 * self->page_size, self->page_size,
+ PROT_READ);
+ ASSERT_EQ(ret, 0);
+ ptr = (int *)(buffer->ptr + 2 * self->page_size);
+ val = *ptr + 3;
+ ASSERT_EQ(val, 3);
+
+ /* Page 3 will be read-only. */
+ ret = mprotect(buffer->ptr + 3 * self->page_size, self->page_size,
+ PROT_READ | PROT_WRITE);
+ ASSERT_EQ(ret, 0);
+ ptr = (int *)(buffer->ptr + 3 * self->page_size);
+ *ptr = val;
+ ret = mprotect(buffer->ptr + 3 * self->page_size, self->page_size,
+ PROT_READ);
+ ASSERT_EQ(ret, 0);
+
+	/* Pages 4-6 will be read-write. */
+ ret = mprotect(buffer->ptr + 4 * self->page_size, 3 * self->page_size,
+ PROT_READ | PROT_WRITE);
+ ASSERT_EQ(ret, 0);
+ ptr = (int *)(buffer->ptr + 4 * self->page_size);
+ *ptr = val;
+
+ /* Page 5 will be migrated to device 0. */
+ buffer->ptr = p + 5 * self->page_size;
+ ret = hmm_dmirror_cmd(self->fd0, HMM_DMIRROR_MIGRATE, buffer, 1);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, 1);
+
+ /* Page 6 will be migrated to device 1. */
+ buffer->ptr = p + 6 * self->page_size;
+ ret = hmm_dmirror_cmd(self->fd1, HMM_DMIRROR_MIGRATE, buffer, 1);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, 1);
+
+ /* Simulate a device snapshotting CPU pagetables. */
+ buffer->ptr = p;
+ ret = hmm_dmirror_cmd(self->fd0, HMM_DMIRROR_SNAPSHOT, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device saw. */
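+	/* The mirror holds one HMM_DMIRROR_PROT_* flag byte per page. */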
+ m = buffer->mirror;
+ ASSERT_EQ(m[0], HMM_DMIRROR_PROT_ERROR);
+ ASSERT_EQ(m[1], HMM_DMIRROR_PROT_ERROR);
+ ASSERT_EQ(m[2], HMM_DMIRROR_PROT_ZERO | HMM_DMIRROR_PROT_READ);
+ ASSERT_EQ(m[3], HMM_DMIRROR_PROT_READ);
+ ASSERT_EQ(m[4], HMM_DMIRROR_PROT_WRITE);
+ ASSERT_EQ(m[5], HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL |
+ HMM_DMIRROR_PROT_WRITE);
+ ASSERT_EQ(m[6], HMM_DMIRROR_PROT_NONE);
+
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Test two devices reading the same memory (double mapped).
+ */
+TEST_F(hmm2, double_map)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ int ret;
+
+ npages = 6;
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
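+	/* HMM_DMIRROR_READ copies the whole buffer into the mirror, so allocate size bytes. */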
+	buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ /* Reserve a range of addresses. */
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Make region read-only. */
+ ret = mprotect(buffer->ptr, size, PROT_READ);
+ ASSERT_EQ(ret, 0);
+
+ /* Simulate device 0 reading system memory. */
+ ret = hmm_dmirror_cmd(self->fd0, HMM_DMIRROR_READ, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ ASSERT_EQ(buffer->faults, 1);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ /* Simulate device 1 reading system memory. */
+ ret = hmm_dmirror_cmd(self->fd1, HMM_DMIRROR_READ, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ ASSERT_EQ(buffer->faults, 1);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ /* Punch a hole after the first page address. */
+ ret = munmap(buffer->ptr + self->page_size, self->page_size);
+ ASSERT_EQ(ret, 0);
+
+ hmm_buffer_free(buffer);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests
index 76ca5e7a3951..a3f4f30f0a2e 100755
--- a/tools/testing/selftests/vm/run_vmtests
+++ b/tools/testing/selftests/vm/run_vmtests
@@ -307,4 +307,20 @@ else
echo "[FAIL]"
exitcode=1
fi
+
+echo "running HMM smoke test"
+echo "------------------------------------"
+./test_hmm.sh smoke
+ret_val=$?
+
+if [ $ret_val -eq 0 ]; then
+ echo "[PASS]"
+elif [ $ret_val -eq $ksft_skip ]; then
+ echo "[SKIP]"
+ exitcode=$ksft_skip
+else
+ echo "[FAIL]"
+ exitcode=1
+fi
+
exit $exitcode
diff --git a/tools/testing/selftests/vm/test_hmm.sh b/tools/testing/selftests/vm/test_hmm.sh
new file mode 100755
index 000000000000..0647b525a625
--- /dev/null
+++ b/tools/testing/selftests/vm/test_hmm.sh
@@ -0,0 +1,97 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (C) 2018 Uladzislau Rezki (Sony) <urezki@gmail.com>
+#
+# This is a test script for the HMM (Heterogeneous Memory Management) test
+# driver. It is just a kernel module loader: it loads the test_hmm module,
+# creates the mirror device nodes and then runs the hmm-tests selftest
+# program against them.
+
+TEST_NAME="test_hmm"
+DRIVER="test_hmm"
+
+# 1 if fails
+exitcode=1
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+check_test_requirements()
+{
+ uid=$(id -u)
+ if [ $uid -ne 0 ]; then
+ echo "$0: Must be run as root"
+ exit $ksft_skip
+ fi
+
+ if ! which modprobe > /dev/null 2>&1; then
+ echo "$0: You need modprobe installed"
+ exit $ksft_skip
+ fi
+
+ if ! modinfo $DRIVER > /dev/null 2>&1; then
+ echo "$0: You must have the following enabled in your kernel:"
+ echo "CONFIG_TEST_HMM=m"
+ exit $ksft_skip
+ fi
+}
+
+load_driver()
+{
+ modprobe $DRIVER > /dev/null 2>&1
+ if [ $? == 0 ]; then
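+		# The driver registers two mirror minor devices; create a node for each.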
+ major=$(awk "\$2==\"HMM_DMIRROR\" {print \$1}" /proc/devices)
+ mknod /dev/hmm_dmirror0 c $major 0
+ mknod /dev/hmm_dmirror1 c $major 1
+ fi
+}
+
+unload_driver()
+{
+ modprobe -r $DRIVER > /dev/null 2>&1
+ rm -f /dev/hmm_dmirror?
+}
+
+run_smoke()
+{
+ echo "Running smoke test. Note, this test provides basic coverage."
+
+ load_driver
+ $(dirname "${BASH_SOURCE[0]}")/hmm-tests
+ unload_driver
+}
+
+usage()
+{
+ echo -n "Usage: $0"
+ echo
+ echo "Example usage:"
+ echo
+ echo "# Shows help message"
+ echo "./${TEST_NAME}.sh"
+ echo
+ echo "# Smoke testing"
+ echo "./${TEST_NAME}.sh smoke"
+ echo
+ exit 0
+}
+
+function run_test()
+{
+ if [ $# -eq 0 ]; then
+ usage
+ else
+ if [ "$1" = "smoke" ]; then
+ run_smoke
+ else
+ usage
+ fi
+ fi
+}
+
+check_test_requirements
+run_test "$@"
+
+exit 0
diff --git a/tools/testing/selftests/wireguard/qemu/debug.config b/tools/testing/selftests/wireguard/qemu/debug.config
index 9803dbb54181..b50c2085c1ac 100644
--- a/tools/testing/selftests/wireguard/qemu/debug.config
+++ b/tools/testing/selftests/wireguard/qemu/debug.config
@@ -57,7 +57,6 @@ CONFIG_RCU_EQS_DEBUG=y
CONFIG_USER_STACKTRACE_SUPPORT=y
CONFIG_DEBUG_SG=y
CONFIG_DEBUG_NOTIFIERS=y
-CONFIG_DOUBLEFAULT=y
CONFIG_X86_DEBUG_FPU=y
CONFIG_DEBUG_SECTION_MISMATCH=y
CONFIG_DEBUG_PAGEALLOC=y
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index 15e5b037f92d..f1e07fae84e9 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -51,6 +51,7 @@ static void async_pf_execute(struct work_struct *work)
unsigned long addr = apf->addr;
gpa_t cr2_or_gpa = apf->cr2_or_gpa;
int locked = 1;
+ bool first;
might_sleep();
@@ -69,10 +70,14 @@ static void async_pf_execute(struct work_struct *work)
kvm_arch_async_page_present(vcpu, apf);
spin_lock(&vcpu->async_pf.lock);
+ first = list_empty(&vcpu->async_pf.done);
list_add_tail(&apf->link, &vcpu->async_pf.done);
apf->vcpu = NULL;
spin_unlock(&vcpu->async_pf.lock);
+ if (!IS_ENABLED(CONFIG_KVM_ASYNC_PF_SYNC) && first)
+ kvm_arch_async_page_present_queued(vcpu);
+
/*
* apf may be freed by kvm_check_async_pf_completion() after
* this point
@@ -80,8 +85,7 @@ static void async_pf_execute(struct work_struct *work)
trace_kvm_async_pf_completed(addr, cr2_or_gpa);
- if (swq_has_sleeper(&vcpu->wq))
- swake_up_one(&vcpu->wq);
+ rcuwait_wake_up(&vcpu->wait);
mmput(mm);
kvm_put_kvm(vcpu->kvm);
@@ -135,7 +139,7 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
struct kvm_async_pf *work;
while (!list_empty_careful(&vcpu->async_pf.done) &&
- kvm_arch_can_inject_async_page_present(vcpu)) {
+ kvm_arch_can_dequeue_async_page_present(vcpu)) {
spin_lock(&vcpu->async_pf.lock);
work = list_first_entry(&vcpu->async_pf.done, typeof(*work),
link);
@@ -202,6 +206,7 @@ retry_sync:
int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu)
{
struct kvm_async_pf *work;
+ bool first;
if (!list_empty_careful(&vcpu->async_pf.done))
return 0;
@@ -214,9 +219,13 @@ int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu)
INIT_LIST_HEAD(&work->queue); /* for list_del to work */
spin_lock(&vcpu->async_pf.lock);
+ first = list_empty(&vcpu->async_pf.done);
list_add_tail(&work->link, &vcpu->async_pf.done);
spin_unlock(&vcpu->async_pf.lock);
+ if (!IS_ENABLED(CONFIG_KVM_ASYNC_PF_SYNC) && first)
+ kvm_arch_async_page_present_queued(vcpu);
+
vcpu->async_pf.queued++;
return 0;
}
diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c
index 00c747dbc82e..e2c197fd4f9d 100644
--- a/virt/kvm/coalesced_mmio.c
+++ b/virt/kvm/coalesced_mmio.c
@@ -119,7 +119,7 @@ int kvm_coalesced_mmio_init(struct kvm *kvm)
/*
* We're using this spinlock to sync access to the coalesced ring.
- * The list doesn't need it's own lock since device registration and
+ * The list doesn't need its own lock since device registration and
* unregistration should only happen when kvm->slots_lock is held.
*/
spin_lock_init(&kvm->ring_lock);
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 67b6fc153e9c..ef7ed916ad4a 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -116,7 +116,7 @@ irqfd_shutdown(struct work_struct *work)
struct kvm *kvm = irqfd->kvm;
u64 cnt;
- /* Make sure irqfd has been initalized in assign path. */
+ /* Make sure irqfd has been initialized in assign path. */
synchronize_srcu(&kvm->irq_srcu);
/*
@@ -721,7 +721,7 @@ ioeventfd_in_range(struct _ioeventfd *p, gpa_t addr, int len, const void *val)
return false;
}
- return _val == p->datamatch ? true : false;
+ return _val == p->datamatch;
}
/* MMIO/PIO writes trigger an event if the addr/val match */
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 731c1e517716..7fa1e38e1659 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -349,7 +349,7 @@ static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
vcpu->kvm = kvm;
vcpu->vcpu_id = id;
vcpu->pid = NULL;
- init_swait_queue_head(&vcpu->wq);
+ rcuwait_init(&vcpu->wait);
kvm_async_pf_vcpu_init(vcpu);
vcpu->pre_pcpu = -1;
@@ -718,6 +718,8 @@ static struct kvm *kvm_create_vm(unsigned long type)
goto out_err_no_arch_destroy_vm;
}
+ kvm->max_halt_poll_ns = halt_poll_ns;
+
r = kvm_arch_init_vm(kvm, type);
if (r)
goto out_err_no_arch_destroy_vm;
@@ -1223,10 +1225,9 @@ int __kvm_set_memory_region(struct kvm *kvm,
if (mem->guest_phys_addr & (PAGE_SIZE - 1))
return -EINVAL;
/* We can read the guest memory with __xxx_user() later on. */
- if ((id < KVM_USER_MEM_SLOTS) &&
- ((mem->userspace_addr & (PAGE_SIZE - 1)) ||
+ if ((mem->userspace_addr & (PAGE_SIZE - 1)) ||
!access_ok((void __user *)(unsigned long)mem->userspace_addr,
- mem->memory_size)))
+ mem->memory_size))
return -EINVAL;
if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_MEM_SLOTS_NUM)
return -EINVAL;
@@ -1610,16 +1611,13 @@ struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn
{
return __gfn_to_memslot(kvm_vcpu_memslots(vcpu), gfn);
}
+EXPORT_SYMBOL_GPL(kvm_vcpu_gfn_to_memslot);
bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
{
struct kvm_memory_slot *memslot = gfn_to_memslot(kvm, gfn);
- if (!memslot || memslot->id >= KVM_USER_MEM_SLOTS ||
- memslot->flags & KVM_MEMSLOT_INVALID)
- return false;
-
- return true;
+ return kvm_is_visible_memslot(memslot);
}
EXPORT_SYMBOL_GPL(kvm_is_visible_gfn);
@@ -2511,13 +2509,15 @@ int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
}
EXPORT_SYMBOL_GPL(kvm_write_guest_cached);
-int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
- void *data, unsigned long len)
+int kvm_read_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
+ void *data, unsigned int offset,
+ unsigned long len)
{
struct kvm_memslots *slots = kvm_memslots(kvm);
int r;
+ gpa_t gpa = ghc->gpa + offset;
- BUG_ON(len > ghc->len);
+ BUG_ON(len + offset > ghc->len);
if (slots->generation != ghc->generation) {
if (__kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len))
@@ -2528,14 +2528,21 @@ int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
return -EFAULT;
if (unlikely(!ghc->memslot))
- return kvm_read_guest(kvm, ghc->gpa, data, len);
+ return kvm_read_guest(kvm, gpa, data, len);
- r = __copy_from_user(data, (void __user *)ghc->hva, len);
+ r = __copy_from_user(data, (void __user *)ghc->hva + offset, len);
if (r)
return -EFAULT;
return 0;
}
+EXPORT_SYMBOL_GPL(kvm_read_guest_offset_cached);
+
+int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
+ void *data, unsigned long len)
+{
+ return kvm_read_guest_offset_cached(kvm, ghc, data, 0, len);
+}
EXPORT_SYMBOL_GPL(kvm_read_guest_cached);
int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len)
@@ -2673,19 +2680,27 @@ out:
return ret;
}
+static inline void
+update_halt_poll_stats(struct kvm_vcpu *vcpu, u64 poll_ns, bool waited)
+{
+ if (waited)
+ vcpu->stat.halt_poll_fail_ns += poll_ns;
+ else
+ vcpu->stat.halt_poll_success_ns += poll_ns;
+}
+
/*
* The vCPU has executed a HLT instruction with in-kernel mode enabled.
*/
void kvm_vcpu_block(struct kvm_vcpu *vcpu)
{
- ktime_t start, cur;
- DECLARE_SWAITQUEUE(wait);
+ ktime_t start, cur, poll_end;
bool waited = false;
u64 block_ns;
kvm_arch_vcpu_blocking(vcpu);
- start = cur = ktime_get();
+ start = cur = poll_end = ktime_get();
if (vcpu->halt_poll_ns && !kvm_arch_no_poll(vcpu)) {
ktime_t stop = ktime_add_ns(ktime_get(), vcpu->halt_poll_ns);
@@ -2701,12 +2716,13 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
++vcpu->stat.halt_poll_invalid;
goto out;
}
- cur = ktime_get();
+ poll_end = cur = ktime_get();
} while (single_task_running() && ktime_before(cur, stop));
}
+ prepare_to_rcuwait(&vcpu->wait);
for (;;) {
- prepare_to_swait_exclusive(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
+ set_current_state(TASK_INTERRUPTIBLE);
if (kvm_vcpu_check_block(vcpu) < 0)
break;
@@ -2714,25 +2730,28 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
waited = true;
schedule();
}
-
- finish_swait(&vcpu->wq, &wait);
+ finish_rcuwait(&vcpu->wait);
cur = ktime_get();
out:
kvm_arch_vcpu_unblocking(vcpu);
block_ns = ktime_to_ns(cur) - ktime_to_ns(start);
+ update_halt_poll_stats(
+ vcpu, ktime_to_ns(ktime_sub(poll_end, start)), waited);
+
if (!kvm_arch_no_poll(vcpu)) {
if (!vcpu_valid_wakeup(vcpu)) {
shrink_halt_poll_ns(vcpu);
- } else if (halt_poll_ns) {
+ } else if (vcpu->kvm->max_halt_poll_ns) {
if (block_ns <= vcpu->halt_poll_ns)
;
/* we had a long block, shrink polling */
- else if (vcpu->halt_poll_ns && block_ns > halt_poll_ns)
+ else if (vcpu->halt_poll_ns &&
+ block_ns > vcpu->kvm->max_halt_poll_ns)
shrink_halt_poll_ns(vcpu);
/* we had a short halt and our poll time is too small */
- else if (vcpu->halt_poll_ns < halt_poll_ns &&
- block_ns < halt_poll_ns)
+ else if (vcpu->halt_poll_ns < vcpu->kvm->max_halt_poll_ns &&
+ block_ns < vcpu->kvm->max_halt_poll_ns)
grow_halt_poll_ns(vcpu);
} else {
vcpu->halt_poll_ns = 0;
@@ -2746,11 +2765,10 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_block);
bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu)
{
- struct swait_queue_head *wqp;
+ struct rcuwait *waitp;
- wqp = kvm_arch_vcpu_wq(vcpu);
- if (swq_has_sleeper(wqp)) {
- swake_up_one(wqp);
+ waitp = kvm_arch_vcpu_get_wait(vcpu);
+ if (rcuwait_wake_up(waitp)) {
WRITE_ONCE(vcpu->ready, true);
++vcpu->stat.halt_wakeup;
return true;
@@ -2807,7 +2825,7 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to);
*
* (a) VCPU which has not done pl-exit or cpu relax intercepted recently
* (preempted lock holder), indicated by @in_spin_loop.
- * Set at the beiginning and cleared at the end of interception/PLE handler.
+ * Set at the beginning and cleared at the end of interception/PLE handler.
*
* (b) VCPU which has done pl-exit/ cpu relax intercepted but did not get
* chance last time (mostly it has become eligible now since we have probably
@@ -2892,7 +2910,8 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode)
continue;
if (vcpu == me)
continue;
- if (swait_active(&vcpu->wq) && !vcpu_dy_runnable(vcpu))
+ if (rcuwait_active(&vcpu->wait) &&
+ !vcpu_dy_runnable(vcpu))
continue;
if (READ_ONCE(vcpu->preempted) && yield_to_kernel_mode &&
!kvm_arch_vcpu_in_kernel(vcpu))
@@ -3039,8 +3058,6 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
if (r)
goto vcpu_free_run_page;
- kvm_create_vcpu_debugfs(vcpu);
-
mutex_lock(&kvm->lock);
if (kvm_get_vcpu_by_id(kvm, id)) {
r = -EEXIST;
@@ -3069,11 +3086,11 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
mutex_unlock(&kvm->lock);
kvm_arch_vcpu_postcreate(vcpu);
+ kvm_create_vcpu_debugfs(vcpu);
return r;
unlock_vcpu_destroy:
mutex_unlock(&kvm->lock);
- debugfs_remove_recursive(vcpu->debugfs_dentry);
kvm_arch_vcpu_destroy(vcpu);
vcpu_free_run_page:
free_page((unsigned long)vcpu->run);
@@ -3143,7 +3160,7 @@ static long kvm_vcpu_ioctl(struct file *filp,
synchronize_rcu();
put_pid(oldpid);
}
- r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run);
+ r = kvm_arch_vcpu_ioctl_run(vcpu);
trace_kvm_userspace_exit(vcpu->run->exit_reason, r);
break;
}
@@ -3168,7 +3185,6 @@ out_free1:
case KVM_SET_REGS: {
struct kvm_regs *kvm_regs;
- r = -ENOMEM;
kvm_regs = memdup_user(argp, sizeof(*kvm_regs));
if (IS_ERR(kvm_regs)) {
r = PTR_ERR(kvm_regs);
@@ -3524,6 +3540,7 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
case KVM_CAP_IOEVENTFD_ANY_LENGTH:
case KVM_CAP_CHECK_EXTENSION_VM:
case KVM_CAP_ENABLE_CAP_VM:
+ case KVM_CAP_HALT_POLL:
return 1;
#ifdef CONFIG_KVM_MMIO
case KVM_CAP_COALESCED_MMIO:
@@ -3574,6 +3591,13 @@ static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm,
return 0;
}
#endif
+ case KVM_CAP_HALT_POLL: {
+ if (cap->flags || cap->args[0] != (unsigned int)cap->args[0])
+ return -EINVAL;
+
+ kvm->max_halt_poll_ns = cap->args[0];
+ return 0;
+ }
default:
return kvm_vm_ioctl_enable_cap(kvm, cap);
}
@@ -4647,6 +4671,7 @@ struct kvm_vcpu *kvm_get_running_vcpu(void)
return vcpu;
}
+EXPORT_SYMBOL_GPL(kvm_get_running_vcpu);
/**
* kvm_get_running_vcpus - get the per-CPU array of currently running vcpus.